summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/Makefile13
-rw-r--r--usr/src/Makefile.lint4
-rw-r--r--usr/src/Makefile.master36
-rw-r--r--usr/src/Makefile.master.643
-rw-r--r--usr/src/Targetdirs11
-rw-r--r--usr/src/cmd/Makefile28
-rw-r--r--usr/src/cmd/Makefile.check4
-rw-r--r--usr/src/cmd/cmd-inet/etc/services3
-rw-r--r--usr/src/cmd/cmd-inet/etc/sock2path.d/system%2Fkernel4
-rw-r--r--usr/src/cmd/cmd-inet/lib/ipmgmtd/Makefile4
-rw-r--r--usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_door.c37
-rw-r--r--usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_impl.h25
-rw-r--r--usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_main.c76
-rw-r--r--usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_path.c84
-rw-r--r--usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_persist.c39
-rw-r--r--usr/src/cmd/cmd-inet/lib/ipmgmtd/net-ipmgmt7
-rw-r--r--usr/src/cmd/cmd-inet/sbin/dhcpagent/defaults.c36
-rw-r--r--usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c22
-rw-r--r--usr/src/cmd/cmd-inet/usr.lib/wanboot/p12split/Makefile2
-rw-r--r--usr/src/cmd/cmd-inet/usr.lib/wanboot/wanboot-cgi/Makefile2
-rw-r--r--usr/src/cmd/cmd-inet/usr.lib/wpad/Makefile2
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/arp.c7
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/ndp.c9
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/ping/Makefile1
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/routeadm/routeadm.c32
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/snoop/Makefile5
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.c12
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.h4
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c13
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ether.c4
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_rport.c5
-rw-r--r--usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_vxlan.c68
-rw-r--r--usr/src/cmd/column/Makefile43
-rw-r--r--usr/src/cmd/column/THIRDPARTYLICENSE26
-rw-r--r--usr/src/cmd/column/THIRDPARTYLICENSE.descrip1
-rw-r--r--usr/src/cmd/column/column.c335
-rw-r--r--usr/src/cmd/coreadm/coreadm.xml8
-rw-r--r--usr/src/cmd/cron/Makefile36
-rw-r--r--usr/src/cmd/cron/cron.c154
-rw-r--r--usr/src/cmd/cron/cron.h3
-rw-r--r--usr/src/cmd/cron/crontab.c38
-rw-r--r--usr/src/cmd/ctfconvert/Makefile33
-rw-r--r--usr/src/cmd/ctfconvert/ctfconvert.c389
-rw-r--r--usr/src/cmd/ctfdiff/Makefile33
-rw-r--r--usr/src/cmd/ctfdiff/ctfdiff.c518
-rw-r--r--usr/src/cmd/ctfdump/Makefile33
-rw-r--r--usr/src/cmd/ctfdump/ctfdump.c815
-rw-r--r--usr/src/cmd/ctfmerge/Makefile33
-rw-r--r--usr/src/cmd/ctfmerge/ctfmerge.c538
-rw-r--r--usr/src/cmd/devfsadm/Makefile.com1
-rw-r--r--usr/src/cmd/devfsadm/devlink.tab.sh5
-rw-r--r--usr/src/cmd/devfsadm/i386/Makefile3
-rw-r--r--usr/src/cmd/devfsadm/i386/lx_link_i386.c81
-rw-r--r--usr/src/cmd/devfsadm/misc_link.c43
-rw-r--r--usr/src/cmd/dladm/Makefile3
-rw-r--r--usr/src/cmd/dladm/dladm.c957
-rw-r--r--usr/src/cmd/dlmgmtd/dlmgmt_db.c189
-rw-r--r--usr/src/cmd/dlmgmtd/dlmgmt_door.c66
-rw-r--r--usr/src/cmd/dlmgmtd/dlmgmt_impl.h6
-rw-r--r--usr/src/cmd/dlmgmtd/dlmgmt_main.c66
-rw-r--r--usr/src/cmd/dlmgmtd/dlmgmt_util.c17
-rw-r--r--usr/src/cmd/dlmgmtd/svc-dlmgmtd9
-rw-r--r--usr/src/cmd/dlstat/dlstat.c70
-rw-r--r--usr/src/cmd/dtrace/test/cmd/jdtrace/Makefile2
-rw-r--r--usr/src/cmd/dtrace/test/cmd/scripts/dtest.pl2
-rw-r--r--usr/src/cmd/dtrace/test/tst/common/Makefile8
-rw-r--r--usr/src/cmd/dtrace/test/tst/common/java_api/Makefile2
-rw-r--r--usr/src/cmd/dtrace/test/tst/common/llquantize/tst.range.d4
-rw-r--r--usr/src/cmd/dtrace/test/tst/common/mdb/tst.postmort.ksh69
-rw-r--r--usr/src/cmd/dtrace/test/tst/common/ustack/tst.unpriv.c7
-rw-r--r--usr/src/cmd/dtrace/test/tst/common/ustack/tst.unpriv.ksh68
-rw-r--r--usr/src/cmd/dtrace/test/tst/common/ustack/unpriv_helper.d4
-rw-r--r--usr/src/cmd/flowadm/flowadm.c51
-rw-r--r--usr/src/cmd/flowstat/flowstat.c21
-rw-r--r--usr/src/cmd/fs.d/Makefile5
-rw-r--r--usr/src/cmd/fs.d/hyprlofs/Makefile42
-rw-r--r--usr/src/cmd/fs.d/hyprlofs/hlcfg/Makefile30
-rw-r--r--usr/src/cmd/fs.d/hyprlofs/hlcfg/hlcfg.c244
-rw-r--r--usr/src/cmd/fs.d/hyprlofs/mount/Makefile31
-rw-r--r--usr/src/cmd/fs.d/hyprlofs/mount/mount.c148
-rw-r--r--usr/src/cmd/fs.d/lxproc/Makefile32
-rw-r--r--usr/src/cmd/fs.d/lxproc/mount.c140
-rw-r--r--usr/src/cmd/fs.d/nfs/lib/smfcfg.c20
-rw-r--r--usr/src/cmd/fs.d/nfs/lib/smfcfg.h2
-rw-r--r--usr/src/cmd/fs.d/nfs/mount/Makefile14
-rw-r--r--usr/src/cmd/fs.d/nfs/mount/mount.c2
-rw-r--r--usr/src/cmd/fs.d/nfs/mountd/mountd.c8
-rw-r--r--usr/src/cmd/fs.d/nfs/nfsd/nfsd.c8
-rw-r--r--usr/src/cmd/fs.d/nfs/svc/nfs-server4
-rw-r--r--usr/src/cmd/fs.d/nfs/umount/umount.c2
-rw-r--r--usr/src/cmd/halt/halt.c19
-rw-r--r--usr/src/cmd/ibd_upgrade/ibd_delete_link.c2
-rw-r--r--usr/src/cmd/init/Makefile10
-rw-r--r--usr/src/cmd/init/init.c64
-rw-r--r--usr/src/cmd/initpkg/mountall.sh5
-rw-r--r--usr/src/cmd/initpkg/shutdown.sh2
-rw-r--r--usr/src/cmd/initpkg/umountall.sh4
-rw-r--r--usr/src/cmd/ipf/tools/Makefile.tools1
-rw-r--r--usr/src/cmd/ipf/tools/ipfstat.c4
-rw-r--r--usr/src/cmd/ksh/Makefile.com2
-rw-r--r--usr/src/cmd/lofiadm/main.c28
-rw-r--r--usr/src/cmd/machid/Makefile (renamed from usr/src/cmd/ssh/libopenbsd-compat/Makefile)64
-rw-r--r--usr/src/cmd/machid/machid.c137
-rw-r--r--usr/src/cmd/man/Makefile7
-rw-r--r--usr/src/cmd/man/Makefile.com23
-rw-r--r--usr/src/cmd/mdb/Makefile.common6
-rw-r--r--usr/src/cmd/mdb/common/kmdb/kmdb_promif.c7
-rw-r--r--usr/src/cmd/mdb/common/libstandctf/ctf_subr.c9
-rw-r--r--usr/src/cmd/mdb/common/mdb/mdb_cmds.c5
-rw-r--r--usr/src/cmd/mdb/common/mdb/mdb_ctf.c16
-rw-r--r--usr/src/cmd/mdb/common/mdb/mdb_debug.c4
-rw-r--r--usr/src/cmd/mdb/common/mdb/mdb_debug.h4
-rw-r--r--usr/src/cmd/mdb/common/mdb/mdb_demangle.c11
-rw-r--r--usr/src/cmd/mdb/common/mdb/mdb_fmt.c27
-rw-r--r--usr/src/cmd/mdb/common/mdb/mdb_main.c13
-rw-r--r--usr/src/cmd/mdb/common/mdb/mdb_modapi.h27
-rw-r--r--usr/src/cmd/mdb/common/mdb/mdb_print.c45
-rw-r--r--usr/src/cmd/mdb/common/mdb/mdb_tab.c24
-rw-r--r--usr/src/cmd/mdb/common/modules/dtrace/dtrace.c3
-rw-r--r--usr/src/cmd/mdb/common/modules/genunix/Makefile.files3
-rw-r--r--usr/src/cmd/mdb/common/modules/genunix/devinfo.c65
-rw-r--r--usr/src/cmd/mdb/common/modules/genunix/devinfo.h2
-rw-r--r--usr/src/cmd/mdb/common/modules/genunix/dnlc.c104
-rw-r--r--usr/src/cmd/mdb/common/modules/genunix/dnlc.h33
-rw-r--r--usr/src/cmd/mdb/common/modules/genunix/genunix.c194
-rw-r--r--usr/src/cmd/mdb/common/modules/genunix/hotplug.c1
-rw-r--r--usr/src/cmd/mdb/common/modules/genunix/kmem.c4
-rw-r--r--usr/src/cmd/mdb/common/modules/genunix/zone.c41
-rw-r--r--usr/src/cmd/mdb/common/modules/genunix/zone.h1
-rw-r--r--usr/src/cmd/mdb/common/modules/i40e/i40e.c114
-rw-r--r--usr/src/cmd/mdb/common/modules/ipc/ipc.c5
-rw-r--r--usr/src/cmd/mdb/common/modules/libc/libc.c28
-rw-r--r--usr/src/cmd/mdb/common/modules/mpt_sas/mpt_sas.c4
-rw-r--r--usr/src/cmd/mdb/common/modules/v8/mdb_v8.c5709
-rw-r--r--usr/src/cmd/mdb/common/modules/v8/mdb_v8_cfg.c728
-rw-r--r--usr/src/cmd/mdb/common/modules/v8/v8cfg.h55
-rw-r--r--usr/src/cmd/mdb/common/modules/v8/v8dbg.h91
-rw-r--r--usr/src/cmd/mdb/intel/amd64/i40e/Makefile35
-rw-r--r--usr/src/cmd/mdb/intel/amd64/libumem/Makefile1
-rw-r--r--usr/src/cmd/mdb/intel/amd64/v8/Makefile45
-rw-r--r--usr/src/cmd/mdb/intel/ia32/i40e/Makefile33
-rw-r--r--usr/src/cmd/mdb/intel/ia32/libumem/Makefile1
-rw-r--r--usr/src/cmd/mdb/intel/ia32/v8/Makefile44
-rw-r--r--usr/src/cmd/mdb/sparc/v7/libumem/Makefile1
-rw-r--r--usr/src/cmd/mdb/sparc/v9/libumem/Makefile1
-rw-r--r--usr/src/cmd/mdb/test/exit-e/err.nowrite.ksh1
-rw-r--r--usr/src/cmd/mdb/test/exit-e/err.unmapped.ksh1
-rwxr-xr-x[-rw-r--r--]usr/src/cmd/mdb/test/mtest.sh0
-rw-r--r--usr/src/cmd/mdb/test/typedef/tst.dellist.mdb.out0
-rw-r--r--usr/src/cmd/mdb/test/typedef/tst.emptylist.mdb.out0
-rw-r--r--usr/src/cmd/nicstat/Makefile31
-rw-r--r--usr/src/cmd/nicstat/nicstat.pl424
-rw-r--r--usr/src/cmd/nscd/Makefile1
-rw-r--r--usr/src/cmd/nscd/svc-nscd4
-rw-r--r--usr/src/cmd/passwd/Makefile2
-rw-r--r--usr/src/cmd/pgrep/pgrep.c8
-rw-r--r--usr/src/cmd/prstat/prstat.c48
-rw-r--r--usr/src/cmd/prstat/prstat.h2
-rw-r--r--usr/src/cmd/prstat/prutil.c3
-rw-r--r--usr/src/cmd/prtconf/prtconf.c5
-rw-r--r--usr/src/cmd/ps/ps.c14
-rw-r--r--usr/src/cmd/ptools/Makefile.amd643
-rw-r--r--usr/src/cmd/ptools/Makefile.bld61
-rw-r--r--usr/src/cmd/ptools/Makefile.i3863
-rw-r--r--usr/src/cmd/ptools/Makefile.ptool2
-rw-r--r--usr/src/cmd/ptools/Makefile.sparcv93
-rw-r--r--usr/src/cmd/ptools/common/ptools_common.c50
-rw-r--r--usr/src/cmd/ptools/common/ptools_common.h36
-rw-r--r--usr/src/cmd/ptools/pargs/pargs.c49
-rw-r--r--usr/src/cmd/ptools/pfiles/pfiles.c3
-rw-r--r--usr/src/cmd/ptools/pflags/pflags.c5
-rw-r--r--usr/src/cmd/ptools/pmap/pmap.c10
-rw-r--r--usr/src/cmd/ptools/pmap/pmap_common.c5
-rw-r--r--usr/src/cmd/ptools/preap/preap.c8
-rw-r--r--usr/src/cmd/ptools/psig/psig.c33
-rw-r--r--usr/src/cmd/ptools/ptime/ptime.c7
-rw-r--r--usr/src/cmd/ptools/ptree/ptree.c17
-rw-r--r--usr/src/cmd/ptools/pwait/pwait.c29
-rw-r--r--usr/src/cmd/ptools/pwdx/pwdx.c7
-rw-r--r--usr/src/cmd/rcap/common/utils.c75
-rw-r--r--usr/src/cmd/rcap/common/utils.h2
-rw-r--r--usr/src/cmd/rcap/rcapadm/rcapadm.c16
-rw-r--r--usr/src/cmd/rcap/rcapd/rcapd_collection_zone.c141
-rw-r--r--usr/src/cmd/rcap/rcapd/rcapd_scanner.c3
-rw-r--r--usr/src/cmd/rcap/rcapstat/rcapstat.c7
-rw-r--r--usr/src/cmd/rcm_daemon/Makefile.com4
-rw-r--r--usr/src/cmd/savecore/savecore.c8
-rw-r--r--usr/src/cmd/sed/main.c27
-rw-r--r--usr/src/cmd/sendmail/src/Makefile2
-rw-r--r--usr/src/cmd/sgs/elfdump/Makefile.targ2
-rw-r--r--usr/src/cmd/sgs/elfdump/amd64/Makefile2
-rw-r--r--usr/src/cmd/sgs/elfdump/i386/Makefile2
-rw-r--r--usr/src/cmd/sgs/include/conv.h3
-rw-r--r--usr/src/cmd/sgs/lex/Makefile.targ1
-rw-r--r--usr/src/cmd/sgs/lex/common/main.c45
-rw-r--r--usr/src/cmd/sgs/lex/common/once.h12
-rw-r--r--usr/src/cmd/sgs/libconv/common/corenote.c24
-rw-r--r--usr/src/cmd/sgs/libconv/common/corenote.msg7
-rw-r--r--usr/src/cmd/sgs/libconv/common/phdr.c46
-rw-r--r--usr/src/cmd/sgs/libconv/common/phdr.msg30
-rw-r--r--usr/src/cmd/sgs/librtld_db/common/librtld_db.msg6
-rw-r--r--usr/src/cmd/sgs/librtld_db/common/rd_elf.c28
-rw-r--r--usr/src/cmd/sgs/rtld/common/_rtld.h4
-rw-r--r--usr/src/cmd/sgs/rtld/common/analyze.c63
-rw-r--r--usr/src/cmd/sgs/rtld/common/external.c17
-rw-r--r--usr/src/cmd/sgs/rtld/common/globals.c3
-rw-r--r--usr/src/cmd/sgs/rtld/common/rtld.msg7
-rw-r--r--usr/src/cmd/sgs/rtld/common/setup.c10
-rw-r--r--usr/src/cmd/sgs/rtld/common/util.c16
-rw-r--r--usr/src/cmd/sgs/yacc/Makefile.targ1
-rw-r--r--usr/src/cmd/sgs/yacc/common/dextern.h19
-rw-r--r--usr/src/cmd/sgs/yacc/common/y1.c36
-rw-r--r--usr/src/cmd/sgs/yacc/common/y2.c7
-rw-r--r--usr/src/cmd/ssh/Makefile91
-rw-r--r--usr/src/cmd/ssh/Makefile.ssh-common99
-rw-r--r--usr/src/cmd/ssh/README.altprivsep687
-rw-r--r--usr/src/cmd/ssh/THIRDPARTYLICENSE.descrip1
-rw-r--r--usr/src/cmd/ssh/doc/COPYING.Ylonen70
-rw-r--r--usr/src/cmd/ssh/doc/CREDITS87
-rw-r--r--usr/src/cmd/ssh/doc/ChangeLog2590
-rw-r--r--usr/src/cmd/ssh/doc/INSTALL199
-rw-r--r--usr/src/cmd/ssh/doc/LICENCE194
-rw-r--r--usr/src/cmd/ssh/doc/OVERVIEW164
-rw-r--r--usr/src/cmd/ssh/doc/README70
-rw-r--r--usr/src/cmd/ssh/doc/README.Ylonen567
-rw-r--r--usr/src/cmd/ssh/doc/WARNING.RNG79
-rw-r--r--usr/src/cmd/ssh/doc/nchan.ms97
-rw-r--r--usr/src/cmd/ssh/doc/nchan2.ms64
-rw-r--r--usr/src/cmd/ssh/etc/Makefile66
-rw-r--r--usr/src/cmd/ssh/etc/ssh.xml177
-rw-r--r--usr/src/cmd/ssh/etc/ssh_config31
-rw-r--r--usr/src/cmd/ssh/etc/sshd127
-rw-r--r--usr/src/cmd/ssh/etc/sshd_config145
-rw-r--r--usr/src/cmd/ssh/include/altprivsep.h64
-rw-r--r--usr/src/cmd/ssh/include/atomicio.h55
-rw-r--r--usr/src/cmd/ssh/include/auth-options.h46
-rw-r--r--usr/src/cmd/ssh/include/auth-pam.h69
-rw-r--r--usr/src/cmd/ssh/include/auth.h325
-rw-r--r--usr/src/cmd/ssh/include/auth2-pam.h38
-rw-r--r--usr/src/cmd/ssh/include/authfd.h103
-rw-r--r--usr/src/cmd/ssh/include/authfile.h36
-rw-r--r--usr/src/cmd/ssh/include/base64.h29
-rw-r--r--usr/src/cmd/ssh/include/bindresvport.h22
-rw-r--r--usr/src/cmd/ssh/include/bsd-arc4random.h48
-rw-r--r--usr/src/cmd/ssh/include/bsd-cray.h65
-rw-r--r--usr/src/cmd/ssh/include/bsd-cygwin_util.h63
-rw-r--r--usr/src/cmd/ssh/include/bsd-getpeereid.h24
-rw-r--r--usr/src/cmd/ssh/include/bsd-misc.h94
-rw-r--r--usr/src/cmd/ssh/include/bsd-snprintf.h28
-rw-r--r--usr/src/cmd/ssh/include/bsd-waitpid.h61
-rw-r--r--usr/src/cmd/ssh/include/bufaux.h75
-rw-r--r--usr/src/cmd/ssh/include/buffer.h60
-rw-r--r--usr/src/cmd/ssh/include/canohost.h41
-rw-r--r--usr/src/cmd/ssh/include/channels.h263
-rw-r--r--usr/src/cmd/ssh/include/cipher.h103
-rw-r--r--usr/src/cmd/ssh/include/clientloop.h60
-rw-r--r--usr/src/cmd/ssh/include/compat.h93
-rw-r--r--usr/src/cmd/ssh/include/compress.h36
-rw-r--r--usr/src/cmd/ssh/include/config.h983
-rw-r--r--usr/src/cmd/ssh/include/crc32.h32
-rw-r--r--usr/src/cmd/ssh/include/deattack.h43
-rw-r--r--usr/src/cmd/ssh/include/defines.h600
-rw-r--r--usr/src/cmd/ssh/include/dh.h60
-rw-r--r--usr/src/cmd/ssh/include/dirname.h25
-rw-r--r--usr/src/cmd/ssh/include/dispatch.h54
-rw-r--r--usr/src/cmd/ssh/include/entropy.h44
-rw-r--r--usr/src/cmd/ssh/include/fake-gai-errnos.h30
-rw-r--r--usr/src/cmd/ssh/include/fake-getaddrinfo.h57
-rw-r--r--usr/src/cmd/ssh/include/fake-getnameinfo.h30
-rw-r--r--usr/src/cmd/ssh/include/fake-socket.h56
-rw-r--r--usr/src/cmd/ssh/include/g11n.h98
-rw-r--r--usr/src/cmd/ssh/include/getcwd.h24
-rw-r--r--usr/src/cmd/ssh/include/getgrouplist.h26
-rw-r--r--usr/src/cmd/ssh/include/getopt.h24
-rw-r--r--usr/src/cmd/ssh/include/getput.h69
-rw-r--r--usr/src/cmd/ssh/include/groupaccess.h46
-rw-r--r--usr/src/cmd/ssh/include/hostfile.h44
-rw-r--r--usr/src/cmd/ssh/include/includes.h216
-rw-r--r--usr/src/cmd/ssh/include/inet_ntoa.h22
-rw-r--r--usr/src/cmd/ssh/include/inet_ntop.h23
-rw-r--r--usr/src/cmd/ssh/include/kex.h202
-rw-r--r--usr/src/cmd/ssh/include/key.h94
-rw-r--r--usr/src/cmd/ssh/include/log.h91
-rw-r--r--usr/src/cmd/ssh/include/loginrec.h156
-rw-r--r--usr/src/cmd/ssh/include/mac.h44
-rw-r--r--usr/src/cmd/ssh/include/match.h37
-rw-r--r--usr/src/cmd/ssh/include/misc.h79
-rw-r--r--usr/src/cmd/ssh/include/mktemp.h23
-rw-r--r--usr/src/cmd/ssh/include/mpaux.h33
-rw-r--r--usr/src/cmd/ssh/include/msg.h43
-rw-r--r--usr/src/cmd/ssh/include/myproposal.h103
-rw-r--r--usr/src/cmd/ssh/include/openbsd-compat.h83
-rw-r--r--usr/src/cmd/ssh/include/packet.h130
-rw-r--r--usr/src/cmd/ssh/include/pathnames.h194
-rw-r--r--usr/src/cmd/ssh/include/port-aix.h45
-rw-r--r--usr/src/cmd/ssh/include/port-irix.h25
-rw-r--r--usr/src/cmd/ssh/include/progressmeter.h43
-rw-r--r--usr/src/cmd/ssh/include/proxy-io.h27
-rw-r--r--usr/src/cmd/ssh/include/readconf.h179
-rw-r--r--usr/src/cmd/ssh/include/readpass.h37
-rw-r--r--usr/src/cmd/ssh/include/readpassphrase.h60
-rw-r--r--usr/src/cmd/ssh/include/realpath.h24
-rw-r--r--usr/src/cmd/ssh/include/rresvport.h22
-rw-r--r--usr/src/cmd/ssh/include/rsa.h37
-rw-r--r--usr/src/cmd/ssh/include/servconf.h190
-rw-r--r--usr/src/cmd/ssh/include/serverloop.h39
-rw-r--r--usr/src/cmd/ssh/include/session.h92
-rw-r--r--usr/src/cmd/ssh/include/setproctitle.h22
-rw-r--r--usr/src/cmd/ssh/include/sftp-client.h109
-rw-r--r--usr/src/cmd/ssh/include/sftp-common.h66
-rw-r--r--usr/src/cmd/ssh/include/sftp.h109
-rw-r--r--usr/src/cmd/ssh/include/sigact.h99
-rw-r--r--usr/src/cmd/ssh/include/ssh-dss.h44
-rw-r--r--usr/src/cmd/ssh/include/ssh-gss.h168
-rw-r--r--usr/src/cmd/ssh/include/ssh-rsa.h44
-rw-r--r--usr/src/cmd/ssh/include/ssh.h133
-rw-r--r--usr/src/cmd/ssh/include/ssh1.h105
-rw-r--r--usr/src/cmd/ssh/include/ssh2.h184
-rw-r--r--usr/src/cmd/ssh/include/sshconnect.h83
-rw-r--r--usr/src/cmd/ssh/include/sshlogin.h40
-rw-r--r--usr/src/cmd/ssh/include/sshpty.h37
-rw-r--r--usr/src/cmd/ssh/include/sshtty.h58
-rw-r--r--usr/src/cmd/ssh/include/strlcat.h22
-rw-r--r--usr/src/cmd/ssh/include/strlcpy.h22
-rw-r--r--usr/src/cmd/ssh/include/strmode.h23
-rw-r--r--usr/src/cmd/ssh/include/sys-queue.h595
-rw-r--r--usr/src/cmd/ssh/include/sys-tree.h682
-rw-r--r--usr/src/cmd/ssh/include/tildexpand.h28
-rw-r--r--usr/src/cmd/ssh/include/ttymodes.h190
-rw-r--r--usr/src/cmd/ssh/include/uidswap.h30
-rw-r--r--usr/src/cmd/ssh/include/uuencode.h45
-rw-r--r--usr/src/cmd/ssh/include/version.h24
-rw-r--r--usr/src/cmd/ssh/include/xlist.h24
-rw-r--r--usr/src/cmd/ssh/include/xmalloc.h39
-rw-r--r--usr/src/cmd/ssh/include/xmmap.h39
-rw-r--r--usr/src/cmd/ssh/libopenbsd-compat/Makefile.com92
-rw-r--r--usr/src/cmd/ssh/libopenbsd-compat/common/bindresvport.c127
-rw-r--r--usr/src/cmd/ssh/libopenbsd-compat/common/bsd-arc4random.c83
-rw-r--r--usr/src/cmd/ssh/libopenbsd-compat/common/bsd-asprintf.c104
-rw-r--r--usr/src/cmd/ssh/libopenbsd-compat/common/bsd-cray.c802
-rw-r--r--usr/src/cmd/ssh/libopenbsd-compat/common/bsd-cygwin_util.c184
-rw-r--r--usr/src/cmd/ssh/libopenbsd-compat/common/bsd-getpeereid.c80
-rw-r--r--usr/src/cmd/ssh/libopenbsd-compat/common/bsd-misc.c133
-rw-r--r--usr/src/cmd/ssh/libopenbsd-compat/common/bsd-snprintf.c746
-rw-r--r--usr/src/cmd/ssh/libopenbsd-compat/common/bsd-waitpid.c54
-rw-r--r--usr/src/cmd/ssh/libopenbsd-compat/common/dirname.c82
-rw-r--r--usr/src/cmd/ssh/libopenbsd-compat/common/fake-getaddrinfo.c123
-rw-r--r--usr/src/cmd/ssh/libopenbsd-compat/common/fake-getnameinfo.c57
-rw-r--r--usr/src/cmd/ssh/libopenbsd-compat/common/getcwd.c239
-rw-r--r--usr/src/cmd/ssh/libopenbsd-compat/common/getopt.c123
-rw-r--r--usr/src/cmd/ssh/libopenbsd-compat/common/glob.c933
-rw-r--r--usr/src/cmd/ssh/libopenbsd-compat/common/inet_ntop.c232
-rw-r--r--usr/src/cmd/ssh/libopenbsd-compat/common/llib-lopenbsd-compat83
-rw-r--r--usr/src/cmd/ssh/libopenbsd-compat/common/mktemp.c186
-rw-r--r--usr/src/cmd/ssh/libopenbsd-compat/common/port-aix.c58
-rw-r--r--usr/src/cmd/ssh/libopenbsd-compat/common/port-irix.c63
-rw-r--r--usr/src/cmd/ssh/libopenbsd-compat/common/readpassphrase.c192
-rw-r--r--usr/src/cmd/ssh/libopenbsd-compat/common/realpath.c168
-rw-r--r--usr/src/cmd/ssh/libopenbsd-compat/common/setproctitle.c104
-rw-r--r--usr/src/cmd/ssh/libopenbsd-compat/common/sigact.c104
-rw-r--r--usr/src/cmd/ssh/libopenbsd-compat/common/strlcat.c81
-rw-r--r--usr/src/cmd/ssh/libopenbsd-compat/common/strlcpy.c77
-rw-r--r--usr/src/cmd/ssh/libopenbsd-compat/common/strmode.c158
-rw-r--r--usr/src/cmd/ssh/libopenbsd-compat/common/xmmap.c69
-rw-r--r--usr/src/cmd/ssh/libssh/Makefile.com116
-rw-r--r--usr/src/cmd/ssh/libssh/common/addrmatch.c426
-rw-r--r--usr/src/cmd/ssh/libssh/common/atomicio.c118
-rw-r--r--usr/src/cmd/ssh/libssh/common/authfd.c631
-rw-r--r--usr/src/cmd/ssh/libssh/common/authfile.c624
-rw-r--r--usr/src/cmd/ssh/libssh/common/bufaux.c469
-rw-r--r--usr/src/cmd/ssh/libssh/common/buffer.c263
-rw-r--r--usr/src/cmd/ssh/libssh/common/canohost.c391
-rw-r--r--usr/src/cmd/ssh/libssh/common/channels.c3035
-rw-r--r--usr/src/cmd/ssh/libssh/common/cipher-ctr.c172
-rw-r--r--usr/src/cmd/ssh/libssh/common/cipher.c585
-rw-r--r--usr/src/cmd/ssh/libssh/common/compat.c260
-rw-r--r--usr/src/cmd/ssh/libssh/common/compress.c162
-rw-r--r--usr/src/cmd/ssh/libssh/common/crc32.c116
-rw-r--r--usr/src/cmd/ssh/libssh/common/deattack.c178
-rw-r--r--usr/src/cmd/ssh/libssh/common/dh.c289
-rw-r--r--usr/src/cmd/ssh/libssh/common/dispatch.c101
-rw-r--r--usr/src/cmd/ssh/libssh/common/engine.c118
-rw-r--r--usr/src/cmd/ssh/libssh/common/entropy.c156
-rw-r--r--usr/src/cmd/ssh/libssh/common/fatal.c42
-rw-r--r--usr/src/cmd/ssh/libssh/common/g11n.c964
-rw-r--r--usr/src/cmd/ssh/libssh/common/hostfile.c357
-rw-r--r--usr/src/cmd/ssh/libssh/common/kex.c711
-rw-r--r--usr/src/cmd/ssh/libssh/common/kexdh.c89
-rw-r--r--usr/src/cmd/ssh/libssh/common/kexdhc.c144
-rw-r--r--usr/src/cmd/ssh/libssh/common/kexdhs.c144
-rw-r--r--usr/src/cmd/ssh/libssh/common/kexgex.c100
-rw-r--r--usr/src/cmd/ssh/libssh/common/kexgexc.c195
-rw-r--r--usr/src/cmd/ssh/libssh/common/kexgexs.c191
-rw-r--r--usr/src/cmd/ssh/libssh/common/kexgssc.c325
-rw-r--r--usr/src/cmd/ssh/libssh/common/kexgsss.c252
-rw-r--r--usr/src/cmd/ssh/libssh/common/key.c876
-rw-r--r--usr/src/cmd/ssh/libssh/common/llib-lssh130
-rw-r--r--usr/src/cmd/ssh/libssh/common/log.c445
-rw-r--r--usr/src/cmd/ssh/libssh/common/mac.c164
-rw-r--r--usr/src/cmd/ssh/libssh/common/match.c271
-rw-r--r--usr/src/cmd/ssh/libssh/common/misc.c702
-rw-r--r--usr/src/cmd/ssh/libssh/common/mpaux.c48
-rw-r--r--usr/src/cmd/ssh/libssh/common/msg.c75
-rw-r--r--usr/src/cmd/ssh/libssh/common/nchan.c512
-rw-r--r--usr/src/cmd/ssh/libssh/common/packet.c1845
-rw-r--r--usr/src/cmd/ssh/libssh/common/progressmeter.c308
-rw-r--r--usr/src/cmd/ssh/libssh/common/proxy-io.c38
-rw-r--r--usr/src/cmd/ssh/libssh/common/radix.c160
-rw-r--r--usr/src/cmd/ssh/libssh/common/readconf.c1276
-rw-r--r--usr/src/cmd/ssh/libssh/common/readpass.c163
-rw-r--r--usr/src/cmd/ssh/libssh/common/rsa.c146
-rw-r--r--usr/src/cmd/ssh/libssh/common/sftp-common.c227
-rw-r--r--usr/src/cmd/ssh/libssh/common/ssh-dss.c183
-rw-r--r--usr/src/cmd/ssh/libssh/common/ssh-gss.c782
-rw-r--r--usr/src/cmd/ssh/libssh/common/ssh-rsa.c270
-rw-r--r--usr/src/cmd/ssh/libssh/common/tildexpand.c95
-rw-r--r--usr/src/cmd/ssh/libssh/common/ttymodes.c480
-rw-r--r--usr/src/cmd/ssh/libssh/common/uidswap.c257
-rw-r--r--usr/src/cmd/ssh/libssh/common/uuencode.c75
-rw-r--r--usr/src/cmd/ssh/libssh/common/xlist.c85
-rw-r--r--usr/src/cmd/ssh/libssh/common/xmalloc.c86
-rw-r--r--usr/src/cmd/ssh/req.flg10
-rw-r--r--usr/src/cmd/ssh/scp/scp.c1341
-rw-r--r--usr/src/cmd/ssh/sftp-server/sftp-server-main.c51
-rw-r--r--usr/src/cmd/ssh/sftp-server/sftp-server.c1364
-rw-r--r--usr/src/cmd/ssh/sftp/Makefile68
-rw-r--r--usr/src/cmd/ssh/sftp/sftp-client.c1184
-rw-r--r--usr/src/cmd/ssh/sftp/sftp-glob.c153
-rw-r--r--usr/src/cmd/ssh/sftp/sftp.c1690
-rw-r--r--usr/src/cmd/ssh/ssh-add/ssh-add.c394
-rw-r--r--usr/src/cmd/ssh/ssh-agent/Makefile65
-rw-r--r--usr/src/cmd/ssh/ssh-agent/ssh-agent.c1192
-rw-r--r--usr/src/cmd/ssh/ssh-http-proxy-connect/ssh-http-proxy-connect.c284
-rw-r--r--usr/src/cmd/ssh/ssh-keygen/ssh-keygen.c1208
-rw-r--r--usr/src/cmd/ssh/ssh-keyscan/Makefile65
-rw-r--r--usr/src/cmd/ssh/ssh-keyscan/ssh-keyscan.c832
-rw-r--r--usr/src/cmd/ssh/ssh-keysign/Makefile67
-rw-r--r--usr/src/cmd/ssh/ssh-keysign/ssh-keysign.c263
-rw-r--r--usr/src/cmd/ssh/ssh-socks5-proxy-connect/Makefile65
-rw-r--r--usr/src/cmd/ssh/ssh-socks5-proxy-connect/ssh-socks5-proxy-connect.c382
-rw-r--r--usr/src/cmd/ssh/ssh/Makefile76
-rw-r--r--usr/src/cmd/ssh/ssh/clientloop.c1626
-rw-r--r--usr/src/cmd/ssh/ssh/gss-clnt.c191
-rw-r--r--usr/src/cmd/ssh/ssh/ssh.c1245
-rw-r--r--usr/src/cmd/ssh/ssh/sshconnect.c1117
-rw-r--r--usr/src/cmd/ssh/ssh/sshconnect1.c1312
-rw-r--r--usr/src/cmd/ssh/ssh/sshconnect2.c1685
-rw-r--r--usr/src/cmd/ssh/ssh/sshtty.c98
-rw-r--r--usr/src/cmd/ssh/sshd/Makefile119
-rw-r--r--usr/src/cmd/ssh/sshd/altprivsep.c1187
-rw-r--r--usr/src/cmd/ssh/sshd/auth-bsdauth.c126
-rw-r--r--usr/src/cmd/ssh/sshd/auth-chall.c84
-rw-r--r--usr/src/cmd/ssh/sshd/auth-krb4.c370
-rw-r--r--usr/src/cmd/ssh/sshd/auth-krb5.c407
-rw-r--r--usr/src/cmd/ssh/sshd/auth-options.c299
-rw-r--r--usr/src/cmd/ssh/sshd/auth-pam.c617
-rw-r--r--usr/src/cmd/ssh/sshd/auth-passwd.c206
-rw-r--r--usr/src/cmd/ssh/sshd/auth-rh-rsa.c91
-rw-r--r--usr/src/cmd/ssh/sshd/auth-rhosts.c299
-rw-r--r--usr/src/cmd/ssh/sshd/auth-rsa.c328
-rw-r--r--usr/src/cmd/ssh/sshd/auth-sia.c126
-rw-r--r--usr/src/cmd/ssh/sshd/auth-skey.c104
-rw-r--r--usr/src/cmd/ssh/sshd/auth.c794
-rw-r--r--usr/src/cmd/ssh/sshd/auth1.c466
-rw-r--r--usr/src/cmd/ssh/sshd/auth2-chall.c358
-rw-r--r--usr/src/cmd/ssh/sshd/auth2-gss.c533
-rw-r--r--usr/src/cmd/ssh/sshd/auth2-hostbased.c220
-rw-r--r--usr/src/cmd/ssh/sshd/auth2-kbdint.c90
-rw-r--r--usr/src/cmd/ssh/sshd/auth2-none.c139
-rw-r--r--usr/src/cmd/ssh/sshd/auth2-pam.c458
-rw-r--r--usr/src/cmd/ssh/sshd/auth2-passwd.c78
-rw-r--r--usr/src/cmd/ssh/sshd/auth2-pubkey.c359
-rw-r--r--usr/src/cmd/ssh/sshd/auth2.c695
-rw-r--r--usr/src/cmd/ssh/sshd/bsmaudit.c316
-rw-r--r--usr/src/cmd/ssh/sshd/groupaccess.c123
-rw-r--r--usr/src/cmd/ssh/sshd/gss-serv.c516
-rw-r--r--usr/src/cmd/ssh/sshd/loginrec.c1533
-rw-r--r--usr/src/cmd/ssh/sshd/servconf.c1494
-rw-r--r--usr/src/cmd/ssh/sshd/serverloop.c1310
-rw-r--r--usr/src/cmd/ssh/sshd/session.c2641
-rw-r--r--usr/src/cmd/ssh/sshd/sshd.c2051
-rw-r--r--usr/src/cmd/ssh/sshd/sshlogin.c126
-rw-r--r--usr/src/cmd/ssh/sshd/sshpty.c420
-rw-r--r--usr/src/cmd/stat/Makefile11
-rw-r--r--usr/src/cmd/stat/arcstat/Makefile1
-rw-r--r--[-rwxr-xr-x]usr/src/cmd/stat/arcstat/arcstat.pl0
-rw-r--r--usr/src/cmd/stat/vfsstat/Makefile41
-rw-r--r--usr/src/cmd/stat/vfsstat/vfsstat.pl227
-rw-r--r--usr/src/cmd/stat/ziostat/Makefile41
-rwxr-xr-xusr/src/cmd/stat/ziostat/ziostat.pl204
-rw-r--r--usr/src/cmd/svc/configd/configd.h13
-rw-r--r--usr/src/cmd/svc/configd/rc_node.c3
-rw-r--r--usr/src/cmd/svc/milestone/net-routing-setup23
-rw-r--r--usr/src/cmd/svc/milestone/network-location.xml2
-rw-r--r--usr/src/cmd/svc/milestone/network-routing-setup.xml14
-rw-r--r--usr/src/cmd/svc/milestone/network.xml8
-rw-r--r--usr/src/cmd/svc/milestone/single-user.xml2
-rw-r--r--usr/src/cmd/svc/shell/smf_include.sh6
-rw-r--r--usr/src/cmd/svc/startd/graph.c26
-rw-r--r--usr/src/cmd/svc/startd/method.c96
-rw-r--r--usr/src/cmd/svc/startd/startd.h19
-rw-r--r--usr/src/cmd/svc/svcadm/Makefile7
-rw-r--r--usr/src/cmd/svc/svcadm/svcadm.c15
-rw-r--r--usr/src/cmd/svc/svccfg/svccfg.h13
-rw-r--r--usr/src/cmd/svc/svcs/Makefile2
-rw-r--r--usr/src/cmd/svc/svcs/explain.c16
-rw-r--r--usr/src/cmd/svc/svcs/svcs.c94
-rw-r--r--usr/src/cmd/svr4pkg/pkgadd/Makefile2
-rw-r--r--usr/src/cmd/svr4pkg/pkgadm/Makefile2
-rw-r--r--usr/src/cmd/tail/Makefile5
-rw-r--r--usr/src/cmd/truss/print.c14
-rw-r--r--usr/src/cmd/truss/systable.c8
-rw-r--r--usr/src/cmd/varpd/Makefile74
-rw-r--r--usr/src/cmd/varpd/svc-varpd32
-rw-r--r--usr/src/cmd/varpd/varpd.c526
-rw-r--r--usr/src/cmd/varpd/varpd.xml60
-rw-r--r--usr/src/cmd/vi/port/Makefile3
-rw-r--r--usr/src/cmd/vi/port/ex_cmdsub.c2
-rw-r--r--usr/src/cmd/vndadm/Makefile65
-rw-r--r--usr/src/cmd/vndadm/test/Makefile19
-rw-r--r--usr/src/cmd/vndadm/test/Makefile.com43
-rw-r--r--usr/src/cmd/vndadm/test/Makefile.subdirs29
-rw-r--r--usr/src/cmd/vndadm/test/Makefile.targ59
-rw-r--r--usr/src/cmd/vndadm/test/scripts/Makefile28
-rwxr-xr-xusr/src/cmd/vndadm/test/scripts/vndtest.ksh300
-rw-r--r--usr/src/cmd/vndadm/test/tst/Makefile18
-rw-r--r--usr/src/cmd/vndadm/test/tst/cmd/Makefile34
-rw-r--r--usr/src/cmd/vndadm/test/tst/cmd/cmd.common.ksh33
-rw-r--r--usr/src/cmd/vndadm/test/tst/cmd/create.list.ksh30
-rw-r--r--usr/src/cmd/vndadm/test/tst/cmd/create.list.ksh.out2
-rw-r--r--usr/src/cmd/vndadm/test/tst/cmd/create.sdev.ksh25
-rw-r--r--usr/src/cmd/vndadm/test/tst/cmd/create.setbuf.ksh34
-rw-r--r--usr/src/cmd/vndadm/test/tst/cmd/ecreate.destroy.ksh25
-rw-r--r--usr/src/cmd/vndadm/test/tst/cmd/ecreate.setbadprop.ksh24
-rw-r--r--usr/src/cmd/vndadm/test/tst/cmd/ecreate.setbadvalue.ksh24
-rw-r--r--usr/src/cmd/vndadm/test/tst/cmd/ecreate.setbuftoobig.ksh24
-rw-r--r--usr/src/cmd/vndadm/test/tst/cmd/ecreate.setrdonlyprop.ksh24
-rw-r--r--usr/src/cmd/vndadm/test/tst/dld/Makefile27
-rw-r--r--usr/src/cmd/vndadm/test/tst/dld/create.reuse.ksh31
-rw-r--r--usr/src/cmd/vndadm/test/tst/dld/dld.common.ksh29
-rw-r--r--usr/src/cmd/vndadm/test/tst/dld/ecreate.ipfirst.ksh27
-rw-r--r--usr/src/cmd/vndadm/test/tst/dld/ecreate.vndfirst.ksh27
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/Makefile49
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.attach.c63
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.attachnolink.c67
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.badlinkname.c119
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.doublelink.c82
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.gioctlattach.c69
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.link.c76
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.linkexists.c90
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.ngioctlfault.c96
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv1.c69
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv2.c69
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv3.c70
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv4.c75
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv5.c77
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.olink.c77
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.olinknopriv.c83
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/create.rmenolink.c69
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/tst.attachrdonly.c63
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/tst.badioctl.c79
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/tst.basicopenctl.c76
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/tst.gioctlfault.c78
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/tst.gioctlnattach.c100
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/tst.iocsize.ksh54
-rw-r--r--usr/src/cmd/vndadm/test/tst/ioctl/tst.openctlbadflags.c88
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/Makefile44
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/create.badlink.c39
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/create.badpropid.c76
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/create.badpropsize.c63
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/create.badzone.c43
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/create.basic.c49
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/create.enomem.c91
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/create.frameioeagain.c80
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/create.open.c56
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/create.propiter.c79
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/create.proprdonly.c63
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/err.badclose.c33
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/tst.badopen.c49
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/tst.strerror.c30
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/tst.strerror.exe.out37
-rw-r--r--usr/src/cmd/vndadm/test/tst/lib/tst.strsyserror.c50
-rw-r--r--usr/src/cmd/vndadm/vndadm.c872
-rw-r--r--usr/src/cmd/vndstat/Makefile33
-rw-r--r--usr/src/cmd/vndstat/vndstat.c542
-rw-r--r--usr/src/cmd/zfs/zfs_main.c148
-rw-r--r--usr/src/cmd/zlogin/zlogin.c587
-rw-r--r--usr/src/cmd/zoneadm/Makefile11
-rw-r--r--usr/src/cmd/zoneadm/svc-zones2
-rw-r--r--usr/src/cmd/zoneadm/zfs.c16
-rw-r--r--usr/src/cmd/zoneadm/zoneadm.c487
-rw-r--r--usr/src/cmd/zoneadm/zones.xml10
-rw-r--r--usr/src/cmd/zoneadmd/Makefile49
-rw-r--r--usr/src/cmd/zoneadmd/Makefile.com70
-rw-r--r--usr/src/cmd/zoneadmd/amd64/Makefile31
-rw-r--r--usr/src/cmd/zoneadmd/i386/Makefile30
-rw-r--r--usr/src/cmd/zoneadmd/mcap.c1182
-rw-r--r--usr/src/cmd/zoneadmd/vplat.c568
-rw-r--r--usr/src/cmd/zoneadmd/zcons.c144
-rw-r--r--usr/src/cmd/zoneadmd/zfd.c1428
-rw-r--r--usr/src/cmd/zoneadmd/zoneadmd.c696
-rw-r--r--usr/src/cmd/zoneadmd/zoneadmd.h27
-rw-r--r--usr/src/cmd/zonecfg/Makefile4
-rw-r--r--usr/src/cmd/zonecfg/zonecfg.c926
-rw-r--r--usr/src/cmd/zonecfg/zonecfg.h13
-rw-r--r--usr/src/cmd/zonecfg/zonecfg_grammar.y89
-rw-r--r--usr/src/cmd/zonecfg/zonecfg_lex.l29
-rw-r--r--usr/src/cmd/zonename/Makefile10
-rw-r--r--usr/src/cmd/zonestat/zonestatd/zonestatd.c11
-rw-r--r--usr/src/cmd/zpool/zpool_main.c1
-rw-r--r--usr/src/common/brand/lx/lx_auxv.c85
-rw-r--r--usr/src/common/brand/lx/lx_auxv.h32
-rw-r--r--usr/src/common/brand/lx/lx_errno.c206
-rw-r--r--usr/src/common/brand/lx/lx_errno.h29
-rw-r--r--usr/src/common/brand/lx/lx_signum.c339
-rw-r--r--usr/src/common/brand/lx/lx_signum.h114
-rw-r--r--usr/src/common/brand/lx/lx_syscall.h95
-rw-r--r--usr/src/common/brand/lx/tools/Makefile42
-rw-r--r--usr/src/common/brand/lx/tools/README.md39
-rw-r--r--usr/src/common/brand/lx/tools/gen_errno.c444
-rw-r--r--usr/src/common/ctf/ctf_create.c841
-rw-r--r--usr/src/common/ctf/ctf_error.c12
-rw-r--r--usr/src/common/ctf/ctf_hash.c3
-rw-r--r--usr/src/common/ctf/ctf_impl.h90
-rw-r--r--usr/src/common/ctf/ctf_open.c40
-rw-r--r--usr/src/common/ctf/ctf_types.c329
-rw-r--r--usr/src/common/ctf/ctf_util.c43
-rw-r--r--usr/src/common/dis/i386/dis_tables.c80
-rw-r--r--usr/src/common/elfcap/elfcap.c8
-rw-r--r--usr/src/common/elfcap/elfcap.h2
-rw-r--r--usr/src/common/idspace/id_space.c (renamed from usr/src/uts/common/os/id_space.c)35
-rw-r--r--usr/src/common/inet/inet_hash.c359
-rw-r--r--usr/src/common/zfs/zfs_prop.c17
-rw-r--r--usr/src/common/zfs/zfs_prop.h2
-rw-r--r--usr/src/grub/Makefile6
-rw-r--r--usr/src/grub/Makefile.grub1
-rw-r--r--usr/src/grub/grub-0.97/stage2/boot.c13
-rw-r--r--usr/src/grub/grub-0.97/stage2/cmdline.c23
-rw-r--r--usr/src/head/Makefile2
-rw-r--r--usr/src/head/libzonecfg.h63
-rw-r--r--usr/src/head/limits.h4
-rw-r--r--usr/src/head/regexp.h12
-rw-r--r--usr/src/head/resolv_joy.h472
-rw-r--r--usr/src/head/zdoor.h74
-rw-r--r--usr/src/head/zone.h9
-rw-r--r--usr/src/lib/Makefile42
-rw-r--r--usr/src/lib/Makefile.astmsg4
-rw-r--r--usr/src/lib/Makefile.lib32
-rw-r--r--usr/src/lib/Makefile.targ8
-rw-r--r--usr/src/lib/Makefile.usdt28
-rw-r--r--usr/src/lib/brand/Makefile3
-rw-r--r--usr/src/lib/brand/lx/Makefile (renamed from usr/src/cmd/ssh/libssh/Makefile)40
-rw-r--r--usr/src/lib/brand/lx/Makefile.lx (renamed from usr/src/cmd/ssh/etc/ssh-askpass)17
-rw-r--r--usr/src/lib/brand/lx/librtld_db/Makefile (renamed from usr/src/cmd/ssh/ssh-add/Makefile)51
-rw-r--r--usr/src/lib/brand/lx/librtld_db/Makefile.com83
-rw-r--r--usr/src/lib/brand/lx/librtld_db/amd64/Makefile38
-rw-r--r--usr/src/lib/brand/lx/librtld_db/amd64/mapfile-vers (renamed from usr/src/cmd/ssh/Makefile.msg.targ)33
-rw-r--r--usr/src/lib/brand/lx/librtld_db/common/lx_librtld_db.c852
-rw-r--r--usr/src/lib/brand/lx/librtld_db/common/mapfile-vers (renamed from usr/src/tools/ctf/dump/Makefile.com)71
-rw-r--r--usr/src/lib/brand/lx/librtld_db/i386/Makefile33
-rw-r--r--usr/src/lib/brand/lx/lx_brand/Makefile49
-rw-r--r--usr/src/lib/brand/lx/lx_brand/Makefile.com103
-rw-r--r--usr/src/lib/brand/lx/lx_brand/amd64/Makefile42
-rw-r--r--usr/src/lib/brand/lx/lx_brand/amd64/lx_crt.s62
-rw-r--r--usr/src/lib/brand/lx/lx_brand/amd64/lx_handler.s53
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/aio.c556
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/capabilities.c507
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/clock.c344
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/clone.c702
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/debug.c179
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/dir.c82
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/fcntl.c165
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/file.c582
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/fork.c183
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/id.c271
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/lx_brand.c1618
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/lx_provider.d39
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/mapfile47
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/mapfile-vers47
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/mem.c814
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/misc.c795
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/module.c90
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/mount.c1071
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/mount_nfs.c2317
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/pgrp.c172
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/priority.c117
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/ptrace.c105
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/sched.c622
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/sendfile.c145
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/signal.c2381
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/socket.c349
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/stack.c338
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/statfs.c346
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/sysctl.c137
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/sysv_ipc.c975
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/time.c132
-rw-r--r--usr/src/lib/brand/lx/lx_brand/common/truncate.c173
-rw-r--r--usr/src/lib/brand/lx/lx_brand/i386/Makefile48
-rw-r--r--usr/src/lib/brand/lx/lx_brand/i386/lx_crt.s65
-rw-r--r--usr/src/lib/brand/lx/lx_brand/i386/lx_handler.s91
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_aio.h80
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_debug.h (renamed from usr/src/cmd/ssh/sshd/bsmaudit.h)34
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h206
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_mount.h163
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_poll.h65
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h289
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_sigstack.h78
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_socket.h147
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_statfs.h83
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h301
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_sysv_ipc.h222
-rw-r--r--usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h80
-rw-r--r--usr/src/lib/brand/lx/lx_init/Makefile (renamed from usr/src/cmd/ssh/ssh-keygen/Makefile)62
-rw-r--r--usr/src/lib/brand/lx/lx_init/lxinit.c872
-rw-r--r--usr/src/lib/brand/lx/lx_init/pipe_stream.c326
-rw-r--r--usr/src/lib/brand/lx/lx_init/pipe_stream.h48
-rw-r--r--usr/src/lib/brand/lx/lx_init/run_command.c281
-rw-r--r--usr/src/lib/brand/lx/lx_init/run_command.h32
-rw-r--r--usr/src/lib/brand/lx/lx_support/Makefile (renamed from usr/src/cmd/ssh/sftp-server/Makefile)40
-rw-r--r--usr/src/lib/brand/lx/lx_support/lx_support.c368
-rw-r--r--usr/src/lib/brand/lx/lx_vdso/Makefile39
-rw-r--r--usr/src/lib/brand/lx/lx_vdso/Makefile.com85
-rw-r--r--usr/src/lib/brand/lx/lx_vdso/amd64/Makefile39
-rw-r--r--usr/src/lib/brand/lx/lx_vdso/amd64/vdso_subr.s68
-rw-r--r--usr/src/lib/brand/lx/lx_vdso/common/mapfile-vers59
-rw-r--r--usr/src/lib/brand/lx/lx_vdso/common/vdso_main.c159
-rw-r--r--usr/src/lib/brand/lx/lx_vdso/i386/Makefile43
-rw-r--r--usr/src/lib/brand/lx/lx_vdso/i386/vdso_subr.s92
-rw-r--r--usr/src/lib/brand/lx/lx_vdso/tools/Makefile42
-rw-r--r--usr/src/lib/brand/lx/lx_vdso/tools/vdso_tool.c388
-rw-r--r--usr/src/lib/brand/lx/netfiles/Makefile47
-rw-r--r--usr/src/lib/brand/lx/testing/Makefile36
-rw-r--r--usr/src/lib/brand/lx/testing/Readme_ltp291
-rw-r--r--usr/src/lib/brand/lx/testing/ltp_skiplist258
-rw-r--r--usr/src/lib/brand/lx/testing/ltp_tests1
-rw-r--r--usr/src/lib/brand/lx/zone/Makefile70
-rw-r--r--usr/src/lib/brand/lx/zone/SUNWlx.xml34
-rw-r--r--usr/src/lib/brand/lx/zone/SUNWlx26.xml35
-rw-r--r--usr/src/lib/brand/lx/zone/config.xml104
-rw-r--r--usr/src/lib/brand/lx/zone/lx_boot.ksh92
-rw-r--r--usr/src/lib/brand/lx/zone/lx_boot_zone_busybox.ksh168
-rw-r--r--usr/src/lib/brand/lx/zone/lx_boot_zone_debian.ksh172
-rw-r--r--usr/src/lib/brand/lx/zone/lx_boot_zone_redhat.ksh361
-rw-r--r--usr/src/lib/brand/lx/zone/lx_boot_zone_ubuntu.ksh146
-rw-r--r--usr/src/lib/brand/lx/zone/lx_install.ksh194
-rw-r--r--usr/src/lib/brand/lx/zone/platform.xml159
-rw-r--r--usr/src/lib/brand/shared/zone/common.ksh98
-rw-r--r--usr/src/lib/brand/shared/zone/uninstall.ksh50
-rw-r--r--usr/src/lib/common/i386/crtn.s1
-rw-r--r--usr/src/lib/commpage/Makefile.shared.com27
-rw-r--r--usr/src/lib/commpage/Makefile.shared.targ26
-rw-r--r--usr/src/lib/commpage/amd64/cp_subr.s140
-rw-r--r--usr/src/lib/commpage/common/cp_defs.h36
-rw-r--r--usr/src/lib/commpage/common/cp_main.c185
-rw-r--r--usr/src/lib/commpage/i386/cp_subr.s153
-rw-r--r--usr/src/lib/fm/topo/libtopo/common/libtopo.h5
-rw-r--r--usr/src/lib/fm/topo/libtopo/common/mapfile-vers2
-rw-r--r--usr/src/lib/fm/topo/libtopo/common/topo_mod.h5
-rw-r--r--usr/src/lib/fm/topo/libtopo/common/topo_string.c64
-rwxr-xr-x[-rw-r--r--]usr/src/lib/fm/topo/maps/Joyent,Joyent-Compute-Platform-1101/Joyent-Compute-Platform-1101-disk-hc-topology.xmlgenksh10
-rw-r--r--usr/src/lib/fm/topo/maps/i86pc/i86pc-legacy-hc-topology.xml4
-rw-r--r--usr/src/lib/fm/topo/modules/common/disk/disk_mptsas.c65
-rw-r--r--usr/src/lib/fm/topo/modules/common/fac_prov_mptsas/fac_prov_mptsas.c51
-rw-r--r--usr/src/lib/krb5/plugins/preauth/pkinit/Makefile.com2
-rw-r--r--usr/src/lib/libbc/inc/include/sys/socket.h4
-rw-r--r--usr/src/lib/libbe/common/be_list.c1
-rw-r--r--usr/src/lib/libbrand/common/libbrand.c39
-rw-r--r--usr/src/lib/libbrand/common/libbrand.h6
-rw-r--r--usr/src/lib/libbsm/common/adt.c102
-rw-r--r--usr/src/lib/libbunyan/Makefile42
-rw-r--r--usr/src/lib/libbunyan/Makefile.com36
-rw-r--r--usr/src/lib/libbunyan/amd64/Makefile19
-rw-r--r--usr/src/lib/libbunyan/common/bunyan.c913
-rw-r--r--usr/src/lib/libbunyan/common/bunyan.h88
-rw-r--r--usr/src/lib/libbunyan/common/bunyan_provider.d32
-rw-r--r--usr/src/lib/libbunyan/common/llib-lbunyan19
-rw-r--r--usr/src/lib/libbunyan/common/mapfile-vers53
-rw-r--r--usr/src/lib/libbunyan/i386/Makefile18
-rw-r--r--usr/src/lib/libbunyan/sparc/Makefile18
-rw-r--r--usr/src/lib/libbunyan/sparcv9/Makefile19
-rw-r--r--usr/src/lib/libc/Makefile.targ5
-rw-r--r--usr/src/lib/libc/amd64/Makefile9
-rw-r--r--usr/src/lib/libc/amd64/gen/siginfolst.c2
-rw-r--r--usr/src/lib/libc/amd64/sys/__clock_gettime_sys.s (renamed from usr/src/lib/libc/amd64/sys/__clock_gettime.s)17
-rw-r--r--usr/src/lib/libc/i386/Makefile.com9
-rw-r--r--usr/src/lib/libc/i386/gen/siginfolst.c2
-rw-r--r--usr/src/lib/libc/i386/sys/__clock_gettime.c45
-rw-r--r--usr/src/lib/libc/i386/sys/__clock_gettime_sys.s (renamed from usr/src/lib/libc/i386/sys/__clock_gettime.s)18
-rw-r--r--usr/src/lib/libc/inc/thr_inlines.h12
-rw-r--r--usr/src/lib/libc/inc/thr_uberdata.h6
-rw-r--r--usr/src/lib/libc/port/gen/getauxv.c15
-rw-r--r--usr/src/lib/libc/port/gen/getenv.c11
-rw-r--r--usr/src/lib/libc/port/gen/psiginfo.c32
-rw-r--r--usr/src/lib/libc/port/gen/psignal.c9
-rw-r--r--usr/src/lib/libc/port/gen/sh_locks.c2
-rw-r--r--usr/src/lib/libc/port/gen/siglist.c2
-rw-r--r--usr/src/lib/libc/port/gen/str2sig.c3
-rw-r--r--usr/src/lib/libc/port/llib-lc1
-rw-r--r--usr/src/lib/libc/port/mapfile-vers7
-rw-r--r--usr/src/lib/libc/port/sys/inotify.c142
-rw-r--r--usr/src/lib/libc/port/sys/zone.c13
-rw-r--r--usr/src/lib/libc/port/threads/sigaction.c74
-rw-r--r--usr/src/lib/libc/port/threads/thr.c34
-rw-r--r--usr/src/lib/libc/sparc/Makefile.com1
-rw-r--r--usr/src/lib/libc/sparc/gen/siginfolst.c2
-rw-r--r--usr/src/lib/libc/sparcv9/Makefile.com1
-rw-r--r--usr/src/lib/libc/sparcv9/gen/siginfolst.c2
-rw-r--r--usr/src/lib/libctf/Makefile.com41
-rw-r--r--usr/src/lib/libctf/Makefile.shared.com90
-rw-r--r--usr/src/lib/libctf/Makefile.shared.targ30
-rw-r--r--usr/src/lib/libctf/common/ctf_convert.c210
-rw-r--r--usr/src/lib/libctf/common/ctf_diff.c1362
-rw-r--r--usr/src/lib/libctf/common/ctf_dwarf.c2956
-rw-r--r--usr/src/lib/libctf/common/ctf_elfwrite.c422
-rw-r--r--usr/src/lib/libctf/common/ctf_lib.c325
-rw-r--r--usr/src/lib/libctf/common/ctf_merge.c1570
-rw-r--r--usr/src/lib/libctf/common/ctf_subr.c28
-rw-r--r--usr/src/lib/libctf/common/libctf.h48
-rw-r--r--usr/src/lib/libctf/common/libctf_impl.h59
-rw-r--r--usr/src/lib/libctf/common/mapfile-vers36
-rw-r--r--usr/src/lib/libcurses/screen/setupterm.c7
-rw-r--r--usr/src/lib/libdhcpagent/common/dhcpagent_util.c9
-rw-r--r--usr/src/lib/libdladm/Makefile13
-rw-r--r--usr/src/lib/libdladm/Makefile.com9
-rw-r--r--usr/src/lib/libdladm/common/libdladm.c85
-rw-r--r--usr/src/lib/libdladm/common/libdladm.h24
-rw-r--r--usr/src/lib/libdladm/common/libdladm_impl.h7
-rw-r--r--usr/src/lib/libdladm/common/libdllink.c28
-rw-r--r--usr/src/lib/libdladm/common/libdllink.h6
-rw-r--r--usr/src/lib/libdladm/common/libdlmgmt.c19
-rw-r--r--usr/src/lib/libdladm/common/libdloverlay.c887
-rw-r--r--usr/src/lib/libdladm/common/libdloverlay.h107
-rw-r--r--usr/src/lib/libdladm/common/libdlvlan.c2
-rw-r--r--usr/src/lib/libdladm/common/libdlvnic.c38
-rw-r--r--usr/src/lib/libdladm/common/libdlvnic.h3
-rw-r--r--usr/src/lib/libdladm/common/linkprop.c69
-rw-r--r--usr/src/lib/libdladm/common/llib-ldladm2
-rw-r--r--usr/src/lib/libdladm/common/mapfile-vers19
-rw-r--r--usr/src/lib/libdlpi/common/libdlpi.c61
-rw-r--r--usr/src/lib/libdlpi/common/libdlpi.h2
-rw-r--r--usr/src/lib/libdlpi/common/libdlpi_impl.h3
-rw-r--r--usr/src/lib/libdlpi/common/mapfile-vers5
-rw-r--r--usr/src/lib/libdns_sd/java/com/apple/dnssd/DNSSDRecordRegistrar.java12
-rw-r--r--usr/src/lib/libdtrace/Makefile.com10
-rw-r--r--usr/src/lib/libdtrace/common/dt_decl.c4
-rw-r--r--usr/src/lib/libdtrace/common/dt_impl.h1
-rw-r--r--usr/src/lib/libdtrace/common/dt_module.c92
-rw-r--r--usr/src/lib/libdtrace/common/dt_open.c41
-rw-r--r--usr/src/lib/libdtrace/common/dt_options.c73
-rw-r--r--usr/src/lib/libdtrace/common/dt_parser.c4
-rw-r--r--usr/src/lib/libdtrace/common/dt_program.c9
-rw-r--r--usr/src/lib/libdtrace/common/dt_work.c32
-rw-r--r--usr/src/lib/libdtrace/common/dtrace.h1
-rw-r--r--usr/src/lib/libdtrace/common/mac.d.in66
-rw-r--r--usr/src/lib/libdtrace/common/mac.sed.in45
-rw-r--r--usr/src/lib/libdtrace/common/vnd.d28
-rw-r--r--usr/src/lib/libdwarf/Makefile40
-rw-r--r--usr/src/lib/libdwarf/Makefile.com92
-rw-r--r--usr/src/lib/libdwarf/THIRDPARTYLICENSE (renamed from usr/src/tools/ctf/dwarf/THIRDPARTYLICENSE)0
-rw-r--r--usr/src/lib/libdwarf/THIRDPARTYLICENSE.descrip (renamed from usr/src/tools/ctf/dwarf/THIRDPARTYLICENSE.descrip)0
-rw-r--r--usr/src/lib/libdwarf/amd64/Makefile19
-rw-r--r--usr/src/lib/libdwarf/common/cmplrs/dwarf_addr_finder.h (renamed from usr/src/tools/ctf/dwarf/common/cmplrs/dwarf_addr_finder.h)0
-rw-r--r--usr/src/lib/libdwarf/common/config.h (renamed from usr/src/tools/ctf/dwarf/common/config.h)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf.h (renamed from usr/src/tools/ctf/dwarf/common/dwarf.h)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_abbrev.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_abbrev.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_abbrev.h (renamed from usr/src/tools/ctf/dwarf/common/dwarf_abbrev.h)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_addr_finder.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_addr_finder.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_alloc.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_alloc.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_alloc.h (renamed from usr/src/tools/ctf/dwarf/common/dwarf_alloc.h)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_arange.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_arange.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_arange.h (renamed from usr/src/tools/ctf/dwarf/common/dwarf_arange.h)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_base_types.h (renamed from usr/src/tools/ctf/dwarf/common/dwarf_base_types.h)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_die_deliv.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_die_deliv.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_die_deliv.h (renamed from usr/src/tools/ctf/dwarf/common/dwarf_die_deliv.h)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_elf_access.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_elf_access.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_elf_access.h (renamed from usr/src/tools/ctf/dwarf/common/dwarf_elf_access.h)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_error.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_error.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_error.h (renamed from usr/src/tools/ctf/dwarf/common/dwarf_error.h)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_form.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_form.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_frame.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_frame.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_frame.h (renamed from usr/src/tools/ctf/dwarf/common/dwarf_frame.h)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_frame2.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_frame2.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_frame3.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_frame3.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_funcs.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_funcs.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_funcs.h (renamed from usr/src/tools/ctf/dwarf/common/dwarf_funcs.h)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_global.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_global.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_global.h (renamed from usr/src/tools/ctf/dwarf/common/dwarf_global.h)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_harmless.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_harmless.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_harmless.h (renamed from usr/src/tools/ctf/dwarf/common/dwarf_harmless.h)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_incl.h (renamed from usr/src/tools/ctf/dwarf/common/dwarf_incl.h)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_init_finish.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_init_finish.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_leb.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_leb.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_line.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_line.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_line.h (renamed from usr/src/tools/ctf/dwarf/common/dwarf_line.h)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_line2.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_line2.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_loc.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_loc.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_loc.h (renamed from usr/src/tools/ctf/dwarf/common/dwarf_loc.h)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_macro.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_macro.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_macro.h (renamed from usr/src/tools/ctf/dwarf/common/dwarf_macro.h)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_names.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_names.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_names.h (renamed from usr/src/tools/ctf/dwarf/common/dwarf_names.h)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_opaque.h (renamed from usr/src/tools/ctf/dwarf/common/dwarf_opaque.h)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_original_elf_init.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_original_elf_init.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_print_lines.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_print_lines.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_pubtypes.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_pubtypes.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_query.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_query.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_ranges.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_ranges.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_sort_line.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_sort_line.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_string.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_string.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_stubs.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_stubs.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_types.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_types.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_types.h (renamed from usr/src/tools/ctf/dwarf/common/dwarf_types.h)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_util.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_util.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_util.h (renamed from usr/src/tools/ctf/dwarf/common/dwarf_util.h)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_vars.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_vars.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_vars.h (renamed from usr/src/tools/ctf/dwarf/common/dwarf_vars.h)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_weaks.c (renamed from usr/src/tools/ctf/dwarf/common/dwarf_weaks.c)0
-rw-r--r--usr/src/lib/libdwarf/common/dwarf_weaks.h (renamed from usr/src/tools/ctf/dwarf/common/dwarf_weaks.h)0
-rw-r--r--usr/src/lib/libdwarf/common/libdwarf.h (renamed from usr/src/tools/ctf/dwarf/common/libdwarf.h)0
-rw-r--r--usr/src/lib/libdwarf/common/libdwarfdefs.h (renamed from usr/src/tools/ctf/dwarf/common/libdwarfdefs.h)0
-rw-r--r--usr/src/lib/libdwarf/common/malloc_check.c (renamed from usr/src/tools/ctf/dwarf/common/malloc_check.c)0
-rw-r--r--usr/src/lib/libdwarf/common/malloc_check.h (renamed from usr/src/tools/ctf/dwarf/common/malloc_check.h)0
-rw-r--r--usr/src/lib/libdwarf/common/mapfile-vers (renamed from usr/src/tools/ctf/dwarf/common/mapfile-vers)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_alloc.c (renamed from usr/src/tools/ctf/dwarf/common/pro_alloc.c)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_alloc.h (renamed from usr/src/tools/ctf/dwarf/common/pro_alloc.h)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_arange.c (renamed from usr/src/tools/ctf/dwarf/common/pro_arange.c)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_arange.h (renamed from usr/src/tools/ctf/dwarf/common/pro_arange.h)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_die.c (renamed from usr/src/tools/ctf/dwarf/common/pro_die.c)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_die.h (renamed from usr/src/tools/ctf/dwarf/common/pro_die.h)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_encode_nm.c (renamed from usr/src/tools/ctf/dwarf/common/pro_encode_nm.c)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_encode_nm.h (renamed from usr/src/tools/ctf/dwarf/common/pro_encode_nm.h)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_error.c (renamed from usr/src/tools/ctf/dwarf/common/pro_error.c)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_error.h (renamed from usr/src/tools/ctf/dwarf/common/pro_error.h)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_expr.c (renamed from usr/src/tools/ctf/dwarf/common/pro_expr.c)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_expr.h (renamed from usr/src/tools/ctf/dwarf/common/pro_expr.h)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_finish.c (renamed from usr/src/tools/ctf/dwarf/common/pro_finish.c)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_forms.c (renamed from usr/src/tools/ctf/dwarf/common/pro_forms.c)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_frame.c (renamed from usr/src/tools/ctf/dwarf/common/pro_frame.c)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_frame.h (renamed from usr/src/tools/ctf/dwarf/common/pro_frame.h)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_funcs.c (renamed from usr/src/tools/ctf/dwarf/common/pro_funcs.c)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_incl.h (renamed from usr/src/tools/ctf/dwarf/common/pro_incl.h)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_init.c (renamed from usr/src/tools/ctf/dwarf/common/pro_init.c)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_line.c (renamed from usr/src/tools/ctf/dwarf/common/pro_line.c)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_line.h (renamed from usr/src/tools/ctf/dwarf/common/pro_line.h)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_macinfo.c (renamed from usr/src/tools/ctf/dwarf/common/pro_macinfo.c)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_macinfo.h (renamed from usr/src/tools/ctf/dwarf/common/pro_macinfo.h)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_opaque.h (renamed from usr/src/tools/ctf/dwarf/common/pro_opaque.h)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_pubnames.c (renamed from usr/src/tools/ctf/dwarf/common/pro_pubnames.c)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_reloc.c (renamed from usr/src/tools/ctf/dwarf/common/pro_reloc.c)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_reloc.h (renamed from usr/src/tools/ctf/dwarf/common/pro_reloc.h)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_reloc_stream.c (renamed from usr/src/tools/ctf/dwarf/common/pro_reloc_stream.c)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_reloc_stream.h (renamed from usr/src/tools/ctf/dwarf/common/pro_reloc_stream.h)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_reloc_symbolic.c (renamed from usr/src/tools/ctf/dwarf/common/pro_reloc_symbolic.c)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_reloc_symbolic.h (renamed from usr/src/tools/ctf/dwarf/common/pro_reloc_symbolic.h)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_section.c (renamed from usr/src/tools/ctf/dwarf/common/pro_section.c)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_section.h (renamed from usr/src/tools/ctf/dwarf/common/pro_section.h)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_types.c (renamed from usr/src/tools/ctf/dwarf/common/pro_types.c)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_types.h (renamed from usr/src/tools/ctf/dwarf/common/pro_types.h)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_util.h (renamed from usr/src/tools/ctf/dwarf/common/pro_util.h)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_vars.c (renamed from usr/src/tools/ctf/dwarf/common/pro_vars.c)0
-rw-r--r--usr/src/lib/libdwarf/common/pro_weaks.c (renamed from usr/src/tools/ctf/dwarf/common/pro_weaks.c)0
-rw-r--r--usr/src/lib/libdwarf/i386/Makefile18
-rw-r--r--usr/src/lib/libdwarf/sparc/Makefile18
-rw-r--r--usr/src/lib/libdwarf/sparcv9/Makefile19
-rw-r--r--usr/src/lib/libgrubmgmt/common/libgrub_fs.c2
-rw-r--r--usr/src/lib/libidspace/Makefile45
-rw-r--r--usr/src/lib/libidspace/Makefile.com42
-rw-r--r--usr/src/lib/libidspace/amd64/Makefile19
-rw-r--r--usr/src/lib/libidspace/common/libidspace.c25
-rw-r--r--usr/src/lib/libidspace/common/libidspace.h42
-rw-r--r--usr/src/lib/libidspace/common/llib-lidspace19
-rw-r--r--usr/src/lib/libidspace/common/mapfile-vers47
-rw-r--r--usr/src/lib/libidspace/i386/Makefile18
-rw-r--r--usr/src/lib/libidspace/sparc/Makefile18
-rw-r--r--usr/src/lib/libidspace/sparcv9/Makefile19
-rw-r--r--usr/src/lib/libipadm/common/libipadm.c27
-rw-r--r--usr/src/lib/libkmf/plugins/kmf_openssl/Makefile.com4
-rw-r--r--usr/src/lib/libnisdb/db_mindex3.cc4
-rw-r--r--usr/src/lib/libnisdb/db_table.cc4
-rw-r--r--usr/src/lib/libnisdb/nis_db.cc8
-rw-r--r--usr/src/lib/libnsl/common/mapfile-vers2
-rw-r--r--usr/src/lib/libnsl/netselect/netselect.c99
-rw-r--r--usr/src/lib/libnvpair/libnvpair.h2
-rw-r--r--usr/src/lib/libnvpair/mapfile-vers2
-rw-r--r--usr/src/lib/libnvpair/nvpair_json.c284
-rw-r--r--usr/src/lib/libpkg/Makefile.com2
-rw-r--r--usr/src/lib/libproc/common/Pcontrol.c11
-rw-r--r--usr/src/lib/libproc/common/Pcontrol.h7
-rw-r--r--usr/src/lib/libproc/common/Pcore.c1
-rw-r--r--usr/src/lib/libproc/common/Pidle.c1
-rw-r--r--usr/src/lib/libproc/common/Psymtab.c486
-rw-r--r--usr/src/lib/librename/Makefile44
-rw-r--r--usr/src/lib/librename/Makefile.com34
-rw-r--r--usr/src/lib/librename/amd64/Makefile19
-rw-r--r--usr/src/lib/librename/common/librename.c229
-rw-r--r--usr/src/lib/librename/common/librename.h43
-rw-r--r--usr/src/lib/librename/common/llib-lrename19
-rw-r--r--usr/src/lib/librename/common/mapfile-vers41
-rw-r--r--usr/src/lib/librename/i386/Makefile18
-rw-r--r--usr/src/lib/librename/sparc/Makefile18
-rw-r--r--usr/src/lib/librename/sparcv9/Makefile19
-rw-r--r--usr/src/lib/libresolv2_joy/Makefile76
-rw-r--r--usr/src/lib/libresolv2_joy/Makefile.com162
-rw-r--r--usr/src/lib/libresolv2_joy/THIRDPARTYLICENSE185
-rw-r--r--usr/src/lib/libresolv2_joy/THIRDPARTYLICENSE.descrip1
-rw-r--r--usr/src/lib/libresolv2_joy/amd64/Makefile31
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/daemon.c (renamed from usr/src/cmd/ssh/libopenbsd-compat/common/rresvport.c)95
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/ftruncate.c64
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/gettimeofday.c62
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/mktemp.c (renamed from usr/src/cmd/ssh/libopenbsd-compat/common/inet_aton.c)219
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/putenv.c27
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/readv.c39
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/setenv.c (renamed from usr/src/cmd/ssh/libopenbsd-compat/common/setenv.c)171
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/setitimer.c29
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/strcasecmp.c124
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/strdup.c20
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/strerror.c94
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/strpbrk.c70
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/strsep.c (renamed from usr/src/cmd/ssh/libopenbsd-compat/common/getgrouplist.c)111
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/strtoul.c119
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/utimes.c40
-rw-r--r--usr/src/lib/libresolv2_joy/common/bsd/writev.c89
-rw-r--r--usr/src/lib/libresolv2_joy/common/dst/dst_api.c1048
-rw-r--r--usr/src/lib/libresolv2_joy/common/dst/dst_internal.h155
-rw-r--r--usr/src/lib/libresolv2_joy/common/dst/hmac_link.c491
-rw-r--r--usr/src/lib/libresolv2_joy/common/dst/support.c342
-rw-r--r--usr/src/lib/libresolv2_joy/common/inet/inet_cidr_ntop.c263
-rw-r--r--usr/src/lib/libresolv2_joy/common/inet/inet_cidr_pton.c277
-rw-r--r--usr/src/lib/libresolv2_joy/common/inet/inet_data.c46
-rw-r--r--usr/src/lib/libresolv2_joy/common/inet/inet_lnaof.c (renamed from usr/src/cmd/ssh/libopenbsd-compat/common/inet_ntoa.c)45
-rw-r--r--usr/src/lib/libresolv2_joy/common/inet/inet_makeaddr.c68
-rw-r--r--usr/src/lib/libresolv2_joy/common/inet/inet_net_ntop.c279
-rw-r--r--usr/src/lib/libresolv2_joy/common/inet/inet_net_pton.c407
-rw-r--r--usr/src/lib/libresolv2_joy/common/inet/inet_neta.c89
-rw-r--r--usr/src/lib/libresolv2_joy/common/inet/inet_netof.c64
-rw-r--r--usr/src/lib/libresolv2_joy/common/inet/inet_network.c106
-rw-r--r--usr/src/lib/libresolv2_joy/common/inet/nsap_addr.c111
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/dns.c154
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/dns_ho.c1143
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/dns_nw.c591
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/dns_p.h52
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/dns_pr.c268
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/dns_sv.c300
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/gai_strerror.c105
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/gen.c459
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/gen_ho.c391
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/gen_ng.c174
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/gen_nw.c264
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/gen_p.h113
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/gen_pr.c228
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/gen_sv.c229
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/getaddrinfo.c1253
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/gethostent.c1096
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/gethostent_r.c278
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/getnameinfo.c334
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/getnetent.c345
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/getnetent_r.c234
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/getnetgrent.c160
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/getnetgrent_r.c219
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/getprotoent.c176
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/getprotoent_r.c223
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/getservent.c179
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/getservent_r.c242
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/hesiod.c505
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/hesiod_p.h48
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/irp.c583
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/irp_ho.c405
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/irp_ng.c253
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/irp_nw.c348
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/irp_p.h60
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/irp_pr.c327
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/irp_sv.c346
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/irpmarshall.c2301
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/irs_data.c254
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/irs_data.h63
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/irs_p.h51
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/lcl.c142
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/lcl_ho.c578
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/lcl_ng.c446
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/lcl_nw.c373
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/lcl_p.h51
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/lcl_pr.c294
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/lcl_sv.c432
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/nis.c156
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/nis_p.h47
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/nul_ng.c127
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/pathnames.h52
-rw-r--r--usr/src/lib/libresolv2_joy/common/irs/util.c109
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/assertions.c94
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/base64.c (renamed from usr/src/cmd/ssh/libopenbsd-compat/common/base64.c)98
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/bitncmp.c68
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/ctl_clnt.c620
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/ctl_p.c188
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/ctl_p.h28
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/ctl_srvr.c787
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/ev_connects.c369
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/ev_files.c277
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/ev_streams.c308
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/ev_timers.c499
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/ev_waits.c247
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/eventlib.c933
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/eventlib_p.h281
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/heap.c236
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/hex.c119
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/logging.c716
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/logging_p.h61
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/memcluster.c588
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/movefile.c43
-rw-r--r--usr/src/lib/libresolv2_joy/common/isc/tree.c534
-rw-r--r--usr/src/lib/libresolv2_joy/common/llib-lresolv_joy59
-rw-r--r--usr/src/lib/libresolv2_joy/common/mapfile-vers60
-rw-r--r--usr/src/lib/libresolv2_joy/common/nameser/ns_date.c129
-rw-r--r--usr/src/lib/libresolv2_joy/common/nameser/ns_name.c1153
-rw-r--r--usr/src/lib/libresolv2_joy/common/nameser/ns_netint.c61
-rw-r--r--usr/src/lib/libresolv2_joy/common/nameser/ns_newmsg.c273
-rw-r--r--usr/src/lib/libresolv2_joy/common/nameser/ns_parse.c275
-rw-r--r--usr/src/lib/libresolv2_joy/common/nameser/ns_print.c1242
-rw-r--r--usr/src/lib/libresolv2_joy/common/nameser/ns_rdata.c300
-rw-r--r--usr/src/lib/libresolv2_joy/common/nameser/ns_samedomain.c207
-rw-r--r--usr/src/lib/libresolv2_joy/common/nameser/ns_sign.c387
-rw-r--r--usr/src/lib/libresolv2_joy/common/nameser/ns_ttl.c162
-rw-r--r--usr/src/lib/libresolv2_joy/common/nameser/ns_verify.c484
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/herror.c129
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/mtctxres.c135
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/res_comp.c287
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/res_data.c322
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/res_debug.c1252
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/res_debug.h35
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/res_findzonecut.c722
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/res_init.c958
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/res_mkquery.c386
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/res_mkupdate.c1163
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/res_mkupdate.h25
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/res_private.h22
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/res_query.c440
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/res_send.c1120
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/res_sendsigned.c170
-rw-r--r--usr/src/lib/libresolv2_joy/common/resolv/res_update.c213
-rw-r--r--usr/src/lib/libresolv2_joy/common/sunw/sunw_mtctxres.c126
-rw-r--r--usr/src/lib/libresolv2_joy/common/sunw/sunw_updrec.c249
-rw-r--r--usr/src/lib/libresolv2_joy/common/sunw/sunw_wrappers.c23
-rw-r--r--usr/src/lib/libresolv2_joy/i386/Makefile30
-rw-r--r--usr/src/lib/libresolv2_joy/include/Makefile60
-rw-r--r--usr/src/lib/libresolv2_joy/include/arpa/port_inet.h41
-rw-r--r--usr/src/lib/libresolv2_joy/include/arpa/port_nameser.h40
-rw-r--r--usr/src/lib/libresolv2_joy/include/conf/sunoptions.h30
-rw-r--r--usr/src/lib/libresolv2_joy/include/config.h75
-rw-r--r--usr/src/lib/libresolv2_joy/include/err.h62
-rw-r--r--usr/src/lib/libresolv2_joy/include/fd_setsize.h10
-rw-r--r--usr/src/lib/libresolv2_joy/include/hesiod.h39
-rw-r--r--usr/src/lib/libresolv2_joy/include/irp.h107
-rw-r--r--usr/src/lib/libresolv2_joy/include/irs.h348
-rw-r--r--usr/src/lib/libresolv2_joy/include/isc/assertions.h123
-rw-r--r--usr/src/lib/libresolv2_joy/include/isc/ctl.h112
-rw-r--r--usr/src/lib/libresolv2_joy/include/isc/dst.h168
-rw-r--r--usr/src/lib/libresolv2_joy/include/isc/eventlib.h206
-rw-r--r--usr/src/lib/libresolv2_joy/include/isc/heap.h49
-rw-r--r--usr/src/lib/libresolv2_joy/include/isc/irpmarshall.h112
-rw-r--r--usr/src/lib/libresolv2_joy/include/isc/list.h118
-rw-r--r--usr/src/lib/libresolv2_joy/include/isc/logging.h113
-rw-r--r--usr/src/lib/libresolv2_joy/include/isc/memcluster.h50
-rw-r--r--usr/src/lib/libresolv2_joy/include/isc/misc.h44
-rw-r--r--usr/src/lib/libresolv2_joy/include/isc/platform.h42
-rw-r--r--usr/src/lib/libresolv2_joy/include/isc/tree.h59
-rwxr-xr-xusr/src/lib/libresolv2_joy/include/make_os_version34
-rw-r--r--usr/src/lib/libresolv2_joy/include/make_os_version.sh34
-rw-r--r--usr/src/lib/libresolv2_joy/include/port_after.h539
-rw-r--r--usr/src/lib/libresolv2_joy/include/port_before.h201
-rw-r--r--usr/src/lib/libresolv2_joy/include/port_netdb.h188
-rw-r--r--usr/src/lib/libresolv2_joy/include/port_resolv.h42
-rwxr-xr-xusr/src/lib/libresolv2_joy/include/probe_ipv673
-rw-r--r--usr/src/lib/libresolv2_joy/include/probe_ipv6.sh73
-rw-r--r--usr/src/lib/libresolv2_joy/include/res_update.h88
-rw-r--r--usr/src/lib/libresolv2_joy/include/resolv_mt.h47
-rw-r--r--usr/src/lib/libresolv2_joy/include/sunw_port_after.h123
-rw-r--r--usr/src/lib/libresolv2_joy/include/sunw_port_before.h43
-rw-r--r--usr/src/lib/libresolv2_joy/include/sys/bitypes.h37
-rw-r--r--usr/src/lib/libresolv2_joy/include/sys/cdefs.h144
-rw-r--r--usr/src/lib/libresolv2_joy/sparc/Makefile30
-rw-r--r--usr/src/lib/libresolv2_joy/sparcv9/Makefile38
-rw-r--r--usr/src/lib/libscf/inc/libscf.h6
-rw-r--r--usr/src/lib/libshare/nfs/libshare_nfs.c14
-rw-r--r--usr/src/lib/libshell/Makefile1
-rw-r--r--usr/src/lib/libshell/Makefile.doc77
-rw-r--r--usr/src/lib/libshell/common/COMPATIBILITY134
-rw-r--r--usr/src/lib/libshell/common/DESIGN170
-rw-r--r--usr/src/lib/libshell/common/OBSOLETE152
-rw-r--r--usr/src/lib/libshell/common/README232
-rw-r--r--usr/src/lib/libshell/common/RELEASE2204
-rw-r--r--usr/src/lib/libshell/common/TYPES182
-rw-r--r--usr/src/lib/libshell/misc/images/callouts/1.pngbin236 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/callouts/10.pngbin284 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/callouts/2.pngbin261 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/callouts/3.pngbin265 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/callouts/4.pngbin260 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/callouts/5.pngbin261 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/callouts/6.pngbin278 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/callouts/7.pngbin253 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/callouts/8.pngbin275 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/callouts/9.pngbin284 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/tag_bourne.pngbin130 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/tag_i18n.pngbin124 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/tag_ksh.pngbin122 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/tag_ksh88.pngbin124 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/tag_ksh93.pngbin140 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/tag_l10n.pngbin118 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/images/tag_perf.pngbin127 -> 0 bytes
-rw-r--r--usr/src/lib/libshell/misc/shell_styleguide.docbook1464
-rw-r--r--usr/src/lib/libsmartsshd/Makefile48
-rw-r--r--usr/src/lib/libsmartsshd/Makefile.com47
-rw-r--r--usr/src/lib/libsmartsshd/amd64/Makefile (renamed from usr/src/cmd/ssh/libssh/i386/Makefile)6
-rw-r--r--usr/src/lib/libsmartsshd/common/llib-lsmartsshd32
-rw-r--r--usr/src/lib/libsmartsshd/common/mapfile-vers (renamed from usr/src/cmd/ssh/sshd/mapfile-intf)16
-rw-r--r--usr/src/lib/libsmartsshd/common/sshd-plugin.c123
-rw-r--r--usr/src/lib/libsmartsshd/i386/Makefile (renamed from usr/src/cmd/ssh/libssh/sparc/Makefile)5
-rw-r--r--usr/src/lib/libumem/amd64/umem_genasm.c2
-rw-r--r--usr/src/lib/libuutil/common/libuutil.h3
-rw-r--r--usr/src/lib/libuutil/common/libuutil_impl.h10
-rw-r--r--usr/src/lib/libuutil/common/mapfile-vers2
-rw-r--r--usr/src/lib/libvnd/Makefile50
-rw-r--r--usr/src/lib/libvnd/Makefile.com39
-rw-r--r--usr/src/lib/libvnd/amd64/Makefile19
-rw-r--r--usr/src/lib/libvnd/common/libvnd.c550
-rw-r--r--usr/src/lib/libvnd/common/libvnd.h84
-rw-r--r--usr/src/lib/libvnd/common/llib-lvnd19
-rw-r--r--usr/src/lib/libvnd/common/mapfile-vers55
-rw-r--r--usr/src/lib/libvnd/i386/Makefile18
-rw-r--r--usr/src/lib/libwanboot/Makefile.com2
-rw-r--r--usr/src/lib/libzdoor/Makefile48
-rw-r--r--usr/src/lib/libzdoor/Makefile.com50
-rw-r--r--usr/src/lib/libzdoor/amd64/Makefile (renamed from usr/src/cmd/ssh/libopenbsd-compat/i386/Makefile)6
-rw-r--r--usr/src/lib/libzdoor/common/llib-lzdoor34
-rw-r--r--usr/src/lib/libzdoor/common/mapfile-vers48
-rw-r--r--usr/src/lib/libzdoor/common/zdoor-int.c324
-rw-r--r--usr/src/lib/libzdoor/common/zdoor-int.h64
-rw-r--r--usr/src/lib/libzdoor/common/zdoor.c437
-rw-r--r--usr/src/lib/libzdoor/common/zerror.c117
-rw-r--r--usr/src/lib/libzdoor/common/zerror.h48
-rw-r--r--usr/src/lib/libzdoor/common/ztree.c344
-rw-r--r--usr/src/lib/libzdoor/common/ztree.h88
-rw-r--r--usr/src/lib/libzdoor/i386/Makefile (renamed from usr/src/cmd/ssh/libopenbsd-compat/sparc/Makefile)5
-rw-r--r--usr/src/lib/libzfs/common/libzfs.h2
-rw-r--r--usr/src/lib/libzfs/common/libzfs_dataset.c20
-rw-r--r--usr/src/lib/libzfs/common/libzfs_impl.h3
-rw-r--r--usr/src/lib/libzfs/common/libzfs_iter.c15
-rw-r--r--usr/src/lib/libzfs/common/libzfs_pool.c1
-rw-r--r--usr/src/lib/libzfs/common/libzfs_util.c41
-rw-r--r--usr/src/lib/libzfs/common/mapfile-vers3
-rw-r--r--usr/src/lib/libzonecfg/Makefile.com17
-rw-r--r--usr/src/lib/libzonecfg/common/getzoneent.c145
-rw-r--r--usr/src/lib/libzonecfg/common/libzonecfg.c851
-rw-r--r--usr/src/lib/libzonecfg/common/mapfile-vers14
-rw-r--r--usr/src/lib/libzonecfg/dtd/zonecfg.dtd.115
-rw-r--r--usr/src/lib/libzpool/common/sys/zfs_context.h3
-rw-r--r--usr/src/lib/mergeq/mergeq.c606
-rw-r--r--usr/src/lib/mergeq/mergeq.h52
-rw-r--r--usr/src/lib/mergeq/workq.c311
-rw-r--r--usr/src/lib/mergeq/workq.h52
-rw-r--r--usr/src/lib/nsswitch/dns/Makefile.com2
-rw-r--r--usr/src/lib/nsswitch/dns/common/dns_common.h4
-rw-r--r--usr/src/lib/nsswitch/dns/common/dns_mt.c155
-rw-r--r--usr/src/lib/nsswitch/dns/common/gethostent.c8
-rw-r--r--usr/src/lib/nsswitch/dns/common/gethostent6.c4
-rw-r--r--usr/src/lib/pkcs11/pkcs11_tpm/Makefile.com2
-rw-r--r--usr/src/lib/pysolaris/Makefile.com1
-rw-r--r--usr/src/lib/varpd/Makefile33
-rw-r--r--usr/src/lib/varpd/Makefile.plugin19
-rw-r--r--usr/src/lib/varpd/direct/Makefile40
-rw-r--r--usr/src/lib/varpd/direct/Makefile.com38
-rw-r--r--usr/src/lib/varpd/direct/amd64/Makefile19
-rw-r--r--usr/src/lib/varpd/direct/common/libvarpd_direct.c411
-rw-r--r--usr/src/lib/varpd/direct/common/llib-lvarpd_direct18
-rw-r--r--usr/src/lib/varpd/direct/common/mapfile-vers35
-rw-r--r--usr/src/lib/varpd/direct/i386/Makefile18
-rw-r--r--usr/src/lib/varpd/direct/sparc/Makefile18
-rw-r--r--usr/src/lib/varpd/direct/sparcv9/Makefile19
-rw-r--r--usr/src/lib/varpd/files/Makefile40
-rw-r--r--usr/src/lib/varpd/files/Makefile.com42
-rw-r--r--usr/src/lib/varpd/files/amd64/Makefile19
-rw-r--r--usr/src/lib/varpd/files/common/libvarpd_files.c605
-rw-r--r--usr/src/lib/varpd/files/common/libvarpd_files_json.c936
-rw-r--r--usr/src/lib/varpd/files/common/libvarpd_files_json.h52
-rw-r--r--usr/src/lib/varpd/files/common/llib-lvarpd_files18
-rw-r--r--usr/src/lib/varpd/files/common/mapfile-vers35
-rw-r--r--usr/src/lib/varpd/files/i386/Makefile18
-rw-r--r--usr/src/lib/varpd/files/sparc/Makefile18
-rw-r--r--usr/src/lib/varpd/files/sparcv9/Makefile19
-rw-r--r--usr/src/lib/varpd/libvarpd/Makefile55
-rw-r--r--usr/src/lib/varpd/libvarpd/Makefile.com53
-rw-r--r--usr/src/lib/varpd/libvarpd/amd64/Makefile19
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd.c360
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd.h77
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_arp.c650
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_client.c626
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_client.h92
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_door.c469
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_impl.h247
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_overlay.c584
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_panic.c53
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_persist.c586
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_plugin.c269
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_prop.c299
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_provider.h419
-rw-r--r--usr/src/lib/varpd/libvarpd/common/libvarpd_util.c97
-rw-r--r--usr/src/lib/varpd/libvarpd/common/llib-lvarpd19
-rw-r--r--usr/src/lib/varpd/libvarpd/common/mapfile-plugin57
-rw-r--r--usr/src/lib/varpd/libvarpd/common/mapfile-vers113
-rw-r--r--usr/src/lib/varpd/libvarpd/i386/Makefile18
-rw-r--r--usr/src/lib/varpd/libvarpd/sparc/Makefile18
-rw-r--r--usr/src/lib/varpd/libvarpd/sparcv9/Makefile19
-rw-r--r--usr/src/lib/varpd/svp/Makefile40
-rw-r--r--usr/src/lib/varpd/svp/Makefile.com54
-rw-r--r--usr/src/lib/varpd/svp/amd64/Makefile19
-rw-r--r--usr/src/lib/varpd/svp/common/libvarpd_svp.c1138
-rw-r--r--usr/src/lib/varpd/svp/common/libvarpd_svp.h357
-rw-r--r--usr/src/lib/varpd/svp/common/libvarpd_svp_conn.c1031
-rw-r--r--usr/src/lib/varpd/svp/common/libvarpd_svp_crc.c53
-rw-r--r--usr/src/lib/varpd/svp/common/libvarpd_svp_host.c171
-rw-r--r--usr/src/lib/varpd/svp/common/libvarpd_svp_loop.c210
-rw-r--r--usr/src/lib/varpd/svp/common/libvarpd_svp_prot.h236
-rw-r--r--usr/src/lib/varpd/svp/common/libvarpd_svp_remote.c821
-rw-r--r--usr/src/lib/varpd/svp/common/libvarpd_svp_shootdown.c474
-rw-r--r--usr/src/lib/varpd/svp/common/libvarpd_svp_timer.c150
-rw-r--r--usr/src/lib/varpd/svp/common/llib-lvarpd_svp18
-rw-r--r--usr/src/lib/varpd/svp/common/mapfile-vers35
-rw-r--r--usr/src/lib/varpd/svp/i386/Makefile18
-rw-r--r--usr/src/lib/varpd/svp/sparc/Makefile18
-rw-r--r--usr/src/lib/varpd/svp/sparcv9/Makefile19
-rw-r--r--usr/src/lib/xml/os_dtd.c238
-rw-r--r--usr/src/lib/xml/os_dtd.h49
-rw-r--r--usr/src/man/Makefile1
-rw-r--r--usr/src/man/man1/Makefile41
-rw-r--r--usr/src/man/man1/column.1129
-rw-r--r--usr/src/man/man1/crontab.148
-rw-r--r--usr/src/man/man1/ctfdiff.1337
-rw-r--r--usr/src/man/man1/ctfdump.1393
-rw-r--r--usr/src/man/man1/ld.12
-rw-r--r--usr/src/man/man1/ld.so.1.121
-rw-r--r--usr/src/man/man1/machid.125
-rw-r--r--usr/src/man/man1/nawk.19
-rw-r--r--usr/src/man/man1/pargs.126
-rw-r--r--usr/src/man/man1/proc.112
-rw-r--r--usr/src/man/man1/ps.16
-rw-r--r--usr/src/man/man1/sed.122
-rw-r--r--usr/src/man/man1/zlogin.181
-rw-r--r--usr/src/man/man1m/Makefile13
-rw-r--r--usr/src/man/man1m/dladm.1m466
-rw-r--r--usr/src/man/man1m/flowadm.1m49
-rw-r--r--usr/src/man/man1m/mount_tmpfs.1m40
-rw-r--r--usr/src/man/man1m/prstat.1m25
-rw-r--r--usr/src/man/man1m/reboot.1m3
-rw-r--r--usr/src/man/man1m/snoop.1m21
-rw-r--r--usr/src/man/man1m/svc.startd.1m9
-rw-r--r--usr/src/man/man1m/tunefs.1m8
-rw-r--r--usr/src/man/man1m/vfsstat.1m213
-rw-r--r--usr/src/man/man1m/vndadm.1m651
-rw-r--r--usr/src/man/man1m/vndstat.1m163
-rw-r--r--usr/src/man/man1m/zfs.1m2
-rw-r--r--usr/src/man/man1m/zoneadm.1m28
-rw-r--r--usr/src/man/man1m/zonecfg.1m109
-rw-r--r--usr/src/man/man1m/zpool.1m1
-rw-r--r--usr/src/man/man3c/Makefile3
-rw-r--r--usr/src/man/man3c/inotify_add_watch.3c120
-rw-r--r--usr/src/man/man3c/inotify_init.3c107
-rw-r--r--usr/src/man/man3c/inotify_rm_watch.3c81
-rw-r--r--usr/src/man/man3c/psignal.3c9
-rw-r--r--usr/src/man/man3c/putenv.3c14
-rw-r--r--usr/src/man/man3c/signalfd.3c22
-rw-r--r--usr/src/man/man3c/timer_create.3c16
-rw-r--r--usr/src/man/man3c/timer_settime.3c12
-rw-r--r--usr/src/man/man3dlpi/Makefile2
-rw-r--r--usr/src/man/man3dlpi/dlpi_open.3dlpi31
-rw-r--r--usr/src/man/man3lib/Makefile1
-rw-r--r--usr/src/man/man3lib/libvnd.3lib690
-rw-r--r--usr/src/man/man3vnd/Makefile70
-rw-r--r--usr/src/man/man3vnd/vnd_create.3vnd280
-rw-r--r--usr/src/man/man3vnd/vnd_errno.3vnd170
-rw-r--r--usr/src/man/man3vnd/vnd_frameio_read.3vnd705
-rw-r--r--usr/src/man/man3vnd/vnd_pollfd.3vnd155
-rw-r--r--usr/src/man/man3vnd/vnd_prop_get.3vnd242
-rw-r--r--usr/src/man/man3vnd/vnd_prop_iter.3vnd148
-rw-r--r--usr/src/man/man3vnd/vnd_prop_writeable.3vnd101
-rw-r--r--usr/src/man/man3vnd/vnd_walk.3vnd155
-rw-r--r--usr/src/man/man3xnet/Makefile4
-rw-r--r--usr/src/man/man3xnet/htonl.3xnet29
-rw-r--r--usr/src/man/man4/Makefile8
-rw-r--r--usr/src/man/man4/overlay_files.4169
-rw-r--r--usr/src/man/man4/proc.4109
-rw-r--r--usr/src/man/man4/process.416
-rw-r--r--usr/src/man/man5/Makefile5
-rw-r--r--usr/src/man/man5/brands.53
-rw-r--r--usr/src/man/man5/inotify.5305
-rw-r--r--usr/src/man/man5/lx.5108
-rw-r--r--usr/src/man/man5/overlay.5499
-rw-r--r--usr/src/man/man5/privileges.526
-rw-r--r--usr/src/man/man5/resource_controls.5114
-rw-r--r--usr/src/man/man7d/Makefile6
-rw-r--r--usr/src/man/man7d/cpuid.7d18
-rw-r--r--usr/src/man/man7d/i40e.7d282
-rw-r--r--usr/src/man/man7d/vnd.7d118
-rw-r--r--usr/src/man/man7d/zfd.7d81
-rw-r--r--usr/src/man/man7fs/Makefile2
-rw-r--r--usr/src/man/man7fs/hyprlofs.7fs62
-rw-r--r--usr/src/man/man7fs/lxproc.7fs115
-rw-r--r--usr/src/man/man7m/Makefile2
-rw-r--r--usr/src/man/man7m/datafilt.7m46
-rw-r--r--usr/src/man/man7p/Makefile6
-rw-r--r--usr/src/man/man7p/vxlan.7p124
-rw-r--r--usr/src/man/man9e/chpoll.9e32
-rw-r--r--usr/src/man/man9f/Makefile1
-rw-r--r--usr/src/man/man9f/kmem_alloc.9f7
-rw-r--r--usr/src/man/man9f/pollhead_clean.9f64
-rw-r--r--usr/src/pkg/manifests/SUNWcs.mf27
-rw-r--r--usr/src/pkg/manifests/SUNWlx.mf11
-rw-r--r--usr/src/pkg/manifests/developer-debug-mdb.mf4
-rw-r--r--usr/src/pkg/manifests/driver-crypto-nfp.mf33
-rw-r--r--usr/src/pkg/manifests/driver-network-i40e.mf43
-rw-r--r--usr/src/pkg/manifests/system-dtrace-tests.mf2
-rw-r--r--usr/src/pkg/manifests/system-zones-brand-joyent.mf54
-rw-r--r--usr/src/pkg/manifests/system-zones-brand-lx.mf101
-rw-r--r--usr/src/stand/lib/fs/hsfs/hsfsops.c1
-rw-r--r--usr/src/test/libc-tests/runfiles/default.run2
-rw-r--r--usr/src/test/libc-tests/tests/Makefile1
-rw-r--r--usr/src/test/libc-tests/tests/env-OS-4089.c73
-rw-r--r--usr/src/test/libc-tests/tests/random/Makefile10
-rw-r--r--usr/src/test/test-runner/cmd/Makefile4
-rw-r--r--usr/src/test/test-runner/cmd/run.py2
-rw-r--r--usr/src/test/util-tests/runfiles/default.run5
-rw-r--r--usr/src/test/util-tests/tests/Makefile3
-rw-r--r--usr/src/test/util-tests/tests/bunyan/Makefile68
-rw-r--r--usr/src/test/util-tests/tests/bunyan/btest.c312
-rw-r--r--usr/src/test/util-tests/tests/bunyan/bunyan.ksh26
-rw-r--r--usr/src/test/util-tests/tests/dladm/Makefile2
-rw-r--r--usr/src/test/util-tests/tests/dladm/vnic-mtu.ksh116
-rw-r--r--usr/src/test/util-tests/tests/libnvpair_json/Makefile21
-rw-r--r--usr/src/test/util-tests/tests/libnvpair_json/json_08_large_data.ksh37
-rw-r--r--usr/src/test/util-tests/tests/mergeq/Makefile64
-rw-r--r--usr/src/test/util-tests/tests/mergeq/mqt.c217
-rw-r--r--usr/src/test/util-tests/tests/workq/Makefile64
-rw-r--r--usr/src/test/util-tests/tests/workq/wqt.c196
-rw-r--r--usr/src/tools/Makefile1
-rw-r--r--usr/src/tools/README.tools3
-rw-r--r--usr/src/tools/ctf/Makefile7
-rw-r--r--usr/src/tools/ctf/Makefile.ctf1
-rw-r--r--usr/src/tools/ctf/common/ctf_headers.h5
-rw-r--r--usr/src/tools/ctf/common/utils.h3
-rw-r--r--usr/src/tools/ctf/ctfconvert/Makefile (renamed from usr/src/tools/ctf/dump/Makefile)1
-rw-r--r--usr/src/tools/ctf/ctfconvert/Makefile.com44
-rw-r--r--usr/src/tools/ctf/ctfconvert/i386/Makefile (renamed from usr/src/tools/ctf/dump/i386/Makefile)1
-rw-r--r--usr/src/tools/ctf/ctfconvert/sparc/Makefile (renamed from usr/src/tools/ctf/dump/sparc/Makefile)1
-rw-r--r--usr/src/tools/ctf/ctfdiff/Makefile44
-rw-r--r--usr/src/tools/ctf/ctfdiff/Makefile.com44
-rw-r--r--usr/src/tools/ctf/ctfdiff/i386/Makefile16
-rw-r--r--usr/src/tools/ctf/ctfdiff/sparc/Makefile16
-rw-r--r--usr/src/tools/ctf/ctfdump/Makefile33
-rw-r--r--usr/src/tools/ctf/ctfdump/Makefile.com44
-rw-r--r--usr/src/tools/ctf/ctfdump/i386/Makefile16
-rw-r--r--usr/src/tools/ctf/ctfdump/sparc/Makefile16
-rw-r--r--usr/src/tools/ctf/ctfmerge/Makefile44
-rw-r--r--usr/src/tools/ctf/ctfmerge/Makefile.com46
-rw-r--r--usr/src/tools/ctf/ctfmerge/i386/Makefile27
-rw-r--r--usr/src/tools/ctf/ctfmerge/sparc/Makefile27
-rw-r--r--usr/src/tools/ctf/cvt/Makefile.com3
-rw-r--r--usr/src/tools/ctf/cvt/altexec.c45
-rw-r--r--usr/src/tools/ctf/cvt/ctfconvert.c12
-rw-r--r--usr/src/tools/ctf/cvt/ctfmerge.c2
-rw-r--r--usr/src/tools/ctf/cvt/ctftools.h14
-rw-r--r--usr/src/tools/ctf/dump/dump.c1028
-rw-r--r--usr/src/tools/ctf/dwarf/Makefile.com7
-rw-r--r--usr/src/tools/ctf/libctf/Makefile46
-rw-r--r--usr/src/tools/ctf/libctf/Makefile.com55
-rw-r--r--usr/src/tools/ctf/libctf/i386/Makefile18
-rw-r--r--usr/src/tools/ctf/libctf/sparc/Makefile18
-rw-r--r--usr/src/tools/env/illumos.sh6
-rw-r--r--usr/src/tools/findunref/exception_list.git39
-rw-r--r--usr/src/tools/findunref/exception_list.unknown0
-rw-r--r--usr/src/tools/man/Makefile37
-rw-r--r--usr/src/tools/onbld/Checks/Cddl.py2
-rw-r--r--usr/src/tools/onbld/Checks/CmtBlk.py2
-rw-r--r--usr/src/tools/onbld/Checks/Comments.py4
-rw-r--r--usr/src/tools/onbld/Checks/Copyright.py2
-rw-r--r--usr/src/tools/onbld/Checks/DbLookups.py31
-rw-r--r--usr/src/tools/onbld/Checks/HdrChk.py2
-rw-r--r--usr/src/tools/onbld/Checks/Keywords.py4
-rw-r--r--usr/src/tools/onbld/Checks/Mapfile.py2
-rw-r--r--usr/src/tools/onbld/Checks/__init__.py2
-rw-r--r--usr/src/tools/onbld/Scm/__init__.py4
-rw-r--r--usr/src/tools/onbld/hgext/__init__.py4
-rw-r--r--usr/src/tools/scripts/bldenv.sh3
-rw-r--r--usr/src/tools/scripts/cddlchk.py2
-rw-r--r--usr/src/tools/scripts/copyrightchk.py2
-rw-r--r--usr/src/tools/scripts/git-pbchk.py2
-rw-r--r--usr/src/tools/scripts/hdrchk.py2
-rw-r--r--usr/src/tools/scripts/hg-active.py2
-rw-r--r--usr/src/tools/scripts/mapfilechk.py2
-rw-r--r--usr/src/tools/scripts/nightly.sh8
-rw-r--r--usr/src/tools/scripts/validate_pkg.py2
-rw-r--r--usr/src/tools/scripts/webrev.sh258
-rw-r--r--usr/src/tools/scripts/wsdiff.py2
-rw-r--r--usr/src/ucbhead/Makefile (renamed from usr/src/cmd/ssh/ssh-http-proxy-connect/Makefile)57
-rw-r--r--usr/src/ucbhead/sys/file.h4
-rw-r--r--usr/src/ucblib/libucb/port/sys/flock.c2
-rw-r--r--usr/src/uts/Makefile.mapfile43
-rw-r--r--usr/src/uts/Makefile.targ6
-rw-r--r--usr/src/uts/Makefile.uts16
-rw-r--r--usr/src/uts/common/Makefile.files72
-rw-r--r--usr/src/uts/common/Makefile.rules123
-rw-r--r--usr/src/uts/common/brand/lx/autofs/lx_autofs.c3152
-rw-r--r--usr/src/uts/common/brand/lx/autofs/lxautofs.conf14
-rw-r--r--usr/src/uts/common/brand/lx/cgroups/cgrps.h223
-rw-r--r--usr/src/uts/common/brand/lx/cgroups/cgrps_node.c1019
-rw-r--r--usr/src/uts/common/brand/lx/cgroups/cgrps_vfsops.c1052
-rw-r--r--usr/src/uts/common/brand/lx/cgroups/cgrps_vnops.c1608
-rw-r--r--usr/src/uts/common/brand/lx/devfs/lxd.h232
-rw-r--r--usr/src/uts/common/brand/lx/devfs/lxd_node.c1004
-rw-r--r--usr/src/uts/common/brand/lx/devfs/lxd_vfsops.c830
-rw-r--r--usr/src/uts/common/brand/lx/devfs/lxd_vnops.c1506
-rw-r--r--usr/src/uts/common/brand/lx/dtrace/lx_systrace.c497
-rw-r--r--usr/src/uts/common/brand/lx/dtrace/lx_systrace.conf27
-rw-r--r--usr/src/uts/common/brand/lx/io/lx_netlink.c1684
-rw-r--r--usr/src/uts/common/brand/lx/io/lx_ptm.c1188
-rw-r--r--usr/src/uts/common/brand/lx/io/lx_ptm.conf27
-rw-r--r--usr/src/uts/common/brand/lx/os/lx_brand.c2586
-rw-r--r--usr/src/uts/common/brand/lx/os/lx_misc.c1103
-rw-r--r--usr/src/uts/common/brand/lx/os/lx_pid.c395
-rw-r--r--usr/src/uts/common/brand/lx/os/lx_ptrace.c2564
-rw-r--r--usr/src/uts/common/brand/lx/os/lx_signal.c50
-rw-r--r--usr/src/uts/common/brand/lx/os/lx_syscall.c1316
-rw-r--r--usr/src/uts/common/brand/lx/procfs/lx_proc.h350
-rw-r--r--usr/src/uts/common/brand/lx/procfs/lx_prsubr.c851
-rw-r--r--usr/src/uts/common/brand/lx/procfs/lx_prvfsops.c377
-rw-r--r--usr/src/uts/common/brand/lx/procfs/lx_prvnops.c7085
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_autofs.h511
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_autofs_impl.h162
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_brand.h680
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_fcntl.h161
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_futex.h121
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_impl.h52
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_ldt.h91
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_misc.h117
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_ptm.h (renamed from usr/src/cmd/ssh/include/engine.h)21
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_sched.h60
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_siginfo.h190
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_signal.h32
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_socket.h434
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_syscalls.h232
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_types.h144
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_access.c224
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_aio.c45
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_brk.c57
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_chmod.c107
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_chown.c180
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_clone.c143
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_close.c57
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_cpu.c35
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_epoll.c272
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_fallocate.c251
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_fcntl.c644
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_futex.c1104
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_getcwd.c50
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_getdents.c350
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_getpid.c79
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_getrandom.c33
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_id.c296
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_ioctl.c1741
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_ioprio.c66
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_kill.c402
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_link.c97
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_mkdir.c38
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_modify_ldt.c121
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_open.c260
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_personality.c112
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_pipe.c200
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_poll.c762
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_prctl.c210
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_rlimit.c575
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_rw.c949
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_sched.c524
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_socket.c3750
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_stat.c439
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_sync.c86
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_sysinfo.c218
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_thread_area.c196
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_timer.c379
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_uname.c82
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_wait.c377
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_xattr.c371
-rw-r--r--usr/src/uts/common/brand/lx/sysfs/lx_sysfs.h196
-rw-r--r--usr/src/uts/common/brand/lx/sysfs/lx_syssubr.c457
-rw-r--r--usr/src/uts/common/brand/lx/sysfs/lx_sysvfsops.c348
-rw-r--r--usr/src/uts/common/brand/lx/sysfs/lx_sysvnops.c1796
-rw-r--r--usr/src/uts/common/brand/sn1/sn1_brand.c99
-rw-r--r--usr/src/uts/common/brand/sn1/sn1_brand.h7
-rw-r--r--usr/src/uts/common/brand/solaris10/s10_brand.c99
-rw-r--r--usr/src/uts/common/brand/solaris10/s10_brand.h6
-rw-r--r--usr/src/uts/common/conf/param.c12
-rw-r--r--usr/src/uts/common/contract/process.c13
-rw-r--r--usr/src/uts/common/crypto/api/kcf_random.c4
-rw-r--r--usr/src/uts/common/crypto/core/kcf_sched.c6
-rw-r--r--usr/src/uts/common/ctf/ctf_mod.c11
-rw-r--r--usr/src/uts/common/disp/cmt.c8
-rw-r--r--usr/src/uts/common/disp/cpucaps.c285
-rw-r--r--usr/src/uts/common/disp/disp.c26
-rw-r--r--usr/src/uts/common/disp/fx.c12
-rw-r--r--usr/src/uts/common/disp/rt.c9
-rw-r--r--usr/src/uts/common/disp/rt_dptbl.c4
-rw-r--r--usr/src/uts/common/disp/thread.c136
-rw-r--r--usr/src/uts/common/disp/thread_intr.c37
-rw-r--r--usr/src/uts/common/dtrace/dtrace.c23
-rw-r--r--usr/src/uts/common/dtrace/sdt_subr.c33
-rw-r--r--usr/src/uts/common/exec/aout/aout.c5
-rw-r--r--usr/src/uts/common/exec/elf/elf.c429
-rw-r--r--usr/src/uts/common/exec/intp/intp.c35
-rw-r--r--usr/src/uts/common/exec/java/java.c3
-rw-r--r--usr/src/uts/common/exec/shbin/shbin.c5
-rw-r--r--usr/src/uts/common/fs/dev/sdev_netops.c259
-rw-r--r--usr/src/uts/common/fs/dev/sdev_plugin.c913
-rw-r--r--usr/src/uts/common/fs/dev/sdev_subr.c209
-rw-r--r--usr/src/uts/common/fs/dev/sdev_vfsops.c23
-rw-r--r--usr/src/uts/common/fs/dev/sdev_vnops.c10
-rw-r--r--usr/src/uts/common/fs/dev/sdev_zvolops.c6
-rw-r--r--usr/src/uts/common/fs/dnlc.c44
-rw-r--r--usr/src/uts/common/fs/fem.c688
-rw-r--r--usr/src/uts/common/fs/fifofs/fifosubr.c7
-rw-r--r--usr/src/uts/common/fs/hyprlofs/hyprlofs_dir.c640
-rw-r--r--usr/src/uts/common/fs/hyprlofs/hyprlofs_subr.c127
-rw-r--r--usr/src/uts/common/fs/hyprlofs/hyprlofs_vfsops.c614
-rw-r--r--usr/src/uts/common/fs/hyprlofs/hyprlofs_vnops.c1441
-rw-r--r--usr/src/uts/common/fs/lookup.c316
-rw-r--r--usr/src/uts/common/fs/lxproc/lxpr_subr.c524
-rw-r--r--usr/src/uts/common/fs/lxproc/lxpr_vfsops.c367
-rw-r--r--usr/src/uts/common/fs/lxproc/lxpr_vnops.c3099
-rw-r--r--usr/src/uts/common/fs/lxproc/lxproc.h278
-rw-r--r--usr/src/uts/common/fs/nfs/nfs3_vfsops.c1
-rw-r--r--usr/src/uts/common/fs/nfs/nfs3_vnops.c14
-rw-r--r--usr/src/uts/common/fs/nfs/nfs4_srv_ns.c5
-rw-r--r--usr/src/uts/common/fs/nfs/nfs4_vfsops.c1
-rw-r--r--usr/src/uts/common/fs/nfs/nfs4_vnops.c48
-rw-r--r--usr/src/uts/common/fs/nfs/nfs_auth.c26
-rw-r--r--usr/src/uts/common/fs/nfs/nfs_server.c6
-rw-r--r--usr/src/uts/common/fs/nfs/nfs_vfsops.c1
-rw-r--r--usr/src/uts/common/fs/nfs/nfs_vnops.c24
-rw-r--r--usr/src/uts/common/fs/pcfs/pc_dir.c5
-rw-r--r--usr/src/uts/common/fs/pcfs/pc_vnops.c5
-rw-r--r--usr/src/uts/common/fs/portfs/port.c36
-rw-r--r--usr/src/uts/common/fs/proc/prargv.c441
-rw-r--r--usr/src/uts/common/fs/proc/prcontrol.c16
-rw-r--r--usr/src/uts/common/fs/proc/prdata.h5
-rw-r--r--usr/src/uts/common/fs/proc/prsubr.c9
-rw-r--r--usr/src/uts/common/fs/proc/prvnops.c162
-rw-r--r--usr/src/uts/common/fs/sockfs/sockcommon.c8
-rw-r--r--usr/src/uts/common/fs/sockfs/sockcommon_sops.c50
-rw-r--r--usr/src/uts/common/fs/sockfs/sockcommon_subr.c71
-rw-r--r--usr/src/uts/common/fs/sockfs/sockfilter.c24
-rw-r--r--usr/src/uts/common/fs/sockfs/sockfilter_impl.h2
-rw-r--r--usr/src/uts/common/fs/sockfs/socksubr.c9
-rw-r--r--usr/src/uts/common/fs/sockfs/socksyscalls.c120
-rw-r--r--usr/src/uts/common/fs/sockfs/socktpi_impl.h3
-rw-r--r--usr/src/uts/common/fs/swapfs/swap_subr.c6
-rw-r--r--usr/src/uts/common/fs/tmpfs/tmp_dir.c12
-rw-r--r--usr/src/uts/common/fs/tmpfs/tmp_subr.c195
-rw-r--r--usr/src/uts/common/fs/tmpfs/tmp_vfsops.c269
-rw-r--r--usr/src/uts/common/fs/tmpfs/tmp_vnops.c79
-rw-r--r--usr/src/uts/common/fs/udfs/udf_dir.c6
-rw-r--r--usr/src/uts/common/fs/udfs/udf_vnops.c14
-rw-r--r--usr/src/uts/common/fs/ufs/ufs_vnops.c27
-rw-r--r--usr/src/uts/common/fs/vfs.c7
-rw-r--r--usr/src/uts/common/fs/vnode.c166
-rw-r--r--usr/src/uts/common/fs/zfs/arc.c9
-rw-r--r--usr/src/uts/common/fs/zfs/dbuf.c15
-rw-r--r--usr/src/uts/common/fs/zfs/dmu.c1
-rw-r--r--usr/src/uts/common/fs/zfs/dmu_send.c8
-rw-r--r--usr/src/uts/common/fs/zfs/dmu_tx.c4
-rw-r--r--usr/src/uts/common/fs/zfs/dsl_dir.c3
-rw-r--r--usr/src/uts/common/fs/zfs/dsl_pool.c1
-rw-r--r--usr/src/uts/common/fs/zfs/metaslab.c8
-rw-r--r--usr/src/uts/common/fs/zfs/sa.c12
-rw-r--r--usr/src/uts/common/fs/zfs/sys/vdev_impl.h1
-rw-r--r--usr/src/uts/common/fs/zfs/sys/zfs_zone.h63
-rw-r--r--usr/src/uts/common/fs/zfs/sys/zio.h2
-rw-r--r--usr/src/uts/common/fs/zfs/txg.c3
-rw-r--r--usr/src/uts/common/fs/zfs/vdev_disk.c13
-rw-r--r--usr/src/uts/common/fs/zfs/vdev_queue.c12
-rw-r--r--usr/src/uts/common/fs/zfs/zfs_dir.c7
-rw-r--r--usr/src/uts/common/fs/zfs/zfs_ioctl.c62
-rw-r--r--usr/src/uts/common/fs/zfs/zfs_vfsops.c12
-rw-r--r--usr/src/uts/common/fs/zfs/zfs_vnops.c60
-rw-r--r--usr/src/uts/common/fs/zfs/zfs_zone.c1336
-rw-r--r--usr/src/uts/common/fs/zfs/zil.c14
-rw-r--r--usr/src/uts/common/fs/zfs/zio.c5
-rw-r--r--usr/src/uts/common/fs/zfs/zvol.c103
-rw-r--r--usr/src/uts/common/inet/inet_hash.h37
-rw-r--r--usr/src/uts/common/inet/ip/conn_opt.c22
-rw-r--r--usr/src/uts/common/inet/ip/ip.c12
-rw-r--r--usr/src/uts/common/inet/ip/ip_attr.c5
-rw-r--r--usr/src/uts/common/inet/ip/ip_squeue.c2
-rw-r--r--usr/src/uts/common/inet/ip/ipclassifier.c163
-rw-r--r--usr/src/uts/common/inet/ip/ipsecesp.c3
-rw-r--r--usr/src/uts/common/inet/ipclassifier.h4
-rw-r--r--usr/src/uts/common/inet/ipf/ip_fil_solaris.c122
-rw-r--r--usr/src/uts/common/inet/ipf/ipf.conf5
-rw-r--r--usr/src/uts/common/inet/ipf/netinet/ipf_stack.h10
-rw-r--r--usr/src/uts/common/inet/ipf/solaris.c1
-rw-r--r--usr/src/uts/common/inet/sockmods/datafilt.c116
-rw-r--r--usr/src/uts/common/inet/squeue.c100
-rw-r--r--usr/src/uts/common/inet/tcp.h10
-rw-r--r--usr/src/uts/common/inet/tcp/tcp.c20
-rw-r--r--usr/src/uts/common/inet/tcp/tcp_bind.c224
-rw-r--r--usr/src/uts/common/inet/tcp/tcp_input.c4
-rw-r--r--usr/src/uts/common/inet/tcp/tcp_opt_data.c86
-rw-r--r--usr/src/uts/common/inet/tcp/tcp_socket.c11
-rw-r--r--usr/src/uts/common/inet/tcp/tcp_time_wait.c668
-rw-r--r--usr/src/uts/common/inet/tcp/tcp_tunables.c6
-rw-r--r--usr/src/uts/common/inet/tcp_impl.h103
-rw-r--r--usr/src/uts/common/inet/udp/udp.c165
-rw-r--r--usr/src/uts/common/inet/udp/udp_opt_data.c4
-rw-r--r--usr/src/uts/common/inet/udp_impl.h7
-rw-r--r--usr/src/uts/common/io/aggr/aggr_port.c8
-rw-r--r--usr/src/uts/common/io/aggr/aggr_recv.c36
-rw-r--r--usr/src/uts/common/io/axf/ax88172reg.h163
-rw-r--r--usr/src/uts/common/io/axf/axf_usbgem.c1539
-rw-r--r--usr/src/uts/common/io/cons.c16
-rw-r--r--usr/src/uts/common/io/devpoll.c45
-rw-r--r--usr/src/uts/common/io/dld/dld_drv.c9
-rw-r--r--usr/src/uts/common/io/dld/dld_proto.c87
-rw-r--r--usr/src/uts/common/io/dld/dld_str.c104
-rw-r--r--usr/src/uts/common/io/dls/dls.c128
-rw-r--r--usr/src/uts/common/io/dls/dls_link.c72
-rw-r--r--usr/src/uts/common/io/dls/dls_mgmt.c259
-rw-r--r--usr/src/uts/common/io/dls/dls_stat.c172
-rw-r--r--usr/src/uts/common/io/dr_sas/THIRDPARTYLICENSE32
-rw-r--r--usr/src/uts/common/io/dr_sas/THIRDPARTYLICENSE.descrip1
-rw-r--r--usr/src/uts/common/io/dr_sas/dr_sas.c5506
-rw-r--r--usr/src/uts/common/io/dr_sas/dr_sas.conf15
-rw-r--r--usr/src/uts/common/io/dr_sas/dr_sas.h1766
-rw-r--r--usr/src/uts/common/io/dr_sas/dr_sas_list.h212
-rw-r--r--usr/src/uts/common/io/fibre-channel/impl/fctl.c6
-rw-r--r--usr/src/uts/common/io/gsqueue/gsqueue.c612
-rw-r--r--usr/src/uts/common/io/i40e/core/README410
-rw-r--r--usr/src/uts/common/io/i40e/core/THIRDPARTYLICENSE29
-rw-r--r--usr/src/uts/common/io/i40e/core/THIRDPARTYLICENSE.descrip1
-rw-r--r--usr/src/uts/common/io/i40e/core/i40e_adminq.c1101
-rw-r--r--usr/src/uts/common/io/i40e/core/i40e_adminq.h125
-rw-r--r--usr/src/uts/common/io/i40e/core/i40e_adminq_cmd.h2424
-rw-r--r--usr/src/uts/common/io/i40e/core/i40e_alloc.h66
-rw-r--r--usr/src/uts/common/io/i40e/core/i40e_common.c5708
-rw-r--r--usr/src/uts/common/io/i40e/core/i40e_devids.h68
-rw-r--r--usr/src/uts/common/io/i40e/core/i40e_hmc.c373
-rw-r--r--usr/src/uts/common/io/i40e/core/i40e_hmc.h246
-rw-r--r--usr/src/uts/common/io/i40e/core/i40e_lan_hmc.c1412
-rw-r--r--usr/src/uts/common/io/i40e/core/i40e_lan_hmc.h201
-rw-r--r--usr/src/uts/common/io/i40e/core/i40e_nvm.c712
-rw-r--r--usr/src/uts/common/io/i40e/core/i40e_prototype.h478
-rw-r--r--usr/src/uts/common/io/i40e/core/i40e_register.h5317
-rw-r--r--usr/src/uts/common/io/i40e/core/i40e_status.h108
-rw-r--r--usr/src/uts/common/io/i40e/core/i40e_type.h1581
-rw-r--r--usr/src/uts/common/io/i40e/core/i40e_virtchnl.h378
-rw-r--r--usr/src/uts/common/io/i40e/i40e.conf19
-rw-r--r--usr/src/uts/common/io/i40e/i40e_gld.c1097
-rw-r--r--usr/src/uts/common/io/i40e/i40e_intr.c757
-rw-r--r--usr/src/uts/common/io/i40e/i40e_main.c2883
-rw-r--r--usr/src/uts/common/io/i40e/i40e_osdep.c236
-rw-r--r--usr/src/uts/common/io/i40e/i40e_osdep.h201
-rw-r--r--usr/src/uts/common/io/i40e/i40e_stats.c1310
-rw-r--r--usr/src/uts/common/io/i40e/i40e_sw.h974
-rw-r--r--usr/src/uts/common/io/i40e/i40e_transceiver.c2266
-rw-r--r--usr/src/uts/common/io/i40e/i40e_xregs.h53
-rw-r--r--usr/src/uts/common/io/inotify.c1504
-rw-r--r--usr/src/uts/common/io/inotify.conf16
-rw-r--r--usr/src/uts/common/io/ixgbe/ixgbe_main.c8
-rw-r--r--usr/src/uts/common/io/ksocket/ksocket.c14
-rw-r--r--usr/src/uts/common/io/ksocket/ksocket_impl.h6
-rw-r--r--usr/src/uts/common/io/ksyms.c9
-rw-r--r--usr/src/uts/common/io/mac/mac.c3
-rw-r--r--usr/src/uts/common/io/mac/mac_client.c68
-rw-r--r--usr/src/uts/common/io/mac/mac_datapath_setup.c12
-rw-r--r--usr/src/uts/common/io/mac/mac_protect.c3
-rw-r--r--usr/src/uts/common/io/mac/mac_provider.c8
-rw-r--r--usr/src/uts/common/io/mac/mac_sched.c10
-rw-r--r--usr/src/uts/common/io/mac/mac_stat.c9
-rw-r--r--usr/src/uts/common/io/mem.c13
-rw-r--r--usr/src/uts/common/io/mr_sas/mr_sas.conf8
-rw-r--r--usr/src/uts/common/io/nfp/THIRDPARTYLICENSE19
-rw-r--r--usr/src/uts/common/io/nfp/THIRDPARTYLICENSE.descrip1
-rw-r--r--usr/src/uts/common/io/nfp/autoversion.h21
-rw-r--r--usr/src/uts/common/io/nfp/drvlist.c19
-rw-r--r--usr/src/uts/common/io/nfp/hostif.c1192
-rw-r--r--usr/src/uts/common/io/nfp/i21285.c310
-rw-r--r--usr/src/uts/common/io/nfp/i21285.h43
-rw-r--r--usr/src/uts/common/io/nfp/i21555.c423
-rw-r--r--usr/src/uts/common/io/nfp/i21555.h51
-rw-r--r--usr/src/uts/common/io/nfp/i21555d.c28
-rw-r--r--usr/src/uts/common/io/nfp/nfdev-common.h141
-rw-r--r--usr/src/uts/common/io/nfp/nfdev-solaris.h37
-rw-r--r--usr/src/uts/common/io/nfp/nfp.h113
-rw-r--r--usr/src/uts/common/io/nfp/nfp_cmd.h68
-rw-r--r--usr/src/uts/common/io/nfp/nfp_common.h68
-rw-r--r--usr/src/uts/common/io/nfp/nfp_error.h48
-rw-r--r--usr/src/uts/common/io/nfp/nfp_hostif.h54
-rw-r--r--usr/src/uts/common/io/nfp/nfp_ifvers.c51
-rw-r--r--usr/src/uts/common/io/nfp/nfp_osif.h105
-rw-r--r--usr/src/uts/common/io/nfp/nfpci.h171
-rw-r--r--usr/src/uts/common/io/nfp/osif.c184
-rw-r--r--usr/src/uts/common/io/overlay/overlay.c2184
-rw-r--r--usr/src/uts/common/io/overlay/overlay.conf16
-rw-r--r--usr/src/uts/common/io/overlay/overlay.mapfile46
-rw-r--r--usr/src/uts/common/io/overlay/overlay_fm.c82
-rw-r--r--usr/src/uts/common/io/overlay/overlay_mux.c354
-rw-r--r--usr/src/uts/common/io/overlay/overlay_plugin.c281
-rw-r--r--usr/src/uts/common/io/overlay/overlay_prop.c122
-rw-r--r--usr/src/uts/common/io/overlay/overlay_target.c1651
-rw-r--r--usr/src/uts/common/io/overlay/plugins/overlay_vxlan.c372
-rw-r--r--usr/src/uts/common/io/physmem.c8
-rw-r--r--usr/src/uts/common/io/pseudo.conf9
-rw-r--r--usr/src/uts/common/io/pseudonex.c26
-rw-r--r--usr/src/uts/common/io/ptm.c47
-rw-r--r--usr/src/uts/common/io/scsi/adapters/mpt_sas/mptsas.c4
-rw-r--r--usr/src/uts/common/io/scsi/targets/sd.c79
-rw-r--r--usr/src/uts/common/io/signalfd.c488
-rw-r--r--usr/src/uts/common/io/stream.c10
-rw-r--r--usr/src/uts/common/io/udmf/dm9601reg.h348
-rw-r--r--usr/src/uts/common/io/udmf/udmf_usbgem.c1036
-rw-r--r--usr/src/uts/common/io/upf/adm8511reg.h205
-rw-r--r--usr/src/uts/common/io/upf/upf_usbgem.c1213
-rw-r--r--usr/src/uts/common/io/urf/rtl8150reg.h218
-rw-r--r--usr/src/uts/common/io/urf/urf_usbgem.c1039
-rw-r--r--usr/src/uts/common/io/usbgem/usbgem.c6389
-rw-r--r--usr/src/uts/common/io/usbgem/usbgem.h428
-rw-r--r--usr/src/uts/common/io/usbgem/usbgem_mii.h242
-rw-r--r--usr/src/uts/common/io/vioif/vioif.c120
-rw-r--r--usr/src/uts/common/io/vnd/frameio.c464
-rw-r--r--usr/src/uts/common/io/vnd/vnd.c5800
-rw-r--r--usr/src/uts/common/io/vnd/vnd.conf16
-rw-r--r--usr/src/uts/common/io/vnic/vnic_dev.c87
-rw-r--r--usr/src/uts/common/io/vscan/vscan_svc.c4
-rw-r--r--usr/src/uts/common/io/zfd.c1154
-rw-r--r--usr/src/uts/common/mapfiles/README68
-rw-r--r--usr/src/uts/common/mapfiles/ddi.mapfile190
-rw-r--r--usr/src/uts/common/mapfiles/dtrace.mapfile.awk34
-rw-r--r--usr/src/uts/common/mapfiles/kernel.mapfile41
-rw-r--r--usr/src/uts/common/mapfiles/mac.mapfile55
-rw-r--r--usr/src/uts/common/mapfiles/random.mapfile37
-rw-r--r--usr/src/uts/common/netinet/in.h7
-rw-r--r--usr/src/uts/common/netinet/udp.h14
-rw-r--r--usr/src/uts/common/os/brand.c182
-rw-r--r--usr/src/uts/common/os/clock_highres.c59
-rw-r--r--usr/src/uts/common/os/contract.c6
-rw-r--r--usr/src/uts/common/os/cred.c8
-rw-r--r--usr/src/uts/common/os/ddi_intr_irm.c2
-rw-r--r--usr/src/uts/common/os/dumpsubr.c24
-rw-r--r--usr/src/uts/common/os/exec.c119
-rw-r--r--usr/src/uts/common/os/exit.c198
-rw-r--r--usr/src/uts/common/os/fio.c3
-rw-r--r--usr/src/uts/common/os/fork.c50
-rw-r--r--usr/src/uts/common/os/grow.c35
-rw-r--r--usr/src/uts/common/os/ipc.c26
-rw-r--r--usr/src/uts/common/os/kmem.c318
-rw-r--r--usr/src/uts/common/os/logsubr.c4
-rw-r--r--usr/src/uts/common/os/lwp.c129
-rw-r--r--usr/src/uts/common/os/main.c14
-rw-r--r--usr/src/uts/common/os/mmapobj.c13
-rw-r--r--usr/src/uts/common/os/pid.c27
-rw-r--r--usr/src/uts/common/os/policy.c34
-rw-r--r--usr/src/uts/common/os/priv_defs8
-rw-r--r--usr/src/uts/common/os/project.c41
-rw-r--r--usr/src/uts/common/os/sched.c15
-rw-r--r--usr/src/uts/common/os/shm.c41
-rw-r--r--usr/src/uts/common/os/sig.c93
-rw-r--r--usr/src/uts/common/os/smb_subr.c8
-rw-r--r--usr/src/uts/common/os/streamio.c48
-rw-r--r--usr/src/uts/common/os/sysent.c29
-rw-r--r--usr/src/uts/common/os/vmem.c2
-rw-r--r--usr/src/uts/common/os/zone.c727
-rw-r--r--usr/src/uts/common/refhash/refhash.c (renamed from usr/src/uts/common/io/scsi/adapters/mpt_sas/mptsas_hash.c)6
-rw-r--r--usr/src/uts/common/sys/Makefile17
-rw-r--r--usr/src/uts/common/sys/aggr_impl.h3
-rw-r--r--usr/src/uts/common/sys/auxv.h37
-rw-r--r--usr/src/uts/common/sys/auxv_386.h4
-rw-r--r--usr/src/uts/common/sys/brand.h95
-rw-r--r--usr/src/uts/common/sys/buf.h8
-rw-r--r--usr/src/uts/common/sys/contract/process.h6
-rw-r--r--usr/src/uts/common/sys/cpucaps.h5
-rw-r--r--usr/src/uts/common/sys/cpucaps_impl.h6
-rw-r--r--usr/src/uts/common/sys/cred.h1
-rw-r--r--usr/src/uts/common/sys/ctf_api.h103
-rw-r--r--usr/src/uts/common/sys/dktp/dadk.h2
-rw-r--r--usr/src/uts/common/sys/dld.h7
-rw-r--r--usr/src/uts/common/sys/dld_impl.h5
-rw-r--r--usr/src/uts/common/sys/dld_ioc.h3
-rw-r--r--usr/src/uts/common/sys/dlpi.h12
-rw-r--r--usr/src/uts/common/sys/dls.h12
-rw-r--r--usr/src/uts/common/sys/dls_impl.h11
-rw-r--r--usr/src/uts/common/sys/dls_mgmt.h9
-rw-r--r--usr/src/uts/common/sys/elf.h46
-rw-r--r--usr/src/uts/common/sys/exec.h26
-rw-r--r--usr/src/uts/common/sys/frameio.h107
-rw-r--r--usr/src/uts/common/sys/fs/hyprlofs.h91
-rw-r--r--usr/src/uts/common/sys/fs/hyprlofs_info.h174
-rw-r--r--usr/src/uts/common/sys/fs/sdev_impl.h61
-rw-r--r--usr/src/uts/common/sys/fs/sdev_plugin.h106
-rw-r--r--usr/src/uts/common/sys/fs/tmp.h45
-rw-r--r--usr/src/uts/common/sys/fx.h10
-rw-r--r--usr/src/uts/common/sys/gsqueue.h65
-rw-r--r--usr/src/uts/common/sys/id_space.h5
-rw-r--r--usr/src/uts/common/sys/inotify.h153
-rw-r--r--usr/src/uts/common/sys/ipc_impl.h2
-rw-r--r--usr/src/uts/common/sys/iso/signal_iso.h3
-rw-r--r--usr/src/uts/common/sys/klwp.h16
-rw-r--r--usr/src/uts/common/sys/kmem_impl.h1
-rw-r--r--usr/src/uts/common/sys/ksocket.h5
-rw-r--r--usr/src/uts/common/sys/limits.h32
-rw-r--r--usr/src/uts/common/sys/mac.h12
-rw-r--r--usr/src/uts/common/sys/mac_client.h3
-rw-r--r--usr/src/uts/common/sys/mac_client_impl.h6
-rw-r--r--usr/src/uts/common/sys/mac_client_priv.h3
-rw-r--r--usr/src/uts/common/sys/mac_impl.h7
-rw-r--r--usr/src/uts/common/sys/mac_provider.h4
-rw-r--r--usr/src/uts/common/sys/mman.h1
-rw-r--r--usr/src/uts/common/sys/mntent.h2
-rw-r--r--usr/src/uts/common/sys/netconfig.h3
-rw-r--r--usr/src/uts/common/sys/neti.h2
-rw-r--r--usr/src/uts/common/sys/netstack.h3
-rw-r--r--usr/src/uts/common/sys/overlay.h96
-rw-r--r--usr/src/uts/common/sys/overlay_common.h65
-rw-r--r--usr/src/uts/common/sys/overlay_impl.h205
-rw-r--r--usr/src/uts/common/sys/overlay_plugin.h324
-rw-r--r--usr/src/uts/common/sys/overlay_target.h292
-rw-r--r--usr/src/uts/common/sys/param.h2
-rw-r--r--usr/src/uts/common/sys/policy.h2
-rw-r--r--usr/src/uts/common/sys/proc.h14
-rw-r--r--usr/src/uts/common/sys/procfs.h2
-rw-r--r--usr/src/uts/common/sys/ptms.h19
-rw-r--r--usr/src/uts/common/sys/refhash.h (renamed from usr/src/uts/common/sys/scsi/adapters/mpt_sas/mptsas_hash.h)8
-rw-r--r--usr/src/uts/common/sys/resource.h2
-rw-r--r--usr/src/uts/common/sys/rt.h13
-rw-r--r--usr/src/uts/common/sys/scsi/adapters/mpt_sas/mptsas_var.h4
-rw-r--r--usr/src/uts/common/sys/shm.h5
-rw-r--r--usr/src/uts/common/sys/shm_impl.h17
-rw-r--r--usr/src/uts/common/sys/signal.h5
-rw-r--r--usr/src/uts/common/sys/signalfd.h8
-rw-r--r--usr/src/uts/common/sys/socket.h10
-rw-r--r--usr/src/uts/common/sys/socketvar.h42
-rw-r--r--usr/src/uts/common/sys/sockfilter.h10
-rw-r--r--usr/src/uts/common/sys/squeue.h14
-rw-r--r--usr/src/uts/common/sys/squeue_impl.h2
-rw-r--r--usr/src/uts/common/sys/stream.h8
-rw-r--r--usr/src/uts/common/sys/sysevent.h3
-rw-r--r--usr/src/uts/common/sys/sysevent/datalink.h54
-rw-r--r--usr/src/uts/common/sys/sysevent/eventdefs.h4
-rw-r--r--usr/src/uts/common/sys/systrace.h13
-rw-r--r--usr/src/uts/common/sys/termios.h18
-rw-r--r--usr/src/uts/common/sys/thread.h15
-rw-r--r--usr/src/uts/common/sys/uadmin.h3
-rw-r--r--usr/src/uts/common/sys/uio.h12
-rw-r--r--usr/src/uts/common/sys/user.h7
-rw-r--r--usr/src/uts/common/sys/vm_usage.h5
-rw-r--r--usr/src/uts/common/sys/vmsystm.h5
-rw-r--r--usr/src/uts/common/sys/vnd.h141
-rw-r--r--usr/src/uts/common/sys/vnd_errno.h72
-rw-r--r--usr/src/uts/common/sys/vnic_impl.h3
-rw-r--r--usr/src/uts/common/sys/vnode.h15
-rw-r--r--usr/src/uts/common/sys/vxlan.h47
-rw-r--r--usr/src/uts/common/sys/zfd.h78
-rw-r--r--usr/src/uts/common/sys/zone.h119
-rw-r--r--usr/src/uts/common/syscall/brandsys.c8
-rw-r--r--usr/src/uts/common/syscall/fcntl.c3
-rw-r--r--usr/src/uts/common/syscall/memcntl.c8
-rw-r--r--usr/src/uts/common/syscall/open.c8
-rw-r--r--usr/src/uts/common/syscall/poll.c328
-rw-r--r--usr/src/uts/common/syscall/rusagesys.c14
-rw-r--r--usr/src/uts/common/syscall/rw.c222
-rw-r--r--usr/src/uts/common/syscall/sendfile.c19
-rw-r--r--usr/src/uts/common/syscall/stat.c2
-rw-r--r--usr/src/uts/common/syscall/sysconfig.c24
-rw-r--r--usr/src/uts/common/syscall/uadmin.c6
-rw-r--r--usr/src/uts/common/vm/hat.h10
-rw-r--r--usr/src/uts/common/vm/seg_kmem.c3
-rw-r--r--usr/src/uts/common/vm/seg_kmem.h3
-rw-r--r--usr/src/uts/common/vm/seg_umap.c466
-rw-r--r--usr/src/uts/common/vm/seg_umap.h43
-rw-r--r--usr/src/uts/common/vm/seg_vn.c11
-rw-r--r--usr/src/uts/common/vm/vm_anon.c11
-rw-r--r--usr/src/uts/common/vm/vm_as.c20
-rw-r--r--usr/src/uts/common/vm/vm_pvn.c28
-rw-r--r--usr/src/uts/common/vm/vm_swap.c27
-rw-r--r--usr/src/uts/common/vm/vm_usage.c387
-rw-r--r--usr/src/uts/i86pc/Makefile.files3
-rw-r--r--usr/src/uts/i86pc/dboot/dboot_startkern.c41
-rw-r--r--usr/src/uts/i86pc/io/ppm/acpisleep.c24
-rw-r--r--usr/src/uts/i86pc/io/psm/psm_common.c3
-rw-r--r--usr/src/uts/i86pc/ml/comm_page.s88
-rw-r--r--usr/src/uts/i86pc/ml/offsets.in6
-rw-r--r--usr/src/uts/i86pc/ml/syscall_asm.s30
-rw-r--r--usr/src/uts/i86pc/ml/syscall_asm_amd64.s188
-rw-r--r--usr/src/uts/i86pc/os/comm_page_util.c62
-rw-r--r--usr/src/uts/i86pc/os/cpr_impl.c14
-rw-r--r--usr/src/uts/i86pc/os/cpuid.c46
-rw-r--r--usr/src/uts/i86pc/os/ibft.c6
-rw-r--r--usr/src/uts/i86pc/os/lgrpplat.c14
-rw-r--r--usr/src/uts/i86pc/os/mlsetup.c10
-rw-r--r--usr/src/uts/i86pc/os/mp_startup.c30
-rw-r--r--usr/src/uts/i86pc/os/timestamp.c161
-rw-r--r--usr/src/uts/i86pc/os/trap.c33
-rw-r--r--usr/src/uts/i86pc/sys/acpidev.h3
-rw-r--r--usr/src/uts/i86pc/sys/apic.h2
-rw-r--r--usr/src/uts/i86pc/sys/comm_page.h102
-rw-r--r--usr/src/uts/i86pc/sys/machparam.h5
-rw-r--r--usr/src/uts/i86pc/sys/tsc.h28
-rw-r--r--usr/src/uts/i86pc/vm/hat_i86.c79
-rw-r--r--usr/src/uts/i86pc/vm/vm_machdep.c7
-rw-r--r--usr/src/uts/i86xpv/os/xpv_timestamp.c6
-rw-r--r--usr/src/uts/intel/Makefile6
-rw-r--r--usr/src/uts/intel/Makefile.files91
-rw-r--r--usr/src/uts/intel/Makefile.intel39
-rw-r--r--usr/src/uts/intel/Makefile.rules7
-rw-r--r--usr/src/uts/intel/axf/Makefile73
-rw-r--r--usr/src/uts/intel/brand/common/brand_asm.h2
-rw-r--r--usr/src/uts/intel/brand/lx/lx_archdep.c1714
-rw-r--r--usr/src/uts/intel/core_pcbe/Makefile4
-rw-r--r--usr/src/uts/intel/datafilt/Makefile74
-rw-r--r--usr/src/uts/intel/dev/Makefile1
-rw-r--r--usr/src/uts/intel/dr_sas/Makefile90
-rw-r--r--usr/src/uts/intel/dtrace/fasttrap_isa.c15
-rw-r--r--usr/src/uts/intel/e1000g/Makefile2
-rw-r--r--usr/src/uts/intel/elfexec/Makefile6
-rw-r--r--usr/src/uts/intel/genassym/Makefile85
-rw-r--r--usr/src/uts/intel/genassym/offsets.in43
-rw-r--r--usr/src/uts/intel/gsqueue/Makefile49
-rw-r--r--usr/src/uts/intel/hyprlofs/Makefile (renamed from usr/src/cmd/ssh/scp/Makefile)71
-rw-r--r--usr/src/uts/intel/i40e/Makefile62
-rw-r--r--usr/src/uts/intel/ia32/ml/i86_subr.s17
-rw-r--r--usr/src/uts/intel/ia32/ml/modstubs.s12
-rw-r--r--usr/src/uts/intel/ia32/ml/swtch.s104
-rw-r--r--usr/src/uts/intel/ia32/os/archdep.c26
-rw-r--r--usr/src/uts/intel/ia32/os/desctbls.c28
-rw-r--r--usr/src/uts/intel/ia32/os/sendsig.c78
-rw-r--r--usr/src/uts/intel/ia32/syscall/getcontext.c72
-rw-r--r--usr/src/uts/intel/igb/Makefile2
-rw-r--r--usr/src/uts/intel/inotify/Makefile70
-rw-r--r--usr/src/uts/intel/io/acpica/changes.txt585
-rw-r--r--usr/src/uts/intel/io/acpica/debugger/dbcmds.c473
-rw-r--r--usr/src/uts/intel/io/acpica/debugger/dbdisply.c34
-rw-r--r--usr/src/uts/intel/io/acpica/debugger/dbexec.c2
-rw-r--r--usr/src/uts/intel/io/acpica/debugger/dbfileio.c7
-rw-r--r--usr/src/uts/intel/io/acpica/debugger/dbhistry.c2
-rw-r--r--usr/src/uts/intel/io/acpica/debugger/dbinput.c14
-rw-r--r--usr/src/uts/intel/io/acpica/debugger/dbmethod.c2
-rw-r--r--usr/src/uts/intel/io/acpica/debugger/dbnames.c2
-rw-r--r--usr/src/uts/intel/io/acpica/debugger/dbstats.c30
-rw-r--r--usr/src/uts/intel/io/acpica/debugger/dbutils.c9
-rw-r--r--usr/src/uts/intel/io/acpica/debugger/dbxface.c2
-rw-r--r--usr/src/uts/intel/io/acpica/disassembler/dmbuffer.c10
-rw-r--r--usr/src/uts/intel/io/acpica/disassembler/dmnames.c2
-rw-r--r--usr/src/uts/intel/io/acpica/disassembler/dmobject.c2
-rw-r--r--usr/src/uts/intel/io/acpica/disassembler/dmopcode.c75
-rw-r--r--usr/src/uts/intel/io/acpica/disassembler/dmresrc.c15
-rw-r--r--usr/src/uts/intel/io/acpica/disassembler/dmresrcl.c3
-rw-r--r--usr/src/uts/intel/io/acpica/disassembler/dmresrcl2.c700
-rw-r--r--usr/src/uts/intel/io/acpica/disassembler/dmresrcs.c51
-rw-r--r--usr/src/uts/intel/io/acpica/disassembler/dmutils.c52
-rw-r--r--usr/src/uts/intel/io/acpica/disassembler/dmwalk.c5
-rw-r--r--usr/src/uts/intel/io/acpica/dispatcher/dsargs.c12
-rw-r--r--usr/src/uts/intel/io/acpica/dispatcher/dscontrol.c2
-rw-r--r--usr/src/uts/intel/io/acpica/dispatcher/dsfield.c80
-rw-r--r--usr/src/uts/intel/io/acpica/dispatcher/dsinit.c2
-rw-r--r--usr/src/uts/intel/io/acpica/dispatcher/dsmethod.c2
-rw-r--r--usr/src/uts/intel/io/acpica/dispatcher/dsmthdat.c2
-rw-r--r--usr/src/uts/intel/io/acpica/dispatcher/dsobject.c2
-rw-r--r--usr/src/uts/intel/io/acpica/dispatcher/dsopcode.c2
-rw-r--r--usr/src/uts/intel/io/acpica/dispatcher/dsutils.c2
-rw-r--r--usr/src/uts/intel/io/acpica/dispatcher/dswexec.c2
-rw-r--r--usr/src/uts/intel/io/acpica/dispatcher/dswload.c2
-rw-r--r--usr/src/uts/intel/io/acpica/dispatcher/dswload2.c2
-rw-r--r--usr/src/uts/intel/io/acpica/dispatcher/dswscope.c2
-rw-r--r--usr/src/uts/intel/io/acpica/dispatcher/dswstate.c2
-rw-r--r--usr/src/uts/intel/io/acpica/events/evevent.c20
-rw-r--r--usr/src/uts/intel/io/acpica/events/evglock.c12
-rw-r--r--usr/src/uts/intel/io/acpica/events/evgpe.c5
-rw-r--r--usr/src/uts/intel/io/acpica/events/evgpeblk.c5
-rw-r--r--usr/src/uts/intel/io/acpica/events/evgpeinit.c5
-rw-r--r--usr/src/uts/intel/io/acpica/events/evgpeutil.c5
-rw-r--r--usr/src/uts/intel/io/acpica/events/evmisc.c186
-rw-r--r--usr/src/uts/intel/io/acpica/events/evregion.c32
-rw-r--r--usr/src/uts/intel/io/acpica/events/evrgnini.c2
-rw-r--r--usr/src/uts/intel/io/acpica/events/evsci.c6
-rw-r--r--usr/src/uts/intel/io/acpica/events/evxface.c686
-rw-r--r--usr/src/uts/intel/io/acpica/events/evxfevnt.c5
-rw-r--r--usr/src/uts/intel/io/acpica/events/evxfgpe.c5
-rw-r--r--usr/src/uts/intel/io/acpica/events/evxfregn.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exconfig.c4
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exconvrt.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/excreate.c30
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exdebug.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exdump.c37
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exfield.c28
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exfldio.c28
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exmisc.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exmutex.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exnames.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exoparg1.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exoparg2.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exoparg3.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exoparg6.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exprep.c29
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exregion.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exresnte.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exresolv.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exresop.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exstore.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exstoren.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exstorob.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exsystem.c2
-rw-r--r--usr/src/uts/intel/io/acpica/executer/exutils.c32
-rw-r--r--usr/src/uts/intel/io/acpica/hardware/hwacpi.c5
-rw-r--r--usr/src/uts/intel/io/acpica/hardware/hwesleep.c277
-rw-r--r--usr/src/uts/intel/io/acpica/hardware/hwgpe.c5
-rw-r--r--usr/src/uts/intel/io/acpica/hardware/hwpci.c2
-rw-r--r--usr/src/uts/intel/io/acpica/hardware/hwregs.c16
-rw-r--r--usr/src/uts/intel/io/acpica/hardware/hwsleep.c395
-rw-r--r--usr/src/uts/intel/io/acpica/hardware/hwtimer.c4
-rw-r--r--usr/src/uts/intel/io/acpica/hardware/hwvalid.c4
-rw-r--r--usr/src/uts/intel/io/acpica/hardware/hwxface.c57
-rw-r--r--usr/src/uts/intel/io/acpica/hardware/hwxfsleep.c478
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nsaccess.c2
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nsalloc.c2
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nsdump.c18
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nsdumpdv.c4
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nseval.c2
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nsinit.c2
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nsload.c2
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nsnames.c2
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nsobject.c2
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nsparse.c2
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nspredef.c47
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nsrepair.c167
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nsrepair2.c22
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nssearch.c2
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nsutils.c4
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nswalk.c2
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nsxfeval.c2
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nsxfname.c2
-rw-r--r--usr/src/uts/intel/io/acpica/namespace/nsxfobj.c2
-rw-r--r--usr/src/uts/intel/io/acpica/osl.c17
-rw-r--r--usr/src/uts/intel/io/acpica/parser/psargs.c147
-rw-r--r--usr/src/uts/intel/io/acpica/parser/psloop.c2
-rw-r--r--usr/src/uts/intel/io/acpica/parser/psopcode.c13
-rw-r--r--usr/src/uts/intel/io/acpica/parser/psparse.c2
-rw-r--r--usr/src/uts/intel/io/acpica/parser/psscope.c2
-rw-r--r--usr/src/uts/intel/io/acpica/parser/pstree.c9
-rw-r--r--usr/src/uts/intel/io/acpica/parser/psutils.c2
-rw-r--r--usr/src/uts/intel/io/acpica/parser/pswalk.c2
-rw-r--r--usr/src/uts/intel/io/acpica/parser/psxface.c2
-rw-r--r--usr/src/uts/intel/io/acpica/resources/rsaddr.c2
-rw-r--r--usr/src/uts/intel/io/acpica/resources/rscalc.c69
-rw-r--r--usr/src/uts/intel/io/acpica/resources/rscreate.c75
-rw-r--r--usr/src/uts/intel/io/acpica/resources/rsdump.c158
-rw-r--r--usr/src/uts/intel/io/acpica/resources/rsinfo.c61
-rw-r--r--usr/src/uts/intel/io/acpica/resources/rsio.c2
-rw-r--r--usr/src/uts/intel/io/acpica/resources/rsirq.c34
-rw-r--r--usr/src/uts/intel/io/acpica/resources/rslist.c77
-rw-r--r--usr/src/uts/intel/io/acpica/resources/rsmemory.c2
-rw-r--r--usr/src/uts/intel/io/acpica/resources/rsmisc.c268
-rw-r--r--usr/src/uts/intel/io/acpica/resources/rsserial.c425
-rw-r--r--usr/src/uts/intel/io/acpica/resources/rsutils.c61
-rw-r--r--usr/src/uts/intel/io/acpica/resources/rsxface.c58
-rw-r--r--usr/src/uts/intel/io/acpica/tables/tbfadt.c54
-rw-r--r--usr/src/uts/intel/io/acpica/tables/tbfind.c2
-rw-r--r--usr/src/uts/intel/io/acpica/tables/tbinstal.c121
-rw-r--r--usr/src/uts/intel/io/acpica/tables/tbutils.c103
-rw-r--r--usr/src/uts/intel/io/acpica/tables/tbxface.c2
-rw-r--r--usr/src/uts/intel/io/acpica/tables/tbxfroot.c2
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utaddress.c322
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utalloc.c2
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utcache.c2
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utclib.c2
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utcopy.c2
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utdebug.c2
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utdecode.c41
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utdelete.c33
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/uteval.c2
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utglobal.c22
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utids.c2
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utinit.c46
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utlock.c2
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utmath.c2
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utmisc.c40
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utmutex.c11
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utobject.c2
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utosi.c2
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utresrc.c295
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utstate.c2
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/uttrack.c8
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utxface.c54
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utxferror.c2
-rw-r--r--usr/src/uts/intel/io/acpica/utilities/utxfmutex.c213
-rw-r--r--usr/src/uts/intel/io/dktp/dcdev/dadk.c48
-rw-r--r--usr/src/uts/intel/io/ipmi/ipmivars.h1
-rw-r--r--usr/src/uts/intel/io/vmxnet/buildNumber.h12
-rw-r--r--usr/src/uts/intel/io/vmxnet/includeCheck.h159
-rw-r--r--usr/src/uts/intel/io/vmxnet/net.h220
-rw-r--r--usr/src/uts/intel/io/vmxnet/net_sg.h84
-rw-r--r--usr/src/uts/intel/io/vmxnet/vm_basic_types.h1037
-rw-r--r--usr/src/uts/intel/io/vmxnet/vm_device_version.h246
-rw-r--r--usr/src/uts/intel/io/vmxnet/vmnet_def.h91
-rw-r--r--usr/src/uts/intel/io/vmxnet/vmxnet.c2438
-rw-r--r--usr/src/uts/intel/io/vmxnet/vmxnet.conf24
-rw-r--r--usr/src/uts/intel/io/vmxnet/vmxnet2_def.h436
-rw-r--r--usr/src/uts/intel/io/vmxnet/vmxnet_def.h184
-rw-r--r--usr/src/uts/intel/ipf/Makefile2
-rw-r--r--usr/src/uts/intel/ipf/ipf.global-objs.debug648
-rw-r--r--usr/src/uts/intel/ixgbe/Makefile2
-rw-r--r--usr/src/uts/intel/kdi/kdi_idt.c3
-rw-r--r--usr/src/uts/intel/lx_brand/Makefile111
-rw-r--r--usr/src/uts/intel/lx_brand/Makefile.rules97
-rw-r--r--usr/src/uts/intel/lx_cgroup/Makefile57
-rw-r--r--usr/src/uts/intel/lx_cgroup/Makefile.rules21
-rw-r--r--usr/src/uts/intel/lx_devfs/Makefile57
-rw-r--r--usr/src/uts/intel/lx_devfs/Makefile.rules21
-rw-r--r--usr/src/uts/intel/lx_netlink/Makefile77
-rw-r--r--usr/src/uts/intel/lx_proc/Makefile117
-rw-r--r--usr/src/uts/intel/lx_proc/Makefile.rules38
-rw-r--r--usr/src/uts/intel/lx_ptm/Makefile91
-rw-r--r--usr/src/uts/intel/lx_sysfs/Makefile66
-rw-r--r--usr/src/uts/intel/lx_sysfs/Makefile.rules21
-rw-r--r--usr/src/uts/intel/lx_systrace/Makefile80
-rw-r--r--usr/src/uts/intel/lxautofs/Makefile114
-rw-r--r--usr/src/uts/intel/lxautofs/Makefile.rules38
-rw-r--r--usr/src/uts/intel/lxprocfs/Makefile88
-rw-r--r--usr/src/uts/intel/nfp/Makefile82
-rw-r--r--usr/src/uts/intel/opteron_pcbe/Makefile2
-rw-r--r--usr/src/uts/intel/os/driver_aliases1
-rw-r--r--usr/src/uts/intel/os/name_to_major1
-rw-r--r--usr/src/uts/intel/overlay/Makefile54
-rw-r--r--usr/src/uts/intel/p123_pcbe/Makefile4
-rw-r--r--usr/src/uts/intel/p4_pcbe/Makefile2
-rw-r--r--usr/src/uts/intel/promif/prom_emul.c6
-rw-r--r--usr/src/uts/intel/sys/acpi/acapps.h13
-rw-r--r--usr/src/uts/intel/sys/acpi/accommon.h2
-rw-r--r--usr/src/uts/intel/sys/acpi/acconfig.h30
-rw-r--r--usr/src/uts/intel/sys/acpi/acdebug.h12
-rw-r--r--usr/src/uts/intel/sys/acpi/acdisasm.h202
-rw-r--r--usr/src/uts/intel/sys/acpi/acdispat.h2
-rw-r--r--usr/src/uts/intel/sys/acpi/acevents.h21
-rw-r--r--usr/src/uts/intel/sys/acpi/acexcep.h8
-rw-r--r--usr/src/uts/intel/sys/acpi/acglobal.h34
-rw-r--r--usr/src/uts/intel/sys/acpi/achware.h63
-rw-r--r--usr/src/uts/intel/sys/acpi/acinterp.h6
-rw-r--r--usr/src/uts/intel/sys/acpi/aclocal.h56
-rw-r--r--usr/src/uts/intel/sys/acpi/acmacros.h11
-rw-r--r--usr/src/uts/intel/sys/acpi/acnames.h15
-rw-r--r--usr/src/uts/intel/sys/acpi/acnamesp.h5
-rw-r--r--usr/src/uts/intel/sys/acpi/acobject.h13
-rw-r--r--usr/src/uts/intel/sys/acpi/acopcode.h6
-rw-r--r--usr/src/uts/intel/sys/acpi/acoutput.h3
-rw-r--r--usr/src/uts/intel/sys/acpi/acparser.h2
-rw-r--r--usr/src/uts/intel/sys/acpi/acpi.h2
-rw-r--r--usr/src/uts/intel/sys/acpi/acpiosxf.h12
-rw-r--r--usr/src/uts/intel/sys/acpi/acpixf.h201
-rw-r--r--usr/src/uts/intel/sys/acpi/acpredef.h40
-rw-r--r--usr/src/uts/intel/sys/acpi/acresrc.h117
-rw-r--r--usr/src/uts/intel/sys/acpi/acrestyp.h228
-rw-r--r--usr/src/uts/intel/sys/acpi/acstruct.h2
-rw-r--r--usr/src/uts/intel/sys/acpi/actables.h7
-rw-r--r--usr/src/uts/intel/sys/acpi/actbl.h32
-rw-r--r--usr/src/uts/intel/sys/acpi/actbl1.h87
-rw-r--r--usr/src/uts/intel/sys/acpi/actbl2.h2
-rw-r--r--usr/src/uts/intel/sys/acpi/actbl3.h650
-rw-r--r--usr/src/uts/intel/sys/acpi/actypes.h48
-rw-r--r--usr/src/uts/intel/sys/acpi/acutils.h50
-rw-r--r--usr/src/uts/intel/sys/acpi/amlcode.h31
-rw-r--r--usr/src/uts/intel/sys/acpi/amlresrc.h162
-rw-r--r--usr/src/uts/intel/sys/acpi/platform/accygwin.h2
-rw-r--r--usr/src/uts/intel/sys/acpi/platform/acefi.h2
-rw-r--r--usr/src/uts/intel/sys/acpi/platform/acenv.h26
-rw-r--r--usr/src/uts/intel/sys/acpi/platform/acfreebsd.h2
-rw-r--r--usr/src/uts/intel/sys/acpi/platform/acgcc.h2
-rw-r--r--usr/src/uts/intel/sys/acpi/platform/acintel.h2
-rw-r--r--usr/src/uts/intel/sys/acpi/platform/aclinux.h5
-rw-r--r--usr/src/uts/intel/sys/acpi/platform/acmsvc.h24
-rw-r--r--usr/src/uts/intel/sys/acpi/platform/acnetbsd.h2
-rw-r--r--usr/src/uts/intel/sys/acpi/platform/acos2.h2
-rw-r--r--usr/src/uts/intel/sys/acpi/platform/acwin.h2
-rw-r--r--usr/src/uts/intel/sys/acpi/platform/acwin64.h2
-rw-r--r--usr/src/uts/intel/sys/machbrand.h11
-rw-r--r--usr/src/uts/intel/sys/machlock.h5
-rw-r--r--usr/src/uts/intel/sys/segments.h9
-rw-r--r--usr/src/uts/intel/sys/ucontext.h15
-rw-r--r--usr/src/uts/intel/sys/x86_archext.h33
-rw-r--r--usr/src/uts/intel/udmf/Makefile74
-rw-r--r--usr/src/uts/intel/upf/Makefile74
-rw-r--r--usr/src/uts/intel/urf/Makefile74
-rw-r--r--usr/src/uts/intel/usbgem/Makefile84
-rw-r--r--usr/src/uts/intel/vmxnet/Makefile93
-rw-r--r--usr/src/uts/intel/vnd/Makefile56
-rw-r--r--usr/src/uts/intel/vxlan/Makefile51
-rw-r--r--usr/src/uts/intel/zfd/Makefile48
-rw-r--r--usr/src/uts/sparc/Makefile.sparc5
-rw-r--r--usr/src/uts/sparc/datafilt/Makefile73
-rw-r--r--usr/src/uts/sparc/inotify/Makefile70
-rw-r--r--usr/src/uts/sparc/ipf/ipf.global-objs.debug644
-rw-r--r--usr/src/uts/sparc/syscall/getcontext.c31
-rw-r--r--usr/src/uts/sparc/zfd/Makefile50
-rw-r--r--usr/src/uts/sun4/brand/common/brand_solaris.s2
-rw-r--r--usr/src/uts/sun4/ml/offsets.in2
-rw-r--r--usr/src/uts/sun4/os/machdep.c11
-rw-r--r--usr/src/uts/sun4u/io/pci/pcisch.c5
2306 files changed, 306042 insertions, 94730 deletions
diff --git a/usr/src/Makefile b/usr/src/Makefile
index b71c3485c8..6fd3995104 100644
--- a/usr/src/Makefile
+++ b/usr/src/Makefile
@@ -47,7 +47,7 @@ $(SPARC_BLD)psm: stand
SUBDIRS= $(COMMON_SUBDIRS) $($(MACH)_SUBDIRS)
-HDRSUBDIRS= uts head lib cmd
+HDRSUBDIRS= uts head lib cmd ucbhead
# UCB headers are bug-for-bug compatible and not checkable against the header
# standards.
@@ -57,7 +57,7 @@ CHKHDRSUBDIRS= head uts lib
#
# Headers that can be built in parallel
#
-PARALLEL_HEADERS = sysheaders userheaders libheaders cmdheaders
+PARALLEL_HEADERS = sysheaders userheaders libheaders ucbheaders cmdheaders
#
# Directories that can be built in parallel
@@ -112,7 +112,7 @@ all: mapfiles closedbins sgs .WAIT $(SUBDIRS) pkg
# packaging to be pulled from $(SRC) and $(CLOSED) and staged in
# $(ROOT)/licenses.
#
-install: install1 install2 _msg stage-licenses
+install: install1 install2 _msg
@cd msg; pwd; $(MAKE) _msg
@rm -rf "$(ROOT)/catalog"
@@ -172,13 +172,13 @@ closedbins: bldtools $(ROOTDIRS) FRC
.PARALLEL: $(PARALLEL_HEADERS) DUMMY
.PARALLEL: $(PARALLEL_DIRS) DUMMY
-$(SUBDIRS) head pkg: FRC
+$(SUBDIRS) head ucbhead pkg: FRC
@cd $@; pwd; $(MAKE) $(TARGET)
# librpcsvc has a dependency on headers installed by
# userheaders, hence the .WAIT before libheaders.
sgs: rootdirs .WAIT sysheaders userheaders .WAIT \
- libheaders cmdheaders
+ libheaders ucbheaders cmdheaders
#
# Top-level setup target to setup the development environment that includes
@@ -215,6 +215,9 @@ libheaders: bldtools
sysheaders: FRC
@cd uts; pwd; $(MAKE) install_h
+ucbheaders: FRC
+ @cd ucbhead; pwd; $(MAKE) install_h
+
cmdheaders: FRC
@cd cmd/fm; pwd; $(MAKE) install_h
@cd cmd/mdb; pwd; $(MAKE) install_h
diff --git a/usr/src/Makefile.lint b/usr/src/Makefile.lint
index 465c40d3a6..2ecdfced7f 100644
--- a/usr/src/Makefile.lint
+++ b/usr/src/Makefile.lint
@@ -194,6 +194,7 @@ COMMON_SUBDIRS = \
cmd/ls \
cmd/luxadm \
cmd/lvm \
+ cmd/machid \
cmd/mailwrapper \
cmd/makekey \
cmd/mdb \
@@ -279,7 +280,6 @@ COMMON_SUBDIRS = \
cmd/split \
cmd/srptadm \
cmd/srptsvc \
- cmd/ssh \
cmd/stat \
cmd/stmfadm \
cmd/stmfsvc \
@@ -460,7 +460,6 @@ COMMON_SUBDIRS = \
lib/pam_modules \
lib/passwdutil \
lib/pkcs11 \
- lib/print \
lib/raidcfg_plugins \
lib/scsi \
lib/smbsrv \
@@ -485,6 +484,7 @@ i386_SUBDIRS= \
cmd/biosdev \
cmd/rtc \
cmd/ucodeadm \
+ lib/brand/lx \
lib/cfgadm_plugins/sata \
lib/cfgadm_plugins/sbd \
lib/libfdisk
diff --git a/usr/src/Makefile.master b/usr/src/Makefile.master
index 95e5b5faec..036da7d7e9 100644
--- a/usr/src/Makefile.master
+++ b/usr/src/Makefile.master
@@ -163,6 +163,7 @@ JAVADOC= $(JAVA_ROOT)/bin/javadoc
RMIC= $(JAVA_ROOT)/bin/rmic
JAR= $(JAVA_ROOT)/bin/jar
CTFCONVERT= $(ONBLD_TOOLS)/bin/$(MACH)/ctfconvert
+CTFDIFF= $(ONBLD_TOOLS)/bin/$(MACH)/ctfdiff
CTFMERGE= $(ONBLD_TOOLS)/bin/$(MACH)/ctfmerge
CTFSTABS= $(ONBLD_TOOLS)/bin/$(MACH)/ctfstabs
CTFSTRIP= $(ONBLD_TOOLS)/bin/$(MACH)/ctfstrip
@@ -171,8 +172,8 @@ GENOFFSETS= $(ONBLD_TOOLS)/bin/genoffsets
XREF= $(ONBLD_TOOLS)/bin/xref
FIND= /usr/bin/find
PERL= /usr/bin/perl
-PERL_VERSION= 5.10.0
-PERL_PKGVERS= -510
+PERL_VERSION= 5.12
+PERL_PKGVERS= -512
PERL_ARCH = i86pc-solaris-64int
$(SPARC_BLD)PERL_ARCH = sun4-solaris-64int
PYTHON_26= /usr/bin/python2.6
@@ -242,7 +243,8 @@ INS.symlink= $(RM) $@; $(SYMLINK) $(INSLINKTARGET) $@
# rebuilds if the baked-in mtime != the mtime of the source file
# (rather than only if it's less than), thus when installing python
# files we must make certain to not adjust the mtime of the source
-# (.py) file.
+# (.py) file. As a part of this we also go through and change the #!
+# line in the python script to that of the actual python we are using.
#
INS.pyfile= $(INS.file); $(TOUCH) -r $< $@
@@ -424,9 +426,17 @@ sparcv9_COPTFLAG= -xO3
i386_COPTFLAG= -O
amd64_COPTFLAG= -xO3
+# This would normally be added by cw(1) but cannot be while we want to support
+# Both GCC 3.x and GCC 4.x
+$(__GNUC4)$(MACH)_COPTFLAG += -_gcc=-fno-inline-small-functions \
+ -_gcc=-fno-inline-functions-called-once
+$(__GNUC4)$(MACH64)_COPTFLAG += -_gcc=-fno-inline-small-functions \
+ -_gcc=-fno-inline-functions-called-once
+
COPTFLAG= $($(MACH)_COPTFLAG)
COPTFLAG64= $($(MACH64)_COPTFLAG)
+
# When -g is used, the compiler globalizes static objects
# (gives them a unique prefix). Disable that.
CNOGLOBAL= -W0,-noglobal
@@ -558,8 +568,7 @@ CPPFLAGS.first= # Please keep empty. Only lower makefiles should set this.
CPPFLAGS.master=$(DTEXTDOM) $(DTS_ERRNO) \
$(ENVCPPFLAGS1) $(ENVCPPFLAGS2) $(ENVCPPFLAGS3) $(ENVCPPFLAGS4) \
$(ADJUNCT_PROTO:%=-I%/usr/include)
-CPPFLAGS.native=$(ENVCPPFLAGS1) $(ENVCPPFLAGS2) $(ENVCPPFLAGS3) \
- $(ENVCPPFLAGS4) -I$(NATIVE_ADJUNCT)/include
+CPPFLAGS.native=-I$(NATIVE_ADJUNCT)/include
CPPFLAGS= $(CPPFLAGS.first) $(CPPFLAGS.master)
AS_CPPFLAGS= $(CPPFLAGS.first) $(CPPFLAGS.master)
JAVAFLAGS= -source 1.6 -target 1.6 -Xlint:deprecation,-options
@@ -911,6 +920,14 @@ CTFCVTFLAGS= -i -L VERSION
#
CTFMRGFLAGS=
+#
+# Make the transition between old and new CTF Tools. The new ctf tools
+# do not support stabs (eg. Sun Studio), so if GNUC is not set, we don't
+# honor them.
+#
+$(_GNUC)CTFCONVERT= $(ONBLD_TOOLS)/bin/$(MACH)/ctfconvert-altexec
+$(_GNUC)CTFMERGE= $(ONBLD_TOOLS)/bin/$(MACH)/ctfmerge-altexec
+
CTFCONVERT_O = $(CTFCONVERT) $(CTFCVTFLAGS) $@
ELFSIGN_O= $(TRUE)
@@ -1091,9 +1108,16 @@ PKGPUBLISHER_NONREDIST= on-extra
$(RM) $@;
$(SED) -e "s@TEXT_DOMAIN@\"$(TEXT_DOMAIN)\"@" $< > $@;
$(CHMOD) +x $@
+#
+# You might ask why we aren't using -i for sed here. That's because illumos
+# 1815 hasn't been fixed and sadly we didn't back out something that broke a lot
+# of working code in favor of GNU compatibility.
+#
+SED.py= $(SED) -e 's?^$(POUND_SIGN)!ON_PYTHON_26$$?$(POUND_SIGN)!$(PYTHON_26)?' \
+ -e 's?^$(POUND_SIGN)!ON_PYTHON$$?$(POUND_SIGN)!$(PYTHON)?'
.py:
- $(RM) $@; $(CAT) $< > $@; $(CHMOD) +x $@
+ $(RM) $@; $(SED.py) $< > $@; $(CHMOD) +x $@
.py.pyc:
$(RM) $@
diff --git a/usr/src/Makefile.master.64 b/usr/src/Makefile.master.64
index f671406fef..704a0411a4 100644
--- a/usr/src/Makefile.master.64
+++ b/usr/src/Makefile.master.64
@@ -21,6 +21,7 @@
#
# Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright 2014 Garrett D'Amore <garrett@damore.org>
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
#
# rebind basic build macros to 64-bit versions
@@ -67,7 +68,7 @@ REAL_CC= $(CW_CC_CMD:sh)
REAL_CCC= $(CW_CCC_CMD:sh)
BUILD.SO= $(CC) $(CFLAGS) -o $@ $(GSHARED) $(DYNFLAGS) \
- $(PICS) $(EXTPICS) -L $(ROOTLIBDIR64) $(LDLIBS)
+ $(PICS) $(EXTPICS) $(USDT_PICS) -L $(ROOTLIBDIR64) $(LDLIBS)
#
# ld(1) requires the -64 option to create a 64-bit filter solely from a mapfile
diff --git a/usr/src/Targetdirs b/usr/src/Targetdirs
index a015bf9b9b..31f5efc532 100644
--- a/usr/src/Targetdirs
+++ b/usr/src/Targetdirs
@@ -53,6 +53,9 @@ i386_DIRS= \
/boot/grub/bin \
/platform/i86pc \
/lib/libmvec \
+ /usr/lib/brand/lx \
+ /usr/lib/brand/lx/amd64 \
+ /usr/lib/brand/lx/distros \
/usr/lib/xen \
/usr/lib/xen/bin
@@ -280,6 +283,7 @@ DIRS= \
/usr/lib/mdb/kvm \
/usr/lib/mdb/proc \
/usr/lib/nfs \
+ /usr/lib/varpd \
/usr/net \
/usr/net/servers \
/usr/lib/pool \
@@ -466,6 +470,7 @@ DIRS64= \
/usr/lib/security/$(MACH64) \
/usr/lib/smbsrv/$(MACH64) \
/usr/lib/abi/$(MACH64) \
+ /usr/lib/varpd/$(MACH64) \
/usr/sbin/$(MACH64) \
/usr/ucb/$(MACH64) \
/usr/ucblib/$(MACH64) \
@@ -508,6 +513,9 @@ SYM.DIRS= \
/usr/ucblib/32 \
/var/ld/32
+i386_SYM.DIRS64= \
+ /usr/lib/brand/lx/64
+
sparc_SYM.DIRS64=
SYM.DIRS64= \
@@ -523,6 +531,7 @@ SYM.DIRS64= \
/usr/lib/lwp/64 \
/usr/lib/secure/64 \
/usr/lib/security/64 \
+ /usr/lib/varpd/64 \
/usr/xpg4/lib/64 \
/var/ld/64 \
/usr/ucblib/64
@@ -623,12 +632,14 @@ $(BUILD64) $(ROOT)/lib/crypto/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/lib/secure/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/usr/lib/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/usr/lib/elfedit/64:= LINKDEST=$(MACH64)
+$(BUILD64) $(ROOT)/usr/lib/brand/lx/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/usr/lib/brand/sn1/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/usr/lib/brand/solaris10/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/usr/lib/lwp/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/usr/lib/link_audit/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/usr/lib/secure/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/usr/lib/security/64:= LINKDEST=$(MACH64)
+$(BUILD64) $(ROOT)/usr/lib/varpd/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/usr/xpg4/lib/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/var/ld/64:= LINKDEST=$(MACH64)
$(BUILD64) $(ROOT)/usr/ucblib/64:= LINKDEST=$(MACH64)
diff --git a/usr/src/cmd/Makefile b/usr/src/cmd/Makefile
index 7fd184e68a..5bcb40089c 100644
--- a/usr/src/cmd/Makefile
+++ b/usr/src/cmd/Makefile
@@ -21,7 +21,7 @@
# Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright 2015 Nexenta Systems, Inc. All rights reserved.
-# Copyright (c) 2012 Joyent, Inc. All rights reserved.
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
# Copyright (c) 2012 by Delphix. All rights reserved.
# Copyright (c) 2013 DEY Storage Systems, Inc. All rights reserved.
# Copyright 2014 Garrett D'Amore <garrett@damore.org>
@@ -29,8 +29,8 @@
include ../Makefile.master
#
-# Note that the commands 'lp', and 'perl' are first in
-# the list, violating alphabetical order. This is because they are very
+# Note that if the 'lp' command were built, it would be first in
+# the list, violating alphabetical order. This is because it is very
# long-running and should be given the most wall-clock time for a
# parallel build.
#
@@ -51,7 +51,6 @@ FIRST_SUBDIRS= \
COMMON_SUBDIRS= \
allocate \
availdevs \
- lp \
perl \
Adm \
abi \
@@ -94,6 +93,7 @@ COMMON_SUBDIRS= \
cmd-crypto \
cmd-inet \
col \
+ column \
compress \
consadm \
coreadm \
@@ -103,6 +103,10 @@ COMMON_SUBDIRS= \
crypt \
csh \
csplit \
+ ctfconvert \
+ ctfdiff \
+ ctfdump \
+ ctfmerge \
ctrun \
ctstat \
ctwatch \
@@ -186,7 +190,6 @@ COMMON_SUBDIRS= \
groups \
grpck \
gss \
- hal \
halt \
head \
hostid \
@@ -223,7 +226,6 @@ COMMON_SUBDIRS= \
kvmstat \
last \
lastcomm \
- latencytop \
ldap \
ldapcachemgr \
lgrpinfo \
@@ -244,8 +246,8 @@ COMMON_SUBDIRS= \
look \
ls \
luxadm \
- lvm \
mach \
+ machid \
mail \
mailwrapper \
mailx \
@@ -280,6 +282,7 @@ COMMON_SUBDIRS= \
news \
newtask \
nice \
+ nicstat \
nl \
nlsadmin \
nohup \
@@ -312,7 +315,6 @@ COMMON_SUBDIRS= \
plockstat \
pr \
prctl \
- print \
printf \
priocntl \
profiles \
@@ -330,7 +332,6 @@ COMMON_SUBDIRS= \
pwck \
pwconv \
pwd \
- pyzfs \
raidctl \
ramdiskadm \
rcap \
@@ -381,7 +382,6 @@ COMMON_SUBDIRS= \
srchtxt \
srptadm \
srptsvc \
- ssh \
stat \
stmfadm \
stmfproxy \
@@ -434,8 +434,11 @@ COMMON_SUBDIRS= \
utmpd \
uuidgen \
valtools \
+ varpd \
vgrind \
vi \
+ vndadm \
+ vndstat \
volcheck \
volrmmount \
vrrpadm \
@@ -501,7 +504,6 @@ sparc_SUBDIRS= \
# (see previous comment about 'lp'.)
#
MSGSUBDIRS= \
- lp \
abi \
acctadm \
allocate \
@@ -613,7 +615,6 @@ MSGSUBDIRS= \
logins \
ls \
luxadm \
- lvm \
mailx \
make \
man \
@@ -644,7 +645,6 @@ MSGSUBDIRS= \
power \
pr \
praudit \
- print \
profiles \
projadd \
projects \
@@ -655,7 +655,6 @@ MSGSUBDIRS= \
ptools \
pwconv \
pwd \
- pyzfs \
raidctl \
ramdiskadm \
rcap \
@@ -683,7 +682,6 @@ MSGSUBDIRS= \
sort \
split \
srptadm \
- ssh \
stat \
stmfadm \
stmsboot \
diff --git a/usr/src/cmd/Makefile.check b/usr/src/cmd/Makefile.check
index 96fea6b370..d3b6dcc4a4 100644
--- a/usr/src/cmd/Makefile.check
+++ b/usr/src/cmd/Makefile.check
@@ -117,7 +117,6 @@ MANIFEST_SUBDIRS= \
krb5/krb5kdc \
krb5/kwarn \
krb5/slave \
- lp/cmd/lpsched \
lvm/rpc.mdcommd \
lvm/rpc.metad \
lvm/rpc.metamedd \
@@ -126,13 +125,10 @@ MANIFEST_SUBDIRS= \
lvm/util \
picl/picld \
pools/poold \
- print/bsd-sysv-commands \
- print/ppdmgr \
rcap/rcapd \
rpcsvc/rpc.bootparamd \
sendmail/lib \
smbsrv/smbd \
- ssh/etc \
svc/milestone \
tsol/labeld \
tsol/tnctl \
diff --git a/usr/src/cmd/cmd-inet/etc/services b/usr/src/cmd/cmd-inet/etc/services
index 37514ac0a7..25ccc7cc43 100644
--- a/usr/src/cmd/cmd-inet/etc/services
+++ b/usr/src/cmd/cmd-inet/etc/services
@@ -1,6 +1,7 @@
#
# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
+# Copyright 2015 Joyent, Inc.
#
# CDDL HEADER START
#
@@ -215,6 +216,8 @@ krb5_prop 754/tcp # Kerberos V5 KDC propogation
swat 901/tcp # Samba Web Adm.Tool
ufsd 1008/tcp ufsd # UFS-aware server
ufsd 1008/udp ufsd
+portolan 1296/tcp # Portolan
+svp-underlay 1339/tcp # SDC VXLAN underlay invalidation
cvc 1495/tcp # Network Console
ingreslock 1524/tcp
www-ldap-gw 1760/tcp # HTTP to LDAP gateway
diff --git a/usr/src/cmd/cmd-inet/etc/sock2path.d/system%2Fkernel b/usr/src/cmd/cmd-inet/etc/sock2path.d/system%2Fkernel
index c62e339953..49151907eb 100644
--- a/usr/src/cmd/cmd-inet/etc/sock2path.d/system%2Fkernel
+++ b/usr/src/cmd/cmd-inet/etc/sock2path.d/system%2Fkernel
@@ -18,6 +18,7 @@
# CDDL HEADER END
#
# Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2014, Joyent, Inc. All rights reserved.
#
# socket configuration information
#
@@ -52,3 +53,6 @@
29 4 1 /dev/spdsock
31 1 0 trill
+
+ 33 1 0 lx_netlink
+ 33 4 0 lx_netlink
diff --git a/usr/src/cmd/cmd-inet/lib/ipmgmtd/Makefile b/usr/src/cmd/cmd-inet/lib/ipmgmtd/Makefile
index 199f92807a..f150602cc9 100644
--- a/usr/src/cmd/cmd-inet/lib/ipmgmtd/Makefile
+++ b/usr/src/cmd/cmd-inet/lib/ipmgmtd/Makefile
@@ -19,13 +19,15 @@
# CDDL HEADER END
#
# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2015 Joyent, Inc.
#
# Needed for ROOTFS_LIBDIR definition
include ../../../../lib/Makefile.lib
PROG= ipmgmtd
-OBJS= ipmgmt_main.o ipmgmt_door.o ipmgmt_persist.o ipmgmt_util.o
+OBJS= ipmgmt_main.o ipmgmt_door.o ipmgmt_persist.o ipmgmt_util.o \
+ ipmgmt_path.o
SRCS= $(OBJS:.o=.c)
SVCMETHOD= net-ipmgmt
MANIFEST= network-ipmgmt.xml
diff --git a/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_door.c b/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_door.c
index 8e9e153b21..9f546bcad9 100644
--- a/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_door.c
+++ b/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_door.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2014, Joyent, Inc. All rights reserved.
*/
/*
@@ -112,7 +113,9 @@ ipmgmt_handler(void *cookie, char *argp, size_t argsz, door_desc_t *dp,
goto fail;
}
- /* check for solaris.network.interface.config authorization */
+ /*
+ * if not root, check for solaris.network.interface.config authorization
+ */
if (infop->idi_set) {
uid_t uid;
struct passwd pwd;
@@ -124,24 +127,32 @@ ipmgmt_handler(void *cookie, char *argp, size_t argsz, door_desc_t *dp,
goto fail;
}
uid = ucred_getruid(cred);
+ ucred_free(cred);
if ((int)uid < 0) {
err = errno;
ipmgmt_log(LOG_ERR, "Could not get user id.");
goto fail;
}
- if (getpwuid_r(uid, &pwd, buf, sizeof (buf)) ==
- NULL) {
- err = errno;
- ipmgmt_log(LOG_ERR, "Could not get password entry.");
- goto fail;
- }
- if (chkauthattr(NETWORK_INTERFACE_CONFIG_AUTH,
- pwd.pw_name) != 1) {
- err = EPERM;
- ipmgmt_log(LOG_ERR, "Not authorized for operation.");
- goto fail;
+
+ /*
+ * Branded zones may have different auth, but root always
+ * allowed.
+ */
+ if (uid != 0) {
+ if (getpwuid_r(uid, &pwd, buf, sizeof (buf)) == NULL) {
+ err = errno;
+ ipmgmt_log(LOG_ERR,
+ "Could not get password entry.");
+ goto fail;
+ }
+ if (chkauthattr(NETWORK_INTERFACE_CONFIG_AUTH,
+ pwd.pw_name) != 1) {
+ err = EPERM;
+ ipmgmt_log(LOG_ERR,
+ "Not authorized for operation.");
+ goto fail;
+ }
}
- ucred_free(cred);
}
/* individual handlers take care of calling door_return */
diff --git a/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_impl.h b/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_impl.h
index 0d905f2ff0..32d752a6eb 100644
--- a/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_impl.h
+++ b/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_impl.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
#ifndef _IPMGMT_IMPL_H
@@ -134,8 +135,6 @@ extern ipmgmt_aobjmap_list_t aobjmap;
#define ADDROBJ_LOOKUPADD 0x00000004
#define ADDROBJ_SETLIFNUM 0x00000008
-/* Permanent data store for ipadm */
-#define IPADM_DB_FILE "/etc/ipadm/ipadm.conf"
#define IPADM_FILE_MODE (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)
/*
@@ -145,20 +144,12 @@ extern ipmgmt_aobjmap_list_t aobjmap;
*/
#define IPADM_DB_VERSION 1
-/*
- * A temporary file created in SMF volatile filesystem. This file captures the
- * in-memory copy of list `aobjmap' on disk. This is done to recover from
- * daemon reboot (using svcadm) or crashes.
- */
-#define IPADM_TMPFS_DIR "/etc/svc/volatile/ipadm"
-#define ADDROBJ_MAPPING_DB_FILE IPADM_TMPFS_DIR"/aobjmap.conf"
-
-/*
- * A temporary copy of the ipadm configuration file might need
- * to be created if write requests are encountered during boottime
- * and the root filesystem is mounted read-only.
- */
-#define IPADM_VOL_DB_FILE IPADM_TMPFS_DIR"/ipadm.conf"
+typedef enum ipadm_path {
+ IPADM_PATH_TMPFS_DIR = 1,
+ IPADM_PATH_ADDROBJ_MAP_DB,
+ IPADM_PATH_DB,
+ IPADM_PATH_VOL_DB
+} ipadm_path_t;
/* SCF resources required to interact with svc.configd */
typedef struct scf_resources {
@@ -188,6 +179,8 @@ extern void ipmgmt_release_scf_resources(scf_resources_t *);
extern boolean_t ipmgmt_needs_upgrade(scf_resources_t *);
extern void ipmgmt_update_dbver(scf_resources_t *);
+extern void ipmgmt_path(ipadm_path_t, char *, size_t);
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_main.c b/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_main.c
index 5cdc0f5697..994d1b0125 100644
--- a/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_main.c
+++ b/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_main.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
/*
@@ -105,6 +106,7 @@ ipmgmt_db_init()
int fd, err, scferr;
scf_resources_t res;
boolean_t upgrade = B_TRUE;
+ char aobjpath[MAXPATHLEN];
/*
* Check to see if we need to upgrade the data-store. We need to
@@ -134,11 +136,11 @@ ipmgmt_db_init()
ipmgmt_release_scf_resources(&res);
/* creates the address object data store, if it doesn't exist */
- if ((fd = open(ADDROBJ_MAPPING_DB_FILE, O_CREAT|O_RDONLY,
- IPADM_FILE_MODE)) == -1) {
+ ipmgmt_path(IPADM_PATH_ADDROBJ_MAP_DB, aobjpath, sizeof (aobjpath));
+ if ((fd = open(aobjpath, O_CREAT|O_RDONLY, IPADM_FILE_MODE)) == -1) {
err = errno;
- ipmgmt_log(LOG_ERR, "could not open %s: %s",
- ADDROBJ_MAPPING_DB_FILE, strerror(err));
+ ipmgmt_log(LOG_ERR, "could not open %s: %s", aobjpath,
+ strerror(err));
return (err);
}
(void) close(fd);
@@ -152,8 +154,8 @@ ipmgmt_db_init()
* representation of the mapping. That is, build `aobjmap' structure
* from address object data store.
*/
- if ((err = ipadm_rw_db(ipmgmt_aobjmap_init, NULL,
- ADDROBJ_MAPPING_DB_FILE, 0, IPADM_DB_READ)) != 0) {
+ if ((err = ipadm_rw_db(ipmgmt_aobjmap_init, NULL, aobjpath, 0,
+ IPADM_DB_READ)) != 0) {
/* if there was nothing to initialize, it's fine */
if (err != ENOENT)
return (err);
@@ -165,17 +167,42 @@ ipmgmt_db_init()
return (err);
}
+static const char *
+ipmgmt_door_path()
+{
+ static char door[MAXPATHLEN];
+ static boolean_t init_done = B_FALSE;
+
+ if (!init_done) {
+ const char *zroot = zone_get_nroot();
+
+ /*
+ * If this is a branded zone, make sure we use the "/native"
+ * prefix for the door path:
+ */
+ (void) snprintf(door, sizeof (door), "%s%s", zroot != NULL ?
+ zroot : "", IPMGMT_DOOR);
+
+ init_done = B_TRUE;
+ }
+
+ return (door);
+}
+
static int
ipmgmt_door_init()
{
int fd;
int err;
+ const char *door = ipmgmt_door_path();
- /* create the door file for ipmgmtd */
- if ((fd = open(IPMGMT_DOOR, O_CREAT|O_RDONLY, IPADM_FILE_MODE)) == -1) {
+ /*
+ * Create the door file for ipmgmtd.
+ */
+ if ((fd = open(door, O_CREAT | O_RDONLY, IPADM_FILE_MODE)) == -1) {
err = errno;
- ipmgmt_log(LOG_ERR, "could not open %s: %s",
- IPMGMT_DOOR, strerror(err));
+ ipmgmt_log(LOG_ERR, "could not open %s: %s", door,
+ strerror(err));
return (err);
}
(void) close(fd);
@@ -186,15 +213,16 @@ ipmgmt_door_init()
ipmgmt_log(LOG_ERR, "failed to create door: %s", strerror(err));
return (err);
}
+
/*
* fdetach first in case a previous daemon instance exited
* ungracefully.
*/
- (void) fdetach(IPMGMT_DOOR);
- if (fattach(ipmgmt_door_fd, IPMGMT_DOOR) != 0) {
+ (void) fdetach(door);
+ if (fattach(ipmgmt_door_fd, door) != 0) {
err = errno;
- ipmgmt_log(LOG_ERR, "failed to attach door to %s: %s",
- IPMGMT_DOOR, strerror(err));
+ ipmgmt_log(LOG_ERR, "failed to attach door to %s: %s", door,
+ strerror(err));
goto fail;
}
return (0);
@@ -207,13 +235,15 @@ fail:
static void
ipmgmt_door_fini()
{
+ const char *door = ipmgmt_door_path();
+
if (ipmgmt_door_fd == -1)
return;
- (void) fdetach(IPMGMT_DOOR);
+ (void) fdetach(door);
if (door_revoke(ipmgmt_door_fd) == -1) {
ipmgmt_log(LOG_ERR, "failed to revoke access to door %s: %s",
- IPMGMT_DOOR, strerror(errno));
+ door, strerror(errno));
}
}
@@ -350,10 +380,14 @@ ipmgmt_init_privileges()
{
struct stat statbuf;
int err;
+ char tmpfsdir[MAXPATHLEN];
- /* create the IPADM_TMPFS_DIR directory */
- if (stat(IPADM_TMPFS_DIR, &statbuf) < 0) {
- if (mkdir(IPADM_TMPFS_DIR, (mode_t)0755) < 0) {
+ /*
+ * Create the volatile storage directory:
+ */
+ ipmgmt_path(IPADM_PATH_TMPFS_DIR, tmpfsdir, sizeof (tmpfsdir));
+ if (stat(tmpfsdir, &statbuf) < 0) {
+ if (mkdir(tmpfsdir, (mode_t)0755) < 0) {
err = errno;
goto fail;
}
@@ -364,8 +398,8 @@ ipmgmt_init_privileges()
}
}
- if ((chmod(IPADM_TMPFS_DIR, 0755) < 0) ||
- (chown(IPADM_TMPFS_DIR, UID_NETADM, GID_NETADM) < 0)) {
+ if ((chmod(tmpfsdir, 0755) < 0) ||
+ (chown(tmpfsdir, UID_NETADM, GID_NETADM) < 0)) {
err = errno;
goto fail;
}
diff --git a/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_path.c b/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_path.c
new file mode 100644
index 0000000000..0219ac1522
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_path.c
@@ -0,0 +1,84 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * Lookup functions for various file paths used by ipmgmtd. This mechanism
+ * primarily exists to account for a native root prefix when run within a
+ * branded zone (e.g. "/native").
+ */
+
+#include <stdio.h>
+#include <zone.h>
+#include "ipmgmt_impl.h"
+
+#define IPADM_PERM_DIR "/etc/ipadm"
+#define IPADM_TMPFS_DIR "/etc/svc/volatile/ipadm"
+
+typedef struct ipadm_path_ent {
+ ipadm_path_t ipe_id;
+ const char *ipe_path;
+} ipadm_path_ent_t;
+
+static ipadm_path_ent_t ipadm_paths[] = {
+ /*
+ * A temporary directory created in the SMF volatile filesystem.
+ */
+ { IPADM_PATH_TMPFS_DIR, IPADM_TMPFS_DIR },
+
+ /*
+ * This file captures the in-memory copy of list `aobjmap' on disk.
+ * This allows the system to recover in the event that the daemon
+ * crashes or is restarted.
+ */
+ { IPADM_PATH_ADDROBJ_MAP_DB, IPADM_TMPFS_DIR "/aobjmap.conf" },
+
+ /*
+ * The permanent data store for ipadm.
+ */
+ { IPADM_PATH_DB, IPADM_PERM_DIR "/ipadm.conf" },
+
+ /*
+ * A temporary copy of the ipadm configuration created, if needed, to
+ * service write requests early in boot. This file is merged with the
+ * permanent data store once it is available for writes.
+ */
+ { IPADM_PATH_VOL_DB, IPADM_TMPFS_DIR "/ipadm.conf" },
+
+ { 0, NULL }
+};
+
+/*
+ * Load one of the paths used by ipadm into the provided string buffer.
+ * Prepends the native system prefix (e.g. "/native") if one is in effect,
+ * such as when running within a branded zone.
+ */
+void
+ipmgmt_path(ipadm_path_t ip, char *buf, size_t bufsz)
+{
+ int i;
+
+ for (i = 0; ipadm_paths[i].ipe_path != NULL; i++) {
+ if (ipadm_paths[i].ipe_id == ip) {
+ const char *zroot = zone_get_nroot();
+
+ (void) snprintf(buf, bufsz, "%s%s", zroot != NULL ?
+ zroot : "", ipadm_paths[i].ipe_path);
+
+ return;
+ }
+ }
+
+ abort();
+}
diff --git a/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_persist.c b/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_persist.c
index 73a0c621d0..5c5057a172 100644
--- a/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_persist.c
+++ b/usr/src/cmd/cmd-inet/lib/ipmgmtd/ipmgmt_persist.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
/*
@@ -380,13 +381,18 @@ static void *
ipmgmt_db_restore_thread(void *arg)
{
int err;
+ char confpath[MAXPATHLEN];
+ char tmpconfpath[MAXPATHLEN];
+
+ ipmgmt_path(IPADM_PATH_DB, confpath, sizeof (confpath));
+ ipmgmt_path(IPADM_PATH_VOL_DB, tmpconfpath, sizeof (tmpconfpath));
for (;;) {
(void) sleep(5);
(void) pthread_rwlock_wrlock(&ipmgmt_dbconf_lock);
if (!ipmgmt_rdonly_root)
break;
- err = ipmgmt_cpfile(IPADM_VOL_DB_FILE, IPADM_DB_FILE, B_FALSE);
+ err = ipmgmt_cpfile(tmpconfpath, confpath, B_FALSE);
if (err == 0) {
ipmgmt_rdonly_root = B_FALSE;
break;
@@ -418,6 +424,11 @@ ipmgmt_db_walk(db_wfunc_t *db_walk_func, void *db_warg, ipadm_db_op_t db_op)
mode_t mode;
pthread_t tid;
pthread_attr_t attr;
+ char confpath[MAXPATHLEN];
+ char tmpconfpath[MAXPATHLEN];
+
+ ipmgmt_path(IPADM_PATH_DB, confpath, sizeof (confpath));
+ ipmgmt_path(IPADM_PATH_VOL_DB, tmpconfpath, sizeof (tmpconfpath));
writeop = (db_op != IPADM_DB_READ);
if (writeop) {
@@ -430,11 +441,10 @@ ipmgmt_db_walk(db_wfunc_t *db_walk_func, void *db_warg, ipadm_db_op_t db_op)
/*
* Did a previous write attempt fail? If so, don't even try to
- * read/write to IPADM_DB_FILE.
+ * read/write to the permanent configuration file.
*/
if (!ipmgmt_rdonly_root) {
- err = ipadm_rw_db(db_walk_func, db_warg, IPADM_DB_FILE,
- mode, db_op);
+ err = ipadm_rw_db(db_walk_func, db_warg, confpath, mode, db_op);
if (err != EROFS)
goto done;
}
@@ -442,11 +452,11 @@ ipmgmt_db_walk(db_wfunc_t *db_walk_func, void *db_warg, ipadm_db_op_t db_op)
/*
* If we haven't already copied the file to the volatile
* file system, do so. This should only happen on a failed
- * writeop(i.e., we have acquired the write lock above).
+ * writeop (i.e., we have acquired the write lock above).
*/
- if (access(IPADM_VOL_DB_FILE, F_OK) != 0) {
+ if (access(tmpconfpath, F_OK) != 0) {
assert(writeop);
- err = ipmgmt_cpfile(IPADM_DB_FILE, IPADM_VOL_DB_FILE, B_TRUE);
+ err = ipmgmt_cpfile(confpath, tmpconfpath, B_TRUE);
if (err != 0)
goto done;
(void) pthread_attr_init(&attr);
@@ -456,7 +466,7 @@ ipmgmt_db_walk(db_wfunc_t *db_walk_func, void *db_warg, ipadm_db_op_t db_op)
NULL);
(void) pthread_attr_destroy(&attr);
if (err != 0) {
- (void) unlink(IPADM_VOL_DB_FILE);
+ (void) unlink(tmpconfpath);
goto done;
}
ipmgmt_rdonly_root = B_TRUE;
@@ -465,7 +475,7 @@ ipmgmt_db_walk(db_wfunc_t *db_walk_func, void *db_warg, ipadm_db_op_t db_op)
/*
* Read/write from the volatile copy.
*/
- err = ipadm_rw_db(db_walk_func, db_warg, IPADM_VOL_DB_FILE,
+ err = ipadm_rw_db(db_walk_func, db_warg, tmpconfpath,
mode, db_op);
done:
(void) pthread_rwlock_unlock(&ipmgmt_dbconf_lock);
@@ -1233,6 +1243,9 @@ ipmgmt_persist_aobjmap(ipmgmt_aobjmap_t *nodep, ipadm_db_op_t op)
int err;
ipadm_dbwrite_cbarg_t cb;
nvlist_t *nvl = NULL;
+ char aobjpath[MAXPATHLEN];
+
+ ipmgmt_path(IPADM_PATH_ADDROBJ_MAP_DB, aobjpath, sizeof (aobjpath));
if (op == IPADM_DB_WRITE) {
if ((err = i_ipmgmt_node2nvl(&nvl, nodep)) != 0)
@@ -1243,14 +1256,14 @@ ipmgmt_persist_aobjmap(ipmgmt_aobjmap_t *nodep, ipadm_db_op_t op)
else
cb.dbw_flags = 0;
- err = ipadm_rw_db(ipmgmt_update_aobjmap, &cb,
- ADDROBJ_MAPPING_DB_FILE, IPADM_FILE_MODE, IPADM_DB_WRITE);
+ err = ipadm_rw_db(ipmgmt_update_aobjmap, &cb, aobjpath,
+ IPADM_FILE_MODE, IPADM_DB_WRITE);
nvlist_free(nvl);
} else {
assert(op == IPADM_DB_DELETE);
- err = ipadm_rw_db(ipmgmt_delete_aobjmap, nodep,
- ADDROBJ_MAPPING_DB_FILE, IPADM_FILE_MODE, IPADM_DB_DELETE);
+ err = ipadm_rw_db(ipmgmt_delete_aobjmap, nodep, aobjpath,
+ IPADM_FILE_MODE, IPADM_DB_DELETE);
}
return (err);
}
diff --git a/usr/src/cmd/cmd-inet/lib/ipmgmtd/net-ipmgmt b/usr/src/cmd/cmd-inet/lib/ipmgmtd/net-ipmgmt
index 77b6be9f54..d5812793d4 100644
--- a/usr/src/cmd/cmd-inet/lib/ipmgmtd/net-ipmgmt
+++ b/usr/src/cmd/cmd-inet/lib/ipmgmtd/net-ipmgmt
@@ -21,6 +21,7 @@
#
#
# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2012, Joyent, Inc. All rights reserved.
#
# This daemon stores address object to logical interface number mappings
# (among other things) and reads/writes from/to ipmgmtd data store.
@@ -38,14 +39,16 @@ fi
# When the non-global shared-IP stack zone boots, it tries to bring up this
# service as well. If we don't start a background process and simply exit the
# service, the service will go into maintenance mode and so will all it's
-# dependents.
+# dependents. Ideally we would simply exit with SMF_EXIT_NODAEMON, but since
+# this method is also used in an S10C zone, where support for SMF_EXIT_NODAEMON
+# does not exist, we have to stick around.
#
# In S10C zone (where this script is also used) smf_isnonglobalzone
# function is unavailable in smf_include.sh
#
if [ `/sbin/zonename` != global ]; then
if [ `/sbin/zonename -t` = shared ]; then
- (while true ; do sleep 3600 ; done) &
+ (while true ; do sleep 3600 ; done) &
exit $SMF_EXIT_OK
fi
fi
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/defaults.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/defaults.c
index 0381f0c080..ac94cec99b 100644
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/defaults.c
+++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/defaults.c
@@ -31,6 +31,7 @@
#include <stdio.h>
#include <sys/stat.h>
#include <libnvpair.h>
+#include <zone.h>
#include "common.h"
#include "defaults.h"
@@ -62,6 +63,32 @@ static struct dhcp_default defaults[] = {
{ "PARAM_IGNORE_LIST", NULL, 0, 0 }
};
+
+/*
+ * df_find_defaults(): builds the path to the default configuration file
+ *
+ * input: void
+ * output: void
+ */
+
+static const char *
+df_find_defaults(void)
+{
+ static char agent_defaults_path[MAXPATHLEN] = { 0 };
+ const char *zroot = NULL;
+
+ if (agent_defaults_path[0] != '\0') {
+ return agent_defaults_path;
+ }
+
+ zroot = zone_get_nroot();
+
+ (void) snprintf(agent_defaults_path, MAXPATHLEN, "%s%s",
+ zroot != NULL ? zroot : "", DHCP_AGENT_DEFAULTS);
+
+ return agent_defaults_path;
+}
+
/*
* df_build_cache(): builds the defaults nvlist cache
*
@@ -72,6 +99,7 @@ static struct dhcp_default defaults[] = {
static nvlist_t *
df_build_cache(void)
{
+ const char *agent_defaults_path = df_find_defaults();
char entry[1024];
int i;
char *param, *pastv6, *value, *end;
@@ -79,7 +107,7 @@ df_build_cache(void)
nvlist_t *nvlist;
struct dhcp_default *defp;
- if ((fp = fopen(DHCP_AGENT_DEFAULTS, "r")) == NULL)
+ if ((fp = fopen(agent_defaults_path, "r")) == NULL)
return (NULL);
if (nvlist_alloc(&nvlist, NV_UNIQUE_NAME, 0) != 0) {
@@ -159,6 +187,7 @@ df_build_cache(void)
const char *
df_get_string(const char *if_name, boolean_t isv6, uint_t param)
{
+ const char *agent_defaults_path = df_find_defaults();
char *value;
char paramstr[256];
char name[256];
@@ -170,10 +199,11 @@ df_get_string(const char *if_name, boolean_t isv6, uint_t param)
if (param >= (sizeof (defaults) / sizeof (*defaults)))
return (NULL);
- if (stat(DHCP_AGENT_DEFAULTS, &statbuf) != 0) {
+
+ if (stat(agent_defaults_path, &statbuf) != 0) {
if (!df_unavail_msg) {
dhcpmsg(MSG_WARNING, "cannot access %s; using "
- "built-in defaults", DHCP_AGENT_DEFAULTS);
+ "built-in defaults", agent_defaults_path);
df_unavail_msg = B_TRUE;
}
return (defaults[param].df_default);
diff --git a/usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c b/usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c
index 78da07aebf..9a710f9125 100644
--- a/usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c
+++ b/usr/src/cmd/cmd-inet/sbin/dhcpagent/request.c
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011 Joyent, Inc. All rights reserved.
*
* REQUESTING state of the client state machine.
*/
@@ -38,6 +39,7 @@
#include <dhcp_hostconf.h>
#include <dhcpagent_util.h>
#include <dhcpmsg.h>
+#include <strings.h>
#include "states.h"
#include "util.h"
@@ -641,8 +643,24 @@ accept_v4_acknak(dhcp_smach_t *dsmp, PKT_LIST *plp)
stop_pkt_retransmission(dsmp);
if (*plp->opts[CD_DHCP_TYPE]->value == NAK) {
- dhcpmsg(MSG_WARNING, "accept_v4_acknak: NAK on interface %s",
- dsmp->dsm_name);
+ char saddr[18];
+
+ saddr[0] = '\0';
+ if (plp->opts[CD_SERVER_ID] != NULL &&
+ plp->opts[CD_SERVER_ID]->len == sizeof (struct in_addr)) {
+ struct in_addr t_server;
+
+ bcopy(plp->opts[CD_SERVER_ID]->value, &t_server,
+ plp->opts[CD_SERVER_ID]->len);
+ (void) strlcpy(saddr, inet_ntoa(t_server),
+ sizeof (saddr));
+ }
+
+ dhcpmsg(MSG_WARNING, "accept_v4_acknak: NAK on interface %s "
+ "from %s %s",
+ dsmp->dsm_name,
+ inet_ntoa(plp->pktfrom.v4.sin_addr), saddr);
+
dsmp->dsm_bad_offers++;
free_pkt_entry(plp);
dhcp_restart(dsmp);
diff --git a/usr/src/cmd/cmd-inet/usr.lib/wanboot/p12split/Makefile b/usr/src/cmd/cmd-inet/usr.lib/wanboot/p12split/Makefile
index f855d268c1..1b71e71af9 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/wanboot/p12split/Makefile
+++ b/usr/src/cmd/cmd-inet/usr.lib/wanboot/p12split/Makefile
@@ -25,7 +25,7 @@
include ../Makefile.com
PROG= p12split
-LDLIBS += -lwanboot -linetutil -lwanbootutil -lcrypto
+LDLIBS += -lwanboot -linetutil -lwanbootutil -lsunw_crypto
CPPFLAGS += -I$(CMNCRYPTDIR)
LINTFLAGS += -erroff=E_SUPPRESSION_DIRECTIVE_UNUSED
diff --git a/usr/src/cmd/cmd-inet/usr.lib/wanboot/wanboot-cgi/Makefile b/usr/src/cmd/cmd-inet/usr.lib/wanboot/wanboot-cgi/Makefile
index 16e3eb8261..9f671d4df7 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/wanboot/wanboot-cgi/Makefile
+++ b/usr/src/cmd/cmd-inet/usr.lib/wanboot/wanboot-cgi/Makefile
@@ -25,7 +25,7 @@
include ../Makefile.com
PROG = wanboot-cgi
-LDLIBS += -lgen -lnsl -lwanbootutil -lnvpair -lwanboot -lcrypto
+LDLIBS += -lgen -lnsl -lwanbootutil -lnvpair -lwanboot -lsunw_crypto
CPPFLAGS += -I$(CMNCRYPTDIR)
LINTFLAGS += -erroff=E_SUPPRESSION_DIRECTIVE_UNUSED
diff --git a/usr/src/cmd/cmd-inet/usr.lib/wpad/Makefile b/usr/src/cmd/cmd-inet/usr.lib/wpad/Makefile
index 1b82adef80..a743bed065 100644
--- a/usr/src/cmd/cmd-inet/usr.lib/wpad/Makefile
+++ b/usr/src/cmd/cmd-inet/usr.lib/wpad/Makefile
@@ -33,7 +33,7 @@ include ../../../Makefile.cmd
ROOTMANIFESTDIR = $(ROOTSVCNETWORK)
LDLIBS += -ldladm -ldlpi
-all install := LDLIBS += -lcrypto
+all install := LDLIBS += -lsunw_crypto
LINTFLAGS += -u
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/arp.c b/usr/src/cmd/cmd-inet/usr.sbin/arp.c
index 19ab13fd47..aa9adac6ce 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/arp.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/arp.c
@@ -58,6 +58,7 @@
#include <arpa/inet.h>
#include <net/if_types.h>
#include <net/if_dl.h>
+#include <zone.h>
static int file(char *);
static int set(int, char *[]);
@@ -118,7 +119,11 @@ main(int argc, char *argv[])
* is to let netstat, which prints it as part of
* the MIB statistics, do it.
*/
- (void) execl("/usr/bin/netstat", "netstat",
+ char netstat_path[MAXPATHLEN];
+ const char *zroot = zone_get_nroot();
+ (void) snprintf(netstat_path, sizeof (netstat_path), "%s%s", zroot != NULL ?
+ zroot : "", "/usr/bin/netstat");
+ (void) execl(netstat_path, "netstat",
(n_flag ? "-np" : "-p"),
"-f", "inet", (char *)0);
(void) fprintf(stderr, "failed to exec netstat: %s\n",
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ndp.c b/usr/src/cmd/cmd-inet/usr.sbin/ndp.c
index 23b940c686..2fc19775ad 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/ndp.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ndp.c
@@ -40,6 +40,7 @@
#include <inet/ip.h>
#include <net/if_dl.h>
#include <net/route.h>
+#include <zone.h>
typedef struct sockaddr_in6 sin6_t;
@@ -95,7 +96,6 @@ static int ndp_set_nce(char *, char *, char *[], int);
static int ndp_set_file(char *);
static char *ndp_iface = NULL;
-static char *netstat_path = "/usr/bin/netstat";
static pid_t ndp_pid;
static boolean_t ndp_noresolve = B_FALSE; /* Don't lookup addresses */
static boolean_t ndp_run = B_TRUE;
@@ -103,6 +103,7 @@ static boolean_t ndp_run = B_TRUE;
#define MAX_ATTEMPTS 5
#define MAX_OPTS 5
#define WORDSEPS " \t\r\n"
+#define NETSTAT_PATH "/usr/bin/netstat"
/*
* Macros borrowed from route(1M) for working with PF_ROUTE messages
@@ -767,6 +768,12 @@ ndp_get(int fd, struct lifreq *lifrp, void *unused)
static void
ndp_get_all(void)
{
+ char netstat_path[MAXPATHLEN];
+ const char *zroot = zone_get_nroot();
+
+ (void) snprintf(netstat_path, sizeof (netstat_path), "%s%s", zroot != NULL ?
+ zroot : "", NETSTAT_PATH);
+
(void) execl(netstat_path, "netstat",
(ndp_noresolve ? "-np" : "-p"),
"-f", "inet6", (char *)0);
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/ping/Makefile b/usr/src/cmd/cmd-inet/usr.sbin/ping/Makefile
index 9724fff4e5..504d64b4ef 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/ping/Makefile
+++ b/usr/src/cmd/cmd-inet/usr.sbin/ping/Makefile
@@ -42,6 +42,7 @@ LDLIBS += -lxnet -lsocket -lnsl -lm -linetutil
CPPFLAGS += -D_XOPEN_SOURCE=600 -D__EXTENSIONS__
C99MODE= -xc99=%all
+C99LMODE= -Xc99=%all
# Setting the above defines to use the UNIX98 ancillary data feature
# causes lint to output warnings about lint library declarations conflicting
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/routeadm/routeadm.c b/usr/src/cmd/cmd-inet/usr.sbin/routeadm/routeadm.c
index 71a2fc9853..be826baba2 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/routeadm/routeadm.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/routeadm/routeadm.c
@@ -21,10 +21,9 @@
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -46,6 +45,7 @@
#include <libscf.h>
#include <libscf_priv.h>
#include <libuutil.h>
+#include <ifaddrs.h>
/*
* This program moves routing management under SMF. We do this by giving
@@ -2335,8 +2335,8 @@ out:
/*
*
- * Return the number of IPv6 addresses configured. This answers the
- * generic question, "is IPv6 configured?". We only start in.ndpd if IPv6
+ * Return the number of non-loopback IPv6 addresses configured. This answers
+ * the generic question, "is IPv6 configured?". We only start in.ndpd if IPv6
* is configured, and we also only enable IPv6 routing daemons if IPv6 is
* enabled.
*/
@@ -2344,28 +2344,24 @@ static int
ra_numv6intfs(void)
{
static int num = -1;
- int ipsock;
- struct lifnum lifn;
+ int cnt;
+ struct ifaddrs *ifp_head, *ifp;
if (num != -1)
return (num);
- if ((ipsock = socket(PF_INET6, SOCK_DGRAM, 0)) == -1) {
- (void) fprintf(stderr,
- gettext("%1$s: unable to open %2$s: %3$s\n"),
- myname, IP_DEV_NAME, strerror(errno));
+ if (getifaddrs(&ifp_head) < 0)
return (0);
- }
- lifn.lifn_family = AF_INET6;
- lifn.lifn_flags = 0;
- if (ioctl(ipsock, SIOCGLIFNUM, &lifn) == -1) {
- (void) close(ipsock);
- return (0);
+ cnt = 0;
+ for (ifp = ifp_head; ifp; ifp = ifp->ifa_next) {
+ if (!(ifp->ifa_flags & IFF_LOOPBACK) &&
+ (ifp->ifa_flags & IFF_IPV6))
+ cnt++;
}
- (void) close(ipsock);
- return (num = lifn.lifn_count);
+ freeifaddrs(ifp_head);
+ return (num = cnt);
}
/*
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/Makefile b/usr/src/cmd/cmd-inet/usr.sbin/snoop/Makefile
index 1d408bccba..5d9ef9e64d 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/Makefile
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/Makefile
@@ -22,6 +22,7 @@
#
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
+# Copyright 2015 Joyent, Inc.
#
PROG= snoop
@@ -39,12 +40,14 @@ OBJS= nfs4_xdr.o snoop.o snoop_aarp.o snoop_adsp.o snoop_aecho.o \
snoop_pppoe.o snoop_rip.o snoop_rip6.o snoop_rpc.o snoop_rpcprint.o \
snoop_rpcsec.o snoop_rport.o snoop_rquota.o snoop_rstat.o snoop_rtmp.o \
snoop_sctp.o snoop_slp.o snoop_smb.o snoop_socks.o snoop_solarnet.o \
- snoop_tcp.o snoop_tftp.o snoop_trill.o snoop_udp.o snoop_zip.o
+ snoop_tcp.o snoop_tftp.o snoop_trill.o snoop_udp.o snoop_vxlan.o \
+ snoop_zip.o
SRCS= $(OBJS:.o=.c)
HDRS= snoop.h snoop_mip.h at.h snoop_ospf.h snoop_ospf6.h
include ../../../Makefile.cmd
+include ../../../Makefile.ctf
CPPFLAGS += -I. -I$(SRC)/common/net/dhcp \
-D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.c b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.c
index 097dd6ee90..f2192df089 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.c
@@ -121,6 +121,7 @@ main(int argc, char **argv)
char *output_area;
int nbytes;
char *datalink = NULL;
+ char *zonename = NULL;
dlpi_handle_t dh;
names[0] = '\0';
@@ -227,7 +228,7 @@ main(int argc, char **argv)
}
(void) setvbuf(stdout, NULL, _IOLBF, BUFSIZ);
- while ((c = getopt(argc, argv, "at:CPDSi:o:Nn:s:d:I:vVp:f:c:x:U?rqz"))
+ while ((c = getopt(argc, argv, "at:CPDSi:o:Nn:s:d:I:vVp:f:c:x:U?rqz:Z"))
!= EOF) {
switch (c) {
case 'a':
@@ -348,8 +349,11 @@ main(int argc, char **argv)
case 'U':
Uflg = B_TRUE;
break;
-#ifdef DEBUG
case 'z':
+ zonename = optarg;
+ break;
+#ifdef DEBUG
+ case 'Z':
zflg = B_TRUE;
break;
#endif /* DEBUG */
@@ -371,7 +375,7 @@ main(int argc, char **argv)
* requested was chosen, but that's too hard.
*/
if (!icapfile) {
- use_kern_pf = open_datalink(&dh, datalink);
+ use_kern_pf = open_datalink(&dh, datalink, zonename);
} else {
use_kern_pf = B_FALSE;
cap_open_read(icapfile);
@@ -816,6 +820,8 @@ usage(void)
(void) fprintf(stderr,
"\t[ -r ] # Do not resolve address to name\n");
(void) fprintf(stderr,
+ "\t[ -z zone ] # Open links from named zone\n");
+ (void) fprintf(stderr,
"\n\t[ filter expression ]\n");
(void) fprintf(stderr, "\nExample:\n");
(void) fprintf(stderr, "\tsnoop -o saved host fred\n\n");
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.h b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.h
index e4f182572b..42095bcd34 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.h
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop.h
@@ -24,6 +24,7 @@
* Use is subject to license terms.
*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
#ifndef _SNOOP_H
@@ -182,7 +183,7 @@ extern void cap_open_read(const char *);
extern void cap_open_write(const char *);
extern void cap_read(int, int, int, void (*)(), int);
extern void cap_close(void);
-extern boolean_t open_datalink(dlpi_handle_t *, const char *);
+extern boolean_t open_datalink(dlpi_handle_t *, const char *, const char *);
extern void init_datalink(dlpi_handle_t, ulong_t, ulong_t, struct timeval *,
struct Pf_ext_packetfilt *);
extern void net_read(dlpi_handle_t, size_t, int, void (*)(), int);
@@ -260,6 +261,7 @@ extern int interpret_socks_reply(int, char *, int);
extern int interpret_trill(int, struct ether_header **, char *, int *);
extern int interpret_isis(int, char *, int, boolean_t);
extern int interpret_bpdu(int, char *, int);
+extern int interpret_vxlan(int, char *, int);
extern void init_ldap(void);
extern boolean_t arp_for_ether(char *, struct ether_addr *);
extern char *ether_ouiname(uint32_t);
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c
index ab6bc292ac..8fbf3fc15f 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_capture.c
@@ -29,6 +29,7 @@
#include <strings.h>
#include <errno.h>
#include <fcntl.h>
+#include <limits.h>
#include <setjmp.h>
#include <sys/types.h>
#include <sys/signal.h>
@@ -114,7 +115,7 @@ select_datalink(const char *linkname, void *arg)
* about the datalink useful for building the proper packet filters.
*/
boolean_t
-open_datalink(dlpi_handle_t *dhp, const char *linkname)
+open_datalink(dlpi_handle_t *dhp, const char *linkname, const char *zonename)
{
int retval;
int flags = DLPI_PASSIVE | DLPI_RAW;
@@ -122,6 +123,9 @@ open_datalink(dlpi_handle_t *dhp, const char *linkname)
dlpi_info_t dlinfo;
if (linkname == NULL) {
+ if (zonename != NULL)
+ pr_err("a datalink must be specified with a zone name");
+
/*
* Select a datalink to use by default. Prefer datalinks that
* are plumbed by IP.
@@ -145,7 +149,8 @@ open_datalink(dlpi_handle_t *dhp, const char *linkname)
flags |= DLPI_DEVIPNET;
if (Iflg || strcmp(linkname, "lo0") == 0)
flags |= DLPI_IPNETINFO;
- if ((retval = dlpi_open(linkname, dhp, flags)) != DLPI_SUCCESS) {
+ if ((retval = dlpi_open_zone(linkname, zonename, dhp,
+ flags)) != DLPI_SUCCESS) {
pr_err("cannot open \"%s\": %s", linkname,
dlpi_strerror(retval));
}
@@ -614,6 +619,10 @@ cap_open_read(const char *name)
if (fstat(capfile_in, &st) < 0)
pr_err("couldn't stat %s: %m", name);
+ if (st.st_size > INT_MAX)
+ pr_err("input file size (%llu bytes) exceeds maximum "
+ "supported size (%d bytes)",
+ (unsigned long long)st.st_size, INT_MAX);
cap_len = st.st_size;
cap_buffp = mmap(0, cap_len, PROT_READ, MAP_PRIVATE, capfile_in, 0);
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ether.c b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ether.c
index 5c6bde0cd6..029ed66116 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ether.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_ether.c
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
*/
#include <stdio.h>
@@ -54,8 +55,9 @@
static headerlen_fn_t ether_header_len, fddi_header_len, tr_header_len,
ib_header_len, ipnet_header_len, ipv4_header_len, ipv6_header_len;
-static interpreter_fn_t interpret_ether, interpret_fddi, interpret_tr,
+static interpreter_fn_t interpret_fddi, interpret_tr,
interpret_ib, interpret_ipnet, interpret_iptun;
+interpreter_fn_t interpret_ether;
static void addr_copy_swap(struct ether_addr *, struct ether_addr *);
static int tr_machdr_len(char *, int *, int *);
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_rport.c b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_rport.c
index b84ee3c1b4..c2fcfd7c72 100644
--- a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_rport.c
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_rport.c
@@ -21,6 +21,7 @@
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015, Joyent, Inc.
*/
#include <stdio.h>
@@ -76,6 +77,7 @@ static const struct porttable pt_udp[] = {
{ 560, "RMONITOR" },
{ 561, "MONITOR" },
{ IPPORT_SOCKS, "SOCKS" },
+ { IPPORT_VXLAN, "VXLAN" },
{ 0, NULL }
};
@@ -428,6 +430,9 @@ interpret_reserved(int flags, int proto, in_port_t src, in_port_t dst,
(void) interpret_socks_reply(flags, data,
dlen);
return (1);
+ case IPPORT_VXLAN:
+ (void) interpret_vxlan(flags, data, dlen);
+ return (1);
}
}
diff --git a/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_vxlan.c b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_vxlan.c
new file mode 100644
index 0000000000..36025ca8f3
--- /dev/null
+++ b/usr/src/cmd/cmd-inet/usr.sbin/snoop/snoop_vxlan.c
@@ -0,0 +1,68 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Decode VXLAN encapsulated packets.
+ */
+
+#include <sys/vxlan.h>
+#include "snoop.h"
+
+int
+interpret_vxlan(int flags, char *data, int fraglen)
+{
+ vxlan_hdr_t *vxlan = (vxlan_hdr_t *)data;
+ uint32_t id, vxf;
+
+ if (fraglen < sizeof (vxlan_hdr_t)) {
+ if (flags & F_SUM)
+ (void) snprintf(get_sum_line(), MAXLINE,
+ "VXLAN RUNT");
+ if (flags & F_DTAIL)
+ show_header("VXLAN RUNT: ", "Short packet", fraglen);
+
+ return (fraglen);
+ }
+
+ id = ntohl(vxlan->vxlan_id) >> VXLAN_ID_SHIFT;
+ vxf = ntohl(vxlan->vxlan_flags);
+
+ if (flags & F_SUM) {
+ (void) snprintf(get_sum_line(), MAXLINE,
+ "VXLAN VNI=%d", id);
+ }
+
+ if (flags & F_DTAIL) {
+ show_header("VXLAN: ", "VXLAN Header", sizeof (vxlan_hdr_t));
+ show_space();
+ (void) snprintf(get_line(0, 0), get_line_remain(),
+ "Flags = 0x%08x", vxf);
+ (void) snprintf(get_line(0, 0), get_line_remain(), " %s",
+ getflag(vxf >> 24, VXLAN_F_VDI >> 24, "vni present",
+ "vni missing"));
+ (void) snprintf(get_line(0, 0), get_line_remain(),
+ "VXLAN network id (VNI) = %d", id);
+ show_space();
+ }
+
+ if (flags & (F_DTAIL | F_ALLSUM)) {
+ fraglen -= sizeof (vxlan_hdr_t);
+ data += sizeof (vxlan_hdr_t);
+
+ return (interpret_ether(flags, data, fraglen, fraglen));
+ }
+
+ return (0);
+}
diff --git a/usr/src/cmd/column/Makefile b/usr/src/cmd/column/Makefile
new file mode 100644
index 0000000000..ab4cf3390e
--- /dev/null
+++ b/usr/src/cmd/column/Makefile
@@ -0,0 +1,43 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2013 Joyent, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+PROG=column
+
+include ../Makefile.cmd
+
+CFLAGS += $(CCVERBOSE)
+
+.KEEP_STATE:
+
+all: $(PROG)
+
+install: all $(ROOTPROG)
+
+clean:
+
+lint: lint_PROG
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/column/THIRDPARTYLICENSE b/usr/src/cmd/column/THIRDPARTYLICENSE
new file mode 100644
index 0000000000..a80f56cb43
--- /dev/null
+++ b/usr/src/cmd/column/THIRDPARTYLICENSE
@@ -0,0 +1,26 @@
+Copyright (c) 1989, 1993, 1994
+ The Regents of the University of California. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+4. Neither the name of the University nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+ *
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
diff --git a/usr/src/cmd/column/THIRDPARTYLICENSE.descrip b/usr/src/cmd/column/THIRDPARTYLICENSE.descrip
new file mode 100644
index 0000000000..42051a2982
--- /dev/null
+++ b/usr/src/cmd/column/THIRDPARTYLICENSE.descrip
@@ -0,0 +1 @@
+PORTIONS OF COLUMN COMMAND FUNCTIONALITY
diff --git a/usr/src/cmd/column/column.c b/usr/src/cmd/column/column.c
new file mode 100644
index 0000000000..4f9a3c81a6
--- /dev/null
+++ b/usr/src/cmd/column/column.c
@@ -0,0 +1,335 @@
+/*
+ * Copyright (c) 1989, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * Portions Copyright (c) 2013 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef lint
+static const char copyright[] =
+"@(#) Copyright (c) 1989, 1993, 1994\n\
+ The Regents of the University of California. All rights reserved.\n";
+#endif
+
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/param.h>
+#include <sys/termios.h>
+
+#include <err.h>
+#include <limits.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#define TAB 8
+#define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) /* to any y */
+
+static void c_columnate(void);
+static void input(FILE *);
+static void maketbl(void);
+static void print(void);
+static void r_columnate(void);
+static void usage(void);
+static int width(const wchar_t *);
+
+static int termwidth = 80; /* default terminal width */
+
+static int entries; /* number of records */
+static int eval; /* exit value */
+static int maxlength; /* longest record */
+static wchar_t **list; /* array of pointers to records */
+static const wchar_t *separator = L"\t "; /* field separator for table option */
+
+int
+main(int argc, char **argv)
+{
+ struct winsize win;
+ FILE *fp;
+ int ch, tflag, xflag;
+ char *p;
+ const char *src;
+ wchar_t *newsep;
+ size_t seplen;
+
+ (void) setlocale(LC_ALL, "");
+
+ if (ioctl(1, TIOCGWINSZ, &win) == -1 || !win.ws_col) {
+ if ((p = getenv("COLUMNS")))
+ termwidth = atoi(p);
+ } else
+ termwidth = win.ws_col;
+
+ tflag = xflag = 0;
+ while ((ch = getopt(argc, argv, "c:s:tx")) != -1)
+ switch (ch) {
+ case 'c':
+ termwidth = atoi(optarg);
+ break;
+ case 's':
+ src = optarg;
+ seplen = mbsrtowcs(NULL, &src, 0, NULL);
+ if (seplen == (size_t)-1)
+ err(1, "bad separator");
+ newsep = malloc((seplen + 1) * sizeof (wchar_t));
+ if (newsep == NULL)
+ err(1, NULL);
+ (void) mbsrtowcs(newsep, &src, seplen + 1, NULL);
+ separator = newsep;
+ break;
+ case 't':
+ tflag = 1;
+ break;
+ case 'x':
+ xflag = 1;
+ break;
+ case '?':
+ default:
+ usage();
+ }
+ argc -= optind;
+ argv += optind;
+
+ if (!*argv)
+ input(stdin);
+ else for (; *argv; ++argv)
+ if ((fp = fopen(*argv, "rF"))) {
+ input(fp);
+ (void) fclose(fp);
+ } else {
+ warn("%s", *argv);
+ eval = 1;
+ }
+
+ if (!entries)
+ exit(eval);
+
+ maxlength = roundup(maxlength + 1, TAB);
+ if (tflag)
+ maketbl();
+ else if (maxlength >= termwidth)
+ print();
+ else if (xflag)
+ c_columnate();
+ else
+ r_columnate();
+ exit(eval);
+
+ /*NOTREACHED*/
+ return (eval);
+}
+
+static void
+c_columnate(void)
+{
+ int chcnt, col, cnt, endcol, numcols;
+ wchar_t **lp;
+
+ numcols = termwidth / maxlength;
+ endcol = maxlength;
+ for (chcnt = col = 0, lp = list; ; ++lp) {
+ (void) wprintf(L"%ls", *lp);
+ chcnt += width(*lp);
+ if (!--entries)
+ break;
+ if (++col == numcols) {
+ chcnt = col = 0;
+ endcol = maxlength;
+ (void) putwchar('\n');
+ } else {
+ while ((cnt = roundup(chcnt + 1, TAB)) <= endcol) {
+ (void) putwchar('\t');
+ chcnt = cnt;
+ }
+ endcol += maxlength;
+ }
+ }
+ if (chcnt)
+ (void) putwchar('\n');
+}
+
+static void
+r_columnate(void)
+{
+ int base, chcnt, cnt, col, endcol, numcols, numrows, row;
+
+ numcols = termwidth / maxlength;
+ numrows = entries / numcols;
+ if (entries % numcols)
+ ++numrows;
+
+ for (row = 0; row < numrows; ++row) {
+ endcol = maxlength;
+ for (base = row, chcnt = col = 0; col < numcols; ++col) {
+ (void) wprintf(L"%ls", list[base]);
+ chcnt += width(list[base]);
+ if ((base += numrows) >= entries)
+ break;
+ while ((cnt = roundup(chcnt + 1, TAB)) <= endcol) {
+ (void) putwchar('\t');
+ chcnt = cnt;
+ }
+ endcol += maxlength;
+ }
+ (void) putwchar('\n');
+ }
+}
+
+static void
+print(void)
+{
+ int cnt;
+ wchar_t **lp;
+
+ for (cnt = entries, lp = list; cnt--; ++lp)
+ (void) wprintf(L"%ls\n", *lp);
+}
+
+typedef struct _tbl {
+ wchar_t **list;
+ int cols, *len;
+} TBL;
+#define DEFCOLS 25
+
+static void
+maketbl(void)
+{
+ TBL *t;
+ int coloff, cnt;
+ wchar_t *p, **lp;
+ int *lens, maxcols;
+ TBL *tbl;
+ wchar_t **cols;
+ wchar_t *last;
+
+ if ((t = tbl = calloc(entries, sizeof (TBL))) == NULL)
+ err(1, (char *)NULL);
+ if ((cols = calloc((maxcols = DEFCOLS), sizeof (*cols))) == NULL)
+ err(1, (char *)NULL);
+ if ((lens = calloc(maxcols, sizeof (int))) == NULL)
+ err(1, (char *)NULL);
+ for (cnt = 0, lp = list; cnt < entries; ++cnt, ++lp, ++t) {
+ for (coloff = 0, p = *lp;
+ (cols[coloff] = wcstok(p, separator, &last));
+ p = NULL)
+ if (++coloff == maxcols) {
+ if (!(cols = realloc(cols, ((uint_t)maxcols +
+ DEFCOLS) * sizeof (char *))) ||
+ !(lens = realloc(lens,
+ ((uint_t)maxcols + DEFCOLS) *
+ sizeof (int))))
+ err(1, NULL);
+ (void) memset((char *)lens + maxcols *
+ sizeof (int), 0, DEFCOLS * sizeof (int));
+ maxcols += DEFCOLS;
+ }
+ if ((t->list = calloc(coloff, sizeof (*t->list))) == NULL)
+ err(1, (char *)NULL);
+ if ((t->len = calloc(coloff, sizeof (int))) == NULL)
+ err(1, (char *)NULL);
+ for (t->cols = coloff; --coloff >= 0; ) {
+ t->list[coloff] = cols[coloff];
+ t->len[coloff] = width(cols[coloff]);
+ if (t->len[coloff] > lens[coloff])
+ lens[coloff] = t->len[coloff];
+ }
+ }
+ for (cnt = 0, t = tbl; cnt < entries; ++cnt, ++t) {
+ for (coloff = 0; coloff < t->cols - 1; ++coloff)
+ (void) wprintf(L"%ls%*ls", t->list[coloff],
+ lens[coloff] - t->len[coloff] + 2, L" ");
+ (void) wprintf(L"%ls\n", t->list[coloff]);
+ }
+}
+
+#define DEFNUM 1000
+#define MAXLINELEN (LINE_MAX + 1)
+
+static void
+input(FILE *fp)
+{
+ static int maxentry;
+ int len;
+ wchar_t *p, buf[MAXLINELEN];
+
+ if (!list)
+ if ((list = calloc((maxentry = DEFNUM), sizeof (*list))) ==
+ NULL)
+ err(1, (char *)NULL);
+ while (fgetws(buf, MAXLINELEN, fp)) {
+ for (p = buf; *p && iswspace(*p); ++p)
+ ;
+ if (!*p)
+ continue;
+ if (!(p = wcschr(p, L'\n'))) {
+ warnx("line too long");
+ eval = 1;
+ continue;
+ }
+ *p = L'\0';
+ len = width(buf);
+ if (maxlength < len)
+ maxlength = len;
+ if (entries == maxentry) {
+ maxentry += DEFNUM;
+ if (!(list = realloc(list,
+ (uint_t)maxentry * sizeof (*list))))
+ err(1, NULL);
+ }
+ list[entries] = malloc((wcslen(buf) + 1) * sizeof (wchar_t));
+ if (list[entries] == NULL)
+ err(1, NULL);
+ (void) wcscpy(list[entries], buf);
+ entries++;
+ }
+}
+
+/* Like wcswidth(), but ignores non-printing characters. */
+static int
+width(const wchar_t *wcs)
+{
+ int w, cw;
+
+ for (w = 0; *wcs != L'\0'; wcs++)
+ if ((cw = wcwidth(*wcs)) > 0)
+ w += cw;
+ return (w);
+}
+
+static void
+usage(void)
+{
+
+ (void) fprintf(stderr,
+ "usage: column [-tx] [-c columns] [-s sep] [file ...]\n");
+ exit(1);
+}
diff --git a/usr/src/cmd/coreadm/coreadm.xml b/usr/src/cmd/coreadm/coreadm.xml
index 46a4cda17a..28f1e27240 100644
--- a/usr/src/cmd/coreadm/coreadm.xml
+++ b/usr/src/cmd/coreadm/coreadm.xml
@@ -48,14 +48,6 @@
<service_fmri value='svc:/system/filesystem/minimal' />
</dependency>
- <dependency
- name='coreadm_manifest-import'
- type='service'
- grouping='require_all'
- restart_on='none'>
- <service_fmri value='svc:/system/manifest-import:default' />
- </dependency>
-
<instance name='default' enabled='false'>
<exec_method
type='method'
diff --git a/usr/src/cmd/cron/Makefile b/usr/src/cmd/cron/Makefile
index c9ffeadffe..fb05e292a3 100644
--- a/usr/src/cmd/cron/Makefile
+++ b/usr/src/cmd/cron/Makefile
@@ -26,6 +26,7 @@
DEFAULTFILES = cron.dfl
include ../Makefile.cmd
+include ../Makefile.ctf
MANIFEST = cron.xml
@@ -96,20 +97,20 @@ XPG4ATOBJS= $(ATOBJS:%=objs.xpg4/%) $(XPG4OBJS:%=objs.xpg4/%)
XPG6COMMONOBJS= $(COMMONOBJS:%=objs.xpg6/%)
XPG6CTOBJS= $(CRONTABOBJS:%=objs.xpg6/%)
-cron := POBJS = $(CRONOBJS) $(COMMONOBJ2)
-at := POBJS = $(ATOBJS) $(COMMONOBJS)
-at.xpg4 := POBJS = $(XPG4ATOBJS) $(XPG4COMMONOBJS)
-atrm := POBJS = $(ATRMOBJS) $(COMMONOBJS)
-atq := POBJS = $(ATQOBJS) $(COMMONOBJS)
-crontab := POBJS = $(CRONTABOBJS) $(COMMONOBJS)
-crontab.xpg4 := POBJS = $(XPG4CTOBJS) $(XPG4COMMONOBJS)
-crontab.xpg6 := POBJS = $(XPG6CTOBJS) $(XPG6COMMONOBJS)
+cron := OBJS = $(CRONOBJS) $(COMMONOBJ2)
+at := OBJS = $(ATOBJS) $(COMMONOBJS)
+at.xpg4 := OBJS = $(XPG4ATOBJS) $(XPG4COMMONOBJS)
+atrm := OBJS = $(ATRMOBJS) $(COMMONOBJS)
+atq := OBJS = $(ATQOBJS) $(COMMONOBJS)
+crontab := OBJS = $(CRONTABOBJS) $(COMMONOBJS)
+crontab.xpg4 := OBJS = $(XPG4CTOBJS) $(XPG4COMMONOBJS)
+crontab.xpg6 := OBJS = $(XPG6CTOBJS) $(XPG6COMMONOBJS)
CFLAGS += $(CCVERBOSE)
NOBJS= $(CRONOBJS) $(ATOBJS) $(ATRMOBJS1) $(ATQOBJS) $(CRONTABOBJS1) \
$(COMMONOBJS)
-OBJS = $(NOBJS) $(XPG4COMMONOBJS) $(XPG4ATOBJS) $(XPG4CTOBJS) \
+COBJS = $(NOBJS) $(XPG4COMMONOBJS) $(XPG4ATOBJS) $(XPG4CTOBJS) \
$(XPG6COMMONOBJS) $(XPG6CTOBJS) $(GETRESPOBJ)
SRCS = $(NOBJS:%.o=%.c) $(GETRESPSRC)
@@ -157,32 +158,35 @@ all : $(PROG) $(XPG4) $(XPG6) $(SCRIPT) $(XPG4SCRIPT) $(FILES)
install : all $(ROOTPROG) $(ROOTETCDEFAULTFILES) $(ROOTSYMLINK) \
$(ROOTMANIFEST) $(ROOTMETHOD)
-$(PROG) : $$(POBJS)
- $(LINK.c) $(POBJS) -o $@ $(LDLIBS)
+$(PROG) : $$(OBJS)
+ $(LINK.c) $(OBJS) -o $@ $(LDLIBS)
$(POST_PROCESS)
-$(XPG4) : objs.xpg4 $$(POBJS)
- $(LINK.c) $(POBJS) -o $@ $(LDLIBS)
+$(XPG4) : objs.xpg4 $$(OBJS)
+ $(LINK.c) $(OBJS) -o $@ $(LDLIBS)
$(POST_PROCESS)
-$(XPG6) : objs.xpg6 $$(POBJS)
- $(LINK.c) $(POBJS) -o $@ $(LDLIBS)
+$(XPG6) : objs.xpg6 $$(OBJS)
+ $(LINK.c) $(OBJS) -o $@ $(LDLIBS)
$(POST_PROCESS)
objs.xpg6/%.o: %.c
$(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
objs.xpg6:
-@mkdir -p $@
objs.xpg4/%.o: %.c
$(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
objs.xpg4:
-@mkdir -p $@
objs.xpg4/values-xpg4.o: ../../lib/common/common/values-xpg4.c
$(COMPILE.c) -o $@ ../../lib/common/common/values-xpg4.c
+ $(POST_PROCESS_O)
%.o: $(SRC)/common/util/%.c
$(COMPILE.c) $(OUTPUT_OPTION) $<
@@ -219,7 +223,7 @@ $(POFILE): $(POFILES)
$(RM) $@; cat $(POFILES) > $@
clean :
- $(RM) $(OBJS) att1.h att1.c att2.c
+ $(RM) $(COBJS) att1.h att1.c att2.c
lint : lint_SRCS
diff --git a/usr/src/cmd/cron/cron.c b/usr/src/cmd/cron/cron.c
index ab36d09037..33a7373f1e 100644
--- a/usr/src/cmd/cron/cron.c
+++ b/usr/src/cmd/cron/cron.c
@@ -23,7 +23,7 @@
* Use is subject to license terms.
*
* Copyright 2013 Joshua M. Clulow <josh@sysmgr.org>
- *
+ * Copyright 2013 Joyent, Inc. All rights reserved.
* Copyright (c) 2014 Gary Mills
*/
@@ -315,7 +315,8 @@ static int ex(struct event *e);
static void read_dirs(int);
static void mail(char *, char *, int);
static char *next_field(int, int);
-static void readcron(struct usr *, time_t);
+static void readcron(char *, struct usr *, time_t);
+static void readcronfile(FILE *, struct usr *, time_t);
static int next_ge(int, char *);
static void free_if_unused(struct usr *);
static void del_atjob(char *, char *);
@@ -420,7 +421,7 @@ extern void el_delete(void);
static int valid_entry(char *, int);
static struct usr *create_ulist(char *, int);
-static void init_cronevent(char *, int);
+static void init_cronevent(char *, char *);
static void init_atevent(char *, time_t, int, int);
static void update_atevent(struct usr *, char *, time_t, int);
@@ -759,6 +760,18 @@ read_dirs(int first)
time_t tim;
+ if (chdir(SYSCRONDIR) != -1) {
+ cwd = CRON;
+ if ((dir = opendir(".")) != NULL) {
+ while ((dp = readdir(dir)) != NULL) {
+ if (!valid_entry(dp->d_name, CRONEVENT))
+ continue;
+ init_cronevent(SYSCRONDIR, dp->d_name);
+ }
+ (void) closedir(dir);
+ }
+ }
+
if (chdir(CRONDIR) == -1)
crabort(BADCD, REMOVE_FIFO|CONSOLE_MSG);
cwd = CRON;
@@ -767,7 +780,7 @@ read_dirs(int first)
while ((dp = readdir(dir)) != NULL) {
if (!valid_entry(dp->d_name, CRONEVENT))
continue;
- init_cronevent(dp->d_name, first);
+ init_cronevent(CRONDIR, dp->d_name);
}
(void) closedir(dir);
@@ -853,23 +866,18 @@ create_ulist(char *name, int type)
}
void
-init_cronevent(char *name, int first)
+init_cronevent(char *basedir, char *name)
{
struct usr *u;
- if (first) {
+ if ((u = find_usr(name)) == NULL) {
u = create_ulist(name, CRONEVENT);
- readcron(u, 0);
+ readcron(basedir, u, 0);
} else {
- if ((u = find_usr(name)) == NULL) {
- u = create_ulist(name, CRONEVENT);
- readcron(u, 0);
- } else {
- u->ctexists = TRUE;
- rm_ctevents(u);
- el_remove(u->ctid, 0);
- readcron(u, 0);
- }
+ u->ctexists = TRUE;
+ rm_ctevents(u);
+ el_remove(u->ctid, 0);
+ readcron(basedir, u, 0);
}
}
@@ -950,7 +958,7 @@ mod_ctab(char *name, time_t reftime)
(void) strcpy(u->home, pw->pw_dir);
u->uid = pw->pw_uid;
u->gid = pw->pw_gid;
- readcron(u, reftime);
+ readcron(CRONDIR, u, reftime);
} else {
u->uid = pw->pw_uid;
u->gid = pw->pw_gid;
@@ -973,7 +981,7 @@ mod_ctab(char *name, time_t reftime)
/* user didnt have a crontab last time */
u->ctid = ecid++;
u->ctevents = NULL;
- readcron(u, reftime);
+ readcron(CRONDIR, u, reftime);
return;
}
#ifdef DEBUG
@@ -981,7 +989,7 @@ mod_ctab(char *name, time_t reftime)
#endif
rm_ctevents(u);
el_remove(u->ctid, 0);
- readcron(u, reftime);
+ readcron(CRONDIR, u, reftime);
}
}
@@ -1116,8 +1124,94 @@ update_atevent(struct usr *u, char *name, time_t tim, int jobtype)
static char line[CTLINESIZE]; /* holds a line from a crontab file */
static int cursor; /* cursor for the above line */
+static int
+copyfile(char *name, FILE *dp)
+{
+ FILE *tf;
+
+ if ((tf = fopen(name, "r")) == NULL) {
+ (void) fclose(dp);
+ return (1);
+ }
+
+ while (fgets(line, CTLINESIZE, tf) != NULL) {
+ if (fputs(line, dp) == EOF) {
+ (void) fclose(tf);
+ (void) fclose(dp);
+ return (1);
+ }
+ }
+ (void) fclose(tf);
+
+ return (0);
+}
+
+static void
+readcron(char *basedir, struct usr *u, time_t reftime)
+{
+ char *altpath;
+ struct stat sb;
+ FILE *cf; /* cf will be a user's crontab file */
+ char altnamebuf[PATH_MAX];
+ char namebuf[PATH_MAX];
+
+ if (strcmp(basedir, SYSCRONDIR) == 0)
+ altpath = CRONDIR;
+ else
+ altpath = SYSCRONDIR;
+
+ if (snprintf(altnamebuf, sizeof (altnamebuf), "%s/%s", altpath,
+ u->name) >= sizeof (altnamebuf))
+ return;
+
+ if (snprintf(namebuf, sizeof (namebuf), "%s/%s", basedir, u->name) >=
+ sizeof (namebuf))
+ return;
+
+ if (stat(altnamebuf, &sb) != -1) {
+ /*
+ * There is a secondary crontab for this user. We need to
+ * merge the two crontabs into a temporary file for loading.
+ */
+ int fd;
+ char tmpfile[PATH_MAX];
+
+ (void) strlcpy(tmpfile, "/tmp/cronXXXXXX", sizeof (tmpfile));
+ if ((fd = mkstemp(tmpfile)) == -1)
+ return;
+
+ unlink(tmpfile);
+ if ((cf = fdopen(fd, "w+")) == NULL) {
+ close(fd);
+ return;
+ }
+
+ if (copyfile(namebuf, cf) != 0)
+ return;
+
+ if (copyfile(altnamebuf, cf) != 0)
+ return;
+
+ (void) fflush(cf);
+ rewind(cf);
+
+ } else {
+ /*
+ * Only one crontab, open it directly.
+ */
+ if ((cf = fopen(namebuf, "r")) == NULL) {
+ mail(u->name, NOREAD, ERR_UNIXERR);
+ return;
+ }
+ }
+
+ readcronfile(cf, u, reftime);
+
+ (void) fclose(cf);
+}
+
static void
-readcron(struct usr *u, time_t reftime)
+readcronfile(FILE *cf, struct usr *u, time_t reftime)
{
/*
* readcron reads in a crontab file for a user (u). The list of
@@ -1125,12 +1219,9 @@ readcron(struct usr *u, time_t reftime)
* this list. Each event is also entered into the main event
* list.
*/
- FILE *cf; /* cf will be a user's crontab file */
struct event *e;
int start;
unsigned int i;
- char namebuf[PATH_MAX];
- char *pname;
struct shared *tz = NULL;
struct shared *home = NULL;
struct shared *shell = NULL;
@@ -1138,19 +1229,6 @@ readcron(struct usr *u, time_t reftime)
/* read the crontab file */
cte_init(); /* Init error handling */
- if (cwd != CRON) {
- if (snprintf(namebuf, sizeof (namebuf), "%s/%s",
- CRONDIR, u->name) >= sizeof (namebuf)) {
- return;
- }
- pname = namebuf;
- } else {
- pname = u->name;
- }
- if ((cf = fopen(pname, "r")) == NULL) {
- mail(u->name, NOREAD, ERR_UNIXERR);
- return;
- }
while (fgets(line, CTLINESIZE, cf) != NULL) {
char *tmp;
/* process a line of a crontab file */
@@ -1279,7 +1357,6 @@ again:
#endif
}
cte_sendmail(u->name); /* mail errors if any to user */
- (void) fclose(cf);
rel_shared(tz);
rel_shared(shell);
rel_shared(home);
@@ -2442,6 +2519,9 @@ ex(struct event *e)
} else {
r = audit_cron_session(e->u->name, CRONDIR,
e->u->uid, e->u->gid, NULL);
+ if (r != 0)
+ r = audit_cron_session(e->u->name, SYSCRONDIR,
+ e->u->uid, e->u->gid, NULL);
}
if (r != 0) {
msg("cron audit problem. job failed (%s) for user %s",
diff --git a/usr/src/cmd/cron/cron.h b/usr/src/cmd/cron/cron.h
index a76016299c..93e21e7b41 100644
--- a/usr/src/cmd/cron/cron.h
+++ b/usr/src/cmd/cron/cron.h
@@ -71,6 +71,9 @@ struct message {
char logname[LLEN];
};
+/* anything below here can be changed */
+
+#define SYSCRONDIR "/etc/cron.d/crontabs"
#define CRONDIR "/var/spool/cron/crontabs"
#define ATDIR "/var/spool/cron/atjobs"
#define ACCTFILE "/var/cron/log"
diff --git a/usr/src/cmd/cron/crontab.c b/usr/src/cmd/cron/crontab.c
index 327a71388b..cdb4e1e394 100644
--- a/usr/src/cmd/cron/crontab.c
+++ b/usr/src/cmd/cron/crontab.c
@@ -71,7 +71,7 @@
"usage:\n" \
"\tcrontab [file]\n" \
"\tcrontab -e [username]\n" \
- "\tcrontab -l [username]\n" \
+ "\tcrontab -l [-g] [username]\n" \
"\tcrontab -r [username]"
#define INVALIDUSER "you are not a valid user (no entry in /etc/passwd)."
#define NOTALLOWED "you are not authorized to use cron. Sorry."
@@ -120,6 +120,7 @@ main(int argc, char **argv)
int c, r;
int rflag = 0;
int lflag = 0;
+ int gflag = 0;
int eflag = 0;
int errflg = 0;
char *pp;
@@ -151,11 +152,14 @@ main(int argc, char **argv)
exit(1);
}
- while ((c = getopt(argc, argv, "elr")) != EOF)
+ while ((c = getopt(argc, argv, "eglr")) != EOF)
switch (c) {
case 'e':
eflag++;
break;
+ case 'g':
+ gflag++;
+ break;
case 'l':
lflag++;
break;
@@ -170,6 +174,9 @@ main(int argc, char **argv)
if (eflag + lflag + rflag > 1)
errflg++;
+ if (gflag && !lflag)
+ errflg++;
+
argc -= optind;
argv += optind;
if (errflg || argc > 1)
@@ -236,12 +243,27 @@ main(int argc, char **argv)
exit(0);
}
if (lflag) {
- if ((fp = fopen(cf, "r")) == NULL)
- crabort(BADOPEN);
- while (fgets(line, CTLINESIZE, fp) != NULL)
- fputs(line, stdout);
- fclose(fp);
- exit(0);
+ char sysconf[PATH_MAX];
+
+ if (gflag) {
+ if (snprintf(sysconf, sizeof (sysconf), "%s/%s",
+ SYSCRONDIR, login) < sizeof (sysconf) &&
+ (fp = fopen(sysconf, "r")) != NULL) {
+ while (fgets(line, CTLINESIZE, fp) != NULL)
+ fputs(line, stdout);
+ fclose(fp);
+ exit(0);
+ } else {
+ crabort(BADOPEN);
+ }
+ } else {
+ if ((fp = fopen(cf, "r")) == NULL)
+ crabort(BADOPEN);
+ while (fgets(line, CTLINESIZE, fp) != NULL)
+ fputs(line, stdout);
+ fclose(fp);
+ exit(0);
+ }
}
if (eflag) {
if ((fp = fopen(cf, "r")) == NULL) {
diff --git a/usr/src/cmd/ctfconvert/Makefile b/usr/src/cmd/ctfconvert/Makefile
new file mode 100644
index 0000000000..688addd9d1
--- /dev/null
+++ b/usr/src/cmd/ctfconvert/Makefile
@@ -0,0 +1,33 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2015, Joyent, Inc.
+#
+
+PROG= ctfconvert
+
+include ../Makefile.cmd
+
+CFLAGS += $(CCVERBOSE)
+LDLIBS += -lctf -lelf
+
+.KEEP_STATE:
+
+all: $(PROG)
+
+install: all $(ROOTPROG)
+
+clean:
+
+lint: lint_PROG
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/ctfconvert/ctfconvert.c b/usr/src/cmd/ctfconvert/ctfconvert.c
new file mode 100644
index 0000000000..a4394c3ec8
--- /dev/null
+++ b/usr/src/cmd/ctfconvert/ctfconvert.c
@@ -0,0 +1,389 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2015, Joyent, Inc.
+ */
+
+/*
+ * Create CTF from extant debugging information
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <libelf.h>
+#include <libctf.h>
+#include <string.h>
+#include <libgen.h>
+#include <limits.h>
+#include <strings.h>
+#include <sys/debug.h>
+
+#define CTFCONVERT_OK 0
+#define CTFCONVERT_FATAL 1
+#define CTFCONVERT_USAGE 2
+
+#define CTFCONVERT_DEFAULT_NTHREADS 4
+
+#define CTFCONVERT_ALTEXEC "CTFCONVERT_ALTEXEC"
+
+static char *ctfconvert_progname;
+
+static void
+ctfconvert_fatal(const char *fmt, ...)
+{
+ va_list ap;
+
+ (void) fprintf(stderr, "%s: ", ctfconvert_progname);
+ va_start(ap, fmt);
+ (void) vfprintf(stderr, fmt, ap);
+ va_end(ap);
+
+ exit(CTFCONVERT_FATAL);
+}
+
+
+static void
+ctfconvert_usage(const char *fmt, ...)
+{
+ if (fmt != NULL) {
+ va_list ap;
+
+ (void) fprintf(stderr, "%s: ", ctfconvert_progname);
+ va_start(ap, fmt);
+ (void) vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ }
+
+ (void) fprintf(stderr, "Usage: %s [-is] [-j nthrs] [-l label | "
+ "-L labelenv] [-o outfile] input\n"
+ "\n"
+ "\t-i ignore files not built partially from C sources\n"
+ "\t-j use nthrs threads to perform the merge\n"
+ "\t-k keep around original input file on failure\n"
+ "\t-o copy input to outfile and add CTF\n"
+ "\t-l set output container's label to specified value\n"
+ "\t-L set output container's label to value from environment\n",
+ ctfconvert_progname);
+}
+
+/*
+ * This is a bit unfortunate. Traditionally we do type uniquification across all
+ * modules in the kernel, including ip and unix against genunix. However, when
+ * _MACHDEP is defined, then the cpu_t ends up having an additional member
+ * (cpu_m), thus changing the ability for us to uniquify against it. This in
+ * turn causes a lot of type sprawl, as there's a lot of things that end up
+ * referring to the cpu_t and it chains out from there.
+ *
+ * So, if we find that a cpu_t has been defined and it has a couple of useful
+ * sentinel members and it does *not* have the cpu_m member, then we will try
+ * and lookup or create a forward declaration to the machcpu, append it to the
+ * end, and update the file.
+ *
+ * This currently is only invoked if an undocumented option -X is passed. This
+ * value is private to illumos and it can be changed at any time inside of it,
+ * so if -X wants to be used for something, it should be. The ability to rely on
+ * -X for others is strictly not an interface in any way, shape, or form.
+ *
+ * The following struct contains most of the information that we care about and
+ * that we want to validate exists before we decide what to do.
+ */
+
+typedef struct ctfconvert_fixup {
+ boolean_t cf_cyclic; /* Do we have a cpu_cyclic member */
+ boolean_t cf_mcpu; /* We have a cpu_m member */
+ boolean_t cf_lastpad; /* Is the pad member the last entry */
+ ulong_t cf_padoff; /* offset of the pad */
+} ctfconvert_fixup_t;
+
+/* ARGSUSED */
+static int
+ctfconvert_fixup_genunix_cb(const char *name, ctf_id_t tid, ulong_t off,
+ void *arg)
+{
+ ctfconvert_fixup_t *cfp = arg;
+
+ cfp->cf_lastpad = B_FALSE;
+ if (strcmp(name, "cpu_cyclic") == 0) {
+ cfp->cf_cyclic = B_TRUE;
+ return (0);
+ }
+
+ if (strcmp(name, "cpu_m") == 0) {
+ cfp->cf_mcpu = B_TRUE;
+ return (0);
+ }
+
+ if (strcmp(name, "cpu_m_pad") == 0) {
+ cfp->cf_lastpad = B_TRUE;
+ cfp->cf_padoff = off;
+ return (0);
+ }
+
+ return (0);
+}
+
+static void
+ctfconvert_fixup_genunix(ctf_file_t *fp)
+{
+ ctf_id_t cpuid, mcpu;
+ ssize_t sz;
+ ctfconvert_fixup_t cf;
+ int model, ptrsz;
+
+ cpuid = ctf_lookup_by_name(fp, "struct cpu");
+ if (cpuid == CTF_ERR)
+ return;
+
+ if (ctf_type_kind(fp, cpuid) != CTF_K_STRUCT)
+ return;
+
+ if ((sz = ctf_type_size(fp, cpuid)) == CTF_ERR)
+ return;
+
+ model = ctf_getmodel(fp);
+ VERIFY(model == CTF_MODEL_ILP32 || model == CTF_MODEL_LP64);
+ ptrsz = model == CTF_MODEL_ILP32 ? 4 : 8;
+
+ bzero(&cf, sizeof (ctfconvert_fixup_t));
+ if (ctf_member_iter(fp, cpuid, ctfconvert_fixup_genunix_cb, &cf) ==
+ CTF_ERR)
+ return;
+
+ /*
+ * Finally, we want to verify that the cpu_m is actually the last member
+ * that we have here.
+ */
+ if (cf.cf_cyclic == B_FALSE || cf.cf_mcpu == B_TRUE ||
+ cf.cf_lastpad == B_FALSE) {
+ return;
+ }
+
+ if (cf.cf_padoff + ptrsz * NBBY != sz * NBBY) {
+ return;
+ }
+
+ /*
+ * Okay, we're going to do this, try to find a struct machcpu. We either
+ * want a forward or a struct. If we find something else, error. If we
+ * find nothing, add a forward and then add the member.
+ */
+ mcpu = ctf_lookup_by_name(fp, "struct machcpu");
+ if (mcpu == CTF_ERR) {
+ mcpu = ctf_add_forward(fp, CTF_ADD_NONROOT, "machcpu",
+ CTF_K_STRUCT);
+ if (mcpu == CTF_ERR) {
+ ctfconvert_fatal("failed to add 'struct machcpu' "
+ "forward: %s", ctf_errmsg(ctf_errno(fp)));
+ }
+ } else {
+ int kind;
+ if ((kind = ctf_type_kind(fp, mcpu)) == CTF_ERR) {
+ ctfconvert_fatal("failed to get the type kind for "
+ "the struct machcpu: %s",
+ ctf_errmsg(ctf_errno(fp)));
+ }
+
+ if (kind != CTF_K_STRUCT && kind != CTF_K_FORWARD)
+ ctfconvert_fatal("encountered a struct machcpu of the "
+ "wrong type, found type kind %d\n", kind);
+ }
+
+ if (ctf_update(fp) == CTF_ERR) {
+ ctfconvert_fatal("failed to update output file: %s\n",
+ ctf_errmsg(ctf_errno(fp)));
+ }
+
+ if (ctf_add_member(fp, cpuid, "cpu_m", mcpu, sz * NBBY) == CTF_ERR) {
+ ctfconvert_fatal("failed to add the m_cpu member: %s\n",
+ ctf_errmsg(ctf_errno(fp)));
+ }
+
+ if (ctf_update(fp) == CTF_ERR) {
+ ctfconvert_fatal("failed to update output file: %s\n",
+ ctf_errmsg(ctf_errno(fp)));
+ }
+
+ VERIFY(ctf_type_size(fp, cpuid) == sz);
+}
+
+static void
+ctfconvert_altexec(char **argv)
+{
+ const char *alt;
+ char *altexec;
+
+ alt = getenv(CTFCONVERT_ALTEXEC);
+ if (alt == NULL || *alt == '\0')
+ return;
+
+ altexec = strdup(alt);
+ if (altexec == NULL)
+ ctfconvert_fatal("failed to allocate memory for altexec\n");
+ if (unsetenv(CTFCONVERT_ALTEXEC) != 0)
+ ctfconvert_fatal("failed to unset %s from environment: %s\n",
+ CTFCONVERT_ALTEXEC, strerror(errno));
+
+ (void) execv(altexec, argv);
+ ctfconvert_fatal("failed to execute alternate program %s: %s",
+ altexec, strerror(errno));
+}
+
+int
+main(int argc, char *argv[])
+{
+ int c, ifd, err;
+ boolean_t keep = B_FALSE;
+ uint_t flags = 0;
+ uint_t nthreads = CTFCONVERT_DEFAULT_NTHREADS;
+ const char *outfile = NULL;
+ const char *label = NULL;
+ const char *infile = NULL;
+ char *tmpfile;
+ ctf_file_t *ofp;
+ long argj;
+ char *eptr;
+ char buf[4096];
+ boolean_t optx = B_FALSE;
+
+ ctfconvert_progname = basename(argv[0]);
+
+ ctfconvert_altexec(argv);
+
+ while ((c = getopt(argc, argv, ":j:kl:L:o:iX")) != -1) {
+ switch (c) {
+ case 'k':
+ keep = B_TRUE;
+ break;
+ case 'l':
+ label = optarg;
+ break;
+ case 'L':
+ label = getenv(optarg);
+ break;
+ case 'j':
+ errno = 0;
+ argj = strtol(optarg, &eptr, 10);
+ if (errno != 0 || argj == LONG_MAX ||
+ argj == LONG_MIN || argj <= 0 ||
+ argj > UINT_MAX || *eptr != '\0') {
+ ctfconvert_fatal("invalid argument for -j: "
+ "%s\n", optarg);
+ }
+ nthreads = (uint_t)argj;
+ break;
+ case 'o':
+ outfile = optarg;
+ break;
+ case 'i':
+ flags |= CTF_CONVERT_F_IGNNONC;
+ break;
+ case 'X':
+ optx = B_TRUE;
+ break;
+ case ':':
+ ctfconvert_usage("Option -%c requires an operand\n",
+ optopt);
+ return (CTFCONVERT_USAGE);
+ case '?':
+ ctfconvert_usage("Unknown option: -%c\n", optopt);
+ return (CTFCONVERT_USAGE);
+ }
+ }
+
+ argv += optind;
+ argc -= optind;
+
+ if (argc < 1) {
+ ctfconvert_usage("Missing required input file\n");
+ return (CTFCONVERT_USAGE);
+ }
+ infile = argv[0];
+
+ if (elf_version(EV_CURRENT) == EV_NONE)
+ ctfconvert_fatal("failed to initialize libelf: library is "
+ "out of date\n");
+
+ ifd = open(infile, O_RDONLY);
+ if (ifd < 0) {
+ ctfconvert_fatal("failed to open input file %s: %s\n", infile,
+ strerror(errno));
+ }
+
+ /*
+ * By default we remove the input file on failure unless we've been
+ * given an output file or -k has been specified.
+ */
+ if (outfile != NULL && strcmp(infile, outfile) != 0)
+ keep = B_TRUE;
+
+ ofp = ctf_fdconvert(ifd, label, nthreads, flags, &err, buf,
+ sizeof (buf));
+ if (ofp == NULL) {
+ /*
+ * -i says that we shouldn't concern ourselves with source files
+ * that weren't built from C source code in part. Because this
+ * has been traditionally used across all of illumos, we still
+ * honor it.
+ */
+ if ((flags & CTF_CONVERT_F_IGNNONC) != 0 &&
+ err == ECTF_CONVNOCSRC) {
+ exit(CTFCONVERT_OK);
+ }
+ if (keep == B_FALSE)
+ (void) unlink(infile);
+ ctfconvert_fatal("CTF conversion failed: %s\n",
+ err == ECTF_CONVBKERR ? buf : ctf_errmsg(err));
+ }
+
+ if (optx == B_TRUE)
+ ctfconvert_fixup_genunix(ofp);
+
+ tmpfile = NULL;
+ if (outfile == NULL || strcmp(infile, outfile) == 0) {
+ if (asprintf(&tmpfile, "%s.ctf", infile) == -1) {
+ if (keep == B_FALSE)
+ (void) unlink(infile);
+ ctfconvert_fatal("failed to allocate memory for "
+ "temporary file: %s\n", strerror(errno));
+ }
+ outfile = tmpfile;
+ }
+ err = ctf_elfwrite(ofp, infile, outfile, CTF_ELFWRITE_F_COMPRESS);
+ if (err == CTF_ERR) {
+ (void) unlink(outfile);
+ if (keep == B_FALSE)
+ (void) unlink(infile);
+ ctfconvert_fatal("failed to write CTF section to output file: "
+ "%s", ctf_errmsg(ctf_errno(ofp)));
+ }
+ ctf_close(ofp);
+
+ if (tmpfile != NULL) {
+ if (rename(tmpfile, infile) != 0) {
+ int e = errno;
+ (void) unlink(outfile);
+ if (keep == B_FALSE)
+ (void) unlink(infile);
+ ctfconvert_fatal("failed to rename temporary file: "
+ "%s\n", strerror(e));
+ }
+ }
+ free(tmpfile);
+
+ return (CTFCONVERT_OK);
+}
diff --git a/usr/src/cmd/ctfdiff/Makefile b/usr/src/cmd/ctfdiff/Makefile
new file mode 100644
index 0000000000..268bf9f3ed
--- /dev/null
+++ b/usr/src/cmd/ctfdiff/Makefile
@@ -0,0 +1,33 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015, Joyent, Inc.
+#
+
+PROG= ctfdiff
+
+include ../Makefile.cmd
+
+CFLAGS += $(CCVERBOSE)
+LDLIBS += -lctf
+
+.KEEP_STATE:
+
+all: $(PROG)
+
+install: all $(ROOTPROG)
+
+clean:
+
+lint: lint_PROG
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/ctfdiff/ctfdiff.c b/usr/src/cmd/ctfdiff/ctfdiff.c
new file mode 100644
index 0000000000..8018761257
--- /dev/null
+++ b/usr/src/cmd/ctfdiff/ctfdiff.c
@@ -0,0 +1,518 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2015, Joyent, Inc.
+ */
+
+/*
+ * diff two CTF containers
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <strings.h>
+#include <libctf.h>
+#include <libgen.h>
+#include <stdarg.h>
+
+#define CTFDIFF_NAMELEN 256
+
+#define CTFDIFF_EXIT_SIMILAR 0
+#define CTFDIFF_EXIT_DIFFERENT 1
+#define CTFDIFF_EXIT_USAGE 2
+#define CTFDIFF_EXIT_ERROR 3
+
+typedef enum ctf_diff_cmd {
+ CTF_DIFF_TYPES = 0x01,
+ CTF_DIFF_FUNCS = 0x02,
+ CTF_DIFF_OBJS = 0x04,
+ CTF_DIFF_DEFAULT = 0x07,
+ CTF_DIFF_LABEL = 0x08,
+ CTF_DIFF_ALL = 0x0f
+} ctf_diff_cmd_t;
+
+typedef struct {
+ int dil_next;
+ const char **dil_labels;
+} ctfdiff_label_t;
+
+static char *g_progname;
+static const char *g_iname;
+static ctf_file_t *g_ifp;
+static const char *g_oname;
+static ctf_file_t *g_ofp;
+static char **g_typelist = NULL;
+static int g_nexttype = 0;
+static int g_ntypes = 0;
+static char **g_objlist = NULL;
+static int g_nextfunc = 0;
+static int g_nfuncs = 0;
+static char **g_funclist = NULL;
+static int g_nextobj = 0;
+static int g_nobjs = 0;
+static boolean_t g_onlydiff = B_FALSE;
+static boolean_t g_different = B_FALSE;
+static ctf_diff_cmd_t g_flag = 0;
+
+static void
+ctfdiff_fatal(const char *fmt, ...)
+{
+ va_list ap;
+
+ (void) fprintf(stderr, "%s: ", g_progname);
+ va_start(ap, fmt);
+ (void) vfprintf(stderr, fmt, ap);
+ va_end(ap);
+
+ exit(CTFDIFF_EXIT_ERROR);
+}
+
+static const char *
+ctfdiff_fp_to_name(ctf_file_t *fp)
+{
+ if (fp == g_ifp)
+ return (g_iname);
+ if (fp == g_ofp)
+ return (g_oname);
+ return (NULL);
+}
+
+/* ARGSUSED */
+static void
+ctfdiff_func_cb(ctf_file_t *ifp, ulong_t iidx, boolean_t similar,
+ ctf_file_t *ofp, ulong_t oidx, void *arg)
+{
+ char namebuf[CTFDIFF_NAMELEN];
+
+ if (similar == B_TRUE)
+ return;
+
+ if (ctf_symbol_name(ifp, iidx, namebuf, sizeof (namebuf)) == NULL) {
+ if (g_nextfunc != 0)
+ return;
+ (void) printf("ctf container %s function %ld is different\n",
+ ctfdiff_fp_to_name(ifp), iidx);
+ } else {
+ if (g_nextfunc != 0) {
+ int i;
+ for (i = 0; i < g_nextfunc; i++) {
+ if (strcmp(g_funclist[i], namebuf) == 0)
+ break;
+ }
+ if (i == g_nextfunc)
+ return;
+ }
+ (void) printf("ctf container %s function %s (%ld) is "
+ "different\n", ctfdiff_fp_to_name(ifp), namebuf, iidx);
+ }
+
+ g_different = B_TRUE;
+}
+
+/* ARGSUSED */
+static void
+ctfdiff_obj_cb(ctf_file_t *ifp, ulong_t iidx, ctf_id_t iid, boolean_t similar,
+ ctf_file_t *ofp, ulong_t oidx, ctf_id_t oid, void *arg)
+{
+ char namebuf[CTFDIFF_NAMELEN];
+
+ if (similar == B_TRUE)
+ return;
+
+ if (ctf_symbol_name(ifp, iidx, namebuf, sizeof (namebuf)) == NULL) {
+ if (g_nextobj != 0)
+ return;
+ (void) printf("ctf container %s object %ld is different\n",
+ ctfdiff_fp_to_name(ifp), iidx);
+ } else {
+ if (g_nextobj != 0) {
+ int i;
+ for (i = 0; i < g_nextobj; i++) {
+ if (strcmp(g_objlist[i], namebuf) == 0)
+ break;
+ }
+ if (i == g_nextobj)
+ return;
+ }
+ (void) printf("ctf container %s object %s (%ld) is different\n",
+ ctfdiff_fp_to_name(ifp), namebuf, iidx);
+ }
+
+ g_different = B_TRUE;
+}
+
+/* ARGSUSED */
+static void
+ctfdiff_cb(ctf_file_t *ifp, ctf_id_t iid, boolean_t similar, ctf_file_t *ofp,
+ ctf_id_t oid, void *arg)
+{
+ if (similar == B_TRUE)
+ return;
+
+ if (ctf_type_kind(ifp, iid) == CTF_K_UNKNOWN)
+ return;
+
+ /*
+ * Check if it's the type the user cares about.
+ */
+ if (g_nexttype != 0) {
+ int i;
+ char namebuf[CTFDIFF_NAMELEN];
+
+ if (ctf_type_name(ifp, iid, namebuf, sizeof (namebuf)) ==
+ NULL) {
+ ctfdiff_fatal("failed to obtain the name "
+ "of type %ld from %s: %s\n",
+ iid, ctfdiff_fp_to_name(ifp),
+ ctf_errmsg(ctf_errno(ifp)));
+ }
+
+ for (i = 0; i < g_nexttype; i++) {
+ if (strcmp(g_typelist[i], namebuf) == 0)
+ break;
+ }
+
+ if (i == g_nexttype)
+ return;
+ }
+
+ g_different = B_TRUE;
+
+ if (g_onlydiff == B_TRUE)
+ return;
+
+ (void) printf("ctf container %s type %ld is different\n",
+ ctfdiff_fp_to_name(ifp), iid);
+}
+
+/* ARGSUSED */
+static int
+ctfdiff_labels_count(const char *name, const ctf_lblinfo_t *li, void *arg)
+{
+ uint32_t *count = arg;
+ *count = *count + 1;
+
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+ctfdiff_labels_fill(const char *name, const ctf_lblinfo_t *li, void *arg)
+{
+ ctfdiff_label_t *dil = arg;
+
+ dil->dil_labels[dil->dil_next] = name;
+ dil->dil_next++;
+
+ return (0);
+}
+
+static int
+ctfdiff_labels(ctf_file_t *ifp, ctf_file_t *ofp)
+{
+ int ret;
+ uint32_t nilabel, nolabel, i, j;
+ ctfdiff_label_t idl, odl;
+ const char **ilptr, **olptr;
+
+ nilabel = nolabel = 0;
+ ret = ctf_label_iter(ifp, ctfdiff_labels_count, &nilabel);
+ if (ret == CTF_ERR)
+ return (ret);
+ ret = ctf_label_iter(ofp, ctfdiff_labels_count, &nolabel);
+ if (ret == CTF_ERR)
+ return (ret);
+
+ if (nilabel != nolabel) {
+ (void) printf("ctf container %s labels differ from ctf "
+ "container %s\n", ctfdiff_fp_to_name(ifp),
+ ctfdiff_fp_to_name(ofp));
+ g_different = B_TRUE;
+ return (0);
+ }
+
+ if (nilabel == 0)
+ return (0);
+
+ ilptr = malloc(sizeof (char *) * nilabel);
+ olptr = malloc(sizeof (char *) * nolabel);
+ if (ilptr == NULL || olptr == NULL) {
+ ctfdiff_fatal("failed to allocate memory for label "
+ "comparison\n");
+ }
+
+ idl.dil_next = 0;
+ idl.dil_labels = ilptr;
+ odl.dil_next = 0;
+ odl.dil_labels = olptr;
+
+ if ((ret = ctf_label_iter(ifp, ctfdiff_labels_fill, &idl)) != 0)
+ goto out;
+ if ((ret = ctf_label_iter(ofp, ctfdiff_labels_fill, &odl)) != 0)
+ goto out;
+
+ for (i = 0; i < nilabel; i++) {
+ for (j = 0; j < nolabel; j++) {
+ if (strcmp(ilptr[i], olptr[j]) == 0)
+ break;
+ }
+
+ if (j == nolabel) {
+ (void) printf("ctf container %s labels differ from ctf "
+ "container %s\n", ctfdiff_fp_to_name(ifp),
+ ctfdiff_fp_to_name(ofp));
+ g_different = B_TRUE;
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ free(ilptr);
+ free(olptr);
+ return (ret);
+}
+
+static void
+ctfdiff_usage(const char *fmt, ...)
+{
+ if (fmt != NULL) {
+ va_list ap;
+
+ (void) fprintf(stderr, "%s: ", g_progname);
+ va_start(ap, fmt);
+ (void) vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ }
+
+ (void) fprintf(stderr, "Usage: %s [-afIloqt] [-F function] [-O object]"
+ "[-p parent] [-P parent]\n"
+ "\t[-T type] file1 file2\n"
+ "\n"
+ "\t-a diff label, types, objects, and functions\n"
+ "\t-f diff function type information\n"
+ "\t-F when diffing functions, only consider those named\n"
+ "\t-I ignore the names of integral types\n"
+ "\t-l diff CTF labels\n"
+ "\t-o diff global object type information\n"
+ "\t-O when diffing objects, only consider those named\n"
+ "\t-p set the CTF parent for file1\n"
+ "\t-P set the CTF parent for file2\n"
+ "\t-q set quiet mode (no diff information sent to stdout)\n"
+ "\t-t diff CTF type information\n"
+ "\t-T when diffing types, only consider those named\n",
+ g_progname);
+}
+
+int
+main(int argc, char *argv[])
+{
+ ctf_diff_flag_t flags = 0;
+ int err, c;
+ ctf_file_t *ifp, *ofp;
+ ctf_diff_t *cdp;
+ ctf_file_t *pifp = NULL;
+ ctf_file_t *pofp = NULL;
+
+ g_progname = basename(argv[0]);
+
+ while ((c = getopt(argc, argv, ":aqtfolIp:F:O:P:T:")) != -1) {
+ switch (c) {
+ case 'a':
+ g_flag |= CTF_DIFF_ALL;
+ break;
+ case 't':
+ g_flag |= CTF_DIFF_TYPES;
+ break;
+ case 'f':
+ g_flag |= CTF_DIFF_FUNCS;
+ break;
+ case 'o':
+ g_flag |= CTF_DIFF_OBJS;
+ break;
+ case 'l':
+ g_flag |= CTF_DIFF_LABEL;
+ break;
+ case 'q':
+ g_onlydiff = B_TRUE;
+ break;
+ case 'p':
+ pifp = ctf_open(optarg, &err);
+ if (pifp == NULL) {
+ ctfdiff_fatal("failed to open parent input "
+ "container %s: %s\n", optarg,
+ ctf_errmsg(err));
+ }
+ break;
+ case 'F':
+ if (g_nextfunc == g_nfuncs) {
+ if (g_nfuncs == 0)
+ g_nfuncs = 16;
+ else
+ g_nfuncs *= 2;
+ g_funclist = realloc(g_funclist,
+ sizeof (char *) * g_nfuncs);
+ if (g_funclist == NULL) {
+ ctfdiff_fatal("failed to allocate "
+ "memory for the %dth -F option: "
+ "%s\n", g_nexttype + 1,
+ strerror(errno));
+ }
+ }
+ g_funclist[g_nextfunc] = optarg;
+ g_nextfunc++;
+ break;
+ case 'O':
+ if (g_nextobj == g_nobjs) {
+ if (g_nobjs == 0)
+ g_nobjs = 16;
+ else
+ g_nobjs *= 2;
+ g_objlist = realloc(g_objlist,
+ sizeof (char *) * g_nobjs);
+ if (g_objlist == NULL) {
+ ctfdiff_fatal("failed to allocate "
+ "memory for the %dth -F option: "
+ "%s\n", g_nexttype + 1,
+ strerror(errno));
+ return (CTFDIFF_EXIT_ERROR);
+ }
+ }
+ g_objlist[g_nextobj] = optarg;
+ g_nextobj++;
+ break;
+ case 'I':
+ flags |= CTF_DIFF_F_IGNORE_INTNAMES;
+ break;
+ case 'P':
+ pofp = ctf_open(optarg, &err);
+ if (pofp == NULL) {
+ ctfdiff_fatal("failed to open parent output "
+ "container %s: %s\n", optarg,
+ ctf_errmsg(err));
+ }
+ break;
+ case 'T':
+ if (g_nexttype == g_ntypes) {
+ if (g_ntypes == 0)
+ g_ntypes = 16;
+ else
+ g_ntypes *= 2;
+ g_typelist = realloc(g_typelist,
+ sizeof (char *) * g_ntypes);
+ if (g_typelist == NULL) {
+ ctfdiff_fatal("failed to allocate "
+ "memory for the %dth -T option: "
+ "%s\n", g_nexttype + 1,
+ strerror(errno));
+ }
+ }
+ g_typelist[g_nexttype] = optarg;
+ g_nexttype++;
+ break;
+ case ':':
+ ctfdiff_usage("Option -%c requires an operand\n",
+ optopt);
+ return (CTFDIFF_EXIT_USAGE);
+ case '?':
+ ctfdiff_usage("Unknown option: -%c\n", optopt);
+ return (CTFDIFF_EXIT_USAGE);
+ }
+ }
+
+ argc -= optind - 1;
+ argv += optind - 1;
+
+ if (g_flag == 0)
+ g_flag = CTF_DIFF_DEFAULT;
+
+ if (argc != 3) {
+ ctfdiff_usage(NULL);
+ return (CTFDIFF_EXIT_USAGE);
+ }
+
+ if (g_nexttype != 0 && !(g_flag & CTF_DIFF_TYPES)) {
+ ctfdiff_usage("-T cannot be used if not diffing types\n");
+ return (CTFDIFF_EXIT_USAGE);
+ }
+
+ if (g_nextfunc != 0 && !(g_flag & CTF_DIFF_FUNCS)) {
+ ctfdiff_usage("-F cannot be used if not diffing functions\n");
+ return (CTFDIFF_EXIT_USAGE);
+ }
+
+ if (g_nextobj != 0 && !(g_flag & CTF_DIFF_OBJS)) {
+ ctfdiff_usage("-O cannot be used if not diffing objects\n");
+ return (CTFDIFF_EXIT_USAGE);
+ }
+
+ ifp = ctf_open(argv[1], &err);
+ if (ifp == NULL) {
+ ctfdiff_fatal("failed to open %s: %s\n", argv[1],
+ ctf_errmsg(err));
+ }
+ if (pifp != NULL) {
+ err = ctf_import(ifp, pifp);
+ if (err != 0) {
+ ctfdiff_fatal("failed to set parent container: %s\n",
+ ctf_errmsg(ctf_errno(pifp)));
+ }
+ }
+ g_iname = argv[1];
+ g_ifp = ifp;
+
+ ofp = ctf_open(argv[2], &err);
+ if (ofp == NULL) {
+ ctfdiff_fatal("failed to open %s: %s\n", argv[2],
+ ctf_errmsg(err));
+ }
+
+ if (pofp != NULL) {
+ err = ctf_import(ofp, pofp);
+ if (err != 0) {
+ ctfdiff_fatal("failed to set parent container: %s\n",
+ ctf_errmsg(ctf_errno(pofp)));
+ }
+ }
+ g_oname = argv[2];
+ g_ofp = ofp;
+
+ if (ctf_diff_init(ifp, ofp, &cdp) != 0) {
+ ctfdiff_fatal("failed to initialize libctf diff engine: %s\n",
+ ctf_errmsg(ctf_errno(ifp)));
+ }
+
+ if (ctf_diff_setflags(cdp, flags) != 0) {
+ ctfdiff_fatal("failed to set ctfdiff flags: %s\n",
+ ctf_errmsg(ctf_errno(ifp)));
+ }
+
+ err = 0;
+ if ((g_flag & CTF_DIFF_TYPES) && err != CTF_ERR)
+ err = ctf_diff_types(cdp, ctfdiff_cb, NULL);
+ if ((g_flag & CTF_DIFF_FUNCS) && err != CTF_ERR)
+ err = ctf_diff_functions(cdp, ctfdiff_func_cb, NULL);
+ if ((g_flag & CTF_DIFF_OBJS) && err != CTF_ERR)
+ err = ctf_diff_objects(cdp, ctfdiff_obj_cb, NULL);
+ if ((g_flag & CTF_DIFF_LABEL) && err != CTF_ERR)
+ err = ctfdiff_labels(ifp, ofp);
+
+ ctf_diff_fini(cdp);
+ if (err == CTF_ERR) {
+ ctfdiff_fatal("encountered a libctf error: %s!\n",
+ ctf_errmsg(ctf_errno(ifp)));
+ }
+
+ return (g_different == B_TRUE ? CTFDIFF_EXIT_DIFFERENT :
+ CTFDIFF_EXIT_SIMILAR);
+}
diff --git a/usr/src/cmd/ctfdump/Makefile b/usr/src/cmd/ctfdump/Makefile
new file mode 100644
index 0000000000..962ca43a8f
--- /dev/null
+++ b/usr/src/cmd/ctfdump/Makefile
@@ -0,0 +1,33 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2015, Joyent, Inc.
+#
+
+PROG= ctfdump
+
+include ../Makefile.cmd
+
+CFLAGS += $(CCVERBOSE)
+LDLIBS += -lctf
+
+.KEEP_STATE:
+
+all: $(PROG)
+
+install: all $(ROOTPROG)
+
+clean:
+
+lint: lint_PROG
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/ctfdump/ctfdump.c b/usr/src/cmd/ctfdump/ctfdump.c
new file mode 100644
index 0000000000..d0ff63a02a
--- /dev/null
+++ b/usr/src/cmd/ctfdump/ctfdump.c
@@ -0,0 +1,815 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2015, Joyent, Inc.
+ */
+
+/*
+ * Dump information about CTF containers. This was inspired by the original
+ * ctfdump written in tools/ctf, but this has been reimplemented in terms of
+ * libctf.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <libctf.h>
+#include <libgen.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <string.h>
+
+typedef enum ctfdump_arg {
+ CTFDUMP_OBJECTS = 0x01,
+ CTFDUMP_FUNCTIONS = 0x02,
+ CTFDUMP_HEADER = 0x04,
+ CTFDUMP_LABELS = 0x08,
+ CTFDUMP_STRINGS = 0x10,
+ CTFDUMP_STATS = 0x20,
+ CTFDUMP_TYPES = 0x40,
+ CTFDUMP_DEFAULT = 0x7f,
+ CTFDUMP_OUTPUT = 0x80,
+ CTFDUMP_ALL = 0xff
+} ctfdump_arg_t;
+
+typedef struct ctfdump_stat {
+ ulong_t cs_ndata; /* number of data objects */
+ ulong_t cs_nfuncs; /* number of functions */
+ ulong_t cs_nfuncargs; /* number of function args */
+ ulong_t cs_nfuncmax; /* largest number of args */
+ ulong_t cs_ntypes[CTF_K_MAX]; /* number of types */
+ ulong_t cs_nsmembs; /* number of struct members */
+ ulong_t cs_nsmax; /* largest number of members */
+ ulong_t cs_structsz; /* sum of structures sizes */
+ ulong_t cs_sszmax; /* largest structure */
+ ulong_t cs_numembs; /* number of union members */
+ ulong_t cs_numax; /* largest number of members */
+ ulong_t cs_unionsz; /* sum of unions sizes */
+ ulong_t cs_uszmax; /* largest union */
+ ulong_t cs_nemembs; /* number of enum members */
+ ulong_t cs_nemax; /* largest number of members */
+ ulong_t cs_nstrings; /* number of strings */
+ ulong_t cs_strsz; /* string size */
+ ulong_t cs_strmax; /* longest string */
+} ctfdump_stat_t;
+
+static const char *g_progname;
+static ctfdump_arg_t g_dump;
+static ctf_file_t *g_fp;
+static ctfdump_stat_t g_stats;
+static ctf_id_t *g_fargc;
+static int g_nfargc;
+
+static int g_exit = 0;
+
+static const char *ctfdump_fpenc[] = {
+ NULL,
+ "SINGLE",
+ "DOUBLE",
+ "COMPLEX",
+ "DCOMPLEX",
+ "LDCOMPLEX",
+ "LDOUBLE",
+ "INTERVAL",
+ "DINTERVAL",
+ "LDINTERVAL",
+ "IMAGINARY",
+ "DIMAGINARY",
+ "LDIMAGINARY"
+};
+
+/*
+ * When stats are requested, we have to go through everything. To make our lives
+ * easier, we'll just always allow the code to print everything out, but only
+ * output it if we have actually enabled that section.
+ */
+static void
+ctfdump_printf(ctfdump_arg_t arg, const char *fmt, ...)
+{
+ va_list ap;
+
+ if ((arg & g_dump) == 0)
+ return;
+
+ va_start(ap, fmt);
+ vfprintf(stdout, fmt, ap);
+ va_end(ap);
+}
+
+static void
+ctfdump_warn(const char *fmt, ...)
+{
+ va_list ap;
+
+ (void) fprintf(stderr, "%s: ", g_progname);
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+}
+
+static void
+ctfdump_fatal(const char *fmt, ...)
+{
+ va_list ap;
+
+ (void) fprintf(stderr, "%s: ", g_progname);
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+
+ exit(1);
+}
+
+static void
+ctfdump_usage(const char *fmt, ...)
+{
+ if (fmt != NULL) {
+ va_list ap;
+ (void) fprintf(stderr, "%s: ", g_progname);
+ va_start(ap, fmt);
+ (void) vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ }
+
+ (void) fprintf(stderr, "Usage: %s [-dfhlsSt] [-p parent] [-u outfile] "
+ "file\n"
+ "\n"
+ "\t-d dump object data\n"
+ "\t-f dump function data\n"
+ "\t-h dump the CTF header\n"
+ "\t-l dump the label table\n"
+ "\t-p use parent to supply additional information\n"
+ "\t-s dump the string table\n"
+ "\t-S dump statistics about the CTF container\n"
+ "\t-t dump type information\n"
+ "\t-u dump uncompressed CTF data to outfile\n",
+ g_progname);
+}
+
+static void
+ctfdump_title(ctfdump_arg_t arg, const char *header)
+{
+ static const char line[] = "----------------------------------------"
+ "----------------------------------------";
+ ctfdump_printf(arg, "\n- %s %.*s\n\n", header, (int)78 - strlen(header),
+ line);
+}
+
+static int
+ctfdump_objects_cb(const char *name, ctf_id_t id, ulong_t symidx, void *arg)
+{
+ int len;
+
+ len = snprintf(NULL, 0, " [%u] %u", g_stats.cs_ndata, id);
+ ctfdump_printf(CTFDUMP_OBJECTS, " [%u] %u %*s%s (%u)\n",
+ g_stats.cs_ndata, id, MAX(15 - len, 0), "", name, symidx);
+ g_stats.cs_ndata++;
+ return (0);
+}
+
+static void
+ctfdump_objects(void)
+{
+ ctfdump_title(CTFDUMP_OBJECTS, "Data Objects");
+ if (ctf_object_iter(g_fp, ctfdump_objects_cb, NULL) == CTF_ERR) {
+ ctfdump_warn("failed to dump objects: %s\n",
+ ctf_errmsg(ctf_errno(g_fp)));
+ g_exit = 1;
+ }
+}
+
+static void
+ctfdump_fargs_grow(int nargs)
+{
+ if (g_nfargc < nargs) {
+ g_fargc = realloc(g_fargc, sizeof (ctf_id_t) * nargs);
+ if (g_fargc == NULL)
+ ctfdump_fatal("failed to get memory for %d "
+ "ctf_id_t's\n", nargs);
+ g_nfargc = nargs;
+ }
+}
+
+static int
+ctfdump_functions_cb(const char *name, ulong_t symidx, ctf_funcinfo_t *ctc,
+ void *arg)
+{
+ int i;
+
+ if (ctc->ctc_argc != 0) {
+ ctfdump_fargs_grow(ctc->ctc_argc);
+ if (ctf_func_args(g_fp, symidx, g_nfargc, g_fargc) == CTF_ERR)
+ ctfdump_fatal("failed to get arguments for function "
+ "%s: %s\n", name, ctf_errmsg(ctf_errno(g_fp)));
+ }
+
+ ctfdump_printf(CTFDUMP_FUNCTIONS,
+ " [%lu] %s (%lu) returns: %u args: (", g_stats.cs_nfuncs, name,
+ symidx, ctc->ctc_return);
+ for (i = 0; i < ctc->ctc_argc; i++)
+ ctfdump_printf(CTFDUMP_FUNCTIONS, "%lu%s", g_fargc[i],
+ i + 1 == ctc->ctc_argc ? "" : ", ");
+ if (ctc->ctc_flags & CTF_FUNC_VARARG)
+ ctfdump_printf(CTFDUMP_FUNCTIONS, "%s...",
+ ctc->ctc_argc == 0 ? "" : ", ");
+ ctfdump_printf(CTFDUMP_FUNCTIONS, ")\n");
+
+ g_stats.cs_nfuncs++;
+ g_stats.cs_nfuncargs += ctc->ctc_argc;
+ g_stats.cs_nfuncmax = MAX(ctc->ctc_argc, g_stats.cs_nfuncmax);
+
+ return (0);
+}
+
+static void
+ctfdump_functions(void)
+{
+ ctfdump_title(CTFDUMP_FUNCTIONS, "Functions");
+
+ if (ctf_function_iter(g_fp, ctfdump_functions_cb, NULL) == CTF_ERR) {
+ ctfdump_warn("failed to dump functions: %s\n",
+ ctf_errmsg(ctf_errno(g_fp)));
+ g_exit = 1;
+ }
+}
+
+static void
+ctfdump_header(void)
+{
+ const ctf_header_t *hp;
+ const char *parname, *parlabel;
+
+ ctfdump_title(CTFDUMP_HEADER, "CTF Header");
+ ctf_dataptr(g_fp, (const void **)&hp, NULL);
+ ctfdump_printf(CTFDUMP_HEADER, " cth_magic = 0x%04x\n",
+ hp->cth_magic);
+ ctfdump_printf(CTFDUMP_HEADER, " cth_version = %u\n",
+ hp->cth_version);
+ ctfdump_printf(CTFDUMP_HEADER, " cth_flags = 0x%02x\n",
+ ctf_flags(g_fp));
+ parname = ctf_parent_name(g_fp);
+ parlabel = ctf_parent_label(g_fp);
+ ctfdump_printf(CTFDUMP_HEADER, " cth_parlabel = %s\n",
+ parlabel == NULL ? "(anon)" : parlabel);
+ ctfdump_printf(CTFDUMP_HEADER, " cth_parname = %s\n",
+ parname == NULL ? "(anon)" : parname);
+ ctfdump_printf(CTFDUMP_HEADER, " cth_lbloff = %u\n",
+ hp->cth_lbloff);
+ ctfdump_printf(CTFDUMP_HEADER, " cth_objtoff = %u\n",
+ hp->cth_objtoff);
+ ctfdump_printf(CTFDUMP_HEADER, " cth_funcoff = %u\n",
+ hp->cth_funcoff);
+ ctfdump_printf(CTFDUMP_HEADER, " cth_typeoff = %u\n",
+ hp->cth_typeoff);
+ ctfdump_printf(CTFDUMP_HEADER, " cth_stroff = %u\n",
+ hp->cth_stroff);
+ ctfdump_printf(CTFDUMP_HEADER, " cth_strlen = %u\n",
+ hp->cth_strlen);
+}
+
+static int
+ctfdump_labels_cb(const char *name, const ctf_lblinfo_t *li, void *arg)
+{
+ ctfdump_printf(CTFDUMP_LABELS, " %5lu %s\n", li->ctb_typeidx, name);
+ return (0);
+}
+
+static void
+ctfdump_labels(void)
+{
+ ctfdump_title(CTFDUMP_LABELS, "Label Table");
+ if (ctf_label_iter(g_fp, ctfdump_labels_cb, NULL) == CTF_ERR) {
+ ctfdump_warn("failed to dump labels: %s\n",
+ ctf_errmsg(ctf_errno(g_fp)));
+ g_exit = 1;
+ }
+}
+
+static int
+ctfdump_strings_cb(const char *s, void *arg)
+{
+ size_t len = strlen(s) + 1;
+ ulong_t *stroff = arg;
+ ctfdump_printf(CTFDUMP_STRINGS, " [%lu] %s\n", *stroff,
+ *s == '\0' ? "\\0" : s);
+ *stroff = *stroff + len;
+ g_stats.cs_nstrings++;
+ g_stats.cs_strsz += len;
+ g_stats.cs_strmax = MAX(g_stats.cs_strmax, len);
+ return (0);
+}
+
+static void
+ctfdump_strings(void)
+{
+ ulong_t stroff = 0;
+
+ ctfdump_title(CTFDUMP_STRINGS, "String Table");
+ if (ctf_string_iter(g_fp, ctfdump_strings_cb, &stroff) == CTF_ERR) {
+ ctfdump_warn("failed to dump strings: %s\n",
+ ctf_errmsg(ctf_errno(g_fp)));
+ g_exit = 1;
+ }
+}
+
+static void
+ctfdump_stat_int(const char *name, ulong_t value)
+{
+ ctfdump_printf(CTFDUMP_STATS, " %-36s= %lu\n", name, value);
+}
+
+static void
+ctfdump_stat_fp(const char *name, float value)
+{
+ ctfdump_printf(CTFDUMP_STATS, " %-36s= %.2f\n", name, value);
+}
+
+static void
+ctfdump_stats(void)
+{
+ int i;
+ ulong_t sum;
+
+ ctfdump_title(CTFDUMP_STATS, "CTF Statistics");
+
+ ctfdump_stat_int("total number of data objects", g_stats.cs_ndata);
+ ctfdump_printf(CTFDUMP_STATS, "\n");
+ ctfdump_stat_int("total number of functions", g_stats.cs_nfuncs);
+ ctfdump_stat_int("total number of function arguments",
+ g_stats.cs_nfuncargs);
+ ctfdump_stat_int("maximum argument list length", g_stats.cs_nfuncmax);
+ if (g_stats.cs_nfuncs != 0)
+ ctfdump_stat_fp("average argument list length",
+ (float)g_stats.cs_nfuncargs / (float)g_stats.cs_nfuncs);
+ ctfdump_printf(CTFDUMP_STATS, "\n");
+
+ sum = 0;
+ for (i = 0; i < CTF_K_MAX; i++)
+ sum += g_stats.cs_ntypes[i];
+ ctfdump_stat_int("total number of types", sum);
+ ctfdump_stat_int("total number of integers",
+ g_stats.cs_ntypes[CTF_K_INTEGER]);
+ ctfdump_stat_int("total number of floats",
+ g_stats.cs_ntypes[CTF_K_FLOAT]);
+ ctfdump_stat_int("total number of pointers",
+ g_stats.cs_ntypes[CTF_K_POINTER]);
+ ctfdump_stat_int("total number of arrays",
+ g_stats.cs_ntypes[CTF_K_ARRAY]);
+ ctfdump_stat_int("total number of func types",
+ g_stats.cs_ntypes[CTF_K_FUNCTION]);
+ ctfdump_stat_int("total number of structs",
+ g_stats.cs_ntypes[CTF_K_STRUCT]);
+ ctfdump_stat_int("total number of unions",
+ g_stats.cs_ntypes[CTF_K_UNION]);
+ ctfdump_stat_int("total number of enums",
+ g_stats.cs_ntypes[CTF_K_ENUM]);
+ ctfdump_stat_int("total number of forward tags",
+ g_stats.cs_ntypes[CTF_K_FORWARD]);
+ ctfdump_stat_int("total number of typedefs",
+ g_stats.cs_ntypes[CTF_K_TYPEDEF]);
+ ctfdump_stat_int("total number of volatile types",
+ g_stats.cs_ntypes[CTF_K_VOLATILE]);
+ ctfdump_stat_int("total number of const types",
+ g_stats.cs_ntypes[CTF_K_CONST]);
+ ctfdump_stat_int("total number of restrict types",
+ g_stats.cs_ntypes[CTF_K_RESTRICT]);
+ ctfdump_stat_int("total number of unknowns (holes)",
+ g_stats.cs_ntypes[CTF_K_UNKNOWN]);
+
+ ctfdump_printf(CTFDUMP_STATS, "\n");
+ ctfdump_stat_int("total number of struct members", g_stats.cs_nsmembs);
+ ctfdump_stat_int("maximum number of struct members", g_stats.cs_nsmax);
+ ctfdump_stat_int("total size of all structs", g_stats.cs_structsz);
+ ctfdump_stat_int("maximum size of a struct", g_stats.cs_sszmax);
+ if (g_stats.cs_ntypes[CTF_K_STRUCT] != 0) {
+ ctfdump_stat_fp("average number of struct members",
+ (float)g_stats.cs_nsmembs /
+ (float)g_stats.cs_ntypes[CTF_K_STRUCT]);
+ ctfdump_stat_fp("average size of a struct",
+ (float)g_stats.cs_structsz /
+ (float)g_stats.cs_ntypes[CTF_K_STRUCT]);
+ }
+ ctfdump_printf(CTFDUMP_STATS, "\n");
+ ctfdump_stat_int("total number of union members", g_stats.cs_numembs);
+ ctfdump_stat_int("maximum number of union members", g_stats.cs_numax);
+ ctfdump_stat_int("total size of all unions", g_stats.cs_unionsz);
+ ctfdump_stat_int("maximum size of a union", g_stats.cs_uszmax);
+ if (g_stats.cs_ntypes[CTF_K_UNION] != 0) {
+ ctfdump_stat_fp("average number of union members",
+ (float)g_stats.cs_numembs /
+ (float)g_stats.cs_ntypes[CTF_K_UNION]);
+ ctfdump_stat_fp("average size of a union",
+ (float)g_stats.cs_unionsz /
+ (float)g_stats.cs_ntypes[CTF_K_UNION]);
+ }
+ ctfdump_printf(CTFDUMP_STATS, "\n");
+
+ ctfdump_stat_int("total number of enum members", g_stats.cs_nemembs);
+ ctfdump_stat_int("maximum number of enum members", g_stats.cs_nemax);
+ if (g_stats.cs_ntypes[CTF_K_ENUM] != 0) {
+ ctfdump_stat_fp("average number of enum members",
+ (float)g_stats.cs_nemembs /
+ (float)g_stats.cs_ntypes[CTF_K_ENUM]);
+ }
+ ctfdump_printf(CTFDUMP_STATS, "\n");
+
+ ctfdump_stat_int("total number of strings", g_stats.cs_nstrings);
+ ctfdump_stat_int("bytes of string data", g_stats.cs_strsz);
+ ctfdump_stat_int("maximum string length", g_stats.cs_strmax);
+ if (g_stats.cs_nstrings != 0)
+ ctfdump_stat_fp("average string length",
+ (float)g_stats.cs_strsz / (float)g_stats.cs_nstrings);
+ ctfdump_printf(CTFDUMP_STATS, "\n");
+}
+
+static void
+ctfdump_intenc_name(ctf_encoding_t *cte, char *buf, int len)
+{
+ int off = 0;
+ boolean_t space = B_FALSE;
+
+ if (cte->cte_format == 0 || (cte->cte_format &
+ ~(CTF_INT_SIGNED | CTF_INT_CHAR | CTF_INT_BOOL |
+ CTF_INT_VARARGS)) != 0) {
+ (void) snprintf(buf, len, "0x%x", cte->cte_format);
+ return;
+ }
+
+ if (cte->cte_format & CTF_INT_SIGNED) {
+ off += snprintf(buf + off, MAX(len - off, 0), "%sSIGNED",
+ space == B_TRUE ? " " : "");
+ space = B_TRUE;
+ }
+
+ if (cte->cte_format & CTF_INT_CHAR) {
+ off += snprintf(buf + off, MAX(len - off, 0), "%sCHAR",
+ space == B_TRUE ? " " : "");
+ space = B_TRUE;
+ }
+
+ if (cte->cte_format & CTF_INT_BOOL) {
+ off += snprintf(buf + off, MAX(len - off, 0), "%sBOOL",
+ space == B_TRUE ? " " : "");
+ space = B_TRUE;
+ }
+
+ if (cte->cte_format & CTF_INT_VARARGS) {
+ off += snprintf(buf + off, MAX(len - off, 0), "%sVARARGS",
+ space == B_TRUE ? " " : "");
+ space = B_TRUE;
+ }
+}
+
+static int
+ctfdump_member_cb(const char *member, ctf_id_t type, ulong_t off, void *arg)
+{
+ int *count = arg;
+ ctfdump_printf(CTFDUMP_TYPES, "\t%s type=%lu off=%lu\n", member, type,
+ off);
+ *count = *count + 1;
+ return (0);
+}
+
+static int
+ctfdump_enum_cb(const char *name, int value, void *arg)
+{
+ int *count = arg;
+ ctfdump_printf(CTFDUMP_TYPES, "\t%s = %d\n", name, value);
+ *count = *count + 1;
+ return (0);
+}
+
+static int
+ctfdump_types_cb(ctf_id_t id, boolean_t root, void *arg)
+{
+ int kind, i, count;
+ ctf_id_t ref;
+ char name[512], ienc[128];
+ const char *encn;
+ ctf_funcinfo_t ctc;
+ ctf_arinfo_t ar;
+ ctf_encoding_t cte;
+ ssize_t size;
+
+ if ((kind = ctf_type_kind(g_fp, id)) == CTF_ERR)
+ ctfdump_fatal("encountered malformed ctf, type %s does not "
+ "have a kind: %s\n", name, ctf_errmsg(ctf_errno(g_fp)));
+
+ if (ctf_type_name(g_fp, id, name, sizeof (name)) == NULL) {
+ if (ctf_errno(g_fp) != ECTF_NOPARENT)
+ ctfdump_fatal("type %lu missing name: %s\n", id,
+ ctf_errmsg(ctf_errno(g_fp)));
+ (void) snprintf(name, sizeof (name), "(unknown %s)",
+ ctf_kind_name(g_fp, kind));
+ }
+
+ g_stats.cs_ntypes[kind]++;
+ if (root == B_TRUE)
+ ctfdump_printf(CTFDUMP_TYPES, " <%lu> ", id);
+ else
+ ctfdump_printf(CTFDUMP_TYPES, " [%lu] ", id);
+
+ switch (kind) {
+ case CTF_K_UNKNOWN:
+ break;
+ case CTF_K_INTEGER:
+ if (ctf_type_encoding(g_fp, id, &cte) == CTF_ERR)
+ ctfdump_fatal("failed to get encoding information "
+ "for %s: %s\n", name, ctf_errmsg(ctf_errno(g_fp)));
+ ctfdump_intenc_name(&cte, ienc, sizeof (ienc));
+ ctfdump_printf(CTFDUMP_TYPES,
+ "%s encoding=%s offset=%u bits=%u",
+ name, ienc, cte.cte_offset, cte.cte_bits);
+ break;
+ case CTF_K_FLOAT:
+ if (ctf_type_encoding(g_fp, id, &cte) == CTF_ERR)
+ ctfdump_fatal("failed to get encoding information "
+ "for %s: %s\n", name, ctf_errmsg(ctf_errno(g_fp)));
+ if (cte.cte_format < 1 || cte.cte_format > 12)
+ encn = "unknown";
+ else
+ encn = ctfdump_fpenc[cte.cte_format];
+ ctfdump_printf(CTFDUMP_TYPES, "%s encoding=%s offset=%u "
+ "bits=%u", name, encn, cte.cte_offset, cte.cte_bits);
+ break;
+ case CTF_K_POINTER:
+ if ((ref = ctf_type_reference(g_fp, id)) == CTF_ERR)
+ ctfdump_fatal("failed to get reference type for %s: "
+ "%s\n", name, ctf_errmsg(ctf_errno(g_fp)));
+ ctfdump_printf(CTFDUMP_TYPES, "%s refers to %lu", name,
+ ref);
+ break;
+ case CTF_K_ARRAY:
+ if (ctf_array_info(g_fp, id, &ar) == CTF_ERR)
+ ctfdump_fatal("failed to get array information for "
+ "%s: %s\n", name, ctf_errmsg(ctf_errno(g_fp)));
+ ctfdump_printf(CTFDUMP_TYPES, "%s contents: %lu, index: %lu",
+ name, ar.ctr_contents, ar.ctr_index);
+ break;
+ case CTF_K_FUNCTION:
+ if (ctf_func_info_by_id(g_fp, id, &ctc) == CTF_ERR)
+ ctfdump_fatal("failed to get function info for %s: "
+ "%s\n", name, ctf_errmsg(ctf_errno(g_fp)));
+ if (ctc.ctc_argc > 0) {
+ ctfdump_fargs_grow(ctc.ctc_argc);
+ if (ctf_func_args_by_id(g_fp, id, g_nfargc, g_fargc) ==
+ CTF_ERR)
+ ctfdump_fatal("failed to get function "
+ "arguments for %s: %s\n", name,
+ ctf_errmsg(ctf_errno(g_fp)));
+ }
+ ctfdump_printf(CTFDUMP_TYPES,
+ "%s returns: %lu args: (", name, ctc.ctc_return);
+ for (i = 0; i < ctc.ctc_argc; i++) {
+ ctfdump_printf(CTFDUMP_TYPES, "%lu%s", g_fargc[i],
+ i + 1 == ctc.ctc_argc ? "" : ", ");
+ }
+ if (ctc.ctc_flags & CTF_FUNC_VARARG)
+ ctfdump_printf(CTFDUMP_TYPES, "%s...",
+ ctc.ctc_argc == 0 ? "" : ", ");
+ ctfdump_printf(CTFDUMP_TYPES, ")");
+ break;
+ case CTF_K_STRUCT:
+ case CTF_K_UNION:
+ size = ctf_type_size(g_fp, id);
+ if (size == CTF_ERR)
+ ctfdump_fatal("failed to get size of %s: %s\n", name,
+ ctf_errmsg(ctf_errno(g_fp)));
+ ctfdump_printf(CTFDUMP_TYPES, "%s (%d bytes)\n", name, size);
+ count = 0;
+ if (ctf_member_iter(g_fp, id, ctfdump_member_cb, &count) != 0)
+ ctfdump_fatal("failed to iterate members of %s: %s\n",
+ name, ctf_errmsg(ctf_errno(g_fp)));
+ if (kind == CTF_K_STRUCT) {
+ g_stats.cs_nsmembs += count;
+ g_stats.cs_nsmax = MAX(count, g_stats.cs_nsmax);
+ g_stats.cs_structsz += size;
+ g_stats.cs_sszmax = MAX(size, g_stats.cs_sszmax);
+ } else {
+ g_stats.cs_numembs += count;
+ g_stats.cs_numax = MAX(count, g_stats.cs_numax);
+ g_stats.cs_unionsz += size;
+ g_stats.cs_uszmax = MAX(count, g_stats.cs_uszmax);
+ }
+ break;
+ case CTF_K_ENUM:
+ ctfdump_printf(CTFDUMP_TYPES, "%s\n", name);
+ count = 0;
+ if (ctf_enum_iter(g_fp, id, ctfdump_enum_cb, &count) != 0)
+ ctfdump_fatal("failed to iterate enumerators of %s: "
+ "%s\n", name, ctf_errmsg(ctf_errno(g_fp)));
+ g_stats.cs_nemembs += count;
+ g_stats.cs_nemax = MAX(g_stats.cs_nemax, count);
+ break;
+ case CTF_K_FORWARD:
+ ctfdump_printf(CTFDUMP_TYPES, "forward %s\n", name);
+ break;
+ case CTF_K_TYPEDEF:
+ if ((ref = ctf_type_reference(g_fp, id)) == CTF_ERR)
+ ctfdump_fatal("failed to get reference type for %s: "
+ "%s\n", name, ctf_errmsg(ctf_errno(g_fp)));
+ ctfdump_printf(CTFDUMP_TYPES, "typedef %s refers to %lu", name,
+ ref);
+ break;
+ case CTF_K_VOLATILE:
+ if ((ref = ctf_type_reference(g_fp, id)) == CTF_ERR)
+ ctfdump_fatal("failed to get reference type for %s: "
+ "%s\n", name, ctf_errmsg(ctf_errno(g_fp)));
+ ctfdump_printf(CTFDUMP_TYPES, "%s refers to %lu", name,
+ ref);
+ break;
+ case CTF_K_CONST:
+ if ((ref = ctf_type_reference(g_fp, id)) == CTF_ERR)
+ ctfdump_fatal("failed to get reference type for %s: "
+ "%s\n", name, ctf_errmsg(ctf_errno(g_fp)));
+ ctfdump_printf(CTFDUMP_TYPES, "%s refers to %lu", name,
+ ref);
+ break;
+ case CTF_K_RESTRICT:
+ if ((ref = ctf_type_reference(g_fp, id)) == CTF_ERR)
+ ctfdump_fatal("failed to get reference type for %s: "
+ "%s\n", name, ctf_errmsg(ctf_errno(g_fp)));
+ ctfdump_printf(CTFDUMP_TYPES, "%s refers to %lu", name,
+ ref);
+ break;
+ default:
+ ctfdump_fatal("encountered unknown kind for type %s: %d\n",
+ name, kind);
+ }
+
+ ctfdump_printf(CTFDUMP_TYPES, "\n");
+
+ return (0);
+}
+
+static void
+ctfdump_types(void)
+{
+ ctfdump_title(CTFDUMP_TYPES, "Types");
+
+ if (ctf_type_iter(g_fp, B_TRUE, ctfdump_types_cb, NULL) == CTF_ERR) {
+ ctfdump_warn("failed to dump labels: %s\n",
+ ctf_errmsg(ctf_errno(g_fp)));
+ g_exit = 1;
+ }
+}
+
+static void
+ctfdump_output(const char *out)
+{
+ int fd, ret;
+ const void *data;
+ size_t len;
+
+ ctf_dataptr(g_fp, &data, &len);
+ if ((fd = open(out, O_WRONLY | O_CREAT | O_TRUNC, 0666)) < 0)
+ ctfdump_fatal("failed to open output file %s: %s\n", out,
+ strerror(errno));
+
+ while (len > 0) {
+ ret = write(fd, data, len);
+ if (ret == -1 && errno == EINTR)
+ continue;
+ else if (ret == -1 && (errno == EFAULT || errno == EBADF))
+ abort();
+ else if (ret == -1)
+ ctfdump_fatal("failed to write to %s: %s\n", out,
+ strerror(errno));
+ data += ret;
+ len -= ret;
+ }
+
+ do {
+ ret = close(fd);
+ } while (ret == -1 && errno == EINTR);
+ if (ret != 0 && errno == EBADF)
+ abort();
+ if (ret != 0)
+ ctfdump_fatal("failed to close %s: %s\n", out, strerror(errno));
+}
+
+int
+main(int argc, char *argv[])
+{
+ int c, fd, err;
+ const char *ufile = NULL, *parent = NULL;
+
+ g_progname = basename(argv[0]);
+ while ((c = getopt(argc, argv, ":dfhlp:sStu:")) != -1) {
+ switch (c) {
+ case 'd':
+ g_dump |= CTFDUMP_OBJECTS;
+ break;
+ case 'f':
+ g_dump |= CTFDUMP_FUNCTIONS;
+ break;
+ case 'h':
+ g_dump |= CTFDUMP_HEADER;
+ break;
+ case 'l':
+ g_dump |= CTFDUMP_LABELS;
+ break;
+ case 'p':
+ parent = optarg;
+ break;
+ case 's':
+ g_dump |= CTFDUMP_STRINGS;
+ break;
+ case 'S':
+ g_dump |= CTFDUMP_STATS;
+ break;
+ case 't':
+ g_dump |= CTFDUMP_TYPES;
+ break;
+ case 'u':
+ g_dump |= CTFDUMP_OUTPUT;
+ ufile = optarg;
+ break;
+ case '?':
+ ctfdump_usage("Unknown option: -%c\n", optopt);
+ return (2);
+ case ':':
+ ctfdump_usage("Option -%c requires an operand\n",
+ optopt);
+ return (2);
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ /*
+ * Dump all information by default.
+ */
+ if (g_dump == 0)
+ g_dump = CTFDUMP_DEFAULT;
+
+ if (argc != 1) {
+ ctfdump_usage("no file to dump\n");
+ return (2);
+ }
+
+ if ((fd = open(argv[0], O_RDONLY)) < 0)
+ ctfdump_fatal("failed to open file %s: %s\n", argv[0],
+ strerror(errno));
+
+ g_fp = ctf_fdopen(fd, &err);
+ if (g_fp == NULL)
+ ctfdump_fatal("failed to open file %s: %s\n", argv[0],
+ ctf_errmsg(err));
+
+ if (parent != NULL) {
+ ctf_file_t *pfp = ctf_open(parent, &err);
+
+ if (pfp == NULL)
+ ctfdump_fatal("failed to open parent file %s: %s\n",
+ parent, ctf_errmsg(err));
+ if (ctf_import(g_fp, pfp) != 0)
+ ctfdump_fatal("failed to import parent %s: %s\n",
+ parent, ctf_errmsg(ctf_errno(g_fp)));
+ }
+
+ /*
+ * If stats is set, we must run through everything exect CTFDUMP_OUTPUT.
+ * We also do CTFDUMP_STATS last as a result.
+ */
+ if (g_dump & CTFDUMP_HEADER)
+ ctfdump_header();
+
+ if (g_dump & (CTFDUMP_LABELS | CTFDUMP_STATS))
+ ctfdump_labels();
+
+ if (g_dump & (CTFDUMP_OBJECTS | CTFDUMP_STATS))
+ ctfdump_objects();
+
+ if (g_dump & (CTFDUMP_FUNCTIONS | CTFDUMP_STATS))
+ ctfdump_functions();
+
+ if (g_dump & (CTFDUMP_TYPES | CTFDUMP_STATS))
+ ctfdump_types();
+
+ if (g_dump & (CTFDUMP_STRINGS | CTFDUMP_STATS))
+ ctfdump_strings();
+
+ if (g_dump & CTFDUMP_STATS)
+ ctfdump_stats();
+
+ if (g_dump & CTFDUMP_OUTPUT)
+ ctfdump_output(ufile);
+
+ return (g_exit);
+}
diff --git a/usr/src/cmd/ctfmerge/Makefile b/usr/src/cmd/ctfmerge/Makefile
new file mode 100644
index 0000000000..02ead98687
--- /dev/null
+++ b/usr/src/cmd/ctfmerge/Makefile
@@ -0,0 +1,33 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2015, Joyent, Inc.
+#
+
+PROG= ctfmerge
+
+include ../Makefile.cmd
+
+CFLAGS += $(CCVERBOSE)
+LDLIBS += -lctf -lelf
+
+.KEEP_STATE:
+
+all: $(PROG)
+
+install: all $(ROOTPROG)
+
+clean:
+
+lint: lint_PROG
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/ctfmerge/ctfmerge.c b/usr/src/cmd/ctfmerge/ctfmerge.c
new file mode 100644
index 0000000000..5dea32b3aa
--- /dev/null
+++ b/usr/src/cmd/ctfmerge/ctfmerge.c
@@ -0,0 +1,538 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2015, Joyent, Inc.
+ */
+
+/*
+ * merge CTF containers
+ */
+
+#include <stdio.h>
+#include <libctf.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <strings.h>
+#include <assert.h>
+#include <unistd.h>
+#include <sys/fcntl.h>
+#include <stdlib.h>
+#include <libelf.h>
+#include <gelf.h>
+#include <sys/mman.h>
+#include <libgen.h>
+#include <stdarg.h>
+#include <limits.h>
+
+static char *g_progname;
+static char *g_unique;
+static char *g_outfile;
+static boolean_t g_req;
+static uint_t g_nctf;
+
+#define CTFMERGE_OK 0
+#define CTFMERGE_FATAL 1
+#define CTFMERGE_USAGE 2
+
+#define CTFMERGE_DEFAULT_NTHREADS 8
+#define CTFMERGE_ALTEXEC "CTFMERGE_ALTEXEC"
+
+static void
+ctfmerge_fatal(const char *fmt, ...)
+{
+ va_list ap;
+
+ (void) fprintf(stderr, "%s: ", g_progname);
+ va_start(ap, fmt);
+ (void) vfprintf(stderr, fmt, ap);
+ va_end(ap);
+
+ if (g_outfile != NULL)
+ (void) unlink(g_outfile);
+
+ exit(CTFMERGE_FATAL);
+}
+
+static boolean_t
+ctfmerge_expect_ctf(const char *name, Elf *elf)
+{
+ Elf_Scn *scn, *strscn;
+ Elf_Data *data, *strdata;
+ GElf_Shdr shdr;
+ ulong_t i;
+
+ if (g_req == B_FALSE)
+ return (B_FALSE);
+
+ scn = NULL;
+ while ((scn = elf_nextscn(elf, scn)) != NULL) {
+ if (gelf_getshdr(scn, &shdr) == NULL) {
+ ctfmerge_fatal("failed to get section header for file "
+ "%s: %s\n", name, elf_errmsg(elf_errno()));
+ }
+
+ if (shdr.sh_type == SHT_SYMTAB)
+ break;
+ }
+
+ if (scn == NULL)
+ return (B_FALSE);
+
+ if ((strscn = elf_getscn(elf, shdr.sh_link)) == NULL)
+ ctfmerge_fatal("failed to get section header for file %s: %s\n",
+ name, elf_errmsg(elf_errno()));
+
+ if ((data = elf_getdata(scn, NULL)) == NULL)
+ ctfmerge_fatal("failed to read symbol table for %s: %s\n",
+ name, elf_errmsg(elf_errno()));
+
+ if ((strdata = elf_getdata(strscn, NULL)) == NULL)
+ ctfmerge_fatal("failed to read string table for %s: %s\n",
+ name, elf_errmsg(elf_errno()));
+
+ for (i = 0; i < shdr.sh_size / shdr.sh_entsize; i++) {
+ GElf_Sym sym;
+ const char *file;
+ size_t len;
+
+ if (gelf_getsym(data, i, &sym) == NULL)
+ ctfmerge_fatal("failed to read symbol table entry %d "
+ "for %s: %s\n", i, name, elf_errmsg(elf_errno()));
+
+ if (GELF_ST_TYPE(sym.st_info) != STT_FILE)
+ continue;
+
+ file = (const char *)((uintptr_t)strdata->d_buf + sym.st_name);
+ len = strlen(file);
+ if (len < 2 || name[len - 2] != '.')
+ continue;
+
+ if (name[len - 1] == 'c')
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
+/*
+ * Go through and construct enough information for this Elf Object to try and do
+ * a ctf_bufopen().
+ */
+static void
+ctfmerge_elfopen(const char *name, Elf *elf, ctf_merge_t *cmh)
+{
+ GElf_Ehdr ehdr;
+ GElf_Shdr shdr;
+ Elf_Scn *scn;
+ Elf_Data *ctf_data, *str_data, *sym_data;
+ ctf_sect_t ctfsect, symsect, strsect;
+ ctf_file_t *fp;
+ int err;
+
+ if (gelf_getehdr(elf, &ehdr) == NULL)
+ ctfmerge_fatal("failed to get ELF header for %s: %s\n",
+ name, elf_errmsg(elf_errno()));
+
+ bzero(&ctfsect, sizeof (ctf_sect_t));
+ bzero(&symsect, sizeof (ctf_sect_t));
+ bzero(&strsect, sizeof (ctf_sect_t));
+
+ scn = NULL;
+ while ((scn = elf_nextscn(elf, scn)) != NULL) {
+ const char *sname;
+
+ if (gelf_getshdr(scn, &shdr) == NULL)
+ ctfmerge_fatal("failed to get section header for "
+ "file %s: %s\n", name, elf_errmsg(elf_errno()));
+
+ sname = elf_strptr(elf, ehdr.e_shstrndx, shdr.sh_name);
+ if (shdr.sh_type == SHT_PROGBITS &&
+ strcmp(sname, ".SUNW_ctf") == 0) {
+ ctfsect.cts_name = sname;
+ ctfsect.cts_type = shdr.sh_type;
+ ctfsect.cts_flags = shdr.sh_flags;
+ ctfsect.cts_size = shdr.sh_size;
+ ctfsect.cts_entsize = shdr.sh_entsize;
+ ctfsect.cts_offset = (off64_t)shdr.sh_offset;
+
+ ctf_data = elf_getdata(scn, NULL);
+ if (ctf_data == NULL)
+ ctfmerge_fatal("failed to get ELF CTF "
+ "data section for %s: %s\n", name,
+ elf_errmsg(elf_errno()));
+ ctfsect.cts_data = ctf_data->d_buf;
+ } else if (shdr.sh_type == SHT_SYMTAB) {
+ Elf_Scn *strscn;
+ GElf_Shdr strhdr;
+
+ symsect.cts_name = sname;
+ symsect.cts_type = shdr.sh_type;
+ symsect.cts_flags = shdr.sh_flags;
+ symsect.cts_size = shdr.sh_size;
+ symsect.cts_entsize = shdr.sh_entsize;
+ symsect.cts_offset = (off64_t)shdr.sh_offset;
+
+ if ((strscn = elf_getscn(elf, shdr.sh_link)) == NULL ||
+ gelf_getshdr(strscn, &strhdr) == NULL)
+ ctfmerge_fatal("failed to get "
+ "string table for file %s: %s\n", name,
+ elf_errmsg(elf_errno()));
+
+ strsect.cts_name = elf_strptr(elf, ehdr.e_shstrndx,
+ strhdr.sh_name);
+ strsect.cts_type = strhdr.sh_type;
+ strsect.cts_flags = strhdr.sh_flags;
+ strsect.cts_size = strhdr.sh_size;
+ strsect.cts_entsize = strhdr.sh_entsize;
+ strsect.cts_offset = (off64_t)strhdr.sh_offset;
+
+ sym_data = elf_getdata(scn, NULL);
+ if (sym_data == NULL)
+ ctfmerge_fatal("failed to get ELF CTF "
+ "data section for %s: %s\n", name,
+ elf_errmsg(elf_errno()));
+ symsect.cts_data = sym_data->d_buf;
+
+ str_data = elf_getdata(strscn, NULL);
+ if (str_data == NULL)
+ ctfmerge_fatal("failed to get ELF CTF "
+ "data section for %s: %s\n", name,
+ elf_errmsg(elf_errno()));
+ strsect.cts_data = str_data->d_buf;
+ }
+ }
+
+ if (ctfsect.cts_type == SHT_NULL) {
+ if (ctfmerge_expect_ctf(name, elf) == B_FALSE)
+ return;
+ ctfmerge_fatal("failed to open %s: %s\n", name,
+ ctf_errmsg(ECTF_NOCTFDATA));
+ }
+
+ if (symsect.cts_type != SHT_NULL && strsect.cts_type != SHT_NULL) {
+ fp = ctf_bufopen(&ctfsect, &symsect, &strsect, &err);
+ } else {
+ fp = ctf_bufopen(&ctfsect, NULL, NULL, &err);
+ }
+
+ if (fp == NULL) {
+ if (ctfmerge_expect_ctf(name, elf) == B_TRUE) {
+ ctfmerge_fatal("failed to open file %s: %s\n",
+ name, ctf_errmsg(err));
+ }
+ } else {
+ if ((err = ctf_merge_add(cmh, fp)) != 0) {
+ ctfmerge_fatal("failed to add input %s: %s\n",
+ name, ctf_errmsg(err));
+ }
+ g_nctf++;
+ }
+}
+
+static void
+ctfmerge_read_archive(const char *name, int fd, Elf *elf,
+ ctf_merge_t *cmh)
+{
+ Elf *aelf;
+ Elf_Cmd cmd = ELF_C_READ;
+ int cursec = 1;
+ char *nname;
+
+ while ((aelf = elf_begin(fd, cmd, elf)) != NULL) {
+ Elf_Arhdr *arhdr;
+ boolean_t leakelf = B_FALSE;
+
+ if ((arhdr = elf_getarhdr(aelf)) == NULL)
+ ctfmerge_fatal("failed to get archive header %d for "
+ "%s: %s\n", cursec, name, elf_errmsg(elf_errno()));
+
+ if (*(arhdr->ar_name) == '/')
+ goto next;
+
+ if (asprintf(&nname, "%s.%s.%d", name, arhdr->ar_name,
+ cursec) < 0)
+ ctfmerge_fatal("failed to allocate memory for archive "
+ "%d of file %s\n", cursec, name);
+
+ switch (elf_kind(aelf)) {
+ case ELF_K_AR:
+ ctfmerge_read_archive(nname, fd, aelf, cmh);
+ free(nname);
+ break;
+ case ELF_K_ELF:
+ ctfmerge_elfopen(nname, aelf, cmh);
+ free(nname);
+ leakelf = B_TRUE;
+ break;
+ default:
+ ctfmerge_fatal("unknown elf kind (%d) in archive %d "
+ "for %s\n", elf_kind(aelf), cursec, name);
+ }
+
+next:
+ cmd = elf_next(aelf);
+ if (leakelf == B_FALSE)
+ (void) elf_end(aelf);
+ cursec++;
+ }
+}
+
+static void
+ctfmerge_usage(const char *fmt, ...)
+{
+ if (fmt != NULL) {
+ va_list ap;
+
+ (void) fprintf(stderr, "%s: ", g_progname);
+ va_start(ap, fmt);
+ (void) vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ }
+
+ (void) fprintf(stderr, "Usage: %s [-gt] [-d uniqfile] [-l label] "
+ "[-L labelenv] [-j nthrs] -o outfile file ...\n"
+ "\n"
+ "\t-d uniquify merged output against uniqfile\n"
+ "\t-g do not remove source debug information (STABS, DWARF)\n"
+ "\t-j use nthrs threads to perform the merge\n"
+ "\t-l set output container's label to specified value\n"
+ "\t-L set output container's label to value from environment\n"
+ "\t-o file to add CTF data to\n"
+ "\t-t require CTF data from all inputs built from C sources\n",
+ g_progname);
+}
+
+static void
+ctfmerge_altexec(char **argv)
+{
+ const char *alt;
+ char *altexec;
+
+ alt = getenv(CTFMERGE_ALTEXEC);
+ if (alt == NULL || *alt == '\0')
+ return;
+
+ altexec = strdup(alt);
+ if (altexec == NULL)
+ ctfmerge_fatal("failed to allocate memory for altexec\n");
+ if (unsetenv(CTFMERGE_ALTEXEC) != 0)
+ ctfmerge_fatal("failed to unset %s from environment: %s\n",
+ CTFMERGE_ALTEXEC, strerror(errno));
+
+ (void) execv(altexec, argv);
+ ctfmerge_fatal("failed to execute alternate program %s: %s",
+ altexec, strerror(errno));
+}
+
+int
+main(int argc, char *argv[])
+{
+ int err, i, c, ofd;
+ uint_t nthreads = CTFMERGE_DEFAULT_NTHREADS;
+ char *tmpfile = NULL, *label = NULL;
+ int wflags = CTF_ELFWRITE_F_COMPRESS;
+ ctf_file_t *ofp;
+ ctf_merge_t *cmh;
+ long argj;
+ char *eptr;
+
+ g_progname = basename(argv[0]);
+
+ ctfmerge_altexec(argv);
+
+ /*
+ * We support a subset of the old CTF merge flags, mostly for
+ * compatability.
+ */
+ while ((c = getopt(argc, argv, ":d:fgj:L:o:t")) != -1) {
+ switch (c) {
+ case 'd':
+ g_unique = optarg;
+ break;
+ case 'f':
+ /* Silently ignored for compatibility */
+ break;
+ case 'g':
+ /* Silently ignored for compatibility */
+ break;
+ case 'j':
+ errno = 0;
+ argj = strtol(optarg, &eptr, 10);
+ if (errno != 0 || argj == LONG_MAX ||
+ argj == LONG_MIN || argj <= 0 ||
+ argj > UINT_MAX || *eptr != '\0') {
+ ctfmerge_fatal("invalid argument for -j: %s\n",
+ optarg);
+ }
+ nthreads = (uint_t)argj;
+ break;
+ case 'l':
+ label = optarg;
+ break;
+ case 'L':
+ label = getenv(optarg);
+ break;
+ case 'o':
+ g_outfile = optarg;
+ break;
+ case 't':
+ g_req = B_TRUE;
+ break;
+ case ':':
+ ctfmerge_usage("Option -%c requires an operand\n",
+ optopt);
+ return (CTFMERGE_USAGE);
+ case '?':
+ ctfmerge_usage("Unknown option: -%c\n", optopt);
+ return (CTFMERGE_USAGE);
+ }
+ }
+
+ if (g_outfile == NULL) {
+ ctfmerge_usage("missing required -o output file\n");
+ return (CTFMERGE_USAGE);
+ }
+
+ (void) elf_version(EV_CURRENT);
+
+ /*
+ * Obviously this isn't atomic, but at least gives us a good starting
+ * point.
+ */
+ if ((ofd = open(g_outfile, O_RDWR)) < 0)
+ ctfmerge_fatal("cannot open output file %s: %s\n", g_outfile,
+ strerror(errno));
+
+ argc -= optind;
+ argv += optind;
+
+ if (argc < 1) {
+ ctfmerge_usage("no input files specified");
+ return (CTFMERGE_USAGE);
+ }
+
+ cmh = ctf_merge_init(ofd, &err);
+ if (cmh == NULL)
+ ctfmerge_fatal("failed to create merge handle: %s\n",
+ ctf_errmsg(err));
+
+ if ((err = ctf_merge_set_nthreads(cmh, nthreads)) != 0)
+ ctfmerge_fatal("failed to set parallelism to %d: %s\n",
+ nthreads, ctf_errmsg(err));
+
+ for (i = 0; i < argc; i++) {
+ ctf_file_t *ifp;
+ int fd;
+
+ if ((fd = open(argv[i], O_RDONLY)) < 0)
+ ctfmerge_fatal("failed to open file %s: %s\n",
+ argv[i], strerror(errno));
+ ifp = ctf_fdopen(fd, &err);
+ if (ifp == NULL) {
+ Elf *e;
+
+ if ((e = elf_begin(fd, ELF_C_READ, NULL)) == NULL) {
+ (void) close(fd);
+ ctfmerge_fatal("failed to open %s: %s\n",
+ argv[i], ctf_errmsg(err));
+ }
+
+ /*
+ * It's an ELF file, check if we have an archive or if
+ * we're expecting CTF here.
+ */
+ switch (elf_kind(e)) {
+ case ELF_K_AR:
+ break;
+ case ELF_K_ELF:
+ if (ctfmerge_expect_ctf(argv[i], e) == B_TRUE) {
+ (void) elf_end(e);
+ (void) close(fd);
+ ctfmerge_fatal("failed to "
+ "open %s: file was built from C "
+ "sources, but missing CTF\n",
+ argv[i]);
+ }
+ (void) elf_end(e);
+ (void) close(fd);
+ continue;
+ default:
+ (void) elf_end(e);
+ (void) close(fd);
+ ctfmerge_fatal("failed to open %s: "
+ "unsupported ELF file type", argv[i]);
+ }
+
+ ctfmerge_read_archive(argv[i], fd, e, cmh);
+ (void) elf_end(e);
+ (void) close(fd);
+ continue;
+ }
+ (void) close(fd);
+ if ((err = ctf_merge_add(cmh, ifp)) != 0)
+ ctfmerge_fatal("failed to add input %s: %s\n",
+ argv[i], ctf_errmsg(err));
+ g_nctf++;
+ }
+
+ if (g_nctf == 0) {
+ ctf_merge_fini(cmh);
+ return (0);
+ }
+
+ if (g_unique != NULL) {
+ ctf_file_t *ufp;
+ char *base;
+
+ ufp = ctf_open(g_unique, &err);
+ if (ufp == NULL) {
+ ctfmerge_fatal("failed to open uniquify file %s: %s\n",
+ g_unique, ctf_errmsg(err));
+ }
+
+ base = basename(g_unique);
+ (void) ctf_merge_uniquify(cmh, ufp, base);
+ }
+
+ if (label != NULL) {
+ if ((err = ctf_merge_label(cmh, label)) != 0)
+ ctfmerge_fatal("failed to add label %s: %s\n", label,
+ ctf_errmsg(err));
+ }
+
+ err = ctf_merge_merge(cmh, &ofp);
+ if (err != 0)
+ ctfmerge_fatal("failed to merge types: %s\n", ctf_errmsg(err));
+ ctf_merge_fini(cmh);
+
+ if (asprintf(&tmpfile, "%s.ctf", g_outfile) == -1)
+ ctfmerge_fatal("ran out of memory for temporary file name\n");
+ err = ctf_elfwrite(ofp, g_outfile, tmpfile, wflags);
+ if (err == CTF_ERR) {
+ (void) unlink(tmpfile);
+ free(tmpfile);
+ ctfmerge_fatal("encountered a libctf error: %s!\n",
+ ctf_errmsg(ctf_errno(ofp)));
+ }
+
+ if (rename(tmpfile, g_outfile) != 0) {
+ (void) unlink(tmpfile);
+ free(tmpfile);
+ ctfmerge_fatal("failed to rename temporary file: %s\n",
+ strerror(errno));
+ }
+ free(tmpfile);
+
+ return (CTFMERGE_OK);
+}
diff --git a/usr/src/cmd/devfsadm/Makefile.com b/usr/src/cmd/devfsadm/Makefile.com
index 4df3b00585..1585db2894 100644
--- a/usr/src/cmd/devfsadm/Makefile.com
+++ b/usr/src/cmd/devfsadm/Makefile.com
@@ -67,7 +67,6 @@ LINK_OBJS_CMN = \
fssnap_link.o \
sgen_link.o \
smp_link.o \
- md_link.o \
dtrace_link.o \
vscan_link.o \
zfs_link.o \
diff --git a/usr/src/cmd/devfsadm/devlink.tab.sh b/usr/src/cmd/devfsadm/devlink.tab.sh
index 6724fcb573..0267efeb9f 100644
--- a/usr/src/cmd/devfsadm/devlink.tab.sh
+++ b/usr/src/cmd/devfsadm/devlink.tab.sh
@@ -22,8 +22,7 @@
#
# Copyright (c) 1998, 2000 by Sun Microsystems, Inc.
# All rights reserved.
-#
-#ident "%Z%%M% %I% %E% SMI"
+# Copyright (c) 2012 Joyent, Inc. All rights reserved.
#
# This is the script that generates the devlink.tab file. It is
# architecture-aware, and dumps different stuff for x86 and sparc.
@@ -34,8 +33,6 @@
#
cat <<EOM
-#ident "%Z%%M% %I% %E% SMI"
-#
# Copyright (c) 1998 by Sun Microsystems, Inc.
#
#
diff --git a/usr/src/cmd/devfsadm/i386/Makefile b/usr/src/cmd/devfsadm/i386/Makefile
index 1f14c93dad..75f2da3436 100644
--- a/usr/src/cmd/devfsadm/i386/Makefile
+++ b/usr/src/cmd/devfsadm/i386/Makefile
@@ -24,8 +24,11 @@
LINK_OBJS_i386 = \
misc_link_i386.o \
+ lx_link_i386.o \
xen_link.o
+lx_link_i386.o lx_link_i386.po lx_link_i386.ln := CPPFLAGS += -I$(UTSBASE)/common/brand/lx
+
xen_link.o xen_link.ln xen_link.po := CPPFLAGS += -I$(UTSBASE)/i86xpv
include ../Makefile.com
diff --git a/usr/src/cmd/devfsadm/i386/lx_link_i386.c b/usr/src/cmd/devfsadm/i386/lx_link_i386.c
new file mode 100644
index 0000000000..b99a8361a0
--- /dev/null
+++ b/usr/src/cmd/devfsadm/i386/lx_link_i386.c
@@ -0,0 +1,81 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <devfsadm.h>
+#include <strings.h>
+#include <stdio.h>
+#include <sys/lx_ptm.h>
+#include <sys/lx_autofs.h>
+
+static int lx_ptm(di_minor_t minor, di_node_t node);
+static int lx_autofs(di_minor_t minor, di_node_t node);
+static int lx_systrace(di_minor_t minor, di_node_t node);
+
+static devfsadm_create_t lx_create_cbt[] = {
+ { "pseudo", "ddi_pseudo", LX_PTM_DRV,
+ TYPE_EXACT | DRV_EXACT, ILEVEL_0, lx_ptm },
+ { "pseudo", "ddi_pseudo", LX_AUTOFS_NAME,
+ TYPE_EXACT | DRV_EXACT, ILEVEL_0, lx_autofs },
+ { "pseudo", "ddi_pseudo", "lx_systrace",
+ TYPE_EXACT | DRV_EXACT, ILEVEL_0, lx_systrace },
+};
+
+DEVFSADM_CREATE_INIT_V0(lx_create_cbt);
+
+static int
+lx_ptm(di_minor_t minor, di_node_t node)
+{
+ char *mname = di_minor_name(minor);
+
+ if (strcmp(LX_PTM_MINOR_NODE, mname) == 0)
+ (void) devfsadm_mklink("brand/lx/ptmx", node, minor, 0);
+
+ return (DEVFSADM_CONTINUE);
+}
+
+static int
+lx_autofs(di_minor_t minor, di_node_t node)
+{
+ char *mname = di_minor_name(minor);
+
+ if (strcmp(LX_AUTOFS_MINORNAME, mname) == 0)
+ (void) devfsadm_mklink("brand/lx/autofs", node, minor, 0);
+
+ return (DEVFSADM_CONTINUE);
+}
+
+static int
+lx_systrace(di_minor_t minor, di_node_t node)
+{
+ char *mname = di_minor_name(minor);
+ char path[MAXPATHLEN];
+
+ (void) snprintf(path, sizeof (path), "dtrace/provider/%s", mname);
+ (void) devfsadm_mklink(path, node, minor, 0);
+
+ return (DEVFSADM_CONTINUE);
+}
diff --git a/usr/src/cmd/devfsadm/misc_link.c b/usr/src/cmd/devfsadm/misc_link.c
index 5f241df296..55aff1e4f7 100644
--- a/usr/src/cmd/devfsadm/misc_link.c
+++ b/usr/src/cmd/devfsadm/misc_link.c
@@ -32,6 +32,7 @@
#include <limits.h>
#include <sys/zone.h>
#include <sys/zcons.h>
+#include <sys/zfd.h>
#include <sys/cpuid_drv.h>
static int display(di_minor_t minor, di_node_t node);
@@ -53,6 +54,7 @@ static int av_create(di_minor_t minor, di_node_t node);
static int tsalarm_create(di_minor_t minor, di_node_t node);
static int ntwdt_create(di_minor_t minor, di_node_t node);
static int zcons_create(di_minor_t minor, di_node_t node);
+static int zfd_create(di_minor_t minor, di_node_t node);
static int cpuid(di_minor_t minor, di_node_t node);
static int glvc(di_minor_t minor, di_node_t node);
static int ses_callback(di_minor_t minor, di_node_t node);
@@ -114,6 +116,9 @@ static devfsadm_create_t misc_cbt[] = {
"(^nca$)|(^rds$)|(^sdp$)|(^ipnet$)|(^dlpistub$)|(^bpf$)",
TYPE_EXACT | DRV_RE, ILEVEL_1, minor_name
},
+ { "pseudo", "ddi_pseudo", "inotify",
+ TYPE_EXACT | DRV_EXACT, ILEVEL_0, minor_name
+ },
{ "pseudo", "ddi_pseudo", "ipd",
TYPE_EXACT | DRV_EXACT, ILEVEL_0, minor_name
},
@@ -180,6 +185,9 @@ static devfsadm_create_t misc_cbt[] = {
{ "pseudo", "ddi_pseudo", "zcons",
TYPE_EXACT | DRV_EXACT, ILEVEL_0, zcons_create,
},
+ { "pseudo", "ddi_pseudo", "zfd",
+ TYPE_EXACT | DRV_EXACT, ILEVEL_0, zfd_create,
+ },
{ "pseudo", "ddi_pseudo", CPUID_DRIVER_NAME,
TYPE_EXACT | DRV_EXACT, ILEVEL_0, cpuid,
},
@@ -204,6 +212,9 @@ static devfsadm_create_t misc_cbt[] = {
{ "pseudo", "ddi_pseudo", "tpm",
TYPE_EXACT | DRV_EXACT, ILEVEL_0, minor_name
},
+ { "pseudo", "ddi_pseudo", "overlay",
+ TYPE_EXACT | DRV_EXACT, ILEVEL_0, minor_name
+ }
};
DEVFSADM_CREATE_INIT_V0(misc_cbt);
@@ -228,6 +239,9 @@ static devfsadm_remove_t misc_remove_cbt[] = {
ZCONS_SLAVE_NAME ")$",
RM_PRE | RM_HOT | RM_ALWAYS, ILEVEL_0, devfsadm_rm_all
},
+ { "pseudo", "^zfd/" ZONENAME_REGEXP "/(master|slave)/[0-9]+$",
+ RM_PRE | RM_HOT | RM_ALWAYS, ILEVEL_0, devfsadm_rm_all
+ },
{ "pseudo", "^" CPUID_SELF_NAME "$", RM_ALWAYS | RM_PRE | RM_HOT,
ILEVEL_0, devfsadm_rm_all
},
@@ -675,6 +689,35 @@ zcons_create(di_minor_t minor, di_node_t node)
return (DEVFSADM_CONTINUE);
}
+static int
+zfd_create(di_minor_t minor, di_node_t node)
+{
+ char *minor_str;
+ char *zonename;
+ int *id;
+ char path[MAXPATHLEN];
+
+ minor_str = di_minor_name(minor);
+
+ if (di_prop_lookup_strings(DDI_DEV_T_ANY, node, "zfd_zname",
+ &zonename) == -1)
+ return (DEVFSADM_CONTINUE);
+
+ if (di_prop_lookup_ints(DDI_DEV_T_ANY, node, "zfd_id", &id) == -1)
+ return (DEVFSADM_CONTINUE);
+
+ if (strncmp(minor_str, "slave", 5) == 0) {
+ (void) snprintf(path, sizeof (path), "zfd/%s/slave/%d",
+ zonename, id[0]);
+ } else {
+ (void) snprintf(path, sizeof (path), "zfd/%s/master/%d",
+ zonename, id[0]);
+ }
+ (void) devfsadm_mklink(path, node, minor, 0);
+
+ return (DEVFSADM_CONTINUE);
+}
+
/*
* /dev/cpu/self/cpuid -> /devices/pseudo/cpuid@0:self
*/
diff --git a/usr/src/cmd/dladm/Makefile b/usr/src/cmd/dladm/Makefile
index 143086e26a..1f07bbd714 100644
--- a/usr/src/cmd/dladm/Makefile
+++ b/usr/src/cmd/dladm/Makefile
@@ -21,6 +21,7 @@
#
# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
+# Copyright 2015 Joyent, Inc.
#
PROG= dladm
@@ -35,7 +36,7 @@ include ../Makefile.cmd
XGETFLAGS += -a -x $(PROG).xcl
LDLIBS += -L$(ROOT)/lib -lsocket
LDLIBS += -ldladm -ldlpi -lkstat -lsecdb -lbsm -linetutil -ldevinfo
-LDLIBS += $(ZLAZYLOAD) -lrstp $(ZNOLAZYLOAD)
+LDLIBS += $(ZLAZYLOAD) -lrstp $(ZNOLAZYLOAD) -lnsl -lumem -lcmdutils
CERRWARN += -_gcc=-Wno-switch
CERRWARN += -_gcc=-Wno-unused-label
diff --git a/usr/src/cmd/dladm/dladm.c b/usr/src/cmd/dladm/dladm.c
index 8277d60ac2..5c44ecf0ef 100644
--- a/usr/src/cmd/dladm/dladm.c
+++ b/usr/src/cmd/dladm/dladm.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015 Joyent, Inc. All rights reserved.
*/
#include <stdio.h>
@@ -59,6 +60,7 @@
#include <libdliptun.h>
#include <libdlsim.h>
#include <libdlbridge.h>
+#include <libdloverlay.h>
#include <libinetutil.h>
#include <libvrrpadm.h>
#include <bsm/adt.h>
@@ -74,6 +76,7 @@
#include <stddef.h>
#include <stp_in.h>
#include <ofmt.h>
+#include <libcmdutils.h>
#define MAXPORT 256
#define MAXVNIC 256
@@ -154,6 +157,7 @@ typedef struct show_vnic_state {
dladm_status_t vs_status;
uint32_t vs_flags;
ofmt_handle_t vs_ofmt;
+ char *vs_zonename;
} show_vnic_state_t;
typedef struct show_part_state {
@@ -192,6 +196,7 @@ static ofmt_cb_t print_lacp_cb, print_phys_one_mac_cb;
static ofmt_cb_t print_xaggr_cb, print_aggr_stats_cb;
static ofmt_cb_t print_phys_one_hwgrp_cb, print_wlan_attr_cb;
static ofmt_cb_t print_wifi_status_cb, print_link_attr_cb;
+static ofmt_cb_t print_overlay_cb, print_overlay_fma_cb, print_overlay_targ_cb;
static void dladm_ofmt_check(ofmt_status_t, boolean_t, ofmt_handle_t);
typedef void cmdfunc_t(int, char **, const char *);
@@ -219,6 +224,8 @@ static cmdfunc_t do_create_bridge, do_modify_bridge, do_delete_bridge;
static cmdfunc_t do_add_bridge, do_remove_bridge, do_show_bridge;
static cmdfunc_t do_create_iptun, do_modify_iptun, do_delete_iptun;
static cmdfunc_t do_show_iptun, do_up_iptun, do_down_iptun;
+static cmdfunc_t do_create_overlay, do_delete_overlay, do_modify_overlay;
+static cmdfunc_t do_show_overlay;
static void do_up_vnic_common(int, char **, const char *, boolean_t);
@@ -254,8 +261,11 @@ static void die(const char *, ...);
static void die_optdup(int);
static void die_opterr(int, int, const char *);
static void die_dlerr(dladm_status_t, const char *, ...);
+static void die_dlerrlist(dladm_status_t, dladm_errlist_t *,
+ const char *, ...);
static void warn(const char *, ...);
static void warn_dlerr(dladm_status_t, const char *, ...);
+static void warn_dlerrlist(dladm_errlist_t *);
typedef struct cmd {
char *c_name;
@@ -265,7 +275,7 @@ typedef struct cmd {
static cmd_t cmds[] = {
{ "rename-link", do_rename_link,
- " rename-link <oldlink> <newlink>" },
+ " rename-link [-z zonename] <oldlink> <newlink>" },
{ "show-link", do_show_link,
" show-link [-pP] [-o <field>,..] [-s [-i <interval>]] "
"[<link>]\n" },
@@ -300,12 +310,13 @@ static cmd_t cmds[] = {
{ "show-wifi", do_show_wifi,
" show-wifi [-p] [-o <field>,...] [<link>]\n" },
{ "set-linkprop", do_set_linkprop,
- " set-linkprop [-t] -p <prop>=<value>[,...] <name>" },
+ " set-linkprop [-t] [-z zonename] -p <prop>=<value>[,...] "
+ "<name>" },
{ "reset-linkprop", do_reset_linkprop,
- " reset-linkprop [-t] [-p <prop>,...] <name>" },
+ " reset-linkprop [-t] [-z zonename] [-p <prop>,...] <name>"},
{ "show-linkprop", do_show_linkprop,
- " show-linkprop [-cP] [-o <field>,...] [-p <prop>,...] "
- "<name>\n" },
+ " show-linkprop [-cP] [-o <field>,...] [-z zonename] "
+ "[-p <prop>,...] <name>\n" },
{ "show-ether", do_show_ether,
" show-ether [-px][-o <field>,...] <link>\n" },
{ "create-secobj", do_create_secobj,
@@ -347,10 +358,10 @@ static cmd_t cmds[] = {
"\t\t {vrrp -V <vrid> -A {inet | inet6}} [-v <vid> [-f]]\n"
"\t\t [-p <prop>=<value>[,...]] <vnic-link>" },
{ "delete-vnic", do_delete_vnic,
- " delete-vnic [-t] <vnic-link>" },
+ " delete-vnic [-t] [-z zonename] <vnic-link>" },
{ "show-vnic", do_show_vnic,
- " show-vnic [-pP] [-l <link>] [-s [-i <interval>]] "
- "[<link>]\n" },
+ " show-vnic [-pP] [-l <link>] [-z zonename] "
+ "[-s [-i <interval>]] [<link>]\n" },
{ "up-vnic", do_up_vnic, NULL },
{ "create-part", do_create_part,
" create-part [-t] [-f] -l <link> [-P <pkey>]\n"
@@ -401,6 +412,17 @@ static cmd_t cmds[] = {
" <bridge>\n"
" show-bridge -t [-p] [-o <field>,...] [-s [-i <interval>]]"
" <bridge>\n" },
+ { "create-overlay", do_create_overlay,
+ " create-overlay [-t] -e <encap> -s <search> -v <vnetid>\n"
+ "\t\t [ -p <prop>=<value>[,...]] <overlay>" },
+ { "delete-overlay", do_delete_overlay,
+ " delete-overlay <overlay>" },
+ { "modify-overlay", do_modify_overlay,
+ " modify-overlay -d mac | -f | -s mac=ip:port "
+ "<overlay>" },
+ { "show-overlay", do_show_overlay,
+ " show-overlay [-f | -t] [[-p] -o <field>,...] "
+ "[<overlay>]\n" },
{ "show-usage", do_show_usage,
" show-usage [-a] [-d | -F <format>] "
"[-s <DD/MM/YYYY,HH:MM:SS>]\n"
@@ -959,6 +981,7 @@ typedef struct show_linkprop_state {
char ls_link[MAXLINKNAMELEN];
char *ls_line;
char **ls_propvals;
+ char *ls_zonename;
dladm_arg_list_t *ls_proplist;
boolean_t ls_parsable;
boolean_t ls_persist;
@@ -1011,21 +1034,24 @@ typedef struct vnic_fields_buf_s
char vnic_macaddr[18];
char vnic_macaddrtype[19];
char vnic_vid[6];
+ char vnic_zone[ZONENAME_MAX];
} vnic_fields_buf_t;
static const ofmt_field_t vnic_fields[] = {
{ "LINK", 13,
offsetof(vnic_fields_buf_t, vnic_link), print_default_cb},
-{ "OVER", 13,
+{ "OVER", 11,
offsetof(vnic_fields_buf_t, vnic_over), print_default_cb},
-{ "SPEED", 7,
+{ "SPEED", 6,
offsetof(vnic_fields_buf_t, vnic_speed), print_default_cb},
{ "MACADDRESS", 18,
offsetof(vnic_fields_buf_t, vnic_macaddr), print_default_cb},
-{ "MACADDRTYPE", 20,
+{ "MACADDRTYPE", 12,
offsetof(vnic_fields_buf_t, vnic_macaddrtype), print_default_cb},
-{ "VID", 7,
+{ "VID", 5,
offsetof(vnic_fields_buf_t, vnic_vid), print_default_cb},
+{ "ZONE", 20,
+ offsetof(vnic_fields_buf_t, vnic_zone), print_default_cb},
{ NULL, 0, 0, NULL}}
;
@@ -1425,6 +1451,82 @@ static ofmt_field_t bridge_trill_fields[] = {
offsetof(bridge_trill_fields_buf_t, bridget_nexthop), print_default_cb },
{ NULL, 0, 0, NULL}};
+static const struct option overlay_create_lopts[] = {
+ { "encap", required_argument, NULL, 'e' },
+ { "prop", required_argument, NULL, 'p' },
+ { "search", required_argument, NULL, 's' },
+ { "temporary", no_argument, NULL, 't' },
+ { "vnetid", required_argument, NULL, 'v' },
+ { NULL, 0, NULL, 0 }
+};
+
+static const struct option overlay_modify_lopts[] = {
+ { "delete-entry", required_argument, NULL, 'd' },
+ { "flush-table", no_argument, NULL, 'f' },
+ { "set-entry", required_argument, NULL, 's' },
+ { NULL, 0, NULL, 0 }
+};
+
+static const struct option overlay_show_lopts[] = {
+ { "fma", no_argument, NULL, 'f' },
+ { "target", no_argument, NULL, 't' },
+ { "parsable", no_argument, NULL, 'p' },
+ { "parseable", no_argument, NULL, 'p' },
+ { "output", required_argument, NULL, 'o' },
+ { NULL, 0, NULL, 0 }
+};
+
+/*
+ * Structures for dladm show-overlay
+ */
+typedef enum {
+ OVERLAY_LINK,
+ OVERLAY_PROPERTY,
+ OVERLAY_PERM,
+ OVERLAY_REQ,
+ OVERLAY_VALUE,
+ OVERLAY_DEFAULT,
+ OVERLAY_POSSIBLE
+} overlay_field_index_t;
+
+static const ofmt_field_t overlay_fields[] = {
+/* name, field width, index */
+{ "LINK", 19, OVERLAY_LINK, print_overlay_cb },
+{ "PROPERTY", 19, OVERLAY_PROPERTY, print_overlay_cb },
+{ "PERM", 5, OVERLAY_PERM, print_overlay_cb },
+{ "REQ", 4, OVERLAY_REQ, print_overlay_cb },
+{ "VALUE", 11, OVERLAY_VALUE, print_overlay_cb },
+{ "DEFAULT", 10, OVERLAY_DEFAULT, print_overlay_cb },
+{ "POSSIBLE", 10, OVERLAY_POSSIBLE, print_overlay_cb },
+{ NULL, 0, 0, NULL }
+};
+
+typedef enum {
+ OVERLAY_FMA_LINK,
+ OVERLAY_FMA_STATUS,
+ OVERLAY_FMA_DETAILS
+} overlay_fma_field_index_t;
+
+static const ofmt_field_t overlay_fma_fields[] = {
+{ "LINK", 20, OVERLAY_FMA_LINK, print_overlay_fma_cb },
+{ "STATUS", 8, OVERLAY_FMA_STATUS, print_overlay_fma_cb },
+{ "DETAILS", 52, OVERLAY_FMA_DETAILS, print_overlay_fma_cb },
+{ NULL, 0, 0, NULL }
+};
+
+typedef enum {
+ OVERLAY_TARG_LINK,
+ OVERLAY_TARG_TARGET,
+ OVERLAY_TARG_DEST
+} overlay_targ_field_index_t;
+
+static const ofmt_field_t overlay_targ_fields[] = {
+{ "LINK", 20, OVERLAY_TARG_LINK, print_overlay_targ_cb },
+{ "TARGET", 18, OVERLAY_TARG_TARGET, print_overlay_targ_cb },
+{ "DESTINATION", 42, OVERLAY_TARG_DEST, print_overlay_targ_cb },
+{ NULL, 0, 0, NULL }
+};
+
static char *progname;
static sig_atomic_t signalled;
@@ -1434,6 +1536,12 @@ static sig_atomic_t signalled;
*/
static dladm_handle_t handle = NULL;
+/*
+ * Global error list that all routines can use. It's initialized by the main
+ * code.
+ */
+static dladm_errlist_t errlist;
+
#define DLADM_ETHERSTUB_NAME "etherstub"
#define DLADM_IS_ETHERSTUB(id) (id == DATALINK_INVALID_LINKID)
@@ -1484,6 +1592,8 @@ main(int argc, char *argv[])
"could not open /dev/dld");
}
+ dladm_errlist_init(&errlist);
+
cmdp->c_fn(argc - 1, &argv[1], cmdp->c_usage);
dladm_close(handle);
@@ -2495,13 +2605,17 @@ do_rename_link(int argc, char *argv[], const char *use)
char *link1, *link2;
char *altroot = NULL;
dladm_status_t status;
+ char *zonename = NULL;
opterr = 0;
- while ((option = getopt_long(argc, argv, ":R:", lopts, NULL)) != -1) {
+ while ((option = getopt_long(argc, argv, ":R:z:", lopts, NULL)) != -1) {
switch (option) {
case 'R':
altroot = optarg;
break;
+ case 'z':
+ zonename = optarg;
+ break;
default:
die_opterr(optopt, option, use);
break;
@@ -2517,7 +2631,7 @@ do_rename_link(int argc, char *argv[], const char *use)
link1 = argv[optind++];
link2 = argv[optind];
- if ((status = dladm_rename_link(handle, link1, link2)) !=
+ if ((status = dladm_rename_link(handle, zonename, link1, link2)) !=
DLADM_STATUS_OK)
die_dlerr(status, "rename operation failed");
}
@@ -3406,11 +3520,12 @@ do_show_link(int argc, char *argv[], const char *use)
ofmt_handle_t ofmt;
ofmt_status_t oferr;
uint_t ofmtflags = 0;
+ char *zonename = NULL;
bzero(&state, sizeof (state));
opterr = 0;
- while ((option = getopt_long(argc, argv, ":pPsi:o:",
+ while ((option = getopt_long(argc, argv, ":pPsi:o:z:",
show_lopts, NULL)) != -1) {
switch (option) {
case 'p':
@@ -3443,6 +3558,9 @@ do_show_link(int argc, char *argv[], const char *use)
if (!dladm_str2interval(optarg, &interval))
die("invalid interval value '%s'", optarg);
break;
+ case 'z':
+ zonename = optarg;
+ break;
default:
die_opterr(optopt, option, use);
break;
@@ -3462,8 +3580,8 @@ do_show_link(int argc, char *argv[], const char *use)
if (strlcpy(linkname, argv[optind], MAXLINKNAMELEN) >=
MAXLINKNAMELEN)
die("link name too long");
- if ((status = dladm_name2info(handle, linkname, &linkid, &f,
- NULL, NULL)) != DLADM_STATUS_OK) {
+ if ((status = dladm_zname2info(handle, zonename, linkname,
+ &linkid, &f, NULL, NULL)) != DLADM_STATUS_OK) {
die_dlerr(status, "link %s is not valid", linkname);
}
@@ -4723,6 +4841,12 @@ do_create_vnic(int argc, char *argv[], const char *use)
if ((flags & DLADM_OPT_FORCE) != 0 && vid == 0)
die("-f option can only be used with -v");
+ /*
+ * If creating a transient VNIC for a zone, mark it in the kernel.
+ */
+ if (strstr(propstr, "zone=") != NULL && !(flags & DLADM_OPT_PERSIST))
+ flags |= DLADM_OPT_TRANSIENT;
+
if (mac_prefix_len != 0 && mac_addr_type != VNIC_MAC_ADDR_TYPE_RANDOM &&
mac_addr_type != VNIC_MAC_ADDR_TYPE_FIXED)
usage();
@@ -4767,7 +4891,7 @@ do_create_vnic(int argc, char *argv[], const char *use)
status = dladm_vnic_create(handle, name, dev_linkid, mac_addr_type,
mac_addr, maclen, &mac_slot, mac_prefix_len, vid, vrid, af,
- &linkid, proplist, flags);
+ &linkid, proplist, &errlist, flags);
switch (status) {
case DLADM_STATUS_OK:
break;
@@ -4778,7 +4902,8 @@ do_create_vnic(int argc, char *argv[], const char *use)
break;
default:
- die_dlerr(status, "vnic creation over %s failed", devname);
+ die_dlerrlist(status, &errlist, "vnic creation over %s failed",
+ devname);
}
dladm_free_props(proplist);
@@ -4814,9 +4939,10 @@ do_delete_vnic_common(int argc, char *argv[], const char *use,
datalink_id_t linkid;
char *altroot = NULL;
dladm_status_t status;
+ char *zonename = NULL;
opterr = 0;
- while ((option = getopt_long(argc, argv, ":R:t", lopts,
+ while ((option = getopt_long(argc, argv, ":R:tz:", lopts,
NULL)) != -1) {
switch (option) {
case 't':
@@ -4825,6 +4951,9 @@ do_delete_vnic_common(int argc, char *argv[], const char *use,
case 'R':
altroot = optarg;
break;
+ case 'z':
+ zonename = optarg;
+ break;
default:
die_opterr(optopt, option, use);
}
@@ -4837,8 +4966,8 @@ do_delete_vnic_common(int argc, char *argv[], const char *use,
if (altroot != NULL)
altroot_cmd(altroot, argc, argv);
- status = dladm_name2info(handle, argv[optind], &linkid, NULL, NULL,
- NULL);
+ status = dladm_zname2info(handle, zonename, argv[optind], &linkid, NULL,
+ NULL, NULL);
if (status != DLADM_STATUS_OK)
die("invalid link name '%s'", argv[optind]);
@@ -4970,6 +5099,9 @@ print_vnic(show_vnic_state_t *state, datalink_id_t linkid)
char vnic_name[MAXLINKNAMELEN];
char mstr[MAXMACADDRLEN * 3];
vnic_fields_buf_t vbuf;
+ uint_t valcnt = 1;
+ char zonename[DLADM_PROP_VAL_MAX + 1];
+ char *valptr[1];
if ((status = dladm_vnic_info(handle, linkid, vnic, state->vs_flags)) !=
DLADM_STATUS_OK)
@@ -4999,6 +5131,18 @@ print_vnic(show_vnic_state_t *state, datalink_id_t linkid)
NULL, devname, sizeof (devname)) != DLADM_STATUS_OK)
(void) sprintf(devname, "?");
+
+ zonename[0] = '\0';
+ if (!is_etherstub) {
+ valptr[0] = zonename;
+ (void) dladm_get_linkprop(handle, linkid,
+ DLADM_PROP_VAL_CURRENT, "zone", (char **)valptr, &valcnt);
+ }
+
+ if (state->vs_zonename != NULL &&
+ strcmp(state->vs_zonename, zonename) != 0)
+ return (DLADM_STATUS_OK);
+
state->vs_found = B_TRUE;
if (state->vs_stats) {
/* print vnic statistics */
@@ -5074,6 +5218,13 @@ print_vnic(show_vnic_state_t *state, datalink_id_t linkid)
(void) snprintf(vbuf.vnic_vid, sizeof (vbuf.vnic_vid),
"%d", vnic->va_vid);
+
+ if (zonename[0] != '\0')
+ (void) snprintf(vbuf.vnic_zone,
+ sizeof (vbuf.vnic_zone), "%s", zonename);
+ else
+ (void) strlcpy(vbuf.vnic_zone, "--",
+ sizeof (vbuf.vnic_zone));
}
ofmt_print(state->vs_ofmt, &vbuf);
@@ -5112,10 +5263,11 @@ do_show_vnic_common(int argc, char *argv[], const char *use,
ofmt_handle_t ofmt;
ofmt_status_t oferr;
uint_t ofmtflags = 0;
+ char *zonename = NULL;
bzero(&state, sizeof (state));
opterr = 0;
- while ((option = getopt_long(argc, argv, ":pPl:si:o:", lopts,
+ while ((option = getopt_long(argc, argv, ":pPl:si:o:z:", lopts,
NULL)) != -1) {
switch (option) {
case 'p':
@@ -5154,6 +5306,9 @@ do_show_vnic_common(int argc, char *argv[], const char *use,
o_arg = B_TRUE;
fields_str = optarg;
break;
+ case 'z':
+ zonename = optarg;
+ break;
default:
die_opterr(optopt, option, use);
}
@@ -5164,8 +5319,8 @@ do_show_vnic_common(int argc, char *argv[], const char *use,
/* get vnic ID (optional last argument) */
if (optind == (argc - 1)) {
- status = dladm_name2info(handle, argv[optind], &linkid, NULL,
- NULL, NULL);
+ status = dladm_zname2info(handle, zonename, argv[optind],
+ &linkid, NULL, NULL, NULL);
if (status != DLADM_STATUS_OK) {
die_dlerr(status, "invalid vnic name '%s'",
argv[optind]);
@@ -5176,8 +5331,8 @@ do_show_vnic_common(int argc, char *argv[], const char *use,
}
if (l_arg) {
- status = dladm_name2info(handle, state.vs_link, &dev_linkid,
- NULL, NULL, NULL);
+ status = dladm_zname2info(handle, zonename, state.vs_link,
+ &dev_linkid, NULL, NULL, NULL);
if (status != DLADM_STATUS_OK) {
die_dlerr(status, "invalid link name '%s'",
state.vs_link);
@@ -5189,6 +5344,7 @@ do_show_vnic_common(int argc, char *argv[], const char *use,
state.vs_etherstub = etherstub;
state.vs_found = B_FALSE;
state.vs_flags = flags;
+ state.vs_zonename = zonename;
if (!o_arg || (o_arg && strcasecmp(fields_str, "all") == 0)) {
if (etherstub)
@@ -5277,7 +5433,7 @@ do_create_etherstub(int argc, char *argv[], const char *use)
status = dladm_vnic_create(handle, name, DATALINK_INVALID_LINKID,
VNIC_MAC_ADDR_TYPE_AUTO, mac_addr, ETHERADDRL, NULL, 0, 0,
- VRRP_VRID_NONE, AF_UNSPEC, NULL, NULL, flags);
+ VRRP_VRID_NONE, AF_UNSPEC, NULL, NULL, &errlist, flags);
if (status != DLADM_STATUS_OK)
die_dlerr(status, "etherstub creation failed");
}
@@ -6677,6 +6833,7 @@ do_show_linkprop(int argc, char **argv, const char *use)
ofmt_handle_t ofmt;
ofmt_status_t oferr;
uint_t ofmtflags = 0;
+ char *zonename = NULL;
bzero(propstr, DLADM_STRSIZE);
opterr = 0;
@@ -6687,7 +6844,7 @@ do_show_linkprop(int argc, char **argv, const char *use)
state.ls_header = B_TRUE;
state.ls_retstatus = DLADM_STATUS_OK;
- while ((option = getopt_long(argc, argv, ":p:cPo:",
+ while ((option = getopt_long(argc, argv, ":p:cPo:z:",
prop_longopts, NULL)) != -1) {
switch (option) {
case 'p':
@@ -6706,6 +6863,9 @@ do_show_linkprop(int argc, char **argv, const char *use)
case 'o':
fields_str = optarg;
break;
+ case 'z':
+ zonename = optarg;
+ break;
default:
die_opterr(optopt, option, use);
break;
@@ -6713,8 +6873,8 @@ do_show_linkprop(int argc, char **argv, const char *use)
}
if (optind == (argc - 1)) {
- if ((status = dladm_name2info(handle, argv[optind], &linkid,
- NULL, NULL, NULL)) != DLADM_STATUS_OK) {
+ if ((status = dladm_zname2info(handle, zonename, argv[optind],
+ &linkid, NULL, NULL, NULL)) != DLADM_STATUS_OK) {
die_dlerr(status, "link %s is not valid", argv[optind]);
}
} else if (optind != argc) {
@@ -6725,6 +6885,7 @@ do_show_linkprop(int argc, char **argv, const char *use)
!= DLADM_STATUS_OK)
die("invalid link properties specified");
state.ls_proplist = proplist;
+ state.ls_zonename = zonename;
state.ls_status = DLADM_STATUS_OK;
if (state.ls_parsable)
@@ -6769,6 +6930,17 @@ show_linkprop_onelink(dladm_handle_t hdl, datalink_id_t linkid, void *arg)
return (DLADM_WALK_CONTINUE);
}
+ if (statep->ls_zonename != NULL) {
+ datalink_id_t tlinkid;
+
+ if (dladm_zname2info(hdl, statep->ls_zonename, statep->ls_link,
+ &tlinkid, NULL, NULL, NULL) != DLADM_STATUS_OK ||
+ linkid != tlinkid) {
+ statep->ls_status = DLADM_STATUS_NOTFOUND;
+ return (DLADM_WALK_CONTINUE);
+ }
+ }
+
if ((statep->ls_persist && !(flags & DLADM_OPT_PERSIST)) ||
(!statep->ls_persist && !(flags & DLADM_OPT_ACTIVE))) {
statep->ls_status = DLADM_STATUS_BADARG;
@@ -6851,11 +7023,12 @@ set_linkprop(int argc, char **argv, boolean_t reset, const char *use)
dladm_status_t status = DLADM_STATUS_OK;
char propstr[DLADM_STRSIZE];
dladm_arg_list_t *proplist = NULL;
+ char *zonename = NULL;
opterr = 0;
bzero(propstr, DLADM_STRSIZE);
- while ((option = getopt_long(argc, argv, ":p:R:t",
+ while ((option = getopt_long(argc, argv, ":p:R:tz:",
prop_longopts, NULL)) != -1) {
switch (option) {
case 'p':
@@ -6870,6 +7043,9 @@ set_linkprop(int argc, char **argv, boolean_t reset, const char *use)
case 'R':
altroot = optarg;
break;
+ case 'z':
+ zonename = optarg;
+ break;
default:
die_opterr(optopt, option, use);
@@ -6892,8 +7068,8 @@ set_linkprop(int argc, char **argv, boolean_t reset, const char *use)
altroot_cmd(altroot, argc, argv);
}
- status = dladm_name2info(handle, argv[optind], &linkid, NULL, NULL,
- NULL);
+ status = dladm_zname2info(handle, zonename, argv[optind], &linkid,
+ NULL, NULL, NULL);
if (status != DLADM_STATUS_OK)
die_dlerr(status, "link %s is not valid", argv[optind]);
@@ -8916,6 +9092,21 @@ warn_dlerr(dladm_status_t err, const char *format, ...)
(void) fprintf(stderr, ": %s\n", dladm_status2str(err, errmsg));
}
+static void
+warn_dlerrlist(dladm_errlist_t *errlist)
+{
+ if (errlist != NULL && errlist->el_count > 0) {
+ int i;
+ for (i = 0; i < errlist->el_count; i++) {
+ (void) fprintf(stderr, gettext("%s: warning: "),
+ progname);
+
+ (void) fprintf(stderr, "%s\n",
+ gettext(errlist->el_errs[i]));
+ }
+ }
+}
+
/*
* Also closes the dladm handle if it is not NULL.
*/
@@ -8941,6 +9132,34 @@ die_dlerr(dladm_status_t err, const char *format, ...)
exit(EXIT_FAILURE);
}
+/*
+ * Like die_dlerr, but uses the errlist for additional information.
+ */
+/* PRINTFLIKE3 */
+static void
+die_dlerrlist(dladm_status_t err, dladm_errlist_t *errlist,
+ const char *format, ...)
+{
+ va_list alist;
+ char errmsg[DLADM_STRSIZE];
+
+ warn_dlerrlist(errlist);
+ format = gettext(format);
+ (void) fprintf(stderr, "%s: ", progname);
+
+ va_start(alist, format);
+ (void) vfprintf(stderr, format, alist);
+ va_end(alist);
+ (void) fprintf(stderr, ": %s\n", dladm_status2str(err, errmsg));
+
+ /* close dladm handle if it was opened */
+ if (handle != NULL)
+ dladm_close(handle);
+
+ exit(EXIT_FAILURE);
+
+}
+
/* PRINTFLIKE1 */
static void
die(const char *format, ...)
@@ -9671,3 +9890,673 @@ do_up_part(int argc, char *argv[], const char *use)
(void) dladm_part_up(handle, partid, 0);
}
+
+static void
+do_create_overlay(int argc, char *argv[], const char *use)
+{
+ int opt;
+ char *encap = NULL, *endp, *search = NULL;
+ char name[MAXLINKNAMELEN];
+ dladm_status_t status;
+ uint32_t flags = DLADM_OPT_ACTIVE | DLADM_OPT_PERSIST;
+ uint64_t vid;
+ boolean_t havevid = B_FALSE;
+ char propstr[DLADM_STRSIZE];
+ dladm_arg_list_t *proplist = NULL;
+
+ bzero(propstr, sizeof (propstr));
+ while ((opt = getopt_long(argc, argv, ":te:v:p:s:",
+ overlay_create_lopts, NULL)) != -1) {
+ switch (opt) {
+ case 'e':
+ encap = optarg;
+ break;
+ case 's':
+ search = optarg;
+ break;
+ case 't':
+ flags &= ~DLADM_OPT_PERSIST;
+ break;
+ case 'p':
+ (void) strlcat(propstr, optarg, DLADM_STRSIZE);
+ if (strlcat(propstr, ",", DLADM_STRSIZE) >=
+ DLADM_STRSIZE)
+ die("property list too long '%s'", propstr);
+ break;
+ case 'v':
+ vid = strtoul(optarg, &endp, 10);
+ if (*endp != '\0' || (vid == 0 && errno == EINVAL))
+ die("couldn't parse virtual networkd id: %s",
+ optarg);
+ if (vid == ULONG_MAX && errno == ERANGE)
+ die("virtual networkd id too large: %s",
+ optarg);
+ havevid = B_TRUE;
+ break;
+ default:
+ die_opterr(optopt, opt, use);
+ }
+ }
+
+ if (havevid == B_FALSE)
+ die("missing required virtual network id");
+
+ if (encap == NULL)
+ die("missing required encapsulation plugin");
+
+ if (encap == NULL)
+ die("missing required search plugin");
+
+ if (optind != (argc - 1))
+ die("missing device name");
+
+ if (strlcpy(name, argv[optind], MAXLINKNAMELEN) >= MAXLINKNAMELEN)
+ die("link name too long '%s'", argv[optind]);
+
+ if (!dladm_valid_linkname(name))
+ die("invalid link name '%s'", argv[optind]);
+
+ if (strlen(encap) + 1 > MAXLINKNAMELEN)
+ die("encapsulation plugin name too long '%s'", encap);
+
+ if (strlen(search) + 1 > MAXLINKNAMELEN)
+ die("search plugin name too long '%s'", encap);
+
+ if (dladm_parse_link_props(propstr, &proplist, B_FALSE)
+ != DLADM_STATUS_OK)
+ die("invalid overlay property");
+
+ status = dladm_overlay_create(handle, name, encap, search, vid,
+ proplist, &errlist, flags);
+ dladm_free_props(proplist);
+ if (status != DLADM_STATUS_OK) {
+ die_dlerrlist(status, &errlist, "overlay creation failed");
+ }
+}
+
+/* ARGSUSED */
+static void
+do_delete_overlay(int argc, char *argv[], const char *use)
+{
+ datalink_id_t linkid = DATALINK_ALL_LINKID;
+ dladm_status_t status;
+
+ if (argc != 2) {
+ usage();
+ }
+
+ status = dladm_name2info(handle, argv[1], &linkid, NULL, NULL, NULL);
+ if (status != DLADM_STATUS_OK)
+ die_dlerr(status, "failed to delete %s", argv[1]);
+
+ status = dladm_overlay_delete(handle, linkid);
+ if (status != DLADM_STATUS_OK)
+ die_dlerr(status, "failed to delete %s", argv[1]);
+}
+
+typedef struct showoverlay_state {
+ ofmt_handle_t sho_ofmt;
+ const char *sho_linkname;
+ dladm_overlay_propinfo_handle_t sho_info;
+ uint8_t sho_value[DLADM_OVERLAY_PROP_SIZEMAX];
+ uint32_t sho_size;
+} showoverlay_state_t;
+
+typedef struct showoverlay_fma_state {
+ ofmt_handle_t shof_ofmt;
+ const char *shof_linkname;
+ dladm_overlay_status_t *shof_status;
+} showoverlay_fma_state_t;
+
+typedef struct showoverlay_targ_state {
+ ofmt_handle_t shot_ofmt;
+ const char *shot_linkname;
+ const struct ether_addr *shot_key;
+ const dladm_overlay_point_t *shot_point;
+} showoverlay_targ_state_t;
+
+static void
+print_overlay_value(char *outbuf, uint_t bufsize, uint_t type, const void *pbuf,
+ const size_t psize)
+{
+ const struct in6_addr *ipv6;
+ struct in_addr ip;
+
+ switch (type) {
+ case OVERLAY_PROP_T_INT:
+ if (psize != 1 && psize != 2 && psize != 4 && psize != 8) {
+ (void) snprintf(outbuf, bufsize, "?");
+ break;
+ }
+ if (psize == 1)
+ (void) snprintf(outbuf, bufsize, "%d", *(int8_t *)pbuf);
+ if (psize == 2)
+ (void) snprintf(outbuf, bufsize, "%d",
+ *(int16_t *)pbuf);
+ if (psize == 4)
+ (void) snprintf(outbuf, bufsize, "%d",
+ *(int32_t *)pbuf);
+ if (psize == 8)
+ (void) snprintf(outbuf, bufsize, "%d",
+ *(int64_t *)pbuf);
+ break;
+ case OVERLAY_PROP_T_UINT:
+ if (psize != 1 && psize != 2 && psize != 4 && psize != 8) {
+ (void) snprintf(outbuf, bufsize, "?");
+ break;
+ }
+ if (psize == 1)
+ (void) snprintf(outbuf, bufsize, "%d",
+ *(uint8_t *)pbuf);
+ if (psize == 2)
+ (void) snprintf(outbuf, bufsize, "%d",
+ *(uint16_t *)pbuf);
+ if (psize == 4)
+ (void) snprintf(outbuf, bufsize, "%d",
+ *(uint32_t *)pbuf);
+ if (psize == 8)
+ (void) snprintf(outbuf, bufsize, "%d",
+ *(uint64_t *)pbuf);
+ break;
+ case OVERLAY_PROP_T_IP:
+ if (psize != sizeof (struct in6_addr)) {
+ warn("malformed overlay IP property: %d bytes\n",
+ psize);
+ (void) snprintf(outbuf, bufsize, "--");
+ break;
+ }
+
+ ipv6 = pbuf;
+ if (IN6_IS_ADDR_V4MAPPED(ipv6)) {
+ IN6_V4MAPPED_TO_INADDR(ipv6, &ip);
+ if (inet_ntop(AF_INET, &ip, outbuf, bufsize) == NULL) {
+ warn("malformed overlay IP property\n");
+ (void) snprintf(outbuf, bufsize, "--");
+ break;
+ }
+ } else {
+ if (inet_ntop(AF_INET6, ipv6, outbuf, bufsize) ==
+ NULL) {
+ warn("malformed overlay IP property\n");
+ (void) snprintf(outbuf, bufsize, "--");
+ break;
+ }
+ }
+
+ break;
+ case OVERLAY_PROP_T_STRING:
+ (void) snprintf(outbuf, bufsize, "%s", pbuf);
+ break;
+ default:
+ abort();
+ }
+
+ return;
+
+}
+
+static boolean_t
+print_overlay_cb(ofmt_arg_t *ofarg, char *buf, uint_t bufsize)
+{
+ dladm_status_t status;
+ showoverlay_state_t *sp = ofarg->ofmt_cbarg;
+ dladm_overlay_propinfo_handle_t infop = sp->sho_info;
+ const char *pname;
+ uint_t type, prot;
+ const void *def;
+ uint32_t defsize;
+ const mac_propval_range_t *rangep;
+
+ if ((status = dladm_overlay_prop_info(infop, &pname, &type, &prot, &def,
+ &defsize, &rangep)) != DLADM_STATUS_OK) {
+ warn_dlerr(status, "failed to get get property info");
+ return (B_TRUE);
+ }
+
+ switch (ofarg->ofmt_id) {
+ case OVERLAY_LINK:
+ (void) snprintf(buf, bufsize, "%s", sp->sho_linkname);
+ break;
+ case OVERLAY_PROPERTY:
+ (void) snprintf(buf, bufsize, "%s", pname);
+ break;
+ case OVERLAY_PERM:
+ if ((prot & OVERLAY_PROP_PERM_RW) == OVERLAY_PROP_PERM_RW) {
+ (void) snprintf(buf, bufsize, "%s", "rw");
+ } else if ((prot & OVERLAY_PROP_PERM_RW) ==
+ OVERLAY_PROP_PERM_READ) {
+ (void) snprintf(buf, bufsize, "%s", "r-");
+ } else {
+ (void) snprintf(buf, bufsize, "%s", "--");
+ }
+ break;
+ case OVERLAY_REQ:
+ (void) snprintf(buf, bufsize, "%s",
+ prot & OVERLAY_PROP_PERM_REQ ? "y" : "-");
+ break;
+ case OVERLAY_VALUE:
+ if (sp->sho_size == 0) {
+ (void) snprintf(buf, bufsize, "%s", "--");
+ } else {
+ print_overlay_value(buf, bufsize, type, sp->sho_value,
+ sp->sho_size);
+ }
+ break;
+ case OVERLAY_DEFAULT:
+ if (defsize == 0) {
+ (void) snprintf(buf, bufsize, "%s", "--");
+ } else {
+ print_overlay_value(buf, bufsize, type, def, defsize);
+ }
+ break;
+ case OVERLAY_POSSIBLE: {
+ int i;
+ char **vals, *ptr, *lim;
+ if (rangep->mpr_count == 0) {
+ (void) snprintf(buf, bufsize, "%s", "--");
+ break;
+ }
+
+ vals = malloc((sizeof (char *) + DLADM_PROP_VAL_MAX) *
+ rangep->mpr_count);
+ if (vals == NULL)
+ die("insufficient memory");
+ for (i = 0; i < rangep->mpr_count; i++) {
+ vals[i] = (char *)vals + sizeof (char *) *
+ rangep->mpr_count + i * DLADM_MAX_PROP_VALCNT;
+ }
+
+ if (dladm_range2strs(rangep, vals) != 0) {
+ free(vals);
+ (void) snprintf(buf, bufsize, "%s", "?");
+ break;
+ }
+
+ ptr = buf;
+ lim = buf + bufsize;
+ for (i = 0; i < rangep->mpr_count; i++) {
+ ptr += snprintf(ptr, lim - ptr, "%s,", vals[i]);
+ if (ptr >= lim)
+ break;
+ }
+ if (rangep->mpr_count > 0)
+ buf[strlen(buf) - 1] = '\0';
+ free(vals);
+ break;
+ }
+ default:
+ abort();
+ }
+ return (B_TRUE);
+}
+
+static int
+dladm_overlay_show_one(dladm_handle_t handle, datalink_id_t linkid,
+ dladm_overlay_propinfo_handle_t phdl, void *arg)
+{
+ showoverlay_state_t *sp = arg;
+ sp->sho_info = phdl;
+
+ sp->sho_size = sizeof (sp->sho_value);
+ if (dladm_overlay_get_prop(handle, linkid, phdl, &sp->sho_value,
+ &sp->sho_size) != DLADM_STATUS_OK)
+ return (DLADM_WALK_CONTINUE);
+
+ ofmt_print(sp->sho_ofmt, sp);
+ return (DLADM_WALK_CONTINUE);
+}
+
+static int
+show_one_overlay(dladm_handle_t hdl, datalink_id_t linkid, void *arg)
+{
+ char buf[MAXLINKNAMELEN];
+ showoverlay_state_t state;
+ datalink_class_t class;
+
+ if (dladm_datalink_id2info(hdl, linkid, NULL, &class, NULL, buf,
+ MAXLINKNAMELEN) != DLADM_STATUS_OK ||
+ class != DATALINK_CLASS_OVERLAY)
+ return (DLADM_WALK_CONTINUE);
+
+ state.sho_linkname = buf;
+ state.sho_ofmt = arg;
+
+ dladm_errlist_reset(&errlist);
+ (void) dladm_overlay_walk_prop(handle, linkid, dladm_overlay_show_one,
+ &state, &errlist);
+ warn_dlerrlist(&errlist);
+
+ return (DLADM_WALK_CONTINUE);
+}
+
+static boolean_t
+print_overlay_targ_cb(ofmt_arg_t *ofarg, char *buf, uint_t bufsize)
+{
+ char keybuf[ETHERADDRSTRL];
+ const showoverlay_targ_state_t *shot = ofarg->ofmt_cbarg;
+ const dladm_overlay_point_t *point = shot->shot_point;
+ char macbuf[ETHERADDRSTRL];
+ char ipbuf[INET6_ADDRSTRLEN];
+ custr_t *cus;
+
+ switch (ofarg->ofmt_id) {
+ case OVERLAY_TARG_LINK:
+ (void) snprintf(buf, bufsize, shot->shot_linkname);
+ break;
+ case OVERLAY_TARG_TARGET:
+ if ((point->dop_flags & DLADM_OVERLAY_F_DEFAULT) != 0) {
+ (void) snprintf(buf, bufsize, "*:*:*:*:*:*");
+ } else {
+ if (ether_ntoa_r(shot->shot_key, keybuf) == NULL) {
+ warn("encountered malformed mac address key\n");
+ return (B_FALSE);
+ }
+ (void) snprintf(buf, bufsize, "%s", keybuf);
+ }
+ break;
+ case OVERLAY_TARG_DEST:
+ if (custr_alloc_buf(&cus, buf, bufsize) != 0) {
+ die("ran out of memory for printing the overlay "
+ "target destination");
+ }
+
+ if (point->dop_dest & OVERLAY_PLUGIN_D_ETHERNET) {
+ if (ether_ntoa_r(&point->dop_mac, macbuf) == NULL) {
+ warn("encountered malformed mac address target "
+ "for key %s\n", keybuf);
+ return (B_FALSE);
+ }
+ (void) custr_append(cus, macbuf);
+ }
+
+ if (point->dop_dest & OVERLAY_PLUGIN_D_IP) {
+ if (IN6_IS_ADDR_V4MAPPED(&point->dop_ip)) {
+ struct in_addr v4;
+ IN6_V4MAPPED_TO_INADDR(&point->dop_ip, &v4);
+ if (inet_ntop(AF_INET, &v4, ipbuf,
+ sizeof (ipbuf)) == NULL)
+ abort();
+ } else if (inet_ntop(AF_INET6, &point->dop_ip, ipbuf,
+ sizeof (ipbuf)) == NULL) {
+ /*
+ * The only failrues we should get are
+ * EAFNOSUPPORT and ENOSPC because of buffer
+ * exhaustion. In either of these cases, that
+ * means something has gone horribly wrong.
+ */
+ abort();
+ }
+ if (point->dop_dest & OVERLAY_PLUGIN_D_ETHERNET)
+ (void) custr_appendc(cus, ',');
+ (void) custr_append(cus, ipbuf);
+ }
+
+ if (point->dop_dest & OVERLAY_PLUGIN_D_PORT) {
+ if (point->dop_dest & OVERLAY_PLUGIN_D_IP)
+ (void) custr_appendc(cus, ':');
+ else if (point->dop_dest & OVERLAY_PLUGIN_D_ETHERNET)
+ (void) custr_appendc(cus, ',');
+ (void) custr_append_printf(cus, "%u", point->dop_port);
+ }
+
+ custr_free(cus);
+
+ break;
+ }
+ return (B_TRUE);
+}
+
+/* ARGSUSED */
+static int
+show_one_overlay_table_entry(dladm_handle_t handle, datalink_id_t linkid,
+ const struct ether_addr *key, const dladm_overlay_point_t *point, void *arg)
+{
+ showoverlay_targ_state_t *shot = arg;
+
+ shot->shot_key = key;
+ shot->shot_point = point;
+ ofmt_print(shot->shot_ofmt, shot);
+
+ return (DLADM_WALK_CONTINUE);
+}
+
+/* ARGSUSED */
+static int
+show_one_overlay_table(dladm_handle_t handle, datalink_id_t linkid, void *arg)
+{
+ char linkbuf[MAXLINKNAMELEN];
+ showoverlay_targ_state_t shot;
+ datalink_class_t class;
+
+ if (dladm_datalink_id2info(handle, linkid, NULL, &class, NULL, linkbuf,
+ MAXLINKNAMELEN) != DLADM_STATUS_OK ||
+ class != DATALINK_CLASS_OVERLAY)
+ return (DLADM_WALK_CONTINUE);
+
+ shot.shot_ofmt = arg;
+ shot.shot_linkname = linkbuf;
+
+ (void) dladm_overlay_walk_cache(handle, linkid,
+ show_one_overlay_table_entry, &shot);
+
+ return (DLADM_WALK_CONTINUE);
+}
+
+static boolean_t
+print_overlay_fma_cb(ofmt_arg_t *ofarg, char *buf, uint_t bufsize)
+{
+ showoverlay_fma_state_t *shof = ofarg->ofmt_cbarg;
+ dladm_overlay_status_t *st = shof->shof_status;
+
+ switch (ofarg->ofmt_id) {
+ case OVERLAY_FMA_LINK:
+ (void) snprintf(buf, bufsize, "%s", shof->shof_linkname);
+ break;
+ case OVERLAY_FMA_STATUS:
+ (void) snprintf(buf, bufsize, st->dos_degraded == B_TRUE ?
+ "DEGRADED": "ONLINE");
+ break;
+ case OVERLAY_FMA_DETAILS:
+ (void) snprintf(buf, bufsize, "%s", st->dos_degraded == B_TRUE ?
+ st->dos_fmamsg : "-");
+ break;
+ default:
+ abort();
+ }
+ return (B_TRUE);
+}
+
+/* ARGSUSED */
+static void
+show_one_overlay_fma_cb(dladm_handle_t handle, datalink_id_t linkid,
+ dladm_overlay_status_t *stat, void *arg)
+{
+ showoverlay_fma_state_t *shof = arg;
+ shof->shof_status = stat;
+ ofmt_print(shof->shof_ofmt, shof);
+}
+
+
+static int
+show_one_overlay_fma(dladm_handle_t handle, datalink_id_t linkid, void *arg)
+{
+ dladm_status_t status;
+ char linkbuf[MAXLINKNAMELEN];
+ datalink_class_t class;
+ showoverlay_fma_state_t shof;
+
+ if (dladm_datalink_id2info(handle, linkid, NULL, &class, NULL, linkbuf,
+ MAXLINKNAMELEN) != DLADM_STATUS_OK ||
+ class != DATALINK_CLASS_OVERLAY) {
+ die("datalink %s is not an overlay device\n", linkbuf);
+ }
+
+ shof.shof_ofmt = arg;
+ shof.shof_linkname = linkbuf;
+
+ status = dladm_overlay_status(handle, linkid,
+ show_one_overlay_fma_cb, &shof);
+ if (status != DLADM_STATUS_OK)
+ die_dlerr(status, "failed to obtain device status for %s",
+ linkbuf);
+
+ return (DLADM_WALK_CONTINUE);
+}
+
+static void
+do_show_overlay(int argc, char *argv[], const char *use)
+{
+ int i, opt;
+ datalink_id_t linkid = DATALINK_ALL_LINKID;
+ dladm_status_t status;
+ int (*funcp)(dladm_handle_t, datalink_id_t, void *);
+ char *fields_str = NULL;
+ const ofmt_field_t *fieldsp;
+ ofmt_status_t oferr;
+ boolean_t parse;
+ ofmt_handle_t ofmt;
+ uint_t ofmtflags;
+ int err;
+
+
+ funcp = show_one_overlay;
+ fieldsp = overlay_fields;
+ parse = B_FALSE;
+ ofmtflags = OFMT_WRAP;
+ while ((opt = getopt_long(argc, argv, ":o:pft", overlay_show_lopts,
+ NULL)) != -1) {
+ switch (opt) {
+ case 'f':
+ funcp = show_one_overlay_fma;
+ fieldsp = overlay_fma_fields;
+ break;
+ case 'o':
+ fields_str = optarg;
+ break;
+ case 'p':
+ parse = B_TRUE;
+ ofmtflags = OFMT_PARSABLE;
+ break;
+ case 't':
+ funcp = show_one_overlay_table;
+ fieldsp = overlay_targ_fields;
+ break;
+ default:
+ die_opterr(optopt, opt, use);
+ }
+ }
+
+ if (fields_str != NULL && strcasecmp(fields_str, "all") == 0)
+ fields_str = NULL;
+
+ oferr = ofmt_open(fields_str, fieldsp, ofmtflags, 0, &ofmt);
+ dladm_ofmt_check(oferr, parse, ofmt);
+
+ err = 0;
+ if (argc > optind) {
+ for (i = optind; i < argc; i++) {
+ status = dladm_name2info(handle, argv[i], &linkid,
+ NULL, NULL, NULL);
+ if (status != DLADM_STATUS_OK) {
+ warn_dlerr(status, "failed to find %s",
+ argv[i]);
+ err = 1;
+ continue;
+ }
+ funcp(handle, linkid, ofmt);
+ }
+ } else {
+ (void) dladm_walk_datalink_id(funcp, handle, ofmt,
+ DATALINK_CLASS_OVERLAY, DATALINK_ANY_MEDIATYPE,
+ DLADM_OPT_ACTIVE);
+ }
+ ofmt_close(ofmt);
+
+ exit(err);
+}
+
+static void
+do_modify_overlay(int argc, char *argv[], const char *use)
+{
+ int opt, ocnt = 0;
+ boolean_t flush, set, delete;
+ struct ether_addr e;
+ char *dest;
+ datalink_id_t linkid = DATALINK_ALL_LINKID;
+ dladm_status_t status;
+
+ flush = set = delete = B_FALSE;
+ while ((opt = getopt_long(argc, argv, ":fd:s:", overlay_modify_lopts,
+ NULL)) != -1) {
+ switch (opt) {
+ case 'd':
+ if (delete == B_TRUE)
+ die_optdup('d');
+ delete = B_TRUE;
+ ocnt++;
+ if (ether_aton_r(optarg, &e) == NULL)
+ die("invalid mac address: %s\n", optarg);
+ break;
+ case 'f':
+ if (flush == B_TRUE)
+ die_optdup('f');
+ flush = B_TRUE;
+ ocnt++;
+ break;
+ case 's':
+ if (set == B_TRUE)
+ die_optdup('s');
+ set = B_TRUE;
+ ocnt++;
+ dest = strchr(optarg, '=');
+ *dest = '\0';
+ dest++;
+ if (dest == NULL)
+ die("malformed value, expected mac=dest, "
+ "got: %s\n", optarg);
+ if (ether_aton_r(optarg, &e) == NULL)
+ die("invalid mac address: %s\n", optarg);
+ break;
+ default:
+ die_opterr(optopt, opt, use);
+ }
+ }
+
+ if (ocnt == 0)
+ die("need to specify one of -d, -f, or -s");
+ if (ocnt > 1)
+ die("only one of -d, -f, or -s may be used");
+
+ if (argv[optind] == NULL)
+ die("missing required overlay device\n");
+ if (argc > optind + 1)
+ die("only one overlay device may be specified\n");
+
+ status = dladm_name2info(handle, argv[optind], &linkid, NULL, NULL,
+ NULL);
+ if (status != DLADM_STATUS_OK) {
+ die_dlerr(status, "failed to find overlay %s", argv[optind]);
+ }
+
+ if (flush == B_TRUE) {
+ status = dladm_overlay_cache_flush(handle, linkid);
+ if (status != DLADM_STATUS_OK)
+ die_dlerr(status, "failed to flush target cache for "
+ "overlay %s", argv[optind]);
+ }
+
+ if (delete == B_TRUE) {
+ status = dladm_overlay_cache_delete(handle, linkid, &e);
+ if (status != DLADM_STATUS_OK)
+ die_dlerr(status, "failed to flush target %s from "
+ "overlay target cache %s", optarg, argv[optind]);
+ }
+
+ if (set == B_TRUE) {
+ status = dladm_overlay_cache_set(handle, linkid, &e, dest);
+ if (status != DLADM_STATUS_OK)
+ die_dlerr(status, "failed to set target %s for overlay "
+ "target cache %s", optarg, argv[optind]);
+ }
+
+}
diff --git a/usr/src/cmd/dlmgmtd/dlmgmt_db.c b/usr/src/cmd/dlmgmtd/dlmgmt_db.c
index 99307dbc03..8eecc807e5 100644
--- a/usr/src/cmd/dlmgmtd/dlmgmt_db.c
+++ b/usr/src/cmd/dlmgmtd/dlmgmt_db.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015, Joyent Inc.
*/
#include <assert.h>
@@ -43,6 +44,8 @@
#include <libcontract.h>
#include <libcontract_priv.h>
#include <sys/contract/process.h>
+#include <sys/vnic.h>
+#include <zone.h>
#include "dlmgmt_impl.h"
typedef enum dlmgmt_db_op {
@@ -552,6 +555,10 @@ dlmgmt_db_update(dlmgmt_db_op_t op, const char *entryname, dlmgmt_link_t *linkp,
linkp->ll_zoneid, flags, &err)) == NULL)
return (err);
+ /* If transient op and onloan, use the global zone cache file. */
+ if (flags == DLMGMT_ACTIVE && linkp->ll_onloan)
+ req->ls_zoneid = GLOBAL_ZONEID;
+
/*
* If the return error is EINPROGRESS, this request is handled
* asynchronously; return success.
@@ -714,11 +721,13 @@ parse_linkprops(char *buf, dlmgmt_link_t *linkp)
char attr_name[MAXLINKATTRLEN];
size_t attr_buf_len = 0;
void *attr_buf = NULL;
+ boolean_t rename;
curr = buf;
len = strlen(buf);
attr_name[0] = '\0';
for (i = 0; i < len; i++) {
+ rename = B_FALSE;
char c = buf[i];
boolean_t match = (c == '=' ||
(c == ',' && !found_type) || c == ';');
@@ -768,6 +777,21 @@ parse_linkprops(char *buf, dlmgmt_link_t *linkp)
goto parse_fail;
linkp->ll_media =
(uint32_t)*(int64_t *)attr_buf;
+ } else if (strcmp(attr_name, "zone") == 0) {
+ if (read_str(curr, &attr_buf) == 0)
+ goto parse_fail;
+ linkp->ll_zoneid = getzoneidbyname(attr_buf);
+ if (linkp->ll_zoneid == -1) {
+ if (errno == EFAULT)
+ abort();
+ /*
+ * If we can't find the zone, assign the
+ * link to the GZ and mark it for being
+ * renamed.
+ */
+ linkp->ll_zoneid = 0;
+ rename = B_TRUE;
+ }
} else {
attr_buf_len = translators[type].read_func(curr,
&attr_buf);
@@ -811,6 +835,16 @@ parse_linkprops(char *buf, dlmgmt_link_t *linkp)
(void) snprintf(attr_name, MAXLINKATTRLEN, "%s", curr);
}
+
+ /*
+ * The zone that this link belongs to has died, we are
+ * reparenting it to the GZ and renaming it to avoid name
+ * collisions.
+ */
+ if (rename == B_TRUE) {
+ (void) snprintf(linkp->ll_link, MAXLINKNAMELEN,
+ "SUNWorphan%u", (uint16_t)(gethrtime() / 1000));
+ }
curr = buf + i + 1;
}
@@ -1222,12 +1256,19 @@ generate_link_line(dlmgmt_link_t *linkp, boolean_t persist, char *buf)
ptr += snprintf(ptr, BUFLEN(lim, ptr), "%s\t", linkp->ll_link);
if (!persist) {
+ char zname[ZONENAME_MAX];
/*
- * We store the linkid in the active database so that dlmgmtd
- * can recover in the event that it is restarted.
+ * We store the linkid and the zone name in the active database
+ * so that dlmgmtd can recover in the event that it is
+ * restarted.
*/
u64 = linkp->ll_linkid;
ptr += write_uint64(ptr, BUFLEN(lim, ptr), "linkid", &u64);
+
+ if (getzonenamebyid(linkp->ll_zoneid, zname,
+ sizeof (zname)) != -1) {
+ ptr += write_str(ptr, BUFLEN(lim, ptr), "zone", zname);
+ }
}
u64 = linkp->ll_class;
ptr += write_uint64(ptr, BUFLEN(lim, ptr), "class", &u64);
@@ -1382,38 +1423,88 @@ dlmgmt_db_walk(zoneid_t zoneid, datalink_class_t class, db_walk_func_t *func)
}
/*
+ * Attempt to mitigate one of the deadlocks in the dlmgmtd architecture.
+ *
+ * dlmgmt_db_init() calls dlmgmt_process_db_req() which eventually gets to
+ * dlmgmt_zfop() which tries to fork, enter the zone and read the file.
+ * Because of the upcall architecture of dlmgmtd this can lead to deadlock
+ * with the following scenario:
+ * a) the thread preparing to fork will have acquired the malloc locks
+ * then attempt to suspend every thread in preparation to fork.
+ * b) all of the upcalls will be blocked in door_ucred() trying to malloc()
+ * and get the credentials of their caller.
+ * c) we can't suspend the in-kernel thread making the upcall.
+ *
+ * Thus, we cannot serve door requests because we're blocked in malloc()
+ * which fork() owns, but fork() is in turn blocked on the in-kernel thread
+ * making the door upcall. This is a fundamental architectural problem with
+ * any server handling upcalls and also trying to fork().
+ *
+ * To minimize the chance of this deadlock occuring, we check ahead of time to
+ * see if the file we want to read actually exists in the zone (which it almost
+ * never does), so we don't need fork in that case (i.e. rarely to never).
+ */
+static boolean_t
+zone_file_exists(char *zoneroot, char *filename)
+{
+ struct stat sb;
+ char fname[MAXPATHLEN];
+
+ (void) snprintf(fname, sizeof (fname), "%s/%s", zoneroot, filename);
+
+ if (stat(fname, &sb) == -1)
+ return (B_FALSE);
+
+ return (B_TRUE);
+}
+
+/*
* Initialize the datalink <link name, linkid> mapping and the link's
* attributes list based on the configuration file /etc/dladm/datalink.conf
* and the active configuration cache file
* /etc/svc/volatile/dladm/datalink-management:default.cache.
*/
int
-dlmgmt_db_init(zoneid_t zoneid)
+dlmgmt_db_init(zoneid_t zoneid, char *zoneroot)
{
dlmgmt_db_req_t *req;
int err;
boolean_t boot = B_FALSE;
+ char tdir[MAXPATHLEN];
+ char *path = cachefile;
if ((req = dlmgmt_db_req_alloc(DLMGMT_DB_OP_READ, NULL,
DATALINK_INVALID_LINKID, zoneid, DLMGMT_ACTIVE, &err)) == NULL)
return (err);
- if ((err = dlmgmt_process_db_req(req)) != 0) {
- /*
- * If we get back ENOENT, that means that the active
- * configuration file doesn't exist yet, and is not an error.
- * We'll create it down below after we've loaded the
- * persistent configuration.
- */
- if (err != ENOENT)
- goto done;
+ /* Handle running in a non-native branded zone (i.e. has /native) */
+ if (zone_file_exists(zoneroot, "/native" DLMGMT_TMPFS_DIR)) {
+ (void) snprintf(tdir, sizeof (tdir), "/native%s", cachefile);
+ path = tdir;
+ }
+
+ if (zone_file_exists(zoneroot, path)) {
+ if ((err = dlmgmt_process_db_req(req)) != 0) {
+ /*
+ * If we get back ENOENT, that means that the active
+ * configuration file doesn't exist yet, and is not an
+ * error. We'll create it down below after we've
+ * loaded the persistent configuration.
+ */
+ if (err != ENOENT)
+ goto done;
+ boot = B_TRUE;
+ }
+ } else {
boot = B_TRUE;
}
- req->ls_flags = DLMGMT_PERSIST;
- err = dlmgmt_process_db_req(req);
- if (err != 0 && err != ENOENT)
- goto done;
+ if (zone_file_exists(zoneroot, DLMGMT_PERSISTENT_DB_PATH)) {
+ req->ls_flags = DLMGMT_PERSIST;
+ err = dlmgmt_process_db_req(req);
+ if (err != 0 && err != ENOENT)
+ goto done;
+ }
err = 0;
if (rewrite_needed) {
/*
@@ -1442,6 +1533,11 @@ done:
/*
* Remove all links in the given zoneid.
+ *
+ * We do this work in two different passes. In the first pass, we remove any
+ * entry that hasn't been loaned and mark every entry that has been loaned as
+ * something that is going to be tombstomed. In the second pass, we drop the
+ * table lock for every entry and remove the tombstombed entry for our zone.
*/
void
dlmgmt_db_fini(zoneid_t zoneid)
@@ -1451,9 +1547,66 @@ dlmgmt_db_fini(zoneid_t zoneid)
while (linkp != NULL) {
next_linkp = AVL_NEXT(&dlmgmt_name_avl, linkp);
if (linkp->ll_zoneid == zoneid) {
- (void) dlmgmt_destroy_common(linkp,
- DLMGMT_ACTIVE | DLMGMT_PERSIST);
+ boolean_t onloan = linkp->ll_onloan;
+
+ /*
+ * Cleanup any VNICs that were loaned to the zone
+ * before the zone goes away and we can no longer
+ * refer to the VNIC by the name/zoneid.
+ */
+ if (onloan) {
+ (void) dlmgmt_delete_db_entry(linkp,
+ DLMGMT_ACTIVE);
+ linkp->ll_tomb = B_TRUE;
+ } else {
+ (void) dlmgmt_destroy_common(linkp,
+ DLMGMT_ACTIVE | DLMGMT_PERSIST);
+ }
+
}
linkp = next_linkp;
}
+
+again:
+ linkp = avl_first(&dlmgmt_name_avl);
+ while (linkp != NULL) {
+ vnic_ioc_delete_t ioc;
+
+ next_linkp = AVL_NEXT(&dlmgmt_name_avl, linkp);
+
+ if (linkp->ll_zoneid != zoneid) {
+ linkp = next_linkp;
+ continue;
+ }
+ ioc.vd_vnic_id = linkp->ll_linkid;
+ if (linkp->ll_tomb != B_TRUE)
+ abort();
+
+ /*
+ * We have to drop the table lock while going up into the
+ * kernel. If we hold the table lock while deleting a vnic, we
+ * may get blocked on the mac perimeter and the holder of it may
+ * want something from dlmgmtd.
+ */
+ dlmgmt_table_unlock();
+
+ if (ioctl(dladm_dld_fd(dld_handle),
+ VNIC_IOC_DELETE, &ioc) < 0)
+ dlmgmt_log(LOG_WARNING, "dlmgmt_db_fini "
+ "delete VNIC ioctl failed %d %d",
+ ioc.vd_vnic_id, errno);
+
+ /*
+ * Even though we've dropped the lock, we know that nothing else
+ * could have removed us. Therefore, it should be safe to go
+ * through and delete ourselves, but do nothing else. We'll have
+ * to restart iteration from the beginning. This can be painful.
+ */
+ dlmgmt_table_lock(B_TRUE);
+
+ (void) dlmgmt_destroy_common(linkp,
+ DLMGMT_ACTIVE | DLMGMT_PERSIST);
+ goto again;
+ }
+
}
diff --git a/usr/src/cmd/dlmgmtd/dlmgmt_door.c b/usr/src/cmd/dlmgmtd/dlmgmt_door.c
index 11e4329669..137c2a6fb3 100644
--- a/usr/src/cmd/dlmgmtd/dlmgmt_door.c
+++ b/usr/src/cmd/dlmgmtd/dlmgmt_door.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent Inc. All rights reserved.
*/
/*
@@ -58,6 +59,10 @@
#include <libsysevent.h>
#include <libdlmgmt.h>
#include <librcm.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
#include "dlmgmt_impl.h"
typedef void dlmgmt_door_handler_t(void *, void *, size_t *, zoneid_t,
@@ -379,6 +384,11 @@ dlmgmt_upcall_destroy(void *argp, void *retp, size_t *sz, zoneid_t zoneid,
if ((err = dlmgmt_checkprivs(linkp->ll_class, cred)) != 0)
goto done;
+ if (linkp->ll_tomb == B_TRUE) {
+ err = EINPROGRESS;
+ goto done;
+ }
+
if (((linkp->ll_flags & flags) & DLMGMT_ACTIVE) != 0) {
if ((err = dlmgmt_delete_db_entry(linkp, DLMGMT_ACTIVE)) != 0)
goto done;
@@ -439,6 +449,10 @@ dlmgmt_getlinkid(void *argp, void *retp, size_t *sz, zoneid_t zoneid,
dlmgmt_link_t *linkp;
int err = 0;
+ /* Enable the global zone to lookup links it has given away. */
+ if (zoneid == GLOBAL_ZONEID && getlinkid->ld_zoneid != -1)
+ zoneid = getlinkid->ld_zoneid;
+
/*
* Hold the reader lock to access the link
*/
@@ -648,6 +662,12 @@ dlmgmt_remapid(void *argp, void *retp, size_t *sz, zoneid_t zoneid,
if ((err = dlmgmt_checkprivs(linkp->ll_class, cred)) != 0)
goto done;
+ if (linkp->ll_tomb == B_TRUE) {
+ err = EBUSY;
+ goto done;
+ }
+
+
if (link_by_name(remapid->ld_link, linkp->ll_zoneid) != NULL) {
err = EEXIST;
goto done;
@@ -709,6 +729,11 @@ dlmgmt_upid(void *argp, void *retp, size_t *sz, zoneid_t zoneid,
if ((err = dlmgmt_checkprivs(linkp->ll_class, cred)) != 0)
goto done;
+ if (linkp->ll_tomb == B_TRUE) {
+ err = EBUSY;
+ goto done;
+ }
+
if (linkp->ll_flags & DLMGMT_ACTIVE) {
err = EINVAL;
goto done;
@@ -1216,6 +1241,11 @@ dlmgmt_setzoneid(void *argp, void *retp, size_t *sz, zoneid_t zoneid,
if ((err = dlmgmt_checkprivs(linkp->ll_class, cred)) != 0)
goto done;
+ if (linkp->ll_tomb == B_TRUE) {
+ err = EBUSY;
+ goto done;
+ }
+
/* We can only assign an active link to a zone. */
if (!(linkp->ll_flags & DLMGMT_ACTIVE)) {
err = EINVAL;
@@ -1245,7 +1275,19 @@ dlmgmt_setzoneid(void *argp, void *retp, size_t *sz, zoneid_t zoneid,
"zone %d: %s", linkid, oldzoneid, strerror(err));
goto done;
}
- avl_remove(&dlmgmt_loan_avl, linkp);
+
+ if (newzoneid == GLOBAL_ZONEID && linkp->ll_onloan) {
+ /*
+ * We can only reassign a loaned VNIC back to the
+ * global zone when the zone is shutting down, since
+ * otherwise the VNIC is in use by the zone and will be
+ * busy. Leave the VNIC assigned to the zone so we can
+ * still see it and delete it when dlmgmt_zonehalt()
+ * runs.
+ */
+ goto done;
+ }
+
linkp->ll_onloan = B_FALSE;
}
if (newzoneid != GLOBAL_ZONEID) {
@@ -1256,7 +1298,6 @@ dlmgmt_setzoneid(void *argp, void *retp, size_t *sz, zoneid_t zoneid,
(void) zone_add_datalink(oldzoneid, linkid);
goto done;
}
- avl_add(&dlmgmt_loan_avl, linkp);
linkp->ll_onloan = B_TRUE;
}
@@ -1309,6 +1350,10 @@ dlmgmt_zonehalt(void *argp, void *retp, size_t *sz, zoneid_t zoneid,
int err = 0;
dlmgmt_door_zonehalt_t *zonehalt = argp;
dlmgmt_zonehalt_retval_t *retvalp = retp;
+ static char my_pid[10];
+
+ if (my_pid[0] == NULL)
+ (void) snprintf(my_pid, sizeof (my_pid), "%d\n", getpid());
if ((err = dlmgmt_checkprivs(0, cred)) == 0) {
if (zoneid != GLOBAL_ZONEID) {
@@ -1316,9 +1361,26 @@ dlmgmt_zonehalt(void *argp, void *retp, size_t *sz, zoneid_t zoneid,
} else if (zonehalt->ld_zoneid == GLOBAL_ZONEID) {
err = EINVAL;
} else {
+ /*
+ * dls and mac don't honor the locking rules defined in
+ * mac. In order to try and make that case less likely
+ * to happen, we try to serialize some of the zone
+ * activity here between dlmgmtd and the brands on
+ * /etc/dladm/zone.lck
+ */
+ int fd;
+
+ while ((fd = open(ZONE_LOCK, O_WRONLY |
+ O_CREAT | O_EXCL, S_IRUSR | S_IWUSR)) < 0)
+ (void) sleep(1);
+ (void) write(fd, my_pid, sizeof (my_pid));
+ (void) close(fd);
+
dlmgmt_table_lock(B_TRUE);
dlmgmt_db_fini(zonehalt->ld_zoneid);
dlmgmt_table_unlock();
+
+ (void) unlink(ZONE_LOCK);
}
}
retvalp->lr_err = err;
diff --git a/usr/src/cmd/dlmgmtd/dlmgmt_impl.h b/usr/src/cmd/dlmgmtd/dlmgmt_impl.h
index cdfd0d8a4d..dde27ef66e 100644
--- a/usr/src/cmd/dlmgmtd/dlmgmt_impl.h
+++ b/usr/src/cmd/dlmgmtd/dlmgmt_impl.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent Inc. All rights reserved.
*/
/*
@@ -67,6 +68,7 @@ typedef struct dlmgmt_link_s {
avl_node_t ll_loan_node;
uint32_t ll_flags;
uint32_t ll_gen; /* generation number */
+ boolean_t ll_tomb; /* tombstombed */
} dlmgmt_link_t;
/*
@@ -84,6 +86,8 @@ typedef struct dlmgmt_dlconf_s {
avl_node_t ld_node;
} dlmgmt_dlconf_t;
+#define ZONE_LOCK "/etc/dladm/zone.lck"
+
extern boolean_t debug;
extern const char *progname;
extern char cachefile[];
@@ -138,7 +142,7 @@ void dlmgmt_handler(void *, char *, size_t, door_desc_t *, uint_t);
void dlmgmt_log(int, const char *, ...);
int dlmgmt_write_db_entry(const char *, dlmgmt_link_t *, uint32_t);
int dlmgmt_delete_db_entry(dlmgmt_link_t *, uint32_t);
-int dlmgmt_db_init(zoneid_t);
+int dlmgmt_db_init(zoneid_t, char *);
void dlmgmt_db_fini(zoneid_t);
#ifdef __cplusplus
diff --git a/usr/src/cmd/dlmgmtd/dlmgmt_main.c b/usr/src/cmd/dlmgmtd/dlmgmt_main.c
index c02610bb5f..d8397cc0e6 100644
--- a/usr/src/cmd/dlmgmtd/dlmgmt_main.c
+++ b/usr/src/cmd/dlmgmtd/dlmgmt_main.c
@@ -22,6 +22,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
*/
/*
@@ -125,15 +126,24 @@ dlmgmt_door_fini(void)
dlmgmt_door_fd = -1;
}
-int
+static int
dlmgmt_door_attach(zoneid_t zoneid, char *rootdir)
{
int fd;
int err = 0;
char doorpath[MAXPATHLEN];
+ struct stat statbuf;
- (void) snprintf(doorpath, sizeof (doorpath), "%s%s", rootdir,
- DLMGMT_DOOR);
+ /* Handle running in a non-native branded zone (i.e. has /native) */
+ (void) snprintf(doorpath, sizeof (doorpath), "%s/native%s",
+ rootdir, DLMGMT_TMPFS_DIR);
+ if (stat(doorpath, &statbuf) == 0) {
+ (void) snprintf(doorpath, sizeof (doorpath), "%s/native%s",
+ rootdir, DLMGMT_DOOR);
+ } else {
+ (void) snprintf(doorpath, sizeof (doorpath), "%s%s",
+ rootdir, DLMGMT_DOOR);
+ }
/*
* Create the door file for dlmgmtd.
@@ -192,8 +202,16 @@ dlmgmt_zone_init(zoneid_t zoneid)
(void) snprintf(tmpfsdir, sizeof (tmpfsdir), "%s%s", rootdir,
DLMGMT_TMPFS_DIR);
if (stat(tmpfsdir, &statbuf) < 0) {
- if (mkdir(tmpfsdir, (mode_t)0755) < 0)
- return (errno);
+ if (mkdir(tmpfsdir, (mode_t)0755) < 0) {
+ /*
+ * Handle running in a non-native branded zone
+ * (i.e. has /native)
+ */
+ (void) snprintf(tmpfsdir, sizeof (tmpfsdir),
+ "%s/native%s", rootdir, DLMGMT_TMPFS_DIR);
+ if (mkdir(tmpfsdir, (mode_t)0755) < 0)
+ return (errno);
+ }
} else if ((statbuf.st_mode & S_IFMT) != S_IFDIR) {
return (ENOTDIR);
}
@@ -203,7 +221,7 @@ dlmgmt_zone_init(zoneid_t zoneid)
return (EPERM);
}
- if ((err = dlmgmt_db_init(zoneid)) != 0)
+ if ((err = dlmgmt_db_init(zoneid, rootdir)) != 0)
return (err);
return (dlmgmt_door_attach(zoneid, rootdir));
}
@@ -214,7 +232,7 @@ dlmgmt_zone_init(zoneid_t zoneid)
static int
dlmgmt_allzones_init(void)
{
- int err, i;
+ int i;
zoneid_t *zids = NULL;
uint_t nzids, nzids_saved;
@@ -235,11 +253,37 @@ again:
}
for (i = 0; i < nzids; i++) {
- if ((err = dlmgmt_zone_init(zids[i])) != 0)
- break;
+ int res;
+ zone_status_t status;
+
+ /*
+ * Skip over zones that have gone away or are going down
+ * since we got the list. Process all zones in the list,
+ * logging errors for any that failed.
+ */
+ if (zone_getattr(zids[i], ZONE_ATTR_STATUS, &status,
+ sizeof (status)) < 0)
+ continue;
+ switch (status) {
+ case ZONE_IS_SHUTTING_DOWN:
+ case ZONE_IS_EMPTY:
+ case ZONE_IS_DOWN:
+ case ZONE_IS_DYING:
+ case ZONE_IS_DEAD:
+ /* FALLTHRU */
+ continue;
+ default:
+ break;
+ }
+ if ((res = dlmgmt_zone_init(zids[i])) != 0) {
+ (void) fprintf(stderr, "zone (%ld) init error %s",
+ zids[i], strerror(res));
+ dlmgmt_log(LOG_ERR, "zone (%d) init error %s",
+ zids[i], strerror(res));
+ }
}
free(zids);
- return (err);
+ return (0);
}
static int
@@ -262,6 +306,8 @@ dlmgmt_init(void)
return (err);
}
+ (void) unlink(ZONE_LOCK);
+
/*
* First derive the name of the cache file from the FMRI name. This
* cache name is used to keep active datalink configuration.
diff --git a/usr/src/cmd/dlmgmtd/dlmgmt_util.c b/usr/src/cmd/dlmgmtd/dlmgmt_util.c
index afcfbed37b..7493ee3577 100644
--- a/usr/src/cmd/dlmgmtd/dlmgmt_util.c
+++ b/usr/src/cmd/dlmgmtd/dlmgmt_util.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent Inc. All rights reserved.
*/
/*
@@ -45,13 +46,10 @@
/*
* There are three datalink AVL tables. The dlmgmt_name_avl tree contains all
* datalinks and is keyed by zoneid and link name. The dlmgmt_id_avl also
- * contains all datalinks, and it is keyed by link ID. The dlmgmt_loan_avl is
- * keyed by link name, and contains the set of global-zone links that are
- * currently on loan to non-global zones.
+ * contains all datalinks, and it is keyed by link ID.
*/
avl_tree_t dlmgmt_name_avl;
avl_tree_t dlmgmt_id_avl;
-avl_tree_t dlmgmt_loan_avl;
avl_tree_t dlmgmt_dlconf_avl;
@@ -162,8 +160,6 @@ dlmgmt_linktable_init(void)
offsetof(dlmgmt_link_t, ll_name_node));
avl_create(&dlmgmt_id_avl, cmp_link_by_id, sizeof (dlmgmt_link_t),
offsetof(dlmgmt_link_t, ll_id_node));
- avl_create(&dlmgmt_loan_avl, cmp_link_by_name, sizeof (dlmgmt_link_t),
- offsetof(dlmgmt_link_t, ll_loan_node));
avl_create(&dlmgmt_dlconf_avl, cmp_dlconf_by_id,
sizeof (dlmgmt_dlconf_t), offsetof(dlmgmt_dlconf_t, ld_node));
dlmgmt_nextlinkid = 1;
@@ -181,7 +177,6 @@ dlmgmt_linktable_fini(void)
avl_destroy(&dlmgmt_dlconf_avl);
avl_destroy(&dlmgmt_name_avl);
- avl_destroy(&dlmgmt_loan_avl);
avl_destroy(&dlmgmt_id_avl);
}
@@ -385,7 +380,6 @@ link_activate(dlmgmt_link_t *linkp)
linkp->ll_zoneid = zoneid;
avl_add(&dlmgmt_name_avl, linkp);
- avl_add(&dlmgmt_loan_avl, linkp);
linkp->ll_onloan = B_TRUE;
}
} else if (linkp->ll_zoneid != GLOBAL_ZONEID) {
@@ -430,10 +424,6 @@ link_by_name(const char *name, zoneid_t zoneid)
(void) strlcpy(link.ll_link, name, MAXLINKNAMELEN);
link.ll_zoneid = zoneid;
linkp = avl_find(&dlmgmt_name_avl, &link, NULL);
- if (linkp == NULL && zoneid == GLOBAL_ZONEID) {
- /* The link could be on loan to a non-global zone? */
- linkp = avl_find(&dlmgmt_loan_avl, &link, NULL);
- }
return (linkp);
}
@@ -461,6 +451,7 @@ dlmgmt_create_common(const char *name, datalink_class_t class, uint32_t media,
linkp->ll_linkid = dlmgmt_nextlinkid;
linkp->ll_zoneid = zoneid;
linkp->ll_gen = 0;
+ linkp->ll_tomb = B_FALSE;
if (avl_find(&dlmgmt_name_avl, linkp, &name_where) != NULL ||
avl_find(&dlmgmt_id_avl, linkp, &id_where) != NULL) {
@@ -511,8 +502,6 @@ dlmgmt_destroy_common(dlmgmt_link_t *linkp, uint32_t flags)
if ((flags & DLMGMT_ACTIVE) && linkp->ll_zoneid != GLOBAL_ZONEID) {
(void) zone_remove_datalink(linkp->ll_zoneid, linkp->ll_linkid);
- if (linkp->ll_onloan)
- avl_remove(&dlmgmt_loan_avl, linkp);
}
if (linkp->ll_flags == 0) {
diff --git a/usr/src/cmd/dlmgmtd/svc-dlmgmtd b/usr/src/cmd/dlmgmtd/svc-dlmgmtd
index 7559207535..a75e71f9b3 100644
--- a/usr/src/cmd/dlmgmtd/svc-dlmgmtd
+++ b/usr/src/cmd/dlmgmtd/svc-dlmgmtd
@@ -23,17 +23,16 @@
#
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
+# Copyright 2012 Joyent, Inc. All rights reserved.
#
-# ident "%Z%%M% %I% %E% SMI"
. /lib/svc/share/smf_include.sh
-# The real daemon is not started in a non-global zone. But we need to
-# create a dummy background process to preserve contract lifetime.
+# The real daemon is not started in a non-global zone. Exit to leave
+# an empty contract.
if smf_is_nonglobalzone; then
- (while true ; do sleep 3600 ; done) &
- exit $SMF_EXIT_OK
+ exit $SMF_EXIT_NODAEMON
fi
# Start the dlmgmtd daemon.
diff --git a/usr/src/cmd/dlstat/dlstat.c b/usr/src/cmd/dlstat/dlstat.c
index a931ba82ff..2615fdbb12 100644
--- a/usr/src/cmd/dlstat/dlstat.c
+++ b/usr/src/cmd/dlstat/dlstat.c
@@ -62,7 +62,7 @@ typedef struct link_chain_s {
struct link_chain_s *lc_next;
} link_chain_t;
-typedef void * (*stats2str_t)(const char *, void *,
+typedef void * (*stats2str_t)(const char *, const char *, void *,
char, boolean_t);
typedef struct show_state {
@@ -141,6 +141,7 @@ typedef struct total_fields_buf_s {
char t_rbytes[MAXSTATLEN];
char t_opackets[MAXSTATLEN];
char t_obytes[MAXSTATLEN];
+ char t_zone[ZONENAME_MAX];
} total_fields_buf_t;
static ofmt_field_t total_s_fields[] = {
@@ -154,6 +155,8 @@ static ofmt_field_t total_s_fields[] = {
offsetof(total_fields_buf_t, t_opackets), print_default_cb},
{ "OBYTES", 8,
offsetof(total_fields_buf_t, t_obytes), print_default_cb},
+{ "ZONE", 20,
+ offsetof(total_fields_buf_t, t_zone), print_default_cb},
{ NULL, 0, 0, NULL}};
/*
@@ -957,8 +960,8 @@ cleanup_removed_links(show_state_t *state)
}
void *
-print_total_stats(const char *linkname, void *statentry, char unit,
- boolean_t parsable)
+print_total_stats(const char *linkname, const char *zonename, void *statentry,
+ char unit, boolean_t parsable)
{
total_stat_entry_t *sentry = statentry;
total_stat_t *link_stats = &sentry->tse_stats;
@@ -970,6 +973,7 @@ print_total_stats(const char *linkname, void *statentry, char unit,
(void) snprintf(buf->t_linkname, sizeof (buf->t_linkname), "%s",
linkname);
+ (void) snprintf(buf->t_zone, sizeof (buf->t_zone), "%s", zonename);
map_to_units(buf->t_ipackets, sizeof (buf->t_ipackets),
link_stats->ts_ipackets, unit, parsable);
@@ -988,8 +992,8 @@ done:
}
void *
-print_rx_generic_ring_stats(const char *linkname, void *statentry, char unit,
- boolean_t parsable)
+print_rx_generic_ring_stats(const char *linkname, const char *zonename,
+ void *statentry, char unit, boolean_t parsable)
{
ring_stat_entry_t *sentry = statentry;
ring_stat_t *link_stats = &sentry->re_stats;
@@ -1022,8 +1026,8 @@ done:
}
void *
-print_tx_generic_ring_stats(const char *linkname, void *statentry, char unit,
- boolean_t parsable)
+print_tx_generic_ring_stats(const char *linkname, const char *zonename,
+ void *statentry, char unit, boolean_t parsable)
{
ring_stat_entry_t *sentry = statentry;
ring_stat_t *link_stats = &sentry->re_stats;
@@ -1056,8 +1060,8 @@ done:
}
void *
-print_rx_ring_stats(const char *linkname, void *statentry, char unit,
- boolean_t parsable)
+print_rx_ring_stats(const char *linkname, const char *zonename, void *statentry,
+ char unit, boolean_t parsable)
{
ring_stat_entry_t *sentry = statentry;
ring_stat_t *link_stats = &sentry->re_stats;
@@ -1090,8 +1094,8 @@ done:
}
void *
-print_tx_ring_stats(const char *linkname, void *statentry, char unit,
- boolean_t parsable)
+print_tx_ring_stats(const char *linkname, const char *zonename, void *statentry,
+ char unit, boolean_t parsable)
{
ring_stat_entry_t *sentry = statentry;
ring_stat_t *link_stats = &sentry->re_stats;
@@ -1124,8 +1128,8 @@ done:
}
void *
-print_rx_generic_lane_stats(const char *linkname, void *statentry, char unit,
- boolean_t parsable)
+print_rx_generic_lane_stats(const char *linkname, const char *zonename,
+ void *statentry, char unit, boolean_t parsable)
{
rx_lane_stat_entry_t *sentry = statentry;
rx_lane_stat_t *link_stats = &sentry->rle_stats;
@@ -1172,8 +1176,8 @@ done:
}
void *
-print_tx_generic_lane_stats(const char *linkname, void *statentry, char unit,
- boolean_t parsable)
+print_tx_generic_lane_stats(const char *linkname, const char *zonename,
+ void *statentry, char unit, boolean_t parsable)
{
tx_lane_stat_entry_t *sentry = statentry;
tx_lane_stat_t *link_stats = &sentry->tle_stats;
@@ -1217,8 +1221,8 @@ done:
}
void *
-print_rx_lane_stats(const char *linkname, void *statentry, char unit,
- boolean_t parsable)
+print_rx_lane_stats(const char *linkname, const char *zonename, void *statentry,
+ char unit, boolean_t parsable)
{
rx_lane_stat_entry_t *sentry = statentry;
rx_lane_stat_t *link_stats = &sentry->rle_stats;
@@ -1283,9 +1287,8 @@ done:
}
void *
-print_tx_lane_stats(const char *linkname, void *statentry, char unit,
- boolean_t parsable)
-{
+print_tx_lane_stats(const char *linkname, const char *zonename, void *statentry,
+ char unit, boolean_t parsable) {
tx_lane_stat_entry_t *sentry = statentry;
tx_lane_stat_t *link_stats = &sentry->tle_stats;
tx_lane_fields_buf_t *buf = NULL;
@@ -1338,8 +1341,8 @@ done:
}
void *
-print_fanout_stats(const char *linkname, void *statentry, char unit,
- boolean_t parsable)
+print_fanout_stats(const char *linkname, const char *zonename, void *statentry,
+ char unit, boolean_t parsable)
{
fanout_stat_entry_t *sentry = statentry;
fanout_stat_t *link_stats = &sentry->fe_stats;
@@ -1392,8 +1395,8 @@ done:
}
void *
-print_aggr_port_stats(const char *linkname, void *statentry, char unit,
- boolean_t parsable)
+print_aggr_port_stats(const char *linkname, const char *zonename,
+ void *statentry, char unit, boolean_t parsable)
{
aggr_port_stat_entry_t *sentry = statentry;
aggr_port_stat_t *link_stats = &sentry->ape_stats;
@@ -1470,7 +1473,8 @@ done:
void
walk_dlstat_stats(show_state_t *state, const char *linkname,
- dladm_stat_type_t stattype, dladm_stat_chain_t *diff_stat)
+ const char *zonename, dladm_stat_type_t stattype,
+ dladm_stat_chain_t *diff_stat)
{
dladm_stat_chain_t *curr;
@@ -1480,7 +1484,8 @@ walk_dlstat_stats(show_state_t *state, const char *linkname,
/* Format the raw numbers for printing */
fields_buf = state->ls_stats2str[stattype](linkname,
- curr->dc_statentry, state->ls_unit, state->ls_parsable);
+ zonename, curr->dc_statentry, state->ls_unit,
+ state->ls_parsable);
/* Print the stats */
if (fields_buf != NULL)
ofmt_print(state->ls_ofmt, fields_buf);
@@ -1495,12 +1500,20 @@ show_queried_stats(dladm_handle_t dh, datalink_id_t linkid, void *arg)
int i;
dladm_stat_chain_t *diff_stat;
char linkname[DLPI_LINKNAME_MAX];
+ char zonename[DLADM_PROP_VAL_MAX + 1];
+ char *valptr[1];
+ uint_t valcnt = 1;
if (dladm_datalink_id2info(dh, linkid, NULL, NULL, NULL, linkname,
DLPI_LINKNAME_MAX) != DLADM_STATUS_OK) {
goto done;
}
+ valptr[0] = zonename;
+ if (dladm_get_linkprop(handle, linkid, DLADM_PROP_VAL_CURRENT, "zone",
+ (char **)valptr, &valcnt) != 0)
+ zonename[0] = '\0';
+
for (i = 0; i < DLADM_STAT_NUM_STATS; i++) {
if (state->ls_stattype[i]) {
/*
@@ -1508,7 +1521,8 @@ show_queried_stats(dladm_handle_t dh, datalink_id_t linkid, void *arg)
* Stats are returned as chain of raw numbers
*/
diff_stat = query_link_stats(handle, linkid, arg, i);
- walk_dlstat_stats(state, linkname, i, diff_stat);
+ walk_dlstat_stats(state, linkname, zonename, i,
+ diff_stat);
dladm_link_stat_free(diff_stat);
}
}
@@ -1628,7 +1642,7 @@ do_show(int argc, char *argv[], const char *use)
char *o_fields_str = NULL;
char *total_stat_fields =
- "link,ipkts,rbytes,opkts,obytes";
+ "link,ipkts,rbytes,opkts,obytes,zone";
char *rx_total_stat_fields =
"link,ipkts,rbytes,intrs,polls,ch<10,ch10-50,ch>50";
char *tx_total_stat_fields =
diff --git a/usr/src/cmd/dtrace/test/cmd/jdtrace/Makefile b/usr/src/cmd/dtrace/test/cmd/jdtrace/Makefile
index 66479e77bc..ee93b7a258 100644
--- a/usr/src/cmd/dtrace/test/cmd/jdtrace/Makefile
+++ b/usr/src/cmd/dtrace/test/cmd/jdtrace/Makefile
@@ -75,7 +75,7 @@ $(PROG): $(SRCS)
$(POST_PROCESS) ; $(STRIP_STABS)
JFLAGS= -g -cp $(CLASSPATH) -d $(CLASSDIR)
-JFLAGS += -source 1.5 -target 1.6 -Xlint:all,-options
+JFLAGS += -source 1.5 -target 1.6 -Xlint:all,-options,-path
COMPILE.java=$(JAVAC) $(JFLAGS)
JAVASRC= JDTrace.java Getopt.java
diff --git a/usr/src/cmd/dtrace/test/cmd/scripts/dtest.pl b/usr/src/cmd/dtrace/test/cmd/scripts/dtest.pl
index d6f1c8c277..e7f9189822 100644
--- a/usr/src/cmd/dtrace/test/cmd/scripts/dtest.pl
+++ b/usr/src/cmd/dtrace/test/cmd/scripts/dtest.pl
@@ -566,7 +566,7 @@ $defdir = -d $dt_tst ? $dt_tst : '.';
$bindir = -d $dt_bin ? $dt_bin : '.';
if (!$opt_F) {
- my @dependencies = ("gcc", "make", "java", "perl");
+ my @dependencies = ("gcc", "cc", "make", "java", "perl", "printenv");
for my $dep (@dependencies) {
if (!inpath($dep)) {
diff --git a/usr/src/cmd/dtrace/test/tst/common/Makefile b/usr/src/cmd/dtrace/test/tst/common/Makefile
index d7ed139203..32d6a1db05 100644
--- a/usr/src/cmd/dtrace/test/tst/common/Makefile
+++ b/usr/src/cmd/dtrace/test/tst/common/Makefile
@@ -153,6 +153,14 @@ usdt/tst.forker.o: usdt/forker.h
usdt/forker.h: usdt/forker.d
$(DTRACE) -h -s usdt/forker.d -o usdt/forker.h
+ustack/tst.unpriv.exe: ustack/tst.unpriv.o ustack/unpriv_helper.o
+ $(LINK.c) -o ustack/tst.unpriv.exe \
+ ustack/tst.unpriv.o ustack/unpriv_helper.o $(LDLIBS)
+ $(POST_PROCESS) ; $(STRIP_STABS)
+
+ustack/unpriv_helper.o: ustack/unpriv_helper.d
+ $(COMPILE.d) -o ustack/unpriv_helper.o -s ustack/unpriv_helper.d
+
usdt/tst.lazyprobe.exe: usdt/tst.lazyprobe.o usdt/lazyprobe.o
$(LINK.c) -o usdt/tst.lazyprobe.exe \
usdt/tst.lazyprobe.o usdt/lazyprobe.o $(LDLIBS)
diff --git a/usr/src/cmd/dtrace/test/tst/common/java_api/Makefile b/usr/src/cmd/dtrace/test/tst/common/java_api/Makefile
index 43daede8e8..04e6cca7b4 100644
--- a/usr/src/cmd/dtrace/test/tst/common/java_api/Makefile
+++ b/usr/src/cmd/dtrace/test/tst/common/java_api/Makefile
@@ -60,7 +60,7 @@ lint:
install: all $(PROTO_TEST_JAR)
JFLAGS= -g -cp $(CLASSPATH) -d $(CLASSDIR)
-JFLAGS += -source 1.5 -target 1.6 -Xlint:all,-options,-rawtypes
+JFLAGS += -source 1.5 -target 1.6 -Xlint:all,-options,-rawtypes,-path
COMPILE.java=$(JAVAC) $(JFLAGS)
$(TEST_JAR): $(SRCDIR)/*.java
diff --git a/usr/src/cmd/dtrace/test/tst/common/llquantize/tst.range.d b/usr/src/cmd/dtrace/test/tst/common/llquantize/tst.range.d
index e2882b3f8e..a9138d2f54 100644
--- a/usr/src/cmd/dtrace/test/tst/common/llquantize/tst.range.d
+++ b/usr/src/cmd/dtrace/test/tst/common/llquantize/tst.range.d
@@ -19,10 +19,6 @@
* CDDL HEADER END
*/
-/*
- * Copyright (c) 2011, Joyent, Inc. All rights reserved.
- */
-
#pragma D option quiet
BEGIN
diff --git a/usr/src/cmd/dtrace/test/tst/common/mdb/tst.postmort.ksh b/usr/src/cmd/dtrace/test/tst/common/mdb/tst.postmort.ksh
new file mode 100644
index 0000000000..c5921b8d28
--- /dev/null
+++ b/usr/src/cmd/dtrace/test/tst/common/mdb/tst.postmort.ksh
@@ -0,0 +1,69 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2011, Joyent, Inc. All rights reserved.
+#
+
+if [ $# != 1 ]; then
+ echo expected one argument: '<'dtrace-path'>'
+ exit 2
+fi
+
+dtrace=$1
+DIR=/var/tmp/dtest.$$
+
+mkdir $DIR
+cd $DIR
+
+expected=`od -t u8 -N 8 /dev/urandom | head -1 | cut -d ' ' -f2`
+
+$dtrace -x bufpolicy=ring -x bufsize=10k -qs /dev/stdin > /dev/null 2>&1 <<EOF &
+ tick-1ms
+ /i < 10000/
+ {
+ printf("%d: expected is $expected!\n", i++);
+ }
+
+ tick-1ms
+ /i >= 10000/
+ {
+ exit(0);
+ }
+EOF
+
+background=$!
+
+#
+# Give some time for the enabling to get there...
+#
+sleep 2
+
+echo "::walk dtrace_state | ::dtrace" | mdb -k | tee test.out
+grep "expected is $expected" test.out 2> /dev/null 1>&2
+status=$?
+
+kill $background
+
+cd /
+/usr/bin/rm -rf $DIR
+
+exit $status
diff --git a/usr/src/cmd/dtrace/test/tst/common/ustack/tst.unpriv.c b/usr/src/cmd/dtrace/test/tst/common/ustack/tst.unpriv.c
new file mode 100644
index 0000000000..43ba244444
--- /dev/null
+++ b/usr/src/cmd/dtrace/test/tst/common/ustack/tst.unpriv.c
@@ -0,0 +1,7 @@
+int
+main(int argc, char *argv[])
+{
+ for (;;)
+ ;
+ return (0);
+}
diff --git a/usr/src/cmd/dtrace/test/tst/common/ustack/tst.unpriv.ksh b/usr/src/cmd/dtrace/test/tst/common/ustack/tst.unpriv.ksh
new file mode 100644
index 0000000000..26c430bff7
--- /dev/null
+++ b/usr/src/cmd/dtrace/test/tst/common/ustack/tst.unpriv.ksh
@@ -0,0 +1,68 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2011, Joyent, Inc. All rights reserved.
+#
+
+ppriv -s A=basic,dtrace_user,dtrace_proc $$
+
+if [ $# != 1 ]; then
+ echo expected one argument: '<'dtrace-path'>'
+ exit 2
+fi
+
+file=out.$$
+dtrace=$1
+
+rm -f $file
+
+dir=`/bin/dirname $tst`
+
+$dtrace -o $file -c $dir/tst.unpriv.exe -ws /dev/stdin <<EOF
+ profile-1234hz
+ /pid == \$target/
+ {
+ @[ustack(20, 8192)] = count();
+ }
+
+ tick-1s
+ {
+ secs++;
+ }
+
+ tick-1s
+ /secs > 10/
+ {
+ trace("test timed out");
+ exit(1);
+ }
+
+ profile-1234hz
+ /pid == \$target && secs > 5/
+ {
+ raise(SIGINT);
+ exit(0);
+ }
+EOF
+
+status=$?
+exit $status
diff --git a/usr/src/cmd/dtrace/test/tst/common/ustack/unpriv_helper.d b/usr/src/cmd/dtrace/test/tst/common/ustack/unpriv_helper.d
new file mode 100644
index 0000000000..eb7b0e9e9d
--- /dev/null
+++ b/usr/src/cmd/dtrace/test/tst/common/ustack/unpriv_helper.d
@@ -0,0 +1,4 @@
+dtrace:helper:ustack:
+{
+ this->otherstr = "doogle";
+}
diff --git a/usr/src/cmd/flowadm/flowadm.c b/usr/src/cmd/flowadm/flowadm.c
index 374fa1675c..34e597dc78 100644
--- a/usr/src/cmd/flowadm/flowadm.c
+++ b/usr/src/cmd/flowadm/flowadm.c
@@ -21,6 +21,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011 Joyent, Inc. All rights reserved.
*/
#include <stdio.h>
@@ -233,9 +234,9 @@ usage(void)
(void) fprintf(stderr, gettext("usage: flowadm <subcommand>"
" <args>...\n"
" add-flow [-t] -l <link> -a <attr>=<value>[,...]\n"
- "\t\t [-p <prop>=<value>,...] <flow>\n"
- " remove-flow [-t] {-l <link> | <flow>}\n"
- " show-flow [-p] [-l <link>] "
+ "\t\t [-p <prop>=<value>,...] [-z zonename] <flow>\n"
+ " remove-flow [-t] [-z zonename] {-l <link> | <flow>}\n"
+ " show-flow [-p] [-l <link>] [-z zonename] "
"[<flow>]\n\n"
" set-flowprop [-t] -p <prop>=<value>[,...] <flow>\n"
" reset-flowprop [-t] [-p <prop>,...] <flow>\n"
@@ -333,11 +334,12 @@ do_add_flow(int argc, char *argv[])
dladm_arg_list_t *proplist = NULL;
dladm_arg_list_t *attrlist = NULL;
dladm_status_t status;
+ char *zonename = NULL;
bzero(propstr, DLADM_STRSIZE);
bzero(attrstr, DLADM_STRSIZE);
- while ((option = getopt_long(argc, argv, "tR:l:a:p:",
+ while ((option = getopt_long(argc, argv, "tR:l:a:p:z:",
prop_longopts, NULL)) != -1) {
switch (option) {
case 't':
@@ -351,9 +353,6 @@ do_add_flow(int argc, char *argv[])
MAXLINKNAMELEN) >= MAXLINKNAMELEN) {
die("link name too long");
}
- if (dladm_name2info(handle, devname, &linkid, NULL,
- NULL, NULL) != DLADM_STATUS_OK)
- die("invalid link '%s'", devname);
l_arg = B_TRUE;
break;
case 'a':
@@ -368,6 +367,9 @@ do_add_flow(int argc, char *argv[])
DLADM_STRSIZE)
die("property list too long '%s'", propstr);
break;
+ case 'z':
+ zonename = optarg;
+ break;
default:
die_opterr(optopt, option);
}
@@ -376,6 +378,10 @@ do_add_flow(int argc, char *argv[])
die("link is required");
}
+ if (dladm_zname2info(handle, zonename, devname, &linkid, NULL,
+ NULL, NULL) != DLADM_STATUS_OK)
+ die("invalid link '%s'", devname);
+
opterr = 0;
index = optind;
@@ -414,11 +420,12 @@ do_remove_flow(int argc, char *argv[])
boolean_t l_arg = B_FALSE;
remove_flow_state_t state;
dladm_status_t status;
+ char *zonename = NULL;
bzero(&state, sizeof (state));
opterr = 0;
- while ((option = getopt_long(argc, argv, ":tR:l:",
+ while ((option = getopt_long(argc, argv, ":tR:l:z:",
longopts, NULL)) != -1) {
switch (option) {
case 't':
@@ -432,12 +439,11 @@ do_remove_flow(int argc, char *argv[])
MAXLINKNAMELEN) >= MAXLINKNAMELEN) {
die("link name too long");
}
- if (dladm_name2info(handle, linkname, &linkid, NULL,
- NULL, NULL) != DLADM_STATUS_OK) {
- die("invalid link '%s'", linkname);
- }
l_arg = B_TRUE;
break;
+ case 'z':
+ zonename = optarg;
+ break;
default:
die_opterr(optopt, option);
break;
@@ -458,6 +464,12 @@ do_remove_flow(int argc, char *argv[])
/* if link is specified then flow name should not be there */
if (optind == argc-1)
usage();
+
+ if (dladm_zname2info(handle, zonename, linkname, &linkid, NULL,
+ NULL, NULL) != DLADM_STATUS_OK) {
+ die("invalid link '%s'", linkname);
+ }
+
/* walk the link to find flows and remove them */
state.fs_tempop = t_arg;
state.fs_altroot = altroot;
@@ -597,11 +609,12 @@ do_show_flow(int argc, char *argv[])
ofmt_handle_t ofmt;
ofmt_status_t oferr;
uint_t ofmtflags = 0;
+ char *zonename = NULL;
bzero(&state, sizeof (state));
opterr = 0;
- while ((option = getopt_long(argc, argv, ":pPl:o:",
+ while ((option = getopt_long(argc, argv, ":pPl:o:z:",
longopts, NULL)) != -1) {
switch (option) {
case 'p':
@@ -622,17 +635,23 @@ do_show_flow(int argc, char *argv[])
if (strlcpy(linkname, optarg, MAXLINKNAMELEN)
>= MAXLINKNAMELEN)
die("link name too long\n");
- if (dladm_name2info(handle, linkname, &linkid, NULL,
- NULL, NULL) != DLADM_STATUS_OK)
- die("invalid link '%s'", linkname);
l_arg = B_TRUE;
break;
+ case 'z':
+ zonename = optarg;
+ break;
default:
die_opterr(optopt, option);
break;
}
}
+ if (l_arg) {
+ if (dladm_zname2info(handle, zonename, linkname, &linkid, NULL,
+ NULL, NULL) != DLADM_STATUS_OK)
+ die("invalid link '%s'", linkname);
+ }
+
/* get flow name (optional last argument */
if (optind == (argc-1)) {
if (strlcpy(flowname, argv[optind], MAXFLOWNAMELEN)
diff --git a/usr/src/cmd/flowstat/flowstat.c b/usr/src/cmd/flowstat/flowstat.c
index 1f8c65c957..faabd74a14 100644
--- a/usr/src/cmd/flowstat/flowstat.c
+++ b/usr/src/cmd/flowstat/flowstat.c
@@ -21,6 +21,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011 Joyent, Inc. All rights reserved.
*/
#include <stdio.h>
@@ -190,9 +191,9 @@ static char *progname;
static dladm_handle_t handle = NULL;
const char *usage_ermsg = "flowstat [-r | -t] [-i interval] "
- "[-l link] [flow]\n"
+ "[-l link] [-z zonename] [flow]\n"
" flowstat [-A] [-i interval] [-p] [ -o field[,...]]\n"
- " [-u R|K|M|G|T|P] [-l link] [flow]\n"
+ " [-u R|K|M|G|T|P] [-l link] [-z zonename] [flow]\n"
" flowstat -h [-a] [-d] [-F format]"
" [-s <DD/MM/YYYY,HH:MM:SS>]\n"
" [-e <DD/MM/YYYY,HH:MM:SS>] -f <logfile> "
@@ -555,6 +556,7 @@ main(int argc, char *argv[])
show_flow_state_t state;
char *fields_str = NULL;
char *o_fields_str = NULL;
+ char *zonename = NULL;
char *total_stat_fields =
"flow,ipkts,rbytes,ierrs,opkts,obytes,oerrs";
@@ -581,10 +583,11 @@ main(int argc, char *argv[])
if ((status = dladm_open(&handle)) != DLADM_STATUS_OK)
die_dlerr(status, "could not open /dev/dld");
+ linkname[0] = '\0';
bzero(&state, sizeof (state));
opterr = 0;
- while ((option = getopt_long(argc, argv, ":rtApi:o:u:l:h",
+ while ((option = getopt_long(argc, argv, ":rtApi:o:u:l:hz:",
NULL, NULL)) != -1) {
switch (option) {
case 'r':
@@ -636,9 +639,6 @@ main(int argc, char *argv[])
if (strlcpy(linkname, optarg, MAXLINKNAMELEN)
>= MAXLINKNAMELEN)
die("link name too long\n");
- if (dladm_name2info(handle, linkname, &linkid, NULL,
- NULL, NULL) != DLADM_STATUS_OK)
- die("invalid link '%s'", linkname);
break;
case 'h':
if (r_arg || t_arg || p_arg || o_arg || u_arg ||
@@ -649,6 +649,9 @@ main(int argc, char *argv[])
do_show_history(argc, argv);
return (0);
break;
+ case 'z':
+ zonename = optarg;
+ break;
default:
die_opterr(optopt, option, usage_ermsg);
break;
@@ -672,6 +675,12 @@ main(int argc, char *argv[])
die("the option -A is not compatible with "
"-r, -t, -p, -o, -u, -i");
+ if (linkname[0] != '\0') {
+ if (dladm_zname2info(handle, zonename, linkname, &linkid, NULL,
+ NULL, NULL) != DLADM_STATUS_OK)
+ die("invalid link '%s'", linkname);
+ }
+
/* get flow name (optional last argument) */
if (optind == (argc-1)) {
if (strlcpy(flowname, argv[optind], MAXFLOWNAMELEN)
diff --git a/usr/src/cmd/fs.d/Makefile b/usr/src/cmd/fs.d/Makefile
index 3c36817494..54460a6084 100644
--- a/usr/src/cmd/fs.d/Makefile
+++ b/usr/src/cmd/fs.d/Makefile
@@ -19,6 +19,7 @@
# CDDL HEADER END
#
# Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2012, Joyent, Inc. All rights reserved.
# Copyright 2015 Nexenta Systems, Inc. All rights reserved.
#
@@ -39,8 +40,8 @@ DEFAULTFILES= fs.dfl
include ../Makefile.cmd
-SUBDIR1= bootfs lofs zfs
-SUBDIR2= dev fd pcfs nfs hsfs proc ctfs udfs ufs tmpfs \
+SUBDIR1= bootfs hyprlofs lofs zfs
+SUBDIR2= dev fd pcfs nfs hsfs lxproc proc ctfs udfs ufs tmpfs \
autofs mntfs objfs sharefs smbclnt reparsed
SUBDIRS= $(SUBDIR1) $(SUBDIR2)
I18NDIRS= $(SUBDIR2)
diff --git a/usr/src/cmd/fs.d/hyprlofs/Makefile b/usr/src/cmd/fs.d/hyprlofs/Makefile
new file mode 100644
index 0000000000..1a3aaf18d3
--- /dev/null
+++ b/usr/src/cmd/fs.d/hyprlofs/Makefile
@@ -0,0 +1,42 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2012, Joyent, Inc. All rights reserved
+#
+
+SUBDIRS= hlcfg mount
+
+all:= TARGET= all
+install:= TARGET= install
+clean:= TARGET= clean
+clobber:= TARGET= clobber
+lint:= TARGET= lint
+
+.KEEP_STATE:
+
+.PARALLEL: $(SUBDIRS)
+
+all install clean clobber lint: $(SUBDIRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/cmd/fs.d/hyprlofs/hlcfg/Makefile b/usr/src/cmd/fs.d/hyprlofs/hlcfg/Makefile
new file mode 100644
index 0000000000..d2ae22e9fd
--- /dev/null
+++ b/usr/src/cmd/fs.d/hyprlofs/hlcfg/Makefile
@@ -0,0 +1,30 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2012, Joyent, Inc. All rights reserved.
+#
+
+FSTYPE= hyprlofs
+LIBPROG= hlcfg
+
+include ../../Makefile.fstype
+include ../../Makefile.mount
+include ../../Makefile.mount.targ
diff --git a/usr/src/cmd/fs.d/hyprlofs/hlcfg/hlcfg.c b/usr/src/cmd/fs.d/hyprlofs/hlcfg/hlcfg.c
new file mode 100644
index 0000000000..16e8e32b1c
--- /dev/null
+++ b/usr/src/cmd/fs.d/hyprlofs/hlcfg/hlcfg.c
@@ -0,0 +1,244 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2012, Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * This is a simple test program to exercise the hyprlofs ioctls. This is
+ * not designed as a full featured CLI and only does minimal error checking
+ * and reporting.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <strings.h>
+#include <sys/errno.h>
+#include <sys/fs/hyprlofs.h>
+
+extern int errno;
+
+char *usage = "usage: <fs path> add [<file name> <alias>]+\n"
+ " <fs path> addl [<file name>]+\n"
+ " <fs path> rm [<alias>]+\n"
+ " <fs path> clear"
+ " <fs path> get";
+
+typedef enum {
+ CMD_ADD,
+ CMD_RM,
+ CMD_CLR,
+ CMD_ADDL,
+ CMD_GET
+} cmd_t;
+
+static int
+get_entries(int fd)
+{
+ int err;
+ int i;
+ hyprlofs_curr_entries_t e;
+ hyprlofs_curr_entry_t *ep;
+
+ e.hce_cnt = 0;
+ e.hce_entries = NULL;
+
+ err = ioctl(fd, HYPRLOFS_GET_ENTRIES, &e);
+ if (err != 0 && errno != E2BIG) {
+ perror("ioctl");
+ return (1);
+ }
+
+ if (err == 0) {
+ (void) printf("success, but no entries\n");
+ return (0);
+ }
+
+ /*
+ * E2BIG is what we expect when there are existing mappings
+ * since the current cnt is still returned in that case.
+ */
+ (void) printf("cnt: %d\n", e.hce_cnt);
+
+ /* alloc array and call again, then print array */
+ if ((ep = (hyprlofs_curr_entry_t *)
+ malloc(sizeof (hyprlofs_curr_entry_t) * e.hce_cnt)) == NULL) {
+ (void) fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+
+ e.hce_entries = ep;
+ errno = 0;
+ if (ioctl(fd, HYPRLOFS_GET_ENTRIES, &e) != 0) {
+ /*
+ * Not handling an increase here. We would need to free and
+ * start over to do that, but ok for a test program.
+ */
+ perror("ioctl");
+ free(ep);
+ return (1);
+ }
+ for (i = 0; i < e.hce_cnt; i++)
+ (void) printf("%s %s\n", ep[i].hce_path, ep[i].hce_name);
+
+ free(ep);
+ return (0);
+}
+
+int
+main(int argc, char **argv)
+{
+ int i, ap;
+ cmd_t cmd;
+ int cnt = 0;
+ int fd;
+ int rv = 0;
+ hyprlofs_entry_t *e = NULL;
+ hyprlofs_entries_t ents;
+
+ if (argc < 3) {
+ (void) fprintf(stderr, "%s\n", usage);
+ exit(1);
+ }
+
+ if ((fd = open(argv[1], O_RDONLY)) < 0) {
+ perror("can't open hyprlofs mount");
+ exit(1);
+ }
+
+ if (strcmp(argv[2], "add") == 0) {
+ cmd = CMD_ADD;
+ } else if (strcmp(argv[2], "rm") == 0) {
+ cmd = CMD_RM;
+ } else if (strcmp(argv[2], "clear") == 0) {
+ cmd = CMD_CLR;
+ } else if (strcmp(argv[2], "addl") == 0) {
+ cmd = CMD_ADDL;
+ } else if (strcmp(argv[2], "get") == 0) {
+ cmd = CMD_GET;
+ } else {
+ (void) fprintf(stderr, "%s\n", usage);
+ exit(1);
+ }
+
+ /* Count up the number of parameters. The arg format varies w/ cmd */
+ switch (cmd) {
+ case CMD_ADD:
+ for (i = 3; i < argc; i++) {
+ /* argv[i] is the file path */
+
+ /* The next arg is the alias */
+ if (++i >= argc) {
+ (void) fprintf(stderr, "missing alias for %s\n",
+ argv[i - 1]);
+ exit(1);
+ }
+
+ cnt++;
+ }
+ break;
+ case CMD_ADDL:
+ cnt = argc - 3;
+ break;
+ case CMD_RM:
+ cnt = argc - 3;
+ break;
+ case CMD_CLR: /*FALLTHRU*/
+ case CMD_GET:
+ if (argc > 3) {
+ (void) fprintf(stderr, "%s\n", usage);
+ exit(1);
+ }
+ break;
+ }
+
+ if (cnt > 0) {
+ if ((e = (hyprlofs_entry_t *)malloc(sizeof (hyprlofs_entry_t) *
+ cnt)) == NULL) {
+ (void) fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+ }
+
+ /*
+ * Format up the args.
+ * We only setup the path member for the add cmd.
+ * We won't run this loop for the clear cmd.
+ * The addl command is special since we use basename to get the alias.
+ */
+ for (i = 0, ap = 3; i < cnt; i++, ap++) {
+ if (cmd == CMD_ADDL) {
+ e[i].hle_path = argv[ap];
+ e[i].hle_plen = strlen(e[i].hle_path);
+
+ e[i].hle_name = basename(argv[ap]);
+ e[i].hle_nlen = strlen(e[i].hle_name);
+
+ continue;
+ }
+
+ if (cmd == CMD_ADD) {
+ e[i].hle_path = argv[ap++];
+ e[i].hle_plen = strlen(e[i].hle_path);
+ }
+
+ e[i].hle_name = argv[ap];
+ e[i].hle_nlen = strlen(e[i].hle_name);
+ }
+
+ ents.hle_entries = e;
+ ents.hle_len = cnt;
+
+ switch (cmd) {
+ case CMD_ADD: /*FALLTHRU*/
+ case CMD_ADDL:
+ if (ioctl(fd, HYPRLOFS_ADD_ENTRIES, &ents) < 0) {
+ perror("ioctl");
+ rv = 1;
+ }
+ break;
+ case CMD_RM:
+ if (ioctl(fd, HYPRLOFS_RM_ENTRIES, &ents) < 0) {
+ perror("ioctl");
+ rv = 1;
+ }
+ break;
+ case CMD_CLR:
+ if (ioctl(fd, HYPRLOFS_RM_ALL) < 0) {
+ perror("ioctl");
+ rv = 1;
+ }
+ break;
+ case CMD_GET:
+ rv = get_entries(fd);
+ break;
+ }
+
+ (void) close(fd);
+ if (cnt > 0)
+ free(e);
+ return (rv);
+}
diff --git a/usr/src/cmd/fs.d/hyprlofs/mount/Makefile b/usr/src/cmd/fs.d/hyprlofs/mount/Makefile
new file mode 100644
index 0000000000..a0b63d211c
--- /dev/null
+++ b/usr/src/cmd/fs.d/hyprlofs/mount/Makefile
@@ -0,0 +1,31 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2012 Joyent, Inc. All rights reserved.
+#
+
+FSTYPE= hyprlofs
+LIBPROG= mount
+
+include ../../Makefile.fstype
+include ../../Makefile.mount
+include ../../Makefile.mount.targ
diff --git a/usr/src/cmd/fs.d/hyprlofs/mount/mount.c b/usr/src/cmd/fs.d/hyprlofs/mount/mount.c
new file mode 100644
index 0000000000..a95c9ca3c2
--- /dev/null
+++ b/usr/src/cmd/fs.d/hyprlofs/mount/mount.c
@@ -0,0 +1,148 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
+ */
+
+#define HLFS
+#define MNTTYPE_HYFS "hyprlofs"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <libintl.h>
+#include <errno.h>
+#include <sys/fstyp.h>
+#include <sys/fsid.h>
+#include <sys/mntent.h>
+#include <sys/mnttab.h>
+#include <sys/mount.h>
+#include <sys/signal.h>
+#include <sys/stat.h>
+#include <fslib.h>
+
+#define RET_OK 0
+/*
+ * /sbin/mount and the fs-local method understand this exit code to
+ * mean that all the mount failures were related to hyprlofs mounts. Since
+ * this program only attempts to mount hyfs file systems, when it fails
+ * it returns this exit status.
+ */
+#define RET_ERR 111
+
+static void usage(void);
+
+static char optbuf[MAX_MNTOPT_STR] = { '\0', };
+static int optsize = 0;
+
+static char fstype[] = MNTTYPE_HYFS;
+
+/*
+ * usage: mount [-Ormq] [-o options] special mountp
+ *
+ * This mount program is exec'ed by /usr/sbin/mount if '-F hyprlofs' is
+ * specified.
+ */
+int
+main(int argc, char *argv[])
+{
+ int c;
+ char *special; /* Entity being mounted */
+ char *mountp; /* Entity being mounted on */
+ char *savedoptbuf;
+ char *myname;
+ char typename[64];
+ int flags = 0;
+ int errflag = 0;
+ int qflg = 0;
+
+ myname = strrchr(argv[0], '/');
+ myname = myname ? myname+1 : argv[0];
+ (void) snprintf(typename, sizeof (typename), "%s %s", fstype, myname);
+ argv[0] = typename;
+
+ while ((c = getopt(argc, argv, "o:rmOq")) != EOF) {
+ switch (c) {
+ case '?':
+ errflag++;
+ break;
+
+ case 'o':
+ if (strlcpy(optbuf, optarg, sizeof (optbuf)) >=
+ sizeof (optbuf)) {
+ (void) fprintf(stderr,
+ gettext("%s: Invalid argument: %s\n"),
+ myname, optarg);
+ return (2);
+ }
+ optsize = strlen(optbuf);
+ break;
+ case 'O':
+ flags |= MS_OVERLAY;
+ break;
+ case 'r':
+ flags |= MS_RDONLY;
+ break;
+
+ case 'm':
+ flags |= MS_NOMNTTAB;
+ break;
+
+ case 'q':
+ qflg = 1;
+ break;
+
+ default:
+ usage();
+ }
+ }
+ if ((argc - optind != 2) || errflag) {
+ usage();
+ }
+ special = argv[argc - 2];
+ mountp = argv[argc - 1];
+
+ if ((savedoptbuf = strdup(optbuf)) == NULL) {
+ (void) fprintf(stderr, gettext("%s: out of memory\n"),
+ myname);
+ exit(2);
+ }
+ if (mount(special, mountp, flags | MS_OPTIONSTR, fstype, NULL, 0,
+ optbuf, MAX_MNTOPT_STR)) {
+ (void) fprintf(stderr, "mount: ");
+ perror(special);
+ exit(RET_ERR);
+ }
+ if (optsize && !qflg)
+ cmp_requested_to_actual_options(savedoptbuf, optbuf,
+ special, mountp);
+ return (0);
+}
+
+void
+usage(void)
+{
+ (void) fprintf(stderr,
+ "Usage: mount [-Ormq] [-o options] special mountpoint\n");
+ exit(RET_ERR);
+}
diff --git a/usr/src/cmd/fs.d/lxproc/Makefile b/usr/src/cmd/fs.d/lxproc/Makefile
new file mode 100644
index 0000000000..77075ef1dd
--- /dev/null
+++ b/usr/src/cmd/fs.d/lxproc/Makefile
@@ -0,0 +1,32 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+FSTYPE= lxproc
+LIBPROG= mount
+
+include ../Makefile.fstype
+include ../Makefile.mount
+include ../Makefile.mount.targ
diff --git a/usr/src/cmd/fs.d/lxproc/mount.c b/usr/src/cmd/fs.d/lxproc/mount.c
new file mode 100644
index 0000000000..5a000997bd
--- /dev/null
+++ b/usr/src/cmd/fs.d/lxproc/mount.c
@@ -0,0 +1,140 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <libintl.h>
+#include <errno.h>
+#include <sys/fstyp.h>
+#include <sys/fsid.h>
+#include <sys/mntent.h>
+#include <sys/mnttab.h>
+#include <sys/mount.h>
+#include <sys/signal.h>
+#include <sys/stat.h>
+#include <fslib.h>
+
+#define RET_OK 0
+#define RET_ERR 33
+
+static void usage(void);
+
+static char optbuf[MAX_MNTOPT_STR] = { '\0', };
+static int optsize = 0;
+
+static char fstype[] = "lxproc";
+
+/*
+ * usage: mount [-Ormq] [-o options] special mountp
+ *
+ * This mount program is exec'ed by /usr/sbin/mount if '-F lxproc' is
+ * specified.
+ */
+int
+main(int argc, char *argv[])
+{
+ int c;
+ char *special; /* Entity being mounted */
+ char *mountp; /* Entity being mounted on */
+ char *savedoptbuf;
+ char *myname;
+ char typename[64];
+ int flags = 0;
+ int errflag = 0;
+ int qflg = 0;
+
+ myname = strrchr(argv[0], '/');
+ myname = myname ? myname+1 : argv[0];
+ (void) snprintf(typename, sizeof (typename), "%s %s", fstype, myname);
+ argv[0] = typename;
+
+ while ((c = getopt(argc, argv, "o:rmOq")) != EOF) {
+ switch (c) {
+ case '?':
+ errflag++;
+ break;
+
+ case 'o':
+ if (strlcpy(optbuf, optarg, sizeof (optbuf)) >=
+ sizeof (optbuf)) {
+ (void) fprintf(stderr,
+ gettext("%s: Invalid argument: %s\n"),
+ myname, optarg);
+ return (2);
+ }
+ optsize = strlen(optbuf);
+ break;
+ case 'O':
+ flags |= MS_OVERLAY;
+ break;
+ case 'r':
+ flags |= MS_RDONLY;
+ break;
+
+ case 'm':
+ flags |= MS_NOMNTTAB;
+ break;
+
+ case 'q':
+ qflg = 1;
+ break;
+
+ default:
+ usage();
+ }
+ }
+ if ((argc - optind != 2) || errflag) {
+ usage();
+ }
+ special = argv[argc - 2];
+ mountp = argv[argc - 1];
+
+ if ((savedoptbuf = strdup(optbuf)) == NULL) {
+ (void) fprintf(stderr, gettext("%s: out of memory\n"),
+ myname);
+ exit(2);
+ }
+ if (mount(special, mountp, flags | MS_OPTIONSTR, fstype, NULL, 0,
+ optbuf, MAX_MNTOPT_STR)) {
+ (void) fprintf(stderr, "mount: ");
+ perror(special);
+ exit(RET_ERR);
+ }
+ if (optsize && !qflg)
+ cmp_requested_to_actual_options(savedoptbuf, optbuf,
+ special, mountp);
+ return (0);
+}
+
+void
+usage(void)
+{
+ (void) fprintf(stderr,
+ "Usage: mount [-Ormq] [-o options] special mountpoint\n");
+ exit(RET_ERR);
+}
diff --git a/usr/src/cmd/fs.d/nfs/lib/smfcfg.c b/usr/src/cmd/fs.d/nfs/lib/smfcfg.c
index 892f0cd052..4f9399143c 100644
--- a/usr/src/cmd/fs.d/nfs/lib/smfcfg.c
+++ b/usr/src/cmd/fs.d/nfs/lib/smfcfg.c
@@ -22,6 +22,7 @@
/*
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#include <stdio.h>
@@ -344,8 +345,23 @@ fs_smf_get_prop(smf_fstype_t fstype, char *prop_name, char *cbuf,
} else {
ret = scf_error();
}
- if ((ret != 0) && scf_error() != SCF_ERROR_NONE)
- fprintf(stdout, gettext("%s\n"), scf_strerror(ret));
+ if ((ret != 0) && scf_error() != SCF_ERROR_NONE) {
+ /*
+ * This is a workaround for the NFS service manifests not
+ * containing the proper properties in local zones.
+ *
+ * When in a local zone and the property doesn't exist on an NFS
+ * service (most likely nfs/server or nfs/client), don't print
+ * the error. The caller will still see the correct error code,
+ * but a user creating a delegated dataset or mounting an NFS
+ * share won't see this spurious error.
+ */
+ if (getzoneid() == GLOBAL_ZONEID ||
+ scf_error() != SCF_ERROR_NOT_FOUND) {
+ fprintf(stdout, gettext("%s\n"), scf_strerror(ret));
+ }
+ }
+
out:
fs_smf_fini(phandle);
return (ret);
diff --git a/usr/src/cmd/fs.d/nfs/lib/smfcfg.h b/usr/src/cmd/fs.d/nfs/lib/smfcfg.h
index c06327d801..f0b70907aa 100644
--- a/usr/src/cmd/fs.d/nfs/lib/smfcfg.h
+++ b/usr/src/cmd/fs.d/nfs/lib/smfcfg.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#ifndef _SMFCFG_H
@@ -42,6 +43,7 @@
#include <locale.h>
#include <errno.h>
#include <sys/types.h>
+#include <zone.h>
#ifdef __cplusplus
extern "C" {
diff --git a/usr/src/cmd/fs.d/nfs/mount/Makefile b/usr/src/cmd/fs.d/nfs/mount/Makefile
index dad33922a3..c2485208a8 100644
--- a/usr/src/cmd/fs.d/nfs/mount/Makefile
+++ b/usr/src/cmd/fs.d/nfs/mount/Makefile
@@ -19,18 +19,12 @@
# CDDL HEADER END
#
# Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2014, Joyent, Inc. All rights reserved.
#
# cmd/fs.d/nfs/mount/Makefile
FSTYPE= nfs
LIBPROG= mount
-ROOTFS_PROG= $(LIBPROG)
-
-# duplicate ROOTLIBFSTYPE value needed for installation rule
-# we must define this before including Makefile.fstype
-ROOTLIBFSTYPE = $(ROOT)/usr/lib/fs/$(FSTYPE)
-$(ROOTLIBFSTYPE)/%: $(ROOTLIBFSTYPE) %
- $(RM) $@; $(SYMLINK) ../../../../etc/fs/$(FSTYPE)/$(LIBPROG) $@
include ../../Makefile.fstype
@@ -65,7 +59,7 @@ CLOBBERFILES += $(LIBPROG)
.KEEP_STATE:
-all: $(ROOTFS_PROG)
+all: $(LIBPROG)
$(LIBPROG): webnfs.h $(OBJS)
$(LINK.c) -o $@ $(OBJS) $(LDLIBS)
@@ -114,7 +108,9 @@ $(POFILE): $(SRCS)
sed "/^domain/d" messages.po > $@
$(RM) $(POFILE).i messages.po
-install: $(ROOTETCPROG)
+install: all $(FSTYPEPROG)
+ $(RM) $(ROOTETCPROG)
+ $(SYMLINK) ../../../usr/lib/fs/$(FSTYPE)/$(LIBPROG) $(ROOTETCPROG)
lint: webnfs.h webnfs_xdr.c webnfs_client.c lint_SRCS
diff --git a/usr/src/cmd/fs.d/nfs/mount/mount.c b/usr/src/cmd/fs.d/nfs/mount/mount.c
index c228a91d05..b3b255d098 100644
--- a/usr/src/cmd/fs.d/nfs/mount/mount.c
+++ b/usr/src/cmd/fs.d/nfs/mount/mount.c
@@ -2104,7 +2104,7 @@ get_fh(struct nfs_args *args, char *fshost, char *fspath, int *versp,
}
while ((cl = clnt_create_vers(fshost, MOUNTPROG, &outvers,
- vers_min, vers_to_try, "datagram_v")) == NULL) {
+ vers_min, vers_to_try, NULL)) == NULL) {
if (rpc_createerr.cf_stat == RPC_UNKNOWNHOST) {
pr_err(gettext("%s: %s\n"), fshost,
clnt_spcreateerror(""));
diff --git a/usr/src/cmd/fs.d/nfs/mountd/mountd.c b/usr/src/cmd/fs.d/nfs/mountd/mountd.c
index f0a6735a68..448d649985 100644
--- a/usr/src/cmd/fs.d/nfs/mountd/mountd.c
+++ b/usr/src/cmd/fs.d/nfs/mountd/mountd.c
@@ -22,6 +22,7 @@
/*
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
@@ -405,6 +406,13 @@ main(int argc, char *argv[])
exit(1);
}
+ /* Mountd cannot run in a non-global zone. */
+ if (getzoneid() != GLOBAL_ZONEID) {
+ (void) fprintf(stderr, "%s: can only run in the global zone\n",
+ argv[0]);
+ exit(1);
+ }
+
if (getrlimit(RLIMIT_NOFILE, &rl) != 0) {
syslog(LOG_ERR, "getrlimit failed");
} else {
diff --git a/usr/src/cmd/fs.d/nfs/nfsd/nfsd.c b/usr/src/cmd/fs.d/nfs/nfsd/nfsd.c
index 6c0e0bda5e..c34c39a13e 100644
--- a/usr/src/cmd/fs.d/nfs/nfsd/nfsd.c
+++ b/usr/src/cmd/fs.d/nfs/nfsd/nfsd.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -176,6 +177,13 @@ main(int ac, char *av[])
exit(1);
}
+ /* Nfsd cannot run in a non-global zone. */
+ if (getzoneid() != GLOBAL_ZONEID) {
+ (void) fprintf(stderr, "%s: can only run in the global zone\n",
+ av[0]);
+ exit(1);
+ }
+
(void) enable_extended_FILE_stdio(-1, -1);
/*
diff --git a/usr/src/cmd/fs.d/nfs/svc/nfs-server b/usr/src/cmd/fs.d/nfs/svc/nfs-server
index 11a54fea8a..5c8c1a67dd 100644
--- a/usr/src/cmd/fs.d/nfs/svc/nfs-server
+++ b/usr/src/cmd/fs.d/nfs/svc/nfs-server
@@ -53,13 +53,13 @@ configure_ipfilter()
#
# Nothing to do if:
+ # - service's policy is 'use_global'
# - ipfilter isn't online
# - global policy is 'custom'
- # - service's policy is 'use_global'
#
+ [ "`get_policy $SMF_FMRI`" = "use_global" ] && return 0
service_check_state $IPF_FMRI $SMF_ONLINE || return 0
[ "`get_global_def_policy`" = "custom" ] && return 0
- [ "`get_policy $SMF_FMRI`" = "use_global" ] && return 0
svcadm restart $IPF_FMRI
}
diff --git a/usr/src/cmd/fs.d/nfs/umount/umount.c b/usr/src/cmd/fs.d/nfs/umount/umount.c
index aabdc8a592..66d280bcdb 100644
--- a/usr/src/cmd/fs.d/nfs/umount/umount.c
+++ b/usr/src/cmd/fs.d/nfs/umount/umount.c
@@ -297,7 +297,7 @@ retry:
*/
timep = (quick ? &create_timeout : NULL);
cl = clnt_create_timed(list[i].host, MOUNTPROG, vers,
- "datagram_n", timep);
+ NULL, timep);
/*
* Do not print any error messages in case of forced
* unmount.
diff --git a/usr/src/cmd/halt/halt.c b/usr/src/cmd/halt/halt.c
index 36867c88dc..3b57e02c64 100644
--- a/usr/src/cmd/halt/halt.c
+++ b/usr/src/cmd/halt/halt.c
@@ -21,6 +21,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011 Joyent, Inc. All rights reserved.
*/
/*
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
@@ -1236,6 +1237,17 @@ do_archives_update(int do_fast_reboot)
pid_t pid;
char *cmd_argv[MAXARGS];
+#if defined(__i386)
+ {
+ /*
+ * bootadm will complain and exit if not a grub root, so
+ * just skip running it.
+ */
+ struct stat sb;
+ if (stat("/boot/grub/stage2", &sb) == -1)
+ return;
+ }
+#endif /* __i386 */
cmd_argv[i++] = "/sbin/bootadm";
cmd_argv[i++] = "-ea";
@@ -1300,7 +1312,7 @@ main(int argc, char *argv[])
optstring = "dlnqfp";
usage = gettext("usage: %s [ -dlnq(p|f) ] [ boot args ]\n");
#endif
- cmd = A_SHUTDOWN;
+ cmd = A_REBOOT;
fcn = AD_BOOT;
} else {
(void) fprintf(stderr,
@@ -1498,7 +1510,8 @@ main(int argc, char *argv[])
* check_zone_haltedness later on.
*/
if (zoneid == GLOBAL_ZONEID && cmd != A_DUMP) {
- need_check_zones = halt_zones();
+ if (!qflag)
+ need_check_zones = halt_zones();
}
#if defined(__i386)
@@ -1598,7 +1611,7 @@ main(int argc, char *argv[])
(void) signal(SIGINT, SIG_IGN);
- if (!qflag && !nosync) {
+ if (!nosync) {
struct utmpx wtmpx;
bzero(&wtmpx, sizeof (struct utmpx));
diff --git a/usr/src/cmd/ibd_upgrade/ibd_delete_link.c b/usr/src/cmd/ibd_upgrade/ibd_delete_link.c
index b9d10a56cd..f63630207d 100644
--- a/usr/src/cmd/ibd_upgrade/ibd_delete_link.c
+++ b/usr/src/cmd/ibd_upgrade/ibd_delete_link.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent Inc. All rights reserved.
*/
#include <stdio.h>
@@ -86,6 +87,7 @@ ibd_delete_link(dladm_handle_t dlh, char *link)
getlinkid.ld_cmd = DLMGMT_CMD_GETLINKID;
(void) strlcpy(getlinkid.ld_link, link, MAXLINKNAMELEN);
+ getlinkid.ld_zoneid = -1;
if ((status = ibd_dladm_door_call(dlh, &getlinkid, sizeof (getlinkid),
&retval, sizeof (retval))) != DLADM_STATUS_OK) {
diff --git a/usr/src/cmd/init/Makefile b/usr/src/cmd/init/Makefile
index a867a0e780..b4b857882b 100644
--- a/usr/src/cmd/init/Makefile
+++ b/usr/src/cmd/init/Makefile
@@ -25,11 +25,13 @@
#
PROG= init
+OBJS= init.o
ROOTFS_PROG= $(PROG)
DEFAULTFILES= init.dfl
include ../Makefile.cmd
+include ../Makefile.ctf
LDLIBS += -lpam -lbsm -lcontract -lscf
CERRWARN += -_gcc=-Wno-parentheses
@@ -41,6 +43,10 @@ CLOBBERFILES= $(STATIC)
all: $(ROOTFS_PROG)
+$(ROOTFS_PROG): $(OBJS)
+ $(LINK.c) -o $@ $(OBJS) $(LDLIBS)
+ $(POST_PROCESS)
+
install: all $(ROOTETCDEFAULTFILES) $(ROOTSBINPROG)
$(RM) $(ROOTETCPROG)
$(RM) $(ROOTUSRSBINPROG)
@@ -58,4 +64,8 @@ clean:
lint: lint_PROG
+%.o: %.c
+ $(COMPILE.c) $<
+ $(POST_PROCESS_O)
+
include ../Makefile.targ
diff --git a/usr/src/cmd/init/init.c b/usr/src/cmd/init/init.c
index 031a053b65..fa1b9d687c 100644
--- a/usr/src/cmd/init/init.c
+++ b/usr/src/cmd/init/init.c
@@ -23,6 +23,7 @@
* Copyright (c) 2013 Gary Mills
*
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -144,6 +145,8 @@
#define UT_USER_SZ 32 /* Size of a utmpx ut_user field */
#define UT_LINE_SZ 32 /* Size of a utmpx ut_line field */
+#define CHECK_SVC SCF_INSTANCE_FS_MINIMAL
+
/*
* SLEEPTIME The number of seconds "init" sleeps between wakeups if
* nothing else requires this "init" wakeup.
@@ -548,6 +551,8 @@ static time_t init_boot_time; /* Substitute for kernel boot time. */
#define NSTARTD_FAILURE_TIMES 3 /* trigger after 3 failures */
#define STARTD_FAILURE_RATE_NS 5000000000LL /* 1 failure/5 seconds */
+#define STARTD_THROTTLE_RETRY 60 /* space failure retry after 60 secs */
+#define ROOT_MIN_FREE 524288 /* 512KB min. space needed in root */
static hrtime_t startd_failure_time[NSTARTD_FAILURE_TIMES];
static uint_t startd_failure_index;
@@ -696,9 +701,8 @@ main(int argc, char *argv[])
console(B_FALSE,
"\n\n%s Release %s Version %s %d-bit\r\n",
un.sysname, un.release, un.version, bits);
- console(B_FALSE,
- "Copyright (c) 1983, 2010, Oracle and/or its affiliates."
- " All rights reserved.\r\n");
+ console(B_FALSE, "Copyright (c) 2010-2012, "
+ "Joyent Inc. All rights reserved.\r\n");
}
/*
@@ -3509,6 +3513,28 @@ bail:
}
/*
+ * Attempt to confirm that svc.startd is ready to accept a user-initiated
+ * run-level change. startd is not ready until it has started its
+ * _scf_notify_wait thread to watch for events from svc.configd. This is
+ * inherently racy. To workaround this, we check the status of a file that
+ * startd will create once it has started the _scf_notify_wait thread.
+ * If we don't see this file after one minute, then charge ahead.
+ */
+static void
+verify_startd_ready()
+{
+ struct stat64 buf;
+ int i;
+
+ for (i = 0; i < 60; i++) {
+ if (stat64("/etc/svc/volatile/startd.ready", &buf) == 0)
+ return;
+ sleep(1);
+ }
+ console(B_TRUE, "verify startd timeout\n");
+}
+
+/*
* Function to handle requests from users to main init running as process 1.
*/
static void
@@ -3596,6 +3622,12 @@ userinit(int argc, char **argv)
(void) audit_put_record(ADT_SUCCESS, ADT_SUCCESS, argv[1]);
/*
+ * Before we tell init to start a run-level change, we need to be
+ * sure svc.startd is ready to accept that.
+ */
+ verify_startd_ready();
+
+ /*
* Signal init; init will take care of telling svc.startd.
*/
if (kill(init_pid, init_signal) == FAILURE) {
@@ -4305,9 +4337,7 @@ contract_event(struct pollfd *poll)
if (ret == 0) {
if (cookie == STARTD_COOKIE &&
do_restart_startd) {
- if (smf_debug)
- console(B_TRUE, "Restarting "
- "svc.startd.\n");
+ console(B_TRUE, "Restarting svc.startd.\n");
/*
* Account for the failure. If the failure rate
@@ -4438,6 +4468,28 @@ startd_run(const char *cline, int tmpl, ctid_t old_ctid)
if (pid == 0) {
/* child */
+ struct statvfs64 sbuf;
+
+ /*
+ * svc.configd needs some space (a few hundred KB) in / for its
+ * database. One common cause for startd failure is when
+ * configd dies because / is full. We don't want to go into the
+ * fast restart loop (startd_failure_rate_critical) and enter
+ * maintenance so we check for this case and slow down the
+ * failure rate so as to keep retrying in the hope space will
+ * free up.
+ */
+ if (statvfs64("/", &sbuf) != -1 &&
+ (sbuf.f_bsize * sbuf.f_bfree) < ROOT_MIN_FREE) {
+ syslog(LOG_ERR, "Insufficent space (%ld) in / to "
+ "start svc.startd.\n",
+ (long)(sbuf.f_bsize * sbuf.f_bfree));
+ console(B_TRUE, "Insufficent space (%ld) in / to "
+ "start svc.startd.\n",
+ (long)(sbuf.f_bsize * sbuf.f_bfree));
+ sleep(STARTD_THROTTLE_RETRY);
+ exit(1);
+ }
/* See the comment in efork() */
for (i = SIGHUP; i <= SIGRTMAX; ++i) {
diff --git a/usr/src/cmd/initpkg/mountall.sh b/usr/src/cmd/initpkg/mountall.sh
index fa59a20a41..2302c7592c 100644
--- a/usr/src/cmd/initpkg/mountall.sh
+++ b/usr/src/cmd/initpkg/mountall.sh
@@ -29,6 +29,8 @@
# Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
# All Rights Reserved
#
+# Copyright (c) 2012, Joyent, Inc. All rights reserved.
+#
usage () {
if [ -n "$1" ]; then
@@ -149,6 +151,9 @@ isremote() {
# Get list of remote FS types (just once)
RemoteFSTypes=`while read t junk; do echo $t; done < /etc/dfs/fstypes`
+# Ensure nfs/smbfs are remote FS types even if not delivered from fstypes
+isremote "nfs" || set -A RemoteFSTypes "nfs"
+isremote "smbfs" || set -A RemoteFSTypes "smbfs"
#
# Process command line args
diff --git a/usr/src/cmd/initpkg/shutdown.sh b/usr/src/cmd/initpkg/shutdown.sh
index e65224c632..820d50310c 100644
--- a/usr/src/cmd/initpkg/shutdown.sh
+++ b/usr/src/cmd/initpkg/shutdown.sh
@@ -41,7 +41,7 @@ usage() {
}
notify() {
- /usr/sbin/wall -a <<-!
+ /usr/sbin/wall -Za <<-!
$*
!
if [ -x /usr/sbin/showmount -a -x /usr/sbin/rwall ]
diff --git a/usr/src/cmd/initpkg/umountall.sh b/usr/src/cmd/initpkg/umountall.sh
index c9a94fd8f1..4a45e19e18 100644
--- a/usr/src/cmd/initpkg/umountall.sh
+++ b/usr/src/cmd/initpkg/umountall.sh
@@ -25,6 +25,7 @@
# Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
# All Rights Reserved
#
+# Copyright (c) 2012, Joyent, Inc. All rights reserved.
#
usage () {
@@ -98,6 +99,9 @@ isremote() {
# Get list of remote FS types (just once)
RemoteFSTypes=`while read t junk; do echo $t; done < /etc/dfs/fstypes`
+# Ensure nfs/smbfs are remote FS types even if not delivered from fstypes
+isremote "nfs" || set -A RemoteFSTypes "nfs"
+isremote "smbfs" || set -A RemoteFSTypes "smbfs"
#
# Process command line args
diff --git a/usr/src/cmd/ipf/tools/Makefile.tools b/usr/src/cmd/ipf/tools/Makefile.tools
index 7c1e151762..ce0db79970 100644
--- a/usr/src/cmd/ipf/tools/Makefile.tools
+++ b/usr/src/cmd/ipf/tools/Makefile.tools
@@ -23,7 +23,6 @@
# Use is subject to license terms.
#
# Copyright 2013 Nexenta Systems, Inc. All rights reserved.
-#
# Copyright (c) 2012, Joyent Inc. All rights reserved.
#
diff --git a/usr/src/cmd/ipf/tools/ipfstat.c b/usr/src/cmd/ipf/tools/ipfstat.c
index fd39556465..f50b722bde 100644
--- a/usr/src/cmd/ipf/tools/ipfstat.c
+++ b/usr/src/cmd/ipf/tools/ipfstat.c
@@ -165,6 +165,10 @@ static int sort_dstip __P((const void *, const void *));
static int sort_dstpt __P((const void *, const void *));
#endif
+#if SOLARIS
+#include "ipfzone.h"
+#endif
+
static void usage(name)
char *name;
diff --git a/usr/src/cmd/ksh/Makefile.com b/usr/src/cmd/ksh/Makefile.com
index 8716f4eb9d..e5615cb407 100644
--- a/usr/src/cmd/ksh/Makefile.com
+++ b/usr/src/cmd/ksh/Makefile.com
@@ -36,11 +36,13 @@ LIBSHELLBASE=../../../lib/libshell
LIBSHELLSRC=$(LIBSHELLBASE)/common/sh
SRCS= $(OBJECTS:%.o=$(LIBSHELLSRC)/%.c)
+OBJS= $(OBJECTS)
LDLIBS += -lshell
# Set common AST build flags (e.g., needed to support the math stuff).
include ../../../Makefile.ast
+include ../../Makefile.ctf
# 1. Make sure that the -D/-U defines in CFLAGS below are in sync
# with usr/src/lib/libshell/Makefile.com
diff --git a/usr/src/cmd/lofiadm/main.c b/usr/src/cmd/lofiadm/main.c
index 542bec8063..d41ceaead3 100644
--- a/usr/src/cmd/lofiadm/main.c
+++ b/usr/src/cmd/lofiadm/main.c
@@ -352,7 +352,8 @@ out:
* DO NOT use this function if the filename is actually the device name.
*/
static int
-lofi_map_file(int lfd, struct lofi_ioctl li, const char *filename)
+lofi_map_file(int lfd, struct lofi_ioctl li, const char *filename,
+ boolean_t no_devlink_flag)
{
int minor;
@@ -365,7 +366,8 @@ lofi_map_file(int lfd, struct lofi_ioctl li, const char *filename)
"unsupported"));
die(gettext("could not map file %s"), filename);
}
- wait_until_dev_complete(minor);
+ if (!no_devlink_flag)
+ wait_until_dev_complete(minor);
return (minor);
}
@@ -375,7 +377,8 @@ lofi_map_file(int lfd, struct lofi_ioctl li, const char *filename)
*/
static void
add_mapping(int lfd, const char *devicename, const char *filename,
- mech_alias_t *cipher, const char *rkey, size_t rksz, boolean_t rdonly)
+ mech_alias_t *cipher, const char *rkey, size_t rksz,
+ boolean_t rdonly, boolean_t no_devlink_flag)
{
struct lofi_ioctl li;
@@ -411,7 +414,7 @@ add_mapping(int lfd, const char *devicename, const char *filename,
int minor;
/* pick one via the driver */
- minor = lofi_map_file(lfd, li, filename);
+ minor = lofi_map_file(lfd, li, filename, no_devlink_flag);
/* if mapping succeeds, print the one picked */
(void) printf("/dev/%s/%d\n", LOFI_BLOCK_NAME, minor);
return;
@@ -432,7 +435,8 @@ add_mapping(int lfd, const char *devicename, const char *filename,
die(gettext("could not map file %s to %s"), filename,
devicename);
}
- wait_until_dev_complete(li.li_minor);
+ if (!no_devlink_flag)
+ wait_until_dev_complete(li.li_minor);
}
/*
@@ -1316,7 +1320,7 @@ lofi_uncompress(int lfd, const char *filename)
if (statbuf.st_size == 0)
return;
- minor = lofi_map_file(lfd, li, filename);
+ minor = lofi_map_file(lfd, li, filename, B_FALSE);
(void) snprintf(devicename, sizeof (devicename), "/dev/%s/%d",
LOFI_BLOCK_NAME, minor);
@@ -1857,6 +1861,7 @@ main(int argc, char *argv[])
boolean_t ephflag = B_FALSE;
boolean_t compressflag = B_FALSE;
boolean_t uncompressflag = B_FALSE;
+ boolean_t no_devlink_flag = B_FALSE;
/* the next two work together for -c, -k, -T, -e options only */
boolean_t need_crypto = B_FALSE; /* if any -c, -k, -T, -e */
boolean_t cipher_only = B_TRUE; /* if -c only */
@@ -1872,7 +1877,7 @@ main(int argc, char *argv[])
(void) setlocale(LC_ALL, "");
(void) textdomain(TEXT_DOMAIN);
- while ((c = getopt(argc, argv, "a:c:Cd:efk:rs:T:U")) != EOF) {
+ while ((c = getopt(argc, argv, "a:c:Cd:efk:rs:T:UX")) != EOF) {
switch (c) {
case 'a':
addflag = B_TRUE;
@@ -1950,6 +1955,13 @@ main(int argc, char *argv[])
case 'U':
uncompressflag = B_TRUE;
break;
+ case 'X':
+ /*
+ * Private flag to skip the wait for the /dev links to
+ * be created.
+ */
+ no_devlink_flag = B_TRUE;
+ break;
case '?':
default:
errflag = B_TRUE;
@@ -2079,7 +2091,7 @@ main(int argc, char *argv[])
*/
if (addflag)
add_mapping(lfd, devicename, filename, cipher, rkey, rksz,
- rdflag);
+ rdflag, no_devlink_flag);
else if (compressflag)
lofi_compress(&lfd, filename, compress_index, segsize);
else if (uncompressflag)
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/Makefile b/usr/src/cmd/machid/Makefile
index 75acb9e97b..6a3c7138dc 100644
--- a/usr/src/cmd/ssh/libopenbsd-compat/Makefile
+++ b/usr/src/cmd/machid/Makefile
@@ -19,38 +19,62 @@
#
# CDDL HEADER END
#
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
+
+PROG= machid
+
+include ../Makefile.cmd
+
+#
+# List of all links present on all architectures and machines.
#
-# cmd/ssh/libssh/Makefile
+# Note that this function is obsolesent and we don't generally
+# add to this list (see psarc/1992/171).
+#
+FIRSTLINK = i286
+LINKS = i386 i486 i860 i86pc iAPX286 \
+ m68k mc68000 mc68010 mc68020 mc68030 mc68040 \
+ sparc sun sun2 sun3 sun3x sun4 sun4c sun4m sun4d sun4e \
+ u370 u3b u3b15 u3b2 u3b5 vax pdp11
+
+ROOTFIRSTLINK = $(ROOTBIN)/$(FIRSTLINK)
+ROOTLINKS = $(LINKS:%=$(ROOTBIN)/%)
+
+#
+# Install the program as the first machine in the list.
+#
+INSTALLIT= $(INS.link)
+$(ROOTFIRSTLINK):= INSTALLIT = $(INS.rename)
+$(ROOTLINKS):= INSLINKTARGET = $(ROOTFIRSTLINK)
-include $(SRC)/lib/Makefile.lib
-include ../Makefile.ssh-common
+$(ROOTLINKS): $(ROOTFIRSTLINK)
-SUBDIRS= $(MACH)
+#
+# Link installation rules
+#
+$(ROOTBIN)/%: $(PROG)
+ $(INSTALLIT)
-# Need to override the TEXT_DOMAIN here because we included a Makefile from
-# usr/src/lib but this is a private library which is part of SUNW_OST_OSCMD
-TEXT_DOMAIN= SUNW_OST_OSCMD
+$(ROOTFIRSTLINK): $(ROOTBIN)
-POFILE=_messages.po
+$(ROOTBIN):
+ $(INS.dir)
-all := TARGET= all
-clean := TARGET= clean
-clobber := TARGET= clobber
-install := TARGET= install
-lint := TARGET= lint
-$(POFILE) := TARGET= $(POFILE)
+CFLAGS += $(CCVERBOSE)
.KEEP_STATE:
-all clean clobber install lint $(POFILE): $(SUBDIRS)
+all: $(PROG)
+
+install: all $(ROOTLINKS)
-$(SUBDIRS): FRC
- @cd $@; pwd; $(MAKE) $(TARGET)
+clean:
-FRC:
+lint: lint_PROG
-include $(SRC)/lib/Makefile.targ
+include ../Makefile.targ
diff --git a/usr/src/cmd/machid/machid.c b/usr/src/cmd/machid/machid.c
new file mode 100644
index 0000000000..c3d57b91f9
--- /dev/null
+++ b/usr/src/cmd/machid/machid.c
@@ -0,0 +1,137 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 1993-1994, by Sun Microsystems, Inc.
+ */
+
+/*
+ * This program replicates the function of the links from a machine name
+ * (such as sun4c) through /usr/kvm to true or false as appropriate. It
+ * knows the correct special cases.
+ *
+ * IMPORTANT NOTE:
+ *
+ * Do not modify this program to know about additional special cases or
+ * reflect new platforms or instruction set architectures. This is a
+ * deprecated interface and strictly for backwards compatibility. This
+ * is psarc/1992/171. Note the following excerpt from the opinion:
+ *
+ * It is most important to note that the manual page states in
+ * the NOTES section: "The machid family of commands is
+ * obsolete. Use uname -p and uname -m instead."
+ *
+ * The intent of Kernel Architecture Project team is to provide
+ * only enough functionality to mimic the existing definitions
+ * on the SPARC and Intel x86 versions of Solaris 2.x. No new
+ * identifiers will ever be added to the documented and
+ * undocumented identifiers listed above.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <sys/systeminfo.h>
+
+static char static_buf[SYS_NMLN];
+static char *progname;
+
+static void get_info_item(int command, char **buf, long *count);
+
+/* ARGSUSED */
+int
+main(int argc, char *argv[], char *envp[])
+{
+ char *buf = &static_buf[0];
+ long buflen = SYS_NMLN;
+
+ if ((progname = strrchr(argv[0], '/')) == NULL)
+ progname = argv[0];
+ else
+ progname++;
+
+ /*
+ * First possible match is on the processor type.
+ *
+ * Special case for architectures: i386 matches i486 and visa versa.
+ */
+ get_info_item(SI_ARCHITECTURE, &buf, &buflen);
+ if (strcmp(buf, progname) == 0)
+ return (0);
+ if ((strcmp(buf, "i386") == 0 && strcmp(progname, "i486") == 0) ||
+ (strcmp(buf, "i486") == 0 && strcmp(progname, "i386") == 0))
+ return (0);
+
+ /*
+ * Next possible match is the machine, or more exactly, the value
+ * which would be returned by uname(2) in the machine field or uname(1)
+ * with the -m option. For historical reasons this is really is
+ * often a class of platforms which are identical to userland processes
+ * such as sun4c, sun4m, etc.
+ */
+ get_info_item(SI_MACHINE, &buf, &buflen);
+ if (strcmp(buf, progname) == 0)
+ return (0);
+
+ /*
+ * Finally, match the vendor. We hardwire in one historical match.
+ */
+ get_info_item(SI_HW_PROVIDER, &buf, &buflen);
+ if (strcmp(buf, progname) == 0)
+ return (0);
+ if (strcasecmp(buf, "Sun_Microsystems") == 0 &&
+ strcmp("sun", progname) == 0)
+ return (0);
+
+ return (255);
+}
+
+/*
+ * get_info_item is a wrapper around the sysinfo system call. It makes sure
+ * the buffer is large enough, returning a larger buffer if needed. On
+ * unrecoverable error, it exits. An error message doesn't help and makes
+ * this tiny program link stdio and maybe deal with internationalization,
+ * so the best thing is to die silently. Note that the larger buffer is
+ * retained for later use. Reality is that the buffer will always be big
+ * enough, but this is coded to the spec rather than implementation.
+ */
+static void
+get_info_item(int command, char **buf, long *count)
+{
+ long error;
+
+ error = sysinfo(command, *buf, *count);
+ if (error > *count) {
+ *count = error;
+ if (*buf != static_buf) {
+ free(*buf);
+ }
+ *buf = (char *) malloc(*count);
+ error = sysinfo(command, *buf, *count);
+ }
+
+ if (error == -1)
+ exit(-1);
+}
diff --git a/usr/src/cmd/man/Makefile b/usr/src/cmd/man/Makefile
index 911bae6340..7c7afebb10 100644
--- a/usr/src/cmd/man/Makefile
+++ b/usr/src/cmd/man/Makefile
@@ -14,12 +14,7 @@
# Copyright 2014 Garrett D'Amore <garrett@damore.org>
#
-PROG= man
-LINKS= apropos whatis catman
-LIBLINKS = makewhatis
-OBJS= makewhatis.o man.o stringlist.o
-SRCS= $(OBJS:%.o=%.c)
-
+include Makefile.com
include $(SRC)/cmd/Makefile.cmd
CFLAGS += $(CCVERBOSE)
diff --git a/usr/src/cmd/man/Makefile.com b/usr/src/cmd/man/Makefile.com
new file mode 100644
index 0000000000..8f1b3adf7d
--- /dev/null
+++ b/usr/src/cmd/man/Makefile.com
@@ -0,0 +1,23 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2012 Nexenta Systems, Inc. All rights reserved.
+# Copyright 2014 Garrett D'Amore <garrett@damore.org>
+#
+
+PROG= man
+LINKS= apropos whatis catman
+LIBLINKS = makewhatis
+OBJS= makewhatis.o man.o stringlist.o
+SRCS= $(OBJS:%.o=%.c)
+
+
diff --git a/usr/src/cmd/mdb/Makefile.common b/usr/src/cmd/mdb/Makefile.common
index a344688c1b..8a11721d3b 100644
--- a/usr/src/cmd/mdb/Makefile.common
+++ b/usr/src/cmd/mdb/Makefile.common
@@ -20,7 +20,7 @@
#
# Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright 2013 Nexenta Systems, Inc. All rights reserved.
-# Copyright (c) 2015, Joyent, Inc. All rights reserved.
+# Copyright 2016 Joyent, Inc.
#
# MDB modules used for debugging user processes that every ISA's build
# subdirectory will need to build.
@@ -41,7 +41,8 @@ COMMON_MODULES_PROC = \
libuutil \
libzpool \
mdb_ds \
- mdb_test
+ mdb_test \
+ v8
#
# MDB modules used for debugging user processes which are only 32-bit
@@ -65,6 +66,7 @@ COMMON_MODULES_KVM = \
genunix \
hook \
neti \
+ i40e \
idm \
ii \
ip \
diff --git a/usr/src/cmd/mdb/common/kmdb/kmdb_promif.c b/usr/src/cmd/mdb/common/kmdb/kmdb_promif.c
index 81ac1eacfc..f39fb83c9b 100644
--- a/usr/src/cmd/mdb/common/kmdb/kmdb_promif.c
+++ b/usr/src/cmd/mdb/common/kmdb/kmdb_promif.c
@@ -742,10 +742,10 @@ kmdb_prom_debugger_exit(void)
mdb.m_pio = NULL;
}
-#ifdef DEBUG
/*
- * The prom_* files use ASSERT, which is #defined as assfail().
- * We need to redirect that to our assert function.
+ * The prom_* files use ASSERT, which is #defined as assfail(). We need to
+ * redirect that to our assert function. This is also used by the various STAND
+ * libraries.
*/
int
kmdb_prom_assfail(const char *assertion, const char *file, int line)
@@ -754,7 +754,6 @@ kmdb_prom_assfail(const char *assertion, const char *file, int line)
/*NOTREACHED*/
return (0);
}
-#endif
/*
* Begin the initialization of the debugger/PROM interface. Initialization is
diff --git a/usr/src/cmd/mdb/common/libstandctf/ctf_subr.c b/usr/src/cmd/mdb/common/libstandctf/ctf_subr.c
index 0a211fcd22..e8c703e754 100644
--- a/usr/src/cmd/mdb/common/libstandctf/ctf_subr.c
+++ b/usr/src/cmd/mdb/common/libstandctf/ctf_subr.c
@@ -24,8 +24,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <mdb/mdb_debug.h>
#include <mdb/mdb.h>
@@ -67,6 +65,13 @@ ctf_fdopen(int fd, int *errp)
/*ARGSUSED*/
ctf_file_t *
+ctf_fdcreate_int(int fd, int *errp, ctf_sect_t *ctfp)
+{
+ return (ctf_set_open_errno(errp, ENOTSUP));
+}
+
+/*ARGSUSED*/
+ctf_file_t *
ctf_open(const char *filename, int *errp)
{
return (ctf_set_open_errno(errp, ENOTSUP));
diff --git a/usr/src/cmd/mdb/common/mdb/mdb_cmds.c b/usr/src/cmd/mdb/common/mdb/mdb_cmds.c
index 95b216cfbe..add162a839 100644
--- a/usr/src/cmd/mdb/common/mdb/mdb_cmds.c
+++ b/usr/src/cmd/mdb/common/mdb/mdb_cmds.c
@@ -252,7 +252,7 @@ write_arglist(mdb_tgt_as_t as, mdb_tgt_addr_t addr,
mdb_warn("failed to write %llr at address 0x%llx",
value, addr);
mdb.m_incr = 0;
- break;
+ return (DCMD_ERR);
}
mdb.m_incr = naddr - addr;
@@ -520,8 +520,7 @@ cmd_print_phys(uintptr_t x, uint_t flags, int argc, const mdb_arg_t *argv)
/*ARGSUSED*/
static int
-cmd_print_value(uintptr_t addr, uint_t flags,
- int argc, const mdb_arg_t *argv)
+cmd_print_value(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
uintmax_t ndot, dot = mdb_get_dot();
const char *tgt_argv[1];
diff --git a/usr/src/cmd/mdb/common/mdb/mdb_ctf.c b/usr/src/cmd/mdb/common/mdb/mdb_ctf.c
index 66a8b009a3..4d4cb226c6 100644
--- a/usr/src/cmd/mdb/common/mdb/mdb_ctf.c
+++ b/usr/src/cmd/mdb/common/mdb/mdb_ctf.c
@@ -24,7 +24,7 @@
*/
/*
* Copyright (c) 2013 by Delphix. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
*/
#include <mdb/mdb_ctf.h>
@@ -37,6 +37,7 @@
#include <libctf.h>
#include <string.h>
+#include <limits.h>
typedef struct tnarg {
mdb_tgt_t *tn_tgt; /* target to use for lookup */
@@ -781,8 +782,9 @@ mdb_ctf_enum_iter(mdb_ctf_id_t id, mdb_ctf_enum_f *cb, void *data)
/*
* callback proxy for mdb_ctf_type_iter
*/
+/* ARGSUSED */
static int
-type_iter_cb(ctf_id_t type, void *data)
+type_iter_cb(ctf_id_t type, boolean_t root, void *data)
{
type_iter_t *tip = data;
mdb_ctf_id_t id;
@@ -812,7 +814,7 @@ mdb_ctf_type_iter(const char *object, mdb_ctf_type_f *cb, void *data)
ti.ti_arg = data;
ti.ti_fp = fp;
- if ((ret = ctf_type_iter(fp, type_iter_cb, &ti)) == CTF_ERR)
+ if ((ret = ctf_type_iter(fp, B_FALSE, type_iter_cb, &ti)) == CTF_ERR)
return (set_errno(ctf_to_errno(ctf_errno(fp))));
return (ret);
@@ -1780,7 +1782,6 @@ mdb_ctf_synthetics_create_base(int kind)
return (0);
discard:
- err = set_errno(ctf_to_errno(ctf_errno(cp)));
(void) ctf_discard(cp);
return (err);
}
@@ -1939,7 +1940,7 @@ mdb_ctf_add_member(const mdb_ctf_id_t *p, const char *name,
return (set_errno(ctf_to_errno(ctf_errno(mdb.m_synth))));
}
- id = ctf_add_member(mdb.m_synth, mcip->mci_id, name, mtid);
+ id = ctf_add_member(mdb.m_synth, mcip->mci_id, name, mtid, ULONG_MAX);
if (id == CTF_ERR) {
mdb_dprintf(MDB_DBG_CTF, "failed to add member %s: %s\n",
name, ctf_errmsg(ctf_errno(mdb.m_synth)));
@@ -2040,7 +2041,7 @@ mdb_ctf_add_pointer(const mdb_ctf_id_t *p, mdb_ctf_id_t *rid)
}
- id = ctf_add_pointer(mdb.m_synth, CTF_ADD_ROOT, id);
+ id = ctf_add_pointer(mdb.m_synth, CTF_ADD_ROOT, NULL, id);
if (id == CTF_ERR) {
mdb_dprintf(MDB_DBG_CTF, "failed to add pointer: %s\n",
ctf_errmsg(ctf_errno(mdb.m_synth)));
@@ -2088,6 +2089,7 @@ mdb_ctf_type_delete(const mdb_ctf_id_t *id)
return (0);
}
+/* ARGSUSED */
static int
mdb_ctf_synthetics_file_cb(mdb_ctf_id_t id, void *arg)
{
@@ -2125,7 +2127,7 @@ mdb_ctf_synthetics_from_file(const char *file)
ti.ti_fp = fp;
ti.ti_arg = syn;
ti.ti_cb = mdb_ctf_synthetics_file_cb;
- if (ctf_type_iter(fp, type_iter_cb, &ti) == CTF_ERR) {
+ if (ctf_type_iter(fp, B_FALSE, type_iter_cb, &ti) == CTF_ERR) {
ret = set_errno(ctf_to_errno(ctf_errno(fp)));
mdb_warn("failed to add types");
goto cleanup;
diff --git a/usr/src/cmd/mdb/common/mdb/mdb_debug.c b/usr/src/cmd/mdb/common/mdb/mdb_debug.c
index d373d3726e..a158864a28 100644
--- a/usr/src/cmd/mdb/common/mdb/mdb_debug.c
+++ b/usr/src/cmd/mdb/common/mdb/mdb_debug.c
@@ -24,8 +24,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <mdb/mdb_debug.h>
#include <mdb/mdb_err.h>
#include <mdb/mdb_io.h>
@@ -157,7 +155,6 @@ mdb_dmode(uint_t bits)
mdb.m_debug = bits;
}
-#ifdef DEBUG
int
mdb_dassert(const char *expr, const char *file, int line)
{
@@ -165,7 +162,6 @@ mdb_dassert(const char *expr, const char *file, int line)
/*NOTREACHED*/
return (0);
}
-#endif
/*
* Function to convert mdb longjmp codes (see <mdb/mdb.h>) into a string for
diff --git a/usr/src/cmd/mdb/common/mdb/mdb_debug.h b/usr/src/cmd/mdb/common/mdb/mdb_debug.h
index cf3b97b499..f889238e4f 100644
--- a/usr/src/cmd/mdb/common/mdb/mdb_debug.h
+++ b/usr/src/cmd/mdb/common/mdb/mdb_debug.h
@@ -27,8 +27,6 @@
#ifndef _MDB_DEBUG_H
#define _MDB_DEBUG_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -66,8 +64,8 @@ extern void mdb_dmode(uint_t);
extern const char *mdb_err2str(int);
-#ifdef DEBUG
extern int mdb_dassert(const char *, const char *, int);
+#ifdef DEBUG
#define ASSERT(x) ((void)((x) || mdb_dassert(#x, __FILE__, __LINE__)))
#else
#define ASSERT(x)
diff --git a/usr/src/cmd/mdb/common/mdb/mdb_demangle.c b/usr/src/cmd/mdb/common/mdb/mdb_demangle.c
index 7ab7d19716..27c8a1d21d 100644
--- a/usr/src/cmd/mdb/common/mdb/mdb_demangle.c
+++ b/usr/src/cmd/mdb/common/mdb/mdb_demangle.c
@@ -24,7 +24,9 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
#include <mdb/mdb_modapi.h>
#include <mdb/mdb_demangle.h>
@@ -263,7 +265,8 @@ int
cmd_demangle(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
mdb_demangler_t *dmp = mdb.m_demangler;
- const char *path = LIB_DEMANGLE;
+ const char *path;
+ char buf[MAXPATHLEN];
if (argc > 1 || (argc > 0 && argv->a_type != MDB_TYPE_STRING))
return (DCMD_USAGE);
@@ -272,6 +275,10 @@ cmd_demangle(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
if (dmp != NULL)
mdb_dem_unload(mdb.m_demangler);
path = argv->a_un.a_str;
+ } else {
+ (void) snprintf(buf, MAXPATHLEN,
+ "%s/%s", mdb.m_root, LIB_DEMANGLE);
+ path = buf;
}
if (dmp != NULL && argc == 0 && !(mdb.m_flags & MDB_FL_DEMANGLE)) {
diff --git a/usr/src/cmd/mdb/common/mdb/mdb_fmt.c b/usr/src/cmd/mdb/common/mdb/mdb_fmt.c
index 773c451b8c..539a4c249e 100644
--- a/usr/src/cmd/mdb/common/mdb/mdb_fmt.c
+++ b/usr/src/cmd/mdb/common/mdb/mdb_fmt.c
@@ -21,10 +21,9 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Format String Decoder
*
@@ -90,6 +89,7 @@ typedef struct mdb_fmt_desc {
void *f_ptr; /* Data pointer (see above) */
const char *f_help; /* Additional help string */
size_t f_size; /* Size of type in bytes, or SZ_NONE */
+ boolean_t f_float; /* Is this a floating point type */
} mdb_fmt_desc_t;
static const char help_plus[] = "increment dot by the count";
@@ -556,7 +556,7 @@ static const mdb_fmt_desc_t fmttab[] = {
#ifdef _KMDB
{ FMT_NONE, NULL, NULL, 0 }, /* 70 = F */
#else
- { FMT_PRINTF, "%g", NULL, sizeof (double) }, /* 70 = F */
+ { FMT_PRINTF, "%g", NULL, sizeof (double), B_TRUE }, /* 70 = F */
#endif
{ FMT_PRINTF, "%-16llo", NULL, 8 }, /* 71 = G */
{ FMT_FUNC, FUNCP(fmt_swapint), help_swapint, 4 }, /* 72 = H */
@@ -596,7 +596,8 @@ static const mdb_fmt_desc_t fmttab[] = {
#ifdef _KMDB
{ FMT_NONE, NULL, NULL, 0 }, /* 102 = f */
#else
- { FMT_FUNC, FUNCP(fmt_float), help_f, sizeof (float) }, /* 102 = f */
+ { FMT_FUNC, FUNCP(fmt_float), help_f, sizeof (float),
+ B_TRUE }, /* 102 = f */
#endif
{ FMT_PRINTF, "%-16llq", NULL, 8 }, /* 103 = g */
{ FMT_FUNC, FUNCP(fmt_swapshort), help_swapshort, 2 }, /* 104 = h */
@@ -634,6 +635,7 @@ mdb_fmt_print(mdb_tgt_t *t, mdb_tgt_as_t as,
uint32_t i4;
uint16_t i2;
uint8_t i1;
+ double d;
} u;
if (fmt < 0 || fmt > (sizeof (fmttab) / sizeof (fmttab[0]))) {
@@ -665,6 +667,15 @@ mdb_fmt_print(mdb_tgt_t *t, mdb_tgt_as_t as,
fail("format %c is defined using illegal size\n", fmt);
}
+ if (fp->f_float == B_TRUE) {
+ if (fp->f_size != 8) {
+ fail("format %c is using illegal fp size\n",
+ fmt);
+ }
+
+ buf = &u.d;
+ }
+
while (cnt-- != 0) {
if (mdb_tgt_aread(t, as, buf, fp->f_size, addr) !=
fp->f_size) {
@@ -686,7 +697,13 @@ mdb_fmt_print(mdb_tgt_t *t, mdb_tgt_as_t as,
rvalue = u.i4;
break;
case 8:
- mdb_iob_printf(mdb.m_out, fp->f_ptr, u.i8);
+ if (fp->f_float) {
+ mdb_iob_printf(mdb.m_out, fp->f_ptr,
+ u.d);
+ } else {
+ mdb_iob_printf(mdb.m_out, fp->f_ptr,
+ u.i8);
+ }
rvalue = u.i8;
break;
}
diff --git a/usr/src/cmd/mdb/common/mdb/mdb_main.c b/usr/src/cmd/mdb/common/mdb/mdb_main.c
index 90e048bb82..a30ee45b7e 100644
--- a/usr/src/cmd/mdb/common/mdb/mdb_main.c
+++ b/usr/src/cmd/mdb/common/mdb/mdb_main.c
@@ -25,7 +25,7 @@
*/
/*
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -51,6 +51,7 @@
#include <libctf.h>
#include <errno.h>
#include <kvm.h>
+#include <zone.h>
#include <mdb/mdb_lex.h>
#include <mdb/mdb_debug.h>
@@ -797,9 +798,15 @@ main(int argc, char *argv[], char *envp[])
if (strchr(pidarg, '/') != NULL)
(void) mdb_iob_snprintf(object, MAXPATHLEN,
"%s/object/a.out", pidarg);
- else
+ else {
+ const char *root;
+
(void) mdb_iob_snprintf(object, MAXPATHLEN,
- "/proc/%s/object/a.out", pidarg);
+ "%s/proc/%s/object/a.out",
+ (root = zone_get_nroot()) != NULL ? root : "",
+ pidarg);
+ }
+
tgt_argv[tgt_argc++] = object;
tgt_argv[tgt_argc++] = pidarg;
}
diff --git a/usr/src/cmd/mdb/common/mdb/mdb_modapi.h b/usr/src/cmd/mdb/common/mdb/mdb_modapi.h
index 4e5d54d7d1..f9a2baa323 100644
--- a/usr/src/cmd/mdb/common/mdb/mdb_modapi.h
+++ b/usr/src/cmd/mdb/common/mdb/mdb_modapi.h
@@ -22,7 +22,7 @@
/*
* Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
- * Copyright (c) 2012 Joyent, Inc. All rights reserved.
+ * Copyright (c) 2013 Joyent, Inc. All rights reserved.
*/
#ifndef _MDB_MODAPI_H
@@ -66,7 +66,13 @@ extern "C" {
#define MAX(x, y) ((x) > (y) ? (x) : (y))
#endif
+#ifdef MDB_API_VERSION
+#if (MDB_API_VERSION != 3 && MDB_API_VERSION != 4)
+#error "Only modapi versions three and four are supported."
+#endif
+#else /* !MDB_API_VERISON */
#define MDB_API_VERSION 4 /* Current API version number */
+#endif /* MDB_API_VERISON */
/*
* Debugger command function flags:
@@ -80,11 +86,6 @@ extern "C" {
#define DCMD_HDRSPEC(fl) (((fl) & DCMD_LOOPFIRST) || !((fl) & DCMD_LOOP))
/*
- * Debugger tab command function flags
- */
-#define DCMD_TAB_SPACE 0x01 /* Tab cb invoked with trailing space */
-
-/*
* Debugger command function return values:
*/
#define DCMD_OK 0 /* Dcmd completed successfully */
@@ -113,10 +114,18 @@ typedef struct mdb_arg {
} a_un;
} mdb_arg_t;
+#if (MDB_API_VERSION >= 4)
+/*
+ * Debugger tab command function flags
+ */
+#define DCMD_TAB_SPACE 0x01 /* Tab cb invoked with trailing space */
+
typedef struct mdb_tab_cookie mdb_tab_cookie_t;
-typedef int mdb_dcmd_f(uintptr_t, uint_t, int, const mdb_arg_t *);
typedef int mdb_dcmd_tab_f(mdb_tab_cookie_t *, uint_t, int,
const mdb_arg_t *);
+#endif /* MDB_API_VERSION >= 4 */
+
+typedef int mdb_dcmd_f(uintptr_t, uint_t, int, const mdb_arg_t *);
typedef struct mdb_dcmd {
const char *dc_name; /* Command name */
@@ -124,7 +133,9 @@ typedef struct mdb_dcmd {
const char *dc_descr; /* Description */
mdb_dcmd_f *dc_funcp; /* Command function */
void (*dc_help)(void); /* Command help function (or NULL) */
+#if (MDB_API_VERSION >= 4)
mdb_dcmd_tab_f *dc_tabp; /* Tab completion function */
+#endif
} mdb_dcmd_t;
#define WALK_ERR -1 /* Walk fatal error (terminate walk) */
@@ -341,6 +352,7 @@ typedef void (*mdb_callback_f)(void *);
extern void *mdb_callback_add(int, mdb_callback_f, void *);
extern void mdb_callback_remove(void *);
+#if (MDB_API_VERSION >= 4)
#define MDB_TABC_ALL_TYPES 0x1 /* Include array types in type output */
#define MDB_TABC_MEMBERS 0x2 /* Tab comp. types with members */
#define MDB_TABC_NOPOINT 0x4 /* Tab comp. everything but pointers */
@@ -365,6 +377,7 @@ extern int mdb_tab_typename(int *, const mdb_arg_t **, char *buf, size_t len);
*/
extern int mdb_tab_complete_mt(mdb_tab_cookie_t *, uint_t, int,
const mdb_arg_t *);
+#endif /* MDB_API_VERSION >= 4 */
extern size_t strlcat(char *, const char *, size_t);
extern char *strcat(char *, const char *);
diff --git a/usr/src/cmd/mdb/common/mdb/mdb_print.c b/usr/src/cmd/mdb/common/mdb/mdb_print.c
index 3a083d4ac0..fe74265b68 100644
--- a/usr/src/cmd/mdb/common/mdb/mdb_print.c
+++ b/usr/src/cmd/mdb/common/mdb/mdb_print.c
@@ -2106,10 +2106,10 @@ cmd_print_tab_common(mdb_tab_cookie_t *mcp, uint_t flags, int argc,
/*
* This is the reason that tab completion was created. We're going to go
- * along and walk the delimiters until we find something a member that
- * we don't recognize, at which point we'll try and tab complete it.
- * Note that ::print takes multiple args, so this is going to operate on
- * whatever the last arg that we have is.
+ * along and walk the delimiters until we find something in a member
+ * that we don't recognize, at which point we'll try and tab complete
+ * it. Note that ::print takes multiple args, so this is going to
+ * operate on whatever the last arg that we have is.
*/
if (mdb_ctf_lookup_by_name(tn, &id) != 0)
return (1);
@@ -2119,11 +2119,11 @@ cmd_print_tab_common(mdb_tab_cookie_t *mcp, uint_t flags, int argc,
delim = parse_delimiter(&start);
/*
- * If we hit the case where we actually have no delimiters, than we need
+ * If we hit the case where we actually have no delimiters, then we need
* to make sure that we properly set up the fields the loops would.
*/
if (delim == MEMBER_DELIM_DONE)
- (void) mdb_snprintf(member, sizeof (member), "%s", start);
+ (void) mdb_snprintf(member, sizeof (member), start);
while (delim != MEMBER_DELIM_DONE) {
switch (delim) {
@@ -2180,7 +2180,7 @@ cmd_print_tab_common(mdb_tab_cookie_t *mcp, uint_t flags, int argc,
/*
* We are going to try to resolve this name as a member. There
- * are a few two different questions that we need to answer. The
+ * are a two different questions that we need to answer. The
* first is do we recognize this member. The second is are we at
* the end of the string. If we encounter a member that we don't
* recognize before the end, then we have to error out and can't
@@ -2210,6 +2210,7 @@ cmd_print_tab_common(mdb_tab_cookie_t *mcp, uint_t flags, int argc,
* already have in rid.
*/
return (mdb_tab_complete_member_by_id(mcp, rid, member));
+
}
int
@@ -2539,8 +2540,7 @@ print_help(void)
}
static int
-printf_signed(mdb_ctf_id_t id, uintptr_t addr, ulong_t off, char *fmt,
- boolean_t sign)
+printf_signed(mdb_ctf_id_t id, uintptr_t addr, ulong_t off, char *fmt, int sign)
{
ssize_t size;
mdb_ctf_id_t base;
@@ -2558,7 +2558,7 @@ printf_signed(mdb_ctf_id_t id, uintptr_t addr, ulong_t off, char *fmt,
} u;
if (mdb_ctf_type_resolve(id, &base) == -1) {
- mdb_warn("could not resolve type");
+ mdb_warn("could not resolve type\n");
return (DCMD_ABORT);
}
@@ -2796,7 +2796,6 @@ printf_string(mdb_ctf_id_t id, uintptr_t addr, ulong_t off, char *fmt)
if (size != 1) {
mdb_warn("string format specifier requires "
"an array of characters\n");
- return (DCMD_ABORT);
}
bzero(buf, sizeof (buf));
@@ -2930,7 +2929,7 @@ cmd_printf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
if (argc == 0 || argv[0].a_type != MDB_TYPE_STRING) {
mdb_warn("expected a format string\n");
- return (DCMD_USAGE);
+ return (DCMD_ABORT);
}
/*
@@ -2939,12 +2938,6 @@ cmd_printf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
* subset of mdb_printf() format strings that we allow.
*/
fmt = argv[0].a_un.a_str;
- /*
- * 'dest' must be large enough to hold a copy of the format string,
- * plus a NUL and up to 2 additional characters for each conversion
- * in the format string. This gives us a bloat factor of 5/2 ~= 3.
- * e.g. "%d" (strlen of 2) --> "%lld\0" (need 5 bytes)
- */
dest = mdb_zalloc(strlen(fmt) * 3, UM_SLEEP | UM_GC);
fmts = mdb_zalloc(strlen(fmt) * sizeof (char *), UM_SLEEP | UM_GC);
funcs = mdb_zalloc(strlen(fmt) * sizeof (void *), UM_SLEEP | UM_GC);
@@ -3128,22 +3121,22 @@ static char _mdb_printf_help[] =
"\n"
" %% Prints the '%' symbol.\n"
" %a Prints the member in symbolic form.\n"
-" %d Prints the member as a decimal integer. If the member is a signed\n"
+" %d Prints the member as a decimal integer. If the member is a signed\n"
" integer type, the output will be signed.\n"
" %H Prints the member as a human-readable size.\n"
" %I Prints the member as an IPv4 address (must be 32-bit integer type).\n"
" %N Prints the member as an IPv6 address (must be of type in6_addr_t).\n"
" %o Prints the member as an unsigned octal integer.\n"
" %p Prints the member as a pointer, in hexadecimal.\n"
-" %q Prints the member in signed octal. Honk if you ever use this!\n"
-" %r Prints the member as an unsigned value in the current output radix.\n"
-" %R Prints the member as a signed value in the current output radix.\n"
+" %q Prints the member in signed octal. Honk if you ever use this!\n"
+" %r Prints the member as an unsigned value in the current output radix. \n"
+" %R Prints the member as a signed value in the current output radix. \n"
" %s Prints the member as a string (requires a pointer or an array of\n"
" characters).\n"
" %u Prints the member as an unsigned decimal integer.\n"
" %x Prints the member in hexadecimal.\n"
" %X Prints the member in hexadecimal, using the characters A-F as the\n"
-" digits for the values 10-15.\n"
+" digits for the values 10-15. \n"
" %Y Prints the member as a time_t as the string "
"'year month day HH:MM:SS'.\n"
"\n"
@@ -3156,13 +3149,13 @@ static char _mdb_printf_help[] =
"\n"
"The following flag specifers are recognized by ::printf:\n"
"\n"
-" %- Left-justify the output within the specified field width. If the\n"
+" %- Left-justify the output within the specified field width. If the\n"
" width of the output is less than the specified field width, the\n"
-" output will be padded with blanks on the right-hand side. Without\n"
+" output will be padded with blanks on the right-hand side. Without\n"
" %-, values are right-justified by default.\n"
"\n"
" %0 Zero-fill the output field if the output is right-justified and the\n"
-" width of the output is less than the specified field width. Without\n"
+" width of the output is less than the specified field width. Without\n"
" %0, right-justified values are prepended with blanks in order to\n"
" fill the field.\n"
"\n"
diff --git a/usr/src/cmd/mdb/common/mdb/mdb_tab.c b/usr/src/cmd/mdb/common/mdb/mdb_tab.c
index af32623470..66bd18586e 100644
--- a/usr/src/cmd/mdb/common/mdb/mdb_tab.c
+++ b/usr/src/cmd/mdb/common/mdb/mdb_tab.c
@@ -388,11 +388,6 @@ mdb_tab_size(mdb_tab_cookie_t *mcp)
return (mdb_nv_size(&mcp->mtc_nv));
}
-/*
- * Determine whether the specified name is a valid tab completion for
- * the given command. If the name is a valid tab completion then
- * it will be saved in the mdb_tab_cookie_t.
- */
void
mdb_tab_insert(mdb_tab_cookie_t *mcp, const char *name)
{
@@ -508,31 +503,18 @@ tab_complete_type(mdb_ctf_id_t id, void *arg)
mdb_tab_cookie_t *mcp = arg;
uint_t flags = (uint_t)(uintptr_t)mcp->mtc_cba;
- /*
- * CTF data includes types that mdb commands don't understand. Before
- * we resolve the actual type prune any entry that is a type we
- * don't care about.
- */
- switch (mdb_ctf_type_kind(id)) {
- case CTF_K_CONST:
- case CTF_K_RESTRICT:
- case CTF_K_VOLATILE:
- return (0);
- }
-
if (mdb_ctf_type_resolve(id, &rid) != 0)
return (1);
rkind = mdb_ctf_type_kind(rid);
-
- if ((flags & MDB_TABC_MEMBERS) && rkind != CTF_K_STRUCT &&
+ if (flags & MDB_TABC_MEMBERS && rkind != CTF_K_STRUCT &&
rkind != CTF_K_UNION)
return (0);
- if ((flags & MDB_TABC_NOPOINT) && rkind == CTF_K_POINTER)
+ if (flags & MDB_TABC_NOPOINT && rkind == CTF_K_POINTER)
return (0);
- if ((flags & MDB_TABC_NOARRAY) && rkind == CTF_K_ARRAY)
+ if (flags & MDB_TABC_NOARRAY && rkind == CTF_K_ARRAY)
return (0);
(void) mdb_ctf_type_name(id, buf, sizeof (buf));
diff --git a/usr/src/cmd/mdb/common/modules/dtrace/dtrace.c b/usr/src/cmd/mdb/common/modules/dtrace/dtrace.c
index 7858866a01..50ad2c3497 100644
--- a/usr/src/cmd/mdb/common/modules/dtrace/dtrace.c
+++ b/usr/src/cmd/mdb/common/modules/dtrace/dtrace.c
@@ -22,7 +22,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/
/*
@@ -433,6 +433,7 @@ dtracemdb_bufsnap(dtrace_buffer_t *which, dtrace_bufdesc_t *desc)
desc->dtbd_size = bufsize;
desc->dtbd_drops = buf.dtb_drops;
desc->dtbd_errors = buf.dtb_errors;
+ desc->dtbd_timestamp = gethrtime();
return (0);
}
diff --git a/usr/src/cmd/mdb/common/modules/genunix/Makefile.files b/usr/src/cmd/mdb/common/modules/genunix/Makefile.files
index bb9fed2547..5d9cf9bb8f 100644
--- a/usr/src/cmd/mdb/common/modules/genunix/Makefile.files
+++ b/usr/src/cmd/mdb/common/modules/genunix/Makefile.files
@@ -21,7 +21,7 @@
#
# Copyright 2011 Nexenta Systems, Inc. All rights reserved.
# Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
-# Copyright (c) 2013, Joyent, Inc. All rights reserved.
+# Copyright 2016 Joyent, Inc.
# Copyright (c) 2013 by Delphix. All rights reserved.
#
@@ -44,6 +44,7 @@ GENUNIX_SRCS = \
ddi_periodic.c \
devinfo.c \
dist.c \
+ dnlc.c \
findstack.c \
findstack_subr.c \
fm.c \
diff --git a/usr/src/cmd/mdb/common/modules/genunix/devinfo.c b/usr/src/cmd/mdb/common/modules/genunix/devinfo.c
index 513103a682..d5da280040 100644
--- a/usr/src/cmd/mdb/common/modules/genunix/devinfo.c
+++ b/usr/src/cmd/mdb/common/modules/genunix/devinfo.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
*/
#include <sys/types.h>
@@ -74,9 +75,10 @@ prtconf_help(void)
"with an address, prints the parents of, "
"and all children of, that address.\n\n"
"Switches:\n"
- " -v be verbose - print device property lists\n"
- " -p only print the ancestors of the given node\n"
- " -c only print the children of the given node\n");
+ " -v be verbose - print device property lists\n"
+ " -p only print the ancestors of the given node\n"
+ " -c only print the children of the given node\n"
+ " -d driver only print instances of driver\n");
}
void
@@ -580,7 +582,8 @@ is_printable_string(unsigned char *prop_value)
}
static void
-devinfo_print_props_type(int type) {
+devinfo_print_props_type(int type)
+{
char *type_str = NULL;
switch (type) {
@@ -612,7 +615,7 @@ devinfo_print_props_type(int type) {
static void
devinfo_print_props_value(int elem_size, int nelem,
- unsigned char *prop_value, int prop_value_len)
+ unsigned char *prop_value, int prop_value_len)
{
int i;
@@ -839,7 +842,8 @@ next:
}
static void
-devinfo_pathinfo_state(mdi_pathinfo_state_t state) {
+devinfo_pathinfo_state(mdi_pathinfo_state_t state)
+{
char *type_str = NULL;
switch (state) {
@@ -866,7 +870,8 @@ devinfo_pathinfo_state(mdi_pathinfo_state_t state) {
}
static void
-devinfo_print_pathing(int mdi_component, void *mdi_client) {
+devinfo_print_pathing(int mdi_component, void *mdi_client)
+{
mdi_client_t mdi_c;
struct mdi_pathinfo *pip;
@@ -898,7 +903,7 @@ devinfo_print_pathing(int mdi_component, void *mdi_client) {
/* read in the pathinfo structure */
if (mdb_vread((void*)&pi, sizeof (pi),
- (uintptr_t)pip) == -1) {
+ (uintptr_t)pip) == -1) {
mdb_warn("failed to read mdi_pathinfo at %p",
(uintptr_t)pip);
goto exit;
@@ -906,7 +911,7 @@ devinfo_print_pathing(int mdi_component, void *mdi_client) {
/* read in the pchi (path host adapter) info */
if (mdb_vread((void*)&ph, sizeof (ph),
- (uintptr_t)pi.pi_phci) == -1) {
+ (uintptr_t)pi.pi_phci) == -1) {
mdb_warn("failed to read mdi_pchi at %p",
(uintptr_t)pi.pi_phci);
goto exit;
@@ -914,13 +919,13 @@ devinfo_print_pathing(int mdi_component, void *mdi_client) {
/* read in the dip of the phci so we can get it's name */
if (mdb_vread((void*)&ph_di, sizeof (ph_di),
- (uintptr_t)ph.ph_dip) == -1) {
+ (uintptr_t)ph.ph_dip) == -1) {
mdb_warn("failed to read mdi_pchi at %p",
(uintptr_t)ph.ph_dip);
goto exit;
}
if (mdb_vread(binding_name, sizeof (binding_name),
- (uintptr_t)ph_di.devi_binding_name) == -1) {
+ (uintptr_t)ph_di.devi_binding_name) == -1) {
mdb_warn("failed to read binding_name at %p",
(uintptr_t)ph_di.devi_binding_name);
goto exit;
@@ -932,7 +937,7 @@ devinfo_print_pathing(int mdi_component, void *mdi_client) {
/* print out the pathing info */
mdb_inc_indent(DEVINFO_PROP_INDENT);
if (mdb_pwalk_dcmd(NVPAIR_WALKER_FQNAME, NVPAIR_DCMD_FQNAME,
- 0, NULL, (uintptr_t)pi.pi_prop) != 0) {
+ 0, NULL, (uintptr_t)pi.pi_prop) != 0) {
mdb_dec_indent(DEVINFO_PROP_INDENT);
goto exit;
}
@@ -954,6 +959,7 @@ devinfo_print(uintptr_t addr, struct dev_info *dev, devinfo_cb_data_t *data)
char dname[MODMAXNAMELEN + 1];
devinfo_node_t *din = (devinfo_node_t *)dev;
ddi_prop_t *global_props = NULL;
+ boolean_t hdname = B_FALSE;
if (mdb_readstr(binding_name, sizeof (binding_name),
(uintptr_t)dev->devi_binding_name) == -1) {
@@ -974,6 +980,30 @@ devinfo_print(uintptr_t addr, struct dev_info *dev, devinfo_cb_data_t *data)
global_props = plist.prop_list;
}
+ if (dev->devi_node_state > DS_ATTACHED) {
+ if (mdb_devinfo2driver(addr, dname, sizeof (dname)) == 0)
+ hdname = B_TRUE;
+ }
+
+ /*
+ * If a filter is installed and we don't have the driver's name, we
+ * always skip it. Also if the filter doesn't match, then we'll also
+ * skip the driver.
+ */
+ if (data->di_filter != NULL &&
+ (!hdname || strcmp(data->di_filter, dname) != 0)) {
+ return (WALK_NEXT);
+ }
+
+ /*
+ * If we are output to a pipe, we only print the address of the
+ * devinfo_t.
+ */
+ if (data->di_flags & DEVINFO_PIPE) {
+ mdb_printf("%-0?p\n", addr);
+ return (WALK_NEXT);
+ }
+
mdb_inc_indent(din->din_depth * DEVINFO_TREE_INDENT);
if ((addr == data->di_base) || (data->di_flags & DEVINFO_ALLBOLD))
mdb_printf("%<b>");
@@ -985,7 +1015,7 @@ devinfo_print(uintptr_t addr, struct dev_info *dev, devinfo_cb_data_t *data)
if (dev->devi_node_state < DS_ATTACHED)
mdb_printf(" (driver not attached)");
- else if (mdb_devinfo2driver(addr, dname, sizeof (dname)) != 0)
+ else if (hdname == B_FALSE)
mdb_printf(" (could not determine driver name)");
else
mdb_printf(" (driver name: %s)", dname);
@@ -1017,8 +1047,13 @@ prtconf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
int status;
data.di_flags = DEVINFO_PARENT | DEVINFO_CHILD;
+ data.di_filter = NULL;
+
+ if (flags & DCMD_PIPE_OUT)
+ data.di_flags |= DEVINFO_PIPE;
if (mdb_getopts(argc, argv,
+ 'd', MDB_OPT_STR, &data.di_filter,
'v', MDB_OPT_SETBITS, DEVINFO_VERBOSE, &data.di_flags,
'p', MDB_OPT_CLRBITS, DEVINFO_CHILD, &data.di_flags,
'c', MDB_OPT_CLRBITS, DEVINFO_PARENT, &data.di_flags, NULL) != argc)
@@ -1036,7 +1071,8 @@ prtconf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
}
data.di_base = addr;
- mdb_printf("%<u>%-?s %-50s%</u>\n", "DEVINFO", "NAME");
+ if (!(flags & DCMD_PIPE_OUT))
+ mdb_printf("%<u>%-?s %-50s%</u>\n", "DEVINFO", "NAME");
if ((data.di_flags & (DEVINFO_PARENT | DEVINFO_CHILD)) ==
(DEVINFO_PARENT | DEVINFO_CHILD)) {
@@ -1118,6 +1154,7 @@ devinfo(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
data.di_flags = DEVINFO_VERBOSE;
data.di_base = addr;
+ data.di_filter = NULL;
if (mdb_getopts(argc, argv,
'q', MDB_OPT_CLRBITS, DEVINFO_VERBOSE, &data.di_flags,
diff --git a/usr/src/cmd/mdb/common/modules/genunix/devinfo.h b/usr/src/cmd/mdb/common/modules/genunix/devinfo.h
index a279faf215..438fc2268f 100644
--- a/usr/src/cmd/mdb/common/modules/genunix/devinfo.h
+++ b/usr/src/cmd/mdb/common/modules/genunix/devinfo.h
@@ -42,10 +42,12 @@ extern "C" {
#define DEVINFO_ALLBOLD 0x8
#define DEVINFO_SUMMARY 0x10
#define DEVINFO_HP_PHYSICAL 0x20
+#define DEVINFO_PIPE 0x40
typedef struct devinfo_cb_data {
uintptr_t di_base;
uint_t di_flags;
+ char *di_filter;
} devinfo_cb_data_t;
extern int devinfo_walk_init(mdb_walk_state_t *);
diff --git a/usr/src/cmd/mdb/common/modules/genunix/dnlc.c b/usr/src/cmd/mdb/common/modules/genunix/dnlc.c
new file mode 100644
index 0000000000..27a93c4ae7
--- /dev/null
+++ b/usr/src/cmd/mdb/common/modules/genunix/dnlc.c
@@ -0,0 +1,104 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include "dnlc.h"
+
+#include <mdb/mdb_modapi.h>
+#include <sys/dnlc.h>
+
+typedef struct dnlc_walk {
+ int dw_hashsz;
+ int dw_index;
+ uintptr_t dw_hash;
+ uintptr_t dw_head;
+} dnlc_walk_t;
+
+
+int
+dnlc_walk_init(mdb_walk_state_t *wsp)
+{
+ dnlc_walk_t *dwp;
+
+ if (wsp->walk_addr != NULL) {
+ mdb_warn("dnlc walk doesn't support global walks\n");
+ return (WALK_ERR);
+ }
+
+ dwp = mdb_zalloc(sizeof (dnlc_walk_t), UM_SLEEP);
+ if (mdb_readvar(&dwp->dw_hashsz, "nc_hashsz") == -1 ||
+ dwp->dw_hashsz <= 0) {
+ mdb_warn("failed to read 'nc_hashsz'\n");
+ mdb_free(dwp, sizeof (dnlc_walk_t));
+ return (WALK_ERR);
+ }
+ if (dwp->dw_hashsz <= 0) {
+ mdb_warn("invalid 'nc_hashsz' value\n");
+ mdb_free(dwp, sizeof (dnlc_walk_t));
+ return (WALK_ERR);
+ }
+ if (mdb_readvar(&dwp->dw_hash, "nc_hash") == -1) {
+ mdb_warn("failed to read 'nc_hash'\n");
+ mdb_free(dwp, sizeof (dnlc_walk_t));
+ return (WALK_ERR);
+ }
+
+ wsp->walk_data = dwp;
+ return (WALK_NEXT);
+}
+
+int
+dnlc_walk_step(mdb_walk_state_t *wsp)
+{
+ dnlc_walk_t *dwp = wsp->walk_data;
+ nc_hash_t hash;
+ uintptr_t result, addr = wsp->walk_addr;
+
+next:
+ while (addr == dwp->dw_head || addr == NULL) {
+ if (dwp->dw_index >= dwp->dw_hashsz) {
+ return (WALK_DONE);
+ }
+ dwp->dw_head = dwp->dw_hash +
+ (sizeof (nc_hash_t) * dwp->dw_index);
+ if (mdb_vread(&hash, sizeof (hash), dwp->dw_head) == -1) {
+ mdb_warn("failed to read nc_hash_t at %#lx",
+ dwp->dw_hash);
+ return (WALK_ERR);
+ }
+ dwp->dw_index++;
+ addr = (uintptr_t)hash.hash_next;
+ }
+
+ result = addr;
+ if (mdb_vread(&addr, sizeof (uintptr_t), addr) == -1) {
+ /*
+ * This entry may have become bogus since acquiring the address
+ * from its neighbor. Continue on if that is the case.
+ */
+ addr = NULL;
+ goto next;
+ }
+ wsp->walk_addr = addr;
+
+ return (wsp->walk_callback(result, &result, wsp->walk_cbdata));
+}
+
+void
+dnlc_walk_fini(mdb_walk_state_t *wsp)
+{
+ dnlc_walk_t *dwp = wsp->walk_data;
+
+ mdb_free(dwp, sizeof (dnlc_walk_t));
+}
diff --git a/usr/src/cmd/mdb/common/modules/genunix/dnlc.h b/usr/src/cmd/mdb/common/modules/genunix/dnlc.h
new file mode 100644
index 0000000000..a046d8b0b7
--- /dev/null
+++ b/usr/src/cmd/mdb/common/modules/genunix/dnlc.h
@@ -0,0 +1,33 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#ifndef _MDB_DNLC_H
+#define _MDB_DNLC_H
+
+#include <mdb/mdb_modapi.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int dnlc_walk_init(mdb_walk_state_t *);
+extern int dnlc_walk_step(mdb_walk_state_t *);
+extern void dnlc_walk_fini(mdb_walk_state_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _MDB_DNLC_H */
diff --git a/usr/src/cmd/mdb/common/modules/genunix/genunix.c b/usr/src/cmd/mdb/common/modules/genunix/genunix.c
index 4b24ee5a31..11a4233704 100644
--- a/usr/src/cmd/mdb/common/modules/genunix/genunix.c
+++ b/usr/src/cmd/mdb/common/modules/genunix/genunix.c
@@ -21,7 +21,7 @@
/*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 Joyent, Inc. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
* Copyright (c) 2013 by Delphix. All rights reserved.
*/
@@ -74,6 +74,7 @@
#include "damap.h"
#include "ddi_periodic.h"
#include "devinfo.h"
+#include "dnlc.h"
#include "findstack.h"
#include "fm.h"
#include "gcore.h"
@@ -113,10 +114,6 @@
*/
#define NINTR 16
-#define KILOS 10
-#define MEGS 20
-#define GIGS 30
-
#ifndef STACK_BIAS
#define STACK_BIAS 0
#endif
@@ -169,6 +166,88 @@ ps_threadprint(uintptr_t addr, const void *data, void *private)
return (WALK_NEXT);
}
+static int
+pflags(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ proc_t pr;
+ struct pid pid;
+
+ static const mdb_bitmask_t p_flag_bits[] = {
+ { "SSYS", SSYS, SSYS },
+ { "SEXITING", SEXITING, SEXITING },
+ { "SITBUSY", SITBUSY, SITBUSY },
+ { "SFORKING", SFORKING, SFORKING },
+ { "SWATCHOK", SWATCHOK, SWATCHOK },
+ { "SKILLED", SKILLED, SKILLED },
+ { "SSCONT", SSCONT, SSCONT },
+ { "SZONETOP", SZONETOP, SZONETOP },
+ { "SEXTKILLED", SEXTKILLED, SEXTKILLED },
+ { "SUGID", SUGID, SUGID },
+ { "SEXECED", SEXECED, SEXECED },
+ { "SJCTL", SJCTL, SJCTL },
+ { "SNOWAIT", SNOWAIT, SNOWAIT },
+ { "SVFORK", SVFORK, SVFORK },
+ { "SVFWAIT", SVFWAIT, SVFWAIT },
+ { "SEXITLWPS", SEXITLWPS, SEXITLWPS },
+ { "SHOLDFORK", SHOLDFORK, SHOLDFORK },
+ { "SHOLDFORK1", SHOLDFORK1, SHOLDFORK1 },
+ { "SCOREDUMP", SCOREDUMP, SCOREDUMP },
+ { "SMSACCT", SMSACCT, SMSACCT },
+ { "SLWPWRAP", SLWPWRAP, SLWPWRAP },
+ { "SAUTOLPG", SAUTOLPG, SAUTOLPG },
+ { "SNOCD", SNOCD, SNOCD },
+ { "SHOLDWATCH", SHOLDWATCH, SHOLDWATCH },
+ { "SMSFORK", SMSFORK, SMSFORK },
+ { "SDOCORE", SDOCORE, SDOCORE },
+ { NULL, 0, 0 }
+ };
+
+ static const mdb_bitmask_t p_pidflag_bits[] = {
+ { "CLDPEND", CLDPEND, CLDPEND },
+ { "CLDCONT", CLDCONT, CLDCONT },
+ { "CLDNOSIGCHLD", CLDNOSIGCHLD, CLDNOSIGCHLD },
+ { "CLDWAITPID", CLDWAITPID, CLDWAITPID },
+ { NULL, 0, 0 }
+ };
+
+ static const mdb_bitmask_t p_proc_flag_bits[] = {
+ { "P_PR_TRACE", P_PR_TRACE, P_PR_TRACE },
+ { "P_PR_PTRACE", P_PR_PTRACE, P_PR_PTRACE },
+ { "P_PR_FORK", P_PR_FORK, P_PR_FORK },
+ { "P_PR_LOCK", P_PR_LOCK, P_PR_LOCK },
+ { "P_PR_ASYNC", P_PR_ASYNC, P_PR_ASYNC },
+ { "P_PR_EXEC", P_PR_EXEC, P_PR_EXEC },
+ { "P_PR_BPTADJ", P_PR_BPTADJ, P_PR_BPTADJ },
+ { "P_PR_RUNLCL", P_PR_RUNLCL, P_PR_RUNLCL },
+ { "P_PR_KILLCL", P_PR_KILLCL, P_PR_KILLCL },
+ { NULL, 0, 0 }
+ };
+
+ if (!(flags & DCMD_ADDRSPEC)) {
+ if (mdb_walk_dcmd("proc", "pflags", argc, argv) == -1) {
+ mdb_warn("can't walk 'proc'");
+ return (DCMD_ERR);
+ }
+ return (DCMD_OK);
+ }
+
+ if (mdb_vread(&pr, sizeof (pr), addr) == -1 ||
+ mdb_vread(&pid, sizeof (pid), (uintptr_t)pr.p_pidp) == -1) {
+ mdb_warn("cannot read proc_t or pid");
+ return (DCMD_ERR);
+ }
+
+ mdb_printf("%p [pid %d]:\n", addr, pid.pid_id);
+ mdb_printf("\tp_flag: %08x <%b>\n", pr.p_flag, pr.p_flag,
+ p_flag_bits);
+ mdb_printf("\tp_pidflag: %08x <%b>\n", pr.p_pidflag, pr.p_pidflag,
+ p_pidflag_bits);
+ mdb_printf("\tp_proc_flag: %08x <%b>\n", pr.p_proc_flag, pr.p_proc_flag,
+ p_proc_flag_bits);
+
+ return (DCMD_OK);
+}
+
int
ps(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
@@ -1904,24 +1983,24 @@ typedef struct datafmt {
} datafmt_t;
static datafmt_t kmemfmt[] = {
- { "cache ", "name ",
- "-------------------------", "%-25s " },
- { " buf", " size", "------", "%6u " },
- { " buf", "in use", "------", "%6u " },
- { " buf", " total", "------", "%6u " },
- { " memory", " in use", "----------", "%10lu%c " },
- { " alloc", " succeed", "---------", "%9u " },
- { "alloc", " fail", "-----", "%5u " },
+ { "cache ", "name ",
+ "------------------------------", "%-30s " },
+ { " buf", " size", "-----", "%5H " },
+ { " buf", " in use", "---------", "%9u " },
+ { " buf", " total", "---------", "%9u " },
+ { "memory", "in use", "------", "%6lH " },
+ { " alloc", " succeed", "----------", "%10u " },
+ { "alloc", " fail", "-----", "%5u" },
{ NULL, NULL, NULL, NULL }
};
static datafmt_t vmemfmt[] = {
- { "vmem ", "name ",
- "-------------------------", "%-*s " },
- { " memory", " in use", "----------", "%9llu%c " },
- { " memory", " total", "-----------", "%10llu%c " },
- { " memory", " import", "----------", "%9llu%c " },
- { " alloc", " succeed", "---------", "%9llu " },
+ { "vmem ", "name ",
+ "------------------------------", "%-*s " },
+ { " memory", " in use", "---------", "%9llH " },
+ { " memory", " total", "----------", "%10llH " },
+ { " memory", " import", "---------", "%9llH " },
+ { " alloc", " succeed", "----------", "%10llu " },
{ "alloc", " fail", "-----", "%5llu " },
{ NULL, NULL, NULL, NULL }
};
@@ -1973,15 +2052,9 @@ typedef struct kmastat_vmem {
int kv_fail;
} kmastat_vmem_t;
-typedef struct kmastat_args {
- kmastat_vmem_t **ka_kvpp;
- uint_t ka_shift;
-} kmastat_args_t;
-
static int
-kmastat_cache(uintptr_t addr, const kmem_cache_t *cp, kmastat_args_t *kap)
+kmastat_cache(uintptr_t addr, const kmem_cache_t *cp, kmastat_vmem_t **kvpp)
{
- kmastat_vmem_t **kvpp = kap->ka_kvpp;
kmastat_vmem_t *kv;
datafmt_t *dfp = kmemfmt;
int magsize;
@@ -2022,9 +2095,7 @@ out:
mdb_printf((dfp++)->fmt, cp->cache_bufsize);
mdb_printf((dfp++)->fmt, total - avail);
mdb_printf((dfp++)->fmt, total);
- mdb_printf((dfp++)->fmt, meminuse >> kap->ka_shift,
- kap->ka_shift == GIGS ? 'G' : kap->ka_shift == MEGS ? 'M' :
- kap->ka_shift == KILOS ? 'K' : 'B');
+ mdb_printf((dfp++)->fmt, meminuse);
mdb_printf((dfp++)->fmt, alloc);
mdb_printf((dfp++)->fmt, cp->cache_alloc_fail);
mdb_printf("\n");
@@ -2033,9 +2104,8 @@ out:
}
static int
-kmastat_vmem_totals(uintptr_t addr, const vmem_t *v, kmastat_args_t *kap)
+kmastat_vmem_totals(uintptr_t addr, const vmem_t *v, kmastat_vmem_t *kv)
{
- kmastat_vmem_t *kv = *kap->ka_kvpp;
size_t len;
while (kv != NULL && kv->kv_addr != addr)
@@ -2044,20 +2114,18 @@ kmastat_vmem_totals(uintptr_t addr, const vmem_t *v, kmastat_args_t *kap)
if (kv == NULL || kv->kv_alloc == 0)
return (WALK_NEXT);
- len = MIN(17, strlen(v->vm_name));
+ len = MIN(22, strlen(v->vm_name));
- mdb_printf("Total [%s]%*s %6s %6s %6s %10lu%c %9u %5u\n", v->vm_name,
- 17 - len, "", "", "", "",
- kv->kv_meminuse >> kap->ka_shift,
- kap->ka_shift == GIGS ? 'G' : kap->ka_shift == MEGS ? 'M' :
- kap->ka_shift == KILOS ? 'K' : 'B', kv->kv_alloc, kv->kv_fail);
+ mdb_printf("Total [%s]%*s %5s %9s %9s %6lH %10u %5u\n", v->vm_name,
+ 22 - len, "", "", "", "",
+ kv->kv_meminuse, kv->kv_alloc, kv->kv_fail);
return (WALK_NEXT);
}
/*ARGSUSED*/
static int
-kmastat_vmem(uintptr_t addr, const vmem_t *v, const uint_t *shiftp)
+kmastat_vmem(uintptr_t addr, const vmem_t *v, const void *ignored)
{
datafmt_t *dfp = vmemfmt;
const vmem_kstat_t *vkp = &v->vm_kstat;
@@ -2075,16 +2143,10 @@ kmastat_vmem(uintptr_t addr, const vmem_t *v, const uint_t *shiftp)
}
mdb_printf("%*s", ident, "");
- mdb_printf((dfp++)->fmt, 25 - ident, v->vm_name);
- mdb_printf((dfp++)->fmt, vkp->vk_mem_inuse.value.ui64 >> *shiftp,
- *shiftp == GIGS ? 'G' : *shiftp == MEGS ? 'M' :
- *shiftp == KILOS ? 'K' : 'B');
- mdb_printf((dfp++)->fmt, vkp->vk_mem_total.value.ui64 >> *shiftp,
- *shiftp == GIGS ? 'G' : *shiftp == MEGS ? 'M' :
- *shiftp == KILOS ? 'K' : 'B');
- mdb_printf((dfp++)->fmt, vkp->vk_mem_import.value.ui64 >> *shiftp,
- *shiftp == GIGS ? 'G' : *shiftp == MEGS ? 'M' :
- *shiftp == KILOS ? 'K' : 'B');
+ mdb_printf((dfp++)->fmt, 30 - ident, v->vm_name);
+ mdb_printf((dfp++)->fmt, vkp->vk_mem_inuse.value.ui64);
+ mdb_printf((dfp++)->fmt, vkp->vk_mem_total.value.ui64);
+ mdb_printf((dfp++)->fmt, vkp->vk_mem_import.value.ui64);
mdb_printf((dfp++)->fmt, vkp->vk_alloc.value.ui64);
mdb_printf((dfp++)->fmt, vkp->vk_fail.value.ui64);
@@ -2099,44 +2161,35 @@ kmastat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
kmastat_vmem_t *kv = NULL;
datafmt_t *dfp;
- kmastat_args_t ka;
-
- ka.ka_shift = 0;
- if (mdb_getopts(argc, argv,
- 'k', MDB_OPT_SETBITS, KILOS, &ka.ka_shift,
- 'm', MDB_OPT_SETBITS, MEGS, &ka.ka_shift,
- 'g', MDB_OPT_SETBITS, GIGS, &ka.ka_shift, NULL) != argc)
- return (DCMD_USAGE);
for (dfp = kmemfmt; dfp->hdr1 != NULL; dfp++)
- mdb_printf("%s ", dfp->hdr1);
+ mdb_printf("%s%s", dfp == kmemfmt ? "" : " ", dfp->hdr1);
mdb_printf("\n");
for (dfp = kmemfmt; dfp->hdr1 != NULL; dfp++)
- mdb_printf("%s ", dfp->hdr2);
+ mdb_printf("%s%s", dfp == kmemfmt ? "" : " ", dfp->hdr2);
mdb_printf("\n");
for (dfp = kmemfmt; dfp->hdr1 != NULL; dfp++)
- mdb_printf("%s ", dfp->dashes);
+ mdb_printf("%s%s", dfp == kmemfmt ? "" : " ", dfp->dashes);
mdb_printf("\n");
- ka.ka_kvpp = &kv;
- if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmastat_cache, &ka) == -1) {
+ if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmastat_cache, &kv) == -1) {
mdb_warn("can't walk 'kmem_cache'");
return (DCMD_ERR);
}
for (dfp = kmemfmt; dfp->hdr1 != NULL; dfp++)
- mdb_printf("%s ", dfp->dashes);
+ mdb_printf("%s%s", dfp == kmemfmt ? "" : " ", dfp->dashes);
mdb_printf("\n");
- if (mdb_walk("vmem", (mdb_walk_cb_t)kmastat_vmem_totals, &ka) == -1) {
+ if (mdb_walk("vmem", (mdb_walk_cb_t)kmastat_vmem_totals, kv) == -1) {
mdb_warn("can't walk 'vmem'");
return (DCMD_ERR);
}
for (dfp = kmemfmt; dfp->hdr1 != NULL; dfp++)
- mdb_printf("%s ", dfp->dashes);
+ mdb_printf("%s%s", dfp == kmemfmt ? "" : " ", dfp->dashes);
mdb_printf("\n");
mdb_printf("\n");
@@ -2153,7 +2206,7 @@ kmastat(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
mdb_printf("%s ", dfp->dashes);
mdb_printf("\n");
- if (mdb_walk("vmem", (mdb_walk_cb_t)kmastat_vmem, &ka.ka_shift) == -1) {
+ if (mdb_walk("vmem", (mdb_walk_cb_t)kmastat_vmem, NULL) == -1) {
mdb_warn("can't walk 'vmem'");
return (DCMD_ERR);
}
@@ -3867,6 +3920,7 @@ static const mdb_dcmd_t dcmds[] = {
{ "pid2proc", "?", "convert PID to proc_t address", pid2proc },
{ "project", NULL, "display kernel project(s)", project },
{ "ps", "[-fltzTP]", "list processes (and associated thr,lwp)", ps },
+ { "pflags", NULL, "display various proc_t flags", pflags },
{ "pgrep", "[-x] [-n | -o] pattern",
"pattern match against all processes", pgrep },
{ "ptree", NULL, "print process tree", ptree },
@@ -3947,7 +4001,8 @@ static const mdb_dcmd_t dcmds[] = {
modctl2devinfo },
{ "name2major", "<dev-name>", "convert dev name to major number",
name2major },
- { "prtconf", "?[-vpc]", "print devinfo tree", prtconf, prtconf_help },
+ { "prtconf", "?[-vpc] [-d driver]", "print devinfo tree", prtconf,
+ prtconf_help },
{ "softstate", ":<instance>", "retrieve soft-state pointer",
softstate },
{ "devinfo_fm", ":", "devinfo fault managment configuration",
@@ -3993,8 +4048,7 @@ static const mdb_dcmd_t dcmds[] = {
{ "freedby", ":", "given a thread, print its freed buffers", freedby },
{ "kmalog", "?[ fail | slab ]",
"display kmem transaction log and stack traces", kmalog },
- { "kmastat", "[-kmg]", "kernel memory allocator stats",
- kmastat },
+ { "kmastat", NULL, "kernel memory allocator stats", kmastat },
{ "kmausers", "?[-ef] [cache ...]", "current medium and large users "
"of the kmem allocator", kmausers, kmausers_help },
{ "kmem_cache", "?[-n name]",
@@ -4191,6 +4245,8 @@ static const mdb_dcmd_t dcmds[] = {
/* from zone.c */
{ "zid2zone", ":", "find the zone_t with the given zone id",
zid2zone },
+ { "zdid2zone", ":", "find the zone_t with the given zone debug id",
+ zdid2zone },
{ "zone", "?[-r [-v]]", "display kernel zone(s)", zoneprt },
{ "zsd", ":[-v] [zsd_key]", "display zone-specific-data entries for "
"selected zones", zsd },
@@ -4217,6 +4273,8 @@ static const mdb_walker_t walkers[] = {
{ "callout_table", "walk callout table array", callout_table_walk_init,
callout_table_walk_step, callout_table_walk_fini },
{ "cpu", "walk cpu structures", cpu_walk_init, cpu_walk_step },
+ { "dnlc", "walk dnlc entries",
+ dnlc_walk_init, dnlc_walk_step, dnlc_walk_fini },
{ "ereportq_dump", "walk list of ereports in dump error queue",
ereportq_dump_walk_init, ereportq_dump_walk_step, NULL },
{ "ereportq_pend", "walk list of ereports in pending error queue",
diff --git a/usr/src/cmd/mdb/common/modules/genunix/hotplug.c b/usr/src/cmd/mdb/common/modules/genunix/hotplug.c
index 12b70b3669..a6a21d4f82 100644
--- a/usr/src/cmd/mdb/common/modules/genunix/hotplug.c
+++ b/usr/src/cmd/mdb/common/modules/genunix/hotplug.c
@@ -114,6 +114,7 @@ hotplug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
int status;
data.di_flags = 0;
+ data.di_filter = NULL;
if (mdb_getopts(argc, argv,
'p', MDB_OPT_SETBITS, DEVINFO_HP_PHYSICAL, &data.di_flags, NULL)
!= argc)
diff --git a/usr/src/cmd/mdb/common/modules/genunix/kmem.c b/usr/src/cmd/mdb/common/modules/genunix/kmem.c
index f1693d7bf6..0df368d70c 100644
--- a/usr/src/cmd/mdb/common/modules/genunix/kmem.c
+++ b/usr/src/cmd/mdb/common/modules/genunix/kmem.c
@@ -24,7 +24,7 @@
*/
/*
- * Copyright 2011 Joyent, Inc. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
@@ -3896,6 +3896,8 @@ kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
logname = "kmem_failure_log";
else if (strcmp(argv->a_un.a_str, "slab") == 0)
logname = "kmem_slab_log";
+ else if (strcmp(argv->a_un.a_str, "zerosized") == 0)
+ logname = "kmem_zerosized_log";
else
return (DCMD_USAGE);
}
diff --git a/usr/src/cmd/mdb/common/modules/genunix/zone.c b/usr/src/cmd/mdb/common/modules/genunix/zone.c
index a332cdfc01..77ed2cbc48 100644
--- a/usr/src/cmd/mdb/common/modules/genunix/zone.c
+++ b/usr/src/cmd/mdb/common/modules/genunix/zone.c
@@ -34,9 +34,9 @@
#define ZONE_NAMELEN 20
#ifdef _LP64
-#define ZONE_PATHLEN 32
+#define ZONE_PATHLEN 25
#else
-#define ZONE_PATHLEN 40
+#define ZONE_PATHLEN 33
#endif
/*
@@ -80,6 +80,31 @@ zid2zone(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
return (DCMD_OK);
}
+static int
+zdid_lookup_cb(uintptr_t addr, const zone_t *zone, void *arg)
+{
+ zoneid_t zdid = *(uintptr_t *)arg;
+ if (zone->zone_did == zdid)
+ mdb_printf("%p\n", addr);
+
+ return (WALK_NEXT);
+}
+
+/*ARGSUSED*/
+int
+zdid2zone(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ if (!(flags & DCMD_ADDRSPEC) || argc != 0)
+ return (DCMD_USAGE);
+
+ if (mdb_walk("zone", (mdb_walk_cb_t)zdid_lookup_cb, &addr) == -1) {
+ mdb_warn("failed to walk zone");
+ return (DCMD_ERR);
+ }
+
+ return (DCMD_OK);
+}
+
int
zoneprt(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
@@ -122,10 +147,10 @@ zoneprt(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
*/
if (DCMD_HDRSPEC(flags)) {
if (ropt_given == FALSE)
- mdb_printf("%<u>%?s %6s %-13s %-20s %-s%</u>\n",
+ mdb_printf("%<u>%?s %4s %-13s %-19s %-s%</u>\n",
"ADDR", "ID", "STATUS", "NAME", "PATH");
else
- mdb_printf("%<u>%?s %6s %10s %10s %-20s%</u>\n",
+ mdb_printf("%<u>%?s %6s %10s %10s %-19s%</u>\n",
"ADDR", "ID", "REFS", "CREFS", "NAME");
}
@@ -164,7 +189,7 @@ zoneprt(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
statusp = zone_status_names[zn.zone_status];
else
statusp = "???";
- mdb_printf("%0?p %6d %-13s %-20s %s\n", addr, zn.zone_id,
+ mdb_printf("%0?p %4d %-13s %-19s %s\n", addr, zn.zone_id,
statusp, name, path);
} else {
/*
@@ -172,7 +197,7 @@ zoneprt(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
* Display the zone's subsystem-specific reference counts if
* the user specified the '-v' option.
*/
- mdb_printf("%0?p %6d %10u %10u %-20s\n", addr, zn.zone_id,
+ mdb_printf("%0?p %6d %10u %10u %-19s\n", addr, zn.zone_id,
zn.zone_ref, zn.zone_cred_ref, name);
if (vopt_given == TRUE) {
GElf_Sym subsys_names_sym;
@@ -410,7 +435,7 @@ zsd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
* Prepare to output the specified zone's ZSD information.
*/
if (DCMD_HDRSPEC(flags))
- mdb_printf("%<u>%-20s %?s %?s %8s%</u>\n", "ZONE", "KEY",
+ mdb_printf("%<u>%-19s %?s %?s %8s%</u>\n", "ZONE", "KEY",
"VALUE", "FLAGS");
len = mdb_readstr(name, ZONE_NAMELEN, (uintptr_t)zone.zone_name);
if (len > 0) {
@@ -419,7 +444,7 @@ zsd(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
} else {
(void) strcpy(name, "??");
}
- mdb_printf("%-20s ", name);
+ mdb_printf("%-19s ", name);
/*
* Display the requested ZSD entries.
diff --git a/usr/src/cmd/mdb/common/modules/genunix/zone.h b/usr/src/cmd/mdb/common/modules/genunix/zone.h
index 0881f9bbae..94a383e41c 100644
--- a/usr/src/cmd/mdb/common/modules/genunix/zone.h
+++ b/usr/src/cmd/mdb/common/modules/genunix/zone.h
@@ -34,6 +34,7 @@ extern "C" {
#endif
extern int zid2zone(uintptr_t, uint_t, int argc, const mdb_arg_t *);
+extern int zdid2zone(uintptr_t, uint_t, int argc, const mdb_arg_t *);
extern int zoneprt(uintptr_t, uint_t, int argc, const mdb_arg_t *);
extern int zone_walk_init(mdb_walk_state_t *);
diff --git a/usr/src/cmd/mdb/common/modules/i40e/i40e.c b/usr/src/cmd/mdb/common/modules/i40e/i40e.c
new file mode 100644
index 0000000000..6d1f900b43
--- /dev/null
+++ b/usr/src/cmd/mdb/common/modules/i40e/i40e.c
@@ -0,0 +1,114 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <sys/mdb_modapi.h>
+#include "i40e_sw.h"
+
+#define RSRC_MAX 0x13
+static const char *i40e_switch_rsrc_names[] = {
+ "VEBs",
+ "VSIs",
+ "Perfect Match MAC Addresses",
+ "S-Tags",
+ "Reserved",
+ "Multicast Hash Entries",
+ "Reserved",
+ "VLANs",
+ "VSI Lists",
+ "Reserved",
+ "VLAN Stat pools",
+ "Mirror rules",
+ "Queue sets",
+ "Inner VLAN Forwarding",
+ "Reserved",
+ "Inner MACs",
+ "IPs",
+ "GRE/VN1 Keys",
+ "VN2 Keys",
+ "Tunnelling Ports"
+};
+
+/*
+ * i40e mdb dcmds
+ */
+/* ARGSUSED */
+static int
+i40e_switch_rsrcs_dcmd(uintptr_t addr, uint_t flags, int argc,
+ const mdb_arg_t *argv)
+{
+ i40e_t i40e;
+ int i;
+
+ if (!(flags & DCMD_ADDRSPEC)) {
+ mdb_warn("::i40e_switch_rsrcs does not operate globally\n");
+ return (DCMD_USAGE);
+ }
+
+ if (mdb_vread(&i40e, sizeof (i40e_t), addr) != sizeof (i40e_t)) {
+ mdb_warn("failed to read i40e_t at %p", addr);
+ return (DCMD_ERR);
+ }
+
+ mdb_printf("%-28s %-12s %-8s %-8s %s\n", "TYPE", "GUARANTEE",
+ "TOTAL", "USED", "UNALLOCED");
+
+ for (i = 0; i < i40e.i40e_switch_rsrc_actual; i++) {
+ i40e_switch_rsrc_t rsrc;
+ uintptr_t raddr = (uintptr_t)i40e.i40e_switch_rsrcs +
+ i * sizeof (i40e_switch_rsrc_t);
+ const char *name;
+
+ if (mdb_vread(&rsrc, sizeof (i40e_switch_rsrc_t), raddr) !=
+ sizeof (i40e_switch_rsrc_t)) {
+ mdb_warn("failed to read i40e_switch_rsrc_t %d at %p",
+ i, raddr);
+ return (DCMD_ERR);
+ }
+
+ if (rsrc.resource_type <= RSRC_MAX) {
+ name = i40e_switch_rsrc_names[rsrc.resource_type];
+ } else {
+ char *buf;
+ size_t s = mdb_snprintf(NULL, 0, "Unknown type (%d)",
+ rsrc.resource_type);
+ buf = mdb_alloc(s + 1, UM_GC | UM_SLEEP);
+ (void) mdb_snprintf(buf, s + 1, "Unknown type (%d)",
+ rsrc.resource_type);
+ name = buf;
+ }
+
+ mdb_printf("%-28s %-12d %-8d %-8d %d\n", name,
+ LE_16(rsrc.guaranteed), LE_16(rsrc.total), LE_16(rsrc.used),
+ LE_16(rsrc.total_unalloced));
+ }
+
+ return (DCMD_OK);
+}
+
+static const mdb_dcmd_t i40e_dcmds[] = {
+ { "i40e_switch_rsrcs", NULL, "print switch resources",
+ i40e_switch_rsrcs_dcmd, NULL },
+ { NULL }
+};
+
+static const mdb_modinfo_t i40e_modinfo = {
+ MDB_API_VERSION, i40e_dcmds, NULL
+};
+
+const mdb_modinfo_t *
+_mdb_init(void)
+{
+ return (&i40e_modinfo);
+}
diff --git a/usr/src/cmd/mdb/common/modules/ipc/ipc.c b/usr/src/cmd/mdb/common/modules/ipc/ipc.c
index 5871e45dd9..7054f59477 100644
--- a/usr/src/cmd/mdb/common/modules/ipc/ipc.c
+++ b/usr/src/cmd/mdb/common/modules/ipc/ipc.c
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
*/
#include <mdb/mdb_modapi.h>
@@ -229,7 +230,9 @@ shm_print(kshmid_t *shmid, uintptr_t addr)
printtime_nice("ctime: ", shmid->shm_ctime);
mdb_printf("sptinfo: %-?p sptseg: %-?p\n",
shmid->shm_sptinfo, shmid->shm_sptseg);
- mdb_printf("sptprot: <%lb>\n", shmid->shm_sptprot, prot_flag_bits);
+ mdb_printf("opts: rmpend: %d prot: <%b>\n",
+ ((shmid->shm_opts & SHM_RM_PENDING) != 0),
+ (shmid->shm_opts & SHM_PROT_MASK), prot_flag_bits);
}
diff --git a/usr/src/cmd/mdb/common/modules/libc/libc.c b/usr/src/cmd/mdb/common/modules/libc/libc.c
index a10e7f5ef8..ad4c5c82ef 100644
--- a/usr/src/cmd/mdb/common/modules/libc/libc.c
+++ b/usr/src/cmd/mdb/common/modules/libc/libc.c
@@ -135,6 +135,8 @@ d_ucontext(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
uc.uc_stack.ss_sp, uc.uc_stack.ss_size, stack_flags(&uc.uc_stack));
mdb_printf(" mcontext = 0x%p\n",
addr + OFFSETOF(ucontext_t, uc_mcontext));
+ mdb_printf(" brand = 0x%p 0x%p 0x%p\n",
+ uc.uc_brand_data[0], uc.uc_brand_data[1], uc.uc_brand_data[2]);
return (DCMD_OK);
}
@@ -846,14 +848,19 @@ d_uberdata(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
prt_addr(uberdata.all_lwps, 1),
prt_addr(uberdata.all_zombies, 0));
- HD("nthreads nzombies ndaemons pid sigacthandler");
- mdb_printf(OFFSTR "%-10d %-10d %-10d %-10d %s\n",
+ HD("nthreads nzombies ndaemons pid");
+ mdb_printf(OFFSTR "%-10d %-10d %-10d %-10d\n",
OFFSET(nthreads),
uberdata.nthreads,
uberdata.nzombies,
uberdata.ndaemons,
- (int)uberdata.pid,
- prt_addr((void *)uberdata.sigacthandler, 0));
+ (int)uberdata.pid);
+
+ HD("sigacthandler setctxt");
+ mdb_printf(OFFSTR "%s %s\n",
+ OFFSET(sigacthandler),
+ prt_addr((void *)uberdata.sigacthandler, 1),
+ prt_addr((void *)uberdata.setctxt, 1));
HD("lwp_stacks lwp_laststack nfreestack stk_cache");
mdb_printf(OFFSTR "%s %s %-10d %d\n",
@@ -876,12 +883,17 @@ d_uberdata(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
prt_addr(uberdata.ulwp_replace_last, 1),
prt_addr(uberdata.atforklist, 0));
- HD("robustlocks robustlist progname");
- mdb_printf(OFFSTR "%s %s %s\n",
+ HD("robustlocks robustlist");
+ mdb_printf(OFFSTR "%s %s\n",
OFFSET(robustlocks),
prt_addr(uberdata.robustlocks, 1),
- prt_addr(uberdata.robustlist, 1),
- prt_addr(uberdata.progname, 0));
+ prt_addr(uberdata.robustlist, 1));
+
+ HD("progname ub_broot");
+ mdb_printf(OFFSTR "%s %s\n",
+ OFFSET(progname),
+ prt_addr(uberdata.progname, 1),
+ prt_addr(uberdata.ub_broot, 1));
HD("tdb_bootstrap tdb_sync_addr_hash tdb_'count tdb_'fail");
mdb_printf(OFFSTR "%s %s %-10d %d\n",
diff --git a/usr/src/cmd/mdb/common/modules/mpt_sas/mpt_sas.c b/usr/src/cmd/mdb/common/modules/mpt_sas/mpt_sas.c
index c39583b1f5..6556fc9cd5 100644
--- a/usr/src/cmd/mdb/common/modules/mpt_sas/mpt_sas.c
+++ b/usr/src/cmd/mdb/common/modules/mpt_sas/mpt_sas.c
@@ -24,7 +24,7 @@
*/
/*
- * Copyright 2014 Joyent, Inc. All rights reserved.
+ * Copyright 2015 Joyent, Inc. All rights reserved.
* Copyright (c) 2014, Tegile Systems Inc. All rights reserved.
*/
@@ -34,6 +34,7 @@
#include <sys/sunmdi.h>
#include <sys/list.h>
#include <sys/scsi/scsi.h>
+#include <sys/refhash.h>
#pragma pack(1)
#include <sys/scsi/adapters/mpt_sas/mpi/mpi2_type.h>
@@ -47,7 +48,6 @@
#pragma pack()
#include <sys/scsi/adapters/mpt_sas/mptsas_var.h>
-#include <sys/scsi/adapters/mpt_sas/mptsas_hash.h>
struct {
int value;
diff --git a/usr/src/cmd/mdb/common/modules/v8/mdb_v8.c b/usr/src/cmd/mdb/common/modules/v8/mdb_v8.c
new file mode 100644
index 0000000000..6609097742
--- /dev/null
+++ b/usr/src/cmd/mdb/common/modules/v8/mdb_v8.c
@@ -0,0 +1,5709 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * mdb(1M) module for debugging the V8 JavaScript engine. This implementation
+ * makes heavy use of metadata defined in the V8 binary for inspecting in-memory
+ * structures. Canned configurations can be manually loaded for V8 binaries
+ * that predate this metadata. See mdb_v8_cfg.c for details.
+ *
+ * NOTE: This dmod implementation (including this file and related headers and C
+ * files) exist in both the Node and illumos source trees. THESE SHOULD BE KEPT
+ * IN SYNC. The version in the Node tree is built directly into modern Node
+ * binaries as part of the build process, and the version in the illumos source
+ * tree is delivered with the OS for debugging Node binaries that predate
+ * support for including the dmod directly in the binary. Note too that these
+ * files have different licenses to match their corresponding repositories.
+ */
+
+/*
+ * We hard-code our MDB_API_VERSION to be 3 to allow this module to be
+ * compiled on systems with higher version numbers, but still allow the
+ * resulting binary object to be used on older systems. (We do not make use
+ * of functionality present in versions later than 3.) This is particularly
+ * important for mdb_v8 because (1) it's used in particular to debug
+ * application-level software and (2) it has a history of rapid evolution.
+ */
+#define MDB_API_VERSION 3
+
+#include <sys/mdb_modapi.h>
+#include <assert.h>
+#include <ctype.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <libproc.h>
+#include <sys/avl.h>
+#include <alloca.h>
+
+#include "v8dbg.h"
+#include "v8cfg.h"
+
+#define offsetof(s, m) ((size_t)(&(((s *)0)->m)))
+
+/*
+ * The "v8_class" and "v8_field" structures describe the C++ classes used to
+ * represent V8 heap objects.
+ */
+typedef struct v8_class {
+ struct v8_class *v8c_next; /* list linkage */
+ struct v8_class *v8c_parent; /* parent class (inheritance) */
+ struct v8_field *v8c_fields; /* array of class fields */
+ size_t v8c_start; /* offset of first class field */
+ size_t v8c_end; /* offset of first subclass field */
+ char v8c_name[64]; /* heap object class name */
+} v8_class_t;
+
+typedef struct v8_field {
+ struct v8_field *v8f_next; /* list linkage */
+ ssize_t v8f_offset; /* field offset */
+ char v8f_name[64]; /* field name */
+ boolean_t v8f_isbyte; /* 1-byte int field */
+ boolean_t v8f_isstr; /* NUL-terminated string */
+} v8_field_t;
+
+/*
+ * Similarly, the "v8_enum" structure describes an enum from V8.
+ */
+typedef struct {
+ char v8e_name[64];
+ uint_t v8e_value;
+} v8_enum_t;
+
+/*
+ * During configuration, the dmod updates these globals with the actual set of
+ * classes, types, and frame types based on the debug metadata.
+ */
+static v8_class_t *v8_classes;
+
+static v8_enum_t v8_types[128];
+static int v8_next_type;
+
+static v8_enum_t v8_frametypes[16];
+static int v8_next_frametype;
+
+static int v8_warnings;
+static int v8_silent;
+
+/*
+ * The following constants describe offsets from the frame pointer that are used
+ * to inspect each stack frame. They're initialized from the debug metadata.
+ */
+static ssize_t V8_OFF_FP_CONTEXT;
+static ssize_t V8_OFF_FP_MARKER;
+static ssize_t V8_OFF_FP_FUNCTION;
+static ssize_t V8_OFF_FP_ARGS;
+
+/*
+ * The following constants are used by macros defined in heap-dbg-common.h to
+ * examine the types of various V8 heap objects. In general, the macros should
+ * be preferred to using the constants directly. The values of these constants
+ * are initialized from the debug metadata.
+ */
+static intptr_t V8_FirstNonstringType;
+static intptr_t V8_IsNotStringMask;
+static intptr_t V8_StringTag;
+static intptr_t V8_NotStringTag;
+static intptr_t V8_StringEncodingMask;
+static intptr_t V8_TwoByteStringTag;
+static intptr_t V8_AsciiStringTag;
+static intptr_t V8_StringRepresentationMask;
+static intptr_t V8_SeqStringTag;
+static intptr_t V8_ConsStringTag;
+static intptr_t V8_SlicedStringTag;
+static intptr_t V8_ExternalStringTag;
+static intptr_t V8_FailureTag;
+static intptr_t V8_FailureTagMask;
+static intptr_t V8_HeapObjectTag;
+static intptr_t V8_HeapObjectTagMask;
+static intptr_t V8_SmiTag;
+static intptr_t V8_SmiTagMask;
+static intptr_t V8_SmiValueShift;
+static intptr_t V8_SmiShiftSize;
+static intptr_t V8_PointerSizeLog2;
+
+static intptr_t V8_ISSHARED_SHIFT;
+static intptr_t V8_DICT_SHIFT;
+static intptr_t V8_DICT_PREFIX_SIZE;
+static intptr_t V8_DICT_ENTRY_SIZE;
+static intptr_t V8_DICT_START_INDEX;
+static intptr_t V8_FIELDINDEX_MASK;
+static intptr_t V8_FIELDINDEX_SHIFT;
+static intptr_t V8_PROP_IDX_CONTENT;
+static intptr_t V8_PROP_IDX_FIRST;
+static intptr_t V8_PROP_TYPE_FIELD;
+static intptr_t V8_PROP_TYPE_MASK;
+static intptr_t V8_PROP_DESC_KEY;
+static intptr_t V8_PROP_DESC_DETAILS;
+static intptr_t V8_PROP_DESC_VALUE;
+static intptr_t V8_PROP_DESC_SIZE;
+static intptr_t V8_TRANSITIONS_IDX_DESC;
+
+static intptr_t V8_TYPE_JSOBJECT = -1;
+static intptr_t V8_TYPE_JSARRAY = -1;
+static intptr_t V8_TYPE_JSFUNCTION = -1;
+static intptr_t V8_TYPE_FIXEDARRAY = -1;
+
+static intptr_t V8_ELEMENTS_KIND_SHIFT;
+static intptr_t V8_ELEMENTS_KIND_BITCOUNT;
+static intptr_t V8_ELEMENTS_FAST_ELEMENTS;
+static intptr_t V8_ELEMENTS_FAST_HOLEY_ELEMENTS;
+static intptr_t V8_ELEMENTS_DICTIONARY_ELEMENTS;
+
+/*
+ * Although we have this information in v8_classes, the following offsets are
+ * defined explicitly because they're used directly in code below.
+ */
+static ssize_t V8_OFF_CODE_INSTRUCTION_SIZE;
+static ssize_t V8_OFF_CODE_INSTRUCTION_START;
+static ssize_t V8_OFF_CONSSTRING_FIRST;
+static ssize_t V8_OFF_CONSSTRING_SECOND;
+static ssize_t V8_OFF_EXTERNALSTRING_RESOURCE;
+static ssize_t V8_OFF_FIXEDARRAY_DATA;
+static ssize_t V8_OFF_FIXEDARRAY_LENGTH;
+static ssize_t V8_OFF_HEAPNUMBER_VALUE;
+static ssize_t V8_OFF_HEAPOBJECT_MAP;
+static ssize_t V8_OFF_JSARRAY_LENGTH;
+static ssize_t V8_OFF_JSDATE_VALUE;
+static ssize_t V8_OFF_JSFUNCTION_SHARED;
+static ssize_t V8_OFF_JSOBJECT_ELEMENTS;
+static ssize_t V8_OFF_JSOBJECT_PROPERTIES;
+static ssize_t V8_OFF_MAP_CONSTRUCTOR;
+static ssize_t V8_OFF_MAP_INOBJECT_PROPERTIES;
+static ssize_t V8_OFF_MAP_INSTANCE_ATTRIBUTES;
+static ssize_t V8_OFF_MAP_INSTANCE_DESCRIPTORS;
+static ssize_t V8_OFF_MAP_INSTANCE_SIZE;
+static ssize_t V8_OFF_MAP_BIT_FIELD;
+static ssize_t V8_OFF_MAP_BIT_FIELD2;
+static ssize_t V8_OFF_MAP_BIT_FIELD3;
+static ssize_t V8_OFF_MAP_TRANSITIONS;
+static ssize_t V8_OFF_ODDBALL_TO_STRING;
+static ssize_t V8_OFF_SCRIPT_LINE_ENDS;
+static ssize_t V8_OFF_SCRIPT_NAME;
+static ssize_t V8_OFF_SCRIPT_SOURCE;
+static ssize_t V8_OFF_SEQASCIISTR_CHARS;
+static ssize_t V8_OFF_SEQONEBYTESTR_CHARS;
+static ssize_t V8_OFF_SEQTWOBYTESTR_CHARS;
+static ssize_t V8_OFF_SHAREDFUNCTIONINFO_CODE;
+static ssize_t V8_OFF_SHAREDFUNCTIONINFO_END_POSITION;
+static ssize_t V8_OFF_SHAREDFUNCTIONINFO_FUNCTION_TOKEN_POSITION;
+static ssize_t V8_OFF_SHAREDFUNCTIONINFO_INFERRED_NAME;
+static ssize_t V8_OFF_SHAREDFUNCTIONINFO_LENGTH;
+static ssize_t V8_OFF_SHAREDFUNCTIONINFO_SCRIPT;
+static ssize_t V8_OFF_SHAREDFUNCTIONINFO_NAME;
+static ssize_t V8_OFF_SLICEDSTRING_PARENT;
+static ssize_t V8_OFF_SLICEDSTRING_OFFSET;
+static ssize_t V8_OFF_STRING_LENGTH;
+
+/* see node_string.h */
+#define NODE_OFF_EXTSTR_DATA sizeof (uintptr_t)
+
+#define V8_CONSTANT_OPTIONAL 1
+#define V8_CONSTANT_HASFALLBACK 2
+#define V8_CONSTANT_REMOVED 4
+
+#define V8_CONSTANT_MAJORSHIFT 3
+#define V8_CONSTANT_MAJORMASK ((1 << 4) - 1)
+#define V8_CONSTANT_MAJOR(flags) \
+ (((flags) >> V8_CONSTANT_MAJORSHIFT) & V8_CONSTANT_MAJORMASK)
+
+#define V8_CONSTANT_MINORSHIFT 7
+#define V8_CONSTANT_MINORMASK ((1 << 9) - 1)
+#define V8_CONSTANT_MINOR(flags) \
+ (((flags) >> V8_CONSTANT_MINORSHIFT) & V8_CONSTANT_MINORMASK)
+
+#define V8_CONSTANT_FALLBACK(maj, min) \
+ (V8_CONSTANT_OPTIONAL | V8_CONSTANT_HASFALLBACK | \
+ ((maj) << V8_CONSTANT_MAJORSHIFT) | ((min) << V8_CONSTANT_MINORSHIFT))
+
+#define V8_CONSTANT_REMOVED_SINCE(maj, min) \
+ (V8_CONSTANT_REMOVED | \
+ ((maj) << V8_CONSTANT_MAJORSHIFT) | ((min) << V8_CONSTANT_MINORSHIFT))
+
+/*
+ * Table of constants used directly by this file.
+ */
+typedef struct v8_constant {
+ intptr_t *v8c_valp;
+ const char *v8c_symbol;
+ uint32_t v8c_flags;
+ intptr_t v8c_fallback;
+} v8_constant_t;
+
+static v8_constant_t v8_constants[] = {
+ { &V8_OFF_FP_CONTEXT, "v8dbg_off_fp_context" },
+ { &V8_OFF_FP_FUNCTION, "v8dbg_off_fp_function" },
+ { &V8_OFF_FP_MARKER, "v8dbg_off_fp_marker" },
+ { &V8_OFF_FP_ARGS, "v8dbg_off_fp_args" },
+
+ { &V8_FirstNonstringType, "v8dbg_FirstNonstringType" },
+ { &V8_IsNotStringMask, "v8dbg_IsNotStringMask" },
+ { &V8_StringTag, "v8dbg_StringTag" },
+ { &V8_NotStringTag, "v8dbg_NotStringTag" },
+ { &V8_StringEncodingMask, "v8dbg_StringEncodingMask" },
+ { &V8_TwoByteStringTag, "v8dbg_TwoByteStringTag" },
+ { &V8_AsciiStringTag, "v8dbg_AsciiStringTag" },
+ { &V8_StringRepresentationMask, "v8dbg_StringRepresentationMask" },
+ { &V8_SeqStringTag, "v8dbg_SeqStringTag" },
+ { &V8_ConsStringTag, "v8dbg_ConsStringTag" },
+ { &V8_SlicedStringTag, "v8dbg_SlicedStringTag",
+ V8_CONSTANT_FALLBACK(0, 0), 0x3 },
+ { &V8_ExternalStringTag, "v8dbg_ExternalStringTag" },
+ { &V8_FailureTag, "v8dbg_FailureTag",
+ V8_CONSTANT_REMOVED_SINCE(3, 28) },
+ { &V8_FailureTagMask, "v8dbg_FailureTagMask",
+ V8_CONSTANT_REMOVED_SINCE(3, 28) },
+ { &V8_HeapObjectTag, "v8dbg_HeapObjectTag" },
+ { &V8_HeapObjectTagMask, "v8dbg_HeapObjectTagMask" },
+ { &V8_SmiTag, "v8dbg_SmiTag" },
+ { &V8_SmiTagMask, "v8dbg_SmiTagMask" },
+ { &V8_SmiValueShift, "v8dbg_SmiValueShift" },
+ { &V8_SmiShiftSize, "v8dbg_SmiShiftSize",
+#ifdef _LP64
+ V8_CONSTANT_FALLBACK(0, 0), 31 },
+#else
+ V8_CONSTANT_FALLBACK(0, 0), 0 },
+#endif
+ { &V8_PointerSizeLog2, "v8dbg_PointerSizeLog2" },
+
+ { &V8_DICT_SHIFT, "v8dbg_bit_field3_dictionary_map_shift",
+ V8_CONSTANT_FALLBACK(3, 13), 24 },
+ { &V8_DICT_PREFIX_SIZE, "v8dbg_dict_prefix_size",
+ V8_CONSTANT_FALLBACK(3, 11), 2 },
+ { &V8_DICT_ENTRY_SIZE, "v8dbg_dict_entry_size",
+ V8_CONSTANT_FALLBACK(3, 11), 3 },
+ { &V8_DICT_START_INDEX, "v8dbg_dict_start_index",
+ V8_CONSTANT_FALLBACK(3, 11), 3 },
+ { &V8_FIELDINDEX_MASK, "v8dbg_fieldindex_mask",
+ V8_CONSTANT_FALLBACK(3, 26), 0x3ff00000 },
+ { &V8_FIELDINDEX_SHIFT, "v8dbg_fieldindex_shift",
+ V8_CONSTANT_FALLBACK(3, 26), 20 },
+ { &V8_ISSHARED_SHIFT, "v8dbg_isshared_shift",
+ V8_CONSTANT_FALLBACK(3, 11), 0 },
+ { &V8_PROP_IDX_FIRST, "v8dbg_prop_idx_first" },
+ { &V8_PROP_TYPE_FIELD, "v8dbg_prop_type_field" },
+ { &V8_PROP_TYPE_MASK, "v8dbg_prop_type_mask" },
+ { &V8_PROP_IDX_CONTENT, "v8dbg_prop_idx_content",
+ V8_CONSTANT_OPTIONAL },
+ { &V8_PROP_DESC_KEY, "v8dbg_prop_desc_key",
+ V8_CONSTANT_FALLBACK(0, 0), 0 },
+ { &V8_PROP_DESC_DETAILS, "v8dbg_prop_desc_details",
+ V8_CONSTANT_FALLBACK(0, 0), 1 },
+ { &V8_PROP_DESC_VALUE, "v8dbg_prop_desc_value",
+ V8_CONSTANT_FALLBACK(0, 0), 2 },
+ { &V8_PROP_DESC_SIZE, "v8dbg_prop_desc_size",
+ V8_CONSTANT_FALLBACK(0, 0), 3 },
+ { &V8_TRANSITIONS_IDX_DESC, "v8dbg_transitions_idx_descriptors",
+ V8_CONSTANT_OPTIONAL },
+
+ { &V8_ELEMENTS_KIND_SHIFT, "v8dbg_elements_kind_shift",
+ V8_CONSTANT_FALLBACK(0, 0), 3 },
+ { &V8_ELEMENTS_KIND_BITCOUNT, "v8dbg_elements_kind_bitcount",
+ V8_CONSTANT_FALLBACK(0, 0), 5 },
+ { &V8_ELEMENTS_FAST_ELEMENTS,
+ "v8dbg_elements_fast_elements",
+ V8_CONSTANT_FALLBACK(0, 0), 2 },
+ { &V8_ELEMENTS_FAST_HOLEY_ELEMENTS,
+ "v8dbg_elements_fast_holey_elements",
+ V8_CONSTANT_FALLBACK(0, 0), 3 },
+ { &V8_ELEMENTS_DICTIONARY_ELEMENTS,
+ "v8dbg_elements_dictionary_elements",
+ V8_CONSTANT_FALLBACK(0, 0), 6 },
+};
+
+static int v8_nconstants = sizeof (v8_constants) / sizeof (v8_constants[0]);
+
+typedef struct v8_offset {
+ ssize_t *v8o_valp;
+ const char *v8o_class;
+ const char *v8o_member;
+ boolean_t v8o_optional;
+} v8_offset_t;
+
+static v8_offset_t v8_offsets[] = {
+ { &V8_OFF_CODE_INSTRUCTION_SIZE,
+ "Code", "instruction_size" },
+ { &V8_OFF_CODE_INSTRUCTION_START,
+ "Code", "instruction_start" },
+ { &V8_OFF_CONSSTRING_FIRST,
+ "ConsString", "first" },
+ { &V8_OFF_CONSSTRING_SECOND,
+ "ConsString", "second" },
+ { &V8_OFF_EXTERNALSTRING_RESOURCE,
+ "ExternalString", "resource" },
+ { &V8_OFF_FIXEDARRAY_DATA,
+ "FixedArray", "data" },
+ { &V8_OFF_FIXEDARRAY_LENGTH,
+ "FixedArray", "length" },
+ { &V8_OFF_HEAPNUMBER_VALUE,
+ "HeapNumber", "value" },
+ { &V8_OFF_HEAPOBJECT_MAP,
+ "HeapObject", "map" },
+ { &V8_OFF_JSARRAY_LENGTH,
+ "JSArray", "length" },
+ { &V8_OFF_JSDATE_VALUE,
+ "JSDate", "value", B_TRUE },
+ { &V8_OFF_JSFUNCTION_SHARED,
+ "JSFunction", "shared" },
+ { &V8_OFF_JSOBJECT_ELEMENTS,
+ "JSObject", "elements" },
+ { &V8_OFF_JSOBJECT_PROPERTIES,
+ "JSObject", "properties" },
+ { &V8_OFF_MAP_CONSTRUCTOR,
+ "Map", "constructor" },
+ { &V8_OFF_MAP_INOBJECT_PROPERTIES,
+ "Map", "inobject_properties" },
+ { &V8_OFF_MAP_INSTANCE_ATTRIBUTES,
+ "Map", "instance_attributes" },
+ { &V8_OFF_MAP_INSTANCE_DESCRIPTORS,
+ "Map", "instance_descriptors", B_TRUE },
+ { &V8_OFF_MAP_TRANSITIONS,
+ "Map", "transitions", B_TRUE },
+ { &V8_OFF_MAP_INSTANCE_SIZE,
+ "Map", "instance_size" },
+ { &V8_OFF_MAP_BIT_FIELD2,
+ "Map", "bit_field2", B_TRUE },
+ { &V8_OFF_MAP_BIT_FIELD3,
+ "Map", "bit_field3", B_TRUE },
+ { &V8_OFF_ODDBALL_TO_STRING,
+ "Oddball", "to_string" },
+ { &V8_OFF_SCRIPT_LINE_ENDS,
+ "Script", "line_ends" },
+ { &V8_OFF_SCRIPT_NAME,
+ "Script", "name" },
+ { &V8_OFF_SCRIPT_SOURCE,
+ "Script", "source" },
+ { &V8_OFF_SEQASCIISTR_CHARS,
+ "SeqAsciiString", "chars", B_TRUE },
+ { &V8_OFF_SEQONEBYTESTR_CHARS,
+ "SeqOneByteString", "chars", B_TRUE },
+ { &V8_OFF_SEQTWOBYTESTR_CHARS,
+ "SeqTwoByteString", "chars", B_TRUE },
+ { &V8_OFF_SHAREDFUNCTIONINFO_CODE,
+ "SharedFunctionInfo", "code" },
+ { &V8_OFF_SHAREDFUNCTIONINFO_END_POSITION,
+ "SharedFunctionInfo", "end_position" },
+ { &V8_OFF_SHAREDFUNCTIONINFO_FUNCTION_TOKEN_POSITION,
+ "SharedFunctionInfo", "function_token_position" },
+ { &V8_OFF_SHAREDFUNCTIONINFO_INFERRED_NAME,
+ "SharedFunctionInfo", "inferred_name" },
+ { &V8_OFF_SHAREDFUNCTIONINFO_LENGTH,
+ "SharedFunctionInfo", "length" },
+ { &V8_OFF_SHAREDFUNCTIONINFO_NAME,
+ "SharedFunctionInfo", "name" },
+ { &V8_OFF_SHAREDFUNCTIONINFO_SCRIPT,
+ "SharedFunctionInfo", "script" },
+ { &V8_OFF_SLICEDSTRING_OFFSET,
+ "SlicedString", "offset" },
+ { &V8_OFF_SLICEDSTRING_PARENT,
+ "SlicedString", "parent", B_TRUE },
+ { &V8_OFF_STRING_LENGTH,
+ "String", "length" },
+};
+
+static int v8_noffsets = sizeof (v8_offsets) / sizeof (v8_offsets[0]);
+
+static uintptr_t v8_major;
+static uintptr_t v8_minor;
+static uintptr_t v8_build;
+static uintptr_t v8_patch;
+
+static int autoconf_iter_symbol(mdb_symbol_t *, void *);
+static v8_class_t *conf_class_findcreate(const char *);
+static v8_field_t *conf_field_create(v8_class_t *, const char *, size_t);
+static char *conf_next_part(char *, char *);
+static int conf_update_parent(const char *);
+static int conf_update_field(v8_cfg_t *, const char *);
+static int conf_update_enum(v8_cfg_t *, const char *, const char *,
+ v8_enum_t *);
+static int conf_update_type(v8_cfg_t *, const char *);
+static int conf_update_frametype(v8_cfg_t *, const char *);
+static void conf_class_compute_offsets(v8_class_t *);
+
+static int read_typebyte(uint8_t *, uintptr_t);
+static int heap_offset(const char *, const char *, ssize_t *);
+static int jsfunc_name(uintptr_t, char **, size_t *);
+
+/*
+ * When iterating properties, it's useful to keep track of what kinds of
+ * properties were found. This is useful for developers to identify objects of
+ * different kinds in order to debug them.
+ */
+typedef enum {
+ JPI_NONE = 0,
+
+ /*
+ * Indicates how properties are stored in this object. There can be
+ * both numeric properties and some of the other kinds.
+ */
+ JPI_NUMERIC = 0x01, /* numeric-named properties in "elements" */
+ JPI_DICT = 0x02, /* dictionary properties */
+ JPI_INOBJECT = 0x04, /* properties stored inside object */
+ JPI_PROPS = 0x08, /* "properties" array */
+
+ /* error-like cases */
+ JPI_SKIPPED = 0x10, /* some properties were skipped */
+ JPI_BADLAYOUT = 0x20, /* we didn't recognize the layout at all */
+
+ /* fallback cases */
+ JPI_HASTRANSITIONS = 0x100, /* found a transitions array */
+ JPI_HASCONTENT = 0x200, /* found a separate content array */
+} jspropinfo_t;
+
+typedef struct jsobj_print {
+ char **jsop_bufp;
+ size_t *jsop_lenp;
+ int jsop_indent;
+ uint64_t jsop_depth;
+ boolean_t jsop_printaddr;
+ uintptr_t jsop_baseaddr;
+ int jsop_nprops;
+ const char *jsop_member;
+ boolean_t jsop_found;
+ boolean_t jsop_descended;
+ jspropinfo_t jsop_propinfo;
+} jsobj_print_t;
+
+static int jsobj_print_number(uintptr_t, jsobj_print_t *);
+static int jsobj_print_oddball(uintptr_t, jsobj_print_t *);
+static int jsobj_print_jsobject(uintptr_t, jsobj_print_t *);
+static int jsobj_print_jsarray(uintptr_t, jsobj_print_t *);
+static int jsobj_print_jsfunction(uintptr_t, jsobj_print_t *);
+static int jsobj_print_jsdate(uintptr_t, jsobj_print_t *);
+
+/*
+ * Returns 1 if the V8 version v8_major.v8.minor is strictly older than
+ * the V8 version represented by "flags".
+ * Returns 0 otherwise.
+ */
+static int
+v8_version_older(uintptr_t v8_major, uintptr_t v8_minor, uint32_t flags) {
+ return (v8_major < V8_CONSTANT_MAJOR(flags) ||
+ (v8_major == V8_CONSTANT_MAJOR(flags) &&
+ v8_minor < V8_CONSTANT_MINOR(flags)));
+}
+
+/*
+ * Invoked when this dmod is initially loaded to load the set of classes, enums,
+ * and other constants from the metadata in the target binary.
+ */
+static int
+autoconfigure(v8_cfg_t *cfgp)
+{
+ v8_class_t *clp;
+ v8_enum_t *ep;
+ struct v8_constant *cnp;
+ int ii;
+ int failed = 0;
+
+ assert(v8_classes == NULL);
+
+ /*
+ * Iterate all global symbols looking for metadata.
+ */
+ if (cfgp->v8cfg_iter(cfgp, autoconf_iter_symbol, cfgp) != 0) {
+ mdb_warn("failed to autoconfigure V8 support\n");
+ return (-1);
+ }
+
+ /*
+ * By now we've configured all of the classes so we can update the
+ * "start" and "end" fields in each class with information from its
+ * parent class.
+ */
+ for (clp = v8_classes; clp != NULL; clp = clp->v8c_next) {
+ if (clp->v8c_end != (size_t)-1)
+ continue;
+
+ conf_class_compute_offsets(clp);
+ };
+
+ /*
+ * Load various constants used directly in the module.
+ */
+ for (ii = 0; ii < v8_nconstants; ii++) {
+ cnp = &v8_constants[ii];
+
+ if (cfgp->v8cfg_readsym(cfgp,
+ cnp->v8c_symbol, cnp->v8c_valp) != -1) {
+ continue;
+ }
+
+ if (!(cnp->v8c_flags & V8_CONSTANT_OPTIONAL) &&
+ (!(cnp->v8c_flags & V8_CONSTANT_REMOVED) ||
+ v8_version_older(v8_major, v8_minor, cnp->v8c_flags))) {
+ mdb_warn("failed to read \"%s\"", cnp->v8c_symbol);
+ failed++;
+ continue;
+ }
+
+ if (!(cnp->v8c_flags & V8_CONSTANT_HASFALLBACK) ||
+ v8_major < V8_CONSTANT_MAJOR(cnp->v8c_flags) ||
+ (v8_major == V8_CONSTANT_MAJOR(cnp->v8c_flags) &&
+ v8_minor < V8_CONSTANT_MINOR(cnp->v8c_flags))) {
+ *cnp->v8c_valp = -1;
+ continue;
+ }
+
+ /*
+ * We have a fallback -- and we know that the version satisfies
+ * the fallback's version constraints; use the fallback value.
+ */
+ *cnp->v8c_valp = cnp->v8c_fallback;
+ }
+
+ /*
+ * Load type values for well-known classes that we use a lot.
+ */
+ for (ep = v8_types; ep->v8e_name[0] != '\0'; ep++) {
+ if (strcmp(ep->v8e_name, "JSObject") == 0)
+ V8_TYPE_JSOBJECT = ep->v8e_value;
+
+ if (strcmp(ep->v8e_name, "JSArray") == 0)
+ V8_TYPE_JSARRAY = ep->v8e_value;
+
+ if (strcmp(ep->v8e_name, "JSFunction") == 0)
+ V8_TYPE_JSFUNCTION = ep->v8e_value;
+
+ if (strcmp(ep->v8e_name, "FixedArray") == 0)
+ V8_TYPE_FIXEDARRAY = ep->v8e_value;
+ }
+
+ if (V8_TYPE_JSOBJECT == -1) {
+ mdb_warn("couldn't find JSObject type\n");
+ failed++;
+ }
+
+ if (V8_TYPE_JSARRAY == -1) {
+ mdb_warn("couldn't find JSArray type\n");
+ failed++;
+ }
+
+ if (V8_TYPE_JSFUNCTION == -1) {
+ mdb_warn("couldn't find JSFunction type\n");
+ failed++;
+ }
+
+ if (V8_TYPE_FIXEDARRAY == -1) {
+ mdb_warn("couldn't find FixedArray type\n");
+ failed++;
+ }
+
+ /*
+ * Finally, load various class offsets.
+ */
+ for (ii = 0; ii < v8_noffsets; ii++) {
+ struct v8_offset *offp = &v8_offsets[ii];
+ const char *klass = offp->v8o_class;
+
+again:
+ if (heap_offset(klass, offp->v8o_member, offp->v8o_valp) == 0)
+ continue;
+
+ if (strcmp(klass, "FixedArray") == 0) {
+ /*
+ * The V8 included in node v0.6 uses a FixedArrayBase
+ * class to contain the "length" field, while the one
+ * in v0.4 has no such base class and stores the field
+ * directly in FixedArray; if we failed to derive
+ * the offset from FixedArray, try FixedArrayBase.
+ */
+ klass = "FixedArrayBase";
+ goto again;
+ }
+
+ if (offp->v8o_optional) {
+ *offp->v8o_valp = -1;
+ continue;
+ }
+
+ mdb_warn("couldn't find class \"%s\", field \"%s\"\n",
+ offp->v8o_class, offp->v8o_member);
+ failed++;
+ }
+
+ if (!((V8_OFF_SEQASCIISTR_CHARS != -1) ^
+ (V8_OFF_SEQONEBYTESTR_CHARS != -1))) {
+ mdb_warn("expected exactly one of SeqAsciiString and "
+ "SeqOneByteString to be defined\n");
+ failed++;
+ }
+
+ if (V8_OFF_SEQONEBYTESTR_CHARS != -1)
+ V8_OFF_SEQASCIISTR_CHARS = V8_OFF_SEQONEBYTESTR_CHARS;
+
+ if (V8_OFF_SEQTWOBYTESTR_CHARS == -1)
+ V8_OFF_SEQTWOBYTESTR_CHARS = V8_OFF_SEQASCIISTR_CHARS;
+
+ if (V8_OFF_SLICEDSTRING_PARENT == -1)
+ V8_OFF_SLICEDSTRING_PARENT = V8_OFF_SLICEDSTRING_OFFSET -
+ sizeof (uintptr_t);
+
+ /*
+ * If we don't have bit_field/bit_field2 for Map, we know that they're
+ * the second and third byte of instance_attributes.
+ */
+ if (V8_OFF_MAP_BIT_FIELD == -1)
+ V8_OFF_MAP_BIT_FIELD = V8_OFF_MAP_INSTANCE_ATTRIBUTES + 2;
+
+ if (V8_OFF_MAP_BIT_FIELD2 == -1)
+ V8_OFF_MAP_BIT_FIELD2 = V8_OFF_MAP_INSTANCE_ATTRIBUTES + 3;
+
+ return (failed ? -1 : 0);
+}
+
+/* ARGSUSED */
+static int
+autoconf_iter_symbol(mdb_symbol_t *symp, void *arg)
+{
+ v8_cfg_t *cfgp = arg;
+
+ if (strncmp(symp->sym_name, "v8dbg_parent_",
+ sizeof ("v8dbg_parent_") - 1) == 0)
+ return (conf_update_parent(symp->sym_name));
+
+ if (strncmp(symp->sym_name, "v8dbg_class_",
+ sizeof ("v8dbg_class_") - 1) == 0)
+ return (conf_update_field(cfgp, symp->sym_name));
+
+ if (strncmp(symp->sym_name, "v8dbg_type_",
+ sizeof ("v8dbg_type_") - 1) == 0)
+ return (conf_update_type(cfgp, symp->sym_name));
+
+ if (strncmp(symp->sym_name, "v8dbg_frametype_",
+ sizeof ("v8dbg_frametype_") - 1) == 0)
+ return (conf_update_frametype(cfgp, symp->sym_name));
+
+ return (0);
+}
+
+/*
+ * Extracts the next field of a string whose fields are separated by "__" (as
+ * the V8 metadata symbols are).
+ */
+static char *
+conf_next_part(char *buf, char *start)
+{
+ char *pp;
+
+ if ((pp = strstr(start, "__")) == NULL) {
+ mdb_warn("malformed symbol name: %s\n", buf);
+ return (NULL);
+ }
+
+ *pp = '\0';
+ return (pp + sizeof ("__") - 1);
+}
+
+static v8_class_t *
+conf_class_findcreate(const char *name)
+{
+ v8_class_t *clp, *iclp, *prev = NULL;
+ int cmp;
+
+ for (iclp = v8_classes; iclp != NULL; iclp = iclp->v8c_next) {
+ if ((cmp = strcmp(iclp->v8c_name, name)) == 0)
+ return (iclp);
+
+ if (cmp > 0)
+ break;
+
+ prev = iclp;
+ }
+
+ if ((clp = mdb_zalloc(sizeof (*clp), UM_NOSLEEP)) == NULL)
+ return (NULL);
+
+ (void) strlcpy(clp->v8c_name, name, sizeof (clp->v8c_name));
+ clp->v8c_end = (size_t)-1;
+ clp->v8c_next = iclp;
+
+ if (prev != NULL) {
+ prev->v8c_next = clp;
+ } else {
+ v8_classes = clp;
+ }
+
+ return (clp);
+}
+
+static v8_field_t *
+conf_field_create(v8_class_t *clp, const char *name, size_t offset)
+{
+ v8_field_t *flp, *iflp;
+
+ if ((flp = mdb_zalloc(sizeof (*flp), UM_NOSLEEP)) == NULL)
+ return (NULL);
+
+ (void) strlcpy(flp->v8f_name, name, sizeof (flp->v8f_name));
+ flp->v8f_offset = offset;
+
+ if (clp->v8c_fields == NULL || clp->v8c_fields->v8f_offset > offset) {
+ flp->v8f_next = clp->v8c_fields;
+ clp->v8c_fields = flp;
+ return (flp);
+ }
+
+ for (iflp = clp->v8c_fields; iflp->v8f_next != NULL;
+ iflp = iflp->v8f_next) {
+ if (iflp->v8f_next->v8f_offset > offset)
+ break;
+ }
+
+ flp->v8f_next = iflp->v8f_next;
+ iflp->v8f_next = flp;
+ return (flp);
+}
+
+/*
+ * Given a "v8dbg_parent_X__Y", symbol, update the parent of class X to class Y.
+ * Note that neither class necessarily exists already.
+ */
+static int
+conf_update_parent(const char *symbol)
+{
+ char *pp, *qq;
+ char buf[128];
+ v8_class_t *clp, *pclp;
+
+ (void) strlcpy(buf, symbol, sizeof (buf));
+ pp = buf + sizeof ("v8dbg_parent_") - 1;
+ qq = conf_next_part(buf, pp);
+
+ if (qq == NULL)
+ return (-1);
+
+ clp = conf_class_findcreate(pp);
+ pclp = conf_class_findcreate(qq);
+
+ if (clp == NULL || pclp == NULL) {
+ mdb_warn("mdb_v8: out of memory\n");
+ return (-1);
+ }
+
+ clp->v8c_parent = pclp;
+ return (0);
+}
+
+/*
+ * Given a "v8dbg_class_CLASS__FIELD__TYPE", symbol, save field "FIELD" into
+ * class CLASS with the offset described by the symbol. Note that CLASS does
+ * not necessarily exist already.
+ */
+static int
+conf_update_field(v8_cfg_t *cfgp, const char *symbol)
+{
+ v8_class_t *clp;
+ v8_field_t *flp;
+ intptr_t offset;
+ char *pp, *qq, *tt;
+ char buf[128];
+
+ (void) strlcpy(buf, symbol, sizeof (buf));
+
+ pp = buf + sizeof ("v8dbg_class_") - 1;
+ qq = conf_next_part(buf, pp);
+
+ if (qq == NULL || (tt = conf_next_part(buf, qq)) == NULL)
+ return (-1);
+
+ if (cfgp->v8cfg_readsym(cfgp, symbol, &offset) == -1) {
+ mdb_warn("failed to read symbol \"%s\"", symbol);
+ return (-1);
+ }
+
+ if ((clp = conf_class_findcreate(pp)) == NULL ||
+ (flp = conf_field_create(clp, qq, (size_t)offset)) == NULL)
+ return (-1);
+
+ if (strcmp(tt, "int") == 0)
+ flp->v8f_isbyte = B_TRUE;
+
+ if (strcmp(tt, "char") == 0)
+ flp->v8f_isstr = B_TRUE;
+
+ return (0);
+}
+
+static int
+conf_update_enum(v8_cfg_t *cfgp, const char *symbol, const char *name,
+ v8_enum_t *enp)
+{
+ intptr_t value;
+
+ if (cfgp->v8cfg_readsym(cfgp, symbol, &value) == -1) {
+ mdb_warn("failed to read symbol \"%s\"", symbol);
+ return (-1);
+ }
+
+ enp->v8e_value = (int)value;
+ (void) strlcpy(enp->v8e_name, name, sizeof (enp->v8e_name));
+ return (0);
+}
+
+/*
+ * Given a "v8dbg_type_TYPENAME" constant, save the type name in v8_types. Note
+ * that this enum has multiple integer values with the same string label.
+ */
+static int
+conf_update_type(v8_cfg_t *cfgp, const char *symbol)
+{
+ char *klass;
+ v8_enum_t *enp;
+ char buf[128];
+
+ if (v8_next_type > sizeof (v8_types) / sizeof (v8_types[0])) {
+ mdb_warn("too many V8 types\n");
+ return (-1);
+ }
+
+ (void) strlcpy(buf, symbol, sizeof (buf));
+
+ klass = buf + sizeof ("v8dbg_type_") - 1;
+ if (conf_next_part(buf, klass) == NULL)
+ return (-1);
+
+ enp = &v8_types[v8_next_type++];
+ return (conf_update_enum(cfgp, symbol, klass, enp));
+}
+
+/*
+ * Given a "v8dbg_frametype_TYPENAME" constant, save the frame type in
+ * v8_frametypes.
+ */
+static int
+conf_update_frametype(v8_cfg_t *cfgp, const char *symbol)
+{
+ const char *frametype;
+ v8_enum_t *enp;
+
+ if (v8_next_frametype >
+ sizeof (v8_frametypes) / sizeof (v8_frametypes[0])) {
+ mdb_warn("too many V8 frame types\n");
+ return (-1);
+ }
+
+ enp = &v8_frametypes[v8_next_frametype++];
+ frametype = symbol + sizeof ("v8dbg_frametype_") - 1;
+ return (conf_update_enum(cfgp, symbol, frametype, enp));
+}
+
+/*
+ * Now that all classes have been loaded, update the "start" and "end" fields of
+ * each class based on the values of its parent class.
+ */
+static void
+conf_class_compute_offsets(v8_class_t *clp)
+{
+ v8_field_t *flp;
+
+ assert(clp->v8c_start == 0);
+ assert(clp->v8c_end == (size_t)-1);
+
+ if (clp->v8c_parent != NULL) {
+ if (clp->v8c_parent->v8c_end == (size_t)-1)
+ conf_class_compute_offsets(clp->v8c_parent);
+
+ clp->v8c_start = clp->v8c_parent->v8c_end;
+ }
+
+ if (clp->v8c_fields == NULL) {
+ clp->v8c_end = clp->v8c_start;
+ return;
+ }
+
+ for (flp = clp->v8c_fields; flp->v8f_next != NULL; flp = flp->v8f_next)
+ ;
+
+ if (flp == NULL)
+ clp->v8c_end = clp->v8c_start;
+ else
+ clp->v8c_end = flp->v8f_offset + sizeof (uintptr_t);
+}
+
+/*
+ * Utility functions
+ */
+#define JSSTR_NONE 0
+#define JSSTR_NUDE JSSTR_NONE
+
+#define JSSTR_FLAGSHIFT 16
+#define JSSTR_VERBOSE (0x1 << JSSTR_FLAGSHIFT)
+#define JSSTR_QUOTED (0x2 << JSSTR_FLAGSHIFT)
+#define JSSTR_ISASCII (0x4 << JSSTR_FLAGSHIFT)
+
+#define JSSTR_MAXDEPTH 512
+#define JSSTR_DEPTH(f) ((f) & ((1 << JSSTR_FLAGSHIFT) - 1))
+#define JSSTR_BUMPDEPTH(f) ((f) + 1)
+
+static int jsstr_print(uintptr_t, uint_t, char **, size_t *);
+static boolean_t jsobj_is_undefined(uintptr_t addr);
+static boolean_t jsobj_is_hole(uintptr_t addr);
+
+static const char *
+enum_lookup_str(v8_enum_t *enums, int val, const char *dflt)
+{
+ v8_enum_t *ep;
+
+ for (ep = enums; ep->v8e_name[0] != '\0'; ep++) {
+ if (ep->v8e_value == val)
+ return (ep->v8e_name);
+ }
+
+ return (dflt);
+}
+
+static void
+enum_print(v8_enum_t *enums)
+{
+ v8_enum_t *itp;
+
+ for (itp = enums; itp->v8e_name[0] != '\0'; itp++)
+ mdb_printf("%-30s = 0x%02x\n", itp->v8e_name, itp->v8e_value);
+}
+
+/*
+ * b[v]snprintf behave like [v]snprintf(3c), except that they update the buffer
+ * and length arguments based on how much buffer space is used by the operation.
+ * This makes it much easier to combine multiple calls in sequence without
+ * worrying about buffer overflow.
+ */
+static size_t
+bvsnprintf(char **bufp, size_t *buflenp, const char *format, va_list alist)
+{
+ size_t rv, len;
+
+ if (*buflenp == 0)
+ return (vsnprintf(NULL, 0, format, alist));
+
+ rv = vsnprintf(*bufp, *buflenp, format, alist);
+
+ len = MIN(rv, *buflenp);
+ *buflenp -= len;
+ *bufp += len;
+
+ return (len);
+}
+
+static size_t
+bsnprintf(char **bufp, size_t *buflenp, const char *format, ...)
+{
+ va_list alist;
+ size_t rv;
+
+ va_start(alist, format);
+ rv = bvsnprintf(bufp, buflenp, format, alist);
+ va_end(alist);
+
+ return (rv);
+}
+
+static void
+v8_warn(const char *format, ...)
+{
+ char buf[512];
+ va_list alist;
+ int len;
+
+ if (!v8_warnings || v8_silent)
+ return;
+
+ va_start(alist, format);
+ (void) vsnprintf(buf, sizeof (buf), format, alist);
+ va_end(alist);
+
+ /*
+ * This is made slightly annoying because we need to effectively
+ * preserve the original format string to allow for mdb to use the
+ * new-line at the end to indicate that strerror should be elided.
+ */
+ if ((len = strlen(format)) > 0 && format[len - 1] == '\n') {
+ buf[strlen(buf) - 1] = '\0';
+ mdb_warn("%s\n", buf);
+ } else {
+ mdb_warn("%s", buf);
+ }
+}
+
+static v8_field_t *
+conf_field_lookup(const char *klass, const char *field)
+{
+ v8_class_t *clp;
+ v8_field_t *flp;
+
+ for (clp = v8_classes; clp != NULL; clp = clp->v8c_next) {
+ if (strcmp(klass, clp->v8c_name) == 0)
+ break;
+ }
+
+ if (clp == NULL)
+ return (NULL);
+
+ for (flp = clp->v8c_fields; flp != NULL; flp = flp->v8f_next) {
+ if (strcmp(field, flp->v8f_name) == 0)
+ break;
+ }
+
+ return (flp);
+}
+
+/*
+ * Returns in "offp" the offset of field "field" in C++ class "klass".
+ */
+static int
+heap_offset(const char *klass, const char *field, ssize_t *offp)
+{
+ v8_field_t *flp;
+
+ flp = conf_field_lookup(klass, field);
+
+ if (flp == NULL)
+ return (-1);
+
+ *offp = V8_OFF_HEAP(flp->v8f_offset);
+ return (0);
+}
+
+/*
+ * Assuming "addr" is an instance of the C++ heap class "klass", read into *valp
+ * the pointer-sized value of field "field".
+ */
+static int
+read_heap_ptr(uintptr_t *valp, uintptr_t addr, ssize_t off)
+{
+ if (mdb_vread(valp, sizeof (*valp), addr + off) == -1) {
+ v8_warn("failed to read offset %d from %p", off, addr);
+ return (-1);
+ }
+
+ return (0);
+}
+
+/*
+ * Like read_heap_ptr, but assume the field is an SMI and store the actual value
+ * into *valp rather than the encoded representation.
+ */
+static int
+read_heap_smi(uintptr_t *valp, uintptr_t addr, ssize_t off)
+{
+ if (read_heap_ptr(valp, addr, off) != 0)
+ return (-1);
+
+ if (!V8_IS_SMI(*valp)) {
+ v8_warn("expected SMI, got %p\n", *valp);
+ return (-1);
+ }
+
+ *valp = V8_SMI_VALUE(*valp);
+
+ return (0);
+}
+
+static int
+read_heap_double(double *valp, uintptr_t addr, ssize_t off)
+{
+ if (mdb_vread(valp, sizeof (*valp), addr + off) == -1) {
+ v8_warn("failed to read heap value at %p", addr + off);
+ return (-1);
+ }
+
+ return (0);
+}
+
+/*
+ * Assuming "addr" refers to a FixedArray, return a newly-allocated array
+ * representing its contents.
+ */
+static int
+read_heap_array(uintptr_t addr, uintptr_t **retp, size_t *lenp, int flags)
+{
+ uint8_t type;
+ uintptr_t len;
+
+ if (!V8_IS_HEAPOBJECT(addr))
+ return (-1);
+
+ if (read_typebyte(&type, addr) != 0)
+ return (-1);
+
+ if (type != V8_TYPE_FIXEDARRAY)
+ return (-1);
+
+ if (read_heap_smi(&len, addr, V8_OFF_FIXEDARRAY_LENGTH) != 0)
+ return (-1);
+
+ *lenp = len;
+
+ if (len == 0) {
+ *retp = NULL;
+ return (0);
+ }
+
+ if ((*retp = mdb_zalloc(len * sizeof (uintptr_t), flags)) == NULL)
+ return (-1);
+
+ if (mdb_vread(*retp, len * sizeof (uintptr_t),
+ addr + V8_OFF_FIXEDARRAY_DATA) == -1) {
+ if (!(flags & UM_GC))
+ mdb_free(*retp, len * sizeof (uintptr_t));
+
+ *retp = NULL;
+ return (-1);
+ }
+
+ return (0);
+}
+
+static int
+read_heap_byte(uint8_t *valp, uintptr_t addr, ssize_t off)
+{
+ if (mdb_vread(valp, sizeof (*valp), addr + off) == -1) {
+ v8_warn("failed to read heap value at %p", addr + off);
+ return (-1);
+ }
+
+ return (0);
+}
+
+/*
+ * This is truly horrific. Inside the V8 Script class are a number of
+ * small-integer fields like the function_token_position (an offset into the
+ * script's text where the "function" token appears). For 32-bit processes, V8
+ * stores these as a sequence of SMI fields, which we know how to interpret well
+ * enough. For 64-bit processes, "to avoid wasting space", they use a different
+ * trick: each 8-byte word contains two integer fields. The low word is
+ * represented like an SMI: shifted left by one. They don't bother shifting the
+ * high word, since its low bit will never be looked at (since it's not
+ * word-aligned).
+ *
+ * This function is used for cases where we would use read_heap_smi(), except
+ * that this is one of those fields that might be encoded or might not be,
+ * depending on whether the address is word-aligned.
+ */
+static int
+read_heap_maybesmi(uintptr_t *valp, uintptr_t addr, ssize_t off)
+{
+#ifdef _LP64
+ uint32_t readval;
+
+ if (mdb_vread(&readval, sizeof (readval), addr + off) == -1) {
+ *valp = -1;
+ v8_warn("failed to read offset %d from %p", off, addr);
+ return (-1);
+ }
+
+ /*
+ * If this was the low half-word, it needs to be shifted right.
+ */
+ if ((addr + off) % sizeof (uintptr_t) == 0)
+ readval >>= 1;
+
+ *valp = (uintptr_t)readval;
+ return (0);
+#else
+ return (read_heap_smi(valp, addr, off));
+#endif
+}
+
+/*
+ * Given a heap object, returns in *valp the byte describing the type of the
+ * object. This is shorthand for first retrieving the Map at the start of the
+ * heap object and then retrieving the type byte from the Map object.
+ */
+static int
+read_typebyte(uint8_t *valp, uintptr_t addr)
+{
+ uintptr_t mapaddr;
+ ssize_t off = V8_OFF_HEAPOBJECT_MAP;
+
+ if (mdb_vread(&mapaddr, sizeof (mapaddr), addr + off) == -1) {
+ v8_warn("failed to read type of %p", addr);
+ return (-1);
+ }
+
+ if (!V8_IS_HEAPOBJECT(mapaddr)) {
+ v8_warn("object map is not a heap object\n");
+ return (-1);
+ }
+
+ if (read_heap_byte(valp, mapaddr, V8_OFF_MAP_INSTANCE_ATTRIBUTES) == -1)
+ return (-1);
+
+ return (0);
+}
+
+/*
+ * Given a heap object, returns in *valp the size of the object. For
+ * variable-size objects, returns an undefined value.
+ */
+static int
+read_size(size_t *valp, uintptr_t addr)
+{
+ uintptr_t mapaddr;
+ uint8_t size;
+
+ if (read_heap_ptr(&mapaddr, addr, V8_OFF_HEAPOBJECT_MAP) != 0)
+ return (-1);
+
+ if (!V8_IS_HEAPOBJECT(mapaddr)) {
+ v8_warn("heap object map is not itself a heap object\n");
+ return (-1);
+ }
+
+ if (read_heap_byte(&size, mapaddr, V8_OFF_MAP_INSTANCE_SIZE) != 0)
+ return (-1);
+
+ *valp = size << V8_PointerSizeLog2;
+ return (0);
+}
+
+/*
+ * Assuming "addr" refers to a FixedArray that is implementing a
+ * StringDictionary, iterate over its contents calling the specified function
+ * with key and value.
+ */
+static int
+read_heap_dict(uintptr_t addr,
+ int (*func)(const char *, uintptr_t, void *), void *arg)
+{
+ uint8_t type;
+ uintptr_t len;
+ char buf[512];
+ char *bufp;
+ int rval = -1;
+ uintptr_t *dict, ndict, i;
+
+ if (read_heap_array(addr, &dict, &ndict, UM_SLEEP) != 0)
+ return (-1);
+
+ if (V8_DICT_ENTRY_SIZE < 2) {
+ v8_warn("dictionary entry size (%d) is too small for a "
+ "key and value\n", V8_DICT_ENTRY_SIZE);
+ goto out;
+ }
+
+ for (i = V8_DICT_START_INDEX + V8_DICT_PREFIX_SIZE; i < ndict;
+ i += V8_DICT_ENTRY_SIZE) {
+ /*
+ * The layout here is key, value, details. (This is hardcoded
+ * in Dictionary<Shape, Key>::SetEntry().)
+ */
+ if (jsobj_is_undefined(dict[i]))
+ continue;
+
+ if (V8_IS_SMI(dict[i])) {
+ intptr_t val = V8_SMI_VALUE(dict[i]);
+ (void) snprintf(buf, sizeof (buf), "%" PRIdPTR, val);
+ } else {
+ if (jsobj_is_hole(dict[i])) {
+ /*
+ * In some cases, the key can (apparently) be a
+ * hole, in which case we skip over it.
+ */
+ continue;
+ }
+
+ if (read_typebyte(&type, dict[i]) != 0)
+ goto out;
+
+ if (!V8_TYPE_STRING(type))
+ goto out;
+
+ bufp = buf;
+ len = sizeof (buf);
+
+ if (jsstr_print(dict[i], JSSTR_NUDE, &bufp, &len) != 0)
+ goto out;
+ }
+
+ if (func(buf, dict[i + 1], arg) == -1)
+ goto out;
+ }
+
+ rval = 0;
+out:
+ mdb_free(dict, ndict * sizeof (uintptr_t));
+
+ return (rval);
+}
+
+/*
+ * Given an object, returns in "buf" the name of the constructor function. With
+ * "verbose", prints the pointer to the JSFunction object. Given anything else,
+ * returns an error (and warns the user why).
+ */
+static int
+obj_jsconstructor(uintptr_t addr, char **bufp, size_t *lenp, boolean_t verbose)
+{
+ uint8_t type;
+ uintptr_t map, consfunc, funcinfop;
+ const char *constype;
+
+ if (!V8_IS_HEAPOBJECT(addr) ||
+ read_typebyte(&type, addr) != 0 ||
+ (type != V8_TYPE_JSOBJECT && type != V8_TYPE_JSARRAY)) {
+ mdb_warn("%p is not a JSObject\n", addr);
+ return (-1);
+ }
+
+ if (mdb_vread(&map, sizeof (map), addr + V8_OFF_HEAPOBJECT_MAP) == -1 ||
+ mdb_vread(&consfunc, sizeof (consfunc),
+ map + V8_OFF_MAP_CONSTRUCTOR) == -1) {
+ mdb_warn("unable to read object map\n");
+ return (-1);
+ }
+
+ if (read_typebyte(&type, consfunc) != 0)
+ return (-1);
+
+ constype = enum_lookup_str(v8_types, type, "");
+ if (strcmp(constype, "Oddball") == 0) {
+ jsobj_print_t jsop;
+ bzero(&jsop, sizeof (jsop));
+ jsop.jsop_bufp = bufp;
+ jsop.jsop_lenp = lenp;
+ return (jsobj_print_oddball(consfunc, &jsop));
+ }
+
+ if (strcmp(constype, "JSFunction") != 0) {
+ mdb_warn("constructor: expected JSFunction, found %s\n",
+ constype);
+ return (-1);
+ }
+
+ if (read_heap_ptr(&funcinfop, consfunc, V8_OFF_JSFUNCTION_SHARED) != 0)
+ return (-1);
+
+ if (jsfunc_name(funcinfop, bufp, lenp) != 0)
+ return (-1);
+
+ if (verbose)
+ bsnprintf(bufp, lenp, " (JSFunction: %p)", consfunc);
+
+ return (0);
+}
+
+/*
+ * Returns in "buf" a description of the type of "addr" suitable for printing.
+ */
+static int
+obj_jstype(uintptr_t addr, char **bufp, size_t *lenp, uint8_t *typep)
+{
+ uint8_t typebyte;
+ uintptr_t strptr, map, consfunc, funcinfop;
+ const char *typename;
+
+ if (V8_IS_FAILURE(addr)) {
+ if (typep)
+ *typep = 0;
+ (void) bsnprintf(bufp, lenp, "'Failure' object");
+ return (0);
+ }
+
+ if (V8_IS_SMI(addr)) {
+ if (typep)
+ *typep = 0;
+ (void) bsnprintf(bufp, lenp, "SMI: value = %d",
+ V8_SMI_VALUE(addr));
+ return (0);
+ }
+
+ if (read_typebyte(&typebyte, addr) != 0)
+ return (-1);
+
+ if (typep)
+ *typep = typebyte;
+
+ typename = enum_lookup_str(v8_types, typebyte, "<unknown>");
+ (void) bsnprintf(bufp, lenp, typename);
+
+ if (strcmp(typename, "Oddball") == 0) {
+ if (read_heap_ptr(&strptr, addr,
+ V8_OFF_ODDBALL_TO_STRING) != -1) {
+ (void) bsnprintf(bufp, lenp, ": \"");
+ (void) jsstr_print(strptr, JSSTR_NUDE, bufp, lenp);
+ (void) bsnprintf(bufp, lenp, "\"");
+ }
+ }
+
+ if (strcmp(typename, "JSObject") == 0 &&
+ mdb_vread(&map, sizeof (map), addr + V8_OFF_HEAPOBJECT_MAP) != -1 &&
+ mdb_vread(&consfunc, sizeof (consfunc),
+ map + V8_OFF_MAP_CONSTRUCTOR) != -1 &&
+ read_typebyte(&typebyte, consfunc) == 0 &&
+ strcmp(enum_lookup_str(v8_types, typebyte, ""),
+ "JSFunction") == 0 &&
+ mdb_vread(&funcinfop, sizeof (funcinfop),
+ consfunc + V8_OFF_JSFUNCTION_SHARED) != -1) {
+ (void) bsnprintf(bufp, lenp, ": ");
+ (void) jsfunc_name(funcinfop, bufp, lenp);
+ }
+
+ return (0);
+}
+
+/*
+ * V8 allows implementers (like Node) to store pointer-sized values into
+ * internal fields within V8 heap objects. Implementors access these values by
+ * 0-based index (e.g., SetInternalField(0, value)). These values are stored as
+ * an array directly after the last actual C++ field in the C++ object.
+ *
+ * Node uses internal fields to refer to handles. For example, a socket's C++
+ * HandleWrap object is typically stored as internal field 0 in the JavaScript
+ * Socket object. Similarly, the native-heap-allocated chunk of memory
+ * associated with a Node Buffer is referenced by field 0 in the External array
+ * pointed-to by the Node Buffer JSObject.
+ */
+static int
+obj_v8internal(uintptr_t addr, uint_t idx, uintptr_t *valp)
+{
+ char *bufp;
+ size_t len;
+ ssize_t off;
+ uint8_t type;
+
+ v8_class_t *clp;
+ char buf[256];
+
+ bufp = buf;
+ len = sizeof (buf);
+ if (obj_jstype(addr, &bufp, &len, &type) != 0)
+ return (DCMD_ERR);
+
+ if (type == 0) {
+ mdb_warn("%p: unsupported type\n", addr);
+ return (DCMD_ERR);
+ }
+
+ for (clp = v8_classes; clp != NULL; clp = clp->v8c_next) {
+ if (strcmp(buf, clp->v8c_name) == 0)
+ break;
+ }
+
+ if (clp == NULL) {
+ mdb_warn("%p: didn't find expected class\n", addr);
+ return (DCMD_ERR);
+ }
+
+ off = clp->v8c_end + (idx * sizeof (uintptr_t)) - 1;
+ if (read_heap_ptr(valp, addr, off) != 0) {
+ mdb_warn("%p: failed to read from %p\n", addr, addr + off);
+ return (DCMD_ERR);
+ }
+
+ return (DCMD_OK);
+}
+
+/*
+ * Print out the fields of the given object that come from the given class.
+ */
+static int
+obj_print_fields(uintptr_t baddr, v8_class_t *clp)
+{
+ v8_field_t *flp;
+ uintptr_t addr, value;
+ int rv;
+ char *bufp;
+ size_t len;
+ uint8_t type;
+ char buf[256];
+
+ for (flp = clp->v8c_fields; flp != NULL; flp = flp->v8f_next) {
+ bufp = buf;
+ len = sizeof (buf);
+
+ addr = baddr + V8_OFF_HEAP(flp->v8f_offset);
+
+ if (flp->v8f_isstr) {
+ if (mdb_readstr(buf, sizeof (buf), addr) == -1) {
+ mdb_printf("%p %s (unreadable)\n",
+ addr, flp->v8f_name);
+ continue;
+ }
+
+ mdb_printf("%p %s = \"%s\"\n",
+ addr, flp->v8f_name, buf);
+ continue;
+ }
+
+ if (flp->v8f_isbyte) {
+ uint8_t sv;
+ if (mdb_vread(&sv, sizeof (sv), addr) == -1) {
+ mdb_printf("%p %s (unreadable)\n",
+ addr, flp->v8f_name);
+ continue;
+ }
+
+ mdb_printf("%p %s = 0x%x\n", addr, flp->v8f_name, sv);
+ continue;
+ }
+
+ rv = mdb_vread((void *)&value, sizeof (value), addr);
+
+ if (rv != sizeof (value) ||
+ obj_jstype(value, &bufp, &len, &type) != 0) {
+ mdb_printf("%p %s (unreadable)\n", addr, flp->v8f_name);
+ continue;
+ }
+
+ if (type != 0 && V8_TYPE_STRING(type)) {
+ (void) bsnprintf(&bufp, &len, ": ");
+ (void) jsstr_print(value, JSSTR_QUOTED, &bufp, &len);
+ }
+
+ mdb_printf("%p %s = %p (%s)\n", addr, flp->v8f_name, value,
+ buf);
+ }
+
+ return (DCMD_OK);
+}
+
+/*
+ * Print out all fields of the given object, starting with the root of the class
+ * hierarchy and working down the most specific type.
+ */
+static int
+obj_print_class(uintptr_t addr, v8_class_t *clp)
+{
+ int rv = 0;
+
+ /*
+ * If we have no fields, we just print a simple inheritance hierarchy.
+ * If we have fields but our parent doesn't, our header includes the
+ * inheritance hierarchy.
+ */
+ if (clp->v8c_end == 0) {
+ mdb_printf("%s ", clp->v8c_name);
+
+ if (clp->v8c_parent != NULL) {
+ mdb_printf("< ");
+ (void) obj_print_class(addr, clp->v8c_parent);
+ }
+
+ return (0);
+ }
+
+ mdb_printf("%p %s", addr, clp->v8c_name);
+
+ if (clp->v8c_start == 0 && clp->v8c_parent != NULL) {
+ mdb_printf(" < ");
+ (void) obj_print_class(addr, clp->v8c_parent);
+ }
+
+ mdb_printf(" {\n");
+ (void) mdb_inc_indent(4);
+
+ if (clp->v8c_start > 0 && clp->v8c_parent != NULL)
+ rv = obj_print_class(addr, clp->v8c_parent);
+
+ rv |= obj_print_fields(addr, clp);
+ (void) mdb_dec_indent(4);
+ mdb_printf("}\n");
+
+ return (rv);
+}
+
+/*
+ * Print the ASCII string for the given JS string, expanding ConsStrings and
+ * ExternalStrings as needed.
+ */
+static int jsstr_print_seq(uintptr_t, uint_t, char **, size_t *, size_t,
+ ssize_t);
+static int jsstr_print_cons(uintptr_t, uint_t, char **, size_t *);
+static int jsstr_print_sliced(uintptr_t, uint_t, char **, size_t *);
+static int jsstr_print_external(uintptr_t, uint_t, char **, size_t *);
+
+static int
+jsstr_print(uintptr_t addr, uint_t flags, char **bufp, size_t *lenp)
+{
+ uint8_t typebyte;
+ int err = 0;
+ char *lbufp;
+ size_t llen;
+ char buf[64];
+ boolean_t verbose = flags & JSSTR_VERBOSE ? B_TRUE : B_FALSE;
+
+ if (read_typebyte(&typebyte, addr) != 0) {
+ (void) bsnprintf(bufp, lenp, "<could not read type>");
+ return (-1);
+ }
+
+ if (!V8_TYPE_STRING(typebyte)) {
+ (void) bsnprintf(bufp, lenp, "<not a string>");
+ return (-1);
+ }
+
+ if (verbose) {
+ lbufp = buf;
+ llen = sizeof (buf);
+ (void) obj_jstype(addr, &lbufp, &llen, NULL);
+ mdb_printf("%s\n", buf);
+ (void) mdb_inc_indent(4);
+ }
+
+ if (JSSTR_DEPTH(flags) > JSSTR_MAXDEPTH) {
+ (void) bsnprintf(bufp, lenp, "<maximum depth exceeded>");
+ return (-1);
+ }
+
+ if (V8_STRENC_ASCII(typebyte))
+ flags |= JSSTR_ISASCII;
+ else
+ flags &= ~JSSTR_ISASCII;
+
+ flags = JSSTR_BUMPDEPTH(flags);
+
+ if (V8_STRREP_SEQ(typebyte))
+ err = jsstr_print_seq(addr, flags, bufp, lenp, 0, -1);
+ else if (V8_STRREP_CONS(typebyte))
+ err = jsstr_print_cons(addr, flags, bufp, lenp);
+ else if (V8_STRREP_EXT(typebyte))
+ err = jsstr_print_external(addr, flags, bufp, lenp);
+ else if (V8_STRREP_SLICED(typebyte))
+ err = jsstr_print_sliced(addr, flags, bufp, lenp);
+ else {
+ (void) bsnprintf(bufp, lenp, "<unknown string type>");
+ err = -1;
+ }
+
+ if (verbose)
+ (void) mdb_dec_indent(4);
+
+ return (err);
+}
+
+static int
+jsstr_print_seq(uintptr_t addr, uint_t flags, char **bufp, size_t *lenp,
+ size_t sliceoffset, ssize_t slicelen)
+{
+ /*
+ * To allow the caller to allocate a very large buffer for strings,
+ * we'll allocate a buffer sized based on our input, making it at
+ * least enough space for our ellipsis and at most 256K.
+ */
+ uintptr_t i, nreadoffset, blen, nstrbytes, nstrchrs;
+ ssize_t nreadbytes;
+ boolean_t verbose = flags & JSSTR_VERBOSE ? B_TRUE : B_FALSE;
+ boolean_t quoted = flags & JSSTR_QUOTED ? B_TRUE : B_FALSE;
+ char *buf;
+ uint16_t chrval;
+
+ if (read_heap_smi(&nstrchrs, addr, V8_OFF_STRING_LENGTH) != 0) {
+ (void) bsnprintf(bufp, lenp,
+ "<string (failed to read length)>");
+ return (-1);
+ }
+
+ if (slicelen != -1)
+ nstrchrs = slicelen;
+
+ blen = ((flags & JSSTR_ISASCII) != 0) ? *lenp : 2 * (*lenp);
+ if ((blen = MIN(blen, 256 * 1024)) == 0)
+ return (0);
+
+ if ((flags & JSSTR_ISASCII) != 0) {
+ nstrbytes = nstrchrs;
+ nreadoffset = sliceoffset;
+ nreadbytes = nstrbytes + sizeof ("\"\"") <= *lenp ?
+ nstrbytes : *lenp - sizeof ("\"\"[...]");
+ } else {
+ nstrbytes = 2 * nstrchrs;
+ nreadoffset = 2 * sliceoffset;
+ nreadbytes = nstrchrs + sizeof ("\"\"") <= *lenp ?
+ nstrbytes : 2 * (*lenp - sizeof ("\"\"[...]"));
+ }
+
+ if (nreadbytes < 0) {
+ /*
+ * We don't even have the room to store the ellipsis; zero
+ * the buffer out and set the length to zero.
+ */
+ *bufp = '\0';
+ *lenp = 0;
+ return (0);
+ }
+
+ if (verbose) {
+ mdb_printf("length: %d chars (%d bytes), "
+ "will read %d bytes from offset %d\n",
+ nstrchrs, nstrbytes, nreadbytes, nreadoffset);
+ mdb_printf("given buffer size: %d, internal buffer: %d\n",
+ *lenp, blen);
+ }
+
+ if (nstrbytes == 0) {
+ (void) bsnprintf(bufp, lenp, "%s%s",
+ quoted ? "\"" : "", quoted ? "\"" : "");
+ return (0);
+ }
+
+ buf = alloca(blen);
+ buf[0] = '\0';
+
+ if ((flags & JSSTR_ISASCII) != 0) {
+ if (mdb_readstr(buf, nreadbytes + 1,
+ addr + V8_OFF_SEQASCIISTR_CHARS + nreadoffset) == -1) {
+ v8_warn("failed to read SeqString data");
+ return (-1);
+ }
+
+ if (nreadbytes != nstrbytes)
+ (void) strlcat(buf, "[...]", blen);
+
+ (void) bsnprintf(bufp, lenp, "%s%s%s",
+ quoted ? "\"" : "", buf, quoted ? "\"" : "");
+ } else {
+ if (mdb_readstr(buf, nreadbytes,
+ addr + V8_OFF_SEQTWOBYTESTR_CHARS + nreadoffset) == -1) {
+ v8_warn("failed to read SeqTwoByteString data");
+ return (-1);
+ }
+
+ (void) bsnprintf(bufp, lenp, "%s", quoted ? "\"" : "");
+ for (i = 0; i < nreadbytes; i += 2) {
+ /*LINTED*/
+ chrval = *((uint16_t *)(buf + i));
+ (void) bsnprintf(bufp, lenp, "%c",
+ (isascii(chrval) || chrval == 0) ?
+ (char)chrval : '?');
+ }
+ if (nreadbytes != nstrbytes)
+ (void) bsnprintf(bufp, lenp, "[...]");
+ (void) bsnprintf(bufp, lenp, "%s", quoted ? "\"" : "");
+ }
+
+ return (0);
+}
+
+static int
+jsstr_print_cons(uintptr_t addr, uint_t flags, char **bufp, size_t *lenp)
+{
+ boolean_t verbose = flags & JSSTR_VERBOSE ? B_TRUE : B_FALSE;
+ boolean_t quoted = flags & JSSTR_QUOTED ? B_TRUE : B_FALSE;
+ uintptr_t ptr1, ptr2;
+
+ if (read_heap_ptr(&ptr1, addr, V8_OFF_CONSSTRING_FIRST) != 0) {
+ (void) bsnprintf(bufp, lenp,
+ "<cons string (failed to read first)>");
+ return (-1);
+ }
+
+ if (read_heap_ptr(&ptr2, addr, V8_OFF_CONSSTRING_SECOND) != 0) {
+ (void) bsnprintf(bufp, lenp,
+ "<cons string (failed to read second)>");
+ return (-1);
+ }
+
+ if (verbose) {
+ mdb_printf("ptr1: %p\n", ptr1);
+ mdb_printf("ptr2: %p\n", ptr2);
+ }
+
+ if (quoted)
+ (void) bsnprintf(bufp, lenp, "\"");
+
+ flags = JSSTR_BUMPDEPTH(flags) & ~JSSTR_QUOTED;
+
+ if (jsstr_print(ptr1, flags, bufp, lenp) != 0)
+ return (-1);
+
+ if (jsstr_print(ptr2, flags, bufp, lenp) != 0)
+ return (-1);
+
+ if (quoted)
+ (void) bsnprintf(bufp, lenp, "\"");
+
+ return (0);
+}
+
+static int
+jsstr_print_sliced(uintptr_t addr, uint_t flags, char **bufp, size_t *lenp)
+{
+ uintptr_t parent, offset, length;
+ uint8_t typebyte;
+ boolean_t verbose = flags & JSSTR_VERBOSE ? B_TRUE : B_FALSE;
+ boolean_t quoted = flags & JSSTR_QUOTED ? B_TRUE : B_FALSE;
+
+ if (read_heap_ptr(&parent, addr, V8_OFF_SLICEDSTRING_PARENT) != 0) {
+ (void) bsnprintf(bufp, lenp,
+ "<sliced string (failed to read parent)>");
+ return (-1);
+ }
+
+ if (read_heap_smi(&offset, addr, V8_OFF_SLICEDSTRING_OFFSET) != 0) {
+ (void) bsnprintf(bufp, lenp,
+ "<sliced string (failed to read offset)>");
+ return (-1);
+ }
+
+ if (read_heap_smi(&length, addr, V8_OFF_STRING_LENGTH) != 0) {
+ (void) bsnprintf(bufp, lenp,
+ "<sliced string (failed to read length)>");
+ return (-1);
+ }
+
+ if (verbose)
+ mdb_printf("parent: %p, offset = %d, length = %d\n",
+ parent, offset, length);
+
+ if (read_typebyte(&typebyte, parent) != 0) {
+ (void) bsnprintf(bufp, lenp,
+ "<sliced string (failed to read parent type)>");
+ return (0);
+ }
+
+ if (!V8_STRREP_SEQ(typebyte)) {
+ (void) bsnprintf(bufp, lenp,
+ "<sliced string (parent is not a sequential string)>");
+ return (0);
+ }
+
+ if (quoted)
+ (void) bsnprintf(bufp, lenp, "\"");
+
+ flags = JSSTR_BUMPDEPTH(flags) & ~JSSTR_QUOTED;
+
+ if (V8_STRENC_ASCII(typebyte))
+ flags |= JSSTR_ISASCII;
+
+ if (jsstr_print_seq(parent, flags, bufp, lenp, offset, length) != 0)
+ return (-1);
+
+ if (quoted)
+ (void) bsnprintf(bufp, lenp, "\"");
+
+ return (0);
+}
+
+static int
+jsstr_print_external(uintptr_t addr, uint_t flags, char **bufp, size_t *lenp)
+{
+ uintptr_t ptr1, ptr2;
+ size_t blen = *lenp + 1;
+ char *buf;
+ boolean_t quoted = flags & JSSTR_QUOTED ? B_TRUE : B_FALSE;
+ int rval = -1;
+
+ if ((flags & JSSTR_ISASCII) == 0) {
+ (void) bsnprintf(bufp, lenp, "<external two-byte string>");
+ return (0);
+ }
+
+ if (flags & JSSTR_VERBOSE)
+ mdb_printf("assuming Node.js string\n");
+
+ if (read_heap_ptr(&ptr1, addr, V8_OFF_EXTERNALSTRING_RESOURCE) != 0) {
+ (void) bsnprintf(bufp, lenp,
+ "<external string (failed to read resource)>");
+ return (-1);
+ }
+
+ if (mdb_vread(&ptr2, sizeof (ptr2),
+ ptr1 + NODE_OFF_EXTSTR_DATA) == -1) {
+ (void) bsnprintf(bufp, lenp, "<external string (failed to "
+ "read node external pointer %p)>",
+ ptr1 + NODE_OFF_EXTSTR_DATA);
+ return (-1);
+ }
+
+ buf = mdb_alloc(blen, UM_SLEEP);
+
+ if (mdb_readstr(buf, blen, ptr2) == -1) {
+ (void) bsnprintf(bufp, lenp, "<external string "
+ "(failed to read ExternalString data)>");
+ goto out;
+ }
+
+ if (buf[0] != '\0' && !isascii(buf[0])) {
+ (void) bsnprintf(bufp, lenp, "<external string "
+ "(failed to read ExternalString ascii data)>");
+ goto out;
+ }
+
+ (void) bsnprintf(bufp, lenp, "%s%s%s",
+ quoted ? "\"" : "", buf, quoted ? "\"" : "");
+
+ rval = 0;
+out:
+ mdb_free(buf, blen);
+
+ return (rval);
+}
+
+/*
+ * Returns true if the given address refers to the named oddball object (e.g.
+ * "undefined"). Returns false on failure (since we shouldn't fail on the
+ * actual "undefined" value).
+ */
+static boolean_t
+jsobj_is_oddball(uintptr_t addr, char *oddball)
+{
+ uint8_t type;
+ uintptr_t strptr;
+ const char *typename;
+ char buf[16];
+ char *bufp = buf;
+ size_t len = sizeof (buf);
+
+ v8_silent++;
+
+ if (read_typebyte(&type, addr) != 0) {
+ v8_silent--;
+ return (B_FALSE);
+ }
+
+ v8_silent--;
+ typename = enum_lookup_str(v8_types, type, "<unknown>");
+ if (strcmp(typename, "Oddball") != 0)
+ return (B_FALSE);
+
+ if (read_heap_ptr(&strptr, addr, V8_OFF_ODDBALL_TO_STRING) == -1)
+ return (B_FALSE);
+
+ if (jsstr_print(strptr, JSSTR_NUDE, &bufp, &len) != 0)
+ return (B_FALSE);
+
+ return (strcmp(buf, oddball) == 0);
+}
+
+static boolean_t
+jsobj_is_undefined(uintptr_t addr)
+{
+ return (jsobj_is_oddball(addr, "undefined"));
+}
+
+static boolean_t
+jsobj_is_hole(uintptr_t addr)
+{
+ return (jsobj_is_oddball(addr, "hole"));
+}
+
+/*
+ * Iterate the properties of a JavaScript object "addr".
+ *
+ * Every heap object refers to a Map that describes how that heap object is laid
+ * out. The Map includes information like the constructor function used to
+ * create the object, how many bytes each object uses, and how many properties
+ * are stored inside the object. (A single Map object can be shared by many
+ * objects of the same general type, which is why this information is encoded by
+ * reference rather than contained in each object.)
+ *
+ * V8 knows about lots of different kinds of properties:
+ *
+ * o properties with numeric names (e.g., array elements)
+ * o dictionary properties
+ * o "fast" properties stored inside each object, much like a C struct
+ * o properties stored in the separate "properties" array
+ * o getters, setters, and other magic (not supported by this module)
+ *
+ * While property lookup in JavaScript involves traversing an object's prototype
+ * chain, this module only iterates the properties local to the object itself.
+ *
+ *
+ * Numeric properties
+ *
+ * Properties having numeric indexes are stored in the "elements" array attached
+ * to each object. Objects with numeric properties can also have other
+ * properties.
+ *
+ *
+ * Dictionary properties
+ *
+ * An object with dictionary properties is identified by one of the bits in
+ * "bitfield3" in the object's Map. For details on slow properties, see
+ * read_heap_dict().
+ *
+ *
+ * Other properties
+ *
+ * The Map object refers to an array of "instance descriptors". This array has
+ * a few metadata entries at the front, followed by groups of three entries for
+ * each property. In Node v0.10 and later, it looks roughly like this:
+ *
+ * +--------------+ +----------------------+
+ * | JSObject | +--> | Map |
+ * +--------------| | +----------------------+
+ * | map | ---+ | ... |
+ * | ... | | instance_descriptors | --+
+ * in-object | [prop 0 val] | | ... | |
+ * properties | [prop 1 val] | +----------------------+ |
+ * (not for all | ... | |
+ * objects) | [prop N val] | |
+ * +--------------+ |
+ * +------------------------------------------------+
+ * |
+ * +----> +------------------------------+
+ * | FixedArray |
+ * +------------------------------+
+ * | ... |
+ * | prop 0 "key" descriptor |
+ * | prop 0 "details" descriptor |
+ * | prop 0 "value" descriptor |
+ * | prop 1 "key" descriptor |
+ * | prop 1 "details" descriptor |
+ * | prop 1 "value" descriptor |
+ * | ... |
+ * | prop N "key" descriptor |
+ * | prop N "details" descriptor |
+ * | prop N "value" descriptor |
+ * +------------------------------+
+ *
+ * In versions of Node prior to 0.10, there's an extra level of indirection.
+ * The Map refers to a "transitions" array, which has an entry that points to
+ * the instance descriptors. In both cases, the descriptors look roughly the
+ * same.
+ *
+ * Each property is described by three pointer-sized entries:
+ *
+ * o key: a string denoting the name of the property
+ * o details: a bitfield describing attributes of this property
+ * o value: an integer describing where this property's value is stored
+ *
+ * "key" is straightforward: it's just the name of the property as the
+ * JavaScript programmer knows it.
+ *
+ * In versions prior to Node 0.12, "value" is an integer. If "value" is less
+ * than the number of properties stored inside the object (which is also
+ * recorded in the Map), then it denotes which of the in-object property value
+ * slots (shown above inside the JSObject object) stores the value for this
+ * property. If "value" is greater than the number of properties stored inside
+ * the object, then it denotes which index into the separate "properties" array
+ * (a separate field in the JSObject, not shown above) contains the value for
+ * this property.
+ *
+ * In Node 0.12, for properties that are stored inside the object, the offset is
+ * obtained not using "value", but using a bitfield from the "details" part of
+ * the descriptor.
+ *
+ * Terminology notes: it's important to keep straight the different senses of
+ * "object" and "property" here. We use "JavaScript objects" to refer to the
+ * things that JavaScript programmers would call objects, including instances of
+ * Object and Array and subclasses of those. These are a subset of V8 heap
+ * objects, since V8 uses its heap to manage lots of other objects that
+ * JavaScript programmers don't think about. This function iterates JavaScript
+ * properties of these JavaScript objects, not internal properties of heap
+ * objects in general.
+ *
+ * Relatedly, while JavaScript programmers frequently interchange the notions of
+ * property names, property values, and property configurations (e.g., getters
+ * and setters, read-only or not, hidden or not), these are all distinct in the
+ * implementation of the VM, and "property" typically refers to the whole
+ * configuration, which may include a way to get the property name and value.
+ *
+ * The canonical source of the information used here is the implementation of
+ * property lookup in the V8 source code, currently in Object::GetProperty.
+ */
+
+static int
+jsobj_properties(uintptr_t addr,
+ int (*func)(const char *, uintptr_t, void *), void *arg,
+ jspropinfo_t *propinfop)
+{
+ uintptr_t ptr, map, elements;
+ uintptr_t *props = NULL, *descs = NULL, *content = NULL, *trans, *elts;
+ size_t size, nprops, ndescs, ncontent, ntrans, len;
+ ssize_t ii, rndescs;
+ uint8_t type, ninprops;
+ int rval = -1;
+ size_t ps = sizeof (uintptr_t);
+ ssize_t off;
+ jspropinfo_t propinfo = JPI_NONE;
+
+ /*
+ * First, check if the JSObject's "properties" field is a FixedArray.
+ * If not, then this is something we don't know how to deal with, and
+ * we'll just pass the caller a NULL value.
+ */
+ if (mdb_vread(&ptr, ps, addr + V8_OFF_JSOBJECT_PROPERTIES) == -1)
+ return (-1);
+
+ if (read_typebyte(&type, ptr) != 0)
+ return (-1);
+
+ if (type != V8_TYPE_FIXEDARRAY) {
+ char buf[256];
+ (void) mdb_snprintf(buf, sizeof (buf), "<%s>",
+ enum_lookup_str(v8_types, type, "unknown"));
+ if (propinfop != NULL)
+ *propinfop = JPI_BADLAYOUT;
+ return (func(buf, NULL, arg));
+ }
+
+ /*
+ * As described above, we need the Map to figure out how to iterate the
+ * properties for this object.
+ */
+ if (mdb_vread(&map, ps, addr + V8_OFF_HEAPOBJECT_MAP) == -1)
+ goto err;
+
+ /*
+ * Check to see if our elements member is an array and non-zero; if
+ * so, it contains numerically-named properties. Whether or not there
+ * are any numerically-named properties, there may be other kinds of
+ * properties.
+ */
+ if (V8_ELEMENTS_KIND_SHIFT != -1 &&
+ read_heap_ptr(&elements, addr, V8_OFF_JSOBJECT_ELEMENTS) == 0 &&
+ read_heap_array(elements, &elts, &len, UM_SLEEP) == 0 && len != 0) {
+ uint8_t bit_field2, kind;
+ size_t sz = len * sizeof (uintptr_t);
+
+ if (mdb_vread(&bit_field2, sizeof (bit_field2),
+ map + V8_OFF_MAP_BIT_FIELD2) == -1) {
+ mdb_free(elts, sz);
+ goto err;
+ }
+
+ kind = bit_field2 >> V8_ELEMENTS_KIND_SHIFT;
+ kind &= (1 << V8_ELEMENTS_KIND_BITCOUNT) - 1;
+ propinfo |= JPI_NUMERIC;
+
+ if (kind == V8_ELEMENTS_FAST_ELEMENTS ||
+ kind == V8_ELEMENTS_FAST_HOLEY_ELEMENTS) {
+ for (ii = 0; ii < len; ii++) {
+ char name[10];
+
+ if (kind == V8_ELEMENTS_FAST_HOLEY_ELEMENTS &&
+ jsobj_is_hole(elts[ii]))
+ continue;
+
+ snprintf(name, sizeof (name), "%" PRIdPTR, ii);
+
+ if (func(name, elts[ii], arg) != 0) {
+ mdb_free(elts, sz);
+ goto err;
+ }
+ }
+ } else if (kind == V8_ELEMENTS_DICTIONARY_ELEMENTS) {
+ propinfo |= JPI_DICT;
+ if (read_heap_dict(elements, func, arg) != 0) {
+ mdb_free(elts, sz);
+ goto err;
+ }
+ }
+
+ mdb_free(elts, sz);
+ }
+
+ if (V8_DICT_SHIFT != -1) {
+ v8_field_t *flp;
+ uintptr_t bit_field3;
+
+ /*
+ * If dictionary properties are supported (the V8_DICT_SHIFT
+ * offset is not -1), then bitfield 3 tells us if the properties
+ * for this object are stored in "properties" field of the
+ * object using a Dictionary representation.
+ *
+ * Versions of V8 prior to Node 0.12 treated bit_field3 as an
+ * SMI, so it was pointer-sized, and it has to be converted from
+ * an SMI before using it. In 0.12, it's treated as a raw
+ * uint32_t, meaning it's always int-sized and it should not be
+ * converted. We can tell which case we're in because the debug
+ * constant (v8dbg_class_map__bit_field3__TYPE) tells us whether
+ * the TYPE is "SMI" or "int".
+ */
+
+ flp = conf_field_lookup("Map", "bit_field3");
+ if (flp == NULL || flp->v8f_isbyte) {
+ /*
+ * v8f_isbyte indicates the type is "int", so we're in
+ * the int-sized not-a-SMI world.
+ */
+ unsigned int bf3_value;
+ if (mdb_vread(&bf3_value, sizeof (bf3_value),
+ map + V8_OFF_MAP_BIT_FIELD3) == -1)
+ goto err;
+ bit_field3 = (uintptr_t)bf3_value;
+ } else {
+ /* The metadata indicates this is an SMI. */
+ if (mdb_vread(&bit_field3, sizeof (bit_field3),
+ map + V8_OFF_MAP_BIT_FIELD3) == -1)
+ goto err;
+ bit_field3 = V8_SMI_VALUE(bit_field3);
+ }
+
+ if (bit_field3 & (1 << V8_DICT_SHIFT)) {
+ propinfo |= JPI_DICT;
+ if (propinfop != NULL)
+ *propinfop = propinfo;
+ return (read_heap_dict(ptr, func, arg));
+ }
+ } else if (V8_OFF_MAP_INSTANCE_DESCRIPTORS != -1) {
+ uintptr_t bit_field3;
+
+ if (mdb_vread(&bit_field3, sizeof (bit_field3),
+ map + V8_OFF_MAP_INSTANCE_DESCRIPTORS) == -1)
+ goto err;
+
+ if (V8_SMI_VALUE(bit_field3) == (1 << V8_ISSHARED_SHIFT)) {
+ /*
+ * On versions of V8 prior to that used in 0.10,
+ * the instance descriptors were overloaded to also
+ * be bit_field3 -- and there was no way from that
+ * field to infer a dictionary type. Because we
+ * can't determine if the map is actually the
+ * hash_table_map, we assume that if it's an object
+ * that has kIsShared set, that it is in fact a
+ * dictionary -- an assumption that is assuredly in
+ * error in some cases.
+ */
+ propinfo |= JPI_DICT;
+ if (propinfop != NULL)
+ *propinfop = propinfo;
+ return (read_heap_dict(ptr, func, arg));
+ }
+ }
+
+ if (read_heap_array(ptr, &props, &nprops, UM_SLEEP) != 0)
+ goto err;
+
+ /*
+ * Check if we're looking at an older version of V8, where the instance
+ * descriptors are stored not directly in the Map, but in the
+ * "transitions" array that's stored in the Map.
+ */
+ if (V8_OFF_MAP_INSTANCE_DESCRIPTORS == -1) {
+ if (V8_OFF_MAP_TRANSITIONS == -1 ||
+ V8_TRANSITIONS_IDX_DESC == -1 ||
+ V8_PROP_IDX_CONTENT != -1) {
+ mdb_warn("missing instance_descriptors, but did "
+ "not find expected transitions array metadata; "
+ "cannot read properties\n");
+ goto err;
+ }
+
+ propinfo |= JPI_HASTRANSITIONS;
+ off = V8_OFF_MAP_TRANSITIONS;
+ if (mdb_vread(&ptr, ps, map + off) == -1)
+ goto err;
+
+ if (read_heap_array(ptr, &trans, &ntrans, UM_SLEEP) != 0)
+ goto err;
+
+ ptr = trans[V8_TRANSITIONS_IDX_DESC];
+ mdb_free(trans, ntrans * sizeof (uintptr_t));
+ } else {
+ off = V8_OFF_MAP_INSTANCE_DESCRIPTORS;
+ if (mdb_vread(&ptr, ps, map + off) == -1)
+ goto err;
+ }
+
+ /*
+ * Either way, at this point "ptr" should refer to the descriptors
+ * array.
+ */
+ if (read_heap_array(ptr, &descs, &ndescs, UM_SLEEP) != 0)
+ goto err;
+
+ /*
+ * For cases where property values are stored directly inside the object
+ * ("fast properties"), we need to know the whole size of the object and
+ * the number of properties in the object in order to calculate the
+ * correct offset for each property.
+ */
+ if (read_size(&size, addr) != 0)
+ size = 0;
+ if (mdb_vread(&ninprops, ps,
+ map + V8_OFF_MAP_INOBJECT_PROPERTIES) == -1)
+ goto err;
+
+ if (V8_PROP_IDX_CONTENT == -1) {
+ /*
+ * On node v0.8 and later, the content is not stored in a
+ * separate FixedArray, but rather with the descriptors. The
+ * number of actual properties is the length of the array minus
+ * the first (non-property) elements divided by the number of
+ * elements per property.
+ */
+ content = descs;
+ ncontent = ndescs;
+ rndescs = ndescs > V8_PROP_IDX_FIRST ?
+ (ndescs - V8_PROP_IDX_FIRST) / V8_PROP_DESC_SIZE : 0;
+ } else {
+ /*
+ * On older versions, the content is stored in a separate array,
+ * and there's one entry per property (rather than three).
+ */
+ if (V8_PROP_IDX_CONTENT < ndescs &&
+ read_heap_array(descs[V8_PROP_IDX_CONTENT], &content,
+ &ncontent, UM_SLEEP) != 0)
+ goto err;
+
+ rndescs = ndescs - V8_PROP_IDX_FIRST;
+ propinfo |= JPI_HASCONTENT;
+ }
+
+ /*
+ * At this point, we've read all the pieces we need to process the list
+ * of instance descriptors.
+ */
+ for (ii = 0; ii < rndescs; ii++) {
+ uintptr_t keyidx, validx, detidx, baseidx;
+ char buf[1024];
+ intptr_t val;
+ size_t len = sizeof (buf);
+ char *c = buf;
+
+ if (V8_PROP_IDX_CONTENT != -1) {
+ /*
+ * In node versions prior to v0.8, this was hardcoded
+ * in the V8 implementation, so we hardcode it here
+ * as well.
+ */
+ keyidx = ii + V8_PROP_IDX_FIRST;
+ validx = ii << 1;
+ detidx = (ii << 1) + 1;
+ } else {
+ baseidx = V8_PROP_IDX_FIRST + (ii * V8_PROP_DESC_SIZE);
+ keyidx = baseidx + V8_PROP_DESC_KEY;
+ validx = baseidx + V8_PROP_DESC_VALUE;
+ detidx = baseidx + V8_PROP_DESC_DETAILS;
+ }
+
+ /*
+ * Ignore cases where our understanding doesn't appear to match
+ * what's here.
+ */
+ if (detidx >= ncontent) {
+ propinfo |= JPI_SKIPPED;
+ v8_warn("property descriptor %d: detidx (%d) "
+ "out of bounds for content array (length %d)\n",
+ ii, detidx, ncontent);
+ continue;
+ }
+
+ /*
+ * We only process fields. There are other entries here
+ * (notably: transitions) that we don't care about (and these
+ * are not errors).
+ */
+ if (!V8_DESC_ISFIELD(content[detidx]))
+ continue;
+
+ if (keyidx >= ndescs) {
+ propinfo |= JPI_SKIPPED;
+ v8_warn("property descriptor %d: keyidx (%d) "
+ "out of bounds for descriptor array (length %d)\n",
+ ii, keyidx, ndescs);
+ continue;
+ }
+
+ if (jsstr_print(descs[keyidx], JSSTR_NUDE, &c, &len) != 0) {
+ propinfo |= JPI_SKIPPED;
+ continue;
+ }
+
+ val = (intptr_t)content[validx];
+ if (!V8_IS_SMI(val)) {
+ propinfo |= JPI_SKIPPED;
+ v8_warn("object %p: property descriptor %d: value "
+ "index is not an SMI: %p\n", addr, ii, val);
+ continue;
+ }
+
+ /*
+ * The "value" part of each property descriptor tells us whether
+ * the property value is stored directly in the object or in the
+ * related "props" array. See JSObject::RawFastPropertyAt() in
+ * the V8 source.
+ */
+ val = V8_SMI_VALUE(val) - ninprops;
+ if (val < 0) {
+ uintptr_t propaddr;
+
+ /*
+ * The property is stored directly inside the object.
+ * In Node 0.10, "val - ninprops" is the (negative)
+ * index of the property counted from the end of the
+ * object. In that context, -1 refers to the last
+ * word in the object; -2 refers to the second-last
+ * word, and so on.
+ *
+ * In Node 0.12, we get the 0-based index from the
+ * first property inside the object by reading certain
+ * bits from the property descriptor details word.
+ * These constants are literal here because they're
+ * literal in the V8 source itself.
+ */
+ if (v8_major > 3 || (v8_major == 3 && v8_minor >= 26)) {
+ val = V8_PROP_FIELDINDEX(content[detidx]);
+ propaddr = addr + V8_OFF_HEAP(
+ size - (ninprops - val) * ps);
+ } else {
+ propaddr = addr + V8_OFF_HEAP(size + val * ps);
+ }
+
+ if (mdb_vread(&ptr, sizeof (ptr), propaddr) == -1) {
+ propinfo |= JPI_SKIPPED;
+ v8_warn("object %p: failed to read in-object "
+ "property at %p", addr, propaddr);
+ continue;
+ }
+
+ propinfo |= JPI_INOBJECT;
+ } else {
+ /*
+ * The property is in the separate "props" array.
+ */
+ if (val >= nprops) {
+ /*
+ * This can happen when properties are deleted.
+ * If this value isn't obviously corrupt, we'll
+ * just silently ignore it.
+ */
+ if (val < rndescs)
+ continue;
+
+ propinfo |= JPI_SKIPPED;
+ v8_warn("object %p: property descriptor %d: "
+ "value index value (%d) out of bounds "
+ "(%d)\n", addr, ii, val, nprops);
+ goto err;
+ }
+
+ propinfo |= JPI_PROPS;
+ ptr = props[val];
+ }
+
+ if (func(buf, ptr, arg) != 0)
+ goto err;
+ }
+
+ rval = 0;
+ if (propinfop != NULL)
+ *propinfop = propinfo;
+
+err:
+ if (props != NULL)
+ mdb_free(props, nprops * sizeof (uintptr_t));
+
+ if (descs != NULL)
+ mdb_free(descs, ndescs * sizeof (uintptr_t));
+
+ if (content != NULL && V8_PROP_IDX_CONTENT != -1)
+ mdb_free(content, ncontent * sizeof (uintptr_t));
+
+ return (rval);
+}
+
+/*
+ * Given the line endings table in "lendsp", computes the line number for the
+ * given token position and print the result into "buf". If "lendsp" is
+ * undefined, prints the token position instead.
+ */
+static int
+jsfunc_lineno(uintptr_t lendsp, uintptr_t tokpos,
+ char *buf, size_t buflen, int *lineno)
+{
+ uintptr_t size, bufsz, lower, upper, ii = 0;
+ uintptr_t *data;
+
+ if (lineno != NULL)
+ *lineno = -1;
+
+ if (jsobj_is_undefined(lendsp)) {
+ /*
+ * The token position is an SMI, but it comes in as its raw
+ * value so we can more easily compare it to values in the line
+ * endings table. If we're just printing the position directly,
+ * we must convert it here, unless we're checking against the
+ * "-1" sentinel.
+ */
+ if (tokpos == V8_VALUE_SMI(-1))
+ mdb_snprintf(buf, buflen, "unknown position");
+ else
+ mdb_snprintf(buf, buflen, "position %d",
+ V8_SMI_VALUE(tokpos));
+
+ if (lineno != NULL)
+ *lineno = 0;
+
+ return (0);
+ }
+
+ if (read_heap_smi(&size, lendsp, V8_OFF_FIXEDARRAY_LENGTH) != 0)
+ return (-1);
+
+ bufsz = size * sizeof (data[0]);
+
+ if ((data = mdb_alloc(bufsz, UM_NOSLEEP)) == NULL) {
+ v8_warn("failed to alloc %d bytes for FixedArray data", bufsz);
+ return (-1);
+ }
+
+ if (mdb_vread(data, bufsz, lendsp + V8_OFF_FIXEDARRAY_DATA) != bufsz) {
+ v8_warn("failed to read FixedArray data");
+ mdb_free(data, bufsz);
+ return (-1);
+ }
+
+ lower = 0;
+ upper = size - 1;
+
+ if (tokpos > data[upper]) {
+ (void) strlcpy(buf, "position out of range", buflen);
+ mdb_free(data, bufsz);
+
+ if (lineno != NULL)
+ *lineno = 0;
+
+ return (0);
+ }
+
+ if (tokpos <= data[0]) {
+ (void) strlcpy(buf, "line 1", buflen);
+ mdb_free(data, bufsz);
+
+ if (lineno != NULL)
+ *lineno = 1;
+
+ return (0);
+ }
+
+ while (upper >= 1) {
+ ii = (lower + upper) >> 1;
+ if (tokpos > data[ii])
+ lower = ii + 1;
+ else if (tokpos <= data[ii - 1])
+ upper = ii - 1;
+ else
+ break;
+ }
+
+ if (lineno != NULL)
+ *lineno = ii + 1;
+
+ (void) mdb_snprintf(buf, buflen, "line %d", ii + 1);
+ mdb_free(data, bufsz);
+ return (0);
+}
+
+/*
+ * Given a Script object, prints nlines on either side of lineno, with each
+ * line prefixed by prefix (if non-NULL).
+ */
+static void
+jsfunc_lines(uintptr_t scriptp,
+ uintptr_t start, uintptr_t end, int nlines, char *prefix)
+{
+ uintptr_t src;
+ char *buf, *bufp;
+ size_t bufsz = 1024, len;
+ int i, line, slop = 10;
+ boolean_t newline = B_TRUE;
+ int startline = -1, endline = -1;
+
+ if (read_heap_ptr(&src, scriptp, V8_OFF_SCRIPT_SOURCE) != 0)
+ return;
+
+ for (;;) {
+ if ((buf = mdb_zalloc(bufsz, UM_NOSLEEP)) == NULL) {
+ mdb_warn("failed to allocate source code "
+ "buffer of size %d", bufsz);
+ return;
+ }
+
+ bufp = buf;
+ len = bufsz;
+
+ if (jsstr_print(src, JSSTR_NUDE, &bufp, &len) != 0) {
+ mdb_free(buf, bufsz);
+ return;
+ }
+
+ if (len > slop)
+ break;
+
+ mdb_free(buf, bufsz);
+ bufsz <<= 1;
+ }
+
+ if (end >= bufsz)
+ return;
+
+ /*
+ * First, take a pass to determine where our lines actually start.
+ */
+ for (i = 0, line = 1; buf[i] != '\0'; i++) {
+ if (buf[i] == '\n')
+ line++;
+
+ if (i == start)
+ startline = line;
+
+ if (i == end) {
+ endline = line;
+ break;
+ }
+ }
+
+ if (startline == -1 || endline == -1) {
+ mdb_warn("for script %p, could not determine startline/endline"
+ " (start %ld, end %ld, nlines %d)\n",
+ scriptp, start, end, nlines);
+ mdb_free(buf, bufsz);
+ return;
+ }
+
+ for (i = 0, line = 1; buf[i] != '\0'; i++) {
+ if (buf[i] == '\n') {
+ line++;
+ newline = B_TRUE;
+ }
+
+ if (line < startline - nlines)
+ continue;
+
+ if (line > endline + nlines)
+ break;
+
+ mdb_printf("%c", buf[i]);
+
+ if (newline) {
+ if (line >= startline && line <= endline)
+ mdb_printf("%<b>");
+
+ if (prefix != NULL)
+ mdb_printf(prefix, line);
+
+ if (line >= startline && line <= endline)
+ mdb_printf("%</b>");
+
+ newline = B_FALSE;
+ }
+ }
+
+ mdb_printf("\n");
+
+ if (line == endline)
+ mdb_printf("%</b>");
+
+ mdb_free(buf, bufsz);
+}
+
+/*
+ * Given a SharedFunctionInfo object, prints into bufp a name of the function
+ * suitable for printing. This function attempts to infer a name for anonymous
+ * functions.
+ */
+static int
+jsfunc_name(uintptr_t funcinfop, char **bufp, size_t *lenp)
+{
+ uintptr_t ptrp;
+ char *bufs = *bufp;
+
+ if (read_heap_ptr(&ptrp, funcinfop,
+ V8_OFF_SHAREDFUNCTIONINFO_NAME) != 0) {
+ (void) bsnprintf(bufp, lenp,
+ "<function (failed to read SharedFunctionInfo)>");
+ return (-1);
+ }
+
+ if (jsstr_print(ptrp, JSSTR_NUDE, bufp, lenp) != 0)
+ return (-1);
+
+ if (*bufp != bufs)
+ return (0);
+
+ if (read_heap_ptr(&ptrp, funcinfop,
+ V8_OFF_SHAREDFUNCTIONINFO_INFERRED_NAME) != 0) {
+ (void) bsnprintf(bufp, lenp, "<anonymous>");
+ return (0);
+ }
+
+ (void) bsnprintf(bufp, lenp, "<anonymous> (as ");
+ bufs = *bufp;
+
+ if (jsstr_print(ptrp, JSSTR_NUDE, bufp, lenp) != 0)
+ return (-1);
+
+ if (*bufp == bufs)
+ (void) bsnprintf(bufp, lenp, "<anon>");
+
+ (void) bsnprintf(bufp, lenp, ")");
+
+ return (0);
+}
+
+/*
+ * JavaScript-level object printing
+ */
+
+static int
+jsobj_print(uintptr_t addr, jsobj_print_t *jsop)
+{
+ uint8_t type;
+ const char *klass;
+ char **bufp = jsop->jsop_bufp;
+ size_t *lenp = jsop->jsop_lenp;
+
+ const struct {
+ char *name;
+ int (*func)(uintptr_t, jsobj_print_t *);
+ } table[] = {
+ { "HeapNumber", jsobj_print_number },
+ { "Oddball", jsobj_print_oddball },
+ { "JSObject", jsobj_print_jsobject },
+ { "JSArray", jsobj_print_jsarray },
+ { "JSFunction", jsobj_print_jsfunction },
+ { "JSDate", jsobj_print_jsdate },
+ { NULL }
+ }, *ent;
+
+ if (jsop->jsop_baseaddr != NULL && jsop->jsop_member == NULL)
+ (void) bsnprintf(bufp, lenp, "%p: ", jsop->jsop_baseaddr);
+
+ if (jsop->jsop_printaddr && jsop->jsop_member == NULL)
+ (void) bsnprintf(bufp, lenp, "%p: ", addr);
+
+ if (V8_IS_SMI(addr)) {
+ (void) bsnprintf(bufp, lenp, "%d", V8_SMI_VALUE(addr));
+ return (0);
+ }
+
+ if (!V8_IS_HEAPOBJECT(addr)) {
+ (void) bsnprintf(bufp, lenp, "<not a heap object>");
+ return (-1);
+ }
+
+ if (read_typebyte(&type, addr) != 0) {
+ (void) bsnprintf(bufp, lenp, "<couldn't read type>");
+ return (-1);
+ }
+
+ if (V8_TYPE_STRING(type)) {
+ if (jsstr_print(addr, JSSTR_QUOTED, bufp, lenp) == -1)
+ return (-1);
+
+ return (0);
+ }
+
+ klass = enum_lookup_str(v8_types, type, "<unknown>");
+
+ for (ent = &table[0]; ent->name != NULL; ent++) {
+ if (strcmp(klass, ent->name) == 0) {
+ jsop->jsop_descended = B_TRUE;
+ return (ent->func(addr, jsop));
+ }
+ }
+
+ (void) bsnprintf(bufp, lenp,
+ "<unknown JavaScript object type \"%s\">", klass);
+ return (-1);
+}
+
+static int
+jsobj_print_number(uintptr_t addr, jsobj_print_t *jsop)
+{
+ char **bufp = jsop->jsop_bufp;
+ size_t *lenp = jsop->jsop_lenp;
+ double numval;
+
+ if (read_heap_double(&numval, addr, V8_OFF_HEAPNUMBER_VALUE) == -1)
+ return (-1);
+
+ if (numval == (long long)numval)
+ (void) bsnprintf(bufp, lenp, "%lld", (long long)numval);
+ else
+ (void) bsnprintf(bufp, lenp, "%e", numval);
+
+ return (0);
+}
+
+static int
+jsobj_print_oddball(uintptr_t addr, jsobj_print_t *jsop)
+{
+ char **bufp = jsop->jsop_bufp;
+ size_t *lenp = jsop->jsop_lenp;
+ uintptr_t strptr;
+
+ if (read_heap_ptr(&strptr, addr, V8_OFF_ODDBALL_TO_STRING) != 0)
+ return (-1);
+
+ return (jsstr_print(strptr, JSSTR_NUDE, bufp, lenp));
+}
+
+static int
+jsobj_print_prop(const char *desc, uintptr_t val, void *arg)
+{
+ jsobj_print_t *jsop = arg, descend;
+ char **bufp = jsop->jsop_bufp;
+ size_t *lenp = jsop->jsop_lenp;
+
+ (void) bsnprintf(bufp, lenp, "%s\n%*s\"%s\": ", jsop->jsop_nprops == 0 ?
+ "{" : "", jsop->jsop_indent + 4, "", desc);
+
+ descend = *jsop;
+ descend.jsop_depth--;
+ descend.jsop_indent += 4;
+
+ (void) jsobj_print(val, &descend);
+ (void) bsnprintf(bufp, lenp, ",");
+
+ jsop->jsop_nprops++;
+
+ return (0);
+}
+
+static int
+jsobj_print_prop_member(const char *desc, uintptr_t val, void *arg)
+{
+ jsobj_print_t *jsop = arg, descend;
+ const char *member = jsop->jsop_member, *next = member;
+ int rv;
+
+ for (; *next != '\0' && *next != '.' && *next != '['; next++)
+ continue;
+
+ if (*member == '[') {
+ mdb_warn("cannot use array indexing on an object\n");
+ return (-1);
+ }
+
+ if (strncmp(member, desc, next - member) != 0)
+ return (0);
+
+ if (desc[next - member] != '\0')
+ return (0);
+
+ /*
+ * This property matches the desired member; descend.
+ */
+ descend = *jsop;
+
+ if (*next == '\0') {
+ descend.jsop_member = NULL;
+ descend.jsop_found = B_TRUE;
+ } else {
+ descend.jsop_member = *next == '.' ? next + 1 : next;
+ }
+
+ rv = jsobj_print(val, &descend);
+ jsop->jsop_found = descend.jsop_found;
+
+ return (rv);
+}
+
+static int
+jsobj_print_jsobject(uintptr_t addr, jsobj_print_t *jsop)
+{
+ char **bufp = jsop->jsop_bufp;
+ size_t *lenp = jsop->jsop_lenp;
+
+ if (jsop->jsop_member != NULL)
+ return (jsobj_properties(addr, jsobj_print_prop_member,
+ jsop, &jsop->jsop_propinfo));
+
+ if (jsop->jsop_depth == 0) {
+ (void) bsnprintf(bufp, lenp, "[...]");
+ return (0);
+ }
+
+ jsop->jsop_nprops = 0;
+
+ if (jsobj_properties(addr, jsobj_print_prop, jsop,
+ &jsop->jsop_propinfo) != 0)
+ return (-1);
+
+ if (jsop->jsop_nprops > 0) {
+ (void) bsnprintf(bufp, lenp, "\n%*s", jsop->jsop_indent, "");
+ } else if (jsop->jsop_nprops == 0) {
+ (void) bsnprintf(bufp, lenp, "{");
+ } else {
+ (void) bsnprintf(bufp, lenp, "{ /* unknown property */ ");
+ }
+
+ (void) bsnprintf(bufp, lenp, "}");
+
+ return (0);
+}
+
+static int
+jsobj_print_jsarray_member(uintptr_t addr, jsobj_print_t *jsop)
+{
+ uintptr_t *elts;
+ jsobj_print_t descend;
+ uintptr_t ptr;
+ const char *member = jsop->jsop_member, *end, *p;
+ size_t elt = 0, place = 1, len, rv;
+ char **bufp = jsop->jsop_bufp;
+ size_t *lenp = jsop->jsop_lenp;
+
+ if (read_heap_ptr(&ptr, addr, V8_OFF_JSOBJECT_ELEMENTS) != 0) {
+ (void) bsnprintf(bufp, lenp,
+ "<array member (failed to read elements)>");
+ return (-1);
+ }
+
+ if (read_heap_array(ptr, &elts, &len, UM_SLEEP | UM_GC) != 0) {
+ (void) bsnprintf(bufp, lenp,
+ "<array member (failed to read array)>");
+ return (-1);
+ }
+
+ if (*member != '[') {
+ mdb_warn("expected bracketed array index; "
+ "found '%s'\n", member);
+ return (-1);
+ }
+
+ if ((end = strchr(member, ']')) == NULL) {
+ mdb_warn("missing array index terminator\n");
+ return (-1);
+ }
+
+ /*
+ * We know where our array index ends; convert it to an integer
+ * by stepping through it from least significant digit to most.
+ */
+ for (p = end - 1; p > member; p--) {
+ if (*p < '0' || *p > '9') {
+ mdb_warn("illegal array index at '%c'\n", *p);
+ return (-1);
+ }
+
+ elt += (*p - '0') * place;
+ place *= 10;
+ }
+
+ if (place == 1) {
+ mdb_warn("missing array index\n");
+ return (-1);
+ }
+
+ if (elt >= len) {
+ mdb_warn("array index %d exceeds size of %d\n", elt, len);
+ return (-1);
+ }
+
+ descend = *jsop;
+
+ switch (*(++end)) {
+ case '\0':
+ descend.jsop_member = NULL;
+ descend.jsop_found = B_TRUE;
+ break;
+
+ case '.':
+ descend.jsop_member = end + 1;
+ break;
+
+ case '[':
+ descend.jsop_member = end;
+ break;
+
+ default:
+ mdb_warn("illegal character '%c' following "
+ "array index terminator\n", *end);
+ return (-1);
+ }
+
+ rv = jsobj_print(elts[elt], &descend);
+ jsop->jsop_found = descend.jsop_found;
+
+ return (rv);
+}
+
+static int
+jsobj_print_jsarray(uintptr_t addr, jsobj_print_t *jsop)
+{
+ char **bufp = jsop->jsop_bufp;
+ size_t *lenp = jsop->jsop_lenp;
+ int indent = jsop->jsop_indent;
+ jsobj_print_t descend;
+ uintptr_t ptr;
+ uintptr_t *elts;
+ size_t ii, len;
+
+ if (jsop->jsop_member != NULL)
+ return (jsobj_print_jsarray_member(addr, jsop));
+
+ if (jsop->jsop_depth == 0) {
+ (void) bsnprintf(bufp, lenp, "[...]");
+ return (0);
+ }
+
+ if (read_heap_ptr(&ptr, addr, V8_OFF_JSOBJECT_ELEMENTS) != 0) {
+ (void) bsnprintf(bufp, lenp,
+ "<array (failed to read elements)>");
+ return (-1);
+ }
+
+ if (read_heap_array(ptr, &elts, &len, UM_SLEEP | UM_GC) != 0) {
+ (void) bsnprintf(bufp, lenp, "<array (failed to read array)>");
+ return (-1);
+ }
+
+ if (len == 0) {
+ (void) bsnprintf(bufp, lenp, "[]");
+ return (0);
+ }
+
+ descend = *jsop;
+ descend.jsop_depth--;
+ descend.jsop_indent += 4;
+
+ if (len == 1) {
+ (void) bsnprintf(bufp, lenp, "[ ");
+ (void) jsobj_print(elts[0], &descend);
+ (void) bsnprintf(bufp, lenp, " ]");
+ return (0);
+ }
+
+ (void) bsnprintf(bufp, lenp, "[\n");
+
+ for (ii = 0; ii < len && *lenp > 0; ii++) {
+ (void) bsnprintf(bufp, lenp, "%*s", indent + 4, "");
+ (void) jsobj_print(elts[ii], &descend);
+ (void) bsnprintf(bufp, lenp, ",\n");
+ }
+
+ (void) bsnprintf(bufp, lenp, "%*s", indent, "");
+ (void) bsnprintf(bufp, lenp, "]");
+
+ return (0);
+}
+
+static int
+jsobj_print_jsfunction(uintptr_t addr, jsobj_print_t *jsop)
+{
+ char **bufp = jsop->jsop_bufp;
+ size_t *lenp = jsop->jsop_lenp;
+ uintptr_t shared;
+
+ if (read_heap_ptr(&shared, addr, V8_OFF_JSFUNCTION_SHARED) != 0)
+ return (-1);
+
+ (void) bsnprintf(bufp, lenp, "function ");
+ return (jsfunc_name(shared, bufp, lenp) != 0);
+}
+
+static int
+jsobj_print_jsdate(uintptr_t addr, jsobj_print_t *jsop)
+{
+ char **bufp = jsop->jsop_bufp;
+ size_t *lenp = jsop->jsop_lenp;
+ char buf[128];
+ uintptr_t value;
+ uint8_t type;
+ double numval;
+
+ if (V8_OFF_JSDATE_VALUE == -1) {
+ (void) bsnprintf(bufp, lenp, "<JSDate>", buf);
+ return (0);
+ }
+
+ if (read_heap_ptr(&value, addr, V8_OFF_JSDATE_VALUE) != 0) {
+ (void) bsnprintf(bufp, lenp, "<JSDate (failed to read value)>");
+ return (-1);
+ }
+
+ if (read_typebyte(&type, value) != 0) {
+ (void) bsnprintf(bufp, lenp, "<JSDate (failed to read type)>");
+ return (-1);
+ }
+
+ if (strcmp(enum_lookup_str(v8_types, type, ""), "HeapNumber") != 0)
+ return (-1);
+
+ if (read_heap_double(&numval, value, V8_OFF_HEAPNUMBER_VALUE) == -1) {
+ (void) bsnprintf(bufp, lenp, "<JSDate (failed to read num)>");
+ return (-1);
+ }
+
+ mdb_snprintf(buf, sizeof (buf), "%Y",
+ (time_t)((long long)numval / MILLISEC));
+ (void) bsnprintf(bufp, lenp, "%lld (%s)", (long long)numval, buf);
+
+ return (0);
+}
+
+/*
+ * dcmd implementations
+ */
+
+/* ARGSUSED */
+static int
+dcmd_v8classes(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ v8_class_t *clp;
+
+ for (clp = v8_classes; clp != NULL; clp = clp->v8c_next)
+ mdb_printf("%s\n", clp->v8c_name);
+
+ return (DCMD_OK);
+}
+
+static int
+do_v8code(uintptr_t addr, boolean_t opt_d)
+{
+ uintptr_t instrlen;
+ ssize_t instroff = V8_OFF_CODE_INSTRUCTION_START;
+
+ if (read_heap_ptr(&instrlen, addr, V8_OFF_CODE_INSTRUCTION_SIZE) != 0)
+ return (DCMD_ERR);
+
+ mdb_printf("code: %p\n", addr);
+ mdb_printf("instructions: [%p, %p)\n", addr + instroff,
+ addr + instroff + instrlen);
+
+ if (!opt_d)
+ return (DCMD_OK);
+
+ mdb_set_dot(addr + instroff);
+
+ do {
+ (void) mdb_inc_indent(8); /* gets reset by mdb_eval() */
+
+ /*
+ * This is absolutely awful. We want to disassemble the above
+ * range of instructions. Because we don't know how many there
+ * are, we can't use "::dis". We resort to evaluating "./i",
+ * but then we need to advance "." by the size of the
+ * instruction just printed. The only way to do that is by
+ * printing out "+", but we don't want that to show up, so we
+ * redirect it to /dev/null.
+ */
+ if (mdb_eval("/i") != 0 ||
+ mdb_eval("+=p ! cat > /dev/null") != 0) {
+ (void) mdb_dec_indent(8);
+ v8_warn("failed to disassemble at %p", mdb_get_dot());
+ return (DCMD_ERR);
+ }
+ } while (mdb_get_dot() < addr + instroff + instrlen);
+
+ (void) mdb_dec_indent(8);
+ return (DCMD_OK);
+}
+
+/* ARGSUSED */
+static int
+dcmd_v8code(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ boolean_t opt_d = B_FALSE;
+
+ if (mdb_getopts(argc, argv, 'd', MDB_OPT_SETBITS, B_TRUE, &opt_d,
+ NULL) != argc)
+ return (DCMD_USAGE);
+
+ return (do_v8code(addr, opt_d));
+}
+
+/* ARGSUSED */
+static int
+dcmd_v8function(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ uint8_t type;
+ uintptr_t funcinfop, scriptp, lendsp, tokpos, namep, codep;
+ char *bufp;
+ size_t len;
+ boolean_t opt_d = B_FALSE;
+ char buf[512];
+
+ if (mdb_getopts(argc, argv, 'd', MDB_OPT_SETBITS, B_TRUE, &opt_d,
+ NULL) != argc)
+ return (DCMD_USAGE);
+
+ v8_warnings++;
+
+ if (read_typebyte(&type, addr) != 0)
+ goto err;
+
+ if (strcmp(enum_lookup_str(v8_types, type, ""), "JSFunction") != 0) {
+ v8_warn("%p is not an instance of JSFunction\n", addr);
+ goto err;
+ }
+
+ if (read_heap_ptr(&funcinfop, addr, V8_OFF_JSFUNCTION_SHARED) != 0 ||
+ read_heap_maybesmi(&tokpos, funcinfop,
+ V8_OFF_SHAREDFUNCTIONINFO_FUNCTION_TOKEN_POSITION) != 0 ||
+ read_heap_ptr(&scriptp, funcinfop,
+ V8_OFF_SHAREDFUNCTIONINFO_SCRIPT) != 0 ||
+ read_heap_ptr(&namep, scriptp, V8_OFF_SCRIPT_NAME) != 0 ||
+ read_heap_ptr(&lendsp, scriptp, V8_OFF_SCRIPT_LINE_ENDS) != 0)
+ goto err;
+
+ /*
+ * The token position is normally a SMI, so read_heap_maybesmi() will
+ * interpret the value for us. However, this code uses its SMI-encoded
+ * value, so convert it back here.
+ */
+ tokpos = V8_VALUE_SMI(tokpos);
+
+ bufp = buf;
+ len = sizeof (buf);
+ if (jsfunc_name(funcinfop, &bufp, &len) != 0)
+ goto err;
+
+ mdb_printf("%p: JSFunction: %s\n", addr, buf);
+
+ bufp = buf;
+ len = sizeof (buf);
+ mdb_printf("defined at ");
+
+ if (jsstr_print(namep, JSSTR_NUDE, &bufp, &len) == 0)
+ mdb_printf("%s ", buf);
+
+ if (jsfunc_lineno(lendsp, tokpos, buf, sizeof (buf), NULL) == 0)
+ mdb_printf("%s", buf);
+
+ mdb_printf("\n");
+
+ if (read_heap_ptr(&codep,
+ funcinfop, V8_OFF_SHAREDFUNCTIONINFO_CODE) != 0)
+ goto err;
+
+ v8_warnings--;
+
+ return (do_v8code(codep, opt_d));
+
+err:
+ v8_warnings--;
+ return (DCMD_ERR);
+}
+
+/*
+ * Access an internal field of a V8 object.
+ */
+/* ARGSUSED */
+static int
+dcmd_v8internal(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ uintptr_t idx;
+ uintptr_t fieldaddr;
+
+ if (mdb_getopts(argc, argv, NULL) != argc - 1 ||
+ argv[argc - 1].a_type != MDB_TYPE_STRING)
+ return (DCMD_USAGE);
+
+ idx = mdb_strtoull(argv[argc - 1].a_un.a_str);
+ if (obj_v8internal(addr, idx, &fieldaddr) != 0)
+ return (DCMD_ERR);
+
+ mdb_printf("%p\n", fieldaddr);
+ return (DCMD_OK);
+}
+
+/* ARGSUSED */
+static int
+dcmd_v8frametypes(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ enum_print(v8_frametypes);
+ return (DCMD_OK);
+}
+
+static void
+dcmd_v8print_help(void)
+{
+ mdb_printf(
+ "Prints out \".\" (a V8 heap object) as an instance of its C++\n"
+ "class. With no arguments, the appropriate class is detected\n"
+ "automatically. The 'class' argument overrides this to print an\n"
+ "object as an instance of the given class. The list of known\n"
+ "classes can be viewed with ::jsclasses.");
+}
+
+/* ARGSUSED */
+static int
+dcmd_v8print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ const char *rqclass;
+ v8_class_t *clp;
+ char *bufp;
+ size_t len;
+ uint8_t type;
+ char buf[256];
+
+ if (argc < 1) {
+ /*
+ * If no type was specified, determine it automatically.
+ */
+ bufp = buf;
+ len = sizeof (buf);
+ if (obj_jstype(addr, &bufp, &len, &type) != 0)
+ return (DCMD_ERR);
+
+ if (type == 0) {
+ /* For SMI or Failure, just print out the type. */
+ mdb_printf("%s\n", buf);
+ return (DCMD_OK);
+ }
+
+ if ((rqclass = enum_lookup_str(v8_types, type, NULL)) == NULL) {
+ v8_warn("object has unknown type\n");
+ return (DCMD_ERR);
+ }
+ } else {
+ if (argv[0].a_type != MDB_TYPE_STRING)
+ return (DCMD_USAGE);
+
+ rqclass = argv[0].a_un.a_str;
+ }
+
+ for (clp = v8_classes; clp != NULL; clp = clp->v8c_next) {
+ if (strcmp(rqclass, clp->v8c_name) == 0)
+ break;
+ }
+
+ if (clp == NULL) {
+ v8_warn("unknown class '%s'\n", rqclass);
+ return (DCMD_USAGE);
+ }
+
+ return (obj_print_class(addr, clp));
+}
+
+/* ARGSUSED */
+static int
+dcmd_v8type(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ char buf[64];
+ char *bufp = buf;
+ size_t len = sizeof (buf);
+
+ if (obj_jstype(addr, &bufp, &len, NULL) != 0)
+ return (DCMD_ERR);
+
+ mdb_printf("0x%p: %s\n", addr, buf);
+ return (DCMD_OK);
+}
+
+/* ARGSUSED */
+static int
+dcmd_v8types(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ enum_print(v8_types);
+ return (DCMD_OK);
+}
+
+static int
+load_current_context(uintptr_t *fpp, uintptr_t *raddrp)
+{
+ mdb_reg_t regfp, regip;
+
+#ifdef __amd64
+ if (mdb_getareg(1, "rbp", &regfp) != 0 ||
+ mdb_getareg(1, "rip", &regip) != 0) {
+#else
+#ifdef __i386
+ if (mdb_getareg(1, "ebp", &regfp) != 0 ||
+ mdb_getareg(1, "eip", &regip) != 0) {
+#else
+#error Unrecognized microprocessor
+#endif
+#endif
+ v8_warn("failed to load current context");
+ return (-1);
+ }
+
+ if (fpp != NULL)
+ *fpp = (uintptr_t)regfp;
+
+ if (raddrp != NULL)
+ *raddrp = (uintptr_t)regip;
+
+ return (0);
+}
+
+typedef struct jsframe {
+ boolean_t jsf_showall; /* show hidden frames and pointers */
+ boolean_t jsf_verbose; /* show arguments and JS code */
+ char *jsf_func; /* filter frames for named function */
+ char *jsf_prop; /* filter arguments */
+ uintptr_t jsf_nlines; /* lines of context (for verbose) */
+ uint_t jsf_nskipped; /* skipped frames */
+} jsframe_t;
+
+static void
+jsframe_skip(jsframe_t *jsf)
+{
+ jsf->jsf_nskipped++;
+}
+
+static void
+jsframe_print_skipped(jsframe_t *jsf)
+{
+ if (jsf->jsf_nskipped == 1)
+ mdb_printf(" (1 internal frame elided)\n");
+ else if (jsf->jsf_nskipped > 1)
+ mdb_printf(" (%d internal frames elided)\n",
+ jsf->jsf_nskipped);
+ jsf->jsf_nskipped = 0;
+}
+
+static int
+do_jsframe_special(uintptr_t fptr, uintptr_t raddr, jsframe_t *jsf)
+{
+ uint_t count;
+ uintptr_t ftype;
+ const char *ftypename;
+ char *prop = jsf->jsf_prop;
+
+ /*
+ * First see if this looks like a native frame rather than a JavaScript
+ * frame. We check this by asking MDB to print the return address
+ * symbolically. If that works, we assume this was NOT a V8 frame,
+ * since those are never in the symbol table.
+ */
+ count = mdb_snprintf(NULL, 0, "%A", raddr);
+ if (count > 1) {
+ if (prop != NULL)
+ return (0);
+
+ jsframe_print_skipped(jsf);
+ if (jsf->jsf_showall) {
+ mdb_printf("%p %a\n", fptr, raddr);
+ } else if (count <= 65) {
+ mdb_printf("native: %a\n", raddr);
+ } else {
+ char buf[65];
+ mdb_snprintf(buf, sizeof (buf), "%a", raddr);
+ mdb_printf("native: %s...\n", buf);
+ }
+ return (0);
+ }
+
+ /*
+ * Figure out what kind of frame this is using the same algorithm as
+ * V8's ComputeType function. First, look for an ArgumentsAdaptorFrame.
+ */
+ if (mdb_vread(&ftype, sizeof (ftype), fptr + V8_OFF_FP_CONTEXT) != -1 &&
+ V8_IS_SMI(ftype) &&
+ (ftypename = enum_lookup_str(v8_frametypes, V8_SMI_VALUE(ftype),
+ NULL)) != NULL && strstr(ftypename, "ArgumentsAdaptor") != NULL) {
+ if (prop != NULL)
+ return (0);
+
+ if (jsf->jsf_showall) {
+ jsframe_print_skipped(jsf);
+ mdb_printf("%p %a <%s>\n", fptr, raddr, ftypename);
+ } else {
+ jsframe_skip(jsf);
+ }
+ return (0);
+ }
+
+ /*
+ * Other special frame types are indicated by a marker.
+ */
+ if (mdb_vread(&ftype, sizeof (ftype), fptr + V8_OFF_FP_MARKER) != -1 &&
+ V8_IS_SMI(ftype)) {
+ if (prop != NULL)
+ return (0);
+
+ ftypename = enum_lookup_str(v8_frametypes, V8_SMI_VALUE(ftype),
+ NULL);
+
+ if (jsf->jsf_showall && ftypename != NULL) {
+ jsframe_print_skipped(jsf);
+ mdb_printf("%p %a <%s>\n", fptr, raddr, ftypename);
+ } else {
+ jsframe_skip(jsf);
+ }
+
+ return (0);
+ }
+
+ return (-1);
+}
+
+static int
+do_jsframe(uintptr_t fptr, uintptr_t raddr, jsframe_t *jsf)
+{
+ boolean_t showall = jsf->jsf_showall;
+ boolean_t verbose = jsf->jsf_verbose;
+ const char *func = jsf->jsf_func;
+ const char *prop = jsf->jsf_prop;
+ uintptr_t nlines = jsf->jsf_nlines;
+
+ uintptr_t funcp, funcinfop, tokpos, endpos, scriptp, lendsp, ptrp;
+ uintptr_t ii, nargs;
+ const char *typename;
+ char *bufp;
+ size_t len;
+ uint8_t type;
+ char buf[256];
+ int lineno;
+
+ /*
+ * Check for non-JavaScript frames first.
+ */
+ if (func == NULL && do_jsframe_special(fptr, raddr, jsf) == 0)
+ return (DCMD_OK);
+
+ /*
+ * At this point we assume we're looking at a JavaScript frame. As with
+ * native frames, fish the address out of the parent frame.
+ */
+ if (mdb_vread(&funcp, sizeof (funcp),
+ fptr + V8_OFF_FP_FUNCTION) == -1) {
+ v8_warn("failed to read stack at %p",
+ fptr + V8_OFF_FP_FUNCTION);
+ return (DCMD_ERR);
+ }
+
+ /*
+ * Check if this thing is really a JSFunction at all. For some frames,
+ * it's a Code object, presumably indicating some internal frame.
+ */
+ if (read_typebyte(&type, funcp) != 0 ||
+ (typename = enum_lookup_str(v8_types, type, NULL)) == NULL) {
+ if (func != NULL || prop != NULL)
+ return (DCMD_OK);
+
+ if (showall) {
+ jsframe_print_skipped(jsf);
+ mdb_printf("%p %a\n", fptr, raddr);
+ } else {
+ jsframe_skip(jsf);
+ }
+ return (DCMD_OK);
+ }
+
+ if (strcmp("Code", typename) == 0) {
+ if (func != NULL || prop != NULL)
+ return (DCMD_OK);
+
+ if (showall) {
+ jsframe_print_skipped(jsf);
+ mdb_printf("%p %a internal (Code: %p)\n",
+ fptr, raddr, funcp);
+ } else {
+ jsframe_skip(jsf);
+ }
+ return (DCMD_OK);
+ }
+
+ if (strcmp("JSFunction", typename) != 0) {
+ if (func != NULL || prop != NULL)
+ return (DCMD_OK);
+
+ if (showall) {
+ jsframe_print_skipped(jsf);
+ mdb_printf("%p %a unknown (%s: %p)",
+ fptr, raddr, typename, funcp);
+ } else {
+ jsframe_skip(jsf);
+ }
+ return (DCMD_OK);
+ }
+
+ if (read_heap_ptr(&funcinfop, funcp, V8_OFF_JSFUNCTION_SHARED) != 0)
+ return (DCMD_ERR);
+
+ bufp = buf;
+ len = sizeof (buf);
+ if (jsfunc_name(funcinfop, &bufp, &len) != 0)
+ return (DCMD_ERR);
+
+ if (func != NULL && strcmp(buf, func) != 0)
+ return (DCMD_OK);
+
+ if (prop == NULL) {
+ jsframe_print_skipped(jsf);
+ if (showall)
+ mdb_printf("%p %a ", fptr, raddr);
+ else
+ mdb_printf("js: ");
+ mdb_printf("%s", buf);
+ if (showall)
+ mdb_printf(" (JSFunction: %p)\n", funcp);
+ else
+ mdb_printf("\n");
+ }
+
+ if (!verbose && prop == NULL)
+ return (DCMD_OK);
+
+ if (verbose)
+ jsframe_print_skipped(jsf);
+
+ /*
+ * Although the token position is technically an SMI, we're going to
+ * byte-compare it to other SMI values so we don't want decode it here.
+ */
+ if (read_heap_maybesmi(&tokpos, funcinfop,
+ V8_OFF_SHAREDFUNCTIONINFO_FUNCTION_TOKEN_POSITION) != 0)
+ return (DCMD_ERR);
+ tokpos = V8_VALUE_SMI(tokpos);
+
+ if (read_heap_ptr(&scriptp, funcinfop,
+ V8_OFF_SHAREDFUNCTIONINFO_SCRIPT) != 0)
+ return (DCMD_ERR);
+
+ if (read_heap_ptr(&ptrp, scriptp, V8_OFF_SCRIPT_NAME) != 0)
+ return (DCMD_ERR);
+
+ bufp = buf;
+ len = sizeof (buf);
+ (void) jsstr_print(ptrp, JSSTR_NUDE, &bufp, &len);
+
+ if (prop != NULL && strcmp(prop, "file") == 0) {
+ mdb_printf("%s\n", buf);
+ return (DCMD_OK);
+ }
+
+ if (prop == NULL) {
+ (void) mdb_inc_indent(10);
+ mdb_printf("file: %s\n", buf);
+ }
+
+ if (read_heap_ptr(&lendsp, scriptp, V8_OFF_SCRIPT_LINE_ENDS) != 0)
+ return (DCMD_ERR);
+
+ (void) jsfunc_lineno(lendsp, tokpos, buf, sizeof (buf), &lineno);
+
+ if (prop != NULL && strcmp(prop, "posn") == 0) {
+ mdb_printf("%s\n", buf);
+ return (DCMD_OK);
+ }
+
+ if (prop == NULL)
+ mdb_printf("posn: %s\n", buf);
+
+ if (read_heap_maybesmi(&nargs, funcinfop,
+ V8_OFF_SHAREDFUNCTIONINFO_LENGTH) == 0) {
+ uintptr_t argptr;
+ char arg[10];
+
+ if (mdb_vread(&argptr, sizeof (argptr),
+ fptr + V8_OFF_FP_ARGS + nargs * sizeof (uintptr_t)) != -1 &&
+ argptr != NULL) {
+ (void) snprintf(arg, sizeof (arg), "this");
+ if (prop != NULL && strcmp(arg, prop) == 0) {
+ mdb_printf("%p\n", argptr);
+ return (DCMD_OK);
+ }
+
+ if (prop == NULL) {
+ bufp = buf;
+ len = sizeof (buf);
+ (void) obj_jstype(argptr, &bufp, &len, NULL);
+
+ mdb_printf("this: %p (%s)\n", argptr, buf);
+ }
+ }
+
+ for (ii = 0; ii < nargs; ii++) {
+ if (mdb_vread(&argptr, sizeof (argptr),
+ fptr + V8_OFF_FP_ARGS + (nargs - ii - 1) *
+ sizeof (uintptr_t)) == -1)
+ continue;
+
+ (void) snprintf(arg, sizeof (arg), "arg%" PRIuPTR,
+ ii + 1);
+
+ if (prop != NULL) {
+ if (strcmp(arg, prop) != 0)
+ continue;
+
+ mdb_printf("%p\n", argptr);
+ return (DCMD_OK);
+ }
+
+ bufp = buf;
+ len = sizeof (buf);
+ (void) obj_jstype(argptr, &bufp, &len, NULL);
+
+ mdb_printf("arg%d: %p (%s)\n", (ii + 1), argptr, buf);
+ }
+ }
+
+
+ if (prop != NULL) {
+ mdb_warn("unknown frame property '%s'\n", prop);
+ return (DCMD_ERR);
+ }
+
+ if (nlines != 0 && read_heap_maybesmi(&endpos, funcinfop,
+ V8_OFF_SHAREDFUNCTIONINFO_END_POSITION) == 0) {
+ jsfunc_lines(scriptp,
+ V8_SMI_VALUE(tokpos), endpos, nlines, "%5d ");
+ mdb_printf("\n");
+ }
+
+ (void) mdb_dec_indent(10);
+
+ return (DCMD_OK);
+}
+
+typedef struct findjsobjects_prop {
+ struct findjsobjects_prop *fjsp_next;
+ char fjsp_desc[1];
+} findjsobjects_prop_t;
+
+typedef struct findjsobjects_instance {
+ uintptr_t fjsi_addr;
+ struct findjsobjects_instance *fjsi_next;
+} findjsobjects_instance_t;
+
+typedef struct findjsobjects_obj {
+ findjsobjects_prop_t *fjso_props;
+ findjsobjects_prop_t *fjso_last;
+ jspropinfo_t fjso_propinfo;
+ size_t fjso_nprops;
+ findjsobjects_instance_t fjso_instances;
+ int fjso_ninstances;
+ avl_node_t fjso_node;
+ struct findjsobjects_obj *fjso_next;
+ boolean_t fjso_malformed;
+ char fjso_constructor[80];
+} findjsobjects_obj_t;
+
+typedef struct findjsobjects_func {
+ findjsobjects_instance_t fjsf_instances;
+ int fjsf_ninstances;
+ avl_node_t fjsf_node;
+ struct findjsobjects_func *fjsf_next;
+ uintptr_t fjsf_shared;
+ char fjsf_funcname[40];
+ char fjsf_scriptname[80];
+ char fjsf_location[20];
+} findjsobjects_func_t;
+
+typedef struct findjsobjects_stats {
+ int fjss_heapobjs;
+ int fjss_cached;
+ int fjss_typereads;
+ int fjss_jsobjs;
+ int fjss_objects;
+ int fjss_arrays;
+ int fjss_uniques;
+ int fjss_funcs;
+ int fjss_funcs_skipped;
+ int fjss_funcs_unique;
+} findjsobjects_stats_t;
+
+typedef struct findjsobjects_reference {
+ uintptr_t fjsrf_addr;
+ char *fjsrf_desc;
+ size_t fjsrf_index;
+ struct findjsobjects_reference *fjsrf_next;
+} findjsobjects_reference_t;
+
+typedef struct findjsobjects_referent {
+ avl_node_t fjsr_node;
+ uintptr_t fjsr_addr;
+ findjsobjects_reference_t *fjsr_head;
+ findjsobjects_reference_t *fjsr_tail;
+ struct findjsobjects_referent *fjsr_next;
+} findjsobjects_referent_t;
+
+typedef struct findjsobjects_state {
+ uintptr_t fjs_addr;
+ uintptr_t fjs_size;
+ boolean_t fjs_verbose;
+ boolean_t fjs_brk;
+ boolean_t fjs_allobjs;
+ boolean_t fjs_initialized;
+ boolean_t fjs_marking;
+ boolean_t fjs_referred;
+ avl_tree_t fjs_tree;
+ avl_tree_t fjs_referents;
+ avl_tree_t fjs_funcinfo;
+ findjsobjects_referent_t *fjs_head;
+ findjsobjects_referent_t *fjs_tail;
+ findjsobjects_obj_t *fjs_current;
+ findjsobjects_obj_t *fjs_objects;
+ findjsobjects_func_t *fjs_funcs;
+ findjsobjects_stats_t fjs_stats;
+} findjsobjects_state_t;
+
+findjsobjects_obj_t *
+findjsobjects_alloc(uintptr_t addr)
+{
+ findjsobjects_obj_t *obj;
+
+ obj = mdb_zalloc(sizeof (findjsobjects_obj_t), UM_SLEEP);
+ obj->fjso_instances.fjsi_addr = addr;
+ obj->fjso_ninstances = 1;
+
+ return (obj);
+}
+
+void
+findjsobjects_free(findjsobjects_obj_t *obj)
+{
+ findjsobjects_prop_t *prop, *next;
+
+ for (prop = obj->fjso_props; prop != NULL; prop = next) {
+ next = prop->fjsp_next;
+ mdb_free(prop, sizeof (findjsobjects_prop_t) +
+ strlen(prop->fjsp_desc));
+ }
+
+ mdb_free(obj, sizeof (findjsobjects_obj_t));
+}
+
+int
+findjsobjects_cmp(findjsobjects_obj_t *lhs, findjsobjects_obj_t *rhs)
+{
+ findjsobjects_prop_t *lprop, *rprop;
+ int rv;
+
+ lprop = lhs->fjso_props;
+ rprop = rhs->fjso_props;
+
+ while (lprop != NULL && rprop != NULL) {
+ if ((rv = strcmp(lprop->fjsp_desc, rprop->fjsp_desc)) != 0)
+ return (rv > 0 ? 1 : -1);
+
+ lprop = lprop->fjsp_next;
+ rprop = rprop->fjsp_next;
+ }
+
+ if (lprop != NULL)
+ return (1);
+
+ if (rprop != NULL)
+ return (-1);
+
+ if (lhs->fjso_nprops > rhs->fjso_nprops)
+ return (1);
+
+ if (lhs->fjso_nprops < rhs->fjso_nprops)
+ return (-1);
+
+ rv = strcmp(lhs->fjso_constructor, rhs->fjso_constructor);
+
+ return (rv < 0 ? -1 : rv > 0 ? 1 : 0);
+}
+
+int
+findjsobjects_cmp_funcinfo(findjsobjects_func_t *lhs,
+ findjsobjects_func_t *rhs)
+{
+ int diff = lhs->fjsf_shared - rhs->fjsf_shared;
+ return (diff < 0 ? -1 : diff > 0 ? 1 : 0);
+}
+
+int
+findjsobjects_cmp_referents(findjsobjects_referent_t *lhs,
+ findjsobjects_referent_t *rhs)
+{
+ if (lhs->fjsr_addr < rhs->fjsr_addr)
+ return (-1);
+
+ if (lhs->fjsr_addr > rhs->fjsr_addr)
+ return (1);
+
+ return (0);
+}
+
+int
+findjsobjects_cmp_ninstances(const void *l, const void *r)
+{
+ findjsobjects_obj_t *lhs = *((findjsobjects_obj_t **)l);
+ findjsobjects_obj_t *rhs = *((findjsobjects_obj_t **)r);
+ size_t lprod = lhs->fjso_ninstances * lhs->fjso_nprops;
+ size_t rprod = rhs->fjso_ninstances * rhs->fjso_nprops;
+
+ if (lprod < rprod)
+ return (-1);
+
+ if (lprod > rprod)
+ return (1);
+
+ if (lhs->fjso_ninstances < rhs->fjso_ninstances)
+ return (-1);
+
+ if (lhs->fjso_ninstances > rhs->fjso_ninstances)
+ return (1);
+
+ if (lhs->fjso_nprops < rhs->fjso_nprops)
+ return (-1);
+
+ if (lhs->fjso_nprops > rhs->fjso_nprops)
+ return (1);
+
+ return (0);
+}
+
+/*ARGSUSED*/
+int
+findjsobjects_prop(const char *desc, uintptr_t val, void *arg)
+{
+ findjsobjects_state_t *fjs = arg;
+ findjsobjects_obj_t *current = fjs->fjs_current;
+ findjsobjects_prop_t *prop;
+
+ if (desc == NULL)
+ desc = "<unknown>";
+
+ prop = mdb_zalloc(sizeof (findjsobjects_prop_t) +
+ strlen(desc), UM_SLEEP);
+
+ strcpy(prop->fjsp_desc, desc);
+
+ if (current->fjso_last != NULL) {
+ current->fjso_last->fjsp_next = prop;
+ } else {
+ current->fjso_props = prop;
+ }
+
+ current->fjso_last = prop;
+ current->fjso_nprops++;
+ current->fjso_malformed =
+ val == NULL && current->fjso_nprops == 1 && desc[0] == '<';
+
+ return (0);
+}
+
+static void
+findjsobjects_constructor(findjsobjects_obj_t *obj)
+{
+ char *bufp = obj->fjso_constructor;
+ size_t len = sizeof (obj->fjso_constructor);
+ uintptr_t map, funcinfop;
+ uintptr_t addr = obj->fjso_instances.fjsi_addr;
+ uint8_t type;
+
+ v8_silent++;
+
+ if (read_heap_ptr(&map, addr, V8_OFF_HEAPOBJECT_MAP) != 0 ||
+ read_heap_ptr(&addr, map, V8_OFF_MAP_CONSTRUCTOR) != 0)
+ goto out;
+
+ if (read_typebyte(&type, addr) != 0)
+ goto out;
+
+ if (strcmp(enum_lookup_str(v8_types, type, ""), "JSFunction") != 0)
+ goto out;
+
+ if (read_heap_ptr(&funcinfop, addr, V8_OFF_JSFUNCTION_SHARED) != 0)
+ goto out;
+
+ if (jsfunc_name(funcinfop, &bufp, &len) != 0)
+ goto out;
+out:
+ v8_silent--;
+}
+
+static void
+findjsobjects_jsfunc(findjsobjects_state_t *fjs, uintptr_t addr)
+{
+ findjsobjects_func_t *func, *ofunc;
+ findjsobjects_instance_t *inst;
+ uintptr_t funcinfo, script, name;
+ avl_index_t where;
+ int err;
+ char *bufp;
+ size_t len;
+
+ /*
+ * This may be somewhat expensive to do for all JSFunctions, but in most
+ * core files, there aren't that many. We could defer some of this work
+ * until the user tries to print the function ::jsfunctions, but this
+ * step is useful to do early to filter out garbage data.
+ */
+
+ v8_silent++;
+ if (read_heap_ptr(&funcinfo, addr, V8_OFF_JSFUNCTION_SHARED) != 0 ||
+ read_heap_ptr(&script, funcinfo,
+ V8_OFF_SHAREDFUNCTIONINFO_SCRIPT) != 0 ||
+ read_heap_ptr(&name, script, V8_OFF_SCRIPT_NAME) != 0) {
+ fjs->fjs_stats.fjss_funcs_skipped++;
+ v8_silent--;
+ return;
+ }
+
+ func = mdb_zalloc(sizeof (findjsobjects_func_t), UM_SLEEP);
+ func->fjsf_ninstances = 1;
+ func->fjsf_instances.fjsi_addr = addr;
+ func->fjsf_shared = funcinfo;
+
+ bufp = func->fjsf_funcname;
+ len = sizeof (func->fjsf_funcname);
+ err = jsfunc_name(funcinfo, &bufp, &len);
+
+ bufp = func->fjsf_scriptname;
+ len = sizeof (func->fjsf_scriptname);
+ err |= jsstr_print(name, JSSTR_NUDE, &bufp, &len);
+
+ v8_silent--;
+ if (err != 0) {
+ fjs->fjs_stats.fjss_funcs_skipped++;
+ mdb_free(func, sizeof (findjsobjects_func_t));
+ return;
+ }
+
+ fjs->fjs_stats.fjss_funcs++;
+ ofunc = avl_find(&fjs->fjs_funcinfo, func, &where);
+ if (ofunc == NULL) {
+ avl_add(&fjs->fjs_funcinfo, func);
+ func->fjsf_next = fjs->fjs_funcs;
+ fjs->fjs_funcs = func;
+ fjs->fjs_stats.fjss_funcs_unique++;
+ } else {
+ inst = mdb_alloc(sizeof (findjsobjects_instance_t), UM_SLEEP);
+ inst->fjsi_addr = addr;
+ inst->fjsi_next = ofunc->fjsf_instances.fjsi_next;
+ ofunc->fjsf_instances.fjsi_next = inst;
+ ofunc->fjsf_ninstances++;
+ mdb_free(func, sizeof (findjsobjects_func_t));
+ }
+}
+
+int
+findjsobjects_range(findjsobjects_state_t *fjs, uintptr_t addr, uintptr_t size)
+{
+ uintptr_t limit;
+ findjsobjects_stats_t *stats = &fjs->fjs_stats;
+ uint8_t type;
+ int jsobject = V8_TYPE_JSOBJECT, jsarray = V8_TYPE_JSARRAY;
+ int jsfunction = V8_TYPE_JSFUNCTION;
+ caddr_t range = mdb_alloc(size, UM_SLEEP);
+ uintptr_t base = addr, mapaddr;
+
+ if (mdb_vread(range, size, addr) == -1)
+ return (0);
+
+ for (limit = addr + size; addr < limit; addr++) {
+ findjsobjects_instance_t *inst;
+ findjsobjects_obj_t *obj;
+ avl_index_t where;
+
+ if (V8_IS_SMI(addr))
+ continue;
+
+ if (!V8_IS_HEAPOBJECT(addr))
+ continue;
+
+ stats->fjss_heapobjs++;
+
+ mapaddr = *((uintptr_t *)((uintptr_t)range +
+ (addr - base) + V8_OFF_HEAPOBJECT_MAP));
+
+ if (!V8_IS_HEAPOBJECT(mapaddr))
+ continue;
+
+ mapaddr += V8_OFF_MAP_INSTANCE_ATTRIBUTES;
+ stats->fjss_typereads++;
+
+ if (mapaddr >= base && mapaddr < base + size) {
+ stats->fjss_cached++;
+
+ type = *((uint8_t *)((uintptr_t)range +
+ (mapaddr - base)));
+ } else {
+ if (mdb_vread(&type, sizeof (uint8_t), mapaddr) == -1)
+ continue;
+ }
+
+ if (type == jsfunction) {
+ findjsobjects_jsfunc(fjs, addr);
+ continue;
+ }
+
+ if (type != jsobject && type != jsarray)
+ continue;
+
+ stats->fjss_jsobjs++;
+
+ fjs->fjs_current = findjsobjects_alloc(addr);
+
+ if (type == jsobject) {
+ if (jsobj_properties(addr,
+ findjsobjects_prop, fjs,
+ &fjs->fjs_current->fjso_propinfo) != 0) {
+ findjsobjects_free(fjs->fjs_current);
+ fjs->fjs_current = NULL;
+ continue;
+ }
+
+ findjsobjects_constructor(fjs->fjs_current);
+ stats->fjss_objects++;
+ } else {
+ uintptr_t ptr;
+ size_t *nprops = &fjs->fjs_current->fjso_nprops;
+ ssize_t len = V8_OFF_JSARRAY_LENGTH;
+ ssize_t elems = V8_OFF_JSOBJECT_ELEMENTS;
+ ssize_t flen = V8_OFF_FIXEDARRAY_LENGTH;
+ uintptr_t nelems;
+ uint8_t t;
+
+ if (read_heap_smi(nprops, addr, len) != 0 ||
+ read_heap_ptr(&ptr, addr, elems) != 0 ||
+ !V8_IS_HEAPOBJECT(ptr) ||
+ read_typebyte(&t, ptr) != 0 ||
+ t != V8_TYPE_FIXEDARRAY ||
+ read_heap_smi(&nelems, ptr, flen) != 0 ||
+ nelems < *nprops) {
+ findjsobjects_free(fjs->fjs_current);
+ fjs->fjs_current = NULL;
+ continue;
+ }
+
+ strcpy(fjs->fjs_current->fjso_constructor, "Array");
+ stats->fjss_arrays++;
+ }
+
+ /*
+ * Now determine if we already have an object matching our
+ * properties. If we don't, we'll add our new object; if we
+ * do we'll merely enqueue our instance.
+ */
+ obj = avl_find(&fjs->fjs_tree, fjs->fjs_current, &where);
+
+ if (obj == NULL) {
+ avl_add(&fjs->fjs_tree, fjs->fjs_current);
+ fjs->fjs_current->fjso_next = fjs->fjs_objects;
+ fjs->fjs_objects = fjs->fjs_current;
+ fjs->fjs_current = NULL;
+ stats->fjss_uniques++;
+ continue;
+ }
+
+ findjsobjects_free(fjs->fjs_current);
+ fjs->fjs_current = NULL;
+
+ inst = mdb_alloc(sizeof (findjsobjects_instance_t), UM_SLEEP);
+ inst->fjsi_addr = addr;
+ inst->fjsi_next = obj->fjso_instances.fjsi_next;
+ obj->fjso_instances.fjsi_next = inst;
+ obj->fjso_ninstances++;
+ }
+
+ mdb_free(range, size);
+
+ return (0);
+}
+
+static int
+findjsobjects_mapping(findjsobjects_state_t *fjs, const prmap_t *pmp,
+ const char *name)
+{
+ if (name != NULL && !(fjs->fjs_brk && (pmp->pr_mflags & MA_BREAK)))
+ return (0);
+
+ if (fjs->fjs_addr != NULL && (fjs->fjs_addr < pmp->pr_vaddr ||
+ fjs->fjs_addr >= pmp->pr_vaddr + pmp->pr_size))
+ return (0);
+
+ return (findjsobjects_range(fjs, pmp->pr_vaddr, pmp->pr_size));
+}
+
+static void
+findjsobjects_references_add(findjsobjects_state_t *fjs, uintptr_t val,
+ const char *desc, size_t index)
+{
+ findjsobjects_referent_t search, *referent;
+ findjsobjects_reference_t *reference;
+
+ search.fjsr_addr = val;
+
+ if ((referent = avl_find(&fjs->fjs_referents, &search, NULL)) == NULL)
+ return;
+
+ reference = mdb_zalloc(sizeof (*reference), UM_SLEEP | UM_GC);
+ reference->fjsrf_addr = fjs->fjs_addr;
+
+ if (desc != NULL) {
+ reference->fjsrf_desc =
+ mdb_alloc(strlen(desc) + 1, UM_SLEEP | UM_GC);
+ (void) strcpy(reference->fjsrf_desc, desc);
+ } else {
+ reference->fjsrf_index = index;
+ }
+
+ if (referent->fjsr_head == NULL) {
+ referent->fjsr_head = reference;
+ } else {
+ referent->fjsr_tail->fjsrf_next = reference;
+ }
+
+ referent->fjsr_tail = reference;
+}
+
+static int
+findjsobjects_references_prop(const char *desc, uintptr_t val, void *arg)
+{
+ findjsobjects_references_add(arg, val, desc, -1);
+
+ return (0);
+}
+
+static void
+findjsobjects_references_array(findjsobjects_state_t *fjs,
+ findjsobjects_obj_t *obj)
+{
+ findjsobjects_instance_t *inst = &obj->fjso_instances;
+ uintptr_t *elts;
+ size_t i, len;
+
+ for (; inst != NULL; inst = inst->fjsi_next) {
+ uintptr_t addr = inst->fjsi_addr, ptr;
+
+ if (read_heap_ptr(&ptr, addr, V8_OFF_JSOBJECT_ELEMENTS) != 0 ||
+ read_heap_array(ptr, &elts, &len, UM_SLEEP) != 0)
+ continue;
+
+ fjs->fjs_addr = addr;
+
+ for (i = 0; i < len; i++)
+ findjsobjects_references_add(fjs, elts[i], NULL, i);
+
+ mdb_free(elts, len * sizeof (uintptr_t));
+ }
+}
+
+static void
+findjsobjects_referent(findjsobjects_state_t *fjs, uintptr_t addr)
+{
+ findjsobjects_referent_t search, *referent;
+
+ search.fjsr_addr = addr;
+
+ if (avl_find(&fjs->fjs_referents, &search, NULL) != NULL) {
+ assert(fjs->fjs_marking);
+ mdb_warn("%p is already marked; ignoring\n", addr);
+ return;
+ }
+
+ referent = mdb_zalloc(sizeof (findjsobjects_referent_t), UM_SLEEP);
+ referent->fjsr_addr = addr;
+
+ avl_add(&fjs->fjs_referents, referent);
+
+ if (fjs->fjs_tail != NULL) {
+ fjs->fjs_tail->fjsr_next = referent;
+ } else {
+ fjs->fjs_head = referent;
+ }
+
+ fjs->fjs_tail = referent;
+
+ if (fjs->fjs_marking)
+ mdb_printf("findjsobjects: marked %p\n", addr);
+}
+
+static void
+findjsobjects_references(findjsobjects_state_t *fjs)
+{
+ findjsobjects_reference_t *reference;
+ findjsobjects_referent_t *referent;
+ avl_tree_t *referents = &fjs->fjs_referents;
+ findjsobjects_obj_t *obj;
+ void *cookie = NULL;
+ uintptr_t addr;
+
+ fjs->fjs_referred = B_FALSE;
+
+ v8_silent++;
+
+ /*
+ * First traverse over all objects and arrays, looking for references
+ * to our designated referent(s).
+ */
+ for (obj = fjs->fjs_objects; obj != NULL; obj = obj->fjso_next) {
+ findjsobjects_instance_t *head = &obj->fjso_instances, *inst;
+
+ if (obj->fjso_nprops != 0 && obj->fjso_props == NULL) {
+ findjsobjects_references_array(fjs, obj);
+ continue;
+ }
+
+ for (inst = head; inst != NULL; inst = inst->fjsi_next) {
+ fjs->fjs_addr = inst->fjsi_addr;
+
+ (void) jsobj_properties(inst->fjsi_addr,
+ findjsobjects_references_prop, fjs, NULL);
+ }
+ }
+
+ v8_silent--;
+ fjs->fjs_addr = NULL;
+
+ /*
+ * Now go over our referent(s), reporting any references that we have
+ * accumulated.
+ */
+ for (referent = fjs->fjs_head; referent != NULL;
+ referent = referent->fjsr_next) {
+ addr = referent->fjsr_addr;
+
+ if ((reference = referent->fjsr_head) == NULL) {
+ mdb_printf("%p is not referred to by a "
+ "known object.\n", addr);
+ continue;
+ }
+
+ for (; reference != NULL; reference = reference->fjsrf_next) {
+ mdb_printf("%p referred to by %p",
+ addr, reference->fjsrf_addr);
+
+ if (reference->fjsrf_desc == NULL) {
+ mdb_printf("[%d]\n", reference->fjsrf_index);
+ } else {
+ mdb_printf(".%s\n", reference->fjsrf_desc);
+ }
+ }
+ }
+
+ /*
+ * Finally, destroy our referent nodes.
+ */
+ while ((referent = avl_destroy_nodes(referents, &cookie)) != NULL)
+ mdb_free(referent, sizeof (findjsobjects_referent_t));
+
+ fjs->fjs_head = NULL;
+ fjs->fjs_tail = NULL;
+}
+
+static findjsobjects_instance_t *
+findjsobjects_instance(findjsobjects_state_t *fjs, uintptr_t addr,
+ findjsobjects_instance_t **headp)
+{
+ findjsobjects_obj_t *obj;
+
+ for (obj = fjs->fjs_objects; obj != NULL; obj = obj->fjso_next) {
+ findjsobjects_instance_t *head = &obj->fjso_instances, *inst;
+
+ for (inst = head; inst != NULL; inst = inst->fjsi_next) {
+ if (inst->fjsi_addr == addr) {
+ *headp = head;
+ return (inst);
+ }
+ }
+ }
+
+ return (NULL);
+}
+
+/*ARGSUSED*/
+static void
+findjsobjects_match_all(findjsobjects_obj_t *obj, const char *ignored)
+{
+ mdb_printf("%p\n", obj->fjso_instances.fjsi_addr);
+}
+
+static void
+findjsobjects_match_propname(findjsobjects_obj_t *obj, const char *propname)
+{
+ findjsobjects_prop_t *prop;
+
+ for (prop = obj->fjso_props; prop != NULL; prop = prop->fjsp_next) {
+ if (strcmp(prop->fjsp_desc, propname) == 0) {
+ mdb_printf("%p\n", obj->fjso_instances.fjsi_addr);
+ return;
+ }
+ }
+}
+
+static void
+findjsobjects_match_constructor(findjsobjects_obj_t *obj,
+ const char *constructor)
+{
+ if (strcmp(constructor, obj->fjso_constructor) == 0)
+ mdb_printf("%p\n", obj->fjso_instances.fjsi_addr);
+}
+
+static void
+findjsobjects_match_kind(findjsobjects_obj_t *obj, const char *propkind)
+{
+ jspropinfo_t p = obj->fjso_propinfo;
+
+ if (((p & JPI_NUMERIC) != 0 && strstr(propkind, "numeric") != NULL) ||
+ ((p & JPI_DICT) != 0 && strstr(propkind, "dict") != NULL) ||
+ ((p & JPI_INOBJECT) != 0 && strstr(propkind, "inobject") != NULL) ||
+ ((p & JPI_PROPS) != 0 && strstr(propkind, "props") != NULL) ||
+ ((p & JPI_HASTRANSITIONS) != 0 &&
+ strstr(propkind, "transitions") != NULL) ||
+ ((p & JPI_HASCONTENT) != 0 &&
+ strstr(propkind, "content") != NULL) ||
+ ((p & JPI_SKIPPED) != 0 && strstr(propkind, "skipped") != NULL) ||
+ ((p & JPI_BADLAYOUT) != 0 &&
+ strstr(propkind, "badlayout") != NULL)) {
+ mdb_printf("%p\n", obj->fjso_instances.fjsi_addr);
+ }
+}
+
+static int
+findjsobjects_match(findjsobjects_state_t *fjs, uintptr_t addr,
+ uint_t flags, void (*func)(findjsobjects_obj_t *, const char *),
+ const char *match)
+{
+ findjsobjects_obj_t *obj;
+
+ if (!(flags & DCMD_ADDRSPEC)) {
+ for (obj = fjs->fjs_objects; obj != NULL;
+ obj = obj->fjso_next) {
+ if (obj->fjso_malformed && !fjs->fjs_allobjs)
+ continue;
+
+ func(obj, match);
+ }
+
+ return (DCMD_OK);
+ }
+
+ /*
+ * First, look for the specified address among the representative
+ * objects.
+ */
+ for (obj = fjs->fjs_objects; obj != NULL; obj = obj->fjso_next) {
+ if (obj->fjso_instances.fjsi_addr == addr) {
+ func(obj, match);
+ return (DCMD_OK);
+ }
+ }
+
+ /*
+ * We didn't find it among the representative objects; iterate over
+ * all objects.
+ */
+ for (obj = fjs->fjs_objects; obj != NULL; obj = obj->fjso_next) {
+ findjsobjects_instance_t *head = &obj->fjso_instances, *inst;
+
+ for (inst = head; inst != NULL; inst = inst->fjsi_next) {
+ if (inst->fjsi_addr == addr) {
+ func(obj, match);
+ return (DCMD_OK);
+ }
+ }
+ }
+
+ mdb_warn("%p does not correspond to a known object\n", addr);
+ return (DCMD_ERR);
+}
+
+static void
+findjsobjects_print(findjsobjects_obj_t *obj)
+{
+ int col = 19 + (sizeof (uintptr_t) * 2) + strlen("..."), len;
+ uintptr_t addr = obj->fjso_instances.fjsi_addr;
+ findjsobjects_prop_t *prop;
+
+ mdb_printf("%?p %8d %8d ",
+ addr, obj->fjso_ninstances, obj->fjso_nprops);
+
+ if (obj->fjso_constructor[0] != '\0') {
+ mdb_printf("%s%s", obj->fjso_constructor,
+ obj->fjso_props != NULL ? ": " : "");
+ col += strlen(obj->fjso_constructor) + 2;
+ }
+
+ for (prop = obj->fjso_props; prop != NULL; prop = prop->fjsp_next) {
+ if (col + (len = strlen(prop->fjsp_desc) + 2) < 80) {
+ mdb_printf("%s%s", prop->fjsp_desc,
+ prop->fjsp_next != NULL ? ", " : "");
+ col += len;
+ } else {
+ mdb_printf("...");
+ break;
+ }
+ }
+
+ mdb_printf("\n", col);
+}
+
+static void
+dcmd_findjsobjects_help(void)
+{
+ mdb_printf("%s\n\n",
+"Finds all JavaScript objects in the V8 heap via brute force iteration over\n"
+"all mapped anonymous memory. (This can take up to several minutes on large\n"
+"dumps.) The output consists of representative objects, the number of\n"
+"instances of that object and the number of properties on the object --\n"
+"followed by the constructor and first few properties of the objects. Once\n"
+"run, subsequent calls to ::findjsobjects use cached data. If provided an\n"
+"address (and in the absence of -r, described below), ::findjsobjects treats\n"
+"the address as that of a representative object, and lists all instances of\n"
+"that object (that is, all objects that have a matching property signature).");
+
+ mdb_dec_indent(2);
+ mdb_printf("%<b>OPTIONS%</b>\n");
+ mdb_inc_indent(2);
+
+ mdb_printf("%s\n",
+" -b Include the heap denoted by the brk(2) (normally excluded)\n"
+" -c cons Display representative objects with the specified constructor\n"
+" -p prop Display representative objects that have the specified property\n"
+" -l List all objects that match the representative object\n"
+" -m Mark specified object for later reference determination via -r\n"
+" -r Find references to the specified and/or marked object(s)\n"
+" -v Provide verbose statistics\n");
+}
+
+static findjsobjects_state_t findjsobjects_state;
+
+static int
+findjsobjects_run(findjsobjects_state_t *fjs)
+{
+ struct ps_prochandle *Pr;
+ findjsobjects_obj_t *obj;
+ findjsobjects_stats_t *stats = &fjs->fjs_stats;
+
+ if (!fjs->fjs_initialized) {
+ avl_create(&fjs->fjs_tree,
+ (int(*)(const void *, const void *))findjsobjects_cmp,
+ sizeof (findjsobjects_obj_t),
+ offsetof(findjsobjects_obj_t, fjso_node));
+
+ avl_create(&fjs->fjs_referents,
+ (int(*)(const void *, const void *))
+ findjsobjects_cmp_referents,
+ sizeof (findjsobjects_referent_t),
+ offsetof(findjsobjects_referent_t, fjsr_node));
+
+ avl_create(&fjs->fjs_funcinfo,
+ (int(*)(const void *, const void*))
+ findjsobjects_cmp_funcinfo,
+ sizeof (findjsobjects_func_t),
+ offsetof(findjsobjects_func_t, fjsf_node));
+
+ fjs->fjs_initialized = B_TRUE;
+ }
+
+ if (avl_is_empty(&fjs->fjs_tree)) {
+ findjsobjects_obj_t **sorted;
+ int nobjs, i;
+ hrtime_t start = gethrtime();
+
+ if (mdb_get_xdata("pshandle", &Pr, sizeof (Pr)) == -1) {
+ mdb_warn("couldn't read pshandle xdata");
+ return (-1);
+ }
+
+ v8_silent++;
+
+ if (Pmapping_iter(Pr,
+ (proc_map_f *)findjsobjects_mapping, fjs) != 0) {
+ v8_silent--;
+ return (-1);
+ }
+
+ if ((nobjs = avl_numnodes(&fjs->fjs_tree)) != 0) {
+ /*
+ * We have the objects -- now sort them.
+ */
+ sorted = mdb_alloc(nobjs * sizeof (void *),
+ UM_SLEEP | UM_GC);
+
+ for (obj = fjs->fjs_objects, i = 0; obj != NULL;
+ obj = obj->fjso_next, i++) {
+ sorted[i] = obj;
+ }
+
+ qsort(sorted, avl_numnodes(&fjs->fjs_tree),
+ sizeof (void *), findjsobjects_cmp_ninstances);
+
+ for (i = 1, fjs->fjs_objects = sorted[0];
+ i < nobjs; i++)
+ sorted[i - 1]->fjso_next = sorted[i];
+
+ sorted[nobjs - 1]->fjso_next = NULL;
+ }
+
+ v8_silent--;
+
+ if (fjs->fjs_verbose) {
+ const char *f = "findjsobjects: %30s => %d\n";
+ int elapsed = (int)((gethrtime() - start) / NANOSEC);
+
+ mdb_printf(f, "elapsed time (seconds)", elapsed);
+ mdb_printf(f, "heap objects", stats->fjss_heapobjs);
+ mdb_printf(f, "type reads", stats->fjss_typereads);
+ mdb_printf(f, "cached reads", stats->fjss_cached);
+ mdb_printf(f, "JavaScript objects", stats->fjss_jsobjs);
+ mdb_printf(f, "processed objects", stats->fjss_objects);
+ mdb_printf(f, "processed arrays", stats->fjss_arrays);
+ mdb_printf(f, "unique objects", stats->fjss_uniques);
+ mdb_printf(f, "functions found", stats->fjss_funcs);
+ mdb_printf(f, "unique functions",
+ stats->fjss_funcs_unique);
+ mdb_printf(f, "functions skipped",
+ stats->fjss_funcs_skipped);
+ }
+ }
+
+ return (0);
+}
+
+static int
+dcmd_findjsobjects(uintptr_t addr,
+ uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ findjsobjects_state_t *fjs = &findjsobjects_state;
+ findjsobjects_obj_t *obj;
+ boolean_t references = B_FALSE, listlike = B_FALSE;
+ const char *propname = NULL;
+ const char *constructor = NULL;
+ const char *propkind = NULL;
+
+ fjs->fjs_verbose = B_FALSE;
+ fjs->fjs_brk = B_FALSE;
+ fjs->fjs_marking = B_FALSE;
+ fjs->fjs_allobjs = B_FALSE;
+
+ if (mdb_getopts(argc, argv,
+ 'a', MDB_OPT_SETBITS, B_TRUE, &fjs->fjs_allobjs,
+ 'b', MDB_OPT_SETBITS, B_TRUE, &fjs->fjs_brk,
+ 'c', MDB_OPT_STR, &constructor,
+ 'k', MDB_OPT_STR, &propkind,
+ 'l', MDB_OPT_SETBITS, B_TRUE, &listlike,
+ 'm', MDB_OPT_SETBITS, B_TRUE, &fjs->fjs_marking,
+ 'p', MDB_OPT_STR, &propname,
+ 'r', MDB_OPT_SETBITS, B_TRUE, &references,
+ 'v', MDB_OPT_SETBITS, B_TRUE, &fjs->fjs_verbose,
+ NULL) != argc)
+ return (DCMD_USAGE);
+
+ if (findjsobjects_run(fjs) != 0)
+ return (DCMD_ERR);
+
+ if (listlike && !(flags & DCMD_ADDRSPEC)) {
+ if (propname != NULL || constructor != NULL ||
+ propkind != NULL) {
+ char opt = propname != NULL ? 'p' :
+ propkind != NULL ? 'k' :'c';
+
+ mdb_warn("cannot specify -l with -%c; instead, pipe "
+ "output of ::findjsobjects -%c to "
+ "::findjsobjects -l\n", opt, opt);
+ return (DCMD_ERR);
+ }
+
+ return (findjsobjects_match(fjs, addr, flags,
+ findjsobjects_match_all, NULL));
+ }
+
+ if (propname != NULL) {
+ if (constructor != NULL || propkind != NULL) {
+ mdb_warn("cannot specify both a property name "
+ "and a %s\n", constructor != NULL ?
+ "constructor" : "property kind");
+ return (DCMD_ERR);
+ }
+
+ return (findjsobjects_match(fjs, addr, flags,
+ findjsobjects_match_propname, propname));
+ }
+
+ if (constructor != NULL) {
+ if (propkind != NULL) {
+ mdb_warn("cannot specify both a constructor name "
+ "and a property kind\n");
+ return (DCMD_ERR);
+ }
+
+ return (findjsobjects_match(fjs, addr, flags,
+ findjsobjects_match_constructor, constructor));
+ }
+
+ if (propkind != NULL) {
+ return (findjsobjects_match(fjs, addr, flags,
+ findjsobjects_match_kind, propkind));
+ }
+
+ if (references && !(flags & DCMD_ADDRSPEC) &&
+ avl_is_empty(&fjs->fjs_referents)) {
+ mdb_warn("must specify or mark an object to find references\n");
+ return (DCMD_ERR);
+ }
+
+ if (fjs->fjs_marking && !(flags & DCMD_ADDRSPEC)) {
+ mdb_warn("must specify an object to mark\n");
+ return (DCMD_ERR);
+ }
+
+ if (references && fjs->fjs_marking) {
+ mdb_warn("can't both mark an object and find its references\n");
+ return (DCMD_ERR);
+ }
+
+ if (flags & DCMD_ADDRSPEC) {
+ findjsobjects_instance_t *inst, *head;
+
+ /*
+ * If we've been passed an address, it's to either list like
+ * objects (-l), mark an object (-m) or find references to the
+ * specified/marked objects (-r). (Note that the absence of
+ * any of these options implies -l.)
+ */
+ inst = findjsobjects_instance(fjs, addr, &head);
+
+ if (inst == NULL) {
+ mdb_warn("%p is not a valid object\n", addr);
+ return (DCMD_ERR);
+ }
+
+ if (!references && !fjs->fjs_marking) {
+ for (inst = head; inst != NULL; inst = inst->fjsi_next)
+ mdb_printf("%p\n", inst->fjsi_addr);
+
+ return (DCMD_OK);
+ }
+
+ if (!listlike) {
+ findjsobjects_referent(fjs, inst->fjsi_addr);
+ } else {
+ for (inst = head; inst != NULL; inst = inst->fjsi_next)
+ findjsobjects_referent(fjs, inst->fjsi_addr);
+ }
+ }
+
+ if (references)
+ findjsobjects_references(fjs);
+
+ if (references || fjs->fjs_marking)
+ return (DCMD_OK);
+
+ mdb_printf("%?s %8s %8s %s\n", "OBJECT",
+ "#OBJECTS", "#PROPS", "CONSTRUCTOR: PROPS");
+
+ for (obj = fjs->fjs_objects; obj != NULL; obj = obj->fjso_next) {
+ if (obj->fjso_malformed && !fjs->fjs_allobjs)
+ continue;
+
+ findjsobjects_print(obj);
+ }
+
+ return (DCMD_OK);
+}
+
+/*
+ * Given a Node Buffer object, print out details about it. With "-a", just
+ * print the address.
+ */
+/* ARGSUSED */
+static int
+dcmd_nodebuffer(uintptr_t addr, uint_t flags, int argc,
+ const mdb_arg_t *argv)
+{
+ boolean_t opt_f = B_FALSE;
+ char buf[80];
+ char *bufp = buf;
+ size_t len = sizeof (buf);
+ uintptr_t elts, rawbuf;
+
+ /*
+ * The undocumented "-f" option allows users to override constructor
+ * checks.
+ */
+ if (mdb_getopts(argc, argv,
+ 'f', MDB_OPT_SETBITS, B_TRUE, &opt_f, NULL) != argc)
+ return (DCMD_USAGE);
+
+ if (!opt_f) {
+ if (obj_jsconstructor(addr, &bufp, &len, B_FALSE) != 0)
+ return (DCMD_ERR);
+
+ if (strcmp(buf, "Buffer") != 0) {
+ mdb_warn("%p does not appear to be a buffer\n", addr);
+ return (DCMD_ERR);
+ }
+ }
+
+ if (read_heap_ptr(&elts, addr, V8_OFF_JSOBJECT_ELEMENTS) != 0)
+ return (DCMD_ERR);
+
+ if (obj_v8internal(elts, 0, &rawbuf) != 0)
+ return (DCMD_ERR);
+
+ mdb_printf("%p\n", rawbuf);
+ return (DCMD_OK);
+}
+
+/* ARGSUSED */
+static int
+dcmd_jsconstructor(uintptr_t addr, uint_t flags, int argc,
+ const mdb_arg_t *argv)
+{
+ boolean_t opt_v = B_FALSE;
+ char buf[80];
+ char *bufp;
+ size_t len = sizeof (buf);
+
+ if (mdb_getopts(argc, argv, 'v', MDB_OPT_SETBITS, B_TRUE, &opt_v,
+ NULL) != argc)
+ return (DCMD_USAGE);
+
+ bufp = buf;
+ if (obj_jsconstructor(addr, &bufp, &len, opt_v))
+ return (DCMD_ERR);
+
+ mdb_printf("%s\n", buf);
+ return (DCMD_OK);
+}
+
+/* ARGSUSED */
+static int
+dcmd_jsframe(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ uintptr_t fptr, raddr;
+ boolean_t opt_i = B_FALSE;
+ jsframe_t jsf;
+ int rv;
+
+ bzero(&jsf, sizeof (jsf));
+ jsf.jsf_nlines = 5;
+
+ if (mdb_getopts(argc, argv,
+ 'a', MDB_OPT_SETBITS, B_TRUE, &jsf.jsf_showall,
+ 'v', MDB_OPT_SETBITS, B_TRUE, &jsf.jsf_verbose,
+ 'i', MDB_OPT_SETBITS, B_TRUE, &opt_i,
+ 'f', MDB_OPT_STR, &jsf.jsf_func,
+ 'n', MDB_OPT_UINTPTR, &jsf.jsf_nlines,
+ 'p', MDB_OPT_STR, &jsf.jsf_prop, NULL) != argc)
+ return (DCMD_USAGE);
+
+ /*
+ * As with $C, we assume we are given a *pointer* to the frame pointer
+ * for a frame, rather than the actual frame pointer for the frame of
+ * interest. This is needed to show the instruction pointer, which is
+ * actually stored with the next frame. For debugging, this can be
+ * overridden with the "-i" option (for "immediate").
+ */
+ if (opt_i) {
+ rv = do_jsframe(addr, 0, &jsf);
+ if (rv == 0)
+ jsframe_print_skipped(&jsf);
+ return (rv);
+ }
+
+ if (mdb_vread(&raddr, sizeof (raddr),
+ addr + sizeof (uintptr_t)) == -1) {
+ mdb_warn("failed to read return address from %p",
+ addr + sizeof (uintptr_t));
+ return (DCMD_ERR);
+ }
+
+ if (mdb_vread(&fptr, sizeof (fptr), addr) == -1) {
+ mdb_warn("failed to read frame pointer from %p", addr);
+ return (DCMD_ERR);
+ }
+
+ if (fptr == NULL)
+ return (DCMD_OK);
+
+ rv = do_jsframe(fptr, raddr, &jsf);
+ if (rv == 0)
+ jsframe_print_skipped(&jsf);
+ return (rv);
+}
+
+static void
+jsobj_print_propinfo(jspropinfo_t propinfo)
+{
+ if (propinfo == JPI_NONE)
+ return;
+
+ mdb_printf("property kind: ");
+ if ((propinfo & JPI_NUMERIC) != 0)
+ mdb_printf("numeric-named ");
+ if ((propinfo & JPI_DICT) != 0)
+ mdb_printf("dictionary ");
+ if ((propinfo & JPI_INOBJECT) != 0)
+ mdb_printf("in-object ");
+ if ((propinfo & JPI_PROPS) != 0)
+ mdb_printf("\"properties\" array ");
+ mdb_printf("\n");
+
+ if ((propinfo & (JPI_HASTRANSITIONS | JPI_HASCONTENT)) != 0) {
+ mdb_printf("fallbacks: ");
+ if ((propinfo & JPI_HASTRANSITIONS) != 0)
+ mdb_printf("transitions ");
+ if ((propinfo & JPI_HASCONTENT) != 0)
+ mdb_printf("content ");
+ mdb_printf("\n");
+ }
+
+ if ((propinfo & JPI_SKIPPED) != 0)
+ mdb_printf(
+ "some properties skipped due to unexpected layout\n");
+ if ((propinfo & JPI_BADLAYOUT) != 0)
+ mdb_printf("object has unexpected layout\n");
+}
+
+/* ARGSUSED */
+static int
+dcmd_jsprint(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ char *buf, *bufp;
+ size_t bufsz = 262144, len = bufsz;
+ jsobj_print_t jsop;
+ boolean_t opt_b = B_FALSE;
+ boolean_t opt_v = B_FALSE;
+ int rv, i;
+
+ bzero(&jsop, sizeof (jsop));
+ jsop.jsop_depth = 2;
+ jsop.jsop_printaddr = B_FALSE;
+
+ i = mdb_getopts(argc, argv,
+ 'a', MDB_OPT_SETBITS, B_TRUE, &jsop.jsop_printaddr,
+ 'b', MDB_OPT_SETBITS, B_TRUE, &opt_b,
+ 'd', MDB_OPT_UINT64, &jsop.jsop_depth,
+ 'v', MDB_OPT_SETBITS, B_TRUE, &opt_v, NULL);
+
+ if (opt_b)
+ jsop.jsop_baseaddr = addr;
+
+ do {
+ if (i != argc) {
+ const mdb_arg_t *member = &argv[i++];
+
+ if (member->a_type != MDB_TYPE_STRING)
+ return (DCMD_USAGE);
+
+ jsop.jsop_member = member->a_un.a_str;
+ }
+
+ for (;;) {
+ if ((buf = bufp =
+ mdb_zalloc(bufsz, UM_NOSLEEP)) == NULL)
+ return (DCMD_ERR);
+
+ jsop.jsop_bufp = &bufp;
+ jsop.jsop_lenp = &len;
+
+ rv = jsobj_print(addr, &jsop);
+
+ if (len > 0)
+ break;
+
+ mdb_free(buf, bufsz);
+ bufsz <<= 1;
+ len = bufsz;
+ }
+
+ if (jsop.jsop_member == NULL && rv != 0) {
+ if (!jsop.jsop_descended)
+ mdb_warn("%s\n", buf);
+
+ return (DCMD_ERR);
+ }
+
+ if (jsop.jsop_member && !jsop.jsop_found) {
+ if (jsop.jsop_baseaddr)
+ (void) mdb_printf("%p: ", jsop.jsop_baseaddr);
+
+ (void) mdb_printf("undefined%s",
+ i < argc ? " " : "");
+ } else {
+ (void) mdb_printf("%s%s", buf, i < argc &&
+ !isspace(buf[strlen(buf) - 1]) ? " " : "");
+ }
+
+ mdb_free(buf, bufsz);
+ jsop.jsop_found = B_FALSE;
+ jsop.jsop_baseaddr = NULL;
+ } while (i < argc);
+
+ mdb_printf("\n");
+
+ if (opt_v)
+ jsobj_print_propinfo(jsop.jsop_propinfo);
+
+ return (DCMD_OK);
+}
+
+/* ARGSUSED */
+static int
+dcmd_jssource(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ const char *typename;
+ uintptr_t nlines = 5;
+ uintptr_t funcinfop, scriptp, funcnamep;
+ uintptr_t tokpos, endpos;
+ uint8_t type;
+ char buf[256];
+ char *bufp = buf;
+ size_t len = sizeof (buf);
+
+ if (mdb_getopts(argc, argv, 'n', MDB_OPT_UINTPTR, &nlines,
+ NULL) != argc)
+ return (DCMD_USAGE);
+
+ if (!V8_IS_HEAPOBJECT(addr) || read_typebyte(&type, addr) != 0) {
+ mdb_warn("%p is not a heap object\n", addr);
+ return (DCMD_ERR);
+ }
+
+ typename = enum_lookup_str(v8_types, type, "");
+ if (strcmp(typename, "JSFunction") != 0) {
+ mdb_warn("%p is not a JSFunction\n", addr);
+ return (DCMD_ERR);
+ }
+
+ if (read_heap_ptr(&funcinfop, addr, V8_OFF_JSFUNCTION_SHARED) != 0 ||
+ read_heap_ptr(&scriptp, funcinfop,
+ V8_OFF_SHAREDFUNCTIONINFO_SCRIPT) != 0 ||
+ read_heap_ptr(&funcnamep, scriptp, V8_OFF_SCRIPT_NAME) != 0) {
+ mdb_warn("%p: failed to find script for function\n", addr);
+ return (DCMD_ERR);
+ }
+
+ if (read_heap_maybesmi(&tokpos, funcinfop,
+ V8_OFF_SHAREDFUNCTIONINFO_FUNCTION_TOKEN_POSITION) != 0 ||
+ read_heap_maybesmi(&endpos, funcinfop,
+ V8_OFF_SHAREDFUNCTIONINFO_END_POSITION) != 0) {
+ mdb_warn("%p: failed to find function's boundaries\n", addr);
+ }
+
+ if (jsstr_print(funcnamep, JSSTR_NUDE, &bufp, &len) == 0)
+ mdb_printf("file: %s\n", buf);
+
+ if (tokpos != endpos)
+ jsfunc_lines(scriptp, tokpos, endpos, nlines, "%5d ");
+ mdb_printf("\n");
+ return (DCMD_OK);
+}
+
+/* ARGSUSED */
+static int
+dcmd_jsfunctions(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ findjsobjects_state_t *fjs = &findjsobjects_state;
+ findjsobjects_func_t *func;
+ uintptr_t funcinfo;
+ boolean_t showrange = B_FALSE;
+ const char *name = NULL, *filename = NULL;
+ uintptr_t instr = 0;
+
+ if (mdb_getopts(argc, argv,
+ 'x', MDB_OPT_UINTPTR, &instr,
+ 'X', MDB_OPT_SETBITS, B_TRUE, &showrange,
+ 'n', MDB_OPT_STR, &name,
+ 's', MDB_OPT_STR, &filename,
+ NULL) != argc)
+ return (DCMD_USAGE);
+
+ if (findjsobjects_run(fjs) != 0)
+ return (DCMD_ERR);
+
+ if (!showrange)
+ mdb_printf("%?s %8s %-40s %s\n", "FUNC", "#FUNCS", "NAME",
+ "FROM");
+ else
+ mdb_printf("%?s %8s %?s %?s %-40s %s\n", "FUNC", "#FUNCS",
+ "START", "END", "NAME", "FROM");
+
+ for (func = fjs->fjs_funcs; func != NULL; func = func->fjsf_next) {
+ uintptr_t code, ilen;
+
+ funcinfo = func->fjsf_shared;
+
+ if (func->fjsf_location[0] == '\0') {
+ uintptr_t tokpos, script, lends;
+ ptrdiff_t tokposoff =
+ V8_OFF_SHAREDFUNCTIONINFO_FUNCTION_TOKEN_POSITION;
+
+ /*
+ * We don't want to actually decode the token position
+ * as an SMI here, so we re-encode it when we pass it to
+ * jsfunc_lineno() below.
+ */
+ if (read_heap_maybesmi(&tokpos, funcinfo,
+ tokposoff) != 0 ||
+ read_heap_ptr(&script, funcinfo,
+ V8_OFF_SHAREDFUNCTIONINFO_SCRIPT) != 0 ||
+ read_heap_ptr(&lends, script,
+ V8_OFF_SCRIPT_LINE_ENDS) != 0 ||
+ jsfunc_lineno(lends, V8_VALUE_SMI(tokpos),
+ func->fjsf_location,
+ sizeof (func->fjsf_location), NULL) != 0) {
+ func->fjsf_location[0] = '\0';
+ }
+ }
+
+ if (name != NULL && strstr(func->fjsf_funcname, name) == NULL)
+ continue;
+
+ if (filename != NULL &&
+ strstr(func->fjsf_scriptname, filename) == NULL)
+ continue;
+
+ code = 0;
+ ilen = 0;
+ if ((showrange || instr != 0) &&
+ (read_heap_ptr(&code, funcinfo,
+ V8_OFF_SHAREDFUNCTIONINFO_CODE) != 0 ||
+ read_heap_ptr(&ilen, code,
+ V8_OFF_CODE_INSTRUCTION_SIZE) != 0)) {
+ code = 0;
+ ilen = 0;
+ }
+
+ if ((instr != 0 && ilen != 0) &&
+ (instr < code + V8_OFF_CODE_INSTRUCTION_START ||
+ instr >= code + V8_OFF_CODE_INSTRUCTION_START + ilen))
+ continue;
+
+ if (!showrange) {
+ mdb_printf("%?p %8d %-40s %s %s\n",
+ func->fjsf_instances.fjsi_addr,
+ func->fjsf_ninstances, func->fjsf_funcname,
+ func->fjsf_scriptname, func->fjsf_location);
+ } else {
+ uintptr_t code, ilen;
+
+ if (read_heap_ptr(&code, funcinfo,
+ V8_OFF_SHAREDFUNCTIONINFO_CODE) != 0 ||
+ read_heap_ptr(&ilen, code,
+ V8_OFF_CODE_INSTRUCTION_SIZE) != 0) {
+ mdb_printf("%?p %8d %?s %?s %-40s %s %s\n",
+ func->fjsf_instances.fjsi_addr,
+ func->fjsf_ninstances, "?", "?",
+ func->fjsf_funcname, func->fjsf_scriptname,
+ func->fjsf_location);
+ } else {
+ mdb_printf("%?p %8d %?p %?p %-40s %s %s\n",
+ func->fjsf_instances.fjsi_addr,
+ func->fjsf_ninstances,
+ code + V8_OFF_CODE_INSTRUCTION_START,
+ code + V8_OFF_CODE_INSTRUCTION_START + ilen,
+ func->fjsf_funcname, func->fjsf_scriptname,
+ func->fjsf_location);
+ }
+ }
+ }
+
+ return (DCMD_OK);
+}
+
+static void
+dcmd_jsfunctions_help(void)
+{
+ mdb_printf("%s\n\n",
+"Lists JavaScript functions, optionally filtered by a substring of the\n"
+"function name or script filename or by the instruction address. This uses\n"
+"the cache created by ::findjsobjects. If ::findjsobjects has not already\n"
+"been run, this command runs it automatically without printing the output.\n"
+"This can take anywhere from a second to several minutes, depending on the\n"
+"size of the core dump.\n"
+"\n"
+"It's important to keep in mind that each time you create a function in\n"
+"JavaScript (even from a function definition that has already been used),\n"
+"the VM must create a new object to represent it. For example, if your\n"
+"program has a function A that returns a closure B, the VM will create new\n"
+"instances of the closure function (B) each time the surrounding function (A)\n"
+"is called. To show this, the output of this command consists of one line \n"
+"per function definition that appears in the JavaScript source, and the\n"
+"\"#FUNCS\" column shows how many different functions were created by VM from\n"
+"this definition.");
+
+ mdb_dec_indent(2);
+ mdb_printf("%<b>OPTIONS%</b>\n");
+ mdb_inc_indent(2);
+
+ mdb_printf("%s\n",
+" -f file List functions that were defined in a file whose name contains\n"
+" this substring.\n"
+" -n func List functions whose name contains this substring\n"
+" -x instr List functions whose compiled instructions include this address\n"
+" -X Show where the function's instructions are stored in memory\n");
+}
+
+/* ARGSUSED */
+static int
+dcmd_v8field(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ v8_class_t *clp;
+ v8_field_t *flp;
+ const char *klass, *field;
+ uintptr_t offset = 0;
+
+ /*
+ * We may be invoked with either two arguments (class and field name) or
+ * three (an offset to save).
+ */
+ if (argc != 2 && argc != 3)
+ return (DCMD_USAGE);
+
+ if (argv[0].a_type != MDB_TYPE_STRING ||
+ argv[1].a_type != MDB_TYPE_STRING)
+ return (DCMD_USAGE);
+
+ klass = argv[0].a_un.a_str;
+ field = argv[1].a_un.a_str;
+
+ if (argc == 3) {
+ if (argv[2].a_type != MDB_TYPE_STRING)
+ return (DCMD_USAGE);
+
+ offset = mdb_strtoull(argv[2].a_un.a_str);
+ }
+
+ for (clp = v8_classes; clp != NULL; clp = clp->v8c_next)
+ if (strcmp(clp->v8c_name, klass) == 0)
+ break;
+
+ if (clp == NULL) {
+ (void) mdb_printf("error: no such class: \"%s\"", klass);
+ return (DCMD_ERR);
+ }
+
+ for (flp = clp->v8c_fields; flp != NULL; flp = flp->v8f_next)
+ if (strcmp(field, flp->v8f_name) == 0)
+ break;
+
+ if (flp == NULL) {
+ if (argc == 2) {
+ mdb_printf("error: no such field in class \"%s\": "
+ "\"%s\"", klass, field);
+ return (DCMD_ERR);
+ }
+
+ flp = conf_field_create(clp, field, offset);
+ if (flp == NULL) {
+ mdb_warn("failed to create field");
+ return (DCMD_ERR);
+ }
+ } else if (argc == 3) {
+ flp->v8f_offset = offset;
+ }
+
+ mdb_printf("%s::%s at offset 0x%x\n", klass, field, flp->v8f_offset);
+ return (DCMD_OK);
+}
+
+/* ARGSUSED */
+static int
+dcmd_v8array(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ uint8_t type;
+ uintptr_t *array;
+ size_t ii, len;
+
+ if (read_typebyte(&type, addr) != 0)
+ return (DCMD_ERR);
+
+ if (type != V8_TYPE_FIXEDARRAY) {
+ mdb_warn("%p is not an instance of FixedArray\n", addr);
+ return (DCMD_ERR);
+ }
+
+ if (read_heap_array(addr, &array, &len, UM_SLEEP | UM_GC) != 0)
+ return (DCMD_ERR);
+
+ for (ii = 0; ii < len; ii++)
+ mdb_printf("%p\n", array[ii]);
+
+ return (DCMD_OK);
+}
+
+/* ARGSUSED */
+static int
+dcmd_jsstack(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ uintptr_t raddr;
+ jsframe_t jsf;
+
+ bzero(&jsf, sizeof (jsf));
+ jsf.jsf_nlines = 5;
+
+ if (mdb_getopts(argc, argv,
+ 'a', MDB_OPT_SETBITS, B_TRUE, &jsf.jsf_showall,
+ 'v', MDB_OPT_SETBITS, B_TRUE, &jsf.jsf_verbose,
+ 'f', MDB_OPT_STR, &jsf.jsf_func,
+ 'n', MDB_OPT_UINTPTR, &jsf.jsf_nlines,
+ 'p', MDB_OPT_STR, &jsf.jsf_prop,
+ NULL) != argc)
+ return (DCMD_USAGE);
+
+ /*
+ * The "::jsframe" walker iterates the valid frame pointers, but the
+ * "::jsframe" dcmd looks at the frame after the one it was given, so we
+ * have to explicitly examine the top frame here.
+ */
+ if (!(flags & DCMD_ADDRSPEC)) {
+ if (load_current_context(&addr, &raddr) != 0 ||
+ do_jsframe(addr, raddr, &jsf) != 0)
+ return (DCMD_ERR);
+ }
+
+ if (mdb_pwalk_dcmd("jsframe", "jsframe", argc, argv, addr) == -1)
+ return (DCMD_ERR);
+
+ jsframe_print_skipped(&jsf);
+ return (DCMD_OK);
+}
+
+/* ARGSUSED */
+static int
+dcmd_v8str(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ boolean_t opt_v = B_FALSE;
+ char buf[512 * 1024];
+ char *bufp;
+ size_t len;
+
+ if (mdb_getopts(argc, argv, 'v', MDB_OPT_SETBITS, B_TRUE, &opt_v,
+ NULL) != argc)
+ return (DCMD_USAGE);
+
+ bufp = buf;
+ len = sizeof (buf);
+ if (jsstr_print(addr, (opt_v ? JSSTR_VERBOSE : JSSTR_NONE) |
+ JSSTR_QUOTED, &bufp, &len) != 0)
+ return (DCMD_ERR);
+
+ mdb_printf("%s\n", buf);
+ return (DCMD_OK);
+}
+
+static void
+dcmd_v8load_help(void)
+{
+ v8_cfg_t *cfp, **cfgpp;
+
+ mdb_printf(
+ "To traverse in-memory V8 structures, the V8 dmod requires\n"
+ "configuration that describes the layout of various V8 structures\n"
+ "in memory. Normally, this information is pulled from metadata\n"
+ "in the target binary. However, it's possible to use the module\n"
+ "with a binary not built with metadata by loading one of the\n"
+ "canned configurations.\n\n");
+
+ mdb_printf("Available configurations:\n");
+
+ (void) mdb_inc_indent(4);
+
+ for (cfgpp = v8_cfgs; *cfgpp != NULL; cfgpp++) {
+ cfp = *cfgpp;
+ mdb_printf("%-10s %s\n", cfp->v8cfg_name, cfp->v8cfg_label);
+ }
+
+ (void) mdb_dec_indent(4);
+}
+
+/* ARGSUSED */
+static int
+dcmd_v8load(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ v8_cfg_t *cfgp = NULL, **cfgpp;
+
+ if (v8_classes != NULL) {
+ mdb_warn("v8 module already configured\n");
+ return (DCMD_ERR);
+ }
+
+ if (argc < 1 || argv->a_type != MDB_TYPE_STRING)
+ return (DCMD_USAGE);
+
+ for (cfgpp = v8_cfgs; *cfgpp != NULL; cfgpp++) {
+ cfgp = *cfgpp;
+ if (strcmp(argv->a_un.a_str, cfgp->v8cfg_name) == 0)
+ break;
+ }
+
+ if (cfgp == NULL || cfgp->v8cfg_name == NULL) {
+ mdb_warn("unknown configuration: \"%s\"\n", argv->a_un.a_str);
+ return (DCMD_ERR);
+ }
+
+ if (autoconfigure(cfgp) == -1) {
+ mdb_warn("autoconfigure failed\n");
+ return (DCMD_ERR);
+ }
+
+ mdb_printf("V8 dmod configured based on %s\n", cfgp->v8cfg_name);
+ return (DCMD_OK);
+}
+
+/* ARGSUSED */
+static int
+dcmd_v8warnings(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
+{
+ v8_warnings ^= 1;
+ mdb_printf("v8 warnings are now %s\n", v8_warnings ? "on" : "off");
+
+ return (DCMD_OK);
+}
+
+static int
+walk_jsframes_init(mdb_walk_state_t *wsp)
+{
+ if (wsp->walk_addr != NULL)
+ return (WALK_NEXT);
+
+ if (load_current_context(&wsp->walk_addr, NULL) != 0)
+ return (WALK_ERR);
+
+ return (WALK_NEXT);
+}
+
+static int
+walk_jsframes_step(mdb_walk_state_t *wsp)
+{
+ uintptr_t addr, next;
+ int rv;
+
+ addr = wsp->walk_addr;
+ rv = wsp->walk_callback(wsp->walk_addr, NULL, wsp->walk_cbdata);
+
+ if (rv != WALK_NEXT)
+ return (rv);
+
+ if (mdb_vread(&next, sizeof (next), addr) == -1)
+ return (WALK_ERR);
+
+ if (next == NULL)
+ return (WALK_DONE);
+
+ wsp->walk_addr = next;
+ return (WALK_NEXT);
+}
+
+typedef struct jsprop_walk_data {
+ int jspw_nprops;
+ int jspw_current;
+ uintptr_t *jspw_props;
+} jsprop_walk_data_t;
+
+/*ARGSUSED*/
+static int
+walk_jsprop_nprops(const char *desc, uintptr_t val, void *arg)
+{
+ jsprop_walk_data_t *jspw = arg;
+ jspw->jspw_nprops++;
+
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+walk_jsprop_props(const char *desc, uintptr_t val, void *arg)
+{
+ jsprop_walk_data_t *jspw = arg;
+ jspw->jspw_props[jspw->jspw_current++] = val;
+
+ return (0);
+}
+
+static int
+walk_jsprop_init(mdb_walk_state_t *wsp)
+{
+ jsprop_walk_data_t *jspw;
+ uintptr_t addr;
+ uint8_t type;
+
+ if ((addr = wsp->walk_addr) == NULL) {
+ mdb_warn("'jsprop' does not support global walks\n");
+ return (WALK_ERR);
+ }
+
+ if (!V8_IS_HEAPOBJECT(addr) || read_typebyte(&type, addr) != 0 ||
+ type != V8_TYPE_JSOBJECT) {
+ mdb_warn("%p is not a JSObject\n", addr);
+ return (WALK_ERR);
+ }
+
+ jspw = mdb_zalloc(sizeof (jsprop_walk_data_t), UM_SLEEP | UM_GC);
+
+ if (jsobj_properties(addr, walk_jsprop_nprops, jspw, NULL) == -1) {
+ mdb_warn("couldn't iterate over properties for %p\n", addr);
+ return (WALK_ERR);
+ }
+
+ jspw->jspw_props = mdb_zalloc(jspw->jspw_nprops *
+ sizeof (uintptr_t), UM_SLEEP | UM_GC);
+
+ if (jsobj_properties(addr, walk_jsprop_props, jspw, NULL) == -1) {
+ mdb_warn("couldn't iterate over properties for %p\n", addr);
+ return (WALK_ERR);
+ }
+
+ jspw->jspw_current = 0;
+ wsp->walk_data = jspw;
+
+ return (WALK_NEXT);
+}
+
+static int
+walk_jsprop_step(mdb_walk_state_t *wsp)
+{
+ jsprop_walk_data_t *jspw = wsp->walk_data;
+ int rv;
+
+ if (jspw->jspw_current >= jspw->jspw_nprops)
+ return (WALK_DONE);
+
+ if ((rv = wsp->walk_callback(jspw->jspw_props[jspw->jspw_current++],
+ NULL, wsp->walk_cbdata)) != WALK_NEXT)
+ return (rv);
+
+ return (WALK_NEXT);
+}
+
+/*
+ * MDB linkage
+ */
+
+static const mdb_dcmd_t v8_mdb_dcmds[] = {
+ /*
+ * Commands to inspect Node-level state
+ */
+ { "nodebuffer", ":[-a]",
+ "print details about the given Node Buffer", dcmd_nodebuffer },
+
+ /*
+ * Commands to inspect JavaScript-level state
+ */
+ { "jsconstructor", ":[-v]",
+ "print the constructor for a JavaScript object",
+ dcmd_jsconstructor },
+ { "jsframe", ":[-aiv] [-f function] [-p property] [-n numlines]",
+ "summarize a JavaScript stack frame", dcmd_jsframe },
+ { "jsprint", ":[-ab] [-d depth] [member]", "print a JavaScript object",
+ dcmd_jsprint },
+ { "jssource", ":[-n numlines]",
+ "print the source code for a JavaScript function",
+ dcmd_jssource },
+ { "jsstack", "[-av] [-f function] [-p property] [-n numlines]",
+ "print a JavaScript stacktrace", dcmd_jsstack },
+ { "findjsobjects", "?[-vb] [-r | -c cons | -p prop]", "find JavaScript "
+ "objects", dcmd_findjsobjects, dcmd_findjsobjects_help },
+ { "jsfunctions", "[-X] [-s file_filter] [-n name_filter] "
+ "[-x instr_filter]", "list JavaScript functions",
+ dcmd_jsfunctions, dcmd_jsfunctions_help },
+
+ /*
+ * Commands to inspect V8-level state
+ */
+ { "v8array", ":", "print elements of a V8 FixedArray",
+ dcmd_v8array },
+ { "v8classes", NULL, "list known V8 heap object C++ classes",
+ dcmd_v8classes },
+ { "v8code", ":[-d]", "print information about a V8 Code object",
+ dcmd_v8code },
+ { "v8field", "classname fieldname offset",
+ "manually add a field to a given class", dcmd_v8field },
+ { "v8function", ":[-d]", "print JSFunction object details",
+ dcmd_v8function },
+ { "v8internal", ":[fieldidx]", "print v8 object internal fields",
+ dcmd_v8internal },
+ { "v8load", "version", "load canned config for a specific V8 version",
+ dcmd_v8load, dcmd_v8load_help },
+ { "v8frametypes", NULL, "list known V8 frame types",
+ dcmd_v8frametypes },
+ { "v8print", ":[class]", "print a V8 heap object",
+ dcmd_v8print, dcmd_v8print_help },
+ { "v8str", ":[-v]", "print the contents of a V8 string",
+ dcmd_v8str },
+ { "v8type", ":", "print the type of a V8 heap object",
+ dcmd_v8type },
+ { "v8types", NULL, "list known V8 heap object types",
+ dcmd_v8types },
+ { "v8warnings", NULL, "toggle V8 warnings",
+ dcmd_v8warnings },
+
+ { NULL }
+};
+
+static const mdb_walker_t v8_mdb_walkers[] = {
+ { "jsframe", "walk V8 JavaScript stack frames",
+ walk_jsframes_init, walk_jsframes_step },
+ { "jsprop", "walk property values for an object",
+ walk_jsprop_init, walk_jsprop_step },
+ { NULL }
+};
+
+static mdb_modinfo_t v8_mdb = { MDB_API_VERSION, v8_mdb_dcmds, v8_mdb_walkers };
+
+static void
+configure(void)
+{
+ char *success;
+ v8_cfg_t *cfgp = NULL;
+ GElf_Sym sym;
+ int major, minor, build, patch;
+
+ if (mdb_readsym(&major, sizeof (major),
+ "_ZN2v88internal7Version6major_E") == -1 ||
+ mdb_readsym(&minor, sizeof (minor),
+ "_ZN2v88internal7Version6minor_E") == -1 ||
+ mdb_readsym(&build, sizeof (build),
+ "_ZN2v88internal7Version6build_E") == -1 ||
+ mdb_readsym(&patch, sizeof (patch),
+ "_ZN2v88internal7Version6patch_E") == -1) {
+ mdb_warn("failed to determine V8 version");
+ return;
+ }
+
+ v8_major = major;
+ v8_minor = minor;
+ v8_build = build;
+ v8_patch = patch;
+ mdb_printf("V8 version: %d.%d.%d.%d\n",
+ v8_major, v8_minor, v8_build, v8_patch);
+
+ /*
+ * First look for debug metadata embedded within the binary, which may
+ * be present in recent V8 versions built with postmortem metadata.
+ */
+ if (mdb_lookup_by_name("v8dbg_SmiTag", &sym) == 0) {
+ cfgp = &v8_cfg_target;
+ success = "Autoconfigured V8 support from target";
+ } else if (v8_major == 3 && v8_minor == 1 && v8_build == 8) {
+ cfgp = &v8_cfg_04;
+ success = "Configured V8 support based on node v0.4";
+ } else if (v8_major == 3 && v8_minor == 6 && v8_build == 6) {
+ cfgp = &v8_cfg_06;
+ success = "Configured V8 support based on node v0.6";
+ } else {
+ mdb_printf("mdb_v8: target has no debug metadata and "
+ "no existing config found\n");
+ return;
+ }
+
+ if (autoconfigure(cfgp) != 0) {
+ mdb_warn("failed to autoconfigure from target; "
+ "commands may have incorrect results!\n");
+ return;
+ }
+
+ mdb_printf("%s\n", success);
+}
+
+static void
+enable_demangling(void)
+{
+ const char *symname = "_ZN2v88internal7Version6major_E";
+ GElf_Sym sym;
+ char buf[64];
+
+ /*
+ * Try to determine whether C++ symbol demangling has been enabled. If
+ * not, enable it.
+ */
+ if (mdb_lookup_by_name("_ZN2v88internal7Version6major_E", &sym) != 0)
+ return;
+
+ (void) mdb_snprintf(buf, sizeof (buf), "%a", sym.st_value);
+ if (strstr(buf, symname) != NULL)
+ (void) mdb_eval("$G");
+}
+
+const mdb_modinfo_t *
+_mdb_init(void)
+{
+ configure();
+ enable_demangling();
+ return (&v8_mdb);
+}
diff --git a/usr/src/cmd/mdb/common/modules/v8/mdb_v8_cfg.c b/usr/src/cmd/mdb/common/modules/v8/mdb_v8_cfg.c
new file mode 100644
index 0000000000..d907242435
--- /dev/null
+++ b/usr/src/cmd/mdb/common/modules/v8/mdb_v8_cfg.c
@@ -0,0 +1,728 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * mdb_v8_cfg.c: canned configurations for previous V8 versions.
+ *
+ * The functions and data defined here enable this dmod to support debugging
+ * Node.js binaries that predated V8's built-in postmortem debugging support.
+ */
+
+#include "v8cfg.h"
+
+/*ARGSUSED*/
+static int
+v8cfg_target_iter(v8_cfg_t *cfgp, int (*func)(mdb_symbol_t *, void *),
+ void *arg)
+{
+ return (mdb_symbol_iter(MDB_OBJ_EVERY, MDB_DYNSYM,
+ MDB_BIND_GLOBAL | MDB_TYPE_OBJECT | MDB_TYPE_FUNC,
+ func, arg));
+}
+
+/*ARGSUSED*/
+static int
+v8cfg_target_readsym(v8_cfg_t *cfgp, const char *name, intptr_t *valp)
+{
+ int val, rval;
+
+ if ((rval = mdb_readsym(&val, sizeof (val), name)) != -1)
+ *valp = (intptr_t)val;
+
+ return (rval);
+}
+
+/*
+ * Analog of mdb_symbol_iter() for a canned configuration.
+ */
+static int
+v8cfg_canned_iter(v8_cfg_t *cfgp, int (*func)(mdb_symbol_t *, void *),
+ void *arg)
+{
+ v8_cfg_symbol_t *v8sym;
+ mdb_symbol_t mdbsym;
+ int rv;
+
+ for (v8sym = cfgp->v8cfg_symbols; v8sym->v8cs_name != NULL; v8sym++) {
+ mdbsym.sym_name = v8sym->v8cs_name;
+ mdbsym.sym_object = NULL;
+ mdbsym.sym_sym = NULL;
+ mdbsym.sym_table = 0;
+ mdbsym.sym_id = 0;
+
+ if ((rv = func(&mdbsym, arg)) != 0)
+ return (rv);
+ }
+
+ return (0);
+}
+
+/*
+ * Analog of mdb_readsym() for a canned configuration.
+ */
+static int
+v8cfg_canned_readsym(v8_cfg_t *cfgp, const char *name, intptr_t *valp)
+{
+ v8_cfg_symbol_t *v8sym;
+
+ for (v8sym = cfgp->v8cfg_symbols; v8sym->v8cs_name != NULL; v8sym++) {
+ if (strcmp(name, v8sym->v8cs_name) == 0)
+ break;
+ }
+
+ if (v8sym->v8cs_name == NULL)
+ return (-1);
+
+ *valp = v8sym->v8cs_value;
+ return (0);
+}
+
+/*
+ * Canned configuration for the V8 bundled with Node.js v0.4.8 and later.
+ */
+static v8_cfg_symbol_t v8_symbols_node_04[] = {
+ { "v8dbg_type_AccessCheckInfo__ACCESS_CHECK_INFO_TYPE", 0x91 },
+ { "v8dbg_type_AccessorInfo__ACCESSOR_INFO_TYPE", 0x90 },
+ { "v8dbg_type_BreakPointInfo__BREAK_POINT_INFO_TYPE", 0x9b },
+ { "v8dbg_type_ByteArray__BYTE_ARRAY_TYPE", 0x86 },
+ { "v8dbg_type_CallHandlerInfo__CALL_HANDLER_INFO_TYPE", 0x93 },
+ { "v8dbg_type_Code__CODE_TYPE", 0x81 },
+ { "v8dbg_type_CodeCache__CODE_CACHE_TYPE", 0x99 },
+ { "v8dbg_type_ConsString__CONS_ASCII_STRING_TYPE", 0x5 },
+ { "v8dbg_type_ConsString__CONS_ASCII_SYMBOL_TYPE", 0x45 },
+ { "v8dbg_type_ConsString__CONS_STRING_TYPE", 0x1 },
+ { "v8dbg_type_ConsString__CONS_SYMBOL_TYPE", 0x41 },
+ { "v8dbg_type_DebugInfo__DEBUG_INFO_TYPE", 0x9a },
+ { "v8dbg_type_ExternalAsciiString__EXTERNAL_ASCII_STRING_TYPE", 0x6 },
+ { "v8dbg_type_ExternalAsciiString__EXTERNAL_ASCII_SYMBOL_TYPE", 0x46 },
+ { "v8dbg_type_ExternalByteArray__EXTERNAL_BYTE_ARRAY_TYPE", 0x88 },
+ { "v8dbg_type_ExternalFloatArray__EXTERNAL_FLOAT_ARRAY_TYPE", 0x8e },
+ { "v8dbg_type_ExternalIntArray__EXTERNAL_INT_ARRAY_TYPE", 0x8c },
+ { "v8dbg_type_ExternalShortArray__EXTERNAL_SHORT_ARRAY_TYPE", 0x8a },
+ { "v8dbg_type_ExternalString__EXTERNAL_STRING_TYPE", 0x2 },
+ { "v8dbg_type_ExternalString__EXTERNAL_SYMBOL_TYPE", 0x42 },
+ { "v8dbg_type_ExternalUnsignedByteArray__"
+ "EXTERNAL_UNSIGNED_BYTE_ARRAY_TYPE", 0x89 },
+ { "v8dbg_type_ExternalUnsignedIntArray__"
+ "EXTERNAL_UNSIGNED_INT_ARRAY_TYPE", 0x8d },
+ { "v8dbg_type_ExternalUnsignedShortArray__"
+ "EXTERNAL_UNSIGNED_SHORT_ARRAY_TYPE", 0x8b },
+ { "v8dbg_type_FixedArray__FIXED_ARRAY_TYPE", 0x9c },
+ { "v8dbg_type_FunctionTemplateInfo__"
+ "FUNCTION_TEMPLATE_INFO_TYPE", 0x94 },
+ { "v8dbg_type_HeapNumber__HEAP_NUMBER_TYPE", 0x84 },
+ { "v8dbg_type_InterceptorInfo__INTERCEPTOR_INFO_TYPE", 0x92 },
+ { "v8dbg_type_JSArray__JS_ARRAY_TYPE", 0xa5 },
+ { "v8dbg_type_JSBuiltinsObject__JS_BUILTINS_OBJECT_TYPE", 0xa3 },
+ { "v8dbg_type_JSFunction__JS_FUNCTION_TYPE", 0xa7 },
+ { "v8dbg_type_JSGlobalObject__JS_GLOBAL_OBJECT_TYPE", 0xa2 },
+ { "v8dbg_type_JSGlobalPropertyCell__"
+ "JS_GLOBAL_PROPERTY_CELL_TYPE", 0x83 },
+ { "v8dbg_type_JSGlobalProxy__JS_GLOBAL_PROXY_TYPE", 0xa4 },
+ { "v8dbg_type_JSMessageObject__JS_MESSAGE_OBJECT_TYPE", 0x9e },
+ { "v8dbg_type_JSObject__JS_OBJECT_TYPE", 0xa0 },
+ { "v8dbg_type_JSRegExp__JS_REGEXP_TYPE", 0xa6 },
+ { "v8dbg_type_JSValue__JS_VALUE_TYPE", 0x9f },
+ { "v8dbg_type_Map__MAP_TYPE", 0x80 },
+ { "v8dbg_type_ObjectTemplateInfo__OBJECT_TEMPLATE_INFO_TYPE", 0x95 },
+ { "v8dbg_type_Oddball__ODDBALL_TYPE", 0x82 },
+ { "v8dbg_type_Script__SCRIPT_TYPE", 0x98 },
+ { "v8dbg_type_SeqAsciiString__ASCII_STRING_TYPE", 0x4 },
+ { "v8dbg_type_SeqAsciiString__ASCII_SYMBOL_TYPE", 0x44 },
+ { "v8dbg_type_SharedFunctionInfo__SHARED_FUNCTION_INFO_TYPE", 0x9d },
+ { "v8dbg_type_SignatureInfo__SIGNATURE_INFO_TYPE", 0x96 },
+ { "v8dbg_type_String__STRING_TYPE", 0x0 },
+ { "v8dbg_type_String__SYMBOL_TYPE", 0x40 },
+ { "v8dbg_type_TypeSwitchInfo__TYPE_SWITCH_INFO_TYPE", 0x97 },
+
+ { "v8dbg_class_AccessCheckInfo__data__Object", 0xc },
+ { "v8dbg_class_AccessCheckInfo__indexed_callback__Object", 0x8 },
+ { "v8dbg_class_AccessCheckInfo__named_callback__Object", 0x4 },
+ { "v8dbg_class_AccessorInfo__data__Object", 0xc },
+ { "v8dbg_class_AccessorInfo__flag__Smi", 0x14 },
+ { "v8dbg_class_AccessorInfo__getter__Object", 0x4 },
+ { "v8dbg_class_AccessorInfo__name__Object", 0x10 },
+ { "v8dbg_class_AccessorInfo__setter__Object", 0x8 },
+ { "v8dbg_class_BreakPointInfo__break_point_objects__Object", 0x10 },
+ { "v8dbg_class_BreakPointInfo__code_position__Smi", 0x4 },
+ { "v8dbg_class_BreakPointInfo__source_position__Smi", 0x8 },
+ { "v8dbg_class_BreakPointInfo__statement_position__Smi", 0xc },
+ { "v8dbg_class_ByteArray__length__SMI", 0x4 },
+ { "v8dbg_class_CallHandlerInfo__callback__Object", 0x4 },
+ { "v8dbg_class_CallHandlerInfo__data__Object", 0x8 },
+ { "v8dbg_class_Code__deoptimization_data__FixedArray", 0xc },
+ { "v8dbg_class_Code__instruction_size__int", 0x4 },
+ { "v8dbg_class_Code__instruction_start__int", 0x20 },
+ { "v8dbg_class_Code__relocation_info__ByteArray", 0x8 },
+ { "v8dbg_class_CodeCache__default_cache__FixedArray", 0x4 },
+ { "v8dbg_class_CodeCache__normal_type_cache__Object", 0x8 },
+ { "v8dbg_class_ConsString__first__String", 0xc },
+ { "v8dbg_class_ConsString__second__String", 0x10 },
+ { "v8dbg_class_DebugInfo__break_points__FixedArray", 0x14 },
+ { "v8dbg_class_DebugInfo__code__Code", 0xc },
+ { "v8dbg_class_DebugInfo__original_code__Code", 0x8 },
+ { "v8dbg_class_DebugInfo__shared__SharedFunctionInfo", 0x4 },
+ { "v8dbg_class_ExternalString__resource__Object", 0xc },
+ { "v8dbg_class_FixedArray__data__uintptr_t", 0x8 },
+ { "v8dbg_class_FixedArray__length__SMI", 0x4 },
+ { "v8dbg_class_FunctionTemplateInfo__access_check_info__Object", 0x38 },
+ { "v8dbg_class_FunctionTemplateInfo__call_code__Object", 0x10 },
+ { "v8dbg_class_FunctionTemplateInfo__class_name__Object", 0x2c },
+ { "v8dbg_class_FunctionTemplateInfo__flag__Smi", 0x3c },
+ { "v8dbg_class_FunctionTemplateInfo__"
+ "indexed_property_handler__Object", 0x24 },
+ { "v8dbg_class_FunctionTemplateInfo__"
+ "instance_call_handler__Object", 0x34 },
+ { "v8dbg_class_FunctionTemplateInfo__instance_template__Object", 0x28 },
+ { "v8dbg_class_FunctionTemplateInfo__"
+ "named_property_handler__Object", 0x20 },
+ { "v8dbg_class_FunctionTemplateInfo__parent_template__Object", 0x1c },
+ { "v8dbg_class_FunctionTemplateInfo__"
+ "property_accessors__Object", 0x14 },
+ { "v8dbg_class_FunctionTemplateInfo__"
+ "prototype_template__Object", 0x18 },
+ { "v8dbg_class_FunctionTemplateInfo__serial_number__Object", 0xc },
+ { "v8dbg_class_FunctionTemplateInfo__signature__Object", 0x30 },
+ { "v8dbg_class_GlobalObject__builtins__JSBuiltinsObject", 0xc },
+ { "v8dbg_class_GlobalObject__global_context__Context", 0x10 },
+ { "v8dbg_class_GlobalObject__global_receiver__JSObject", 0x14 },
+ { "v8dbg_class_HeapNumber__value__SMI", 0x4 },
+ { "v8dbg_class_HeapObject__map__Map", 0x0 },
+ { "v8dbg_class_InterceptorInfo__data__Object", 0x18 },
+ { "v8dbg_class_InterceptorInfo__deleter__Object", 0x10 },
+ { "v8dbg_class_InterceptorInfo__enumerator__Object", 0x14 },
+ { "v8dbg_class_InterceptorInfo__getter__Object", 0x4 },
+ { "v8dbg_class_InterceptorInfo__query__Object", 0xc },
+ { "v8dbg_class_InterceptorInfo__setter__Object", 0x8 },
+ { "v8dbg_class_JSArray__length__Object", 0xc },
+ { "v8dbg_class_JSFunction__literals__FixedArray", 0x1c },
+ { "v8dbg_class_JSFunction__next_function_link__Object", 0x20 },
+ { "v8dbg_class_JSFunction__prototype_or_initial_map__Object", 0x10 },
+ { "v8dbg_class_JSFunction__shared__SharedFunctionInfo", 0x14 },
+ { "v8dbg_class_JSGlobalProxy__context__Object", 0xc },
+ { "v8dbg_class_JSMessageObject__arguments__JSArray", 0x10 },
+ { "v8dbg_class_JSMessageObject__end_position__SMI", 0x24 },
+ { "v8dbg_class_JSMessageObject__script__Object", 0x14 },
+ { "v8dbg_class_JSMessageObject__stack_frames__Object", 0x1c },
+ { "v8dbg_class_JSMessageObject__stack_trace__Object", 0x18 },
+ { "v8dbg_class_JSMessageObject__start_position__SMI", 0x20 },
+ { "v8dbg_class_JSMessageObject__type__String", 0xc },
+ { "v8dbg_class_JSObject__elements__Object", 0x8 },
+ { "v8dbg_class_JSObject__properties__FixedArray", 0x4 },
+ { "v8dbg_class_JSRegExp__data__Object", 0xc },
+ { "v8dbg_class_JSValue__value__Object", 0xc },
+ { "v8dbg_class_Map__code_cache__Object", 0x18 },
+ { "v8dbg_class_Map__constructor__Object", 0x10 },
+ { "v8dbg_class_Map__inobject_properties__int", 0x5 },
+ { "v8dbg_class_Map__instance_size__int", 0x4 },
+ { "v8dbg_class_Map__instance_attributes__int", 0x8 },
+ { "v8dbg_class_Map__instance_descriptors__DescriptorArray", 0x14 },
+ { "v8dbg_class_ObjectTemplateInfo__constructor__Object", 0xc },
+ { "v8dbg_class_ObjectTemplateInfo__"
+ "internal_field_count__Object", 0x10 },
+ { "v8dbg_class_Oddball__to_number__Object", 0x8 },
+ { "v8dbg_class_Oddball__to_string__String", 0x4 },
+ { "v8dbg_class_Script__column_offset__Smi", 0x10 },
+ { "v8dbg_class_Script__compilation_type__Smi", 0x24 },
+ { "v8dbg_class_Script__context_data__Object", 0x18 },
+ { "v8dbg_class_Script__data__Object", 0x14 },
+ { "v8dbg_class_Script__eval_from_instructions_offset__Smi", 0x34 },
+ { "v8dbg_class_Script__eval_from_shared__Object", 0x30 },
+ { "v8dbg_class_Script__id__Object", 0x2c },
+ { "v8dbg_class_Script__line_ends__Object", 0x28 },
+ { "v8dbg_class_Script__line_offset__Smi", 0xc },
+ { "v8dbg_class_Script__name__Object", 0x8 },
+ { "v8dbg_class_Script__source__Object", 0x4 },
+ { "v8dbg_class_Script__type__Smi", 0x20 },
+ { "v8dbg_class_Script__wrapper__Proxy", 0x1c },
+ { "v8dbg_class_SeqAsciiString__chars__char", 0xc },
+ { "v8dbg_class_SharedFunctionInfo__code__Code", 0x8 },
+ { "v8dbg_class_SharedFunctionInfo__compiler_hints__SMI", 0x50 },
+ { "v8dbg_class_SharedFunctionInfo__construct_stub__Code", 0x10 },
+ { "v8dbg_class_SharedFunctionInfo__debug_info__Object", 0x20 },
+ { "v8dbg_class_SharedFunctionInfo__end_position__SMI", 0x48 },
+ { "v8dbg_class_SharedFunctionInfo__"
+ "expected_nof_properties__SMI", 0x3c },
+ { "v8dbg_class_SharedFunctionInfo__formal_parameter_count__SMI", 0x38 },
+ { "v8dbg_class_SharedFunctionInfo__function_data__Object", 0x18 },
+ { "v8dbg_class_SharedFunctionInfo__"
+ "function_token_position__SMI", 0x4c },
+ { "v8dbg_class_SharedFunctionInfo__inferred_name__String", 0x24 },
+ { "v8dbg_class_SharedFunctionInfo__initial_map__Object", 0x28 },
+ { "v8dbg_class_SharedFunctionInfo__instance_class_name__Object", 0x14 },
+ { "v8dbg_class_SharedFunctionInfo__length__SMI", 0x34 },
+ { "v8dbg_class_SharedFunctionInfo__name__Object", 0x4 },
+ { "v8dbg_class_SharedFunctionInfo__num_literals__SMI", 0x40 },
+ { "v8dbg_class_SharedFunctionInfo__opt_count__SMI", 0x58 },
+ { "v8dbg_class_SharedFunctionInfo__script__Object", 0x1c },
+ { "v8dbg_class_SharedFunctionInfo__"
+ "start_position_and_type__SMI", 0x44 },
+ { "v8dbg_class_SharedFunctionInfo__"
+ "this_property_assignments__Object", 0x2c },
+ { "v8dbg_class_SharedFunctionInfo__"
+ "this_property_assignments_count__SMI", 0x54 },
+ { "v8dbg_class_SignatureInfo__args__Object", 0x8 },
+ { "v8dbg_class_SignatureInfo__receiver__Object", 0x4 },
+ { "v8dbg_class_String__length__SMI", 0x4 },
+ { "v8dbg_class_TemplateInfo__property_list__Object", 0x8 },
+ { "v8dbg_class_TemplateInfo__tag__Object", 0x4 },
+ { "v8dbg_class_TypeSwitchInfo__types__Object", 0x4 },
+
+ { "v8dbg_parent_AccessCheckInfo__Struct", 0x0 },
+ { "v8dbg_parent_AccessorInfo__Struct", 0x0 },
+ { "v8dbg_parent_BreakPointInfo__Struct", 0x0 },
+ { "v8dbg_parent_ByteArray__HeapObject", 0x0 },
+ { "v8dbg_parent_CallHandlerInfo__Struct", 0x0 },
+ { "v8dbg_parent_Code__HeapObject", 0x0 },
+ { "v8dbg_parent_CodeCache__Struct", 0x0 },
+ { "v8dbg_parent_ConsString__String", 0x0 },
+ { "v8dbg_parent_DebugInfo__Struct", 0x0 },
+ { "v8dbg_parent_DeoptimizationInputData__FixedArray", 0x0 },
+ { "v8dbg_parent_DeoptimizationOutputData__FixedArray", 0x0 },
+ { "v8dbg_parent_DescriptorArray__FixedArray", 0x0 },
+ { "v8dbg_parent_ExternalArray__HeapObject", 0x0 },
+ { "v8dbg_parent_ExternalAsciiString__ExternalString", 0x0 },
+ { "v8dbg_parent_ExternalByteArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_ExternalFloatArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_ExternalIntArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_ExternalShortArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_ExternalString__String", 0x0 },
+ { "v8dbg_parent_ExternalTwoByteString__ExternalString", 0x0 },
+ { "v8dbg_parent_ExternalUnsignedByteArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_ExternalUnsignedIntArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_ExternalUnsignedShortArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_Failure__MaybeObject", 0x0 },
+ { "v8dbg_parent_FixedArray__HeapObject", 0x0 },
+ { "v8dbg_parent_FunctionTemplateInfo__TemplateInfo", 0x0 },
+ { "v8dbg_parent_GlobalObject__JSObject", 0x0 },
+ { "v8dbg_parent_HeapNumber__HeapObject", 0x0 },
+ { "v8dbg_parent_HeapObject__Object", 0x0 },
+ { "v8dbg_parent_InterceptorInfo__Struct", 0x0 },
+ { "v8dbg_parent_JSArray__JSObject", 0x0 },
+ { "v8dbg_parent_JSBuiltinsObject__GlobalObject", 0x0 },
+ { "v8dbg_parent_JSFunction__JSObject", 0x0 },
+ { "v8dbg_parent_JSFunctionResultCache__FixedArray", 0x0 },
+ { "v8dbg_parent_JSGlobalObject__GlobalObject", 0x0 },
+ { "v8dbg_parent_JSGlobalPropertyCell__HeapObject", 0x0 },
+ { "v8dbg_parent_JSGlobalProxy__JSObject", 0x0 },
+ { "v8dbg_parent_JSMessageObject__JSObject", 0x0 },
+ { "v8dbg_parent_JSObject__HeapObject", 0x0 },
+ { "v8dbg_parent_JSRegExp__JSObject", 0x0 },
+ { "v8dbg_parent_JSRegExpResult__JSArray", 0x0 },
+ { "v8dbg_parent_JSValue__JSObject", 0x0 },
+ { "v8dbg_parent_Map__HeapObject", 0x0 },
+ { "v8dbg_parent_NormalizedMapCache__FixedArray", 0x0 },
+ { "v8dbg_parent_Object__MaybeObject", 0x0 },
+ { "v8dbg_parent_ObjectTemplateInfo__TemplateInfo", 0x0 },
+ { "v8dbg_parent_Oddball__HeapObject", 0x0 },
+ { "v8dbg_parent_Script__Struct", 0x0 },
+ { "v8dbg_parent_SeqAsciiString__SeqString", 0x0 },
+ { "v8dbg_parent_SeqString__String", 0x0 },
+ { "v8dbg_parent_SeqTwoByteString__SeqString", 0x0 },
+ { "v8dbg_parent_SharedFunctionInfo__HeapObject", 0x0 },
+ { "v8dbg_parent_SignatureInfo__Struct", 0x0 },
+ { "v8dbg_parent_Smi__Object", 0x0 },
+ { "v8dbg_parent_String__HeapObject", 0x0 },
+ { "v8dbg_parent_Struct__HeapObject", 0x0 },
+ { "v8dbg_parent_TemplateInfo__Struct", 0x0 },
+ { "v8dbg_parent_TypeSwitchInfo__Struct", 0x0 },
+
+ { "v8dbg_frametype_ArgumentsAdaptorFrame", 0x8 },
+ { "v8dbg_frametype_ConstructFrame", 0x7 },
+ { "v8dbg_frametype_EntryConstructFrame", 0x2 },
+ { "v8dbg_frametype_EntryFrame", 0x1 },
+ { "v8dbg_frametype_ExitFrame", 0x3 },
+ { "v8dbg_frametype_InternalFrame", 0x6 },
+ { "v8dbg_frametype_JavaScriptFrame", 0x4 },
+ { "v8dbg_frametype_OptimizedFrame", 0x5 },
+
+ { "v8dbg_off_fp_context", -0x4 },
+ { "v8dbg_off_fp_function", -0x8 },
+ { "v8dbg_off_fp_marker", -0x8 },
+ { "v8dbg_off_fp_args", 0x8 },
+
+ { "v8dbg_prop_idx_content", 0x0 },
+ { "v8dbg_prop_idx_first", 0x2 },
+ { "v8dbg_prop_type_field", 0x1 },
+ { "v8dbg_prop_type_first_phantom", 0x6 },
+ { "v8dbg_prop_type_mask", 0xf },
+
+ { "v8dbg_AsciiStringTag", 0x4 },
+ { "v8dbg_ConsStringTag", 0x1 },
+ { "v8dbg_ExternalStringTag", 0x2 },
+ { "v8dbg_FailureTag", 0x3 },
+ { "v8dbg_FailureTagMask", 0x3 },
+ { "v8dbg_FirstNonstringType", 0x80 },
+ { "v8dbg_HeapObjectTag", 0x1 },
+ { "v8dbg_HeapObjectTagMask", 0x3 },
+ { "v8dbg_IsNotStringMask", 0x80 },
+ { "v8dbg_NotStringTag", 0x80 },
+ { "v8dbg_SeqStringTag", 0x0 },
+ { "v8dbg_SmiTag", 0x0 },
+ { "v8dbg_SmiTagMask", 0x1 },
+ { "v8dbg_SmiValueShift", 0x1 },
+ { "v8dbg_StringEncodingMask", 0x4 },
+ { "v8dbg_StringRepresentationMask", 0x3 },
+ { "v8dbg_StringTag", 0x0 },
+ { "v8dbg_TwoByteStringTag", 0x0 },
+ { "v8dbg_PointerSizeLog2", 0x2 },
+
+ { NULL }
+};
+
+/*
+ * Canned configuration for the V8 bundled with Node.js v0.6.5.
+ */
+static v8_cfg_symbol_t v8_symbols_node_06[] = {
+ { "v8dbg_type_AccessCheckInfo__ACCESS_CHECK_INFO_TYPE", 0x93 },
+ { "v8dbg_type_AccessorInfo__ACCESSOR_INFO_TYPE", 0x92 },
+ { "v8dbg_type_BreakPointInfo__BREAK_POINT_INFO_TYPE", 0x9e },
+ { "v8dbg_type_ByteArray__BYTE_ARRAY_TYPE", 0x86 },
+ { "v8dbg_type_CallHandlerInfo__CALL_HANDLER_INFO_TYPE", 0x95 },
+ { "v8dbg_type_Code__CODE_TYPE", 0x81 },
+ { "v8dbg_type_CodeCache__CODE_CACHE_TYPE", 0x9b },
+ { "v8dbg_type_ConsString__CONS_ASCII_STRING_TYPE", 0x5 },
+ { "v8dbg_type_ConsString__CONS_ASCII_SYMBOL_TYPE", 0x45 },
+ { "v8dbg_type_ConsString__CONS_STRING_TYPE", 0x1 },
+ { "v8dbg_type_ConsString__CONS_SYMBOL_TYPE", 0x41 },
+ { "v8dbg_type_DebugInfo__DEBUG_INFO_TYPE", 0x9d },
+ { "v8dbg_type_ExternalAsciiString__EXTERNAL_ASCII_STRING_TYPE", 0x6 },
+ { "v8dbg_type_ExternalAsciiString__EXTERNAL_ASCII_SYMBOL_TYPE", 0x46 },
+ { "v8dbg_type_ExternalByteArray__EXTERNAL_BYTE_ARRAY_TYPE", 0x87 },
+ { "v8dbg_type_ExternalDoubleArray__EXTERNAL_DOUBLE_ARRAY_TYPE", 0x8e },
+ { "v8dbg_type_ExternalFloatArray__EXTERNAL_FLOAT_ARRAY_TYPE", 0x8d },
+ { "v8dbg_type_ExternalIntArray__EXTERNAL_INT_ARRAY_TYPE", 0x8b },
+ { "v8dbg_type_ExternalPixelArray__EXTERNAL_PIXEL_ARRAY_TYPE", 0x8f },
+ { "v8dbg_type_ExternalShortArray__EXTERNAL_SHORT_ARRAY_TYPE", 0x89 },
+ { "v8dbg_type_ExternalTwoByteString__EXTERNAL_STRING_TYPE", 0x2 },
+ { "v8dbg_type_ExternalTwoByteString__EXTERNAL_SYMBOL_TYPE", 0x42 },
+ { "v8dbg_type_ExternalUnsignedByteArray__"
+ "EXTERNAL_UNSIGNED_BYTE_ARRAY_TYPE", 0x88 },
+ { "v8dbg_type_ExternalUnsignedIntArray__"
+ "EXTERNAL_UNSIGNED_INT_ARRAY_TYPE", 0x8c },
+ { "v8dbg_type_ExternalUnsignedShortArray__"
+ "EXTERNAL_UNSIGNED_SHORT_ARRAY_TYPE", 0x8a },
+ { "v8dbg_type_FixedArray__FIXED_ARRAY_TYPE", 0x9f },
+ { "v8dbg_type_FixedDoubleArray__FIXED_DOUBLE_ARRAY_TYPE", 0x90 },
+ { "v8dbg_type_Foreign__FOREIGN_TYPE", 0x85 },
+ { "v8dbg_type_FunctionTemplateInfo__FUNCTION_TEMPLATE_INFO_TYPE",
+ 0x96 },
+ { "v8dbg_type_HeapNumber__HEAP_NUMBER_TYPE", 0x84 },
+ { "v8dbg_type_InterceptorInfo__INTERCEPTOR_INFO_TYPE", 0x94 },
+ { "v8dbg_type_JSArray__JS_ARRAY_TYPE", 0xa8 },
+ { "v8dbg_type_JSBuiltinsObject__JS_BUILTINS_OBJECT_TYPE", 0xa6 },
+ { "v8dbg_type_JSFunction__JS_FUNCTION_TYPE", 0xac },
+ { "v8dbg_type_JSFunctionProxy__JS_FUNCTION_PROXY_TYPE", 0xad },
+ { "v8dbg_type_JSGlobalObject__JS_GLOBAL_OBJECT_TYPE", 0xa5 },
+ { "v8dbg_type_JSGlobalPropertyCell__JS_GLOBAL_PROPERTY_CELL_TYPE",
+ 0x83 },
+ { "v8dbg_type_JSMessageObject__JS_MESSAGE_OBJECT_TYPE", 0xa1 },
+ { "v8dbg_type_JSObject__JS_OBJECT_TYPE", 0xa3 },
+ { "v8dbg_type_JSProxy__JS_PROXY_TYPE", 0xa9 },
+ { "v8dbg_type_JSRegExp__JS_REGEXP_TYPE", 0xab },
+ { "v8dbg_type_JSValue__JS_VALUE_TYPE", 0xa2 },
+ { "v8dbg_type_JSWeakMap__JS_WEAK_MAP_TYPE", 0xaa },
+ { "v8dbg_type_Map__MAP_TYPE", 0x80 },
+ { "v8dbg_type_ObjectTemplateInfo__OBJECT_TEMPLATE_INFO_TYPE", 0x97 },
+ { "v8dbg_type_Oddball__ODDBALL_TYPE", 0x82 },
+ { "v8dbg_type_PolymorphicCodeCache__POLYMORPHIC_CODE_CACHE_TYPE",
+ 0x9c },
+ { "v8dbg_type_Script__SCRIPT_TYPE", 0x9a },
+ { "v8dbg_type_SeqAsciiString__ASCII_STRING_TYPE", 0x4 },
+ { "v8dbg_type_SeqAsciiString__ASCII_SYMBOL_TYPE", 0x44 },
+ { "v8dbg_type_SeqTwoByteString__STRING_TYPE", 0x0 },
+ { "v8dbg_type_SeqTwoByteString__SYMBOL_TYPE", 0x40 },
+ { "v8dbg_type_SharedFunctionInfo__SHARED_FUNCTION_INFO_TYPE", 0xa0 },
+ { "v8dbg_type_SignatureInfo__SIGNATURE_INFO_TYPE", 0x98 },
+ { "v8dbg_type_SlicedString__SLICED_ASCII_STRING_TYPE", 0x7 },
+ { "v8dbg_type_SlicedString__SLICED_STRING_TYPE", 0x3 },
+ { "v8dbg_type_TypeSwitchInfo__TYPE_SWITCH_INFO_TYPE", 0x99 },
+
+ { "v8dbg_class_AccessCheckInfo__data__Object", 0xc },
+ { "v8dbg_class_AccessCheckInfo__indexed_callback__Object", 0x8 },
+ { "v8dbg_class_AccessCheckInfo__named_callback__Object", 0x4 },
+ { "v8dbg_class_AccessorInfo__data__Object", 0xc },
+ { "v8dbg_class_AccessorInfo__flag__Smi", 0x14 },
+ { "v8dbg_class_AccessorInfo__getter__Object", 0x4 },
+ { "v8dbg_class_AccessorInfo__name__Object", 0x10 },
+ { "v8dbg_class_AccessorInfo__setter__Object", 0x8 },
+ { "v8dbg_class_BreakPointInfo__break_point_objects__Object", 0x10 },
+ { "v8dbg_class_BreakPointInfo__code_position__Smi", 0x4 },
+ { "v8dbg_class_BreakPointInfo__source_position__Smi", 0x8 },
+ { "v8dbg_class_BreakPointInfo__statement_position__Smi", 0xc },
+ { "v8dbg_class_CallHandlerInfo__callback__Object", 0x4 },
+ { "v8dbg_class_CallHandlerInfo__data__Object", 0x8 },
+ { "v8dbg_class_Code__deoptimization_data__FixedArray", 0xc },
+ { "v8dbg_class_Code__instruction_size__int", 0x4 },
+ { "v8dbg_class_Code__instruction_start__int", 0x20 },
+ { "v8dbg_class_Code__next_code_flushing_candidate__Object", 0x10 },
+ { "v8dbg_class_Code__relocation_info__ByteArray", 0x8 },
+ { "v8dbg_class_CodeCache__default_cache__FixedArray", 0x4 },
+ { "v8dbg_class_CodeCache__normal_type_cache__Object", 0x8 },
+ { "v8dbg_class_ConsString__first__String", 0xc },
+ { "v8dbg_class_ConsString__second__String", 0x10 },
+ { "v8dbg_class_DebugInfo__break_points__FixedArray", 0x14 },
+ { "v8dbg_class_DebugInfo__code__Code", 0xc },
+ { "v8dbg_class_DebugInfo__original_code__Code", 0x8 },
+ { "v8dbg_class_DebugInfo__shared__SharedFunctionInfo", 0x4 },
+ { "v8dbg_class_ExternalString__resource__Object", 0xc },
+ { "v8dbg_class_FixedArray__data__uintptr_t", 0x8 },
+ { "v8dbg_class_FixedArrayBase__length__SMI", 0x4 },
+ { "v8dbg_class_FunctionTemplateInfo__access_check_info__Object", 0x38 },
+ { "v8dbg_class_FunctionTemplateInfo__call_code__Object", 0x10 },
+ { "v8dbg_class_FunctionTemplateInfo__class_name__Object", 0x2c },
+ { "v8dbg_class_FunctionTemplateInfo__flag__Smi", 0x3c },
+ { "v8dbg_class_FunctionTemplateInfo__indexed_property_handler__Object",
+ 0x24 },
+ { "v8dbg_class_FunctionTemplateInfo__instance_call_handler__Object",
+ 0x34 },
+ { "v8dbg_class_FunctionTemplateInfo__instance_template__Object", 0x28 },
+ { "v8dbg_class_FunctionTemplateInfo__named_property_handler__Object",
+ 0x20 },
+ { "v8dbg_class_FunctionTemplateInfo__parent_template__Object", 0x1c },
+ { "v8dbg_class_FunctionTemplateInfo__property_accessors__Object",
+ 0x14 },
+ { "v8dbg_class_FunctionTemplateInfo__prototype_template__Object",
+ 0x18 },
+ { "v8dbg_class_FunctionTemplateInfo__serial_number__Object", 0xc },
+ { "v8dbg_class_FunctionTemplateInfo__signature__Object", 0x30 },
+ { "v8dbg_class_GlobalObject__builtins__JSBuiltinsObject", 0xc },
+ { "v8dbg_class_GlobalObject__global_context__Context", 0x10 },
+ { "v8dbg_class_GlobalObject__global_receiver__JSObject", 0x14 },
+ { "v8dbg_class_HeapNumber__value__double", 0x4 },
+ { "v8dbg_class_HeapObject__map__Map", 0x0 },
+ { "v8dbg_class_InterceptorInfo__data__Object", 0x18 },
+ { "v8dbg_class_InterceptorInfo__deleter__Object", 0x10 },
+ { "v8dbg_class_InterceptorInfo__enumerator__Object", 0x14 },
+ { "v8dbg_class_InterceptorInfo__getter__Object", 0x4 },
+ { "v8dbg_class_InterceptorInfo__query__Object", 0xc },
+ { "v8dbg_class_InterceptorInfo__setter__Object", 0x8 },
+ { "v8dbg_class_JSArray__length__Object", 0xc },
+ { "v8dbg_class_JSFunction__literals__FixedArray", 0x1c },
+ { "v8dbg_class_JSFunction__next_function_link__Object", 0x20 },
+ { "v8dbg_class_JSFunction__prototype_or_initial_map__Object", 0x10 },
+ { "v8dbg_class_JSFunction__shared__SharedFunctionInfo", 0x14 },
+ { "v8dbg_class_JSFunctionProxy__call_trap__Object", 0x8 },
+ { "v8dbg_class_JSFunctionProxy__construct_trap__Object", 0xc },
+ { "v8dbg_class_JSGlobalProxy__context__Object", 0xc },
+ { "v8dbg_class_JSMessageObject__arguments__JSArray", 0x10 },
+ { "v8dbg_class_JSMessageObject__end_position__SMI", 0x24 },
+ { "v8dbg_class_JSMessageObject__script__Object", 0x14 },
+ { "v8dbg_class_JSMessageObject__stack_frames__Object", 0x1c },
+ { "v8dbg_class_JSMessageObject__stack_trace__Object", 0x18 },
+ { "v8dbg_class_JSMessageObject__start_position__SMI", 0x20 },
+ { "v8dbg_class_JSMessageObject__type__String", 0xc },
+ { "v8dbg_class_JSObject__elements__Object", 0x8 },
+ { "v8dbg_class_JSObject__properties__FixedArray", 0x4 },
+ { "v8dbg_class_JSProxy__handler__Object", 0x4 },
+ { "v8dbg_class_JSRegExp__data__Object", 0xc },
+ { "v8dbg_class_JSValue__value__Object", 0xc },
+ { "v8dbg_class_JSWeakMap__next__Object", 0x10 },
+ { "v8dbg_class_JSWeakMap__table__ObjectHashTable", 0xc },
+ { "v8dbg_class_Map__code_cache__Object", 0x18 },
+ { "v8dbg_class_Map__constructor__Object", 0x10 },
+ { "v8dbg_class_Map__inobject_properties__int", 0x5 },
+ { "v8dbg_class_Map__instance_attributes__int", 0x8 },
+ { "v8dbg_class_Map__instance_descriptors__FixedArray", 0x14 },
+ { "v8dbg_class_Map__instance_size__int", 0x4 },
+ { "v8dbg_class_Map__prototype_transitions__FixedArray", 0x1c },
+ { "v8dbg_class_ObjectTemplateInfo__constructor__Object", 0xc },
+ { "v8dbg_class_ObjectTemplateInfo__internal_field_count__Object",
+ 0x10 },
+ { "v8dbg_class_Oddball__to_number__Object", 0x8 },
+ { "v8dbg_class_Oddball__to_string__String", 0x4 },
+ { "v8dbg_class_PolymorphicCodeCache__cache__Object", 0x4 },
+ { "v8dbg_class_Script__column_offset__Smi", 0x10 },
+ { "v8dbg_class_Script__compilation_type__Smi", 0x24 },
+ { "v8dbg_class_Script__context_data__Object", 0x18 },
+ { "v8dbg_class_Script__data__Object", 0x14 },
+ { "v8dbg_class_Script__eval_from_instructions_offset__Smi", 0x34 },
+ { "v8dbg_class_Script__eval_from_shared__Object", 0x30 },
+ { "v8dbg_class_Script__id__Object", 0x2c },
+ { "v8dbg_class_Script__line_ends__Object", 0x28 },
+ { "v8dbg_class_Script__line_offset__Smi", 0xc },
+ { "v8dbg_class_Script__name__Object", 0x8 },
+ { "v8dbg_class_Script__source__Object", 0x4 },
+ { "v8dbg_class_Script__type__Smi", 0x20 },
+ { "v8dbg_class_Script__wrapper__Foreign", 0x1c },
+ { "v8dbg_class_SeqAsciiString__chars__char", 0xc },
+ { "v8dbg_class_SharedFunctionInfo__code__Code", 0x8 },
+ { "v8dbg_class_SharedFunctionInfo__compiler_hints__SMI", 0x50 },
+ { "v8dbg_class_SharedFunctionInfo__construct_stub__Code", 0x10 },
+ { "v8dbg_class_SharedFunctionInfo__debug_info__Object", 0x20 },
+ { "v8dbg_class_SharedFunctionInfo__end_position__SMI", 0x48 },
+ { "v8dbg_class_SharedFunctionInfo__expected_nof_properties__SMI",
+ 0x3c },
+ { "v8dbg_class_SharedFunctionInfo__formal_parameter_count__SMI", 0x38 },
+ { "v8dbg_class_SharedFunctionInfo__function_data__Object", 0x18 },
+ { "v8dbg_class_SharedFunctionInfo__function_token_position__SMI",
+ 0x4c },
+ { "v8dbg_class_SharedFunctionInfo__inferred_name__String", 0x24 },
+ { "v8dbg_class_SharedFunctionInfo__initial_map__Object", 0x28 },
+ { "v8dbg_class_SharedFunctionInfo__instance_class_name__Object", 0x14 },
+ { "v8dbg_class_SharedFunctionInfo__length__SMI", 0x34 },
+ { "v8dbg_class_SharedFunctionInfo__name__Object", 0x4 },
+ { "v8dbg_class_SharedFunctionInfo__num_literals__SMI", 0x40 },
+ { "v8dbg_class_SharedFunctionInfo__opt_count__SMI", 0x58 },
+ { "v8dbg_class_SharedFunctionInfo__script__Object", 0x1c },
+ { "v8dbg_class_SharedFunctionInfo__"
+ "start_position_and_type__SMI", 0x44 },
+ { "v8dbg_class_SharedFunctionInfo__"
+ "this_property_assignments__Object", 0x2c },
+ { "v8dbg_class_SharedFunctionInfo__"
+ "this_property_assignments_count__SMI", 0x54 },
+ { "v8dbg_class_SignatureInfo__args__Object", 0x8 },
+ { "v8dbg_class_SignatureInfo__receiver__Object", 0x4 },
+ { "v8dbg_class_SlicedString__offset__SMI", 0x10 },
+ { "v8dbg_class_String__length__SMI", 0x4 },
+ { "v8dbg_class_TemplateInfo__property_list__Object", 0x8 },
+ { "v8dbg_class_TemplateInfo__tag__Object", 0x4 },
+ { "v8dbg_class_TypeSwitchInfo__types__Object", 0x4 },
+
+ { "v8dbg_parent_AccessCheckInfo__Struct", 0x0 },
+ { "v8dbg_parent_AccessorInfo__Struct", 0x0 },
+ { "v8dbg_parent_BreakPointInfo__Struct", 0x0 },
+ { "v8dbg_parent_ByteArray__FixedArrayBase", 0x0 },
+ { "v8dbg_parent_CallHandlerInfo__Struct", 0x0 },
+ { "v8dbg_parent_Code__HeapObject", 0x0 },
+ { "v8dbg_parent_CodeCache__Struct", 0x0 },
+ { "v8dbg_parent_ConsString__String", 0x0 },
+ { "v8dbg_parent_DebugInfo__Struct", 0x0 },
+ { "v8dbg_parent_DeoptimizationInputData__FixedArray", 0x0 },
+ { "v8dbg_parent_DeoptimizationOutputData__FixedArray", 0x0 },
+ { "v8dbg_parent_DescriptorArray__FixedArray", 0x0 },
+ { "v8dbg_parent_ExternalArray__FixedArrayBase", 0x0 },
+ { "v8dbg_parent_ExternalAsciiString__ExternalString", 0x0 },
+ { "v8dbg_parent_ExternalByteArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_ExternalDoubleArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_ExternalFloatArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_ExternalIntArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_ExternalPixelArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_ExternalShortArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_ExternalString__String", 0x0 },
+ { "v8dbg_parent_ExternalTwoByteString__ExternalString", 0x0 },
+ { "v8dbg_parent_ExternalUnsignedByteArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_ExternalUnsignedIntArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_ExternalUnsignedShortArray__ExternalArray", 0x0 },
+ { "v8dbg_parent_Failure__MaybeObject", 0x0 },
+ { "v8dbg_parent_FixedArray__FixedArrayBase", 0x0 },
+ { "v8dbg_parent_FixedArrayBase__HeapObject", 0x0 },
+ { "v8dbg_parent_FixedDoubleArray__FixedArrayBase", 0x0 },
+ { "v8dbg_parent_Foreign__HeapObject", 0x0 },
+ { "v8dbg_parent_FunctionTemplateInfo__TemplateInfo", 0x0 },
+ { "v8dbg_parent_GlobalObject__JSObject", 0x0 },
+ { "v8dbg_parent_HashTable__FixedArray", 0x0 },
+ { "v8dbg_parent_HeapNumber__HeapObject", 0x0 },
+ { "v8dbg_parent_HeapObject__Object", 0x0 },
+ { "v8dbg_parent_InterceptorInfo__Struct", 0x0 },
+ { "v8dbg_parent_JSArray__JSObject", 0x0 },
+ { "v8dbg_parent_JSBuiltinsObject__GlobalObject", 0x0 },
+ { "v8dbg_parent_JSFunction__JSObject", 0x0 },
+ { "v8dbg_parent_JSFunctionProxy__JSProxy", 0x0 },
+ { "v8dbg_parent_JSFunctionResultCache__FixedArray", 0x0 },
+ { "v8dbg_parent_JSGlobalObject__GlobalObject", 0x0 },
+ { "v8dbg_parent_JSGlobalPropertyCell__HeapObject", 0x0 },
+ { "v8dbg_parent_JSMessageObject__JSObject", 0x0 },
+ { "v8dbg_parent_JSObject__JSReceiver", 0x0 },
+ { "v8dbg_parent_JSProxy__JSReceiver", 0x0 },
+ { "v8dbg_parent_JSReceiver__HeapObject", 0x0 },
+ { "v8dbg_parent_JSRegExp__JSObject", 0x0 },
+ { "v8dbg_parent_JSRegExpResult__JSArray", 0x0 },
+ { "v8dbg_parent_JSValue__JSObject", 0x0 },
+ { "v8dbg_parent_JSWeakMap__JSObject", 0x0 },
+ { "v8dbg_parent_Map__HeapObject", 0x0 },
+ { "v8dbg_parent_NormalizedMapCache__FixedArray", 0x0 },
+ { "v8dbg_parent_ObjectTemplateInfo__TemplateInfo", 0x0 },
+ { "v8dbg_parent_Oddball__HeapObject", 0x0 },
+ { "v8dbg_parent_PolymorphicCodeCache__Struct", 0x0 },
+ { "v8dbg_parent_Script__Struct", 0x0 },
+ { "v8dbg_parent_SeqAsciiString__SeqString", 0x0 },
+ { "v8dbg_parent_SeqString__String", 0x0 },
+ { "v8dbg_parent_SeqTwoByteString__SeqString", 0x0 },
+ { "v8dbg_parent_SharedFunctionInfo__HeapObject", 0x0 },
+ { "v8dbg_parent_SignatureInfo__Struct", 0x0 },
+ { "v8dbg_parent_SlicedString__String", 0x0 },
+ { "v8dbg_parent_Smi__Object", 0x0 },
+ { "v8dbg_parent_String__HeapObject", 0x0 },
+ { "v8dbg_parent_Struct__HeapObject", 0x0 },
+ { "v8dbg_parent_TemplateInfo__Struct", 0x0 },
+ { "v8dbg_parent_TypeSwitchInfo__Struct", 0x0 },
+
+ { "v8dbg_frametype_ArgumentsAdaptorFrame", 0x8 },
+ { "v8dbg_frametype_ConstructFrame", 0x7 },
+ { "v8dbg_frametype_EntryConstructFrame", 0x2 },
+ { "v8dbg_frametype_EntryFrame", 0x1 },
+ { "v8dbg_frametype_ExitFrame", 0x3 },
+ { "v8dbg_frametype_InternalFrame", 0x6 },
+ { "v8dbg_frametype_JavaScriptFrame", 0x4 },
+ { "v8dbg_frametype_OptimizedFrame", 0x5 },
+
+ { "v8dbg_off_fp_args", 0x8 },
+ { "v8dbg_off_fp_context", -0x4 },
+ { "v8dbg_off_fp_function", -0x8 },
+ { "v8dbg_off_fp_marker", -0x8 },
+
+ { "v8dbg_prop_idx_content", 0x1 },
+ { "v8dbg_prop_idx_first", 0x3 },
+ { "v8dbg_prop_type_field", 0x1 },
+ { "v8dbg_prop_type_first_phantom", 0x6 },
+ { "v8dbg_prop_type_mask", 0xf },
+
+ { "v8dbg_AsciiStringTag", 0x4 },
+ { "v8dbg_PointerSizeLog2", 0x2 },
+ { "v8dbg_SeqStringTag", 0x0 },
+ { "v8dbg_SmiTag", 0x0 },
+ { "v8dbg_SmiTagMask", 0x1 },
+ { "v8dbg_SmiValueShift", 0x1 },
+ { "v8dbg_StringEncodingMask", 0x4 },
+ { "v8dbg_StringRepresentationMask", 0x3 },
+ { "v8dbg_StringTag", 0x0 },
+ { "v8dbg_TwoByteStringTag", 0x0 },
+ { "v8dbg_ConsStringTag", 0x1 },
+ { "v8dbg_ExternalStringTag", 0x2 },
+ { "v8dbg_FailureTag", 0x3 },
+ { "v8dbg_FailureTagMask", 0x3 },
+ { "v8dbg_FirstNonstringType", 0x80 },
+ { "v8dbg_HeapObjectTag", 0x1 },
+ { "v8dbg_HeapObjectTagMask", 0x3 },
+ { "v8dbg_IsNotStringMask", 0x80 },
+ { "v8dbg_NotStringTag", 0x80 },
+
+ { NULL },
+};
+
+v8_cfg_t v8_cfg_04 = { "node-0.4", "node v0.4", v8_symbols_node_04,
+ v8cfg_canned_iter, v8cfg_canned_readsym };
+
+v8_cfg_t v8_cfg_06 = { "node-0.6", "node v0.6", v8_symbols_node_06,
+ v8cfg_canned_iter, v8cfg_canned_readsym };
+
+v8_cfg_t *v8_cfgs[] = {
+ &v8_cfg_04,
+ &v8_cfg_06,
+ NULL
+};
+
+v8_cfg_t v8_cfg_target = { NULL, NULL, NULL, v8cfg_target_iter,
+ v8cfg_target_readsym };
diff --git a/usr/src/cmd/mdb/common/modules/v8/v8cfg.h b/usr/src/cmd/mdb/common/modules/v8/v8cfg.h
new file mode 100644
index 0000000000..9e722b0e2b
--- /dev/null
+++ b/usr/src/cmd/mdb/common/modules/v8/v8cfg.h
@@ -0,0 +1,55 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * v8cfg.h: canned configurations for previous V8 versions
+ */
+
+#ifndef V8CFG_H
+#define V8CFG_H
+
+#include <sys/types.h>
+#include <sys/mdb_modapi.h>
+
+typedef struct {
+ const char *v8cs_name; /* symbol name */
+ intptr_t v8cs_value; /* symbol value */
+} v8_cfg_symbol_t;
+
+typedef struct v8_cfg {
+ const char *v8cfg_name; /* canned config name */
+ const char *v8cfg_label; /* description */
+ v8_cfg_symbol_t *v8cfg_symbols; /* actual symbol values */
+
+ int (*v8cfg_iter)(struct v8_cfg *, int (*)(mdb_symbol_t *, void *),
+ void *);
+ int (*v8cfg_readsym)(struct v8_cfg *, const char *, intptr_t *);
+} v8_cfg_t;
+
+extern v8_cfg_t v8_cfg_04;
+extern v8_cfg_t v8_cfg_06;
+extern v8_cfg_t v8_cfg_target;
+extern v8_cfg_t *v8_cfgs[];
+
+#endif /* V8CFG_H */
diff --git a/usr/src/cmd/mdb/common/modules/v8/v8dbg.h b/usr/src/cmd/mdb/common/modules/v8/v8dbg.h
new file mode 100644
index 0000000000..b17f241fac
--- /dev/null
+++ b/usr/src/cmd/mdb/common/modules/v8/v8dbg.h
@@ -0,0 +1,91 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * v8dbg.h: macros for use by V8 heap inspection tools. The consumer must
+ * define values for various tags and shifts. The MDB module gets these
+ * constants from information encoded in the binary itself.
+ */
+
+#ifndef _V8DBG_H
+#define _V8DBG_H
+
+/*
+ * Recall that while V8 heap objects are always 4-byte aligned, heap object
+ * pointers always have the last bit set. So when looking for a field nominally
+ * at offset X, one must be sure to clear the tag bit first.
+ */
+#define V8_OFF_HEAP(x) ((x) - V8_HeapObjectTag)
+
+/*
+ * Determine whether a given pointer refers to a SMI, Failure, or HeapObject.
+ */
+#define V8_IS_SMI(ptr) (((ptr) & V8_SmiTagMask) == V8_SmiTag)
+#define V8_IS_FAILURE(ptr) (V8_FailureTagMask != -1 && \
+ V8_FailureTagMask != -1 && \
+ ((ptr) & V8_FailureTagMask) == V8_FailureTag)
+
+#define V8_IS_HEAPOBJECT(ptr) \
+ (((ptr) & V8_HeapObjectTagMask) == V8_HeapObjectTag)
+
+/*
+ * Extract the value of a SMI "pointer". Recall that small integers are stored
+ * using the upper 31 bits.
+ */
+#define V8_SMI_VALUE(smi) ((smi) >> (V8_SmiValueShift + V8_SmiShiftSize))
+#define V8_VALUE_SMI(value) \
+ ((value) << (V8_SmiValueShift + V8_SmiShiftSize))
+
+/*
+ * Determine the encoding and representation of a V8 string.
+ */
+#define V8_TYPE_STRING(type) (((type) & V8_IsNotStringMask) == V8_StringTag)
+
+#define V8_STRENC_ASCII(type) \
+ (((type) & V8_StringEncodingMask) == V8_AsciiStringTag)
+
+#define V8_STRREP_SEQ(type) \
+ (((type) & V8_StringRepresentationMask) == V8_SeqStringTag)
+#define V8_STRREP_CONS(type) \
+ (((type) & V8_StringRepresentationMask) == V8_ConsStringTag)
+#define V8_STRREP_SLICED(type) \
+ (((type) & V8_StringRepresentationMask) == V8_SlicedStringTag)
+#define V8_STRREP_EXT(type) \
+ (((type) & V8_StringRepresentationMask) == V8_ExternalStringTag)
+
+/*
+ * Several of the following constants and transformations are hardcoded in V8 as
+ * well, so there's no way to extract them programmatically from the binary.
+ */
+#define V8_DESC_KEYIDX(x) ((x) + V8_PROP_IDX_FIRST)
+#define V8_DESC_VALIDX(x) ((x) << 1)
+#define V8_DESC_DETIDX(x) (((x) << 1) + 1)
+
+#define V8_DESC_ISFIELD(x) \
+ ((V8_SMI_VALUE(x) & V8_PROP_TYPE_MASK) == V8_PROP_TYPE_FIELD)
+
+#define V8_PROP_FIELDINDEX(value) \
+ ((V8_SMI_VALUE(value) & V8_FIELDINDEX_MASK) >> V8_FIELDINDEX_SHIFT)
+
+#endif /* _V8DBG_H */
diff --git a/usr/src/cmd/mdb/intel/amd64/i40e/Makefile b/usr/src/cmd/mdb/intel/amd64/i40e/Makefile
new file mode 100644
index 0000000000..de089d8a14
--- /dev/null
+++ b/usr/src/cmd/mdb/intel/amd64/i40e/Makefile
@@ -0,0 +1,35 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2016 Joyent, Inc.
+#
+
+
+MODULE = i40e.so
+MDBTGT = kvm
+
+MODSRCS = i40e.c
+
+include ../../../../Makefile.cmd
+include ../../../../Makefile.cmd.64
+include ../../Makefile.amd64
+include ../../../Makefile.module
+
+CPPFLAGS += -I$(SRC)/uts/common/io/i40e
+CPPFLAGS += -I$(SRC)/uts/common/io/i40e/core
+CPPFLAGS += -I$(SRC)/uts/common
+CPPFLAGS += -D_I40E_MDB_DMOD
+
+#
+# i40e needs this due to the use of Intel provided headers.
+#
+LINTFLAGS64 += -erroff=E_STATIC_UNUSED
diff --git a/usr/src/cmd/mdb/intel/amd64/libumem/Makefile b/usr/src/cmd/mdb/intel/amd64/libumem/Makefile
index 704ff65873..ae22217a1b 100644
--- a/usr/src/cmd/mdb/intel/amd64/libumem/Makefile
+++ b/usr/src/cmd/mdb/intel/amd64/libumem/Makefile
@@ -22,6 +22,7 @@
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
+# Copyright (c) 2012 Joyent, Inc. All rights reserved.
MODULE = libumem.so
MDBTGT = proc
diff --git a/usr/src/cmd/mdb/intel/amd64/v8/Makefile b/usr/src/cmd/mdb/intel/amd64/v8/Makefile
new file mode 100644
index 0000000000..cf74ba1d1c
--- /dev/null
+++ b/usr/src/cmd/mdb/intel/amd64/v8/Makefile
@@ -0,0 +1,45 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2011, Joyent, Inc. All rights reserved.
+#
+
+MODULE = v8.so
+MDBTGT = proc
+
+MODSRCS_DIR = ../../../common/modules/v8
+
+MODSRCS = mdb_v8.c mdb_v8_cfg.c
+
+include ../../../../Makefile.cmd
+include ../../../../Makefile.cmd.64
+include ../../Makefile.amd64
+include ../../../Makefile.module
+
+dmod/$(MODULE) := LDLIBS += -lproc -lavl
+
+%.o: $(MODSRCS_DIR)/%.c
+ $(COMPILE.c) $<
+ $(CTFCONVERT_O)
+
+%.ln: $(MODSRCS_DIR)/%.c
+ $(LINT.c) -c $<
diff --git a/usr/src/cmd/mdb/intel/ia32/i40e/Makefile b/usr/src/cmd/mdb/intel/ia32/i40e/Makefile
new file mode 100644
index 0000000000..dd3834ae1f
--- /dev/null
+++ b/usr/src/cmd/mdb/intel/ia32/i40e/Makefile
@@ -0,0 +1,33 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2016 Joyent, Inc.
+#
+
+MODULE = i40e.so
+MDBTGT = kvm
+
+MODSRCS = i40e.c
+
+include ../../../../Makefile.cmd
+include ../../Makefile.ia32
+include ../../../Makefile.module
+
+CPPFLAGS += -I$(SRC)/uts/common/io/i40e
+CPPFLAGS += -I$(SRC)/uts/common/io/i40e/core
+CPPFLAGS += -I$(SRC)/uts/common
+CPPFLAGS += -D_I40E_MDB_DMOD
+
+#
+# i40e needs this due to the use of Intel provided headers.
+#
+LINTFLAGS += -erroff=E_STATIC_UNUSED
diff --git a/usr/src/cmd/mdb/intel/ia32/libumem/Makefile b/usr/src/cmd/mdb/intel/ia32/libumem/Makefile
index a1ab338f40..bde1be90ac 100644
--- a/usr/src/cmd/mdb/intel/ia32/libumem/Makefile
+++ b/usr/src/cmd/mdb/intel/ia32/libumem/Makefile
@@ -22,6 +22,7 @@
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
+# Copyright (c) 2012 Joyent, Inc. All rights reserved.
MODULE = libumem.so
MDBTGT = proc
diff --git a/usr/src/cmd/mdb/intel/ia32/v8/Makefile b/usr/src/cmd/mdb/intel/ia32/v8/Makefile
new file mode 100644
index 0000000000..c84532289e
--- /dev/null
+++ b/usr/src/cmd/mdb/intel/ia32/v8/Makefile
@@ -0,0 +1,44 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2011, Joyent, Inc. All rights reserved.
+#
+
+MODULE = v8.so
+MDBTGT = proc
+
+MODSRCS_DIR = ../../../common/modules/v8
+
+MODSRCS = mdb_v8.c mdb_v8_cfg.c
+
+include ../../../../Makefile.cmd
+include ../../Makefile.ia32
+include ../../../Makefile.module
+
+dmod/$(MODULE) := LDLIBS += -lproc -lavl
+
+%.o: $(MODSRCS_DIR)/%.c
+ $(COMPILE.c) $<
+ $(CTFCONVERT_O)
+
+%.ln: $(MODSRCS_DIR)/%.c
+ $(LINT.c) -c $<
diff --git a/usr/src/cmd/mdb/sparc/v7/libumem/Makefile b/usr/src/cmd/mdb/sparc/v7/libumem/Makefile
index 906d05d5ea..0488e6739a 100644
--- a/usr/src/cmd/mdb/sparc/v7/libumem/Makefile
+++ b/usr/src/cmd/mdb/sparc/v7/libumem/Makefile
@@ -22,6 +22,7 @@
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
+# Copyright (c) 2012 Joyent, Inc. All rights reserved.
MODULE = libumem.so
MDBTGT = proc
diff --git a/usr/src/cmd/mdb/sparc/v9/libumem/Makefile b/usr/src/cmd/mdb/sparc/v9/libumem/Makefile
index 09ea0473c6..87ce977423 100644
--- a/usr/src/cmd/mdb/sparc/v9/libumem/Makefile
+++ b/usr/src/cmd/mdb/sparc/v9/libumem/Makefile
@@ -22,6 +22,7 @@
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
+# Copyright (c) 2012 Joyent, Inc. All rights reserved.
MODULE = libumem.so
MDBTGT = proc
diff --git a/usr/src/cmd/mdb/test/exit-e/err.nowrite.ksh b/usr/src/cmd/mdb/test/exit-e/err.nowrite.ksh
new file mode 100644
index 0000000000..8f0fda2253
--- /dev/null
+++ b/usr/src/cmd/mdb/test/exit-e/err.nowrite.ksh
@@ -0,0 +1 @@
+$MDB -e '0xbaddcafe/v 0x02'
diff --git a/usr/src/cmd/mdb/test/exit-e/err.unmapped.ksh b/usr/src/cmd/mdb/test/exit-e/err.unmapped.ksh
new file mode 100644
index 0000000000..874283f186
--- /dev/null
+++ b/usr/src/cmd/mdb/test/exit-e/err.unmapped.ksh
@@ -0,0 +1 @@
+$MDB -e '0x0/v 0x02' /bin/ls
diff --git a/usr/src/cmd/mdb/test/mtest.sh b/usr/src/cmd/mdb/test/mtest.sh
index f21d0faa21..f21d0faa21 100644..100755
--- a/usr/src/cmd/mdb/test/mtest.sh
+++ b/usr/src/cmd/mdb/test/mtest.sh
diff --git a/usr/src/cmd/mdb/test/typedef/tst.dellist.mdb.out b/usr/src/cmd/mdb/test/typedef/tst.dellist.mdb.out
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/usr/src/cmd/mdb/test/typedef/tst.dellist.mdb.out
diff --git a/usr/src/cmd/mdb/test/typedef/tst.emptylist.mdb.out b/usr/src/cmd/mdb/test/typedef/tst.emptylist.mdb.out
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/usr/src/cmd/mdb/test/typedef/tst.emptylist.mdb.out
diff --git a/usr/src/cmd/nicstat/Makefile b/usr/src/cmd/nicstat/Makefile
new file mode 100644
index 0000000000..935011119a
--- /dev/null
+++ b/usr/src/cmd/nicstat/Makefile
@@ -0,0 +1,31 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2013 Joyent, Inc. All rights reserved.
+#
+
+PROG = nicstat
+
+include ../Makefile.cmd
+
+all: $(PROG)
+
+install: all .WAIT $(ROOTPROG)
+
+clean:
+
+$(ROOTBINPROG): $(PROG)
+ $(INS.file)
+
+lint:
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/nicstat/nicstat.pl b/usr/src/cmd/nicstat/nicstat.pl
new file mode 100644
index 0000000000..fae4c797df
--- /dev/null
+++ b/usr/src/cmd/nicstat/nicstat.pl
@@ -0,0 +1,424 @@
+#!/usr/perl5/bin/perl -w
+#
+# nicstat - print network traffic, Kbyte/s read and written.
+# Solaris 8+, Perl (Sun::Solaris::Kstat).
+#
+# "netstat -i" only gives a packet count, this program gives Kbytes.
+#
+# 04-Apr-2011, ver 1.00J
+#
+# USAGE: nicstat [-hsz] [-i int[,int...]] | [interval [count]]
+#
+# -h # help
+# -s # print summary output
+# -z # skip zero lines
+# -i int[,int...] # print these instances only
+# eg,
+# nicstat # print summary since boot
+# nicstat 1 # print continually, every 1 second
+# nicstat 1 5 # print 5 times, every 1 second
+# nicstat -i hme0 # only examine hme0
+#
+# This prints out the KB/s transferred for all the network cards (NICs),
+# including packet counts and average sizes. The first line is the summary
+# data since boot.
+#
+# FIELDS:
+# Int Interface
+# rKB/s read Kbytes/s
+# wKB/s write Kbytes/s
+# rPk/s read Packets/s
+# wPk/s write Packets/s
+# rAvs read Average size, bytes
+# wAvs write Average size, bytes
+# %Util %Utilisation (r or w/ifspeed)
+# Sat Saturation (defer, nocanput, norecvbuf, noxmtbuf)
+#
+# NOTES:
+#
+# - Some unusual network cards may not provide all the details to Kstat,
+# (or provide different symbols). Check for newer versions of this program,
+# and the @Network array in the code below.
+# - Utilisation is based on bytes transferred divided by speed of the interface
+# (if the speed is known). It should be impossible to reach 100% as there
+# are overheads due to bus negotiation and timing.
+# - Loopback interfaces may only provide packet counts (if anything), and so
+# bytes and %util will always be zero. Newer versions of Solaris (newer than
+# Solaris 10 6/06) may provide loopback byte stats.
+# - Saturation is determined by counting read and write errors caused by the
+# interface running at saturation. This approach is not ideal, and the value
+# reported is often lower than it should be (eg, 0.0). Reading the rKB/s and
+# wKB/s fields may be more useful.
+#
+# SEE ALSO:
+# nicstat.c # the C version, also on my website
+# kstat -n hme0 [interval [count]] # or qfe0, ...
+# netstat -iI hme0 [interval [count]]
+# se netstat.se [interval] # SE Toolkit
+# se nx.se [interval] # SE Toolkit
+#
+# COPYRIGHT: Copyright (c) 2013 Brendan Gregg.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at docs/cddl1.txt or
+# http://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at docs/cddl1.txt.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2015 Joyent, Inc. All rights reserved.
+#
+# Author: Brendan Gregg [Sydney, Australia]
+#
+# 18-Jul-2004 Brendan Gregg Created this.
+# 07-Jan-2005 " " added saturation value.
+# 07-Jan-2005 " " added summary style (from Peter Tribble).
+# 23-Jan-2006 " " Tweaked style.
+# 11-Aug-2006 " " Improved output neatness.
+# 30-Sep-2006 " " Added loopback, tweaked output.
+# 04-Apr-2011 brendan@joyent.com Updated for smartmachines.
+
+use strict;
+use Getopt::Std;
+use Sun::Solaris::Kstat;
+my $Kstat = Sun::Solaris::Kstat->new();
+
+
+#
+# Process command line args
+#
+usage() if defined $ARGV[0] and $ARGV[0] eq "--help";
+getopts('hi:sz') or usage();
+usage() if defined $main::opt_h;
+my $STYLE = defined $main::opt_s ? $main::opt_s : 0;
+my $SKIPZERO = defined $main::opt_z ? $main::opt_z : 0;
+
+# process [interval [count]],
+my ($interval, $loop_max);
+if (defined $ARGV[0]) {
+ $interval = $ARGV[0];
+ $loop_max = defined $ARGV[1] ? $ARGV[1] : 2**32;
+ usage() if $interval == 0;
+}
+else {
+ $interval = 1;
+ $loop_max = 1;
+}
+
+# check for -i,
+my %NetworkOnly; # network interfaces to print
+my $NETWORKONLY = 0; # match on network interfaces
+if (defined $main::opt_i) {
+ foreach my $net (split /,/, $main::opt_i) {
+ $NetworkOnly{$net} = 1;
+ }
+ $NETWORKONLY = 1;
+}
+
+# globals,
+my $loop = 0; # current loop number
+my $PAGESIZE = 20; # max lines per header
+my $line = $PAGESIZE; # counter for lines printed
+my %NetworkNames; # Kstat network interfaces
+my %NetworkData; # network interface data
+my %NetworkDataOld; # network interface data
+$main::opt_h = 0;
+$| = 1; # autoflush
+
+# kstat "link" module includes:
+my @Network = qw(dmfe bge be bnx ce eri eth external ge hme igb ige internal ixgbe le net ppp qfe rtls);
+my %Network;
+$Network{$_} = 1 foreach (@Network);
+my $ZONENAME = `/usr/bin/zonename`;
+chomp $ZONENAME;
+
+### Determine network interfaces
+unless (find_nets()) {
+ if ($NETWORKONLY) {
+ print STDERR "ERROR1: $main::opt_i matched no network interfaces.\n";
+ }
+ else {
+ print STDERR "ERROR1: No network interfaces found!\n";
+ }
+ exit 1;
+}
+
+
+#
+# Main
+#
+while (1) {
+
+ ### Print Header
+ if ($line >= $PAGESIZE) {
+ if ($STYLE == 0) {
+ printf "%8s %12s %7s %7s %7s %7s %7s %7s %6s %5s\n",
+ "Time", "Int", "rKB/s", "wKB/s", "rPk/s", "wPk/s", "rAvs",
+ "wAvs", "%Util", "Sat";
+ }
+ elsif ($STYLE == 1) {
+ printf "%8s %12s %14s %14s\n", "Time", "Int", "rKB/s", "wKB/s";
+ }
+
+ $line = 0;
+ }
+
+ ### Get new data
+ my (@NetworkData) = fetch_net_data();
+
+ foreach my $network_data (@NetworkData) {
+
+ ### Extract values
+ my ($int, $rbytes, $wbytes, $rpackets, $wpackets, $speed, $sat, $time)
+ = split /:/, $network_data;
+
+ ### Retrieve old values
+ my ($old_rbytes, $old_wbytes, $old_rpackets, $old_wpackets, $old_sat,
+ $old_time);
+ if (defined $NetworkDataOld{$int}) {
+ ($old_rbytes, $old_wbytes, $old_rpackets, $old_wpackets,
+ $old_sat, $old_time) = split /:/, $NetworkDataOld{$int};
+ }
+ else {
+ $old_rbytes = $old_wbytes = $old_rpackets = $old_wpackets
+ = $old_sat = $old_time = 0;
+ }
+
+ #
+ # Calculate statistics
+ #
+
+ # delta time
+ my $tdiff = $time - $old_time;
+
+ # per second values
+ my $rbps = ($rbytes - $old_rbytes) / $tdiff;
+ my $wbps = ($wbytes - $old_wbytes) / $tdiff;
+ my $rkps = $rbps / 1024;
+ my $wkps = $wbps / 1024;
+ my $rpps = ($rpackets - $old_rpackets) / $tdiff;
+ my $wpps = ($wpackets - $old_wpackets) / $tdiff;
+ my $ravs = $rpps > 0 ? $rbps / $rpps : 0;
+ my $wavs = $wpps > 0 ? $wbps / $wpps : 0;
+
+ # skip zero lines if asked
+ next if $SKIPZERO and ($rbps + $wbps) == 0;
+
+ # % utilisation
+ my $util;
+ if ($speed > 0) {
+ # the following has a mysterious "800", it is 100
+ # for the % conversion, and 8 for bytes2bits.
+ my $rutil = $rbps * 800 / $speed;
+ my $wutil = $wbps * 800 / $speed;
+ $util = $rutil > $wutil ? $rutil : $wutil;
+ $util = 100 if $util > 100;
+ }
+ else {
+ $util = 0;
+ }
+
+ # saturation per sec
+ my $sats = ($sat - $old_sat) / $tdiff;
+
+ #
+ # Print statistics
+ #
+ if ($rbps ne "") {
+ my @Time = localtime();
+
+ if ($STYLE == 0) {
+ printf "%02d:%02d:%02d %12s ",
+ $Time[2], $Time[1], $Time[0], $int;
+ print_neat($rkps);
+ print_neat($wkps);
+ print_neat($rpps);
+ print_neat($wpps);
+ print_neat($ravs);
+ print_neat($wavs);
+ printf "%6.2f %5.2f\n", $util, $sats;
+ }
+ elsif ($STYLE == 1) {
+ printf "%02d:%02d:%02d %12s %14.3f %14.3f\n",
+ $Time[2], $Time[1], $Time[0], $int, $rkps, $wkps;
+ }
+
+ $line++;
+
+ # for multiple interfaces, always print the header
+ $line += $PAGESIZE if @NetworkData > 1;
+ }
+
+ ### Store old values
+ $NetworkDataOld{$int}
+ = "$rbytes:$wbytes:$rpackets:$wpackets:$sat:$time";
+ }
+
+ ### Check for end
+ last if ++$loop == $loop_max;
+
+ ### Interval
+ sleep $interval;
+}
+
+
+# find_nets - walk Kstat to discover network interfaces.
+#
+# This walks %Kstat and populates a %NetworkNames with discovered
+# network interfaces.
+#
+sub find_nets {
+ my $found = 0;
+
+ ### Loop over all Kstat modules
+ foreach my $module (keys %$Kstat) {
+ my $Modules = $Kstat->{$module};
+
+ foreach my $instance (keys %$Modules) {
+ my $Instances = $Modules->{$instance};
+
+ foreach my $name (keys %$Instances) {
+
+ ### Skip interface if asked
+ if ($NETWORKONLY) {
+ next unless $NetworkOnly{$name};
+ }
+
+ my $Names = $Instances->{$name};
+
+ # Check this is a network device.
+ # Matching on ifspeed has been more reliable than "class"
+ # we also match loopback and "link" interfaces.
+ if (defined $$Names{ifspeed} || $module eq "lo"
+ || $module eq "link") {
+ next if $name eq "mac";
+ if ($module eq "link") {
+ my $nname = $name;
+ $nname =~ s/\d+$//;
+ next unless defined $Network{$nname}
+ or $ZONENAME eq $nname
+ or $ZONENAME eq "global";
+ }
+ ### Save network interface
+ $NetworkNames{$name} = $Names;
+ $found++;
+ }
+ }
+ }
+ }
+
+ return $found;
+}
+
+# fetch - fetch Kstat data for the network interfaces.
+#
+# This uses the interfaces in %NetworkNames and returns useful Kstat data.
+# The Kstat values used are rbytes64, obytes64, ipackets64, opackets64
+# (or the 32 bit versions if the 64 bit values are not there).
+#
+sub fetch_net_data {
+ my ($rbytes, $wbytes, $rpackets, $wpackets, $speed, $time);
+ my @NetworkData = ();
+
+ $Kstat->update();
+
+ ### Loop over previously found network interfaces
+ foreach my $name (sort keys %NetworkNames) {
+ my $Names = $NetworkNames{$name};
+
+ if (defined $$Names{opackets}) {
+
+ ### Fetch write bytes
+ if (defined $$Names{obytes64}) {
+ $rbytes = $$Names{rbytes64};
+ $wbytes = $$Names{obytes64};
+ }
+ elsif (defined $$Names{obytes}) {
+ $rbytes = $$Names{rbytes};
+ $wbytes = $$Names{obytes};
+ } else {
+ $rbytes = $wbytes = 0;
+ }
+
+ ### Fetch read bytes
+ if (defined $$Names{opackets64}) {
+ $rpackets = $$Names{ipackets64};
+ $wpackets = $$Names{opackets64};
+ }
+ else {
+ $rpackets = $$Names{ipackets};
+ $wpackets = $$Names{opackets};
+ }
+
+ ### Fetch interface speed
+ if (defined $$Names{ifspeed}) {
+ $speed = $$Names{ifspeed};
+ }
+ else {
+ # if we can't fetch the speed, print the
+ # %Util as 0.0 . To do this we,
+ $speed = 2 ** 48;
+ }
+
+ ### Determine saturation value
+ my $sat = 0;
+ if (defined $$Names{nocanput} or defined $$Names{norcvbuf}) {
+ $sat += defined $$Names{defer} ? $$Names{defer} : 0;
+ $sat += defined $$Names{nocanput} ? $$Names{nocanput} : 0;
+ $sat += defined $$Names{norcvbuf} ? $$Names{norcvbuf} : 0;
+ $sat += defined $$Names{noxmtbuf} ? $$Names{noxmtbuf} : 0;
+ }
+
+ ### use the last snaptime value,
+ $time = $$Names{snaptime};
+
+ ### store data
+ push @NetworkData, "$name:$rbytes:$wbytes:" .
+ "$rpackets:$wpackets:$speed:$sat:$time";
+ }
+ }
+
+ return @NetworkData;
+}
+
+# print_neat - print a float with decimal places if appropriate.
+#
+# This specifically keeps the width to 7 characters, if possible, plus
+# a trailing space.
+#
+sub print_neat {
+ my $num = shift;
+ if ($num >= 100000) {
+ printf "%7d ", $num;
+ } elsif ($num >= 100) {
+ printf "%7.1f ", $num;
+ } else {
+ printf "%7.2f ", $num;
+ }
+}
+
+# usage - print usage and exit.
+#
+sub usage {
+ print STDERR <<END;
+USAGE: nicstat [-hsz] [-i int[,int...]] | [interval [count]]
+ eg, nicstat # print summary since boot
+ nicstat 1 # print continually every 1 second
+ nicstat 1 5 # print 5 times, every 1 second
+ nicstat -s # summary output
+ nicstat -i hme0 # print hme0 only
+END
+ exit 1;
+}
diff --git a/usr/src/cmd/nscd/Makefile b/usr/src/cmd/nscd/Makefile
index aa478fa3b0..1e51819bb2 100644
--- a/usr/src/cmd/nscd/Makefile
+++ b/usr/src/cmd/nscd/Makefile
@@ -29,6 +29,7 @@ MANIFEST= name-service-cache.xml
SVCMETHOD= svc-nscd
include ../Makefile.cmd
+include ../Makefile.ctf
ROOTMANIFESTDIR= $(ROOTSVCSYSTEM)
diff --git a/usr/src/cmd/nscd/svc-nscd b/usr/src/cmd/nscd/svc-nscd
index 0c6aa1bc4b..78b318bf87 100644
--- a/usr/src/cmd/nscd/svc-nscd
+++ b/usr/src/cmd/nscd/svc-nscd
@@ -23,8 +23,8 @@
#
# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
+# Copyright 2012 Joyent, Inc. All rights reserved.
#
-#ident "%Z%%M% %I% %E% SMI"
. /lib/svc/share/smf_include.sh
@@ -51,7 +51,7 @@ if (smf_is_system_labeled); then
$SMF_FMRI`
fi
if [ "$duration" != "transient" ]; then
- ( while true ; do sleep 3600 ; done ) &
+ exit $SMF_EXIT_NODAEMON
fi
# The real daemon is not started in non-global zones,
diff --git a/usr/src/cmd/passwd/Makefile b/usr/src/cmd/passwd/Makefile
index 561357a16c..079e8b6050 100644
--- a/usr/src/cmd/passwd/Makefile
+++ b/usr/src/cmd/passwd/Makefile
@@ -33,6 +33,8 @@ lint := LDLIBS += -lpasswdutil
LDFLAGS += $(ZIGNORE)
LDLIBS += -lbsm -lpam -lnsl
+CPPFLAGS += -D__EXTENSIONS__
+
FILEMODE = 06555
XGETFLAGS += -a -x $(PROG).xcl
diff --git a/usr/src/cmd/pgrep/pgrep.c b/usr/src/cmd/pgrep/pgrep.c
index 4531f11267..857f6ef818 100644
--- a/usr/src/cmd/pgrep/pgrep.c
+++ b/usr/src/cmd/pgrep/pgrep.c
@@ -597,6 +597,8 @@ main(int argc, char *argv[])
const char *optstr;
optdesc_t *optd;
int nmatches, c;
+ const char *zroot;
+ char buf[PATH_MAX];
DIR *dirp;
@@ -626,6 +628,12 @@ main(int argc, char *argv[])
opterr = 0;
+ zroot = zone_get_nroot();
+ if (zroot != NULL) {
+ (void) snprintf(buf, sizeof (buf), "%s/%s", zroot, g_procdir);
+ g_procdir = buf;
+ }
+
while (optind < argc) {
while ((c = getopt(argc, argv, optstr)) != (int)EOF) {
diff --git a/usr/src/cmd/prstat/prstat.c b/usr/src/cmd/prstat/prstat.c
index fc16e435f6..982c860d0d 100644
--- a/usr/src/cmd/prstat/prstat.c
+++ b/usr/src/cmd/prstat/prstat.c
@@ -26,6 +26,7 @@
* Use is subject to license terms.
*
* Portions Copyright 2009 Chad Mynhier
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -181,6 +182,33 @@ static optdesc_t opts = {
-1 /* sort in decreasing order */
};
+
+static int
+proc_snprintf(char *_RESTRICT_KYWD s, size_t n,
+ const char *_RESTRICT_KYWD fmt, ...)
+{
+ static boolean_t ptools_zroot_valid = B_FALSE;
+ static const char *ptools_zroot = NULL;
+ va_list args;
+ int ret, nret = 0;
+
+ if (ptools_zroot_valid == B_FALSE) {
+ ptools_zroot_valid = B_TRUE;
+ ptools_zroot = zone_get_nroot();
+ }
+
+ if (ptools_zroot != NULL) {
+ nret = snprintf(s, n, "%s", ptools_zroot);
+ if (nret > n)
+ return (nret);
+ }
+ va_start(args, fmt);
+ ret = vsnprintf(s + nret, n - nret, fmt, args);
+ va_end(args);
+
+ return (ret + nret);
+}
+
/*
* Print timestamp as decimal reprentation of time_t value (-d u was specified)
* or the standard date format (-d d was specified).
@@ -236,7 +264,12 @@ list_getsize(list_t *list)
size_t i;
uint_t flags = 0;
int ret;
- size_t physmem = sysconf(_SC_PHYS_PAGES) * pagesize;
+ size_t physmem;
+
+ if (!(opts.o_outpmode & OPT_VMUSAGE))
+ return;
+
+ physmem = sysconf(_SC_PHYS_PAGES) * pagesize;
/*
* Determine what swap/rss results to calculate. getvmusage() will
@@ -848,9 +881,9 @@ lwp_update(lwp_info_t *lwp, pid_t pid, id_t lwpid, struct prusage *usage)
static int
read_procfile(fd_t **fd, char *pidstr, char *file, void *buf, size_t bufsize)
{
- char procfile[MAX_PROCFS_PATH];
+ char procfile[PATH_MAX];
- (void) snprintf(procfile, MAX_PROCFS_PATH,
+ (void) proc_snprintf(procfile, PATH_MAX,
"/proc/%s/%s", pidstr, file);
if ((*fd = fd_open(procfile, O_RDONLY, *fd)) == NULL)
return (1);
@@ -1376,6 +1409,7 @@ main(int argc, char **argv)
int timeout;
struct pollfd pollset;
char key;
+ char procpath[PATH_MAX];
(void) setlocale(LC_ALL, "");
(void) textdomain(TEXT_DOMAIN);
@@ -1386,7 +1420,7 @@ main(int argc, char **argv)
pagesize = sysconf(_SC_PAGESIZE);
while ((opt = getopt(argc, argv,
- "vcd:HmarRLtu:U:n:p:C:P:h:s:S:j:k:TJWz:Z")) != (int)EOF) {
+ "vVcd:HmarRLtu:U:n:p:C:P:h:s:S:j:k:TJWz:Z")) != (int)EOF) {
switch (opt) {
case 'r':
opts.o_outpmode |= OPT_NORESOLVE;
@@ -1466,6 +1500,9 @@ main(int argc, char **argv)
while (p = strtok(NULL, ", "))
add_uid(&ruid_tbl, p);
break;
+ case 'V':
+ opts.o_outpmode |= OPT_VMUSAGE;
+ break;
case 'p':
fill_table(&pid_tbl, optarg, 'p');
break;
@@ -1575,7 +1612,8 @@ main(int argc, char **argv)
list_setkeyfunc(NULL, &opts, &lgroups, LT_LGRPS);
if (opts.o_outpmode & OPT_TERMCAP)
curses_on();
- if ((procdir = opendir("/proc")) == NULL)
+ (void) proc_snprintf(procpath, sizeof (procpath), "/proc");
+ if ((procdir = opendir(procpath)) == NULL)
Die(gettext("cannot open /proc directory\n"));
if (opts.o_outpmode & OPT_TTY) {
(void) printf(gettext("Please wait...\r"));
diff --git a/usr/src/cmd/prstat/prstat.h b/usr/src/cmd/prstat/prstat.h
index 293123c5b9..bf38b3e2bd 100644
--- a/usr/src/cmd/prstat/prstat.h
+++ b/usr/src/cmd/prstat/prstat.h
@@ -26,6 +26,7 @@
* Use is subject to license terms.
*
* Portions Copyright 2009 Chad Mynhier
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
#ifndef _PRSTAT_H
@@ -72,6 +73,7 @@ extern "C" {
#define OPT_ZONES 0x2000 /* report about zones */
#define OPT_PSETS 0x4000 /* report for specified psets */
#define OPT_LGRP 0x8000 /* report home lgroups */
+#define OPT_VMUSAGE 0x10000 /* print accurate, but expensive RSS */
#define OPT_UDATE 0x20000 /* print unix timestamp */
#define OPT_DDATE 0x40000 /* print timestamp in date(1) format */
#define OPT_NORESOLVE 0x80000 /* no nsswitch lookups */
diff --git a/usr/src/cmd/prstat/prutil.c b/usr/src/cmd/prstat/prutil.c
index 0f9cbd6c4d..7def1bd40b 100644
--- a/usr/src/cmd/prstat/prutil.c
+++ b/usr/src/cmd/prstat/prutil.c
@@ -25,6 +25,7 @@
* Use is subject to license terms.
*
* Portions Copyright 2009 Chad Mynhier
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -108,7 +109,7 @@ void
Usage()
{
(void) fprintf(stderr, gettext(
- "Usage:\tprstat [-acHJLmrRtTvWZ] [-u euidlist] [-U uidlist]\n"
+ "Usage:\tprstat [-acHJLmrRtTvVWZ] [-u euidlist] [-U uidlist]\n"
"\t[-p pidlist] [-P cpulist] [-C psrsetlist] [-h lgrouplist]\n"
"\t[-j projidlist] [-k taskidlist] [-z zoneidlist]\n"
"\t[-s key | -S key] [-n nprocs[,nusers]] [-d d|u]\n"
diff --git a/usr/src/cmd/prtconf/prtconf.c b/usr/src/cmd/prtconf/prtconf.c
index b7551166cf..0837948936 100644
--- a/usr/src/cmd/prtconf/prtconf.c
+++ b/usr/src/cmd/prtconf/prtconf.c
@@ -347,10 +347,11 @@ main(int argc, char *argv[])
sizeof (hw_provider));
/*
* If 0 bytes are returned (the system returns '1', for the \0),
- * we're probably on x86, default to "Unknown Hardware Vendor".
+ * we're probably on x86, and there has been no si-hw-provider
+ * set in /etc/bootrc, default to Joyent.
*/
if (ret <= 1) {
- (void) strncpy(hw_provider, "Unknown Hardware Vendor",
+ (void) strncpy(hw_provider, "Joyent",
sizeof (hw_provider));
}
(void) printf("System Configuration: %s %s\n", hw_provider,
diff --git a/usr/src/cmd/ps/ps.c b/usr/src/cmd/ps/ps.c
index 78dabbccfe..2773444803 100644
--- a/usr/src/cmd/ps/ps.c
+++ b/usr/src/cmd/ps/ps.c
@@ -27,7 +27,7 @@
*/
/*
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -286,7 +286,7 @@ static int nzoneid = 0;
static int kbytes_per_page;
static int pidwidth;
-static char *procdir = "/proc"; /* standard /proc directory */
+static char procdir[MAXPATHLEN]; /* standard /proc directory */
static struct ughead euid_tbl; /* table to store selected euid's */
static struct ughead ruid_tbl; /* table to store selected real uid's */
@@ -339,6 +339,14 @@ int
main(int argc, char **argv)
{
const char *me;
+ const char *zroot = zone_get_nroot();
+
+ /*
+ * If this is a branded zone, the native procfs may mounted in a
+ * non-standard location. Apply such a path prefix if it exists.
+ */
+ (void) snprintf(procdir, sizeof (procdir), "%s/proc", zroot != NULL ?
+ zroot : "");
/*
* The original two ps'es are linked in a single binary;
@@ -2077,7 +2085,7 @@ print_zombie_field(psinfo_t *psinfo, struct field *f, const char *ttyp)
static void
pr_fields(psinfo_t *psinfo, const char *ttyp,
- void (*print_fld)(psinfo_t *, struct field *, const char *))
+ void (*print_fld)(psinfo_t *, struct field *, const char *))
{
struct field *f;
diff --git a/usr/src/cmd/ptools/Makefile.amd64 b/usr/src/cmd/ptools/Makefile.amd64
index fb0b2d07f8..5e9b4111b6 100644
--- a/usr/src/cmd/ptools/Makefile.amd64
+++ b/usr/src/cmd/ptools/Makefile.amd64
@@ -23,8 +23,6 @@
# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
-#
include ../../../Makefile.cmd
include ../../../Makefile.cmd.64
@@ -33,6 +31,7 @@ CPPFLAGS += -D_SYSCALL32
CFLAGS64 += $(CCVERBOSE)
ROOTISAPROG=$(ROOTBIN64)/$(PROG)
+ROOTISALN=$(LN_$(PROG):%=$(ROOTBIN64)/%)
include ../../Makefile.bld
diff --git a/usr/src/cmd/ptools/Makefile.bld b/usr/src/cmd/ptools/Makefile.bld
index f5b50f5ea1..a295ce5259 100644
--- a/usr/src/cmd/ptools/Makefile.bld
+++ b/usr/src/cmd/ptools/Makefile.bld
@@ -22,10 +22,13 @@
#
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
+# Copyright 2015 Joyent, Inc.
#
PROG:sh = basename `cd ..; pwd`
+include ../../../Makefile.ctf
+
OBJS = $(PROG).o
SRCS = ../$(PROG).c
@@ -69,6 +72,12 @@ CERRWARN_pargs += -_gcc=-Wno-type-limits
CERRWARN += $(CERRWARN_$(PROG))
+#
+# Common code definitions
+#
+COBJS = ptools_common.o
+CINC = -I../../common
+
# pargs depends on ../../common/elfcap components
# pmadvise depends on pmap components
@@ -79,14 +88,34 @@ CPPFLAGS_pargs = -I$(ELFCAP)
OBJS_pargs = elfcap.o
SRCS_pargs = $(ELFCAP)/elfcap.c
-CPPFLAGS_pmap = -I$(PMAP)
-OBJS_pmap = pmap_common.o
+CPPFLAGS_pmap = -I$(PMAP) $(CINC)
+OBJS_pmap = pmap_common.o $(COBJS)
SRCS_pmap = $(PMAP)/pmap_common.c
-CPPFLAGS_pmadvise = -I$(PMAP)
-OBJS_pmadvise = pmap_common.o
+CPPFLAGS_pmadvise = -I$(PMAP) $(CINC)
+OBJS_pmadvise = pmap_common.o $(COBJS)
SRCS_pmadvise = $(PMAP)/pmap_common.c
+CPPFLAGS_preap = $(CINC)
+OBJS_preap = $(COBJS)
+
+CPPFLAGS_psig = $(CINC)
+OBJS_psig = $(COBJS)
+
+CPPFLAGS_ptime = $(CINC)
+OBJS_ptime = $(COBJS)
+
+CPPFLAGS_ptree = $(CINC)
+OBJS_ptree = $(COBJS)
+
+CPPFLAGS_pwait = $(CINC)
+OBJS_pwait = $(COBJS)
+
+CPPFLAGS_pwdx = $(CINC)
+OBJS_pwdx = $(COBJS)
+
+LN_pargs = penv pauxv
+
CPPFLAGS += $(CPPFLAGS_$(PROG))
OBJS += $(OBJS_$(PROG))
SRCS += $(SRCS_$(PROG))
@@ -99,15 +128,23 @@ INSTALL_LEGACY=$(RM) $(ROOTPROCBINSYMLINK) ; \
elfcap.o: $(ELFCAP)/elfcap.c
$(COMPILE.c) -o $@ $(ELFCAP)/elfcap.c
+ $(POST_PROCESS_O)
pmap_common.o: $(PMAP)/pmap_common.c
$(COMPILE.c) -o $@ $(PMAP)/pmap_common.c
+ $(POST_PROCESS_O)
%.o: ../%.c
$(COMPILE.c) $<
+ $(POST_PROCESS_O)
+
+%.o: ../../common/%.c
+ $(COMPILE.c) $<
+ $(POST_PROCESS_O)
-all: $(PROG)
+all: $(PROG) $(LN_$(PROG))
+ROOTBINLN=$(LN_$(PROG):%=$(ROOTBIN)/%)
ROOTBINPROG=$(ROOTBIN)/$(PROG)
ROOTPROCBINSYMLINK=$(ROOT)/usr/proc/bin/$(PROG)
@@ -119,11 +156,23 @@ $(PROG): $$(OBJS)
# Install the ptool, symlinking it into /usr/proc/bin if PTOOL_TYPE is set
# to LEGACY.
#
-install: all $(ROOTISAPROG)
+install: all $(ROOTISAPROG) $(ROOTISALN)
-$(RM) $(ROOTBINPROG)
-$(LN) $(ISAEXEC) $(ROOTBINPROG)
-$(INSTALL_$(PTOOL_TYPE))
+$(ROOTBINLN):
+ -$(RM) $@
+ -$(LN) $(ISAEXEC) $@
+
+$(ROOTISALN): $(ROOTISAPROG)
+ -$(RM) $@
+ -$(LN) $(ROOTISAPROG) $@
+
+$(LN_$(PROG)): $(PROG)
+ -$(RM) $@
+ -$(LN) $(PROG) $@
+
clean:
$(RM) $(OBJS)
diff --git a/usr/src/cmd/ptools/Makefile.i386 b/usr/src/cmd/ptools/Makefile.i386
index 59f42e19a0..9f4ea86151 100644
--- a/usr/src/cmd/ptools/Makefile.i386
+++ b/usr/src/cmd/ptools/Makefile.i386
@@ -20,8 +20,6 @@
# CDDL HEADER END
#
#
-#ident "%Z%%M% %I% %E% SMI"
-#
# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
@@ -31,6 +29,7 @@ include ../../../Makefile.cmd
CFLAGS += $(CCVERBOSE)
ROOTISAPROG=$(ROOTBIN32)/$(PROG)
+ROOTISALN=$(LN_$(PROG):%=$(ROOTBIN32)/%)
include ../../Makefile.bld
diff --git a/usr/src/cmd/ptools/Makefile.ptool b/usr/src/cmd/ptools/Makefile.ptool
index c010eee64f..6a4cdcaa8e 100644
--- a/usr/src/cmd/ptools/Makefile.ptool
+++ b/usr/src/cmd/ptools/Makefile.ptool
@@ -20,8 +20,6 @@
# CDDL HEADER END
#
#
-#ident "%Z%%M% %I% %E% SMI"
-#
# Copyright 2003 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
diff --git a/usr/src/cmd/ptools/Makefile.sparcv9 b/usr/src/cmd/ptools/Makefile.sparcv9
index fb0b2d07f8..5e9b4111b6 100644
--- a/usr/src/cmd/ptools/Makefile.sparcv9
+++ b/usr/src/cmd/ptools/Makefile.sparcv9
@@ -23,8 +23,6 @@
# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
-#
include ../../../Makefile.cmd
include ../../../Makefile.cmd.64
@@ -33,6 +31,7 @@ CPPFLAGS += -D_SYSCALL32
CFLAGS64 += $(CCVERBOSE)
ROOTISAPROG=$(ROOTBIN64)/$(PROG)
+ROOTISALN=$(LN_$(PROG):%=$(ROOTBIN64)/%)
include ../../Makefile.bld
diff --git a/usr/src/cmd/ptools/common/ptools_common.c b/usr/src/cmd/ptools/common/ptools_common.c
new file mode 100644
index 0000000000..a747ab213e
--- /dev/null
+++ b/usr/src/cmd/ptools/common/ptools_common.c
@@ -0,0 +1,50 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/feature_tests.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <zone.h>
+
+/*
+ * Common routines for ptools.
+ */
+
+int
+proc_snprintf(char *_RESTRICT_KYWD s, size_t n,
+ const char *_RESTRICT_KYWD fmt, ...)
+{
+ static boolean_t ptools_zroot_valid = B_FALSE;
+ static const char *ptools_zroot = NULL;
+ va_list args;
+ int ret, nret = 0;
+
+ if (ptools_zroot_valid == B_FALSE) {
+ ptools_zroot_valid = B_TRUE;
+ ptools_zroot = zone_get_nroot();
+ }
+
+ if (ptools_zroot != NULL) {
+ nret = snprintf(s, n, "%s", ptools_zroot);
+ if (nret > n)
+ return (nret);
+ }
+ va_start(args, fmt);
+ ret = vsnprintf(s + nret, n - nret, fmt, args);
+ va_end(args);
+
+ return (ret + nret);
+}
diff --git a/usr/src/cmd/ptools/common/ptools_common.h b/usr/src/cmd/ptools/common/ptools_common.h
new file mode 100644
index 0000000000..52bcae9e51
--- /dev/null
+++ b/usr/src/cmd/ptools/common/ptools_common.h
@@ -0,0 +1,36 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _PTOOLS_COMMON_H
+#define _PTOOLS_COMMON_H
+
+#include <sys/feature_tests.h>
+
+/*
+ * Common functions for the ptools.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int proc_snprintf(char *_RESTRICT_KYWD, size_t,
+ const char *_RESTRICT_KYWD, ...);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _PTOOLS_COMMON_H */
diff --git a/usr/src/cmd/ptools/pargs/pargs.c b/usr/src/cmd/ptools/pargs/pargs.c
index c363b34a7c..fd2be024f3 100644
--- a/usr/src/cmd/ptools/pargs/pargs.c
+++ b/usr/src/cmd/ptools/pargs/pargs.c
@@ -23,7 +23,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
*/
/*
@@ -73,6 +73,13 @@
#include <wctype.h>
#include <widec.h>
#include <elfcap.h>
+#include <libgen.h>
+
+typedef enum pargs_cmd {
+ PARGS_ARGV,
+ PARGS_ENV,
+ PARGS_AUXV
+} pargs_cmd_t;
typedef struct pargs_data {
struct ps_prochandle *pd_proc; /* target proc handle */
@@ -797,6 +804,7 @@ static struct aux_id aux_arr[] = {
{ AT_BASE, "AT_BASE", at_null },
{ AT_FLAGS, "AT_FLAGS", at_null },
{ AT_ENTRY, "AT_ENTRY", at_null },
+ { AT_RANDOM, "AT_RANDOM", at_null },
{ AT_SUN_UID, "AT_SUN_UID", at_uid },
{ AT_SUN_RUID, "AT_SUN_RUID", at_uid },
{ AT_SUN_GID, "AT_SUN_GID", at_gid },
@@ -816,9 +824,12 @@ static struct aux_id aux_arr[] = {
{ AT_SUN_AUXFLAGS, "AT_SUN_AUXFLAGS", at_flags },
{ AT_SUN_EMULATOR, "AT_SUN_EMULATOR", at_str },
{ AT_SUN_BRANDNAME, "AT_SUN_BRANDNAME", at_str },
+ { AT_SUN_BRAND_NROOT, "AT_SUN_BRAND_NROOT", at_str },
{ AT_SUN_BRAND_AUX1, "AT_SUN_BRAND_AUX1", at_null },
{ AT_SUN_BRAND_AUX2, "AT_SUN_BRAND_AUX2", at_null },
- { AT_SUN_BRAND_AUX3, "AT_SUN_BRAND_AUX3", at_null }
+ { AT_SUN_BRAND_AUX3, "AT_SUN_BRAND_AUX3", at_null },
+ { AT_SUN_BRAND_AUX4, "AT_SUN_BRAND_AUX4", at_null },
+ { AT_SUN_COMMPAGE, "AT_SUN_COMMPAGE", at_null }
};
#define N_AT_ENTS (sizeof (aux_arr) / sizeof (struct aux_id))
@@ -1279,19 +1290,24 @@ main(int argc, char *argv[])
int opt;
int error = 1;
core_content_t content = 0;
+ pargs_cmd_t cmd = PARGS_ARGV;
(void) setlocale(LC_ALL, "");
- if ((command = strrchr(argv[0], '/')) != NULL)
- command++;
- else
- command = argv[0];
+ command = basename(argv[0]);
+
+ if (strcmp(command, "penv") == 0)
+ cmd = PARGS_ENV;
+ else if (strcmp(command, "pauxv") == 0)
+ cmd = PARGS_AUXV;
while ((opt = getopt(argc, argv, "acelxF")) != EOF) {
switch (opt) {
case 'a': /* show process arguments */
content |= CC_CONTENT_STACK;
aflag++;
+ if (cmd != PARGS_ARGV)
+ errflg++;
break;
case 'c': /* force 7-bit ascii */
cflag++;
@@ -1299,13 +1315,19 @@ main(int argc, char *argv[])
case 'e': /* show environment variables */
content |= CC_CONTENT_STACK;
eflag++;
+ if (cmd != PARGS_ARGV)
+ errflg++;
break;
case 'l':
lflag++;
aflag++; /* -l implies -a */
+ if (cmd != PARGS_ARGV)
+ errflg++;
break;
case 'x': /* show aux vector entries */
xflag++;
+ if (cmd != PARGS_ARGV)
+ errflg++;
break;
case 'F':
/*
@@ -1322,8 +1344,19 @@ main(int argc, char *argv[])
/* -a is the default if no options are specified */
if ((aflag + eflag + xflag + lflag) == 0) {
- aflag++;
- content |= CC_CONTENT_STACK;
+ switch (cmd) {
+ case PARGS_ARGV:
+ aflag++;
+ content |= CC_CONTENT_STACK;
+ break;
+ case PARGS_ENV:
+ content |= CC_CONTENT_STACK;
+ eflag++;
+ break;
+ case PARGS_AUXV:
+ xflag++;
+ break;
+ }
}
/* -l cannot be used with the -x or -e flags */
diff --git a/usr/src/cmd/ptools/pfiles/pfiles.c b/usr/src/cmd/ptools/pfiles/pfiles.c
index 0b84047806..1c0c30172a 100644
--- a/usr/src/cmd/ptools/pfiles/pfiles.c
+++ b/usr/src/cmd/ptools/pfiles/pfiles.c
@@ -24,7 +24,7 @@
* Copyright 2012 DEY Storage Systems, Inc. All rights reserved.
*/
/*
- * Copyright (c) 2013 Joyent, Inc. All Rights reserved.
+ * Copyright (c) 2014 Joyent, Inc. All Rights reserved.
*/
#include <stdio.h>
@@ -557,6 +557,7 @@ show_sockaddr(const char *str, struct sockaddr *sa, socklen_t len)
case AF_IPX: p = "AF_IPX"; break;
case AF_ROUTE: p = "AF_ROUTE"; break;
case AF_LINK: p = "AF_LINK"; break;
+ case AF_LX_NETLINK: p = "AF_LX_NETLINK"; break;
}
(void) printf("\t%s: %s\n", str, p);
diff --git a/usr/src/cmd/ptools/pflags/pflags.c b/usr/src/cmd/ptools/pflags/pflags.c
index 8054a80d3c..f19a945d95 100644
--- a/usr/src/cmd/ptools/pflags/pflags.c
+++ b/usr/src/cmd/ptools/pflags/pflags.c
@@ -25,7 +25,7 @@
*/
/*
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#include <stdio.h>
@@ -469,6 +469,9 @@ prwhy(int why)
case PR_SUSPENDED:
str = "PR_SUSPENDED";
break;
+ case PR_BRAND:
+ str = "PR_BRAND";
+ break;
default:
str = buf;
(void) sprintf(str, "%d", why);
diff --git a/usr/src/cmd/ptools/pmap/pmap.c b/usr/src/cmd/ptools/pmap/pmap.c
index 78bfa6b596..03f8bde791 100644
--- a/usr/src/cmd/ptools/pmap/pmap.c
+++ b/usr/src/cmd/ptools/pmap/pmap.c
@@ -22,6 +22,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#include <stdio.h>
@@ -42,6 +43,7 @@
#include <sys/mman.h>
#include <sys/lgrp_user.h>
#include <libproc.h>
+#include "ptools_common.h"
#include "pmap_common.h"
@@ -199,7 +201,7 @@ main(int argc, char **argv)
const char *bar;
struct rlimit rlim;
struct stat64 statbuf;
- char buf[128];
+ char buf[PATH_MAX];
int mapfd;
int prg_gflags = PGRAB_RDONLY;
int prr_flags = 0;
@@ -358,7 +360,7 @@ main(int argc, char **argv)
proc_unctrl_psinfo(&psinfo);
if (Pstate(Pr) != PS_DEAD) {
- (void) snprintf(buf, sizeof (buf),
+ (void) proc_snprintf(buf, sizeof (buf),
"/proc/%d/map", (int)psinfo.pr_pid);
if ((mapfd = open(buf, O_RDONLY)) < 0) {
(void) fprintf(stderr, "%s: cannot "
@@ -590,7 +592,7 @@ rmapping_iter(struct ps_prochandle *Pr, proc_map_f *func, void *cd)
prmap_t *prmapp, *pmp;
ssize_t n;
- (void) snprintf(mapname, sizeof (mapname),
+ (void) proc_snprintf(mapname, sizeof (mapname),
"/proc/%d/rmap", (int)Pstatus(Pr)->pr_pid);
if ((mapfd = open(mapname, O_RDONLY)) < 0 || fstat(mapfd, &st) != 0) {
@@ -631,7 +633,7 @@ xmapping_iter(struct ps_prochandle *Pr, proc_xmap_f *func, void *cd, int doswap)
prxmap_t *prmapp, *pmp;
ssize_t n;
- (void) snprintf(mapname, sizeof (mapname),
+ (void) proc_snprintf(mapname, sizeof (mapname),
"/proc/%d/xmap", (int)Pstatus(Pr)->pr_pid);
if ((mapfd = open(mapname, O_RDONLY)) < 0 || fstat(mapfd, &st) != 0) {
diff --git a/usr/src/cmd/ptools/pmap/pmap_common.c b/usr/src/cmd/ptools/pmap/pmap_common.c
index fff55ffdbc..81f42d67f7 100644
--- a/usr/src/cmd/ptools/pmap/pmap_common.c
+++ b/usr/src/cmd/ptools/pmap/pmap_common.c
@@ -37,6 +37,7 @@
#include <sys/types.h>
#include "pmap_common.h"
+#include "ptools_common.h"
/*
* We compare the high memory addresses since stacks are faulted in from
@@ -88,7 +89,7 @@ make_name(struct ps_prochandle *Pr, int lflag, uintptr_t addr,
return (NULL);
/* first see if we can find a path via /proc */
- (void) snprintf(path, sizeof (path), "/proc/%d/path/%s",
+ (void) proc_snprintf(path, sizeof (path), "/proc/%d/path/%s",
(int)Psp->pr_pid, mapname);
len = readlink(path, buf, bufsz - 1);
if (len >= 0) {
@@ -97,7 +98,7 @@ make_name(struct ps_prochandle *Pr, int lflag, uintptr_t addr,
}
/* fall back to object information reported by /proc */
- (void) snprintf(path, sizeof (path),
+ (void) proc_snprintf(path, sizeof (path),
"/proc/%d/object/%s", (int)Psp->pr_pid, mapname);
if (stat(path, &statb) == 0) {
dev_t dev = statb.st_dev;
diff --git a/usr/src/cmd/ptools/preap/preap.c b/usr/src/cmd/ptools/preap/preap.c
index 8d30b8027c..6d8eb75611 100644
--- a/usr/src/cmd/ptools/preap/preap.c
+++ b/usr/src/cmd/ptools/preap/preap.c
@@ -24,8 +24,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
@@ -37,6 +35,8 @@
#include <sys/types.h>
#include <sys/wait.h>
#include <libproc.h>
+#include <limits.h>
+#include "ptools_common.h"
#define NOREAP_TIME 60 /* wait 60 seconds before allow a reap */
@@ -53,11 +53,11 @@ intr(int sig)
static int
open_usage(pid_t pid, int *perr)
{
- char path[64];
+ char path[PATH_MAX];
struct stat64 st;
int fd;
- (void) snprintf(path, sizeof (path), "/proc/%d/usage", (int)pid);
+ (void) proc_snprintf(path, sizeof (path), "/proc/%d/usage", (int)pid);
/*
* Attempt to open the usage file, and return the fd if we can
diff --git a/usr/src/cmd/ptools/psig/psig.c b/usr/src/cmd/ptools/psig/psig.c
index 70af35cb5e..848fda834c 100644
--- a/usr/src/cmd/ptools/psig/psig.c
+++ b/usr/src/cmd/ptools/psig/psig.c
@@ -21,10 +21,9 @@
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <stdio.h>
#include <stdio_ext.h>
#include <stdlib.h>
@@ -37,6 +36,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <libproc.h>
+#include "ptools_common.h"
/* evil knowledge of libc internals */
#include "../../../lib/libc/inc/thr_uberdata.h"
@@ -172,7 +172,7 @@ lwp_iter(void *cd, const lwpstatus_t *lwpstatus)
static int
look(char *arg)
{
- char pathname[100];
+ char pathname[PATH_MAX];
struct stat statb;
int fd = -1;
int sig, gcode;
@@ -199,7 +199,8 @@ look(char *arg)
(void) memcpy(&psinfo, psinfop, sizeof (psinfo_t));
proc_unctrl_psinfo(&psinfo);
- (void) sprintf(pathname, "/proc/%d/sigact", (int)psinfo.pr_pid);
+ (void) proc_snprintf(pathname, sizeof (pathname), "/proc/%d/sigact",
+ (int)psinfo.pr_pid);
if ((fd = open(pathname, O_RDONLY)) < 0) {
perr("open sigact");
goto look_error;
@@ -213,8 +214,8 @@ look(char *arg)
action = malloc(maxsig * sizeof (struct sigaction));
if (action == NULL) {
(void) fprintf(stderr,
- "%s: cannot malloc() space for %d sigaction structures\n",
- command, maxsig);
+ "%s: cannot malloc() space for %d sigaction structures\n",
+ command, maxsig);
goto look_error;
}
if (read(fd, (char *)action, maxsig * sizeof (struct sigaction)) !=
@@ -239,7 +240,7 @@ look(char *arg)
if (psinfo.pr_dmodel != PR_MODEL_NATIVE) {
caddr32_t addr;
aharraddr = uberaddr +
- offsetof(uberdata32_t, siguaction);
+ offsetof(uberdata32_t, siguaction);
aharrlen = sizeof (siguaction32_t) * NSIG;
(void) Pread(Pr, &addr, sizeof (addr),
uberaddr + offsetof(uberdata32_t, sigacthandler));
@@ -248,7 +249,7 @@ look(char *arg)
#endif
{
aharraddr = uberaddr +
- offsetof(uberdata_t, siguaction);
+ offsetof(uberdata_t, siguaction);
aharrlen = sizeof (siguaction_t) * NSIG;
(void) Pread(Pr, &intfnaddr, sizeof (intfnaddr),
uberaddr + offsetof(uberdata_t, sigacthandler));
@@ -323,7 +324,7 @@ look(char *arg)
(void) printf("\t%-8s", hname);
else
(void) printf("\t0x%-8lx",
- (ulong_t)haddr);
+ (ulong_t)haddr);
s = sigflags(sig, sp->sa_flags);
(void) printf("%s", (*s != '\0')? s : "\t0");
@@ -334,7 +335,7 @@ look(char *arg)
}
} else if (sig == SIGCLD) {
s = sigflags(sig,
- sp->sa_flags & (SA_NOCLDWAIT|SA_NOCLDSTOP));
+ sp->sa_flags & (SA_NOCLDWAIT|SA_NOCLDSTOP));
if (*s != '\0')
(void) printf("\t\t%s", s);
}
@@ -371,7 +372,7 @@ sigflags(int sig, int flags)
static char code_buf[100];
char *str = code_buf;
int flagmask =
- (SA_ONSTACK|SA_RESETHAND|SA_RESTART|SA_SIGINFO|SA_NODEFER);
+ (SA_ONSTACK|SA_RESETHAND|SA_RESTART|SA_SIGINFO|SA_NODEFER);
if (sig == SIGCLD)
flagmask |= (SA_NOCLDSTOP|SA_NOCLDWAIT);
@@ -414,19 +415,19 @@ deinterpose(int sig, void *aharr, psinfo_t *psinfo, struct sigaction *sp)
#ifdef _LP64
if (psinfo->pr_dmodel != PR_MODEL_NATIVE) {
struct sigaction32 *sa32 = (struct sigaction32 *)
- ((uintptr_t)aharr + sig * sizeof (siguaction32_t) +
- offsetof(siguaction32_t, sig_uaction));
+ ((uintptr_t)aharr + sig * sizeof (siguaction32_t) +
+ offsetof(siguaction32_t, sig_uaction));
sp->sa_flags = sa32->sa_flags;
sp->sa_handler = (void (*)())(uintptr_t)sa32->sa_handler;
(void) memcpy(&sp->sa_mask, &sa32->sa_mask,
- sizeof (sp->sa_mask));
+ sizeof (sp->sa_mask));
} else
#endif
{
struct sigaction *sa = (struct sigaction *)
- ((uintptr_t)aharr + sig * sizeof (siguaction_t) +
- offsetof(siguaction_t, sig_uaction));
+ ((uintptr_t)aharr + sig * sizeof (siguaction_t) +
+ offsetof(siguaction_t, sig_uaction));
sp->sa_flags = sa->sa_flags;
sp->sa_handler = sa->sa_handler;
diff --git a/usr/src/cmd/ptools/ptime/ptime.c b/usr/src/cmd/ptools/ptime/ptime.c
index 2da2f8d281..b1f53593c2 100644
--- a/usr/src/cmd/ptools/ptime/ptime.c
+++ b/usr/src/cmd/ptools/ptime/ptime.c
@@ -41,6 +41,8 @@
#include <sys/time.h>
#include <signal.h>
#include <libproc.h>
+#include <limits.h>
+#include "ptools_common.h"
static int look(pid_t);
static void hr_min_sec(char *, long);
@@ -189,7 +191,7 @@ main(int argc, char **argv)
static int
look(pid_t pid)
{
- char pathname[100];
+ char pathname[PATH_MAX];
int rval = 0;
int fd;
psinfo_t psinfo;
@@ -203,7 +205,8 @@ look(pid_t pid)
if (proc_get_psinfo(pid, &psinfo) < 0)
return (perr("read psinfo"));
- (void) sprintf(pathname, "/proc/%d/usage", (int)pid);
+ (void) proc_snprintf(pathname, sizeof (pathname), "/proc/%d/usage",
+ (int)pid);
if ((fd = open(pathname, O_RDONLY)) < 0)
return (perr("open usage"));
diff --git a/usr/src/cmd/ptools/ptree/ptree.c b/usr/src/cmd/ptools/ptree/ptree.c
index 27fef9b7f0..92a65e2f44 100644
--- a/usr/src/cmd/ptools/ptree/ptree.c
+++ b/usr/src/cmd/ptools/ptree/ptree.c
@@ -21,14 +21,13 @@
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
/*
* ptree -- print family tree of processes
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <assert.h>
#include <stdio.h>
#include <string.h>
@@ -48,6 +47,7 @@
#include <sys/ctfs.h>
#include <libcontract_priv.h>
#include <sys/stat.h>
+#include "ptools_common.h"
#define FAKEDPID0(p) (p->pid == 0 && p->psargs[0] == '\0')
@@ -103,10 +103,11 @@ main(int argc, char **argv)
char *s;
int n;
int retc = 0;
+ char ppath[PATH_MAX];
DIR *dirp;
struct dirent *dentp;
- char pname[100];
+ char pname[PATH_MAX];
int pdlen;
ps_t *p;
@@ -169,16 +170,18 @@ main(int argc, char **argv)
psize = 0;
ps = NULL;
+ (void) proc_snprintf(ppath, sizeof (ppath), "/proc");
+
/*
* Search the /proc directory for all processes.
*/
- if ((dirp = opendir("/proc")) == NULL) {
- (void) fprintf(stderr, "%s: cannot open /proc directory\n",
- command);
+ if ((dirp = opendir(ppath)) == NULL) {
+ (void) fprintf(stderr, "%s: cannot open %s directory\n",
+ command, ppath);
return (1);
}
- (void) strcpy(pname, "/proc");
+ (void) strcpy(pname, ppath);
pdlen = strlen(pname);
pname[pdlen++] = '/';
diff --git a/usr/src/cmd/ptools/pwait/pwait.c b/usr/src/cmd/ptools/pwait/pwait.c
index 0733c355cf..ec11573477 100644
--- a/usr/src/cmd/ptools/pwait/pwait.c
+++ b/usr/src/cmd/ptools/pwait/pwait.c
@@ -23,8 +23,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <stdio.h>
#include <stdio_ext.h>
#include <ctype.h>
@@ -39,6 +37,8 @@
#include <poll.h>
#include <procfs.h>
#include <sys/resource.h>
+#include <limits.h>
+#include "ptools_common.h"
static int count_my_files();
static char *command;
@@ -49,6 +49,7 @@ static char *command;
int
main(int argc, char **argv)
{
+ char buf[PATH_MAX];
unsigned long remain = 0;
struct pollfd *pollfd;
struct pollfd *pfd;
@@ -75,10 +76,12 @@ main(int argc, char **argv)
(void) fprintf(stderr, "usage:\t%s [-v] pid ...\n", command);
(void) fprintf(stderr, " (wait for processes to terminate)\n");
(void) fprintf(stderr,
- " -v: verbose; report terminations to standard out\n");
+ " -v: verbose; report terminations to standard out\n");
return (2);
}
+ (void) proc_snprintf(buf, sizeof (buf), "/proc/");
+
/* make sure we have enough file descriptors */
if (getrlimit(RLIMIT_NOFILE, &rlim) == 0) {
int nfiles = count_my_files();
@@ -87,8 +90,8 @@ main(int argc, char **argv)
rlim.rlim_cur = argc + nfiles + SLOP;
if (setrlimit(RLIMIT_NOFILE, &rlim) != 0) {
(void) fprintf(stderr,
- "%s: insufficient file descriptors\n",
- command);
+ "%s: insufficient file descriptors\n",
+ command);
return (2);
}
}
@@ -108,11 +111,11 @@ main(int argc, char **argv)
if (strchr(arg, '/') != NULL)
(void) strncpy(psinfofile, arg, sizeof (psinfofile));
else {
- (void) strcpy(psinfofile, "/proc/");
+ (void) strcpy(psinfofile, buf);
(void) strncat(psinfofile, arg, sizeof (psinfofile)-6);
}
(void) strncat(psinfofile, "/psinfo",
- sizeof (psinfofile)-strlen(psinfofile));
+ sizeof (psinfofile)-strlen(psinfofile));
pfd = &pollfd[i];
if ((pfd->fd = open(psinfofile, O_RDONLY)) >= 0) {
@@ -126,7 +129,7 @@ main(int argc, char **argv)
pfd->revents = 0;
} else if (errno == ENOENT) {
(void) fprintf(stderr, "%s: no such process: %s\n",
- command, arg);
+ command, arg);
} else {
perror(arg);
}
@@ -160,9 +163,9 @@ main(int argc, char **argv)
if (pread(pfd->fd, &psinfo,
sizeof (psinfo), (off_t)0)
== sizeof (psinfo)) {
- (void) printf(
- "%s: terminated, wait status 0x%.4x\n",
- arg, psinfo.pr_wstat);
+ (void) printf("%s: terminated, "
+ "wait status 0x%.4x\n",
+ arg, psinfo.pr_wstat);
} else {
(void) printf(
"%s: terminated\n", arg);
@@ -170,10 +173,10 @@ main(int argc, char **argv)
}
if (pfd->revents & POLLNVAL)
(void) printf("%s: system process\n",
- arg);
+ arg);
if (pfd->revents & ~(POLLPRI|POLLHUP|POLLNVAL))
(void) printf("%s: unknown error\n",
- arg);
+ arg);
}
(void) close(pfd->fd);
diff --git a/usr/src/cmd/ptools/pwdx/pwdx.c b/usr/src/cmd/ptools/pwdx/pwdx.c
index adf42c0877..4a2c6f0c3f 100644
--- a/usr/src/cmd/ptools/pwdx/pwdx.c
+++ b/usr/src/cmd/ptools/pwdx/pwdx.c
@@ -22,10 +22,9 @@
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <stdio.h>
#include <unistd.h>
#include <string.h>
@@ -33,6 +32,8 @@
#include <libproc.h>
#include <sys/param.h>
+#include "ptools_common.h"
+
static char *command;
static int
@@ -49,7 +50,7 @@ show_cwd(const char *arg)
return (1);
}
- (void) snprintf(proc, sizeof (proc), "/proc/%d/path/cwd",
+ (void) proc_snprintf(proc, sizeof (proc), "/proc/%d/path/cwd",
(int)p.pr_pid);
if ((ret = readlink(proc, cwd, sizeof (cwd) - 1)) <= 0) {
diff --git a/usr/src/cmd/rcap/common/utils.c b/usr/src/cmd/rcap/common/utils.c
index 799fdcef23..dd511c7c50 100644
--- a/usr/src/cmd/rcap/common/utils.c
+++ b/usr/src/cmd/rcap/common/utils.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent, Inc. All rights reserved.
*/
#include <sys/param.h>
@@ -257,77 +258,3 @@ xatoi(char *p)
return (i);
}
}
-
-/*
- * get_running_zones() calls zone_list(2) to find out how many zones are
- * running. It then calls zone_list(2) again to fetch the list of running
- * zones (stored in *zents).
- */
-int
-get_running_zones(uint_t *nzents, zone_entry_t **zents)
-{
- zoneid_t *zids;
- uint_t nzents_saved;
- int i;
- zone_entry_t *zentp;
- zone_state_t zstate;
-
- *zents = NULL;
- if (zone_list(NULL, nzents) != 0) {
- warn(gettext("could not get zoneid list\n"));
- return (E_ERROR);
- }
-
-again:
- if (*nzents == 0)
- return (E_SUCCESS);
-
- if ((zids = (zoneid_t *)calloc(*nzents, sizeof (zoneid_t))) == NULL) {
- warn(gettext("out of memory: zones will not be capped\n"));
- return (E_ERROR);
- }
-
- nzents_saved = *nzents;
-
- if (zone_list(zids, nzents) != 0) {
- warn(gettext("could not get zone list\n"));
- free(zids);
- return (E_ERROR);
- }
- if (*nzents != nzents_saved) {
- /* list changed, try again */
- free(zids);
- goto again;
- }
-
- *zents = calloc(*nzents, sizeof (zone_entry_t));
- if (*zents == NULL) {
- warn(gettext("out of memory: zones will not be capped\n"));
- free(zids);
- return (E_ERROR);
- }
-
- zentp = *zents;
- for (i = 0; i < *nzents; i++) {
- char name[ZONENAME_MAX];
-
- if (getzonenamebyid(zids[i], name, sizeof (name)) < 0) {
- warn(gettext("could not get name for "
- "zoneid %d\n"), zids[i]);
- continue;
- }
-
- (void) strlcpy(zentp->zname, name, sizeof (zentp->zname));
- zentp->zid = zids[i];
- if (zone_get_state(name, &zstate) != Z_OK ||
- zstate != ZONE_STATE_RUNNING)
- continue;
-
-
- zentp++;
- }
- *nzents = zentp - *zents;
-
- free(zids);
- return (E_SUCCESS);
-}
diff --git a/usr/src/cmd/rcap/common/utils.h b/usr/src/cmd/rcap/common/utils.h
index 7196cfb4ce..cf2e17c080 100644
--- a/usr/src/cmd/rcap/common/utils.h
+++ b/usr/src/cmd/rcap/common/utils.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent, Inc. All rights reserved.
*/
#ifndef _UTILS_H
@@ -98,7 +99,6 @@ extern void vdprintfe(int, const char *, va_list);
extern void dprintfe(int, char *, ...);
extern void hrt2ts(hrtime_t, timestruc_t *);
extern int xatoi(char *);
-extern int get_running_zones(uint_t *, zone_entry_t **);
#ifdef __cplusplus
}
diff --git a/usr/src/cmd/rcap/rcapadm/rcapadm.c b/usr/src/cmd/rcap/rcapadm/rcapadm.c
index 92888b2071..b92115469a 100644
--- a/usr/src/cmd/rcap/rcapadm/rcapadm.c
+++ b/usr/src/cmd/rcap/rcapadm/rcapadm.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -145,20 +146,29 @@ out:
scf_handle_destroy(h);
}
+static int
+set_zone_cap(char *zonename, uint64_t mcap)
+{
+ char cmd[128 + ZONENAME_MAX];
+
+ (void) snprintf(cmd, sizeof (cmd), "/usr/bin/prctl -r "
+ "-n zone.max-physical-memory -v %llu -i zone %s", mcap, zonename);
+ return (system(cmd));
+}
+
/*
* Update the in-kernel memory cap for the specified zone.
*/
static int
update_zone_mcap(char *zonename, char *maxrss)
{
- zoneid_t zone_id;
uint64_t num;
if (getzoneid() != GLOBAL_ZONEID || zonecfg_in_alt_root())
return (E_SUCCESS);
/* get the running zone from the kernel */
- if ((zone_id = getzoneidbyname(zonename)) == -1) {
+ if (getzoneidbyname(zonename) == -1) {
(void) fprintf(stderr, gettext("zone '%s' must be running\n"),
zonename);
return (E_ERROR);
@@ -169,7 +179,7 @@ update_zone_mcap(char *zonename, char *maxrss)
return (E_ERROR);
}
- if (zone_setattr(zone_id, ZONE_ATTR_PHYS_MCAP, &num, 0) == -1) {
+ if (set_zone_cap(zonename, num) == -1) {
(void) fprintf(stderr, gettext("could not set memory "
"cap for zone '%s'\n"), zonename);
return (E_ERROR);
diff --git a/usr/src/cmd/rcap/rcapd/rcapd_collection_zone.c b/usr/src/cmd/rcap/rcapd/rcapd_collection_zone.c
index db86aa6276..88403dda37 100644
--- a/usr/src/cmd/rcap/rcapd/rcapd_collection_zone.c
+++ b/usr/src/cmd/rcap/rcapd/rcapd_collection_zone.c
@@ -21,16 +21,17 @@
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011 Joyent, Inc. All rights reserved.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <procfs.h>
#include <project.h>
#include <stdlib.h>
#include <strings.h>
#include <zone.h>
#include <libzonecfg.h>
+#include <dirent.h>
+#include <libproc.h>
#include "rcapd.h"
#include "utils.h"
@@ -39,61 +40,117 @@ extern boolean_t gz_capped;
/* round up to next y = 2^n */
#define ROUNDUP(x, y) (((x) + ((y) - 1)) & ~((y) - 1))
-static void
-update_zone(zone_entry_t *zent, void *walk_data)
+static struct ps_prochandle *
+grab_zone_proc(zoneid_t zid)
{
- void(*update_notification_cb)(char *, char *, int, uint64_t, int) =
- (void(*)(char *, char *, int, uint64_t, int))walk_data;
- int changes;
- int64_t max_rss;
+ DIR *dirp;
+ struct dirent *dentp;
+ int pid, pid_self, tmp;
+ psinfo_t psinfo;
+ struct ps_prochandle *pr = NULL;
+
+ pid_self = getpid();
+
+ if ((dirp = opendir("/proc")) == NULL)
+ return (NULL);
+
+ while (dentp = readdir(dirp)) {
+ pid = atoi(dentp->d_name);
+
+ /* Skip self */
+ if (pid == pid_self)
+ continue;
+
+ if (proc_get_psinfo(pid, &psinfo) != 0)
+ continue;
+
+ if (psinfo.pr_zoneid != zid)
+ continue;
+
+ /* attempt to grab process */
+ if ((pr = Pgrab(pid, 0, &tmp)) != NULL) {
+ if (Psetflags(pr, PR_RLC) != 0) {
+ Prelease(pr, 0);
+ }
+ if (Pcreate_agent(pr) == 0) {
+ if (pr_getzoneid(pr) != zid) {
+ Prelease(pr, 0);
+ continue;
+ }
+
+ (void) closedir(dirp);
+ return (pr);
+ } else {
+ Prelease(pr, 0);
+ }
+ }
+ }
+
+ (void) closedir(dirp);
+ return (NULL);
+}
+
+static uint64_t
+get_zone_cap(zoneid_t zid)
+{
+ rctlblk_t *rblk;
uint64_t mcap;
- lcollection_t *lcol;
- rcid_t colid;
+ struct ps_prochandle *pr;
- if (zone_getattr(zent->zid, ZONE_ATTR_PHYS_MCAP, &mcap,
- sizeof (mcap)) != -1 && mcap != 0)
- max_rss = ROUNDUP(mcap, 1024) / 1024;
- else
- max_rss = 0;
-
- if (zent->zid == GLOBAL_ZONEID) {
- if (max_rss > 0)
- gz_capped = B_TRUE;
- else
- gz_capped = B_FALSE;
+ if ((rblk = (rctlblk_t *)malloc(rctlblk_size())) == NULL)
+ return (UINT64_MAX);
+
+ if ((pr = grab_zone_proc(zid)) == NULL) {
+ free(rblk);
+ return (UINT64_MAX);
}
+ if (pr_getrctl(pr, "zone.max-physical-memory", NULL, rblk,
+ RCTL_FIRST)) {
+ Pdestroy_agent(pr);
+ Prelease(pr, 0);
+ free(rblk);
+ return (UINT64_MAX);
+ }
- colid.rcid_type = RCIDT_ZONE;
- colid.rcid_val = zent->zid;
+ Pdestroy_agent(pr);
+ Prelease(pr, 0);
- lcol = lcollection_insert_update(&colid, max_rss, zent->zname,
- &changes);
- if (update_notification_cb != NULL)
- update_notification_cb("zone", zent->zname, changes, max_rss,
- (lcol != NULL) ? lcol->lcol_mark : 0);
+ mcap = rctlblk_get_value(rblk);
+ free(rblk);
+ return (mcap);
}
-
+/*
+ * For zones, rcapd only caps the global zone, since each non-global zone
+ * caps itself.
+ */
/* ARGSUSED */
void
lcollection_update_zone(lcollection_update_type_t ut,
void(*update_notification_cb)(char *, char *, int, uint64_t, int))
{
- int i;
- uint_t nzents;
- zone_entry_t *zents;
-
- /*
- * Enumerate running zones.
- */
- if (get_running_zones(&nzents, &zents) != 0)
- return;
-
- for (i = 0; i < nzents; i++) {
- update_zone(&zents[i], (void *)update_notification_cb);
+ int changes;
+ int64_t max_rss;
+ uint64_t mcap;
+ lcollection_t *lcol;
+ rcid_t colid;
+ mcap = get_zone_cap(GLOBAL_ZONEID);
+ if (mcap != 0 && mcap != UINT64_MAX) {
+ max_rss = ROUNDUP(mcap, 1024) / 1024;
+ gz_capped = B_TRUE;
+ } else {
+ max_rss = UINT64_MAX / 1024;
+ gz_capped = B_FALSE;
}
- free(zents);
+ colid.rcid_type = RCIDT_ZONE;
+ colid.rcid_val = GLOBAL_ZONEID;
+
+ lcol = lcollection_insert_update(&colid, max_rss, GLOBAL_ZONENAME,
+ &changes);
+ if (update_notification_cb != NULL)
+ update_notification_cb("zone", GLOBAL_ZONENAME, changes,
+ max_rss, (lcol != NULL) ? lcol->lcol_mark : 0);
}
diff --git a/usr/src/cmd/rcap/rcapd/rcapd_scanner.c b/usr/src/cmd/rcap/rcapd/rcapd_scanner.c
index b39811b552..254bb9e922 100644
--- a/usr/src/cmd/rcap/rcapd/rcapd_scanner.c
+++ b/usr/src/cmd/rcap/rcapd/rcapd_scanner.c
@@ -21,6 +21,7 @@
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
@@ -551,7 +552,7 @@ pageout(pid_t pid, struct ps_prochandle *Pr, caddr_t start, caddr_t end)
errno = 0;
res = pr_memcntl(Pr, start, (end - start), MC_SYNC,
- (caddr_t)(MS_ASYNC | MS_INVALIDATE), 0, 0);
+ (caddr_t)(MS_ASYNC | MS_INVALCURPROC), 0, 0);
debug_high("pr_memcntl [%p-%p): %d", (void *)start, (void *)end, res);
/*
diff --git a/usr/src/cmd/rcap/rcapstat/rcapstat.c b/usr/src/cmd/rcap/rcapstat/rcapstat.c
index 0632250fed..2838c6e5d5 100644
--- a/usr/src/cmd/rcap/rcapstat/rcapstat.c
+++ b/usr/src/cmd/rcap/rcapstat/rcapstat.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -72,6 +73,8 @@ typedef struct col {
static col_t *col_head;
static int ncol;
+#define RCAPD_NA "rcapd is not active (try zonememstat)\n"
+
static col_t *
col_find(rcid_t id)
{
@@ -152,7 +155,7 @@ read_stats(rcid_type_t stat_type)
struct stat st;
if ((fd = open(STAT_FILE_DEFAULT, O_RDONLY)) < 0) {
- warn(gettext("rcapd is not active\n"));
+ warn(gettext(RCAPD_NA));
return (E_ERROR);
}
@@ -173,7 +176,7 @@ read_stats(rcid_type_t stat_type)
pid = hdr.rs_pid;
(void) snprintf(procfile, 20, "/proc/%lld/psinfo", pid);
if ((proc_fd = open(procfile, O_RDONLY)) < 0) {
- warn(gettext("rcapd is not active\n"));
+ warn(gettext(RCAPD_NA));
(void) close(fd);
return (E_ERROR);
}
diff --git a/usr/src/cmd/rcm_daemon/Makefile.com b/usr/src/cmd/rcm_daemon/Makefile.com
index 4b12d1cd27..59e6e49371 100644
--- a/usr/src/cmd/rcm_daemon/Makefile.com
+++ b/usr/src/cmd/rcm_daemon/Makefile.com
@@ -56,7 +56,6 @@ COMMON_MOD_SRC = \
$(COMMON)/pool_rcm.c \
$(COMMON)/mpxio_rcm.c \
$(COMMON)/ip_anon_rcm.c \
- $(COMMON)/svm_rcm.c \
$(COMMON)/bridge_rcm.c
sparc_MOD_SRC = $(COMMON)/ttymux_rcm.c
@@ -82,7 +81,6 @@ COMMON_MOD_OBJ = \
pool_rcm.o \
mpxio_rcm.o \
ip_anon_rcm.o \
- svm_rcm.o \
bridge_rcm.o
sparc_MOD_OBJ = ttymux_rcm.o
@@ -103,7 +101,6 @@ COMMON_RCM_MODS = \
SUNW_pool_rcm.so \
SUNW_mpxio_rcm.so \
SUNW_ip_anon_rcm.so \
- SUNW_svm_rcm.so \
SUNW_bridge_rcm.so
sparc_RCM_MODS = SUNW_ttymux_rcm.so
@@ -132,7 +129,6 @@ LINTFLAGS += -u -erroff=E_FUNC_ARG_UNUSED
LDLIBS_MODULES =
SUNW_pool_rcm.so := LDLIBS_MODULES += -L$(ROOT)/usr/lib -lpool
-SUNW_svm_rcm.so := LDLIBS_MODULES += -L$(ROOT)/usr/lib -lmeta
SUNW_network_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -ldladm
SUNW_vlan_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -ldladm
SUNW_vnic_rcm.so := LDLIBS_MODULES += -L$(ROOT)/lib -ldladm
diff --git a/usr/src/cmd/savecore/savecore.c b/usr/src/cmd/savecore/savecore.c
index f57309ec9b..221005ca28 100644
--- a/usr/src/cmd/savecore/savecore.c
+++ b/usr/src/cmd/savecore/savecore.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 1983, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2015, Joyent, Inc.
+ * Copyright 2016 Joyent, Inc.
*/
/*
* Copyright 2012 Nexenta Systems, Inc. All rights reserved.
@@ -83,6 +83,7 @@ static int interactive; /* user invoked; no syslog */
static int csave; /* save dump compressed */
static int filemode; /* processing file, not dump device */
static int percent_done; /* progress indicator */
+static int sec_done; /* progress last report time */
static hrtime_t startts; /* timestamp at start */
static volatile uint64_t saved; /* count of pages written */
static volatile uint64_t zpages; /* count of zero pages not written */
@@ -1084,11 +1085,12 @@ report_progress()
return;
percent = saved * 100LL / corehdr.dump_npages;
- if (percent > percent_done) {
- sec = (gethrtime() - startts) / 1000 / 1000 / 1000;
+ sec = (gethrtime() - startts) / NANOSEC;
+ if (percent > percent_done || sec > sec_done) {
(void) printf("\r%2d:%02d %3d%% done", sec / 60, sec % 60,
percent);
(void) fflush(stdout);
+ sec_done = sec;
percent_done = percent;
}
}
diff --git a/usr/src/cmd/sed/main.c b/usr/src/cmd/sed/main.c
index 204583b877..dc3ef02619 100644
--- a/usr/src/cmd/sed/main.c
+++ b/usr/src/cmd/sed/main.c
@@ -1,7 +1,7 @@
/*
* Copyright (c) 2013 Johann 'Myrkraverk' Oskarsson <johann@myrkraverk.com>
* Copyright (c) 2011 Gary Mills
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1992 Diomidis Spinellis.
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
@@ -42,9 +42,7 @@
#include <err.h>
#include <errno.h>
#include <fcntl.h>
-#include <getopt.h>
#include <libgen.h>
-#include <libintl.h>
#include <limits.h>
#include <locale.h>
#include <regex.h>
@@ -53,6 +51,7 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <libintl.h>
#include "defs.h"
#include "extern.h"
@@ -107,11 +106,6 @@ static char tmpfname[PATH_MAX]; /* Temporary file name (for in-place editing) */
static const char *inplace; /* Inplace edit file extension. */
ulong_t linenum;
-static const struct option lopts[] = {
- {"in-place", optional_argument, NULL, 'i'},
- {NULL, 0, NULL, 0}
-};
-
static void add_compunit(enum e_cut, char *);
static void add_file(char *);
static void usage(void);
@@ -133,18 +127,14 @@ main(int argc, char *argv[])
fflag = 0;
inplace = NULL;
- while ((c = getopt_long(argc, argv, "EI::ae:f:i::lnr", lopts, NULL)) !=
- -1)
+ while ((c = getopt(argc, argv, "EI:ae:f:i:lnr")) != -1)
switch (c) {
case 'r': /* Gnu sed compat */
case 'E':
rflags = REG_EXTENDED;
break;
case 'I':
- if (optarg != NULL)
- inplace = optarg;
- else
- inplace = "";
+ inplace = optarg;
ispan = 1; /* span across input files */
break;
case 'a':
@@ -161,10 +151,7 @@ main(int argc, char *argv[])
add_compunit(CU_FILE, optarg);
break;
case 'i':
- if (optarg != NULL)
- inplace = optarg;
- else
- inplace = "";
+ inplace = optarg;
ispan = 0; /* don't span across input files */
break;
case 'l':
@@ -205,8 +192,8 @@ main(int argc, char *argv[])
static void
usage(void)
{
- (void) fputs(_("usage: sed script [-Ealn] [-i[extension]] [file...]\n"
- " sed [-Ealn] [-i[extension]] [-e script]... "
+ (void) fputs(_("usage: sed script [-Ealn] [-i extension] [file...]\n"
+ " sed [-Ealn] [-i extension] [-e script]... "
"[-f script_file]... [file...]\n"),
stderr);
exit(1);
diff --git a/usr/src/cmd/sendmail/src/Makefile b/usr/src/cmd/sendmail/src/Makefile
index 14793754fd..9512a7d976 100644
--- a/usr/src/cmd/sendmail/src/Makefile
+++ b/usr/src/cmd/sendmail/src/Makefile
@@ -46,7 +46,7 @@ LDFLAGS += $(MAPFILES:%=-M%)
LDLIBS += ../libsmutil/libsmutil.a ../libsm/libsm.a -lresolv -lsocket \
-lnsl ../db/libdb.a -lldap -lsldap -lwrap -lumem \
- -lssl -lcrypto -lsasl
+ -lsunw_ssl -lsunw_crypto -lsasl
INCPATH= -I. -I../include -I../db
diff --git a/usr/src/cmd/sgs/elfdump/Makefile.targ b/usr/src/cmd/sgs/elfdump/Makefile.targ
index 5cb4d96437..a0125d8a3d 100644
--- a/usr/src/cmd/sgs/elfdump/Makefile.targ
+++ b/usr/src/cmd/sgs/elfdump/Makefile.targ
@@ -78,6 +78,8 @@ delete:
package \
install: all $(VAR_SGSBINPROG) $(VAR_SGSCCSLINK)
+ -$(RM) $(ROOTPROG)
+ -$(LN) $(ISAEXEC) $(ROOTPROG)
.PARALLEL: $(LINTOUT32) $(LINTOUT64)
diff --git a/usr/src/cmd/sgs/elfdump/amd64/Makefile b/usr/src/cmd/sgs/elfdump/amd64/Makefile
index be0f2c33f6..039edb65b6 100644
--- a/usr/src/cmd/sgs/elfdump/amd64/Makefile
+++ b/usr/src/cmd/sgs/elfdump/amd64/Makefile
@@ -38,6 +38,8 @@ LINTFLAGS64 += $(VAR_LINTFLAGS64)
VAR_SGSBINPROG= $(VAR_SGSBINPROG64)
VAR_SGSCCSLINK= $(VAR_SGSCCSLINK64)
+install: all $(ROOTPROG64)
+
include ../Makefile.targ
include ../../Makefile.sub.64
diff --git a/usr/src/cmd/sgs/elfdump/i386/Makefile b/usr/src/cmd/sgs/elfdump/i386/Makefile
index d3cb302ac1..95390a2899 100644
--- a/usr/src/cmd/sgs/elfdump/i386/Makefile
+++ b/usr/src/cmd/sgs/elfdump/i386/Makefile
@@ -30,4 +30,6 @@ include ../Makefile.com
ARCH = i386
+install: all $(ROOTPROG32)
+
include ../Makefile.targ
diff --git a/usr/src/cmd/sgs/include/conv.h b/usr/src/cmd/sgs/include/conv.h
index 8f4dd4a584..e9232cd463 100644
--- a/usr/src/cmd/sgs/include/conv.h
+++ b/usr/src/cmd/sgs/include/conv.h
@@ -25,6 +25,7 @@
*
* Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012 DEY Storage Systems, Inc. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
*/
#ifndef _CONV_H
@@ -277,7 +278,7 @@ typedef union {
} Conv_bnd_obj_buf_t;
/* conv_phdr_flags() */
-#define CONV_PHDR_FLAGS_BUFSIZE 88
+#define CONV_PHDR_FLAGS_BUFSIZE 244
typedef union {
Conv_inv_buf_t inv_buf;
char buf[CONV_PHDR_FLAGS_BUFSIZE];
diff --git a/usr/src/cmd/sgs/lex/Makefile.targ b/usr/src/cmd/sgs/lex/Makefile.targ
index fcf853208f..8d9ee4f7b7 100644
--- a/usr/src/cmd/sgs/lex/Makefile.targ
+++ b/usr/src/cmd/sgs/lex/Makefile.targ
@@ -94,4 +94,3 @@ $(LINTLIB): $$(SRCS)
lintcheck: $$(SRCS)
$(LINT.c) $(LINTCHECKFLAGS) $(SRCS) $(LDLIBS)
-
diff --git a/usr/src/cmd/sgs/lex/common/main.c b/usr/src/cmd/sgs/lex/common/main.c
index 17ba4808a4..a1fd526cf9 100644
--- a/usr/src/cmd/sgs/lex/common/main.c
+++ b/usr/src/cmd/sgs/lex/common/main.c
@@ -30,11 +30,15 @@
/* Copyright 1976, Bell Telephone Laboratories, Inc. */
+/* Copyright (c) 2013, joyent, Inc. All rights reserved. */
+
#include <string.h>
#include "once.h"
#include "sgs.h"
#include <locale.h>
#include <limits.h>
+#include <unistd.h>
+#include <libgen.h>
static wchar_t L_INITIAL[] = {'I', 'N', 'I', 'T', 'I', 'A', 'L', 0};
static void get1core(void);
@@ -46,6 +50,25 @@ static void get3core(void);
static void free3core(void);
#endif
+static int
+lex_construct_path(char *buf, size_t size, const char *file, int type)
+{
+ int ret;
+ char origin[PATH_MAX];
+
+ if (type != 0) {
+ ret = readlink("/proc/self/path/a.out", origin, PATH_MAX - 1);
+ if (ret < 0)
+ error(
+ "lex: failed to read origin from /proc\n");
+ origin[ret] = '\0';
+ return (snprintf(buf, size, "%s/../%s/%s", dirname(origin),
+ NBASE, file));
+ }
+
+ return (snprintf(buf, size, "%s/%s/%s", NPREFIX, NBASE, file));
+}
+
int
main(int argc, char **argv)
{
@@ -53,6 +76,7 @@ main(int argc, char **argv)
int c;
char *apath = NULL;
char *ypath;
+ char pathbuf[PATH_MAX];
Boolean eoption = 0, woption = 0;
sargv = argv;
@@ -224,6 +248,11 @@ main(int argc, char **argv)
free3core();
#endif
+ /*
+ * Try to find the file relative to $ORIGIN. Note that we don't touch
+ * antyhing related to -Y. In fact, unfortunately it's always been
+ * ignored it seems.
+ */
if (handleeuc) {
if (ratfor)
error("Ratfor is not supported by -w or -e option.");
@@ -232,9 +261,19 @@ main(int argc, char **argv)
else
ypath = ratfor ? RATNAME : CNAME;
- if (apath != NULL)
- ypath = strcat(apath, strrchr(ypath, '/'));
- fother = fopen(ypath, "r");
+ if (apath == NULL) {
+ (void) lex_construct_path(pathbuf, sizeof (pathbuf), ypath, 1);
+ fother = fopen(pathbuf, "r");
+ if (fother == NULL) {
+ (void) lex_construct_path(pathbuf, sizeof (pathbuf),
+ ypath, 0);
+ fother = fopen(pathbuf, "r");
+ }
+ } else {
+ apath = strcat(apath, "/");
+ ypath = strcat(apath, ypath);
+ fother = fopen(ypath, "r");
+ }
if (fother == NULL)
error("Lex driver missing, file %s", ypath);
while ((i = getc(fother)) != EOF)
diff --git a/usr/src/cmd/sgs/lex/common/once.h b/usr/src/cmd/sgs/lex/common/once.h
index 014ca00b17..9e4b0e5e00 100644
--- a/usr/src/cmd/sgs/lex/common/once.h
+++ b/usr/src/cmd/sgs/lex/common/once.h
@@ -26,11 +26,11 @@
/* Copyright (c) 1988 AT&T */
/* All Rights Reserved */
+/* Copyright (c) 2013, joyent, Inc. All rights reserved. */
+
#ifndef _ONCE_H
#define _ONCE_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include "ldefs.h"
/* once.c */
@@ -73,9 +73,11 @@ int peek = '\n'; /* next input character */
CHR *pushptr = pushc;
CHR *slptr = slist;
-#define CNAME "/usr/share/lib/ccs/ncform"
-#define RATNAME "/usr/share/lib/ccs/nrform"
-#define EUCNAME "/usr/share/lib/ccs/nceucform"
+#define NPREFIX "/usr"
+#define NBASE "/share/lib/ccs/"
+#define CNAME "ncform"
+#define RATNAME "nrform"
+#define EUCNAME "nceucform"
int ccount = 1;
int casecount = 1;
diff --git a/usr/src/cmd/sgs/libconv/common/corenote.c b/usr/src/cmd/sgs/libconv/common/corenote.c
index eb998bae45..1e579415f6 100644
--- a/usr/src/cmd/sgs/libconv/common/corenote.c
+++ b/usr/src/cmd/sgs/libconv/common/corenote.c
@@ -25,7 +25,7 @@
*/
/*
* Copyright 2012 DEY Storage Systems, Inc. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
*/
/*
@@ -76,7 +76,7 @@ const char *
conv_cnote_auxv_type(Word type, Conv_fmt_flags_t fmt_flags,
Conv_inv_buf_t *inv_buf)
{
- static const Msg types_0_22[] = {
+ static const Msg types_0_25[] = {
MSG_AUXV_AT_NULL, MSG_AUXV_AT_IGNORE,
MSG_AUXV_AT_EXECFD, MSG_AUXV_AT_PHDR,
MSG_AUXV_AT_PHENT, MSG_AUXV_AT_PHNUM,
@@ -88,10 +88,11 @@ conv_cnote_auxv_type(Word type, Conv_fmt_flags_t fmt_flags,
MSG_AUXV_AT_HWCAP, MSG_AUXV_AT_CLKTCK,
MSG_AUXV_AT_FPUCW, MSG_AUXV_AT_DCACHEBSIZE,
MSG_AUXV_AT_ICACHEBSIZE, MSG_AUXV_AT_UCACHEBSIZE,
- MSG_AUXV_AT_IGNOREPPC
+ MSG_AUXV_AT_IGNOREPPC, MSG_AUXV_AT_SECURE,
+ MSG_AUXV_AT_BASE_PLATFORM, MSG_AUXV_AT_RANDOM
};
- static const conv_ds_msg_t ds_types_0_22 = {
- CONV_DS_MSG_INIT(0, types_0_22) };
+ static const conv_ds_msg_t ds_types_0_25 = {
+ CONV_DS_MSG_INIT(0, types_0_25) };
static const Msg types_2000_2011[] = {
MSG_AUXV_AT_SUN_UID, MSG_AUXV_AT_SUN_RUID,
@@ -104,19 +105,20 @@ conv_cnote_auxv_type(Word type, Conv_fmt_flags_t fmt_flags,
static const conv_ds_msg_t ds_types_2000_2011 = {
CONV_DS_MSG_INIT(2000, types_2000_2011) };
- static const Msg types_2014_2023[] = {
+ static const Msg types_2014_2025[] = {
MSG_AUXV_AT_SUN_EXECNAME, MSG_AUXV_AT_SUN_MMU,
MSG_AUXV_AT_SUN_LDDATA, MSG_AUXV_AT_SUN_AUXFLAGS,
MSG_AUXV_AT_SUN_EMULATOR, MSG_AUXV_AT_SUN_BRANDNAME,
MSG_AUXV_AT_SUN_BRAND_AUX1, MSG_AUXV_AT_SUN_BRAND_AUX2,
- MSG_AUXV_AT_SUN_BRAND_AUX3, MSG_AUXV_AT_SUN_HWCAP2
+ MSG_AUXV_AT_SUN_BRAND_AUX3, MSG_AUXV_AT_SUN_HWCAP2,
+ MSG_AUXV_AT_SUN_BRAND_NROOT, MSG_AUXV_AT_SUN_COMMPAGE
};
- static const conv_ds_msg_t ds_types_2014_2023 = {
- CONV_DS_MSG_INIT(2014, types_2014_2023) };
+ static const conv_ds_msg_t ds_types_2014_2025 = {
+ CONV_DS_MSG_INIT(2014, types_2014_2025) };
static const conv_ds_t *ds[] = {
- CONV_DS_ADDR(ds_types_0_22), CONV_DS_ADDR(ds_types_2000_2011),
- CONV_DS_ADDR(ds_types_2014_2023), NULL };
+ CONV_DS_ADDR(ds_types_0_25), CONV_DS_ADDR(ds_types_2000_2011),
+ CONV_DS_ADDR(ds_types_2014_2025), NULL };
return (conv_map_ds(ELFOSABI_NONE, EM_NONE, type, ds, fmt_flags,
inv_buf));
diff --git a/usr/src/cmd/sgs/libconv/common/corenote.msg b/usr/src/cmd/sgs/libconv/common/corenote.msg
index f5866a5b69..2dcd2a697f 100644
--- a/usr/src/cmd/sgs/libconv/common/corenote.msg
+++ b/usr/src/cmd/sgs/libconv/common/corenote.msg
@@ -24,7 +24,7 @@
# Use is subject to license terms.
#
# Copyright 2012 DEY Storage Systems, Inc. All rights reserved.
-# Copyright (c) 2013, Joyent, Inc. All rights reserved.
+# Copyright 2016 Joyent, Inc.
#
@ MSG_NT_PRSTATUS "[ NT_PRSTATUS ]"
@@ -78,6 +78,9 @@
@ MSG_AUXV_AT_ICACHEBSIZE "ICACHEBSIZE"
@ MSG_AUXV_AT_UCACHEBSIZE "UCACHEBSIZE"
@ MSG_AUXV_AT_IGNOREPPC "IGNOREPPC"
+@ MSG_AUXV_AT_SECURE "SECURE"
+@ MSG_AUXV_AT_BASE_PLATFORM "BASE_PLATFORM"
+@ MSG_AUXV_AT_RANDOM "RANDOM"
@ MSG_AUXV_AT_SUN_UID "SUN_UID"
@ MSG_AUXV_AT_SUN_RUID "SUN_RUID"
@ MSG_AUXV_AT_SUN_GID "SUN_GID"
@@ -100,6 +103,8 @@
@ MSG_AUXV_AT_SUN_BRAND_AUX2 "SUN_BRAND_AUX2"
@ MSG_AUXV_AT_SUN_BRAND_AUX3 "SUN_BRAND_AUX3"
@ MSG_AUXV_AT_SUN_HWCAP2 "SUN_HWCAP2"
+@ MSG_AUXV_AT_SUN_BRAND_NROOT "SUN_BRAND_NROOT"
+@ MSG_AUXV_AT_SUN_COMMPAGE "SUN_COMMPAGE"
@ MSG_CC_CONTENT_STACK "STACK"
diff --git a/usr/src/cmd/sgs/libconv/common/phdr.c b/usr/src/cmd/sgs/libconv/common/phdr.c
index 05aeb0a89b..e9da5c5194 100644
--- a/usr/src/cmd/sgs/libconv/common/phdr.c
+++ b/usr/src/cmd/sgs/libconv/common/phdr.c
@@ -25,6 +25,10 @@
*/
/*
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
+ */
+
+/*
* String conversion routines for program header attributes.
*/
#include <stdio.h>
@@ -95,6 +99,7 @@ error "PT_NUM has grown. Update phdrs[]"
{ PT_GNU_EH_FRAME, LIN, MSG_PT_GNU_EH_FRAME },
{ PT_GNU_STACK, LIN, MSG_PT_GNU_STACK },
{ PT_GNU_RELRO, LIN, MSG_PT_GNU_RELRO },
+ { PT_PAX_FLAGS, LIN, MSG_PT_PAX_FLAGS },
{ 0 }
};
@@ -109,6 +114,7 @@ error "PT_NUM has grown. Update phdrs[]"
{ PT_GNU_EH_FRAME, LIN, MSG_PT_GNU_EH_FRAME_CF },
{ PT_GNU_STACK, LIN, MSG_PT_GNU_STACK_CF },
{ PT_GNU_RELRO, LIN, MSG_PT_GNU_RELRO_CF },
+ { PT_PAX_FLAGS, LIN, MSG_PT_PAX_FLAGS_CF },
{ 0 }
};
@@ -123,6 +129,7 @@ error "PT_NUM has grown. Update phdrs[]"
{ PT_GNU_EH_FRAME, LIN, MSG_PT_GNU_EH_FRAME_CFNP },
{ PT_GNU_STACK, LIN, MSG_PT_GNU_STACK_CFNP },
{ PT_GNU_RELRO, LIN, MSG_PT_GNU_RELRO_CFNP },
+ { PT_PAX_FLAGS, LIN, MSG_PT_PAX_FLAGS_CFNP },
{ 0 }
};
@@ -137,6 +144,7 @@ error "PT_NUM has grown. Update phdrs[]"
{ PT_GNU_EH_FRAME, LIN, MSG_PT_GNU_EH_FRAME_NF },
{ PT_GNU_STACK, LIN, MSG_PT_GNU_STACK_NF },
{ PT_GNU_RELRO, LIN, MSG_PT_GNU_RELRO_NF },
+ { PT_PAX_FLAGS, LIN, MSG_PT_PAX_FLAGS_NF },
{ 0 }
};
@@ -214,6 +222,18 @@ conv_phdr_flags_strings(Conv_fmt_flags_t fmt_flags)
MSG_PF_X_CF_SIZE + CONV_EXPN_FIELD_DEF_SEP_SIZE + \
MSG_PF_W_CF_SIZE + CONV_EXPN_FIELD_DEF_SEP_SIZE + \
MSG_PF_R_CF_SIZE + CONV_EXPN_FIELD_DEF_SEP_SIZE + \
+ MSG_PF_PAGEEXEC_CF_SIZE + CONV_EXPN_FIELD_DEF_SEP_SIZE + \
+ MSG_PF_NOPAGEEXEC_CF_SIZE + CONV_EXPN_FIELD_DEF_SEP_SIZE + \
+ MSG_PF_SEGMEXEC_CF_SIZE + CONV_EXPN_FIELD_DEF_SEP_SIZE + \
+ MSG_PF_NOSEGMEXEC_CF_SIZE + CONV_EXPN_FIELD_DEF_SEP_SIZE + \
+ MSG_PF_MPROTECT_CF_SIZE + CONV_EXPN_FIELD_DEF_SEP_SIZE + \
+ MSG_PF_NOMPROTECT_CF_SIZE + CONV_EXPN_FIELD_DEF_SEP_SIZE + \
+ MSG_PF_RANDEXEC_CF_SIZE + CONV_EXPN_FIELD_DEF_SEP_SIZE + \
+ MSG_PF_NORANDEXEC_CF_SIZE + CONV_EXPN_FIELD_DEF_SEP_SIZE + \
+ MSG_PF_EMUTRAMP_CF_SIZE + CONV_EXPN_FIELD_DEF_SEP_SIZE + \
+ MSG_PF_NOEMUTRAMP_CF_SIZE + CONV_EXPN_FIELD_DEF_SEP_SIZE + \
+ MSG_PF_RANDMMAP_CF_SIZE + CONV_EXPN_FIELD_DEF_SEP_SIZE + \
+ MSG_PF_NORANDMMAP_CF_SIZE + CONV_EXPN_FIELD_DEF_SEP_SIZE + \
MSG_PF_SUNW_FAILURE_CF_SIZE + CONV_EXPN_FIELD_DEF_SEP_SIZE + \
MSG_PF_SUNW_KILLED_CF_SIZE + CONV_EXPN_FIELD_DEF_SEP_SIZE + \
MSG_PF_SUNW_SIGINFO_CF_SIZE + CONV_EXPN_FIELD_DEF_SEP_SIZE + \
@@ -237,11 +257,24 @@ conv_phdr_flags_strings(Conv_fmt_flags_t fmt_flags)
#define ALL ELFOSABI_NONE, EM_NONE
#define SOL ELFOSABI_SOLARIS, EM_NONE
+#define LIN ELFOSABI_LINUX, EM_NONE
static const Val_desc2 vda_cf[] = {
{ PF_X, ALL, MSG_PF_X_CF },
{ PF_W, ALL, MSG_PF_W_CF },
{ PF_R, ALL, MSG_PF_R_CF },
+ { PF_PAGEEXEC, LIN, MSG_PF_PAGEEXEC_CF },
+ { PF_NOPAGEEXEC, LIN, MSG_PF_NOPAGEEXEC_CF },
+ { PF_SEGMEXEC, LIN, MSG_PF_SEGMEXEC_CF },
+ { PF_NOSEGMEXEC, LIN, MSG_PF_NOSEGMEXEC_CF },
+ { PF_MPROTECT, LIN, MSG_PF_MPROTECT_CF },
+ { PF_NOMPROTECT, LIN, MSG_PF_NOMPROTECT_CF },
+ { PF_RANDEXEC, LIN, MSG_PF_RANDEXEC_CF },
+ { PF_NORANDEXEC, LIN, MSG_PF_NORANDEXEC_CF },
+ { PF_EMUTRAMP, LIN, MSG_PF_EMUTRAMP_CF },
+ { PF_NOEMUTRAMP, LIN, MSG_PF_NOEMUTRAMP_CF },
+ { PF_RANDMMAP, LIN, MSG_PF_RANDMMAP_CF },
+ { PF_NORANDMMAP, LIN, MSG_PF_NORANDMMAP_CF },
{ PF_SUNW_FAILURE, SOL, MSG_PF_SUNW_FAILURE_CF },
{ PF_SUNW_KILLED, SOL, MSG_PF_SUNW_KILLED_CF },
{ PF_SUNW_SIGINFO, SOL, MSG_PF_SUNW_SIGINFO_CF },
@@ -251,6 +284,18 @@ conv_phdr_flags_strings(Conv_fmt_flags_t fmt_flags)
{ PF_X, ALL, MSG_PF_X_NF },
{ PF_W, ALL, MSG_PF_W_NF },
{ PF_R, ALL, MSG_PF_R_NF },
+ { PF_PAGEEXEC, LIN, MSG_PF_PAGEEXEC_NF },
+ { PF_NOPAGEEXEC, LIN, MSG_PF_NOPAGEEXEC_NF },
+ { PF_SEGMEXEC, LIN, MSG_PF_SEGMEXEC_NF },
+ { PF_NOSEGMEXEC, LIN, MSG_PF_NOSEGMEXEC_NF },
+ { PF_MPROTECT, LIN, MSG_PF_MPROTECT_NF },
+ { PF_NOMPROTECT, LIN, MSG_PF_NOMPROTECT_NF },
+ { PF_RANDEXEC, LIN, MSG_PF_RANDEXEC_NF },
+ { PF_NORANDEXEC, LIN, MSG_PF_NORANDEXEC_NF },
+ { PF_EMUTRAMP, LIN, MSG_PF_EMUTRAMP_NF },
+ { PF_NOEMUTRAMP, LIN, MSG_PF_NOEMUTRAMP_NF },
+ { PF_RANDMMAP, LIN, MSG_PF_RANDMMAP_NF },
+ { PF_NORANDMMAP, LIN, MSG_PF_NORANDMMAP_NF },
{ PF_SUNW_FAILURE, SOL, MSG_PF_SUNW_FAILURE_NF },
{ PF_SUNW_KILLED, SOL, MSG_PF_SUNW_KILLED_NF },
{ PF_SUNW_SIGINFO, SOL, MSG_PF_SUNW_SIGINFO_NF },
@@ -262,6 +307,7 @@ conv_phdr_flags_strings(Conv_fmt_flags_t fmt_flags)
#undef ALL
#undef SOL
+#undef LIN
}
const char *
diff --git a/usr/src/cmd/sgs/libconv/common/phdr.msg b/usr/src/cmd/sgs/libconv/common/phdr.msg
index 789832a16a..e84182277b 100644
--- a/usr/src/cmd/sgs/libconv/common/phdr.msg
+++ b/usr/src/cmd/sgs/libconv/common/phdr.msg
@@ -24,6 +24,8 @@
# Use is subject to license terms.
#
+# Copyright (c) 2015, Joyent, Inc. All rights reserved.
+
@ MSG_PT_NULL "[ PT_NULL ]" # 0
@ MSG_PT_NULL_CF "PT_NULL"
@ MSG_PT_NULL_CFNP "NULL"
@@ -79,6 +81,10 @@
@ MSG_PT_GNU_RELRO_CF "PT_GNU_RELRO"
@ MSG_PT_GNU_RELRO_CFNP "GNU_RELRO"
@ MSG_PT_GNU_RELRO_NF "gnu_relro"
+@ MSG_PT_PAX_FLAGS "[ PT_PAX_FLAGS ]" # 0x65041580
+@ MSG_PT_PAX_FLAGS_CF "PT_PAX_FLAGS"
+@ MSG_PT_PAX_FLAGS_CFNP "PAX_FLAGS"
+@ MSG_PT_PAX_FLAGS_NF "pax_flags"
@ MSG_PT_SUNWBSS "[ PT_SUNWBSS ]" # 0x6ffffffa
@ MSG_PT_SUNWBSS_CF "PT_SUNWBSS"
@@ -103,6 +109,30 @@
@ MSG_PF_W_NF "w"
@ MSG_PF_R_CF "PF_R" # 0x4
@ MSG_PF_R_NF "r"
+@ MSG_PF_PAGEEXEC_CF "PF_PAGEEXEC" # 0x00000010
+@ MSG_PF_PAGEEXEC_NF "pageexec"
+@ MSG_PF_NOPAGEEXEC_CF "PF_NOPAGEEXEC" # 0x00000020
+@ MSG_PF_NOPAGEEXEC_NF "nopageexec"
+@ MSG_PF_SEGMEXEC_CF "PF_SEGMEXEC" # 0x00000040
+@ MSG_PF_SEGMEXEC_NF "segmexec"
+@ MSG_PF_NOSEGMEXEC_CF "PF_NOSEGMEXEC" # 0x00000080
+@ MSG_PF_NOSEGMEXEC_NF "nosegmexec"
+@ MSG_PF_MPROTECT_CF "PF_MPROTECT" # 0x00000100
+@ MSG_PF_MPROTECT_NF "mprotect"
+@ MSG_PF_NOMPROTECT_CF "PF_NOMPROTECT" # 0x00000200
+@ MSG_PF_NOMPROTECT_NF "nomprotect"
+@ MSG_PF_RANDEXEC_CF "PF_RANDEXEC" # 0x00000400
+@ MSG_PF_RANDEXEC_NF "randexec"
+@ MSG_PF_NORANDEXEC_CF "PF_NORANDEXEC" # 0x00000800
+@ MSG_PF_NORANDEXEC_NF "norandexec"
+@ MSG_PF_EMUTRAMP_CF "PF_EMUTRAMP" # 0x00001000
+@ MSG_PF_EMUTRAMP_NF "emutramp"
+@ MSG_PF_NOEMUTRAMP_CF "PF_NOEMUTRAMP" # 0x00002000
+@ MSG_PF_NOEMUTRAMP_NF "noemutramp"
+@ MSG_PF_RANDMMAP_CF "PF_RANDMMAP" # 0x00004000
+@ MSG_PF_RANDMMAP_NF "randmmap"
+@ MSG_PF_NORANDMMAP_CF "PF_NORANDMMAP" # 0x00008000
+@ MSG_PF_NORANDMMAP_NF "norandmmap"
@ MSG_PF_SUNW_FAILURE_CF "PF_SUNW_FAILURE" # 0x00100000
@ MSG_PF_SUNW_FAILURE_NF "sunw_failure"
@ MSG_PF_SUNW_KILLED_CF "PF_SUNW_KILLED" # 0x00200000
diff --git a/usr/src/cmd/sgs/librtld_db/common/librtld_db.msg b/usr/src/cmd/sgs/librtld_db/common/librtld_db.msg
index fbc595f5f4..5b0ad9533a 100644
--- a/usr/src/cmd/sgs/librtld_db/common/librtld_db.msg
+++ b/usr/src/cmd/sgs/librtld_db/common/librtld_db.msg
@@ -24,6 +24,10 @@
# Use is subject to license terms.
#
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
# Message file for cmd/sgs/librtld_db.
@ MSG_ID_LIBRTLD_DB
@@ -104,7 +108,7 @@
@ MSG_DB_RDOBJPADE "rtld_db: rd_objpad_enable(padsize=0x%llx)"
@ MSG_DB_64BIT_PREFIX "64/"
@ MSG_DB_BRAND_HELPERPATH_PREFIX "%s/%s/%s/%s%s_librtld_db.so.1"
-@ MSG_DB_BRAND_HELPERPATH "%s/%s/%s%s_librtld_db.so.1"
+@ MSG_DB_BRAND_HELPERPATH "%s/%s/%s/%s%s_librtld_db.so.1"
@ MSG_DB_HELPERNOOPS "rtld_db: helper lib loaded but ops not preset"
@ MSG_DB_HELPERLOADED "rtld_db: helper library loaded for brand \"%s\""
@ MSG_DB_HELPERLOADFAILED "rtld_db: couldn't load brand helper library %s"
diff --git a/usr/src/cmd/sgs/librtld_db/common/rd_elf.c b/usr/src/cmd/sgs/librtld_db/common/rd_elf.c
index 410b819b30..b3de685b81 100644
--- a/usr/src/cmd/sgs/librtld_db/common/rd_elf.c
+++ b/usr/src/cmd/sgs/librtld_db/common/rd_elf.c
@@ -23,6 +23,10 @@
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
#include <stdlib.h>
#include <stdio.h>
#include <proc_service.h>
@@ -38,6 +42,14 @@
#include <sys/param.h>
/*
+ * We want to include zone.h to pull in the prototype for zone_get_nroot(),
+ * but we need to avoid pulling in <sys/stream.h>, which has a definition
+ * of M_DATA that conflicts with the ELF-related definition in machdep_*.h.
+ */
+#define _SYS_STREAM_H
+#include <zone.h>
+
+/*
* 64-bit builds are going to compile this module twice, the
* second time with _ELF64 defined. These defines should make
* all the necessary adjustments to the code.
@@ -283,7 +295,9 @@ _rd_reset32(struct rd_agent *rap)
* If we are debugging a branded executable, load the appropriate
* helper library, and call its initialization routine. Being unable
* to load the helper library is not a critical error. (Hopefully
- * we'll still be able to access some objects in the target.)
+ * we'll still be able to access some objects in the target.) Note
+ * that we pull in the native root here to allow for helper libraries
+ * to be properly found from within the branded zone.
*/
ps_pbrandname = (ps_pbrandname_fp_t)dlsym(RTLD_PROBE, "ps_pbrandname");
while ((ps_pbrandname != NULL) &&
@@ -294,17 +308,23 @@ _rd_reset32(struct rd_agent *rap)
isa = MSG_ORIG(MSG_DB_64BIT_PREFIX);
#endif /* _LP64 */
- if (rtld_db_helper_path[0] != '\0')
+ if (rtld_db_helper_path[0] != '\0') {
(void) snprintf(brandlib, MAXPATHLEN,
MSG_ORIG(MSG_DB_BRAND_HELPERPATH_PREFIX),
rtld_db_helper_path,
MSG_ORIG(MSG_DB_HELPER_PREFIX), brandname, isa,
brandname);
- else
+ } else {
+ const char *nroot = zone_get_nroot();
+
+ if (nroot == NULL)
+ nroot = "";
+
(void) snprintf(brandlib, MAXPATHLEN,
- MSG_ORIG(MSG_DB_BRAND_HELPERPATH),
+ MSG_ORIG(MSG_DB_BRAND_HELPERPATH), nroot,
MSG_ORIG(MSG_DB_HELPER_PREFIX), brandname, isa,
brandname);
+ }
rap->rd_helper.rh_dlhandle = dlopen(brandlib,
RTLD_LAZY | RTLD_LOCAL);
diff --git a/usr/src/cmd/sgs/rtld/common/_rtld.h b/usr/src/cmd/sgs/rtld/common/_rtld.h
index ece14a855e..83b7071471 100644
--- a/usr/src/cmd/sgs/rtld/common/_rtld.h
+++ b/usr/src/cmd/sgs/rtld/common/_rtld.h
@@ -26,7 +26,7 @@
* Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#ifndef __RTLD_H
#define __RTLD_H
@@ -589,6 +589,8 @@ extern const char *rpl_ldflags; /* replaceable LD_FLAGS string */
extern const char *rpl_libpath; /* replaceable LD_LIBRARY string */
extern Alist *rpl_libdirs; /* and its associated Pdesc list */
extern const char *rpl_preload; /* replaceable LD_PRELOAD string */
+extern const char *rpl_ldtoxic; /* replaceable LD_TOXIC_PATH string */
+extern Alist *rpl_toxdirs; /* and associated Pdesc list */
extern const char *prm_audit; /* permanent LD_AUDIT string */
extern const char *prm_debug; /* permanent LD_DEBUG string */
diff --git a/usr/src/cmd/sgs/rtld/common/analyze.c b/usr/src/cmd/sgs/rtld/common/analyze.c
index e14c121f07..3cae1036ae 100644
--- a/usr/src/cmd/sgs/rtld/common/analyze.c
+++ b/usr/src/cmd/sgs/rtld/common/analyze.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
/*
@@ -834,6 +835,57 @@ is_so_loaded(Lm_list *lml, const char *name, int *in_nfavl)
}
/*
+ * Walk the toxic path list and determine if the object in question has violated
+ * the toxic path. When evaluating the toxic path we need to ensure that we
+ * match any path that's a subdirectory of a listed entry. In other words if
+ * /foo/bar is toxic, something in /foo/bar/baz/ is no good. However, we need to
+ * ensure that we don't mark /foo/barbaz/ as bad.
+ */
+static int
+is_load_toxic(Lm_list *lml, Rt_map *nlmp)
+{
+ const char *fpath;
+ size_t flen;
+ Pdesc *pdp;
+ Aliste idx;
+
+ fpath = PATHNAME(nlmp);
+
+ /*
+ * If we have a NULL path name, that indicates that rtld is processing
+ * an in-memory shared object. For example, trying to run ldd or doing
+ * an LD_PRELOAD on an object file. In those cases, we'll always allow
+ * it.
+ */
+ if (fpath == NULL)
+ return (0);
+
+ flen = strlen(fpath);
+
+ for (ALIST_TRAVERSE(rpl_toxdirs, idx, pdp)) {
+ if (pdp->pd_plen == 0)
+ continue;
+
+ if (strncmp(pdp->pd_pname, fpath, pdp->pd_plen) == 0) {
+ if (pdp->pd_pname[pdp->pd_plen-1] != '/') {
+ /*
+ * Path didn't end in a /, make sure
+ * we're at a directory boundary
+ * nonetheless.
+ */
+ if (flen > pdp->pd_plen &&
+ fpath[pdp->pd_plen] == '/')
+ return (1);
+ continue;
+ }
+ return (1);
+ }
+ }
+
+ return (0);
+}
+
+/*
* Tracing is enabled by the LD_TRACE_LOADED_OPTIONS environment variable which
* is normally set from ldd(1). For each link map we load, print the load name
* and the full pathname of the associated object.
@@ -2169,6 +2221,17 @@ load_finish(Lm_list *lml, const char *name, Rt_map *clmp, int nmode,
uint_t rdflags;
/*
+ * If this dependency is associated with a toxic path, then we must
+ * honor the user's request to die.
+ */
+ if (is_load_toxic(lml, nlmp) != 0) {
+ eprintf(lml, ERR_FATAL, MSG_INTL(MSG_TOXIC_FILE),
+ PATHNAME(nlmp));
+ rtldexit(lml, 1);
+
+ }
+
+ /*
* If this dependency is associated with a required version ensure that
* the version is present in the loaded file.
*/
diff --git a/usr/src/cmd/sgs/rtld/common/external.c b/usr/src/cmd/sgs/rtld/common/external.c
index 4a16ffcf9b..cd7365c524 100644
--- a/usr/src/cmd/sgs/rtld/common/external.c
+++ b/usr/src/cmd/sgs/rtld/common/external.c
@@ -22,7 +22,7 @@
/*
* Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2014 Garrett D'Amore <garrett@damore.org>
- * Copyright 2015 Joyent, Inc.
+ * Copyright 2016 Joyent, Inc.
*/
/*
@@ -721,6 +721,21 @@ isalnum(int c)
return ((isalpha(c) || isdigit(c)) ? 1 : 0);
}
+#if defined(__i386) || defined(__amd64)
+/*
+ * Instead of utilizing the comm page for clock_gettime, rtld uses the raw
+ * syscall instead. Doing so decreases the surface of symbols needed from libc
+ * for a modest performance cost.
+ */
+extern int __clock_gettime_sys(clockid_t, struct timespec *);
+
+int
+__clock_gettime(clockid_t clock_id, struct timespec *tp)
+{
+ return (__clock_gettime_sys(clock_id, tp));
+}
+#endif /* defined(__i386) || defined(__amd64) */
+
/*
* In a similar vein to the is* functions above, we also have to define our own
* version of strerror, as it is implemented in terms of the locale aware
diff --git a/usr/src/cmd/sgs/rtld/common/globals.c b/usr/src/cmd/sgs/rtld/common/globals.c
index 5f48fafc5f..2e98768551 100644
--- a/usr/src/cmd/sgs/rtld/common/globals.c
+++ b/usr/src/cmd/sgs/rtld/common/globals.c
@@ -24,6 +24,7 @@
* All Rights Reserved
*
* Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -132,6 +133,8 @@ const char *rpl_ldflags = NULL; /* replaceable LD_FLAGS string */
const char *rpl_libpath = NULL; /* replaceable LD_LIBRARY_PATH string */
Alist *rpl_libdirs = NULL; /* and associated Pdesc list */
const char *rpl_preload = NULL; /* replaceable LD_PRELOAD string */
+const char *rpl_ldtoxic = NULL; /* replaceable LD_TOXIC string */
+Alist *rpl_toxdirs = NULL; /* and associated Pdesc list */
const char *prm_audit = NULL; /* permanent LD_AUDIT string */
const char *prm_debug = NULL; /* permanent LD_DEBUG string */
diff --git a/usr/src/cmd/sgs/rtld/common/rtld.msg b/usr/src/cmd/sgs/rtld/common/rtld.msg
index 97e2841b8f..9309d0c62e 100644
--- a/usr/src/cmd/sgs/rtld/common/rtld.msg
+++ b/usr/src/cmd/sgs/rtld/common/rtld.msg
@@ -21,6 +21,7 @@
#
# Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2014, Joyent, Inc. All rights reserved.
#
@ _START_
@@ -99,9 +100,14 @@
@ MSG_SYS_MPROT "%s: mprotect failed: %s"
@ MSG_SYS_MMAPANON "mmap anon failed: %s"
+# Secure path failures
+
@ MSG_SEC_OPEN "%s: open failed: No such file in secure directories"
@ MSG_SEC_ILLEGAL "%s: open failed: illegal insecure pathname"
+# Toxic failures
+
+@ MSG_TOXIC_FILE "%s: dependency marked as toxic"
# Configuration failures
@@ -395,6 +401,7 @@
@ MSG_LD_PROFILE_OUTPUT "PROFILE_OUTPUT"
@ MSG_LD_SFCAP "SFCAP"
@ MSG_LD_SIGNAL "SIGNAL"
+@ MSG_LD_TOXICPATH "TOXIC_PATH"
@ MSG_LD_TRACE_OBJS "TRACE_LOADED_OBJECTS"
@ MSG_LD_TRACE_OBJS_E "TRACE_LOADED_OBJECTS_E"
@ MSG_LD_TRACE_OBJS_A "TRACE_LOADED_OBJECTS_A"
diff --git a/usr/src/cmd/sgs/rtld/common/setup.c b/usr/src/cmd/sgs/rtld/common/setup.c
index 862bb7d61f..98e3ba5d33 100644
--- a/usr/src/cmd/sgs/rtld/common/setup.c
+++ b/usr/src/cmd/sgs/rtld/common/setup.c
@@ -28,7 +28,7 @@
* All Rights Reserved
*/
/*
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
/*
@@ -819,6 +819,14 @@ setup(char **envp, auxv_t *auxv, Word _flags, char *_platform, int _syspagsz,
return (0);
}
+ /*
+ * Initialize our toxic paths
+ */
+ if (rpl_ldtoxic != NULL) {
+ (void) expand_paths(mlmp, rpl_ldtoxic, &rpl_toxdirs,
+ AL_CNT_SEARCH, 0, PD_TKN_CAP);
+ }
+
#if defined(_ELF64)
/*
* If this is a 64-bit process, determine whether this process has
diff --git a/usr/src/cmd/sgs/rtld/common/util.c b/usr/src/cmd/sgs/rtld/common/util.c
index 046c8297a0..f10f7a4aed 100644
--- a/usr/src/cmd/sgs/rtld/common/util.c
+++ b/usr/src/cmd/sgs/rtld/common/util.c
@@ -24,6 +24,7 @@
* All Rights Reserved
*
* Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
/*
@@ -1428,6 +1429,7 @@ static u_longlong_t cmdisa = 0; /* command line (-e) ISA */
#define ENV_FLG_CAP_FILES 0x0080000000000ULL
#define ENV_FLG_DEFERRED 0x0100000000000ULL
#define ENV_FLG_NOENVIRON 0x0200000000000ULL
+#define ENV_FLG_TOXICPATH 0x0400000000000ULL
#define SEL_REPLACE 0x0001
#define SEL_PERMANT 0x0002
@@ -1601,8 +1603,7 @@ ld_generic_env(const char *s1, size_t len, const char *s2, Word *lmflags,
if ((len == MSG_LD_FLAGS_SIZE) && (strncmp(s1,
MSG_ORIG(MSG_LD_FLAGS), MSG_LD_FLAGS_SIZE) == 0)) {
select |= SEL_ACT_SPEC_1;
- str = (select & SEL_REPLACE) ? &rpl_ldflags :
- &prm_ldflags;
+ str = &rpl_ldflags;
variable = ENV_FLG_FLAGS;
}
}
@@ -1813,6 +1814,8 @@ ld_generic_env(const char *s1, size_t len, const char *s2, Word *lmflags,
* In case an auditor is called, which in turn might exec(2) a
* subprocess, this variable is disabled, so that any subprocess
* escapes ldd(1) processing.
+ *
+ * Also, look for LD_TOXIC_PATH
*/
else if (*s1 == 'T') {
if (((len == MSG_LD_TRACE_OBJS_SIZE) &&
@@ -1850,7 +1853,13 @@ ld_generic_env(const char *s1, size_t len, const char *s2, Word *lmflags,
select |= SEL_ACT_LML;
val = LML_FLG_TRC_SEARCH;
variable = ENV_FLG_TRACE_PTHS;
+ } else if ((len == MSG_LD_TOXICPATH_SIZE) && (strncmp(s1,
+ MSG_ORIG(MSG_LD_TOXICPATH), MSG_LD_TOXICPATH_SIZE) == 0)) {
+ select |= SEL_ACT_SPEC_1;
+ str = &rpl_ldtoxic;
+ variable = ENV_FLG_TOXICPATH;
}
+
}
/*
* LD_UNREF and LD_UNUSED (internal, used by ldd(1)).
@@ -1974,7 +1983,8 @@ ld_generic_env(const char *s1, size_t len, const char *s2, Word *lmflags,
*lmtflags &= ~val;
} else if (select & SEL_ACT_SPEC_1) {
/*
- * variable is either ENV_FLG_FLAGS or ENV_FLG_LIBPATH
+ * variable is either ENV_FLG_FLAGS, ENV_FLG_LIBPATH, or
+ * ENV_FLG_TOXICPATH
*/
if (env_flags & ENV_TYP_NULL)
*str = NULL;
diff --git a/usr/src/cmd/sgs/yacc/Makefile.targ b/usr/src/cmd/sgs/yacc/Makefile.targ
index 9cbf1c440f..59a89218d7 100644
--- a/usr/src/cmd/sgs/yacc/Makefile.targ
+++ b/usr/src/cmd/sgs/yacc/Makefile.targ
@@ -90,4 +90,3 @@ $(LINTLIB): $$(SRCS)
lintcheck: $$(SRCS)
$(LINT.c) $(LINTCHECKFLAGS) $(SRCS) $(LDLIBS)
-
diff --git a/usr/src/cmd/sgs/yacc/common/dextern.h b/usr/src/cmd/sgs/yacc/common/dextern.h
index e90aa60468..54b441cac4 100644
--- a/usr/src/cmd/sgs/yacc/common/dextern.h
+++ b/usr/src/cmd/sgs/yacc/common/dextern.h
@@ -26,11 +26,13 @@
/* Copyright (c) 1988 AT&T */
/* All Rights Reserved */
+/*
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ */
+
#ifndef _DEXTERN_H
#define _DEXTERN_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <stdio.h>
#include <inttypes.h>
#include <ctype.h>
@@ -42,6 +44,7 @@
#include <unistd.h>
#include <stdlib.h>
#include <wctype.h>
+#include <limits.h>
#ifdef __cplusplus
extern "C" {
@@ -301,6 +304,12 @@ extern int wscmp(const wchar_t *, const wchar_t *);
extern char *parser;
+#ifndef PBUFSIZE
+#define PBUFSIZE PATH_MAX
+#endif
+
+extern char pbuf[PBUFSIZE];
+
/* default settings for a number of macros */
/* name of yacc tempfiles */
@@ -324,7 +333,11 @@ extern char *parser;
#endif
#ifndef PARSER
-#define PARSER "/usr/share/lib/ccs/yaccpar"
+#define PARSER "/share/lib/ccs/yaccpar"
+#endif
+
+#ifndef PARSERPREFIX
+#define PARSERPREFIX "/usr"
#endif
/*
diff --git a/usr/src/cmd/sgs/yacc/common/y1.c b/usr/src/cmd/sgs/yacc/common/y1.c
index 845f82d367..0e67d9047b 100644
--- a/usr/src/cmd/sgs/yacc/common/y1.c
+++ b/usr/src/cmd/sgs/yacc/common/y1.c
@@ -26,7 +26,9 @@
/* Copyright (c) 1988 AT&T */
/* All Rights Reserved */
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ */
#include "dextern.h"
#include <sys/param.h>
@@ -34,6 +36,7 @@
#include <unistd.h>
#include <locale.h>
#include <stdarg.h> /* For error() */
+#include <libgen.h>
static void mktbls(void);
static void others(void);
@@ -236,6 +239,25 @@ mktbls()
lsetsize = INIT_LSIZE + 1;
}
+static int
+yacc_assemble_path(char *buf, size_t size, const char *file, int type)
+{
+ int ret;
+ char origin[PATH_MAX];
+
+ if (type != 0) {
+ ret = readlink("/proc/self/path/a.out", origin, PATH_MAX - 1);
+ if (ret < 0)
+ error(gettext(
+ "yacc: failed to read origin from /proc\n"));
+ origin[ret] = '\0';
+ return (snprintf(buf, size, "%s/../%s", dirname(origin),
+ file));
+ }
+
+ return (snprintf(buf, size, "%s/%s", PARSERPREFIX, file));
+}
+
/* put out other arrays, copy the parsers */
static void
others()
@@ -244,7 +266,17 @@ others()
int c, i, j;
int tmpline;
- finput = fopen(parser, "r");
+ if (parser == NULL) {
+ parser = pbuf;
+ (void) yacc_assemble_path(pbuf, PBUFSIZE, PARSER, 1);
+ finput = fopen(parser, "r");
+ if (finput == NULL) {
+ (void) yacc_assemble_path(pbuf, PBUFSIZE, PARSER, 0);
+ finput = fopen(parser, "r");
+ }
+ } else {
+ finput = fopen(parser, "r");
+ }
if (finput == NULL)
/*
* TRANSLATION_NOTE -- This is a message from yacc.
diff --git a/usr/src/cmd/sgs/yacc/common/y2.c b/usr/src/cmd/sgs/yacc/common/y2.c
index 3599d40904..ca8dcc61f5 100644
--- a/usr/src/cmd/sgs/yacc/common/y2.c
+++ b/usr/src/cmd/sgs/yacc/common/y2.c
@@ -26,7 +26,9 @@
/* Copyright (c) 1988 AT&T */
/* All Rights Reserved */
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ */
#include "dextern.h"
#include "sgs.h"
@@ -60,7 +62,8 @@ char *infile; /* input file name */
static int numbval; /* value of an input number */
static int toksize = NAMESIZE;
static wchar_t *tokname; /* input token name */
-char *parser = PARSER; /* location of common parser */
+char *parser = NULL; /* location of common parser */
+char pbuf[PBUFSIZE];
static void finact(void);
static wchar_t *cstash(wchar_t *);
diff --git a/usr/src/cmd/ssh/Makefile b/usr/src/cmd/ssh/Makefile
deleted file mode 100644
index a36a9fb762..0000000000
--- a/usr/src/cmd/ssh/Makefile
+++ /dev/null
@@ -1,91 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-
-#
-# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
-#
-
-include ../Makefile.cmd
-
-# libopenbsd-compat and libssh are used by all SSH binaries, and sftp-server is
-# also used as an internal part of sshd.
-SUBDIRS= \
- etc \
- libopenbsd-compat \
- libssh \
- sftp-server \
- .WAIT \
- ssh \
- sshd \
- scp \
- ssh-add \
- ssh-agent \
- ssh-keygen \
- ssh-keysign \
- ssh-keyscan \
- sftp \
- ssh-http-proxy-connect \
- ssh-socks5-proxy-connect
-
-MSGFILE=ssh.po
-POFILE=_messages.po
-
-CLOBBERFILES += $(MSGFILE) THIRDPARTYLICENSE
-
-.KEEP_STATE:
-
-all := TARGET= all
-clean := TARGET= clean
-clobber := TARGET= clobber
-delete := TARGET= delete
-install := TARGET= install
-lint := TARGET= lint
-catalog := TARGET= catalog
-package := TARGET= package
-_msg := TARGET= _msg
-$(POFILE) := TARGET= $(POFILE)
-
-all clean install lint $(POFILE): $(SUBDIRS)
-clobber: $(SUBDIRS) clobber_local
-clobber_local:
- $(RM) $(CLOBBERFILES)
-
-all install: THIRDPARTYLICENSE
-
-check: $(CHECKHDRS)
-
-# See Makefile.msg.targ for $(MSGFILE) update instructions
-_msg:
- $(RM) $(POFILE)
- $(TOUCH) $(POFILE)
- $(MAKE) $(POFILE) XGETTEXT=/usr/bin/gxgettext
- $(CP) $(POFILE) $(MSGFILE)
- $(CP) $(MSGFILE) $(MSGDOMAIN)
-
-$(SUBDIRS): FRC
- cd $@; pwd; $(MAKE) $(TARGET)
-
-# skip the summary; just include the actual license clauses.
-THIRDPARTYLICENSE: doc/LICENCE
- $(SED) -n '/1)/,$$p' doc/LICENCE > $@
-
-FRC:
diff --git a/usr/src/cmd/ssh/Makefile.ssh-common b/usr/src/cmd/ssh/Makefile.ssh-common
deleted file mode 100644
index 4bb45a2e6a..0000000000
--- a/usr/src/cmd/ssh/Makefile.ssh-common
+++ /dev/null
@@ -1,99 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
-#
-# Copyright 2012 Milan Jurik. All rights reserved.
-#
-# Common definitions for all of usr/src/cmd/ssh subdirs
-#
-
-
-TEXT_DOMAIN=SUNW_OST_OSCMD
-
-CFLAGS += $(CCVERBOSE)
-LDFLAGS += $(MAPFILE.NGB:%=-M%)
-
-SSH_VERSION=\"Sun_SSH_1.5\"
-
-C99MODE= $(C99_ENABLE)
-
-CPPFLAGS = -DSSH_VERSION=$(SSH_VERSION) \
- -I../include -I../../include \
- -D_FILE_OFFSET_BITS=64 \
- -erroff=E_STATEMENT_NOT_REACHED \
- $(CPPFLAGS.master)
-
-SSH_COMMON_LDLIBS = \
- -L../libssh/$(MACH) -lssh \
- -L../libopenbsd-compat/$(MACH) -lopenbsd-compat
-
-$(PROG): $(MAPFILE.NGB)
-
-#
-# Some the lint -erroff flags listed below are because of deficiencies in
-# lint not because we are hiding real errors or to avoid massive resync-
-# hindering changes that will not be returned to OpenSSH.
-#
-# OpenSSH has several instances of "do {...} while (0);" - a common
-# idiom in C macros, but it elicits E_CONSTANT_CONDITION from lint.
-#
-# The MD5 macros in <openssl/md5.h> trigger E_EXPR_NULL_EFFECT.
-#
-# Lots of function return values are ignored in OpenSSH.
-#
-# Lots of globals could be static, sometimes due to portable code paths
-# which are dead on Solaris.
-#
-LINTFLAGS += \
- -erroff=E_FUNC_ARG_UNUSED \
- -erroff=E_NAME_USED_NOT_DEF2 \
- -erroff=E_FUNC_DECL_VAR_ARG2 \
- -erroff=E_INCONS_VAL_TYPE_DECL2 \
- -erroff=E_INCONS_ARG_DECL2 \
- -erroff=E_STATIC_UNUSED \
- -erroff=E_STATEMENT_NOT_REACHED \
- -erroff=E_FUNC_RET_ALWAYS_IGNOR2 \
- -erroff=E_FUNC_RET_MAYBE_IGNORED2 \
- -erroff=E_GLOBAL_COULD_BE_STATIC2 \
- -erroff=E_CONSTANT_CONDITION \
- -erroff=E_EXPR_NULL_EFFECT \
- -erroff=E_STMT_NOT_REACHED \
- -errtags=yes
-
-CERRWARN += -_gcc=-Wno-parentheses
-CERRWARN += -_gcc=-Wno-uninitialized
-CERRWARN += -_gcc=-Wno-unused-function
-CERRWARN += -_gcc=-Wno-unused-variable
-
-ROOTLIBSSH= $(ROOTLIB)/ssh
-ROOTLIBSSHPROG= $(PROG:%=$(ROOTLIBSSH)/%)
-
-ROOTLIBSUNSSH = $(ROOTLIB)/sunssh
-ROOTLIBSUNSSHPROG = $(PROG:%=$(ROOTLIBSUNSSH)/%)
-
-POFILE= _messages.po
-
-$(ROOTLIBSSH)/%: $(ROOTLIBSUNSSH)/%
- -$(RM) $@; $(SYMLINK) ../sunssh/$(@F) $@
-
-$(ROOTLIBSUNSSH)/%: %
- $(INS.file)
diff --git a/usr/src/cmd/ssh/README.altprivsep b/usr/src/cmd/ssh/README.altprivsep
deleted file mode 100644
index f91dbbd2a1..0000000000
--- a/usr/src/cmd/ssh/README.altprivsep
+++ /dev/null
@@ -1,687 +0,0 @@
- Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- Use is subject to license terms.
-
- Sun's Alternative "Privilege Separation" for OpenSSH
-
-
-Table of Contents
-
-1. Introduction
-2. What is "Privilege?"
-3. Analysis of the SSH Protocols
-3.1. Privileged Resources, Operations, in the SSH Protocols
-4. OpenSSH's Privilege Separation
-5. SUNWssh's Alternative Privilege Separation
-6. Comparison of the OpenSSH and SUNWssh PrivSep Models
-7. Future Directions
-8. Guide to the AltPrivSep Source Code
-A. References
-
-
-
-
-
-1. Introduction
-
- Implementations of SSH servers require some degree of privilege in
- order to function properly. Often such implementations retain such
- privilege throughout normal operation even while users are logged
- in. This means that vulnerabilities in the implementation of the
- protocols can be exploited in such ways as to escalate the privilege
- that would normally be accorded to mer-mortal users.
-
- The OpenSSH team introduced support for "privilege separation" in
- the OpenSSH ssh server some years ago to minimize the extent of
- extant, undiscovered vulnerabilities in the OpenSSH server source
- code. The basic concept is to have a multi-process server
- implementation where one process, the "monitor" is privileged and
- implements a smaller protocol than the ssh protocols, and thus is,
- hopefully, less likely to sport exploitable security bugs.
-
- The ssh team at Sun agrees with the basic OpenSSH privilege
- separation concept, but disagrees with its design.
-
- Here we present our alternative to the OpenSSH design. We begin
- with the question of just what is "privilege" and follow on with an
- analysis of the SSH protocols vis-a-vis privilege. Then we briefly
- describe the OpenSSH model, followed by an exposition of our
- alternative model.
-
-
-2. What is "Privilege?"
-
- Privilege, in a traditional Unix sense, is that which the "root"
- user can do that other users cannot directly do. In Solaris 10
- there is a new approach to this sort of privilege with the aim of
- running much of the operating system with the Least Privilege
- required; root's privilege is broken down into many privileges and
- these are managed through privilege sets. We won't go into the
- details of Solaris 10's Least Privilege facility here.
-
- But privilege is also access to data and resources that can be used
- to escalate the privilege of those who have access to them. For
- example: secret, or private cryptographic keys used in
- authentication. Network security typically requires the use of
- cryptographic keys for authentication.
-
-
-3. Analysis of the SSH Protocols
-
- There are two or, rather three SSH protocols:
-
- - version 1
- - version 1.5
- - version 2
-
- Version 1 and 1.5 are much the same, from our point of view; version
- 2 is significantly different from the other two.
-
- Familiarity by the reader with the specifications for these
- protocols is not assumed, but would be beneficial to the reader.
-
- Quite roughly, these protocols consist of the following:
-
- a) initial version exchange (for protocol version negotiation)
- b) a binary encoding of message data
- c) message syntaxes for the protocols' messages
- d) specifications on use of cryptography for transport
- privacy (encryption) and integrity protection
- e) a key exchange protocol (which also authenticates servers to
- clients)
- f) a protocol for user authentication
- g) a session protocol
- h) a re-keying protocol (v2-only)
-
- Some of these parts of the ssh protocols are quite complex, some
- quite straightforward. Altogether implementation of the ssh
- protocols requires a source code base of significant size.
-
- The OpenSSH implementation relies on OpenSSL for cryptographic
- service, on libz for compression service and miscellaneous other
- libraries. Besides these OpenSSH consists of several tens of
- thousands of lines of source code in C.
-
- SUNWssh is based on OpenSSH, so it is comparable in size and
- complexity to OpenSSH.
-
- There is, then, plenty of space for security bugs in the OpenSSH,
- and, therefore, also in the SUNWssh source code bases.
-
- The OpenSSH team designed and implemented a "privilege separation"
- feature in their ssh server to reduce the risk that a security bug
- in OpenSSH could be successfully exploited and an attacker's
- privilege escalated.
-
-
-3.1. Privileged Resources, Operations, in the SSH Protocols
-
- What privileges does an SSH server need then?
-
- Observation with Solaris 10's ppriv(1) and truss(1) commands as well
- as analysis of the ssh protocols leads to conclude as follows.
-
- No privilege or privileged resources are needed to implement the
- parts (a)-(d) mentioned in section 3.
-
-
- For key exchange and server authentication (e) an ssh server requires:
-
- - Access to the host's ssh private keys.
-
- - Access to the host's GSS-API acceptor credentials. [SSHv2-only]
-
-
- An ssh server requires practically all privileges for user
- authentication (f) (at least PAM does), particularly
- PRIV_PROC_SETID, for logging the user in.
-
-
- Post-authentication an ssh server requires the following privileges:
-
- - Those required for auditing a user's subsequent logout.
-
- That is, PRIV_PROC_AUDIT.
-
-
- - Those required for record keeping (i.e., utmpx/wtmpx logging).
-
- That is, either open file descriptor for those files or
- PRIV_FILE_DAC_WRITE or otherwise access to those files, perhaps
- through a special user id or group id which would be granted
- write access through the ACLs on those files.
-
- Since SSHv2 allows clients to open many channels with
- pseudo-terminals a server may need to open and close utmpx/wtmpx
- records multiple times in the lifetime of an SSHv2 connection.
-
-
- - Those required for accessing the host's ssh private keys for
- SSHv2 re-keying. [SSHv2-only]
-
- These keys can be (and are) loaded at server startup time,
- requiring PRIV_FILE_DAC_READ, or access through file ACLs, at
- that time, but not thence.
-
-
- - Those required for accessing the host's GSS-API acceptor
- credentials for SSHv2 re-keying.
-
- These credentials may require a large set of privileges. The
- Solaris 10 Kerberos V GSS-API mechanism, for example, requires
- PRIV_FILE_DAC_READ (for access to the system keytab) and
- PRIV_FILE_DAC_WRITE (for access to the Kerberos V replay cache).
-
-
- It is worth pointing out that because of a wrinkle in the
- specification of the SSHv2 protocol and various implementations,
- access to a host's ssh private keys can allow one not only to
- impersonate the host as a server (which is, in practice, difficult),
- but also to impersonate the host as a client (which is quite easy to
- do) using "hostbased" user authentication.
-
- It is entirely possible to have one-process server implementation
- that drops most privileges and access to privileged resources after
- user authentication succeeds. Such an implementation would make
- some privileges, such as PRIV_PROC_SETID, available to any attacker
- that successfully exploited a security bug in the ssh server.
-
- But such an implementation would also have to retain access to
- resources needed for authenticating the server, which, as described
- above, can be used to impersonate the server, in some cases with
- ease.
-
-
-4. OpenSSH's Privilege Separation
-
- The OpenSSH privilege separation model is quite complex.
-
- It consists of a monitor, which retains all privileges and access to
- privileged resources, and two processes which run with much less
- privilege: one process running as a special user, "sshd," for
- hosting all phases of the SSH protocols up to and including
- authentication, and one process running as the actual user that logs
- in and which hosts all phases of the SSH protocols post-user-
- authentication.
-
- The monitor and its companion processes speak a private protocol
- over IPC. This protocol is intended to be smaller and simpler than
- the SSH wire protocols.
-
- In practice the OpenSSH monitor protocols relating to user
- authentication are neither smaller nor simpler than the SSH user
- authentication protocols; and though they are different they also
- transport much the same data, including RSA/DSA signatures,
- usernames, PAM conversations, and GSS-API context and MIC tokens.
-
- The key exchange protocols have been broken down into their
- essentials and the monitor serves only services such as signing
- server replies with private host keys.
-
- Note also that the OpenSSH monitor protocol uses the same encodings
- as the SSH protocols and uses the same implementation of those
- encodings.
-
-
-5. SUNWssh's Alternative Privilege Separation
-
- The Sun Microsystems ssh team believes that the OpenSSH team has
- reached the point of diminishing returns in attempting to separate
- processing of the user authentication protocols and that the OpenSSH
- approach to privilege separation of the key exchange protocols has
- led to a situation in which the monitor acts as an oracle, willing
- to sign anything provided by the unprivileged processes that talk to
- it.
-
- The Sun ssh team proposes a somewhat different privilege separation
- implementation that shares with the OpenSSH model the goal of
- minimizing and simplifying the protocol spoken by the monitor, but
- little source code.
-
- We eschew any temptation to apply the privilege separation concept
- to the version negotiation, initial key exchange and user
- authentication phases of the ssh protocols (but see section 7).
-
- Instead we focus on separating processing of auditing, record
- keeping and re-keying from processing of the session protocols. We
- also wish to avoid creating any oracles in the monitor.
-
- This approach allows us to have a very simple monitor protocol. Our
- monitor protocol consists of the following operations:
-
- - record a new pseudo-terminal session
- - record the end of a pseudo-terminal session
- - process a re-key protocol messages
- - get keys negotiated during re-keying to the session process to it
- can use them
-
- Logout auditing is done when the session process dies and so does
- not require a monitor protocol message.
-
- By processing all re-key protocol messages in the monitor we prevent
- the creation of oracles in the monitor. This is so because the
- monitor signs only material which it has generated and over which an
- attacker would have little influence (through the attackers offered
- DH public key, for example).
-
- Odds and ends:
-
- - If the monitor receives SIGHUP, SIGTERM or SIGINT it will call
- fatal_cleanup(), and thence will forcibly shutdown(3SOCKET) the
- ssh connection socket, causing its child to exit, and audit a
- logout.
-
- - The monitor does not attempt to update utmpx/wtmpx independently
- of its child -- it depends on the child asking it to.
-
- - The child now is unable to chown() ptys back to root. That's Ok,
- other services on Solaris do the same and everything still works
- because of grantpt(3C).
-
- - The sshd server process (the one that will become a monitor)
- forks a child process before the key exchange starts. The reason
- for it is that if we forked after that we would end up using
- PKCS#11 sessions initialized in the monitor unless
- UseOpenSSLEngine was explicitly set to 'no'. Using any existing
- PKCS#11 sessions or object handles over fork is what the PKCS#11
- standard explicitly prohibits. To solve that, we would have to
- rekey before fork and then newly initialize the engine in the
- child, together with the new crypto contexts initialized with the
- keys produced by the key re-exchange. And, that wouldn't help in
- situations where the client does not support rekeying which also
- includes the whole protocol version 1. The pre-fork solution is
- simpler and also much faster. So, the key exchange and
- authentication is fully done in the child server process while
- the monitor waits aside to read the authentication context that
- is needed for further operation. The child drops privileges after
- the authentication finishes.
-
- With the ssh client, the situation is slightly more complicated.
- Given the fact that the user can request to go to the background
- during the connection using the ~& sequence we must be prepared
- to rekey before forking, to reinitialize the engine in the child
- after that, and then set the new crypto contexts with the new
- keys. If the server we are communicating with does not support
- rekeying we will not use the engine at all. We expect this
- situation to be extremely rare and will not offer any workaround
- for that. This also includes the protocol version 1. However,
- this version is already considered obsolete and should not be used
- if possible.
-
-6. Comparison of the OpenSSH and SUNWssh PrivSep Models
-
- The OpenSSH server involves three processes which we will term
- "pre-session," "session" and "monitor."
-
- The OpenSSH pre-session process implements:
-
- - the ssh version string exchange
- - the ssh message encoding/decoding
- - most of the initial key exchange protocols
- - transport protection
- - part of the user authentication protocols
-
- The OpenSSH session process implements:
-
- - the ssh message encoding/decoding
- - transport protection
- - most of the re-keying protocols
- - the session protocols
-
- The OpenSSH monitor process implements:
-
- - the ssh message encoding/decoding
- - parts of the key exchange and re-key protocols (primarily signing
- of server replies with host private keys)
- - most of the user authentication protocols, specifically:
-
- - evaluation of ~/.ssh/authorized_keys (for pubkey userauth)
- - evaluation of known hosts files (for hostbased userauth)
- - evaluation of .shosts/.rhosts files (for hostbased userauth)
- - verification of signatures w/ public keys (pubkey, hostbased)
- - PAM API calls, conversation function
- - GSS-API calls
-
- Note that any vulnerabilities in the parsing of authorized_keys,
- known hosts and .shosts/rhosts files are as exploitable in the
- monitor as in a server w/o privilege separation.
-
- Similarly for any vulnerabilities in PAM modules and GSS-API
- mechanisms.
-
- The SUNWssh server involves two processes which we will term
- "session" and "monitor."
-
- The SUNWssh monitor process implements:
-
- - the ssh version string exchange
- - the ssh message encoding/decoding
- - transport protection
- - all of the key exchange and re-key protocols
- - all of the user authentication protocols
-
- The SUNWssh session process implements:
-
- - the ssh message encoding/decoding
- - transport protection
- - the session protocols
-
- Obviously all of these processes also implement their side of the
- monitor protocols.
-
- The OpenSSH 3.5p1 monitor protocol, on Solaris, has approximately 20
- monitor request and corresponding response messages.
-
- The SUNWssh monitor protocol has 5 monitor request and response
- messages; additionally, the monitor processes standard re-key
- messages (but note: the monitor and the session process IPC is
- completely unencrypted), which amounts to about 14 more messages
- altogether.
-
- Much of the OpenSSH monitor protocol is a variation of the
- on-the-wire ssh protocols, with some contents re-packaging. We
- believe this does not afford the monitor much additional, if any
- protection from attacks in the key exchange and user authentication
- protocols.
-
- The re-packaging that is done in the OpenSSH monitor protocol is
- risky business. By separating the act of signing some blob of data
- from computing that blob of data one can create an oracle; this is
- exactly what happened in the OpenSSH case.
-
- As you can see in the next section, the SUNWssh privilege separation
- could evolve somewhat in the OpenSSH direction by saving the monitor
- all transport protection work, but we cannot save the monitor much,
- if any work relating to authentication or key exchange.
-
-
-7. Future Directions
-
- The SUNWssh server privilege separation implementation could stand
- several improvements.
-
- The first improvement would be to have a single system-wide monitor.
- This would reduce resource consumption. The work needed to
- implement such an enhancement is very similar to the work needed to
- produce an SSH API and library, and it is not trivial. If this is
- not done then at least dropping PRIV_PROC_SETID and instead setting
- the saved-set-user-id in the monitor to that of the logged in user
- would be nice.
-
- The second enhancement would be to add a "none" host key algorithm
- to SSHv2 and a corresponding option in SUNWssh to disallow re-keying
- with any other host key algorithm. This would allow customers to
- configure their server and monitor so that no re-key protocol
- messages need be processed by the monitor.
-
- A third enhancement would be to enhance the GSS-API mechanisms to
- require fewer privileges. In practice this means overhauling the
- Kerberos V mechanism's replay cache. This would allow the monitor
- to run with fewer privileges.
-
- Further, even without improving the Kerberos V mechanism's replay
- cache it should be possible to drop at least PRIV_PROC_FORK/EXEC/
- SESSION.
-
- A fourth enhancement would to have the unprivileged process handle
- all transport protection and proxy to the monitor all key exchange
- and user authentication protocol messages. This is a variation on
- the OpenSSH model, but without the re-packaging of ssh message
- contents seen there. After authentication succeeds the monitor
- could either change the unprivileged process' credentials (as can be
- done with ppriv(1) or the unprivileged process would, as in OpenSSH,
- pass the session keys/IVs/keystate to the monitor which would then
- pass them to a new process, the session process, that would then run
- as the logged in user.
-
-
-8. Guide to the AltPrivSep Source Code
-
-
- First, a brief introduction to the SUNWssh/OpenSSH source code.
-
- The source code is organized as follows:
-
- $SRC/cmd/ssh/etc/
- |
- +-> config files
-
- $SRC/cmd/ssh/include/
- |
- +-> header files (note: none are installed/shipped)
-
- $SRC/cmd/ssh/libopenbsd-compat/common/
- |
- +-> misc. portability source code
-
- $SRC/cmd/ssh/libssh/common/
- |
- +-> implementation of encoding, transport protection,
- various wrappers around cryptography, the key exchange
- and host authentication protocols, the session
- protocols, and misc. other code
-
- cipher.c
- mac.c
- compress.c
- packet.c
- |
- +-> transport protocol
-
- buffer.c
- bufaux.c
- |
- +-> encoding
-
- channels.c
- nchan.c
- |
- +-> session protocol
-
- kex.c
- kexdh.c
- kexgex.c
- |
- +-> key exchange/re-key code common to ssh and sshd
-
- kexdhs.c
- kexgexs.c
- kexgsss.c
- |
- +-> key exchange/re-key code (server only)
-
- kexdhc.c
- kexgexc.c
- kexgssc.c
- |
- +-> key exchange/re-key code (client only)
-
- dh.c
- rsa.c
- mpaux.c
- ssh-rsa.c
- ssh-dss.c
- ssh-gss.c
- |
- +-> crypto wrappers/utilities
-
- log.c
- |
- +-> logging, including debug logging, on stderr or
- syslog
-
-
- $SRC/cmd/ssh/ssh/
- |
- +-> ssh(1)
-
- $SRC/cmd/ssh/sshd/
- |
- +-> sshd(1M), including auditing, implementation of user
- authentication and the OpenSSH and SUNWssh monitors
-
- sshd.c
- |
- +-> main()
-
- auth*.c
- |
- +-> user authentication
-
- serverloop.c
- session.c
- |
- +-> session protocols
-
- bsmaudit.[ch]
- sshlogin.c
- loginrec.c
- |
- +-> auditing and record-keeping
-
- $SRC/cmd/ssh/<misc commands>/
- |
- +-> scp, sftp, sftp-server, ssh-agent, ssh-add, ...
-
-
- The SUNWssh altprivsep adds two new source files:
-
- $SRC/cmd/ssh/include/altprivsep.h
- $SRC/cmd/ssh/sshd/altprivsep.c
- |
- +-> monitor start routine, altprivsep_packet_*() routines
- for communication with the monitor, routines to help
- with key exchanges, service procedures for the monitor,
- etc...
-
- and modifies the following:
-
- $SRC/cmd/ssh/include/config.h
- |
- +> adds cpp define "ALTPRIVSEP"
-
- $SRC/cmd/ssh/include/ssh2.h
- |
- +-> adds private message type "SSH2_PRIV_MSG_ALTPRIVSEP" (254)
-
- $SRC/cmd/ssh/include/packet.h
- |
- +-> adds prototypes for several simple utility functions,
- some of which are specifically meant to avoid having to
- link altprivsep.c into ssh(1)
-
- $SRC/cmd/ssh/libssh/common/kex.c
- $SRC/cmd/ssh/libssh/common/packet.c
- |
- +-> implements the hooks needed to proxy re-key messages
- to/from the monitor
-
- $SRC/cmd/ssh/sshd/Makefile
- |
- +-> adds altprivsep.o to list of objects linked into sshd(1M)
-
- $SRC/cmd/ssh/sshd/serverloop.c
- |
- +-> adds an event loop for the monitor
- modifies the usual event loops for SSHv2
-
- $SRC/cmd/ssh/sshd/session.c
- |
- +-> modifies do_login() and session_pty_cleanup2() to call
- altprivsep_record_login/logout() instead of
- record_login/logout().
-
- modifies do_exec_pty() so that the server waits for the
- call to altprivsep_record_login() in child process to
- complete before returning so that the server and the
- child processes do not compete for monitor IPC I/O.
-
- $SRC/cmd/ssh/include/log.h
- $SRC/cmd/ssh/libssh/common/log.c
- |
- +-> adds an internal interface, set_log_txt_prefix() so that
- the monitor's debug and log messages get prefixed with a
- string ("monitor ") that indicates they are from the
- monitor
-
- $SRC/cmd/ssh/sshd/sshd.c
- |
- +-> modifies the body of code that follows the user
- authentication phase of the ssh protocols so as to start
- the monitor and move the relevant code into the monitor
- or session processes as appropriate while dropping
- privileges and access to privileged resources in the
- session process
-
- The monitor uses the packet.h interfaces to communicate with the
- session process as though it were its ssh client peer, but always
- uses the "none" cipher, mac and compression algorithms and installs
- even handlers only for the relevant key exchange messages and the
- private monitor message used for the other monitor services.
-
- The monitor serves the following services:
-
- - APS_MSG_NEWKEYS_REQ -> used to obtain keys/IVs after re-keys
- - APS_MSG_RECORD_LOGIN -> used to update utmpx/wtmpx
- - APS_MSG_RECORD_LOGOUT -> used to update utmpx/wtmpx
-
- The session and monitor processes communicate over a pipe.
-
- All monitor IPC I/O from the session process is blocking (though the
- pipe is set to non-blocking I/O). The monitor protocol is entirely
- synchronous and relies on the re-key protocols being entirely
- synchronous also (which they are, unlike the session protocols).
-
- The kex.c and packet.c files are minimally modified, primarily to
- prevent the monitor from handling SSH_MSG_NEWKEYS messages as a
- normal ssh server should, instead letting the session process
- process SSH_MSG_NEWKEYS messages by requesting the new keys
- negotiated with client from the monitor.
-
- Note that for SSHv1 no on-the-wire messages are processed by the
- monitor after authentication. In fact, the monitor thinks it's
- running SSHv2, even if the on-the-wire protocol is v1.
-
-
-A. References
-
- The IETF SECSH Working Group:
-
- http://www.ietf.org/html.charters/secsh-charter.html
-
- The SSHv2 architecture, assigned numbers:
-
- http://www.ietf.org/internet-drafts/draft-ietf-secsh-architecture-16.txt
- http://www.ietf.org/internet-drafts/draft-ietf-secsh-assignednumbers-06.txt
-
- New cipher modes for SSHv2:
-
- http://www.ietf.org/internet-drafts/draft-ietf-secsh-newmodes-02.txt
-
- The SSHv2 "transport," including initial key exchange and re-key
- protocols, but excluding negotiable DH group size and GSS-API-based
- key exchange:
-
- http://www.ietf.org/internet-drafts/draft-ietf-secsh-transport-18.txt
-
- Additional key exchange protocols for SSHv2:
-
- http://www.ietf.org/internet-drafts/draft-ietf-secsh-gsskeyex-08.txt
- http://www.ietf.org/internet-drafts/draft-ietf-secsh-dh-group-exchange-04.txt
-
- Base user authentication spec for SSHv2 (includes none, password,
- pubkey and hostbased user authentication):
-
- http://www.ietf.org/internet-drafts/draft-ietf-secsh-userauth-21.txt
-
- SSHv2 user authentication using PAM-style prompting:
-
- http://www.ietf.org/internet-drafts/draft-ietf-secsh-auth-kbdinteract-06.txt
-
- SSHv2 user authentication using the GSS-API:
-
- http://www.ietf.org/internet-drafts/draft-ietf-secsh-gsskeyex-08.txt
-
- SSHv2 "session" protocol (i.e., the protocol used for pty sessions,
- port forwarding, agent forwarding, X display forwarding, etc...):
-
- http://www.ietf.org/internet-drafts/draft-ietf-secsh-connect-19.txt
diff --git a/usr/src/cmd/ssh/THIRDPARTYLICENSE.descrip b/usr/src/cmd/ssh/THIRDPARTYLICENSE.descrip
deleted file mode 100644
index 7e936fffc7..0000000000
--- a/usr/src/cmd/ssh/THIRDPARTYLICENSE.descrip
+++ /dev/null
@@ -1 +0,0 @@
-OPENSSH SOFTWARE
diff --git a/usr/src/cmd/ssh/doc/COPYING.Ylonen b/usr/src/cmd/ssh/doc/COPYING.Ylonen
deleted file mode 100644
index ad17df17a0..0000000000
--- a/usr/src/cmd/ssh/doc/COPYING.Ylonen
+++ /dev/null
@@ -1,70 +0,0 @@
-This file is part of the ssh software, Copyright (c) 1995 Tatu Ylonen, Finland
-
-
-COPYING POLICY AND OTHER LEGAL ISSUES
-
-As far as I am concerned, the code I have written for this software
-can be used freely for any purpose. Any derived versions of this
-software must be clearly marked as such, and if the derived work is
-incompatible with the protocol description in the RFC file, it must be
-called by a name other than "ssh" or "Secure Shell".
-
-However, I am not implying to give any licenses to any patents or
-copyrights held by third parties, and the software includes parts that
-are not under my direct control. As far as I know, all included
-source code is used in accordance with the relevant license agreements
-and can be used freely for any purpose (the GNU license being the most
-restrictive); see below for details.
-
-[ RSA is no longer included. ]
-[ IDEA is no longer included. ]
-[ DES is now external. ]
-[ GMP is now external. No more GNU licence. ]
-[ Zlib is now external. ]
-[ The make-ssh-known-hosts script is no longer included. ]
-[ TSS has been removed. ]
-[ MD5 is now external. ]
-[ RC4 support has been removed (RC4 is used internally for arc4random). ]
-[ Blowfish is now external. ]
-
-The 32-bit CRC implementation in crc32.c is due to Gary S. Brown.
-Comments in the file indicate it may be used for any purpose without
-restrictions.
-
-The 32-bit CRC compensation attack detector in deattack.c was
-contributed by CORE SDI S.A. under a BSD-style license. See
-http://www.core-sdi.com/english/ssh/ for details.
-
-Note that any information and cryptographic algorithms used in this
-software are publicly available on the Internet and at any major
-bookstore, scientific library, and patent office worldwide. More
-information can be found e.g. at "http://www.cs.hut.fi/crypto".
-
-The legal status of this program is some combination of all these
-permissions and restrictions. Use only at your own responsibility.
-You will be responsible for any legal consequences yourself; I am not
-making any claims whether possessing or using this is legal or not in
-your country, and I am not taking any responsibility on your behalf.
-
-
- NO WARRANTY
-
-BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
-FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
-OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
-PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
-OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
-TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
-PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
-REPAIR OR CORRECTION.
-
-IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
-REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
-INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
-OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
-TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
-YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
-PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGES.
diff --git a/usr/src/cmd/ssh/doc/CREDITS b/usr/src/cmd/ssh/doc/CREDITS
deleted file mode 100644
index 8831cdd5ac..0000000000
--- a/usr/src/cmd/ssh/doc/CREDITS
+++ /dev/null
@@ -1,87 +0,0 @@
-Tatu Ylonen <ylo@cs.hut.fi> - Creator of SSH
-
-Aaron Campbell, Bob Beck, Markus Friedl, Niels Provos,
-Theo de Raadt, and Dug Song - Creators of OpenSSH
-
-Alain St-Denis <Alain.St-Denis@ec.gc.ca> - Irix fix
-Alexandre Oliva <oliva@lsd.ic.unicamp.br> - AIX fixes
-Andre Lucas <andre.lucas@dial.pipex.com> - new login code, many fixes
-Andreas Steinmetz <ast@domdv.de> - Shadow password expiry support
-Andrew McGill <andrewm@datrix.co.za> - SCO fixes
-Andrew Stribblehill <a.d.stribblehill@durham.ac.uk> - Bugfixes
-Andy Sloane <andy@guildsoftware.com> - bugfixes
-Aran Cox <acox@cv.telegroup.com> - SCO bugfixes
-Arkadiusz Miskiewicz <misiek@pld.org.pl> - IPv6 compat fixes
-Ben Lindstrom <mouring@pconline.com> - NeXT support
-Ben Taylor <bent@clark.net> - Solaris debugging and fixes
-Bratislav ILICH <bilic@zepter.ru> - Configure fix
-Charles Levert <charles@comm.polymtl.ca> - SunOS 4 & bug fixes
-Chip Salzenberg <chip@valinux.com> - Assorted patches
-Chris Adams <cmadams@hiwaay.net> - OSF SIA support
-Chris Saia <csaia@wtower.com> - SuSE packaging
-Chris, the Young One <cky@pobox.com> - Password auth fixes
-Christos Zoulas <christos@zoulas.com> - Autoconf fixes
-Chun-Chung Chen <cjj@u.washington.edu> - RPM fixes
-Corinna Vinschen <vinschen@cygnus.com> - Cygwin support
-Dan Brosemer <odin@linuxfreak.com> - Autoconf support, build fixes
-Darren Hall <dhall@virage.org> - AIX patches
-David Agraz <dagraz@jahoopa.com> - Build fixes
-David Del Piero <David.DelPiero@qed.qld.gov.au> - bug fixes
-David Hesprich <darkgrue@gue-tech.org> - Configure fixes
-David Rankin <drankin@bohemians.lexington.ky.us> - libwrap, AIX, NetBSD fixes
-Ed Eden <ede370@stl.rural.usda.gov> - configure fixes
-Garrick James <garrick@james.net> - configure fixes
-Gary E. Miller <gem@rellim.com> - SCO support
-Ged Lodder <lodder@yacc.com.au> - HPUX fixes and enhancements
-Gert Doering <gd@hilb1.medat.de> - bug and portability fixes
-HARUYAMA Seigo <haruyama@nt.phys.s.u-tokyo.ac.jp> - Translations & doc fixes
-Hideaki YOSHIFUJI <yoshfuji@ecei.tohoku.ac.jp> - IPv6 and bug fixes
-Hiroshi Takekawa <takekawa@sr3.t.u-tokyo.ac.jp> - Configure fixes
-Holger Trapp <Holger.Trapp@Informatik.TU-Chemnitz.DE> - KRB4/AFS config patch
-IWAMURO Motonori <iwa@mmp.fujitsu.co.jp> - bugfixes
-Jani Hakala <jahakala@cc.jyu.fi> - Patches
-Jarno Huuskonen <jhuuskon@hytti.uku.fi> - Bugfixes
-Jim Knoble <jmknoble@pobox.com> - Many patches
-Jonchen (email unknown) - the original author of PAM support of SSH
-Juergen Keil <jk@tools.de> - scp bugfixing
-KAMAHARA Junzo <kamahara@cc.kshosen.ac.jp> - Configure fixes
-Kees Cook <cook@cpoint.net> - scp fixes
-Kenji Miyake <kenji@miyake.org> - Configure fixes
-Kevin O'Connor <kevin_oconnor@standardandpoors.com> - RSAless operation
-Kevin Steves <stevesk@sweden.hp.com> - HP support, bugfixes, improvements
-Kiyokazu SUTO <suto@ks-and-ks.ne.jp> - Bugfixes
-Larry Jones <larry.jones@sdrc.com> - Bugfixes
-Lutz Jaenicke <Lutz.Jaenicke@aet.TU-Cottbus.DE> - Bugfixes
-Marc G. Fournier <marc.fournier@acadiau.ca> - Solaris patches
-Martin Johansson <fatbob@acc.umu.se> - Linux fixes
-Mark Miller <markm@swoon.net> - Bugfixes
-Matt Richards <v2matt@btv.ibm.com> - AIX patches
-Michael Stone <mstone@cs.loyola.edu> - Irix enhancements
-Nakaji Hiroyuki <nakaji@tutrp.tut.ac.jp> - Sony News-OS patch
-Nalin Dahyabhai <nalin.dahyabhai@pobox.com> - PAM environment patch
-Nate Itkin <nitkin@europa.com> - SunOS 4.1.x fixes
-Niels Kristian Bech Jensen <nkbj@image.dk> - Assorted patches
-Pavel Kankovsky <peak@argo.troja.mff.cuni.cz> - Security fixes
-Pavel Troller <patrol@omni.sinus.cz> - Bugfixes
-Pekka Savola <pekkas@netcore.fi> - Bugfixes
-Peter Kocks <peter.kocks@baygate.com> - Makefile fixes
-Phil Hands <phil@hands.com> - Debian scripts, assorted patches
-Phil Karn <karn@ka9q.ampr.org> - Autoconf fixes
-Philippe WILLEM <Philippe.WILLEM@urssaf.fr> - Bugfixes
-Phill Camp <P.S.S.Camp@ukc.ac.uk> - login code fix
-Rip Loomis <loomisg@cist.saic.com> - Solaris package support, fixes
-SAKAI Kiyotaka <ksakai@kso.netwk.ntt-at.co.jp> - Multiple bugfixes
-Simon Wilkinson <sxw@dcs.ed.ac.uk> - PAM fixes
-Svante Signell <svante.signell@telia.com> - Bugfixes
-Thomas Neumann <tom@smart.ruhr.de> - Shadow passwords
-Tim Rice <tim@multitalents.net> - Portability & SCO fixes
-Tobias Oetiker <oetiker@ee.ethz.ch> - Bugfixes
-Tom Bertelson's <tbert@abac.com> - AIX auth fixes
-Tor-Ake Fransson <torake@hotmail.com> - AIX support
-Tudor Bosman <tudorb@jm.nu> - MD5 password support
-Udo Schweigert <ust@cert.siemens.de> - ReliantUNIX support
-Zack Weinberg <zack@wolery.cumb.org> - GNOME askpass enhancement
-
-Apologies to anyone I have missed.
-
-Damien Miller <djm@mindrot.org>
diff --git a/usr/src/cmd/ssh/doc/ChangeLog b/usr/src/cmd/ssh/doc/ChangeLog
deleted file mode 100644
index 7333c81a3e..0000000000
--- a/usr/src/cmd/ssh/doc/ChangeLog
+++ /dev/null
@@ -1,2590 +0,0 @@
-20001106
- - (djm) Use Jim's new 1.0.3 askpass in Redhat RPMs
- - (djm) Manually fix up missed diff hunks (mainly RCS idents)
- - (djm) Remove UPGRADING document in favour of a link to the better
- maintained FAQ on www.openssh.com
- - (djm) Fix multiple dependancy on gnome-libs from Pekka Savola
- <pekkas@netcore.fi>
- - (djm) Don't need X11-askpass in RPM spec file if building without it
- from Pekka Savola <pekkas@netcore.fi>
- - (djm) Release 2.3.0p1
-
-20001105
- - (bal) Sync with OpenBSD:
- - markus@cvs.openbsd.org 2000/10/31 9:31:58
- [compat.c]
- handle all old openssh versions
- - markus@cvs.openbsd.org 2000/10/31 13:1853
- [deattack.c]
- so that large packets do not wrap "n"; from netbsd
- - (bal) rijndel.c - fix up RCSID to match OpenBSD tree
- - (bal) auth2-skey.c - Checked in. Missing from portable tree.
- - (bal) Reworked NEWS-OS and NeXT ports to extract waitpid() and
- setsid() into more common files
- - (stevesk) pty.c: use __hpux to identify HP-UX.
- - (bal) Missed auth-skey.o in Makefile.in and minor correction to
- bsd-waitpid.c
-
-20001029
- - (stevesk) Fix typo in auth.c: USE_PAM not PAM
- - (stevesk) Create contrib/cygwin/ directory; patch from
- Corinna Vinschen <vinschen@redhat.com>
- - (bal) Resolved more $xno and $xyes issues in configure.in
- - (bal) next-posix.h - spelling and forgot a prototype
-
-20001028
- - (djm) fix select hack in serverloop.c from Philippe WILLEM
- <Philippe.WILLEM@urssaf.fr>
- - (djm) Fix mangled AIXAUTHENTICATE code
- - (djm) authctxt->pw may be NULL. Fix from Markus Friedl
- <markus.friedl@informatik.uni-erlangen.de>
- - (djm) Sync with OpenBSD:
- - markus@cvs.openbsd.org 2000/10/16 15:46:32
- [ssh.1]
- fixes from pekkas@netcore.fi
- - markus@cvs.openbsd.org 2000/10/17 14:28:11
- [atomicio.c]
- return number of characters processed; ok deraadt@
- - markus@cvs.openbsd.org 2000/10/18 12:04:02
- [atomicio.c]
- undo
- - markus@cvs.openbsd.org 2000/10/18 12:23:02
- [scp.c]
- replace atomicio(read,...) with read(); ok deraadt@
- - markus@cvs.openbsd.org 2000/10/18 12:42:00
- [session.c]
- restore old record login behaviour
- - deraadt@cvs.openbsd.org 2000/10/19 10:41:13
- [auth-skey.c]
- fmt string problem in unused code
- - provos@cvs.openbsd.org 2000/10/19 10:45:16
- [sshconnect2.c]
- don't reference freed memory. okay deraadt@
- - markus@cvs.openbsd.org 2000/10/21 11:04:23
- [canohost.c]
- typo, eramore@era-t.ericsson.se; ok niels@
- - markus@cvs.openbsd.org 2000/10/23 13:31:55
- [cipher.c]
- non-alignment dependent swap_bytes(); from
- simonb@wasabisystems.com/netbsd
- - markus@cvs.openbsd.org 2000/10/26 12:38:28
- [compat.c]
- add older vandyke products
- - markus@cvs.openbsd.org 2000/10/27 01:32:19
- [channels.c channels.h clientloop.c serverloop.c session.c]
- [ssh.c util.c]
- enable non-blocking IO on channels, and tty's (except for the
- client ttys).
-
-20001027
- - (djm) Increase REKEY_BYTES to 2^24 for arc4random
-
-20001025
- - (djm) Added WARNING.RNG file and modified configure to ask users of the
- builtin entropy code to read it.
- - (djm) Prefer builtin regex to PCRE.
- - (bal) Added USE_PIPS defined to NeXT configure.in since scp hangs randomly.
- - (bal) Apply fixes to configure.in pointed out by Pavel Roskin
- <proski@gnu.org>
-
-20001020
- - (djm) Don't define _REENTRANT for SNI/Reliant Unix
- - (bal) Imported NEWS-OS waitpid() macros into NeXT. Since implementation
- is more correct then current version.
-
-20001018
- - (stevesk) Add initial support for setproctitle(). Current
- support is for the HP-UX pstat(PSTAT_SETCMD, ...) method.
- - (stevesk) Add egd startup scripts to contrib/hpux/
-
-20001017
- - (djm) Add -lregex to cywin libs from Corinna Vinschen
- <vinschen@cygnus.com>
- - (djm) Don't rely on atomicio's retval to determine length of askpass
- supplied passphrase. Problem report from Lutz Jaenicke
- <Lutz.Jaenicke@aet.TU-Cottbus.DE>
- - (bal) Changed from GNU rx to PCRE on suggestion from djm.
- - (bal) Integrated Sony NEWS-OS patches from NAKAJI Hirouyuki
- <nakaji@tutrp.tut.ac.jp>
-
-20001016
- - (djm) Sync with OpenBSD:
- - markus@cvs.openbsd.org 2000/10/14 04:01:15
- [cipher.c]
- debug3
- - markus@cvs.openbsd.org 2000/10/14 04:07:23
- [scp.c]
- remove spaces from arguments; from djm@mindrot.org
- - markus@cvs.openbsd.org 2000/10/14 06:09:46
- [ssh.1]
- Cipher is for SSH-1 only
- - markus@cvs.openbsd.org 2000/10/14 06:12:09
- [servconf.c servconf.h serverloop.c session.c sshd.8]
- AllowTcpForwarding; from naddy@
- - markus@cvs.openbsd.org 2000/10/14 06:16:56
- [auth2.c compat.c compat.h sshconnect2.c version.h]
- OpenSSH_2.3; note that is is not complete, but the version number
- needs to be changed for interoperability reasons
- - markus@cvs.openbsd.org 2000/10/14 06:19:45
- [auth-rsa.c]
- do not send RSA challenge if key is not allowed by key-options; from
- eivind@ThinkSec.com
- - markus@cvs.openbsd.org 2000/10/15 08:14:01
- [rijndael.c session.c]
- typos; from stevesk@sweden.hp.com
- - markus@cvs.openbsd.org 2000/10/15 08:18:31
- [rijndael.c]
- typo
- - (djm) Copy manpages back over from OpenBSD - too tedious to wade
- through diffs
- - (djm) Added condrestart to Redhat init script. Patch from Pekka Savola
- <pekkas@netcore.fi>
- - (djm) Update version in Redhat spec file
- - (djm) Merge some of Nalin Dahyabhai <nalin@redhat.com> changes from the
- Redhat 7.0 spec file
- - (djm) Make inability to read/write PRNG seedfile non-fatal
-
-
-20001015
- - (djm) Fix ssh2 hang on background processes at logout.
-
-20001014
- - (bal) Add support for realpath and getcwd for platforms with broken
- or missing realpath implementations for sftp-server.
- - (bal) Corrected mistake in INSTALL in regards to GNU rx library
- - (bal) Add support for GNU rx library for those lacking regexp support
- - (djm) Don't accept PAM_PROMPT_ECHO_ON messages during initial auth
- - (djm) Revert SSH2 serverloop hack, will find a better way.
- - (djm) Add workaround for Linux 2.4's gratuitious errno change. Patch
- from Martin Johansson <fatbob@acc.umu.se>
- - (djm) Big OpenBSD sync:
- - markus@cvs.openbsd.org 2000/09/30 10:27:44
- [log.c]
- allow loglevel debug
- - markus@cvs.openbsd.org 2000/10/03 11:59:57
- [packet.c]
- hmac->mac
- - markus@cvs.openbsd.org 2000/10/03 12:03:03
- [auth-krb4.c auth-passwd.c auth-rh-rsa.c auth-rhosts.c auth-rsa.c auth1.c]
- move fake-auth from auth1.c to individual auth methods, disables s/key in
- debug-msg
- - markus@cvs.openbsd.org 2000/10/03 12:16:48
- ssh.c
- do not resolve canonname, i have no idea why this was added oin ossh
- - markus@cvs.openbsd.org 2000/10/09 15:30:44
- ssh-keygen.1 ssh-keygen.c
- -X now reads private ssh.com DSA keys, too.
- - markus@cvs.openbsd.org 2000/10/09 15:32:34
- auth-options.c
- clear options on every call.
- - markus@cvs.openbsd.org 2000/10/09 15:51:00
- authfd.c authfd.h
- interop with ssh-agent2, from <res@shore.net>
- - markus@cvs.openbsd.org 2000/10/10 14:20:45
- compat.c
- use rexexp for version string matching
- - provos@cvs.openbsd.org 2000/10/10 22:02:18
- [kex.c kex.h myproposal.h ssh.h ssh2.h sshconnect2.c sshd.c dh.c dh.h]
- First rough implementation of the diffie-hellman group exchange. The
- client can ask the server for bigger groups to perform the diffie-hellman
- in, thus increasing the attack complexity when using ciphers with longer
- keys. University of Windsor provided network, T the company.
- - markus@cvs.openbsd.org 2000/10/11 13:59:52
- [auth-rsa.c auth2.c]
- clear auth options unless auth sucessfull
- - markus@cvs.openbsd.org 2000/10/11 14:00:27
- [auth-options.h]
- clear auth options unless auth sucessfull
- - markus@cvs.openbsd.org 2000/10/11 14:03:27
- [scp.1 scp.c]
- support 'scp -o' with help from mouring@pconline.com
- - markus@cvs.openbsd.org 2000/10/11 14:11:35
- [dh.c]
- Wall
- - markus@cvs.openbsd.org 2000/10/11 14:14:40
- [auth.h auth2.c readconf.c readconf.h readpass.c servconf.c servconf.h]
- [ssh.h sshconnect2.c sshd_config auth2-skey.c cli.c cli.h]
- add support for s/key (kbd-interactive) to ssh2, based on work by
- mkiernan@avantgo.com and me
- - markus@cvs.openbsd.org 2000/10/11 14:27:24
- [auth.c auth1.c auth2.c authfile.c cipher.c cipher.h kex.c kex.h]
- [myproposal.h packet.c readconf.c session.c ssh.c ssh.h sshconnect1.c]
- [sshconnect2.c sshd.c]
- new cipher framework
- - markus@cvs.openbsd.org 2000/10/11 14:45:21
- [cipher.c]
- remove DES
- - markus@cvs.openbsd.org 2000/10/12 03:59:20
- [cipher.c cipher.h sshconnect1.c sshconnect2.c sshd.c]
- enable DES in SSH-1 clients only
- - markus@cvs.openbsd.org 2000/10/12 08:21:13
- [kex.h packet.c]
- remove unused
- - markus@cvs.openbsd.org 2000/10/13 12:34:46
- [sshd.c]
- Kludge for F-Secure Macintosh < 1.0.2; appro@fy.chalmers.se
- - markus@cvs.openbsd.org 2000/10/13 12:59:15
- [cipher.c cipher.h myproposal.h rijndael.c rijndael.h]
- rijndael/aes support
- - markus@cvs.openbsd.org 2000/10/13 13:10:54
- [sshd.8]
- more info about -V
- - markus@cvs.openbsd.org 2000/10/13 13:12:02
- [myproposal.h]
- prefer no compression
- - (djm) Fix scp user@host handling
- - (djm) Don't clobber ssh_prng_cmds on install
- - (stevesk) Include config.h in rijndael.c so we define intXX_t and
- u_intXX_t types on all platforms.
- - (stevesk) rijndael.c: cleanup missing declaration warnings.
- - (stevesk) ~/.hushlogin shouldn't cause required password change to
- be bypassed.
- - (stevesk) Display correct path to ssh-askpass in configure output.
- Report from Lutz Jaenicke.
-
-20001007
- - (stevesk) Print PAM return value in PAM log messages to aid
- with debugging.
- - (stevesk) Fix detection of pw_class struct member in configure;
- patch from KAMAHARA Junzo <kamahara@cc.kshosen.ac.jp>
-
-20001002
- - (djm) Fix USER_PATH, report from Kevin Steves <stevesk@sweden.hp.com>
- - (djm) Add host system and CC to end-of-configure report. Suggested by
- Lutz Jaenicke <Lutz.Jaenicke@aet.TU-Cottbus.DE>
-
-20000931
- - (djm) Cygwin fixes from Corinna Vinschen <vinschen@cygnus.com>
-
-20000930
- - (djm) Irix ssh_prng_cmds path fix from Pekka Savola <pekkas@netcore.fi>
- - (djm) Support in bsd-snprintf.c for long long conversions from
- Ben Lindstrom <mouring@pconline.com>
- - (djm) Cleanup NeXT support from Ben Lindstrom <mouring@pconline.com>
- - (djm) Ignore SIGPIPEs from serverloop to child. Fixes crashes with
- very short lived X connections. Bug report from Tobias Oetiker
- <oetiker@ee.ethz.ch>. Fix from Markus Friedl <markus@cvs.openbsd.org>
- - (djm) Add recent InitScripts as a RPM dependancy for openssh-server
- patch from Pekka Savola <pekkas@netcore.fi>
- - (djm) Forgot to cvs add LICENSE file
- - (djm) Add LICENSE to RPM spec files
- - (djm) CVS OpenBSD sync:
- - markus@cvs.openbsd.org 2000/09/26 13:59:59
- [clientloop.c]
- use debug2
- - markus@cvs.openbsd.org 2000/09/27 15:41:34
- [auth2.c sshconnect2.c]
- use key_type()
- - markus@cvs.openbsd.org 2000/09/28 12:03:18
- [channels.c]
- debug -> debug2 cleanup
- - (djm) Irix strips "/dev/tty" from [uw]tmp entries (other systems only
- strip "/dev/"). Fix loginrec.c based on patch from Alain St-Denis
- <Alain.St-Denis@ec.gc.ca>
- - (djm) Fix 9 character passphrase failure with gnome-ssh-askpass.
- Problem was caused by interrupted read in ssh-add. Report from Donald
- J. Barry <don@astro.cornell.edu>
-
-20000929
- - (djm) Fix SSH2 not terminating until all background tasks done problem.
- - (djm) Another off-by-one fix from Pavel Kankovsky
- <peak@argo.troja.mff.cuni.cz>
- - (djm) Clean up. Strip some unnecessary differences with OpenBSD's code,
- tidy necessary differences. Use Markus' new debugN() in entropy.c
- - (djm) Merged big SCO portability patch from Tim Rice
- <tim@multitalents.net>
-
-20000926
- - (djm) Update X11-askpass to 1.0.2 in RPM spec file
- - (djm) Define _REENTRANT to pickup strtok_r() on HP/UX
- - (djm) Security: fix off-by-one buffer overrun in fake-getnameinfo.c.
- Report and fix from Pavel Kankovsky <peak@argo.troja.mff.cuni.cz>
-
-20000924
- - (djm) Merged cleanup patch from Mark Miller <markm@swoon.net>
- - (djm) A bit more cleanup - created cygwin_util.h
- - (djm) Include strtok_r() from OpenBSD libc. Fixes report from Mark Miller
- <markm@swoon.net>
-
-20000923
- - (djm) Fix address logging in utmp from Kevin Steves
- <stevesk@sweden.hp.com>
- - (djm) Redhat spec and manpage fixes from Pekka Savola <pekkas@netcore.fi>
- - (djm) Seperate tests for int64_t and u_int64_t types
- - (djm) Tweak password expiry checking at suggestion of Kevin Steves
- <stevesk@sweden.hp.com>
- - (djm) NeXT patch from Ben Lindstrom <mouring@pconline.com>
- - (djm) Use printf %lld instead of %qd in sftp-server.c. Fix from
- Michael Stone <mstone@cs.loyola.edu>
- - (djm) OpenBSD CVS sync:
- - markus@cvs.openbsd.org 2000/09/17 09:38:59
- [sshconnect2.c sshd.c]
- fix DEBUG_KEXDH
- - markus@cvs.openbsd.org 2000/09/17 09:52:51
- [sshconnect.c]
- yes no; ok niels@
- - markus@cvs.openbsd.org 2000/09/21 04:55:11
- [sshd.8]
- typo
- - markus@cvs.openbsd.org 2000/09/21 05:03:54
- [serverloop.c]
- typo
- - markus@cvs.openbsd.org 2000/09/21 05:11:42
- scp.c
- utime() to utimes(); mouring@pconline.com
- - markus@cvs.openbsd.org 2000/09/21 05:25:08
- sshconnect2.c
- change login logic in ssh2, allows plugin of other auth methods
- - markus@cvs.openbsd.org 2000/09/21 05:25:35
- [auth2.c channels.c channels.h clientloop.c dispatch.c dispatch.h]
- [serverloop.c]
- add context to dispatch_run
- - markus@cvs.openbsd.org 2000/09/21 05:07:52
- authfd.c authfd.h ssh-agent.c
- bug compat for old ssh.com software
-
-20000920
- - (djm) Fix bad path substitution. Report from Andrew Miner
- <asminer@cs.iastate.edu>
-
-20000916
- - (djm) Fix SSL search order from Lutz Jaenicke
- <Lutz.Jaenicke@aet.TU-Cottbus.DE>
- - (djm) New SuSE spec from Corinna Vinschen <corinna@vinschen.de>
- - (djm) Update CygWin support from Corinna Vinschen <vinschen@cygnus.com>
- - (djm) Use a real struct sockaddr inside the fake struct sockaddr_storage.
- Patch from Larry Jones <larry.jones@sdrc.com>
- - (djm) Add Steve VanDevender's <stevev@darkwing.uoregon.edu> PAM
- password change patch.
- - (djm) Bring licenses on my stuff in line with OpenBSD's
- - (djm) Cleanup auth-passwd.c and unify HP/UX authentication. Patch from
- Kevin Steves <stevesk@sweden.hp.com>
- - (djm) Shadow expiry check fix from Pavel Troller <patrol@omni.sinus.cz>
- - (djm) Re-enable int64_t types - we need them for sftp
- - (djm) Use libexecdir from configure , rather than libexecdir/ssh
- - (djm) Update Redhat SPEC file accordingly
- - (djm) Add Kevin Steves <stevesk@sweden.hp.com> HP/UX contrib files
- - (djm) Add Charles Levert <charles@comm.polymtl.ca> getpgrp patch
- - (djm) Fix password auth on HP/UX 10.20. Patch from Dirk De Wachter
- <Dirk.DeWachter@rug.ac.be>
- - (djm) Fixprogs and entropy list fixes from Larry Jones
- <larry.jones@sdrc.com>
- - (djm) Fix for SuSE spec file from Takashi YOSHIDA
- <tyoshida@gemini.rc.kyushu-u.ac.jp>
- - (djm) Merge OpenBSD changes:
- - markus@cvs.openbsd.org 2000/09/05 02:59:57
- [session.c]
- print hostname (not hushlogin)
- - markus@cvs.openbsd.org 2000/09/05 13:18:48
- [authfile.c ssh-add.c]
- enable ssh-add -d for DSA keys
- - markus@cvs.openbsd.org 2000/09/05 13:20:49
- [sftp-server.c]
- cleanup
- - markus@cvs.openbsd.org 2000/09/06 03:46:41
- [authfile.h]
- prototype
- - deraadt@cvs.openbsd.org 2000/09/07 14:27:56
- [ALL]
- cleanup copyright notices on all files. I have attempted to be
- accurate with the details. everything is now under Tatu's licence
- (which I copied from his readme), and/or the core-sdi bsd-ish thing
- for deattack, or various openbsd developers under a 2-term bsd
- licence. We're not changing any rules, just being accurate.
- - markus@cvs.openbsd.org 2000/09/07 14:40:30
- [channels.c channels.h clientloop.c serverloop.c ssh.c]
- cleanup window and packet sizes for ssh2 flow control; ok niels
- - markus@cvs.openbsd.org 2000/09/07 14:53:00
- [scp.c]
- typo
- - markus@cvs.openbsd.org 2000/09/07 15:13:37
- [auth-options.c auth-options.h auth-rh-rsa.c auth-rsa.c auth.c]
- [authfile.h canohost.c channels.h compat.c hostfile.h log.c match.h]
- [pty.c readconf.c]
- some more Copyright fixes
- - markus@cvs.openbsd.org 2000/09/08 03:02:51
- [README.openssh2]
- bye bye
- - deraadt@cvs.openbsd.org 2000/09/11 18:38:33
- [LICENCE cipher.c]
- a few more comments about it being ARC4 not RC4
- - markus@cvs.openbsd.org 2000/09/12 14:53:11
- [log-client.c log-server.c log.c ssh.1 ssh.c ssh.h sshd.8 sshd.c]
- multiple debug levels
- - markus@cvs.openbsd.org 2000/09/14 14:25:15
- [clientloop.c]
- typo
- - deraadt@cvs.openbsd.org 2000/09/15 01:13:51
- [ssh-agent.c]
- check return value for setenv(3) for failure, and deal appropriately
-
-20000913
- - (djm) Fix server not exiting with jobs in background.
-
-20000905
- - (djm) Import OpenBSD CVS changes
- - markus@cvs.openbsd.org 2000/08/31 15:52:24
- [Makefile sshd.8 sshd_config sftp-server.8 sftp-server.c]
- implement a SFTP server. interops with sftp2, scp2 and the windows
- client from ssh.com
- - markus@cvs.openbsd.org 2000/08/31 15:56:03
- [README.openssh2]
- sync
- - markus@cvs.openbsd.org 2000/08/31 16:05:42
- [session.c]
- Wall
- - markus@cvs.openbsd.org 2000/08/31 16:09:34
- [authfd.c ssh-agent.c]
- add a flag to SSH2_AGENTC_SIGN_REQUEST for future extensions
- - deraadt@cvs.openbsd.org 2000/09/01 09:25:13
- [scp.1 scp.c]
- cleanup and fix -S support; stevesk@sweden.hp.com
- - markus@cvs.openbsd.org 2000/09/01 16:29:32
- [sftp-server.c]
- portability fixes
- - markus@cvs.openbsd.org 2000/09/01 16:32:41
- [sftp-server.c]
- fix cast; mouring@pconline.com
- - itojun@cvs.openbsd.org 2000/09/03 09:23:28
- [ssh-add.1 ssh.1]
- add missing .El against .Bl.
- - markus@cvs.openbsd.org 2000/09/04 13:03:41
- [session.c]
- missing close; ok theo
- - markus@cvs.openbsd.org 2000/09/04 13:07:21
- [session.c]
- fix get_last_login_time order; from andre@van-veen.de
- - markus@cvs.openbsd.org 2000/09/04 13:10:09
- [sftp-server.c]
- more cast fixes; from mouring@pconline.com
- - markus@cvs.openbsd.org 2000/09/04 13:06:04
- [session.c]
- set SSH_ORIGINAL_COMMAND; from Leakin@dfw.nostrum.com, bet@rahul.net
- - (djm) Cleanup after import. Fix sftp-server compilation, Makefile
- - (djm) Merge cygwin support from Corinna Vinschen <vinschen@cygnus.com>
-
-20000903
- - (djm) Fix Redhat init script
-
-20000901
- - (djm) Pick up Jim's new X11-askpass
- - (djm) Release 2.2.0p1
-
-20000831
- - (djm) Workaround SIGPIPE problems on SCO. Fix from Aran Cox
- <acox@cv.telegroup.com>
- - (djm) Pick up new version (2.2.0) from OpenBSD CVS
-
-20000830
- - (djm) Compile warning fixes from Mark Miller <markm@swoon.net>
- - (djm) Periodically rekey arc4random
- - (djm) Clean up diff against OpenBSD.
- - (djm) HPUX 11 needs USE_PIPES as well: Kevin Steves
- <stevesk@sweden.hp.com>
- - (djm) Quieten the pam delete credentials error message
- - (djm) Fix printing of $DISPLAY hack if set by system type. Report from
- Kevin Steves <stevesk@sweden.hp.com>
- - (djm) NeXT patch from Ben Lindstrom <mouring@pconline.com>
- - (djm) Fix doh in bsd-arc4random.c
-
-20000829
- - (djm) Fix ^C ignored issue on Solaris. Diagnosis from Gert
- Doering <gert@greenie.muc.de>, John Horne <J.Horne@plymouth.ac.uk> and
- Garrick James <garrick@james.net>
- - (djm) Check for SCO pty naming style (ptyp%d/ttyp%d). Based on fix from
- Bastian Trompetter <btrompetter@firemail.de>
- - (djm) NeXT tweaks from Ben Lindstrom <mouring@pconline.com>
- - More OpenBSD updates:
- - deraadt@cvs.openbsd.org 2000/08/24 15:46:59
- [scp.c]
- off_t in sink, to fix files > 2GB, i think, test is still running ;-)
- - deraadt@cvs.openbsd.org 2000/08/25 10:10:06
- [session.c]
- Wall
- - markus@cvs.openbsd.org 2000/08/26 04:33:43
- [compat.c]
- ssh.com-2.3.0
- - markus@cvs.openbsd.org 2000/08/27 12:18:05
- [compat.c]
- compatibility with future ssh.com versions
- - deraadt@cvs.openbsd.org 2000/08/27 21:50:55
- [auth-krb4.c session.c ssh-add.c sshconnect.c uidswap.c]
- print uid/gid as unsigned
- - markus@cvs.openbsd.org 2000/08/28 13:51:00
- [ssh.c]
- enable -n and -f for ssh2
- - markus@cvs.openbsd.org 2000/08/28 14:19:53
- [ssh.c]
- allow combination of -N and -f
- - markus@cvs.openbsd.org 2000/08/28 14:20:56
- [util.c]
- util.c
- - markus@cvs.openbsd.org 2000/08/28 14:22:02
- [util.c]
- undo
- - markus@cvs.openbsd.org 2000/08/28 14:23:38
- [util.c]
- don't complain if setting NONBLOCK fails with ENODEV
-
-20000823
- - (djm) Define USE_PIPES to avoid socketpair problems on HPUX 10 and SunOS 4
- Avoids "scp never exits" problem. Reports from Lutz Jaenicke
- <Lutz.Jaenicke@aet.TU-Cottbus.DE> and Tamito KAJIYAMA
- <kajiyama@grad.sccs.chukyo-u.ac.jp>
- - (djm) Pick up LOGIN_PROGRAM from environment or PATH if not set by headers
- - (djm) Add local version to version.h
- - (djm) Don't reseed arc4random everytime it is used
- - (djm) OpenBSD CVS updates:
- - deraadt@cvs.openbsd.org 2000/08/18 20:07:23
- [ssh.c]
- accept remsh as a valid name as well; roman@buildpoint.com
- - deraadt@cvs.openbsd.org 2000/08/18 20:17:13
- [deattack.c crc32.c packet.c]
- rename crc32() to ssh_crc32() to avoid zlib name clash. do not move to
- libz crc32 function yet, because it has ugly "long"'s in it;
- oneill@cs.sfu.ca
- - deraadt@cvs.openbsd.org 2000/08/18 20:26:08
- [scp.1 scp.c]
- -S prog support; tv@debian.org
- - deraadt@cvs.openbsd.org 2000/08/18 20:50:07
- [scp.c]
- knf
- - deraadt@cvs.openbsd.org 2000/08/18 20:57:33
- [log-client.c]
- shorten
- - markus@cvs.openbsd.org 2000/08/19 12:48:11
- [channels.c channels.h clientloop.c ssh.c ssh.h]
- support for ~. in ssh2
- - deraadt@cvs.openbsd.org 2000/08/19 15:29:40
- [crc32.h]
- proper prototype
- - markus@cvs.openbsd.org 2000/08/19 15:34:44
- [authfd.c authfd.h key.c key.h ssh-add.1 ssh-add.c ssh-agent.1]
- [ssh-agent.c ssh-keygen.c sshconnect1.c sshconnect2.c Makefile]
- [fingerprint.c fingerprint.h]
- add SSH2/DSA support to the agent and some other DSA related cleanups.
- (note that we cannot talk to ssh.com's ssh2 agents)
- - markus@cvs.openbsd.org 2000/08/19 15:55:52
- [channels.c channels.h clientloop.c]
- more ~ support for ssh2
- - markus@cvs.openbsd.org 2000/08/19 16:21:19
- [clientloop.c]
- oops
- - millert@cvs.openbsd.org 2000/08/20 12:25:53
- [session.c]
- We have to stash the result of get_remote_name_or_ip() before we
- close our socket or getpeername() will get EBADF and the process
- will exit. Only a problem for "UseLogin yes".
- - millert@cvs.openbsd.org 2000/08/20 12:30:59
- [session.c]
- Only check /etc/nologin if "UseLogin no" since login(1) may have its
- own policy on determining who is allowed to login when /etc/nologin
- is present. Also use the _PATH_NOLOGIN define.
- - millert@cvs.openbsd.org 2000/08/20 12:42:43
- [auth1.c auth2.c session.c ssh.c]
- Add calls to setusercontext() and login_get*(). We basically call
- setusercontext() in most places where previously we did a setlogin().
- Add default login.conf file and put root in the "daemon" login class.
- - millert@cvs.openbsd.org 2000/08/21 10:23:31
- [session.c]
- Fix incorrect PATH setting; noted by Markus.
-
-20000818
- - (djm) OpenBSD CVS changes:
- - markus@cvs.openbsd.org 2000/07/22 03:14:37
- [servconf.c servconf.h sshd.8 sshd.c sshd_config]
- random early drop; ok theo, niels
- - deraadt@cvs.openbsd.org 2000/07/26 11:46:51
- [ssh.1]
- typo
- - deraadt@cvs.openbsd.org 2000/08/01 11:46:11
- [sshd.8]
- many fixes from pepper@mail.reppep.com
- - provos@cvs.openbsd.org 2000/08/01 13:01:42
- [Makefile.in util.c aux.c]
- rename aux.c to util.c to help with cygwin port
- - deraadt@cvs.openbsd.org 2000/08/02 00:23:31
- [authfd.c]
- correct sun_len; Alexander@Leidinger.net
- - provos@cvs.openbsd.org 2000/08/02 10:27:17
- [readconf.c sshd.8]
- disable kerberos authentication by default
- - provos@cvs.openbsd.org 2000/08/02 11:27:05
- [sshd.8 readconf.c auth-krb4.c]
- disallow kerberos authentication if we can't verify the TGT; from
- dugsong@
- kerberos authentication is on by default only if you have a srvtab.
- - markus@cvs.openbsd.org 2000/08/04 14:30:07
- [auth.c]
- unused
- - markus@cvs.openbsd.org 2000/08/04 14:30:35
- [sshd_config]
- MaxStartups
- - markus@cvs.openbsd.org 2000/08/15 13:20:46
- [authfd.c]
- cleanup; ok niels@
- - markus@cvs.openbsd.org 2000/08/17 14:05:10
- [session.c]
- cleanup login(1)-like jobs, no duplicate utmp entries
- - markus@cvs.openbsd.org 2000/08/17 14:06:34
- [session.c sshd.8 sshd.c]
- sshd -u len, similar to telnetd
- - (djm) Lastlog was not getting closed after writing login entry
- - (djm) Add Solaris package support from Rip Loomis <loomisg@cist.saic.com>
-
-20000816
- - (djm) Replacement for inet_ntoa for Irix (which breaks on gcc)
- - (djm) Fix strerror replacement for old SunOS. Based on patch from
- Charles Levert <charles@comm.polymtl.ca>
- - (djm) Seperate arc4random into seperate file and use OpenSSL's RC4
- implementation.
- - (djm) SUN_LEN macro for systems which lack it
-
-20000815
- - (djm) More SunOS 4.1.x fixes from Nate Itkin <nitkin@europa.com>
- - (djm) Avoid failures on Irix when ssh is not setuid. Fix from
- Michael Stone <mstone@cs.loyola.edu>
- - (djm) Don't seek in directory based lastlogs
- - (djm) Fix --with-ipaddr-display configure option test. Patch from
- Jarno Huuskonen <jhuuskon@messi.uku.fi>
- - (djm) Fix AIX limits from Alexandre Oliva <oliva@lsd.ic.unicamp.br>
-
-20000813
- - (djm) Add $(srcdir) to includes when compiling (for VPATH). Report from
- Fabrice bacchella <fabrice.bacchella@marchfirst.fr>
-
-20000809
- - (djm) Define AIX hard limits if headers don't. Report from
- Bill Painter <william.t.painter@lmco.com>
- - (djm) utmp direct write & SunOS 4 patch from Charles Levert
- <charles@comm.polymtl.ca>
-
-20000808
- - (djm) Cleanup Redhat RPMs. Generate keys at runtime rather than install
- time, spec file cleanup.
-
-20000807
- - (djm) Set 0755 on binaries during install. Report from Lutz Jaenicke
- - (djm) Suppress error messages on channel close shutdown() failurs
- works around Linux bug. Patch from Zack Weinberg <zack@wolery.cumb.org>
- - (djm) Add some more entropy collection commands from Lutz Jaenicke
-
-20000725
- - (djm) Fix autoconf typo: HAVE_BINRESVPORT_AF -> HAVE_BINDRESVPORT_AF
-
-20000721
- - (djm) OpenBSD CVS updates:
- - markus@cvs.openbsd.org 2000/07/16 02:27:22
- [authfd.c authfd.h channels.c clientloop.c ssh-add.c ssh-agent.c ssh.c]
- [sshconnect1.c sshconnect2.c]
- make ssh-add accept dsa keys (the agent does not)
- - djm@cvs.openbsd.org 2000/07/17 19:25:02
- [sshd.c]
- Another closing of stdin; ok deraadt
- - markus@cvs.openbsd.org 2000/07/19 18:33:12
- [dsa.c]
- missing free, reorder
- - markus@cvs.openbsd.org 2000/07/20 16:23:14
- [ssh-keygen.1]
- document input and output files
-
-20000720
- - (djm) Spec file fix from Petr Novotny <Petr.Novotny@antek.cz>
-
-20000716
- - (djm) Release 2.1.1p4
-
-20000715
- - (djm) OpenBSD CVS updates
- - provos@cvs.openbsd.org 2000/07/13 16:53:22
- [aux.c readconf.c servconf.c ssh.h]
- allow multiple whitespace but only one '=' between tokens, bug report from
- Ralf S. Engelschall <rse@engelschall.com> but different fix. okay deraadt@
- - provos@cvs.openbsd.org 2000/07/13 17:14:09
- [clientloop.c]
- typo; todd@fries.net
- - provos@cvs.openbsd.org 2000/07/13 17:19:31
- [scp.c]
- close can fail on AFS, report error; from Greg Hudson <ghudson@mit.edu>
- - markus@cvs.openbsd.org 2000/07/14 16:59:46
- [readconf.c servconf.c]
- allow leading whitespace. ok niels
- - djm@cvs.openbsd.org 2000/07/14 22:01:38
- [ssh-keygen.c ssh.c]
- Always create ~/.ssh with mode 700; ok Markus
- - Fixes for SunOS 4.1.4 from Gordon Atwood <gordon@cs.ualberta.ca>
- - Include floatingpoint.h for entropy.c
- - strerror replacement
-
-20000712
- - (djm) Remove -lresolve for Reliant Unix
- - (djm) OpenBSD CVS Updates:
- - deraadt@cvs.openbsd.org 2000/07/11 02:11:34
- [session.c sshd.c ]
- make MaxStartups code still work with -d; djm
- - deraadt@cvs.openbsd.org 2000/07/11 13:17:45
- [readconf.c ssh_config]
- disable FallBackToRsh by default
- - (djm) Replace in_addr_t with u_int32_t in bsd-inet_aton.c. Report from
- Ben Lindstrom <mouring@pconline.com>
- - (djm) Make building of X11-Askpass and GNOME-Askpass optional in RPM
- spec file.
- - (djm) Released 2.1.1p3
-
-20000711
- - (djm) Fixup for AIX getuserattr() support from Tom Bertelson
- <tbert@abac.com>
- - (djm) ReliantUNIX support from Udo Schweigert <ust@cert.siemens.de>
- - (djm) NeXT: dirent structures to get scp working from Ben Lindstrom
- <mouring@pconline.com>
- - (djm) Fix broken inet_ntoa check and ut_user/ut_name confusion, report
- from Jim Watt <jimw@peisj.pebio.com>
- - (djm) Replaced bsd-snprintf.c with one from Mutt source tree, it is known
- to compile on more platforms (incl NeXT).
- - (djm) Added bsd-inet_aton and configure support for NeXT
- - (djm) Misc NeXT fixes from Ben Lindstrom <mouring@pconline.com>
- - (djm) OpenBSD CVS updates:
- - markus@cvs.openbsd.org 2000/06/26 03:22:29
- [authfd.c]
- cleanup, less cut&paste
- - markus@cvs.openbsd.org 2000/06/26 15:59:19
- [servconf.c servconf.h session.c sshd.8 sshd.c]
- MaxStartups: limit number of unauthenticated connections, work by
- theo and me
- - deraadt@cvs.openbsd.org 2000/07/05 14:18:07
- [session.c]
- use no_x11_forwarding_flag correctly; provos ok
- - provos@cvs.openbsd.org 2000/07/05 15:35:57
- [sshd.c]
- typo
- - aaron@cvs.openbsd.org 2000/07/05 22:06:58
- [scp.1 ssh-agent.1 ssh-keygen.1 sshd.8]
- Insert more missing .El directives. Our troff really should identify
- these and spit out a warning.
- - todd@cvs.openbsd.org 2000/07/06 21:55:04
- [auth-rsa.c auth2.c ssh-keygen.c]
- clean code is good code
- - deraadt@cvs.openbsd.org 2000/07/07 02:14:29
- [serverloop.c]
- sense of port forwarding flag test was backwards
- - provos@cvs.openbsd.org 2000/07/08 17:17:31
- [compat.c readconf.c]
- replace strtok with strsep; from David Young <dyoung@onthejob.net>
- - deraadt@cvs.openbsd.org 2000/07/08 19:21:15
- [auth.h]
- KNF
- - ho@cvs.openbsd.org 2000/07/08 19:27:33
- [compat.c readconf.c]
- Better conditions for strsep() ending.
- - ho@cvs.openbsd.org 2000/07/10 10:27:05
- [readconf.c]
- Get the correct message on errors. (niels@ ok)
- - ho@cvs.openbsd.org 2000/07/10 10:30:25
- [cipher.c kex.c servconf.c]
- strtok() --> strsep(). (niels@ ok)
- - (djm) Fix problem with debug mode and MaxStartups
- - (djm) Don't generate host keys when $(DESTDIR) is set (e.g. during RPM
- builds)
- - (djm) Add strsep function from OpenBSD libc for systems that lack it
-
-20000709
- - (djm) Only enable PAM_TTY kludge for Linux. Problem report from
- Kevin Steves <stevesk@sweden.hp.com>
- - (djm) Match prototype and function declaration for rresvport_af.
- Problem report from Niklas Edmundsson <nikke@ing.umu.se>
- - (djm) Missing $(DESTDIR) on host-key target causing problems with RPM
- builds. Problem report from Gregory Leblanc <GLeblanc@cu-portland.edu>
- - (djm) Replace ut_name with ut_user. Patch from Jim Watt
- <jimw@peisj.pebio.com>
- - (djm) Fix pam sprintf fix
- - (djm) Cleanup entropy collection code a little more. Split initialisation
- from seeding, perform intialisation immediatly at start, be careful with
- uids. Based on problem report from Jim Watt <jimw@peisj.pebio.com>
- - (djm) More NeXT compatibility from Ben Lindstrom <mouring@pconline.com>
- Including sigaction() et al. replacements
- - (djm) AIX getuserattr() session initialisation from Tom Bertelson
- <tbert@abac.com>
-
-20000708
- - (djm) Fix bad fprintf format handling in auth-pam.c. Patch from
- Aaron Hopkins <aaron@die.net>
- - (djm) Fix incorrect configure handling of --with-rsh-path option. Fix from
- Lutz Jaenicke <Lutz.Jaenicke@aet.TU-Cottbus.DE>
- - (djm) Fixed undefined variables for OSF SIA. Report from
- Baars, Henk <Hendrik.Baars@nl.origin-it.com>
- - (djm) Handle EWOULDBLOCK returns from read() and write() in atomicio.c
- Fix from Marquess, Steve Mr JMLFDC <Steve.Marquess@DET.AMEDD.ARMY.MIL>
- - (djm) Don't use inet_addr.
-
-20000702
- - (djm) Fix brace mismatch from Corinna Vinschen <vinschen@cygnus.com>
- - (djm) Stop shadow expiry checking from preventing logins with NIS. Based
- on fix from HARUYAMA Seigo <haruyama@nt.phys.s.u-tokyo.ac.jp>
- - (djm) Use standard OpenSSL functions in auth-skey.c. Patch from
- Chris, the Young One <cky@pobox.com>
- - (djm) Fix scp progress meter on really wide terminals. Based on patch
- from James H. Cloos Jr. <cloos@jhcloos.com>
-
-20000701
- - (djm) Fix Tru64 SIA problems reported by John P Speno <speno@isc.upenn.edu>
- - (djm) Login fixes from Tom Bertelson <tbert@abac.com>
- - (djm) Replace "/bin/sh" with _PATH_BSHELL. Report from Corinna Vinschen
- <vinschen@cygnus.com>
- - (djm) Replace "/usr/bin/login" with LOGIN_PROGRAM
- - (djm) Added check for broken snprintf() functions which do not correctly
- terminate output string and attempt to use replacement.
- - (djm) Released 2.1.1p2
-
-20000628
- - (djm) Fixes to lastlog code for Irix
- - (djm) Use atomicio in loginrec
- - (djm) Patch from Michael Stone <mstone@cs.loyola.edu> to add support for
- Irix 6.x array sessions, project id's, and system audit trail id.
- - (djm) Added 'distprep' make target to simplify packaging
- - (djm) Added patch from Chris Adams <cmadams@hiwaay.net> to add OSF SIA
- support. Enable using "USE_SIA=1 ./configure [options]"
-
-20000627
- - (djm) Fixes to login code - not setting li->uid, cleanups
- - (djm) Formatting
-
-20000626
- - (djm) Better fix to aclocal tests from Garrick James <garrick@james.net>
- - (djm) Account expiry support from Andreas Steinmetz <ast@domdv.de>
- - (djm) Added password expiry checking (no password change support)
- - (djm) Make EGD failures non-fatal if OpenSSL's entropy pool is still OK
- based on patch from Lutz Jaenicke <Lutz.Jaenicke@aet.TU-Cottbus.DE>
- - (djm) Fix fixed EGD code.
- - OpenBSD CVS update
- - provos@cvs.openbsd.org 2000/06/25 14:17:58
- [channels.c]
- correct check for bad channel ids; from Wei Dai <weidai@eskimo.com>
-
-20000623
- - (djm) Use sa_family_t in prototype for rresvport_af. Patch from
- Svante Signell <svante.signell@telia.com>
- - (djm) Autoconf logic to define sa_family_t if it is missing
- - OpenBSD CVS Updates:
- - markus@cvs.openbsd.org 2000/06/22 10:32:27
- [sshd.c]
- missing atomicio; report from Steve.Marquess@DET.AMEDD.ARMY.MIL
- - djm@cvs.openbsd.org 2000/06/22 17:55:00
- [auth-krb4.c key.c radix.c uuencode.c]
- Missing CVS idents; ok markus
-
-20000622
- - (djm) Automatically generate host key during "make install". Suggested
- by Gary E. Miller <gem@rellim.com>
- - (djm) Paranoia before kill() system call
- - OpenBSD CVS Updates:
- - markus@cvs.openbsd.org 2000/06/18 18:50:11
- [auth2.c compat.c compat.h sshconnect2.c]
- make userauth+pubkey interop with ssh.com-2.2.0
- - markus@cvs.openbsd.org 2000/06/18 20:56:17
- [dsa.c]
- mem leak + be more paranoid in dsa_verify.
- - markus@cvs.openbsd.org 2000/06/18 21:29:50
- [key.c]
- cleanup fingerprinting, less hardcoded sizes
- - markus@cvs.openbsd.org 2000/06/19 19:39:45
- [atomicio.c auth-options.c auth-passwd.c auth-rh-rsa.c auth-rhosts.c]
- [auth-rsa.c auth-skey.c authfd.c authfd.h authfile.c bufaux.c bufaux.h]
- [buffer.c buffer.h canohost.c channels.c channels.h cipher.c cipher.h]
- [clientloop.c compat.c compat.h compress.c compress.h crc32.c crc32.h]
- [deattack.c dispatch.c dsa.c fingerprint.c fingerprint.h getput.h hmac.c]
- [kex.c log-client.c log-server.c login.c match.c mpaux.c mpaux.h nchan.c]
- [nchan.h packet.c packet.h pty.c pty.h readconf.c readconf.h readpass.c]
- [rsa.c rsa.h scp.c servconf.c servconf.h ssh-add.c ssh-keygen.c ssh.c]
- [ssh.h tildexpand.c ttymodes.c ttymodes.h uidswap.c xmalloc.c xmalloc.h]
- OpenBSD tag
- - markus@cvs.openbsd.org 2000/06/21 10:46:10
- sshconnect2.c missing free; nuke old comment
-
-20000620
- - (djm) Replace use of '-o' and '-a' logical operators in configure tests
- with '||' and '&&'. As suggested by Jim Knoble <jmknoble@pint-stowp.cx>
- to fix SCO Unixware problem reported by Gary E. Miller <gem@rellim.com>
- - (djm) Typo in loginrec.c
-
-20000618
- - (djm) Add summary of configure options to end of ./configure run
- - (djm) Not all systems define RUSAGE_SELF & RUSAGE_CHILDREN. Report from
- Michael Stone <mstone@cs.loyola.edu>
- - (djm) rusage is a privileged operation on some Unices (incl.
- Solaris 2.5.1). Report from Paul D. Smith <pausmith@nortelnetworks.com>
- - (djm) Avoid PAM failures when running without a TTY. Report from
- Martin Petrak <petrak@spsknm.schools.sk>
- - (djm) Include sys/types.h when including netinet/in.h in configure tests.
- Patch from Jun-ichiro itojun Hagino <itojun@iijlab.net>
- - (djm) Started merge of Ben Lindstrom's <mouring@pconline.com> NeXT support
- - OpenBSD CVS updates:
- - deraadt@cvs.openbsd.org 2000/06/17 09:58:46
- [channels.c]
- everyone says "nix it" (remove protocol 2 debugging message)
- - markus@cvs.openbsd.org 2000/06/17 13:24:34
- [sshconnect.c]
- allow extended server banners
- - markus@cvs.openbsd.org 2000/06/17 14:30:10
- [sshconnect.c]
- missing atomicio, typo
- - jakob@cvs.openbsd.org 2000/06/17 16:52:34
- [servconf.c servconf.h session.c sshd.8 sshd_config]
- add support for ssh v2 subsystems. ok markus@.
- - deraadt@cvs.openbsd.org 2000/06/17 18:57:48
- [readconf.c servconf.c]
- include = in WHITESPACE; markus ok
- - markus@cvs.openbsd.org 2000/06/17 19:09:10
- [auth2.c]
- implement bug compatibility with ssh-2.0.13 pubkey, server side
- - markus@cvs.openbsd.org 2000/06/17 21:00:28
- [compat.c]
- initial support for ssh.com's 2.2.0
- - markus@cvs.openbsd.org 2000/06/17 21:16:09
- [scp.c]
- typo
- - markus@cvs.openbsd.org 2000/06/17 22:05:02
- [auth-rsa.c auth2.c serverloop.c session.c auth-options.c auth-options.h]
- split auth-rsa option parsing into auth-options
- add options support to authorized_keys2
- - markus@cvs.openbsd.org 2000/06/17 22:42:54
- [session.c]
- typo
-
-20000613
- - (djm) Fixes from Andrew McGill <andrewm@datrix.co.za>:
- - Platform define for SCO 3.x which breaks on /dev/ptmx
- - Detect and try to fix missing MAXPATHLEN
- - (djm) Fix short copy in loginrec.c (based on patch from Phill Camp
- <P.S.S.Camp@ukc.ac.uk>
-
-20000612
- - (djm) Glob manpages in RPM spec files to catch compressed files
- - (djm) Full license in auth-pam.c
- - (djm) Configure fixes from SAKAI Kiyotaka <ksakai@kso.netwk.ntt-at.co.jp>
- - (andre) AIX, lastlog, configure fixes from Tom Bertelson <tbert@abac.com>:
- - Don't try to retrieve lastlog from wtmp/wtmpx if DISABLE_LASTLOG is
- def'd
- - Set AIX to use preformatted manpages
-
-20000610
- - (djm) Minor doc tweaks
- - (djm) Fix for configure on bash2 from Jim Knoble <jmknoble@jmknoble.cx>
-
-20000609
- - (djm) Patch from Kenji Miyake <kenji@miyake.org> to disable utmp usage
- (in favour of utmpx) on Solaris 8
-
-20000606
- - (djm) Cleanup of entropy.c. Reorganised code, removed second pass through
- list of commands (by default). Removed verbose debugging (by default).
- - (djm) Increased command entropy estimates and default entropy collection
- timeout
- - (djm) Remove duplicate headers from loginrec.c
- - (djm) Don't add /usr/local/lib to library search path on Irix
- - (djm) Fix rsh path in RPMs. Report from Jason L Tibbitts III
- <tibbs@math.uh.edu>
- - (djm) Warn user if grabs fail in GNOME askpass. Patch from Zack Weinberg
- <zack@wolery.cumb.org>
- - (djm) OpenBSD CVS updates:
- - todd@cvs.openbsd.org
- [sshconnect2.c]
- teach protocol v2 to count login failures properly and also enable an
- explanation of why the password prompt comes up again like v1; this is NOT
- crypto
- - markus@cvs.openbsd.org
- [readconf.c readconf.h servconf.c servconf.h session.c ssh.1 ssh.c sshd.8]
- xauth_location support; pr 1234
- [readconf.c sshconnect2.c]
- typo, unused
- [session.c]
- allow use_login only for login sessions, otherwise remote commands are
- execed with uid==0
- [sshd.8]
- document UseLogin better
- [version.h]
- OpenSSH 2.1.1
- [auth-rsa.c]
- fix match_hostname() logic for auth-rsa: deny access if we have a
- negative match or no match at all
- [channels.c hostfile.c match.c]
- don't panic if mkdtemp fails for authfwd; jkb@yahoo-inc.com via
- kris@FreeBSD.org
-
-20000606
- - (djm) Added --with-cflags, --with-ldflags and --with-libs options to
- configure.
-
-20000604
- - Configure tweaking for new login code on Irix 5.3
- - (andre) login code changes based on djm feedback
-
-20000603
- - (andre) New login code
- - Remove bsd-login.[ch] and all the OpenBSD-derived code in login.c
- - Add loginrec.[ch], logintest.c and autoconf code
-
-20000531
- - Cleanup of auth.c, login.c and fake-*
- - Cleanup of auth-pam.c, save and print "account expired" error messages
- - Fix EGD read bug by IWAMURO Motonori <iwa@mmp.fujitsu.co.jp>
- - Rewrote bsd-login to use proper utmp API if available. Major cleanup
- of fallback DIY code.
-
-20000530
- - Define atexit for old Solaris
- - Fix buffer overrun in login.c for systems which use syslen in utmpx.
- patch from YOSHIFUJI Hideaki <yoshfuji@cerberus.nemoto.ecei.tohoku.ac.jp>
- - OpenBSD CVS updates:
- - markus@cvs.openbsd.org
- [session.c]
- make x11-fwd work w/ localhost (xauth add host/unix:11)
- [cipher.c compat.c readconf.c servconf.c]
- check strtok() != NULL; ok niels@
- [key.c]
- fix key_read() for uuencoded keys w/o '='
- [serverloop.c]
- group ssh1 vs. ssh2 in serverloop
- [kex.c kex.h myproposal.h sshconnect2.c sshd.c]
- split kexinit/kexdh, factor out common code
- [readconf.c ssh.1 ssh.c]
- forwardagent defaults to no, add ssh -A
- - theo@cvs.openbsd.org
- [session.c]
- just some line shortening
- - Released 2.1.0p3
-
-20000520
- - Xauth fix from Markus Friedl <markus.friedl@informatik.uni-erlangen.de>
- - Don't touch utmp if USE_UTMPX defined
- - SunOS 4.x support from Todd C. Miller <Todd.Miller@courtesan.com>
- - SIGCHLD fix for AIX and HPUX from Tom Bertelson <tbert@abac.com>
- - HPUX and Configure fixes from Lutz Jaenicke
- <Lutz.Jaenicke@aet.TU-Cottbus.DE>
- - Use mkinstalldirs script to make directories instead of non-portable
- "install -d". Suggested by Lutz Jaenicke <Lutz.Jaenicke@aet.TU-Cottbus.DE>
- - Doc cleanup
-
-20000518
- - Include Andre Lucas' fixprogs script. Forgot to "cvs add" it yesterday
- - OpenBSD CVS updates:
- - markus@cvs.openbsd.org
- [sshconnect.c]
- copy only ai_addrlen bytes; misiek@pld.org.pl
- [auth.c]
- accept an empty shell in authentication; bug reported by
- chris@tinker.ucr.edu
- [serverloop.c]
- we don't have stderr for interactive terminal sessions (fcntl errors)
-
-20000517
- - Fix from Andre Lucas <andre.lucas@dial.pipex.com>
- - Fixes command line printing segfaults (spotter: Bladt Norbert)
- - Fixes erroneous printing of debug messages to syslog
- - Fixes utmp for MacOS X (spotter: Aristedes Maniatis)
- - Gives useful error message if PRNG initialisation fails
- - Reduced ssh startup delay
- - Measures cumulative command time rather than the time between reads
- after select()
- - 'fixprogs' perl script to eliminate non-working entropy commands, and
- optionally run 'ent' to measure command entropy
- - Applied Tom Bertelson's <tbert@abac.com> AIX authentication fix
- - Avoid WCOREDUMP complation errors for systems that lack it
- - Avoid SIGCHLD warnings from entropy commands
- - Fix HAVE_PAM_GETENVLIST setting from Simon Wilkinson <sxw@dcs.ed.ac.uk>
- - OpenBSD CVS update:
- - markus@cvs.openbsd.org
- [ssh.c]
- fix usage()
- [ssh2.h]
- draft-ietf-secsh-architecture-05.txt
- [ssh.1]
- document ssh -T -N (ssh2 only)
- [channels.c serverloop.c ssh.h sshconnect.c sshd.c aux.c]
- enable nonblocking IO for sshd w/ proto 1, too; split out common code
- [aux.c]
- missing include
- - Several patches from SAKAI Kiyotaka <ksakai@kso.netwk.ntt-at.co.jp>
- - INSTALL typo and URL fix
- - Makefile fix
- - Solaris fixes
- - Checking for ssize_t and memmove. Based on patch from SAKAI Kiyotaka
- <ksakai@kso.netwk.ntt-at.co.jp>
- - RSAless operation patch from kevin_oconnor@standardandpoors.com
- - Detect OpenSSL seperatly from RSA
- - Better test for RSA (more compatible with RSAref). Based on work by
- Ed Eden <ede370@stl.rural.usda.gov>
-
-20000513
- - Fix for non-recognised DSA keys from Arkadiusz Miskiewicz
- <misiek@pld.org.pl>
-
-20000511
- - Fix for prng_seed permissions checking from Lutz Jaenicke
- <Lutz.Jaenicke@aet.TU-Cottbus.DE>
- - "make host-key" fix for Irix
-
-20000509
- - OpenBSD CVS update
- - markus@cvs.openbsd.org
- [cipher.h myproposal.h readconf.c readconf.h servconf.c ssh.1 ssh.c]
- [ssh.h sshconnect1.c sshconnect2.c sshd.8]
- - complain about invalid ciphers in SSH1 (e.g. arcfour is SSH2 only)
- - hugh@cvs.openbsd.org
- [ssh.1]
- - zap typo
- [ssh-keygen.1]
- - One last nit fix. (markus approved)
- [sshd.8]
- - some markus certified spelling adjustments
- - markus@cvs.openbsd.org
- [auth2.c channels.c clientloop.c compat compat.h dsa.c kex.c]
- [sshconnect2.c ]
- - bug compat w/ ssh-2.0.13 x11, split out bugs
- [nchan.c]
- - no drain if ibuf_empty, fixes x11fwd problems; tests by fries@
- [ssh-keygen.c]
- - handle escapes in real and original key format, ok millert@
- [version.h]
- - OpenSSH-2.1
- - Moved all the bsd-* and fake-* stuff into new libopenbsd-compat.a
- - Doc updates
- - Cleanup of bsd-base64 headers, bugfix definitions of __b64_*. Reported
- by Andre Lucas <andre.lucas@dial.pipex.com>
-
-20000508
- - Makefile and RPM spec fixes
- - Generate DSA host keys during "make key" or RPM installs
- - OpenBSD CVS update
- - markus@cvs.openbsd.org
- [clientloop.c sshconnect2.c]
- - make x11-fwd interop w/ ssh-2.0.13
- [README.openssh2]
- - interop w/ SecureFX
- - Release 2.0.0beta2
-
- - Configure caching and cleanup patch from Andre Lucas'
- <andre.lucas@dial.pipex.com>
-
-20000507
- - Remove references to SSLeay.
- - Big OpenBSD CVS update
- - markus@cvs.openbsd.org
- [clientloop.c]
- - typo
- [session.c]
- - update proctitle on pty alloc/dealloc, e.g. w/ windows client
- [session.c]
- - update proctitle for proto 1, too
- [channels.h nchan.c serverloop.c session.c sshd.c]
- - use c-style comments
- - deraadt@cvs.openbsd.org
- [scp.c]
- - more atomicio
- - markus@cvs.openbsd.org
- [channels.c]
- - set O_NONBLOCK
- [ssh.1]
- - update AUTHOR
- [readconf.c ssh-keygen.c ssh.h]
- - default DSA key file ~/.ssh/id_dsa
- [clientloop.c]
- - typo, rm verbose debug
- - deraadt@cvs.openbsd.org
- [ssh-keygen.1]
- - document DSA use of ssh-keygen
- [sshd.8]
- - a start at describing what i understand of the DSA side
- [ssh-keygen.1]
- - document -X and -x
- [ssh-keygen.c]
- - simplify usage
- - markus@cvs.openbsd.org
- [sshd.8]
- - there is no rhosts_dsa
- [ssh-keygen.1]
- - document -y, update -X,-x
- [nchan.c]
- - fix close for non-open ssh1 channels
- [servconf.c servconf.h ssh.h sshd.8 sshd.c ]
- - s/DsaKey/HostDSAKey/, document option
- [sshconnect2.c]
- - respect number_of_password_prompts
- [channels.c channels.h servconf.c servconf.h session.c sshd.8]
- - GatewayPorts for sshd, ok deraadt@
- [ssh-add.1 ssh-agent.1 ssh.1]
- - more doc on: DSA, id_dsa, known_hosts2, authorized_keys2
- [ssh.1]
- - more info on proto 2
- [sshd.8]
- - sync AUTHOR w/ ssh.1
- [key.c key.h sshconnect.c]
- - print key type when talking about host keys
- [packet.c]
- - clear padding in ssh2
- [dsa.c key.c radix.c ssh.h sshconnect1.c uuencode.c uuencode.h]
- - replace broken uuencode w/ libc b64_ntop
- [auth2.c]
- - log failure before sending the reply
- [key.c radix.c uuencode.c]
- - remote trailing comments before calling __b64_pton
- [auth2.c readconf.c readconf.h servconf.c servconf.h ssh.1]
- [sshconnect2.c sshd.8]
- - add DSAAuthetication option to ssh/sshd, document SSH2 in sshd.8
- - Bring in b64_ntop and b64_pton from OpenBSD libc (bsd-base64.[ch])
-
-20000502
- - OpenBSD CVS update
- [channels.c]
- - init all fds, close all fds.
- [sshconnect2.c]
- - check whether file exists before asking for passphrase
- [servconf.c servconf.h sshd.8 sshd.c]
- - PidFile, pr 1210
- [channels.c]
- - EINTR
- [channels.c]
- - unbreak, ok niels@
- [sshd.c]
- - unlink pid file, ok niels@
- [auth2.c]
- - Add missing #ifdefs; ok - markus
- - Add Andre Lucas' <andre.lucas@dial.pipex.com> patch to read entropy
- gathering commands from a text file
- - Release 2.0.0beta1
-
-20000501
- - OpenBSD CVS update
- [packet.c]
- - send debug messages in SSH2 format
- [scp.c]
- - fix very rare EAGAIN/EINTR issues; based on work by djm
- [packet.c]
- - less debug, rm unused
- [auth2.c]
- - disable kerb,s/key in ssh2
- [sshd.8]
- - Minor tweaks and typo fixes.
- [ssh-keygen.c]
- - Put -d into usage and reorder. markus ok.
- - Include missing headers for OpenSSL tests. Fix from Phil Karn
- <karn@ka9q.ampr.org>
- - Fixed __progname symbol collisions reported by Andre Lucas
- <andre.lucas@dial.pipex.com>
- - Merged bsd-login ttyslot and AIX utmp patch from Gert Doering
- <gd@hilb1.medat.de>
- - Add some missing ifdefs to auth2.c
- - Deprecate perl-tk askpass.
- - Irix portability fixes - don't include netinet headers more than once
- - Make sure we don't save PRNG seed more than once
-
-20000430
- - Merge HP-UX fixes and TCB support from Ged Lodder <lodder@yacc.com.au>
- - Integrate Andre Lucas' <andre.lucas@dial.pipex.com> entropy collection
- patch.
- - Adds timeout to entropy collection
- - Disables slow entropy sources
- - Load and save seed file
- - Changed entropy seed code to user per-user seeds only (server seed is
- saved in root's .ssh directory)
- - Use atexit() and fatal cleanups to save seed on exit
- - More OpenBSD updates:
- [session.c]
- - don't call chan_write_failed() if we are not writing
- [auth-rsa.c auth1.c authfd.c hostfile.c ssh-agent.c]
- - keysize warnings error() -> log()
-
-20000429
- - Merge big update to OpenSSH-2.0 from OpenBSD CVS
- [README.openssh2]
- - interop w/ F-secure windows client
- - sync documentation
- - ssh_host_dsa_key not ssh_dsa_key
- [auth-rsa.c]
- - missing fclose
- [auth.c authfile.c compat.c dsa.c dsa.h hostfile.c key.c key.h radix.c]
- [readconf.c readconf.h ssh-add.c ssh-keygen.c ssh.c ssh.h sshconnect.c]
- [sshd.c uuencode.c uuencode.h authfile.h]
- - add DSA pubkey auth and other SSH2 fixes. use ssh-keygen -[xX]
- for trading keys with the real and the original SSH, directly from the
- people who invented the SSH protocol.
- [auth.c auth.h authfile.c sshconnect.c auth1.c auth2.c sshconnect.h]
- [sshconnect1.c sshconnect2.c]
- - split auth/sshconnect in one file per protocol version
- [sshconnect2.c]
- - remove debug
- [uuencode.c]
- - add trailing =
- [version.h]
- - OpenSSH-2.0
- [ssh-keygen.1 ssh-keygen.c]
- - add -R flag: exit code indicates if RSA is alive
- [sshd.c]
- - remove unused
- silent if -Q is specified
- [ssh.h]
- - host key becomes /etc/ssh_host_dsa_key
- [readconf.c servconf.c ]
- - ssh/sshd default to proto 1 and 2
- [uuencode.c]
- - remove debug
- [auth2.c ssh-keygen.c sshconnect2.c sshd.c]
- - xfree DSA blobs
- [auth2.c serverloop.c session.c]
- - cleanup logging for sshd/2, respect PasswordAuth no
- [sshconnect2.c]
- - less debug, respect .ssh/config
- [README.openssh2 channels.c channels.h]
- - clientloop.c session.c ssh.c
- - support for x11-fwding, client+server
-
-20000421
- - Merge fix from OpenBSD CVS
- [ssh-agent.c]
- - Fix memory leak per connection. Report from Andy Spiegl <Andy@Spiegl.de>
- via Debian bug #59926
- - Define __progname in session.c if libc doesn't
- - Remove indentation on autoconf #include statements to avoid bug in
- DEC Tru64 compiler. Report and fix from David Del Piero
- <David.DelPiero@qed.qld.gov.au>
-
-20000420
- - Make fixpaths work with perl4, patch from Andre Lucas
- <andre.lucas@dial.pipex.com>
- - Sync with OpenBSD CVS:
- [clientloop.c login.c serverloop.c ssh-agent.c ssh.h sshconnect.c sshd.c]
- - pid_t
- [session.c]
- - remove bogus chan_read_failed. this could cause data
- corruption (missing data) at end of a SSH2 session.
- - Merge fixes from Debian patch from Phil Hands <phil@hands.com>
- - Allow setting of PAM service name through CFLAGS (SSHD_PAM_SERVICE)
- - Use vhangup to clean up Linux ttys
- - Force posix getopt processing on GNU libc systems
- - Debian bug #55910 - remove references to ssl(8) manpages
- - Debian bug #58031 - ssh_config lies about default cipher
-
-20000419
- - OpenBSD CVS updates
- [channels.c]
- - fix pr 1196, listen_port and port_to_connect interchanged
- [scp.c]
- - after completion, replace the progress bar ETA counter with a final
- elapsed time; my idea, aaron wrote the patch
- [ssh_config sshd_config]
- - show 'Protocol' as an example, ok markus@
- [sshd.c]
- - missing xfree()
- - Add missing header to bsd-misc.c
-
-20000416
- - Reduce diff against OpenBSD source
- - All OpenSSL includes are now unconditionally referenced as
- openssl/foo.h
- - Pick up formatting changes
- - Other minor changed (typecasts, etc) that I missed
-
-20000415
- - OpenBSD CVS updates.
- [ssh.1 ssh.c]
- - ssh -2
- [auth.c channels.c clientloop.c packet.c packet.h serverloop.c]
- [session.c sshconnect.c]
- - check payload for (illegal) extra data
- [ALL]
- whitespace cleanup
-
-20000413
- - INSTALL doc updates
- - Merged OpenBSD updates to include paths.
-
-20000412
- - OpenBSD CVS updates:
- - [channels.c]
- repair x11-fwd
- - [sshconnect.c]
- fix passwd prompt for ssh2, less debugging output.
- - [clientloop.c compat.c dsa.c kex.c sshd.c]
- less debugging output
- - [kex.c kex.h sshconnect.c sshd.c]
- check for reasonable public DH values
- - [README.openssh2 cipher.c cipher.h compat.c compat.h readconf.c]
- [readconf.h servconf.c servconf.h ssh.c ssh.h sshconnect.c sshd.c]
- add Cipher and Protocol options to ssh/sshd, e.g.:
- ssh -o 'Protocol 1,2' if you prefer proto 1, ssh -o 'Ciphers
- arcfour,3des-cbc'
- - [sshd.c]
- print 1.99 only if server supports both
-
-20000408
- - Avoid some compiler warnings in fake-get*.c
- - Add IPTOS macros for systems which lack them
- - Only set define entropy collection macros if they are found
- - More large OpenBSD CVS updates:
- - [auth.c auth.h servconf.c servconf.h serverloop.c session.c]
- [session.h ssh.h sshd.c README.openssh2]
- ssh2 server side, see README.openssh2; enable with 'sshd -2'
- - [channels.c]
- no adjust after close
- - [sshd.c compat.c ]
- interop w/ latest ssh.com windows client.
-
-20000406
- - OpenBSD CVS update:
- - [channels.c]
- close efd on eof
- - [clientloop.c compat.c ssh.c sshconnect.c myproposal.h]
- ssh2 client implementation, interops w/ ssh.com and lsh servers.
- - [sshconnect.c]
- missing free.
- - [authfile.c cipher.c cipher.h packet.c sshconnect.c sshd.c]
- remove unused argument, split cipher_mask()
- - [clientloop.c]
- re-order: group ssh1 vs. ssh2
- - Make Redhat spec require openssl >= 0.9.5a
-
-20000404
- - Add tests for RAND_add function when searching for OpenSSL
- - OpenBSD CVS update:
- - [packet.h packet.c]
- ssh2 packet format
- - [packet.h packet.c nchan2.ms nchan.h compat.h compat.c]
- [channels.h channels.c]
- channel layer support for ssh2
- - [kex.h kex.c hmac.h hmac.c dsa.c dsa.h]
- DSA, keyexchange, algorithm agreement for ssh2
- - Generate manpages before make install not at the end of make all
- - Don't seed the rng quite so often
- - Always reseed rng when requested
-
-20000403
- - Wrote entropy collection routines for systems that lack /dev/random
- and EGD
- - Disable tests and typedefs for 64 bit types. They are currently unused.
-
-20000401
- - Big OpenBSD CVS update (mainly beginnings of SSH2 infrastructure)
- - [auth.c session.c sshd.c auth.h]
- split sshd.c -> auth.c session.c sshd.c plus cleanup and goto-removal
- - [bufaux.c bufaux.h]
- support ssh2 bignums
- - [channels.c channels.h clientloop.c sshd.c nchan.c nchan.h packet.c]
- [readconf.c ssh.c ssh.h serverloop.c]
- replace big switch() with function tables (prepare for ssh2)
- - [ssh2.h]
- ssh2 message type codes
- - [sshd.8]
- reorder Xr to avoid cutting
- - [serverloop.c]
- close(fdin) if fdin != fdout, shutdown otherwise, ok theo@
- - [channels.c]
- missing close
- allow bigger packets
- - [cipher.c cipher.h]
- support ssh2 ciphers
- - [compress.c]
- cleanup, less code
- - [dispatch.c dispatch.h]
- function tables for different message types
- - [log-server.c]
- do not log() if debuggin to stderr
- rename a cpp symbol, to avoid param.h collision
- - [mpaux.c]
- KNF
- - [nchan.c]
- sync w/ channels.c
-
-20000326
- - Better tests for OpenSSL w/ RSAref
- - Added replacement setenv() function from OpenBSD libc. Suggested by
- Ben Lindstrom <mouring@pconline.com>
- - OpenBSD CVS update
- - [auth-krb4.c]
- -Wall
- - [auth-rh-rsa.c auth-rsa.c hostfile.c hostfile.h key.c key.h match.c]
- [match.h ssh.c ssh.h sshconnect.c sshd.c]
- initial support for DSA keys. ok deraadt@, niels@
- - [cipher.c cipher.h]
- remove unused cipher_attack_detected code
- - [scp.1 ssh-add.1 ssh-agent.1 ssh-keygen.1 ssh.1 sshd.8]
- Fix some formatting problems I missed before.
- - [ssh.1 sshd.8]
- fix spelling errors, From: FreeBSD
- - [ssh.c]
- switch to raw mode only if he _get_ a pty (not if we _want_ a pty).
-
-20000324
- - Released 1.2.3
-
-20000317
- - Clarified --with-default-path option.
- - Added -blibpath handling for AIX to work around stupid runtime linking.
- Problem elucidated by gshapiro@SENDMAIL.ORG by way of Jim Knoble
- <jmknoble@pobox.com>
- - Checks for 64 bit int types. Problem report from Mats Fredholm
- <matsf@init.se>
- - OpenBSD CVS updates:
- - [atomicio.c auth-krb4.c bufaux.c channels.c compress.c fingerprint.c]
- [packet.h radix.c rsa.c scp.c ssh-agent.c ssh-keygen.c sshconnect.c]
- [sshd.c]
- pedantic: signed vs. unsigned, void*-arithm, etc
- - [ssh.1 sshd.8]
- Various cleanups and standardizations.
- - Runtime error fix for HPUX from Otmar Stahl
- <O.Stahl@lsw.uni-heidelberg.de>
-
-20000316
- - Fixed configure not passing LDFLAGS to Solaris. Report from David G.
- Hesprich <dghespri@sprintparanet.com>
- - Propogate LD through to Makefile
- - Doc cleanups
- - Added blurb about "scp: command not found" errors to UPGRADING
-
-20000315
- - Fix broken CFLAGS handling during search for OpenSSL. Fixes va_list
- problems with gcc/Solaris.
- - Don't free argument to putenv() after use (in setenv() replacement).
- Report from Seigo Tanimura <tanimura@r.dl.itc.u-tokyo.ac.jp>
- - Created contrib/ subdirectory. Included helpers from Phil Hands'
- Debian package, README file and chroot patch from Ricardo Cerqueira
- <rmcc@clix.pt>
- - Moved gnome-ssh-askpass.c to contrib directory and removed config
- option.
- - Slight cleanup to doc files
- - Configure fix from Bratislav ILICH <bilic@zepter.ru>
-
-20000314
- - Include macro for IN6_IS_ADDR_V4MAPPED. Report from
- peter@frontierflying.com
- - Include /usr/local/include and /usr/local/lib for systems that don't
- do it themselves
- - -R/usr/local/lib for Solaris
- - Fix RSAref detection
- - Fix IN6_IS_ADDR_V4MAPPED macro
-
-20000311
- - Detect RSAref
- - OpenBSD CVS change
- [sshd.c]
- - disallow guessing of root password
- - More configure fixes
- - IPv6 workarounds from Hideaki YOSHIFUJI <yoshfuji@ecei.tohoku.ac.jp>
-
-20000309
- - OpenBSD CVS updates to v1.2.3
- [ssh.h atomicio.c]
- - int atomicio -> ssize_t (for alpha). ok deraadt@
- [auth-rsa.c]
- - delay MD5 computation until client sends response, free() early, cleanup.
- [cipher.c]
- - void* -> unsigned char*, ok niels@
- [hostfile.c]
- - remove unused variable 'len'. fix comments.
- - remove unused variable
- [log-client.c log-server.c]
- - rename a cpp symbol, to avoid param.h collision
- [packet.c]
- - missing xfree()
- - getsockname() requires initialized tolen; andy@guildsoftware.com
- - use getpeername() in packet_connection_is_on_socket(), fixes sshd -i;
- from Holger.Trapp@Informatik.TU-Chemnitz.DE
- [pty.c pty.h]
- - register cleanup for pty earlier. move code for pty-owner handling to
- pty.c ok provos@, dugsong@
- [readconf.c]
- - turn off x11-fwd for the client, too.
- [rsa.c]
- - PKCS#1 padding
- [scp.c]
- - allow '.' in usernames; from jedgar@fxp.org
- [servconf.c]
- - typo: ignore_user_known_hosts int->flag; naddy@mips.rhein-neckar.de
- - sync with sshd_config
- [ssh-keygen.c]
- - enable ssh-keygen -l -f ~/.ssh/known_hosts, ok deraadt@
- [ssh.1]
- - Change invalid 'CHAT' loglevel to 'VERBOSE'
- [ssh.c]
- - suppress AAAA query host when '-4' is used; from shin@nd.net.fujitsu.co.jp
- - turn off x11-fwd for the client, too.
- [sshconnect.c]
- - missing xfree()
- - retry rresvport_af(), too. from sumikawa@ebina.hitachi.co.jp.
- - read error vs. "Connection closed by remote host"
- [sshd.8]
- - ie. -> i.e.,
- - do not link to a commercial page..
- - sync with sshd_config
- [sshd.c]
- - no need for poll.h; from bright@wintelcom.net
- - log with level log() not fatal() if peer behaves badly.
- - don't panic if client behaves strange. ok deraadt@
- - make no-port-forwarding for RSA keys deny both -L and -R style fwding
- - delay close() of pty until the pty has been chowned back to root
- - oops, fix comment, too.
- - missing xfree()
- - move XAUTHORITY to subdir. ok dugsong@. fixes debian bug #57907, too.
- (http://cgi.debian.org/cgi-bin/bugreport.cgi?archive=no&bug=57907)
- - register cleanup for pty earlier. move code for pty-owner handling to
- pty.c ok provos@, dugsong@
- - create x11 cookie file
- - fix pr 1113, fclose() -> pclose(), todo: remote popen()
- - version 1.2.3
- - Cleaned up
- - Removed warning workaround for Linux and devpts filesystems (no longer
- required after OpenBSD updates)
-
-20000308
- - Configure fix from Hiroshi Takekawa <takekawa@sr3.t.u-tokyo.ac.jp>
-
-20000307
- - Released 1.2.2p1
-
-20000305
- - Fix DEC compile fix
- - Explicitly seed OpenSSL's PRNG before checking rsa_alive()
- - Check for getpagesize in libucb.a if not found in libc. Fix for old
- Solaris from Andre Lucas <andre.lucas@dial.pipex.com>
- - Check for libwrap if --with-tcp-wrappers option specified. Suggestion
- Mate Wierdl <mw@moni.msci.memphis.edu>
-
-20000303
- - Added "make host-key" target, Suggestion from Dominik Brettnacher
- <domi@saargate.de>
- - Don't permanently fail on bind() if getaddrinfo has more choices left for
- us. Needed to work around messy IPv6 on Linux. Patch from Arkadiusz
- Miskiewicz <misiek@pld.org.pl>
- - DEC Unix compile fix from David Del Piero <David.DelPiero@qed.qld.gov.au>
- - Manpage fix from David Del Piero <David.DelPiero@qed.qld.gov.au>
-
-20000302
- - Big cleanup of autoconf code
- - Rearranged to be a little more logical
- - Added -R option for Solaris
- - Rewrote OpenSSL detection code. Now uses AC_TRY_RUN with a test program
- to detect library and header location _and_ ensure library has proper
- RSA support built in (this is a problem with OpenSSL 0.9.5).
- - Applied pty cleanup patch from markus.friedl@informatik.uni-erlangen.de
- - Avoid warning message with Unix98 ptys
- - Warning was valid - possible race condition on PTYs. Avoided using
- platform-specific code.
- - Document some common problems
- - Allow root access to any key. Patch from
- markus.friedl@informatik.uni-erlangen.de
-
-20000207
- - Removed SOCKS code. Will support through a ProxyCommand.
-
-20000203
- - Fixed SEGVs in authloop, fix from vbzoli@hbrt.hu
- - Add --with-ssl-dir option
-
-20000202
- - Fix lastlog code for directory based lastlogs. Fix from Josh Durham
- <jmd@aoe.vt.edu>
- - Documentation fixes from HARUYAMA Seigo <haruyama@nt.phys.s.u-tokyo.ac.jp>
- - Added URLs to Japanese translations of documents by HARUYAMA Seigo
- <haruyama@nt.phys.s.u-tokyo.ac.jp>
-
-20000201
- - Use socket pairs by default (instead of pipes). Prevents race condition
- on several (buggy) OSs. Report and fix from tridge@linuxcare.com
-
-20000127
- - Seed OpenSSL's random number generator before generating RSA keypairs
- - Split random collector into seperate file
- - Compile fix from Andre Lucas <andre.lucas@dial.pipex.com>
-
-20000126
- - Released 1.2.2 stable
-
- - NeXT keeps it lastlog in /usr/adm. Report from
- mouring@newton.pconline.com
- - Added note in UPGRADING re interop with commercial SSH using idea.
- Report from Jim Knoble <jmknoble@pobox.com>
- - Fix linking order for Kerberos/AFS. Fix from Holget Trapp
- <Holger.Trapp@Informatik.TU-Chemnitz.DE>
-
-20000125
- - Fix NULL pointer dereference in login.c. Fix from Andre Lucas
- <andre.lucas@dial.pipex.com>
- - Reorder PAM initialisation so it does not mess up lastlog. Reported
- by Andre Lucas <andre.lucas@dial.pipex.com>
- - Use preformatted manpages on SCO, report from Gary E. Miller
- <gem@rellim.com>
- - New URL for x11-ssh-askpass.
- - Fixpaths was missing /etc/ssh_known_hosts. Report from Jim Knoble
- <jmknoble@pobox.com>
- - Added 'DESTDIR' option to Makefile to ease package building. Patch from
- Jim Knoble <jmknoble@pobox.com>
- - Updated RPM spec files to use DESTDIR
-
-20000124
- - Pick up version 1.2.2 from OpenBSD CVS (no changes, just version number
- increment)
-
-20000123
- - OpenBSD CVS:
- - [packet.c]
- getsockname() requires initialized tolen; andy@guildsoftware.com
- - AIX patch from Matt Richards <v2matt@btv.ibm.com> and David Rankin
- <drankin@bohemians.lexington.ky.us>
- - Fix lastlog support, patch from Andre Lucas <andre.lucas@dial.pipex.com>
-
-20000122
- - Fix compilation of bsd-snprintf.c on Solaris, fix from Ben Taylor
- <bent@clark.net>
- - Merge preformatted manpage patch from Andre Lucas
- <andre.lucas@dial.pipex.com>
- - Make IPv4 use the default in RPM packages
- - Irix uses preformatted manpages
- - Missing htons() in bsd-bindresvport.c, fix from Holger Trapp
- <Holger.Trapp@Informatik.TU-Chemnitz.DE>
- - OpenBSD CVS updates:
- - [packet.c]
- use getpeername() in packet_connection_is_on_socket(), fixes sshd -i;
- from Holger.Trapp@Informatik.TU-Chemnitz.DE
- - [sshd.c]
- log with level log() not fatal() if peer behaves badly.
- - [readpass.c]
- instead of blocking SIGINT, catch it ourselves, so that we can clean
- the tty modes up and kill ourselves -- instead of our process group
- leader (scp, cvs, ...) going away and leaving us in noecho mode.
- people with cbreak shells never even noticed..
- - [ssh-add.1 ssh-agent.1 ssh-keygen.1 ssh.1 sshd.8]
- ie. -> i.e.,
-
-20000120
- - Don't use getaddrinfo on AIX
- - Update to latest OpenBSD CVS:
- - [auth-rsa.c]
- - fix user/1056, sshd keeps restrictions; dbt@meat.net
- - [sshconnect.c]
- - disable agent fwding for proto 1.3, remove abuse of auth-rsa flags.
- - destroy keys earlier
- - split key exchange (kex) and user authentication (user-auth),
- ok: provos@
- - [sshd.c]
- - no need for poll.h; from bright@wintelcom.net
- - disable agent fwding for proto 1.3, remove abuse of auth-rsa flags.
- - split key exchange (kex) and user authentication (user-auth),
- ok: provos@
- - Big manpage and config file cleanup from Andre Lucas
- <andre.lucas@dial.pipex.com>
- - Re-added latest (unmodified) OpenBSD manpages
- - Doc updates
- - NetBSD patch from David Rankin <drankin@bohemians.lexington.ky.us> and
- Christos Zoulas <christos@netbsd.org>
-
-20000119
- - SCO compile fixes from Gary E. Miller <gem@rellim.com>
- - Compile fix from Darren_Hall@progressive.com
- - Linux/glibc-2.1.2 takes a *long* time to look up names for AF_UNSPEC
- addresses using getaddrinfo(). Added a configure switch to make the
- default lookup mode AF_INET
-
-20000118
- - Fixed --with-pid-dir option
- - Makefile fix from Gary E. Miller <gem@rellim.com>
- - Compile fix for HPUX and Solaris from Andre Lucas
- <andre.lucas@dial.pipex.com>
-
-20000117
- - Clean up bsd-bindresvport.c. Use arc4random() for picking initial
- port, ignore EINVAL errors (Linux) when searching for free port.
- - Revert __snprintf -> snprintf aliasing. Apparently Solaris
- __snprintf isn't. Report from Theo de Raadt <theo@cvs.openbsd.org>
- - Document location of Redhat PAM file in INSTALL.
- - Fixed X11 forwarding bug on Linux. libc advertises AF_INET6
- INADDR_ANY_INIT addresses via getaddrinfo, but may not be able to
- deliver (no IPv6 kernel support)
- - Released 1.2.1pre27
-
- - Fix rresvport_af failure errors (logic error in bsd-bindresvport.c)
- - Fix --with-ipaddr-display option test. Fix from Jarno Huuskonen
- <jhuuskon@hytti.uku.fi>
- - Fix hang on logout if processes are still using the pty. Needs
- further testing.
- - Patch from Christos Zoulas <christos@zoulas.com>
- - Try $prefix first when looking for OpenSSL.
- - Include sys/types.h when including sys/socket.h in test programs
- - Substitute PID directory in sshd.8. Suggestion from Andrew
- Stribblehill <a.d.stribblehill@durham.ac.uk>
-
-20000116
- - Renamed --with-xauth-path to --with-xauth
- - Added --with-pid-dir option
- - Released 1.2.1pre26
-
- - Compilation fix from Kiyokazu SUTO <suto@ks-and-ks.ne.jp>
- - Fixed broken bugfix for /dev/ptmx on Linux systems which lack
- openpty(). Report from Kiyokazu SUTO <suto@ks-and-ks.ne.jp>
-
-20000115
- - Add --with-xauth-path configure directive and explicit test for
- /usr/openwin/bin/xauth for Solaris systems. Report from Anders
- Nordby <anders@fix.no>
- - Fix incorrect detection of /dev/ptmx on Linux systems that lack
- openpty. Report from John Seifarth <john@waw.be>
- - Look for intXX_t and u_intXX_t in sys/bitypes.h if they are not in
- sys/types.h. Fixes problems on SCO, report from Gary E. Miller
- <gem@rellim.com>
- - Use __snprintf and __vnsprintf if they are found where snprintf and
- vnsprintf are lacking. Suggested by Ben Taylor <bent@shell.clark.net>
- and others.
-
-20000114
- - Merged OpenBSD IPv6 patch:
- - [sshd.c sshd.8 sshconnect.c ssh.h ssh.c servconf.h servconf.c scp.1]
- [scp.c packet.h packet.c login.c log.c canohost.c channels.c]
- [hostfile.c sshd_config]
- ipv6 support: mostly gethostbyname->getaddrinfo/getnameinfo, new
- features: sshd allows multiple ListenAddress and Port options. note
- that libwrap is not IPv6-ready. (based on patches from
- fujiwara@rcac.tdi.co.jp)
- - [ssh.c canohost.c]
- more hints (hints.ai_socktype=SOCK_STREAM) for getaddrinfo,
- from itojun@
- - [channels.c]
- listen on _all_ interfaces for X11-Fwd (hints.ai_flags = AI_PASSIVE)
- - [packet.h]
- allow auth-kerberos for IPv4 only
- - [scp.1 sshd.8 servconf.h scp.c]
- document -4, -6, and 'ssh -L 2022/::1/22'
- - [ssh.c]
- 'ssh @host' is illegal (null user name), from
- karsten@gedankenpolizei.de
- - [sshconnect.c]
- better error message
- - [sshd.c]
- allow auth-kerberos for IPv4 only
- - Big IPv6 merge:
- - Cleanup overrun in sockaddr copying on RHL 6.1
- - Replacements for getaddrinfo, getnameinfo, etc based on versions
- from patch from KIKUCHI Takahiro <kick@kyoto.wide.ad.jp>
- - Replacement for missing structures on systems that lack IPv6
- - record_login needed to know about AF_INET6 addresses
- - Borrowed more code from OpenBSD: rresvport_af and requisites
-
-20000110
- - Fixes to auth-skey to enable it to use the standard OpenSSL libraries
-
-20000107
- - New config.sub and config.guess to fix problems on SCO. Supplied
- by Gary E. Miller <gem@rellim.com>
- - SCO build fix from Gary E. Miller <gem@rellim.com>
- - Released 1.2.1pre25
-
-20000106
- - Documentation update & cleanup
- - Better KrbIV / AFS detection, based on patch from:
- Holger Trapp <Holger.Trapp@Informatik.TU-Chemnitz.DE>
-
-20000105
- - Fixed annoying DES corruption problem. libcrypt has been
- overriding symbols in libcrypto. Removed libcrypt and crypt.h
- altogether (libcrypto includes its own crypt(1) replacement)
- - Added platform-specific rules for Irix 6.x. Included warning that
- they are untested.
-
-20000103
- - Add explicit make rules for files proccessed by fixpaths.
- - Fix "make install" in RPM spec files. Report from Tenkou N. Hattori
- <tnh@kondara.org>
- - Removed "nullok" directive from default PAM configuration files.
- Added information on enabling EmptyPasswords on openssh+PAM in
- UPGRADING file.
- - OpenBSD CVS updates
- - [ssh-agent.c]
- cleanup_exit() for SIGTERM/SIGHUP, too. from fgsch@ and
- dgaudet@arctic.org
- - [sshconnect.c]
- compare correct version for 1.3 compat mode
-
-20000102
- - Prevent multiple inclusion of config.h and defines.h. Suggested
- by Andre Lucas <andre.lucas@dial.pipex.com>
- - Properly clean up on exit of ssh-agent. Patch from Dean Gaudet
- <dgaudet@arctic.org>
-
-19991231
- - Fix password support on systems with a mixture of shadowed and
- non-shadowed passwords (e.g. NIS). Report and fix from
- HARUYAMA Seigo <haruyama@nt.phys.s.u-tokyo.ac.jp>
- - Fix broken autoconf typedef detection. Report from Marc G.
- Fournier <marc.fournier@acadiau.ca>
- - Fix occasional crash on LinuxPPC. Patch from Franz Sirl
- <Franz.Sirl-kernel@lauterbach.com>
- - Prevent typedefs from being compiled more than once. Report from
- Marc G. Fournier <marc.fournier@acadiau.ca>
- - Fill in ut_utaddr utmp field. Report from Benjamin Charron
- <iretd@bigfoot.com>
- - Really fix broken default path. Fix from Jim Knoble
- <jmknoble@pobox.com>
- - Remove test for quad_t. No longer needed.
- - Released 1.2.1pre24
-
- - Added support for directory-based lastlogs
- - Really fix typedefs, patch from Ben Taylor <bent@clark.net>
-
-19991230
- - OpenBSD CVS updates:
- - [auth-passwd.c]
- check for NULL 1st
- - Removed most of the pam code into its own file auth-pam.[ch]. This
- cleaned up sshd.c up significantly.
- - PAM authentication was incorrectly interpreting
- "PermitRootLogin without-password". Report from Matthias Andree
- <ma@dt.e-technik.uni-dortmund.de
- - Several other cleanups
- - Merged Dante SOCKS support patch from David Rankin
- <drankin@bohemians.lexington.ky.us>
- - Updated documentation with ./configure options
- - Released 1.2.1pre23
-
-19991229
- - Applied another NetBSD portability patch from David Rankin
- <drankin@bohemians.lexington.ky.us>
- - Fix --with-default-path option.
- - Autodetect perl, patch from David Rankin
- <drankin@bohemians.lexington.ky.us>
- - Print whether OpenSSH was compiled with RSARef, patch from
- Nalin Dahyabhai <nalin@thermo.stat.ncsu.edu>
- - Calls to pam_setcred, patch from Nalin Dahyabhai
- <nalin@thermo.stat.ncsu.edu>
- - Detect missing size_t and typedef it.
- - Rename helper.[ch] to (more appropriate) bsd-misc.[ch]
- - Minor Makefile cleaning
-
-19991228
- - Replacement for getpagesize() for systems which lack it
- - NetBSD login.c compile fix from David Rankin
- <drankin@bohemians.lexington.ky.us>
- - Fully set ut_tv if present in utmp or utmpx
- - Portability fixes for Irix 5.3 (now compiles OK!)
- - autoconf and other misc cleanups
- - Merged AIX patch from Darren Hall <dhall@virage.org>
- - Cleaned up defines.h
- - Released 1.2.1pre22
-
-19991227
- - Automatically correct paths in manpages and configuration files. Patch
- and script from Andre Lucas <andre.lucas@dial.pipex.com>
- - Removed credits from README to CREDITS file, updated.
- - Added --with-default-path to specify custom path for server
- - Removed #ifdef trickery from acconfig.h into defines.h
- - PAM bugfix. PermitEmptyPassword was being ignored.
- - Fixed PAM config files to allow empty passwords if server does.
- - Explained spurious PAM auth warning workaround in UPGRADING
- - Use last few chars of tty line as ut_id
- - New SuSE RPM spec file from Chris Saia <csaia@wtower.com>
- - OpenBSD CVS updates:
- - [packet.h auth-rhosts.c]
- check format string for packet_disconnect and packet_send_debug, too
- - [channels.c]
- use packet_get_maxsize for channels. consistence.
-
-19991226
- - Enabled utmpx support by default for Solaris
- - Cleanup sshd.c PAM a little more
- - Revised RPM package to include Jim Knoble's <jmknoble@pobox.com>
- X11 ssh-askpass program.
- - Disable logging of PAM success and failures, PAM is verbose enough.
- Unfortunatly there is currently no way to disable auth failure
- messages. Mention this in UPGRADING file and sent message to PAM
- developers
- - OpenBSD CVS update:
- - [ssh-keygen.1 ssh.1]
- remove ref to .ssh/random_seed, mention .ssh/environment in
- .Sh FILES, too
- - Released 1.2.1pre21
- - Fixed implicit '.' in default path, report from Jim Knoble
- <jmknoble@pobox.com>
- - Redhat RPM spec fixes from Jim Knoble <jmknoble@pobox.com>
-
-19991225
- - More fixes from Andre Lucas <andre.lucas@dial.pipex.com>
- - Cleanup of auth-passwd.c for shadow and MD5 passwords
- - Cleanup and bugfix of PAM authentication code
- - Released 1.2.1pre20
-
- - Merged fixes from Ben Taylor <bent@clark.net>
- - Fixed configure support for PAM. Reported by Naz <96na@eng.cam.ac.uk>
- - Disabled logging of PAM password authentication failures when password
- is empty. (e.g start of authentication loop). Reported by Naz
- <96na@eng.cam.ac.uk>)
-
-19991223
- - Merged later HPUX patch from Andre Lucas
- <andre.lucas@dial.pipex.com>
- - Above patch included better utmpx support from Ben Taylor
- <bent@clark.net>
-
-19991222
- - Fix undefined fd_set type in ssh.h from Povl H. Pedersen
- <pope@netguide.dk>
- - Fix login.c breakage on systems which lack ut_host in struct
- utmp. Reported by Willard Dawson <willard.dawson@sbs.siemens.com>
-
-19991221
- - Integration of large HPUX patch from Andre Lucas
- <andre.lucas@dial.pipex.com>. Integrating it had a few other
- benefits:
- - Ability to disable shadow passwords at configure time
- - Ability to disable lastlog support at configure time
- - Support for IP address in $DISPLAY
- - OpenBSD CVS update:
- - [sshconnect.c]
- say "REMOTE HOST IDENTIFICATION HAS CHANGED"
- - Fix DISABLE_SHADOW support
- - Allow MD5 passwords even if shadow passwords are disabled
- - Release 1.2.1pre19
-
-19991218
- - Redhat init script patch from Chun-Chung Chen
- <cjj@u.washington.edu>
- - Avoid breakage on systems without IPv6 headers
-
-19991216
- - Makefile changes for Solaris from Peter Kocks
- <peter.kocks@baygate.com>
- - Minor updates to docs
- - Merged OpenBSD CVS changes:
- - [authfd.c ssh-agent.c]
- keysize warnings talk about identity files
- - [packet.c]
- "Connection closed by x.x.x.x": fatal() -> log()
- - Correctly handle empty passwords in shadow file. Patch from:
- "Chris, the Young One" <cky@pobox.com>
- - Released 1.2.1pre18
-
-19991215
- - Integrated patchs from Juergen Keil <jk@tools.de>
- - Avoid void* pointer arithmatic
- - Use LDFLAGS correctly
- - Fix SIGIO error in scp
- - Simplify status line printing in scp
- - Added better test for inline functions compiler support from
- Darren_Hall@progressive.com
-
-19991214
- - OpenBSD CVS Changes
- - [canohost.c]
- fix get_remote_port() and friends for sshd -i;
- Holger.Trapp@Informatik.TU-Chemnitz.DE
- - [mpaux.c]
- make code simpler. no need for memcpy. niels@ ok
- - [pty.c]
- namebuflen not sizeof namebuflen; bnd@ep-ag.com via djm@mindrot.org
- fix proto; markus
- - [ssh.1]
- typo; mark.baushke@solipsa.com
- - [channels.c ssh.c ssh.h sshd.c]
- type conflict for 'extern Type *options' in channels.c; dot@dotat.at
- - [sshconnect.c]
- move checking of hostkey into own function.
- - [version.h]
- OpenSSH-1.2.1
- - Clean up broken includes in pty.c
- - Some older systems don't have poll.h, they use sys/poll.h instead
- - Doc updates
-
-19991211
- - Fix compilation on systems with AFS. Reported by
- aloomis@glue.umd.edu
- - Fix installation on Solaris. Reported by
- Gordon Rowell <gordonr@gormand.com.au>
- - Fix gccisms (__attribute__ and inline). Report by edgy@us.ibm.com,
- patch from Markus Friedl <markus.friedl@informatik.uni-erlangen.de>
- - Auto-locate xauth. Patch from David Agraz <dagraz@jahoopa.com>
- - Compile fix from David Agraz <dagraz@jahoopa.com>
- - Avoid compiler warning in bsd-snprintf.c
- - Added pam_limits.so to default PAM config. Suggested by
- Jim Knoble <jmknoble@pobox.com>
-
-19991209
- - Import of patch from Ben Taylor <bent@clark.net>:
- - Improved PAM support
- - "uninstall" rule for Makefile
- - utmpx support
- - Should fix PAM problems on Solaris
- - OpenBSD CVS updates:
- - [readpass.c]
- avoid stdio; based on work by markus, millert, and I
- - [sshd.c]
- make sure the client selects a supported cipher
- - [sshd.c]
- fix sighup handling. accept would just restart and daemon handled
- sighup only after the next connection was accepted. use poll on
- listen sock now.
- - [sshd.c]
- make that a fatal
- - Applied patch from David Rankin <drankin@bohemians.lexington.ky.us>
- to fix libwrap support on NetBSD
- - Released 1.2pre17
-
-19991208
- - Compile fix for Solaris with /dev/ptmx from
- David Agraz <dagraz@jahoopa.com>
-
-19991207
- - sshd Redhat init script patch from Jim Knoble <jmknoble@pobox.com>
- fixes compatability with 4.x and 5.x
- - Fixed default SSH_ASKPASS
- - Fix PAM account and session being called multiple times. Problem
- reported by Adrian Baugh <adrian@merlin.keble.ox.ac.uk>
- - Merged more OpenBSD changes:
- - [atomicio.c authfd.c scp.c serverloop.c ssh.h sshconnect.c sshd.c]
- move atomicio into it's own file. wrap all socket write()s which
- were doing write(sock, buf, len) != len, with atomicio() calls.
- - [auth-skey.c]
- fd leak
- - [authfile.c]
- properly name fd variable
- - [channels.c]
- display great hatred towards strcpy
- - [pty.c pty.h sshd.c]
- use openpty() if it exists (it does on BSD4_4)
- - [tildexpand.c]
- check for ~ expansion past MAXPATHLEN
- - Modified helper.c to use new atomicio function.
- - Reformat Makefile a little
- - Moved RC4 routines from rc4.[ch] into helper.c
- - Added autoconf code to detect /dev/ptmx (Solaris) and /dev/ptc (AIX)
- - Updated SuSE spec from Chris Saia <csaia@wtower.com>
- - Tweaked Redhat spec
- - Clean up bad imports of a few files (forgot -kb)
- - Released 1.2pre16
-
-19991204
- - Small cleanup of PAM code in sshd.c
- - Merged OpenBSD CVS changes:
- - [auth-krb4.c auth-passwd.c auth-skey.c ssh.h]
- move skey-auth from auth-passwd.c to auth-skey.c, same for krb4
- - [auth-rsa.c]
- warn only about mismatch if key is _used_
- warn about keysize-mismatch with log() not error()
- channels.c readconf.c readconf.h ssh.c ssh.h sshconnect.c
- ports are u_short
- - [hostfile.c]
- indent, shorter warning
- - [nchan.c]
- use error() for internal errors
- - [packet.c]
- set loglevel for SSH_MSG_DISCONNECT to log(), not fatal()
- serverloop.c
- indent
- - [ssh-add.1 ssh-add.c ssh.h]
- document $SSH_ASKPASS, reasonable default
- - [ssh.1]
- CheckHostIP is not available for connects via proxy command
- - [sshconnect.c]
- typo
- easier to read client code for passwd and skey auth
- turn of checkhostip for proxy connects, since we don't know the remote ip
-
-19991126
- - Add definition for __P()
- - Added [v]snprintf() replacement for systems that lack it
-
-19991125
- - More reformatting merged from OpenBSD CVS
- - Merged OpenBSD CVS changes:
- - [channels.c]
- fix packet_integrity_check() for !have_hostname_in_open.
- report from mrwizard@psu.edu via djm@ibs.com.au
- - [channels.c]
- set SO_REUSEADDR and SO_LINGER for forwarded ports.
- chip@valinux.com via damien@ibs.com.au
- - [nchan.c]
- it's not an error() if shutdown_write failes in nchan.
- - [readconf.c]
- remove dead #ifdef-0-code
- - [readconf.c servconf.c]
- strcasecmp instead of tolower
- - [scp.c]
- progress meter overflow fix from damien@ibs.com.au
- - [ssh-add.1 ssh-add.c]
- SSH_ASKPASS support
- - [ssh.1 ssh.c]
- postpone fork_after_authentication until command execution,
- request/patch from jahakala@cc.jyu.fi via damien@ibs.com.au
- plus: use daemon() for backgrounding
- - Added BSD compatible install program and autoconf test, thanks to
- Niels Kristian Bech Jensen <nkbj@image.dk>
- - Solaris fixing, thanks to Ben Taylor <bent@clark.net>
- - Merged beginnings of AIX support from Tor-Ake Fransson <torake@hotmail.com>
- - Release 1.2pre15
-
-19991124
- - Merged very large OpenBSD source code reformat
- - OpenBSD CVS updates
- - [channels.c cipher.c compat.c log-client.c scp.c serverloop.c]
- [ssh.h sshd.8 sshd.c]
- syslog changes:
- * Unified Logmessage for all auth-types, for success and for failed
- * Standard connections get only ONE line in the LOG when level==LOG:
- Auth-attempts are logged only, if authentication is:
- a) successfull or
- b) with passwd or
- c) we had more than AUTH_FAIL_LOG failues
- * many log() became verbose()
- * old behaviour with level=VERBOSE
- - [readconf.c readconf.h ssh.1 ssh.h sshconnect.c sshd.c]
- tranfer s/key challenge/response data in SSH_SMSG_AUTH_TIS_CHALLENGE
- messages. allows use of s/key in windows (ttssh, securecrt) and
- ssh-1.2.27 clients without 'ssh -v', ok: niels@
- - [sshd.8]
- -V, for fallback to openssh in SSH2 compatibility mode
- - [sshd.c]
- fix sigchld race; cjc5@po.cwru.edu
-
-19991123
- - Added SuSE package files from Chris Saia <csaia@wtower.com>
- - Restructured package-related files under packages/*
- - Added generic PAM config
- - Numerous little Solaris fixes
- - Add recommendation to use GNU make to INSTALL document
-
-19991122
- - Make <enter> close gnome-ssh-askpass (Debian bug #50299)
- - OpenBSD CVS Changes
- - [ssh-keygen.c]
- don't create ~/.ssh only if the user wants to store the private
- key there. show fingerprint instead of public-key after
- keygeneration. ok niels@
- - Added OpenBSD bsd-strlcat.c, created bsd-strlcat.h
- - Added timersub() macro
- - Tidy RCSIDs of bsd-*.c
- - Added autoconf test and macro to deal with old PAM libraries
- pam_strerror definition (one arg vs two).
- - Fix EGD problems (Thanks to Ben Taylor <bent@clark.net>)
- - Retry /dev/urandom reads interrupted by signal (report from
- Robert Hardy <rhardy@webcon.net>)
- - Added a setenv replacement for systems which lack it
- - Only display public key comment when presenting ssh-askpass dialog
- - Released 1.2pre14
-
- - Configure, Make and changelog corrections from Tudor Bosman
- <tudorb@jm.nu> and Niels Kristian Bech Jensen <nkbj@image.dk>
-
-19991121
- - OpenBSD CVS Changes:
- - [channels.c]
- make this compile, bad markus
- - [log.c readconf.c servconf.c ssh.h]
- bugfix: loglevels are per host in clientconfig,
- factor out common log-level parsing code.
- - [servconf.c]
- remove unused index (-Wall)
- - [ssh-agent.c]
- only one 'extern char *__progname'
- - [sshd.8]
- document SIGHUP, -Q to synopsis
- - [sshconnect.c serverloop.c sshd.c packet.c packet.h]
- [channels.c clientloop.c]
- SSH_CMSG_MAX_PACKET_SIZE, some clients use this, some need this, niels@
- [hope this time my ISP stays alive during commit]
- - [OVERVIEW README] typos; green@freebsd
- - [ssh-keygen.c]
- replace xstrdup+strcat with strlcat+fixed buffer, fixes OF (bad me)
- exit if writing the key fails (no infinit loop)
- print usage() everytime we get bad options
- - [ssh-keygen.c] overflow, djm@mindrot.org
- - [sshd.c] fix sigchld race; cjc5@po.cwru.edu
-
-19991120
- - Merged more Solaris support from Marc G. Fournier
- <marc.fournier@acadiau.ca>
- - Wrote autoconf tests for integer bit-types
- - Fixed enabling kerberos support
- - Fix segfault in ssh-keygen caused by buffer overrun in filename
- handling.
-
-19991119
- - Merged PAM buffer overrun patch from Chip Salzenberg <chip@valinux.com>
- - Merged OpenBSD CVS changes
- - [auth-rhosts.c auth-rsa.c ssh-agent.c sshconnect.c sshd.c]
- more %d vs. %s in fmt-strings
- - [authfd.c]
- Integers should not be printed with %s
- - EGD uses a socket, not a named pipe. Duh.
- - Fix includes in fingerprint.c
- - Fix scp progress bar bug again.
- - Move ssh-askpass from ${libdir}/ssh to ${libexecdir}/ssh at request of
- David Rankin <drankin@bohemians.lexington.ky.us>
- - Added autoconf option to enable Kerberos 4 support (untested)
- - Added autoconf option to enable AFS support (untested)
- - Added autoconf option to enable S/Key support (untested)
- - Added autoconf option to enable TCP wrappers support (compiles OK)
- - Renamed BSD helper function files to bsd-*
- - Added tests for login and daemon and enable OpenBSD replacements for
- when they are absent.
- - Added non-PAM MD5 password support patch from Tudor Bosman <tudorb@jm.nu>
-
-19991118
- - Merged OpenBSD CVS changes
- - [scp.c] foregroundproc() in scp
- - [sshconnect.h] include fingerprint.h
- - [sshd.c] bugfix: the log() for passwd-auth escaped during logging
- changes.
- - [ssh.1] Spell my name right.
- - Added openssh.com info to README
-
-19991117
- - Merged OpenBSD CVS changes
- - [ChangeLog.Ylonen] noone needs this anymore
- - [authfd.c] close-on-exec for auth-socket, ok deraadt
- - [hostfile.c]
- in known_hosts key lookup the entry for the bits does not need
- to match, all the information is contained in n and e. This
- solves the problem with buggy servers announcing the wrong
- modulus length. markus and me.
- - [serverloop.c]
- bugfix: check for space if child has terminated, from:
- iedowse@maths.tcd.ie
- - [ssh-add.1 ssh-add.c ssh-keygen.1 ssh-keygen.c sshconnect.c]
- [fingerprint.c fingerprint.h]
- rsa key fingerprints, idea from Bjoern Groenvall <bg@sics.se>
- - [ssh-agent.1] typo
- - [ssh.1] add OpenSSH information to AUTHOR section. okay markus@
- - [sshd.c]
- force logging to stderr while loading private key file
- (lost while converting to new log-levels)
-
-19991116
- - Fix some Linux libc5 problems reported by Miles Wilson <mw@mctitle.com>
- - Merged OpenBSD CVS changes:
- - [auth-rh-rsa.c auth-rsa.c authfd.c authfd.h hostfile.c mpaux.c]
- [mpaux.h ssh-add.c ssh-agent.c ssh.h ssh.c sshd.c]
- the keysize of rsa-parameter 'n' is passed implizit,
- a few more checks and warnings about 'pretended' keysizes.
- - [cipher.c cipher.h packet.c packet.h sshd.c]
- remove support for cipher RC4
- - [ssh.c]
- a note for legay systems about secuity issues with permanently_set_uid(),
- the private hostkey and ptrace()
- - [sshconnect.c]
- more detailed messages about adding and checking hostkeys
-
-19991115
- - Merged OpenBSD CVS changes:
- - [ssh-add.c] change passphrase loop logic and remove ref to
- $DISPLAY, ok niels
- - Changed to ssh-add.c broke askpass support. Revised it to be a little more
- modular.
- - Revised autoconf support for enabling/disabling askpass support.
- - Merged more OpenBSD CVS changes:
- [auth-krb4.c]
- - disconnect if getpeername() fails
- - missing xfree(*client)
- [canohost.c]
- - disconnect if getpeername() fails
- - fix comment: we _do_ disconnect if ip-options are set
- [sshd.c]
- - disconnect if getpeername() fails
- - move checking of remote port to central place
- [auth-rhosts.c] move checking of remote port to central place
- [log-server.c] avoid extra fd per sshd, from millert@
- [readconf.c] print _all_ bad config-options in ssh(1), too
- [readconf.h] print _all_ bad config-options in ssh(1), too
- [ssh.c] print _all_ bad config-options in ssh(1), too
- [sshconnect.c] disconnect if getpeername() fails
- - OpenBSD's changes to sshd.c broke the PAM stuff, re-merged it.
- - Various small cleanups to bring diff (against OpenBSD) size down.
- - Merged more Solaris compability from Marc G. Fournier
- <marc.fournier@acadiau.ca>
- - Wrote autoconf tests for __progname symbol
- - RPM spec file fixes from Jim Knoble <jmknoble@pobox.com>
- - Released 1.2pre12
-
- - Another OpenBSD CVS update:
- - [ssh-keygen.1] fix .Xr
-
-19991114
- - Solaris compilation fixes (still imcomplete)
-
-19991113
- - Build patch from Niels Kristian Bech Jensen <nkbj@image.dk>
- - Don't install config files if they already exist
- - Fix inclusion of additional preprocessor directives from acconfig.h
- - Removed redundant inclusions of config.h
- - Added 'Obsoletes' lines to RPM spec file
- - Merged OpenBSD CVS changes:
- - [bufaux.c] save a view malloc/memcpy/memset/free's, ok niels
- - [scp.c] fix overflow reported by damien@ibs.com.au: off_t
- totalsize, ok niels,aaron
- - Delay fork (-f option) in ssh until after port forwarded connections
- have been initialised. Patch from Jani Hakala <jahakala@cc.jyu.fi>
- - Added shadow password patch from Thomas Neumann <tom@smart.ruhr.de>
- - Added ifdefs to auth-passwd.c to exclude it when PAM is enabled
- - Tidied default config file some more
- - Revised Redhat initscript to fix bug: sshd (re)start would fail
- if executed from inside a ssh login.
-
-19991112
- - Merged changes from OpenBSD CVS
- - [sshd.c] session_key_int may be zero
- - [auth-rh-rsa.c servconf.c servconf.h ssh.h sshd.8 sshd.c sshd_config]
- IgnoreUserKnownHosts(default=no), used for RhostRSAAuth, ok
- deraadt,millert
- - Brought default sshd_config more in line with OpenBSD's
- - Grab server in gnome-ssh-askpass (Debian bug #49872)
- - Released 1.2pre10
-
- - Added INSTALL documentation
- - Merged yet more changes from OpenBSD CVS
- - [auth-rh-rsa.c auth-rhosts.c auth-rsa.c channels.c clientloop.c]
- [ssh.c ssh.h sshconnect.c sshd.c]
- make all access to options via 'extern Options options'
- and 'extern ServerOptions options' respectively;
- options are no longer passed as arguments:
- * make options handling more consistent
- * remove #include "readconf.h" from ssh.h
- * readconf.h is only included if necessary
- - [mpaux.c] clear temp buffer
- - [servconf.c] print _all_ bad options found in configfile
- - Make ssh-askpass support optional through autoconf
- - Fix nasty division-by-zero error in scp.c
- - Released 1.2pre11
-
-19991111
- - Added (untested) Entropy Gathering Daemon (EGD) support
- - Fixed /dev/urandom fd leak (Debian bug #49722)
- - Merged OpenBSD CVS changes:
- - [auth-rh-rsa.c] user/958: check ~/.ssh/known_hosts for rhosts-rsa, too
- - [ssh.1] user/958: check ~/.ssh/known_hosts for rhosts-rsa, too
- - [sshd.8] user/958: check ~/.ssh/known_hosts for rhosts-rsa, too
- - Fix integer overflow which was messing up scp's progress bar for large
- file transfers. Fix submitted to OpenBSD developers. Report and fix
- from Kees Cook <cook@cpoint.net>
- - Merged more OpenBSD CVS changes:
- - [auth-krb4.c auth-passwd.c] remove x11- and krb-cleanup from fatal()
- + krb-cleanup cleanup
- - [clientloop.c log-client.c log-server.c ]
- [readconf.c readconf.h servconf.c servconf.h ]
- [ssh.1 ssh.c ssh.h sshd.8]
- add LogLevel {QUIET, FATAL, ERROR, INFO, CHAT, DEBUG} to ssh/sshd,
- obsoletes QuietMode and FascistLogging in sshd.
- - [sshd.c] fix fatal/assert() bug reported by damien@ibs.com.au:
- allow session_key_int != sizeof(session_key)
- [this should fix the pre-assert-removal-core-files]
- - Updated default config file to use new LogLevel option and to improve
- readability
-
-19991110
- - Merged several minor fixes:
- - ssh-agent commandline parsing
- - RPM spec file now installs ssh setuid root
- - Makefile creates libdir
- - Merged beginnings of Solaris compability from Marc G. Fournier
- <marc.fournier@acadiau.ca>
-
-19991109
- - Autodetection of SSL/Crypto library location via autoconf
- - Fixed location of ssh-askpass to follow autoconf
- - Integrated Makefile patch from Niels Kristian Bech Jensen <nkbj@image.dk>
- - Autodetection of RSAref library for US users
- - Minor doc updates
- - Merged OpenBSD CVS changes:
- - [rsa.c] bugfix: use correct size for memset()
- - [sshconnect.c] warn if announced size of modulus 'n' != real size
- - Added GNOME passphrase requestor (use --with-gnome-askpass)
- - RPM build now creates subpackages
- - Released 1.2pre9
-
-19991108
- - Removed debian/ directory. This is now being maintained separately.
- - Added symlinks for slogin in RPM spec file
- - Fixed permissions on manpages in RPM spec file
- - Added references to required libraries in README file
- - Removed config.h.in from CVS
- - Removed pwdb support (better pluggable auth is provided by glibc)
- - Made PAM and requisite libdl optional
- - Removed lots of unnecessary checks from autoconf
- - Added support and autoconf test for openpty() function (Unix98 pty support)
- - Fix for scp not finding ssh if not installed as /usr/bin/ssh
- - Added TODO file
- - Merged parts of Debian patch From Phil Hands <phil@hands.com>:
- - Added ssh-askpass program
- - Added ssh-askpass support to ssh-add.c
- - Create symlinks for slogin on install
- - Fix "distclean" target in makefile
- - Added example for ssh-agent to manpage
- - Added support for PAM_TEXT_INFO messages
- - Disable internal /etc/nologin support if PAM enabled
- - Merged latest OpenBSD CVS changes:
- - [all] replace assert() with error, fatal or packet_disconnect
- - [sshd.c] don't send fail-msg but disconnect if too many authentication
- failures
- - [sshd.c] remove unused argument. ok dugsong
- - [sshd.c] typo
- - [rsa.c] clear buffers used for encryption. ok: niels
- - [rsa.c] replace assert() with error, fatal or packet_disconnect
- - [auth-krb4.c] remove unused argument. ok dugsong
- - Fixed coredump after merge of OpenBSD rsa.c patch
- - Released 1.2pre8
-
-19991102
- - Merged change from OpenBSD CVS
- - One-line cleanup in sshd.c
-
-19991030
- - Integrated debian package support from Dan Brosemer <odin@linuxfreak.com>
- - Merged latest updates for OpenBSD CVS:
- - channels.[ch] - remove broken x11 fix and document istate/ostate
- - ssh-agent.c - call setsid() regardless of argv[]
- - ssh.c - save a few lines when disabling rhosts-{rsa-}auth
- - Documentation cleanups
- - Renamed README -> README.Ylonen
- - Renamed README.openssh ->README
-
-19991029
- - Renamed openssh* back to ssh* at request of Theo de Raadt
- - Incorporated latest changes from OpenBSD's CVS
- - Integrated Makefile patch from Niels Kristian Bech Jensen <nkbj@image.dk>
- - Integrated PAM env patch from Nalin Dahyabhai <nalin.dahyabhai@pobox.com>
- - Make distclean now removed configure script
- - Improved PAM logging
- - Added some debug() calls for PAM
- - Removed redundant subdirectories
- - Integrated part of a patch from Dan Brosemer <odin@linuxfreak.com> for
- building on Debian.
- - Fixed off-by-one error in PAM env patch
- - Released 1.2pre6
-
-19991028
- - Further PAM enhancements.
- - Much cleaner
- - Now uses account and session modules for all logins.
- - Integrated patch from Dan Brosemer <odin@linuxfreak.com>
- - Build fixes
- - Autoconf
- - Change binary names to open*
- - Fixed autoconf script to detect PAM on RH6.1
- - Added tests for libpwdb, and OpenBSD functions to autoconf
- - Released 1.2pre4
-
- - Imported latest OpenBSD CVS code
- - Updated README.openssh
- - Released 1.2pre5
-
-19991027
- - Adapted PAM patch.
- - Released 1.0pre2
-
- - Excised my buggy replacements for strlcpy and mkdtemp
- - Imported correct OpenBSD strlcpy and mkdtemp routines.
- - Reduced arc4random_stir entropy read to 32 bytes (256 bits)
- - Picked up correct version number from OpenBSD
- - Added sshd.pam PAM configuration file
- - Added sshd.init Redhat init script
- - Added openssh.spec RPM spec file
- - Released 1.2pre3
-
-19991026
- - Fixed include paths of OpenSSL functions
- - Use OpenSSL MD5 routines
- - Imported RC4 code from nanocrypt
- - Wrote replacements for OpenBSD arc4random* functions
- - Wrote replacements for strlcpy and mkdtemp
- - Released 1.0pre1
diff --git a/usr/src/cmd/ssh/doc/INSTALL b/usr/src/cmd/ssh/doc/INSTALL
deleted file mode 100644
index 9112b92b57..0000000000
--- a/usr/src/cmd/ssh/doc/INSTALL
+++ /dev/null
@@ -1,199 +0,0 @@
-1. Prerequisites
-----------------
-
-You will need working installations of Zlib and OpenSSL.
-
-Zlib:
-http://www.freesoftware.com/pub/infozip/zlib/
-
-OpenSSL 0.9.5a or greater:
-http://www.openssl.org/
-
-RPMs of OpenSSL are available at http://violet.ibs.com.au/openssh/files/support
-
-OpenSSH can utilise Pluggable Authentication Modules (PAM) if your system
-supports it. PAM is standard on Redhat and Debian Linux and on Solaris.
-
-PAM:
-http://www.kernel.org/pub/linux/libs/pam/
-
-If you wish to build the GNOME passphrase requester, you will need the GNOME
-libraries and headers.
-
-GNOME:
-http://www.gnome.org/
-
-Alternatively, Jim Knoble <jmknoble@pobox.com> has written an excellent X11
-passphrase requester. This is maintained separately at:
-
-http://www.ntrnet.net/~jmknoble/software/x11-ssh-askpass/index.html
-
-The Entropy Gathering Daemon (EGD) is supported if you have a system which
-lacks /dev/random and don't want to use OpenSSH's internal entropy collection.
-
-EGD:
-http://www.lothar.com/tech/crypto/
-
-GNU Make:
-ftp://ftp.gnu.org/gnu/make/
-
-OpenSSH has only been tested with GNU make. It may work with other
-'make' programs, but you are on your own.
-
-pcre (POSIX Regular Expression library):
-ftp://ftp.cus.cam.ac.uk/pub/software/programs/pcre/
-
-Most platforms do not required this. However older 4.3 BSD do not
-have a posix regex library.
-
-
-2. Building / Installation
---------------------------
-
-To install OpenSSH with default options:
-
-./configure
-make
-make install
-
-This will install the OpenSSH binaries in /usr/local/bin, configuration files
-in /usr/local/etc, the server in /usr/local/sbin, etc. To specify a different
-installation prefix, use the --prefix option to configure:
-
-./configure --prefix=/opt
-make
-make install
-
-Will install OpenSSH in /opt/{bin,etc,lib,sbin}. You can also override
-specific paths, for example:
-
-./configure --prefix=/opt --sysconfdir=/etc/ssh
-make
-make install
-
-This will install the binaries in /opt/{bin,lib,sbin}, but will place the
-configuration files in /etc/ssh.
-
-If you are using PAM, you will need to manually install a PAM
-control file as "/etc/pam.d/sshd" (or wherever your system
-prefers to keep them). A generic PAM configuration is included as
-"contrib/sshd.pam.generic", you may need to edit it before using it on
-your system. If you are using a recent version of Redhat Linux, the
-config file in contrib/redhat/sshd.pam should be more useful.
-Failure to install a valid PAM file may result in an inability to
-use password authentication.
-
-There are a few other options to the configure script:
-
---with-rsh=PATH allows you to specify the path to your rsh program.
-Normally ./configure will search the current $PATH for 'rsh'. You
-may need to specify this option if rsh is not in your path or has a
-different name.
-
---without-pam will disable PAM support. PAM is automatically detected
-and switched on if found.
-
---enable-gnome-askpass will build the GNOME passphrase dialog. You
-need a working installation of GNOME, including the development
-headers, for this to work.
-
---with-random=/some/file allows you to specify an alternate source of
-random numbers (the default is /dev/urandom). Unless you are absolutely
-sure of what you are doing, it is best to leave this alone.
-
---with-egd-pool=/some/file allows you to enable Entropy Gathering
-Daemon support and to specify a EGD pool socket. Use this if your
-Unix lacks /dev/random and you don't want to use OpenSSH's builtin
-entropy collection support.
-
---with-lastlog=FILE will specify the location of the lastlog file.
-./configure searches a few locations for lastlog, but may not find
-it if lastlog is installed in a different place.
-
---without-lastlog will disable lastlog support entirely.
-
---with-kerberos4=PATH will enable Kerberos IV support. You will need
-to have the Kerberos libraries and header files installed for this
-to work. Use the optional PATH argument to specify the root of your
-Kerberos installation.
-
---with-afs=PATH will enable AFS support. You will need to have the
-Kerberos IV and the AFS libraries and header files installed for this
-to work. Use the optional PATH argument to specify the root of your
-AFS installation. AFS requires Kerberos support to be enabled.
-
---with-skey will enable S/Key one time password support. You will need
-the S/Key libraries and header files installed for this to work.
-
---with-tcp-wrappers will enable TCP Wrappers (/etc/hosts.allow|deny)
-support. You will need libwrap.a and tcpd.h installed.
-
---with-md5-passwords will enable the use of MD5 passwords. Enable this
-if your operating system uses MD5 passwords without using PAM.
-
---with-utmpx enables utmpx support. utmpx support is automatic for
-some platforms.
-
---without-shadow disables shadow password support.
-
---with-ipaddr-display forces the use of a numeric IP address in the
-$DISPLAY environment variable. Some broken systems need this.
-
---with-default-path=PATH allows you to specify a default $PATH for sessions
-started by sshd. This replaces the standard path entirely.
-
---with-pid-dir=PATH specifies the directory in which the ssh.pid file is
-created.
-
---with-xauth=PATH specifies the location of the xauth binary
-
---with-ipv4-default instructs OpenSSH to use IPv4 by default for new
-connections. Normally OpenSSH will try attempt to lookup both IPv6 and
-IPv4 addresses. On Linux/glibc-2.1.2 this causes long delays in name
-resolution. If this option is specified, you can still attempt to
-connect to IPv6 addresses using the command line option '-6'.
-
---with-ssl-dir=DIR allows you to specify where your OpenSSL libraries
-are installed.
-
---with-4in6 Check for IPv4 in IPv6 mapped addresses and convert them to
-real (AF_INET) IPv4 addresses. Works around some quirks on Linux.
-
-If you need to pass special options to the compiler or linker, you
-can specify these as environment variables before running ./configure.
-For example:
-
-CFLAGS="-O -m486" LDFLAGS="-s" LIBS="-lrubbish" LD="/usr/foo/ld" ./configure
-
-3. Configuration
-----------------
-
-The runtime configuration files are installed by in ${prefix}/etc or
-whatever you specified as your --sysconfdir (/usr/local/etc by default).
-
-The default configuration should be instantly usable, though you should
-review it to ensure that it matches your security requirements.
-
-To generate a host key, run "make host-key". Alternately you can do so
-manually using the following commands:
-
- ssh-keygen -b 1024 -f /etc/ssh/ssh_host_key -N ""
- ssh-keygen -d -f /etc/ssh/ssh_host_dsa_key -N ""
-
-Replacing /etc/ssh with the correct path to the configuration directory.
-(${prefix}/etc or whatever you specified with --sysconfdir during
-configuration)
-
-If you have configured OpenSSH with EGD support, ensure that EGD is
-running and has collected some Entropy.
-
-For more information on configuration, please refer to the manual pages
-for sshd, ssh and ssh-agent.
-
-4. Problems?
-------------
-
-If you experience problems compiling, installing or running OpenSSH.
-Please refer to the "reporting bugs" section of the webpage at
-http://www.openssh.com/
-
diff --git a/usr/src/cmd/ssh/doc/LICENCE b/usr/src/cmd/ssh/doc/LICENCE
deleted file mode 100644
index 04d6fe18e3..0000000000
--- a/usr/src/cmd/ssh/doc/LICENCE
+++ /dev/null
@@ -1,194 +0,0 @@
-This file is part of the ssh software.
-
-The licences which components of this software falls under are as
-follows. First, we will summarize and say that that all components
-are under a BSD licence, or a licence more free than that.
-
-OpenSSH contains no GPL code.
-
-1)
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
-
- [Tatu continues]
- * However, I am not implying to give any licenses to any patents or
- * copyrights held by third parties, and the software includes parts that
- * are not under my direct control. As far as I know, all included
- * source code is used in accordance with the relevant license agreements
- * and can be used freely for any purpose (the GNU license being the most
- * restrictive); see below for details.
-
- [However, none of that term is relevant at this point in time. All of
- these restrictively licenced software components which he talks about
- have been removed from OpenSSH, ie.
-
- - RSA is no longer included, found in the OpenSSL library
- - IDEA is no longer included, it's use is depricated
- - DES is now external, in the OpenSSL library
- - GMP is no longer used, and instead we call BN code from OpenSSL
- - Zlib is now external, in a library
- - The make-ssh-known-hosts script is no longer included
- - TSS has been removed
- - MD5 is now external, in the OpenSSL library
- - RC4 support has been replaced with ARC4 support from OpenSSL
- - Blowfish is now external, in the OpenSSL library
-
- [The licence continues]
-
- Note that any information and cryptographic algorithms used in this
- software are publicly available on the Internet and at any major
- bookstore, scientific library, and patent office worldwide. More
- information can be found e.g. at "http://www.cs.hut.fi/crypto".
-
- The legal status of this program is some combination of all these
- permissions and restrictions. Use only at your own responsibility.
- You will be responsible for any legal consequences yourself; I am not
- making any claims whether possessing or using this is legal or not in
- your country, and I am not taking any responsibility on your behalf.
-
-
- NO WARRANTY
-
- BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
- FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
- OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
- PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
- OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
- TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
- PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
- REPAIR OR CORRECTION.
-
- IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
- WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
- REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
- INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
- OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
- TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
- YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
- PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
- POSSIBILITY OF SUCH DAMAGES.
-
-2)
- The 32-bit CRC implementation in crc32.c is due to Gary S. Brown.
- Comments in the file indicate it may be used for any purpose without
- restrictions:
-
- * COPYRIGHT (C) 1986 Gary S. Brown. You may use this program, or
- * code or tables extracted from it, as desired without restriction.
-
-3)
- The 32-bit CRC compensation attack detector in deattack.c was
- contributed by CORE SDI S.A. under a BSD-style license. See
- http://www.core-sdi.com/english/ssh/ for details.
-
- * Cryptographic attack detector for ssh - source code
- *
- * Copyright (c) 1998 CORE SDI S.A., Buenos Aires, Argentina.
- *
- * All rights reserved. Redistribution and use in source and binary
- * forms, with or without modification, are permitted provided that
- * this copyright notice is retained.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES ARE DISCLAIMED. IN NO EVENT SHALL CORE SDI S.A. BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY OR
- * CONSEQUENTIAL DAMAGES RESULTING FROM THE USE OR MISUSE OF THIS
- * SOFTWARE.
- *
- * Ariel Futoransky <futo@core-sdi.com>
- * <http://www.core-sdi.com>
-
-3a)
- Various parts are from the University of California.
-
- * Copyright (c) 1983, 1987, 1989-1995
- * The Regents of the University of California. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
-
- * Copyright (c) 1989, 1991, 1993, 1994
- * The Regents of the University of California. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
-
-4)
- Remaining components of the software are provided under a standard
- 2-term BSD licence with the following names as copyright holders:
-
- Markus Friedl
- Theo de Raadt
- Niels Provos
- Dug Song
- Aaron Campbell
-
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/usr/src/cmd/ssh/doc/OVERVIEW b/usr/src/cmd/ssh/doc/OVERVIEW
deleted file mode 100644
index 7f34ac45bd..0000000000
--- a/usr/src/cmd/ssh/doc/OVERVIEW
+++ /dev/null
@@ -1,164 +0,0 @@
-This document is intended for those who wish to read the ssh source
-code. This tries to give an overview of the structure of the code.
-
-Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>
-Updated 17 Nov 1995.
-Updated 19 Oct 1999 for OpenSSH-1.2
-
-The software consists of ssh (client), sshd (server), scp, sdist, and
-the auxiliary programs ssh-keygen, ssh-agent, ssh-add, and
-make-ssh-known-hosts. The main program for each of these is in a .c
-file with the same name.
-
-There are some subsystems/abstractions that are used by a number of
-these programs.
-
- Buffer manipulation routines
-
- - These provide an arbitrary size buffer, where data can be appended.
- Data can be consumed from either end. The code is used heavily
- throughout ssh. The basic buffer manipulation functions are in
- buffer.c (header buffer.h), and additional code to manipulate specific
- data types is in bufaux.c.
-
- Compression Library
-
- - Ssh uses the GNU GZIP compression library (ZLIB).
-
- Encryption/Decryption
-
- - Ssh contains several encryption algorithms. These are all
- accessed through the cipher.h interface. The interface code is
- in cipher.c, and the implementations are in libc.
-
- Multiple Precision Integer Library
-
- - Uses the SSLeay BIGNUM sublibrary.
- - Some auxiliary functions for mp-int manipulation are in mpaux.c.
-
- Random Numbers
-
- - Uses arc4random() and such.
-
- RSA key generation, encryption, decryption
-
- - Ssh uses the RSA routines in libssl.
-
- RSA key files
-
- - RSA keys are stored in files with a special format. The code to
- read/write these files is in authfile.c. The files are normally
- encrypted with a passphrase. The functions to read passphrases
- are in readpass.c (the same code is used to read passwords).
-
- Binary packet protocol
-
- - The ssh binary packet protocol is implemented in packet.c. The
- code in packet.c does not concern itself with packet types or their
- execution; it contains code to build packets, to receive them and
- extract data from them, and the code to compress and/or encrypt
- packets. CRC code comes from crc32.c.
-
- - The code in packet.c calls the buffer manipulation routines
- (buffer.c, bufaux.c), compression routines (compress.c, zlib),
- and the encryption routines.
-
- X11, TCP/IP, and Agent forwarding
-
- - Code for various types of channel forwarding is in channels.c.
- The file defines a generic framework for arbitrary communication
- channels inside the secure channel, and uses this framework to
- implement X11 forwarding, TCP/IP forwarding, and authentication
- agent forwarding.
- The new, Protocol 1.5, channel close implementation is in nchan.c
-
- Authentication agent
-
- - Code to communicate with the authentication agent is in authfd.c.
-
- Authentication methods
-
- - Code for various authentication methods resides in auth-*.c
- (auth-passwd.c, auth-rh-rsa.c, auth-rhosts.c, auth-rsa.c). This
- code is linked into the server. The routines also manipulate
- known hosts files using code in hostfile.c. Code in canohost.c
- is used to retrieve the canonical host name of the remote host.
- Code in match.c is used to match host names.
-
- - In the client end, authentication code is in sshconnect.c. It
- reads Passwords/passphrases using code in readpass.c. It reads
- RSA key files with authfile.c. It communicates the
- authentication agent using authfd.c.
-
- The ssh client
-
- - The client main program is in ssh.c. It first parses arguments
- and reads configuration (readconf.c), then calls ssh_connect (in
- sshconnect.c) to open a connection to the server (possibly via a
- proxy), and performs authentication (ssh_login in sshconnect.c).
- It then makes any pty, forwarding, etc. requests. It may call
- code in ttymodes.c to encode current tty modes. Finally it
- calls client_loop in clientloop.c. This does the real work for
- the session.
-
- - The client is suid root. It tries to temporarily give up this
- rights while reading the configuration data. The root
- privileges are only used to make the connection (from a
- privileged socket). Any extra privileges are dropped before
- calling ssh_login.
-
- Pseudo-tty manipulation and tty modes
-
- - Code to allocate and use a pseudo tty is in pty.c. Code to
- encode and set terminal modes is in ttymodes.c.
-
- Logging in (updating utmp, lastlog, etc.)
-
- - The code to do things that are done when a user logs in are in
- login.c. This includes things such as updating the utmp, wtmp,
- and lastlog files. Some of the code is in sshd.c.
-
- Writing to the system log and terminal
-
- - The programs use the functions fatal(), log(), debug(), error()
- in many places to write messages to system log or user's
- terminal. The implementation that logs to system log is in
- log-server.c; it is used in the server program. The other
- programs use an implementation that sends output to stderr; it
- is in log-client.c. The definitions are in ssh.h.
-
- The sshd server (daemon)
-
- - The sshd daemon starts by processing arguments and reading the
- configuration file (servconf.c). It then reads the host key,
- starts listening for connections, and generates the server key.
- The server key will be regenerated every hour by an alarm.
-
- - When the server receives a connection, it forks, disables the
- regeneration alarm, and starts communicating with the client.
- They first perform identification string exchange, then
- negotiate encryption, then perform authentication, preparatory
- operations, and finally the server enters the normal session
- mode by calling server_loop in serverloop.c. This does the real
- work, calling functions in other modules.
-
- - The code for the server is in sshd.c. It contains a lot of
- stuff, including:
- - server main program
- - waiting for connections
- - processing new connection
- - authentication
- - preparatory operations
- - building up the execution environment for the user program
- - starting the user program.
-
- Auxiliary files
-
- - There are several other files in the distribution that contain
- various auxiliary routines:
- ssh.h the main header file for ssh (various definitions)
- getput.h byte-order independent storage of integers
- includes.h includes most system headers. Lots of #ifdefs.
- tildexpand.c expand tilde in file names
- uidswap.c uid-swapping
- xmalloc.c "safe" malloc routines
diff --git a/usr/src/cmd/ssh/doc/README b/usr/src/cmd/ssh/doc/README
deleted file mode 100644
index f94e2ed1c5..0000000000
--- a/usr/src/cmd/ssh/doc/README
+++ /dev/null
@@ -1,70 +0,0 @@
-[ A Japanese translation of this document is available at
-[ http://www.unixuser.org/%7Eharuyama/security/openssh/index.html
-[ Thanks to HARUYAMA Seigo <haruyama@nt.phys.s.u-tokyo.ac.jp>
-
-******* IMPORTANT
-* On systmes which lack a /dev/random driver, version of this port
-* prior to 1.2.2 were not correctly seeding OpenSSL's random number
-* pool. This resulted in lower quality RSA keys being generated. If
-* you generated host or user keys with v1.2.2 or previous versions,
-* please generate new ones using a more recent version.
-
-This is the port of OpenBSD's excellent OpenSSH[0] to Linux and other
-Unices.
-
-OpenSSH is based on the last free version of Tatu Ylonen's SSH with
-all patent-encumbered algorithms removed (to external libraries), all
-known security bugs fixed, new features reintroduced and many other
-clean-ups. More information about SSH itself can be found in the file
-README.Ylonen. OpenSSH has been created by Aaron Campbell, Bob Beck,
-Markus Friedl, Niels Provos, Theo de Raadt, and Dug Song. It has a
-homepage at http://www.openssh.com/
-
-This port consists of the re-introduction of autoconf support, PAM
-support (for Linux and Solaris), EGD[1] support and replacements for
-OpenBSD library functions that are (regrettably) absent from other
-unices. This port has been best tested on Linux, Solaris, HPUX, NetBSD
-and Irix. Support for AIX, SCO, NeXT and other Unices is underway.
-This version actively tracks changes in the OpenBSD CVS repository.
-
-The PAM support is now more functional than the popular packages of
-commercial ssh-1.2.x. It checks "account" and "session" modules for
-all logins, not just when using password authentication.
-
-OpenSSH depends on Zlib[2], OpenSSL[3] and optionally PAM[4].
-
-There is now several mailing lists for this port of OpenSSH. Please
-refer to http://www.openssh.com/list.html for details on how to join.
-
-Please send bug reports and patches to the mailing list
-openssh-unix-dev@mindrot.org. The list is open to posting by
-unsubscribed users.
-
-If you are a citizen of the USA or another country which restricts
-export of cryptographic products, then please refrain from sending
-crypto-related code or patches to the list. We cannot accept them.
-Other code contribution are accepted, but please follow the OpenBSD
-style guidelines[5].
-
-Please refer to the INSTALL document for information on how to install
-OpenSSH on your system. There are a number of differences between this
-port of OpenSSH and F-Secure SSH 1.x, please refer to the OpenSSH FAQ[6]
-for details and general tips.
-
-Damien Miller <djm@mindrot.org>
-
-Miscellania -
-
-This version of SSH is based upon code retrieved from the OpenBSD CVS
-repository which in turn was based on the last free
-version of SSH released by Tatu Ylonen.
-
-References -
-
-[0] http://www.openssh.com/faq.html
-[1] http://www.lothar.com/tech/crypto/
-[2] ftp://ftp.freesoftware.com/pub/infozip/zlib/
-[3] http://www.openssl.org/
-[4] http://www.kernel.org/pub/linux/libs/pam/ (PAM is standard on Solaris)
-[5] http://www.openbsd.org/cgi-bin/man.cgi?query=style&sektion=9&apropos=0&manpath=OpenBSD+Current
-[6] http://www.openssh.com/faq.html
diff --git a/usr/src/cmd/ssh/doc/README.Ylonen b/usr/src/cmd/ssh/doc/README.Ylonen
deleted file mode 100644
index 38987b926a..0000000000
--- a/usr/src/cmd/ssh/doc/README.Ylonen
+++ /dev/null
@@ -1,567 +0,0 @@
-
-[ Please note that this file has not been updated for OpenSSH and
- covers the ssh-1.2.12 release from Dec 1995 only. ]
-
-Ssh (Secure Shell) is a program to log into another computer over a
-network, to execute commands in a remote machine, and to move files
-from one machine to another. It provides strong authentication and
-secure communications over insecure channels. It is inteded as a
-replacement for rlogin, rsh, rcp, and rdist.
-
-See the file INSTALL for installation instructions. See COPYING for
-license terms and other legal issues. See RFC for a description of
-the protocol. There is a WWW page for ssh; see http://www.cs.hut.fi/ssh.
-
-This file has been updated to match ssh-1.2.12.
-
-
-FEATURES
-
- o Strong authentication. Closes several security holes (e.g., IP,
- routing, and DNS spoofing). New authentication methods: .rhosts
- together with RSA based host authentication, and pure RSA
- authentication.
-
- o Improved privacy. All communications are automatically and
- transparently encrypted. RSA is used for key exchange, and a
- conventional cipher (normally IDEA, DES, or triple-DES) for
- encrypting the session. Encryption is started before
- authentication, and no passwords or other information is
- transmitted in the clear. Encryption is also used to protect
- against spoofed packets.
-
- o Secure X11 sessions. The program automatically sets DISPLAY on
- the server machine, and forwards any X11 connections over the
- secure channel. Fake Xauthority information is automatically
- generated and forwarded to the remote machine; the local client
- automatically examines incoming X11 connections and replaces the
- fake authorization data with the real data (never telling the
- remote machine the real information).
-
- o Arbitrary TCP/IP ports can be redirected through the encrypted channel
- in both directions (e.g., for e-cash transactions).
-
- o No retraining needed for normal users; everything happens
- automatically, and old .rhosts files will work with strong
- authentication if administration installs host key files.
-
- o Never trusts the network. Minimal trust on the remote side of
- the connection. Minimal trust on domain name servers. Pure RSA
- authentication never trusts anything but the private key.
-
- o Client RSA-authenticates the server machine in the beginning of
- every connection to prevent trojan horses (by routing or DNS
- spoofing) and man-in-the-middle attacks, and the server
- RSA-authenticates the client machine before accepting .rhosts or
- /etc/hosts.equiv authentication (to prevent DNS, routing, or
- IP-spoofing).
-
- o Host authentication key distribution can be centrally by the
- administration, automatically when the first connection is made
- to a machine (the key obtained on the first connection will be
- recorded and used for authentication in the future), or manually
- by each user for his/her own use. The central and per-user host
- key repositories are both used and complement each other. Host
- keys can be generated centrally or automatically when the software
- is installed. Host authentication keys are typically 1024 bits.
-
- o Any user can create any number of user authentication RSA keys for
- his/her own use. Each user has a file which lists the RSA public
- keys for which proof of possession of the corresponding private
- key is accepted as authentication. User authentication keys are
- typically 1024 bits.
-
- o The server program has its own server RSA key which is
- automatically regenerated every hour. This key is never saved in
- any file. Exchanged session keys are encrypted using both the
- server key and the server host key. The purpose of the separate
- server key is to make it impossible to decipher a captured session by
- breaking into the server machine at a later time; one hour from
- the connection even the server machine cannot decipher the session
- key. The key regeneration interval is configurable. The server
- key is normally 768 bits.
-
- o An authentication agent, running in the user's laptop or local
- workstation, can be used to hold the user's RSA authentication
- keys. Ssh automatically forwards the connection to the
- authentication agent over any connections, and there is no need to
- store the RSA authentication keys on any machine in the network
- (except the user's own local machine). The authentication
- protocols never reveal the keys; they can only be used to verify
- that the user's agent has a certain key. Eventually the agent
- could rely on a smart card to perform all authentication
- computations.
-
- o The software can be installed and used (with restricted
- functionality) even without root privileges.
-
- o The client is customizable in system-wide and per-user
- configuration files. Most aspects of the client's operation can
- be configured. Different options can be specified on a per-host basis.
-
- o Automatically executes conventional rsh (after displaying a
- warning) if the server machine is not running sshd.
-
- o Optional compression of all data with gzip (including forwarded X11
- and TCP/IP port data), which may result in significant speedups on
- slow connections.
-
- o Complete replacement for rlogin, rsh, and rcp.
-
-
-WHY TO USE SECURE SHELL
-
-Currently, almost all communications in computer networks are done
-without encryption. As a consequence, anyone who has access to any
-machine connected to the network can listen in on any communication.
-This is being done by hackers, curious administrators, employers,
-criminals, industrial spies, and governments. Some networks leak off
-enough electromagnetic radiation that data may be captured even from a
-distance.
-
-When you log in, your password goes in the network in plain
-text. Thus, any listener can then use your account to do any evil he
-likes. Many incidents have been encountered worldwide where crackers
-have started programs on workstations without the owners knowledge
-just to listen to the network and collect passwords. Programs for
-doing this are available on the Internet, or can be built by a
-competent programmer in a few hours.
-
-Any information that you type or is printed on your screen can be
-monitored, recorded, and analyzed. For example, an intruder who has
-penetrated a host connected to a major network can start a program
-that listens to all data flowing in the network, and whenever it
-encounters a 16-digit string, it checks if it is a valid credit card
-number (using the check digit), and saves the number plus any
-surrounding text (to catch expiration date and holder) in a file.
-When the intruder has collected a few thousand credit card numbers, he
-makes smallish mail-order purchases from a few thousand stores around
-the world, and disappears when the goods arrive but before anyone
-suspects anything.
-
-Businesses have trade secrets, patent applications in preparation,
-pricing information, subcontractor information, client data, personnel
-data, financial information, etc. Currently, anyone with access to
-the network (any machine on the network) can listen to anything that
-goes in the network, without any regard to normal access restrictions.
-
-Many companies are not aware that information can so easily be
-recovered from the network. They trust that their data is safe
-since nobody is supposed to know that there is sensitive information
-in the network, or because so much other data is transferred in the
-network. This is not a safe policy.
-
-Individual persons also have confidential information, such as
-diaries, love letters, health care documents, information about their
-personal interests and habits, professional data, job applications,
-tax reports, political documents, unpublished manuscripts, etc.
-
-One should also be aware that economical intelligence and industrial
-espionage has recently become a major priority of the intelligence
-agencies of major governments. President Clinton recently assigned
-economical espionage as the primary task of the CIA, and the French
-have repeatedly been publicly boasting about their achievements on
-this field.
-
-
-There is also another frightening aspect about the poor security of
-communications. Computer storage and analysis capability has
-increased so much that it is feasible for governments, major
-companies, and criminal organizations to automatically analyze,
-identify, classify, and file information about millions of people over
-the years. Because most of the work can be automated, the cost of
-collecting this information is getting very low.
-
-Government agencies may be able to monitor major communication
-systems, telephones, fax, computer networks, etc., and passively
-collect huge amounts of information about all people with any
-significant position in the society. Most of this information is not
-sensitive, and many people would say there is no harm in someone
-getting that information. However, the information starts to get
-sensitive when someone has enough of it. You may not mind someone
-knowing what you bought from the shop one random day, but you might
-not like someone knowing every small thing you have bought in the last
-ten years.
-
-If the government some day starts to move into a more totalitarian
-direction (one should remember that Nazi Germany was created by
-democratic elections), there is considerable danger of an ultimate
-totalitarian state. With enough information (the automatically
-collected records of an individual can be manually analyzed when the
-person becomes interesting), one can form a very detailed picture of
-the individual's interests, opinions, beliefs, habits, friends,
-lovers, weaknesses, etc. This information can be used to 1) locate
-any persons who might oppose the new system 2) use deception to
-disturb any organizations which might rise against the government 3)
-eliminate difficult individuals without anyone understanding what
-happened. Additionally, if the government can monitor communications
-too effectively, it becomes too easy to locate and eliminate any
-persons distributing information contrary to the official truth.
-
-Fighting crime and terrorism are often used as grounds for domestic
-surveillance and restricting encryption. These are good goals, but
-there is considerable danger that the surveillance data starts to get
-used for questionable purposes. I find that it is better to tolerate
-a small amount of crime in the society than to let the society become
-fully controlled. I am in favor of a fairly strong state, but the
-state must never get so strong that people become unable to spread
-contra-offical information and unable to overturn the government if it
-is bad. The danger is that when you notice that the government is
-too powerful, it is too late. Also, the real power may not be where
-the official government is.
-
-For these reasons (privacy, protecting trade secrets, and making it
-more difficult to create a totalitarian state), I think that strong
-cryptography should be integrated to the tools we use every day.
-Using it causes no harm (except for those who wish to monitor
-everything), but not using it can cause huge problems. If the society
-changes in undesirable ways, then it will be to late to start
-encrypting.
-
-Encryption has had a "military" or "classified" flavor to it. There
-are no longer any grounds for this. The military can and will use its
-own encryption; that is no excuse to prevent the civilians from
-protecting their privacy and secrets. Information on strong
-encryption is available in every major bookstore, scientific library,
-and patent office around the world, and strong encryption software is
-available in every country on the Internet.
-
-Some people would like to make it illegal to use encryption, or to
-force people to use encryption that governments can break. This
-approach offers no protection if the government turns bad. Also, the
-"bad guys" will be using true strong encryption anyway. Good
-encryption techniques are too widely known to make them disappear.
-Thus, any "key escrow encryption" or other restrictions will only help
-monitor ordinary people and petty criminals. It does not help against
-powerful criminals, terrorists, or espionage, because they will know
-how to use strong encryption anyway. (One source for internationally
-available encryption software is http://www.cs.hut.fi/crypto.)
-
-
-OVERVIEW OF SECURE SHELL
-
-The software consists of a number of programs.
-
- sshd Server program run on the server machine. This
- listens for connections from client machines, and
- whenever it receives a connection, it performs
- authentication and starts serving the client.
-
- ssh This is the client program used to log into another
- machine or to execute commands on the other machine.
- "slogin" is another name for this program.
-
- scp Securely copies files from one machine to another.
-
- ssh-keygen Used to create RSA keys (host keys and user
- authentication keys).
-
- ssh-agent Authentication agent. This can be used to hold RSA
- keys for authentication.
-
- ssh-add Used to register new keys with the agent.
-
- make-ssh-known-hosts
- Used to create the /etc/ssh_known_hosts file.
-
-
-Ssh is the program users normally use. It is started as
-
- ssh host
-
-or
-
- ssh host command
-
-The first form opens a new shell on the remote machine (after
-authentication). The latter form executes the command on the remote
-machine.
-
-When started, the ssh connects sshd on the server machine, verifies
-that the server machine really is the machine it wanted to connect,
-exchanges encryption keys (in a manner which prevents an outside
-listener from getting the keys), performs authentication using .rhosts
-and /etc/hosts.equiv, RSA authentication, or conventional password
-based authentication. The server then (normally) allocates a
-pseudo-terminal and starts an interactive shell or user program.
-
-The TERM environment variable (describing the type of the user's
-terminal) is passed from the client side to the remote side. Also,
-terminal modes will be copied from the client side to the remote side
-to preserve user preferences (e.g., the erase character).
-
-If the DISPLAY variable is set on the client side, the server will
-create a dummy X server and set DISPLAY accordingly. Any connections
-to the dummy X server will be forwarded through the secure channel,
-and will be made to the real X server from the client side. An
-arbitrary number of X programs can be started during the session, and
-starting them does not require anything special from the user. (Note
-that the user must not manually set DISPLAY, because then it would
-connect directly to the real display instead of going through the
-encrypted channel). This behavior can be disabled in the
-configuration file or by giving the -x option to the client.
-
-Arbitrary IP ports can be forwarded over the secure channel. The
-program then creates a port on one side, and whenever a connection is
-opened to this port, it will be passed over the secure channel, and a
-connection will be made from the other side to a specified host:port
-pair. Arbitrary IP forwarding must always be explicitly requested,
-and cannot be used to forward privileged ports (unless the user is
-root). It is possible to specify automatic forwards in a per-user
-configuration file, for example to make electronic cash systems work
-securely.
-
-If there is an authentication agent on the client side, connection to
-it will be automatically forwarded to the server side.
-
-For more infomation, see the manual pages ssh(1), sshd(8), scp(1),
-ssh-keygen(1), ssh-agent(1), ssh-add(1), and make-ssh-known-hosts(1)
-included in this distribution.
-
-
-X11 CONNECTION FORWARDING
-
-X11 forwarding serves two purposes: it is a convenience to the user
-because there is no need to set the DISPLAY variable, and it provides
-encrypted X11 connections. I cannot think of any other easy way to
-make X11 connections encrypted; modifying the X server, clients or
-libraries would require special work for each machine, vendor and
-application. Widely used IP-level encryption does not seem likely for
-several years. Thus what we have left is faking an X server on the
-same machine where the clients are run, and forwarding the connections
-to a real X server over the secure channel.
-
-X11 forwarding works as follows. The client extracts Xauthority
-information for the server. It then creates random authorization
-data, and sends the random data to the server. The server allocates
-an X11 display number, and stores the (fake) Xauthority data for this
-display. Whenever an X11 connection is opened, the server forwards
-the connection over the secure channel to the client, and the client
-parses the first packet of the X11 protocol, substitutes real
-authentication data for the fake data (if the fake data matched), and
-forwards the connection to the real X server.
-
-If the display does not have Xauthority data, the server will create a
-unix domain socket in /tmp/.X11-unix, and use the unix domain socket
-as the display. No authentication information is forwarded in this
-case. X11 connections are again forwarded over the secure channel.
-To the X server the connections appear to come from the client
-machine, and the server must have connections allowed from the local
-machine. Using authentication data is always recommended because not
-using it makes the display insecure. If XDM is used, it automatically
-generates the authentication data.
-
-One should be careful not to use "xin" or "xstart" or other similar
-scripts that explicitly set DISPLAY to start X sessions in a remote
-machine, because the connection will then not go over the secure
-channel. The recommended way to start a shell in a remote machine is
-
- xterm -e ssh host &
-
-and the recommended way to execute an X11 application in a remote
-machine is
-
- ssh -n host emacs &
-
-If you need to type a password/passphrase for the remote machine,
-
- ssh -f host emacs
-
-may be useful.
-
-
-
-RSA AUTHENTICATION
-
-RSA authentication is based on public key cryptograpy. The idea is
-that there are two encryption keys, one for encryption and another for
-decryption. It is not possible (on human timescale) to derive the
-decryption key from the encryption key. The encryption key is called
-the public key, because it can be given to anyone and it is not
-secret. The decryption key, on the other hand, is secret, and is
-called the private key.
-
-RSA authentication is based on the impossibility of deriving the
-private key from the public key. The public key is stored on the
-server machine in the user's $HOME/.ssh/authorized_keys file. The
-private key is only kept on the user's local machine, laptop, or other
-secure storage. Then the user tries to log in, the client tells the
-server the public key that the user wishes to use for authentication.
-The server then checks if this public key is admissible. If so, it
-generates a 256 bit random number, encrypts it with the public key,
-and sends the value to the client. The client then decrypts the
-number with its private key, computes a 128 bit MD5 checksum from the
-resulting data, and sends the checksum back to the server. (Only a
-checksum is sent to prevent chosen-plaintext attacks against RSA.)
-The server checks computes a checksum from the correct data,
-and compares the checksums. Authentication is accepted if the
-checksums match. (Theoretically this indicates that the client
-only probably knows the correct key, but for all practical purposes
-there is no doubt.)
-
-The RSA private key can be protected with a passphrase. The
-passphrase can be any string; it is hashed with MD5 to produce an
-encryption key for IDEA, which is used to encrypt the private part of
-the key file. With passphrase, authorization requires access to the key
-file and the passphrase. Without passphrase, authorization only
-depends on possession of the key file.
-
-RSA authentication is the most secure form of authentication supported
-by this software. It does not rely on the network, routers, domain
-name servers, or the client machine. The only thing that matters is
-access to the private key.
-
-All this, of course, depends on the security of the RSA algorithm
-itself. RSA has been widely known since about 1978, and no effective
-methods for breaking it are known if it is used properly. Care has
-been taken to avoid the well-known pitfalls. Breaking RSA is widely
-believed to be equivalent to factoring, which is a very hard
-mathematical problem that has received considerable public research.
-So far, no effective methods are known for numbers bigger than about
-512 bits. However, as computer speeds and factoring methods are
-increasing, 512 bits can no longer be considered secure. The
-factoring work is exponential, and 768 or 1024 bits are widely
-considered to be secure in the near future.
-
-
-RHOSTS AUTHENTICATION
-
-Conventional .rhosts and hosts.equiv based authentication mechanisms
-are fundamentally insecure due to IP, DNS (domain name server) and
-routing spoofing attacks. Additionally this authentication method
-relies on the integrity of the client machine. These weaknesses is
-tolerable, and been known and exploited for a long time.
-
-Ssh provides an improved version of these types of authentication,
-because they are very convenient for the user (and allow easy
-transition from rsh and rlogin). It permits these types of
-authentication, but additionally requires that the client host be
-authenticated using RSA.
-
-The server has a list of host keys stored in /etc/ssh_known_host, and
-additionally each user has host keys in $HOME/.ssh/known_hosts. Ssh
-uses the name servers to obtain the canonical name of the client host,
-looks for its public key in its known host files, and requires the
-client to prove that it knows the private host key. This prevents IP
-and routing spoofing attacks (as long as the client machine private
-host key has not been compromized), but is still vulnerable to DNS
-attacks (to a limited extent), and relies on the integrity of the
-client machine as to who is requesting to log in. This prevents
-outsiders from attacking, but does not protect against very powerful
-attackers. If maximal security is desired, only RSA authentication
-should be used.
-
-It is possible to enable conventional .rhosts and /etc/hosts.equiv
-authentication (without host authentication) at compile time by giving
-the option --with-rhosts to configure. However, this is not
-recommended, and is not done by default.
-
-These weaknesses are present in rsh and rlogin. No improvement in
-security will be obtained unless rlogin and rsh are completely
-disabled (commented out in /etc/inetd.conf). This is highly
-recommended.
-
-
-WEAKEST LINKS IN SECURITY
-
-One should understand that while this software may provide
-cryptographically secure communications, it may be easy to
-monitor the communications at their endpoints.
-
-Basically, anyone with root access on the local machine on which you
-are running the software may be able to do anything. Anyone with root
-access on the server machine may be able to monitor your
-communications, and a very talented root user might even be able to
-send his/her own requests to your authentication agent.
-
-One should also be aware that computers send out electromagnetic
-radition that can sometimes be picked up hundreds of meters away.
-Your keyboard is particularly easy to listen to. The image on your
-monitor might also be seen on another monitor in a van parked behind
-your house.
-
-Beware that unwanted visitors might come to your home or office and
-use your machine while you are away. They might also make
-modifications or install bugs in your hardware or software.
-
-Beware that the most effective way for someone to decrypt your data
-may be with a rubber hose.
-
-
-LEGAL ISSUES
-
-As far as I am concerned, anyone is permitted to use this software
-freely. However, see the file COPYING for detailed copying,
-licensing, and distribution information.
-
-In some countries, particularly France, Russia, Iraq, and Pakistan,
-it may be illegal to use any encryption at all without a special
-permit, and the rumor has it that you cannot get a permit for any
-strong encryption.
-
-This software may be freely imported into the United States; however,
-the United States Government may consider re-exporting it a criminal
-offence.
-
-Note that any information and cryptographic algorithms used in this
-software are publicly available on the Internet and at any major
-bookstore, scientific library, or patent office worldwide.
-
-THERE IS NO WARRANTY FOR THIS PROGRAM. Please consult the file
-COPYING for more information.
-
-
-MAILING LISTS AND OTHER INFORMATION
-
-There is a mailing list for ossh. It is ossh@sics.se. If you would
-like to join, send a message to majordomo@sics.se with "subscribe
-ssh" in body.
-
-The WWW home page for ssh is http://www.cs.hut.fi/ssh. It contains an
-archive of the mailing list, and detailed information about new
-releases, mailing lists, and other relevant issues.
-
-Bug reports should be sent to ossh-bugs@sics.se.
-
-
-ABOUT THE AUTHOR
-
-This software was written by Tatu Ylonen <ylo@cs.hut.fi>. I work as a
-researcher at Helsinki University of Technology, Finland. For more
-information, see http://www.cs.hut.fi/~ylo/. My PGP public key is
-available via finger from ylo@cs.hut.fi and from the key servers. I
-prefer PGP encrypted mail.
-
-The author can be contacted via ordinary mail at
- Tatu Ylonen
- Helsinki University of Technology
- Otakaari 1
- FIN-02150 ESPOO
- Finland
-
- Fax. +358-0-4513293
-
-
-ACKNOWLEDGEMENTS
-
-I thank Tero Kivinen, Timo Rinne, Janne Snabb, and Heikki Suonsivu for
-their help and comments in the design, implementation and porting of
-this software. I also thank numerous contributors, including but not
-limited to Walker Aumann, Jurgen Botz, Hans-Werner Braun, Stephane
-Bortzmeyer, Adrian Colley, Michael Cooper, David Dombek, Jerome
-Etienne, Bill Fithen, Mark Fullmer, Bert Gijsbers, Andreas Gustafsson,
-Michael Henits, Steve Johnson, Thomas Koenig, Felix Leitner, Gunnar
-Lindberg, Andrew Macpherson, Marc Martinec, Paul Mauvais, Donald
-McKillican, Leon Mlakar, Robert Muchsel, Mark Treacy, Bryan
-O'Sullivan, Mikael Suokas, Ollivier Robert, Jakob Schlyter, Tomasz
-Surmacz, Alvar Vinacua, Petri Virkkula, Michael Warfield, and
-Cristophe Wolfhugel.
-
-Thanks also go to Philip Zimmermann, whose PGP software and the
-associated legal battle provided inspiration, motivation, and many
-useful techniques, and to Bruce Schneier whose book Applied
-Cryptography has done a great service in widely distributing knowledge
-about cryptographic methods.
-
-
-Copyright (c) 1995 Tatu Ylonen, Espoo, Finland.
diff --git a/usr/src/cmd/ssh/doc/WARNING.RNG b/usr/src/cmd/ssh/doc/WARNING.RNG
deleted file mode 100644
index 21f4901c98..0000000000
--- a/usr/src/cmd/ssh/doc/WARNING.RNG
+++ /dev/null
@@ -1,79 +0,0 @@
-This document contains a description of portable OpenSSH's random
-number collection code. An alternate reading of this text could
-well be titled "Why I should pressure my system vendor to supply
-/dev/random in their OS".
-
-Why is this important? OpenSSH depends on good, unpredictable numbers
-for generating keys, performing digital signatures and forming
-cryptographic challenges. If the random numbers that it uses are
-predictable, then the strength of the whole system is compromised.
-
-A particularly pernicious problem arises with DSA keys (used by the
-ssh2 protocol). Performing a DSA signature (which is required for
-authentication), entails the use of a 160 bit random number. If an
-attacker can predict this number, then they can deduce your *private*
-key and impersonate you or your hosts.
-
-If you are using the builtin random number support (configure will
-tell you if this is the case), then read this document in its entirety.
-
-Please also request that your OS vendor provides a kernel-based random
-number collector (/dev/random) in future versions of your operating
-systems by default.
-
-On to the description...
-
-The portable OpenSSH contains random number collection support for
-systems which lack a kernel entropy pool (/dev/random).
-
-This collector operates by executing the programs listed in
-($etcdir)/ssh_prng_cmds, reading their output and adding it to the
-PRNG supplied by OpenSSL (which is hash-based). It also stirs in the
-output of several system calls and timings from the execution of the
-programs that it runs.
-
-The ssh_prng_cmds file also specifies a 'rate' for each program. This
-represents the number of bits of randomness per byte of output from
-the specified program.
-
-The random number code will also read and save a seed file to
-~/.ssh/prng_seed. This contents of this file are added to the random
-number generator at startup. The goal here is to maintain as much
-randomness between sessions as possible.
-
-The entropy collection code has two main problems:
-
-1. It is slow.
-
-Executing each program in the list can take a large amount of time,
-especially on slower machines. Additionally some program can take a
-disproportionate time to execute.
-
-This can be tuned by the administrator. To debug the entropy
-collection is great detail, turn on full debugging ("ssh -v -v -v" or
-"sshd -d -d -d"). This will list each program as it is executed, how
-long it took to execute, its exit status and whether and how much data
-it generated. You can the find the culprit programs which are causing
-the real slow-downs.
-
-The entropy collector will timeout programs which take too long
-to execute, the actual timeout used can be adjusted with the
---with-entropy-timeout configure option. OpenSSH will not try to
-re-execute programs which have not been found, have had a non-zero
-exit status or have timed out more than a couple of times.
-
-2. Estimating the real 'rate' of program outputs is non-trivial
-
-The shear volume of the task is problematic: there are currently
-around 50 commands in the ssh_prng_cmds list, portable OpenSSH
-supports at least 12 different OSs. That is already 600 sets of data
-to be analysed, without taking into account the numerous differences
-between versions of each OS.
-
-On top of this, the different commands can produce varying amounts of
-usable data depending on how busy the machine is, how long it has been
-up and various other factors.
-
-To make matters even more complex, some of the commands are reporting
-largely the same data as other commands (eg. the various "ps" calls).
-
diff --git a/usr/src/cmd/ssh/doc/nchan.ms b/usr/src/cmd/ssh/doc/nchan.ms
deleted file mode 100644
index 1679d39f30..0000000000
--- a/usr/src/cmd/ssh/doc/nchan.ms
+++ /dev/null
@@ -1,97 +0,0 @@
-.\"
-.\" Copyright (c) 1999 Markus Friedl. All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
-.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
-.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
-.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
-.\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
-.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-.\"
-.TL
-OpenSSH Channel Close Protocol 1.5 Implementation
-.SH
-Channel Input State Diagram
-.PS
-reset
-l=1
-s=1.2
-ellipsewid=s*ellipsewid
-boxwid=s*boxwid
-ellipseht=s*ellipseht
-S1: ellipse "INPUT" "OPEN"
-move right 2*l from last ellipse.e
-S4: ellipse "INPUT" "CLOSED"
-move down l from last ellipse.s
-S3: ellipse "INPUT" "WAIT" "OCLOSED"
-move down l from 1st ellipse.s
-S2: ellipse "INPUT" "WAIT" "DRAIN"
-arrow "" "rcvd OCLOSE/" "shutdown_read" "send IEOF" from S1.e to S4.w
-arrow "ibuf_empty/" "send IEOF" from S2.e to S3.w
-arrow from S1.s to S2.n
-box invis "read_failed/" "shutdown_read" with .e at last arrow.c
-arrow from S3.n to S4.s
-box invis "rcvd OCLOSE/" "-" with .w at last arrow.c
-ellipse wid .9*ellipsewid ht .9*ellipseht at S4
-arrow "start" "" from S1.w+(-0.5,0) to S1.w
-arrow from S2.ne to S4.sw
-box invis "rcvd OCLOSE/ " with .e at last arrow.c
-box invis " send IEOF" with .w at last arrow.c
-.PE
-.SH
-Channel Output State Diagram
-.PS
-S1: ellipse "OUTPUT" "OPEN"
-move right 2*l from last ellipse.e
-S3: ellipse "OUTPUT" "WAIT" "IEOF"
-move down l from last ellipse.s
-S4: ellipse "OUTPUT" "CLOSED"
-move down l from 1st ellipse.s
-S2: ellipse "OUTPUT" "WAIT" "DRAIN"
-arrow "" "write_failed/" "shutdown_write" "send OCLOSE" from S1.e to S3.w
-arrow "obuf_empty ||" "write_failed/" "shutdown_write" "send OCLOSE" from S2.e to S4.w
-arrow from S1.s to S2.n
-box invis "rcvd IEOF/" "-" with .e at last arrow.c
-arrow from S3.s to S4.n
-box invis "rcvd IEOF/" "-" with .w at last arrow.c
-ellipse wid .9*ellipsewid ht .9*ellipseht at S4
-arrow "start" "" from S1.w+(-0.5,0) to S1.w
-.PE
-.SH
-Notes
-.PP
-The input buffer is filled with data from the socket
-(the socket represents the local consumer/producer of the
-forwarded channel).
-The data is then sent over the INPUT-end (transmit-end) of the channel to the
-remote peer.
-Data sent by the peer is received on the OUTPUT-end (receive-end),
-saved in the output buffer and written to the socket.
-.PP
-If the local protocol instance has forwarded all data on the
-INPUT-end of the channel, it sends an IEOF message to the peer.
-If the peer receives the IEOF and has consumed all
-data he replies with an OCLOSE.
-When the local instance receives the OCLOSE
-he considers the INPUT-half of the channel closed.
-The peer has his OUTOUT-half closed.
-.PP
-A channel can be deallocated by a protocol instance
-if both the INPUT- and the OUTOUT-half on his
-side of the channel are closed.
-Note that when an instance is unable to consume the
-received data, he is permitted to send an OCLOSE
-before the matching IEOF is received.
diff --git a/usr/src/cmd/ssh/doc/nchan2.ms b/usr/src/cmd/ssh/doc/nchan2.ms
deleted file mode 100644
index 1b119d1353..0000000000
--- a/usr/src/cmd/ssh/doc/nchan2.ms
+++ /dev/null
@@ -1,64 +0,0 @@
-.TL
-OpenSSH Channel Close Protocol 2.0 Implementation
-.SH
-Channel Input State Diagram
-.PS
-reset
-l=1
-s=1.2
-ellipsewid=s*ellipsewid
-boxwid=s*boxwid
-ellipseht=s*ellipseht
-S1: ellipse "INPUT" "OPEN"
-move right 2*l from last ellipse.e
-S3: ellipse invis
-move down l from last ellipse.s
-S4: ellipse "INPUT" "CLOSED"
-move down l from 1st ellipse.s
-S2: ellipse "INPUT" "WAIT" "DRAIN"
-arrow from S1.e to S4.n
-box invis "rcvd CLOSE/" "shutdown_read" with .sw at last arrow.c
-arrow "ibuf_empty ||" "rcvd CLOSE/" "send EOF" "" from S2.e to S4.w
-arrow from S1.s to S2.n
-box invis "read_failed/" "shutdown_read" with .e at last arrow.c
-ellipse wid .9*ellipsewid ht .9*ellipseht at S4
-arrow "start" "" from S1.w+(-0.5,0) to S1.w
-.PE
-.SH
-Channel Output State Diagram
-.PS
-S1: ellipse "OUTPUT" "OPEN"
-move right 2*l from last ellipse.e
-S3: ellipse invis
-move down l from last ellipse.s
-S4: ellipse "OUTPUT" "CLOSED"
-move down l from 1st ellipse.s
-S2: ellipse "OUTPUT" "WAIT" "DRAIN"
-arrow from S1.e to S4.n
-box invis "write_failed/" "shutdown_write" with .sw at last arrow.c
-arrow "obuf_empty ||" "write_failed/" "shutdown_write" "" from S2.e to S4.w
-arrow from S1.s to S2.n
-box invis "rcvd EOF ||" "rcvd CLOSE/" "-" with .e at last arrow.c
-ellipse wid .9*ellipsewid ht .9*ellipseht at S4
-arrow "start" "" from S1.w+(-0.5,0) to S1.w
-.PE
-.SH
-Notes
-.PP
-The input buffer is filled with data from the socket
-(the socket represents the local consumer/producer of the
-forwarded channel).
-The data is then sent over the INPUT-end (transmit-end) of the channel to the
-remote peer.
-Data sent by the peer is received on the OUTPUT-end (receive-end),
-saved in the output buffer and written to the socket.
-.PP
-If the local protocol instance has forwarded all data on the
-INPUT-end of the channel, it sends an EOF message to the peer.
-.PP
-A CLOSE message is sent to the peer if
-both the INPUT- and the OUTOUT-half of the local
-end of the channel are closed.
-.PP
-The channel can be deallocated by a protocol instance
-if a CLOSE message he been both sent and received.
diff --git a/usr/src/cmd/ssh/etc/Makefile b/usr/src/cmd/ssh/etc/Makefile
deleted file mode 100644
index 54f782259e..0000000000
--- a/usr/src/cmd/ssh/etc/Makefile
+++ /dev/null
@@ -1,66 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
-#
-
-MANIFEST = ssh.xml
-SVCMETHOD = sshd
-
-include ../../Makefile.cmd
-include ../Makefile.ssh-common
-
-SSHASKPASS= $(ROOTLIBSSH)/ssh-askpass
-ETCSSHDIR= $(ROOTETC)/ssh
-DIRS= $(ETCSSHDIR) $(ROOTLIBSSH) $(ROOTLIBSUNSSH)
-
-FILES= sshd_config ssh_config moduli.sunssh
-
-ETCSSHLINKS= $(ETCSSHDIR)/moduli
-
-ETCSSHFILES= $(FILES:%=$(ETCSSHDIR)/%)
-
-$(ETCSSHFILES) := FILEMODE= 644
-
-ROOTMANIFESTDIR = $(ROOTSVCNETWORK)
-
-$(ETCSSHDIR)/% : %
- $(INS.file)
-
-$(DIRS):
- $(INS.dir)
-
-$(ETCSSHDIR)/moduli: $(ETCSSHDIR)/moduli.sunssh
- -$(RM) $@; $(SYMLINK) moduli.sunssh $@
-
-$(ROOTLIBSSH)/%: %
- $(INS.file)
-
-$(POFILE):
-
-all lint clean clobber _msg:
-
-install: all $(DIRS) $(ETCSSHFILES) $(ETCSSHLINKS) $(ROOTMANIFEST) $(ROOTSVCMETHOD) \
- $(SSHASKPASS)
-
-check: $(CHKMANIFEST)
-
-include ../../Makefile.targ
diff --git a/usr/src/cmd/ssh/etc/ssh.xml b/usr/src/cmd/ssh/etc/ssh.xml
deleted file mode 100644
index f5fb471669..0000000000
--- a/usr/src/cmd/ssh/etc/ssh.xml
+++ /dev/null
@@ -1,177 +0,0 @@
-<?xml version="1.0"?>
-<!DOCTYPE service_bundle SYSTEM "/usr/share/lib/xml/dtd/service_bundle.dtd.1">
-<!--
- CDDL HEADER START
-
- The contents of this file are subject to the terms of the
- Common Development and Distribution License (the "License").
- You may not use this file except in compliance with the License.
-
- You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- or http://www.opensolaris.org/os/licensing.
- See the License for the specific language governing permissions
- and limitations under the License.
-
- When distributing Covered Code, include this CDDL HEADER in each
- file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- If applicable, add the following below this CDDL HEADER, with the
- fields enclosed by brackets "[]" replaced with your own identifying
- information: Portions Copyright [yyyy] [name of copyright owner]
-
- CDDL HEADER END
-
- Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- Use is subject to license terms.
-
- Copyright 2016 Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>
-
- NOTE: This service manifest is not editable; its contents will
- be overwritten by package or patch operations, including
- operating system upgrade. Make customizations in a different
- file.
--->
-
-<service_bundle type='manifest' name='SUNWsshdr:ssh'>
-
-<service
- name='network/ssh'
- type='service'
- version='1'>
-
- <create_default_instance enabled='false' />
-
- <single_instance />
-
- <dependency name='fs-local'
- grouping='require_all'
- restart_on='none'
- type='service'>
- <service_fmri
- value='svc:/system/filesystem/local' />
- </dependency>
-
- <dependency name='fs-autofs'
- grouping='optional_all'
- restart_on='none'
- type='service'>
- <service_fmri value='svc:/system/filesystem/autofs' />
- </dependency>
-
- <dependency name='net-loopback'
- grouping='require_all'
- restart_on='none'
- type='service'>
- <service_fmri value='svc:/network/loopback' />
- </dependency>
-
- <dependency name='net-physical'
- grouping='require_all'
- restart_on='none'
- type='service'>
- <service_fmri value='svc:/network/physical' />
- </dependency>
-
- <dependency name='cryptosvc'
- grouping='require_all'
- restart_on='none'
- type='service'>
- <service_fmri value='svc:/system/cryptosvc' />
- </dependency>
-
- <dependency name='utmp'
- grouping='require_all'
- restart_on='none'
- type='service'>
- <service_fmri value='svc:/system/utmp' />
- </dependency>
-
- <dependency name='network_ipfilter'
- grouping='optional_all'
- restart_on='error'
- type='service'>
- <service_fmri value='svc:/network/ipfilter:default' />
- </dependency>
-
- <dependency name='config_data'
- grouping='require_all'
- restart_on='restart'
- type='path'>
- <service_fmri
- value='file://localhost/etc/ssh/sshd_config' />
- </dependency>
-
- <dependent
- name='ssh_multi-user-server'
- grouping='optional_all'
- restart_on='none'>
- <service_fmri
- value='svc:/milestone/multi-user-server' />
- </dependent>
-
- <exec_method
- type='method'
- name='start'
- exec='/lib/svc/method/sshd start'
- timeout_seconds='60'/>
-
- <exec_method
- type='method'
- name='stop'
- exec=':kill'
- timeout_seconds='60' />
-
- <exec_method
- type='method'
- name='refresh'
- exec='/lib/svc/method/sshd restart'
- timeout_seconds='60' />
-
- <property_group name='startd'
- type='framework'>
- <!-- sub-process core dumps shouldn't restart session -->
- <propval name='ignore_error'
- type='astring' value='core,signal' />
- </property_group>
-
- <property_group name='general' type='framework'>
- <!-- to start stop sshd -->
- <propval name='action_authorization' type='astring'
- value='solaris.smf.manage.ssh' />
- </property_group>
-
- <property_group name='firewall_context' type='com.sun,fw_definition'>
- <propval name='name' type='astring' value='ssh' />
- <propval name='ipf_method' type='astring'
- value='/lib/svc/method/sshd ipfilter' />
- </property_group>
-
- <property_group name='firewall_config' type='com.sun,fw_configuration'>
- <propval name='policy' type='astring' value='use_global' />
- <propval name='block_policy' type='astring'
- value='use_global' />
- <propval name='apply_to' type='astring' value='' />
- <propval name='apply_to_6' type='astring' value='' />
- <propval name='exceptions' type='astring' value='' />
- <propval name='exceptions_6' type='astring' value='' />
- <propval name='target' type='astring' value='' />
- <propval name='target_6' type='astring' value='' />
- <propval name='value_authorization' type='astring'
- value='solaris.smf.value.firewall.config' />
- </property_group>
-
- <stability value='Unstable' />
-
- <template>
- <common_name>
- <loctext xml:lang='C'>
- SSH server
- </loctext>
- </common_name>
- <documentation>
- <manpage title='sshd' section='1M' manpath='/usr/share/man' />
- </documentation>
- </template>
-
-</service>
-
-</service_bundle>
diff --git a/usr/src/cmd/ssh/etc/ssh_config b/usr/src/cmd/ssh/etc/ssh_config
deleted file mode 100644
index cdb9d97d45..0000000000
--- a/usr/src/cmd/ssh/etc/ssh_config
+++ /dev/null
@@ -1,31 +0,0 @@
-# Copyright (c) 2001 by Sun Microsystems, Inc.
-# All rights reserved.
-#
-# ident "%Z%%M% %I% %E% SMI"
-#
-# This file provides defaults for ssh(1).
-# The values can be changed in per-user configuration files $HOME/.ssh/config
-# or on the command line of ssh(1).
-
-# Configuration data is parsed as follows:
-# 1. command line options
-# 2. user-specific file
-# 3. system-wide file /etc/ssh/ssh_config
-#
-# Any configuration value is only changed the first time it is set.
-# host-specific definitions should be at the beginning of the
-# configuration file, and defaults at the end.
-
-# Example (matches compiled in defaults):
-#
-# Host *
-# ForwardAgent no
-# ForwardX11 no
-# PubkeyAuthentication yes
-# PasswordAuthentication yes
-# FallBackToRsh no
-# UseRsh no
-# BatchMode no
-# CheckHostIP yes
-# StrictHostKeyChecking ask
-# EscapeChar ~
diff --git a/usr/src/cmd/ssh/etc/sshd b/usr/src/cmd/ssh/etc/sshd
deleted file mode 100644
index d52b1afd25..0000000000
--- a/usr/src/cmd/ssh/etc/sshd
+++ /dev/null
@@ -1,127 +0,0 @@
-#!/sbin/sh
-#
-# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
-#
-# Copyright 2016 Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>
-#
-
-. /lib/svc/share/ipf_include.sh
-. /lib/svc/share/smf_include.sh
-
-SSHDIR=/etc/ssh
-KEYGEN="/usr/bin/ssh-keygen -q"
-PIDFILE=/var/run/sshd.pid
-
-# Checks to see if RSA, and DSA host keys are available
-# if any of these keys are not present, the respective keys are created.
-create_key()
-{
- keypath=$1
- keytype=$2
-
- if [ ! -f $keypath ]; then
- #
- # HostKey keywords in sshd_config may be preceded or
- # followed by a mix of any number of space or tabs,
- # and optionally have an = between keyword and
- # argument. We use two grep invocations such that we
- # can match HostKey case insensitively but still have
- # the case of the path name be significant, keeping
- # the pattern somewhat more readable.
- #
- # The character classes below contain one literal
- # space and one literal tab.
- #
- grep -i "^[ ]*HostKey[ ]*=\{0,1\}[ ]*$keypath" \
- $SSHDIR/sshd_config | grep "$keypath" > /dev/null 2>&1
-
- if [ $? -eq 0 ]; then
- echo Creating new $keytype public/private host key pair
- $KEYGEN -f $keypath -t $keytype -N ''
- if [ $? -ne 0 ]; then
- echo "Could not create $keytype key: $keypath"
- exit $SMF_EXIT_ERR_CONFIG
- fi
- fi
- fi
-}
-
-create_ipf_rules()
-{
- FMRI=$1
- ipf_file=`fmri_to_file ${FMRI} $IPF_SUFFIX`
- ipf6_file=`fmri_to_file ${FMRI} $IPF6_SUFFIX`
- policy=`get_policy ${FMRI}`
-
- #
- # Get port from /etc/ssh/sshd_config
- #
- tports=`grep "^Port" /etc/ssh/sshd_config 2>/dev/null | \
- awk '{print $2}'`
-
- echo "# $FMRI" >$ipf_file
- echo "# $FMRI" >$ipf6_file
- for port in $tports; do
- generate_rules $FMRI $policy "tcp" $port $ipf_file
- generate_rules $FMRI $policy "tcp" $port $ipf6_file _6
- done
-}
-
-# This script is being used for two purposes: as part of an SMF
-# start/stop/refresh method, and as a sysidconfig(1M)/sys-unconfig(1M)
-# application.
-#
-# Both, the SMF methods and sysidconfig/sys-unconfig use different
-# arguments..
-
-case $1 in
- # sysidconfig/sys-unconfig arguments (-c and -u)
-'-c')
- /usr/bin/ssh-keygen -A
- if [ $? -ne 0 ]; then
- create_key $SSHDIR/ssh_host_rsa_key rsa
- create_key $SSHDIR/ssh_host_dsa_key dsa
- fi
- ;;
-
-'-u')
- # sys-unconfig(1M) knows how to remove ssh host keys, so there's
- # nothing to do here.
- :
- ;;
-
- # SMF arguments (start and restart [really "refresh"])
-
-'ipfilter')
- create_ipf_rules $2
- ;;
-
-'start')
- #
- # If host keys don't exist when the service is started, create
- # them; sysidconfig is not run in every situation (such as on
- # the install media).
- #
- /usr/bin/ssh-keygen -A
- if [ $? -ne 0 ]; then
- create_key $SSHDIR/ssh_host_rsa_key rsa
- create_key $SSHDIR/ssh_host_dsa_key dsa
- fi
-
- /usr/lib/ssh/sshd
- ;;
-
-'restart')
- if [ -f "$PIDFILE" ]; then
- /usr/bin/kill -HUP `/usr/bin/cat $PIDFILE`
- fi
- ;;
-
-*)
- echo "Usage: $0 { start | restart }"
- exit 1
- ;;
-esac
-
-exit $?
diff --git a/usr/src/cmd/ssh/etc/sshd_config b/usr/src/cmd/ssh/etc/sshd_config
deleted file mode 100644
index fd4aa5df46..0000000000
--- a/usr/src/cmd/ssh/etc/sshd_config
+++ /dev/null
@@ -1,145 +0,0 @@
-#
-# Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
-#
-# Configuration file for sshd(1m) (see also sshd_config(4))
-#
-
-# Protocol versions supported
-#
-# The sshd shipped in this release of Solaris has support for major versions
-# 1 and 2. It is recommended due to security weaknesses in the v1 protocol
-# that sites run only v2 if possible. Support for v1 is provided to help sites
-# with existing ssh v1 clients/servers to transition.
-# Support for v1 may not be available in a future release of Solaris.
-#
-# To enable support for v1 an RSA1 key must be created with ssh-keygen(1).
-# RSA and DSA keys for protocol v2 are created by /etc/init.d/sshd if they
-# do not already exist, RSA1 keys for protocol v1 are not automatically created.
-
-# Uncomment ONLY ONE of the following Protocol statements.
-
-# Only v2 (recommended)
-Protocol 2
-
-# Both v1 and v2 (not recommended)
-#Protocol 2,1
-
-# Only v1 (not recommended)
-#Protocol 1
-
-# Listen port (the IANA registered port number for ssh is 22)
-Port 22
-
-# The default listen address is all interfaces, this may need to be changed
-# if you wish to restrict the interfaces sshd listens on for a multi homed host.
-# Multiple ListenAddress entries are allowed.
-
-# IPv4 only
-#ListenAddress 0.0.0.0
-# IPv4 & IPv6
-ListenAddress ::
-
-# If port forwarding is enabled (default), specify if the server can bind to
-# INADDR_ANY.
-# This allows the local port forwarding to work when connections are received
-# from any remote host.
-GatewayPorts no
-
-# X11 tunneling options
-X11Forwarding yes
-X11DisplayOffset 10
-X11UseLocalhost yes
-
-# The maximum number of concurrent unauthenticated connections to sshd.
-# start:rate:full see sshd(1) for more information.
-# The default is 10 unauthenticated clients.
-#MaxStartups 10:30:60
-
-# Banner to be printed before authentication starts.
-#Banner /etc/issue
-
-# Should sshd print the /etc/motd file and check for mail.
-# On Solaris it is assumed that the login shell will do these (eg /etc/profile).
-PrintMotd no
-
-# KeepAlive specifies whether keep alive messages are sent to the client.
-# See sshd(1) for detailed description of what this means.
-# Note that the client may also be sending keep alive messages to the server.
-KeepAlive yes
-
-# Syslog facility and level
-SyslogFacility auth
-LogLevel info
-
-#
-# Authentication configuration
-#
-
-# Host private key files
-# Must be on a local disk and readable only by the root user (root:sys 600).
-HostKey /etc/ssh/ssh_host_rsa_key
-HostKey /etc/ssh/ssh_host_dsa_key
-
-# Length of the server key
-# Default 768, Minimum 512
-ServerKeyBits 768
-
-# sshd regenerates the key every KeyRegenerationInterval seconds.
-# The key is never stored anywhere except the memory of sshd.
-# The default is 1 hour (3600 seconds).
-KeyRegenerationInterval 3600
-
-# Ensure secure permissions on users .ssh directory.
-StrictModes yes
-
-# Length of time in seconds before a client that hasn't completed
-# authentication is disconnected.
-# Default is 600 seconds. 0 means no time limit.
-LoginGraceTime 600
-
-# Maximum number of retries for authentication
-# Default is 6. Default (if unset) for MaxAuthTriesLog is MaxAuthTries / 2
-MaxAuthTries 6
-MaxAuthTriesLog 3
-
-# Are logins to accounts with empty passwords allowed.
-# If PermitEmptyPasswords is no, pass PAM_DISALLOW_NULL_AUTHTOK
-# to pam_authenticate(3PAM).
-PermitEmptyPasswords no
-
-# To disable tunneled clear text passwords, change PasswordAuthentication to no.
-PasswordAuthentication yes
-
-# Are root logins permitted using sshd.
-# Note that sshd uses pam_authenticate(3PAM) so the root (or any other) user
-# maybe denied access by a PAM module regardless of this setting.
-# Valid options are yes, without-password, no.
-PermitRootLogin no
-
-# sftp subsystem
-Subsystem sftp internal-sftp
-
-
-# SSH protocol v1 specific options
-#
-# The following options only apply to the v1 protocol and provide
-# some form of backwards compatibility with the very weak security
-# of /usr/bin/rsh. Their use is not recommended and the functionality
-# will be removed when support for v1 protocol is removed.
-
-# Should sshd use .rhosts and .shosts for password less authentication.
-IgnoreRhosts yes
-RhostsAuthentication no
-
-# Rhosts RSA Authentication
-# For this to work you will also need host keys in /etc/ssh/ssh_known_hosts.
-# If the user on the client side is not root then this won't work on
-# Solaris since /usr/bin/ssh is not installed setuid.
-RhostsRSAAuthentication no
-
-# Uncomment if you don't trust ~/.ssh/known_hosts for RhostsRSAAuthentication.
-#IgnoreUserKnownHosts yes
-
-# Is pure RSA authentication allowed.
-# Default is yes
-RSAAuthentication yes
diff --git a/usr/src/cmd/ssh/include/altprivsep.h b/usr/src/cmd/ssh/include/altprivsep.h
deleted file mode 100644
index fb230d28a7..0000000000
--- a/usr/src/cmd/ssh/include/altprivsep.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- *
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _ALTPRIVSEP_H
-#define _ALTPRIVSEP_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <sys/types.h>
-#include "auth.h"
-#include "kex.h"
-
-#define APS_MSG_NEWKEYS_REQ 0
-#define APS_MSG_NEWKEYS_REP 1
-#define APS_MSG_RECORD_LOGIN 2
-#define APS_MSG_RECORD_LOGOUT 3
-#define APS_MSG_START_REKEX 4
-#define APS_MSG_AUTH_CONTEXT 5
-
-void altprivsep_start_and_do_monitor(int use_engine, int inetd, int newsock,
- int statup_pipe);
-int altprivsep_get_pipe_fd(void);
-
-/* child-side handler of re-key packets */
-void altprivsep_rekey(int type, u_int32_t seq, void *ctxt);
-
-/* Calls _to_ monitor from unprivileged process */
-void altprivsep_process_input(fd_set *rset);
-void altprivsep_record_login(pid_t pid, const char *ttyname);
-void altprivsep_record_logout(pid_t pid);
-void altprivsep_start_rekex(void);
-void altprivsep_send_auth_context(Authctxt *authctxt);
-
-/* Functions for use in the monitor */
-void aps_input_altpriv_msg(int type, u_int32_t seq, void *ctxt);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _ALTPRIVSEP_H */
diff --git a/usr/src/cmd/ssh/include/atomicio.h b/usr/src/cmd/ssh/include/atomicio.h
deleted file mode 100644
index e1ba7b21e6..0000000000
--- a/usr/src/cmd/ssh/include/atomicio.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2006 Damien Miller. All rights reserved.
- * Copyright (c) 1995,1999 Theo de Raadt. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _ATOMICIO_H
-#define _ATOMICIO_H
-
-/* $OpenBSD: atomicio.h,v 1.4 2001/06/26 06:32:46 itojun Exp $ */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Ensure all of data on socket comes through. f==read || f==write
- */
-ssize_t atomicio(ssize_t (*)(), int, void *, size_t);
-
-#define vwrite (ssize_t (*)(int, void *, size_t))write
-
-/*
- * ensure all of data on socket comes through. f==readv || f==writev
- */
-size_t atomiciov(ssize_t (*)(int, const struct iovec *, int),
- int, const struct iovec *, int);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _ATOMICIO_H */
diff --git a/usr/src/cmd/ssh/include/auth-options.h b/usr/src/cmd/ssh/include/auth-options.h
deleted file mode 100644
index 31d7fd6ce1..0000000000
--- a/usr/src/cmd/ssh/include/auth-options.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* $OpenBSD: auth-options.h,v 1.12 2002/07/21 18:34:43 stevesk Exp $ */
-
-#ifndef _AUTH_OPTIONS_H
-#define _AUTH_OPTIONS_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-/* Linked list of custom environment strings */
-struct envstring {
- struct envstring *next;
- char *s;
-};
-
-/* Flags that may be set in authorized_keys options. */
-extern int no_port_forwarding_flag;
-extern int no_agent_forwarding_flag;
-extern int no_x11_forwarding_flag;
-extern int no_pty_flag;
-extern char *forced_command;
-extern struct envstring *custom_environment;
-
-int auth_parse_options(struct passwd *, char *, char *, u_long);
-void auth_clear_options(void);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _AUTH_OPTIONS_H */
diff --git a/usr/src/cmd/ssh/include/auth-pam.h b/usr/src/cmd/ssh/include/auth-pam.h
deleted file mode 100644
index 3c3dd409fd..0000000000
--- a/usr/src/cmd/ssh/include/auth-pam.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2000 Damien Miller. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
- */
-
-/* $Id: auth-pam.h,v 1.16 2002/07/23 00:44:07 stevesk Exp $ */
-
-#ifndef _AUTH_PAM_H
-#define _AUTH_PAM_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "includes.h"
-#ifdef USE_PAM
-
-char * derive_pam_svc_name(Authmethod *method);
-void new_start_pam(Authctxt *authctxt, struct pam_conv *conv);
-int auth_pam_password(Authctxt *authctxt, const char *password);
-int do_pam_non_initial_userauth(Authctxt *authctxt);
-int finish_userauth_do_pam(Authctxt *authctxt);
-void finish_pam(Authctxt *authctxt);
-char **fetch_pam_environment(Authctxt *authctxt);
-void free_pam_environment(char **env);
-void message_cat(char **p, const char *a);
-void print_pam_messages(void);
-
-#define AUTHPAM_DONE(ac) (ac != NULL && \
- ac->pam != NULL && \
- ac->pam->h != NULL && \
- ac->pam->state == PAM_S_DONE)
-
-#define AUTHPAM_RETVAL(ac, rv) ((ac != NULL && ac->pam != NULL) ? \
- ac->pam->last_pam_retval : rv)
-
-#define AUTHPAM_ERROR(ac, rv) ((ac != NULL && ac->pam != NULL && \
- ac->pam->last_pam_retval != PAM_SUCCESS) ? \
- ac->pam->last_pam_retval : rv)
-
-#endif /* USE_PAM */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _AUTH_PAM_H */
diff --git a/usr/src/cmd/ssh/include/auth.h b/usr/src/cmd/ssh/include/auth.h
deleted file mode 100644
index c932fafa6d..0000000000
--- a/usr/src/cmd/ssh/include/auth.h
+++ /dev/null
@@ -1,325 +0,0 @@
-/* $OpenBSD: auth.h,v 1.41 2002/09/26 11:38:43 markus Exp $ */
-
-#ifndef _AUTH_H
-#define _AUTH_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Copyright (c) 2000 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include "key.h"
-#include "hostfile.h"
-#include <openssl/rsa.h>
-
-#ifdef USE_PAM
-#include <security/pam_appl.h>
-#endif /* USE_PAM */
-
-#ifdef HAVE_LOGIN_CAP
-#include <login_cap.h>
-#endif
-#ifdef BSD_AUTH
-#include <bsd_auth.h>
-#endif
-#ifdef KRB5
-#include <krb5.h>
-#endif
-
-typedef struct Authctxt Authctxt;
-typedef struct Authmethod Authmethod;
-typedef struct KbdintDevice KbdintDevice;
-
-#ifdef USE_PAM
-typedef struct pam_stuff pam_stuff;
-
-struct pam_stuff {
- Authctxt *authctxt;
- pam_handle_t *h;
- int state;
- int last_pam_retval;
-};
-
-/* See auth-pam.h and auth-pam.c */
-
-#define PAM_S_DONE_ACCT_MGMT 0x01 /* acct_mgmt done */
-#define PAM_S_DONE_SETCRED 0x02 /* setcred done */
-#define PAM_S_DONE_OPEN_SESSION 0x04 /* open_session done */
-#define PAM_S_DONE 0x07 /* all done */
-#endif /* USE_PAM */
-
-struct Authctxt {
- int success;
- int valid;
- int attempt; /* all userauth attempt count */
- int init_attempt; /* passwd/kbd-int attempt count */
- int failures;
- int init_failures;
- int unwind_dispatch_loop;
- int v1_auth_type;
- char *v1_auth_name;
- Authmethod *method;
- char *user;
- char *service;
- struct passwd *pw;
- char *style;
- void *kbdintctxt; /* XXX Switch to method_data;
- v1 still needs this*/
-#ifdef USE_PAM
- pam_stuff *pam;
- char *cuser; /* client side user, needed for setting
- PAM_AUSER for hostbased authentication
- using roles */
- u_long last_login_time; /* need to get the time of
- last login before calling
- pam_open_session() */
- char last_login_host[MAXHOSTNAMELEN];
- int pam_retval; /* pam_stuff is cleaned before
- BSM login failure auditing */
-#endif /* USE_PAM */
-
- /* SUNW - What follows remains to reduce diffs with OpenSSH but
- * is not used in Solaris. The Solaris SSH internal
- * architecture requires that this stuff move into the
- * Authmethod method_data.
- */
-#ifndef SUNW_SSH
-#ifdef BSD_AUTH
- auth_session_t *as;
-#endif
-#ifdef KRB4
- char *krb4_ticket_file;
-#endif
-#ifdef KRB5
- krb5_context krb5_ctx;
- krb5_auth_context krb5_auth_ctx;
- krb5_ccache krb5_fwd_ccache;
- krb5_principal krb5_user;
- char *krb5_ticket_file;
-#endif
- void *methoddata;
-#endif /* SUNW_SSH */
-};
-
-struct Authmethod {
- char *name;
- int *enabled;
- /*
- * Userauth method state tracking fields updated in
- * input_userauth_request() and auth-pam.c.
- *
- * The "void (*userauth)(Authctxt *authctxt)" function
- * communicates the userauth result (success, failure,
- * "postponed," abandoned) through the 'authenticated',
- * 'postponed' and 'abandoned' fields. Partial success is
- * indicated by requiring other userauths to be used by setting
- * their 'required' or 'sufficient' fields.
- *
- * Individual methods should only ever set 'not_again' if it
- * makes no sense to complete the same userauth more than once,
- * and they should set any methods' sufficient or required flags
- * in order to force partial authentication and require that
- * more userauths be tried. The (void *) 'method_data' and
- * 'hist_method_data' pointers can be used by methods such as
- * pubkey which may make sense to run more than once during
- * userauth or which may require multiple round tripes (e.g.,
- * keyboard-interactive) and which need to keep some state;
- * 'hist_method_data' is there specifically for pubkey userauth
- * where multiple successful attempts should all use different
- * keys.
- *
- * The "attempts," "abandons," "successes" and "failures" fields
- * count the number of times a method has been attempted,
- * abandoned, and has succeeded or failed. Note that pubkey
- * userauth does not double-count sig-less probes that are
- * followed by a pubkey request for the same pubkey anw with a
- * signature.
- */
- void (*userauth)(Authctxt *authctxt);
- void (*abandon)(Authctxt *, Authmethod *);
- void *method_data;
- void *hist_method_data;
- unsigned int is_initial;
- unsigned int attempts:8;
- unsigned int abandons:8;
- unsigned int successes:8;
- unsigned int failures:8;
- /*
- * Post-attempt state booleans (authenticated, abandoned, etc...)
- */
- unsigned int authenticated:1;
- unsigned int not_again:1;
- unsigned int sufficient:1;
- unsigned int required:1;
- unsigned int postponed:1;
- unsigned int abandoned:1;
- /*
- * NOTE: multi-round-trip userauth methods can either
- * recursively call dispatch_run and detect abandonment
- * within their message handlers (as PAM kbd-int does) or
- * set the postponed flag and let input_userauth_request()
- * detect abandonment (i.e., initiation of some userauth
- * method before completion of a started, multi-round-trip
- * userauth method).
- *
- */
-};
-
-/*
- * Keyboard interactive device:
- * init_ctx returns: non NULL upon success
- * query returns: 0 - success, otherwise failure
- * respond returns: 0 - success, 1 - need further interaction,
- * otherwise - failure
- */
-struct KbdintDevice
-{
- const char *name;
- void* (*init_ctx)(Authctxt*);
- int (*query)(void *ctx, char **name, char **infotxt,
- u_int *numprompts, char ***prompts, u_int **echo_on);
- int (*respond)(void *ctx, u_int numresp, char **responses);
- void (*free_ctx)(void *ctx);
-};
-
-int auth_rhosts(struct passwd *, const char *);
-int
-auth_rhosts2(struct passwd *, const char *, const char *, const char *);
-
-int auth_rhosts_rsa(struct passwd *, char *, Key *);
-int auth_password(Authctxt *, const char *);
-int auth_rsa(struct passwd *, BIGNUM *);
-int auth_rsa_challenge_dialog(Key *);
-BIGNUM *auth_rsa_generate_challenge(Key *);
-int auth_rsa_verify_response(Key *, BIGNUM *, u_char[]);
-int auth_rsa_key_allowed(struct passwd *, BIGNUM *, Key **);
-
-int auth_rhosts_rsa_key_allowed(struct passwd *, char *, char *, Key *);
-int hostbased_key_allowed(struct passwd *, const char *, char *, Key *);
-int user_key_allowed(struct passwd *, Key *);
-
-#ifdef KRB4
-#include <krb.h>
-int auth_krb4(Authctxt *, KTEXT, char **, KTEXT);
-int auth_krb4_password(Authctxt *, const char *);
-void krb4_cleanup_proc(void *);
-
-#ifdef AFS
-#include <kafs.h>
-int auth_krb4_tgt(Authctxt *, const char *);
-int auth_afs_token(Authctxt *, const char *);
-#endif /* AFS */
-
-#endif /* KRB4 */
-
-#ifdef KRB5
-int auth_krb5(Authctxt *authctxt, krb5_data *auth, char **client, krb5_data *);
-int auth_krb5_tgt(Authctxt *authctxt, krb5_data *tgt);
-int auth_krb5_password(Authctxt *authctxt, const char *password);
-void krb5_cleanup_proc(void *authctxt);
-#endif /* KRB5 */
-
-#include "auth-pam.h"
-#include "auth2-pam.h"
-
-Authctxt *do_authentication(void);
-Authctxt *do_authentication2(void);
-
-#ifdef HAVE_BSM
-void audit_failed_login_cleanup(void *);
-#endif /* HAVE_BSM */
-
-int userauth_check_partial_failure(Authctxt *authctxt);
-void userauth_force_kbdint(void);
-
-Authctxt *authctxt_new(void);
-void auth_log(Authctxt *, int, char *, char *);
-void userauth_finish(Authctxt *, char *);
-void userauth_user_svc_change(Authctxt *authctxt,
- char *user,
- char *service);
-int auth_root_allowed(char *);
-
-char *auth2_read_banner(void);
-
-void privsep_challenge_enable(void);
-
-void auth2_challenge(Authctxt *, char *);
-void auth2_challenge_abandon(Authctxt *);
-int bsdauth_query(void *, char **, char **, u_int *, char ***, u_int **);
-int bsdauth_respond(void *, u_int, char **);
-int skey_query(void *, char **, char **, u_int *, char ***, u_int **);
-int skey_respond(void *, u_int, char **);
-
-struct passwd * getpwnamallow(const char *user);
-
-int run_auth_hook(const char *, const char *, const char *);
-
-char *get_challenge(Authctxt *);
-int verify_response(Authctxt *, const char *);
-
-struct passwd * auth_get_user(void);
-
-char *authorized_keys_file(struct passwd *);
-char *authorized_keys_file2(struct passwd *);
-
-int
-secure_filename(FILE *, const char *, struct passwd *, char *, size_t);
-
-HostStatus
-check_key_in_hostfiles(struct passwd *, Key *, const char *,
- const char *, const char *);
-
-/* hostkey handling */
-#ifndef lint
-Key *get_hostkey_by_index(int);
-Key *get_hostkey_by_type(int);
-int get_hostkey_index(Key *);
-#endif /* lint */
-int ssh1_session_key(BIGNUM *);
-
-/* debug messages during authentication */
-void auth_debug_add(const char *fmt,...) __attribute__((format(printf, 1, 2)));
-void auth_debug_send(void);
-void auth_debug_reset(void);
-
-#define AUTH_FAIL_MAX 6
-#define AUTH_FAIL_LOG (AUTH_FAIL_MAX/2)
-#define AUTH_FAIL_MSG "Too many authentication failures for %.100s"
-
-#define SKEY_PROMPT "\nS/Key Password: "
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _AUTH_H */
diff --git a/usr/src/cmd/ssh/include/auth2-pam.h b/usr/src/cmd/ssh/include/auth2-pam.h
deleted file mode 100644
index dae25c0819..0000000000
--- a/usr/src/cmd/ssh/include/auth2-pam.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* $Id: auth2-pam.h,v 1.2 2001/02/09 01:55:36 djm Exp $ */
-/*
- * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _AUTH2_PAM_H
-#define _AUTH2_PAM_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "includes.h"
-#ifdef USE_PAM
-
-typedef struct Convctxt Convctxt;
-
-struct Convctxt {
- int abandoned, finished, num_received, num_expected;
- int *prompts;
- struct pam_response *responses;
-};
-
-int kbdint_pam_abandon_chk(Authctxt *authctxt, Authmethod *method);
-void kbdint_pam_abandon(Authctxt *authctxt, Authmethod *method);
-
-void auth2_pam(Authctxt *authctxt);
-
-#endif /* USE_PAM */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _AUTH2_PAM_H */
diff --git a/usr/src/cmd/ssh/include/authfd.h b/usr/src/cmd/ssh/include/authfd.h
deleted file mode 100644
index 81b27cc0e6..0000000000
--- a/usr/src/cmd/ssh/include/authfd.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/* $OpenBSD: authfd.h,v 1.31 2002/09/11 18:27:25 stevesk Exp $ */
-
-#ifndef _AUTHFD_H
-#define _AUTHFD_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * Functions to interface with the SSH_AUTHENTICATION_FD socket.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-#include "buffer.h"
-
-/* Messages for the authentication agent connection. */
-#define SSH_AGENTC_REQUEST_RSA_IDENTITIES 1
-#define SSH_AGENT_RSA_IDENTITIES_ANSWER 2
-#define SSH_AGENTC_RSA_CHALLENGE 3
-#define SSH_AGENT_RSA_RESPONSE 4
-#define SSH_AGENT_FAILURE 5
-#define SSH_AGENT_SUCCESS 6
-#define SSH_AGENTC_ADD_RSA_IDENTITY 7
-#define SSH_AGENTC_REMOVE_RSA_IDENTITY 8
-#define SSH_AGENTC_REMOVE_ALL_RSA_IDENTITIES 9
-
-/* private OpenSSH extensions for SSH2 */
-#define SSH2_AGENTC_REQUEST_IDENTITIES 11
-#define SSH2_AGENT_IDENTITIES_ANSWER 12
-#define SSH2_AGENTC_SIGN_REQUEST 13
-#define SSH2_AGENT_SIGN_RESPONSE 14
-#define SSH2_AGENTC_ADD_IDENTITY 17
-#define SSH2_AGENTC_REMOVE_IDENTITY 18
-#define SSH2_AGENTC_REMOVE_ALL_IDENTITIES 19
-
-/* smartcard */
-#define SSH_AGENTC_ADD_SMARTCARD_KEY 20
-#define SSH_AGENTC_REMOVE_SMARTCARD_KEY 21
-
-/* lock/unlock the agent */
-#define SSH_AGENTC_LOCK 22
-#define SSH_AGENTC_UNLOCK 23
-
-/* add key with constraints */
-#define SSH_AGENTC_ADD_RSA_ID_CONSTRAINED 24
-#define SSH2_AGENTC_ADD_ID_CONSTRAINED 25
-
-#define SSH_AGENT_CONSTRAIN_LIFETIME 1
-#define SSH_AGENT_CONSTRAIN_CONFIRM 2
-
-/* extended failure messages */
-#define SSH2_AGENT_FAILURE 30
-
-/* additional error code for ssh.com's ssh-agent2 */
-#define SSH_COM_AGENT2_FAILURE 102
-
-#define SSH_AGENT_OLD_SIGNATURE 0x01
-
-typedef struct {
- int fd;
- Buffer identities;
- int howmany;
-} AuthenticationConnection;
-
-int ssh_agent_present(void);
-int ssh_get_authentication_socket(void);
-void ssh_close_authentication_socket(int);
-
-AuthenticationConnection *ssh_get_authentication_connection(void);
-void ssh_close_authentication_connection(AuthenticationConnection *);
-int ssh_get_num_identities(AuthenticationConnection *, int);
-Key *ssh_get_first_identity(AuthenticationConnection *, char **, int);
-Key *ssh_get_next_identity(AuthenticationConnection *, char **, int);
-int ssh_add_identity(AuthenticationConnection *, Key *, const char *);
-int ssh_add_identity_constrained(AuthenticationConnection *, Key *, const char *, u_int);
-int ssh_remove_identity(AuthenticationConnection *, Key *);
-int ssh_remove_all_identities(AuthenticationConnection *, int);
-int ssh_lock_agent(AuthenticationConnection *, int, const char *);
-int ssh_update_card(AuthenticationConnection *, int, const char *, const char *);
-
-int
-ssh_decrypt_challenge(AuthenticationConnection *, Key *, BIGNUM *, u_char[16],
- u_int, u_char[16]);
-
-int
-ssh_agent_sign(AuthenticationConnection *, Key *, u_char **, u_int *, u_char *,
- u_int);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _AUTHFD_H */
diff --git a/usr/src/cmd/ssh/include/authfile.h b/usr/src/cmd/ssh/include/authfile.h
deleted file mode 100644
index 68031d32cb..0000000000
--- a/usr/src/cmd/ssh/include/authfile.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/* $OpenBSD: authfile.h,v 1.10 2002/05/23 19:24:30 markus Exp $ */
-
-#ifndef _AUTHFILE_H
-#define _AUTHFILE_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-int key_save_private(Key *, const char *, const char *, const char *);
-Key *key_load_public(const char *, char **);
-Key *key_load_public_type(int, const char *, char **);
-Key *key_load_private(const char *, const char *, char **);
-Key *key_load_private_type(int, const char *, const char *, char **);
-Key *key_load_private_pem(int, int, const char *, char **);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _AUTHFILE_H */
diff --git a/usr/src/cmd/ssh/include/base64.h b/usr/src/cmd/ssh/include/base64.h
deleted file mode 100644
index 7e2ccf7d3e..0000000000
--- a/usr/src/cmd/ssh/include/base64.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef _BASE64_H
-#define _BASE64_H
-
-/* $Id: base64.h,v 1.3 2002/02/26 16:59:59 stevesk Exp $ */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-#include "config.h"
-
-#ifndef HAVE___B64_NTOP
-# ifndef HAVE_B64_NTOP
-int b64_ntop(u_char const *src, size_t srclength, char *target,
- size_t targsize);
-int b64_pton(u_char const *src, u_char *target, size_t targsize);
-# endif /* !HAVE_B64_NTOP */
-# define __b64_ntop b64_ntop
-# define __b64_pton b64_pton
-#endif /* HAVE___B64_NTOP */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _BASE64_H */
diff --git a/usr/src/cmd/ssh/include/bindresvport.h b/usr/src/cmd/ssh/include/bindresvport.h
deleted file mode 100644
index 9cd968bf9b..0000000000
--- a/usr/src/cmd/ssh/include/bindresvport.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* $Id: bindresvport.h,v 1.2 2001/02/09 01:55:36 djm Exp $ */
-
-#ifndef _BINDRESVPORT_H
-#define _BINDRESVPORT_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "config.h"
-
-#ifndef HAVE_BINDRESVPORT_SA
-int bindresvport_sa(int sd, struct sockaddr *sa);
-#endif /* !HAVE_BINDRESVPORT_SA */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _BINDRESVPORT_H */
diff --git a/usr/src/cmd/ssh/include/bsd-arc4random.h b/usr/src/cmd/ssh/include/bsd-arc4random.h
deleted file mode 100644
index c9238636c5..0000000000
--- a/usr/src/cmd/ssh/include/bsd-arc4random.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 1999-2000 Damien Miller. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _BSD_ARC4RANDOM_H
-#define _BSD_ARC4RANDOM_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/* $Id: bsd-arc4random.h,v 1.2 2001/02/09 01:55:36 djm Exp $ */
-
-#include "config.h"
-
-#ifndef HAVE_ARC4RANDOM
-unsigned int arc4random(void);
-void arc4random_stir(void);
-#endif /* !HAVE_ARC4RANDOM */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _BSD_ARC4RANDOM_H */
diff --git a/usr/src/cmd/ssh/include/bsd-cray.h b/usr/src/cmd/ssh/include/bsd-cray.h
deleted file mode 100644
index 8b8f9113c7..0000000000
--- a/usr/src/cmd/ssh/include/bsd-cray.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * $Id: bsd-cray.h,v 1.5 2002/09/26 00:38:51 tim Exp $
- *
- * bsd-cray.h
- *
- * Copyright (c) 2002, Cray Inc. (Wendy Palm <wendyp@cray.com>)
- * Significant portions provided by
- * Wayne Schroeder, SDSC <schroeder@sdsc.edu>
- * William Jones, UTexas <jones@tacc.utexas.edu>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Created: Apr 22 16.34:00 2002 wp
- *
- * This file contains functions required for proper execution
- * on UNICOS systems.
- *
- */
-
-#ifndef _BSD_CRAY_H
-#define _BSD_CRAY_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifdef _UNICOS
-void cray_init_job(struct passwd *); /* init cray job */
-void cray_job_termination_handler(int); /* process end of job signal */
-void cray_login_failure(char *username, int errcode);
-int cray_access_denied(char *username);
-extern char cray_tmpdir[]; /* cray tmpdir */
-#ifndef IA_SSHD
-#define IA_SSHD IA_LOGIN
-#endif
-#ifndef MAXHOSTNAMELEN
-#define MAXHOSTNAMELEN 64
-#endif
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _BSD_CRAY_H */
diff --git a/usr/src/cmd/ssh/include/bsd-cygwin_util.h b/usr/src/cmd/ssh/include/bsd-cygwin_util.h
deleted file mode 100644
index dc44268563..0000000000
--- a/usr/src/cmd/ssh/include/bsd-cygwin_util.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/* $Id: bsd-cygwin_util.h,v 1.7 2002/04/15 22:00:52 stevesk Exp $ */
-
-#ifndef _BSD_CYGWIN_UTIL_H
-#define _BSD_CYGWIN_UTIL_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * cygwin_util.c
- *
- * Copyright (c) 2000, 2001, Corinna Vinschen <vinschen@cygnus.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Created: Sat Sep 02 12:17:00 2000 cv
- *
- * This file contains functions for forcing opened file descriptors to
- * binary mode on Windows systems.
- */
-
-#ifdef HAVE_CYGWIN
-
-#include <io.h>
-
-int binary_open(const char *filename, int flags, ...);
-int binary_pipe(int fd[2]);
-int check_nt_auth(int pwd_authenticated, struct passwd *pw);
-int check_ntsec(const char *filename);
-void register_9x_service(void);
-
-#define open binary_open
-#define pipe binary_pipe
-
-#endif /* HAVE_CYGWIN */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _BSD_CYGWIN_UTIL_H */
diff --git a/usr/src/cmd/ssh/include/bsd-getpeereid.h b/usr/src/cmd/ssh/include/bsd-getpeereid.h
deleted file mode 100644
index 44430f7619..0000000000
--- a/usr/src/cmd/ssh/include/bsd-getpeereid.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* $Id: bsd-getpeereid.h,v 1.1 2002/09/12 00:33:02 djm Exp $ */
-
-#ifndef _BSD_GETPEEREID_H
-#define _BSD_GETPEEREID_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "config.h"
-
-#include <sys/types.h> /* For uid_t, gid_t */
-
-#ifndef HAVE_GETPEEREID
-int getpeereid(int , uid_t *, gid_t *);
-#endif /* HAVE_GETPEEREID */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _BSD_GETPEEREID_H */
diff --git a/usr/src/cmd/ssh/include/bsd-misc.h b/usr/src/cmd/ssh/include/bsd-misc.h
deleted file mode 100644
index 9990c87f3d..0000000000
--- a/usr/src/cmd/ssh/include/bsd-misc.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 1999-2000 Damien Miller. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _BSD_MISC_H
-#define _BSD_MISC_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/* $Id: bsd-misc.h,v 1.6 2002/06/13 21:34:58 mouring Exp $ */
-
-#include "config.h"
-
-char *get_progname(char *argv0);
-
-#ifndef HAVE_SETSID
-#define setsid() setpgrp(0, getpid())
-#endif /* !HAVE_SETSID */
-
-#ifndef HAVE_SETENV
-int setenv(const char *name, const char *value, int overwrite);
-#endif /* !HAVE_SETENV */
-
-#ifndef HAVE_SETLOGIN
-int setlogin(const char *name);
-#endif /* !HAVE_SETLOGIN */
-
-#ifndef HAVE_INNETGR
-int innetgr(const char *netgroup, const char *host,
- const char *user, const char *domain);
-#endif /* HAVE_INNETGR */
-
-#if !defined(HAVE_SETEUID) && defined(HAVE_SETREUID)
-int seteuid(uid_t euid);
-#endif /* !defined(HAVE_SETEUID) && defined(HAVE_SETREUID) */
-
-#if !defined(HAVE_SETEGID) && defined(HAVE_SETRESGID)
-int setegid(uid_t egid);
-#endif /* !defined(HAVE_SETEGID) && defined(HAVE_SETRESGID) */
-
-#if !defined(HAVE_STRERROR) && defined(HAVE_SYS_ERRLIST) && defined(HAVE_SYS_NERR)
-const char *strerror(int e);
-#endif
-
-
-#ifndef HAVE_UTIMES
-#ifndef HAVE_STRUCT_TIMEVAL
-struct timeval {
- long tv_sec;
- long tv_usec;
-}
-#endif /* HAVE_STRUCT_TIMEVAL */
-
-int utimes(char *filename, struct timeval *tvp);
-#endif /* HAVE_UTIMES */
-
-#ifndef HAVE_TRUNCATE
-int truncate (const char *path, off_t length);
-#endif /* HAVE_TRUNCATE */
-
-#if !defined(HAVE_SETGROUPS) && defined(SETGROUPS_NOOP)
-int setgroups(size_t size, const gid_t *list);
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _BSD_MISC_H */
diff --git a/usr/src/cmd/ssh/include/bsd-snprintf.h b/usr/src/cmd/ssh/include/bsd-snprintf.h
deleted file mode 100644
index 71691a01d3..0000000000
--- a/usr/src/cmd/ssh/include/bsd-snprintf.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* $Id: bsd-snprintf.h,v 1.2 2001/02/09 01:55:36 djm Exp $ */
-
-#ifndef _BSD_SNPRINTF_H
-#define _BSD_SNPRINTF_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "config.h"
-
-#include <sys/types.h> /* For size_t */
-
-#ifndef HAVE_SNPRINTF
-int snprintf(char *str, size_t count, const char *fmt, ...);
-#endif /* !HAVE_SNPRINTF */
-
-#ifndef HAVE_VSNPRINTF
-int vsnprintf(char *str, size_t count, const char *fmt, va_list args);
-#endif /* !HAVE_SNPRINTF */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _BSD_SNPRINTF_H */
diff --git a/usr/src/cmd/ssh/include/bsd-waitpid.h b/usr/src/cmd/ssh/include/bsd-waitpid.h
deleted file mode 100644
index 9c6385fdb8..0000000000
--- a/usr/src/cmd/ssh/include/bsd-waitpid.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#ifndef _BSD_WAITPID_H
-#define _BSD_WAITPID_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/* $Id: bsd-waitpid.h,v 1.2 2001/02/09 01:55:36 djm Exp $ */
-
-#ifndef HAVE_WAITPID
-/* Clean out any potental issues */
-#undef WIFEXITED
-#undef WIFSTOPPED
-#undef WIFSIGNALED
-
-/* Define required functions to mimic a POSIX look and feel */
-#define _W_INT(w) (*(int*)&(w)) /* convert union wait to int */
-#define WIFEXITED(w) (!((_W_INT(w)) & 0377))
-#define WIFSTOPPED(w) ((_W_INT(w)) & 0100)
-#define WIFSIGNALED(w) (!WIFEXITED(w) && !WIFSTOPPED(w))
-#define WEXITSTATUS(w) (int)(WIFEXITED(w) ? ((_W_INT(w) >> 8) & 0377) : -1)
-#define WTERMSIG(w) (int)(WIFSIGNALED(w) ? (_W_INT(w) & 0177) : -1)
-#define WCOREFLAG 0x80
-#define WCOREDUMP(w) ((_W_INT(w)) & WCOREFLAG)
-
-/* Prototype */
-pid_t waitpid(int pid, int *stat_loc, int options);
-
-#endif /* !HAVE_WAITPID */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _BSD_WAITPID_H */
diff --git a/usr/src/cmd/ssh/include/bufaux.h b/usr/src/cmd/ssh/include/bufaux.h
deleted file mode 100644
index aa673cccb1..0000000000
--- a/usr/src/cmd/ssh/include/bufaux.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-/* $OpenBSD: bufaux.h,v 1.18 2002/04/20 09:14:58 markus Exp $ */
-
-#ifndef _BUFAUX_H
-#define _BUFAUX_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-#include "buffer.h"
-#include <openssl/bn.h>
-
-void buffer_put_bignum(Buffer *, const BIGNUM *);
-void buffer_put_bignum2(Buffer *, const BIGNUM *);
-void buffer_get_bignum(Buffer *, BIGNUM *);
-void buffer_get_bignum2(Buffer *, BIGNUM *);
-
-u_short buffer_get_short(Buffer *);
-void buffer_put_short(Buffer *, u_short);
-
-u_int buffer_get_int(Buffer *);
-void buffer_put_int(Buffer *, u_int);
-
-#ifdef HAVE_U_INT64_T
-u_int64_t buffer_get_int64(Buffer *);
-void buffer_put_int64(Buffer *, u_int64_t);
-#endif
-
-int buffer_get_char(Buffer *);
-void buffer_put_char(Buffer *, int);
-
-void *buffer_get_string(Buffer *, u_int *);
-char *buffer_get_utf8_string(Buffer *, uint_t *);
-void buffer_put_string(Buffer *, const void *, u_int);
-void buffer_put_cstring(Buffer *, const char *);
-void buffer_put_utf8_string(Buffer *, const char *, uint_t len);
-void buffer_put_utf8_cstring(Buffer *, const char *);
-
-#define buffer_skip_string(b) \
- do { u_int l = buffer_get_int(b); buffer_consume(b, l); } while(0)
-
-int buffer_put_bignum_ret(Buffer *, const BIGNUM *);
-int buffer_get_bignum_ret(Buffer *, BIGNUM *);
-int buffer_put_bignum2_ret(Buffer *, const BIGNUM *);
-int buffer_get_bignum2_ret(Buffer *, BIGNUM *);
-int buffer_get_short_ret(u_short *, Buffer *);
-int buffer_get_int_ret(u_int *, Buffer *);
-#ifdef HAVE_U_INT64_T
-int buffer_get_int64_ret(u_int64_t *, Buffer *);
-#endif
-void *buffer_get_string_ret(Buffer *, u_int *);
-int buffer_get_char_ret(char *, Buffer *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _BUFAUX_H */
diff --git a/usr/src/cmd/ssh/include/buffer.h b/usr/src/cmd/ssh/include/buffer.h
deleted file mode 100644
index 4a6c7ad5e1..0000000000
--- a/usr/src/cmd/ssh/include/buffer.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/* $OpenBSD: buffer.h,v 1.11 2002/03/04 17:27:39 stevesk Exp $ */
-
-#ifndef _BUFFER_H
-#define _BUFFER_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * Code for manipulating FIFO buffers.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-typedef struct {
- u_char *buf; /* Buffer for data. */
- u_int alloc; /* Number of bytes allocated for data. */
- u_int offset; /* Offset of first byte containing data. */
- u_int end; /* Offset of last byte containing data. */
-} Buffer;
-
-void buffer_init(Buffer *);
-void buffer_clear(Buffer *);
-void buffer_free(Buffer *);
-
-u_int buffer_len(Buffer *);
-void *buffer_ptr(Buffer *);
-
-void buffer_append(Buffer *, const void *, u_int);
-void *buffer_append_space(Buffer *, u_int);
-
-int buffer_check_alloc(Buffer *, u_int);
-
-void buffer_get(Buffer *, void *, u_int);
-
-void buffer_consume(Buffer *, u_int);
-void buffer_consume_end(Buffer *, u_int);
-
-void buffer_dump(Buffer *);
-
-int buffer_get_ret(Buffer *, void *, u_int);
-int buffer_consume_ret(Buffer *, u_int);
-int buffer_consume_end_ret(Buffer *, u_int);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _BUFFER_H */
diff --git a/usr/src/cmd/ssh/include/canohost.h b/usr/src/cmd/ssh/include/canohost.h
deleted file mode 100644
index a60c0ef7e9..0000000000
--- a/usr/src/cmd/ssh/include/canohost.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/* $OpenBSD: canohost.h,v 1.8 2001/06/26 17:27:23 markus Exp $ */
-
-#ifndef _CANOHOST_H
-#define _CANOHOST_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-const char *get_canonical_hostname(int);
-const char *get_remote_ipaddr(void);
-const char *get_remote_name_or_ip(u_int, int);
-
-char *get_peer_ipaddr(int);
-int get_peer_port(int);
-char *get_local_ipaddr(int);
-char *get_local_name(int);
-
-int get_remote_port(void);
-int get_local_port(void);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _CANOHOST_H */
diff --git a/usr/src/cmd/ssh/include/channels.h b/usr/src/cmd/ssh/include/channels.h
deleted file mode 100644
index 4440f1996a..0000000000
--- a/usr/src/cmd/ssh/include/channels.h
+++ /dev/null
@@ -1,263 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-/*
- * Copyright (c) 1999, 2000, 2001, 2002 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
- */
-/* $OpenBSD: channels.h,v 1.70 2002/06/24 14:33:27 markus Exp $ */
-
-
-#ifndef _CHANNELS_H
-#define _CHANNELS_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "buffer.h"
-
-/* Definitions for channel types. */
-#define SSH_CHANNEL_X11_LISTENER 1 /* Listening for inet X11 conn. */
-#define SSH_CHANNEL_PORT_LISTENER 2 /* Listening on a port. */
-#define SSH_CHANNEL_OPENING 3 /* waiting for confirmation */
-#define SSH_CHANNEL_OPEN 4 /* normal open two-way channel */
-#define SSH_CHANNEL_CLOSED 5 /* waiting for close confirmation */
-#define SSH_CHANNEL_AUTH_SOCKET 6 /* authentication socket */
-#define SSH_CHANNEL_X11_OPEN 7 /* reading first X11 packet */
-#define SSH_CHANNEL_INPUT_DRAINING 8 /* sending remaining data to conn */
-#define SSH_CHANNEL_OUTPUT_DRAINING 9 /* sending remaining data to app */
-#define SSH_CHANNEL_LARVAL 10 /* larval session */
-#define SSH_CHANNEL_RPORT_LISTENER 11 /* Listening to a R-style port */
-#define SSH_CHANNEL_CONNECTING 12
-#define SSH_CHANNEL_DYNAMIC 13
-#define SSH_CHANNEL_ZOMBIE 14 /* Almost dead. */
-#define SSH_CHANNEL_MAX_TYPE 15
-
-#define SSH_CHANNEL_PATH_LEN 256
-
-struct Channel;
-typedef struct Channel Channel;
-
-typedef void channel_callback_fn(int, void *);
-typedef int channel_filter_fn(struct Channel *, char *, int);
-
-struct Channel {
- int type; /* channel type/state */
- int self; /* my own channel identifier */
- int remote_id; /* channel identifier for remote peer */
- u_int istate; /* input from channel (state of receive half) */
- u_int ostate; /* output to channel (state of transmit half) */
- int wait_for_exit; /* no close till after exit-status is sent */
- int flags; /* close sent/rcvd */
- int rfd; /* read fd */
- int wfd; /* write fd */
- int efd; /* extended fd */
- int sock; /* sock fd */
- int isatty; /* rfd is a tty */
- int wfd_isatty; /* wfd is a tty */
- int force_drain; /* force close on iEOF */
- int delayed; /* post-select handlers for newly created
- * channels are delayed until the first call
- * to a matching pre-select handler.
- * this way post-select handlers are not
- * accidenly called if a FD gets reused */
- Buffer input; /* data read from socket, to be sent over
- * encrypted connection */
- Buffer output; /* data received over encrypted connection for
- * send on socket */
- Buffer extended;
- char path[SSH_CHANNEL_PATH_LEN];
- /* path for unix domain sockets, or host name for forwards */
- int listening_port; /* port being listened for forwards */
- int host_port; /* remote port to connect for forwards */
- char *remote_name; /* remote hostname */
-
- u_int remote_window;
- u_int remote_maxpacket;
- u_int local_window;
- u_int local_window_max;
- u_int local_consumed;
- u_int local_maxpacket;
- int extended_usage;
- int single_connection;
-
- char *ctype; /* type */
-
- /* callback */
- channel_callback_fn *confirm;
- channel_callback_fn *detach_user;
-
- /* filter */
- channel_filter_fn *input_filter;
-};
-
-#define CHAN_EXTENDED_IGNORE 0
-#define CHAN_EXTENDED_READ 1
-#define CHAN_EXTENDED_WRITE 2
-
-/* default window/packet sizes for tcp/x11-fwd-channel */
-#define CHAN_SES_PACKET_DEFAULT (32*1024)
-#define CHAN_SES_WINDOW_DEFAULT (4*CHAN_SES_PACKET_DEFAULT)
-#define CHAN_TCP_PACKET_DEFAULT (32*1024)
-#define CHAN_TCP_WINDOW_DEFAULT (4*CHAN_TCP_PACKET_DEFAULT)
-#define CHAN_X11_PACKET_DEFAULT (16*1024)
-#define CHAN_X11_WINDOW_DEFAULT (4*CHAN_X11_PACKET_DEFAULT)
-
-/* possible input states */
-#define CHAN_INPUT_OPEN 0
-#define CHAN_INPUT_WAIT_DRAIN 1
-#define CHAN_INPUT_WAIT_OCLOSE 2
-#define CHAN_INPUT_CLOSED 3
-
-/* possible output states */
-#define CHAN_OUTPUT_OPEN 0
-#define CHAN_OUTPUT_WAIT_DRAIN 1
-#define CHAN_OUTPUT_WAIT_IEOF 2
-#define CHAN_OUTPUT_CLOSED 3
-
-/*
- * Other channel flag bits are specific to each type of channel and are
- * defined locally with the code that uses them.
- */
-#define CHAN_CLOSE_SENT 0x01
-#define CHAN_CLOSE_RCVD 0x02
-#define CHAN_EOF_SENT 0x04
-#define CHAN_EOF_RCVD 0x08
-
-#define CHAN_RBUF 16*1024
-
-/* check whether 'efd' is still in use */
-#define CHANNEL_EFD_INPUT_ACTIVE(c) \
- (compat20 && c->extended_usage == CHAN_EXTENDED_READ && \
- (c->efd != -1 || \
- buffer_len(&c->extended) > 0))
-#define CHANNEL_EFD_OUTPUT_ACTIVE(c) \
- (compat20 && c->extended_usage == CHAN_EXTENDED_WRITE && \
- ((c->efd != -1 && !(c->flags & (CHAN_EOF_RCVD|CHAN_CLOSE_RCVD))) || \
- buffer_len(&c->extended) > 0))
-
-/* channel management */
-
-Channel *channel_lookup(int);
-Channel *channel_new(char *, int, int, int, int, u_int, u_int, int, char *, int);
-void channel_set_fds(int, int, int, int, int, int, u_int);
-void channel_set_wait_for_exit(int, int);
-void channel_free(Channel *);
-void channel_free_all(void);
-void channel_stop_listening(void);
-
-void channel_send_open(int);
-void channel_request_start(int, char *, int);
-void channel_register_cleanup(int, channel_callback_fn *);
-void channel_register_confirm(int, channel_callback_fn *);
-void channel_register_filter(int, channel_filter_fn *);
-void channel_cancel_cleanup(int);
-int channel_close_fd(int *);
-
-/* protocol handler */
-
-void channel_input_close(int, u_int32_t, void *);
-void channel_input_close_confirmation(int, u_int32_t, void *);
-void channel_input_data(int, u_int32_t, void *);
-void channel_input_extended_data(int, u_int32_t, void *);
-void channel_input_ieof(int, u_int32_t, void *);
-void channel_input_oclose(int, u_int32_t, void *);
-void channel_input_open_confirmation(int, u_int32_t, void *);
-void channel_input_open_failure(int, u_int32_t, void *);
-void channel_input_port_open(int, u_int32_t, void *);
-void channel_input_window_adjust(int, u_int32_t, void *);
-
-/* file descriptor handling (read/write) */
-
-void channel_prepare_select(fd_set **, fd_set **, int *, int*, int);
-void channel_after_select(fd_set *, fd_set *);
-void channel_output_poll(void);
-
-int channel_not_very_much_buffered_data(void);
-void channel_close_all(void);
-int channel_still_open(void);
-char *channel_open_message(void);
-int channel_find_open(void);
-
-/* tcp forwarding */
-void channel_set_af(int af);
-void channel_permit_all_opens(void);
-void channel_add_permitted_opens(char *, int);
-void channel_clear_permitted_opens(void);
-void channel_input_port_forward_request(int, int);
-int channel_connect_to(const char *, u_short);
-int channel_connect_by_listen_address(u_short);
-int channel_request_remote_forwarding(const char *, u_short,
- const char *, u_short);
-int channel_setup_local_fwd_listener(const char *, u_short,
- const char *, u_short, int);
-void channel_request_rforward_cancel(const char *host, u_short port);
-int channel_setup_remote_fwd_listener(const char *, u_short, int);
-int channel_cancel_rport_listener(const char *, u_short);
-
-/* x11 forwarding */
-
-int x11_connect_display(void);
-int x11_create_display_inet(int, int, int, u_int *);
-void x11_input_open(int, u_int32_t, void *);
-void x11_request_forwarding_with_spoofing(int, const char *, const char *,
- const char *);
-void deny_input_open(int, u_int32_t, void *);
-
-/* agent forwarding */
-
-void auth_request_forwarding(void);
-void auth_input_open_request(int, u_int32_t, void *);
-
-/* channel close */
-
-int chan_is_dead(Channel *, int);
-void chan_mark_dead(Channel *);
-
-/* channel events */
-
-void chan_rcvd_oclose(Channel *);
-void chan_rcvd_eow(Channel *); /* SSH2-only */
-void chan_read_failed(Channel *);
-void chan_ibuf_empty(Channel *);
-
-void chan_rcvd_ieof(Channel *);
-void chan_write_failed(Channel *);
-void chan_obuf_empty(Channel *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _CHANNELS_H */
diff --git a/usr/src/cmd/ssh/include/cipher.h b/usr/src/cmd/ssh/include/cipher.h
deleted file mode 100644
index e9362fedc4..0000000000
--- a/usr/src/cmd/ssh/include/cipher.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/* $OpenBSD: cipher.h,v 1.33 2002/03/18 17:13:15 markus Exp $ */
-
-#ifndef _CIPHER_H
-#define _CIPHER_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- *
- * Copyright (c) 2000 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <openssl/evp.h>
-/*
- * Cipher types for SSH-1. New types can be added, but old types should not
- * be removed for compatibility. The maximum allowed value is 31.
- */
-#define SSH_CIPHER_SSH2 -3
-#define SSH_CIPHER_ILLEGAL -2 /* No valid cipher selected. */
-#define SSH_CIPHER_NOT_SET -1 /* None selected (invalid number). */
-#define SSH_CIPHER_NONE 0 /* no encryption */
-#define SSH_CIPHER_IDEA 1 /* IDEA CFB */
-#define SSH_CIPHER_DES 2 /* DES CBC */
-#define SSH_CIPHER_3DES 3 /* 3DES CBC */
-#define SSH_CIPHER_BROKEN_TSS 4 /* TRI's Simple Stream encryption CBC */
-#define SSH_CIPHER_BROKEN_RC4 5 /* Alleged RC4 */
-#define SSH_CIPHER_BLOWFISH 6
-#define SSH_CIPHER_RESERVED 7
-#define SSH_CIPHER_MAX 31
-
-#define CIPHER_ENCRYPT 1
-#define CIPHER_DECRYPT 0
-
-typedef struct Cipher Cipher;
-typedef struct CipherContext CipherContext;
-
-struct Cipher;
-struct CipherContext {
- int plaintext;
- EVP_CIPHER_CTX evp;
- Cipher *cipher;
-};
-
-u_int cipher_mask_ssh1(int);
-Cipher *cipher_by_name(const char *);
-Cipher *cipher_by_number(int);
-int cipher_number(const char *);
-char *cipher_name(int);
-int ciphers_valid(const char *);
-void cipher_init(CipherContext *, Cipher *, const u_char *, u_int,
- const u_char *, u_int, int);
-void cipher_crypt(CipherContext *, u_char *, const u_char *, u_int);
-void cipher_cleanup(CipherContext *);
-void cipher_set_key_string(CipherContext *, Cipher *, const char *, int);
-u_int cipher_blocksize(Cipher *);
-u_int cipher_keylen(Cipher *);
-
-u_int cipher_get_number(Cipher *);
-void cipher_get_keyiv(CipherContext *, u_char *, u_int);
-void cipher_set_keyiv(CipherContext *, u_char *);
-int cipher_get_keyiv_len(CipherContext *);
-int cipher_get_keycontext(CipherContext *, u_char *);
-void cipher_set_keycontext(CipherContext *, u_char *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _CIPHER_H */
diff --git a/usr/src/cmd/ssh/include/clientloop.h b/usr/src/cmd/ssh/include/clientloop.h
deleted file mode 100644
index 46c801e405..0000000000
--- a/usr/src/cmd/ssh/include/clientloop.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-/*
- * Copyright (c) 2001 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _CLIENTLOOP_H
-#define _CLIENTLOOP_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* $OpenBSD: clientloop.h,v 1.7 2002/04/22 21:04:52 markus Exp $ */
-
-/* Client side main loop for the interactive session. */
-int client_loop(int, int, int);
-void client_x11_get_proto(const char *, const char *, uint_t,
- char **, char **);
-void client_global_request_reply_fwd(int type, u_int32_t seq, void *ctxt);
-void client_daemonize(void);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _CLIENTLOOP_H */
diff --git a/usr/src/cmd/ssh/include/compat.h b/usr/src/cmd/ssh/include/compat.h
deleted file mode 100644
index 63b845017a..0000000000
--- a/usr/src/cmd/ssh/include/compat.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright (c) 1999, 2000, 2001 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _COMPAT_H
-#define _COMPAT_H
-
-/* $OpenBSD: compat.h,v 1.33 2002/09/27 10:42:09 mickey Exp $ */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-#define SSH_PROTO_UNKNOWN 0x00
-#define SSH_PROTO_1 0x01
-#define SSH_PROTO_1_PREFERRED 0x02
-#define SSH_PROTO_2 0x04
-
-#define SSH_BUG_SIGBLOB 0x00000001
-#define SSH_BUG_PKSERVICE 0x00000002
-#define SSH_BUG_HMAC 0x00000004
-#define SSH_BUG_X11FWD 0x00000008
-#define SSH_OLD_SESSIONID 0x00000010
-#define SSH_BUG_PKAUTH 0x00000020
-#define SSH_BUG_DEBUG 0x00000040
-#define SSH_BUG_BANNER 0x00000080
-#define SSH_BUG_IGNOREMSG 0x00000100
-#define SSH_BUG_PKOK 0x00000200
-#define SSH_BUG_PASSWORDPAD 0x00000400
-#define SSH_BUG_SCANNER 0x00000800
-#define SSH_BUG_BIGENDIANAES 0x00001000
-#define SSH_BUG_RSASIGMD5 0x00002000
-#define SSH_OLD_DHGEX 0x00004000
-#define SSH_BUG_NOREKEY 0x00008000
-#define SSH_BUG_HBSERVICE 0x00010000
-#define SSH_BUG_OPENFAILURE 0x00020000
-#define SSH_BUG_DERIVEKEY 0x00040000
-/*#define this is free slot 0x00080000 */
-#define SSH_BUG_DUMMYCHAN 0x00100000
-#define SSH_BUG_EXTEOF 0x00200000
-#define SSH_BUG_K5USER 0x00400000
-#define SSH_BUG_PROBE 0x00800000
-#define SSH_BUG_LOCALES_NOT_LANGTAGS 0x01000000
-#define SSH_OLD_GSSAPI 0x02000000
-#define SSH_BUG_GSSAPI_BER 0x04000000
-#define SSH_BUG_FIRSTKEX 0x08000000
-#define SSH_BUG_RFWD_ADDR 0x10000000
-#define SSH_BUG_GSSKEX_HOSTKEY 0x20000000
-/* SSH_OLD_FORWARD_ADDR flag bumped up the SunSSH version to 1.2 */
-#define SSH_OLD_FORWARD_ADDR 0x40000000
-/* SSH_BUG_STRING_ENCODING flag bumped up the SunSSH version to 1.4 */
-#define SSH_BUG_STRING_ENCODING 0x80000000
-
-void enable_compat13(void);
-void enable_compat20(void);
-void compat_datafellows(const char *);
-int proto_spec(const char *);
-char *compat_cipher_proposal(char *);
-
-extern int compat13;
-extern int compat20;
-extern uint32_t datafellows;
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _COMPAT_H */
diff --git a/usr/src/cmd/ssh/include/compress.h b/usr/src/cmd/ssh/include/compress.h
deleted file mode 100644
index ad87892227..0000000000
--- a/usr/src/cmd/ssh/include/compress.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/* $OpenBSD: compress.h,v 1.11 2002/03/04 17:27:39 stevesk Exp $ */
-
-#ifndef _COMPRESS_H
-#define _COMPRESS_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * Interface to packet compression for ssh.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-void buffer_compress_init_send(int);
-void buffer_compress_init_recv(void);
-void buffer_compress_uninit(void);
-void buffer_compress(Buffer *, Buffer *);
-void buffer_uncompress(Buffer *, Buffer *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _COMPRESS_H */
diff --git a/usr/src/cmd/ssh/include/config.h b/usr/src/cmd/ssh/include/config.h
deleted file mode 100644
index 437b120029..0000000000
--- a/usr/src/cmd/ssh/include/config.h
+++ /dev/null
@@ -1,983 +0,0 @@
-/* config.h. Generated by configure. */
-/* config.h.in. Generated from configure.ac by autoheader. */
-/* $Id: acconfig.h,v 1.145 2002/09/26 00:38:48 tim Exp $ */
-
-/*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 Gary Mills
- */
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/* Generated automatically from acconfig.h by autoheader. */
-/* Please make your changes there */
-
-
-/* Define to a Set Process Title type if your system is */
-/* supported by bsd-setproctitle.c */
-/* #undef SPT_TYPE */
-
-/* setgroups() NOOP allowed */
-/* #undef SETGROUPS_NOOP */
-
-/* SCO workaround */
-/* #undef BROKEN_SYS_TERMIO_H */
-
-/* If your header files don't define LOGIN_PROGRAM, then use this (detected) */
-/* from environment and PATH */
-#define LOGIN_PROGRAM_FALLBACK "/usr/bin/login"
-
-/* Define if your password has a pw_class field */
-/* #undef HAVE_PW_CLASS_IN_PASSWD */
-
-/* Define if your password has a pw_expire field */
-/* #undef HAVE_PW_EXPIRE_IN_PASSWD */
-
-/* Define if your password has a pw_change field */
-/* #undef HAVE_PW_CHANGE_IN_PASSWD */
-
-/* Define if your system uses access rights style file descriptor passing */
-#define HAVE_ACCRIGHTS_IN_MSGHDR 1
-
-/* Define if your system uses ancillary data style file descriptor passing */
-/* #undef HAVE_CONTROL_IN_MSGHDR */
-
-/* Define if you system's inet_ntoa is busted (e.g. Irix gcc issue) */
-/* #undef BROKEN_INET_NTOA */
-
-/* Define if your system defines sys_errlist[] */
-#define HAVE_SYS_ERRLIST 1
-
-/* Define if your system defines sys_nerr */
-#define HAVE_SYS_NERR 1
-
-/* Define if your system choked on IP TOS setting */
-#define IP_TOS_IS_BROKEN 1
-
-/* Define if you have the getuserattr function. */
-/* #undef HAVE_GETUSERATTR */
-
-/* Work around problematic Linux PAM modules handling of PAM_TTY */
-#define PAM_TTY_KLUDGE 1
-
-/* Define if your snprintf is busted */
-/* #undef BROKEN_SNPRINTF */
-
-/* Define if you are on Cygwin */
-/* #undef HAVE_CYGWIN */
-
-/* Define if you have a broken realpath. */
-/* #undef BROKEN_REALPATH */
-
-/* Define if you are on NEWS-OS */
-/* #undef HAVE_NEWS4 */
-
-/* Define if you want to enable PAM support */
-#define USE_PAM 1
-
-/* Define if you want to enable AIX4's authenticate function */
-/* #undef WITH_AIXAUTHENTICATE */
-
-/*
- * Define if you have/want arrays (cluster-wide session managment, not C
- * arrays)
- */
-/* #undef WITH_IRIX_ARRAY */
-
-/* Define if you want IRIX project management */
-/* #undef WITH_IRIX_PROJECT */
-
-/* Define if you want IRIX audit trails */
-/* #undef WITH_IRIX_AUDIT */
-
-/* Define if you want IRIX kernel jobs */
-/* #undef WITH_IRIX_JOBS */
-
-/* Location of PRNGD/EGD random number socket */
-/* #undef PRNGD_SOCKET */
-
-/* Port number of PRNGD/EGD random number socket */
-/* #undef PRNGD_PORT */
-
-/* Builtin PRNG command timeout */
-#define ENTROPY_TIMEOUT_MSEC 200
-
-/* non-privileged user for privilege separation */
-#define SSH_PRIVSEP_USER "sshd"
-
-/* Define if you want to install preformatted manpages. */
-/* #undef MANTYPE */
-
-/* Define if your ssl headers are included with #include <openssl/header.h> */
-#define HAVE_OPENSSL 1
-
-/* Define if Solaris' OpenSSL lacks AES support */
-#define SOLARIS_OPENSSL_NO_AES 1
-
-/* Define if Solaris-style Least Privilege is available */
-#define HAVE_SOLARIS_PRIVILEGE 1
-
-/* Define if you want Sun's alternative privilege separation */
-#define ALTPRIVSEP
-
-/* Define if you have Solaris-style Contracts */
-#define HAVE_SOLARIS_CONTRACTS 1
-
-/* Define if SVR4-style libcmd (for accessing /etc/default/ files) */
-#define HAVE_DEFOPEN 1
-
-/*
- * Define if you are linking against RSAref. Used only to print the right
- * message at run-time.
- */
-/* #undef RSAREF */
-
-/* struct timeval */
-#define HAVE_STRUCT_TIMEVAL 1
-
-/* struct utmp and struct utmpx fields */
-/* #undef HAVE_HOST_IN_UTMP */
-#define HAVE_HOST_IN_UTMPX 1
-/* #undef HAVE_ADDR_IN_UTMP */
-/* #undef HAVE_ADDR_IN_UTMPX */
-/* #undef HAVE_ADDR_V6_IN_UTMP */
-/* #undef HAVE_ADDR_V6_IN_UTMPX */
-#define HAVE_SYSLEN_IN_UTMPX 1
-#define HAVE_PID_IN_UTMP 1
-#define HAVE_TYPE_IN_UTMP 1
-#define HAVE_TYPE_IN_UTMPX 1
-/* #undef HAVE_TV_IN_UTMP */
-#define HAVE_TV_IN_UTMPX 1
-#define HAVE_ID_IN_UTMP 1
-#define HAVE_ID_IN_UTMPX 1
-#define HAVE_EXIT_IN_UTMP 1
-#define HAVE_TIME_IN_UTMP 1
-#define HAVE_TIME_IN_UTMPX 1
-
-/* Define if you don't want to use your system's login() call */
-/* #undef DISABLE_LOGIN */
-
-/* Define if you don't want to use pututline() etc. to write [uw]tmp */
-/* #undef DISABLE_PUTUTLINE */
-
-/* Define if you don't want to use pututxline() etc. to write [uw]tmpx */
-/* #undef DISABLE_PUTUTXLINE */
-
-/* Define if you don't want to use lastlog */
-/* #undef DISABLE_LASTLOG */
-
-/* Define if you don't want to use lastlog in session.c */
-/* #undef NO_SSH_LASTLOG */
-
-/* Define if you don't want to use utmp */
-#define DISABLE_UTMP 1
-
-/* Define if you don't want to use utmpx */
-/* #undef DISABLE_UTMPX */
-
-/* Define if you don't want to use wtmp */
-#define DISABLE_WTMP 1
-
-/* Define if you don't want to use wtmpx */
-/* #undef DISABLE_WTMPX */
-
-/* Some systems need a utmpx entry for /bin/login to work */
-#define LOGIN_NEEDS_UTMPX 1
-
-/* Some versions of /bin/login need the TERM supplied on the commandline */
-#define LOGIN_NEEDS_TERM 1
-
-/* Define if your login program cannot handle end of options ("--") */
-/* #undef LOGIN_NO_ENDOPT */
-
-/* Define if you want to specify the path to your lastlog file */
-#define CONF_LASTLOG_FILE "/var/adm/lastlog"
-
-/* Define if you want to specify the path to your utmp file */
-/* #undef CONF_UTMP_FILE */
-
-/* Define if you want to specify the path to your wtmp file */
-/* #undef CONF_WTMP_FILE */
-
-/* Define if you want to specify the path to your utmpx file */
-/* #undef CONF_UTMPX_FILE */
-
-/* Define if you want to specify the path to your wtmpx file */
-/* #undef CONF_WTMPX_FILE */
-
-/* Define if you want external askpass support */
-/* #undef USE_EXTERNAL_ASKPASS */
-
-/* Define if libc defines __progname */
-#define HAVE___PROGNAME 1
-
-/* Define if compiler implements __FUNCTION__ */
-#define HAVE___FUNCTION__ 1
-
-/* Define if compiler implements __func__ */
-#define HAVE___func__ 1
-
-/* Define if you want GSS-API support */
-#define GSSAPI 1
-
-/* Define if you have <gssapi/gssapi.h> */
-#define SUNW_GSSAPI 1
-
-/* Define if you have GSS_Store_cred() */
-#define HAVE_GSS_STORE_CRED 1
-
-/* Define if you have __gss_userok() */
-#define HAVE___GSS_USEROK 1
-
-/* Define for simple authorization of GSS-API principals */
-/* #undef GSSAPI_SIMPLE_USEROK */
-
-/* Define if you have gsscred_name_to_unix_cred() (Solaris) */
-#define HAVE_GSSCRED_API 1
-
-/* Define if you have __gss_oid_to_mech() */
-#define HAVE_GSS_OID_TO_MECH 1
-
-/* Define if you have gss_oid_to_str() */
-#define HAVE_GSS_OID_TO_STR 1
-
-/* Define if you want support for MIT krb5 GSS internals */
-/* #undef KRB5_GSS */
-
-/* Define if you want support for GSI GSS internals */
-/* #undef GSI_GSS */
-
-/* Define if you want raw Kerberos 5 support */
-/* #undef KRB5 */
-
-/* Define if you want GSI/Globus authentication support */
-/* #undef GSI */
-
-/* Define this if you are using the Heimdal version of Kerberos V5 */
-/* #undef HEIMDAL */
-
-/* Define if you want Kerberos 4 support */
-/* #undef KRB4 */
-
-/* Define if you want AFS support */
-/* #undef AFS */
-
-/* Define if you want S/Key support */
-/* #undef SKEY */
-
-/* Define if you want TCP Wrappers support */
-#define LIBWRAP 1
-
-/* Define if your libraries define login() */
-/* #undef HAVE_LOGIN */
-
-/* Define if your libraries define getpagesize() */
-#define HAVE_GETPAGESIZE 1
-
-/* Define if xauth is found in your path */
-#define XAUTH_PATH "/usr/X11/bin/xauth"
-
-/* Define if rsh is found in your path */
-#define RSH_PATH "/usr/bin/rsh"
-
-/* Define if you want to allow MD5 passwords */
-/* #undef HAVE_MD5_PASSWORDS */
-
-/* Define if you want to disable shadow passwords */
-/* #undef DISABLE_SHADOW */
-
-/* Define if you want to use shadow password expire field */
-/* #undef HAS_SHADOW_EXPIRE */
-
-/* Define if you have Digital Unix Security Integration Architecture */
-/* #undef HAVE_OSF_SIA */
-
-/* Define if you have getpwanam(3) [SunOS 4.x] */
-/* #undef HAVE_GETPWANAM */
-
-/* Define if you have an old version of PAM which takes only one argument */
-/* to pam_strerror */
-/* #undef HAVE_OLD_PAM */
-
-/* Define if you are using Solaris-derived PAM which passes pam_messages */
-/* to the conversation function with an extra level of indirection */
-#define PAM_SUN_CODEBASE 1
-
-/* Set this to your mail directory if you don't have maillock.h */
-/* #undef MAIL_DIRECTORY */
-
-/* Data types */
-#define HAVE_U_INT 1
-#define HAVE_INTXX_T 1
-/* #undef HAVE_U_INTXX_T */
-#define HAVE_UINTXX_T 1
-#define HAVE_INT64_T 1
-/* #undef HAVE_U_INT64_T */
-#define HAVE_U_CHAR 1
-#define HAVE_SIZE_T 1
-#define HAVE_SSIZE_T 1
-#define HAVE_CLOCK_T 1
-#define HAVE_MODE_T 1
-#define HAVE_PID_T 1
-#define HAVE_SA_FAMILY_T 1
-#define HAVE_STRUCT_SOCKADDR_STORAGE 1
-#define HAVE_STRUCT_ADDRINFO 1
-#define HAVE_STRUCT_IN6_ADDR 1
-#define HAVE_STRUCT_SOCKADDR_IN6 1
-
-/* Fields in struct sockaddr_storage */
-#define HAVE_SS_FAMILY_IN_SS 1
-/* #undef HAVE___SS_FAMILY_IN_SS */
-
-/* Define if you have /dev/ptmx */
-#define HAVE_DEV_PTMX 1
-
-/* Define if you have /dev/ptc */
-/* #undef HAVE_DEV_PTS_AND_PTC */
-
-/* Define if you need to use IP address instead of hostname in $DISPLAY */
-/* #undef IPADDR_IN_DISPLAY */
-
-/*
- * Specify the default $PATH. While /bin is a symbolic link to /usr/bin in
- * Solaris, to include both of them there may help when users use
- * ChrootDirectory options with plain SSH connections, without their own shell
- * profiles.
- */
-#define USER_PATH "/usr/bin:/bin"
-
-/* Specify location of ssh.pid */
-#define _PATH_SSH_PIDDIR "/var/run"
-
-/* Use IPv4 for connection by default, IPv6 can still if explicity asked */
-/* #undef IPV4_DEFAULT */
-
-/* getaddrinfo is broken (if present) */
-/* #undef BROKEN_GETADDRINFO */
-
-/* Workaround more Linux IPv6 quirks */
-/* #undef DONT_TRY_OTHER_AF */
-
-/* Detect IPv4 in IPv6 mapped addresses and treat as IPv4 */
-#define IPV4_IN_IPV6 1
-
-/* Define if you have BSD auth support */
-/* #undef BSD_AUTH */
-
-/* Define if X11 doesn't support AF_UNIX sockets on that system */
-/* #undef NO_X11_UNIX_SOCKETS */
-
-/* Define if the concept of ports only accessible to superusers isn't known */
-/* #undef NO_IPPORT_RESERVED_CONCEPT */
-
-/* Needed for SCO and NeXT */
-/* #undef BROKEN_SAVED_UIDS */
-
-/* Define if your system glob() function has the GLOB_ALTDIRFUNC extension */
-#define GLOB_HAS_ALTDIRFUNC 1
-
-/* Define if your system glob() function has gl_matchc options in glob_t */
-#define GLOB_HAS_GL_MATCHC 1
-
-/*
- * Define in your struct dirent expects you to allocate extra space for
- * d_name
- */
-#define BROKEN_ONE_BYTE_DIRENT_D_NAME 1
-
-/* Define if your getopt(3) defines and uses optreset */
-/* #undef HAVE_GETOPT_OPTRESET */
-
-/* Define on *nto-qnx systems */
-/* #undef MISSING_NFDBITS */
-
-/* Define on *nto-qnx systems */
-/* #undef MISSING_HOWMANY */
-
-/* Define on *nto-qnx systems */
-/* #undef MISSING_FD_MASK */
-
-/*
- * Use libedit or libtecla for sftp
- * If both USE_LIBEDIT and USE_LIBTECLA are defined, then USE_LIBEDIT will
- * have higher precedence.
- */
-#undef USE_LIBEDIT
-#define USE_LIBTECLA 1
-
-/* Define if you want to use OpenSSL's internally seeded PRNG only */
-#define OPENSSL_PRNG_ONLY 1
-
-/* Define if you shouldn't strip 'tty' from your ttyname in [uw]tmp */
-/* #undef WITH_ABBREV_NO_TTY */
-
-/* Define if you want a different $PATH for the superuser */
-#define SUPERUSER_PATH "/usr/sbin:/usr/bin"
-
-/* Path that unprivileged child will chroot() to in privep mode */
-/* #undef PRIVSEP_PATH */
-
-/* Define if your platform needs to skip post auth file descriptor passing */
-/* #undef DISABLE_FD_PASSING */
-
-
-/* Define to 1 if the `getpgrp' function requires zero arguments. */
-#define GETPGRP_VOID 1
-
-/* Define to 1 if you have the `arc4random' function. */
-/* #undef HAVE_ARC4RANDOM */
-
-/* Define to 1 if you have the `asprintf' function. */
-#define HAVE_ASPRINTF 1
-
-/* Define to 1 if you have the `b64_ntop' function. */
-/* #undef HAVE_B64_NTOP */
-
-/* Define to 1 if you have the `bcopy' function. */
-#define HAVE_BCOPY 1
-
-/* Define to 1 if you have the `bindresvport_sa' function. */
-/* #undef HAVE_BINDRESVPORT_SA */
-
-/* Define to 1 if you have the <bstring.h> header file. */
-/* #undef HAVE_BSTRING_H */
-
-/* Define to 1 if you have the `clock' function. */
-#define HAVE_CLOCK 1
-
-/* Define to 1 if you have the <crypt.h> header file. */
-#define HAVE_CRYPT_H 1
-
-/* Define to 1 if you have the `dirname' function. */
-#define HAVE_DIRNAME 1
-
-/* Define to 1 if you have the <endian.h> header file. */
-/* #undef HAVE_ENDIAN_H */
-
-/* Define to 1 if you have the `endutent' function. */
-#define HAVE_ENDUTENT 1
-
-/* Define to 1 if you have the `endutxent' function. */
-#define HAVE_ENDUTXENT 1
-
-/* Define to 1 if you have the `fchmod' function. */
-#define HAVE_FCHMOD 1
-
-/* Define to 1 if you have the `fchown' function. */
-#define HAVE_FCHOWN 1
-
-/* Define to 1 if you have the <floatingpoint.h> header file. */
-#define HAVE_FLOATINGPOINT_H 1
-
-/* Define to 1 if you have the `freeaddrinfo' function. */
-#define HAVE_FREEADDRINFO 1
-
-/* Define to 1 if you have the `futimes' function. */
-/* #undef HAVE_FUTIMES */
-
-/* Define to 1 if you have the `gai_strerror' function. */
-#define HAVE_GAI_STRERROR 1
-
-/* Define to 1 if you have the `getaddrinfo' function. */
-#define HAVE_GETADDRINFO 1
-
-/* Define to 1 if you have the `getcwd' function. */
-#define HAVE_GETCWD 1
-
-/* Define to 1 if you have the `getgrouplist' function. */
-/* #undef HAVE_GETGROUPLIST */
-
-/* Define to 1 if you have the `getluid' function. */
-/* #undef HAVE_GETLUID */
-
-/* Define to 1 if you have the `getnameinfo' function. */
-#define HAVE_GETNAMEINFO 1
-
-/* Define to 1 if you have the `getopt' function. */
-#define HAVE_GETOPT 1
-
-/* Define to 1 if you have the <getopt.h> header file. */
-/* #undef HAVE_GETOPT_H */
-
-/* Define to 1 if you have the `getpeereid' function. */
-/* #undef HAVE_GETPEEREID */
-
-/* Define to 1 if you have the `getpeerucred' function. */
-#define HAVE_GETPEERUCRED 1
-
-/* Define to 1 if you have the `getpwanam' function. */
-/* #undef HAVE_GETPWANAM */
-
-/* Define to 1 if you have the `getrlimit' function. */
-#define HAVE_GETRLIMIT 1
-
-/* Define to 1 if you have the `getrusage' function. */
-#define HAVE_GETRUSAGE 1
-
-/* Define to 1 if you have the `gettimeofday' function. */
-#define HAVE_GETTIMEOFDAY 1
-
-/* Define to 1 if you have the `getttyent' function. */
-/* #undef HAVE_GETTTYENT */
-
-/* Define to 1 if you have the `getutent' function. */
-#define HAVE_GETUTENT 1
-
-/* Define to 1 if you have the `getutid' function. */
-#define HAVE_GETUTID 1
-
-/* Define to 1 if you have the `getutline' function. */
-#define HAVE_GETUTLINE 1
-
-/* Define to 1 if you have the `getutxent' function. */
-#define HAVE_GETUTXENT 1
-
-/* Define to 1 if you have the `getutxid' function. */
-#define HAVE_GETUTXID 1
-
-/* Define to 1 if you have the `getutxline' function. */
-#define HAVE_GETUTXLINE 1
-
-/* Define to 1 if you have the `glob' function. */
-#define HAVE_GLOB 1
-
-/* Define to 1 if you have the <glob.h> header file. */
-#define HAVE_GLOB_H 1
-
-/* Define to 1 if you have the <ia.h> header file. */
-/* #undef HAVE_IA_H */
-
-/* Define to 1 if you have the `inet_aton' function. */
-/* #undef HAVE_INET_ATON */
-
-/* Define to 1 if you have the `inet_ntoa' function. */
-#define HAVE_INET_NTOA 1
-
-/* Define to 1 if you have the `inet_ntop' function. */
-#define HAVE_INET_NTOP 1
-
-/* Define to 1 if you have the `innetgr' function. */
-#define HAVE_INNETGR 1
-
-/* Define to 1 if you have the <inttypes.h> header file. */
-#define HAVE_INTTYPES_H 1
-
-/* Define to 1 if you have the <krb.h> header file. */
-/* #undef HAVE_KRB_H */
-
-/* Define to 1 if you have the <lastlog.h> header file. */
-#define HAVE_LASTLOG_H 1
-
-/* Define to 1 if you have the `crypt' library (-lcrypt). */
-/* #undef HAVE_LIBCRYPT */
-
-/* Define to 1 if you have the `des' library (-ldes). */
-/* #undef HAVE_LIBDES */
-
-/* Define to 1 if you have the `des425' library (-ldes425). */
-/* #undef HAVE_LIBDES425 */
-
-/* Define to 1 if you have the `dl' library (-ldl). */
-#define HAVE_LIBDL 1
-
-/* Define to 1 if you have the <libgen.h> header file. */
-#define HAVE_LIBGEN_H 1
-
-/* Define to 1 if you have the `krb' library (-lkrb). */
-/* #undef HAVE_LIBKRB */
-
-/* Define to 1 if you have the `krb4' library (-lkrb4). */
-/* #undef HAVE_LIBKRB4 */
-
-/* Define to 1 if you have the `nsl' library (-lnsl). */
-#define HAVE_LIBNSL 1
-
-/* Define to 1 if you have the `pam' library (-lpam). */
-#define HAVE_LIBPAM 1
-
-/* Define to 1 if you have the `resolv' library (-lresolv). */
-/* #undef HAVE_LIBRESOLV */
-
-/* Define to 1 if you have the `sectok' library (-lsectok). */
-/* #undef HAVE_LIBSECTOK */
-
-/* Define to 1 if you have the `socket' library (-lsocket). */
-#define HAVE_LIBSOCKET 1
-
-/* Define to 1 if you have the <libutil.h> header file. */
-/* #undef HAVE_LIBUTIL_H */
-
-/* Define to 1 if you have the `xnet' library (-lxnet). */
-/* #undef HAVE_LIBXNET */
-
-/* Define to 1 if you have the `z' library (-lz). */
-#define HAVE_LIBZ 1
-
-/* Define to 1 if you have the <limits.h> header file. */
-#define HAVE_LIMITS_H 1
-
-/* Define to 1 if you have the <login.h> header file. */
-/* #undef HAVE_LOGIN_H */
-
-/* Define to 1 if you have the `logout' function. */
-/* #undef HAVE_LOGOUT */
-
-/* Define to 1 if you have the `logwtmp' function. */
-/* #undef HAVE_LOGWTMP */
-
-/* Define to 1 if you have the <maillock.h> header file. */
-#define HAVE_MAILLOCK_H 1
-
-/* Define to 1 if you have the `md5_crypt' function. */
-/* #undef HAVE_MD5_CRYPT */
-
-/* Define to 1 if you have the `memmove' function. */
-#define HAVE_MEMMOVE 1
-
-/* Define to 1 if you have the <memory.h> header file. */
-#define HAVE_MEMORY_H 1
-
-/* Define to 1 if you have mkstemp, mkstemps and mkdtemp */
-#define HAVE_MKDTEMP 1
-
-/* Define to 1 if you have the `mmap' function. */
-#define HAVE_MMAP 1
-
-/* Define to 1 if you have the <netdb.h> header file. */
-#define HAVE_NETDB_H 1
-
-/* Define to 1 if you have the <netgroup.h> header file. */
-/* #undef HAVE_NETGROUP_H */
-
-/* Define to 1 if you have the <netinet/in_systm.h> header file. */
-#define HAVE_NETINET_IN_SYSTM_H 1
-
-/* Define to 1 if you have the `ngetaddrinfo' function. */
-/* #undef HAVE_NGETADDRINFO */
-
-/* Define to 1 if you have the `ogetaddrinfo' function. */
-/* #undef HAVE_OGETADDRINFO */
-
-/* Define to 1 if you have the `openpty' function. */
-/* #undef HAVE_OPENPTY */
-
-/* Define to 1 if you have the `pam_getenvlist' function. */
-#define HAVE_PAM_GETENVLIST 1
-
-/* Define to 1 if you have the <paths.h> header file. */
-/* #undef HAVE_PATHS_H */
-
-/* Define to 1 if you have the <pty.h> header file. */
-/* #undef HAVE_PTY_H */
-
-/* Define to 1 if you have the `pututline' function. */
-#define HAVE_PUTUTLINE 1
-
-/* Define to 1 if you have the `pututxline' function. */
-#define HAVE_PUTUTXLINE 1
-
-/* Define to 1 if you have the `readpassphrase' function. */
-/* #undef HAVE_READPASSPHRASE */
-
-/* Define to 1 if you have the <readpassphrase.h> header file. */
-/* #undef HAVE_READPASSPHRASE_H */
-
-/* Define to 1 if you have the `realpath' function. */
-#define HAVE_REALPATH 1
-
-/* Define to 1 if you have the `recvmsg' function. */
-#define HAVE_RECVMSG 1
-
-/* Define to 1 if you have the <rpc/types.h> header file. */
-#define HAVE_RPC_TYPES_H 1
-
-/* Define to 1 if you have the `rresvport_af' function. */
-#define HAVE_RRESVPORT_AF 1
-
-/* Define to 1 if you have the <sectok.h> header file. */
-/* #undef HAVE_SECTOK_H */
-
-/* Define to 1 if you have the <security/pam_appl.h> header file. */
-#define HAVE_SECURITY_PAM_APPL_H 1
-
-/* Define to 1 if you have the `sendmsg' function. */
-#define HAVE_SENDMSG 1
-
-/* Define to 1 if you have the `setdtablesize' function. */
-/* #undef HAVE_SETDTABLESIZE */
-
-/* Define to 1 if you have the `setegid' function. */
-#define HAVE_SETEGID 1
-
-/* Define to 1 if you have the `setenv' function. */
-#define HAVE_SETENV 1
-
-/* Define to 1 if you have the `seteuid' function. */
-#define HAVE_SETEUID 1
-
-/* Define to 1 if you have the `setgroups' function. */
-#define HAVE_SETGROUPS 1
-
-/* Define to 1 if you have the `setlogin' function. */
-/* #undef HAVE_SETLOGIN */
-
-/* Define to 1 if you have the `setluid' function. */
-/* #undef HAVE_SETLUID */
-
-/* Define to 1 if you have the `setpcred' function. */
-/* #undef HAVE_SETPCRED */
-
-/* Define to 1 if you have the `setproctitle' function. */
-/* #undef HAVE_SETPROCTITLE */
-
-/* Define to 1 if you have the `setresgid' function. */
-/* #undef HAVE_SETRESGID */
-
-/* Define to 1 if you have the `setreuid' function. */
-#define HAVE_SETREUID 1
-
-/* Define to 1 if you have the `setrlimit' function. */
-#define HAVE_SETRLIMIT 1
-
-/* Define to 1 if you have the `setsid' function. */
-#define HAVE_SETSID 1
-
-/* Define to 1 if you have the `setutent' function. */
-#define HAVE_SETUTENT 1
-
-/* Define to 1 if you have the `setutxent' function. */
-#define HAVE_SETUTXENT 1
-
-/* Define to 1 if you have the `setvbuf' function. */
-#define HAVE_SETVBUF 1
-
-/* Define to 1 if you have the <shadow.h> header file. */
-#define HAVE_SHADOW_H 1
-
-/* Define to 1 if you have the `sigaction' function. */
-#define HAVE_SIGACTION 1
-
-/* Define to 1 if you have the `sigvec' function. */
-/* #undef HAVE_SIGVEC */
-
-/* Define to 1 if the system has the type `sig_atomic_t'. */
-#define HAVE_SIG_ATOMIC_T 1
-
-/* Define to 1 if you have the `snprintf' function. */
-#define HAVE_SNPRINTF 1
-
-/* Define to 1 if you have the `socketpair' function. */
-#define HAVE_SOCKETPAIR 1
-
-/* Define to 1 if you have the <stddef.h> header file. */
-#define HAVE_STDDEF_H 1
-
-/* Define to 1 if you have the <stdint.h> header file. */
-/* #undef HAVE_STDINT_H */
-
-/* Define to 1 if you have the <stdlib.h> header file. */
-#define HAVE_STDLIB_H 1
-
-/* Define to 1 if you have the `strerror' function. */
-#define HAVE_STRERROR 1
-
-/* Define to 1 if you have the `strftime' function. */
-#define HAVE_STRFTIME 1
-
-/* Define to 1 if you have the <strings.h> header file. */
-#define HAVE_STRINGS_H 1
-
-/* Define to 1 if you have the <string.h> header file. */
-#define HAVE_STRING_H 1
-
-/* Define to 1 if you have the `strlcat' function. */
-#define HAVE_STRLCAT 1
-
-/* Define to 1 if you have the `strlcpy' function. */
-#define HAVE_STRLCPY 1
-
-/* Define to 1 if you have the `strmode' function. */
-/* #undef HAVE_STRMODE */
-
-/* Define to 1 if `st_blksize' is member of `struct stat'. */
-#define HAVE_STRUCT_STAT_ST_BLKSIZE 1
-
-/* Define to 1 if you have the `sysconf' function. */
-#define HAVE_SYSCONF 1
-
-/* Define to 1 if you have the <sys/bitypes.h> header file. */
-/* #undef HAVE_SYS_BITYPES_H */
-
-/* Define to 1 if you have the <sys/bsdtty.h> header file. */
-/* #undef HAVE_SYS_BSDTTY_H */
-
-/* Define to 1 if you have the <sys/cdefs.h> header file. */
-/* #undef HAVE_SYS_CDEFS_H */
-
-
-/* Define to 1 if you have the <sys/mman.h> header file. */
-#define HAVE_SYS_MMAN_H 1
-
-/* Define to 1 if you have the <sys/select.h> header file. */
-#define HAVE_SYS_SELECT_H 1
-
-/* Define to 1 if you have the <sys/stat.h> header file. */
-#define HAVE_SYS_STAT_H 1
-
-/* Define to 1 if you have the <sys/stropts.h> header file. */
-#define HAVE_SYS_STROPTS_H 1
-
-/* Define to 1 if you have the <sys/sysmacros.h> header file. */
-#define HAVE_SYS_SYSMACROS_H 1
-
-/* Define to 1 if you have the <sys/time.h> header file. */
-#define HAVE_SYS_TIME_H 1
-
-/* Define to 1 if you have the <sys/types.h> header file. */
-#define HAVE_SYS_TYPES_H 1
-
-/* Define to 1 if you have the <sys/un.h> header file. */
-#define HAVE_SYS_UN_H 1
-
-/* Define to 1 if you have the `tcgetpgrp' function. */
-#define HAVE_TCGETPGRP 1
-
-/* Define to 1 if you have the `time' function. */
-#define HAVE_TIME 1
-
-/* Define to 1 if you have the <time.h> header file. */
-#define HAVE_TIME_H 1
-
-/* Define to 1 if you have the <tmpdir.h> header file. */
-/* #undef HAVE_TMPDIR_H */
-
-/* Define to 1 if you have the `truncate' function. */
-#define HAVE_TRUNCATE 1
-
-/* Define to 1 if you have the <ttyent.h> header file. */
-/* #undef HAVE_TTYENT_H */
-
-/* Define to 1 if you have the <ucred.h> header file. */
-#define HAVE_UCRED_H 1
-
-/* Define to 1 if you have the <unistd.h> header file. */
-#define HAVE_UNISTD_H 1
-
-/* Define to 1 if you have the `updwtmp' function. */
-#define HAVE_UPDWTMP 1
-
-/* Define to 1 if you have the <usersec.h> header file. */
-/* #undef HAVE_USERSEC_H */
-
-/* Define to 1 if you have the <util.h> header file. */
-/* #undef HAVE_UTIL_H */
-
-/* Define to 1 if you have the `utimes' function. */
-#define HAVE_UTIMES 1
-
-/* Define to 1 if you have the <utime.h> header file. */
-#define HAVE_UTIME_H 1
-
-/* Define to 1 if you have the `utmpname' function. */
-#define HAVE_UTMPNAME 1
-
-/* Define to 1 if you have the `utmpxname' function. */
-#define HAVE_UTMPXNAME 1
-
-/* Define to 1 if you have the <utmpx.h> header file. */
-#define HAVE_UTMPX_H 1
-
-/* Define to 1 if you have the <utmp.h> header file. */
-#define HAVE_UTMP_H 1
-
-/* Define to 1 if you have the `vasprintf' function. */
-#define HAVE_VASPRINTF 1
-
-/* Define to 1 if you have the `vhangup' function. */
-#define HAVE_VHANGUP 1
-
-/* Define to 1 if you have the `vsnprintf' function. */
-#define HAVE_VSNPRINTF 1
-
-/* Define to 1 if you have the `waitpid' function. */
-#define HAVE_WAITPID 1
-
-/* Define to 1 if you have the `_getpty' function. */
-/* #undef HAVE__GETPTY */
-
-/* Define to 1 if you have the `__b64_ntop' function. */
-/* #undef HAVE___B64_NTOP */
-
-/* Define to the address where bug reports for this package should be sent. */
-#define PACKAGE_BUGREPORT ""
-
-/* Define to the full name of this package. */
-#define PACKAGE_NAME ""
-
-/* Define to the full name and version of this package. */
-#define PACKAGE_STRING ""
-
-/* Define to the one symbol short name of this package. */
-#define PACKAGE_TARNAME ""
-
-/* Define to the version of this package. */
-#define PACKAGE_VERSION ""
-
-/* The size of a `char', as computed by sizeof. */
-#define SIZEOF_CHAR 1
-
-/* The size of a `int', as computed by sizeof. */
-#define SIZEOF_INT 4
-
-/* The size of a `long int', as computed by sizeof. */
-#define SIZEOF_LONG_INT 4
-
-/* The size of a `long long int', as computed by sizeof. */
-#define SIZEOF_LONG_LONG_INT 8
-
-/* The size of a `short int', as computed by sizeof. */
-#define SIZEOF_SHORT_INT 2
-
-/* Define to 1 if you have the ANSI C header files. */
-#define STDC_HEADERS 1
-
-/*
- * Define to 1 if your processor stores words with the most significant byte
- * first (like Motorola and SPARC, unlike Intel and VAX).
- */
-#define WORDS_BIGENDIAN 1
-
-/* Number of bits in a file offset, on hosts where this is settable. */
-#define _FILE_OFFSET_BITS 64
-
-/* Define for large files, on AIX-style hosts. */
-/* #undef _LARGE_FILES */
-
-/*
- * Define as `__inline' if that's what the C compiler calls it, or to nothing if
- * it is not supported.
- */
-/* #undef inline */
-
-/* type to use in place of socklen_t if not defined */
-/* #undef socklen_t */
-
-/* Define for BSM auditing (Solaris) support */
-#define HAVE_BSM 1
-
-/* Define if compiling in ON */
-#define SUNW_SSH 1
-
-/* ******************* Shouldn't need to edit below this line ************** */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _CONFIG_H */
diff --git a/usr/src/cmd/ssh/include/crc32.h b/usr/src/cmd/ssh/include/crc32.h
deleted file mode 100644
index 3218cc925c..0000000000
--- a/usr/src/cmd/ssh/include/crc32.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/* $OpenBSD: crc32.h,v 1.13 2002/03/04 17:27:39 stevesk Exp $ */
-
-#ifndef _CRC32_H
-#define _CRC32_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1992 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * Functions for computing 32-bit CRC.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-u_int ssh_crc32(const u_char *, u_int);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _CRC32_H */
diff --git a/usr/src/cmd/ssh/include/deattack.h b/usr/src/cmd/ssh/include/deattack.h
deleted file mode 100644
index fa7fc82312..0000000000
--- a/usr/src/cmd/ssh/include/deattack.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/* $OpenBSD: deattack.h,v 1.7 2001/06/26 17:27:23 markus Exp $ */
-
-#ifndef _DEATTACK_H
-#define _DEATTACK_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Cryptographic attack detector for ssh - Header file
- *
- * Copyright (c) 1998 CORE SDI S.A., Buenos Aires, Argentina.
- *
- * All rights reserved. Redistribution and use in source and binary
- * forms, with or without modification, are permitted provided that
- * this copyright notice is retained.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES ARE DISCLAIMED. IN NO EVENT SHALL CORE SDI S.A. BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY OR
- * CONSEQUENTIAL DAMAGES RESULTING FROM THE USE OR MISUSE OF THIS
- * SOFTWARE.
- *
- * Ariel Futoransky <futo@core-sdi.com>
- * <http://www.core-sdi.com>
- */
-
-/* Return codes */
-#define DEATTACK_OK 0
-#define DEATTACK_DETECTED 1
-#define DEATTACK_DOS_DETECTED 2
-
-int detect_attack(u_char *, u_int32_t, u_char[8]);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _DEATTACK_H */
diff --git a/usr/src/cmd/ssh/include/defines.h b/usr/src/cmd/ssh/include/defines.h
deleted file mode 100644
index 13eb012162..0000000000
--- a/usr/src/cmd/ssh/include/defines.h
+++ /dev/null
@@ -1,600 +0,0 @@
-/*
- * Copyright (c) 1999-2003 Damien Miller. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _DEFINES_H
-#define _DEFINES_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* $Id: defines.h,v 1.96 2002/09/26 00:38:48 tim Exp $ */
-
-
-/* Constants */
-#ifndef SHUT_RDWR
-enum
-{
- SHUT_RD = 0, /* No more receptions. */
- SHUT_WR, /* No more transmissions. */
- SHUT_RDWR /* No more receptions or transmissions. */
-};
-#define SHUT_RD SHUT_RD
-#define SHUT_WR SHUT_WR
-#define SHUT_RDWR SHUT_RDWR
-#endif
-
-#ifndef IPTOS_LOWDELAY
-#define IPTOS_LOWDELAY 0x10
-#define IPTOS_THROUGHPUT 0x08
-#define IPTOS_RELIABILITY 0x04
-#define IPTOS_LOWCOST 0x02
-#define IPTOS_MINCOST IPTOS_LOWCOST
-#endif /* IPTOS_LOWDELAY */
-
-#ifndef MAXPATHLEN
-#ifdef PATH_MAX
-#define MAXPATHLEN PATH_MAX
-#else /* PATH_MAX */
-#define MAXPATHLEN 64 /* Should be safe */
-#endif /* PATH_MAX */
-#endif /* MAXPATHLEN */
-
-#ifndef STDIN_FILENO
-#define STDIN_FILENO 0
-#endif
-
-#ifndef STDOUT_FILENO
-#define STDOUT_FILENO 1
-#endif
-
-#ifndef STDERR_FILENO
-#define STDERR_FILENO 2
-#endif
-
-/* Disable groupaccess if NGROUPS_UMAX, NGROUPS_MAX and NGROUPS are not set */
-#ifndef NGROUPS_UMAX
-#ifdef NGROUPS_MAX
-#define NGROUPS_UMAX NGROUPS_MAX
-#elif defined(NGROUPS)
-#define NGROUPS_UMAX NGROUPS
-#else
-#define NGROUPS_UMAX 0
-#endif
-#endif
-
-#ifndef O_NONBLOCK /* Non Blocking Open */
-#define O_NONBLOCK 00004
-#endif
-
-#ifndef S_ISDIR
-#define S_ISDIR(mode) (((mode) & (_S_IFMT)) == (_S_IFDIR))
-#endif /* S_ISDIR */
-
-#ifndef S_ISREG
-#define S_ISREG(mode) (((mode) & (_S_IFMT)) == (_S_IFREG))
-#endif /* S_ISREG */
-
-#ifndef S_ISLNK
-#define S_ISLNK(mode) (((mode) & S_IFMT) == S_IFLNK)
-#endif /* S_ISLNK */
-
-#ifndef S_IXUSR
-#define S_IXUSR 0000100 /* execute/search permission, */
-#define S_IXGRP 0000010 /* execute/search permission, */
-#define S_IXOTH 0000001 /* execute/search permission, */
-#define _S_IWUSR 0000200 /* write permission, */
-#define S_IWUSR _S_IWUSR /* write permission, owner */
-#define S_IWGRP 0000020 /* write permission, group */
-#define S_IWOTH 0000002 /* write permission, other */
-#define S_IRUSR 0000400 /* read permission, owner */
-#define S_IRGRP 0000040 /* read permission, group */
-#define S_IROTH 0000004 /* read permission, other */
-#define S_IRWXU 0000700 /* read, write, execute */
-#define S_IRWXG 0000070 /* read, write, execute */
-#define S_IRWXO 0000007 /* read, write, execute */
-#endif /* S_IXUSR */
-
-#if !defined(MAP_ANON) && defined(MAP_ANONYMOUS)
-#define MAP_ANON MAP_ANONYMOUS
-#endif
-
-#ifndef MAP_FAILED
-#define MAP_FAILED ((void *)-1)
-#endif
-
-/* *-*-nto-qnx doesn't define this constant in the system headers */
-#ifdef MISSING_NFDBITS
-#define NFDBITS (8 * sizeof (unsigned long))
-#endif
-
-/*
- * SCO Open Server 3 has INADDR_LOOPBACK defined in rpc/rpc.h but including
- * rpc/rpc.h breaks Solaris 6
- */
-#ifndef INADDR_LOOPBACK
-#define INADDR_LOOPBACK ((ulong_t)0x7f000001)
-#endif
-
-/* Types */
-
-/*
- * If sys/types.h does not supply intXX_t, supply them ourselves (or die trying)
- */
-#ifndef HAVE_U_INT
-/* for now, we can't remove u_int without changing almost all other files */
-/* CSTYLED */
-typedef unsigned int u_int;
-#endif
-
-#ifndef HAVE_INTXX_T
-#if (SIZEOF_CHAR == 1)
-typedef char int8_t;
-#else
-#error "8 bit int type not found."
-#endif
-#if (SIZEOF_SHORT_INT == 2)
-typedef short int int16_t;
-#else
-#ifdef _UNICOS
-#if (SIZEOF_SHORT_INT == 4)
-typedef short int16_t;
-#else
-typedef long int16_t;
-#endif
-#else
-#error "16 bit int type not found."
-#endif /* _UNICOS */
-#endif
-#if (SIZEOF_INT == 4)
-typedef int int32_t;
-#else
-#ifdef _UNICOS
-typedef long int32_t;
-#else
-#error "32 bit int type not found."
-#endif /* _UNICOS */
-#endif
-#endif
-
-/* If sys/types.h does not supply u_intXX_t, supply them ourselves */
-#ifndef HAVE_U_INTXX_T
-#ifdef HAVE_UINTXX_T
-typedef uint8_t u_int8_t;
-typedef uint16_t u_int16_t;
-typedef uint32_t u_int32_t;
-#define HAVE_U_INTXX_T 1
-#else
-#if (SIZEOF_CHAR == 1)
-typedef unsigned char u_int8_t;
-#else
-#error "8 bit int type not found."
-#endif
-#if (SIZEOF_SHORT_INT == 2)
-typedef unsigned short int u_int16_t;
-#else
-#ifdef _UNICOS
-#if (SIZEOF_SHORT_INT == 4)
-typedef unsigned short u_int16_t;
-#else
-typedef unsigned long u_int16_t;
-#endif
-#else
-#error "16 bit int type not found."
-#endif
-#endif
-#if (SIZEOF_INT == 4)
-typedef unsigned int u_int32_t;
-#else
-#ifdef _UNICOS
-typedef unsigned long u_int32_t;
-#else
-#error "32 bit int type not found."
-#endif
-#endif
-#endif
-#define __BIT_TYPES_DEFINED__
-#endif
-
-/* 64-bit types */
-#ifndef HAVE_INT64_T
-#if (SIZEOF_LONG_INT == 8)
-typedef long int int64_t;
-#define HAVE_INT64_T 1
-#else
-#if (SIZEOF_LONG_LONG_INT == 8)
-typedef long long int int64_t;
-#define HAVE_INT64_T 1
-#endif
-#endif
-#endif
-
-#ifndef HAVE_U_INT64_T
-#if (SIZEOF_LONG_INT == 8)
-typedef unsigned long int u_int64_t;
-#define HAVE_U_INT64_T 1
-#else
-#if (SIZEOF_LONG_LONG_INT == 8)
-typedef unsigned long long int u_int64_t;
-#define HAVE_U_INT64_T 1
-#endif
-#endif
-#endif
-
-#if !defined(HAVE_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT == 8)
-#define HAVE_LONG_LONG_INT 1
-#endif
-
-#ifndef HAVE_U_CHAR
-/* for now, we can't remove u_char without changing almost all other files */
-/* CSTYLED */
-typedef unsigned char u_char;
-#define HAVE_U_CHAR
-#endif /* HAVE_U_CHAR */
-
-#ifndef SIZE_T_MAX
-#define SIZE_T_MAX ULONG_MAX
-#endif /* SIZE_T_MAX */
-
-#ifndef HAVE_SIZE_T
-typedef unsigned int size_t;
-#define HAVE_SIZE_T
-#endif /* HAVE_SIZE_T */
-
-#ifndef HAVE_SSIZE_T
-typedef int ssize_t;
-#define HAVE_SSIZE_T
-#endif /* HAVE_SSIZE_T */
-
-#ifndef HAVE_CLOCK_T
-typedef long clock_t;
-#define HAVE_CLOCK_T
-#endif /* HAVE_CLOCK_T */
-
-#ifndef HAVE_SA_FAMILY_T
-typedef int sa_family_t;
-#define HAVE_SA_FAMILY_T
-#endif /* HAVE_SA_FAMILY_T */
-
-#ifndef HAVE_PID_T
-typedef int pid_t;
-#define HAVE_PID_T
-#endif /* HAVE_PID_T */
-
-#ifndef HAVE_SIG_ATOMIC_T
-typedef int sig_atomic_t;
-#define HAVE_SIG_ATOMIC_T
-#endif /* HAVE_SIG_ATOMIC_T */
-
-#ifndef HAVE_MODE_T
-typedef int mode_t;
-#define HAVE_MODE_T
-#endif /* HAVE_MODE_T */
-
-#if !defined(HAVE_SS_FAMILY_IN_SS) && defined(HAVE___SS_FAMILY_IN_SS)
-#define ss_family __ss_family
-#endif /* !defined(HAVE_SS_FAMILY_IN_SS) && defined(HAVE_SA_FAMILY_IN_SS) */
-
-#ifndef HAVE_SYS_UN_H
-struct sockaddr_un {
- short sun_family; /* AF_UNIX */
- char sun_path[108]; /* path name (gag) */
-};
-#endif /* HAVE_SYS_UN_H */
-
-#if defined(BROKEN_SYS_TERMIO_H) && !defined(_STRUCT_WINSIZE)
-#define _STRUCT_WINSIZE
-struct winsize {
- unsigned short ws_row; /* rows, in characters */
- unsigned short ws_col; /* columns, in character */
- unsigned short ws_xpixel; /* horizontal size, pixels */
- unsigned short ws_ypixel; /* vertical size, pixels */
-};
-#endif
-
-/* *-*-nto-qnx does not define this type in the system headers */
-#ifdef MISSING_FD_MASK
-typedef unsigned long int fd_mask;
-#endif
-
-/* Paths */
-
-#ifndef _PATH_BSHELL
-#define _PATH_BSHELL "/bin/sh"
-#endif
-
-#ifndef _PATH_CSHELL
-#define _PATH_CSHELL "/bin/csh"
-#endif
-
-#ifndef _PATH_SHELLS
-#define _PATH_SHELLS "/etc/shells"
-#endif
-
-#ifdef USER_PATH
-#ifdef _PATH_STDPATH
-#undef _PATH_STDPATH
-#endif
-#define _PATH_STDPATH USER_PATH
-#endif
-
-#ifndef _PATH_STDPATH
-#define _PATH_STDPATH "/usr/bin"
-#endif
-
-#ifndef _PATH_DEVNULL
-#define _PATH_DEVNULL "/dev/null"
-#endif
-
-#ifndef MAIL_DIRECTORY
-#define MAIL_DIRECTORY "/var/spool/mail"
-#endif
-
-#ifndef MAILDIR
-#define MAILDIR MAIL_DIRECTORY
-#endif
-
-#if !defined(_PATH_MAILDIR) && defined(MAILDIR)
-#define _PATH_MAILDIR MAILDIR
-#endif /* !defined(_PATH_MAILDIR) && defined(MAILDIR) */
-
-#ifndef _PATH_RSH
-#ifdef RSH_PATH
-#define _PATH_RSH RSH_PATH
-#else /* RSH_PATH */
-#define _PATH_RSH "/usr/bin/rsh"
-#endif /* RSH_PATH */
-#endif /* _PATH_RSH */
-
-#ifndef _PATH_NOLOGIN
-#define _PATH_NOLOGIN "/etc/nologin"
-#endif
-
-/* Define this to be the path of the xauth program. */
-#ifdef XAUTH_PATH
-#define _PATH_XAUTH XAUTH_PATH
-#endif /* XAUTH_PATH */
-
-/* derived from XF4/xc/lib/dps/Xlibnet.h */
-#ifndef X_UNIX_PATH
-#ifdef __hpux
-#define X_UNIX_PATH "/var/spool/sockets/X11/%u"
-#else
-#define X_UNIX_PATH "/tmp/.X11-unix/X%u"
-#endif
-#endif /* X_UNIX_PATH */
-
-#define _PATH_UNIX_X X_UNIX_PATH
-
-#ifndef _PATH_TTY
-#define _PATH_TTY "/dev/tty"
-#endif
-
-/* Macros */
-
-#ifndef MAX
-#define MAX(a, b) (((a) > (b)) ? (a) : (b))
-#define MIN(a, b) (((a) < (b)) ? (a) : (b))
-#endif
-
-#ifndef roundup
-#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
-#endif
-
-#ifndef timersub
-#define timersub(a, b, result) \
- do { \
- (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \
- (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \
- if ((result)->tv_usec < 0) { \
- --(result)->tv_sec; \
- (result)->tv_usec += 1000000; \
- } \
- } while (0)
-#endif
-
-#ifndef __P
-#define __P(x) x
-#endif
-
-#if !defined(IN6_IS_ADDR_V4MAPPED)
-#define IN6_IS_ADDR_V4MAPPED(a) \
- ((((u_int32_t *)(a))[0] == 0) && (((u_int32_t *)(a))[1] == 0) && \
- (((u_int32_t *)(a))[2] == htonl(0xffff)))
-#endif /* !defined(IN6_IS_ADDR_V4MAPPED) */
-
-#if !defined(__GNUC__) || (__GNUC__ < 2)
-#define __attribute__(x)
-#endif /* !defined(__GNUC__) || (__GNUC__ < 2) */
-
-#if !defined(HAVE_ATTRIBUTE__BOUNDED__) && !defined(__bounded__)
-#define __bounded__(x, y, z)
-#endif
-
-/* *-*-nto-qnx doesn't define this macro in the system headers */
-#ifdef MISSING_HOWMANY
-#define howmany(x, y) (((x) + ((y) - 1)) / (y))
-#endif
-
-#ifndef OSSH_ALIGNBYTES
-#define OSSH_ALIGNBYTES (sizeof (int) - 1)
-#endif
-
-#ifndef __CMSG_ALIGN
-#define __CMSG_ALIGN(p) (((uint_t)(p) + OSSH_ALIGNBYTES) &~ OSSH_ALIGNBYTES)
-#endif
-
-/* Length of the contents of a control message of length len */
-#ifndef CMSG_LEN
-#define CMSG_LEN(len) (__CMSG_ALIGN(sizeof (struct cmsghdr)) + (len))
-#endif
-
-/* Length of the space taken up by a padded control message of length len */
-#ifndef CMSG_SPACE
-#define CMSG_SPACE(len) \
- (__CMSG_ALIGN(sizeof (struct cmsghdr)) + __CMSG_ALIGN(len))
-#endif
-
-/* Function replacement / compatibility hacks */
-
-#if !defined(HAVE_GETADDRINFO) && (defined(HAVE_OGETADDRINFO) || \
- defined(HAVE_NGETADDRINFO))
-#define HAVE_GETADDRINFO
-#endif
-
-#ifndef HAVE_GETOPT_OPTRESET
-#undef getopt
-#undef opterr
-#undef optind
-#undef optopt
-#undef optreset
-#undef optarg
-#define getopt(ac, av, o) BSDgetopt(ac, av, o)
-#define opterr BSDopterr
-#define optind BSDoptind
-#define optopt BSDoptopt
-#define optreset BSDoptreset
-#define optarg BSDoptarg
-#endif
-
-/* In older versions of libpam, pam_strerror takes a single argument */
-#ifdef HAVE_OLD_PAM
-#define PAM_STRERROR(a, b) pam_strerror((b))
-#else
-#define PAM_STRERROR(a, b) pam_strerror((a), (b))
-#endif
-
-#ifdef PAM_SUN_CODEBASE
-#define PAM_MSG_MEMBER(msg, n, member) ((*(msg))[(n)].member)
-#else
-#define PAM_MSG_MEMBER(msg, n, member) ((msg)[(n)]->member)
-#endif
-
-#if defined(BROKEN_GETADDRINFO) && defined(HAVE_GETADDRINFO)
-#undef HAVE_GETADDRINFO
-#endif
-
-#if defined(BROKEN_GETADDRINFO) && defined(HAVE_FREEADDRINFO)
-#undef HAVE_FREEADDRINFO
-#endif
-
-#if defined(BROKEN_GETADDRINFO) && defined(HAVE_GAI_STRERROR)
-#undef HAVE_GAI_STRERROR
-#endif
-
-#if !defined(HAVE_MEMMOVE) && defined(HAVE_BCOPY)
-#define memmove(s1, s2, n) bcopy((s2), (s1), (n))
-#endif /* !defined(HAVE_MEMMOVE) && defined(HAVE_BCOPY) */
-
-#if defined(HAVE_VHANGUP) && !defined(HAVE_DEV_PTMX)
-#define USE_VHANGUP
-#endif /* defined(HAVE_VHANGUP) && !defined(HAVE_DEV_PTMX) */
-
-#ifndef GETPGRP_VOID
-#define getpgrp() getpgrp(0)
-#endif
-
-/* OPENSSL_free() is Free() in versions before OpenSSL 0.9.6 */
-#if !defined(OPENSSL_VERSION_NUMBER) || (OPENSSL_VERSION_NUMBER < 0x0090600f)
-#define OPENSSL_free(x) Free(x)
-#endif
-
-#if !defined(HAVE___func__) && defined(HAVE___FUNCTION__)
-#define __func__ __FUNCTION__
-#elif !defined(HAVE___func__)
-#define __func__ ""
-#endif
-
-/*
- * login recorder definitions
- */
-
-/* FIXME: put default paths back in */
-#ifndef UTMP_FILE
-#ifdef _PATH_UTMP
-#define UTMP_FILE _PATH_UTMP
-#else
-#ifdef CONF_UTMP_FILE
-#define UTMP_FILE CONF_UTMP_FILE
-#endif
-#endif
-#endif
-
-#ifndef WTMP_FILE
-#ifdef _PATH_WTMP
-#define WTMP_FILE _PATH_WTMP
-#else
-#ifdef CONF_WTMP_FILE
-#define WTMP_FILE CONF_WTMP_FILE
-#endif
-#endif
-#endif
-
-/* pick up the user's location for lastlog if given */
-#ifndef LASTLOG_FILE
-#ifdef _PATH_LASTLOG
-#define LASTLOG_FILE _PATH_LASTLOG
-#else
-#ifdef CONF_LASTLOG_FILE
-#define LASTLOG_FILE CONF_LASTLOG_FILE
-#endif
-#endif
-#endif
-
-/* The login() library function in libutil is first choice */
-#if defined(HAVE_LOGIN) && !defined(DISABLE_LOGIN)
-#define USE_LOGIN
-#else
-/* Simply select your favourite login types. */
-/* Can't do if-else because some systems use several... <sigh> */
-#if defined(UTMPX_FILE) && !defined(DISABLE_UTMPX)
-#define USE_UTMPX
-#endif
-#if defined(UTMP_FILE) && !defined(DISABLE_UTMP)
-#define USE_UTMP
-#endif
-#if defined(WTMPX_FILE) && !defined(DISABLE_WTMPX)
-#define USE_WTMPX
-#endif
-#if defined(WTMP_FILE) && !defined(DISABLE_WTMP)
-#define USE_WTMP
-#endif
-#endif
-
-/* I hope that the presence of LASTLOG_FILE is enough to detect this */
-#if defined(LASTLOG_FILE) && !defined(DISABLE_LASTLOG)
-#define USE_LASTLOG
-#endif
-
-/* end of login recorder definitions */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _DEFINES_H */
diff --git a/usr/src/cmd/ssh/include/dh.h b/usr/src/cmd/ssh/include/dh.h
deleted file mode 100644
index e977847119..0000000000
--- a/usr/src/cmd/ssh/include/dh.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/* $OpenBSD: dh.h,v 1.7 2001/06/26 17:27:23 markus Exp $ */
-
-#ifndef _DH_H
-#define _DH_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Copyright (c) 2000 Niels Provos. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-struct dhgroup {
- int size;
- BIGNUM *g;
- BIGNUM *p;
-};
-
-DH *choose_dh(int, int, int);
-DH *dh_new_group_asc(const char *, const char *);
-DH *dh_new_group(BIGNUM *, BIGNUM *);
-DH *dh_new_group1(void);
-
-void dh_gen_key(DH *, int);
-int dh_pub_is_valid(DH *, BIGNUM *);
-
-int dh_estimate(int);
-
-#define DH_GRP_MIN 1024
-#define DH_GRP_MAX 8192
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _DH_H */
diff --git a/usr/src/cmd/ssh/include/dirname.h b/usr/src/cmd/ssh/include/dirname.h
deleted file mode 100644
index 4201e3399d..0000000000
--- a/usr/src/cmd/ssh/include/dirname.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * XXX - Add OpenSSH copyright...
- */
-
-#ifndef _DIRNAME_H
-#define _DIRNAME_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-#ifndef HAVE_DIRNAME
-
-char *dirname(const char *path);
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _DIRNAME_H */
diff --git a/usr/src/cmd/ssh/include/dispatch.h b/usr/src/cmd/ssh/include/dispatch.h
deleted file mode 100644
index 9262c9a034..0000000000
--- a/usr/src/cmd/ssh/include/dispatch.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/* $OpenBSD: dispatch.h,v 1.9 2002/01/11 13:39:36 markus Exp $ */
-
-#ifndef _DISPATCH_H
-#define _DISPATCH_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Copyright (c) 2000 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-enum {
- DISPATCH_BLOCK,
- DISPATCH_NONBLOCK
-};
-
-typedef void dispatch_fn(int, u_int32_t, void *);
-
-void dispatch_init(dispatch_fn *);
-void dispatch_set(int, dispatch_fn *);
-void dispatch_range(u_int, u_int, dispatch_fn *);
-void dispatch_run(int, int *, void *);
-void dispatch_protocol_error(int, u_int32_t, void *);
-void dispatch_protocol_ignore(int, u_int32_t, void *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _DISPATCH_H */
diff --git a/usr/src/cmd/ssh/include/entropy.h b/usr/src/cmd/ssh/include/entropy.h
deleted file mode 100644
index 79a2884eca..0000000000
--- a/usr/src/cmd/ssh/include/entropy.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 1999-2000 Damien Miller. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _ENTROPY_H
-#define _ENTROPY_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/* $Id: entropy.h,v 1.4 2001/02/09 01:55:36 djm Exp $ */
-
-void seed_rng(void);
-void init_rng(void);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _ENTROPY_H */
diff --git a/usr/src/cmd/ssh/include/fake-gai-errnos.h b/usr/src/cmd/ssh/include/fake-gai-errnos.h
deleted file mode 100644
index 8df9159f18..0000000000
--- a/usr/src/cmd/ssh/include/fake-gai-errnos.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * fake library for ssh
- *
- * This file is included in getaddrinfo.c and getnameinfo.c.
- * See getaddrinfo.c and getnameinfo.c.
- */
-
-#ifndef _FAKE_GAI_ERRNOS_H
-#define _FAKE_GAI_ERRNOS_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/* $Id: fake-gai-errnos.h,v 1.2 2001/02/09 01:55:36 djm Exp $ */
-
-/* for old netdb.h */
-#ifndef EAI_NODATA
-#define EAI_NODATA 1
-#define EAI_MEMORY 2
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _FAKE_GAI_ERRNOS_H */
diff --git a/usr/src/cmd/ssh/include/fake-getaddrinfo.h b/usr/src/cmd/ssh/include/fake-getaddrinfo.h
deleted file mode 100644
index b5a1a6441e..0000000000
--- a/usr/src/cmd/ssh/include/fake-getaddrinfo.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/* $Id: fake-getaddrinfo.h,v 1.2 2001/02/09 01:55:36 djm Exp $ */
-
-#ifndef _FAKE_GETADDRINFO_H
-#define _FAKE_GETADDRINFO_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "config.h"
-
-#include "fake-gai-errnos.h"
-
-#ifndef AI_PASSIVE
-# define AI_PASSIVE 1
-# define AI_CANONNAME 2
-#endif
-
-#ifndef NI_NUMERICHOST
-# define NI_NUMERICHOST 2
-# define NI_NAMEREQD 4
-# define NI_NUMERICSERV 8
-#endif
-
-#ifndef HAVE_STRUCT_ADDRINFO
-struct addrinfo {
- int ai_flags; /* AI_PASSIVE, AI_CANONNAME */
- int ai_family; /* PF_xxx */
- int ai_socktype; /* SOCK_xxx */
- int ai_protocol; /* 0 or IPPROTO_xxx for IPv4 and IPv6 */
- size_t ai_addrlen; /* length of ai_addr */
- char *ai_canonname; /* canonical name for hostname */
- struct sockaddr *ai_addr; /* binary address */
- struct addrinfo *ai_next; /* next structure in linked list */
-};
-#endif /* !HAVE_STRUCT_ADDRINFO */
-
-#ifndef HAVE_GETADDRINFO
-int getaddrinfo(const char *hostname, const char *servname,
- const struct addrinfo *hints, struct addrinfo **res);
-#endif /* !HAVE_GETADDRINFO */
-
-#ifndef HAVE_GAI_STRERROR
-char *gai_strerror(int ecode);
-#endif /* !HAVE_GAI_STRERROR */
-
-#ifndef HAVE_FREEADDRINFO
-void freeaddrinfo(struct addrinfo *ai);
-#endif /* !HAVE_FREEADDRINFO */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _FAKE_GETADDRINFO_H */
diff --git a/usr/src/cmd/ssh/include/fake-getnameinfo.h b/usr/src/cmd/ssh/include/fake-getnameinfo.h
deleted file mode 100644
index 2527882ad0..0000000000
--- a/usr/src/cmd/ssh/include/fake-getnameinfo.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/* $Id: fake-getnameinfo.h,v 1.2 2001/02/09 01:55:36 djm Exp $ */
-
-#ifndef _FAKE_GETNAMEINFO_H
-#define _FAKE_GETNAMEINFO_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "config.h"
-
-#ifndef HAVE_GETNAMEINFO
-int getnameinfo(const struct sockaddr *sa, size_t salen, char *host,
- size_t hostlen, char *serv, size_t servlen, int flags);
-#endif /* !HAVE_GETNAMEINFO */
-
-#ifndef NI_MAXSERV
-# define NI_MAXSERV 32
-#endif /* !NI_MAXSERV */
-#ifndef NI_MAXHOST
-# define NI_MAXHOST 1025
-#endif /* !NI_MAXHOST */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _FAKE_GETNAMEINFO_H */
diff --git a/usr/src/cmd/ssh/include/fake-socket.h b/usr/src/cmd/ssh/include/fake-socket.h
deleted file mode 100644
index 30444dd91c..0000000000
--- a/usr/src/cmd/ssh/include/fake-socket.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/* $Id: fake-socket.h,v 1.3 2002/04/12 03:35:40 tim Exp $ */
-
-#ifndef _FAKE_SOCKET_H
-#define _FAKE_SOCKET_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "includes.h"
-#include "sys/types.h"
-
-#ifndef HAVE_STRUCT_SOCKADDR_STORAGE
-# define _SS_MAXSIZE 128 /* Implementation specific max size */
-# define _SS_PADSIZE (_SS_MAXSIZE - sizeof (struct sockaddr))
-
-struct sockaddr_storage {
- struct sockaddr ss_sa;
- char __ss_pad2[_SS_PADSIZE];
-};
-# define ss_family ss_sa.sa_family
-#endif /* !HAVE_STRUCT_SOCKADDR_STORAGE */
-
-#ifndef IN6_IS_ADDR_LOOPBACK
-# define IN6_IS_ADDR_LOOPBACK(a) \
- (((u_int32_t *) (a))[0] == 0 && ((u_int32_t *) (a))[1] == 0 && \
- ((u_int32_t *) (a))[2] == 0 && ((u_int32_t *) (a))[3] == htonl (1))
-#endif /* !IN6_IS_ADDR_LOOPBACK */
-
-#ifndef HAVE_STRUCT_IN6_ADDR
-struct in6_addr {
- u_int8_t s6_addr[16];
-};
-#endif /* !HAVE_STRUCT_IN6_ADDR */
-
-#ifndef HAVE_STRUCT_SOCKADDR_IN6
-struct sockaddr_in6 {
- unsigned short sin6_family;
- u_int16_t sin6_port;
- u_int32_t sin6_flowinfo;
- struct in6_addr sin6_addr;
-};
-#endif /* !HAVE_STRUCT_SOCKADDR_IN6 */
-
-#ifndef AF_INET6
-/* Define it to something that should never appear */
-#define AF_INET6 AF_MAX
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _FAKE_SOCKET_H */
diff --git a/usr/src/cmd/ssh/include/g11n.h b/usr/src/cmd/ssh/include/g11n.h
deleted file mode 100644
index 64acf8e630..0000000000
--- a/usr/src/cmd/ssh/include/g11n.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- *
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _G11N_H
-#define _G11N_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-#include "includes.h"
-#include <sys/types.h>
-
-/*
- * Functions for language tag negotiation
- */
-
-/* boolean */
-uint_t g11n_langtag_is_default(char *langtag);
-
-/* return 0 if not, 1 if yes, 2 if the country is matched too */
-uint_t g11n_langtag_matches_locale(char *langtag, char *locale);
-
-/* get current locale */
-char *g11n_getlocale();
-
-/* get current locale */
-void g11n_setlocale(int category, const char *locale);
-
-/* get list of locales - returns pointer to array of pointers to char */
-char **g11n_getlocales();
-
-/* get list of langs spoken by the user, from SSH_LANGS env var */
-char *g11n_getlangs();
-
-/* make a comma-separated list of language tags from list of locales */
-char *g11n_locales2langs(char **locale_set);
-
-int g11n_langtag_match(char *langtag1, char *langtag2);
-
-/* intersect comma-separated lists of IETF language tags */
-char *g11n_langtag_set_intersect(char *set1, char *set2);
-
-char *g11n_clnt_langtag_negotiate(char *clnt_langtags, char *srvr_langtags);
-
-char **g11n_langtag_set_locale_set_intersect(char *langtag_set,
- char **locale_set);
-
-char *g11n_srvr_locale_negotiate(char *clnt_langtags, char **srvr_locales);
-
-/* auxiliary functions */
-void g11n_freelist(char **list);
-
-/*
- * Functions for converting to ASCII or UTF-8 from the local codeset
- * Functions for converting from ASCII or UTF-8 to the local codeset
- *
- * The error_str parameter is an optional pointer to a char variable
- * where to store a string suitable for use with error() or fatal() or
- * friends.
- *
- */
-extern char *g11n_convert_from_utf8(const char *str, uint_t *lenp,
- char **error_str);
-
-extern char *g11n_convert_to_utf8(const char *str, uint_t *lenp, int native,
- char **error_str);
-
-extern char *g11n_filter_string(char *);
-extern void g11n_test_langtag(const char *, int);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _G11N_H */
diff --git a/usr/src/cmd/ssh/include/getcwd.h b/usr/src/cmd/ssh/include/getcwd.h
deleted file mode 100644
index 2d9069e6e4..0000000000
--- a/usr/src/cmd/ssh/include/getcwd.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* $Id: getcwd.h,v 1.2 2001/02/09 01:55:36 djm Exp $ */
-
-#ifndef _GETCWD_H
-#define _GETCWD_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "config.h"
-
-#if !defined(HAVE_GETCWD)
-
-char *getcwd(char *pt, size_t size);
-
-#endif /* !defined(HAVE_GETCWD) */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _GETCWD_H */
diff --git a/usr/src/cmd/ssh/include/getgrouplist.h b/usr/src/cmd/ssh/include/getgrouplist.h
deleted file mode 100644
index 587402800f..0000000000
--- a/usr/src/cmd/ssh/include/getgrouplist.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/* $Id: getgrouplist.h,v 1.2 2001/02/09 01:55:36 djm Exp $ */
-
-#ifndef _GETGROUPLIST_H
-#define _GETGROUPLIST_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "config.h"
-
-#ifndef HAVE_GETGROUPLIST
-
-#include <grp.h>
-
-int getgrouplist(const char *, gid_t, gid_t *, int *);
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _GETGROUPLIST_H */
diff --git a/usr/src/cmd/ssh/include/getopt.h b/usr/src/cmd/ssh/include/getopt.h
deleted file mode 100644
index 7ae4576d0c..0000000000
--- a/usr/src/cmd/ssh/include/getopt.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* $Id: getopt.h,v 1.4 2001/09/18 05:05:21 djm Exp $ */
-
-#ifndef _GETOPT_H
-#define _GETOPT_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "config.h"
-
-#if !defined(HAVE_GETOPT) || !defined(HAVE_GETOPT_OPTRESET)
-
-int BSDgetopt(int argc, char * const *argv, const char *opts);
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _GETOPT_H */
diff --git a/usr/src/cmd/ssh/include/getput.h b/usr/src/cmd/ssh/include/getput.h
deleted file mode 100644
index 1a33883507..0000000000
--- a/usr/src/cmd/ssh/include/getput.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/* $OpenBSD: getput.h,v 1.8 2002/03/04 17:27:39 stevesk Exp $ */
-
-#ifndef _GETPUT_H
-#define _GETPUT_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * Macros for storing and retrieving data in msb first and lsb first order.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-/*------------ macros for storing/extracting msb first words -------------*/
-
-#define GET_64BIT(cp) (((u_int64_t)(u_char)(cp)[0] << 56) | \
- ((u_int64_t)(u_char)(cp)[1] << 48) | \
- ((u_int64_t)(u_char)(cp)[2] << 40) | \
- ((u_int64_t)(u_char)(cp)[3] << 32) | \
- ((u_int64_t)(u_char)(cp)[4] << 24) | \
- ((u_int64_t)(u_char)(cp)[5] << 16) | \
- ((u_int64_t)(u_char)(cp)[6] << 8) | \
- ((u_int64_t)(u_char)(cp)[7]))
-
-#define GET_32BIT(cp) (((u_long)(u_char)(cp)[0] << 24) | \
- ((u_long)(u_char)(cp)[1] << 16) | \
- ((u_long)(u_char)(cp)[2] << 8) | \
- ((u_long)(u_char)(cp)[3]))
-
-#define GET_16BIT(cp) (((u_long)(u_char)(cp)[0] << 8) | \
- ((u_long)(u_char)(cp)[1]))
-
-#define PUT_64BIT(cp, value) do { \
- (cp)[0] = (value) >> 56; \
- (cp)[1] = (value) >> 48; \
- (cp)[2] = (value) >> 40; \
- (cp)[3] = (value) >> 32; \
- (cp)[4] = (value) >> 24; \
- (cp)[5] = (value) >> 16; \
- (cp)[6] = (value) >> 8; \
- (cp)[7] = (value); } while (0)
-
-#define PUT_32BIT(cp, value) do { \
- (cp)[0] = (value) >> 24; \
- (cp)[1] = (value) >> 16; \
- (cp)[2] = (value) >> 8; \
- (cp)[3] = (value); } while (0)
-
-#define PUT_16BIT(cp, value) do { \
- (cp)[0] = (value) >> 8; \
- (cp)[1] = (value); } while (0)
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _GETPUT_H */
diff --git a/usr/src/cmd/ssh/include/groupaccess.h b/usr/src/cmd/ssh/include/groupaccess.h
deleted file mode 100644
index bc40898888..0000000000
--- a/usr/src/cmd/ssh/include/groupaccess.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* $OpenBSD: groupaccess.h,v 1.4 2001/06/26 17:27:23 markus Exp $ */
-
-#ifndef _GROUPACCESS_H
-#define _GROUPACCESS_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Copyright (c) 2001 Kevin Steves. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <grp.h>
-
-int ga_init(const char *, gid_t);
-int ga_match(char * const *, int);
-int ga_match_pattern_list(const char *);
-void ga_free(void);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _GROUPACCESS_H */
diff --git a/usr/src/cmd/ssh/include/hostfile.h b/usr/src/cmd/ssh/include/hostfile.h
deleted file mode 100644
index 3a2c7e6a06..0000000000
--- a/usr/src/cmd/ssh/include/hostfile.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-#ifndef _HOSTFILE_H
-#define _HOSTFILE_H
-
-/* $OpenBSD: hostfile.h,v 1.12 2002/09/08 20:24:08 markus Exp $ */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef enum {
- HOST_OK, HOST_NEW, HOST_CHANGED, HOST_FOUND
-} HostStatus;
-
-int hostfile_read_key(char **, u_int *, Key *);
-HostStatus
-check_host_in_hostfile(const char *, const char *, const Key *, Key *, int *);
-int add_host_to_hostfile(const char *, const char *, const Key *, int);
-int
-lookup_key_in_hostfile_by_type(const char *, const char *, int , Key *, int *);
-
-#define HASH_MAGIC "|1|"
-#define HASH_DELIM '|'
-
-char *host_hash(const char *, const char *, u_int);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _HOSTFILE_H */
diff --git a/usr/src/cmd/ssh/include/includes.h b/usr/src/cmd/ssh/include/includes.h
deleted file mode 100644
index ecd55ea737..0000000000
--- a/usr/src/cmd/ssh/include/includes.h
+++ /dev/null
@@ -1,216 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * This file includes most of the needed system headers.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _INCLUDES_H
-#define _INCLUDES_H
-
-/* $OpenBSD: includes.h,v 1.17 2002/01/26 16:44:22 stevesk Exp $ */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define RCSID(msg) \
-static const char *const rcsid[] = { (char *)rcsid, "\100(#)" msg }
-
-#include "config.h"
-
-#include <stdio.h>
-#include <ctype.h>
-#include <errno.h>
-#include <fcntl.h> /* For O_NONBLOCK */
-#include <signal.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdarg.h>
-#include <pwd.h>
-#include <grp.h>
-#include <time.h>
-#include <dirent.h>
-#include <libintl.h>
-#include <locale.h>
-
-#ifdef HAVE_LIMITS_H
-#include <limits.h> /* For PATH_MAX */
-#endif
-
-#ifdef HAVE_GETOPT_H
-#include <getopt.h>
-#endif
-
-#ifdef HAVE_BSTRING_H
-#include <bstring.h>
-#endif
-
-#if defined(HAVE_GLOB_H) && defined(GLOB_HAS_ALTDIRFUNC) && \
- defined(GLOB_HAS_GL_MATCHC)
-#include <glob.h>
-#endif
-
-#ifdef HAVE_NETGROUP_H
-#include <netgroup.h>
-#endif
-
-#if defined(HAVE_NETDB_H)
-#include <netdb.h>
-#endif
-
-#ifdef HAVE_ENDIAN_H
-#include <endian.h>
-#endif
-
-#ifdef HAVE_TTYENT_H
-#include <ttyent.h>
-#endif
-
-#ifdef HAVE_UTIME_H
-#include <utime.h>
-#endif
-
-#ifdef HAVE_MAILLOCK_H
-#include <maillock.h> /* For _PATH_MAILDIR */
-#endif
-
-#include <unistd.h> /* For STDIN_FILENO, etc */
-#include <termios.h> /* Struct winsize */
-
-/*
- * *-*-nto-qnx needs these headers for strcasecmp and LASTLOG_FILE
- * respectively
- */
-#ifdef HAVE_STRINGS_H
-#include <strings.h>
-#endif
-
-#ifdef HAVE_LOGIN_H
-#include <login.h>
-#endif
-
-#ifdef HAVE_UCRED_H
-#include <ucred.h>
-#endif
-
-#ifdef HAVE_UTMP_H
-#include <utmp.h>
-#endif
-
-#ifdef HAVE_UTMPX_H
-#ifdef HAVE_TV_IN_UTMPX
-#include <sys/time.h>
-#endif
-#include <utmpx.h>
-#endif
-
-#ifdef HAVE_LASTLOG_H
-#include <lastlog.h>
-#endif
-
-#ifdef HAVE_PATHS_H
-#include <paths.h> /* For _PATH_XXX */
-#endif
-
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#include <sys/wait.h>
-
-#ifdef HAVE_SYS_TIME_H
-#include <sys/time.h> /* For timersub */
-#endif
-
-#include <sys/resource.h>
-#ifdef HAVE_SYS_SELECT_H
-#include <sys/select.h>
-#endif
-
-#ifdef HAVE_SYS_BSDTTY_H
-#include <sys/bsdtty.h>
-#endif
-
-#include <sys/param.h> /* For MAXPATHLEN and roundup() */
-#ifdef HAVE_SYS_UN_H
-#include <sys/un.h> /* For sockaddr_un */
-#endif
-
-#ifdef HAVE_STDINT_H
-#include <stdint.h>
-#endif
-
-#ifdef HAVE_SYS_BITYPES_H
-#include <sys/bitypes.h> /* For u_intXX_t */
-#endif
-
-#ifdef HAVE_SYS_CDEFS_H
-#include <sys/cdefs.h> /* For __P() */
-#endif
-
-#ifdef HAVE_SYS_STAT_H
-#include <sys/stat.h> /* For S_* constants and macros */
-#endif
-
-#ifdef HAVE_SYS_SYSMACROS_H
-#include <sys/sysmacros.h> /* For MIN, MAX, etc */
-#endif
-
-#ifdef HAVE_SYS_MMAN_H
-#include <sys/mman.h> /* for MAP_ANONYMOUS */
-#endif
-
-#include <netinet/in_systm.h> /* For typedefs */
-#include <netinet/in.h> /* For IPv6 macros */
-#include <netinet/ip.h> /* For IPTOS macros */
-#include <netinet/tcp.h>
-#include <arpa/inet.h>
-
-#ifdef HAVE_RPC_TYPES_H
-#include <rpc/types.h> /* For INADDR_LOOPBACK */
-#endif
-
-#ifdef USE_PAM
-#include <security/pam_appl.h>
-#endif
-
-#ifdef HAVE_READPASSPHRASE_H
-#include <readpassphrase.h>
-#endif
-
-#ifdef HAVE_IA_H
-#include <ia.h>
-#endif
-
-#ifdef HAVE_TMPDIR_H
-#include <tmpdir.h>
-#endif
-
-#include <openssl/opensslv.h> /* For OPENSSL_VERSION_NUMBER */
-
-#include "defines.h"
-
-#include "version.h"
-#include "openbsd-compat.h"
-#include "bsd-cygwin_util.h"
-
-#include "entropy.h"
-#include "g11n.h"
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _INCLUDES_H */
diff --git a/usr/src/cmd/ssh/include/inet_ntoa.h b/usr/src/cmd/ssh/include/inet_ntoa.h
deleted file mode 100644
index b491398f7f..0000000000
--- a/usr/src/cmd/ssh/include/inet_ntoa.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* $Id: inet_ntoa.h,v 1.2 2001/02/09 01:55:36 djm Exp $ */
-
-#ifndef _INET_NTOA_H
-#define _INET_NTOA_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "config.h"
-
-#if defined(BROKEN_INET_NTOA) || !defined(HAVE_INET_NTOA)
-char *inet_ntoa(struct in_addr in);
-#endif /* defined(BROKEN_INET_NTOA) || !defined(HAVE_INET_NTOA) */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _INET_NTOA_H */
diff --git a/usr/src/cmd/ssh/include/inet_ntop.h b/usr/src/cmd/ssh/include/inet_ntop.h
deleted file mode 100644
index 91005955f8..0000000000
--- a/usr/src/cmd/ssh/include/inet_ntop.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* $Id: inet_ntop.h,v 1.4 2001/08/09 00:56:53 mouring Exp $ */
-
-#ifndef _INET_NTOP_H
-#define _INET_NTOP_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "config.h"
-
-#ifndef HAVE_INET_NTOP
-const char *
-inet_ntop(int af, const void *src, char *dst, size_t size);
-#endif /* !HAVE_INET_NTOP */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _INET_NTOP_H */
diff --git a/usr/src/cmd/ssh/include/kex.h b/usr/src/cmd/ssh/include/kex.h
deleted file mode 100644
index 30a6cdb4a5..0000000000
--- a/usr/src/cmd/ssh/include/kex.h
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * Copyright (c) 2000, 2001 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/* $OpenBSD: kex.h,v 1.32 2002/09/09 14:54:14 markus Exp $ */
-
-#ifndef _KEX_H
-#define _KEX_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <openssl/evp.h>
-#include <openssl/hmac.h>
-#include "buffer.h"
-#include "cipher.h"
-#include "key.h"
-
-#ifdef GSSAPI
-#ifdef SUNW_GSSAPI
-#include <gssapi/gssapi.h>
-#include <gssapi/gssapi_ext.h>
-#else
-#ifdef GSS_KRB5
-#ifdef HEIMDAL
-#include <gssapi.h>
-#else
-#include <gssapi_generic.h>
-#endif /* HEIMDAL */
-#endif /* GSS_KRB5 */
-#endif /* SUNW_GSSAPI */
-#endif /* GSSAPI */
-
-#define KEX_DH1 "diffie-hellman-group1-sha1"
-#define KEX_DHGEX "diffie-hellman-group-exchange-sha1"
-
-enum kex_init_proposals {
- PROPOSAL_KEX_ALGS,
- PROPOSAL_SERVER_HOST_KEY_ALGS,
- PROPOSAL_ENC_ALGS_CTOS,
- PROPOSAL_ENC_ALGS_STOC,
- PROPOSAL_MAC_ALGS_CTOS,
- PROPOSAL_MAC_ALGS_STOC,
- PROPOSAL_COMP_ALGS_CTOS,
- PROPOSAL_COMP_ALGS_STOC,
- PROPOSAL_LANG_CTOS,
- PROPOSAL_LANG_STOC,
- PROPOSAL_MAX
-};
-
-enum kex_modes {
- MODE_IN,
- MODE_OUT,
- MODE_MAX
-};
-
-enum kex_exchange {
- KEX_DH_GRP1_SHA1,
- KEX_DH_GEX_SHA1,
-#ifdef GSSAPI
- KEX_GSS_GRP1_SHA1,
-#endif /* GSSAPI */
- KEX_MAX
-};
-
-
-#define KEX_INIT_SENT 0x0001
-
-typedef struct Kex Kex;
-typedef struct Mac Mac;
-typedef struct Comp Comp;
-typedef struct Enc Enc;
-typedef struct Newkeys Newkeys;
-
-struct Enc {
- char *name;
- Cipher *cipher;
- int enabled;
- u_int key_len;
- u_int block_size;
- u_char *key;
- u_char *iv;
-};
-struct Mac {
- char *name;
- int enabled;
- u_int mac_len;
- u_char *key;
- u_int key_len;
- int type;
- const EVP_MD *evp_md;
- HMAC_CTX evp_ctx;
-};
-struct Comp {
- int type;
- int enabled;
- char *name;
-};
-struct Newkeys {
- Enc enc;
- Mac mac;
- Comp comp;
-};
-
-struct KexOptions {
- int gss_deleg_creds;
-};
-
-struct Kex {
- u_char *session_id;
- u_int session_id_len;
- Newkeys *newkeys[MODE_MAX];
- int we_need;
- int server;
- char *serverhost;
- char *name;
- int hostkey_type;
- int kex_type;
- Buffer my;
- Buffer peer;
- int initial_kex_done;
- int done;
- int flags;
- char *client_version_string;
- char *server_version_string;
- struct KexOptions options;
- int (*verify_host_key)(Key *);
- int (*accept_host_key)(Key *); /* for GSS keyex */
- Key *(*load_host_key)(int);
- int (*host_key_index)(Key *);
- void (*kex[KEX_MAX])(Kex *);
- void (*kex_hook)(Kex *, char **); /* for GSS keyex rekeying */
-#ifdef GSSAPI
- gss_OID_set mechs; /* mechs in my proposal */
-#endif /* GSSAPI */
-};
-
-typedef void (*Kex_hook_func)(Kex *, char **); /* for GSS-API rekeying */
-
-Kex *kex_setup(const char *host,
- char *proposal[PROPOSAL_MAX],
- Kex_hook_func hook);
-void kex_start(Kex *);
-void kex_finish(Kex *);
-
-void kex_send_kexinit(Kex *);
-void kex_input_kexinit(int, u_int32_t, void *);
-void kex_derive_keys(Kex *, u_char *, BIGNUM *);
-
-Newkeys *kex_get_newkeys(int);
-
-void kexdh_client(Kex *);
-void kexdh_server(Kex *);
-void kexgex_client(Kex *);
-void kexgex_server(Kex *);
-
-u_char *
-kex_dh_hash(char *, char *, char *, int, char *, int, u_char *, int,
- BIGNUM *, BIGNUM *, BIGNUM *);
-u_char *
-kexgex_hash(char *, char *, char *, int, char *, int, u_char *, int,
- int, int, int, BIGNUM *, BIGNUM *, BIGNUM *, BIGNUM *, BIGNUM *);
-
-#ifdef GSSAPI
-void kexgss_client(Kex *);
-void kexgss_server(Kex *);
-#endif
-
-#if defined(DEBUG_KEX) || defined(DEBUG_KEXDH)
-void dump_digest(char *, u_char *, int);
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _KEX_H */
diff --git a/usr/src/cmd/ssh/include/key.h b/usr/src/cmd/ssh/include/key.h
deleted file mode 100644
index 862b2d81d4..0000000000
--- a/usr/src/cmd/ssh/include/key.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2000, 2001 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _KEY_H
-#define _KEY_H
-
-/* $OpenBSD: key.h,v 1.19 2002/03/18 17:23:31 markus Exp $ */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-#include <openssl/rsa.h>
-#include <openssl/dsa.h>
-
-typedef struct Key Key;
-enum types {
- KEY_RSA1,
- KEY_RSA,
- KEY_DSA,
- KEY_NULL,
- KEY_UNSPEC
-};
-enum fp_type {
- SSH_FP_SHA1,
- SSH_FP_MD5
-};
-enum fp_rep {
- SSH_FP_HEX,
- SSH_FP_BUBBLEBABBLE
-};
-
-/* key is stored in external hardware */
-#define KEY_FLAG_EXT 0x0001
-
-struct Key {
- int type;
- int flags;
- RSA *rsa;
- DSA *dsa;
-};
-
-Key *key_new(int);
-Key *key_new_private(int);
-void key_free(Key *);
-Key *key_demote(Key *);
-int key_equal(const Key *, const Key *);
-char *key_fingerprint(Key *, enum fp_type, enum fp_rep);
-char *key_type(Key *);
-int key_write(const Key *, FILE *);
-int key_read(Key *, char **);
-u_int key_size(Key *);
-
-Key *key_generate(int, u_int);
-Key *key_from_private(Key *);
-int key_type_from_name(char *);
-
-Key *key_from_blob(u_char *, int);
-int key_to_blob(const Key *, u_char **, u_int *);
-char *key_ssh_name(const Key *);
-int key_names_valid2(const char *);
-
-int key_sign(Key *, u_char **, u_int *, u_char *, u_int);
-int key_verify(Key *, u_char *, u_int, u_char *, u_int);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _KEY_H */
diff --git a/usr/src/cmd/ssh/include/log.h b/usr/src/cmd/ssh/include/log.h
deleted file mode 100644
index 34bc82f5e3..0000000000
--- a/usr/src/cmd/ssh/include/log.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-/* $OpenBSD: log.h,v 1.8 2002/07/19 15:43:33 markus Exp $ */
-
-#ifndef _LOG_H
-#define _LOG_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/* $OpenBSD: log.h,v 1.8 2002/07/19 15:43:33 markus Exp $ */
-
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-#include <syslog.h> /* Needed for LOG_AUTHPRIV (if present) */
-
-/* Supported syslog facilities and levels. */
-typedef enum {
- SYSLOG_FACILITY_DAEMON,
- SYSLOG_FACILITY_USER,
- SYSLOG_FACILITY_AUTH,
-#ifdef LOG_AUTHPRIV
- SYSLOG_FACILITY_AUTHPRIV,
-#endif
- SYSLOG_FACILITY_LOCAL0,
- SYSLOG_FACILITY_LOCAL1,
- SYSLOG_FACILITY_LOCAL2,
- SYSLOG_FACILITY_LOCAL3,
- SYSLOG_FACILITY_LOCAL4,
- SYSLOG_FACILITY_LOCAL5,
- SYSLOG_FACILITY_LOCAL6,
- SYSLOG_FACILITY_LOCAL7,
- SYSLOG_FACILITY_NOT_SET = -1
-} SyslogFacility;
-
-typedef enum {
- SYSLOG_LEVEL_QUIET,
- SYSLOG_LEVEL_FATAL,
- SYSLOG_LEVEL_ERROR,
- SYSLOG_LEVEL_NOTICE,
- SYSLOG_LEVEL_INFO,
- SYSLOG_LEVEL_VERBOSE,
- SYSLOG_LEVEL_DEBUG1,
- SYSLOG_LEVEL_DEBUG2,
- SYSLOG_LEVEL_DEBUG3,
- SYSLOG_LEVEL_NOT_SET = -1
-} LogLevel;
-
-void log_init(char *, LogLevel, SyslogFacility, int);
-
-SyslogFacility log_facility_number(char *);
-LogLevel log_level_number(char *);
-
-void set_log_txt_prefix(const char *);
-void fatal(const char *, ...) __attribute__((format(printf, 1, 2)));
-void error(const char *, ...) __attribute__((format(printf, 1, 2)));
-void notice(const char *, ...) __attribute__((format(printf, 1, 2)));
-void log(const char *, ...) __attribute__((format(printf, 1, 2)));
-void verbose(const char *, ...) __attribute__((format(printf, 1, 2)));
-void debug(const char *, ...) __attribute__((format(printf, 1, 2)));
-void debug2(const char *, ...) __attribute__((format(printf, 1, 2)));
-void debug3(const char *, ...) __attribute__((format(printf, 1, 2)));
-
-void fatal_cleanup(void);
-void fatal_add_cleanup(void (*) (void *), void *);
-void fatal_remove_cleanup(void (*) (void *), void *);
-void fatal_remove_all_cleanups(void);
-
-void do_log(LogLevel, const char *, va_list);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _LOG_H */
diff --git a/usr/src/cmd/ssh/include/loginrec.h b/usr/src/cmd/ssh/include/loginrec.h
deleted file mode 100644
index 8086d96686..0000000000
--- a/usr/src/cmd/ssh/include/loginrec.h
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * Copyright (c) 2000 Andre Lucas. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Markus Friedl.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * loginrec.h: platform-independent login recording and lastlog retrieval
- */
-
-#ifndef _LOGINREC_H
-#define _LOGINREC_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "includes.h"
-
-#include <sys/types.h>
-#include <netinet/in.h>
-#include <sys/socket.h>
-
-/* RCSID("$Id: loginrec.h,v 1.6 2001/05/08 20:33:06 mouring Exp $"); */
-
-/**
- ** you should use the login_* calls to work around platform dependencies
- **/
-
-/*
- * login_netinfo structure
- */
-
-union login_netinfo {
- struct sockaddr sa;
- struct sockaddr_in sa_in;
- struct sockaddr_storage sa_storage;
-};
-
-/*
- * * logininfo structure *
- */
-/* types - different to utmp.h 'type' macros */
-/* (though set to the same value as linux, openbsd and others...) */
-#define LTYPE_LOGIN 7
-#define LTYPE_LOGOUT 8
-
-/* string lengths - set very long */
-#define LINFO_PROGSIZE 64
-#define LINFO_LINESIZE 64
-#define LINFO_NAMESIZE 64
-#define LINFO_HOSTSIZE 256
-
-struct logininfo {
- int progname_null;
- char progname[LINFO_PROGSIZE]; /* name of program (for PAM) */
- short int type; /* type of login (LTYPE_*) */
- int pid; /* PID of login process */
- int uid; /* UID of this user */
- int line_null;
- char line[LINFO_LINESIZE]; /* tty/pty name */
- char username[LINFO_NAMESIZE]; /* login username */
- char hostname[LINFO_HOSTSIZE]; /* remote hostname */
- /* 'exit_status' structure components */
- int exit; /* process exit status */
- int termination; /* process termination status */
- /* struct timeval (sys/time.h) isn't always available, if it isn't we'll
- * use time_t's value as tv_sec and set tv_usec to 0
- */
- unsigned int tv_sec;
- unsigned int tv_usec;
- union login_netinfo hostaddr; /* caller's host address(es) */
-}; /* struct logininfo */
-
-/*
- * login recording functions
- */
-
-/** 'public' functions */
-
-/* construct a new login entry */
-struct logininfo *login_alloc_entry(int pid, const char *username,
- const char *hostname, const char *line,
- const char *progname);
-/* free a structure */
-void login_free_entry(struct logininfo *li);
-/* fill out a pre-allocated structure with useful information */
-int login_init_entry(struct logininfo *li, int pid, const char *username,
- const char *hostname, const char *line,
- const char *progname);
-/* place the current time in a logininfo struct */
-void login_set_current_time(struct logininfo *li);
-
-/* record the entry */
-int login_login (struct logininfo *li);
-int login_logout(struct logininfo *li);
-#ifdef LOGIN_NEEDS_UTMPX
-int login_utmp_only(struct logininfo *li);
-#endif
-
-/** End of public functions */
-
-/* record the entry */
-int login_write (struct logininfo *li);
-int login_log_entry(struct logininfo *li);
-
-/* set the network address based on network address type */
-void login_set_addr(struct logininfo *li, const struct sockaddr *sa,
- const unsigned int sa_size);
-
-/*
- * lastlog retrieval functions
- */
-/* lastlog *entry* functions fill out a logininfo */
-struct logininfo *login_get_lastlog(struct logininfo *li, const int uid);
-/* lastlog *time* functions return time_t equivalent (uint) */
-unsigned int login_get_lastlog_time(const int uid);
-
-/* produce various forms of the line filename */
-char *line_fullname(char *dst, const char *src, int dstsize);
-char *line_stripname(char *dst, const char *src, int dstsize);
-char *line_abbrevname(char *dst, const char *src, int dstsize);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _LOGINREC_H */
diff --git a/usr/src/cmd/ssh/include/mac.h b/usr/src/cmd/ssh/include/mac.h
deleted file mode 100644
index cc05d332ae..0000000000
--- a/usr/src/cmd/ssh/include/mac.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2001 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* $OpenBSD: mac.h,v 1.3 2001/06/26 17:27:24 markus Exp $ */
-
-#ifndef _MAC_H
-#define _MAC_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-int mac_valid(const char *);
-int mac_setup(Mac *, char *);
-int mac_init(Mac *);
-u_char *mac_compute(Mac *, u_int32_t, u_char *, int);
-void mac_clear(Mac *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _MAC_H */
diff --git a/usr/src/cmd/ssh/include/match.h b/usr/src/cmd/ssh/include/match.h
deleted file mode 100644
index 81729c33da..0000000000
--- a/usr/src/cmd/ssh/include/match.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/* $OpenBSD: match.h,v 1.12 2002/03/01 13:12:10 markus Exp $ */
-
-#ifndef _MATCH_H
-#define _MATCH_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-int match_pattern(const char *, const char *);
-int match_pattern_list(const char *, const char *, u_int, int);
-int match_hostname(const char *, const char *, u_int);
-int match_host_and_ip(const char *, const char *, const char *);
-int match_user(const char *, const char *, const char *, const char *);
-char *match_list(const char *, const char *, u_int *);
-
-/* addrmatch.c */
-int addr_match_list(const char *, const char *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _MATCH_H */
diff --git a/usr/src/cmd/ssh/include/misc.h b/usr/src/cmd/ssh/include/misc.h
deleted file mode 100644
index c626301f84..0000000000
--- a/usr/src/cmd/ssh/include/misc.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _MISC_H
-#define _MISC_H
-
-/* $OpenBSD: misc.h,v 1.12 2002/03/19 10:49:35 markus Exp $ */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-char *chop(char *);
-char *strdelim(char **);
-void set_nonblock(int);
-void unset_nonblock(int);
-void set_nodelay(int);
-int a2port(const char *);
-char *cleanhostname(char *);
-char *hpdelim(char **);
-char *colon(char *);
-long convtime(const char *);
-char *percent_expand(const char *, ...);
-char *tohex(const void *, size_t);
-void sanitise_stdfd(void);
-int get_yes_no_flag(int *option, const char *arg, const char *filename,
- int linenum, int active);
-char *tolowercase(const char *s);
-
-struct passwd *pwcopy(struct passwd *);
-void pwfree(struct passwd **);
-
-typedef struct arglist arglist;
-struct arglist {
- char **list;
- int num;
- int nalloc;
-};
-void addargs(arglist *, char *, ...) __attribute__((format(printf, 2, 3)));
-void replacearg(arglist *, u_int, char *, ...)
- __attribute__((format(printf, 3, 4)));
-void freeargs(arglist *);
-
-/* wrapper for signal interface */
-typedef void (*mysig_t)(int);
-mysig_t mysignal(int sig, mysig_t act);
-
-/* Functions to extract or store big-endian words of various sizes */
-u_int64_t get_u64(const void *)
- __attribute__((__bounded__( __minbytes__, 1, 8)));
-u_int32_t get_u32(const void *)
- __attribute__((__bounded__( __minbytes__, 1, 4)));
-u_int16_t get_u16(const void *)
- __attribute__((__bounded__( __minbytes__, 1, 2)));
-void put_u64(void *, u_int64_t)
- __attribute__((__bounded__( __minbytes__, 1, 8)));
-void put_u32(void *, u_int32_t)
- __attribute__((__bounded__( __minbytes__, 1, 4)));
-void put_u16(void *, u_int16_t)
- __attribute__((__bounded__( __minbytes__, 1, 2)));
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _MISC_H */
diff --git a/usr/src/cmd/ssh/include/mktemp.h b/usr/src/cmd/ssh/include/mktemp.h
deleted file mode 100644
index b93f2b5c10..0000000000
--- a/usr/src/cmd/ssh/include/mktemp.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* $Id: mktemp.h,v 1.2 2001/02/09 01:55:36 djm Exp $ */
-
-#ifndef _MKTEMP_H
-#define _MKTEMP_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "config.h"
-#ifndef HAVE_MKDTEMP
-int mkstemps(char *path, int slen);
-int mkstemp(char *path);
-char *mkdtemp(char *path);
-#endif /* !HAVE_MKDTEMP */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _MKTEMP_H */
diff --git a/usr/src/cmd/ssh/include/mpaux.h b/usr/src/cmd/ssh/include/mpaux.h
deleted file mode 100644
index 6577a9b05d..0000000000
--- a/usr/src/cmd/ssh/include/mpaux.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/* $OpenBSD: mpaux.h,v 1.12 2002/03/04 17:27:39 stevesk Exp $ */
-
-#ifndef _MPAUX_H
-#define _MPAUX_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * This file contains various auxiliary functions related to multiple
- * precision integers.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-void compute_session_id(u_char[16], u_char[8], BIGNUM *, BIGNUM *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _MPAUX_H */
diff --git a/usr/src/cmd/ssh/include/msg.h b/usr/src/cmd/ssh/include/msg.h
deleted file mode 100644
index c04f7d48b2..0000000000
--- a/usr/src/cmd/ssh/include/msg.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/* $OpenBSD: msg.h,v 1.1 2002/05/23 19:24:30 markus Exp $ */
-/*
- * Copyright (c) 2002 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _MSG_H
-#define _MSG_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-void ssh_msg_send(int, u_char, Buffer *);
-int ssh_msg_recv(int, Buffer *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _MSG_H */
diff --git a/usr/src/cmd/ssh/include/myproposal.h b/usr/src/cmd/ssh/include/myproposal.h
deleted file mode 100644
index dc0043e49a..0000000000
--- a/usr/src/cmd/ssh/include/myproposal.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2000 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/* $OpenBSD: myproposal.h,v 1.14 2002/04/03 09:26:11 markus Exp $ */
-
-#ifndef _MYPROPOSAL_H
-#define _MYPROPOSAL_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-#define KEX_DEFAULT_KEX "diffie-hellman-group-exchange-sha1," \
- "diffie-hellman-group1-sha1"
-
-#define KEX_DEFAULT_PK_ALG "ssh-rsa,ssh-dss"
-
-/*
- * Keep CBC modes in the back of the client default cipher list for backward
- * compatibility but remove them from the server side because there are some
- * potential security issues with those modes regarding SSH protocol version 2.
- * Since the client is the one who picks the cipher from the list offered by the
- * server the only way to force the client not to use CBC modes is not to
- * advertise those at all. Note that we still support all such CBC modes in the
- * server code, this is about the default server cipher list only. The list can
- * be changed in the Ciphers option in the sshd_config(4) file.
- *
- * Note that the ordering of ciphers on the server side is not relevant but we
- * must do it properly even here so that we can use the macro for the client
- * list as well.
- */
-#define KEX_DEFAULT_SERVER_ENCRYPT "aes128-ctr,aes192-ctr,aes256-ctr," \
- "arcfour128,arcfour256,arcfour"
-
-#define KEX_DEFAULT_CLIENT_ENCRYPT KEX_DEFAULT_SERVER_ENCRYPT \
- ",aes128-cbc,aes192-cbc,aes256-cbc," \
- "blowfish-cbc,3des-cbc"
-
-#define KEX_DEFAULT_MAC "hmac-md5,hmac-sha1,hmac-sha1-96," \
- "hmac-md5-96"
-
-#define KEX_DEFAULT_COMP "none,zlib"
-#define KEX_DEFAULT_LANG ""
-
-
-static char *my_srv_proposal[PROPOSAL_MAX] = {
- KEX_DEFAULT_KEX,
- KEX_DEFAULT_PK_ALG,
- KEX_DEFAULT_SERVER_ENCRYPT,
- KEX_DEFAULT_SERVER_ENCRYPT,
- KEX_DEFAULT_MAC,
- KEX_DEFAULT_MAC,
- KEX_DEFAULT_COMP,
- KEX_DEFAULT_COMP,
- KEX_DEFAULT_LANG,
- KEX_DEFAULT_LANG
-};
-
-static char *my_clnt_proposal[PROPOSAL_MAX] = {
- KEX_DEFAULT_KEX,
- KEX_DEFAULT_PK_ALG,
- KEX_DEFAULT_CLIENT_ENCRYPT,
- KEX_DEFAULT_CLIENT_ENCRYPT,
- KEX_DEFAULT_MAC,
- KEX_DEFAULT_MAC,
- KEX_DEFAULT_COMP,
- KEX_DEFAULT_COMP,
- KEX_DEFAULT_LANG,
- KEX_DEFAULT_LANG
-};
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _MYPROPOSAL_H */
diff --git a/usr/src/cmd/ssh/include/openbsd-compat.h b/usr/src/cmd/ssh/include/openbsd-compat.h
deleted file mode 100644
index aab8e36c62..0000000000
--- a/usr/src/cmd/ssh/include/openbsd-compat.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 1999-2003 Damien Miller. All rights reserved.
- * Copyright (c) 2003 Ben Lindstrom. All rights reserved.
- * Copyright (c) 2002 Tim Rice. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _OPENBSD_COMPAT_H
-#define _OPENBSD_COMPAT_H
-
-/* $Id: openbsd-compat.h,v 1.17 2002/09/12 00:33:02 djm Exp $ */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "config.h"
-
-/* OpenBSD function replacements */
-#include "bindresvport.h"
-#include "getcwd.h"
-#include "realpath.h"
-#include "rresvport.h"
-#include "strlcpy.h"
-#include "strlcat.h"
-#include "strmode.h"
-#include "mktemp.h"
-#include "dirname.h"
-#include "base64.h"
-#include "sigact.h"
-#include "inet_ntoa.h"
-#include "inet_ntop.h"
-#include "setproctitle.h"
-#include "getgrouplist.h"
-#include "glob.h"
-#include "readpassphrase.h"
-#include "getopt.h"
-
-/* Home grown routines */
-#include "bsd-arc4random.h"
-#include "bsd-getpeereid.h"
-#include "bsd-misc.h"
-#include "bsd-snprintf.h"
-#include "bsd-waitpid.h"
-
-/* rfc2553 socket API replacements */
-#include "fake-getaddrinfo.h"
-#include "fake-getnameinfo.h"
-#include "fake-socket.h"
-
-/* Routines for a single OS platform */
-#include "bsd-cray.h"
-#include "port-irix.h"
-#include "port-aix.h"
-
-#ifndef HAVE_VASPRINTF
-int vasprintf(char **, const char *, va_list);
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _OPENBSD_COMPAT_H */
diff --git a/usr/src/cmd/ssh/include/packet.h b/usr/src/cmd/ssh/include/packet.h
deleted file mode 100644
index 26c3f8843e..0000000000
--- a/usr/src/cmd/ssh/include/packet.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * Interface for the packet protocol functions.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _PACKET_H
-#define _PACKET_H
-
-/* $OpenBSD: packet.h,v 1.35 2002/06/19 18:01:00 markus Exp $ */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-#include <openssl/bn.h>
-#include "kex.h"
-
-#ifdef ALTPRIVSEP
-/* Monitor-side functions */
-void packet_set_server(void);
-void packet_set_no_monitor(void);
-void packet_set_monitor(int pip_fd);
-int packet_is_server(void);
-int packet_is_monitor(void);
-void packet_set_packet(const void *buf, u_int len);
-void packet_set_fds(int fd, int restore);
-#endif /* ALTPRIVSEP */
-
-void packet_set_connection(int, int);
-void packet_set_nonblocking(void);
-int packet_get_connection_in(void);
-int packet_get_connection_out(void);
-void packet_close(void);
-void packet_set_encryption_key(const u_char *, u_int, int);
-u_int packet_get_encryption_key(u_char *);
-void packet_set_protocol_flags(u_int);
-u_int packet_get_protocol_flags(void);
-void packet_start_compression(int);
-void packet_set_interactive(int);
-int packet_is_interactive(void);
-
-void packet_start(u_char);
-void packet_put_char(int ch);
-void packet_put_int(u_int value);
-void packet_put_bignum(BIGNUM * value);
-void packet_put_bignum2(BIGNUM * value);
-void packet_put_string(const void *buf, u_int len);
-void packet_put_cstring(const char *str);
-void packet_put_raw(const void *buf, u_int len);
-void packet_send(void);
-
-void packet_put_utf8_string(const char *str, uint_t len);
-void packet_put_utf8_cstring(const char *str);
-
-int packet_read(void);
-void packet_read_expect(int type);
-int packet_read_poll(void);
-void packet_process_incoming(const char *buf, u_int len);
-int packet_read_seqnr(u_int32_t *seqnr_p);
-int packet_read_poll_seqnr(u_int32_t *seqnr_p);
-
-u_int packet_get_char(void);
-u_int packet_get_int(void);
-void packet_get_bignum(BIGNUM * value);
-void packet_get_bignum2(BIGNUM * value);
-void *packet_get_raw(u_int *length_ptr);
-void *packet_get_string(u_int *length_ptr);
-char *packet_get_utf8_string(uint_t *length_ptr);
-void packet_disconnect(const char *fmt,...) __attribute__((format(printf, 1, 2)));
-void packet_send_debug(const char *fmt,...) __attribute__((format(printf, 1, 2)));
-
-void set_newkeys(int mode);
-void free_keys(Newkeys *keys);
-
-void packet_write_poll(void);
-void packet_write_wait(void);
-int packet_have_data_to_write(void);
-int packet_not_very_much_data_to_write(void);
-
-int packet_connection_is_on_socket(void);
-int packet_connection_is_ipv4(void);
-int packet_remaining(void);
-void packet_send_ignore(int);
-void packet_add_padding(u_char);
-
-void tty_make_modes(int, struct termios *);
-void tty_parse_modes(int, int *);
-
-extern int max_packet_size;
-int packet_set_maxsize(int);
-#define packet_get_maxsize() max_packet_size
-
-/* don't allow remaining bytes after the end of the message */
-#define packet_check_eom() \
-do { \
- int _len = packet_remaining(); \
- if (_len > 0) { \
- log("Packet integrity error (%d bytes remaining) at %s:%d", \
- _len ,__FILE__, __LINE__); \
- packet_disconnect("Packet integrity error."); \
- } \
-} while (0)
-
-int packet_need_rekeying(void);
-void packet_set_rekey_limit(u_int32_t);
-
-/* see a comment attached to will_daemonize in packet.c for more information */
-#define NOT_DAEMONIZING 0
-#define DAEMONIZING_REQUESTED 1
-#define FIRST_NEWKEYS_PROCESSED 2
-#define SECOND_NEWKEYS_PROCESSED 3
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _PACKET_H */
diff --git a/usr/src/cmd/ssh/include/pathnames.h b/usr/src/cmd/ssh/include/pathnames.h
deleted file mode 100644
index 4c6638387d..0000000000
--- a/usr/src/cmd/ssh/include/pathnames.h
+++ /dev/null
@@ -1,194 +0,0 @@
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-/* $OpenBSD: pathnames.h,v 1.13 2002/05/23 19:24:30 markus Exp $ */
-
-#ifndef _PATHNAMES_H
-#define _PATHNAMES_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-#define ETCDIR "/etc"
-
-#ifndef SSHDIR
-#define SSHDIR ETCDIR "/ssh"
-#endif
-
-#ifndef _PATH_SSH_PIDDIR
-#define _PATH_SSH_PIDDIR "/var/run"
-#endif
-
-/*
- * System-wide file containing host keys of known hosts. This file should be
- * world-readable.
- */
-#define _PATH_SSH_SYSTEM_HOSTFILE SSHDIR "/ssh_known_hosts"
-/* backward compat for protocol 2 */
-#define _PATH_SSH_SYSTEM_HOSTFILE2 SSHDIR "/ssh_known_hosts2"
-
-/*
- * Of these, ssh_host_key must be readable only by root, whereas ssh_config
- * should be world-readable.
- */
-#define _PATH_SERVER_CONFIG_FILE SSHDIR "/sshd_config"
-#define _PATH_HOST_CONFIG_FILE SSHDIR "/ssh_config"
-#define _PATH_HOST_KEY_FILE SSHDIR "/ssh_host_key"
-#define _PATH_HOST_DSA_KEY_FILE SSHDIR "/ssh_host_dsa_key"
-#define _PATH_HOST_RSA_KEY_FILE SSHDIR "/ssh_host_rsa_key"
-#define _PATH_DH_MODULI SSHDIR "/moduli"
-/* Backwards compatibility */
-#define _PATH_DH_PRIMES SSHDIR "/primes"
-
-#ifndef _PATH_SSH_PROGRAM
-#define _PATH_SSH_PROGRAM "/usr/bin/ssh"
-#endif
-
-/*
- * The process id of the daemon listening for connections is saved here to
- * make it easier to kill the correct daemon when necessary.
- */
-#define _PATH_SSH_DAEMON_PID_FILE _PATH_SSH_PIDDIR "/sshd.pid"
-
-/*
- * The directory in user\'s home directory in which the files reside. The
- * directory should be world-readable (though not all files are).
- */
-#define _PATH_SSH_USER_DIR ".ssh"
-
-/*
- * Per-user file containing host keys of known hosts. This file need not be
- * readable by anyone except the user him/herself, though this does not
- * contain anything particularly secret.
- */
-#define _PATH_SSH_USER_HOSTFILE "~/.ssh/known_hosts"
-/* backward compat for protocol 2 */
-#define _PATH_SSH_USER_HOSTFILE2 "~/.ssh/known_hosts2"
-
-/*
- * Name of the default file containing client-side authentication key. This
- * file should only be readable by the user him/herself.
- */
-#define _PATH_SSH_CLIENT_IDENTITY ".ssh/identity"
-#define _PATH_SSH_CLIENT_ID_DSA ".ssh/id_dsa"
-#define _PATH_SSH_CLIENT_ID_RSA ".ssh/id_rsa"
-
-/*
- * Configuration file in user\'s home directory. This file need not be
- * readable by anyone but the user him/herself, but does not contain anything
- * particularly secret. If the user\'s home directory resides on an NFS
- * volume where root is mapped to nobody, this may need to be world-readable.
- */
-#define _PATH_SSH_USER_CONFFILE ".ssh/config"
-
-/*
- * File containing a list of those rsa keys that permit logging in as this
- * user. This file need not be readable by anyone but the user him/herself,
- * but does not contain anything particularly secret. If the user\'s home
- * directory resides on an NFS volume where root is mapped to nobody, this
- * may need to be world-readable. (This file is read by the daemon which is
- * running as root.)
- */
-#define _PATH_SSH_USER_PERMITTED_KEYS ".ssh/authorized_keys"
-
-/* backward compat for protocol v2 */
-#define _PATH_SSH_USER_PERMITTED_KEYS2 ".ssh/authorized_keys2"
-
-/*
- * Per-user and system-wide ssh "rc" files. These files are executed with
- * /bin/sh before starting the shell or command if they exist. They will be
- * passed "proto cookie" as arguments if X11 forwarding with spoofing is in
- * use. xauth will be run if neither of these exists.
- */
-#define _PATH_SSH_USER_RC ".ssh/rc"
-#define _PATH_SSH_SYSTEM_RC SSHDIR "/sshrc"
-
-/*
- * Ssh-only version of /etc/hosts.equiv. Additionally, the daemon may use
- * ~/.rhosts and /etc/hosts.equiv if rhosts authentication is enabled.
- */
-#define _PATH_SSH_HOSTS_EQUIV SSHDIR "/shosts.equiv"
-#define _PATH_RHOSTS_EQUIV "/etc/hosts.equiv"
-
-/*
- * /etc/default/login
- */
-#define _PATH_DEFAULT_LOGIN "/etc/default/login"
-
-/*
- * Default location of askpass
- */
-#ifndef _PATH_SSH_ASKPASS_DEFAULT
-#define _PATH_SSH_ASKPASS_DEFAULT "/usr/lib/ssh/ssh-askpass"
-#endif
-
-/* Location of ssh-keysign for hostbased authentication */
-#ifndef _PATH_SSH_KEY_SIGN
-#define _PATH_SSH_KEY_SIGN "/usr/lib/ssh/ssh-keysign"
-#endif
-
-/* xauth for X11 forwarding */
-#ifndef _PATH_XAUTH
-#define _PATH_XAUTH "/usr/openwin/bin/xauth"
-#endif
-
-/* UNIX domain socket for X11 server; displaynum will replace %u */
-#ifndef _PATH_UNIX_X
-#define _PATH_UNIX_X "/tmp/.X11-unix/X%u"
-#endif
-
-/* for scp */
-#ifndef _PATH_CP
-#define _PATH_CP "cp"
-#endif
-
-/* for sftp */
-#ifndef _PATH_SFTP_SERVER
-#define _PATH_SFTP_SERVER "/usr/lib/ssh/sftp-server"
-#endif
-
-/* chroot directory for unprivileged user when UsePrivilegeSeparation=yes */
-#ifndef _PATH_PRIVSEP_CHROOT_DIR
-#define _PATH_PRIVSEP_CHROOT_DIR "/var/empty"
-#endif
-
-#ifndef _PATH_LS
-#define _PATH_LS "ls"
-#endif
-
-/* path to login program */
-#ifndef LOGIN_PROGRAM
-# ifdef LOGIN_PROGRAM_FALLBACK
-# define LOGIN_PROGRAM LOGIN_PROGRAM_FALLBACK
-# else
-# define LOGIN_PROGRAM "/usr/bin/login"
-# endif
-#endif /* LOGIN_PROGRAM */
-
-/* Askpass program define */
-#ifndef ASKPASS_PROGRAM
-#define ASKPASS_PROGRAM "/usr/lib/ssh/ssh-askpass"
-#endif /* ASKPASS_PROGRAM */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _PATHNAMES_H */
diff --git a/usr/src/cmd/ssh/include/port-aix.h b/usr/src/cmd/ssh/include/port-aix.h
deleted file mode 100644
index e14fb811b8..0000000000
--- a/usr/src/cmd/ssh/include/port-aix.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- *
- * Copyright (c) 2001 Gert Doering. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#ifndef _PORT_AIX_H
-#define _PORT_AIX_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-#ifdef _AIX
-void aix_usrinfo(struct passwd *pw);
-#endif /* _AIX */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _PORT_AIX_H */
diff --git a/usr/src/cmd/ssh/include/port-irix.h b/usr/src/cmd/ssh/include/port-irix.h
deleted file mode 100644
index 79397e0674..0000000000
--- a/usr/src/cmd/ssh/include/port-irix.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * XXX - Add OpenSSH copyright
- */
-
-#ifndef _PORT_IRIX_H
-#define _PORT_IRIX_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-#if defined(WITH_IRIX_PROJECT) || defined(WITH_IRIX_JOBS) || defined(WITH_IRIX_ARRAY)
-
-void irix_setusercontext(struct passwd *pw);
-
-#endif /* defined(WITH_IRIX_PROJECT) || defined(WITH_IRIX_JOBS) || defined(WITH_IRIX_ARRAY) */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _PORT_IRIX_H */
diff --git a/usr/src/cmd/ssh/include/progressmeter.h b/usr/src/cmd/ssh/include/progressmeter.h
deleted file mode 100644
index d8a3bc6737..0000000000
--- a/usr/src/cmd/ssh/include/progressmeter.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2002 Nils Nordman. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _PROGRESSMETER_H
-#define _PROGRESSMETER_H
-
-/* $OpenBSD: progressmeter.h,v 1.2 2006/03/25 22:22:43 djm Exp $ */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void start_progress_meter(char *, off_t, off_t *);
-void stop_progress_meter(void);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _PROGRESSMETER_H */
diff --git a/usr/src/cmd/ssh/include/proxy-io.h b/usr/src/cmd/ssh/include/proxy-io.h
deleted file mode 100644
index e240118c8d..0000000000
--- a/usr/src/cmd/ssh/include/proxy-io.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _PROXY_IO_H
-#define _PROXY_IO_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * Read/write loop for ssh proxies.
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define BUFFER_SIZ 8192
-
-int proxy_read_write_loop(int readfd, int writefd);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _PROXY_IO_H */
diff --git a/usr/src/cmd/ssh/include/readconf.h b/usr/src/cmd/ssh/include/readconf.h
deleted file mode 100644
index 1aceb9cb7c..0000000000
--- a/usr/src/cmd/ssh/include/readconf.h
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * Functions for reading the configuration file.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _READCONF_H
-#define _READCONF_H
-
-/* $OpenBSD: readconf.h,v 1.43 2002/06/08 05:17:01 markus Exp $ */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "key.h"
-
-/*
- * We accept only fixed amount of unknown options. Note that we must treat all
- * options in different Host sections separately since we need to remember the
- * line number. See IgnoreIfUnknown for more information.
- */
-#define MAX_UNKNOWN_OPTIONS 64
-
-/* Data structure for representing a forwarding request. */
-
-typedef struct {
- char *listen_host; /* Host (address) to listen on. */
- u_short listen_port; /* Port to forward. */
- char *connect_host; /* Host to connect. */
- u_short connect_port; /* Port to connect on connect_host. */
-} Forward;
-/* Data structure for representing option data. */
-
-/* For postponed processing of option keywords. */
-typedef struct {
- char *keyword; /* option keyword name */
- char *filename; /* config file it was found in */
- int linenum; /* line number in the config file */
-} StoredOption;
-
-typedef struct {
- int forward_agent; /* Forward authentication agent. */
- int forward_x11; /* Forward X11 display. */
- int forward_x11_trusted; /* Trust Forward X11 display. */
- char *xauth_location; /* Location for xauth program */
- int gateway_ports; /* Allow remote connects to forwarded ports. */
- int use_privileged_port; /* Don't use privileged port if false. */
- int rhosts_authentication; /* Try rhosts authentication. */
- int rhosts_rsa_authentication; /* Try rhosts with RSA
- * authentication. */
- int rsa_authentication; /* Try RSA authentication. */
- int pubkey_authentication; /* Try ssh2 pubkey authentication. */
- int hostbased_authentication; /* ssh2's rhosts_rsa */
- int challenge_response_authentication;
- int fallback_to_rsh; /* Use rsh if cannot connect with ssh. */
- int use_rsh; /* Always use rsh(don\'t try ssh). */
- /* Try S/Key or TIS, authentication. */
-#if defined(KRB4) || defined(KRB5)
- int kerberos_authentication; /* Try Kerberos authentication. */
-#endif
-#if defined(AFS) || defined(KRB5)
- int kerberos_tgt_passing; /* Try Kerberos TGT passing. */
-#endif
-
-#ifdef GSSAPI
- int gss_keyex;
- int gss_authentication;
- int gss_deleg_creds;
-#ifdef GSI
- int gss_globus_deleg_limited_proxy;
-#endif /* GSI */
-#endif /* GSSAPI */
-
-#ifdef AFS
- int afs_token_passing; /* Try AFS token passing. */
-#endif
- int password_authentication; /* Try password
- * authentication. */
- int kbd_interactive_authentication; /* Try keyboard-interactive auth. */
- char *kbd_interactive_devices; /* Keyboard-interactive auth devices. */
- int batch_mode; /* Batch mode: do not ask for passwords. */
- int check_host_ip; /* Also keep track of keys for IP address */
- int strict_host_key_checking; /* Strict host key checking. */
- int compression; /* Compress packets in both directions. */
- int compression_level; /* Compression level 1 (fast) to 9
- * (best). */
- int keepalives; /* Set SO_KEEPALIVE. */
- LogLevel log_level; /* Level for logging. */
-
- int port; /* Port to connect. */
- int connection_attempts; /* Max attempts (seconds) before
- * giving up */
- int connection_timeout; /* Max time (seconds) before
- * aborting connection attempt */
- int number_of_password_prompts; /* Max number of password
- * prompts. */
- int cipher; /* Cipher to use. */
- char *ciphers; /* SSH2 ciphers in order of preference. */
- char *macs; /* SSH2 macs in order of preference. */
- char *hostkeyalgorithms; /* SSH2 server key types in order of preference. */
- int protocol; /* Protocol in order of preference. */
- char *hostname; /* Real host to connect. */
- char *host_key_alias; /* hostname alias for .ssh/known_hosts */
- char *proxy_command; /* Proxy command for connecting the host. */
- char *user; /* User to log in as. */
- int escape_char; /* Escape character; -2 = none */
-
- char *system_hostfile;/* Path for /etc/ssh/ssh_known_hosts. */
- char *user_hostfile; /* Path for $HOME/.ssh/known_hosts. */
- char *system_hostfile2;
- char *user_hostfile2;
- char *preferred_authentications;
- char *bind_address; /* local socket address for connection to sshd */
- char *smartcard_device; /* Smartcard reader device */
- int disable_banner; /* Disable display of banner */
-
- /*
- * Unknown options listed in IgnoreIfUnknown will not cause ssh to
- * exit. So, we must store all unknown options here and can't process
- * them before the command line options and all config files are read
- * and IgnoreIfUnknown is properly set.
- */
- char *ignore_if_unknown;
- int unknown_opts_num;
- StoredOption unknown_opts[MAX_UNKNOWN_OPTIONS];
-
- int num_identity_files; /* Number of files for RSA/DSA identities. */
- char *identity_files[SSH_MAX_IDENTITY_FILES];
- Key *identity_keys[SSH_MAX_IDENTITY_FILES];
-
- /* Local TCP/IP forward requests. */
- int num_local_forwards;
- Forward local_forwards[SSH_MAX_FORWARDS_PER_DIRECTION];
-
- /* Remote TCP/IP forward requests. */
- int num_remote_forwards;
- Forward remote_forwards[SSH_MAX_FORWARDS_PER_DIRECTION];
- int clear_forwardings;
-
- int64_t rekey_limit;
- int no_host_authentication_for_localhost;
- int server_alive_interval;
- int server_alive_count_max;
-
- int hash_known_hosts;
- int use_openssl_engine;
-} Options;
-
-
-void initialize_options(Options *);
-void fill_default_options(Options *);
-int read_config_file(const char *, const char *, Options *);
-int parse_forward(int, Forward *, const char *);
-
-int
-process_config_line(Options *, const char *, char *, const char *, int, int *);
-
-void add_local_forward(Options *, const Forward *);
-void add_remote_forward(Options *, const Forward *);
-
-void process_unknown_options(Options *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _READCONF_H */
diff --git a/usr/src/cmd/ssh/include/readpass.h b/usr/src/cmd/ssh/include/readpass.h
deleted file mode 100644
index 1917f08e30..0000000000
--- a/usr/src/cmd/ssh/include/readpass.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/* $OpenBSD: readpass.h,v 1.7 2002/03/26 15:58:46 markus Exp $ */
-
-#ifndef _READPASS_H
-#define _READPASS_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-#define RP_ECHO 0x0001
-#define RP_ALLOW_STDIN 0x0002
-#define RP_ALLOW_EOF 0x0004
-#define RP_USE_ASKPASS 0x0008
-
-char *read_passphrase(const char *, int);
-int ask_permission(const char *, ...)
- __attribute__((format(printf, 1, 2)));
-int read_keyfile_line(FILE *, const char *, char *, size_t, u_long *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _READPASS_H */
diff --git a/usr/src/cmd/ssh/include/readpassphrase.h b/usr/src/cmd/ssh/include/readpassphrase.h
deleted file mode 100644
index 3dd7b367d0..0000000000
--- a/usr/src/cmd/ssh/include/readpassphrase.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/* $OpenBSD: readpassphrase.h,v 1.3 2002/06/28 12:32:22 millert Exp $ */
-
-#ifndef _READPASSPHRASE_H
-#define _READPASSPHRASE_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Copyright (c) 2000 Todd C. Miller <Todd.Miller@courtesan.com>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-
-#ifndef HAVE_READPASSPHRASE
-
-#define RPP_ECHO_OFF 0x00 /* Turn off echo (default). */
-#define RPP_ECHO_ON 0x01 /* Leave echo on. */
-#define RPP_REQUIRE_TTY 0x02 /* Fail if there is no tty. */
-#define RPP_FORCELOWER 0x04 /* Force input to lower case. */
-#define RPP_FORCEUPPER 0x08 /* Force input to upper case. */
-#define RPP_SEVENBIT 0x10 /* Strip the high bit from input. */
-#define RPP_STDIN 0x20 /* Read from stdin, not /dev/tty */
-
-char * readpassphrase(const char *, char *, size_t, int);
-
-#endif /* HAVE_READPASSPHRASE */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _READPASSPHRASE_H */
diff --git a/usr/src/cmd/ssh/include/realpath.h b/usr/src/cmd/ssh/include/realpath.h
deleted file mode 100644
index 1bd00053ac..0000000000
--- a/usr/src/cmd/ssh/include/realpath.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* $Id: realpath.h,v 1.2 2001/02/09 01:55:36 djm Exp $ */
-
-#ifndef _REALPATH_H
-#define _REALPATH_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "config.h"
-
-#if !defined(HAVE_REALPATH) || defined(BROKEN_REALPATH)
-
-char *realpath(const char *path, char *resolved);
-
-#endif /* !defined(HAVE_REALPATH) || defined(BROKEN_REALPATH) */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _REALPATH_H */
diff --git a/usr/src/cmd/ssh/include/rresvport.h b/usr/src/cmd/ssh/include/rresvport.h
deleted file mode 100644
index 33c47d84cf..0000000000
--- a/usr/src/cmd/ssh/include/rresvport.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* $Id: rresvport.h,v 1.2 2001/02/09 01:55:36 djm Exp $ */
-
-#ifndef _RRESVPORT_H
-#define _RRESVPORT_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "config.h"
-
-#ifndef HAVE_RRESVPORT_AF
-int rresvport_af(int *alport, sa_family_t af);
-#endif /* !HAVE_RRESVPORT_AF */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _RRESVPORT_H */
diff --git a/usr/src/cmd/ssh/include/rsa.h b/usr/src/cmd/ssh/include/rsa.h
deleted file mode 100644
index f0fcc49ba0..0000000000
--- a/usr/src/cmd/ssh/include/rsa.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/* $OpenBSD: rsa.h,v 1.15 2002/03/04 17:27:39 stevesk Exp $ */
-
-#ifndef _RSA_H
-#define _RSA_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * RSA key generation, encryption and decryption.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-#include <openssl/bn.h>
-#include <openssl/rsa.h>
-
-void rsa_public_encrypt(BIGNUM *, BIGNUM *, RSA *);
-int rsa_private_decrypt(BIGNUM *, BIGNUM *, RSA *);
-void rsa_generate_additional_parameters(RSA *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _RSA_H */
diff --git a/usr/src/cmd/ssh/include/servconf.h b/usr/src/cmd/ssh/include/servconf.h
deleted file mode 100644
index a66c6415cb..0000000000
--- a/usr/src/cmd/ssh/include/servconf.h
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * Definitions for server configuration data and for the functions reading it.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-/*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
- */
-
-/* $OpenBSD: servconf.h,v 1.59 2002/07/30 17:03:55 markus Exp $ */
-
-#ifndef _SERVCONF_H
-#define _SERVCONF_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define MAX_PORTS 256 /* Max # ports. */
-
-#define MAX_ALLOW_USERS 256 /* Max # users on allow list. */
-#define MAX_DENY_USERS 256 /* Max # users on deny list. */
-#define MAX_ALLOW_GROUPS 256 /* Max # groups on allow list. */
-#define MAX_DENY_GROUPS 256 /* Max # groups on deny list. */
-#define MAX_SUBSYSTEMS 256 /* Max # subsystems. */
-#define MAX_HOSTKEYS 256 /* Max # hostkeys. */
-
-/* permit_root_login */
-#define PERMIT_NOT_SET -1
-#define PERMIT_NO 0
-#define PERMIT_FORCED_ONLY 1
-#define PERMIT_NO_PASSWD 2
-#define PERMIT_YES 3
-
-/* Magic name for internal sftp-server */
-#define INTERNAL_SFTP_NAME "internal-sftp"
-#define _SSH_PAM_SERVICE_PREFIX "sshd"
-
-typedef struct {
- u_int num_ports;
- u_int ports_from_cmdline;
- u_short ports[MAX_PORTS]; /* Port number to listen on. */
- char *listen_addr; /* Address on which the server listens. */
- struct addrinfo *listen_addrs; /* Addresses on which the server listens. */
- char *host_key_files[MAX_HOSTKEYS]; /* Files containing host keys. */
- int num_host_key_files; /* Number of files for host keys. */
- char *pid_file; /* Where to put our pid */
- int server_key_bits;/* Size of the server key. */
- int login_grace_time; /* Disconnect if no auth in this time
- * (sec). */
- int key_regeneration_time; /* Server key lifetime (seconds). */
- int permit_root_login; /* PERMIT_*, see above */
- int ignore_rhosts; /* Ignore .rhosts and .shosts. */
- int ignore_user_known_hosts; /* Ignore ~/.ssh/known_hosts
- * for RhostsRsaAuth */
- int print_motd; /* If true, print /etc/motd. */
- int print_lastlog; /* If true, print lastlog */
- int x11_forwarding; /* If true, permit inet (spoofing) X11 fwd. */
- int x11_display_offset; /* What DISPLAY number to start
- * searching at */
- int x11_use_localhost; /* If true, use localhost for fake X11 server. */
- char *xauth_location; /* Location of xauth program */
- int strict_modes; /* If true, require string home dir modes. */
- int keepalives; /* If true, set SO_KEEPALIVE. */
- char *ciphers; /* Supported SSH2 ciphers. */
- char *macs; /* Supported SSH2 macs. */
- int protocol; /* Supported protocol versions. */
- int gateway_ports; /* If true, allow remote connects to forwarded ports. */
- SyslogFacility log_facility; /* Facility for system logging. */
- LogLevel log_level; /* Level for system logging. */
- int rhosts_authentication; /* If true, permit rhosts
- * authentication. */
- int rhosts_rsa_authentication; /* If true, permit rhosts RSA
- * authentication. */
- int hostbased_authentication; /* If true, permit ssh2 hostbased auth */
- int hostbased_uses_name_from_packet_only; /* experimental */
- int rsa_authentication; /* If true, permit RSA authentication. */
- int pubkey_authentication; /* If true, permit ssh2 pubkey authentication. */
-#ifdef GSSAPI
- int gss_authentication;
- int gss_keyex;
- int gss_store_creds;
- int gss_use_session_ccache; /* If true, delegated credentials are
- * stored in a session specific cache */
- int gss_cleanup_creds; /* If true, destroy cred cache on logout */
-#endif /* GSSAPI */
-#if defined(KRB4) || defined(KRB5)
- int kerberos_authentication; /* If true, permit Kerberos
- * authentication. */
- int kerberos_or_local_passwd; /* If true, permit kerberos
- * and any other password
- * authentication mechanism,
- * such as SecurID or
- * /etc/passwd */
- int kerberos_ticket_cleanup; /* If true, destroy ticket
- * file on logout. */
-#endif
-#if defined(AFS) || defined(KRB5)
- int kerberos_tgt_passing; /* If true, permit Kerberos TGT
- * passing. */
-#endif
-#ifdef AFS
- int afs_token_passing; /* If true, permit AFS token passing. */
-#endif
- int password_authentication; /* If true, permit password
- * authentication. */
-
- int kbd_interactive_authentication;
- int challenge_response_authentication;
- int pam_authentication_via_kbd_int;
-
- int permit_empty_passwd; /* If false, do not permit empty
- * passwords. */
- int permit_user_env; /* If true, read ~/.ssh/environment */
- int use_login; /* If true, login(1) is used */
- int compression; /* If true, compression is allowed */
- int allow_tcp_forwarding;
-
- u_int num_allow_users;
- char *allow_users[MAX_ALLOW_USERS];
- u_int num_deny_users;
- char *deny_users[MAX_DENY_USERS];
- u_int num_allow_groups;
- char *allow_groups[MAX_ALLOW_GROUPS];
- u_int num_deny_groups;
- char *deny_groups[MAX_DENY_GROUPS];
-
- u_int num_subsystems;
- char *subsystem_name[MAX_SUBSYSTEMS];
- char *subsystem_command[MAX_SUBSYSTEMS];
- char *subsystem_args[MAX_SUBSYSTEMS];
-
- int max_startups_begin;
- int max_startups_rate;
- int max_startups;
- char *banner; /* SSH-2 banner message */
- int verify_reverse_mapping; /* cross-check ip and dns */
- int client_alive_interval; /*
- * poke the client this often to
- * see if it's still there
- */
- int client_alive_count_max; /*
- * If the client is unresponsive
- * for this many intervals above,
- * disconnect the session
- */
-
- char *authorized_keys_file; /* File containing public keys */
- char *authorized_keys_file2;
-
- int max_auth_tries;
- int max_auth_tries_log;
-
- int max_init_auth_tries; /* SUNW: /etc/default/login */
- int max_init_auth_tries_log; /* SUNW: /etc/default/login */
-
- int lookup_client_hostnames;
- int use_openssl_engine;
- char *chroot_directory;
- char *pre_userauth_hook;
- char *pam_service_prefix;
- char *pam_service_name;
-
-} ServerOptions;
-
-void initialize_server_options(ServerOptions *);
-void fill_default_server_options(ServerOptions *);
-int process_server_config_line(ServerOptions *, char *, const char *, int,
- int *, const char *, const char *, const char *);
-void load_server_config(const char *, Buffer *);
-void parse_server_config(ServerOptions *, const char *, Buffer *,
- const char *, const char *, const char *);
-void parse_server_match_config(ServerOptions *, const char *, const char *,
- const char *);
-void copy_set_server_options(ServerOptions *, ServerOptions *, int);
-int chroot_requested(char *chroot_directory);
-
-#ifdef __cplusplus
-}
-#endif
-
-
-#endif /* _SERVCONF_H */
diff --git a/usr/src/cmd/ssh/include/serverloop.h b/usr/src/cmd/ssh/include/serverloop.h
deleted file mode 100644
index ec5b34a6b2..0000000000
--- a/usr/src/cmd/ssh/include/serverloop.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* $OpenBSD: serverloop.h,v 1.5 2001/06/27 02:12:53 markus Exp $ */
-
-#ifndef _SERVERLOOP_H
-#define _SERVERLOOP_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-/*
- * Performs the interactive session. This handles data transmission between
- * the client and the program. Note that the notion of stdin, stdout, and
- * stderr in this function is sort of reversed: this function writes to stdin
- * (of the child program), and reads from stdout and stderr (of the child
- * program).
- */
-
-void server_loop(pid_t, int, int, int);
-void server_loop2(Authctxt *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SERVERLOOP_H */
diff --git a/usr/src/cmd/ssh/include/session.h b/usr/src/cmd/ssh/include/session.h
deleted file mode 100644
index 0d81189842..0000000000
--- a/usr/src/cmd/ssh/include/session.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2000, 2001 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SESSION_H
-#define _SESSION_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* $OpenBSD: session.h,v 1.19 2002/06/30 21:59:45 deraadt Exp $ */
-#define TTYSZ 64
-typedef struct Session Session;
-struct Session {
- int used;
- int self;
- struct passwd *pw;
- Authctxt *authctxt;
- pid_t pid;
- /* tty */
- char *term;
- int ptyfd, ttyfd, ptymaster;
- u_int row, col, xpixel, ypixel;
- char tty[TTYSZ];
- /* last login */
- char hostname[MAXHOSTNAMELEN];
- time_t last_login_time;
- /* X11 */
- u_int display_number;
- char *display;
- u_int screen;
- char *auth_display;
- char *auth_proto;
- char *auth_data;
- char *auth_file; /* xauth(1) authority file */
- int single_connection;
- /* proto 2 */
- int chanid;
- int is_subsystem;
- char *command;
- char **env;
-};
-
-void do_authenticated(Authctxt *);
-
-int session_open(Authctxt *, int);
-int session_input_channel_req(Channel *, const char *);
-void session_close_by_pid(pid_t, int);
-void session_close_by_channel(int, void *);
-void session_destroy_all(void (*)(Session *));
-void session_pty_cleanup2(void *);
-
-Session *session_new(void);
-Session *session_by_tty(char *);
-void session_close(Session *);
-void do_setusercontext(struct passwd *);
-void child_set_env(char ***envp, u_int *envsizep, const char *name,
- const char *value);
-void child_set_env_silent(char ***envp, u_int *envsizep, const char *name,
- const char *value);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SESSION_H */
diff --git a/usr/src/cmd/ssh/include/setproctitle.h b/usr/src/cmd/ssh/include/setproctitle.h
deleted file mode 100644
index 9488be3cf7..0000000000
--- a/usr/src/cmd/ssh/include/setproctitle.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* $Id: setproctitle.h,v 1.2 2001/02/09 01:55:36 djm Exp $ */
-
-#ifndef _SETPROCTITLE_H
-#define _SETPROCTITLE_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "config.h"
-
-#ifndef HAVE_SETPROCTITLE
-void setproctitle(const char *fmt, ...);
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SETPROCTITLE_H */
diff --git a/usr/src/cmd/ssh/include/sftp-client.h b/usr/src/cmd/ssh/include/sftp-client.h
deleted file mode 100644
index 1927e3df16..0000000000
--- a/usr/src/cmd/ssh/include/sftp-client.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright (c) 2001-2004 Damien Miller <djm@openbsd.org>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#ifndef _SFTP_CLIENT_H
-#define _SFTP_CLIENT_H
-
-/* $OpenBSD: sftp-client.h,v 1.14 2005/04/26 12:59:02 jmc Exp $ */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Client side of SSH2 filexfer protocol */
-
-typedef struct SFTP_DIRENT SFTP_DIRENT;
-
-struct SFTP_DIRENT {
- char *filename;
- char *longname;
- Attrib a;
-};
-
-/*
- * Initialise a SSH filexfer connection. Returns NULL on error or
- * a pointer to a initialized sftp_conn struct on success.
- */
-struct sftp_conn *do_init(int, int, u_int, u_int);
-
-u_int sftp_proto_version(struct sftp_conn *);
-
-/* Close file referred to by 'handle' */
-int do_close(struct sftp_conn *, char *, u_int);
-
-/* Read contents of 'path' to NULL-terminated array 'dir' */
-int do_readdir(struct sftp_conn *, char *, SFTP_DIRENT ***);
-
-/* Frees a NULL-terminated array of SFTP_DIRENTs (eg. from do_readdir) */
-void free_sftp_dirents(SFTP_DIRENT **);
-
-/* Delete file 'path' */
-int do_rm(struct sftp_conn *, char *);
-
-/* Create directory 'path' */
-int do_mkdir(struct sftp_conn *, char *, Attrib *);
-
-/* Remove directory 'path' */
-int do_rmdir(struct sftp_conn *, char *);
-
-/* Get file attributes of 'path' (follows symlinks) */
-Attrib *do_stat(struct sftp_conn *, char *, int);
-
-/* Get file attributes of 'path' (does not follow symlinks) */
-Attrib *do_lstat(struct sftp_conn *, char *, int);
-
-/* Get file attributes of open file 'handle' */
-Attrib *do_fstat(struct sftp_conn *, char *, u_int, int);
-
-/* Set file attributes of 'path' */
-int do_setstat(struct sftp_conn *, char *, Attrib *);
-
-/* Set file attributes of open file 'handle' */
-int do_fsetstat(struct sftp_conn *, char *, u_int, Attrib *);
-
-/* Canonicalise 'path' - caller must free result */
-char *do_realpath(struct sftp_conn *, char *);
-
-/* Rename 'oldpath' to 'newpath' */
-int do_rename(struct sftp_conn *, char *, char *);
-
-/* Rename 'oldpath' to 'newpath' */
-int do_symlink(struct sftp_conn *, char *, char *);
-
-/* Return target of symlink 'path' - caller must free result */
-char *do_readlink(struct sftp_conn *, char *);
-
-/* XXX: add callbacks to do_download/do_upload so we can do progress meter */
-
-/*
- * Download 'remote_path' to 'local_path'. Preserve permissions and times
- * if 'pflag' is set
- */
-int do_download(struct sftp_conn *, char *, char *, int);
-
-/*
- * Upload 'local_path' to 'remote_path'. Preserve permissions and times
- * if 'pflag' is set
- */
-int do_upload(struct sftp_conn *, char *, char *, int);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SFTP_CLIENT_H */
diff --git a/usr/src/cmd/ssh/include/sftp-common.h b/usr/src/cmd/ssh/include/sftp-common.h
deleted file mode 100644
index 86225bc175..0000000000
--- a/usr/src/cmd/ssh/include/sftp-common.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2001 Markus Friedl. All rights reserved.
- * Copyright (c) 2001 Damien Miller. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _SFTP_COMMON_H
-#define _SFTP_COMMON_H
-
-/* $OpenBSD: sftp-common.h,v 1.10 2006/08/03 03:34:42 deraadt Exp $ */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Maximum packet that we are willing to send/accept */
-#define SFTP_MAX_MSG_LENGTH (256 * 1024)
-
-typedef struct Attrib Attrib;
-
-/* File attributes */
-struct Attrib {
- u_int32_t flags;
- u_int64_t size;
- u_int32_t uid;
- u_int32_t gid;
- u_int32_t perm;
- u_int32_t atime;
- u_int32_t mtime;
-};
-
-void attrib_clear(Attrib *);
-void stat_to_attrib(const struct stat *, Attrib *);
-void attrib_to_stat(const Attrib *, struct stat *);
-Attrib *decode_attrib(Buffer *);
-void encode_attrib(Buffer *, const Attrib *);
-char *ls_file(const char *, const struct stat *, int);
-
-const char *fx2txt(int);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SFTP_COMMON_H */
diff --git a/usr/src/cmd/ssh/include/sftp.h b/usr/src/cmd/ssh/include/sftp.h
deleted file mode 100644
index e35cbdedf1..0000000000
--- a/usr/src/cmd/ssh/include/sftp.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright (c) 2001 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* $OpenBSD: sftp.h,v 1.4 2002/02/13 00:59:23 djm Exp $ */
-
-/*
- * draft-ietf-secsh-filexfer-01.txt
- */
-
-#ifndef _SFTP_H
-#define _SFTP_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* version */
-#define SSH2_FILEXFER_VERSION 3
-
-/* client to server */
-#define SSH2_FXP_INIT 1
-#define SSH2_FXP_OPEN 3
-#define SSH2_FXP_CLOSE 4
-#define SSH2_FXP_READ 5
-#define SSH2_FXP_WRITE 6
-#define SSH2_FXP_LSTAT 7
-#define SSH2_FXP_STAT_VERSION_0 7
-#define SSH2_FXP_FSTAT 8
-#define SSH2_FXP_SETSTAT 9
-#define SSH2_FXP_FSETSTAT 10
-#define SSH2_FXP_OPENDIR 11
-#define SSH2_FXP_READDIR 12
-#define SSH2_FXP_REMOVE 13
-#define SSH2_FXP_MKDIR 14
-#define SSH2_FXP_RMDIR 15
-#define SSH2_FXP_REALPATH 16
-#define SSH2_FXP_STAT 17
-#define SSH2_FXP_RENAME 18
-#define SSH2_FXP_READLINK 19
-#define SSH2_FXP_SYMLINK 20
-
-/* server to client */
-#define SSH2_FXP_VERSION 2
-#define SSH2_FXP_STATUS 101
-#define SSH2_FXP_HANDLE 102
-#define SSH2_FXP_DATA 103
-#define SSH2_FXP_NAME 104
-#define SSH2_FXP_ATTRS 105
-
-#define SSH2_FXP_EXTENDED 200
-#define SSH2_FXP_EXTENDED_REPLY 201
-
-/* attributes */
-#define SSH2_FILEXFER_ATTR_SIZE 0x00000001
-#define SSH2_FILEXFER_ATTR_UIDGID 0x00000002
-#define SSH2_FILEXFER_ATTR_PERMISSIONS 0x00000004
-#define SSH2_FILEXFER_ATTR_ACMODTIME 0x00000008
-#define SSH2_FILEXFER_ATTR_EXTENDED 0x80000000
-
-/* portable open modes */
-#define SSH2_FXF_READ 0x00000001
-#define SSH2_FXF_WRITE 0x00000002
-#define SSH2_FXF_APPEND 0x00000004
-#define SSH2_FXF_CREAT 0x00000008
-#define SSH2_FXF_TRUNC 0x00000010
-#define SSH2_FXF_EXCL 0x00000020
-
-/* status messages */
-#define SSH2_FX_OK 0
-#define SSH2_FX_EOF 1
-#define SSH2_FX_NO_SUCH_FILE 2
-#define SSH2_FX_PERMISSION_DENIED 3
-#define SSH2_FX_FAILURE 4
-#define SSH2_FX_BAD_MESSAGE 5
-#define SSH2_FX_NO_CONNECTION 6
-#define SSH2_FX_CONNECTION_LOST 7
-#define SSH2_FX_OP_UNSUPPORTED 8
-#define SSH2_FX_MAX 8
-
-struct passwd;
-
-int sftp_server_main(int, char **, struct passwd *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SFTP_H */
diff --git a/usr/src/cmd/ssh/include/sigact.h b/usr/src/cmd/ssh/include/sigact.h
deleted file mode 100644
index fc5b3b6427..0000000000
--- a/usr/src/cmd/ssh/include/sigact.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/* $OpenBSD: SigAction.h,v 1.2 1999/06/27 08:15:19 millert Exp $ */
-
-#ifndef _SIGACT_H
-#define _SIGACT_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/****************************************************************************
- * Copyright (c) 1998 Free Software Foundation, Inc. *
- * *
- * Permission is hereby granted, free of charge, to any person obtaining a *
- * copy of this software and associated documentation files (the *
- * "Software"), to deal in the Software without restriction, including *
- * without limitation the rights to use, copy, modify, merge, publish, *
- * distribute, distribute with modifications, sublicense, and/or sell *
- * copies of the Software, and to permit persons to whom the Software is *
- * furnished to do so, subject to the following conditions: *
- * *
- * The above copyright notice and this permission notice shall be included *
- * in all copies or substantial portions of the Software. *
- * *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS *
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF *
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. *
- * IN NO EVENT SHALL THE ABOVE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR *
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE. *
- * *
- * Except as contained in this notice, the name(s) of the above copyright *
- * holders shall not be used in advertising or otherwise to promote the *
- * sale, use or other dealings in this Software without prior written *
- * authorization. *
- ****************************************************************************/
-
-/****************************************************************************
- * Author: Zeyd M. Ben-Halim <zmbenhal@netcom.com> 1992,1995 *
- * and: Eric S. Raymond <esr@snark.thyrsus.com> *
- ****************************************************************************/
-
-/*
- * $From: SigAction.h,v 1.5 1999/06/19 23:00:54 tom Exp $
- *
- * This file exists to handle non-POSIX systems which don't have <unistd.h>,
- * and usually no sigaction() nor <termios.h>
- */
-
-#if !defined(HAVE_SIGACTION) && defined(HAVE_SIGVEC)
-
-#undef SIG_BLOCK
-#define SIG_BLOCK 00
-
-#undef SIG_UNBLOCK
-#define SIG_UNBLOCK 01
-
-#undef SIG_SETMASK
-#define SIG_SETMASK 02
-
-/*
- * <bsd/signal.h> is in the Linux 1.2.8 + gcc 2.7.0 configuration,
- * and is useful for testing this header file.
- */
-#if HAVE_BSD_SIGNAL_H
-# include <bsd/signal.h>
-#endif
-
-struct sigaction
-{
- struct sigvec sv;
-};
-
-typedef unsigned long sigset_t;
-
-#undef sa_mask
-#define sa_mask sv.sv_mask
-#undef sa_handler
-#define sa_handler sv.sv_handler
-#undef sa_flags
-#define sa_flags sv.sv_flags
-
-int sigaction(int sig, struct sigaction *sigact, struct sigaction *osigact);
-int sigprocmask (int how, sigset_t *mask, sigset_t *omask);
-int sigemptyset (sigset_t *mask);
-int sigsuspend (sigset_t *mask);
-int sigdelset (sigset_t *mask, int sig);
-int sigaddset (sigset_t *mask, int sig);
-
-#endif /* !defined(HAVE_SIGACTION) && defined(HAVE_SIGVEC) */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SIGACT_H */
diff --git a/usr/src/cmd/ssh/include/ssh-dss.h b/usr/src/cmd/ssh/include/ssh-dss.h
deleted file mode 100644
index 7f4e6d19fe..0000000000
--- a/usr/src/cmd/ssh/include/ssh-dss.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* $OpenBSD: ssh-dss.h,v 1.6 2002/02/24 19:14:59 markus Exp $ */
-
-#ifndef _SSH_DSS_H
-#define _SSH_DSS_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Copyright (c) 2000 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-int ssh_dss_sign(Key *, u_char **, u_int *, u_char *, u_int);
-int ssh_dss_verify(Key *, u_char *, u_int, u_char *, u_int);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SSH_DSS_H */
diff --git a/usr/src/cmd/ssh/include/ssh-gss.h b/usr/src/cmd/ssh/include/ssh-gss.h
deleted file mode 100644
index 6e35d9ed61..0000000000
--- a/usr/src/cmd/ssh/include/ssh-gss.h
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * Copyright (c) 2001-2003 Simon Wilkinson. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR `AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _SSH_GSS_H
-#define _SSH_GSS_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef GSSAPI
-
-#include "kex.h"
-#include "buffer.h"
-
-#ifdef SUNW_GSSAPI
-#include <gssapi/gssapi.h>
-#include <gssapi/gssapi_ext.h>
-#else
-#ifdef GSS_KRB5
-#ifndef HEIMDAL
-#include <gssapi_generic.h>
-
-/* MIT Kerberos doesn't seem to define GSS_NT_HOSTBASED_SERVICE */
-#ifndef GSS_C_NT_HOSTBASED_SERVICE
-#define GSS_C_NT_HOSTBASED_SERVICE gss_nt_service_name
-#endif /* GSS_C_NT_... */
-#endif /* !HEIMDAL */
-#endif /* GSS_KRB5 */
-#endif /* SUNW_GSSAPI */
-
-/* draft-ietf-secsh-gsskeyex-03 */
-#define SSH2_MSG_KEXGSS_INIT 30
-#define SSH2_MSG_KEXGSS_CONTINUE 31
-#define SSH2_MSG_KEXGSS_COMPLETE 32
-#define SSH2_MSG_KEXGSS_HOSTKEY 33
-#define SSH2_MSG_KEXGSS_ERROR 34
-#define SSH2_MSG_USERAUTH_GSSAPI_RESPONSE 60
-#define SSH2_MSG_USERAUTH_GSSAPI_TOKEN 61
-#define SSH2_MSG_USERAUTH_GSSAPI_EXCHANGE_COMPLETE 63
-#define SSH2_MSG_USERAUTH_GSSAPI_ERROR 64
-#define SSH2_MSG_USERAUTH_GSSAPI_ERRTOK 65
-#define SSH2_MSG_USERAUTH_GSSAPI_MIC 66
-
-#define KEX_GSS_SHA1 "gss-group1-sha1-"
-#define SSH_GSS_HOSTBASED_SERVICE "host"
-
-#ifndef HAVE_GSS_STORE_CRED
-typedef struct ssh_gssapi_cred_store ssh_gssapi_cred_store; /* server-only */
-#endif /* !HAVE_GSS_STORE_CRED */
-
-typedef struct {
- OM_uint32 major;
- OM_uint32 minor;
- int local; /* true on client, false on server */
- int established;
- OM_uint32 flags;
- gss_ctx_id_t context;
- gss_OID desired_mech; /* client-side only */
- gss_OID actual_mech;
- gss_name_t desired_name; /* targ on both */
- gss_name_t src_name;
- gss_name_t dst_name;
- gss_cred_id_t creds; /* server-side only */
- gss_cred_id_t deleg_creds; /* server-side only */
- int default_creds; /* server-side only */
-#ifndef HAVE_GSS_STORE_CRED
- ssh_gssapi_cred_store *cred_store; /* server-side only */
-#endif /* !HAVE_GSS_STORE_CRED */
-} Gssctxt;
-
-/* Functions to get supported mech lists */
-void ssh_gssapi_server_mechs(gss_OID_set *mechs);
-void ssh_gssapi_client_mechs(const char *server_host, gss_OID_set *mechs);
-
-/* Functions to get fix KEX proposals (needed for rekey cases) */
-void ssh_gssapi_modify_kex(Kex *kex, gss_OID_set mechs, char **proposal);
-void ssh_gssapi_server_kex_hook(Kex *kex, char **proposal);
-void ssh_gssapi_client_kex_hook(Kex *kex, char **proposal);
-
-/* Map an encoded mechanism keyex name to a mechanism OID */
-void ssh_gssapi_mech_oid_to_kexname(const gss_OID mech, char **kexname);
-void ssh_gssapi_mech_oids_to_kexnames(const gss_OID_set mechs,
- char **kexname_list);
-/* dup oid? */
-void ssh_gssapi_oid_of_kexname(const char *kexname, gss_OID *mech);
-
-/*
- * Unfortunately, the GSS-API is not generic enough for some things --
- * see gss-serv.c and ssh-gss.c
- */
-int ssh_gssapi_is_spnego(gss_OID oid);
-int ssh_gssapi_is_krb5(gss_OID oid);
-int ssh_gssapi_is_gsi(gss_OID oid);
-int ssh_gssapi_is_dh(gss_OID oid);
-
-/* GSS_Init/Accept_sec_context() and GSS_Acquire_cred() wrappers */
-/* client-only */
-OM_uint32 ssh_gssapi_init_ctx(Gssctxt *ctx, const char *server_host,
- int deleg_creds, gss_buffer_t recv_tok, gss_buffer_t send_tok);
-/* server-only */
-OM_uint32 ssh_gssapi_accept_ctx(Gssctxt *ctx, gss_buffer_t recv_tok,
- gss_buffer_t send_tok);
-/* server-only */
-OM_uint32 ssh_gssapi_acquire_cred(Gssctxt *ctx);
-
-/* MIC wrappers */
-OM_uint32 ssh_gssapi_get_mic(Gssctxt *ctx, gss_buffer_t buffer,
- gss_buffer_t hash);
-OM_uint32 ssh_gssapi_verify_mic(Gssctxt *ctx, gss_buffer_t buffer,
- gss_buffer_t hash);
-
-/* Gssctxt functions */
-void ssh_gssapi_build_ctx(Gssctxt **ctx, int client, gss_OID mech);
-void ssh_gssapi_delete_ctx(Gssctxt **ctx);
-int ssh_gssapi_check_mech_oid(Gssctxt *ctx, void *data, size_t len);
-void ssh_gssapi_error(Gssctxt *ctx, const char *where);
-char *ssh_gssapi_last_error(Gssctxt *ctxt, OM_uint32 *maj, OM_uint32 *min);
-
-/* Server-side */
-int ssh_gssapi_userok(Gssctxt *ctx, char *name);
-char *ssh_gssapi_localname(Gssctxt *ctx);
-
-/* Server-side, if PAM and gss_store_cred() are available, ... */
-struct Authctxt; /* needed to avoid conflicts between auth.h, sshconnect2.c */
-void ssh_gssapi_storecreds(Gssctxt *ctx, struct Authctxt *authctxt);
-
-/* ... else, if other interfaces are available for GSS-API cred storing */
-void ssh_gssapi_do_child(Gssctxt *ctx, char ***envp, uint_t *envsizep);
-void ssh_gssapi_cleanup_creds(Gssctxt *ctx);
-
-/* Misc */
-int ssh_gssapi_import_name(Gssctxt *ctx, const char *server_host);
-const char *ssh_gssapi_oid_to_name(gss_OID oid);
-char *ssh_gssapi_oid_to_str(gss_OID oid);
-gss_OID ssh_gssapi_dup_oid(gss_OID oid);
-gss_OID ssh_gssapi_make_oid(size_t length, void *elements);
-gss_OID ssh_gssapi_make_oid_ext(size_t length, void *elements,
- int der_wrapped);
-void *ssh_gssapi_der_wrap(size_t, size_t *length);
-size_t ssh_gssapi_der_wrap_size(size_t, size_t *length);
-void ssh_gssapi_release_oid(gss_OID *oid);
-#endif /* GSSAPI */
-
-#endif /* _SSH_GSS_H */
diff --git a/usr/src/cmd/ssh/include/ssh-rsa.h b/usr/src/cmd/ssh/include/ssh-rsa.h
deleted file mode 100644
index 2d883fa774..0000000000
--- a/usr/src/cmd/ssh/include/ssh-rsa.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* $OpenBSD: ssh-rsa.h,v 1.6 2002/02/24 19:14:59 markus Exp $ */
-
-#ifndef _SSH_RSA_H
-#define _SSH_RSA_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Copyright (c) 2000 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-int ssh_rsa_sign(Key *, u_char **, u_int *, u_char *, u_int);
-int ssh_rsa_verify(Key *, u_char *, u_int, u_char *, u_int);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SSH_RSA_H */
diff --git a/usr/src/cmd/ssh/include/ssh.h b/usr/src/cmd/ssh/include/ssh.h
deleted file mode 100644
index 746d2ff69b..0000000000
--- a/usr/src/cmd/ssh/include/ssh.h
+++ /dev/null
@@ -1,133 +0,0 @@
-/* $OpenBSD: ssh.h,v 1.71 2002/06/22 02:00:29 stevesk Exp $ */
-
-#ifndef _SSH_H
-#define _SSH_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include <netinet/in.h> /* For struct sockaddr_in */
-#include <pwd.h> /* For struct pw */
-#include <stdarg.h> /* For va_list */
-#include <syslog.h> /* For LOG_AUTH and friends */
-#include <sys/socket.h> /* For struct sockaddr_storage */
-#include "fake-socket.h" /* For struct sockaddr_storage */
-#ifdef HAVE_SYS_SELECT_H
-# include <sys/select.h>
-#endif
-
-/* Cipher used for encrypting authentication files. */
-#define SSH_AUTHFILE_CIPHER SSH_CIPHER_3DES
-
-/* Default port number. */
-#define SSH_DEFAULT_PORT 22
-
-/* Maximum number of TCP/IP ports forwarded per direction. */
-#define SSH_MAX_FORWARDS_PER_DIRECTION 100
-
-/*
- * Maximum number of RSA authentication identity files that can be specified
- * in configuration files or on the command line.
- */
-#define SSH_MAX_IDENTITY_FILES 100
-
-/*
- * Major protocol version. Different version indicates major incompatibility
- * that prevents communication.
- *
- * Minor protocol version. Different version indicates minor incompatibility
- * that does not prevent interoperation.
- */
-#define PROTOCOL_MAJOR_1 1
-#define PROTOCOL_MINOR_1 5
-
-/* We support both SSH1 and SSH2 */
-#define PROTOCOL_MAJOR_2 2
-#define PROTOCOL_MINOR_2 0
-
-/*
- * Name for the service. The port named by this service overrides the
- * default port if present.
- */
-#define SSH_SERVICE_NAME "ssh"
-
-/*
- * Name of the environment variable containing the process ID of the
- * authentication agent.
- */
-#define SSH_AGENTPID_ENV_NAME "SSH_AGENT_PID"
-
-/*
- * Name of the environment variable containing the pathname of the
- * authentication socket.
- */
-#define SSH_AUTHSOCKET_ENV_NAME "SSH_AUTH_SOCK"
-
-/*
- * Environment variable for overwriting the default location of askpass
- */
-#define SSH_ASKPASS_ENV "SSH_ASKPASS"
-
-/*
- * Force host key length and server key length to differ by at least this
- * many bits. This is to make double encryption with rsaref work.
- */
-#define SSH_KEY_BITS_RESERVED 128
-
-/*
- * Length of the session key in bytes. (Specified as 256 bits in the
- * protocol.)
- */
-#define SSH_SESSION_KEY_LENGTH 32
-
-/* Name of Kerberos service for SSH to use. */
-#define KRB4_SERVICE_NAME "rcmd"
-
-/* Used to identify ``EscapeChar none'' */
-#define SSH_ESCAPECHAR_NONE -2
-
-/*
- * unprivileged user when UsePrivilegeSeparation=yes;
- * sshd will change its privileges to this user and its
- * primary group.
- */
-#ifndef SSH_PRIVSEP_USER
-#define SSH_PRIVSEP_USER "sshd"
-#endif
-
-/* Minimum modulus size (n) for RSA keys. */
-#define SSH_RSA_MINIMUM_MODULUS_SIZE 768
-
-/* Listen backlog for sshd, ssh-agent and forwarding sockets */
-#define SSH_LISTEN_BACKLOG 128
-
-/*
- * Do not display banner when in remote command mode only. Note that RFC 4254
- * uses "exec" as a mode name for the channel opened for the execution of the
- * given command.
- */
-#define SSH_NO_BANNER_IN_EXEC_MODE 2
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SSH_H */
diff --git a/usr/src/cmd/ssh/include/ssh1.h b/usr/src/cmd/ssh/include/ssh1.h
deleted file mode 100644
index 8a6f84c594..0000000000
--- a/usr/src/cmd/ssh/include/ssh1.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/* $OpenBSD: ssh1.h,v 1.3 2001/05/30 12:55:13 markus Exp $ */
-
-#ifndef _SSH1_H
-#define _SSH1_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-/*
- * Definition of message types. New values can be added, but old values
- * should not be removed or without careful consideration of the consequences
- * for compatibility. The maximum value is 254; value 255 is reserved for
- * future extension.
- */
-/* Message name */ /* msg code */ /* arguments */
-#define SSH_MSG_NONE 0 /* no message */
-#define SSH_MSG_DISCONNECT 1 /* cause (string) */
-#define SSH_SMSG_PUBLIC_KEY 2 /* ck,msk,srvk,hostk */
-#define SSH_CMSG_SESSION_KEY 3 /* key (BIGNUM) */
-#define SSH_CMSG_USER 4 /* user (string) */
-#define SSH_CMSG_AUTH_RHOSTS 5 /* user (string) */
-#define SSH_CMSG_AUTH_RSA 6 /* modulus (BIGNUM) */
-#define SSH_SMSG_AUTH_RSA_CHALLENGE 7 /* int (BIGNUM) */
-#define SSH_CMSG_AUTH_RSA_RESPONSE 8 /* int (BIGNUM) */
-#define SSH_CMSG_AUTH_PASSWORD 9 /* pass (string) */
-#define SSH_CMSG_REQUEST_PTY 10 /* TERM, tty modes */
-#define SSH_CMSG_WINDOW_SIZE 11 /* row,col,xpix,ypix */
-#define SSH_CMSG_EXEC_SHELL 12 /* */
-#define SSH_CMSG_EXEC_CMD 13 /* cmd (string) */
-#define SSH_SMSG_SUCCESS 14 /* */
-#define SSH_SMSG_FAILURE 15 /* */
-#define SSH_CMSG_STDIN_DATA 16 /* data (string) */
-#define SSH_SMSG_STDOUT_DATA 17 /* data (string) */
-#define SSH_SMSG_STDERR_DATA 18 /* data (string) */
-#define SSH_CMSG_EOF 19 /* */
-#define SSH_SMSG_EXITSTATUS 20 /* status (int) */
-#define SSH_MSG_CHANNEL_OPEN_CONFIRMATION 21 /* channel (int) */
-#define SSH_MSG_CHANNEL_OPEN_FAILURE 22 /* channel (int) */
-#define SSH_MSG_CHANNEL_DATA 23 /* ch,data (int,str) */
-#define SSH_MSG_CHANNEL_CLOSE 24 /* channel (int) */
-#define SSH_MSG_CHANNEL_CLOSE_CONFIRMATION 25 /* channel (int) */
-/* SSH_CMSG_X11_REQUEST_FORWARDING 26 OBSOLETE */
-#define SSH_SMSG_X11_OPEN 27 /* channel (int) */
-#define SSH_CMSG_PORT_FORWARD_REQUEST 28 /* p,host,hp (i,s,i) */
-#define SSH_MSG_PORT_OPEN 29 /* ch,h,p (i,s,i) */
-#define SSH_CMSG_AGENT_REQUEST_FORWARDING 30 /* */
-#define SSH_SMSG_AGENT_OPEN 31 /* port (int) */
-#define SSH_MSG_IGNORE 32 /* string */
-#define SSH_CMSG_EXIT_CONFIRMATION 33 /* */
-#define SSH_CMSG_X11_REQUEST_FORWARDING 34 /* proto,data (s,s) */
-#define SSH_CMSG_AUTH_RHOSTS_RSA 35 /* user,mod (s,mpi) */
-#define SSH_MSG_DEBUG 36 /* string */
-#define SSH_CMSG_REQUEST_COMPRESSION 37 /* level 1-9 (int) */
-#define SSH_CMSG_MAX_PACKET_SIZE 38 /* size 4k-1024k (int) */
-#define SSH_CMSG_AUTH_TIS 39 /* we use this for s/key */
-#define SSH_SMSG_AUTH_TIS_CHALLENGE 40 /* challenge (string) */
-#define SSH_CMSG_AUTH_TIS_RESPONSE 41 /* response (string) */
-#define SSH_CMSG_AUTH_KERBEROS 42 /* (KTEXT) */
-#define SSH_SMSG_AUTH_KERBEROS_RESPONSE 43 /* (KTEXT) */
-#define SSH_CMSG_HAVE_KERBEROS_TGT 44 /* credentials (s) */
-#define SSH_CMSG_HAVE_AFS_TOKEN 65 /* token (s) */
-
-/* protocol version 1.5 overloads some version 1.3 message types */
-#define SSH_MSG_CHANNEL_INPUT_EOF SSH_MSG_CHANNEL_CLOSE
-#define SSH_MSG_CHANNEL_OUTPUT_CLOSE SSH_MSG_CHANNEL_CLOSE_CONFIRMATION
-
-/*
- * Authentication methods. New types can be added, but old types should not
- * be removed for compatibility. The maximum allowed value is 31.
- */
-#define SSH_AUTH_RHOSTS 1
-#define SSH_AUTH_RSA 2
-#define SSH_AUTH_PASSWORD 3
-#define SSH_AUTH_RHOSTS_RSA 4
-#define SSH_AUTH_TIS 5
-#define SSH_AUTH_KERBEROS 6
-#define SSH_PASS_KERBEROS_TGT 7
- /* 8 to 15 are reserved */
-#define SSH_PASS_AFS_TOKEN 21
-
-/* Protocol flags. These are bit masks. */
-#define SSH_PROTOFLAG_SCREEN_NUMBER 1 /* X11 forwarding includes screen */
-#define SSH_PROTOFLAG_HOST_IN_FWD_OPEN 2 /* forwarding opens contain host */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SSH1_H */
diff --git a/usr/src/cmd/ssh/include/ssh2.h b/usr/src/cmd/ssh/include/ssh2.h
deleted file mode 100644
index 20782bb266..0000000000
--- a/usr/src/cmd/ssh/include/ssh2.h
+++ /dev/null
@@ -1,184 +0,0 @@
-/* $OpenBSD: ssh2.h,v 1.8 2002/03/04 17:27:39 stevesk Exp $ */
-
-#ifndef _SSH2_H
-#define _SSH2_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Copyright (c) 2000 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/*
- * draft-ietf-secsh-architecture-05.txt
- *
- * Transport layer protocol:
- *
- * 1-19 Transport layer generic (e.g. disconnect, ignore, debug,
- * etc)
- * 20-29 Algorithm negotiation
- * 30-49 Key exchange method specific (numbers can be reused for
- * different authentication methods)
- *
- * User authentication protocol:
- *
- * 50-59 User authentication generic
- * 60-79 User authentication method specific (numbers can be reused
- * for different authentication methods)
- *
- * Connection protocol:
- *
- * 80-89 Connection protocol generic
- * 90-127 Channel related messages
- *
- * Reserved for client protocols:
- *
- * 128-191 Reserved
- *
- * Local extensions:
- *
- * 192-255 Local extensions
- */
-
-/* ranges */
-
-#define SSH2_MSG_TRANSPORT_MIN 1
-#define SSH2_MSG_TRANSPORT_MAX 49
-#define SSH2_MSG_USERAUTH_MIN 50
-#define SSH2_MSG_USERAUTH_MAX 79
-#define SSH2_MSG_CONNECTION_MIN 80
-#define SSH2_MSG_CONNECTION_MAX 127
-#define SSH2_MSG_RESERVED_MIN 128
-#define SSH2_MSG_RESERVED_MAX 191
-#define SSH2_MSG_LOCAL_MIN 192
-#define SSH2_MSG_LOCAL_MAX 255
-#define SSH2_MSG_MIN 1
-#define SSH2_MSG_MAX 255
-
-/* transport layer: generic */
-
-#define SSH2_MSG_DISCONNECT 1
-#define SSH2_MSG_IGNORE 2
-#define SSH2_MSG_UNIMPLEMENTED 3
-#define SSH2_MSG_DEBUG 4
-#define SSH2_MSG_SERVICE_REQUEST 5
-#define SSH2_MSG_SERVICE_ACCEPT 6
-
-/* transport layer: alg negotiation */
-
-#define SSH2_MSG_KEXINIT 20
-#define SSH2_MSG_NEWKEYS 21
-
-/* transport layer: kex specific messages, can be reused */
-
-#define SSH2_MSG_KEXDH_INIT 30
-#define SSH2_MSG_KEXDH_REPLY 31
-
-/* dh-group-exchange */
-#define SSH2_MSG_KEX_DH_GEX_REQUEST_OLD 30
-#define SSH2_MSG_KEX_DH_GEX_GROUP 31
-#define SSH2_MSG_KEX_DH_GEX_INIT 32
-#define SSH2_MSG_KEX_DH_GEX_REPLY 33
-#define SSH2_MSG_KEX_DH_GEX_REQUEST 34
-
-/* user authentication: generic */
-
-#define SSH2_MSG_USERAUTH_REQUEST 50
-#define SSH2_MSG_USERAUTH_FAILURE 51
-#define SSH2_MSG_USERAUTH_SUCCESS 52
-#define SSH2_MSG_USERAUTH_BANNER 53
-
-/* user authentication: method specific, can be reused */
-
-#define SSH2_MSG_USERAUTH_PK_OK 60
-#define SSH2_MSG_USERAUTH_PASSWD_CHANGEREQ 60
-#define SSH2_MSG_USERAUTH_INFO_REQUEST 60
-#define SSH2_MSG_USERAUTH_INFO_RESPONSE 61
-
-/* connection protocol: generic */
-
-#define SSH2_MSG_GLOBAL_REQUEST 80
-#define SSH2_MSG_REQUEST_SUCCESS 81
-#define SSH2_MSG_REQUEST_FAILURE 82
-
-/* channel related messages */
-
-#define SSH2_MSG_CHANNEL_OPEN 90
-#define SSH2_MSG_CHANNEL_OPEN_CONFIRMATION 91
-#define SSH2_MSG_CHANNEL_OPEN_FAILURE 92
-#define SSH2_MSG_CHANNEL_WINDOW_ADJUST 93
-#define SSH2_MSG_CHANNEL_DATA 94
-#define SSH2_MSG_CHANNEL_EXTENDED_DATA 95
-#define SSH2_MSG_CHANNEL_EOF 96
-#define SSH2_MSG_CHANNEL_CLOSE 97
-#define SSH2_MSG_CHANNEL_REQUEST 98
-#define SSH2_MSG_CHANNEL_SUCCESS 99
-#define SSH2_MSG_CHANNEL_FAILURE 100
-
-/* ALTPRIVSEP */
-#ifdef ALTPRIVSEP
-#define SSH2_PRIV_MSG_ALTPRIVSEP 254
-#endif /* ALTPRIVSEP */
-
-/* disconnect reason code */
-
-#define SSH2_DISCONNECT_HOST_NOT_ALLOWED_TO_CONNECT 1
-#define SSH2_DISCONNECT_PROTOCOL_ERROR 2
-#define SSH2_DISCONNECT_KEY_EXCHANGE_FAILED 3
-#define SSH2_DISCONNECT_HOST_AUTHENTICATION_FAILED 4
-#define SSH2_DISCONNECT_RESERVED 4
-#define SSH2_DISCONNECT_MAC_ERROR 5
-#define SSH2_DISCONNECT_COMPRESSION_ERROR 6
-#define SSH2_DISCONNECT_SERVICE_NOT_AVAILABLE 7
-#define SSH2_DISCONNECT_PROTOCOL_VERSION_NOT_SUPPORTED 8
-#define SSH2_DISCONNECT_HOST_KEY_NOT_VERIFIABLE 9
-#define SSH2_DISCONNECT_CONNECTION_LOST 10
-#define SSH2_DISCONNECT_BY_APPLICATION 11
-#define SSH2_DISCONNECT_TOO_MANY_CONNECTIONS 12
-#define SSH2_DISCONNECT_AUTH_CANCELLED_BY_USER 13
-#define SSH2_DISCONNECT_NO_MORE_AUTH_METHODS_AVAILABLE 14
-#define SSH2_DISCONNECT_ILLEGAL_USER_NAME 15
-
-/* misc */
-
-#define SSH2_OPEN_ADMINISTRATIVELY_PROHIBITED 1
-#define SSH2_OPEN_CONNECT_FAILED 2
-#define SSH2_OPEN_UNKNOWN_CHANNEL_TYPE 3
-#define SSH2_OPEN_RESOURCE_SHORTAGE 4
-
-#define SSH2_EXTENDED_DATA_STDERR 1
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SSH2_H */
diff --git a/usr/src/cmd/ssh/include/sshconnect.h b/usr/src/cmd/ssh/include/sshconnect.h
deleted file mode 100644
index 74eaccfbbc..0000000000
--- a/usr/src/cmd/ssh/include/sshconnect.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2000 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/* $OpenBSD: sshconnect.h,v 1.17 2002/06/19 00:27:55 deraadt Exp $ */
-
-#ifndef _SSHCONNECT_H
-#define _SSHCONNECT_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct Sensitive Sensitive;
-struct Sensitive {
- Key **keys;
- int nkeys;
- int external_keysign;
-};
-
-int
-ssh_connect(const char *, struct sockaddr_storage *, ushort_t, int, int,
- int, const char *);
-
-void
-ssh_login(Sensitive *, const char *, struct sockaddr *, char *);
-
-int verify_host_key(char *, struct sockaddr *, Key *);
-int accept_host_key(char *, struct sockaddr *, Key *);
-
-void ssh_kex(char *, struct sockaddr *);
-void ssh_kex2(char *, struct sockaddr *);
-
-void ssh_userauth1(const char *, const char *, char *, Sensitive *);
-void ssh_userauth2(const char *, const char *, char *, Sensitive *);
-
-void ssh_put_password(char *);
-
-
-/*
- * Macros to raise/lower permissions.
- */
-#define PRIV_START do { \
- int save_errno = errno; \
- (void) seteuid(original_effective_uid); \
- errno = save_errno; \
-} while (0)
-
-#define PRIV_END do { \
- int save_errno = errno; \
- (void) seteuid(original_real_uid); \
- errno = save_errno; \
-} while (0)
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SSHCONNECT_H */
diff --git a/usr/src/cmd/ssh/include/sshlogin.h b/usr/src/cmd/ssh/include/sshlogin.h
deleted file mode 100644
index e7d2ee7c89..0000000000
--- a/usr/src/cmd/ssh/include/sshlogin.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/* $OpenBSD: sshlogin.h,v 1.4 2002/08/29 15:57:25 stevesk Exp $ */
-
-#ifndef _SSHLOGIN_H
-#define _SSHLOGIN_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void
-record_login(pid_t pid, const char *ttyname, const char *progname,
- const char *user);
-void
-record_logout(pid_t pid, const char *ttyname, const char *progname,
- const char *user);
-
-u_long
-get_last_login_time(uid_t uid, const char *logname, char *buf, u_int bufsize);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SSHLOGIN_H */
diff --git a/usr/src/cmd/ssh/include/sshpty.h b/usr/src/cmd/ssh/include/sshpty.h
deleted file mode 100644
index f5dbf25640..0000000000
--- a/usr/src/cmd/ssh/include/sshpty.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/* $OpenBSD: sshpty.h,v 1.4 2002/03/04 17:27:39 stevesk Exp $ */
-
-#ifndef _SSHPTY_H
-#define _SSHPTY_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * Functions for allocating a pseudo-terminal and making it the controlling
- * tty.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-int pty_allocate(int *, int *, char *, int);
-void pty_release(const char *);
-void pty_make_controlling_tty(int *, const char *);
-void pty_change_window_size(int, int, int, int, int);
-void pty_setowner(struct passwd *, const char *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SSHPTY_H */
diff --git a/usr/src/cmd/ssh/include/sshtty.h b/usr/src/cmd/ssh/include/sshtty.h
deleted file mode 100644
index 584c63b6b9..0000000000
--- a/usr/src/cmd/ssh/include/sshtty.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/* $OpenBSD: sshtty.h,v 1.2 2001/06/26 17:27:25 markus Exp $ */
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-/*
- * Copyright (c) 2001 Markus Friedl. All rights reserved.
- * Copyright (c) 2001 Kevin Steves. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _SSHTTY_H
-#define _SSHTTY_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <termios.h>
-
-int in_raw_mode(void);
-struct termios get_saved_tio(void);
-void leave_raw_mode(void);
-void enter_raw_mode(void);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SSHTTY_H */
diff --git a/usr/src/cmd/ssh/include/strlcat.h b/usr/src/cmd/ssh/include/strlcat.h
deleted file mode 100644
index b1b28cf33e..0000000000
--- a/usr/src/cmd/ssh/include/strlcat.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* $Id: strlcat.h,v 1.2 2001/02/09 01:55:36 djm Exp $ */
-
-#ifndef _STRLCAT_H
-#define _STRLCAT_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "config.h"
-#ifndef HAVE_STRLCAT
-#include <sys/types.h>
-size_t strlcat(char *dst, const char *src, size_t siz);
-#endif /* !HAVE_STRLCAT */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _STRLCAT_H */
diff --git a/usr/src/cmd/ssh/include/strlcpy.h b/usr/src/cmd/ssh/include/strlcpy.h
deleted file mode 100644
index 6215c105f7..0000000000
--- a/usr/src/cmd/ssh/include/strlcpy.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* $Id: strlcpy.h,v 1.2 2001/02/09 01:55:36 djm Exp $ */
-
-#ifndef _STRLCPY_H
-#define _STRLCPY_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "config.h"
-#ifndef HAVE_STRLCPY
-#include <sys/types.h>
-size_t strlcpy(char *dst, const char *src, size_t siz);
-#endif /* !HAVE_STRLCPY */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _STRLCPY_H */
diff --git a/usr/src/cmd/ssh/include/strmode.h b/usr/src/cmd/ssh/include/strmode.h
deleted file mode 100644
index cdbc4bd3c9..0000000000
--- a/usr/src/cmd/ssh/include/strmode.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* $Id: strmode.h,v 1.3 2001/06/09 02:22:17 mouring Exp $ */
-
-#ifndef _STRMODE_H
-#define _STRMODE_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-#ifndef HAVE_STRMODE
-
-void strmode(register mode_t mode, register char *p);
-
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _STRMODE_H */
diff --git a/usr/src/cmd/ssh/include/sys-queue.h b/usr/src/cmd/ssh/include/sys-queue.h
deleted file mode 100644
index c5d8e6a5b5..0000000000
--- a/usr/src/cmd/ssh/include/sys-queue.h
+++ /dev/null
@@ -1,595 +0,0 @@
-/* $OpenBSD: queue.h,v 1.22 2001/06/23 04:39:35 angelos Exp $ */
-/* $NetBSD: queue.h,v 1.11 1996/05/16 05:17:14 mycroft Exp $ */
-
-#ifndef _SYS_QUEUE_H
-#define _SYS_QUEUE_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Copyright (c) 1991, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)queue.h 8.5 (Berkeley) 8/20/94
- */
-
-/*
- * Ignore all <sys/queue.h> since older platforms have broken/incomplete
- * <sys/queue.h> that are too hard to work around.
- */
-#undef SLIST_HEAD
-#undef SLIST_HEAD_INITIALIZER
-#undef SLIST_ENTRY
-#undef SLIST_FIRST
-#undef SLIST_END
-#undef SLIST_EMPTY
-#undef SLIST_NEXT
-#undef SLIST_FOREACH
-#undef SLIST_INIT
-#undef SLIST_INSERT_AFTER
-#undef SLIST_INSERT_HEAD
-#undef SLIST_REMOVE_HEAD
-#undef SLIST_REMOVE
-#undef LIST_HEAD
-#undef LIST_HEAD_INITIALIZER
-#undef LIST_ENTRY
-#undef LIST_FIRST
-#undef LIST_END
-#undef LIST_EMPTY
-#undef LIST_NEXT
-#undef LIST_FOREACH
-#undef LIST_INIT
-#undef LIST_INSERT_AFTER
-#undef LIST_INSERT_BEFORE
-#undef LIST_INSERT_HEAD
-#undef LIST_REMOVE
-#undef LIST_REPLACE
-#undef SIMPLEQ_HEAD
-#undef SIMPLEQ_HEAD_INITIALIZER
-#undef SIMPLEQ_ENTRY
-#undef SIMPLEQ_FIRST
-#undef SIMPLEQ_END
-#undef SIMPLEQ_EMPTY
-#undef SIMPLEQ_NEXT
-#undef SIMPLEQ_FOREACH
-#undef SIMPLEQ_INIT
-#undef SIMPLEQ_INSERT_HEAD
-#undef SIMPLEQ_INSERT_TAIL
-#undef SIMPLEQ_INSERT_AFTER
-#undef SIMPLEQ_REMOVE_HEAD
-#undef TAILQ_HEAD
-#undef TAILQ_HEAD_INITIALIZER
-#undef TAILQ_ENTRY
-#undef TAILQ_FIRST
-#undef TAILQ_END
-#undef TAILQ_NEXT
-#undef TAILQ_LAST
-#undef TAILQ_PREV
-#undef TAILQ_EMPTY
-#undef TAILQ_FOREACH
-#undef TAILQ_FOREACH_REVERSE
-#undef TAILQ_INIT
-#undef TAILQ_INSERT_HEAD
-#undef TAILQ_INSERT_TAIL
-#undef TAILQ_INSERT_AFTER
-#undef TAILQ_INSERT_BEFORE
-#undef TAILQ_REMOVE
-#undef TAILQ_REPLACE
-#undef CIRCLEQ_HEAD
-#undef CIRCLEQ_HEAD_INITIALIZER
-#undef CIRCLEQ_ENTRY
-#undef CIRCLEQ_FIRST
-#undef CIRCLEQ_LAST
-#undef CIRCLEQ_END
-#undef CIRCLEQ_NEXT
-#undef CIRCLEQ_PREV
-#undef CIRCLEQ_EMPTY
-#undef CIRCLEQ_FOREACH
-#undef CIRCLEQ_FOREACH_REVERSE
-#undef CIRCLEQ_INIT
-#undef CIRCLEQ_INSERT_AFTER
-#undef CIRCLEQ_INSERT_BEFORE
-#undef CIRCLEQ_INSERT_HEAD
-#undef CIRCLEQ_INSERT_TAIL
-#undef CIRCLEQ_REMOVE
-#undef CIRCLEQ_REPLACE
-
-/*
- * This file defines five types of data structures: singly-linked lists,
- * lists, simple queues, tail queues, and circular queues.
- *
- *
- * A singly-linked list is headed by a single forward pointer. The elements
- * are singly linked for minimum space and pointer manipulation overhead at
- * the expense of O(n) removal for arbitrary elements. New elements can be
- * added to the list after an existing element or at the head of the list.
- * Elements being removed from the head of the list should use the explicit
- * macro for this purpose for optimum efficiency. A singly-linked list may
- * only be traversed in the forward direction. Singly-linked lists are ideal
- * for applications with large datasets and few or no removals or for
- * implementing a LIFO queue.
- *
- * A list is headed by a single forward pointer (or an array of forward
- * pointers for a hash table header). The elements are doubly linked
- * so that an arbitrary element can be removed without a need to
- * traverse the list. New elements can be added to the list before
- * or after an existing element or at the head of the list. A list
- * may only be traversed in the forward direction.
- *
- * A simple queue is headed by a pair of pointers, one the head of the
- * list and the other to the tail of the list. The elements are singly
- * linked to save space, so elements can only be removed from the
- * head of the list. New elements can be added to the list before or after
- * an existing element, at the head of the list, or at the end of the
- * list. A simple queue may only be traversed in the forward direction.
- *
- * A tail queue is headed by a pair of pointers, one to the head of the
- * list and the other to the tail of the list. The elements are doubly
- * linked so that an arbitrary element can be removed without a need to
- * traverse the list. New elements can be added to the list before or
- * after an existing element, at the head of the list, or at the end of
- * the list. A tail queue may be traversed in either direction.
- *
- * A circle queue is headed by a pair of pointers, one to the head of the
- * list and the other to the tail of the list. The elements are doubly
- * linked so that an arbitrary element can be removed without a need to
- * traverse the list. New elements can be added to the list before or after
- * an existing element, at the head of the list, or at the end of the list.
- * A circle queue may be traversed in either direction, but has a more
- * complex end of list detection.
- *
- * For details on the use of these macros, see the queue(3) manual page.
- */
-
-/*
- * Singly-linked List definitions.
- */
-#define SLIST_HEAD(name, type) \
-struct name { \
- struct type *slh_first; /* first element */ \
-}
-
-#define SLIST_HEAD_INITIALIZER(head) \
- { NULL }
-
-#define SLIST_ENTRY(type) \
-struct { \
- struct type *sle_next; /* next element */ \
-}
-
-/*
- * Singly-linked List access methods.
- */
-#define SLIST_FIRST(head) ((head)->slh_first)
-#define SLIST_END(head) NULL
-#define SLIST_EMPTY(head) (SLIST_FIRST(head) == SLIST_END(head))
-#define SLIST_NEXT(elm, field) ((elm)->field.sle_next)
-
-#define SLIST_FOREACH(var, head, field) \
- for((var) = SLIST_FIRST(head); \
- (var) != SLIST_END(head); \
- (var) = SLIST_NEXT(var, field))
-
-/*
- * Singly-linked List functions.
- */
-#define SLIST_INIT(head) { \
- SLIST_FIRST(head) = SLIST_END(head); \
-}
-
-#define SLIST_INSERT_AFTER(slistelm, elm, field) do { \
- (elm)->field.sle_next = (slistelm)->field.sle_next; \
- (slistelm)->field.sle_next = (elm); \
-} while (0)
-
-#define SLIST_INSERT_HEAD(head, elm, field) do { \
- (elm)->field.sle_next = (head)->slh_first; \
- (head)->slh_first = (elm); \
-} while (0)
-
-#define SLIST_REMOVE_HEAD(head, field) do { \
- (head)->slh_first = (head)->slh_first->field.sle_next; \
-} while (0)
-
-#define SLIST_REMOVE(head, elm, type, field) do { \
- if ((head)->slh_first == (elm)) { \
- SLIST_REMOVE_HEAD((head), field); \
- } \
- else { \
- struct type *curelm = (head)->slh_first; \
- while( curelm->field.sle_next != (elm) ) \
- curelm = curelm->field.sle_next; \
- curelm->field.sle_next = \
- curelm->field.sle_next->field.sle_next; \
- } \
-} while (0)
-
-/*
- * List definitions.
- */
-#define LIST_HEAD(name, type) \
-struct name { \
- struct type *lh_first; /* first element */ \
-}
-
-#define LIST_HEAD_INITIALIZER(head) \
- { NULL }
-
-#define LIST_ENTRY(type) \
-struct { \
- struct type *le_next; /* next element */ \
- struct type **le_prev; /* address of previous next element */ \
-}
-
-/*
- * List access methods
- */
-#define LIST_FIRST(head) ((head)->lh_first)
-#define LIST_END(head) NULL
-#define LIST_EMPTY(head) (LIST_FIRST(head) == LIST_END(head))
-#define LIST_NEXT(elm, field) ((elm)->field.le_next)
-
-#define LIST_FOREACH(var, head, field) \
- for((var) = LIST_FIRST(head); \
- (var)!= LIST_END(head); \
- (var) = LIST_NEXT(var, field))
-
-/*
- * List functions.
- */
-#define LIST_INIT(head) do { \
- LIST_FIRST(head) = LIST_END(head); \
-} while (0)
-
-#define LIST_INSERT_AFTER(listelm, elm, field) do { \
- if (((elm)->field.le_next = (listelm)->field.le_next) != NULL) \
- (listelm)->field.le_next->field.le_prev = \
- &(elm)->field.le_next; \
- (listelm)->field.le_next = (elm); \
- (elm)->field.le_prev = &(listelm)->field.le_next; \
-} while (0)
-
-#define LIST_INSERT_BEFORE(listelm, elm, field) do { \
- (elm)->field.le_prev = (listelm)->field.le_prev; \
- (elm)->field.le_next = (listelm); \
- *(listelm)->field.le_prev = (elm); \
- (listelm)->field.le_prev = &(elm)->field.le_next; \
-} while (0)
-
-#define LIST_INSERT_HEAD(head, elm, field) do { \
- if (((elm)->field.le_next = (head)->lh_first) != NULL) \
- (head)->lh_first->field.le_prev = &(elm)->field.le_next;\
- (head)->lh_first = (elm); \
- (elm)->field.le_prev = &(head)->lh_first; \
-} while (0)
-
-#define LIST_REMOVE(elm, field) do { \
- if ((elm)->field.le_next != NULL) \
- (elm)->field.le_next->field.le_prev = \
- (elm)->field.le_prev; \
- *(elm)->field.le_prev = (elm)->field.le_next; \
-} while (0)
-
-#define LIST_REPLACE(elm, elm2, field) do { \
- if (((elm2)->field.le_next = (elm)->field.le_next) != NULL) \
- (elm2)->field.le_next->field.le_prev = \
- &(elm2)->field.le_next; \
- (elm2)->field.le_prev = (elm)->field.le_prev; \
- *(elm2)->field.le_prev = (elm2); \
-} while (0)
-
-/*
- * Simple queue definitions.
- */
-#define SIMPLEQ_HEAD(name, type) \
-struct name { \
- struct type *sqh_first; /* first element */ \
- struct type **sqh_last; /* addr of last next element */ \
-}
-
-#define SIMPLEQ_HEAD_INITIALIZER(head) \
- { NULL, &(head).sqh_first }
-
-#define SIMPLEQ_ENTRY(type) \
-struct { \
- struct type *sqe_next; /* next element */ \
-}
-
-/*
- * Simple queue access methods.
- */
-#define SIMPLEQ_FIRST(head) ((head)->sqh_first)
-#define SIMPLEQ_END(head) NULL
-#define SIMPLEQ_EMPTY(head) (SIMPLEQ_FIRST(head) == SIMPLEQ_END(head))
-#define SIMPLEQ_NEXT(elm, field) ((elm)->field.sqe_next)
-
-#define SIMPLEQ_FOREACH(var, head, field) \
- for((var) = SIMPLEQ_FIRST(head); \
- (var) != SIMPLEQ_END(head); \
- (var) = SIMPLEQ_NEXT(var, field))
-
-/*
- * Simple queue functions.
- */
-#define SIMPLEQ_INIT(head) do { \
- (head)->sqh_first = NULL; \
- (head)->sqh_last = &(head)->sqh_first; \
-} while (0)
-
-#define SIMPLEQ_INSERT_HEAD(head, elm, field) do { \
- if (((elm)->field.sqe_next = (head)->sqh_first) == NULL) \
- (head)->sqh_last = &(elm)->field.sqe_next; \
- (head)->sqh_first = (elm); \
-} while (0)
-
-#define SIMPLEQ_INSERT_TAIL(head, elm, field) do { \
- (elm)->field.sqe_next = NULL; \
- *(head)->sqh_last = (elm); \
- (head)->sqh_last = &(elm)->field.sqe_next; \
-} while (0)
-
-#define SIMPLEQ_INSERT_AFTER(head, listelm, elm, field) do { \
- if (((elm)->field.sqe_next = (listelm)->field.sqe_next) == NULL)\
- (head)->sqh_last = &(elm)->field.sqe_next; \
- (listelm)->field.sqe_next = (elm); \
-} while (0)
-
-#define SIMPLEQ_REMOVE_HEAD(head, elm, field) do { \
- if (((head)->sqh_first = (elm)->field.sqe_next) == NULL) \
- (head)->sqh_last = &(head)->sqh_first; \
-} while (0)
-
-/*
- * Tail queue definitions.
- */
-#define TAILQ_HEAD(name, type) \
-struct name { \
- struct type *tqh_first; /* first element */ \
- struct type **tqh_last; /* addr of last next element */ \
-}
-
-#define TAILQ_HEAD_INITIALIZER(head) \
- { NULL, &(head).tqh_first }
-
-#define TAILQ_ENTRY(type) \
-struct { \
- struct type *tqe_next; /* next element */ \
- struct type **tqe_prev; /* address of previous next element */ \
-}
-
-/*
- * tail queue access methods
- */
-#define TAILQ_FIRST(head) ((head)->tqh_first)
-#define TAILQ_END(head) NULL
-#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next)
-#define TAILQ_LAST(head, headname) \
- (*(((struct headname *)((head)->tqh_last))->tqh_last))
-/* XXX */
-#define TAILQ_PREV(elm, headname, field) \
- (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last))
-#define TAILQ_EMPTY(head) \
- (TAILQ_FIRST(head) == TAILQ_END(head))
-
-#define TAILQ_FOREACH(var, head, field) \
- for((var) = TAILQ_FIRST(head); \
- (var) != TAILQ_END(head); \
- (var) = TAILQ_NEXT(var, field))
-
-#define TAILQ_FOREACH_REVERSE(var, head, field, headname) \
- for((var) = TAILQ_LAST(head, headname); \
- (var) != TAILQ_END(head); \
- (var) = TAILQ_PREV(var, headname, field))
-
-/*
- * Tail queue functions.
- */
-#define TAILQ_INIT(head) do { \
- (head)->tqh_first = NULL; \
- (head)->tqh_last = &(head)->tqh_first; \
-} while (0)
-
-#define TAILQ_INSERT_HEAD(head, elm, field) do { \
- if (((elm)->field.tqe_next = (head)->tqh_first) != NULL) \
- (head)->tqh_first->field.tqe_prev = \
- &(elm)->field.tqe_next; \
- else \
- (head)->tqh_last = &(elm)->field.tqe_next; \
- (head)->tqh_first = (elm); \
- (elm)->field.tqe_prev = &(head)->tqh_first; \
-} while (0)
-
-#define TAILQ_INSERT_TAIL(head, elm, field) do { \
- (elm)->field.tqe_next = NULL; \
- (elm)->field.tqe_prev = (head)->tqh_last; \
- *(head)->tqh_last = (elm); \
- (head)->tqh_last = &(elm)->field.tqe_next; \
-} while (0)
-
-#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \
- if (((elm)->field.tqe_next = (listelm)->field.tqe_next) != NULL)\
- (elm)->field.tqe_next->field.tqe_prev = \
- &(elm)->field.tqe_next; \
- else \
- (head)->tqh_last = &(elm)->field.tqe_next; \
- (listelm)->field.tqe_next = (elm); \
- (elm)->field.tqe_prev = &(listelm)->field.tqe_next; \
-} while (0)
-
-#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \
- (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \
- (elm)->field.tqe_next = (listelm); \
- *(listelm)->field.tqe_prev = (elm); \
- (listelm)->field.tqe_prev = &(elm)->field.tqe_next; \
-} while (0)
-
-#define TAILQ_REMOVE(head, elm, field) do { \
- if (((elm)->field.tqe_next) != NULL) \
- (elm)->field.tqe_next->field.tqe_prev = \
- (elm)->field.tqe_prev; \
- else \
- (head)->tqh_last = (elm)->field.tqe_prev; \
- *(elm)->field.tqe_prev = (elm)->field.tqe_next; \
-} while (0)
-
-#define TAILQ_REPLACE(head, elm, elm2, field) do { \
- if (((elm2)->field.tqe_next = (elm)->field.tqe_next) != NULL) \
- (elm2)->field.tqe_next->field.tqe_prev = \
- &(elm2)->field.tqe_next; \
- else \
- (head)->tqh_last = &(elm2)->field.tqe_next; \
- (elm2)->field.tqe_prev = (elm)->field.tqe_prev; \
- *(elm2)->field.tqe_prev = (elm2); \
-} while (0)
-
-/*
- * Circular queue definitions.
- */
-#define CIRCLEQ_HEAD(name, type) \
-struct name { \
- struct type *cqh_first; /* first element */ \
- struct type *cqh_last; /* last element */ \
-}
-
-#define CIRCLEQ_HEAD_INITIALIZER(head) \
- { CIRCLEQ_END(&head), CIRCLEQ_END(&head) }
-
-#define CIRCLEQ_ENTRY(type) \
-struct { \
- struct type *cqe_next; /* next element */ \
- struct type *cqe_prev; /* previous element */ \
-}
-
-/*
- * Circular queue access methods
- */
-#define CIRCLEQ_FIRST(head) ((head)->cqh_first)
-#define CIRCLEQ_LAST(head) ((head)->cqh_last)
-#define CIRCLEQ_END(head) ((void *)(head))
-#define CIRCLEQ_NEXT(elm, field) ((elm)->field.cqe_next)
-#define CIRCLEQ_PREV(elm, field) ((elm)->field.cqe_prev)
-#define CIRCLEQ_EMPTY(head) \
- (CIRCLEQ_FIRST(head) == CIRCLEQ_END(head))
-
-#define CIRCLEQ_FOREACH(var, head, field) \
- for((var) = CIRCLEQ_FIRST(head); \
- (var) != CIRCLEQ_END(head); \
- (var) = CIRCLEQ_NEXT(var, field))
-
-#define CIRCLEQ_FOREACH_REVERSE(var, head, field) \
- for((var) = CIRCLEQ_LAST(head); \
- (var) != CIRCLEQ_END(head); \
- (var) = CIRCLEQ_PREV(var, field))
-
-/*
- * Circular queue functions.
- */
-#define CIRCLEQ_INIT(head) do { \
- (head)->cqh_first = CIRCLEQ_END(head); \
- (head)->cqh_last = CIRCLEQ_END(head); \
-} while (0)
-
-#define CIRCLEQ_INSERT_AFTER(head, listelm, elm, field) do { \
- (elm)->field.cqe_next = (listelm)->field.cqe_next; \
- (elm)->field.cqe_prev = (listelm); \
- if ((listelm)->field.cqe_next == CIRCLEQ_END(head)) \
- (head)->cqh_last = (elm); \
- else \
- (listelm)->field.cqe_next->field.cqe_prev = (elm); \
- (listelm)->field.cqe_next = (elm); \
-} while (0)
-
-#define CIRCLEQ_INSERT_BEFORE(head, listelm, elm, field) do { \
- (elm)->field.cqe_next = (listelm); \
- (elm)->field.cqe_prev = (listelm)->field.cqe_prev; \
- if ((listelm)->field.cqe_prev == CIRCLEQ_END(head)) \
- (head)->cqh_first = (elm); \
- else \
- (listelm)->field.cqe_prev->field.cqe_next = (elm); \
- (listelm)->field.cqe_prev = (elm); \
-} while (0)
-
-#define CIRCLEQ_INSERT_HEAD(head, elm, field) do { \
- (elm)->field.cqe_next = (head)->cqh_first; \
- (elm)->field.cqe_prev = CIRCLEQ_END(head); \
- if ((head)->cqh_last == CIRCLEQ_END(head)) \
- (head)->cqh_last = (elm); \
- else \
- (head)->cqh_first->field.cqe_prev = (elm); \
- (head)->cqh_first = (elm); \
-} while (0)
-
-#define CIRCLEQ_INSERT_TAIL(head, elm, field) do { \
- (elm)->field.cqe_next = CIRCLEQ_END(head); \
- (elm)->field.cqe_prev = (head)->cqh_last; \
- if ((head)->cqh_first == CIRCLEQ_END(head)) \
- (head)->cqh_first = (elm); \
- else \
- (head)->cqh_last->field.cqe_next = (elm); \
- (head)->cqh_last = (elm); \
-} while (0)
-
-#define CIRCLEQ_REMOVE(head, elm, field) do { \
- if ((elm)->field.cqe_next == CIRCLEQ_END(head)) \
- (head)->cqh_last = (elm)->field.cqe_prev; \
- else \
- (elm)->field.cqe_next->field.cqe_prev = \
- (elm)->field.cqe_prev; \
- if ((elm)->field.cqe_prev == CIRCLEQ_END(head)) \
- (head)->cqh_first = (elm)->field.cqe_next; \
- else \
- (elm)->field.cqe_prev->field.cqe_next = \
- (elm)->field.cqe_next; \
-} while (0)
-
-#define CIRCLEQ_REPLACE(head, elm, elm2, field) do { \
- if (((elm2)->field.cqe_next = (elm)->field.cqe_next) == \
- CIRCLEQ_END(head)) \
- (head).cqh_last = (elm2); \
- else \
- (elm2)->field.cqe_next->field.cqe_prev = (elm2); \
- if (((elm2)->field.cqe_prev = (elm)->field.cqe_prev) == \
- CIRCLEQ_END(head)) \
- (head).cqh_first = (elm2); \
- else \
- (elm2)->field.cqe_prev->field.cqe_next = (elm2); \
-} while (0)
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_QUEUE_H */
diff --git a/usr/src/cmd/ssh/include/sys-tree.h b/usr/src/cmd/ssh/include/sys-tree.h
deleted file mode 100644
index fbd31a8f7c..0000000000
--- a/usr/src/cmd/ssh/include/sys-tree.h
+++ /dev/null
@@ -1,682 +0,0 @@
-/* $OpenBSD: tree.h,v 1.6 2002/06/11 22:09:52 provos Exp $ */
-/*
- * Copyright 2002 Niels Provos <provos@citi.umich.edu>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _SYS_TREE_H
-#define _SYS_TREE_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * This file defines data structures for different types of trees:
- * splay trees and red-black trees.
- *
- * A splay tree is a self-organizing data structure. Every operation
- * on the tree causes a splay to happen. The splay moves the requested
- * node to the root of the tree and partly rebalances it.
- *
- * This has the benefit that request locality causes faster lookups as
- * the requested nodes move to the top of the tree. On the other hand,
- * every lookup causes memory writes.
- *
- * The Balance Theorem bounds the total access time for m operations
- * and n inserts on an initially empty tree as O((m + n)lg n). The
- * amortized cost for a sequence of m accesses to a splay tree is O(lg n);
- *
- * A red-black tree is a binary search tree with the node color as an
- * extra attribute. It fulfills a set of conditions:
- * - every search path from the root to a leaf consists of the
- * same number of black nodes,
- * - each red node (except for the root) has a black parent,
- * - each leaf node is black.
- *
- * Every operation on a red-black tree is bounded as O(lg n).
- * The maximum height of a red-black tree is 2lg (n+1).
- */
-
-#define SPLAY_HEAD(name, type) \
-struct name { \
- struct type *sph_root; /* root of the tree */ \
-}
-
-#define SPLAY_INITIALIZER(root) \
- { NULL }
-
-#define SPLAY_INIT(root) do { \
- (root)->sph_root = NULL; \
-} while (0)
-
-#define SPLAY_ENTRY(type) \
-struct { \
- struct type *spe_left; /* left element */ \
- struct type *spe_right; /* right element */ \
-}
-
-#define SPLAY_LEFT(elm, field) (elm)->field.spe_left
-#define SPLAY_RIGHT(elm, field) (elm)->field.spe_right
-#define SPLAY_ROOT(head) (head)->sph_root
-#define SPLAY_EMPTY(head) (SPLAY_ROOT(head) == NULL)
-
-/* SPLAY_ROTATE_{LEFT,RIGHT} expect that tmp hold SPLAY_{RIGHT,LEFT} */
-#define SPLAY_ROTATE_RIGHT(head, tmp, field) do { \
- SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(tmp, field); \
- SPLAY_RIGHT(tmp, field) = (head)->sph_root; \
- (head)->sph_root = tmp; \
-} while (0)
-
-#define SPLAY_ROTATE_LEFT(head, tmp, field) do { \
- SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(tmp, field); \
- SPLAY_LEFT(tmp, field) = (head)->sph_root; \
- (head)->sph_root = tmp; \
-} while (0)
-
-#define SPLAY_LINKLEFT(head, tmp, field) do { \
- SPLAY_LEFT(tmp, field) = (head)->sph_root; \
- tmp = (head)->sph_root; \
- (head)->sph_root = SPLAY_LEFT((head)->sph_root, field); \
-} while (0)
-
-#define SPLAY_LINKRIGHT(head, tmp, field) do { \
- SPLAY_RIGHT(tmp, field) = (head)->sph_root; \
- tmp = (head)->sph_root; \
- (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field); \
-} while (0)
-
-#define SPLAY_ASSEMBLE(head, node, left, right, field) do { \
- SPLAY_RIGHT(left, field) = SPLAY_LEFT((head)->sph_root, field); \
- SPLAY_LEFT(right, field) = SPLAY_RIGHT((head)->sph_root, field);\
- SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(node, field); \
- SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(node, field); \
-} while (0)
-
-/* Generates prototypes and inline functions */
-
-#define SPLAY_PROTOTYPE(name, type, field, cmp) \
-void name##_SPLAY(struct name *, struct type *); \
-void name##_SPLAY_MINMAX(struct name *, int); \
-struct type *name##_SPLAY_INSERT(struct name *, struct type *); \
-struct type *name##_SPLAY_REMOVE(struct name *, struct type *); \
- \
-/* Finds the node with the same key as elm */ \
-static __inline struct type * \
-name##_SPLAY_FIND(struct name *head, struct type *elm) \
-{ \
- if (SPLAY_EMPTY(head)) \
- return(NULL); \
- name##_SPLAY(head, elm); \
- if ((cmp)(elm, (head)->sph_root) == 0) \
- return (head->sph_root); \
- return (NULL); \
-} \
- \
-static __inline struct type * \
-name##_SPLAY_NEXT(struct name *head, struct type *elm) \
-{ \
- name##_SPLAY(head, elm); \
- if (SPLAY_RIGHT(elm, field) != NULL) { \
- elm = SPLAY_RIGHT(elm, field); \
- while (SPLAY_LEFT(elm, field) != NULL) { \
- elm = SPLAY_LEFT(elm, field); \
- } \
- } else \
- elm = NULL; \
- return (elm); \
-} \
- \
-static __inline struct type * \
-name##_SPLAY_MIN_MAX(struct name *head, int val) \
-{ \
- name##_SPLAY_MINMAX(head, val); \
- return (SPLAY_ROOT(head)); \
-}
-
-/* Main splay operation.
- * Moves node close to the key of elm to top
- */
-#define SPLAY_GENERATE(name, type, field, cmp) \
-struct type * \
-name##_SPLAY_INSERT(struct name *head, struct type *elm) \
-{ \
- if (SPLAY_EMPTY(head)) { \
- SPLAY_LEFT(elm, field) = SPLAY_RIGHT(elm, field) = NULL; \
- } else { \
- int __comp; \
- name##_SPLAY(head, elm); \
- __comp = (cmp)(elm, (head)->sph_root); \
- if(__comp < 0) { \
- SPLAY_LEFT(elm, field) = SPLAY_LEFT((head)->sph_root, field);\
- SPLAY_RIGHT(elm, field) = (head)->sph_root; \
- SPLAY_LEFT((head)->sph_root, field) = NULL; \
- } else if (__comp > 0) { \
- SPLAY_RIGHT(elm, field) = SPLAY_RIGHT((head)->sph_root, field);\
- SPLAY_LEFT(elm, field) = (head)->sph_root; \
- SPLAY_RIGHT((head)->sph_root, field) = NULL; \
- } else \
- return ((head)->sph_root); \
- } \
- (head)->sph_root = (elm); \
- return (NULL); \
-} \
- \
-struct type * \
-name##_SPLAY_REMOVE(struct name *head, struct type *elm) \
-{ \
- struct type *__tmp; \
- if (SPLAY_EMPTY(head)) \
- return (NULL); \
- name##_SPLAY(head, elm); \
- if ((cmp)(elm, (head)->sph_root) == 0) { \
- if (SPLAY_LEFT((head)->sph_root, field) == NULL) { \
- (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);\
- } else { \
- __tmp = SPLAY_RIGHT((head)->sph_root, field); \
- (head)->sph_root = SPLAY_LEFT((head)->sph_root, field);\
- name##_SPLAY(head, elm); \
- SPLAY_RIGHT((head)->sph_root, field) = __tmp; \
- } \
- return (elm); \
- } \
- return (NULL); \
-} \
- \
-void \
-name##_SPLAY(struct name *head, struct type *elm) \
-{ \
- struct type __node, *__left, *__right, *__tmp; \
- int __comp; \
-\
- SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\
- __left = __right = &__node; \
-\
- while ((__comp = (cmp)(elm, (head)->sph_root))) { \
- if (__comp < 0) { \
- __tmp = SPLAY_LEFT((head)->sph_root, field); \
- if (__tmp == NULL) \
- break; \
- if ((cmp)(elm, __tmp) < 0){ \
- SPLAY_ROTATE_RIGHT(head, __tmp, field); \
- if (SPLAY_LEFT((head)->sph_root, field) == NULL)\
- break; \
- } \
- SPLAY_LINKLEFT(head, __right, field); \
- } else if (__comp > 0) { \
- __tmp = SPLAY_RIGHT((head)->sph_root, field); \
- if (__tmp == NULL) \
- break; \
- if ((cmp)(elm, __tmp) > 0){ \
- SPLAY_ROTATE_LEFT(head, __tmp, field); \
- if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\
- break; \
- } \
- SPLAY_LINKRIGHT(head, __left, field); \
- } \
- } \
- SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \
-} \
- \
-/* Splay with either the minimum or the maximum element \
- * Used to find minimum or maximum element in tree. \
- */ \
-void name##_SPLAY_MINMAX(struct name *head, int __comp) \
-{ \
- struct type __node, *__left, *__right, *__tmp; \
-\
- SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\
- __left = __right = &__node; \
-\
- while (1) { \
- if (__comp < 0) { \
- __tmp = SPLAY_LEFT((head)->sph_root, field); \
- if (__tmp == NULL) \
- break; \
- if (__comp < 0){ \
- SPLAY_ROTATE_RIGHT(head, __tmp, field); \
- if (SPLAY_LEFT((head)->sph_root, field) == NULL)\
- break; \
- } \
- SPLAY_LINKLEFT(head, __right, field); \
- } else if (__comp > 0) { \
- __tmp = SPLAY_RIGHT((head)->sph_root, field); \
- if (__tmp == NULL) \
- break; \
- if (__comp > 0) { \
- SPLAY_ROTATE_LEFT(head, __tmp, field); \
- if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\
- break; \
- } \
- SPLAY_LINKRIGHT(head, __left, field); \
- } \
- } \
- SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \
-}
-
-#define SPLAY_NEGINF -1
-#define SPLAY_INF 1
-
-#define SPLAY_INSERT(name, x, y) name##_SPLAY_INSERT(x, y)
-#define SPLAY_REMOVE(name, x, y) name##_SPLAY_REMOVE(x, y)
-#define SPLAY_FIND(name, x, y) name##_SPLAY_FIND(x, y)
-#define SPLAY_NEXT(name, x, y) name##_SPLAY_NEXT(x, y)
-#define SPLAY_MIN(name, x) (SPLAY_EMPTY(x) ? NULL \
- : name##_SPLAY_MIN_MAX(x, SPLAY_NEGINF))
-#define SPLAY_MAX(name, x) (SPLAY_EMPTY(x) ? NULL \
- : name##_SPLAY_MIN_MAX(x, SPLAY_INF))
-
-#define SPLAY_FOREACH(x, name, head) \
- for ((x) = SPLAY_MIN(name, head); \
- (x) != NULL; \
- (x) = SPLAY_NEXT(name, head, x))
-
-/* Macros that define a red-back tree */
-#define RB_HEAD(name, type) \
-struct name { \
- struct type *rbh_root; /* root of the tree */ \
-}
-
-#define RB_INITIALIZER(root) \
- { NULL }
-
-#define RB_INIT(root) do { \
- (root)->rbh_root = NULL; \
-} while (0)
-
-#define RB_BLACK 0
-#define RB_RED 1
-#define RB_ENTRY(type) \
-struct { \
- struct type *rbe_left; /* left element */ \
- struct type *rbe_right; /* right element */ \
- struct type *rbe_parent; /* parent element */ \
- int rbe_color; /* node color */ \
-}
-
-#define RB_LEFT(elm, field) (elm)->field.rbe_left
-#define RB_RIGHT(elm, field) (elm)->field.rbe_right
-#define RB_PARENT(elm, field) (elm)->field.rbe_parent
-#define RB_COLOR(elm, field) (elm)->field.rbe_color
-#define RB_ROOT(head) (head)->rbh_root
-#define RB_EMPTY(head) (RB_ROOT(head) == NULL)
-
-#define RB_SET(elm, parent, field) do { \
- RB_PARENT(elm, field) = parent; \
- RB_LEFT(elm, field) = RB_RIGHT(elm, field) = NULL; \
- RB_COLOR(elm, field) = RB_RED; \
-} while (0)
-
-#define RB_SET_BLACKRED(black, red, field) do { \
- RB_COLOR(black, field) = RB_BLACK; \
- RB_COLOR(red, field) = RB_RED; \
-} while (0)
-
-#ifndef RB_AUGMENT
-#define RB_AUGMENT(x)
-#endif
-
-#define RB_ROTATE_LEFT(head, elm, tmp, field) do { \
- (tmp) = RB_RIGHT(elm, field); \
- if ((RB_RIGHT(elm, field) = RB_LEFT(tmp, field))) { \
- RB_PARENT(RB_LEFT(tmp, field), field) = (elm); \
- } \
- RB_AUGMENT(elm); \
- if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field))) { \
- if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \
- RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \
- else \
- RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \
- RB_AUGMENT(RB_PARENT(elm, field)); \
- } else \
- (head)->rbh_root = (tmp); \
- RB_LEFT(tmp, field) = (elm); \
- RB_PARENT(elm, field) = (tmp); \
- RB_AUGMENT(tmp); \
-} while (0)
-
-#define RB_ROTATE_RIGHT(head, elm, tmp, field) do { \
- (tmp) = RB_LEFT(elm, field); \
- if ((RB_LEFT(elm, field) = RB_RIGHT(tmp, field))) { \
- RB_PARENT(RB_RIGHT(tmp, field), field) = (elm); \
- } \
- RB_AUGMENT(elm); \
- if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field))) { \
- if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \
- RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \
- else \
- RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \
- RB_AUGMENT(RB_PARENT(elm, field)); \
- } else \
- (head)->rbh_root = (tmp); \
- RB_RIGHT(tmp, field) = (elm); \
- RB_PARENT(elm, field) = (tmp); \
- RB_AUGMENT(tmp); \
-} while (0)
-
-/* Generates prototypes and inline functions */
-#define RB_PROTOTYPE(name, type, field, cmp) \
-void name##_RB_INSERT_COLOR(struct name *, struct type *); \
-void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *);\
-struct type *name##_RB_REMOVE(struct name *, struct type *); \
-struct type *name##_RB_INSERT(struct name *, struct type *); \
-struct type *name##_RB_FIND(struct name *, struct type *); \
-struct type *name##_RB_NEXT(struct name *, struct type *); \
-struct type *name##_RB_MINMAX(struct name *, int);
-
-/* Main rb operation.
- * Moves node close to the key of elm to top
- */
-#define RB_GENERATE(name, type, field, cmp) \
-void \
-name##_RB_INSERT_COLOR(struct name *head, struct type *elm) \
-{ \
- struct type *parent, *gparent, *tmp; \
- while ((parent = RB_PARENT(elm, field)) && \
- RB_COLOR(parent, field) == RB_RED) { \
- gparent = RB_PARENT(parent, field); \
- if (parent == RB_LEFT(gparent, field)) { \
- tmp = RB_RIGHT(gparent, field); \
- if (tmp && RB_COLOR(tmp, field) == RB_RED) { \
- RB_COLOR(tmp, field) = RB_BLACK; \
- RB_SET_BLACKRED(parent, gparent, field);\
- elm = gparent; \
- continue; \
- } \
- if (RB_RIGHT(parent, field) == elm) { \
- RB_ROTATE_LEFT(head, parent, tmp, field);\
- tmp = parent; \
- parent = elm; \
- elm = tmp; \
- } \
- RB_SET_BLACKRED(parent, gparent, field); \
- RB_ROTATE_RIGHT(head, gparent, tmp, field); \
- } else { \
- tmp = RB_LEFT(gparent, field); \
- if (tmp && RB_COLOR(tmp, field) == RB_RED) { \
- RB_COLOR(tmp, field) = RB_BLACK; \
- RB_SET_BLACKRED(parent, gparent, field);\
- elm = gparent; \
- continue; \
- } \
- if (RB_LEFT(parent, field) == elm) { \
- RB_ROTATE_RIGHT(head, parent, tmp, field);\
- tmp = parent; \
- parent = elm; \
- elm = tmp; \
- } \
- RB_SET_BLACKRED(parent, gparent, field); \
- RB_ROTATE_LEFT(head, gparent, tmp, field); \
- } \
- } \
- RB_COLOR(head->rbh_root, field) = RB_BLACK; \
-} \
- \
-void \
-name##_RB_REMOVE_COLOR(struct name *head, struct type *parent, struct type *elm) \
-{ \
- struct type *tmp; \
- while ((elm == NULL || RB_COLOR(elm, field) == RB_BLACK) && \
- elm != RB_ROOT(head)) { \
- if (RB_LEFT(parent, field) == elm) { \
- tmp = RB_RIGHT(parent, field); \
- if (RB_COLOR(tmp, field) == RB_RED) { \
- RB_SET_BLACKRED(tmp, parent, field); \
- RB_ROTATE_LEFT(head, parent, tmp, field);\
- tmp = RB_RIGHT(parent, field); \
- } \
- if ((RB_LEFT(tmp, field) == NULL || \
- RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\
- (RB_RIGHT(tmp, field) == NULL || \
- RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\
- RB_COLOR(tmp, field) = RB_RED; \
- elm = parent; \
- parent = RB_PARENT(elm, field); \
- } else { \
- if (RB_RIGHT(tmp, field) == NULL || \
- RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK) {\
- struct type *oleft; \
- if ((oleft = RB_LEFT(tmp, field)))\
- RB_COLOR(oleft, field) = RB_BLACK;\
- RB_COLOR(tmp, field) = RB_RED; \
- RB_ROTATE_RIGHT(head, tmp, oleft, field);\
- tmp = RB_RIGHT(parent, field); \
- } \
- RB_COLOR(tmp, field) = RB_COLOR(parent, field);\
- RB_COLOR(parent, field) = RB_BLACK; \
- if (RB_RIGHT(tmp, field)) \
- RB_COLOR(RB_RIGHT(tmp, field), field) = RB_BLACK;\
- RB_ROTATE_LEFT(head, parent, tmp, field);\
- elm = RB_ROOT(head); \
- break; \
- } \
- } else { \
- tmp = RB_LEFT(parent, field); \
- if (RB_COLOR(tmp, field) == RB_RED) { \
- RB_SET_BLACKRED(tmp, parent, field); \
- RB_ROTATE_RIGHT(head, parent, tmp, field);\
- tmp = RB_LEFT(parent, field); \
- } \
- if ((RB_LEFT(tmp, field) == NULL || \
- RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\
- (RB_RIGHT(tmp, field) == NULL || \
- RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\
- RB_COLOR(tmp, field) = RB_RED; \
- elm = parent; \
- parent = RB_PARENT(elm, field); \
- } else { \
- if (RB_LEFT(tmp, field) == NULL || \
- RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) {\
- struct type *oright; \
- if ((oright = RB_RIGHT(tmp, field)))\
- RB_COLOR(oright, field) = RB_BLACK;\
- RB_COLOR(tmp, field) = RB_RED; \
- RB_ROTATE_LEFT(head, tmp, oright, field);\
- tmp = RB_LEFT(parent, field); \
- } \
- RB_COLOR(tmp, field) = RB_COLOR(parent, field);\
- RB_COLOR(parent, field) = RB_BLACK; \
- if (RB_LEFT(tmp, field)) \
- RB_COLOR(RB_LEFT(tmp, field), field) = RB_BLACK;\
- RB_ROTATE_RIGHT(head, parent, tmp, field);\
- elm = RB_ROOT(head); \
- break; \
- } \
- } \
- } \
- if (elm) \
- RB_COLOR(elm, field) = RB_BLACK; \
-} \
- \
-struct type * \
-name##_RB_REMOVE(struct name *head, struct type *elm) \
-{ \
- struct type *child, *parent, *old = elm; \
- int color; \
- if (RB_LEFT(elm, field) == NULL) \
- child = RB_RIGHT(elm, field); \
- else if (RB_RIGHT(elm, field) == NULL) \
- child = RB_LEFT(elm, field); \
- else { \
- struct type *left; \
- elm = RB_RIGHT(elm, field); \
- while ((left = RB_LEFT(elm, field))) \
- elm = left; \
- child = RB_RIGHT(elm, field); \
- parent = RB_PARENT(elm, field); \
- color = RB_COLOR(elm, field); \
- if (child) \
- RB_PARENT(child, field) = parent; \
- if (parent) { \
- if (RB_LEFT(parent, field) == elm) \
- RB_LEFT(parent, field) = child; \
- else \
- RB_RIGHT(parent, field) = child; \
- RB_AUGMENT(parent); \
- } else \
- RB_ROOT(head) = child; \
- if (RB_PARENT(elm, field) == old) \
- parent = elm; \
- (elm)->field = (old)->field; \
- if (RB_PARENT(old, field)) { \
- if (RB_LEFT(RB_PARENT(old, field), field) == old)\
- RB_LEFT(RB_PARENT(old, field), field) = elm;\
- else \
- RB_RIGHT(RB_PARENT(old, field), field) = elm;\
- RB_AUGMENT(RB_PARENT(old, field)); \
- } else \
- RB_ROOT(head) = elm; \
- RB_PARENT(RB_LEFT(old, field), field) = elm; \
- if (RB_RIGHT(old, field)) \
- RB_PARENT(RB_RIGHT(old, field), field) = elm; \
- if (parent) { \
- left = parent; \
- do { \
- RB_AUGMENT(left); \
- } while ((left = RB_PARENT(left, field))); \
- } \
- goto color; \
- } \
- parent = RB_PARENT(elm, field); \
- color = RB_COLOR(elm, field); \
- if (child) \
- RB_PARENT(child, field) = parent; \
- if (parent) { \
- if (RB_LEFT(parent, field) == elm) \
- RB_LEFT(parent, field) = child; \
- else \
- RB_RIGHT(parent, field) = child; \
- RB_AUGMENT(parent); \
- } else \
- RB_ROOT(head) = child; \
-color: \
- if (color == RB_BLACK) \
- name##_RB_REMOVE_COLOR(head, parent, child); \
- return (old); \
-} \
- \
-/* Inserts a node into the RB tree */ \
-struct type * \
-name##_RB_INSERT(struct name *head, struct type *elm) \
-{ \
- struct type *tmp; \
- struct type *parent = NULL; \
- int comp = 0; \
- tmp = RB_ROOT(head); \
- while (tmp) { \
- parent = tmp; \
- comp = (cmp)(elm, parent); \
- if (comp < 0) \
- tmp = RB_LEFT(tmp, field); \
- else if (comp > 0) \
- tmp = RB_RIGHT(tmp, field); \
- else \
- return (tmp); \
- } \
- RB_SET(elm, parent, field); \
- if (parent != NULL) { \
- if (comp < 0) \
- RB_LEFT(parent, field) = elm; \
- else \
- RB_RIGHT(parent, field) = elm; \
- RB_AUGMENT(parent); \
- } else \
- RB_ROOT(head) = elm; \
- name##_RB_INSERT_COLOR(head, elm); \
- return (NULL); \
-} \
- \
-/* Finds the node with the same key as elm */ \
-struct type * \
-name##_RB_FIND(struct name *head, struct type *elm) \
-{ \
- struct type *tmp = RB_ROOT(head); \
- int comp; \
- while (tmp) { \
- comp = cmp(elm, tmp); \
- if (comp < 0) \
- tmp = RB_LEFT(tmp, field); \
- else if (comp > 0) \
- tmp = RB_RIGHT(tmp, field); \
- else \
- return (tmp); \
- } \
- return (NULL); \
-} \
- \
-struct type * \
-name##_RB_NEXT(struct name *head, struct type *elm) \
-{ \
- if (RB_RIGHT(elm, field)) { \
- elm = RB_RIGHT(elm, field); \
- while (RB_LEFT(elm, field)) \
- elm = RB_LEFT(elm, field); \
- } else { \
- if (RB_PARENT(elm, field) && \
- (elm == RB_LEFT(RB_PARENT(elm, field), field))) \
- elm = RB_PARENT(elm, field); \
- else { \
- while (RB_PARENT(elm, field) && \
- (elm == RB_RIGHT(RB_PARENT(elm, field), field)))\
- elm = RB_PARENT(elm, field); \
- elm = RB_PARENT(elm, field); \
- } \
- } \
- return (elm); \
-} \
- \
-struct type * \
-name##_RB_MINMAX(struct name *head, int val) \
-{ \
- struct type *tmp = RB_ROOT(head); \
- struct type *parent = NULL; \
- while (tmp) { \
- parent = tmp; \
- if (val < 0) \
- tmp = RB_LEFT(tmp, field); \
- else \
- tmp = RB_RIGHT(tmp, field); \
- } \
- return (parent); \
-}
-
-#define RB_NEGINF -1
-#define RB_INF 1
-
-#define RB_INSERT(name, x, y) name##_RB_INSERT(x, y)
-#define RB_REMOVE(name, x, y) name##_RB_REMOVE(x, y)
-#define RB_FIND(name, x, y) name##_RB_FIND(x, y)
-#define RB_NEXT(name, x, y) name##_RB_NEXT(x, y)
-#define RB_MIN(name, x) name##_RB_MINMAX(x, RB_NEGINF)
-#define RB_MAX(name, x) name##_RB_MINMAX(x, RB_INF)
-
-#define RB_FOREACH(x, name, head) \
- for ((x) = RB_MIN(name, head); \
- (x) != NULL; \
- (x) = name##_RB_NEXT(head, x))
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_TREE_H */
diff --git a/usr/src/cmd/ssh/include/tildexpand.h b/usr/src/cmd/ssh/include/tildexpand.h
deleted file mode 100644
index 22e95b7779..0000000000
--- a/usr/src/cmd/ssh/include/tildexpand.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-/* $OpenBSD: tildexpand.h,v 1.4 2001/06/26 17:27:25 markus Exp $ */
-
-#ifndef _TILDEXPAND_H
-#define _TILDEXPAND_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-char *tilde_expand_filename(const char *, uid_t);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _TILDEXPAND_H */
diff --git a/usr/src/cmd/ssh/include/ttymodes.h b/usr/src/cmd/ssh/include/ttymodes.h
deleted file mode 100644
index d464e7f308..0000000000
--- a/usr/src/cmd/ssh/include/ttymodes.h
+++ /dev/null
@@ -1,190 +0,0 @@
-/* $OpenBSD: ttymodes.h,v 1.12 2002/03/04 17:27:39 stevesk Exp $ */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * NOTE: This file MUST NOT have a header guard added!!!
- *
- * This header is included twice in ttymodes.c, which defines the TTYCHAR()
- * and TTYMODE() macros, used below, twice, once prior to inclusion of this
- * file in tty_make_modes() and once prior to inclusion of this file in
- * tty_parse_modes().
- */
-
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-/*
- * SSH2 tty modes support by Kevin Steves.
- * Copyright (c) 2001 Kevin Steves. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/*
- * SSH1:
- * The tty mode description is a stream of bytes. The stream consists of
- * opcode-arguments pairs. It is terminated by opcode TTY_OP_END (0).
- * Opcodes 1-127 have one-byte arguments. Opcodes 128-159 have integer
- * arguments. Opcodes 160-255 are not yet defined, and cause parsing to
- * stop (they should only be used after any other data).
- *
- * SSH2:
- * Differences between SSH1 and SSH2 terminal mode encoding include:
- * 1. Encoded terminal modes are represented as a string, and a stream
- * of bytes within that string.
- * 2. Opcode arguments are uint32 (1-159); 160-255 remain undefined.
- * 3. The values for TTY_OP_ISPEED and TTY_OP_OSPEED are different;
- * 128 and 129 vs. 192 and 193 respectively.
- *
- * The client puts in the stream any modes it knows about, and the
- * server ignores any modes it does not know about. This allows some degree
- * of machine-independence, at least between systems that use a posix-like
- * tty interface. The protocol can support other systems as well, but might
- * require reimplementing as mode names would likely be different.
- */
-
-/*
- * Some constants and prototypes are defined in packet.h; this file
- * is only intended for including from ttymodes.c.
- */
-
-/* termios macro */
-/* name, op */
-TTYCHAR(VINTR, 1)
-TTYCHAR(VQUIT, 2)
-TTYCHAR(VERASE, 3)
-#if defined(VKILL)
-TTYCHAR(VKILL, 4)
-#endif /* VKILL */
-TTYCHAR(VEOF, 5)
-#if defined(VEOL)
-TTYCHAR(VEOL, 6)
-#endif /* VEOL */
-#ifdef VEOL2
-TTYCHAR(VEOL2, 7)
-#endif /* VEOL2 */
-TTYCHAR(VSTART, 8)
-TTYCHAR(VSTOP, 9)
-#if defined(VSUSP)
-TTYCHAR(VSUSP, 10)
-#endif /* VSUSP */
-#if defined(VDSUSP)
-TTYCHAR(VDSUSP, 11)
-#endif /* VDSUSP */
-#if defined(VREPRINT)
-TTYCHAR(VREPRINT, 12)
-#endif /* VREPRINT */
-#if defined(VWERASE)
-TTYCHAR(VWERASE, 13)
-#endif /* VWERASE */
-#if defined(VLNEXT)
-TTYCHAR(VLNEXT, 14)
-#endif /* VLNEXT */
-#if defined(VFLUSH)
-TTYCHAR(VFLUSH, 15)
-#endif /* VFLUSH */
-#ifdef VSWTCH
-TTYCHAR(VSWTCH, 16)
-#endif /* VSWTCH */
-#if defined(VSTATUS)
-TTYCHAR(VSTATUS, 17)
-#endif /* VSTATUS */
-#ifdef VDISCARD
-TTYCHAR(VDISCARD, 18)
-#endif /* VDISCARD */
-
-/* name, field, op */
-TTYMODE(IGNPAR, c_iflag, 30)
-TTYMODE(PARMRK, c_iflag, 31)
-TTYMODE(INPCK, c_iflag, 32)
-TTYMODE(ISTRIP, c_iflag, 33)
-TTYMODE(INLCR, c_iflag, 34)
-TTYMODE(IGNCR, c_iflag, 35)
-TTYMODE(ICRNL, c_iflag, 36)
-#if defined(IUCLC)
-TTYMODE(IUCLC, c_iflag, 37)
-#endif
-TTYMODE(IXON, c_iflag, 38)
-TTYMODE(IXANY, c_iflag, 39)
-TTYMODE(IXOFF, c_iflag, 40)
-#ifdef IMAXBEL
-TTYMODE(IMAXBEL,c_iflag, 41)
-#endif /* IMAXBEL */
-
-TTYMODE(ISIG, c_lflag, 50)
-TTYMODE(ICANON, c_lflag, 51)
-#ifdef XCASE
-TTYMODE(XCASE, c_lflag, 52)
-#endif
-TTYMODE(ECHO, c_lflag, 53)
-TTYMODE(ECHOE, c_lflag, 54)
-TTYMODE(ECHOK, c_lflag, 55)
-TTYMODE(ECHONL, c_lflag, 56)
-TTYMODE(NOFLSH, c_lflag, 57)
-TTYMODE(TOSTOP, c_lflag, 58)
-#ifdef IEXTEN
-TTYMODE(IEXTEN, c_lflag, 59)
-#endif /* IEXTEN */
-#if defined(ECHOCTL)
-TTYMODE(ECHOCTL,c_lflag, 60)
-#endif /* ECHOCTL */
-#ifdef ECHOKE
-TTYMODE(ECHOKE, c_lflag, 61)
-#endif /* ECHOKE */
-#if defined(PENDIN)
-TTYMODE(PENDIN, c_lflag, 62)
-#endif /* PENDIN */
-
-TTYMODE(OPOST, c_oflag, 70)
-#if defined(OLCUC)
-TTYMODE(OLCUC, c_oflag, 71)
-#endif
-#ifdef ONLCR
-TTYMODE(ONLCR, c_oflag, 72)
-#endif
-#ifdef OCRNL
-TTYMODE(OCRNL, c_oflag, 73)
-#endif
-#ifdef ONOCR
-TTYMODE(ONOCR, c_oflag, 74)
-#endif
-#ifdef ONLRET
-TTYMODE(ONLRET, c_oflag, 75)
-#endif
-
-TTYMODE(CS7, c_cflag, 90)
-TTYMODE(CS8, c_cflag, 91)
-TTYMODE(PARENB, c_cflag, 92)
-TTYMODE(PARODD, c_cflag, 93)
diff --git a/usr/src/cmd/ssh/include/uidswap.h b/usr/src/cmd/ssh/include/uidswap.h
deleted file mode 100644
index 5444f02d32..0000000000
--- a/usr/src/cmd/ssh/include/uidswap.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-/* $OpenBSD: uidswap.h,v 1.9 2001/06/26 17:27:25 markus Exp $ */
-
-#ifndef _UIDSWAP_H
-#define _UIDSWAP_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void temporarily_use_uid(struct passwd *);
-void restore_uid(void);
-void permanently_set_uid(struct passwd *, char *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _UIDSWAP_H */
diff --git a/usr/src/cmd/ssh/include/uuencode.h b/usr/src/cmd/ssh/include/uuencode.h
deleted file mode 100644
index 89ba430779..0000000000
--- a/usr/src/cmd/ssh/include/uuencode.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/* $OpenBSD: uuencode.h,v 1.9 2002/02/25 16:33:27 markus Exp $ */
-
-#ifndef _UUENCODE_H
-#define _UUENCODE_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Copyright (c) 2000 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-int uuencode(u_char *, u_int, char *, size_t);
-int uudecode(const char *, u_char *, size_t);
-void dump_base64(FILE *, u_char *, u_int);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _UUENCODE_H */
diff --git a/usr/src/cmd/ssh/include/version.h b/usr/src/cmd/ssh/include/version.h
deleted file mode 100644
index 7d641b6967..0000000000
--- a/usr/src/cmd/ssh/include/version.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-/* $OpenBSD: version.h,v 1.35 2002/10/01 13:24:50 markus Exp $ */
-
-#ifndef _VERSION_H
-#define _VERSION_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/* #define SSH_VERSION "OpenSSH_3.5p1" */
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _VERSION_H */
diff --git a/usr/src/cmd/ssh/include/xlist.h b/usr/src/cmd/ssh/include/xlist.h
deleted file mode 100644
index a6675c246b..0000000000
--- a/usr/src/cmd/ssh/include/xlist.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef _XLIST_H
-#define _XLIST_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-char ** xsplit(char *list, char sep);
-char * xjoin(char **alist, char sep);
-void xfree_split_list(char **list);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _XLIST_H */
diff --git a/usr/src/cmd/ssh/include/xmalloc.h b/usr/src/cmd/ssh/include/xmalloc.h
deleted file mode 100644
index 6477737a4e..0000000000
--- a/usr/src/cmd/ssh/include/xmalloc.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* $OpenBSD: xmalloc.h,v 1.9 2002/06/19 00:27:55 deraadt Exp $ */
-
-#ifndef _XMALLOC_H
-#define _XMALLOC_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * Created: Mon Mar 20 22:09:17 1995 ylo
- *
- * Versions of malloc and friends that check their results, and never return
- * failure (they call fatal if they encounter an error).
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-void *xmalloc(size_t);
-void *xcalloc(size_t, size_t);
-void *xrealloc(void *, size_t);
-void xfree(void *);
-char *xstrdup(const char *);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _XMALLOC_H */
diff --git a/usr/src/cmd/ssh/include/xmmap.h b/usr/src/cmd/ssh/include/xmmap.h
deleted file mode 100644
index 09723b4ecf..0000000000
--- a/usr/src/cmd/ssh/include/xmmap.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _XMMAP_H
-#define _XMMAP_H
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-void *xmmap(size_t size);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _XMMAP_H */
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/Makefile.com b/usr/src/cmd/ssh/libopenbsd-compat/Makefile.com
deleted file mode 100644
index 0b8ad1622b..0000000000
--- a/usr/src/cmd/ssh/libopenbsd-compat/Makefile.com
+++ /dev/null
@@ -1,92 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
-#
-# cmd/ssh/libopenbsd-compat/Makefile.com
-#
-
-LIBRARY = libopenbsd-compat.a
-VERS = .1
-
-OBJECTS = \
- bsd-arc4random.o \
- bsd-cray.o \
- bsd-cygwin_util.o \
- bsd-getpeereid.o \
- bsd-misc.o \
- bsd-asprintf.o \
- bsd-snprintf.o \
- bsd-waitpid.o \
- fake-getaddrinfo.o \
- fake-getnameinfo.o \
- xmmap.o \
- base64.o \
- bindresvport.o \
- dirname.o \
- getcwd.o \
- getgrouplist.o \
- getopt.o \
- glob.o \
- inet_aton.o \
- inet_ntoa.o \
- inet_ntop.o \
- mktemp.o \
- readpassphrase.o \
- realpath.o \
- rresvport.o \
- setenv.o \
- setproctitle.o \
- sigact.o \
- strlcat.o \
- strlcpy.o \
- strmode.o \
- port-irix.o \
- port-aix.o
-
-include $(SRC)/lib/Makefile.lib
-
-BUILD.AR = $(RM) $@ ; $(AR) $(ARFLAGS) $@ $(AROBJS)
-
-SRCDIR = ../common
-SRCS = $(OBJECTS:%.o=../common/%.c)
-
-LIBS = $(LIBRARY) $(LINTLIB)
-
-$(LINTLIB) := SRCS = $(SRCDIR)/$(LINTSRC)
-
-POFILE_DIR = ../..
-
-.KEEP_STATE:
-
-all: $(LIBS)
-
-# lint requires the (non-installed) lint library
-lint: $(LINTLIB) .WAIT lintcheck
-
-include $(SRC)/lib/Makefile.targ
-
-objs/%.o: $(SRCDIR)/%.c
- $(COMPILE.c) -o $@ $<
- $(POST_PROCESS_O)
-
-include ../../Makefile.ssh-common
-include ../../Makefile.msg.targ
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/bindresvport.c b/usr/src/cmd/ssh/libopenbsd-compat/common/bindresvport.c
deleted file mode 100644
index 8432233bb6..0000000000
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/bindresvport.c
+++ /dev/null
@@ -1,127 +0,0 @@
-/* This file has be modified from the original OpenBSD source */
-
-/*
- * Sun RPC is a product of Sun Microsystems, Inc. and is provided for
- * unrestricted use provided that this legend is included on all tape
- * media and as a part of the software program in whole or part. Users
- * may copy or modify Sun RPC without charge, but are not authorized
- * to license or distribute it to anyone else except as part of a product or
- * program developed by the user.
- *
- * SUN RPC IS PROVIDED AS IS WITH NO WARRANTIES OF ANY KIND INCLUDING THE
- * WARRANTIES OF DESIGN, MERCHANTIBILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE, OR ARISING FROM A COURSE OF DEALING, USAGE OR TRADE PRACTICE.
- *
- * Sun RPC is provided with no support and without any obligation on the
- * part of Sun Microsystems, Inc. to assist in its use, correction,
- * modification or enhancement.
- *
- * SUN MICROSYSTEMS, INC. SHALL HAVE NO LIABILITY WITH RESPECT TO THE
- * INFRINGEMENT OF COPYRIGHTS, TRADE SECRETS OR ANY PATENTS BY SUN RPC
- * OR ANY PART THEREOF.
- *
- * In no event will Sun Microsystems, Inc. be liable for any lost revenue
- * or profits or other special, indirect and consequential damages, even if
- * Sun has been advised of the possibility of such damages.
- *
- * Sun Microsystems, Inc.
- * 2550 Garcia Avenue
- * Mountain View, California 94043
- */
-
-#include "includes.h"
-
-#ifndef HAVE_BINDRESVPORT_SA
-
-#if defined(LIBC_SCCS) && !defined(lint)
-static char *rcsid = "$OpenBSD: bindresvport.c,v 1.13 2000/01/26 03:43:21 deraadt Exp $";
-#endif /* LIBC_SCCS and not lint */
-
-/*
- * Copyright (c) 1987 by Sun Microsystems, Inc.
- *
- * Portions Copyright(C) 1996, Jason Downs. All rights reserved.
- */
-
-#include "includes.h"
-
-#define STARTPORT 600
-#define ENDPORT (IPPORT_RESERVED - 1)
-#define NPORTS (ENDPORT - STARTPORT + 1)
-
-/*
- * Bind a socket to a privileged IP port
- */
-int
-bindresvport_sa(sd, sa)
- int sd;
- struct sockaddr *sa;
-{
- int error, af;
- struct sockaddr_storage myaddr;
- struct sockaddr_in *sin;
- struct sockaddr_in6 *sin6;
- u_int16_t *portp;
- u_int16_t port;
- socklen_t salen;
- int i;
-
- if (sa == NULL) {
- memset(&myaddr, 0, sizeof(myaddr));
- sa = (struct sockaddr *)&myaddr;
-
- if (getsockname(sd, sa, &salen) == -1)
- return -1; /* errno is correctly set */
-
- af = sa->sa_family;
- memset(&myaddr, 0, salen);
- } else
- af = sa->sa_family;
-
- if (af == AF_INET) {
- /* LINTED */
- sin = (struct sockaddr_in *)sa;
- salen = sizeof(struct sockaddr_in);
- portp = &sin->sin_port;
- } else if (af == AF_INET6) {
- /* LINTED */
- sin6 = (struct sockaddr_in6 *)sa;
- salen = sizeof(struct sockaddr_in6);
- portp = &sin6->sin6_port;
- } else {
- errno = EPFNOSUPPORT;
- return (-1);
- }
- sa->sa_family = af;
-
- port = ntohs(*portp);
- if (port == 0)
- port = (arc4random() % NPORTS) + STARTPORT;
-
- /* Avoid warning */
- error = -1;
-
- for(i = 0; i < NPORTS; i++) {
- *portp = htons(port);
-
- error = bind(sd, sa, salen);
-
- /* Terminate on success */
- if (error == 0)
- break;
-
- /* Terminate on errors, except "address already in use" */
- if ((error < 0) && !((errno == EADDRINUSE) || (errno == EINVAL)))
- break;
-
- port++;
- if (port > ENDPORT)
- port = STARTPORT;
- }
-
- return (error);
-}
-
-#endif /* HAVE_BINDRESVPORT_SA */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/bsd-arc4random.c b/usr/src/cmd/ssh/libopenbsd-compat/common/bsd-arc4random.c
deleted file mode 100644
index 3dac16f70a..0000000000
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/bsd-arc4random.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * Copyright (c) 1999-2000 Damien Miller. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-#include "log.h"
-
-RCSID("$Id: bsd-arc4random.c,v 1.5 2002/05/08 22:57:18 tim Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifndef HAVE_ARC4RANDOM
-
-#include <openssl/rand.h>
-#include <openssl/rc4.h>
-#include <openssl/err.h>
-
-/* Size of key to use */
-#define SEED_SIZE 20
-
-/* Number of bytes to reseed after */
-#define REKEY_BYTES (1 << 24)
-
-static int rc4_ready = 0;
-static RC4_KEY rc4;
-
-unsigned int arc4random(void)
-{
- unsigned int r = 0;
- static int first_time = 1;
-
- if (rc4_ready <= 0) {
- if (first_time)
- seed_rng();
- first_time = 0;
- arc4random_stir();
- }
-
- RC4(&rc4, sizeof(r), (unsigned char *)&r, (unsigned char *)&r);
-
- rc4_ready -= sizeof(r);
-
- return(r);
-}
-
-void arc4random_stir(void)
-{
- unsigned char rand_buf[SEED_SIZE];
-
- memset(&rc4, 0, sizeof(rc4));
- if (!RAND_bytes(rand_buf, sizeof(rand_buf)))
- fatal("Couldn't obtain random bytes (error %ld)",
- ERR_get_error());
- RC4_set_key(&rc4, sizeof(rand_buf), rand_buf);
- memset(rand_buf, 0, sizeof(rand_buf));
-
- rc4_ready = REKEY_BYTES;
-}
-#endif /* !HAVE_ARC4RANDOM */
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/bsd-asprintf.c b/usr/src/cmd/ssh/libopenbsd-compat/common/bsd-asprintf.c
deleted file mode 100644
index f06e7415de..0000000000
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/bsd-asprintf.c
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2004 Darren Tucker.
- *
- * Based originally on asprintf.c from OpenBSD:
- * Copyright (c) 1997 Todd C. Miller <Todd.Miller@courtesan.com>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "includes.h"
-
-#ifndef HAVE_VASPRINTF
-
-#include <errno.h>
-#include <stdarg.h>
-#include <stdlib.h>
-
-#ifndef VA_COPY
-#ifdef HAVE_VA_COPY
-#define VA_COPY(dest, src) va_copy(dest, src)
-#else
-#ifdef HAVE___VA_COPY
-#define VA_COPY(dest, src) __va_copy(dest, src)
-#else
-#define VA_COPY(dest, src) (dest) = (src)
-#endif
-#endif
-#endif
-
-#define INIT_SZ 128
-
-int
-vasprintf(char **str, const char *fmt, va_list ap)
-{
- int ret = -1;
- va_list ap2;
- char *string, *newstr;
- size_t len;
-
- VA_COPY(ap2, ap);
- if ((string = malloc(INIT_SZ)) == NULL)
- goto fail;
-
- ret = vsnprintf(string, INIT_SZ, fmt, ap2);
- if (ret >= 0 && ret < INIT_SZ) { /* succeeded with initial alloc */
- *str = string;
- } else if (ret == INT_MAX || ret < 0) { /* Bad length */
- free(string);
- goto fail;
- } else { /* bigger than initial, realloc allowing for nul */
- len = (size_t)ret + 1;
- if ((newstr = realloc(string, len)) == NULL) {
- free(string);
- goto fail;
- } else {
- va_end(ap2);
- VA_COPY(ap2, ap);
- ret = vsnprintf(newstr, len, fmt, ap2);
- if (ret >= 0 && (size_t)ret < len) {
- *str = newstr;
- } else { /* failed with realloc'ed string, give up */
- free(newstr);
- goto fail;
- }
- }
- }
- va_end(ap2);
- return (ret);
-
-fail:
- *str = NULL;
- errno = ENOMEM;
- va_end(ap2);
- return (-1);
-}
-#endif
-
-#ifndef HAVE_ASPRINTF
-int
-asprintf(char **str, const char *fmt, ...)
-{
- va_list ap;
- int ret;
-
- *str = NULL;
- va_start(ap, fmt);
- ret = vasprintf(str, fmt, ap);
- va_end(ap);
-
- return (ret);
-}
-#endif
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/bsd-cray.c b/usr/src/cmd/ssh/libopenbsd-compat/common/bsd-cray.c
deleted file mode 100644
index dd3bd2cac5..0000000000
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/bsd-cray.c
+++ /dev/null
@@ -1,802 +0,0 @@
-/*
- * $Id: bsd-cray.c,v 1.8 2002/09/26 00:38:51 tim Exp $
- *
- * bsd-cray.c
- *
- * Copyright (c) 2002, Cray Inc. (Wendy Palm <wendyp@cray.com>)
- * Significant portions provided by
- * Wayne Schroeder, SDSC <schroeder@sdsc.edu>
- * William Jones, UTexas <jones@tacc.utexas.edu>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Created: Apr 22 16.34:00 2002 wp
- *
- * This file contains functions required for proper execution
- * on UNICOS systems.
- *
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "includes.h"
-
-#ifdef _UNICOS
-
-#include <udb.h>
-#include <tmpdir.h>
-#include <unistd.h>
-#include <sys/category.h>
-#include <utmp.h>
-#include <sys/jtab.h>
-#include <signal.h>
-#include <sys/priv.h>
-#include <sys/secparm.h>
-#include <sys/tfm.h>
-#include <sys/usrv.h>
-#include <sys/sysv.h>
-#include <sys/sectab.h>
-#include <sys/secstat.h>
-#include <sys/stat.h>
-#include <sys/session.h>
-#include <stdlib.h>
-#include <pwd.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <ia.h>
-#include <urm.h>
-#include "ssh.h"
-#include "log.h"
-#include "servconf.h"
-#include "bsd-cray.h"
-
-#define MAXACID 80
-
-extern ServerOptions options;
-
-char cray_tmpdir[TPATHSIZ+1]; /* job TMPDIR path */
-
-struct sysv sysv; /* system security structure */
-struct usrv usrv; /* user security structure */
-
-/*
- * Functions.
- */
-void cray_retain_utmp(struct utmp *, int);
-void cray_delete_tmpdir(char *, int, uid_t);
-void cray_init_job(struct passwd *);
-void cray_set_tmpdir(struct utmp *);
-void cray_login_failure(char *, int);
-int cray_setup(uid_t, char *, const char *);
-int cray_access_denied(char *);
-
-void
-cray_login_failure(char *username, int errcode)
-{
- struct udb *ueptr; /* UDB pointer for username */
- ia_failure_t fsent; /* ia_failure structure */
- ia_failure_ret_t fret; /* ia_failure return stuff */
- struct jtab jtab; /* job table structure */
- int jid = 0; /* job id */
-
- if ((jid = getjtab(&jtab)) < 0) {
- debug("cray_login_failure(): getjtab error");
- }
- getsysudb();
- if ((ueptr = getudbnam(username)) == UDB_NULL) {
- debug("cray_login_failure(): getudbname() returned NULL");
- }
- endudb();
- fsent.revision = 0;
- fsent.uname = username;
- fsent.host = (char *)get_canonical_hostname(options.verify_reverse_mapping);
- fsent.ttyn = "sshd";
- fsent.caller = IA_SSHD;
- fsent.flags = IA_INTERACTIVE;
- fsent.ueptr = ueptr;
- fsent.jid = jid;
- fsent.errcode = errcode;
- fsent.pwdp = NULL;
- fsent.exitcode = 0; /* dont exit in ia_failure() */
-
- fret.revision = 0;
- fret.normal = 0;
-
- /*
- * Call ia_failure because of an login failure.
- */
- ia_failure(&fsent,&fret);
-}
-
-/*
- * Cray access denied
- */
-int
-cray_access_denied(char *username)
-{
- struct udb *ueptr; /* UDB pointer for username */
- int errcode; /* IA errorcode */
-
- errcode = 0;
- getsysudb();
- if ((ueptr = getudbnam(username)) == UDB_NULL) {
- debug("cray_login_failure(): getudbname() returned NULL");
- }
- endudb();
- if (ueptr && ueptr->ue_disabled)
- errcode = IA_DISABLED;
- if (errcode)
- cray_login_failure(username, errcode);
- return (errcode);
-}
-
-int
-cray_setup (uid_t uid, char *username, const char *command)
-{
- extern struct udb *getudb();
- extern char *setlimits();
-
- int err; /* error return */
- time_t system_time; /* current system clock */
- time_t expiration_time; /* password expiration time */
- int maxattempts; /* maximum no. of failed login attempts */
- int SecureSys; /* unicos security flag */
- int minslevel = 0; /* system minimum security level */
- int i, j;
- int valid_acct = -1; /* flag for reading valid acct */
- char acct_name[MAXACID] = { "" }; /* used to read acct name */
- struct jtab jtab; /* Job table struct */
- struct udb ue; /* udb entry for logging-in user */
- struct udb *up; /* pointer to UDB entry */
- struct secstat secinfo; /* file security attributes */
- struct servprov init_info; /* used for sesscntl() call */
- int jid; /* job ID */
- int pid; /* process ID */
- char *sr; /* status return from setlimits() */
- char *ttyn = NULL; /* ttyname or command name*/
- char hostname[MAXHOSTNAMELEN];
- passwd_t pwdacm,
- pwddialup,
- pwdudb,
- pwdwal,
- pwddce; /* passwd stuff for ia_user */
- ia_user_ret_t uret; /* stuff returned from ia_user */
- ia_user_t usent; /* ia_user main structure */
- int ia_rcode; /* ia_user return code */
- ia_failure_t fsent; /* ia_failure structure */
- ia_failure_ret_t fret; /* ia_failure return stuff */
- ia_success_t ssent; /* ia_success structure */
- ia_success_ret_t sret; /* ia_success return stuff */
- int ia_mlsrcode; /* ia_mlsuser return code */
- int secstatrc; /* [f]secstat return code */
-
- if (SecureSys = (int)sysconf(_SC_CRAY_SECURE_SYS)) {
- getsysv(&sysv, sizeof(struct sysv));
- minslevel = sysv.sy_minlvl;
- if (getusrv(&usrv) < 0) {
- debug("getusrv() failed, errno = %d",errno);
- exit(1);
- }
- }
- hostname[0] = '\0';
- strncpy(hostname,
- (char *)get_canonical_hostname(options.verify_reverse_mapping),
- MAXHOSTNAMELEN);
- /*
- * Fetch user's UDB entry.
- */
- getsysudb();
- if ((up = getudbnam(username)) == UDB_NULL) {
- debug("cannot fetch user's UDB entry");
- exit(1);
- }
-
- /*
- * Prevent any possible fudging so perform a data
- * safety check and compare the supplied uid against
- * the udb's uid.
- */
- if (up->ue_uid != uid) {
- debug("IA uid missmatch");
- exit(1);
- }
- endudb();
-
- if ((jid = getjtab (&jtab)) < 0) {
- debug("getjtab");
- return -1;
- }
- pid = getpid();
- ttyn = ttyname(0);
- if (SecureSys) {
- if (ttyn) {
- secstatrc = secstat(ttyn, &secinfo);
- } else {
- secstatrc = fsecstat(1, &secinfo);
- }
- if (secstatrc == 0) {
- debug("[f]secstat() successful");
- } else {
- debug("[f]secstat() error, rc = %d", secstatrc);
- exit(1);
- }
- }
- if ((ttyn == NULL) && ((char *)command != NULL))
- ttyn = (char *)command;
- /*
- * Initialize all structures to call ia_user
- */
- usent.revision = 0;
- usent.uname = username;
- usent.host = hostname;
- usent.ttyn = ttyn;
- usent.caller = IA_SSHD;
- usent.pswdlist = &pwdacm;
- usent.ueptr = &ue;
- usent.flags = IA_INTERACTIVE | IA_FFLAG;
- pwdacm.atype = IA_SECURID;
- pwdacm.pwdp = NULL;
- pwdacm.next = &pwdudb;
-
- pwdudb.atype = IA_UDB;
- pwdudb.pwdp = NULL;
- pwdudb.next = &pwddce;
-
- pwddce.atype = IA_DCE;
- pwddce.pwdp = NULL;
- pwddce.next = &pwddialup;
-
- pwddialup.atype = IA_DIALUP;
- pwddialup.pwdp = NULL;
- /* pwddialup.next = &pwdwal; */
- pwddialup.next = NULL;
-
- pwdwal.atype = IA_WAL;
- pwdwal.pwdp = NULL;
- pwdwal.next = NULL;
-
- uret.revision = 0;
- uret.pswd = NULL;
- uret.normal = 0;
-
- ia_rcode = ia_user(&usent, &uret);
-
- switch (ia_rcode) {
- /*
- * These are acceptable return codes from ia_user()
- */
- case IA_UDBWEEK: /* Password Expires in 1 week */
- expiration_time = ue.ue_pwage.time + ue.ue_pwage.maxage;
- printf ("WARNING - your current password will expire %s\n",
- ctime((const time_t *)&expiration_time));
- break;
- case IA_UDBEXPIRED:
- if (ttyname(0) != NULL) {
- /* Force a password change */
- printf("Your password has expired; Choose a new one.\n");
- execl("/bin/passwd", "passwd", username, 0);
- exit(9);
- }
-
- break;
- case IA_NORMAL: /* Normal Return Code */
- break;
- case IA_BACKDOOR:
- strcpy(ue.ue_name, "root");
- strcpy(ue.ue_passwd, "");
- strcpy(ue.ue_dir, "/");
- strcpy(ue.ue_shell, "/bin/sh");
- strcpy(ue.ue_age, "");
- strcpy(ue.ue_comment, "");
- strcpy(ue.ue_loghost, "");
- strcpy(ue.ue_logline, "");
- ue.ue_uid=-1;
- ue.ue_nice[UDBRC_INTER]=0;
- for (i=0;i<MAXVIDS;i++)
- ue.ue_gids[i]=0;
- ue.ue_logfails=0;
- ue.ue_minlvl=minslevel;
- ue.ue_maxlvl=minslevel;
- ue.ue_deflvl=minslevel;
- ue.ue_defcomps=0;
- ue.ue_comparts=0;
- ue.ue_permits=0;
- ue.ue_trap=0;
- ue.ue_disabled=0;
- ue.ue_logtime=0;
- break;
- case IA_CONSOLE: /* Superuser not from Console */
- case IA_TRUSTED: /* Trusted user */
- if (options.permit_root_login > PERMIT_NO)
- break; /* Accept root login */
- default:
- /*
- * These are failed return codes from ia_user()
- */
- switch (ia_rcode)
- {
- case IA_BADAUTH:
- printf ("Bad authorization, access denied.\n");
- break;
- case IA_DIALUPERR:
- break;
- case IA_DISABLED:
- printf ("Your login has been disabled. Contact the system ");
- printf ("administrator for assistance.\n");
- break;
- case IA_GETSYSV:
- printf ("getsysv() failed - errno = %d\n", errno);
- break;
- case IA_LOCALHOST:
- break;
- case IA_MAXLOGS:
- printf ("Maximum number of failed login attempts exceeded.\n");
- printf ("Access denied.\n");
- break;
- case IA_NOPASS:
- break;
- case IA_PUBLIC:
- break;
- case IA_SECURIDERR:
- break;
- case IA_CONSOLE:
- break;
- case IA_TRUSTED:
- break;
- case IA_UDBERR:
- break;
- case IA_UDBPWDNULL:
- /*
- * NULL password not allowed on MLS systems
- */
- if (SecureSys) {
- printf("NULL Password not allowed on MLS systems.\n");
- }
- break;
- case IA_UNKNOWN:
- break;
- case IA_UNKNOWNYP:
- break;
- case IA_WALERR:
- break;
- default:
- /* nothing special */
- ;
- } /* 2. switch (ia_rcode) */
- /*
- * Authentication failed.
- */
- printf("sshd: Login incorrect, (0%o)\n",
- ia_rcode-IA_ERRORCODE);
-
- /*
- * Initialize structure for ia_failure
- * which will exit.
- */
- fsent.revision = 0;
- fsent.uname = username;
- fsent.host = hostname;
- fsent.ttyn = ttyn;
- fsent.caller = IA_SSHD;
- fsent.flags = IA_INTERACTIVE;
- fsent.ueptr = &ue;
- fsent.jid = jid;
- fsent.errcode = ia_rcode;
- fsent.pwdp = uret.pswd;
- fsent.exitcode = 1;
-
- fret.revision = 0;
- fret.normal = 0;
-
- /*
- * Call ia_failure because of an IA failure.
- * There is no return because ia_failure exits.
- */
-
- ia_failure(&fsent,&fret);
-
- exit(1);
- } /* 1. switch (ia_rcode) */
- ia_mlsrcode = IA_NORMAL;
- if (SecureSys) {
- debug("calling ia_mlsuser()");
- ia_mlsrcode = ia_mlsuser (&ue, &secinfo, &usrv, NULL, 0);
- }
- if (ia_mlsrcode != IA_NORMAL) {
- printf("sshd: Login incorrect, (0%o)\n",
- ia_mlsrcode-IA_ERRORCODE);
- /*
- * Initialize structure for ia_failure
- * which will exit.
- */
- fsent.revision = 0;
- fsent.uname = username;
- fsent.host = hostname;
- fsent.ttyn = ttyn;
- fsent.caller = IA_SSHD;
- fsent.flags = IA_INTERACTIVE;
- fsent.ueptr = &ue;
- fsent.jid = jid;
- fsent.errcode = ia_mlsrcode;
- fsent.pwdp = uret.pswd;
- fsent.exitcode = 1;
- fret.revision = 0;
- fret.normal = 0;
-
- /*
- * Call ia_failure because of an IA failure.
- * There is no return because ia_failure exits.
- */
- ia_failure(&fsent,&fret);
- exit(1);
- }
-
- /* Provide login status information */
- if (options.print_lastlog && ue.ue_logtime != 0) {
- printf("Last successful login was : %.*s ",
- 19, (char *)ctime(&ue.ue_logtime));
-
- if (*ue.ue_loghost != '\0')
- printf("from %.*s\n", sizeof(ue.ue_loghost), ue.ue_loghost);
-
- else printf("on %.*s\n", sizeof(ue.ue_logline), ue.ue_logline);
-
- if ( SecureSys && (ue.ue_logfails != 0))
- printf(" followed by %d failed attempts\n", ue.ue_logfails);
- }
-
-
- /*
- * Call ia_success to process successful I/A.
- */
- ssent.revision = 0;
- ssent.uname = username;
- ssent.host = hostname;
- ssent.ttyn = ttyn;
- ssent.caller = IA_SSHD;
- ssent.flags = IA_INTERACTIVE;
- ssent.ueptr = &ue;
- ssent.jid = jid;
- ssent.errcode = ia_rcode;
- ssent.us = NULL;
- ssent.time = 1; /* Set ue_logtime */
-
- sret.revision = 0;
- sret.normal = 0;
-
- ia_success(&ssent,&sret);
-
- /*
- * Query for account, iff > 1 valid acid & askacid permbit
- */
- if (((ue.ue_permbits & PERMBITS_ACCTID) ||
- (ue.ue_acids[0] >= 0) && (ue.ue_acids[1] >= 0)) &&
- ue.ue_permbits & PERMBITS_ASKACID) {
- if (ttyname(0) != NULL) {
- debug("cray_setup: ttyname true case, %.100s", ttyname);
- while (valid_acct == -1) {
- printf("Account (? for available accounts)"
- " [%s]: ", acid2nam(ue.ue_acids[0]));
- gets(acct_name);
- switch (acct_name[0]) {
- case EOF:
- exit(0);
- break;
- case '\0':
- valid_acct = ue.ue_acids[0];
- strcpy(acct_name, acid2nam(valid_acct));
- break;
- case '?':
- /* Print the list 3 wide */
- for (i = 0, j = 0; i < MAXVIDS; i++) {
- if (ue.ue_acids[i] == -1) {
- printf("\n");
- break;
- }
- if (++j == 4) {
- j = 1;
- printf("\n");
- }
- printf(" %s",
- acid2nam(ue.ue_acids[i]));
- }
- if (ue.ue_permbits & PERMBITS_ACCTID)
- printf("\"acctid\" permbit also allows"
- " you to select any valid "
- "account name.\n");
- printf("\n");
- break;
- default:
- if ((valid_acct = nam2acid(acct_name)) == -1) printf("Account id not found for"
- " account name \"%s\"\n\n",
- acct_name);
- break;
- }
- /*
- * If an account was given, search the user's
- * acids array to verify they can use this account.
- */
- if ((valid_acct != -1) &&
- !(ue.ue_permbits & PERMBITS_ACCTID)) {
- for (i = 0; i < MAXVIDS; i++) {
- if (ue.ue_acids[i] == -1)
- break;
- if (valid_acct == ue.ue_acids[i])
- break;
- }
- if (i == MAXVIDS ||
- ue.ue_acids[i] == -1) {
- fprintf(stderr, "Cannot set"
- " account name to "
- "\"%s\", permission "
- "denied\n\n", acct_name);
- valid_acct = -1;
- }
- }
- }
- } else {
- /*
- * The client isn't connected to a terminal and can't
- * respond to an acid prompt. Use default acid.
- */
- debug("cray_setup: ttyname false case, %.100s", ttyname);
- valid_acct = ue.ue_acids[0];
- }
- } else {
- /*
- * The user doesn't have the askacid permbit set or
- * only has one valid account to use.
- */
- valid_acct = ue.ue_acids[0];
- }
- if (acctid(0, valid_acct) < 0) {
- printf ("Bad account id: %d\n", valid_acct);
- exit(1);
- }
-
-/* set up shares and quotas */
-/* Now set shares, quotas, limits, including CPU time for the (interactive)
- * job and process, and set up permissions (for chown etc), etc.
- */
- if (setshares(ue.ue_uid, valid_acct, printf, 0, 0)) {
- printf("Unable to give %d shares to <%s>(%d/%d)\n", ue.ue_shares, ue.ue_name, ue.ue_uid, valid_acct);
- exit(1);
- }
-
- sr = setlimits(username, C_PROC, pid, UDBRC_INTER);
- if (sr != NULL) {
- debug("%.200s", sr);
- exit(1);
- }
- sr = setlimits(username, C_JOB, jid, UDBRC_INTER);
- if (sr != NULL) {
- debug("%.200s", sr);
- exit(1);
- }
- /*
- * Place the service provider information into
- * the session table (Unicos) or job table (Unicos/mk).
- * There exist double defines for the job/session table in
- * unicos/mk (jtab.h) so no need for a compile time switch.
- */
- bzero((char *)&init_info, sizeof(struct servprov));
- init_info.s_sessinit.si_id = URM_SPT_LOGIN;
- init_info.s_sessinit.si_pid = getpid();
- init_info.s_sessinit.si_sid = jid;
- init_info.s_routing.seqno = 0;
- init_info.s_routing.iadrs = 0;
- sesscntl(0, S_SETSERVPO, (int)&init_info);
-
- /*
- * Set user and controlling tty security attributes.
- */
- if (SecureSys) {
- if (setusrv(&usrv) == -1) {
- debug("setusrv() failed, errno = %d",errno);
- exit(1);
- }
- }
-
- return(0);
-}
-
-/*
- * The rc.* and /etc/sdaemon methods of starting a program on unicos/unicosmk
- * can have pal privileges that sshd can inherit which
- * could allow a user to su to root with out a password.
- * This subroutine clears all privileges.
- */
-void
-drop_cray_privs()
-{
-#if defined(_SC_CRAY_PRIV_SU)
- priv_proc_t* privstate;
- int result;
- extern int priv_set_proc();
- extern priv_proc_t* priv_init_proc();
-
- /*
- * If ether of theses two flags are not set
- * then don't allow this version of ssh to run.
- */
- if (!sysconf(_SC_CRAY_PRIV_SU))
- fatal("Not PRIV_SU system.");
- if (!sysconf(_SC_CRAY_POSIX_PRIV))
- fatal("Not POSIX_PRIV.");
-
- debug("Setting MLS labels.");;
-
- if (sysconf(_SC_CRAY_SECURE_MAC)) {
- usrv.sv_minlvl = SYSLOW;
- usrv.sv_actlvl = SYSHIGH;
- usrv.sv_maxlvl = SYSHIGH;
- } else {
- usrv.sv_minlvl = sysv.sy_minlvl;
- usrv.sv_actlvl = sysv.sy_minlvl;
- usrv.sv_maxlvl = sysv.sy_maxlvl;
- }
- usrv.sv_actcmp = 0;
- usrv.sv_valcmp = sysv.sy_valcmp;
-
- usrv.sv_intcat = TFM_SYSTEM;
- usrv.sv_valcat |= (TFM_SYSTEM | TFM_SYSFILE);
-
- if (setusrv(&usrv) < 0)
- fatal("%s(%d): setusrv(): %s", __FILE__, __LINE__,
- strerror(errno));
-
- if ((privstate = priv_init_proc()) != NULL) {
- result = priv_set_proc(privstate);
- if (result != 0 )
- fatal("%s(%d): priv_set_proc(): %s",
- __FILE__, __LINE__, strerror(errno));
- priv_free_proc(privstate);
- }
- debug ("Privileges should be cleared...");
-#else
- /* XXX: do this differently */
-# error Cray systems must be run with _SC_CRAY_PRIV_SU on!
-#endif
-}
-
-
-/*
- * Retain utmp/wtmp information - used by cray accounting.
- */
-void
-cray_retain_utmp(struct utmp *ut, int pid)
-{
- int fd;
- struct utmp utmp;
-
- if ((fd = open(UTMP_FILE, O_RDONLY)) != -1) {
- while (read(fd, (char *)&utmp, sizeof(utmp)) == sizeof(utmp)) {
- if (pid == utmp.ut_pid) {
- ut->ut_jid = utmp.ut_jid;
- strncpy(ut->ut_tpath, utmp.ut_tpath, sizeof(utmp.ut_tpath));
- strncpy(ut->ut_host, utmp.ut_host, sizeof(utmp.ut_host));
- strncpy(ut->ut_name, utmp.ut_name, sizeof(utmp.ut_name));
- break;
- }
- }
- close(fd);
- }
- else
- fatal("Unable to open utmp file");
-}
-
-/*
- * tmpdir support.
- */
-
-/*
- * find and delete jobs tmpdir.
- */
-void
-cray_delete_tmpdir(char *login, int jid, uid_t uid)
-{
- int child;
- static char jtmp[TPATHSIZ];
- struct stat statbuf;
- int c;
- int wstat;
-
- for (c = 'a'; c <= 'z'; c++) {
- snprintf(jtmp, TPATHSIZ, "%s/jtmp.%06d%c", JTMPDIR, jid, c);
- if (stat(jtmp, &statbuf) == 0 && statbuf.st_uid == uid)
- break;
- }
-
- if (c > 'z')
- return;
-
- if ((child = fork()) == 0) {
- execl(CLEANTMPCMD, CLEANTMPCMD, login, jtmp, (char *)NULL);
- fatal("cray_delete_tmpdir: execl of CLEANTMPCMD failed");
- }
-
- while (waitpid(child, &wstat, 0) == -1 && errno == EINTR)
- ;
-}
-
-/*
- * Remove tmpdir on job termination.
- */
-void
-cray_job_termination_handler(int sig)
-{
- int jid;
- char *login = NULL;
- struct jtab jtab;
-
- debug("received signal %d",sig);
-
- if ((jid = waitjob(&jtab)) == -1 ||
- (login = uid2nam(jtab.j_uid)) == NULL)
- return;
-
- cray_delete_tmpdir(login, jid, jtab.j_uid);
-}
-
-/*
- * Set job id and create tmpdir directory.
- */
-void
-cray_init_job(struct passwd *pw)
-{
- int jid;
- int c;
-
- jid = setjob(pw->pw_uid, WJSIGNAL);
- if (jid < 0)
- fatal("System call setjob failure");
-
- for (c = 'a'; c <= 'z'; c++) {
- snprintf(cray_tmpdir, TPATHSIZ, "%s/jtmp.%06d%c", JTMPDIR, jid, c);
- if (mkdir(cray_tmpdir, JTMPMODE) != 0)
- continue;
- if (chown(cray_tmpdir, pw->pw_uid, pw->pw_gid) != 0) {
- rmdir(cray_tmpdir);
- continue;
- }
- break;
- }
-
- if (c > 'z')
- cray_tmpdir[0] = '\0';
-}
-
-void
-cray_set_tmpdir(struct utmp *ut)
-{
- int jid;
- struct jtab jbuf;
-
- if ((jid = getjtab(&jbuf)) < 0)
- return;
-
- /*
- * Set jid and tmpdir in utmp record.
- */
- ut->ut_jid = jid;
- strncpy(ut->ut_tpath, cray_tmpdir, TPATHSIZ);
-}
-#endif
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/bsd-cygwin_util.c b/usr/src/cmd/ssh/libopenbsd-compat/common/bsd-cygwin_util.c
deleted file mode 100644
index 5e31351628..0000000000
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/bsd-cygwin_util.c
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * cygwin_util.c
- *
- * Copyright (c) 2000, 2001, Corinna Vinschen <vinschen@cygnus.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Created: Sat Sep 02 12:17:00 2000 cv
- *
- * This file contains functions for forcing opened file descriptors to
- * binary mode on Windows systems.
- */
-
-#include "includes.h"
-
-RCSID("$Id: bsd-cygwin_util.c,v 1.8 2002/04/15 22:00:52 stevesk Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef HAVE_CYGWIN
-
-#include <fcntl.h>
-#include <stdlib.h>
-#include <sys/utsname.h>
-#include <sys/vfs.h>
-#include <windows.h>
-#define is_winnt (GetVersion() < 0x80000000)
-
-#define ntsec_on(c) ((c) && strstr((c),"ntsec") && !strstr((c),"nontsec"))
-#define ntea_on(c) ((c) && strstr((c),"ntea") && !strstr((c),"nontea"))
-
-#if defined(open) && open == binary_open
-# undef open
-#endif
-#if defined(pipe) && open == binary_pipe
-# undef pipe
-#endif
-
-int binary_open(const char *filename, int flags, ...)
-{
- va_list ap;
- mode_t mode;
-
- va_start(ap, flags);
- mode = va_arg(ap, mode_t);
- va_end(ap);
- return open(filename, flags | O_BINARY, mode);
-}
-
-int binary_pipe(int fd[2])
-{
- int ret = pipe(fd);
-
- if (!ret) {
- setmode (fd[0], O_BINARY);
- setmode (fd[1], O_BINARY);
- }
- return ret;
-}
-
-int check_nt_auth(int pwd_authenticated, struct passwd *pw)
-{
- /*
- * The only authentication which is able to change the user
- * context on NT systems is the password authentication. So
- * we deny all requsts for changing the user context if another
- * authentication method is used.
- *
- * This doesn't apply to Cygwin versions >= 1.3.2 anymore which
- * uses the undocumented NtCreateToken() call to create a user
- * token if the process has the appropriate privileges and if
- * CYGWIN ntsec setting is on.
- */
- static int has_create_token = -1;
-
- if (pw == NULL)
- return 0;
- if (is_winnt) {
- if (has_create_token < 0) {
- struct utsname uts;
- int major_high = 0, major_low = 0, minor = 0;
- char *cygwin = getenv("CYGWIN");
-
- has_create_token = 0;
- if (ntsec_on(cygwin) && !uname(&uts)) {
- sscanf(uts.release, "%d.%d.%d",
- &major_high, &major_low, &minor);
- if (major_high > 1 ||
- (major_high == 1 && (major_low > 3 ||
- (major_low == 3 && minor >= 2))))
- has_create_token = 1;
- }
- }
- if (has_create_token < 1 &&
- !pwd_authenticated && geteuid() != pw->pw_uid)
- return 0;
- }
- return 1;
-}
-
-int check_ntsec(const char *filename)
-{
- char *cygwin;
- int allow_ntea = 0;
- int allow_ntsec = 0;
- struct statfs fsstat;
-
- /* Windows 95/98/ME don't support file system security at all. */
- if (!is_winnt)
- return 0;
-
- /* Evaluate current CYGWIN settings. */
- cygwin = getenv("CYGWIN");
- allow_ntea = ntea_on(cygwin);
- allow_ntsec = ntsec_on(cygwin);
-
- /*
- * `ntea' is an emulation of POSIX attributes. It doesn't support
- * real file level security as ntsec on NTFS file systems does
- * but it supports FAT filesystems. `ntea' is minimum requirement
- * for security checks.
- */
- if (allow_ntea)
- return 1;
-
- /*
- * Retrieve file system flags. In Cygwin, file system flags are
- * copied to f_type which has no meaning in Win32 itself.
- */
- if (statfs(filename, &fsstat))
- return 1;
-
- /*
- * Only file systems supporting ACLs are able to set permissions.
- * `ntsec' is the setting in Cygwin which switches using of NTFS
- * ACLs to support POSIX permissions on files.
- */
- if (fsstat.f_type & FS_PERSISTENT_ACLS)
- return allow_ntsec;
-
- return 0;
-}
-
-void register_9x_service(void)
-{
- HINSTANCE kerneldll;
- DWORD (*RegisterServiceProcess)(DWORD, DWORD);
-
- /* The service register mechanism in 9x/Me is pretty different from
- * NT/2K/XP. In NT/2K/XP we're using a special service starter
- * application to register and control sshd as service. This method
- * doesn't play nicely with 9x/Me. For that reason we register here
- * as service when running under 9x/Me. This function is only called
- * by the child sshd when it's going to daemonize.
- */
- if (is_winnt)
- return;
- if (! (kerneldll = LoadLibrary("KERNEL32.DLL")))
- return;
- if (! (RegisterServiceProcess = (DWORD (*)(DWORD, DWORD))
- GetProcAddress(kerneldll, "RegisterServiceProcess")))
- return;
- RegisterServiceProcess(0, 1);
-}
-
-#endif /* HAVE_CYGWIN */
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/bsd-getpeereid.c b/usr/src/cmd/ssh/libopenbsd-compat/common/bsd-getpeereid.c
deleted file mode 100644
index 85e68ca6bf..0000000000
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/bsd-getpeereid.c
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2002 Damien Miller. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include "includes.h"
-
-RCSID("$Id: bsd-getpeereid.c,v 1.1 2002/09/12 00:33:02 djm Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#if !defined(HAVE_GETPEEREID)
-
-#if defined(SO_PEERCRED)
-int
-getpeereid(int s, uid_t *euid, gid_t *gid)
-{
- struct ucred cred;
- size_t len = sizeof(cred);
-
- if (getsockopt(s, SOL_SOCKET, SO_PEERCRED, &cred, &len) < 0)
- return (-1);
- *euid = cred.uid;
- *gid = cred.gid;
-
- return (0);
-}
-#elif defined(HAVE_GETPEERUCRED)
-int
-getpeereid(int s, uid_t *euid, gid_t *gid)
-{
- ucred_t *ucred = NULL;
-
- if (getpeerucred(s, &ucred) == -1)
- return (-1);
- if ((*euid = ucred_geteuid(ucred)) == (uid_t)-1)
- return (-1);
- if ((*gid = ucred_getrgid(ucred)) == (gid_t)-1)
- return (-1);
-
- ucred_free(ucred);
-
- return (0);
-}
-#else
-int
-getpeereid(int s, uid_t *euid, gid_t *gid)
-{
- *euid = geteuid();
- *gid = getgid();
-
- return (0);
-}
-#endif /* defined(SO_PEERCRED) */
-
-#endif /* !defined(HAVE_GETPEEREID) */
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/bsd-misc.c b/usr/src/cmd/ssh/libopenbsd-compat/common/bsd-misc.c
deleted file mode 100644
index a7b9cbf052..0000000000
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/bsd-misc.c
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Copyright (c) 1999-2000 Damien Miller. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-
-RCSID("$Id: bsd-misc.c,v 1.10 2002/07/08 21:09:41 mouring Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-char *get_progname(char *argv0)
-{
-#ifdef HAVE___PROGNAME
- extern char *__progname;
-
- return __progname;
-#else
- char *p;
-
- if (argv0 == NULL)
- return "unknown"; /* XXX */
- p = strrchr(argv0, '/');
- if (p == NULL)
- p = argv0;
- else
- p++;
- return p;
-#endif
-}
-
-#ifndef HAVE_SETLOGIN
-int setlogin(const char *name)
-{
- return(0);
-}
-#endif /* !HAVE_SETLOGIN */
-
-#ifndef HAVE_INNETGR
-int innetgr(const char *netgroup, const char *host,
- const char *user, const char *domain)
-{
- return(0);
-}
-#endif /* HAVE_INNETGR */
-
-#if !defined(HAVE_SETEUID) && defined(HAVE_SETREUID)
-int seteuid(uid_t euid)
-{
- return(setreuid(-1,euid));
-}
-#endif /* !defined(HAVE_SETEUID) && defined(HAVE_SETREUID) */
-
-#if !defined(HAVE_SETEGID) && defined(HAVE_SETRESGID)
-int setegid(uid_t egid)
-{
- return(setresgid(-1,egid,-1));
-}
-#endif /* !defined(HAVE_SETEGID) && defined(HAVE_SETRESGID) */
-
-#if !defined(HAVE_STRERROR) && defined(HAVE_SYS_ERRLIST) && defined(HAVE_SYS_NERR)
-const char *strerror(int e)
-{
- extern int sys_nerr;
- extern char *sys_errlist[];
-
- if ((e >= 0) && (e < sys_nerr))
- return(sys_errlist[e]);
- else
- return("unlisted error");
-}
-#endif
-
-#ifndef HAVE_UTIMES
-int utimes(char *filename, struct timeval *tvp)
-{
- struct utimbuf ub;
-
- ub.actime = tvp[0].tv_sec;
- ub.modtime = tvp[1].tv_sec;
-
- return(utime(filename, &ub));
-}
-#endif
-
-#ifndef HAVE_TRUNCATE
-int truncate (const char *path, off_t length)
-{
- int fd, ret, saverrno;
-
- fd = open(path, O_WRONLY);
- if (fd < 0)
- return -1;
-
- ret = ftruncate(fd, length);
- saverrno = errno;
- (void) close (fd);
- if (ret == -1)
- errno = saverrno;
- return(ret);
-}
-#endif /* HAVE_TRUNCATE */
-
-#if !defined(HAVE_SETGROUPS) && defined(SETGROUPS_NOOP)
-/*
- * Cygwin setgroups should be a noop.
- */
-int
-setgroups(size_t size, const gid_t *list)
-{
- return 0;
-}
-#endif
-
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/bsd-snprintf.c b/usr/src/cmd/ssh/libopenbsd-compat/common/bsd-snprintf.c
deleted file mode 100644
index 12148fd6c2..0000000000
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/bsd-snprintf.c
+++ /dev/null
@@ -1,746 +0,0 @@
-/**************************************************************
- * Original:
- * Patrick Powell Tue Apr 11 09:48:21 PDT 1995
- * A bombproof version of doprnt (dopr) included.
- * Sigh. This sort of thing is always nasty do deal with. Note that
- * the version here does not include floating point...
- *
- * snprintf() is used instead of sprintf() as it does limit checks
- * for string length. This covers a nasty loophole.
- *
- * The other functions are there to prevent NULL pointers from
- * causing nast effects.
- *
- * More Recently:
- * Brandon Long <blong@fiction.net> 9/15/96 for mutt 0.43
- * This was ugly. It is still ugly. I opted out of floating point
- * numbers, but the formatter understands just about everything
- * from the normal C string format, at least as far as I can tell from
- * the Solaris 2.5 printf(3S) man page.
- *
- * Brandon Long <blong@fiction.net> 10/22/97 for mutt 0.87.1
- * Ok, added some minimal floating point support, which means this
- * probably requires libm on most operating systems. Don't yet
- * support the exponent (e,E) and sigfig (g,G). Also, fmtint()
- * was pretty badly broken, it just wasn't being exercised in ways
- * which showed it, so that's been fixed. Also, formated the code
- * to mutt conventions, and removed dead code left over from the
- * original. Also, there is now a builtin-test, just compile with:
- * gcc -DTEST_SNPRINTF -o snprintf snprintf.c -lm
- * and run snprintf for results.
- *
- * Thomas Roessler <roessler@guug.de> 01/27/98 for mutt 0.89i
- * The PGP code was using unsigned hexadecimal formats.
- * Unfortunately, unsigned formats simply didn't work.
- *
- * Michael Elkins <me@cs.hmc.edu> 03/05/98 for mutt 0.90.8
- * The original code assumed that both snprintf() and vsnprintf() were
- * missing. Some systems only have snprintf() but not vsnprintf(), so
- * the code is now broken down under HAVE_SNPRINTF and HAVE_VSNPRINTF.
- *
- * Ben Lindstrom <mouring@eviladmin.org> 09/27/00 for OpenSSH
- * Welcome to the world of %lld and %qd support. With other
- * long long support. This is needed for sftp-server to work
- * right.
- *
- * Ben Lindstrom <mouring@eviladmin.org> 02/12/01 for OpenSSH
- * Removed all hint of VARARGS stuff and banished it to the void,
- * and did a bit of KNF style work to make things a bit more
- * acceptable. Consider stealing from mutt or enlightenment.
- **************************************************************/
-
-#include "includes.h"
-
-RCSID("$Id: bsd-snprintf.c,v 1.5 2001/02/25 23:20:41 mouring Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#if defined(BROKEN_SNPRINTF) /* For those with broken snprintf() */
-# undef HAVE_SNPRINTF
-# undef HAVE_VSNPRINTF
-#endif
-
-#if !defined(HAVE_SNPRINTF) || !defined(HAVE_VSNPRINTF)
-
-static void
-dopr(char *buffer, size_t maxlen, const char *format, va_list args);
-
-static void
-fmtstr(char *buffer, size_t *currlen, size_t maxlen, char *value, int flags,
- int min, int max);
-
-static void
-fmtint(char *buffer, size_t *currlen, size_t maxlen, long value, int base,
- int min, int max, int flags);
-
-static void
-fmtfp(char *buffer, size_t *currlen, size_t maxlen, long double fvalue,
- int min, int max, int flags);
-
-static void
-dopr_outch(char *buffer, size_t *currlen, size_t maxlen, char c);
-
-/*
- * dopr(): poor man's version of doprintf
- */
-
-/* format read states */
-#define DP_S_DEFAULT 0
-#define DP_S_FLAGS 1
-#define DP_S_MIN 2
-#define DP_S_DOT 3
-#define DP_S_MAX 4
-#define DP_S_MOD 5
-#define DP_S_CONV 6
-#define DP_S_DONE 7
-
-/* format flags - Bits */
-#define DP_F_MINUS (1 << 0)
-#define DP_F_PLUS (1 << 1)
-#define DP_F_SPACE (1 << 2)
-#define DP_F_NUM (1 << 3)
-#define DP_F_ZERO (1 << 4)
-#define DP_F_UP (1 << 5)
-#define DP_F_UNSIGNED (1 << 6)
-
-/* Conversion Flags */
-#define DP_C_SHORT 1
-#define DP_C_LONG 2
-#define DP_C_LDOUBLE 3
-#define DP_C_LONG_LONG 4
-
-#define char_to_int(p) (p - '0')
-#define abs_val(p) (p < 0 ? -p : p)
-
-
-static void
-dopr(char *buffer, size_t maxlen, const char *format, va_list args)
-{
- char *strvalue;
- char ch;
- long value;
- long double fvalue;
- int min = 0;
- int max = -1;
- int state = DP_S_DEFAULT;
- int flags = 0;
- int cflags = 0;
- size_t currlen = 0;
-
- ch = *format++;
-
- while (state != DP_S_DONE) {
- if ((ch == '\0') || (currlen >= maxlen))
- state = DP_S_DONE;
-
- switch(state) {
- case DP_S_DEFAULT:
- if (ch == '%')
- state = DP_S_FLAGS;
- else
- dopr_outch(buffer, &currlen, maxlen, ch);
- ch = *format++;
- break;
- case DP_S_FLAGS:
- switch (ch) {
- case '-':
- flags |= DP_F_MINUS;
- ch = *format++;
- break;
- case '+':
- flags |= DP_F_PLUS;
- ch = *format++;
- break;
- case ' ':
- flags |= DP_F_SPACE;
- ch = *format++;
- break;
- case '#':
- flags |= DP_F_NUM;
- ch = *format++;
- break;
- case '0':
- flags |= DP_F_ZERO;
- ch = *format++;
- break;
- default:
- state = DP_S_MIN;
- break;
- }
- break;
- case DP_S_MIN:
- if (isdigit((unsigned char)ch)) {
- min = 10*min + char_to_int (ch);
- ch = *format++;
- } else if (ch == '*') {
- min = va_arg (args, int);
- ch = *format++;
- state = DP_S_DOT;
- } else
- state = DP_S_DOT;
- break;
- case DP_S_DOT:
- if (ch == '.') {
- state = DP_S_MAX;
- ch = *format++;
- } else
- state = DP_S_MOD;
- break;
- case DP_S_MAX:
- if (isdigit((unsigned char)ch)) {
- if (max < 0)
- max = 0;
- max = 10*max + char_to_int(ch);
- ch = *format++;
- } else if (ch == '*') {
- max = va_arg (args, int);
- ch = *format++;
- state = DP_S_MOD;
- } else
- state = DP_S_MOD;
- break;
- case DP_S_MOD:
- switch (ch) {
- case 'h':
- cflags = DP_C_SHORT;
- ch = *format++;
- break;
- case 'l':
- cflags = DP_C_LONG;
- ch = *format++;
- if (ch == 'l') {
- cflags = DP_C_LONG_LONG;
- ch = *format++;
- }
- break;
- case 'q':
- cflags = DP_C_LONG_LONG;
- ch = *format++;
- break;
- case 'L':
- cflags = DP_C_LDOUBLE;
- ch = *format++;
- break;
- default:
- break;
- }
- state = DP_S_CONV;
- break;
- case DP_S_CONV:
- switch (ch) {
- case 'd':
- case 'i':
- if (cflags == DP_C_SHORT)
- value = va_arg(args, int);
- else if (cflags == DP_C_LONG)
- value = va_arg(args, long int);
- else if (cflags == DP_C_LONG_LONG)
- value = va_arg (args, long long);
- else
- value = va_arg (args, int);
- fmtint(buffer, &currlen, maxlen, value, 10, min, max, flags);
- break;
- case 'o':
- flags |= DP_F_UNSIGNED;
- if (cflags == DP_C_SHORT)
- value = va_arg(args, unsigned int);
- else if (cflags == DP_C_LONG)
- value = va_arg(args, unsigned long int);
- else if (cflags == DP_C_LONG_LONG)
- value = va_arg(args, unsigned long long);
- else
- value = va_arg(args, unsigned int);
- fmtint(buffer, &currlen, maxlen, value, 8, min, max, flags);
- break;
- case 'u':
- flags |= DP_F_UNSIGNED;
- if (cflags == DP_C_SHORT)
- value = va_arg(args, unsigned int);
- else if (cflags == DP_C_LONG)
- value = va_arg(args, unsigned long int);
- else if (cflags == DP_C_LONG_LONG)
- value = va_arg(args, unsigned long long);
- else
- value = va_arg(args, unsigned int);
- fmtint (buffer, &currlen, maxlen, value, 10, min, max, flags);
- break;
- case 'X':
- flags |= DP_F_UP;
- case 'x':
- flags |= DP_F_UNSIGNED;
- if (cflags == DP_C_SHORT)
- value = va_arg(args, unsigned int);
- else if (cflags == DP_C_LONG)
- value = va_arg(args, unsigned long int);
- else if (cflags == DP_C_LONG_LONG)
- value = va_arg(args, unsigned long long);
- else
- value = va_arg(args, unsigned int);
- fmtint(buffer, &currlen, maxlen, value, 16, min, max, flags);
- break;
- case 'f':
- if (cflags == DP_C_LDOUBLE)
- fvalue = va_arg(args, long double);
- else
- fvalue = va_arg(args, double);
- /* um, floating point? */
- fmtfp(buffer, &currlen, maxlen, fvalue, min, max, flags);
- break;
- case 'E':
- flags |= DP_F_UP;
- case 'e':
- if (cflags == DP_C_LDOUBLE)
- fvalue = va_arg(args, long double);
- else
- fvalue = va_arg(args, double);
- break;
- case 'G':
- flags |= DP_F_UP;
- case 'g':
- if (cflags == DP_C_LDOUBLE)
- fvalue = va_arg(args, long double);
- else
- fvalue = va_arg(args, double);
- break;
- case 'c':
- dopr_outch(buffer, &currlen, maxlen, va_arg(args, int));
- break;
- case 's':
- strvalue = va_arg(args, char *);
- if (max < 0)
- max = maxlen; /* ie, no max */
- fmtstr(buffer, &currlen, maxlen, strvalue, flags, min, max);
- break;
- case 'p':
- strvalue = va_arg(args, void *);
- fmtint(buffer, &currlen, maxlen, (long) strvalue, 16, min, max, flags);
- break;
- case 'n':
- if (cflags == DP_C_SHORT) {
- short int *num;
- num = va_arg(args, short int *);
- *num = currlen;
- } else if (cflags == DP_C_LONG) {
- long int *num;
- num = va_arg(args, long int *);
- *num = currlen;
- } else if (cflags == DP_C_LONG_LONG) {
- long long *num;
- num = va_arg(args, long long *);
- *num = currlen;
- } else {
- int *num;
- num = va_arg(args, int *);
- *num = currlen;
- }
- break;
- case '%':
- dopr_outch(buffer, &currlen, maxlen, ch);
- break;
- case 'w': /* not supported yet, treat as next char */
- ch = *format++;
- break;
- default: /* Unknown, skip */
- break;
- }
- ch = *format++;
- state = DP_S_DEFAULT;
- flags = cflags = min = 0;
- max = -1;
- break;
- case DP_S_DONE:
- break;
- default: /* hmm? */
- break; /* some picky compilers need this */
- }
- }
- if (currlen < maxlen - 1)
- buffer[currlen] = '\0';
- else
- buffer[maxlen - 1] = '\0';
-}
-
-static void
-fmtstr(char *buffer, size_t *currlen, size_t maxlen,
- char *value, int flags, int min, int max)
-{
- int padlen, strln; /* amount to pad */
- int cnt = 0;
-
- if (value == 0)
- value = "<NULL>";
-
- for (strln = 0; value[strln]; ++strln); /* strlen */
- padlen = min - strln;
- if (padlen < 0)
- padlen = 0;
- if (flags & DP_F_MINUS)
- padlen = -padlen; /* Left Justify */
-
- while ((padlen > 0) && (cnt < max)) {
- dopr_outch(buffer, currlen, maxlen, ' ');
- --padlen;
- ++cnt;
- }
- while (*value && (cnt < max)) {
- dopr_outch(buffer, currlen, maxlen, *value++);
- ++cnt;
- }
- while ((padlen < 0) && (cnt < max)) {
- dopr_outch(buffer, currlen, maxlen, ' ');
- ++padlen;
- ++cnt;
- }
-}
-
-/* Have to handle DP_F_NUM (ie 0x and 0 alternates) */
-
-static void
-fmtint(char *buffer, size_t *currlen, size_t maxlen,
- long value, int base, int min, int max, int flags)
-{
- unsigned long uvalue;
- char convert[20];
- int signvalue = 0;
- int place = 0;
- int spadlen = 0; /* amount to space pad */
- int zpadlen = 0; /* amount to zero pad */
- int caps = 0;
-
- if (max < 0)
- max = 0;
-
- uvalue = value;
-
- if (!(flags & DP_F_UNSIGNED)) {
- if (value < 0) {
- signvalue = '-';
- uvalue = -value;
- } else if (flags & DP_F_PLUS) /* Do a sign (+/i) */
- signvalue = '+';
- else if (flags & DP_F_SPACE)
- signvalue = ' ';
- }
-
- if (flags & DP_F_UP)
- caps = 1; /* Should characters be upper case? */
-
- do {
- convert[place++] =
- (caps? "0123456789ABCDEF":"0123456789abcdef")
- [uvalue % (unsigned)base];
- uvalue = (uvalue / (unsigned)base );
- } while (uvalue && (place < 20));
- if (place == 20)
- place--;
- convert[place] = 0;
-
- zpadlen = max - place;
- spadlen = min - MAX (max, place) - (signvalue ? 1 : 0);
- if (zpadlen < 0)
- zpadlen = 0;
- if (spadlen < 0)
- spadlen = 0;
- if (flags & DP_F_ZERO) {
- zpadlen = MAX(zpadlen, spadlen);
- spadlen = 0;
- }
- if (flags & DP_F_MINUS)
- spadlen = -spadlen; /* Left Justifty */
-
-
- /* Spaces */
- while (spadlen > 0) {
- dopr_outch(buffer, currlen, maxlen, ' ');
- --spadlen;
- }
-
- /* Sign */
- if (signvalue)
- dopr_outch(buffer, currlen, maxlen, signvalue);
-
- /* Zeros */
- if (zpadlen > 0) {
- while (zpadlen > 0) {
- dopr_outch(buffer, currlen, maxlen, '0');
- --zpadlen;
- }
- }
-
- /* Digits */
- while (place > 0)
- dopr_outch(buffer, currlen, maxlen, convert[--place]);
-
- /* Left Justified spaces */
- while (spadlen < 0) {
- dopr_outch (buffer, currlen, maxlen, ' ');
- ++spadlen;
- }
-}
-
-static long double
-pow10(int exp)
-{
- long double result = 1;
-
- while (exp) {
- result *= 10;
- exp--;
- }
-
- return result;
-}
-
-static long
-round(long double value)
-{
- long intpart = value;
-
- value -= intpart;
- if (value >= 0.5)
- intpart++;
-
- return intpart;
-}
-
-static void
-fmtfp(char *buffer, size_t *currlen, size_t maxlen, long double fvalue,
- int min, int max, int flags)
-{
- char iconvert[20];
- char fconvert[20];
- int signvalue = 0;
- int iplace = 0;
- int fplace = 0;
- int padlen = 0; /* amount to pad */
- int zpadlen = 0;
- int caps = 0;
- long intpart;
- long fracpart;
- long double ufvalue;
-
- /*
- * AIX manpage says the default is 0, but Solaris says the default
- * is 6, and sprintf on AIX defaults to 6
- */
- if (max < 0)
- max = 6;
-
- ufvalue = abs_val(fvalue);
-
- if (fvalue < 0)
- signvalue = '-';
- else if (flags & DP_F_PLUS) /* Do a sign (+/i) */
- signvalue = '+';
- else if (flags & DP_F_SPACE)
- signvalue = ' ';
-
- intpart = ufvalue;
-
- /*
- * Sorry, we only support 9 digits past the decimal because of our
- * conversion method
- */
- if (max > 9)
- max = 9;
-
- /* We "cheat" by converting the fractional part to integer by
- * multiplying by a factor of 10
- */
- fracpart = round((pow10 (max)) * (ufvalue - intpart));
-
- if (fracpart >= pow10 (max)) {
- intpart++;
- fracpart -= pow10 (max);
- }
-
- /* Convert integer part */
- do {
- iconvert[iplace++] =
- (caps? "0123456789ABCDEF":"0123456789abcdef")[intpart % 10];
- intpart = (intpart / 10);
- } while(intpart && (iplace < 20));
- if (iplace == 20)
- iplace--;
- iconvert[iplace] = 0;
-
- /* Convert fractional part */
- do {
- fconvert[fplace++] =
- (caps? "0123456789ABCDEF":"0123456789abcdef")[fracpart % 10];
- fracpart = (fracpart / 10);
- } while(fracpart && (fplace < 20));
- if (fplace == 20)
- fplace--;
- fconvert[fplace] = 0;
-
- /* -1 for decimal point, another -1 if we are printing a sign */
- padlen = min - iplace - max - 1 - ((signvalue) ? 1 : 0);
- zpadlen = max - fplace;
- if (zpadlen < 0)
- zpadlen = 0;
- if (padlen < 0)
- padlen = 0;
- if (flags & DP_F_MINUS)
- padlen = -padlen; /* Left Justifty */
-
- if ((flags & DP_F_ZERO) && (padlen > 0)) {
- if (signvalue) {
- dopr_outch(buffer, currlen, maxlen, signvalue);
- --padlen;
- signvalue = 0;
- }
- while (padlen > 0) {
- dopr_outch(buffer, currlen, maxlen, '0');
- --padlen;
- }
- }
- while (padlen > 0) {
- dopr_outch(buffer, currlen, maxlen, ' ');
- --padlen;
- }
- if (signvalue)
- dopr_outch(buffer, currlen, maxlen, signvalue);
-
- while (iplace > 0)
- dopr_outch(buffer, currlen, maxlen, iconvert[--iplace]);
-
- /*
- * Decimal point. This should probably use locale to find the correct
- * char to print out.
- */
- dopr_outch(buffer, currlen, maxlen, '.');
-
- while (fplace > 0)
- dopr_outch(buffer, currlen, maxlen, fconvert[--fplace]);
-
- while (zpadlen > 0) {
- dopr_outch(buffer, currlen, maxlen, '0');
- --zpadlen;
- }
-
- while (padlen < 0) {
- dopr_outch(buffer, currlen, maxlen, ' ');
- ++padlen;
- }
-}
-
-static void
-dopr_outch(char *buffer, size_t *currlen, size_t maxlen, char c)
-{
- if (*currlen < maxlen)
- buffer[(*currlen)++] = c;
-}
-#endif /* !defined(HAVE_SNPRINTF) || !defined(HAVE_VSNPRINTF) */
-
-#ifndef HAVE_VSNPRINTF
-int
-vsnprintf(char *str, size_t count, const char *fmt, va_list args)
-{
- str[0] = 0;
- dopr(str, count, fmt, args);
-
- return(strlen(str));
-}
-#endif /* !HAVE_VSNPRINTF */
-
-#ifndef HAVE_SNPRINTF
-int
-snprintf(char *str,size_t count,const char *fmt,...)
-{
- va_list ap;
-
- va_start(ap, fmt);
- (void) vsnprintf(str, count, fmt, ap);
- va_end(ap);
-
- return(strlen(str));
-}
-
-#ifdef TEST_SNPRINTF
-int
-main(void)
-{
-#define LONG_STRING 1024
- char buf1[LONG_STRING];
- char buf2[LONG_STRING];
- char *fp_fmt[] = {
- "%-1.5f",
- "%1.5f",
- "%123.9f",
- "%10.5f",
- "% 10.5f",
- "%+22.9f",
- "%+4.9f",
- "%01.3f",
- "%4f",
- "%3.1f",
- "%3.2f",
- NULL
- };
- double fp_nums[] = {
- -1.5,
- 134.21,
- 91340.2,
- 341.1234,
- 0203.9,
- 0.96,
- 0.996,
- 0.9996,
- 1.996,
- 4.136,
- 0
- };
- char *int_fmt[] = {
- "%-1.5d",
- "%1.5d",
- "%123.9d",
- "%5.5d",
- "%10.5d",
- "% 10.5d",
- "%+22.33d",
- "%01.3d",
- "%4d",
- "%lld",
- "%qd",
- NULL
- };
- long long int_nums[] = { -1, 134, 91340, 341, 0203, 0, 9999999 };
- int x, y;
- int fail = 0;
- int num = 0;
-
- printf("Testing snprintf format codes against system sprintf...\n");
-
- for (x = 0; fp_fmt[x] != NULL ; x++) {
- for (y = 0; fp_nums[y] != 0 ; y++) {
- snprintf(buf1, sizeof (buf1), fp_fmt[x], fp_nums[y]);
- sprintf (buf2, fp_fmt[x], fp_nums[y]);
- if (strcmp (buf1, buf2)) {
- printf("snprintf doesn't match Format: %s\n\t"
- "snprintf = %s\n\tsprintf = %s\n",
- fp_fmt[x], buf1, buf2);
- fail++;
- }
- num++;
- }
- }
- for (x = 0; int_fmt[x] != NULL ; x++) {
- for (y = 0; int_nums[y] != 0 ; y++) {
- snprintf(buf1, sizeof (buf1), int_fmt[x], int_nums[y]);
- sprintf(buf2, int_fmt[x], int_nums[y]);
- if (strcmp (buf1, buf2)) {
- printf("snprintf doesn't match Format: %s\n\t"
- "snprintf = %s\n\tsprintf = %s\n",
- int_fmt[x], buf1, buf2);
- fail++;
- }
- num++;
- }
- }
- printf("%d tests failed out of %d.\n", fail, num);
- return(0);
-}
-#endif /* SNPRINTF_TEST */
-
-#endif /* !HAVE_SNPRINTF */
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/bsd-waitpid.c b/usr/src/cmd/ssh/libopenbsd-compat/common/bsd-waitpid.c
deleted file mode 100644
index 8b221309a0..0000000000
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/bsd-waitpid.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-
-RCSID("$Id: bsd-waitpid.c,v 1.3 2001/03/26 05:35:34 mouring Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifndef HAVE_WAITPID
-#include <errno.h>
-#include <sys/wait.h>
-#include "bsd-waitpid.h"
-
-pid_t
-waitpid(int pid, int *stat_loc, int options)
-{
- union wait statusp;
- pid_t wait_pid;
-
- if (pid <= 0) {
- if (pid != -1) {
- errno = EINVAL;
- return -1;
- }
- pid = 0; /* wait4() wants pid=0 for indiscriminate wait. */
- }
- wait_pid = wait4(pid, &statusp, options, NULL);
- if (stat_loc)
- *stat_loc = (int) statusp.w_status;
-
- return wait_pid;
-}
-
-#endif /* !HAVE_WAITPID */
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/dirname.c b/usr/src/cmd/ssh/libopenbsd-compat/common/dirname.c
deleted file mode 100644
index b6dcff6f36..0000000000
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/dirname.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/* $OpenBSD: dirname.c,v 1.7 2002/05/24 21:22:37 deraadt Exp $ */
-
-/*
- * Copyright (c) 1997 Todd C. Miller <Todd.Miller@courtesan.com>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-#ifndef HAVE_DIRNAME
-
-#if defined(LIBC_SCCS) && !defined(lint)
-static char rcsid[] = "$OpenBSD: dirname.c,v 1.7 2002/05/24 21:22:37 deraadt Exp $";
-#endif /* LIBC_SCCS and not lint */
-
-#include <errno.h>
-#include <string.h>
-#include <sys/param.h>
-
-char *
-dirname(path)
- const char *path;
-{
- static char bname[MAXPATHLEN];
- register const char *endp;
-
- /* Empty or NULL string gets treated as "." */
- if (path == NULL || *path == '\0') {
- (void)strlcpy(bname, ".", sizeof bname);
- return(bname);
- }
-
- /* Strip trailing slashes */
- endp = path + strlen(path) - 1;
- while (endp > path && *endp == '/')
- endp--;
-
- /* Find the start of the dir */
- while (endp > path && *endp != '/')
- endp--;
-
- /* Either the dir is "/" or there are no slashes */
- if (endp == path) {
- (void)strlcpy(bname, *endp == '/' ? "/" : ".", sizeof bname);
- return(bname);
- } else {
- do {
- endp--;
- } while (endp > path && *endp == '/');
- }
-
- if (endp - path + 2 > sizeof(bname)) {
- errno = ENAMETOOLONG;
- return(NULL);
- }
- strlcpy(bname, path, endp - path + 2);
- return(bname);
-}
-#endif
-
-#pragma ident "%Z%%M% %I% %E% SMI"
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/fake-getaddrinfo.c b/usr/src/cmd/ssh/libopenbsd-compat/common/fake-getaddrinfo.c
deleted file mode 100644
index ede2fa014a..0000000000
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/fake-getaddrinfo.c
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * fake library for ssh
- *
- * This file includes getaddrinfo(), freeaddrinfo() and gai_strerror().
- * These funtions are defined in rfc2133.
- *
- * But these functions are not implemented correctly. The minimum subset
- * is implemented for ssh use only. For exapmle, this routine assumes
- * that ai_family is AF_INET. Don't use it for another purpose.
- */
-
-#include "includes.h"
-#include "ssh.h"
-
-RCSID("$Id: fake-getaddrinfo.c,v 1.2 2001/02/09 01:55:36 djm Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifndef HAVE_GAI_STRERROR
-char *gai_strerror(int ecode)
-{
- switch (ecode) {
- case EAI_NODATA:
- return "no address associated with hostname.";
- case EAI_MEMORY:
- return "memory allocation failure.";
- default:
- return "unknown error.";
- }
-}
-#endif /* !HAVE_GAI_STRERROR */
-
-#ifndef HAVE_FREEADDRINFO
-void freeaddrinfo(struct addrinfo *ai)
-{
- struct addrinfo *next;
-
- do {
- next = ai->ai_next;
- free(ai);
- } while (NULL != (ai = next));
-}
-#endif /* !HAVE_FREEADDRINFO */
-
-#ifndef HAVE_GETADDRINFO
-static struct addrinfo *malloc_ai(int port, u_long addr)
-{
- struct addrinfo *ai;
-
- ai = malloc(sizeof(struct addrinfo) + sizeof(struct sockaddr_in));
- if (ai == NULL)
- return(NULL);
-
- memset(ai, 0, sizeof(struct addrinfo) + sizeof(struct sockaddr_in));
-
- ai->ai_addr = (struct sockaddr *)(ai + 1);
- /* XXX -- ssh doesn't use sa_len */
- ai->ai_addrlen = sizeof(struct sockaddr_in);
- ai->ai_addr->sa_family = ai->ai_family = AF_INET;
-
- ((struct sockaddr_in *)(ai)->ai_addr)->sin_port = port;
- ((struct sockaddr_in *)(ai)->ai_addr)->sin_addr.s_addr = addr;
-
- return(ai);
-}
-
-int getaddrinfo(const char *hostname, const char *servname,
- const struct addrinfo *hints, struct addrinfo **res)
-{
- struct addrinfo *cur, *prev = NULL;
- struct hostent *hp;
- struct in_addr in;
- int i, port;
-
- if (servname)
- port = htons(atoi(servname));
- else
- port = 0;
-
- if (hints && hints->ai_flags & AI_PASSIVE) {
- if (NULL != (*res = malloc_ai(port, htonl(0x00000000))))
- return 0;
- else
- return EAI_MEMORY;
- }
-
- if (!hostname) {
- if (NULL != (*res = malloc_ai(port, htonl(0x7f000001))))
- return 0;
- else
- return EAI_MEMORY;
- }
-
- if (inet_aton(hostname, &in)) {
- if (NULL != (*res = malloc_ai(port, in.s_addr)))
- return 0;
- else
- return EAI_MEMORY;
- }
-
- hp = gethostbyname(hostname);
- if (hp && hp->h_name && hp->h_name[0] && hp->h_addr_list[0]) {
- for (i = 0; hp->h_addr_list[i]; i++) {
- cur = malloc_ai(port, ((struct in_addr *)hp->h_addr_list[i])->s_addr);
- if (cur == NULL) {
- if (*res)
- freeaddrinfo(*res);
- return EAI_MEMORY;
- }
-
- if (prev)
- prev->ai_next = cur;
- else
- *res = cur;
-
- prev = cur;
- }
- return 0;
- }
-
- return EAI_NODATA;
-}
-#endif /* !HAVE_GETADDRINFO */
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/fake-getnameinfo.c b/usr/src/cmd/ssh/libopenbsd-compat/common/fake-getnameinfo.c
deleted file mode 100644
index 60cef5cc2d..0000000000
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/fake-getnameinfo.c
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * fake library for ssh
- *
- * This file includes getnameinfo().
- * These funtions are defined in rfc2133.
- *
- * But these functions are not implemented correctly. The minimum subset
- * is implemented for ssh use only. For exapmle, this routine assumes
- * that ai_family is AF_INET. Don't use it for another purpose.
- */
-
-#include "includes.h"
-#include "ssh.h"
-
-RCSID("$Id: fake-getnameinfo.c,v 1.2 2001/02/09 01:55:36 djm Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifndef HAVE_GETNAMEINFO
-int getnameinfo(const struct sockaddr *sa, size_t salen, char *host,
- size_t hostlen, char *serv, size_t servlen, int flags)
-{
- struct sockaddr_in *sin = (struct sockaddr_in *)sa;
- struct hostent *hp;
- char tmpserv[16];
-
- if (serv) {
- snprintf(tmpserv, sizeof(tmpserv), "%d", ntohs(sin->sin_port));
- if (strlen(tmpserv) >= servlen)
- return EAI_MEMORY;
- else
- strcpy(serv, tmpserv);
- }
-
- if (host) {
- if (flags & NI_NUMERICHOST) {
- if (strlen(inet_ntoa(sin->sin_addr)) >= hostlen)
- return EAI_MEMORY;
-
- strcpy(host, inet_ntoa(sin->sin_addr));
- return 0;
- } else {
- hp = gethostbyaddr((char *)&sin->sin_addr,
- sizeof(struct in_addr), AF_INET);
- if (hp == NULL)
- return EAI_NODATA;
-
- if (strlen(hp->h_name) >= hostlen)
- return EAI_MEMORY;
-
- strcpy(host, hp->h_name);
- return 0;
- }
- }
- return 0;
-}
-#endif /* !HAVE_GETNAMEINFO */
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/getcwd.c b/usr/src/cmd/ssh/libopenbsd-compat/common/getcwd.c
deleted file mode 100644
index 35027a0e7b..0000000000
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/getcwd.c
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- * Copyright (c) 1989, 1991, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "includes.h"
-
-#if !defined(HAVE_GETCWD)
-
-#if defined(LIBC_SCCS) && !defined(lint)
-static char rcsid[] = "$OpenBSD: getcwd.c,v 1.6 2000/07/19 15:25:13 deraadt Exp $";
-#endif /* LIBC_SCCS and not lint */
-
-#include <sys/param.h>
-#include <sys/stat.h>
-#include <errno.h>
-#include <dirent.h>
-#include <sys/dir.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "includes.h"
-
-#define ISDOT(dp) \
- (dp->d_name[0] == '.' && (dp->d_name[1] == '\0' || \
- (dp->d_name[1] == '.' && dp->d_name[2] == '\0')))
-
-char *
-getcwd(char *pt,size_t size)
-{
- register struct dirent *dp;
- register DIR *dir = NULL;
- register dev_t dev;
- register ino_t ino;
- register int first;
- register char *bpt, *bup;
- struct stat s;
- dev_t root_dev;
- ino_t root_ino;
- size_t ptsize, upsize;
- int save_errno;
- char *ept, *eup, *up;
-
- /*
- * If no buffer specified by the user, allocate one as necessary.
- * If a buffer is specified, the size has to be non-zero. The path
- * is built from the end of the buffer backwards.
- */
- if (pt) {
- ptsize = 0;
- if (!size) {
- errno = EINVAL;
- return (NULL);
- }
- ept = pt + size;
- } else {
- if ((pt = malloc(ptsize = 1024 - 4)) == NULL)
- return (NULL);
- ept = pt + ptsize;
- }
- bpt = ept - 1;
- *bpt = '\0';
-
- /*
- * Allocate bytes (1024 - malloc space) for the string of "../"'s.
- * Should always be enough (it's 340 levels). If it's not, allocate
- * as necessary. Special * case the first stat, it's ".", not "..".
- */
- if ((up = malloc(upsize = 1024 - 4)) == NULL)
- goto err;
- eup = up + MAXPATHLEN;
- bup = up;
- up[0] = '.';
- up[1] = '\0';
-
- /* Save root values, so know when to stop. */
- if (stat("/", &s))
- goto err;
- root_dev = s.st_dev;
- root_ino = s.st_ino;
-
- errno = 0; /* XXX readdir has no error return. */
-
- for (first = 1;; first = 0) {
- /* Stat the current level. */
- if (lstat(up, &s))
- goto err;
-
- /* Save current node values. */
- ino = s.st_ino;
- dev = s.st_dev;
-
- /* Check for reaching root. */
- if (root_dev == dev && root_ino == ino) {
- *--bpt = '/';
- /*
- * It's unclear that it's a requirement to copy the
- * path to the beginning of the buffer, but it's always
- * been that way and stuff would probably break.
- */
- memmove(pt, bpt, ept - bpt);
- free(up);
- return (pt);
- }
-
- /*
- * Build pointer to the parent directory, allocating memory
- * as necessary. Max length is 3 for "../", the largest
- * possible component name, plus a trailing NULL.
- */
- if (bup + 3 + MAXNAMLEN + 1 >= eup) {
- char *nup;
-
- if ((nup = realloc(up, upsize *= 2)) == NULL)
- goto err;
- up = nup;
- bup = up;
- eup = up + upsize;
- }
- *bup++ = '.';
- *bup++ = '.';
- *bup = '\0';
-
- /* Open and stat parent directory.
- * RACE?? - replaced fstat(dirfd(dir), &s) w/ lstat(up,&s)
- */
- if (!(dir = opendir(up)) || lstat(up,&s))
- goto err;
-
- /* Add trailing slash for next directory. */
- *bup++ = '/';
-
- /*
- * If it's a mount point, have to stat each element because
- * the inode number in the directory is for the entry in the
- * parent directory, not the inode number of the mounted file.
- */
- save_errno = 0;
- if (s.st_dev == dev) {
- for (;;) {
- if (!(dp = readdir(dir)))
- goto notfound;
- if (dp->d_fileno == ino)
- break;
- }
- } else
- for (;;) {
- if (!(dp = readdir(dir)))
- goto notfound;
- if (ISDOT(dp))
- continue;
- memmove(bup, dp->d_name, dp->d_namlen + 1);
-
- /* Save the first error for later. */
- if (lstat(up, &s)) {
- if (!save_errno)
- save_errno = errno;
- errno = 0;
- continue;
- }
- if (s.st_dev == dev && s.st_ino == ino)
- break;
- }
-
- /*
- * Check for length of the current name, preceding slash,
- * leading slash.
- */
- if (bpt - pt < dp->d_namlen + (first ? 1 : 2)) {
- size_t len, off;
- char *npt;
-
- if (!ptsize) {
- errno = ERANGE;
- goto err;
- }
- off = bpt - pt;
- len = ept - bpt;
- if ((npt = realloc(pt, ptsize *= 2)) == NULL)
- goto err;
- pt = npt;
- bpt = pt + off;
- ept = pt + ptsize;
- memmove(ept - len, bpt, len);
- bpt = ept - len;
- }
- if (!first)
- *--bpt = '/';
- bpt -= dp->d_namlen;
- memmove(bpt, dp->d_name, dp->d_namlen);
- (void)closedir(dir);
-
- /* Truncate any file name. */
- *bup = '\0';
- }
-
-notfound:
- /*
- * If readdir set errno, use it, not any saved error; otherwise,
- * didn't find the current directory in its parent directory, set
- * errno to ENOENT.
- */
- if (!errno)
- errno = save_errno ? save_errno : ENOENT;
- /* FALLTHROUGH */
-err:
- if (ptsize)
- free(pt);
- if (up)
- free(up);
- if (dir)
- (void)closedir(dir);
- return (NULL);
-}
-
-#endif /* !defined(HAVE_GETCWD) */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/getopt.c b/usr/src/cmd/ssh/libopenbsd-compat/common/getopt.c
deleted file mode 100644
index f69487950f..0000000000
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/getopt.c
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 1987, 1993, 1994
- * The Regents of the University of California. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "includes.h"
-#if !defined(HAVE_GETOPT) || !defined(HAVE_GETOPT_OPTRESET)
-
-#if defined(LIBC_SCCS) && !defined(lint)
-static char *rcsid = "$OpenBSD: getopt.c,v 1.2 1996/08/19 08:33:32 tholo Exp $";
-#endif /* LIBC_SCCS and not lint */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-int BSDopterr = 1, /* if error message should be printed */
- BSDoptind = 1, /* index into parent argv vector */
- BSDoptopt, /* character checked for validity */
- BSDoptreset; /* reset getopt */
-char *BSDoptarg; /* argument associated with option */
-
-#define BADCH (int)'?'
-#define BADARG (int)':'
-#define EMSG ""
-
-/*
- * getopt --
- * Parse argc/argv argument vector.
- */
-int
-BSDgetopt(nargc, nargv, ostr)
- int nargc;
- char * const *nargv;
- const char *ostr;
-{
- extern char *__progname;
- static char *place = EMSG; /* option letter processing */
- char *oli; /* option letter list index */
-
- if (BSDoptreset || !*place) { /* update scanning pointer */
- BSDoptreset = 0;
- if (BSDoptind >= nargc || *(place = nargv[BSDoptind]) != '-') {
- place = EMSG;
- return (-1);
- }
- if (place[1] && *++place == '-') { /* found "--" */
- ++BSDoptind;
- place = EMSG;
- return (-1);
- }
- } /* option letter okay? */
- if ((BSDoptopt = (int)*place++) == (int)':' ||
- !(oli = strchr(ostr, BSDoptopt))) {
- /*
- * if the user didn't specify '-' as an option,
- * assume it means -1.
- */
- if (BSDoptopt == (int)'-')
- return (-1);
- if (!*place)
- ++BSDoptind;
- if (BSDopterr && *ostr != ':')
- (void) fprintf(stderr,
- gettext("%s: illegal option -- %c\n"), __progname,
- BSDoptopt);
- return (BADCH);
- }
- if (*++oli != ':') { /* don't need argument */
- BSDoptarg = NULL;
- if (!*place)
- ++BSDoptind;
- } else { /* need an argument */
- if (*place) /* no white space */
- BSDoptarg = place;
- else if (nargc <= ++BSDoptind) { /* no arg */
- place = EMSG;
- if (*ostr == ':')
- return (BADARG);
- if (BSDopterr)
- (void) fprintf(stderr,
- "%s: option requires an argument -- %c\n",
- __progname, BSDoptopt);
- return (BADCH);
- /* white space */
- } else
- BSDoptarg = nargv[BSDoptind];
-
- place = EMSG;
- ++BSDoptind;
- }
- return (BSDoptopt); /* dump back option letter */
-}
-
-#endif /* !defined(HAVE_GETOPT) || !defined(HAVE_OPTRESET) */
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/glob.c b/usr/src/cmd/ssh/libopenbsd-compat/common/glob.c
deleted file mode 100644
index 2688cccaee..0000000000
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/glob.c
+++ /dev/null
@@ -1,933 +0,0 @@
-/*
- * Copyright (c) 1989, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Guido van Rossum.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "includes.h"
-#include <ctype.h>
-
-#if !defined(HAVE_GLOB) || !defined(GLOB_HAS_ALTDIRFUNC) || \
- !defined(GLOB_HAS_GL_MATCHC)
-
-#if defined(LIBC_SCCS) && !defined(lint)
-#if 0
-static char sccsid[] = "@(#)glob.c 8.3 (Berkeley) 10/13/93";
-#else
-static char rcsid[] = "$OpenBSD: glob.c,v 1.20 2002/06/14 21:34:58 todd Exp $";
-#endif
-#endif /* LIBC_SCCS and not lint */
-
-/*
- * glob(3) -- a superset of the one defined in POSIX 1003.2.
- *
- * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
- *
- * Optional extra services, controlled by flags not defined by POSIX:
- *
- * GLOB_QUOTE:
- * Escaping convention: \ inhibits any special meaning the following
- * character might have (except \ at end of string is retained).
- * GLOB_MAGCHAR:
- * Set in gl_flags if pattern contained a globbing character.
- * GLOB_NOMAGIC:
- * Same as GLOB_NOCHECK, but it will only append pattern if it did
- * not contain any magic characters. [Used in csh style globbing]
- * GLOB_ALTDIRFUNC:
- * Use alternately specified directory access functions.
- * GLOB_TILDE:
- * expand ~user/foo to the /home/dir/of/user/foo
- * GLOB_BRACE:
- * expand {1,2}{a,b} to 1a 1b 2a 2b
- * gl_matchc:
- * Number of matches in the current invocation of glob.
- */
-
-
-#define DOLLAR '$'
-#define DOT '.'
-#define EOS '\0'
-#define LBRACKET '['
-#define NOT '!'
-#define QUESTION '?'
-#define QUOTE '\\'
-#define RANGE '-'
-#define RBRACKET ']'
-#define SEP '/'
-#define STAR '*'
-#undef TILDE /* Some platforms may already define it */
-#define TILDE '~'
-#define UNDERSCORE '_'
-#define LBRACE '{'
-#define RBRACE '}'
-#define SLASH '/'
-#define COMMA ','
-
-#ifndef DEBUG
-
-#define M_QUOTE 0x8000
-#define M_PROTECT 0x4000
-#define M_MASK 0xffff
-#define M_ASCII 0x00ff
-
-typedef u_short Char;
-
-#else
-
-#define M_QUOTE 0x80
-#define M_PROTECT 0x40
-#define M_MASK 0xff
-#define M_ASCII 0x7f
-
-typedef char Char;
-
-#endif
-
-
-#define CHAR(c) ((Char)((c)&M_ASCII))
-#define META(c) ((Char)((c)|M_QUOTE))
-#define M_ALL META('*')
-#define M_END META(']')
-#define M_NOT META('!')
-#define M_ONE META('?')
-#define M_RNG META('-')
-#define M_SET META('[')
-#define ismeta(c) (((c)&M_QUOTE) != 0)
-
-
-#define GLOB_LIMIT_MALLOC 65536
-#define GLOB_LIMIT_STAT 128
-#define GLOB_LIMIT_READDIR 16384
-
-#define GLOB_INDEX_MALLOC 0
-#define GLOB_INDEX_STAT 1
-#define GLOB_INDEX_READDIR 2
-
-static int compare(const void *, const void *);
-static int g_Ctoc(const Char *, char *, u_int);
-static int g_lstat(Char *, struct stat *, glob_t *);
-static DIR *g_opendir(Char *, glob_t *);
-static Char *g_strchr(Char *, int);
-static int g_stat(Char *, struct stat *, glob_t *);
-static int glob0(const Char *, glob_t *, size_t *);
-static int glob1(Char *, Char *, glob_t *, size_t *);
-static int glob2(Char *, Char *, Char *, Char *, Char *, Char *,
- glob_t *, size_t *);
-static int glob3(Char *, Char *, Char *, Char *, Char *, Char *,
- Char *, Char *, glob_t *, size_t *);
-static int globextend(const Char *, glob_t *, size_t *);
-static const Char *
- globtilde(const Char *, Char *, size_t, glob_t *);
-static int globexp1(const Char *, glob_t *, size_t *);
-static int globexp2(const Char *, const Char *, glob_t *, int *,
- size_t *);
-static int match(Char *, Char *, Char *);
-#ifdef DEBUG
-static void qprintf(const char *, Char *);
-#endif
-
-int
-glob(pattern, flags, errfunc, pglob)
- const char *pattern;
- int flags, (*errfunc)(const char *, int);
- glob_t *pglob;
-{
- const u_char *patnext;
- int c;
- Char *bufnext, *bufend, patbuf[MAXPATHLEN];
- /* 0 = malloc(), 1 = stat(), 2 = readdir() */
- static size_t limit[] = { 0, 0, 0 };
-
- patnext = (u_char *) pattern;
- if (!(flags & GLOB_APPEND)) {
- pglob->gl_pathc = 0;
- pglob->gl_pathv = NULL;
- if (!(flags & GLOB_DOOFFS))
- pglob->gl_offs = 0;
- }
- pglob->gl_flags = flags & ~GLOB_MAGCHAR;
- pglob->gl_errfunc = errfunc;
- pglob->gl_matchc = 0;
-
- bufnext = patbuf;
- bufend = bufnext + MAXPATHLEN - 1;
- if (flags & GLOB_NOESCAPE)
- while (bufnext < bufend && (c = *patnext++) != EOS)
- *bufnext++ = c;
- else {
- /* Protect the quoted characters. */
- while (bufnext < bufend && (c = *patnext++) != EOS)
- if (c == QUOTE) {
- if ((c = *patnext++) == EOS) {
- c = QUOTE;
- --patnext;
- }
- *bufnext++ = c | M_PROTECT;
- } else
- *bufnext++ = c;
- }
- *bufnext = EOS;
-
- if (flags & GLOB_BRACE)
- return globexp1(patbuf, pglob, limit);
- else
- return glob0(patbuf, pglob, limit);
-}
-
-/*
- * Expand recursively a glob {} pattern. When there is no more expansion
- * invoke the standard globbing routine to glob the rest of the magic
- * characters
- */
-static int
-globexp1(pattern, pglob, limit)
- const Char *pattern;
- glob_t *pglob;
- size_t *limit;
-{
- const Char* ptr = pattern;
- int rv;
-
- /* Protect a single {}, for find(1), like csh */
- if (pattern[0] == LBRACE && pattern[1] == RBRACE && pattern[2] == EOS)
- return glob0(pattern, pglob, limit);
-
- while ((ptr = (const Char *) g_strchr((Char *) ptr, LBRACE)) != NULL)
- if (!globexp2(ptr, pattern, pglob, &rv, limit))
- return rv;
-
- return glob0(pattern, pglob, limit);
-}
-
-
-/*
- * Recursive brace globbing helper. Tries to expand a single brace.
- * If it succeeds then it invokes globexp1 with the new pattern.
- * If it fails then it tries to glob the rest of the pattern and returns.
- */
-static int
-globexp2(ptr, pattern, pglob, rv, limit)
- const Char *ptr, *pattern;
- glob_t *pglob;
- int *rv;
- size_t *limit;
-{
- int i;
- Char *lm, *ls;
- const Char *pe, *pm, *pl;
- Char patbuf[MAXPATHLEN];
-
- /* copy part up to the brace */
- for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++)
- ;
- *lm = EOS;
- ls = lm;
-
- /* Find the balanced brace */
- for (i = 0, pe = ++ptr; *pe; pe++)
- if (*pe == LBRACKET) {
- /* Ignore everything between [] */
- for (pm = pe++; *pe != RBRACKET && *pe != EOS; pe++)
- ;
- if (*pe == EOS) {
- /*
- * We could not find a matching RBRACKET.
- * Ignore and just look for RBRACE
- */
- pe = pm;
- }
- } else if (*pe == LBRACE)
- i++;
- else if (*pe == RBRACE) {
- if (i == 0)
- break;
- i--;
- }
-
- /* Non matching braces; just glob the pattern */
- if (i != 0 || *pe == EOS) {
- *rv = glob0(patbuf, pglob, limit);
- return 0;
- }
-
- for (i = 0, pl = pm = ptr; pm <= pe; pm++) {
- switch (*pm) {
- case LBRACKET:
- /* Ignore everything between [] */
- for (pl = pm++; *pm != RBRACKET && *pm != EOS; pm++)
- ;
- if (*pm == EOS) {
- /*
- * We could not find a matching RBRACKET.
- * Ignore and just look for RBRACE
- */
- pm = pl;
- }
- break;
-
- case LBRACE:
- i++;
- break;
-
- case RBRACE:
- if (i) {
- i--;
- break;
- }
- /* FALLTHROUGH */
- case COMMA:
- if (i && *pm == COMMA)
- break;
- else {
- /* Append the current string */
- for (lm = ls; (pl < pm); *lm++ = *pl++)
- ;
-
- /*
- * Append the rest of the pattern after the
- * closing brace
- */
- for (pl = pe + 1; (*lm++ = *pl++) != EOS; )
- ;
-
- /* Expand the current pattern */
-#ifdef DEBUG
- qprintf("globexp2:", patbuf);
-#endif
- *rv = globexp1(patbuf, pglob, limit);
-
- /* move after the comma, to the next string */
- pl = pm + 1;
- }
- break;
-
- default:
- break;
- }
- }
- *rv = 0;
- return 0;
-}
-
-
-
-/*
- * expand tilde from the passwd file.
- */
-static const Char *
-globtilde(pattern, patbuf, patbuf_len, pglob)
- const Char *pattern;
- Char *patbuf;
- size_t patbuf_len;
- glob_t *pglob;
-{
- struct passwd *pwd;
- char *h;
- const Char *p;
- Char *b, *eb;
-
- if (*pattern != TILDE || !(pglob->gl_flags & GLOB_TILDE))
- return pattern;
-
- /* Copy up to the end of the string or / */
- eb = &patbuf[patbuf_len - 1];
- for (p = pattern + 1, h = (char *) patbuf;
- h < (char *)eb && *p && *p != SLASH; *h++ = *p++)
- ;
-
- *h = EOS;
-
-#if 0
- if (h == (char *)eb)
- return what;
-#endif
-
- if (((char *) patbuf)[0] == EOS) {
- /*
- * handle a plain ~ or ~/ by expanding $HOME
- * first and then trying the password file
- */
-#if 0
- if (issetugid() != 0 || (h = getenv("HOME")) == NULL) {
-#endif
- if ((getuid() != geteuid()) || (h = getenv("HOME")) == NULL) {
- if ((pwd = getpwuid(getuid())) == NULL)
- return pattern;
- else
- h = pwd->pw_dir;
- }
- } else {
- /*
- * Expand a ~user
- */
- if ((pwd = getpwnam((char*) patbuf)) == NULL)
- return pattern;
- else
- h = pwd->pw_dir;
- }
-
- /* Copy the home directory */
- for (b = patbuf; b < eb && *h; *b++ = *h++)
- ;
-
- /* Append the rest of the pattern */
- while (b < eb && (*b++ = *p++) != EOS)
- ;
- *b = EOS;
-
- return patbuf;
-}
-
-
-/*
- * The main glob() routine: compiles the pattern (optionally processing
- * quotes), calls glob1() to do the real pattern matching, and finally
- * sorts the list (unless unsorted operation is requested). Returns 0
- * if things went well, nonzero if errors occurred. It is not an error
- * to find no matches.
- */
-static int
-glob0(pattern, pglob, limit)
- const Char *pattern;
- glob_t *pglob;
- size_t *limit;
-{
- const Char *qpatnext;
- int c, err, oldpathc;
- Char *bufnext, patbuf[MAXPATHLEN];
-
- qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob);
- oldpathc = pglob->gl_pathc;
- bufnext = patbuf;
-
- /* We don't need to check for buffer overflow any more. */
- while ((c = *qpatnext++) != EOS) {
- switch (c) {
- case LBRACKET:
- c = *qpatnext;
- if (c == NOT)
- ++qpatnext;
- if (*qpatnext == EOS ||
- g_strchr((Char *) qpatnext+1, RBRACKET) == NULL) {
- *bufnext++ = LBRACKET;
- if (c == NOT)
- --qpatnext;
- break;
- }
- *bufnext++ = M_SET;
- if (c == NOT)
- *bufnext++ = M_NOT;
- c = *qpatnext++;
- do {
- *bufnext++ = CHAR(c);
- if (*qpatnext == RANGE &&
- (c = qpatnext[1]) != RBRACKET) {
- *bufnext++ = M_RNG;
- *bufnext++ = CHAR(c);
- qpatnext += 2;
- }
- } while ((c = *qpatnext++) != RBRACKET);
- pglob->gl_flags |= GLOB_MAGCHAR;
- *bufnext++ = M_END;
- break;
- case QUESTION:
- pglob->gl_flags |= GLOB_MAGCHAR;
- *bufnext++ = M_ONE;
- break;
- case STAR:
- pglob->gl_flags |= GLOB_MAGCHAR;
- /* collapse adjacent stars to one,
- * to avoid exponential behavior
- */
- if (bufnext == patbuf || bufnext[-1] != M_ALL)
- *bufnext++ = M_ALL;
- break;
- default:
- *bufnext++ = CHAR(c);
- break;
- }
- }
- *bufnext = EOS;
-#ifdef DEBUG
- qprintf("glob0:", patbuf);
-#endif
-
- if ((err = glob1(patbuf, patbuf+MAXPATHLEN-1, pglob, limit)) != 0)
- return(err);
-
- /*
- * If there was no match we are going to append the pattern
- * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
- * and the pattern did not contain any magic characters
- * GLOB_NOMAGIC is there just for compatibility with csh.
- */
- if (pglob->gl_pathc == oldpathc) {
- if ((pglob->gl_flags & GLOB_NOCHECK) ||
- ((pglob->gl_flags & GLOB_NOMAGIC) &&
- !(pglob->gl_flags & GLOB_MAGCHAR)))
- return(globextend(pattern, pglob, limit));
- else
- return(GLOB_NOMATCH);
- }
- if (!(pglob->gl_flags & GLOB_NOSORT))
- qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
- pglob->gl_pathc - oldpathc, sizeof(char *), compare);
- return(0);
-}
-
-static int
-compare(p, q)
- const void *p, *q;
-{
- return(strcmp(*(char **)p, *(char **)q));
-}
-
-static int
-glob1(pattern, pattern_last, pglob, limit)
- Char *pattern, *pattern_last;
- glob_t *pglob;
- size_t *limit;
-{
- Char pathbuf[MAXPATHLEN];
-
- /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
- if (*pattern == EOS)
- return(0);
- return(glob2(pathbuf, pathbuf+MAXPATHLEN-1,
- pathbuf, pathbuf+MAXPATHLEN-1,
- pattern, pattern_last, pglob, limit));
-}
-
-/*
- * The functions glob2 and glob3 are mutually recursive; there is one level
- * of recursion for each segment in the pattern that contains one or more
- * meta characters.
- */
-static int
-glob2(pathbuf, pathbuf_last, pathend, pathend_last, pattern,
- pattern_last, pglob, limit)
- Char *pathbuf, *pathbuf_last, *pathend, *pathend_last;
- Char *pattern, *pattern_last;
- glob_t *pglob;
- size_t *limit;
-{
- struct stat sb;
- Char *p, *q;
- int anymeta;
-
- /*
- * Loop over pattern segments until end of pattern or until
- * segment with meta character found.
- */
- for (anymeta = 0;;) {
- if (*pattern == EOS) { /* End of pattern? */
- *pathend = EOS;
- if (g_lstat(pathbuf, &sb, pglob))
- return(0);
-
- if ((pglob->gl_flags & GLOB_LIMIT) &&
- limit[GLOB_INDEX_STAT]++ >= GLOB_LIMIT_STAT) {
- errno = 0;
- *pathend++ = SEP;
- *pathend = EOS;
- return GLOB_NOSPACE;
- }
-
- if (((pglob->gl_flags & GLOB_MARK) &&
- pathend[-1] != SEP) && (S_ISDIR(sb.st_mode) ||
- (S_ISLNK(sb.st_mode) &&
- (g_stat(pathbuf, &sb, pglob) == 0) &&
- S_ISDIR(sb.st_mode)))) {
- if (pathend+1 > pathend_last)
- return (1);
- *pathend++ = SEP;
- *pathend = EOS;
- }
- ++pglob->gl_matchc;
- return(globextend(pathbuf, pglob, limit));
- }
-
- /* Find end of next segment, copy tentatively to pathend. */
- q = pathend;
- p = pattern;
- while (*p != EOS && *p != SEP) {
- if (ismeta(*p))
- anymeta = 1;
- if (q+1 > pathend_last)
- return (1);
- *q++ = *p++;
- }
-
- if (!anymeta) { /* No expansion, do next segment. */
- pathend = q;
- pattern = p;
- while (*pattern == SEP) {
- if (pathend+1 > pathend_last)
- return (1);
- *pathend++ = *pattern++;
- }
- } else
- /* Need expansion, recurse. */
- return(glob3(pathbuf, pathbuf_last, pathend,
- pathend_last, pattern, pattern_last,
- p, pattern_last, pglob, limit));
- }
- /* NOTREACHED */
-}
-
-static int
-glob3(pathbuf, pathbuf_last, pathend, pathend_last, pattern, pattern_last,
- restpattern, restpattern_last, pglob, limit)
- Char *pathbuf, *pathbuf_last, *pathend, *pathend_last;
- Char *pattern, *pattern_last, *restpattern, *restpattern_last;
- glob_t *pglob;
- size_t *limit;
-{
- register struct dirent *dp;
- DIR *dirp;
- int err;
- char buf[MAXPATHLEN];
-
- /*
- * The readdirfunc declaration can't be prototyped, because it is
- * assigned, below, to two functions which are prototyped in glob.h
- * and dirent.h as taking pointers to differently typed opaque
- * structures.
- */
- struct dirent *(*readdirfunc)();
-
- if (pathend > pathend_last)
- return (1);
- *pathend = EOS;
- errno = 0;
-
- if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
- /* TODO: don't call for ENOENT or ENOTDIR? */
- if (pglob->gl_errfunc) {
- if (g_Ctoc(pathbuf, buf, sizeof(buf)))
- return(GLOB_ABORTED);
- if (pglob->gl_errfunc(buf, errno) ||
- pglob->gl_flags & GLOB_ERR)
- return(GLOB_ABORTED);
- }
- return(0);
- }
-
- err = 0;
-
- /* Search directory for matching names. */
- if (pglob->gl_flags & GLOB_ALTDIRFUNC)
- readdirfunc = pglob->gl_readdir;
- else
- readdirfunc = readdir;
- while ((dp = (*readdirfunc)(dirp))) {
- register u_char *sc;
- register Char *dc;
-
- if ((pglob->gl_flags & GLOB_LIMIT) &&
- limit[GLOB_INDEX_READDIR]++ >= GLOB_LIMIT_READDIR) {
- errno = 0;
- *pathend++ = SEP;
- *pathend = EOS;
- return GLOB_NOSPACE;
- }
-
- /* Initial DOT must be matched literally. */
- if (dp->d_name[0] == DOT && *pattern != DOT)
- continue;
- dc = pathend;
- sc = (u_char *) dp->d_name;
- while (dc < pathend_last && (*dc++ = *sc++) != EOS)
- ;
- if (dc >= pathend_last) {
- *dc = EOS;
- err = 1;
- break;
- }
-
- if (!match(pathend, pattern, restpattern)) {
- *pathend = EOS;
- continue;
- }
- err = glob2(pathbuf, pathbuf_last, --dc, pathend_last,
- restpattern, restpattern_last, pglob, limit);
- if (err)
- break;
- }
-
- if (pglob->gl_flags & GLOB_ALTDIRFUNC)
- (*pglob->gl_closedir)(dirp);
- else
- closedir(dirp);
- return(err);
-}
-
-
-/*
- * Extend the gl_pathv member of a glob_t structure to accommodate a new item,
- * add the new item, and update gl_pathc.
- *
- * This assumes the BSD realloc, which only copies the block when its size
- * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
- * behavior.
- *
- * Return 0 if new item added, error code if memory couldn't be allocated.
- *
- * Invariant of the glob_t structure:
- * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
- * gl_pathv points to (gl_offs + gl_pathc + 1) items.
- */
-static int
-globextend(path, pglob, limit)
- const Char *path;
- glob_t *pglob;
- size_t *limit;
-{
- register char **pathv;
- register int i;
- u_int newsize, len;
- char *copy;
- const Char *p;
-
- newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
- pathv = pglob->gl_pathv ? realloc((char *)pglob->gl_pathv, newsize) :
- malloc(newsize);
- if (pathv == NULL) {
- if (pglob->gl_pathv) {
- free(pglob->gl_pathv);
- pglob->gl_pathv = NULL;
- }
- return(GLOB_NOSPACE);
- }
-
- if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
- /* first time around -- clear initial gl_offs items */
- pathv += pglob->gl_offs;
- for (i = pglob->gl_offs; --i >= 0; )
- *--pathv = NULL;
- }
- pglob->gl_pathv = pathv;
-
- for (p = path; *p++;)
- ;
- len = (size_t)(p - path);
- limit[GLOB_INDEX_MALLOC] += len;
- if ((copy = malloc(len)) != NULL) {
- if (g_Ctoc(path, copy, len)) {
- free(copy);
- return(GLOB_NOSPACE);
- }
- pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
- }
- pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
- if ((pglob->gl_flags & GLOB_LIMIT) &&
- (newsize + limit[GLOB_INDEX_MALLOC]) >= GLOB_LIMIT_MALLOC) {
- errno = 0;
- return GLOB_NOSPACE;
- }
-
- return(copy == NULL ? GLOB_NOSPACE : 0);
-}
-
-
-/*
- * pattern matching function for filenames. Each occurrence of the *
- * pattern causes a recursion level.
- */
-static int
-match(name, pat, patend)
- register Char *name, *pat, *patend;
-{
- int ok, negate_range;
- Char c, k;
-
- while (pat < patend) {
- c = *pat++;
- switch (c & M_MASK) {
- case M_ALL:
- if (pat == patend)
- return(1);
- do
- if (match(name, pat, patend))
- return(1);
- while (*name++ != EOS)
- ;
- return(0);
- case M_ONE:
- if (*name++ == EOS)
- return(0);
- break;
- case M_SET:
- ok = 0;
- if ((k = *name++) == EOS)
- return(0);
- if ((negate_range = ((*pat & M_MASK) == M_NOT)) != EOS)
- ++pat;
- while (((c = *pat++) & M_MASK) != M_END)
- if ((*pat & M_MASK) == M_RNG) {
- if (c <= k && k <= pat[1])
- ok = 1;
- pat += 2;
- } else if (c == k)
- ok = 1;
- if (ok == negate_range)
- return(0);
- break;
- default:
- if (*name++ != c)
- return(0);
- break;
- }
- }
- return(*name == EOS);
-}
-
-/* Free allocated data belonging to a glob_t structure. */
-void
-globfree(pglob)
- glob_t *pglob;
-{
- register int i;
- register char **pp;
-
- if (pglob->gl_pathv != NULL) {
- pp = pglob->gl_pathv + pglob->gl_offs;
- for (i = pglob->gl_pathc; i--; ++pp)
- if (*pp)
- free(*pp);
- free(pglob->gl_pathv);
- pglob->gl_pathv = NULL;
- }
-}
-
-static DIR *
-g_opendir(str, pglob)
- register Char *str;
- glob_t *pglob;
-{
- char buf[MAXPATHLEN];
-
- if (!*str)
- strlcpy(buf, ".", sizeof buf);
- else {
- if (g_Ctoc(str, buf, sizeof(buf)))
- return(NULL);
- }
-
- if (pglob->gl_flags & GLOB_ALTDIRFUNC)
- return((*pglob->gl_opendir)(buf));
-
- return(opendir(buf));
-}
-
-static int
-g_lstat(fn, sb, pglob)
- register Char *fn;
- struct stat *sb;
- glob_t *pglob;
-{
- char buf[MAXPATHLEN];
-
- if (g_Ctoc(fn, buf, sizeof(buf)))
- return(-1);
- if (pglob->gl_flags & GLOB_ALTDIRFUNC)
- return((*pglob->gl_lstat)(buf, sb));
- return(lstat(buf, sb));
-}
-
-static int
-g_stat(fn, sb, pglob)
- register Char *fn;
- struct stat *sb;
- glob_t *pglob;
-{
- char buf[MAXPATHLEN];
-
- if (g_Ctoc(fn, buf, sizeof(buf)))
- return(-1);
- if (pglob->gl_flags & GLOB_ALTDIRFUNC)
- return((*pglob->gl_stat)(buf, sb));
- return(stat(buf, sb));
-}
-
-static Char *
-g_strchr(str, ch)
- Char *str;
- int ch;
-{
- do {
- if (*str == ch)
- return (str);
- } while (*str++);
- return (NULL);
-}
-
-static int
-g_Ctoc(str, buf, len)
- register const Char *str;
- char *buf;
- u_int len;
-{
-
- while (len--) {
- if ((*buf++ = *str++) == EOS)
- return (0);
- }
- return (1);
-}
-
-#ifdef DEBUG
-static void
-qprintf(str, s)
- const char *str;
- register Char *s;
-{
- register Char *p;
-
- (void)printf("%s:\n", str);
- for (p = s; *p; p++)
- (void)printf("%c", CHAR(*p));
- (void)printf("\n");
- for (p = s; *p; p++)
- (void)printf("%c", *p & M_PROTECT ? '"' : ' ');
- (void)printf("\n");
- for (p = s; *p; p++)
- (void)printf("%c", ismeta(*p) ? '_' : ' ');
- (void)printf("\n");
-}
-#endif
-
-#endif /* !defined(HAVE_GLOB) || !defined(GLOB_HAS_ALTDIRFUNC) ||
- !defined(GLOB_HAS_GL_MATCHC) */
-
-
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/inet_ntop.c b/usr/src/cmd/ssh/libopenbsd-compat/common/inet_ntop.c
deleted file mode 100644
index ed1acbfb1e..0000000000
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/inet_ntop.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/* $OpenBSD: inet_ntop.c,v 1.5 2002/08/23 16:27:31 itojun Exp $ */
-
-/* Copyright (c) 1996 by Internet Software Consortium.
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
- * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
- * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
- * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
- * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
- * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- */
-
-#include "includes.h"
-
-#ifndef HAVE_INET_NTOP
-
-#if defined(LIBC_SCCS) && !defined(lint)
-#if 0
-static char rcsid[] = "$From: inet_ntop.c,v 8.7 1996/08/05 08:41:18 vixie Exp $";
-#else
-static char rcsid[] = "$OpenBSD: inet_ntop.c,v 1.5 2002/08/23 16:27:31 itojun Exp $";
-#endif
-#endif /* LIBC_SCCS and not lint */
-
-#include <sys/param.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include "fake-socket.h"
-#include <netinet/in.h>
-#include <arpa/inet.h>
-#ifndef HAVE_CYGWIN
-#include <arpa/nameser.h>
-#endif
-#include <string.h>
-#include <errno.h>
-#include <stdio.h>
-
-#ifndef IN6ADDRSZ
-#define IN6ADDRSZ 16 /* IPv6 T_AAAA */
-#endif
-
-#ifndef INT16SZ
-#define INT16SZ 2 /* for systems without 16-bit ints */
-#endif
-
-/*
- * WARNING: Don't even consider trying to compile this on a system where
- * sizeof(int) < 4. sizeof(int) > 4 is fine; all the world's not a VAX.
- */
-
-static const char *inet_ntop4(const u_char *src, char *dst, size_t size);
-static const char *inet_ntop6(const u_char *src, char *dst, size_t size);
-
-/* char *
- * inet_ntop(af, src, dst, size)
- * convert a network format address to presentation format.
- * return:
- * pointer to presentation format address (`dst'), or NULL (see errno).
- * author:
- * Paul Vixie, 1996.
- */
-const char *
-inet_ntop(af, src, dst, size)
- int af;
- const void *src;
- char *dst;
- size_t size;
-{
- switch (af) {
- case AF_INET:
- return (inet_ntop4(src, dst, size));
- case AF_INET6:
- return (inet_ntop6(src, dst, size));
- default:
- errno = EAFNOSUPPORT;
- return (NULL);
- }
- /* NOTREACHED */
-}
-
-/* const char *
- * inet_ntop4(src, dst, size)
- * format an IPv4 address, more or less like inet_ntoa()
- * return:
- * `dst' (as a const)
- * notes:
- * (1) uses no statics
- * (2) takes a u_char* not an in_addr as input
- * author:
- * Paul Vixie, 1996.
- */
-static const char *
-inet_ntop4(src, dst, size)
- const u_char *src;
- char *dst;
- size_t size;
-{
- static const char fmt[] = "%u.%u.%u.%u";
- char tmp[sizeof "255.255.255.255"];
- int l;
-
- l = snprintf(tmp, size, fmt, src[0], src[1], src[2], src[3]);
- if (l <= 0 || l >= size) {
- errno = ENOSPC;
- return (NULL);
- }
- strlcpy(dst, tmp, size);
- return (dst);
-}
-
-/* const char *
- * inet_ntop6(src, dst, size)
- * convert IPv6 binary address into presentation (printable) format
- * author:
- * Paul Vixie, 1996.
- */
-static const char *
-inet_ntop6(src, dst, size)
- const u_char *src;
- char *dst;
- size_t size;
-{
- /*
- * Note that int32_t and int16_t need only be "at least" large enough
- * to contain a value of the specified size. On some systems, like
- * Crays, there is no such thing as an integer variable with 16 bits.
- * Keep this in mind if you think this function should have been coded
- * to use pointer overlays. All the world's not a VAX.
- */
- char tmp[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"];
- char *tp, *ep;
- struct { int base, len; } best, cur;
- u_int words[IN6ADDRSZ / INT16SZ];
- int i;
- int advance;
-
- /*
- * Preprocess:
- * Copy the input (bytewise) array into a wordwise array.
- * Find the longest run of 0x00's in src[] for :: shorthanding.
- */
- memset(words, '\0', sizeof words);
- for (i = 0; i < IN6ADDRSZ; i++)
- words[i / 2] |= (src[i] << ((1 - (i % 2)) << 3));
- best.base = -1;
- cur.base = -1;
- for (i = 0; i < (IN6ADDRSZ / INT16SZ); i++) {
- if (words[i] == 0) {
- if (cur.base == -1)
- cur.base = i, cur.len = 1;
- else
- cur.len++;
- } else {
- if (cur.base != -1) {
- if (best.base == -1 || cur.len > best.len)
- best = cur;
- cur.base = -1;
- }
- }
- }
- if (cur.base != -1) {
- if (best.base == -1 || cur.len > best.len)
- best = cur;
- }
- if (best.base != -1 && best.len < 2)
- best.base = -1;
-
- /*
- * Format the result.
- */
- tp = tmp;
- ep = tmp + sizeof(tmp);
- for (i = 0; i < (IN6ADDRSZ / INT16SZ) && tp < ep; i++) {
- /* Are we inside the best run of 0x00's? */
- if (best.base != -1 && i >= best.base &&
- i < (best.base + best.len)) {
- if (i == best.base) {
- if (tp + 1 >= ep)
- return (NULL);
- *tp++ = ':';
- }
- continue;
- }
- /* Are we following an initial run of 0x00s or any real hex? */
- if (i != 0) {
- if (tp + 1 >= ep)
- return (NULL);
- *tp++ = ':';
- }
- /* Is this address an encapsulated IPv4? */
- if (i == 6 && best.base == 0 &&
- (best.len == 6 || (best.len == 5 && words[5] == 0xffff))) {
- if (!inet_ntop4(src+12, tp, (size_t)(ep - tp)))
- return (NULL);
- tp += strlen(tp);
- break;
- }
- advance = snprintf(tp, ep - tp, "%x", words[i]);
- if (advance <= 0 || advance >= ep - tp)
- return (NULL);
- tp += advance;
- }
- /* Was it a trailing run of 0x00's? */
- if (best.base != -1 && (best.base + best.len) == (IN6ADDRSZ / INT16SZ)) {
- if (tp + 1 >= ep)
- return (NULL);
- *tp++ = ':';
- }
- if (tp + 1 >= ep)
- return (NULL);
- *tp++ = '\0';
-
- /*
- * Check for overflow, copy, and we're done.
- */
- if ((size_t)(tp - tmp) > size) {
- errno = ENOSPC;
- return (NULL);
- }
- strlcpy(dst, tmp, size);
- return (dst);
-}
-
-#endif /* !HAVE_INET_NTOP */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/llib-lopenbsd-compat b/usr/src/cmd/ssh/libopenbsd-compat/common/llib-lopenbsd-compat
deleted file mode 100644
index 3c5912997f..0000000000
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/llib-lopenbsd-compat
+++ /dev/null
@@ -1,83 +0,0 @@
-/* LINTLIBRARY */
-/* PROTOLIB1 */
-
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- *
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include <includes.h>
-#include <atomicio.h>
-#include <base64.h>
-#include <bindresvport.h>
-#include <bsd-arc4random.h>
-#include <bsd-cray.h>
-#include <bsd-cygwin_util.h>
-#include <bsd-getpeereid.h>
-#include <bsd-misc.h>
-#include <bsd-snprintf.h>
-#include <bsd-waitpid.h>
-#include <config.h>
-#include <crc32.h>
-#include <deattack.h>
-#include <defines.h>
-#include <dirname.h>
-#include <dispatch.h>
-#include <entropy.h>
-#include <fake-gai-errnos.h>
-#include <fake-getaddrinfo.h>
-#include <fake-getnameinfo.h>
-#include <fake-socket.h>
-#include <getcwd.h>
-#include <getgrouplist.h>
-#include <getopt.h>
-#include <getput.h>
-#include <glob.h>
-#include <groupaccess.h>
-#include <inet_ntoa.h>
-#include <inet_ntop.h>
-#include <log.h>
-#include <match.h>
-#include <misc.h>
-#include <mktemp.h>
-#include <openbsd-compat.h>
-#include <pathnames.h>
-#include <port-aix.h>
-#include <port-irix.h>
-#include <proxy-io.h>
-#include <readpass.h>
-#include <readpassphrase.h>
-#include <realpath.h>
-#include <rresvport.h>
-#include <rsa.h>
-#include <setproctitle.h>
-#include <sigact.h>
-#include <strlcat.h>
-#include <strlcpy.h>
-#include <strmode.h>
-#include <sys-queue.h>
-#include <sys-tree.h>
-#include <tildexpand.h>
-#include <uuencode.h>
-#include <version.h>
-#include <xmalloc.h>
-#include <xmmap.h>
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/mktemp.c b/usr/src/cmd/ssh/libopenbsd-compat/common/mktemp.c
deleted file mode 100644
index c6b18d22d4..0000000000
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/mktemp.c
+++ /dev/null
@@ -1,186 +0,0 @@
-/* THIS FILE HAS BEEN MODIFIED FROM THE ORIGINAL OPENBSD SOURCE */
-/* Changes: Removed mktemp */
-
-/*
- * Copyright (c) 1987, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "includes.h"
-
-#ifndef HAVE_MKDTEMP
-
-#if defined(LIBC_SCCS) && !defined(lint)
-static char rcsid[] = "$OpenBSD: mktemp.c,v 1.16 2002/05/27 18:20:45 millert Exp $";
-#endif /* LIBC_SCCS and not lint */
-
-#ifdef HAVE_CYGWIN
-#define open binary_open
-extern int binary_open();
-#endif
-
-static int _gettemp(char *, int *, int, int);
-
-int
-mkstemps(path, slen)
- char *path;
- int slen;
-{
- int fd;
-
- return (_gettemp(path, &fd, 0, slen) ? fd : -1);
-}
-
-int
-mkstemp(path)
- char *path;
-{
- int fd;
-
- return (_gettemp(path, &fd, 0, 0) ? fd : -1);
-}
-
-char *
-mkdtemp(path)
- char *path;
-{
- return(_gettemp(path, (int *)NULL, 1, 0) ? path : (char *)NULL);
-}
-
-static int
-_gettemp(path, doopen, domkdir, slen)
- char *path;
- register int *doopen;
- int domkdir;
- int slen;
-{
- register char *start, *trv, *suffp;
- struct stat sbuf;
- int rval;
- pid_t pid;
-
- if (doopen && domkdir) {
- errno = EINVAL;
- return(0);
- }
-
- for (trv = path; *trv; ++trv)
- ;
- trv -= slen;
- suffp = trv;
- --trv;
- if (trv < path) {
- errno = EINVAL;
- return (0);
- }
- pid = getpid();
- while (trv >= path && *trv == 'X' && pid != 0) {
- *trv-- = (pid % 10) + '0';
- pid /= 10;
- }
- while (trv >= path && *trv == 'X') {
- char c;
-
- pid = (arc4random() & 0xffff) % (26+26);
- if (pid < 26)
- c = pid + 'A';
- else
- c = (pid - 26) + 'a';
- *trv-- = c;
- }
- start = trv + 1;
-
- /*
- * check the target directory; if you have six X's and it
- * doesn't exist this runs for a *very* long time.
- */
- if (doopen || domkdir) {
- for (;; --trv) {
- if (trv <= path)
- break;
- if (*trv == '/') {
- *trv = '\0';
- rval = stat(path, &sbuf);
- *trv = '/';
- if (rval != 0)
- return(0);
- if (!S_ISDIR(sbuf.st_mode)) {
- errno = ENOTDIR;
- return(0);
- }
- break;
- }
- }
- }
-
- for (;;) {
- if (doopen) {
- if ((*doopen =
- open(path, O_CREAT|O_EXCL|O_RDWR, 0600)) >= 0)
- return(1);
- if (errno != EEXIST)
- return(0);
- } else if (domkdir) {
- if (mkdir(path, 0700) == 0)
- return(1);
- if (errno != EEXIST)
- return(0);
- } else if (lstat(path, &sbuf))
- return(errno == ENOENT ? 1 : 0);
-
- /* tricky little algorithm for backward compatibility */
- for (trv = start;;) {
- if (!*trv)
- return (0);
- if (*trv == 'Z') {
- if (trv == suffp)
- return (0);
- *trv++ = 'a';
- } else {
- if (isdigit(*trv))
- *trv = 'a';
- else if (*trv == 'z') /* inc from z to A */
- *trv = 'A';
- else {
- if (trv == suffp)
- return (0);
- ++*trv;
- }
- break;
- }
- }
- }
- /*NOTREACHED*/
-}
-
-#endif /* !HAVE_MKDTEMP */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/port-aix.c b/usr/src/cmd/ssh/libopenbsd-compat/common/port-aix.c
deleted file mode 100644
index 4b83f53c07..0000000000
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/port-aix.c
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- *
- * Copyright (c) 2001 Gert Doering. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-#include "includes.h"
-
-#ifdef _AIX
-
-#include <uinfo.h>
-#include <../xmalloc.h>
-
-/*
- * AIX has a "usrinfo" area where logname and other stuff is stored -
- * a few applications actually use this and die if it's not set
- *
- * NOTE: TTY= should be set, but since no one uses it and it's hard to
- * acquire due to privsep code. We will just drop support.
- */
-void
-aix_usrinfo(struct passwd *pw)
-{
- u_int i;
- char *cp;
-
- cp = xmalloc(16 + 2 * strlen(pw->pw_name));
- i = sprintf(cp, "LOGNAME=%s%cNAME=%s%c", pw->pw_name, 0,
- pw->pw_name, 0);
- if (usrinfo(SETUINFO, cp, i) == -1)
- fatal("Couldn't set usrinfo: %s", strerror(errno));
- debug3("AIX/UsrInfo: set len %d", i);
- xfree(cp);
-}
-
-#endif /* _AIX */
-
-
-#pragma ident "%Z%%M% %I% %E% SMI"
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/port-irix.c b/usr/src/cmd/ssh/libopenbsd-compat/common/port-irix.c
deleted file mode 100644
index 5f839043c0..0000000000
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/port-irix.c
+++ /dev/null
@@ -1,63 +0,0 @@
-#include "includes.h"
-
-#if defined(WITH_IRIX_PROJECT) || defined(WITH_IRIX_JOBS) || defined(WITH_IRIX_ARRAY)
-
-#ifdef WITH_IRIX_PROJECT
-#include <proj.h>
-#endif /* WITH_IRIX_PROJECT */
-#ifdef WITH_IRIX_JOBS
-#include <sys/resource.h>
-#endif
-#ifdef WITH_IRIX_AUDIT
-#include <sat.h>
-#endif /* WITH_IRIX_AUDIT */
-
-void
-irix_setusercontext(struct passwd *pw)
-{
-#ifdef WITH_IRIX_PROJECT
- prid_t projid;
-#endif /* WITH_IRIX_PROJECT */
-#ifdef WITH_IRIX_JOBS
- jid_t jid = 0;
-#else
-# ifdef WITH_IRIX_ARRAY
- int jid = 0;
-# endif /* WITH_IRIX_ARRAY */
-#endif /* WITH_IRIX_JOBS */
-
-#ifdef WITH_IRIX_JOBS
- jid = jlimit_startjob(pw->pw_name, pw->pw_uid, "interactive");
- if (jid == -1)
- fatal("Failed to create job container: %.100s",
- strerror(errno));
-#endif /* WITH_IRIX_JOBS */
-#ifdef WITH_IRIX_ARRAY
- /* initialize array session */
- if (jid == 0 && newarraysess() != 0)
- fatal("Failed to set up new array session: %.100s",
- strerror(errno));
-#endif /* WITH_IRIX_ARRAY */
-#ifdef WITH_IRIX_PROJECT
- /* initialize irix project info */
- if ((projid = getdfltprojuser(pw->pw_name)) == -1) {
- debug("Failed to get project id, using projid 0");
- projid = 0;
- }
- if (setprid(projid))
- fatal("Failed to initialize project %d for %s: %.100s",
- (int)projid, pw->pw_name, strerror(errno));
-#endif /* WITH_IRIX_PROJECT */
-#ifdef WITH_IRIX_AUDIT
- if (sysconf(_SC_AUDIT)) {
- debug("Setting sat id to %d", (int) pw->pw_uid);
- if (satsetid(pw->pw_uid))
- debug("error setting satid: %.100s", strerror(errno));
- }
-#endif /* WITH_IRIX_AUDIT */
-}
-
-
-#endif /* defined(WITH_IRIX_PROJECT) || defined(WITH_IRIX_JOBS) || defined(WITH_IRIX_ARRAY) */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/readpassphrase.c b/usr/src/cmd/ssh/libopenbsd-compat/common/readpassphrase.c
deleted file mode 100644
index 67ed2caff7..0000000000
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/readpassphrase.c
+++ /dev/null
@@ -1,192 +0,0 @@
-/* $OpenBSD: readpassphrase.c,v 1.14 2002/06/28 01:43:58 millert Exp $ */
-
-/*
- * Copyright (c) 2000-2002 Todd C. Miller <Todd.Miller@courtesan.com>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#if defined(LIBC_SCCS) && !defined(lint)
-static const char rcsid[] = "$OpenBSD: readpassphrase.c,v 1.14 2002/06/28 01:43:58 millert Exp $";
-#endif /* LIBC_SCCS and not lint */
-
-#include "includes.h"
-
-#ifndef HAVE_READPASSPHRASE
-
-#include <termios.h>
-#include <readpassphrase.h>
-
-#ifdef TCSASOFT
-# define _T_FLUSH (TCSAFLUSH|TCSASOFT)
-#else
-# define _T_FLUSH (TCSAFLUSH)
-#endif
-
-/* SunOS 4.x which lacks _POSIX_VDISABLE, but has VDISABLE */
-#if !defined(_POSIX_VDISABLE) && defined(VDISABLE)
-# define _POSIX_VDISABLE VDISABLE
-#endif
-
-static volatile sig_atomic_t signo;
-
-static void handler(int);
-
-char *
-readpassphrase(const char *prompt, char *buf, size_t bufsiz, int flags)
-{
- ssize_t nr;
- int input, output, save_errno;
- char ch, *p, *end;
- struct termios term, oterm;
- struct sigaction sa, savealrm, saveint, savehup, savequit, saveterm;
- struct sigaction savetstp, savettin, savettou, savepipe;
-
- /* I suppose we could alloc on demand in this case (XXX). */
- if (bufsiz == 0) {
- errno = EINVAL;
- return(NULL);
- }
-
-restart:
- signo = 0;
- /*
- * Read and write to /dev/tty if available. If not, read from
- * stdin and write to stderr unless a tty is required.
- */
- if ((flags & RPP_STDIN) ||
- (input = output = open(_PATH_TTY, O_RDWR)) == -1) {
- if (flags & RPP_REQUIRE_TTY) {
- errno = ENOTTY;
- return(NULL);
- }
- input = STDIN_FILENO;
- output = STDERR_FILENO;
- }
-
- /*
- * Catch signals that would otherwise cause the user to end
- * up with echo turned off in the shell. Don't worry about
- * things like SIGXCPU and SIGVTALRM for now.
- */
- sigemptyset(&sa.sa_mask);
- sa.sa_flags = 0; /* don't restart system calls */
- sa.sa_handler = handler;
- (void)sigaction(SIGALRM, &sa, &savealrm);
- (void)sigaction(SIGHUP, &sa, &savehup);
- (void)sigaction(SIGINT, &sa, &saveint);
- (void)sigaction(SIGPIPE, &sa, &savepipe);
- (void)sigaction(SIGQUIT, &sa, &savequit);
- (void)sigaction(SIGTERM, &sa, &saveterm);
- (void)sigaction(SIGTSTP, &sa, &savetstp);
- (void)sigaction(SIGTTIN, &sa, &savettin);
- (void)sigaction(SIGTTOU, &sa, &savettou);
-
- /* Turn off echo if possible. */
- if (input != STDIN_FILENO && tcgetattr(input, &oterm) == 0) {
- memcpy(&term, &oterm, sizeof(term));
- if (!(flags & RPP_ECHO_ON))
- term.c_lflag &= ~(ECHO | ECHONL);
-#ifdef VSTATUS
- if (term.c_cc[VSTATUS] != _POSIX_VDISABLE)
- term.c_cc[VSTATUS] = _POSIX_VDISABLE;
-#endif
- (void)tcsetattr(input, _T_FLUSH, &term);
- } else {
- memset(&term, 0, sizeof(term));
- term.c_lflag |= ECHO;
- memset(&oterm, 0, sizeof(oterm));
- oterm.c_lflag |= ECHO;
- }
-
- if (!(flags & RPP_STDIN))
- (void)write(output, prompt, strlen(prompt));
- end = buf + bufsiz - 1;
- for (p = buf; (nr = read(input, &ch, 1)) == 1 && ch != '\n' && ch != '\r';) {
- if (p < end) {
- if ((flags & RPP_SEVENBIT))
- ch &= 0x7f;
- if (isalpha(ch)) {
- if ((flags & RPP_FORCELOWER))
- ch = tolower(ch);
- if ((flags & RPP_FORCEUPPER))
- ch = toupper(ch);
- }
- *p++ = ch;
- }
- }
- *p = '\0';
- save_errno = errno;
- if (!(term.c_lflag & ECHO))
- (void)write(output, "\n", 1);
-
- /* Restore old terminal settings and signals. */
- if (memcmp(&term, &oterm, sizeof(term)) != 0)
- (void)tcsetattr(input, _T_FLUSH, &oterm);
- (void)sigaction(SIGALRM, &savealrm, NULL);
- (void)sigaction(SIGHUP, &savehup, NULL);
- (void)sigaction(SIGINT, &saveint, NULL);
- (void)sigaction(SIGQUIT, &savequit, NULL);
- (void)sigaction(SIGPIPE, &savepipe, NULL);
- (void)sigaction(SIGTERM, &saveterm, NULL);
- (void)sigaction(SIGTSTP, &savetstp, NULL);
- (void)sigaction(SIGTTIN, &savettin, NULL);
- if (input != STDIN_FILENO)
- (void)close(input);
-
- /*
- * If we were interrupted by a signal, resend it to ourselves
- * now that we have restored the signal handlers.
- */
- if (signo) {
- kill(getpid(), signo);
- switch (signo) {
- case SIGTSTP:
- case SIGTTIN:
- case SIGTTOU:
- goto restart;
- }
- }
-
- errno = save_errno;
- return(nr == -1 ? NULL : buf);
-}
-
-#if 0
-char *
-getpass(const char *prompt)
-{
- static char buf[_PASSWORD_LEN + 1];
-
- return(readpassphrase(prompt, buf, sizeof(buf), RPP_ECHO_OFF));
-}
-#endif
-
-static void handler(int s)
-{
- signo = s;
-}
-#endif /* HAVE_READPASSPHRASE */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/realpath.c b/usr/src/cmd/ssh/libopenbsd-compat/common/realpath.c
deleted file mode 100644
index e962264a50..0000000000
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/realpath.c
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * Copyright (c) 1994
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Jan-Simon Pendry.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "includes.h"
-
-#if !defined(HAVE_REALPATH) || defined(BROKEN_REALPATH)
-
-#if defined(LIBC_SCCS) && !defined(lint)
-static char *rcsid = "$OpenBSD: realpath.c,v 1.7 2002/05/24 21:22:37 deraadt Exp $";
-#endif /* LIBC_SCCS and not lint */
-
-#include <sys/param.h>
-#include <sys/stat.h>
-
-#include <errno.h>
-#include <fcntl.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-
-/*
- * MAXSYMLINKS
- */
-#ifndef MAXSYMLINKS
-#define MAXSYMLINKS 5
-#endif
-
-/*
- * char *realpath(const char *path, char resolved_path[MAXPATHLEN]);
- *
- * Find the real name of path, by removing all ".", ".." and symlink
- * components. Returns (resolved) on success, or (NULL) on failure,
- * in which case the path which caused trouble is left in (resolved).
- */
-char *
-realpath(const char *path, char *resolved)
-{
- struct stat sb;
- int fd, n, rootd, serrno = 0;
- char *p, *q, wbuf[MAXPATHLEN], start[MAXPATHLEN];
- int symlinks = 0;
-
- /* Save the starting point. */
- getcwd(start,MAXPATHLEN);
- if ((fd = open(".", O_RDONLY)) < 0) {
- (void)strlcpy(resolved, ".", MAXPATHLEN);
- return (NULL);
- }
- close(fd);
-
- /* Convert "." -> "" to optimize away a needless lstat() and chdir() */
- if (path[0] == '.' && path[1] == '\0')
- path = "";
-
- /*
- * Find the dirname and basename from the path to be resolved.
- * Change directory to the dirname component.
- * lstat the basename part.
- * if it is a symlink, read in the value and loop.
- * if it is a directory, then change to that directory.
- * get the current directory name and append the basename.
- */
- strlcpy(resolved, path, MAXPATHLEN);
-loop:
- q = strrchr(resolved, '/');
- if (q != NULL) {
- p = q + 1;
- if (q == resolved)
- q = "/";
- else {
- do {
- --q;
- } while (q > resolved && *q == '/');
- q[1] = '\0';
- q = resolved;
- }
- if (chdir(q) < 0)
- goto err1;
- } else
- p = resolved;
-
- /* Deal with the last component. */
- if (*p != '\0' && lstat(p, &sb) == 0) {
- if (S_ISLNK(sb.st_mode)) {
- if (++symlinks > MAXSYMLINKS) {
- serrno = ELOOP;
- goto err1;
- }
- n = readlink(p, resolved, MAXPATHLEN-1);
- if (n < 0)
- goto err1;
- resolved[n] = '\0';
- goto loop;
- }
- if (S_ISDIR(sb.st_mode)) {
- if (chdir(p) < 0)
- goto err1;
- p = "";
- }
- }
-
- /*
- * Save the last component name and get the full pathname of
- * the current directory.
- */
- (void)strlcpy(wbuf, p, sizeof wbuf);
- if (getcwd(resolved, MAXPATHLEN) == 0)
- goto err1;
-
- /*
- * Join the two strings together, ensuring that the right thing
- * happens if the last component is empty, or the dirname is root.
- */
- if (resolved[0] == '/' && resolved[1] == '\0')
- rootd = 1;
- else
- rootd = 0;
-
- if (*wbuf) {
- if (strlen(resolved) + strlen(wbuf) + rootd + 1 > MAXPATHLEN) {
- serrno = ENAMETOOLONG;
- goto err1;
- }
- if (rootd == 0)
- (void)strcat(resolved, "/");
- (void)strcat(resolved, wbuf);
- }
-
- /* Go back to where we came from. */
- if (chdir(start) < 0) {
- serrno = errno;
- goto err2;
- }
- return (resolved);
-
-err1: chdir(start);
-err2: errno = serrno;
- return (NULL);
-}
-#endif /* !defined(HAVE_REALPATH) || defined(BROKEN_REALPATH) */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/setproctitle.c b/usr/src/cmd/ssh/libopenbsd-compat/common/setproctitle.c
deleted file mode 100644
index 4163f0cfec..0000000000
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/setproctitle.c
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Modified for OpenSSH by Kevin Steves
- * October 2000
- */
-
-/*
- * Copyright (c) 1994, 1995 Christopher G. Demetriou
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Christopher G. Demetriou
- * for the NetBSD Project.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#if defined(LIBC_SCCS) && !defined(lint)
-static char rcsid[] = "$OpenBSD: setproctitle.c,v 1.8 2001/11/06 19:21:40 art Exp $";
-#endif /* LIBC_SCCS and not lint */
-
-#include "includes.h"
-
-#ifndef HAVE_SETPROCTITLE
-
-#define SPT_NONE 0
-#define SPT_PSTAT 1
-
-#ifndef SPT_TYPE
-#define SPT_TYPE SPT_NONE
-#endif
-
-#if SPT_TYPE == SPT_PSTAT
-#include <sys/param.h>
-#include <sys/pstat.h>
-#endif /* SPT_TYPE == SPT_PSTAT */
-
-#define MAX_PROCTITLE 2048
-
-extern char *__progname;
-
-/*
- * Set Process Title (SPT) defines. Modeled after sendmail's
- * SPT type definition strategy.
- *
- * SPT_TYPE:
- *
- * SPT_NONE: Don't set the process title. Default.
- * SPT_PSTAT: Use pstat(PSTAT_SETCMD). HP-UX specific.
- */
-
-void
-setproctitle(const char *fmt, ...)
-{
-#if SPT_TYPE != SPT_NONE
- va_list ap;
-
- char buf[MAX_PROCTITLE];
- size_t used;
-
-#if SPT_TYPE == SPT_PSTAT
- union pstun pst;
-#endif /* SPT_TYPE == SPT_PSTAT */
-
- va_start(ap, fmt);
- if (fmt != NULL) {
- used = snprintf(buf, MAX_PROCTITLE, "%s: ", __progname);
- if (used >= MAX_PROCTITLE)
- used = MAX_PROCTITLE - 1;
- (void)vsnprintf(buf + used, MAX_PROCTITLE - used, fmt, ap);
- } else
- (void)snprintf(buf, MAX_PROCTITLE, "%s", __progname);
- va_end(ap);
- used = strlen(buf);
-
-#if SPT_TYPE == SPT_PSTAT
- pst.pst_command = buf;
- pstat(PSTAT_SETCMD, pst, used, 0, 0);
-#endif /* SPT_TYPE == SPT_PSTAT */
-
-#endif /* SPT_TYPE != SPT_NONE */
-}
-#endif /* HAVE_SETPROCTITLE */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/sigact.c b/usr/src/cmd/ssh/libopenbsd-compat/common/sigact.c
deleted file mode 100644
index 5d89f8bb37..0000000000
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/sigact.c
+++ /dev/null
@@ -1,104 +0,0 @@
-/* $OpenBSD: sigaction.c,v 1.3 1999/06/27 08:14:21 millert Exp $ */
-
-/****************************************************************************
- * Copyright (c) 1998 Free Software Foundation, Inc. *
- * *
- * Permission is hereby granted, free of charge, to any person obtaining a *
- * copy of this software and associated documentation files (the *
- * "Software"), to deal in the Software without restriction, including *
- * without limitation the rights to use, copy, modify, merge, publish, *
- * distribute, distribute with modifications, sublicense, and/or sell *
- * copies of the Software, and to permit persons to whom the Software is *
- * furnished to do so, subject to the following conditions: *
- * *
- * The above copyright notice and this permission notice shall be included *
- * in all copies or substantial portions of the Software. *
- * *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS *
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF *
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. *
- * IN NO EVENT SHALL THE ABOVE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR *
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE. *
- * *
- * Except as contained in this notice, the name(s) of the above copyright *
- * holders shall not be used in advertising or otherwise to promote the *
- * sale, use or other dealings in this Software without prior written *
- * authorization. *
- ****************************************************************************/
-
-/****************************************************************************
- * Author: Zeyd M. Ben-Halim <zmbenhal@netcom.com> 1992,1995 *
- * and: Eric S. Raymond <esr@snark.thyrsus.com> *
- ****************************************************************************/
-
-#include "includes.h"
-#include <signal.h>
-#include "sigact.h"
-
-/* This file provides sigaction() emulation using sigvec() */
-/* Use only if this is non POSIX system */
-
-#if !HAVE_SIGACTION && HAVE_SIGVEC
-
-int
-sigaction(int sig, struct sigaction *sigact, struct sigaction *osigact)
-{
- return sigvec(sig, &(sigact->sv), &(osigact->sv));
-}
-
-int
-sigemptyset (sigset_t * mask)
-{
- *mask = 0;
- return 0;
-}
-
-int
-sigprocmask (int mode, sigset_t * mask, sigset_t * omask)
-{
- sigset_t current = sigsetmask(0);
-
- if (omask) *omask = current;
-
- if (mode==SIG_BLOCK)
- current |= *mask;
- else if (mode==SIG_UNBLOCK)
- current &= ~*mask;
- else if (mode==SIG_SETMASK)
- current = *mask;
-
- sigsetmask(current);
- return 0;
-}
-
-int
-sigsuspend (sigset_t * mask)
-{
- return sigpause(*mask);
-}
-
-int
-sigdelset (sigset_t * mask, int sig)
-{
- *mask &= ~sigmask(sig);
- return 0;
-}
-
-int
-sigaddset (sigset_t * mask, int sig)
-{
- *mask |= sigmask(sig);
- return 0;
-}
-
-int
-sigismember (sigset_t * mask, int sig)
-{
- return (*mask & sigmask(sig)) != 0;
-}
-
-#endif
-
-#pragma ident "%Z%%M% %I% %E% SMI"
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/strlcat.c b/usr/src/cmd/ssh/libopenbsd-compat/common/strlcat.c
deleted file mode 100644
index 8143a627d0..0000000000
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/strlcat.c
+++ /dev/null
@@ -1,81 +0,0 @@
-/* $OpenBSD: strlcat.c,v 1.8 2001/05/13 15:40:15 deraadt Exp $ */
-
-/*
- * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-#ifndef HAVE_STRLCAT
-
-#if defined(LIBC_SCCS) && !defined(lint)
-static char *rcsid = "$OpenBSD: strlcat.c,v 1.8 2001/05/13 15:40:15 deraadt Exp $";
-#endif /* LIBC_SCCS and not lint */
-
-#include <sys/types.h>
-#include <string.h>
-#include "strlcat.h"
-
-/*
- * Appends src to string dst of size siz (unlike strncat, siz is the
- * full size of dst, not space left). At most siz-1 characters
- * will be copied. Always NUL terminates (unless siz <= strlen(dst)).
- * Returns strlen(src) + MIN(siz, strlen(initial dst)).
- * If retval >= siz, truncation occurred.
- */
-size_t
-strlcat(dst, src, siz)
- char *dst;
- const char *src;
- size_t siz;
-{
- register char *d = dst;
- register const char *s = src;
- register size_t n = siz;
- size_t dlen;
-
- /* Find the end of dst and adjust bytes left but don't go past end */
- while (n-- != 0 && *d != '\0')
- d++;
- dlen = d - dst;
- n = siz - dlen;
-
- if (n == 0)
- return(dlen + strlen(s));
- while (*s != '\0') {
- if (n != 1) {
- *d++ = *s;
- n--;
- }
- s++;
- }
- *d = '\0';
-
- return(dlen + (s - src)); /* count does not include NUL */
-}
-
-#endif /* !HAVE_STRLCAT */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/strlcpy.c b/usr/src/cmd/ssh/libopenbsd-compat/common/strlcpy.c
deleted file mode 100644
index fb87445cd2..0000000000
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/strlcpy.c
+++ /dev/null
@@ -1,77 +0,0 @@
-/* $OpenBSD: strlcpy.c,v 1.5 2001/05/13 15:40:16 deraadt Exp $ */
-
-/*
- * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-#ifndef HAVE_STRLCPY
-
-#if defined(LIBC_SCCS) && !defined(lint)
-static char *rcsid = "$OpenBSD: strlcpy.c,v 1.5 2001/05/13 15:40:16 deraadt Exp $";
-#endif /* LIBC_SCCS and not lint */
-
-#include <sys/types.h>
-#include <string.h>
-#include "strlcpy.h"
-
-/*
- * Copy src to string dst of size siz. At most siz-1 characters
- * will be copied. Always NUL terminates (unless siz == 0).
- * Returns strlen(src); if retval >= siz, truncation occurred.
- */
-size_t
-strlcpy(dst, src, siz)
- char *dst;
- const char *src;
- size_t siz;
-{
- register char *d = dst;
- register const char *s = src;
- register size_t n = siz;
-
- /* Copy as many bytes as will fit */
- if (n != 0 && --n != 0) {
- do {
- if ((*d++ = *s++) == 0)
- break;
- } while (--n != 0);
- }
-
- /* Not enough room in dst, add NUL and traverse rest of src */
- if (n == 0) {
- if (siz != 0)
- *d = '\0'; /* NUL-terminate dst */
- while (*s++)
- ;
- }
-
- return(s - src - 1); /* count does not include NUL */
-}
-
-#endif /* !HAVE_STRLCPY */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/strmode.c b/usr/src/cmd/ssh/libopenbsd-compat/common/strmode.c
deleted file mode 100644
index 2a52c22d19..0000000000
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/strmode.c
+++ /dev/null
@@ -1,158 +0,0 @@
-/*-
- * Copyright (c) 1990 The Regents of the University of California.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "includes.h"
-#ifndef HAVE_STRMODE
-
-#if defined(LIBC_SCCS) && !defined(lint)
-static char *rcsid = "$OpenBSD: strmode.c,v 1.3 1997/06/13 13:57:20 deraadt Exp $";
-#endif /* LIBC_SCCS and not lint */
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <string.h>
-
-void
-strmode(register mode_t mode, register char *p)
-{
- /* print type */
- switch (mode & S_IFMT) {
- case S_IFDIR: /* directory */
- *p++ = 'd';
- break;
- case S_IFCHR: /* character special */
- *p++ = 'c';
- break;
- case S_IFBLK: /* block special */
- *p++ = 'b';
- break;
- case S_IFREG: /* regular */
- *p++ = '-';
- break;
- case S_IFLNK: /* symbolic link */
- *p++ = 'l';
- break;
-#ifdef S_IFSOCK
- case S_IFSOCK: /* socket */
- *p++ = 's';
- break;
-#endif
-#ifdef S_IFIFO
- case S_IFIFO: /* fifo */
- *p++ = 'p';
- break;
-#endif
-#ifdef S_IFWHT
- case S_IFWHT: /* whiteout */
- *p++ = 'w';
- break;
-#endif
- default: /* unknown */
- *p++ = '?';
- break;
- }
- /* usr */
- if (mode & S_IRUSR)
- *p++ = 'r';
- else
- *p++ = '-';
- if (mode & S_IWUSR)
- *p++ = 'w';
- else
- *p++ = '-';
- switch (mode & (S_IXUSR | S_ISUID)) {
- case 0:
- *p++ = '-';
- break;
- case S_IXUSR:
- *p++ = 'x';
- break;
- case S_ISUID:
- *p++ = 'S';
- break;
- case S_IXUSR | S_ISUID:
- *p++ = 's';
- break;
- }
- /* group */
- if (mode & S_IRGRP)
- *p++ = 'r';
- else
- *p++ = '-';
- if (mode & S_IWGRP)
- *p++ = 'w';
- else
- *p++ = '-';
- switch (mode & (S_IXGRP | S_ISGID)) {
- case 0:
- *p++ = '-';
- break;
- case S_IXGRP:
- *p++ = 'x';
- break;
- case S_ISGID:
- *p++ = 'S';
- break;
- case S_IXGRP | S_ISGID:
- *p++ = 's';
- break;
- }
- /* other */
- if (mode & S_IROTH)
- *p++ = 'r';
- else
- *p++ = '-';
- if (mode & S_IWOTH)
- *p++ = 'w';
- else
- *p++ = '-';
- switch (mode & (S_IXOTH | S_ISVTX)) {
- case 0:
- *p++ = '-';
- break;
- case S_IXOTH:
- *p++ = 'x';
- break;
- case S_ISVTX:
- *p++ = 'T';
- break;
- case S_IXOTH | S_ISVTX:
- *p++ = 't';
- break;
- }
- *p++ = ' '; /* will be a '+' if ACL's implemented */
- *p = '\0';
-}
-#endif
-
-#pragma ident "%Z%%M% %I% %E% SMI"
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/xmmap.c b/usr/src/cmd/ssh/libopenbsd-compat/common/xmmap.c
deleted file mode 100644
index 9548d47e0d..0000000000
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/xmmap.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-
-#ifdef HAVE_SYS_MMAN_H
-#include <sys/mman.h>
-#endif
-
-#include "log.h"
-
-void *xmmap(size_t size)
-{
- void *address;
-
-#ifdef HAVE_MMAP
-# ifdef MAP_ANON
- address = mmap(NULL, size, PROT_WRITE|PROT_READ, MAP_ANON|MAP_SHARED,
- -1, 0);
-# else
- address = mmap(NULL, size, PROT_WRITE|PROT_READ, MAP_SHARED,
- open("/dev/zero", O_RDWR), 0);
-# endif
-
-#define MM_SWAP_TEMPLATE "/var/run/sshd.mm.XXXXXXXX"
- if (address == MAP_FAILED) {
- char tmpname[sizeof(MM_SWAP_TEMPLATE)] = MM_SWAP_TEMPLATE;
- int tmpfd;
-
- tmpfd = mkstemp(tmpname);
- if (tmpfd == -1)
- fatal("mkstemp(\"%s\"): %s",
- MM_SWAP_TEMPLATE, strerror(errno));
- unlink(tmpname);
- ftruncate(tmpfd, size);
- address = mmap(NULL, size, PROT_WRITE|PROT_READ, MAP_SHARED,
- tmpfd, 0);
- close(tmpfd);
- }
-
- return (address);
-#else
- fatal("%s: UsePrivilegeSeparation=yes and Compression=yes not supported",
- __func__);
-#endif /* HAVE_MMAP */
-
-}
-
-
-#pragma ident "%Z%%M% %I% %E% SMI"
diff --git a/usr/src/cmd/ssh/libssh/Makefile.com b/usr/src/cmd/ssh/libssh/Makefile.com
deleted file mode 100644
index 569afe7207..0000000000
--- a/usr/src/cmd/ssh/libssh/Makefile.com
+++ /dev/null
@@ -1,116 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
-#
-
-LIBRARY = libssh.a
-VERS = .1
-
-OBJECTS = \
- addrmatch.o \
- atomicio.o \
- authfd.o \
- authfile.o \
- bufaux.o \
- buffer.o \
- canohost.o \
- channels.o \
- cipher.o \
- cipher-ctr.o \
- compat.o \
- compress.o \
- crc32.o \
- deattack.o \
- dh.o \
- dispatch.o \
- engine.o \
- entropy.o \
- fatal.o \
- g11n.o \
- hostfile.o \
- key.o \
- kex.o \
- kexdh.o \
- kexdhc.o \
- kexdhs.o \
- kexgex.o \
- kexgexc.o \
- kexgexs.o \
- kexgssc.o \
- kexgsss.o \
- log.o \
- mac.o \
- match.o \
- misc.o \
- mpaux.o \
- msg.o \
- nchan.o \
- packet.o \
- progressmeter.o \
- proxy-io.o \
- radix.o \
- readconf.o \
- readpass.o \
- rsa.o \
- sftp-common.o \
- ssh-dss.o \
- ssh-gss.o \
- ssh-rsa.o \
- tildexpand.o \
- ttymodes.o \
- uidswap.o \
- uuencode.o \
- xlist.o \
- xmalloc.o
-
-include $(SRC)/lib/Makefile.lib
-
-BUILD.AR = $(RM) $@ ; $(AR) $(ARFLAGS) $@ $(AROBJS)
-
-SRCDIR = ../common
-SRCS = $(OBJECTS:%.o=../common/%.c)
-
-LIBS = $(LIBRARY) $(LINTLIB)
-
-# Define LDLIBS conditionally for lintcheck, rather than in general, since
-# we're building an archive library which itself links to nothing, we just
-# want lint to know about these libraries.
-lintcheck := LDLIBS += -lcrypto -lz -lsocket -lnsl -lc
-$(LINTLIB) := SRCS = $(SRCDIR)/$(LINTSRC)
-
-POFILE_DIR = ../..
-
-.KEEP_STATE:
-
-all: $(LIBS)
-
-# lint requires the (not installed) lint library
-lint: $(LINTLIB) .WAIT lintcheck
-
-include $(SRC)/lib/Makefile.targ
-
-objs/%.o: $(SRCDIR)/%.c
- $(COMPILE.c) -o $@ $<
- $(POST_PROCESS_O)
-
-include ../../Makefile.ssh-common
-include ../../Makefile.msg.targ
diff --git a/usr/src/cmd/ssh/libssh/common/addrmatch.c b/usr/src/cmd/ssh/libssh/common/addrmatch.c
deleted file mode 100644
index 91aa1e7ac7..0000000000
--- a/usr/src/cmd/ssh/libssh/common/addrmatch.c
+++ /dev/null
@@ -1,426 +0,0 @@
-/* $OpenBSD: addrmatch.c,v 1.4 2008/12/10 03:55:20 stevesk Exp $ */
-
-/*
- * Copyright (c) 2004-2008 Damien Miller <djm@mindrot.org>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include "includes.h"
-
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <arpa/inet.h>
-
-#include <netdb.h>
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <stdarg.h>
-
-#include "match.h"
-#include "log.h"
-#include "xmalloc.h"
-
-struct xaddr {
- sa_family_t af;
- union {
- struct in_addr v4;
- struct in6_addr v6;
- u_int8_t addr8[16];
- u_int32_t addr32[4];
- } xa; /* 128-bit address */
- u_int32_t scope_id; /* iface scope id for v6 */
-#define v4 xa.v4
-#define v6 xa.v6
-#define addr8 xa.addr8
-#define addr32 xa.addr32
-};
-
-static int
-addr_unicast_masklen(int af)
-{
- switch (af) {
- case AF_INET:
- return 32;
- case AF_INET6:
- return 128;
- default:
- return -1;
- }
-}
-
-static inline int
-masklen_valid(int af, u_int masklen)
-{
- switch (af) {
- case AF_INET:
- return masklen <= 32 ? 0 : -1;
- case AF_INET6:
- return masklen <= 128 ? 0 : -1;
- default:
- return -1;
- }
-}
-
-/*
- * Convert struct sockaddr to struct xaddr
- * Returns 0 on success, -1 on failure.
- */
-static int
-addr_sa_to_xaddr(struct sockaddr *sa, socklen_t slen, struct xaddr *xa)
-{
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- struct sockaddr_in *in4 = (struct sockaddr_in *)sa;
- /* LINTED E_BAD_PTR_CAST_ALIGN */
- struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)sa;
-
- memset(xa, '\0', sizeof(*xa));
-
- switch (sa->sa_family) {
- case AF_INET:
- if (slen < sizeof(*in4))
- return -1;
- xa->af = AF_INET;
- memcpy(&xa->v4, &in4->sin_addr, sizeof(xa->v4));
- break;
- case AF_INET6:
- if (slen < sizeof(*in6))
- return -1;
- xa->af = AF_INET6;
- memcpy(&xa->v6, &in6->sin6_addr, sizeof(xa->v6));
-#ifdef HAVE_STRUCT_SOCKADDR_IN6_SIN6_SCOPE_ID
- xa->scope_id = in6->sin6_scope_id;
-#endif
- break;
- default:
- return -1;
- }
-
- return 0;
-}
-
-/*
- * Calculate a netmask of length 'l' for address family 'af' and
- * store it in 'n'.
- * Returns 0 on success, -1 on failure.
- */
-static int
-addr_netmask(int af, u_int l, struct xaddr *n)
-{
- int i;
-
- if (masklen_valid(af, l) != 0 || n == NULL)
- return -1;
-
- memset(n, '\0', sizeof(*n));
- switch (af) {
- case AF_INET:
- n->af = AF_INET;
- n->v4.s_addr = htonl((0xffffffff << (32 - l)) & 0xffffffff);
- return 0;
- case AF_INET6:
- n->af = AF_INET6;
- for (i = 0; i < 4 && l >= 32; i++, l -= 32)
- n->addr32[i] = 0xffffffffU;
- if (i < 4 && l != 0)
- n->addr32[i] = htonl((0xffffffff << (32 - l)) &
- 0xffffffff);
- return 0;
- default:
- return -1;
- }
-}
-
-/*
- * Perform logical AND of addresses 'a' and 'b', storing result in 'dst'.
- * Returns 0 on success, -1 on failure.
- */
-static int
-addr_and(struct xaddr *dst, const struct xaddr *a, const struct xaddr *b)
-{
- int i;
-
- if (dst == NULL || a == NULL || b == NULL || a->af != b->af)
- return -1;
-
- memcpy(dst, a, sizeof(*dst));
- switch (a->af) {
- case AF_INET:
- dst->v4.s_addr &= b->v4.s_addr;
- return 0;
- case AF_INET6:
- dst->scope_id = a->scope_id;
- for (i = 0; i < 4; i++)
- dst->addr32[i] &= b->addr32[i];
- return 0;
- default:
- return -1;
- }
-}
-
-/*
- * Compare addresses 'a' and 'b'
- * Return 0 if addresses are identical, -1 if (a < b) or 1 if (a > b)
- */
-static int
-addr_cmp(const struct xaddr *a, const struct xaddr *b)
-{
- int i;
-
- if (a->af != b->af)
- return a->af == AF_INET6 ? 1 : -1;
-
- switch (a->af) {
- case AF_INET:
- if (a->v4.s_addr == b->v4.s_addr)
- return 0;
- return ntohl(a->v4.s_addr) > ntohl(b->v4.s_addr) ? 1 : -1;
- case AF_INET6:
- for (i = 0; i < 16; i++)
- if (a->addr8[i] - b->addr8[i] != 0)
- return a->addr8[i] > b->addr8[i] ? 1 : -1;
- if (a->scope_id == b->scope_id)
- return 0;
- return a->scope_id > b->scope_id ? 1 : -1;
- default:
- return -1;
- }
-}
-
-/*
- * Parse string address 'p' into 'n'
- * Returns 0 on success, -1 on failure.
- */
-static int
-addr_pton(const char *p, struct xaddr *n)
-{
- struct addrinfo hints, *ai;
-
- memset(&hints, '\0', sizeof(hints));
- hints.ai_flags = AI_NUMERICHOST;
-
- if (p == NULL || getaddrinfo(p, NULL, &hints, &ai) != 0)
- return -1;
-
- if (ai == NULL || ai->ai_addr == NULL)
- return -1;
-
- if (n != NULL &&
- addr_sa_to_xaddr(ai->ai_addr, ai->ai_addrlen, n) == -1) {
- freeaddrinfo(ai);
- return -1;
- }
-
- freeaddrinfo(ai);
- return 0;
-}
-
-/*
- * Perform bitwise negation of address
- * Returns 0 on success, -1 on failure.
- */
-static int
-addr_invert(struct xaddr *n)
-{
- int i;
-
- if (n == NULL)
- return (-1);
-
- switch (n->af) {
- case AF_INET:
- n->v4.s_addr = ~n->v4.s_addr;
- return (0);
- case AF_INET6:
- for (i = 0; i < 4; i++)
- n->addr32[i] = ~n->addr32[i];
- return (0);
- default:
- return (-1);
- }
-}
-
-/*
- * Calculate a netmask of length 'l' for address family 'af' and
- * store it in 'n'.
- * Returns 0 on success, -1 on failure.
- */
-static int
-addr_hostmask(int af, u_int l, struct xaddr *n)
-{
- if (addr_netmask(af, l, n) == -1 || addr_invert(n) == -1)
- return (-1);
- return (0);
-}
-
-/*
- * Test whether address 'a' is all zeros (i.e. 0.0.0.0 or ::)
- * Returns 0 on if address is all-zeros, -1 if not all zeros or on failure.
- */
-static int
-addr_is_all0s(const struct xaddr *a)
-{
- int i;
-
- switch (a->af) {
- case AF_INET:
- return (a->v4.s_addr == 0 ? 0 : -1);
- case AF_INET6:;
- for (i = 0; i < 4; i++)
- if (a->addr32[i] != 0)
- return (-1);
- return (0);
- default:
- return (-1);
- }
-}
-
-/*
- * Test whether host portion of address 'a', as determined by 'masklen'
- * is all zeros.
- * Returns 0 on if host portion of address is all-zeros,
- * -1 if not all zeros or on failure.
- */
-static int
-addr_host_is_all0s(const struct xaddr *a, u_int masklen)
-{
- struct xaddr tmp_addr, tmp_mask, tmp_result;
-
- memcpy(&tmp_addr, a, sizeof(tmp_addr));
- if (addr_hostmask(a->af, masklen, &tmp_mask) == -1)
- return (-1);
- if (addr_and(&tmp_result, &tmp_addr, &tmp_mask) == -1)
- return (-1);
- return (addr_is_all0s(&tmp_result));
-}
-
-/*
- * Parse a CIDR address (x.x.x.x/y or xxxx:yyyy::/z).
- * Return -1 on parse error, -2 on inconsistency or 0 on success.
- */
-static int
-addr_pton_cidr(const char *p, struct xaddr *n, u_int *l)
-{
- struct xaddr tmp;
- long unsigned int masklen = 999;
- char addrbuf[64], *mp, *cp;
-
- /* Don't modify argument */
- if (p == NULL || strlcpy(addrbuf, p, sizeof(addrbuf)) > sizeof(addrbuf))
- return -1;
-
- if ((mp = strchr(addrbuf, '/')) != NULL) {
- *mp = '\0';
- mp++;
- masklen = strtoul(mp, &cp, 10);
- if (*mp == '\0' || *cp != '\0' || masklen > 128)
- return -1;
- }
-
- if (addr_pton(addrbuf, &tmp) == -1)
- return -1;
-
- if (mp == NULL)
- masklen = addr_unicast_masklen(tmp.af);
- if (masklen_valid(tmp.af, masklen) == -1)
- return -2;
- if (addr_host_is_all0s(&tmp, masklen) != 0)
- return -2;
-
- if (n != NULL)
- memcpy(n, &tmp, sizeof(*n));
- if (l != NULL)
- *l = masklen;
-
- return 0;
-}
-
-static int
-addr_netmatch(const struct xaddr *host, const struct xaddr *net, u_int masklen)
-{
- struct xaddr tmp_mask, tmp_result;
-
- if (host->af != net->af)
- return -1;
-
- if (addr_netmask(host->af, masklen, &tmp_mask) == -1)
- return -1;
- if (addr_and(&tmp_result, host, &tmp_mask) == -1)
- return -1;
- return addr_cmp(&tmp_result, net);
-}
-
-/*
- * Match "addr" against list pattern list "_list", which may contain a
- * mix of CIDR addresses and old-school wildcards.
- *
- * If addr is NULL, then no matching is performed, but _list is parsed
- * and checked for well-formedness.
- *
- * Returns 1 on match found (never returned when addr == NULL).
- * Returns 0 on if no match found, or no errors found when addr == NULL.
- * Returns -1 on negated match found (never returned when addr == NULL).
- * Returns -2 on invalid list entry.
- */
-int
-addr_match_list(const char *addr, const char *_list)
-{
- char *list, *cp, *o;
- struct xaddr try_addr, match_addr;
- u_int masklen, neg;
- int ret = 0, r;
-
- if (addr != NULL && addr_pton(addr, &try_addr) != 0) {
- debug2("%s: couldn't parse address %.100s", __func__, addr);
- return 0;
- }
- if ((o = list = strdup(_list)) == NULL)
- return -1;
- while ((cp = strsep(&list, ",")) != NULL) {
- neg = *cp == '!';
- if (neg)
- cp++;
- if (*cp == '\0') {
- ret = -2;
- break;
- }
- /* Prefer CIDR address matching */
- r = addr_pton_cidr(cp, &match_addr, &masklen);
- if (r == -2) {
- error("Inconsistent mask length for "
- "network \"%.100s\"", cp);
- ret = -2;
- break;
- } else if (r == 0) {
- if (addr != NULL && addr_netmatch(&try_addr,
- &match_addr, masklen) == 0) {
- foundit:
- if (neg) {
- ret = -1;
- break;
- }
- ret = 1;
- }
- continue;
- } else {
- /* If CIDR parse failed, try wildcard string match */
- if (addr != NULL && match_pattern(addr, cp) == 1)
- goto foundit;
- }
- }
- xfree(o);
-
- return ret;
-}
diff --git a/usr/src/cmd/ssh/libssh/common/atomicio.c b/usr/src/cmd/ssh/libssh/common/atomicio.c
deleted file mode 100644
index f4a7945702..0000000000
--- a/usr/src/cmd/ssh/libssh/common/atomicio.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright (c) 2006 Damien Miller. All rights reserved.
- * Copyright (c) 2005 Anil Madhavapeddy. All rights reserved.
- * Copyright (c) 1995,1999 Theo de Raadt. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: atomicio.c,v 1.10 2001/05/08 22:48:07 markus Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "atomicio.h"
-
-/*
- * ensure all of data on socket comes through. f==read || f==write
- */
-ssize_t
-atomicio(f, fd, _s, n)
- ssize_t (*f) ();
- int fd;
- void *_s;
- size_t n;
-{
- char *s = _s;
- ssize_t res, pos = 0;
-
- while (n > pos) {
- res = (f) (fd, s + pos, n - pos);
- switch (res) {
- case -1:
-#ifdef EWOULDBLOCK
- if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK)
-#else
- if (errno == EINTR || errno == EAGAIN)
-#endif
- continue;
- /* FALLTHROUGH */
- case 0:
- return (res);
- default:
- pos += res;
- }
- }
- return (pos);
-}
-
-/*
- * ensure all of data on socket comes through. f==readv || f==writev
- */
-size_t
-atomiciov(ssize_t (*f) (int, const struct iovec *, int), int fd,
- const struct iovec *_iov, int iovcnt)
-{
- size_t pos = 0, rem;
- ssize_t res;
- struct iovec iov_array[IOV_MAX], *iov = iov_array;
-
- if (iovcnt > IOV_MAX) {
- errno = EINVAL;
- return 0;
- }
- /* Make a copy of the iov array because we may modify it below */
- memcpy(iov, _iov, iovcnt * sizeof(*_iov));
-
- for (; iovcnt > 0 && iov[0].iov_len > 0;) {
- res = (f) (fd, iov, iovcnt);
- switch (res) {
- case -1:
- if (errno == EINTR || errno == EAGAIN)
- continue;
- return 0;
- case 0:
- errno = EPIPE;
- return pos;
- default:
- rem = (size_t)res;
- pos += rem;
- /* skip completed iov entries */
- while (iovcnt > 0 && rem >= iov[0].iov_len) {
- rem -= iov[0].iov_len;
- iov++;
- iovcnt--;
- }
- /* This shouldn't happen... */
- if (rem > 0 && (iovcnt <= 0 || rem > iov[0].iov_len)) {
- errno = EFAULT;
- return 0;
- }
- if (iovcnt == 0)
- break;
- /* update pointer in partially complete iov */
- iov[0].iov_base = ((char *)iov[0].iov_base) + rem;
- iov[0].iov_len -= rem;
- }
- }
- return pos;
-}
diff --git a/usr/src/cmd/ssh/libssh/common/authfd.c b/usr/src/cmd/ssh/libssh/common/authfd.c
deleted file mode 100644
index 43fbfbeb3c..0000000000
--- a/usr/src/cmd/ssh/libssh/common/authfd.c
+++ /dev/null
@@ -1,631 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * Functions for connecting the local authentication agent.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- *
- * SSH2 implementation,
- * Copyright (c) 2000 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: authfd.c,v 1.57 2002/09/11 18:27:26 stevesk Exp $");
-
-#include <openssl/evp.h>
-
-#include "ssh.h"
-#include "rsa.h"
-#include "buffer.h"
-#include "bufaux.h"
-#include "xmalloc.h"
-#include "getput.h"
-#include "key.h"
-#include "authfd.h"
-#include "cipher.h"
-#include "kex.h"
-#include "compat.h"
-#include "log.h"
-#include "atomicio.h"
-
-static int agent_present = 0;
-
-/* helper */
-int decode_reply(int type);
-
-/* macro to check for "agent failure" message */
-#define agent_failed(x) \
- ((x == SSH_AGENT_FAILURE) || (x == SSH_COM_AGENT2_FAILURE) || \
- (x == SSH2_AGENT_FAILURE))
-
-int
-ssh_agent_present(void)
-{
- int authfd;
-
- if (agent_present)
- return 1;
- if ((authfd = ssh_get_authentication_socket()) == -1)
- return 0;
- else {
- ssh_close_authentication_socket(authfd);
- return 1;
- }
-}
-
-/* Returns the number of the authentication fd, or -1 if there is none. */
-
-int
-ssh_get_authentication_socket(void)
-{
- const char *authsocket;
- int sock;
- struct sockaddr_un sunaddr;
-
- authsocket = getenv(SSH_AUTHSOCKET_ENV_NAME);
- if (!authsocket)
- return -1;
-
- sunaddr.sun_family = AF_UNIX;
- strlcpy(sunaddr.sun_path, authsocket, sizeof(sunaddr.sun_path));
-
- sock = socket(AF_UNIX, SOCK_STREAM, 0);
- if (sock < 0)
- return -1;
-
- /* close on exec */
- if (fcntl(sock, F_SETFD, FD_CLOEXEC) == -1) {
- close(sock);
- return -1;
- }
- if (connect(sock, (struct sockaddr *) &sunaddr, sizeof sunaddr) < 0) {
- close(sock);
- return -1;
- }
- agent_present = 1;
- return sock;
-}
-
-static int
-ssh_request_reply(AuthenticationConnection *auth, Buffer *request, Buffer *reply)
-{
- int l, len;
- char buf[1024];
-
- /* Get the length of the message, and format it in the buffer. */
- len = buffer_len(request);
- PUT_32BIT(buf, len);
-
- /* Send the length and then the packet to the agent. */
- if (atomicio(write, auth->fd, buf, 4) != 4 ||
- atomicio(write, auth->fd, buffer_ptr(request),
- buffer_len(request)) != buffer_len(request)) {
- error("Error writing to authentication socket.");
- return 0;
- }
- /*
- * Wait for response from the agent. First read the length of the
- * response packet.
- */
- len = 4;
- while (len > 0) {
- l = read(auth->fd, buf + 4 - len, len);
- if (l == -1 && (errno == EAGAIN || errno == EINTR))
- continue;
- if (l <= 0) {
- error("Error reading response length from authentication socket.");
- return 0;
- }
- len -= l;
- }
-
- /* Extract the length, and check it for sanity. */
- len = GET_32BIT(buf);
- if (len > 256 * 1024)
- fatal("Authentication response too long: %d", len);
-
- /* Read the rest of the response in to the buffer. */
- buffer_clear(reply);
- while (len > 0) {
- l = len;
- if (l > sizeof(buf))
- l = sizeof(buf);
- l = read(auth->fd, buf, l);
- if (l == -1 && (errno == EAGAIN || errno == EINTR))
- continue;
- if (l <= 0) {
- error("Error reading response from authentication socket.");
- return 0;
- }
- buffer_append(reply, buf, l);
- len -= l;
- }
- return 1;
-}
-
-/*
- * Closes the agent socket if it should be closed (depends on how it was
- * obtained). The argument must have been returned by
- * ssh_get_authentication_socket().
- */
-
-void
-ssh_close_authentication_socket(int sock)
-{
- if (getenv(SSH_AUTHSOCKET_ENV_NAME))
- close(sock);
-}
-
-/*
- * Opens and connects a private socket for communication with the
- * authentication agent. Returns the file descriptor (which must be
- * shut down and closed by the caller when no longer needed).
- * Returns NULL if an error occurred and the connection could not be
- * opened.
- */
-
-AuthenticationConnection *
-ssh_get_authentication_connection(void)
-{
- AuthenticationConnection *auth;
- int sock;
-
- sock = ssh_get_authentication_socket();
-
- /*
- * Fail if we couldn't obtain a connection. This happens if we
- * exited due to a timeout.
- */
- if (sock < 0)
- return NULL;
-
- auth = xmalloc(sizeof(*auth));
- auth->fd = sock;
- buffer_init(&auth->identities);
- auth->howmany = 0;
-
- return auth;
-}
-
-/*
- * Closes the connection to the authentication agent and frees any associated
- * memory.
- */
-
-void
-ssh_close_authentication_connection(AuthenticationConnection *auth)
-{
- buffer_free(&auth->identities);
- close(auth->fd);
- xfree(auth);
-}
-
-/* Lock/unlock agent */
-int
-ssh_lock_agent(AuthenticationConnection *auth, int lock, const char *password)
-{
- int type;
- Buffer msg;
-
- buffer_init(&msg);
- buffer_put_char(&msg, lock ? SSH_AGENTC_LOCK : SSH_AGENTC_UNLOCK);
- buffer_put_cstring(&msg, password);
-
- if (ssh_request_reply(auth, &msg, &msg) == 0) {
- buffer_free(&msg);
- return 0;
- }
- type = buffer_get_char(&msg);
- buffer_free(&msg);
- return decode_reply(type);
-}
-
-/*
- * Returns the first authentication identity held by the agent.
- */
-
-int
-ssh_get_num_identities(AuthenticationConnection *auth, int version)
-{
- int type, code1 = 0, code2 = 0;
- Buffer request;
-
- switch (version) {
- case 1:
- code1 = SSH_AGENTC_REQUEST_RSA_IDENTITIES;
- code2 = SSH_AGENT_RSA_IDENTITIES_ANSWER;
- break;
- case 2:
- code1 = SSH2_AGENTC_REQUEST_IDENTITIES;
- code2 = SSH2_AGENT_IDENTITIES_ANSWER;
- break;
- default:
- return 0;
- }
-
- /*
- * Send a message to the agent requesting for a list of the
- * identities it can represent.
- */
- buffer_init(&request);
- buffer_put_char(&request, code1);
-
- buffer_clear(&auth->identities);
- if (ssh_request_reply(auth, &request, &auth->identities) == 0) {
- buffer_free(&request);
- return 0;
- }
- buffer_free(&request);
-
- /* Get message type, and verify that we got a proper answer. */
- type = buffer_get_char(&auth->identities);
- if (agent_failed(type)) {
- return 0;
- } else if (type != code2) {
- fatal("Bad authentication reply message type: %d", type);
- }
-
- /* Get the number of entries in the response and check it for sanity. */
- auth->howmany = buffer_get_int(&auth->identities);
- if (auth->howmany > 1024)
- fatal("Too many identities in authentication reply: %d",
- auth->howmany);
-
- return auth->howmany;
-}
-
-Key *
-ssh_get_first_identity(AuthenticationConnection *auth, char **comment, int version)
-{
- /* get number of identities and return the first entry (if any). */
- if (ssh_get_num_identities(auth, version) > 0)
- return ssh_get_next_identity(auth, comment, version);
- return NULL;
-}
-
-Key *
-ssh_get_next_identity(AuthenticationConnection *auth, char **comment, int version)
-{
- u_int bits;
- u_char *blob;
- u_int blen;
- Key *key = NULL;
-
- /* Return failure if no more entries. */
- if (auth->howmany <= 0)
- return NULL;
-
- /*
- * Get the next entry from the packet. These will abort with a fatal
- * error if the packet is too short or contains corrupt data.
- */
- switch (version) {
- case 1:
- key = key_new(KEY_RSA1);
- bits = buffer_get_int(&auth->identities);
- buffer_get_bignum(&auth->identities, key->rsa->e);
- buffer_get_bignum(&auth->identities, key->rsa->n);
- *comment = buffer_get_string(&auth->identities, NULL);
- if (bits != BN_num_bits(key->rsa->n))
- log("Warning: identity keysize mismatch: actual %d, announced %u",
- BN_num_bits(key->rsa->n), bits);
- break;
- case 2:
- blob = buffer_get_string(&auth->identities, &blen);
- *comment = buffer_get_string(&auth->identities, NULL);
- key = key_from_blob(blob, blen);
- xfree(blob);
- break;
- default:
- return NULL;
- break;
- }
- /* Decrement the number of remaining entries. */
- auth->howmany--;
- return key;
-}
-
-/*
- * Generates a random challenge, sends it to the agent, and waits for
- * response from the agent. Returns true (non-zero) if the agent gave the
- * correct answer, zero otherwise. Response type selects the style of
- * response desired, with 0 corresponding to protocol version 1.0 (no longer
- * supported) and 1 corresponding to protocol version 1.1.
- */
-
-int
-ssh_decrypt_challenge(AuthenticationConnection *auth,
- Key* key, BIGNUM *challenge,
- u_char session_id[16],
- u_int response_type,
- u_char response[16])
-{
- Buffer buffer;
- int success = 0;
- int i;
- int type;
-
- if (key->type != KEY_RSA1)
- return 0;
- if (response_type == 0) {
- log("Compatibility with ssh protocol version 1.0 no longer supported.");
- return 0;
- }
- buffer_init(&buffer);
- buffer_put_char(&buffer, SSH_AGENTC_RSA_CHALLENGE);
- buffer_put_int(&buffer, BN_num_bits(key->rsa->n));
- buffer_put_bignum(&buffer, key->rsa->e);
- buffer_put_bignum(&buffer, key->rsa->n);
- buffer_put_bignum(&buffer, challenge);
- buffer_append(&buffer, session_id, 16);
- buffer_put_int(&buffer, response_type);
-
- if (ssh_request_reply(auth, &buffer, &buffer) == 0) {
- buffer_free(&buffer);
- return 0;
- }
- type = buffer_get_char(&buffer);
-
- if (agent_failed(type)) {
- log("Agent admitted failure to authenticate using the key.");
- } else if (type != SSH_AGENT_RSA_RESPONSE) {
- fatal("Bad authentication response: %d", type);
- } else {
- success = 1;
- /*
- * Get the response from the packet. This will abort with a
- * fatal error if the packet is corrupt.
- */
- for (i = 0; i < 16; i++)
- response[i] = buffer_get_char(&buffer);
- }
- buffer_free(&buffer);
- return success;
-}
-
-/* ask agent to sign data, returns -1 on error, 0 on success */
-int
-ssh_agent_sign(AuthenticationConnection *auth,
- Key *key,
- u_char **sigp, u_int *lenp,
- u_char *data, u_int datalen)
-{
- Buffer msg;
- u_char *blob;
- u_int blen;
- int type, flags = 0;
- int ret = -1;
-
- if (key_to_blob(key, &blob, &blen) == 0)
- return -1;
-
- if (datafellows & SSH_BUG_SIGBLOB)
- flags = SSH_AGENT_OLD_SIGNATURE;
-
- buffer_init(&msg);
- buffer_put_char(&msg, SSH2_AGENTC_SIGN_REQUEST);
- buffer_put_string(&msg, blob, blen);
- buffer_put_string(&msg, data, datalen);
- buffer_put_int(&msg, flags);
- xfree(blob);
-
- if (ssh_request_reply(auth, &msg, &msg) == 0) {
- buffer_free(&msg);
- return -1;
- }
- type = buffer_get_char(&msg);
- if (agent_failed(type)) {
- log("Agent admitted failure to sign using the key.");
- } else if (type != SSH2_AGENT_SIGN_RESPONSE) {
- fatal("Bad authentication response: %d", type);
- } else {
- ret = 0;
- *sigp = buffer_get_string(&msg, lenp);
- }
- buffer_free(&msg);
- return ret;
-}
-
-/* Encode key for a message to the agent. */
-
-static void
-ssh_encode_identity_rsa1(Buffer *b, RSA *key, const char *comment)
-{
- buffer_put_int(b, BN_num_bits(key->n));
- buffer_put_bignum(b, key->n);
- buffer_put_bignum(b, key->e);
- buffer_put_bignum(b, key->d);
- /* To keep within the protocol: p < q for ssh. in SSL p > q */
- buffer_put_bignum(b, key->iqmp); /* ssh key->u */
- buffer_put_bignum(b, key->q); /* ssh key->p, SSL key->q */
- buffer_put_bignum(b, key->p); /* ssh key->q, SSL key->p */
- buffer_put_cstring(b, comment);
-}
-
-static void
-ssh_encode_identity_ssh2(Buffer *b, Key *key, const char *comment)
-{
- buffer_put_cstring(b, key_ssh_name(key));
- switch (key->type) {
- case KEY_RSA:
- buffer_put_bignum2(b, key->rsa->n);
- buffer_put_bignum2(b, key->rsa->e);
- buffer_put_bignum2(b, key->rsa->d);
- buffer_put_bignum2(b, key->rsa->iqmp);
- buffer_put_bignum2(b, key->rsa->p);
- buffer_put_bignum2(b, key->rsa->q);
- break;
- case KEY_DSA:
- buffer_put_bignum2(b, key->dsa->p);
- buffer_put_bignum2(b, key->dsa->q);
- buffer_put_bignum2(b, key->dsa->g);
- buffer_put_bignum2(b, key->dsa->pub_key);
- buffer_put_bignum2(b, key->dsa->priv_key);
- break;
- }
- buffer_put_cstring(b, comment);
-}
-
-/*
- * Adds an identity to the authentication server. This call is not meant to
- * be used by normal applications.
- */
-
-int
-ssh_add_identity_constrained(AuthenticationConnection *auth, Key *key,
- const char *comment, u_int life)
-{
- Buffer msg;
- int type, constrained = (life != 0);
-
- buffer_init(&msg);
-
- switch (key->type) {
- case KEY_RSA1:
- type = constrained ?
- SSH_AGENTC_ADD_RSA_ID_CONSTRAINED :
- SSH_AGENTC_ADD_RSA_IDENTITY;
- buffer_put_char(&msg, type);
- ssh_encode_identity_rsa1(&msg, key->rsa, comment);
- break;
- case KEY_RSA:
- case KEY_DSA:
- type = constrained ?
- SSH2_AGENTC_ADD_ID_CONSTRAINED :
- SSH2_AGENTC_ADD_IDENTITY;
- buffer_put_char(&msg, type);
- ssh_encode_identity_ssh2(&msg, key, comment);
- break;
- default:
- buffer_free(&msg);
- return 0;
- break;
- }
- if (constrained) {
- if (life != 0) {
- buffer_put_char(&msg, SSH_AGENT_CONSTRAIN_LIFETIME);
- buffer_put_int(&msg, life);
- }
- }
- if (ssh_request_reply(auth, &msg, &msg) == 0) {
- buffer_free(&msg);
- return 0;
- }
- type = buffer_get_char(&msg);
- buffer_free(&msg);
- return decode_reply(type);
-}
-
-int
-ssh_add_identity(AuthenticationConnection *auth, Key *key, const char *comment)
-{
- return ssh_add_identity_constrained(auth, key, comment, 0);
-}
-
-/*
- * Removes an identity from the authentication server. This call is not
- * meant to be used by normal applications.
- */
-
-int
-ssh_remove_identity(AuthenticationConnection *auth, Key *key)
-{
- Buffer msg;
- int type;
- u_char *blob;
- u_int blen;
-
- buffer_init(&msg);
-
- if (key->type == KEY_RSA1) {
- buffer_put_char(&msg, SSH_AGENTC_REMOVE_RSA_IDENTITY);
- buffer_put_int(&msg, BN_num_bits(key->rsa->n));
- buffer_put_bignum(&msg, key->rsa->e);
- buffer_put_bignum(&msg, key->rsa->n);
- } else if (key->type == KEY_DSA || key->type == KEY_RSA) {
- key_to_blob(key, &blob, &blen);
- buffer_put_char(&msg, SSH2_AGENTC_REMOVE_IDENTITY);
- buffer_put_string(&msg, blob, blen);
- xfree(blob);
- } else {
- buffer_free(&msg);
- return 0;
- }
- if (ssh_request_reply(auth, &msg, &msg) == 0) {
- buffer_free(&msg);
- return 0;
- }
- type = buffer_get_char(&msg);
- buffer_free(&msg);
- return decode_reply(type);
-}
-
-
-/*
- * Removes all identities from the agent. This call is not meant to be used
- * by normal applications.
- */
-
-int
-ssh_remove_all_identities(AuthenticationConnection *auth, int version)
-{
- Buffer msg;
- int type;
- int code = (version==1) ?
- SSH_AGENTC_REMOVE_ALL_RSA_IDENTITIES :
- SSH2_AGENTC_REMOVE_ALL_IDENTITIES;
-
- buffer_init(&msg);
- buffer_put_char(&msg, code);
-
- if (ssh_request_reply(auth, &msg, &msg) == 0) {
- buffer_free(&msg);
- return 0;
- }
- type = buffer_get_char(&msg);
- buffer_free(&msg);
- return decode_reply(type);
-}
-
-int
-decode_reply(int type)
-{
- switch (type) {
- case SSH_AGENT_FAILURE:
- case SSH_COM_AGENT2_FAILURE:
- case SSH2_AGENT_FAILURE:
- log("SSH_AGENT_FAILURE");
- return 0;
- case SSH_AGENT_SUCCESS:
- return 1;
- default:
- fatal("Bad response from authentication agent: %d", type);
- }
- /* NOTREACHED */
- return 0;
-}
diff --git a/usr/src/cmd/ssh/libssh/common/authfile.c b/usr/src/cmd/ssh/libssh/common/authfile.c
deleted file mode 100644
index 29076f390f..0000000000
--- a/usr/src/cmd/ssh/libssh/common/authfile.c
+++ /dev/null
@@ -1,624 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * This file contains functions for reading and writing identity files, and
- * for reading the passphrase from the user.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- *
- *
- * Copyright (c) 2000 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: authfile.c,v 1.50 2002/06/24 14:55:38 markus Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <openssl/err.h>
-#include <openssl/evp.h>
-#include <openssl/pem.h>
-
-#include "cipher.h"
-#include "xmalloc.h"
-#include "buffer.h"
-#include "bufaux.h"
-#include "key.h"
-#include "ssh.h"
-#include "log.h"
-#include "authfile.h"
-#include "rsa.h"
-
-/* Version identification string for SSH v1 identity files. */
-static const char authfile_id_string[] =
- "SSH PRIVATE KEY FILE FORMAT 1.1\n";
-
-/*
- * Saves the authentication (private) key in a file, encrypting it with
- * passphrase. The identification of the file (lowest 64 bits of n) will
- * precede the key to provide identification of the key without needing a
- * passphrase.
- */
-
-static int
-key_save_private_rsa1(Key *key, const char *filename, const char *passphrase,
- const char *comment)
-{
- Buffer buffer, encrypted;
- u_char buf[100], *cp;
- int fd, i, cipher_num;
- CipherContext ciphercontext;
- Cipher *cipher;
- u_int32_t rand;
-
- /*
- * If the passphrase is empty, use SSH_CIPHER_NONE to ease converting
- * to another cipher; otherwise use SSH_AUTHFILE_CIPHER.
- */
- cipher_num = (strcmp(passphrase, "") == 0) ?
- SSH_CIPHER_NONE : SSH_AUTHFILE_CIPHER;
- if ((cipher = cipher_by_number(cipher_num)) == NULL)
- fatal("save_private_key_rsa: bad cipher");
-
- /* This buffer is used to built the secret part of the private key. */
- buffer_init(&buffer);
-
- /* Put checkbytes for checking passphrase validity. */
- rand = arc4random();
- buf[0] = rand & 0xff;
- buf[1] = (rand >> 8) & 0xff;
- buf[2] = buf[0];
- buf[3] = buf[1];
- buffer_append(&buffer, buf, 4);
-
- /*
- * Store the private key (n and e will not be stored because they
- * will be stored in plain text, and storing them also in encrypted
- * format would just give known plaintext).
- */
- buffer_put_bignum(&buffer, key->rsa->d);
- buffer_put_bignum(&buffer, key->rsa->iqmp);
- buffer_put_bignum(&buffer, key->rsa->q); /* reverse from SSL p */
- buffer_put_bignum(&buffer, key->rsa->p); /* reverse from SSL q */
-
- /* Pad the part to be encrypted until its size is a multiple of 8. */
- while (buffer_len(&buffer) % 8 != 0)
- buffer_put_char(&buffer, 0);
-
- /* This buffer will be used to contain the data in the file. */
- buffer_init(&encrypted);
-
- /* First store keyfile id string. */
- for (i = 0; authfile_id_string[i]; i++)
- buffer_put_char(&encrypted, authfile_id_string[i]);
- buffer_put_char(&encrypted, 0);
-
- /* Store cipher type. */
- buffer_put_char(&encrypted, cipher_num);
- buffer_put_int(&encrypted, 0); /* For future extension */
-
- /* Store public key. This will be in plain text. */
- buffer_put_int(&encrypted, BN_num_bits(key->rsa->n));
- buffer_put_bignum(&encrypted, key->rsa->n);
- buffer_put_bignum(&encrypted, key->rsa->e);
- buffer_put_cstring(&encrypted, comment);
-
- /* Allocate space for the private part of the key in the buffer. */
- cp = buffer_append_space(&encrypted, buffer_len(&buffer));
-
- cipher_set_key_string(&ciphercontext, cipher, passphrase,
- CIPHER_ENCRYPT);
- cipher_crypt(&ciphercontext, cp,
- buffer_ptr(&buffer), buffer_len(&buffer));
- cipher_cleanup(&ciphercontext);
- memset(&ciphercontext, 0, sizeof(ciphercontext));
-
- /* Destroy temporary data. */
- memset(buf, 0, sizeof(buf));
- buffer_free(&buffer);
-
- fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0600);
- if (fd < 0) {
- error("open %s failed: %s.", filename, strerror(errno));
- return 0;
- }
- if (write(fd, buffer_ptr(&encrypted), buffer_len(&encrypted)) !=
- buffer_len(&encrypted)) {
- error("write to key file %s failed: %s", filename,
- strerror(errno));
- buffer_free(&encrypted);
- close(fd);
- unlink(filename);
- return 0;
- }
- close(fd);
- buffer_free(&encrypted);
- return 1;
-}
-
-/* save SSH v2 key in OpenSSL PEM format */
-static int
-key_save_private_pem(Key *key, const char *filename, const char *_passphrase,
- const char *comment)
-{
- FILE *fp;
- int fd;
- int success = 0;
- int len = strlen(_passphrase);
- u_char *passphrase = (len > 0) ? (u_char *)_passphrase : NULL;
- const EVP_CIPHER *cipher = (len > 0) ? EVP_des_ede3_cbc() : NULL;
-
- if (len > 0 && len <= 4) {
- error("passphrase too short: have %d bytes, need > 4", len);
- return 0;
- }
- fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0600);
- if (fd < 0) {
- error("open %s failed: %s.", filename, strerror(errno));
- return 0;
- }
- fp = fdopen(fd, "w");
- if (fp == NULL ) {
- error("fdopen %s failed: %s.", filename, strerror(errno));
- close(fd);
- return 0;
- }
- switch (key->type) {
- case KEY_DSA:
- success = PEM_write_DSAPrivateKey(fp, key->dsa,
- cipher, passphrase, len, NULL, NULL);
- break;
- case KEY_RSA:
- success = PEM_write_RSAPrivateKey(fp, key->rsa,
- cipher, passphrase, len, NULL, NULL);
- break;
- }
- fclose(fp);
- return success;
-}
-
-int
-key_save_private(Key *key, const char *filename, const char *passphrase,
- const char *comment)
-{
- switch (key->type) {
- case KEY_RSA1:
- return key_save_private_rsa1(key, filename, passphrase,
- comment);
- break;
- case KEY_DSA:
- case KEY_RSA:
- return key_save_private_pem(key, filename, passphrase,
- comment);
- break;
- default:
- break;
- }
- error("key_save_private: cannot save key type %d", key->type);
- return 0;
-}
-
-/*
- * Loads the public part of the ssh v1 key file. Returns NULL if an error was
- * encountered (the file does not exist or is not readable), and the key
- * otherwise.
- */
-
-static Key *
-key_load_public_rsa1(int fd, const char *filename, char **commentp)
-{
- Buffer buffer;
- Key *pub;
- char *cp;
- int i;
- off_t len;
-
- len = lseek(fd, (off_t) 0, SEEK_END);
- lseek(fd, (off_t) 0, SEEK_SET);
-
- buffer_init(&buffer);
- cp = buffer_append_space(&buffer, len);
-
- if (read(fd, cp, (size_t) len) != (size_t) len) {
- debug("Read from key file %.200s failed: %.100s", filename,
- strerror(errno));
- buffer_free(&buffer);
- return NULL;
- }
-
- /* Check that it is at least big enough to contain the ID string. */
- if (len < sizeof(authfile_id_string)) {
- debug3("Not a RSA1 key file %.200s.", filename);
- buffer_free(&buffer);
- return NULL;
- }
- /*
- * Make sure it begins with the id string. Consume the id string
- * from the buffer.
- */
- for (i = 0; i < sizeof(authfile_id_string); i++)
- if (buffer_get_char(&buffer) != authfile_id_string[i]) {
- debug3("Not a RSA1 key file %.200s.", filename);
- buffer_free(&buffer);
- return NULL;
- }
- /* Skip cipher type and reserved data. */
- (void) buffer_get_char(&buffer); /* cipher type */
- (void) buffer_get_int(&buffer); /* reserved */
-
- /* Read the public key from the buffer. */
- (void) buffer_get_int(&buffer);
- pub = key_new(KEY_RSA1);
- buffer_get_bignum(&buffer, pub->rsa->n);
- buffer_get_bignum(&buffer, pub->rsa->e);
- if (commentp)
- *commentp = buffer_get_string(&buffer, NULL);
- /* The encrypted private part is not parsed by this function. */
-
- buffer_free(&buffer);
- return pub;
-}
-
-/* load public key from private-key file, works only for SSH v1 */
-Key *
-key_load_public_type(int type, const char *filename, char **commentp)
-{
- Key *pub;
- int fd;
-
- if (type == KEY_RSA1) {
- fd = open(filename, O_RDONLY);
- if (fd < 0)
- return NULL;
- pub = key_load_public_rsa1(fd, filename, commentp);
- close(fd);
- return pub;
- }
- return NULL;
-}
-
-/*
- * Loads the private key from the file. Returns 0 if an error is encountered
- * (file does not exist or is not readable, or passphrase is bad). This
- * initializes the private key.
- * Assumes we are called under uid of the owner of the file.
- */
-
-static Key *
-key_load_private_rsa1(int fd, const char *filename, const char *passphrase,
- char **commentp)
-{
- int i, check1, check2, cipher_type;
- off_t len;
- Buffer buffer, decrypted;
- u_char *cp;
- CipherContext ciphercontext;
- Cipher *cipher;
- Key *prv = NULL;
-
- len = lseek(fd, (off_t) 0, SEEK_END);
- lseek(fd, (off_t) 0, SEEK_SET);
-
- buffer_init(&buffer);
- cp = buffer_append_space(&buffer, len);
-
- if (read(fd, cp, (size_t) len) != (size_t) len) {
- debug("Read from key file %.200s failed: %.100s", filename,
- strerror(errno));
- buffer_free(&buffer);
- close(fd);
- return NULL;
- }
-
- /* Check that it is at least big enough to contain the ID string. */
- if (len < sizeof(authfile_id_string)) {
- debug3("Not a RSA1 key file %.200s.", filename);
- buffer_free(&buffer);
- close(fd);
- return NULL;
- }
- /*
- * Make sure it begins with the id string. Consume the id string
- * from the buffer.
- */
- for (i = 0; i < sizeof(authfile_id_string); i++)
- if (buffer_get_char(&buffer) != authfile_id_string[i]) {
- debug3("Not a RSA1 key file %.200s.", filename);
- buffer_free(&buffer);
- close(fd);
- return NULL;
- }
-
- /* Read cipher type. */
- cipher_type = buffer_get_char(&buffer);
- (void) buffer_get_int(&buffer); /* Reserved data. */
-
- /* Read the public key from the buffer. */
- (void) buffer_get_int(&buffer);
- prv = key_new_private(KEY_RSA1);
-
- buffer_get_bignum(&buffer, prv->rsa->n);
- buffer_get_bignum(&buffer, prv->rsa->e);
- if (commentp)
- *commentp = buffer_get_string(&buffer, NULL);
- else
- xfree(buffer_get_string(&buffer, NULL));
-
- /* Check that it is a supported cipher. */
- cipher = cipher_by_number(cipher_type);
- if (cipher == NULL) {
- debug("Unsupported cipher %d used in key file %.200s.",
- cipher_type, filename);
- buffer_free(&buffer);
- goto fail;
- }
- /* Initialize space for decrypted data. */
- buffer_init(&decrypted);
- cp = buffer_append_space(&decrypted, buffer_len(&buffer));
-
- /* Rest of the buffer is encrypted. Decrypt it using the passphrase. */
- cipher_set_key_string(&ciphercontext, cipher, passphrase,
- CIPHER_DECRYPT);
- cipher_crypt(&ciphercontext, cp,
- buffer_ptr(&buffer), buffer_len(&buffer));
- cipher_cleanup(&ciphercontext);
- memset(&ciphercontext, 0, sizeof(ciphercontext));
- buffer_free(&buffer);
-
- check1 = buffer_get_char(&decrypted);
- check2 = buffer_get_char(&decrypted);
- if (check1 != buffer_get_char(&decrypted) ||
- check2 != buffer_get_char(&decrypted)) {
- if (strcmp(passphrase, "") != 0)
- debug("Bad passphrase supplied for key file %.200s.",
- filename);
- /* Bad passphrase. */
- buffer_free(&decrypted);
- goto fail;
- }
- /* Read the rest of the private key. */
- buffer_get_bignum(&decrypted, prv->rsa->d);
- buffer_get_bignum(&decrypted, prv->rsa->iqmp); /* u */
- /* in SSL and SSH v1 p and q are exchanged */
- buffer_get_bignum(&decrypted, prv->rsa->q); /* p */
- buffer_get_bignum(&decrypted, prv->rsa->p); /* q */
-
- /* calculate p-1 and q-1 */
- rsa_generate_additional_parameters(prv->rsa);
-
- buffer_free(&decrypted);
- close(fd);
- return prv;
-
-fail:
- if (commentp)
- xfree(*commentp);
- close(fd);
- key_free(prv);
- return NULL;
-}
-
-Key *
-key_load_private_pem(int fd, int type, const char *passphrase,
- char **commentp)
-{
- FILE *fp;
- EVP_PKEY *pk = NULL;
- Key *prv = NULL;
- char *name = "<no key>";
-
- fp = fdopen(fd, "r");
- if (fp == NULL) {
- error("fdopen failed: %s", strerror(errno));
- close(fd);
- return NULL;
- }
- pk = PEM_read_PrivateKey(fp, NULL, NULL, (char *)passphrase);
- if (pk == NULL) {
- debug("PEM_read_PrivateKey failed");
- (void)ERR_get_error();
- } else if (pk->type == EVP_PKEY_RSA &&
- (type == KEY_UNSPEC||type==KEY_RSA)) {
- prv = key_new(KEY_UNSPEC);
- prv->rsa = EVP_PKEY_get1_RSA(pk);
- prv->type = KEY_RSA;
- name = "rsa w/o comment";
-#ifdef DEBUG_PK
- RSA_print_fp(stderr, prv->rsa, 8);
-#endif
- } else if (pk->type == EVP_PKEY_DSA &&
- (type == KEY_UNSPEC||type==KEY_DSA)) {
- prv = key_new(KEY_UNSPEC);
- prv->dsa = EVP_PKEY_get1_DSA(pk);
- prv->type = KEY_DSA;
- name = "dsa w/o comment";
-#ifdef DEBUG_PK
- DSA_print_fp(stderr, prv->dsa, 8);
-#endif
- } else {
- error("PEM_read_PrivateKey: mismatch or "
- "unknown EVP_PKEY save_type %d", pk->save_type);
- }
- fclose(fp);
- if (pk != NULL)
- EVP_PKEY_free(pk);
- if (prv != NULL && commentp)
- *commentp = xstrdup(name);
- debug("read PEM private key done: type %s",
- prv ? key_type(prv) : "<unknown>");
- return prv;
-}
-
-static int
-key_perm_ok(int fd, const char *filename)
-{
- struct stat st;
-
- if (fstat(fd, &st) < 0)
- return 0;
- /*
- * if a key owned by the user is accessed, then we check the
- * permissions of the file. if the key owned by a different user,
- * then we don't care.
- */
-#ifdef HAVE_CYGWIN
- if (check_ntsec(filename))
-#endif
- if ((st.st_uid == getuid()) && (st.st_mode & 077) != 0) {
- error("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@");
- error("@ WARNING: UNPROTECTED PRIVATE KEY FILE! @");
- error("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@");
- error("Permissions 0%3.3o for '%s' are too open.",
- (int)(st.st_mode & 0777), filename);
- error("It is recommended that your private key files are NOT accessible by others.");
- error("This private key will be ignored.");
- return 0;
- }
- return 1;
-}
-
-Key *
-key_load_private_type(int type, const char *filename, const char *passphrase,
- char **commentp)
-{
- int fd;
-
- fd = open(filename, O_RDONLY);
- if (fd < 0)
- return NULL;
- if (!key_perm_ok(fd, filename)) {
- error("bad permissions: ignore key: %s", filename);
- close(fd);
- return NULL;
- }
- switch (type) {
- case KEY_RSA1:
- return key_load_private_rsa1(fd, filename, passphrase,
- commentp);
- /* closes fd */
- break;
- case KEY_DSA:
- case KEY_RSA:
- case KEY_UNSPEC:
- return key_load_private_pem(fd, type, passphrase, commentp);
- /* closes fd */
- break;
- default:
- close(fd);
- break;
- }
- return NULL;
-}
-
-Key *
-key_load_private(const char *filename, const char *passphrase,
- char **commentp)
-{
- Key *pub, *prv;
- int fd;
-
- fd = open(filename, O_RDONLY);
- if (fd < 0)
- return NULL;
- if (!key_perm_ok(fd, filename)) {
- error("bad permissions: ignore key: %s", filename);
- close(fd);
- return NULL;
- }
- pub = key_load_public_rsa1(fd, filename, commentp);
- lseek(fd, (off_t) 0, SEEK_SET); /* rewind */
- if (pub == NULL) {
- /* closes fd */
- prv = key_load_private_pem(fd, KEY_UNSPEC, passphrase, NULL);
- /* use the filename as a comment for PEM */
- if (commentp && prv)
- *commentp = xstrdup(filename);
- } else {
- /* it's a SSH v1 key if the public key part is readable */
- key_free(pub);
- /* closes fd */
- prv = key_load_private_rsa1(fd, filename, passphrase, NULL);
- }
- return prv;
-}
-
-static int
-key_try_load_public(Key *k, const char *filename, char **commentp)
-{
- FILE *f;
- char line[4096];
- char *cp;
-
- f = fopen(filename, "r");
- if (f != NULL) {
- while (fgets(line, sizeof(line), f)) {
- line[sizeof(line)-1] = '\0';
- cp = line;
- switch (*cp) {
- case '#':
- case '\n':
- case '\0':
- continue;
- }
- /* Skip leading whitespace. */
- for (; *cp && (*cp == ' ' || *cp == '\t'); cp++)
- ;
- if (*cp) {
- if (key_read(k, &cp) == 1) {
- if (commentp)
- *commentp=xstrdup(filename);
- fclose(f);
- return 1;
- }
- }
- }
- fclose(f);
- }
- return 0;
-}
-
-/* load public key from ssh v1 private or any pubkey file */
-Key *
-key_load_public(const char *filename, char **commentp)
-{
- Key *pub;
- char file[MAXPATHLEN];
-
- pub = key_load_public_type(KEY_RSA1, filename, commentp);
- if (pub != NULL)
- return pub;
- pub = key_new(KEY_UNSPEC);
- if (key_try_load_public(pub, filename, commentp) == 1)
- return pub;
- if ((strlcpy(file, filename, sizeof file) < sizeof(file)) &&
- (strlcat(file, ".pub", sizeof file) < sizeof(file)) &&
- (key_try_load_public(pub, file, commentp) == 1))
- return pub;
- key_free(pub);
- return NULL;
-}
diff --git a/usr/src/cmd/ssh/libssh/common/bufaux.c b/usr/src/cmd/ssh/libssh/common/bufaux.c
deleted file mode 100644
index 0a94ba9054..0000000000
--- a/usr/src/cmd/ssh/libssh/common/bufaux.c
+++ /dev/null
@@ -1,469 +0,0 @@
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * Auxiliary functions for storing and retrieving various data types to/from
- * Buffers.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- *
- *
- * SSH2 packet format added by Markus Friedl
- * Copyright (c) 2000 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: bufaux.c,v 1.27 2002/06/26 08:53:12 markus Exp $");
-
-#include <langinfo.h>
-#include <openssl/bn.h>
-#include "bufaux.h"
-#include "xmalloc.h"
-#include "getput.h"
-#include "log.h"
-#include "g11n.h"
-
-/*
- * Stores an BIGNUM in the buffer with a 2-byte msb first bit count, followed
- * by (bits+7)/8 bytes of binary data, msb first.
- */
-int
-buffer_put_bignum_ret(Buffer *buffer, const BIGNUM *value)
-{
- int bits = BN_num_bits(value);
- int bin_size = (bits + 7) / 8;
- u_char *buf = xmalloc(bin_size);
- int oi;
- char msg[2];
-
- /* Get the value of in binary */
- oi = BN_bn2bin(value, buf);
- if (oi != bin_size) {
- error("buffer_put_bignum_ret: BN_bn2bin() failed: oi %d != bin_size %d",
- oi, bin_size);
- xfree(buf);
- return (-1);
- }
-
- /* Store the number of bits in the buffer in two bytes, msb first. */
- PUT_16BIT(msg, bits);
- buffer_append(buffer, msg, 2);
- /* Store the binary data. */
- buffer_append(buffer, (char *)buf, oi);
-
- memset(buf, 0, bin_size);
- xfree(buf);
-
- return (0);
-}
-
-void
-buffer_put_bignum(Buffer *buffer, const BIGNUM *value)
-{
- if (buffer_put_bignum_ret(buffer, value) == -1)
- fatal("buffer_put_bignum: buffer error");
-}
-
-/*
- * Retrieves an BIGNUM from the buffer.
- */
-int
-buffer_get_bignum_ret(Buffer *buffer, BIGNUM *value)
-{
- u_int bits, bytes;
- u_char buf[2], *bin;
-
- /* Get the number for bits. */
- if (buffer_get_ret(buffer, (char *) buf, 2) == -1) {
- error("buffer_get_bignum_ret: invalid length");
- return (-1);
- }
- bits = GET_16BIT(buf);
- /* Compute the number of binary bytes that follow. */
- bytes = (bits + 7) / 8;
- if (bytes > 8 * 1024) {
- error("buffer_get_bignum_ret: cannot handle BN of size %d", bytes);
- return (-1);
- }
- if (buffer_len(buffer) < bytes) {
- error("buffer_get_bignum_ret: input buffer too small");
- return (-1);
- }
- bin = buffer_ptr(buffer);
- BN_bin2bn(bin, bytes, value);
- if (buffer_consume_ret(buffer, bytes) == -1) {
- error("buffer_get_bignum_ret: buffer_consume failed");
- return (-1);
- }
- return (0);
-}
-
-void
-buffer_get_bignum(Buffer *buffer, BIGNUM *value)
-{
- if (buffer_get_bignum_ret(buffer, value) == -1)
- fatal("buffer_get_bignum: buffer error");
-}
-
-/*
- * Stores an BIGNUM in the buffer in SSH2 format.
- */
-int
-buffer_put_bignum2_ret(Buffer *buffer, const BIGNUM *value)
-{
- u_int bytes;
- u_char *buf;
- int oi;
- u_int hasnohigh = 0;
-
- if (BN_is_zero(value)) {
- buffer_put_int(buffer, 0);
- return 0;
- }
- if (value->neg) {
- error("buffer_put_bignum2_ret: negative numbers not supported");
- return (-1);
- }
- bytes = BN_num_bytes(value) + 1; /* extra padding byte */
- if (bytes < 2) {
- error("buffer_put_bignum2_ret: BN too small");
- return (-1);
- }
- buf = xmalloc(bytes);
- buf[0] = 0x00;
- /* Get the value of in binary */
- oi = BN_bn2bin(value, buf+1);
- if (oi < 0 || (u_int)oi != bytes - 1) {
- error("buffer_put_bignum2_ret: BN_bn2bin() failed: "
- "oi %d != bin_size %d", oi, bytes);
- xfree(buf);
- return (-1);
- }
- hasnohigh = (buf[1] & 0x80) ? 0 : 1;
- buffer_put_string(buffer, buf+hasnohigh, bytes-hasnohigh);
- memset(buf, 0, bytes);
- xfree(buf);
- return (0);
-}
-
-void
-buffer_put_bignum2(Buffer *buffer, const BIGNUM *value)
-{
- if (buffer_put_bignum2_ret(buffer, value) == -1)
- fatal("buffer_put_bignum2: buffer error");
-}
-
-/* XXX does not handle negative BNs */
-int
-buffer_get_bignum2_ret(Buffer *buffer, BIGNUM *value)
-{
- u_int len;
- u_char *bin;
-
- if ((bin = buffer_get_string_ret(buffer, &len)) == NULL) {
- error("buffer_get_bignum2_ret: invalid bignum");
- return (-1);
- }
-
- if (len > 0 && (bin[0] & 0x80)) {
- error("buffer_get_bignum2_ret: negative numbers not supported");
- xfree(bin);
- return (-1);
- }
- if (len > 8 * 1024) {
- error("buffer_get_bignum2_ret: cannot handle BN of size %d", len);
- xfree(bin);
- return (-1);
- }
- BN_bin2bn(bin, len, value);
- xfree(bin);
- return (0);
-}
-
-void
-buffer_get_bignum2(Buffer *buffer, BIGNUM *value)
-{
- if (buffer_get_bignum2_ret(buffer, value) == -1)
- fatal("buffer_get_bignum2: buffer error");
-}
-
-/*
- * Returns integers from the buffer (msb first).
- */
-
-int
-buffer_get_short_ret(u_short *ret, Buffer *buffer)
-{
- u_char buf[2];
-
- if (buffer_get_ret(buffer, (char *) buf, 2) == -1)
- return (-1);
- *ret = GET_16BIT(buf);
- return (0);
-}
-
-u_short
-buffer_get_short(Buffer *buffer)
-{
- u_short ret;
-
- if (buffer_get_short_ret(&ret, buffer) == -1)
- fatal("buffer_get_short: buffer error");
-
- return (ret);
-}
-
-int
-buffer_get_int_ret(u_int *ret, Buffer *buffer)
-{
- u_char buf[4];
-
- if (buffer_get_ret(buffer, (char *) buf, 4) == -1)
- return (-1);
- *ret = GET_32BIT(buf);
- return (0);
-}
-
-u_int
-buffer_get_int(Buffer *buffer)
-{
- u_int ret;
-
- if (buffer_get_int_ret(&ret, buffer) == -1)
- fatal("buffer_get_int: buffer error");
-
- return (ret);
-}
-
-#ifdef HAVE_U_INT64_T
-int
-buffer_get_int64_ret(u_int64_t *ret, Buffer *buffer)
-{
- u_char buf[8];
-
- if (buffer_get_ret(buffer, (char *) buf, 8) == -1)
- return (-1);
- *ret = GET_64BIT(buf);
- return (0);
-}
-
-u_int64_t
-buffer_get_int64(Buffer *buffer)
-{
- u_int64_t ret;
-
- if (buffer_get_int64_ret(&ret, buffer) == -1)
- fatal("buffer_get_int: buffer error");
-
- return (ret);
-}
-#endif
-
-/*
- * Stores integers in the buffer, msb first.
- */
-void
-buffer_put_short(Buffer *buffer, u_short value)
-{
- char buf[2];
-
- PUT_16BIT(buf, value);
- buffer_append(buffer, buf, 2);
-}
-
-void
-buffer_put_int(Buffer *buffer, u_int value)
-{
- char buf[4];
-
- PUT_32BIT(buf, value);
- buffer_append(buffer, buf, 4);
-}
-
-#ifdef HAVE_U_INT64_T
-void
-buffer_put_int64(Buffer *buffer, u_int64_t value)
-{
- char buf[8];
-
- PUT_64BIT(buf, value);
- buffer_append(buffer, buf, 8);
-}
-#endif
-
-/*
- * Returns an arbitrary binary string from the buffer. The string cannot
- * be longer than 256k. The returned value points to memory allocated
- * with xmalloc; it is the responsibility of the calling function to free
- * the data. If length_ptr is non-NULL, the length of the returned data
- * will be stored there. A null character will be automatically appended
- * to the returned string, and is not counted in length.
- */
-void *
-buffer_get_string_ret(Buffer *buffer, u_int *length_ptr)
-{
- u_char *value;
- u_int len;
-
- /* Get the length. */
- len = buffer_get_int(buffer);
- if (len > 256 * 1024) {
- error("buffer_get_string_ret: bad string length %u", len);
- return (NULL);
- }
- /* Allocate space for the string. Add one byte for a null character. */
- value = xmalloc(len + 1);
- /* Get the string. */
- if (buffer_get_ret(buffer, value, len) == -1) {
- error("buffer_get_string_ret: buffer_get failed");
- xfree(value);
- return (NULL);
- }
- /* Append a null character to make processing easier. */
- value[len] = 0;
- /* Optionally return the length of the string. */
- if (length_ptr)
- *length_ptr = len;
- return (value);
-}
-
-void *
-buffer_get_string(Buffer *buffer, u_int *length_ptr)
-{
- void *ret;
-
- if ((ret = buffer_get_string_ret(buffer, length_ptr)) == NULL)
- fatal("buffer_get_string: buffer error");
- return (ret);
-}
-
-char *
-buffer_get_utf8_string(Buffer *buffer, uint_t *length_ptr)
-{
- char *value, *converted, *estr;
- uint_t len;
-
- if ((value = buffer_get_string(buffer, &len)) == NULL)
- return (value);
-
- converted = g11n_convert_from_utf8(value, &len, &estr);
- if (converted == NULL) {
- if (estr != NULL)
- error("invalid UTF-8 sequence: %s", estr);
- converted = value;
- } else {
- xfree(value);
- }
-
- if (length_ptr != NULL)
- *length_ptr = len;
-
- return (converted);
-}
-
-/*
- * Stores and arbitrary binary string in the buffer.
- */
-void
-buffer_put_string(Buffer *buffer, const void *buf, u_int len)
-{
- buffer_put_int(buffer, len);
- buffer_append(buffer, buf, len);
-}
-void
-buffer_put_cstring(Buffer *buffer, const char *s)
-{
- if (s == NULL)
- fatal("buffer_put_cstring: s == NULL");
- buffer_put_string(buffer, s, strlen(s));
-}
-
-/*
- * UTF-8 versions of the above.
- */
-void
-buffer_put_utf8_string(Buffer *buffer, const char *s, uint_t len)
-{
- char *converted, *estr;
- uint_t nlen = len;
-
- converted = g11n_convert_to_utf8(s, &nlen, 0, &estr);
- if (converted == NULL) {
- if (estr != NULL)
- error("Can't convert to UTF-8: %s", estr);
- converted = (char *)s;
- }
-
- buffer_put_string(buffer, converted, nlen);
-
- if (converted != s)
- xfree(converted);
-}
-
-void
-buffer_put_utf8_cstring(Buffer *buffer, const char *s)
-{
- buffer_put_utf8_string(buffer, s, strlen(s));
-}
-
-/*
- * Returns a character from the buffer (0 - 255).
- */
-int
-buffer_get_char_ret(char *ret, Buffer *buffer)
-{
- if (buffer_get_ret(buffer, ret, 1) == -1) {
- error("buffer_get_char_ret: buffer_get_ret failed");
- return (-1);
- }
- return (0);
-}
-
-int
-buffer_get_char(Buffer *buffer)
-{
- char ch;
-
- if (buffer_get_char_ret(&ch, buffer) == -1)
- fatal("buffer_get_char: buffer error");
- return (u_char) ch;
-}
-
-/*
- * Stores a character in the buffer.
- */
-void
-buffer_put_char(Buffer *buffer, int value)
-{
- char ch = value;
-
- buffer_append(buffer, &ch, 1);
-}
diff --git a/usr/src/cmd/ssh/libssh/common/buffer.c b/usr/src/cmd/ssh/libssh/common/buffer.c
deleted file mode 100644
index 2ec761b7ad..0000000000
--- a/usr/src/cmd/ssh/libssh/common/buffer.c
+++ /dev/null
@@ -1,263 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * Functions for manipulating fifo buffers (that can grow if needed).
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/* $OpenBSD: buffer.c,v 1.31 2006/08/03 03:34:41 deraadt Exp $ */
-
-#include "includes.h"
-
-#include "xmalloc.h"
-#include "buffer.h"
-#include "log.h"
-
-#define BUFFER_MAX_CHUNK 0x100000
-#define BUFFER_MAX_LEN 0xa00000
-#define BUFFER_ALLOCSZ 0x008000
-
-/* Initializes the buffer structure. */
-
-void
-buffer_init(Buffer *buffer)
-{
- const u_int len = 4096;
-
- buffer->alloc = 0;
- buffer->buf = xmalloc(len);
- buffer->alloc = len;
- buffer->offset = 0;
- buffer->end = 0;
-}
-
-/* Frees any memory used for the buffer. */
-
-void
-buffer_free(Buffer *buffer)
-{
- if (buffer->alloc > 0) {
- memset(buffer->buf, 0, buffer->alloc);
- buffer->alloc = 0;
- xfree(buffer->buf);
- }
-}
-
-/*
- * Clears any data from the buffer, making it empty. This does not actually
- * zero the memory.
- */
-
-void
-buffer_clear(Buffer *buffer)
-{
- buffer->offset = 0;
- buffer->end = 0;
-}
-
-/* Appends data to the buffer, expanding it if necessary. */
-
-void
-buffer_append(Buffer *buffer, const void *data, u_int len)
-{
- void *p;
- p = buffer_append_space(buffer, len);
- memcpy(p, data, len);
-}
-
-static int
-buffer_compact(Buffer *buffer)
-{
- /*
- * If the buffer is quite empty, but all data is at the end, move the
- * data to the beginning.
- */
- if (buffer->offset > MIN(buffer->alloc, BUFFER_MAX_CHUNK)) {
- memmove(buffer->buf, buffer->buf + buffer->offset,
- buffer->end - buffer->offset);
- buffer->end -= buffer->offset;
- buffer->offset = 0;
- return (1);
- }
- return (0);
-}
-
-/*
- * Appends space to the buffer, expanding the buffer if necessary. This does
- * not actually copy the data into the buffer, but instead returns a pointer
- * to the allocated region.
- */
-
-void *
-buffer_append_space(Buffer *buffer, u_int len)
-{
- u_int newlen;
- void *p;
-
- if (len > BUFFER_MAX_CHUNK)
- fatal("buffer_append_space: len %u not supported", len);
-
- /* If the buffer is empty, start using it from the beginning. */
- if (buffer->offset == buffer->end) {
- buffer->offset = 0;
- buffer->end = 0;
- }
-restart:
- /* If there is enough space to store all data, store it now. */
- if (buffer->end + len < buffer->alloc) {
- p = buffer->buf + buffer->end;
- buffer->end += len;
- return p;
- }
-
- /* Compact data back to the start of the buffer if necessary */
- if (buffer_compact(buffer))
- goto restart;
-
- /* Increase the size of the buffer and retry. */
- newlen = roundup(buffer->alloc + len, BUFFER_ALLOCSZ);
- if (newlen > BUFFER_MAX_LEN)
- fatal("buffer_append_space: alloc %u not supported",
- newlen);
- buffer->buf = xrealloc(buffer->buf, newlen);
- buffer->alloc = newlen;
- goto restart;
- /* NOTREACHED */
-}
-
-/*
- * Check whether an allocation of 'len' will fit in the buffer
- * This must follow the same math as buffer_append_space
- */
-int
-buffer_check_alloc(Buffer *buffer, u_int len)
-{
- if (buffer->offset == buffer->end) {
- buffer->offset = 0;
- buffer->end = 0;
- }
- restart:
- if (buffer->end + len < buffer->alloc)
- return (1);
- if (buffer_compact(buffer))
- goto restart;
- if (roundup(buffer->alloc + len, BUFFER_ALLOCSZ) <= BUFFER_MAX_LEN)
- return (1);
- return (0);
-}
-
-/* Returns the number of bytes of data in the buffer. */
-
-u_int
-buffer_len(Buffer *buffer)
-{
- return buffer->end - buffer->offset;
-}
-
-/* Gets data from the beginning of the buffer. */
-
-int
-buffer_get_ret(Buffer *buffer, void *buf, u_int len)
-{
- if (len > buffer->end - buffer->offset) {
- error("buffer_get_ret: trying to get more bytes %d than in buffer %d",
- len, buffer->end - buffer->offset);
- return (-1);
- }
- memcpy(buf, buffer->buf + buffer->offset, len);
- buffer->offset += len;
- return (0);
-}
-
-void
-buffer_get(Buffer *buffer, void *buf, u_int len)
-{
- if (buffer_get_ret(buffer, buf, len) == -1)
- fatal("buffer_get: buffer error");
-}
-
-/* Consumes the given number of bytes from the beginning of the buffer. */
-
-int
-buffer_consume_ret(Buffer *buffer, u_int bytes)
-{
- if (bytes > buffer->end - buffer->offset) {
- error("buffer_consume_ret: trying to get more bytes than in buffer");
- return (-1);
- }
- buffer->offset += bytes;
- return (0);
-}
-
-void
-buffer_consume(Buffer *buffer, u_int bytes)
-{
- if (buffer_consume_ret(buffer, bytes) == -1)
- fatal("buffer_consume: buffer error");
-}
-
-/* Consumes the given number of bytes from the end of the buffer. */
-
-int
-buffer_consume_end_ret(Buffer *buffer, u_int bytes)
-{
- if (bytes > buffer->end - buffer->offset)
- return (-1);
- buffer->end -= bytes;
- return (0);
-}
-
-void
-buffer_consume_end(Buffer *buffer, u_int bytes)
-{
- if (buffer_consume_end_ret(buffer, bytes) == -1)
- fatal("buffer_consume_end: trying to get more bytes than in buffer");
-}
-
-/* Returns a pointer to the first used byte in the buffer. */
-
-void *
-buffer_ptr(Buffer *buffer)
-{
- return buffer->buf + buffer->offset;
-}
-
-/* Dumps the contents of the buffer to stderr. */
-void
-buffer_dump(Buffer *buffer)
-{
- u_int i;
- u_char *ucp = buffer->buf;
-
- for (i = buffer->offset; i < buffer->end; i++) {
- fprintf(stderr, "%02x", ucp[i]);
- if ((i-buffer->offset)%16==15)
- fprintf(stderr, "\n");
- else if ((i-buffer->offset)%2==1)
- fprintf(stderr, " ");
- }
-
- if (buffer->offset == buffer->end) {
- /* explicitly state when the buffer is empty */
- fprintf(stderr, "<EMPTY BUFFER>\n");
- } else {
- /* print the terminal '\n' if it wasn't already printed */
- if ((i - buffer->offset) % 16 != 0)
- fprintf(stderr, "\n");
- }
- /*
- * We want an extra empty line after the packet dump for better
- * readability.
- */
- fprintf(stderr, "\n");
-}
diff --git a/usr/src/cmd/ssh/libssh/common/canohost.c b/usr/src/cmd/ssh/libssh/common/canohost.c
deleted file mode 100644
index 2d427b9e8d..0000000000
--- a/usr/src/cmd/ssh/libssh/common/canohost.c
+++ /dev/null
@@ -1,391 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * Functions for returning the canonical host name of the remote site.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-/*
- * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: canohost.c,v 1.34 2002/09/23 20:46:27 stevesk Exp $");
-
-#include "packet.h"
-#include "xmalloc.h"
-#include "log.h"
-#include "canohost.h"
-
-static const char *inet_ntop_native(int af, const void *src,
- char *dst, size_t size);
-
-
-/*
- * Return the canonical name of the host at the other end of the socket. The
- * caller should free the returned string with xfree.
- */
-
-static char *
-get_remote_hostname(int socket, int verify_reverse_mapping)
-{
- struct sockaddr_storage from;
- int i, res;
- socklen_t fromlen;
- struct addrinfo hints, *ai, *aitop;
- char name[NI_MAXHOST], ntop[NI_MAXHOST], ntop2[NI_MAXHOST];
-
- /* Get IP address of client. */
- fromlen = sizeof(from);
- memset(&from, 0, sizeof(from));
- if (getpeername(socket, (struct sockaddr *) &from, &fromlen) < 0) {
- debug("getpeername failed: %.100s", strerror(errno));
- fatal_cleanup();
- }
-
- if ((res = getnameinfo((struct sockaddr *)&from, fromlen, ntop, sizeof(ntop),
- NULL, 0, NI_NUMERICHOST)) != 0)
- fatal("get_remote_hostname: getnameinfo NI_NUMERICHOST failed: %d", res);
-
-#ifdef IPV4_IN_IPV6
- if (from.ss_family == AF_INET6) {
- struct sockaddr_in6 *from6 = (struct sockaddr_in6 *)&from;
-
- (void) inet_ntop_native(from.ss_family,
- from6->sin6_addr.s6_addr,
- ntop, sizeof(ntop));
- }
-#endif /* IPV4_IN_IPV6 */
-
- if (!verify_reverse_mapping)
- return xstrdup(ntop);
-
- debug3("Trying to reverse map address %.100s.", ntop);
- /* Map the IP address to a host name. */
- if (getnameinfo((struct sockaddr *)&from, fromlen, name, sizeof(name),
- NULL, 0, NI_NAMEREQD) != 0) {
- /* Host name not found. Use ip address. */
-#if 0
- log("Could not reverse map address %.100s.", ntop);
-#endif
- return xstrdup(ntop);
- }
-
- /* Got host name. */
- name[sizeof(name) - 1] = '\0';
- /*
- * Convert it to all lowercase (which is expected by the rest
- * of this software).
- */
- for (i = 0; name[i]; i++)
- if (isupper(name[i]))
- name[i] = tolower(name[i]);
-
- /*
- * Map it back to an IP address and check that the given
- * address actually is an address of this host. This is
- * necessary because anyone with access to a name server can
- * define arbitrary names for an IP address. Mapping from
- * name to IP address can be trusted better (but can still be
- * fooled if the intruder has access to the name server of
- * the domain).
- */
- memset(&hints, 0, sizeof(hints));
- hints.ai_family = from.ss_family;
- hints.ai_socktype = SOCK_STREAM;
- if (getaddrinfo(name, NULL, &hints, &aitop) != 0) {
- log("reverse mapping checking getaddrinfo for %.700s "
- "failed - POSSIBLE BREAKIN ATTEMPT!", name);
- return xstrdup(ntop);
- }
- /* Look for the address from the list of addresses. */
- for (ai = aitop; ai; ai = ai->ai_next) {
- if (getnameinfo(ai->ai_addr, ai->ai_addrlen, ntop2,
- sizeof(ntop2), NULL, 0, NI_NUMERICHOST) == 0 &&
- (strcmp(ntop, ntop2) == 0))
- break;
- }
- freeaddrinfo(aitop);
- /* If we reached the end of the list, the address was not there. */
- if (!ai) {
- /* Address not found for the host name. */
- log("Address %.100s maps to %.600s, but this does not "
- "map back to the address - POSSIBLE BREAKIN ATTEMPT!",
- ntop, name);
- return xstrdup(ntop);
- }
- return xstrdup(name);
-}
-
-/*
- * Return the canonical name of the host in the other side of the current
- * connection. The host name is cached, so it is efficient to call this
- * several times.
- */
-
-const char *
-get_canonical_hostname(int verify_reverse_mapping)
-{
- static char *canonical_host_name = NULL;
- static int verify_reverse_mapping_done = 0;
-
- /* Check if we have previously retrieved name with same option. */
- if (canonical_host_name != NULL) {
- if (verify_reverse_mapping_done != verify_reverse_mapping)
- xfree(canonical_host_name);
- else
- return canonical_host_name;
- }
-
- /* Get the real hostname if socket; otherwise return UNKNOWN. */
- if (packet_connection_is_on_socket())
- canonical_host_name = get_remote_hostname(
- packet_get_connection_in(), verify_reverse_mapping);
- else
- canonical_host_name = xstrdup("UNKNOWN");
-
- verify_reverse_mapping_done = verify_reverse_mapping;
- return canonical_host_name;
-}
-
-/*
- * Returns the remote IP-address of socket as a string. The returned
- * string must be freed.
- */
-char *
-get_socket_address(int socket, int remote, int flags)
-{
- struct sockaddr_storage addr;
- struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&addr;
- socklen_t addrlen;
- char ntop[NI_MAXHOST];
- const char *result;
- char abuf[INET6_ADDRSTRLEN];
-
- /* Get IP address of client. */
- addrlen = sizeof (addr);
- memset(&addr, 0, sizeof (addr));
-
- if (remote) {
- if (getpeername(socket, (struct sockaddr *)&addr, &addrlen)
- < 0) {
- debug("get_socket_ipaddr: getpeername failed: %.100s",
- strerror(errno));
- return (NULL);
- }
- } else {
- if (getsockname(socket, (struct sockaddr *)&addr, &addrlen)
- < 0) {
- debug("get_socket_ipaddr: getsockname failed: %.100s",
- strerror(errno));
- return (NULL);
- }
- }
-
- /* Get the address in ascii. */
- if (getnameinfo((struct sockaddr *)&addr, addrlen, ntop, sizeof (ntop),
- NULL, 0, flags) != 0) {
- error("get_socket_ipaddr: getnameinfo %d failed", flags);
- return (NULL);
- }
-
- if (addr.ss_family == AF_INET) {
- return (xstrdup(ntop));
- }
-
- result = inet_ntop_native(addr.ss_family,
- addr6->sin6_addr.s6_addr, abuf, sizeof (abuf));
-
- return (xstrdup(result));
-}
-#if 0
-static char *
-get_socket_address(int socket, int remote, int flags)
-{
- struct sockaddr_storage addr;
- socklen_t addrlen;
- char ntop[NI_MAXHOST];
-
- /* Get IP address of client. */
- addrlen = sizeof(addr);
- memset(&addr, 0, sizeof(addr));
-
- if (remote) {
- if (getpeername(socket, (struct sockaddr *)&addr, &addrlen)
- < 0)
- return NULL;
- } else {
- if (getsockname(socket, (struct sockaddr *)&addr, &addrlen)
- < 0)
- return NULL;
- }
- /* Get the address in ascii. */
- if (getnameinfo((struct sockaddr *)&addr, addrlen, ntop, sizeof(ntop),
- NULL, 0, flags) != 0) {
- error("get_socket_ipaddr: getnameinfo %d failed", flags);
- return NULL;
- }
- return xstrdup(ntop);
-}
-#endif
-
-char *
-get_peer_ipaddr(int socket)
-{
- char *p;
-
- if ((p = get_socket_address(socket, 1, NI_NUMERICHOST)) != NULL)
- return p;
- return xstrdup("UNKNOWN");
-}
-
-char *
-get_local_ipaddr(int socket)
-{
- char *p;
-
- if ((p = get_socket_address(socket, 0, NI_NUMERICHOST)) != NULL)
- return p;
- return xstrdup("UNKNOWN");
-}
-
-char *
-get_local_name(int socket)
-{
- return get_socket_address(socket, 0, NI_NAMEREQD);
-}
-
-/*
- * Returns the IP-address of the remote host as a string. The returned
- * string must not be freed.
- */
-
-const char *
-get_remote_ipaddr(void)
-{
- static char *canonical_host_ip = NULL;
-
- /* Check whether we have cached the ipaddr. */
- if (canonical_host_ip == NULL) {
- if (packet_connection_is_on_socket()) {
- canonical_host_ip =
- get_peer_ipaddr(packet_get_connection_in());
- if (canonical_host_ip == NULL)
- fatal_cleanup();
- } else {
- /* If not on socket, return UNKNOWN. */
- canonical_host_ip = xstrdup("UNKNOWN");
- }
- }
- return canonical_host_ip;
-}
-
-const char *
-get_remote_name_or_ip(u_int utmp_len, int verify_reverse_mapping)
-{
- static const char *remote = "";
- if (utmp_len > 0)
- remote = get_canonical_hostname(verify_reverse_mapping);
- if (utmp_len == 0 || strlen(remote) > utmp_len)
- remote = get_remote_ipaddr();
- return remote;
-}
-
-/* Returns the local/remote port for the socket. */
-
-static int
-get_sock_port(int sock, int local)
-{
- struct sockaddr_storage from;
- socklen_t fromlen;
- char strport[NI_MAXSERV];
-
- /* Get IP address of client. */
- fromlen = sizeof(from);
- memset(&from, 0, sizeof(from));
- if (local) {
- if (getsockname(sock, (struct sockaddr *)&from, &fromlen) < 0) {
- error("getsockname failed: %.100s", strerror(errno));
- return 0;
- }
- } else {
- if (getpeername(sock, (struct sockaddr *) & from, &fromlen) < 0) {
- debug("getpeername failed: %.100s", strerror(errno));
- fatal_cleanup();
- }
- }
- /* Return port number. */
- if (getnameinfo((struct sockaddr *)&from, fromlen, NULL, 0,
- strport, sizeof(strport), NI_NUMERICSERV) != 0)
- fatal("get_sock_port: getnameinfo NI_NUMERICSERV failed");
- return atoi(strport);
-}
-
-/* Returns remote/local port number for the current connection. */
-
-static int
-get_port(int local)
-{
- /*
- * If the connection is not a socket, return 65535. This is
- * intentionally chosen to be an unprivileged port number.
- */
- if (!packet_connection_is_on_socket())
- return 65535;
-
- /* Get socket and return the port number. */
- return get_sock_port(packet_get_connection_in(), local);
-}
-
-int
-get_peer_port(int sock)
-{
- return get_sock_port(sock, 0);
-}
-
-int
-get_remote_port(void)
-{
- return get_port(0);
-}
-
-int
-get_local_port(void)
-{
- return get_port(1);
-}
-
-/*
- * Taken from inetd.c
- * This is a wrapper function for inet_ntop(). In case the af is AF_INET6
- * and the address pointed by src is a IPv4-mapped IPv6 address, it
- * returns printable IPv4 address, not IPv4-mapped IPv6 address. In other cases
- * it behaves just like inet_ntop().
- */
-static const char *
-inet_ntop_native(int af, const void *src, char *dst, size_t size)
-{
- struct in_addr src4;
- const char *result;
-
- if (af == AF_INET6) {
- if (IN6_IS_ADDR_V4MAPPED((struct in6_addr *)src)) {
- IN6_V4MAPPED_TO_INADDR((struct in6_addr *)src, &src4);
- result = inet_ntop(AF_INET, &src4, dst, size);
- } else {
- result = inet_ntop(AF_INET6, src, dst, size);
- }
- } else {
- result = inet_ntop(af, src, dst, size);
- }
-
- return (result);
-}
diff --git a/usr/src/cmd/ssh/libssh/common/channels.c b/usr/src/cmd/ssh/libssh/common/channels.c
deleted file mode 100644
index 7133758b73..0000000000
--- a/usr/src/cmd/ssh/libssh/common/channels.c
+++ /dev/null
@@ -1,3035 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * This file contains functions for generic socket connection forwarding.
- * There is also code for initiating connection forwarding for X11 connections,
- * arbitrary tcp/ip connections, and the authentication agent connection.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- *
- * SSH2 support added by Markus Friedl.
- * Copyright (c) 1999, 2000, 2001, 2002 Markus Friedl. All rights reserved.
- * Copyright (c) 1999 Dug Song. All rights reserved.
- * Copyright (c) 1999 Theo de Raadt. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: channels.c,v 1.183 2002/09/17 07:47:02 itojun Exp $");
-
-#include "ssh.h"
-#include "ssh1.h"
-#include "ssh2.h"
-#include "packet.h"
-#include "xmalloc.h"
-#include "log.h"
-#include "misc.h"
-#include "channels.h"
-#include "compat.h"
-#include "canohost.h"
-#include "key.h"
-#include "authfd.h"
-#include "pathnames.h"
-#include "bufaux.h"
-
-
-/* -- channel core */
-
-/*
- * Pointer to an array containing all allocated channels. The array is
- * dynamically extended as needed.
- */
-static Channel **channels = NULL;
-
-/*
- * Size of the channel array. All slots of the array must always be
- * initialized (at least the type field); unused slots set to NULL
- */
-static int channels_alloc = 0;
-
-/*
- * Maximum file descriptor value used in any of the channels. This is
- * updated in channel_new.
- */
-static int channel_max_fd = 0;
-
-
-/* -- tcp forwarding */
-
-/*
- * Data structure for storing which hosts are permitted for forward requests.
- * The local sides of any remote forwards are stored in this array to prevent
- * a corrupt remote server from accessing arbitrary TCP/IP ports on our local
- * network (which might be behind a firewall).
- */
-typedef struct {
- char *host_to_connect; /* Connect to 'host'. */
- u_short port_to_connect; /* Connect to 'port'. */
- u_short listen_port; /* Remote side should listen port number. */
-} ForwardPermission;
-
-/* List of all permitted host/port pairs to connect. */
-static ForwardPermission permitted_opens[SSH_MAX_FORWARDS_PER_DIRECTION];
-
-/* Number of permitted host/port pairs in the array. */
-static int num_permitted_opens = 0;
-/*
- * If this is true, all opens are permitted. This is the case on the server
- * on which we have to trust the client anyway, and the user could do
- * anything after logging in anyway.
- */
-static int all_opens_permitted = 0;
-
-
-/* -- X11 forwarding */
-
-/* Maximum number of fake X11 displays to try. */
-#define MAX_DISPLAYS 1000
-
-/* Saved X11 authentication protocol name. */
-static char *x11_saved_proto = NULL;
-
-/* Saved X11 authentication data. This is the real data. */
-static char *x11_saved_data = NULL;
-static u_int x11_saved_data_len = 0;
-
-/*
- * Fake X11 authentication data. This is what the server will be sending us;
- * we should replace any occurrences of this by the real data.
- */
-static u_char *x11_fake_data = NULL;
-static u_int x11_fake_data_len;
-
-
-/* -- agent forwarding */
-
-#define NUM_SOCKS 10
-
-/* AF_UNSPEC or AF_INET or AF_INET6 */
-static int IPv4or6 = AF_UNSPEC;
-
-/* helper */
-static void port_open_helper(Channel *c, char *rtype);
-
-/* -- channel core */
-
-Channel *
-channel_lookup(int id)
-{
- Channel *c;
-
- if (id < 0 || id >= channels_alloc) {
- log("channel_lookup: %d: bad id", id);
- return NULL;
- }
- c = channels[id];
- if (c == NULL) {
- log("channel_lookup: %d: bad id: channel free", id);
- return NULL;
- }
- return c;
-}
-
-/*
- * Register filedescriptors for a channel, used when allocating a channel or
- * when the channel consumer/producer is ready, e.g. shell exec'd
- */
-
-static void
-channel_register_fds(Channel *c, int rfd, int wfd, int efd,
- int extusage, int nonblock)
-{
- /* Update the maximum file descriptor value. */
- channel_max_fd = MAX(channel_max_fd, rfd);
- channel_max_fd = MAX(channel_max_fd, wfd);
- channel_max_fd = MAX(channel_max_fd, efd);
-
- /* XXX set close-on-exec -markus */
-
- c->rfd = rfd;
- c->wfd = wfd;
- c->sock = (rfd == wfd) ? rfd : -1;
- c->efd = efd;
- c->extended_usage = extusage;
-
- /* XXX ugly hack: nonblock is only set by the server */
- if (nonblock && isatty(c->rfd)) {
- debug("channel %d: rfd %d isatty", c->self, c->rfd);
- c->isatty = 1;
- if (!isatty(c->wfd)) {
- error("channel %d: wfd %d is not a tty?",
- c->self, c->wfd);
- }
- } else {
- c->isatty = 0;
- }
- c->wfd_isatty = isatty(c->wfd);
-
- /* enable nonblocking mode */
- if (nonblock) {
- if (rfd != -1)
- set_nonblock(rfd);
- if (wfd != -1)
- set_nonblock(wfd);
- if (efd != -1)
- set_nonblock(efd);
- }
-}
-
-/*
- * Allocate a new channel object and set its type and socket. This will cause
- * remote_name to be freed.
- */
-
-Channel *
-channel_new(char *ctype, int type, int rfd, int wfd, int efd,
- u_int window, u_int maxpack, int extusage, char *remote_name, int nonblock)
-{
- int i, found;
- Channel *c;
-
- /* Do initial allocation if this is the first call. */
- if (channels_alloc == 0) {
- channels_alloc = 10;
- channels = xmalloc(channels_alloc * sizeof(Channel *));
- for (i = 0; i < channels_alloc; i++)
- channels[i] = NULL;
- fatal_add_cleanup((void (*) (void *)) channel_free_all, NULL);
- }
- /* Try to find a free slot where to put the new channel. */
- for (found = -1, i = 0; i < channels_alloc; i++)
- if (channels[i] == NULL) {
- /* Found a free slot. */
- found = i;
- break;
- }
- if (found == -1) {
- /* There are no free slots. Take last+1 slot and expand the array. */
- found = channels_alloc;
- if (channels_alloc > 10000)
- fatal("channel_new: internal error: channels_alloc %d "
- "too big.", channels_alloc);
- channels = xrealloc(channels,
- (channels_alloc + 10) * sizeof(Channel *));
- channels_alloc += 10;
- debug2("channel: expanding %d", channels_alloc);
- for (i = found; i < channels_alloc; i++)
- channels[i] = NULL;
- }
- /* Initialize and return new channel. */
- c = channels[found] = xmalloc(sizeof(Channel));
- memset(c, 0, sizeof(Channel));
- buffer_init(&c->input);
- buffer_init(&c->output);
- buffer_init(&c->extended);
- c->ostate = CHAN_OUTPUT_OPEN;
- c->istate = CHAN_INPUT_OPEN;
- c->flags = 0;
- channel_register_fds(c, rfd, wfd, efd, extusage, nonblock);
- c->self = found;
- c->type = type;
- c->ctype = ctype;
- c->local_window = window;
- c->local_window_max = window;
- c->local_consumed = 0;
- c->local_maxpacket = maxpack;
- c->remote_id = -1;
- c->remote_name = remote_name;
- c->remote_window = 0;
- c->remote_maxpacket = 0;
- c->force_drain = 0;
- c->single_connection = 0;
- c->detach_user = NULL;
- c->confirm = NULL;
- c->input_filter = NULL;
- c->delayed = 1; /* prevent call to channel_post handler */
- debug("channel %d: new [%s]", found, remote_name);
- return c;
-}
-
-static int
-channel_find_maxfd(void)
-{
- int i, max = 0;
- Channel *c;
-
- for (i = 0; i < channels_alloc; i++) {
- c = channels[i];
- if (c != NULL) {
- max = MAX(max, c->rfd);
- max = MAX(max, c->wfd);
- max = MAX(max, c->efd);
- }
- }
- return max;
-}
-
-int
-channel_close_fd(int *fdp)
-{
- int ret = 0, fd = *fdp;
-
- if (fd != -1) {
- ret = close(fd);
- *fdp = -1;
- if (fd == channel_max_fd)
- channel_max_fd = channel_find_maxfd();
- }
- return ret;
-}
-
-/* Close all channel fd/socket. */
-
-static void
-channel_close_fds(Channel *c)
-{
- debug3("channel_close_fds: channel %d: r %d w %d e %d",
- c->self, c->rfd, c->wfd, c->efd);
-
- channel_close_fd(&c->sock);
- channel_close_fd(&c->rfd);
- channel_close_fd(&c->wfd);
- channel_close_fd(&c->efd);
-}
-
-/* Free the channel and close its fd/socket. */
-
-void
-channel_free(Channel *c)
-{
- char *s;
- int i, n;
-
- for (n = 0, i = 0; i < channels_alloc; i++)
- if (channels[i])
- n++;
- debug("channel_free: channel %d: %s, nchannels %d", c->self,
- c->remote_name ? c->remote_name : "???", n);
-
- s = channel_open_message();
- debug3("channel_free: status: %s", s);
- xfree(s);
-
- if (c->sock != -1)
- shutdown(c->sock, SHUT_RDWR);
- channel_close_fds(c);
- buffer_free(&c->input);
- buffer_free(&c->output);
- buffer_free(&c->extended);
- if (c->remote_name) {
- xfree(c->remote_name);
- c->remote_name = NULL;
- }
- channels[c->self] = NULL;
- xfree(c);
-}
-
-void
-channel_free_all(void)
-{
- int i;
-
- for (i = 0; i < channels_alloc; i++)
- if (channels[i] != NULL)
- channel_free(channels[i]);
-}
-
-/*
- * Closes the sockets/fds of all channels. This is used to close extra file
- * descriptors after a fork.
- */
-
-void
-channel_close_all(void)
-{
- int i;
-
- for (i = 0; i < channels_alloc; i++)
- if (channels[i] != NULL)
- channel_close_fds(channels[i]);
-}
-
-/*
- * Stop listening to channels.
- */
-
-void
-channel_stop_listening(void)
-{
- int i;
- Channel *c;
-
- for (i = 0; i < channels_alloc; i++) {
- c = channels[i];
- if (c != NULL) {
- switch (c->type) {
- case SSH_CHANNEL_AUTH_SOCKET:
- case SSH_CHANNEL_PORT_LISTENER:
- case SSH_CHANNEL_RPORT_LISTENER:
- case SSH_CHANNEL_X11_LISTENER:
- channel_close_fd(&c->sock);
- channel_free(c);
- break;
- }
- }
- }
-}
-
-/*
- * Returns true if no channel has too much buffered data, and false if one or
- * more channel is overfull.
- */
-
-int
-channel_not_very_much_buffered_data(void)
-{
- u_int i;
- Channel *c;
-
- for (i = 0; i < channels_alloc; i++) {
- c = channels[i];
- if (c != NULL && c->type == SSH_CHANNEL_OPEN) {
-#if 0
- if (!compat20 &&
- buffer_len(&c->input) > packet_get_maxsize()) {
- debug("channel %d: big input buffer %d",
- c->self, buffer_len(&c->input));
- return 0;
- }
-#endif
- if (buffer_len(&c->output) > packet_get_maxsize()) {
- debug("channel %d: big output buffer %d > %d",
- c->self, buffer_len(&c->output),
- packet_get_maxsize());
- return 0;
- }
- }
- }
- return 1;
-}
-
-/* Returns true if any channel is still open. */
-
-int
-channel_still_open(void)
-{
- int i;
- Channel *c;
-
- for (i = 0; i < channels_alloc; i++) {
- c = channels[i];
- if (c == NULL)
- continue;
- switch (c->type) {
- case SSH_CHANNEL_X11_LISTENER:
- case SSH_CHANNEL_PORT_LISTENER:
- case SSH_CHANNEL_RPORT_LISTENER:
- case SSH_CHANNEL_CLOSED:
- case SSH_CHANNEL_AUTH_SOCKET:
- case SSH_CHANNEL_DYNAMIC:
- case SSH_CHANNEL_CONNECTING:
- case SSH_CHANNEL_ZOMBIE:
- continue;
- case SSH_CHANNEL_LARVAL:
- if (!compat20)
- fatal("cannot happen: SSH_CHANNEL_LARVAL");
- continue;
- case SSH_CHANNEL_OPENING:
- case SSH_CHANNEL_OPEN:
- case SSH_CHANNEL_X11_OPEN:
- return 1;
- case SSH_CHANNEL_INPUT_DRAINING:
- case SSH_CHANNEL_OUTPUT_DRAINING:
- if (!compat13)
- fatal("cannot happen: OUT_DRAIN");
- return 1;
- default:
- fatal("channel_still_open: bad channel type %d", c->type);
- /* NOTREACHED */
- }
- }
- return 0;
-}
-
-/* Returns the id of an open channel suitable for keepaliving */
-
-int
-channel_find_open(void)
-{
- int i;
- Channel *c;
-
- for (i = 0; i < channels_alloc; i++) {
- c = channels[i];
- if (c == NULL)
- continue;
- switch (c->type) {
- case SSH_CHANNEL_CLOSED:
- case SSH_CHANNEL_DYNAMIC:
- case SSH_CHANNEL_X11_LISTENER:
- case SSH_CHANNEL_PORT_LISTENER:
- case SSH_CHANNEL_RPORT_LISTENER:
- case SSH_CHANNEL_OPENING:
- case SSH_CHANNEL_CONNECTING:
- case SSH_CHANNEL_ZOMBIE:
- continue;
- case SSH_CHANNEL_LARVAL:
- case SSH_CHANNEL_AUTH_SOCKET:
- case SSH_CHANNEL_OPEN:
- case SSH_CHANNEL_X11_OPEN:
- return i;
- case SSH_CHANNEL_INPUT_DRAINING:
- case SSH_CHANNEL_OUTPUT_DRAINING:
- if (!compat13)
- fatal("cannot happen: OUT_DRAIN");
- return i;
- default:
- fatal("channel_find_open: bad channel type %d", c->type);
- /* NOTREACHED */
- }
- }
- return -1;
-}
-
-
-/*
- * Returns a message describing the currently open forwarded connections,
- * suitable for sending to the client. The message contains crlf pairs for
- * newlines.
- */
-
-char *
-channel_open_message(void)
-{
- Buffer buffer;
- Channel *c;
- char buf[1024], *cp;
- int i;
-
- buffer_init(&buffer);
- snprintf(buf, sizeof buf, "The following connections are open:\r\n");
- buffer_append(&buffer, buf, strlen(buf));
- for (i = 0; i < channels_alloc; i++) {
- c = channels[i];
- if (c == NULL)
- continue;
- switch (c->type) {
- case SSH_CHANNEL_X11_LISTENER:
- case SSH_CHANNEL_PORT_LISTENER:
- case SSH_CHANNEL_RPORT_LISTENER:
- case SSH_CHANNEL_CLOSED:
- case SSH_CHANNEL_AUTH_SOCKET:
- case SSH_CHANNEL_ZOMBIE:
- continue;
- case SSH_CHANNEL_LARVAL:
- case SSH_CHANNEL_OPENING:
- case SSH_CHANNEL_CONNECTING:
- case SSH_CHANNEL_DYNAMIC:
- case SSH_CHANNEL_OPEN:
- case SSH_CHANNEL_X11_OPEN:
- case SSH_CHANNEL_INPUT_DRAINING:
- case SSH_CHANNEL_OUTPUT_DRAINING:
- snprintf(buf, sizeof buf, " #%d %.300s (t%d r%d i%d/%d o%d/%d fd %d/%d)\r\n",
- c->self, c->remote_name,
- c->type, c->remote_id,
- c->istate, buffer_len(&c->input),
- c->ostate, buffer_len(&c->output),
- c->rfd, c->wfd);
- buffer_append(&buffer, buf, strlen(buf));
- continue;
- default:
- fatal("channel_open_message: bad channel type %d", c->type);
- /* NOTREACHED */
- }
- }
- buffer_append(&buffer, "\0", 1);
- cp = xstrdup(buffer_ptr(&buffer));
- buffer_free(&buffer);
- return cp;
-}
-
-void
-channel_send_open(int id)
-{
- Channel *c = channel_lookup(id);
-
- if (c == NULL) {
- log("channel_send_open: %d: bad id", id);
- return;
- }
- debug("send channel open %d", id);
- packet_start(SSH2_MSG_CHANNEL_OPEN);
- packet_put_cstring(c->ctype);
- packet_put_int(c->self);
- packet_put_int(c->local_window);
- packet_put_int(c->local_maxpacket);
- packet_send();
-}
-
-void
-channel_request_start(int local_id, char *service, int wantconfirm)
-{
- Channel *c = channel_lookup(local_id);
-
- debug("channel request %d: %s", local_id, service);
- if (c == NULL) {
- log("channel_request_start: %d: unknown channel id", local_id);
- return;
- }
- packet_start(SSH2_MSG_CHANNEL_REQUEST);
- packet_put_int(c->remote_id);
- packet_put_cstring(service);
- packet_put_char(wantconfirm);
-}
-void
-channel_register_confirm(int id, channel_callback_fn *fn)
-{
- Channel *c = channel_lookup(id);
-
- if (c == NULL) {
- log("channel_register_comfirm: %d: bad id", id);
- return;
- }
- c->confirm = fn;
-}
-void
-channel_register_cleanup(int id, channel_callback_fn *fn)
-{
- Channel *c = channel_lookup(id);
-
- if (c == NULL) {
- log("channel_register_cleanup: %d: bad id", id);
- return;
- }
- c->detach_user = fn;
-}
-void
-channel_cancel_cleanup(int id)
-{
- Channel *c = channel_lookup(id);
-
- if (c == NULL) {
- log("channel_cancel_cleanup: %d: bad id", id);
- return;
- }
- c->detach_user = NULL;
-}
-void
-channel_register_filter(int id, channel_filter_fn *fn)
-{
- Channel *c = channel_lookup(id);
-
- if (c == NULL) {
- log("channel_register_filter: %d: bad id", id);
- return;
- }
- c->input_filter = fn;
-}
-
-void
-channel_set_fds(int id, int rfd, int wfd, int efd,
- int extusage, int nonblock, u_int window_max)
-{
- Channel *c = channel_lookup(id);
-
- if (c == NULL || c->type != SSH_CHANNEL_LARVAL)
- fatal("channel_activate for non-larval channel %d.", id);
- channel_register_fds(c, rfd, wfd, efd, extusage, nonblock);
- c->type = SSH_CHANNEL_OPEN;
- c->local_window = c->local_window_max = window_max;
- packet_start(SSH2_MSG_CHANNEL_WINDOW_ADJUST);
- packet_put_int(c->remote_id);
- packet_put_int(c->local_window);
- packet_send();
-}
-
-void
-channel_set_wait_for_exit(int id, int wait_for_exit)
-{
- Channel *c = channel_lookup(id);
-
- if (c == NULL || c->type != SSH_CHANNEL_OPEN)
- fatal("channel_set_wait_for_exit for non-open channel %d.", id);
-
- debug3("channel_set_wait_for_exit %d, %d (type: %d)", id, wait_for_exit, c->type);
- c->wait_for_exit = wait_for_exit;
-}
-
-/*
- * 'channel_pre*' are called just before select() to add any bits relevant to
- * channels in the select bitmasks.
- */
-/*
- * 'channel_post*': perform any appropriate operations for channels which
- * have events pending.
- */
-typedef void chan_fn(Channel *c, fd_set * readset, fd_set * writeset);
-chan_fn *channel_pre[SSH_CHANNEL_MAX_TYPE];
-chan_fn *channel_post[SSH_CHANNEL_MAX_TYPE];
-
-static void
-channel_pre_listener(Channel *c, fd_set * readset, fd_set * writeset)
-{
- FD_SET(c->sock, readset);
-}
-
-static void
-channel_pre_connecting(Channel *c, fd_set * readset, fd_set * writeset)
-{
- debug3("channel %d: waiting for connection", c->self);
- FD_SET(c->sock, writeset);
-}
-
-static void
-channel_pre_open_13(Channel *c, fd_set * readset, fd_set * writeset)
-{
- if (buffer_len(&c->input) < packet_get_maxsize())
- FD_SET(c->sock, readset);
- if (buffer_len(&c->output) > 0)
- FD_SET(c->sock, writeset);
-}
-
-static void
-channel_pre_open(Channel *c, fd_set * readset, fd_set * writeset)
-{
- u_int limit = compat20 ? c->remote_window : packet_get_maxsize();
-
- if (c->istate == CHAN_INPUT_OPEN &&
- limit > 0 &&
- buffer_len(&c->input) < limit &&
- buffer_check_alloc(&c->input, CHAN_RBUF))
- FD_SET(c->rfd, readset);
- if (c->ostate == CHAN_OUTPUT_OPEN ||
- c->ostate == CHAN_OUTPUT_WAIT_DRAIN) {
- if (buffer_len(&c->output) > 0) {
- FD_SET(c->wfd, writeset);
- } else if (c->ostate == CHAN_OUTPUT_WAIT_DRAIN) {
- if (CHANNEL_EFD_OUTPUT_ACTIVE(c))
- debug2("channel %d: obuf_empty delayed efd %d/(%d)",
- c->self, c->efd, buffer_len(&c->extended));
- else
- chan_obuf_empty(c);
- }
- }
- /** XXX check close conditions, too */
- if (compat20 && c->efd != -1) {
- if (c->extended_usage == CHAN_EXTENDED_WRITE &&
- buffer_len(&c->extended) > 0)
- FD_SET(c->efd, writeset);
- else if (!(c->flags & CHAN_EOF_SENT) &&
- c->extended_usage == CHAN_EXTENDED_READ &&
- buffer_len(&c->extended) < c->remote_window)
- FD_SET(c->efd, readset);
- }
-}
-
-static void
-channel_pre_input_draining(Channel *c, fd_set * readset, fd_set * writeset)
-{
- if (buffer_len(&c->input) == 0) {
- packet_start(SSH_MSG_CHANNEL_CLOSE);
- packet_put_int(c->remote_id);
- packet_send();
- c->type = SSH_CHANNEL_CLOSED;
- debug("channel %d: closing after input drain.", c->self);
- }
-}
-
-static void
-channel_pre_output_draining(Channel *c, fd_set * readset, fd_set * writeset)
-{
- if (buffer_len(&c->output) == 0)
- chan_mark_dead(c);
- else
- FD_SET(c->sock, writeset);
-}
-
-/*
- * This is a special state for X11 authentication spoofing. An opened X11
- * connection (when authentication spoofing is being done) remains in this
- * state until the first packet has been completely read. The authentication
- * data in that packet is then substituted by the real data if it matches the
- * fake data, and the channel is put into normal mode.
- * XXX All this happens at the client side.
- * Returns: 0 = need more data, -1 = wrong cookie, 1 = ok
- */
-static int
-x11_open_helper(Buffer *b)
-{
- u_char *ucp;
- u_int proto_len, data_len;
-
- /* Check if the fixed size part of the packet is in buffer. */
- if (buffer_len(b) < 12)
- return 0;
-
- /* Parse the lengths of variable-length fields. */
- ucp = buffer_ptr(b);
- if (ucp[0] == 0x42) { /* Byte order MSB first. */
- proto_len = 256 * ucp[6] + ucp[7];
- data_len = 256 * ucp[8] + ucp[9];
- } else if (ucp[0] == 0x6c) { /* Byte order LSB first. */
- proto_len = ucp[6] + 256 * ucp[7];
- data_len = ucp[8] + 256 * ucp[9];
- } else {
- debug("Initial X11 packet contains bad byte order byte: 0x%x",
- ucp[0]);
- return -1;
- }
-
- /* Check if the whole packet is in buffer. */
- if (buffer_len(b) <
- 12 + ((proto_len + 3) & ~3) + ((data_len + 3) & ~3))
- return 0;
-
- /* Check if authentication protocol matches. */
- if (proto_len != strlen(x11_saved_proto) ||
- memcmp(ucp + 12, x11_saved_proto, proto_len) != 0) {
- debug("X11 connection uses different authentication protocol.");
- return -1;
- }
- /* Check if authentication data matches our fake data. */
- if (data_len != x11_fake_data_len ||
- memcmp(ucp + 12 + ((proto_len + 3) & ~3),
- x11_fake_data, x11_fake_data_len) != 0) {
- debug("X11 auth data does not match fake data.");
- return -1;
- }
- /* Check fake data length */
- if (x11_fake_data_len != x11_saved_data_len) {
- error("X11 fake_data_len %d != saved_data_len %d",
- x11_fake_data_len, x11_saved_data_len);
- return -1;
- }
- /*
- * Received authentication protocol and data match
- * our fake data. Substitute the fake data with real
- * data.
- */
- memcpy(ucp + 12 + ((proto_len + 3) & ~3),
- x11_saved_data, x11_saved_data_len);
- return 1;
-}
-
-static void
-channel_pre_x11_open_13(Channel *c, fd_set * readset, fd_set * writeset)
-{
- int ret = x11_open_helper(&c->output);
-
- if (ret == 1) {
- /* Start normal processing for the channel. */
- c->type = SSH_CHANNEL_OPEN;
- channel_pre_open_13(c, readset, writeset);
- } else if (ret == -1) {
- /*
- * We have received an X11 connection that has bad
- * authentication information.
- */
- log("X11 connection rejected because of wrong authentication.");
- buffer_clear(&c->input);
- buffer_clear(&c->output);
- channel_close_fd(&c->sock);
- c->sock = -1;
- c->type = SSH_CHANNEL_CLOSED;
- packet_start(SSH_MSG_CHANNEL_CLOSE);
- packet_put_int(c->remote_id);
- packet_send();
- }
-}
-
-static void
-channel_pre_x11_open(Channel *c, fd_set * readset, fd_set * writeset)
-{
- int ret = x11_open_helper(&c->output);
-
- /* c->force_drain = 1; */
-
- if (ret == 1) {
- c->type = SSH_CHANNEL_OPEN;
- channel_pre_open(c, readset, writeset);
- } else if (ret == -1) {
- log("X11 connection rejected because of wrong authentication.");
- debug("X11 rejected %d i%d/o%d", c->self, c->istate, c->ostate);
- chan_read_failed(c);
- buffer_clear(&c->input);
- chan_ibuf_empty(c);
- buffer_clear(&c->output);
- /* for proto v1, the peer will send an IEOF */
- if (compat20)
- chan_write_failed(c);
- else
- c->type = SSH_CHANNEL_OPEN;
- debug("X11 closed %d i%d/o%d", c->self, c->istate, c->ostate);
- }
-}
-
-/* try to decode a socks4 header */
-static int
-channel_decode_socks4(Channel *c, fd_set * readset, fd_set * writeset)
-{
- char *p, *host;
- int len, have, i, found;
- char username[256];
- struct {
- u_int8_t version;
- u_int8_t command;
- u_int16_t dest_port;
- struct in_addr dest_addr;
- } s4_req, s4_rsp;
-
- debug2("channel %d: decode socks4", c->self);
-
- have = buffer_len(&c->input);
- len = sizeof(s4_req);
- if (have < len)
- return 0;
- p = buffer_ptr(&c->input);
- for (found = 0, i = len; i < have; i++) {
- if (p[i] == '\0') {
- found = 1;
- break;
- }
- if (i > 1024) {
- /* the peer is probably sending garbage */
- debug("channel %d: decode socks4: too long",
- c->self);
- return -1;
- }
- }
- if (!found)
- return 0;
- buffer_get(&c->input, (char *)&s4_req.version, 1);
- buffer_get(&c->input, (char *)&s4_req.command, 1);
- buffer_get(&c->input, (char *)&s4_req.dest_port, 2);
- buffer_get(&c->input, (char *)&s4_req.dest_addr, 4);
- have = buffer_len(&c->input);
- p = buffer_ptr(&c->input);
- len = strlen(p);
- debug2("channel %d: decode socks4: user %s/%d", c->self, p, len);
- if (len > have)
- fatal("channel %d: decode socks4: len %d > have %d",
- c->self, len, have);
- strlcpy(username, p, sizeof(username));
- buffer_consume(&c->input, len);
- buffer_consume(&c->input, 1); /* trailing '\0' */
-
- host = inet_ntoa(s4_req.dest_addr);
- strlcpy(c->path, host, sizeof(c->path));
- c->host_port = ntohs(s4_req.dest_port);
-
- debug("channel %d: dynamic request: socks4 host %s port %u command %u",
- c->self, host, c->host_port, s4_req.command);
-
- if (s4_req.command != 1) {
- debug("channel %d: cannot handle: socks4 cn %d",
- c->self, s4_req.command);
- return -1;
- }
- s4_rsp.version = 0; /* vn: 0 for reply */
- s4_rsp.command = 90; /* cd: req granted */
- s4_rsp.dest_port = 0; /* ignored */
- s4_rsp.dest_addr.s_addr = INADDR_ANY; /* ignored */
- buffer_append(&c->output, (char *)&s4_rsp, sizeof(s4_rsp));
- return 1;
-}
-
-/* try to decode a socks5 header */
-#define SSH_SOCKS5_AUTHDONE 0x1000
-#define SSH_SOCKS5_NOAUTH 0x00
-#define SSH_SOCKS5_IPV4 0x01
-#define SSH_SOCKS5_DOMAIN 0x03
-#define SSH_SOCKS5_IPV6 0x04
-#define SSH_SOCKS5_CONNECT 0x01
-#define SSH_SOCKS5_SUCCESS 0x00
-
-/* ARGSUSED */
-static int
-channel_decode_socks5(Channel *c, fd_set *readset, fd_set *writeset)
-{
- struct {
- u_int8_t version;
- u_int8_t command;
- u_int8_t reserved;
- u_int8_t atyp;
- } s5_req, s5_rsp;
- u_int16_t dest_port;
- u_char *p, dest_addr[255+1];
- u_int have, need, i, found, nmethods, addrlen;
- struct in_addr bnd_addr;
- int af;
-
- debug2("channel %d: decode socks5", c->self);
- p = buffer_ptr(&c->input);
- if (p[0] != 0x05)
- return -1;
- have = buffer_len(&c->input);
- if (!(c->flags & SSH_SOCKS5_AUTHDONE)) {
- /* format: ver | nmethods | methods */
- if (have < 2)
- return 0;
- nmethods = p[1];
- if (have < nmethods + 2)
- return 0;
- /* look for method: "NO AUTHENTICATION REQUIRED" */
- for (found = 0, i = 2 ; i < nmethods + 2; i++) {
- if (p[i] == SSH_SOCKS5_NOAUTH) {
- found = 1;
- break;
- }
- }
- if (!found) {
- error("channel %d: socks5 authentication methods not implemented",
- c->self);
- error("channel %d: forwarding failed: "
- "SSH_SOCKS5_NOAUTH method not found", c->self);
- return -1;
- }
- buffer_consume(&c->input, nmethods + 2);
- buffer_put_char(&c->output, 0x05); /* version */
- buffer_put_char(&c->output, SSH_SOCKS5_NOAUTH); /* method */
- FD_SET(c->sock, writeset);
- c->flags |= SSH_SOCKS5_AUTHDONE;
- debug2("channel %d: socks5 auth done", c->self);
- return 0; /* need more */
- }
- debug2("channel %d: socks5 post auth", c->self);
- if (have < sizeof(s5_req)+1)
- return 0; /* need more */
- memcpy(&s5_req, p, sizeof(s5_req));
- if (s5_req.version != 0x05 ||
- s5_req.command != SSH_SOCKS5_CONNECT ||
- s5_req.reserved != 0x00) {
- error("channel %d: forwarding failed: "
- "only socks5 connect is supported", c->self);
- return -1;
- }
- switch (s5_req.atyp){
- case SSH_SOCKS5_IPV4:
- addrlen = 4;
- af = AF_INET;
- break;
- case SSH_SOCKS5_DOMAIN:
- addrlen = p[sizeof(s5_req)];
- af = -1;
- break;
- case SSH_SOCKS5_IPV6:
- addrlen = 16;
- af = AF_INET6;
- break;
- default:
- error("channel %d: forwarding failed: "
- "bad socks5 atyp %d", c->self, s5_req.atyp);
- return -1;
- }
- need = sizeof(s5_req) + addrlen + 2;
- if (s5_req.atyp == SSH_SOCKS5_DOMAIN)
- need++;
- if (have < need)
- return 0;
- buffer_consume(&c->input, sizeof(s5_req));
- if (s5_req.atyp == SSH_SOCKS5_DOMAIN)
- buffer_consume(&c->input, 1); /* host string length */
- buffer_get(&c->input, (char *)&dest_addr, addrlen);
- buffer_get(&c->input, (char *)&dest_port, 2);
- dest_addr[addrlen] = '\0';
- if (s5_req.atyp == SSH_SOCKS5_DOMAIN)
- strlcpy(c->path, (char *)dest_addr, sizeof(c->path));
- else if (inet_ntop(af, dest_addr, c->path, sizeof(c->path)) == NULL)
- return -1;
- c->host_port = ntohs(dest_port);
-
- debug2("channel %d: dynamic request: socks5 host %s port %u command %u",
- c->self, c->path, c->host_port, s5_req.command);
-
- s5_rsp.version = 0x05;
- s5_rsp.command = SSH_SOCKS5_SUCCESS;
- s5_rsp.reserved = 0; /* ignored */
- s5_rsp.atyp = SSH_SOCKS5_IPV4;
- bzero(&bnd_addr, sizeof(bnd_addr));
- bnd_addr.s_addr = htonl(INADDR_ANY);
- dest_port = 0; /* ignored */
-
- buffer_append(&c->output, &s5_rsp, sizeof(s5_rsp));
- buffer_append(&c->output, &bnd_addr, sizeof(struct in_addr));
- buffer_append(&c->output, &dest_port, sizeof(dest_port));
- return 1;
-}
-
-/* dynamic port forwarding */
-static void
-channel_pre_dynamic(Channel *c, fd_set * readset, fd_set * writeset)
-{
- u_char *p;
- int have, ret;
-
- have = buffer_len(&c->input);
- debug2("channel %d: pre_dynamic: have %d", c->self, have);
- /* buffer_dump(&c->input); */
- /* check if the fixed size part of the packet is in buffer. */
- if (have < 3) {
- /* need more */
- FD_SET(c->sock, readset);
- return;
- }
- /* try to guess the protocol */
- p = buffer_ptr(&c->input);
- switch (p[0]) {
- case 0x04:
- ret = channel_decode_socks4(c, readset, writeset);
- break;
- case 0x05:
- ret = channel_decode_socks5(c, readset, writeset);
- break;
- default:
- error("channel %d: forwarding failed: unknown socks "
- "version 0x%02X", c->self, p[0]);
- ret = -1;
- break;
- }
- if (ret < 0) {
- chan_mark_dead(c);
- } else if (ret == 0) {
- debug2("channel %d: pre_dynamic: need more", c->self);
- /* need more */
- FD_SET(c->sock, readset);
- } else {
- /* switch to the next state */
- c->type = SSH_CHANNEL_OPENING;
- port_open_helper(c, "direct-tcpip");
- }
-}
-
-/* This is our fake X11 server socket. */
-static void
-channel_post_x11_listener(Channel *c, fd_set * readset, fd_set * writeset)
-{
- Channel *nc;
- struct sockaddr addr;
- int newsock;
- socklen_t addrlen;
- char buf[16384], *remote_ipaddr;
- int remote_port;
-
- if (FD_ISSET(c->sock, readset)) {
- debug("X11 connection requested.");
- addrlen = sizeof(addr);
- newsock = accept(c->sock, &addr, &addrlen);
- if (c->single_connection) {
- debug("single_connection: closing X11 listener.");
- channel_close_fd(&c->sock);
- chan_mark_dead(c);
- }
- if (newsock < 0) {
- error("accept: %.100s", strerror(errno));
- return;
- }
- set_nodelay(newsock);
- remote_ipaddr = get_peer_ipaddr(newsock);
- remote_port = get_peer_port(newsock);
- snprintf(buf, sizeof buf, "X11 connection from %.200s port %d",
- remote_ipaddr, remote_port);
-
- nc = channel_new("accepted x11 socket",
- SSH_CHANNEL_OPENING, newsock, newsock, -1,
- c->local_window_max, c->local_maxpacket,
- 0, xstrdup(buf), 1);
- if (compat20) {
- packet_start(SSH2_MSG_CHANNEL_OPEN);
- packet_put_cstring("x11");
- packet_put_int(nc->self);
- packet_put_int(nc->local_window_max);
- packet_put_int(nc->local_maxpacket);
- /* originator ipaddr and port */
- packet_put_cstring(remote_ipaddr);
- if (datafellows & SSH_BUG_X11FWD) {
- debug("ssh2 x11 bug compat mode");
- } else {
- packet_put_int(remote_port);
- }
- packet_send();
- } else {
- packet_start(SSH_SMSG_X11_OPEN);
- packet_put_int(nc->self);
- if (packet_get_protocol_flags() &
- SSH_PROTOFLAG_HOST_IN_FWD_OPEN)
- packet_put_cstring(buf);
- packet_send();
- }
- xfree(remote_ipaddr);
- }
-}
-
-static void
-port_open_helper(Channel *c, char *rtype)
-{
- int direct;
- char buf[1024];
- char *remote_ipaddr = get_peer_ipaddr(c->sock);
- u_short remote_port = get_peer_port(c->sock);
-
- direct = (strcmp(rtype, "direct-tcpip") == 0);
-
- snprintf(buf, sizeof buf,
- "%s: listening port %d for %.100s port %d, "
- "connect from %.200s port %d",
- rtype, c->listening_port, c->path, c->host_port,
- remote_ipaddr, remote_port);
-
- xfree(c->remote_name);
- c->remote_name = xstrdup(buf);
-
- if (compat20) {
- packet_start(SSH2_MSG_CHANNEL_OPEN);
- packet_put_cstring(rtype);
- packet_put_int(c->self);
- packet_put_int(c->local_window_max);
- packet_put_int(c->local_maxpacket);
- if (direct) {
- /* target host, port */
- packet_put_cstring(c->path);
- packet_put_int(c->host_port);
- } else {
- /* listen address, port */
- packet_put_cstring(c->path);
- packet_put_int(c->listening_port);
- }
- /* originator host and port */
- packet_put_cstring(remote_ipaddr);
- packet_put_int(remote_port);
- packet_send();
- } else {
- packet_start(SSH_MSG_PORT_OPEN);
- packet_put_int(c->self);
- packet_put_cstring(c->path);
- packet_put_int(c->host_port);
- if (packet_get_protocol_flags() &
- SSH_PROTOFLAG_HOST_IN_FWD_OPEN)
- packet_put_cstring(c->remote_name);
- packet_send();
- }
- xfree(remote_ipaddr);
-}
-
-/*
- * This socket is listening for connections to a forwarded TCP/IP port.
- */
-static void
-channel_post_port_listener(Channel *c, fd_set * readset, fd_set * writeset)
-{
- Channel *nc;
- struct sockaddr addr;
- int newsock, nextstate;
- socklen_t addrlen;
- char *rtype;
-
- if (FD_ISSET(c->sock, readset)) {
- debug("Connection to port %d forwarding "
- "to %.100s port %d requested.",
- c->listening_port, c->path, c->host_port);
-
- if (c->type == SSH_CHANNEL_RPORT_LISTENER) {
- nextstate = SSH_CHANNEL_OPENING;
- rtype = "forwarded-tcpip";
- } else {
- if (c->host_port == 0) {
- nextstate = SSH_CHANNEL_DYNAMIC;
- rtype = "dynamic-tcpip";
- } else {
- nextstate = SSH_CHANNEL_OPENING;
- rtype = "direct-tcpip";
- }
- }
-
- addrlen = sizeof(addr);
- newsock = accept(c->sock, &addr, &addrlen);
- if (newsock < 0) {
- error("accept: %.100s", strerror(errno));
- return;
- }
- set_nodelay(newsock);
- nc = channel_new(rtype,
- nextstate, newsock, newsock, -1,
- c->local_window_max, c->local_maxpacket,
- 0, xstrdup(rtype), 1);
- nc->listening_port = c->listening_port;
- nc->host_port = c->host_port;
- strlcpy(nc->path, c->path, sizeof(nc->path));
-
- if (nextstate != SSH_CHANNEL_DYNAMIC)
- port_open_helper(nc, rtype);
- }
-}
-
-/*
- * This is the authentication agent socket listening for connections from
- * clients.
- */
-static void
-channel_post_auth_listener(Channel *c, fd_set * readset, fd_set * writeset)
-{
- Channel *nc;
- char *name;
- int newsock;
- struct sockaddr addr;
- socklen_t addrlen;
-
- if (FD_ISSET(c->sock, readset)) {
- addrlen = sizeof(addr);
- newsock = accept(c->sock, &addr, &addrlen);
- if (newsock < 0) {
- error("accept from auth socket: %.100s", strerror(errno));
- return;
- }
- name = xstrdup("accepted auth socket");
- nc = channel_new("accepted auth socket",
- SSH_CHANNEL_OPENING, newsock, newsock, -1,
- c->local_window_max, c->local_maxpacket,
- 0, name, 1);
- if (compat20) {
- packet_start(SSH2_MSG_CHANNEL_OPEN);
- packet_put_cstring("auth-agent@openssh.com");
- packet_put_int(nc->self);
- packet_put_int(c->local_window_max);
- packet_put_int(c->local_maxpacket);
- } else {
- packet_start(SSH_SMSG_AGENT_OPEN);
- packet_put_int(nc->self);
- }
- packet_send();
- }
-}
-
-static void
-channel_post_connecting(Channel *c, fd_set * readset, fd_set * writeset)
-{
- int err = 0;
- socklen_t sz = sizeof(err);
-
- if (FD_ISSET(c->sock, writeset)) {
- if (getsockopt(c->sock, SOL_SOCKET, SO_ERROR, &err, &sz) < 0) {
- err = errno;
- error("getsockopt SO_ERROR failed");
- }
- if (err == 0) {
- debug("channel %d: connected", c->self);
- c->type = SSH_CHANNEL_OPEN;
- if (compat20) {
- packet_start(SSH2_MSG_CHANNEL_OPEN_CONFIRMATION);
- packet_put_int(c->remote_id);
- packet_put_int(c->self);
- packet_put_int(c->local_window);
- packet_put_int(c->local_maxpacket);
- } else {
- packet_start(SSH_MSG_CHANNEL_OPEN_CONFIRMATION);
- packet_put_int(c->remote_id);
- packet_put_int(c->self);
- }
- } else {
- debug("channel %d: not connected: %s",
- c->self, strerror(err));
- if (compat20) {
- packet_start(SSH2_MSG_CHANNEL_OPEN_FAILURE);
- packet_put_int(c->remote_id);
- packet_put_int(SSH2_OPEN_CONNECT_FAILED);
- if (!(datafellows & SSH_BUG_OPENFAILURE)) {
- packet_put_utf8_cstring(strerror(err));
- packet_put_cstring("");
- }
- } else {
- packet_start(SSH_MSG_CHANNEL_OPEN_FAILURE);
- packet_put_int(c->remote_id);
- }
- chan_mark_dead(c);
- }
- packet_send();
- }
-}
-
-static int
-channel_handle_rfd(Channel *c, fd_set * readset, fd_set * writeset)
-{
- char buf[CHAN_RBUF];
- int len;
-
- if (c->rfd != -1 &&
- FD_ISSET(c->rfd, readset)) {
- len = read(c->rfd, buf, sizeof(buf));
- if (len < 0 && (errno == EINTR || errno == EAGAIN))
- return 1;
- if (len <= 0) {
- debug("channel %d: read<=0 rfd %d len %d",
- c->self, c->rfd, len);
- if (c->type != SSH_CHANNEL_OPEN) {
- debug("channel %d: not open", c->self);
- chan_mark_dead(c);
- return -1;
- } else if (compat13) {
- buffer_clear(&c->output);
- c->type = SSH_CHANNEL_INPUT_DRAINING;
- debug("channel %d: input draining.", c->self);
- } else {
- chan_read_failed(c);
- }
- return -1;
- }
- if (c->input_filter != NULL) {
- if (c->input_filter(c, buf, len) == -1) {
- debug("channel %d: filter stops", c->self);
- chan_read_failed(c);
- }
- } else {
- buffer_append(&c->input, buf, len);
- }
- }
- return 1;
-}
-static int
-channel_handle_wfd(Channel *c, fd_set * readset, fd_set * writeset)
-{
- struct termios tio;
- u_char *data;
- u_int dlen;
- int len;
-
- /* Send buffered output data to the socket. */
- if (c->wfd != -1 &&
- FD_ISSET(c->wfd, writeset) &&
- buffer_len(&c->output) > 0) {
- data = buffer_ptr(&c->output);
- dlen = buffer_len(&c->output);
-#ifdef _AIX
- /* XXX: Later AIX versions can't push as much data to tty */
- if (compat20 && c->wfd_isatty && dlen > 8*1024)
- dlen = 8*1024;
-#endif
- len = write(c->wfd, data, dlen);
- if (len < 0 && (errno == EINTR || errno == EAGAIN))
- return 1;
- if (len <= 0) {
- if (c->type != SSH_CHANNEL_OPEN) {
- debug("channel %d: not open", c->self);
- chan_mark_dead(c);
- return -1;
- } else if (compat13) {
- buffer_clear(&c->output);
- debug("channel %d: input draining.", c->self);
- c->type = SSH_CHANNEL_INPUT_DRAINING;
- } else {
- chan_write_failed(c);
- }
- return -1;
- }
- if (compat20 && c->isatty && dlen >= 1 && data[0] != '\r') {
- if (tcgetattr(c->wfd, &tio) == 0 &&
- !(tio.c_lflag & ECHO) && (tio.c_lflag & ICANON)) {
- /*
- * Simulate echo to reduce the impact of
- * traffic analysis. We need to match the
- * size of a SSH2_MSG_CHANNEL_DATA message
- * (4 byte channel id + data)
- */
- packet_send_ignore(4 + len);
- packet_send();
- }
- }
- buffer_consume(&c->output, len);
- if (compat20 && len > 0) {
- c->local_consumed += len;
- }
- }
- return 1;
-}
-static int
-channel_handle_efd(Channel *c, fd_set * readset, fd_set * writeset)
-{
- char buf[CHAN_RBUF];
- int len;
-
-/** XXX handle drain efd, too */
- if (c->efd != -1) {
- if (c->extended_usage == CHAN_EXTENDED_WRITE &&
- FD_ISSET(c->efd, writeset) &&
- buffer_len(&c->extended) > 0) {
- len = write(c->efd, buffer_ptr(&c->extended),
- buffer_len(&c->extended));
- debug2("channel %d: written %d to efd %d",
- c->self, len, c->efd);
- if (len < 0 && (errno == EINTR || errno == EAGAIN))
- return 1;
- if (len <= 0) {
- debug2("channel %d: closing write-efd %d",
- c->self, c->efd);
- channel_close_fd(&c->efd);
- } else {
- buffer_consume(&c->extended, len);
- c->local_consumed += len;
- }
- } else if (c->extended_usage == CHAN_EXTENDED_READ &&
- FD_ISSET(c->efd, readset)) {
- len = read(c->efd, buf, sizeof(buf));
- debug2("channel %d: read %d from efd %d",
- c->self, len, c->efd);
- if (len < 0 && (errno == EINTR || errno == EAGAIN))
- return 1;
- if (len <= 0) {
- debug2("channel %d: closing read-efd %d",
- c->self, c->efd);
- channel_close_fd(&c->efd);
- } else {
- buffer_append(&c->extended, buf, len);
- }
- }
- }
- return 1;
-}
-static int
-channel_check_window(Channel *c)
-{
- if (c->type == SSH_CHANNEL_OPEN &&
- !(c->flags & (CHAN_CLOSE_SENT|CHAN_CLOSE_RCVD)) &&
- c->local_window < c->local_window_max/2 &&
- c->local_consumed > 0) {
- packet_start(SSH2_MSG_CHANNEL_WINDOW_ADJUST);
- packet_put_int(c->remote_id);
- packet_put_int(c->local_consumed);
- packet_send();
- debug2("channel %d: window %d sent adjust %d",
- c->self, c->local_window,
- c->local_consumed);
- c->local_window += c->local_consumed;
- c->local_consumed = 0;
- }
- return 1;
-}
-
-static void
-channel_post_open(Channel *c, fd_set * readset, fd_set * writeset)
-{
- channel_handle_rfd(c, readset, writeset);
- channel_handle_wfd(c, readset, writeset);
- if (!compat20)
- return;
- channel_handle_efd(c, readset, writeset);
- channel_check_window(c);
-}
-
-static void
-channel_post_output_drain_13(Channel *c, fd_set * readset, fd_set * writeset)
-{
- int len;
-
- /* Send buffered output data to the socket. */
- if (FD_ISSET(c->sock, writeset) && buffer_len(&c->output) > 0) {
- len = write(c->sock, buffer_ptr(&c->output),
- buffer_len(&c->output));
- if (len <= 0)
- buffer_clear(&c->output);
- else
- buffer_consume(&c->output, len);
- }
-}
-
-static void
-channel_handler_init_20(void)
-{
- channel_pre[SSH_CHANNEL_OPEN] = &channel_pre_open;
- channel_pre[SSH_CHANNEL_X11_OPEN] = &channel_pre_x11_open;
- channel_pre[SSH_CHANNEL_PORT_LISTENER] = &channel_pre_listener;
- channel_pre[SSH_CHANNEL_RPORT_LISTENER] = &channel_pre_listener;
- channel_pre[SSH_CHANNEL_X11_LISTENER] = &channel_pre_listener;
- channel_pre[SSH_CHANNEL_AUTH_SOCKET] = &channel_pre_listener;
- channel_pre[SSH_CHANNEL_CONNECTING] = &channel_pre_connecting;
- channel_pre[SSH_CHANNEL_DYNAMIC] = &channel_pre_dynamic;
-
- channel_post[SSH_CHANNEL_OPEN] = &channel_post_open;
- channel_post[SSH_CHANNEL_PORT_LISTENER] = &channel_post_port_listener;
- channel_post[SSH_CHANNEL_RPORT_LISTENER] = &channel_post_port_listener;
- channel_post[SSH_CHANNEL_X11_LISTENER] = &channel_post_x11_listener;
- channel_post[SSH_CHANNEL_AUTH_SOCKET] = &channel_post_auth_listener;
- channel_post[SSH_CHANNEL_CONNECTING] = &channel_post_connecting;
- channel_post[SSH_CHANNEL_DYNAMIC] = &channel_post_open;
-}
-
-static void
-channel_handler_init_13(void)
-{
- channel_pre[SSH_CHANNEL_OPEN] = &channel_pre_open_13;
- channel_pre[SSH_CHANNEL_X11_OPEN] = &channel_pre_x11_open_13;
- channel_pre[SSH_CHANNEL_X11_LISTENER] = &channel_pre_listener;
- channel_pre[SSH_CHANNEL_PORT_LISTENER] = &channel_pre_listener;
- channel_pre[SSH_CHANNEL_AUTH_SOCKET] = &channel_pre_listener;
- channel_pre[SSH_CHANNEL_INPUT_DRAINING] = &channel_pre_input_draining;
- channel_pre[SSH_CHANNEL_OUTPUT_DRAINING] = &channel_pre_output_draining;
- channel_pre[SSH_CHANNEL_CONNECTING] = &channel_pre_connecting;
- channel_pre[SSH_CHANNEL_DYNAMIC] = &channel_pre_dynamic;
-
- channel_post[SSH_CHANNEL_OPEN] = &channel_post_open;
- channel_post[SSH_CHANNEL_X11_LISTENER] = &channel_post_x11_listener;
- channel_post[SSH_CHANNEL_PORT_LISTENER] = &channel_post_port_listener;
- channel_post[SSH_CHANNEL_AUTH_SOCKET] = &channel_post_auth_listener;
- channel_post[SSH_CHANNEL_OUTPUT_DRAINING] = &channel_post_output_drain_13;
- channel_post[SSH_CHANNEL_CONNECTING] = &channel_post_connecting;
- channel_post[SSH_CHANNEL_DYNAMIC] = &channel_post_open;
-}
-
-static void
-channel_handler_init_15(void)
-{
- channel_pre[SSH_CHANNEL_OPEN] = &channel_pre_open;
- channel_pre[SSH_CHANNEL_X11_OPEN] = &channel_pre_x11_open;
- channel_pre[SSH_CHANNEL_X11_LISTENER] = &channel_pre_listener;
- channel_pre[SSH_CHANNEL_PORT_LISTENER] = &channel_pre_listener;
- channel_pre[SSH_CHANNEL_AUTH_SOCKET] = &channel_pre_listener;
- channel_pre[SSH_CHANNEL_CONNECTING] = &channel_pre_connecting;
- channel_pre[SSH_CHANNEL_DYNAMIC] = &channel_pre_dynamic;
-
- channel_post[SSH_CHANNEL_X11_LISTENER] = &channel_post_x11_listener;
- channel_post[SSH_CHANNEL_PORT_LISTENER] = &channel_post_port_listener;
- channel_post[SSH_CHANNEL_AUTH_SOCKET] = &channel_post_auth_listener;
- channel_post[SSH_CHANNEL_OPEN] = &channel_post_open;
- channel_post[SSH_CHANNEL_CONNECTING] = &channel_post_connecting;
- channel_post[SSH_CHANNEL_DYNAMIC] = &channel_post_open;
-}
-
-static void
-channel_handler_init(void)
-{
- int i;
-
- for (i = 0; i < SSH_CHANNEL_MAX_TYPE; i++) {
- channel_pre[i] = NULL;
- channel_post[i] = NULL;
- }
- if (compat20)
- channel_handler_init_20();
- else if (compat13)
- channel_handler_init_13();
- else
- channel_handler_init_15();
-}
-
-/* gc dead channels */
-static void
-channel_garbage_collect(Channel *c)
-{
- if (c == NULL)
- return;
- if (c->detach_user != NULL) {
- if (!chan_is_dead(c, 0))
- return;
- debug("channel %d: gc: notify user", c->self);
- c->detach_user(c->self, NULL);
- /* if we still have a callback */
- if (c->detach_user != NULL)
- return;
- debug("channel %d: gc: user detached", c->self);
- }
- if (!c->wait_for_exit && !chan_is_dead(c, 1))
- return;
- debug("channel %d: garbage collecting", c->self);
- channel_free(c);
-}
-
-static void
-channel_handler(chan_fn *ftab[], fd_set * readset, fd_set * writeset)
-{
- static int did_init = 0;
- int i, oalloc;
- Channel *c;
-
- if (!did_init) {
- channel_handler_init();
- did_init = 1;
- }
- for (i = 0, oalloc = channels_alloc; i < oalloc; i++) {
- c = channels[i];
- if (c == NULL)
- continue;
- if (c->delayed) {
- if (ftab == channel_pre)
- c->delayed = 0;
- else
- continue;
- }
- if (ftab[c->type] != NULL)
- (*ftab[c->type])(c, readset, writeset);
- channel_garbage_collect(c);
- }
-}
-
-/*
- * Allocate/update select bitmasks and add any bits relevant to channels in
- * select bitmasks.
- */
-void
-channel_prepare_select(fd_set **readsetp, fd_set **writesetp, int *maxfdp,
- int *nallocp, int rekeying)
-{
- int n;
- u_int sz;
-
- n = MAX(*maxfdp, channel_max_fd);
-
- sz = howmany(n+1, NFDBITS) * sizeof(fd_mask);
- /* perhaps check sz < nalloc/2 and shrink? */
- if (*readsetp == NULL || sz > *nallocp) {
- *readsetp = xrealloc(*readsetp, sz);
- *writesetp = xrealloc(*writesetp, sz);
- *nallocp = sz;
- }
- *maxfdp = n;
- memset(*readsetp, 0, sz);
- memset(*writesetp, 0, sz);
-
- if (!rekeying)
- channel_handler(channel_pre, *readsetp, *writesetp);
-}
-
-/*
- * After select, perform any appropriate operations for channels which have
- * events pending.
- */
-void
-channel_after_select(fd_set * readset, fd_set * writeset)
-{
- channel_handler(channel_post, readset, writeset);
-}
-
-
-/* If there is data to send to the connection, enqueue some of it now. */
-
-void
-channel_output_poll(void)
-{
- Channel *c;
- int i;
- u_int len;
-
- for (i = 0; i < channels_alloc; i++) {
- c = channels[i];
- if (c == NULL)
- continue;
-
- /*
- * We are only interested in channels that can have buffered
- * incoming data.
- */
- if (compat13) {
- if (c->type != SSH_CHANNEL_OPEN &&
- c->type != SSH_CHANNEL_INPUT_DRAINING)
- continue;
- } else {
- if (c->type != SSH_CHANNEL_OPEN)
- continue;
- }
- if (compat20 &&
- (c->flags & (CHAN_CLOSE_SENT|CHAN_CLOSE_RCVD))) {
- /* XXX is this true? */
- debug3("channel %d: will not send data after close", c->self);
- continue;
- }
-
- /* Get the amount of buffered data for this channel. */
- if ((c->istate == CHAN_INPUT_OPEN ||
- c->istate == CHAN_INPUT_WAIT_DRAIN) &&
- (len = buffer_len(&c->input)) > 0) {
- /*
- * Send some data for the other side over the secure
- * connection.
- */
- if (compat20) {
- if (len > c->remote_window)
- len = c->remote_window;
- if (len > c->remote_maxpacket)
- len = c->remote_maxpacket;
- } else {
- if (packet_is_interactive()) {
- if (len > 1024)
- len = 512;
- } else {
- /* Keep the packets at reasonable size. */
- if (len > packet_get_maxsize()/2)
- len = packet_get_maxsize()/2;
- }
- }
- if (len > 0) {
- packet_start(compat20 ?
- SSH2_MSG_CHANNEL_DATA : SSH_MSG_CHANNEL_DATA);
- packet_put_int(c->remote_id);
- packet_put_string(buffer_ptr(&c->input), len);
- packet_send();
- buffer_consume(&c->input, len);
- c->remote_window -= len;
- }
- } else if (c->istate == CHAN_INPUT_WAIT_DRAIN) {
- if (compat13)
- fatal("cannot happen: istate == INPUT_WAIT_DRAIN for proto 1.3");
- /*
- * input-buffer is empty and read-socket shutdown:
- * tell peer, that we will not send more data: send IEOF.
- * hack for extended data: delay EOF if EFD still in use.
- */
- if (CHANNEL_EFD_INPUT_ACTIVE(c))
- debug2("channel %d: ibuf_empty delayed efd %d/(%d)",
- c->self, c->efd, buffer_len(&c->extended));
- else
- chan_ibuf_empty(c);
- }
- /* Send extended data, i.e. stderr */
- if (compat20 &&
- !(c->flags & CHAN_EOF_SENT) &&
- c->remote_window > 0 &&
- (len = buffer_len(&c->extended)) > 0 &&
- c->extended_usage == CHAN_EXTENDED_READ) {
- debug2("channel %d: rwin %u elen %u euse %d",
- c->self, c->remote_window, buffer_len(&c->extended),
- c->extended_usage);
- if (len > c->remote_window)
- len = c->remote_window;
- if (len > c->remote_maxpacket)
- len = c->remote_maxpacket;
- packet_start(SSH2_MSG_CHANNEL_EXTENDED_DATA);
- packet_put_int(c->remote_id);
- packet_put_int(SSH2_EXTENDED_DATA_STDERR);
- packet_put_string(buffer_ptr(&c->extended), len);
- packet_send();
- buffer_consume(&c->extended, len);
- c->remote_window -= len;
- debug2("channel %d: sent ext data %d", c->self, len);
- }
- }
-}
-
-
-/* -- protocol input */
-
-void
-channel_input_data(int type, u_int32_t seq, void *ctxt)
-{
- int id;
- char *data;
- u_int data_len;
- Channel *c;
-
- /* Get the channel number and verify it. */
- id = packet_get_int();
- c = channel_lookup(id);
- if (c == NULL)
- packet_disconnect("Received data for nonexistent channel %d.", id);
-
- /* Ignore any data for non-open channels (might happen on close) */
- if (c->type != SSH_CHANNEL_OPEN &&
- c->type != SSH_CHANNEL_X11_OPEN)
- return;
-
- /* same for protocol 1.5 if output end is no longer open */
- if (!compat13 && c->ostate != CHAN_OUTPUT_OPEN)
- return;
-
- /* Get the data. */
- data = packet_get_string(&data_len);
-
- if (compat20) {
- if (data_len > c->local_maxpacket) {
- log("channel %d: rcvd big packet %d, maxpack %d",
- c->self, data_len, c->local_maxpacket);
- }
- if (data_len > c->local_window) {
- log("channel %d: rcvd too much data %d, win %d",
- c->self, data_len, c->local_window);
- xfree(data);
- return;
- }
- c->local_window -= data_len;
- }
- packet_check_eom();
- buffer_append(&c->output, data, data_len);
- xfree(data);
-}
-
-void
-channel_input_extended_data(int type, u_int32_t seq, void *ctxt)
-{
- int id;
- char *data;
- u_int data_len, tcode;
- Channel *c;
-
- /* Get the channel number and verify it. */
- id = packet_get_int();
- c = channel_lookup(id);
-
- if (c == NULL)
- packet_disconnect("Received extended_data for bad channel %d.", id);
- if (c->type != SSH_CHANNEL_OPEN) {
- log("channel %d: ext data for non open", id);
- return;
- }
- if (c->flags & CHAN_EOF_RCVD) {
- if (datafellows & SSH_BUG_EXTEOF)
- debug("channel %d: accepting ext data after eof", id);
- else
- packet_disconnect("Received extended_data after EOF "
- "on channel %d.", id);
- }
- tcode = packet_get_int();
- if (c->efd == -1 ||
- c->extended_usage != CHAN_EXTENDED_WRITE ||
- tcode != SSH2_EXTENDED_DATA_STDERR) {
- log("channel %d: bad ext data", c->self);
- return;
- }
- data = packet_get_string(&data_len);
- packet_check_eom();
- if (data_len > c->local_window) {
- log("channel %d: rcvd too much extended_data %d, win %d",
- c->self, data_len, c->local_window);
- xfree(data);
- return;
- }
- debug2("channel %d: rcvd ext data %d", c->self, data_len);
- c->local_window -= data_len;
- buffer_append(&c->extended, data, data_len);
- xfree(data);
-}
-
-void
-channel_input_ieof(int type, u_int32_t seq, void *ctxt)
-{
- int id;
- Channel *c;
-
- id = packet_get_int();
- packet_check_eom();
- c = channel_lookup(id);
- if (c == NULL)
- packet_disconnect("Received ieof for nonexistent channel %d.", id);
- chan_rcvd_ieof(c);
-
- /* XXX force input close */
- if (c->force_drain && c->istate == CHAN_INPUT_OPEN) {
- debug("channel %d: FORCE input drain", c->self);
- c->istate = CHAN_INPUT_WAIT_DRAIN;
- if (buffer_len(&c->input) == 0)
- chan_ibuf_empty(c);
- }
-
-}
-
-void
-channel_input_close(int type, u_int32_t seq, void *ctxt)
-{
- int id;
- Channel *c;
-
- id = packet_get_int();
- packet_check_eom();
- c = channel_lookup(id);
- if (c == NULL)
- packet_disconnect("Received close for nonexistent channel %d.", id);
-
- /*
- * Send a confirmation that we have closed the channel and no more
- * data is coming for it.
- */
- packet_start(SSH_MSG_CHANNEL_CLOSE_CONFIRMATION);
- packet_put_int(c->remote_id);
- packet_send();
-
- /*
- * If the channel is in closed state, we have sent a close request,
- * and the other side will eventually respond with a confirmation.
- * Thus, we cannot free the channel here, because then there would be
- * no-one to receive the confirmation. The channel gets freed when
- * the confirmation arrives.
- */
- if (c->type != SSH_CHANNEL_CLOSED) {
- /*
- * Not a closed channel - mark it as draining, which will
- * cause it to be freed later.
- */
- buffer_clear(&c->input);
- c->type = SSH_CHANNEL_OUTPUT_DRAINING;
- }
-}
-
-/* proto version 1.5 overloads CLOSE_CONFIRMATION with OCLOSE */
-void
-channel_input_oclose(int type, u_int32_t seq, void *ctxt)
-{
- int id = packet_get_int();
- Channel *c = channel_lookup(id);
-
- packet_check_eom();
- if (c == NULL)
- packet_disconnect("Received oclose for nonexistent channel %d.", id);
- chan_rcvd_oclose(c);
-}
-
-void
-channel_input_close_confirmation(int type, u_int32_t seq, void *ctxt)
-{
- int id = packet_get_int();
- Channel *c = channel_lookup(id);
-
- packet_check_eom();
- if (c == NULL)
- packet_disconnect("Received close confirmation for "
- "out-of-range channel %d.", id);
- if (c->type != SSH_CHANNEL_CLOSED)
- packet_disconnect("Received close confirmation for "
- "non-closed channel %d (type %d).", id, c->type);
- channel_free(c);
-}
-
-void
-channel_input_open_confirmation(int type, u_int32_t seq, void *ctxt)
-{
- int id, remote_id;
- Channel *c;
-
- id = packet_get_int();
- c = channel_lookup(id);
-
- if (c==NULL || c->type != SSH_CHANNEL_OPENING)
- packet_disconnect("Received open confirmation for "
- "non-opening channel %d.", id);
- remote_id = packet_get_int();
- /* Record the remote channel number and mark that the channel is now open. */
- c->remote_id = remote_id;
- c->type = SSH_CHANNEL_OPEN;
-
- if (compat20) {
- c->remote_window = packet_get_int();
- c->remote_maxpacket = packet_get_int();
- if (c->confirm) {
- debug2("callback start");
- c->confirm(c->self, NULL);
- debug2("callback done");
- }
- debug("channel %d: open confirm rwindow %u rmax %u", c->self,
- c->remote_window, c->remote_maxpacket);
- }
- packet_check_eom();
-}
-
-static char *
-reason2txt(int reason)
-{
- switch (reason) {
- case SSH2_OPEN_ADMINISTRATIVELY_PROHIBITED:
- return "administratively prohibited";
- case SSH2_OPEN_CONNECT_FAILED:
- return "connect failed";
- case SSH2_OPEN_UNKNOWN_CHANNEL_TYPE:
- return "unknown channel type";
- case SSH2_OPEN_RESOURCE_SHORTAGE:
- return "resource shortage";
- }
- return "unknown reason";
-}
-
-void
-channel_input_open_failure(int type, u_int32_t seq, void *ctxt)
-{
- int id, reason;
- char *msg = NULL, *lang = NULL;
- Channel *c;
-
- id = packet_get_int();
- c = channel_lookup(id);
-
- if (c==NULL || c->type != SSH_CHANNEL_OPENING)
- packet_disconnect("Received open failure for "
- "non-opening channel %d.", id);
- if (compat20) {
- reason = packet_get_int();
- if (!(datafellows & SSH_BUG_OPENFAILURE)) {
- msg = packet_get_string(NULL);
- lang = packet_get_string(NULL);
- }
- log("channel %d: open failed: %s%s%s", id,
- reason2txt(reason), msg ? ": ": "", msg ? msg : "");
- if (msg != NULL)
- xfree(msg);
- if (lang != NULL)
- xfree(lang);
- }
- packet_check_eom();
- /* Free the channel. This will also close the socket. */
- channel_free(c);
-}
-
-void
-channel_input_window_adjust(int type, u_int32_t seq, void *ctxt)
-{
- Channel *c;
- int id;
- u_int adjust;
-
- if (!compat20)
- return;
-
- /* Get the channel number and verify it. */
- id = packet_get_int();
- c = channel_lookup(id);
-
- if (c == NULL || c->type != SSH_CHANNEL_OPEN) {
- log("Received window adjust for "
- "non-open channel %d.", id);
- return;
- }
- adjust = packet_get_int();
- packet_check_eom();
- debug2("channel %d: rcvd adjust %u", id, adjust);
- c->remote_window += adjust;
-}
-
-void
-channel_input_port_open(int type, u_int32_t seq, void *ctxt)
-{
- Channel *c = NULL;
- u_short host_port;
- char *host, *originator_string;
- int remote_id, sock = -1;
-
- remote_id = packet_get_int();
- host = packet_get_string(NULL);
- host_port = packet_get_int();
-
- if (packet_get_protocol_flags() & SSH_PROTOFLAG_HOST_IN_FWD_OPEN) {
- originator_string = packet_get_string(NULL);
- } else {
- originator_string = xstrdup("unknown (remote did not supply name)");
- }
- packet_check_eom();
- sock = channel_connect_to(host, host_port);
- if (sock != -1) {
- c = channel_new("connected socket",
- SSH_CHANNEL_CONNECTING, sock, sock, -1, 0, 0, 0,
- originator_string, 1);
- c->remote_id = remote_id;
- }
- if (c == NULL) {
- packet_start(SSH_MSG_CHANNEL_OPEN_FAILURE);
- packet_put_int(remote_id);
- packet_send();
- }
- xfree(host);
-}
-
-
-/* -- tcp forwarding */
-
-void
-channel_set_af(int af)
-{
- IPv4or6 = af;
-}
-
-static int
-channel_setup_fwd_listener(int type, const char *listen_addr, u_short listen_port,
- const char *host_to_connect, u_short port_to_connect, int gateway_ports)
-{
- Channel *c;
- int sock, r, is_client, on = 1, wildcard = 0, success = 0;
- struct addrinfo hints, *ai, *aitop;
- const char *host, *addr;
- char ntop[NI_MAXHOST], strport[NI_MAXSERV];
-
- host = (type == SSH_CHANNEL_RPORT_LISTENER) ?
- listen_addr : host_to_connect;
- is_client = (type == SSH_CHANNEL_PORT_LISTENER);
-
- if (host == NULL) {
- error("No forward host name.");
- return 0;
- }
- if (strlen(host) > SSH_CHANNEL_PATH_LEN - 1) {
- error("Forward host name too long.");
- return 0;
- }
-
- /*
- * Determine whether or not a port forward listens to loopback,
- * specified address or wildcard. On the client, a specified bind
- * address will always override gateway_ports. On the server, a
- * gateway_ports of 1 (``yes'') will override the client's
- * specification and force a wildcard bind, whereas a value of 2
- * (``clientspecified'') will bind to whatever address the client
- * asked for.
- *
- * Special-case listen_addrs are:
- *
- * "0.0.0.0" -> wildcard v4/v6 if SSH_OLD_FORWARD_ADDR
- * "" (empty string), "*" -> wildcard v4/v6
- * "localhost" -> loopback v4/v6
- */
- addr = NULL;
- if (listen_addr == NULL) {
- /* No address specified: default to gateway_ports setting */
- if (gateway_ports)
- wildcard = 1;
- } else if (gateway_ports || is_client) {
- if (((datafellows & SSH_OLD_FORWARD_ADDR) &&
- strcmp(listen_addr, "0.0.0.0") == 0 && is_client == 0) ||
- *listen_addr == '\0' || strcmp(listen_addr, "*") == 0 ||
- (!is_client && gateway_ports == 1))
- wildcard = 1;
- else if (strcmp(listen_addr, "localhost") != 0)
- addr = listen_addr;
- }
-
- debug3("channel_setup_fwd_listener: type %d wildcard %d addr %s",
- type, wildcard, (addr == NULL) ? "NULL" : addr);
-
- /*
- * getaddrinfo returns a loopback address if the hostname is
- * set to NULL and hints.ai_flags is not AI_PASSIVE
- */
- memset(&hints, 0, sizeof(hints));
- hints.ai_family = IPv4or6;
- hints.ai_flags = wildcard ? AI_PASSIVE : 0;
- hints.ai_socktype = SOCK_STREAM;
- snprintf(strport, sizeof strport, "%d", listen_port);
- if ((r = getaddrinfo(addr, strport, &hints, &aitop)) != 0) {
- if (addr == NULL) {
- /* This really shouldn't happen */
- packet_disconnect("getaddrinfo: fatal error: %s",
- gai_strerror(r));
- } else {
- error("channel_setup_fwd_listener: "
- "getaddrinfo(%.64s): %s", addr, gai_strerror(r));
- }
- return 0;
- }
-
- for (ai = aitop; ai; ai = ai->ai_next) {
- if (ai->ai_family != AF_INET && ai->ai_family != AF_INET6)
- continue;
- if (getnameinfo(ai->ai_addr, ai->ai_addrlen, ntop, sizeof(ntop),
- strport, sizeof(strport), NI_NUMERICHOST|NI_NUMERICSERV) != 0) {
- error("channel_setup_fwd_listener: getnameinfo failed");
- continue;
- }
- /* Create a port to listen for the host. */
- sock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
- if (sock < 0) {
- /* this is no error since kernel may not support ipv6 */
- verbose("socket: %.100s", strerror(errno));
- continue;
- }
- /*
- * Set socket options.
- * Allow local port reuse in TIME_WAIT.
- */
- if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &on,
- sizeof(on)) == -1)
- error("setsockopt SO_REUSEADDR: %s", strerror(errno));
-
- debug("Local forwarding listening on %s port %s.", ntop, strport);
-
- /* Bind the socket to the address. */
- if (bind(sock, ai->ai_addr, ai->ai_addrlen) < 0) {
- /* address can be in use ipv6 address is already bound */
- if (!ai->ai_next)
- error("bind: %.100s", strerror(errno));
- else
- verbose("bind: %.100s", strerror(errno));
-
- close(sock);
- continue;
- }
- /* Start listening for connections on the socket. */
- if (listen(sock, 5) < 0) {
- error("listen: %.100s", strerror(errno));
- close(sock);
- continue;
- }
- /* Allocate a channel number for the socket. */
- c = channel_new("port listener", type, sock, sock, -1,
- CHAN_TCP_WINDOW_DEFAULT, CHAN_TCP_PACKET_DEFAULT,
- 0, xstrdup("port listener"), 1);
- strlcpy(c->path, host, sizeof(c->path));
- c->host_port = port_to_connect;
- c->listening_port = listen_port;
- success = 1;
- }
- if (success == 0)
- error("channel_setup_fwd_listener: cannot listen to port: %d",
- listen_port);
- freeaddrinfo(aitop);
- return success;
-}
-
-int
-channel_cancel_rport_listener(const char *host, u_short port)
-{
- u_int i;
- int found = 0;
-
- for (i = 0; i < channels_alloc; i++) {
- Channel *c = channels[i];
-
- if (c != NULL && c->type == SSH_CHANNEL_RPORT_LISTENER &&
- strncmp(c->path, host, sizeof(c->path)) == 0 &&
- c->listening_port == port) {
- debug2("%s: close channel %d", __func__, i);
- channel_free(c);
- found = 1;
- }
- }
-
- return (found);
-}
-
-/* protocol local port fwd, used by ssh (and sshd in v1) */
-int
-channel_setup_local_fwd_listener(const char *listen_host, u_short listen_port,
- const char *host_to_connect, u_short port_to_connect, int gateway_ports)
-{
- return channel_setup_fwd_listener(SSH_CHANNEL_PORT_LISTENER,
- listen_host, listen_port, host_to_connect, port_to_connect,
- gateway_ports);
-}
-
-/* protocol v2 remote port fwd, used by sshd */
-int
-channel_setup_remote_fwd_listener(const char *listen_address,
- u_short listen_port, int gateway_ports)
-{
- return channel_setup_fwd_listener(SSH_CHANNEL_RPORT_LISTENER,
- listen_address, listen_port, NULL, 0, gateway_ports);
-}
-
-/*
- * Initiate forwarding of connections to port "port" on remote host through
- * the secure channel to host:port from local side.
- */
-
-int
-channel_request_remote_forwarding(const char *listen_host, u_short listen_port,
- const char *host_to_connect, u_short port_to_connect)
-{
- int type, success = 0;
-
- /* Record locally that connection to this host/port is permitted. */
- if (num_permitted_opens >= SSH_MAX_FORWARDS_PER_DIRECTION)
- fatal("channel_request_remote_forwarding: too many forwards");
-
- if (listen_host != NULL &&
- strlen(listen_host) > SSH_CHANNEL_PATH_LEN - 1) {
- error("Binding address too long.");
- return -1;
- }
-
- /* Send the forward request to the remote side. */
- if (compat20) {
- const char *address_to_bind;
- if (listen_host == NULL) {
- if (datafellows & SSH_BUG_RFWD_ADDR)
- address_to_bind = "127.0.0.1";
- else
- address_to_bind = "localhost";
- } else if (*listen_host == '\0' ||
- strcmp(listen_host, "*") == 0) {
- if (datafellows & SSH_BUG_RFWD_ADDR)
- address_to_bind = "0.0.0.0";
- else
- address_to_bind = "";
- } else
- address_to_bind = listen_host;
-
- packet_start(SSH2_MSG_GLOBAL_REQUEST);
- packet_put_cstring("tcpip-forward");
- packet_put_char(1); /* boolean: want reply */
- packet_put_cstring(address_to_bind);
- packet_put_int(listen_port);
- packet_send();
- packet_write_wait();
- /* Assume that server accepts the request */
- success = 1;
- } else {
- packet_start(SSH_CMSG_PORT_FORWARD_REQUEST);
- packet_put_int(listen_port);
- packet_put_cstring(host_to_connect);
- packet_put_int(port_to_connect);
- packet_send();
- packet_write_wait();
-
- /* Wait for response from the remote side. */
- type = packet_read();
- switch (type) {
- case SSH_SMSG_SUCCESS:
- success = 1;
- break;
- case SSH_SMSG_FAILURE:
- log("Warning: Server denied remote port forwarding.");
- break;
- default:
- /* Unknown packet */
- packet_disconnect("Protocol error for port forward request:"
- "received packet type %d.", type);
- }
- }
- if (success) {
- permitted_opens[num_permitted_opens].host_to_connect = xstrdup(host_to_connect);
- permitted_opens[num_permitted_opens].port_to_connect = port_to_connect;
- permitted_opens[num_permitted_opens].listen_port = listen_port;
- num_permitted_opens++;
- }
- return (success ? 0 : -1);
-}
-
-/*
- * Request cancellation of remote forwarding of connection host:port from
- * local side.
- */
-void
-channel_request_rforward_cancel(const char *host, u_short port)
-{
- int i;
-
- if (!compat20)
- return;
-
- for (i = 0; i < num_permitted_opens; i++) {
- if (permitted_opens[i].host_to_connect != NULL &&
- permitted_opens[i].listen_port == port)
- break;
- }
- if (i >= num_permitted_opens) {
- debug("%s: requested forward not found", __func__);
- return;
- }
- packet_start(SSH2_MSG_GLOBAL_REQUEST);
- packet_put_cstring("cancel-tcpip-forward");
- packet_put_char(0);
- packet_put_cstring(host == NULL ? "" : host);
- packet_put_int(port);
- packet_send();
-
- permitted_opens[i].listen_port = 0;
- permitted_opens[i].port_to_connect = 0;
- xfree(permitted_opens[i].host_to_connect);
- permitted_opens[i].host_to_connect = NULL;
-}
-
-/*
- * This is called after receiving CHANNEL_FORWARDING_REQUEST. This initates
- * listening for the port, and sends back a success reply (or disconnect
- * message if there was an error). This never returns if there was an error.
- */
-
-void
-channel_input_port_forward_request(int is_root, int gateway_ports)
-{
- u_short port, host_port;
- char *hostname;
-
- /* Get arguments from the packet. */
- port = packet_get_int();
- hostname = packet_get_string(NULL);
- host_port = packet_get_int();
-
-#ifndef HAVE_CYGWIN
- /*
- * Check that an unprivileged user is not trying to forward a
- * privileged port.
- */
- if (port < IPPORT_RESERVED && !is_root)
- packet_disconnect("Requested forwarding of port %d but user is not root.",
- port);
-#endif
- /* Initiate forwarding */
- channel_setup_local_fwd_listener(NULL, port, hostname,
- host_port, gateway_ports);
-
- /* Free the argument string. */
- xfree(hostname);
-}
-
-/*
- * Permits opening to any host/port if permitted_opens[] is empty. This is
- * usually called by the server, because the user could connect to any port
- * anyway, and the server has no way to know but to trust the client anyway.
- */
-void
-channel_permit_all_opens(void)
-{
- if (num_permitted_opens == 0)
- all_opens_permitted = 1;
-}
-
-void
-channel_add_permitted_opens(char *host, int port)
-{
- if (num_permitted_opens >= SSH_MAX_FORWARDS_PER_DIRECTION)
- fatal("channel_add_permitted_opens: too many forwards");
- debug("allow port forwarding to host %s port %d", host, port);
-
- permitted_opens[num_permitted_opens].host_to_connect = xstrdup(host);
- permitted_opens[num_permitted_opens].port_to_connect = port;
- num_permitted_opens++;
-
- all_opens_permitted = 0;
-}
-
-void
-channel_clear_permitted_opens(void)
-{
- int i;
-
- for (i = 0; i < num_permitted_opens; i++)
- xfree(permitted_opens[i].host_to_connect);
- num_permitted_opens = 0;
-}
-
-
-/* return socket to remote host, port */
-static int
-connect_to(const char *host, u_short port)
-{
- struct addrinfo hints, *ai, *aitop;
- char ntop[NI_MAXHOST], strport[NI_MAXSERV];
- int gaierr;
- int sock = -1;
-
- memset(&hints, 0, sizeof(hints));
- hints.ai_family = IPv4or6;
- hints.ai_socktype = SOCK_STREAM;
- snprintf(strport, sizeof strport, "%d", port);
- if ((gaierr = getaddrinfo(host, strport, &hints, &aitop)) != 0) {
- error("connect_to %.100s: unknown host (%s)", host,
- gai_strerror(gaierr));
- return -1;
- }
- for (ai = aitop; ai; ai = ai->ai_next) {
- if (ai->ai_family != AF_INET && ai->ai_family != AF_INET6)
- continue;
- if (getnameinfo(ai->ai_addr, ai->ai_addrlen, ntop, sizeof(ntop),
- strport, sizeof(strport), NI_NUMERICHOST|NI_NUMERICSERV) != 0) {
- error("connect_to: getnameinfo failed");
- continue;
- }
- sock = socket(ai->ai_family, SOCK_STREAM, 0);
- if (sock < 0) {
- error("socket: %.100s", strerror(errno));
- continue;
- }
- if (fcntl(sock, F_SETFL, O_NONBLOCK) < 0)
- fatal("connect_to: F_SETFL: %s", strerror(errno));
- if (connect(sock, ai->ai_addr, ai->ai_addrlen) < 0 &&
- errno != EINPROGRESS) {
- error("connect_to %.100s port %s: %.100s", ntop, strport,
- strerror(errno));
- close(sock);
- continue; /* fail -- try next */
- }
- break; /* success */
-
- }
- freeaddrinfo(aitop);
- if (!ai) {
- error("connect_to %.100s port %d: failed.", host, port);
- return -1;
- }
- /* success */
- set_nodelay(sock);
- return sock;
-}
-
-int
-channel_connect_by_listen_address(u_short listen_port)
-{
- int i;
-
- for (i = 0; i < num_permitted_opens; i++)
- if (permitted_opens[i].listen_port == listen_port)
- return connect_to(
- permitted_opens[i].host_to_connect,
- permitted_opens[i].port_to_connect);
- error("WARNING: Server requests forwarding for unknown listen_port %d",
- listen_port);
- return -1;
-}
-
-/* Check if connecting to that port is permitted and connect. */
-int
-channel_connect_to(const char *host, u_short port)
-{
- int i, permit;
-
- permit = all_opens_permitted;
- if (!permit) {
- for (i = 0; i < num_permitted_opens; i++)
- if (permitted_opens[i].port_to_connect == port &&
- strcmp(permitted_opens[i].host_to_connect, host) == 0)
- permit = 1;
-
- }
- if (!permit) {
- log("Received request to connect to host %.100s port %d, "
- "but the request was denied.", host, port);
- return -1;
- }
- return connect_to(host, port);
-}
-
-/* -- X11 forwarding */
-
-/*
- * Creates an internet domain socket for listening for X11 connections.
- * Returns 0 and a suitable display number for the DISPLAY variable
- * stored in display_numberp , or -1 if an error occurs.
- */
-int
-x11_create_display_inet(int x11_display_offset, int x11_use_localhost,
- int single_connection, u_int *display_numberp)
-{
- Channel *nc = NULL;
- int display_number, sock;
- u_short port;
- struct addrinfo hints, *ai, *aitop;
- char strport[NI_MAXSERV];
- int gaierr, n, num_socks = 0, socks[NUM_SOCKS];
-
- for (display_number = x11_display_offset;
- display_number < MAX_DISPLAYS;
- display_number++) {
- port = 6000 + display_number;
- memset(&hints, 0, sizeof(hints));
- hints.ai_family = IPv4or6;
- hints.ai_flags = x11_use_localhost ? 0: AI_PASSIVE;
- hints.ai_socktype = SOCK_STREAM;
- snprintf(strport, sizeof strport, "%d", port);
- if ((gaierr = getaddrinfo(NULL, strport, &hints, &aitop)) != 0) {
- error("getaddrinfo: %.100s", gai_strerror(gaierr));
- return -1;
- }
- for (ai = aitop; ai; ai = ai->ai_next) {
- if (ai->ai_family != AF_INET && ai->ai_family != AF_INET6)
- continue;
- sock = socket(ai->ai_family, SOCK_STREAM, 0);
- if (sock < 0) {
- if ((errno != EINVAL) && (errno != EAFNOSUPPORT)) {
- error("socket: %.100s", strerror(errno));
- freeaddrinfo(aitop);
- for (n = 0; n < num_socks; n++)
- close(socks[n]);
- return -1;
- } else {
- debug("x11_create_display_inet: Socket family %d not supported",
- ai->ai_family);
- continue;
- }
- }
-#ifdef IPV6_V6ONLY
- if (ai->ai_family == AF_INET6) {
- int on = 1;
- if (setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on)) < 0)
- error("setsockopt IPV6_V6ONLY: %.100s", strerror(errno));
- }
-#endif
- if (bind(sock, ai->ai_addr, ai->ai_addrlen) < 0) {
- /*
- * If bind() fails with EADDRNOTAVAIL, we
- * should not break immediately but rather
- * try the next address available.
- */
- if (errno == EADDRNOTAVAIL) {
- close(sock);
- continue;
- }
-
- debug("bind port %d: %.100s; skipping this port", port,
- strerror(errno));
- close(sock);
-
- for (n = 0; n < num_socks; n++) {
- close(socks[n]);
- }
- num_socks = 0;
- break;
- }
- socks[num_socks++] = sock;
-#ifndef DONT_TRY_OTHER_AF
- if (num_socks == NUM_SOCKS)
- break;
-#else
- if (x11_use_localhost) {
- if (num_socks == NUM_SOCKS)
- break;
- } else {
- break;
- }
-#endif
- }
- freeaddrinfo(aitop);
- if (num_socks > 0)
- break;
- }
- if (display_number >= MAX_DISPLAYS) {
- error("Failed to allocate internet-domain X11 display socket.");
- return -1;
- }
- /* Start listening for connections on the socket. */
- for (n = 0; n < num_socks; n++) {
- sock = socks[n];
- if (listen(sock, 5) < 0) {
- int i;
- error("listen: %.100s", strerror(errno));
- for (i = 0; i < num_socks; i++)
- close(socks[i]);
- return -1;
- }
- }
-
- /* Allocate a channel for each socket. */
- for (n = 0; n < num_socks; n++) {
- sock = socks[n];
- nc = channel_new("x11 listener",
- SSH_CHANNEL_X11_LISTENER, sock, sock, -1,
- CHAN_X11_WINDOW_DEFAULT, CHAN_X11_PACKET_DEFAULT,
- 0, xstrdup("X11 inet listener"), 1);
- nc->single_connection = single_connection;
- }
-
- /* Return the display number for the DISPLAY environment variable. */
- *display_numberp = display_number;
- return (0);
-}
-
-static int
-connect_local_xsocket(u_int dnr)
-{
- int sock;
- struct sockaddr_un addr;
-
- sock = socket(AF_UNIX, SOCK_STREAM, 0);
- if (sock < 0)
- error("socket: %.100s", strerror(errno));
- memset(&addr, 0, sizeof(addr));
- addr.sun_family = AF_UNIX;
- snprintf(addr.sun_path, sizeof addr.sun_path, _PATH_UNIX_X, dnr);
- if (connect(sock, (struct sockaddr *) & addr, sizeof(addr)) == 0)
- return sock;
- close(sock);
- error("connect %.100s: %.100s", addr.sun_path, strerror(errno));
- return -1;
-}
-
-int
-x11_connect_display(void)
-{
- int display_number, sock = 0;
- const char *display;
- char buf[1024], *cp;
- struct addrinfo hints, *ai, *aitop;
- char strport[NI_MAXSERV];
- int gaierr;
-
- /* Try to open a socket for the local X server. */
- display = getenv("DISPLAY");
- if (!display) {
- error("DISPLAY not set.");
- return -1;
- }
- /*
- * Now we decode the value of the DISPLAY variable and make a
- * connection to the real X server.
- */
-
- /*
- * Check if it is a unix domain socket. Unix domain displays are in
- * one of the following formats: unix:d[.s], :d[.s], ::d[.s]
- */
- if (strncmp(display, "unix:", 5) == 0 ||
- display[0] == ':') {
- /* Connect to the unix domain socket. */
- if (sscanf(strrchr(display, ':') + 1, "%d", &display_number) != 1) {
- error("Could not parse display number from DISPLAY: %.100s",
- display);
- return -1;
- }
- /* Create a socket. */
- sock = connect_local_xsocket(display_number);
- if (sock < 0)
- return -1;
-
- /* OK, we now have a connection to the display. */
- return sock;
- }
- /*
- * Connect to an inet socket. The DISPLAY value is supposedly
- * hostname:d[.s], where hostname may also be numeric IP address.
- */
- strlcpy(buf, display, sizeof(buf));
- cp = strchr(buf, ':');
- if (!cp) {
- error("Could not find ':' in DISPLAY: %.100s", display);
- return -1;
- }
- *cp = 0;
- /* buf now contains the host name. But first we parse the display number. */
- if (sscanf(cp + 1, "%d", &display_number) != 1) {
- error("Could not parse display number from DISPLAY: %.100s",
- display);
- return -1;
- }
-
- /* Look up the host address */
- memset(&hints, 0, sizeof(hints));
- hints.ai_family = IPv4or6;
- hints.ai_socktype = SOCK_STREAM;
- snprintf(strport, sizeof strport, "%d", 6000 + display_number);
- if ((gaierr = getaddrinfo(buf, strport, &hints, &aitop)) != 0) {
- error("%.100s: unknown host. (%s)", buf, gai_strerror(gaierr));
- return -1;
- }
- for (ai = aitop; ai; ai = ai->ai_next) {
- /* Create a socket. */
- sock = socket(ai->ai_family, SOCK_STREAM, 0);
- if (sock < 0) {
- debug("socket: %.100s", strerror(errno));
- continue;
- }
- /* Connect it to the display. */
- if (connect(sock, ai->ai_addr, ai->ai_addrlen) < 0) {
- debug("connect %.100s port %d: %.100s", buf,
- 6000 + display_number, strerror(errno));
- close(sock);
- continue;
- }
- /* Success */
- break;
- }
- freeaddrinfo(aitop);
- if (!ai) {
- error("connect %.100s port %d: %.100s", buf, 6000 + display_number,
- strerror(errno));
- return -1;
- }
- set_nodelay(sock);
- return sock;
-}
-
-/*
- * This is called when SSH_SMSG_X11_OPEN is received. The packet contains
- * the remote channel number. We should do whatever we want, and respond
- * with either SSH_MSG_OPEN_CONFIRMATION or SSH_MSG_OPEN_FAILURE.
- */
-
-void
-x11_input_open(int type, u_int32_t seq, void *ctxt)
-{
- Channel *c = NULL;
- int remote_id, sock = 0;
- char *remote_host;
-
- debug("Received X11 open request.");
-
- remote_id = packet_get_int();
-
- if (packet_get_protocol_flags() & SSH_PROTOFLAG_HOST_IN_FWD_OPEN) {
- remote_host = packet_get_string(NULL);
- } else {
- remote_host = xstrdup("unknown (remote did not supply name)");
- }
- packet_check_eom();
-
- /* Obtain a connection to the real X display. */
- sock = x11_connect_display();
- if (sock != -1) {
- /* Allocate a channel for this connection. */
- c = channel_new("connected x11 socket",
- SSH_CHANNEL_X11_OPEN, sock, sock, -1, 0, 0, 0,
- remote_host, 1);
- c->remote_id = remote_id;
- c->force_drain = 1;
- }
- if (c == NULL) {
- /* Send refusal to the remote host. */
- packet_start(SSH_MSG_CHANNEL_OPEN_FAILURE);
- packet_put_int(remote_id);
- } else {
- /* Send a confirmation to the remote host. */
- packet_start(SSH_MSG_CHANNEL_OPEN_CONFIRMATION);
- packet_put_int(remote_id);
- packet_put_int(c->self);
- }
- packet_send();
-}
-
-/* dummy protocol handler that denies SSH-1 requests (agent/x11) */
-void
-deny_input_open(int type, u_int32_t seq, void *ctxt)
-{
- int rchan = packet_get_int();
-
- switch (type) {
- case SSH_SMSG_AGENT_OPEN:
- error("Warning: ssh server tried agent forwarding.");
- break;
- case SSH_SMSG_X11_OPEN:
- error("Warning: ssh server tried X11 forwarding.");
- break;
- default:
- error("deny_input_open: type %d", type);
- break;
- }
- error("Warning: this is probably a break in attempt by a malicious server.");
- packet_start(SSH_MSG_CHANNEL_OPEN_FAILURE);
- packet_put_int(rchan);
- packet_send();
-}
-
-/*
- * Requests forwarding of X11 connections, generates fake authentication
- * data, and enables authentication spoofing.
- * This should be called in the client only.
- */
-void
-x11_request_forwarding_with_spoofing(int client_session_id, const char *disp,
- const char *proto, const char *data)
-{
- u_int data_len = (u_int) strlen(data) / 2;
- u_int i, value;
- char *new_data;
- int screen_number;
- const char *cp;
- u_int32_t rand = 0;
-
- cp = disp;
- if (disp)
- cp = strchr(disp, ':');
- if (cp)
- cp = strchr(cp, '.');
- if (cp)
- screen_number = atoi(cp + 1);
- else
- screen_number = 0;
-
- /* Save protocol name. */
- x11_saved_proto = xstrdup(proto);
-
- /*
- * Extract real authentication data and generate fake data of the
- * same length.
- */
- x11_saved_data = xmalloc(data_len);
- x11_fake_data = xmalloc(data_len);
- for (i = 0; i < data_len; i++) {
- if (sscanf(data + 2 * i, "%2x", &value) != 1)
- fatal("x11_request_forwarding: bad authentication data: %.100s", data);
- if (i % 4 == 0)
- rand = arc4random();
- x11_saved_data[i] = value;
- x11_fake_data[i] = rand & 0xff;
- rand >>= 8;
- }
- x11_saved_data_len = data_len;
- x11_fake_data_len = data_len;
-
- /* Convert the fake data into hex. */
- new_data = tohex(x11_fake_data, data_len);
-
- /* Send the request packet. */
- if (compat20) {
- channel_request_start(client_session_id, "x11-req", 0);
- packet_put_char(0); /* XXX bool single connection */
- } else {
- packet_start(SSH_CMSG_X11_REQUEST_FORWARDING);
- }
- packet_put_cstring(proto);
- packet_put_cstring(new_data);
- packet_put_int(screen_number);
- packet_send();
- packet_write_wait();
- xfree(new_data);
-}
-
-
-/* -- agent forwarding */
-
-/* Sends a message to the server to request authentication fd forwarding. */
-
-void
-auth_request_forwarding(void)
-{
- packet_start(SSH_CMSG_AGENT_REQUEST_FORWARDING);
- packet_send();
- packet_write_wait();
-}
-
-/* This is called to process an SSH_SMSG_AGENT_OPEN message. */
-
-void
-auth_input_open_request(int type, u_int32_t seq, void *ctxt)
-{
- Channel *c = NULL;
- int remote_id, sock;
- char *name;
-
- /* Read the remote channel number from the message. */
- remote_id = packet_get_int();
- packet_check_eom();
-
- /*
- * Get a connection to the local authentication agent (this may again
- * get forwarded).
- */
- sock = ssh_get_authentication_socket();
-
- /*
- * If we could not connect the agent, send an error message back to
- * the server. This should never happen unless the agent dies,
- * because authentication forwarding is only enabled if we have an
- * agent.
- */
- if (sock >= 0) {
- name = xstrdup("authentication agent connection");
- c = channel_new("", SSH_CHANNEL_OPEN, sock, sock,
- -1, 0, 0, 0, name, 1);
- c->remote_id = remote_id;
- c->force_drain = 1;
- }
- if (c == NULL) {
- packet_start(SSH_MSG_CHANNEL_OPEN_FAILURE);
- packet_put_int(remote_id);
- } else {
- /* Send a confirmation to the remote host. */
- debug("Forwarding authentication connection.");
- packet_start(SSH_MSG_CHANNEL_OPEN_CONFIRMATION);
- packet_put_int(remote_id);
- packet_put_int(c->self);
- }
- packet_send();
-}
diff --git a/usr/src/cmd/ssh/libssh/common/cipher-ctr.c b/usr/src/cmd/ssh/libssh/common/cipher-ctr.c
deleted file mode 100644
index d728064b53..0000000000
--- a/usr/src/cmd/ssh/libssh/common/cipher-ctr.c
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * Copyright (c) 2003 Markus Friedl <markus@openbsd.org>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: cipher-ctr.c,v 1.4 2004/02/06 23:41:13 dtucker Exp $");
-
-#include <openssl/evp.h>
-
-#include "log.h"
-#include "xmalloc.h"
-#include <openssl/aes.h>
-
-const EVP_CIPHER *evp_aes_128_ctr(void);
-void ssh_aes_ctr_iv(EVP_CIPHER_CTX *, int, u_char *, u_int);
-
-struct ssh_aes_ctr_ctx
-{
- AES_KEY aes_ctx;
- u_char aes_counter[AES_BLOCK_SIZE];
-};
-
-/*
- * increment counter 'ctr',
- * the counter is of size 'len' bytes and stored in network-byte-order.
- * (LSB at ctr[len-1], MSB at ctr[0])
- */
-static void
-ssh_ctr_inc(u_char *ctr, u_int len)
-{
- int i;
-
- for (i = len - 1; i >= 0; i--)
- if (++ctr[i]) /* continue on overflow */
- return;
-}
-
-static int
-ssh_aes_ctr(EVP_CIPHER_CTX *ctx, u_char *dest, const u_char *src,
- u_int len)
-{
- struct ssh_aes_ctr_ctx *c;
- u_int n = 0;
- u_char buf[AES_BLOCK_SIZE];
-
- if (len == 0)
- return (1);
- if ((c = EVP_CIPHER_CTX_get_app_data(ctx)) == NULL)
- return (0);
-
- while ((len--) > 0) {
- if (n == 0) {
- AES_encrypt(c->aes_counter, buf, &c->aes_ctx);
- ssh_ctr_inc(c->aes_counter, AES_BLOCK_SIZE);
- }
- *(dest++) = *(src++) ^ buf[n];
- n = (n + 1) % AES_BLOCK_SIZE;
- }
- return (1);
-}
-
-static int
-ssh_aes_ctr_init(EVP_CIPHER_CTX *ctx, const u_char *key, const u_char *iv,
- int enc)
-{
- struct ssh_aes_ctr_ctx *c;
-
- if ((c = EVP_CIPHER_CTX_get_app_data(ctx)) == NULL) {
- c = xmalloc(sizeof(*c));
- EVP_CIPHER_CTX_set_app_data(ctx, c);
- }
- if (key != NULL)
- AES_set_encrypt_key(key, EVP_CIPHER_CTX_key_length(ctx) * 8,
- &c->aes_ctx);
- if (iv != NULL)
- memcpy(c->aes_counter, iv, AES_BLOCK_SIZE);
- return (1);
-}
-
-static int
-ssh_aes_ctr_cleanup(EVP_CIPHER_CTX *ctx)
-{
- struct ssh_aes_ctr_ctx *c;
-
- if ((c = EVP_CIPHER_CTX_get_app_data(ctx)) != NULL) {
- memset(c, 0, sizeof(*c));
- xfree(c);
- EVP_CIPHER_CTX_set_app_data(ctx, NULL);
- }
- return (1);
-}
-
-void
-ssh_aes_ctr_iv(EVP_CIPHER_CTX *evp, int doset, u_char * iv, u_int len)
-{
- struct ssh_aes_ctr_ctx *c;
-
- if ((c = EVP_CIPHER_CTX_get_app_data(evp)) == NULL)
- fatal("ssh_aes_ctr_iv: no context");
- if (doset)
- memcpy(c->aes_counter, iv, len);
- else
- memcpy(iv, c->aes_counter, len);
-}
-
-/*
- * Function fills an EVP_CIPHER structure for AES CTR functions based on the NID
- * and the key length.
- */
-static const EVP_CIPHER *
-evp_aes_ctr(const char *nid, int key_len, EVP_CIPHER *aes_ctr)
-{
- memset(aes_ctr, 0, sizeof(EVP_CIPHER));
- /*
- * If the PKCS#11 engine is used the AES CTR NIDs were dynamically
- * created during the engine initialization. If the engine is not used
- * we work with NID_undef's which is OK since in that case OpenSSL
- * doesn't use NIDs at all.
- */
- if ((aes_ctr->nid = OBJ_ln2nid(nid)) != NID_undef)
- debug3("%s NID found", nid);
-
- aes_ctr->block_size = AES_BLOCK_SIZE;
- aes_ctr->iv_len = AES_BLOCK_SIZE;
- aes_ctr->key_len = key_len;
- aes_ctr->init = ssh_aes_ctr_init;
- aes_ctr->cleanup = ssh_aes_ctr_cleanup;
- aes_ctr->do_cipher = ssh_aes_ctr;
- aes_ctr->flags = EVP_CIPH_CBC_MODE | EVP_CIPH_VARIABLE_LENGTH |
- EVP_CIPH_ALWAYS_CALL_INIT | EVP_CIPH_CUSTOM_IV;
- return (aes_ctr);
-}
-
-const EVP_CIPHER *
-evp_aes_128_ctr(void)
-{
- static EVP_CIPHER aes_ctr;
-
- return (evp_aes_ctr("aes-128-ctr", 16, &aes_ctr));
-}
-
-const EVP_CIPHER *
-evp_aes_192_ctr(void)
-{
- static EVP_CIPHER aes_ctr;
-
- return (evp_aes_ctr("aes-192-ctr", 24, &aes_ctr));
-}
-
-const EVP_CIPHER *
-evp_aes_256_ctr(void)
-{
- static EVP_CIPHER aes_ctr;
-
- return (evp_aes_ctr("aes-256-ctr", 32, &aes_ctr));
-}
diff --git a/usr/src/cmd/ssh/libssh/common/cipher.c b/usr/src/cmd/ssh/libssh/common/cipher.c
deleted file mode 100644
index 3cb001a150..0000000000
--- a/usr/src/cmd/ssh/libssh/common/cipher.c
+++ /dev/null
@@ -1,585 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- *
- *
- * Copyright (c) 1999 Niels Provos. All rights reserved.
- * Copyright (c) 1999, 2000 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: cipher.c,v 1.61 2002/07/12 15:50:17 markus Exp $");
-
-#include "xmalloc.h"
-#include "log.h"
-#include "cipher.h"
-
-#include <openssl/md5.h>
-
-/*
- * Symmetric ciphers can be offloaded to any engine through the EVP API only.
- * However, OpenSSL doesn't offer AES in counter mode through EVP. So, we must
- * define our own EVP functions.
- */
-extern const EVP_CIPHER *evp_aes_128_ctr(void);
-extern const EVP_CIPHER *evp_aes_192_ctr(void);
-extern const EVP_CIPHER *evp_aes_256_ctr(void);
-extern void ssh_aes_ctr_iv(EVP_CIPHER_CTX *, int, u_char *, u_int);
-
-static const EVP_CIPHER *evp_ssh1_3des(void);
-static const EVP_CIPHER *evp_ssh1_bf(void);
-
-struct Cipher {
- char *name;
- int number; /* for ssh1 only */
- u_int block_size;
- u_int key_len;
- u_int discard_len;
- const EVP_CIPHER *(*evptype)(void);
-} ciphers[] = {
- { "none", SSH_CIPHER_NONE, 8, 0, 0, EVP_enc_null },
- { "des", SSH_CIPHER_DES, 8, 8, 0, EVP_des_cbc },
- { "3des", SSH_CIPHER_3DES, 8, 16, 0, evp_ssh1_3des },
- { "blowfish", SSH_CIPHER_BLOWFISH, 8, 32, 0, evp_ssh1_bf },
- { "3des-cbc", SSH_CIPHER_SSH2, 8, 24, 0, EVP_des_ede3_cbc },
- { "blowfish-cbc", SSH_CIPHER_SSH2, 8, 16, 0, EVP_bf_cbc },
-#ifdef SOLARIS_SSH_ENABLE_CAST5_128
- { "cast128-cbc", SSH_CIPHER_SSH2, 8, 16, 0, EVP_cast5_cbc },
-#endif /* SOLARIS_SSH_ENABLE_CAST5_128 */
- { "arcfour", SSH_CIPHER_SSH2, 8, 16, 0, EVP_rc4 },
- { "arcfour128", SSH_CIPHER_SSH2, 8, 16, 1536, EVP_rc4 },
- { "arcfour256", SSH_CIPHER_SSH2, 8, 32, 1536, EVP_rc4 },
- { "aes128-cbc", SSH_CIPHER_SSH2, 16, 16, 0, EVP_aes_128_cbc },
- { "aes192-cbc", SSH_CIPHER_SSH2, 16, 24, 0, EVP_aes_192_cbc },
- { "aes256-cbc", SSH_CIPHER_SSH2, 16, 32, 0, EVP_aes_256_cbc },
- { "aes128-ctr", SSH_CIPHER_SSH2, 16, 16, 0, evp_aes_128_ctr },
- { "aes192-ctr", SSH_CIPHER_SSH2, 16, 24, 0, evp_aes_192_ctr },
- { "aes256-ctr", SSH_CIPHER_SSH2, 16, 32, 0, evp_aes_256_ctr },
- { NULL, SSH_CIPHER_ILLEGAL, 0, 0, 0, NULL }
-};
-
-/*--*/
-
-u_int
-cipher_blocksize(Cipher *c)
-{
- return (c->block_size);
-}
-
-u_int
-cipher_keylen(Cipher *c)
-{
- return (c->key_len);
-}
-
-u_int
-cipher_get_number(Cipher *c)
-{
- return (c->number);
-}
-
-u_int
-cipher_mask_ssh1(int client)
-{
- u_int mask = 0;
- mask |= 1 << SSH_CIPHER_3DES; /* Mandatory */
- mask |= 1 << SSH_CIPHER_BLOWFISH;
- if (client) {
- mask |= 1 << SSH_CIPHER_DES;
- }
- return mask;
-}
-
-Cipher *
-cipher_by_name(const char *name)
-{
- Cipher *c;
- for (c = ciphers; c->name != NULL; c++)
- if (strcasecmp(c->name, name) == 0)
- return c;
- return NULL;
-}
-
-Cipher *
-cipher_by_number(int id)
-{
- Cipher *c;
- for (c = ciphers; c->name != NULL; c++)
- if (c->number == id)
- return c;
- return NULL;
-}
-
-#define CIPHER_SEP ","
-int
-ciphers_valid(const char *names)
-{
- Cipher *c;
- char *ciphers, *cp;
- char *p;
-
- if (names == NULL || strcmp(names, "") == 0)
- return 0;
- ciphers = cp = xstrdup(names);
- for ((p = strsep(&cp, CIPHER_SEP)); p && *p != '\0';
- (p = strsep(&cp, CIPHER_SEP))) {
- c = cipher_by_name(p);
- if (c == NULL || c->number != SSH_CIPHER_SSH2) {
- debug("bad cipher %s [%s]", p, names);
- xfree(ciphers);
- return 0;
- } else {
- debug3("cipher ok: %s [%s]", p, names);
- }
- }
- debug3("ciphers ok: [%s]", names);
- xfree(ciphers);
- return 1;
-}
-
-/*
- * Parses the name of the cipher. Returns the number of the corresponding
- * cipher, or -1 on error.
- */
-
-int
-cipher_number(const char *name)
-{
- Cipher *c;
- if (name == NULL)
- return -1;
- c = cipher_by_name(name);
- return (c==NULL) ? -1 : c->number;
-}
-
-char *
-cipher_name(int id)
-{
- Cipher *c = cipher_by_number(id);
- return (c==NULL) ? "<unknown>" : c->name;
-}
-
-void
-cipher_init(CipherContext *cc, Cipher *cipher,
- const u_char *key, u_int keylen, const u_char *iv, u_int ivlen,
- int encrypt)
-{
- static int dowarn = 1;
- const EVP_CIPHER *type;
- int klen;
- u_char *junk, *discard;
-
- if (cipher->number == SSH_CIPHER_DES) {
- if (dowarn) {
- error("Warning: use of DES is strongly discouraged "
- "due to cryptographic weaknesses");
- dowarn = 0;
- }
- if (keylen > 8)
- keylen = 8;
- }
- cc->plaintext = (cipher->number == SSH_CIPHER_NONE);
-
- if (keylen < cipher->key_len)
- fatal("cipher_init: key length %d is insufficient for %s.",
- keylen, cipher->name);
- if (iv != NULL && ivlen < cipher->block_size)
- fatal("cipher_init: iv length %d is insufficient for %s.",
- ivlen, cipher->name);
- cc->cipher = cipher;
-
- type = (*cipher->evptype)();
-
- EVP_CIPHER_CTX_init(&cc->evp);
- /*
- * cc->evp is of type EVP_CIPHER_CTX and its key_len will be set to the
- * default value here for the cipher type. If the requested key length
- * is different from the default value we will call EVP_CipherInit()
- * again, see below.
- */
- if (EVP_CipherInit(&cc->evp, type, NULL, (u_char *)iv,
- (encrypt == CIPHER_ENCRYPT)) == 0)
- fatal("cipher_init: EVP_CipherInit failed for %s",
- cipher->name);
- klen = EVP_CIPHER_CTX_key_length(&cc->evp);
- if (klen > 0 && keylen != klen) {
- debug("cipher_init: set keylen (%d -> %d)", klen, keylen);
- if (EVP_CIPHER_CTX_set_key_length(&cc->evp, keylen) == 0)
- fatal("cipher_init: set keylen failed (%d -> %d)",
- klen, keylen);
- }
- if (EVP_CipherInit(&cc->evp, NULL, (u_char *)key, NULL, -1) == 0)
- fatal("cipher_init: EVP_CipherInit: set key failed for %s",
- cipher->name);
-
- if (cipher->discard_len > 0) {
- junk = xmalloc(cipher->discard_len);
- discard = xmalloc(cipher->discard_len);
- if (EVP_Cipher(&cc->evp, discard, junk,
- cipher->discard_len) == 0)
- fatal("cipher_init: EVP_Cipher failed during discard");
- memset(discard, 0, cipher->discard_len);
- xfree(junk);
- xfree(discard);
- }
-}
-
-void
-cipher_crypt(CipherContext *cc, u_char *dest, const u_char *src, u_int len)
-{
- if (len % cc->cipher->block_size)
- fatal("cipher_encrypt: bad plaintext length %d", len);
- if (EVP_Cipher(&cc->evp, dest, (u_char *)src, len) == 0)
- fatal("evp_crypt: EVP_Cipher failed");
-}
-
-void
-cipher_cleanup(CipherContext *cc)
-{
- if (EVP_CIPHER_CTX_cleanup(&cc->evp) == 0)
- error("cipher_cleanup: EVP_CIPHER_CTX_cleanup failed");
-}
-
-/*
- * Selects the cipher, and keys if by computing the MD5 checksum of the
- * passphrase and using the resulting 16 bytes as the key.
- */
-
-void
-cipher_set_key_string(CipherContext *cc, Cipher *cipher,
- const char *passphrase, int encrypt)
-{
- MD5_CTX md;
- u_char digest[16];
-
- MD5_Init(&md);
- MD5_Update(&md, (const u_char *)passphrase, strlen(passphrase));
- MD5_Final(digest, &md);
-
- cipher_init(cc, cipher, digest, 16, NULL, 0, encrypt);
-
- memset(digest, 0, sizeof(digest));
- memset(&md, 0, sizeof(md));
-}
-
-/* Implementations for other non-EVP ciphers */
-
-/*
- * This is used by SSH1:
- *
- * What kind of triple DES are these 2 routines?
- *
- * Why is there a redundant initialization vector?
- *
- * If only iv3 was used, then, this would till effect have been
- * outer-cbc. However, there is also a private iv1 == iv2 which
- * perhaps makes differential analysis easier. On the other hand, the
- * private iv1 probably makes the CRC-32 attack ineffective. This is a
- * result of that there is no longer any known iv1 to use when
- * choosing the X block.
- */
-struct ssh1_3des_ctx
-{
- EVP_CIPHER_CTX k1, k2, k3;
-};
-
-static int
-ssh1_3des_init(EVP_CIPHER_CTX *ctx, const u_char *key, const u_char *iv,
- int enc)
-{
- struct ssh1_3des_ctx *c;
- u_char *k1, *k2, *k3;
-
- if ((c = EVP_CIPHER_CTX_get_app_data(ctx)) == NULL) {
- c = xmalloc(sizeof(*c));
- EVP_CIPHER_CTX_set_app_data(ctx, c);
- }
- if (key == NULL)
- return (1);
- if (enc == -1)
- enc = ctx->encrypt;
- k1 = k2 = k3 = (u_char *) key;
- k2 += 8;
- if (EVP_CIPHER_CTX_key_length(ctx) >= 16+8) {
- if (enc)
- k3 += 16;
- else
- k1 += 16;
- }
- EVP_CIPHER_CTX_init(&c->k1);
- EVP_CIPHER_CTX_init(&c->k2);
- EVP_CIPHER_CTX_init(&c->k3);
- if (EVP_CipherInit(&c->k1, EVP_des_cbc(), k1, NULL, enc) == 0 ||
- EVP_CipherInit(&c->k2, EVP_des_cbc(), k2, NULL, !enc) == 0 ||
- EVP_CipherInit(&c->k3, EVP_des_cbc(), k3, NULL, enc) == 0) {
- memset(c, 0, sizeof(*c));
- xfree(c);
- EVP_CIPHER_CTX_set_app_data(ctx, NULL);
- return (0);
- }
- return (1);
-}
-
-static int
-ssh1_3des_cbc(EVP_CIPHER_CTX *ctx, u_char *dest, const u_char *src, u_int len)
-{
- struct ssh1_3des_ctx *c;
-
- if ((c = EVP_CIPHER_CTX_get_app_data(ctx)) == NULL) {
- error("ssh1_3des_cbc: no context");
- return (0);
- }
- if (EVP_Cipher(&c->k1, dest, (u_char *)src, len) == 0 ||
- EVP_Cipher(&c->k2, dest, dest, len) == 0 ||
- EVP_Cipher(&c->k3, dest, dest, len) == 0)
- return (0);
- return (1);
-}
-
-static int
-ssh1_3des_cleanup(EVP_CIPHER_CTX *ctx)
-{
- struct ssh1_3des_ctx *c;
-
- if ((c = EVP_CIPHER_CTX_get_app_data(ctx)) != NULL) {
- memset(c, 0, sizeof(*c));
- xfree(c);
- EVP_CIPHER_CTX_set_app_data(ctx, NULL);
- }
- return (1);
-}
-
-static const EVP_CIPHER *
-evp_ssh1_3des(void)
-{
- static EVP_CIPHER ssh1_3des;
-
- memset(&ssh1_3des, 0, sizeof(EVP_CIPHER));
- ssh1_3des.nid = NID_undef;
- ssh1_3des.block_size = 8;
- ssh1_3des.iv_len = 0;
- ssh1_3des.key_len = 16;
- ssh1_3des.init = ssh1_3des_init;
- ssh1_3des.cleanup = ssh1_3des_cleanup;
- ssh1_3des.do_cipher = ssh1_3des_cbc;
- ssh1_3des.flags = EVP_CIPH_CBC_MODE | EVP_CIPH_VARIABLE_LENGTH;
- return (&ssh1_3des);
-}
-
-/*
- * SSH1 uses a variation on Blowfish, all bytes must be swapped before
- * and after encryption/decryption. Thus the swap_bytes stuff (yuk).
- */
-static void
-swap_bytes(const u_char *src, u_char *dst, int n)
-{
- u_char c[4];
-
- /* Process 4 bytes every lap. */
- for (n = n / 4; n > 0; n--) {
- c[3] = *src++;
- c[2] = *src++;
- c[1] = *src++;
- c[0] = *src++;
-
- *dst++ = c[0];
- *dst++ = c[1];
- *dst++ = c[2];
- *dst++ = c[3];
- }
-}
-
-static int (*orig_bf)(EVP_CIPHER_CTX *, u_char *, const u_char *, u_int) = NULL;
-
-static int
-bf_ssh1_cipher(EVP_CIPHER_CTX *ctx, u_char *out, const u_char *in, u_int len)
-{
- int ret;
-
- swap_bytes(in, out, len);
- ret = (*orig_bf)(ctx, out, out, len);
- swap_bytes(out, out, len);
- return (ret);
-}
-
-static const EVP_CIPHER *
-evp_ssh1_bf(void)
-{
- static EVP_CIPHER ssh1_bf;
-
- memcpy(&ssh1_bf, EVP_bf_cbc(), sizeof(EVP_CIPHER));
- orig_bf = ssh1_bf.do_cipher;
- ssh1_bf.nid = NID_undef;
- ssh1_bf.do_cipher = bf_ssh1_cipher;
- ssh1_bf.key_len = 32;
- return (&ssh1_bf);
-}
-
-/*
- * Exports an IV from the CipherContext required to export the key
- * state back from the unprivileged child to the privileged parent
- * process.
- */
-
-int
-cipher_get_keyiv_len(CipherContext *cc)
-{
- Cipher *c = cc->cipher;
- int ivlen;
-
- if (c->number == SSH_CIPHER_3DES)
- ivlen = 24;
- else
- ivlen = EVP_CIPHER_CTX_iv_length(&cc->evp);
- return (ivlen);
-}
-
-void
-cipher_get_keyiv(CipherContext *cc, u_char *iv, u_int len)
-{
- Cipher *c = cc->cipher;
- u_char *civ = NULL;
- int evplen;
-
- switch (c->number) {
- case SSH_CIPHER_SSH2:
- case SSH_CIPHER_DES:
- case SSH_CIPHER_BLOWFISH:
- evplen = EVP_CIPHER_CTX_iv_length(&cc->evp);
- if (evplen == 0)
- return;
- if (evplen != len)
- fatal("%s: wrong iv length %d != %d", __func__,
- evplen, len);
-
- if (c->evptype == evp_aes_128_ctr) {
- ssh_aes_ctr_iv(&cc->evp, 0, iv, len);
- return;
- } else {
- civ = cc->evp.iv;
- }
- break;
- case SSH_CIPHER_3DES: {
- struct ssh1_3des_ctx *desc;
- if (len != 24)
- fatal("%s: bad 3des iv length: %d", __func__, len);
- desc = EVP_CIPHER_CTX_get_app_data(&cc->evp);
- if (desc == NULL)
- fatal("%s: no 3des context", __func__);
- debug3("%s: Copying 3DES IV", __func__);
- memcpy(iv, desc->k1.iv, 8);
- memcpy(iv + 8, desc->k2.iv, 8);
- memcpy(iv + 16, desc->k3.iv, 8);
- return;
- }
- default:
- fatal("%s: bad cipher %d", __func__, c->number);
- }
- memcpy(iv, civ, len);
-}
-
-void
-cipher_set_keyiv(CipherContext *cc, u_char *iv)
-{
- Cipher *c = cc->cipher;
- u_char *div = NULL;
- int evplen = 0;
-
- switch (c->number) {
- case SSH_CIPHER_SSH2:
- case SSH_CIPHER_DES:
- case SSH_CIPHER_BLOWFISH:
- evplen = EVP_CIPHER_CTX_iv_length(&cc->evp);
- if (evplen == 0)
- return;
-
- if (c->evptype == evp_aes_128_ctr) {
- ssh_aes_ctr_iv(&cc->evp, 1, iv, evplen);
- return;
- } else {
- div = cc->evp.iv;
- }
- break;
- case SSH_CIPHER_3DES: {
- struct ssh1_3des_ctx *desc;
- desc = EVP_CIPHER_CTX_get_app_data(&cc->evp);
- if (desc == NULL)
- fatal("%s: no 3des context", __func__);
- debug3("%s: Installed 3DES IV", __func__);
- memcpy(desc->k1.iv, iv, 8);
- memcpy(desc->k2.iv, iv + 8, 8);
- memcpy(desc->k3.iv, iv + 16, 8);
- return;
- }
- default:
- fatal("%s: bad cipher %d", __func__, c->number);
- }
- memcpy(div, iv, evplen);
-}
-
-#if OPENSSL_VERSION_NUMBER < 0x00907000L
-#define EVP_X_STATE(evp) &(evp).c
-#define EVP_X_STATE_LEN(evp) sizeof((evp).c)
-#else
-#define EVP_X_STATE(evp) (evp).cipher_data
-#define EVP_X_STATE_LEN(evp) (evp).cipher->ctx_size
-#endif
-
-int
-cipher_get_keycontext(CipherContext *cc, u_char *dat)
-{
- int plen = 0;
- Cipher *c = cc->cipher;
-
- if (c->evptype == EVP_rc4) {
- plen = EVP_X_STATE_LEN(cc->evp);
- if (dat == NULL)
- return (plen);
- memcpy(dat, EVP_X_STATE(cc->evp), plen);
- }
- return (plen);
-}
-
-void
-cipher_set_keycontext(CipherContext *cc, u_char *dat)
-{
- Cipher *c = cc->cipher;
- int plen;
-
- if (c->evptype == EVP_rc4) {
- plen = EVP_X_STATE_LEN(cc->evp);
- memcpy(EVP_X_STATE(cc->evp), dat, plen);
- }
-}
diff --git a/usr/src/cmd/ssh/libssh/common/compat.c b/usr/src/cmd/ssh/libssh/common/compat.c
deleted file mode 100644
index 6d85d6e511..0000000000
--- a/usr/src/cmd/ssh/libssh/common/compat.c
+++ /dev/null
@@ -1,260 +0,0 @@
-/*
- * Copyright (c) 1999, 2000, 2001, 2002 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: compat.c,v 1.65 2002/09/27 10:42:09 mickey Exp $");
-
-#include "buffer.h"
-#include "packet.h"
-#include "xmalloc.h"
-#include "compat.h"
-#include "log.h"
-#include "match.h"
-
-int compat13 = 0;
-int compat20 = 0;
-uint32_t datafellows = 0;
-
-void
-enable_compat20(void)
-{
- debug("Enabling compatibility mode for protocol 2.0");
- compat20 = 1;
-}
-void
-enable_compat13(void)
-{
- debug("Enabling compatibility mode for protocol 1.3");
- compat13 = 1;
-}
-/* datafellows bug compatibility */
-void
-compat_datafellows(const char *version)
-{
- int i;
- static struct {
- char *pat;
- uint32_t bugs;
- } check[] = {
- { "OpenSSH-2.0*,"
- "OpenSSH-2.1*,"
- "OpenSSH_2.1*,"
- "OpenSSH_2.2*", SSH_OLD_SESSIONID|SSH_BUG_BANNER|
- SSH_OLD_DHGEX|SSH_BUG_NOREKEY|
- SSH_BUG_EXTEOF|SSH_OLD_FORWARD_ADDR},
- { "OpenSSH_2.3.0*", SSH_BUG_BANNER|SSH_BUG_BIGENDIANAES|
- SSH_OLD_DHGEX|SSH_BUG_NOREKEY|
- SSH_BUG_EXTEOF|SSH_OLD_FORWARD_ADDR},
- { "OpenSSH_2.3.*", SSH_BUG_BIGENDIANAES|SSH_OLD_DHGEX|
- SSH_BUG_NOREKEY|SSH_BUG_EXTEOF|
- SSH_OLD_FORWARD_ADDR},
- { "OpenSSH_2.5.0p1*,"
- "OpenSSH_2.5.1p1*",
- SSH_BUG_BIGENDIANAES|SSH_OLD_DHGEX|
- SSH_BUG_NOREKEY|SSH_BUG_EXTEOF|
- SSH_OLD_GSSAPI|SSH_OLD_FORWARD_ADDR},
- { "OpenSSH_2.5.0*,"
- "OpenSSH_2.5.1*,"
- "OpenSSH_2.5.2*", SSH_OLD_DHGEX|SSH_BUG_NOREKEY|
- SSH_BUG_EXTEOF|SSH_OLD_FORWARD_ADDR},
- { "OpenSSH_2.5.3*", SSH_BUG_NOREKEY|SSH_BUG_EXTEOF|
- SSH_OLD_FORWARD_ADDR},
- { "OpenSSH_2.9p*", SSH_BUG_EXTEOF|SSH_OLD_GSSAPI|
- SSH_BUG_GSSKEX_HOSTKEY|SSH_OLD_FORWARD_ADDR},
- { "OpenSSH_2.*,"
- "OpenSSH_3.0*,"
- "OpenSSH_3.1*", SSH_BUG_EXTEOF|SSH_OLD_FORWARD_ADDR|
- SSH_OLD_GSSAPI|SSH_BUG_GSSAPI_BER|
- SSH_BUG_GSSKEX_HOSTKEY},
- { "OpenSSH_3.2*,"
- "OpenSSH_3.3*,"
- "OpenSSH_3.4*,"
- "OpenSSH_3.5*", SSH_BUG_GSSAPI_BER|SSH_OLD_GSSAPI|
- SSH_BUG_GSSKEX_HOSTKEY|SSH_OLD_FORWARD_ADDR},
- { "OpenSSH_3.6*,"
- "OpenSSH_3.7*,"
- "OpenSSH_3.8*", SSH_BUG_GSSKEX_HOSTKEY|SSH_OLD_FORWARD_ADDR},
- { "OpenSSH_3.*", SSH_OLD_FORWARD_ADDR},
- { "OpenSSH*", 0 },
- { "Sun_SSH_1.0.*", SSH_BUG_NOREKEY|
- SSH_BUG_LOCALES_NOT_LANGTAGS|SSH_OLD_FORWARD_ADDR},
- { "Sun_SSH_1.0*", SSH_BUG_NOREKEY|SSH_BUG_EXTEOF|
- SSH_BUG_LOCALES_NOT_LANGTAGS|SSH_OLD_FORWARD_ADDR},
- { "Sun_SSH_1.1.1*", SSH_OLD_FORWARD_ADDR|SSH_BUG_STRING_ENCODING},
- { "Sun_SSH_1.1.2*", SSH_OLD_FORWARD_ADDR},
- { "Sun_SSH_1.1*", SSH_OLD_FORWARD_ADDR|SSH_BUG_STRING_ENCODING},
- { "Sun_SSH_1.2*", SSH_BUG_STRING_ENCODING},
- { "Sun_SSH_1.3*", SSH_BUG_STRING_ENCODING},
- { "Sun_SSH_1.4*", 0 },
- { "Sun_SSH_1.5*", 0 },
- { "Sun_SSH_*", 0 },
- { "*MindTerm*", 0 },
- { "2.1.0*", SSH_BUG_SIGBLOB|SSH_BUG_HMAC|
- SSH_OLD_SESSIONID|SSH_BUG_DEBUG|
- SSH_BUG_RSASIGMD5|SSH_BUG_HBSERVICE|
- SSH_BUG_FIRSTKEX },
- { "2.1 *", SSH_BUG_SIGBLOB|SSH_BUG_HMAC|
- SSH_OLD_SESSIONID|SSH_BUG_DEBUG|
- SSH_BUG_RSASIGMD5|SSH_BUG_HBSERVICE|
- SSH_BUG_FIRSTKEX },
- { "2.0.13*,"
- "2.0.14*,"
- "2.0.15*,"
- "2.0.16*,"
- "2.0.17*,"
- "2.0.18*,"
- "2.0.19*", SSH_BUG_SIGBLOB|SSH_BUG_HMAC|
- SSH_OLD_SESSIONID|SSH_BUG_DEBUG|
- SSH_BUG_PKSERVICE|SSH_BUG_X11FWD|
- SSH_BUG_PKOK|SSH_BUG_RSASIGMD5|
- SSH_BUG_HBSERVICE|SSH_BUG_OPENFAILURE|
- SSH_BUG_DUMMYCHAN|SSH_BUG_FIRSTKEX },
- { "2.0.11*,"
- "2.0.12*", SSH_BUG_SIGBLOB|SSH_BUG_HMAC|
- SSH_OLD_SESSIONID|SSH_BUG_DEBUG|
- SSH_BUG_PKSERVICE|SSH_BUG_X11FWD|
- SSH_BUG_PKAUTH|SSH_BUG_PKOK|
- SSH_BUG_RSASIGMD5|SSH_BUG_OPENFAILURE|
- SSH_BUG_DUMMYCHAN|SSH_BUG_FIRSTKEX },
- { "2.0.*", SSH_BUG_SIGBLOB|SSH_BUG_HMAC|
- SSH_OLD_SESSIONID|SSH_BUG_DEBUG|
- SSH_BUG_PKSERVICE|SSH_BUG_X11FWD|
- SSH_BUG_PKAUTH|SSH_BUG_PKOK|
- SSH_BUG_RSASIGMD5|SSH_BUG_OPENFAILURE|
- SSH_BUG_DERIVEKEY|SSH_BUG_DUMMYCHAN|
- SSH_BUG_FIRSTKEX },
- { "2.2.0*,"
- "2.3.0*", SSH_BUG_HMAC|SSH_BUG_DEBUG|
- SSH_BUG_RSASIGMD5|SSH_BUG_FIRSTKEX },
- { "2.3.*", SSH_BUG_DEBUG|SSH_BUG_RSASIGMD5|
- SSH_BUG_FIRSTKEX },
- { "2.4", SSH_OLD_SESSIONID }, /* Van Dyke */
- { "2.*", SSH_BUG_DEBUG|SSH_BUG_FIRSTKEX|
- SSH_BUG_RFWD_ADDR},
- { "3.0.*", SSH_BUG_DEBUG },
- { "3.0 SecureCRT*", SSH_OLD_SESSIONID },
- { "1.7 SecureFX*", SSH_OLD_SESSIONID },
- { "1.2.18*,"
- "1.2.19*,"
- "1.2.20*,"
- "1.2.21*,"
- "1.2.22*", SSH_BUG_IGNOREMSG|SSH_BUG_K5USER },
- { "1.3.2*", /* F-Secure */
- SSH_BUG_IGNOREMSG|SSH_BUG_K5USER },
- { "1.2.1*,"
- "1.2.2*,"
- "1.2.3*", SSH_BUG_K5USER },
- { "*SSH Compatible Server*", /* Netscreen */
- SSH_BUG_PASSWORDPAD },
- { "*OSU_0*,"
- "OSU_1.0*,"
- "OSU_1.1*,"
- "OSU_1.2*,"
- "OSU_1.3*,"
- "OSU_1.4*,"
- "OSU_1.5alpha1*,"
- "OSU_1.5alpha2*,"
- "OSU_1.5alpha3*", SSH_BUG_PASSWORDPAD },
- { "*SSH_Version_Mapper*",
- SSH_BUG_SCANNER },
- { "Probe-*",
- SSH_BUG_PROBE },
- { NULL, 0 }
- };
-
- /* process table, return first match */
- for (i = 0; check[i].pat; i++) {
- if (match_pattern_list(version, check[i].pat,
- strlen(check[i].pat), 0) == 1) {
- debug("match: %s pat %s", version, check[i].pat);
- datafellows = check[i].bugs;
- return;
- }
- }
- debug("no match: %s", version);
-}
-
-#define SEP ","
-int
-proto_spec(const char *spec)
-{
- char *s, *p, *q;
- int ret = SSH_PROTO_UNKNOWN;
-
- if (spec == NULL)
- return ret;
- q = s = xstrdup(spec);
- for ((p = strsep(&q, SEP)); p && *p != '\0'; (p = strsep(&q, SEP))) {
- switch (atoi(p)) {
- case 1:
- if (ret == SSH_PROTO_UNKNOWN)
- ret |= SSH_PROTO_1_PREFERRED;
- ret |= SSH_PROTO_1;
- break;
- case 2:
- ret |= SSH_PROTO_2;
- break;
- default:
- log("ignoring bad proto spec: '%s'.", p);
- break;
- }
- }
- xfree(s);
- return ret;
-}
-
-char *
-compat_cipher_proposal(char *cipher_prop)
-{
- Buffer b;
- char *orig_prop, *fix_ciphers;
- char *cp, *tmp;
-
- if (!(datafellows & SSH_BUG_BIGENDIANAES))
- return(cipher_prop);
-
- buffer_init(&b);
- tmp = orig_prop = xstrdup(cipher_prop);
- while ((cp = strsep(&tmp, ",")) != NULL) {
- if (strncmp(cp, "aes", 3) != 0) {
- if (buffer_len(&b) > 0)
- buffer_append(&b, ",", 1);
- buffer_append(&b, cp, strlen(cp));
- }
- }
- buffer_append(&b, "\0", 1);
- fix_ciphers = xstrdup(buffer_ptr(&b));
- buffer_free(&b);
- xfree(orig_prop);
- debug2("Original cipher proposal: %s", cipher_prop);
- debug2("Compat cipher proposal: %s", fix_ciphers);
- if (!*fix_ciphers)
- fatal("No available ciphers found.");
-
- return(fix_ciphers);
-}
diff --git a/usr/src/cmd/ssh/libssh/common/compress.c b/usr/src/cmd/ssh/libssh/common/compress.c
deleted file mode 100644
index 6a4965c461..0000000000
--- a/usr/src/cmd/ssh/libssh/common/compress.c
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * Interface to packet compression for ssh.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: compress.c,v 1.19 2002/03/18 17:31:54 provos Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "log.h"
-#include "buffer.h"
-#include "zlib.h"
-#include "compress.h"
-
-z_stream incoming_stream;
-z_stream outgoing_stream;
-static int compress_init_send_called = 0;
-static int compress_init_recv_called = 0;
-static int inflate_failed = 0;
-static int deflate_failed = 0;
-
-/*
- * Initializes compression; level is compression level from 1 to 9
- * (as in gzip).
- */
-
-void
-buffer_compress_init_send(int level)
-{
- if (compress_init_send_called == 1)
- deflateEnd(&outgoing_stream);
- compress_init_send_called = 1;
- debug("Enabling compression at level %d.", level);
- if (level < 1 || level > 9)
- fatal("Bad compression level %d.", level);
- deflateInit(&outgoing_stream, level);
-}
-void
-buffer_compress_init_recv(void)
-{
- if (compress_init_recv_called == 1)
- inflateEnd(&incoming_stream);
- compress_init_recv_called = 1;
- inflateInit(&incoming_stream);
-}
-
-/* Frees any data structures allocated for compression. */
-
-void
-buffer_compress_uninit(void)
-{
- debug("compress outgoing: raw data %lu, compressed %lu, factor %.2f",
- outgoing_stream.total_in, outgoing_stream.total_out,
- outgoing_stream.total_in == 0 ? 0.0 :
- (double) outgoing_stream.total_out / outgoing_stream.total_in);
- debug("compress incoming: raw data %lu, compressed %lu, factor %.2f",
- incoming_stream.total_out, incoming_stream.total_in,
- incoming_stream.total_out == 0 ? 0.0 :
- (double) incoming_stream.total_in / incoming_stream.total_out);
- if (compress_init_recv_called == 1 && inflate_failed == 0)
- inflateEnd(&incoming_stream);
- if (compress_init_send_called == 1 && deflate_failed == 0)
- deflateEnd(&outgoing_stream);
-}
-
-/*
- * Compresses the contents of input_buffer into output_buffer. All packets
- * compressed using this function will form a single compressed data stream;
- * however, data will be flushed at the end of every call so that each
- * output_buffer can be decompressed independently (but in the appropriate
- * order since they together form a single compression stream) by the
- * receiver. This appends the compressed data to the output buffer.
- */
-
-void
-buffer_compress(Buffer * input_buffer, Buffer * output_buffer)
-{
- u_char buf[4096];
- int status;
-
- /* This case is not handled below. */
- if (buffer_len(input_buffer) == 0)
- return;
-
- /* Input is the contents of the input buffer. */
- outgoing_stream.next_in = buffer_ptr(input_buffer);
- outgoing_stream.avail_in = buffer_len(input_buffer);
-
- /* Loop compressing until deflate() returns with avail_out != 0. */
- do {
- /* Set up fixed-size output buffer. */
- outgoing_stream.next_out = buf;
- outgoing_stream.avail_out = sizeof(buf);
-
- /* Compress as much data into the buffer as possible. */
- status = deflate(&outgoing_stream, Z_PARTIAL_FLUSH);
- switch (status) {
- case Z_OK:
- /* Append compressed data to output_buffer. */
- buffer_append(output_buffer, buf,
- sizeof(buf) - outgoing_stream.avail_out);
- break;
- default:
- deflate_failed = 1;
- fatal("buffer_compress: deflate returned %d", status);
- /* NOTREACHED */
- }
- } while (outgoing_stream.avail_out == 0);
-}
-
-/*
- * Uncompresses the contents of input_buffer into output_buffer. All packets
- * uncompressed using this function will form a single compressed data
- * stream; however, data will be flushed at the end of every call so that
- * each output_buffer. This must be called for the same size units that the
- * buffer_compress was called, and in the same order that buffers compressed
- * with that. This appends the uncompressed data to the output buffer.
- */
-
-void
-buffer_uncompress(Buffer * input_buffer, Buffer * output_buffer)
-{
- u_char buf[4096];
- int status;
-
- incoming_stream.next_in = buffer_ptr(input_buffer);
- incoming_stream.avail_in = buffer_len(input_buffer);
-
- for (;;) {
- /* Set up fixed-size output buffer. */
- incoming_stream.next_out = buf;
- incoming_stream.avail_out = sizeof(buf);
-
- status = inflate(&incoming_stream, Z_PARTIAL_FLUSH);
- switch (status) {
- case Z_OK:
- buffer_append(output_buffer, buf,
- sizeof(buf) - incoming_stream.avail_out);
- break;
- case Z_BUF_ERROR:
- /*
- * Comments in zlib.h say that we should keep calling
- * inflate() until we get an error. This appears to
- * be the error that we get.
- */
- return;
- default:
- inflate_failed = 1;
- fatal("buffer_uncompress: inflate returned %d", status);
- /* NOTREACHED */
- }
- }
-}
diff --git a/usr/src/cmd/ssh/libssh/common/crc32.c b/usr/src/cmd/ssh/libssh/common/crc32.c
deleted file mode 100644
index 213b7b8aa8..0000000000
--- a/usr/src/cmd/ssh/libssh/common/crc32.c
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * COPYRIGHT (C) 1986 Gary S. Brown. You may use this program, or
- * code or tables extracted from it, as desired without restriction.
- *
- * First, the polynomial itself and its table of feedback terms. The
- * polynomial is
- * X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
- *
- * Note that we take it "backwards" and put the highest-order term in
- * the lowest-order bit. The X^32 term is "implied"; the LSB is the
- * X^31 term, etc. The X^0 term (usually shown as "+1") results in
- * the MSB being 1
- *
- * Note that the usual hardware shift register implementation, which
- * is what we're using (we're merely optimizing it by doing eight-bit
- * chunks at a time) shifts bits into the lowest-order term. In our
- * implementation, that means shifting towards the right. Why do we
- * do it this way? Because the calculated CRC must be transmitted in
- * order from highest-order term to lowest-order term. UARTs transmit
- * characters in order from LSB to MSB. By storing the CRC this way
- * we hand it to the UART in the order low-byte to high-byte; the UART
- * sends each low-bit to hight-bit; and the result is transmission bit
- * by bit from highest- to lowest-order term without requiring any bit
- * shuffling on our part. Reception works similarly
- *
- * The feedback terms table consists of 256, 32-bit entries. Notes
- *
- * The table can be generated at runtime if desired; code to do so
- * is shown later. It might not be obvious, but the feedback
- * terms simply represent the results of eight shift/xor opera
- * tions for all combinations of data and CRC register values
- *
- * The values must be right-shifted by eight bits by the "updcrc
- * logic; the shift must be u_(bring in zeroes). On some
- * hardware you could probably optimize the shift in assembler by
- * using byte-swap instructions
- * polynomial $edb88320
- */
-
-
-#include "includes.h"
-RCSID("$OpenBSD: crc32.c,v 1.8 2000/12/19 23:17:56 markus Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "crc32.h"
-
-static u_int crc32_tab[] = {
- 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
- 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
- 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
- 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
- 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
- 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
- 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
- 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
- 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
- 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
- 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
- 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
- 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
- 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
- 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
- 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
- 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
- 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
- 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
- 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
- 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
- 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
- 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
- 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
- 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
- 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
- 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
- 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
- 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
- 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
- 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
- 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
- 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
- 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
- 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
- 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
- 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
- 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
- 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
- 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
- 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
- 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
- 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
- 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
- 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
- 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
- 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
- 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
- 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
- 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
- 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
- 0x2d02ef8d
-};
-
-/* Return a 32-bit CRC of the contents of the buffer. */
-
-u_int
-ssh_crc32(const u_char *s, u_int len)
-{
- u_int i;
- u_int crc32val;
-
- crc32val = 0;
- for (i = 0; i < len; i ++) {
- crc32val = crc32_tab[(crc32val ^ s[i]) & 0xff] ^ (crc32val >> 8);
- }
- return crc32val;
-}
diff --git a/usr/src/cmd/ssh/libssh/common/deattack.c b/usr/src/cmd/ssh/libssh/common/deattack.c
deleted file mode 100644
index 82afd4f16b..0000000000
--- a/usr/src/cmd/ssh/libssh/common/deattack.c
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * Cryptographic attack detector for ssh - source code
- *
- * Copyright (c) 1998 CORE SDI S.A., Buenos Aires, Argentina.
- *
- * All rights reserved. Redistribution and use in source and binary
- * forms, with or without modification, are permitted provided that
- * this copyright notice is retained.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES ARE DISCLAIMED. IN NO EVENT SHALL CORE SDI S.A. BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY OR
- * CONSEQUENTIAL DAMAGES RESULTING FROM THE USE OR MISUSE OF THIS
- * SOFTWARE.
- *
- * Ariel Futoransky <futo@core-sdi.com>
- * <http://www.core-sdi.com>
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: deattack.c,v 1.18 2002/03/04 17:27:39 stevesk Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "deattack.h"
-#include "log.h"
-#include "crc32.h"
-#include "getput.h"
-#include "xmalloc.h"
-#include "deattack.h"
-
-/*
- * CRC attack detection has a worst-case behaviour that is O(N^2) over
- * the number of identical blocks in a packet. This behaviour can be
- * exploited to create a limited denial of service attack.
- *
- * However, because we are dealing with encrypted data, identical
- * blocks should only occur every 2^35 maximally-sized packets or so.
- * Consequently, we can detect this DoS by looking for identical blocks
- * in a packet.
- *
- * The parameter below determines how many identical blocks we will
- * accept in a single packet, trading off between attack detection and
- * likelihood of terminating a legitimate connection. A value of 32
- * corresponds to an average of 2^40 messages before an attack is
- * misdetected
- */
-#define MAX_IDENTICAL 32
-
-/* SSH Constants */
-#define SSH_MAXBLOCKS (32 * 1024)
-#define SSH_BLOCKSIZE (8)
-
-/* Hashing constants */
-#define HASH_MINSIZE (8 * 1024)
-#define HASH_ENTRYSIZE (2)
-#define HASH_FACTOR(x) ((x)*3/2)
-#define HASH_UNUSEDCHAR (0xff)
-#define HASH_UNUSED (0xffff)
-#define HASH_IV (0xfffe)
-
-#define HASH_MINBLOCKS (7*SSH_BLOCKSIZE)
-
-
-/* Hash function (Input keys are cipher results) */
-#define HASH(x) GET_32BIT(x)
-
-#define CMP(a, b) (memcmp(a, b, SSH_BLOCKSIZE))
-
-static void
-crc_update(u_int32_t *a, u_int32_t b)
-{
- b ^= *a;
- *a = ssh_crc32((u_char *) &b, sizeof(b));
-}
-
-/* detect if a block is used in a particular pattern */
-static int
-check_crc(u_char *S, u_char *buf, u_int32_t len,
- u_char *IV)
-{
- u_int32_t crc;
- u_char *c;
-
- crc = 0;
- if (IV && !CMP(S, IV)) {
- crc_update(&crc, 1);
- crc_update(&crc, 0);
- }
- for (c = buf; c < buf + len; c += SSH_BLOCKSIZE) {
- if (!CMP(S, c)) {
- crc_update(&crc, 1);
- crc_update(&crc, 0);
- } else {
- crc_update(&crc, 0);
- crc_update(&crc, 0);
- }
- }
- return (crc == 0);
-}
-
-
-/* Detect a crc32 compensation attack on a packet */
-int
-detect_attack(u_char *buf, u_int32_t len, u_char *IV)
-{
- static u_int16_t *h = (u_int16_t *) NULL;
- static u_int32_t n = HASH_MINSIZE / HASH_ENTRYSIZE;
- u_int32_t i, j;
- u_int32_t l, same;
- u_char *c;
- u_char *d;
-
- if (len > (SSH_MAXBLOCKS * SSH_BLOCKSIZE) ||
- len % SSH_BLOCKSIZE != 0) {
- fatal("detect_attack: bad length %d", len);
- }
- for (l = n; l < HASH_FACTOR(len / SSH_BLOCKSIZE); l = l << 2)
- ;
-
- if (h == NULL) {
- debug("Installing crc compensation attack detector.");
- n = l;
- h = (u_int16_t *) xmalloc(n * HASH_ENTRYSIZE);
- } else {
- if (l > n) {
- n = l;
- h = (u_int16_t *) xrealloc(h, n * HASH_ENTRYSIZE);
- }
- }
-
- if (len <= HASH_MINBLOCKS) {
- for (c = buf; c < buf + len; c += SSH_BLOCKSIZE) {
- if (IV && (!CMP(c, IV))) {
- if ((check_crc(c, buf, len, IV)))
- return (DEATTACK_DETECTED);
- else
- break;
- }
- for (d = buf; d < c; d += SSH_BLOCKSIZE) {
- if (!CMP(c, d)) {
- if ((check_crc(c, buf, len, IV)))
- return (DEATTACK_DETECTED);
- else
- break;
- }
- }
- }
- return (DEATTACK_OK);
- }
- memset(h, HASH_UNUSEDCHAR, n * HASH_ENTRYSIZE);
-
- if (IV)
- h[HASH(IV) & (n - 1)] = HASH_IV;
-
- for (c = buf, same = j = 0; c < (buf + len); c += SSH_BLOCKSIZE, j++) {
- for (i = HASH(c) & (n - 1); h[i] != HASH_UNUSED;
- i = (i + 1) & (n - 1)) {
- if (h[i] == HASH_IV) {
- if (!CMP(c, IV)) {
- if (check_crc(c, buf, len, IV))
- return (DEATTACK_DETECTED);
- else
- break;
- }
- } else if (!CMP(c, buf + h[i] * SSH_BLOCKSIZE)) {
- if (++same > MAX_IDENTICAL)
- return (DEATTACK_DOS_DETECTED);
- if (check_crc(c, buf, len, IV))
- return (DEATTACK_DETECTED);
- else
- break;
- }
- }
- h[i] = j;
- }
- return (DEATTACK_OK);
-}
diff --git a/usr/src/cmd/ssh/libssh/common/dh.c b/usr/src/cmd/ssh/libssh/common/dh.c
deleted file mode 100644
index 9293db7bb7..0000000000
--- a/usr/src/cmd/ssh/libssh/common/dh.c
+++ /dev/null
@@ -1,289 +0,0 @@
-/*
- * Copyright (c) 2000 Niels Provos. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: dh.c,v 1.22 2002/06/27 08:49:44 markus Exp $");
-
-#include "xmalloc.h"
-
-#include <openssl/bn.h>
-#include <openssl/dh.h>
-#include <openssl/evp.h>
-
-#include "buffer.h"
-#include "cipher.h"
-#include "kex.h"
-#include "dh.h"
-#include "pathnames.h"
-#include "log.h"
-#include "misc.h"
-
-static int
-parse_prime(int linenum, char *line, struct dhgroup *dhg)
-{
- char *cp, *arg;
- char *strsize, *gen, *prime;
-
- cp = line;
- arg = strdelim(&cp);
- /* Ignore leading whitespace */
- if (*arg == '\0')
- arg = strdelim(&cp);
- if (!arg || !*arg || *arg == '#')
- return 0;
-
- /* time */
- if (cp == NULL || *arg == '\0')
- goto fail;
- arg = strsep(&cp, " "); /* type */
- if (cp == NULL || *arg == '\0')
- goto fail;
- arg = strsep(&cp, " "); /* tests */
- if (cp == NULL || *arg == '\0')
- goto fail;
- arg = strsep(&cp, " "); /* tries */
- if (cp == NULL || *arg == '\0')
- goto fail;
- strsize = strsep(&cp, " "); /* size */
- if (cp == NULL || *strsize == '\0' ||
- (dhg->size = atoi(strsize)) == 0)
- goto fail;
- /* The whole group is one bit larger */
- dhg->size++;
- gen = strsep(&cp, " "); /* gen */
- if (cp == NULL || *gen == '\0')
- goto fail;
- prime = strsep(&cp, " "); /* prime */
- if (cp != NULL || *prime == '\0')
- goto fail;
-
- if ((dhg->g = BN_new()) == NULL)
- fatal("parse_prime: BN_new failed");
- if ((dhg->p = BN_new()) == NULL)
- fatal("parse_prime: BN_new failed");
- if (BN_hex2bn(&dhg->g, gen) == 0)
- goto failclean;
-
- if (BN_hex2bn(&dhg->p, prime) == 0)
- goto failclean;
-
- if (BN_num_bits(dhg->p) != dhg->size)
- goto failclean;
-
- return (1);
-
- failclean:
- BN_clear_free(dhg->g);
- BN_clear_free(dhg->p);
- fail:
- error("Bad prime description in line %d", linenum);
- return (0);
-}
-
-DH *
-choose_dh(int min, int wantbits, int max)
-{
- FILE *f;
- char line[2048];
- int best, bestcount, which;
- int linenum;
- struct dhgroup dhg;
-
- if ((f = fopen(_PATH_DH_MODULI, "r")) == NULL &&
- (f = fopen(_PATH_DH_PRIMES, "r")) == NULL) {
- log("WARNING: %s does not exist, using old modulus", _PATH_DH_MODULI);
- return (dh_new_group1());
- }
-
- linenum = 0;
- best = bestcount = 0;
- while (fgets(line, sizeof(line), f)) {
- linenum++;
- if (!parse_prime(linenum, line, &dhg))
- continue;
- BN_clear_free(dhg.g);
- BN_clear_free(dhg.p);
-
- if (dhg.size > max || dhg.size < min)
- continue;
-
- if ((dhg.size > wantbits && dhg.size < best) ||
- (dhg.size > best && best < wantbits)) {
- best = dhg.size;
- bestcount = 0;
- }
- if (dhg.size == best)
- bestcount++;
- }
- rewind(f);
-
- if (bestcount == 0) {
- fclose(f);
- log("WARNING: no suitable primes in %s", _PATH_DH_PRIMES);
- return (NULL);
- }
-
- linenum = 0;
- which = arc4random() % bestcount;
- while (fgets(line, sizeof(line), f)) {
- if (!parse_prime(linenum, line, &dhg))
- continue;
- if ((dhg.size > max || dhg.size < min) ||
- dhg.size != best ||
- linenum++ != which) {
- BN_clear_free(dhg.g);
- BN_clear_free(dhg.p);
- continue;
- }
- break;
- }
- fclose(f);
- if (linenum != which+1)
- fatal("WARNING: line %d disappeared in %s, giving up",
- which, _PATH_DH_PRIMES);
-
- return (dh_new_group(dhg.g, dhg.p));
-}
-
-/* diffie-hellman-group1-sha1 */
-
-int
-dh_pub_is_valid(DH *dh, BIGNUM *dh_pub)
-{
- int i;
- int n = BN_num_bits(dh_pub);
- int bits_set = 0;
-
- if (dh_pub->neg) {
- log("invalid public DH value: negativ");
- return 0;
- }
- for (i = 0; i <= n; i++)
- if (BN_is_bit_set(dh_pub, i))
- bits_set++;
- debug("bits set: %d/%d", bits_set, BN_num_bits(dh->p));
-
- /* if g==2 and bits_set==1 then computing log_g(dh_pub) is trivial */
- if (bits_set > 1 && (BN_cmp(dh_pub, dh->p) == -1))
- return 1;
- log("invalid public DH value (%d/%d)", bits_set, BN_num_bits(dh->p));
- return 0;
-}
-
-void
-dh_gen_key(DH *dh, int need)
-{
- int i, bits_set = 0, tries = 0;
-
- if (dh->p == NULL)
- fatal("dh_gen_key: dh->p == NULL");
- if (2*need >= BN_num_bits(dh->p))
- fatal("dh_gen_key: group too small: %d (2*need %d)",
- BN_num_bits(dh->p), 2*need);
- do {
- if (dh->priv_key != NULL)
- BN_clear_free(dh->priv_key);
- if ((dh->priv_key = BN_new()) == NULL)
- fatal("dh_gen_key: BN_new failed");
- /* generate a 2*need bits random private exponent */
- if (!BN_rand(dh->priv_key, 2*need, 0, 0))
- fatal("dh_gen_key: BN_rand failed");
- if (DH_generate_key(dh) == 0)
- fatal("dh_gen_key: DH_generate_key() failed");
- for (i = 0; i <= BN_num_bits(dh->priv_key); i++)
- if (BN_is_bit_set(dh->priv_key, i))
- bits_set++;
- debug("dh_gen_key: priv key bits set: %d/%d",
- bits_set, BN_num_bits(dh->priv_key));
- if (tries++ > 10)
- fatal("dh_gen_key: too many bad keys: giving up");
- } while (!dh_pub_is_valid(dh, dh->pub_key));
-}
-
-DH *
-dh_new_group_asc(const char *gen, const char *modulus)
-{
- DH *dh;
-
- if ((dh = DH_new()) == NULL)
- fatal("dh_new_group_asc: DH_new");
-
- if (BN_hex2bn(&dh->p, modulus) == 0)
- fatal("BN_hex2bn p");
- if (BN_hex2bn(&dh->g, gen) == 0)
- fatal("BN_hex2bn g");
-
- return (dh);
-}
-
-/*
- * This just returns the group, we still need to generate the exchange
- * value.
- */
-
-DH *
-dh_new_group(BIGNUM *gen, BIGNUM *modulus)
-{
- DH *dh;
-
- if ((dh = DH_new()) == NULL)
- fatal("dh_new_group: DH_new");
- dh->p = modulus;
- dh->g = gen;
-
- return (dh);
-}
-
-DH *
-dh_new_group1(void)
-{
- static char *gen = "2", *group1 =
- "FFFFFFFF" "FFFFFFFF" "C90FDAA2" "2168C234" "C4C6628B" "80DC1CD1"
- "29024E08" "8A67CC74" "020BBEA6" "3B139B22" "514A0879" "8E3404DD"
- "EF9519B3" "CD3A431B" "302B0A6D" "F25F1437" "4FE1356D" "6D51C245"
- "E485B576" "625E7EC6" "F44C42E9" "A637ED6B" "0BFF5CB6" "F406B7ED"
- "EE386BFB" "5A899FA5" "AE9F2411" "7C4B1FE6" "49286651" "ECE65381"
- "FFFFFFFF" "FFFFFFFF";
-
- return (dh_new_group_asc(gen, group1));
-}
-
-/*
- * Estimates the group order for a Diffie-Hellman group that has an
- * attack complexity approximately the same as O(2**bits). Estimate
- * with: O(exp(1.9223 * (ln q)^(1/3) (ln ln q)^(2/3)))
- */
-
-int
-dh_estimate(int bits)
-{
-
- if (bits < 64)
- return (512); /* O(2**63) */
- if (bits < 128)
- return (1024); /* O(2**86) */
- if (bits < 192)
- return (2048); /* O(2**116) */
- return (4096); /* O(2**156) */
-}
diff --git a/usr/src/cmd/ssh/libssh/common/dispatch.c b/usr/src/cmd/ssh/libssh/common/dispatch.c
deleted file mode 100644
index f2bf9b6847..0000000000
--- a/usr/src/cmd/ssh/libssh/common/dispatch.c
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2000 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#include "includes.h"
-RCSID("$OpenBSD: dispatch.c,v 1.15 2002/01/11 13:39:36 markus Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "ssh1.h"
-#include "ssh2.h"
-#include "log.h"
-#include "dispatch.h"
-#include "packet.h"
-#include "compat.h"
-
-#define DISPATCH_MIN 0
-#define DISPATCH_MAX 255
-
-dispatch_fn *dispatch[DISPATCH_MAX];
-
-void
-dispatch_protocol_error(int type, u_int32_t seq, void *ctxt)
-{
- log("dispatch_protocol_error: type %d seq %u", type, seq);
- if (!compat20)
- fatal("protocol error");
- packet_start(SSH2_MSG_UNIMPLEMENTED);
- packet_put_int(seq);
- packet_send();
- packet_write_wait();
-}
-void
-dispatch_protocol_ignore(int type, u_int32_t seq, void *ctxt)
-{
- log("dispatch_protocol_ignore: type %d seq %u", type, seq);
-}
-void
-dispatch_init(dispatch_fn *dflt)
-{
- u_int i;
- for (i = 0; i < DISPATCH_MAX; i++)
- dispatch[i] = dflt;
-}
-void
-dispatch_range(u_int from, u_int to, dispatch_fn *fn)
-{
- u_int i;
-
- for (i = from; i <= to; i++) {
- if (i >= DISPATCH_MAX)
- break;
- dispatch[i] = fn;
- }
-}
-void
-dispatch_set(int type, dispatch_fn *fn)
-{
- dispatch[type] = fn;
-}
-void
-dispatch_run(int mode, int *done, void *ctxt)
-{
- for (;;) {
- int type;
- u_int32_t seqnr;
-
- if (mode == DISPATCH_BLOCK) {
- type = packet_read_seqnr(&seqnr);
- } else {
- type = packet_read_poll_seqnr(&seqnr);
- if (type == SSH_MSG_NONE)
- return;
- }
- if (type > 0 && type < DISPATCH_MAX && dispatch[type] != NULL)
- (*dispatch[type])(type, seqnr, ctxt);
- else
- packet_disconnect("protocol error: rcvd type %d", type);
- if (done != NULL && *done)
- return;
- }
-}
diff --git a/usr/src/cmd/ssh/libssh/common/engine.c b/usr/src/cmd/ssh/libssh/common/engine.c
deleted file mode 100644
index 0541c658df..0000000000
--- a/usr/src/cmd/ssh/libssh/common/engine.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include "includes.h"
-#include "log.h"
-#include "engine.h"
-
-#define PKCS11_ENGINE "pkcs11"
-
-/*
- * Loads the PKCS#11 engine if the UseOpenSSLEngine is set to yes which is the
- * default value.
- */
-ENGINE *
-pkcs11_engine_load(int use_engine)
-{
- ENGINE *e = NULL;
-
- debug("use_engine is '%s'", use_engine == 1 ? "yes" : "no");
- if (use_engine == 0)
- return (NULL);
-
- ENGINE_load_pk11();
- /* get structural reference */
- if ((e = ENGINE_by_id(PKCS11_ENGINE)) == NULL) {
- error("%s engine does not exist", PKCS11_ENGINE);
- return (NULL);
- }
-
- /* get functional reference */
- if (ENGINE_init(e) == 0) {
- error("can't initialize %s engine", PKCS11_ENGINE);
- return (NULL);
- }
-
- debug("%s engine initialized, now setting it as default for "
- "RSA, DSA, and symmetric ciphers", PKCS11_ENGINE);
-
- /*
- * Offloading RSA, DSA and symmetric ciphers to the engine is all we
- * want. We don't offload Diffie-Helmann since we use longer DH keys
- * than supported in ncp/n2cp (2048 bits). And, we don't offload digest
- * operations since that would be beneficial if only big packets were
- * processed (~8K). However, that's not the case. For example,
- * SSH_MSG_CHANNEL_WINDOW_ADJUST messages are always small. Given the
- * fact that digest operations are fast in software and the inherent
- * overhead of offloading anything to HW is quite big, not offloading
- * digests to HW actually makes SSH data transfer faster.
- */
- if (!ENGINE_set_default_RSA(e)) {
- error("can't use %s engine for RSA", PKCS11_ENGINE);
- return (NULL);
- }
- if (!ENGINE_set_default_DSA(e)) {
- error("can't use %s engine for DSA", PKCS11_ENGINE);
- return (NULL);
- }
- if (!ENGINE_set_default_ciphers(e)) {
- error("can't use %s engine for symmetric ciphers",
- PKCS11_ENGINE);
- return (NULL);
- }
-
- debug("%s engine initialization complete", PKCS11_ENGINE);
- return (e);
-}
-
-/*
- * Finishes the PKCS#11 engine after all remaining structural and functional
- * references to the ENGINE structure are freed.
- */
-void
-pkcs11_engine_finish(void *engine)
-{
- ENGINE *e = (ENGINE *)engine;
-
- debug("in pkcs11_engine_finish(), engine pointer is %p", e);
- /* UseOpenSSLEngine was 'no' */
- if (engine == NULL)
- return;
-
- debug("unregistering RSA");
- ENGINE_unregister_RSA(e);
- debug("unregistering DSA");
- ENGINE_unregister_DSA(e);
- debug("unregistering ciphers");
- ENGINE_unregister_ciphers(e);
-
- debug("calling ENGINE_finish()");
- ENGINE_finish(engine);
- debug("calling ENGINE_remove()");
- ENGINE_remove(engine);
- debug("calling ENGINE_free()");
- ENGINE_free(engine);
- debug("%s engine finished", PKCS11_ENGINE);
-}
diff --git a/usr/src/cmd/ssh/libssh/common/entropy.c b/usr/src/cmd/ssh/libssh/common/entropy.c
deleted file mode 100644
index c661b74496..0000000000
--- a/usr/src/cmd/ssh/libssh/common/entropy.c
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Copyright (c) 2001 Damien Miller. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-
-#include <openssl/rand.h>
-#include <openssl/crypto.h>
-
-#include "ssh.h"
-#include "misc.h"
-#include "xmalloc.h"
-#include "atomicio.h"
-#include "pathnames.h"
-#include "log.h"
-
-/*
- * Portable OpenSSH PRNG seeding:
- * If OpenSSL has not "internally seeded" itself (e.g. pulled data from
- * /dev/random), then we execute a "ssh-rand-helper" program which
- * collects entropy and writes it to stdout. The child program must
- * write at least RANDOM_SEED_SIZE bytes. The child is run with stderr
- * attached, so error/debugging output should be visible.
- *
- * XXX: we should tell the child how many bytes we need.
- */
-
-RCSID("$Id: entropy.c,v 1.44 2002/06/09 19:41:48 mouring Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifndef OPENSSL_PRNG_ONLY
-#define RANDOM_SEED_SIZE 48
-static uid_t original_uid, original_euid;
-#endif
-
-void
-seed_rng(void)
-{
-#ifndef OPENSSL_PRNG_ONLY
- int devnull;
- int p[2];
- pid_t pid;
- int ret;
- unsigned char buf[RANDOM_SEED_SIZE];
- mysig_t old_sigchld;
-
- if (RAND_status() == 1) {
- debug3("RNG is ready, skipping seeding");
- return;
- }
-
- debug3("Seeding PRNG from %s", SSH_RAND_HELPER);
-
- if ((devnull = open("/dev/null", O_RDWR)) == -1)
- fatal("Couldn't open /dev/null: %s", strerror(errno));
- if (pipe(p) == -1)
- fatal("pipe: %s", strerror(errno));
-
- old_sigchld = mysignal(SIGCHLD, SIG_DFL);
- if ((pid = fork()) == -1)
- fatal("Couldn't fork: %s", strerror(errno));
- if (pid == 0) {
- dup2(devnull, STDIN_FILENO);
- dup2(p[1], STDOUT_FILENO);
- /* Keep stderr open for errors */
- close(p[0]);
- close(p[1]);
- close(devnull);
-
- if (original_uid != original_euid &&
- ( seteuid(getuid()) == -1 ||
- setuid(original_uid) == -1) ) {
- fprintf(stderr, "(rand child) setuid(%d): %s\n",
- original_uid, strerror(errno));
- _exit(1);
- }
-
- execl(SSH_RAND_HELPER, "ssh-rand-helper", NULL);
- fprintf(stderr, "(rand child) Couldn't exec '%s': %s\n",
- SSH_RAND_HELPER, strerror(errno));
- _exit(1);
- }
-
- close(devnull);
- close(p[1]);
-
- memset(buf, '\0', sizeof(buf));
- ret = atomicio(read, p[0], buf, sizeof(buf));
- if (ret == -1)
- fatal("Couldn't read from ssh-rand-helper: %s",
- strerror(errno));
- if (ret != sizeof(buf))
- fatal("ssh-rand-helper child produced insufficient data");
-
- close(p[0]);
-
- if (waitpid(pid, &ret, 0) == -1)
- fatal("Couldn't wait for ssh-rand-helper completion: %s",
- strerror(errno));
- mysignal(SIGCHLD, old_sigchld);
-
- /* We don't mind if the child exits upon a SIGPIPE */
- if (!WIFEXITED(ret) &&
- (!WIFSIGNALED(ret) || WTERMSIG(ret) != SIGPIPE))
- fatal("ssh-rand-helper terminated abnormally");
- if (WEXITSTATUS(ret) != 0)
- fatal("ssh-rand-helper exit with exit status %d", ret);
-
- RAND_add(buf, sizeof(buf), sizeof(buf));
- memset(buf, '\0', sizeof(buf));
-
-#endif /* OPENSSL_PRNG_ONLY */
- if (RAND_status() != 1)
- fatal("PRNG is not seeded");
-}
-
-void
-init_rng(void)
-{
- /*
- * OpenSSL version numbers: MNNFFPPS: major minor fix patch status
- * We match major, minor, fix and status (not patch)
- */
- if ((SSLeay() ^ OPENSSL_VERSION_NUMBER) & ~0xff0L)
- fatal("OpenSSL version mismatch. Built against %lx, you "
- "have %lx", OPENSSL_VERSION_NUMBER, SSLeay());
-
-#ifndef OPENSSL_PRNG_ONLY
- if ((original_uid = getuid()) == -1)
- fatal("getuid: %s", strerror(errno));
- if ((original_euid = geteuid()) == -1)
- fatal("geteuid: %s", strerror(errno));
-#endif
-}
-
diff --git a/usr/src/cmd/ssh/libssh/common/fatal.c b/usr/src/cmd/ssh/libssh/common/fatal.c
deleted file mode 100644
index 0b5038f365..0000000000
--- a/usr/src/cmd/ssh/libssh/common/fatal.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2002 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: fatal.c,v 1.1 2002/02/22 12:20:34 markus Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "log.h"
-
-/* Fatal messages. This function never returns. */
-
-void
-fatal(const char *fmt,...)
-{
- va_list args;
- va_start(args, fmt);
- do_log(SYSLOG_LEVEL_FATAL, fmt, args);
- va_end(args);
- fatal_cleanup();
-}
diff --git a/usr/src/cmd/ssh/libssh/common/g11n.c b/usr/src/cmd/ssh/libssh/common/g11n.c
deleted file mode 100644
index 558b410c96..0000000000
--- a/usr/src/cmd/ssh/libssh/common/g11n.c
+++ /dev/null
@@ -1,964 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- *
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include <errno.h>
-#include <locale.h>
-#include <langinfo.h>
-#include <iconv.h>
-#include <ctype.h>
-#include <wctype.h>
-#include <strings.h>
-#include <string.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include "includes.h"
-#include "xmalloc.h"
-#include "xlist.h"
-#include "compat.h"
-#include "log.h"
-
-#ifdef MIN
-#undef MIN
-#endif /* MIN */
-
-#define MIN(x, y) ((x) < (y) ? (x) : (y))
-
-#define LOCALE_PATH "/usr/bin/locale"
-
-/* two-char country code, '-' and two-char region code */
-#define LANGTAG_MAX 5
-
-static int locale_cmp(const void *d1, const void *d2);
-static char *g11n_locale2langtag(char *locale);
-
-static char *do_iconv(iconv_t cd, const char *s, uint_t *lenp, char **err_str);
-
-/*
- * native_codeset records the codeset of the default system locale.
- * It is used to convert the contents of file (eg /etc/issue) which is
- * supposed to be in the codeset of default system locale.
- */
-static char *native_codeset;
-
-/*
- * Convert locale string name into a language tag. The caller is responsible for
- * freeing the memory allocated for the result.
- */
-static char *
-g11n_locale2langtag(char *locale)
-{
- char *langtag;
-
- /* base cases */
- if (!locale || !*locale)
- return (NULL);
-
- if (strcmp(locale, "POSIX") == 0 || strcmp(locale, "C") == 0)
- return (xstrdup("i-default"));
-
- /* punt for language codes which are not exactly 2 letters */
- if (strlen(locale) < 2 ||
- !isalpha(locale[0]) ||
- !isalpha(locale[1]) ||
- (locale[2] != '\0' &&
- locale[2] != '_' &&
- locale[2] != '.' &&
- locale[2] != '@'))
- return (NULL);
-
-
- /* we have a primary language sub-tag */
- langtag = (char *)xmalloc(LANGTAG_MAX + 1);
-
- strncpy(langtag, locale, 2);
- langtag[2] = '\0';
-
- /* do we have country sub-tag? For example: cs_CZ */
- if (locale[2] == '_') {
- if (strlen(locale) < 5 ||
- !isalpha(locale[3]) ||
- !isalpha(locale[4]) ||
- (locale[5] != '\0' && (locale[5] != '.' &&
- locale[5] != '@'))) {
- return (langtag);
- }
-
- /* example: create cs-CZ from cs_CZ */
- if (snprintf(langtag, 6, "%.*s-%.*s", 2, locale, 2,
- locale + 3) == 5)
- return (langtag);
- }
-
- /* in all other cases we just use the primary language sub-tag */
- return (langtag);
-}
-
-uint_t
-g11n_langtag_is_default(char *langtag)
-{
- return (strcmp(langtag, "i-default") == 0);
-}
-
-/*
- * This lang tag / locale matching function works only for two-character
- * language primary sub-tags and two-character country sub-tags.
- */
-uint_t
-g11n_langtag_matches_locale(char *langtag, char *locale)
-{
- /* match "i-default" to the process' current locale if possible */
- if (g11n_langtag_is_default(langtag)) {
- if (strcasecmp(locale, "POSIX") == 0 ||
- strcasecmp(locale, "C") == 0)
- return (1);
- else
- return (0);
- }
-
- /*
- * locale must be at least 2 chars long and the lang part must be
- * exactly two characters
- */
- if (strlen(locale) < 2 ||
- (!isalpha(locale[0]) || !isalpha(locale[1]) ||
- (locale[2] != '\0' && locale[2] != '_' &&
- locale[2] != '.' && locale[2] != '@')))
- return (0);
-
- /* same thing with the langtag */
- if (strlen(langtag) < 2 ||
- (!isalpha(langtag[0]) || !isalpha(langtag[1]) ||
- (langtag[2] != '\0' && langtag[2] != '-')))
- return (0);
-
- /* primary language sub-tag and the locale's language part must match */
- if (strncasecmp(langtag, locale, 2) != 0)
- return (0);
-
- /*
- * primary language sub-tag and the locale's language match, now
- * fuzzy check country part
- */
-
- /* neither langtag nor locale have more than one component */
- if (langtag[2] == '\0' &&
- (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@'))
- return (2);
-
- /* langtag has only one sub-tag... */
- if (langtag[2] == '\0')
- return (1);
-
- /* locale has no country code... */
- if (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@')
- return (1);
-
- /* langtag has more than one subtag and the locale has a country code */
-
- /* ignore second subtag if not two chars */
- if (strlen(langtag) < 5)
- return (1);
-
- if (!isalpha(langtag[3]) || !isalpha(langtag[4]) ||
- (langtag[5] != '\0' && langtag[5] != '-'))
- return (1);
-
- /* ignore rest of locale if there is no two-character country part */
- if (strlen(locale) < 5)
- return (1);
-
- if (locale[2] != '_' || !isalpha(locale[3]) || !isalpha(locale[4]) ||
- (locale[5] != '\0' && locale[5] != '.' && locale[5] != '@'))
- return (1);
-
- /* if the country part matches, return 2 */
- if (strncasecmp(&langtag[3], &locale[3], 2) == 0)
- return (2);
-
- return (1);
-}
-
-char *
-g11n_getlocale()
-{
- /* we have one text domain - always set it */
- (void) textdomain(TEXT_DOMAIN);
-
- /* if the locale is not set, set it from the env vars */
- if (!setlocale(LC_MESSAGES, NULL))
- (void) setlocale(LC_MESSAGES, "");
-
- return (setlocale(LC_MESSAGES, NULL));
-}
-
-void
-g11n_setlocale(int category, const char *locale)
-{
- char *curr;
-
- if (native_codeset == NULL) {
- /* set default locale, and record current codeset */
- (void) setlocale(LC_ALL, "");
- curr = nl_langinfo(CODESET);
- native_codeset = xstrdup(curr);
- }
-
- /* we have one text domain - always set it */
- (void) textdomain(TEXT_DOMAIN);
-
- if (!locale)
- return;
-
- if (*locale && ((curr = setlocale(category, NULL))) &&
- strcmp(curr, locale) == 0)
- return;
-
- /* if <category> is bogus, setlocale() will do nothing */
- (void) setlocale(category, locale);
-}
-
-char **
-g11n_getlocales()
-{
- FILE *locale_out;
- uint_t n_elems, list_size, long_line = 0;
- char **list;
- char locale[64]; /* 64 bytes is plenty for locale names */
-
- if ((locale_out = popen(LOCALE_PATH " -a", "r")) == NULL)
- return (NULL);
-
- /*
- * start with enough room for 65 locales - that's a lot fewer than
- * all the locales available for installation, but a lot more than
- * what most users will need and install
- */
- n_elems = 0;
- list_size = 192;
- list = (char **) xmalloc(sizeof (char *) * (list_size + 1));
- memset(list, 0, sizeof (char *) * (list_size + 1));
-
- while (fgets(locale, sizeof (locale), locale_out)) {
- /* skip long locale names (if any) */
- if (!strchr(locale, '\n')) {
- long_line = 1;
- continue;
- } else if (long_line) {
- long_line = 0;
- continue;
- }
-
- if (strncmp(locale, "iso_8859", 8) == 0)
- /* ignore locale names like "iso_8859-1" */
- continue;
-
- if (n_elems == list_size) {
- list_size *= 2;
- list = (char **)xrealloc((void *) list,
- (list_size + 1) * sizeof (char *));
- memset(&list[n_elems + 1], 0,
- sizeof (char *) * (list_size - n_elems + 1));
- }
-
- *(strchr(locale, '\n')) = '\0'; /* remove the trailing \n */
- list[n_elems++] = xstrdup(locale);
- }
-
- (void) pclose(locale_out);
-
- if (n_elems == 0) {
- xfree(list);
- return (NULL);
- }
-
- list[n_elems] = NULL;
-
- qsort(list, n_elems - 1, sizeof (char *), locale_cmp);
- return (list);
-}
-
-char *
-g11n_getlangs()
-{
- char *locale;
-
- if (getenv("SSH_LANGS"))
- return (xstrdup(getenv("SSH_LANGS")));
-
- locale = g11n_getlocale();
-
- if (!locale || !*locale)
- return (xstrdup("i-default"));
-
- return (g11n_locale2langtag(locale));
-}
-
-char *
-g11n_locales2langs(char **locale_set)
-{
- char **p, **r, **q;
- char *langtag, *langs;
- int locales, skip;
-
- for (locales = 0, p = locale_set; p && *p; p++)
- locales++;
-
- r = (char **)xmalloc((locales + 1) * sizeof (char *));
- memset(r, 0, (locales + 1) * sizeof (char *));
-
- for (p = locale_set; p && *p && ((p - locale_set) <= locales); p++) {
- skip = 0;
- if ((langtag = g11n_locale2langtag(*p)) == NULL)
- continue;
- for (q = r; (q - r) < locales; q++) {
- if (!*q)
- break;
- if (*q && strcmp(*q, langtag) == 0)
- skip = 1;
- }
- if (!skip)
- *(q++) = langtag;
- else
- xfree(langtag);
- *q = NULL;
- }
-
- langs = xjoin(r, ',');
- g11n_freelist(r);
-
- return (langs);
-}
-
-static int
-sortcmp(const void *d1, const void *d2)
-{
- char *s1 = *(char **)d1;
- char *s2 = *(char **)d2;
-
- return (strcmp(s1, s2));
-}
-
-int
-g11n_langtag_match(char *langtag1, char *langtag2)
-{
- int len1, len2;
- char c1, c2;
-
- len1 = (strchr(langtag1, '-')) ?
- (strchr(langtag1, '-') - langtag1)
- : strlen(langtag1);
-
- len2 = (strchr(langtag2, '-')) ?
- (strchr(langtag2, '-') - langtag2)
- : strlen(langtag2);
-
- /* no match */
- if (len1 != len2 || strncmp(langtag1, langtag2, len1) != 0)
- return (0);
-
- c1 = *(langtag1 + len1);
- c2 = *(langtag2 + len2);
-
- /* no country sub-tags - exact match */
- if (c1 == '\0' && c2 == '\0')
- return (2);
-
- /* one langtag has a country sub-tag, the other doesn't */
- if (c1 == '\0' || c2 == '\0')
- return (1);
-
- /* can't happen - both langtags have a country sub-tag */
- if (c1 != '-' || c2 != '-')
- return (1);
-
- /* compare country subtags */
- langtag1 = langtag1 + len1 + 1;
- langtag2 = langtag2 + len2 + 1;
-
- len1 = (strchr(langtag1, '-')) ?
- (strchr(langtag1, '-') - langtag1) : strlen(langtag1);
-
- len2 = (strchr(langtag2, '-')) ?
- (strchr(langtag2, '-') - langtag2) : strlen(langtag2);
-
- if (len1 != len2 || strncmp(langtag1, langtag2, len1) != 0)
- return (1);
-
- /* country tags matched - exact match */
- return (2);
-}
-
-char *
-g11n_langtag_set_intersect(char *set1, char *set2)
-{
- char **list1, **list2, **list3, **p, **q, **r;
- char *set3, *lang_subtag;
- uint_t n1, n2, n3;
- uint_t do_append;
-
- list1 = xsplit(set1, ',');
- list2 = xsplit(set2, ',');
-
- for (n1 = 0, p = list1; p && *p; p++, n1++)
- ;
- for (n2 = 0, p = list2; p && *p; p++, n2++)
- ;
-
- list3 = (char **) xmalloc(sizeof (char *) * (n1 + n2 + 1));
- *list3 = NULL;
-
- /*
- * we must not sort the user langtags - sorting or not the server's
- * should not affect the outcome
- */
- qsort(list2, n2, sizeof (char *), sortcmp);
-
- for (n3 = 0, p = list1; p && *p; p++) {
- do_append = 0;
- for (q = list2; q && *q; q++) {
- if (g11n_langtag_match(*p, *q) != 2) continue;
- /* append element */
- for (r = list3; (r - list3) <= (n1 + n2); r++) {
- do_append = 1;
- if (!*r)
- break;
- if (strcmp(*p, *r) == 0) {
- do_append = 0;
- break;
- }
- }
- if (do_append && n3 <= (n1 + n2)) {
- list3[n3++] = xstrdup(*p);
- list3[n3] = NULL;
- }
- }
- }
-
- for (p = list1; p && *p; p++) {
- do_append = 0;
- for (q = list2; q && *q; q++) {
- if (g11n_langtag_match(*p, *q) != 1)
- continue;
-
- /* append element */
- lang_subtag = xstrdup(*p);
- if (strchr(lang_subtag, '-'))
- *(strchr(lang_subtag, '-')) = '\0';
- for (r = list3; (r - list3) <= (n1 + n2); r++) {
- do_append = 1;
- if (!*r)
- break;
- if (strcmp(lang_subtag, *r) == 0) {
- do_append = 0;
- break;
- }
- }
- if (do_append && n3 <= (n1 + n2)) {
- list3[n3++] = lang_subtag;
- list3[n3] = NULL;
- } else
- xfree(lang_subtag);
- }
- }
-
- set3 = xjoin(list3, ',');
- xfree_split_list(list1);
- xfree_split_list(list2);
- xfree_split_list(list3);
-
- return (set3);
-}
-
-char *
-g11n_clnt_langtag_negotiate(char *clnt_langtags, char *srvr_langtags)
-{
- char *list, *result;
- char **xlist;
-
- /* g11n_langtag_set_intersect uses xmalloc - should not return NULL */
- list = g11n_langtag_set_intersect(clnt_langtags, srvr_langtags);
-
- if (!list)
- return (NULL);
-
- xlist = xsplit(list, ',');
-
- xfree(list);
-
- if (!xlist || !*xlist)
- return (NULL);
-
- result = xstrdup(*xlist);
- xfree_split_list(xlist);
-
- return (result);
-}
-
-/*
- * Compare locales, preferring UTF-8 codesets to others, otherwise doing
- * a stright strcmp()
- */
-static int
-locale_cmp(const void *d1, const void *d2)
-{
- char *dot_ptr;
- char *s1 = *(char **)d1;
- char *s2 = *(char **)d2;
- int s1_is_utf8 = 0;
- int s2_is_utf8 = 0;
-
- /* check if s1 is a UTF-8 locale */
- if (((dot_ptr = strchr((char *)s1, '.')) != NULL) &&
- (*dot_ptr != '\0') && (strncmp(dot_ptr + 1, "UTF-8", 5) == 0) &&
- (*(dot_ptr + 6) == '\0' || *(dot_ptr + 6) == '@')) {
- s1_is_utf8++;
- }
-
- /* check if s2 is a UTF-8 locale */
- if (((dot_ptr = strchr((char *)s2, '.')) != NULL) &&
- (*dot_ptr != '\0') && (strncmp(dot_ptr + 1, "UTF-8", 5) == 0) &&
- (*(dot_ptr + 6) == '\0' || *(dot_ptr + 6) == '@')) {
- s2_is_utf8++;
- }
-
- /* prefer UTF-8 locales */
- if (s1_is_utf8 && !s2_is_utf8)
- return (-1);
-
- if (s2_is_utf8 && !s1_is_utf8)
- return (1);
-
- /* prefer any locale over the default locales */
- if (strcmp(s1, "C") == 0 || strcmp(s1, "POSIX") == 0 ||
- strcmp(s1, "common") == 0) {
- if (strcmp(s2, "C") != 0 && strcmp(s2, "POSIX") != 0 &&
- strcmp(s2, "common") != 0)
- return (1);
- }
-
- if (strcmp(s2, "C") == 0 || strcmp(s2, "POSIX") == 0 ||
- strcmp(s2, "common") == 0) {
- if (strcmp(s1, "C") != 0 &&
- strcmp(s1, "POSIX") != 0 &&
- strcmp(s1, "common") != 0)
- return (-1);
- }
-
- return (strcmp(s1, s2));
-}
-
-
-char **
-g11n_langtag_set_locale_set_intersect(char *langtag_set, char **locale_set)
-{
- char **langtag_list, **result, **p, **q, **r;
- char *s;
- uint_t do_append, n_langtags, n_locales, n_results, max_results;
-
- if (locale_set == NULL)
- return (NULL);
-
- /* count lang tags and locales */
- for (n_locales = 0, p = locale_set; p && *p; p++)
- n_locales++;
-
- n_langtags = ((s = langtag_set) != NULL && *s && *s != ',') ? 1 : 0;
- /* count the number of langtags */
- for (; s = strchr(s, ','); s++, n_langtags++)
- ;
-
- qsort(locale_set, n_locales, sizeof (char *), locale_cmp);
-
- langtag_list = xsplit(langtag_set, ',');
- for (n_langtags = 0, p = langtag_list; p && *p; p++, n_langtags++)
- ;
-
- max_results = MIN(n_locales, n_langtags) * 2;
- result = (char **) xmalloc(sizeof (char *) * (max_results + 1));
- *result = NULL;
- n_results = 0;
-
- /* more specific matches first */
- for (p = langtag_list; p && *p; p++) {
- do_append = 0;
- for (q = locale_set; q && *q; q++) {
- if (g11n_langtag_matches_locale(*p, *q) == 2) {
- do_append = 1;
- for (r = result; (r - result) <=
- MIN(n_locales, n_langtags); r++) {
- if (!*r)
- break;
- if (strcmp(*q, *r) == 0) {
- do_append = 0;
- break;
- }
- }
- if (do_append && n_results < max_results) {
- result[n_results++] = xstrdup(*q);
- result[n_results] = NULL;
- }
- break;
- }
- }
- }
-
- for (p = langtag_list; p && *p; p++) {
- do_append = 0;
- for (q = locale_set; q && *q; q++) {
- if (g11n_langtag_matches_locale(*p, *q) == 1) {
- do_append = 1;
- for (r = result; (r - result) <=
- MIN(n_locales, n_langtags); r++) {
- if (!*r)
- break;
- if (strcmp(*q, *r) == 0) {
- do_append = 0;
- break;
- }
- }
- if (do_append && n_results < max_results) {
- result[n_results++] = xstrdup(*q);
- result[n_results] = NULL;
- }
- break;
- }
- }
- }
-
- xfree_split_list(langtag_list);
-
- return (result);
-}
-
-char *
-g11n_srvr_locale_negotiate(char *clnt_langtags, char **srvr_locales)
-{
- char **results, **locales, *result = NULL;
-
- if (srvr_locales == NULL)
- locales = g11n_getlocales();
- else
- locales = srvr_locales;
-
- if ((results = g11n_langtag_set_locale_set_intersect(clnt_langtags,
- locales)) == NULL)
- goto err;
-
- if (*results != NULL)
- result = xstrdup(*results);
-
- xfree_split_list(results);
-
-err:
- if (locales != NULL && locales != srvr_locales)
- g11n_freelist(locales);
- return (result);
-}
-
-/*
- * Functions for converting to UTF-8 from the local codeset and
- * converting from UTF-8 to the local codeset.
- *
- * The error_str parameter is an pointer to a char variable where to
- * store a string suitable for use with error() or fatal() or friends.
- * It is also used for an error indicator when NULL is returned.
- *
- * If conversion isn't necessary, *error_str is set to NULL, and
- * NULL is returned.
- * If conversion error occured, *error_str points to an error message,
- * and NULL is returned.
- */
-char *
-g11n_convert_from_utf8(const char *str, uint_t *lenp, char **error_str)
-{
- static char *last_codeset;
- static iconv_t cd = (iconv_t)-1;
- char *codeset;
-
- *error_str = NULL;
-
- codeset = nl_langinfo(CODESET);
-
- if (strcmp(codeset, "UTF-8") == 0)
- return (NULL);
-
- if (last_codeset == NULL || strcmp(codeset, last_codeset) != 0) {
- if (last_codeset != NULL) {
- xfree(last_codeset);
- last_codeset = NULL;
- }
- if (cd != (iconv_t)-1)
- (void) iconv_close(cd);
-
- if ((cd = iconv_open(codeset, "UTF-8")) == (iconv_t)-1) {
- *error_str = gettext("Cannot convert UTF-8 "
- "strings to the local codeset");
- return (NULL);
- }
- last_codeset = xstrdup(codeset);
- }
- return (do_iconv(cd, str, lenp, error_str));
-}
-
-char *
-g11n_convert_to_utf8(const char *str, uint_t *lenp,
- int native, char **error_str)
-{
- static char *last_codeset;
- static iconv_t cd = (iconv_t)-1;
- char *codeset;
-
- *error_str = NULL;
-
- if (native)
- codeset = native_codeset;
- else
- codeset = nl_langinfo(CODESET);
-
- if (strcmp(codeset, "UTF-8") == 0)
- return (NULL);
-
- if (last_codeset == NULL || strcmp(codeset, last_codeset) != 0) {
- if (last_codeset != NULL) {
- xfree(last_codeset);
- last_codeset = NULL;
- }
- if (cd != (iconv_t)-1)
- (void) iconv_close(cd);
-
- if ((cd = iconv_open("UTF-8", codeset)) == (iconv_t)-1) {
- *error_str = gettext("Cannot convert the "
- "local codeset strings to UTF-8");
- return (NULL);
- }
- last_codeset = xstrdup(codeset);
- }
- return (do_iconv(cd, str, lenp, error_str));
-}
-
-/*
- * Wrapper around iconv()
- *
- * The caller is responsible for freeing the result. NULL is returned when
- * (errno && errno != E2BIG) (i.e., EILSEQ, EINVAL, EBADF).
- * The caller must ensure that the input string isn't NULL pointer.
- */
-static char *
-do_iconv(iconv_t cd, const char *str, uint_t *lenp, char **err_str)
-{
- int ilen, olen;
- size_t ileft, oleft;
- char *ostr, *optr;
- const char *istr;
-
- ilen = *lenp;
- olen = ilen + 1;
-
- ostr = NULL;
- for (;;) {
- olen *= 2;
- oleft = olen;
- ostr = optr = xrealloc(ostr, olen);
- istr = (const char *)str;
- if ((ileft = ilen) == 0)
- break;
-
- if (iconv(cd, &istr, &ileft, &optr, &oleft) != (size_t)-1) {
- /* success: generate reset sequence */
- if (iconv(cd, NULL, NULL,
- &optr, &oleft) == (size_t)-1 && errno == E2BIG) {
- continue;
- }
- break;
- }
- /* failed */
- if (errno != E2BIG) {
- oleft = olen;
- (void) iconv(cd, NULL, NULL, &ostr, &oleft);
- xfree(ostr);
- *err_str = gettext("Codeset conversion failed");
- return (NULL);
- }
- }
- olen = optr - ostr;
- optr = xmalloc(olen + 1);
- (void) memcpy(optr, ostr, olen);
- xfree(ostr);
-
- optr[olen] = '\0';
- *lenp = olen;
-
- return (optr);
-}
-
-/*
- * A filter for output string. Control and unprintable characters
- * are converted into visible form (eg "\ooo").
- */
-char *
-g11n_filter_string(char *s)
-{
- int mb_cur_max = MB_CUR_MAX;
- int mblen, len;
- char *os = s;
- wchar_t wc;
- char *obuf, *op;
-
- /* all character may be converted into the form of \ooo */
- obuf = op = xmalloc(strlen(s) * 4 + 1);
-
- while (*s != '\0') {
- mblen = mbtowc(&wc, s, mb_cur_max);
- if (mblen <= 0) {
- mblen = 1;
- wc = (unsigned char)*s;
- }
- if (!iswprint(wc) &&
- wc != L'\n' && wc != L'\r' && wc != L'\t') {
- /*
- * control chars which need to be replaced
- * with safe character sequence.
- */
- while (mblen != 0) {
- op += sprintf(op, "\\%03o",
- (unsigned char)*s++);
- mblen--;
- }
- } else {
- while (mblen != 0) {
- *op++ = *s++;
- mblen--;
- }
- }
- }
- *op = '\0';
- len = op - obuf + 1;
- op = xrealloc(os, len);
- (void) memcpy(op, obuf, len);
- xfree(obuf);
- return (op);
-}
-
-/*
- * Once we negotiated with a langtag, server need to map it to a system
- * locale. That is done based on the locale supported on the server side.
- * We know (with the locale supported on Solaris) how the langtag is
- * mapped to. However, from the client point of view, there is no way to
- * know exactly what locale(encoding) will be used.
- *
- * With the bug fix of SSH_BUG_STRING_ENCODING, it is guaranteed that the
- * UTF-8 characters always come over the wire, so it is no longer the problem
- * as long as both side has the bug fix. However if the server side doesn't
- * have the fix, client can't safely perform the code conversion since the
- * incoming character encoding is unknown.
- *
- * To alleviate this situation, we take an empirical approach to find
- * encoding from langtag.
- *
- * If langtag has a subtag, we can directly map the langtag to UTF-8 locale
- * (eg en-US can be mapped to en_US.UTF-8) with a few exceptions.
- * Certain xx_YY locales don't support UTF-8 encoding (probably due to lack
- * of L10N support ..). Those are:
- *
- * no_NO, no_NY, sr_SP, sr_YU
- *
- * They all use ISO8859-X encoding.
- *
- * For those "xx" langtags, some of them can be mapped to "xx.UTF-8",
- * but others cannot. So we need to use the "xx" as the locale name.
- * Those locales are:
- *
- * ar, ca, cs, da, et, fi, he, hu, ja, lt, lv, nl, no, pt, sh, th, tr
- *
- * Their encoding vary. They could be ISO8859-X or EUC or something else.
- * So we don't perform code conversion for these langtags.
- */
-static const char *non_utf8_langtag[] = {
- "no-NO", "no-NY", "sr-SP", "sr-YU",
- "ar", "ca", "cs", "da", "et", "fi", "he", "hu", "ja",
- "lt", "lv", "nl", "no", "pt", "sh", "th", "tr", NULL};
-
-void
-g11n_test_langtag(const char *lang, int server)
-{
- const char **lp;
-
- if (datafellows & SSH_BUG_LOCALES_NOT_LANGTAGS) {
- /*
- * We negotiated with real locale name (not lang tag).
- * We shouldn't expect UTF-8, thus shouldn't do code
- * conversion.
- */
- datafellows |= SSH_BUG_STRING_ENCODING;
- return;
- }
-
- if (datafellows & SSH_BUG_STRING_ENCODING) {
- if (server) {
- /*
- * Whatever bug exists in the client side, server
- * side has nothing to do, since server has no way
- * to know what actual encoding is used on the client
- * side. For example, even if we negotiated with
- * en_US, client locale could be en_US.ISO8859-X or
- * en_US.UTF-8.
- */
- return;
- }
- /*
- * We are on the client side. We'll check with known
- * locales to see if non-UTF8 characters could come in.
- */
- for (lp = non_utf8_langtag; *lp != NULL; lp++) {
- if (strcmp(lang, *lp) == 0)
- break;
- }
- if (*lp == NULL) {
- debug2("Server is expected to use UTF-8 locale");
- datafellows &= ~SSH_BUG_STRING_ENCODING;
- } else {
- /*
- * Server is expected to use non-UTF8 encoding.
- */
- debug2("Enforcing no code conversion: %s", lang);
- }
- }
-}
-
-/*
- * Free all strings in the list and then free the list itself. We know that the
- * list ends with a NULL pointer.
- */
-void
-g11n_freelist(char **list)
-{
- int i = 0;
-
- while (list[i] != NULL) {
- xfree(list[i]);
- i++;
- }
-
- xfree(list);
-}
diff --git a/usr/src/cmd/ssh/libssh/common/hostfile.c b/usr/src/cmd/ssh/libssh/common/hostfile.c
deleted file mode 100644
index f71463a973..0000000000
--- a/usr/src/cmd/ssh/libssh/common/hostfile.c
+++ /dev/null
@@ -1,357 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * Functions for manipulating the known hosts files.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- *
- *
- * Copyright (c) 1999, 2000 Markus Friedl. All rights reserved.
- * Copyright (c) 1999 Niels Provos. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* $OpenBSD: hostfile.c,v 1.45 2006/08/03 03:34:42 deraadt Exp $ */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "includes.h"
-
-#include <openssl/hmac.h>
-#include <openssl/sha.h>
-
-#include "packet.h"
-#include "xmalloc.h"
-#include "match.h"
-#include "key.h"
-#include "hostfile.h"
-#include "log.h"
-
-/*
- * Format of a hashed hostname is <MAGIC><SALT>|<HASHED_HOSTNAME>. <MAGIC> is
- * "|1|". As in non-hashed hostnames this whole string is then followed by a
- * space, a key type and the key (which is out of scope of this function).
- *
- * Example what can be in 's':
- *
- * |1|t17NtsuXSLwP0H0eYdd8vJeNakM=|9XFVPh3jZUrfY6YCWn8Ua5eGZtA=
- */
-static int
-extract_salt(const char *s, u_int l, char *salt, size_t salt_len)
-{
- char *p;
- u_char *b64salt;
- u_int b64len;
- int ret;
-
- if (l < sizeof(HASH_MAGIC) - 1) {
- debug2("extract_salt: string too short");
- return (-1);
- }
- if (strncmp(s, HASH_MAGIC, sizeof(HASH_MAGIC) - 1) != 0) {
- debug2("extract_salt: invalid magic identifier");
- return (-1);
- }
- s += sizeof(HASH_MAGIC) - 1;
- l -= sizeof(HASH_MAGIC) - 1;
- if ((p = memchr(s, HASH_DELIM, l)) == NULL) {
- debug2("extract_salt: missing salt termination character");
- return (-1);
- }
-
- b64len = p - s;
- /* Sanity check */
- if (b64len == 0 || b64len > 1024) {
- debug2("extract_salt: bad encoded salt length %u", b64len);
- return (-1);
- }
- b64salt = xmalloc(1 + b64len);
- memcpy(b64salt, s, b64len);
- b64salt[b64len] = '\0';
-
- ret = __b64_pton(b64salt, (u_char *) salt, salt_len);
- xfree(b64salt);
- if (ret == -1) {
- debug2("extract_salt: salt decode error");
- return (-1);
- }
- if (ret != SHA_DIGEST_LENGTH) {
- debug2("extract_salt: expected salt len %d, got %d",
- SHA_DIGEST_LENGTH, ret);
- return (-1);
- }
-
- return (0);
-}
-
-char *
-host_hash(const char *host, const char *name_from_hostfile, u_int src_len)
-{
- const EVP_MD *md = EVP_sha1();
- HMAC_CTX mac_ctx;
- char salt[256], result[256], uu_salt[512], uu_result[512];
- static char encoded[1024];
- u_int i, len;
-
- len = EVP_MD_size(md);
-
- if (name_from_hostfile == NULL) {
- /* Create new salt */
- for (i = 0; i < len; i++)
- salt[i] = arc4random();
- } else {
- /* Extract salt from known host entry */
- if (extract_salt(name_from_hostfile, src_len, salt,
- sizeof(salt)) == -1)
- return (NULL);
- }
-
- HMAC_Init(&mac_ctx, salt, len, md);
- HMAC_Update(&mac_ctx, (u_char *) host, strlen(host));
- HMAC_Final(&mac_ctx, (u_char *) result, NULL);
- HMAC_cleanup(&mac_ctx);
-
- if (__b64_ntop((u_char *) salt, len, uu_salt, sizeof(uu_salt)) == -1 ||
- __b64_ntop((u_char *) result, len, uu_result, sizeof(uu_result)) == -1)
- fatal("host_hash: __b64_ntop failed");
-
- snprintf(encoded, sizeof(encoded), "%s%s%c%s", HASH_MAGIC, uu_salt,
- HASH_DELIM, uu_result);
-
- return (encoded);
-}
-
-/*
- * Parses an RSA (number of bits, e, n) or DSA key from a string. Moves the
- * pointer over the key. Skips any whitespace at the beginning and at end.
- */
-
-int
-hostfile_read_key(char **cpp, u_int *bitsp, Key *ret)
-{
- char *cp;
-
- /* Skip leading whitespace. */
- for (cp = *cpp; *cp == ' ' || *cp == '\t'; cp++)
- ;
-
- if (key_read(ret, &cp) != 1)
- return 0;
-
- /* Skip trailing whitespace. */
- for (; *cp == ' ' || *cp == '\t'; cp++)
- ;
-
- /* Return results. */
- *cpp = cp;
- *bitsp = key_size(ret);
- return 1;
-}
-
-static int
-hostfile_check_key(int bits, const Key *key, const char *host, const char *filename, int linenum)
-{
- if (key == NULL || key->type != KEY_RSA1 || key->rsa == NULL)
- return 1;
- if (bits != BN_num_bits(key->rsa->n)) {
- log("Warning: %s, line %d: keysize mismatch for host %s: "
- "actual %d vs. announced %d.",
- filename, linenum, host, BN_num_bits(key->rsa->n), bits);
- log("Warning: replace %d with %d in %s, line %d.",
- bits, BN_num_bits(key->rsa->n), filename, linenum);
- }
- return 1;
-}
-
-/*
- * Checks whether the given host (which must be in all lowercase) is already
- * in the list of our known hosts. Returns HOST_OK if the host is known and
- * has the specified key, HOST_NEW if the host is not known, and HOST_CHANGED
- * if the host is known but used to have a different host key.
- *
- * If no 'key' has been specified and a key of type 'keytype' is known
- * for the specified host, then HOST_FOUND is returned.
- */
-
-static HostStatus
-check_host_in_hostfile_by_key_or_type(const char *filename,
- const char *host, const Key *key, int keytype, Key *found, int *numret)
-{
- FILE *f;
- char line[8192];
- int linenum = 0;
- u_int kbits;
- char *cp, *cp2, *hashed_host;
- HostStatus end_return;
-
- debug3("check_host_in_hostfile: filename %s", filename);
-
- /* Open the file containing the list of known hosts. */
- f = fopen(filename, "r");
- if (!f)
- return HOST_NEW;
-
- /*
- * Return value when the loop terminates. This is set to
- * HOST_CHANGED if we have seen a different key for the host and have
- * not found the proper one.
- */
- end_return = HOST_NEW;
-
- /* Go through the file. */
- while (fgets(line, sizeof(line), f)) {
- cp = line;
- linenum++;
-
- /* Skip any leading whitespace, comments and empty lines. */
- for (; *cp == ' ' || *cp == '\t'; cp++)
- ;
- if (!*cp || *cp == '#' || *cp == '\n')
- continue;
-
- /* Find the end of the host name portion. */
- for (cp2 = cp; *cp2 && *cp2 != ' ' && *cp2 != '\t'; cp2++)
- ;
-
- /* Check if the host name matches. */
- if (match_hostname(host, cp, (u_int) (cp2 - cp)) != 1) {
- if (*cp != HASH_DELIM)
- continue;
- hashed_host = host_hash(host, cp, (u_int) (cp2 - cp));
- if (hashed_host == NULL) {
- debug("Invalid hashed host line %d of %s",
- linenum, filename);
- continue;
- }
- if (strncmp(hashed_host, cp, (u_int) (cp2 - cp)) != 0)
- continue;
- }
-
- /* Got a match. Skip host name. */
- cp = cp2;
-
- /*
- * Extract the key from the line. This will skip any leading
- * whitespace. Ignore badly formatted lines.
- */
- if (!hostfile_read_key(&cp, &kbits, found))
- continue;
-
- if (numret != NULL)
- *numret = linenum;
-
- if (key == NULL) {
- /* we found a key of the requested type */
- if (found->type == keytype) {
- fclose(f);
- return HOST_FOUND;
- }
- continue;
- }
-
- if (!hostfile_check_key(kbits, found, host, filename, linenum))
- continue;
-
- /* Check if the current key is the same as the given key. */
- if (key_equal(key, found)) {
- /* Ok, they match. */
- debug3("check_host_in_hostfile: match line %d", linenum);
- fclose(f);
- return HOST_OK;
- }
- /*
- * They do not match. We will continue to go through the
- * file; however, we note that we will not return that it is
- * new.
- */
- end_return = HOST_CHANGED;
- }
- /* Clear variables and close the file. */
- fclose(f);
-
- /*
- * Return either HOST_NEW or HOST_CHANGED, depending on whether we
- * saw a different key for the host.
- */
- return end_return;
-}
-
-HostStatus
-check_host_in_hostfile(const char *filename, const char *host, const Key *key,
- Key *found, int *numret)
-{
- if (key == NULL)
- fatal("no key to look up");
- return (check_host_in_hostfile_by_key_or_type(filename, host, key, 0,
- found, numret));
-}
-
-int
-lookup_key_in_hostfile_by_type(const char *filename, const char *host,
- int keytype, Key *found, int *numret)
-{
- return (check_host_in_hostfile_by_key_or_type(filename, host, NULL,
- keytype, found, numret) == HOST_FOUND);
-}
-
-/*
- * Appends an entry to the host file. Returns false if the entry could not
- * be appended.
- */
-
-int
-add_host_to_hostfile(const char *filename, const char *host, const Key *key,
- int store_hash)
-{
- FILE *f;
- int success = 0;
- char *hashed_host = NULL;
-
- if (key == NULL)
- return 1; /* XXX ? */
- f = fopen(filename, "a");
- if (!f)
- return 0;
-
- if (store_hash) {
- if ((hashed_host = host_hash(host, NULL, 0)) == NULL) {
- error("add_host_to_hostfile: host_hash failed");
- fclose(f);
- return 0;
- }
- }
- fprintf(f, "%s ", store_hash ? hashed_host : host);
-
- if (key_write(key, f)) {
- success = 1;
- } else {
- error("add_host_to_hostfile: saving key in %s failed", filename);
- }
- fprintf(f, "\n");
- fclose(f);
- return success;
-}
diff --git a/usr/src/cmd/ssh/libssh/common/kex.c b/usr/src/cmd/ssh/libssh/common/kex.c
deleted file mode 100644
index 0eb9e780fa..0000000000
--- a/usr/src/cmd/ssh/libssh/common/kex.c
+++ /dev/null
@@ -1,711 +0,0 @@
-/*
- * Copyright (c) 2000, 2001 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: kex.c,v 1.51 2002/06/24 14:55:38 markus Exp $");
-
-#include <locale.h>
-
-#include <openssl/crypto.h>
-
-#include "ssh2.h"
-#include "xmalloc.h"
-#include "buffer.h"
-#include "bufaux.h"
-#include "packet.h"
-#include "compat.h"
-#include "cipher.h"
-#include "kex.h"
-#include "key.h"
-#include "log.h"
-#include "mac.h"
-#include "match.h"
-#include "dispatch.h"
-#include "g11n.h"
-
-#ifdef GSSAPI
-#include "ssh-gss.h"
-#endif
-
-#define KEX_COOKIE_LEN 16
-
-char *session_lang = NULL;
-
-
-/* prototype */
-static void kex_do_hook(Kex *kex);
-static void kex_kexinit_finish(Kex *);
-static void kex_choose_conf(Kex *);
-
-/* put algorithm proposal into buffer */
-static
-void
-kex_prop2buf(Buffer *b, char *proposal[PROPOSAL_MAX])
-{
- int i;
-
- buffer_clear(b);
- /*
- * add a dummy cookie, the cookie will be overwritten by
- * kex_send_kexinit(), each time a kexinit is set
- */
- for (i = 0; i < KEX_COOKIE_LEN; i++)
- buffer_put_char(b, 0);
- for (i = 0; i < PROPOSAL_MAX; i++)
- buffer_put_cstring(b, proposal[i]);
- buffer_put_char(b, 0); /* first_kex_packet_follows */
- buffer_put_int(b, 0); /* uint32 reserved */
-}
-
-/* parse buffer and return algorithm proposal */
-static
-char **
-kex_buf2prop(Buffer *raw, int *first_kex_follows)
-{
- Buffer b;
- int i;
- char **proposal;
-
- proposal = xmalloc(PROPOSAL_MAX * sizeof(char *));
-
- buffer_init(&b);
- buffer_append(&b, buffer_ptr(raw), buffer_len(raw));
- /* skip cookie */
- for (i = 0; i < KEX_COOKIE_LEN; i++)
- buffer_get_char(&b);
- /* extract kex init proposal strings */
- for (i = 0; i < PROPOSAL_MAX; i++) {
- proposal[i] = buffer_get_string(&b,NULL);
- debug2("kex_parse_kexinit: %s", proposal[i]);
- }
- /* first kex follows / reserved */
- i = buffer_get_char(&b);
- if (first_kex_follows != NULL)
- *first_kex_follows = i;
- debug2("kex_parse_kexinit: first_kex_follows %d ", i);
- i = buffer_get_int(&b);
- debug2("kex_parse_kexinit: reserved %d ", i);
- buffer_free(&b);
- return proposal;
-}
-
-static
-void
-kex_prop_free(char **proposal)
-{
- int i;
-
- for (i = 0; i < PROPOSAL_MAX; i++)
- xfree(proposal[i]);
- xfree(proposal);
-}
-
-static void
-kex_protocol_error(int type, u_int32_t seq, void *ctxt)
-{
- error("Hm, kex protocol error: type %d seq %u", type, seq);
-}
-
-static void
-kex_reset_dispatch(void)
-{
-#ifdef ALTPRIVSEP
- /* unprivileged sshd has a kex packet handler that must not be reset */
- debug3("kex_reset_dispatch -- should we dispatch_set(KEXINIT) here? %d && !%d",
- packet_is_server(), packet_is_monitor());
- if (packet_is_server() && !packet_is_monitor()) {
- debug3("kex_reset_dispatch -- skipping dispatch_set(KEXINIT) in unpriv proc");
- return;
- }
-#endif /* ALTPRIVSEP */
-
- dispatch_range(SSH2_MSG_TRANSPORT_MIN,
- SSH2_MSG_TRANSPORT_MAX, &kex_protocol_error);
- dispatch_set(SSH2_MSG_KEXINIT, &kex_input_kexinit);
-}
-
-void
-kex_finish(Kex *kex)
-{
- kex_reset_dispatch();
-
- packet_start(SSH2_MSG_NEWKEYS);
- packet_send();
- /* packet_write_wait(); */
- debug("SSH2_MSG_NEWKEYS sent");
-
-#ifdef ALTPRIVSEP
- if (packet_is_monitor())
- goto skip_newkeys;
-#endif /* ALTPRIVSEP */
- debug("expecting SSH2_MSG_NEWKEYS");
- packet_read_expect(SSH2_MSG_NEWKEYS);
- packet_check_eom();
- debug("SSH2_MSG_NEWKEYS received");
-#ifdef ALTPRIVSEP
-skip_newkeys:
-#endif /* ALTPRIVSEP */
-
- kex->done = 1;
- kex->initial_kex_done = 1; /* never to be cleared once set */
- buffer_clear(&kex->peer);
- /* buffer_clear(&kex->my); */
- kex->flags &= ~KEX_INIT_SENT;
- xfree(kex->name);
- kex->name = NULL;
-}
-
-void
-kex_send_kexinit(Kex *kex)
-{
- u_int32_t rand = 0;
- u_char *cookie;
- int i;
-
- if (kex == NULL) {
- error("kex_send_kexinit: no kex, cannot rekey");
- return;
- }
- if (kex->flags & KEX_INIT_SENT) {
- debug("KEX_INIT_SENT");
- return;
- }
- kex->done = 0;
-
- /* update my proposal -- e.g., add/remove GSS kexalgs */
- kex_do_hook(kex);
-
- /* generate a random cookie */
- if (buffer_len(&kex->my) < KEX_COOKIE_LEN)
- fatal("kex_send_kexinit: kex proposal too short");
- cookie = buffer_ptr(&kex->my);
- for (i = 0; i < KEX_COOKIE_LEN; i++) {
- if (i % 4 == 0)
- rand = arc4random();
- cookie[i] = rand;
- rand >>= 8;
- }
- packet_start(SSH2_MSG_KEXINIT);
- packet_put_raw(buffer_ptr(&kex->my), buffer_len(&kex->my));
- packet_send();
- debug("SSH2_MSG_KEXINIT sent");
- kex->flags |= KEX_INIT_SENT;
-}
-
-void
-kex_input_kexinit(int type, u_int32_t seq, void *ctxt)
-{
- char *ptr;
- u_int dlen;
- int i;
- Kex *kex = (Kex *)ctxt;
-
- debug("SSH2_MSG_KEXINIT received");
- if (kex == NULL)
- fatal("kex_input_kexinit: no kex, cannot rekey");
-
- ptr = packet_get_raw(&dlen);
- buffer_append(&kex->peer, ptr, dlen);
-
- /* discard packet */
- for (i = 0; i < KEX_COOKIE_LEN; i++)
- packet_get_char();
- for (i = 0; i < PROPOSAL_MAX; i++)
- xfree(packet_get_string(NULL));
- (void) packet_get_char();
- (void) packet_get_int();
- packet_check_eom();
-
- kex_kexinit_finish(kex);
-}
-
-/*
- * This is for GSS keyex, where actual KEX offer can change at rekey
- * time due to credential expiration/renewal...
- */
-static
-void
-kex_do_hook(Kex *kex)
-{
- char **prop;
-
- if (kex->kex_hook == NULL)
- return;
-
- /* Unmarshall my proposal, let the hook modify it, remarshall it */
- prop = kex_buf2prop(&kex->my, NULL);
- buffer_clear(&kex->my);
- (kex->kex_hook)(kex, prop);
- kex_prop2buf(&kex->my, prop);
- kex_prop_free(prop);
-}
-
-/* Initiate the key exchange by sending the SSH2_MSG_KEXINIT message. */
-void
-kex_start(Kex *kex)
-{
- kex_send_kexinit(kex);
- kex_reset_dispatch();
-}
-
-/*
- * Allocate a key exchange structure and populate it with a proposal we are
- * going to use. This function does not start the actual key exchange.
- */
-Kex *
-kex_setup(const char *host, char *proposal[PROPOSAL_MAX], Kex_hook_func hook)
-{
- Kex *kex;
-
- kex = xmalloc(sizeof(*kex));
- memset(kex, 0, sizeof(*kex));
- buffer_init(&kex->peer);
- buffer_init(&kex->my);
-
- kex->kex_hook = hook; /* called by kex_send_kexinit() */
-
- if (host != NULL && *host != '\0')
- kex->serverhost = xstrdup(host);
- else
- kex->server = 1;
-
- kex_prop2buf(&kex->my, proposal);
-
- return kex;
-}
-
-static void
-kex_kexinit_finish(Kex *kex)
-{
- if (!(kex->flags & KEX_INIT_SENT))
- kex_send_kexinit(kex);
-
- kex_choose_conf(kex);
-
- if (kex->kex_type >= 0 && kex->kex_type < KEX_MAX &&
- kex->kex[kex->kex_type] != NULL)
- (kex->kex[kex->kex_type])(kex);
- else
- fatal("Unsupported key exchange %d", kex->kex_type);
-}
-
-static void
-choose_lang(char **lang, char *client, char *server)
-{
- if (datafellows & SSH_BUG_LOCALES_NOT_LANGTAGS)
- *lang = match_list(client, server, NULL);
- else
- *lang = g11n_srvr_locale_negotiate(client, NULL);
-}
-
-/*
- * Make the message clear enough so that if this happens the user can figure out
- * the workaround of changing the Ciphers option.
- */
-#define CLIENT_ERR_MSG \
- "Client and server could not agree on a common cipher:\n" \
- " client: %s\n" \
- " server: %s\n" \
- "\n" \
- "The client cipher list can be controlled using the \"Ciphers\" option, \n" \
- "see ssh_config(4) for more information. The \"-o Ciphers=<cipher-list>\"\n" \
- "option may be used to temporarily override the ciphers the client\n" \
- "offers."
-
-/*
- * The server side message goes to syslogd and we do not want to send multiline
- * messages there. What's more, the server side notification may be shorter
- * since we expect that an administrator will deal with that, not the user.
- */
-#define SERVER_ERR_MSG \
- "Client and server could not agree on a common cipher: client \"%s\", " \
- "server \"%s\". The server cipher list can be controlled using the " \
- "\"Ciphers\" option, see sshd_config(4) for more information."
-
-static void
-choose_enc(int is_server, Enc *enc, char *client, char *server)
-{
- char *name = match_list(client, server, NULL);
-
- if (name == NULL) {
- if (is_server == 1)
- fatal(SERVER_ERR_MSG, client, server);
- else
- fatal(CLIENT_ERR_MSG, client, server);
- }
-
- if ((enc->cipher = cipher_by_name(name)) == NULL)
- fatal("matching cipher is not supported: %s", name);
-
- enc->name = name;
- enc->enabled = 0;
- enc->iv = NULL;
- enc->key = NULL;
- enc->key_len = cipher_keylen(enc->cipher);
- enc->block_size = cipher_blocksize(enc->cipher);
-}
-
-static void
-choose_mac(Mac *mac, char *client, char *server)
-{
- char *name = match_list(client, server, NULL);
- if (name == NULL)
- fatal("no matching mac found: client %s server %s",
- client, server);
- if (mac_setup(mac, name) < 0)
- fatal("unsupported mac %s", name);
- /* truncate the key */
- if (datafellows & SSH_BUG_HMAC)
- mac->key_len = 16;
- mac->name = name;
- mac->key = NULL;
- mac->enabled = 0;
-}
-
-static void
-choose_comp(Comp *comp, char *client, char *server)
-{
- char *name = match_list(client, server, NULL);
- if (name == NULL)
- fatal("no matching comp found: client %s server %s", client, server);
- if (strcmp(name, "zlib") == 0) {
- comp->type = 1;
- } else if (strcmp(name, "none") == 0) {
- comp->type = 0;
- } else {
- fatal("unsupported comp %s", name);
- }
- comp->name = name;
-}
-
-static void
-choose_kex(Kex *k, char *client, char *server)
-{
- k->name = match_list(client, server, NULL);
- if (k->name == NULL)
- fatal("no common kex alg: client '%s', server '%s'", client,
- server);
- /* XXX Finish 3.6/7 merge of kex stuff -- choose_kex() done */
- if (strcmp(k->name, KEX_DH1) == 0) {
- k->kex_type = KEX_DH_GRP1_SHA1;
- } else if (strcmp(k->name, KEX_DHGEX) == 0) {
- k->kex_type = KEX_DH_GEX_SHA1;
-#ifdef GSSAPI
- } else if (strncmp(k->name, KEX_GSS_SHA1, sizeof(KEX_GSS_SHA1)-1) == 0) {
- k->kex_type = KEX_GSS_GRP1_SHA1;
-#endif
- } else
- fatal("bad kex alg %s", k->name);
-}
-
-static void
-choose_hostkeyalg(Kex *k, char *client, char *server)
-{
- char *hostkeyalg = match_list(client, server, NULL);
- if (hostkeyalg == NULL)
- fatal("no hostkey alg");
- k->hostkey_type = key_type_from_name(hostkeyalg);
- if (k->hostkey_type == KEY_UNSPEC)
- fatal("bad hostkey alg '%s'", hostkeyalg);
- xfree(hostkeyalg);
-}
-
-static int
-proposals_match(char *my[PROPOSAL_MAX], char *peer[PROPOSAL_MAX])
-{
- static int check[] = {
- PROPOSAL_KEX_ALGS, PROPOSAL_SERVER_HOST_KEY_ALGS, -1
- };
- int *idx;
- char *p;
-
- for (idx = &check[0]; *idx != -1; idx++) {
- if ((p = strchr(my[*idx], ',')) != NULL)
- *p = '\0';
- if ((p = strchr(peer[*idx], ',')) != NULL)
- *p = '\0';
- if (strcmp(my[*idx], peer[*idx]) != 0) {
- debug2("proposal mismatch: my %s peer %s",
- my[*idx], peer[*idx]);
- return (0);
- }
- }
- debug2("proposals match");
- return (1);
-}
-
-static void
-kex_choose_conf(Kex *kex)
-{
- Newkeys *newkeys;
- char **my, **peer;
- char **cprop, **sprop;
- char *p_langs_c2s, *p_langs_s2c; /* peer's langs */
- char *plangs = NULL; /* peer's langs*/
- char *mlangs = NULL; /* my langs */
- int nenc, nmac, ncomp;
- int mode;
- int ctos; /* direction: if true client-to-server */
- int need;
- int first_kex_follows, type;
-
- my = kex_buf2prop(&kex->my, NULL);
- peer = kex_buf2prop(&kex->peer, &first_kex_follows);
-
- if (kex->server) {
- cprop=peer;
- sprop=my;
- } else {
- cprop=my;
- sprop=peer;
- }
-
- /* Algorithm Negotiation */
- for (mode = 0; mode < MODE_MAX; mode++) {
- newkeys = xmalloc(sizeof(*newkeys));
- memset(newkeys, 0, sizeof(*newkeys));
- kex->newkeys[mode] = newkeys;
- ctos = (!kex->server && mode == MODE_OUT) || (kex->server && mode == MODE_IN);
- nenc = ctos ? PROPOSAL_ENC_ALGS_CTOS : PROPOSAL_ENC_ALGS_STOC;
- nmac = ctos ? PROPOSAL_MAC_ALGS_CTOS : PROPOSAL_MAC_ALGS_STOC;
- ncomp = ctos ? PROPOSAL_COMP_ALGS_CTOS : PROPOSAL_COMP_ALGS_STOC;
- choose_enc(kex->server, &newkeys->enc, cprop[nenc], sprop[nenc]);
- choose_mac(&newkeys->mac, cprop[nmac], sprop[nmac]);
- choose_comp(&newkeys->comp, cprop[ncomp], sprop[ncomp]);
- debug("kex: %s %s %s %s",
- ctos ? "client->server" : "server->client",
- newkeys->enc.name,
- newkeys->mac.name,
- newkeys->comp.name);
- }
- choose_kex(kex, cprop[PROPOSAL_KEX_ALGS], sprop[PROPOSAL_KEX_ALGS]);
- choose_hostkeyalg(kex, cprop[PROPOSAL_SERVER_HOST_KEY_ALGS],
- sprop[PROPOSAL_SERVER_HOST_KEY_ALGS]);
- need = 0;
- for (mode = 0; mode < MODE_MAX; mode++) {
- newkeys = kex->newkeys[mode];
- if (need < newkeys->enc.key_len)
- need = newkeys->enc.key_len;
- if (need < newkeys->enc.block_size)
- need = newkeys->enc.block_size;
- if (need < newkeys->mac.key_len)
- need = newkeys->mac.key_len;
- }
- /* XXX need runden? */
- kex->we_need = need;
-
- /* ignore the next message if the proposals do not match */
- if (first_kex_follows && !proposals_match(my, peer) &&
- !(datafellows & SSH_BUG_FIRSTKEX)) {
- type = packet_read();
- debug2("skipping next packet (type %u)", type);
- }
-
- /* Language/locale negotiation -- not worth doing on re-key */
-
- if (!kex->initial_kex_done) {
- p_langs_c2s = peer[PROPOSAL_LANG_CTOS];
- p_langs_s2c = peer[PROPOSAL_LANG_STOC];
- debug("Peer sent proposed langtags, ctos: %s", p_langs_c2s);
- debug("Peer sent proposed langtags, stoc: %s", p_langs_s2c);
- plangs = NULL;
-
- /* We propose the same langs for each protocol direction */
- mlangs = my[PROPOSAL_LANG_STOC];
- debug("We proposed langtags, ctos: %s", my[PROPOSAL_LANG_CTOS]);
- debug("We proposed langtags, stoc: %s", mlangs);
-
- /*
- * Why oh why did they bother with negotiating langs for
- * each protocol direction?!
- *
- * The semantics of this are vaguely specified, but one can
- * imagine using one language (locale) for the whole session and
- * a different one for message localization (e.g., 'en_US.UTF-8'
- * overall and 'fr' for messages). Weird? Maybe. But lang
- * tags don't include codeset info, like locales do...
- *
- * So, server-side we want:
- * - setlocale(LC_ALL, c2s_locale);
- * and
- * - setlocale(LC_MESSAGES, s2c_locale);
- *
- * Client-side we don't really care. But we could do:
- *
- * - when very verbose, tell the use what lang the server's
- * messages are in, if left out in the protocol
- * - when sending messages to the server, and if applicable, we
- * can localize them according to the language negotiated for
- * that direction.
- *
- * But for now we do nothing on the client side.
- */
- if ((p_langs_c2s && *p_langs_c2s) && !(p_langs_s2c && *p_langs_s2c))
- plangs = p_langs_c2s;
- else if ((p_langs_s2c && *p_langs_s2c) && !(p_langs_c2s && *p_langs_c2s))
- plangs = p_langs_s2c;
- else
- plangs = p_langs_c2s;
-
- if (kex->server) {
- if (plangs && mlangs && *plangs && *mlangs) {
- char *locale;
-
- g11n_test_langtag(plangs, 1);
-
- choose_lang(&locale, plangs, mlangs);
- if (locale) {
- g11n_setlocale(LC_ALL, locale);
- debug("Negotiated main locale: %s", locale);
- packet_send_debug("Negotiated main locale: %s", locale);
- xfree(locale);
- }
- if (plangs != p_langs_s2c &&
- p_langs_s2c && *p_langs_s2c) {
- choose_lang(&locale, p_langs_s2c, mlangs);
- if (locale) {
- g11n_setlocale(LC_MESSAGES, locale);
- debug("Negotiated messages locale: %s", locale);
- packet_send_debug("Negotiated "
- "messages locale: %s", locale);
- xfree(locale);
- }
- }
- }
- }
- else {
- if (plangs && mlangs && *plangs && *mlangs &&
- !(datafellows & SSH_BUG_LOCALES_NOT_LANGTAGS)) {
- char *lang;
- lang = g11n_clnt_langtag_negotiate(mlangs, plangs);
- if (lang) {
- session_lang = lang;
- debug("Negotiated lang: %s", lang);
- g11n_test_langtag(lang, 0);
- }
- }
- }
- }
-
- kex_prop_free(my);
- kex_prop_free(peer);
-}
-
-static u_char *
-derive_key(Kex *kex, int id, int need, u_char *hash, BIGNUM *shared_secret)
-{
- Buffer b;
- const EVP_MD *evp_md = EVP_sha1();
- EVP_MD_CTX md;
- char c = id;
- int have;
- int mdsz = EVP_MD_size(evp_md);
- u_char *digest = xmalloc(roundup(need, mdsz));
-
- buffer_init(&b);
- buffer_put_bignum2(&b, shared_secret);
-
- /* K1 = HASH(K || H || "A" || session_id) */
- EVP_DigestInit(&md, evp_md);
- if (!(datafellows & SSH_BUG_DERIVEKEY))
- EVP_DigestUpdate(&md, buffer_ptr(&b), buffer_len(&b));
- EVP_DigestUpdate(&md, hash, mdsz);
- EVP_DigestUpdate(&md, &c, 1);
- EVP_DigestUpdate(&md, kex->session_id, kex->session_id_len);
- EVP_DigestFinal(&md, digest, NULL);
-
- /*
- * expand key:
- * Kn = HASH(K || H || K1 || K2 || ... || Kn-1)
- * Key = K1 || K2 || ... || Kn
- */
- for (have = mdsz; need > have; have += mdsz) {
- EVP_DigestInit(&md, evp_md);
- if (!(datafellows & SSH_BUG_DERIVEKEY))
- EVP_DigestUpdate(&md, buffer_ptr(&b), buffer_len(&b));
- EVP_DigestUpdate(&md, hash, mdsz);
- EVP_DigestUpdate(&md, digest, have);
- EVP_DigestFinal(&md, digest + have, NULL);
- }
- buffer_free(&b);
-#ifdef DEBUG_KEX
- fprintf(stderr, "key '%c'== ", c);
- dump_digest("key", digest, need);
-#endif
- return digest;
-}
-
-Newkeys *current_keys[MODE_MAX];
-
-#define NKEYS 6
-void
-kex_derive_keys(Kex *kex, u_char *hash, BIGNUM *shared_secret)
-{
- u_char *keys[NKEYS];
- int i, mode, ctos;
-
- for (i = 0; i < NKEYS; i++)
- keys[i] = derive_key(kex, 'A'+i, kex->we_need, hash, shared_secret);
-
- debug2("kex_derive_keys");
- for (mode = 0; mode < MODE_MAX; mode++) {
- current_keys[mode] = kex->newkeys[mode];
- kex->newkeys[mode] = NULL;
- ctos = (!kex->server && mode == MODE_OUT) || (kex->server && mode == MODE_IN);
- current_keys[mode]->enc.iv = keys[ctos ? 0 : 1];
- current_keys[mode]->enc.key = keys[ctos ? 2 : 3];
- current_keys[mode]->mac.key = keys[ctos ? 4 : 5];
- }
-}
-
-Newkeys *
-kex_get_newkeys(int mode)
-{
- Newkeys *ret;
-
- ret = current_keys[mode];
- current_keys[mode] = NULL;
- return ret;
-}
-
-#if defined(DEBUG_KEX) || defined(DEBUG_KEXDH)
-void
-dump_digest(char *msg, u_char *digest, int len)
-{
- int i;
-
- fprintf(stderr, "%s\n", msg);
- for (i = 0; i< len; i++) {
- fprintf(stderr, "%02x", digest[i]);
- if (i%32 == 31)
- fprintf(stderr, "\n");
- else if (i%8 == 7)
- fprintf(stderr, " ");
- }
- fprintf(stderr, "\n");
-}
-#endif
diff --git a/usr/src/cmd/ssh/libssh/common/kexdh.c b/usr/src/cmd/ssh/libssh/common/kexdh.c
deleted file mode 100644
index b15ecd2c5b..0000000000
--- a/usr/src/cmd/ssh/libssh/common/kexdh.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2001 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: kexdh.c,v 1.18 2002/03/18 17:50:31 provos Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <openssl/crypto.h>
-#include <openssl/bn.h>
-
-#include "xmalloc.h"
-#include "buffer.h"
-#include "bufaux.h"
-#include "key.h"
-#include "kex.h"
-#include "log.h"
-#include "packet.h"
-#include "dh.h"
-#include "ssh2.h"
-
-u_char *
-kex_dh_hash(
- char *client_version_string,
- char *server_version_string,
- char *ckexinit, int ckexinitlen,
- char *skexinit, int skexinitlen,
- u_char *serverhostkeyblob, int sbloblen,
- BIGNUM *client_dh_pub,
- BIGNUM *server_dh_pub,
- BIGNUM *shared_secret)
-{
- Buffer b;
- static u_char digest[EVP_MAX_MD_SIZE];
- const EVP_MD *evp_md = EVP_sha1();
- EVP_MD_CTX md;
-
- buffer_init(&b);
- buffer_put_cstring(&b, client_version_string);
- buffer_put_cstring(&b, server_version_string);
-
- /* kexinit messages: fake header: len+SSH2_MSG_KEXINIT */
- buffer_put_int(&b, ckexinitlen+1);
- buffer_put_char(&b, SSH2_MSG_KEXINIT);
- buffer_append(&b, ckexinit, ckexinitlen);
- buffer_put_int(&b, skexinitlen+1);
- buffer_put_char(&b, SSH2_MSG_KEXINIT);
- buffer_append(&b, skexinit, skexinitlen);
-
- buffer_put_string(&b, serverhostkeyblob, sbloblen);
- buffer_put_bignum2(&b, client_dh_pub);
- buffer_put_bignum2(&b, server_dh_pub);
- buffer_put_bignum2(&b, shared_secret);
-
-#ifdef DEBUG_KEX
- buffer_dump(&b);
-#endif
- EVP_DigestInit(&md, evp_md);
- EVP_DigestUpdate(&md, buffer_ptr(&b), buffer_len(&b));
- EVP_DigestFinal(&md, digest, NULL);
-
- buffer_free(&b);
-
-#ifdef DEBUG_KEX
- dump_digest("hash", digest, EVP_MD_size(evp_md));
-#endif
- return digest;
-}
diff --git a/usr/src/cmd/ssh/libssh/common/kexdhc.c b/usr/src/cmd/ssh/libssh/common/kexdhc.c
deleted file mode 100644
index 1c75f8449f..0000000000
--- a/usr/src/cmd/ssh/libssh/common/kexdhc.c
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright (c) 2001 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: kexdh.c,v 1.18 2002/03/18 17:50:31 provos Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <openssl/crypto.h>
-#include <openssl/bn.h>
-
-#include "xmalloc.h"
-#include "buffer.h"
-#include "bufaux.h"
-#include "key.h"
-#include "kex.h"
-#include "log.h"
-#include "packet.h"
-#include "dh.h"
-#include "ssh2.h"
-
-void
-kexdh_client(Kex *kex)
-{
- BIGNUM *dh_server_pub = NULL, *shared_secret = NULL;
- DH *dh;
- Key *server_host_key;
- u_char *server_host_key_blob = NULL, *signature = NULL;
- u_char *kbuf, *hash;
- u_int klen, kout, slen, sbloblen;
-
- /* generate and send 'e', client DH public key */
- dh = dh_new_group1();
- dh_gen_key(dh, kex->we_need * 8);
- packet_start(SSH2_MSG_KEXDH_INIT);
- packet_put_bignum2(dh->pub_key);
- packet_send();
-
- debug("sending SSH2_MSG_KEXDH_INIT");
-#ifdef DEBUG_KEXDH
- DHparams_print_fp(stderr, dh);
- fprintf(stderr, "pub= ");
- BN_print_fp(stderr, dh->pub_key);
- fprintf(stderr, "\n");
-#endif
-
- debug("expecting SSH2_MSG_KEXDH_REPLY");
- packet_read_expect(SSH2_MSG_KEXDH_REPLY);
-
- /* key, cert */
- server_host_key_blob = packet_get_string(&sbloblen);
- server_host_key = key_from_blob(server_host_key_blob, sbloblen);
- if (server_host_key == NULL)
- fatal("cannot decode server_host_key_blob");
- if (server_host_key->type != kex->hostkey_type)
- fatal("type mismatch for decoded server_host_key_blob");
- if (kex->verify_host_key == NULL)
- fatal("cannot verify server_host_key");
- if (kex->verify_host_key(server_host_key) == -1)
- fatal("server_host_key verification failed");
-
- /* DH paramter f, server public DH key */
- if ((dh_server_pub = BN_new()) == NULL)
- fatal("dh_server_pub == NULL");
- packet_get_bignum2(dh_server_pub);
-
-#ifdef DEBUG_KEXDH
- fprintf(stderr, "dh_server_pub= ");
- BN_print_fp(stderr, dh_server_pub);
- fprintf(stderr, "\n");
- debug("bits %d", BN_num_bits(dh_server_pub));
-#endif
-
- /* signed H */
- signature = packet_get_string(&slen);
- packet_check_eom();
-
- if (!dh_pub_is_valid(dh, dh_server_pub))
- packet_disconnect("bad server public DH value");
-
- klen = DH_size(dh);
- kbuf = xmalloc(klen);
- kout = DH_compute_key(kbuf, dh_server_pub, dh);
-#ifdef DEBUG_KEXDH
- dump_digest("shared secret", kbuf, kout);
-#endif
- if ((shared_secret = BN_new()) == NULL)
- fatal("kexdh_client: BN_new failed");
- BN_bin2bn(kbuf, kout, shared_secret);
- memset(kbuf, 0, klen);
- xfree(kbuf);
-
- /* calc and verify H */
- hash = kex_dh_hash(
- kex->client_version_string,
- kex->server_version_string,
- buffer_ptr(&kex->my), buffer_len(&kex->my),
- buffer_ptr(&kex->peer), buffer_len(&kex->peer),
- server_host_key_blob, sbloblen,
- dh->pub_key,
- dh_server_pub,
- shared_secret
- );
- xfree(server_host_key_blob);
- BN_clear_free(dh_server_pub);
- DH_free(dh);
-
- if (key_verify(server_host_key, signature, slen, hash, 20) != 1)
- fatal("key_verify failed for server_host_key");
- key_free(server_host_key);
- xfree(signature);
-
- /* save session id */
- if (kex->session_id == NULL) {
- kex->session_id_len = 20;
- kex->session_id = xmalloc(kex->session_id_len);
- memcpy(kex->session_id, hash, kex->session_id_len);
- }
-
- kex_derive_keys(kex, hash, shared_secret);
- BN_clear_free(shared_secret);
- kex_finish(kex);
-}
diff --git a/usr/src/cmd/ssh/libssh/common/kexdhs.c b/usr/src/cmd/ssh/libssh/common/kexdhs.c
deleted file mode 100644
index 5e14b1333f..0000000000
--- a/usr/src/cmd/ssh/libssh/common/kexdhs.c
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright (c) 2001 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: kexdh.c,v 1.18 2002/03/18 17:50:31 provos Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <openssl/crypto.h>
-#include <openssl/bn.h>
-
-#include "xmalloc.h"
-#include "buffer.h"
-#include "bufaux.h"
-#include "key.h"
-#include "kex.h"
-#include "log.h"
-#include "packet.h"
-#include "dh.h"
-#include "ssh2.h"
-
-void
-kexdh_server(Kex *kex)
-{
- BIGNUM *shared_secret = NULL, *dh_client_pub = NULL;
- DH *dh;
- Key *server_host_key;
- u_char *kbuf, *hash, *signature = NULL, *server_host_key_blob = NULL;
- u_int sbloblen, klen, kout;
- u_int slen;
-
- /* generate server DH public key */
- dh = dh_new_group1();
- dh_gen_key(dh, kex->we_need * 8);
-
- debug("expecting SSH2_MSG_KEXDH_INIT");
- packet_read_expect(SSH2_MSG_KEXDH_INIT);
-
- if (kex->load_host_key == NULL)
- fatal("Cannot load hostkey");
- server_host_key = kex->load_host_key(kex->hostkey_type);
- if (server_host_key == NULL)
- fatal("Unsupported hostkey type %d", kex->hostkey_type);
-
- /* key, cert */
- if ((dh_client_pub = BN_new()) == NULL)
- fatal("dh_client_pub == NULL");
- packet_get_bignum2(dh_client_pub);
- packet_check_eom();
-
-#ifdef DEBUG_KEXDH
- fprintf(stderr, "dh_client_pub= ");
- BN_print_fp(stderr, dh_client_pub);
- fprintf(stderr, "\n");
- debug("bits %d", BN_num_bits(dh_client_pub));
-#endif
-
-#ifdef DEBUG_KEXDH
- DHparams_print_fp(stderr, dh);
- fprintf(stderr, "pub= ");
- BN_print_fp(stderr, dh->pub_key);
- fprintf(stderr, "\n");
-#endif
- if (!dh_pub_is_valid(dh, dh_client_pub))
- packet_disconnect("bad client public DH value");
-
- klen = DH_size(dh);
- kbuf = xmalloc(klen);
- kout = DH_compute_key(kbuf, dh_client_pub, dh);
-#ifdef DEBUG_KEXDH
- dump_digest("shared secret", kbuf, kout);
-#endif
- if ((shared_secret = BN_new()) == NULL)
- fatal("kexdh_server: BN_new failed");
- BN_bin2bn(kbuf, kout, shared_secret);
- memset(kbuf, 0, klen);
- xfree(kbuf);
-
- key_to_blob(server_host_key, &server_host_key_blob, &sbloblen);
-
- /* calc H */
- hash = kex_dh_hash(
- kex->client_version_string,
- kex->server_version_string,
- buffer_ptr(&kex->peer), buffer_len(&kex->peer),
- buffer_ptr(&kex->my), buffer_len(&kex->my),
- server_host_key_blob, sbloblen,
- dh_client_pub,
- dh->pub_key,
- shared_secret
- );
- BN_clear_free(dh_client_pub);
-
- /* save session id := H */
- /* XXX hashlen depends on KEX */
- if (kex->session_id == NULL) {
- kex->session_id_len = 20;
- kex->session_id = xmalloc(kex->session_id_len);
- memcpy(kex->session_id, hash, kex->session_id_len);
- }
-
- /* sign H */
- /* XXX hashlen depends on KEX */
- key_sign(server_host_key, &signature, &slen, hash, 20);
-
- /* destroy_sensitive_data(); */
-
- /* send server hostkey, DH pubkey 'f' and singed H */
- packet_start(SSH2_MSG_KEXDH_REPLY);
- packet_put_string(server_host_key_blob, sbloblen);
- packet_put_bignum2(dh->pub_key); /* f */
- packet_put_string(signature, slen);
- packet_send();
-
- xfree(signature);
- xfree(server_host_key_blob);
- /* have keys, free DH */
- DH_free(dh);
-
- kex_derive_keys(kex, hash, shared_secret);
- BN_clear_free(shared_secret);
- kex_finish(kex);
-}
diff --git a/usr/src/cmd/ssh/libssh/common/kexgex.c b/usr/src/cmd/ssh/libssh/common/kexgex.c
deleted file mode 100644
index 3652e1c020..0000000000
--- a/usr/src/cmd/ssh/libssh/common/kexgex.c
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (c) 2000 Niels Provos. All rights reserved.
- * Copyright (c) 2001 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: kexgex.c,v 1.22 2002/03/24 17:27:03 stevesk Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <openssl/bn.h>
-
-#include "xmalloc.h"
-#include "buffer.h"
-#include "bufaux.h"
-#include "key.h"
-#include "kex.h"
-#include "log.h"
-#include "packet.h"
-#include "dh.h"
-#include "ssh2.h"
-#include "compat.h"
-
-u_char *
-kexgex_hash(
- char *client_version_string,
- char *server_version_string,
- char *ckexinit, int ckexinitlen,
- char *skexinit, int skexinitlen,
- u_char *serverhostkeyblob, int sbloblen,
- int min, int wantbits, int max, BIGNUM *prime, BIGNUM *gen,
- BIGNUM *client_dh_pub,
- BIGNUM *server_dh_pub,
- BIGNUM *shared_secret)
-{
- Buffer b;
- static u_char digest[EVP_MAX_MD_SIZE];
- const EVP_MD *evp_md = EVP_sha1();
- EVP_MD_CTX md;
-
- buffer_init(&b);
- buffer_put_cstring(&b, client_version_string);
- buffer_put_cstring(&b, server_version_string);
-
- /* kexinit messages: fake header: len+SSH2_MSG_KEXINIT */
- buffer_put_int(&b, ckexinitlen+1);
- buffer_put_char(&b, SSH2_MSG_KEXINIT);
- buffer_append(&b, ckexinit, ckexinitlen);
- buffer_put_int(&b, skexinitlen+1);
- buffer_put_char(&b, SSH2_MSG_KEXINIT);
- buffer_append(&b, skexinit, skexinitlen);
-
- buffer_put_string(&b, serverhostkeyblob, sbloblen);
- if (min == -1 || max == -1)
- buffer_put_int(&b, wantbits);
- else {
- buffer_put_int(&b, min);
- buffer_put_int(&b, wantbits);
- buffer_put_int(&b, max);
- }
- buffer_put_bignum2(&b, prime);
- buffer_put_bignum2(&b, gen);
- buffer_put_bignum2(&b, client_dh_pub);
- buffer_put_bignum2(&b, server_dh_pub);
- buffer_put_bignum2(&b, shared_secret);
-
-#ifdef DEBUG_KEXDH
- buffer_dump(&b);
-#endif
- EVP_DigestInit(&md, evp_md);
- EVP_DigestUpdate(&md, buffer_ptr(&b), buffer_len(&b));
- EVP_DigestFinal(&md, digest, NULL);
-
- buffer_free(&b);
-
-#ifdef DEBUG_KEXDH
- dump_digest("hash", digest, EVP_MD_size(evp_md));
-#endif
- return digest;
-}
diff --git a/usr/src/cmd/ssh/libssh/common/kexgexc.c b/usr/src/cmd/ssh/libssh/common/kexgexc.c
deleted file mode 100644
index 5fddebaed9..0000000000
--- a/usr/src/cmd/ssh/libssh/common/kexgexc.c
+++ /dev/null
@@ -1,195 +0,0 @@
-/*
- * Copyright (c) 2000 Niels Provos. All rights reserved.
- * Copyright (c) 2001 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: kexgex.c,v 1.22 2002/03/24 17:27:03 stevesk Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <openssl/bn.h>
-
-#include "xmalloc.h"
-#include "buffer.h"
-#include "bufaux.h"
-#include "key.h"
-#include "kex.h"
-#include "log.h"
-#include "packet.h"
-#include "dh.h"
-#include "ssh2.h"
-#include "compat.h"
-
-void
-kexgex_client(Kex *kex)
-{
- BIGNUM *dh_server_pub = NULL, *shared_secret = NULL;
- BIGNUM *p = NULL, *g = NULL;
- Key *server_host_key;
- u_char *kbuf, *hash, *signature = NULL, *server_host_key_blob = NULL;
- u_int klen, kout, slen, sbloblen;
- int min, max, nbits;
- DH *dh;
-
- nbits = dh_estimate(kex->we_need * 8);
-
- if (datafellows & SSH_OLD_DHGEX) {
- debug("SSH2_MSG_KEX_DH_GEX_REQUEST_OLD sent");
-
- /* Old GEX request */
- packet_start(SSH2_MSG_KEX_DH_GEX_REQUEST_OLD);
- packet_put_int(nbits);
- min = DH_GRP_MIN;
- max = DH_GRP_MAX;
- } else {
- debug("SSH2_MSG_KEX_DH_GEX_REQUEST sent");
-
- /* New GEX request */
- min = DH_GRP_MIN;
- max = DH_GRP_MAX;
- packet_start(SSH2_MSG_KEX_DH_GEX_REQUEST);
- packet_put_int(min);
- packet_put_int(nbits);
- packet_put_int(max);
- }
-#ifdef DEBUG_KEXDH
- fprintf(stderr, "\nmin = %d, nbits = %d, max = %d\n",
- min, nbits, max);
-#endif
- packet_send();
-
- debug("expecting SSH2_MSG_KEX_DH_GEX_GROUP");
- packet_read_expect(SSH2_MSG_KEX_DH_GEX_GROUP);
-
- if ((p = BN_new()) == NULL)
- fatal("BN_new");
- packet_get_bignum2(p);
- if ((g = BN_new()) == NULL)
- fatal("BN_new");
- packet_get_bignum2(g);
- packet_check_eom();
-
- if (BN_num_bits(p) < min || BN_num_bits(p) > max)
- fatal("DH_GEX group out of range: %d !< %d !< %d",
- min, BN_num_bits(p), max);
-
- dh = dh_new_group(g, p);
- dh_gen_key(dh, kex->we_need * 8);
-
-#ifdef DEBUG_KEXDH
- DHparams_print_fp(stderr, dh);
- fprintf(stderr, "pub= ");
- BN_print_fp(stderr, dh->pub_key);
- fprintf(stderr, "\n");
-#endif
-
- debug("SSH2_MSG_KEX_DH_GEX_INIT sent");
- /* generate and send 'e', client DH public key */
- packet_start(SSH2_MSG_KEX_DH_GEX_INIT);
- packet_put_bignum2(dh->pub_key);
- packet_send();
-
- debug("expecting SSH2_MSG_KEX_DH_GEX_REPLY");
- packet_read_expect(SSH2_MSG_KEX_DH_GEX_REPLY);
-
- /* key, cert */
- server_host_key_blob = packet_get_string(&sbloblen);
- server_host_key = key_from_blob(server_host_key_blob, sbloblen);
- if (server_host_key == NULL)
- fatal("cannot decode server_host_key_blob");
- if (server_host_key->type != kex->hostkey_type)
- fatal("type mismatch for decoded server_host_key_blob");
- if (kex->verify_host_key == NULL)
- fatal("cannot verify server_host_key");
- if (kex->verify_host_key(server_host_key) == -1)
- fatal("server_host_key verification failed");
-
- /* DH paramter f, server public DH key */
- if ((dh_server_pub = BN_new()) == NULL)
- fatal("dh_server_pub == NULL");
- packet_get_bignum2(dh_server_pub);
-
-#ifdef DEBUG_KEXDH
- fprintf(stderr, "dh_server_pub= ");
- BN_print_fp(stderr, dh_server_pub);
- fprintf(stderr, "\n");
- debug("bits %d", BN_num_bits(dh_server_pub));
-#endif
-
- /* signed H */
- signature = packet_get_string(&slen);
- packet_check_eom();
-
- if (!dh_pub_is_valid(dh, dh_server_pub))
- packet_disconnect("bad server public DH value");
-
- klen = DH_size(dh);
- kbuf = xmalloc(klen);
- kout = DH_compute_key(kbuf, dh_server_pub, dh);
-#ifdef DEBUG_KEXDH
- dump_digest("shared secret", kbuf, kout);
-#endif
- if ((shared_secret = BN_new()) == NULL)
- fatal("kexgex_client: BN_new failed");
- BN_bin2bn(kbuf, kout, shared_secret);
- memset(kbuf, 0, klen);
- xfree(kbuf);
-
- if (datafellows & SSH_OLD_DHGEX)
- min = max = -1;
-
- /* calc and verify H */
- hash = kexgex_hash(
- kex->client_version_string,
- kex->server_version_string,
- buffer_ptr(&kex->my), buffer_len(&kex->my),
- buffer_ptr(&kex->peer), buffer_len(&kex->peer),
- server_host_key_blob, sbloblen,
- min, nbits, max,
- dh->p, dh->g,
- dh->pub_key,
- dh_server_pub,
- shared_secret
- );
- /* have keys, free DH */
- DH_free(dh);
- xfree(server_host_key_blob);
- BN_clear_free(dh_server_pub);
-
- if (key_verify(server_host_key, signature, slen, hash, 20) != 1)
- fatal("key_verify failed for server_host_key");
- key_free(server_host_key);
- xfree(signature);
-
- /* save session id */
- if (kex->session_id == NULL) {
- kex->session_id_len = 20;
- kex->session_id = xmalloc(kex->session_id_len);
- memcpy(kex->session_id, hash, kex->session_id_len);
- }
- kex_derive_keys(kex, hash, shared_secret);
- BN_clear_free(shared_secret);
-
- kex_finish(kex);
-}
diff --git a/usr/src/cmd/ssh/libssh/common/kexgexs.c b/usr/src/cmd/ssh/libssh/common/kexgexs.c
deleted file mode 100644
index b0bd4e3272..0000000000
--- a/usr/src/cmd/ssh/libssh/common/kexgexs.c
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * Copyright (c) 2000 Niels Provos. All rights reserved.
- * Copyright (c) 2001 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: kexgex.c,v 1.22 2002/03/24 17:27:03 stevesk Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <openssl/bn.h>
-
-#include "xmalloc.h"
-#include "buffer.h"
-#include "bufaux.h"
-#include "key.h"
-#include "kex.h"
-#include "log.h"
-#include "packet.h"
-#include "dh.h"
-#include "ssh2.h"
-#include "compat.h"
-
-void
-kexgex_server(Kex *kex)
-{
- BIGNUM *shared_secret = NULL, *dh_client_pub = NULL;
- Key *server_host_key;
- DH *dh;
- u_char *kbuf, *hash, *signature = NULL, *server_host_key_blob = NULL;
- u_int sbloblen, klen, kout, slen;
- int min = -1, max = -1, nbits = -1, type;
-
- if (kex->load_host_key == NULL)
- fatal("Cannot load hostkey");
- server_host_key = kex->load_host_key(kex->hostkey_type);
- if (server_host_key == NULL)
- fatal("Unsupported hostkey type %d", kex->hostkey_type);
-
- type = packet_read();
- switch (type) {
- case SSH2_MSG_KEX_DH_GEX_REQUEST:
- debug("SSH2_MSG_KEX_DH_GEX_REQUEST received");
- min = packet_get_int();
- nbits = packet_get_int();
- max = packet_get_int();
- min = MAX(DH_GRP_MIN, min);
- max = MIN(DH_GRP_MAX, max);
- break;
- case SSH2_MSG_KEX_DH_GEX_REQUEST_OLD:
- debug("SSH2_MSG_KEX_DH_GEX_REQUEST_OLD received");
- nbits = packet_get_int();
- min = DH_GRP_MIN;
- max = DH_GRP_MAX;
- /* unused for old GEX */
- break;
- default:
- fatal("protocol error during kex, no DH_GEX_REQUEST: %d", type);
- }
- packet_check_eom();
-
- if (max < min || nbits < min || max < nbits)
- fatal("DH_GEX_REQUEST, bad parameters: %d !< %d !< %d",
- min, nbits, max);
-
- /* Contact privileged parent */
- dh = choose_dh(min, nbits, max);
- if (dh == NULL)
- packet_disconnect("Protocol error: no matching DH grp found");
-
- debug("SSH2_MSG_KEX_DH_GEX_GROUP sent");
- packet_start(SSH2_MSG_KEX_DH_GEX_GROUP);
- packet_put_bignum2(dh->p);
- packet_put_bignum2(dh->g);
- packet_send();
-
- /* flush */
- packet_write_wait();
-
- /* Compute our exchange value in parallel with the client */
- dh_gen_key(dh, kex->we_need * 8);
-
- debug("expecting SSH2_MSG_KEX_DH_GEX_INIT");
- packet_read_expect(SSH2_MSG_KEX_DH_GEX_INIT);
-
- /* key, cert */
- if ((dh_client_pub = BN_new()) == NULL)
- fatal("dh_client_pub == NULL");
- packet_get_bignum2(dh_client_pub);
- packet_check_eom();
-
-#ifdef DEBUG_KEXDH
- fprintf(stderr, "dh_client_pub= ");
- BN_print_fp(stderr, dh_client_pub);
- fprintf(stderr, "\n");
- debug("bits %d", BN_num_bits(dh_client_pub));
-#endif
-
-#ifdef DEBUG_KEXDH
- DHparams_print_fp(stderr, dh);
- fprintf(stderr, "pub= ");
- BN_print_fp(stderr, dh->pub_key);
- fprintf(stderr, "\n");
-#endif
- if (!dh_pub_is_valid(dh, dh_client_pub))
- packet_disconnect("bad client public DH value");
-
- klen = DH_size(dh);
- kbuf = xmalloc(klen);
- kout = DH_compute_key(kbuf, dh_client_pub, dh);
-#ifdef DEBUG_KEXDH
- dump_digest("shared secret", kbuf, kout);
-#endif
- if ((shared_secret = BN_new()) == NULL)
- fatal("kexgex_server: BN_new failed");
- BN_bin2bn(kbuf, kout, shared_secret);
- memset(kbuf, 0, klen);
- xfree(kbuf);
-
- key_to_blob(server_host_key, &server_host_key_blob, &sbloblen);
-
- if (type == SSH2_MSG_KEX_DH_GEX_REQUEST_OLD)
- min = max = -1;
-
- /* calc H */ /* XXX depends on 'kex' */
- hash = kexgex_hash(
- kex->client_version_string,
- kex->server_version_string,
- buffer_ptr(&kex->peer), buffer_len(&kex->peer),
- buffer_ptr(&kex->my), buffer_len(&kex->my),
- server_host_key_blob, sbloblen,
- min, nbits, max,
- dh->p, dh->g,
- dh_client_pub,
- dh->pub_key,
- shared_secret
- );
- BN_clear_free(dh_client_pub);
-
- /* save session id := H */
- /* XXX hashlen depends on KEX */
- if (kex->session_id == NULL) {
- kex->session_id_len = 20;
- kex->session_id = xmalloc(kex->session_id_len);
- memcpy(kex->session_id, hash, kex->session_id_len);
- }
-
- /* sign H */
- /* XXX hashlen depends on KEX */
- key_sign(server_host_key, &signature, &slen, hash, 20);
-
- /* destroy_sensitive_data(); */
-
- /* send server hostkey, DH pubkey 'f' and singed H */
- debug("SSH2_MSG_KEX_DH_GEX_REPLY sent");
- packet_start(SSH2_MSG_KEX_DH_GEX_REPLY);
- packet_put_string(server_host_key_blob, sbloblen);
- packet_put_bignum2(dh->pub_key); /* f */
- packet_put_string(signature, slen);
- packet_send();
-
- xfree(signature);
- xfree(server_host_key_blob);
- /* have keys, free DH */
- DH_free(dh);
-
- kex_derive_keys(kex, hash, shared_secret);
- BN_clear_free(shared_secret);
-
- kex_finish(kex);
-}
diff --git a/usr/src/cmd/ssh/libssh/common/kexgssc.c b/usr/src/cmd/ssh/libssh/common/kexgssc.c
deleted file mode 100644
index 60c91ed57b..0000000000
--- a/usr/src/cmd/ssh/libssh/common/kexgssc.c
+++ /dev/null
@@ -1,325 +0,0 @@
-/*
- * Copyright (c) 2001-2003 Simon Wilkinson. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR `AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-
-#include "includes.h"
-
-#ifdef GSSAPI
-
-#include <openssl/crypto.h>
-#include <openssl/bn.h>
-
-#include "xmalloc.h"
-#include "buffer.h"
-#include "bufaux.h"
-#include "kex.h"
-#include "log.h"
-#include "packet.h"
-#include "dh.h"
-#include "canohost.h"
-#include "ssh2.h"
-#include "ssh-gss.h"
-
-extern char *xxx_host;
-
-Gssctxt *xxx_gssctxt;
-
-static void kexgss_verbose_cleanup(void *arg);
-
-void
-kexgss_client(Kex *kex)
-{
- gss_buffer_desc gssbuf, send_tok, recv_tok, msg_tok;
- gss_buffer_t token_ptr;
- gss_OID mech = GSS_C_NULL_OID;
- Gssctxt *ctxt = NULL;
- OM_uint32 maj_status, min_status, smaj_status, smin_status;
- unsigned int klen, kout;
- DH *dh;
- BIGNUM *dh_server_pub = 0;
- BIGNUM *shared_secret = 0;
- Key *server_host_key = NULL;
- unsigned char *kbuf;
- unsigned char *hash;
- unsigned char *server_host_key_blob = NULL;
- char *msg, *lang;
- int type = 0;
- int first = 1;
- uint_t sbloblen = 0;
- uint_t strlen;
-
- /* Map the negotiated kex name to a mech OID */
- ssh_gssapi_oid_of_kexname(kex->name, &mech);
- if (mech == GSS_C_NULL_OID)
- fatal("Couldn't match the negotiated GSS key exchange");
-
- ssh_gssapi_build_ctx(&ctxt, 1, mech);
-
- /* This code should match that in ssh_dh1_client */
-
- /* Step 1 - e is dh->pub_key */
- dh = dh_new_group1();
- dh_gen_key(dh, kex->we_need * 8);
-
- /* This is f, we initialise it now to make life easier */
- dh_server_pub = BN_new();
- if (dh_server_pub == NULL) {
- fatal("dh_server_pub == NULL");
- }
-
- token_ptr = GSS_C_NO_BUFFER;
-
- recv_tok.value = NULL;
- recv_tok.length = 0;
-
- do {
- debug("Calling gss_init_sec_context");
-
- maj_status = ssh_gssapi_init_ctx(ctxt, xxx_host,
- kex->options.gss_deleg_creds, token_ptr, &send_tok);
-
- if (GSS_ERROR(maj_status)) {
- ssh_gssapi_error(ctxt, "performing GSS-API protected "
- "SSHv2 key exchange");
- (void) gss_release_buffer(&min_status, &send_tok);
- packet_disconnect("A GSS-API error occurred during "
- "GSS-API protected SSHv2 key exchange\n");
- }
-
- /* If we've got an old receive buffer get rid of it */
- if (token_ptr != GSS_C_NO_BUFFER) {
- /* We allocated recv_tok */
- xfree(recv_tok.value);
- recv_tok.value = NULL;
- recv_tok.length = 0;
- token_ptr = GSS_C_NO_BUFFER;
- }
-
- if (maj_status == GSS_S_COMPLETE) {
- /* If mutual state flag is not true, kex fails */
- if (!(ctxt->flags & GSS_C_MUTUAL_FLAG)) {
- fatal("Mutual authentication failed");
- }
- /* If integ avail flag is not true kex fails */
- if (!(ctxt->flags & GSS_C_INTEG_FLAG)) {
- fatal("Integrity check failed");
- }
- }
-
- /*
- * If we have data to send, then the last message that we
- * received cannot have been a 'complete'.
- */
- if (send_tok.length != 0) {
- if (first) {
- packet_start(SSH2_MSG_KEXGSS_INIT);
- packet_put_string(send_tok.value,
- send_tok.length);
- packet_put_bignum2(dh->pub_key);
- first = 0;
- } else {
- packet_start(SSH2_MSG_KEXGSS_CONTINUE);
- packet_put_string(send_tok.value,
- send_tok.length);
- }
- (void) gss_release_buffer(&min_status, &send_tok);
- packet_send();
- packet_write_wait();
-
-
- /*
- * If we've sent them data, they'd better be polite and
- * reply.
- */
-
-next_packet:
- /*
- * We need to catch connection closing w/o error
- * tokens or messages so we can tell the user
- * _something_ more useful than "Connection
- * closed by ..."
- *
- * We use a fatal cleanup function as that's
- * all, really, that we can do for now.
- */
- fatal_add_cleanup(kexgss_verbose_cleanup, NULL);
- type = packet_read();
- fatal_remove_cleanup(kexgss_verbose_cleanup, NULL);
- switch (type) {
- case SSH2_MSG_KEXGSS_HOSTKEY:
- debug("Received KEXGSS_HOSTKEY");
- server_host_key_blob =
- packet_get_string(&sbloblen);
- server_host_key =
- key_from_blob(server_host_key_blob,
- sbloblen);
- goto next_packet; /* there MUSt be another */
- break;
- case SSH2_MSG_KEXGSS_CONTINUE:
- debug("Received GSSAPI_CONTINUE");
- if (maj_status == GSS_S_COMPLETE)
- packet_disconnect("Protocol error: "
- "received GSS-API context token "
- "though the context was already "
- "established");
- recv_tok.value = packet_get_string(&strlen);
- recv_tok.length = strlen; /* u_int vs. size_t */
- break;
- case SSH2_MSG_KEXGSS_COMPLETE:
- debug("Received GSSAPI_COMPLETE");
- packet_get_bignum2(dh_server_pub);
- msg_tok.value = packet_get_string(&strlen);
- msg_tok.length = strlen; /* u_int vs. size_t */
-
- /* Is there a token included? */
- if (packet_get_char()) {
- recv_tok.value =
- packet_get_string(&strlen);
- /* u_int/size_t */
- recv_tok.length = strlen;
- }
- if (recv_tok.length > 0 &&
- maj_status == GSS_S_COMPLETE) {
- packet_disconnect("Protocol error: "
- "received GSS-API context token "
- "though the context was already "
- "established");
- } else if (recv_tok.length == 0 &&
- maj_status == GSS_S_CONTINUE_NEEDED) {
- /* No token included */
- packet_disconnect("Protocol error: "
- "did not receive expected "
- "GSS-API context token");
- }
- break;
- case SSH2_MSG_KEXGSS_ERROR:
- smaj_status = packet_get_int();
- smin_status = packet_get_int();
- msg = packet_get_string(NULL);
- lang = packet_get_string(NULL);
- xfree(lang);
- error("Server had a GSS-API error; the "
- "connection will close (%d/%d):\n%s",
- smaj_status, smin_status, msg);
- error("Use the GssKeyEx option to disable "
- "GSS-API key exchange and try again.");
- packet_disconnect("The server had a GSS-API "
- "error during GSS-API protected SSHv2 "
- "key exchange\n");
- break;
- default:
- packet_disconnect("Protocol error: "
- "didn't expect packet type %d", type);
- }
- if (recv_tok.value)
- token_ptr = &recv_tok;
- } else {
- /* No data, and not complete */
- if (maj_status != GSS_S_COMPLETE) {
- fatal("Not complete, and no token output");
- }
- }
- } while (maj_status == GSS_S_CONTINUE_NEEDED);
-
- /*
- * We _must_ have received a COMPLETE message in reply from the
- * server, which will have set dh_server_pub and msg_tok.
- */
- if (type != SSH2_MSG_KEXGSS_COMPLETE)
- fatal("Expected SSH2_MSG_KEXGSS_COMPLETE never arrived");
- if (maj_status != GSS_S_COMPLETE)
- fatal("Internal error in GSS-API protected SSHv2 key exchange");
-
- /* Check f in range [1, p-1] */
- if (!dh_pub_is_valid(dh, dh_server_pub))
- packet_disconnect("bad server public DH value");
-
- /* compute K=f^x mod p */
- klen = DH_size(dh);
- kbuf = xmalloc(klen);
- kout = DH_compute_key(kbuf, dh_server_pub, dh);
-
- shared_secret = BN_new();
- BN_bin2bn(kbuf, kout, shared_secret);
- (void) memset(kbuf, 0, klen);
- xfree(kbuf);
-
- /* The GSS hash is identical to the DH one */
- hash = kex_dh_hash(
- kex->client_version_string,
- kex->server_version_string,
- buffer_ptr(&kex->my), buffer_len(&kex->my),
- buffer_ptr(&kex->peer), buffer_len(&kex->peer),
- server_host_key_blob, sbloblen, /* server host key */
- dh->pub_key, /* e */
- dh_server_pub, /* f */
- shared_secret); /* K */
-
- gssbuf.value = hash;
- gssbuf.length = 20;
-
- /* Verify that H matches the token we just got. */
- if ((maj_status = gss_verify_mic(&min_status, ctxt->context, &gssbuf,
- &msg_tok, NULL))) {
- packet_disconnect("Hash's MIC didn't verify");
- }
-
- if (server_host_key && kex->accept_host_key != NULL)
- (void) kex->accept_host_key(server_host_key);
-
- DH_free(dh);
-
- xxx_gssctxt = ctxt; /* for gss keyex w/ mic userauth */
-
- /* save session id */
- if (kex->session_id == NULL) {
- kex->session_id_len = 20;
- kex->session_id = xmalloc(kex->session_id_len);
- (void) memcpy(kex->session_id, hash, kex->session_id_len);
- }
-
- kex_derive_keys(kex, hash, shared_secret);
- BN_clear_free(shared_secret);
- kex_finish(kex);
-}
-
-/* ARGSUSED */
-static
-void
-kexgss_verbose_cleanup(void *arg)
-{
- error("The GSS-API protected key exchange has failed without "
- "indication\nfrom the server, possibly due to misconfiguration "
- "of the server.");
- error("Use the GssKeyEx option to disable GSS-API key exchange "
- "and try again.");
-}
-
-#endif /* GSSAPI */
diff --git a/usr/src/cmd/ssh/libssh/common/kexgsss.c b/usr/src/cmd/ssh/libssh/common/kexgsss.c
deleted file mode 100644
index 9ee8b630bb..0000000000
--- a/usr/src/cmd/ssh/libssh/common/kexgsss.c
+++ /dev/null
@@ -1,252 +0,0 @@
-/*
- * Copyright (c) 2001-2003 Simon Wilkinson. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR `AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "includes.h"
-
-#ifdef GSSAPI
-
-#include <openssl/crypto.h>
-#include <openssl/bn.h>
-
-#include "xmalloc.h"
-#include "buffer.h"
-#include "bufaux.h"
-#include "compat.h"
-#include "kex.h"
-#include "log.h"
-#include "packet.h"
-#include "dh.h"
-#include "ssh2.h"
-#include "ssh-gss.h"
-#include "auth.h"
-
-Gssctxt *xxx_gssctxt;
-extern Authctxt *x_authctxt;
-
-static void kex_gss_send_error(Gssctxt *ctxt);
-
-void
-kexgss_server(Kex *kex)
-{
- OM_uint32 maj_status, min_status;
- gss_buffer_desc gssbuf, send_tok, recv_tok, msg_tok;
- Gssctxt *ctxt = NULL;
- unsigned int klen, kout;
- unsigned int sbloblen = 0;
- unsigned char *kbuf, *hash;
- unsigned char *server_host_key_blob = NULL;
- DH *dh;
- Key *server_host_key = NULL;
- BIGNUM *shared_secret = NULL;
- BIGNUM *dh_client_pub = NULL;
- int type = 0;
- uint_t slen;
- gss_OID oid;
-
- /*
- * Load host key to advertise in a SSH_MSG_KEXGSS_HOSTKEY packet
- * -- unlike KEX_DH/KEX_GEX no host key, no problem since it's
- * the GSS-API that provides for server host authentication.
- */
- if (kex->load_host_key != NULL &&
- !(datafellows & SSH_BUG_GSSKEX_HOSTKEY))
- server_host_key = kex->load_host_key(kex->hostkey_type);
- if (server_host_key != NULL)
- key_to_blob(server_host_key, &server_host_key_blob, &sbloblen);
-
-
- /* Initialise GSSAPI */
-
- ssh_gssapi_oid_of_kexname(kex->name, &oid);
- if (oid == GSS_C_NULL_OID) {
- fatal("Couldn't match the negotiated GSS key exchange");
- }
-
- ssh_gssapi_build_ctx(&xxx_gssctxt, 0, oid);
-
- ctxt = xxx_gssctxt;
-
- do {
- debug("Wait SSH2_MSG_GSSAPI_INIT");
- type = packet_read();
- switch (type) {
- case SSH2_MSG_KEXGSS_INIT:
- if (dh_client_pub != NULL)
- fatal("Received KEXGSS_INIT after "
- "initialising");
- recv_tok.value = packet_get_string(&slen);
- recv_tok.length = slen; /* int vs. size_t */
-
- dh_client_pub = BN_new();
-
- if (dh_client_pub == NULL)
- fatal("dh_client_pub == NULL");
- packet_get_bignum2(dh_client_pub);
-
- /* Send SSH_MSG_KEXGSS_HOSTKEY here, if we want */
- if (sbloblen) {
- packet_start(SSH2_MSG_KEXGSS_HOSTKEY);
- packet_put_string(server_host_key_blob,
- sbloblen);
- packet_send();
- packet_write_wait();
- }
- break;
- case SSH2_MSG_KEXGSS_CONTINUE:
- recv_tok.value = packet_get_string(&slen);
- recv_tok.length = slen; /* int vs. size_t */
- break;
- default:
- packet_disconnect("Protocol error: didn't expect "
- "packet type %d", type);
- }
-
- maj_status = ssh_gssapi_accept_ctx(ctxt, &recv_tok, &send_tok);
-
- xfree(recv_tok.value); /* We allocated this, not gss */
-
- if (dh_client_pub == NULL)
- fatal("No client public key");
-
- if (maj_status == GSS_S_CONTINUE_NEEDED) {
- debug("Sending GSSAPI_CONTINUE");
- packet_start(SSH2_MSG_KEXGSS_CONTINUE);
- packet_put_string(send_tok.value, send_tok.length);
- packet_send();
- packet_write_wait();
- (void) gss_release_buffer(&min_status, &send_tok);
- }
- } while (maj_status == GSS_S_CONTINUE_NEEDED);
-
- if (GSS_ERROR(maj_status)) {
- kex_gss_send_error(ctxt);
- if (send_tok.length > 0) {
- packet_start(SSH2_MSG_KEXGSS_CONTINUE);
- packet_put_string(send_tok.value, send_tok.length);
- packet_send();
- packet_write_wait();
- (void) gss_release_buffer(&min_status, &send_tok);
- }
- fatal("accept_ctx died");
- }
-
- debug("gss_complete");
- if (!(ctxt->flags & GSS_C_MUTUAL_FLAG))
- fatal("Mutual authentication flag wasn't set");
-
- if (!(ctxt->flags & GSS_C_INTEG_FLAG))
- fatal("Integrity flag wasn't set");
-
- dh = dh_new_group1();
- dh_gen_key(dh, kex->we_need * 8);
-
- if (!dh_pub_is_valid(dh, dh_client_pub))
- packet_disconnect("bad client public DH value");
-
- klen = DH_size(dh);
- kbuf = xmalloc(klen);
- kout = DH_compute_key(kbuf, dh_client_pub, dh);
-
- shared_secret = BN_new();
- BN_bin2bn(kbuf, kout, shared_secret);
- (void) memset(kbuf, 0, klen);
- xfree(kbuf);
-
- /* The GSSAPI hash is identical to the Diffie Helman one */
- hash = kex_dh_hash(
- kex->client_version_string,
- kex->server_version_string,
- buffer_ptr(&kex->peer), buffer_len(&kex->peer),
- buffer_ptr(&kex->my), buffer_len(&kex->my),
- server_host_key_blob, sbloblen,
- dh_client_pub,
- dh->pub_key,
- shared_secret);
-
- BN_free(dh_client_pub);
-
- if (kex->session_id == NULL) {
- kex->session_id_len = 20;
- kex->session_id = xmalloc(kex->session_id_len);
- (void) memcpy(kex->session_id, hash, kex->session_id_len);
- } else if (x_authctxt != NULL && x_authctxt->success) {
- ssh_gssapi_storecreds(ctxt, x_authctxt);
- }
-
- /* Should fix kex_dh_hash to output hash length */
- gssbuf.length = 20; /* yes, it's always 20 (SHA-1) */
- gssbuf.value = hash; /* and it's static constant storage */
-
- if (GSS_ERROR(ssh_gssapi_get_mic(ctxt, &gssbuf, &msg_tok))) {
- kex_gss_send_error(ctxt);
- fatal("Couldn't get MIC");
- }
-
- packet_start(SSH2_MSG_KEXGSS_COMPLETE);
- packet_put_bignum2(dh->pub_key);
- packet_put_string((char *)msg_tok.value, msg_tok.length);
- (void) gss_release_buffer(&min_status, &msg_tok);
-
- if (send_tok.length != 0) {
- packet_put_char(1); /* true */
- packet_put_string((char *)send_tok.value, send_tok.length);
- (void) gss_release_buffer(&min_status, &send_tok);
- } else {
- packet_put_char(0); /* false */
- }
- packet_send();
- packet_write_wait();
-
- DH_free(dh);
-
- kex_derive_keys(kex, hash, shared_secret);
- BN_clear_free(shared_secret);
- kex_finish(kex);
-}
-
-static void
-kex_gss_send_error(Gssctxt *ctxt) {
- char *errstr;
- OM_uint32 maj, min;
-
- errstr = ssh_gssapi_last_error(ctxt, &maj, &min);
- if (errstr) {
- packet_start(SSH2_MSG_KEXGSS_ERROR);
- packet_put_int(maj);
- packet_put_int(min);
- packet_put_cstring(errstr);
- packet_put_cstring("");
- packet_send();
- packet_write_wait();
- /* XXX - We should probably log the error locally here */
- xfree(errstr);
- }
-}
-#endif /* GSSAPI */
diff --git a/usr/src/cmd/ssh/libssh/common/key.c b/usr/src/cmd/ssh/libssh/common/key.c
deleted file mode 100644
index f648d3b640..0000000000
--- a/usr/src/cmd/ssh/libssh/common/key.c
+++ /dev/null
@@ -1,876 +0,0 @@
-/*
- * read_bignum():
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- *
- *
- * Copyright (c) 2000, 2001 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#include "includes.h"
-RCSID("$OpenBSD: key.c,v 1.49 2002/09/09 14:54:14 markus Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <openssl/evp.h>
-
-#include "xmalloc.h"
-#include "key.h"
-#include "rsa.h"
-#include "ssh-dss.h"
-#include "ssh-rsa.h"
-#include "uuencode.h"
-#include "buffer.h"
-#include "bufaux.h"
-#include "log.h"
-
-Key *
-key_new(int type)
-{
- Key *k;
- RSA *rsa;
- DSA *dsa;
- k = xmalloc(sizeof(*k));
- k->type = type;
- k->flags = 0;
- k->dsa = NULL;
- k->rsa = NULL;
- switch (k->type) {
- case KEY_RSA1:
- case KEY_RSA:
- if ((rsa = RSA_new()) == NULL)
- fatal("key_new: RSA_new failed");
- if ((rsa->n = BN_new()) == NULL)
- fatal("key_new: BN_new failed");
- if ((rsa->e = BN_new()) == NULL)
- fatal("key_new: BN_new failed");
- k->rsa = rsa;
- break;
- case KEY_DSA:
- if ((dsa = DSA_new()) == NULL)
- fatal("key_new: DSA_new failed");
- if ((dsa->p = BN_new()) == NULL)
- fatal("key_new: BN_new failed");
- if ((dsa->q = BN_new()) == NULL)
- fatal("key_new: BN_new failed");
- if ((dsa->g = BN_new()) == NULL)
- fatal("key_new: BN_new failed");
- if ((dsa->pub_key = BN_new()) == NULL)
- fatal("key_new: BN_new failed");
- k->dsa = dsa;
- break;
- case KEY_UNSPEC:
- break;
- default:
- fatal("key_new: bad key type %d", k->type);
- break;
- }
- return k;
-}
-
-Key *
-key_new_private(int type)
-{
- Key *k = key_new(type);
- switch (k->type) {
- case KEY_RSA1:
- case KEY_RSA:
- if ((k->rsa->d = BN_new()) == NULL)
- fatal("key_new_private: BN_new failed");
- if ((k->rsa->iqmp = BN_new()) == NULL)
- fatal("key_new_private: BN_new failed");
- if ((k->rsa->q = BN_new()) == NULL)
- fatal("key_new_private: BN_new failed");
- if ((k->rsa->p = BN_new()) == NULL)
- fatal("key_new_private: BN_new failed");
- if ((k->rsa->dmq1 = BN_new()) == NULL)
- fatal("key_new_private: BN_new failed");
- if ((k->rsa->dmp1 = BN_new()) == NULL)
- fatal("key_new_private: BN_new failed");
- break;
- case KEY_DSA:
- if ((k->dsa->priv_key = BN_new()) == NULL)
- fatal("key_new_private: BN_new failed");
- break;
- case KEY_UNSPEC:
- break;
- default:
- break;
- }
- return k;
-}
-
-void
-key_free(Key *k)
-{
- switch (k->type) {
- case KEY_RSA1:
- case KEY_RSA:
- if (k->rsa != NULL)
- RSA_free(k->rsa);
- k->rsa = NULL;
- break;
- case KEY_DSA:
- if (k->dsa != NULL)
- DSA_free(k->dsa);
- k->dsa = NULL;
- break;
- case KEY_UNSPEC:
- break;
- default:
- fatal("key_free: bad key type %d", k->type);
- break;
- }
- xfree(k);
-}
-int
-key_equal(const Key *a, const Key *b)
-{
- if (a == NULL || b == NULL || a->type != b->type)
- return 0;
- switch (a->type) {
- case KEY_RSA1:
- case KEY_RSA:
- return a->rsa != NULL && b->rsa != NULL &&
- BN_cmp(a->rsa->e, b->rsa->e) == 0 &&
- BN_cmp(a->rsa->n, b->rsa->n) == 0;
- break;
- case KEY_DSA:
- return a->dsa != NULL && b->dsa != NULL &&
- BN_cmp(a->dsa->p, b->dsa->p) == 0 &&
- BN_cmp(a->dsa->q, b->dsa->q) == 0 &&
- BN_cmp(a->dsa->g, b->dsa->g) == 0 &&
- BN_cmp(a->dsa->pub_key, b->dsa->pub_key) == 0;
- break;
- default:
- fatal("key_equal: bad key type %d", a->type);
- break;
- }
- return 0;
-}
-
-static u_char *
-key_fingerprint_raw(Key *k, enum fp_type dgst_type, u_int *dgst_raw_length)
-{
- const EVP_MD *md = NULL;
- EVP_MD_CTX ctx;
- u_char *blob = NULL;
- u_char *retval = NULL;
- u_int len = 0;
- int nlen, elen;
-
- *dgst_raw_length = 0;
-
- switch (dgst_type) {
- case SSH_FP_MD5:
- md = EVP_md5();
- break;
- case SSH_FP_SHA1:
- md = EVP_sha1();
- break;
- default:
- fatal("key_fingerprint_raw: bad digest type %d",
- dgst_type);
- }
- switch (k->type) {
- case KEY_RSA1:
- nlen = BN_num_bytes(k->rsa->n);
- elen = BN_num_bytes(k->rsa->e);
- len = nlen + elen;
- blob = xmalloc(len);
- BN_bn2bin(k->rsa->n, blob);
- BN_bn2bin(k->rsa->e, blob + nlen);
- break;
- case KEY_DSA:
- case KEY_RSA:
- key_to_blob(k, &blob, &len);
- break;
- case KEY_UNSPEC:
- return retval;
- break;
- default:
- fatal("key_fingerprint_raw: bad key type %d", k->type);
- break;
- }
- if (blob != NULL) {
- retval = xmalloc(EVP_MAX_MD_SIZE);
- EVP_DigestInit(&ctx, md);
- EVP_DigestUpdate(&ctx, blob, len);
- EVP_DigestFinal(&ctx, retval, dgst_raw_length);
- memset(blob, 0, len);
- xfree(blob);
- } else {
- fatal("key_fingerprint_raw: blob is null");
- }
- return retval;
-}
-
-static char *
-key_fingerprint_hex(u_char *dgst_raw, u_int dgst_raw_len)
-{
- char *retval;
- int i;
-
- retval = xmalloc(dgst_raw_len * 3 + 1);
- retval[0] = '\0';
- for (i = 0; i < dgst_raw_len; i++) {
- char hex[4];
- snprintf(hex, sizeof(hex), "%02x:", dgst_raw[i]);
- strlcat(retval, hex, dgst_raw_len * 3);
- }
- retval[(dgst_raw_len * 3) - 1] = '\0';
- return retval;
-}
-
-static char *
-key_fingerprint_bubblebabble(u_char *dgst_raw, u_int dgst_raw_len)
-{
- char vowels[] = { 'a', 'e', 'i', 'o', 'u', 'y' };
- char consonants[] = { 'b', 'c', 'd', 'f', 'g', 'h', 'k', 'l', 'm',
- 'n', 'p', 'r', 's', 't', 'v', 'z', 'x' };
- u_int i, j = 0, rounds, seed = 1;
- char *retval;
-
- rounds = (dgst_raw_len / 2) + 1;
- retval = xmalloc(sizeof(char) * (rounds*6));
- retval[j++] = 'x';
- for (i = 0; i < rounds; i++) {
- u_int idx0, idx1, idx2, idx3, idx4;
- if ((i + 1 < rounds) || (dgst_raw_len % 2 != 0)) {
- idx0 = (((((u_int)(dgst_raw[2 * i])) >> 6) & 3) +
- seed) % 6;
- idx1 = (((u_int)(dgst_raw[2 * i])) >> 2) & 15;
- idx2 = ((((u_int)(dgst_raw[2 * i])) & 3) +
- (seed / 6)) % 6;
- retval[j++] = vowels[idx0];
- retval[j++] = consonants[idx1];
- retval[j++] = vowels[idx2];
- if ((i + 1) < rounds) {
- idx3 = (((u_int)(dgst_raw[(2 * i) + 1])) >> 4) & 15;
- idx4 = (((u_int)(dgst_raw[(2 * i) + 1]))) & 15;
- retval[j++] = consonants[idx3];
- retval[j++] = '-';
- retval[j++] = consonants[idx4];
- seed = ((seed * 5) +
- ((((u_int)(dgst_raw[2 * i])) * 7) +
- ((u_int)(dgst_raw[(2 * i) + 1])))) % 36;
- }
- } else {
- idx0 = seed % 6;
- idx1 = 16;
- idx2 = seed / 6;
- retval[j++] = vowels[idx0];
- retval[j++] = consonants[idx1];
- retval[j++] = vowels[idx2];
- }
- }
- retval[j++] = 'x';
- retval[j++] = '\0';
- return retval;
-}
-
-char *
-key_fingerprint(Key *k, enum fp_type dgst_type, enum fp_rep dgst_rep)
-{
- char *retval = NULL;
- u_char *dgst_raw;
- u_int dgst_raw_len;
-
- dgst_raw = key_fingerprint_raw(k, dgst_type, &dgst_raw_len);
- if (!dgst_raw)
- fatal("key_fingerprint: null from key_fingerprint_raw()");
- switch (dgst_rep) {
- case SSH_FP_HEX:
- retval = key_fingerprint_hex(dgst_raw, dgst_raw_len);
- break;
- case SSH_FP_BUBBLEBABBLE:
- retval = key_fingerprint_bubblebabble(dgst_raw, dgst_raw_len);
- break;
- default:
- fatal("key_fingerprint_ex: bad digest representation %d",
- dgst_rep);
- break;
- }
- memset(dgst_raw, 0, dgst_raw_len);
- xfree(dgst_raw);
- return retval;
-}
-
-/*
- * Reads a multiple-precision integer in decimal from the buffer, and advances
- * the pointer. The integer must already be initialized. This function is
- * permitted to modify the buffer. This leaves *cpp to point just beyond the
- * last processed (and maybe modified) character. Note that this may modify
- * the buffer containing the number.
- */
-static int
-read_bignum(char **cpp, BIGNUM * value)
-{
- char *cp = *cpp;
- int old;
-
- /* Skip any leading whitespace. */
- for (; *cp == ' ' || *cp == '\t'; cp++)
- ;
-
- /* Check that it begins with a decimal digit. */
- if (*cp < '0' || *cp > '9')
- return 0;
-
- /* Save starting position. */
- *cpp = cp;
-
- /* Move forward until all decimal digits skipped. */
- for (; *cp >= '0' && *cp <= '9'; cp++)
- ;
-
- /* Save the old terminating character, and replace it by \0. */
- old = *cp;
- *cp = 0;
-
- /* Parse the number. */
- if (BN_dec2bn(&value, *cpp) == 0)
- return 0;
-
- /* Restore old terminating character. */
- *cp = old;
-
- /* Move beyond the number and return success. */
- *cpp = cp;
- return 1;
-}
-
-static int
-write_bignum(FILE *f, BIGNUM *num)
-{
- char *buf = BN_bn2dec(num);
- if (buf == NULL) {
- error("write_bignum: BN_bn2dec() failed");
- return 0;
- }
- fprintf(f, " %s", buf);
- OPENSSL_free(buf);
- return 1;
-}
-
-/* returns 1 ok, -1 error */
-int
-key_read(Key *ret, char **cpp)
-{
- Key *k;
- int success = -1;
- char *cp, *space;
- int len, n, type;
- u_int bits;
- u_char *blob;
-
- cp = *cpp;
-
- switch (ret->type) {
- case KEY_RSA1:
- /* Get number of bits. */
- if (*cp < '0' || *cp > '9')
- return -1; /* Bad bit count... */
- for (bits = 0; *cp >= '0' && *cp <= '9'; cp++)
- bits = 10 * bits + *cp - '0';
- if (bits == 0)
- return -1;
- *cpp = cp;
- /* Get public exponent, public modulus. */
- if (!read_bignum(cpp, ret->rsa->e))
- return -1;
- if (!read_bignum(cpp, ret->rsa->n))
- return -1;
- success = 1;
- break;
- case KEY_UNSPEC:
- case KEY_RSA:
- case KEY_DSA:
- space = strchr(cp, ' ');
- if (space == NULL) {
- debug3("key_read: no space");
- return -1;
- }
- *space = '\0';
- type = key_type_from_name(cp);
- *space = ' ';
- if (type == KEY_UNSPEC) {
- debug3("key_read: no key found");
- return -1;
- }
- cp = space+1;
- if (*cp == '\0') {
- debug3("key_read: short string");
- return -1;
- }
- if (ret->type == KEY_UNSPEC) {
- ret->type = type;
- } else if (ret->type != type) {
- /* is a key, but different type */
- debug3("key_read: type mismatch");
- return -1;
- }
- len = 2*strlen(cp);
- blob = xmalloc(len);
- n = uudecode(cp, blob, len);
- if (n < 0) {
- error("key_read: uudecode %s failed", cp);
- xfree(blob);
- return -1;
- }
- k = key_from_blob(blob, n);
- xfree(blob);
- if (k == NULL) {
- error("key_read: key_from_blob %s failed", cp);
- return -1;
- }
- if (k->type != type) {
- error("key_read: type mismatch: encoding error");
- key_free(k);
- return -1;
- }
-/*XXXX*/
- if (ret->type == KEY_RSA) {
- if (ret->rsa != NULL)
- RSA_free(ret->rsa);
- ret->rsa = k->rsa;
- k->rsa = NULL;
- success = 1;
-#ifdef DEBUG_PK
- RSA_print_fp(stderr, ret->rsa, 8);
-#endif
- } else {
- if (ret->dsa != NULL)
- DSA_free(ret->dsa);
- ret->dsa = k->dsa;
- k->dsa = NULL;
- success = 1;
-#ifdef DEBUG_PK
- DSA_print_fp(stderr, ret->dsa, 8);
-#endif
- }
-/*XXXX*/
- key_free(k);
- if (success != 1)
- break;
- /* advance cp: skip whitespace and data */
- while (*cp == ' ' || *cp == '\t')
- cp++;
- while (*cp != '\0' && *cp != ' ' && *cp != '\t')
- cp++;
- *cpp = cp;
- break;
- default:
- fatal("key_read: bad key type: %d", ret->type);
- break;
- }
- return success;
-}
-
-int
-key_write(const Key *key, FILE *f)
-{
- int n, success = 0;
- u_int len, bits = 0;
- u_char *blob;
- char *uu;
-
- if (key->type == KEY_RSA1 && key->rsa != NULL) {
- /* size of modulus 'n' */
- bits = BN_num_bits(key->rsa->n);
- fprintf(f, "%u", bits);
- if (write_bignum(f, key->rsa->e) &&
- write_bignum(f, key->rsa->n)) {
- success = 1;
- } else {
- error("key_write: failed for RSA key");
- }
- } else if ((key->type == KEY_DSA && key->dsa != NULL) ||
- (key->type == KEY_RSA && key->rsa != NULL)) {
- key_to_blob(key, &blob, &len);
- uu = xmalloc(2*len);
- n = uuencode(blob, len, uu, 2*len);
- if (n > 0) {
- fprintf(f, "%s %s", key_ssh_name(key), uu);
- success = 1;
- }
- xfree(blob);
- xfree(uu);
- }
- return success;
-}
-
-char *
-key_type(Key *k)
-{
- switch (k->type) {
- case KEY_RSA1:
- return "RSA1";
- break;
- case KEY_RSA:
- return "RSA";
- break;
- case KEY_DSA:
- return "DSA";
- break;
- }
- return "unknown";
-}
-
-char *
-key_ssh_name(const Key *k)
-{
- switch (k->type) {
- case KEY_RSA:
- return "ssh-rsa";
- break;
- case KEY_DSA:
- return "ssh-dss";
- break;
- }
- return "ssh-unknown";
-}
-
-u_int
-key_size(Key *k)
-{
- switch (k->type) {
- case KEY_RSA1:
- case KEY_RSA:
- return BN_num_bits(k->rsa->n);
- break;
- case KEY_DSA:
- return BN_num_bits(k->dsa->p);
- break;
- }
- return 0;
-}
-
-static RSA *
-rsa_generate_private_key(u_int bits)
-{
- RSA *private;
- private = RSA_generate_key(bits, 35, NULL, NULL);
- if (private == NULL)
- fatal("rsa_generate_private_key: key generation failed.");
- return private;
-}
-
-static DSA*
-dsa_generate_private_key(u_int bits)
-{
- DSA *private = DSA_generate_parameters(bits, NULL, 0, NULL, NULL, NULL, NULL);
- if (private == NULL)
- fatal("dsa_generate_private_key: DSA_generate_parameters failed");
- if (!DSA_generate_key(private))
- fatal("dsa_generate_private_key: DSA_generate_key failed.");
- if (private == NULL)
- fatal("dsa_generate_private_key: NULL.");
- return private;
-}
-
-Key *
-key_generate(int type, u_int bits)
-{
- Key *k = key_new(KEY_UNSPEC);
- switch (type) {
- case KEY_DSA:
- k->dsa = dsa_generate_private_key(bits);
- break;
- case KEY_RSA:
- case KEY_RSA1:
- k->rsa = rsa_generate_private_key(bits);
- break;
- default:
- fatal("key_generate: unknown type %d", type);
- }
- k->type = type;
- return k;
-}
-
-Key *
-key_from_private(Key *k)
-{
- Key *n = NULL;
- switch (k->type) {
- case KEY_DSA:
- n = key_new(k->type);
- BN_copy(n->dsa->p, k->dsa->p);
- BN_copy(n->dsa->q, k->dsa->q);
- BN_copy(n->dsa->g, k->dsa->g);
- BN_copy(n->dsa->pub_key, k->dsa->pub_key);
- break;
- case KEY_RSA:
- case KEY_RSA1:
- n = key_new(k->type);
- BN_copy(n->rsa->n, k->rsa->n);
- BN_copy(n->rsa->e, k->rsa->e);
- break;
- default:
- fatal("key_from_private: unknown type %d", k->type);
- break;
- }
- return n;
-}
-
-int
-key_type_from_name(char *name)
-{
- if (strcmp(name, "rsa1") == 0) {
- return KEY_RSA1;
- } else if (strcmp(name, "rsa") == 0) {
- return KEY_RSA;
- } else if (strcmp(name, "dsa") == 0) {
- return KEY_DSA;
- } else if (strcmp(name, "ssh-rsa") == 0) {
- return KEY_RSA;
- } else if (strcmp(name, "ssh-dss") == 0) {
- return KEY_DSA;
- } else if (strcmp(name, "null") == 0){
- return KEY_NULL;
- }
- debug2("key_type_from_name: unknown key type '%s'", name);
- return KEY_UNSPEC;
-}
-
-int
-key_names_valid2(const char *names)
-{
- char *s, *cp, *p;
-
- if (names == NULL || strcmp(names, "") == 0)
- return 0;
- s = cp = xstrdup(names);
- for ((p = strsep(&cp, ",")); p && *p != '\0';
- (p = strsep(&cp, ","))) {
- switch (key_type_from_name(p)) {
- case KEY_RSA1:
- case KEY_UNSPEC:
- xfree(s);
- return 0;
- }
- }
- debug3("key names ok: [%s]", names);
- xfree(s);
- return 1;
-}
-
-Key *
-key_from_blob(u_char *blob, int blen)
-{
- Buffer b;
- char *ktype;
- int rlen, type;
- Key *key = NULL;
-
-#ifdef DEBUG_PK
- dump_base64(stderr, blob, blen);
-#endif
- buffer_init(&b);
- buffer_append(&b, blob, blen);
- if ((ktype = buffer_get_string_ret(&b, NULL)) == NULL) {
- error("key_from_blob: can't read key type");
- goto out;
- }
-
- type = key_type_from_name(ktype);
-
- switch (type) {
- case KEY_RSA:
- key = key_new(type);
- if (buffer_get_bignum2_ret(&b, key->rsa->e) == -1 ||
- buffer_get_bignum2_ret(&b, key->rsa->n) == -1) {
- error("key_from_blob: can't read rsa key");
- key_free(key);
- key = NULL;
- goto out;
- }
-#ifdef DEBUG_PK
- RSA_print_fp(stderr, key->rsa, 8);
-#endif
- break;
- case KEY_DSA:
- key = key_new(type);
- if (buffer_get_bignum2_ret(&b, key->dsa->p) == -1 ||
- buffer_get_bignum2_ret(&b, key->dsa->q) == -1 ||
- buffer_get_bignum2_ret(&b, key->dsa->g) == -1 ||
- buffer_get_bignum2_ret(&b, key->dsa->pub_key) == -1) {
- error("key_from_blob: can't read dsa key");
- key_free(key);
- key = NULL;
- goto out;
- }
-#ifdef DEBUG_PK
- DSA_print_fp(stderr, key->dsa, 8);
-#endif
- break;
- case KEY_UNSPEC:
- key = key_new(type);
- break;
- default:
- error("key_from_blob: cannot handle type %s", ktype);
- goto out;
- }
- rlen = buffer_len(&b);
- if (key != NULL && rlen != 0)
- error("key_from_blob: remaining bytes in key blob %d", rlen);
- out:
- if (ktype != NULL)
- xfree(ktype);
- buffer_free(&b);
- return key;
-}
-
-int
-key_to_blob(const Key *key, u_char **blobp, u_int *lenp)
-{
- Buffer b;
- int len;
-
- if (key == NULL) {
- error("key_to_blob: key == NULL");
- return 0;
- }
- buffer_init(&b);
- switch (key->type) {
- case KEY_DSA:
- buffer_put_cstring(&b, key_ssh_name(key));
- buffer_put_bignum2(&b, key->dsa->p);
- buffer_put_bignum2(&b, key->dsa->q);
- buffer_put_bignum2(&b, key->dsa->g);
- buffer_put_bignum2(&b, key->dsa->pub_key);
- break;
- case KEY_RSA:
- buffer_put_cstring(&b, key_ssh_name(key));
- buffer_put_bignum2(&b, key->rsa->e);
- buffer_put_bignum2(&b, key->rsa->n);
- break;
- default:
- error("key_to_blob: unsupported key type %d", key->type);
- buffer_free(&b);
- return 0;
- }
- len = buffer_len(&b);
- if (lenp != NULL)
- *lenp = len;
- if (blobp != NULL) {
- *blobp = xmalloc(len);
- memcpy(*blobp, buffer_ptr(&b), len);
- }
- memset(buffer_ptr(&b), 0, len);
- buffer_free(&b);
- return len;
-}
-
-int
-key_sign(
- Key *key,
- u_char **sigp, u_int *lenp,
- u_char *data, u_int datalen)
-{
- switch (key->type) {
- case KEY_DSA:
- return ssh_dss_sign(key, sigp, lenp, data, datalen);
- break;
- case KEY_RSA:
- return ssh_rsa_sign(key, sigp, lenp, data, datalen);
- break;
- default:
- error("key_sign: illegal key type %d", key->type);
- return -1;
- break;
- }
-}
-
-/*
- * key_verify returns 1 for a correct signature, 0 for an incorrect signature
- * and -1 on error.
- */
-int
-key_verify(
- Key *key,
- u_char *signature, u_int signaturelen,
- u_char *data, u_int datalen)
-{
- if (signaturelen == 0)
- return -1;
-
- switch (key->type) {
- case KEY_DSA:
- return ssh_dss_verify(key, signature, signaturelen, data, datalen);
- break;
- case KEY_RSA:
- return ssh_rsa_verify(key, signature, signaturelen, data, datalen);
- break;
- default:
- error("key_verify: illegal key type %d", key->type);
- return -1;
- break;
- }
-}
-
-/* Converts a private to a public key */
-Key *
-key_demote(Key *k)
-{
- Key *pk;
-
- pk = xmalloc(sizeof(*pk));
- pk->type = k->type;
- pk->flags = k->flags;
- pk->dsa = NULL;
- pk->rsa = NULL;
-
- switch (k->type) {
- case KEY_RSA1:
- case KEY_RSA:
- if ((pk->rsa = RSA_new()) == NULL)
- fatal("key_demote: RSA_new failed");
- if ((pk->rsa->e = BN_dup(k->rsa->e)) == NULL)
- fatal("key_demote: BN_dup failed");
- if ((pk->rsa->n = BN_dup(k->rsa->n)) == NULL)
- fatal("key_demote: BN_dup failed");
- break;
- case KEY_DSA:
- if ((pk->dsa = DSA_new()) == NULL)
- fatal("key_demote: DSA_new failed");
- if ((pk->dsa->p = BN_dup(k->dsa->p)) == NULL)
- fatal("key_demote: BN_dup failed");
- if ((pk->dsa->q = BN_dup(k->dsa->q)) == NULL)
- fatal("key_demote: BN_dup failed");
- if ((pk->dsa->g = BN_dup(k->dsa->g)) == NULL)
- fatal("key_demote: BN_dup failed");
- if ((pk->dsa->pub_key = BN_dup(k->dsa->pub_key)) == NULL)
- fatal("key_demote: BN_dup failed");
- break;
- default:
- fatal("key_free: bad key type %d", k->type);
- break;
- }
-
- return (pk);
-}
diff --git a/usr/src/cmd/ssh/libssh/common/llib-lssh b/usr/src/cmd/ssh/libssh/common/llib-lssh
deleted file mode 100644
index c44090abe6..0000000000
--- a/usr/src/cmd/ssh/libssh/common/llib-lssh
+++ /dev/null
@@ -1,130 +0,0 @@
-/* LINTLIBRARY */
-/* PROTOLIB1 */
-
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- *
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include <includes.h>
-#include <ssh.h>
-#include <atomicio.h>
-#include <auth.h>
-#include <auth-pam.h>
-#include <auth2-pam.h>
-#include <authfd.h>
-#include <authfile.h>
-#include <auth-options.h>
-#include <base64.h>
-#include <bindresvport.h>
-#include <bsd-arc4random.h>
-#include <bsd-cray.h>
-#include <bsd-cygwin_util.h>
-#include <bsd-getpeereid.h>
-#include <bsd-misc.h>
-#include <bsd-snprintf.h>
-#include <bsd-waitpid.h>
-#include <bufaux.h>
-#include <buffer.h>
-#include <canohost.h>
-#include <channels.h>
-#include <cipher.h>
-#include <clientloop.h>
-#include <compat.h>
-#include <compress.h>
-#include <config.h>
-#include <crc32.h>
-#include <deattack.h>
-#include <defines.h>
-#include <dh.h>
-#include <dirname.h>
-#include <dispatch.h>
-#include <entropy.h>
-#include <fake-gai-errnos.h>
-#include <fake-getaddrinfo.h>
-#include <fake-getnameinfo.h>
-#include <fake-socket.h>
-#include <g11n.h>
-#include <getcwd.h>
-#include <getgrouplist.h>
-#include <getopt.h>
-#include <getput.h>
-#include <glob.h>
-#include <groupaccess.h>
-#include <hostfile.h>
-#include <inet_ntoa.h>
-#include <inet_ntop.h>
-#include <kex.h>
-#include <key.h>
-#include <log.h>
-#include <loginrec.h>
-#include <mac.h>
-#include <match.h>
-#include <misc.h>
-#include <mktemp.h>
-#include <mpaux.h>
-#include <msg.h>
-#include <myproposal.h>
-#include <openbsd-compat.h>
-#include <packet.h>
-#include <pathnames.h>
-#include <port-aix.h>
-#include <port-irix.h>
-#include <proxy-io.h>
-#include <readconf.h>
-#include <readpass.h>
-#include <readpassphrase.h>
-#include <realpath.h>
-#include <rresvport.h>
-#include <rsa.h>
-#include <servconf.h>
-#include <serverloop.h>
-#include <session.h>
-#include <setproctitle.h>
-#include <sftp-common.h>
-#include <sftp.h>
-#include <sftp-client.h>
-#include <sigact.h>
-#include <ssh1.h>
-#include <ssh2.h>
-#include <sshconnect.h>
-#include <ssh-dss.h>
-#include <sshlogin.h>
-#include <sshpty.h>
-#include <ssh-rsa.h>
-#include <sshtty.h>
-#include <strlcat.h>
-#include <strlcpy.h>
-#include <strmode.h>
-#include <sys-queue.h>
-#include <sys-tree.h>
-#include <tildexpand.h>
-#include <uidswap.h>
-#include <uuencode.h>
-#include <version.h>
-#include <xlist.h>
-#include <xmalloc.h>
-#include <xmmap.h>
-
-extern uid_t original_real_uid;
-extern char *__progname;
-
diff --git a/usr/src/cmd/ssh/libssh/common/log.c b/usr/src/cmd/ssh/libssh/common/log.c
deleted file mode 100644
index 79e4cace6e..0000000000
--- a/usr/src/cmd/ssh/libssh/common/log.c
+++ /dev/null
@@ -1,445 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-/*
- * Copyright (c) 2000 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: log.c,v 1.24 2002/07/19 15:43:33 markus Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "log.h"
-#include "xmalloc.h"
-
-#include <atomic.h>
-#include <syslog.h>
-
-static LogLevel log_level = SYSLOG_LEVEL_INFO;
-static int log_on_stderr = 1;
-static int log_facility = LOG_AUTH;
-static char *argv0;
-
-extern char *__progname;
-
-static const char *log_txt_prefix = "";
-
-/* textual representation of log-facilities/levels */
-
-static struct {
- const char *name;
- SyslogFacility val;
-} log_facilities[] = {
- { "DAEMON", SYSLOG_FACILITY_DAEMON },
- { "USER", SYSLOG_FACILITY_USER },
- { "AUTH", SYSLOG_FACILITY_AUTH },
-#ifdef LOG_AUTHPRIV
- { "AUTHPRIV", SYSLOG_FACILITY_AUTHPRIV },
-#endif
- { "LOCAL0", SYSLOG_FACILITY_LOCAL0 },
- { "LOCAL1", SYSLOG_FACILITY_LOCAL1 },
- { "LOCAL2", SYSLOG_FACILITY_LOCAL2 },
- { "LOCAL3", SYSLOG_FACILITY_LOCAL3 },
- { "LOCAL4", SYSLOG_FACILITY_LOCAL4 },
- { "LOCAL5", SYSLOG_FACILITY_LOCAL5 },
- { "LOCAL6", SYSLOG_FACILITY_LOCAL6 },
- { "LOCAL7", SYSLOG_FACILITY_LOCAL7 },
- { NULL, SYSLOG_FACILITY_NOT_SET }
-};
-
-static struct {
- const char *name;
- LogLevel val;
-} log_levels[] =
-{
- { "QUIET", SYSLOG_LEVEL_QUIET },
- { "FATAL", SYSLOG_LEVEL_FATAL },
- { "ERROR", SYSLOG_LEVEL_ERROR },
- { "NOTICE", SYSLOG_LEVEL_NOTICE },
- { "INFO", SYSLOG_LEVEL_INFO },
- { "VERBOSE", SYSLOG_LEVEL_VERBOSE },
- { "DEBUG", SYSLOG_LEVEL_DEBUG1 },
- { "DEBUG1", SYSLOG_LEVEL_DEBUG1 },
- { "DEBUG2", SYSLOG_LEVEL_DEBUG2 },
- { "DEBUG3", SYSLOG_LEVEL_DEBUG3 },
- { NULL, SYSLOG_LEVEL_NOT_SET }
-};
-
-SyslogFacility
-log_facility_number(char *name)
-{
- int i;
-
- if (name != NULL)
- for (i = 0; log_facilities[i].name; i++)
- if (strcasecmp(log_facilities[i].name, name) == 0)
- return log_facilities[i].val;
- return SYSLOG_FACILITY_NOT_SET;
-}
-
-LogLevel
-log_level_number(char *name)
-{
- int i;
-
- if (name != NULL)
- for (i = 0; log_levels[i].name; i++)
- if (strcasecmp(log_levels[i].name, name) == 0)
- return log_levels[i].val;
- return SYSLOG_LEVEL_NOT_SET;
-}
-
-void
-set_log_txt_prefix(const char *txt)
-{
- log_txt_prefix = txt;
-}
-
-/* Error messages that should be logged. */
-
-void
-error(const char *fmt,...)
-{
- va_list args;
-
- va_start(args, fmt);
- do_log(SYSLOG_LEVEL_ERROR, fmt, args);
- va_end(args);
-}
-
-void
-notice(const char *fmt,...)
-{
- va_list args;
-
- va_start(args, fmt);
- do_log(SYSLOG_LEVEL_NOTICE, fmt, args);
- va_end(args);
-}
-
-/* Log this message (information that usually should go to the log). */
-
-void
-log(const char *fmt,...)
-{
- va_list args;
-
- va_start(args, fmt);
- do_log(SYSLOG_LEVEL_INFO, fmt, args);
- va_end(args);
-}
-
-/* More detailed messages (information that does not need to go to the log). */
-
-void
-verbose(const char *fmt,...)
-{
- va_list args;
-
- va_start(args, fmt);
- do_log(SYSLOG_LEVEL_VERBOSE, fmt, args);
- va_end(args);
-}
-
-/* Debugging messages that should not be logged during normal operation. */
-
-void
-debug(const char *fmt,...)
-{
- va_list args;
-
- va_start(args, fmt);
- do_log(SYSLOG_LEVEL_DEBUG1, fmt, args);
- va_end(args);
-}
-
-void
-debug2(const char *fmt,...)
-{
- va_list args;
-
- va_start(args, fmt);
- do_log(SYSLOG_LEVEL_DEBUG2, fmt, args);
- va_end(args);
-}
-
-void
-debug3(const char *fmt,...)
-{
- va_list args;
-
- va_start(args, fmt);
- do_log(SYSLOG_LEVEL_DEBUG3, fmt, args);
- va_end(args);
-}
-
-/* Fatal cleanup */
-
-struct fatal_cleanup {
- struct fatal_cleanup *next;
- void (*proc) (void *);
- void *context;
-};
-
-static struct fatal_cleanup *fatal_cleanups = NULL;
-
-/* Registers a cleanup function to be called by fatal() before exiting. */
-
-void
-fatal_add_cleanup(void (*proc) (void *), void *context)
-{
- struct fatal_cleanup *cu;
-
- cu = xmalloc(sizeof(*cu));
- cu->proc = proc;
- cu->context = context;
- cu->next = fatal_cleanups;
- fatal_cleanups = cu;
-}
-
-/* Removes a cleanup function to be called at fatal(). */
-
-void
-fatal_remove_cleanup(void (*proc) (void *context), void *context)
-{
- struct fatal_cleanup **cup, *cu;
-
- for (cup = &fatal_cleanups; *cup; cup = &cu->next) {
- cu = *cup;
- if (cu->proc == proc && cu->context == context) {
- *cup = cu->next;
- xfree(cu);
- return;
- }
- }
- debug3("fatal_remove_cleanup: no such cleanup function: 0x%lx 0x%lx",
- (u_long) proc, (u_long) context);
-}
-
-/* Remove all cleanups, to be called after fork() */
-void
-fatal_remove_all_cleanups(void)
-{
- struct fatal_cleanup *cu, *next_cu;
-
- for (cu = fatal_cleanups; cu; cu = next_cu) {
- next_cu = cu->next;
- xfree(cu);
- }
-
- fatal_cleanups = NULL;
-}
-
-/* Cleanup and exit. Make sure each cleanup is called only once. */
-void
-fatal_cleanup(void)
-{
- struct fatal_cleanup *cu, *next_cu;
- static volatile u_int called = 0;
-
- if (atomic_cas_uint(&called, 0, 1) == 1)
- exit(255);
- /* Call cleanup functions. */
- for (cu = fatal_cleanups; cu; cu = next_cu) {
- next_cu = cu->next;
- debug("Calling cleanup 0x%lx(0x%lx)",
- (u_long) cu->proc, (u_long) cu->context);
- (*cu->proc) (cu->context);
- }
- exit(255);
-}
-
-
-/*
- * Initialize the log.
- */
-
-void
-log_init(char *av0, LogLevel level, SyslogFacility facility, int on_stderr)
-{
- argv0 = av0;
-
- switch (level) {
- case SYSLOG_LEVEL_QUIET:
- case SYSLOG_LEVEL_FATAL:
- case SYSLOG_LEVEL_ERROR:
- case SYSLOG_LEVEL_NOTICE:
- case SYSLOG_LEVEL_INFO:
- case SYSLOG_LEVEL_VERBOSE:
- case SYSLOG_LEVEL_DEBUG1:
- case SYSLOG_LEVEL_DEBUG2:
- case SYSLOG_LEVEL_DEBUG3:
- log_level = level;
- break;
- default:
- fprintf(stderr, "Unrecognized internal syslog level code %d\n",
- (int) level);
- exit(1);
- }
-
- log_on_stderr = on_stderr;
- if (on_stderr)
- return;
-
- switch (facility) {
- case SYSLOG_FACILITY_DAEMON:
- log_facility = LOG_DAEMON;
- break;
- case SYSLOG_FACILITY_USER:
- log_facility = LOG_USER;
- break;
- case SYSLOG_FACILITY_AUTH:
- log_facility = LOG_AUTH;
- break;
-#ifdef LOG_AUTHPRIV
- case SYSLOG_FACILITY_AUTHPRIV:
- log_facility = LOG_AUTHPRIV;
- break;
-#endif
- case SYSLOG_FACILITY_LOCAL0:
- log_facility = LOG_LOCAL0;
- break;
- case SYSLOG_FACILITY_LOCAL1:
- log_facility = LOG_LOCAL1;
- break;
- case SYSLOG_FACILITY_LOCAL2:
- log_facility = LOG_LOCAL2;
- break;
- case SYSLOG_FACILITY_LOCAL3:
- log_facility = LOG_LOCAL3;
- break;
- case SYSLOG_FACILITY_LOCAL4:
- log_facility = LOG_LOCAL4;
- break;
- case SYSLOG_FACILITY_LOCAL5:
- log_facility = LOG_LOCAL5;
- break;
- case SYSLOG_FACILITY_LOCAL6:
- log_facility = LOG_LOCAL6;
- break;
- case SYSLOG_FACILITY_LOCAL7:
- log_facility = LOG_LOCAL7;
- break;
- default:
- fprintf(stderr,
- "Unrecognized internal syslog facility code %d\n",
- (int) facility);
- exit(1);
- }
-}
-
-#define MSGBUFSIZ 1024
-
-/* PRINTFLIKE2 */
-void
-do_log(LogLevel level, const char *fmt, va_list args)
-{
- char msgbuf[MSGBUFSIZ];
- char fmtbuf[MSGBUFSIZ];
- char *txt = NULL;
- int pri = LOG_INFO;
- int do_gettext = log_on_stderr; /*
- * Localize user messages - not
- * syslog()ed messages.
- */
-
- if (level > log_level)
- return;
-
- switch (level) {
- case SYSLOG_LEVEL_FATAL:
- if (!log_on_stderr)
- txt = "fatal";
- pri = LOG_CRIT;
- break;
- case SYSLOG_LEVEL_ERROR:
- if (!log_on_stderr)
- txt = "error";
- pri = LOG_ERR;
- break;
- case SYSLOG_LEVEL_NOTICE:
- pri = LOG_NOTICE;
- break;
- case SYSLOG_LEVEL_INFO:
- pri = LOG_INFO;
- break;
- case SYSLOG_LEVEL_VERBOSE:
- pri = LOG_INFO;
- break;
- case SYSLOG_LEVEL_DEBUG1:
- txt = "debug1";
- pri = LOG_DEBUG;
- /*
- * Don't localize debug messages - such are not intended
- * for users but for support staff whose preferred
- * language is unknown, therefore we default to the
- * language used in the source code: English.
- */
- do_gettext = 0;
- break;
- case SYSLOG_LEVEL_DEBUG2:
- txt = "debug2";
- pri = LOG_DEBUG;
- do_gettext = 0; /* Don't localize debug messages. */
- break;
- case SYSLOG_LEVEL_DEBUG3:
- txt = "debug3";
- pri = LOG_DEBUG;
- do_gettext = 0; /* Don't localize debug messages. */
- break;
- default:
- txt = "internal error";
- pri = LOG_ERR;
- break;
- }
- if (txt != NULL) {
- snprintf(fmtbuf, sizeof(fmtbuf), "%s%s: %s", log_txt_prefix,
- do_gettext ? gettext(txt) : txt,
- do_gettext ? gettext(fmt) : fmt);
- vsnprintf(msgbuf, sizeof(msgbuf), fmtbuf, args);
- } else {
- vsnprintf(msgbuf, sizeof(msgbuf),
- do_gettext ? gettext(fmt) : fmt,
- args);
- }
- if (log_on_stderr) {
- fprintf(stderr, "%s\r\n", msgbuf);
- } else {
- openlog(argv0 ? argv0 : __progname, LOG_PID, log_facility);
- syslog(pri, "%.500s", msgbuf);
- closelog();
- }
-}
diff --git a/usr/src/cmd/ssh/libssh/common/mac.c b/usr/src/cmd/ssh/libssh/common/mac.c
deleted file mode 100644
index e25f5eb95d..0000000000
--- a/usr/src/cmd/ssh/libssh/common/mac.c
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- * Copyright (c) 2001 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: mac.c,v 1.5 2002/05/16 22:02:50 markus Exp $");
-
-#include <openssl/hmac.h>
-
-#include "xmalloc.h"
-#include "getput.h"
-#include "log.h"
-#include "cipher.h"
-#include "kex.h"
-#include "mac.h"
-#include "misc.h"
-
-#define SSH_EVP 1 /* OpenSSL EVP-based MAC */
-
-struct {
- char *name;
- int type;
- const EVP_MD * (*mdfunc)(void);
- int truncatebits; /* truncate digest if != 0 */
- int key_len; /* will be used if we have UMAC */
-} macs[] = {
- { "hmac-sha1", SSH_EVP, EVP_sha1, 0, -1 },
- { "hmac-sha1-96", SSH_EVP, EVP_sha1, 96, -1 },
- { "hmac-md5", SSH_EVP, EVP_md5, 0, -1 },
- { "hmac-md5-96", SSH_EVP, EVP_md5, 96, -1 },
-#ifdef SOLARIS_SSH_ENABLE_RIPEMD160
- { "hmac-ripemd160", SSH_EVP, EVP_ripemd160, 0, -1 },
- { "hmac-ripemd160@openssh.com", SSH_EVP, EVP_ripemd160, 0, -1 },
-#endif /* SOLARIS_SSH_ENABLE_RIPEMD160 */
- { NULL, 0, NULL, 0, -1 }
-};
-
-static void
-mac_setup_by_id(Mac *mac, int which)
-{
- int evp_len;
- mac->type = macs[which].type;
- if (mac->type == SSH_EVP) {
- mac->evp_md = (*macs[which].mdfunc)();
- if ((evp_len = EVP_MD_size(mac->evp_md)) <= 0)
- fatal("mac %s len %d", mac->name, evp_len);
- mac->key_len = mac->mac_len = (u_int)evp_len;
- } else
- fatal("wrong MAC type (%d)", mac->type);
- if (macs[which].truncatebits != 0)
- mac->mac_len = macs[which].truncatebits / 8;
-}
-
-int
-mac_setup(Mac *mac, char *name)
-{
- int i;
-
- for (i = 0; macs[i].name; i++) {
- if (strcmp(name, macs[i].name) == 0) {
- if (mac != NULL)
- mac_setup_by_id(mac, i);
- debug2("mac_setup: found %s", name);
- return (0);
- }
- }
- debug2("mac_setup: unknown %s", name);
- return (-1);
-}
-
-int
-mac_init(Mac *mac)
-{
- if (mac->key == NULL)
- fatal("mac_init: no key");
- switch (mac->type) {
- case SSH_EVP:
- if (mac->evp_md == NULL)
- return -1;
- HMAC_Init(&mac->evp_ctx, mac->key, mac->key_len, mac->evp_md);
- return 0;
- default:
- return -1;
- }
-}
-
-u_char *
-mac_compute(Mac *mac, u_int32_t seqno, u_char *data, int datalen)
-{
- static u_char m[EVP_MAX_MD_SIZE];
- u_char b[4];
-
- if (mac->mac_len > sizeof(m))
- fatal("mac_compute: mac too long %u %lu",
- mac->mac_len, (u_long)sizeof(m));
-
- switch (mac->type) {
- case SSH_EVP:
- put_u32(b, seqno);
- /* reset HMAC context */
- HMAC_Init(&mac->evp_ctx, NULL, 0, NULL);
- HMAC_Update(&mac->evp_ctx, b, sizeof(b));
- HMAC_Update(&mac->evp_ctx, data, datalen);
- HMAC_Final(&mac->evp_ctx, m, NULL);
- break;
- default:
- fatal("mac_compute: unknown MAC type");
- }
-
- return (m);
-}
-
-void
-mac_clear(Mac *mac)
-{
- if (mac->evp_md != NULL)
- HMAC_cleanup(&mac->evp_ctx);
- mac->evp_md = NULL;
-}
-
-/* XXX copied from ciphers_valid */
-#define MAC_SEP ","
-int
-mac_valid(const char *names)
-{
- char *maclist, *cp, *p;
-
- if (names == NULL || strcmp(names, "") == 0)
- return (0);
- maclist = cp = xstrdup(names);
- for ((p = strsep(&cp, MAC_SEP)); p && *p != '\0';
- (p = strsep(&cp, MAC_SEP))) {
- if (mac_setup(NULL, p) < 0) {
- debug("bad mac %s [%s]", p, names);
- xfree(maclist);
- return (0);
- } else {
- debug3("mac ok: %s [%s]", p, names);
- }
- }
- debug3("macs ok: [%s]", names);
- xfree(maclist);
- return (1);
-}
diff --git a/usr/src/cmd/ssh/libssh/common/match.c b/usr/src/cmd/ssh/libssh/common/match.c
deleted file mode 100644
index 4724d435e7..0000000000
--- a/usr/src/cmd/ssh/libssh/common/match.c
+++ /dev/null
@@ -1,271 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * Simple pattern matching, with '*' and '?' as wildcards.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-/*
- * Copyright (c) 2000 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: match.c,v 1.19 2002/03/01 13:12:10 markus Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "match.h"
-#include "xmalloc.h"
-
-/*
- * Returns true if the given string matches the pattern (which may contain ?
- * and * as wildcards), and zero if it does not match.
- */
-
-int
-match_pattern(const char *s, const char *pattern)
-{
- for (;;) {
- /* If at end of pattern, accept if also at end of string. */
- if (!*pattern)
- return !*s;
-
- if (*pattern == '*') {
- /* Skip the asterisk. */
- pattern++;
-
- /* If at end of pattern, accept immediately. */
- if (!*pattern)
- return 1;
-
- /* If next character in pattern is known, optimize. */
- if (*pattern != '?' && *pattern != '*') {
- /*
- * Look instances of the next character in
- * pattern, and try to match starting from
- * those.
- */
- for (; *s; s++)
- if (*s == *pattern &&
- match_pattern(s + 1, pattern + 1))
- return 1;
- /* Failed. */
- return 0;
- }
- /*
- * Move ahead one character at a time and try to
- * match at each position.
- */
- for (; *s; s++)
- if (match_pattern(s, pattern))
- return 1;
- /* Failed. */
- return 0;
- }
- /*
- * There must be at least one more character in the string.
- * If we are at the end, fail.
- */
- if (!*s)
- return 0;
-
- /* Check if the next character of the string is acceptable. */
- if (*pattern != '?' && *pattern != *s)
- return 0;
-
- /* Move to the next character, both in string and in pattern. */
- s++;
- pattern++;
- }
- /* NOTREACHED */
-}
-
-/*
- * Tries to match the string against the
- * comma-separated sequence of subpatterns (each possibly preceded by ! to
- * indicate negation). Returns -1 if negation matches, 1 if there is
- * a positive match, 0 if there is no match at all.
- */
-
-int
-match_pattern_list(const char *string, const char *pattern, u_int len,
- int dolower)
-{
- char sub[1024];
- int negated;
- int got_positive;
- u_int i, subi;
-
- got_positive = 0;
- for (i = 0; i < len;) {
- /* Check if the subpattern is negated. */
- if (pattern[i] == '!') {
- negated = 1;
- i++;
- } else
- negated = 0;
-
- /*
- * Extract the subpattern up to a comma or end. Convert the
- * subpattern to lowercase.
- */
- for (subi = 0;
- i < len && subi < sizeof(sub) - 1 && pattern[i] != ',';
- subi++, i++)
- sub[subi] = dolower && isupper(pattern[i]) ?
- tolower(pattern[i]) : pattern[i];
- /* If subpattern too long, return failure (no match). */
- if (subi >= sizeof(sub) - 1)
- return 0;
-
- /* If the subpattern was terminated by a comma, skip the comma. */
- if (i < len && pattern[i] == ',')
- i++;
-
- /* Null-terminate the subpattern. */
- sub[subi] = '\0';
-
- /* Try to match the subpattern against the string. */
- if (match_pattern(string, sub)) {
- if (negated)
- return -1; /* Negative */
- else
- got_positive = 1; /* Positive */
- }
- }
-
- /*
- * Return success if got a positive match. If there was a negative
- * match, we have already returned -1 and never get here.
- */
- return got_positive;
-}
-
-/*
- * Tries to match the host name (which must be in all lowercase) against the
- * comma-separated sequence of subpatterns (each possibly preceded by ! to
- * indicate negation). Returns -1 if negation matches, 1 if there is
- * a positive match, 0 if there is no match at all.
- */
-int
-match_hostname(const char *host, const char *pattern, u_int len)
-{
- return match_pattern_list(host, pattern, len, 1);
-}
-
-/*
- * returns 0 if we get a negative match for the hostname or the ip
- * or if we get no match at all. returns 1 otherwise.
- */
-int
-match_host_and_ip(const char *host, const char *ipaddr,
- const char *patterns)
-{
- int mhost, mip;
-
- /* negative ipaddr match */
- if ((mip = match_hostname(ipaddr, patterns, strlen(patterns))) == -1)
- return 0;
- /* negative hostname match */
- if ((mhost = match_hostname(host, patterns, strlen(patterns))) == -1)
- return 0;
- /* no match at all */
- if (mhost == 0 && mip == 0)
- return 0;
- return 1;
-}
-
-/*
- * match user, user@host_or_ip, user@host_or_ip_list against pattern
- */
-int
-match_user(const char *user, const char *host, const char *ipaddr,
- const char *pattern)
-{
- char *p, *pat;
- int ret;
-
- if ((p = strchr(pattern,'@')) == NULL)
- return match_pattern(user, pattern);
-
- pat = xstrdup(pattern);
- p = strchr(pat, '@');
- *p++ = '\0';
-
- if ((ret = match_pattern(user, pat)) == 1)
- ret = match_host_and_ip(host, ipaddr, p);
- xfree(pat);
-
- return ret;
-}
-
-/*
- * Returns first item from client-list that is also supported by server-list,
- * caller must xfree() returned string.
- */
-#define MAX_PROP 40
-#define SEP ","
-char *
-match_list(const char *client, const char *server, u_int *next)
-{
- char *sproposals[MAX_PROP];
- char *c, *s, *p, *ret, *cp, *sp;
- int i, j, nproposals;
-
- c = cp = xstrdup(client);
- s = sp = xstrdup(server);
-
- for ((p = strsep(&sp, SEP)), i=0; p && *p != '\0';
- (p = strsep(&sp, SEP)), i++) {
- if (i < MAX_PROP)
- sproposals[i] = p;
- else
- break;
- }
- nproposals = i;
-
- for ((p = strsep(&cp, SEP)), i=0; p && *p != '\0';
- (p = strsep(&cp, SEP)), i++) {
- for (j = 0; j < nproposals; j++) {
- if (strcmp(p, sproposals[j]) == 0) {
- ret = xstrdup(p);
- if (next != NULL)
- *next = (cp == NULL) ?
- strlen(c) : cp - c;
- xfree(c);
- xfree(s);
- return ret;
- }
- }
- }
- if (next != NULL)
- *next = strlen(c);
- xfree(c);
- xfree(s);
- return NULL;
-}
diff --git a/usr/src/cmd/ssh/libssh/common/misc.c b/usr/src/cmd/ssh/libssh/common/misc.c
deleted file mode 100644
index e73d3f364b..0000000000
--- a/usr/src/cmd/ssh/libssh/common/misc.c
+++ /dev/null
@@ -1,702 +0,0 @@
-/*
- * Copyright (c) 2000 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: misc.c,v 1.19 2002/03/04 17:27:39 stevesk Exp $");
-
-#include "misc.h"
-#include "log.h"
-#include "xmalloc.h"
-
-/* remove newline at end of string */
-char *
-chop(char *s)
-{
- char *t = s;
- while (*t) {
- if (*t == '\n' || *t == '\r') {
- *t = '\0';
- return s;
- }
- t++;
- }
- return s;
-
-}
-
-/* set/unset filedescriptor to non-blocking */
-void
-set_nonblock(int fd)
-{
- int val;
-
- val = fcntl(fd, F_GETFL, 0);
- if (val < 0) {
- error("fcntl(%d, F_GETFL, 0): %s", fd, strerror(errno));
- return;
- }
- if (val & O_NONBLOCK) {
- debug2("fd %d is O_NONBLOCK", fd);
- return;
- }
- debug("fd %d setting O_NONBLOCK", fd);
- val |= O_NONBLOCK;
- if (fcntl(fd, F_SETFL, val) == -1)
- debug("fcntl(%d, F_SETFL, O_NONBLOCK): %s",
- fd, strerror(errno));
-}
-
-void
-unset_nonblock(int fd)
-{
- int val;
-
- val = fcntl(fd, F_GETFL, 0);
- if (val < 0) {
- error("fcntl(%d, F_GETFL, 0): %s", fd, strerror(errno));
- return;
- }
- if (!(val & O_NONBLOCK)) {
- debug2("fd %d is not O_NONBLOCK", fd);
- return;
- }
- debug("fd %d clearing O_NONBLOCK", fd);
- val &= ~O_NONBLOCK;
- if (fcntl(fd, F_SETFL, val) == -1)
- debug("fcntl(%d, F_SETFL, O_NONBLOCK): %s",
- fd, strerror(errno));
-}
-
-/* disable nagle on socket */
-void
-set_nodelay(int fd)
-{
- int opt;
- socklen_t optlen;
-
- optlen = sizeof opt;
- if (getsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &opt, &optlen) == -1) {
- error("getsockopt TCP_NODELAY: %.100s", strerror(errno));
- return;
- }
- if (opt == 1) {
- debug2("fd %d is TCP_NODELAY", fd);
- return;
- }
- opt = 1;
- debug("fd %d setting TCP_NODELAY", fd);
- if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &opt, sizeof opt) == -1)
- error("setsockopt TCP_NODELAY: %.100s", strerror(errno));
-}
-
-/* Characters considered whitespace in strsep calls. */
-#define WHITESPACE " \t\r\n"
-
-/*
- * Function returns a pointer to the 1st token on the line. Such a token can
- * be an empty string in the case of '*s' equal to " value". It changes the
- * first whitespace token or '=' character after the 1st token to '\0'. Upon
- * return it changes '*s' to point to the first character of the next token.
- * That token may be an empty string if the 1st token was followed only by
- * whitespace or it could be a NULL pointer if the line contained one token
- * only.
- */
-char *
-strdelim(char **s)
-{
- char *old;
- int wspace = 0;
-
- if (*s == NULL)
- return NULL;
-
- old = *s;
-
- *s = strpbrk(*s, WHITESPACE "=");
- if (*s == NULL)
- return (old);
-
- /* Allow only one '=' to be skipped */
- if (*s[0] == '=')
- wspace = 1;
- *s[0] = '\0';
-
- *s += strspn(*s + 1, WHITESPACE) + 1;
- if (*s[0] == '=' && !wspace)
- *s += strspn(*s + 1, WHITESPACE) + 1;
-
- return (old);
-}
-
-struct passwd *
-pwcopy(struct passwd *pw)
-{
- struct passwd *copy = xmalloc(sizeof(*copy));
-
- memset(copy, 0, sizeof(*copy));
- copy->pw_name = xstrdup(pw->pw_name);
- copy->pw_passwd = xstrdup(pw->pw_passwd);
- copy->pw_gecos = xstrdup(pw->pw_gecos);
- copy->pw_uid = pw->pw_uid;
- copy->pw_gid = pw->pw_gid;
-#ifdef HAVE_PW_EXPIRE_IN_PASSWD
- copy->pw_expire = pw->pw_expire;
-#endif
-#ifdef HAVE_PW_CHANGE_IN_PASSWD
- copy->pw_change = pw->pw_change;
-#endif
-#ifdef HAVE_PW_CLASS_IN_PASSWD
- copy->pw_class = xstrdup(pw->pw_class);
-#endif
- copy->pw_dir = xstrdup(pw->pw_dir);
- copy->pw_shell = xstrdup(pw->pw_shell);
- return copy;
-}
-
-void
-pwfree(struct passwd **pw)
-{
- struct passwd *p;
-
- if (pw == NULL || *pw == NULL)
- return;
-
- p = *pw;
- *pw = NULL;
-
- xfree(p->pw_name);
- xfree(p->pw_passwd);
- xfree(p->pw_gecos);
-#ifdef HAVE_PW_CLASS_IN_PASSWD
- xfree(p->pw_class);
-#endif
- xfree(p->pw_dir);
- xfree(p->pw_shell);
- xfree(p);
-}
-
-/*
- * Convert ASCII string to TCP/IP port number.
- * Port must be >0 and <=65535.
- * Return 0 if invalid.
- */
-int
-a2port(const char *s)
-{
- long port;
- char *endp;
-
- errno = 0;
- port = strtol(s, &endp, 0);
- if (s == endp || *endp != '\0' ||
- (errno == ERANGE && (port == LONG_MIN || port == LONG_MAX)) ||
- port <= 0 || port > 65535)
- return 0;
-
- return port;
-}
-
-#define SECONDS 1
-#define MINUTES (SECONDS * 60)
-#define HOURS (MINUTES * 60)
-#define DAYS (HOURS * 24)
-#define WEEKS (DAYS * 7)
-
-/*
- * Convert a time string into seconds; format is
- * a sequence of:
- * time[qualifier]
- *
- * Valid time qualifiers are:
- * <none> seconds
- * s|S seconds
- * m|M minutes
- * h|H hours
- * d|D days
- * w|W weeks
- *
- * Examples:
- * 90m 90 minutes
- * 1h30m 90 minutes
- * 2d 2 days
- * 1w 1 week
- *
- * Return -1 if time string is invalid.
- */
-long
-convtime(const char *s)
-{
- long total, secs;
- const char *p;
- char *endp;
-
- errno = 0;
- total = 0;
- p = s;
-
- if (p == NULL || *p == '\0')
- return -1;
-
- while (*p) {
- secs = strtol(p, &endp, 10);
- if (p == endp ||
- (errno == ERANGE && (secs == LONG_MIN || secs == LONG_MAX)) ||
- secs < 0)
- return -1;
-
- switch (*endp++) {
- case '\0':
- endp--;
- break;
- case 's':
- case 'S':
- break;
- case 'm':
- case 'M':
- secs *= MINUTES;
- break;
- case 'h':
- case 'H':
- secs *= HOURS;
- break;
- case 'd':
- case 'D':
- secs *= DAYS;
- break;
- case 'w':
- case 'W':
- secs *= WEEKS;
- break;
- default:
- return -1;
- }
- total += secs;
- if (total < 0)
- return -1;
- p = endp;
- }
-
- return total;
-}
-
-/*
- * Search for next delimiter between hostnames/addresses and ports.
- * Argument may be modified (for termination).
- * Returns *cp if parsing succeeds.
- * *cp is set to the start of the next delimiter, if one was found.
- * If this is the last field, *cp is set to NULL.
- */
-char *
-hpdelim(char **cp)
-{
- char *s, *old;
-
- if (cp == NULL || *cp == NULL)
- return NULL;
-
- old = s = *cp;
- if (*s == '[') {
- if ((s = strchr(s, ']')) == NULL)
- return NULL;
- else
- s++;
- } else if ((s = strpbrk(s, ":/")) == NULL)
- s = *cp + strlen(*cp); /* skip to end (see first case below) */
-
- switch (*s) {
- case '\0':
- *cp = NULL; /* no more fields*/
- break;
-
- case ':':
- case '/':
- *s = '\0'; /* terminate */
- *cp = s + 1;
- break;
-
- default:
- return NULL;
- }
-
- return old;
-}
-
-char *
-cleanhostname(char *host)
-{
- if (*host == '[' && host[strlen(host) - 1] == ']') {
- host[strlen(host) - 1] = '\0';
- return (host + 1);
- } else
- return host;
-}
-
-char *
-colon(char *cp)
-{
- int flag = 0;
-
- if (*cp == ':') /* Leading colon is part of file name. */
- return (0);
- if (*cp == '[')
- flag = 1;
-
- for (; *cp; ++cp) {
- if (*cp == '@' && *(cp+1) == '[')
- flag = 1;
- if (*cp == ']' && *(cp+1) == ':' && flag)
- return (cp+1);
- if (*cp == ':' && !flag)
- return (cp);
- if (*cp == '/')
- return (0);
- }
- return (0);
-}
-
-/* function to assist building execv() arguments */
-/* PRINTFLIKE2 */
-void
-addargs(arglist *args, char *fmt, ...)
-{
- va_list ap;
- char buf[1024];
-
- va_start(ap, fmt);
- vsnprintf(buf, sizeof(buf), fmt, ap);
- va_end(ap);
-
- if (args->list == NULL) {
- args->nalloc = 32;
- args->num = 0;
- } else if (args->num+2 >= args->nalloc)
- args->nalloc *= 2;
-
- args->list = xrealloc(args->list, args->nalloc * sizeof(char *));
- args->list[args->num++] = xstrdup(buf);
- args->list[args->num] = NULL;
-}
-
-void
-replacearg(arglist *args, u_int which, char *fmt, ...)
-{
- va_list ap;
- char *cp;
- int r;
-
- va_start(ap, fmt);
- r = vasprintf(&cp, fmt, ap);
- va_end(ap);
- if (r == -1)
- fatal("replacearg: argument too long");
-
- if (which >= args->num)
- fatal("replacearg: tried to replace invalid arg %d >= %d",
- which, args->num);
- xfree(args->list[which]);
- args->list[which] = cp;
-}
-
-void
-freeargs(arglist *args)
-{
- u_int i;
-
- if (args->list != NULL) {
- for (i = 0; i < args->num; i++)
- xfree(args->list[i]);
- xfree(args->list);
- args->nalloc = args->num = 0;
- args->list = NULL;
- }
-}
-
-/*
- * Expand a string with a set of %[char] escapes. A number of escapes may be
- * specified as (char *escape_chars, char *replacement) pairs. The list must
- * be terminated by a NULL escape_char. Returns replaced string in memory
- * allocated by xmalloc.
- */
-char *
-percent_expand(const char *string, ...)
-{
-#define EXPAND_MAX_KEYS 16
- struct {
- const char *key;
- const char *repl;
- } keys[EXPAND_MAX_KEYS];
- u_int num_keys, i, j;
- char buf[4096];
- va_list ap;
-
- /* Gather keys */
- va_start(ap, string);
- for (num_keys = 0; num_keys < EXPAND_MAX_KEYS; num_keys++) {
- keys[num_keys].key = va_arg(ap, char *);
- if (keys[num_keys].key == NULL)
- break;
- keys[num_keys].repl = va_arg(ap, char *);
- if (keys[num_keys].repl == NULL)
- fatal("percent_expand: NULL replacement");
- }
- va_end(ap);
-
- if (num_keys >= EXPAND_MAX_KEYS)
- fatal("percent_expand: too many keys");
-
- /* Expand string */
- *buf = '\0';
- for (i = 0; *string != '\0'; string++) {
- if (*string != '%') {
- append:
- buf[i++] = *string;
- if (i >= sizeof(buf))
- fatal("percent_expand: string too long");
- buf[i] = '\0';
- continue;
- }
- string++;
- if (*string == '%')
- goto append;
- for (j = 0; j < num_keys; j++) {
- if (strchr(keys[j].key, *string) != NULL) {
- i = strlcat(buf, keys[j].repl, sizeof(buf));
- if (i >= sizeof(buf))
- fatal("percent_expand: string too long");
- break;
- }
- }
- if (j >= num_keys)
- fatal("percent_expand: unknown key %%%c", *string);
- }
- return (xstrdup(buf));
-#undef EXPAND_MAX_KEYS
-}
-
-/*
- * Ensure that file descriptors 0, 1 and 2 are open or directed to /dev/null,
- * do not touch those that are already open.
- */
-void
-sanitise_stdfd(void)
-{
- int nullfd, dupfd;
-
- if ((nullfd = dupfd = open(_PATH_DEVNULL, O_RDWR)) == -1) {
- fprintf(stderr, "Couldn't open /dev/null: %s", strerror(errno));
- exit(1);
- }
- while (++dupfd <= 2) {
- /* Only clobber closed fds */
- if (fcntl(dupfd, F_GETFL, 0) >= 0)
- continue;
- if (dup2(nullfd, dupfd) == -1) {
- fprintf(stderr, "dup2: %s", strerror(errno));
- exit(1);
- }
- }
- if (nullfd > 2)
- close(nullfd);
-}
-
-char *
-tohex(const void *vp, size_t l)
-{
- const u_char *p = (const u_char *)vp;
- char b[3], *r;
- size_t i, hl;
-
- if (l > 65536)
- return xstrdup("tohex: length > 65536");
-
- hl = l * 2 + 1;
- r = xcalloc(1, hl);
- for (i = 0; i < l; i++) {
- snprintf(b, sizeof(b), "%02x", p[i]);
- strlcat(r, b, hl);
- }
- return (r);
-}
-
-u_int64_t
-get_u64(const void *vp)
-{
- const u_char *p = (const u_char *)vp;
- u_int64_t v;
-
- v = (u_int64_t)p[0] << 56;
- v |= (u_int64_t)p[1] << 48;
- v |= (u_int64_t)p[2] << 40;
- v |= (u_int64_t)p[3] << 32;
- v |= (u_int64_t)p[4] << 24;
- v |= (u_int64_t)p[5] << 16;
- v |= (u_int64_t)p[6] << 8;
- v |= (u_int64_t)p[7];
-
- return (v);
-}
-
-u_int32_t
-get_u32(const void *vp)
-{
- const u_char *p = (const u_char *)vp;
- u_int32_t v;
-
- v = (u_int32_t)p[0] << 24;
- v |= (u_int32_t)p[1] << 16;
- v |= (u_int32_t)p[2] << 8;
- v |= (u_int32_t)p[3];
-
- return (v);
-}
-
-u_int16_t
-get_u16(const void *vp)
-{
- const u_char *p = (const u_char *)vp;
- u_int16_t v;
-
- v = (u_int16_t)p[0] << 8;
- v |= (u_int16_t)p[1];
-
- return (v);
-}
-
-void
-put_u64(void *vp, u_int64_t v)
-{
- u_char *p = (u_char *)vp;
-
- p[0] = (u_char)(v >> 56) & 0xff;
- p[1] = (u_char)(v >> 48) & 0xff;
- p[2] = (u_char)(v >> 40) & 0xff;
- p[3] = (u_char)(v >> 32) & 0xff;
- p[4] = (u_char)(v >> 24) & 0xff;
- p[5] = (u_char)(v >> 16) & 0xff;
- p[6] = (u_char)(v >> 8) & 0xff;
- p[7] = (u_char)v & 0xff;
-}
-
-void
-put_u32(void *vp, u_int32_t v)
-{
- u_char *p = (u_char *)vp;
-
- p[0] = (u_char)(v >> 24) & 0xff;
- p[1] = (u_char)(v >> 16) & 0xff;
- p[2] = (u_char)(v >> 8) & 0xff;
- p[3] = (u_char)v & 0xff;
-}
-
-
-void
-put_u16(void *vp, u_int16_t v)
-{
- u_char *p = (u_char *)vp;
-
- p[0] = (u_char)(v >> 8) & 0xff;
- p[1] = (u_char)v & 0xff;
-}
-
-mysig_t
-mysignal(int sig, mysig_t act)
-{
-#ifdef HAVE_SIGACTION
- struct sigaction sa, osa;
-
- if (sigaction(sig, NULL, &osa) == -1)
- return (mysig_t) -1;
- if (osa.sa_handler != act) {
- memset(&sa, 0, sizeof(sa));
- sigemptyset(&sa.sa_mask);
- sa.sa_flags = 0;
-#if defined(SA_INTERRUPT)
- if (sig == SIGALRM)
- sa.sa_flags |= SA_INTERRUPT;
-#endif
- sa.sa_handler = act;
- if (sigaction(sig, &sa, NULL) == -1)
- return (mysig_t) -1;
- }
- return (osa.sa_handler);
-#else
- return (signal(sig, act));
-#endif
-}
-
-/*
- * Return true if argument is one of "yes", "true", "no" or "false". If
- * 'active' is 0 than we are in a non-matching Host section of the
- * configuration file so we check the syntax but will not set the value of
- * '*option'. Otherwise we set its value if not already set.
- */
-int
-get_yes_no_flag(int *option, const char *arg, const char *filename, int linenum,
- int active)
-{
- int value = -1;
-
- if (arg == NULL || *arg == '\0')
- fatal("%.200s line %d: Missing argument.", filename, linenum);
- if (strcmp(arg, "yes") == 0 || strcmp(arg, "true") == 0)
- value = 1;
- else if (strcmp(arg, "no") == 0 || strcmp(arg, "false") == 0)
- value = 0;
-
- if (active && *option == -1 && value != -1)
- *option = value;
-
- return (value != -1);
-}
-
-/*
- * Convert a string to lowercase. The string returned is an internally allocated
- * one so the consumer of this function is not expected to change it or free it.
- */
-char *
-tolowercase(const char *s)
-{
- int i, len;
- static int lenret = 0;
- static char *ret = NULL;
-
- /* allocate a new string if the old one it not long enough to store s */
- len = strlen(s) + 1;
- if (len > lenret) {
- if (ret != NULL)
- xfree(ret);
- ret = xmalloc(len);
- lenret = len;
- }
-
- /* process the string including the ending '\0' */
- for (i = 0; i < len; ++i)
- ret[i] = tolower(s[i]);
-
- return (ret);
-}
diff --git a/usr/src/cmd/ssh/libssh/common/mpaux.c b/usr/src/cmd/ssh/libssh/common/mpaux.c
deleted file mode 100644
index 1b77961b62..0000000000
--- a/usr/src/cmd/ssh/libssh/common/mpaux.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * This file contains various auxiliary functions related to multiple
- * precision integers.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: mpaux.c,v 1.16 2001/02/08 19:30:52 itojun Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <openssl/bn.h>
-#include "getput.h"
-#include "xmalloc.h"
-
-#include <openssl/md5.h>
-
-#include "mpaux.h"
-
-void
-compute_session_id(u_char session_id[16],
- u_char cookie[8],
- BIGNUM* host_key_n,
- BIGNUM* session_key_n)
-{
- u_int host_key_bytes = BN_num_bytes(host_key_n);
- u_int session_key_bytes = BN_num_bytes(session_key_n);
- u_int bytes = host_key_bytes + session_key_bytes;
- u_char *buf = xmalloc(bytes);
- MD5_CTX md;
-
- BN_bn2bin(host_key_n, buf);
- BN_bn2bin(session_key_n, buf + host_key_bytes);
- MD5_Init(&md);
- MD5_Update(&md, buf, bytes);
- MD5_Update(&md, cookie, 8);
- MD5_Final(session_id, &md);
- memset(buf, 0, bytes);
- xfree(buf);
-}
diff --git a/usr/src/cmd/ssh/libssh/common/msg.c b/usr/src/cmd/ssh/libssh/common/msg.c
deleted file mode 100644
index 26c5eeec61..0000000000
--- a/usr/src/cmd/ssh/libssh/common/msg.c
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2002 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#include "includes.h"
-RCSID("$OpenBSD: msg.c,v 1.4 2002/07/01 16:15:25 deraadt Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "buffer.h"
-#include "getput.h"
-#include "log.h"
-#include "atomicio.h"
-#include "msg.h"
-
-void
-ssh_msg_send(int fd, u_char type, Buffer *m)
-{
- u_char buf[5];
- u_int mlen = buffer_len(m);
-
- debug3("ssh_msg_send: type %u", (unsigned int)type & 0xff);
-
- PUT_32BIT(buf, mlen + 1);
- buf[4] = type; /* 1st byte of payload is mesg-type */
- if (atomicio(write, fd, buf, sizeof(buf)) != sizeof(buf))
- fatal("ssh_msg_send: write");
- if (atomicio(write, fd, buffer_ptr(m), mlen) != mlen)
- fatal("ssh_msg_send: write");
-}
-
-int
-ssh_msg_recv(int fd, Buffer *m)
-{
- u_char buf[4];
- ssize_t res;
- u_int msg_len;
-
- debug3("ssh_msg_recv entering");
-
- res = atomicio(read, fd, buf, sizeof(buf));
- if (res != sizeof(buf)) {
- if (res == 0)
- return -1;
- fatal("ssh_msg_recv: read: header %ld", (long)res);
- }
- msg_len = GET_32BIT(buf);
- if (msg_len > 256 * 1024)
- fatal("ssh_msg_recv: read: bad msg_len %u", msg_len);
- buffer_clear(m);
- buffer_append_space(m, msg_len);
- res = atomicio(read, fd, buffer_ptr(m), msg_len);
- if (res != msg_len)
- fatal("ssh_msg_recv: read: %ld != msg_len", (long)res);
- return 0;
-}
diff --git a/usr/src/cmd/ssh/libssh/common/nchan.c b/usr/src/cmd/ssh/libssh/common/nchan.c
deleted file mode 100644
index 82a371af5b..0000000000
--- a/usr/src/cmd/ssh/libssh/common/nchan.c
+++ /dev/null
@@ -1,512 +0,0 @@
-/*
- * Copyright (c) 1999, 2000, 2001, 2002 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: nchan.c,v 1.47 2002/06/19 00:27:55 deraadt Exp $");
-
-#include "ssh1.h"
-#include "ssh2.h"
-#include "buffer.h"
-#include "packet.h"
-#include "channels.h"
-#include "compat.h"
-#include "log.h"
-
-/*
- * SSH Protocol 1.5 aka New Channel Protocol
- * Thanks to Martina, Axel and everyone who left Erlangen, leaving me bored.
- * Written by Markus Friedl in October 1999
- *
- * Protocol versions 1.3 and 1.5 differ in the handshake protocol used for the
- * tear down of channels:
- *
- * 1.3: strict request-ack-protocol:
- * CLOSE ->
- * <- CLOSE_CONFIRM
- *
- * 1.5: uses variations of:
- * IEOF ->
- * <- OCLOSE
- * <- IEOF
- * OCLOSE ->
- * i.e. both sides have to close the channel
- *
- * 2.0: the EOF messages are optional
- *
- * See the debugging output from 'ssh -v' and 'sshd -d' of
- * ssh-1.2.27 as an example.
- *
- */
-
-/* functions manipulating channel states */
-/*
- * EVENTS update channel input/output states execute ACTIONS
- */
-/*
- * ACTIONS: should never update the channel states
- */
-static void chan_send_ieof1(Channel *);
-static void chan_send_oclose1(Channel *);
-static void chan_send_close2(Channel *);
-static void chan_send_eof2(Channel *);
-static void chan_send_eow2(Channel *);
-
-/* helper */
-static void chan_shutdown_write(Channel *);
-static void chan_shutdown_read(Channel *);
-
-static char *ostates[] = { "open", "drain", "wait_ieof", "closed" };
-static char *istates[] = { "open", "drain", "wait_oclose", "closed" };
-
-static void
-chan_set_istate(Channel *c, u_int next)
-{
- if (c->istate > CHAN_INPUT_CLOSED || next > CHAN_INPUT_CLOSED)
- fatal("chan_set_istate: bad state %d -> %d", c->istate, next);
- debug("channel %d: input %s -> %s", c->self, istates[c->istate],
- istates[next]);
- c->istate = next;
-}
-static void
-chan_set_ostate(Channel *c, u_int next)
-{
- if (c->ostate > CHAN_OUTPUT_CLOSED || next > CHAN_OUTPUT_CLOSED)
- fatal("chan_set_ostate: bad state %d -> %d", c->ostate, next);
- debug("channel %d: output %s -> %s", c->self, ostates[c->ostate],
- ostates[next]);
- c->ostate = next;
-}
-
-/*
- * SSH1 specific implementation of event functions
- */
-
-static void
-chan_rcvd_oclose1(Channel *c)
-{
- debug("channel %d: rcvd oclose", c->self);
- switch (c->istate) {
- case CHAN_INPUT_WAIT_OCLOSE:
- chan_set_istate(c, CHAN_INPUT_CLOSED);
- break;
- case CHAN_INPUT_OPEN:
- chan_shutdown_read(c);
- chan_send_ieof1(c);
- chan_set_istate(c, CHAN_INPUT_CLOSED);
- break;
- case CHAN_INPUT_WAIT_DRAIN:
- /* both local read_failed and remote write_failed */
- chan_send_ieof1(c);
- chan_set_istate(c, CHAN_INPUT_CLOSED);
- break;
- default:
- error("channel %d: protocol error: rcvd_oclose for istate %d",
- c->self, c->istate);
- return;
- }
-}
-void
-chan_read_failed(Channel *c)
-{
- debug("channel %d: read failed", c->self);
- switch (c->istate) {
- case CHAN_INPUT_OPEN:
- chan_shutdown_read(c);
- chan_set_istate(c, CHAN_INPUT_WAIT_DRAIN);
- break;
- default:
- error("channel %d: chan_read_failed for istate %d",
- c->self, c->istate);
- break;
- }
-}
-void
-chan_ibuf_empty(Channel *c)
-{
- debug("channel %d: ibuf empty", c->self);
- if (buffer_len(&c->input)) {
- error("channel %d: chan_ibuf_empty for non empty buffer",
- c->self);
- return;
- }
- switch (c->istate) {
- case CHAN_INPUT_WAIT_DRAIN:
- if (compat20) {
- if (!(c->flags & CHAN_CLOSE_SENT))
- chan_send_eof2(c);
- chan_set_istate(c, CHAN_INPUT_CLOSED);
- } else {
- chan_send_ieof1(c);
- chan_set_istate(c, CHAN_INPUT_WAIT_OCLOSE);
- }
- break;
- default:
- error("channel %d: chan_ibuf_empty for istate %d",
- c->self, c->istate);
- break;
- }
-}
-static void
-chan_rcvd_ieof1(Channel *c)
-{
- debug("channel %d: rcvd ieof", c->self);
- switch (c->ostate) {
- case CHAN_OUTPUT_OPEN:
- chan_set_ostate(c, CHAN_OUTPUT_WAIT_DRAIN);
- break;
- case CHAN_OUTPUT_WAIT_IEOF:
- chan_set_ostate(c, CHAN_OUTPUT_CLOSED);
- break;
- default:
- error("channel %d: protocol error: rcvd_ieof for ostate %d",
- c->self, c->ostate);
- break;
- }
-}
-static void
-chan_write_failed1(Channel *c)
-{
- debug("channel %d: write failed", c->self);
- switch (c->ostate) {
- case CHAN_OUTPUT_OPEN:
- chan_shutdown_write(c);
- chan_send_oclose1(c);
- chan_set_ostate(c, CHAN_OUTPUT_WAIT_IEOF);
- break;
- case CHAN_OUTPUT_WAIT_DRAIN:
- chan_shutdown_write(c);
- chan_send_oclose1(c);
- chan_set_ostate(c, CHAN_OUTPUT_CLOSED);
- break;
- default:
- error("channel %d: chan_write_failed for ostate %d",
- c->self, c->ostate);
- break;
- }
-}
-void
-chan_obuf_empty(Channel *c)
-{
- debug("channel %d: obuf empty", c->self);
- if (buffer_len(&c->output)) {
- error("channel %d: chan_obuf_empty for non empty buffer",
- c->self);
- return;
- }
- switch (c->ostate) {
- case CHAN_OUTPUT_WAIT_DRAIN:
- chan_shutdown_write(c);
- if (!compat20)
- chan_send_oclose1(c);
- chan_set_ostate(c, CHAN_OUTPUT_CLOSED);
- break;
- default:
- error("channel %d: internal error: obuf_empty for ostate %d",
- c->self, c->ostate);
- break;
- }
-}
-static void
-chan_send_ieof1(Channel *c)
-{
- debug("channel %d: send ieof", c->self);
- switch (c->istate) {
- case CHAN_INPUT_OPEN:
- case CHAN_INPUT_WAIT_DRAIN:
- packet_start(SSH_MSG_CHANNEL_INPUT_EOF);
- packet_put_int(c->remote_id);
- packet_send();
- break;
- default:
- error("channel %d: cannot send ieof for istate %d",
- c->self, c->istate);
- break;
- }
-}
-static void
-chan_send_oclose1(Channel *c)
-{
- debug("channel %d: send oclose", c->self);
- switch (c->ostate) {
- case CHAN_OUTPUT_OPEN:
- case CHAN_OUTPUT_WAIT_DRAIN:
- buffer_clear(&c->output);
- packet_start(SSH_MSG_CHANNEL_OUTPUT_CLOSE);
- packet_put_int(c->remote_id);
- packet_send();
- break;
- default:
- error("channel %d: cannot send oclose for ostate %d",
- c->self, c->ostate);
- break;
- }
-}
-
-/*
- * the same for SSH2
- */
-static void
-chan_rcvd_close2(Channel *c)
-{
- debug("channel %d: rcvd close", c->self);
- if (c->flags & CHAN_CLOSE_RCVD)
- error("channel %d: protocol error: close rcvd twice", c->self);
- c->flags |= CHAN_CLOSE_RCVD;
- if (c->type == SSH_CHANNEL_LARVAL) {
- /* tear down larval channels immediately */
- chan_set_ostate(c, CHAN_OUTPUT_CLOSED);
- chan_set_istate(c, CHAN_INPUT_CLOSED);
- return;
- }
- switch (c->ostate) {
- case CHAN_OUTPUT_OPEN:
- /*
- * wait until a data from the channel is consumed if a CLOSE
- * is received
- */
- chan_set_ostate(c, CHAN_OUTPUT_WAIT_DRAIN);
- break;
- }
- switch (c->istate) {
- case CHAN_INPUT_OPEN:
- chan_shutdown_read(c);
- chan_set_istate(c, CHAN_INPUT_CLOSED);
- break;
- case CHAN_INPUT_WAIT_DRAIN:
- chan_send_eof2(c);
- chan_set_istate(c, CHAN_INPUT_CLOSED);
- break;
- }
-}
-void
-chan_rcvd_eow(Channel *c)
-{
- debug2("channel %d: rcvd eow", c->self);
- switch (c->istate) {
- case CHAN_INPUT_OPEN:
- chan_shutdown_read(c);
- chan_set_istate(c, CHAN_INPUT_CLOSED);
- break;
- }
-}
-static void
-chan_rcvd_eof2(Channel *c)
-{
- debug("channel %d: rcvd eof", c->self);
- c->flags |= CHAN_EOF_RCVD;
- if (c->ostate == CHAN_OUTPUT_OPEN)
- chan_set_ostate(c, CHAN_OUTPUT_WAIT_DRAIN);
-}
-static void
-chan_write_failed2(Channel *c)
-{
- debug("channel %d: write failed", c->self);
- switch (c->ostate) {
- case CHAN_OUTPUT_OPEN:
- case CHAN_OUTPUT_WAIT_DRAIN:
- chan_shutdown_write(c);
- if (strcmp(c->ctype, "session") == 0)
- chan_send_eow2(c);
- chan_set_ostate(c, CHAN_OUTPUT_CLOSED);
- break;
- default:
- error("channel %d: chan_write_failed for ostate %d",
- c->self, c->ostate);
- break;
- }
-}
-static void
-chan_send_eof2(Channel *c)
-{
- debug("channel %d: send eof", c->self);
- switch (c->istate) {
- case CHAN_INPUT_WAIT_DRAIN:
- packet_start(SSH2_MSG_CHANNEL_EOF);
- packet_put_int(c->remote_id);
- packet_send();
- c->flags |= CHAN_EOF_SENT;
- break;
- default:
- error("channel %d: cannot send eof for istate %d",
- c->self, c->istate);
- break;
- }
-}
-static void
-chan_send_close2(Channel *c)
-{
- debug("channel %d: send close", c->self);
- if (c->ostate != CHAN_OUTPUT_CLOSED ||
- c->istate != CHAN_INPUT_CLOSED) {
- error("channel %d: cannot send close for istate/ostate %d/%d",
- c->self, c->istate, c->ostate);
- } else if (c->flags & CHAN_CLOSE_SENT) {
- error("channel %d: already sent close", c->self);
- } else {
- packet_start(SSH2_MSG_CHANNEL_CLOSE);
- packet_put_int(c->remote_id);
- packet_send();
- c->flags |= CHAN_CLOSE_SENT;
- }
-}
-static void
-chan_send_eow2(Channel *c)
-{
- debug2("channel %d: send eow", c->self);
- if (c->ostate == CHAN_OUTPUT_CLOSED) {
- error("channel %d: must not sent eow on closed output",
- c->self);
- return;
- }
- packet_start(SSH2_MSG_CHANNEL_REQUEST);
- packet_put_int(c->remote_id);
- packet_put_cstring("eow@openssh.com");
- packet_put_char(0);
- packet_send();
-}
-
-/* shared */
-
-void
-chan_rcvd_ieof(Channel *c)
-{
- if (compat20)
- chan_rcvd_eof2(c);
- else
- chan_rcvd_ieof1(c);
- if (c->ostate == CHAN_OUTPUT_WAIT_DRAIN &&
- buffer_len(&c->output) == 0 &&
- !CHANNEL_EFD_OUTPUT_ACTIVE(c))
- chan_obuf_empty(c);
-}
-void
-chan_rcvd_oclose(Channel *c)
-{
- if (compat20)
- chan_rcvd_close2(c);
- else
- chan_rcvd_oclose1(c);
-}
-void
-chan_write_failed(Channel *c)
-{
- if (compat20)
- chan_write_failed2(c);
- else
- chan_write_failed1(c);
-}
-
-void
-chan_mark_dead(Channel *c)
-{
- c->type = SSH_CHANNEL_ZOMBIE;
-}
-
-int
-chan_is_dead(Channel *c, int send)
-{
- if (c->type == SSH_CHANNEL_ZOMBIE) {
- debug("channel %d: zombie", c->self);
- return 1;
- }
- if (c->istate != CHAN_INPUT_CLOSED || c->ostate != CHAN_OUTPUT_CLOSED)
- return 0;
- if (!compat20) {
- debug("channel %d: is dead", c->self);
- return 1;
- }
- if ((datafellows & SSH_BUG_EXTEOF) &&
- c->extended_usage == CHAN_EXTENDED_WRITE &&
- c->efd != -1 &&
- buffer_len(&c->extended) > 0) {
- debug2("channel %d: active efd: %d len %d",
- c->self, c->efd, buffer_len(&c->extended));
- return 0;
- }
- if (!(c->flags & CHAN_CLOSE_SENT)) {
- if (send) {
- chan_send_close2(c);
- } else {
- /* channel would be dead if we sent a close */
- if (c->flags & CHAN_CLOSE_RCVD) {
- debug("channel %d: almost dead",
- c->self);
- return 1;
- }
- }
- }
- if ((c->flags & CHAN_CLOSE_SENT) &&
- (c->flags & CHAN_CLOSE_RCVD)) {
- debug("channel %d: is dead", c->self);
- return 1;
- }
- return 0;
-}
-
-/* helper */
-static void
-chan_shutdown_write(Channel *c)
-{
- buffer_clear(&c->output);
- if (compat20 && c->type == SSH_CHANNEL_LARVAL)
- return;
- /* shutdown failure is allowed if write failed already */
- debug("channel %d: close_write", c->self);
- if (c->sock != -1) {
- if (shutdown(c->sock, SHUT_WR) < 0)
- debug("channel %d: chan_shutdown_write: "
- "shutdown() failed for fd%d: %.100s",
- c->self, c->sock, strerror(errno));
- } else {
- if (channel_close_fd(&c->wfd) < 0)
- log("channel %d: chan_shutdown_write: "
- "close() failed for fd%d: %.100s",
- c->self, c->wfd, strerror(errno));
- }
-}
-static void
-chan_shutdown_read(Channel *c)
-{
- if (compat20 && c->type == SSH_CHANNEL_LARVAL)
- return;
- debug("channel %d: close_read", c->self);
- if (c->sock != -1) {
- /*
- * shutdown(sock, SHUT_READ) may return ENOTCONN if the
- * write side has been closed already. (bug on Linux)
- * HP-UX may return ENOTCONN also.
- */
- if (shutdown(c->sock, SHUT_RD) < 0
- && errno != ENOTCONN)
- error("channel %d: chan_shutdown_read: "
- "shutdown() failed for fd%d [i%d o%d]: %.100s",
- c->self, c->sock, c->istate, c->ostate,
- strerror(errno));
- } else {
- if (channel_close_fd(&c->rfd) < 0)
- log("channel %d: chan_shutdown_read: "
- "close() failed for fd%d: %.100s",
- c->self, c->rfd, strerror(errno));
- }
-}
diff --git a/usr/src/cmd/ssh/libssh/common/packet.c b/usr/src/cmd/ssh/libssh/common/packet.c
deleted file mode 100644
index 1221db134a..0000000000
--- a/usr/src/cmd/ssh/libssh/common/packet.c
+++ /dev/null
@@ -1,1845 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * This file contains code implementing the packet protocol and communication
- * with the other side. This same code is used both on client and server side.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- *
- *
- * SSH2 packet format added by Markus Friedl.
- * Copyright (c) 2000, 2001 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/* $OpenBSD: packet.c,v 1.148 2007/06/07 19:37:34 pvalchev Exp $ */
-
-#include "includes.h"
-
-#include "sys-queue.h"
-#include "xmalloc.h"
-#include "buffer.h"
-#include "packet.h"
-#include "bufaux.h"
-#include "crc32.h"
-#include "getput.h"
-#include "compress.h"
-#include "deattack.h"
-#include "channels.h"
-#include "compat.h"
-#include "ssh1.h"
-#include "ssh2.h"
-#include "cipher.h"
-#include "kex.h"
-#include "mac.h"
-#include "log.h"
-#include "canohost.h"
-#include "misc.h"
-#include "ssh.h"
-#include "engine.h"
-
-/* PKCS#11 engine */
-ENGINE *e;
-
-#ifdef ALTPRIVSEP
-static int packet_server = 0;
-static int packet_monitor = 0;
-#endif /* ALTPRIVSEP */
-
-#ifdef PACKET_DEBUG
-#define DBG(x) x
-#else
-#define DBG(x)
-#endif
-
-static void packet_send2(void);
-
-/*
- * This variable contains the file descriptors used for communicating with
- * the other side. connection_in is used for reading; connection_out for
- * writing. These can be the same descriptor, in which case it is assumed to
- * be a socket.
- */
-static int connection_in = -1;
-static int connection_out = -1;
-
-/* Protocol flags for the remote side. */
-static u_int remote_protocol_flags = 0;
-
-/* Encryption context for receiving data. This is only used for decryption. */
-static CipherContext receive_context;
-
-/* Encryption context for sending data. This is only used for encryption. */
-static CipherContext send_context;
-
-/* Buffer for raw input data from the socket. */
-Buffer input;
-
-/* Buffer for raw output data going to the socket. */
-Buffer output;
-
-/* Buffer for the partial outgoing packet being constructed. */
-static Buffer outgoing_packet;
-
-/* Buffer for the incoming packet currently being processed. */
-static Buffer incoming_packet;
-
-/* Scratch buffer for packet compression/decompression. */
-static Buffer compression_buffer;
-static int compression_buffer_ready = 0;
-
-/* Flag indicating whether packet compression/decompression is enabled. */
-static int packet_compression = 0;
-
-/* default maximum packet size */
-int max_packet_size = 32768;
-
-/* Flag indicating whether this module has been initialized. */
-static int initialized = 0;
-
-/* Set to true if the connection is interactive. */
-static int interactive_mode = 0;
-
-/* Session key information for Encryption and MAC */
-Newkeys *newkeys[MODE_MAX];
-static struct packet_state {
- u_int32_t seqnr;
- u_int32_t packets;
- u_int64_t blocks;
-} p_read, p_send;
-
-static u_int64_t max_blocks_in, max_blocks_out;
-static u_int32_t rekey_limit;
-
-/* Session key for protocol v1 */
-static u_char ssh1_key[SSH_SESSION_KEY_LENGTH];
-static u_int ssh1_keylen;
-
-/* roundup current message to extra_pad bytes */
-static u_char extra_pad = 0;
-
-struct packet {
- TAILQ_ENTRY(packet) next;
- u_char type;
- Buffer payload;
-};
-TAILQ_HEAD(, packet) outgoing;
-
-/*
- * Part of what -f option and ~& escape sequence do in the client is that they
- * will force it to daemonize itself. Due to the fork safety rules inherent in
- * any PKCS#11 environment, if the engine is used we must do a key re-exchange
- * before forking a child to negotiate the new keys. Those keys will be used to
- * inicialize the new crypto contexts. This involves finishing the engine in the
- * parent and reinitializing it again in both processes after fork() returns.
- * This approach also leaves protocol 1 out since it doesn't support rekeying.
- */
-int will_daemonize;
-
-#ifdef PACKET_DEBUG
-/* This function dumps data onto stderr. This is for debugging only. */
-void
-data_dump(void *data, u_int len)
-{
- Buffer buf;
-
- buffer_init(&buf);
- buffer_append(&buf, data, len);
- buffer_dump(&buf);
- buffer_free(&buf);
-}
-#endif
-
-/*
- * Sets the descriptors used for communication. Disables encryption until
- * packet_set_encryption_key is called.
- */
-void
-packet_set_connection(int fd_in, int fd_out)
-{
- Cipher *none = cipher_by_name("none");
-
- if (none == NULL)
- fatal("packet_set_connection: cannot load cipher 'none'");
- connection_in = fd_in;
- connection_out = fd_out;
- cipher_init(&send_context, none, (unsigned char *) "", 0, NULL, 0, CIPHER_ENCRYPT);
- cipher_init(&receive_context, none, (unsigned char *) "", 0, NULL, 0, CIPHER_DECRYPT);
- newkeys[MODE_IN] = newkeys[MODE_OUT] = NULL;
- if (!initialized) {
- initialized = 1;
- buffer_init(&input);
- buffer_init(&output);
- buffer_init(&outgoing_packet);
- buffer_init(&incoming_packet);
- TAILQ_INIT(&outgoing);
- } else {
- buffer_clear(&input);
- buffer_clear(&output);
- buffer_clear(&outgoing_packet);
- buffer_clear(&incoming_packet);
- }
-
- /*
- * Prime the cache for get_remote_ipaddr() while we have a
- * socket on which to do a getpeername().
- */
- (void) get_remote_ipaddr();
-
- /* Kludge: arrange the close function to be called from fatal(). */
- fatal_add_cleanup((void (*) (void *)) packet_close, NULL);
-}
-
-/* Returns 1 if remote host is connected via socket, 0 if not. */
-
-int
-packet_connection_is_on_socket(void)
-{
- struct sockaddr_storage from, to;
- socklen_t fromlen, tolen;
-
- /* filedescriptors in and out are the same, so it's a socket */
- if (connection_in != -1 && connection_in == connection_out)
- return 1;
- fromlen = sizeof(from);
- memset(&from, 0, sizeof(from));
- if (getpeername(connection_in, (struct sockaddr *)&from, &fromlen) < 0)
- return 0;
- tolen = sizeof(to);
- memset(&to, 0, sizeof(to));
- if (getpeername(connection_out, (struct sockaddr *)&to, &tolen) < 0)
- return 0;
- if (fromlen != tolen || memcmp(&from, &to, fromlen) != 0)
- return 0;
- if (from.ss_family != AF_INET && from.ss_family != AF_INET6)
- return 0;
- return 1;
-}
-
-/* returns 1 if connection is via ipv4 */
-
-int
-packet_connection_is_ipv4(void)
-{
- struct sockaddr_storage to;
- socklen_t tolen = sizeof(to);
-
- memset(&to, 0, sizeof(to));
- if (getsockname(connection_out, (struct sockaddr *)&to, &tolen) < 0)
- return 0;
- if (to.ss_family == AF_INET)
- return 1;
-#ifdef IPV4_IN_IPV6
- if (to.ss_family == AF_INET6 &&
- IN6_IS_ADDR_V4MAPPED(&((struct sockaddr_in6 *)&to)->sin6_addr))
- return 1;
-#endif
- return 0;
-}
-
-/* Sets the connection into non-blocking mode. */
-
-void
-packet_set_nonblocking(void)
-{
- /* Set the socket into non-blocking mode. */
- if (fcntl(connection_in, F_SETFL, O_NONBLOCK) < 0)
- error("fcntl O_NONBLOCK: %.100s", strerror(errno));
-
- if (connection_out != connection_in) {
- if (fcntl(connection_out, F_SETFL, O_NONBLOCK) < 0)
- error("fcntl O_NONBLOCK: %.100s", strerror(errno));
- }
-}
-
-/* Returns the socket used for reading. */
-
-int
-packet_get_connection_in(void)
-{
- return connection_in;
-}
-
-/* Returns the descriptor used for writing. */
-
-int
-packet_get_connection_out(void)
-{
- return connection_out;
-}
-
-/* Closes the connection and clears and frees internal data structures. */
-
-void
-packet_close(void)
-{
- if (!initialized)
- return;
- initialized = 0;
- if (connection_in == connection_out) {
- shutdown(connection_out, SHUT_RDWR);
- close(connection_out);
- } else {
- close(connection_in);
- close(connection_out);
- }
- buffer_free(&input);
- buffer_free(&output);
- buffer_free(&outgoing_packet);
- buffer_free(&incoming_packet);
- if (compression_buffer_ready) {
- buffer_free(&compression_buffer);
- buffer_compress_uninit();
- compression_buffer_ready = 0;
- }
- cipher_cleanup(&send_context);
- cipher_cleanup(&receive_context);
-}
-
-/* Sets remote side protocol flags. */
-
-void
-packet_set_protocol_flags(u_int protocol_flags)
-{
- remote_protocol_flags = protocol_flags;
-}
-
-/* Returns the remote protocol flags set earlier by the above function. */
-
-u_int
-packet_get_protocol_flags(void)
-{
- return remote_protocol_flags;
-}
-
-/*
- * Starts packet compression from the next packet on in both directions.
- * Level is compression level 1 (fastest) - 9 (slow, best) as in gzip.
- */
-
-static void
-packet_init_compression(void)
-{
- if (compression_buffer_ready == 1)
- return;
- compression_buffer_ready = 1;
- buffer_init(&compression_buffer);
-}
-
-void
-packet_start_compression(int level)
-{
-#ifdef ALTPRIVSEP
- /* shouldn't happen! */
- if (packet_monitor)
- fatal("INTERNAL ERROR: The monitor cannot compress.");
-#endif /* ALTPRIVSEP */
-
- if (packet_compression && !compat20)
- fatal("Compression already enabled.");
- packet_compression = 1;
- packet_init_compression();
- buffer_compress_init_send(level);
- buffer_compress_init_recv();
-}
-
-/*
- * Causes any further packets to be encrypted using the given key. The same
- * key is used for both sending and reception. However, both directions are
- * encrypted independently of each other.
- */
-
-void
-packet_set_encryption_key(const u_char *key, u_int keylen,
- int number)
-{
- Cipher *cipher = cipher_by_number(number);
-
- if (cipher == NULL)
- fatal("packet_set_encryption_key: unknown cipher number %d", number);
- if (keylen < 20)
- fatal("packet_set_encryption_key: keylen too small: %d", keylen);
- if (keylen > SSH_SESSION_KEY_LENGTH)
- fatal("packet_set_encryption_key: keylen too big: %d", keylen);
- memcpy(ssh1_key, key, keylen);
- ssh1_keylen = keylen;
- cipher_init(&send_context, cipher, key, keylen, NULL, 0, CIPHER_ENCRYPT);
- cipher_init(&receive_context, cipher, key, keylen, NULL, 0, CIPHER_DECRYPT);
-}
-
-u_int
-packet_get_encryption_key(u_char *key)
-{
- if (key == NULL)
- return (ssh1_keylen);
- memcpy(key, ssh1_key, ssh1_keylen);
- return (ssh1_keylen);
-}
-
-/* Start constructing a packet to send. */
-void
-packet_start(u_char type)
-{
- u_char buf[9];
- int len;
-
- DBG(debug("packet_start[%d]", type));
- len = compat20 ? 6 : 9;
- memset(buf, 0, len - 1);
- buf[len - 1] = type;
- buffer_clear(&outgoing_packet);
- buffer_append(&outgoing_packet, buf, len);
-}
-
-/* Append payload. */
-void
-packet_put_char(int value)
-{
- char ch = value;
-
- buffer_append(&outgoing_packet, &ch, 1);
-}
-
-void
-packet_put_int(u_int value)
-{
- buffer_put_int(&outgoing_packet, value);
-}
-
-void
-packet_put_string(const void *buf, u_int len)
-{
- buffer_put_string(&outgoing_packet, buf, len);
-}
-
-void
-packet_put_cstring(const char *str)
-{
- buffer_put_cstring(&outgoing_packet, str);
-}
-
-void
-packet_put_utf8_cstring(const char *str)
-{
- if (datafellows & SSH_BUG_STRING_ENCODING)
- buffer_put_cstring(&outgoing_packet, str);
- else
- buffer_put_utf8_cstring(&outgoing_packet, str);
-}
-
-void
-packet_put_utf8_string(const char *str, uint_t len)
-{
- if (datafellows & SSH_BUG_STRING_ENCODING)
- buffer_put_string(&outgoing_packet, str, len);
- else
- buffer_put_utf8_string(&outgoing_packet, str, len);
-}
-
-void
-packet_put_raw(const void *buf, u_int len)
-{
- buffer_append(&outgoing_packet, buf, len);
-}
-
-void
-packet_put_bignum(BIGNUM * value)
-{
- buffer_put_bignum(&outgoing_packet, value);
-}
-
-void
-packet_put_bignum2(BIGNUM * value)
-{
- buffer_put_bignum2(&outgoing_packet, value);
-}
-
-/*
- * Finalizes and sends the packet. If the encryption key has been set,
- * encrypts the packet before sending.
- */
-
-static void
-packet_send1(void)
-{
- u_char buf[8], *cp;
- int i, padding, len;
- u_int checksum;
- u_int32_t rnd = 0;
-
- /*
- * If using packet compression, compress the payload of the outgoing
- * packet.
- */
- if (packet_compression) {
- buffer_clear(&compression_buffer);
- /* Skip padding. */
- buffer_consume(&outgoing_packet, 8);
- /* padding */
- buffer_append(&compression_buffer, "\0\0\0\0\0\0\0\0", 8);
- buffer_compress(&outgoing_packet, &compression_buffer);
- buffer_clear(&outgoing_packet);
- buffer_append(&outgoing_packet, buffer_ptr(&compression_buffer),
- buffer_len(&compression_buffer));
- }
- /* Compute packet length without padding (add checksum, remove padding). */
- len = buffer_len(&outgoing_packet) + 4 - 8;
-
- /* Insert padding. Initialized to zero in packet_start1() */
- padding = 8 - len % 8;
- if (!send_context.plaintext) {
- cp = buffer_ptr(&outgoing_packet);
- for (i = 0; i < padding; i++) {
- if (i % 4 == 0)
- rnd = arc4random();
- cp[7 - i] = rnd & 0xff;
- rnd >>= 8;
- }
- }
- buffer_consume(&outgoing_packet, 8 - padding);
-
- /* Add check bytes. */
- checksum = ssh_crc32(buffer_ptr(&outgoing_packet),
- buffer_len(&outgoing_packet));
- PUT_32BIT(buf, checksum);
- buffer_append(&outgoing_packet, buf, 4);
-
-#ifdef PACKET_DEBUG
- fprintf(stderr, "packet_send plain: ");
- buffer_dump(&outgoing_packet);
-#endif
-
- /* Append to output. */
- PUT_32BIT(buf, len);
- buffer_append(&output, buf, 4);
- cp = buffer_append_space(&output, buffer_len(&outgoing_packet));
- cipher_crypt(&send_context, cp, buffer_ptr(&outgoing_packet),
- buffer_len(&outgoing_packet));
-
-#ifdef PACKET_DEBUG
- debug("encrypted output queue now contains (%d bytes):\n",
- buffer_len(&output));
- buffer_dump(&output);
-#endif
-
- buffer_clear(&outgoing_packet);
-
- /*
- * Note that the packet is now only buffered in output. It won\'t be
- * actually sent until packet_write_wait or packet_write_poll is
- * called.
- */
-}
-
-void
-set_newkeys(int mode)
-{
- Enc *enc;
- Mac *mac;
- Comp *comp;
- CipherContext *cc;
- u_int64_t *max_blocks;
- int crypt_type;
-
- debug2("set_newkeys: mode %d", mode);
-
- if (mode == MODE_OUT) {
- cc = &send_context;
- crypt_type = CIPHER_ENCRYPT;
- p_send.packets = p_send.blocks = 0;
- max_blocks = &max_blocks_out;
- } else {
- cc = &receive_context;
- crypt_type = CIPHER_DECRYPT;
- p_read.packets = p_read.blocks = 0;
- max_blocks = &max_blocks_in;
- }
-
- debug("set_newkeys: setting new keys for '%s' mode",
- mode == MODE_IN ? "in" : "out");
-
- if (newkeys[mode] != NULL) {
- cipher_cleanup(cc);
- free_keys(newkeys[mode]);
- }
-
- newkeys[mode] = kex_get_newkeys(mode);
- if (newkeys[mode] == NULL)
- fatal("newkeys: no keys for mode %d", mode);
- enc = &newkeys[mode]->enc;
- mac = &newkeys[mode]->mac;
- comp = &newkeys[mode]->comp;
- if (mac_init(mac) == 0)
- mac->enabled = 1;
-#ifdef PACKET_DEBUG
- debug("new encryption key:\n");
- data_dump(enc->key, enc->key_len);
- debug("new encryption IV:\n");
- data_dump(enc->iv, enc->block_size);
- debug("new MAC key:\n");
- data_dump(mac->key, mac->key_len);
-#endif
- cipher_init(cc, enc->cipher, enc->key, enc->key_len,
- enc->iv, enc->block_size, crypt_type);
- /* Deleting the keys does not gain extra security */
- /* memset(enc->iv, 0, enc->block_size);
- memset(enc->key, 0, enc->key_len); */
- if (comp->type != 0 && comp->enabled == 0) {
- packet_init_compression();
- if (mode == MODE_OUT)
- buffer_compress_init_send(6);
- else
- buffer_compress_init_recv();
- comp->enabled = 1;
- }
-
- /*
- * In accordance to the RFCs listed below we enforce the key
- * re-exchange for:
- *
- * - every 1GB of transmitted data if the selected cipher block size
- * is less than 16 bytes (3DES, Blowfish)
- * - every 2^(2*B) cipher blocks transmitted (B is block size in bytes)
- * if the cipher block size is greater than or equal to 16 bytes (AES)
- * - and we never send more than 2^32 SSH packets using the same keys.
- * The recommendation of 2^31 packets is not enforced here but in
- * packet_need_rekeying(). There is also a hard check in
- * packet_send2_wrapped() that we don't send more than 2^32 packets.
- *
- * Note that if the SSH_BUG_NOREKEY compatibility flag is set then no
- * automatic rekeying is performed nor do we enforce the 3rd rule.
- * This means that we can be always forced by the opposite side to never
- * initiate automatic key re-exchange. This might change in the future.
- *
- * The RekeyLimit option keyword may only enforce more frequent key
- * renegotiation, never less. For more information on key renegotiation,
- * see:
- *
- * - RFC 4253 (SSH Transport Layer Protocol), section "9. Key
- * Re-Exchange"
- * - RFC 4344 (SSH Transport Layer Encryption Modes), sections "3.
- * Rekeying" and "6.1 Rekeying Considerations"
- */
- if (enc->block_size >= 16)
- *max_blocks = (u_int64_t)1 << (enc->block_size * 2);
- else
- *max_blocks = ((u_int64_t)1 << 30) / enc->block_size;
-
- if (rekey_limit)
- *max_blocks = MIN(*max_blocks, rekey_limit / enc->block_size);
-}
-
-void
-free_keys(Newkeys *keys)
-{
- Enc *enc;
- Mac *mac;
- Comp *comp;
-
- enc = &keys->enc;
- mac = &keys->mac;
- comp = &keys->comp;
- xfree(enc->name);
- xfree(enc->iv);
- xfree(enc->key);
-
- memset(mac->key, 0, mac->key_len);
- xfree(mac->key);
- xfree(mac->name);
- mac_clear(mac);
-
- xfree(comp->name);
- xfree(keys);
-}
-
-/*
- * Process SSH2_MSG_NEWKEYS message. If we are using the engine we must have
- * both SSH2_MSG_NEWKEYS processed before we can finish the engine, fork, and
- * reinitialize the crypto contexts. We can't fork before processing the 2nd
- * message otherwise we couldn't encrypt/decrypt that message at all - note that
- * parent's PKCS#11 sessions are useless after the fork and we must process
- * both SSH2_MSG_NEWKEYS messages using the old keys.
- */
-void
-process_newkeys(int mode)
-{
- /* this function is for the client only */
- if (packet_is_server() != 0)
- return;
-
- if (will_daemonize == FIRST_NEWKEYS_PROCESSED) {
- debug3("both SSH2_MSG_NEWKEYS processed, will daemonize now");
- cipher_cleanup(&send_context);
- cipher_cleanup(&receive_context);
- pkcs11_engine_finish(e);
- if (daemon(1, 1) < 0) {
- fatal("daemon() failed: %.200s",
- strerror(errno));
- }
- e = pkcs11_engine_load(e != NULL ? 1 : 0);
-
- set_newkeys(MODE_OUT);
- set_newkeys(MODE_IN);
- will_daemonize = SECOND_NEWKEYS_PROCESSED;
- packet_send2();
- } else {
- if (will_daemonize == DAEMONIZING_REQUESTED)
- will_daemonize = FIRST_NEWKEYS_PROCESSED;
- else
- set_newkeys(mode);
- }
-}
-
-/*
- * Finalize packet in SSH2 format (compress, mac, encrypt, enqueue)
- */
-static void
-packet_send2_wrapped(void)
-{
- u_char type, *cp, *macbuf = NULL;
- u_char padlen, pad;
- u_int packet_length = 0;
- u_int i, len;
- u_int32_t rnd = 0;
- Enc *enc = NULL;
- Mac *mac = NULL;
- Comp *comp = NULL;
- int block_size;
-
- if (newkeys[MODE_OUT] != NULL) {
- enc = &newkeys[MODE_OUT]->enc;
- mac = &newkeys[MODE_OUT]->mac;
- comp = &newkeys[MODE_OUT]->comp;
- }
- block_size = enc ? enc->block_size : 8;
-
- cp = buffer_ptr(&outgoing_packet);
- type = cp[5];
-
-#ifdef PACKET_DEBUG
- debug("plain output packet to be processed (%d bytes):\n",
- buffer_len(&outgoing_packet));
- buffer_dump(&outgoing_packet);
-#endif
-
- if (comp && comp->enabled) {
- len = buffer_len(&outgoing_packet);
- /* skip header, compress only payload */
- buffer_consume(&outgoing_packet, 5);
- buffer_clear(&compression_buffer);
- buffer_compress(&outgoing_packet, &compression_buffer);
- buffer_clear(&outgoing_packet);
- buffer_append(&outgoing_packet, "\0\0\0\0\0", 5);
- buffer_append(&outgoing_packet, buffer_ptr(&compression_buffer),
- buffer_len(&compression_buffer));
- DBG(debug("compression: raw %d compressed %d", len,
- buffer_len(&outgoing_packet)));
- }
-
- /* sizeof (packet_len + pad_len + payload) */
- len = buffer_len(&outgoing_packet);
-
- /*
- * calc size of padding, alloc space, get random data,
- * minimum padding is 4 bytes
- */
- padlen = block_size - (len % block_size);
- if (padlen < 4)
- padlen += block_size;
- if (extra_pad) {
- /* will wrap if extra_pad+padlen > 255 */
- extra_pad = roundup(extra_pad, block_size);
- pad = extra_pad - ((len + padlen) % extra_pad);
- debug3("packet_send2: adding %d (len %d padlen %d extra_pad %d)",
- pad, len, padlen, extra_pad);
- padlen += pad;
- extra_pad = 0;
- }
- cp = buffer_append_space(&outgoing_packet, padlen);
- if (enc && !send_context.plaintext) {
- /* random padding */
- for (i = 0; i < padlen; i++) {
- if (i % 4 == 0)
- rnd = arc4random();
- cp[i] = rnd & 0xff;
- rnd >>= 8;
- }
- } else {
- /* clear padding */
- memset(cp, 0, padlen);
- }
- /* packet_length includes payload, padding and padding length field */
- packet_length = buffer_len(&outgoing_packet) - 4;
- cp = buffer_ptr(&outgoing_packet);
- PUT_32BIT(cp, packet_length);
- cp[4] = padlen;
- DBG(debug("will send %d bytes (includes padlen %d)",
- packet_length + 4, padlen));
-
- /* compute MAC over seqnr and packet(length fields, payload, padding) */
- if (mac && mac->enabled) {
- macbuf = mac_compute(mac, p_send.seqnr,
- buffer_ptr(&outgoing_packet),
- buffer_len(&outgoing_packet));
- DBG(debug("done calc MAC out #%d", p_send.seqnr));
- }
- /* encrypt packet and append to output buffer. */
- cp = buffer_append_space(&output, buffer_len(&outgoing_packet));
- cipher_crypt(&send_context, cp, buffer_ptr(&outgoing_packet),
- buffer_len(&outgoing_packet));
- /* append unencrypted MAC */
- if (mac && mac->enabled)
- buffer_append(&output, (char *)macbuf, mac->mac_len);
-#ifdef PACKET_DEBUG
- debug("encrypted output queue now contains (%d bytes):\n",
- buffer_len(&output));
- buffer_dump(&output);
-#endif
- /* increment sequence number for outgoing packets */
- if (++p_send.seqnr == 0)
- log("outgoing seqnr wraps around");
-
- /*
- * RFC 4344: 3.1. First Rekeying Recommendation
- *
- * "Because of possible information leakage through the MAC tag after a
- * key exchange, .... an SSH implementation SHOULD NOT send more than
- * 2**32 packets before rekeying again."
- *
- * The code below is a hard check so that we are sure we don't go across
- * the suggestion. However, since the largest cipher block size we have
- * (AES) is 16 bytes we can't reach 2^32 SSH packets encrypted with the
- * same key while performing periodic rekeying.
- */
- if (++p_send.packets == 0)
- if (!(datafellows & SSH_BUG_NOREKEY))
- fatal("too many packets encrypted with same key");
- p_send.blocks += (packet_length + 4) / block_size;
- buffer_clear(&outgoing_packet);
-
- if (type == SSH2_MSG_NEWKEYS) {
- /*
- * set_newkeys(MODE_OUT) in the client. Note that in the
- * unprivileged child, set_newkeys() for MODE_OUT are set after
- * SSH2_MSG_NEWKEYS is read from the monitor and forwarded to
- * the client side.
- */
- process_newkeys(MODE_OUT);
- }
-}
-
-/*
- * Packets we deal with here are plain until we encrypt them in
- * packet_send2_wrapped().
- *
- * As already mentioned in a comment at process_newkeys() function we must not
- * fork() until both SSH2_MSG_NEWKEYS packets were processed. Until this is done
- * we must queue all packets so that they can be encrypted with the new keys and
- * then sent to the other side. However, what can happen here is that we get
- * SSH2_MSG_NEWKEYS after we sent it. In that situation we must call
- * packet_send2() anyway to empty the queue, and set the rekey flag to the
- * finished state. If we didn't do that we would just hang and enqueue data.
- */
-static void
-packet_send2(void)
-{
- static int rekeying = 0;
- struct packet *p;
- u_char type, *cp;
-
- if (will_daemonize != SECOND_NEWKEYS_PROCESSED) {
- cp = buffer_ptr(&outgoing_packet);
- type = cp[5];
-
- /* during rekeying we can only send key exchange messages */
- if (rekeying) {
- if (!((type >= SSH2_MSG_TRANSPORT_MIN) &&
- (type <= SSH2_MSG_TRANSPORT_MAX))) {
- debug("enqueue a plain packet because rekex in "
- "progress [type %u]", type);
- p = xmalloc(sizeof(*p));
- p->type = type;
- memcpy(&p->payload, &outgoing_packet, sizeof(Buffer));
- buffer_init(&outgoing_packet);
- TAILQ_INSERT_TAIL(&outgoing, p, next);
- return;
- }
- }
-
- /* rekeying starts with sending KEXINIT */
- if (type == SSH2_MSG_KEXINIT)
- rekeying = 1;
-
- packet_send2_wrapped();
- }
-
- /* after rekex is done we can process the queue of plain packets */
- if (will_daemonize == SECOND_NEWKEYS_PROCESSED ||
- (will_daemonize == NOT_DAEMONIZING && type == SSH2_MSG_NEWKEYS)) {
- rekeying = 0;
- will_daemonize = NOT_DAEMONIZING;
- while ((p = TAILQ_FIRST(&outgoing)) != NULL) {
- type = p->type;
- debug("dequeuing a plain packet since rekex is over "
- "[type %u]", type);
- buffer_free(&outgoing_packet);
- memcpy(&outgoing_packet, &p->payload, sizeof(Buffer));
- TAILQ_REMOVE(&outgoing, p, next);
- xfree(p);
- packet_send2_wrapped();
- }
- }
-}
-
-void
-packet_send(void)
-{
- if (compat20)
- packet_send2();
- else
- packet_send1();
- DBG(debug("packet_send done"));
-}
-
-/*
- * Waits until a packet has been received, and returns its type. Note that
- * no other data is processed until this returns, so this function should not
- * be used during the interactive session.
- *
- * The function is also used in the monitor to read the authentication context
- * in aps_read_auth_context() via packet_read_seqnr(), before the monitor enters
- * aps_monitor_loop() and starts using the process_input() function.
- */
-int
-packet_read_seqnr(u_int32_t *seqnr_p)
-{
- int type, len;
- fd_set *setp;
- char buf[8192];
- DBG(debug("packet_read()"));
-
- setp = (fd_set *)xmalloc(howmany(connection_in+1, NFDBITS) *
- sizeof(fd_mask));
-
- /* Since we are blocking, ensure that all written packets have been sent. */
- packet_write_wait();
-
- /* Stay in the loop until we have received a complete packet. */
- for (;;) {
- /* Try to read a packet from the buffer. */
- type = packet_read_poll_seqnr(seqnr_p);
- if (!compat20 && (
- type == SSH_SMSG_SUCCESS
- || type == SSH_SMSG_FAILURE
- || type == SSH_CMSG_EOF
- || type == SSH_CMSG_EXIT_CONFIRMATION))
- packet_check_eom();
- /* If we got a packet, return it. */
- if (type != SSH_MSG_NONE) {
- xfree(setp);
- return type;
- }
- /*
- * Otherwise, wait for some data to arrive, add it to the
- * buffer, and try again.
- */
- memset(setp, 0, howmany(connection_in + 1, NFDBITS) *
- sizeof(fd_mask));
- FD_SET(connection_in, setp);
-
- /* Wait for some data to arrive. */
- while (select(connection_in + 1, setp, NULL, NULL, NULL) == -1 &&
- (errno == EAGAIN || errno == EINTR))
- ;
-
- /* Read data from the socket. */
- len = read(connection_in, buf, sizeof(buf));
- if (len == 0) {
- if (packet_connection_is_on_socket())
- log("Connection closed by %.200s",
- get_remote_ipaddr());
- else
- debug("child closed the communication pipe "
- "before user auth was finished");
- fatal_cleanup();
- }
- if (len < 0) {
- if (packet_connection_is_on_socket())
- fatal("Read from socket failed: %.100s",
- strerror(errno));
- else
- fatal("Read from communication pipe failed: "
- "%.100s", strerror(errno));
- }
- /* Append it to the buffer. */
- packet_process_incoming(buf, len);
- }
- /* NOTREACHED */
-}
-
-int
-packet_read(void)
-{
- return packet_read_seqnr(NULL);
-}
-
-/*
- * Waits until a packet has been received, verifies that its type matches
- * that given, and gives a fatal error and exits if there is a mismatch.
- */
-
-void
-packet_read_expect(int expected_type)
-{
- int type;
-
- type = packet_read();
- if (type != expected_type)
- packet_disconnect("Protocol error: expected packet type %d, got %d",
- expected_type, type);
-}
-
-/* Checks if a full packet is available in the data received so far via
- * packet_process_incoming. If so, reads the packet; otherwise returns
- * SSH_MSG_NONE. This does not wait for data from the connection.
- *
- * SSH_MSG_DISCONNECT is handled specially here. Also,
- * SSH_MSG_IGNORE messages are skipped by this function and are never returned
- * to higher levels.
- */
-
-static int
-packet_read_poll1(void)
-{
- u_int len, padded_len;
- u_char *cp, type;
- u_int checksum, stored_checksum;
-
- /* Check if input size is less than minimum packet size. */
- if (buffer_len(&input) < 4 + 8)
- return SSH_MSG_NONE;
- /* Get length of incoming packet. */
- cp = buffer_ptr(&input);
- len = GET_32BIT(cp);
- if (len < 1 + 2 + 2 || len > 256 * 1024)
- packet_disconnect("Bad packet length %d.", len);
- padded_len = (len + 8) & ~7;
-
- /* Check if the packet has been entirely received. */
- if (buffer_len(&input) < 4 + padded_len)
- return SSH_MSG_NONE;
-
- /* The entire packet is in buffer. */
-
- /* Consume packet length. */
- buffer_consume(&input, 4);
-
- /*
- * Cryptographic attack detector for ssh
- * (C)1998 CORE-SDI, Buenos Aires Argentina
- * Ariel Futoransky(futo@core-sdi.com)
- */
- if (!receive_context.plaintext) {
- switch (detect_attack(buffer_ptr(&input), padded_len, NULL)) {
- case DEATTACK_DETECTED:
- packet_disconnect("crc32 compensation attack: "
- "network attack detected");
- break;
- case DEATTACK_DOS_DETECTED:
- packet_disconnect("deattack denial of "
- "service detected");
- break;
- }
- }
-
- /* Decrypt data to incoming_packet. */
- buffer_clear(&incoming_packet);
- cp = buffer_append_space(&incoming_packet, padded_len);
- cipher_crypt(&receive_context, cp, buffer_ptr(&input), padded_len);
-
- buffer_consume(&input, padded_len);
-
-#ifdef PACKET_DEBUG
- debug("read_poll plain/full:\n");
- buffer_dump(&incoming_packet);
-#endif
-
- /* Compute packet checksum. */
- checksum = ssh_crc32(buffer_ptr(&incoming_packet),
- buffer_len(&incoming_packet) - 4);
-
- /* Skip padding. */
- buffer_consume(&incoming_packet, 8 - len % 8);
-
- /* Test check bytes. */
- if (len != buffer_len(&incoming_packet))
- packet_disconnect("packet_read_poll1: len %d != buffer_len %d.",
- len, buffer_len(&incoming_packet));
-
- cp = (u_char *)buffer_ptr(&incoming_packet) + len - 4;
- stored_checksum = GET_32BIT(cp);
- if (checksum != stored_checksum)
- packet_disconnect("Corrupted check bytes on input.");
- buffer_consume_end(&incoming_packet, 4);
-
- if (packet_compression) {
- buffer_clear(&compression_buffer);
- buffer_uncompress(&incoming_packet, &compression_buffer);
- buffer_clear(&incoming_packet);
- buffer_append(&incoming_packet, buffer_ptr(&compression_buffer),
- buffer_len(&compression_buffer));
- }
- type = buffer_get_char(&incoming_packet);
- return type;
-}
-
-static int
-packet_read_poll2(u_int32_t *seqnr_p)
-{
- static u_int packet_length = 0;
- u_int padlen, need;
- u_char *macbuf, *cp, type;
- int maclen, block_size;
- Enc *enc = NULL;
- Mac *mac = NULL;
- Comp *comp = NULL;
-
- if (newkeys[MODE_IN] != NULL) {
- enc = &newkeys[MODE_IN]->enc;
- mac = &newkeys[MODE_IN]->mac;
- comp = &newkeys[MODE_IN]->comp;
- }
- maclen = mac && mac->enabled ? mac->mac_len : 0;
- block_size = enc ? enc->block_size : 8;
-
- if (packet_length == 0) {
- /*
- * check if input size is less than the cipher block size,
- * decrypt first block and extract length of incoming packet
- */
- if (buffer_len(&input) < block_size)
- return SSH_MSG_NONE;
-#ifdef PACKET_DEBUG
- debug("encrypted data we have in read queue (%d bytes):\n",
- buffer_len(&input));
- buffer_dump(&input);
-#endif
- buffer_clear(&incoming_packet);
- cp = buffer_append_space(&incoming_packet, block_size);
- cipher_crypt(&receive_context, cp, buffer_ptr(&input),
- block_size);
- cp = buffer_ptr(&incoming_packet);
- packet_length = GET_32BIT(cp);
- if (packet_length < 1 + 4 || packet_length > 256 * 1024) {
- packet_disconnect("Bad packet length.");
- }
- DBG(debug("input: packet len %u", packet_length + 4));
- buffer_consume(&input, block_size);
- }
- /* we have a partial packet of block_size bytes */
- need = 4 + packet_length - block_size;
- DBG(debug("partial packet %d, still need %d, maclen %d", block_size,
- need, maclen));
- if (need % block_size != 0)
- packet_disconnect("Bad packet length.");
- /*
- * check if the entire packet has been received and
- * decrypt into incoming_packet
- */
- if (buffer_len(&input) < need + maclen)
- return SSH_MSG_NONE;
-#ifdef PACKET_DEBUG
- debug("in read_poll, the encrypted input queue now contains "
- "(%d bytes):\n", buffer_len(&input));
- buffer_dump(&input);
-#endif
- cp = buffer_append_space(&incoming_packet, need);
- cipher_crypt(&receive_context, cp, buffer_ptr(&input), need);
- buffer_consume(&input, need);
- /*
- * compute MAC over seqnr and packet,
- * increment sequence number for incoming packet
- */
- if (mac && mac->enabled) {
- macbuf = mac_compute(mac, p_read.seqnr,
- buffer_ptr(&incoming_packet),
- buffer_len(&incoming_packet));
- if (memcmp(macbuf, buffer_ptr(&input), mac->mac_len) != 0)
- packet_disconnect("Corrupted MAC on input.");
- DBG(debug("MAC #%d ok", p_read.seqnr));
- buffer_consume(&input, mac->mac_len);
- }
- if (seqnr_p != NULL)
- *seqnr_p = p_read.seqnr;
- if (++p_read.seqnr == 0)
- log("incoming seqnr wraps around");
-
- /* see above for the comment on "First Rekeying Recommendation" */
- if (++p_read.packets == 0)
- if (!(datafellows & SSH_BUG_NOREKEY))
- fatal("too many packets with same key");
- p_read.blocks += (packet_length + 4) / block_size;
-
- /* get padlen */
- cp = buffer_ptr(&incoming_packet);
- padlen = cp[4];
- DBG(debug("input: padlen %d", padlen));
- if (padlen < 4)
- packet_disconnect("Corrupted padlen %d on input.", padlen);
-
- /* skip packet size + padlen, discard padding */
- buffer_consume(&incoming_packet, 4 + 1);
- buffer_consume_end(&incoming_packet, padlen);
-
- DBG(debug("input: len before de-compress %d", buffer_len(&incoming_packet)));
- if (comp && comp->enabled) {
- buffer_clear(&compression_buffer);
- buffer_uncompress(&incoming_packet, &compression_buffer);
- buffer_clear(&incoming_packet);
- buffer_append(&incoming_packet, buffer_ptr(&compression_buffer),
- buffer_len(&compression_buffer));
- DBG(debug("input: len after de-compress %d",
- buffer_len(&incoming_packet)));
- }
- /*
- * get packet type, implies consume.
- * return length of payload (without type field)
- */
- type = buffer_get_char(&incoming_packet);
- if (type == SSH2_MSG_NEWKEYS) {
- /*
- * set_newkeys(MODE_IN) in the client because it doesn't have a
- * dispatch function for SSH2_MSG_NEWKEYS in contrast to the
- * server processes. Note that in the unprivileged child,
- * set_newkeys() for MODE_IN are set in dispatch function
- * altprivsep_rekey() after SSH2_MSG_NEWKEYS packet is received
- * from the client.
- */
- process_newkeys(MODE_IN);
- }
-
-#ifdef PACKET_DEBUG
- debug("decrypted input packet [type %d]:\n", type);
- buffer_dump(&incoming_packet);
-#endif
- /* reset for next packet */
- packet_length = 0;
- return type;
-}
-
-/*
- * This tries to read a packet from the buffer of received data. Note that it
- * doesn't read() anything from the network socket.
- */
-int
-packet_read_poll_seqnr(u_int32_t *seqnr_p)
-{
- u_int reason, seqnr;
- u_char type;
- char *msg;
-
- for (;;) {
- if (compat20) {
- type = packet_read_poll2(seqnr_p);
- DBG(debug("received packet type %d", type));
- switch (type) {
- case SSH2_MSG_IGNORE:
- break;
- case SSH2_MSG_DEBUG:
- packet_get_char();
- msg = packet_get_utf8_string(NULL);
- msg = g11n_filter_string(msg);
- debug("Remote: %.900s", msg);
- xfree(msg);
- msg = packet_get_string(NULL);
- xfree(msg);
- break;
- case SSH2_MSG_DISCONNECT:
- reason = packet_get_int();
- msg = packet_get_utf8_string(NULL);
- msg = g11n_filter_string(msg);
- log("Received disconnect from %s: %u: %.400s",
- get_remote_ipaddr(), reason, msg);
- xfree(msg);
- fatal_cleanup();
- break;
- case SSH2_MSG_UNIMPLEMENTED:
- seqnr = packet_get_int();
- debug("Received SSH2_MSG_UNIMPLEMENTED for %u",
- seqnr);
- break;
- default:
- return type;
- break;
- }
- } else {
- type = packet_read_poll1();
- DBG(debug("received packet type %d", type));
- switch (type) {
- case SSH_MSG_IGNORE:
- break;
- case SSH_MSG_DEBUG:
- msg = packet_get_string(NULL);
- debug("Remote: %.900s", msg);
- xfree(msg);
- break;
- case SSH_MSG_DISCONNECT:
- msg = packet_get_string(NULL);
- log("Received disconnect from %s: %.400s",
- get_remote_ipaddr(), msg);
- fatal_cleanup();
- xfree(msg);
- break;
- default:
- return type;
- break;
- }
- }
- }
-}
-
-int
-packet_read_poll(void)
-{
- return packet_read_poll_seqnr(NULL);
-}
-
-/*
- * Buffers the given amount of input characters. This is intended to be used
- * together with packet_read_poll.
- */
-
-void
-packet_process_incoming(const char *buf, u_int len)
-{
- buffer_append(&input, buf, len);
-}
-
-/* Returns a character from the packet. */
-
-u_int
-packet_get_char(void)
-{
- char ch;
-
- buffer_get(&incoming_packet, &ch, 1);
- return (u_char) ch;
-}
-
-/* Returns an integer from the packet data. */
-
-u_int
-packet_get_int(void)
-{
- return buffer_get_int(&incoming_packet);
-}
-
-/*
- * Returns an arbitrary precision integer from the packet data. The integer
- * must have been initialized before this call.
- */
-
-void
-packet_get_bignum(BIGNUM * value)
-{
- buffer_get_bignum(&incoming_packet, value);
-}
-
-void
-packet_get_bignum2(BIGNUM * value)
-{
- buffer_get_bignum2(&incoming_packet, value);
-}
-
-void *
-packet_get_raw(u_int *length_ptr)
-{
- u_int bytes = buffer_len(&incoming_packet);
-
- if (length_ptr != NULL)
- *length_ptr = bytes;
- return buffer_ptr(&incoming_packet);
-}
-
-int
-packet_remaining(void)
-{
- return buffer_len(&incoming_packet);
-}
-
-/*
- * Returns a string from the packet data. The string is allocated using
- * xmalloc; it is the responsibility of the calling program to free it when
- * no longer needed. The length_ptr argument may be NULL, or point to an
- * integer into which the length of the string is stored.
- */
-
-void *
-packet_get_string(u_int *length_ptr)
-{
- return buffer_get_string(&incoming_packet, length_ptr);
-}
-
-char *
-packet_get_utf8_string(uint_t *length_ptr)
-{
- if (datafellows & SSH_BUG_STRING_ENCODING)
- return (buffer_get_string(&incoming_packet, length_ptr));
- else
- return (buffer_get_utf8_string(&incoming_packet, length_ptr));
-}
-
-/*
- * Sends a diagnostic message from the server to the client. This message
- * can be sent at any time (but not while constructing another message). The
- * message is printed immediately, but only if the client is being executed
- * in verbose mode. These messages are primarily intended to ease debugging
- * authentication problems. The length of the formatted message must not
- * exceed 1024 bytes. This will automatically call packet_write_wait.
- */
-
-void
-packet_send_debug(const char *fmt,...)
-{
- char buf[1024];
- va_list args;
-
- if (compat20 && (datafellows & SSH_BUG_DEBUG))
- return;
-
- va_start(args, fmt);
- vsnprintf(buf, sizeof(buf), gettext(fmt), args);
- va_end(args);
-
-#ifdef ALTPRIVSEP
- /* shouldn't happen */
- if (packet_monitor) {
- debug("packet_send_debug: %s", buf);
- return;
- }
-#endif /* ALTPRIVSEP */
-
- if (compat20) {
- packet_start(SSH2_MSG_DEBUG);
- packet_put_char(0); /* bool: always display */
- packet_put_utf8_cstring(buf);
- packet_put_cstring("");
- } else {
- packet_start(SSH_MSG_DEBUG);
- packet_put_cstring(buf);
- }
- packet_send();
- packet_write_wait();
-}
-
-/*
- * Logs the error plus constructs and sends a disconnect packet, closes the
- * connection, and exits. This function never returns. The error message
- * should not contain a newline. The length of the formatted message must
- * not exceed 1024 bytes.
- */
-
-void
-packet_disconnect(const char *fmt,...)
-{
- char buf[1024];
- va_list args;
- static int disconnecting = 0;
-
- if (disconnecting) /* Guard against recursive invocations. */
- fatal("packet_disconnect called recursively.");
- disconnecting = 1;
-
- /*
- * Format the message. Note that the caller must make sure the
- * message is of limited size.
- */
- va_start(args, fmt);
- vsnprintf(buf, sizeof(buf), fmt, args);
- va_end(args);
-
-#ifdef ALTPRIVSEP
- /*
- * If we packet_disconnect() in the monitor the fatal cleanups will take
- * care of the child. See main() in sshd.c. We don't send the packet
- * disconnect message here because: a) the child might not be looking
- * for it and b) because we don't really know if the child is compat20
- * or not as we lost that information when packet_set_monitor() was
- * called.
- */
- if (packet_monitor)
- goto close_stuff;
-#endif /* ALTPRIVSEP */
-
- /* Send the disconnect message to the other side, and wait for it to get sent. */
- if (compat20) {
- packet_start(SSH2_MSG_DISCONNECT);
- packet_put_int(SSH2_DISCONNECT_PROTOCOL_ERROR);
- packet_put_utf8_cstring(buf);
- packet_put_cstring("");
- } else {
- packet_start(SSH_MSG_DISCONNECT);
- packet_put_cstring(buf);
- }
- packet_send();
- packet_write_wait();
-
-#ifdef ALTPRIVSEP
-close_stuff:
-#endif /* ALTPRIVSEP */
- /* Stop listening for connections. */
- channel_close_all();
-
- /* Close the connection. */
- packet_close();
-
- /* Display the error locally and exit. */
- log("Disconnecting: %.100s", buf);
- fatal_cleanup();
-}
-
-/* Checks if there is any buffered output, and tries to write some of the output. */
-
-void
-packet_write_poll(void)
-{
- int len = buffer_len(&output);
-
- if (len > 0) {
- len = write(connection_out, buffer_ptr(&output), len);
- if (len <= 0) {
- if (errno == EAGAIN)
- return;
- else
- fatal("Write failed: %.100s", strerror(errno));
- }
-#ifdef PACKET_DEBUG
- debug("in packet_write_poll, %d bytes just sent to the "
- "remote side", len);
-#endif
- buffer_consume(&output, len);
- }
-}
-
-/*
- * Calls packet_write_poll repeatedly until all pending output data has been
- * written.
- */
-
-void
-packet_write_wait(void)
-{
- fd_set *setp;
-
- setp = (fd_set *)xmalloc(howmany(connection_out + 1, NFDBITS) *
- sizeof(fd_mask));
- packet_write_poll();
- while (packet_have_data_to_write()) {
- memset(setp, 0, howmany(connection_out + 1, NFDBITS) *
- sizeof(fd_mask));
- FD_SET(connection_out, setp);
- while (select(connection_out + 1, NULL, setp, NULL, NULL) == -1 &&
- (errno == EAGAIN || errno == EINTR))
- ;
- packet_write_poll();
- }
- xfree(setp);
-}
-
-/* Returns true if there is buffered data to write to the connection. */
-
-int
-packet_have_data_to_write(void)
-{
- return buffer_len(&output) != 0;
-}
-
-/* Returns true if there is not too much data to write to the connection. */
-
-int
-packet_not_very_much_data_to_write(void)
-{
- if (interactive_mode)
- return buffer_len(&output) < 16384;
- else
- return buffer_len(&output) < 128 * 1024;
-}
-
-/* Informs that the current session is interactive. Sets IP flags for that. */
-
-void
-packet_set_interactive(int interactive)
-{
- static int called = 0;
-#if defined(IP_TOS) && !defined(IP_TOS_IS_BROKEN)
- int lowdelay = IPTOS_LOWDELAY;
- int throughput = IPTOS_THROUGHPUT;
-#endif
-
- if (called)
- return;
- called = 1;
-
- /* Record that we are in interactive mode. */
- interactive_mode = interactive;
-
- /* Only set socket options if using a socket. */
- if (!packet_connection_is_on_socket())
- return;
- /*
- * IPTOS_LOWDELAY and IPTOS_THROUGHPUT are IPv4 only
- */
- if (interactive) {
- /*
- * Set IP options for an interactive connection. Use
- * IPTOS_LOWDELAY and TCP_NODELAY.
- */
-#if defined(IP_TOS) && !defined(IP_TOS_IS_BROKEN)
- if (packet_connection_is_ipv4()) {
- if (setsockopt(connection_in, IPPROTO_IP, IP_TOS,
- &lowdelay, sizeof(lowdelay)) < 0)
- error("setsockopt IPTOS_LOWDELAY: %.100s",
- strerror(errno));
- }
-#endif
- set_nodelay(connection_in);
- }
-#if defined(IP_TOS) && !defined(IP_TOS_IS_BROKEN)
- else if (packet_connection_is_ipv4()) {
- /*
- * Set IP options for a non-interactive connection. Use
- * IPTOS_THROUGHPUT.
- */
- if (setsockopt(connection_in, IPPROTO_IP, IP_TOS, &throughput,
- sizeof(throughput)) < 0)
- error("setsockopt IPTOS_THROUGHPUT: %.100s", strerror(errno));
- }
-#endif
-}
-
-/* Returns true if the current connection is interactive. */
-
-int
-packet_is_interactive(void)
-{
- return interactive_mode;
-}
-
-int
-packet_set_maxsize(int s)
-{
- static int called = 0;
-
- if (called) {
- log("packet_set_maxsize: called twice: old %d new %d",
- max_packet_size, s);
- return -1;
- }
- if (s < 4 * 1024 || s > 1024 * 1024) {
- log("packet_set_maxsize: bad size %d", s);
- return -1;
- }
- called = 1;
- debug("packet_set_maxsize: setting to %d", s);
- max_packet_size = s;
- return s;
-}
-
-/* roundup current message to pad bytes */
-void
-packet_add_padding(u_char pad)
-{
- extra_pad = pad;
-}
-
-/*
- * 9.2. Ignored Data Message
- *
- * byte SSH_MSG_IGNORE
- * string data
- *
- * All implementations MUST understand (and ignore) this message at any
- * time (after receiving the protocol version). No implementation is
- * required to send them. This message can be used as an additional
- * protection measure against advanced traffic analysis techniques.
- */
-void
-packet_send_ignore(int nbytes)
-{
- u_int32_t rnd = 0;
- int i;
-
-#ifdef ALTPRIVSEP
- /* shouldn't happen -- see packet_set_monitor() */
- if (packet_monitor)
- return;
-#endif /* ALTPRIVSEP */
-
- packet_start(compat20 ? SSH2_MSG_IGNORE : SSH_MSG_IGNORE);
- packet_put_int(nbytes);
- for (i = 0; i < nbytes; i++) {
- if (i % 4 == 0)
- rnd = arc4random();
- packet_put_char((u_char)rnd & 0xff);
- rnd >>= 8;
- }
-}
-
-#define MAX_PACKETS (1U<<31)
-int
-packet_need_rekeying(void)
-{
- if (datafellows & SSH_BUG_NOREKEY)
- return 0;
- return
- (p_send.packets > MAX_PACKETS) ||
- (p_read.packets > MAX_PACKETS) ||
- (max_blocks_out && (p_send.blocks > max_blocks_out)) ||
- (max_blocks_in && (p_read.blocks > max_blocks_in));
-}
-
-void
-packet_set_rekey_limit(u_int32_t bytes)
-{
- rekey_limit = bytes;
-}
-
-#ifdef ALTPRIVSEP
-void
-packet_set_server(void)
-{
- packet_server = 1;
-}
-
-int
-packet_is_server(void)
-{
- return (packet_server);
-}
-
-void
-packet_set_monitor(int pipe)
-{
- int dup_fd;
-
- packet_server = 1;
- packet_monitor = 1;
-
- /*
- * Awful hack follows.
- *
- * For SSHv1 the monitor does not process any SSHv1 packets, only
- * ALTPRIVSEP packets. We take advantage of that here to keep changes
- * to packet.c to a minimum by using the SSHv2 binary packet protocol,
- * with cipher "none," mac "none" and compression alg "none," as the
- * basis for the monitor protocol. And so to force packet.c to treat
- * packets as SSHv2 we force compat20 == 1 here.
- *
- * For completeness and to help future developers catch this we also
- * force compat20 == 1 in the monitor loop, in serverloop.c.
- */
- compat20 = 1;
-
- /*
- * NOTE: Assumptions below!
- *
- * - lots of packet.c code assumes that (connection_in ==
- * connection_out) -> connection is socket
- *
- * - packet_close() does not shutdown() the connection fildes
- * if connection_in != connection_out
- *
- * - other code assumes the connection is a socket if
- * connection_in == connection_out
- */
-
- if ((dup_fd = dup(pipe)) < 0)
- fatal("Monitor failed to start: %s", strerror(errno));
-
- /*
- * make sure that the monitor's child's socket is not shutdown(3SOCKET)
- * when we packet_close(). Setting connection_out to -1 will take care
- * of that.
- */
- if (packet_connection_is_on_socket())
- connection_out = -1;
-
- /*
- * Now clean up the state related to the server socket. As a side
- * effect, we also clean up existing cipher contexts that were
- * initialized with 'none' cipher in packet_set_connection(). That
- * function was called in the child server process shortly after the
- * master SSH process forked. However, all of that is reinialized again
- * by another packet_set_connection() call right below.
- */
- packet_close();
-
- /*
- * Now make the monitor pipe look like the ssh connection which means
- * that connection_in and connection_out will be set to the
- * communication pipe descriptors.
- */
- packet_set_connection(pipe, dup_fd);
-}
-
-/*
- * We temporarily need to set connection_in and connection_out descriptors so
- * that we can make use of existing code that gets the IP address and hostname
- * of the peer to write a login/logout record. It's not nice but we would have
- * to change more code when implementing the PKCS#11 engine support.
- */
-void
-packet_set_fds(int fd, int restore)
-{
- static int stored_fd;
-
- if (stored_fd == 0 && restore == 0) {
- debug3("packet_set_fds: saving %d, installing %d",
- connection_in, fd);
- stored_fd = connection_in;
- /* we don't have a socket in inetd mode */
- if (fd != -1)
- connection_in = connection_out = fd;
- return;
- }
-
- if (restore == 1) {
- debug3("restoring %d to connection_in/out", stored_fd);
- connection_in = connection_out = stored_fd;
- }
-}
-
-int
-packet_is_monitor(void)
-{
- return (packet_monitor);
-}
-#endif /* ALTPRIVSEP */
diff --git a/usr/src/cmd/ssh/libssh/common/progressmeter.c b/usr/src/cmd/ssh/libssh/common/progressmeter.c
deleted file mode 100644
index 65d28fb596..0000000000
--- a/usr/src/cmd/ssh/libssh/common/progressmeter.c
+++ /dev/null
@@ -1,308 +0,0 @@
-/*
- * Copyright (c) 2003 Nils Nordman. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* $OpenBSD: progressmeter.c,v 1.37 2006/08/03 03:34:42 deraadt Exp $ */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "includes.h"
-
-#include <sys/types.h>
-#include <sys/ioctl.h>
-#include <sys/uio.h>
-
-#include <errno.h>
-#include <signal.h>
-#include <stdio.h>
-#include <string.h>
-#include <time.h>
-#include <unistd.h>
-
-#include "progressmeter.h"
-#include "atomicio.h"
-#include "misc.h"
-
-#define DEFAULT_WINSIZE 80
-#define MAX_WINSIZE 512
-#define PADDING 1 /* padding between the progress indicators */
-#define UPDATE_INTERVAL 1 /* update the progress meter every second */
-#define STALL_TIME 5 /* we're stalled after this many seconds */
-
-/* determines whether we can output to the terminal */
-static int can_output(void);
-
-/* formats and inserts the specified size into the given buffer */
-static void format_size(char *, int, off_t);
-static void format_rate(char *, int, off_t);
-
-/* window resizing */
-static void sig_winch(int);
-static void setscreensize(void);
-
-/* updates the progressmeter to reflect the current state of the transfer */
-void refresh_progress_meter(void);
-
-/* signal handler for updating the progress meter */
-static void update_progress_meter(int);
-
-static time_t start; /* start progress */
-static time_t last_update; /* last progress update */
-static char *file; /* name of the file being transferred */
-static off_t end_pos; /* ending position of transfer */
-static off_t cur_pos; /* transfer position as of last refresh */
-static volatile off_t *counter; /* progress counter */
-static long stalled; /* how long we have been stalled */
-static int bytes_per_second; /* current speed in bytes per second */
-static int win_size; /* terminal window size */
-static volatile sig_atomic_t win_resized; /* for window resizing */
-
-/* units for format_size */
-static const char unit[] = " KMGT";
-
-static int
-can_output(void)
-{
- return (getpgrp() == tcgetpgrp(STDOUT_FILENO));
-}
-
-static void
-format_rate(char *buf, int size, off_t bytes)
-{
- int i;
-
- bytes *= 100;
- for (i = 0; bytes >= 100*1000 && unit[i] != 'T'; i++)
- bytes = (bytes + 512) / 1024;
- if (i == 0) {
- i++;
- bytes = (bytes + 512) / 1024;
- }
- snprintf(buf, size, "%3lld.%1lld%c%s",
- (long long) (bytes + 5) / 100,
- (long long) (bytes + 5) / 10 % 10,
- unit[i],
- i ? "B" : " ");
-}
-
-static void
-format_size(char *buf, int size, off_t bytes)
-{
- int i;
-
- for (i = 0; bytes >= 10000 && unit[i] != 'T'; i++)
- bytes = (bytes + 512) / 1024;
- snprintf(buf, size, "%4lld%c%s",
- (long long) bytes,
- unit[i],
- i ? "B" : " ");
-}
-
-void
-refresh_progress_meter(void)
-{
- char buf[MAX_WINSIZE + 1];
- time_t now;
- off_t transferred;
- double elapsed;
- int percent;
- off_t bytes_left;
- int cur_speed;
- int hours, minutes, seconds;
- int i, len;
- int file_len;
-
- transferred = *counter - cur_pos;
- cur_pos = *counter;
- now = time(NULL);
- bytes_left = end_pos - cur_pos;
-
- if (bytes_left > 0)
- elapsed = now - last_update;
- else {
- elapsed = now - start;
- /* Calculate true total speed when done */
- transferred = end_pos;
- bytes_per_second = 0;
- }
-
- /* calculate speed */
- if (elapsed != 0)
- cur_speed = (int)(transferred / elapsed);
- else
- cur_speed = transferred;
-
-#define AGE_FACTOR 0.9
- if (bytes_per_second != 0) {
- bytes_per_second = (int)((bytes_per_second * AGE_FACTOR) +
- (cur_speed * (1.0 - AGE_FACTOR)));
- } else
- bytes_per_second = cur_speed;
-
- /* filename */
- buf[0] = '\0';
- file_len = win_size - 35;
- if (file_len > 0) {
- len = snprintf(buf, file_len + 1, "\r%s", file);
- if (len < 0)
- len = 0;
- if (len >= file_len + 1)
- len = file_len;
- for (i = len; i < file_len; i++)
- buf[i] = ' ';
- buf[file_len] = '\0';
- }
-
- /* percent of transfer done */
- if (end_pos != 0)
- percent = (int)(((float)cur_pos / end_pos) * 100);
- else
- percent = 100;
- snprintf(buf + strlen(buf), win_size - strlen(buf),
- " %3d%% ", percent);
-
- /* amount transferred */
- format_size(buf + strlen(buf), win_size - strlen(buf),
- cur_pos);
- strlcat(buf, " ", win_size);
-
- /* bandwidth usage */
- format_rate(buf + strlen(buf), win_size - strlen(buf),
- (off_t)bytes_per_second);
- strlcat(buf, "/s ", win_size);
-
- /* ETA */
- if (!transferred)
- stalled += elapsed;
- else
- stalled = 0;
-
- if (stalled >= STALL_TIME)
- strlcat(buf, "- stalled -", win_size);
- else if (bytes_per_second == 0 && bytes_left)
- strlcat(buf, " --:-- ETA", win_size);
- else {
- if (bytes_left > 0)
- seconds = bytes_left / bytes_per_second;
- else
- seconds = (int)elapsed;
-
- hours = seconds / 3600;
- seconds -= hours * 3600;
- minutes = seconds / 60;
- seconds -= minutes * 60;
-
- if (hours != 0)
- snprintf(buf + strlen(buf), win_size - strlen(buf),
- "%d:%02d:%02d", hours, minutes, seconds);
- else
- snprintf(buf + strlen(buf), win_size - strlen(buf),
- " %02d:%02d", minutes, seconds);
-
- if (bytes_left > 0)
- strlcat(buf, " ETA", win_size);
- else
- strlcat(buf, " ", win_size);
- }
-
- atomicio(vwrite, STDOUT_FILENO, buf, win_size - 1);
- last_update = now;
-}
-
-/*ARGSUSED*/
-static void
-update_progress_meter(int ignore)
-{
- int save_errno;
-
- save_errno = errno;
-
- if (win_resized) {
- setscreensize();
- win_resized = 0;
- }
- if (can_output())
- refresh_progress_meter();
-
- signal(SIGALRM, update_progress_meter);
- alarm(UPDATE_INTERVAL);
- errno = save_errno;
-}
-
-void
-start_progress_meter(char *f, off_t filesize, off_t *ctr)
-{
- start = last_update = time(NULL);
- file = f;
- end_pos = filesize;
- cur_pos = 0;
- counter = ctr;
- stalled = 0;
- bytes_per_second = 0;
-
- setscreensize();
- if (can_output())
- refresh_progress_meter();
-
- signal(SIGALRM, update_progress_meter);
- signal(SIGWINCH, sig_winch);
- alarm(UPDATE_INTERVAL);
-}
-
-void
-stop_progress_meter(void)
-{
- alarm(0);
-
- if (!can_output())
- return;
-
- /* Ensure we complete the progress */
- if (cur_pos != end_pos)
- refresh_progress_meter();
-
- atomicio(vwrite, STDOUT_FILENO, "\n", 1);
-}
-
-/*ARGSUSED*/
-static void
-sig_winch(int sig)
-{
- win_resized = 1;
-}
-
-static void
-setscreensize(void)
-{
- struct winsize winsize;
-
- if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &winsize) != -1 &&
- winsize.ws_col != 0) {
- if (winsize.ws_col > MAX_WINSIZE)
- win_size = MAX_WINSIZE;
- else
- win_size = winsize.ws_col;
- } else
- win_size = DEFAULT_WINSIZE;
- win_size += 1; /* trailing \0 */
-}
diff --git a/usr/src/cmd/ssh/libssh/common/proxy-io.c b/usr/src/cmd/ssh/libssh/common/proxy-io.c
deleted file mode 100644
index c025f28f0a..0000000000
--- a/usr/src/cmd/ssh/libssh/common/proxy-io.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <stdio.h>
-#include <unistd.h>
-#include "proxy-io.h"
-
-int
-proxy_read_write_loop(int readfd, int writefd)
-{
- int rbytes, bytes_to_write, bytes_written;
- char readbuf[BUFFER_SIZ];
- char *ptr;
-
- rbytes = read(readfd, readbuf, sizeof (readbuf));
-
- if (rbytes > 0) {
- bytes_to_write = rbytes;
- ptr = readbuf;
- while (bytes_to_write > 0) {
- if ((bytes_written =
- write(writefd, ptr, bytes_to_write)) < 0) {
- perror("write");
- return (0);
- }
- bytes_to_write -= bytes_written;
- ptr += bytes_written;
- }
- } else if (rbytes <= 0) {
- return (0);
- }
- /* Read and write successful */
- return (1);
-}
diff --git a/usr/src/cmd/ssh/libssh/common/radix.c b/usr/src/cmd/ssh/libssh/common/radix.c
deleted file mode 100644
index 4200f71b3b..0000000000
--- a/usr/src/cmd/ssh/libssh/common/radix.c
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * Copyright (c) 1999 Dug Song. All rights reserved.
- * Copyright (c) 2002 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-#include "uuencode.h"
-
-RCSID("$OpenBSD: radix.c,v 1.22 2002/09/09 14:54:15 markus Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef AFS
-#include <krb.h>
-
-#include <radix.h>
-#include "bufaux.h"
-
-int
-creds_to_radix(CREDENTIALS *creds, u_char *buf, size_t buflen)
-{
- Buffer b;
- int ret;
-
- buffer_init(&b);
-
- buffer_put_char(&b, 1); /* version */
-
- buffer_append(&b, creds->service, strlen(creds->service));
- buffer_put_char(&b, '\0');
- buffer_append(&b, creds->instance, strlen(creds->instance));
- buffer_put_char(&b, '\0');
- buffer_append(&b, creds->realm, strlen(creds->realm));
- buffer_put_char(&b, '\0');
- buffer_append(&b, creds->pname, strlen(creds->pname));
- buffer_put_char(&b, '\0');
- buffer_append(&b, creds->pinst, strlen(creds->pinst));
- buffer_put_char(&b, '\0');
-
- /* Null string to repeat the realm. */
- buffer_put_char(&b, '\0');
-
- buffer_put_int(&b, creds->issue_date);
- buffer_put_int(&b, krb_life_to_time(creds->issue_date,
- creds->lifetime));
- buffer_append(&b, creds->session, sizeof(creds->session));
- buffer_put_short(&b, creds->kvno);
-
- /* 32 bit size + data */
- buffer_put_string(&b, creds->ticket_st.dat, creds->ticket_st.length);
-
- ret = uuencode(buffer_ptr(&b), buffer_len(&b), (char *)buf, buflen);
-
- buffer_free(&b);
- return ret;
-}
-
-#define GETSTRING(b, t, tlen) \
- do { \
- int i, found = 0; \
- for (i = 0; i < tlen; i++) { \
- if (buffer_len(b) == 0) \
- goto done; \
- t[i] = buffer_get_char(b); \
- if (t[i] == '\0') { \
- found = 1; \
- break; \
- } \
- } \
- if (!found) \
- goto done; \
- } while(0)
-
-int
-radix_to_creds(const char *buf, CREDENTIALS *creds)
-{
- Buffer b;
- u_char *space;
- char c, version, *p;
- u_int endTime, len;
- int blen, ret;
-
- ret = 0;
- blen = strlen(buf);
-
- /* sanity check for size */
- if (blen > 8192)
- return 0;
-
- buffer_init(&b);
- space = buffer_append_space(&b, blen);
-
- /* check version and length! */
- len = uudecode(buf, space, blen);
- if (len < 1)
- goto done;
-
- version = buffer_get_char(&b);
-
- GETSTRING(&b, creds->service, sizeof creds->service);
- GETSTRING(&b, creds->instance, sizeof creds->instance);
- GETSTRING(&b, creds->realm, sizeof creds->realm);
- GETSTRING(&b, creds->pname, sizeof creds->pname);
- GETSTRING(&b, creds->pinst, sizeof creds->pinst);
-
- if (buffer_len(&b) == 0)
- goto done;
-
- /* Ignore possibly different realm. */
- while (buffer_len(&b) > 0 && (c = buffer_get_char(&b)) != '\0')
- ;
-
- if (buffer_len(&b) == 0)
- goto done;
-
- creds->issue_date = buffer_get_int(&b);
-
- endTime = buffer_get_int(&b);
- creds->lifetime = krb_time_to_life(creds->issue_date, endTime);
-
- len = buffer_len(&b);
- if (len < sizeof(creds->session))
- goto done;
- memcpy(&creds->session, buffer_ptr(&b), sizeof(creds->session));
- buffer_consume(&b, sizeof(creds->session));
-
- creds->kvno = buffer_get_short(&b);
-
- p = buffer_get_string(&b, &len);
- if (len < 0 || len > sizeof(creds->ticket_st.dat))
- goto done;
- memcpy(&creds->ticket_st.dat, p, len);
- creds->ticket_st.length = len;
-
- ret = 1;
-done:
- buffer_free(&b);
- return ret;
-}
-#endif /* AFS */
diff --git a/usr/src/cmd/ssh/libssh/common/readconf.c b/usr/src/cmd/ssh/libssh/common/readconf.c
deleted file mode 100644
index b1e7f89c7a..0000000000
--- a/usr/src/cmd/ssh/libssh/common/readconf.c
+++ /dev/null
@@ -1,1276 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * Functions for reading the configuration files.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- * Copyright 2013 Joyent, Inc. All rights reserved.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: readconf.c,v 1.100 2002/06/19 00:27:55 deraadt Exp $");
-
-#include "ssh.h"
-#include "xmalloc.h"
-#include "compat.h"
-#include "cipher.h"
-#include "pathnames.h"
-#include "log.h"
-#include "readconf.h"
-#include "match.h"
-#include "misc.h"
-#include "kex.h"
-#include "mac.h"
-
-/* Format of the configuration file:
-
- # Configuration data is parsed as follows:
- # 1. command line options
- # 2. user-specific file
- # 3. system-wide file
- # Any configuration value is only changed the first time it is set.
- # Thus, host-specific definitions should be at the beginning of the
- # configuration file, and defaults at the end.
-
- # Host-specific declarations. These may override anything above. A single
- # host may match multiple declarations; these are processed in the order
- # that they are given in.
-
- Host *.ngs.fi ngs.fi
- User foo
-
- Host fake.com
- HostName another.host.name.real.org
- User blaah
- Port 34289
- ForwardX11 no
- ForwardAgent no
-
- Host books.com
- RemoteForward 9999 shadows.cs.hut.fi:9999
- Cipher 3des
-
- Host fascist.blob.com
- Port 23123
- User tylonen
- RhostsAuthentication no
- PasswordAuthentication no
-
- Host puukko.hut.fi
- User t35124p
- ProxyCommand ssh-proxy %h %p
-
- Host *.fr
- PublicKeyAuthentication no
-
- Host *.su
- Cipher none
- PasswordAuthentication no
-
- # Defaults for various options
- Host *
- ForwardAgent no
- ForwardX11 no
- RhostsAuthentication yes
- PasswordAuthentication yes
- RSAAuthentication yes
- RhostsRSAAuthentication yes
- StrictHostKeyChecking yes
- KeepAlives no
- IdentityFile ~/.ssh/identity
- Port 22
- EscapeChar ~
-
-*/
-
-/* Keyword tokens. */
-
-typedef enum {
- oBadOption,
- oForwardAgent, oForwardX11, oForwardX11Trusted, oGatewayPorts,
- oRhostsAuthentication,
- oPasswordAuthentication, oRSAAuthentication,
- oChallengeResponseAuthentication, oXAuthLocation,
-#if defined(KRB4) || defined(KRB5)
- oKerberosAuthentication,
-#endif
-#ifdef GSSAPI
- oGssKeyEx, oGssAuthentication, oGssDelegateCreds,
-#ifdef GSI
- oGssGlobusDelegateLimitedCreds,
-#endif /* GSI */
-#endif /* GSSAPI */
-#if defined(AFS) || defined(KRB5)
- oKerberosTgtPassing,
-#endif
-#ifdef AFS
- oAFSTokenPassing,
-#endif
- oIdentityFile, oHostName, oPort, oCipher, oRemoteForward, oLocalForward,
- oUser, oHost, oEscapeChar, oRhostsRSAAuthentication, oProxyCommand,
- oGlobalKnownHostsFile, oUserKnownHostsFile, oConnectionAttempts,
- oBatchMode, oCheckHostIP, oStrictHostKeyChecking, oCompression,
- oCompressionLevel, oKeepAlives, oNumberOfPasswordPrompts,
- oUsePrivilegedPort, oLogLevel, oCiphers, oProtocol, oMacs,
- oGlobalKnownHostsFile2, oUserKnownHostsFile2, oPubkeyAuthentication,
- oKbdInteractiveAuthentication, oKbdInteractiveDevices, oHostKeyAlias,
- oDynamicForward, oPreferredAuthentications, oHostbasedAuthentication,
- oHostKeyAlgorithms, oBindAddress, oSmartcardDevice,
- oClearAllForwardings, oNoHostAuthenticationForLocalhost,
- oFallBackToRsh, oUseRsh, oConnectTimeout, oHashKnownHosts,
- oServerAliveInterval, oServerAliveCountMax, oDisableBanner,
- oIgnoreIfUnknown, oRekeyLimit, oUseOpenSSLEngine,
- oDeprecated
-} OpCodes;
-
-/* Textual representations of the tokens. */
-
-static struct {
- const char *name;
- OpCodes opcode;
-} keywords[] = {
- { "forwardagent", oForwardAgent },
- { "forwardx11", oForwardX11 },
- { "forwardx11trusted", oForwardX11Trusted },
- { "xauthlocation", oXAuthLocation },
- { "gatewayports", oGatewayPorts },
- { "useprivilegedport", oUsePrivilegedPort },
- { "rhostsauthentication", oRhostsAuthentication },
- { "passwordauthentication", oPasswordAuthentication },
- { "kbdinteractiveauthentication", oKbdInteractiveAuthentication },
- { "kbdinteractivedevices", oKbdInteractiveDevices },
- { "rsaauthentication", oRSAAuthentication },
- { "pubkeyauthentication", oPubkeyAuthentication },
- { "dsaauthentication", oPubkeyAuthentication }, /* alias */
- { "rhostsrsaauthentication", oRhostsRSAAuthentication },
- { "hostbasedauthentication", oHostbasedAuthentication },
- { "challengeresponseauthentication", oChallengeResponseAuthentication },
- { "skeyauthentication", oChallengeResponseAuthentication }, /* alias */
- { "tisauthentication", oChallengeResponseAuthentication }, /* alias */
-#if defined(KRB4) || defined(KRB5)
- { "kerberosauthentication", oKerberosAuthentication },
-#endif
-#ifdef GSSAPI
- { "gssapikeyexchange", oGssKeyEx },
- { "gssapiauthentication", oGssAuthentication },
- { "gssapidelegatecredentials", oGssDelegateCreds },
- { "gsskeyex", oGssKeyEx }, /* alias */
- { "gssauthentication", oGssAuthentication }, /* alias */
- { "gssdelegatecreds", oGssDelegateCreds }, /* alias */
-#ifdef GSI
- /* For backwards compatability with old 1.2.27 client code */
- { "forwardgssapiglobusproxy", oGssDelegateCreds }, /* alias */
- { "forwardgssapiglobuslimitedproxy", oGssGlobusDelegateLimitedCreds },
-#endif /* GSI */
-#endif /* GSSAPI */
-#if defined(AFS) || defined(KRB5)
- { "kerberostgtpassing", oKerberosTgtPassing },
-#endif
-#ifdef AFS
- { "afstokenpassing", oAFSTokenPassing },
-#endif
- { "fallbacktorsh", oFallBackToRsh },
- { "usersh", oUseRsh },
- { "identityfile", oIdentityFile },
- { "identityfile2", oIdentityFile }, /* alias */
- { "hostname", oHostName },
- { "hostkeyalias", oHostKeyAlias },
- { "proxycommand", oProxyCommand },
- { "port", oPort },
- { "cipher", oCipher },
- { "ciphers", oCiphers },
- { "macs", oMacs },
- { "protocol", oProtocol },
- { "remoteforward", oRemoteForward },
- { "localforward", oLocalForward },
- { "user", oUser },
- { "host", oHost },
- { "escapechar", oEscapeChar },
- { "globalknownhostsfile", oGlobalKnownHostsFile },
- { "userknownhostsfile", oUserKnownHostsFile }, /* obsolete */
- { "globalknownhostsfile2", oGlobalKnownHostsFile2 },
- { "userknownhostsfile2", oUserKnownHostsFile2 }, /* obsolete */
- { "connectionattempts", oConnectionAttempts },
- { "batchmode", oBatchMode },
- { "checkhostip", oCheckHostIP },
- { "stricthostkeychecking", oStrictHostKeyChecking },
- { "compression", oCompression },
- { "compressionlevel", oCompressionLevel },
- { "tcpkeepalive", oKeepAlives },
- { "keepalive", oKeepAlives }, /* obsolete */
- { "numberofpasswordprompts", oNumberOfPasswordPrompts },
- { "loglevel", oLogLevel },
- { "dynamicforward", oDynamicForward },
- { "preferredauthentications", oPreferredAuthentications },
- { "hostkeyalgorithms", oHostKeyAlgorithms },
- { "bindaddress", oBindAddress },
- { "smartcarddevice", oSmartcardDevice },
- { "clearallforwardings", oClearAllForwardings },
- { "nohostauthenticationforlocalhost", oNoHostAuthenticationForLocalhost },
- { "rekeylimit", oRekeyLimit },
- { "connecttimeout", oConnectTimeout },
- { "serveraliveinterval", oServerAliveInterval },
- { "serveralivecountmax", oServerAliveCountMax },
- { "disablebanner", oDisableBanner },
- { "hashknownhosts", oHashKnownHosts },
- { "ignoreifunknown", oIgnoreIfUnknown },
- { "useopensslengine", oUseOpenSSLEngine },
- { NULL, oBadOption }
-};
-
-/*
- * Adds a local TCP/IP port forward to options. Never returns if there is an
- * error.
- */
-
-void
-add_local_forward(Options *options, const Forward *newfwd)
-{
- Forward *fwd;
-#ifndef NO_IPPORT_RESERVED_CONCEPT
- extern uid_t original_real_uid;
- if (newfwd->listen_port < IPPORT_RESERVED && original_real_uid != 0)
- fatal("Privileged ports can only be forwarded by root.");
-#endif
- if (options->num_local_forwards >= SSH_MAX_FORWARDS_PER_DIRECTION)
- fatal("Too many local forwards (max %d).", SSH_MAX_FORWARDS_PER_DIRECTION);
- fwd = &options->local_forwards[options->num_local_forwards++];
-
- fwd->listen_host = (newfwd->listen_host == NULL) ?
- NULL : xstrdup(newfwd->listen_host);
- fwd->listen_port = newfwd->listen_port;
- fwd->connect_host = xstrdup(newfwd->connect_host);
- fwd->connect_port = newfwd->connect_port;
-}
-
-/*
- * Adds a remote TCP/IP port forward to options. Never returns if there is
- * an error.
- */
-
-void
-add_remote_forward(Options *options, const Forward *newfwd)
-{
- Forward *fwd;
- if (options->num_remote_forwards >= SSH_MAX_FORWARDS_PER_DIRECTION)
- fatal("Too many remote forwards (max %d).",
- SSH_MAX_FORWARDS_PER_DIRECTION);
- fwd = &options->remote_forwards[options->num_remote_forwards++];
-
- fwd->listen_host = (newfwd->listen_host == NULL) ?
- NULL : xstrdup(newfwd->listen_host);
- fwd->listen_port = newfwd->listen_port;
- fwd->connect_host = xstrdup(newfwd->connect_host);
- fwd->connect_port = newfwd->connect_port;
-}
-
-static void
-clear_forwardings(Options *options)
-{
- int i;
-
- for (i = 0; i < options->num_local_forwards; i++) {
- if (options->local_forwards[i].listen_host != NULL)
- xfree(options->local_forwards[i].listen_host);
- xfree(options->local_forwards[i].connect_host);
- }
- options->num_local_forwards = 0;
- for (i = 0; i < options->num_remote_forwards; i++) {
- if (options->remote_forwards[i].listen_host != NULL)
- xfree(options->remote_forwards[i].listen_host);
- xfree(options->remote_forwards[i].connect_host);
- }
- options->num_remote_forwards = 0;
-}
-
-/*
- * Returns the number of the token pointed to by cp or oBadOption.
- */
-
-static OpCodes
-parse_token(const char *cp, const char *filename, int linenum)
-{
- u_int i;
-
- for (i = 0; keywords[i].name; i++)
- if (strcasecmp(cp, keywords[i].name) == 0)
- return keywords[i].opcode;
-
- debug("%s: line %d: unknown configuration option: %s",
- filename, linenum, cp);
- return oBadOption;
-}
-
-/*
- * Processes a single option line as used in the configuration files. This
- * only sets those values that have not already been set.
- */
-
-int
-process_config_line(Options *options, const char *host,
- char *line, const char *filename, int linenum,
- int *activep)
-{
- char *s, *string, **charptr, *endofnumber, *keyword, *arg, *arg2, fwdarg[256];
- int opcode, *intptr, value, scale, i;
- long long orig, val64;
- StoredOption *so;
- Forward fwd;
-
- s = line;
- /* Get the keyword. (Each line is supposed to begin with a keyword). */
- keyword = strdelim(&s);
- /* Ignore leading whitespace. */
- if (*keyword == '\0')
- keyword = strdelim(&s);
- if (keyword == NULL || !*keyword || *keyword == '\n' || *keyword == '#')
- return 0;
-
- opcode = parse_token(keyword, filename, linenum);
-
- switch (opcode) {
- case oBadOption:
- if (options->unknown_opts_num == MAX_UNKNOWN_OPTIONS) {
- error("we can't have more than %d unknown options:",
- MAX_UNKNOWN_OPTIONS);
- for (i = 0; i < MAX_UNKNOWN_OPTIONS; ++i) {
- so = &options->unknown_opts[i];
- error("%s:%d:%s",
- so->filename, so->linenum, so->keyword);
- xfree(so->keyword);
- xfree(so->filename);
- }
- fatal("too many unknown options found, can't continue");
- }
-
- /* unknown options will be processed later */
- so = &options->unknown_opts[options->unknown_opts_num];
- so->keyword = xstrdup(keyword);
- so->filename = xstrdup(filename);
- so->linenum = linenum;
- options->unknown_opts_num++;
- return (0);
-
- /* NOTREACHED */
- case oConnectTimeout:
- intptr = &options->connection_timeout;
-parse_time:
- arg = strdelim(&s);
- if (!arg || *arg == '\0')
- fatal("%s line %d: missing time value.",
- filename, linenum);
- if ((value = convtime(arg)) == -1)
- fatal("%s line %d: invalid time value.",
- filename, linenum);
- if (*activep && *intptr == -1)
- *intptr = value;
- break;
-
- case oForwardAgent:
- intptr = &options->forward_agent;
-parse_flag:
- arg = strdelim(&s);
- if (!arg || *arg == '\0')
- fatal("%.200s line %d: Missing yes/no argument.", filename, linenum);
- value = 0; /* To avoid compiler warning... */
- if (strcmp(arg, "yes") == 0 || strcmp(arg, "true") == 0)
- value = 1;
- else if (strcmp(arg, "no") == 0 || strcmp(arg, "false") == 0)
- value = 0;
- else
- fatal("%.200s line %d: Bad yes/no argument.", filename, linenum);
- if (*activep && *intptr == -1)
- *intptr = value;
- break;
-
- case oForwardX11:
- intptr = &options->forward_x11;
- goto parse_flag;
-
- case oForwardX11Trusted:
- intptr = &options->forward_x11_trusted;
- goto parse_flag;
-
- case oGatewayPorts:
- intptr = &options->gateway_ports;
- goto parse_flag;
-
- case oUsePrivilegedPort:
- intptr = &options->use_privileged_port;
- goto parse_flag;
-
- case oRhostsAuthentication:
- intptr = &options->rhosts_authentication;
- goto parse_flag;
-
- case oPasswordAuthentication:
- intptr = &options->password_authentication;
- goto parse_flag;
-
- case oKbdInteractiveAuthentication:
- intptr = &options->kbd_interactive_authentication;
- goto parse_flag;
-
- case oKbdInteractiveDevices:
- charptr = &options->kbd_interactive_devices;
- goto parse_string;
-
- case oPubkeyAuthentication:
- intptr = &options->pubkey_authentication;
- goto parse_flag;
-
- case oRSAAuthentication:
- intptr = &options->rsa_authentication;
- goto parse_flag;
-
- case oRhostsRSAAuthentication:
- intptr = &options->rhosts_rsa_authentication;
- goto parse_flag;
-
- case oHostbasedAuthentication:
- intptr = &options->hostbased_authentication;
- goto parse_flag;
-
- case oChallengeResponseAuthentication:
- intptr = &options->challenge_response_authentication;
- goto parse_flag;
-#if defined(KRB4) || defined(KRB5)
- case oKerberosAuthentication:
- intptr = &options->kerberos_authentication;
- goto parse_flag;
-#endif
-#ifdef GSSAPI
- case oGssKeyEx:
- intptr = &options->gss_keyex;
- goto parse_flag;
-
- case oGssAuthentication:
- intptr = &options->gss_authentication;
- goto parse_flag;
-
- case oGssDelegateCreds:
- intptr = &options->gss_deleg_creds;
- goto parse_flag;
-
-#ifdef GSI
- case oGssGlobusDelegateLimitedCreds:
- intptr = &options->gss_globus_deleg_limited_proxy;
- goto parse_flag;
-#endif /* GSI */
-
-#endif /* GSSAPI */
-
-#if defined(AFS) || defined(KRB5)
- case oKerberosTgtPassing:
- intptr = &options->kerberos_tgt_passing;
- goto parse_flag;
-#endif
-#ifdef AFS
- case oAFSTokenPassing:
- intptr = &options->afs_token_passing;
- goto parse_flag;
-#endif
- case oFallBackToRsh:
- intptr = &options->fallback_to_rsh;
- goto parse_flag;
-
- case oUseRsh:
- intptr = &options->use_rsh;
- goto parse_flag;
-
- case oBatchMode:
- intptr = &options->batch_mode;
- goto parse_flag;
-
- case oCheckHostIP:
- intptr = &options->check_host_ip;
- goto parse_flag;
-
- case oStrictHostKeyChecking:
- intptr = &options->strict_host_key_checking;
- arg = strdelim(&s);
- if (!arg || *arg == '\0')
- fatal("%.200s line %d: Missing yes/no/ask argument.",
- filename, linenum);
- value = 0; /* To avoid compiler warning... */
- if (strcmp(arg, "yes") == 0 || strcmp(arg, "true") == 0)
- value = 1;
- else if (strcmp(arg, "no") == 0 || strcmp(arg, "false") == 0)
- value = 0;
- else if (strcmp(arg, "ask") == 0)
- value = 2;
- else
- fatal("%.200s line %d: Bad yes/no/ask argument.", filename, linenum);
- if (*activep && *intptr == -1)
- *intptr = value;
- break;
-
- case oCompression:
- intptr = &options->compression;
- goto parse_flag;
-
- case oKeepAlives:
- intptr = &options->keepalives;
- goto parse_flag;
-
- case oNoHostAuthenticationForLocalhost:
- intptr = &options->no_host_authentication_for_localhost;
- goto parse_flag;
-
- case oNumberOfPasswordPrompts:
- intptr = &options->number_of_password_prompts;
- goto parse_int;
-
- case oCompressionLevel:
- intptr = &options->compression_level;
- goto parse_int;
-
- case oRekeyLimit:
- arg = strdelim(&s);
- if (!arg || *arg == '\0')
- fatal("%.200s line %d: Missing argument.", filename, linenum);
- if (arg[0] < '0' || arg[0] > '9')
- fatal("%.200s line %d: Bad number.", filename, linenum);
- orig = val64 = strtoll(arg, &endofnumber, 10);
- if (arg == endofnumber)
- fatal("%.200s line %d: Bad number.", filename, linenum);
- switch (toupper(*endofnumber)) {
- case '\0':
- scale = 1;
- break;
- case 'K':
- scale = 1<<10;
- break;
- case 'M':
- scale = 1<<20;
- break;
- case 'G':
- scale = 1<<30;
- break;
- default:
- fatal("%.200s line %d: Invalid RekeyLimit suffix",
- filename, linenum);
- }
- val64 *= scale;
- /* detect integer wrap and too-large limits */
- if ((val64 / scale) != orig || val64 > UINT_MAX)
- fatal("%.200s line %d: RekeyLimit too large",
- filename, linenum);
- if (val64 < 16)
- fatal("%.200s line %d: RekeyLimit too small",
- filename, linenum);
- if (*activep && options->rekey_limit == -1)
- options->rekey_limit = (u_int32_t)val64;
- break;
-
- case oIdentityFile:
- arg = strdelim(&s);
- if (!arg || *arg == '\0')
- fatal("%.200s line %d: Missing argument.", filename, linenum);
- if (*activep) {
- intptr = &options->num_identity_files;
- if (*intptr >= SSH_MAX_IDENTITY_FILES)
- fatal("%.200s line %d: Too many identity files specified (max %d).",
- filename, linenum, SSH_MAX_IDENTITY_FILES);
- charptr = &options->identity_files[*intptr];
- *charptr = xstrdup(arg);
- *intptr = *intptr + 1;
- }
- break;
-
- case oXAuthLocation:
- charptr=&options->xauth_location;
- goto parse_string;
-
- case oUser:
- charptr = &options->user;
-parse_string:
- arg = strdelim(&s);
- if (!arg || *arg == '\0')
- fatal("%.200s line %d: Missing argument.", filename, linenum);
- if (*activep && *charptr == NULL)
- *charptr = xstrdup(arg);
- break;
-
- case oGlobalKnownHostsFile:
- charptr = &options->system_hostfile;
- goto parse_string;
-
- case oUserKnownHostsFile:
- charptr = &options->user_hostfile;
- goto parse_string;
-
- case oGlobalKnownHostsFile2:
- charptr = &options->system_hostfile2;
- goto parse_string;
-
- case oUserKnownHostsFile2:
- charptr = &options->user_hostfile2;
- goto parse_string;
-
- case oHostName:
- charptr = &options->hostname;
- goto parse_string;
-
- case oHostKeyAlias:
- charptr = &options->host_key_alias;
- goto parse_string;
-
- case oPreferredAuthentications:
- charptr = &options->preferred_authentications;
- goto parse_string;
-
- case oBindAddress:
- charptr = &options->bind_address;
- goto parse_string;
-
- case oSmartcardDevice:
- charptr = &options->smartcard_device;
- goto parse_string;
-
- case oProxyCommand:
- charptr = &options->proxy_command;
- string = xstrdup("");
- while ((arg = strdelim(&s)) != NULL && *arg != '\0') {
- string = xrealloc(string, strlen(string) + strlen(arg) + 2);
- strcat(string, " ");
- strcat(string, arg);
- }
- if (*activep && *charptr == NULL)
- *charptr = string;
- else
- xfree(string);
- return 0;
-
- case oPort:
- intptr = &options->port;
-parse_int:
- arg = strdelim(&s);
- if (!arg || *arg == '\0')
- fatal("%.200s line %d: Missing argument.", filename, linenum);
- if (arg[0] < '0' || arg[0] > '9')
- fatal("%.200s line %d: Bad number.", filename, linenum);
-
- /* Octal, decimal, or hex format? */
- value = strtol(arg, &endofnumber, 0);
- if (arg == endofnumber)
- fatal("%.200s line %d: Bad number.", filename, linenum);
- if (*activep && *intptr == -1)
- *intptr = value;
- break;
-
- case oConnectionAttempts:
- intptr = &options->connection_attempts;
- goto parse_int;
-
- case oCipher:
- intptr = &options->cipher;
- arg = strdelim(&s);
- if (!arg || *arg == '\0')
- fatal("%.200s line %d: Missing argument.", filename, linenum);
- value = cipher_number(arg);
- if (value == -1)
- fatal("%.200s line %d: Bad cipher '%s'.",
- filename, linenum, arg ? arg : "<NONE>");
- if (*activep && *intptr == -1)
- *intptr = value;
- break;
-
- case oCiphers:
- arg = strdelim(&s);
- if (!arg || *arg == '\0')
- fatal("%.200s line %d: Missing argument.", filename, linenum);
- if (!ciphers_valid(arg))
- fatal("%.200s line %d: Bad SSH2 cipher spec '%s'.",
- filename, linenum, arg ? arg : "<NONE>");
- if (*activep && options->ciphers == NULL)
- options->ciphers = xstrdup(arg);
- break;
-
- case oMacs:
- arg = strdelim(&s);
- if (!arg || *arg == '\0')
- fatal("%.200s line %d: Missing argument.", filename, linenum);
- if (!mac_valid(arg))
- fatal("%.200s line %d: Bad SSH2 Mac spec '%s'.",
- filename, linenum, arg ? arg : "<NONE>");
- if (*activep && options->macs == NULL)
- options->macs = xstrdup(arg);
- break;
-
- case oHostKeyAlgorithms:
- arg = strdelim(&s);
- if (!arg || *arg == '\0')
- fatal("%.200s line %d: Missing argument.", filename, linenum);
- if (!key_names_valid2(arg))
- fatal("%.200s line %d: Bad protocol 2 host key algorithms '%s'.",
- filename, linenum, arg ? arg : "<NONE>");
- if (*activep && options->hostkeyalgorithms == NULL)
- options->hostkeyalgorithms = xstrdup(arg);
- break;
-
- case oProtocol:
- intptr = &options->protocol;
- arg = strdelim(&s);
- if (!arg || *arg == '\0')
- fatal("%.200s line %d: Missing argument.", filename, linenum);
- value = proto_spec(arg);
- if (value == SSH_PROTO_UNKNOWN)
- fatal("%.200s line %d: Bad protocol spec '%s'.",
- filename, linenum, arg ? arg : "<NONE>");
- if (*activep && *intptr == SSH_PROTO_UNKNOWN)
- *intptr = value;
- break;
-
- case oLogLevel:
- intptr = (int *) &options->log_level;
- arg = strdelim(&s);
- value = log_level_number(arg);
- if (value == SYSLOG_LEVEL_NOT_SET)
- fatal("%.200s line %d: unsupported log level '%s'",
- filename, linenum, arg ? arg : "<NONE>");
- if (*activep && (LogLevel) *intptr == SYSLOG_LEVEL_NOT_SET)
- *intptr = (LogLevel) value;
- break;
-
- case oLocalForward:
- case oRemoteForward:
- arg = strdelim(&s);
- if (arg == NULL || *arg == '\0')
- fatal("%.200s line %d: Missing port argument.",
- filename, linenum);
- arg2 = strdelim(&s);
- if (arg2 == NULL || *arg2 == '\0')
- fatal("%.200s line %d: Missing target argument.",
- filename, linenum);
-
- /* construct a string for parse_forward */
- snprintf(fwdarg, sizeof(fwdarg), "%s:%s", arg, arg2);
-
- if (parse_forward(1, &fwd, fwdarg) == 0)
- fatal("%.200s line %d: Bad forwarding specification.",
- filename, linenum);
-
- if (*activep) {
- if (opcode == oLocalForward)
- add_local_forward(options, &fwd);
- else if (opcode == oRemoteForward)
- add_remote_forward(options, &fwd);
- }
- break;
-
- case oDynamicForward:
- arg = strdelim(&s);
- if (!arg || *arg == '\0')
- fatal("%.200s line %d: Missing port argument.",
- filename, linenum);
-
- if (parse_forward(0, &fwd, arg) == 0) {
- fatal("%.200s line %d: Bad dynamic forwarding specification.",
- filename, linenum);
- }
-
- if (*activep) {
- fwd.connect_host = "socks";
- add_local_forward(options, &fwd);
- }
- break;
-
- case oClearAllForwardings:
- intptr = &options->clear_forwardings;
- goto parse_flag;
-
- case oHost:
- *activep = 0;
- while ((arg = strdelim(&s)) != NULL && *arg != '\0')
- if (match_pattern(host, arg)) {
- debug("Applying options for %.100s", arg);
- *activep = 1;
- break;
- }
- /* Avoid garbage check below, as strdelim is done. */
- return 0;
-
- case oEscapeChar:
- intptr = &options->escape_char;
- arg = strdelim(&s);
- if (!arg || *arg == '\0')
- fatal("%.200s line %d: Missing argument.", filename, linenum);
- if (arg[0] == '^' && arg[2] == 0 &&
- (u_char) arg[1] >= 64 && (u_char) arg[1] < 128)
- value = (u_char) arg[1] & 31;
- else if (strlen(arg) == 1)
- value = (u_char) arg[0];
- else if (strcmp(arg, "none") == 0)
- value = SSH_ESCAPECHAR_NONE;
- else {
- fatal("%.200s line %d: Bad escape character.",
- filename, linenum);
- /* NOTREACHED */
- value = 0; /* Avoid compiler warning. */
- }
- if (*activep && *intptr == -1)
- *intptr = value;
- break;
-
- case oServerAliveInterval:
- intptr = &options->server_alive_interval;
- goto parse_time;
-
- case oServerAliveCountMax:
- intptr = &options->server_alive_count_max;
- goto parse_int;
-
- case oHashKnownHosts:
- intptr = &options->hash_known_hosts;
- goto parse_flag;
-
- case oDisableBanner:
- arg = strdelim(&s);
- if (get_yes_no_flag(&options->disable_banner, arg, filename,
- linenum, *activep) == 1)
- break;
-
- if (strcmp(arg, "in-exec-mode") == 0)
- options->disable_banner = SSH_NO_BANNER_IN_EXEC_MODE;
- else
- fatal("%.200s line %d: Bad yes/no/in-exec-mode "
- "argument.", filename, linenum);
- break;
-
- case oIgnoreIfUnknown:
- charptr = &options->ignore_if_unknown;
- goto parse_string;
-
- case oUseOpenSSLEngine:
- intptr = &options->use_openssl_engine;
- goto parse_flag;
-
- case oDeprecated:
- debug("%s line %d: Deprecated option \"%s\"",
- filename, linenum, keyword);
- return 0;
-
- default:
- fatal("process_config_line: Unimplemented opcode %d", opcode);
- }
-
- /* Check that there is no garbage at end of line. */
- if ((arg = strdelim(&s)) != NULL && *arg != '\0') {
- fatal("%.200s line %d: garbage at end of line; \"%.200s\".",
- filename, linenum, arg);
- }
- return 0;
-}
-
-
-/*
- * Reads the config file and modifies the options accordingly. Options
- * should already be initialized before this call. This never returns if
- * there is an error. If the file does not exist, this returns 0.
- */
-
-int
-read_config_file(const char *filename, const char *host, Options *options)
-{
- FILE *f;
- char line[1024];
- int active, linenum;
-
- /* Open the file. */
- f = fopen(filename, "r");
- if (!f)
- return 0;
-
- debug("Reading configuration data %.200s", filename);
-
- /*
- * Mark that we are now processing the options. This flag is turned
- * on/off by Host specifications.
- */
- active = 1;
- linenum = 0;
- while (fgets(line, sizeof(line), f)) {
- /* Update line number counter. */
- linenum++;
- process_config_line(options, host, line, filename, linenum, &active);
- }
- fclose(f);
- return 1;
-}
-
-/*
- * Initializes options to special values that indicate that they have not yet
- * been set. Read_config_file will only set options with this value. Options
- * are processed in the following order: command line, user config file,
- * system config file. Last, fill_default_options is called.
- */
-
-void
-initialize_options(Options * options)
-{
- memset(options, 'X', sizeof(*options));
- options->forward_agent = -1;
- options->forward_x11 = -1;
- options->forward_x11_trusted = -1;
- options->xauth_location = NULL;
- options->gateway_ports = -1;
- options->use_privileged_port = -1;
- options->rhosts_authentication = -1;
- options->rsa_authentication = -1;
- options->pubkey_authentication = -1;
- options->challenge_response_authentication = -1;
-#ifdef GSSAPI
- options->gss_keyex = -1;
- options->gss_authentication = -1;
- options->gss_deleg_creds = -1;
-#ifdef GSI
- options->gss_globus_deleg_limited_proxy = -1;
-#endif /* GSI */
-#endif /* GSSAPI */
-
-#if defined(KRB4) || defined(KRB5)
- options->kerberos_authentication = -1;
-#endif
-#if defined(AFS) || defined(KRB5)
- options->kerberos_tgt_passing = -1;
-#endif
-#ifdef AFS
- options->afs_token_passing = -1;
-#endif
- options->password_authentication = -1;
- options->kbd_interactive_authentication = -1;
- options->kbd_interactive_devices = NULL;
- options->rhosts_rsa_authentication = -1;
- options->hostbased_authentication = -1;
- options->batch_mode = -1;
- options->check_host_ip = -1;
- options->strict_host_key_checking = -1;
- options->compression = -1;
- options->keepalives = -1;
- options->compression_level = -1;
- options->port = -1;
- options->connection_attempts = -1;
- options->connection_timeout = -1;
- options->number_of_password_prompts = -1;
- options->cipher = -1;
- options->ciphers = NULL;
- options->macs = NULL;
- options->hostkeyalgorithms = NULL;
- options->protocol = SSH_PROTO_UNKNOWN;
- options->num_identity_files = 0;
- options->hostname = NULL;
- options->host_key_alias = NULL;
- options->proxy_command = NULL;
- options->user = NULL;
- options->escape_char = -1;
- options->system_hostfile = NULL;
- options->user_hostfile = NULL;
- options->system_hostfile2 = NULL;
- options->user_hostfile2 = NULL;
- options->num_local_forwards = 0;
- options->num_remote_forwards = 0;
- options->clear_forwardings = -1;
- options->log_level = SYSLOG_LEVEL_NOT_SET;
- options->preferred_authentications = NULL;
- options->bind_address = NULL;
- options->smartcard_device = NULL;
- options->no_host_authentication_for_localhost = -1;
- options->rekey_limit = -1;
- options->fallback_to_rsh = -1;
- options->use_rsh = -1;
- options->server_alive_interval = -1;
- options->server_alive_count_max = -1;
- options->hash_known_hosts = -1;
- options->ignore_if_unknown = NULL;
- options->unknown_opts_num = 0;
- options->disable_banner = -1;
- options->use_openssl_engine = -1;
-}
-
-/*
- * Called after processing other sources of option data, this fills those
- * options for which no value has been specified with their default values.
- */
-
-void
-fill_default_options(Options * options)
-{
- int len;
-
- if (options->forward_agent == -1)
- options->forward_agent = 0;
- if (options->forward_x11 == -1)
- options->forward_x11 = 0;
- /*
- * Unlike OpenSSH, we keep backward compatibility for '-X' option
- * which means that X11 forwarding is trusted by default.
- */
- if (options->forward_x11_trusted == -1)
- options->forward_x11_trusted = 1;
- if (options->xauth_location == NULL)
- options->xauth_location = _PATH_XAUTH;
- if (options->gateway_ports == -1)
- options->gateway_ports = 0;
- if (options->use_privileged_port == -1)
- options->use_privileged_port = 0;
- if (options->rhosts_authentication == -1)
- options->rhosts_authentication = 0;
- if (options->rsa_authentication == -1)
- options->rsa_authentication = 1;
- if (options->pubkey_authentication == -1)
- options->pubkey_authentication = 1;
- if (options->challenge_response_authentication == -1)
- options->challenge_response_authentication = 1;
-#ifdef GSSAPI
- if (options->gss_keyex == -1)
- options->gss_keyex = 1;
- if (options->gss_authentication == -1)
- options->gss_authentication = 1;
- if (options->gss_deleg_creds == -1)
- options->gss_deleg_creds = 0;
-#ifdef GSI
- if (options->gss_globus_deleg_limited_proxy == -1)
- options->gss_globus_deleg_limited_proxy = 0;
-#endif /* GSI */
-#endif /* GSSAPI */
-#if defined(KRB4) || defined(KRB5)
- if (options->kerberos_authentication == -1)
- options->kerberos_authentication = 1;
-#endif
-#if defined(AFS) || defined(KRB5)
- if (options->kerberos_tgt_passing == -1)
- options->kerberos_tgt_passing = 1;
-#endif
-#ifdef AFS
- if (options->afs_token_passing == -1)
- options->afs_token_passing = 1;
-#endif
- if (options->password_authentication == -1)
- options->password_authentication = 1;
- if (options->kbd_interactive_authentication == -1)
- options->kbd_interactive_authentication = 1;
- if (options->rhosts_rsa_authentication == -1)
- options->rhosts_rsa_authentication = 0;
- if (options->hostbased_authentication == -1)
- options->hostbased_authentication = 0;
- if (options->batch_mode == -1)
- options->batch_mode = 0;
- if (options->check_host_ip == -1)
- options->check_host_ip = 1;
- if (options->strict_host_key_checking == -1)
- options->strict_host_key_checking = 2; /* 2 is default */
- if (options->compression == -1)
- options->compression = 0;
- if (options->keepalives == -1)
- options->keepalives = 1;
- if (options->compression_level == -1)
- options->compression_level = 6;
- if (options->port == -1)
- options->port = 0; /* Filled in ssh_connect. */
- if (options->connection_attempts == -1)
- options->connection_attempts = 1;
- if (options->number_of_password_prompts == -1)
- options->number_of_password_prompts = 3;
- /* Selected in ssh_login(). */
- if (options->cipher == -1)
- options->cipher = SSH_CIPHER_NOT_SET;
- /* options->ciphers, default set in myproposals.h */
- /* options->macs, default set in myproposals.h */
- /* options->hostkeyalgorithms, default set in myproposals.h */
- if (options->protocol == SSH_PROTO_UNKNOWN)
- options->protocol = SSH_PROTO_1|SSH_PROTO_2;
- if (options->num_identity_files == 0) {
- if (options->protocol & SSH_PROTO_1) {
- len = 2 + strlen(_PATH_SSH_CLIENT_IDENTITY) + 1;
- options->identity_files[options->num_identity_files] =
- xmalloc(len);
- snprintf(options->identity_files[options->num_identity_files++],
- len, "~/%.100s", _PATH_SSH_CLIENT_IDENTITY);
- }
- if (options->protocol & SSH_PROTO_2) {
- len = 2 + strlen(_PATH_SSH_CLIENT_ID_RSA) + 1;
- options->identity_files[options->num_identity_files] =
- xmalloc(len);
- snprintf(options->identity_files[options->num_identity_files++],
- len, "~/%.100s", _PATH_SSH_CLIENT_ID_RSA);
-
- len = 2 + strlen(_PATH_SSH_CLIENT_ID_DSA) + 1;
- options->identity_files[options->num_identity_files] =
- xmalloc(len);
- snprintf(options->identity_files[options->num_identity_files++],
- len, "~/%.100s", _PATH_SSH_CLIENT_ID_DSA);
- }
- }
- if (options->escape_char == -1)
- options->escape_char = '~';
- if (options->system_hostfile == NULL)
- options->system_hostfile = _PATH_SSH_SYSTEM_HOSTFILE;
- if (options->user_hostfile == NULL)
- options->user_hostfile = _PATH_SSH_USER_HOSTFILE;
- if (options->system_hostfile2 == NULL)
- options->system_hostfile2 = _PATH_SSH_SYSTEM_HOSTFILE2;
- if (options->user_hostfile2 == NULL)
- options->user_hostfile2 = _PATH_SSH_USER_HOSTFILE2;
- if (options->log_level == SYSLOG_LEVEL_NOT_SET)
- options->log_level = SYSLOG_LEVEL_INFO;
- if (options->clear_forwardings == 1)
- clear_forwardings(options);
- if (options->no_host_authentication_for_localhost == -1)
- options->no_host_authentication_for_localhost = 0;
- if (options->rekey_limit == -1)
- options->rekey_limit = 0;
- if (options->fallback_to_rsh == -1)
- options->fallback_to_rsh = 0;
- if (options->use_rsh == -1)
- options->use_rsh = 0;
- if (options->server_alive_interval == -1)
- options->server_alive_interval = 0;
- if (options->server_alive_count_max == -1)
- options->server_alive_count_max = 3;
- if (options->hash_known_hosts == -1)
- options->hash_known_hosts = 0;
- if (options->disable_banner == -1)
- options->disable_banner = 0;
- if (options->use_openssl_engine == -1)
- options->use_openssl_engine = 1;
- /* options->proxy_command should not be set by default */
- /* options->user will be set in the main program if appropriate */
- /* options->hostname will be set in the main program if appropriate */
- /* options->host_key_alias should not be set by default */
- /* options->preferred_authentications will be set in ssh */
- /* options->ignore_if_unknown should not be set by default */
-}
-
-/*
- * Parses a string containing a port forwarding specification of one of the
- * two forms, short or long:
- *
- * [listenhost:]listenport
- * [listenhost:]listenport:connecthost:connectport
- *
- * short forwarding specification is used for dynamic port forwarding and for
- * port forwarding cancelation in process_cmdline(). The function returns number
- * of arguments parsed or zero on any error.
- */
-int
-parse_forward(int long_form, Forward *fwd, const char *fwdspec)
-{
- int i;
- char *p, *cp, *fwdarg[5];
-
- memset(fwd, '\0', sizeof(*fwd));
-
- cp = p = xstrdup(fwdspec);
-
- /* skip leading spaces */
- while (isspace(*cp))
- cp++;
-
- for (i = 0; i < 5; ++i)
- if ((fwdarg[i] = hpdelim(&cp)) == NULL)
- break;
-
- if ((long_form == 0 && i > 2) || (long_form == 1 && i < 3) || (i == 5))
- goto fail_free;
-
- switch (i) {
- case 0:
- goto fail_free;
-
- case 1:
- fwd->listen_host = NULL;
- fwd->listen_port = a2port(fwdarg[0]);
- break;
-
- case 2:
- fwd->listen_host = xstrdup(cleanhostname(fwdarg[0]));
- fwd->listen_port = a2port(fwdarg[1]);
- break;
-
- case 3:
- fwd->listen_host = NULL;
- fwd->listen_port = a2port(fwdarg[0]);
- fwd->connect_host = xstrdup(cleanhostname(fwdarg[1]));
- fwd->connect_port = a2port(fwdarg[2]);
- break;
-
- case 4:
- fwd->listen_host = xstrdup(cleanhostname(fwdarg[0]));
- fwd->listen_port = a2port(fwdarg[1]);
- fwd->connect_host = xstrdup(cleanhostname(fwdarg[2]));
- fwd->connect_port = a2port(fwdarg[3]);
- break;
- }
-
- if (fwd->listen_port == 0 || (fwd->connect_port == 0 && i > 2))
- goto fail_free;
-
- xfree(p);
- return (i);
-
-fail_free:
- if (p != NULL)
- xfree(p);
- if (fwd->connect_host != NULL)
- xfree(fwd->connect_host);
- if (fwd->listen_host != NULL)
- xfree(fwd->listen_host);
- return (0);
-}
-
-/*
- * Process previously stored unknown options. When this function is called we
- * already have IgnoreIfUnknown set so finally we can decide whether each
- * unknown option is to be ignored or not.
- */
-void
-process_unknown_options(Options *options)
-{
- StoredOption *so;
- int m, i, bad_options = 0;
-
- /* if there is no unknown option we are done */
- if (options->unknown_opts_num == 0)
- return;
-
- /*
- * Now go through the list of unknown options and report any one that
- * is not explicitly listed in IgnoreIfUnknown option. If at least one
- * such as that is found it's a show stopper.
- */
- for (i = 0; i < options->unknown_opts_num; ++i) {
- so = &options->unknown_opts[i];
- if (options->ignore_if_unknown == NULL)
- m = 0;
- else
- m = match_pattern_list(tolowercase(so->keyword),
- options->ignore_if_unknown,
- strlen(options->ignore_if_unknown), 1);
- if (m == 1) {
- debug("%s: line %d: ignoring unknown option: %s",
- so->filename, so->linenum, so->keyword);
- }
- else {
- error("%s: line %d: unknown configuration option: %s",
- so->filename, so->linenum, so->keyword);
- bad_options++;
- }
- xfree(so->keyword);
- xfree(so->filename);
- }
-
- /* exit if we found at least one unignored unknown option */
- if (bad_options > 0)
- fatal("terminating, %d bad configuration option(s)",
- bad_options);
-}
diff --git a/usr/src/cmd/ssh/libssh/common/readpass.c b/usr/src/cmd/ssh/libssh/common/readpass.c
deleted file mode 100644
index fa1f1a4982..0000000000
--- a/usr/src/cmd/ssh/libssh/common/readpass.c
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/*
- * Copyright (c) 2001 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: readpass.c,v 1.27 2002/03/26 15:58:46 markus Exp $");
-
-#include "xmalloc.h"
-#include "readpass.h"
-#include "pathnames.h"
-#include "log.h"
-#include "ssh.h"
-#include <langinfo.h>
-
-static char *
-ssh_askpass(char *askpass, const char *msg)
-{
- pid_t pid;
- size_t len;
- char *pass;
- int p[2], status, ret;
- char buf[1024];
-
- if (fflush(stdout) != 0)
- error("ssh_askpass: fflush: %s", strerror(errno));
- if (askpass == NULL)
- fatal("internal error: askpass undefined");
- if (pipe(p) < 0) {
- error("ssh_askpass: pipe: %s", strerror(errno));
- return xstrdup("");
- }
- if ((pid = fork()) < 0) {
- error("ssh_askpass: fork: %s", strerror(errno));
- return xstrdup("");
- }
- if (pid == 0) {
- seteuid(getuid());
- setuid(getuid());
- close(p[0]);
- if (dup2(p[1], STDOUT_FILENO) < 0)
- fatal("ssh_askpass: dup2: %s", strerror(errno));
- execlp(askpass, askpass, msg, (char *) 0);
- fatal("ssh_askpass: exec(%s): %s", askpass, strerror(errno));
- }
- close(p[1]);
-
- len = ret = 0;
- do {
- ret = read(p[0], buf + len, sizeof(buf) - 1 - len);
- if (ret == -1 && errno == EINTR)
- continue;
- if (ret <= 0)
- break;
- len += ret;
- } while (sizeof(buf) - 1 - len > 0);
- buf[len] = '\0';
-
- close(p[0]);
- while (waitpid(pid, &status, 0) < 0)
- if (errno != EINTR)
- break;
-
- buf[strcspn(buf, "\r\n")] = '\0';
- pass = xstrdup(buf);
- memset(buf, 0, sizeof(buf));
- return pass;
-}
-
-/*
- * Reads a passphrase from /dev/tty with echo turned off/on. Returns the
- * passphrase (allocated with xmalloc). Exits if EOF is encountered. If
- * RP_ALLOW_STDIN is set, the passphrase will be read from stdin if no
- * tty is available
- */
-char *
-read_passphrase(const char *prompt, int flags)
-{
- char *askpass = NULL, *ret, buf[1024];
- int rppflags, use_askpass = 0, ttyfd;
-
- rppflags = (flags & RP_ECHO) ? RPP_ECHO_ON : RPP_ECHO_OFF;
- if (flags & RP_ALLOW_STDIN) {
- if (!isatty(STDIN_FILENO))
- use_askpass = 1;
- } else {
- rppflags |= RPP_REQUIRE_TTY;
- ttyfd = open(_PATH_TTY, O_RDWR);
- if (ttyfd >= 0)
- close(ttyfd);
- else
- use_askpass = 1;
- }
-
- if (use_askpass && getenv("DISPLAY")) {
- if (getenv(SSH_ASKPASS_ENV))
- askpass = getenv(SSH_ASKPASS_ENV);
- else
- askpass = _PATH_SSH_ASKPASS_DEFAULT;
- return ssh_askpass(askpass, prompt);
- }
-
- if (readpassphrase(prompt, buf, sizeof buf, rppflags) == NULL) {
- if (flags & RP_ALLOW_EOF)
- return NULL;
- return xstrdup("");
- }
-
- ret = xstrdup(buf);
- memset(buf, 'x', sizeof buf);
- return ret;
-}
-
-int
-ask_permission(const char *fmt, ...)
-{
- va_list args;
- char *p, prompt[1024];
- int allowed = 0;
- char *yeschar = nl_langinfo(YESSTR);
-
- va_start(args, fmt);
- vsnprintf(prompt, sizeof(prompt), fmt, args);
- va_end(args);
-
- p = read_passphrase(prompt, RP_USE_ASKPASS|RP_ALLOW_EOF);
- if (p != NULL) {
- /*
- * Accept empty responses and responses consisting
- * of the word "yes" as affirmative.
- */
- if (*p == '\0' || *p == '\n' ||
- strncasecmp(p, yeschar, 1) == 0)
- allowed = 1;
- xfree(p);
- }
-
- return (allowed);
-}
diff --git a/usr/src/cmd/ssh/libssh/common/rsa.c b/usr/src/cmd/ssh/libssh/common/rsa.c
deleted file mode 100644
index f24a44a770..0000000000
--- a/usr/src/cmd/ssh/libssh/common/rsa.c
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- *
- *
- * Copyright (c) 1999 Niels Provos. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * Description of the RSA algorithm can be found e.g. from the following
- * sources:
- *
- * Bruce Schneier: Applied Cryptography. John Wiley & Sons, 1994.
- *
- * Jennifer Seberry and Josed Pieprzyk: Cryptography: An Introduction to
- * Computer Security. Prentice-Hall, 1989.
- *
- * Man Young Rhee: Cryptography and Secure Data Communications. McGraw-Hill,
- * 1994.
- *
- * R. Rivest, A. Shamir, and L. M. Adleman: Cryptographic Communications
- * System and Method. US Patent 4,405,829, 1983.
- *
- * Hans Riesel: Prime Numbers and Computer Methods for Factorization.
- * Birkhauser, 1994.
- *
- * The RSA Frequently Asked Questions document by RSA Data Security,
- * Inc., 1995.
- *
- * RSA in 3 lines of perl by Adam Back <aba@atlax.ex.ac.uk>, 1995, as
- * included below:
- *
- * [gone - had to be deleted - what a pity]
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: rsa.c,v 1.24 2001/12/27 18:22:16 markus Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "rsa.h"
-#include "log.h"
-#include "xmalloc.h"
-
-void
-rsa_public_encrypt(BIGNUM *out, BIGNUM *in, RSA *key)
-{
- u_char *inbuf, *outbuf;
- int len, ilen, olen;
-
- if (BN_num_bits(key->e) < 2 || !BN_is_odd(key->e))
- fatal("rsa_public_encrypt() exponent too small or not odd");
-
- olen = BN_num_bytes(key->n);
- outbuf = xmalloc(olen);
-
- ilen = BN_num_bytes(in);
- inbuf = xmalloc(ilen);
- BN_bn2bin(in, inbuf);
-
- if ((len = RSA_public_encrypt(ilen, inbuf, outbuf, key,
- RSA_PKCS1_PADDING)) <= 0)
- fatal("rsa_public_encrypt() failed");
-
- BN_bin2bn(outbuf, len, out);
-
- memset(outbuf, 0, olen);
- memset(inbuf, 0, ilen);
- xfree(outbuf);
- xfree(inbuf);
-}
-
-int
-rsa_private_decrypt(BIGNUM *out, BIGNUM *in, RSA *key)
-{
- u_char *inbuf, *outbuf;
- int len, ilen, olen;
-
- olen = BN_num_bytes(key->n);
- outbuf = xmalloc(olen);
-
- ilen = BN_num_bytes(in);
- inbuf = xmalloc(ilen);
- BN_bn2bin(in, inbuf);
-
- if ((len = RSA_private_decrypt(ilen, inbuf, outbuf, key,
- RSA_PKCS1_PADDING)) <= 0) {
- error("rsa_private_decrypt() failed");
- } else {
- BN_bin2bn(outbuf, len, out);
- }
- memset(outbuf, 0, olen);
- memset(inbuf, 0, ilen);
- xfree(outbuf);
- xfree(inbuf);
- return len;
-}
-
-/* calculate p-1 and q-1 */
-void
-rsa_generate_additional_parameters(RSA *rsa)
-{
- BIGNUM *aux;
- BN_CTX *ctx;
-
- if ((aux = BN_new()) == NULL)
- fatal("rsa_generate_additional_parameters: BN_new failed");
- if ((ctx = BN_CTX_new()) == NULL)
- fatal("rsa_generate_additional_parameters: BN_CTX_new failed");
-
- BN_sub(aux, rsa->q, BN_value_one());
- BN_mod(rsa->dmq1, rsa->d, aux, ctx);
-
- BN_sub(aux, rsa->p, BN_value_one());
- BN_mod(rsa->dmp1, rsa->d, aux, ctx);
-
- BN_clear_free(aux);
- BN_CTX_free(ctx);
-}
-
diff --git a/usr/src/cmd/ssh/libssh/common/sftp-common.c b/usr/src/cmd/ssh/libssh/common/sftp-common.c
deleted file mode 100644
index ead9e2406f..0000000000
--- a/usr/src/cmd/ssh/libssh/common/sftp-common.c
+++ /dev/null
@@ -1,227 +0,0 @@
-/*
- * Copyright (c) 2001 Markus Friedl. All rights reserved.
- * Copyright (c) 2001 Damien Miller. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/* $OpenBSD: sftp-common.c,v 1.20 2006/08/03 03:34:42 deraadt Exp $ */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "includes.h"
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/param.h>
-
-#include <grp.h>
-#include <pwd.h>
-#include <stdio.h>
-#include <string.h>
-#include <time.h>
-#include <stdarg.h>
-
-#include "xmalloc.h"
-#include "buffer.h"
-#include "bufaux.h"
-#include "log.h"
-
-#include "sftp.h"
-#include "sftp-common.h"
-
-/* Clear contents of attributes structure */
-void
-attrib_clear(Attrib *a)
-{
- a->flags = 0;
- a->size = 0;
- a->uid = 0;
- a->gid = 0;
- a->perm = 0;
- a->atime = 0;
- a->mtime = 0;
-}
-
-/* Convert from struct stat to filexfer attribs */
-void
-stat_to_attrib(const struct stat *st, Attrib *a)
-{
- attrib_clear(a);
- a->flags = 0;
- a->flags |= SSH2_FILEXFER_ATTR_SIZE;
- a->size = st->st_size;
- a->flags |= SSH2_FILEXFER_ATTR_UIDGID;
- a->uid = st->st_uid;
- a->gid = st->st_gid;
- a->flags |= SSH2_FILEXFER_ATTR_PERMISSIONS;
- a->perm = st->st_mode;
- a->flags |= SSH2_FILEXFER_ATTR_ACMODTIME;
- a->atime = st->st_atime;
- a->mtime = st->st_mtime;
-}
-
-/* Convert from filexfer attribs to struct stat */
-void
-attrib_to_stat(const Attrib *a, struct stat *st)
-{
- memset(st, 0, sizeof(*st));
-
- if (a->flags & SSH2_FILEXFER_ATTR_SIZE)
- st->st_size = a->size;
- if (a->flags & SSH2_FILEXFER_ATTR_UIDGID) {
- st->st_uid = a->uid;
- st->st_gid = a->gid;
- }
- if (a->flags & SSH2_FILEXFER_ATTR_PERMISSIONS)
- st->st_mode = a->perm;
- if (a->flags & SSH2_FILEXFER_ATTR_ACMODTIME) {
- st->st_atime = a->atime;
- st->st_mtime = a->mtime;
- }
-}
-
-/* Decode attributes in buffer */
-Attrib *
-decode_attrib(Buffer *b)
-{
- static Attrib a;
-
- attrib_clear(&a);
- a.flags = buffer_get_int(b);
- if (a.flags & SSH2_FILEXFER_ATTR_SIZE)
- a.size = buffer_get_int64(b);
- if (a.flags & SSH2_FILEXFER_ATTR_UIDGID) {
- a.uid = buffer_get_int(b);
- a.gid = buffer_get_int(b);
- }
- if (a.flags & SSH2_FILEXFER_ATTR_PERMISSIONS)
- a.perm = buffer_get_int(b);
- if (a.flags & SSH2_FILEXFER_ATTR_ACMODTIME) {
- a.atime = buffer_get_int(b);
- a.mtime = buffer_get_int(b);
- }
- /* vendor-specific extensions */
- if (a.flags & SSH2_FILEXFER_ATTR_EXTENDED) {
- char *type, *data;
- int i, count;
-
- count = buffer_get_int(b);
- for (i = 0; i < count; i++) {
- type = buffer_get_string(b, NULL);
- data = buffer_get_string(b, NULL);
- debug3("Got file attribute \"%s\"", type);
- xfree(type);
- xfree(data);
- }
- }
- return &a;
-}
-
-/* Encode attributes to buffer */
-void
-encode_attrib(Buffer *b, const Attrib *a)
-{
- buffer_put_int(b, a->flags);
- if (a->flags & SSH2_FILEXFER_ATTR_SIZE)
- buffer_put_int64(b, a->size);
- if (a->flags & SSH2_FILEXFER_ATTR_UIDGID) {
- buffer_put_int(b, a->uid);
- buffer_put_int(b, a->gid);
- }
- if (a->flags & SSH2_FILEXFER_ATTR_PERMISSIONS)
- buffer_put_int(b, a->perm);
- if (a->flags & SSH2_FILEXFER_ATTR_ACMODTIME) {
- buffer_put_int(b, a->atime);
- buffer_put_int(b, a->mtime);
- }
-}
-
-/* Convert from SSH2_FX_ status to text error message */
-const char *
-fx2txt(int status)
-{
- switch (status) {
- case SSH2_FX_OK:
- return(gettext("No error"));
- case SSH2_FX_EOF:
- return(gettext("End of file"));
- case SSH2_FX_NO_SUCH_FILE:
- return(gettext("No such file or directory"));
- case SSH2_FX_PERMISSION_DENIED:
- return(gettext("Permission denied"));
- case SSH2_FX_FAILURE:
- return(gettext("Failure"));
- case SSH2_FX_BAD_MESSAGE:
- return(gettext("Bad message"));
- case SSH2_FX_NO_CONNECTION:
- return(gettext("No connection"));
- case SSH2_FX_CONNECTION_LOST:
- return(gettext("Connection lost"));
- case SSH2_FX_OP_UNSUPPORTED:
- return(gettext("Operation unsupported"));
- default:
- return(gettext("Unknown status"));
- }
- /* NOTREACHED */
-}
-
-/*
- * drwxr-xr-x 5 markus markus 1024 Jan 13 18:39 .ssh
- */
-char *
-ls_file(const char *name, const struct stat *st, int remote)
-{
- int ulen, glen, sz = 0;
- struct passwd *pw;
- struct group *gr;
- struct tm *ltime = localtime(&st->st_mtime);
- char *user, *group;
- char buf[1024], mode[11+1], tbuf[12+1], ubuf[11+1], gbuf[11+1];
-
- strmode(st->st_mode, mode);
- if (!remote && (pw = getpwuid(st->st_uid)) != NULL) {
- user = pw->pw_name;
- } else {
- snprintf(ubuf, sizeof ubuf, "%u", (u_int)st->st_uid);
- user = ubuf;
- }
- if (!remote && (gr = getgrgid(st->st_gid)) != NULL) {
- group = gr->gr_name;
- } else {
- snprintf(gbuf, sizeof gbuf, "%u", (u_int)st->st_gid);
- group = gbuf;
- }
- if (ltime != NULL) {
- if (time(NULL) - st->st_mtime < (365*24*60*60)/2)
- sz = strftime(tbuf, sizeof tbuf, "%b %e %H:%M", ltime);
- else
- sz = strftime(tbuf, sizeof tbuf, "%b %e %Y", ltime);
- }
- if (sz == 0)
- tbuf[0] = '\0';
- ulen = MAX(strlen(user), 8);
- glen = MAX(strlen(group), 8);
- snprintf(buf, sizeof buf, "%s %3u %-*s %-*s %8llu %s %s", mode,
- (u_int)st->st_nlink, ulen, user, glen, group,
- (unsigned long long)st->st_size, tbuf, name);
- return xstrdup(buf);
-}
diff --git a/usr/src/cmd/ssh/libssh/common/ssh-dss.c b/usr/src/cmd/ssh/libssh/common/ssh-dss.c
deleted file mode 100644
index f73a91672f..0000000000
--- a/usr/src/cmd/ssh/libssh/common/ssh-dss.c
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Copyright (c) 2000 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: ssh-dss.c,v 1.17 2002/07/04 10:41:47 markus Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <openssl/bn.h>
-#include <openssl/evp.h>
-
-#include "xmalloc.h"
-#include "buffer.h"
-#include "bufaux.h"
-#include "compat.h"
-#include "log.h"
-#include "key.h"
-#include "ssh-dss.h"
-
-#define INTBLOB_LEN 20
-#define SIGBLOB_LEN (2*INTBLOB_LEN)
-
-int
-ssh_dss_sign(Key *key, u_char **sigp, u_int *lenp,
- u_char *data, u_int datalen)
-{
- DSA_SIG *sig;
- const EVP_MD *evp_md = EVP_sha1();
- EVP_MD_CTX md;
- u_char digest[EVP_MAX_MD_SIZE], sigblob[SIGBLOB_LEN];
- u_int rlen, slen, len, dlen;
- Buffer b;
-
- if (key == NULL || key->type != KEY_DSA || key->dsa == NULL) {
- error("ssh_dss_sign: no DSA key");
- return -1;
- }
- EVP_DigestInit(&md, evp_md);
- EVP_DigestUpdate(&md, data, datalen);
- EVP_DigestFinal(&md, digest, &dlen);
-
- sig = DSA_do_sign(digest, dlen, key->dsa);
- memset(digest, 'd', sizeof(digest));
-
- if (sig == NULL) {
- error("ssh_dss_sign: sign failed");
- return -1;
- }
-
- rlen = BN_num_bytes(sig->r);
- slen = BN_num_bytes(sig->s);
- if (rlen > INTBLOB_LEN || slen > INTBLOB_LEN) {
- error("bad sig size %u %u", rlen, slen);
- DSA_SIG_free(sig);
- return -1;
- }
- memset(sigblob, 0, SIGBLOB_LEN);
- BN_bn2bin(sig->r, sigblob+ SIGBLOB_LEN - INTBLOB_LEN - rlen);
- BN_bn2bin(sig->s, sigblob+ SIGBLOB_LEN - slen);
- DSA_SIG_free(sig);
-
- if (datafellows & SSH_BUG_SIGBLOB) {
- if (lenp != NULL)
- *lenp = SIGBLOB_LEN;
- if (sigp != NULL) {
- *sigp = xmalloc(SIGBLOB_LEN);
- memcpy(*sigp, sigblob, SIGBLOB_LEN);
- }
- } else {
- /* ietf-drafts */
- buffer_init(&b);
- buffer_put_cstring(&b, "ssh-dss");
- buffer_put_string(&b, sigblob, SIGBLOB_LEN);
- len = buffer_len(&b);
- if (lenp != NULL)
- *lenp = len;
- if (sigp != NULL) {
- *sigp = xmalloc(len);
- memcpy(*sigp, buffer_ptr(&b), len);
- }
- buffer_free(&b);
- }
- return 0;
-}
-int
-ssh_dss_verify(Key *key, u_char *signature, u_int signaturelen,
- u_char *data, u_int datalen)
-{
- DSA_SIG *sig;
- const EVP_MD *evp_md = EVP_sha1();
- EVP_MD_CTX md;
- u_char digest[EVP_MAX_MD_SIZE], *sigblob;
- u_int len, dlen;
- int rlen, ret;
- Buffer b;
-
- if (key == NULL || key->type != KEY_DSA || key->dsa == NULL) {
- error("ssh_dss_verify: no DSA key");
- return -1;
- }
-
- /* fetch signature */
- if (datafellows & SSH_BUG_SIGBLOB) {
- sigblob = signature;
- len = signaturelen;
- } else {
- /* ietf-drafts */
- char *ktype;
- buffer_init(&b);
- buffer_append(&b, signature, signaturelen);
- ktype = buffer_get_string(&b, NULL);
- if (strcmp("ssh-dss", ktype) != 0) {
- error("ssh_dss_verify: cannot handle type %s", ktype);
- buffer_free(&b);
- xfree(ktype);
- return -1;
- }
- xfree(ktype);
- sigblob = buffer_get_string(&b, &len);
- rlen = buffer_len(&b);
- buffer_free(&b);
- if (rlen != 0) {
- error("ssh_dss_verify: "
- "remaining bytes in signature %d", rlen);
- xfree(sigblob);
- return -1;
- }
- }
-
- if (len != SIGBLOB_LEN) {
- fatal("bad sigbloblen %u != SIGBLOB_LEN", len);
- }
-
- /* parse signature */
- if ((sig = DSA_SIG_new()) == NULL)
- fatal("ssh_dss_verify: DSA_SIG_new failed");
- if ((sig->r = BN_new()) == NULL)
- fatal("ssh_dss_verify: BN_new failed");
- if ((sig->s = BN_new()) == NULL)
- fatal("ssh_dss_verify: BN_new failed");
- BN_bin2bn(sigblob, INTBLOB_LEN, sig->r);
- BN_bin2bn(sigblob+ INTBLOB_LEN, INTBLOB_LEN, sig->s);
-
- if (!(datafellows & SSH_BUG_SIGBLOB)) {
- memset(sigblob, 0, len);
- xfree(sigblob);
- }
-
- /* sha1 the data */
- EVP_DigestInit(&md, evp_md);
- EVP_DigestUpdate(&md, data, datalen);
- EVP_DigestFinal(&md, digest, &dlen);
-
- ret = DSA_do_verify(digest, dlen, sig, key->dsa);
- memset(digest, 'd', sizeof(digest));
-
- DSA_SIG_free(sig);
-
- debug("ssh_dss_verify: signature %s",
- ret == 1 ? "correct" : ret == 0 ? "incorrect" : "error");
- return ret;
-}
diff --git a/usr/src/cmd/ssh/libssh/common/ssh-gss.c b/usr/src/cmd/ssh/libssh/common/ssh-gss.c
deleted file mode 100644
index 37aeb04873..0000000000
--- a/usr/src/cmd/ssh/libssh/common/ssh-gss.c
+++ /dev/null
@@ -1,782 +0,0 @@
-/*
- * Copyright (c) 2001-2003 Simon Wilkinson. All rights reserved. *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR `AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include "includes.h"
-
-#ifdef GSSAPI
-
-#include "ssh.h"
-#include "ssh2.h"
-#include "xmalloc.h"
-#include "buffer.h"
-#include "bufaux.h"
-#include "packet.h"
-#include "compat.h"
-#include <openssl/evp.h>
-#include "cipher.h"
-#include "kex.h"
-#include "log.h"
-#include "compat.h"
-#include "xlist.h"
-
-#include <netdb.h>
-
-#include "ssh-gss.h"
-
-#ifdef HAVE_GSS_OID_TO_MECH
-#include <gssapi/gssapi_ext.h>
-#endif /* HAVE_GSS_OID_TO_MECH */
-
-typedef struct {
- char *encoded;
- gss_OID oid;
-} ssh_gss_kex_mapping;
-
-static ssh_gss_kex_mapping **gss_enc2oid = NULL;
-
-static void ssh_gssapi_encode_oid_for_kex(const gss_OID oid, char **enc_name);
-static char *ssh_gssapi_make_kexalgs_list(gss_OID_set mechs,
- const char *old_kexalgs);
-
-/*
- * Populate gss_enc2oid table and return list of kexnames.
- *
- * If called with both mechs == GSS_C_NULL_OID_SET and kexname_list == NULL
- * then cached gss_enc2oid table is cleaned up.
- */
-void
-ssh_gssapi_mech_oids_to_kexnames(const gss_OID_set mechs, char **kexname_list)
-{
- ssh_gss_kex_mapping **new_gss_enc2oid, **p;
- Buffer buf;
- char *enc_name;
- int i;
-
- if (kexname_list != NULL)
- *kexname_list = NULL; /* default to failed */
-
- if (mechs != GSS_C_NULL_OID_SET || kexname_list == NULL) {
- /* Cleanup gss_enc2oid table */
- for (p = gss_enc2oid; p != NULL && *p != NULL; p++) {
- if ((*p)->encoded)
- xfree((*p)->encoded);
- ssh_gssapi_release_oid(&(*p)->oid);
- xfree(*p);
- }
- if (gss_enc2oid)
- xfree(gss_enc2oid);
- }
-
- if (mechs == GSS_C_NULL_OID_SET && kexname_list == NULL)
- return; /* nothing left to do */
-
- if (mechs) {
- gss_OID mech;
- /* Populate gss_enc2oid table */
- new_gss_enc2oid = xmalloc(sizeof (ssh_gss_kex_mapping *) *
- (mechs->count + 1));
- memset(new_gss_enc2oid, 0,
- sizeof (ssh_gss_kex_mapping *) * (mechs->count + 1));
-
- for (i = 0; i < mechs->count; i++) {
- mech = &mechs->elements[i];
- ssh_gssapi_encode_oid_for_kex((const gss_OID)mech,
- &enc_name);
-
- if (!enc_name)
- continue;
-
- new_gss_enc2oid[i] =
- xmalloc(sizeof (ssh_gss_kex_mapping));
- (new_gss_enc2oid[i])->encoded = enc_name;
- (new_gss_enc2oid[i])->oid =
- ssh_gssapi_dup_oid(&mechs->elements[i]);
- }
-
- /* Do this last to avoid run-ins with fatal_cleanups */
- gss_enc2oid = new_gss_enc2oid;
- }
-
- if (!kexname_list)
- return; /* nothing left to do */
-
- /* Make kex name list */
- buffer_init(&buf);
- for (p = gss_enc2oid; p && *p; p++) {
- buffer_put_char(&buf, ',');
- buffer_append(&buf, (*p)->encoded, strlen((*p)->encoded));
- }
-
- if (buffer_len(&buf) == 0) {
- buffer_free(&buf);
- return;
- }
-
- buffer_consume(&buf, 1); /* consume leading ',' */
- buffer_put_char(&buf, '\0');
-
- *kexname_list = xstrdup(buffer_ptr(&buf));
- buffer_free(&buf);
-}
-
-void
-ssh_gssapi_mech_oid_to_kexname(const gss_OID mech, char **kexname)
-{
- ssh_gss_kex_mapping **p;
-
- if (mech == GSS_C_NULL_OID || !kexname)
- return;
-
- *kexname = NULL; /* default to not found */
- if (gss_enc2oid) {
- for (p = gss_enc2oid; p && *p; p++) {
- if (mech->length == (*p)->oid->length &&
- memcmp(mech->elements, (*p)->oid->elements,
- mech->length) == 0)
- *kexname = xstrdup((*p)->encoded);
- }
- }
-
- if (*kexname)
- return; /* found */
-
- ssh_gssapi_encode_oid_for_kex(mech, kexname);
-}
-
-void
-ssh_gssapi_oid_of_kexname(const char *kexname, gss_OID *mech)
-{
- ssh_gss_kex_mapping **p;
-
- if (!mech || !kexname || !*kexname)
- return;
-
- *mech = GSS_C_NULL_OID; /* default to not found */
-
- if (!gss_enc2oid)
- return;
-
- for (p = gss_enc2oid; p && *p; p++) {
- if (strcmp(kexname, (*p)->encoded) == 0) {
- *mech = (*p)->oid;
- return;
- }
- }
-}
-
-static
-void
-ssh_gssapi_encode_oid_for_kex(const gss_OID oid, char **enc_name)
-{
- Buffer buf;
- OM_uint32 oidlen;
- uint_t enclen;
- const EVP_MD *evp_md = EVP_md5();
- EVP_MD_CTX md;
- uchar_t digest[EVP_MAX_MD_SIZE];
- char *encoded;
-
- if (oid == GSS_C_NULL_OID || !enc_name)
- return;
-
- *enc_name = NULL;
-
- oidlen = oid->length;
-
- /* No GSS mechs have OIDs as long as 128 -- simplify DER encoding */
- if (oidlen > 128)
- return; /* fail gracefully */
-
- /*
- * NOTE: If we need to support SSH_BUG_GSSAPI_BER this is where
- * we'd do it.
- *
- * That means using "Se3H81ismmOC3OE+FwYCiQ==" for the Kerberos
- * V mech and "N3+k7/4wGxHyuP8Yxi4RhA==" for the GSI mech. Ick.
- */
-
- buffer_init(&buf);
-
- /* UNIVERSAL class tag for OBJECT IDENTIFIER */
- buffer_put_char(&buf, 0x06);
- buffer_put_char(&buf, oidlen); /* one octet DER length -- see above */
-
- /* OID elements */
- buffer_append(&buf, oid->elements, oidlen);
-
- /* Make digest */
- EVP_DigestInit(&md, evp_md);
- EVP_DigestUpdate(&md, buffer_ptr(&buf), buffer_len(&buf));
- EVP_DigestFinal(&md, digest, NULL);
- buffer_free(&buf);
-
- /* Base 64 encoding */
- encoded = xmalloc(EVP_MD_size(evp_md)*2);
- enclen = __b64_ntop(digest, EVP_MD_size(evp_md),
- encoded, EVP_MD_size(evp_md) * 2);
- buffer_init(&buf);
- buffer_append(&buf, KEX_GSS_SHA1, sizeof (KEX_GSS_SHA1) - 1);
- buffer_append(&buf, encoded, enclen);
- buffer_put_char(&buf, '\0');
-
- debug2("GSS-API Mechanism encoded as %s", encoded);
- xfree(encoded);
-
- *enc_name = xstrdup(buffer_ptr(&buf));
- buffer_free(&buf);
-}
-
-static char *
-ssh_gssapi_make_kexalgs_list(gss_OID_set mechs, const char *old_kexalgs)
-{
- char *gss_kexalgs, *new_kexalgs;
- int len;
-
- if (mechs == GSS_C_NULL_OID_SET)
- return (xstrdup(old_kexalgs)); /* never null */
-
- ssh_gssapi_mech_oids_to_kexnames(mechs, &gss_kexalgs);
-
- if (gss_kexalgs == NULL || *gss_kexalgs == '\0')
- return (xstrdup(old_kexalgs)); /* never null */
-
- if (old_kexalgs == NULL || *old_kexalgs == '\0')
- return (gss_kexalgs);
-
- len = strlen(old_kexalgs) + strlen(gss_kexalgs) + 2;
- new_kexalgs = xmalloc(len);
- (void) snprintf(new_kexalgs, len, "%s,%s", gss_kexalgs, old_kexalgs);
- xfree(gss_kexalgs);
-
- return (new_kexalgs);
-}
-
-void
-ssh_gssapi_modify_kex(Kex *kex, gss_OID_set mechs, char **proposal)
-{
- char *kexalgs, *orig_kexalgs, *p;
- char **hostalg, *orig_hostalgs, *new_hostalgs;
- char **hostalgs;
- gss_OID_set dup_mechs;
- OM_uint32 maj, min;
- int i;
-
- if (kex == NULL || proposal == NULL ||
- proposal[PROPOSAL_KEX_ALGS] == NULL) {
- fatal("INTERNAL ERROR (%s)", __func__);
- }
-
- orig_hostalgs = proposal[PROPOSAL_SERVER_HOST_KEY_ALGS];
-
- if (kex->mechs == GSS_C_NULL_OID_SET && mechs == GSS_C_NULL_OID_SET)
- return; /* didn't offer GSS last time, not offering now */
-
- if (kex->mechs == GSS_C_NULL_OID_SET || mechs == GSS_C_NULL_OID_SET)
- goto mod_offer; /* didn't offer last time or not offering now */
-
- /* Check if mechs is congruent to kex->mechs (last offered) */
- if (kex->mechs->count == mechs->count) {
- int present, matches = 0;
-
- for (i = 0; i < mechs->count; i++) {
- maj = gss_test_oid_set_member(&min,
- &kex->mechs->elements[i], mechs, &present);
-
- if (GSS_ERROR(maj)) {
- mechs = GSS_C_NULL_OID_SET;
- break;
- }
-
- matches += (present) ? 1 : 0;
- }
-
- if (matches == kex->mechs->count)
- return; /* no change in offer from last time */
- }
-
-mod_offer:
- /*
- * Remove previously offered mechs from PROPOSAL_KEX_ALGS proposal
- *
- * ASSUMPTION: GSS-API kex algs always go in front, so removing
- * them is a matter of skipping them.
- */
- p = kexalgs = orig_kexalgs = proposal[PROPOSAL_KEX_ALGS];
- while (p != NULL && *p != '\0' &&
- strncmp(p, KEX_GSS_SHA1, strlen(KEX_GSS_SHA1)) == 0) {
-
- if ((p = strchr(p, ',')) == NULL)
- break;
- p++;
- kexalgs = p;
-
- }
- kexalgs = proposal[PROPOSAL_KEX_ALGS] = xstrdup(kexalgs);
- xfree(orig_kexalgs);
-
- (void) gss_release_oid_set(&min, &kex->mechs); /* ok if !kex->mechs */
-
- /* Not offering GSS kex algorithms now -> all done */
- if (mechs == GSS_C_NULL_OID_SET)
- return;
-
- /* Remember mechs we're offering */
- maj = gss_create_empty_oid_set(&min, &dup_mechs);
- if (GSS_ERROR(maj))
- return;
- for (i = 0; i < mechs->count; i++) {
- maj = gss_add_oid_set_member(&min, &mechs->elements[i],
- &dup_mechs);
-
- if (GSS_ERROR(maj)) {
- (void) gss_release_oid_set(&min, &dup_mechs);
- return;
- }
- }
-
- /* Add mechs to kex algorithms ... */
- proposal[PROPOSAL_KEX_ALGS] = ssh_gssapi_make_kexalgs_list(mechs,
- kexalgs);
- xfree(kexalgs);
- kex->mechs = dup_mechs; /* remember what we offer now */
-
- /*
- * ... and add null host key alg, if it wasn't there before, but
- * not if we're the server and we have other host key algs to
- * offer.
- *
- * NOTE: Never remove "null" host key alg once added.
- */
- if (orig_hostalgs == NULL || *orig_hostalgs == '\0') {
- proposal[PROPOSAL_SERVER_HOST_KEY_ALGS] = xstrdup("null");
- } else if (!kex->server) {
- hostalgs = xsplit(orig_hostalgs, ',');
- for (hostalg = hostalgs; *hostalg != NULL; hostalg++) {
- if (strcmp(*hostalg, "null") == 0) {
- xfree_split_list(hostalgs);
- return;
- }
- }
- xfree_split_list(hostalgs);
-
- if (kex->mechs != GSS_C_NULL_OID_SET) {
- int len;
-
- len = strlen(orig_hostalgs) + sizeof (",null");
- new_hostalgs = xmalloc(len);
- (void) snprintf(new_hostalgs, len, "%s,null",
- orig_hostalgs);
- proposal[PROPOSAL_SERVER_HOST_KEY_ALGS] = new_hostalgs;
- }
-
- xfree(orig_hostalgs);
- }
-}
-
-/*
- * Yes, we harcode OIDs for some things, for now it's all we can do.
- *
- * We have to reference particular mechanisms due to lack of generality
- * in the GSS-API in several areas: authorization, mapping principal
- * names to usernames, "storing" delegated credentials, and discovering
- * whether a mechanism is a pseudo-mechanism that negotiates mechanisms.
- *
- * Even if they were in some header file or if __gss_mech_to_oid()
- * and/or __gss_oid_to_mech() were standard we'd still have to hardcode
- * the mechanism names, and since the mechanisms have no standard names
- * other than their OIDs it's actually worse [less portable] to hardcode
- * names than OIDs, so we hardcode OIDs.
- *
- * SPNEGO is a difficult problem though -- it MUST NOT be used in SSHv2,
- * but that's true of all possible pseudo-mechanisms that can perform
- * mechanism negotiation, and SPNEGO could have new OIDs in the future.
- * Ideally we could query each mechanism for its feature set and then
- * ignore any mechanisms that negotiate mechanisms, but, alas, there's
- * no interface to do that.
- *
- * In the future, if the necessary generic GSS interfaces for the issues
- * listed above are made available (even if they differ by platform, as
- * we can expect authorization interfaces will), then we can stop
- * referencing specific mechanism OIDs here.
- */
-int
-ssh_gssapi_is_spnego(gss_OID oid)
-{
- return (oid->length == 6 &&
- memcmp("\053\006\001\005\005\002", oid->elements, 6) == 0);
-}
-
-int
-ssh_gssapi_is_krb5(gss_OID oid)
-{
- return (oid->length == 9 &&
- memcmp("\x2A\x86\x48\x86\xF7\x12\x01\x02\x02",
- oid->elements, 9) == 0);
-}
-
-int
-ssh_gssapi_is_dh(gss_OID oid)
-{
- return (oid->length == 9 &&
- memcmp("\053\006\004\001\052\002\032\002\005",
- oid->elements, 9) == 0);
-}
-
-int
-ssh_gssapi_is_gsi(gss_OID oid)
-{
- return (oid->length == 9 &&
- memcmp("\x2B\x06\x01\x04\x01\x9B\x50\x01\x01",
- oid->elements, 9) == 0);
-}
-
-const char *
-ssh_gssapi_oid_to_name(gss_OID oid)
-{
-#ifdef HAVE_GSS_OID_TO_MECH
- return (__gss_oid_to_mech(oid));
-#else
- if (ssh_gssapi_is_krb5(oid))
- return ("Kerberos");
- if (ssh_gssapi_is_gsi(oid))
- return ("GSI");
- return ("(unknown)");
-#endif /* HAVE_GSS_OID_TO_MECH */
-}
-
-char *
-ssh_gssapi_oid_to_str(gss_OID oid)
-{
-#ifdef HAVE_GSS_OID_TO_STR
- gss_buffer_desc str_buf;
- char *str;
- OM_uint32 maj, min;
-
- maj = gss_oid_to_str(&min, oid, &str_buf);
-
- if (GSS_ERROR(maj))
- return (xstrdup("<gss_oid_to_str() failed>"));
-
- str = xmalloc(str_buf.length + 1);
- memset(str, 0, str_buf.length + 1);
- strlcpy(str, str_buf.value, str_buf.length + 1);
- (void) gss_release_buffer(&min, &str_buf);
-
- return (str);
-#else
- return (xstrdup("<gss_oid_to_str() unsupported>"));
-#endif /* HAVE_GSS_OID_TO_STR */
-}
-
-/* Check that the OID in a data stream matches that in the context */
-int
-ssh_gssapi_check_mech_oid(Gssctxt *ctx, void *data, size_t len)
-{
-
- return (ctx != NULL && ctx->desired_mech != GSS_C_NULL_OID &&
- ctx->desired_mech->length == len &&
- memcmp(ctx->desired_mech->elements, data, len) == 0);
-}
-
-/* Set the contexts OID from a data stream */
-void
-ssh_gssapi_set_oid_data(Gssctxt *ctx, void *data, size_t len)
-{
- if (ctx->actual_mech != GSS_C_NULL_OID) {
- xfree(ctx->actual_mech->elements);
- xfree(ctx->actual_mech);
- }
- ctx->actual_mech = xmalloc(sizeof (gss_OID_desc));
- ctx->actual_mech->length = len;
- ctx->actual_mech->elements = xmalloc(len);
- memcpy(ctx->actual_mech->elements, data, len);
-}
-
-/* Set the contexts OID */
-void
-ssh_gssapi_set_oid(Gssctxt *ctx, gss_OID oid)
-{
- ssh_gssapi_set_oid_data(ctx, oid->elements, oid->length);
-}
-
-/* All this effort to report an error ... */
-
-void
-ssh_gssapi_error(Gssctxt *ctxt, const char *where)
-{
- char *errmsg = ssh_gssapi_last_error(ctxt, NULL, NULL);
-
- if (where != NULL)
- debug("GSS-API error while %s: %s", where, errmsg);
- else
- debug("GSS-API error: %s", errmsg);
-
- /* ssh_gssapi_last_error() can't return NULL */
- xfree(errmsg);
-}
-
-char *
-ssh_gssapi_last_error(Gssctxt *ctxt, OM_uint32 *major_status,
- OM_uint32 *minor_status)
-{
- OM_uint32 lmin, more;
- OM_uint32 maj, min;
- gss_OID mech = GSS_C_NULL_OID;
- gss_buffer_desc msg;
- Buffer b;
- char *ret;
-
- buffer_init(&b);
-
- if (ctxt) {
- /* Get status codes from the Gssctxt */
- maj = ctxt->major;
- min = ctxt->minor;
- /* Output them if desired */
- if (major_status)
- *major_status = maj;
- if (minor_status)
- *minor_status = min;
- /* Get mechanism for minor status display */
- mech = (ctxt->actual_mech != GSS_C_NULL_OID) ?
- ctxt->actual_mech : ctxt->desired_mech;
- } else if (major_status && minor_status) {
- maj = *major_status;
- min = *major_status;
- } else {
- maj = GSS_S_COMPLETE;
- min = 0;
- }
-
- more = 0;
- /* The GSSAPI error */
- do {
- gss_display_status(&lmin, maj, GSS_C_GSS_CODE,
- GSS_C_NULL_OID, &more, &msg);
-
- buffer_append(&b, msg.value, msg.length);
- buffer_put_char(&b, '\n');
- gss_release_buffer(&lmin, &msg);
- } while (more != 0);
-
- /* The mechanism specific error */
- do {
- /*
- * If mech == GSS_C_NULL_OID we may get the default
- * mechanism, whatever that is, and that may not be
- * useful.
- */
- gss_display_status(&lmin, min, GSS_C_MECH_CODE, mech, &more,
- &msg);
-
- buffer_append(&b, msg.value, msg.length);
- buffer_put_char(&b, '\n');
-
- gss_release_buffer(&lmin, &msg);
- } while (more != 0);
-
- buffer_put_char(&b, '\0');
- ret = xstrdup(buffer_ptr(&b));
- buffer_free(&b);
-
- return (ret);
-}
-
-/*
- * Initialise our GSSAPI context. We use this opaque structure to contain all
- * of the data which both the client and server need to persist across
- * {accept,init}_sec_context calls, so that when we do it from the userauth
- * stuff life is a little easier
- */
-void
-ssh_gssapi_build_ctx(Gssctxt **ctx, int client, gss_OID mech)
-{
- Gssctxt *newctx;
-
-
- newctx = (Gssctxt*)xmalloc(sizeof (Gssctxt));
- memset(newctx, 0, sizeof (Gssctxt));
-
-
- newctx->local = client;
- newctx->desired_mech = ssh_gssapi_dup_oid(mech);
-
- /* This happens to be redundant given the memset() above */
- newctx->major = GSS_S_COMPLETE;
- newctx->context = GSS_C_NO_CONTEXT;
- newctx->actual_mech = GSS_C_NULL_OID;
- newctx->desired_name = GSS_C_NO_NAME;
- newctx->src_name = GSS_C_NO_NAME;
- newctx->dst_name = GSS_C_NO_NAME;
- newctx->creds = GSS_C_NO_CREDENTIAL;
- newctx->deleg_creds = GSS_C_NO_CREDENTIAL;
-
- newctx->default_creds = (*ctx != NULL) ? (*ctx)->default_creds : 0;
-
- ssh_gssapi_delete_ctx(ctx);
-
- *ctx = newctx;
-}
-
-gss_OID
-ssh_gssapi_dup_oid(gss_OID oid)
-{
- gss_OID new_oid;
-
- new_oid = xmalloc(sizeof (gss_OID_desc));
-
- new_oid->elements = xmalloc(oid->length);
- new_oid->length = oid->length;
- memcpy(new_oid->elements, oid->elements, oid->length);
-
- return (new_oid);
-}
-
-gss_OID
-ssh_gssapi_make_oid(size_t length, void *elements)
-{
- gss_OID_desc oid;
-
- oid.length = length;
- oid.elements = elements;
-
- return (ssh_gssapi_dup_oid(&oid));
-}
-
-void
-ssh_gssapi_release_oid(gss_OID *oid)
-{
- OM_uint32 min;
-
- if (oid && *oid == GSS_C_NULL_OID)
- return;
- (void) gss_release_oid(&min, oid);
-
- if (*oid == GSS_C_NULL_OID)
- return; /* libgss did own this gss_OID and released it */
-
- xfree((*oid)->elements);
- xfree(*oid);
- *oid = GSS_C_NULL_OID;
-}
-
-struct gss_name {
- gss_OID name_type;
- gss_buffer_t external_name;
- gss_OID mech_type;
- void *mech_name;
-};
-
-/* Delete our context, providing it has been built correctly */
-void
-ssh_gssapi_delete_ctx(Gssctxt **ctx)
-{
- OM_uint32 ms;
-
- if ((*ctx) == NULL)
- return;
-
- if ((*ctx)->context != GSS_C_NO_CONTEXT)
- gss_delete_sec_context(&ms, &(*ctx)->context, GSS_C_NO_BUFFER);
-#if 0
- /* XXX */
- if ((*ctx)->desired_mech != GSS_C_NULL_OID)
- ssh_gssapi_release_oid(&(*ctx)->desired_mech);
-#endif
- if ((*ctx)->actual_mech != GSS_C_NULL_OID)
- (void) ssh_gssapi_release_oid(&(*ctx)->actual_mech);
- if ((*ctx)->desired_name != GSS_C_NO_NAME)
- gss_release_name(&ms, &(*ctx)->desired_name);
-#if 0
- if ((*ctx)->src_name != GSS_C_NO_NAME)
- gss_release_name(&ms, &(*ctx)->src_name);
-#endif
- if ((*ctx)->dst_name != GSS_C_NO_NAME)
- gss_release_name(&ms, &(*ctx)->dst_name);
- if ((*ctx)->creds != GSS_C_NO_CREDENTIAL)
- gss_release_cred(&ms, &(*ctx)->creds);
- if ((*ctx)->deleg_creds != GSS_C_NO_CREDENTIAL)
- gss_release_cred(&ms, &(*ctx)->deleg_creds);
-
- xfree(*ctx);
- *ctx = NULL;
-}
-
-/* Create a GSS hostbased service principal name for a given server hostname */
-int
-ssh_gssapi_import_name(Gssctxt *ctx, const char *server_host)
-{
- gss_buffer_desc name_buf;
- int ret;
-
- /* Build target principal */
- name_buf.length = strlen(SSH_GSS_HOSTBASED_SERVICE) +
- strlen(server_host) + 1; /* +1 for '@' */
- name_buf.value = xmalloc(name_buf.length + 1); /* +1 for NUL */
- ret = snprintf(name_buf.value, name_buf.length + 1, "%s@%s",
- SSH_GSS_HOSTBASED_SERVICE, server_host);
-
- debug3("%s: snprintf() returned %d, expected %d", __func__, ret,
- name_buf.length);
-
- ctx->major = gss_import_name(&ctx->minor, &name_buf,
- GSS_C_NT_HOSTBASED_SERVICE, &ctx->desired_name);
-
- if (GSS_ERROR(ctx->major)) {
- ssh_gssapi_error(ctx, "calling GSS_Import_name()");
- return (0);
- }
-
- xfree(name_buf.value);
-
- return (1);
-}
-
-OM_uint32
-ssh_gssapi_get_mic(Gssctxt *ctx, gss_buffer_desc *buffer, gss_buffer_desc *hash)
-{
-
- ctx->major = gss_get_mic(&ctx->minor, ctx->context,
- GSS_C_QOP_DEFAULT, buffer, hash);
- if (GSS_ERROR(ctx->major))
- ssh_gssapi_error(ctx, "while getting MIC");
- return (ctx->major);
-}
-
-OM_uint32
-ssh_gssapi_verify_mic(Gssctxt *ctx, gss_buffer_desc *buffer,
- gss_buffer_desc *hash)
-{
- gss_qop_t qop;
-
- ctx->major = gss_verify_mic(&ctx->minor, ctx->context, buffer,
- hash, &qop);
- if (GSS_ERROR(ctx->major))
- ssh_gssapi_error(ctx, "while verifying MIC");
- return (ctx->major);
-}
-#endif /* GSSAPI */
diff --git a/usr/src/cmd/ssh/libssh/common/ssh-rsa.c b/usr/src/cmd/ssh/libssh/common/ssh-rsa.c
deleted file mode 100644
index bc89d41da1..0000000000
--- a/usr/src/cmd/ssh/libssh/common/ssh-rsa.c
+++ /dev/null
@@ -1,270 +0,0 @@
-/*
- * Copyright (c) 2000 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: ssh-rsa.c,v 1.26 2002/08/27 17:13:56 stevesk Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <openssl/evp.h>
-#include <openssl/err.h>
-
-#include "xmalloc.h"
-#include "log.h"
-#include "buffer.h"
-#include "bufaux.h"
-#include "key.h"
-#include "ssh-rsa.h"
-#include "compat.h"
-#include "ssh.h"
-
-static int openssh_RSA_verify(int, u_char *, u_int, u_char *, u_int , RSA *);
-
-/* RSASSA-PKCS1-v1_5 (PKCS #1 v2.0 signature) with SHA1 */
-int
-ssh_rsa_sign(Key *key, u_char **sigp, u_int *lenp,
- u_char *data, u_int datalen)
-{
- const EVP_MD *evp_md;
- EVP_MD_CTX md;
- u_char digest[EVP_MAX_MD_SIZE], *sig;
- u_int slen, dlen, len;
- int ok, nid;
- Buffer b;
-
- if (key == NULL || key->type != KEY_RSA || key->rsa == NULL) {
- error("ssh_rsa_sign: no RSA key");
- return -1;
- }
- nid = (datafellows & SSH_BUG_RSASIGMD5) ? NID_md5 : NID_sha1;
- if ((evp_md = EVP_get_digestbynid(nid)) == NULL) {
- error("ssh_rsa_sign: EVP_get_digestbynid %d failed", nid);
- return -1;
- }
- EVP_DigestInit(&md, evp_md);
- EVP_DigestUpdate(&md, data, datalen);
- EVP_DigestFinal(&md, digest, &dlen);
-
- slen = RSA_size(key->rsa);
- sig = xmalloc(slen);
-
- ok = RSA_sign(nid, digest, dlen, sig, &len, key->rsa);
- memset(digest, 'd', sizeof(digest));
-
- if (ok != 1) {
- int ecode = ERR_get_error();
- error("ssh_rsa_sign: RSA_sign failed: %s",
- ERR_error_string(ecode, NULL));
- xfree(sig);
- return -1;
- }
- if (len < slen) {
- u_int diff = slen - len;
- debug("slen %u > len %u", slen, len);
- memmove(sig + diff, sig, len);
- memset(sig, 0, diff);
- } else if (len > slen) {
- error("ssh_rsa_sign: slen %u slen2 %u", slen, len);
- xfree(sig);
- return -1;
- }
- /* encode signature */
- buffer_init(&b);
- buffer_put_cstring(&b, "ssh-rsa");
- buffer_put_string(&b, sig, slen);
- len = buffer_len(&b);
- if (lenp != NULL)
- *lenp = len;
- if (sigp != NULL) {
- *sigp = xmalloc(len);
- memcpy(*sigp, buffer_ptr(&b), len);
- }
- buffer_free(&b);
- memset(sig, 's', slen);
- xfree(sig);
-
- return 0;
-}
-
-int
-ssh_rsa_verify(Key *key, u_char *signature, u_int signaturelen,
- u_char *data, u_int datalen)
-{
- Buffer b;
- const EVP_MD *evp_md;
- EVP_MD_CTX md;
- char *ktype;
- u_char digest[EVP_MAX_MD_SIZE], *sigblob;
- u_int len, dlen, modlen;
- int rlen, ret, nid;
-
- if (key == NULL || key->type != KEY_RSA || key->rsa == NULL) {
- error("ssh_rsa_verify: no RSA key");
- return -1;
- }
- if (BN_num_bits(key->rsa->n) < SSH_RSA_MINIMUM_MODULUS_SIZE) {
- error("ssh_rsa_verify: RSA modulus too small: %d < minimum %d bits",
- BN_num_bits(key->rsa->n), SSH_RSA_MINIMUM_MODULUS_SIZE);
- return -1;
- }
- buffer_init(&b);
- buffer_append(&b, signature, signaturelen);
- ktype = buffer_get_string(&b, NULL);
- if (strcmp("ssh-rsa", ktype) != 0) {
- error("ssh_rsa_verify: cannot handle type %s", ktype);
- buffer_free(&b);
- xfree(ktype);
- return -1;
- }
- xfree(ktype);
- sigblob = buffer_get_string(&b, &len);
- rlen = buffer_len(&b);
- buffer_free(&b);
- if (rlen != 0) {
- error("ssh_rsa_verify: remaining bytes in signature %d", rlen);
- xfree(sigblob);
- return -1;
- }
- /* RSA_verify expects a signature of RSA_size */
- modlen = RSA_size(key->rsa);
- if (len > modlen) {
- error("ssh_rsa_verify: len %u > modlen %u", len, modlen);
- xfree(sigblob);
- return -1;
- } else if (len < modlen) {
- u_int diff = modlen - len;
- debug("ssh_rsa_verify: add padding: modlen %u > len %u",
- modlen, len);
- sigblob = xrealloc(sigblob, modlen);
- memmove(sigblob + diff, sigblob, len);
- memset(sigblob, 0, diff);
- len = modlen;
- }
- nid = (datafellows & SSH_BUG_RSASIGMD5) ? NID_md5 : NID_sha1;
- if ((evp_md = EVP_get_digestbynid(nid)) == NULL) {
- error("ssh_rsa_verify: EVP_get_digestbynid %d failed", nid);
- xfree(sigblob);
- return -1;
- }
- EVP_DigestInit(&md, evp_md);
- EVP_DigestUpdate(&md, data, datalen);
- EVP_DigestFinal(&md, digest, &dlen);
-
- ret = openssh_RSA_verify(nid, digest, dlen, sigblob, len, key->rsa);
- memset(digest, 'd', sizeof(digest));
- memset(sigblob, 's', len);
- xfree(sigblob);
- debug("ssh_rsa_verify: signature %scorrect", (ret==0) ? "in" : "");
- return ret;
-}
-
-/*
- * See:
- * http://www.rsasecurity.com/rsalabs/pkcs/pkcs-1/
- * ftp://ftp.rsasecurity.com/pub/pkcs/pkcs-1/pkcs-1v2-1.asn
- */
-/*
- * id-sha1 OBJECT IDENTIFIER ::= { iso(1) identified-organization(3)
- * oiw(14) secsig(3) algorithms(2) 26 }
- */
-static const u_char id_sha1[] = {
- 0x30, 0x21, /* type Sequence, length 0x21 (33) */
- 0x30, 0x09, /* type Sequence, length 0x09 */
- 0x06, 0x05, /* type OID, length 0x05 */
- 0x2b, 0x0e, 0x03, 0x02, 0x1a, /* id-sha1 OID */
- 0x05, 0x00, /* NULL */
- 0x04, 0x14 /* Octet string, length 0x14 (20), followed by sha1 hash */
-};
-/*
- * id-md5 OBJECT IDENTIFIER ::= { iso(1) member-body(2) us(840)
- * rsadsi(113549) digestAlgorithm(2) 5 }
- */
-static const u_char id_md5[] = {
- 0x30, 0x20, /* type Sequence, length 0x20 (32) */
- 0x30, 0x0c, /* type Sequence, length 0x09 */
- 0x06, 0x08, /* type OID, length 0x05 */
- 0x2a, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x02, 0x05, /* id-md5 */
- 0x05, 0x00, /* NULL */
- 0x04, 0x10 /* Octet string, length 0x10 (16), followed by md5 hash */
-};
-
-static int
-openssh_RSA_verify(int type, u_char *hash, u_int hashlen,
- u_char *sigbuf, u_int siglen, RSA *rsa)
-{
- u_int ret, rsasize, oidlen = 0, hlen = 0;
- int len;
- const u_char *oid = NULL;
- u_char *decrypted = NULL;
-
- ret = 0;
- switch (type) {
- case NID_sha1:
- oid = id_sha1;
- oidlen = sizeof(id_sha1);
- hlen = 20;
- break;
- case NID_md5:
- oid = id_md5;
- oidlen = sizeof(id_md5);
- hlen = 16;
- break;
- default:
- goto done;
- break;
- }
- if (hashlen != hlen) {
- error("bad hashlen");
- goto done;
- }
- rsasize = RSA_size(rsa);
- if (siglen == 0 || siglen > rsasize) {
- error("bad siglen");
- goto done;
- }
- decrypted = xmalloc(rsasize);
- if ((len = RSA_public_decrypt(siglen, sigbuf, decrypted, rsa,
- RSA_PKCS1_PADDING)) < 0) {
- error("RSA_public_decrypt failed: %s",
- ERR_error_string(ERR_get_error(), NULL));
- goto done;
- }
- if (len != hlen + oidlen) {
- error("bad decrypted len: %d != %d + %d", len, hlen, oidlen);
- goto done;
- }
- if (memcmp(decrypted, oid, oidlen) != 0) {
- error("oid mismatch");
- goto done;
- }
- if (memcmp(decrypted + oidlen, hash, hlen) != 0) {
- error("hash mismatch");
- goto done;
- }
- ret = 1;
-done:
- if (decrypted)
- xfree(decrypted);
- return ret;
-}
diff --git a/usr/src/cmd/ssh/libssh/common/tildexpand.c b/usr/src/cmd/ssh/libssh/common/tildexpand.c
deleted file mode 100644
index 5fcd07ebe6..0000000000
--- a/usr/src/cmd/ssh/libssh/common/tildexpand.c
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: tildexpand.c,v 1.13 2002/06/23 03:25:50 deraadt Exp $");
-
-#include <libgen.h>
-
-#include "xmalloc.h"
-#include "log.h"
-#include "tildexpand.h"
-
-/*
- * Expands tildes in the file name. Returns data allocated by xmalloc.
- * Warning: this calls getpw*.
- */
-char *
-tilde_expand_filename(const char *filename, uid_t my_uid)
-{
- const char *cp;
- uint_t userlen;
- char *expanded;
- struct passwd *pw;
- char *pw_dir;
- char user[100];
- int len;
-
- /* Return immediately if no tilde. */
- if (filename[0] != '~')
- return (xstrdup(filename));
-
- /* Skip the tilde. */
- filename++;
-
- /* Find where the username ends. */
- cp = strchr(filename, '/');
- if (cp)
- userlen = cp - filename; /* Something after username. */
- else
- userlen = strlen(filename); /* Nothing after username. */
-
- /* This is the ~/xyz case with no ~username specification. */
- if (userlen == 0)
- pw = getpwuid(my_uid);
- else {
- /* Tilde refers to someone elses home directory. */
- if (userlen > sizeof (user) - 1)
- fatal("User name after tilde too long.");
- memcpy(user, filename, userlen);
- user[userlen] = 0;
- pw = getpwnam(user);
- }
-
- /* Use the HOME variable now. */
- if (pw == NULL) {
- debug("User account's password entry not found, trying to use "
- "the HOME variable.");
- if ((pw_dir = getenv("HOME")) == NULL) {
- fatal("User account's password entry not found and "
- "the HOME variable not set.");
- }
- } else {
- pw_dir = pw->pw_dir;
- }
-
- /* If referring to someones home directory, return it now. */
- if (cp == NULL) {
- /* Only home directory specified */
- return (xstrdup(pw_dir));
- }
-
- /* Build a path combining the specified directory and path. */
- len = strlen(pw_dir) + strlen(cp + 1) + 2;
- if (len > MAXPATHLEN)
- fatal("Home directory too long (%d > %d)", len - 1,
- MAXPATHLEN - 1);
-
- expanded = xmalloc(len);
- snprintf(expanded, len, "%s%s%s", pw_dir,
- strcmp(pw_dir, "/") ? "/" : "", cp + 1);
- return (expanded);
-}
diff --git a/usr/src/cmd/ssh/libssh/common/ttymodes.c b/usr/src/cmd/ssh/libssh/common/ttymodes.c
deleted file mode 100644
index b20ab34ff3..0000000000
--- a/usr/src/cmd/ssh/libssh/common/ttymodes.c
+++ /dev/null
@@ -1,480 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-/*
- * SSH2 tty modes support by Kevin Steves.
- * Copyright (c) 2001 Kevin Steves. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/*
- * Encoding and decoding of terminal modes in a portable way.
- * Much of the format is defined in ttymodes.h; it is included multiple times
- * into this file with the appropriate macro definitions to generate the
- * suitable code.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: ttymodes.c,v 1.18 2002/06/19 00:27:55 deraadt Exp $");
-
-#include "packet.h"
-#include "log.h"
-#include "ssh1.h"
-#include "compat.h"
-#include "buffer.h"
-#include "bufaux.h"
-
-#define TTY_OP_END 0
-/*
- * uint32 (u_int) follows speed in SSH1 and SSH2
- */
-#define TTY_OP_ISPEED_PROTO1 192
-#define TTY_OP_OSPEED_PROTO1 193
-#define TTY_OP_ISPEED_PROTO2 128
-#define TTY_OP_OSPEED_PROTO2 129
-
-/*
- * Converts POSIX speed_t to a baud rate. The values of the
- * constants for speed_t are not themselves portable.
- */
-static int
-speed_to_baud(speed_t speed)
-{
- switch (speed) {
- case B0:
- return 0;
- case B50:
- return 50;
- case B75:
- return 75;
- case B110:
- return 110;
- case B134:
- return 134;
- case B150:
- return 150;
- case B200:
- return 200;
- case B300:
- return 300;
- case B600:
- return 600;
- case B1200:
- return 1200;
- case B1800:
- return 1800;
- case B2400:
- return 2400;
- case B4800:
- return 4800;
- case B9600:
- return 9600;
-
-#ifdef B19200
- case B19200:
- return 19200;
-#else /* B19200 */
-#ifdef EXTA
- case EXTA:
- return 19200;
-#endif /* EXTA */
-#endif /* B19200 */
-
-#ifdef B38400
- case B38400:
- return 38400;
-#else /* B38400 */
-#ifdef EXTB
- case EXTB:
- return 38400;
-#endif /* EXTB */
-#endif /* B38400 */
-
-#ifdef B7200
- case B7200:
- return 7200;
-#endif /* B7200 */
-#ifdef B14400
- case B14400:
- return 14400;
-#endif /* B14400 */
-#ifdef B28800
- case B28800:
- return 28800;
-#endif /* B28800 */
-#ifdef B57600
- case B57600:
- return 57600;
-#endif /* B57600 */
-#ifdef B76800
- case B76800:
- return 76800;
-#endif /* B76800 */
-#ifdef B115200
- case B115200:
- return 115200;
-#endif /* B115200 */
-#ifdef B230400
- case B230400:
- return 230400;
-#endif /* B230400 */
-#ifdef B460800
- case B460800:
- return 460800;
-#endif /* B460800 */
-#ifdef B921600
- case B921600:
- return 921600;
-#endif /* B921600 */
- default:
- return 9600;
- }
-}
-
-/*
- * Converts a numeric baud rate to a POSIX speed_t.
- */
-static speed_t
-baud_to_speed(int baud)
-{
- switch (baud) {
- case 0:
- return B0;
- case 50:
- return B50;
- case 75:
- return B75;
- case 110:
- return B110;
- case 134:
- return B134;
- case 150:
- return B150;
- case 200:
- return B200;
- case 300:
- return B300;
- case 600:
- return B600;
- case 1200:
- return B1200;
- case 1800:
- return B1800;
- case 2400:
- return B2400;
- case 4800:
- return B4800;
- case 9600:
- return B9600;
-
-#ifdef B19200
- case 19200:
- return B19200;
-#else /* B19200 */
-#ifdef EXTA
- case 19200:
- return EXTA;
-#endif /* EXTA */
-#endif /* B19200 */
-
-#ifdef B38400
- case 38400:
- return B38400;
-#else /* B38400 */
-#ifdef EXTB
- case 38400:
- return EXTB;
-#endif /* EXTB */
-#endif /* B38400 */
-
-#ifdef B7200
- case 7200:
- return B7200;
-#endif /* B7200 */
-#ifdef B14400
- case 14400:
- return B14400;
-#endif /* B14400 */
-#ifdef B28800
- case 28800:
- return B28800;
-#endif /* B28800 */
-#ifdef B57600
- case 57600:
- return B57600;
-#endif /* B57600 */
-#ifdef B76800
- case 76800:
- return B76800;
-#endif /* B76800 */
-#ifdef B115200
- case 115200:
- return B115200;
-#endif /* B115200 */
-#ifdef B230400
- case 230400:
- return B230400;
-#endif /* B230400 */
-#ifdef B460800
- case 460800:
- return B460800;
-#endif /* B460800 */
-#ifdef B921600
- case 921600:
- return B921600;
-#endif /* B921600 */
- default:
- return B9600;
- }
-}
-
-/*
- * Encodes terminal modes for the terminal referenced by fd
- * or tiop in a portable manner, and appends the modes to a packet
- * being constructed.
- */
-void
-tty_make_modes(int fd, struct termios *tiop)
-{
- struct termios tio;
- int baud;
- Buffer buf;
- int tty_op_ospeed, tty_op_ispeed;
- void (*put_arg)(Buffer *, u_int);
-
- buffer_init(&buf);
- if (compat20) {
- tty_op_ospeed = TTY_OP_OSPEED_PROTO2;
- tty_op_ispeed = TTY_OP_ISPEED_PROTO2;
- put_arg = buffer_put_int;
- } else {
- tty_op_ospeed = TTY_OP_OSPEED_PROTO1;
- tty_op_ispeed = TTY_OP_ISPEED_PROTO1;
- put_arg = (void (*)(Buffer *, u_int)) buffer_put_char;
- }
-
- if (tiop == NULL) {
- if (tcgetattr(fd, &tio) == -1) {
- log("tcgetattr: %.100s", strerror(errno));
- goto end;
- }
- } else
- tio = *tiop;
-
- /* Store input and output baud rates. */
- baud = speed_to_baud(cfgetospeed(&tio));
- debug3("tty_make_modes: ospeed %d", baud);
- buffer_put_char(&buf, tty_op_ospeed);
- buffer_put_int(&buf, baud);
- baud = speed_to_baud(cfgetispeed(&tio));
- debug3("tty_make_modes: ispeed %d", baud);
- buffer_put_char(&buf, tty_op_ispeed);
- buffer_put_int(&buf, baud);
-
- /* Store values of mode flags. */
-#define TTYCHAR(NAME, OP) \
- debug3("tty_make_modes: %d %d", OP, tio.c_cc[NAME]); \
- buffer_put_char(&buf, OP); \
- put_arg(&buf, tio.c_cc[NAME]);
-
-#define TTYMODE(NAME, FIELD, OP) \
- debug3("tty_make_modes: %d %d", OP, ((tio.FIELD & NAME) != 0)); \
- buffer_put_char(&buf, OP); \
- put_arg(&buf, ((tio.FIELD & NAME) != 0));
-
-#include "ttymodes.h"
-
-#undef TTYCHAR
-#undef TTYMODE
-
-end:
- /* Mark end of mode data. */
- buffer_put_char(&buf, TTY_OP_END);
- if (compat20)
- packet_put_string(buffer_ptr(&buf), buffer_len(&buf));
- else
- packet_put_raw(buffer_ptr(&buf), buffer_len(&buf));
- buffer_free(&buf);
-}
-
-/*
- * Decodes terminal modes for the terminal referenced by fd in a portable
- * manner from a packet being read.
- */
-void
-tty_parse_modes(int fd, int *n_bytes_ptr)
-{
- struct termios tio;
- int opcode, baud;
- int n_bytes = 0;
- int failure = 0;
- u_int (*get_arg)(void);
- int arg, arg_size;
-
- if (compat20) {
- *n_bytes_ptr = packet_get_int();
- debug3("tty_parse_modes: SSH2 n_bytes %d", *n_bytes_ptr);
- if (*n_bytes_ptr == 0)
- return;
- get_arg = packet_get_int;
- arg_size = 4;
- } else {
- get_arg = packet_get_char;
- arg_size = 1;
- }
-
- /*
- * Get old attributes for the terminal. We will modify these
- * flags. I am hoping that if there are any machine-specific
- * modes, they will initially have reasonable values.
- */
- if (tcgetattr(fd, &tio) == -1) {
- log("tcgetattr: %.100s", strerror(errno));
- failure = -1;
- }
-
- for (;;) {
- n_bytes += 1;
- opcode = packet_get_char();
- switch (opcode) {
- case TTY_OP_END:
- goto set;
-
- /* XXX: future conflict possible */
- case TTY_OP_ISPEED_PROTO1:
- case TTY_OP_ISPEED_PROTO2:
- n_bytes += 4;
- baud = packet_get_int();
- debug3("tty_parse_modes: ispeed %d", baud);
- if (failure != -1 && cfsetispeed(&tio, baud_to_speed(baud)) == -1)
- error("cfsetispeed failed for %d", baud);
- break;
-
- /* XXX: future conflict possible */
- case TTY_OP_OSPEED_PROTO1:
- case TTY_OP_OSPEED_PROTO2:
- n_bytes += 4;
- baud = packet_get_int();
- debug3("tty_parse_modes: ospeed %d", baud);
- if (failure != -1 && cfsetospeed(&tio, baud_to_speed(baud)) == -1)
- error("cfsetospeed failed for %d", baud);
- break;
-
-#define TTYCHAR(NAME, OP) \
- case OP: \
- n_bytes += arg_size; \
- tio.c_cc[NAME] = get_arg(); \
- debug3("tty_parse_modes: %d %d", OP, tio.c_cc[NAME]); \
- break;
-#define TTYMODE(NAME, FIELD, OP) \
- case OP: \
- n_bytes += arg_size; \
- if ((arg = get_arg())) \
- tio.FIELD |= NAME; \
- else \
- tio.FIELD &= ~NAME; \
- debug3("tty_parse_modes: %d %d", OP, arg); \
- break;
-
-#include "ttymodes.h"
-
-#undef TTYCHAR
-#undef TTYMODE
-
- default:
- debug("Ignoring unsupported tty mode opcode %d (0x%x)",
- opcode, opcode);
- if (!compat20) {
- /*
- * SSH1:
- * Opcodes 1 to 127 are defined to have
- * a one-byte argument.
- * Opcodes 128 to 159 are defined to have
- * an integer argument.
- */
- if (opcode > 0 && opcode < 128) {
- n_bytes += 1;
- (void) packet_get_char();
- break;
- } else if (opcode >= 128 && opcode < 160) {
- n_bytes += 4;
- (void) packet_get_int();
- break;
- } else {
- /*
- * It is a truly undefined opcode (160 to 255).
- * We have no idea about its arguments. So we
- * must stop parsing. Note that some data may be
- * left in the packet; hopefully there is nothing
- * more coming after the mode data.
- */
- log("parse_tty_modes: unknown opcode %d", opcode);
- goto set;
- }
- } else {
- /*
- * SSH2:
- * Opcodes 1 to 159 are defined to have
- * a uint32 argument.
- * Opcodes 160 to 255 are undefined and
- * cause parsing to stop.
- */
- if (opcode > 0 && opcode < 160) {
- n_bytes += 4;
- (void) packet_get_int();
- break;
- } else {
- log("parse_tty_modes: unknown opcode %d", opcode);
- goto set;
- }
- }
- }
- }
-
-set:
- if (*n_bytes_ptr != n_bytes) {
- *n_bytes_ptr = n_bytes;
- log("parse_tty_modes: n_bytes_ptr != n_bytes: %d %d",
- *n_bytes_ptr, n_bytes);
- return; /* Don't process bytes passed */
- }
- if (failure == -1)
- return; /* Packet parsed ok but tcgetattr() failed */
-
- /* Set the new modes for the terminal. */
- if (tcsetattr(fd, TCSANOW, &tio) == -1)
- log("Setting tty modes failed: %.100s", strerror(errno));
-}
diff --git a/usr/src/cmd/ssh/libssh/common/uidswap.c b/usr/src/cmd/ssh/libssh/common/uidswap.c
deleted file mode 100644
index 942b22a749..0000000000
--- a/usr/src/cmd/ssh/libssh/common/uidswap.c
+++ /dev/null
@@ -1,257 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * Code for uid-swapping.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: uidswap.c,v 1.23 2002/07/15 17:15:31 stevesk Exp $");
-
-#include <priv.h>
-
-#include "log.h"
-#include "uidswap.h"
-#include "buffer.h"
-#include "servconf.h"
-
-/*
- * Note: all these functions must work in all of the following cases:
- * 1. euid=0, ruid=0
- * 2. euid=0, ruid!=0
- * 3. euid!=0, ruid!=0
- * Additionally, they must work regardless of whether the system has
- * POSIX saved uids or not.
- */
-
-#if defined(_POSIX_SAVED_IDS) && !defined(BROKEN_SAVED_UIDS)
-/* Lets assume that posix saved ids also work with seteuid, even though that
- is not part of the posix specification. */
-#define SAVED_IDS_WORK
-/* Saved effective uid. */
-static uid_t saved_euid = 0;
-static gid_t saved_egid = 0;
-#endif
-
-/* Saved effective uid. */
-static int privileged = 0;
-static int temporarily_use_uid_effective = 0;
-static int ngroups_max = -1;
-static gid_t *saved_egroups, *user_groups;
-static int saved_egroupslen = -1, user_groupslen = -1;
-
-/*
- * Temporarily changes to the given uid. If the effective user
- * id is not root, this does nothing. This call cannot be nested.
- */
-void
-temporarily_use_uid(struct passwd *pw)
-{
- /* Save the current euid, and egroups. */
-#ifdef SAVED_IDS_WORK
- saved_euid = geteuid();
- saved_egid = getegid();
- debug("temporarily_use_uid: %u/%u (e=%u/%u)",
- (u_int)pw->pw_uid, (u_int)pw->pw_gid,
- (u_int)saved_euid, (u_int)saved_egid);
- if (saved_euid != 0) {
- privileged = 0;
- return;
- }
-#else
- if (geteuid() != 0) {
- privileged = 0;
- return;
- }
-#endif /* SAVED_IDS_WORK */
-
- privileged = 1;
- temporarily_use_uid_effective = 1;
-
- if (ngroups_max < 0) {
- ngroups_max = sysconf(_SC_NGROUPS_MAX);
- saved_egroups = malloc(ngroups_max * sizeof (gid_t));
- user_groups = malloc(ngroups_max * sizeof (gid_t));
- if (saved_egroups == NULL || user_groups == NULL)
- fatal("malloc(gid array): %.100s", strerror(errno));
- }
-
- saved_egroupslen = getgroups(ngroups_max, saved_egroups);
- if (saved_egroupslen < 0)
- fatal("getgroups: %.100s", strerror(errno));
-
- /* set and save the user's groups */
- if (user_groupslen == -1) {
- if (initgroups(pw->pw_name, pw->pw_gid) < 0)
- fatal("initgroups: %s: %.100s", pw->pw_name,
- strerror(errno));
- user_groupslen = getgroups(ngroups_max, user_groups);
- if (user_groupslen < 0)
- fatal("getgroups: %.100s", strerror(errno));
- }
- /* Set the effective uid to the given (unprivileged) uid. */
- if (setgroups(user_groupslen, user_groups) < 0)
- fatal("setgroups: %.100s", strerror(errno));
-#ifdef SAVED_IDS_WORK
- /* Set saved gid and set real gid */
- if (setregid(pw->pw_gid, -1) == -1)
- debug("setregid(%u, -1): %.100s", (uint_t)pw->pw_gid, strerror(errno));
- /* Set saved uid and set real uid */
- if (setreuid(pw->pw_uid, -1) == -1)
- debug("setreuid(%u, -1): %.100s", (uint_t)pw->pw_uid, strerror(errno));
-#else
- /* Propagate the privileged gid to all of our gids. */
- if (setgid(getegid()) < 0)
- debug("setgid %u: %.100s", (u_int) getegid(), strerror(errno));
- /* Propagate the privileged uid to all of our uids. */
- if (setuid(geteuid()) < 0)
- debug("setuid %u: %.100s", (u_int) geteuid(), strerror(errno));
-#endif /* SAVED_IDS_WORK */
- /* Set effective gid */
- if (setegid(pw->pw_gid) == -1)
- fatal("setegid %u: %.100s", (u_int)pw->pw_uid,
- strerror(errno));
- /* Set effective uid */
- if (seteuid(pw->pw_uid) == -1)
- fatal("seteuid %u: %.100s", (u_int)pw->pw_uid,
- strerror(errno));
- /*
- * If saved set ids work then
- *
- * ruid == euid == pw->pw_uid
- * saved uid = previous euid
- * rgid == egid == pw->pw_gid
- * saved gid = previous egid
- */
-}
-
-/*
- * Restores to the original (privileged) uid.
- */
-void
-restore_uid(void)
-{
- /* it's a no-op unless privileged */
- if (!privileged) {
- debug("restore_uid: (unprivileged)");
- return;
- }
- if (!temporarily_use_uid_effective)
- fatal("restore_uid: temporarily_use_uid not effective");
-
-#ifdef SAVED_IDS_WORK
- debug("restore_uid: %u/%u", (u_int)saved_euid, (u_int)saved_egid);
- /* Set the effective uid back to the saved privileged uid. */
- if (seteuid(saved_euid) < 0)
- fatal("seteuid %u: %.100s", (u_int)saved_euid, strerror(errno));
- if (setuid(saved_euid) < 0)
- fatal("setuid %u: %.100s", (u_int)saved_euid, strerror(errno));
- if (setegid(saved_egid) < 0)
- fatal("setegid %u: %.100s", (u_int)saved_egid, strerror(errno));
- if (setgid(saved_egid) < 0)
- fatal("setgid %u: %.100s", (u_int)saved_egid, strerror(errno));
-#else /* SAVED_IDS_WORK */
- /*
- * We are unable to restore the real uid to its unprivileged value.
- * Propagate the real uid (usually more privileged) to effective uid
- * as well.
- */
- setuid(getuid());
- setgid(getgid());
-#endif /* SAVED_IDS_WORK */
-
- if (setgroups(saved_egroupslen, saved_egroups) < 0)
- fatal("setgroups: %.100s", strerror(errno));
- temporarily_use_uid_effective = 0;
-}
-
-/*
- * Permanently sets all uids to the given uid. This cannot be called while
- * temporarily_use_uid is effective. Note that when the ChrootDirectory option
- * is in use we keep a few privileges so that we can call chroot(2) later while
- * already running under UIDs of a connecting user.
- */
-void
-permanently_set_uid(struct passwd *pw, char *chroot_directory)
-{
- priv_set_t *pset;
-
- if (temporarily_use_uid_effective)
- fatal("%s: temporarily_use_uid effective", __func__);
-
- debug("%s: %u/%u", __func__, (u_int)pw->pw_uid, (u_int)pw->pw_gid);
-
- if (initgroups(pw->pw_name, pw->pw_gid) < 0)
- fatal("initgroups: %s: %.100s", pw->pw_name,
- strerror(errno));
-
- if (setgid(pw->pw_gid) < 0)
- fatal("setgid %u: %.100s", (u_int)pw->pw_gid, strerror(errno));
-
- /*
- * If root is connecting we are done now. Note that we must have called
- * setgid() in case that the SSH server was run under a group other than
- * root.
- */
- if (pw->pw_uid == 0)
- return;
-
- /*
- * This means we will keep all privileges after the UID change.
- */
- if (setpflags(PRIV_AWARE, 1) != 0)
- fatal("setpflags: %s", strerror(errno));
-
- /* Now we are running under UID of the user. */
- if (setuid(pw->pw_uid) < 0)
- fatal("setuid %u: %.100s", (u_int)pw->pw_uid, strerror(errno));
-
- /*
- * We will run with the privileges from the Inheritable set as
- * we would have after exec(2) if we had stayed in NPA mode
- * before setuid(2) call (see privileges(5), user_attr(4), and
- * pam_unix_cred(5)). We want to run with P = E = I, with I as
- * set by pam_unix_cred(5). We also add PRIV_PROC_CHROOT,
- * obviously, and then PRIV_PROC_FORK and PRIV_PROC_EXEC, since
- * those two might have been removed from the I set. Note that
- * we are expected to finish the login process without them in
- * the I set, the important thing is that those not be passed on
- * to a shell or a subsystem later if they were not set in
- * pam_unix_cred(5).
- */
- if ((pset = priv_allocset()) == NULL)
- fatal("priv_allocset: %s", strerror(errno));
- if (getppriv(PRIV_INHERITABLE, pset) != 0)
- fatal("getppriv: %s", strerror(errno));
-
- /* We do not need PRIV_PROC_CHROOT unless chroot()ing. */
- if (chroot_requested(chroot_directory) &&
- priv_addset(pset, PRIV_PROC_CHROOT) == -1) {
- fatal("%s: priv_addset failed", __func__);
- }
-
- if (priv_addset(pset, PRIV_PROC_FORK) == -1 ||
- priv_addset(pset, PRIV_PROC_EXEC) == -1) {
- fatal("%s: priv_addset failed", __func__);
- }
-
- /* Set only P; this will also set E. */
- if (setppriv(PRIV_SET, PRIV_PERMITTED, pset) == -1)
- fatal("setppriv: %s", strerror(errno));
-
- /* We don't need the PA flag anymore. */
- if (setpflags(PRIV_AWARE, 0) == -1)
- fatal("setpflags: %s", strerror(errno));
-
- priv_freeset(pset);
-}
diff --git a/usr/src/cmd/ssh/libssh/common/uuencode.c b/usr/src/cmd/ssh/libssh/common/uuencode.c
deleted file mode 100644
index 432f5c4369..0000000000
--- a/usr/src/cmd/ssh/libssh/common/uuencode.c
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2000 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: uuencode.c,v 1.16 2002/09/09 14:54:15 markus Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "xmalloc.h"
-#include "uuencode.h"
-
-int
-uuencode(u_char *src, u_int srclength,
- char *target, size_t targsize)
-{
- return __b64_ntop(src, srclength, target, targsize);
-}
-
-int
-uudecode(const char *src, u_char *target, size_t targsize)
-{
- int len;
- char *encoded, *p;
-
- /* copy the 'readonly' source */
- encoded = xstrdup(src);
- /* skip whitespace and data */
- for (p = encoded; *p == ' ' || *p == '\t'; p++)
- ;
- for (; *p != '\0' && *p != ' ' && *p != '\t'; p++)
- ;
- /* and remove trailing whitespace because __b64_pton needs this */
- *p = '\0';
- len = __b64_pton((u_char *) encoded, target, targsize);
- xfree(encoded);
- return len;
-}
-
-void
-dump_base64(FILE *fp, u_char *data, u_int len)
-{
- char *buf = xmalloc(2*len);
- int i, n;
-
- n = uuencode(data, len, buf, 2*len);
- for (i = 0; i < n; i++) {
- fprintf(fp, "%c", buf[i]);
- if (i % 70 == 69)
- fprintf(fp, "\n");
- }
- if (i % 70 != 69)
- fprintf(fp, "\n");
- xfree(buf);
-}
diff --git a/usr/src/cmd/ssh/libssh/common/xlist.c b/usr/src/cmd/ssh/libssh/common/xlist.c
deleted file mode 100644
index c44e420eeb..0000000000
--- a/usr/src/cmd/ssh/libssh/common/xlist.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <stdio.h>
-#include <strings.h>
-#include "xmalloc.h"
-#include "xlist.h"
-
-char **
-xsplit(char *list, char sep)
-{
- char **a;
- char *p, *q;
- uint_t n = 0;
-
- for (n = 0, p = list; p && *p; ) {
- while (p && *p && *p == sep)
- p++;
- if (!*p)
- break;
- n++;
- p = strchr(p, sep);
- }
- a = (char **)xmalloc(sizeof (char *) * (n + 2));
- for (n = 0, p = list; p && *p; ) {
- while (*p == sep)
- p++;
- if (!*p)
- break;
- q = strchr(p, sep);
- if (!q)
- q = p + strlen(p);
- a[n] = (char *)xmalloc((q - p + 2));
- (void) strncpy(a[n], p, q - p);
- a[n][q - p] = '\0';
- n++;
- if (!*q)
- break;
- p = q + 1;
- }
- a[n] = NULL;
- return (a);
-}
-
-void
-xfree_split_list(char **list)
-{
- char **p;
- for (p = list; p && *p; p++) {
- xfree(*p);
- }
- xfree(list);
-}
-
-char *
-xjoin(char **alist, char sep)
-{
- char **p;
- char *list;
- char sep_str[2];
- uint_t n;
-
- for (n = 1, p = alist; p && *p; p++) {
- if (!*p || !**p)
- continue;
- n += strlen(*p) + 1;
- }
- list = (char *)xmalloc(n);
- *list = '\0';
-
- sep_str[0] = sep;
- sep_str[1] = '\0';
- for (p = alist; p && *p; p++) {
- if (!*p || !**p)
- continue;
- if (*list != '\0')
- (void) strlcat(list, sep_str, n);
- (void) strlcat(list, *p, n);
- }
- return (list);
-}
diff --git a/usr/src/cmd/ssh/libssh/common/xmalloc.c b/usr/src/cmd/ssh/libssh/common/xmalloc.c
deleted file mode 100644
index b9ae011d3c..0000000000
--- a/usr/src/cmd/ssh/libssh/common/xmalloc.c
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * Versions of malloc and friends that check their results, and never return
- * failure (they call fatal if they encounter an error).
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: xmalloc.c,v 1.16 2001/07/23 18:21:46 stevesk Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "xmalloc.h"
-#include "log.h"
-
-void *
-xmalloc(size_t size)
-{
- void *ptr;
-
- if (size == 0)
- fatal("xmalloc: zero size");
- ptr = malloc(size);
- if (ptr == NULL)
- fatal("xmalloc: out of memory (allocating %lu bytes)", (u_long) size);
- return ptr;
-}
-
-void *
-xcalloc(size_t nmemb, size_t size)
-{
- void *ptr;
-
- if (size == 0 || nmemb == 0)
- fatal("xcalloc: zero size");
- if (SIZE_T_MAX / nmemb < size)
- fatal("xcalloc: nmemb * size > SIZE_T_MAX");
- ptr = calloc(nmemb, size);
- if (ptr == NULL)
- fatal("xcalloc: out of memory (allocating %lu bytes)",
- (u_long)(size * nmemb));
- return ptr;
-}
-
-void *
-xrealloc(void *ptr, size_t new_size)
-{
- void *new_ptr;
-
- if (new_size == 0)
- fatal("xrealloc: zero size");
- if (ptr == NULL)
- new_ptr = malloc(new_size);
- else
- new_ptr = realloc(ptr, new_size);
- if (new_ptr == NULL)
- fatal("xrealloc: out of memory (new_size %lu bytes)", (u_long) new_size);
- return new_ptr;
-}
-
-void
-xfree(void *ptr)
-{
- if (ptr == NULL)
- fatal("xfree: NULL pointer given as argument");
- free(ptr);
-}
-
-char *
-xstrdup(const char *str)
-{
- size_t len;
- char *cp;
-
- len = strlen(str) + 1;
- cp = xmalloc(len);
- strlcpy(cp, str, len);
- return cp;
-}
diff --git a/usr/src/cmd/ssh/req.flg b/usr/src/cmd/ssh/req.flg
deleted file mode 100644
index f02f86c7fe..0000000000
--- a/usr/src/cmd/ssh/req.flg
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/sh
-#
-# Copyright (c) 2001 by Sun Microsystems, Inc.
-# All rights reserved.
-#
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-echo_file usr/src/lib/Makefile.lib
-echo_file usr/src/lib/Makefile.targ
-echo_file usr/src/cmd/Makefile.cmd
diff --git a/usr/src/cmd/ssh/scp/scp.c b/usr/src/cmd/ssh/scp/scp.c
deleted file mode 100644
index 2759d952c5..0000000000
--- a/usr/src/cmd/ssh/scp/scp.c
+++ /dev/null
@@ -1,1341 +0,0 @@
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * scp - secure remote copy. This is basically patched BSD rcp which
- * uses ssh to do the data transfer (instead of using rcmd).
- *
- * NOTE: This version should NOT be suid root. (This uses ssh to
- * do the transfer and ssh has the necessary privileges.)
- *
- * 1995 Timo Rinne <tri@iki.fi>, Tatu Ylonen <ylo@cs.hut.fi>
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-/*
- * Copyright (c) 1999 Theo de Raadt. All rights reserved.
- * Copyright (c) 1999 Aaron Campbell. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * Parts from:
- *
- * Copyright (c) 1983, 1990, 1992, 1993, 1995
- * The Regents of the University of California. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: scp.c,v 1.91 2002/06/19 00:27:55 deraadt Exp $");
-
-#include "xmalloc.h"
-#include "atomicio.h"
-#include "pathnames.h"
-#include "log.h"
-#include "misc.h"
-
-#ifdef HAVE___PROGNAME
-extern char *__progname;
-#else
-char *__progname;
-#endif
-
-/* For progressmeter() -- number of seconds before xfer considered "stalled" */
-#define STALLTIME 5
-/* alarm() interval for updating progress meter */
-#define PROGRESSTIME 1
-
-/* Visual statistics about files as they are transferred. */
-void progressmeter(int);
-
-/* Returns width of the terminal (for progress meter calculations). */
-int getttywidth(void);
-int do_cmd(char *host, char *remuser, char *cmd, int *fdin, int *fdout,
- int argc);
-
-/* Struct for addargs */
-arglist args;
-
-/* Time a transfer started. */
-static struct timeval start;
-
-/* Number of bytes of current file transferred so far. */
-volatile off_t statbytes;
-
-/* Total size of current file. */
-off_t totalbytes = 0;
-
-/* Name of current file being transferred. */
-char *curfile;
-
-/* This is set to non-zero to enable verbose mode. */
-int verbose_mode = 0;
-
-/* This is set to zero if the progressmeter is not desired. */
-int showprogress = 1;
-
-/* This is the program to execute for the secured connection. ("ssh" or -S) */
-char *ssh_program = _PATH_SSH_PROGRAM;
-
-/* This is used to store the pid of ssh_program */
-static pid_t do_cmd_pid = -1;
-
-static void
-killchild(int signo)
-{
- if (do_cmd_pid > 1) {
- kill(do_cmd_pid, signo ? signo : SIGTERM);
- waitpid(do_cmd_pid, NULL, 0);
- }
-
- if (signo)
- _exit(1);
- exit(1);
-}
-
-/*
- * Run a command via fork(2)/exec(2). This can be a local-to-local copy via
- * cp(1) or one side of a remote-to-remote copy. We must not use system(3) here
- * because we don't want filenames to go through a command expansion in the
- * underlying shell. Note that the user can create a filename that is a piece of
- * shell code itself and this must not be executed.
- */
-static int
-do_local_cmd(arglist *a)
-{
- uint_t i;
- int status;
- pid_t pid;
-
- if (a->num == 0)
- fatal("do_local_cmd: no arguments");
-
- if (verbose_mode) {
- fprintf(stderr, gettext("Executing:"));
- for (i = 0; i < a->num; i++)
- fprintf(stderr, " %s", a->list[i]);
- fprintf(stderr, "\n");
- }
- if ((pid = fork()) == -1)
- fatal("do_local_cmd: fork: %s", strerror(errno));
-
- if (pid == 0) {
- execvp(a->list[0], a->list);
- perror(a->list[0]);
- exit(1);
- }
-
- do_cmd_pid = pid;
- signal(SIGTERM, killchild);
- signal(SIGINT, killchild);
- signal(SIGHUP, killchild);
-
- while (waitpid(pid, &status, 0) == -1)
- if (errno != EINTR)
- fatal("do_local_cmd: waitpid: %s", strerror(errno));
-
- do_cmd_pid = -1;
-
- if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
- return (-1);
-
- return (0);
-}
-
-/*
- * This function executes the given command as the specified user on the
- * given host. This returns < 0 if execution fails, and >= 0 otherwise. This
- * assigns the input and output file descriptors on success.
- */
-
-int
-do_cmd(char *host, char *remuser, char *cmd, int *fdin, int *fdout, int argc)
-{
- int pin[2], pout[2], reserved[2];
-
- if (verbose_mode)
- fprintf(stderr,
- gettext("Executing: program %s host %s, "
- "user %s, command %s\n"),
- ssh_program, host,
- remuser ? remuser : gettext("(unspecified)"), cmd);
-
- /*
- * Reserve two descriptors so that the real pipes won't get
- * descriptors 0 and 1 because that will screw up dup2 below.
- */
- pipe(reserved);
-
- /* Create a socket pair for communicating with ssh. */
- if (pipe(pin) < 0)
- fatal("pipe: %s", strerror(errno));
- if (pipe(pout) < 0)
- fatal("pipe: %s", strerror(errno));
-
- /* Free the reserved descriptors. */
- close(reserved[0]);
- close(reserved[1]);
-
- /* For a child to execute the command on the remote host using ssh. */
- if ((do_cmd_pid = fork()) == 0) {
- /* Child. */
- close(pin[1]);
- close(pout[0]);
- dup2(pin[0], 0);
- dup2(pout[1], 1);
- close(pin[0]);
- close(pout[1]);
-
- args.list[0] = ssh_program;
- if (remuser != NULL)
- addargs(&args, "-l%s", remuser);
- addargs(&args, "%s", host);
- addargs(&args, "%s", cmd);
-
- execvp(ssh_program, args.list);
- perror(ssh_program);
- exit(1);
- } else if (do_cmd_pid == (pid_t)-1) {
- /* fork() failed */
- fatal("fork: %s", strerror(errno));
- }
-
- /* Parent. Close the other side, and return the local side. */
- close(pin[0]);
- *fdout = pin[1];
- close(pout[1]);
- *fdin = pout[0];
- return (0);
-}
-
-typedef struct {
- int cnt;
- char *buf;
-} BUF;
-
-BUF *allocbuf(BUF *, int, int);
-void lostconn(int);
-void nospace(void);
-int okname(char *);
-void run_err(const char *, ...);
-void verifydir(char *);
-
-struct passwd *pwd;
-uid_t userid;
-int errs, remin, remout;
-int pflag, iamremote, iamrecursive, targetshouldbedirectory;
-
-#define CMDNEEDS 64
-char cmd[CMDNEEDS]; /* must hold "rcp -r -p -d\0" */
-
-int response(void);
-void rsource(char *, struct stat *);
-void sink(int, char *[]);
-void source(int, char *[]);
-void tolocal(int, char *[]);
-void toremote(char *, int, char *[]);
-void usage(void);
-
-int
-main(argc, argv)
- int argc;
- char *argv[];
-{
- int ch, fflag, tflag, status;
- char *targ;
- extern char *optarg;
- extern int optind;
-
- __progname = get_progname(argv[0]);
-
- g11n_setlocale(LC_ALL, "");
-
- args.list = NULL;
- addargs(&args, "ssh"); /* overwritten with ssh_program */
- addargs(&args, "-x");
- addargs(&args, "-oForwardAgent no");
- addargs(&args, "-oClearAllForwardings yes");
-
- fflag = tflag = 0;
- while ((ch = getopt(argc, argv, "dfprtvBCc:i:P:q46S:o:F:")) != -1)
- switch (ch) {
- /* User-visible flags. */
- case '4':
- case '6':
- case 'C':
- addargs(&args, "-%c", ch);
- break;
- case 'o':
- case 'c':
- case 'i':
- case 'F':
- addargs(&args, "-%c%s", ch, optarg);
- break;
- case 'P':
- addargs(&args, "-p%s", optarg);
- break;
- case 'B':
- addargs(&args, "-oBatchmode yes");
- break;
- case 'p':
- pflag = 1;
- break;
- case 'r':
- iamrecursive = 1;
- break;
- case 'S':
- ssh_program = xstrdup(optarg);
- break;
- case 'v':
- addargs(&args, "-v");
- verbose_mode = 1;
- break;
- case 'q':
- showprogress = 0;
- break;
-
- /* Server options. */
- case 'd':
- targetshouldbedirectory = 1;
- break;
- case 'f': /* "from" */
- iamremote = 1;
- fflag = 1;
- break;
- case 't': /* "to" */
- iamremote = 1;
- tflag = 1;
-#ifdef HAVE_CYGWIN
- setmode(0, O_BINARY);
-#endif
- break;
- default:
- usage();
- }
- argc -= optind;
- argv += optind;
-
- if ((pwd = getpwuid(userid = getuid())) == NULL)
- fatal("unknown user %d", (int)userid);
-
- if (!isatty(STDERR_FILENO))
- showprogress = 0;
-
- remin = STDIN_FILENO;
- remout = STDOUT_FILENO;
-
- if (fflag) {
- /* Follow "protocol", send data. */
- (void) response();
- source(argc, argv);
- exit(errs != 0);
- }
- if (tflag) {
- /* Receive data. */
- sink(argc, argv);
- exit(errs != 0);
- }
- if (argc < 2)
- usage();
- if (argc > 2)
- targetshouldbedirectory = 1;
-
- remin = remout = -1;
- do_cmd_pid = (pid_t)-1;
-
- /* Command to be executed on remote system using "ssh". */
- (void) snprintf(cmd, sizeof (cmd), "scp%s%s%s%s",
- verbose_mode ? " -v" : "",
- iamrecursive ? " -r" : "", pflag ? " -p" : "",
- targetshouldbedirectory ? " -d" : "");
-
- (void) signal(SIGPIPE, lostconn);
-
- if ((targ = colon(argv[argc - 1]))) /* Dest is remote host. */
- toremote(targ, argc, argv);
- else {
- if (targetshouldbedirectory)
- verifydir(argv[argc - 1]);
- tolocal(argc, argv); /* Dest is local host. */
- }
- /*
- * Finally check the exit status of the ssh process, if one was forked
- * and no error has occurred yet
- */
- if (do_cmd_pid != (pid_t)-1 && errs == 0) {
- if (remin != -1) {
- (void) close(remin);
- }
- if (remout != -1) {
- (void) close(remout);
- }
- if (waitpid(do_cmd_pid, &status, 0) == -1) {
- errs = 1;
- } else if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
- errs = 1;
- }
- }
-
- return (errs != 0);
-}
-
-void
-toremote(targ, argc, argv)
- char *targ, *argv[];
- int argc;
-{
- int i, len;
- char *bp, *host, *src, *suser, *thost, *tuser, *arg;
- arglist alist;
-
- memset(&alist, '\0', sizeof (alist));
- alist.list = NULL;
-
- *targ++ = 0;
- if (*targ == 0)
- targ = ".";
-
- arg = xstrdup(argv[argc - 1]);
- if ((thost = strchr(arg, '@'))) {
- /* user@host */
- *thost++ = 0;
- tuser = arg;
- if (*tuser == '\0')
- tuser = NULL;
- else if (!okname(tuser))
- exit(1);
- } else {
- thost = arg;
- tuser = NULL;
- }
-
- if (tuser != NULL && !okname(tuser)) {
- xfree(arg);
- return;
- }
-
- for (i = 0; i < argc - 1; i++) {
- src = colon(argv[i]);
- if (src) { /* remote to remote */
- freeargs(&alist);
- addargs(&alist, "%s", ssh_program);
- if (verbose_mode)
- addargs(&alist, "-v");
- addargs(&alist, "-x");
- addargs(&alist, "-oClearAllForwardings yes");
- addargs(&alist, "-n");
-
- *src++ = 0;
- if (*src == 0)
- src = ".";
- host = strchr(argv[i], '@');
-
- if (host) {
- *host++ = 0;
- host = cleanhostname(host);
- suser = argv[i];
- if (*suser == '\0')
- suser = pwd->pw_name;
- else if (!okname(suser))
- continue;
- addargs(&alist, "-l");
- addargs(&alist, "%s", suser);
- } else {
- host = cleanhostname(argv[i]);
- }
- addargs(&alist, "%s", host);
- addargs(&alist, "%s", cmd);
- addargs(&alist, "%s", src);
- addargs(&alist, "%s%s%s:%s",
- tuser ? tuser : "", tuser ? "@" : "",
- thost, targ);
- if (do_local_cmd(&alist) != 0)
- errs = 1;
- } else { /* local to remote */
- if (remin == -1) {
- len = strlen(targ) + CMDNEEDS + 20;
- bp = xmalloc(len);
- (void) snprintf(bp, len, "%s -t %s", cmd, targ);
- host = cleanhostname(thost);
- if (do_cmd(host, tuser, bp, &remin,
- &remout, argc) < 0)
- exit(1);
- if (response() < 0)
- exit(1);
- (void) xfree(bp);
- }
- source(1, argv + i);
- }
- }
-}
-
-void
-tolocal(argc, argv)
- int argc;
- char *argv[];
-{
- int i, len;
- char *bp, *host, *src, *suser;
- arglist alist;
-
- memset(&alist, '\0', sizeof (alist));
- alist.list = NULL;
-
- for (i = 0; i < argc - 1; i++) {
- if (!(src = colon(argv[i]))) { /* Local to local. */
- freeargs(&alist);
- addargs(&alist, "%s", _PATH_CP);
- if (iamrecursive)
- addargs(&alist, "-r");
- if (pflag)
- addargs(&alist, "-p");
- addargs(&alist, "%s", argv[i]);
- addargs(&alist, "%s", argv[argc-1]);
- if (do_local_cmd(&alist))
- ++errs;
- continue;
- }
- *src++ = 0;
- if (*src == 0)
- src = ".";
- if ((host = strchr(argv[i], '@')) == NULL) {
- host = argv[i];
- suser = NULL;
- } else {
- *host++ = 0;
- suser = argv[i];
- if (*suser == '\0')
- suser = pwd->pw_name;
- else if (!okname(suser))
- continue;
- }
- host = cleanhostname(host);
- len = strlen(src) + CMDNEEDS + 20;
- bp = xmalloc(len);
- (void) snprintf(bp, len, "%s -f %s", cmd, src);
- if (do_cmd(host, suser, bp, &remin, &remout, argc) < 0) {
- (void) xfree(bp);
- ++errs;
- continue;
- }
- xfree(bp);
- sink(1, argv + argc - 1);
- (void) close(remin);
- remin = remout = -1;
- }
-}
-
-void
-source(argc, argv)
- int argc;
- char *argv[];
-{
- struct stat stb;
- static BUF buffer;
- BUF *bp;
- off_t i, amt, result;
- int fd, haderr, indx;
- char *last, *name, buf[2048];
- int len;
-
- for (indx = 0; indx < argc; ++indx) {
- name = argv[indx];
- statbytes = 0;
- len = strlen(name);
- while (len > 1 && name[len-1] == '/')
- name[--len] = '\0';
- if (strchr(name, '\n') != NULL) {
- run_err("%s: skipping, filename contains a newline",
- name);
- goto next;
- }
- if ((fd = open(name, O_RDONLY, 0)) < 0)
- goto syserr;
- if (fstat(fd, &stb) < 0) {
-syserr: run_err("%s: %s", name, strerror(errno));
- goto next;
- }
- switch (stb.st_mode & S_IFMT) {
- case S_IFREG:
- break;
- case S_IFDIR:
- if (iamrecursive) {
- rsource(name, &stb);
- goto next;
- }
- /* FALLTHROUGH */
- default:
- run_err("%s: not a regular file", name);
- goto next;
- }
- if ((last = strrchr(name, '/')) == NULL)
- last = name;
- else
- ++last;
- curfile = last;
- if (pflag) {
- /*
- * Make it compatible with possible future
- * versions expecting microseconds.
- */
- (void) snprintf(buf, sizeof (buf), "T%lu 0 %lu 0\n",
- (ulong_t)stb.st_mtime,
- (ulong_t)stb.st_atime);
- (void) atomicio(write, remout, buf, strlen(buf));
- if (response() < 0)
- goto next;
- }
-#define FILEMODEMASK (S_ISUID|S_ISGID|S_IRWXU|S_IRWXG|S_IRWXO)
-#ifdef HAVE_LONG_LONG_INT
- snprintf(buf, sizeof (buf), "C%04o %lld %s\n",
- (uint_t)(stb.st_mode & FILEMODEMASK),
- (long long)stb.st_size, last);
-#else
- /* XXX: Handle integer overflow? */
- snprintf(buf, sizeof (buf), "C%04o %lu %s\n",
- (uint_t)(stb.st_mode & FILEMODEMASK),
- (ulong_t)stb.st_size, last);
-#endif
- if (verbose_mode) {
- fprintf(stderr, gettext("Sending file modes: %s"), buf);
- fflush(stderr);
- }
- (void) atomicio(write, remout, buf, strlen(buf));
- if (response() < 0)
- goto next;
- if ((bp = allocbuf(&buffer, fd, 2048)) == NULL) {
-next: (void) close(fd);
- continue;
- }
- if (showprogress) {
- totalbytes = stb.st_size;
- progressmeter(-1);
- }
- /* Keep writing after an error so that we stay sync'd up. */
- for (haderr = i = 0; i < stb.st_size; i += bp->cnt) {
- amt = bp->cnt;
- if (i + amt > stb.st_size)
- amt = stb.st_size - i;
- if (!haderr) {
- result = atomicio(read, fd, bp->buf, amt);
- if (result != amt)
- haderr = result >= 0 ? EIO : errno;
- }
- if (haderr)
- (void) atomicio(write, remout, bp->buf, amt);
- else {
- result = atomicio(write, remout, bp->buf, amt);
- if (result != amt)
- haderr = result >= 0 ? EIO : errno;
- statbytes += result;
- }
- }
- if (showprogress)
- progressmeter(1);
-
- if (close(fd) < 0 && !haderr)
- haderr = errno;
- if (!haderr)
- (void) atomicio(write, remout, "", 1);
- else
- run_err("%s: %s", name, strerror(haderr));
- (void) response();
- }
-}
-
-void
-rsource(name, statp)
- char *name;
- struct stat *statp;
-{
- DIR *dirp;
- struct dirent *dp;
- char *last, *vect[1], path[1100];
-
- if (!(dirp = opendir(name))) {
- run_err("%s: %s", name, strerror(errno));
- return;
- }
- last = strrchr(name, '/');
- if (last == 0)
- last = name;
- else
- last++;
- if (pflag) {
- (void) snprintf(path, sizeof (path), "T%lu 0 %lu 0\n",
- (ulong_t)statp->st_mtime,
- (ulong_t)statp->st_atime);
- (void) atomicio(write, remout, path, strlen(path));
- if (response() < 0) {
- closedir(dirp);
- return;
- }
- }
- (void) snprintf(path, sizeof (path), "D%04o %d %.1024s\n",
- (uint_t)(statp->st_mode & FILEMODEMASK), 0, last);
- if (verbose_mode)
- fprintf(stderr, gettext("Entering directory: %s"), path);
- (void) atomicio(write, remout, path, strlen(path));
- if (response() < 0) {
- closedir(dirp);
- return;
- }
- while ((dp = readdir(dirp)) != NULL) {
- if (dp->d_ino == 0)
- continue;
- if ((strcmp(dp->d_name, ".") == 0) ||
- (strcmp(dp->d_name, "..") == 0))
- continue;
- if (strlen(name) + 1 + strlen(dp->d_name) >=
- sizeof (path) - 1) {
- run_err("%s/%s: name too long", name, dp->d_name);
- continue;
- }
- (void) snprintf(path, sizeof (path), "%s/%s", name, dp->d_name);
- vect[0] = path;
- source(1, vect);
- }
- (void) closedir(dirp);
- (void) atomicio(write, remout, "E\n", 2);
- (void) response();
-}
-
-void
-sink(argc, argv)
- int argc;
- char *argv[];
-{
- static BUF buffer;
- struct stat stb;
- enum {
- YES, NO, DISPLAYED
- } wrerr;
- BUF *bp;
- off_t i, j;
- int amt, count, exists, first, mask, mode, ofd, omode;
- off_t size;
- int setimes, targisdir, wrerrno = 0;
- char ch, *cp, *np, *targ, *why, *vect[1], buf[2048];
- struct timeval tv[2];
-
-#define atime tv[0]
-#define mtime tv[1]
-#define SCREWUP(str) { why = str; goto screwup; }
-
- setimes = targisdir = 0;
- mask = umask(0);
- if (!pflag)
- (void) umask(mask);
- if (argc != 1) {
- run_err("ambiguous target");
- exit(1);
- }
- targ = *argv;
- if (targetshouldbedirectory)
- verifydir(targ);
-
- (void) atomicio(write, remout, "", 1);
- if (stat(targ, &stb) == 0 && S_ISDIR(stb.st_mode))
- targisdir = 1;
- for (first = 1; ; first = 0) {
- cp = buf;
- if (atomicio(read, remin, cp, 1) <= 0)
- return;
- if (*cp++ == '\n')
- SCREWUP("unexpected <newline>")
- do {
- if (atomicio(read, remin, &ch, sizeof (ch)) !=
- sizeof (ch))
- SCREWUP("lost connection")
- *cp++ = ch;
- } while (cp < &buf[sizeof (buf) - 1] && ch != '\n');
- *cp = 0;
-
- if (buf[0] == '\01' || buf[0] == '\02') {
- if (iamremote == 0)
- (void) atomicio(write, STDERR_FILENO,
- buf + 1, strlen(buf + 1));
- if (buf[0] == '\02')
- exit(1);
- ++errs;
- continue;
- }
- if (buf[0] == 'E') {
- (void) atomicio(write, remout, "", 1);
- return;
- }
- if (ch == '\n')
- *--cp = 0;
-
- cp = buf;
- if (*cp == 'T') {
- setimes++;
- cp++;
- mtime.tv_sec = strtol(cp, &cp, 10);
- if (!cp || *cp++ != ' ')
- SCREWUP("mtime.sec not delimited")
- mtime.tv_usec = strtol(cp, &cp, 10);
- if (!cp || *cp++ != ' ')
- SCREWUP("mtime.usec not delimited")
- atime.tv_sec = strtol(cp, &cp, 10);
- if (!cp || *cp++ != ' ')
- SCREWUP("atime.sec not delimited")
- atime.tv_usec = strtol(cp, &cp, 10);
- if (!cp || *cp++ != '\0')
- SCREWUP("atime.usec not delimited")
- (void) atomicio(write, remout, "", 1);
- continue;
- }
- if (*cp != 'C' && *cp != 'D') {
- /*
- * Check for the case "rcp remote:foo\* local:bar".
- * In this case, the line "No match." can be returned
- * by the shell before the rcp command on the remote is
- * executed so the ^Aerror_message convention isn't
- * followed.
- */
- if (first) {
- run_err("%s", cp);
- exit(1);
- }
- SCREWUP("expected control record")
- }
- mode = 0;
- for (++cp; cp < buf + 5; cp++) {
- if (*cp < '0' || *cp > '7')
- SCREWUP("bad mode")
- mode = (mode << 3) | (*cp - '0');
- }
- if (*cp++ != ' ')
- SCREWUP("mode not delimited")
-
- for (size = 0; isdigit(*cp); )
- size = size * 10 + (*cp++ - '0');
- if (*cp++ != ' ')
- SCREWUP("size not delimited")
- if ((strchr(cp, '/') != NULL) || (strcmp(cp, "..") == 0)) {
- run_err("error: unexpected filename: %s", cp);
- exit(1);
- }
- if (targisdir) {
- static char *namebuf;
- static int cursize;
- size_t need;
-
- need = strlen(targ) + strlen(cp) + 250;
- if (need > cursize) {
- if (namebuf)
- xfree(namebuf);
- namebuf = xmalloc(need);
- cursize = need;
- }
- (void) snprintf(namebuf, need, "%s%s%s", targ,
- strcmp(targ, "/") ? "/" : "", cp);
- np = namebuf;
- } else
- np = targ;
- curfile = cp;
- exists = stat(np, &stb) == 0;
- if (buf[0] == 'D') {
- int mod_flag = pflag;
- if (!iamrecursive)
- SCREWUP("received directory without -r");
- if (exists) {
- if (!S_ISDIR(stb.st_mode)) {
- errno = ENOTDIR;
- goto bad;
- }
- if (pflag)
- (void) chmod(np, mode);
- } else {
- /*
- * Handle copying from a read-only
- * directory
- */
- mod_flag = 1;
- if (mkdir(np, mode | S_IRWXU) < 0)
- goto bad;
- }
- vect[0] = xstrdup(np);
- sink(1, vect);
- if (setimes) {
- setimes = 0;
- if (utimes(vect[0], tv) < 0)
- run_err("%s: set times: %s",
- vect[0], strerror(errno));
- }
- if (mod_flag)
- (void) chmod(vect[0], mode);
- if (vect[0])
- xfree(vect[0]);
- continue;
- }
- omode = mode;
- mode |= S_IWRITE;
- if ((ofd = open(np, O_WRONLY|O_CREAT, mode)) < 0) {
-bad: run_err("%s: %s", np, strerror(errno));
- continue;
- }
- (void) atomicio(write, remout, "", 1);
- if ((bp = allocbuf(&buffer, ofd, 4096)) == NULL) {
- (void) close(ofd);
- continue;
- }
- wrerr = NO;
-
- if (showprogress) {
- totalbytes = size;
- progressmeter(-1);
- }
- statbytes = 0;
- for (i = 0; i < size; i += bp->cnt) {
- amt = bp->cnt;
- cp = bp->buf;
- if (i + amt > size)
- amt = size - i;
- count = amt;
- do {
- j = read(remin, cp, amt);
- if (j == -1 && (errno == EINTR ||
- errno == EAGAIN)) {
- continue;
- } else if (j <= 0) {
- run_err("%s", j ? strerror(errno) :
- "dropped connection");
- exit(1);
- }
- amt -= j;
- cp += j;
- statbytes += j;
- } while (amt > 0);
- /* Keep reading so we stay sync'd up. */
- if (wrerr == NO) {
- j = atomicio(write, ofd, bp->buf,
- count);
- if (j != count) {
- wrerr = YES;
- wrerrno = j >= 0 ? EIO : errno;
- }
- }
- }
- if (showprogress)
- progressmeter(1);
- if (ftruncate(ofd, size)) {
- run_err("%s: truncate: %s", np, strerror(errno));
- wrerr = DISPLAYED;
- }
- if (pflag) {
- if (exists || omode != mode)
-#ifdef HAVE_FCHMOD
- if (fchmod(ofd, omode)) {
-#else /* HAVE_FCHMOD */
- if (chmod(np, omode)) {
-#endif /* HAVE_FCHMOD */
- run_err("%s: set mode: %s",
- np, strerror(errno));
- wrerr = DISPLAYED;
- }
- } else {
- if (!exists && omode != mode)
-#ifdef HAVE_FCHMOD
- if (fchmod(ofd, omode & ~mask)) {
-#else /* HAVE_FCHMOD */
- if (chmod(np, omode & ~mask)) {
-#endif /* HAVE_FCHMOD */
- run_err("%s: set mode: %s",
- np, strerror(errno));
- wrerr = DISPLAYED;
- }
- }
- if (close(ofd) == -1) {
- wrerr = YES;
- wrerrno = errno;
- }
- (void) response();
- if (setimes && wrerr == NO) {
- setimes = 0;
- if (utimes(np, tv) < 0) {
- run_err("%s: set times: %s",
- np, strerror(errno));
- wrerr = DISPLAYED;
- }
- }
- switch (wrerr) {
- case YES:
- run_err("%s: %s", np, strerror(wrerrno));
- break;
- case NO:
- (void) atomicio(write, remout, "", 1);
- break;
- case DISPLAYED:
- break;
- }
- }
-screwup:
- run_err("protocol error: %s", why);
- exit(1);
-}
-
-int
-response(void)
-{
- char ch, *cp, resp, rbuf[2048];
-
- if (atomicio(read, remin, &resp, sizeof (resp)) != sizeof (resp))
- lostconn(0);
-
- cp = rbuf;
- switch (resp) {
- case 0: /* ok */
- return (0);
- default:
- *cp++ = resp;
- /* FALLTHROUGH */
- case 1: /* error, followed by error msg */
- case 2: /* fatal error, "" */
- do {
- if (atomicio(read, remin, &ch, sizeof (ch)) !=
- sizeof (ch))
- lostconn(0);
- *cp++ = ch;
- } while (cp < &rbuf[sizeof (rbuf) - 1] && ch != '\n');
-
- if (!iamremote)
- (void) atomicio(write, STDERR_FILENO, rbuf, cp - rbuf);
- ++errs;
- if (resp == 1)
- return (-1);
- exit(1);
- }
- /* NOTREACHED */
-}
-
-void
-usage(void)
-{
- (void) fprintf(stderr,
- gettext(
- "Usage: scp [-pqrvBC46] [-F config] [-S program] [-P port]\n"
- " [-c cipher] [-i identity] [-o option]\n"
- " [[user@]host1:]file1 [...] "
- "[[user@]host2:]file2\n"));
- exit(1);
-}
-
-/* PRINTFLIKE1 */
-void
-run_err(const char *fmt, ...)
-{
- static FILE *fp;
- va_list ap;
-
- ++errs;
-
- if (!iamremote) {
- va_start(ap, fmt);
- vfprintf(stderr, fmt, ap);
- va_end(ap);
- fprintf(stderr, "\n");
- }
-
- if (fp == NULL && !(fp = fdopen(remout, "w")))
- return;
-
- (void) fprintf(fp, "%c", 0x01);
- (void) fprintf(fp, "scp: ");
- va_start(ap, fmt);
- (void) vfprintf(fp, fmt, ap);
- va_end(ap);
- (void) fprintf(fp, "\n");
- (void) fflush(fp);
-
-}
-
-void
-verifydir(cp)
- char *cp;
-{
- struct stat stb;
-
- if (!stat(cp, &stb)) {
- if (S_ISDIR(stb.st_mode))
- return;
- errno = ENOTDIR;
- }
- run_err("%s: %s", cp, strerror(errno));
- exit(1);
-}
-
-int
-okname(cp0)
- char *cp0;
-{
- int c;
- char *cp;
-
- cp = cp0;
- do {
- c = (int)*cp;
- if (c & 0200)
- goto bad;
- if (!isalpha(c) && !isdigit(c)) {
- switch (c) {
- case '\'':
- case '"':
- case '`':
- case ' ':
- case '#':
- goto bad;
- default:
- break;
- }
- }
- } while (*++cp);
- return (1);
-
-bad: fprintf(stderr, gettext("%s: invalid user name\n"), cp0);
- return (0);
-}
-
-BUF *
-allocbuf(bp, fd, blksize)
- BUF *bp;
- int fd, blksize;
-{
- size_t size;
-#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
- struct stat stb;
-
- if (fstat(fd, &stb) < 0) {
- run_err("fstat: %s", strerror(errno));
- return (0);
- }
- if (stb.st_blksize == 0)
- size = blksize;
- else
- size = blksize + (stb.st_blksize - blksize % stb.st_blksize) %
- stb.st_blksize;
-#else /* HAVE_STRUCT_STAT_ST_BLKSIZE */
- size = blksize;
-#endif /* HAVE_STRUCT_STAT_ST_BLKSIZE */
- if (bp->cnt >= size)
- return (bp);
- if (bp->buf == NULL)
- bp->buf = xmalloc(size);
- else
- bp->buf = xrealloc(bp->buf, size);
- memset(bp->buf, 0, size);
- bp->cnt = size;
- return (bp);
-}
-
-void
-lostconn(signo)
- int signo;
-{
- if (!iamremote)
- write(STDERR_FILENO, "lost connection\n", 16);
- if (signo)
- _exit(1);
- else
- exit(1);
-}
-
-static void
-updateprogressmeter(int ignore)
-{
- int save_errno = errno;
-
- progressmeter(0);
- signal(SIGALRM, updateprogressmeter);
- alarm(PROGRESSTIME);
- errno = save_errno;
-}
-
-static int
-foregroundproc(void)
-{
- static pid_t pgrp = -1;
- int ctty_pgrp;
-
- if (pgrp == -1)
- pgrp = getpgrp();
-
-#ifdef HAVE_TCGETPGRP
- return ((ctty_pgrp = tcgetpgrp(STDOUT_FILENO)) != -1 &&
- ctty_pgrp == pgrp);
-#else
- return ((ioctl(STDOUT_FILENO, TIOCGPGRP, &ctty_pgrp) != -1 &&
- ctty_pgrp == pgrp));
-#endif
-}
-
-void
-progressmeter(int flag)
-{
- static const char prefixes[] = " KMGTP";
- static struct timeval lastupdate;
- static off_t lastsize;
- struct timeval now, td, wait;
- off_t cursize, abbrevsize;
- double elapsed;
- int ratio, barlength, i, remaining;
- char buf[512];
-
- if (flag == -1) {
- (void) gettimeofday(&start, (struct timezone *)0);
- lastupdate = start;
- lastsize = 0;
- }
- if (foregroundproc() == 0)
- return;
-
- (void) gettimeofday(&now, (struct timezone *)0);
- cursize = statbytes;
- if (totalbytes != 0) {
- ratio = (int)(100.0 * cursize / totalbytes);
- ratio = MAX(ratio, 0);
- ratio = MIN(ratio, 100);
- } else
- ratio = 100;
-
- snprintf(buf, sizeof (buf), "\r%-20.20s %3d%% ", curfile, ratio);
-
- barlength = getttywidth() - 51;
- if (barlength > 0) {
- i = barlength * ratio / 100;
- snprintf(buf + strlen(buf), sizeof (buf) - strlen(buf),
- "|%.*s%*s|", i,
- "*******************************************************"
- "*******************************************************"
- "*******************************************************"
- "*******************************************************"
- "*******************************************************"
- "*******************************************************"
- "*******************************************************",
- barlength - i, "");
- }
- i = 0;
- abbrevsize = cursize;
- while (abbrevsize >= 100000 && i < strlen(prefixes) - 1) {
- i++;
- abbrevsize >>= 10;
- }
- snprintf(buf + strlen(buf), sizeof (buf) - strlen(buf), " %5lu %c%c ",
- (unsigned long) abbrevsize, prefixes[i],
- prefixes[i] == ' ' ? ' ' : 'B');
-
- timersub(&now, &lastupdate, &wait);
- if (cursize > lastsize) {
- lastupdate = now;
- lastsize = cursize;
- if (wait.tv_sec >= STALLTIME) {
- start.tv_sec += wait.tv_sec;
- start.tv_usec += wait.tv_usec;
- }
- wait.tv_sec = 0;
- }
- timersub(&now, &start, &td);
- elapsed = td.tv_sec + (td.tv_usec / 1000000.0);
-
- if (flag != 1 &&
- (statbytes <= 0 || elapsed <= 0.0 || cursize > totalbytes)) {
- snprintf(buf + strlen(buf), sizeof (buf) - strlen(buf),
- " --:-- ETA");
- } else if (wait.tv_sec >= STALLTIME) {
- snprintf(buf + strlen(buf), sizeof (buf) - strlen(buf),
- " - stalled -");
- } else {
- if (flag != 1)
- remaining = (int)(totalbytes / (statbytes / elapsed) -
- elapsed);
- else
- remaining = (int)elapsed;
-
- i = remaining / 3600;
- if (i)
- snprintf(buf + strlen(buf), sizeof (buf) - strlen(buf),
- "%2d:", i);
- else
- snprintf(buf + strlen(buf), sizeof (buf) - strlen(buf),
- " ");
- i = remaining % 3600;
- snprintf(buf + strlen(buf), sizeof (buf) - strlen(buf),
- "%02d:%02d%s", i / 60, i % 60,
- (flag != 1) ? " ETA" : " ");
- }
- atomicio(write, fileno(stdout), buf, strlen(buf));
-
- if (flag == -1) {
- mysignal(SIGALRM, updateprogressmeter);
- alarm(PROGRESSTIME);
- } else if (flag == 1) {
- alarm(0);
- atomicio(write, fileno(stdout), "\n", 1);
- statbytes = 0;
- }
-}
-
-int
-getttywidth(void)
-{
- struct winsize winsize;
-
- if (ioctl(fileno(stdout), TIOCGWINSZ, &winsize) != -1)
- return (winsize.ws_col ? winsize.ws_col : 80);
- else
- return (80);
-}
diff --git a/usr/src/cmd/ssh/sftp-server/sftp-server-main.c b/usr/src/cmd/ssh/sftp-server/sftp-server-main.c
deleted file mode 100644
index 7b604b7cdc..0000000000
--- a/usr/src/cmd/ssh/sftp-server/sftp-server-main.c
+++ /dev/null
@@ -1,51 +0,0 @@
-/* $OpenBSD: sftp-server-main.c,v 1.4 2009/02/21 19:32:04 tobias Exp $ */
-/*
- * Copyright (c) 2008 Markus Friedl. All rights reserved.
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include "includes.h"
-
-#include <sys/types.h>
-#include <pwd.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <unistd.h>
-
-#include "log.h"
-#include "sftp.h"
-#include "misc.h"
-
-/* defined in sftp-server.c */
-extern struct passwd *pw;
-extern char *client_addr;
-
-void cleanup_exit(int i);
-
-int
-main(int argc, char **argv)
-{
- struct passwd *user_pw;
-
- /* Ensure that fds 0, 1 and 2 are open or directed to /dev/null */
- sanitise_stdfd();
-
- if ((user_pw = getpwuid(getuid())) == NULL) {
- fprintf(stderr, "No user found for uid %lu\n",
- (ulong_t)getuid());
- return (1);
- }
-
- return (sftp_server_main(argc, argv, user_pw));
-}
diff --git a/usr/src/cmd/ssh/sftp-server/sftp-server.c b/usr/src/cmd/ssh/sftp-server/sftp-server.c
deleted file mode 100644
index 030ed79ac2..0000000000
--- a/usr/src/cmd/ssh/sftp-server/sftp-server.c
+++ /dev/null
@@ -1,1364 +0,0 @@
-/*
- * Copyright (c) 2000-2004 Markus Friedl. All rights reserved.
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-/* $OpenBSD: sftp-server.c,v 1.71 2007/01/03 07:22:36 stevesk Exp $ */
-
-#include "includes.h"
-
-#include <sys/types.h>
-#include <sys/param.h>
-#include <sys/stat.h>
-#ifdef HAVE_SYS_TIME_H
-# include <sys/time.h>
-#endif
-
-#include <dirent.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <pwd.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <pwd.h>
-#include <time.h>
-#include <unistd.h>
-#include <stdarg.h>
-
-#include "xmalloc.h"
-#include "buffer.h"
-#include "bufaux.h"
-#include "log.h"
-#include "misc.h"
-#include "uidswap.h"
-
-#include "sftp.h"
-#include "sftp-common.h"
-
-#ifdef HAVE___PROGNAME
-extern char *__progname;
-#else
-char *__progname;
-#endif
-
-/* helper */
-#define get_int64() buffer_get_int64(&iqueue);
-#define get_int() buffer_get_int(&iqueue);
-#define get_string(lenp) buffer_get_string(&iqueue, lenp);
-
-void cleanup_exit(int i);
-
-/* Our verbosity */
-LogLevel log_level = SYSLOG_LEVEL_ERROR;
-
-/* Our client */
-struct passwd *pw = NULL;
-char *client_addr = NULL;
-
-/* input and output queue */
-Buffer iqueue;
-Buffer oqueue;
-
-/* Version of client */
-int version;
-
-/* portable attributes, etc. */
-
-typedef struct Stat Stat;
-
-struct Stat {
- char *name;
- char *long_name;
- Attrib attrib;
-};
-
-static int
-errno_to_portable(int unixerrno)
-{
- int ret = 0;
-
- switch (unixerrno) {
- case 0:
- ret = SSH2_FX_OK;
- break;
- case ENOENT:
- case ENOTDIR:
- case EBADF:
- case ELOOP:
- ret = SSH2_FX_NO_SUCH_FILE;
- break;
- case EPERM:
- case EACCES:
- case EFAULT:
- ret = SSH2_FX_PERMISSION_DENIED;
- break;
- case ENAMETOOLONG:
- case EINVAL:
- ret = SSH2_FX_BAD_MESSAGE;
- break;
- default:
- ret = SSH2_FX_FAILURE;
- break;
- }
- return ret;
-}
-
-static int
-flags_from_portable(int pflags)
-{
- int flags = 0;
-
- if ((pflags & SSH2_FXF_READ) &&
- (pflags & SSH2_FXF_WRITE)) {
- flags = O_RDWR;
- } else if (pflags & SSH2_FXF_READ) {
- flags = O_RDONLY;
- } else if (pflags & SSH2_FXF_WRITE) {
- flags = O_WRONLY;
- }
- if (pflags & SSH2_FXF_CREAT)
- flags |= O_CREAT;
- if (pflags & SSH2_FXF_TRUNC)
- flags |= O_TRUNC;
- if (pflags & SSH2_FXF_EXCL)
- flags |= O_EXCL;
- return flags;
-}
-
-static const char *
-string_from_portable(int pflags)
-{
- static char ret[128];
-
- *ret = '\0';
-
-#define PAPPEND(str) { \
- if (*ret != '\0') \
- strlcat(ret, ",", sizeof(ret)); \
- strlcat(ret, str, sizeof(ret)); \
- }
-
- if (pflags & SSH2_FXF_READ)
- PAPPEND("READ")
- if (pflags & SSH2_FXF_WRITE)
- PAPPEND("WRITE")
- if (pflags & SSH2_FXF_CREAT)
- PAPPEND("CREATE")
- if (pflags & SSH2_FXF_TRUNC)
- PAPPEND("TRUNCATE")
- if (pflags & SSH2_FXF_EXCL)
- PAPPEND("EXCL")
-
- return ret;
-}
-
-static Attrib *
-get_attrib(void)
-{
- return decode_attrib(&iqueue);
-}
-
-/* handle handles */
-
-typedef struct Handle Handle;
-struct Handle {
- int use;
- DIR *dirp;
- int fd;
- char *name;
- u_int64_t bytes_read, bytes_write;
-};
-
-enum {
- HANDLE_UNUSED,
- HANDLE_DIR,
- HANDLE_FILE
-};
-
-Handle handles[100];
-
-static void
-handle_init(void)
-{
- u_int i;
-
- for (i = 0; i < sizeof(handles)/sizeof(Handle); i++)
- handles[i].use = HANDLE_UNUSED;
-}
-
-static int
-handle_new(int use, const char *name, int fd, DIR *dirp)
-{
- u_int i;
-
- for (i = 0; i < sizeof(handles)/sizeof(Handle); i++) {
- if (handles[i].use == HANDLE_UNUSED) {
- handles[i].use = use;
- handles[i].dirp = dirp;
- handles[i].fd = fd;
- handles[i].name = xstrdup(name);
- handles[i].bytes_read = handles[i].bytes_write = 0;
- return i;
- }
- }
- return -1;
-}
-
-static int
-handle_is_ok(int i, int type)
-{
- return i >= 0 && (u_int)i < sizeof(handles)/sizeof(Handle) &&
- handles[i].use == type;
-}
-
-static int
-handle_to_string(int handle, char **stringp, int *hlenp)
-{
- if (stringp == NULL || hlenp == NULL)
- return -1;
- *stringp = xmalloc(sizeof(int32_t));
- put_u32(*stringp, handle);
- *hlenp = sizeof(int32_t);
- return 0;
-}
-
-static int
-handle_from_string(const char *handle, u_int hlen)
-{
- int val;
-
- if (hlen != sizeof(int32_t))
- return -1;
- val = get_u32(handle);
- if (handle_is_ok(val, HANDLE_FILE) ||
- handle_is_ok(val, HANDLE_DIR))
- return val;
- return -1;
-}
-
-static char *
-handle_to_name(int handle)
-{
- if (handle_is_ok(handle, HANDLE_DIR)||
- handle_is_ok(handle, HANDLE_FILE))
- return handles[handle].name;
- return NULL;
-}
-
-static DIR *
-handle_to_dir(int handle)
-{
- if (handle_is_ok(handle, HANDLE_DIR))
- return handles[handle].dirp;
- return NULL;
-}
-
-static int
-handle_to_fd(int handle)
-{
- if (handle_is_ok(handle, HANDLE_FILE))
- return handles[handle].fd;
- return -1;
-}
-
-static void
-handle_update_read(int handle, ssize_t bytes)
-{
- if (handle_is_ok(handle, HANDLE_FILE) && bytes > 0)
- handles[handle].bytes_read += bytes;
-}
-
-static void
-handle_update_write(int handle, ssize_t bytes)
-{
- if (handle_is_ok(handle, HANDLE_FILE) && bytes > 0)
- handles[handle].bytes_write += bytes;
-}
-
-static u_int64_t
-handle_bytes_read(int handle)
-{
- if (handle_is_ok(handle, HANDLE_FILE))
- return (handles[handle].bytes_read);
- return 0;
-}
-
-static u_int64_t
-handle_bytes_write(int handle)
-{
- if (handle_is_ok(handle, HANDLE_FILE))
- return (handles[handle].bytes_write);
- return 0;
-}
-
-static int
-handle_close(int handle)
-{
- int ret = -1;
-
- if (handle_is_ok(handle, HANDLE_FILE)) {
- ret = close(handles[handle].fd);
- handles[handle].use = HANDLE_UNUSED;
- xfree(handles[handle].name);
- } else if (handle_is_ok(handle, HANDLE_DIR)) {
- ret = closedir(handles[handle].dirp);
- handles[handle].use = HANDLE_UNUSED;
- xfree(handles[handle].name);
- } else {
- errno = ENOENT;
- }
- return ret;
-}
-
-static void
-handle_log_close(int handle, char *emsg)
-{
- if (handle_is_ok(handle, HANDLE_FILE)) {
- log("%s%sclose \"%s\" bytes read %llu written %llu",
- emsg == NULL ? "" : emsg, emsg == NULL ? "" : " ",
- handle_to_name(handle),
- (unsigned long long)handle_bytes_read(handle),
- (unsigned long long)handle_bytes_write(handle));
- } else {
- log("%s%sclosedir \"%s\"",
- emsg == NULL ? "" : emsg, emsg == NULL ? "" : " ",
- handle_to_name(handle));
- }
-}
-
-static void
-handle_log_exit(void)
-{
- u_int i;
-
- for (i = 0; i < sizeof(handles)/sizeof(Handle); i++)
- if (handles[i].use != HANDLE_UNUSED)
- handle_log_close(i, "forced");
-}
-
-static int
-get_handle(void)
-{
- char *handle;
- int val = -1;
- u_int hlen;
-
- handle = get_string(&hlen);
- if (hlen < 256)
- val = handle_from_string(handle, hlen);
- xfree(handle);
- return val;
-}
-
-/* send replies */
-
-static void
-send_msg(Buffer *m)
-{
- int mlen = buffer_len(m);
-
- buffer_put_int(&oqueue, mlen);
- buffer_append(&oqueue, buffer_ptr(m), mlen);
- buffer_consume(m, mlen);
-}
-
-static const char *
-status_to_message(u_int32_t status)
-{
- const char *status_messages[] = {
- "Success", /* SSH_FX_OK */
- "End of file", /* SSH_FX_EOF */
- "No such file", /* SSH_FX_NO_SUCH_FILE */
- "Permission denied", /* SSH_FX_PERMISSION_DENIED */
- "Failure", /* SSH_FX_FAILURE */
- "Bad message", /* SSH_FX_BAD_MESSAGE */
- "No connection", /* SSH_FX_NO_CONNECTION */
- "Connection lost", /* SSH_FX_CONNECTION_LOST */
- "Operation unsupported", /* SSH_FX_OP_UNSUPPORTED */
- "Unknown error" /* Others */
- };
- return (status_messages[MIN(status,SSH2_FX_MAX)]);
-}
-
-static void
-send_status(u_int32_t id, u_int32_t status)
-{
- Buffer msg;
-
- debug3("request %u: sent status %u", id, status);
- if (log_level > SYSLOG_LEVEL_VERBOSE ||
- (status != SSH2_FX_OK && status != SSH2_FX_EOF))
- log("sent status %s", status_to_message(status));
- buffer_init(&msg);
- buffer_put_char(&msg, SSH2_FXP_STATUS);
- buffer_put_int(&msg, id);
- buffer_put_int(&msg, status);
- if (version >= 3) {
- buffer_put_cstring(&msg, status_to_message(status));
- buffer_put_cstring(&msg, "");
- }
- send_msg(&msg);
- buffer_free(&msg);
-}
-static void
-send_data_or_handle(char type, u_int32_t id, const char *data, int dlen)
-{
- Buffer msg;
-
- buffer_init(&msg);
- buffer_put_char(&msg, type);
- buffer_put_int(&msg, id);
- buffer_put_string(&msg, data, dlen);
- send_msg(&msg);
- buffer_free(&msg);
-}
-
-static void
-send_data(u_int32_t id, const char *data, int dlen)
-{
- debug("request %u: sent data len %d", id, dlen);
- send_data_or_handle(SSH2_FXP_DATA, id, data, dlen);
-}
-
-static void
-send_handle(u_int32_t id, int handle)
-{
- char *string;
- int hlen;
-
- handle_to_string(handle, &string, &hlen);
- debug("request %u: sent handle handle %d", id, handle);
- send_data_or_handle(SSH2_FXP_HANDLE, id, string, hlen);
- xfree(string);
-}
-
-static void
-send_names(u_int32_t id, int count, const Stat *stats)
-{
- Buffer msg;
- int i;
-
- buffer_init(&msg);
- buffer_put_char(&msg, SSH2_FXP_NAME);
- buffer_put_int(&msg, id);
- buffer_put_int(&msg, count);
- debug("request %u: sent names count %d", id, count);
- for (i = 0; i < count; i++) {
- buffer_put_cstring(&msg, stats[i].name);
- buffer_put_cstring(&msg, stats[i].long_name);
- encode_attrib(&msg, &stats[i].attrib);
- }
- send_msg(&msg);
- buffer_free(&msg);
-}
-
-static void
-send_attrib(u_int32_t id, const Attrib *a)
-{
- Buffer msg;
-
- debug("request %u: sent attrib have 0x%x", id, a->flags);
- buffer_init(&msg);
- buffer_put_char(&msg, SSH2_FXP_ATTRS);
- buffer_put_int(&msg, id);
- encode_attrib(&msg, a);
- send_msg(&msg);
- buffer_free(&msg);
-}
-
-/* parse incoming */
-
-static void
-process_init(void)
-{
- Buffer msg;
-
- version = get_int();
- verbose("received client version %d", version);
- buffer_init(&msg);
- buffer_put_char(&msg, SSH2_FXP_VERSION);
- buffer_put_int(&msg, SSH2_FILEXFER_VERSION);
- send_msg(&msg);
- buffer_free(&msg);
-}
-
-static void
-process_open(void)
-{
- u_int32_t id, pflags;
- Attrib *a;
- char *name;
- int handle, fd, flags, mode, status = SSH2_FX_FAILURE;
-
- id = get_int();
- name = get_string(NULL);
- pflags = get_int(); /* portable flags */
- debug3("request %u: open flags %d", id, pflags);
- a = get_attrib();
- flags = flags_from_portable(pflags);
- mode = (a->flags & SSH2_FILEXFER_ATTR_PERMISSIONS) ? a->perm : 0666;
- log("open \"%s\" flags %s mode 0%o",
- name, string_from_portable(pflags), mode);
- fd = open(name, flags, mode);
- if (fd < 0) {
- status = errno_to_portable(errno);
- } else {
- handle = handle_new(HANDLE_FILE, name, fd, NULL);
- if (handle < 0) {
- close(fd);
- } else {
- send_handle(id, handle);
- status = SSH2_FX_OK;
- }
- }
- if (status != SSH2_FX_OK)
- send_status(id, status);
- xfree(name);
-}
-
-static void
-process_close(void)
-{
- u_int32_t id;
- int handle, ret, status = SSH2_FX_FAILURE;
-
- id = get_int();
- handle = get_handle();
- debug3("request %u: close handle %u", id, handle);
- handle_log_close(handle, NULL);
- ret = handle_close(handle);
- status = (ret == -1) ? errno_to_portable(errno) : SSH2_FX_OK;
- send_status(id, status);
-}
-
-static void
-process_read(void)
-{
- char buf[64*1024];
- u_int32_t id, len;
- int handle, fd, ret, status = SSH2_FX_FAILURE;
- u_int64_t off;
-
- id = get_int();
- handle = get_handle();
- off = get_int64();
- len = get_int();
-
- debug("request %u: read \"%s\" (handle %d) off %llu len %d",
- id, handle_to_name(handle), handle, (unsigned long long)off, len);
- if (len > sizeof buf) {
- len = sizeof buf;
- debug2("read change len %d", len);
- }
- fd = handle_to_fd(handle);
- if (fd >= 0) {
- if (lseek(fd, off, SEEK_SET) < 0) {
- error("process_read: seek failed");
- status = errno_to_portable(errno);
- } else {
- ret = read(fd, buf, len);
- if (ret < 0) {
- status = errno_to_portable(errno);
- } else if (ret == 0) {
- status = SSH2_FX_EOF;
- } else {
- send_data(id, buf, ret);
- status = SSH2_FX_OK;
- handle_update_read(handle, ret);
- }
- }
- }
- if (status != SSH2_FX_OK)
- send_status(id, status);
-}
-
-static void
-process_write(void)
-{
- u_int32_t id;
- u_int64_t off;
- u_int len;
- int handle, fd, ret, status = SSH2_FX_FAILURE;
- char *data;
-
- id = get_int();
- handle = get_handle();
- off = get_int64();
- data = get_string(&len);
-
- debug("request %u: write \"%s\" (handle %d) off %llu len %d",
- id, handle_to_name(handle), handle, (unsigned long long)off, len);
- fd = handle_to_fd(handle);
- if (fd >= 0) {
- if (lseek(fd, off, SEEK_SET) < 0) {
- status = errno_to_portable(errno);
- error("process_write: seek failed");
- } else {
-/* XXX ATOMICIO ? */
- ret = write(fd, data, len);
- if (ret < 0) {
- error("process_write: write failed");
- status = errno_to_portable(errno);
- } else if ((size_t)ret == len) {
- status = SSH2_FX_OK;
- handle_update_write(handle, ret);
- } else {
- debug2("nothing at all written");
- }
- }
- }
- send_status(id, status);
- xfree(data);
-}
-
-static void
-process_do_stat(int do_lstat)
-{
- Attrib a;
- struct stat st;
- u_int32_t id;
- char *name;
- int ret, status = SSH2_FX_FAILURE;
-
- id = get_int();
- name = get_string(NULL);
- debug3("request %u: %sstat", id, do_lstat ? "l" : "");
- verbose("%sstat name \"%s\"", do_lstat ? "l" : "", name);
- ret = do_lstat ? lstat(name, &st) : stat(name, &st);
- if (ret < 0) {
- status = errno_to_portable(errno);
- } else {
- stat_to_attrib(&st, &a);
- send_attrib(id, &a);
- status = SSH2_FX_OK;
- }
- if (status != SSH2_FX_OK)
- send_status(id, status);
- xfree(name);
-}
-
-static void
-process_stat(void)
-{
- process_do_stat(0);
-}
-
-static void
-process_lstat(void)
-{
- process_do_stat(1);
-}
-
-static void
-process_fstat(void)
-{
- Attrib a;
- struct stat st;
- u_int32_t id;
- int fd, ret, handle, status = SSH2_FX_FAILURE;
-
- id = get_int();
- handle = get_handle();
- debug("request %u: fstat \"%s\" (handle %u)",
- id, handle_to_name(handle), handle);
- fd = handle_to_fd(handle);
- if (fd >= 0) {
- ret = fstat(fd, &st);
- if (ret < 0) {
- status = errno_to_portable(errno);
- } else {
- stat_to_attrib(&st, &a);
- send_attrib(id, &a);
- status = SSH2_FX_OK;
- }
- }
- if (status != SSH2_FX_OK)
- send_status(id, status);
-}
-
-static struct timeval *
-attrib_to_tv(const Attrib *a)
-{
- static struct timeval tv[2];
-
- tv[0].tv_sec = a->atime;
- tv[0].tv_usec = 0;
- tv[1].tv_sec = a->mtime;
- tv[1].tv_usec = 0;
- return tv;
-}
-
-static void
-process_setstat(void)
-{
- Attrib *a;
- u_int32_t id;
- char *name;
- int status = SSH2_FX_OK, ret;
-
- id = get_int();
- name = get_string(NULL);
- a = get_attrib();
- debug("request %u: setstat name \"%s\"", id, name);
- if (a->flags & SSH2_FILEXFER_ATTR_SIZE) {
- log("set \"%s\" size %llu",
- name, (unsigned long long)a->size);
- ret = truncate(name, a->size);
- if (ret == -1)
- status = errno_to_portable(errno);
- }
- if (a->flags & SSH2_FILEXFER_ATTR_PERMISSIONS) {
- log("set \"%s\" mode %04o", name, a->perm);
- ret = chmod(name, a->perm & 07777);
- if (ret == -1)
- status = errno_to_portable(errno);
- }
- if (a->flags & SSH2_FILEXFER_ATTR_ACMODTIME) {
- char buf[64];
- time_t t = a->mtime;
-
- strftime(buf, sizeof(buf), "%Y" "%m%d-%H:%M:%S",
- localtime(&t));
- log("set \"%s\" modtime %s", name, buf);
- ret = utimes(name, attrib_to_tv(a));
- if (ret == -1)
- status = errno_to_portable(errno);
- }
- if (a->flags & SSH2_FILEXFER_ATTR_UIDGID) {
- log("set \"%s\" owner %lu group %lu", name,
- (u_long)a->uid, (u_long)a->gid);
- ret = chown(name, a->uid, a->gid);
- if (ret == -1)
- status = errno_to_portable(errno);
- }
- send_status(id, status);
- xfree(name);
-}
-
-static void
-process_fsetstat(void)
-{
- Attrib *a;
- u_int32_t id;
- int handle, fd, ret;
- int status = SSH2_FX_OK;
-
- id = get_int();
- handle = get_handle();
- a = get_attrib();
- debug("request %u: fsetstat handle %d", id, handle);
- fd = handle_to_fd(handle);
- if (fd < 0) {
- status = SSH2_FX_FAILURE;
- } else {
- char *name = handle_to_name(handle);
-
- if (a->flags & SSH2_FILEXFER_ATTR_SIZE) {
- log("set \"%s\" size %llu",
- name, (unsigned long long)a->size);
- ret = ftruncate(fd, a->size);
- if (ret == -1)
- status = errno_to_portable(errno);
- }
- if (a->flags & SSH2_FILEXFER_ATTR_PERMISSIONS) {
- log("set \"%s\" mode %04o", name, a->perm);
-#ifdef HAVE_FCHMOD
- ret = fchmod(fd, a->perm & 07777);
-#else
- ret = chmod(name, a->perm & 07777);
-#endif
- if (ret == -1)
- status = errno_to_portable(errno);
- }
- if (a->flags & SSH2_FILEXFER_ATTR_ACMODTIME) {
- char buf[64];
- time_t t = a->mtime;
-
- strftime(buf, sizeof(buf), "%Y" "%m%d-%H:%M:%S",
- localtime(&t));
- log("set \"%s\" modtime %s", name, buf);
-#ifdef HAVE_FUTIMES
- ret = futimes(fd, attrib_to_tv(a));
-#else
- ret = utimes(name, attrib_to_tv(a));
-#endif
- if (ret == -1)
- status = errno_to_portable(errno);
- }
- if (a->flags & SSH2_FILEXFER_ATTR_UIDGID) {
- log("set \"%s\" owner %lu group %lu", name,
- (u_long)a->uid, (u_long)a->gid);
-#ifdef HAVE_FCHOWN
- ret = fchown(fd, a->uid, a->gid);
-#else
- ret = chown(name, a->uid, a->gid);
-#endif
- if (ret == -1)
- status = errno_to_portable(errno);
- }
- }
- send_status(id, status);
-}
-
-static void
-process_opendir(void)
-{
- DIR *dirp = NULL;
- char *path;
- int handle, status = SSH2_FX_FAILURE;
- u_int32_t id;
-
- id = get_int();
- path = get_string(NULL);
- debug3("request %u: opendir", id);
- log("opendir \"%s\"", path);
- dirp = opendir(path);
- if (dirp == NULL) {
- status = errno_to_portable(errno);
- } else {
- handle = handle_new(HANDLE_DIR, path, 0, dirp);
- if (handle < 0) {
- closedir(dirp);
- } else {
- send_handle(id, handle);
- status = SSH2_FX_OK;
- }
-
- }
- if (status != SSH2_FX_OK)
- send_status(id, status);
- xfree(path);
-}
-
-static void
-process_readdir(void)
-{
- DIR *dirp;
- struct dirent *dp;
- char *path;
- int handle;
- u_int32_t id;
-
- id = get_int();
- handle = get_handle();
- debug("request %u: readdir \"%s\" (handle %d)", id,
- handle_to_name(handle), handle);
- dirp = handle_to_dir(handle);
- path = handle_to_name(handle);
- if (dirp == NULL || path == NULL) {
- send_status(id, SSH2_FX_FAILURE);
- } else {
- struct stat st;
- char pathname[MAXPATHLEN];
- Stat *stats;
- int nstats = 10, count = 0, i;
-
- stats = xcalloc(nstats, sizeof(Stat));
- while ((dp = readdir(dirp)) != NULL) {
- if (count >= nstats) {
- nstats *= 2;
- stats = xrealloc(stats, nstats * sizeof(Stat));
- }
-/* XXX OVERFLOW ? */
- snprintf(pathname, sizeof pathname, "%s%s%s", path,
- strcmp(path, "/") ? "/" : "", dp->d_name);
- if (lstat(pathname, &st) < 0)
- continue;
- stat_to_attrib(&st, &(stats[count].attrib));
- stats[count].name = xstrdup(dp->d_name);
- stats[count].long_name = ls_file(dp->d_name, &st, 0);
- count++;
- /* send up to 100 entries in one message */
- /* XXX check packet size instead */
- if (count == 100)
- break;
- }
- if (count > 0) {
- send_names(id, count, stats);
- for (i = 0; i < count; i++) {
- xfree(stats[i].name);
- xfree(stats[i].long_name);
- }
- } else {
- send_status(id, SSH2_FX_EOF);
- }
- xfree(stats);
- }
-}
-
-static void
-process_remove(void)
-{
- char *name;
- u_int32_t id;
- int status = SSH2_FX_FAILURE;
- int ret;
-
- id = get_int();
- name = get_string(NULL);
- debug3("request %u: remove", id);
- log("remove name \"%s\"", name);
- ret = unlink(name);
- status = (ret == -1) ? errno_to_portable(errno) : SSH2_FX_OK;
- send_status(id, status);
- xfree(name);
-}
-
-static void
-process_mkdir(void)
-{
- Attrib *a;
- u_int32_t id;
- char *name;
- int ret, mode, status = SSH2_FX_FAILURE;
-
- id = get_int();
- name = get_string(NULL);
- a = get_attrib();
- mode = (a->flags & SSH2_FILEXFER_ATTR_PERMISSIONS) ?
- a->perm & 0777 : 0777;
- debug3("request %u: mkdir", id);
- log("mkdir name \"%s\" mode 0%o", name, mode);
- ret = mkdir(name, mode);
- status = (ret == -1) ? errno_to_portable(errno) : SSH2_FX_OK;
- send_status(id, status);
- xfree(name);
-}
-
-static void
-process_rmdir(void)
-{
- u_int32_t id;
- char *name;
- int ret, status;
-
- id = get_int();
- name = get_string(NULL);
- debug3("request %u: rmdir", id);
- log("rmdir name \"%s\"", name);
- ret = rmdir(name);
- status = (ret == -1) ? errno_to_portable(errno) : SSH2_FX_OK;
- send_status(id, status);
- xfree(name);
-}
-
-static void
-process_realpath(void)
-{
- char resolvedname[MAXPATHLEN];
- u_int32_t id;
- char *path;
-
- id = get_int();
- path = get_string(NULL);
- if (path[0] == '\0') {
- xfree(path);
- path = xstrdup(".");
- }
- debug3("request %u: realpath", id);
- verbose("realpath \"%s\"", path);
- if (realpath(path, resolvedname) == NULL) {
- send_status(id, errno_to_portable(errno));
- } else {
- Stat s;
- attrib_clear(&s.attrib);
- s.name = s.long_name = resolvedname;
- send_names(id, 1, &s);
- }
- xfree(path);
-}
-
-static void
-process_rename(void)
-{
- u_int32_t id;
- char *oldpath, *newpath;
- int status;
- struct stat sb;
-
- id = get_int();
- oldpath = get_string(NULL);
- newpath = get_string(NULL);
- debug3("request %u: rename", id);
- log("rename old \"%s\" new \"%s\"", oldpath, newpath);
- status = SSH2_FX_FAILURE;
- if (lstat(oldpath, &sb) == -1)
- status = errno_to_portable(errno);
- else if (S_ISREG(sb.st_mode)) {
- /* Race-free rename of regular files */
- if (link(oldpath, newpath) == -1) {
- if (errno == EOPNOTSUPP
-#ifdef LINK_OPNOTSUPP_ERRNO
- || errno == LINK_OPNOTSUPP_ERRNO
-#endif
- ) {
- struct stat st;
-
- /*
- * fs doesn't support links, so fall back to
- * stat+rename. This is racy.
- */
- if (stat(newpath, &st) == -1) {
- if (rename(oldpath, newpath) == -1)
- status =
- errno_to_portable(errno);
- else
- status = SSH2_FX_OK;
- }
- } else {
- status = errno_to_portable(errno);
- }
- } else if (unlink(oldpath) == -1) {
- status = errno_to_portable(errno);
- /* clean spare link */
- unlink(newpath);
- } else
- status = SSH2_FX_OK;
- } else if (stat(newpath, &sb) == -1) {
- if (rename(oldpath, newpath) == -1)
- status = errno_to_portable(errno);
- else
- status = SSH2_FX_OK;
- }
- send_status(id, status);
- xfree(oldpath);
- xfree(newpath);
-}
-
-static void
-process_readlink(void)
-{
- u_int32_t id;
- int len;
- char buf[MAXPATHLEN];
- char *path;
-
- id = get_int();
- path = get_string(NULL);
- debug3("request %u: readlink", id);
- verbose("readlink \"%s\"", path);
- if ((len = readlink(path, buf, sizeof(buf) - 1)) == -1)
- send_status(id, errno_to_portable(errno));
- else {
- Stat s;
-
- buf[len] = '\0';
- attrib_clear(&s.attrib);
- s.name = s.long_name = buf;
- send_names(id, 1, &s);
- }
- xfree(path);
-}
-
-static void
-process_symlink(void)
-{
- u_int32_t id;
- char *oldpath, *newpath;
- int ret, status;
-
- id = get_int();
- oldpath = get_string(NULL);
- newpath = get_string(NULL);
- debug3("request %u: symlink", id);
- log("symlink old \"%s\" new \"%s\"", oldpath, newpath);
- /* this will fail if 'newpath' exists */
- ret = symlink(oldpath, newpath);
- status = (ret == -1) ? errno_to_portable(errno) : SSH2_FX_OK;
- send_status(id, status);
- xfree(oldpath);
- xfree(newpath);
-}
-
-static void
-process_extended(void)
-{
- u_int32_t id;
- char *request;
-
- id = get_int();
- request = get_string(NULL);
- send_status(id, SSH2_FX_OP_UNSUPPORTED); /* MUST */
- xfree(request);
-}
-
-/* stolen from ssh-agent */
-
-static void
-process(void)
-{
- u_int msg_len;
- u_int buf_len;
- u_int consumed;
- u_int type;
- u_char *cp;
-
- buf_len = buffer_len(&iqueue);
- if (buf_len < 5)
- return; /* Incomplete message. */
- cp = buffer_ptr(&iqueue);
- msg_len = get_u32(cp);
- if (msg_len > SFTP_MAX_MSG_LENGTH) {
- error("bad message from %s local user %s",
- client_addr, pw->pw_name);
- cleanup_exit(11);
- }
- if (buf_len < msg_len + 4)
- return;
- buffer_consume(&iqueue, 4);
- buf_len -= 4;
- type = buffer_get_char(&iqueue);
- switch (type) {
- case SSH2_FXP_INIT:
- process_init();
- break;
- case SSH2_FXP_OPEN:
- process_open();
- break;
- case SSH2_FXP_CLOSE:
- process_close();
- break;
- case SSH2_FXP_READ:
- process_read();
- break;
- case SSH2_FXP_WRITE:
- process_write();
- break;
- case SSH2_FXP_LSTAT:
- process_lstat();
- break;
- case SSH2_FXP_FSTAT:
- process_fstat();
- break;
- case SSH2_FXP_SETSTAT:
- process_setstat();
- break;
- case SSH2_FXP_FSETSTAT:
- process_fsetstat();
- break;
- case SSH2_FXP_OPENDIR:
- process_opendir();
- break;
- case SSH2_FXP_READDIR:
- process_readdir();
- break;
- case SSH2_FXP_REMOVE:
- process_remove();
- break;
- case SSH2_FXP_MKDIR:
- process_mkdir();
- break;
- case SSH2_FXP_RMDIR:
- process_rmdir();
- break;
- case SSH2_FXP_REALPATH:
- process_realpath();
- break;
- case SSH2_FXP_STAT:
- process_stat();
- break;
- case SSH2_FXP_RENAME:
- process_rename();
- break;
- case SSH2_FXP_READLINK:
- process_readlink();
- break;
- case SSH2_FXP_SYMLINK:
- process_symlink();
- break;
- case SSH2_FXP_EXTENDED:
- process_extended();
- break;
- default:
- error("Unknown message %d", type);
- break;
- }
- /* discard the remaining bytes from the current packet */
- if (buf_len < buffer_len(&iqueue))
- fatal("iqueue grew unexpectedly");
- consumed = buf_len - buffer_len(&iqueue);
- if (msg_len < consumed)
- fatal("msg_len %d < consumed %d", msg_len, consumed);
- if (msg_len > consumed)
- buffer_consume(&iqueue, msg_len - consumed);
-}
-
-/*
- * Cleanup handler that logs active handles upon normal exit. Not static since
- * sftp-server-main.c file needs that as well.
- */
-void
-cleanup_exit(int i)
-{
- if (pw != NULL && client_addr != NULL) {
- handle_log_exit();
- log("session closed for local user %s from [%s]",
- pw->pw_name, client_addr);
- }
- _exit(i);
-}
-
-static void
-usage(void)
-{
- fprintf(stderr,
- "Usage: %s [-he] [-l log_level] [-f log_facility]\n", __progname);
- exit(1);
-}
-
-int
-sftp_server_main(int argc, char **argv, struct passwd *user_pw)
-{
- fd_set *rset, *wset;
- int in, out, max, ch, skipargs = 0, log_stderr = 0;
- ssize_t len, olen, set_size;
- SyslogFacility log_facility = SYSLOG_FACILITY_AUTH;
- char *cp, buf[4*4096];
-
- extern char *optarg;
-
- __progname = get_progname(argv[0]);
-
- (void) g11n_setlocale(LC_ALL, "");
-
- log_init(__progname, log_level, log_facility, log_stderr);
-
- while (!skipargs && (ch = getopt(argc, argv, "C:f:l:che")) != -1) {
- switch (ch) {
- case 'c':
- /*
- * Ignore all arguments if we are invoked as a
- * shell using "sftp-server -c command"
- */
- skipargs = 1;
- break;
- case 'e':
- log_stderr = 1;
- break;
- case 'l':
- log_level = log_level_number(optarg);
- if (log_level == SYSLOG_LEVEL_NOT_SET)
- error("Invalid log level \"%s\"", optarg);
- break;
- case 'f':
- log_facility = log_facility_number(optarg);
- if (log_facility == SYSLOG_FACILITY_NOT_SET)
- error("Invalid log facility \"%s\"", optarg);
- break;
- case 'h':
- default:
- usage();
- }
- }
-
- log_init(__progname, log_level, log_facility, log_stderr);
-
- if ((cp = getenv("SSH_CONNECTION")) != NULL) {
- client_addr = xstrdup(cp);
- if ((cp = strchr(client_addr, ' ')) == NULL)
- fatal("Malformed SSH_CONNECTION variable: \"%s\"",
- getenv("SSH_CONNECTION"));
- *cp = '\0';
- } else
- client_addr = xstrdup("UNKNOWN");
-
- pw = pwcopy(user_pw);
-
- log("session opened for local user %s from [%s]",
- pw->pw_name, client_addr);
-
- handle_init();
-
- in = dup(STDIN_FILENO);
- out = dup(STDOUT_FILENO);
-
-#ifdef HAVE_CYGWIN
- setmode(in, O_BINARY);
- setmode(out, O_BINARY);
-#endif
-
- max = 0;
- if (in > max)
- max = in;
- if (out > max)
- max = out;
-
- buffer_init(&iqueue);
- buffer_init(&oqueue);
-
- set_size = howmany(max + 1, NFDBITS) * sizeof(fd_mask);
- rset = (fd_set *)xmalloc(set_size);
- wset = (fd_set *)xmalloc(set_size);
-
- for (;;) {
- memset(rset, 0, set_size);
- memset(wset, 0, set_size);
-
- /*
- * Ensure that we can read a full buffer and handle
- * the worst-case length packet it can generate,
- * otherwise apply backpressure by stopping reads.
- */
- if (buffer_check_alloc(&iqueue, sizeof(buf)) &&
- buffer_check_alloc(&oqueue, SFTP_MAX_MSG_LENGTH))
- FD_SET(in, rset);
-
- olen = buffer_len(&oqueue);
- if (olen > 0)
- FD_SET(out, wset);
-
- if (select(max+1, rset, wset, NULL, NULL) < 0) {
- if (errno == EINTR)
- continue;
- error("select: %s", strerror(errno));
- cleanup_exit(2);
- }
-
- /* copy stdin to iqueue */
- if (FD_ISSET(in, rset)) {
- len = read(in, buf, sizeof buf);
- if (len == 0) {
- debug("read eof");
- cleanup_exit(0);
- } else if (len < 0) {
- error("read: %s", strerror(errno));
- cleanup_exit(1);
- } else {
- buffer_append(&iqueue, buf, len);
- }
- }
- /* send oqueue to stdout */
- if (FD_ISSET(out, wset)) {
- len = write(out, buffer_ptr(&oqueue), olen);
- if (len < 0) {
- error("write: %s", strerror(errno));
- cleanup_exit(1);
- } else {
- buffer_consume(&oqueue, len);
- }
- }
-
- /*
- * Process requests from client if we can fit the results
- * into the output buffer, otherwise stop processing input
- * and let the output queue drain.
- */
- if (buffer_check_alloc(&oqueue, SFTP_MAX_MSG_LENGTH))
- process();
- }
-
- /* NOTREACHED */
- return (0);
-}
diff --git a/usr/src/cmd/ssh/sftp/Makefile b/usr/src/cmd/ssh/sftp/Makefile
deleted file mode 100644
index 1348152977..0000000000
--- a/usr/src/cmd/ssh/sftp/Makefile
+++ /dev/null
@@ -1,68 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
-#
-# cmd/ssh/sftp/Makefile
-
-PROG = sftp
-
-OBJS = \
- sftp.o \
- sftp-client.o \
- sftp-glob.o
-
-SRCS = $(OBJS:.o=.c)
-
-include ../../Makefile.cmd
-include ../Makefile.ssh-common
-
-DIRS= $(ROOTLIBSUNSSH)
-
-LDLIBS += $(SSH_COMMON_LDLIBS) -lsocket -lcrypto -ltecla
-
-POFILE_DIR = ..
-
-.KEEP_STATE:
-
-.PARALLEL: $(OBJS)
-
-all: $(PROG)
-
-$(PROG): $(OBJS) ../libssh/$(MACH)/libssh.a ../libopenbsd-compat/$(MACH)/libopenbsd-compat.a
- $(LINK.c) $(OBJS) -o $@ $(LDLIBS) $(DYNFLAGS)
- $(POST_PROCESS)
-
-$(DIRS):
- $(INS.dir)
-
-$(ROOTBIN)/sftp: $(ROOTLIBSUNSSH)/sftp
- -$(RM) $@; $(SYMLINK) ../lib/sunssh/sftp $@
-
-install: all $(DIRS) $(ROOTPROG)
-
-clean:
- $(RM) -f $(OBJS) $(PROG)
-
-lint: lint_SRCS
-
-include ../Makefile.msg.targ
-include ../../Makefile.targ
diff --git a/usr/src/cmd/ssh/sftp/sftp-client.c b/usr/src/cmd/ssh/sftp/sftp-client.c
deleted file mode 100644
index f194ccaf2b..0000000000
--- a/usr/src/cmd/ssh/sftp/sftp-client.c
+++ /dev/null
@@ -1,1184 +0,0 @@
-/*
- * Copyright (c) 2001-2004 Damien Miller <djm@openbsd.org>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-/* $OpenBSD: sftp-client.c,v 1.76 2007/01/22 11:32:50 djm Exp $ */
-
-/* XXX: memleaks */
-/* XXX: signed vs unsigned */
-/* XXX: remove all logging, only return status codes */
-/* XXX: copy between two remote sites */
-
-#include "includes.h"
-
-#include <sys/types.h>
-#include <sys/param.h>
-#include "sys-queue.h"
-#ifdef HAVE_SYS_STAT_H
-# include <sys/stat.h>
-#endif
-#ifdef HAVE_SYS_TIME_H
-# include <sys/time.h>
-#endif
-#include <sys/uio.h>
-
-#include <errno.h>
-#include <fcntl.h>
-#include <signal.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-
-#include "xmalloc.h"
-#include "buffer.h"
-#include "bufaux.h"
-#include "log.h"
-#include "atomicio.h"
-#include "progressmeter.h"
-#include "misc.h"
-
-#include "sftp.h"
-#include "sftp-common.h"
-#include "sftp-client.h"
-
-extern volatile sig_atomic_t interrupted;
-extern int showprogress;
-
-/* Minimum amount of data to read at a time */
-#define MIN_READ_SIZE 512
-
-struct sftp_conn {
- int fd_in;
- int fd_out;
- u_int transfer_buflen;
- u_int num_requests;
- u_int version;
- u_int msg_id;
-};
-
-static void
-send_msg(int fd, Buffer *m)
-{
- char mlen[4];
- struct iovec iov[2];
-
- if (buffer_len(m) > SFTP_MAX_MSG_LENGTH)
- fatal("Outbound message too long %u", buffer_len(m));
-
- /* Send length first */
- put_u32(mlen, buffer_len(m));
- iov[0].iov_base = mlen;
- iov[0].iov_len = sizeof(mlen);
- iov[1].iov_base = buffer_ptr(m);
- iov[1].iov_len = buffer_len(m);
-
- if (atomiciov(writev, fd, iov, 2) != buffer_len(m) + sizeof(mlen))
- fatal("Couldn't send packet: %s", strerror(errno));
-
- buffer_clear(m);
-}
-
-static void
-get_msg(int fd, Buffer *m)
-{
- u_int msg_len;
-
- buffer_append_space(m, 4);
- if (atomicio(read, fd, buffer_ptr(m), 4) != 4) {
- if (errno == EPIPE)
- fatal("Connection closed");
- else
- fatal("Couldn't read packet: %s", strerror(errno));
- }
-
- msg_len = buffer_get_int(m);
- if (msg_len > SFTP_MAX_MSG_LENGTH)
- fatal("Received message too long %u", msg_len);
-
- buffer_append_space(m, msg_len);
- if (atomicio(read, fd, buffer_ptr(m), msg_len) != msg_len) {
- if (errno == EPIPE)
- fatal("Connection closed");
- else
- fatal("Read packet: %s", strerror(errno));
- }
-}
-
-static void
-send_string_request(int fd, u_int id, u_int code, char *s,
- u_int len)
-{
- Buffer msg;
-
- buffer_init(&msg);
- buffer_put_char(&msg, code);
- buffer_put_int(&msg, id);
- buffer_put_string(&msg, s, len);
- send_msg(fd, &msg);
- debug3("Sent message fd %d T:%u I:%u", fd, code, id);
- buffer_free(&msg);
-}
-
-static void
-send_string_attrs_request(int fd, u_int id, u_int code, char *s,
- u_int len, Attrib *a)
-{
- Buffer msg;
-
- buffer_init(&msg);
- buffer_put_char(&msg, code);
- buffer_put_int(&msg, id);
- buffer_put_string(&msg, s, len);
- encode_attrib(&msg, a);
- send_msg(fd, &msg);
- debug3("Sent message fd %d T:%u I:%u", fd, code, id);
- buffer_free(&msg);
-}
-
-static u_int
-get_status(int fd, u_int expected_id)
-{
- Buffer msg;
- u_int type, id, status;
-
- buffer_init(&msg);
- get_msg(fd, &msg);
- type = buffer_get_char(&msg);
- id = buffer_get_int(&msg);
-
- if (id != expected_id)
- fatal("ID mismatch (%u != %u)", id, expected_id);
- if (type != SSH2_FXP_STATUS)
- fatal("Expected SSH2_FXP_STATUS(%u) packet, got %u",
- SSH2_FXP_STATUS, type);
-
- status = buffer_get_int(&msg);
- buffer_free(&msg);
-
- debug3("SSH2_FXP_STATUS %u", status);
-
- return(status);
-}
-
-static char *
-get_handle(int fd, u_int expected_id, u_int *len)
-{
- Buffer msg;
- u_int type, id;
- char *handle;
-
- buffer_init(&msg);
- get_msg(fd, &msg);
- type = buffer_get_char(&msg);
- id = buffer_get_int(&msg);
-
- if (id != expected_id)
- fatal("ID mismatch (%u != %u)", id, expected_id);
- if (type == SSH2_FXP_STATUS) {
- int status = buffer_get_int(&msg);
-
- error("Couldn't get handle: %s", fx2txt(status));
- buffer_free(&msg);
- return(NULL);
- } else if (type != SSH2_FXP_HANDLE)
- fatal("Expected SSH2_FXP_HANDLE(%u) packet, got %u",
- SSH2_FXP_HANDLE, type);
-
- handle = buffer_get_string(&msg, len);
- buffer_free(&msg);
-
- return(handle);
-}
-
-static Attrib *
-get_decode_stat(int fd, u_int expected_id, int quiet)
-{
- Buffer msg;
- u_int type, id;
- Attrib *a;
-
- buffer_init(&msg);
- get_msg(fd, &msg);
-
- type = buffer_get_char(&msg);
- id = buffer_get_int(&msg);
-
- debug3("Received stat reply T:%u I:%u", type, id);
- if (id != expected_id)
- fatal("ID mismatch (%u != %u)", id, expected_id);
- if (type == SSH2_FXP_STATUS) {
- int status = buffer_get_int(&msg);
-
- if (quiet)
- debug("Couldn't stat remote file: %s", fx2txt(status));
- else
- error("Couldn't stat remote file: %s", fx2txt(status));
- buffer_free(&msg);
- return(NULL);
- } else if (type != SSH2_FXP_ATTRS) {
- fatal("Expected SSH2_FXP_ATTRS(%u) packet, got %u",
- SSH2_FXP_ATTRS, type);
- }
- a = decode_attrib(&msg);
- buffer_free(&msg);
-
- return(a);
-}
-
-struct sftp_conn *
-do_init(int fd_in, int fd_out, u_int transfer_buflen, u_int num_requests)
-{
- u_int type;
- int version;
- Buffer msg;
- struct sftp_conn *ret;
-
- buffer_init(&msg);
- buffer_put_char(&msg, SSH2_FXP_INIT);
- buffer_put_int(&msg, SSH2_FILEXFER_VERSION);
- send_msg(fd_out, &msg);
-
- buffer_clear(&msg);
-
- get_msg(fd_in, &msg);
-
- /* Expecting a VERSION reply */
- if ((type = buffer_get_char(&msg)) != SSH2_FXP_VERSION) {
- error("Invalid packet back from SSH2_FXP_INIT (type %u)",
- type);
- buffer_free(&msg);
- return(NULL);
- }
- version = buffer_get_int(&msg);
-
- debug2("Remote version: %d", version);
-
- /* Check for extensions */
- while (buffer_len(&msg) > 0) {
- char *name = buffer_get_string(&msg, NULL);
- char *value = buffer_get_string(&msg, NULL);
-
- debug2("Init extension: \"%s\"", name);
- xfree(name);
- xfree(value);
- }
-
- buffer_free(&msg);
-
- ret = xmalloc(sizeof(*ret));
- ret->fd_in = fd_in;
- ret->fd_out = fd_out;
- ret->transfer_buflen = transfer_buflen;
- ret->num_requests = num_requests;
- ret->version = version;
- ret->msg_id = 1;
-
- /* Some filexfer v.0 servers don't support large packets */
- if (version == 0)
- ret->transfer_buflen = MIN(ret->transfer_buflen, 20480);
-
- return(ret);
-}
-
-u_int
-sftp_proto_version(struct sftp_conn *conn)
-{
- return(conn->version);
-}
-
-int
-do_close(struct sftp_conn *conn, char *handle, u_int handle_len)
-{
- u_int id, status;
- Buffer msg;
-
- buffer_init(&msg);
-
- id = conn->msg_id++;
- buffer_put_char(&msg, SSH2_FXP_CLOSE);
- buffer_put_int(&msg, id);
- buffer_put_string(&msg, handle, handle_len);
- send_msg(conn->fd_out, &msg);
- debug3("Sent message SSH2_FXP_CLOSE I:%u", id);
-
- status = get_status(conn->fd_in, id);
- if (status != SSH2_FX_OK)
- error("Couldn't close file: %s", fx2txt(status));
-
- buffer_free(&msg);
-
- return(status);
-}
-
-
-static int
-do_lsreaddir(struct sftp_conn *conn, char *path, int printflag,
- SFTP_DIRENT ***dir)
-{
- Buffer msg;
- u_int count, type, id, handle_len, i, expected_id, ents = 0;
- char *handle;
-
- id = conn->msg_id++;
-
- buffer_init(&msg);
- buffer_put_char(&msg, SSH2_FXP_OPENDIR);
- buffer_put_int(&msg, id);
- buffer_put_cstring(&msg, path);
- send_msg(conn->fd_out, &msg);
-
- buffer_clear(&msg);
-
- handle = get_handle(conn->fd_in, id, &handle_len);
- if (handle == NULL)
- return(-1);
-
- if (dir) {
- ents = 0;
- *dir = xmalloc(sizeof(**dir));
- (*dir)[0] = NULL;
- }
-
- for (; !interrupted;) {
- id = expected_id = conn->msg_id++;
-
- debug3("Sending SSH2_FXP_READDIR I:%u", id);
-
- buffer_clear(&msg);
- buffer_put_char(&msg, SSH2_FXP_READDIR);
- buffer_put_int(&msg, id);
- buffer_put_string(&msg, handle, handle_len);
- send_msg(conn->fd_out, &msg);
-
- buffer_clear(&msg);
-
- get_msg(conn->fd_in, &msg);
-
- type = buffer_get_char(&msg);
- id = buffer_get_int(&msg);
-
- debug3("Received reply T:%u I:%u", type, id);
-
- if (id != expected_id)
- fatal("ID mismatch (%u != %u)", id, expected_id);
-
- if (type == SSH2_FXP_STATUS) {
- int status = buffer_get_int(&msg);
-
- debug3("Received SSH2_FXP_STATUS %d", status);
-
- if (status == SSH2_FX_EOF) {
- break;
- } else {
- error("Couldn't read directory: %s",
- fx2txt(status));
- do_close(conn, handle, handle_len);
- xfree(handle);
- return(status);
- }
- } else if (type != SSH2_FXP_NAME)
- fatal("Expected SSH2_FXP_NAME(%u) packet, got %u",
- SSH2_FXP_NAME, type);
-
- count = buffer_get_int(&msg);
- if (count == 0)
- break;
- debug3("Received %d SSH2_FXP_NAME responses", count);
- for (i = 0; i < count; i++) {
- char *filename, *longname;
- Attrib *a;
-
- filename = buffer_get_string(&msg, NULL);
- longname = buffer_get_string(&msg, NULL);
- a = decode_attrib(&msg);
-
- if (printflag)
- printf("%s\n", longname);
-
- if (dir) {
- *dir = xrealloc(*dir, (ents + 2) * sizeof(**dir));
- (*dir)[ents] = xmalloc(sizeof(***dir));
- (*dir)[ents]->filename = xstrdup(filename);
- (*dir)[ents]->longname = xstrdup(longname);
- memcpy(&(*dir)[ents]->a, a, sizeof(*a));
- (*dir)[++ents] = NULL;
- }
-
- xfree(filename);
- xfree(longname);
- }
- }
-
- buffer_free(&msg);
- do_close(conn, handle, handle_len);
- xfree(handle);
-
- /* Don't return partial matches on interrupt */
- if (interrupted && dir != NULL && *dir != NULL) {
- free_sftp_dirents(*dir);
- *dir = xmalloc(sizeof(**dir));
- **dir = NULL;
- }
-
- return(0);
-}
-
-int
-do_readdir(struct sftp_conn *conn, char *path, SFTP_DIRENT ***dir)
-{
- return(do_lsreaddir(conn, path, 0, dir));
-}
-
-void free_sftp_dirents(SFTP_DIRENT **s)
-{
- int i;
-
- for (i = 0; s[i]; i++) {
- xfree(s[i]->filename);
- xfree(s[i]->longname);
- xfree(s[i]);
- }
- xfree(s);
-}
-
-int
-do_rm(struct sftp_conn *conn, char *path)
-{
- u_int status, id;
-
- debug2("Sending SSH2_FXP_REMOVE \"%s\"", path);
-
- id = conn->msg_id++;
- send_string_request(conn->fd_out, id, SSH2_FXP_REMOVE, path,
- strlen(path));
- status = get_status(conn->fd_in, id);
- if (status != SSH2_FX_OK)
- error("Couldn't delete file: %s", fx2txt(status));
- return(status);
-}
-
-int
-do_mkdir(struct sftp_conn *conn, char *path, Attrib *a)
-{
- u_int status, id;
-
- id = conn->msg_id++;
- send_string_attrs_request(conn->fd_out, id, SSH2_FXP_MKDIR, path,
- strlen(path), a);
-
- status = get_status(conn->fd_in, id);
- if (status != SSH2_FX_OK)
- error("Couldn't create directory: %s", fx2txt(status));
-
- return(status);
-}
-
-int
-do_rmdir(struct sftp_conn *conn, char *path)
-{
- u_int status, id;
-
- id = conn->msg_id++;
- send_string_request(conn->fd_out, id, SSH2_FXP_RMDIR, path,
- strlen(path));
-
- status = get_status(conn->fd_in, id);
- if (status != SSH2_FX_OK)
- error("Couldn't remove directory: %s", fx2txt(status));
-
- return(status);
-}
-
-Attrib *
-do_stat(struct sftp_conn *conn, char *path, int quiet)
-{
- u_int id;
-
- id = conn->msg_id++;
-
- send_string_request(conn->fd_out, id,
- conn->version == 0 ? SSH2_FXP_STAT_VERSION_0 : SSH2_FXP_STAT,
- path, strlen(path));
-
- return(get_decode_stat(conn->fd_in, id, quiet));
-}
-
-Attrib *
-do_lstat(struct sftp_conn *conn, char *path, int quiet)
-{
- u_int id;
-
- if (conn->version == 0) {
- if (quiet)
- debug("Server version does not support lstat operation");
- else
- log("Server version does not support lstat operation");
- return(do_stat(conn, path, quiet));
- }
-
- id = conn->msg_id++;
- send_string_request(conn->fd_out, id, SSH2_FXP_LSTAT, path,
- strlen(path));
-
- return(get_decode_stat(conn->fd_in, id, quiet));
-}
-
-/* this is never used so hush the lint */
-#if 0
-Attrib *
-do_fstat(struct sftp_conn *conn, char *handle, u_int handle_len, int quiet)
-{
- u_int id;
-
- id = conn->msg_id++;
- send_string_request(conn->fd_out, id, SSH2_FXP_FSTAT, handle,
- handle_len);
-
- return(get_decode_stat(conn->fd_in, id, quiet));
-}
-#endif
-
-int
-do_setstat(struct sftp_conn *conn, char *path, Attrib *a)
-{
- u_int status, id;
-
- id = conn->msg_id++;
- send_string_attrs_request(conn->fd_out, id, SSH2_FXP_SETSTAT, path,
- strlen(path), a);
-
- status = get_status(conn->fd_in, id);
- if (status != SSH2_FX_OK)
- error("Couldn't setstat on \"%s\": %s", path,
- fx2txt(status));
-
- return(status);
-}
-
-int
-do_fsetstat(struct sftp_conn *conn, char *handle, u_int handle_len,
- Attrib *a)
-{
- u_int status, id;
-
- id = conn->msg_id++;
- send_string_attrs_request(conn->fd_out, id, SSH2_FXP_FSETSTAT, handle,
- handle_len, a);
-
- status = get_status(conn->fd_in, id);
- if (status != SSH2_FX_OK)
- error("Couldn't fsetstat: %s", fx2txt(status));
-
- return(status);
-}
-
-char *
-do_realpath(struct sftp_conn *conn, char *path)
-{
- Buffer msg;
- u_int type, expected_id, count, id;
- char *filename, *longname;
- /* LINTED */
- Attrib *a;
-
- expected_id = id = conn->msg_id++;
- send_string_request(conn->fd_out, id, SSH2_FXP_REALPATH, path,
- strlen(path));
-
- buffer_init(&msg);
-
- get_msg(conn->fd_in, &msg);
- type = buffer_get_char(&msg);
- id = buffer_get_int(&msg);
-
- if (id != expected_id)
- fatal("ID mismatch (%u != %u)", id, expected_id);
-
- if (type == SSH2_FXP_STATUS) {
- u_int status = buffer_get_int(&msg);
-
- error("Couldn't canonicalise: %s", fx2txt(status));
- return(NULL);
- } else if (type != SSH2_FXP_NAME)
- fatal("Expected SSH2_FXP_NAME(%u) packet, got %u",
- SSH2_FXP_NAME, type);
-
- count = buffer_get_int(&msg);
- if (count != 1)
- fatal("Got multiple names (%d) from SSH_FXP_REALPATH", count);
-
- filename = buffer_get_string(&msg, NULL);
- longname = buffer_get_string(&msg, NULL);
- a = decode_attrib(&msg);
-
- debug3("SSH_FXP_REALPATH %s -> %s", path, filename);
-
- xfree(longname);
-
- buffer_free(&msg);
-
- return(filename);
-}
-
-int
-do_rename(struct sftp_conn *conn, char *oldpath, char *newpath)
-{
- Buffer msg;
- u_int status, id;
-
- buffer_init(&msg);
-
- /* Send rename request */
- id = conn->msg_id++;
- buffer_put_char(&msg, SSH2_FXP_RENAME);
- buffer_put_int(&msg, id);
- buffer_put_cstring(&msg, oldpath);
- buffer_put_cstring(&msg, newpath);
- send_msg(conn->fd_out, &msg);
- debug3("Sent message SSH2_FXP_RENAME \"%s\" -> \"%s\"", oldpath,
- newpath);
- buffer_free(&msg);
-
- status = get_status(conn->fd_in, id);
- if (status != SSH2_FX_OK)
- error("Couldn't rename file \"%s\" to \"%s\": %s", oldpath,
- newpath, fx2txt(status));
-
- return(status);
-}
-
-int
-do_symlink(struct sftp_conn *conn, char *oldpath, char *newpath)
-{
- Buffer msg;
- u_int status, id;
-
- if (conn->version < 3) {
- error("This server does not support the symlink operation");
- return(SSH2_FX_OP_UNSUPPORTED);
- }
-
- buffer_init(&msg);
-
- /* Send symlink request */
- id = conn->msg_id++;
- buffer_put_char(&msg, SSH2_FXP_SYMLINK);
- buffer_put_int(&msg, id);
- buffer_put_cstring(&msg, oldpath);
- buffer_put_cstring(&msg, newpath);
- send_msg(conn->fd_out, &msg);
- debug3("Sent message SSH2_FXP_SYMLINK \"%s\" -> \"%s\"", oldpath,
- newpath);
- buffer_free(&msg);
-
- status = get_status(conn->fd_in, id);
- if (status != SSH2_FX_OK)
- error("Couldn't symlink file \"%s\" to \"%s\": %s", oldpath,
- newpath, fx2txt(status));
-
- return(status);
-}
-
-/* this is never used so hush the lint */
-#if 0
-char *
-do_readlink(struct sftp_conn *conn, char *path)
-{
- Buffer msg;
- u_int type, expected_id, count, id;
- char *filename, *longname;
- Attrib *a;
-
- expected_id = id = conn->msg_id++;
- send_string_request(conn->fd_out, id, SSH2_FXP_READLINK, path,
- strlen(path));
-
- buffer_init(&msg);
-
- get_msg(conn->fd_in, &msg);
- type = buffer_get_char(&msg);
- id = buffer_get_int(&msg);
-
- if (id != expected_id)
- fatal("ID mismatch (%u != %u)", id, expected_id);
-
- if (type == SSH2_FXP_STATUS) {
- u_int status = buffer_get_int(&msg);
-
- error("Couldn't readlink: %s", fx2txt(status));
- return(NULL);
- } else if (type != SSH2_FXP_NAME)
- fatal("Expected SSH2_FXP_NAME(%u) packet, got %u",
- SSH2_FXP_NAME, type);
-
- count = buffer_get_int(&msg);
- if (count != 1)
- fatal("Got multiple names (%d) from SSH_FXP_READLINK", count);
-
- filename = buffer_get_string(&msg, NULL);
- longname = buffer_get_string(&msg, NULL);
- a = decode_attrib(&msg);
-
- debug3("SSH_FXP_READLINK %s -> %s", path, filename);
-
- xfree(longname);
-
- buffer_free(&msg);
-
- return(filename);
-}
-#endif
-
-static void
-send_read_request(int fd_out, u_int id, u_int64_t offset, u_int len,
- char *handle, u_int handle_len)
-{
- Buffer msg;
-
- buffer_init(&msg);
- buffer_clear(&msg);
- buffer_put_char(&msg, SSH2_FXP_READ);
- buffer_put_int(&msg, id);
- buffer_put_string(&msg, handle, handle_len);
- buffer_put_int64(&msg, offset);
- buffer_put_int(&msg, len);
- send_msg(fd_out, &msg);
- buffer_free(&msg);
-}
-
-int
-do_download(struct sftp_conn *conn, char *remote_path, char *local_path,
- int pflag)
-{
- Attrib junk, *a;
- Buffer msg;
- char *handle;
- int local_fd, status = 0, write_error;
- int read_error, write_errno;
- u_int64_t offset, size;
- u_int handle_len, mode, type, id, buflen, num_req, max_req;
- off_t progress_counter;
- struct request {
- u_int id;
- u_int len;
- u_int64_t offset;
- TAILQ_ENTRY(request) tq;
- };
- TAILQ_HEAD(reqhead, request) requests;
- struct request *req;
-
- TAILQ_INIT(&requests);
-
- a = do_stat(conn, remote_path, 0);
- if (a == NULL)
- return(-1);
-
- /* Do not preserve set[ug]id here, as we do not preserve ownership */
- if (a->flags & SSH2_FILEXFER_ATTR_PERMISSIONS)
- mode = a->perm & 0777;
- else
- mode = 0666;
-
- if ((a->flags & SSH2_FILEXFER_ATTR_PERMISSIONS) &&
- (!S_ISREG(a->perm))) {
- error("Cannot download non-regular file: %s", remote_path);
- return(-1);
- }
-
- if (a->flags & SSH2_FILEXFER_ATTR_SIZE)
- size = a->size;
- else
- size = 0;
-
- buflen = conn->transfer_buflen;
- buffer_init(&msg);
-
- /* Send open request */
- id = conn->msg_id++;
- buffer_put_char(&msg, SSH2_FXP_OPEN);
- buffer_put_int(&msg, id);
- buffer_put_cstring(&msg, remote_path);
- buffer_put_int(&msg, SSH2_FXF_READ);
- attrib_clear(&junk); /* Send empty attributes */
- encode_attrib(&msg, &junk);
- send_msg(conn->fd_out, &msg);
- debug3("Sent message SSH2_FXP_OPEN I:%u P:%s", id, remote_path);
-
- handle = get_handle(conn->fd_in, id, &handle_len);
- if (handle == NULL) {
- buffer_free(&msg);
- return(-1);
- }
-
- local_fd = open(local_path, O_WRONLY | O_CREAT | O_TRUNC,
- mode | S_IWRITE);
- if (local_fd == -1) {
- error("Couldn't open local file \"%s\" for writing: %s",
- local_path, strerror(errno));
- buffer_free(&msg);
- xfree(handle);
- return(-1);
- }
-
- /* Read from remote and write to local */
- write_error = read_error = write_errno = num_req = offset = 0;
- max_req = 1;
- progress_counter = 0;
-
- if (showprogress && size != 0)
- start_progress_meter(remote_path, size, &progress_counter);
-
- while (num_req > 0 || max_req > 0) {
- char *data;
- u_int len;
-
- /*
- * Simulate EOF on interrupt: stop sending new requests and
- * allow outstanding requests to drain gracefully
- */
- if (interrupted) {
- if (num_req == 0) /* If we haven't started yet... */
- break;
- max_req = 0;
- }
-
- /* Send some more requests */
- while (num_req < max_req) {
- debug3("Request range %llu -> %llu (%d/%d)",
- (unsigned long long)offset,
- (unsigned long long)offset + buflen - 1,
- num_req, max_req);
- req = xmalloc(sizeof(*req));
- req->id = conn->msg_id++;
- req->len = buflen;
- req->offset = offset;
- offset += buflen;
- num_req++;
- TAILQ_INSERT_TAIL(&requests, req, tq);
- send_read_request(conn->fd_out, req->id, req->offset,
- req->len, handle, handle_len);
- }
-
- buffer_clear(&msg);
- get_msg(conn->fd_in, &msg);
- type = buffer_get_char(&msg);
- id = buffer_get_int(&msg);
- debug3("Received reply T:%u I:%u R:%d", type, id, max_req);
-
- /* Find the request in our queue */
- for (req = TAILQ_FIRST(&requests);
- req != NULL && req->id != id;
- req = TAILQ_NEXT(req, tq))
- ;
- if (req == NULL)
- fatal("Unexpected reply %u", id);
-
- switch (type) {
- case SSH2_FXP_STATUS:
- status = buffer_get_int(&msg);
- if (status != SSH2_FX_EOF)
- read_error = 1;
- max_req = 0;
- TAILQ_REMOVE(&requests, req, tq);
- xfree(req);
- num_req--;
- break;
- case SSH2_FXP_DATA:
- data = buffer_get_string(&msg, &len);
- debug3("Received data %llu -> %llu",
- (unsigned long long)req->offset,
- (unsigned long long)req->offset + len - 1);
- if (len > req->len)
- fatal("Received more data than asked for "
- "%u > %u", len, req->len);
- if ((lseek(local_fd, req->offset, SEEK_SET) == -1 ||
- atomicio(vwrite, local_fd, data, len) != len) &&
- !write_error) {
- write_errno = errno;
- write_error = 1;
- max_req = 0;
- }
- progress_counter += len;
- xfree(data);
-
- if (len == req->len) {
- TAILQ_REMOVE(&requests, req, tq);
- xfree(req);
- num_req--;
- } else {
- /* Resend the request for the missing data */
- debug3("Short data block, re-requesting "
- "%llu -> %llu (%2d)",
- (unsigned long long)req->offset + len,
- (unsigned long long)req->offset +
- req->len - 1, num_req);
- req->id = conn->msg_id++;
- req->len -= len;
- req->offset += len;
- send_read_request(conn->fd_out, req->id,
- req->offset, req->len, handle, handle_len);
- /* Reduce the request size */
- if (len < buflen)
- buflen = MAX(MIN_READ_SIZE, len);
- }
- if (max_req > 0) { /* max_req = 0 iff EOF received */
- if (size > 0 && offset > size) {
- /* Only one request at a time
- * after the expected EOF */
- debug3("Finish at %llu (%2d)",
- (unsigned long long)offset,
- num_req);
- max_req = 1;
- } else if (max_req <= conn->num_requests) {
- ++max_req;
- }
- }
- break;
- default:
- fatal("Expected SSH2_FXP_DATA(%u) packet, got %u",
- SSH2_FXP_DATA, type);
- }
- }
-
- if (showprogress && size)
- stop_progress_meter();
-
- /* Sanity check */
- if (TAILQ_FIRST(&requests) != NULL)
- fatal("Transfer complete, but requests still in queue");
-
- if (read_error) {
- error("Couldn't read from remote file \"%s\" : %s",
- remote_path, fx2txt(status));
- do_close(conn, handle, handle_len);
- } else if (write_error) {
- error("Couldn't write to \"%s\": %s", local_path,
- strerror(write_errno));
- status = -1;
- do_close(conn, handle, handle_len);
- } else {
- status = do_close(conn, handle, handle_len);
-
- /* Override umask and utimes if asked */
-#ifdef HAVE_FCHMOD
- if (pflag && fchmod(local_fd, mode) == -1)
-#else
- if (pflag && chmod(local_path, mode) == -1)
-#endif /* HAVE_FCHMOD */
- error("Couldn't set mode on \"%s\": %s", local_path,
- strerror(errno));
- if (pflag && (a->flags & SSH2_FILEXFER_ATTR_ACMODTIME)) {
- struct timeval tv[2];
- tv[0].tv_sec = a->atime;
- tv[1].tv_sec = a->mtime;
- tv[0].tv_usec = tv[1].tv_usec = 0;
- if (utimes(local_path, tv) == -1)
- error("Can't set times on \"%s\": %s",
- local_path, strerror(errno));
- }
- }
- close(local_fd);
- buffer_free(&msg);
- xfree(handle);
-
- return(status);
-}
-
-int
-do_upload(struct sftp_conn *conn, char *local_path, char *remote_path,
- int pflag)
-{
- int local_fd, status;
- u_int handle_len, id, type;
- u_int64_t offset;
- char *handle, *data;
- Buffer msg;
- struct stat sb;
- Attrib a;
- u_int32_t startid;
- u_int32_t ackid;
- struct outstanding_ack {
- u_int id;
- u_int len;
- u_int64_t offset;
- TAILQ_ENTRY(outstanding_ack) tq;
- };
- TAILQ_HEAD(ackhead, outstanding_ack) acks;
- struct outstanding_ack *ack = NULL;
-
- TAILQ_INIT(&acks);
-
- if ((local_fd = open(local_path, O_RDONLY, 0)) == -1) {
- error("Couldn't open local file \"%s\" for reading: %s",
- local_path, strerror(errno));
- return(-1);
- }
- if (fstat(local_fd, &sb) == -1) {
- error("Couldn't fstat local file \"%s\": %s",
- local_path, strerror(errno));
- close(local_fd);
- return(-1);
- }
- if (!S_ISREG(sb.st_mode)) {
- error("%s is not a regular file", local_path);
- close(local_fd);
- return(-1);
- }
- stat_to_attrib(&sb, &a);
-
- a.flags &= ~SSH2_FILEXFER_ATTR_SIZE;
- a.flags &= ~SSH2_FILEXFER_ATTR_UIDGID;
- a.perm &= 0777;
- if (!pflag)
- a.flags &= ~SSH2_FILEXFER_ATTR_ACMODTIME;
-
- buffer_init(&msg);
-
- /* Send open request */
- id = conn->msg_id++;
- buffer_put_char(&msg, SSH2_FXP_OPEN);
- buffer_put_int(&msg, id);
- buffer_put_cstring(&msg, remote_path);
- buffer_put_int(&msg, SSH2_FXF_WRITE|SSH2_FXF_CREAT|SSH2_FXF_TRUNC);
- encode_attrib(&msg, &a);
- send_msg(conn->fd_out, &msg);
- debug3("Sent message SSH2_FXP_OPEN I:%u P:%s", id, remote_path);
-
- buffer_clear(&msg);
-
- handle = get_handle(conn->fd_in, id, &handle_len);
- if (handle == NULL) {
- close(local_fd);
- buffer_free(&msg);
- return(-1);
- }
-
- startid = ackid = id + 1;
- data = xmalloc(conn->transfer_buflen);
-
- /* Read from local and write to remote */
- offset = 0;
- if (showprogress)
- start_progress_meter(local_path, sb.st_size, (off_t *)&offset);
-
- for (;;) {
- int len;
-
- /*
- * Can't use atomicio here because it returns 0 on EOF,
- * thus losing the last block of the file.
- * Simulate an EOF on interrupt, allowing ACKs from the
- * server to drain.
- */
- if (interrupted)
- len = 0;
- else do
- len = read(local_fd, data, conn->transfer_buflen);
- while ((len == -1) && (errno == EINTR || errno == EAGAIN));
-
- if (len == -1)
- fatal("Couldn't read from \"%s\": %s", local_path,
- strerror(errno));
-
- if (len != 0) {
- ack = xmalloc(sizeof(*ack));
- ack->id = ++id;
- ack->offset = offset;
- ack->len = len;
- TAILQ_INSERT_TAIL(&acks, ack, tq);
-
- buffer_clear(&msg);
- buffer_put_char(&msg, SSH2_FXP_WRITE);
- buffer_put_int(&msg, ack->id);
- buffer_put_string(&msg, handle, handle_len);
- buffer_put_int64(&msg, offset);
- buffer_put_string(&msg, data, len);
- send_msg(conn->fd_out, &msg);
- debug3("Sent message SSH2_FXP_WRITE I:%u O:%llu S:%u",
- id, (unsigned long long)offset, len);
- } else if (TAILQ_FIRST(&acks) == NULL)
- break;
-
- if (ack == NULL)
- fatal("Unexpected ACK %u", id);
-
- if (id == startid || len == 0 ||
- id - ackid >= conn->num_requests) {
- u_int r_id;
-
- buffer_clear(&msg);
- get_msg(conn->fd_in, &msg);
- type = buffer_get_char(&msg);
- r_id = buffer_get_int(&msg);
-
- if (type != SSH2_FXP_STATUS)
- fatal("Expected SSH2_FXP_STATUS(%d) packet, "
- "got %d", SSH2_FXP_STATUS, type);
-
- status = buffer_get_int(&msg);
- debug3("SSH2_FXP_STATUS %d", status);
-
- /* Find the request in our queue */
- for (ack = TAILQ_FIRST(&acks);
- ack != NULL && ack->id != r_id;
- ack = TAILQ_NEXT(ack, tq))
- ;
- if (ack == NULL)
- fatal("Can't find request for ID %u", r_id);
- TAILQ_REMOVE(&acks, ack, tq);
-
- if (status != SSH2_FX_OK) {
- error("Couldn't write to remote file \"%s\": %s",
- remote_path, fx2txt(status));
- if (showprogress)
- stop_progress_meter();
- do_close(conn, handle, handle_len);
- close(local_fd);
- xfree(data);
- xfree(ack);
- status = -1;
- goto done;
- }
- debug3("In write loop, ack for %u %u bytes at %llu",
- ack->id, ack->len, (unsigned long long)ack->offset);
- ++ackid;
- xfree(ack);
- }
- offset += len;
- }
- if (showprogress)
- stop_progress_meter();
- xfree(data);
-
- if (close(local_fd) == -1) {
- error("Couldn't close local file \"%s\": %s", local_path,
- strerror(errno));
- do_close(conn, handle, handle_len);
- status = -1;
- goto done;
- }
-
- /* Override umask and utimes if asked */
- if (pflag)
- do_fsetstat(conn, handle, handle_len, &a);
-
- status = do_close(conn, handle, handle_len);
-
-done:
- xfree(handle);
- buffer_free(&msg);
- return(status);
-}
diff --git a/usr/src/cmd/ssh/sftp/sftp-glob.c b/usr/src/cmd/ssh/sftp/sftp-glob.c
deleted file mode 100644
index 626e80922a..0000000000
--- a/usr/src/cmd/ssh/sftp/sftp-glob.c
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Copyright (c) 2001-2004 Damien Miller <djm@openbsd.org>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-/* $OpenBSD: sftp-glob.c,v 1.22 2006/08/03 03:34:42 deraadt Exp $ */
-
-#include "includes.h"
-
-#include <sys/types.h>
-#ifdef HAVE_SYS_STAT_H
-# include <sys/stat.h>
-#endif
-
-#include <dirent.h>
-#include <string.h>
-
-#include "xmalloc.h"
-#include "sftp.h"
-#include "buffer.h"
-#include "sftp-common.h"
-#include "sftp-client.h"
-
-int remote_glob(struct sftp_conn *, const char *, int,
- int (*)(const char *, int), glob_t *);
-
-struct SFTP_OPENDIR {
- SFTP_DIRENT **dir;
- int offset;
-};
-
-static struct {
- struct sftp_conn *conn;
-} cur;
-
-static void *
-fudge_opendir(const char *path)
-{
- struct SFTP_OPENDIR *r;
-
- r = xmalloc(sizeof(*r));
-
- if (do_readdir(cur.conn, (char *)path, &r->dir)) {
- xfree(r);
- return(NULL);
- }
-
- r->offset = 0;
-
- return((void *)r);
-}
-
-static struct dirent *
-fudge_readdir(struct SFTP_OPENDIR *od)
-{
- /* Solaris needs sizeof(dirent) + path length (see below) */
- static union {
- char buf_chars[sizeof (struct dirent) + MAXPATHLEN];
- struct dirent buf_dirent;
- } buf;
- struct dirent *ret = &buf.buf_dirent;
-#ifdef __GNU_LIBRARY__
- static int inum = 1;
-#endif /* __GNU_LIBRARY__ */
-
- if (od->dir[od->offset] == NULL)
- return(NULL);
-
- memset(buf.buf_chars, 0, sizeof (buf.buf_chars));
-
- /*
- * Solaris defines dirent->d_name as a one byte array and expects
- * you to hack around it.
- */
-#ifdef BROKEN_ONE_BYTE_DIRENT_D_NAME
- strlcpy(ret->d_name, od->dir[od->offset++]->filename, MAXPATHLEN);
-#else
- strlcpy(ret->d_name, od->dir[od->offset++]->filename,
- sizeof(ret->d_name));
-#endif
-#ifdef __GNU_LIBRARY__
- /*
- * Idiot glibc uses extensions to struct dirent for readdir with
- * ALTDIRFUNCs. Not that this is documented anywhere but the
- * source... Fake an inode number to appease it.
- */
- ret->d_ino = inum++;
- if (!inum)
- inum = 1;
-#endif /* __GNU_LIBRARY__ */
-
- return(ret);
-}
-
-static void
-fudge_closedir(struct SFTP_OPENDIR *od)
-{
- free_sftp_dirents(od->dir);
- xfree(od);
-}
-
-static int
-fudge_lstat(const char *path, struct stat *st)
-{
- Attrib *a;
-
- if (!(a = do_lstat(cur.conn, (char *)path, 0)))
- return(-1);
-
- attrib_to_stat(a, st);
-
- return(0);
-}
-
-static int
-fudge_stat(const char *path, struct stat *st)
-{
- Attrib *a;
-
- if (!(a = do_stat(cur.conn, (char *)path, 0)))
- return(-1);
-
- attrib_to_stat(a, st);
-
- return(0);
-}
-
-int
-remote_glob(struct sftp_conn *conn, const char *pattern, int flags,
- int (*errfunc)(const char *, int), glob_t *pglob)
-{
- pglob->gl_opendir = fudge_opendir;
- pglob->gl_readdir = (struct dirent *(*)(void *))fudge_readdir;
- pglob->gl_closedir = (void (*)(void *))fudge_closedir;
- pglob->gl_lstat = fudge_lstat;
- pglob->gl_stat = fudge_stat;
-
- memset(&cur, 0, sizeof(cur));
- cur.conn = conn;
-
- return(glob(pattern, flags|GLOB_LIMIT|GLOB_ALTDIRFUNC, errfunc, pglob));
-}
diff --git a/usr/src/cmd/ssh/sftp/sftp.c b/usr/src/cmd/ssh/sftp/sftp.c
deleted file mode 100644
index 10b971f02c..0000000000
--- a/usr/src/cmd/ssh/sftp/sftp.c
+++ /dev/null
@@ -1,1690 +0,0 @@
-/*
- * Copyright (c) 2001-2004 Damien Miller <djm@openbsd.org>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-/* $OpenBSD: sftp.c,v 1.96 2007/01/03 04:09:15 stevesk Exp $ */
-
-#include "includes.h"
-
-#include <sys/types.h>
-#include <sys/ioctl.h>
-#ifdef HAVE_SYS_STAT_H
-# include <sys/stat.h>
-#endif
-#include <sys/param.h>
-#include <sys/socket.h>
-#include <sys/wait.h>
-
-#include <errno.h>
-
-#ifdef HAVE_PATHS_H
-# include <paths.h>
-#endif
-
-#ifdef USE_LIBEDIT
-#include <histedit.h>
-#else
-#ifdef USE_LIBTECLA
-#include <libtecla.h>
-#define MAX_LINE_LEN 2048
-#define MAX_CMD_HIST 10000
-#endif /* USE_LIBTECLA */
-#endif /* USE_LIBEDIT */
-
-#include <signal.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-#include <stdarg.h>
-
-#include "xmalloc.h"
-#include "log.h"
-#include "pathnames.h"
-#include "misc.h"
-
-#include "sftp.h"
-#include "buffer.h"
-#include "sftp-common.h"
-#include "sftp-client.h"
-
-#ifdef HAVE___PROGNAME
-extern char *__progname;
-#else
-char *__progname;
-#endif
-
-
-/* File to read commands from */
-FILE* infile;
-
-/* Are we in batchfile mode? */
-int batchmode = 0;
-
-/* Size of buffer used when copying files */
-size_t copy_buffer_len = 32768;
-
-/* Number of concurrent outstanding requests */
-size_t num_requests = 16;
-
-/* PID of ssh transport process */
-static pid_t sshpid = -1;
-
-/* This is set to 0 if the progressmeter is not desired. */
-int showprogress = 1;
-
-/* SIGINT received during command processing */
-volatile sig_atomic_t interrupted = 0;
-
-/* I wish qsort() took a separate ctx for the comparison function...*/
-int sort_flag;
-
-int remote_glob(struct sftp_conn *, const char *, int,
- int (*)(const char *, int), glob_t *); /* proto for sftp-glob.c */
-
-/* Separators for interactive commands */
-#define WHITESPACE " \t\r\n"
-
-/* ls flags */
-#define LS_LONG_VIEW 0x01 /* Full view ala ls -l */
-#define LS_SHORT_VIEW 0x02 /* Single row view ala ls -1 */
-#define LS_NUMERIC_VIEW 0x04 /* Long view with numeric uid/gid */
-#define LS_NAME_SORT 0x08 /* Sort by name (default) */
-#define LS_TIME_SORT 0x10 /* Sort by mtime */
-#define LS_SIZE_SORT 0x20 /* Sort by file size */
-#define LS_REVERSE_SORT 0x40 /* Reverse sort order */
-#define LS_SHOW_ALL 0x80 /* Don't skip filenames starting with '.' */
-
-#define VIEW_FLAGS (LS_LONG_VIEW|LS_SHORT_VIEW|LS_NUMERIC_VIEW)
-#define SORT_FLAGS (LS_NAME_SORT|LS_TIME_SORT|LS_SIZE_SORT)
-
-/* Commands for interactive mode */
-#define I_CHDIR 1
-#define I_CHGRP 2
-#define I_CHMOD 3
-#define I_CHOWN 4
-#define I_GET 5
-#define I_HELP 6
-#define I_LCHDIR 7
-#define I_LLS 8
-#define I_LMKDIR 9
-#define I_LPWD 10
-#define I_LS 11
-#define I_LUMASK 12
-#define I_MKDIR 13
-#define I_PUT 14
-#define I_PWD 15
-#define I_QUIT 16
-#define I_RENAME 17
-#define I_RM 18
-#define I_RMDIR 19
-#define I_SHELL 20
-#define I_SYMLINK 21
-#define I_VERSION 22
-#define I_PROGRESS 23
-
-struct CMD {
- const char *c;
- const int n;
-};
-
-static const struct CMD cmds[] = {
- { "bye", I_QUIT },
- { "cd", I_CHDIR },
- { "chdir", I_CHDIR },
- { "chgrp", I_CHGRP },
- { "chmod", I_CHMOD },
- { "chown", I_CHOWN },
- { "dir", I_LS },
- { "exit", I_QUIT },
- { "get", I_GET },
- { "mget", I_GET },
- { "help", I_HELP },
- { "lcd", I_LCHDIR },
- { "lchdir", I_LCHDIR },
- { "lls", I_LLS },
- { "lmkdir", I_LMKDIR },
- { "ln", I_SYMLINK },
- { "lpwd", I_LPWD },
- { "ls", I_LS },
- { "lumask", I_LUMASK },
- { "mkdir", I_MKDIR },
- { "progress", I_PROGRESS },
- { "put", I_PUT },
- { "mput", I_PUT },
- { "pwd", I_PWD },
- { "quit", I_QUIT },
- { "rename", I_RENAME },
- { "rm", I_RM },
- { "rmdir", I_RMDIR },
- { "symlink", I_SYMLINK },
- { "version", I_VERSION },
- { "!", I_SHELL },
- { "?", I_HELP },
- { NULL, -1}
-};
-
-int interactive_loop(int fd_in, int fd_out, char *file1, char *file2);
-
-/* ARGSUSED */
-static void
-killchild(int signo)
-{
- if (sshpid > 1) {
- kill(sshpid, SIGTERM);
- waitpid(sshpid, NULL, 0);
- }
-
- _exit(1);
-}
-
-/* ARGSUSED */
-static void
-cmd_interrupt(int signo)
-{
- const char msg[] = "\rInterrupt \n";
- int olderrno = errno;
-
- write(STDERR_FILENO, msg, sizeof(msg) - 1);
- interrupted = 1;
- errno = olderrno;
-}
-
-static void
-help(void)
-{
- printf(gettext("Available commands:\n"
- "cd path Change remote directory to 'path'\n"
- "lcd path Change local directory to 'path'\n"
- "chgrp grp path Change group of file 'path' to 'grp'\n"
- "chmod mode path Change permissions of file 'path' to 'mode'\n"
- "chown own path Change owner of file 'path' to 'own'\n"
- "help Display this help text\n"
- "get remote-path [local-path] Download file\n"
- "lls [ls-options [path]] Display local directory listing\n"
- "ln oldpath newpath Symlink remote file\n"
- "lmkdir path Create local directory\n"
- "lpwd Print local working directory\n"
- "ls [path] Display remote directory listing\n"
- "lumask umask Set local umask to 'umask'\n"
- "mkdir path Create remote directory\n"
- "progress Toggle display of progress meter\n"
- "put local-path [remote-path] Upload file\n"
- "pwd Display remote working directory\n"
- "exit Quit sftp\n"
- "quit Quit sftp\n"
- "rename oldpath newpath Rename remote file\n"
- "rmdir path Remove remote directory\n"
- "rm path Delete remote file\n"
- "symlink oldpath newpath Symlink remote file\n"
- "version Show SFTP version\n"
- "!command Execute 'command' in local shell\n"
- "! Escape to local shell\n"
- "? Synonym for help\n"));
-}
-
-static void
-local_do_shell(const char *args)
-{
- int status;
- char *shell;
- pid_t pid;
-
- if (!*args)
- args = NULL;
-
- if ((shell = getenv("SHELL")) == NULL)
- shell = _PATH_BSHELL;
-
- if ((pid = fork()) == -1)
- fatal("Couldn't fork: %s", strerror(errno));
-
- if (pid == 0) {
- /* XXX: child has pipe fds to ssh subproc open - issue? */
- if (args) {
- debug3("Executing %s -c \"%s\"", shell, args);
- execl(shell, shell, "-c", args, (char *)NULL);
- } else {
- debug3("Executing %s", shell);
- execl(shell, shell, (char *)NULL);
- }
- fprintf(stderr, gettext("Couldn't execute \"%s\": %s\n"), shell,
- strerror(errno));
- _exit(1);
- }
- while (waitpid(pid, &status, 0) == -1)
- if (errno != EINTR)
- fatal("Couldn't wait for child: %s", strerror(errno));
- if (!WIFEXITED(status))
- error("Shell exited abnormally");
- else if (WEXITSTATUS(status))
- error("Shell exited with status %d", WEXITSTATUS(status));
-}
-
-static void
-local_do_ls(const char *args)
-{
- if (!args || !*args)
- local_do_shell(_PATH_LS);
- else {
- int len = strlen(_PATH_LS " ") + strlen(args) + 1;
- char *buf = xmalloc(len);
-
- /* XXX: quoting - rip quoting code from ftp? */
- snprintf(buf, len, _PATH_LS " %s", args);
- local_do_shell(buf);
- xfree(buf);
- }
-}
-
-/* Strip one path (usually the pwd) from the start of another */
-static char *
-path_strip(char *path, char *strip)
-{
- size_t len;
-
- if (strip == NULL)
- return (xstrdup(path));
-
- len = strlen(strip);
- if (strncmp(path, strip, len) == 0) {
- if (strip[len - 1] != '/' && path[len] == '/')
- len++;
- return (xstrdup(path + len));
- }
-
- return (xstrdup(path));
-}
-
-static char *
-path_append(char *p1, char *p2)
-{
- char *ret;
- size_t len = strlen(p1) + strlen(p2) + 2;
-
- ret = xmalloc(len);
- strlcpy(ret, p1, len);
- if (p1[0] != '\0' && p1[strlen(p1) - 1] != '/')
- strlcat(ret, "/", len);
- strlcat(ret, p2, len);
-
- return(ret);
-}
-
-static char *
-make_absolute(char *p, char *pwd)
-{
- char *abs_str;
-
- /* Derelativise */
- if (p && p[0] != '/') {
- abs_str = path_append(pwd, p);
- xfree(p);
- return(abs_str);
- } else
- return(p);
-}
-
-static int
-infer_path(const char *p, char **ifp)
-{
- char *cp;
-
- cp = strrchr(p, '/');
- if (cp == NULL) {
- *ifp = xstrdup(p);
- return(0);
- }
-
- if (!cp[1]) {
- error("Invalid path");
- return(-1);
- }
-
- *ifp = xstrdup(cp + 1);
- return(0);
-}
-
-static int
-parse_getput_flags(const char **cpp, int *pflag)
-{
- const char *cp = *cpp;
-
- /* Check for flags */
- if (cp[0] == '-' && cp[1] && strchr(WHITESPACE, cp[2])) {
- switch (cp[1]) {
- case 'p':
- case 'P':
- *pflag = 1;
- break;
- default:
- error("Invalid flag -%c", cp[1]);
- return(-1);
- }
- cp += 2;
- *cpp = cp + strspn(cp, WHITESPACE);
- }
-
- return(0);
-}
-
-static int
-parse_ls_flags(const char **cpp, int *lflag)
-{
- const char *cp = *cpp;
-
- /* Defaults */
- *lflag = LS_NAME_SORT;
-
- /* Check for flags */
- if (cp++[0] == '-') {
- for (; strchr(WHITESPACE, *cp) == NULL; cp++) {
- switch (*cp) {
- case 'l':
- *lflag &= ~VIEW_FLAGS;
- *lflag |= LS_LONG_VIEW;
- break;
- case '1':
- *lflag &= ~VIEW_FLAGS;
- *lflag |= LS_SHORT_VIEW;
- break;
- case 'n':
- *lflag &= ~VIEW_FLAGS;
- *lflag |= LS_NUMERIC_VIEW|LS_LONG_VIEW;
- break;
- case 'S':
- *lflag &= ~SORT_FLAGS;
- *lflag |= LS_SIZE_SORT;
- break;
- case 't':
- *lflag &= ~SORT_FLAGS;
- *lflag |= LS_TIME_SORT;
- break;
- case 'r':
- *lflag |= LS_REVERSE_SORT;
- break;
- case 'f':
- *lflag &= ~SORT_FLAGS;
- break;
- case 'a':
- *lflag |= LS_SHOW_ALL;
- break;
- default:
- error("Invalid flag -%c", *cp);
- return(-1);
- }
- }
- *cpp = cp + strspn(cp, WHITESPACE);
- }
-
- return(0);
-}
-
-static int
-get_pathname(const char **cpp, char **path)
-{
- const char *cp = *cpp, *end;
- char quot;
- u_int i, j;
-
- cp += strspn(cp, WHITESPACE);
- if (!*cp) {
- *cpp = cp;
- *path = NULL;
- return (0);
- }
-
- *path = xmalloc(strlen(cp) + 1);
-
- /* Check for quoted filenames */
- if (*cp == '\"' || *cp == '\'') {
- quot = *cp++;
-
- /* Search for terminating quote, unescape some chars */
- for (i = j = 0; i <= strlen(cp); i++) {
- if (cp[i] == quot) { /* Found quote */
- i++;
- (*path)[j] = '\0';
- break;
- }
- if (cp[i] == '\0') { /* End of string */
- error("Unterminated quote");
- goto fail;
- }
- if (cp[i] == '\\') { /* Escaped characters */
- i++;
- if (cp[i] != '\'' && cp[i] != '\"' &&
- cp[i] != '\\') {
- error("Bad escaped character '\\%c'",
- cp[i]);
- goto fail;
- }
- }
- (*path)[j++] = cp[i];
- }
-
- if (j == 0) {
- error("Empty quotes");
- goto fail;
- }
- *cpp = cp + i + strspn(cp + i, WHITESPACE);
- } else {
- /* Read to end of filename */
- end = strpbrk(cp, WHITESPACE);
- if (end == NULL)
- end = strchr(cp, '\0');
- *cpp = end + strspn(end, WHITESPACE);
-
- memcpy(*path, cp, end - cp);
- (*path)[end - cp] = '\0';
- }
- return (0);
-
- fail:
- xfree(*path);
- *path = NULL;
- return (-1);
-}
-
-static int
-is_dir(char *path)
-{
- struct stat sb;
-
- /* XXX: report errors? */
- if (stat(path, &sb) == -1)
- return(0);
-
- return(S_ISDIR(sb.st_mode));
-}
-
-static int
-is_reg(char *path)
-{
- struct stat sb;
-
- if (stat(path, &sb) == -1)
- fatal("stat %s: %s", path, strerror(errno));
-
- return(S_ISREG(sb.st_mode));
-}
-
-static int
-remote_is_dir(struct sftp_conn *conn, char *path)
-{
- Attrib *a;
-
- /* XXX: report errors? */
- if ((a = do_stat(conn, path, 1)) == NULL)
- return(0);
- if (!(a->flags & SSH2_FILEXFER_ATTR_PERMISSIONS))
- return(0);
- return(S_ISDIR(a->perm));
-}
-
-static int
-process_get(struct sftp_conn *conn, char *src, char *dst, char *pwd, int pflag)
-{
- char *abs_src = NULL;
- char *abs_dst = NULL;
- char *tmp;
- glob_t g;
- int err = 0;
- int i;
-
- abs_src = xstrdup(src);
- abs_src = make_absolute(abs_src, pwd);
-
- memset(&g, 0, sizeof(g));
- debug3("Looking up %s", abs_src);
- if (remote_glob(conn, abs_src, 0, NULL, &g)) {
- error("File \"%s\" not found.", abs_src);
- err = -1;
- goto out;
- }
-
- /* If multiple matches, dst must be a directory or unspecified */
- if (g.gl_matchc > 1 && dst && !is_dir(dst)) {
- error("Multiple files match, but \"%s\" is not a directory",
- dst);
- err = -1;
- goto out;
- }
-
- for (i = 0; g.gl_pathv[i] && !interrupted; i++) {
- if (infer_path(g.gl_pathv[i], &tmp)) {
- err = -1;
- goto out;
- }
-
- if (g.gl_matchc == 1 && dst) {
- /* If directory specified, append filename */
- xfree(tmp);
- if (is_dir(dst)) {
- if (infer_path(g.gl_pathv[0], &tmp)) {
- err = 1;
- goto out;
- }
- abs_dst = path_append(dst, tmp);
- xfree(tmp);
- } else
- abs_dst = xstrdup(dst);
- } else if (dst) {
- abs_dst = path_append(dst, tmp);
- xfree(tmp);
- } else
- abs_dst = tmp;
-
- printf(gettext("Fetching %s to %s\n"), g.gl_pathv[i], abs_dst);
- if (do_download(conn, g.gl_pathv[i], abs_dst, pflag) == -1)
- err = -1;
- xfree(abs_dst);
- abs_dst = NULL;
- }
-
-out:
- xfree(abs_src);
- globfree(&g);
- return(err);
-}
-
-static int
-process_put(struct sftp_conn *conn, char *src, char *dst, char *pwd, int pflag)
-{
- char *tmp_dst = NULL;
- char *abs_dst = NULL;
- char *tmp;
- glob_t g;
- int err = 0;
- int i;
-
- if (dst) {
- tmp_dst = xstrdup(dst);
- tmp_dst = make_absolute(tmp_dst, pwd);
- }
-
- memset(&g, 0, sizeof(g));
- debug3("Looking up %s", src);
- if (glob(src, GLOB_LIMIT, NULL, &g)) {
- error("File \"%s\" not found.", src);
- err = -1;
- goto out;
- }
-
- /* If multiple matches, dst may be directory or unspecified */
- if (g.gl_matchc > 1 && tmp_dst && !remote_is_dir(conn, tmp_dst)) {
- error("Multiple files match, but \"%s\" is not a directory",
- tmp_dst);
- err = -1;
- goto out;
- }
-
- for (i = 0; g.gl_pathv[i] && !interrupted; i++) {
- if (!is_reg(g.gl_pathv[i])) {
- error("skipping non-regular file %s",
- g.gl_pathv[i]);
- continue;
- }
- if (infer_path(g.gl_pathv[i], &tmp)) {
- err = -1;
- goto out;
- }
-
- if (g.gl_matchc == 1 && tmp_dst) {
- /* If directory specified, append filename */
- if (remote_is_dir(conn, tmp_dst)) {
- if (infer_path(g.gl_pathv[0], &tmp)) {
- err = 1;
- goto out;
- }
- abs_dst = path_append(tmp_dst, tmp);
- xfree(tmp);
- } else
- abs_dst = xstrdup(tmp_dst);
-
- } else if (tmp_dst) {
- abs_dst = path_append(tmp_dst, tmp);
- xfree(tmp);
- } else
- abs_dst = make_absolute(tmp, pwd);
-
- printf(gettext("Uploading %s to %s\n"), g.gl_pathv[i], abs_dst);
- if (do_upload(conn, g.gl_pathv[i], abs_dst, pflag) == -1)
- err = -1;
- }
-
-out:
- if (abs_dst)
- xfree(abs_dst);
- if (tmp_dst)
- xfree(tmp_dst);
- globfree(&g);
- return(err);
-}
-
-static int
-sdirent_comp(const void *aa, const void *bb)
-{
- SFTP_DIRENT *a = *(SFTP_DIRENT **)aa;
- SFTP_DIRENT *b = *(SFTP_DIRENT **)bb;
- int rmul = sort_flag & LS_REVERSE_SORT ? -1 : 1;
-
-#define NCMP(a,b) (a == b ? 0 : (a < b ? 1 : -1))
- if (sort_flag & LS_NAME_SORT)
- return (rmul * strcmp(a->filename, b->filename));
- else if (sort_flag & LS_TIME_SORT)
- return (rmul * NCMP(a->a.mtime, b->a.mtime));
- else if (sort_flag & LS_SIZE_SORT)
- return (rmul * NCMP(a->a.size, b->a.size));
-
- fatal("Unknown ls sort type");
-
- /* NOTREACHED */
- return (0);
-}
-
-/* sftp ls.1 replacement for directories */
-static int
-do_ls_dir(struct sftp_conn *conn, char *path, char *strip_path, int lflag)
-{
- int n;
- u_int c = 1, colspace = 0, columns = 1;
- SFTP_DIRENT **d;
-
- if ((n = do_readdir(conn, path, &d)) != 0)
- return (n);
-
- if (!(lflag & LS_SHORT_VIEW)) {
- u_int m = 0, width = 80;
- struct winsize ws;
- char *tmp;
-
- /* Count entries for sort and find longest filename */
- for (n = 0; d[n] != NULL; n++) {
- if (d[n]->filename[0] != '.' || (lflag & LS_SHOW_ALL))
- m = MAX(m, strlen(d[n]->filename));
- }
-
- /* Add any subpath that also needs to be counted */
- tmp = path_strip(path, strip_path);
- m += strlen(tmp);
- xfree(tmp);
-
- if (ioctl(fileno(stdin), TIOCGWINSZ, &ws) != -1)
- width = ws.ws_col;
-
- columns = width / (m + 2);
- columns = MAX(columns, 1);
- colspace = width / columns;
- colspace = MIN(colspace, width);
- }
-
- if (lflag & SORT_FLAGS) {
- for (n = 0; d[n] != NULL; n++)
- ; /* count entries */
- sort_flag = lflag & (SORT_FLAGS|LS_REVERSE_SORT);
- qsort(d, n, sizeof(*d), sdirent_comp);
- }
-
- for (n = 0; d[n] != NULL && !interrupted; n++) {
- char *tmp, *fname;
-
- if (d[n]->filename[0] == '.' && !(lflag & LS_SHOW_ALL))
- continue;
-
- tmp = path_append(path, d[n]->filename);
- fname = path_strip(tmp, strip_path);
- xfree(tmp);
-
- if (lflag & LS_LONG_VIEW) {
- if (lflag & LS_NUMERIC_VIEW) {
- char *lname;
- struct stat sb;
-
- memset(&sb, 0, sizeof(sb));
- attrib_to_stat(&d[n]->a, &sb);
- lname = ls_file(fname, &sb, 1);
- printf("%s\n", lname);
- xfree(lname);
- } else
- printf("%s\n", d[n]->longname);
- } else {
- printf("%-*s", colspace, fname);
- if (c >= columns) {
- printf("\n");
- c = 1;
- } else
- c++;
- }
-
- xfree(fname);
- }
-
- if (!(lflag & LS_LONG_VIEW) && (c != 1))
- printf("\n");
-
- free_sftp_dirents(d);
- return (0);
-}
-
-/* sftp ls.1 replacement which handles path globs */
-static int
-do_globbed_ls(struct sftp_conn *conn, char *path, char *strip_path,
- int lflag)
-{
- glob_t g;
- u_int i, c = 1, colspace = 0, columns = 1;
- Attrib *a = NULL;
-
- memset(&g, 0, sizeof(g));
-
- if (remote_glob(conn, path, GLOB_MARK|GLOB_NOCHECK|GLOB_BRACE,
- NULL, &g) || (g.gl_pathc && !g.gl_matchc)) {
- if (g.gl_pathc)
- globfree(&g);
- error("Can't ls: \"%s\" not found", path);
- return (-1);
- }
-
- if (interrupted)
- goto out;
-
- /*
- * If the glob returns a single match and it is a directory,
- * then just list its contents.
- */
- if (g.gl_matchc == 1) {
- if ((a = do_lstat(conn, g.gl_pathv[0], 1)) == NULL) {
- globfree(&g);
- return (-1);
- }
- if ((a->flags & SSH2_FILEXFER_ATTR_PERMISSIONS) &&
- S_ISDIR(a->perm)) {
- int err;
-
- err = do_ls_dir(conn, g.gl_pathv[0], strip_path, lflag);
- globfree(&g);
- return (err);
- }
- }
-
- if (!(lflag & LS_SHORT_VIEW)) {
- u_int m = 0, width = 80;
- struct winsize ws;
-
- /* Count entries for sort and find longest filename */
- for (i = 0; g.gl_pathv[i]; i++)
- m = MAX(m, strlen(g.gl_pathv[i]));
-
- if (ioctl(fileno(stdin), TIOCGWINSZ, &ws) != -1)
- width = ws.ws_col;
-
- columns = width / (m + 2);
- columns = MAX(columns, 1);
- colspace = width / columns;
- }
-
- for (i = 0; g.gl_pathv[i] && !interrupted; i++, a = NULL) {
- char *fname;
-
- fname = path_strip(g.gl_pathv[i], strip_path);
-
- if (lflag & LS_LONG_VIEW) {
- char *lname;
- struct stat sb;
-
- /*
- * XXX: this is slow - 1 roundtrip per path
- * A solution to this is to fork glob() and
- * build a sftp specific version which keeps the
- * attribs (which currently get thrown away)
- * that the server returns as well as the filenames.
- */
- memset(&sb, 0, sizeof(sb));
- if (a == NULL)
- a = do_lstat(conn, g.gl_pathv[i], 1);
- if (a != NULL)
- attrib_to_stat(a, &sb);
- lname = ls_file(fname, &sb, 1);
- printf("%s\n", lname);
- xfree(lname);
- } else {
- printf("%-*s", colspace, fname);
- if (c >= columns) {
- printf("\n");
- c = 1;
- } else
- c++;
- }
- xfree(fname);
- }
-
- if (!(lflag & LS_LONG_VIEW) && (c != 1))
- printf("\n");
-
- out:
- if (g.gl_pathc)
- globfree(&g);
-
- return (0);
-}
-
-static int
-parse_args(const char **cpp, int *pflag, int *lflag, int *iflag,
- unsigned long *n_arg, char **path1, char **path2)
-{
- const char *cmd, *cp = *cpp;
- char *cp2;
- int base = 0;
- long l;
- int i, cmdnum;
-
- /* Skip leading whitespace */
- cp = cp + strspn(cp, WHITESPACE);
-
- /* Ignore blank lines and lines which begin with comment '#' char */
- if (*cp == '\0' || *cp == '#')
- return (0);
-
- /* Check for leading '-' (disable error processing) */
- *iflag = 0;
- if (*cp == '-') {
- *iflag = 1;
- cp++;
- }
-
- /* Figure out which command we have */
- for (i = 0; cmds[i].c; i++) {
- int cmdlen = strlen(cmds[i].c);
-
- /* Check for command followed by whitespace */
- if (!strncasecmp(cp, cmds[i].c, cmdlen) &&
- strchr(WHITESPACE, cp[cmdlen])) {
- cp += cmdlen;
- cp = cp + strspn(cp, WHITESPACE);
- break;
- }
- }
- cmdnum = cmds[i].n;
- cmd = cmds[i].c;
-
- /* Special case */
- if (*cp == '!') {
- cp++;
- cmdnum = I_SHELL;
- } else if (cmdnum == -1) {
- error("Invalid command.");
- return (-1);
- }
-
- /* Get arguments and parse flags */
- *lflag = *pflag = *n_arg = 0;
- *path1 = *path2 = NULL;
- switch (cmdnum) {
- case I_GET:
- case I_PUT:
- if (parse_getput_flags(&cp, pflag))
- return(-1);
- /* Get first pathname (mandatory) */
- if (get_pathname(&cp, path1))
- return(-1);
- if (*path1 == NULL) {
- error("You must specify at least one path after a "
- "%s command.", cmd);
- return(-1);
- }
- /* Try to get second pathname (optional) */
- if (get_pathname(&cp, path2))
- return(-1);
- break;
- case I_RENAME:
- case I_SYMLINK:
- if (get_pathname(&cp, path1))
- return(-1);
- if (get_pathname(&cp, path2))
- return(-1);
- if (!*path1 || !*path2) {
- error("You must specify two paths after a %s "
- "command.", cmd);
- return(-1);
- }
- break;
- case I_RM:
- case I_MKDIR:
- case I_RMDIR:
- case I_CHDIR:
- case I_LCHDIR:
- case I_LMKDIR:
- /* Get pathname (mandatory) */
- if (get_pathname(&cp, path1))
- return(-1);
- if (*path1 == NULL) {
- error("You must specify a path after a %s command.",
- cmd);
- return(-1);
- }
- break;
- case I_LS:
- if (parse_ls_flags(&cp, lflag))
- return(-1);
- /* Path is optional */
- if (get_pathname(&cp, path1))
- return(-1);
- break;
- case I_LLS:
- case I_SHELL:
- /* Uses the rest of the line */
- break;
- case I_LUMASK:
- base = 8;
- /* FALLTHRU */
- case I_CHMOD:
- base = 8;
- /* FALLTHRU */
- case I_CHOWN:
- case I_CHGRP:
- /* Get numeric arg (mandatory) */
- errno = 0;
- l = strtol(cp, &cp2, base);
- if (cp2 == cp || ((l == LONG_MIN || l == LONG_MAX) &&
- errno == ERANGE) || l < 0) {
- error("You must supply a numeric argument "
- "to the %s command.", cmd);
- return(-1);
- }
- cp = cp2;
- *n_arg = l;
- if (cmdnum == I_LUMASK && strchr(WHITESPACE, *cp))
- break;
- if (cmdnum == I_LUMASK || !strchr(WHITESPACE, *cp)) {
- error("You must supply a numeric argument "
- "to the %s command.", cmd);
- return(-1);
- }
- cp += strspn(cp, WHITESPACE);
-
- /* Get pathname (mandatory) */
- if (get_pathname(&cp, path1))
- return(-1);
- if (*path1 == NULL) {
- error("You must specify a path after a %s command.",
- cmd);
- return(-1);
- }
- break;
- case I_QUIT:
- case I_PWD:
- case I_LPWD:
- case I_HELP:
- case I_VERSION:
- case I_PROGRESS:
- break;
- default:
- fatal("Command not implemented");
- }
-
- *cpp = cp;
- return(cmdnum);
-}
-
-static int
-parse_dispatch_command(struct sftp_conn *conn, const char *cmd, char **pwd,
- int err_abort)
-{
- char *path1, *path2, *tmp;
- int pflag, lflag, iflag, cmdnum, i;
- unsigned long n_arg;
- Attrib a, *aa;
- char path_buf[MAXPATHLEN];
- int err = 0;
- glob_t g;
-
- path1 = path2 = NULL;
- cmdnum = parse_args(&cmd, &pflag, &lflag, &iflag, &n_arg,
- &path1, &path2);
-
- if (iflag != 0)
- err_abort = 0;
-
- memset(&g, 0, sizeof(g));
-
- /* Perform command */
- switch (cmdnum) {
- case 0:
- /* Blank line */
- break;
- case -1:
- /* Unrecognized command */
- err = -1;
- break;
- case I_GET:
- err = process_get(conn, path1, path2, *pwd, pflag);
- break;
- case I_PUT:
- err = process_put(conn, path1, path2, *pwd, pflag);
- break;
- case I_RENAME:
- path1 = make_absolute(path1, *pwd);
- path2 = make_absolute(path2, *pwd);
- err = do_rename(conn, path1, path2);
- break;
- case I_SYMLINK:
- path2 = make_absolute(path2, *pwd);
- err = do_symlink(conn, path1, path2);
- break;
- case I_RM:
- path1 = make_absolute(path1, *pwd);
- remote_glob(conn, path1, GLOB_NOCHECK, NULL, &g);
- for (i = 0; g.gl_pathv[i] && !interrupted; i++) {
- printf(gettext("Removing %s\n"), g.gl_pathv[i]);
- err = do_rm(conn, g.gl_pathv[i]);
- if (err != 0 && err_abort)
- break;
- }
- break;
- case I_MKDIR:
- path1 = make_absolute(path1, *pwd);
- attrib_clear(&a);
- a.flags |= SSH2_FILEXFER_ATTR_PERMISSIONS;
- a.perm = 0777;
- err = do_mkdir(conn, path1, &a);
- break;
- case I_RMDIR:
- path1 = make_absolute(path1, *pwd);
- err = do_rmdir(conn, path1);
- break;
- case I_CHDIR:
- path1 = make_absolute(path1, *pwd);
- if ((tmp = do_realpath(conn, path1)) == NULL) {
- err = 1;
- break;
- }
- if ((aa = do_stat(conn, tmp, 0)) == NULL) {
- xfree(tmp);
- err = 1;
- break;
- }
- if (!(aa->flags & SSH2_FILEXFER_ATTR_PERMISSIONS)) {
- error("Can't change directory: Can't check target");
- xfree(tmp);
- err = 1;
- break;
- }
- if (!S_ISDIR(aa->perm)) {
- error("Can't change directory: \"%s\" is not "
- "a directory", tmp);
- xfree(tmp);
- err = 1;
- break;
- }
- xfree(*pwd);
- *pwd = tmp;
- break;
- case I_LS:
- if (!path1) {
- do_globbed_ls(conn, *pwd, *pwd, lflag);
- break;
- }
-
- /* Strip pwd off beginning of non-absolute paths */
- tmp = NULL;
- if (*path1 != '/')
- tmp = *pwd;
-
- path1 = make_absolute(path1, *pwd);
- err = do_globbed_ls(conn, path1, tmp, lflag);
- break;
- case I_LCHDIR:
- if (chdir(path1) == -1) {
- error("Couldn't change local directory to "
- "\"%s\": %s", path1, strerror(errno));
- err = 1;
- }
- break;
- case I_LMKDIR:
- if (mkdir(path1, 0777) == -1) {
- error("Couldn't create local directory "
- "\"%s\": %s", path1, strerror(errno));
- err = 1;
- }
- break;
- case I_LLS:
- local_do_ls(cmd);
- break;
- case I_SHELL:
- local_do_shell(cmd);
- break;
- case I_LUMASK:
- umask(n_arg);
- printf(gettext("Local umask: %03lo\n"), n_arg);
- break;
- case I_CHMOD:
- path1 = make_absolute(path1, *pwd);
- attrib_clear(&a);
- a.flags |= SSH2_FILEXFER_ATTR_PERMISSIONS;
- a.perm = n_arg;
- remote_glob(conn, path1, GLOB_NOCHECK, NULL, &g);
- for (i = 0; g.gl_pathv[i] && !interrupted; i++) {
- printf(gettext("Changing mode on %s\n"), g.gl_pathv[i]);
- err = do_setstat(conn, g.gl_pathv[i], &a);
- if (err != 0 && err_abort)
- break;
- }
- break;
- case I_CHOWN:
- case I_CHGRP:
- path1 = make_absolute(path1, *pwd);
- remote_glob(conn, path1, GLOB_NOCHECK, NULL, &g);
- for (i = 0; g.gl_pathv[i] && !interrupted; i++) {
- if (!(aa = do_stat(conn, g.gl_pathv[i], 0))) {
- if (err != 0 && err_abort)
- break;
- else
- continue;
- }
- if (!(aa->flags & SSH2_FILEXFER_ATTR_UIDGID)) {
- error("Can't get current ownership of "
- "remote file \"%s\"", g.gl_pathv[i]);
- if (err != 0 && err_abort)
- break;
- else
- continue;
- }
- aa->flags &= SSH2_FILEXFER_ATTR_UIDGID;
- if (cmdnum == I_CHOWN) {
- printf(gettext("Changing owner on %s\n"), g.gl_pathv[i]);
- aa->uid = n_arg;
- } else {
- printf(gettext("Changing group on %s\n"), g.gl_pathv[i]);
- aa->gid = n_arg;
- }
- err = do_setstat(conn, g.gl_pathv[i], aa);
- if (err != 0 && err_abort)
- break;
- }
- break;
- case I_PWD:
- printf(gettext("Remote working directory: %s\n"), *pwd);
- break;
- case I_LPWD:
- if (!getcwd(path_buf, sizeof(path_buf))) {
- error("Couldn't get local cwd: %s", strerror(errno));
- err = -1;
- break;
- }
- printf(gettext("Local working directory: %s\n"), path_buf);
- break;
- case I_QUIT:
- /* Processed below */
- break;
- case I_HELP:
- help();
- break;
- case I_VERSION:
- printf(gettext("SFTP protocol version %u\n"), sftp_proto_version(conn));
- break;
- case I_PROGRESS:
- showprogress = !showprogress;
- if (showprogress)
- printf("Progress meter enabled\n");
- else
- printf("Progress meter disabled\n");
- break;
- default:
- fatal("%d is not implemented", cmdnum);
- }
-
- if (g.gl_pathc)
- globfree(&g);
- if (path1)
- xfree(path1);
- if (path2)
- xfree(path2);
-
- /* If an unignored error occurs in batch mode we should abort. */
- if (err_abort && err != 0)
- return (-1);
- else if (cmdnum == I_QUIT)
- return (1);
-
- return (0);
-}
-
-#ifdef USE_LIBEDIT
-static char *
-prompt(EditLine *el)
-{
- return ("sftp> ");
-}
-#else
-#ifdef USE_LIBTECLA
-/*
- * Disable default TAB completion for filenames, because it displays local
- * files for every commands, which is not desirable.
- */
-static
-CPL_MATCH_FN(nomatch)
-{
- return (0);
-}
-#endif /* USE_LIBTECLA */
-#endif /* USE_LIBEDIT */
-
-int
-interactive_loop(int fd_in, int fd_out, char *file1, char *file2)
-{
- char *pwd;
- char *dir = NULL;
- char cmd[2048];
- struct sftp_conn *conn;
- int err, interactive;
- void *il = NULL;
-
-#ifdef USE_LIBEDIT
- EditLine *el = NULL;
- History *hl = NULL;
- HistEvent hev;
-
- if (!batchmode && isatty(STDIN_FILENO)) {
- if ((il = el = el_init(__progname, stdin, stdout, stderr)) == NULL)
- fatal("Couldn't initialise editline");
- if ((hl = history_init()) == NULL)
- fatal("Couldn't initialise editline history");
- history(hl, &hev, H_SETSIZE, 100);
- el_set(el, EL_HIST, history, hl);
-
- el_set(el, EL_PROMPT, prompt);
- el_set(el, EL_EDITOR, "emacs");
- el_set(el, EL_TERMINAL, NULL);
- el_set(el, EL_SIGNAL, 1);
- el_source(el, NULL);
- }
-#else
-#ifdef USE_LIBTECLA
- GetLine *gl = NULL;
-
- if (!batchmode && isatty(STDIN_FILENO)) {
- if ((il = gl = new_GetLine(MAX_LINE_LEN, MAX_CMD_HIST)) == NULL)
- fatal("Couldn't initialize GetLine");
- if (gl_customize_completion(gl, NULL, nomatch) != 0) {
- (void) del_GetLine(gl);
- fatal("Couldn't register completion function");
- }
- }
-#endif /* USE_LIBTECLA */
-#endif /* USE_LIBEDIT */
-
- conn = do_init(fd_in, fd_out, copy_buffer_len, num_requests);
- if (conn == NULL)
- fatal("Couldn't initialise connection to server");
-
- pwd = do_realpath(conn, ".");
- if (pwd == NULL)
- fatal("Need cwd");
-
- if (file1 != NULL) {
- dir = xstrdup(file1);
- dir = make_absolute(dir, pwd);
-
- if (remote_is_dir(conn, dir) && file2 == NULL) {
- printf(gettext("Changing to: %s\n"), dir);
- snprintf(cmd, sizeof cmd, "cd \"%s\"", dir);
- if (parse_dispatch_command(conn, cmd, &pwd, 1) != 0) {
- xfree(dir);
- xfree(pwd);
- xfree(conn);
- return (-1);
- }
- } else {
- if (file2 == NULL)
- snprintf(cmd, sizeof cmd, "get %s", dir);
- else
- snprintf(cmd, sizeof cmd, "get %s %s", dir,
- file2);
-
- err = parse_dispatch_command(conn, cmd, &pwd, 1);
- xfree(dir);
- xfree(pwd);
- xfree(conn);
- return (err);
- }
- xfree(dir);
- }
-
-#if defined(HAVE_SETVBUF) && !defined(BROKEN_SETVBUF)
- setvbuf(stdout, NULL, _IOLBF, 0);
- setvbuf(infile, NULL, _IOLBF, 0);
-#else
- setlinebuf(stdout);
- setlinebuf(infile);
-#endif
-
- interactive = !batchmode && isatty(STDIN_FILENO);
- err = 0;
- for (;;) {
- char *cp;
-
- signal(SIGINT, SIG_IGN);
-
- if (il == NULL) {
- if (interactive)
- printf("sftp> ");
- if (fgets(cmd, sizeof(cmd), infile) == NULL) {
- if (interactive)
- printf("\n");
- break;
- }
- if (!interactive) { /* Echo command */
- printf("sftp> %s", cmd);
- if (strlen(cmd) > 0 &&
- cmd[strlen(cmd) - 1] != '\n')
- printf("\n");
- }
- }
-#ifdef USE_LIBEDIT
- else {
- const char *line;
- int count = 0;
-
- if ((line = el_gets(el, &count)) == NULL || count <= 0) {
- printf("\n");
- break;
- }
- history(hl, &hev, H_ENTER, line);
- if (strlcpy(cmd, line, sizeof(cmd)) >= sizeof(cmd)) {
- fprintf(stderr, gettext("Error: input line too long\n"));
- continue;
- }
- }
-#else
-#ifdef USE_LIBTECLA
- else {
- const char *line;
-
- line = gl_get_line(gl, "sftp> ", NULL, -1);
- if (line != NULL) {
- if (strlcpy(cmd, line, sizeof(cmd)) >=
- sizeof(cmd)) {
- fprintf(stderr, gettext(
- "Error: input line too long\n"));
- continue;
- }
- } else {
- GlReturnStatus rtn;
-
- rtn = gl_return_status(gl);
- if (rtn == GLR_SIGNAL) {
- gl_abandon_line(gl);
- continue;
- } else if (rtn == GLR_ERROR) {
- fprintf(stderr, gettext(
- "Error reading terminal: %s/\n"),
- gl_error_message(gl, NULL, 0));
- }
- break;
- }
- }
-#endif /* USE_LIBTECLA */
-#endif /* USE_LIBEDIT */
-
- cp = strrchr(cmd, '\n');
- if (cp)
- *cp = '\0';
-
- /* Handle user interrupts gracefully during commands */
- interrupted = 0;
- signal(SIGINT, cmd_interrupt);
-
- err = parse_dispatch_command(conn, cmd, &pwd, batchmode);
- if (err != 0)
- break;
- }
- xfree(pwd);
- xfree(conn);
-
-#ifdef USE_LIBEDIT
- if (el != NULL)
- el_end(el);
-#else
-#ifdef USE_LIBTECLA
- if (gl != NULL)
- (void) del_GetLine(gl);
-#endif /* USE_LIBTECLA */
-#endif /* USE_LIBEDIT */
-
- /* err == 1 signifies normal "quit" exit */
- return (err >= 0 ? 0 : -1);
-}
-
-static void
-connect_to_server(char *path, char **args, int *in, int *out)
-{
- int c_in, c_out;
-
- int inout[2];
-
- if (socketpair(AF_UNIX, SOCK_STREAM, 0, inout) == -1)
- fatal("socketpair: %s", strerror(errno));
- *in = *out = inout[0];
- c_in = c_out = inout[1];
-
- if ((sshpid = fork()) == -1)
- fatal("fork: %s", strerror(errno));
- else if (sshpid == 0) {
- if ((dup2(c_in, STDIN_FILENO) == -1) ||
- (dup2(c_out, STDOUT_FILENO) == -1)) {
- fprintf(stderr, "dup2: %s\n", strerror(errno));
- _exit(1);
- }
- close(*in);
- close(*out);
- close(c_in);
- close(c_out);
-
- /*
- * The underlying ssh is in the same process group, so we must
- * ignore SIGINT if we want to gracefully abort commands,
- * otherwise the signal will make it to the ssh process and
- * kill it too
- */
- signal(SIGINT, SIG_IGN);
- execvp(path, args);
- fprintf(stderr, "exec: %s: %s\n", path, strerror(errno));
- _exit(1);
- }
-
- signal(SIGTERM, killchild);
- signal(SIGINT, killchild);
- signal(SIGHUP, killchild);
- close(c_in);
- close(c_out);
-}
-
-static void
-usage(void)
-{
- fprintf(stderr,
- gettext("Usage: %s [-1Cv] [-b batchfile] [-B buffer_size]\n"
- " [-F ssh_config] [-o ssh_option] [-P sftp_server_path]\n"
- " [-R num_requests] [-s subsystem | sftp_server]\n"
- " [-S program] [user@]host[:dir[/] | :file [file]]\n"),
- __progname, __progname, __progname, __progname);
- exit(1);
-}
-
-int
-main(int argc, char **argv)
-{
- int in, out, ch, err;
- char *host, *userhost, *cp, *file2 = NULL;
- int debug_level = 0, sshver = 2;
- char *file1 = NULL, *sftp_server = NULL;
- char *ssh_program = _PATH_SSH_PROGRAM, *sftp_direct = NULL;
- LogLevel ll = SYSLOG_LEVEL_INFO;
- arglist args;
- extern int optind;
- extern char *optarg;
-
- /* Ensure that fds 0, 1 and 2 are open or directed to /dev/null */
- sanitise_stdfd();
-
- __progname = get_progname(argv[0]);
-
- (void) g11n_setlocale(LC_ALL, "");
-
- memset(&args, '\0', sizeof(args));
- args.list = NULL;
- addargs(&args, "%s", ssh_program);
- addargs(&args, "-oForwardX11 no");
- addargs(&args, "-oForwardAgent no");
- addargs(&args, "-oClearAllForwardings yes");
-
- ll = SYSLOG_LEVEL_INFO;
- infile = stdin;
-
- while ((ch = getopt(argc, argv, "1hvCo:s:S:b:B:F:P:R:")) != -1) {
- switch (ch) {
- case 'C':
- addargs(&args, "-C");
- break;
- case 'v':
- if (debug_level < 3) {
- addargs(&args, "-v");
- ll = SYSLOG_LEVEL_DEBUG1 + debug_level;
- }
- debug_level++;
- break;
- case 'F':
- case 'o':
- addargs(&args, "-%c%s", ch, optarg);
- break;
- case '1':
- sshver = 1;
- if (sftp_server == NULL)
- sftp_server = _PATH_SFTP_SERVER;
- break;
- case 's':
- sftp_server = optarg;
- break;
- case 'S':
- ssh_program = optarg;
- replacearg(&args, 0, "%s", ssh_program);
- break;
- case 'b':
- if (batchmode)
- fatal("Batch file already specified.");
-
- /* Allow "-" as stdin */
- if (strcmp(optarg, "-") != 0 &&
- (infile = fopen(optarg, "r")) == NULL)
- fatal("%s (%s).", strerror(errno), optarg);
- showprogress = 0;
- batchmode = 1;
- addargs(&args, "-obatchmode yes");
- break;
- case 'P':
- sftp_direct = optarg;
- break;
- case 'B':
- copy_buffer_len = strtol(optarg, &cp, 10);
- if (copy_buffer_len == 0 || *cp != '\0')
- fatal("Invalid buffer size \"%s\"", optarg);
- break;
- case 'R':
- num_requests = strtol(optarg, &cp, 10);
- if (num_requests == 0 || *cp != '\0')
- fatal("Invalid number of requests \"%s\"",
- optarg);
- break;
- case 'h':
- default:
- usage();
- }
- }
-
- if (!isatty(STDERR_FILENO))
- showprogress = 0;
-
- log_init(argv[0], ll, SYSLOG_FACILITY_USER, 1);
-
- if (sftp_direct == NULL) {
- if (optind == argc || argc > (optind + 2))
- usage();
-
- userhost = xstrdup(argv[optind]);
- file2 = argv[optind+1];
-
- if ((host = strrchr(userhost, '@')) == NULL)
- host = userhost;
- else {
- *host++ = '\0';
- if (!userhost[0]) {
- fprintf(stderr, gettext("Missing username\n"));
- usage();
- }
- addargs(&args, "-l%s", userhost);
- }
-
- if ((cp = colon(host)) != NULL) {
- *cp++ = '\0';
- file1 = cp;
- }
-
- host = cleanhostname(host);
- if (!*host) {
- fprintf(stderr, gettext("Missing hostname\n"));
- usage();
- }
-
- addargs(&args, "-oProtocol %d", sshver);
-
- /* no subsystem if the server-spec contains a '/' */
- if (sftp_server == NULL || strchr(sftp_server, '/') == NULL)
- addargs(&args, "-s");
-
- addargs(&args, "%s", host);
- addargs(&args, "%s", (sftp_server != NULL ?
- sftp_server : "sftp"));
-
- if (!batchmode)
- fprintf(stderr, gettext("Connecting to %s...\n"), host);
- connect_to_server(ssh_program, args.list, &in, &out);
- } else {
- args.list = NULL;
- addargs(&args, "sftp-server");
-
- if (!batchmode)
- fprintf(stderr, gettext("Attaching to %s...\n"), sftp_direct);
- connect_to_server(sftp_direct, args.list, &in, &out);
- }
- freeargs(&args);
-
- err = interactive_loop(in, out, file1, file2);
-
- shutdown(in, SHUT_RDWR);
- shutdown(out, SHUT_RDWR);
-
- close(in);
- close(out);
- if (batchmode)
- fclose(infile);
-
- while (waitpid(sshpid, NULL, 0) == -1)
- if (errno != EINTR)
- fatal("Couldn't wait for ssh process: %s",
- strerror(errno));
-
- return (err == 0 ? 0 : 1);
-}
diff --git a/usr/src/cmd/ssh/ssh-add/ssh-add.c b/usr/src/cmd/ssh/ssh-add/ssh-add.c
deleted file mode 100644
index cfa1ce320e..0000000000
--- a/usr/src/cmd/ssh/ssh-add/ssh-add.c
+++ /dev/null
@@ -1,394 +0,0 @@
-/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * Adds an identity to the authentication server, or removes an identity.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- *
- * SSH2 implementation,
- * Copyright (c) 2000, 2001 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: ssh-add.c,v 1.63 2002/09/19 15:51:23 markus Exp $");
-
-#include <openssl/evp.h>
-
-#include "ssh.h"
-#include "rsa.h"
-#include "log.h"
-#include "xmalloc.h"
-#include "key.h"
-#include "authfd.h"
-#include "authfile.h"
-#include "pathnames.h"
-#include "readpass.h"
-#include "misc.h"
-
-#ifdef HAVE___PROGNAME
-extern char *__progname;
-#else
-char *__progname;
-#endif
-
-/* argv0 */
-extern char *__progname;
-
-/* Default files to add */
-static char *default_files[] = {
- _PATH_SSH_CLIENT_ID_RSA,
- _PATH_SSH_CLIENT_ID_DSA,
- _PATH_SSH_CLIENT_IDENTITY,
- NULL
-};
-
-/* Default lifetime (0 == forever) */
-static int lifetime = 0;
-
-/* we keep a cache of one passphrases */
-static char *pass = NULL;
-static void
-clear_pass(void)
-{
- if (pass) {
- memset(pass, 0, strlen(pass));
- xfree(pass);
- pass = NULL;
- }
-}
-
-static int
-delete_file(AuthenticationConnection *ac, const char *filename)
-{
- Key *public;
- char *comment = NULL;
- int ret = -1;
-
- public = key_load_public(filename, &comment);
- if (public == NULL) {
- printf(gettext("Bad key file %s\n"), filename);
- return -1;
- }
- if (ssh_remove_identity(ac, public)) {
- fprintf(stderr, gettext("Identity removed: %s (%s)\n"),
- filename, comment);
- ret = 0;
- } else
- fprintf(stderr, gettext("Could not remove identity: %s\n"),
- filename);
-
- key_free(public);
- xfree(comment);
-
- return ret;
-}
-
-/* Send a request to remove all identities. */
-static int
-delete_all(AuthenticationConnection *ac)
-{
- int ret = -1;
-
- if (ssh_remove_all_identities(ac, 1))
- ret = 0;
- /* ignore error-code for ssh2 */
- ssh_remove_all_identities(ac, 2);
-
- if (ret == 0)
- fprintf(stderr, gettext("All identities removed.\n"));
- else
- fprintf(stderr, gettext("Failed to remove all identities.\n"));
-
- return ret;
-}
-
-static int
-add_file(AuthenticationConnection *ac, const char *filename)
-{
- struct stat st;
- Key *private;
- char *comment = NULL;
- char msg[1024];
- int ret = -1;
-
- if (stat(filename, &st) < 0) {
- perror(filename);
- return -1;
- }
- /* At first, try empty passphrase */
- private = key_load_private(filename, "", &comment);
- if (comment == NULL)
- comment = xstrdup(filename);
- /* try last */
- if (private == NULL && pass != NULL)
- private = key_load_private(filename, pass, NULL);
- if (private == NULL) {
- /* clear passphrase since it did not work */
- clear_pass();
- snprintf(msg, sizeof msg,
- gettext("Enter passphrase for %.200s: "), comment);
- for (;;) {
- pass = read_passphrase(msg, RP_ALLOW_STDIN);
- if (strcmp(pass, "") == 0) {
- clear_pass();
- xfree(comment);
- return -1;
- }
- private = key_load_private(filename, pass, &comment);
- if (private != NULL)
- break;
- clear_pass();
- strlcpy(msg, gettext("Bad passphrase, try again: "),
- sizeof msg);
- }
- }
-
- if (ssh_add_identity_constrained(ac, private, comment, lifetime)) {
- fprintf(stderr, gettext("Identity added: %s (%s)\n"),
- filename, comment);
- ret = 0;
- if (lifetime != 0)
- fprintf(stderr,
- gettext("Lifetime set to %d seconds\n"), lifetime);
- } else if (ssh_add_identity(ac, private, comment)) {
- fprintf(stderr, gettext("Identity added: %s (%s)\n"),
- filename, comment);
- ret = 0;
- } else {
- fprintf(stderr, gettext("Could not add identity: %s\n"),
- filename);
- }
-
- xfree(comment);
- key_free(private);
-
- return ret;
-}
-
-static int
-list_identities(AuthenticationConnection *ac, int do_fp)
-{
- Key *key;
- char *comment, *fp;
- int had_identities = 0;
- int version;
-
- for (version = 1; version <= 2; version++) {
- for (key = ssh_get_first_identity(ac, &comment, version);
- key != NULL;
- key = ssh_get_next_identity(ac, &comment, version)) {
- had_identities = 1;
- if (do_fp) {
- fp = key_fingerprint(key, SSH_FP_MD5,
- SSH_FP_HEX);
- printf("%d %s %s (%s)\n",
- key_size(key), fp, comment, key_type(key));
- xfree(fp);
- } else {
- if (!key_write(key, stdout))
- fprintf(stderr,
- gettext("key_write failed"));
- fprintf(stdout, " %s\n", comment);
- }
- key_free(key);
- xfree(comment);
- }
- }
- if (!had_identities) {
- printf(gettext("The agent has no identities.\n"));
- return -1;
- }
- return 0;
-}
-
-static int
-lock_agent(AuthenticationConnection *ac, int lock)
-{
- char prompt[100], *p1, *p2;
- int passok = 1, ret = -1;
-
- strlcpy(prompt, "Enter lock password: ", sizeof(prompt));
- p1 = read_passphrase(prompt, RP_ALLOW_STDIN);
- if (lock) {
- strlcpy(prompt, "Again: ", sizeof prompt);
- p2 = read_passphrase(prompt, RP_ALLOW_STDIN);
- if (strcmp(p1, p2) != 0) {
- fprintf(stderr, gettext("Passwords do not match.\n"));
- passok = 0;
- }
- memset(p2, 0, strlen(p2));
- xfree(p2);
- }
- if (passok && ssh_lock_agent(ac, lock, p1)) {
- if (lock)
- fprintf(stderr, gettext("Agent locked.\n"));
- else
- fprintf(stderr, gettext("Agent unlocked.\n"));
- ret = 0;
- } else {
- if (lock)
- fprintf(stderr, gettext("Failed to lock agent.\n"));
- else
- fprintf(stderr, gettext("Failed to unlock agent.\n"));
- }
- memset(p1, 0, strlen(p1));
- xfree(p1);
- return (ret);
-}
-
-static int
-do_file(AuthenticationConnection *ac, int deleting, char *file)
-{
- if (deleting) {
- if (delete_file(ac, file) == -1)
- return -1;
- } else {
- if (add_file(ac, file) == -1)
- return -1;
- }
- return 0;
-}
-
-static void
-usage(void)
-{
- fprintf(stderr,
- gettext( "Usage: %s [options]\n"
- "Options:\n"
- " -l List fingerprints of all identities.\n"
- " -L List public key parameters of all identities.\n"
- " -d Delete identity.\n"
- " -D Delete all identities.\n"
- " -x Lock agent.\n"
- " -X Unlock agent.\n"
- " -t life Set lifetime (seconds) when adding identities.\n"
- ), __progname);
-}
-
-int
-main(int argc, char **argv)
-{
- extern char *optarg;
- extern int optind;
- AuthenticationConnection *ac = NULL;
- int i, ch, deleting = 0, ret = 0;
-
- __progname = get_progname(argv[0]);
-
- (void) g11n_setlocale(LC_ALL, "");
-
- init_rng();
- seed_rng();
-
- SSLeay_add_all_algorithms();
-
- /* At first, get a connection to the authentication agent. */
- ac = ssh_get_authentication_connection();
- if (ac == NULL) {
- fprintf(stderr, gettext("Could not open a connection "
- "to your authentication agent.\n"));
- exit(2);
- }
- while ((ch = getopt(argc, argv, "lLdDxXe:s:t:")) != -1) {
- switch (ch) {
- case 'l':
- case 'L':
- if (list_identities(ac, ch == 'l' ? 1 : 0) == -1)
- ret = 1;
- goto done;
- break;
- case 'x':
- case 'X':
- if (lock_agent(ac, ch == 'x' ? 1 : 0) == -1)
- ret = 1;
- goto done;
- break;
- case 'd':
- deleting = 1;
- break;
- case 'D':
- if (delete_all(ac) == -1)
- ret = 1;
- goto done;
- break;
- case 't':
- if ((lifetime = convtime(optarg)) == -1) {
- fprintf(stderr, gettext("Invalid lifetime\n"));
- ret = 1;
- goto done;
- }
- break;
- default:
- usage();
- ret = 1;
- goto done;
- }
- }
- argc -= optind;
- argv += optind;
- if (argc == 0) {
- char buf[MAXPATHLEN];
- struct passwd *pw;
- struct stat st;
- int count = 0;
-
- if ((pw = getpwuid(getuid())) == NULL) {
- fprintf(stderr, gettext("No user found with uid %u\n"),
- (u_int)getuid());
- ret = 1;
- goto done;
- }
-
- for(i = 0; default_files[i]; i++) {
- snprintf(buf, sizeof(buf), "%s/%s", pw->pw_dir,
- default_files[i]);
- if (stat(buf, &st) < 0)
- continue;
- if (do_file(ac, deleting, buf) == -1)
- ret = 1;
- else
- count++;
- }
- if (count == 0)
- ret = 1;
- } else {
- for(i = 0; i < argc; i++) {
- if (do_file(ac, deleting, argv[i]) == -1)
- ret = 1;
- }
- }
- clear_pass();
-
-done:
- ssh_close_authentication_connection(ac);
- return ret;
-}
diff --git a/usr/src/cmd/ssh/ssh-agent/Makefile b/usr/src/cmd/ssh/ssh-agent/Makefile
deleted file mode 100644
index 197899b10f..0000000000
--- a/usr/src/cmd/ssh/ssh-agent/Makefile
+++ /dev/null
@@ -1,65 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
-#
-# cmd/ssh/ssh-agent/Makefile
-
-PROG= ssh-agent
-
-OBJS = \
- ssh-agent.o
-SRCS = $(OBJS:.o=.c)
-
-include ../../Makefile.cmd
-include ../Makefile.ssh-common
-
-DIRS= $(ROOTLIBSUNSSH)
-
-LDLIBS += $(SSH_COMMON_LDLIBS) -lsocket -lcrypto
-
-POFILE_DIR= ..
-
-.KEEP_STATE:
-
-.PARALLEL: $(OBJS)
-
-all: $(PROG)
-
-$(PROG): $(OBJS) ../libssh/$(MACH)/libssh.a ../libopenbsd-compat/$(MACH)/libopenbsd-compat.a
- $(LINK.c) $(OBJS) -o $@ $(LDLIBS) $(DYNFLAGS)
- $(POST_PROCESS)
-
-$(DIRS):
- $(INS.dir)
-
-$(ROOTBIN)/ssh-agent: $(ROOTLIBSUNSSH)/ssh-agent
- -$(RM) $@; $(SYMLINK) ../lib/sunssh/ssh-agent $@
-
-install: all $(ROOTPROG)
-
-clean:
- $(RM) -f $(OBJS) $(PROG)
-
-lint: lint_SRCS
-
-include ../Makefile.msg.targ
-include ../../Makefile.targ
diff --git a/usr/src/cmd/ssh/ssh-agent/ssh-agent.c b/usr/src/cmd/ssh/ssh-agent/ssh-agent.c
deleted file mode 100644
index 1ac5f4030d..0000000000
--- a/usr/src/cmd/ssh/ssh-agent/ssh-agent.c
+++ /dev/null
@@ -1,1192 +0,0 @@
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * The authentication agent program.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- *
- * Copyright (c) 2000, 2001 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-#include "sys-queue.h"
-RCSID("$OpenBSD: ssh-agent.c,v 1.159 2008/06/28 14:05:15 djm Exp $");
-
-#ifdef HAVE_SOLARIS_PRIVILEGE
-#include <priv.h>
-#endif /* HAVE_SOLARIS_PRIVILEGE */
-
-#include <openssl/evp.h>
-#include <openssl/md5.h>
-
-#include "ssh.h"
-#include "rsa.h"
-#include "buffer.h"
-#include "bufaux.h"
-#include "xmalloc.h"
-#include "key.h"
-#include "authfd.h"
-#include "compat.h"
-#include "log.h"
-#include "readpass.h"
-#include "misc.h"
-
-typedef enum {
- AUTH_UNUSED,
- AUTH_SOCKET,
- AUTH_CONNECTION
-} sock_type;
-
-typedef struct {
- int fd;
- sock_type type;
- Buffer input;
- Buffer output;
- Buffer request;
-} SocketEntry;
-
-u_int sockets_alloc = 0;
-SocketEntry *sockets = NULL;
-
-typedef struct identity {
- TAILQ_ENTRY(identity) next;
- Key *key;
- char *comment;
- u_int death;
- u_int confirm;
-} Identity;
-
-typedef struct {
- int nentries;
- TAILQ_HEAD(idqueue, identity) idlist;
-} Idtab;
-
-/* private key table, one per protocol version */
-Idtab idtable[3];
-
-int max_fd = 0;
-
-/* pid of shell == parent of agent */
-pid_t parent_pid = -1;
-u_int parent_alive_interval = 0;
-
-/* pathname and directory for AUTH_SOCKET */
-char socket_name[MAXPATHLEN];
-char socket_dir[MAXPATHLEN];
-
-/* locking */
-int locked = 0;
-char *lock_passwd = NULL;
-
-#ifdef HAVE___PROGNAME
-extern char *__progname;
-#else
-char *__progname;
-#endif
-
-/* Default lifetime (0 == forever) */
-static int lifetime = 0;
-
-static void
-close_socket(SocketEntry *e)
-{
- close(e->fd);
- e->fd = -1;
- e->type = AUTH_UNUSED;
- buffer_free(&e->input);
- buffer_free(&e->output);
- buffer_free(&e->request);
-}
-
-static void
-idtab_init(void)
-{
- int i;
-
- for (i = 0; i <=2; i++) {
- TAILQ_INIT(&idtable[i].idlist);
- idtable[i].nentries = 0;
- }
-}
-
-/* return private key table for requested protocol version */
-static Idtab *
-idtab_lookup(int version)
-{
- if (version < 1 || version > 2)
- fatal("internal error, bad protocol version %d", version);
- return &idtable[version];
-}
-
-static void
-free_identity(Identity *id)
-{
- key_free(id->key);
- xfree(id->comment);
- xfree(id);
-}
-
-/* return matching private key for given public key */
-static Identity *
-lookup_identity(Key *key, int version)
-{
- Identity *id;
-
- Idtab *tab = idtab_lookup(version);
- TAILQ_FOREACH(id, &tab->idlist, next) {
- if (key_equal(key, id->key))
- return (id);
- }
- return (NULL);
-}
-
-/* Check confirmation of keysign request */
-static int
-confirm_key(Identity *id)
-{
- char *p;
- int ret = -1;
-
- p = key_fingerprint(id->key, SSH_FP_MD5, SSH_FP_HEX);
- if (ask_permission(
- gettext("Allow use of key %s?\nKey fingerprint %s."),
- id->comment, p))
- ret = 0;
- xfree(p);
-
- return (ret);
-}
-
-/* send list of supported public keys to 'client' */
-static void
-process_request_identities(SocketEntry *e, int version)
-{
- Idtab *tab = idtab_lookup(version);
- Identity *id;
- Buffer msg;
-
- buffer_init(&msg);
- buffer_put_char(&msg, (version == 1) ?
- SSH_AGENT_RSA_IDENTITIES_ANSWER : SSH2_AGENT_IDENTITIES_ANSWER);
- buffer_put_int(&msg, tab->nentries);
- TAILQ_FOREACH(id, &tab->idlist, next) {
- if (id->key->type == KEY_RSA1) {
- buffer_put_int(&msg, BN_num_bits(id->key->rsa->n));
- buffer_put_bignum(&msg, id->key->rsa->e);
- buffer_put_bignum(&msg, id->key->rsa->n);
- } else {
- u_char *blob;
- u_int blen;
- key_to_blob(id->key, &blob, &blen);
- buffer_put_string(&msg, blob, blen);
- xfree(blob);
- }
- buffer_put_cstring(&msg, id->comment);
- }
- buffer_put_int(&e->output, buffer_len(&msg));
- buffer_append(&e->output, buffer_ptr(&msg), buffer_len(&msg));
- buffer_free(&msg);
-}
-
-/* ssh1 only */
-static void
-process_authentication_challenge1(SocketEntry *e)
-{
- u_char buf[32], mdbuf[16], session_id[16];
- u_int response_type;
- BIGNUM *challenge;
- Identity *id;
- int i, len;
- Buffer msg;
- MD5_CTX md;
- Key *key;
-
- buffer_init(&msg);
- key = key_new(KEY_RSA1);
- if ((challenge = BN_new()) == NULL)
- fatal("process_authentication_challenge1: BN_new failed");
-
- (void) buffer_get_int(&e->request); /* ignored */
- buffer_get_bignum(&e->request, key->rsa->e);
- buffer_get_bignum(&e->request, key->rsa->n);
- buffer_get_bignum(&e->request, challenge);
-
- /* Only protocol 1.1 is supported */
- if (buffer_len(&e->request) == 0)
- goto failure;
- buffer_get(&e->request, session_id, 16);
- response_type = buffer_get_int(&e->request);
- if (response_type != 1)
- goto failure;
-
- id = lookup_identity(key, 1);
- if (id != NULL && (!id->confirm || confirm_key(id) == 0)) {
- Key *private = id->key;
- /* Decrypt the challenge using the private key. */
- if (rsa_private_decrypt(challenge, challenge, private->rsa) <= 0)
- goto failure;
-
- /* The response is MD5 of decrypted challenge plus session id. */
- len = BN_num_bytes(challenge);
- if (len <= 0 || len > 32) {
- log("process_authentication_challenge: bad challenge length %d", len);
- goto failure;
- }
- memset(buf, 0, 32);
- BN_bn2bin(challenge, buf + 32 - len);
- MD5_Init(&md);
- MD5_Update(&md, buf, 32);
- MD5_Update(&md, session_id, 16);
- MD5_Final(mdbuf, &md);
-
- /* Send the response. */
- buffer_put_char(&msg, SSH_AGENT_RSA_RESPONSE);
- for (i = 0; i < 16; i++)
- buffer_put_char(&msg, mdbuf[i]);
- goto send;
- }
-
-failure:
- /* Unknown identity or protocol error. Send failure. */
- buffer_put_char(&msg, SSH_AGENT_FAILURE);
-send:
- buffer_put_int(&e->output, buffer_len(&msg));
- buffer_append(&e->output, buffer_ptr(&msg), buffer_len(&msg));
- key_free(key);
- BN_clear_free(challenge);
- buffer_free(&msg);
-}
-
-/* ssh2 only */
-static void
-process_sign_request2(SocketEntry *e)
-{
- u_char *blob, *data, *signature = NULL;
- u_int blen, dlen, slen = 0;
- extern uint32_t datafellows;
- int odatafellows;
- int ok = -1, flags;
- Buffer msg;
- Key *key;
-
- datafellows = 0;
-
- blob = buffer_get_string(&e->request, &blen);
- data = buffer_get_string(&e->request, &dlen);
-
- flags = buffer_get_int(&e->request);
- odatafellows = datafellows;
- if (flags & SSH_AGENT_OLD_SIGNATURE)
- datafellows = SSH_BUG_SIGBLOB;
-
- key = key_from_blob(blob, blen);
- if (key != NULL) {
- Identity *id = lookup_identity(key, 2);
- if (id != NULL && (!id->confirm || confirm_key(id) == 0))
- ok = key_sign(id->key, &signature, &slen, data, dlen);
- key_free(key);
- }
- buffer_init(&msg);
- if (ok == 0) {
- buffer_put_char(&msg, SSH2_AGENT_SIGN_RESPONSE);
- buffer_put_string(&msg, signature, slen);
- } else {
- buffer_put_char(&msg, SSH_AGENT_FAILURE);
- }
- buffer_put_int(&e->output, buffer_len(&msg));
- buffer_append(&e->output, buffer_ptr(&msg),
- buffer_len(&msg));
- buffer_free(&msg);
- xfree(data);
- xfree(blob);
- if (signature != NULL)
- xfree(signature);
- datafellows = odatafellows;
-}
-
-/* shared */
-static void
-process_remove_identity(SocketEntry *e, int version)
-{
- u_int blen, bits;
- int success = 0;
- Key *key = NULL;
- u_char *blob;
-
- switch (version) {
- case 1:
- key = key_new(KEY_RSA1);
- bits = buffer_get_int(&e->request);
- buffer_get_bignum(&e->request, key->rsa->e);
- buffer_get_bignum(&e->request, key->rsa->n);
-
- if (bits != key_size(key))
- log("Warning: identity keysize mismatch: actual %u, announced %u",
- key_size(key), bits);
- break;
- case 2:
- blob = buffer_get_string(&e->request, &blen);
- key = key_from_blob(blob, blen);
- xfree(blob);
- break;
- }
- if (key != NULL) {
- Identity *id = lookup_identity(key, version);
- if (id != NULL) {
- /*
- * We have this key. Free the old key. Since we
- * don't want to leave empty slots in the middle of
- * the array, we actually free the key there and move
- * all the entries between the empty slot and the end
- * of the array.
- */
- Idtab *tab = idtab_lookup(version);
- if (tab->nentries < 1)
- fatal("process_remove_identity: "
- "internal error: tab->nentries %d",
- tab->nentries);
- TAILQ_REMOVE(&tab->idlist, id, next);
- free_identity(id);
- tab->nentries--;
- success = 1;
- }
- key_free(key);
- }
- buffer_put_int(&e->output, 1);
- buffer_put_char(&e->output,
- success ? SSH_AGENT_SUCCESS : SSH_AGENT_FAILURE);
-}
-
-static void
-process_remove_all_identities(SocketEntry *e, int version)
-{
- Idtab *tab = idtab_lookup(version);
- Identity *id;
-
- /* Loop over all identities and clear the keys. */
- for (id = TAILQ_FIRST(&tab->idlist); id;
- id = TAILQ_FIRST(&tab->idlist)) {
- TAILQ_REMOVE(&tab->idlist, id, next);
- free_identity(id);
- }
-
- /* Mark that there are no identities. */
- tab->nentries = 0;
-
- /* Send success. */
- buffer_put_int(&e->output, 1);
- buffer_put_char(&e->output, SSH_AGENT_SUCCESS);
-}
-
-/* removes expired keys and returns number of seconds until the next expiry */
-static u_int
-reaper(void)
-{
- u_int deadline = 0, now = time(NULL);
- Identity *id, *nxt;
- int version;
- Idtab *tab;
-
- for (version = 1; version < 3; version++) {
- tab = idtab_lookup(version);
- for (id = TAILQ_FIRST(&tab->idlist); id; id = nxt) {
- nxt = TAILQ_NEXT(id, next);
- if (id->death == 0)
- continue;
- if (now >= id->death) {
- debug("expiring key '%s'", id->comment);
- TAILQ_REMOVE(&tab->idlist, id, next);
- free_identity(id);
- tab->nentries--;
- } else
- deadline = (deadline == 0) ? id->death :
- MIN(deadline, id->death);
- }
- }
- if (deadline == 0 || deadline <= now)
- return 0;
- else
- return (deadline - now);
-}
-
-static void
-process_add_identity(SocketEntry *e, int version)
-{
- Idtab *tab = idtab_lookup(version);
- Identity *id;
- int type, success = 0, death = 0, confirm = 0;
- char *type_name, *comment;
- Key *k = NULL;
-
- switch (version) {
- case 1:
- k = key_new_private(KEY_RSA1);
- (void) buffer_get_int(&e->request); /* ignored */
- buffer_get_bignum(&e->request, k->rsa->n);
- buffer_get_bignum(&e->request, k->rsa->e);
- buffer_get_bignum(&e->request, k->rsa->d);
- buffer_get_bignum(&e->request, k->rsa->iqmp);
-
- /* SSH and SSL have p and q swapped */
- buffer_get_bignum(&e->request, k->rsa->q); /* p */
- buffer_get_bignum(&e->request, k->rsa->p); /* q */
-
- /* Generate additional parameters */
- rsa_generate_additional_parameters(k->rsa);
- break;
- case 2:
- type_name = buffer_get_string(&e->request, NULL);
- type = key_type_from_name(type_name);
- xfree(type_name);
- switch (type) {
- case KEY_DSA:
- k = key_new_private(type);
- buffer_get_bignum2(&e->request, k->dsa->p);
- buffer_get_bignum2(&e->request, k->dsa->q);
- buffer_get_bignum2(&e->request, k->dsa->g);
- buffer_get_bignum2(&e->request, k->dsa->pub_key);
- buffer_get_bignum2(&e->request, k->dsa->priv_key);
- break;
- case KEY_RSA:
- k = key_new_private(type);
- buffer_get_bignum2(&e->request, k->rsa->n);
- buffer_get_bignum2(&e->request, k->rsa->e);
- buffer_get_bignum2(&e->request, k->rsa->d);
- buffer_get_bignum2(&e->request, k->rsa->iqmp);
- buffer_get_bignum2(&e->request, k->rsa->p);
- buffer_get_bignum2(&e->request, k->rsa->q);
-
- /* Generate additional parameters */
- rsa_generate_additional_parameters(k->rsa);
- break;
- default:
- buffer_clear(&e->request);
- goto send;
- }
- break;
- }
- /* enable blinding */
- switch (k->type) {
- case KEY_RSA:
- case KEY_RSA1:
- if (RSA_blinding_on(k->rsa, NULL) != 1) {
- error("process_add_identity: RSA_blinding_on failed");
- key_free(k);
- goto send;
- }
- break;
- }
- comment = buffer_get_string(&e->request, NULL);
- if (k == NULL) {
- xfree(comment);
- goto send;
- }
- while (buffer_len(&e->request)) {
- switch ((type = buffer_get_char(&e->request))) {
- case SSH_AGENT_CONSTRAIN_LIFETIME:
- death = time(NULL) + buffer_get_int(&e->request);
- break;
- case SSH_AGENT_CONSTRAIN_CONFIRM:
- confirm = 1;
- break;
- default:
- error("process_add_identity: "
- "Unknown constraint type %d", type);
- xfree(comment);
- key_free(k);
- goto send;
- }
- }
- success = 1;
- if (lifetime && !death)
- death = time(NULL) + lifetime;
- if ((id = lookup_identity(k, version)) == NULL) {
- id = xmalloc(sizeof(Identity));
- id->key = k;
- TAILQ_INSERT_TAIL(&tab->idlist, id, next);
- /* Increment the number of identities. */
- tab->nentries++;
- } else {
- key_free(k);
- xfree(id->comment);
- }
- id->comment = comment;
- id->death = death;
- id->confirm = confirm;
-send:
- buffer_put_int(&e->output, 1);
- buffer_put_char(&e->output,
- success ? SSH_AGENT_SUCCESS : SSH_AGENT_FAILURE);
-}
-
-/* XXX todo: encrypt sensitive data with passphrase */
-static void
-process_lock_agent(SocketEntry *e, int lock)
-{
- int success = 0;
- char *passwd;
-
- passwd = buffer_get_string(&e->request, NULL);
- if (locked && !lock && strcmp(passwd, lock_passwd) == 0) {
- locked = 0;
- memset(lock_passwd, 0, strlen(lock_passwd));
- xfree(lock_passwd);
- lock_passwd = NULL;
- success = 1;
- } else if (!locked && lock) {
- locked = 1;
- lock_passwd = xstrdup(passwd);
- success = 1;
- }
- memset(passwd, 0, strlen(passwd));
- xfree(passwd);
-
- buffer_put_int(&e->output, 1);
- buffer_put_char(&e->output,
- success ? SSH_AGENT_SUCCESS : SSH_AGENT_FAILURE);
-}
-
-static void
-no_identities(SocketEntry *e, u_int type)
-{
- Buffer msg;
-
- buffer_init(&msg);
- buffer_put_char(&msg,
- (type == SSH_AGENTC_REQUEST_RSA_IDENTITIES) ?
- SSH_AGENT_RSA_IDENTITIES_ANSWER : SSH2_AGENT_IDENTITIES_ANSWER);
- buffer_put_int(&msg, 0);
- buffer_put_int(&e->output, buffer_len(&msg));
- buffer_append(&e->output, buffer_ptr(&msg), buffer_len(&msg));
- buffer_free(&msg);
-}
-
-/* dispatch incoming messages */
-
-static void
-process_message(SocketEntry *e)
-{
- u_int msg_len, type;
- u_char *cp;
-
- if (buffer_len(&e->input) < 5)
- return; /* Incomplete message. */
- cp = buffer_ptr(&e->input);
- msg_len = get_u32(cp);
- if (msg_len > 256 * 1024) {
- close_socket(e);
- return;
- }
- if (buffer_len(&e->input) < msg_len + 4)
- return;
-
- /* move the current input to e->request */
- buffer_consume(&e->input, 4);
- buffer_clear(&e->request);
- buffer_append(&e->request, buffer_ptr(&e->input), msg_len);
- buffer_consume(&e->input, msg_len);
- type = buffer_get_char(&e->request);
-
- /* check wheter agent is locked */
- if (locked && type != SSH_AGENTC_UNLOCK) {
- buffer_clear(&e->request);
- switch (type) {
- case SSH_AGENTC_REQUEST_RSA_IDENTITIES:
- case SSH2_AGENTC_REQUEST_IDENTITIES:
- /* send empty lists */
- no_identities(e, type);
- break;
- default:
- /* send a fail message for all other request types */
- buffer_put_int(&e->output, 1);
- buffer_put_char(&e->output, SSH_AGENT_FAILURE);
- }
- return;
- }
-
- debug("type %d", type);
- switch (type) {
- case SSH_AGENTC_LOCK:
- case SSH_AGENTC_UNLOCK:
- process_lock_agent(e, type == SSH_AGENTC_LOCK);
- break;
- /* ssh1 */
- case SSH_AGENTC_RSA_CHALLENGE:
- process_authentication_challenge1(e);
- break;
- case SSH_AGENTC_REQUEST_RSA_IDENTITIES:
- process_request_identities(e, 1);
- break;
- case SSH_AGENTC_ADD_RSA_IDENTITY:
- case SSH_AGENTC_ADD_RSA_ID_CONSTRAINED:
- process_add_identity(e, 1);
- break;
- case SSH_AGENTC_REMOVE_RSA_IDENTITY:
- process_remove_identity(e, 1);
- break;
- case SSH_AGENTC_REMOVE_ALL_RSA_IDENTITIES:
- process_remove_all_identities(e, 1);
- break;
- /* ssh2 */
- case SSH2_AGENTC_SIGN_REQUEST:
- process_sign_request2(e);
- break;
- case SSH2_AGENTC_REQUEST_IDENTITIES:
- process_request_identities(e, 2);
- break;
- case SSH2_AGENTC_ADD_IDENTITY:
- case SSH2_AGENTC_ADD_ID_CONSTRAINED:
- process_add_identity(e, 2);
- break;
- case SSH2_AGENTC_REMOVE_IDENTITY:
- process_remove_identity(e, 2);
- break;
- case SSH2_AGENTC_REMOVE_ALL_IDENTITIES:
- process_remove_all_identities(e, 2);
- break;
- default:
- /* Unknown message. Respond with failure. */
- error("Unknown message %d", type);
- buffer_clear(&e->request);
- buffer_put_int(&e->output, 1);
- buffer_put_char(&e->output, SSH_AGENT_FAILURE);
- break;
- }
-}
-
-static void
-new_socket(sock_type type, int fd)
-{
- u_int i, old_alloc, new_alloc;
-
- set_nonblock(fd);
-
- if (fd > max_fd)
- max_fd = fd;
-
- for (i = 0; i < sockets_alloc; i++)
- if (sockets[i].type == AUTH_UNUSED) {
- sockets[i].fd = fd;
- buffer_init(&sockets[i].input);
- buffer_init(&sockets[i].output);
- buffer_init(&sockets[i].request);
- sockets[i].type = type;
- return;
- }
- old_alloc = sockets_alloc;
- new_alloc = sockets_alloc + 10;
- sockets = xrealloc(sockets, new_alloc * sizeof(sockets[0]));
- for (i = old_alloc; i < new_alloc; i++)
- sockets[i].type = AUTH_UNUSED;
- sockets_alloc = new_alloc;
- sockets[old_alloc].fd = fd;
- buffer_init(&sockets[old_alloc].input);
- buffer_init(&sockets[old_alloc].output);
- buffer_init(&sockets[old_alloc].request);
- sockets[old_alloc].type = type;
-}
-
-static int
-prepare_select(fd_set **fdrp, fd_set **fdwp, int *fdl, u_int *nallocp,
- struct timeval **tvpp)
-{
- u_int i, sz, deadline;
- int n = 0;
- static struct timeval tv;
-
- for (i = 0; i < sockets_alloc; i++) {
- switch (sockets[i].type) {
- case AUTH_SOCKET:
- case AUTH_CONNECTION:
- n = MAX(n, sockets[i].fd);
- break;
- case AUTH_UNUSED:
- break;
- default:
- fatal("Unknown socket type %d", sockets[i].type);
- break;
- }
- }
-
- sz = howmany(n+1, NFDBITS) * sizeof(fd_mask);
- if (*fdrp == NULL || sz > *nallocp) {
- if (*fdrp)
- xfree(*fdrp);
- if (*fdwp)
- xfree(*fdwp);
- *fdrp = xmalloc(sz);
- *fdwp = xmalloc(sz);
- *nallocp = sz;
- }
- if (n < *fdl)
- debug("XXX shrink: %d < %d", n, *fdl);
- *fdl = n;
- memset(*fdrp, 0, sz);
- memset(*fdwp, 0, sz);
-
- for (i = 0; i < sockets_alloc; i++) {
- switch (sockets[i].type) {
- case AUTH_SOCKET:
- case AUTH_CONNECTION:
- FD_SET(sockets[i].fd, *fdrp);
- if (buffer_len(&sockets[i].output) > 0)
- FD_SET(sockets[i].fd, *fdwp);
- break;
- default:
- break;
- }
- }
- deadline = reaper();
- if (parent_alive_interval != 0)
- deadline = (deadline == 0) ? parent_alive_interval :
- MIN(deadline, parent_alive_interval);
- if (deadline == 0) {
- *tvpp = NULL;
- } else {
- tv.tv_sec = deadline;
- tv.tv_usec = 0;
- *tvpp = &tv;
- }
- return (1);
-}
-
-static void
-after_select(fd_set *readset, fd_set *writeset)
-{
- struct sockaddr_un sunaddr;
- socklen_t slen;
- char buf[1024];
- int len, sock;
- u_int i;
- uid_t euid;
- gid_t egid;
-
- for (i = 0; i < sockets_alloc; i++)
- switch (sockets[i].type) {
- case AUTH_UNUSED:
- break;
- case AUTH_SOCKET:
- if (FD_ISSET(sockets[i].fd, readset)) {
- slen = sizeof(sunaddr);
- sock = accept(sockets[i].fd,
- (struct sockaddr *)&sunaddr, &slen);
- if (sock < 0) {
- error("accept from AUTH_SOCKET: %s",
- strerror(errno));
- break;
- }
- if (getpeereid(sock, &euid, &egid) < 0) {
- error("getpeereid %d failed: %s",
- sock, strerror(errno));
- close(sock);
- break;
- }
- if ((euid != 0) && (getuid() != euid)) {
- error("uid mismatch: "
- "peer euid %u != uid %u",
- (u_int) euid, (u_int) getuid());
- close(sock);
- break;
- }
- new_socket(AUTH_CONNECTION, sock);
- }
- break;
- case AUTH_CONNECTION:
- if (buffer_len(&sockets[i].output) > 0 &&
- FD_ISSET(sockets[i].fd, writeset)) {
- do {
- len = write(sockets[i].fd,
- buffer_ptr(&sockets[i].output),
- buffer_len(&sockets[i].output));
- if (len == -1 && (errno == EAGAIN ||
- errno == EINTR ||
- errno == EWOULDBLOCK))
- continue;
- break;
- } while (1);
- if (len <= 0) {
- close_socket(&sockets[i]);
- break;
- }
- buffer_consume(&sockets[i].output, len);
- }
- if (FD_ISSET(sockets[i].fd, readset)) {
- do {
- len = read(sockets[i].fd, buf, sizeof(buf));
- if (len == -1 && (errno == EAGAIN ||
- errno == EINTR ||
- errno == EWOULDBLOCK))
- continue;
- break;
- } while (1);
- if (len <= 0) {
- close_socket(&sockets[i]);
- break;
- }
- buffer_append(&sockets[i].input, buf, len);
- process_message(&sockets[i]);
- }
- break;
- default:
- fatal("Unknown type %d", sockets[i].type);
- }
-}
-
-static void
-cleanup_socket(void)
-{
- if (socket_name[0])
- unlink(socket_name);
- if (socket_dir[0])
- rmdir(socket_dir);
-}
-
-void
-cleanup_exit(int i)
-{
- cleanup_socket();
- _exit(i);
-}
-
-/*ARGSUSED*/
-static void
-cleanup_handler(int sig)
-{
- cleanup_socket();
- _exit(2);
-}
-
-static void
-check_parent_exists(void)
-{
-#ifdef HAVE_SOLARIS_PRIVILEGE
- /*
- * We can not simply use "kill(ppid, 0) < 0" to detect if the parent
- * has exited when the child process no longer has the
- * PRIV_PROC_SESSION privilege.
- */
- if (parent_pid != -1 && getppid() != parent_pid) {
-#else
- if (parent_pid != -1 && kill(parent_pid, 0) < 0) {
-
-#endif
- /* printf("Parent has died - Authentication agent exiting.\n"); */
- cleanup_socket();
- _exit(2);
- }
-}
-
-static void
-usage(void)
-{
- fprintf(stderr,
- gettext("Usage: %s [options] [command [args ...]]\n"
- "Options:\n"
- " -c Generate C-shell commands on stdout.\n"
- " -s Generate Bourne shell commands on stdout.\n"
- " -k Kill the current agent.\n"
- " -d Debug mode.\n"
- " -a socket Bind agent socket to given name.\n"
- " -t life Default identity lifetime (seconds).\n"),
- __progname);
- exit(1);
-}
-
-int
-main(int ac, char **av)
-{
- int c_flag = 0, d_flag = 0, k_flag = 0, s_flag = 0;
- int sock, fd, ch, result, saved_errno;
- u_int nalloc;
- char *shell, *pidstr, *agentsocket = NULL;
- const char *format;
- fd_set *readsetp = NULL, *writesetp = NULL;
- struct sockaddr_un sunaddr;
-#ifdef HAVE_SETRLIMIT
- struct rlimit rlim;
-#endif
- int prev_mask;
- extern int optind;
- extern char *optarg;
- pid_t pid;
- char pidstrbuf[1 + 3 * sizeof pid];
- struct timeval *tvp = NULL;
-#ifdef HAVE_SOLARIS_PRIVILEGE
- priv_set_t *myprivs;
-#endif /* HAVE_SOLARIS_PRIVILEGE */
-
- /* Ensure that fds 0, 1 and 2 are open or directed to /dev/null */
- sanitise_stdfd();
-
- /* drop */
- setegid(getgid());
- setgid(getgid());
-
- SSLeay_add_all_algorithms();
-
- __progname = get_progname(av[0]);
- init_rng();
- seed_rng();
-
- while ((ch = getopt(ac, av, "cdksa:t:")) != -1) {
- switch (ch) {
- case 'c':
- if (s_flag)
- usage();
- c_flag++;
- break;
- case 'k':
- k_flag++;
- break;
- case 's':
- if (c_flag)
- usage();
- s_flag++;
- break;
- case 'd':
- if (d_flag)
- usage();
- d_flag++;
- break;
- case 'a':
- agentsocket = optarg;
- break;
- case 't':
- if ((lifetime = convtime(optarg)) == -1) {
- fprintf(stderr, gettext("Invalid lifetime\n"));
- usage();
- }
- break;
- default:
- usage();
- }
- }
- ac -= optind;
- av += optind;
-
- if (ac > 0 && (c_flag || k_flag || s_flag || d_flag))
- usage();
-
- if (ac == 0 && !c_flag && !s_flag) {
- shell = getenv("SHELL");
- if (shell != NULL &&
- strncmp(shell + strlen(shell) - 3, "csh", 3) == 0)
- c_flag = 1;
- }
- if (k_flag) {
- pidstr = getenv(SSH_AGENTPID_ENV_NAME);
- if (pidstr == NULL) {
- fprintf(stderr,
- gettext("%s not set, cannot kill agent\n"),
- SSH_AGENTPID_ENV_NAME);
- exit(1);
- }
- pid = atoi(pidstr);
- if (pid < 1) {
- fprintf(stderr,
- gettext("%s not set, cannot kill agent\n"),
- SSH_AGENTPID_ENV_NAME);
- exit(1);
- }
- if (kill(pid, SIGTERM) == -1) {
- perror("kill");
- exit(1);
- }
- format = c_flag ? "unsetenv %s;\n" : "unset %s;\n";
- printf(format, SSH_AUTHSOCKET_ENV_NAME);
- printf(format, SSH_AGENTPID_ENV_NAME);
- printf("echo ");
- printf(gettext("Agent pid %ld killed;\n"), (long)pid);
- exit(0);
- }
- parent_pid = getpid();
-
- if (agentsocket == NULL) {
- /* Create private directory for agent socket */
- strlcpy(socket_dir, "/tmp/ssh-XXXXXXXXXX", sizeof socket_dir);
- if (mkdtemp(socket_dir) == NULL) {
- perror("mkdtemp: private socket dir");
- exit(1);
- }
- snprintf(socket_name, sizeof socket_name, "%s/agent.%ld", socket_dir,
- (long)parent_pid);
- } else {
- /* Try to use specified agent socket */
- socket_dir[0] = '\0';
- strlcpy(socket_name, agentsocket, sizeof socket_name);
- }
-
- /*
- * Create socket early so it will exist before command gets run from
- * the parent.
- */
- sock = socket(AF_UNIX, SOCK_STREAM, 0);
- if (sock < 0) {
- perror("socket");
- *socket_name = '\0'; /* Don't unlink any existing file */
- cleanup_exit(1);
- }
- memset(&sunaddr, 0, sizeof(sunaddr));
- sunaddr.sun_family = AF_UNIX;
- strlcpy(sunaddr.sun_path, socket_name, sizeof(sunaddr.sun_path));
- prev_mask = umask(0177);
- if (bind(sock, (struct sockaddr *) &sunaddr, sizeof(sunaddr)) < 0) {
- perror("bind");
- *socket_name = '\0'; /* Don't unlink any existing file */
- umask(prev_mask);
- cleanup_exit(1);
- }
- umask(prev_mask);
- if (listen(sock, SSH_LISTEN_BACKLOG) < 0) {
- perror("listen");
- cleanup_exit(1);
- }
-
- /*
- * Fork, and have the parent execute the command, if any, or present
- * the socket data. The child continues as the authentication agent.
- */
- if (d_flag) {
- log_init(__progname, SYSLOG_LEVEL_DEBUG1, SYSLOG_FACILITY_AUTH, 1);
- format = c_flag ? "setenv %s %s;\n" : "%s=%s; export %s;\n";
- printf(format, SSH_AUTHSOCKET_ENV_NAME, socket_name,
- SSH_AUTHSOCKET_ENV_NAME);
- printf("echo ");
- printf(gettext("Agent pid %ld;\n"), (long)parent_pid);
- goto skip;
- }
- pid = fork();
- if (pid == -1) {
- perror("fork");
- cleanup_exit(1);
- }
- if (pid != 0) { /* Parent - execute the given command. */
- close(sock);
- snprintf(pidstrbuf, sizeof pidstrbuf, "%ld", (long)pid);
- if (ac == 0) {
- format = c_flag ? "setenv %s %s;\n" : "%s=%s; export %s;\n";
- printf(format, SSH_AUTHSOCKET_ENV_NAME, socket_name,
- SSH_AUTHSOCKET_ENV_NAME);
- printf(format, SSH_AGENTPID_ENV_NAME, pidstrbuf,
- SSH_AGENTPID_ENV_NAME);
- printf("echo ");
- printf(gettext("Agent pid %ld;\n"), (long)pid);
- exit(0);
- }
- if (setenv(SSH_AUTHSOCKET_ENV_NAME, socket_name, 1) == -1 ||
- setenv(SSH_AGENTPID_ENV_NAME, pidstrbuf, 1) == -1) {
- perror("setenv");
- exit(1);
- }
- execvp(av[0], av);
- perror(av[0]);
- exit(1);
- }
- /* child */
- log_init(__progname, SYSLOG_LEVEL_INFO, SYSLOG_FACILITY_AUTH, 0);
-
-#ifdef HAVE_SOLARIS_PRIVILEGE
- /*
- * Drop unneeded privs, including basic ones like fork/exec.
- *
- * Idiom: remove from 'basic' privs we know we don't want,
- * invert the result and remove the resulting set from P.
- *
- * None of the priv_delset() calls below, nor the setppriv call
- * below can fail, so their return values are not checked.
- */
- if ((myprivs = priv_str_to_set("basic", ",", NULL)) == NULL)
- fatal("priv_str_to_set failed: %m");
- (void) priv_delset(myprivs, PRIV_PROC_EXEC);
- (void) priv_delset(myprivs, PRIV_PROC_FORK);
- (void) priv_delset(myprivs, PRIV_FILE_LINK_ANY);
- (void) priv_delset(myprivs, PRIV_PROC_INFO);
- (void) priv_delset(myprivs, PRIV_PROC_SESSION);
- priv_inverse(myprivs);
- (void) setppriv(PRIV_OFF, PRIV_PERMITTED, myprivs);
- (void) priv_freeset(myprivs);
-#endif /* HAVE_SOLARIS_PRIVILEGE */
-
- if (setsid() == -1) {
- error("setsid: %s", strerror(errno));
- cleanup_exit(1);
- }
-
- (void)chdir("/");
- if ((fd = open(_PATH_DEVNULL, O_RDWR, 0)) != -1) {
- /* XXX might close listen socket */
- (void)dup2(fd, STDIN_FILENO);
- (void)dup2(fd, STDOUT_FILENO);
- (void)dup2(fd, STDERR_FILENO);
- if (fd > 2)
- close(fd);
- }
-
-#ifdef HAVE_SETRLIMIT
- /* deny core dumps, since memory contains unencrypted private keys */
- rlim.rlim_cur = rlim.rlim_max = 0;
- if (setrlimit(RLIMIT_CORE, &rlim) < 0) {
- error("setrlimit RLIMIT_CORE: %s", strerror(errno));
- cleanup_exit(1);
- }
-#endif
-
-skip:
- new_socket(AUTH_SOCKET, sock);
- if (ac > 0)
- parent_alive_interval = 10;
- idtab_init();
- if (!d_flag)
- signal(SIGINT, SIG_IGN);
- signal(SIGPIPE, SIG_IGN);
- signal(SIGHUP, cleanup_handler);
- signal(SIGTERM, cleanup_handler);
- nalloc = 0;
-
- while (1) {
- prepare_select(&readsetp, &writesetp, &max_fd, &nalloc, &tvp);
- result = select(max_fd + 1, readsetp, writesetp, NULL, tvp);
- saved_errno = errno;
- if (parent_alive_interval != 0)
- check_parent_exists();
- (void) reaper(); /* remove expired keys */
- if (result < 0) {
- if (saved_errno == EINTR)
- continue;
- fatal("select: %s", strerror(saved_errno));
- } else if (result > 0)
- after_select(readsetp, writesetp);
- }
- /* NOTREACHED */
- return (0); /* keep lint happy */
-}
diff --git a/usr/src/cmd/ssh/ssh-http-proxy-connect/ssh-http-proxy-connect.c b/usr/src/cmd/ssh/ssh-http-proxy-connect/ssh-http-proxy-connect.c
deleted file mode 100644
index 33182730ae..0000000000
--- a/usr/src/cmd/ssh/ssh-http-proxy-connect/ssh-http-proxy-connect.c
+++ /dev/null
@@ -1,284 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- *
- * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * An http client that let's users 'ssh' to the
- * outside of the firewall by opening up a connection
- * through the http proxy.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <netdb.h>
-#include <strings.h>
-#include <unistd.h>
-#include <inttypes.h>
-#include <errno.h>
-#include <poll.h>
-#include <signal.h>
-#include <locale.h>
-#include <libintl.h>
-#include <netinet/in.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <arpa/inet.h>
-#include <sys/time.h>
-#include <sys/stropts.h>
-#include <sys/stat.h>
-#include <sys/varargs.h>
-#include "proxy-io.h"
-
-#define DEFAULT_HTTPPROXYPORT "80"
-#define CONNECT_STRLEN 256
-
-static int debug_flag = 0;
-
-static void
-usage(void)
-{
- (void) fprintf(stderr, gettext("Usage: ssh-http-proxy-connect "
- "[-h http_proxy_host] [-p http_proxy_port]\n"
- "remote_host remote_port\n"));
- exit(1);
-}
-
-/* PRINTFLIKE1 */
-static void
-debug(const char *format, ...)
-{
- char fmtbuf[BUFFER_SIZ];
- va_list args;
-
- if (debug_flag == 0) {
- return;
- }
- va_start(args, format);
- (void) snprintf(fmtbuf, sizeof (fmtbuf),
- "ssh-http-proxy: %s\n", format);
- (void) vfprintf(stderr, fmtbuf, args);
- va_end(args);
-}
-
-static void
-signal_handler(int sig)
-{
- exit(0);
-}
-
-int
-main(int argc, char **argv)
-{
- extern char *optarg;
- extern int optind;
- int retval, err_code, sock, ssh_port;
- int version, ret_code;
- char *httpproxy = NULL;
- char *temp, *httpproxyport = NULL;
- char *ssh_host;
- char connect_str[CONNECT_STRLEN], connect_reply[BUFFER_SIZ];
- char *ret_string;
- struct addrinfo hints, *ai;
- struct pollfd fds[2];
-
- /* Initialization for variables, set locale and textdomain */
-
- (void) setlocale(LC_ALL, "");
-
-#if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
-#define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */
-#endif
- (void) textdomain(TEXT_DOMAIN);
-
- /* Set up the signal handler */
- (void) signal(SIGINT, signal_handler);
- (void) signal(SIGPIPE, signal_handler);
- (void) signal(SIGPOLL, signal_handler);
-
- while ((retval = getopt(argc, argv, "dp:h:")) != -1) {
- switch (retval) {
- case 'h':
- httpproxy = optarg;
- break;
- case 'p':
- httpproxyport = optarg;
- break;
- case 'd':
- debug_flag = 1;
- break;
- default:
- break;
- }
- }
-
- if (optind != argc - 2) {
- usage();
- }
-
- ssh_host = argv[optind++];
- ssh_port = atoi(argv[optind]);
-
- /*
- * If the name of the http proxy were not
- * passed on the command line, try the
- * user's environment. First try HTTPPROXY.
- * If it's not set, try http_proxy.
- * Check the url specified for http_proxy
- * for errors.
- */
- if (httpproxy == NULL) {
- if ((httpproxy = getenv("HTTPPROXY")) == NULL) {
- /* Try the other environment variable http_proxy */
- if ((temp = getenv("http_proxy")) != NULL) {
- temp += strlen("http://");
- if (strpbrk(temp, ":") == NULL) {
- /* Malformed url */
- (void) fprintf(stderr, gettext("ssh-http-proxy: "
- "Incorrect url specified for http_proxy "
- "environment variable\n"));
- exit(1);
- }
- httpproxy = strtok(temp, ":");
- httpproxyport = strtok(NULL, "/");
- } else {
- (void) fprintf(stderr,
- gettext("ssh-http-proxy: http proxy not specified\n"));
- exit(1);
- }
- }
- }
-
- /*
- * Extract the proxy port number from the user's environment.
- * Ignored if HTTPPROXY is not set.
- */
- if ((httpproxy != NULL) && (httpproxyport == NULL)) {
- if ((httpproxyport = getenv("HTTPPROXYPORT")) == NULL) {
- httpproxyport = DEFAULT_HTTPPROXYPORT;
- }
- }
-
- debug("HTTPPROXY = %s", httpproxy);
- debug("HTTPPROXYPORT = %s", httpproxyport);
-
- bzero(&hints, sizeof (struct addrinfo));
- hints.ai_family = PF_UNSPEC;
- hints.ai_socktype = SOCK_STREAM;
-
- if ((err_code = getaddrinfo(httpproxy, httpproxyport, &hints, &ai))
- != 0) {
- (void) fprintf(stderr, "ssh-http-proxy: Unable to "
- "perform name lookup\n");
- (void) fprintf(stderr, "%s: %s\n", httpproxy,
- gai_strerror(err_code));
- exit(1);
- }
-
- if ((sock = socket(ai->ai_family, SOCK_STREAM, 0)) < 0) {
- perror("socket");
- exit(1);
- }
-
- /* Connect to the http proxy */
- if (connect(sock, ai->ai_addr, ai->ai_addrlen) == -1) {
- (void) fprintf(stderr, gettext("ssh-http-proxy: Unable to connect"
- " to %s: %s\n"), httpproxy, strerror(errno));
- (void) close(sock);
- exit(1);
- } else {
- /* Successful connection. */
- (void) snprintf(connect_str, sizeof (connect_str),
- "CONNECT %s:%d HTTP/1.1\r\n\r\n", ssh_host, ssh_port);
- if (write(sock, &connect_str, strlen(connect_str)) < 0) {
- perror("write");
- (void) close(sock);
- exit(1);
- }
-
- if (read(sock, connect_reply, sizeof (connect_reply)) == -1) {
- perror("read");
- (void) close(sock);
- exit(1);
- }
-
- if (sscanf(connect_reply, "HTTP/1.%d %d",
- &version, &ret_code) != 2) {
- (void) fprintf(stderr,
- gettext("ssh-http-proxy: HTTP reply not understood\n"));
- (void) close(sock);
- exit(1);
- }
-
- ret_string = strtok(connect_reply, "\n");
-
- /* If the return error code is not 200, print an error and quit. */
- if (ret_code != 200) {
- (void) fprintf(stderr, "%s\n", ret_string);
- (void) close(sock);
- exit(1);
- } else {
- debug("%s", ret_string);
- }
- }
-
- fds[0].fd = STDIN_FILENO; /* Poll stdin for data. */
- fds[1].fd = sock; /* Poll the socket for data. */
- fds[0].events = fds[1].events = POLLIN;
-
- for (;;) {
- if (poll(fds, 2, INFTIM) == -1) {
- perror("poll");
- (void) close(sock);
- exit(1);
- }
-
- /* Data arrived on stdin, write it to the socket */
- if (fds[0].revents & POLLIN) {
- if (proxy_read_write_loop(STDIN_FILENO, sock) == 0) {
- (void) close(sock);
- exit(1);
- }
- } else if (fds[0].revents & (POLLERR | POLLHUP | POLLNVAL)) {
- (void) close(sock);
- exit(1);
- }
-
- /* Data arrived on the socket, write it to stdout */
- if (fds[1].revents & POLLIN) {
- if (proxy_read_write_loop(sock, STDOUT_FILENO) == 0) {
- (void) close(sock);
- exit(1);
- }
- } else if (fds[1].revents & (POLLERR | POLLHUP | POLLNVAL)) {
- (void) close(sock);
- exit(1);
- }
- }
-
- /* NOTREACHED */
- return (0);
-}
diff --git a/usr/src/cmd/ssh/ssh-keygen/ssh-keygen.c b/usr/src/cmd/ssh/ssh-keygen/ssh-keygen.c
deleted file mode 100644
index ebad79b0f8..0000000000
--- a/usr/src/cmd/ssh/ssh-keygen/ssh-keygen.c
+++ /dev/null
@@ -1,1208 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1994 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * Identity and host key generation and maintenance.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-/* $OpenBSD: ssh-keygen.c,v 1.160 2007/01/21 01:41:54 stevesk Exp $ */
-
-#include "includes.h"
-#include <openssl/evp.h>
-#include <openssl/pem.h>
-
-#include "xmalloc.h"
-#include "key.h"
-#include "rsa.h"
-#include "authfile.h"
-#include "uuencode.h"
-#include "buffer.h"
-#include "bufaux.h"
-#include "pathnames.h"
-#include "log.h"
-#include "readpass.h"
-#include "misc.h"
-#include <langinfo.h>
-#include "match.h"
-#include "hostfile.h"
-#include "tildexpand.h"
-
-/* Number of bits in the RSA/DSA key. This value can be set on the command line. */
-#define DEFAULT_BITS_RSA 2048
-#define DEFAULT_BITS_DSA 1024
-u_int32_t bits = 0;
-
-/*
- * Flag indicating that we just want to change the passphrase. This can be
- * set on the command line.
- */
-int change_passphrase = 0;
-
-/*
- * Flag indicating that we just want to change the comment. This can be set
- * on the command line.
- */
-int change_comment = 0;
-
-int quiet = 0;
-
-/* Flag indicating that we want to hash a known_hosts file */
-int hash_hosts = 0;
-/* Flag indicating that we want to lookup a host in known_hosts file */
-int find_host = 0;
-/* Flag indicating that we want to delete a host from a known_hosts file */
-int delete_host = 0;
-
-/* Flag indicating that we just want to see the key fingerprint */
-int print_fingerprint = 0;
-int print_bubblebabble = 0;
-
-/* The identity file name, given on the command line or entered by the user. */
-char identity_file[1024];
-int have_identity = 0;
-
-/* This is set to the passphrase if given on the command line. */
-char *identity_passphrase = NULL;
-
-/* This is set to the new passphrase if given on the command line. */
-char *identity_new_passphrase = NULL;
-
-/* This is set to the new comment if given on the command line. */
-char *identity_comment = NULL;
-
-/* Dump public key file in format used by real and the original SSH 2 */
-int convert_to_ssh2 = 0;
-int convert_from_ssh2 = 0;
-int print_public = 0;
-
-char *key_type_name = NULL;
-
-/* argv0 */
-#ifdef HAVE___PROGNAME
-extern char *__progname;
-#else
-char *__progname;
-#endif
-
-char hostname[MAXHOSTNAMELEN];
-
-static void
-ask_filename(struct passwd *pw, const char *prompt)
-{
- char buf[1024];
- char *name = NULL;
-
- if (key_type_name == NULL)
- name = _PATH_SSH_CLIENT_ID_RSA;
- else {
- switch (key_type_from_name(key_type_name)) {
- case KEY_RSA1:
- name = _PATH_SSH_CLIENT_IDENTITY;
- break;
- case KEY_DSA:
- name = _PATH_SSH_CLIENT_ID_DSA;
- break;
- case KEY_RSA:
- name = _PATH_SSH_CLIENT_ID_RSA;
- break;
- default:
- fprintf(stderr, gettext("bad key type"));
- exit(1);
- break;
- }
- }
- snprintf(identity_file, sizeof(identity_file), "%s/%s", pw->pw_dir, name);
- fprintf(stderr, "%s (%s): ", gettext(prompt), identity_file);
- if (fgets(buf, sizeof(buf), stdin) == NULL)
- exit(1);
- if (strchr(buf, '\n'))
- *strchr(buf, '\n') = 0;
- if (strcmp(buf, "") != 0)
- strlcpy(identity_file, buf, sizeof(identity_file));
- have_identity = 1;
-}
-
-static Key *
-load_identity(char *filename)
-{
- char *pass;
- Key *prv;
-
- prv = key_load_private(filename, "", NULL);
- if (prv == NULL) {
- if (identity_passphrase)
- pass = xstrdup(identity_passphrase);
- else
- pass = read_passphrase(gettext("Enter passphrase: "),
- RP_ALLOW_STDIN);
- prv = key_load_private(filename, pass, NULL);
- memset(pass, 0, strlen(pass));
- xfree(pass);
- }
- return prv;
-}
-
-#define SSH_COM_PUBLIC_BEGIN "---- BEGIN SSH2 PUBLIC KEY ----"
-#define SSH_COM_PUBLIC_END "---- END SSH2 PUBLIC KEY ----"
-#define SSH_COM_PRIVATE_BEGIN "---- BEGIN SSH2 ENCRYPTED PRIVATE KEY ----"
-#define SSH_COM_PRIVATE_KEY_MAGIC 0x3f6ff9eb
-
-static void
-do_convert_to_ssh2(struct passwd *pw)
-{
- Key *k;
- u_int len;
- u_char *blob;
- struct stat st;
-
- if (!have_identity)
- ask_filename(pw, gettext("Enter file in which the key is"));
- if (stat(identity_file, &st) < 0) {
- perror(identity_file);
- exit(1);
- }
- if ((k = key_load_public(identity_file, NULL)) == NULL) {
- if ((k = load_identity(identity_file)) == NULL) {
- fprintf(stderr, gettext("load failed\n"));
- exit(1);
- }
- }
- if (key_to_blob(k, &blob, &len) <= 0) {
- fprintf(stderr, gettext("key_to_blob failed\n"));
- exit(1);
- }
- fprintf(stdout, "%s\n", SSH_COM_PUBLIC_BEGIN);
- fprintf(stdout, gettext(
- "Comment: \"%u-bit %s, converted from OpenSSH by %s@%s\"\n"),
- key_size(k), key_type(k),
- pw->pw_name, hostname);
- dump_base64(stdout, blob, len);
- fprintf(stdout, "%s\n", SSH_COM_PUBLIC_END);
- key_free(k);
- xfree(blob);
- exit(0);
-}
-
-static void
-buffer_get_bignum_bits(Buffer *b, BIGNUM *value)
-{
- u_int bignum_bits = buffer_get_int(b);
- u_int bytes = (bignum_bits + 7) / 8;
-
- if (buffer_len(b) < bytes)
- fatal("buffer_get_bignum_bits: input buffer too small: "
- "need %d have %d", bytes, buffer_len(b));
- if (BN_bin2bn(buffer_ptr(b), bytes, value) == NULL)
- fatal("buffer_get_bignum_bits: BN_bin2bn failed");
- buffer_consume(b, bytes);
-}
-
-static Key *
-do_convert_private_ssh2_from_blob(u_char *blob, u_int blen)
-{
- Buffer b;
- Key *key = NULL;
- char *type, *cipher;
- u_char *sig, data[] = "abcde12345";
- int magic, rlen, ktype, i1, i2, i3, i4;
- u_int slen;
- u_long e;
-
- buffer_init(&b);
- buffer_append(&b, blob, blen);
-
- magic = buffer_get_int(&b);
- if (magic != SSH_COM_PRIVATE_KEY_MAGIC) {
- error("bad magic 0x%x != 0x%x", magic, SSH_COM_PRIVATE_KEY_MAGIC);
- buffer_free(&b);
- return NULL;
- }
- i1 = buffer_get_int(&b);
- type = buffer_get_string(&b, NULL);
- cipher = buffer_get_string(&b, NULL);
- i2 = buffer_get_int(&b);
- i3 = buffer_get_int(&b);
- i4 = buffer_get_int(&b);
- debug("ignore (%d %d %d %d)", i1, i2, i3, i4);
- if (strcmp(cipher, "none") != 0) {
- error("unsupported cipher %s", cipher);
- xfree(cipher);
- buffer_free(&b);
- xfree(type);
- return NULL;
- }
- xfree(cipher);
-
- if (strstr(type, "dsa")) {
- ktype = KEY_DSA;
- } else if (strstr(type, "rsa")) {
- ktype = KEY_RSA;
- } else {
- buffer_free(&b);
- xfree(type);
- return NULL;
- }
- key = key_new_private(ktype);
- xfree(type);
-
- switch (key->type) {
- case KEY_DSA:
- buffer_get_bignum_bits(&b, key->dsa->p);
- buffer_get_bignum_bits(&b, key->dsa->g);
- buffer_get_bignum_bits(&b, key->dsa->q);
- buffer_get_bignum_bits(&b, key->dsa->pub_key);
- buffer_get_bignum_bits(&b, key->dsa->priv_key);
- break;
- case KEY_RSA:
- e = buffer_get_char(&b);
- debug("e %lx", e);
- if (e < 30) {
- e <<= 8;
- e += buffer_get_char(&b);
- debug("e %lx", e);
- e <<= 8;
- e += buffer_get_char(&b);
- debug("e %lx", e);
- }
- if (!BN_set_word(key->rsa->e, e)) {
- buffer_free(&b);
- key_free(key);
- return NULL;
- }
- buffer_get_bignum_bits(&b, key->rsa->d);
- buffer_get_bignum_bits(&b, key->rsa->n);
- buffer_get_bignum_bits(&b, key->rsa->iqmp);
- buffer_get_bignum_bits(&b, key->rsa->q);
- buffer_get_bignum_bits(&b, key->rsa->p);
- rsa_generate_additional_parameters(key->rsa);
- break;
- }
- rlen = buffer_len(&b);
- if (rlen != 0)
- error("do_convert_private_ssh2_from_blob: "
- "remaining bytes in key blob %d", rlen);
- buffer_free(&b);
-
- /* try the key */
- key_sign(key, &sig, &slen, data, sizeof(data));
- key_verify(key, sig, slen, data, sizeof(data));
- xfree(sig);
- return key;
-}
-
-static int
-get_line(FILE *fp, char *line, size_t len)
-{
- int c;
- size_t pos = 0;
-
- line[0] = '\0';
- while ((c = fgetc(fp)) != EOF) {
- if (pos >= len - 1) {
- fprintf(stderr, "input line too long.\n");
- exit(1);
- }
- switch (c) {
- case '\r':
- c = fgetc(fp);
- if (c != EOF && c != '\n' && ungetc(c, fp) == EOF) {
- fprintf(stderr, "unget: %s\n", strerror(errno));
- exit(1);
- }
- return pos;
- case '\n':
- return pos;
- }
- line[pos++] = c;
- line[pos] = '\0';
- }
- /* We reached EOF */
- return -1;
-}
-
-static void
-do_convert_from_ssh2(struct passwd *pw)
-{
- Key *k;
- int blen;
- u_int len;
- char line[1024];
- u_char blob[8096];
- char encoded[8096];
- struct stat st;
- int escaped = 0, private = 0, ok;
- FILE *fp;
-
- if (!have_identity)
- ask_filename(pw, gettext("Enter file in which the key is"));
- if (stat(identity_file, &st) < 0) {
- perror(identity_file);
- exit(1);
- }
- fp = fopen(identity_file, "r");
- if (fp == NULL) {
- perror(identity_file);
- exit(1);
- }
- encoded[0] = '\0';
- while ((blen = get_line(fp, line, sizeof(line))) != -1) {
- if (line[blen - 1] == '\\')
- escaped++;
- if (strncmp(line, "----", 4) == 0 ||
- strstr(line, ": ") != NULL) {
- if (strstr(line, SSH_COM_PRIVATE_BEGIN) != NULL)
- private = 1;
- if (strstr(line, " END ") != NULL) {
- break;
- }
- /* fprintf(stderr, "ignore: %s", line); */
- continue;
- }
- if (escaped) {
- escaped--;
- /* fprintf(stderr, "escaped: %s", line); */
- continue;
- }
- strlcat(encoded, line, sizeof(encoded));
- }
- len = strlen(encoded);
- if (((len % 4) == 3) &&
- (encoded[len-1] == '=') &&
- (encoded[len-2] == '=') &&
- (encoded[len-3] == '='))
- encoded[len-3] = '\0';
- blen = uudecode(encoded, blob, sizeof(blob));
- if (blen < 0) {
- fprintf(stderr, gettext("uudecode failed.\n"));
- exit(1);
- }
- k = private ?
- do_convert_private_ssh2_from_blob(blob, blen) :
- key_from_blob(blob, blen);
- if (k == NULL) {
- fprintf(stderr, gettext("decode blob failed.\n"));
- exit(1);
- }
- ok = private ?
- (k->type == KEY_DSA ?
- PEM_write_DSAPrivateKey(stdout, k->dsa, NULL, NULL, 0, NULL, NULL) :
- PEM_write_RSAPrivateKey(stdout, k->rsa, NULL, NULL, 0, NULL, NULL)) :
- key_write(k, stdout);
- if (!ok) {
- fprintf(stderr, gettext("key write failed"));
- exit(1);
- }
- key_free(k);
- if (!private)
- fprintf(stdout, "\n");
- fclose(fp);
- exit(0);
-}
-
-static void
-do_print_public(struct passwd *pw)
-{
- Key *prv;
- struct stat st;
-
- if (!have_identity)
- ask_filename(pw, gettext("Enter file in which the key is"));
- if (stat(identity_file, &st) < 0) {
- perror(identity_file);
- exit(1);
- }
- prv = load_identity(identity_file);
- if (prv == NULL) {
- fprintf(stderr, gettext("load failed\n"));
- exit(1);
- }
- if (!key_write(prv, stdout))
- fprintf(stderr, gettext("key_write failed"));
- key_free(prv);
- fprintf(stdout, "\n");
- exit(0);
-}
-
-static void
-do_fingerprint(struct passwd *pw)
-{
- FILE *f;
- Key *public;
- char *comment = NULL, *cp, *ep, line[16*1024], *fp;
- int i, skip = 0, num = 1, invalid = 1;
- enum fp_rep rep;
- enum fp_type fptype;
- struct stat st;
-
- fptype = print_bubblebabble ? SSH_FP_SHA1 : SSH_FP_MD5;
- rep = print_bubblebabble ? SSH_FP_BUBBLEBABBLE : SSH_FP_HEX;
-
- if (!have_identity)
- ask_filename(pw, gettext("Enter file in which the key is"));
- if (stat(identity_file, &st) < 0) {
- perror(identity_file);
- exit(1);
- }
- public = key_load_public(identity_file, &comment);
- if (public != NULL) {
- fp = key_fingerprint(public, fptype, rep);
- printf("%u %s %s\n", key_size(public), fp, comment);
- key_free(public);
- xfree(comment);
- xfree(fp);
- exit(0);
- }
- if (comment) {
- xfree(comment);
- comment = NULL;
- }
-
- f = fopen(identity_file, "r");
- if (f != NULL) {
- while (fgets(line, sizeof(line), f)) {
- i = strlen(line) - 1;
- if (line[i] != '\n') {
- error("line %d too long: %.40s...", num, line);
- skip = 1;
- continue;
- }
- num++;
- if (skip) {
- skip = 0;
- continue;
- }
- line[i] = '\0';
-
- /* Skip leading whitespace, empty and comment lines. */
- for (cp = line; *cp == ' ' || *cp == '\t'; cp++)
- ;
- if (!*cp || *cp == '\n' || *cp == '#')
- continue;
- i = strtol(cp, &ep, 10);
- if (i == 0 || ep == NULL || (*ep != ' ' && *ep != '\t')) {
- int quoted = 0;
- comment = cp;
- for (; *cp && (quoted || (*cp != ' ' &&
- *cp != '\t')); cp++) {
- if (*cp == '\\' && cp[1] == '"')
- cp++; /* Skip both */
- else if (*cp == '"')
- quoted = !quoted;
- }
- if (!*cp)
- continue;
- *cp++ = '\0';
- }
- ep = cp;
- public = key_new(KEY_RSA1);
- if (key_read(public, &cp) != 1) {
- cp = ep;
- key_free(public);
- public = key_new(KEY_UNSPEC);
- if (key_read(public, &cp) != 1) {
- key_free(public);
- continue;
- }
- }
- comment = *cp ? cp : comment;
- fp = key_fingerprint(public, fptype, rep);
- printf("%u %s %s\n", key_size(public), fp,
- comment ? comment : gettext("no comment"));
- xfree(fp);
- key_free(public);
- invalid = 0;
- }
- fclose(f);
- }
- if (invalid) {
- printf(gettext("%s is not a public key file.\n"),
- identity_file);
- exit(1);
- }
- exit(0);
-}
-
-static void
-print_host(FILE *f, const char *name, Key *public, int hash)
-{
- if (hash && (name = host_hash(name, NULL, 0)) == NULL)
- fatal("hash_host failed");
- fprintf(f, "%s ", name);
- if (!key_write(public, f))
- fatal("key_write failed");
- fprintf(f, "\n");
-}
-
-static void
-do_known_hosts(struct passwd *pw, const char *name)
-{
- FILE *in, *out = stdout;
- Key *public;
- char *cp, *cp2, *kp, *kp2;
- char line[16*1024], tmp[MAXPATHLEN], old[MAXPATHLEN];
- int c, i, skip = 0, inplace = 0, num = 0, invalid = 0, has_unhashed = 0;
-
- if (!have_identity) {
- cp = tilde_expand_filename(_PATH_SSH_USER_HOSTFILE, pw->pw_uid);
- if (strlcpy(identity_file, cp, sizeof(identity_file)) >=
- sizeof(identity_file))
- fatal("Specified known hosts path too long");
- xfree(cp);
- have_identity = 1;
- }
- if ((in = fopen(identity_file, "r")) == NULL)
- fatal("fopen: %s", strerror(errno));
-
- /*
- * Find hosts goes to stdout, hash and deletions happen in-place
- * A corner case is ssh-keygen -HF foo, which should go to stdout
- */
- if (!find_host && (hash_hosts || delete_host)) {
- if (strlcpy(tmp, identity_file, sizeof(tmp)) >= sizeof(tmp) ||
- strlcat(tmp, ".XXXXXXXXXX", sizeof(tmp)) >= sizeof(tmp) ||
- strlcpy(old, identity_file, sizeof(old)) >= sizeof(old) ||
- strlcat(old, ".old", sizeof(old)) >= sizeof(old))
- fatal("known_hosts path too long");
- umask(077);
- if ((c = mkstemp(tmp)) == -1)
- fatal("mkstemp: %s", strerror(errno));
- if ((out = fdopen(c, "w")) == NULL) {
- c = errno;
- unlink(tmp);
- fatal("fdopen: %s", strerror(c));
- }
- inplace = 1;
- }
-
- while (fgets(line, sizeof(line), in)) {
- num++;
- i = strlen(line) - 1;
- if (line[i] != '\n') {
- error("line %d too long: %.40s...", num, line);
- skip = 1;
- invalid = 1;
- continue;
- }
- if (skip) {
- skip = 0;
- continue;
- }
- line[i] = '\0';
-
- /* Skip leading whitespace, empty and comment lines. */
- for (cp = line; *cp == ' ' || *cp == '\t'; cp++)
- ;
- if (!*cp || *cp == '\n' || *cp == '#') {
- if (inplace)
- fprintf(out, "%s\n", cp);
- continue;
- }
- /* Find the end of the host name portion. */
- for (kp = cp; *kp && *kp != ' ' && *kp != '\t'; kp++)
- ;
- if (*kp == '\0' || *(kp + 1) == '\0') {
- error("line %d missing key: %.40s...",
- num, line);
- invalid = 1;
- continue;
- }
- *kp++ = '\0';
- kp2 = kp;
-
- public = key_new(KEY_RSA1);
- if (key_read(public, &kp) != 1) {
- kp = kp2;
- key_free(public);
- public = key_new(KEY_UNSPEC);
- if (key_read(public, &kp) != 1) {
- error("line %d invalid key: %.40s...",
- num, line);
- key_free(public);
- invalid = 1;
- continue;
- }
- }
-
- if (*cp == HASH_DELIM) {
- if (find_host || delete_host) {
- cp2 = host_hash(name, cp, strlen(cp));
- if (cp2 == NULL) {
- error("line %d: invalid hashed "
- "name: %.64s...", num, line);
- invalid = 1;
- continue;
- }
- c = (strcmp(cp2, cp) == 0);
- if (find_host && c) {
- printf(gettext("# Host %s found: "
- "line %d type %s\n"), name,
- num, key_type(public));
- print_host(out, cp, public, 0);
- }
- if (delete_host && !c)
- print_host(out, cp, public, 0);
- } else if (hash_hosts)
- print_host(out, cp, public, 0);
- } else {
- if (find_host || delete_host) {
- c = (match_hostname(name, cp,
- strlen(cp)) == 1);
- if (find_host && c) {
- printf(gettext("# Host %s found: "
- "line %d type %s\n"), name,
- num, key_type(public));
- print_host(out, name, public, hash_hosts);
- }
- if (delete_host && !c)
- print_host(out, cp, public, 0);
- } else if (hash_hosts) {
- for (cp2 = strsep(&cp, ",");
- cp2 != NULL && *cp2 != '\0';
- cp2 = strsep(&cp, ",")) {
- if (strcspn(cp2, "*?!") != strlen(cp2))
- fprintf(stderr, gettext("Warning: "
- "ignoring host name with "
- "metacharacters: %.64s\n"),
- cp2);
- else
- print_host(out, cp2, public, 1);
- }
- has_unhashed = 1;
- }
- }
- key_free(public);
- }
- fclose(in);
-
- if (invalid) {
- fprintf(stderr, gettext("%s is not a valid known_host file.\n"),
- identity_file);
- if (inplace) {
- fprintf(stderr, gettext("Not replacing existing known_hosts "
- "file because of errors\n"));
- fclose(out);
- unlink(tmp);
- }
- exit(1);
- }
-
- if (inplace) {
- fclose(out);
-
- /* Backup existing file */
- if (unlink(old) == -1 && errno != ENOENT)
- fatal("unlink %.100s: %s", old, strerror(errno));
- if (link(identity_file, old) == -1)
- fatal("link %.100s to %.100s: %s", identity_file, old,
- strerror(errno));
- /* Move new one into place */
- if (rename(tmp, identity_file) == -1) {
- error("rename\"%s\" to \"%s\": %s", tmp, identity_file,
- strerror(errno));
- unlink(tmp);
- unlink(old);
- exit(1);
- }
-
- fprintf(stderr, gettext("%s updated.\n"), identity_file);
- fprintf(stderr, gettext("Original contents retained as %s\n"), old);
- if (has_unhashed) {
- fprintf(stderr, gettext("WARNING: %s contains unhashed "
- "entries\n"), old);
- fprintf(stderr, gettext("Delete this file to ensure privacy "
- "of hostnames\n"));
- }
- }
-
- exit(0);
-}
-
-/*
- * Perform changing a passphrase. The argument is the passwd structure
- * for the current user.
- */
-static void
-do_change_passphrase(struct passwd *pw)
-{
- char *comment;
- char *old_passphrase, *passphrase1, *passphrase2;
- struct stat st;
- Key *private;
-
- if (!have_identity)
- ask_filename(pw, gettext("Enter file in which the key is"));
- if (stat(identity_file, &st) < 0) {
- perror(identity_file);
- exit(1);
- }
- /* Try to load the file with empty passphrase. */
- private = key_load_private(identity_file, "", &comment);
- if (private == NULL) {
- if (identity_passphrase)
- old_passphrase = xstrdup(identity_passphrase);
- else
- old_passphrase =
- read_passphrase(gettext("Enter old passphrase: "),
- RP_ALLOW_STDIN);
- private = key_load_private(identity_file, old_passphrase,
- &comment);
- memset(old_passphrase, 0, strlen(old_passphrase));
- xfree(old_passphrase);
- if (private == NULL) {
- printf(gettext("Bad passphrase.\n"));
- exit(1);
- }
- }
- printf(gettext("Key has comment '%s'\n"), comment);
-
- /* Ask the new passphrase (twice). */
- if (identity_new_passphrase) {
- passphrase1 = xstrdup(identity_new_passphrase);
- passphrase2 = NULL;
- } else {
- passphrase1 =
- read_passphrase(gettext("Enter new passphrase (empty"
- " for no passphrase): "), RP_ALLOW_STDIN);
- passphrase2 = read_passphrase(gettext("Enter same "
- "passphrase again: "), RP_ALLOW_STDIN);
-
- /* Verify that they are the same. */
- if (strcmp(passphrase1, passphrase2) != 0) {
- memset(passphrase1, 0, strlen(passphrase1));
- memset(passphrase2, 0, strlen(passphrase2));
- xfree(passphrase1);
- xfree(passphrase2);
- printf(gettext("Pass phrases do not match. Try "
- "again.\n"));
- exit(1);
- }
- /* Destroy the other copy. */
- memset(passphrase2, 0, strlen(passphrase2));
- xfree(passphrase2);
- }
-
- /* Save the file using the new passphrase. */
- if (!key_save_private(private, identity_file, passphrase1, comment)) {
- printf(gettext("Saving the key failed: %s.\n"), identity_file);
- memset(passphrase1, 0, strlen(passphrase1));
- xfree(passphrase1);
- key_free(private);
- xfree(comment);
- exit(1);
- }
- /* Destroy the passphrase and the copy of the key in memory. */
- memset(passphrase1, 0, strlen(passphrase1));
- xfree(passphrase1);
- key_free(private); /* Destroys contents */
- xfree(comment);
-
- printf(gettext("Your identification has been saved with the new "
- "passphrase.\n"));
- exit(0);
-}
-
-/*
- * Change the comment of a private key file.
- */
-static void
-do_change_comment(struct passwd *pw)
-{
- char new_comment[1024], *comment, *passphrase;
- Key *private;
- Key *public;
- struct stat st;
- FILE *f;
- int fd;
-
- if (!have_identity)
- ask_filename(pw, gettext("Enter file in which the key is"));
- if (stat(identity_file, &st) < 0) {
- perror(identity_file);
- exit(1);
- }
- private = key_load_private(identity_file, "", &comment);
- if (private == NULL) {
- if (identity_passphrase)
- passphrase = xstrdup(identity_passphrase);
- else if (identity_new_passphrase)
- passphrase = xstrdup(identity_new_passphrase);
- else
- passphrase =
- read_passphrase(gettext("Enter passphrase: "),
- RP_ALLOW_STDIN);
- /* Try to load using the passphrase. */
- private = key_load_private(identity_file, passphrase, &comment);
- if (private == NULL) {
- memset(passphrase, 0, strlen(passphrase));
- xfree(passphrase);
- printf(gettext("Bad passphrase.\n"));
- exit(1);
- }
- } else {
- passphrase = xstrdup("");
- }
- if (private->type != KEY_RSA1) {
- fprintf(stderr, gettext("Comments are only supported for "
- "RSA1 keys.\n"));
- key_free(private);
- exit(1);
- }
- printf(gettext("Key now has comment '%s'\n"), comment);
-
- if (identity_comment) {
- strlcpy(new_comment, identity_comment, sizeof(new_comment));
- } else {
- printf(gettext("Enter new comment: "));
- fflush(stdout);
- if (!fgets(new_comment, sizeof(new_comment), stdin)) {
- memset(passphrase, 0, strlen(passphrase));
- key_free(private);
- exit(1);
- }
- if (strchr(new_comment, '\n'))
- *strchr(new_comment, '\n') = 0;
- }
-
- /* Save the file using the new passphrase. */
- if (!key_save_private(private, identity_file, passphrase, new_comment)) {
- printf(gettext("Saving the key failed: %s.\n"), identity_file);
- memset(passphrase, 0, strlen(passphrase));
- xfree(passphrase);
- key_free(private);
- xfree(comment);
- exit(1);
- }
- memset(passphrase, 0, strlen(passphrase));
- xfree(passphrase);
- public = key_from_private(private);
- key_free(private);
-
- strlcat(identity_file, ".pub", sizeof(identity_file));
- fd = open(identity_file, O_WRONLY | O_CREAT | O_TRUNC, 0644);
- if (fd == -1) {
- printf(gettext("Could not save your public key in %s\n"),
- identity_file);
- exit(1);
- }
- f = fdopen(fd, "w");
- if (f == NULL) {
- printf(gettext("fdopen %s failed"), identity_file);
- exit(1);
- }
- if (!key_write(public, f))
- fprintf(stderr, gettext("write key failed"));
- key_free(public);
- fprintf(f, " %s\n", new_comment);
- fclose(f);
-
- xfree(comment);
-
- printf(gettext("The comment in your key file has been changed.\n"));
- exit(0);
-}
-
-static void
-usage(void)
-{
- fprintf(stderr, gettext(
- "Usage: %s [options]\n"
- "Options:\n"
- " -b bits Number of bits in the key to create.\n"
- " -B Show bubblebabble digest of key file.\n"
- " -c Change comment in private and public key files.\n"
- " -C comment Provide new comment.\n"
- " -e Convert OpenSSH to IETF SECSH key file.\n"
- " -f filename Filename of the key file.\n"
- " -F hostname Find hostname in known hosts file.\n"
- " -H Hash names in known_hosts file.\n"
- " -i Convert IETF SECSH to OpenSSH key file.\n"
- " -l Show fingerprint of key file.\n"
- " -N phrase Provide new passphrase.\n"
- " -p Change passphrase of private key file.\n"
- " -P phrase Provide old passphrase.\n"
- " -q Quiet.\n"
- " -R hostname Remove host from known_hosts file.\n"
- " -t type Specify type of key to create.\n"
- " -y Read private key file and print public key.\n"
- ), __progname);
-
- exit(1);
-}
-
-/*
- * Main program for key management.
- */
-int
-main(int argc, char **argv)
-{
- char dotsshdir[MAXPATHLEN], comment[1024], *passphrase1, *passphrase2;
- char *rr_hostname = NULL;
- Key *private, *public;
- struct passwd *pw;
- struct stat st;
- int opt, type, fd;
- FILE *f;
-
- extern int optind;
- extern char *optarg;
-
- /* Ensure that fds 0, 1 and 2 are open or directed to /dev/null */
- sanitise_stdfd();
-
- __progname = get_progname(argv[0]);
-
- g11n_setlocale(LC_ALL, "");
-
- SSLeay_add_all_algorithms();
- init_rng();
- seed_rng();
-
- /* we need this for the home * directory. */
- pw = getpwuid(getuid());
- if (!pw) {
- printf(gettext("You don't exist, go away!\n"));
- exit(1);
- }
- if (gethostname(hostname, sizeof(hostname)) < 0) {
- perror("gethostname");
- exit(1);
- }
-
-#define GETOPT_ARGS "BcdeHilpqxXyb:C:f:F:N:P:R:t:"
-
- while ((opt = getopt(argc, argv, GETOPT_ARGS)) != -1) {
- switch (opt) {
- case 'b':
- bits = atoi(optarg);
- if (bits < 512 || bits > 32768) {
- printf(gettext("Bits has bad value.\n"));
- exit(1);
- }
- break;
- case 'F':
- find_host = 1;
- rr_hostname = optarg;
- break;
- case 'H':
- hash_hosts = 1;
- break;
- case 'R':
- delete_host = 1;
- rr_hostname = optarg;
- break;
- case 'l':
- print_fingerprint = 1;
- break;
- case 'B':
- print_bubblebabble = 1;
- break;
- case 'p':
- change_passphrase = 1;
- break;
- case 'c':
- change_comment = 1;
- break;
- case 'f':
- strlcpy(identity_file, optarg, sizeof(identity_file));
- have_identity = 1;
- break;
- case 'P':
- identity_passphrase = optarg;
- break;
- case 'N':
- identity_new_passphrase = optarg;
- break;
- case 'C':
- identity_comment = optarg;
- break;
- case 'q':
- quiet = 1;
- break;
- case 'e':
- case 'x':
- /* export key */
- convert_to_ssh2 = 1;
- break;
- case 'i':
- case 'X':
- /* import key */
- convert_from_ssh2 = 1;
- break;
- case 'y':
- print_public = 1;
- break;
- case 'd':
- key_type_name = "dsa";
- break;
- case 't':
- key_type_name = optarg;
- break;
- case '?':
- default:
- usage();
- }
- }
- if (optind < argc) {
- printf(gettext("Too many arguments.\n"));
- usage();
- }
- if (change_passphrase && change_comment) {
- printf(gettext("Can only have one of -p and -c.\n"));
- usage();
- }
- if (delete_host || hash_hosts || find_host)
- do_known_hosts(pw, rr_hostname);
- if (print_fingerprint || print_bubblebabble)
- do_fingerprint(pw);
- if (change_passphrase)
- do_change_passphrase(pw);
- if (change_comment)
- do_change_comment(pw);
- if (convert_to_ssh2)
- do_convert_to_ssh2(pw);
- if (convert_from_ssh2)
- do_convert_from_ssh2(pw);
- if (print_public)
- do_print_public(pw);
-
- arc4random_stir();
-
- if (key_type_name == NULL) {
- printf(gettext("You must specify a key type (-t).\n"));
- usage();
- }
- type = key_type_from_name(key_type_name);
- if (type == KEY_UNSPEC) {
- fprintf(stderr, gettext("unknown key type %s\n"),
- key_type_name);
- exit(1);
- }
- if (bits == 0)
- bits = (type == KEY_DSA) ? DEFAULT_BITS_DSA : DEFAULT_BITS_RSA;
-
- if (!quiet)
- printf(gettext("Generating public/private %s key pair.\n"),
- key_type_name);
- private = key_generate(type, bits);
- if (private == NULL) {
- fprintf(stderr, gettext("key_generate failed"));
- exit(1);
- }
- public = key_from_private(private);
-
- if (!have_identity)
- ask_filename(pw, gettext("Enter file in which to save the key"));
-
- /* Create ~/.ssh directory if it doesn't already exist. */
- snprintf(dotsshdir, sizeof dotsshdir, "%s/%s", pw->pw_dir, _PATH_SSH_USER_DIR);
- if (strstr(identity_file, dotsshdir) != NULL &&
- stat(dotsshdir, &st) < 0) {
- if (mkdir(dotsshdir, 0700) < 0)
- error("Could not create directory '%s'.", dotsshdir);
- else if (!quiet)
- printf(gettext("Created directory '%s'.\n"), dotsshdir);
- }
- /* If the file already exists, ask the user to confirm. */
- if (stat(identity_file, &st) >= 0) {
- char yesno[128];
- printf(gettext("%s already exists.\n"), identity_file);
- printf(gettext("Overwrite (%s/%s)? "),
- nl_langinfo(YESSTR), nl_langinfo(NOSTR));
- fflush(stdout);
- if (fgets(yesno, sizeof(yesno), stdin) == NULL)
- exit(1);
- if (strcasecmp(chop(yesno), nl_langinfo(YESSTR)) != 0)
- exit(1);
- }
- /* Ask for a passphrase (twice). */
- if (identity_passphrase)
- passphrase1 = xstrdup(identity_passphrase);
- else if (identity_new_passphrase)
- passphrase1 = xstrdup(identity_new_passphrase);
- else {
-passphrase_again:
- passphrase1 =
- read_passphrase(gettext("Enter passphrase (empty "
- "for no passphrase): "), RP_ALLOW_STDIN);
- passphrase2 = read_passphrase(gettext("Enter same "
- "passphrase again: "), RP_ALLOW_STDIN);
- if (strcmp(passphrase1, passphrase2) != 0) {
- /*
- * The passphrases do not match. Clear them and
- * retry.
- */
- memset(passphrase1, 0, strlen(passphrase1));
- memset(passphrase2, 0, strlen(passphrase2));
- xfree(passphrase1);
- xfree(passphrase2);
- printf(gettext("Passphrases do not match. Try "
- "again.\n"));
- goto passphrase_again;
- }
- /* Clear the other copy of the passphrase. */
- memset(passphrase2, 0, strlen(passphrase2));
- xfree(passphrase2);
- }
-
- if (identity_comment) {
- strlcpy(comment, identity_comment, sizeof(comment));
- } else {
- /* Create default commend field for the passphrase. */
- snprintf(comment, sizeof comment, "%s@%s", pw->pw_name, hostname);
- }
-
- /* Save the key with the given passphrase and comment. */
- if (!key_save_private(private, identity_file, passphrase1, comment)) {
- printf(gettext("Saving the key failed: %s.\n"), identity_file);
- memset(passphrase1, 0, strlen(passphrase1));
- xfree(passphrase1);
- exit(1);
- }
- /* Clear the passphrase. */
- memset(passphrase1, 0, strlen(passphrase1));
- xfree(passphrase1);
-
- /* Clear the private key and the random number generator. */
- key_free(private);
- arc4random_stir();
-
- if (!quiet)
- printf(gettext("Your identification has been saved in %s.\n"),
- identity_file);
-
- strlcat(identity_file, ".pub", sizeof(identity_file));
- fd = open(identity_file, O_WRONLY | O_CREAT | O_TRUNC, 0644);
- if (fd == -1) {
- printf(gettext("Could not save your public key in %s\n"),
- identity_file);
- exit(1);
- }
- f = fdopen(fd, "w");
- if (f == NULL) {
- printf(gettext("fdopen %s failed"), identity_file);
- exit(1);
- }
- if (!key_write(public, f))
- fprintf(stderr, gettext("write key failed"));
- fprintf(f, " %s\n", comment);
- fclose(f);
-
- if (!quiet) {
- char *fp = key_fingerprint(public, SSH_FP_MD5, SSH_FP_HEX);
- printf(gettext("Your public key has been saved in %s.\n"),
- identity_file);
- printf(gettext("The key fingerprint is:\n"));
- printf("%s %s\n", fp, comment);
- xfree(fp);
- }
-
- key_free(public);
- return(0);
- /* NOTREACHED */
-}
diff --git a/usr/src/cmd/ssh/ssh-keyscan/Makefile b/usr/src/cmd/ssh/ssh-keyscan/Makefile
deleted file mode 100644
index 0628f5db83..0000000000
--- a/usr/src/cmd/ssh/ssh-keyscan/Makefile
+++ /dev/null
@@ -1,65 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
-#
-# cmd/ssh/ssh-keyscan/Makefile
-
-PROG= ssh-keyscan
-
-OBJS = \
- ssh-keyscan.o
-SRCS = $(OBJS:.o=.c)
-
-include ../../Makefile.cmd
-include ../Makefile.ssh-common
-
-DIRS= $(ROOTLIBSUNSSH)
-
-LDLIBS += $(SSH_COMMON_LDLIBS) -lsocket -lnsl -lz -lcrypto
-
-POFILE_DIR= ..
-
-.KEEP_STATE:
-
-.PARALLEL: $(OBJS)
-
-all: $(PROG)
-
-$(PROG): $(OBJS) ../libssh/$(MACH)/libssh.a ../libopenbsd-compat/$(MACH)/libopenbsd-compat.a
- $(LINK.c) $(OBJS) -o $@ $(LDLIBS) $(DYNFLAGS)
- $(POST_PROCESS)
-
-$(DIRS):
- $(INS.dir)
-
-$(ROOTBIN)/ssh-keyscan: $(ROOTLIBSUNSSH)/ssh-keyscan
- -$(RM) $@; $(SYMLINK) ../lib/sunssh/ssh-keyscan $@
-
-clean:
- $(RM) -f $(OBJS) $(PROG)
-
-lint: lint_SRCS
-
-include ../Makefile.msg.targ
-include ../../Makefile.targ
-
-install: all $(ROOTPROG)
diff --git a/usr/src/cmd/ssh/ssh-keyscan/ssh-keyscan.c b/usr/src/cmd/ssh/ssh-keyscan/ssh-keyscan.c
deleted file mode 100644
index 8879289088..0000000000
--- a/usr/src/cmd/ssh/ssh-keyscan/ssh-keyscan.c
+++ /dev/null
@@ -1,832 +0,0 @@
-/*
- * Copyright 1995, 1996 by David Mazieres <dm@lcs.mit.edu>.
- *
- * Modification and redistribution in source and binary forms is
- * permitted provided that due credit is given to the author and the
- * OpenBSD project by leaving this copyright notice intact.
- */
-
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: ssh-keyscan.c,v 1.40 2002/07/06 17:47:58 stevesk Exp $");
-
-#include "sys-queue.h"
-
-#include <openssl/bn.h>
-
-#include <setjmp.h>
-#include "xmalloc.h"
-#include "ssh.h"
-#include "ssh1.h"
-#include "key.h"
-#include "kex.h"
-#include "compat.h"
-#include "myproposal.h"
-#include "packet.h"
-#include "dispatch.h"
-#include "buffer.h"
-#include "bufaux.h"
-#include "log.h"
-#include "atomicio.h"
-#include "misc.h"
-
-/* Flag indicating whether IPv4 or IPv6. This can be set on the command line.
- Default value is AF_UNSPEC means both IPv4 and IPv6. */
-#ifdef IPV4_DEFAULT
-int IPv4or6 = AF_INET;
-#else
-int IPv4or6 = AF_UNSPEC;
-#endif
-
-int ssh_port = SSH_DEFAULT_PORT;
-
-#define KT_RSA1 1
-#define KT_DSA 2
-#define KT_RSA 4
-
-int get_keytypes = KT_RSA1; /* Get only RSA1 keys by default */
-
-#define MAXMAXFD 256
-
-/* The number of seconds after which to give up on a TCP connection */
-int timeout = 5;
-
-int maxfd;
-#define MAXCON (maxfd - 10)
-
-#ifdef HAVE___PROGNAME
-extern char *__progname;
-#else
-char *__progname;
-#endif
-fd_set *read_wait;
-size_t read_wait_size;
-int ncon;
-int nonfatal_fatal = 0;
-jmp_buf kexjmp;
-Key *kexjmp_key;
-
-/*
- * Keep a connection structure for each file descriptor. The state
- * associated with file descriptor n is held in fdcon[n].
- */
-typedef struct Connection {
- u_char c_status; /* State of connection on this file desc. */
-#define CS_UNUSED 0 /* File descriptor unused */
-#define CS_CON 1 /* Waiting to connect/read greeting */
-#define CS_SIZE 2 /* Waiting to read initial packet size */
-#define CS_KEYS 3 /* Waiting to read public key packet */
- int c_fd; /* Quick lookup: c->c_fd == c - fdcon */
- int c_plen; /* Packet length field for ssh packet */
- int c_len; /* Total bytes which must be read. */
- int c_off; /* Length of data read so far. */
- int c_keytype; /* Only one of KT_RSA1, KT_DSA, or KT_RSA */
- char *c_namebase; /* Address to free for c_name and c_namelist */
- char *c_name; /* Hostname of connection for errors */
- char *c_namelist; /* Pointer to other possible addresses */
- char *c_output_name; /* Hostname of connection for output */
- char *c_data; /* Data read from this fd */
- Kex *c_kex; /* The key-exchange struct for ssh2 */
- struct timeval c_tv; /* Time at which connection gets aborted */
- TAILQ_ENTRY(Connection) c_link; /* List of connections in timeout order. */
-} con;
-
-TAILQ_HEAD(conlist, Connection) tq; /* Timeout Queue */
-con *fdcon;
-
-/*
- * This is just a wrapper around fgets() to make it usable.
- */
-
-/* Stress-test. Increase this later. */
-#define LINEBUF_SIZE 16
-
-typedef struct {
- char *buf;
- u_int size;
- int lineno;
- const char *filename;
- FILE *stream;
- void (*errfun) (const char *,...);
-} Linebuf;
-
-static Linebuf *
-Linebuf_alloc(const char *filename, void (*errfun) (const char *,...))
-{
- Linebuf *lb;
-
- if (!(lb = malloc(sizeof(*lb)))) {
- if (errfun)
- (*errfun) ("linebuf (%s): malloc failed\n",
- filename ? filename : "(stdin)");
- return (NULL);
- }
- if (filename) {
- lb->filename = filename;
- if (!(lb->stream = fopen(filename, "r"))) {
- xfree(lb);
- if (errfun)
- (*errfun) ("%s: %s\n", filename, strerror(errno));
- return (NULL);
- }
- } else {
- lb->filename = "(stdin)";
- lb->stream = stdin;
- }
-
- if (!(lb->buf = malloc(lb->size = LINEBUF_SIZE))) {
- if (errfun)
- (*errfun) ("linebuf (%s): malloc failed\n", lb->filename);
- xfree(lb);
- return (NULL);
- }
- lb->errfun = errfun;
- lb->lineno = 0;
- return (lb);
-}
-
-static void
-Linebuf_free(Linebuf * lb)
-{
- fclose(lb->stream);
- xfree(lb->buf);
- xfree(lb);
-}
-
-#if 0
-static void
-Linebuf_restart(Linebuf * lb)
-{
- clearerr(lb->stream);
- rewind(lb->stream);
- lb->lineno = 0;
-}
-
-static int
-Linebuf_lineno(Linebuf * lb)
-{
- return (lb->lineno);
-}
-#endif
-
-static char *
-Linebuf_getline(Linebuf * lb)
-{
- int n = 0;
- void *p;
-
- lb->lineno++;
- for (;;) {
- /* Read a line */
- if (!fgets(&lb->buf[n], lb->size - n, lb->stream)) {
- if (ferror(lb->stream) && lb->errfun)
- (*lb->errfun)("%s: %s\n", lb->filename,
- strerror(errno));
- return (NULL);
- }
- n = strlen(lb->buf);
-
- /* Return it or an error if it fits */
- if (n > 0 && lb->buf[n - 1] == '\n') {
- lb->buf[n - 1] = '\0';
- return (lb->buf);
- }
- if (n != lb->size - 1) {
- if (lb->errfun)
- (*lb->errfun)("%s: skipping incomplete last line\n",
- lb->filename);
- return (NULL);
- }
- /* Double the buffer if we need more space */
- lb->size *= 2;
- if ((p = realloc(lb->buf, lb->size)) == NULL) {
- lb->size /= 2;
- if (lb->errfun)
- (*lb->errfun)("linebuf (%s): realloc failed\n",
- lb->filename);
- return (NULL);
- }
- lb->buf = p;
- }
-}
-
-static int
-fdlim_get(int hard)
-{
-#if defined(HAVE_GETRLIMIT) && defined(RLIMIT_NOFILE)
- struct rlimit rlfd;
-
- if (getrlimit(RLIMIT_NOFILE, &rlfd) < 0)
- return (-1);
- if ((hard ? rlfd.rlim_max : rlfd.rlim_cur) == RLIM_INFINITY)
- return 10000;
- else
- return hard ? rlfd.rlim_max : rlfd.rlim_cur;
-#elif defined (HAVE_SYSCONF)
- return sysconf (_SC_OPEN_MAX);
-#else
- return 10000;
-#endif
-}
-
-static int
-fdlim_set(int lim)
-{
-#if defined(HAVE_SETRLIMIT) && defined(RLIMIT_NOFILE)
- struct rlimit rlfd;
-#endif
-
- if (lim <= 0)
- return (-1);
-#if defined(HAVE_SETRLIMIT) && defined(RLIMIT_NOFILE)
- if (getrlimit(RLIMIT_NOFILE, &rlfd) < 0)
- return (-1);
- rlfd.rlim_cur = lim;
- if (setrlimit(RLIMIT_NOFILE, &rlfd) < 0)
- return (-1);
-#elif defined (HAVE_SETDTABLESIZE)
- setdtablesize(lim);
-#endif
- return (0);
-}
-
-/*
- * This is an strsep function that returns a null field for adjacent
- * separators. This is the same as the 4.4BSD strsep, but different from the
- * one in the GNU libc.
- */
-static char *
-xstrsep(char **str, const char *delim)
-{
- char *s, *e;
-
- if (!**str)
- return (NULL);
-
- s = *str;
- e = s + strcspn(s, delim);
-
- if (*e != '\0')
- *e++ = '\0';
- *str = e;
-
- return (s);
-}
-
-/*
- * Get the next non-null token (like GNU strsep). Strsep() will return a
- * null token for two adjacent separators, so we may have to loop.
- */
-static char *
-strnnsep(char **stringp, char *delim)
-{
- char *tok;
-
- do {
- tok = xstrsep(stringp, delim);
- } while (tok && *tok == '\0');
- return (tok);
-}
-
-static Key *
-keygrab_ssh1(con *c)
-{
- static Key *rsa;
- static Buffer msg;
-
- if (rsa == NULL) {
- buffer_init(&msg);
- rsa = key_new(KEY_RSA1);
- }
- buffer_append(&msg, c->c_data, c->c_plen);
- buffer_consume(&msg, 8 - (c->c_plen & 7)); /* padding */
- if (buffer_get_char(&msg) != (int) SSH_SMSG_PUBLIC_KEY) {
- error("%s: invalid packet type", c->c_name);
- buffer_clear(&msg);
- return NULL;
- }
- buffer_consume(&msg, 8); /* cookie */
-
- /* server key */
- (void) buffer_get_int(&msg);
- buffer_get_bignum(&msg, rsa->rsa->e);
- buffer_get_bignum(&msg, rsa->rsa->n);
-
- /* host key */
- (void) buffer_get_int(&msg);
- buffer_get_bignum(&msg, rsa->rsa->e);
- buffer_get_bignum(&msg, rsa->rsa->n);
-
- buffer_clear(&msg);
-
- return (rsa);
-}
-
-static int
-hostjump(Key *hostkey)
-{
- kexjmp_key = hostkey;
- longjmp(kexjmp, 1);
- /* NOTREACHED */
- return (0);
-}
-
-static int
-ssh2_capable(int remote_major, int remote_minor)
-{
- switch (remote_major) {
- case 1:
- if (remote_minor == 99)
- return 1;
- break;
- case 2:
- return 1;
- default:
- break;
- }
- return 0;
-}
-
-static Key *
-keygrab_ssh2(con *c)
-{
- int j;
-
- packet_set_connection(c->c_fd, c->c_fd);
- enable_compat20();
- my_clnt_proposal[PROPOSAL_SERVER_HOST_KEY_ALGS] =
- c->c_keytype == KT_DSA? "ssh-dss": "ssh-rsa";
- c->c_kex = kex_setup(c->c_name, my_clnt_proposal, NULL);
- kex_start(c->c_kex);
- c->c_kex->kex[KEX_DH_GRP1_SHA1] = kexdh_client;
- c->c_kex->kex[KEX_DH_GEX_SHA1] = kexgex_client;
- c->c_kex->verify_host_key = hostjump;
-
- if (!(j = setjmp(kexjmp))) {
- nonfatal_fatal = 1;
- dispatch_run(DISPATCH_BLOCK, &c->c_kex->done, c->c_kex);
- fprintf(stderr, "Impossible! dispatch_run() returned!\n");
- exit(1);
- }
- nonfatal_fatal = 0;
- xfree(c->c_kex);
- c->c_kex = NULL;
- packet_close();
-
- return j < 0? NULL : kexjmp_key;
-}
-
-static void
-keyprint(con *c, Key *key)
-{
- if (!key)
- return;
-
- fprintf(stdout, "%s ", c->c_output_name ? c->c_output_name : c->c_name);
- key_write(key, stdout);
- fputs("\n", stdout);
-}
-
-static int
-tcpconnect(char *host)
-{
- struct addrinfo hints, *ai, *aitop;
- char strport[NI_MAXSERV];
- int gaierr, s = -1;
-
- snprintf(strport, sizeof strport, "%d", ssh_port);
- memset(&hints, 0, sizeof(hints));
- hints.ai_family = IPv4or6;
- hints.ai_socktype = SOCK_STREAM;
- if ((gaierr = getaddrinfo(host, strport, &hints, &aitop)) != 0)
- fatal("getaddrinfo %s: %s", host, gai_strerror(gaierr));
- for (ai = aitop; ai; ai = ai->ai_next) {
- s = socket(ai->ai_family, SOCK_STREAM, 0);
- if (s < 0) {
- error("socket: %s", strerror(errno));
- continue;
- }
- if (fcntl(s, F_SETFL, O_NONBLOCK) < 0)
- fatal("F_SETFL: %s", strerror(errno));
- if (connect(s, ai->ai_addr, ai->ai_addrlen) < 0 &&
- errno != EINPROGRESS)
- error("connect (`%s'): %s", host, strerror(errno));
- else
- break;
- close(s);
- s = -1;
- }
- freeaddrinfo(aitop);
- return s;
-}
-
-static int
-conalloc(char *iname, char *oname, int keytype)
-{
- char *namebase, *name, *namelist;
- int s;
-
- namebase = namelist = xstrdup(iname);
-
- do {
- name = xstrsep(&namelist, ",");
- if (!name) {
- xfree(namebase);
- return (-1);
- }
- } while ((s = tcpconnect(name)) < 0);
-
- if (s >= maxfd)
- fatal("conalloc: fdno %d too high", s);
- if (fdcon[s].c_status)
- fatal("conalloc: attempt to reuse fdno %d", s);
-
- fdcon[s].c_fd = s;
- fdcon[s].c_status = CS_CON;
- fdcon[s].c_namebase = namebase;
- fdcon[s].c_name = name;
- fdcon[s].c_namelist = namelist;
- fdcon[s].c_output_name = xstrdup(oname);
- fdcon[s].c_data = (char *) &fdcon[s].c_plen;
- fdcon[s].c_len = 4;
- fdcon[s].c_off = 0;
- fdcon[s].c_keytype = keytype;
- gettimeofday(&fdcon[s].c_tv, NULL);
- fdcon[s].c_tv.tv_sec += timeout;
- TAILQ_INSERT_TAIL(&tq, &fdcon[s], c_link);
- FD_SET(s, read_wait);
- ncon++;
- return (s);
-}
-
-static void
-confree(int s)
-{
- if (s >= maxfd || fdcon[s].c_status == CS_UNUSED)
- fatal("confree: attempt to free bad fdno %d", s);
- close(s);
- xfree(fdcon[s].c_namebase);
- xfree(fdcon[s].c_output_name);
- if (fdcon[s].c_status == CS_KEYS)
- xfree(fdcon[s].c_data);
- fdcon[s].c_status = CS_UNUSED;
- fdcon[s].c_keytype = 0;
- TAILQ_REMOVE(&tq, &fdcon[s], c_link);
- FD_CLR(s, read_wait);
- ncon--;
-}
-
-static void
-contouch(int s)
-{
- TAILQ_REMOVE(&tq, &fdcon[s], c_link);
- gettimeofday(&fdcon[s].c_tv, NULL);
- fdcon[s].c_tv.tv_sec += timeout;
- TAILQ_INSERT_TAIL(&tq, &fdcon[s], c_link);
-}
-
-static int
-conrecycle(int s)
-{
- con *c = &fdcon[s];
- int ret;
-
- ret = conalloc(c->c_namelist, c->c_output_name, c->c_keytype);
- confree(s);
- return (ret);
-}
-
-static void
-congreet(int s)
-{
- int remote_major, remote_minor, n = 0;
- char buf[256], *cp;
- char remote_version[sizeof buf];
- size_t bufsiz;
- con *c = &fdcon[s];
-
- bufsiz = sizeof(buf);
- cp = buf;
- while (bufsiz-- && (n = read(s, cp, 1)) == 1 && *cp != '\n') {
- if (*cp == '\r')
- *cp = '\n';
- cp++;
- }
- if (n < 0) {
- if (errno != ECONNREFUSED)
- error("read (%s): %s", c->c_name, strerror(errno));
- conrecycle(s);
- return;
- }
- if (n == 0) {
- error("%s: Connection closed by remote host", c->c_name);
- conrecycle(s);
- return;
- }
- if (*cp != '\n' && *cp != '\r') {
- error("%s: bad greeting", c->c_name);
- confree(s);
- return;
- }
- *cp = '\0';
- if (sscanf(buf, "SSH-%d.%d-%[^\n]\n",
- &remote_major, &remote_minor, remote_version) == 3)
- compat_datafellows(remote_version);
- else
- datafellows = 0;
- if (c->c_keytype != KT_RSA1) {
- if (!ssh2_capable(remote_major, remote_minor)) {
- debug("%s doesn't support ssh2", c->c_name);
- confree(s);
- return;
- }
- } else if (remote_major != 1) {
- debug("%s doesn't support ssh1", c->c_name);
- confree(s);
- return;
- }
- fprintf(stderr, "# %s %s\n", c->c_name, chop(buf));
- n = snprintf(buf, sizeof buf, "SSH-%d.%d-OpenSSH-keyscan\r\n",
- c->c_keytype == KT_RSA1? PROTOCOL_MAJOR_1 : PROTOCOL_MAJOR_2,
- c->c_keytype == KT_RSA1? PROTOCOL_MINOR_1 : PROTOCOL_MINOR_2);
- if (atomicio(write, s, buf, n) != n) {
- error("write (%s): %s", c->c_name, strerror(errno));
- confree(s);
- return;
- }
- if (c->c_keytype != KT_RSA1) {
- keyprint(c, keygrab_ssh2(c));
- confree(s);
- return;
- }
- c->c_status = CS_SIZE;
- contouch(s);
-}
-
-static void
-conread(int s)
-{
- con *c = &fdcon[s];
- int n;
-
- if (c->c_status == CS_CON) {
- congreet(s);
- return;
- }
- n = read(s, c->c_data + c->c_off, c->c_len - c->c_off);
- if (n < 0) {
- error("read (%s): %s", c->c_name, strerror(errno));
- confree(s);
- return;
- }
- c->c_off += n;
-
- if (c->c_off == c->c_len)
- switch (c->c_status) {
- case CS_SIZE:
- c->c_plen = htonl(c->c_plen);
- c->c_len = c->c_plen + 8 - (c->c_plen & 7);
- c->c_off = 0;
- c->c_data = xmalloc(c->c_len);
- c->c_status = CS_KEYS;
- break;
- case CS_KEYS:
- keyprint(c, keygrab_ssh1(c));
- confree(s);
- return;
- break;
- default:
- fatal("conread: invalid status %d", c->c_status);
- break;
- }
-
- contouch(s);
-}
-
-static void
-conloop(void)
-{
- struct timeval seltime, now;
- fd_set *r, *e;
- con *c;
- int i;
-
- gettimeofday(&now, NULL);
- c = TAILQ_FIRST(&tq);
-
- if (c && (c->c_tv.tv_sec > now.tv_sec ||
- (c->c_tv.tv_sec == now.tv_sec && c->c_tv.tv_usec > now.tv_usec))) {
- seltime = c->c_tv;
- seltime.tv_sec -= now.tv_sec;
- seltime.tv_usec -= now.tv_usec;
- if (seltime.tv_usec < 0) {
- seltime.tv_usec += 1000000;
- seltime.tv_sec--;
- }
- } else
- seltime.tv_sec = seltime.tv_usec = 0;
-
- r = xmalloc(read_wait_size);
- memcpy(r, read_wait, read_wait_size);
- e = xmalloc(read_wait_size);
- memcpy(e, read_wait, read_wait_size);
-
- while (select(maxfd, r, NULL, e, &seltime) == -1 &&
- (errno == EAGAIN || errno == EINTR))
- ;
-
- for (i = 0; i < maxfd; i++) {
- if (FD_ISSET(i, e)) {
- error("%s: exception!", fdcon[i].c_name);
- confree(i);
- } else if (FD_ISSET(i, r))
- conread(i);
- }
- xfree(r);
- xfree(e);
-
- c = TAILQ_FIRST(&tq);
- while (c && (c->c_tv.tv_sec < now.tv_sec ||
- (c->c_tv.tv_sec == now.tv_sec && c->c_tv.tv_usec < now.tv_usec))) {
- int s = c->c_fd;
-
- c = TAILQ_NEXT(c, c_link);
- conrecycle(s);
- }
-}
-
-static void
-do_host(char *host)
-{
- char *name = strnnsep(&host, " \t\n");
- int j;
-
- if (name == NULL)
- return;
- for (j = KT_RSA1; j <= KT_RSA; j *= 2) {
- if (get_keytypes & j) {
- while (ncon >= MAXCON)
- conloop();
- conalloc(name, *host ? host : name, j);
- }
- }
-}
-
-void
-fatal(const char *fmt,...)
-{
- va_list args;
-
- va_start(args, fmt);
- do_log(SYSLOG_LEVEL_FATAL, fmt, args);
- va_end(args);
- if (nonfatal_fatal)
- longjmp(kexjmp, -1);
- else
- fatal_cleanup();
-}
-
-static void
-usage(void)
-{
- fprintf(stderr,
- gettext("Usage: %s [-v46] [-p port] [-T timeout] [-f file]\n"
- "\t\t [host | addrlist namelist] [...]\n"),
- __progname);
- exit(1);
-}
-
-int
-main(int argc, char **argv)
-{
- int debug_flag = 0, log_level = SYSLOG_LEVEL_INFO;
- int opt, fopt_count = 0;
- char *tname;
-
- extern int optind;
- extern char *optarg;
-
- __progname = get_progname(argv[0]);
-
- (void) g11n_setlocale(LC_ALL, "");
-
- init_rng();
- seed_rng();
- TAILQ_INIT(&tq);
-
- if (argc <= 1)
- usage();
-
- while ((opt = getopt(argc, argv, "v46p:T:t:f:")) != -1) {
- switch (opt) {
- case 'p':
- ssh_port = a2port(optarg);
- if (ssh_port == 0) {
- fprintf(stderr, gettext("Bad port '%s'\n"),
- optarg);
- exit(1);
- }
- break;
- case 'T':
- timeout = convtime(optarg);
- if (timeout == -1 || timeout == 0) {
- fprintf(stderr, gettext("Bad timeout '%s'\n"),
- optarg);
- usage();
- }
- break;
- case 'v':
- if (!debug_flag) {
- debug_flag = 1;
- log_level = SYSLOG_LEVEL_DEBUG1;
- }
- else if (log_level < SYSLOG_LEVEL_DEBUG3)
- log_level++;
- else
- fatal("Too high debugging level.");
- break;
- case 'f':
- if (strcmp(optarg, "-") == 0)
- optarg = NULL;
- argv[fopt_count++] = optarg;
- break;
- case 't':
- get_keytypes = 0;
- tname = strtok(optarg, ",");
- while (tname) {
- int type = key_type_from_name(tname);
- switch (type) {
- case KEY_RSA1:
- get_keytypes |= KT_RSA1;
- break;
- case KEY_DSA:
- get_keytypes |= KT_DSA;
- break;
- case KEY_RSA:
- get_keytypes |= KT_RSA;
- break;
- case KEY_UNSPEC:
- fatal("unknown key type %s", tname);
- }
- tname = strtok(NULL, ",");
- }
- break;
- case '4':
- IPv4or6 = AF_INET;
- break;
- case '6':
- IPv4or6 = AF_INET6;
- break;
- case '?':
- default:
- usage();
- }
- }
- if (optind == argc && !fopt_count)
- usage();
-
- log_init("ssh-keyscan", log_level, SYSLOG_FACILITY_USER, 1);
-
- maxfd = fdlim_get(1);
- if (maxfd < 0)
- fatal("%s: fdlim_get: bad value", __progname);
- if (maxfd > MAXMAXFD)
- maxfd = MAXMAXFD;
- if (MAXCON <= 0)
- fatal("%s: not enough file descriptors", __progname);
- if (maxfd > fdlim_get(0))
- fdlim_set(maxfd);
- fdcon = xmalloc(maxfd * sizeof(con));
- memset(fdcon, 0, maxfd * sizeof(con));
-
- read_wait_size = howmany(maxfd, NFDBITS) * sizeof(fd_mask);
- read_wait = xmalloc(read_wait_size);
- memset(read_wait, 0, read_wait_size);
-
- if (fopt_count) {
- Linebuf *lb;
- char *line;
- int j;
-
- for (j = 0; j < fopt_count; j++) {
- lb = Linebuf_alloc(argv[j], error);
- if (!lb)
- continue;
- while ((line = Linebuf_getline(lb)) != NULL)
- do_host(line);
- Linebuf_free(lb);
- }
- }
-
- while (optind < argc)
- do_host(argv[optind++]);
-
- while (ncon > 0)
- conloop();
-
- return (0);
-}
diff --git a/usr/src/cmd/ssh/ssh-keysign/Makefile b/usr/src/cmd/ssh/ssh-keysign/Makefile
deleted file mode 100644
index ff0927c3b6..0000000000
--- a/usr/src/cmd/ssh/ssh-keysign/Makefile
+++ /dev/null
@@ -1,67 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
-#
-# cmd/ssh/ssh-keysign/Makefile
-
-PROG= ssh-keysign
-
-DIRS= $(ROOTLIBSSH) $(ROOTLIBSUNSSH)
-
-OBJS = ssh-keysign.o
-SRCS = $(OBJS:.o=.c)
-
-include ../../Makefile.cmd
-include ../Makefile.ssh-common
-
-FILEMODE= 04555
-
-LDLIBS += $(SSH_COMMON_LDLIBS) -lsocket -lnsl -lz -lcrypto
-
-POFILE_DIR= ..
-
-.KEEP_STATE:
-
-.PARALLEL: $(OBJS)
-
-all: $(PROG)
-
-$(PROG): $(OBJS) ../libssh/$(MACH)/libssh.a ../libopenbsd-compat/$(MACH)/libopenbsd-compat.a
- $(LINK.c) $(OBJS) -o $@ $(LDLIBS) $(DYNFLAGS)
- $(POST_PROCESS)
-
-clean:
- $(RM) -f $(OBJS) $(PROG)
-
-lint: lint_SRCS
-
-include ../Makefile.msg.targ
-include ../../Makefile.targ
-
-install: all $(DIRS) $(ROOTLIBSSHPROG) $(ROOTLIBSSH)
-
-
-$(ROOTLIBSSHPROG)/%: %
- $(INS.file)
-
-$(DIRS):
- $(INS.dir)
diff --git a/usr/src/cmd/ssh/ssh-keysign/ssh-keysign.c b/usr/src/cmd/ssh/ssh-keysign/ssh-keysign.c
deleted file mode 100644
index 66f6fde586..0000000000
--- a/usr/src/cmd/ssh/ssh-keysign/ssh-keysign.c
+++ /dev/null
@@ -1,263 +0,0 @@
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * Copyright (c) 2002 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#include "includes.h"
-RCSID("$OpenBSD: ssh-keysign.c,v 1.7 2002/07/03 14:21:05 markus Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <unistd.h>
-
-#include <openssl/evp.h>
-#include <openssl/rand.h>
-#include <openssl/rsa.h>
-
-#include "log.h"
-#include "key.h"
-#include "ssh.h"
-#include "ssh2.h"
-#include "misc.h"
-#include "xmalloc.h"
-#include "buffer.h"
-#include "bufaux.h"
-#include "authfile.h"
-#include "msg.h"
-#include "canohost.h"
-#include "pathnames.h"
-#include "readconf.h"
-
-uid_t original_real_uid; /* XXX readconf.c needs this */
-
-#ifdef HAVE___PROGNAME
-extern char *__progname;
-#else
-#ifndef lint
-char *__progname;
-#endif /* lint */
-#endif
-
-static int
-valid_request(struct passwd *pw, char *host, Key **ret, u_char *data,
- u_int datalen)
-{
- Buffer b;
- Key *key;
- u_char *pkblob;
- u_int blen, len;
- char *pkalg, *p;
- int pktype, fail;
-
- fail = 0;
-
- buffer_init(&b);
- buffer_append(&b, data, datalen);
-
- /* session id, currently limited to SHA1 (20 bytes) */
- p = buffer_get_string(&b, &len);
- if (len != 20)
- fail++;
- xfree(p);
-
- if (buffer_get_char(&b) != SSH2_MSG_USERAUTH_REQUEST)
- fail++;
-
- /* server user */
- buffer_skip_string(&b);
-
- /* service */
- p = buffer_get_string(&b, NULL);
- if (strcmp("ssh-connection", p) != 0)
- fail++;
- xfree(p);
-
- /* method */
- p = buffer_get_string(&b, NULL);
- if (strcmp("hostbased", p) != 0)
- fail++;
- xfree(p);
-
- /* pubkey */
- pkalg = buffer_get_string(&b, NULL);
- pkblob = buffer_get_string(&b, &blen);
-
- pktype = key_type_from_name(pkalg);
- if (pktype == KEY_UNSPEC)
- fail++;
- else if ((key = key_from_blob(pkblob, blen)) == NULL)
- fail++;
- else if (key->type != pktype)
- fail++;
- xfree(pkalg);
- xfree(pkblob);
-
- /* client host name, handle trailing dot */
- p = buffer_get_string(&b, &len);
- debug2("valid_request: check expect chost %s got %s", host, p);
- if (strlen(host) != len - 1)
- fail++;
- else if (p[len - 1] != '.')
- fail++;
- else if (strncasecmp(host, p, len - 1) != 0)
- fail++;
- xfree(p);
-
- /* local user */
- p = buffer_get_string(&b, NULL);
-
- if (strcmp(pw->pw_name, p) != 0)
- fail++;
- xfree(p);
-
- /* end of message */
- if (buffer_len(&b) != 0)
- fail++;
-
- debug3("valid_request: fail %d", fail);
-
- if (fail && key != NULL)
- key_free(key);
- else
- *ret = key;
-
- return (fail ? -1 : 0);
-}
-
-int
-main(int argc, char **argv)
-{
- Buffer b;
- Options options;
- Key *keys[2], *key;
- struct passwd *pw;
- int key_fd[2], i, found, version = 2, fd;
- u_char *signature, *data;
- char *host;
- u_int slen, dlen;
- u_int32_t rnd[256];
-
- /*
- * Since these two open()s are all that's done here before
- * dropping privileges with setreuid(), and since having been
- * privileged protects ssh-keysign from core dumps and tracing,
- * there's no need to use Least Privilege interfaces like
- * setppriv(2).
- */
- key_fd[0] = open(_PATH_HOST_RSA_KEY_FILE, O_RDONLY);
- key_fd[1] = open(_PATH_HOST_DSA_KEY_FILE, O_RDONLY);
-
- (void) setreuid(getuid(), getuid());
-
- (void) g11n_setlocale(LC_ALL, "");
-
- init_rng();
- seed_rng();
- arc4random_stir();
-
-#ifdef DEBUG_SSH_KEYSIGN
- log_init("ssh-keysign", SYSLOG_LEVEL_DEBUG3, SYSLOG_FACILITY_AUTH, 0);
-#endif
-
- /* verify that ssh-keysign is enabled by the admin */
- original_real_uid = getuid(); /* XXX readconf.c needs this */
- initialize_options(&options);
- (void)read_config_file(_PATH_HOST_CONFIG_FILE, "", &options);
- fill_default_options(&options);
- if (options.hostbased_authentication != 1)
- fatal("Hostbased authentication not enabled in %s",
- _PATH_HOST_CONFIG_FILE);
-
- if (key_fd[0] == -1 && key_fd[1] == -1)
- fatal("could not open any host key");
-
- if ((pw = getpwuid(getuid())) == NULL)
- fatal("getpwuid failed");
- pw = pwcopy(pw);
-
- SSLeay_add_all_algorithms();
- for (i = 0; i < 256; i++)
- rnd[i] = arc4random();
- RAND_seed(rnd, sizeof(rnd));
-
- found = 0;
- for (i = 0; i < 2; i++) {
- keys[i] = NULL;
- if (key_fd[i] == -1)
- continue;
- keys[i] = key_load_private_pem(key_fd[i], KEY_UNSPEC,
- NULL, NULL);
- (void) close(key_fd[i]);
- if (keys[i] != NULL && keys[i]->type == KEY_RSA) {
- if (RSA_blinding_on(keys[i]->rsa, NULL) != 1) {
- error("RSA_blinding_on failed");
- key_free(keys[i]);
- keys[i] = NULL;
- }
- }
- if (keys[i] != NULL)
- found = 1;
- }
- if (!found)
- fatal("no hostkey found");
-
- buffer_init(&b);
- if (ssh_msg_recv(STDIN_FILENO, &b) < 0)
- fatal("ssh_msg_recv failed");
- if (buffer_get_char(&b) != version)
- fatal("bad version");
- fd = buffer_get_int(&b);
- if ((fd == STDIN_FILENO) || (fd == STDOUT_FILENO))
- fatal("bad fd");
- if ((host = get_local_name(fd)) == NULL)
- fatal("cannot get sockname for fd");
-
- data = buffer_get_string(&b, &dlen);
- if (valid_request(pw, host, &key, data, dlen) < 0)
- fatal("not a valid request");
- xfree(host);
-
- found = 0;
- for (i = 0; i < 2; i++) {
- if (keys[i] != NULL &&
- key_equal(key, keys[i])) {
- found = 1;
- break;
- }
- }
- if (!found)
- fatal("no matching hostkey found");
-
- if (key_sign(keys[i], &signature, &slen, data, dlen) != 0)
- fatal("key_sign failed");
- xfree(data);
-
- /* send reply */
- buffer_clear(&b);
- buffer_put_string(&b, signature, slen);
- ssh_msg_send(STDOUT_FILENO, version, &b);
-
- return (0);
-}
diff --git a/usr/src/cmd/ssh/ssh-socks5-proxy-connect/Makefile b/usr/src/cmd/ssh/ssh-socks5-proxy-connect/Makefile
deleted file mode 100644
index 0b79f4ec7d..0000000000
--- a/usr/src/cmd/ssh/ssh-socks5-proxy-connect/Makefile
+++ /dev/null
@@ -1,65 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
-#
-
-PROG= ssh-socks5-proxy-connect
-
-DIRS= $(ROOTLIBSSH) $(ROOTLIBSUNSSH)
-
-OBJS= ssh-socks5-proxy-connect.o
-
-SRCS= $(OBJS:.o=.c)
-
-POFILE_DIR= ..
-
-.KEEP_STATE:
-
-.PARALLEL: $(OBJS)
-
-all: $(PROG)
-
-include ../../Makefile.cmd
-include ../Makefile.ssh-common
-
-LDLIBS += $(SSH_COMMON_LDLIBS) -lsocket
-
-install: all $(DIRS) $(ROOTLIBSSHPROG) $(ROOTLIBSSH)
-
-$(ROOTLIBSSHPROG)/%: %
- $(INS.file)
-
-$(DIRS):
- $(INS.dir)
-
-$(PROG): $(OBJS) ../libssh/$(MACH)/libssh.a ../libopenbsd-compat/$(MACH)/libopenbsd-compat.a
- $(LINK.c) $(OBJS) -o $@ $(LDLIBS)
- $(POST_PROCESS)
-
-clean:
- $(RM) -f $(OBJS) $(PROG)
-
-lint: lint_SRCS
-
-include ../../Makefile.targ
-include ../Makefile.msg.targ
diff --git a/usr/src/cmd/ssh/ssh-socks5-proxy-connect/ssh-socks5-proxy-connect.c b/usr/src/cmd/ssh/ssh-socks5-proxy-connect/ssh-socks5-proxy-connect.c
deleted file mode 100644
index 131eb73fcc..0000000000
--- a/usr/src/cmd/ssh/ssh-socks5-proxy-connect/ssh-socks5-proxy-connect.c
+++ /dev/null
@@ -1,382 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- *
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-/*
- * A SOCKS client that let's users 'ssh' to the
- * outside of the firewall by opening up a connection
- * through the SOCKS server. Supports only SOCKS v5.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <netdb.h>
-#include <strings.h>
-#include <unistd.h>
-#include <inttypes.h>
-#include <errno.h>
-#include <poll.h>
-#include <signal.h>
-#include <locale.h>
-#include <libintl.h>
-#include <netinet/in.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <arpa/inet.h>
-#include <sys/time.h>
-#include <sys/stropts.h>
-#include <sys/stat.h>
-#include <sys/varargs.h>
-#include "proxy-io.h"
-
-#define DEFAULT_SOCKS5_PORT "1080"
-
-static int debug_flag = 0;
-
-static void
-usage(void)
-{
- (void) fprintf(stderr, gettext("Usage: ssh-socks5-proxy-connect "
- "[-h socks5_proxy_host] [-p socks5_proxy_port] \n"
- "remote_host remote_port\n"));
- exit(1);
-}
-
-/* PRINTFLIKE1 */
-static void
-debug(const char *format, ...)
-{
- char fmtbuf[BUFFER_SIZ];
- va_list args;
-
- if (debug_flag == 0) {
- return;
- }
- va_start(args, format);
- (void) snprintf(fmtbuf, sizeof (fmtbuf),
- "ssh-socks5-proxy: %s\n", format);
- (void) vfprintf(stderr, fmtbuf, args);
- va_end(args);
-}
-
-static void
-signal_handler(int sig)
-{
- exit(0);
-}
-
-static int
-do_version_exchange(int sockfd)
-{
- char buffer[3], recv_buf[2];
-
- buffer[0] = 0x05; /* VER */
- buffer[1] = 0x01; /* NMETHODS */
- buffer[2] = 0x00; /* METHODS */
-
- if (write(sockfd, &buffer, sizeof (buffer)) < 0) {
- perror("write");
- return (0);
- }
-
- if (read(sockfd, &recv_buf, sizeof (recv_buf)) == -1) {
- perror("read");
- return (0);
- }
-
- /*
- * No need to check the server's version as per
- * the protocol spec. Check the method supported
- * by the server. Currently if the server does not
- * support NO AUTH, we disconnect.
- */
- if (recv_buf[1] != 0x00) {
- debug("Unsupported Authentication Method");
- return (0);
- }
-
- /* Return success. */
- return (1);
-}
-
-static void
-send_request(
- int sockfd,
- const char *ssh_host,
- uchar_t ssh_host_len,
- uint16_t *ssh_port)
-{
- int failure = 1;
- char *buffer, *temp, recv_buf[BUFFER_SIZ];
- uchar_t version = 0x05, cmd = 0x01, rsv = 0x00, atyp = 0x03;
-
- buffer = malloc(strlen(ssh_host) + 7);
-
- temp = buffer;
-
- /* Assemble the request packet */
- (void) memcpy(temp, &version, sizeof (version));
- temp += sizeof (version);
- (void) memcpy(temp, &cmd, sizeof (cmd));
- temp += sizeof (cmd);
- (void) memcpy(temp, &rsv, sizeof (rsv));
- temp += sizeof (rsv);
- (void) memcpy(temp, &atyp, sizeof (atyp));
- temp += sizeof (atyp);
- (void) memcpy(temp, &ssh_host_len, sizeof (ssh_host_len));
- temp += sizeof (ssh_host_len);
- (void) memcpy(temp, ssh_host, strlen(ssh_host));
- temp += strlen(ssh_host);
- (void) memcpy(temp, ssh_port, sizeof (*ssh_port));
- temp += sizeof (*ssh_port);
-
- if (write(sockfd, buffer, temp - buffer) == -1) {
- perror("write");
- exit(1);
- }
-
- /*
- * The maximum size of the protocol message we are waiting for is 10
- * bytes -- VER[1], REP[1], RSV[1], ATYP[1], BND.ADDR[4] and
- * BND.PORT[2]; see RFC 1928, section "6. Replies" for more details.
- * Everything else is already a part of the data we are supposed to
- * deliver to the requester. We know that BND.ADDR is exactly 4 bytes
- * since as you can see below, we accept only ATYP == 1 which specifies
- * that the IPv4 address is in a binary format.
- */
- if (read(sockfd, &recv_buf, 10) == -1) {
- perror("read");
- exit(1);
- }
-
- /* temp now points to the recieve buffer. */
- temp = recv_buf;
-
- /* Check the server's version. */
- if (*temp++ != 0x05) {
- (void) fprintf(stderr, gettext("Unsupported SOCKS version: %x\n"),
- recv_buf[0]);
- exit(1);
- }
-
- /* Check server's reply */
- switch (*temp++) {
- case 0x00:
- failure = 0;
- debug("CONNECT command Succeeded.");
- break;
- case 0x01:
- debug("General SOCKS server failure.");
- break;
- case 0x02:
- debug("Connection not allowed by ruleset.");
- break;
- case 0x03:
- debug("Network Unreachable.");
- break;
- case 0x04:
- debug("Host unreachable.");
- break;
- case 0x05:
- debug("Connection refused.");
- break;
- case 0x06:
- debug("TTL expired.");
- break;
- case 0x07:
- debug("Command not supported");
- break;
- case 0x08:
- debug("Address type not supported.");
- break;
- default:
- (void) fprintf(stderr, gettext("ssh-socks5-proxy: "
- "SOCKS Server reply not understood\n"));
- }
-
- if (failure == 1) {
- exit(1);
- }
-
- /* Parse the rest of the packet */
-
- /* Ignore RSV */
- temp++;
-
- /* Check ATYP */
- if (*temp != 0x01) {
- (void) fprintf(stderr, gettext("ssh-socks5-proxy: "
- "Address type not supported: %u\n"), *temp);
- exit(1);
- }
-
- free(buffer);
-}
-
-int
-main(int argc, char **argv)
-{
- extern char *optarg;
- extern int optind;
- int retval, err_code, sock;
- uint16_t ssh_port;
- uchar_t ssh_host_len;
- char *socks_server = NULL, *socks_port = NULL;
- char *ssh_host;
- struct addrinfo hints, *ai;
- struct pollfd fds[2];
-
- /* Initialization for variables, set locale and textdomain */
-
- (void) setlocale(LC_ALL, "");
-
-#if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
-#define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */
-#endif
- (void) textdomain(TEXT_DOMAIN);
-
- /* Set up the signal handler */
- (void) signal(SIGINT, signal_handler);
- (void) signal(SIGPIPE, signal_handler);
- (void) signal(SIGPOLL, signal_handler);
-
- while ((retval = getopt(argc, argv, "dp:h:")) != -1) {
- switch (retval) {
- case 'h':
- socks_server = optarg;
- break;
- case 'p':
- socks_port = optarg;
- break;
- case 'd':
- debug_flag = 1;
- break;
- default:
- break;
- }
- }
-
- if (optind != argc - 2) {
- usage();
- }
-
- ssh_host = argv[optind++];
- ssh_host_len = (uchar_t)strlen(ssh_host);
- ssh_port = htons(atoi(argv[optind]));
-
- /*
- * If the name and/or port number of the
- * socks server were not passed on the
- * command line, try the user's environment.
- */
- if (socks_server == NULL) {
- if ((socks_server = getenv("SOCKS5_SERVER")) == NULL) {
- (void) fprintf(stderr, gettext("ssh-socks5-proxy: "
- "SOCKS5 SERVER not specified\n"));
- exit(1);
- }
- }
- if (socks_port == NULL) {
- if ((socks_port = getenv("SOCKS5_PORT")) == NULL) {
- socks_port = DEFAULT_SOCKS5_PORT;
- }
- }
-
- debug("SOCKS5_SERVER = %s", socks_server);
- debug("SOCKS5_PORT = %s", socks_port);
-
- bzero(&hints, sizeof (struct addrinfo));
- hints.ai_family = PF_UNSPEC;
- hints.ai_socktype = SOCK_STREAM;
-
- if ((err_code = getaddrinfo(socks_server, socks_port, &hints, &ai))
- != 0) {
- (void) fprintf(stderr, "%s: %s\n", socks_server,
- gai_strerror(err_code));
- exit(1);
- }
-
- if ((sock = socket(ai->ai_family, SOCK_STREAM, 0)) < 0) {
- perror("socket");
- exit(1);
- }
-
- /* Connect to the SOCKS server */
- if (connect(sock, ai->ai_addr, ai->ai_addrlen) == 0) {
- debug("Connected to the SOCKS server");
- /* Do the SOCKS v5 communication with the server. */
- if (do_version_exchange(sock) > 0) {
- debug("Done version exchange");
- send_request(sock, ssh_host, ssh_host_len, &ssh_port);
- } else {
- (void) fprintf(stderr, gettext("ssh-socks5-proxy: Client and "
- "Server versions differ.\n"));
- (void) close(sock);
- exit(1);
- }
- } else {
- perror("connect");
- (void) close(sock);
- exit(1);
- }
-
- fds[0].fd = STDIN_FILENO; /* Poll stdin for data. */
- fds[1].fd = sock; /* Poll the socket for data. */
- fds[0].events = fds[1].events = POLLIN;
-
- for (;;) {
- if (poll(fds, 2, INFTIM) == -1) {
- perror("poll");
- (void) close(sock);
- exit(1);
- }
-
- /* Data arrived on stdin, write it to the socket */
- if (fds[0].revents & POLLIN) {
- if (proxy_read_write_loop(STDIN_FILENO, sock) == 0) {
- (void) close(sock);
- exit(1);
- }
- } else if (fds[0].revents & (POLLERR | POLLHUP | POLLNVAL)) {
- (void) close(sock);
- exit(1);
- }
-
- /* Data arrived on the socket, write it to stdout */
- if (fds[1].revents & POLLIN) {
- if (proxy_read_write_loop(sock, STDOUT_FILENO) == 0) {
- (void) close(sock);
- exit(1);
- }
- } else if (fds[1].revents & (POLLERR | POLLHUP | POLLNVAL)) {
- (void) close(sock);
- exit(1);
- }
- }
-
- /* NOTREACHED */
- return (0);
-}
diff --git a/usr/src/cmd/ssh/ssh/Makefile b/usr/src/cmd/ssh/ssh/Makefile
deleted file mode 100644
index a2f5ae7333..0000000000
--- a/usr/src/cmd/ssh/ssh/Makefile
+++ /dev/null
@@ -1,76 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
-#
-# cmd/ssh/ssh/Makefile
-
-PROG= ssh
-
-OBJS = ssh.o \
- sshconnect.o \
- sshconnect1.o \
- sshconnect2.o \
- sshtty.o \
- clientloop.o \
- gss-clnt.o
-SRCS = $(OBJS:.o=.c)
-
-include ../../Makefile.cmd
-include ../Makefile.ssh-common
-
-DIRS= $(ROOTLIBSSH) $(ROOTLIBSUNSSH)
-
-LDLIBS += $(SSH_COMMON_LDLIBS) -lsocket \
- -lnsl \
- -lz \
- -lcrypto \
- -lgss
-
-POFILE_DIR= ..
-
-.KEEP_STATE:
-
-.PARALLEL: $(OBJS)
-
-all: $(PROG)
-
-$(PROG): $(OBJS) ../libssh/$(MACH)/libssh.a ../libopenbsd-compat/$(MACH)/libopenbsd-compat.a
- $(LINK.c) $(OBJS) -o $@ $(LDLIBS) $(DYNFLAGS)
- $(POST_PROCESS)
-
-$(DIRS):
- $(INS.dir)
-
-$(ROOTBIN)/ssh: $(ROOTLIBSUNSSH)/ssh
- -$(RM) $@; $(SYMLINK) ../lib/sunssh/ssh $@
-
-install: all $(ROOTPROG)
-
-clean:
- $(RM) -f $(OBJS) $(PROG)
-
-lint: lint_SRCS
-
-include ../Makefile.msg.targ
-
-XGETFLAGS += --keyword=log
-include ../../Makefile.targ
diff --git a/usr/src/cmd/ssh/ssh/clientloop.c b/usr/src/cmd/ssh/ssh/clientloop.c
deleted file mode 100644
index 8a9985c632..0000000000
--- a/usr/src/cmd/ssh/ssh/clientloop.c
+++ /dev/null
@@ -1,1626 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * The main loop for the interactive session (client side).
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- *
- *
- * Copyright (c) 1999 Theo de Raadt. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * SSH2 support added by Markus Friedl.
- * Copyright (c) 1999, 2000, 2001 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: clientloop.c,v 1.104 2002/08/22 19:38:42 stevesk Exp $");
-
-#include "ssh.h"
-#include "ssh1.h"
-#include "ssh2.h"
-#include "xmalloc.h"
-#include "packet.h"
-#include "buffer.h"
-#include "compat.h"
-#include "channels.h"
-#include "dispatch.h"
-#include "buffer.h"
-#include "bufaux.h"
-#include "key.h"
-#include "kex.h"
-#include "log.h"
-#include "readconf.h"
-#include "clientloop.h"
-#include "authfd.h"
-#include "atomicio.h"
-#include "sshtty.h"
-#include "misc.h"
-#include "readpass.h"
-
-/* import options */
-extern Options options;
-
-/* Flag indicating that stdin should be redirected from /dev/null. */
-extern int stdin_null_flag;
-
-/*
- * Name of the host we are connecting to. This is the name given on the
- * command line, or the HostName specified for the user-supplied name in a
- * configuration file.
- */
-extern char *host;
-
-/*
- * Flag to indicate that we have received a window change signal which has
- * not yet been processed. This will cause a message indicating the new
- * window size to be sent to the server a little later. This is volatile
- * because this is updated in a signal handler.
- */
-static volatile sig_atomic_t received_window_change_signal = 0;
-static volatile sig_atomic_t received_signal = 0;
-
-/* Flag indicating whether the user's terminal is in non-blocking mode. */
-static int in_non_blocking_mode = 0;
-
-/* Common data for the client loop code. */
-static int quit_pending; /* Set to non-zero to quit the client loop. */
-static int escape_char; /* Escape character. */
-static int escape_pending; /* Last character was the escape character */
-static int last_was_cr; /* Last character was a newline. */
-static int exit_status; /* Used to store the exit status of the command. */
-static int stdin_eof; /* EOF has been encountered on standard error. */
-static Buffer stdin_buffer; /* Buffer for stdin data. */
-static Buffer stdout_buffer; /* Buffer for stdout data. */
-static Buffer stderr_buffer; /* Buffer for stderr data. */
-static u_long stdin_bytes, stdout_bytes, stderr_bytes;
-static u_int buffer_high; /* Soft max buffer size. */
-static int connection_in; /* Connection to server (input). */
-static int connection_out; /* Connection to server (output). */
-static int need_rekeying; /* Set to non-zero if rekeying is requested. */
-static int session_closed = 0; /* In SSH2: login session closed. */
-static int server_alive_timeouts = 0; /* Number of outstanding alive packets. */
-
-static void client_init_dispatch(void);
-int session_ident = -1;
-
-/*XXX*/
-extern Kex *xxx_kex;
-
-extern int will_daemonize;
-
-/* Restores stdin to blocking mode. */
-
-static void
-leave_non_blocking(void)
-{
- if (in_non_blocking_mode) {
- (void) fcntl(fileno(stdin), F_SETFL, 0);
- in_non_blocking_mode = 0;
- fatal_remove_cleanup((void (*) (void *)) leave_non_blocking, NULL);
- }
-}
-
-/* Puts stdin terminal in non-blocking mode. */
-
-static void
-enter_non_blocking(void)
-{
- in_non_blocking_mode = 1;
- (void) fcntl(fileno(stdin), F_SETFL, O_NONBLOCK);
- fatal_add_cleanup((void (*) (void *)) leave_non_blocking, NULL);
-}
-
-/*
- * Signal handler for the window change signal (SIGWINCH). This just sets a
- * flag indicating that the window has changed.
- */
-
-static void
-window_change_handler(int sig)
-{
- received_window_change_signal = 1;
- signal(SIGWINCH, window_change_handler);
-}
-
-/*
- * Signal handler for signals that cause the program to terminate. These
- * signals must be trapped to restore terminal modes.
- */
-
-static void
-signal_handler(int sig)
-{
- received_signal = sig;
- quit_pending = 1;
-}
-
-/*
- * Returns current time in seconds from Jan 1, 1970 with the maximum
- * available resolution.
- */
-
-static double
-get_current_time(void)
-{
- struct timeval tv;
- gettimeofday(&tv, NULL);
- return (double) tv.tv_sec + (double) tv.tv_usec / 1000000.0;
-}
-
-#define SSH_X11_PROTO "MIT-MAGIC-COOKIE-1"
-void
-client_x11_get_proto(const char *display, const char *xauth_path,
- u_int trusted, char **_proto, char **_data)
-{
- char cmd[1024];
- char line[512];
- char xdisplay[512];
- static char proto[512], data[512];
- FILE *f;
- int got_data = 0, generated = 0, do_unlink = 0, i;
- char *xauthdir, *xauthfile;
- struct stat st;
-
- xauthdir = xauthfile = NULL;
- *_proto = proto;
- *_data = data;
- proto[0] = data[0] = '\0';
-
- if (xauth_path == NULL ||(stat(xauth_path, &st) == -1)) {
- debug("No xauth program.");
- } else {
- if (display == NULL) {
- debug("x11_get_proto: DISPLAY not set");
- return;
- }
- /*
- * Handle FamilyLocal case where $DISPLAY does
- * not match an authorization entry. For this we
- * just try "xauth list unix:displaynum.screennum".
- * XXX: "localhost" match to determine FamilyLocal
- * is not perfect.
- */
- if (strncmp(display, "localhost:", 10) == 0) {
- snprintf(xdisplay, sizeof(xdisplay), "unix:%s",
- display + 10);
- display = xdisplay;
- }
- if (trusted == 0) {
- xauthdir = xmalloc(MAXPATHLEN);
- xauthfile = xmalloc(MAXPATHLEN);
- strlcpy(xauthdir, "/tmp/ssh-XXXXXXXXXX", MAXPATHLEN);
- if (mkdtemp(xauthdir) != NULL) {
- do_unlink = 1;
- snprintf(xauthfile, MAXPATHLEN, "%s/xauthfile",
- xauthdir);
- snprintf(cmd, sizeof(cmd),
- "%s -f %s generate %s " SSH_X11_PROTO
- " untrusted timeout 1200 2>" _PATH_DEVNULL,
- xauth_path, xauthfile, display);
- debug2("x11_get_proto: %s", cmd);
- if (system(cmd) == 0)
- generated = 1;
- }
- }
-
- /*
- * When in untrusted mode, we read the cookie only if it was
- * successfully generated as an untrusted one in the step
- * above.
- */
- if (trusted || generated) {
- snprintf(cmd, sizeof(cmd),
- "%s %s%s list %s 2>" _PATH_DEVNULL,
- xauth_path,
- generated ? "-f " : "" ,
- generated ? xauthfile : "",
- display);
- debug2("x11_get_proto: %s", cmd);
- f = popen(cmd, "r");
- if (f && fgets(line, sizeof(line), f) &&
- sscanf(line, "%*s %511s %511s", proto, data) == 2)
- got_data = 1;
- if (f)
- pclose(f);
- }
- else
- error("Warning: untrusted X11 forwarding setup failed: "
- "xauth key data not generated");
- }
-
- if (do_unlink) {
- unlink(xauthfile);
- rmdir(xauthdir);
- }
- if (xauthdir)
- xfree(xauthdir);
- if (xauthfile)
- xfree(xauthfile);
-
- /*
- * If we didn't get authentication data, just make up some
- * data. The forwarding code will check the validity of the
- * response anyway, and substitute this data. The X11
- * server, however, will ignore this fake data and use
- * whatever authentication mechanisms it was using otherwise
- * for the local connection.
- */
- if (!got_data) {
- u_int32_t rnd = 0;
-
- log("Warning: No xauth data; "
- "using fake authentication data for X11 forwarding.");
- strlcpy(proto, SSH_X11_PROTO, sizeof proto);
- for (i = 0; i < 16; i++) {
- if (i % 4 == 0)
- rnd = arc4random();
- snprintf(data + 2 * i, sizeof data - 2 * i, "%02x",
- rnd & 0xff);
- rnd >>= 8;
- }
- }
-}
-
-/*
- * This is called when the interactive is entered. This checks if there is
- * an EOF coming on stdin. We must check this explicitly, as select() does
- * not appear to wake up when redirecting from /dev/null.
- */
-
-static void
-client_check_initial_eof_on_stdin(void)
-{
- int len;
- char buf[1];
-
- /*
- * If standard input is to be "redirected from /dev/null", we simply
- * mark that we have seen an EOF and send an EOF message to the
- * server. Otherwise, we try to read a single character; it appears
- * that for some files, such /dev/null, select() never wakes up for
- * read for this descriptor, which means that we never get EOF. This
- * way we will get the EOF if stdin comes from /dev/null or similar.
- */
- if (stdin_null_flag) {
- /* Fake EOF on stdin. */
- debug("Sending eof.");
- stdin_eof = 1;
- packet_start(SSH_CMSG_EOF);
- packet_send();
- } else {
- enter_non_blocking();
-
- /* Check for immediate EOF on stdin. */
- len = read(fileno(stdin), buf, 1);
- if (len == 0) {
- /* EOF. Record that we have seen it and send EOF to server. */
- debug("Sending eof.");
- stdin_eof = 1;
- packet_start(SSH_CMSG_EOF);
- packet_send();
- } else if (len > 0) {
- /*
- * Got data. We must store the data in the buffer,
- * and also process it as an escape character if
- * appropriate.
- */
- if ((u_char) buf[0] == escape_char)
- escape_pending = 1;
- else
- buffer_append(&stdin_buffer, buf, 1);
- }
- leave_non_blocking();
- }
-}
-
-
-/*
- * Make packets from buffered stdin data, and buffer them for sending to the
- * connection.
- */
-
-static void
-client_make_packets_from_stdin_data(void)
-{
- u_int len;
-
- /* Send buffered stdin data to the server. */
- while (buffer_len(&stdin_buffer) > 0 &&
- packet_not_very_much_data_to_write()) {
- len = buffer_len(&stdin_buffer);
- /* Keep the packets at reasonable size. */
- if (len > packet_get_maxsize())
- len = packet_get_maxsize();
- packet_start(SSH_CMSG_STDIN_DATA);
- packet_put_string(buffer_ptr(&stdin_buffer), len);
- packet_send();
- buffer_consume(&stdin_buffer, len);
- stdin_bytes += len;
- /* If we have a pending EOF, send it now. */
- if (stdin_eof && buffer_len(&stdin_buffer) == 0) {
- packet_start(SSH_CMSG_EOF);
- packet_send();
- }
- }
-}
-
-/*
- * Checks if the client window has changed, and sends a packet about it to
- * the server if so. The actual change is detected elsewhere (by a software
- * interrupt on Unix); this just checks the flag and sends a message if
- * appropriate.
- */
-
-static void
-client_check_window_change(void)
-{
- struct winsize ws;
- Channel *c;
-
- if (! received_window_change_signal)
- return;
-
- /*
- * We want to send a window-change request only when a session is
- * already established and alive.
- *
- * Note: During session handshake we cannot send window-change request,
- * because we do not know the remote channel ID yet. We have to store
- * the information about signals which have arrived and send the
- * window-change request when the channel and the session are fully
- * established.
- */
- if (compat20) {
- if (session_ident == -1 || session_closed)
- return;
- c = channel_lookup(session_ident);
- if (c == NULL || c->type != SSH_CHANNEL_OPEN ||
- c->remote_id == -1)
- return;
- }
-
- received_window_change_signal = 0;
-
- if (ioctl(fileno(stdin), TIOCGWINSZ, &ws) < 0)
- return;
-
- debug2("client_check_window_change: changed");
-
- if (compat20) {
- channel_request_start(session_ident, "window-change", 0);
- packet_put_int(ws.ws_col);
- packet_put_int(ws.ws_row);
- packet_put_int(ws.ws_xpixel);
- packet_put_int(ws.ws_ypixel);
- packet_send();
- } else {
- packet_start(SSH_CMSG_WINDOW_SIZE);
- packet_put_int(ws.ws_row);
- packet_put_int(ws.ws_col);
- packet_put_int(ws.ws_xpixel);
- packet_put_int(ws.ws_ypixel);
- packet_send();
- }
-}
-
-static void
-client_global_request_reply(int type, u_int32_t seq, void *ctxt)
-{
- server_alive_timeouts = 0;
- client_global_request_reply_fwd(type, seq, ctxt);
-}
-
-static void
-server_alive_check(void)
-{
- if (++server_alive_timeouts > options.server_alive_count_max) {
- log("Timeout, server not responding.");
- fatal_cleanup();
- }
- packet_start(SSH2_MSG_GLOBAL_REQUEST);
- packet_put_cstring("keepalive@openssh.com");
- packet_put_char(1); /* boolean: want reply */
- packet_send();
-}
-
-/*
- * Waits until the client can do something (some data becomes available on
- * one of the file descriptors).
- */
-
-static void
-client_wait_until_can_do_something(fd_set **readsetp, fd_set **writesetp,
- int *maxfdp, int *nallocp, int rekeying)
-{
- struct timeval tv, *tvp;
- int ret;
-
- /* Add any selections by the channel mechanism. */
- channel_prepare_select(readsetp, writesetp, maxfdp, nallocp, rekeying);
-
- if (!compat20) {
- /* Read from the connection, unless our buffers are full. */
- if (buffer_len(&stdout_buffer) < buffer_high &&
- buffer_len(&stderr_buffer) < buffer_high &&
- channel_not_very_much_buffered_data())
- FD_SET(connection_in, *readsetp);
- /*
- * Read from stdin, unless we have seen EOF or have very much
- * buffered data to send to the server.
- */
- if (!stdin_eof && packet_not_very_much_data_to_write())
- FD_SET(fileno(stdin), *readsetp);
-
- /* Select stdout/stderr if have data in buffer. */
- if (buffer_len(&stdout_buffer) > 0)
- FD_SET(fileno(stdout), *writesetp);
- if (buffer_len(&stderr_buffer) > 0)
- FD_SET(fileno(stderr), *writesetp);
- } else {
- /* channel_prepare_select could have closed the last channel */
- if (session_closed && !channel_still_open() &&
- !packet_have_data_to_write()) {
- /* clear mask since we did not call select() */
- memset(*readsetp, 0, *nallocp);
- memset(*writesetp, 0, *nallocp);
- return;
- } else {
- FD_SET(connection_in, *readsetp);
- }
- }
-
- /* Select server connection if have data to write to the server. */
- if (packet_have_data_to_write())
- FD_SET(connection_out, *writesetp);
-
- /*
- * Wait for something to happen. This will suspend the process until
- * some selected descriptor can be read, written, or has some other
- * event pending.
- */
-
- if (options.server_alive_interval == 0 || !compat20)
- tvp = NULL;
- else {
- tv.tv_sec = options.server_alive_interval;
- tv.tv_usec = 0;
- tvp = &tv;
- }
- ret = select((*maxfdp)+1, *readsetp, *writesetp, NULL, tvp);
- if (ret < 0) {
- char buf[100];
-
- /*
- * We have to clear the select masks, because we return.
- * We have to return, because the mainloop checks for the flags
- * set by the signal handlers.
- */
- memset(*readsetp, 0, *nallocp);
- memset(*writesetp, 0, *nallocp);
-
- if (errno == EINTR)
- return;
- /* Note: we might still have data in the buffers. */
- snprintf(buf, sizeof buf, "select: %s\r\n", strerror(errno));
- buffer_append(&stderr_buffer, buf, strlen(buf));
- quit_pending = 1;
- } else if (ret == 0)
- server_alive_check();
-}
-
-static void
-client_suspend_self(Buffer *bin, Buffer *bout, Buffer *berr)
-{
- struct winsize oldws, newws;
-
- /* Flush stdout and stderr buffers. */
- if (buffer_len(bout) > 0)
- atomicio(write, fileno(stdout), buffer_ptr(bout), buffer_len(bout));
- if (buffer_len(berr) > 0)
- atomicio(write, fileno(stderr), buffer_ptr(berr), buffer_len(berr));
-
- leave_raw_mode();
-
- /*
- * Free (and clear) the buffer to reduce the amount of data that gets
- * written to swap.
- */
- buffer_free(bin);
- buffer_free(bout);
- buffer_free(berr);
-
- /* Save old window size. */
- ioctl(fileno(stdin), TIOCGWINSZ, &oldws);
-
- /* Send the suspend signal to the program itself. */
- kill(getpid(), SIGTSTP);
-
- /* Check if the window size has changed. */
- if (ioctl(fileno(stdin), TIOCGWINSZ, &newws) >= 0 &&
- (oldws.ws_row != newws.ws_row ||
- oldws.ws_col != newws.ws_col ||
- oldws.ws_xpixel != newws.ws_xpixel ||
- oldws.ws_ypixel != newws.ws_ypixel))
- received_window_change_signal = 1;
-
- /* OK, we have been continued by the user. Reinitialize buffers. */
- buffer_init(bin);
- buffer_init(bout);
- buffer_init(berr);
-
- enter_raw_mode();
-}
-
-static void
-client_process_net_input(fd_set * readset)
-{
- int len;
- char buf[8192];
-
- /*
- * Read input from the server, and add any such data to the buffer of
- * the packet subsystem.
- */
- if (FD_ISSET(connection_in, readset)) {
- /* Read as much as possible. */
- len = read(connection_in, buf, sizeof(buf));
- if (len == 0) {
- /* Received EOF. The remote host has closed the connection. */
- snprintf(buf, sizeof buf,
- gettext("Connection to %.300s closed "
- "by remote host.\n"),
- host);
- buffer_append(&stderr_buffer, buf, strlen(buf));
- quit_pending = 1;
- return;
- }
- /*
- * There is a kernel bug on Solaris that causes select to
- * sometimes wake up even though there is no data available.
- */
- if (len < 0 && (errno == EAGAIN || errno == EINTR))
- len = 0;
-
- if (len < 0) {
- /* An error has encountered. Perhaps there is a network problem. */
- snprintf(buf, sizeof buf,
- gettext("Read from remote host "
- "%.300s: %.100s\n"),
- host, strerror(errno));
- buffer_append(&stderr_buffer, buf, strlen(buf));
- quit_pending = 1;
- return;
- }
- packet_process_incoming(buf, len);
- }
-}
-
-static void
-process_cmdline(void)
-{
- void (*handler)(int);
- char *s, *cmd;
- int delete = 0;
- int local = 0;
- Forward fwd;
-
- memset(&fwd, 0, sizeof(fwd));
-
- leave_raw_mode();
- handler = signal(SIGINT, SIG_IGN);
- cmd = s = read_passphrase("\r\nssh> ", RP_ECHO);
- if (s == NULL)
- goto out;
- while (isspace(*s))
- s++;
- if (*s == '-')
- s++; /* Skip cmdline '-', if any */
- if (*s == '\0')
- goto out;
-
- if (*s == 'h' || *s == 'H' || *s == '?') {
- log("Commands:");
- log(" -L[bind_address:]port:host:hostport "
- "Request local forward");
- log(" -R[bind_address:]port:host:hostport "
- "Request remote forward");
- log(" -KR[bind_address:]port "
- "Cancel remote forward");
- goto out;
- }
-
- if (*s == 'K') {
- delete = 1;
- s++;
- }
- if (*s != 'L' && *s != 'R') {
- log("Invalid command.");
- goto out;
- }
- if (*s == 'L')
- local = 1;
- if (local && delete) {
- log("Not supported.");
- goto out;
- }
- if ((!local || delete) && !compat20) {
- log("Not supported for SSH protocol version 1.");
- goto out;
- }
-
- while (isspace(*++s))
- ;
-
- if (delete) {
- if (parse_forward(0, &fwd, s) == 0) {
- log("Bad forwarding close port");
- goto out;
- }
- channel_request_rforward_cancel(fwd.listen_host, fwd.listen_port);
- } else {
- if (parse_forward(1, &fwd, s) == 0) {
- log("Bad forwarding specification.");
- goto out;
- }
- if (local) {
- if (channel_setup_local_fwd_listener(fwd.listen_host,
- fwd.listen_port, fwd.connect_host,
- fwd.connect_port, options.gateway_ports) < 0) {
- log("Port forwarding failed.");
- goto out;
- }
- } else {
- if (channel_request_remote_forwarding(fwd.listen_host,
- fwd.listen_port, fwd.connect_host,
- fwd.connect_port) < 0) {
- log("Port forwarding failed.");
- goto out;
- }
- }
-
- log("Forwarding port.");
- }
-
-out:
- signal(SIGINT, handler);
- enter_raw_mode();
- if (cmd != NULL)
- xfree(cmd);
- if (fwd.listen_host != NULL)
- xfree(fwd.listen_host);
- if (fwd.connect_host != NULL)
- xfree(fwd.connect_host);
-}
-
-/*
- * If we are using the engine we must not fork until we do key reexchange. See
- * PKCS#11 spec for more information on fork safety and packet.c for information
- * about forking with the engine.
- */
-void
-client_daemonize(void)
-{
- if (compat20 == 1 && options.use_openssl_engine == 1) {
- will_daemonize = 1;
- debug("must rekey before daemonizing");
- kex_send_kexinit(xxx_kex);
- need_rekeying = 0;
- }
- else {
- if (daemon(1, 1) < 0) {
- fatal("daemon() failed: %.200s",
- strerror(errno));
- }
- }
-}
-
-/* process the characters one by one */
-static int
-process_escapes(Buffer *bin, Buffer *bout, Buffer *berr, char *buf, int len)
-{
- char string[1536];
- int bytes = 0;
- u_int i;
- u_char ch;
- char *s;
-
- for (i = 0; i < len; i++) {
- /* Get one character at a time. */
- ch = buf[i];
-
- if (escape_pending) {
- /* We have previously seen an escape character. */
- /* Clear the flag now. */
- escape_pending = 0;
-
- /* Process the escaped character. */
- switch (ch) {
- case '.':
- /* Terminate the connection. */
- snprintf(string, sizeof string, "%c.\r\n", escape_char);
- buffer_append(berr, string, strlen(string));
-
- quit_pending = 1;
- return -1;
-
- case 'Z' - 64:
- /* Suspend the program. */
- /* Print a message to that effect to the user. */
- snprintf(string, sizeof string,
- gettext("%c^Z [suspend ssh]\n"),
- escape_char);
- buffer_append(berr, string, strlen(string));
-
- /* Restore terminal modes and suspend. */
- client_suspend_self(bin, bout, berr);
-
- /* We have been continued. */
- continue;
-
- case 'B':
- if (compat20) {
- snprintf(string, sizeof string,
- gettext("%cB [sent break]\n"),
- escape_char);
- buffer_append(berr, string,
- strlen(string));
- channel_request_start(session_ident,
- "break", 0);
- packet_put_int(1000);
- packet_send();
- }
- continue;
-
- case 'R':
- if (compat20) {
- if (datafellows & SSH_BUG_NOREKEY)
- log("Server does not support re-keying");
- else
- need_rekeying = 1;
- }
- continue;
-
- case '&':
- /*
- * Detach the program (continue to serve connections,
- * but put in background and no more new connections).
- */
- /* Restore tty modes. */
- leave_raw_mode();
-
- /* Stop listening for new connections. */
- channel_stop_listening();
-
- snprintf(string, sizeof string,
- gettext("%c& [backgrounded]\n"),
- escape_char);
- buffer_append(berr, string, strlen(string));
-
- client_daemonize();
-
- /* The child continues serving connections. */
- if (compat20) {
- buffer_append(bin, "\004", 1);
- /* fake EOF on stdin */
- return -1;
- } else if (!stdin_eof) {
- /*
- * Sending SSH_CMSG_EOF alone does not always appear
- * to be enough. So we try to send an EOF character
- * first.
- */
- packet_start(SSH_CMSG_STDIN_DATA);
- packet_put_string("\004", 1);
- packet_send();
- /* Close stdin. */
- stdin_eof = 1;
- if (buffer_len(bin) == 0) {
- packet_start(SSH_CMSG_EOF);
- packet_send();
- }
- }
- continue;
-
- case '?':
- snprintf(string, sizeof string, gettext(
-"%c?\n\
-Supported escape sequences:\n\
-%c. - terminate connection\n\
-%cB - send break (SSH protocol 2 only)\n\
-%cC - open a command line\n\
-%cR - Request rekey (SSH protocol 2 only)\n\
-%c^Z - suspend ssh\n\
-%c# - list forwarded connections\n\
-%c& - background ssh (when waiting for connections to terminate)\n\
-%c? - this message\n\
-%c%c - send the escape character by typing it twice\n\
-(Note that escapes are only recognized immediately after newline.)\n"),
- escape_char, escape_char, escape_char, escape_char,
- escape_char, escape_char, escape_char, escape_char,
- escape_char, escape_char);
- buffer_append(berr, string, strlen(string));
- continue;
-
- case '#':
- snprintf(string, sizeof string, "%c#\r\n", escape_char);
- buffer_append(berr, string, strlen(string));
- s = channel_open_message();
- buffer_append(berr, s, strlen(s));
- xfree(s);
- continue;
-
- case 'C':
- process_cmdline();
- continue;
-
- default:
- if (ch != escape_char) {
- buffer_put_char(bin, escape_char);
- bytes++;
- }
- /* Escaped characters fall through here */
- break;
- }
- } else {
- /*
- * The previous character was not an escape char. Check if this
- * is an escape.
- */
- if (last_was_cr && ch == escape_char) {
- /* It is. Set the flag and continue to next character. */
- escape_pending = 1;
- continue;
- }
- }
-
- /*
- * Normal character. Record whether it was a newline,
- * and append it to the buffer.
- */
- last_was_cr = (ch == '\r' || ch == '\n');
- buffer_put_char(bin, ch);
- bytes++;
- }
- return bytes;
-}
-
-static void
-client_process_input(fd_set * readset)
-{
- int len;
- char buf[8192];
-
- /* Read input from stdin. */
- if (FD_ISSET(fileno(stdin), readset)) {
- /* Read as much as possible. */
- len = read(fileno(stdin), buf, sizeof(buf));
- if (len < 0 && (errno == EAGAIN || errno == EINTR))
- return; /* we'll try again later */
- if (len <= 0) {
- /*
- * Received EOF or error. They are treated
- * similarly, except that an error message is printed
- * if it was an error condition.
- */
- if (len < 0) {
- snprintf(buf, sizeof buf, "read: %.100s\r\n", strerror(errno));
- buffer_append(&stderr_buffer, buf, strlen(buf));
- }
- /* Mark that we have seen EOF. */
- stdin_eof = 1;
- /*
- * Send an EOF message to the server unless there is
- * data in the buffer. If there is data in the
- * buffer, no message will be sent now. Code
- * elsewhere will send the EOF when the buffer
- * becomes empty if stdin_eof is set.
- */
- if (buffer_len(&stdin_buffer) == 0) {
- packet_start(SSH_CMSG_EOF);
- packet_send();
- }
- } else if (escape_char == SSH_ESCAPECHAR_NONE) {
- /*
- * Normal successful read, and no escape character.
- * Just append the data to buffer.
- */
- buffer_append(&stdin_buffer, buf, len);
- } else {
- /*
- * Normal, successful read. But we have an escape character
- * and have to process the characters one by one.
- */
- if (process_escapes(&stdin_buffer, &stdout_buffer,
- &stderr_buffer, buf, len) == -1)
- return;
- }
- }
-}
-
-static void
-client_process_output(fd_set * writeset)
-{
- int len;
- char buf[100];
-
- /* Write buffered output to stdout. */
- if (FD_ISSET(fileno(stdout), writeset)) {
- /* Write as much data as possible. */
- len = write(fileno(stdout), buffer_ptr(&stdout_buffer),
- buffer_len(&stdout_buffer));
- if (len <= 0) {
- if (errno == EINTR || errno == EAGAIN)
- len = 0;
- else {
- /*
- * An error or EOF was encountered. Put an
- * error message to stderr buffer.
- */
- snprintf(buf, sizeof buf, "write stdout: %.50s\r\n", strerror(errno));
- buffer_append(&stderr_buffer, buf, strlen(buf));
- quit_pending = 1;
- return;
- }
- }
- /* Consume printed data from the buffer. */
- buffer_consume(&stdout_buffer, len);
- stdout_bytes += len;
- }
- /* Write buffered output to stderr. */
- if (FD_ISSET(fileno(stderr), writeset)) {
- /* Write as much data as possible. */
- len = write(fileno(stderr), buffer_ptr(&stderr_buffer),
- buffer_len(&stderr_buffer));
- if (len <= 0) {
- if (errno == EINTR || errno == EAGAIN)
- len = 0;
- else {
- /* EOF or error, but can't even print error message. */
- quit_pending = 1;
- return;
- }
- }
- /* Consume printed characters from the buffer. */
- buffer_consume(&stderr_buffer, len);
- stderr_bytes += len;
- }
-}
-
-/*
- * Get packets from the connection input buffer, and process them as long as
- * there are packets available.
- *
- * Any unknown packets received during the actual
- * session cause the session to terminate. This is
- * intended to make debugging easier since no
- * confirmations are sent. Any compatible protocol
- * extensions must be negotiated during the
- * preparatory phase.
- */
-
-static void
-client_process_buffered_input_packets(void)
-{
- dispatch_run(DISPATCH_NONBLOCK, &quit_pending, compat20 ? xxx_kex : NULL);
-}
-
-/* scan buf[] for '~' before sending data to the peer */
-
-static int
-simple_escape_filter(Channel *c, char *buf, int len)
-{
- /* XXX we assume c->extended is writeable */
- return process_escapes(&c->input, &c->output, &c->extended, buf, len);
-}
-
-static void
-client_channel_closed(int id, void *arg)
-{
- if (id != session_ident)
- error("client_channel_closed: id %d != session_ident %d",
- id, session_ident);
- channel_cancel_cleanup(id);
- session_closed = 1;
- if (in_raw_mode())
- leave_raw_mode();
-}
-
-/*
- * Implements the interactive session with the server. This is called after
- * the user has been authenticated, and a command has been started on the
- * remote host. If escape_char != SSH_ESCAPECHAR_NONE, it is the character
- * used as an escape character for terminating or suspending the session.
- */
-
-int
-client_loop(int have_pty, int escape_char_arg, int ssh2_chan_id)
-{
- fd_set *readset = NULL, *writeset = NULL;
- double start_time, total_time;
- int max_fd = 0, max_fd2 = 0, len, rekeying = 0, nalloc = 0;
- char buf[100];
-
- debug("Entering interactive session.");
-
- start_time = get_current_time();
-
- /* Initialize variables. */
- escape_pending = 0;
- last_was_cr = 1;
- exit_status = -1;
- stdin_eof = 0;
- buffer_high = 64 * 1024;
- connection_in = packet_get_connection_in();
- connection_out = packet_get_connection_out();
- max_fd = MAX(connection_in, connection_out);
-
- if (!compat20) {
- /* enable nonblocking unless tty */
- if (!isatty(fileno(stdin)))
- set_nonblock(fileno(stdin));
- if (!isatty(fileno(stdout)))
- set_nonblock(fileno(stdout));
- if (!isatty(fileno(stderr)))
- set_nonblock(fileno(stderr));
- max_fd = MAX(max_fd, fileno(stdin));
- max_fd = MAX(max_fd, fileno(stdout));
- max_fd = MAX(max_fd, fileno(stderr));
- }
- stdin_bytes = 0;
- stdout_bytes = 0;
- stderr_bytes = 0;
- quit_pending = 0;
- escape_char = escape_char_arg;
-
- /* Initialize buffers. */
- buffer_init(&stdin_buffer);
- buffer_init(&stdout_buffer);
- buffer_init(&stderr_buffer);
-
- client_init_dispatch();
-
- /*
- * Set signal handlers to restore non-blocking mode, but
- * don't overwrite SIG_IGN - matches behavious from rsh(1).
- */
- if (signal(SIGINT, SIG_IGN) != SIG_IGN)
- signal(SIGINT, signal_handler);
- if (signal(SIGQUIT, SIG_IGN) != SIG_IGN)
- signal(SIGQUIT, signal_handler);
- if (signal(SIGTERM, SIG_IGN) != SIG_IGN)
- signal(SIGTERM, signal_handler);
- if (have_pty)
- signal(SIGWINCH, window_change_handler);
-
- if (have_pty)
- enter_raw_mode();
-
- if (compat20) {
- session_ident = ssh2_chan_id;
- if (escape_char != SSH_ESCAPECHAR_NONE)
- channel_register_filter(session_ident,
- simple_escape_filter);
- if (session_ident != -1)
- channel_register_cleanup(session_ident,
- client_channel_closed);
- } else {
- /* Check if we should immediately send eof on stdin. */
- client_check_initial_eof_on_stdin();
- }
-
- /* Main loop of the client for the interactive session mode. */
- while (!quit_pending) {
-
- /* Process buffered packets sent by the server. */
- client_process_buffered_input_packets();
-
- if (compat20 && session_closed && !channel_still_open())
- break;
-
- rekeying = (xxx_kex != NULL && !xxx_kex->done);
-
- if (rekeying) {
- debug("rekeying in progress");
- } else {
- /*
- * Make packets of buffered stdin data, and buffer
- * them for sending to the server.
- */
- if (!compat20)
- client_make_packets_from_stdin_data();
-
- /*
- * Make packets from buffered channel data, and
- * enqueue them for sending to the server.
- */
- if (packet_not_very_much_data_to_write())
- channel_output_poll();
-
- /*
- * Check if the window size has changed, and buffer a
- * message about it to the server if so.
- */
- client_check_window_change();
-
- if (quit_pending)
- break;
- }
- /*
- * Wait until we have something to do (something becomes
- * available on one of the descriptors).
- */
- max_fd2 = max_fd;
- client_wait_until_can_do_something(&readset, &writeset,
- &max_fd2, &nalloc, rekeying);
-
- if (quit_pending)
- break;
-
- /* Do channel operations unless rekeying in progress. */
- if (!rekeying) {
- channel_after_select(readset, writeset);
- if (need_rekeying || packet_need_rekeying()) {
- debug("rekey limit reached, need rekeying");
- kex_send_kexinit(xxx_kex);
- need_rekeying = 0;
- }
- }
-
- /* Buffer input from the connection. */
- client_process_net_input(readset);
-
- if (quit_pending)
- break;
-
- if (!compat20) {
- /* Buffer data from stdin */
- client_process_input(readset);
- /*
- * Process output to stdout and stderr. Output to
- * the connection is processed elsewhere (above).
- */
- client_process_output(writeset);
- }
-
- /* Send as much buffered packet data as possible to the sender. */
- if (FD_ISSET(connection_out, writeset))
- packet_write_poll();
- }
- if (readset)
- xfree(readset);
- if (writeset)
- xfree(writeset);
-
- /* Terminate the session. */
-
- /* Stop watching for window change. */
- if (have_pty)
- signal(SIGWINCH, SIG_DFL);
-
- channel_free_all();
-
- if (have_pty)
- leave_raw_mode();
-
- /* restore blocking io */
- if (!isatty(fileno(stdin)))
- unset_nonblock(fileno(stdin));
- if (!isatty(fileno(stdout)))
- unset_nonblock(fileno(stdout));
- if (!isatty(fileno(stderr)))
- unset_nonblock(fileno(stderr));
-
- if (received_signal) {
- if (in_non_blocking_mode) /* XXX */
- leave_non_blocking();
- fatal("Killed by signal %d.", (int) received_signal);
- }
-
- /*
- * In interactive mode (with pseudo tty) display a message indicating
- * that the connection has been closed.
- */
- if (have_pty && options.log_level != SYSLOG_LEVEL_QUIET) {
- snprintf(buf, sizeof buf,
- gettext("Connection to %.64s closed.\n"),
- host);
- buffer_append(&stderr_buffer, buf, strlen(buf));
- }
-
- /* Output any buffered data for stdout. */
- while (buffer_len(&stdout_buffer) > 0) {
- len = write(fileno(stdout), buffer_ptr(&stdout_buffer),
- buffer_len(&stdout_buffer));
- if (len <= 0) {
- error("Write failed flushing stdout buffer.");
- break;
- }
- buffer_consume(&stdout_buffer, len);
- stdout_bytes += len;
- }
-
- /* Output any buffered data for stderr. */
- while (buffer_len(&stderr_buffer) > 0) {
- len = write(fileno(stderr), buffer_ptr(&stderr_buffer),
- buffer_len(&stderr_buffer));
- if (len <= 0) {
- error("Write failed flushing stderr buffer.");
- break;
- }
- buffer_consume(&stderr_buffer, len);
- stderr_bytes += len;
- }
-
- /* Clear and free any buffers. */
- memset(buf, 0, sizeof(buf));
- buffer_free(&stdin_buffer);
- buffer_free(&stdout_buffer);
- buffer_free(&stderr_buffer);
-
- /* Report bytes transferred, and transfer rates. */
- total_time = get_current_time() - start_time;
- debug("Transferred: stdin %lu, stdout %lu, stderr %lu bytes in %.1f seconds",
- stdin_bytes, stdout_bytes, stderr_bytes, total_time);
- if (total_time > 0)
- debug("Bytes per second: stdin %.1f, stdout %.1f, stderr %.1f",
- stdin_bytes / total_time, stdout_bytes / total_time,
- stderr_bytes / total_time);
-
- /* Return the exit status of the program. */
- debug("Exit status %d", exit_status);
- return exit_status;
-}
-
-/*********/
-
-static void
-client_input_stdout_data(int type, u_int32_t seq, void *ctxt)
-{
- u_int data_len;
- char *data = packet_get_string(&data_len);
- packet_check_eom();
- buffer_append(&stdout_buffer, data, data_len);
- memset(data, 0, data_len);
- xfree(data);
-}
-static void
-client_input_stderr_data(int type, u_int32_t seq, void *ctxt)
-{
- u_int data_len;
- char *data = packet_get_string(&data_len);
- packet_check_eom();
- buffer_append(&stderr_buffer, data, data_len);
- memset(data, 0, data_len);
- xfree(data);
-}
-static void
-client_input_exit_status(int type, u_int32_t seq, void *ctxt)
-{
- exit_status = packet_get_int();
- packet_check_eom();
- /* Acknowledge the exit. */
- packet_start(SSH_CMSG_EXIT_CONFIRMATION);
- packet_send();
- /*
- * Must wait for packet to be sent since we are
- * exiting the loop.
- */
- packet_write_wait();
- /* Flag that we want to exit. */
- quit_pending = 1;
-}
-
-static Channel *
-client_request_forwarded_tcpip(const char *request_type, int rchan)
-{
- Channel *c = NULL;
- char *listen_address, *originator_address;
- int listen_port, originator_port;
- int sock;
-
- /* Get rest of the packet */
- listen_address = packet_get_string(NULL);
- listen_port = packet_get_int();
- originator_address = packet_get_string(NULL);
- originator_port = packet_get_int();
- packet_check_eom();
-
- debug("client_request_forwarded_tcpip: listen %s port %d, originator %s port %d",
- listen_address, listen_port, originator_address, originator_port);
-
- sock = channel_connect_by_listen_address(listen_port);
- if (sock < 0) {
- xfree(originator_address);
- xfree(listen_address);
- return NULL;
- }
- c = channel_new("forwarded-tcpip",
- SSH_CHANNEL_CONNECTING, sock, sock, -1,
- CHAN_TCP_WINDOW_DEFAULT, CHAN_TCP_WINDOW_DEFAULT, 0,
- xstrdup(originator_address), 1);
- xfree(originator_address);
- xfree(listen_address);
- return c;
-}
-
-static Channel *
-client_request_x11(const char *request_type, int rchan)
-{
- Channel *c = NULL;
- char *originator;
- int originator_port;
- int sock;
-
- if (!options.forward_x11) {
- error("Warning: ssh server tried X11 forwarding.");
- error("Warning: this is probably a break in attempt by a malicious server.");
- return NULL;
- }
- originator = packet_get_string(NULL);
- if (datafellows & SSH_BUG_X11FWD) {
- debug2("buggy server: x11 request w/o originator_port");
- originator_port = 0;
- } else {
- originator_port = packet_get_int();
- }
- packet_check_eom();
- /* XXX check permission */
- debug("client_request_x11: request from %s %d", originator,
- originator_port);
- xfree(originator);
- sock = x11_connect_display();
- if (sock < 0)
- return NULL;
- c = channel_new("x11",
- SSH_CHANNEL_X11_OPEN, sock, sock, -1,
- CHAN_TCP_WINDOW_DEFAULT, CHAN_X11_PACKET_DEFAULT, 0,
- xstrdup("x11"), 1);
- c->force_drain = 1;
- return c;
-}
-
-static Channel *
-client_request_agent(const char *request_type, int rchan)
-{
- Channel *c = NULL;
- int sock;
-
- if (!options.forward_agent) {
- error("Warning: ssh server tried agent forwarding.");
- error("Warning: this is probably a break in attempt by a malicious server.");
- return NULL;
- }
- sock = ssh_get_authentication_socket();
- if (sock < 0)
- return NULL;
- c = channel_new("authentication agent connection",
- SSH_CHANNEL_OPEN, sock, sock, -1,
- CHAN_X11_WINDOW_DEFAULT, CHAN_TCP_WINDOW_DEFAULT, 0,
- xstrdup("authentication agent connection"), 1);
- c->force_drain = 1;
- return c;
-}
-
-/* XXXX move to generic input handler */
-static void
-client_input_channel_open(int type, u_int32_t seq, void *ctxt)
-{
- Channel *c = NULL;
- char *ctype;
- int rchan;
- u_int rmaxpack, rwindow, len;
-
- ctype = packet_get_string(&len);
- rchan = packet_get_int();
- rwindow = packet_get_int();
- rmaxpack = packet_get_int();
-
- debug("client_input_channel_open: ctype %s rchan %d win %d max %d",
- ctype, rchan, rwindow, rmaxpack);
-
- if (strcmp(ctype, "forwarded-tcpip") == 0) {
- c = client_request_forwarded_tcpip(ctype, rchan);
- } else if (strcmp(ctype, "x11") == 0) {
- c = client_request_x11(ctype, rchan);
- } else if (strcmp(ctype, "auth-agent@openssh.com") == 0) {
- c = client_request_agent(ctype, rchan);
- }
-/* XXX duplicate : */
- if (c != NULL) {
- debug("confirm %s", ctype);
- c->remote_id = rchan;
- c->remote_window = rwindow;
- c->remote_maxpacket = rmaxpack;
- if (c->type != SSH_CHANNEL_CONNECTING) {
- packet_start(SSH2_MSG_CHANNEL_OPEN_CONFIRMATION);
- packet_put_int(c->remote_id);
- packet_put_int(c->self);
- packet_put_int(c->local_window);
- packet_put_int(c->local_maxpacket);
- packet_send();
- }
- } else {
- debug("failure %s", ctype);
- packet_start(SSH2_MSG_CHANNEL_OPEN_FAILURE);
- packet_put_int(rchan);
- packet_put_int(SSH2_OPEN_ADMINISTRATIVELY_PROHIBITED);
- if (!(datafellows & SSH_BUG_OPENFAILURE)) {
- packet_put_utf8_cstring("open failed");
- packet_put_cstring("");
- }
- packet_send();
- }
- xfree(ctype);
-}
-
-static void
-client_input_channel_req(int type, u_int32_t seq, void *ctxt)
-{
- Channel *c = NULL;
- int id, reply, success = 0;
- char *rtype;
-
- id = packet_get_int();
- rtype = packet_get_string(NULL);
- reply = packet_get_char();
-
- debug("client_input_channel_req: channel %d rtype %s reply %d",
- id, rtype, reply);
-
- if (session_ident == -1) {
- error("client_input_channel_req: no channel %d", session_ident);
- } else if (id != session_ident) {
- error("client_input_channel_req: channel %d: wrong channel: %d",
- session_ident, id);
- }
- c = channel_lookup(id);
- if (c == NULL) {
- error("client_input_channel_req: channel %d: unknown channel", id);
- } else if (strcmp(rtype, "eow@openssh.com") == 0) {
- packet_check_eom();
- chan_rcvd_eow(c);
- } else if (strcmp(rtype, "exit-status") == 0) {
- success = 1;
- exit_status = packet_get_int();
- packet_check_eom();
- }
- if (reply) {
- packet_start(success ?
- SSH2_MSG_CHANNEL_SUCCESS : SSH2_MSG_CHANNEL_FAILURE);
- packet_put_int(c->remote_id);
- packet_send();
- }
- xfree(rtype);
-}
-
-static void
-client_input_global_request(int type, u_int32_t seq, void *ctxt)
-{
- char *rtype;
- int want_reply;
- int success = 0;
-
- rtype = packet_get_string(NULL);
- want_reply = packet_get_char();
- debug("client_input_global_request: rtype %s want_reply %d", rtype, want_reply);
- if (want_reply) {
- packet_start(success ?
- SSH2_MSG_REQUEST_SUCCESS : SSH2_MSG_REQUEST_FAILURE);
- packet_send();
- packet_write_wait();
- }
- xfree(rtype);
-}
-
-static void
-client_init_dispatch_20(void)
-{
- dispatch_init(&dispatch_protocol_error);
-
- dispatch_set(SSH2_MSG_CHANNEL_CLOSE, &channel_input_oclose);
- dispatch_set(SSH2_MSG_CHANNEL_DATA, &channel_input_data);
- dispatch_set(SSH2_MSG_CHANNEL_EOF, &channel_input_ieof);
- dispatch_set(SSH2_MSG_CHANNEL_EXTENDED_DATA, &channel_input_extended_data);
- dispatch_set(SSH2_MSG_CHANNEL_OPEN, &client_input_channel_open);
- dispatch_set(SSH2_MSG_CHANNEL_OPEN_CONFIRMATION, &channel_input_open_confirmation);
- dispatch_set(SSH2_MSG_CHANNEL_OPEN_FAILURE, &channel_input_open_failure);
- dispatch_set(SSH2_MSG_CHANNEL_REQUEST, &client_input_channel_req);
- dispatch_set(SSH2_MSG_CHANNEL_WINDOW_ADJUST, &channel_input_window_adjust);
- dispatch_set(SSH2_MSG_GLOBAL_REQUEST, &client_input_global_request);
-
- /* rekeying */
- dispatch_set(SSH2_MSG_KEXINIT, &kex_input_kexinit);
-
- /* global request reply messages */
- dispatch_set(SSH2_MSG_REQUEST_FAILURE, &client_global_request_reply);
- dispatch_set(SSH2_MSG_REQUEST_SUCCESS, &client_global_request_reply);
-}
-
-static void
-client_init_dispatch_13(void)
-{
- dispatch_init(NULL);
- dispatch_set(SSH_MSG_CHANNEL_CLOSE, &channel_input_close);
- dispatch_set(SSH_MSG_CHANNEL_CLOSE_CONFIRMATION, &channel_input_close_confirmation);
- dispatch_set(SSH_MSG_CHANNEL_DATA, &channel_input_data);
- dispatch_set(SSH_MSG_CHANNEL_OPEN_CONFIRMATION, &channel_input_open_confirmation);
- dispatch_set(SSH_MSG_CHANNEL_OPEN_FAILURE, &channel_input_open_failure);
- dispatch_set(SSH_MSG_PORT_OPEN, &channel_input_port_open);
- dispatch_set(SSH_SMSG_EXITSTATUS, &client_input_exit_status);
- dispatch_set(SSH_SMSG_STDERR_DATA, &client_input_stderr_data);
- dispatch_set(SSH_SMSG_STDOUT_DATA, &client_input_stdout_data);
-
- dispatch_set(SSH_SMSG_AGENT_OPEN, options.forward_agent ?
- &auth_input_open_request : &deny_input_open);
- dispatch_set(SSH_SMSG_X11_OPEN, options.forward_x11 ?
- &x11_input_open : &deny_input_open);
-}
-
-static void
-client_init_dispatch_15(void)
-{
- client_init_dispatch_13();
- dispatch_set(SSH_MSG_CHANNEL_CLOSE, &channel_input_ieof);
- dispatch_set(SSH_MSG_CHANNEL_CLOSE_CONFIRMATION, & channel_input_oclose);
-}
-
-static void
-client_init_dispatch(void)
-{
- if (compat20)
- client_init_dispatch_20();
- else if (compat13)
- client_init_dispatch_13();
- else
- client_init_dispatch_15();
-}
diff --git a/usr/src/cmd/ssh/ssh/gss-clnt.c b/usr/src/cmd/ssh/ssh/gss-clnt.c
deleted file mode 100644
index 05cd358217..0000000000
--- a/usr/src/cmd/ssh/ssh/gss-clnt.c
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * Copyright (c) 2001-2003 Simon Wilkinson. All rights reserved. *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR `AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include "includes.h"
-
-#ifdef GSSAPI
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "ssh.h"
-#include "ssh2.h"
-#include "xmalloc.h"
-#include "buffer.h"
-#include "bufaux.h"
-#include "packet.h"
-#include "compat.h"
-#include <openssl/evp.h>
-#include "cipher.h"
-#include "kex.h"
-#include "log.h"
-#include "compat.h"
-
-#include <netdb.h>
-
-#include "ssh-gss.h"
-
-void
-ssh_gssapi_client_kex_hook(Kex *kex, char **proposal)
-{
- gss_OID_set mechs = GSS_C_NULL_OID_SET;
-
- if (kex == NULL || kex->serverhost == NULL)
- fatal("INTERNAL ERROR (%s)", __func__);
-
- ssh_gssapi_client_mechs(kex->serverhost, &mechs);
- ssh_gssapi_modify_kex(kex, mechs, proposal);
-}
-
-void
-ssh_gssapi_client_mechs(const char *server_host, gss_OID_set *mechs)
-{
- gss_OID_set indicated = GSS_C_NULL_OID_SET;
- gss_OID_set acquired, supported;
- gss_OID mech;
- gss_cred_id_t creds;
- Gssctxt *ctxt = NULL;
- gss_buffer_desc tok;
- OM_uint32 maj, min;
- int i;
- char *errmsg;
-
- if (!mechs)
- return;
- *mechs = GSS_C_NULL_OID_SET;
-
- maj = gss_indicate_mechs(&min, &indicated);
- if (GSS_ERROR(maj)) {
- debug("No GSS-API mechanisms are installed");
- return;
- }
-
- maj = gss_create_empty_oid_set(&min, &supported);
- if (GSS_ERROR(maj)) {
- errmsg = ssh_gssapi_last_error(NULL, &maj, &min);
- debug("Failed to allocate resources (%s) for GSS-API", errmsg);
- xfree(errmsg);
- (void) gss_release_oid_set(&min, &indicated);
- return;
- }
- maj = gss_acquire_cred(&min, GSS_C_NO_NAME, 0, indicated,
- GSS_C_INITIATE, &creds, &acquired, NULL);
-
- if (GSS_ERROR(maj)) {
- errmsg = ssh_gssapi_last_error(NULL, &maj, &min);
- debug("Failed to acquire GSS-API credentials for any "
- "mechanisms (%s)", errmsg);
- xfree(errmsg);
- (void) gss_release_oid_set(&min, &indicated);
- (void) gss_release_oid_set(&min, &supported);
- return;
- }
- (void) gss_release_cred(&min, &creds);
-
- for (i = 0; i < acquired->count; i++) {
- mech = &acquired->elements[i];
-
- if (ssh_gssapi_is_spnego(mech))
- continue;
-
- ssh_gssapi_build_ctx(&ctxt, 1, mech);
- if (!ctxt)
- continue;
-
- /*
- * This is useful for mechs like Kerberos, which can
- * detect unknown target princs here, but not for
- * mechs like SPKM, which cannot detect unknown princs
- * until context tokens are actually exchanged.
- *
- * 'Twould be useful to have a test that could save us
- * the bother of trying this for SPKM and the such...
- */
- maj = ssh_gssapi_init_ctx(ctxt, server_host, 0, NULL, &tok);
- if (GSS_ERROR(maj)) {
- errmsg = ssh_gssapi_last_error(ctxt, NULL, NULL);
- debug("Skipping GSS-API mechanism %s (%s)",
- ssh_gssapi_oid_to_name(mech), errmsg);
- xfree(errmsg);
- continue;
- }
-
- (void) gss_release_buffer(&min, &tok);
-
- maj = gss_add_oid_set_member(&min, mech, &supported);
- if (GSS_ERROR(maj)) {
- errmsg = ssh_gssapi_last_error(NULL, &maj, &min);
- debug("Failed to allocate resources (%s) for GSS-API",
- errmsg);
- xfree(errmsg);
- }
- }
-
- *mechs = supported;
-}
-
-
-/*
- * Wrapper to init_sec_context. Requires that the context contains:
- *
- * oid
- * server name (from ssh_gssapi_import_name)
- */
-OM_uint32
-ssh_gssapi_init_ctx(Gssctxt *ctx, const char *server_host, int deleg_creds,
- gss_buffer_t recv_tok, gss_buffer_t send_tok)
-{
- int flags = GSS_C_MUTUAL_FLAG | GSS_C_INTEG_FLAG;
-
- debug("%s(%p, %s, %d, %p, %p)", __func__, ctx, server_host,
- deleg_creds, recv_tok, send_tok);
-
- if (deleg_creds) {
- flags |= GSS_C_DELEG_FLAG;
- debug("Delegating GSS-API credentials");
- }
-
- /* Build target principal */
- if (ctx->desired_name == GSS_C_NO_NAME &&
- !ssh_gssapi_import_name(ctx, server_host)) {
- return (ctx->major);
- }
-
- ctx->major = gss_init_sec_context(&ctx->minor, GSS_C_NO_CREDENTIAL,
- &ctx->context, ctx->desired_name, ctx->desired_mech, flags,
- 0, /* default lifetime */
- NULL, /* no channel bindings */
- recv_tok,
- NULL, /* actual mech type */
- send_tok, &ctx->flags,
- NULL); /* actual lifetime */
-
- if (GSS_ERROR(ctx->major))
- ssh_gssapi_error(ctx, "calling GSS_Init_sec_context()");
-
- return (ctx->major);
-}
-#endif /* GSSAPI */
diff --git a/usr/src/cmd/ssh/ssh/ssh.c b/usr/src/cmd/ssh/ssh/ssh.c
deleted file mode 100644
index d122875470..0000000000
--- a/usr/src/cmd/ssh/ssh/ssh.c
+++ /dev/null
@@ -1,1245 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * Ssh client program. This program can be used to log into a remote machine.
- * The software supports strong authentication, encryption, and forwarding
- * of X11, TCP/IP, and authentication connections.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- *
- * Copyright (c) 1999 Niels Provos. All rights reserved.
- * Copyright (c) 2000, 2001, 2002 Markus Friedl. All rights reserved.
- *
- * Modified to work with SSL by Niels Provos <provos@citi.umich.edu>
- * in Canada (German citizen).
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: ssh.c,v 1.186 2002/09/19 01:58:18 djm Exp $");
-
-#include <openssl/err.h>
-
-#include "ssh.h"
-#include "ssh1.h"
-#include "ssh2.h"
-#include "compat.h"
-#include "cipher.h"
-#include "xmalloc.h"
-#include "packet.h"
-#include "buffer.h"
-#include "channels.h"
-#include "key.h"
-#include "authfd.h"
-#include "authfile.h"
-#include "pathnames.h"
-#include "clientloop.h"
-#include "log.h"
-#include "readconf.h"
-#include "sshconnect.h"
-#include "tildexpand.h"
-#include "dispatch.h"
-#include "misc.h"
-#include "kex.h"
-#include "mac.h"
-#include "sshtty.h"
-
-#include "g11n.h"
-
-#ifdef HAVE___PROGNAME
-extern char *__progname;
-#else
-char *__progname;
-#endif
-
-/* Flag indicating whether IPv4 or IPv6. This can be set on the command line.
- Default value is AF_UNSPEC means both IPv4 and IPv6. */
-#ifdef IPV4_DEFAULT
-int IPv4or6 = AF_INET;
-#else
-int IPv4or6 = AF_UNSPEC;
-#endif
-
-/* Flag indicating whether debug mode is on. This can be set on the command line. */
-int debug_flag = 0;
-
-/* Flag indicating whether a tty should be allocated */
-int tty_flag = 0;
-int no_tty_flag = 0;
-int force_tty_flag = 0;
-
-/* don't exec a shell */
-int no_shell_flag = 0;
-
-/*
- * Flag indicating that nothing should be read from stdin. This can be set
- * on the command line.
- */
-int stdin_null_flag = 0;
-
-/*
- * Flag indicating that ssh should fork after authentication. This is useful
- * so that the passphrase can be entered manually, and then ssh goes to the
- * background.
- */
-int fork_after_authentication_flag = 0;
-
-/*
- * General data structure for command line options and options configurable
- * in configuration files. See readconf.h.
- */
-Options options;
-
-/* optional user configfile */
-char *config = NULL;
-
-/*
- * Name of the host we are connecting to. This is the name given on the
- * command line, or the HostName specified for the user-supplied name in a
- * configuration file.
- */
-char *host;
-
-/* socket address the host resolves to */
-struct sockaddr_storage hostaddr;
-
-/* Private host keys. */
-Sensitive sensitive_data;
-
-/* Original real UID. */
-uid_t original_real_uid;
-uid_t original_effective_uid;
-
-/* command to be executed */
-Buffer command;
-
-/* Should we execute a command or invoke a subsystem? */
-int subsystem_flag = 0;
-
-/* # of replies received for global requests */
-static int client_global_request_id = 0;
-
-/* pid of proxycommand child process */
-pid_t proxy_command_pid = 0;
-
-/* Prints a help message to the user. This function never returns. */
-
-static void
-usage(void)
-{
- fprintf(stderr,
- gettext("Usage: %s [options] host [command]\n"
- "Options:\n"
- " -l user Log in using this user name.\n"
- " -n Redirect input from /dev/null.\n"
- " -F config Config file (default: ~/%s).\n"
- " -A Enable authentication agent forwarding.\n"
- " -a Disable authentication agent forwarding "
- "(default).\n"
-#ifdef AFS
- " -k Disable Kerberos ticket and AFS token "
- "forwarding.\n"
-#endif /* AFS */
- " -X Enable X11 connection forwarding.\n"
- " -x Disable X11 connection forwarding (default).\n"
- " -i file Identity for public key authentication "
- "(default: ~/.ssh/identity)\n"
- " -t Tty; allocate a tty even if command is given.\n"
- " -T Do not allocate a tty.\n"
- " -v Verbose; display verbose debugging messages.\n"
- " Multiple -v increases verbosity.\n"
- " -V Display version number only.\n"
- " -q Quiet; don't display any warning messages.\n"
- " -f Fork into background after authentication.\n"
- " -e char Set escape character; ``none'' = disable "
- "(default: ~).\n"
- " -c cipher Select encryption algorithm\n"
- " -m macs Specify MAC algorithms for protocol version 2.\n"
- " -p port Connect to this port. Server must be "
- "on the same port.\n"
- " -L listen-port:host:port Forward local port to "
- "remote address\n"
- " -R listen-port:host:port Forward remote port to "
- "local address\n"
- " These cause %s to listen for connections "
- "on a port, and\n"
- " forward them to the other side by "
- "connecting to host:port.\n"
- " -D port Enable dynamic application-level "
- "port forwarding.\n"
- " -C Enable compression.\n"
- " -N Do not execute a shell or command.\n"
- " -g Allow remote hosts to connect to forwarded "
- "ports.\n"
- " -1 Force protocol version 1.\n"
- " -2 Force protocol version 2.\n"
- " -4 Use IPv4 only.\n"
- " -6 Use IPv6 only.\n"
- " -o 'option' Process the option as if it was read "
- "from a configuration file.\n"
- " -s Invoke command (mandatory) as SSH2 subsystem.\n"
- " -b addr Local IP address.\n"),
- __progname, _PATH_SSH_USER_CONFFILE, __progname);
- exit(1);
-}
-
-static int ssh_session(void);
-static int ssh_session2(void);
-static void load_public_identity_files(void);
-static void rsh_connect(char *host, char *user, Buffer * command);
-
-/*
- * Main program for the ssh client.
- */
-int
-main(int ac, char **av)
-{
- int i, opt, exit_status;
- char *p, *cp, buf[256], *pw_name, *pw_dir;
- struct stat st;
- struct passwd *pw;
- int dummy;
- extern int optind, optreset;
- extern char *optarg;
- Forward fwd;
-
- __progname = get_progname(av[0]);
-
- (void) g11n_setlocale(LC_ALL, "");
-
- init_rng();
-
- /*
- * Save the original real uid. It will be needed later (uid-swapping
- * may clobber the real uid).
- */
- original_real_uid = getuid();
- original_effective_uid = geteuid();
-
- /*
- * Use uid-swapping to give up root privileges for the duration of
- * option processing. We will re-instantiate the rights when we are
- * ready to create the privileged port, and will permanently drop
- * them when the port has been created (actually, when the connection
- * has been made, as we may need to create the port several times).
- */
- PRIV_END;
-
-#ifdef HAVE_SETRLIMIT
- /* If we are installed setuid root be careful to not drop core. */
- if (original_real_uid != original_effective_uid) {
- struct rlimit rlim;
- rlim.rlim_cur = rlim.rlim_max = 0;
- if (setrlimit(RLIMIT_CORE, &rlim) < 0)
- fatal("setrlimit failed: %.100s", strerror(errno));
- }
-#endif
- /*
- * Get user data. It may happen that NIS or LDAP connection breaks down
- * during the user's session. We should try to do our best and use the
- * HOME and LOGNAME variables. Remember that the SSH client might be the
- * only tool available to fix the problem with the naming services.
- */
- pw = getpwuid(original_real_uid);
- if (pw == NULL) {
- if ((pw_dir = getenv("HOME")) == NULL) {
- log("User account's password entry not found and HOME "
- "not set. Set it manually and try again. "
- "Exiting.");
- exit(1);
- }
- log("User account's password entry not found, using "
- "the HOME variable.");
-
- if ((pw_name = getenv("LOGNAME")) == NULL) {
- log("Need a local user name but LOGNAME is not set. "
- "Set it manually and try again. Exiting.");
- exit(1);
- }
- log("Local user name '%s' set from the LOGNAME variable.",
- pw_name);
-
- pw_dir = xstrdup(pw_dir);
- pw_name = xstrdup(pw_name);
- } else {
- pw_name = xstrdup(pw->pw_name);
- pw_dir = xstrdup(pw->pw_dir);
- }
-
- /*
- * Set our umask to something reasonable, as some files are created
- * with the default umask. This will make them world-readable but
- * writable only by the owner, which is ok for all files for which we
- * don't set the modes explicitly.
- */
- umask(022);
-
- /* Initialize option structure to indicate that no values have been set. */
- initialize_options(&options);
-
- /* Parse command-line arguments. */
- host = NULL;
-
-again:
- while ((opt = getopt(ac, av,
- "1246ab:c:e:fgi:kl:m:no:p:qstvxACD:F:I:L:NPR:TVX")) != -1) {
- switch (opt) {
- case '1':
- options.protocol = SSH_PROTO_1;
- break;
- case '2':
- options.protocol = SSH_PROTO_2;
- break;
- case '4':
- IPv4or6 = AF_INET;
- break;
- case '6':
- IPv4or6 = AF_INET6;
- break;
- case 'n':
- stdin_null_flag = 1;
- break;
- case 'f':
- fork_after_authentication_flag = 1;
- stdin_null_flag = 1;
- break;
- case 'x':
- options.forward_x11 = 0;
- break;
- case 'X':
- options.forward_x11 = 1;
- break;
- case 'g':
- options.gateway_ports = 1;
- break;
- case 'P': /* deprecated */
- fprintf(stderr, gettext("Warning: Option -P has "
- "been deprecated\n"));
- options.use_privileged_port = 0;
- break;
- case 'a':
- options.forward_agent = 0;
- break;
- case 'A':
- options.forward_agent = 1;
- break;
-#ifdef AFS
- case 'k':
- options.kerberos_tgt_passing = 0;
- options.afs_token_passing = 0;
- break;
-#endif
- case 'i':
- if (stat(optarg, &st) < 0) {
- fprintf(stderr,
- gettext("Warning: Identity file %s "
- "does not exist.\n"), optarg);
- break;
- }
- if (options.num_identity_files >=
- SSH_MAX_IDENTITY_FILES)
- fatal("Too many identity files specified "
- "(max %d)", SSH_MAX_IDENTITY_FILES);
- options.identity_files[options.num_identity_files++] =
- xstrdup(optarg);
- break;
- case 'I':
- fprintf(stderr, "no support for smartcards.\n");
- break;
- case 't':
- if (tty_flag)
- force_tty_flag = 1;
- tty_flag = 1;
- break;
- case 'v':
- if (0 == debug_flag) {
- debug_flag = 1;
- options.log_level = SYSLOG_LEVEL_DEBUG1;
- } else if (options.log_level < SYSLOG_LEVEL_DEBUG3) {
- options.log_level++;
- break;
- } else
- fatal("Too high debugging level.");
- /* FALLTHROUGH */
- case 'V':
- fprintf(stderr,
- gettext("%s, SSH protocols %d.%d/%d.%d, "
- "OpenSSL 0x%8.8lx\n"),
- SSH_VERSION,
- PROTOCOL_MAJOR_1, PROTOCOL_MINOR_1,
- PROTOCOL_MAJOR_2, PROTOCOL_MINOR_2,
- SSLeay());
- if (opt == 'V')
- exit(0);
- break;
- case 'q':
- options.log_level = SYSLOG_LEVEL_QUIET;
- break;
- case 'e':
- if (optarg[0] == '^' && optarg[2] == 0 &&
- (u_char) optarg[1] >= 64 &&
- (u_char) optarg[1] < 128)
- options.escape_char = (u_char) optarg[1] & 31;
- else if (strlen(optarg) == 1)
- options.escape_char = (u_char) optarg[0];
- else if (strcmp(optarg, "none") == 0)
- options.escape_char = SSH_ESCAPECHAR_NONE;
- else {
- fprintf(stderr,
- gettext("Bad escape character '%s'.\n"),
- optarg);
- exit(1);
- }
- break;
- case 'c':
- if (ciphers_valid(optarg)) {
- /* SSH2 only */
- options.ciphers = xstrdup(optarg);
- options.cipher = SSH_CIPHER_ILLEGAL;
- } else {
- /* SSH1 only */
- options.cipher = cipher_number(optarg);
- if (options.cipher == -1) {
- fprintf(stderr,
- gettext("Unknown cipher "
- "type '%s'\n"),
- optarg);
- exit(1);
- }
- if (options.cipher == SSH_CIPHER_3DES)
- options.ciphers = "3des-cbc";
- else if (options.cipher == SSH_CIPHER_BLOWFISH)
- options.ciphers = "blowfish-cbc";
- else
- options.ciphers = (char *)-1;
- }
- break;
- case 'm':
- if (mac_valid(optarg))
- options.macs = xstrdup(optarg);
- else {
- fprintf(stderr,
- gettext("Unknown mac type '%s'\n"),
- optarg);
- exit(1);
- }
- break;
- case 'p':
- options.port = a2port(optarg);
- if (options.port == 0) {
- fprintf(stderr, gettext("Bad port '%s'\n"),
- optarg);
- exit(1);
- }
- break;
- case 'l':
- options.user = optarg;
- break;
-
- case 'L':
- if (parse_forward(1, &fwd, optarg))
- add_local_forward(&options, &fwd);
- else
- fatal("Bad local forwarding specification "
- "'%s'\n", optarg);
- break;
-
- case 'R':
- if (parse_forward(1, &fwd, optarg))
- add_remote_forward(&options, &fwd);
- else
- fatal("Bad remote forwarding specification "
- "'%s'\n", optarg);
- break;
-
- case 'D':
- if (parse_forward(0, &fwd, optarg) == 0)
- fatal("Bad dynamic forwarding specification "
- "'%s'\n", optarg);
- fwd.connect_host = "socks";
- add_local_forward(&options, &fwd);
- break;
-
- case 'C':
- options.compression = 1;
- break;
- case 'N':
- no_shell_flag = 1;
- no_tty_flag = 1;
- break;
- case 'T':
- no_tty_flag = 1;
- break;
- case 'o':
- dummy = 1;
- if (process_config_line(&options, host ? host : "",
- optarg, "command-line", 0, &dummy) != 0)
- exit(1);
- break;
- case 's':
- subsystem_flag = 1;
- break;
- case 'b':
- options.bind_address = optarg;
- break;
- case 'F':
- config = optarg;
- break;
- default:
- usage();
- }
- }
-
- ac -= optind;
- av += optind;
-
- if (ac > 0 && !host && **av != '-') {
- if (strchr(*av, '@')) {
- p = xstrdup(*av);
- cp = strchr(p, '@');
- if (cp == NULL || cp == p)
- usage();
- options.user = p;
- *cp = '\0';
- host = ++cp;
- } else
- host = *av;
- ac--, av++;
- if (ac > 0) {
- optind = 0;
- optreset = 1;
- goto again;
- }
- }
-
- /* Check that we got a host name. */
- if (!host)
- usage();
-
- SSLeay_add_all_algorithms();
- ERR_load_crypto_strings();
- channel_set_af(IPv4or6);
-
- /* Initialize the command to execute on remote host. */
- buffer_init(&command);
-
- /*
- * Save the command to execute on the remote host in a buffer. There
- * is no limit on the length of the command, except by the maximum
- * packet size. Also sets the tty flag if there is no command.
- */
- if (!ac) {
- /* No command specified - execute shell on a tty. */
- tty_flag = 1;
- if (subsystem_flag) {
- fprintf(stderr,
- gettext("You must specify a subsystem "
- "to invoke.\n"));
- usage();
- }
- } else {
- /* A command has been specified. Store it into the buffer. */
- for (i = 0; i < ac; i++) {
- if (i)
- buffer_append(&command, " ", 1);
- buffer_append(&command, av[i], strlen(av[i]));
- }
- }
-
- /* Cannot fork to background if no command. */
- if (fork_after_authentication_flag && buffer_len(&command) == 0 && !no_shell_flag)
- fatal("Cannot fork into background without a command to execute.");
-
- /* Allocate a tty by default if no command specified. */
- if (buffer_len(&command) == 0)
- tty_flag = 1;
-
- /* Force no tty */
- if (no_tty_flag)
- tty_flag = 0;
- /* Do not allocate a tty if stdin is not a tty. */
- if (!isatty(fileno(stdin)) && !force_tty_flag) {
- if (tty_flag)
- log("Pseudo-terminal will not be allocated because stdin is not a terminal.");
- tty_flag = 0;
- }
-
- /*
- * Initialize "log" output. Since we are the client all output
- * actually goes to stderr.
- */
- log_init(av[0], options.log_level == -1 ? SYSLOG_LEVEL_INFO : options.log_level,
- SYSLOG_FACILITY_USER, 1);
-
- /*
- * Read per-user configuration file. Ignore the system wide config
- * file if the user specifies a config file on the command line.
- */
- if (config != NULL) {
- if (!read_config_file(config, host, &options))
- fatal("Can't open user config file %.100s: "
- "%.100s", config, strerror(errno));
- } else {
- snprintf(buf, sizeof buf, "%.100s/%.100s", pw_dir,
- _PATH_SSH_USER_CONFFILE);
- (void)read_config_file(buf, host, &options);
-
- /* Read systemwide configuration file after use config. */
- (void)read_config_file(_PATH_HOST_CONFIG_FILE, host, &options);
- }
-
- process_unknown_options(&options);
-
- /* Fill configuration defaults. */
- fill_default_options(&options);
-
- /* reinit */
- log_init(av[0], options.log_level, SYSLOG_FACILITY_USER, 1);
-
- seed_rng();
-
- if (options.user == NULL)
- options.user = xstrdup(pw_name);
-
- if (options.hostname != NULL)
- host = options.hostname;
-
- /* Disable rhosts authentication if not running as root. */
-#ifdef HAVE_CYGWIN
- /* Ignore uid if running under Windows */
- if (!options.use_privileged_port) {
-#else
- if (original_effective_uid != 0 || !options.use_privileged_port) {
-#endif
- debug("Rhosts Authentication disabled, "
- "originating port will not be trusted.");
- options.rhosts_authentication = 0;
- }
- /* Open a connection to the remote host. */
-
- /* XXX OpenSSH has deprecated UseRsh */
- if (options.use_rsh) {
- seteuid(original_real_uid);
- setuid(original_real_uid);
- rsh_connect(host, options.user, &command);
- fatal("rsh_connect returned");
- }
-
- if (ssh_connect(host, &hostaddr, options.port, IPv4or6,
- options.connection_attempts,
-#ifdef HAVE_CYGWIN
- options.use_privileged_port,
-#else
- original_effective_uid == 0 && options.use_privileged_port,
-#endif
- options.proxy_command) != 0) {
- /* XXX OpenSSH has deprecated FallbackToRsh */
- if (options.fallback_to_rsh) {
- seteuid(original_real_uid);
- setuid(original_real_uid);
- rsh_connect(host, options.user, &command);
- fatal("rsh_connect returned");
- }
- exit(1);
- }
-
- /*
- * If we successfully made the connection, load the host private key
- * in case we will need it later for combined rsa-rhosts
- * authentication. This must be done before releasing extra
- * privileges, because the file is only readable by root.
- * If we cannot access the private keys, load the public keys
- * instead and try to execute the ssh-keysign helper instead.
- */
- sensitive_data.nkeys = 0;
- sensitive_data.keys = NULL;
- sensitive_data.external_keysign = 0;
- if (options.rhosts_rsa_authentication ||
- options.hostbased_authentication) {
- sensitive_data.nkeys = 3;
- sensitive_data.keys = xmalloc(sensitive_data.nkeys *
- sizeof(Key));
-
- PRIV_START;
- sensitive_data.keys[0] = key_load_private_type(KEY_RSA1,
- _PATH_HOST_KEY_FILE, "", NULL);
- sensitive_data.keys[1] = key_load_private_type(KEY_DSA,
- _PATH_HOST_DSA_KEY_FILE, "", NULL);
- sensitive_data.keys[2] = key_load_private_type(KEY_RSA,
- _PATH_HOST_RSA_KEY_FILE, "", NULL);
- PRIV_END;
-
- if (options.hostbased_authentication == 1 &&
- sensitive_data.keys[0] == NULL &&
- sensitive_data.keys[1] == NULL &&
- sensitive_data.keys[2] == NULL) {
- sensitive_data.keys[1] = key_load_public(
- _PATH_HOST_DSA_KEY_FILE, NULL);
- sensitive_data.keys[2] = key_load_public(
- _PATH_HOST_RSA_KEY_FILE, NULL);
- sensitive_data.external_keysign = 1;
- }
- }
- /*
- * Get rid of any extra privileges that we may have. We will no
- * longer need them. Also, extra privileges could make it very hard
- * to read identity files and other non-world-readable files from the
- * user's home directory if it happens to be on a NFS volume where
- * root is mapped to nobody.
- */
- seteuid(original_real_uid);
- setuid(original_real_uid);
-
- /*
- * Now that we are back to our own permissions, create ~/.ssh
- * directory if it doesn\'t already exist.
- */
- snprintf(buf, sizeof buf, "%.100s%s%.100s", pw_dir,
- strcmp(pw_dir, "/") ? "/" : "", _PATH_SSH_USER_DIR);
- xfree(pw_dir);
- if (stat(buf, &st) < 0)
- if (mkdir(buf, 0700) < 0)
- error("Could not create directory '%.200s'.", buf);
-
- /* load options.identity_files */
- load_public_identity_files();
-
- /* Expand ~ in known host file names. */
- /* XXX mem-leaks: */
- options.system_hostfile =
- tilde_expand_filename(options.system_hostfile, original_real_uid);
- options.user_hostfile =
- tilde_expand_filename(options.user_hostfile, original_real_uid);
- options.system_hostfile2 =
- tilde_expand_filename(options.system_hostfile2, original_real_uid);
- options.user_hostfile2 =
- tilde_expand_filename(options.user_hostfile2, original_real_uid);
-
- signal(SIGPIPE, SIG_IGN); /* ignore SIGPIPE early */
-
- /* Log into the remote system. This never returns if the login fails. */
- ssh_login(&sensitive_data, host, (struct sockaddr *)&hostaddr, pw_name);
- xfree(pw_name);
-
- /* We no longer need the private host keys. Clear them now. */
- if (sensitive_data.nkeys != 0) {
- for (i = 0; i < sensitive_data.nkeys; i++) {
- if (sensitive_data.keys[i] != NULL) {
- /* Destroys contents safely */
- debug3("clear hostkey %d", i);
- key_free(sensitive_data.keys[i]);
- sensitive_data.keys[i] = NULL;
- }
- }
- xfree(sensitive_data.keys);
- }
- for (i = 0; i < options.num_identity_files; i++) {
- if (options.identity_files[i]) {
- xfree(options.identity_files[i]);
- options.identity_files[i] = NULL;
- }
- if (options.identity_keys[i]) {
- key_free(options.identity_keys[i]);
- options.identity_keys[i] = NULL;
- }
- }
-
- exit_status = compat20 ? ssh_session2() : ssh_session();
- packet_close();
-
- /*
- * Send SIGHUP to proxy command if used. We don't wait() in
- * case it hangs and instead rely on init to reap the child
- */
- if (proxy_command_pid > 1)
- kill(proxy_command_pid, SIGHUP);
-
- return exit_status;
-}
-
-static void
-ssh_init_forwarding(void)
-{
- int success = 0;
- int i;
-
- /* Initiate local TCP/IP port forwardings. */
- for (i = 0; i < options.num_local_forwards; i++) {
- debug("Local connections to %.200s:%d forwarded to remote "
- "address %.200s:%d",
- (options.local_forwards[i].listen_host == NULL) ?
- (options.gateway_ports ? "*" : "LOCALHOST") :
- options.local_forwards[i].listen_host,
- options.local_forwards[i].listen_port,
- options.local_forwards[i].connect_host,
- options.local_forwards[i].connect_port);
- success += channel_setup_local_fwd_listener(
- options.local_forwards[i].listen_host,
- options.local_forwards[i].listen_port,
- options.local_forwards[i].connect_host,
- options.local_forwards[i].connect_port,
- options.gateway_ports);
- }
- if (i > 0 && success == 0)
- error("Could not request local forwarding.");
-
- /* Initiate remote TCP/IP port forwardings. */
- for (i = 0; i < options.num_remote_forwards; i++) {
- debug("Remote connections from %.200s:%d forwarded to "
- "local address %.200s:%d",
- (options.remote_forwards[i].listen_host == NULL) ?
- "LOCALHOST" : options.remote_forwards[i].listen_host,
- options.remote_forwards[i].listen_port,
- options.remote_forwards[i].connect_host,
- options.remote_forwards[i].connect_port);
- if (channel_request_remote_forwarding(
- options.remote_forwards[i].listen_host,
- options.remote_forwards[i].listen_port,
- options.remote_forwards[i].connect_host,
- options.remote_forwards[i].connect_port) < 0) {
- log("Warning: Could not request remote forwarding.");
- }
- }
-}
-
-static void
-check_agent_present(void)
-{
- if (options.forward_agent) {
- /* Clear agent forwarding if we don't have an agent. */
- if (!ssh_agent_present())
- options.forward_agent = 0;
- }
-}
-
-static int
-ssh_session(void)
-{
- int type;
- int interactive = 0;
- int have_tty = 0;
- struct winsize ws;
- char *cp;
- const char *display;
-
- /* Enable compression if requested. */
- if (options.compression) {
- debug("Requesting compression at level %d.", options.compression_level);
-
- if (options.compression_level < 1 || options.compression_level > 9)
- fatal("Compression level must be from 1 (fast) to 9 (slow, best).");
-
- /* Send the request. */
- packet_start(SSH_CMSG_REQUEST_COMPRESSION);
- packet_put_int(options.compression_level);
- packet_send();
- packet_write_wait();
- type = packet_read();
- if (type == SSH_SMSG_SUCCESS)
- packet_start_compression(options.compression_level);
- else if (type == SSH_SMSG_FAILURE)
- log("Warning: Remote host refused compression.");
- else
- packet_disconnect("Protocol error waiting for compression response.");
- }
- /* Allocate a pseudo tty if appropriate. */
- if (tty_flag) {
- debug("Requesting pty.");
-
- /* Start the packet. */
- packet_start(SSH_CMSG_REQUEST_PTY);
-
- /* Store TERM in the packet. There is no limit on the
- length of the string. */
- cp = getenv("TERM");
- if (!cp)
- cp = "";
- packet_put_cstring(cp);
-
- /* Store window size in the packet. */
- if (ioctl(fileno(stdin), TIOCGWINSZ, &ws) < 0)
- memset(&ws, 0, sizeof(ws));
- packet_put_int(ws.ws_row);
- packet_put_int(ws.ws_col);
- packet_put_int(ws.ws_xpixel);
- packet_put_int(ws.ws_ypixel);
-
- /* Store tty modes in the packet. */
- tty_make_modes(fileno(stdin), NULL);
-
- /* Send the packet, and wait for it to leave. */
- packet_send();
- packet_write_wait();
-
- /* Read response from the server. */
- type = packet_read();
- if (type == SSH_SMSG_SUCCESS) {
- interactive = 1;
- have_tty = 1;
- } else if (type == SSH_SMSG_FAILURE)
- log("Warning: Remote host failed or refused to allocate a pseudo tty.");
- else
- packet_disconnect("Protocol error waiting for pty request response.");
- }
- /* Request X11 forwarding if enabled and DISPLAY is set. */
- display = getenv("DISPLAY");
- if (options.forward_x11 && display != NULL) {
- char *proto, *data;
- /* Get reasonable local authentication information. */
- client_x11_get_proto(display, options.xauth_location,
- options.forward_x11_trusted, &proto, &data);
- /* Request forwarding with authentication spoofing. */
- debug("Requesting X11 forwarding with authentication spoofing.");
- x11_request_forwarding_with_spoofing(0, display, proto, data);
-
- /* Read response from the server. */
- type = packet_read();
- if (type == SSH_SMSG_SUCCESS) {
- interactive = 1;
- } else if (type == SSH_SMSG_FAILURE) {
- log("Warning: Remote host denied X11 forwarding.");
- } else {
- packet_disconnect("Protocol error waiting for X11 forwarding");
- }
- }
- /* Tell the packet module whether this is an interactive session. */
- packet_set_interactive(interactive);
-
- /* Request authentication agent forwarding if appropriate. */
- check_agent_present();
-
- if (options.forward_agent) {
- debug("Requesting authentication agent forwarding.");
- auth_request_forwarding();
-
- /* Read response from the server. */
- type = packet_read();
- packet_check_eom();
- if (type != SSH_SMSG_SUCCESS)
- log("Warning: Remote host denied authentication agent forwarding.");
- }
-
- /* Initiate port forwardings. */
- ssh_init_forwarding();
-
- /* If requested, let ssh continue in the background. */
- if (fork_after_authentication_flag)
- if (daemon(1, 1) < 0)
- fatal("daemon() failed: %.200s", strerror(errno));
-
- /*
- * If a command was specified on the command line, execute the
- * command now. Otherwise request the server to start a shell.
- */
- if (buffer_len(&command) > 0) {
- int len = buffer_len(&command);
- if (len > 900)
- len = 900;
- debug("Sending command: %.*s", len, (u_char *)buffer_ptr(&command));
- packet_start(SSH_CMSG_EXEC_CMD);
- packet_put_string(buffer_ptr(&command), buffer_len(&command));
- packet_send();
- packet_write_wait();
- } else {
- debug("Requesting shell.");
- packet_start(SSH_CMSG_EXEC_SHELL);
- packet_send();
- packet_write_wait();
- }
-
- /* Enter the interactive session. */
- return client_loop(have_tty, tty_flag ?
- options.escape_char : SSH_ESCAPECHAR_NONE, 0);
-}
-
-static void
-client_subsystem_reply(int type, u_int32_t seq, void *ctxt)
-{
- int id, len;
-
- id = packet_get_int();
- len = buffer_len(&command);
- if (len > 900)
- len = 900;
- packet_check_eom();
- if (type == SSH2_MSG_CHANNEL_FAILURE)
- fatal("Request for subsystem '%.*s' failed on channel %d",
- len, (u_char *)buffer_ptr(&command), id);
-}
-
-void
-client_global_request_reply_fwd(int type, u_int32_t seq, void *ctxt)
-{
- int i;
-
- i = client_global_request_id++;
- if (i >= options.num_remote_forwards)
- return;
- debug("remote forward %s for: listen %d, connect %s:%d",
- type == SSH2_MSG_REQUEST_SUCCESS ? "success" : "failure",
- options.remote_forwards[i].listen_port,
- options.remote_forwards[i].connect_host,
- options.remote_forwards[i].connect_port);
- if (type == SSH2_MSG_REQUEST_FAILURE)
- log("Warning: remote port forwarding failed for listen port %d",
- options.remote_forwards[i].listen_port);
-}
-
-static
-void
-ssh_session2_setlocale(int id)
-{
- char *loc;
-
-#define remote_setlocale(envvar) \
- if ((loc = getenv(envvar)) != NULL) {\
- channel_request_start(id, "env", 0);\
- debug2("Sent request for environment variable %s=%s", envvar, \
- loc); \
- packet_put_cstring(envvar);\
- packet_put_cstring(loc);\
- packet_send();\
- }
-
- remote_setlocale("LANG")
-
- remote_setlocale("LC_CTYPE")
- remote_setlocale("LC_COLLATE")
- remote_setlocale("LC_TIME")
- remote_setlocale("LC_NUMERIC")
- remote_setlocale("LC_MONETARY")
- remote_setlocale("LC_MESSAGES")
-
- remote_setlocale("LC_ALL")
-}
-
-/* request pty/x11/agent/tcpfwd/shell for channel */
-static void
-ssh_session2_setup(int id, void *arg)
-{
- int len;
- const char *display;
- int interactive = 0;
- struct termios tio;
-
- debug("ssh_session2_setup: id %d", id);
-
- ssh_session2_setlocale(id);
-
- if (tty_flag) {
- struct winsize ws;
- char *cp;
- cp = getenv("TERM");
- if (!cp)
- cp = "";
- /* Store window size in the packet. */
- if (ioctl(fileno(stdin), TIOCGWINSZ, &ws) < 0)
- memset(&ws, 0, sizeof(ws));
-
- channel_request_start(id, "pty-req", 0);
- packet_put_cstring(cp);
- packet_put_int(ws.ws_col);
- packet_put_int(ws.ws_row);
- packet_put_int(ws.ws_xpixel);
- packet_put_int(ws.ws_ypixel);
- tio = get_saved_tio();
- tty_make_modes(/*ignored*/ 0, &tio);
- packet_send();
- interactive = 1;
- /* XXX wait for reply */
- }
-
- display = getenv("DISPLAY");
- if (options.forward_x11 && display != NULL) {
- char *proto, *data;
- /* Get reasonable local authentication information. */
- client_x11_get_proto(display, options.xauth_location,
- options.forward_x11_trusted, &proto, &data);
- /* Request forwarding with authentication spoofing. */
- debug("Requesting X11 forwarding with authentication spoofing.");
- x11_request_forwarding_with_spoofing(id, display, proto, data);
- interactive = 1;
- /* XXX wait for reply */
- }
-
- check_agent_present();
- if (options.forward_agent) {
- debug("Requesting authentication agent forwarding.");
- channel_request_start(id, "auth-agent-req@openssh.com", 0);
- packet_send();
- }
-
- len = buffer_len(&command);
- if (len > 0) {
- if (len > 900)
- len = 900;
- if (subsystem_flag) {
- debug("Sending subsystem: %.*s", len, (u_char *)buffer_ptr(&command));
- channel_request_start(id, "subsystem", /*want reply*/ 1);
- /* register callback for reply */
- /* XXX we assume that client_loop has already been called */
- dispatch_set(SSH2_MSG_CHANNEL_FAILURE, &client_subsystem_reply);
- dispatch_set(SSH2_MSG_CHANNEL_SUCCESS, &client_subsystem_reply);
- } else {
- debug("Sending command: %.*s", len, (u_char *)buffer_ptr(&command));
- channel_request_start(id, "exec", 0);
- }
- packet_put_string(buffer_ptr(&command), buffer_len(&command));
- packet_send();
- } else {
- channel_request_start(id, "shell", 0);
- packet_send();
- }
-
- packet_set_interactive(interactive);
-}
-
-/* open new channel for a session */
-static int
-ssh_session2_open(void)
-{
- Channel *c;
- int window, packetmax, in, out, err;
-
- if (stdin_null_flag) {
- in = open(_PATH_DEVNULL, O_RDONLY);
- } else {
- in = dup(STDIN_FILENO);
- }
- out = dup(STDOUT_FILENO);
- err = dup(STDERR_FILENO);
-
- if (in < 0 || out < 0 || err < 0)
- fatal("dup() in/out/err failed");
-
- /* enable nonblocking unless tty */
- if (!isatty(in))
- set_nonblock(in);
- if (!isatty(out))
- set_nonblock(out);
- if (!isatty(err))
- set_nonblock(err);
-
- window = CHAN_SES_WINDOW_DEFAULT;
- packetmax = CHAN_SES_PACKET_DEFAULT;
- if (tty_flag) {
- window >>= 1;
- packetmax >>= 1;
- }
- c = channel_new(
- "session", SSH_CHANNEL_OPENING, in, out, err,
- window, packetmax, CHAN_EXTENDED_WRITE,
- xstrdup("client-session"), /*nonblock*/0);
-
- debug3("ssh_session2_open: channel_new: %d", c->self);
-
- channel_send_open(c->self);
- if (!no_shell_flag)
- channel_register_confirm(c->self, ssh_session2_setup);
-
- return c->self;
-}
-
-static int
-ssh_session2(void)
-{
- int id = -1;
-
- /* XXX should be pre-session */
- ssh_init_forwarding();
-
- if (!no_shell_flag || (datafellows & SSH_BUG_DUMMYCHAN))
- id = ssh_session2_open();
-
- /* If requested, let ssh continue in the background. */
- if (fork_after_authentication_flag)
- client_daemonize();
-
- return (client_loop(tty_flag, tty_flag ?
- options.escape_char : SSH_ESCAPECHAR_NONE, id));
-}
-
-static void
-load_public_identity_files(void)
-{
- char *filename;
- int i = 0;
- Key *public;
-
- for (; i < options.num_identity_files; i++) {
- filename = tilde_expand_filename(options.identity_files[i],
- original_real_uid);
- public = key_load_public(filename, NULL);
- debug("identity file %s type %d", filename,
- public ? public->type : -1);
- xfree(options.identity_files[i]);
- options.identity_files[i] = filename;
- options.identity_keys[i] = public;
- }
-}
-
-/*
- * Connects to the given host using rsh(or prints an error message and exits
- * if rsh is not available). This function never returns.
- */
-static void
-rsh_connect(char *host, char *user, Buffer * command)
-{
- char *args[10];
- int i;
-
- log("Using rsh. WARNING: Connection will not be encrypted.");
- /* Build argument list for rsh. */
- i = 0;
- args[i++] = _PATH_RSH;
- /* host may have to come after user on some systems */
- args[i++] = host;
- if (user) {
- args[i++] = "-l";
- args[i++] = user;
- }
- if (buffer_len(command) > 0) {
- buffer_append(command, "\0", 1);
- args[i++] = buffer_ptr(command);
- }
- args[i++] = NULL;
- if (debug_flag) {
- for (i = 0; args[i]; i++) {
- if (i != 0)
- (void) fprintf(stderr, " ");
- (void) fprintf(stderr, "%s", args[i]);
- }
- (void) fprintf(stderr, "\n");
- }
- (void) execv(_PATH_RSH, args);
- perror(_PATH_RSH);
- exit(1);
-}
diff --git a/usr/src/cmd/ssh/ssh/sshconnect.c b/usr/src/cmd/ssh/ssh/sshconnect.c
deleted file mode 100644
index 74c3b6fa26..0000000000
--- a/usr/src/cmd/ssh/ssh/sshconnect.c
+++ /dev/null
@@ -1,1117 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * Code to connect to a remote host, and to perform the client side of the
- * login (authentication) dialog.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: sshconnect.c,v 1.135 2002/09/19 01:58:18 djm Exp $");
-
-#include <openssl/bn.h>
-
-#include "ssh.h"
-#include "xmalloc.h"
-#include "rsa.h"
-#include "buffer.h"
-#include "packet.h"
-#include "uidswap.h"
-#include "compat.h"
-#include "key.h"
-#include "sshconnect.h"
-#include "hostfile.h"
-#include "log.h"
-#include "readconf.h"
-#include "atomicio.h"
-#include "misc.h"
-#include "readpass.h"
-#include <langinfo.h>
-#include <regex.h>
-#include <err.h>
-#include "engine.h"
-
-char *client_version_string = NULL;
-char *server_version_string = NULL;
-
-/* import */
-extern Options options;
-extern char *__progname;
-extern uid_t original_real_uid;
-extern uid_t original_effective_uid;
-extern pid_t proxy_command_pid;
-extern ENGINE *e;
-
-#ifndef INET6_ADDRSTRLEN /* for non IPv6 machines */
-#define INET6_ADDRSTRLEN 46
-#endif
-
-static int show_other_keys(const char *, Key *);
-
-/*
- * Connect to the given ssh server using a proxy command.
- */
-static int
-ssh_proxy_connect(const char *host, u_short port, const char *proxy_command)
-{
- Buffer command;
- const char *cp;
- char *command_string;
- int pin[2], pout[2];
- pid_t pid;
- char strport[NI_MAXSERV];
-
- /* Convert the port number into a string. */
- snprintf(strport, sizeof strport, "%hu", port);
-
- /*
- * Build the final command string in the buffer by making the
- * appropriate substitutions to the given proxy command.
- *
- * Use "exec" to avoid "sh -c" processes on some platforms
- * (e.g. Solaris)
- */
- buffer_init(&command);
-
-#define EXECLEN (sizeof ("exec") - 1)
- for (cp = proxy_command; *cp && isspace(*cp) ; cp++)
- ;
- if (strncmp(cp, "exec", EXECLEN) != 0 ||
- (strlen(cp) >= EXECLEN && !isspace(*(cp + EXECLEN))))
- buffer_append(&command, "exec ", EXECLEN + 1);
-#undef EXECLEN
-
- for (cp = proxy_command; *cp; cp++) {
- if (cp[0] == '%' && cp[1] == '%') {
- buffer_append(&command, "%", 1);
- cp++;
- continue;
- }
- if (cp[0] == '%' && cp[1] == 'h') {
- buffer_append(&command, host, strlen(host));
- cp++;
- continue;
- }
- if (cp[0] == '%' && cp[1] == 'p') {
- buffer_append(&command, strport, strlen(strport));
- cp++;
- continue;
- }
- buffer_append(&command, cp, 1);
- }
- buffer_append(&command, "\0", 1);
-
- /* Get the final command string. */
- command_string = buffer_ptr(&command);
-
- /* Create pipes for communicating with the proxy. */
- if (pipe(pin) < 0 || pipe(pout) < 0)
- fatal("Could not create pipes to communicate with the proxy: %.100s",
- strerror(errno));
-
- debug("Executing proxy command: %.500s", command_string);
-
- /* Fork and execute the proxy command. */
- if ((pid = fork()) == 0) {
- char *argv[10];
-
- /* Child. Permanently give up superuser privileges. */
- seteuid(original_real_uid);
- setuid(original_real_uid);
-
- /* Redirect stdin and stdout. */
- close(pin[1]);
- if (pin[0] != 0) {
- if (dup2(pin[0], 0) < 0)
- perror("dup2 stdin");
- close(pin[0]);
- }
- close(pout[0]);
- if (dup2(pout[1], 1) < 0)
- perror("dup2 stdout");
- /* Cannot be 1 because pin allocated two descriptors. */
- close(pout[1]);
-
- /* Stderr is left as it is so that error messages get
- printed on the user's terminal. */
- argv[0] = _PATH_BSHELL;
- argv[1] = "-c";
- argv[2] = command_string;
- argv[3] = NULL;
-
- /* Execute the proxy command. Note that we gave up any
- extra privileges above. */
- execv(argv[0], argv);
- perror(argv[0]);
- exit(1);
- }
- /* Parent. */
- if (pid < 0)
- fatal("fork failed: %.100s", strerror(errno));
- else
- proxy_command_pid = pid; /* save pid to clean up later */
-
- /* Close child side of the descriptors. */
- close(pin[0]);
- close(pout[1]);
-
- /* Free the command name. */
- buffer_free(&command);
-
- /* Set the connection file descriptors. */
- packet_set_connection(pout[0], pin[1]);
-
- /* Indicate OK return */
- return 0;
-}
-
-/*
- * Creates a (possibly privileged) socket for use as the ssh connection.
- */
-static int
-ssh_create_socket(int privileged, int family)
-{
- int sock, gaierr;
- struct addrinfo hints, *res;
-
- /*
- * If we are running as root and want to connect to a privileged
- * port, bind our own socket to a privileged port.
- */
- if (privileged) {
- int p = IPPORT_RESERVED - 1;
- PRIV_START;
- sock = rresvport_af(&p, family);
- PRIV_END;
- if (sock < 0)
- error("rresvport: af=%d %.100s", family, strerror(errno));
- else
- debug("Allocated local port %d.", p);
- return sock;
- }
- sock = socket(family, SOCK_STREAM, 0);
- if (sock < 0)
- error("socket: %.100s", strerror(errno));
-
- /* Bind the socket to an alternative local IP address */
- if (options.bind_address == NULL)
- return sock;
-
- memset(&hints, 0, sizeof(hints));
- hints.ai_family = family;
- hints.ai_socktype = SOCK_STREAM;
- hints.ai_flags = AI_PASSIVE;
- gaierr = getaddrinfo(options.bind_address, "0", &hints, &res);
- if (gaierr) {
- error("getaddrinfo: %s: %s", options.bind_address,
- gai_strerror(gaierr));
- close(sock);
- return -1;
- }
- if (bind(sock, res->ai_addr, res->ai_addrlen) < 0) {
- error("bind: %s: %s", options.bind_address, strerror(errno));
- close(sock);
- freeaddrinfo(res);
- return -1;
- }
- freeaddrinfo(res);
- return sock;
-}
-
-/*
- * Connect with timeout. Implements ConnectTimeout option.
- */
-static int
-timeout_connect(int sockfd, const struct sockaddr *serv_addr,
- socklen_t addrlen, int timeout)
-{
- fd_set *fdset;
- struct timeval tv;
- socklen_t optlen;
- int optval, rc, result = -1;
-
- if (timeout <= 0)
- return (connect(sockfd, serv_addr, addrlen));
-
- set_nonblock(sockfd);
- rc = connect(sockfd, serv_addr, addrlen);
- if (rc == 0) {
- unset_nonblock(sockfd);
- return (0);
- }
- if (errno != EINPROGRESS)
- return (-1);
-
- fdset = (fd_set *)xcalloc(howmany(sockfd + 1, NFDBITS),
- sizeof(fd_mask));
- FD_SET(sockfd, fdset);
- tv.tv_sec = timeout;
- tv.tv_usec = 0;
-
- for (;;) {
- rc = select(sockfd + 1, NULL, fdset, NULL, &tv);
- if (rc != -1 || errno != EINTR)
- break;
- }
-
- switch (rc) {
- case 0:
- /* Timed out */
- errno = ETIMEDOUT;
- break;
- case -1:
- /* Select error */
- debug("select: %s", strerror(errno));
- break;
- case 1:
- /* Completed or failed */
- optval = 0;
- optlen = sizeof(optval);
- if (getsockopt(sockfd, SOL_SOCKET, SO_ERROR, &optval,
- &optlen) == -1) {
- debug("getsockopt: %s", strerror(errno));
- break;
- }
- if (optval != 0) {
- errno = optval;
- break;
- }
- result = 0;
- unset_nonblock(sockfd);
- break;
- default:
- /* Should not occur */
- fatal("Bogus return (%d) from select()", rc);
- }
-
- xfree(fdset);
- return (result);
-}
-
-/*
- * Opens a TCP/IP connection to the remote server on the given host.
- * The address of the remote host will be returned in hostaddr.
- * If port is 0, the default port will be used. If needpriv is true,
- * a privileged port will be allocated to make the connection.
- * This requires super-user privileges if needpriv is true.
- * Connection_attempts specifies the maximum number of tries (one per
- * second). If proxy_command is non-NULL, it specifies the command (with %h
- * and %p substituted for host and port, respectively) to use to contact
- * the daemon.
- * Return values:
- * 0 for OK
- * ECONNREFUSED if we got a "Connection Refused" by the peer on any address
- * ECONNABORTED if we failed without a "Connection refused"
- * Suitable error messages for the connection failure will already have been
- * printed.
- */
-int
-ssh_connect(const char *host, struct sockaddr_storage * hostaddr,
- ushort_t port, int family, int connection_attempts,
- int needpriv, const char *proxy_command)
-{
- int gaierr;
- int on = 1;
- int sock = -1, attempt;
- char ntop[NI_MAXHOST], strport[NI_MAXSERV];
- struct addrinfo hints, *ai, *aitop;
- struct servent *sp;
- /*
- * Did we get only other errors than "Connection refused" (which
- * should block fallback to rsh and similar), or did we get at least
- * one "Connection refused"?
- */
- int full_failure = 1;
-
- debug("ssh_connect: needpriv %d", needpriv);
-
- /* Get default port if port has not been set. */
- if (port == 0) {
- sp = getservbyname(SSH_SERVICE_NAME, "tcp");
- if (sp)
- port = ntohs(sp->s_port);
- else
- port = SSH_DEFAULT_PORT;
- }
- /* If a proxy command is given, connect using it. */
- if (proxy_command != NULL)
- return ssh_proxy_connect(host, port, proxy_command);
-
- /* No proxy command. */
-
- memset(&hints, 0, sizeof(hints));
- hints.ai_family = family;
- hints.ai_socktype = SOCK_STREAM;
- snprintf(strport, sizeof strport, "%u", port);
- if ((gaierr = getaddrinfo(host, strport, &hints, &aitop)) != 0)
- fatal("%s: %.100s: %s", __progname, host,
- gai_strerror(gaierr));
-
- /*
- * Try to connect several times. On some machines, the first time
- * will sometimes fail. In general socket code appears to behave
- * quite magically on many machines.
- */
- for (attempt = 0; ;) {
- if (attempt > 0)
- debug("Trying again...");
-
- /* Loop through addresses for this host, and try each one in
- sequence until the connection succeeds. */
- for (ai = aitop; ai; ai = ai->ai_next) {
- if (ai->ai_family != AF_INET && ai->ai_family != AF_INET6)
- continue;
- if (getnameinfo(ai->ai_addr, ai->ai_addrlen,
- ntop, sizeof(ntop), strport, sizeof(strport),
- NI_NUMERICHOST|NI_NUMERICSERV) != 0) {
- error("ssh_connect: getnameinfo failed");
- continue;
- }
- debug("Connecting to %.200s [%.100s] port %s.",
- host, ntop, strport);
-
- /* Create a socket for connecting. */
- sock = ssh_create_socket(needpriv, ai->ai_family);
- if (sock < 0)
- /* Any error is already output */
- continue;
-
- if (timeout_connect(sock, ai->ai_addr, ai->ai_addrlen,
- options.connection_timeout) >= 0) {
- /* Successful connection. */
- memcpy(hostaddr, ai->ai_addr, ai->ai_addrlen);
- break;
- } else {
- if (errno == ECONNREFUSED)
- full_failure = 0;
- debug("connect to address %s port %s: %s",
- ntop, strport, strerror(errno));
- /*
- * Close the failed socket; there appear to
- * be some problems when reusing a socket for
- * which connect() has already returned an
- * error.
- */
- close(sock);
- }
- }
- if (ai)
- break; /* Successful connection. */
-
- attempt++;
- if (attempt >= connection_attempts)
- break;
- /* Sleep a moment before retrying. */
- sleep(1);
- }
-
- freeaddrinfo(aitop);
-
- /* Return failure if we didn't get a successful connection. */
- if (attempt >= connection_attempts) {
- log("ssh: connect to host %s port %s: %s",
- host, strport, strerror(errno));
- return full_failure ? ECONNABORTED : ECONNREFUSED;
- }
-
- debug("Connection established.");
-
- /* Set keepalives if requested. */
- if (options.keepalives &&
- setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (void *)&on,
- sizeof(on)) < 0)
- debug2("setsockopt SO_KEEPALIVE: %.100s", strerror(errno));
-
- /* Set the connection. */
- packet_set_connection(sock, sock);
-
- return 0;
-}
-
-/*
- * Waits for the server identification string, and sends our own
- * identification string.
- */
-static void
-ssh_exchange_identification(void)
-{
- char buf[256], remote_version[256]; /* must be same size! */
- int remote_major, remote_minor, i, mismatch;
- int connection_in = packet_get_connection_in();
- int connection_out = packet_get_connection_out();
- int minor1 = PROTOCOL_MINOR_1;
-
- /* Read other side\'s version identification. */
- for (;;) {
- for (i = 0; i < sizeof(buf) - 1; i++) {
- int len = atomicio(read, connection_in, &buf[i], 1);
- if (len < 0)
- fatal("ssh_exchange_identification: read: %.100s", strerror(errno));
- if (len != 1)
- fatal("ssh_exchange_identification: Connection closed by remote host");
- if (buf[i] == '\r') {
- buf[i] = '\n';
- buf[i + 1] = 0;
- continue; /**XXX wait for \n */
- }
- if (buf[i] == '\n') {
- buf[i + 1] = 0;
- break;
- }
- }
- buf[sizeof(buf) - 1] = 0;
- if (strncmp(buf, "SSH-", 4) == 0)
- break;
- debug("ssh_exchange_identification: %s", buf);
- }
- server_version_string = xstrdup(buf);
-
- /*
- * Check that the versions match. In future this might accept
- * several versions and set appropriate flags to handle them.
- */
- if (sscanf(server_version_string, "SSH-%d.%d-%[^\n]\n",
- &remote_major, &remote_minor, remote_version) != 3)
- fatal("Bad remote protocol version identification: '%.100s'", buf);
- debug("Remote protocol version %d.%d, remote software version %.100s",
- remote_major, remote_minor, remote_version);
-
- compat_datafellows(remote_version);
- mismatch = 0;
-
- switch (remote_major) {
- case 1:
- if (remote_minor == 99 &&
- (options.protocol & SSH_PROTO_2) &&
- !(options.protocol & SSH_PROTO_1_PREFERRED)) {
- enable_compat20();
- break;
- }
- if (!(options.protocol & SSH_PROTO_1)) {
- mismatch = 1;
- break;
- }
- if (remote_minor < 3) {
- fatal("Remote machine has too old SSH software version.");
- } else if (remote_minor == 3 || remote_minor == 4) {
- /* We speak 1.3, too. */
- enable_compat13();
- minor1 = 3;
- if (options.forward_agent) {
- log("Agent forwarding disabled for protocol 1.3");
- options.forward_agent = 0;
- }
- }
- break;
- case 2:
- if (options.protocol & SSH_PROTO_2) {
- enable_compat20();
- break;
- }
- /* FALLTHROUGH */
- default:
- mismatch = 1;
- break;
- }
- if (mismatch)
- fatal("Protocol major versions differ: %d vs. %d",
- (options.protocol & SSH_PROTO_2) ? PROTOCOL_MAJOR_2 : PROTOCOL_MAJOR_1,
- remote_major);
- /* Send our own protocol version identification. */
- snprintf(buf, sizeof buf, "SSH-%d.%d-%.100s\n",
- compat20 ? PROTOCOL_MAJOR_2 : PROTOCOL_MAJOR_1,
- compat20 ? PROTOCOL_MINOR_2 : minor1,
- SSH_VERSION);
- if (atomicio(write, connection_out, buf, strlen(buf)) != strlen(buf))
- fatal("write: %.100s", strerror(errno));
- client_version_string = xstrdup(buf);
- chop(client_version_string);
- chop(server_version_string);
- debug("Local version string %.100s", client_version_string);
-}
-
-/* defaults to 'no' */
-static int
-confirm(const char *prompt)
-{
- const char *msg;
- char *p, *again = NULL;
- int n, ret = -1;
- regex_t yesre, nore;
- char *errstr;
- int rerr, errlen;
-
- if (options.batch_mode)
- return 0;
- n = snprintf(NULL, 0, gettext("Please type '%s' or '%s': "),
- nl_langinfo(YESSTR), nl_langinfo(NOSTR));
- again = xmalloc(n + 1);
- (void) snprintf(again, n + 1, gettext("Please type '%s' or '%s': "),
- nl_langinfo(YESSTR), nl_langinfo(NOSTR));
-
- if ((rerr = regcomp(&yesre, nl_langinfo(YESEXPR), REG_EXTENDED)) != 0) {
- errlen = regerror(rerr, &yesre, NULL, 0);
- if ((errstr = malloc(errlen)) == NULL)
- err(1, "malloc");
- regerror(rerr, &yesre, errstr, errlen);
- errx(1, "YESEXPR: %s: %s", nl_langinfo(YESEXPR), errstr);
- }
-
- if ((rerr = regcomp(&nore, nl_langinfo(NOEXPR), REG_EXTENDED)) != 0) {
- errlen = regerror(rerr, &nore, NULL, 0);
- if ((errstr = malloc(errlen)) == NULL)
- err(1, "malloc");
- regerror(rerr, &nore, errstr, errlen);
- errx(1, "NOEXPR: %s: %s", nl_langinfo(NOEXPR), errstr);
- }
-
- for (msg = prompt;;msg = again) {
- p = read_passphrase(msg, RP_ECHO);
- if (p == NULL || (p[0] == '\0') || (p[0] == '\n') ||
- regexec(&nore, p, 0, NULL, 0) == 0)
- ret = 0;
- if (p && regexec(&yesre, p, 0, NULL, 0) == 0)
- ret = 1;
- if (p)
- xfree(p);
- if (ret != -1) {
- regfree(&yesre);
- regfree(&nore);
- return ret;
- }
- }
-}
-
-/*
- * check whether the supplied host key is valid, return -1 if the key
- * is not valid. the user_hostfile will not be updated if 'readonly' is true.
- */
-static int
-check_host_key(char *host, struct sockaddr *hostaddr, Key *host_key, int
- validated, int readonly, const char *user_hostfile, const char
- *system_hostfile)
-{
- Key *file_key;
- char *type = key_type(host_key);
- char *ip = NULL;
- char hostline[1000], *hostp, *fp;
- HostStatus host_status;
- HostStatus ip_status;
- int r, local = 0, host_ip_differ = 0;
- int salen;
- char ntop[NI_MAXHOST];
- char msg[1024];
- int len, host_line, ip_line, has_keys;
- const char *host_file = NULL, *ip_file = NULL;
-
- /*
- * Force accepting of the host key for loopback/localhost. The
- * problem is that if the home directory is NFS-mounted to multiple
- * machines, localhost will refer to a different machine in each of
- * them, and the user will get bogus HOST_CHANGED warnings. This
- * essentially disables host authentication for localhost; however,
- * this is probably not a real problem.
- */
- /** hostaddr == 0! */
- switch (hostaddr->sa_family) {
- case AF_INET:
- /* LINTED */
- local = (ntohl(((struct sockaddr_in *)hostaddr)->
- sin_addr.s_addr) >> 24) == IN_LOOPBACKNET;
- salen = sizeof(struct sockaddr_in);
- break;
- case AF_INET6:
- /* LINTED */
- local = IN6_IS_ADDR_LOOPBACK(
- &(((struct sockaddr_in6 *)hostaddr)->sin6_addr));
- salen = sizeof(struct sockaddr_in6);
- break;
- default:
- local = 0;
- salen = sizeof(struct sockaddr_storage);
- break;
- }
- if (options.no_host_authentication_for_localhost == 1 && local &&
- options.host_key_alias == NULL) {
- debug("Forcing accepting of host key for "
- "loopback/localhost.");
- return 0;
- }
-
- /*
- * We don't have the remote ip-address for connections
- * using a proxy command
- */
- if (options.proxy_command == NULL) {
- if (getnameinfo(hostaddr, salen, ntop, sizeof(ntop),
- NULL, 0, NI_NUMERICHOST) != 0)
- fatal("check_host_key: getnameinfo failed");
- ip = xstrdup(ntop);
- } else {
- ip = xstrdup("<no hostip for proxy command>");
- }
- /*
- * Turn off check_host_ip if the connection is to localhost, via proxy
- * command or if we don't have a hostname to compare with
- */
- if (options.check_host_ip &&
- (local || strcmp(host, ip) == 0 || options.proxy_command != NULL))
- options.check_host_ip = 0;
-
- /*
- * Allow the user to record the key under a different name. This is
- * useful for ssh tunneling over forwarded connections or if you run
- * multiple sshd's on different ports on the same machine.
- */
- if (options.host_key_alias != NULL) {
- host = options.host_key_alias;
- debug("using hostkeyalias: %s", host);
- }
-
- /*
- * Store the host key from the known host file in here so that we can
- * compare it with the key for the IP address.
- */
- file_key = key_new(host_key->type);
-
- /*
- * Check if the host key is present in the user's list of known
- * hosts or in the systemwide list.
- */
- host_file = user_hostfile;
- host_status = check_host_in_hostfile(host_file, host, host_key,
- file_key, &host_line);
- if (host_status == HOST_NEW) {
- host_file = system_hostfile;
- host_status = check_host_in_hostfile(host_file, host, host_key,
- file_key, &host_line);
- }
- /*
- * Also perform check for the ip address, skip the check if we are
- * localhost or the hostname was an ip address to begin with
- */
- if (options.check_host_ip) {
- Key *ip_key = key_new(host_key->type);
-
- ip_file = user_hostfile;
- ip_status = check_host_in_hostfile(ip_file, ip, host_key,
- ip_key, &ip_line);
- if (ip_status == HOST_NEW) {
- ip_file = system_hostfile;
- ip_status = check_host_in_hostfile(ip_file, ip,
- host_key, ip_key, &ip_line);
- }
- if (host_status == HOST_CHANGED &&
- (ip_status != HOST_CHANGED || !key_equal(ip_key, file_key)))
- host_ip_differ = 1;
-
- key_free(ip_key);
- } else
- ip_status = host_status;
-
- key_free(file_key);
-
- switch (host_status) {
- case HOST_OK:
- /* The host is known and the key matches. */
- if (validated)
- debug("Host '%.200s' is known and matches the %s host key.",
- host, type);
- else
- debug("Host '%.200s' is known and matches the %s host "
- "key.", host, type);
- debug("Found key in %s:%d", host_file, host_line);
- if (options.check_host_ip && ip_status == HOST_NEW) {
- if (readonly)
- log("%s host key for IP address "
- "'%.128s' not in list of known hosts.",
- type, ip);
- else if (!add_host_to_hostfile(user_hostfile, ip,
- host_key, options.hash_known_hosts))
- log("Failed to add the %s host key for IP "
- "address '%.128s' to the list of known "
- "hosts (%.30s).", type, ip, user_hostfile);
- else
- log("Warning: Permanently added the %s host "
- "key for IP address '%.128s' to the list "
- "of known hosts.", type, ip);
- }
- break;
- case HOST_NEW:
- if (readonly)
- goto fail;
- /* The host is new. */
- if (!validated && options.strict_host_key_checking == 1) {
- /*
- * User has requested strict host key checking. We
- * will not add the host key automatically. The only
- * alternative left is to abort.
- */
- error("No %s host key is known for %.200s and you "
- "have requested strict checking.", type, host);
- goto fail;
- } else if (!validated &&
- options.strict_host_key_checking == 2) {
- has_keys = show_other_keys(host, host_key);
- /* The default */
- fp = key_fingerprint(host_key, SSH_FP_MD5, SSH_FP_HEX);
- snprintf(msg, sizeof(msg),
- gettext("The authenticity of host '%.200s (%s)' "
- "can't be established%s\n%s key fingerprint "
- "is %s.\n"
- "Are you sure you want to continue connecting "
- "(%s/%s)? "),
- host, ip,
- has_keys ? gettext(",\nbut keys of different type "
- "are already known for this host.") : ".",
- type, fp, nl_langinfo(YESSTR), nl_langinfo(NOSTR));
- xfree(fp);
- if (!confirm(msg))
- goto fail;
- }
- /*
- * If not in strict mode, add the key automatically to the
- * local known_hosts file.
- */
- if (options.check_host_ip && ip_status == HOST_NEW) {
- snprintf(hostline, sizeof(hostline), "%s,%s",
- host, ip);
- hostp = hostline;
- if (options.hash_known_hosts) {
- /* Add hash of host and IP separately */
- r = add_host_to_hostfile(user_hostfile, host,
- host_key, options.hash_known_hosts) &&
- add_host_to_hostfile(user_hostfile, ip,
- host_key, options.hash_known_hosts);
- } else {
- /* Add unhashed "host,ip" */
- r = add_host_to_hostfile(user_hostfile,
- hostline, host_key,
- options.hash_known_hosts);
- }
- } else {
- r = add_host_to_hostfile(user_hostfile, host, host_key,
- options.hash_known_hosts);
- hostp = host;
- }
-
- if (!r)
- log("Failed to add the host to the list of known "
- "hosts (%.500s).", user_hostfile);
- else
- log("Warning: Permanently added '%.200s' (%s) to the "
- "list of known hosts.", hostp, type);
- break;
- case HOST_CHANGED:
- if (validated) {
- log("Warning: The host key for host %s has changed; "
- "please update your known hosts file(s) "
- "(%s:%d)", host, host_file, host_line);
- if (options.check_host_ip && host_ip_differ) {
- log("Warning: The host key for host %s has "
- "changed; please update your known "
- "hosts file(s) (%s:%d)", ip, host_file,
- host_line);
-
- }
- break;
- }
- if (options.check_host_ip && host_ip_differ) {
- char *msg;
- if (ip_status == HOST_NEW)
- msg = "is unknown";
- else if (ip_status == HOST_OK)
- msg = "is unchanged";
- else
- msg = "has a different value";
- error("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"
- "@ WARNING: POSSIBLE DNS SPOOFING DETECTED! @\n"
- "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"
- "The %s host key for %s has changed,\n"
- "and the key for the according IP address %s\n"
- "%s. This could either mean that\n"
- "DNS SPOOFING is happening or the IP address for the host\n"
- "and its host key have changed at the same time.\n",
- type, host, ip, msg);
- if (ip_status != HOST_NEW)
- error("Offending key for IP in %s:%d", ip_file, ip_line);
- }
- /* The host key has changed. */
- fp = key_fingerprint(host_key, SSH_FP_MD5, SSH_FP_HEX);
- error("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"
- "@ WARNING: REMOTE HOST IDENTIFICATION HAS CHANGED! @\n"
- "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"
- "IT IS POSSIBLE THAT SOMEONE IS DOING SOMETHING NASTY!\n"
- "Someone could be eavesdropping on you right now (man-in-the-middle attack)!\n"
- "It is also possible that the %s host key has just been changed.\n"
- "The fingerprint for the %s key sent by the remote host is\n%s.\n"
- "Please contact your system administrator.\n"
- "Add correct host key in %.100s to get rid of this message.\n"
- "Offending key in %s:%d\n",
- type, type, fp, user_hostfile, host_file, host_line);
- xfree(fp);
-
- /*
- * If strict host key checking is in use, the user will have
- * to edit the key manually and we can only abort.
- */
- if (options.strict_host_key_checking) {
- error("%s host key for %.200s has changed and you have "
- "requested strict checking.", type, host);
- goto fail;
- }
-
- /*
- * If strict host key checking has not been requested, allow
- * the connection but without password authentication or
- * agent forwarding.
- */
- if (options.password_authentication) {
- error("Password authentication is disabled to avoid "
- "man-in-the-middle attacks.");
- options.password_authentication = 0;
- }
- if (options.forward_agent) {
- error("Agent forwarding is disabled to avoid "
- "man-in-the-middle attacks.");
- options.forward_agent = 0;
- }
- if (options.forward_x11) {
- error("X11 forwarding is disabled to avoid "
- "man-in-the-middle attacks.");
- options.forward_x11 = 0;
- }
- if (options.num_local_forwards > 0 ||
- options.num_remote_forwards > 0) {
- error("Port forwarding is disabled to avoid "
- "man-in-the-middle attacks.");
- options.num_local_forwards =
- options.num_remote_forwards = 0;
- }
- /*
- * XXX Should permit the user to change to use the new id.
- * This could be done by converting the host key to an
- * identifying sentence, tell that the host identifies itself
- * by that sentence, and ask the user if he/she whishes to
- * accept the authentication.
- */
- break;
- case HOST_FOUND:
- fatal("internal error");
- break;
- }
-
- if (options.check_host_ip && host_status != HOST_CHANGED &&
- ip_status == HOST_CHANGED) {
- snprintf(msg, sizeof(msg),
- gettext("Warning: the %s host key for '%.200s' "
- "differs from the key for the IP address '%.128s'"
- "\nOffending key for IP in %s:%d"),
- type, host, ip, ip_file, ip_line);
- if (host_status == HOST_OK) {
- len = strlen(msg);
- snprintf(msg + len, sizeof(msg) - len,
- "\nMatching host key in %s:%d",
- host_file, host_line);
- }
- if (!validated && options.strict_host_key_checking == 1) {
- log(msg);
- error("Exiting, you have requested strict checking.");
- goto fail;
- } else if (!validated &&
- options.strict_host_key_checking == 2) {
- snprintf(msg + strlen(msg), sizeof(msg) - strlen(msg),
- gettext("\nAre you sure you want to continue "
- "connecting (%s/%s)"),
- nl_langinfo(YESSTR), nl_langinfo(NOSTR));
- if (!confirm(msg))
- goto fail;
- } else {
- log(msg);
- }
- }
-
- xfree(ip);
- return 0;
-
-fail:
- xfree(ip);
- return -1;
-}
-
-int
-verify_host_key(char *host, struct sockaddr *hostaddr, Key *host_key)
-{
- struct stat st;
-
- /* return ok if the key can be found in an old keyfile */
- if (stat(options.system_hostfile2, &st) == 0 ||
- stat(options.user_hostfile2, &st) == 0) {
- if (check_host_key(host, hostaddr, host_key, 0, /*readonly*/ 1,
- options.user_hostfile2, options.system_hostfile2) == 0)
- return 0;
- }
- return check_host_key(host, hostaddr, host_key, 0, /*readonly*/ 0,
- options.user_hostfile, options.system_hostfile);
-}
-
-int
-accept_host_key(char *host, struct sockaddr *hostaddr, Key *host_key)
-{
- struct stat st;
-
- /* return ok if the key can be found in an old keyfile */
- if (stat(options.system_hostfile2, &st) == 0 ||
- stat(options.user_hostfile2, &st) == 0) {
- if (check_host_key(host, hostaddr, host_key, 1, /*readonly*/ 1,
- options.user_hostfile2, options.system_hostfile2) == 0)
- return 0;
- }
- return check_host_key(host, hostaddr, host_key, 1, /*readonly*/ 0,
- options.user_hostfile, options.system_hostfile);
-}
-/*
- * Starts a dialog with the server, and authenticates the current user on the
- * server. This does not need any extra privileges. The basic connection
- * to the server must already have been established before this is called.
- * If login fails, this function prints an error and never returns.
- * This function does not require super-user privileges.
- */
-void
-ssh_login(Sensitive *sensitive, const char *orighost,
- struct sockaddr *hostaddr, char *pw_name)
-{
- char *host, *cp;
- char *server_user, *local_user;
-
- local_user = xstrdup(pw_name);
- server_user = options.user ? options.user : local_user;
-
- /* Convert the user-supplied hostname into all lowercase. */
- host = xstrdup(orighost);
- for (cp = host; *cp; cp++)
- if (isupper(*cp))
- *cp = tolower(*cp);
-
- /* Exchange protocol version identification strings with the server. */
- ssh_exchange_identification();
-
- /*
- * See comment at definition of will_daemonize for information why we
- * don't support the PKCS#11 engine with protocol 1.
- */
- if (compat20 == 1 && options.use_openssl_engine == 1) {
- /*
- * If this fails then 'e' will be NULL which means we do not use
- * the engine, as if UseOpenSSLEngine was set to "no". This is
- * important in case we go to the background after the
- * authentication.
- */
- e = pkcs11_engine_load(options.use_openssl_engine);
- }
-
- /* Put the connection into non-blocking mode. */
- packet_set_nonblocking();
-
- /* key exchange */
- /* authenticate user */
- if (compat20) {
- /*
- * Note that the host pointer is saved in ssh_kex2() for later
- * use during the key re-exchanges so we must not xfree() it.
- */
- ssh_kex2(host, hostaddr);
- ssh_userauth2(local_user, server_user, host, sensitive);
- } else {
- ssh_kex(host, hostaddr);
- ssh_userauth1(local_user, server_user, host, sensitive);
- }
-
- xfree(local_user);
-}
-
-void
-ssh_put_password(char *password)
-{
- int size;
- char *padded;
-
- if (datafellows & SSH_BUG_PASSWORDPAD) {
- packet_put_cstring(password);
- return;
- }
- size = roundup(strlen(password) + 1, 32);
- padded = xmalloc(size);
- memset(padded, 0, size);
- strlcpy(padded, password, size);
- packet_put_string(padded, size);
- memset(padded, 0, size);
- xfree(padded);
-}
-
-static int
-show_key_from_file(const char *file, const char *host, int keytype)
-{
- Key *found;
- char *fp;
- int line, ret;
-
- found = key_new(keytype);
- if ((ret = lookup_key_in_hostfile_by_type(file, host,
- keytype, found, &line))) {
- fp = key_fingerprint(found, SSH_FP_MD5, SSH_FP_HEX);
- log("WARNING: %s key found for host %s\n"
- "in %s:%d\n"
- "%s key fingerprint %s.",
- key_type(found), host, file, line,
- key_type(found), fp);
- xfree(fp);
- }
- key_free(found);
- return (ret);
-}
-
-/* print all known host keys for a given host, but skip keys of given type */
-static int
-show_other_keys(const char *host, Key *key)
-{
- int type[] = { KEY_RSA1, KEY_RSA, KEY_DSA, -1};
- int i, found = 0;
-
- for (i = 0; type[i] != -1; i++) {
- if (type[i] == key->type)
- continue;
- if (type[i] != KEY_RSA1 &&
- show_key_from_file(options.user_hostfile2, host, type[i])) {
- found = 1;
- continue;
- }
- if (type[i] != KEY_RSA1 &&
- show_key_from_file(options.system_hostfile2, host, type[i])) {
- found = 1;
- continue;
- }
- if (show_key_from_file(options.user_hostfile, host, type[i])) {
- found = 1;
- continue;
- }
- if (show_key_from_file(options.system_hostfile, host, type[i])) {
- found = 1;
- continue;
- }
- debug2("no key of type %d for host %s", type[i], host);
- }
- return (found);
-}
diff --git a/usr/src/cmd/ssh/ssh/sshconnect1.c b/usr/src/cmd/ssh/ssh/sshconnect1.c
deleted file mode 100644
index 19cdd84cb6..0000000000
--- a/usr/src/cmd/ssh/ssh/sshconnect1.c
+++ /dev/null
@@ -1,1312 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * Code to connect to a remote host, and to perform the client side of the
- * login (authentication) dialog.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: sshconnect1.c,v 1.52 2002/08/08 13:50:23 aaron Exp $");
-
-#include <openssl/bn.h>
-#include <openssl/md5.h>
-
-#ifdef KRB4
-#include <krb.h>
-#endif
-#ifdef KRB5
-#include <krb5.h>
-#ifndef HEIMDAL
-#define krb5_get_err_text(context,code) error_message(code)
-#endif /* !HEIMDAL */
-#endif
-#ifdef AFS
-#include <kafs.h>
-#include "radix.h"
-#endif
-
-#include "ssh.h"
-#include "ssh1.h"
-#include "xmalloc.h"
-#include "rsa.h"
-#include "buffer.h"
-#include "packet.h"
-#include "mpaux.h"
-#include "uidswap.h"
-#include "log.h"
-#include "readconf.h"
-#include "key.h"
-#include "authfd.h"
-#include "sshconnect.h"
-#include "authfile.h"
-#include "readpass.h"
-#include "cipher.h"
-#include "canohost.h"
-#include "auth.h"
-
-/* Session id for the current session. */
-u_char session_id[16];
-u_int supported_authentications = 0;
-
-extern Options options;
-extern char *__progname;
-
-/*
- * Checks if the user has an authentication agent, and if so, tries to
- * authenticate using the agent.
- */
-static int
-try_agent_authentication(void)
-{
- int type;
- char *comment;
- AuthenticationConnection *auth;
- u_char response[16];
- u_int i;
- Key *key;
- BIGNUM *challenge;
-
- /* Get connection to the agent. */
- auth = ssh_get_authentication_connection();
- if (!auth)
- return 0;
-
- if ((challenge = BN_new()) == NULL)
- fatal("try_agent_authentication: BN_new failed");
- /* Loop through identities served by the agent. */
- for (key = ssh_get_first_identity(auth, &comment, 1);
- key != NULL;
- key = ssh_get_next_identity(auth, &comment, 1)) {
-
- /* Try this identity. */
- debug("Trying RSA authentication via agent with '%.100s'", comment);
- xfree(comment);
-
- /* Tell the server that we are willing to authenticate using this key. */
- packet_start(SSH_CMSG_AUTH_RSA);
- packet_put_bignum(key->rsa->n);
- packet_send();
- packet_write_wait();
-
- /* Wait for server's response. */
- type = packet_read();
-
- /* The server sends failure if it doesn\'t like our key or
- does not support RSA authentication. */
- if (type == SSH_SMSG_FAILURE) {
- debug("Server refused our key.");
- key_free(key);
- continue;
- }
- /* Otherwise it should have sent a challenge. */
- if (type != SSH_SMSG_AUTH_RSA_CHALLENGE)
- packet_disconnect("Protocol error during RSA authentication: %d",
- type);
-
- packet_get_bignum(challenge);
- packet_check_eom();
-
- debug("Received RSA challenge from server.");
-
- /* Ask the agent to decrypt the challenge. */
- if (!ssh_decrypt_challenge(auth, key, challenge, session_id, 1, response)) {
- /*
- * The agent failed to authenticate this identifier
- * although it advertised it supports this. Just
- * return a wrong value.
- */
- log("Authentication agent failed to decrypt challenge.");
- memset(response, 0, sizeof(response));
- }
- key_free(key);
- debug("Sending response to RSA challenge.");
-
- /* Send the decrypted challenge back to the server. */
- packet_start(SSH_CMSG_AUTH_RSA_RESPONSE);
- for (i = 0; i < 16; i++)
- packet_put_char(response[i]);
- packet_send();
- packet_write_wait();
-
- /* Wait for response from the server. */
- type = packet_read();
-
- /* The server returns success if it accepted the authentication. */
- if (type == SSH_SMSG_SUCCESS) {
- ssh_close_authentication_connection(auth);
- BN_clear_free(challenge);
- debug("RSA authentication accepted by server.");
- return 1;
- }
- /* Otherwise it should return failure. */
- if (type != SSH_SMSG_FAILURE)
- packet_disconnect("Protocol error waiting RSA auth response: %d",
- type);
- }
- ssh_close_authentication_connection(auth);
- BN_clear_free(challenge);
- debug("RSA authentication using agent refused.");
- return 0;
-}
-
-/*
- * Computes the proper response to a RSA challenge, and sends the response to
- * the server.
- */
-static void
-respond_to_rsa_challenge(BIGNUM * challenge, RSA * prv)
-{
- u_char buf[32], response[16];
- MD5_CTX md;
- int i, len;
-
- /* Decrypt the challenge using the private key. */
- /* XXX think about Bleichenbacher, too */
- if (rsa_private_decrypt(challenge, challenge, prv) <= 0)
- packet_disconnect(
- "respond_to_rsa_challenge: rsa_private_decrypt failed");
-
- /* Compute the response. */
- /* The response is MD5 of decrypted challenge plus session id. */
- len = BN_num_bytes(challenge);
- if (len <= 0 || len > sizeof(buf))
- packet_disconnect(
- "respond_to_rsa_challenge: bad challenge length %d", len);
-
- memset(buf, 0, sizeof(buf));
- BN_bn2bin(challenge, buf + sizeof(buf) - len);
- MD5_Init(&md);
- MD5_Update(&md, buf, 32);
- MD5_Update(&md, session_id, 16);
- MD5_Final(response, &md);
-
- debug("Sending response to host key RSA challenge.");
-
- /* Send the response back to the server. */
- packet_start(SSH_CMSG_AUTH_RSA_RESPONSE);
- for (i = 0; i < 16; i++)
- packet_put_char(response[i]);
- packet_send();
- packet_write_wait();
-
- memset(buf, 0, sizeof(buf));
- memset(response, 0, sizeof(response));
- memset(&md, 0, sizeof(md));
-}
-
-/*
- * Checks if the user has authentication file, and if so, tries to authenticate
- * the user using it.
- */
-static int
-try_rsa_authentication(int idx)
-{
- BIGNUM *challenge;
- Key *public, *private;
- char buf[300], *passphrase, *comment, *authfile;
- int i, type, quit;
-
- public = options.identity_keys[idx];
- authfile = options.identity_files[idx];
- comment = xstrdup(authfile);
-
- debug("Trying RSA authentication with key '%.100s'", comment);
-
- /* Tell the server that we are willing to authenticate using this key. */
- packet_start(SSH_CMSG_AUTH_RSA);
- packet_put_bignum(public->rsa->n);
- packet_send();
- packet_write_wait();
-
- /* Wait for server's response. */
- type = packet_read();
-
- /*
- * The server responds with failure if it doesn\'t like our key or
- * doesn\'t support RSA authentication.
- */
- if (type == SSH_SMSG_FAILURE) {
- debug("Server refused our key.");
- xfree(comment);
- return 0;
- }
- /* Otherwise, the server should respond with a challenge. */
- if (type != SSH_SMSG_AUTH_RSA_CHALLENGE)
- packet_disconnect("Protocol error during RSA authentication: %d", type);
-
- /* Get the challenge from the packet. */
- if ((challenge = BN_new()) == NULL)
- fatal("try_rsa_authentication: BN_new failed");
- packet_get_bignum(challenge);
- packet_check_eom();
-
- debug("Received RSA challenge from server.");
-
- /*
- * If the key is not stored in external hardware, we have to
- * load the private key. Try first with empty passphrase; if it
- * fails, ask for a passphrase.
- */
- if (public->flags & KEY_FLAG_EXT)
- private = public;
- else
- private = key_load_private_type(KEY_RSA1, authfile, "", NULL);
- if (private == NULL && !options.batch_mode) {
- snprintf(buf, sizeof(buf),
- gettext("Enter passphrase for RSA key '%.100s': "),
- comment);
- for (i = 0; i < options.number_of_password_prompts; i++) {
- passphrase = read_passphrase(buf, 0);
- if (strcmp(passphrase, "") != 0) {
- private = key_load_private_type(KEY_RSA1,
- authfile, passphrase, NULL);
- quit = 0;
- } else {
- debug2("no passphrase given, try next key");
- quit = 1;
- }
- memset(passphrase, 0, strlen(passphrase));
- xfree(passphrase);
- if (private != NULL || quit)
- break;
- debug2("bad passphrase given, try again...");
- }
- }
- /* We no longer need the comment. */
- xfree(comment);
-
- if (private == NULL) {
- if (!options.batch_mode)
- error("Bad passphrase.");
-
- /* Send a dummy response packet to avoid protocol error. */
- packet_start(SSH_CMSG_AUTH_RSA_RESPONSE);
- for (i = 0; i < 16; i++)
- packet_put_char(0);
- packet_send();
- packet_write_wait();
-
- /* Expect the server to reject it... */
- packet_read_expect(SSH_SMSG_FAILURE);
- BN_clear_free(challenge);
- return 0;
- }
-
- /* Compute and send a response to the challenge. */
- respond_to_rsa_challenge(challenge, private->rsa);
-
- /* Destroy the private key unless it in external hardware. */
- if (!(private->flags & KEY_FLAG_EXT))
- key_free(private);
-
- /* We no longer need the challenge. */
- BN_clear_free(challenge);
-
- /* Wait for response from the server. */
- type = packet_read();
- if (type == SSH_SMSG_SUCCESS) {
- debug("RSA authentication accepted by server.");
- return 1;
- }
- if (type != SSH_SMSG_FAILURE)
- packet_disconnect("Protocol error waiting RSA auth response: %d", type);
- debug("RSA authentication refused.");
- return 0;
-}
-
-/*
- * Tries to authenticate the user using combined rhosts or /etc/hosts.equiv
- * authentication and RSA host authentication.
- */
-static int
-try_rhosts_rsa_authentication(const char *local_user, Key * host_key)
-{
- int type;
- BIGNUM *challenge;
-
- debug("Trying rhosts or /etc/hosts.equiv with RSA host authentication.");
-
- /* Tell the server that we are willing to authenticate using this key. */
- packet_start(SSH_CMSG_AUTH_RHOSTS_RSA);
- packet_put_cstring(local_user);
- packet_put_int(BN_num_bits(host_key->rsa->n));
- packet_put_bignum(host_key->rsa->e);
- packet_put_bignum(host_key->rsa->n);
- packet_send();
- packet_write_wait();
-
- /* Wait for server's response. */
- type = packet_read();
-
- /* The server responds with failure if it doesn't admit our
- .rhosts authentication or doesn't know our host key. */
- if (type == SSH_SMSG_FAILURE) {
- debug("Server refused our rhosts authentication or host key.");
- return 0;
- }
- /* Otherwise, the server should respond with a challenge. */
- if (type != SSH_SMSG_AUTH_RSA_CHALLENGE)
- packet_disconnect("Protocol error during RSA authentication: %d", type);
-
- /* Get the challenge from the packet. */
- if ((challenge = BN_new()) == NULL)
- fatal("try_rhosts_rsa_authentication: BN_new failed");
- packet_get_bignum(challenge);
- packet_check_eom();
-
- debug("Received RSA challenge for host key from server.");
-
- /* Compute a response to the challenge. */
- respond_to_rsa_challenge(challenge, host_key->rsa);
-
- /* We no longer need the challenge. */
- BN_clear_free(challenge);
-
- /* Wait for response from the server. */
- type = packet_read();
- if (type == SSH_SMSG_SUCCESS) {
- debug("Rhosts or /etc/hosts.equiv with RSA host authentication accepted by server.");
- return 1;
- }
- if (type != SSH_SMSG_FAILURE)
- packet_disconnect("Protocol error waiting RSA auth response: %d", type);
- debug("Rhosts or /etc/hosts.equiv with RSA host authentication refused.");
- return 0;
-}
-
-#ifdef KRB4
-static int
-try_krb4_authentication(void)
-{
- KTEXT_ST auth; /* Kerberos data */
- char *reply;
- char inst[INST_SZ];
- char *realm;
- CREDENTIALS cred;
- int r, type;
- socklen_t slen;
- Key_schedule schedule;
- u_long checksum, cksum;
- MSG_DAT msg_data;
- struct sockaddr_in local, foreign;
- struct stat st;
-
- /* Don't do anything if we don't have any tickets. */
- if (stat(tkt_string(), &st) < 0)
- return 0;
-
- strlcpy(inst, (char *)krb_get_phost(get_canonical_hostname(1)),
- INST_SZ);
-
- realm = (char *)krb_realmofhost(get_canonical_hostname(1));
- if (!realm) {
- debug("Kerberos v4: no realm for %s", get_canonical_hostname(1));
- return 0;
- }
- /* This can really be anything. */
- checksum = (u_long)getpid();
-
- r = krb_mk_req(&auth, KRB4_SERVICE_NAME, inst, realm, checksum);
- if (r != KSUCCESS) {
- debug("Kerberos v4 krb_mk_req failed: %s", krb_err_txt[r]);
- return 0;
- }
- /* Get session key to decrypt the server's reply with. */
- r = krb_get_cred(KRB4_SERVICE_NAME, inst, realm, &cred);
- if (r != KSUCCESS) {
- debug("get_cred failed: %s", krb_err_txt[r]);
- return 0;
- }
- des_key_sched((des_cblock *) cred.session, schedule);
-
- /* Send authentication info to server. */
- packet_start(SSH_CMSG_AUTH_KERBEROS);
- packet_put_string((char *) auth.dat, auth.length);
- packet_send();
- packet_write_wait();
-
- /* Zero the buffer. */
- (void) memset(auth.dat, 0, MAX_KTXT_LEN);
-
- slen = sizeof(local);
- memset(&local, 0, sizeof(local));
- if (getsockname(packet_get_connection_in(),
- (struct sockaddr *)&local, &slen) < 0)
- debug("getsockname failed: %s", strerror(errno));
-
- slen = sizeof(foreign);
- memset(&foreign, 0, sizeof(foreign));
- if (getpeername(packet_get_connection_in(),
- (struct sockaddr *)&foreign, &slen) < 0) {
- debug("getpeername failed: %s", strerror(errno));
- fatal_cleanup();
- }
- /* Get server reply. */
- type = packet_read();
- switch (type) {
- case SSH_SMSG_FAILURE:
- /* Should really be SSH_SMSG_AUTH_KERBEROS_FAILURE */
- debug("Kerberos v4 authentication failed.");
- return 0;
- break;
-
- case SSH_SMSG_AUTH_KERBEROS_RESPONSE:
- /* SSH_SMSG_AUTH_KERBEROS_SUCCESS */
- debug("Kerberos v4 authentication accepted.");
-
- /* Get server's response. */
- reply = packet_get_string((u_int *) &auth.length);
- if (auth.length >= MAX_KTXT_LEN)
- fatal("Kerberos v4: Malformed response from server");
- memcpy(auth.dat, reply, auth.length);
- xfree(reply);
-
- packet_check_eom();
-
- /*
- * If his response isn't properly encrypted with the session
- * key, and the decrypted checksum fails to match, he's
- * bogus. Bail out.
- */
- r = krb_rd_priv(auth.dat, auth.length, schedule, &cred.session,
- &foreign, &local, &msg_data);
- if (r != KSUCCESS) {
- debug("Kerberos v4 krb_rd_priv failed: %s",
- krb_err_txt[r]);
- packet_disconnect("Kerberos v4 challenge failed!");
- }
- /* Fetch the (incremented) checksum that we supplied in the request. */
- memcpy((char *)&cksum, (char *)msg_data.app_data,
- sizeof(cksum));
- cksum = ntohl(cksum);
-
- /* If it matches, we're golden. */
- if (cksum == checksum + 1) {
- debug("Kerberos v4 challenge successful.");
- return 1;
- } else
- packet_disconnect("Kerberos v4 challenge failed!");
- break;
-
- default:
- packet_disconnect("Protocol error on Kerberos v4 response: %d", type);
- }
- return 0;
-}
-
-#endif /* KRB4 */
-
-#ifdef KRB5
-static int
-try_krb5_authentication(krb5_context *context, krb5_auth_context *auth_context)
-{
- krb5_error_code problem;
- const char *tkfile;
- struct stat buf;
- krb5_ccache ccache = NULL;
- const char *remotehost;
- krb5_data ap;
- int type;
- krb5_ap_rep_enc_part *reply = NULL;
- int ret;
-
- memset(&ap, 0, sizeof(ap));
-
- problem = krb5_init_context(context);
- if (problem) {
- debug("Kerberos v5: krb5_init_context failed");
- ret = 0;
- goto out;
- }
-
- problem = krb5_auth_con_init(*context, auth_context);
- if (problem) {
- debug("Kerberos v5: krb5_auth_con_init failed");
- ret = 0;
- goto out;
- }
-
-#ifndef HEIMDAL
- problem = krb5_auth_con_setflags(*context, *auth_context,
- KRB5_AUTH_CONTEXT_RET_TIME);
- if (problem) {
- debug("Keberos v5: krb5_auth_con_setflags failed");
- ret = 0;
- goto out;
- }
-#endif
-
- tkfile = krb5_cc_default_name(*context);
- if (strncmp(tkfile, "FILE:", 5) == 0)
- tkfile += 5;
-
- if (stat(tkfile, &buf) == 0 && getuid() != buf.st_uid) {
- debug("Kerberos v5: could not get default ccache (permission denied).");
- ret = 0;
- goto out;
- }
-
- problem = krb5_cc_default(*context, &ccache);
- if (problem) {
- debug("Kerberos v5: krb5_cc_default failed: %s",
- krb5_get_err_text(*context, problem));
- ret = 0;
- goto out;
- }
-
- remotehost = get_canonical_hostname(1);
-
- problem = krb5_mk_req(*context, auth_context, AP_OPTS_MUTUAL_REQUIRED,
- "host", remotehost, NULL, ccache, &ap);
- if (problem) {
- debug("Kerberos v5: krb5_mk_req failed: %s",
- krb5_get_err_text(*context, problem));
- ret = 0;
- goto out;
- }
-
- packet_start(SSH_CMSG_AUTH_KERBEROS);
- packet_put_string((char *) ap.data, ap.length);
- packet_send();
- packet_write_wait();
-
- xfree(ap.data);
- ap.length = 0;
-
- type = packet_read();
- switch (type) {
- case SSH_SMSG_FAILURE:
- /* Should really be SSH_SMSG_AUTH_KERBEROS_FAILURE */
- debug("Kerberos v5 authentication failed.");
- ret = 0;
- break;
-
- case SSH_SMSG_AUTH_KERBEROS_RESPONSE:
- /* SSH_SMSG_AUTH_KERBEROS_SUCCESS */
- debug("Kerberos v5 authentication accepted.");
-
- /* Get server's response. */
- ap.data = packet_get_string((unsigned int *) &ap.length);
- packet_check_eom();
- /* XXX je to dobre? */
-
- problem = krb5_rd_rep(*context, *auth_context, &ap, &reply);
- if (problem) {
- ret = 0;
- }
- ret = 1;
- break;
-
- default:
- packet_disconnect("Protocol error on Kerberos v5 response: %d",
- type);
- ret = 0;
- break;
-
- }
-
- out:
- if (ccache != NULL)
- krb5_cc_close(*context, ccache);
- if (reply != NULL)
- krb5_free_ap_rep_enc_part(*context, reply);
- if (ap.length > 0)
-#ifdef HEIMDAL
- krb5_data_free(&ap);
-#else
- krb5_free_data_contents(*context, &ap);
-#endif
-
- return (ret);
-}
-
-static void
-send_krb5_tgt(krb5_context context, krb5_auth_context auth_context)
-{
- int fd, type;
- krb5_error_code problem;
- krb5_data outbuf;
- krb5_ccache ccache = NULL;
- krb5_creds creds;
-#ifdef HEIMDAL
- krb5_kdc_flags flags;
-#else
- int forwardable;
-#endif
- const char *remotehost;
-
- memset(&creds, 0, sizeof(creds));
- memset(&outbuf, 0, sizeof(outbuf));
-
- fd = packet_get_connection_in();
-
-#ifdef HEIMDAL
- problem = krb5_auth_con_setaddrs_from_fd(context, auth_context, &fd);
-#else
- problem = krb5_auth_con_genaddrs(context, auth_context, fd,
- KRB5_AUTH_CONTEXT_GENERATE_REMOTE_FULL_ADDR |
- KRB5_AUTH_CONTEXT_GENERATE_LOCAL_FULL_ADDR);
-#endif
- if (problem)
- goto out;
-
- problem = krb5_cc_default(context, &ccache);
- if (problem)
- goto out;
-
- problem = krb5_cc_get_principal(context, ccache, &creds.client);
- if (problem)
- goto out;
-
- remotehost = get_canonical_hostname(1);
-
-#ifdef HEIMDAL
- problem = krb5_build_principal(context, &creds.server,
- strlen(creds.client->realm), creds.client->realm,
- "krbtgt", creds.client->realm, NULL);
-#else
- problem = krb5_build_principal(context, &creds.server,
- creds.client->realm.length, creds.client->realm.data,
- "host", remotehost, NULL);
-#endif
- if (problem)
- goto out;
-
- creds.times.endtime = 0;
-
-#ifdef HEIMDAL
- flags.i = 0;
- flags.b.forwarded = 1;
- flags.b.forwardable = krb5_config_get_bool(context, NULL,
- "libdefaults", "forwardable", NULL);
- problem = krb5_get_forwarded_creds(context, auth_context,
- ccache, flags.i, remotehost, &creds, &outbuf);
-#else
- forwardable = 1;
- problem = krb5_fwd_tgt_creds(context, auth_context, remotehost,
- creds.client, creds.server, ccache, forwardable, &outbuf);
-#endif
-
- if (problem)
- goto out;
-
- packet_start(SSH_CMSG_HAVE_KERBEROS_TGT);
- packet_put_string((char *)outbuf.data, outbuf.length);
- packet_send();
- packet_write_wait();
-
- type = packet_read();
-
- if (type == SSH_SMSG_SUCCESS) {
- char *pname;
-
- krb5_unparse_name(context, creds.client, &pname);
- debug("Kerberos v5 TGT forwarded (%s).", pname);
- xfree(pname);
- } else
- debug("Kerberos v5 TGT forwarding failed.");
-
- return;
-
- out:
- if (problem)
- debug("Kerberos v5 TGT forwarding failed: %s",
- krb5_get_err_text(context, problem));
- if (creds.client)
- krb5_free_principal(context, creds.client);
- if (creds.server)
- krb5_free_principal(context, creds.server);
- if (ccache)
- krb5_cc_close(context, ccache);
- if (outbuf.data)
- xfree(outbuf.data);
-}
-#endif /* KRB5 */
-
-#ifdef AFS
-static void
-send_krb4_tgt(void)
-{
- CREDENTIALS *creds;
- struct stat st;
- char buffer[4096], pname[ANAME_SZ], pinst[INST_SZ], prealm[REALM_SZ];
- int problem, type;
-
- /* Don't do anything if we don't have any tickets. */
- if (stat(tkt_string(), &st) < 0)
- return;
-
- creds = xmalloc(sizeof(*creds));
-
- problem = krb_get_tf_fullname(TKT_FILE, pname, pinst, prealm);
- if (problem)
- goto out;
-
- problem = krb_get_cred("krbtgt", prealm, prealm, creds);
- if (problem)
- goto out;
-
- if (time(0) > krb_life_to_time(creds->issue_date, creds->lifetime)) {
- problem = RD_AP_EXP;
- goto out;
- }
- creds_to_radix(creds, (u_char *)buffer, sizeof(buffer));
-
- packet_start(SSH_CMSG_HAVE_KERBEROS_TGT);
- packet_put_cstring(buffer);
- packet_send();
- packet_write_wait();
-
- type = packet_read();
-
- if (type == SSH_SMSG_SUCCESS)
- debug("Kerberos v4 TGT forwarded (%s%s%s@%s).",
- creds->pname, creds->pinst[0] ? "." : "",
- creds->pinst, creds->realm);
- else
- debug("Kerberos v4 TGT rejected.");
-
- xfree(creds);
- return;
-
- out:
- debug("Kerberos v4 TGT passing failed: %s", krb_err_txt[problem]);
- xfree(creds);
-}
-
-static void
-send_afs_tokens(void)
-{
- CREDENTIALS creds;
- struct ViceIoctl parms;
- struct ClearToken ct;
- int i, type, len;
- char buf[2048], *p, *server_cell;
- char buffer[8192];
-
- /* Move over ktc_GetToken, here's something leaner. */
- for (i = 0; i < 100; i++) { /* just in case */
- parms.in = (char *) &i;
- parms.in_size = sizeof(i);
- parms.out = buf;
- parms.out_size = sizeof(buf);
- if (k_pioctl(0, VIOCGETTOK, &parms, 0) != 0)
- break;
- p = buf;
-
- /* Get secret token. */
- memcpy(&creds.ticket_st.length, p, sizeof(u_int));
- if (creds.ticket_st.length > MAX_KTXT_LEN)
- break;
- p += sizeof(u_int);
- memcpy(creds.ticket_st.dat, p, creds.ticket_st.length);
- p += creds.ticket_st.length;
-
- /* Get clear token. */
- memcpy(&len, p, sizeof(len));
- if (len != sizeof(struct ClearToken))
- break;
- p += sizeof(len);
- memcpy(&ct, p, len);
- p += len;
- p += sizeof(len); /* primary flag */
- server_cell = p;
-
- /* Flesh out our credentials. */
- strlcpy(creds.service, "afs", sizeof(creds.service));
- creds.instance[0] = '\0';
- strlcpy(creds.realm, server_cell, REALM_SZ);
- memcpy(creds.session, ct.HandShakeKey, DES_KEY_SZ);
- creds.issue_date = ct.BeginTimestamp;
- creds.lifetime = krb_time_to_life(creds.issue_date,
- ct.EndTimestamp);
- creds.kvno = ct.AuthHandle;
- snprintf(creds.pname, sizeof(creds.pname), "AFS ID %d", ct.ViceId);
- creds.pinst[0] = '\0';
-
- /* Encode token, ship it off. */
- if (creds_to_radix(&creds, (u_char *)buffer,
- sizeof(buffer)) <= 0)
- break;
- packet_start(SSH_CMSG_HAVE_AFS_TOKEN);
- packet_put_cstring(buffer);
- packet_send();
- packet_write_wait();
-
- /* Roger, Roger. Clearance, Clarence. What's your vector,
- Victor? */
- type = packet_read();
-
- if (type == SSH_SMSG_FAILURE)
- debug("AFS token for cell %s rejected.", server_cell);
- else if (type != SSH_SMSG_SUCCESS)
- packet_disconnect("Protocol error on AFS token response: %d", type);
- }
-}
-
-#endif /* AFS */
-
-/*
- * Tries to authenticate with any string-based challenge/response system.
- * Note that the client code is not tied to s/key or TIS.
- */
-static int
-try_challenge_response_authentication(void)
-{
- int type, i;
- u_int clen;
- char prompt[1024];
- char *challenge, *response;
-
- debug("Doing challenge response authentication.");
-
- for (i = 0; i < options.number_of_password_prompts; i++) {
- /* request a challenge */
- packet_start(SSH_CMSG_AUTH_TIS);
- packet_send();
- packet_write_wait();
-
- type = packet_read();
- if (type != SSH_SMSG_FAILURE &&
- type != SSH_SMSG_AUTH_TIS_CHALLENGE) {
- packet_disconnect("Protocol error: got %d in response "
- "to SSH_CMSG_AUTH_TIS", type);
- }
- if (type != SSH_SMSG_AUTH_TIS_CHALLENGE) {
- debug("No challenge.");
- return 0;
- }
- challenge = packet_get_string(&clen);
- packet_check_eom();
- snprintf(prompt, sizeof prompt, "%s%s", challenge,
- strchr(challenge, '\n') ? "" : gettext("\nResponse: "));
- xfree(challenge);
- if (i != 0)
- error("Permission denied, please try again.");
- if (options.cipher == SSH_CIPHER_NONE)
- log("WARNING: Encryption is disabled! "
- "Response will be transmitted in clear text.");
- response = read_passphrase(prompt, 0);
- if (strcmp(response, "") == 0) {
- xfree(response);
- break;
- }
- packet_start(SSH_CMSG_AUTH_TIS_RESPONSE);
- ssh_put_password(response);
- memset(response, 0, strlen(response));
- xfree(response);
- packet_send();
- packet_write_wait();
- type = packet_read();
- if (type == SSH_SMSG_SUCCESS)
- return 1;
- if (type != SSH_SMSG_FAILURE)
- packet_disconnect("Protocol error: got %d in response "
- "to SSH_CMSG_AUTH_TIS_RESPONSE", type);
- }
- /* failure */
- return 0;
-}
-
-/*
- * Tries to authenticate with plain passwd authentication.
- */
-static int
-try_password_authentication(char *prompt)
-{
- int type, i;
- char *password;
-
- debug("Doing password authentication.");
- if (options.cipher == SSH_CIPHER_NONE)
- log("WARNING: Encryption is disabled! Password will be transmitted in clear text.");
- for (i = 0; i < options.number_of_password_prompts; i++) {
- if (i != 0)
- error("Permission denied, please try again.");
- password = read_passphrase(prompt, 0);
- packet_start(SSH_CMSG_AUTH_PASSWORD);
- ssh_put_password(password);
- memset(password, 0, strlen(password));
- xfree(password);
- packet_send();
- packet_write_wait();
-
- type = packet_read();
- if (type == SSH_SMSG_SUCCESS)
- return 1;
- if (type != SSH_SMSG_FAILURE)
- packet_disconnect("Protocol error: got %d in response to passwd auth", type);
- }
- /* failure */
- return 0;
-}
-
-/*
- * SSH1 key exchange
- */
-void
-ssh_kex(char *host, struct sockaddr *hostaddr)
-{
- int i;
- BIGNUM *key;
- Key *host_key, *server_key;
- int bits, rbits;
- int ssh_cipher_default = SSH_CIPHER_3DES;
- u_char session_key[SSH_SESSION_KEY_LENGTH];
- u_char cookie[8];
- u_int supported_ciphers;
- u_int server_flags, client_flags;
- u_int32_t rand = 0;
-
- debug("Waiting for server public key.");
-
- /* Wait for a public key packet from the server. */
- packet_read_expect(SSH_SMSG_PUBLIC_KEY);
-
- /* Get cookie from the packet. */
- for (i = 0; i < 8; i++)
- cookie[i] = packet_get_char();
-
- /* Get the public key. */
- server_key = key_new(KEY_RSA1);
- bits = packet_get_int();
- packet_get_bignum(server_key->rsa->e);
- packet_get_bignum(server_key->rsa->n);
-
- rbits = BN_num_bits(server_key->rsa->n);
- if (bits != rbits) {
- log("Warning: Server lies about size of server public key: "
- "actual size is %d bits vs. announced %d.", rbits, bits);
- log("Warning: This may be due to an old implementation of ssh.");
- }
- /* Get the host key. */
- host_key = key_new(KEY_RSA1);
- bits = packet_get_int();
- packet_get_bignum(host_key->rsa->e);
- packet_get_bignum(host_key->rsa->n);
-
- rbits = BN_num_bits(host_key->rsa->n);
- if (bits != rbits) {
- log("Warning: Server lies about size of server host key: "
- "actual size is %d bits vs. announced %d.", rbits, bits);
- log("Warning: This may be due to an old implementation of ssh.");
- }
-
- /* Get protocol flags. */
- server_flags = packet_get_int();
- packet_set_protocol_flags(server_flags);
-
- supported_ciphers = packet_get_int();
- supported_authentications = packet_get_int();
- packet_check_eom();
-
- debug("Received server public key (%d bits) and host key (%d bits).",
- BN_num_bits(server_key->rsa->n), BN_num_bits(host_key->rsa->n));
-
- if (verify_host_key(host, hostaddr, host_key) == -1)
- fatal("Host key verification failed.");
-
- client_flags = SSH_PROTOFLAG_SCREEN_NUMBER | SSH_PROTOFLAG_HOST_IN_FWD_OPEN;
-
- compute_session_id(session_id, cookie, host_key->rsa->n, server_key->rsa->n);
-
- /* Generate a session key. */
- arc4random_stir();
-
- /*
- * Generate an encryption key for the session. The key is a 256 bit
- * random number, interpreted as a 32-byte key, with the least
- * significant 8 bits being the first byte of the key.
- */
- for (i = 0; i < 32; i++) {
- if (i % 4 == 0)
- rand = arc4random();
- session_key[i] = rand & 0xff;
- rand >>= 8;
- }
-
- /*
- * According to the protocol spec, the first byte of the session key
- * is the highest byte of the integer. The session key is xored with
- * the first 16 bytes of the session id.
- */
- if ((key = BN_new()) == NULL)
- fatal("respond_to_rsa_challenge: BN_new failed");
- BN_set_word(key, 0);
- for (i = 0; i < SSH_SESSION_KEY_LENGTH; i++) {
- BN_lshift(key, key, 8);
- if (i < 16)
- BN_add_word(key, session_key[i] ^ session_id[i]);
- else
- BN_add_word(key, session_key[i]);
- }
-
- /*
- * Encrypt the integer using the public key and host key of the
- * server (key with smaller modulus first).
- */
- if (BN_cmp(server_key->rsa->n, host_key->rsa->n) < 0) {
- /* Public key has smaller modulus. */
- if (BN_num_bits(host_key->rsa->n) <
- BN_num_bits(server_key->rsa->n) + SSH_KEY_BITS_RESERVED) {
- fatal("respond_to_rsa_challenge: host_key %d < server_key %d + "
- "SSH_KEY_BITS_RESERVED %d",
- BN_num_bits(host_key->rsa->n),
- BN_num_bits(server_key->rsa->n),
- SSH_KEY_BITS_RESERVED);
- }
- rsa_public_encrypt(key, key, server_key->rsa);
- rsa_public_encrypt(key, key, host_key->rsa);
- } else {
- /* Host key has smaller modulus (or they are equal). */
- if (BN_num_bits(server_key->rsa->n) <
- BN_num_bits(host_key->rsa->n) + SSH_KEY_BITS_RESERVED) {
- fatal("respond_to_rsa_challenge: server_key %d < host_key %d + "
- "SSH_KEY_BITS_RESERVED %d",
- BN_num_bits(server_key->rsa->n),
- BN_num_bits(host_key->rsa->n),
- SSH_KEY_BITS_RESERVED);
- }
- rsa_public_encrypt(key, key, host_key->rsa);
- rsa_public_encrypt(key, key, server_key->rsa);
- }
-
- /* Destroy the public keys since we no longer need them. */
- key_free(server_key);
- key_free(host_key);
-
- if (options.cipher == SSH_CIPHER_NOT_SET) {
- if (cipher_mask_ssh1(1) & supported_ciphers & (1 << ssh_cipher_default))
- options.cipher = ssh_cipher_default;
- } else if (options.cipher == SSH_CIPHER_ILLEGAL ||
- !(cipher_mask_ssh1(1) & (1 << options.cipher))) {
- log("No valid SSH1 cipher, using %.100s instead.",
- cipher_name(ssh_cipher_default));
- options.cipher = ssh_cipher_default;
- }
- /* Check that the selected cipher is supported. */
- if (!(supported_ciphers & (1 << options.cipher)))
- fatal("Selected cipher type %.100s not supported by server.",
- cipher_name(options.cipher));
-
- debug("Encryption type: %.100s", cipher_name(options.cipher));
-
- /* Send the encrypted session key to the server. */
- packet_start(SSH_CMSG_SESSION_KEY);
- packet_put_char(options.cipher);
-
- /* Send the cookie back to the server. */
- for (i = 0; i < 8; i++)
- packet_put_char(cookie[i]);
-
- /* Send and destroy the encrypted encryption key integer. */
- packet_put_bignum(key);
- BN_clear_free(key);
-
- /* Send protocol flags. */
- packet_put_int(client_flags);
-
- /* Send the packet now. */
- packet_send();
- packet_write_wait();
-
- debug("Sent encrypted session key.");
-
- /* Set the encryption key. */
- packet_set_encryption_key(session_key, SSH_SESSION_KEY_LENGTH, options.cipher);
-
- /* We will no longer need the session key here. Destroy any extra copies. */
- memset(session_key, 0, sizeof(session_key));
-
- /*
- * Expect a success message from the server. Note that this message
- * will be received in encrypted form.
- */
- packet_read_expect(SSH_SMSG_SUCCESS);
-
- debug("Received encrypted confirmation.");
-}
-
-/*
- * Authenticate user
- */
-void
-ssh_userauth1(const char *local_user, const char *server_user, char *host,
- Sensitive *sensitive)
-{
-#ifdef KRB5
- krb5_context context = NULL;
- krb5_auth_context auth_context = NULL;
-#endif
- int i, type;
-
- if (supported_authentications == 0)
- fatal("ssh_userauth1: server supports no auth methods");
-
- /* Send the name of the user to log in as on the server. */
- packet_start(SSH_CMSG_USER);
- packet_put_cstring(server_user);
- packet_send();
- packet_write_wait();
-
- /*
- * The server should respond with success if no authentication is
- * needed (the user has no password). Otherwise the server responds
- * with failure.
- */
- type = packet_read();
-
- /* check whether the connection was accepted without authentication. */
- if (type == SSH_SMSG_SUCCESS)
- goto success;
- if (type != SSH_SMSG_FAILURE)
- packet_disconnect("Protocol error: got %d in response to SSH_CMSG_USER", type);
-
-#ifdef KRB5
- if ((supported_authentications & (1 << SSH_AUTH_KERBEROS)) &&
- options.kerberos_authentication) {
- debug("Trying Kerberos v5 authentication.");
-
- if (try_krb5_authentication(&context, &auth_context)) {
- type = packet_read();
- if (type == SSH_SMSG_SUCCESS)
- goto success;
- if (type != SSH_SMSG_FAILURE)
- packet_disconnect("Protocol error: got %d in response to Kerberos v5 auth", type);
- }
- }
-#endif /* KRB5 */
-
-#ifdef KRB4
- if ((supported_authentications & (1 << SSH_AUTH_KERBEROS)) &&
- options.kerberos_authentication) {
- debug("Trying Kerberos v4 authentication.");
-
- if (try_krb4_authentication()) {
- type = packet_read();
- if (type == SSH_SMSG_SUCCESS)
- goto success;
- if (type != SSH_SMSG_FAILURE)
- packet_disconnect("Protocol error: got %d in response to Kerberos v4 auth", type);
- }
- }
-#endif /* KRB4 */
-
- /*
- * Use rhosts authentication if running in privileged socket and we
- * do not wish to remain anonymous.
- */
- if ((supported_authentications & (1 << SSH_AUTH_RHOSTS)) &&
- options.rhosts_authentication) {
- debug("Trying rhosts authentication.");
- packet_start(SSH_CMSG_AUTH_RHOSTS);
- packet_put_cstring(local_user);
- packet_send();
- packet_write_wait();
-
- /* The server should respond with success or failure. */
- type = packet_read();
- if (type == SSH_SMSG_SUCCESS)
- goto success;
- if (type != SSH_SMSG_FAILURE)
- packet_disconnect("Protocol error: got %d in response to rhosts auth",
- type);
- }
- /*
- * Try .rhosts or /etc/hosts.equiv authentication with RSA host
- * authentication.
- */
- if ((supported_authentications & (1 << SSH_AUTH_RHOSTS_RSA)) &&
- options.rhosts_rsa_authentication) {
- for (i = 0; i < sensitive->nkeys; i++) {
- if (sensitive->keys[i] != NULL &&
- sensitive->keys[i]->type == KEY_RSA1 &&
- try_rhosts_rsa_authentication(local_user,
- sensitive->keys[i]))
- goto success;
- }
- }
- /* Try RSA authentication if the server supports it. */
- if ((supported_authentications & (1 << SSH_AUTH_RSA)) &&
- options.rsa_authentication) {
- /*
- * Try RSA authentication using the authentication agent. The
- * agent is tried first because no passphrase is needed for
- * it, whereas identity files may require passphrases.
- */
- if (try_agent_authentication())
- goto success;
-
- /* Try RSA authentication for each identity. */
- for (i = 0; i < options.num_identity_files; i++)
- if (options.identity_keys[i] != NULL &&
- options.identity_keys[i]->type == KEY_RSA1 &&
- try_rsa_authentication(i))
- goto success;
- }
- /* Try challenge response authentication if the server supports it. */
- if ((supported_authentications & (1 << SSH_AUTH_TIS)) &&
- options.challenge_response_authentication && !options.batch_mode) {
- if (try_challenge_response_authentication())
- goto success;
- }
- /* Try password authentication if the server supports it. */
- if ((supported_authentications & (1 << SSH_AUTH_PASSWORD)) &&
- options.password_authentication && !options.batch_mode) {
- char prompt[80];
-
- snprintf(prompt, sizeof(prompt),
- gettext("%.30s@%.128s's password: "),
- server_user, host);
- if (try_password_authentication(prompt))
- goto success;
- }
- /* All authentication methods have failed. Exit with an error message. */
- fatal("Permission denied (all authentication methods have failed).");
- /* NOTREACHED */
-
- success:
-#ifdef KRB5
- /* Try Kerberos v5 TGT passing. */
- if ((supported_authentications & (1 << SSH_PASS_KERBEROS_TGT)) &&
- options.kerberos_tgt_passing && context && auth_context) {
- if (options.cipher == SSH_CIPHER_NONE)
- log("WARNING: Encryption is disabled! Ticket will be transmitted in the clear!");
- send_krb5_tgt(context, auth_context);
- }
- if (auth_context)
- krb5_auth_con_free(context, auth_context);
- if (context)
- krb5_free_context(context);
-#endif
-
-#ifdef AFS
- /* Try Kerberos v4 TGT passing if the server supports it. */
- if ((supported_authentications & (1 << SSH_PASS_KERBEROS_TGT)) &&
- options.kerberos_tgt_passing) {
- if (options.cipher == SSH_CIPHER_NONE)
- log("WARNING: Encryption is disabled! Ticket will be transmitted in the clear!");
- send_krb4_tgt();
- }
- /* Try AFS token passing if the server supports it. */
- if ((supported_authentications & (1 << SSH_PASS_AFS_TOKEN)) &&
- options.afs_token_passing && k_hasafs()) {
- if (options.cipher == SSH_CIPHER_NONE)
- log("WARNING: Encryption is disabled! Token will be transmitted in the clear!");
- send_afs_tokens();
- }
-#endif /* AFS */
-
- return; /* need statement after label */
-}
diff --git a/usr/src/cmd/ssh/ssh/sshconnect2.c b/usr/src/cmd/ssh/ssh/sshconnect2.c
deleted file mode 100644
index e485355b6a..0000000000
--- a/usr/src/cmd/ssh/ssh/sshconnect2.c
+++ /dev/null
@@ -1,1685 +0,0 @@
-/*
- * Copyright (c) 2000 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: sshconnect2.c,v 1.107 2002/07/01 19:48:46 markus Exp $");
-
-#include "ssh.h"
-#include "ssh2.h"
-#include "xmalloc.h"
-#include "buffer.h"
-#include "packet.h"
-#include "compat.h"
-#include "bufaux.h"
-#include "cipher.h"
-#include "kex.h"
-#include "myproposal.h"
-#include "sshconnect.h"
-#include "authfile.h"
-#include "dh.h"
-#include "authfd.h"
-#include "log.h"
-#include "readconf.h"
-#include "readpass.h"
-#include "match.h"
-#include "dispatch.h"
-#include "canohost.h"
-#include "msg.h"
-#include "pathnames.h"
-#include "g11n.h"
-
-#ifdef GSSAPI
-#include "ssh-gss.h"
-extern Gssctxt *xxx_gssctxt;
-#endif /* GSSAPI */
-
-/* import */
-extern char *client_version_string;
-extern char *server_version_string;
-extern Options options;
-extern Buffer command;
-
-/*
- * SSH2 key exchange
- */
-
-u_char *session_id2 = NULL;
-int session_id2_len = 0;
-
-char *xxx_host;
-struct sockaddr *xxx_hostaddr;
-
-Kex *xxx_kex = NULL;
-
-static int
-verify_host_key_callback(Key *hostkey)
-{
- if (verify_host_key(xxx_host, xxx_hostaddr, hostkey) == -1)
- fatal("Host key verification failed.");
- return 0;
-}
-
-static int
-accept_host_key_callback(Key *hostkey)
-{
- if (accept_host_key(xxx_host, xxx_hostaddr, hostkey) == -1)
- log("GSS-API authenticated host key addition to "
- "known_hosts file failed");
- return 0;
-}
-
-void
-ssh_kex2(char *host, struct sockaddr *hostaddr)
-{
- Kex *kex;
- Kex_hook_func kex_hook = NULL;
- static char **myproposal;
-
- myproposal = my_clnt_proposal;
-
- xxx_host = host;
- xxx_hostaddr = hostaddr;
-
-#ifdef GSSAPI
- /* Add the GSSAPI mechanisms currently supported on this client to
- * the key exchange algorithm proposal */
- if (options.gss_keyex)
- kex_hook = ssh_gssapi_client_kex_hook;
-#endif /* GSSAPI */
- if (options.ciphers == (char *)-1) {
- log("No valid ciphers for protocol version 2 given, using defaults.");
- options.ciphers = NULL;
- }
- if (options.ciphers != NULL) {
- myproposal[PROPOSAL_ENC_ALGS_CTOS] =
- myproposal[PROPOSAL_ENC_ALGS_STOC] = options.ciphers;
- }
- myproposal[PROPOSAL_ENC_ALGS_CTOS] =
- compat_cipher_proposal(myproposal[PROPOSAL_ENC_ALGS_CTOS]);
- myproposal[PROPOSAL_ENC_ALGS_STOC] =
- compat_cipher_proposal(myproposal[PROPOSAL_ENC_ALGS_STOC]);
- if (options.compression) {
- myproposal[PROPOSAL_COMP_ALGS_CTOS] =
- myproposal[PROPOSAL_COMP_ALGS_STOC] = "zlib,none";
- } else {
- myproposal[PROPOSAL_COMP_ALGS_CTOS] =
- myproposal[PROPOSAL_COMP_ALGS_STOC] = "none,zlib";
- }
- if (options.macs != NULL) {
- myproposal[PROPOSAL_MAC_ALGS_CTOS] =
- myproposal[PROPOSAL_MAC_ALGS_STOC] = options.macs;
- }
- if (options.hostkeyalgorithms != NULL)
- myproposal[PROPOSAL_SERVER_HOST_KEY_ALGS] =
- options.hostkeyalgorithms;
-
- if (options.rekey_limit)
- packet_set_rekey_limit((u_int32_t)options.rekey_limit);
-
- if (datafellows & SSH_BUG_LOCALES_NOT_LANGTAGS) {
- char *locale = setlocale(LC_ALL, "");
-
- /* Solaris 9 SSHD expects a locale, not a langtag list */
- myproposal[PROPOSAL_LANG_CTOS] = "";
- if (locale != NULL && *locale != '\0' &&
- strcmp(locale, "C") != 0)
- myproposal[PROPOSAL_LANG_CTOS] = locale;
- } else {
- myproposal[PROPOSAL_LANG_CTOS] = g11n_getlangs();
- }
-
- /* Same languages proposal for both directions */
- if (myproposal[PROPOSAL_LANG_CTOS] == NULL) {
- myproposal[PROPOSAL_LANG_CTOS] = "";
- myproposal[PROPOSAL_LANG_STOC] = "";
- } else {
- myproposal[PROPOSAL_LANG_STOC] =
- myproposal[PROPOSAL_LANG_CTOS];
- }
-
- /* start key exchange */
- kex = kex_setup(host, myproposal, kex_hook);
- kex_start(kex);
- kex->kex[KEX_DH_GRP1_SHA1] = kexdh_client;
- kex->kex[KEX_DH_GEX_SHA1] = kexgex_client;
-#ifdef GSSAPI
- kex->kex[KEX_GSS_GRP1_SHA1] = kexgss_client;
- kex->options.gss_deleg_creds = options.gss_deleg_creds;
-#endif /* GSSAPI */
- kex->client_version_string=client_version_string;
- kex->server_version_string=server_version_string;
- kex->verify_host_key=&verify_host_key_callback;
- kex->accept_host_key=&accept_host_key_callback;
-
- xxx_kex = kex;
-
- dispatch_run(DISPATCH_BLOCK, &kex->done, kex);
-
- session_id2 = kex->session_id;
- session_id2_len = kex->session_id_len;
-
-#ifdef DEBUG_KEXDH
- /* send 1st encrypted/maced/compressed message */
- packet_start(SSH2_MSG_IGNORE);
- packet_put_cstring("markus");
- packet_send();
- packet_write_wait();
-#endif
- debug("done: ssh_kex2.");
-}
-
-/*
- * Authenticate user
- */
-
-typedef struct Authctxt Authctxt;
-typedef struct Authmethod Authmethod;
-
-typedef int sign_cb_fn(
- Authctxt *authctxt, Key *key,
- u_char **sigp, u_int *lenp, u_char *data, u_int datalen);
-
-struct Authctxt {
- const char *server_user;
- const char *local_user;
- const char *host;
- const char *service;
- Authmethod *method;
- int success;
- char *authlist;
- /* pubkey */
- Key *last_key;
- sign_cb_fn *last_key_sign;
- int last_key_hint;
- AuthenticationConnection *agent;
- /* hostbased */
- Sensitive *sensitive;
- /* kbd-interactive */
- int info_req_seen;
- /* generic */
- void *methoddata;
-};
-struct Authmethod {
- char *name; /* string to compare against server's list */
- int (*userauth)(Authctxt *authctxt);
- void (*cleanup)(Authctxt *authctxt);
- int *enabled; /* flag in option struct that enables method */
- int *batch_flag; /* flag in option struct that disables method */
-};
-
-void input_userauth_success(int, u_int32_t, void *);
-void input_userauth_failure(int, u_int32_t, void *);
-void input_userauth_banner(int, u_int32_t, void *);
-void input_userauth_error(int, u_int32_t, void *);
-void input_userauth_info_req(int, u_int32_t, void *);
-void input_userauth_pk_ok(int, u_int32_t, void *);
-void input_userauth_passwd_changereq(int, u_int32_t, void *);
-
-int userauth_none(Authctxt *);
-int userauth_pubkey(Authctxt *);
-int userauth_passwd(Authctxt *);
-int userauth_kbdint(Authctxt *);
-int userauth_hostbased(Authctxt *);
-
-#ifdef GSSAPI
-static int userauth_gssapi_keyex(Authctxt *authctxt);
-static int userauth_gssapi(Authctxt *authctxt);
-static void userauth_gssapi_cleanup(Authctxt *authctxt);
-static void input_gssapi_response(int type, u_int32_t, void *);
-static void input_gssapi_token(int type, u_int32_t, void *);
-static void input_gssapi_hash(int type, u_int32_t, void *);
-static void input_gssapi_error(int, u_int32_t, void *);
-static void input_gssapi_errtok(int, u_int32_t, void *);
-#endif /* GSSAPI */
-
-void userauth(Authctxt *, char *);
-
-static int sign_and_send_pubkey(Authctxt *, Key *, sign_cb_fn *);
-static void clear_auth_state(Authctxt *);
-
-static Authmethod *authmethod_get(char *authlist);
-static Authmethod *authmethod_lookup(const char *name);
-static char *authmethods_get(void);
-
-Authmethod authmethods[] = {
-#ifdef GSSAPI
- {"gssapi-keyex",
- userauth_gssapi_keyex,
- userauth_gssapi_cleanup,
- &options.gss_keyex,
- NULL},
- {"gssapi-with-mic",
- userauth_gssapi,
- userauth_gssapi_cleanup,
- &options.gss_authentication,
- NULL},
-#endif /* GSSAPI */
- {"hostbased",
- userauth_hostbased,
- NULL,
- &options.hostbased_authentication,
- NULL},
- {"publickey",
- userauth_pubkey,
- NULL,
- &options.pubkey_authentication,
- NULL},
- {"keyboard-interactive",
- userauth_kbdint,
- NULL,
- &options.kbd_interactive_authentication,
- &options.batch_mode},
- {"password",
- userauth_passwd,
- NULL,
- &options.password_authentication,
- &options.batch_mode},
- {"none",
- userauth_none,
- NULL,
- NULL,
- NULL},
- {NULL, NULL, NULL, NULL, NULL}
-};
-
-void
-ssh_userauth2(const char *local_user, const char *server_user, char *host,
- Sensitive *sensitive)
-{
- Authctxt authctxt;
- int type;
-
- if (options.challenge_response_authentication)
- options.kbd_interactive_authentication = 1;
-
- packet_start(SSH2_MSG_SERVICE_REQUEST);
- packet_put_cstring("ssh-userauth");
- packet_send();
- debug("send SSH2_MSG_SERVICE_REQUEST");
- packet_write_wait();
- type = packet_read();
- if (type != SSH2_MSG_SERVICE_ACCEPT)
- fatal("Server denied authentication request: %d", type);
- if (packet_remaining() > 0) {
- char *reply = packet_get_string(NULL);
- debug2("service_accept: %s", reply);
- xfree(reply);
- } else {
- debug2("buggy server: service_accept w/o service");
- }
- packet_check_eom();
- debug("got SSH2_MSG_SERVICE_ACCEPT");
-
- if (options.preferred_authentications == NULL)
- options.preferred_authentications = authmethods_get();
-
- /* setup authentication context */
- memset(&authctxt, 0, sizeof(authctxt));
- authctxt.agent = ssh_get_authentication_connection();
- authctxt.server_user = server_user;
- authctxt.local_user = local_user;
- authctxt.host = host;
- authctxt.service = "ssh-connection"; /* service name */
- authctxt.success = 0;
- authctxt.method = authmethod_lookup("none");
- authctxt.authlist = NULL;
- authctxt.methoddata = NULL;
- authctxt.sensitive = sensitive;
- authctxt.info_req_seen = 0;
- if (authctxt.method == NULL)
- fatal("ssh_userauth2: internal error: cannot send userauth none request");
-
- /* initial userauth request */
- userauth_none(&authctxt);
-
- dispatch_init(&input_userauth_error);
- dispatch_set(SSH2_MSG_USERAUTH_SUCCESS, &input_userauth_success);
- dispatch_set(SSH2_MSG_USERAUTH_FAILURE, &input_userauth_failure);
- dispatch_set(SSH2_MSG_USERAUTH_BANNER, &input_userauth_banner);
- dispatch_run(DISPATCH_BLOCK, &authctxt.success, &authctxt); /* loop until success */
-
- if (authctxt.agent != NULL)
- ssh_close_authentication_connection(authctxt.agent);
-
- debug("Authentication succeeded (%s)", authctxt.method->name);
-}
-void
-userauth(Authctxt *authctxt, char *authlist)
-{
- if (authctxt->method != NULL &&
- authctxt->method->cleanup != NULL)
- authctxt->method->cleanup(authctxt);
-
- if (authlist == NULL) {
- authlist = authctxt->authlist;
- } else {
- if (authctxt->authlist)
- xfree(authctxt->authlist);
- authctxt->authlist = authlist;
- }
- for (;;) {
- Authmethod *method = authmethod_get(authlist);
- if (method == NULL)
- fatal("Permission denied (%s).", authlist);
- authctxt->method = method;
- if (method->userauth(authctxt) != 0) {
- debug2("we sent a %s packet, wait for reply", method->name);
- break;
- } else {
- debug2("we did not send a packet, disable method");
- method->enabled = NULL;
- }
- }
-}
-
-void
-input_userauth_error(int type, u_int32_t seq, void *ctxt)
-{
- fatal("input_userauth_error: bad message during authentication: "
- "type %d", type);
-}
-
-void
-input_userauth_banner(int type, u_int32_t seq, void *ctxt)
-{
- char *msg, *lang;
-
- debug3("input_userauth_banner");
- msg = packet_get_utf8_string(NULL);
- lang = packet_get_string(NULL);
- /*
- * Banner is a warning message according to RFC 4252. So, never print
- * a banner in error log level or lower. If the log level is higher,
- * use DisableBanner option to decide whether to display it or not.
- */
- if (options.log_level > SYSLOG_LEVEL_ERROR) {
- if (options.disable_banner == 0 ||
- (options.disable_banner == SSH_NO_BANNER_IN_EXEC_MODE &&
- buffer_len(&command) == 0)) {
- msg = g11n_filter_string(msg);
- (void) fprintf(stderr, "%s", msg);
- }
- }
- xfree(msg);
- xfree(lang);
-}
-
-void
-input_userauth_success(int type, u_int32_t seq, void *ctxt)
-{
- Authctxt *authctxt = ctxt;
- if (authctxt == NULL)
- fatal("input_userauth_success: no authentication context");
- if (authctxt->authlist)
- xfree(authctxt->authlist);
- if (authctxt->method != NULL &&
- authctxt->method->cleanup != NULL)
- authctxt->method->cleanup(authctxt);
- clear_auth_state(authctxt);
- authctxt->success = 1; /* break out */
-}
-
-void
-input_userauth_failure(int type, u_int32_t seq, void *ctxt)
-{
- Authctxt *authctxt = ctxt;
- char *authlist = NULL;
- int partial;
-
- if (authctxt == NULL)
- fatal("input_userauth_failure: no authentication context");
-
- authlist = packet_get_string(NULL);
- partial = packet_get_char();
- packet_check_eom();
-
- if (partial != 0)
- log("Authenticated with partial success.");
- debug("Authentications that can continue: %s", authlist);
-
- clear_auth_state(authctxt);
- userauth(authctxt, authlist);
-}
-void
-input_userauth_pk_ok(int type, u_int32_t seq, void *ctxt)
-{
- Authctxt *authctxt = ctxt;
- Key *key = NULL;
- Buffer b;
- int pktype, sent = 0;
- u_int alen, blen;
- char *pkalg, *fp;
- u_char *pkblob;
-
- if (authctxt == NULL)
- fatal("input_userauth_pk_ok: no authentication context");
- if (datafellows & SSH_BUG_PKOK) {
- /* this is similar to SSH_BUG_PKAUTH */
- debug2("input_userauth_pk_ok: SSH_BUG_PKOK");
- pkblob = packet_get_string(&blen);
- buffer_init(&b);
- buffer_append(&b, pkblob, blen);
- pkalg = buffer_get_string(&b, &alen);
- buffer_free(&b);
- } else {
- pkalg = packet_get_string(&alen);
- pkblob = packet_get_string(&blen);
- }
- packet_check_eom();
-
- debug("Server accepts key: pkalg %s blen %u lastkey %p hint %d",
- pkalg, blen, authctxt->last_key, authctxt->last_key_hint);
-
- do {
- if (authctxt->last_key == NULL ||
- authctxt->last_key_sign == NULL) {
- debug("no last key or no sign cb");
- break;
- }
- if ((pktype = key_type_from_name(pkalg)) == KEY_UNSPEC) {
- debug("unknown pkalg %s", pkalg);
- break;
- }
- if ((key = key_from_blob(pkblob, blen)) == NULL) {
- debug("no key from blob. pkalg %s", pkalg);
- break;
- }
- if (key->type != pktype) {
- error("input_userauth_pk_ok: type mismatch "
- "for decoded key (received %d, expected %d)",
- key->type, pktype);
- break;
- }
- fp = key_fingerprint(key, SSH_FP_MD5, SSH_FP_HEX);
- debug2("input_userauth_pk_ok: fp %s", fp);
- xfree(fp);
- if (!key_equal(key, authctxt->last_key)) {
- debug("key != last_key");
- break;
- }
- sent = sign_and_send_pubkey(authctxt, key,
- authctxt->last_key_sign);
- } while (0);
-
- if (key != NULL)
- key_free(key);
- xfree(pkalg);
- xfree(pkblob);
-
- /* unregister */
- clear_auth_state(authctxt);
- dispatch_set(SSH2_MSG_USERAUTH_PK_OK, NULL);
-
- /* try another method if we did not send a packet */
- if (sent == 0)
- userauth(authctxt, NULL);
-
-}
-
-#ifdef GSSAPI
-int
-userauth_gssapi(Authctxt *authctxt)
-{
- Gssctxt *gssctxt = NULL;
- static int initialized = 0;
- static int mech_idx = 0;
- static gss_OID_set supported = GSS_C_NULL_OID_SET;
- gss_OID mech = GSS_C_NULL_OID;
-
- /* Things work better if we send one mechanism at a time, rather
- * than them all at once. This means that if we fail at some point
- * in the middle of a negotiation, we can come back and try something
- * different. */
-
- if (datafellows & SSH_OLD_GSSAPI) return 0;
-
- /* Before we offer a mechanism, check that we can support it. Don't
- * bother trying to get credentials - as the standard fallback will
- * deal with that kind of failure.
- */
-
- if (!initialized) {
- initialized = 1;
- ssh_gssapi_client_mechs(authctxt->host, &supported);
- if (supported == GSS_C_NULL_OID_SET || supported->count == 0)
- return (0);
- } else if (supported != GSS_C_NULL_OID_SET) {
- /* Try next mech, if any */
- mech_idx++;
-
- if (mech_idx >= supported->count)
- return (0);
- } else {
- return (0);
- }
-
- mech = &supported->elements[mech_idx];
-
- ssh_gssapi_build_ctx(&gssctxt, 1, mech);
- authctxt->methoddata=(void *)gssctxt;
-
- packet_start(SSH2_MSG_USERAUTH_REQUEST);
- packet_put_cstring(authctxt->server_user);
- packet_put_cstring(authctxt->service);
- packet_put_cstring(authctxt->method->name);
-
- packet_put_int(1);
-
- /* The newest gsskeyex draft stipulates that OIDs should
- * be DER encoded, so we need to add the object type and
- * length information back on */
- if (datafellows & SSH_BUG_GSSAPI_BER) {
- packet_put_string(mech->elements, mech->length);
- } else {
- packet_put_int((mech->length)+2);
- packet_put_char(0x06);
- packet_put_char(mech->length);
- packet_put_raw(mech->elements, mech->length);
- }
-
- packet_send();
- packet_write_wait();
-
- dispatch_set(SSH2_MSG_USERAUTH_GSSAPI_RESPONSE,&input_gssapi_response);
-
- return 1;
-}
-
-void
-input_gssapi_response(int type, u_int32_t plen, void *ctxt)
-{
- Authctxt *authctxt = ctxt;
- Gssctxt *gssctxt;
- OM_uint32 status,ms;
- u_int oidlen;
- char *oidv;
- gss_buffer_desc send_tok;
-
- if (authctxt == NULL)
- fatal("input_gssapi_response: no authentication context");
- gssctxt = authctxt->methoddata;
-
- /* Setup our OID */
- oidv=packet_get_string(&oidlen);
-
- if (datafellows & SSH_BUG_GSSAPI_BER) {
- if (!ssh_gssapi_check_mech_oid(gssctxt,oidv,oidlen)) {
- gss_OID oid;
-
- oid = ssh_gssapi_make_oid(oidlen, oidv);
- debug("Server returned different OID (%s) than expected (%s)",
- ssh_gssapi_oid_to_str(oid),
- ssh_gssapi_oid_to_str(gssctxt->desired_mech));
- ssh_gssapi_release_oid(&oid);
- clear_auth_state(authctxt);
- userauth(authctxt,NULL);
- return;
- }
- } else {
- if(oidv[0]!=0x06 || oidv[1]!=oidlen-2) {
- debug("Badly encoded mechanism OID received");
- clear_auth_state(authctxt);
- userauth(authctxt,NULL);
- return;
- }
- if (!ssh_gssapi_check_mech_oid(gssctxt,oidv+2,oidlen-2)) {
- gss_OID oid;
-
- oid = ssh_gssapi_make_oid(oidlen-2, oidv+2);
- debug("Server returned different OID (%s) than expected (%s)",
- ssh_gssapi_oid_to_str(oid),
- ssh_gssapi_oid_to_str(gssctxt->desired_mech));
- clear_auth_state(authctxt);
- userauth(authctxt,NULL);
- return;
- }
- }
-
- packet_check_eom();
-
- dispatch_set(SSH2_MSG_USERAUTH_GSSAPI_TOKEN,&input_gssapi_token);
- dispatch_set(SSH2_MSG_USERAUTH_GSSAPI_ERROR,&input_gssapi_error);
- dispatch_set(SSH2_MSG_USERAUTH_GSSAPI_ERRTOK,&input_gssapi_errtok);
-
- status = ssh_gssapi_init_ctx(gssctxt, authctxt->host,
- options.gss_deleg_creds,
- GSS_C_NO_BUFFER, &send_tok);
- if (GSS_ERROR(status)) {
- if (send_tok.length>0) {
- packet_start(SSH2_MSG_USERAUTH_GSSAPI_ERRTOK);
- packet_put_string(send_tok.value,send_tok.length);
- packet_send();
- packet_write_wait();
- }
- /* Start again with next method on list */
- debug("Trying to start again");
- clear_auth_state(authctxt);
- userauth(authctxt,NULL);
- return;
- }
-
- /* We must have data to send */
- packet_start(SSH2_MSG_USERAUTH_GSSAPI_TOKEN);
- packet_put_string(send_tok.value,send_tok.length);
- packet_send();
- packet_write_wait();
- gss_release_buffer(&ms, &send_tok);
-}
-
-void
-input_gssapi_token(int type, u_int32_t plen, void *ctxt)
-{
- Authctxt *authctxt = ctxt;
- Gssctxt *gssctxt;
- gss_buffer_desc send_tok, recv_tok, g_mic_data;
- Buffer mic_data;
- OM_uint32 status;
- u_int slen;
-
- if (authctxt == NULL || authctxt->method == NULL)
- fatal("input_gssapi_response: no authentication context");
- gssctxt = authctxt->methoddata;
-
- recv_tok.value=packet_get_string(&slen);
- recv_tok.length=slen; /* safe typecast */
-
- status=ssh_gssapi_init_ctx(gssctxt, authctxt->host,
- options.gss_deleg_creds,
- &recv_tok, &send_tok);
-
- packet_check_eom();
-
- if (GSS_ERROR(status)) {
- if (send_tok.length>0) {
- packet_start(SSH2_MSG_USERAUTH_GSSAPI_ERRTOK);
- packet_put_string(send_tok.value,send_tok.length);
- packet_send();
- packet_write_wait();
- }
- /* Start again with the next method in the list */
- clear_auth_state(authctxt);
- userauth(authctxt,NULL);
- return;
- }
-
- if (send_tok.length>0) {
- packet_start(SSH2_MSG_USERAUTH_GSSAPI_TOKEN);
- packet_put_string(send_tok.value,send_tok.length);
- packet_send();
- packet_write_wait();
- }
-
- if (status != GSS_S_COMPLETE)
- return;
-
- /* Make data buffer to MIC */
- buffer_init(&mic_data);
- buffer_put_string(&mic_data, session_id2, session_id2_len);
- buffer_put_char(&mic_data, SSH2_MSG_USERAUTH_REQUEST);
- buffer_put_cstring(&mic_data, authctxt->server_user);
- buffer_put_cstring(&mic_data, authctxt->service);
- buffer_put_cstring(&mic_data, authctxt->method->name);
-
- /* Make MIC */
- g_mic_data.value = buffer_ptr(&mic_data);
- g_mic_data.length = buffer_len(&mic_data);
-
- status = ssh_gssapi_get_mic(gssctxt, &g_mic_data, &send_tok);
- buffer_clear(&mic_data);
-
- if (GSS_ERROR(status) || send_tok.length == 0) {
- /*
- * Oops, now what? There's no error token...
- * Next userauth
- */
- debug("GSS_GetMIC() failed! - "
- "Abandoning GSSAPI userauth");
- clear_auth_state(authctxt);
- userauth(authctxt,NULL);
- return;
- }
- packet_start(SSH2_MSG_USERAUTH_GSSAPI_MIC);
- packet_put_string(send_tok.value,send_tok.length);
- packet_send();
- packet_write_wait();
-}
-
-void
-input_gssapi_errtok(int type, u_int32_t plen, void *ctxt)
-{
- OM_uint32 min_status;
- Authctxt *authctxt = ctxt;
- Gssctxt *gssctxt;
- gss_buffer_desc send_tok, recv_tok;
-
- if (authctxt == NULL)
- fatal("input_gssapi_response: no authentication context");
- gssctxt = authctxt->methoddata;
-
- recv_tok.value=packet_get_string(&recv_tok.length);
-
- /* Stick it into GSSAPI and see what it says */
- (void) ssh_gssapi_init_ctx(gssctxt, authctxt->host,
- options.gss_deleg_creds,
- &recv_tok, &send_tok);
-
- xfree(recv_tok.value);
- (void) gss_release_buffer(&min_status, &send_tok);
-
- debug("Server sent a GSS-API error token during GSS userauth -- %s",
- ssh_gssapi_last_error(gssctxt, NULL, NULL));
-
- packet_check_eom();
-
- /* We can't send a packet to the server */
-
- /* The draft says that we should wait for the server to fail
- * before starting the next authentication. So, we clear the
- * state, but don't do anything else
- */
- clear_auth_state(authctxt);
- return;
-}
-
-void
-input_gssapi_error(int type, u_int32_t plen, void *ctxt)
-{
- OM_uint32 maj,min;
- char *msg;
- char *lang;
-
- maj = packet_get_int();
- min = packet_get_int();
- msg = packet_get_string(NULL);
- lang = packet_get_string(NULL);
-
- packet_check_eom();
-
- fprintf(stderr, "Server GSSAPI Error:\n%s (%d, %d)\n", msg, maj, min);
- xfree(msg);
- xfree(lang);
-}
-
-int
-userauth_gssapi_keyex(Authctxt *authctxt)
-{
- Gssctxt *gssctxt;
- gss_buffer_desc send_tok;
- OM_uint32 status;
- static int attempt = 0;
-
- if (authctxt == NULL || authctxt->method == NULL)
- fatal("input_gssapi_response: no authentication context");
-
- if (xxx_gssctxt == NULL || xxx_gssctxt->context == GSS_C_NO_CONTEXT)
- return 0;
-
- if (strcmp(authctxt->method->name, "gssapi-keyex") == 0)
- authctxt->methoddata = gssctxt = xxx_gssctxt;
-
- if (attempt++ >= 1)
- return 0;
-
- if (strcmp(authctxt->method->name, "gssapi-keyex") == 0) {
- gss_buffer_desc g_mic_data;
- Buffer mic_data;
-
- debug2("Authenticating with GSS-API context from key exchange (w/ MIC)");
-
- /* Make data buffer to MIC */
- buffer_init(&mic_data);
- buffer_put_string(&mic_data, session_id2, session_id2_len);
- buffer_put_char(&mic_data, SSH2_MSG_USERAUTH_REQUEST);
- buffer_put_cstring(&mic_data, authctxt->server_user);
- buffer_put_cstring(&mic_data, authctxt->service);
- buffer_put_cstring(&mic_data, authctxt->method->name);
-
- /* Make MIC */
- g_mic_data.value = buffer_ptr(&mic_data);
- g_mic_data.length = buffer_len(&mic_data);
- status = ssh_gssapi_get_mic(gssctxt, &g_mic_data, &send_tok);
- buffer_clear(&mic_data);
-
- if (GSS_ERROR(status) || send_tok.length == 0) {
- /*
- * Oops, now what? There's no error token...
- * Next userauth
- */
- debug("GSS_GetMIC() failed! - "
- "Abandoning GSSAPI userauth");
- clear_auth_state(authctxt);
- userauth(authctxt,NULL);
- return 0;
- }
- packet_start(SSH2_MSG_USERAUTH_REQUEST);
- packet_put_cstring(authctxt->server_user);
- packet_put_cstring(authctxt->service);
- packet_put_cstring(authctxt->method->name);
- packet_put_string(send_tok.value,send_tok.length); /* MIC */
- packet_send();
- packet_write_wait();
- (void) gss_release_buffer(&status, &send_tok);
- } else if (strcmp(authctxt->method->name, "external-keyx") == 0) {
- debug2("Authentication with deprecated \"external-keyx\""
- " method not supported");
- return 0;
- }
- return 1;
-}
-
-static
-void
-userauth_gssapi_cleanup(Authctxt *authctxt)
-{
- dispatch_set(SSH2_MSG_USERAUTH_GSSAPI_RESPONSE,NULL);
- dispatch_set(SSH2_MSG_USERAUTH_GSSAPI_TOKEN,NULL);
- dispatch_set(SSH2_MSG_USERAUTH_GSSAPI_ERROR,NULL);
- dispatch_set(SSH2_MSG_USERAUTH_GSSAPI_ERRTOK,NULL);
-
- if (authctxt == NULL ||
- authctxt->method == NULL ||
- authctxt->methoddata == NULL)
- return;
-
- if (strncmp(authctxt->method->name, "gssapi", strlen("gssapi")) == 0) {
- ssh_gssapi_delete_ctx((Gssctxt **)&authctxt->methoddata);
- }
-}
-#endif /* GSSAPI */
-
-int
-userauth_none(Authctxt *authctxt)
-{
- /* initial userauth request */
- packet_start(SSH2_MSG_USERAUTH_REQUEST);
- packet_put_cstring(authctxt->server_user);
- packet_put_cstring(authctxt->service);
- packet_put_cstring(authctxt->method->name);
- packet_send();
- return 1;
-
-}
-
-int
-userauth_passwd(Authctxt *authctxt)
-{
- static int attempt = 0;
- char prompt[150];
- char *password;
-
- if (attempt++ >= options.number_of_password_prompts)
- return 0;
-
- if (attempt != 1)
- error("Permission denied, please try again.");
-
- snprintf(prompt, sizeof(prompt), gettext("%.30s@%.128s's password: "),
- authctxt->server_user, authctxt->host);
- password = read_passphrase(prompt, 0);
- packet_start(SSH2_MSG_USERAUTH_REQUEST);
- packet_put_cstring(authctxt->server_user);
- packet_put_cstring(authctxt->service);
- packet_put_cstring(authctxt->method->name);
- packet_put_char(0);
- packet_put_cstring(password);
- memset(password, 0, strlen(password));
- xfree(password);
- packet_add_padding(64);
- packet_send();
-
- dispatch_set(SSH2_MSG_USERAUTH_PASSWD_CHANGEREQ,
- &input_userauth_passwd_changereq);
-
- return 1;
-}
-/*
- * parse PASSWD_CHANGEREQ, prompt user and send SSH2_MSG_USERAUTH_REQUEST
- */
-void
-input_userauth_passwd_changereq(int type, u_int32_t seqnr, void *ctxt)
-{
- Authctxt *authctxt = ctxt;
- char *info, *lang, *password = NULL, *retype = NULL;
- char prompt[150];
-
- debug2("input_userauth_passwd_changereq");
-
- if (authctxt == NULL)
- fatal("input_userauth_passwd_changereq: "
- "no authentication context");
-
- info = packet_get_utf8_string(NULL);
- if (strlen(info) != 0) {
- info = g11n_filter_string(info);
- log("%s", info);
- }
- xfree(info);
- lang = packet_get_string(NULL);
- xfree(lang);
-
- packet_start(SSH2_MSG_USERAUTH_REQUEST);
- packet_put_cstring(authctxt->server_user);
- packet_put_cstring(authctxt->service);
- packet_put_cstring(authctxt->method->name);
- packet_put_char(1); /* additional info */
- snprintf(prompt, sizeof(prompt),
- gettext("Enter %.30s@%.128s's old password: "),
- authctxt->server_user, authctxt->host);
- password = read_passphrase(prompt, 0);
- packet_put_cstring(password);
- memset(password, 0, strlen(password));
- xfree(password);
- password = NULL;
- while (password == NULL) {
- snprintf(prompt, sizeof(prompt),
- gettext("Enter %.30s@%.128s's new password: "),
- authctxt->server_user, authctxt->host);
- password = read_passphrase(prompt, RP_ALLOW_EOF);
- if (password == NULL) {
- /* bail out */
- return;
- }
- snprintf(prompt, sizeof(prompt),
- gettext("Retype %.30s@%.128s's new password: "),
- authctxt->server_user, authctxt->host);
- retype = read_passphrase(prompt, 0);
- if (strcmp(password, retype) != 0) {
- memset(password, 0, strlen(password));
- xfree(password);
- log("Mismatch; try again, EOF to quit.");
- password = NULL;
- }
- memset(retype, 0, strlen(retype));
- xfree(retype);
- }
- packet_put_cstring(password);
- memset(password, 0, strlen(password));
- xfree(password);
- packet_add_padding(64);
- packet_send();
-
- dispatch_set(SSH2_MSG_USERAUTH_PASSWD_CHANGEREQ,
- &input_userauth_passwd_changereq);
-}
-
-static void
-clear_auth_state(Authctxt *authctxt)
-{
- /* XXX clear authentication state */
- dispatch_set(SSH2_MSG_USERAUTH_PASSWD_CHANGEREQ, NULL);
-#ifdef GSSAPI
- dispatch_set(SSH2_MSG_USERAUTH_GSSAPI_RESPONSE,NULL);
- dispatch_set(SSH2_MSG_USERAUTH_GSSAPI_TOKEN,NULL);
- dispatch_set(SSH2_MSG_USERAUTH_GSSAPI_ERROR,NULL);
-#endif /* GSSAPI */
-
- if (authctxt->last_key != NULL && authctxt->last_key_hint == -1) {
- debug3("clear_auth_state: key_free %p", authctxt->last_key);
- key_free(authctxt->last_key);
- }
- authctxt->last_key = NULL;
- authctxt->last_key_hint = -2;
- authctxt->last_key_sign = NULL;
-}
-
-static int
-sign_and_send_pubkey(Authctxt *authctxt, Key *k, sign_cb_fn *sign_callback)
-{
- Buffer b;
- u_char *blob, *signature;
- u_int bloblen, slen;
- int skip = 0;
- int ret = -1;
- int have_sig = 1;
-
- debug3("sign_and_send_pubkey");
-
- if (key_to_blob(k, &blob, &bloblen) == 0) {
- /* we cannot handle this key */
- debug3("sign_and_send_pubkey: cannot handle key");
- return 0;
- }
- /* data to be signed */
- buffer_init(&b);
- if (datafellows & SSH_OLD_SESSIONID) {
- buffer_append(&b, session_id2, session_id2_len);
- skip = session_id2_len;
- } else {
- buffer_put_string(&b, session_id2, session_id2_len);
- skip = buffer_len(&b);
- }
- buffer_put_char(&b, SSH2_MSG_USERAUTH_REQUEST);
- buffer_put_cstring(&b, authctxt->server_user);
- buffer_put_cstring(&b,
- datafellows & SSH_BUG_PKSERVICE ?
- "ssh-userauth" :
- authctxt->service);
- if (datafellows & SSH_BUG_PKAUTH) {
- buffer_put_char(&b, have_sig);
- } else {
- buffer_put_cstring(&b, authctxt->method->name);
- buffer_put_char(&b, have_sig);
- buffer_put_cstring(&b, key_ssh_name(k));
- }
- buffer_put_string(&b, blob, bloblen);
-
- /* generate signature */
- ret = (*sign_callback)(authctxt, k, &signature, &slen,
- buffer_ptr(&b), buffer_len(&b));
- if (ret == -1) {
- xfree(blob);
- buffer_free(&b);
- return 0;
- }
-#ifdef DEBUG_PK
- buffer_dump(&b);
-#endif
- if (datafellows & SSH_BUG_PKSERVICE) {
- buffer_clear(&b);
- buffer_append(&b, session_id2, session_id2_len);
- skip = session_id2_len;
- buffer_put_char(&b, SSH2_MSG_USERAUTH_REQUEST);
- buffer_put_cstring(&b, authctxt->server_user);
- buffer_put_cstring(&b, authctxt->service);
- buffer_put_cstring(&b, authctxt->method->name);
- buffer_put_char(&b, have_sig);
- if (!(datafellows & SSH_BUG_PKAUTH))
- buffer_put_cstring(&b, key_ssh_name(k));
- buffer_put_string(&b, blob, bloblen);
- }
- xfree(blob);
-
- /* append signature */
- buffer_put_string(&b, signature, slen);
- xfree(signature);
-
- /* skip session id and packet type */
- if (buffer_len(&b) < skip + 1)
- fatal("userauth_pubkey: internal error");
- buffer_consume(&b, skip + 1);
-
- /* put remaining data from buffer into packet */
- packet_start(SSH2_MSG_USERAUTH_REQUEST);
- packet_put_raw(buffer_ptr(&b), buffer_len(&b));
- buffer_free(&b);
- packet_send();
-
- return 1;
-}
-
-static int
-send_pubkey_test(Authctxt *authctxt, Key *k, sign_cb_fn *sign_callback,
- int hint)
-{
- u_char *blob;
- u_int bloblen, have_sig = 0;
-
- debug3("send_pubkey_test");
-
- if (key_to_blob(k, &blob, &bloblen) == 0) {
- /* we cannot handle this key */
- debug3("send_pubkey_test: cannot handle key");
- return 0;
- }
- /* register callback for USERAUTH_PK_OK message */
- authctxt->last_key_sign = sign_callback;
- authctxt->last_key_hint = hint;
- authctxt->last_key = k;
- dispatch_set(SSH2_MSG_USERAUTH_PK_OK, &input_userauth_pk_ok);
-
- packet_start(SSH2_MSG_USERAUTH_REQUEST);
- packet_put_cstring(authctxt->server_user);
- packet_put_cstring(authctxt->service);
- packet_put_cstring(authctxt->method->name);
- packet_put_char(have_sig);
- if (!(datafellows & SSH_BUG_PKAUTH))
- packet_put_cstring(key_ssh_name(k));
- packet_put_string(blob, bloblen);
- xfree(blob);
- packet_send();
- return 1;
-}
-
-static Key *
-load_identity_file(char *filename)
-{
- Key *private;
- char prompt[300], *passphrase;
- int quit, i;
- struct stat st;
-
- if (stat(filename, &st) < 0) {
- debug3("no such identity: %s", filename);
- return NULL;
- }
- private = key_load_private_type(KEY_UNSPEC, filename, "", NULL);
- if (private == NULL) {
- if (options.batch_mode)
- return NULL;
- snprintf(prompt, sizeof prompt,
- gettext("Enter passphrase for key '%.100s': "), filename);
- for (i = 0; i < options.number_of_password_prompts; i++) {
- passphrase = read_passphrase(prompt, 0);
- if (strcmp(passphrase, "") != 0) {
- private = key_load_private_type(KEY_UNSPEC, filename,
- passphrase, NULL);
- quit = 0;
- } else {
- debug2("no passphrase given, try next key");
- quit = 1;
- }
- memset(passphrase, 0, strlen(passphrase));
- xfree(passphrase);
- if (private != NULL || quit)
- break;
- debug2("bad passphrase given, try again...");
- }
- }
- return private;
-}
-
-static int
-identity_sign_cb(Authctxt *authctxt, Key *key, u_char **sigp, u_int *lenp,
- u_char *data, u_int datalen)
-{
- Key *private;
- int idx, ret;
-
- idx = authctxt->last_key_hint;
- if (idx < 0)
- return -1;
-
- /* private key is stored in external hardware */
- if (options.identity_keys[idx]->flags & KEY_FLAG_EXT)
- return key_sign(options.identity_keys[idx], sigp, lenp, data, datalen);
-
- private = load_identity_file(options.identity_files[idx]);
- if (private == NULL)
- return -1;
- ret = key_sign(private, sigp, lenp, data, datalen);
- key_free(private);
- return ret;
-}
-
-static int
-agent_sign_cb(Authctxt *authctxt, Key *key, u_char **sigp, u_int *lenp,
- u_char *data, u_int datalen)
-{
- return ssh_agent_sign(authctxt->agent, key, sigp, lenp, data, datalen);
-}
-
-static int
-key_sign_cb(Authctxt *authctxt, Key *key, u_char **sigp, u_int *lenp,
- u_char *data, u_int datalen)
-{
- return key_sign(key, sigp, lenp, data, datalen);
-}
-
-static int
-userauth_pubkey_agent(Authctxt *authctxt)
-{
- static int called = 0;
- int ret = 0;
- char *comment;
- Key *k;
-
- if (called == 0) {
- if (ssh_get_num_identities(authctxt->agent, 2) == 0)
- debug2("userauth_pubkey_agent: no keys at all");
- called = 1;
- }
- k = ssh_get_next_identity(authctxt->agent, &comment, 2);
- if (k == NULL) {
- debug2("userauth_pubkey_agent: no more keys");
- } else {
- debug("Offering agent key: %s", comment);
- xfree(comment);
- ret = send_pubkey_test(authctxt, k, agent_sign_cb, -1);
- if (ret == 0)
- key_free(k);
- }
- if (ret == 0)
- debug2("userauth_pubkey_agent: no message sent");
- return ret;
-}
-
-int
-userauth_pubkey(Authctxt *authctxt)
-{
- static int idx = 0;
- int sent = 0;
- Key *key;
- char *filename;
-
- if (authctxt->agent != NULL) {
- do {
- sent = userauth_pubkey_agent(authctxt);
- } while (!sent && authctxt->agent->howmany > 0);
- }
- while (!sent && idx < options.num_identity_files) {
- key = options.identity_keys[idx];
- filename = options.identity_files[idx];
- if (key == NULL) {
- debug("Trying private key: %s", filename);
- key = load_identity_file(filename);
- if (key != NULL) {
- sent = sign_and_send_pubkey(authctxt, key,
- key_sign_cb);
- key_free(key);
- }
- } else if (key->type != KEY_RSA1) {
- debug("Trying public key: %s", filename);
- sent = send_pubkey_test(authctxt, key,
- identity_sign_cb, idx);
- }
- idx++;
- }
- return sent;
-}
-
-/*
- * Send userauth request message specifying keyboard-interactive method.
- */
-int
-userauth_kbdint(Authctxt *authctxt)
-{
- static int attempt = 0;
-
- if (attempt++ >= options.number_of_password_prompts)
- return 0;
- /* disable if no SSH2_MSG_USERAUTH_INFO_REQUEST has been seen */
- if (attempt > 1 && !authctxt->info_req_seen) {
- debug3("userauth_kbdint: disable: no info_req_seen");
- dispatch_set(SSH2_MSG_USERAUTH_INFO_REQUEST, NULL);
- return 0;
- }
-
- debug2("userauth_kbdint");
- packet_start(SSH2_MSG_USERAUTH_REQUEST);
- packet_put_cstring(authctxt->server_user);
- packet_put_cstring(authctxt->service);
- packet_put_cstring(authctxt->method->name);
- packet_put_cstring(""); /* lang */
- packet_put_cstring(options.kbd_interactive_devices ?
- options.kbd_interactive_devices : "");
- packet_send();
-
- dispatch_set(SSH2_MSG_USERAUTH_INFO_REQUEST, &input_userauth_info_req);
- return 1;
-}
-
-/*
- * parse INFO_REQUEST, prompt user and send INFO_RESPONSE
- */
-void
-input_userauth_info_req(int type, u_int32_t seq, void *ctxt)
-{
- Authctxt *authctxt = ctxt;
- char *name, *inst, *lang, *prompt, *response;
- u_int num_prompts, i;
- int echo = 0;
-
- debug2("input_userauth_info_req");
-
- if (authctxt == NULL)
- fatal("input_userauth_info_req: no authentication context");
-
- authctxt->info_req_seen = 1;
-
- /*
- * We assume that ASCII is used for user name although it is defined
- * by the protocol to use UTF-8 encoding. Therefore, we don't perform
- * code conversion from UTF-8 to native codeset for the user name
- * string.
- */
- name = packet_get_string(NULL);
- inst = packet_get_utf8_string(NULL);
- lang = packet_get_string(NULL);
- if (strlen(name) != 0) {
- name = g11n_filter_string(name);
- log("%s", name);
- }
- if (strlen(inst) != 0) {
- inst = g11n_filter_string(inst);
- log("%s", inst);
- }
- xfree(name);
- xfree(inst);
- xfree(lang);
-
- num_prompts = packet_get_int();
- /*
- * Begin to build info response packet based on prompts requested.
- * We commit to providing the correct number of responses, so if
- * further on we run into a problem that prevents this, we have to
- * be sure and clean this up and send a correct error response.
- */
- packet_start(SSH2_MSG_USERAUTH_INFO_RESPONSE);
- packet_put_int(num_prompts);
-
- debug2("input_userauth_info_req: num_prompts %d", num_prompts);
- for (i = 0; i < num_prompts; i++) {
- prompt = packet_get_utf8_string(NULL);
- echo = packet_get_char();
-
- prompt = g11n_filter_string(prompt);
- response = read_passphrase(prompt, echo ? RP_ECHO : 0);
-
- /*
- * We assume that ASCII is used for password as well. We
- * don't perform code conversion from native codeset to
- * UTF-8 for the password string.
- */
- packet_put_cstring(response);
- memset(response, 0, strlen(response));
- xfree(response);
- xfree(prompt);
- }
- packet_check_eom(); /* done with parsing incoming message. */
-
- packet_add_padding(64);
- packet_send();
-}
-
-static int
-ssh_keysign(Key *key, u_char **sigp, u_int *lenp,
- u_char *data, u_int datalen)
-{
- Buffer b;
- struct stat st;
- pid_t pid;
- int to[2], from[2], status, version = 2;
-
- debug2("ssh_keysign called");
-
- if (stat(_PATH_SSH_KEY_SIGN, &st) < 0) {
- error("ssh_keysign: no installed: %s", strerror(errno));
- return -1;
- }
- if (fflush(stdout) != 0)
- error("ssh_keysign: fflush: %s", strerror(errno));
- if (pipe(to) < 0) {
- error("ssh_keysign: pipe: %s", strerror(errno));
- return -1;
- }
- if (pipe(from) < 0) {
- error("ssh_keysign: pipe: %s", strerror(errno));
- return -1;
- }
- if ((pid = fork()) < 0) {
- error("ssh_keysign: fork: %s", strerror(errno));
- return -1;
- }
- if (pid == 0) {
- seteuid(getuid());
- setuid(getuid());
- close(from[0]);
- if (dup2(from[1], STDOUT_FILENO) < 0)
- fatal("ssh_keysign: dup2: %s", strerror(errno));
- close(to[1]);
- if (dup2(to[0], STDIN_FILENO) < 0)
- fatal("ssh_keysign: dup2: %s", strerror(errno));
- close(from[1]);
- close(to[0]);
- execl(_PATH_SSH_KEY_SIGN, _PATH_SSH_KEY_SIGN, (char *) 0);
- fatal("ssh_keysign: exec(%s): %s", _PATH_SSH_KEY_SIGN,
- strerror(errno));
- }
- close(from[1]);
- close(to[0]);
-
- buffer_init(&b);
- buffer_put_int(&b, packet_get_connection_in()); /* send # of socket */
- buffer_put_string(&b, data, datalen);
- ssh_msg_send(to[1], version, &b);
-
- if (ssh_msg_recv(from[0], &b) < 0) {
- error("ssh_keysign: no reply");
- buffer_clear(&b);
- return -1;
- }
- close(from[0]);
- close(to[1]);
-
- while (waitpid(pid, &status, 0) < 0)
- if (errno != EINTR)
- break;
-
- if (buffer_get_char(&b) != version) {
- error("ssh_keysign: bad version");
- buffer_clear(&b);
- return -1;
- }
- *sigp = buffer_get_string(&b, lenp);
- buffer_clear(&b);
-
- return 0;
-}
-
-int
-userauth_hostbased(Authctxt *authctxt)
-{
- Key *private = NULL;
- Sensitive *sensitive = authctxt->sensitive;
- Buffer b;
- u_char *signature, *blob;
- char *chost, *pkalg, *p;
- const char *service;
- u_int blen, slen;
- int ok, i, len, found = 0;
- static int last_hostkey = -1;
-
- /* check for a useful key */
- for (i = 0; i < sensitive->nkeys; i++) {
- private = sensitive->keys[i];
- if (private && private->type != KEY_RSA1 && i > last_hostkey) {
- found = 1;
- last_hostkey = i;
- /* we take and free the key */
- sensitive->keys[i] = NULL;
- break;
- }
- }
- if (!found) {
- debug("No more client hostkeys for hostbased authentication");
- return 0;
- }
- if (key_to_blob(private, &blob, &blen) == 0) {
- key_free(private);
- return 0;
- }
- /* figure out a name for the client host */
- p = get_local_name(packet_get_connection_in());
- if (p == NULL) {
- error("userauth_hostbased: cannot get local ipaddr/name");
- key_free(private);
- return 0;
- }
-
- service = datafellows & SSH_BUG_HBSERVICE ? "ssh-userauth" :
- authctxt->service;
- pkalg = xstrdup(key_ssh_name(private));
-
- len = strlen(p) + 2;
- chost = xmalloc(len);
- strlcpy(chost, p, len);
- strlcat(chost, ".", len);
- xfree(p);
- debug2("userauth_hostbased: chost %s, pkalg %s", chost, pkalg);
-
- buffer_init(&b);
- /* construct data */
- buffer_put_string(&b, session_id2, session_id2_len);
- buffer_put_char(&b, SSH2_MSG_USERAUTH_REQUEST);
- buffer_put_cstring(&b, authctxt->server_user);
- buffer_put_cstring(&b, service);
- buffer_put_cstring(&b, authctxt->method->name);
- buffer_put_cstring(&b, pkalg);
- buffer_put_string(&b, blob, blen);
- buffer_put_cstring(&b, chost);
- buffer_put_cstring(&b, authctxt->local_user);
-#ifdef DEBUG_PK
- buffer_dump(&b);
-#endif
- if (sensitive->external_keysign)
- ok = ssh_keysign(private, &signature, &slen,
- buffer_ptr(&b), buffer_len(&b));
- else
- ok = key_sign(private, &signature, &slen,
- buffer_ptr(&b), buffer_len(&b));
- key_free(private);
- buffer_free(&b);
- if (ok != 0) {
- error("key_sign failed");
- xfree(chost);
- xfree(pkalg);
- return 0;
- }
- packet_start(SSH2_MSG_USERAUTH_REQUEST);
- packet_put_cstring(authctxt->server_user);
- packet_put_cstring(authctxt->service);
- packet_put_cstring(authctxt->method->name);
- packet_put_cstring(pkalg);
- packet_put_string(blob, blen);
- packet_put_cstring(chost);
- packet_put_cstring(authctxt->local_user);
- packet_put_string(signature, slen);
- memset(signature, 's', slen);
- xfree(signature);
- xfree(chost);
- xfree(pkalg);
-
- packet_send();
- return 1;
-}
-
-/* find auth method */
-
-/*
- * given auth method name, if configurable options permit this method fill
- * in auth_ident field and return true, otherwise return false.
- */
-static int
-authmethod_is_enabled(Authmethod *method)
-{
- if (method == NULL)
- return 0;
- /* return false if options indicate this method is disabled */
- if (method->enabled == NULL || *method->enabled == 0)
- return 0;
- /* return false if batch mode is enabled but method needs interactive mode */
- if (method->batch_flag != NULL && *method->batch_flag != 0)
- return 0;
- return 1;
-}
-
-static Authmethod *
-authmethod_lookup(const char *name)
-{
- Authmethod *method = NULL;
- if (name != NULL) {
- for (method = authmethods; method->name != NULL; method++) {
- if (strcmp(name, method->name) == 0)
- return method;
- }
- }
- debug2("Unrecognized authentication method name: %s", name ? name : "NULL");
- return NULL;
-}
-
-/* XXX internal state */
-static Authmethod *current = NULL;
-static char *supported = NULL;
-static char *preferred = NULL;
-
-/*
- * Given the authentication method list sent by the server, return the
- * next method we should try. If the server initially sends a nil list,
- * use a built-in default list.
- */
-static Authmethod *
-authmethod_get(char *authlist)
-{
- char *name = NULL;
- u_int next;
-
- /* Use a suitable default if we're passed a nil list. */
- if (authlist == NULL || strlen(authlist) == 0)
- authlist = options.preferred_authentications;
-
- if (supported == NULL || strcmp(authlist, supported) != 0) {
- debug3("start over, passed a different list %s", authlist);
- if (supported != NULL)
- xfree(supported);
- supported = xstrdup(authlist);
- preferred = options.preferred_authentications;
- debug3("preferred %s", preferred);
- current = NULL;
- } else if (current != NULL && authmethod_is_enabled(current))
- return current;
-
- for (;;) {
- if ((name = match_list(preferred, supported, &next)) == NULL) {
- debug("No more authentication methods to try.");
- current = NULL;
- return NULL;
- }
- preferred += next;
- debug3("authmethod_lookup %s", name);
- debug3("remaining preferred: %s", preferred);
- if ((current = authmethod_lookup(name)) != NULL &&
- authmethod_is_enabled(current)) {
- debug3("authmethod_is_enabled %s", name);
- debug("Next authentication method: %s", name);
- xfree(name);
- return current;
- }
- xfree(name);
- }
-}
-
-static char *
-authmethods_get(void)
-{
- Authmethod *method = NULL;
- Buffer b;
- char *list;
-
- buffer_init(&b);
- for (method = authmethods; method->name != NULL; method++) {
- if (authmethod_is_enabled(method)) {
- if (buffer_len(&b) > 0)
- buffer_append(&b, ",", 1);
- buffer_append(&b, method->name, strlen(method->name));
- }
- }
- buffer_append(&b, "\0", 1);
- list = xstrdup(buffer_ptr(&b));
- buffer_free(&b);
- return list;
-}
diff --git a/usr/src/cmd/ssh/ssh/sshtty.c b/usr/src/cmd/ssh/ssh/sshtty.c
deleted file mode 100644
index b90ff1d1ad..0000000000
--- a/usr/src/cmd/ssh/ssh/sshtty.c
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-/*
- * Copyright (c) 2001 Markus Friedl. All rights reserved.
- * Copyright (c) 2001 Kevin Steves. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: sshtty.c,v 1.3 2002/03/04 17:27:39 stevesk Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "sshtty.h"
-#include "log.h"
-
-static struct termios _saved_tio;
-static int _in_raw_mode = 0;
-
-int
-in_raw_mode(void)
-{
- return _in_raw_mode;
-}
-
-struct termios
-get_saved_tio(void)
-{
- return _saved_tio;
-}
-
-void
-leave_raw_mode(void)
-{
- if (!_in_raw_mode)
- return;
- if (tcsetattr(fileno(stdin), TCSADRAIN, &_saved_tio) == -1)
- perror("tcsetattr");
- else
- _in_raw_mode = 0;
-
- fatal_remove_cleanup((void (*) (void *)) leave_raw_mode, NULL);
-}
-
-void
-enter_raw_mode(void)
-{
- struct termios tio;
-
- if (tcgetattr(fileno(stdin), &tio) == -1) {
- perror("tcgetattr");
- return;
- }
- _saved_tio = tio;
- tio.c_iflag |= IGNPAR;
- tio.c_iflag &= ~(ISTRIP | INLCR | IGNCR | ICRNL | IXON | IXANY | IXOFF);
- tio.c_lflag &= ~(ISIG | ICANON | ECHO | ECHOE | ECHOK | ECHONL);
-#ifdef IEXTEN
- tio.c_lflag &= ~IEXTEN;
-#endif
- tio.c_oflag &= ~OPOST;
- tio.c_cc[VMIN] = 1;
- tio.c_cc[VTIME] = 0;
- if (tcsetattr(fileno(stdin), TCSADRAIN, &tio) == -1)
- perror("tcsetattr");
- else
- _in_raw_mode = 1;
-
- fatal_add_cleanup((void (*) (void *)) leave_raw_mode, NULL);
-}
diff --git a/usr/src/cmd/ssh/sshd/Makefile b/usr/src/cmd/ssh/sshd/Makefile
deleted file mode 100644
index 5c4d296bf3..0000000000
--- a/usr/src/cmd/ssh/sshd/Makefile
+++ /dev/null
@@ -1,119 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
-#
-# cmd/ssh/sshd/Makefile
-
-PROG= sshd
-
-DIRS= $(ROOTLIBSSH) $(ROOTLIBSUNSSH)
-
-OBJS = sshd.o \
- altprivsep.o \
- auth.o \
- auth1.o \
- auth2.o \
- auth-options.o \
- auth2-chall.o \
- auth2-gss.o \
- auth2-hostbased.o \
- auth2-kbdint.o \
- auth2-none.o \
- auth2-passwd.o \
- auth2-pam.o \
- auth2-pubkey.o \
- auth-bsdauth.o \
- auth-chall.o \
- auth-rhosts.o \
- auth-krb4.o \
- auth-krb5.o \
- auth-pam.o \
- auth-passwd.o \
- auth-rsa.o \
- auth-rh-rsa.o \
- auth-sia.o \
- auth-skey.o \
- bsmaudit.o \
- groupaccess.o \
- gss-serv.o \
- loginrec.o \
- servconf.o \
- serverloop.o \
- session.o \
- sshlogin.o \
- sshpty.o
-
-EXTOBJS = sftp-server.o
-
-SRCS = $(OBJS:.o=.c) ../sftp-server/sftp-server.c
-
-include ../../Makefile.cmd
-include ../Makefile.ssh-common
-
-LDLIBS += $(SSH_COMMON_LDLIBS) -lsocket \
- -lnsl \
- -lz \
- -lpam \
- -lbsm \
- -lwrap \
- -lcrypto \
- -lgss \
- -lcontract
-MAPFILES = $(MAPFILE.INT) $(MAPFILE.NGB)
-LDFLAGS += $(MAPFILES:%=-M%)
-
-POFILE_DIR= ..
-
-.KEEP_STATE:
-
-.PARALLEL: $(OBJS)
-
-all: $(PROG)
-
-$(PROG): $(OBJS) $(EXTOBJS) $(MAPFILES) ../libssh/$(MACH)/libssh.a \
- ../libopenbsd-compat/$(MACH)/libopenbsd-compat.a
- $(LINK.c) $(OBJS) $(EXTOBJS) -o $@ $(LDLIBS) $(DYNFLAGS)
- $(POST_PROCESS)
-
-%.o : ../sftp-server/%.c
- $(COMPILE.c) -o $@ $<
- $(POST_PROCESS_O)
-
-install: all $(DIRS) $(ROOTLIBSSHPROG) $(ROOTLIBSSH) $(ROOTLIBSUNSSH)
-
-
-$(ROOTLIBSSHPROG)/%: %
- $(INS.file)
-
-$(ROOTLIBSUNSSHPROG)/%: %
- $(INS.file)
-
-$(DIRS):
- $(INS.dir)
-
-clean:
- $(RM) $(OBJS) $(EXTOBJS)
-
-lint: lint_SRCS
-
-include ../Makefile.msg.targ
-include ../../Makefile.targ
diff --git a/usr/src/cmd/ssh/sshd/altprivsep.c b/usr/src/cmd/ssh/sshd/altprivsep.c
deleted file mode 100644
index 6f954feab5..0000000000
--- a/usr/src/cmd/ssh/sshd/altprivsep.c
+++ /dev/null
@@ -1,1187 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- *
- * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- */
-
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/socket.h>
-
-#include <pwd.h>
-
-#include "includes.h"
-#include "atomicio.h"
-#include "auth.h"
-#include "bufaux.h"
-#include "buffer.h"
-#include "cipher.h"
-#include "compat.h"
-#include "dispatch.h"
-#include "getput.h"
-#include "kex.h"
-#include "log.h"
-#include "mac.h"
-#include "packet.h"
-#include "uidswap.h"
-#include "ssh2.h"
-#include "sshlogin.h"
-#include "xmalloc.h"
-#include "altprivsep.h"
-#include "canohost.h"
-#include "engine.h"
-#include "servconf.h"
-
-#ifdef HAVE_BSM
-#include "bsmaudit.h"
-adt_session_data_t *ah = NULL;
-#endif /* HAVE_BSM */
-
-#ifdef GSSAPI
-#include "ssh-gss.h"
-extern Gssctxt *xxx_gssctxt;
-#endif /* GSSAPI */
-
-extern Kex *xxx_kex;
-extern u_char *session_id2;
-extern int session_id2_len;
-
-static Buffer to_monitor;
-static Buffer from_monitor;
-
-/*
- * Sun's Alternative Privilege Separation basics:
- *
- * Abstract
- * --------
- *
- * sshd(1M) fork()s and drops privs in the child while retaining privs
- * in the parent (a.k.a., the monitor). The unprivileged sshd and the
- * monitor talk over a pipe using a simple protocol.
- *
- * The monitor protocol is all about having the monitor carry out the
- * only operations that require privileges OR access to privileged
- * resources. These are: utmpx/wtmpx record keeping, auditing, and
- * SSHv2 re-keying.
- *
- * Re-Keying
- * ---------
- *
- * Re-keying is the only protocol version specific aspect of sshd in
- * which the monitor gets involved.
- *
- * The monitor processes all SSHv2 re-key protocol packets, but the
- * unprivileged sshd process does the transport layer crypto for those
- * packets.
- *
- * The monitor and its unprivileged sshd child process treat
- * SSH_MSG_NEWKEYS SSH2 messages specially: a) the monitor does not call
- * set_newkeys(), but b) the child asks the monitor for the set of
- * negotiated algorithms, key, IV and what not for the relevant
- * transport direction and then calls set_newkeys().
- *
- * Monitor Protocol
- * ----------------
- *
- * Monitor IPC message formats are similar to SSHv2 messages, minus
- * compression, encryption, padding and MACs:
- *
- * - 4 octet message length
- * - message data
- * - 1 octet message type
- * - message data
- *
- * In broad strokes:
- *
- * - IPC: pipe, exit(2)/wait4(2)
- *
- * - threads: the monitor and child are single-threaded
- *
- * - monitor main loop: a variant of server_loop2(), for re-keying only
- * - unpriv child main loop: server_loop2(), as usual
- *
- * - protocol:
- * - key exchange packets are always forwarded as is to the monitor
- * - newkeys, record_login(), record_logout() are special packets
- * using the packet type range reserved for local extensions
- *
- * - the child drops privs and runs like a normal sshd, except that it
- * sets dispatch handlers for key exchange packets that forward the
- * packets to the monitor
- *
- * Event loops:
- *
- * - all monitor protocols are synchronous: because the SSHv2 rekey
- * protocols are synchronous and because the other monitor operations
- * are synchronous (or have no replies),
- *
- * - server_loop2() is modified to check the monitor pipe for rekey
- * packets to forward to the client
- *
- * - and dispatch handlers are set, upon receipt of KEXINIT (and reset
- * when NEWKEYS is sent out) to forward incoming rekey packets to the
- * monitor.
- *
- * - the monitor runs an event loop not unlike server_loop2() and runs
- * key exchanges almost exactly as a pre-altprivsep sshd would
- *
- * - unpriv sshd exit -> monitor cleanup (including audit logout) and exit
- *
- * - fatal() in monitor -> forcibly shutdown() socket and kill/wait for
- * child (so that the audit event for the logout better reflects
- * reality -- i.e., logged out means logged out, but for bg jobs)
- *
- * Message formats:
- *
- * - key exchange packets/replies forwarded "as is"
- *
- * - all other monitor requests are sent as SSH2_PRIV_MSG_ALTPRIVSEP and have a
- * sub-type identifier (one octet)
- * - private request sub-types include:
- * - get new shared secret from last re-key
- * - record login (utmpx/wtmpx), request data contains three arguments:
- * pid, ttyname, program name
- * - record logout (utmpx/wtmpx), request data contains one argument: pid
- *
- * Reply sub-types include:
- *
- * - NOP (for record_login/logout)
- * - new shared secret from last re-key
- */
-
-static int aps_started = 0;
-static int is_monitor = 0;
-
-static pid_t monitor_pid, child_pid;
-static int pipe_fds[2];
-static int pipe_fd = -1;
-static Buffer input_pipe, output_pipe; /* for pipe I/O */
-
-static Authctxt *xxx_authctxt;
-
-/* Monitor functions */
-extern void aps_monitor_loop(Authctxt *authctxt, pid_t child_pid);
-static void aps_record_login(void);
-static void aps_record_logout(void);
-static void aps_start_rekex(void);
-Authctxt *aps_read_auth_context(void);
-
-/* main functions for handling the monitor */
-static pid_t altprivsep_start_monitor(Authctxt **authctxt);
-static void altprivsep_do_monitor(Authctxt *authctxt, pid_t child_pid);
-static int altprivsep_started(void);
-static int altprivsep_is_monitor(void);
-
-/* calls _to_ monitor from unprivileged process */
-static void altprivsep_get_newkeys(enum kex_modes mode);
-
-/* monitor-side fatal_cleanup callbacks */
-static void altprivsep_shutdown_sock(void *arg);
-
-/* Altprivsep packet utilities for communication with the monitor */
-static void altprivsep_packet_start(u_char);
-static int altprivsep_packet_send(void);
-static int altprivsep_fwd_packet(u_char type);
-
-static int altprivsep_packet_read(void);
-static void altprivsep_packet_read_expect(int type);
-
-static void altprivsep_packet_put_char(int ch);
-static void altprivsep_packet_put_int(u_int value);
-static void altprivsep_packet_put_cstring(const char *str);
-static void altprivsep_packet_put_raw(const void *buf, u_int len);
-
-static u_int altprivsep_packet_get_char(void);
-static void *altprivsep_packet_get_raw(u_int *length_ptr);
-static void *altprivsep_packet_get_string(u_int *length_ptr);
-
-Kex *prepare_for_ssh2_kex(void);
-
-/*
- * Start monitor from privileged sshd process.
- *
- * Return values are like fork(2); the parent is the monitor. The caller should
- * fatal() on error.
- *
- * Note that the monitor waits until the still privileged child finishes the
- * authentication. The child drops its privileges after the authentication.
- */
-static pid_t
-altprivsep_start_monitor(Authctxt **authctxt)
-{
- pid_t pid;
- int junk;
-
- if (aps_started)
- fatal("Monitor startup failed: missing state");
-
- buffer_init(&output_pipe);
- buffer_init(&input_pipe);
-
- if (pipe(pipe_fds) != 0) {
- error("Monitor startup failure: could not create pipes: %s",
- strerror(errno));
- return (-1);
- }
-
- (void) fcntl(pipe_fds[0], F_SETFD, FD_CLOEXEC);
- (void) fcntl(pipe_fds[1], F_SETFD, FD_CLOEXEC);
-
- monitor_pid = getpid();
-
- if ((pid = fork()) > 0) {
- /*
- * From now on, all debug messages from monitor will have prefix
- * "monitor "
- */
- set_log_txt_prefix("monitor ");
- (void) prepare_for_ssh2_kex();
- packet_set_server();
- /* parent */
- child_pid = pid;
-
- debug2("Monitor pid %ld, unprivileged child pid %ld",
- monitor_pid, child_pid);
-
- (void) close(pipe_fds[1]);
- pipe_fd = pipe_fds[0];
-
- /*
- * Signal readiness of the monitor and then read the
- * authentication context from the child.
- */
- (void) write(pipe_fd, &pid, sizeof (pid));
- packet_set_monitor(pipe_fd);
- xxx_authctxt = *authctxt = aps_read_auth_context();
-
- if (fcntl(pipe_fd, F_SETFL, O_NONBLOCK) < 0)
- error("fcntl O_NONBLOCK: %.100s", strerror(errno));
-
- aps_started = 1;
- is_monitor = 1;
-
- debug2("Monitor started");
-
- return (pid);
- }
-
- if (pid < 0) {
- debug2("Monitor startup failure: could not fork unprivileged"
- " process: %s", strerror(errno));
- return (pid);
- }
-
- /* this is the child that will later drop privileges */
-
- /* note that Solaris has bi-directional pipes so one pipe is enough */
- (void) close(pipe_fds[0]);
- pipe_fd = pipe_fds[1];
-
- /* wait for monitor to be ready */
- debug2("Waiting for monitor");
- (void) read(pipe_fd, &junk, sizeof (junk));
- debug2("Monitor signalled readiness");
-
- buffer_init(&to_monitor);
- buffer_init(&from_monitor);
-
- /* AltPrivSep interfaces are set up */
- aps_started = 1;
- return (pid);
-}
-
-int
-altprivsep_get_pipe_fd(void)
-{
- return (pipe_fd);
-}
-
-/*
- * This function is used in the unprivileged child for all packets in the range
- * between SSH2_MSG_KEXINIT and SSH2_MSG_TRANSPORT_MAX.
- */
-void
-altprivsep_rekey(int type, u_int32_t seq, void *ctxt)
-{
- Kex *kex = (Kex *)ctxt;
-
- if (kex == NULL)
- fatal("Missing key exchange context in unprivileged process");
-
- if (type != SSH2_MSG_NEWKEYS) {
- debug2("Forwarding re-key packet (%d) to monitor", type);
- if (!altprivsep_fwd_packet(type))
- fatal("altprivsep_rekey: Monitor not responding");
- }
-
- /* tell server_loop2() that we're re-keying */
- kex->done = 0;
-
- /* NEWKEYS is special: get the new keys for client->server direction */
- if (type == SSH2_MSG_NEWKEYS) {
- debug2("received SSH2_MSG_NEWKEYS packet - "
- "getting new inbound keys from the monitor");
- altprivsep_get_newkeys(MODE_IN);
- kex->done = 1;
- }
-}
-
-void
-altprivsep_process_input(fd_set *rset)
-{
- void *data;
- int type;
- u_int dlen;
-
- if (pipe_fd == -1)
- return;
-
- if (!FD_ISSET(pipe_fd, rset))
- return;
-
- debug2("reading from pipe to monitor (%d)", pipe_fd);
- if ((type = altprivsep_packet_read()) == -1)
- fatal("altprivsep_process_input: Monitor not responding");
-
- if (!compat20)
- return; /* shouldn't happen! but be safe */
-
- if (type == 0)
- return; /* EOF -- nothing to do here */
-
- if (type >= SSH2_MSG_MAX)
- fatal("Received garbage from monitor");
-
- debug2("Read packet type %d from pipe to monitor", (u_int)type);
-
- if (type == SSH2_PRIV_MSG_ALTPRIVSEP)
- return; /* shouldn't happen! */
-
- /* NEWKEYS is special: get the new keys for server->client direction */
- if (type == SSH2_MSG_NEWKEYS) {
- debug2("forwarding SSH2_MSG_NEWKEYS packet we got from monitor to "
- "the client");
- packet_start(SSH2_MSG_NEWKEYS);
- packet_send();
- debug2("getting new outbound keys from the monitor");
- altprivsep_get_newkeys(MODE_OUT);
- return;
- }
-
- data = altprivsep_packet_get_raw(&dlen);
-
- packet_start((u_char)type);
-
- if (data != NULL && dlen > 0)
- packet_put_raw(data, dlen);
-
- packet_send();
-}
-
-static void
-altprivsep_do_monitor(Authctxt *authctxt, pid_t child_pid)
-{
- aps_monitor_loop(authctxt, child_pid);
-}
-
-static int
-altprivsep_started(void)
-{
- return (aps_started);
-}
-
-static int
-altprivsep_is_monitor(void)
-{
- return (is_monitor);
-}
-
-/*
- * A fatal cleanup function to forcibly shutdown the connection socket
- */
-static void
-altprivsep_shutdown_sock(void *arg)
-{
- int sock;
-
- if (arg == NULL)
- return;
-
- sock = *(int *)arg;
-
- (void) shutdown(sock, SHUT_RDWR);
-}
-
-/* Calls _to_ monitor from unprivileged process */
-static int
-altprivsep_fwd_packet(u_char type)
-{
- u_int len;
- void *data;
-
- altprivsep_packet_start(type);
- data = packet_get_raw(&len);
- altprivsep_packet_put_raw(data, len);
-
- /* packet_send()s any replies from the monitor to the client */
- return (altprivsep_packet_send());
-}
-
-extern Newkeys *current_keys[MODE_MAX];
-
-/* To be called from packet.c:set_newkeys() before referencing current_keys */
-static void
-altprivsep_get_newkeys(enum kex_modes mode)
-{
- Newkeys *newkeys;
- Comp *comp;
- Enc *enc;
- Mac *mac;
- u_int len;
-
- if (!altprivsep_started())
- return;
-
- if (altprivsep_is_monitor())
- return; /* shouldn't happen */
-
- /* request new keys */
- altprivsep_packet_start(SSH2_PRIV_MSG_ALTPRIVSEP);
- altprivsep_packet_put_char(APS_MSG_NEWKEYS_REQ);
- altprivsep_packet_put_int((u_int)mode);
- altprivsep_packet_send();
- altprivsep_packet_read_expect(SSH2_PRIV_MSG_ALTPRIVSEP);
- if (altprivsep_packet_get_char() != APS_MSG_NEWKEYS_REP)
- fatal("Received garbage from monitor during re-keying");
-
- newkeys = xmalloc(sizeof (*newkeys));
- memset(newkeys, 0, sizeof (*newkeys));
-
- enc = &newkeys->enc;
- mac = &newkeys->mac;
- comp = &newkeys->comp;
-
- /* Cipher name, key, IV */
- enc->name = altprivsep_packet_get_string(NULL);
- if ((enc->cipher = cipher_by_name(enc->name)) == NULL)
- fatal("Monitor negotiated an unknown cipher during re-key");
-
- enc->key = altprivsep_packet_get_string(&enc->key_len);
- enc->iv = altprivsep_packet_get_string(&enc->block_size);
-
- /* MAC name */
- mac->name = altprivsep_packet_get_string(NULL);
- if (mac_setup(mac, mac->name) < 0)
- fatal("Monitor negotiated an unknown MAC algorithm "
- "during re-key");
-
- mac->key = altprivsep_packet_get_string(&len);
- if (len > mac->key_len)
- fatal("%s: bad mac key length: %d > %d", __func__, len,
- mac->key_len);
-
- /* Compression algorithm name */
- comp->name = altprivsep_packet_get_string(NULL);
- if (strcmp(comp->name, "zlib") != 0 && strcmp(comp->name, "none") != 0)
- fatal("Monitor negotiated an unknown compression "
- "algorithm during re-key");
-
- comp->type = 0;
- comp->enabled = 0; /* forces compression re-init, as per-spec */
- if (strcmp(comp->name, "zlib") == 0)
- comp->type = 1;
-
- /*
- * Now install new keys
- *
- * For now abuse kex.c/packet.c non-interfaces. Someday, when
- * the many internal interfaces are parametrized, made reentrant
- * and thread-safe, made more consistent, and when necessary-but-
- * currently-missing interfaces are added then this bit of
- * ugliness can be revisited.
- *
- * The ugliness is in the set_newkeys(), its name and the lack
- * of a (Newkeys *) parameter, which forces us to pass the
- * newkeys through current_keys[mode]. But this saves us some
- * lines of code for now, though not comments.
- *
- * Also, we've abused, in the code above, knowledge of what
- * set_newkeys() expects the current_keys[mode] to contain.
- */
- current_keys[mode] = newkeys;
- set_newkeys(mode);
-
-}
-
-void
-altprivsep_record_login(pid_t pid, const char *ttyname)
-{
- altprivsep_packet_start(SSH2_PRIV_MSG_ALTPRIVSEP);
- altprivsep_packet_put_char(APS_MSG_RECORD_LOGIN);
- altprivsep_packet_put_int(pid);
- altprivsep_packet_put_cstring(ttyname);
- altprivsep_packet_send();
- altprivsep_packet_read_expect(SSH2_PRIV_MSG_ALTPRIVSEP);
-}
-
-void
-altprivsep_record_logout(pid_t pid)
-{
- altprivsep_packet_start(SSH2_PRIV_MSG_ALTPRIVSEP);
- altprivsep_packet_put_char(APS_MSG_RECORD_LOGOUT);
- altprivsep_packet_put_int(pid);
- altprivsep_packet_send();
- altprivsep_packet_read_expect(SSH2_PRIV_MSG_ALTPRIVSEP);
-}
-
-void
-altprivsep_start_rekex(void)
-{
- altprivsep_packet_start(SSH2_PRIV_MSG_ALTPRIVSEP);
- altprivsep_packet_put_char(APS_MSG_START_REKEX);
- altprivsep_packet_send();
- altprivsep_packet_read_expect(SSH2_PRIV_MSG_ALTPRIVSEP);
-}
-
-/*
- * The monitor needs some information that its child learns during the
- * authentication process. Since the child was forked before the key exchange
- * and authentication started it must send some context to the monitor after the
- * authentication is finished. Less obvious part - monitor needs the session ID
- * since it is used in the key generation process after the key (re-)exchange is
- * finished.
- */
-void
-altprivsep_send_auth_context(Authctxt *authctxt)
-{
- debug("sending auth context to the monitor");
- altprivsep_packet_start(SSH2_PRIV_MSG_ALTPRIVSEP);
- altprivsep_packet_put_char(APS_MSG_AUTH_CONTEXT);
- altprivsep_packet_put_int(authctxt->pw->pw_uid);
- altprivsep_packet_put_int(authctxt->pw->pw_gid);
- altprivsep_packet_put_cstring(authctxt->pw->pw_name);
- altprivsep_packet_put_raw(session_id2, session_id2_len);
- debug("will send %d bytes of auth context to the monitor",
- buffer_len(&to_monitor));
- altprivsep_packet_send();
- altprivsep_packet_read_expect(SSH2_PRIV_MSG_ALTPRIVSEP);
-}
-
-static void aps_send_newkeys(void);
-
-/* Monitor side dispatch handler for SSH2_PRIV_MSG_ALTPRIVSEP */
-/* ARGSUSED */
-void
-aps_input_altpriv_msg(int type, u_int32_t seq, void *ctxt)
-{
- u_char req_type;
-
- req_type = packet_get_char();
-
- switch (req_type) {
- case APS_MSG_NEWKEYS_REQ:
- aps_send_newkeys();
- break;
- case APS_MSG_RECORD_LOGIN:
- aps_record_login();
- break;
- case APS_MSG_RECORD_LOGOUT:
- aps_record_logout();
- break;
- case APS_MSG_START_REKEX:
- aps_start_rekex();
- break;
- default:
- break;
- }
-}
-
-/* Monitor-side handlers for APS_MSG_* */
-static
-void
-aps_send_newkeys(void)
-{
- Newkeys *newkeys;
- Enc *enc;
- Mac *mac;
- Comp *comp;
- enum kex_modes mode;
-
- /* get direction for which newkeys are wanted */
- mode = (enum kex_modes) packet_get_int();
- packet_check_eom();
-
- /* get those newkeys */
- newkeys = kex_get_newkeys(mode);
- enc = &newkeys->enc;
- mac = &newkeys->mac;
- comp = &newkeys->comp;
-
- /*
- * Negotiated algorithms, client->server and server->client, for
- * cipher, mac and compression.
- */
- packet_start(SSH2_PRIV_MSG_ALTPRIVSEP);
- packet_put_char(APS_MSG_NEWKEYS_REP);
- packet_put_cstring(enc->name);
- packet_put_string(enc->key, enc->key_len);
- packet_put_string(enc->iv, enc->block_size);
- packet_put_cstring(mac->name);
- packet_put_string(mac->key, mac->key_len);
- packet_put_cstring(comp->name);
-
- packet_send();
- free_keys(newkeys);
-}
-
-struct _aps_login_rec {
- pid_t lr_pid;
- char *lr_tty;
- struct _aps_login_rec *next;
-};
-
-typedef struct _aps_login_rec aps_login_rec;
-
-static aps_login_rec *aps_login_list = NULL;
-
-static
-void
-aps_record_login(void)
-{
- aps_login_rec *new_rec;
- struct stat sbuf;
- size_t proc_path_len;
- char *proc_path;
-
- new_rec = xmalloc(sizeof (aps_login_rec));
- memset(new_rec, 0, sizeof (aps_login_rec));
-
- new_rec->lr_pid = packet_get_int();
- new_rec->lr_tty = packet_get_string(NULL);
-
- proc_path_len = snprintf(NULL, 0, "/proc/%d", new_rec->lr_pid);
- proc_path = xmalloc(proc_path_len + 1);
- (void) snprintf(proc_path, proc_path_len + 1, "/proc/%d",
- new_rec->lr_pid);
-
- if (stat(proc_path, &sbuf) ||
- sbuf.st_uid != xxx_authctxt->pw->pw_uid ||
- stat(new_rec->lr_tty, &sbuf) < 0 ||
- sbuf.st_uid != xxx_authctxt->pw->pw_uid) {
- debug2("Spurious record_login request from unprivileged sshd");
- xfree(proc_path);
- xfree(new_rec->lr_tty);
- xfree(new_rec);
- return;
- }
-
- /* Insert new record on list */
- new_rec->next = aps_login_list;
- aps_login_list = new_rec;
-
- record_login(new_rec->lr_pid, new_rec->lr_tty, NULL,
- xxx_authctxt->user);
-
- packet_start(SSH2_PRIV_MSG_ALTPRIVSEP);
- packet_send();
-
- xfree(proc_path);
-}
-
-static
-void
-aps_record_logout(void)
-{
- aps_login_rec **p, *q;
- pid_t pid;
-
- pid = packet_get_int();
- packet_check_eom();
-
- for (p = &aps_login_list; *p != NULL; p = &q->next) {
- q = *p;
- if (q->lr_pid == pid) {
- record_logout(q->lr_pid, q->lr_tty, NULL,
- xxx_authctxt->user);
-
- /* dequeue */
- *p = q->next;
- xfree(q->lr_tty);
- xfree(q);
- break;
- }
- }
-
- packet_start(SSH2_PRIV_MSG_ALTPRIVSEP);
- packet_send();
-}
-
-static
-void
-aps_start_rekex(void)
-{
- /*
- * Send confirmation. We could implement it without that but it doesn't
- * bring any harm to do that and we are consistent with other subtypes
- * of our private SSH2_PRIV_MSG_ALTPRIVSEP message type.
- */
- packet_start(SSH2_PRIV_MSG_ALTPRIVSEP);
- packet_send();
-
- /*
- * KEX_INIT message could be the one that reached the limit. In that
- * case, it was already forwarded to us from the unnprivileged child,
- * and maybe even acted upon. Obviously we must not send another
- * KEX_INIT message.
- */
- if (!(xxx_kex->flags & KEX_INIT_SENT))
- kex_send_kexinit(xxx_kex);
- else
- debug2("rekeying already in progress");
-}
-
-/*
- * This is the monitor side of altprivsep_send_auth_context().
- */
-Authctxt *
-aps_read_auth_context(void)
-{
- unsigned char *tmp;
- Authctxt *authctxt;
-
- /*
- * After the successful authentication we get the context. Getting
- * end-of-file means that authentication failed and we can exit as well.
- */
- debug("reading the context from the child");
- packet_read_expect(SSH2_PRIV_MSG_ALTPRIVSEP);
- debug3("got SSH2_PRIV_MSG_ALTPRIVSEP");
- if (packet_get_char() != APS_MSG_AUTH_CONTEXT) {
- fatal("APS_MSG_AUTH_CONTEXT message subtype expected.");
- }
-
- authctxt = xcalloc(1, sizeof(Authctxt));
- authctxt->pw = xcalloc(1, sizeof(struct passwd));
-
- /* uid_t and gid_t are integers (UNIX spec) */
- authctxt->pw->pw_uid = packet_get_int();
- authctxt->pw->pw_gid = packet_get_int();
- authctxt->pw->pw_name = packet_get_string(NULL);
- authctxt->user = xstrdup(authctxt->pw->pw_name);
- debug3("uid/gid/username %d/%d/%s", authctxt->pw->pw_uid,
- authctxt->pw->pw_gid, authctxt->user);
- session_id2 = (unsigned char *)packet_get_raw((unsigned int*)&session_id2_len);
-
- /* we don't have this for SSH1. In that case, session_id2_len is 0. */
- if (session_id2_len > 0) {
- tmp = (unsigned char *)xmalloc(session_id2_len);
- memcpy(tmp, session_id2, session_id2_len);
- session_id2 = tmp;
- debug3("read session ID (%d B)", session_id2_len);
- xxx_kex->session_id = tmp;
- xxx_kex->session_id_len = session_id2_len;
- }
- debug("finished reading the context");
-
- /* send confirmation */
- packet_start(SSH2_PRIV_MSG_ALTPRIVSEP);
- packet_send();
-
- return (authctxt);
-}
-
-
-/* Utilities for communication with the monitor */
-static void
-altprivsep_packet_start(u_char type)
-{
- buffer_clear(&to_monitor);
- buffer_put_char(&to_monitor, type);
-}
-
-static void
-altprivsep_packet_put_char(int ch)
-{
- buffer_put_char(&to_monitor, ch);
-}
-
-static void
-altprivsep_packet_put_int(u_int value)
-{
- buffer_put_int(&to_monitor, value);
-}
-
-static void
-altprivsep_packet_put_cstring(const char *str)
-{
- buffer_put_cstring(&to_monitor, str);
-}
-
-static void
-altprivsep_packet_put_raw(const void *buf, u_int len)
-{
- buffer_append(&to_monitor, buf, len);
-}
-
-/*
- * Send a monitor packet to the monitor. This function is blocking.
- *
- * Returns -1 if the monitor pipe has been closed earlier, fatal()s if
- * there's any other problems.
- */
-static int
-altprivsep_packet_send(void)
-{
- ssize_t len;
- u_int32_t plen; /* packet length */
- u_char plen_buf[sizeof (plen)];
- u_char padlen; /* padding length */
- fd_set *setp;
- int err;
-
- if (pipe_fd == -1)
- return (-1);
-
- if ((plen = buffer_len(&to_monitor)) == 0)
- return (0);
-
- /*
- * We talk the SSHv2 binary packet protocol to the monitor,
- * using the none cipher, mac and compression algorithms.
- *
- * But, interestingly, the none cipher has a block size of 8
- * bytes, thus we must pad the packet.
- *
- * Also, encryption includes the packet length, so the padding
- * must account for that field. I.e., (sizeof (packet length) +
- * sizeof (padding length) + packet length + padding length) %
- * block_size must == 0.
- *
- * Also, there must be at least four (4) bytes of padding.
- */
- padlen = (8 - ((plen + sizeof (plen) + sizeof (padlen)) % 8)) % 8;
- if (padlen < 4)
- padlen += 8;
-
- /* packet length counts padding and padding length field */
- plen += padlen + sizeof (padlen);
-
- PUT_32BIT(plen_buf, plen);
-
- setp = xmalloc(howmany(pipe_fd + 1, NFDBITS) * sizeof (fd_mask));
- memset(setp, 0, howmany(pipe_fd + 1, NFDBITS) * sizeof (fd_mask));
- FD_SET(pipe_fd, setp);
-
- while (select(pipe_fd + 1, NULL, setp, NULL, NULL) == -1) {
- if (errno == EAGAIN || errno == EINTR)
- continue;
- else
- goto pipe_gone;
- }
-
- xfree(setp);
-
- /* packet length field */
- len = atomicio(write, pipe_fd, plen_buf, sizeof (plen));
-
- if (len != sizeof (plen))
- goto pipe_gone;
-
- /* padding length field */
- len = atomicio(write, pipe_fd, &padlen, sizeof (padlen));
-
- if (len != sizeof (padlen))
- goto pipe_gone;
-
- len = atomicio(write, pipe_fd, buffer_ptr(&to_monitor), plen - 1);
-
- if (len != (plen - 1))
- goto pipe_gone;
-
- buffer_clear(&to_monitor);
-
- return (1);
-
-pipe_gone:
-
- err = errno;
-
- (void) close(pipe_fd);
-
- pipe_fd = -1;
-
- fatal("altprvsep_packet_send: Monitor not responding: %.100s",
- strerror(err));
-
- /* NOTREACHED */
- return (0);
-}
-
-/*
- * Read a monitor packet from the monitor. This function is blocking.
- */
-static int
-altprivsep_packet_read(void)
-{
- ssize_t len = -1;
- u_int32_t plen;
- u_char plen_buf[sizeof (plen)];
- u_char padlen;
- fd_set *setp;
- int err;
-
- if (pipe_fd == -1)
- return (-1);
-
- setp = xmalloc(howmany(pipe_fd + 1, NFDBITS) * sizeof (fd_mask));
- memset(setp, 0, howmany(pipe_fd + 1, NFDBITS) * sizeof (fd_mask));
- FD_SET(pipe_fd, setp);
-
- while (select(pipe_fd + 1, setp, NULL, NULL, NULL) == -1) {
- if (errno == EAGAIN || errno == EINTR)
- continue;
- else
- goto pipe_gone;
- }
-
- xfree(setp);
-
- /* packet length field */
- len = atomicio(read, pipe_fd, plen_buf, sizeof (plen));
-
- plen = GET_32BIT(plen_buf);
-
- if (len != sizeof (plen))
- goto pipe_gone;
-
- /* padding length field */
- len = atomicio(read, pipe_fd, &padlen, sizeof (padlen));
-
- if (len != sizeof (padlen))
- goto pipe_gone;
-
- plen -= sizeof (padlen);
-
- buffer_clear(&from_monitor);
- buffer_append_space(&from_monitor, plen);
-
- /* packet data + padding */
- len = atomicio(read, pipe_fd, buffer_ptr(&from_monitor), plen);
-
- if (len != plen)
- goto pipe_gone;
-
- /* remove padding */
- if (padlen > 0)
- buffer_consume_end(&from_monitor, padlen);
-
- /* packet type */
- return (buffer_get_char(&from_monitor));
-
-pipe_gone:
-
- err = errno;
-
- (void) close(pipe_fd);
-
- pipe_fd = -1;
-
- if (len < 0)
- fatal("altpriv_packet_read: Monitor not responding %.100s",
- strerror(err));
-
- debug2("Monitor pipe closed by monitor");
- return (0);
-}
-
-static void
-altprivsep_packet_read_expect(int expected)
-{
- int type;
-
- type = altprivsep_packet_read();
-
- if (type <= 0)
- fatal("altprivsep_packet_read_expect: Monitor not responding");
-
- if (type != expected)
- fatal("Protocol error in privilege separation; expected "
- "packet type %d, got %d", expected, type);
-}
-
-static u_int
-altprivsep_packet_get_char(void)
-{
- return (buffer_get_char(&from_monitor));
-}
-void
-*altprivsep_packet_get_raw(u_int *length_ptr)
-{
- if (length_ptr != NULL)
- *length_ptr = buffer_len(&from_monitor);
-
- return (buffer_ptr(&from_monitor));
-}
-void
-*altprivsep_packet_get_string(u_int *length_ptr)
-{
- return (buffer_get_string(&from_monitor, length_ptr));
-}
-
-/*
- * Start and execute the code for the monitor which never returns from this
- * function. The child will return and continue in the caller.
- */
-void
-altprivsep_start_and_do_monitor(int use_engine, int inetd, int newsock,
- int statup_pipe)
-{
- pid_t aps_child;
- Authctxt *authctxt;
-
- /*
- * The monitor will packet_close() in packet_set_monitor() called from
- * altprivsep_start_monitor() below to clean up the socket stuff before
- * it switches to pipes for communication to the child. The socket fd is
- * closed there so we must dup it here - monitor needs that socket to
- * shutdown the connection in case of any problem; see comments below.
- * Note that current newsock was assigned to connection_(in|out) which
- * are the variables used in packet_close() to close the communication
- * socket.
- */
- newsock = dup(newsock);
-
- if ((aps_child = altprivsep_start_monitor(&authctxt)) == -1)
- fatal("Monitor could not be started.");
-
- if (aps_child > 0) {
- /* ALTPRIVSEP Monitor */
-
- /*
- * The ALTPRIVSEP monitor here does:
- *
- * - record keeping and auditing
- * - PAM cleanup
- */
-
- /* this is for MaxStartups and the child takes care of that */
- (void) close(statup_pipe);
- (void) pkcs11_engine_load(use_engine);
-
- /*
- * If the monitor fatal()s it will audit/record a logout, so
- * we'd better do something to really mean it: shutdown the
- * socket but leave the child alone -- it's been disconnected
- * and we hope it exits, but killing any pid from a privileged
- * monitor could be dangerous.
- *
- * NOTE: Order matters -- these fatal cleanups must come before
- * the audit logout fatal cleanup as these functions are called
- * in LIFO.
- */
- fatal_add_cleanup((void (*)(void *))altprivsep_shutdown_sock,
- (void *)&newsock);
-
- if (compat20) {
- debug3("Recording SSHv2 session login in wtmpx");
- /*
- * record_login() relies on connection_in to be the
- * socket to get the peer address. The problem is that
- * connection_in had to be set to the pipe descriptor in
- * altprivsep_start_monitor(). It's not nice but the
- * easiest way to get the peer's address is to
- * temporarily set connection_in to the socket's file
- * descriptor.
- */
- packet_set_fds(inetd == 1 ? -1 : newsock, 0);
- record_login(getpid(), NULL, "sshd", authctxt->user);
- packet_set_fds(0, 1);
- }
-
-#ifdef HAVE_BSM
- /* Initialize the group list, audit sometimes needs it. */
- if (initgroups(authctxt->pw->pw_name,
- authctxt->pw->pw_gid) < 0) {
- perror("initgroups");
- exit (1);
- }
-
- /*
- * The monitor process fork()ed before the authentication
- * process started so at this point we have an unaudited
- * context. Thus we need to obtain the audit session data
- * from the authentication process (aps_child) which will
- * have the correct audit context for the user logging in.
- * To do so we pass along the process-ID of the aps_child
- * process so that it is referenced for this audit session
- * rather than referencing the monitor's unaudited context.
- */
- audit_sshd_login(&ah, aps_child);
-
- fatal_add_cleanup((void (*)(void *))audit_sshd_logout,
- (void *)&ah);
-#endif /* HAVE_BSM */
-
-#ifdef GSSAPI
- fatal_add_cleanup((void (*)(void *))ssh_gssapi_cleanup_creds,
- (void *)&xxx_gssctxt);
-#endif /* GSSAPI */
-
- altprivsep_do_monitor(authctxt, aps_child);
-
- /* If we got here the connection is dead. */
- fatal_remove_cleanup((void (*)(void *))altprivsep_shutdown_sock,
- (void *)&newsock);
-
- if (compat20) {
- debug3("Recording SSHv2 session logout in wtmpx");
- record_logout(getpid(), NULL, "sshd", authctxt->user);
- }
-
- /*
- * Make sure the socket is closed. The monitor can't call
- * packet_close here as it's done a packet_set_connection()
- * with the pipe to the child instead of the socket.
- */
- (void) shutdown(newsock, SHUT_RDWR);
-
-#ifdef GSSAPI
- fatal_remove_cleanup((void (*)(void *))ssh_gssapi_cleanup_creds,
- &xxx_gssctxt);
- ssh_gssapi_cleanup_creds(xxx_gssctxt);
- ssh_gssapi_server_mechs(NULL); /* release cached mechs list */
-#endif /* GSSAPI */
-
-#ifdef HAVE_BSM
- fatal_remove_cleanup((void (*)(void *))audit_sshd_logout, (void *)&ah);
- audit_sshd_logout(&ah);
-#endif /* HAVE_BSM */
-
- exit(0);
- } else {
- /*
- * This is the child, close the dup()ed file descriptor for a
- * socket. It's not needed in the child.
- */
- close(newsock);
- }
-}
diff --git a/usr/src/cmd/ssh/sshd/auth-bsdauth.c b/usr/src/cmd/ssh/sshd/auth-bsdauth.c
deleted file mode 100644
index 090fa0ef39..0000000000
--- a/usr/src/cmd/ssh/sshd/auth-bsdauth.c
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright (c) 2001 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#include "includes.h"
-RCSID("$OpenBSD: auth-bsdauth.c,v 1.5 2002/06/30 21:59:45 deraadt Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef BSD_AUTH
-#include "xmalloc.h"
-#include "auth.h"
-#include "log.h"
-
-static void *
-bsdauth_init_ctx(Authctxt *authctxt)
-{
- return authctxt;
-}
-
-int
-bsdauth_query(void *ctx, char **name, char **infotxt,
- u_int *numprompts, char ***prompts, u_int **echo_on)
-{
- Authctxt *authctxt = ctx;
- char *challenge = NULL;
-
- if (authctxt->as != NULL) {
- debug2("bsdauth_query: try reuse session");
- challenge = auth_getitem(authctxt->as, AUTHV_CHALLENGE);
- if (challenge == NULL) {
- auth_close(authctxt->as);
- authctxt->as = NULL;
- }
- }
-
- if (challenge == NULL) {
- debug2("bsdauth_query: new bsd auth session");
- debug3("bsdauth_query: style %s",
- authctxt->style ? authctxt->style : "<default>");
- authctxt->as = auth_userchallenge(authctxt->user,
- authctxt->style, "auth-ssh", &challenge);
- if (authctxt->as == NULL)
- challenge = NULL;
- debug2("bsdauth_query: <%s>", challenge ? challenge : "empty");
- }
-
- if (challenge == NULL)
- return -1;
-
- *name = xstrdup("");
- *infotxt = xstrdup("");
- *numprompts = 1;
- *prompts = xmalloc(*numprompts * sizeof(char *));
- *echo_on = xmalloc(*numprompts * sizeof(u_int));
- (*echo_on)[0] = 0;
- (*prompts)[0] = xstrdup(challenge);
-
- return 0;
-}
-
-int
-bsdauth_respond(void *ctx, u_int numresponses, char **responses)
-{
- Authctxt *authctxt = ctx;
- int authok;
-
- if (authctxt->as == 0)
- error("bsdauth_respond: no bsd auth session");
-
- if (numresponses != 1)
- return -1;
-
- authok = auth_userresponse(authctxt->as, responses[0], 0);
- authctxt->as = NULL;
- debug3("bsdauth_respond: <%s> = <%d>", responses[0], authok);
-
- return (authok == 0) ? -1 : 0;
-}
-
-static void
-bsdauth_free_ctx(void *ctx)
-{
- Authctxt *authctxt = ctx;
-
- if (authctxt && authctxt->as) {
- auth_close(authctxt->as);
- authctxt->as = NULL;
- }
-}
-
-KbdintDevice bsdauth_device = {
- "bsdauth",
- bsdauth_init_ctx,
- bsdauth_query,
- bsdauth_respond,
- bsdauth_free_ctx
-};
-
-KbdintDevice mm_bsdauth_device = {
- "bsdauth",
- bsdauth_init_ctx,
- mm_bsdauth_query,
- mm_bsdauth_respond,
- bsdauth_free_ctx
-};
-#endif
diff --git a/usr/src/cmd/ssh/sshd/auth-chall.c b/usr/src/cmd/ssh/sshd/auth-chall.c
deleted file mode 100644
index 07073f0d98..0000000000
--- a/usr/src/cmd/ssh/sshd/auth-chall.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2001 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: auth-chall.c,v 1.8 2001/05/18 14:13:28 markus Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "auth.h"
-#include "log.h"
-#include "xmalloc.h"
-
-/* limited protocol v1 interface to kbd-interactive authentication */
-
-extern KbdintDevice *devices[];
-static KbdintDevice *device;
-
-char *
-get_challenge(Authctxt *authctxt)
-{
- char *challenge, *name, *info, **prompts;
- u_int i, numprompts;
- u_int *echo_on;
-
- device = devices[0]; /* we always use the 1st device for protocol 1 */
- if (device == NULL)
- return NULL;
- if ((authctxt->kbdintctxt = device->init_ctx(authctxt)) == NULL)
- return NULL;
- if (device->query(authctxt->kbdintctxt, &name, &info,
- &numprompts, &prompts, &echo_on)) {
- device->free_ctx(authctxt->kbdintctxt);
- authctxt->kbdintctxt = NULL;
- return NULL;
- }
- if (numprompts < 1)
- fatal("get_challenge: numprompts < 1");
- challenge = xstrdup(prompts[0]);
- for (i = 0; i < numprompts; i++)
- xfree(prompts[i]);
- xfree(prompts);
- xfree(name);
- xfree(echo_on);
- xfree(info);
-
- return (challenge);
-}
-int
-verify_response(Authctxt *authctxt, const char *response)
-{
- char *resp[1];
- int res;
-
- if (device == NULL)
- return 0;
- if (authctxt->kbdintctxt == NULL)
- return 0;
- resp[0] = (char *)response;
- res = device->respond(authctxt->kbdintctxt, 1, resp);
- device->free_ctx(authctxt->kbdintctxt);
- authctxt->kbdintctxt = NULL;
- return res ? 0 : 1;
-}
diff --git a/usr/src/cmd/ssh/sshd/auth-krb4.c b/usr/src/cmd/ssh/sshd/auth-krb4.c
deleted file mode 100644
index 2a9103f232..0000000000
--- a/usr/src/cmd/ssh/sshd/auth-krb4.c
+++ /dev/null
@@ -1,370 +0,0 @@
-/*
- * Copyright (c) 1999 Dug Song. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: auth-krb4.c,v 1.28 2002/09/26 11:38:43 markus Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "ssh.h"
-#include "ssh1.h"
-#include "packet.h"
-#include "xmalloc.h"
-#include "log.h"
-#include "servconf.h"
-#include "uidswap.h"
-#include "auth.h"
-
-#ifdef AFS
-#include "radix.h"
-#endif
-
-#ifdef KRB4
-extern ServerOptions options;
-
-static int
-krb4_init(void *context)
-{
- static int cleanup_registered = 0;
- Authctxt *authctxt = (Authctxt *)context;
- const char *tkt_root = TKT_ROOT;
- struct stat st;
- int fd;
-
- if (!authctxt->krb4_ticket_file) {
- /* Set unique ticket string manually since we're still root. */
- authctxt->krb4_ticket_file = xmalloc(MAXPATHLEN);
-#ifdef AFS
- if (lstat("/ticket", &st) != -1)
- tkt_root = "/ticket/";
-#endif /* AFS */
- snprintf(authctxt->krb4_ticket_file, MAXPATHLEN, "%s%u_%ld",
- tkt_root, authctxt->pw->pw_uid, (long)getpid());
- krb_set_tkt_string(authctxt->krb4_ticket_file);
- }
- /* Register ticket cleanup in case of fatal error. */
- if (!cleanup_registered) {
- fatal_add_cleanup(krb4_cleanup_proc, authctxt);
- cleanup_registered = 1;
- }
- /* Try to create our ticket file. */
- if ((fd = mkstemp(authctxt->krb4_ticket_file)) != -1) {
- close(fd);
- return (1);
- }
- /* Ticket file exists - make sure user owns it (just passed ticket). */
- if (lstat(authctxt->krb4_ticket_file, &st) != -1) {
- if (st.st_mode == (S_IFREG | S_IRUSR | S_IWUSR) &&
- st.st_uid == authctxt->pw->pw_uid)
- return (1);
- }
- /* Failure - cancel cleanup function, leaving ticket for inspection. */
- log("WARNING: bad ticket file %s", authctxt->krb4_ticket_file);
-
- fatal_remove_cleanup(krb4_cleanup_proc, authctxt);
- cleanup_registered = 0;
-
- xfree(authctxt->krb4_ticket_file);
- authctxt->krb4_ticket_file = NULL;
-
- return (0);
-}
-
-/*
- * try krb4 authentication,
- * return 1 on success, 0 on failure, -1 if krb4 is not available
- */
-int
-auth_krb4_password(Authctxt *authctxt, const char *password)
-{
- AUTH_DAT adata;
- KTEXT_ST tkt;
- struct hostent *hp;
- struct passwd *pw;
- char localhost[MAXHOSTNAMELEN], phost[INST_SZ], realm[REALM_SZ];
- u_int32_t faddr;
- int r;
-
- if ((pw = authctxt->pw) == NULL)
- return (0);
-
- /*
- * Try Kerberos password authentication only for non-root
- * users and only if Kerberos is installed.
- */
- if (pw->pw_uid != 0 && krb_get_lrealm(realm, 1) == KSUCCESS) {
- /* Set up our ticket file. */
- if (!krb4_init(authctxt)) {
- log("Couldn't initialize Kerberos ticket file for %s!",
- pw->pw_name);
- goto failure;
- }
- /* Try to get TGT using our password. */
- r = krb_get_pw_in_tkt((char *) pw->pw_name, "", realm,
- "krbtgt", realm, DEFAULT_TKT_LIFE, (char *)password);
- if (r != INTK_OK) {
- debug("Kerberos v4 password authentication for %s "
- "failed: %s", pw->pw_name, krb_err_txt[r]);
- goto failure;
- }
- /* Successful authentication. */
- chown(tkt_string(), pw->pw_uid, pw->pw_gid);
-
- /*
- * Now that we have a TGT, try to get a local
- * "rcmd" ticket to ensure that we are not talking
- * to a bogus Kerberos server.
- */
- gethostname(localhost, sizeof(localhost));
- strlcpy(phost, (char *)krb_get_phost(localhost),
- sizeof(phost));
- r = krb_mk_req(&tkt, KRB4_SERVICE_NAME, phost, realm, 33);
-
- if (r == KSUCCESS) {
- if ((hp = gethostbyname(localhost)) == NULL) {
- log("Couldn't get local host address!");
- goto failure;
- }
- memmove((void *)&faddr, (void *)hp->h_addr,
- sizeof(faddr));
-
- /* Verify our "rcmd" ticket. */
- r = krb_rd_req(&tkt, KRB4_SERVICE_NAME, phost,
- faddr, &adata, "");
- if (r == RD_AP_UNDEC) {
- /*
- * Probably didn't have a srvtab on
- * localhost. Disallow login.
- */
- log("Kerberos v4 TGT for %s unverifiable, "
- "no srvtab installed? krb_rd_req: %s",
- pw->pw_name, krb_err_txt[r]);
- goto failure;
- } else if (r != KSUCCESS) {
- log("Kerberos v4 %s ticket unverifiable: %s",
- KRB4_SERVICE_NAME, krb_err_txt[r]);
- goto failure;
- }
- } else if (r == KDC_PR_UNKNOWN) {
- /*
- * Disallow login if no rcmd service exists, and
- * log the error.
- */
- log("Kerberos v4 TGT for %s unverifiable: %s; %s.%s "
- "not registered, or srvtab is wrong?", pw->pw_name,
- krb_err_txt[r], KRB4_SERVICE_NAME, phost);
- goto failure;
- } else {
- /*
- * TGT is bad, forget it. Possibly spoofed!
- */
- debug("WARNING: Kerberos v4 TGT possibly spoofed "
- "for %s: %s", pw->pw_name, krb_err_txt[r]);
- goto failure;
- }
- /* Authentication succeeded. */
- return (1);
- } else
- /* Logging in as root or no local Kerberos realm. */
- debug("Unable to authenticate to Kerberos.");
-
- failure:
- krb4_cleanup_proc(authctxt);
-
- if (!options.kerberos_or_local_passwd)
- return (0);
-
- /* Fall back to ordinary passwd authentication. */
- return (-1);
-}
-
-void
-krb4_cleanup_proc(void *context)
-{
- Authctxt *authctxt = (Authctxt *)context;
- debug("krb4_cleanup_proc called");
- if (authctxt->krb4_ticket_file) {
- (void) dest_tkt();
- xfree(authctxt->krb4_ticket_file);
- authctxt->krb4_ticket_file = NULL;
- }
-}
-
-int
-auth_krb4(Authctxt *authctxt, KTEXT auth, char **client, KTEXT reply)
-{
- AUTH_DAT adat = {0};
- Key_schedule schedule;
- struct sockaddr_in local, foreign;
- char instance[INST_SZ];
- socklen_t slen;
- u_int cksum;
- int r, s;
-
- s = packet_get_connection_in();
-
- slen = sizeof(local);
- memset(&local, 0, sizeof(local));
- if (getsockname(s, (struct sockaddr *) & local, &slen) < 0)
- debug("getsockname failed: %.100s", strerror(errno));
- slen = sizeof(foreign);
- memset(&foreign, 0, sizeof(foreign));
- if (getpeername(s, (struct sockaddr *) & foreign, &slen) < 0) {
- debug("getpeername failed: %.100s", strerror(errno));
- fatal_cleanup();
- }
- instance[0] = '*';
- instance[1] = 0;
-
- /* Get the encrypted request, challenge, and session key. */
- if ((r = krb_rd_req(auth, KRB4_SERVICE_NAME, instance,
- 0, &adat, ""))) {
- debug("Kerberos v4 krb_rd_req: %.100s", krb_err_txt[r]);
- return (0);
- }
- des_key_sched((des_cblock *) adat.session, schedule);
-
- *client = xmalloc(MAX_K_NAME_SZ);
- (void) snprintf(*client, MAX_K_NAME_SZ, "%s%s%s@%s", adat.pname,
- *adat.pinst ? "." : "", adat.pinst, adat.prealm);
-
- /* Check ~/.klogin authorization now. */
- if (kuserok(&adat, authctxt->user) != KSUCCESS) {
- log("Kerberos v4 .klogin authorization failed for %s to "
- "account %s", *client, authctxt->user);
- xfree(*client);
- *client = NULL;
- return (0);
- }
- /* Increment the checksum, and return it encrypted with the
- session key. */
- cksum = adat.checksum + 1;
- cksum = htonl(cksum);
-
- /* If we can't successfully encrypt the checksum, we send back an
- empty message, admitting our failure. */
- if ((r = krb_mk_priv((u_char *) & cksum, reply->dat, sizeof(cksum) + 1,
- schedule, &adat.session, &local, &foreign)) < 0) {
- debug("Kerberos v4 mk_priv: (%d) %s", r, krb_err_txt[r]);
- reply->dat[0] = 0;
- reply->length = 0;
- } else
- reply->length = r;
-
- /* Clear session key. */
- memset(&adat.session, 0, sizeof(&adat.session));
- return (1);
-}
-#endif /* KRB4 */
-
-#ifdef AFS
-int
-auth_krb4_tgt(Authctxt *authctxt, const char *string)
-{
- CREDENTIALS creds;
- struct passwd *pw;
-
- if ((pw = authctxt->pw) == NULL)
- goto failure;
-
- temporarily_use_uid(pw);
-
- if (!radix_to_creds(string, &creds)) {
- log("Protocol error decoding Kerberos v4 TGT");
- goto failure;
- }
- if (strncmp(creds.service, "", 1) == 0) /* backward compatibility */
- strlcpy(creds.service, "krbtgt", sizeof creds.service);
-
- if (strcmp(creds.service, "krbtgt")) {
- log("Kerberos v4 TGT (%s%s%s@%s) rejected for %s",
- creds.pname, creds.pinst[0] ? "." : "", creds.pinst,
- creds.realm, pw->pw_name);
- goto failure;
- }
- if (!krb4_init(authctxt))
- goto failure;
-
- if (in_tkt(creds.pname, creds.pinst) != KSUCCESS)
- goto failure;
-
- if (save_credentials(creds.service, creds.instance, creds.realm,
- creds.session, creds.lifetime, creds.kvno, &creds.ticket_st,
- creds.issue_date) != KSUCCESS) {
- debug("Kerberos v4 TGT refused: couldn't save credentials");
- goto failure;
- }
- /* Successful authentication, passed all checks. */
- chown(tkt_string(), pw->pw_uid, pw->pw_gid);
-
- debug("Kerberos v4 TGT accepted (%s%s%s@%s)",
- creds.pname, creds.pinst[0] ? "." : "", creds.pinst, creds.realm);
- memset(&creds, 0, sizeof(creds));
-
- restore_uid();
-
- return (1);
-
- failure:
- krb4_cleanup_proc(authctxt);
- memset(&creds, 0, sizeof(creds));
- restore_uid();
-
- return (0);
-}
-
-int
-auth_afs_token(Authctxt *authctxt, const char *token_string)
-{
- CREDENTIALS creds;
- struct passwd *pw;
- uid_t uid;
-
- if ((pw = authctxt->pw) == NULL)
- return (0);
-
- if (!radix_to_creds(token_string, &creds)) {
- log("Protocol error decoding AFS token");
- return (0);
- }
- if (strncmp(creds.service, "", 1) == 0) /* backward compatibility */
- strlcpy(creds.service, "afs", sizeof creds.service);
-
- if (strncmp(creds.pname, "AFS ID ", 7) == 0)
- uid = atoi(creds.pname + 7);
- else
- uid = pw->pw_uid;
-
- if (kafs_settoken(creds.realm, uid, &creds)) {
- log("AFS token (%s@%s) rejected for %s",
- creds.pname, creds.realm, pw->pw_name);
- memset(&creds, 0, sizeof(creds));
- return (0);
- }
- debug("AFS token accepted (%s@%s)", creds.pname, creds.realm);
- memset(&creds, 0, sizeof(creds));
-
- return (1);
-}
-#endif /* AFS */
diff --git a/usr/src/cmd/ssh/sshd/auth-krb5.c b/usr/src/cmd/ssh/sshd/auth-krb5.c
deleted file mode 100644
index 3f5416af41..0000000000
--- a/usr/src/cmd/ssh/sshd/auth-krb5.c
+++ /dev/null
@@ -1,407 +0,0 @@
-/*
- * Kerberos v5 authentication and ticket-passing routines.
- *
- * $FreeBSD: src/crypto/openssh/auth-krb5.c,v 1.6 2001/02/13 16:58:04 assar Exp $
- */
-/*
- * Copyright (c) 2002 Daniel Kouril. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: auth-krb5.c,v 1.9 2002/09/09 06:48:06 itojun Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "ssh.h"
-#include "ssh1.h"
-#include "packet.h"
-#include "xmalloc.h"
-#include "log.h"
-#include "servconf.h"
-#include "uidswap.h"
-#include "auth.h"
-
-#ifdef KRB5
-#include <krb5.h>
-#ifndef HEIMDAL
-#define krb5_get_err_text(context,code) error_message(code)
-#endif /* !HEIMDAL */
-
-extern ServerOptions options;
-
-static int
-krb5_init(void *context)
-{
- Authctxt *authctxt = (Authctxt *)context;
- krb5_error_code problem;
- static int cleanup_registered = 0;
-
- if (authctxt->krb5_ctx == NULL) {
- problem = krb5_init_context(&authctxt->krb5_ctx);
- if (problem)
- return (problem);
- krb5_init_ets(authctxt->krb5_ctx);
- }
- if (!cleanup_registered) {
- fatal_add_cleanup(krb5_cleanup_proc, authctxt);
- cleanup_registered = 1;
- }
- return (0);
-}
-
-/*
- * Try krb5 authentication. server_user is passed for logging purposes
- * only, in auth is received ticket, in client is returned principal
- * from the ticket
- */
-int
-auth_krb5(Authctxt *authctxt, krb5_data *auth, char **client, krb5_data *reply)
-{
- krb5_error_code problem;
- krb5_principal server;
- krb5_ticket *ticket;
- int fd, ret;
-
- ret = 0;
- server = NULL;
- ticket = NULL;
- reply->length = 0;
-
- problem = krb5_init(authctxt);
- if (problem)
- goto err;
-
- problem = krb5_auth_con_init(authctxt->krb5_ctx,
- &authctxt->krb5_auth_ctx);
- if (problem)
- goto err;
-
- fd = packet_get_connection_in();
-#ifdef HEIMDAL
- problem = krb5_auth_con_setaddrs_from_fd(authctxt->krb5_ctx,
- authctxt->krb5_auth_ctx, &fd);
-#else
- problem = krb5_auth_con_genaddrs(authctxt->krb5_ctx,
- authctxt->krb5_auth_ctx,fd,
- KRB5_AUTH_CONTEXT_GENERATE_REMOTE_FULL_ADDR |
- KRB5_AUTH_CONTEXT_GENERATE_LOCAL_FULL_ADDR);
-#endif
- if (problem)
- goto err;
-
- problem = krb5_sname_to_principal(authctxt->krb5_ctx, NULL, NULL ,
- KRB5_NT_SRV_HST, &server);
- if (problem)
- goto err;
-
- problem = krb5_rd_req(authctxt->krb5_ctx, &authctxt->krb5_auth_ctx,
- auth, server, NULL, NULL, &ticket);
- if (problem)
- goto err;
-
-#ifdef HEIMDAL
- problem = krb5_copy_principal(authctxt->krb5_ctx, ticket->client,
- &authctxt->krb5_user);
-#else
- problem = krb5_copy_principal(authctxt->krb5_ctx,
- ticket->enc_part2->client,
- &authctxt->krb5_user);
-#endif
- if (problem)
- goto err;
-
- /* if client wants mutual auth */
- problem = krb5_mk_rep(authctxt->krb5_ctx, authctxt->krb5_auth_ctx,
- reply);
- if (problem)
- goto err;
-
- /* Check .k5login authorization now. */
- if (!krb5_kuserok(authctxt->krb5_ctx, authctxt->krb5_user,
- authctxt->pw->pw_name))
- goto err;
-
- if (client)
- krb5_unparse_name(authctxt->krb5_ctx, authctxt->krb5_user,
- client);
-
- ret = 1;
- err:
- if (server)
- krb5_free_principal(authctxt->krb5_ctx, server);
- if (ticket)
- krb5_free_ticket(authctxt->krb5_ctx, ticket);
- if (!ret && reply->length) {
- xfree(reply->data);
- memset(reply, 0, sizeof(*reply));
- }
-
- if (problem) {
- if (authctxt->krb5_ctx != NULL)
- debug("Kerberos v5 authentication failed: %s",
- krb5_get_err_text(authctxt->krb5_ctx, problem));
- else
- debug("Kerberos v5 authentication failed: %d",
- problem);
- }
-
- return (ret);
-}
-
-int
-auth_krb5_tgt(Authctxt *authctxt, krb5_data *tgt)
-{
- krb5_error_code problem;
- krb5_ccache ccache = NULL;
- char *pname;
- krb5_creds **creds;
-
- if (authctxt->pw == NULL || authctxt->krb5_user == NULL)
- return (0);
-
- temporarily_use_uid(authctxt->pw);
-
-#ifdef HEIMDAL
- problem = krb5_cc_gen_new(authctxt->krb5_ctx, &krb5_fcc_ops, &ccache);
-#else
-{
- char ccname[40];
- int tmpfd;
-
- snprintf(ccname,sizeof(ccname),"FILE:/tmp/krb5cc_%d_XXXXXX",geteuid());
-
- if ((tmpfd = mkstemp(ccname+strlen("FILE:")))==-1) {
- log("mkstemp(): %.100s", strerror(errno));
- problem = errno;
- goto fail;
- }
- if (fchmod(tmpfd,S_IRUSR | S_IWUSR) == -1) {
- log("fchmod(): %.100s", strerror(errno));
- close(tmpfd);
- problem = errno;
- goto fail;
- }
- close(tmpfd);
- problem = krb5_cc_resolve(authctxt->krb5_ctx, ccname, &ccache);
-}
-#endif
- if (problem)
- goto fail;
-
- problem = krb5_cc_initialize(authctxt->krb5_ctx, ccache,
- authctxt->krb5_user);
- if (problem)
- goto fail;
-
-#ifdef HEIMDAL
- problem = krb5_rd_cred2(authctxt->krb5_ctx, authctxt->krb5_auth_ctx,
- ccache, tgt);
- if (problem)
- goto fail;
-#else
- problem = krb5_rd_cred(authctxt->krb5_ctx, authctxt->krb5_auth_ctx,
- tgt, &creds, NULL);
- if (problem)
- goto fail;
- problem = krb5_cc_store_cred(authctxt->krb5_ctx, ccache, *creds);
- if (problem)
- goto fail;
-#endif
-
- authctxt->krb5_fwd_ccache = ccache;
- ccache = NULL;
-
- authctxt->krb5_ticket_file = (char *)krb5_cc_get_name(authctxt->krb5_ctx, authctxt->krb5_fwd_ccache);
-
- problem = krb5_unparse_name(authctxt->krb5_ctx, authctxt->krb5_user,
- &pname);
- if (problem)
- goto fail;
-
- debug("Kerberos v5 TGT accepted (%s)", pname);
-
- restore_uid();
-
- return (1);
-
- fail:
- if (problem)
- debug("Kerberos v5 TGT passing failed: %s",
- krb5_get_err_text(authctxt->krb5_ctx, problem));
- if (ccache)
- krb5_cc_destroy(authctxt->krb5_ctx, ccache);
-
- restore_uid();
-
- return (0);
-}
-
-int
-auth_krb5_password(Authctxt *authctxt, const char *password)
-{
-#ifndef HEIMDAL
- krb5_creds creds;
- krb5_principal server;
- char ccname[40];
- int tmpfd;
-#endif
- krb5_error_code problem;
-
- if (authctxt->pw == NULL)
- return (0);
-
- temporarily_use_uid(authctxt->pw);
-
- problem = krb5_init(authctxt);
- if (problem)
- goto out;
-
- problem = krb5_parse_name(authctxt->krb5_ctx, authctxt->pw->pw_name,
- &authctxt->krb5_user);
- if (problem)
- goto out;
-
-#ifdef HEIMDAL
- problem = krb5_cc_gen_new(authctxt->krb5_ctx, &krb5_mcc_ops,
- &authctxt->krb5_fwd_ccache);
- if (problem)
- goto out;
-
- problem = krb5_cc_initialize(authctxt->krb5_ctx,
- authctxt->krb5_fwd_ccache, authctxt->krb5_user);
- if (problem)
- goto out;
-
- restore_uid();
- problem = krb5_verify_user(authctxt->krb5_ctx, authctxt->krb5_user,
- authctxt->krb5_fwd_ccache, password, 1, NULL);
- temporarily_use_uid(authctxt->pw);
-
- if (problem)
- goto out;
-
-#else
- problem = krb5_get_init_creds_password(authctxt->krb5_ctx, &creds,
- authctxt->krb5_user, (char *)password, NULL, NULL, 0, NULL, NULL);
- if (problem)
- goto out;
-
- problem = krb5_sname_to_principal(authctxt->krb5_ctx, NULL, NULL,
- KRB5_NT_SRV_HST, &server);
- if (problem)
- goto out;
-
- restore_uid();
- problem = krb5_verify_init_creds(authctxt->krb5_ctx, &creds, server,
- NULL, NULL, NULL);
- krb5_free_principal(authctxt->krb5_ctx, server);
- temporarily_use_uid(authctxt->pw);
- if (problem)
- goto out;
-
- if (!krb5_kuserok(authctxt->krb5_ctx, authctxt->krb5_user,
- authctxt->pw->pw_name)) {
- problem = -1;
- goto out;
- }
-
- snprintf(ccname,sizeof(ccname),"FILE:/tmp/krb5cc_%d_XXXXXX",geteuid());
-
- if ((tmpfd = mkstemp(ccname+strlen("FILE:")))==-1) {
- log("mkstemp(): %.100s", strerror(errno));
- problem = errno;
- goto out;
- }
-
- if (fchmod(tmpfd,S_IRUSR | S_IWUSR) == -1) {
- log("fchmod(): %.100s", strerror(errno));
- close(tmpfd);
- problem = errno;
- goto out;
- }
- close(tmpfd);
-
- problem = krb5_cc_resolve(authctxt->krb5_ctx, ccname, &authctxt->krb5_fwd_ccache);
- if (problem)
- goto out;
-
- problem = krb5_cc_initialize(authctxt->krb5_ctx, authctxt->krb5_fwd_ccache,
- authctxt->krb5_user);
- if (problem)
- goto out;
-
- problem= krb5_cc_store_cred(authctxt->krb5_ctx, authctxt->krb5_fwd_ccache,
- &creds);
- if (problem)
- goto out;
-#endif
-
- authctxt->krb5_ticket_file = (char *)krb5_cc_get_name(authctxt->krb5_ctx, authctxt->krb5_fwd_ccache);
-
- out:
- restore_uid();
-
- if (problem) {
- if (authctxt->krb5_ctx != NULL && problem!=-1)
- debug("Kerberos password authentication failed: %s",
- krb5_get_err_text(authctxt->krb5_ctx, problem));
- else
- debug("Kerberos password authentication failed: %d",
- problem);
-
- krb5_cleanup_proc(authctxt);
-
- if (options.kerberos_or_local_passwd)
- return (-1);
- else
- return (0);
- }
- return (1);
-}
-
-void
-krb5_cleanup_proc(void *context)
-{
- Authctxt *authctxt = (Authctxt *)context;
-
- debug("krb5_cleanup_proc called");
- if (authctxt->krb5_fwd_ccache) {
- krb5_cc_destroy(authctxt->krb5_ctx, authctxt->krb5_fwd_ccache);
- authctxt->krb5_fwd_ccache = NULL;
- }
- if (authctxt->krb5_user) {
- krb5_free_principal(authctxt->krb5_ctx, authctxt->krb5_user);
- authctxt->krb5_user = NULL;
- }
- if (authctxt->krb5_auth_ctx) {
- krb5_auth_con_free(authctxt->krb5_ctx,
- authctxt->krb5_auth_ctx);
- authctxt->krb5_auth_ctx = NULL;
- }
- if (authctxt->krb5_ctx) {
- krb5_free_context(authctxt->krb5_ctx);
- authctxt->krb5_ctx = NULL;
- }
-}
-
-#endif /* KRB5 */
diff --git a/usr/src/cmd/ssh/sshd/auth-options.c b/usr/src/cmd/ssh/sshd/auth-options.c
deleted file mode 100644
index b186cbe045..0000000000
--- a/usr/src/cmd/ssh/sshd/auth-options.c
+++ /dev/null
@@ -1,299 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: auth-options.c,v 1.26 2002/07/30 17:03:55 markus Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "xmalloc.h"
-#include "match.h"
-#include "log.h"
-#include "canohost.h"
-#include "channels.h"
-#include "auth-options.h"
-#include "servconf.h"
-#include "misc.h"
-#include "auth.h"
-
-/* Flags set authorized_keys flags */
-int no_port_forwarding_flag = 0;
-int no_agent_forwarding_flag = 0;
-int no_x11_forwarding_flag = 0;
-int no_pty_flag = 0;
-
-/* "command=" option. */
-char *forced_command = NULL;
-
-/* "environment=" options. */
-struct envstring *custom_environment = NULL;
-
-extern ServerOptions options;
-
-void
-auth_clear_options(void)
-{
- no_agent_forwarding_flag = 0;
- no_port_forwarding_flag = 0;
- no_pty_flag = 0;
- no_x11_forwarding_flag = 0;
- while (custom_environment) {
- struct envstring *ce = custom_environment;
- custom_environment = ce->next;
- xfree(ce->s);
- xfree(ce);
- }
- if (forced_command) {
- xfree(forced_command);
- forced_command = NULL;
- }
- channel_clear_permitted_opens();
- auth_debug_reset();
-}
-
-/*
- * return 1 if access is granted, 0 if not.
- * side effect: sets key option flags
- */
-int
-auth_parse_options(struct passwd *pw, char *opts, char *file, u_long linenum)
-{
- const char *cp;
- int i;
-
- /* reset options */
- auth_clear_options();
-
- if (!opts)
- return 1;
-
- while (*opts && *opts != ' ' && *opts != '\t') {
- cp = "no-port-forwarding";
- if (strncasecmp(opts, cp, strlen(cp)) == 0) {
- auth_debug_add("Port forwarding disabled.");
- no_port_forwarding_flag = 1;
- opts += strlen(cp);
- goto next_option;
- }
- cp = "no-agent-forwarding";
- if (strncasecmp(opts, cp, strlen(cp)) == 0) {
- auth_debug_add("Agent forwarding disabled.");
- no_agent_forwarding_flag = 1;
- opts += strlen(cp);
- goto next_option;
- }
- cp = "no-X11-forwarding";
- if (strncasecmp(opts, cp, strlen(cp)) == 0) {
- auth_debug_add("X11 forwarding disabled.");
- no_x11_forwarding_flag = 1;
- opts += strlen(cp);
- goto next_option;
- }
- cp = "no-pty";
- if (strncasecmp(opts, cp, strlen(cp)) == 0) {
- auth_debug_add("Pty allocation disabled.");
- no_pty_flag = 1;
- opts += strlen(cp);
- goto next_option;
- }
- cp = "command=\"";
- if (strncasecmp(opts, cp, strlen(cp)) == 0) {
- opts += strlen(cp);
- forced_command = xmalloc(strlen(opts) + 1);
- i = 0;
- while (*opts) {
- if (*opts == '"')
- break;
- if (*opts == '\\' && opts[1] == '"') {
- opts += 2;
- forced_command[i++] = '"';
- continue;
- }
- forced_command[i++] = *opts++;
- }
- if (!*opts) {
- debug("%.100s, line %lu: missing end quote",
- file, linenum);
- auth_debug_add("%.100s, line %lu: missing end quote",
- file, linenum);
- xfree(forced_command);
- forced_command = NULL;
- goto bad_option;
- }
- forced_command[i] = 0;
- auth_debug_add("Forced command: %.900s", forced_command);
- opts++;
- goto next_option;
- }
- cp = "environment=\"";
- if (options.permit_user_env &&
- strncasecmp(opts, cp, strlen(cp)) == 0) {
- char *s;
- struct envstring *new_envstring;
-
- opts += strlen(cp);
- s = xmalloc(strlen(opts) + 1);
- i = 0;
- while (*opts) {
- if (*opts == '"')
- break;
- if (*opts == '\\' && opts[1] == '"') {
- opts += 2;
- s[i++] = '"';
- continue;
- }
- s[i++] = *opts++;
- }
- if (!*opts) {
- debug("%.100s, line %lu: missing end quote",
- file, linenum);
- auth_debug_add("%.100s, line %lu: missing end quote",
- file, linenum);
- xfree(s);
- goto bad_option;
- }
- s[i] = 0;
- auth_debug_add("Adding to environment: %.900s", s);
- debug("Adding to environment: %.900s", s);
- opts++;
- new_envstring = xmalloc(sizeof(struct envstring));
- new_envstring->s = s;
- new_envstring->next = custom_environment;
- custom_environment = new_envstring;
- goto next_option;
- }
- cp = "from=\"";
- if (strncasecmp(opts, cp, strlen(cp)) == 0) {
- const char *remote_ip = get_remote_ipaddr();
- const char *remote_host = get_canonical_hostname(
- options.verify_reverse_mapping);
- char *patterns = xmalloc(strlen(opts) + 1);
-
- opts += strlen(cp);
- i = 0;
- while (*opts) {
- if (*opts == '"')
- break;
- if (*opts == '\\' && opts[1] == '"') {
- opts += 2;
- patterns[i++] = '"';
- continue;
- }
- patterns[i++] = *opts++;
- }
- if (!*opts) {
- debug("%.100s, line %lu: missing end quote",
- file, linenum);
- auth_debug_add("%.100s, line %lu: missing end quote",
- file, linenum);
- xfree(patterns);
- goto bad_option;
- }
- patterns[i] = 0;
- opts++;
- if (match_host_and_ip(remote_host, remote_ip,
- patterns) != 1) {
- xfree(patterns);
- log("Authentication tried for %.100s with "
- "correct key but not from a permitted "
- "host (host=%.200s, ip=%.200s).",
- pw->pw_name, remote_host, remote_ip);
- auth_debug_add("Your host '%.200s' is not "
- "permitted to use this key for login.",
- remote_host);
- /* deny access */
- return 0;
- }
- xfree(patterns);
- /* Host name matches. */
- goto next_option;
- }
- cp = "permitopen=\"";
- if (strncasecmp(opts, cp, strlen(cp)) == 0) {
- char host[256], sport[6];
- u_short port;
- char *patterns = xmalloc(strlen(opts) + 1);
-
- opts += strlen(cp);
- i = 0;
- while (*opts) {
- if (*opts == '"')
- break;
- if (*opts == '\\' && opts[1] == '"') {
- opts += 2;
- patterns[i++] = '"';
- continue;
- }
- patterns[i++] = *opts++;
- }
- if (!*opts) {
- debug("%.100s, line %lu: missing end quote",
- file, linenum);
- auth_debug_add("%.100s, line %lu: missing end quote",
- file, linenum);
- xfree(patterns);
- goto bad_option;
- }
- patterns[i] = 0;
- opts++;
- if (sscanf(patterns, "%255[^:]:%5[0-9]", host, sport) != 2 &&
- sscanf(patterns, "%255[^/]/%5[0-9]", host, sport) != 2) {
- debug("%.100s, line %lu: Bad permitopen specification "
- "<%.100s>", file, linenum, patterns);
- auth_debug_add("%.100s, line %lu: "
- "Bad permitopen specification", file, linenum);
- xfree(patterns);
- goto bad_option;
- }
- if ((port = a2port(sport)) == 0) {
- debug("%.100s, line %lu: Bad permitopen port <%.100s>",
- file, linenum, sport);
- auth_debug_add("%.100s, line %lu: "
- "Bad permitopen port", file, linenum);
- xfree(patterns);
- goto bad_option;
- }
- if (options.allow_tcp_forwarding)
- channel_add_permitted_opens(host, port);
- xfree(patterns);
- goto next_option;
- }
-next_option:
- /*
- * Skip the comma, and move to the next option
- * (or break out if there are no more).
- */
- if (!*opts)
- fatal("Bugs in auth-options.c option processing.");
- if (*opts == ' ' || *opts == '\t')
- break; /* End of options. */
- if (*opts != ',')
- goto bad_option;
- opts++;
- /* Process the next option. */
- }
-
- auth_debug_send();
-
- /* grant access */
- return 1;
-
-bad_option:
- log("Bad options in %.100s file, line %lu: %.50s",
- file, linenum, opts);
- auth_debug_add("Bad options in %.100s file, line %lu: %.50s",
- file, linenum, opts);
-
- auth_debug_send();
-
- /* deny access */
- return 0;
-}
diff --git a/usr/src/cmd/ssh/sshd/auth-pam.c b/usr/src/cmd/ssh/sshd/auth-pam.c
deleted file mode 100644
index c3686b4928..0000000000
--- a/usr/src/cmd/ssh/sshd/auth-pam.c
+++ /dev/null
@@ -1,617 +0,0 @@
-/*
- * Copyright (c) 2000 Damien Miller. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
- */
-
-#include "includes.h"
-
-#ifdef USE_PAM
-#include "xmalloc.h"
-#include "log.h"
-#include "auth.h"
-#include "auth-options.h"
-#include "auth-pam.h"
-#include "buffer.h"
-#include "servconf.h"
-#include "canohost.h"
-#include "compat.h"
-#include "misc.h"
-#include "sshlogin.h"
-#include "ssh-gss.h"
-
-#include <security/pam_appl.h>
-
-extern char *__progname;
-
-extern u_int utmp_len;
-extern ServerOptions options;
-
-extern Authmethod method_kbdint;
-
-RCSID("$Id: auth-pam.c,v 1.54 2002/07/28 20:24:08 stevesk Exp $");
-
-#define NEW_AUTHTOK_MSG \
- "Warning: Your password has expired, please change it now."
-
-/* PAM conversation for non-interactive userauth methods */
-static int do_pam_conversation(int num_msg, const struct pam_message **msg,
- struct pam_response **resp, void *appdata_ptr);
-
-static void do_pam_cleanup_proc(void *context);
-
-static char *get_method_name(Authctxt *authctxt);
-
-/* PAM conversation for non-interactive userauth methods */
-static struct pam_conv conv = {
- (int (*)())do_pam_conversation,
- NULL
-};
-static char *__pam_msg = NULL;
-
-static
-char *
-get_method_name(Authctxt *authctxt)
-{
- if (!authctxt)
- return "(unknown)";
-
- if (!compat20)
- return (authctxt->v1_auth_name) ? authctxt->v1_auth_name :
- "(sshv1-unknown)";
-
- if (!authctxt->method || !authctxt->method->name)
- return "(sshv2-unknown)";
-
- return authctxt->method->name;
-}
-
-char *
-derive_pam_service_name(Authmethod *method)
-{
- char *svcname = xmalloc(BUFSIZ);
-
- /*
- * If PamServiceName is set we use that for everything, including
- * SSHv1
- */
- if (options.pam_service_name != NULL) {
- (void) strlcpy(svcname, options.pam_service_name, BUFSIZ);
- return (svcname);
- }
-
- if (compat20 && method) {
- char *method_name = method->name;
-
- if (!method_name)
- fatal("Userauth method unknown while starting PAM");
-
- /*
- * For SSHv2 we use "sshd-<userauth name>
- * The "sshd" prefix can be changed via the PAMServicePrefix
- * sshd_config option.
- */
- if (strcmp(method_name, "none") == 0) {
- snprintf(svcname, BUFSIZ, "%s-none",
- options.pam_service_prefix);
- }
- if (strcmp(method_name, "password") == 0) {
- snprintf(svcname, BUFSIZ, "%s-password",
- options.pam_service_prefix);
- }
- if (strcmp(method_name, "keyboard-interactive") == 0) {
- /* "keyboard-interactive" is too long, shorten it */
- snprintf(svcname, BUFSIZ, "%s-kbdint",
- options.pam_service_prefix);
- }
- if (strcmp(method_name, "publickey") == 0) {
- /* "publickey" is too long, shorten it */
- snprintf(svcname, BUFSIZ, "%s-pubkey",
- options.pam_service_prefix);
- }
- if (strcmp(method_name, "hostbased") == 0) {
- /* "hostbased" can't really be shortened... */
- snprintf(svcname, BUFSIZ, "%s-hostbased",
- options.pam_service_prefix);
- }
- if (strncmp(method_name, "gss", 3) == 0) {
- /* "gss" is too short, elongate it */
- snprintf(svcname, BUFSIZ, "%s-gssapi",
- options.pam_service_prefix);
- }
- return svcname;
- } else {
- /* SSHv1 doesn't get to be so cool */
- snprintf(svcname, BUFSIZ, "%s-v1",
- options.pam_service_prefix);
- }
- return svcname;
-}
-
-void
-new_start_pam(Authctxt *authctxt, struct pam_conv *conv)
-{
- int retval;
- pam_handle_t *pamh;
- const char *rhost;
- char *svc;
- char *user = NULL;
- pam_stuff *pam;
-
- if (authctxt == NULL)
- fatal("Internal error during userauth");
-
- if (compat20 && authctxt->method == NULL)
- fatal("Userauth method unknown while starting PAM");
-
- /* PAM service selected here */
- svc = derive_pam_service_name(authctxt->method);
- debug2("Starting PAM service %s for method %s", svc,
- get_method_name(authctxt));
-
- if (authctxt->user != NULL)
- user = authctxt->user;
-
- /* Cleanup previous PAM state */
- if (authctxt->pam != NULL) {
- fatal_remove_cleanup(&do_pam_cleanup_proc, authctxt->pam);
- do_pam_cleanup_proc(authctxt->pam);
- }
-
- pam = xmalloc(sizeof(pam_stuff));
- (void) memset(pam, 0, sizeof(pam_stuff));
-
- /*
- * pam->last_pam_retval has to be and is considered
- * along with pam->state.
- *
- * pam->state = 0; -> no PAM auth, account, etc, work
- * done yet. (Set by memset() above.)
- *
- * pam->last_pam_retval = PAM_SUCCESS; -> meaningless at
- * this point.
- *
- * See finish_userauth_do_pam() below.
- */
- pam->authctxt = authctxt;
- pam->last_pam_retval = PAM_SUCCESS;
-
- authctxt->pam = pam;
-
- /* Free any previously stored text/error PAM prompts */
- if (__pam_msg) {
- xfree(__pam_msg);
- __pam_msg = NULL;
- }
-
- if ((retval = pam_start(svc, user, conv, &pamh)) != PAM_SUCCESS) {
- fatal("PAM initialization failed during %s userauth",
- get_method_name(authctxt));
- }
-
- free(svc);
-
- fatal_add_cleanup((void (*)(void *)) &do_pam_cleanup_proc,
- (void *) authctxt->pam);
-
- rhost = get_remote_name_or_ip(utmp_len, options.verify_reverse_mapping);
- if ((retval = pam_set_item(pamh, PAM_RHOST, rhost)) != PAM_SUCCESS) {
- (void) pam_end(pamh, retval);
- fatal("Could not set PAM_RHOST item during %s userauth",
- get_method_name(authctxt));
- }
-
- if ((retval = pam_set_item(pamh, PAM_TTY, "sshd")) != PAM_SUCCESS) {
- (void) pam_end(pamh, retval);
- fatal("Could not set PAM_TTY item during %s userauth",
- get_method_name(authctxt));
- }
-
- if (authctxt->cuser != NULL)
- if ((retval = pam_set_item(pamh, PAM_AUSER, authctxt->cuser)) != PAM_SUCCESS) {
- (void) pam_end(pamh, retval);
- fatal("Could not set PAM_AUSER item during %s userauth",
- get_method_name(authctxt));
- }
-
- authctxt->pam->h = pamh;
-}
-
-/*
- * To be called from userauth methods, directly (as in keyboard-interactive) or
- * indirectly (from auth_pam_password() or from do_pam_non_initial_userauth().
- *
- * The caller is responsible for calling new_start_pam() first.
- *
- * PAM state is not cleaned up here on error. This is left to subsequent calls
- * to new_start_pam() or to the cleanup function upon authentication error.
- */
-int
-finish_userauth_do_pam(Authctxt *authctxt)
-{
- int retval;
- char *user, *method;
-
- /* Various checks; fail gracefully */
- if (authctxt == NULL || authctxt->pam == NULL)
- return PAM_SYSTEM_ERR; /* shouldn't happen */
-
- if (compat20) {
- if (authctxt->method == NULL || authctxt->method->name == NULL)
- return PAM_SYSTEM_ERR; /* shouldn't happen */
- method = authctxt->method->name;
- } else if ((method = authctxt->v1_auth_name) == NULL)
- return PAM_SYSTEM_ERR; /* shouldn't happen */
-
- if (AUTHPAM_DONE(authctxt))
- return PAM_SYSTEM_ERR; /* shouldn't happen */
-
- if (!(authctxt->pam->state & PAM_S_DONE_ACCT_MGMT)) {
- retval = pam_acct_mgmt(authctxt->pam->h, 0);
- authctxt->pam->last_pam_retval = retval;
- if (retval == PAM_NEW_AUTHTOK_REQD) {
- userauth_force_kbdint();
- return retval;
- }
- if (retval != PAM_SUCCESS)
- return retval;
- authctxt->pam->state |= PAM_S_DONE_ACCT_MGMT;
- }
-
- /*
- * Handle PAM_USER change, if any.
- *
- * We do this before pam_open_session() because we need the PAM_USER's
- * UID for:
- *
- * a) PermitRootLogin checking
- * b) to get at the lastlog entry before pam_open_session() updates it.
- */
- retval = pam_get_item(authctxt->pam->h, PAM_USER, (void **) &user);
- if (retval != PAM_SUCCESS) {
- fatal("PAM failure: pam_get_item(PAM_USER) "
- "returned %d: %.200s", retval,
- PAM_STRERROR(authctxt->pam->h, retval));
- }
-
- if (user == NULL || *user == '\0') {
- debug("PAM set NULL PAM_USER");
- return PAM_PERM_DENIED;
- }
-
- if (strcmp(user, authctxt->user) != 0) {
- log("PAM changed the SSH username");
- pwfree(&authctxt->pw);
- authctxt->pw = getpwnamallow(user);
- authctxt->valid = (authctxt->pw != NULL);
- xfree(authctxt->user);
- authctxt->user = xstrdup(user);
- }
-
- if (!authctxt->valid) {
- debug2("PAM set PAM_USER to unknown user");
- /*
- * Return success, userauth_finish() will catch
- * this and send back a failure message.
- */
- return PAM_SUCCESS;
- }
-
- /* Check PermitRootLogin semantics */
- if (authctxt->pw->pw_uid == 0 && !auth_root_allowed(method))
- return PAM_PERM_DENIED;
-
- if (!(authctxt->pam->state & PAM_S_DONE_SETCRED)) {
- retval = pam_setcred(authctxt->pam->h,
- PAM_ESTABLISH_CRED);
- authctxt->pam->last_pam_retval = retval;
- if (retval != PAM_SUCCESS)
- return retval;
- authctxt->pam->state |= PAM_S_DONE_SETCRED;
-
-#ifdef GSSAPI
- /*
- * Store GSS-API delegated creds after pam_setcred(), which may
- * have set the current credential store.
- */
- ssh_gssapi_storecreds(NULL, authctxt);
-#endif /* GSSAPI */
- }
-
- /*
- * On Solaris pam_unix_session.so updates the lastlog, but does
- * not converse a PAM_TEXT_INFO message about it. So we need to
- * fetch the lastlog entry here and save it for use later.
- */
- authctxt->last_login_time =
- get_last_login_time(authctxt->pw->pw_uid,
- authctxt->pw->pw_name,
- authctxt->last_login_host,
- sizeof(authctxt->last_login_host));
-
- if (!(authctxt->pam->state & PAM_S_DONE_OPEN_SESSION)) {
- retval = pam_open_session(authctxt->pam->h, 0);
- authctxt->pam->last_pam_retval = retval;
- if (retval != PAM_SUCCESS)
- return retval;
- authctxt->pam->state |= PAM_S_DONE_OPEN_SESSION;
- }
-
- /*
- * All PAM work done successfully.
- *
- * PAM handle stays around so we can call pam_close_session() on
- * it later.
- */
- return PAM_SUCCESS;
-}
-
-/*
- * PAM conversation function for non-interactive userauth methods that
- * really cannot do any prompting. Password userauth and CHANGEREQ can
- * always set the PAM_AUTHTOK and PAM_OLDAUTHTOK items to avoid
- * conversation (and if they do and nonetheless some module tries to
- * converse, then password userauth / CHANGEREQ MUST fail).
- *
- * Except, PAM_TEXT_INFO and PAM_ERROR_MSG prompts can be squirelled
- * away and shown to the user later.
- *
- * Keyboard-interactive userauth has its own much more interesting
- * conversation function.
- *
- */
-static int
-do_pam_conversation(int num_msg, const struct pam_message **msg,
- struct pam_response **resp, void *appdata_ptr)
-{
- struct pam_response *reply;
- int count;
-
- /* PAM will free this later */
- reply = xmalloc(num_msg * sizeof(*reply));
-
- (void) memset(reply, 0, num_msg * sizeof(*reply));
-
- for (count = 0; count < num_msg; count++) {
- /*
- * We can't use stdio yet, queue messages for
- * printing later
- */
- switch(PAM_MSG_MEMBER(msg, count, msg_style)) {
- case PAM_PROMPT_ECHO_ON:
- xfree(reply);
- return PAM_CONV_ERR;
- case PAM_PROMPT_ECHO_OFF:
- xfree(reply);
- return PAM_CONV_ERR;
- break;
- case PAM_ERROR_MSG:
- case PAM_TEXT_INFO:
- if (PAM_MSG_MEMBER(msg, count, msg) != NULL) {
- message_cat(&__pam_msg,
- PAM_MSG_MEMBER(msg, count, msg));
- }
- reply[count].resp = xstrdup("");
- reply[count].resp_retcode = PAM_SUCCESS;
- break;
- default:
- xfree(reply);
- return PAM_CONV_ERR;
- }
- }
-
- *resp = reply;
-
- return PAM_SUCCESS;
-}
-
-/* Called at exit to cleanly shutdown PAM */
-static void
-do_pam_cleanup_proc(void *context)
-{
- int pam_retval;
- pam_stuff *pam = (pam_stuff *) context;
-
- if (pam == NULL)
- return;
-
- if (pam->authctxt != NULL && pam->authctxt->pam == pam) {
- pam->authctxt->pam_retval = pam->last_pam_retval;
- pam->authctxt->pam = NULL;
- pam->authctxt = NULL;
- }
-
- if (pam->h == NULL)
- return;
-
- /*
- * We're in fatal_cleanup() or not in userauth or without a
- * channel -- can't converse now, too bad.
- */
- pam_retval = pam_set_item(pam->h, PAM_CONV, NULL);
- if (pam_retval != PAM_SUCCESS) {
- log("Cannot remove PAM conv, close session or delete creds[%d]: %.200s",
- pam_retval, PAM_STRERROR(pam->h, pam_retval));
- goto cleanup;
- }
-
- if (pam->state & PAM_S_DONE_OPEN_SESSION) {
- pam_retval = pam_close_session(pam->h, 0);
- if (pam_retval != PAM_SUCCESS)
- log("Cannot close PAM session[%d]: %.200s",
- pam_retval, PAM_STRERROR(pam->h, pam_retval));
- }
-
- if (pam->state & PAM_S_DONE_SETCRED) {
- pam_retval = pam_setcred(pam->h, PAM_DELETE_CRED);
- if (pam_retval != PAM_SUCCESS)
- debug("Cannot delete credentials[%d]: %.200s",
- pam_retval, PAM_STRERROR(pam->h, pam_retval));
- }
-
-cleanup:
-
- /* Use the previous PAM result, if not PAM_SUCCESS for pam_end() */
- if (pam->last_pam_retval != PAM_SUCCESS)
- pam_retval = pam_end(pam->h, pam->last_pam_retval);
- else if (pam_retval != PAM_SUCCESS)
- pam_retval = pam_end(pam->h, pam_retval);
- else
- pam_retval = pam_end(pam->h, PAM_ABORT);
-
- if (pam_retval != PAM_SUCCESS)
- log("Cannot release PAM authentication[%d]: %.200s",
- pam_retval, PAM_STRERROR(pam->h, pam_retval));
-
- xfree(pam);
-}
-
-/* Attempt password authentation using PAM */
-int
-auth_pam_password(Authctxt *authctxt, const char *password)
-{
- int retval;
-
- /* Ensure we have a fresh PAM handle / state */
- new_start_pam(authctxt, &conv);
-
- retval = pam_set_item(authctxt->pam->h, PAM_AUTHTOK, password);
- if (retval != PAM_SUCCESS) {
- authctxt->pam->last_pam_retval = retval;
- return 1;
- }
-
- retval = pam_authenticate(authctxt->pam->h,
- options.permit_empty_passwd ? 0 :
- PAM_DISALLOW_NULL_AUTHTOK);
-
- if (retval != PAM_SUCCESS) {
- authctxt->pam->last_pam_retval = retval;
- return 0;
- }
-
- if ((retval = finish_userauth_do_pam(authctxt)) != PAM_SUCCESS)
- return 0;
-
- if (authctxt->method)
- authctxt->method->authenticated = 1; /* SSHv2 */
-
- return 1;
-}
-
-int
-do_pam_non_initial_userauth(Authctxt *authctxt)
-{
- new_start_pam(authctxt, NULL);
- return (finish_userauth_do_pam(authctxt) == PAM_SUCCESS);
-}
-
-/* Cleanly shutdown PAM */
-void finish_pam(Authctxt *authctxt)
-{
- fatal_remove_cleanup(&do_pam_cleanup_proc, authctxt->pam);
- do_pam_cleanup_proc(authctxt->pam);
-}
-
-static
-char **
-find_env(char **env, char *var)
-{
- char **p;
- int len;
-
- if (strchr(var, '=') == NULL)
- len = strlen(var);
- else
- len = (strchr(var, '=') - var) + 1;
-
- for ( p = env ; p != NULL && *p != NULL ; p++ ) {
- if (strncmp(*p, var, len) == 0)
- return (p);
- }
-
- return (NULL);
-}
-
-/* Return list of PAM environment strings */
-char **
-fetch_pam_environment(Authctxt *authctxt)
-{
-#ifdef HAVE_PAM_GETENVLIST
- char **penv;
-
- if (authctxt == NULL || authctxt->pam == NULL ||
- authctxt->pam->h == NULL)
- return (NULL);
-
- penv = pam_getenvlist(authctxt->pam->h);
-
- return (penv);
-#else /* HAVE_PAM_GETENVLIST */
- return(NULL);
-#endif /* HAVE_PAM_GETENVLIST */
-}
-
-void free_pam_environment(char **env)
-{
- int i;
-
- if (env != NULL) {
- for (i = 0; env[i] != NULL; i++)
- xfree(env[i]);
- }
-
- xfree(env);
-}
-
-/* Print any messages that have been generated during authentication */
-/* or account checking to stderr */
-void print_pam_messages(void)
-{
- if (__pam_msg != NULL)
- (void) fputs(__pam_msg, stderr);
-}
-
-/* Append a message to buffer */
-void message_cat(char **p, const char *a)
-{
- char *cp;
- size_t new_len;
-
- new_len = strlen(a);
-
- if (*p) {
- size_t len = strlen(*p);
-
- *p = xrealloc(*p, new_len + len + 2);
- cp = *p + len;
- } else
- *p = cp = xmalloc(new_len + 2);
-
- (void) memcpy(cp, a, new_len);
- cp[new_len] = '\n';
- cp[new_len + 1] = '\0';
-}
-
-#endif /* USE_PAM */
diff --git a/usr/src/cmd/ssh/sshd/auth-passwd.c b/usr/src/cmd/ssh/sshd/auth-passwd.c
deleted file mode 100644
index 815231d4d4..0000000000
--- a/usr/src/cmd/ssh/sshd/auth-passwd.c
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * Password authentication. This file contains the functions to check whether
- * the password is valid for the user.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- *
- * Copyright (c) 1999 Dug Song. All rights reserved.
- * Copyright (c) 2000 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: auth-passwd.c,v 1.27 2002/05/24 16:45:16 stevesk Exp $");
-
-#include "packet.h"
-#include "log.h"
-#include "servconf.h"
-#include "auth.h"
-
-#if !defined(USE_PAM) && !defined(HAVE_OSF_SIA)
-/* Don't need any of these headers for the PAM or SIA cases */
-# ifdef HAVE_CRYPT_H
-# include <crypt.h>
-# endif
-# ifdef WITH_AIXAUTHENTICATE
-# include <login.h>
-# endif
-# ifdef __hpux
-# include <hpsecurity.h>
-# include <prot.h>
-# endif
-# if defined(HAVE_SHADOW_H) && !defined(DISABLE_SHADOW)
-# include <shadow.h>
-# endif
-# if defined(HAVE_GETPWANAM) && !defined(DISABLE_SHADOW)
-# include <sys/label.h>
-# include <sys/audit.h>
-# include <pwdadj.h>
-# endif
-# if defined(HAVE_MD5_PASSWORDS) && !defined(HAVE_MD5_CRYPT)
-# include "md5crypt.h"
-# endif /* defined(HAVE_MD5_PASSWORDS) && !defined(HAVE_MD5_CRYPT) */
-
-# ifdef HAVE_CYGWIN
-# undef ERROR
-# include <windows.h>
-# include <sys/cygwin.h>
-# define is_winnt (GetVersion() < 0x80000000)
-# endif
-#endif /* !USE_PAM && !HAVE_OSF_SIA */
-
-extern ServerOptions options;
-#ifdef WITH_AIXAUTHENTICATE
-extern char *aixloginmsg;
-#endif
-
-/*
- * Tries to authenticate the user using password. Returns true if
- * authentication succeeds.
- */
-int
-auth_password(Authctxt *authctxt, const char *password)
-{
-#if defined(USE_PAM)
- if (*password == '\0' && options.permit_empty_passwd == 0)
- return 0;
- return auth_pam_password(authctxt, password);
-#elif defined(HAVE_OSF_SIA)
- if (*password == '\0' && options.permit_empty_passwd == 0)
- return 0;
- return auth_sia_password(authctxt, password);
-#else
- struct passwd * pw = authctxt->pw;
- char *encrypted_password;
- char *pw_password;
- char *salt;
-#if defined(HAVE_SHADOW_H) && !defined(DISABLE_SHADOW)
- struct spwd *spw;
-#endif
-#if defined(HAVE_GETPWANAM) && !defined(DISABLE_SHADOW)
- struct passwd_adjunct *spw;
-#endif
-#ifdef WITH_AIXAUTHENTICATE
- char *authmsg;
- int authsuccess;
- int reenter = 1;
-#endif
-
- /* deny if no user. */
- if (pw == NULL)
- return 0;
-#ifndef HAVE_CYGWIN
- if (pw->pw_uid == 0 && options.permit_root_login != PERMIT_YES)
- return 0;
-#endif
- if (*password == '\0' && options.permit_empty_passwd == 0)
- return 0;
-#ifdef KRB5
- if (options.kerberos_authentication == 1) {
- int ret = auth_krb5_password(authctxt, password);
- if (ret == 1 || ret == 0)
- return ret;
- /* Fall back to ordinary passwd authentication. */
- }
-#endif
-#ifdef HAVE_CYGWIN
- if (is_winnt) {
- HANDLE hToken = cygwin_logon_user(pw, password);
-
- if (hToken == INVALID_HANDLE_VALUE)
- return 0;
- cygwin_set_impersonation_token(hToken);
- return 1;
- }
-#endif
-#ifdef WITH_AIXAUTHENTICATE
- authsuccess = (authenticate(pw->pw_name,password,&reenter,&authmsg) == 0);
-
- if (authsuccess)
- /* We don't have a pty yet, so just label the line as "ssh" */
- if (loginsuccess(authctxt->user,
- get_canonical_hostname(options.verify_reverse_mapping),
- "ssh", &aixloginmsg) < 0)
- aixloginmsg = NULL;
-
- return(authsuccess);
-#endif
-#ifdef KRB4
- if (options.kerberos_authentication == 1) {
- int ret = auth_krb4_password(authctxt, password);
- if (ret == 1 || ret == 0)
- return ret;
- /* Fall back to ordinary passwd authentication. */
- }
-#endif
-#ifdef BSD_AUTH
- if (auth_userokay(pw->pw_name, authctxt->style, "auth-ssh",
- (char *)password) == 0)
- return 0;
- else
- return 1;
-#endif
- pw_password = pw->pw_passwd;
-
- /*
- * Various interfaces to shadow or protected password data
- */
-#if defined(HAVE_SHADOW_H) && !defined(DISABLE_SHADOW)
- spw = getspnam(pw->pw_name);
- if (spw != NULL)
- pw_password = spw->sp_pwdp;
-#endif /* defined(HAVE_SHADOW_H) && !defined(DISABLE_SHADOW) */
-
-#if defined(HAVE_GETPWANAM) && !defined(DISABLE_SHADOW)
- if (issecure() && (spw = getpwanam(pw->pw_name)) != NULL)
- pw_password = spw->pwa_passwd;
-#endif /* defined(HAVE_GETPWANAM) && !defined(DISABLE_SHADOW) */
-
- /* Check for users with no password. */
- if ((password[0] == '\0') && (pw_password[0] == '\0'))
- return 1;
-
- if (pw_password[0] != '\0')
- salt = pw_password;
- else
- salt = "xx";
-
-#ifdef HAVE_MD5_PASSWORDS
- if (is_md5_salt(salt))
- encrypted_password = md5_crypt(password, salt);
- else
- encrypted_password = crypt(password, salt);
-#else /* HAVE_MD5_PASSWORDS */
- encrypted_password = crypt(password, salt);
-#endif /* HAVE_MD5_PASSWORDS */
-
- /* Authentication is accepted if the encrypted passwords are identical. */
- return (strcmp(encrypted_password, pw_password) == 0);
-#endif /* !USE_PAM && !HAVE_OSF_SIA */
-}
diff --git a/usr/src/cmd/ssh/sshd/auth-rh-rsa.c b/usr/src/cmd/ssh/sshd/auth-rh-rsa.c
deleted file mode 100644
index ab10e1738a..0000000000
--- a/usr/src/cmd/ssh/sshd/auth-rh-rsa.c
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * Rhosts or /etc/hosts.equiv authentication combined with RSA host
- * authentication.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: auth-rh-rsa.c,v 1.34 2002/03/25 09:25:06 markus Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "packet.h"
-#include "uidswap.h"
-#include "log.h"
-#include "servconf.h"
-#include "key.h"
-#include "hostfile.h"
-#include "pathnames.h"
-#include "auth.h"
-#include "canohost.h"
-
-/* import */
-extern ServerOptions options;
-
-int
-auth_rhosts_rsa_key_allowed(struct passwd *pw, char *cuser, char *chost,
- Key *client_host_key)
-{
- HostStatus host_status;
-
- /* Check if we would accept it using rhosts authentication. */
- if (!auth_rhosts(pw, cuser))
- return 0;
-
- host_status = check_key_in_hostfiles(pw, client_host_key,
- chost, _PATH_SSH_SYSTEM_HOSTFILE,
- options.ignore_user_known_hosts ? NULL : _PATH_SSH_USER_HOSTFILE);
-
- return (host_status == HOST_OK);
-}
-
-/*
- * Tries to authenticate the user using the .rhosts file and the host using
- * its host key. Returns true if authentication succeeds.
- */
-int
-auth_rhosts_rsa(struct passwd *pw, char *cuser, Key *client_host_key)
-{
- char *chost;
-
- debug("Trying rhosts with RSA host authentication for client user %.100s",
- cuser);
-
- if (pw == NULL || client_host_key == NULL ||
- client_host_key->rsa == NULL)
- return 0;
-
- chost = (char *)get_canonical_hostname(options.verify_reverse_mapping);
- debug("Rhosts RSA authentication: canonical host %.900s", chost);
-
- if (!auth_rhosts_rsa_key_allowed(pw, cuser, chost, client_host_key)) {
- debug("Rhosts with RSA host authentication denied: unknown or invalid host key");
- packet_send_debug("Your host key cannot be verified: unknown or invalid host key.");
- return 0;
- }
- /* A matching host key was found and is known. */
-
- /* Perform the challenge-response dialog with the client for the host key. */
- if (!auth_rsa_challenge_dialog(client_host_key)) {
- log("Client on %.800s failed to respond correctly to host authentication.",
- chost);
- return 0;
- }
- /*
- * We have authenticated the user using .rhosts or /etc/hosts.equiv,
- * and the host using RSA. We accept the authentication.
- */
-
- verbose("Rhosts with RSA host authentication accepted for %.100s, %.100s on %.700s.",
- pw->pw_name, cuser, chost);
- packet_send_debug("Rhosts with RSA host authentication accepted.");
- return 1;
-}
diff --git a/usr/src/cmd/ssh/sshd/auth-rhosts.c b/usr/src/cmd/ssh/sshd/auth-rhosts.c
deleted file mode 100644
index 2326eef8ae..0000000000
--- a/usr/src/cmd/ssh/sshd/auth-rhosts.c
+++ /dev/null
@@ -1,299 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * Rhosts authentication. This file contains code to check whether to admit
- * the login based on rhosts authentication. This file also processes
- * /etc/hosts.equiv.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: auth-rhosts.c,v 1.28 2002/05/13 21:26:49 markus Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "packet.h"
-#include "uidswap.h"
-#include "pathnames.h"
-#include "log.h"
-#include "servconf.h"
-#include "canohost.h"
-#include "auth.h"
-
-/* import */
-extern ServerOptions options;
-
-/*
- * This function processes an rhosts-style file (.rhosts, .shosts, or
- * /etc/hosts.equiv). This returns true if authentication can be granted
- * based on the file, and returns zero otherwise.
- */
-
-static int
-check_rhosts_file(const char *filename, const char *hostname,
- const char *ipaddr, const char *client_user,
- const char *server_user)
-{
- FILE *f;
- char buf[1024]; /* Must not be larger than host, user, dummy below. */
-
- /* Open the .rhosts file, deny if unreadable */
- f = fopen(filename, "r");
- if (!f)
- return 0;
-
- while (fgets(buf, sizeof(buf), f)) {
- /* All three must be at least as big as buf to avoid overflows. */
- char hostbuf[1024], userbuf[1024], dummy[1024], *host, *user, *cp;
- int negated;
-
- for (cp = buf; *cp == ' ' || *cp == '\t'; cp++)
- ;
- if (*cp == '#' || *cp == '\n' || !*cp)
- continue;
-
- /*
- * NO_PLUS is supported at least on OSF/1. We skip it (we
- * don't ever support the plus syntax).
- */
- if (strncmp(cp, "NO_PLUS", 7) == 0)
- continue;
-
- /*
- * This should be safe because each buffer is as big as the
- * whole string, and thus cannot be overwritten.
- */
- switch (sscanf(buf, "%1024s %1024s %1024s",
- hostbuf, userbuf, dummy)) {
- case 0:
- auth_debug_add("Found empty line in %.100s.", filename);
- continue;
- case 1:
- /* Host name only. */
- strlcpy(userbuf, server_user, sizeof(userbuf));
- break;
- case 2:
- /* Got both host and user name. */
- break;
- case 3:
- auth_debug_add("Found garbage in %.100s.", filename);
- continue;
- default:
- /* Weird... */
- continue;
- }
-
- host = hostbuf;
- user = userbuf;
- negated = 0;
-
- /* Process negated host names, or positive netgroups. */
- if (host[0] == '-') {
- negated = 1;
- host++;
- } else if (host[0] == '+')
- host++;
-
- if (user[0] == '-') {
- negated = 1;
- user++;
- } else if (user[0] == '+')
- user++;
-
- /* Check for empty host/user names (particularly '+'). */
- if (!host[0] || !user[0]) {
- /* We come here if either was '+' or '-'. */
- auth_debug_add("Ignoring wild host/user names in %.100s.",
- filename);
- continue;
- }
- /* Verify that host name matches. */
- if (host[0] == '@') {
- if (!innetgr(host + 1, hostname, NULL, NULL) &&
- !innetgr(host + 1, ipaddr, NULL, NULL))
- continue;
- } else if (strcasecmp(host, hostname) && strcmp(host, ipaddr) != 0)
- continue; /* Different hostname. */
-
- /* Verify that user name matches. */
- if (user[0] == '@') {
- if (!innetgr(user + 1, NULL, client_user, NULL))
- continue;
- } else if (strcmp(user, client_user) != 0)
- continue; /* Different username. */
-
- /* Found the user and host. */
- fclose(f);
-
- /* If the entry was negated, deny access. */
- if (negated) {
- auth_debug_add("Matched negative entry in %.100s.",
- filename);
- return 0;
- }
- /* Accept authentication. */
- return 1;
- }
-
- /* Authentication using this file denied. */
- fclose(f);
- return 0;
-}
-
-/*
- * Tries to authenticate the user using the .shosts or .rhosts file. Returns
- * true if authentication succeeds. If ignore_rhosts is true, only
- * /etc/hosts.equiv will be considered (.rhosts and .shosts are ignored).
- */
-
-int
-auth_rhosts(struct passwd *pw, const char *client_user)
-{
- const char *hostname, *ipaddr;
-
- hostname = get_canonical_hostname(options.verify_reverse_mapping);
- ipaddr = get_remote_ipaddr();
- return auth_rhosts2(pw, client_user, hostname, ipaddr);
-}
-
-static int
-auth_rhosts2_raw(struct passwd *pw, const char *client_user, const char *hostname,
- const char *ipaddr)
-{
- char buf[1024];
- struct stat st;
- static const char *rhosts_files[] = {".shosts", ".rhosts", NULL};
- u_int rhosts_file_index;
-
- debug2("auth_rhosts2: clientuser %s hostname %s ipaddr %s",
- client_user, hostname, ipaddr);
-
- /* no user given */
- if (pw == NULL)
- return 0;
-
- /* Switch to the user's uid. */
- temporarily_use_uid(pw);
- /*
- * Quick check: if the user has no .shosts or .rhosts files, return
- * failure immediately without doing costly lookups from name
- * servers.
- */
- for (rhosts_file_index = 0; rhosts_files[rhosts_file_index];
- rhosts_file_index++) {
- /* Check users .rhosts or .shosts. */
- snprintf(buf, sizeof buf, "%.500s/%.100s",
- pw->pw_dir, rhosts_files[rhosts_file_index]);
- if (stat(buf, &st) >= 0)
- break;
- }
- /* Switch back to privileged uid. */
- restore_uid();
-
- /* Deny if The user has no .shosts or .rhosts file and there are no system-wide files. */
- if (!rhosts_files[rhosts_file_index] &&
- stat(_PATH_RHOSTS_EQUIV, &st) < 0 &&
- stat(_PATH_SSH_HOSTS_EQUIV, &st) < 0)
- return 0;
-
- /* If not logging in as superuser, try /etc/hosts.equiv and shosts.equiv. */
- if (pw->pw_uid != 0) {
- if (check_rhosts_file(_PATH_RHOSTS_EQUIV, hostname, ipaddr,
- client_user, pw->pw_name)) {
- auth_debug_add("Accepted for %.100s [%.100s] by /etc/hosts.equiv.",
- hostname, ipaddr);
- return 1;
- }
- if (check_rhosts_file(_PATH_SSH_HOSTS_EQUIV, hostname, ipaddr,
- client_user, pw->pw_name)) {
- auth_debug_add("Accepted for %.100s [%.100s] by %.100s.",
- hostname, ipaddr, _PATH_SSH_HOSTS_EQUIV);
- return 1;
- }
- }
- /*
- * Check that the home directory is owned by root or the user, and is
- * not group or world writable.
- */
- if (stat(pw->pw_dir, &st) < 0) {
- log("Rhosts authentication refused for %.100s: "
- "no home directory %.200s", pw->pw_name, pw->pw_dir);
- auth_debug_add("Rhosts authentication refused for %.100s: "
- "no home directory %.200s", pw->pw_name, pw->pw_dir);
- return 0;
- }
- if (options.strict_modes &&
- ((st.st_uid != 0 && st.st_uid != pw->pw_uid) ||
- (st.st_mode & 022) != 0)) {
- log("Rhosts authentication refused for %.100s: "
- "bad ownership or modes for home directory.", pw->pw_name);
- auth_debug_add("Rhosts authentication refused for %.100s: "
- "bad ownership or modes for home directory.", pw->pw_name);
- return 0;
- }
- /* Temporarily use the user's uid. */
- temporarily_use_uid(pw);
-
- /* Check all .rhosts files (currently .shosts and .rhosts). */
- for (rhosts_file_index = 0; rhosts_files[rhosts_file_index];
- rhosts_file_index++) {
- /* Check users .rhosts or .shosts. */
- snprintf(buf, sizeof buf, "%.500s/%.100s",
- pw->pw_dir, rhosts_files[rhosts_file_index]);
- if (stat(buf, &st) < 0)
- continue;
-
- /*
- * Make sure that the file is either owned by the user or by
- * root, and make sure it is not writable by anyone but the
- * owner. This is to help avoid novices accidentally
- * allowing access to their account by anyone.
- */
- if (options.strict_modes &&
- ((st.st_uid != 0 && st.st_uid != pw->pw_uid) ||
- (st.st_mode & 022) != 0)) {
- log("Rhosts authentication refused for %.100s: bad modes for %.200s",
- pw->pw_name, buf);
- auth_debug_add("Bad file modes for %.200s", buf);
- continue;
- }
- /* Check if we have been configured to ignore .rhosts and .shosts files. */
- if (options.ignore_rhosts) {
- auth_debug_add("Server has been configured to ignore %.100s.",
- rhosts_files[rhosts_file_index]);
- continue;
- }
- /* Check if authentication is permitted by the file. */
- if (check_rhosts_file(buf, hostname, ipaddr, client_user, pw->pw_name)) {
- auth_debug_add("Accepted by %.100s.",
- rhosts_files[rhosts_file_index]);
- /* Restore the privileged uid. */
- restore_uid();
- auth_debug_add("Accepted host %s ip %s client_user %s server_user %s",
- hostname, ipaddr, client_user, pw->pw_name);
- return 1;
- }
- }
-
- /* Restore the privileged uid. */
- restore_uid();
- return 0;
-}
-
-int
-auth_rhosts2(struct passwd *pw, const char *client_user, const char *hostname,
- const char *ipaddr)
-{
- int ret;
-
- auth_debug_reset();
- ret = auth_rhosts2_raw(pw, client_user, hostname, ipaddr);
- auth_debug_send();
- return ret;
-}
diff --git a/usr/src/cmd/ssh/sshd/auth-rsa.c b/usr/src/cmd/ssh/sshd/auth-rsa.c
deleted file mode 100644
index 3e0e6ea50d..0000000000
--- a/usr/src/cmd/ssh/sshd/auth-rsa.c
+++ /dev/null
@@ -1,328 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * RSA-based authentication. This code determines whether to admit a login
- * based on RSA authentication. This file also contains functions to check
- * validity of the host key.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: auth-rsa.c,v 1.56 2002/06/10 16:53:06 stevesk Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <openssl/rsa.h>
-#include <openssl/md5.h>
-
-#include "rsa.h"
-#include "packet.h"
-#include "xmalloc.h"
-#include "ssh1.h"
-#include "mpaux.h"
-#include "uidswap.h"
-#include "match.h"
-#include "auth-options.h"
-#include "pathnames.h"
-#include "log.h"
-#include "servconf.h"
-#include "auth.h"
-#include "hostfile.h"
-#include "ssh.h"
-
-/* import */
-extern ServerOptions options;
-
-/*
- * Session identifier that is used to bind key exchange and authentication
- * responses to a particular session.
- */
-extern u_char session_id[16];
-
-/*
- * The .ssh/authorized_keys file contains public keys, one per line, in the
- * following format:
- * options bits e n comment
- * where bits, e and n are decimal numbers,
- * and comment is any string of characters up to newline. The maximum
- * length of a line is 8000 characters. See the documentation for a
- * description of the options.
- */
-
-BIGNUM *
-auth_rsa_generate_challenge(Key *key)
-{
- BIGNUM *challenge;
- BN_CTX *ctx;
-
- if ((challenge = BN_new()) == NULL)
- fatal("auth_rsa_generate_challenge: BN_new() failed");
- /* Generate a random challenge. */
- BN_rand(challenge, 256, 0, 0);
- if ((ctx = BN_CTX_new()) == NULL)
- fatal("auth_rsa_generate_challenge: BN_CTX_new() failed");
- BN_mod(challenge, challenge, key->rsa->n, ctx);
- BN_CTX_free(ctx);
-
- return challenge;
-}
-
-int
-auth_rsa_verify_response(Key *key, BIGNUM *challenge, u_char response[16])
-{
- u_char buf[32], mdbuf[16];
- MD5_CTX md;
- int len;
-
- /* don't allow short keys */
- if (BN_num_bits(key->rsa->n) < SSH_RSA_MINIMUM_MODULUS_SIZE) {
- error("auth_rsa_verify_response: RSA modulus too small: %d < minimum %d bits",
- BN_num_bits(key->rsa->n), SSH_RSA_MINIMUM_MODULUS_SIZE);
- return (0);
- }
-
- /* The response is MD5 of decrypted challenge plus session id. */
- len = BN_num_bytes(challenge);
- if (len <= 0 || len > 32)
- fatal("auth_rsa_verify_response: bad challenge length %d", len);
- memset(buf, 0, 32);
- BN_bn2bin(challenge, buf + 32 - len);
- MD5_Init(&md);
- MD5_Update(&md, buf, 32);
- MD5_Update(&md, session_id, 16);
- MD5_Final(mdbuf, &md);
-
- /* Verify that the response is the original challenge. */
- if (memcmp(response, mdbuf, 16) != 0) {
- /* Wrong answer. */
- return (0);
- }
- /* Correct answer. */
- return (1);
-}
-
-/*
- * Performs the RSA authentication challenge-response dialog with the client,
- * and returns true (non-zero) if the client gave the correct answer to
- * our challenge; returns zero if the client gives a wrong answer.
- */
-
-int
-auth_rsa_challenge_dialog(Key *key)
-{
- BIGNUM *challenge, *encrypted_challenge;
- u_char response[16];
- int i, success;
-
- if ((encrypted_challenge = BN_new()) == NULL)
- fatal("auth_rsa_challenge_dialog: BN_new() failed");
-
- challenge = auth_rsa_generate_challenge(key);
-
- /* Encrypt the challenge with the public key. */
- rsa_public_encrypt(encrypted_challenge, challenge, key->rsa);
-
- /* Send the encrypted challenge to the client. */
- packet_start(SSH_SMSG_AUTH_RSA_CHALLENGE);
- packet_put_bignum(encrypted_challenge);
- packet_send();
- BN_clear_free(encrypted_challenge);
- packet_write_wait();
-
- /* Wait for a response. */
- packet_read_expect(SSH_CMSG_AUTH_RSA_RESPONSE);
- for (i = 0; i < 16; i++)
- response[i] = packet_get_char();
- packet_check_eom();
-
- success = auth_rsa_verify_response(key, challenge, response);
- BN_clear_free(challenge);
- return (success);
-}
-
-/*
- * check if there's user key matching client_n,
- * return key if login is allowed, NULL otherwise
- */
-
-int
-auth_rsa_key_allowed(struct passwd *pw, BIGNUM *client_n, Key **rkey)
-{
- char line[8192], *file;
- int allowed = 0;
- u_int bits;
- FILE *f;
- u_long linenum = 0;
- struct stat st;
- Key *key;
-
- /* Temporarily use the user's uid. */
- temporarily_use_uid(pw);
-
- /* The authorized keys. */
- file = authorized_keys_file(pw);
- debug("trying public RSA key file %s", file);
-
- /* Fail quietly if file does not exist */
- if (stat(file, &st) < 0) {
- /* Restore the privileged uid. */
- restore_uid();
- xfree(file);
- return (0);
- }
- /* Open the file containing the authorized keys. */
- f = fopen(file, "r");
- if (!f) {
- /* Restore the privileged uid. */
- restore_uid();
- xfree(file);
- return (0);
- }
- if (options.strict_modes &&
- secure_filename(f, file, pw, line, sizeof(line)) != 0) {
- xfree(file);
- fclose(f);
- log("Authentication refused: %s", line);
- restore_uid();
- return (0);
- }
-
- /* Flag indicating whether the key is allowed. */
- allowed = 0;
-
- key = key_new(KEY_RSA1);
-
- /*
- * Go though the accepted keys, looking for the current key. If
- * found, perform a challenge-response dialog to verify that the
- * user really has the corresponding private key.
- */
- while (fgets(line, sizeof(line), f)) {
- char *cp;
- char *options;
-
- linenum++;
-
- /* Skip leading whitespace, empty and comment lines. */
- for (cp = line; *cp == ' ' || *cp == '\t'; cp++)
- ;
- if (!*cp || *cp == '\n' || *cp == '#')
- continue;
-
- /*
- * Check if there are options for this key, and if so,
- * save their starting address and skip the option part
- * for now. If there are no options, set the starting
- * address to NULL.
- */
- if (*cp < '0' || *cp > '9') {
- int quoted = 0;
- options = cp;
- for (; *cp && (quoted || (*cp != ' ' && *cp != '\t')); cp++) {
- if (*cp == '\\' && cp[1] == '"')
- cp++; /* Skip both */
- else if (*cp == '"')
- quoted = !quoted;
- }
- } else
- options = NULL;
-
- /* Parse the key from the line. */
- if (hostfile_read_key(&cp, &bits, key) == 0) {
- debug("%.100s, line %lu: non ssh1 key syntax",
- file, linenum);
- continue;
- }
- /* cp now points to the comment part. */
-
- /* Check if the we have found the desired key (identified by its modulus). */
- if (BN_cmp(key->rsa->n, client_n) != 0)
- continue;
-
- /* check the real bits */
- if (bits != BN_num_bits(key->rsa->n))
- log("Warning: %s, line %lu: keysize mismatch: "
- "actual %d vs. announced %d.",
- file, linenum, BN_num_bits(key->rsa->n), bits);
-
- /* We have found the desired key. */
- /*
- * If our options do not allow this key to be used,
- * do not send challenge.
- */
- if (!auth_parse_options(pw, options, file, linenum))
- continue;
-
- /* break out, this key is allowed */
- allowed = 1;
- break;
- }
-
- /* Restore the privileged uid. */
- restore_uid();
-
- /* Close the file. */
- xfree(file);
- fclose(f);
-
- /* return key if allowed */
- if (allowed && rkey != NULL)
- *rkey = key;
- else
- key_free(key);
- return (allowed);
-}
-
-/*
- * Performs the RSA authentication dialog with the client. This returns
- * 0 if the client could not be authenticated, and 1 if authentication was
- * successful. This may exit if there is a serious protocol violation.
- */
-int
-auth_rsa(struct passwd *pw, BIGNUM *client_n)
-{
- Key *key;
- char *fp;
-
- /* no user given */
- if (pw == NULL)
- return 0;
-
- if (!auth_rsa_key_allowed(pw, client_n, &key)) {
- auth_clear_options();
- return (0);
- }
-
- /* Perform the challenge-response dialog for this key. */
- if (!auth_rsa_challenge_dialog(key)) {
- /* Wrong response. */
- verbose("Wrong response to RSA authentication challenge.");
- packet_send_debug("Wrong response to RSA authentication challenge.");
- /*
- * Break out of the loop. Otherwise we might send
- * another challenge and break the protocol.
- */
- key_free(key);
- return (0);
- }
- /*
- * Correct response. The client has been successfully
- * authenticated. Note that we have not yet processed the
- * options; this will be reset if the options cause the
- * authentication to be rejected.
- */
- fp = key_fingerprint(key, SSH_FP_MD5, SSH_FP_HEX);
- verbose("Found matching %s key: %s",
- key_type(key), fp);
- xfree(fp);
- key_free(key);
-
- packet_send_debug("RSA authentication accepted.");
- return (1);
-}
diff --git a/usr/src/cmd/ssh/sshd/auth-sia.c b/usr/src/cmd/ssh/sshd/auth-sia.c
deleted file mode 100644
index 6afa02cf75..0000000000
--- a/usr/src/cmd/ssh/sshd/auth-sia.c
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright (c) 2002 Chris Adams. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-
-#ifdef HAVE_OSF_SIA
-#include "ssh.h"
-#include "auth.h"
-#include "auth-sia.h"
-#include "log.h"
-#include "servconf.h"
-#include "canohost.h"
-
-#include <sia.h>
-#include <siad.h>
-#include <pwd.h>
-#include <signal.h>
-#include <setjmp.h>
-#include <sys/resource.h>
-#include <unistd.h>
-#include <string.h>
-
-extern ServerOptions options;
-extern int saved_argc;
-extern char **saved_argv;
-
-extern int errno;
-
-int
-auth_sia_password(Authctxt *authctxt, char *pass)
-{
- int ret;
- SIAENTITY *ent = NULL;
- const char *host;
- char *user = authctxt->user;
-
- host = get_canonical_hostname(options.verify_reverse_mapping);
-
- if (!user || !pass || pass[0] == '\0')
- return(0);
-
- if (sia_ses_init(&ent, saved_argc, saved_argv, host, user, NULL, 0,
- NULL) != SIASUCCESS)
- return(0);
-
- if ((ret = sia_ses_authent(NULL, pass, ent)) != SIASUCCESS) {
- error("Couldn't authenticate %s from %s", user, host);
- if (ret & SIASTOP)
- sia_ses_release(&ent);
- return(0);
- }
-
- sia_ses_release(&ent);
-
- return(1);
-}
-
-void
-session_setup_sia(char *user, char *tty)
-{
- struct passwd *pw;
- SIAENTITY *ent = NULL;
- const char *host;
-
- host = get_canonical_hostname (options.verify_reverse_mapping);
-
- if (sia_ses_init(&ent, saved_argc, saved_argv, host, user, tty, 0,
- NULL) != SIASUCCESS) {
- fatal("sia_ses_init failed");
- }
-
- if ((pw = getpwnam(user)) == NULL) {
- sia_ses_release(&ent);
- fatal("getpwnam: no user: %s", user);
- }
- if (sia_make_entity_pwd(pw, ent) != SIASUCCESS) {
- sia_ses_release(&ent);
- fatal("sia_make_entity_pwd failed");
- }
-
- ent->authtype = SIA_A_NONE;
- if (sia_ses_estab(sia_collect_trm, ent) != SIASUCCESS) {
- fatal("Couldn't establish session for %s from %s", user,
- host);
- }
-
- if (setpriority(PRIO_PROCESS, 0, 0) == -1) {
- sia_ses_release(&ent);
- fatal("setpriority: %s", strerror (errno));
- }
-
- if (sia_ses_launch(sia_collect_trm, ent) != SIASUCCESS) {
- fatal("Couldn't launch session for %s from %s", user, host);
- }
-
- sia_ses_release(&ent);
-
- if (setreuid(geteuid(), geteuid()) < 0) {
- fatal("setreuid: %s", strerror(errno));
- }
-}
-
-#endif /* HAVE_OSF_SIA */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
diff --git a/usr/src/cmd/ssh/sshd/auth-skey.c b/usr/src/cmd/ssh/sshd/auth-skey.c
deleted file mode 100644
index 436f66aed8..0000000000
--- a/usr/src/cmd/ssh/sshd/auth-skey.c
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2001 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#include "includes.h"
-RCSID("$OpenBSD: auth-skey.c,v 1.20 2002/06/30 21:59:45 deraadt Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef SKEY
-
-#include <skey.h>
-
-#include "xmalloc.h"
-#include "auth.h"
-
-static void *
-skey_init_ctx(Authctxt *authctxt)
-{
- return authctxt;
-}
-
-int
-skey_query(void *ctx, char **name, char **infotxt,
- u_int* numprompts, char ***prompts, u_int **echo_on)
-{
- Authctxt *authctxt = ctx;
- char challenge[1024], *p;
- int len;
- struct skey skey;
-
- if (skeychallenge(&skey, authctxt->user, challenge) == -1)
- return -1;
-
- *name = xstrdup("");
- *infotxt = xstrdup("");
- *numprompts = 1;
- *prompts = xmalloc(*numprompts * sizeof(char *));
- *echo_on = xmalloc(*numprompts * sizeof(u_int));
- (*echo_on)[0] = 0;
-
- len = strlen(challenge) + strlen(SKEY_PROMPT) + 1;
- p = xmalloc(len);
- strlcpy(p, challenge, len);
- strlcat(p, SKEY_PROMPT, len);
- (*prompts)[0] = p;
-
- return 0;
-}
-
-int
-skey_respond(void *ctx, u_int numresponses, char **responses)
-{
- Authctxt *authctxt = ctx;
-
- if (authctxt->valid &&
- numresponses == 1 &&
- skey_haskey(authctxt->pw->pw_name) == 0 &&
- skey_passcheck(authctxt->pw->pw_name, responses[0]) != -1)
- return 0;
- return -1;
-}
-
-static void
-skey_free_ctx(void *ctx)
-{
- /* we don't have a special context */
-}
-
-KbdintDevice skey_device = {
- "skey",
- skey_init_ctx,
- skey_query,
- skey_respond,
- skey_free_ctx
-};
-
-KbdintDevice mm_skey_device = {
- "skey",
- skey_init_ctx,
- mm_skey_query,
- mm_skey_respond,
- skey_free_ctx
-};
-#endif /* SKEY */
diff --git a/usr/src/cmd/ssh/sshd/auth.c b/usr/src/cmd/ssh/sshd/auth.c
deleted file mode 100644
index 64e6959ecf..0000000000
--- a/usr/src/cmd/ssh/sshd/auth.c
+++ /dev/null
@@ -1,794 +0,0 @@
-/*
- * Copyright (c) 2000 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: auth.c,v 1.45 2002/09/20 18:41:29 stevesk Exp $");
-
-#ifdef HAVE_LOGIN_H
-#include <login.h>
-#endif
-#if defined(HAVE_SHADOW_H) && !defined(DISABLE_SHADOW)
-#include <shadow.h>
-#endif /* defined(HAVE_SHADOW_H) && !defined(DISABLE_SHADOW) */
-
-#ifdef HAVE_LIBGEN_H
-#include <libgen.h>
-#endif
-
-#include "xmalloc.h"
-#include "match.h"
-#include "groupaccess.h"
-#include "log.h"
-#include "buffer.h"
-#include "servconf.h"
-#include "auth.h"
-#include "auth-options.h"
-#include "canohost.h"
-#include "bufaux.h"
-#include "uidswap.h"
-#include "tildexpand.h"
-#include "misc.h"
-#include "bufaux.h"
-#include "packet.h"
-#include "channels.h"
-#include "session.h"
-
-#ifdef HAVE_BSM
-#include "bsmaudit.h"
-#include <bsm/adt.h>
-#endif /* HAVE_BSM */
-
-/* import */
-extern ServerOptions options;
-
-/* Debugging messages */
-Buffer auth_debug;
-int auth_debug_init;
-
-/*
- * Check if the user is allowed to log in via ssh. If user is listed
- * in DenyUsers or one of user's groups is listed in DenyGroups, false
- * will be returned. If AllowUsers isn't empty and user isn't listed
- * there, or if AllowGroups isn't empty and one of user's groups isn't
- * listed there, false will be returned.
- * If the user's shell is not executable, false will be returned.
- * Otherwise true is returned.
- */
-int
-allowed_user(struct passwd * pw)
-{
- struct stat st;
- const char *hostname = NULL, *ipaddr = NULL;
- char *shell;
- int i;
-#ifdef WITH_AIXAUTHENTICATE
- char *loginmsg;
-#endif /* WITH_AIXAUTHENTICATE */
-#if !defined(USE_PAM) && defined(HAVE_SHADOW_H) && \
- !defined(DISABLE_SHADOW) && defined(HAS_SHADOW_EXPIRE)
- struct spwd *spw;
-
- /* Shouldn't be called if pw is NULL, but better safe than sorry... */
- if (!pw || !pw->pw_name)
- return 0;
-
-#define DAY (24L * 60 * 60) /* 1 day in seconds */
- spw = getspnam(pw->pw_name);
- if (spw != NULL) {
- time_t today = time(NULL) / DAY;
- debug3("allowed_user: today %d sp_expire %d sp_lstchg %d"
- " sp_max %d", (int)today, (int)spw->sp_expire,
- (int)spw->sp_lstchg, (int)spw->sp_max);
-
- /*
- * We assume account and password expiration occurs the
- * day after the day specified.
- */
- if (spw->sp_expire != -1 && today > spw->sp_expire) {
- log("Account %.100s has expired", pw->pw_name);
- return 0;
- }
-
- if (spw->sp_lstchg == 0) {
- log("User %.100s password has expired (root forced)",
- pw->pw_name);
- return 0;
- }
-
- if (spw->sp_max != -1 &&
- today > spw->sp_lstchg + spw->sp_max) {
- log("User %.100s password has expired (password aged)",
- pw->pw_name);
- return 0;
- }
- }
-#else
- /* Shouldn't be called if pw is NULL, but better safe than sorry... */
- if (!pw || !pw->pw_name)
- return 0;
-#endif
-
- /*
- * Get the shell from the password data. An empty shell field is
- * legal, and means /bin/sh.
- */
- shell = (pw->pw_shell[0] == '\0') ? _PATH_BSHELL : pw->pw_shell;
-
- /* deny if shell does not exists or is not executable */
- if (stat(shell, &st) != 0) {
- log("User %.100s not allowed because shell %.100s does not exist",
- pw->pw_name, shell);
- return 0;
- }
- if (S_ISREG(st.st_mode) == 0 ||
- (st.st_mode & (S_IXOTH|S_IXUSR|S_IXGRP)) == 0) {
- log("User %.100s not allowed because shell %.100s is not executable",
- pw->pw_name, shell);
- return 0;
- }
-
- if (options.num_deny_users > 0 || options.num_allow_users > 0) {
- hostname = get_canonical_hostname(options.verify_reverse_mapping);
- ipaddr = get_remote_ipaddr();
- }
-
- /* Return false if user is listed in DenyUsers */
- if (options.num_deny_users > 0) {
- for (i = 0; i < options.num_deny_users; i++)
- if (match_user(pw->pw_name, hostname, ipaddr,
- options.deny_users[i])) {
- log("User %.100s not allowed because listed in DenyUsers",
- pw->pw_name);
- return 0;
- }
- }
- /* Return false if AllowUsers isn't empty and user isn't listed there */
- if (options.num_allow_users > 0) {
- for (i = 0; i < options.num_allow_users; i++)
- if (match_user(pw->pw_name, hostname, ipaddr,
- options.allow_users[i]))
- break;
- /* i < options.num_allow_users iff we break for loop */
- if (i >= options.num_allow_users) {
- log("User %.100s not allowed because not listed in AllowUsers",
- pw->pw_name);
- return 0;
- }
- }
- if (options.num_deny_groups > 0 || options.num_allow_groups > 0) {
- /* Get the user's group access list (primary and supplementary) */
- if (ga_init(pw->pw_name, pw->pw_gid) == 0) {
- log("User %.100s not allowed because not in any group",
- pw->pw_name);
- return 0;
- }
-
- /* Return false if one of user's groups is listed in DenyGroups */
- if (options.num_deny_groups > 0)
- if (ga_match(options.deny_groups,
- options.num_deny_groups)) {
- ga_free();
- log("User %.100s not allowed because a group is listed in DenyGroups",
- pw->pw_name);
- return 0;
- }
- /*
- * Return false if AllowGroups isn't empty and one of user's groups
- * isn't listed there
- */
- if (options.num_allow_groups > 0)
- if (!ga_match(options.allow_groups,
- options.num_allow_groups)) {
- ga_free();
- log("User %.100s not allowed because none of user's groups are listed in AllowGroups",
- pw->pw_name);
- return 0;
- }
- ga_free();
- }
-
-#ifdef WITH_AIXAUTHENTICATE
- if (loginrestrictions(pw->pw_name, S_RLOGIN, NULL, &loginmsg) != 0) {
- if (loginmsg && *loginmsg) {
- /* Remove embedded newlines (if any) */
- char *p;
- for (p = loginmsg; *p; p++) {
- if (*p == '\n')
- *p = ' ';
- }
- /* Remove trailing newline */
- *--p = '\0';
- log("Login restricted for %s: %.100s", pw->pw_name, loginmsg);
- }
- return 0;
- }
-#endif /* WITH_AIXAUTHENTICATE */
-
- /* We found no reason not to let this user try to log on... */
- return 1;
-}
-
-Authctxt *
-authctxt_new(void)
-{
- Authctxt *authctxt = xmalloc(sizeof(*authctxt));
- memset(authctxt, 0, sizeof(*authctxt));
- return authctxt;
-}
-
-void
-auth_log(Authctxt *authctxt, int authenticated, char *method, char *info)
-{
- void (*authlog) (const char *fmt,...) = verbose;
- char *authmsg, *user_str;
-
- if (authctxt == NULL)
- fatal("%s: INTERNAL ERROR", __func__);
-
- /* Raise logging level */
- if (authenticated == 1 || !authctxt->valid)
- authlog = log;
- else if (authctxt->failures >= AUTH_FAIL_LOG ||
- authctxt->attempt >= options.max_auth_tries_log ||
- authctxt->init_attempt >= options.max_init_auth_tries_log)
- authlog = notice;
-
- if (authctxt->method) {
- authmsg = "Failed";
- if (authctxt->method->postponed)
- authmsg = "Postponed"; /* shouldn't happen */
- if (authctxt->method->abandoned)
- authmsg = "Abandoned";
- if (authctxt->method->authenticated) {
- if (userauth_check_partial_failure(authctxt))
- authmsg = "Partially accepted";
- else
- authmsg = "Accepted";
- }
- else
- authmsg = "Failed";
- }
- else {
- authmsg = authenticated ? "Accepted" : "Failed";
- }
-
- if (authctxt->user == NULL || *authctxt->user == '\0')
- user_str = "<implicit>";
- else if (!authctxt->valid)
- user_str = "<invalid username>";
- else
- user_str = authctxt->user;
-
- authlog("%s %s for %s from %.200s port %d%s",
- authmsg,
- (method != NULL) ? method : "<unknown authentication method>",
- user_str,
- get_remote_ipaddr(),
- get_remote_port(),
- info);
-
-#ifdef WITH_AIXAUTHENTICATE
- if (authenticated == 0 && strcmp(method, "password") == 0)
- loginfailed(authctxt->user,
- get_canonical_hostname(options.verify_reverse_mapping),
- "ssh");
-#endif /* WITH_AIXAUTHENTICATE */
-
-}
-
-#ifdef HAVE_BSM
-void
-audit_failed_login_cleanup(void *ctxt)
-{
- Authctxt *authctxt = (Authctxt *)ctxt;
- adt_session_data_t *ah;
-
- /*
- * This table lists the different variable combinations evaluated and
- * what the resulting PAM return value is. As the table shows
- * authctxt and authctxt->valid need to be checked before either of
- * the authctxt->pam* variables.
- *
- * authctxt-> authctxt->
- * authctxt valid authctxt->pam pam_retval PAM rval
- * -------- ---------- ------------- ------------ --------
- * NULL ANY ANY ANY PAM_ABORT
- * OK zero (0) ANY ANY PAM_USER_UNKNOWN
- * OK one (1) NULL PAM_SUCCESS PAM_PERM_DENIED
- * OK one (1) NULL !PAM_SUCCESS authctxt->
- * pam_retval
- * OK one (1) VALID ANY authctxt->
- * pam_retval (+)
- * (+) If not set then default to PAM_PERM_DENIED
- */
-
- if (authctxt == NULL) {
- /* Internal error */
- audit_sshd_login_failure(&ah, PAM_ABORT, NULL);
- return;
- }
-
- if (authctxt->valid == 0) {
- audit_sshd_login_failure(&ah, PAM_USER_UNKNOWN, NULL);
- } else if (authctxt->pam == NULL) {
- if (authctxt->pam_retval == PAM_SUCCESS) {
- audit_sshd_login_failure(&ah, PAM_PERM_DENIED,
- authctxt->user);
- } else {
- audit_sshd_login_failure(&ah, authctxt->pam_retval,
- authctxt->user);
- }
- } else {
- audit_sshd_login_failure(&ah, AUTHPAM_ERROR(authctxt,
- PAM_PERM_DENIED), authctxt->user);
- }
-}
-#endif /* HAVE_BSM */
-
-/*
- * Check whether root logins are disallowed.
- */
-int
-auth_root_allowed(char *method)
-{
- switch (options.permit_root_login) {
- case PERMIT_YES:
- return 1;
- break;
- case PERMIT_NO_PASSWD:
- if (strcmp(method, "password") != 0 &&
- strcmp(method, "keyboard-interactive") != 0)
- return 1;
- break;
- case PERMIT_FORCED_ONLY:
- if (forced_command) {
- log("Root login accepted for forced command.");
- return 1;
- }
- break;
- }
- log("ROOT LOGIN REFUSED FROM %.200s", get_remote_ipaddr());
- return 0;
-}
-
-
-/*
- * Given a template and a passwd structure, build a filename
- * by substituting % tokenised options. Currently, %% becomes '%',
- * %h becomes the home directory and %u the username.
- *
- * This returns a buffer allocated by xmalloc.
- */
-char *
-expand_filename(const char *filename, struct passwd *pw)
-{
- Buffer buffer;
- char *file;
- const char *cp;
-
- if (pw == 0)
- return NULL; /* shouldn't happen */
- /*
- * Build the filename string in the buffer by making the appropriate
- * substitutions to the given file name.
- */
- buffer_init(&buffer);
- for (cp = filename; *cp; cp++) {
- if (cp[0] == '%' && cp[1] == '%') {
- buffer_append(&buffer, "%", 1);
- cp++;
- continue;
- }
- if (cp[0] == '%' && cp[1] == 'h') {
- buffer_append(&buffer, pw->pw_dir, strlen(pw->pw_dir));
- cp++;
- continue;
- }
- if (cp[0] == '%' && cp[1] == 'u') {
- buffer_append(&buffer, pw->pw_name,
- strlen(pw->pw_name));
- cp++;
- continue;
- }
- buffer_append(&buffer, cp, 1);
- }
- buffer_append(&buffer, "\0", 1);
-
- /*
- * Ensure that filename starts anchored. If not, be backward
- * compatible and prepend the '%h/'
- */
- file = xmalloc(MAXPATHLEN);
- cp = buffer_ptr(&buffer);
- if (*cp != '/')
- snprintf(file, MAXPATHLEN, "%s/%s", pw->pw_dir, cp);
- else
- strlcpy(file, cp, MAXPATHLEN);
-
- buffer_free(&buffer);
- return file;
-}
-
-char *
-authorized_keys_file(struct passwd *pw)
-{
- return expand_filename(options.authorized_keys_file, pw);
-}
-
-char *
-authorized_keys_file2(struct passwd *pw)
-{
- return expand_filename(options.authorized_keys_file2, pw);
-}
-
-/* return ok if key exists in sysfile or userfile */
-HostStatus
-check_key_in_hostfiles(struct passwd *pw, Key *key, const char *host,
- const char *sysfile, const char *userfile)
-{
- Key *found;
- char *user_hostfile;
- struct stat st;
- HostStatus host_status;
-
- /* Check if we know the host and its host key. */
- found = key_new(key->type);
- host_status = check_host_in_hostfile(sysfile, host, key, found, NULL);
-
- if (host_status != HOST_OK && userfile != NULL) {
- user_hostfile = tilde_expand_filename(userfile, pw->pw_uid);
- if (options.strict_modes &&
- (stat(user_hostfile, &st) == 0) &&
- ((st.st_uid != 0 && st.st_uid != pw->pw_uid) ||
- (st.st_mode & 022) != 0)) {
- log("Authentication refused for %.100s: "
- "bad owner or modes for %.200s",
- pw->pw_name, user_hostfile);
- } else {
- temporarily_use_uid(pw);
- host_status = check_host_in_hostfile(user_hostfile,
- host, key, found, NULL);
- restore_uid();
- }
- xfree(user_hostfile);
- }
- key_free(found);
-
- debug2("check_key_in_hostfiles: key %s for %s", host_status == HOST_OK ?
- "ok" : "not found", host);
- return host_status;
-}
-
-
-/*
- * Check a given file for security. This is defined as all components
- * of the path to the file must be owned by either the owner of
- * of the file or root and no directories must be group or world writable.
- *
- * XXX Should any specific check be done for sym links ?
- *
- * Takes an open file descriptor, the file name, a uid and and
- * error buffer plus max size as arguments.
- *
- * Returns 0 on success and -1 on failure
- */
-int
-secure_filename(FILE *f, const char *file, struct passwd *pw,
- char *err, size_t errlen)
-{
- uid_t uid;
- char buf[MAXPATHLEN], homedir[MAXPATHLEN];
- char *cp;
- int comparehome = 0;
- struct stat st;
-
- if (pw == NULL)
- return 0;
-
- uid = pw->pw_uid;
-
- if (realpath(file, buf) == NULL) {
- snprintf(err, errlen, "realpath %s failed: %s", file,
- strerror(errno));
- return -1;
- }
-
- /*
- * A user is not required to have all the files that are subject to
- * the strict mode checking in his/her home directory. If the
- * directory is not present at the moment, which might be the case if
- * the directory is not mounted until the user is authenticated, do
- * not perform the home directory check below.
- */
- if (realpath(pw->pw_dir, homedir) != NULL)
- comparehome = 1;
-
- /* check the open file to avoid races */
- if (fstat(fileno(f), &st) < 0 ||
- (st.st_uid != 0 && st.st_uid != uid) ||
- (st.st_mode & 022) != 0) {
- snprintf(err, errlen, "bad ownership or modes for file %s",
- buf);
- return -1;
- }
-
- /* for each component of the canonical path, walking upwards */
- for (;;) {
- if ((cp = dirname(buf)) == NULL) {
- snprintf(err, errlen, "dirname() failed");
- return -1;
- }
- strlcpy(buf, cp, sizeof(buf));
-
- debug3("secure_filename: checking '%s'", buf);
- if (stat(buf, &st) < 0 ||
- (st.st_uid != 0 && st.st_uid != uid) ||
- (st.st_mode & 022) != 0) {
- snprintf(err, errlen,
- "bad ownership or modes for directory %s", buf);
- return -1;
- }
-
- /* If we passed the homedir then we can stop. */
- if (comparehome && strcmp(homedir, buf) == 0) {
- debug3("secure_filename: terminating check at '%s'",
- buf);
- break;
- }
- /*
- * dirname should always complete with a "/" path,
- * but we can be paranoid and check for "." too
- */
- if ((strcmp("/", buf) == 0) || (strcmp(".", buf) == 0))
- break;
- }
- return 0;
-}
-
-struct passwd *
-getpwnamallow(const char *user)
-{
-#ifdef HAVE_LOGIN_CAP
- extern login_cap_t *lc;
-#ifdef BSD_AUTH
- auth_session_t *as;
-#endif
-#endif
- struct passwd *pw;
-
- if (user == NULL || *user == '\0')
- return (NULL); /* implicit user, will be set later */
-
- parse_server_match_config(&options, user,
- get_canonical_hostname(options.verify_reverse_mapping), get_remote_ipaddr());
-
- pw = getpwnam(user);
- if (pw == NULL) {
- log("Illegal user %.100s from %.100s",
- user, get_remote_ipaddr());
- return (NULL);
- }
- if (!allowed_user(pw))
- return (NULL);
-#ifdef HAVE_LOGIN_CAP
- if ((lc = login_getclass(pw->pw_class)) == NULL) {
- debug("unable to get login class: %s", user);
- return (NULL);
- }
-#ifdef BSD_AUTH
- if ((as = auth_open()) == NULL || auth_setpwd(as, pw) != 0 ||
- auth_approval(as, lc, pw->pw_name, "ssh") <= 0) {
- debug("Approval failure for %s", user);
- pw = NULL;
- }
- if (as != NULL)
- auth_close(as);
-#endif
-#endif
- if (pw != NULL)
- return (pwcopy(pw));
- return (NULL);
-}
-
-
-/*
- * The fatal_cleanup method to kill the hook. Since hook has been put into
- * new process group all descendants will be killed as well.
- */
-static void
-kill_hook(void *arg)
-{
- pid_t pid;
-
- pid = *(pid_t*)arg;
- debug("killing hook and all it's children, process group: %ld", pid);
- xfree(arg);
- (void)killpg(pid, SIGTERM);
-}
-
-/*
- * Runs the PreUserauthHook.
- * Returns -1 on execution error or the exit code of the hook if execution is
- * successful.
- */
-int
-run_auth_hook(const char *path, const char *user, const char *method)
-{
- struct stat st;
- int i, status, ret = 1;
- u_int envsize, argsize;
- char buf[256];
- char **env, **args;
- pid_t pid, *ppid;
-
- if (path == NULL || user == NULL || method == NULL) {
- return (-1);
- }
-
- /* Initialize the environment/arguments for the hook. */
- envsize = 4; /* 3 env vars + EndOfList marker */
- argsize = 4; /* 2 args + exe name + EndOfList marker */
- env = xmalloc(envsize * sizeof (char *));
- args = xmalloc(argsize * sizeof (char *));
- env[0] = NULL;
-
- /* we use the SSH env handling scheme */
- child_set_env_silent(&env, &envsize, "PATH", "/usr/bin:/bin");
- child_set_env_silent(&env, &envsize, "IFS", " \t\n");
-
- (void) snprintf(buf, sizeof (buf), "%.50s %d %.50s %d",
- get_remote_ipaddr(), get_remote_port(),
- get_local_ipaddr(packet_get_connection_in()), get_local_port());
- child_set_env_silent(&env, &envsize, "SSH_CONNECTION", buf);
-
- args[0] = xstrdup(path);
- args[1] = xstrdup(method);
- args[2] = xstrdup(user);
- args[3] = NULL;
-
- /*
- * sanity checks
- * note: the checks do not make sure that the file checked is actually
- * the same which is executed. However, in this case it shouldn't be a
- * major issue since the hook is rather static and the worst case would
- * be an uncorrect message in the log or a hook is run even though the
- * permissions are not right.
- */
-
- /* check if script does exist */
- if (stat(path, &st) < 0) {
- log("Error executing PreUserauthHook \"%s\": %s", path,
- strerror(errno));
- goto cleanup;
- }
-
- /* Check correct permissions for script (uid of SSHD, mode 500) */
- if (st.st_uid != getuid() || ((st.st_mode & 0777) != 0500)) {
- log("PreUserauthHook has invalid permissions (should be 500, is"
- " %o) or ownership (should be %d, is %d)",
- (uint) st.st_mode & 0777, getuid(), st.st_uid);
- goto cleanup;
- }
-
- if ((pid = fork()) == 0) {
- /*
- * We put the hook and all its (possible) descendants into
- * a new process group so that in case of a hanging hook
- * we can wipe out the whole "family".
- */
- if (setpgid(0, 0) != 0) {
- log("setpgid: %s", strerror(errno));
- _exit(255);
- }
- (void) execve(path, args, env);
- /* child is gone so we shouldn't get here */
- log("Error executing PreUserauthHook \"%s\": %s", path,
- strerror(errno));
- _exit(255);
- } else if (pid == -1) {
- log("Error executing PreUserauthHook \"%s\": %s", path,
- strerror(errno));
- goto cleanup;
- }
-
- /* make preparations to kill hook if it is hanging */
- ppid = xmalloc(sizeof (pid_t));
- *ppid = pid;
- fatal_add_cleanup((void (*)(void *))kill_hook, (void *) ppid);
-
- if (waitpid(pid, &status, 0) == -1) {
- log("Error executing PreUserauthHook \"%s\": %s", path,
- strerror(errno));
- goto cleanup;
- }
-
- ret = WEXITSTATUS(status);
-
- if (ret == 255) {
- ret = -1; /* execve() failed, error msg already logged */
- } else if (ret != 0) {
- log("PreUserauthHook \"%s\" failed with exit code %d",
- path, ret);
- } else {
- debug("PreUserauthHook \"%s\" finished successfully", path);
- }
-
-cleanup:
- for (i = 0; args[i] != NULL; i++) {
- xfree(args[i]);
- }
- for (i = 0; env[i] != NULL; i++) {
- xfree(env[i]);
- }
- xfree(args);
- xfree(env);
-
- fatal_remove_cleanup((void (*)(void *))kill_hook, (void *) ppid);
-
- return (ret);
-}
-
-void
-auth_debug_add(const char *fmt,...)
-{
- char buf[1024];
- va_list args;
-
- if (!auth_debug_init)
- return;
-
- va_start(args, fmt);
- vsnprintf(buf, sizeof(buf), fmt, args);
- va_end(args);
- buffer_put_cstring(&auth_debug, buf);
-}
-
-void
-auth_debug_send(void)
-{
- char *msg;
-
- if (!auth_debug_init)
- return;
- while (buffer_len(&auth_debug)) {
- msg = buffer_get_string(&auth_debug, NULL);
- packet_send_debug("%s", msg);
- xfree(msg);
- }
-}
-
-void
-auth_debug_reset(void)
-{
- if (auth_debug_init)
- buffer_clear(&auth_debug);
- else {
- buffer_init(&auth_debug);
- auth_debug_init = 1;
- }
-}
diff --git a/usr/src/cmd/ssh/sshd/auth1.c b/usr/src/cmd/ssh/sshd/auth1.c
deleted file mode 100644
index e77a021393..0000000000
--- a/usr/src/cmd/ssh/sshd/auth1.c
+++ /dev/null
@@ -1,466 +0,0 @@
-/*
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: auth1.c,v 1.44 2002/09/26 11:38:43 markus Exp $");
-
-#include "xmalloc.h"
-#include "rsa.h"
-#include "ssh1.h"
-#include "packet.h"
-#include "buffer.h"
-#include "mpaux.h"
-#include "log.h"
-#include "servconf.h"
-#include "compat.h"
-#include "auth.h"
-#include "channels.h"
-#include "session.h"
-#include "uidswap.h"
-
-#ifdef HAVE_BSM
-#include "bsmaudit.h"
-extern adt_session_data_t *ah;
-#endif /* HAVE_BSM */
-
-/* import */
-extern ServerOptions options;
-
-/*
- * convert ssh auth msg type into description
- */
-static char *
-get_authname(int type)
-{
- static char buf[1024];
- switch (type) {
- case SSH_CMSG_AUTH_PASSWORD:
- return "password";
- case SSH_CMSG_AUTH_RSA:
- return "rsa";
- case SSH_CMSG_AUTH_RHOSTS_RSA:
- return "rhosts-rsa";
- case SSH_CMSG_AUTH_RHOSTS:
- return "rhosts";
- case SSH_CMSG_AUTH_TIS:
- case SSH_CMSG_AUTH_TIS_RESPONSE:
- return "challenge-response";
-#if defined(KRB4) || defined(KRB5)
- case SSH_CMSG_AUTH_KERBEROS:
- return "kerberos";
-#endif
- }
- snprintf(buf, sizeof buf, "bad-auth-msg-%d", type);
- return buf;
-}
-
-/*
- * read packets, try to authenticate the user and
- * return only if authentication is successful
- */
-static void
-do_authloop(Authctxt *authctxt)
-{
- int authenticated = 0;
- u_int bits;
- Key *client_host_key;
- BIGNUM *n;
- char *client_user, *password;
- char info[1024];
- u_int dlen;
- u_int ulen;
- int type = 0;
- struct passwd *pw = authctxt->pw;
-
- debug("Attempting authentication for %s%.100s.",
- authctxt->valid ? "" : "illegal user ", authctxt->user);
-
- /* If the user has no password, accept authentication immediately. */
- if (options.password_authentication &&
-#if defined(KRB4) || defined(KRB5)
- (!options.kerberos_authentication || options.kerberos_or_local_passwd) &&
-#endif
- auth_password(authctxt, "")) {
- auth_log(authctxt, 1, "without authentication", "");
- return;
- }
-
- /* Indicate that authentication is needed. */
- packet_start(SSH_SMSG_FAILURE);
- packet_send();
- packet_write_wait();
-
- client_user = NULL;
-
- for ( ;; ) {
- /* default to fail */
- authenticated = 0;
-
- info[0] = '\0';
-
- /* Get a packet from the client. */
- authctxt->v1_auth_type = type = packet_read();
- authctxt->v1_auth_name = get_authname(type);
-
- authctxt->attempt++;
-
- /* Process the packet. */
- switch (type) {
-
-#if defined(KRB4) || defined(KRB5)
- case SSH_CMSG_AUTH_KERBEROS:
- if (!options.kerberos_authentication) {
- verbose("Kerberos authentication disabled.");
- } else {
- char *kdata = packet_get_string(&dlen);
- packet_check_eom();
-
- if (kdata[0] == 4) { /* KRB_PROT_VERSION */
-#ifdef KRB4
- KTEXT_ST tkt, reply;
- tkt.length = dlen;
- if (tkt.length < MAX_KTXT_LEN)
- memcpy(tkt.dat, kdata, tkt.length);
-
- if (auth_krb4(authctxt, &tkt,
- &client_user, &reply)) {
- authenticated = 1;
- snprintf(info, sizeof(info),
- " tktuser %.100s",
- client_user);
-
- packet_start(
- SSH_SMSG_AUTH_KERBEROS_RESPONSE);
- packet_put_string((char *)
- reply.dat, reply.length);
- packet_send();
- packet_write_wait();
- }
-#endif /* KRB4 */
- } else {
-#ifdef KRB5
- krb5_data tkt, reply;
- tkt.length = dlen;
- tkt.data = kdata;
-
- if (auth_krb5(authctxt, &tkt,
- &client_user, &reply)) {
- authenticated = 1;
- snprintf(info, sizeof(info),
- " tktuser %.100s",
- client_user);
-
- /* Send response to client */
- packet_start(
- SSH_SMSG_AUTH_KERBEROS_RESPONSE);
- packet_put_string((char *)
- reply.data, reply.length);
- packet_send();
- packet_write_wait();
-
- if (reply.length)
- xfree(reply.data);
- }
-#endif /* KRB5 */
- }
- xfree(kdata);
- }
- break;
-#endif /* KRB4 || KRB5 */
-
-#if defined(AFS) || defined(KRB5)
- /* XXX - punt on backward compatibility here. */
- case SSH_CMSG_HAVE_KERBEROS_TGT:
- packet_send_debug("Kerberos TGT passing disabled before authentication.");
- break;
-#ifdef AFS
- case SSH_CMSG_HAVE_AFS_TOKEN:
- packet_send_debug("AFS token passing disabled before authentication.");
- break;
-#endif /* AFS */
-#endif /* AFS || KRB5 */
-
- case SSH_CMSG_AUTH_RHOSTS:
- if (!options.rhosts_authentication) {
- verbose("Rhosts authentication disabled.");
- break;
- }
- /*
- * Get client user name. Note that we just have to
- * trust the client; this is one reason why rhosts
- * authentication is insecure. (Another is
- * IP-spoofing on a local network.)
- */
- client_user = packet_get_string(&ulen);
- packet_check_eom();
-
- /* Try to authenticate using /etc/hosts.equiv and .rhosts. */
- authenticated = auth_rhosts(pw, client_user);
-
- snprintf(info, sizeof info, " ruser %.100s", client_user);
- break;
-
- case SSH_CMSG_AUTH_RHOSTS_RSA:
- if (!options.rhosts_rsa_authentication) {
- verbose("Rhosts with RSA authentication disabled.");
- break;
- }
- /*
- * Get client user name. Note that we just have to
- * trust the client; root on the client machine can
- * claim to be any user.
- */
- client_user = packet_get_string(&ulen);
-
- /* Get the client host key. */
- client_host_key = key_new(KEY_RSA1);
- bits = packet_get_int();
- packet_get_bignum(client_host_key->rsa->e);
- packet_get_bignum(client_host_key->rsa->n);
-
- if (bits != BN_num_bits(client_host_key->rsa->n))
- verbose("Warning: keysize mismatch for client_host_key: "
- "actual %d, announced %d",
- BN_num_bits(client_host_key->rsa->n), bits);
- packet_check_eom();
-
- authenticated = auth_rhosts_rsa(pw, client_user,
- client_host_key);
- key_free(client_host_key);
-
- snprintf(info, sizeof info, " ruser %.100s", client_user);
- break;
-
- case SSH_CMSG_AUTH_RSA:
- if (!options.rsa_authentication) {
- verbose("RSA authentication disabled.");
- break;
- }
- /* RSA authentication requested. */
- if ((n = BN_new()) == NULL)
- fatal("do_authloop: BN_new failed");
- packet_get_bignum(n);
- packet_check_eom();
- authenticated = auth_rsa(pw, n);
- BN_clear_free(n);
- break;
-
- case SSH_CMSG_AUTH_PASSWORD:
- authctxt->init_attempt++;
-
- if (!options.password_authentication) {
- verbose("Password authentication disabled.");
- break;
- }
- /*
- * Read user password. It is in plain text, but was
- * transmitted over the encrypted channel so it is
- * not visible to an outside observer.
- */
- password = packet_get_string(&dlen);
- packet_check_eom();
-
- /* Try authentication with the password. */
- if (authctxt->init_failures <
- options.max_init_auth_tries)
- authenticated =
- auth_password(authctxt, password);
-
- memset(password, 0, strlen(password));
- xfree(password);
- break;
-
- case SSH_CMSG_AUTH_TIS:
- debug("rcvd SSH_CMSG_AUTH_TIS");
- if (options.challenge_response_authentication == 1) {
- char *challenge = get_challenge(authctxt);
- if (challenge != NULL) {
- debug("sending challenge '%s'", challenge);
- packet_start(SSH_SMSG_AUTH_TIS_CHALLENGE);
- packet_put_cstring(challenge);
- xfree(challenge);
- packet_send();
- packet_write_wait();
- continue;
- }
- }
- break;
- case SSH_CMSG_AUTH_TIS_RESPONSE:
- debug("rcvd SSH_CMSG_AUTH_TIS_RESPONSE");
- if (options.challenge_response_authentication == 1) {
- char *response = packet_get_string(&dlen);
- debug("got response '%s'", response);
- packet_check_eom();
- authenticated = verify_response(authctxt, response);
- memset(response, 'r', dlen);
- xfree(response);
- }
- break;
-
- default:
- /*
- * Any unknown messages will be ignored (and failure
- * returned) during authentication.
- */
- log("Unknown message during authentication: type %d", type);
- break;
- }
-#ifdef BSD_AUTH
- if (authctxt->as) {
- auth_close(authctxt->as);
- authctxt->as = NULL;
- }
-#endif
- if (!authctxt->valid && authenticated) {
- authenticated = 0;
- log("Ignoring authenticated invalid user %s",
- authctxt->user);
- }
-
-#ifdef _UNICOS
- if (type == SSH_CMSG_AUTH_PASSWORD && !authenticated)
- cray_login_failure(authctxt->user, IA_UDBERR);
- if (authenticated && cray_access_denied(authctxt->user)) {
- authenticated = 0;
- fatal("Access denied for user %s.",authctxt->user);
- }
-#endif /* _UNICOS */
-
-#ifdef HAVE_CYGWIN
- if (authenticated &&
- !check_nt_auth(type == SSH_CMSG_AUTH_PASSWORD, pw)) {
- packet_disconnect("Authentication rejected for uid %d.",
- pw == NULL ? -1 : pw->pw_uid);
- authenticated = 0;
- }
-#else
- /* Special handling for root */
- if (authenticated && authctxt->pw->pw_uid == 0 &&
- !auth_root_allowed(get_authname(type)))
- authenticated = 0;
-#endif
-#ifdef USE_PAM
- if (authenticated && type != SSH_CMSG_AUTH_PASSWORD)
- authenticated = do_pam_non_initial_userauth(authctxt);
- else if (authenticated && !AUTHPAM_DONE(authctxt))
- authenticated = 0;
-
- if (!authenticated)
- authctxt->pam_retval = AUTHPAM_ERROR(authctxt,
- PAM_PERM_DENIED);
-#endif /* USE_PAM */
-
- /* Log before sending the reply */
- auth_log(authctxt, authenticated, get_authname(type), info);
-
- if (client_user != NULL) {
- xfree(client_user);
- client_user = NULL;
- }
-
- if (authenticated)
- return;
-
- if (type == SSH_CMSG_AUTH_PASSWORD)
- authctxt->init_failures++;
-
- if (authctxt->failures++ > options.max_auth_tries) {
-#ifdef HAVE_BSM
- fatal_remove_cleanup(audit_failed_login_cleanup,
- authctxt);
- audit_sshd_login_failure(&ah, PAM_MAXTRIES,
- authctxt->user);
-#endif /* HAVE_BSM */
- packet_disconnect(AUTH_FAIL_MSG, authctxt->user);
- }
-
- packet_start(SSH_SMSG_FAILURE);
- packet_send();
- packet_write_wait();
- }
-}
-
-/*
- * Performs authentication of an incoming connection. Session key has already
- * been exchanged and encryption is enabled.
- */
-Authctxt *
-do_authentication(void)
-{
- Authctxt *authctxt;
- u_int ulen;
- char *user, *style = NULL;
-
- /* Get the name of the user that we wish to log in as. */
- packet_read_expect(SSH_CMSG_USER);
-
- /* Get the user name. */
- user = packet_get_string(&ulen);
- packet_check_eom();
-
- if ((style = strchr(user, ':')) != NULL)
- *style++ = '\0';
-
-#ifdef KRB5
- /* XXX - SSH.com Kerberos v5 braindeath. */
- if ((datafellows & SSH_BUG_K5USER) &&
- options.kerberos_authentication) {
- char *p;
- if ((p = strchr(user, '@')) != NULL)
- *p = '\0';
- }
-#endif
-
- authctxt = authctxt_new();
- authctxt->user = user;
- authctxt->style = style;
-
-#ifdef HAVE_BSM
- fatal_add_cleanup(audit_failed_login_cleanup, authctxt);
-#endif /* HAVE_BSM */
-
- /* Verify that the user is a valid user. */
- if ((authctxt->pw = getpwnamallow(user)) != NULL) {
- authctxt->valid = 1;
- } else {
- authctxt->valid = 0;
- debug("do_authentication: illegal user %s", user);
- }
-
- setproctitle("%s", authctxt->pw ? user : "unknown");
-
- /*
- * If we are not running as root, the user must have the same uid as
- * the server. (Unless you are running Windows)
- */
-#ifndef HAVE_CYGWIN
- if (getuid() != 0 && authctxt->pw &&
- authctxt->pw->pw_uid != getuid())
- packet_disconnect("Cannot change user when server not running as root.");
-#endif
-
- /*
- * Loop until the user has been authenticated or the connection is
- * closed, do_authloop() returns only if authentication is successful
- */
- do_authloop(authctxt);
-
- /* The user has been authenticated and accepted. */
- packet_start(SSH_SMSG_SUCCESS);
- packet_send();
- packet_write_wait();
-
- return (authctxt);
-}
diff --git a/usr/src/cmd/ssh/sshd/auth2-chall.c b/usr/src/cmd/ssh/sshd/auth2-chall.c
deleted file mode 100644
index 72dcc6dc5b..0000000000
--- a/usr/src/cmd/ssh/sshd/auth2-chall.c
+++ /dev/null
@@ -1,358 +0,0 @@
-/*
- * Copyright (c) 2001 Markus Friedl. All rights reserved.
- * Copyright (c) 2001 Per Allansson. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: auth2-chall.c,v 1.20 2002/06/30 21:59:45 deraadt Exp $");
-
-#include "ssh2.h"
-#include "auth.h"
-#include "buffer.h"
-#include "packet.h"
-#include "xmalloc.h"
-#include "dispatch.h"
-#include "auth.h"
-#include "log.h"
-
-#ifndef lint
-static void auth2_challenge_start(Authctxt *);
-static int send_userauth_info_request(Authctxt *);
-static void input_userauth_info_response(int, u_int32_t, void *);
-
-#ifdef BSD_AUTH
-extern KbdintDevice bsdauth_device;
-#else
-#ifdef SKEY
-extern KbdintDevice skey_device;
-#endif
-#endif
-
-KbdintDevice *devices[] = {
-#ifdef BSD_AUTH
- &bsdauth_device,
-#else
-#ifdef SKEY
- &skey_device,
-#endif
-#endif
- NULL
-};
-
-typedef struct KbdintAuthctxt KbdintAuthctxt;
-struct KbdintAuthctxt
-{
- char *devices;
- void *ctxt;
- KbdintDevice *device;
- u_int nreq;
-};
-
-static KbdintAuthctxt *
-kbdint_alloc(const char *devs)
-{
- KbdintAuthctxt *kbdintctxt;
- Buffer b;
- int i;
-
- kbdintctxt = xmalloc(sizeof(KbdintAuthctxt));
- if (strcmp(devs, "") == 0) {
- buffer_init(&b);
- for (i = 0; devices[i]; i++) {
- if (buffer_len(&b) > 0)
- buffer_append(&b, ",", 1);
- buffer_append(&b, devices[i]->name,
- strlen(devices[i]->name));
- }
- buffer_append(&b, "\0", 1);
- kbdintctxt->devices = xstrdup(buffer_ptr(&b));
- buffer_free(&b);
- } else {
- kbdintctxt->devices = xstrdup(devs);
- }
- debug("kbdint_alloc: devices '%s'", kbdintctxt->devices);
- kbdintctxt->ctxt = NULL;
- kbdintctxt->device = NULL;
- kbdintctxt->nreq = 0;
-
- return kbdintctxt;
-}
-static void
-kbdint_reset_device(KbdintAuthctxt *kbdintctxt)
-{
- if (kbdintctxt->ctxt) {
- kbdintctxt->device->free_ctx(kbdintctxt->ctxt);
- kbdintctxt->ctxt = NULL;
- }
- kbdintctxt->device = NULL;
-}
-static void
-kbdint_free(KbdintAuthctxt *kbdintctxt)
-{
- if (kbdintctxt->device)
- kbdint_reset_device(kbdintctxt);
- if (kbdintctxt->devices) {
- xfree(kbdintctxt->devices);
- kbdintctxt->devices = NULL;
- }
- xfree(kbdintctxt);
-}
-/* get next device */
-static int
-kbdint_next_device(KbdintAuthctxt *kbdintctxt)
-{
- size_t len;
- char *t;
- int i;
-
- if (kbdintctxt->device)
- kbdint_reset_device(kbdintctxt);
- do {
- len = kbdintctxt->devices ?
- strcspn(kbdintctxt->devices, ",") : 0;
-
- if (len == 0)
- break;
- for (i = 0; devices[i]; i++)
- if (strncmp(kbdintctxt->devices, devices[i]->name, len) == 0)
- kbdintctxt->device = devices[i];
- t = kbdintctxt->devices;
- kbdintctxt->devices = t[len] ? xstrdup(t+len+1) : NULL;
- xfree(t);
- debug2("kbdint_next_device: devices %s", kbdintctxt->devices ?
- kbdintctxt->devices : "<empty>");
- } while (kbdintctxt->devices && !kbdintctxt->device);
-
- return kbdintctxt->device ? 1 : 0;
-}
-
-/*
- * try challenge-response, set authctxt->method->postponed if we have to
- * wait for the response.
- */
-void
-auth2_challenge(Authctxt *authctxt, char *devs)
-{
- debug("auth2_challenge: user=%s devs=%s",
- authctxt->user ? authctxt->user : "<nouser>",
- devs ? devs : "<no devs>");
-
- if (authctxt->user == NULL || !devs)
- return;
- if (authctxt->method->method_data != NULL) {
- auth2_challenge_abandon(authctxt);
- authctxt->method->abandoned = 0;
- }
- authctxt->method->method_data = (void *) kbdint_alloc(devs);
- auth2_challenge_start(authctxt);
-}
-
-/* unregister kbd-int callbacks and context */
-static void
-auth2_challenge_stop(Authctxt *authctxt)
-{
- /* unregister callback */
- dispatch_set(SSH2_MSG_USERAUTH_INFO_RESPONSE, NULL);
- if (authctxt->method->method_data != NULL) {
- kbdint_free((KbdintAuthctxt *) authctxt->method->method_data);
- authctxt->method->method_data = NULL;
- }
-}
-
-void
-auth2_challenge_abandon(Authctxt *authctxt)
-{
- auth2_challenge_stop(authctxt);
- authctxt->method->abandoned = 1;
- authctxt->method->postponed = 0;
- authctxt->method->authenticated = 0;
- authctxt->method->abandons++;
- authctxt->method->attempts++;
-}
-
-/* side effect: sets authctxt->method->postponed if a reply was sent*/
-static void
-auth2_challenge_start(Authctxt *authctxt)
-{
- KbdintAuthctxt *kbdintctxt = (KbdintAuthctxt *)
- authctxt->method->method_data;
-
- debug2("auth2_challenge_start: devices %s",
- kbdintctxt->devices ? kbdintctxt->devices : "<empty>");
-
- if (kbdint_next_device(kbdintctxt) == 0) {
- auth2_challenge_stop(authctxt);
- return;
- }
- debug("auth2_challenge_start: trying authentication method '%s'",
- kbdintctxt->device->name);
-
- if ((kbdintctxt->ctxt = kbdintctxt->device->init_ctx(authctxt)) == NULL) {
- auth2_challenge_stop(authctxt);
- return;
- }
- if (send_userauth_info_request(authctxt) == 0) {
- auth2_challenge_stop(authctxt);
- return;
- }
- dispatch_set(SSH2_MSG_USERAUTH_INFO_RESPONSE,
- &input_userauth_info_response);
-
- authctxt->method->postponed = 1;
-}
-
-static int
-send_userauth_info_request(Authctxt *authctxt)
-{
- KbdintAuthctxt *kbdintctxt;
- char *name, *instr, **prompts;
- int i;
- u_int *echo_on;
-
- kbdintctxt = (KbdintAuthctxt *) authctxt->method->method_data;
- if (kbdintctxt->device->query(kbdintctxt->ctxt,
- &name, &instr, &kbdintctxt->nreq, &prompts, &echo_on))
- return 0;
-
- packet_start(SSH2_MSG_USERAUTH_INFO_REQUEST);
- packet_put_cstring(name);
- packet_put_utf8_cstring(instr);
- packet_put_cstring(""); /* language not used */
- packet_put_int(kbdintctxt->nreq);
- for (i = 0; i < kbdintctxt->nreq; i++) {
- packet_put_utf8_cstring(prompts[i]);
- packet_put_char(echo_on[i]);
- }
- packet_send();
- packet_write_wait();
-
- for (i = 0; i < kbdintctxt->nreq; i++)
- xfree(prompts[i]);
- xfree(prompts);
- xfree(echo_on);
- xfree(name);
- xfree(instr);
- return 1;
-}
-
-static void
-input_userauth_info_response(int type, u_int32_t seq, void *ctxt)
-{
- Authctxt *authctxt = ctxt;
- KbdintAuthctxt *kbdintctxt;
- int i, res, len;
- u_int nresp;
- char **response = NULL, *method;
-
- if (authctxt == NULL)
- fatal("input_userauth_info_response: no authctxt");
- kbdintctxt = (KbdintAuthctxt *) authctxt->method->method_data;
- if (kbdintctxt == NULL || kbdintctxt->ctxt == NULL)
- fatal("input_userauth_info_response: no kbdintctxt");
- if (kbdintctxt->device == NULL)
- fatal("input_userauth_info_response: no device");
-
- nresp = packet_get_int();
- if (nresp != kbdintctxt->nreq)
- fatal("input_userauth_info_response: wrong number of replies");
- if (nresp > 100)
- fatal("input_userauth_info_response: too many replies");
- if (nresp > 0) {
- response = xmalloc(nresp * sizeof(char *));
- for (i = 0; i < nresp; i++)
- response[i] = packet_get_string(NULL);
- }
- packet_check_eom();
-
- if (authctxt->valid) {
- res = kbdintctxt->device->respond(kbdintctxt->ctxt,
- nresp, response);
- } else {
- res = -1;
- }
-
- for (i = 0; i < nresp; i++) {
- memset(response[i], 'r', strlen(response[i]));
- xfree(response[i]);
- }
- if (response)
- xfree(response);
-
- authctxt->method->postponed = 0; /* reset */
- switch (res) {
- case 0:
- /* Success! */
- authctxt->method->authenticated = 1;
- break;
- case 1:
- /* Authentication needs further interaction */
- if (send_userauth_info_request(authctxt) == 1) {
- authctxt->method->postponed = 1;
- }
- break;
- default:
- /* Failure! */
- break;
- }
-
-
- len = strlen("keyboard-interactive") + 2 +
- strlen(kbdintctxt->device->name);
- method = xmalloc(len);
- snprintf(method, len, "keyboard-interactive/%s",
- kbdintctxt->device->name);
-
- if (authctxt->method->authenticated || authctxt->method->abandoned) {
- auth2_challenge_stop(authctxt);
- } else {
- /* start next device */
- /* may set authctxt->method->postponed */
- auth2_challenge_start(authctxt);
- }
- userauth_finish(authctxt, method);
- xfree(method);
-}
-
-void
-privsep_challenge_enable(void)
-{
-#ifdef BSD_AUTH
- extern KbdintDevice mm_bsdauth_device;
-#endif
-#ifdef SKEY
- extern KbdintDevice mm_skey_device;
-#endif
- /* As long as SSHv1 has devices[0] hard coded this is fine */
-#ifdef BSD_AUTH
- devices[0] = &mm_bsdauth_device;
-#else
-#ifdef SKEY
- devices[0] = &mm_skey_device;
-#endif
-#endif
-}
-#endif /* lint */
diff --git a/usr/src/cmd/ssh/sshd/auth2-gss.c b/usr/src/cmd/ssh/sshd/auth2-gss.c
deleted file mode 100644
index 8525707c1e..0000000000
--- a/usr/src/cmd/ssh/sshd/auth2-gss.c
+++ /dev/null
@@ -1,533 +0,0 @@
-/*
- * Copyright (c) 2001-2003 Simon Wilkinson. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include "includes.h"
-
-#ifdef GSSAPI
-#include "auth.h"
-#include "ssh2.h"
-#include "xmalloc.h"
-#include "log.h"
-#include "dispatch.h"
-#include "buffer.h"
-#include "servconf.h"
-#include "compat.h"
-#include "bufaux.h"
-#include "packet.h"
-
-#include <gssapi/gssapi.h>
-#include "ssh-gss.h"
-
-extern ServerOptions options;
-extern uchar_t *session_id2;
-extern int session_id2_len;
-extern Gssctxt *xxx_gssctxt;
-
-static void userauth_gssapi_finish(Authctxt *authctxt, Gssctxt *gssctxt);
-
-static void
-userauth_gssapi_keyex(Authctxt *authctxt)
-{
- gss_buffer_desc g_mic_data, mic_tok;
- Buffer mic_data;
- OM_uint32 maj_status, min_status;
-
- if (authctxt == NULL || authctxt->method == NULL)
- fatal("No authentication context during gssapi-keyex userauth");
-
- if (xxx_gssctxt == NULL || xxx_gssctxt->context == GSS_C_NO_CONTEXT) {
- /* fatal()? or return? */
- debug("No GSS-API context during gssapi-keyex userauth");
- return;
- }
-
- /* Make data buffer to verify MIC with */
- buffer_init(&mic_data);
- buffer_put_string(&mic_data, session_id2, session_id2_len);
- buffer_put_char(&mic_data, SSH2_MSG_USERAUTH_REQUEST);
- buffer_put_cstring(&mic_data, authctxt->user);
- buffer_put_cstring(&mic_data, authctxt->service);
- buffer_put_cstring(&mic_data, authctxt->method->name);
-
- g_mic_data.value = buffer_ptr(&mic_data);
- g_mic_data.length = buffer_len(&mic_data);
-
- mic_tok.value = packet_get_string(&mic_tok.length);
-
- maj_status = gss_verify_mic(&min_status, xxx_gssctxt->context,
- &g_mic_data, &mic_tok, NULL);
-
- packet_check_eom();
- buffer_clear(&mic_data);
-
- if (maj_status != GSS_S_COMPLETE)
- debug2("MIC verification failed, GSSAPI userauth failed");
- else
- userauth_gssapi_finish(authctxt, xxx_gssctxt);
-
- /* Leave Gssctxt around for ssh_gssapi_cleanup/storecreds() */
- if (xxx_gssctxt->deleg_creds == GSS_C_NO_CREDENTIAL)
- ssh_gssapi_delete_ctx(&xxx_gssctxt);
-}
-
-static void ssh_gssapi_userauth_error(Gssctxt *ctxt);
-static void input_gssapi_token(int type, u_int32_t plen, void *ctxt);
-static void input_gssapi_mic(int type, u_int32_t plen, void *ctxt);
-static void input_gssapi_errtok(int, u_int32_t, void *);
-static void input_gssapi_exchange_complete(int type, u_int32_t plen,
- void *ctxt);
-
-static void
-userauth_gssapi_abandon(Authctxt *authctxt, Authmethod *method)
-{
- ssh_gssapi_delete_ctx((Gssctxt **)&method->method_data);
- xxx_gssctxt = NULL;
- dispatch_set(SSH2_MSG_USERAUTH_GSSAPI_MIC, NULL);
- dispatch_set(SSH2_MSG_USERAUTH_GSSAPI_EXCHANGE_COMPLETE, NULL);
- dispatch_set(SSH2_MSG_USERAUTH_GSSAPI_TOKEN, NULL);
- dispatch_set(SSH2_MSG_USERAUTH_GSSAPI_ERRTOK, NULL);
-}
-
-static void
-userauth_gssapi(Authctxt *authctxt)
-{
- gss_OID_set supported_mechs;
- int mechs, present = 0;
- OM_uint32 min_status;
- uint_t len;
- char *doid = NULL;
- gss_OID oid = GSS_C_NULL_OID;
-
- if (datafellows & SSH_OLD_GSSAPI) {
- debug("Early drafts of GSSAPI userauth not supported");
- return;
- }
-
- mechs = packet_get_int();
- if (mechs == 0) {
- packet_check_eom();
- debug("Mechanism negotiation is not supported");
- return;
- }
-
- ssh_gssapi_server_mechs(&supported_mechs);
-
- do {
- mechs--;
-
- if (oid != GSS_C_NULL_OID)
- ssh_gssapi_release_oid(&oid);
-
- doid = packet_get_string(&len);
-
- /* ick */
- if (doid[0] != 0x06 || (len > 2 && doid[1] != len - 2)) {
- log("Mechanism OID received using the old "
- "encoding form");
- oid = ssh_gssapi_make_oid(len, doid);
- } else {
- oid = ssh_gssapi_make_oid(len - 2, doid + 2);
- }
-
- (void) gss_test_oid_set_member(&min_status, oid,
- supported_mechs, &present);
-
- debug("Client offered gssapi userauth with %s (%s)",
- ssh_gssapi_oid_to_str(oid),
- present ? "supported" : "unsupported");
- } while (!present && (mechs > 0));
-
- if (!present) {
- /* userauth_finish() will send SSH2_MSG_USERAUTH_FAILURE */
- debug2("No mechanism offered by the client is available");
- ssh_gssapi_release_oid(&oid);
- return;
- }
-
- ssh_gssapi_build_ctx((Gssctxt **)&authctxt->method->method_data,
- 0, oid);
- ssh_gssapi_release_oid(&oid);
- /* Send SSH_MSG_USERAUTH_GSSAPI_RESPONSE */
-
- packet_start(SSH2_MSG_USERAUTH_GSSAPI_RESPONSE);
-
- /* Just return whatever we found -- the matched mech does us no good */
- packet_put_string(doid, len);
- xfree(doid);
-
- packet_send();
- packet_write_wait();
-
- /* Setup rest of gssapi userauth conversation */
- dispatch_set(SSH2_MSG_USERAUTH_GSSAPI_TOKEN, &input_gssapi_token);
- dispatch_set(SSH2_MSG_USERAUTH_GSSAPI_ERRTOK, &input_gssapi_errtok);
- authctxt->method->postponed = 1;
-}
-
-static void
-input_gssapi_token(int type, u_int32_t plen, void *ctxt)
-{
- Authctxt *authctxt = ctxt;
- Gssctxt *gssctxt;
- gss_buffer_desc send_tok, recv_tok;
- OM_uint32 maj_status, min_status;
- uint_t len;
-
- if (authctxt == NULL || authctxt->method == NULL ||
- (authctxt->method->method_data == NULL)) {
- fatal("No authentication or GSSAPI context during "
- "gssapi-with-mic userauth");
- }
-
- gssctxt = authctxt->method->method_data;
- recv_tok.value = packet_get_string(&len);
- recv_tok.length = len; /* u_int vs. size_t */
-
- maj_status = ssh_gssapi_accept_ctx(gssctxt, &recv_tok, &send_tok);
- packet_check_eom();
-
- if (GSS_ERROR(maj_status)) {
- ssh_gssapi_userauth_error(gssctxt);
- if (send_tok.length != 0) {
- packet_start(SSH2_MSG_USERAUTH_GSSAPI_ERRTOK);
- packet_put_string(send_tok.value, send_tok.length);
- packet_send();
- packet_write_wait();
- }
- authctxt->method->postponed = 0;
- dispatch_set(SSH2_MSG_USERAUTH_GSSAPI_TOKEN, NULL);
- userauth_finish(authctxt, authctxt->method->name);
- } else {
- if (send_tok.length != 0) {
- packet_start(SSH2_MSG_USERAUTH_GSSAPI_TOKEN);
- packet_put_string(send_tok.value, send_tok.length);
- packet_send();
- packet_write_wait();
- }
- if (maj_status == GSS_S_COMPLETE) {
- dispatch_set(SSH2_MSG_USERAUTH_GSSAPI_TOKEN, NULL);
- dispatch_set(SSH2_MSG_USERAUTH_GSSAPI_MIC,
- &input_gssapi_mic);
- dispatch_set(SSH2_MSG_USERAUTH_GSSAPI_EXCHANGE_COMPLETE,
- &input_gssapi_exchange_complete);
- }
- }
-
- gss_release_buffer(&min_status, &send_tok);
-}
-
-static void
-input_gssapi_errtok(int type, u_int32_t plen, void *ctxt)
-{
- Authctxt *authctxt = ctxt;
- Gssctxt *gssctxt;
- gss_buffer_desc send_tok, recv_tok;
-
- if (authctxt == NULL || authctxt->method == NULL ||
- (authctxt->method->method_data == NULL)) {
- fatal("No authentication or GSSAPI context during "
- "gssapi-with-mic userauth");
- }
-
- gssctxt = authctxt->method->method_data;
- recv_tok.value = packet_get_string(&recv_tok.length);
- packet_check_eom();
-
- /* Push the error token into GSSAPI to see what it says */
- (void) ssh_gssapi_accept_ctx(gssctxt, &recv_tok, &send_tok);
-
- debug("Client sent GSS-API error token during GSS userauth-- %s",
- ssh_gssapi_last_error(gssctxt, NULL, NULL));
-
- /* We can't return anything to the client, even if we wanted to */
- dispatch_set(SSH2_MSG_USERAUTH_GSSAPI_TOKEN, NULL);
- dispatch_set(SSH2_MSG_USERAUTH_GSSAPI_ERRTOK, NULL);
-
-
- /*
- * The client will have already moved on to the next auth and
- * will send a new userauth request. The spec says that the
- * server MUST NOT send a SSH_MSG_USERAUTH_FAILURE packet in
- * response to this.
- *
- * We leave authctxt->method->postponed == 1 here so that a call
- * to input_userauth_request() will detect this failure (as
- * userauth abandonment) and act accordingly.
- */
-}
-
-static void
-input_gssapi_mic(int type, u_int32_t plen, void *ctxt)
-{
- Authctxt *authctxt = ctxt;
- Gssctxt *gssctxt;
- gss_buffer_desc g_mic_data, mic_tok;
- Buffer mic_data;
- OM_uint32 maj_status, min_status;
-
- if (authctxt == NULL || authctxt->method == NULL ||
- (authctxt->method->method_data == NULL)) {
- debug3("No authentication or GSSAPI context during "
- "gssapi-with-mic userauth");
- return;
- }
-
- gssctxt = authctxt->method->method_data;
-
- /* Make data buffer to verify MIC with */
- buffer_init(&mic_data);
- buffer_put_string(&mic_data, session_id2, session_id2_len);
- buffer_put_char(&mic_data, SSH2_MSG_USERAUTH_REQUEST);
- buffer_put_cstring(&mic_data, authctxt->user);
- buffer_put_cstring(&mic_data, authctxt->service);
- buffer_put_cstring(&mic_data, authctxt->method->name);
-
- g_mic_data.value = buffer_ptr(&mic_data);
- g_mic_data.length = buffer_len(&mic_data);
-
- mic_tok.value = packet_get_string(&mic_tok.length);
-
- maj_status = gss_verify_mic(&min_status, gssctxt->context,
- &g_mic_data, &mic_tok, NULL);
-
- packet_check_eom();
- buffer_free(&mic_data);
-
- if (maj_status != GSS_S_COMPLETE)
- debug2("MIC verification failed, GSSAPI userauth failed");
- else
- userauth_gssapi_finish(authctxt, gssctxt);
-
- /* Delete context from keyex */
- if (xxx_gssctxt != gssctxt)
- ssh_gssapi_delete_ctx(&xxx_gssctxt);
-
- /* Leave Gssctxt around for ssh_gssapi_cleanup/storecreds() */
- if (gssctxt->deleg_creds == GSS_C_NO_CREDENTIAL)
- ssh_gssapi_delete_ctx(&gssctxt);
-
- xxx_gssctxt = gssctxt;
-
- authctxt->method->postponed = 0;
- dispatch_set(SSH2_MSG_USERAUTH_GSSAPI_TOKEN, NULL);
- dispatch_set(SSH2_MSG_USERAUTH_GSSAPI_ERRTOK, NULL);
- dispatch_set(SSH2_MSG_USERAUTH_GSSAPI_MIC, NULL);
- userauth_finish(authctxt, authctxt->method->name);
-}
-
-/*
- * This is called when the client thinks we've completed authentication.
- * It should only be enabled in the dispatch handler by the function above,
- * which only enables it once the GSSAPI exchange is complete.
- */
-static void
-input_gssapi_exchange_complete(int type, u_int32_t plen, void *ctxt)
-{
- Authctxt *authctxt = ctxt;
- Gssctxt *gssctxt;
-
- packet_check_eom();
-
- if (authctxt == NULL || authctxt->method == NULL ||
- (authctxt->method->method_data == NULL))
- fatal("No authentication or GSSAPI context");
-
- gssctxt = authctxt->method->method_data;
-
- /*
- * SSH2_MSG_USERAUTH_GSSAPI_EXCHANGE_COMPLETE -> gssapi userauth
- * failure, the client should use SSH2_MSG_USERAUTH_GSSAPI_MIC
- * instead.
- *
- * There's two reasons for this:
- *
- * 1) we don't have GSS mechs that don't support integrity
- * protection, and even if we did we'd not want to use them with
- * SSHv2, and,
- *
- * 2) we currently have no way to dynamically detect whether a
- * given mechanism does or does not support integrity
- * protection, so when a context's flags do not indicate
- * integrity protection we can't know if the client simply
- * didn't request it, so we assume it didn't and reject the
- * userauth.
- *
- * We could fail partially (i.e., force the use of other
- * userauth methods without counting this one as failed). But
- * this will do for now.
- */
-#if 0
- authctxt->method->authenticated = ssh_gssapi_userok(gssctxt,
- authctxt->user);
-#endif
-
- if (xxx_gssctxt != gssctxt)
- ssh_gssapi_delete_ctx(&gssctxt);
- ssh_gssapi_delete_ctx(&gssctxt);
- authctxt->method->postponed = 0;
- dispatch_set(SSH2_MSG_USERAUTH_GSSAPI_TOKEN, NULL);
- dispatch_set(SSH2_MSG_USERAUTH_GSSAPI_ERRTOK, NULL);
- dispatch_set(SSH2_MSG_USERAUTH_GSSAPI_MIC, NULL);
- dispatch_set(SSH2_MSG_USERAUTH_GSSAPI_EXCHANGE_COMPLETE, NULL);
- userauth_finish(authctxt, authctxt->method->name);
-}
-
-static void
-ssh_gssapi_userauth_error(Gssctxt *ctxt)
-{
- char *errstr;
- OM_uint32 maj, min;
-
- errstr = ssh_gssapi_last_error(ctxt, &maj, &min);
- if (errstr) {
- packet_start(SSH2_MSG_USERAUTH_GSSAPI_ERROR);
- packet_put_int(maj);
- packet_put_int(min);
- packet_put_cstring(errstr);
- packet_put_cstring("");
- packet_send();
- packet_write_wait();
- xfree(errstr);
- }
-}
-
-/*
- * Code common to gssapi-keyex and gssapi-with-mic userauth.
- *
- * Does authorization, figures out how to store delegated creds.
- */
-static void
-userauth_gssapi_finish(Authctxt *authctxt, Gssctxt *gssctxt)
-{
- char *local_user = NULL;
- gss_buffer_desc dispname;
- OM_uint32 major;
-
- if (*authctxt->user != '\0' &&
- ssh_gssapi_userok(gssctxt, authctxt->user)) {
-
- /*
- * If the client princ did not map to the requested
- * username then we don't want to clobber existing creds
- * for the user with the delegated creds.
- */
- local_user = ssh_gssapi_localname(gssctxt);
- if (local_user == NULL ||
- strcmp(local_user, authctxt->user) == 0)
- gssctxt->default_creds = 1; /* store creds as default */
-
- authctxt->method->authenticated =
- do_pam_non_initial_userauth(authctxt);
-
- } else if (*authctxt->user == '\0') {
- /* Requested username == ""; derive username from princ name */
- if ((local_user = ssh_gssapi_localname(gssctxt)) == NULL)
- return;
-
- /* Changed username (from implicit, '') */
- userauth_user_svc_change(authctxt, local_user, NULL);
-
- gssctxt->default_creds = 1; /* store creds as default */
-
- authctxt->method->authenticated =
- do_pam_non_initial_userauth(authctxt);
- }
-
- if (local_user != NULL)
- xfree(local_user);
-
- if (*authctxt->user != '\0' && authctxt->method->authenticated != 0) {
- major = gss_display_name(&gssctxt->minor, gssctxt->src_name,
- &dispname, NULL);
- if (major == GSS_S_COMPLETE) {
- log("Authorized principal %.*s, authenticated with "
- "GSS mechanism %s, to: %s",
- dispname.length, (char *)dispname.value,
- ssh_gssapi_oid_to_name(gssctxt->actual_mech),
- authctxt->user);
- }
- (void) gss_release_buffer(&gssctxt->minor, &dispname);
- }
-}
-
-#if 0
-/* Deprecated userauths -- should not be enabled */
-Authmethod method_external = {
- "external-keyx",
- &options.gss_authentication,
- userauth_gssapi_keyex,
- NULL, /* no abandon function */
- NULL,
- NULL,
- /* State counters */
- 0, 0, 0, 0,
- /* State flags */
- 0, 0, 0, 0, 0, 0
-};
-
-Authmethod method_gssapi = {
- "gssapi",
- &options.gss_authentication,
- userauth_gssapi,
- userauth_gssapi_abandon,
- NULL,
- NULL,
- /* State counters */
- 0, 0, 0, 0,
- /* State flags */
- 0, 0, 0, 0, 0, 0
-};
-#endif
-
-Authmethod method_external = {
- "gssapi-keyex",
- &options.gss_authentication,
- userauth_gssapi_keyex,
- NULL, /* no abandon function */
- NULL,
- NULL,
- /* State counters */
- 0, 0, 0, 0,
- /* State flags */
- 0, 0, 0, 0, 0, 0
-};
-
-Authmethod method_gssapi = {
- "gssapi-with-mic",
- &options.gss_authentication,
- userauth_gssapi,
- userauth_gssapi_abandon,
- NULL,
- NULL,
- /* State counters */
- 0, 0, 0, 0,
- /* State flags */
- 0, 0, 0, 0, 0, 0
-};
-
-#endif /* GSSAPI */
diff --git a/usr/src/cmd/ssh/sshd/auth2-hostbased.c b/usr/src/cmd/ssh/sshd/auth2-hostbased.c
deleted file mode 100644
index c88e308100..0000000000
--- a/usr/src/cmd/ssh/sshd/auth2-hostbased.c
+++ /dev/null
@@ -1,220 +0,0 @@
-/*
- * Copyright (c) 2000 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: auth2-hostbased.c,v 1.2 2002/05/31 11:35:15 markus Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "ssh2.h"
-#include "xmalloc.h"
-#include "packet.h"
-#include "buffer.h"
-#include "log.h"
-#include "servconf.h"
-#include "compat.h"
-#include "bufaux.h"
-#include "auth.h"
-
-#ifdef USE_PAM
-#include "auth-pam.h"
-#endif /* USE_PAM */
-
-#include "key.h"
-#include "canohost.h"
-#include "pathnames.h"
-
-/* import */
-extern ServerOptions options;
-extern u_char *session_id2;
-extern int session_id2_len;
-
-static void
-userauth_hostbased(Authctxt *authctxt)
-{
- Buffer b;
- Key *key = NULL;
- char *pkalg, *cuser, *chost, *service;
- u_char *pkblob, *sig;
- u_int alen, blen, slen;
- int pktype;
- int authenticated = 0;
-
- if (!authctxt || !authctxt->method)
- fatal("%s: missing context", __func__);
-
- pkalg = packet_get_string(&alen);
- pkblob = packet_get_string(&blen);
- chost = packet_get_string(NULL);
- cuser = packet_get_string(NULL);
- sig = packet_get_string(&slen);
-
- debug("userauth_hostbased: cuser %s chost %s pkalg %s slen %d",
- cuser, chost, pkalg, slen);
-#ifdef DEBUG_PK
- debug("signature:");
- buffer_init(&b);
- buffer_append(&b, sig, slen);
- buffer_dump(&b);
- buffer_free(&b);
-#endif
- pktype = key_type_from_name(pkalg);
- if (pktype == KEY_UNSPEC) {
- /* this is perfectly legal */
- log("userauth_hostbased: unsupported "
- "public key algorithm: %s", pkalg);
- goto done;
- }
- key = key_from_blob(pkblob, blen);
- if (key == NULL) {
- error("userauth_hostbased: cannot decode key: %s", pkalg);
- goto done;
- }
- if (key->type != pktype) {
- error("userauth_hostbased: type mismatch for decoded key "
- "(received %d, expected %d)", key->type, pktype);
- goto done;
- }
- service = datafellows & SSH_BUG_HBSERVICE ? "ssh-userauth" :
- authctxt->service;
- buffer_init(&b);
- buffer_put_string(&b, session_id2, session_id2_len);
- /* reconstruct packet */
- buffer_put_char(&b, SSH2_MSG_USERAUTH_REQUEST);
- buffer_put_cstring(&b, authctxt->user);
- buffer_put_cstring(&b, service);
- buffer_put_cstring(&b, "hostbased");
- buffer_put_string(&b, pkalg, alen);
- buffer_put_string(&b, pkblob, blen);
- buffer_put_cstring(&b, chost);
- buffer_put_cstring(&b, cuser);
-#ifdef DEBUG_PK
- buffer_dump(&b);
-#endif
- /* test for allowed key and correct signature */
- authenticated = 0;
- if (hostbased_key_allowed(authctxt->pw, cuser, chost, key) &&
- key_verify(key, sig, slen, buffer_ptr(&b), buffer_len(&b)) == 1)
- authenticated = 1;
-
- buffer_clear(&b);
-done:
- /*
- * XXX TODO: Add config options for specifying users for whom
- * this userauth is insufficient and what userauths
- * may continue.
- *
- * NOTE: do_pam_non_initial_userauth() does this for
- * users with expired passwords.
- */
-#ifdef USE_PAM
- if (authenticated) {
- authctxt->cuser = cuser;
- if (!do_pam_non_initial_userauth(authctxt))
- authenticated = 0;
- /* Make sure nobody else will use this pointer since we are
- * going to free that string. */
- authctxt->cuser = NULL;
- }
-#endif /* USE_PAM */
-
- if (authenticated)
- authctxt->method->authenticated = 1;
-
- debug2("userauth_hostbased: authenticated %d", authenticated);
- if (key != NULL)
- key_free(key);
- xfree(pkalg);
- xfree(pkblob);
- xfree(cuser);
- xfree(chost);
- xfree(sig);
- return;
-}
-
-/* return 1 if given hostkey is allowed */
-int
-hostbased_key_allowed(struct passwd *pw, const char *cuser, char *chost,
- Key *key)
-{
- const char *resolvedname, *ipaddr, *lookup;
- HostStatus host_status;
- int len;
-
- resolvedname = get_canonical_hostname(options.verify_reverse_mapping);
- ipaddr = get_remote_ipaddr();
-
- debug2("userauth_hostbased: chost %s resolvedname %s ipaddr %s",
- chost, resolvedname, ipaddr);
-
- if (pw == NULL)
- return 0;
-
- if (options.hostbased_uses_name_from_packet_only) {
- if (auth_rhosts2(pw, cuser, chost, chost) == 0)
- return 0;
- lookup = chost;
- } else {
- if (((len = strlen(chost)) > 0) && chost[len - 1] == '.') {
- debug2("stripping trailing dot from chost %s", chost);
- chost[len - 1] = '\0';
- }
- if (strcasecmp(resolvedname, chost) != 0)
- log("userauth_hostbased mismatch: "
- "client sends %s, but we resolve %s to %s",
- chost, ipaddr, resolvedname);
- if (auth_rhosts2(pw, cuser, resolvedname, ipaddr) == 0)
- return 0;
- lookup = resolvedname;
- }
- debug2("userauth_hostbased: access allowed by auth_rhosts2");
-
- host_status = check_key_in_hostfiles(pw, key, lookup,
- _PATH_SSH_SYSTEM_HOSTFILE,
- options.ignore_user_known_hosts ? NULL : _PATH_SSH_USER_HOSTFILE);
-
- /* backward compat if no key has been found. */
- if (host_status == HOST_NEW)
- host_status = check_key_in_hostfiles(pw, key, lookup,
- _PATH_SSH_SYSTEM_HOSTFILE2,
- options.ignore_user_known_hosts ? NULL :
- _PATH_SSH_USER_HOSTFILE2);
-
- return (host_status == HOST_OK);
-}
-
-Authmethod method_hostbased = {
- "hostbased",
- &options.hostbased_authentication,
- userauth_hostbased,
- NULL, /* no abandon function */
- NULL, NULL, /* method data and hist data */
- 0, /* is not initial userauth */
- 0, 0, 0, /* counters */
- 0, 0, 0, 0, 0, 0 /* state */
-};
diff --git a/usr/src/cmd/ssh/sshd/auth2-kbdint.c b/usr/src/cmd/ssh/sshd/auth2-kbdint.c
deleted file mode 100644
index 2ea8104182..0000000000
--- a/usr/src/cmd/ssh/sshd/auth2-kbdint.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2000 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: auth2-kbdint.c,v 1.2 2002/05/31 11:35:15 markus Exp $");
-
-#include "packet.h"
-#include "auth.h"
-#include "log.h"
-#include "servconf.h"
-#include "xmalloc.h"
-
-/* import */
-extern ServerOptions options;
-
-static void
-userauth_kbdint(Authctxt *authctxt)
-{
- char *lang, *devs;
-
- if (!authctxt || !authctxt->method)
- fatal("%s: missing contex", __func__);
-
- lang = packet_get_string(NULL);
- devs = packet_get_string(NULL);
- packet_check_eom();
-
- debug("keyboard-interactive devs %s", devs);
-
-#ifdef USE_PAM
- if (options.kbd_interactive_authentication)
- auth2_pam(authctxt);
-#else
- if (options.challenge_response_authentication)
- auth2_challenge(authctxt, devs);
-#endif /* USE_PAM */
- xfree(devs);
- xfree(lang);
-#ifdef HAVE_CYGWIN
- if (check_nt_auth(0, authctxt->pw) == 0) {
- authctxt->method->authenticated = 0;
- return;
- }
-#endif
-}
-
-static void
-userauth_kbdint_abandon(Authctxt *authctxt, Authmethod *method)
-{
-#ifdef USE_PAM
- kbdint_pam_abandon(authctxt, method);
-#else
- auth2_challenge_abandon(authctxt);
-#endif /* USE_PAM */
-}
-
-Authmethod method_kbdint = {
- "keyboard-interactive",
- &options.kbd_interactive_authentication,
- userauth_kbdint,
- userauth_kbdint_abandon,
- NULL, NULL, /* method data and historical data */
- 1, /* initial userauth */
- 0, 0, 0, /* counters */
- 0, 0, 0, 0, 0, 0 /* state */
-};
diff --git a/usr/src/cmd/ssh/sshd/auth2-none.c b/usr/src/cmd/ssh/sshd/auth2-none.c
deleted file mode 100644
index 5d49ee95e8..0000000000
--- a/usr/src/cmd/ssh/sshd/auth2-none.c
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (c) 2000 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: auth2-none.c,v 1.4 2002/06/27 10:35:47 deraadt Exp $");
-
-#include "auth.h"
-#include "xmalloc.h"
-#include "packet.h"
-#include "log.h"
-#include "servconf.h"
-#include "atomicio.h"
-#include "compat.h"
-#include "ssh2.h"
-
-/* import */
-extern ServerOptions options;
-
-/* "none" is allowed only one time */
-static int none_enabled = 1;
-
-char *
-auth2_read_banner(void)
-{
- struct stat st;
- char *banner, *ubanner, *errstr;
- off_t len, n;
- int fd;
- uint_t ilen;
-
- if ((fd = open(options.banner, O_RDONLY)) == -1)
- return (NULL);
- if (fstat(fd, &st) == -1) {
- close(fd);
- return (NULL);
- }
- len = st.st_size;
- banner = xmalloc(len + 1);
- n = atomicio(read, fd, banner, len);
- close(fd);
-
- if (n != len) {
- xfree(banner);
- return (NULL);
- }
- banner[n] = '\0';
-
- if (datafellows & SSH_BUG_STRING_ENCODING) {
- ubanner = banner;
- } else {
- ilen = (uint_t)n;
- ubanner = g11n_convert_to_utf8(banner, &ilen, 1, &errstr);
- if (ubanner == NULL) {
- if (errstr != NULL) {
- error("Can't convert banner contents "
- "to UTF-8: %s\n", errstr);
- }
- ubanner = banner;
- } else {
- xfree(banner);
- }
- }
-
- return (ubanner);
-}
-
-static void
-userauth_banner(void)
-{
- char *banner = NULL;
-
- if (options.banner == NULL || (datafellows & SSH_BUG_BANNER))
- return;
-
- if ((banner = auth2_read_banner()) == NULL)
- goto done;
-
- packet_start(SSH2_MSG_USERAUTH_BANNER);
- packet_put_cstring(banner);
- packet_put_cstring(""); /* language, unused */
- packet_send();
- debug("userauth_banner: sent");
-done:
- if (banner)
- xfree(banner);
-}
-
-static void
-userauth_none(Authctxt *authctxt)
-{
- none_enabled = 0;
-
- if (!authctxt || !authctxt->method)
- fatal("%s: missing context", __func__);
-
- packet_check_eom();
- userauth_banner();
-#ifdef HAVE_CYGWIN
- if (check_nt_auth(1, authctxt->pw) == 0)
- return (0);
-#endif
- authctxt->method->authenticated = auth_password(authctxt, "");
-}
-
-Authmethod method_none = {
- "none",
- &none_enabled,
- userauth_none,
- NULL, /* no abandon function */
- NULL, NULL, /* method data and hist data */
- 0, /* not really initial userauth */
- 0, 0, 0, /* counters */
- 0, 0, 0, 0, 0, 0 /* state */
-};
diff --git a/usr/src/cmd/ssh/sshd/auth2-pam.c b/usr/src/cmd/ssh/sshd/auth2-pam.c
deleted file mode 100644
index 1b0fa40f2b..0000000000
--- a/usr/src/cmd/ssh/sshd/auth2-pam.c
+++ /dev/null
@@ -1,458 +0,0 @@
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include "includes.h"
-
-RCSID("$Id: auth2-pam.c,v 1.14 2002/06/28 16:48:12 mouring Exp $");
-
-#ifdef USE_PAM
-#include <security/pam_appl.h>
-
-#include "ssh.h"
-#include "ssh2.h"
-#include "auth.h"
-#include "auth-pam.h"
-#include "auth-options.h"
-#include "packet.h"
-#include "xmalloc.h"
-#include "dispatch.h"
-#include "canohost.h"
-#include "log.h"
-#include "servconf.h"
-#include "misc.h"
-
-#ifdef HAVE_BSM
-#include "bsmaudit.h"
-#endif /* HAVE_BSM */
-
-extern u_int utmp_len;
-extern ServerOptions options;
-
-extern Authmethod method_kbdint;
-extern Authmethod method_passwd;
-
-#define SSHD_PAM_KBDINT_SVC "sshd-kbdint"
-/* Maximum attempts for changing expired password */
-#define DEF_ATTEMPTS 3
-
-static int do_pam_conv_kbd_int(int num_msg,
- struct pam_message **msg, struct pam_response **resp,
- void *appdata_ptr);
-static void input_userauth_info_response_pam(int type,
- u_int32_t seqnr,
- void *ctxt);
-
-static struct pam_conv conv2 = {
- do_pam_conv_kbd_int,
- NULL,
-};
-
-static void do_pam_kbdint_cleanup(pam_handle_t *pamh);
-static void do_pam_kbdint(Authctxt *authctxt);
-
-void
-auth2_pam(Authctxt *authctxt)
-{
- if (authctxt->user == NULL)
- fatal("auth2_pam: internal error: no user");
- if (authctxt->method == NULL)
- fatal("auth2_pam: internal error: no method");
-
- conv2.appdata_ptr = authctxt;
- new_start_pam(authctxt, &conv2);
-
- authctxt->method->method_data = NULL; /* freed in the conv func */
- dispatch_set(SSH2_MSG_USERAUTH_INFO_RESPONSE,
- &input_userauth_info_response_pam);
-
- /*
- * Since password userauth and keyboard-interactive userauth
- * both use PAM, and since keyboard-interactive is so much
- * better than password userauth, we should not allow the user
- * to try password userauth after trying keyboard-interactive.
- */
- if (method_passwd.enabled)
- *method_passwd.enabled = 0;
-
- do_pam_kbdint(authctxt);
-
- dispatch_set(SSH2_MSG_USERAUTH_INFO_RESPONSE, NULL);
-}
-
-static void
-do_pam_kbdint(Authctxt *authctxt)
-{
- int retval, retval2;
- pam_handle_t *pamh = authctxt->pam->h;
- const char *where = "authenticating";
- char *text = NULL;
-
- debug2("Calling pam_authenticate()");
- retval = pam_authenticate(pamh,
- options.permit_empty_passwd ? 0 :
- PAM_DISALLOW_NULL_AUTHTOK);
-
- if (retval != PAM_SUCCESS)
- goto cleanup;
-
- debug2("kbd-int: pam_authenticate() succeeded");
- where = "authorizing";
- retval = pam_acct_mgmt(pamh, 0);
-
- if (retval == PAM_NEW_AUTHTOK_REQD) {
- if (authctxt->valid && authctxt->pw != NULL) {
- /* send password expiration warning */
- message_cat(&text,
- gettext("Warning: Your password has expired,"
- " please change it now."));
- packet_start(SSH2_MSG_USERAUTH_INFO_REQUEST);
- packet_put_cstring(""); /* name */
- packet_put_utf8_cstring(text); /* instructions */
- packet_put_cstring(""); /* language, unused */
- packet_put_int(0);
- packet_send();
- packet_write_wait();
- debug("expiration message sent");
- if (text)
- xfree(text);
- /*
- * wait for the response so it does not mix
- * with the upcoming PAM conversation
- */
- packet_read_expect(SSH2_MSG_USERAUTH_INFO_RESPONSE);
- /*
- * Can't use temporarily_use_uid() and restore_uid()
- * here because we need (euid == 0 && ruid == pw_uid)
- * whereas temporarily_use_uid() arranges for
- * (suid = 0 && euid == pw_uid && ruid == pw_uid).
- */
- (void) setreuid(authctxt->pw->pw_uid, -1);
- debug2("kbd-int: changing expired password");
- where = "changing authentication tokens (password)";
- /*
- * Depending on error returned from pam_chauthtok, we
- * need to try to change password a few times before
- * we error out and return.
- */
- int tries = 0;
- while ((retval = pam_chauthtok(pamh,
- PAM_CHANGE_EXPIRED_AUTHTOK)) != PAM_SUCCESS) {
- if (tries++ < DEF_ATTEMPTS) {
- if ((retval == PAM_AUTHTOK_ERR) ||
- (retval == PAM_TRY_AGAIN)) {
- continue;
- }
- }
- break;
- }
- audit_sshd_chauthtok(retval, authctxt->pw->pw_uid,
- authctxt->pw->pw_gid);
- (void) setreuid(0, -1);
- } else {
- retval = PAM_PERM_DENIED;
- }
- }
-
- if (retval != PAM_SUCCESS)
- goto cleanup;
-
- authctxt->pam->state |= PAM_S_DONE_ACCT_MGMT;
-
- retval = finish_userauth_do_pam(authctxt);
-
- if (retval != PAM_SUCCESS)
- goto cleanup;
-
- /*
- * PAM handle stays around so we can call pam_close_session()
- * on it later.
- */
- authctxt->method->authenticated = 1;
- debug2("kbd-int: success (pam->state == %x)", authctxt->pam->state);
- return;
-
-cleanup:
- /*
- * Check for abandonment and cleanup. When kbdint is abandoned
- * authctxt->pam->h is NULLed and by this point a new handle may
- * be allocated.
- */
- if (authctxt->pam->h != pamh) {
- log("Keyboard-interactive (PAM) userauth abandoned "
- "while %s", where);
- if ((retval2 = pam_end(pamh, retval)) != PAM_SUCCESS) {
- log("Cannot close PAM handle after "
- "kbd-int userauth abandonment[%d]: %.200s",
- retval2, PAM_STRERROR(pamh, retval2));
- }
- authctxt->method->abandoned = 1;
-
- /*
- * Avoid double counting; these are incremented in
- * kbdint_pam_abandon() so that they reflect the correct
- * count when userauth_finish() is called before
- * unwinding the dispatch_run() loop, but they are
- * incremented again in input_userauth_request() when
- * the loop is unwound, right here.
- */
- if (authctxt->method->abandons)
- authctxt->method->abandons--;
- if (authctxt->method->attempts)
- authctxt->method->attempts--;
- }
- else {
- /* Save error value for pam_end() */
- authctxt->pam->last_pam_retval = retval;
- log("Keyboard-interactive (PAM) userauth failed[%d] "
- "while %s: %.200s", retval, where,
- PAM_STRERROR(pamh, retval));
- /* pam handle can be reused elsewhere, so no pam_end() here */
- }
-
- return;
-}
-
-static int
-do_pam_conv_kbd_int(int num_msg, struct pam_message **msg,
- struct pam_response **resp, void *appdata_ptr)
-{
- int i, j;
- char *text;
- Convctxt *conv_ctxt;
- Authctxt *authctxt = (Authctxt *)appdata_ptr;
-
- if (!authctxt || !authctxt->method) {
- debug("Missing state during PAM conversation");
- return PAM_CONV_ERR;
- }
-
- conv_ctxt = xmalloc(sizeof(Convctxt));
- (void) memset(conv_ctxt, 0, sizeof(Convctxt));
- conv_ctxt->finished = 0;
- conv_ctxt->num_received = 0;
- conv_ctxt->num_expected = 0;
- conv_ctxt->prompts = xmalloc(sizeof(int) * num_msg);
- conv_ctxt->responses = xmalloc(sizeof(struct pam_response) * num_msg);
- (void) memset(conv_ctxt->responses, 0, sizeof(struct pam_response) * num_msg);
-
- text = NULL;
- for (i = 0, conv_ctxt->num_expected = 0; i < num_msg; i++) {
- int style = PAM_MSG_MEMBER(msg, i, msg_style);
- switch (style) {
- case PAM_PROMPT_ECHO_ON:
- debug2("PAM echo on prompt: %s",
- PAM_MSG_MEMBER(msg, i, msg));
- conv_ctxt->num_expected++;
- break;
- case PAM_PROMPT_ECHO_OFF:
- debug2("PAM echo off prompt: %s",
- PAM_MSG_MEMBER(msg, i, msg));
- conv_ctxt->num_expected++;
- break;
- case PAM_TEXT_INFO:
- debug2("PAM text info prompt: %s",
- PAM_MSG_MEMBER(msg, i, msg));
- message_cat(&text, PAM_MSG_MEMBER(msg, i, msg));
- break;
- case PAM_ERROR_MSG:
- debug2("PAM error prompt: %s",
- PAM_MSG_MEMBER(msg, i, msg));
- message_cat(&text, PAM_MSG_MEMBER(msg, i, msg));
- break;
- default:
- /* Capture all these messages to be sent at once */
- message_cat(&text, PAM_MSG_MEMBER(msg, i, msg));
- break;
- }
- }
-
- if (conv_ctxt->num_expected == 0 && text == NULL) {
- xfree(conv_ctxt->prompts);
- xfree(conv_ctxt->responses);
- xfree(conv_ctxt);
- return PAM_SUCCESS;
- }
-
- authctxt->method->method_data = (void *) conv_ctxt;
-
- packet_start(SSH2_MSG_USERAUTH_INFO_REQUEST);
- packet_put_cstring(""); /* Name */
- packet_put_utf8_cstring(text ? text : ""); /* Instructions */
- packet_put_cstring(""); /* Language */
- packet_put_int(conv_ctxt->num_expected);
-
- if (text)
- xfree(text);
-
- for (i = 0, j = 0; i < num_msg; i++) {
- int style = PAM_MSG_MEMBER(msg, i, msg_style);
-
- /* Skip messages which don't need a reply */
- if (style != PAM_PROMPT_ECHO_ON && style != PAM_PROMPT_ECHO_OFF)
- continue;
-
- conv_ctxt->prompts[j++] = i;
- packet_put_utf8_cstring(PAM_MSG_MEMBER(msg, i, msg));
- packet_put_char(style == PAM_PROMPT_ECHO_ON);
- }
- packet_send();
- packet_write_wait();
-
- /*
- * Here the dispatch_run() loop is nested. It should be unwound
- * if keyboard-interactive userauth is abandoned (or restarted;
- * same thing).
- *
- * The condition for breaking out of the nested dispatch_run() loop is
- * ((got kbd-int info reponse) || (kbd-int abandoned))
- *
- * conv_ctxt->finished is set in either of those cases.
- *
- * When abandonment is detected the conv_ctxt->finished is set as
- * is conv_ctxt->abandoned, causing this function to signal
- * userauth nested dispatch_run() loop unwinding and to return
- * PAM_CONV_ERR;
- */
- debug2("Nesting dispatch_run loop");
- dispatch_run(DISPATCH_BLOCK, &conv_ctxt->finished, appdata_ptr);
- debug2("Nested dispatch_run loop exited");
-
- if (conv_ctxt->abandoned) {
- authctxt->unwind_dispatch_loop = 1;
- xfree(conv_ctxt->prompts);
- xfree(conv_ctxt->responses);
- xfree(conv_ctxt);
- debug("PAM conv function returns PAM_CONV_ERR");
- return PAM_CONV_ERR;
- }
-
- if (conv_ctxt->num_received == conv_ctxt->num_expected) {
- *resp = conv_ctxt->responses;
- xfree(conv_ctxt->prompts);
- xfree(conv_ctxt);
- debug("PAM conv function returns PAM_SUCCESS");
- return PAM_SUCCESS;
- }
-
- debug("PAM conv function returns PAM_CONV_ERR");
- xfree(conv_ctxt->prompts);
- xfree(conv_ctxt->responses);
- xfree(conv_ctxt);
- return PAM_CONV_ERR;
-}
-
-static void
-input_userauth_info_response_pam(int type, u_int32_t seqnr, void *ctxt)
-{
- Authctxt *authctxt = ctxt;
- Convctxt *conv_ctxt;
- unsigned int nresp = 0, rlen = 0, i = 0;
- char *resp;
-
- if (authctxt == NULL)
- fatal("input_userauth_info_response_pam: no authentication context");
-
- /* Check for spurious/unexpected info response */
- if (method_kbdint.method_data == NULL) {
- debug("input_userauth_info_response_pam: no method context");
- return;
- }
-
- conv_ctxt = (Convctxt *) method_kbdint.method_data;
-
- nresp = packet_get_int(); /* Number of responses. */
- debug("got %d responses", nresp);
-
-
-#if 0
- if (nresp != conv_ctxt->num_expected)
- fatal("%s: Received incorrect number of responses "
- "(expected %d, received %u)", __func__,
- conv_ctxt->num_expected, nresp);
-#endif
-
- if (nresp > 100)
- fatal("%s: too many replies", __func__);
-
- for (i = 0; i < nresp && i < conv_ctxt->num_expected ; i++) {
- int j = conv_ctxt->prompts[i];
-
- /*
- * We assume that ASCII charset is used for password
- * although the protocol requires UTF-8 encoding for the
- * password string. Therefore, we don't perform code
- * conversion for the string.
- */
- resp = packet_get_string(&rlen);
- if (i < conv_ctxt->num_expected) {
- conv_ctxt->responses[j].resp_retcode = PAM_SUCCESS;
- conv_ctxt->responses[j].resp = xstrdup(resp);
- conv_ctxt->num_received++;
- }
- xfree(resp);
- }
-
- if (nresp < conv_ctxt->num_expected)
- fatal("%s: too few replies (%d < %d)", __func__,
- nresp, conv_ctxt->num_expected);
-
- /* XXX - This could make a covert channel... */
- if (nresp > conv_ctxt->num_expected)
- debug("Ignoring additional PAM replies");
-
- conv_ctxt->finished = 1;
-
- packet_check_eom();
-}
-
-#if 0
-int
-kbdint_pam_abandon_chk(Authctxt *authctxt, Authmethod *method)
-{
- if (!method)
- return 0; /* fatal(), really; it'll happen somewhere else */
-
- if (!method->method_data)
- return 0;
-
- return 1;
-}
-#endif
-
-void
-kbdint_pam_abandon(Authctxt *authctxt, Authmethod *method)
-{
- Convctxt *conv_ctxt;
-
- /*
- * But, if it ever becomes desirable and possible to support
- * kbd-int userauth abandonment, here's what must be done.
- */
- if (!method)
- return;
-
- if (!method->method_data)
- return;
-
- conv_ctxt = (Convctxt *) method->method_data;
-
- /* dispatch_run() loop will exit */
- conv_ctxt->abandoned = 1;
- conv_ctxt->finished = 1;
-
- /*
- * The method_data will be free in the corresponding, active
- * conversation function
- */
- method->method_data = NULL;
-
- /* update counts that can't be updated elsewhere */
- method->abandons++;
- method->attempts++;
-
- /* Finally, we cannot re-use the current current PAM handle */
- authctxt->pam->h = NULL; /* Let the conv function cleanup */
-}
-#endif
diff --git a/usr/src/cmd/ssh/sshd/auth2-passwd.c b/usr/src/cmd/ssh/sshd/auth2-passwd.c
deleted file mode 100644
index 9a1837fb05..0000000000
--- a/usr/src/cmd/ssh/sshd/auth2-passwd.c
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2000 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: auth2-passwd.c,v 1.2 2002/05/31 11:35:15 markus Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "xmalloc.h"
-#include "packet.h"
-#include "log.h"
-#include "auth.h"
-#include "servconf.h"
-
-/* import */
-extern ServerOptions options;
-
-static void
-userauth_passwd(Authctxt *authctxt)
-{
- char *password;
- int change;
- u_int len;
-
- if (!authctxt || !authctxt->method)
- fatal("%s: missing context", __func__);
-
- change = packet_get_char();
- if (change)
- log("password change not supported");
- password = packet_get_string(&len);
- packet_check_eom();
- if (
-#ifdef HAVE_CYGWIN
- check_nt_auth(1, authctxt->pw) &&
-#endif
- auth_password(authctxt, password) == 1) {
- authctxt->method->authenticated = 1;
- }
- memset(password, 0, len);
- xfree(password);
-}
-
-Authmethod method_passwd = {
- "password",
- &options.password_authentication,
- userauth_passwd,
- NULL, /* no abandon function */
- NULL, NULL, /* method data and hist data */
- 1, /* initial userauth */
- 0, 0, 0, /* counters */
- 0, 0, 0, 0, 0, 0 /* state */
-};
diff --git a/usr/src/cmd/ssh/sshd/auth2-pubkey.c b/usr/src/cmd/ssh/sshd/auth2-pubkey.c
deleted file mode 100644
index 658634c195..0000000000
--- a/usr/src/cmd/ssh/sshd/auth2-pubkey.c
+++ /dev/null
@@ -1,359 +0,0 @@
-/*
- * Copyright (c) 2000 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: auth2-pubkey.c,v 1.2 2002/05/31 11:35:15 markus Exp $");
-
-#include "ssh2.h"
-#include "xmalloc.h"
-#include "packet.h"
-#include "buffer.h"
-#include "log.h"
-#include "servconf.h"
-#include "compat.h"
-#include "bufaux.h"
-#include "auth.h"
-#include "key.h"
-#include "pathnames.h"
-#include "uidswap.h"
-#include "auth-options.h"
-#include "canohost.h"
-
-#ifdef USE_PAM
-#include <security/pam_appl.h>
-#include "auth-pam.h"
-#endif /* USE_PAM */
-
-/* import */
-extern ServerOptions options;
-extern u_char *session_id2;
-extern int session_id2_len;
-
-static void
-userauth_pubkey(Authctxt *authctxt)
-{
- Buffer b;
- Key *key = NULL;
- char *pkalg;
- u_char *pkblob, *sig;
- u_int alen, blen, slen;
- int have_sig, pktype;
- int authenticated = 0;
-
- if (!authctxt || !authctxt->method)
- fatal("%s: missing context", __func__);
-
- have_sig = packet_get_char();
- if (datafellows & SSH_BUG_PKAUTH) {
- debug2("userauth_pubkey: SSH_BUG_PKAUTH");
- /* no explicit pkalg given */
- pkblob = packet_get_string(&blen);
- buffer_init(&b);
- buffer_append(&b, pkblob, blen);
- /* so we have to extract the pkalg from the pkblob */
- pkalg = buffer_get_string(&b, &alen);
- buffer_free(&b);
- } else {
- pkalg = packet_get_string(&alen);
- pkblob = packet_get_string(&blen);
- }
- pktype = key_type_from_name(pkalg);
- if (pktype == KEY_UNSPEC) {
- /* this is perfectly legal */
- log("userauth_pubkey: unsupported public key algorithm: %s",
- pkalg);
- goto done;
- }
- key = key_from_blob(pkblob, blen);
- if (key == NULL) {
- error("userauth_pubkey: cannot decode key: %s", pkalg);
- goto done;
- }
- if (key->type != pktype) {
- error("userauth_pubkey: type mismatch for decoded key "
- "(received %d, expected %d)", key->type, pktype);
- goto done;
- }
-
- /* Detect and count abandonment */
- if (authctxt->method->method_data) {
- Key *prev_key;
- unsigned char *prev_pkblob;
- int prev_blen;
-
- /*
- * Check for earlier test of a key that was allowed but
- * not followed up with a pubkey req for the same pubkey
- * and with a signature.
- */
- prev_key = authctxt->method->method_data;
- if ((prev_blen = key_to_blob(prev_key,
- &prev_pkblob, NULL))) {
- if (prev_blen != blen ||
- memcmp(prev_pkblob, pkblob, blen) != 0) {
- authctxt->method->abandons++;
- authctxt->method->attempts++;
- }
- }
- key_free(prev_key);
- authctxt->method->method_data = NULL;
- }
-
- if (have_sig) {
- sig = packet_get_string(&slen);
- packet_check_eom();
- buffer_init(&b);
- if (datafellows & SSH_OLD_SESSIONID) {
- buffer_append(&b, session_id2, session_id2_len);
- } else {
- buffer_put_string(&b, session_id2, session_id2_len);
- }
- /* reconstruct packet */
- buffer_put_char(&b, SSH2_MSG_USERAUTH_REQUEST);
- buffer_put_cstring(&b, authctxt->user);
- buffer_put_cstring(&b,
- datafellows & SSH_BUG_PKSERVICE ?
- "ssh-userauth" :
- authctxt->service);
- if (datafellows & SSH_BUG_PKAUTH) {
- buffer_put_char(&b, have_sig);
- } else {
- buffer_put_cstring(&b, "publickey");
- buffer_put_char(&b, have_sig);
- buffer_put_cstring(&b, pkalg);
- }
- buffer_put_string(&b, pkblob, blen);
-#ifdef DEBUG_PK
- buffer_dump(&b);
-#endif
- /* test for correct signature */
- if (user_key_allowed(authctxt->pw, key) &&
- key_verify(key, sig, slen, buffer_ptr(&b),
- buffer_len(&b)) == 1) {
- authenticated = 1;
- }
- authctxt->method->postponed = 0;
- buffer_free(&b);
- xfree(sig);
- } else {
- debug("test whether pkalg/pkblob are acceptable");
- packet_check_eom();
-
- /* XXX fake reply and always send PK_OK ? */
- /*
- * XXX this allows testing whether a user is allowed
- * to login: if you happen to have a valid pubkey this
- * message is sent. the message is NEVER sent at all
- * if a user is not allowed to login. is this an
- * issue? -markus
- */
- if (user_key_allowed(authctxt->pw, key)) {
- packet_start(SSH2_MSG_USERAUTH_PK_OK);
- packet_put_string(pkalg, alen);
- packet_put_string(pkblob, blen);
- packet_send();
- packet_write_wait();
- authctxt->method->postponed = 1;
- /*
- * Remember key that was tried so we can
- * correctly detect abandonment. See above.
- */
- authctxt->method->method_data = (void *) key;
- key = NULL;
- }
- }
- if (authenticated != 1)
- auth_clear_options();
-
-done:
- /*
- * XXX TODO: add config options for specifying users for whom
- * this userauth is insufficient and what userauths may
- * continue.
- */
-#ifdef USE_PAM
- if (authenticated) {
- if (!do_pam_non_initial_userauth(authctxt))
- authenticated = 0;
- }
-#endif /* USE_PAM */
-
- debug2("userauth_pubkey: authenticated %d pkalg %s", authenticated, pkalg);
- if (key != NULL)
- key_free(key);
- xfree(pkalg);
- xfree(pkblob);
-#ifdef HAVE_CYGWIN
- if (check_nt_auth(0, authctxt->pw) == 0)
- return;
-#endif
- if (authenticated)
- authctxt->method->authenticated = 1;
-}
-
-/* return 1 if user allows given key */
-static int
-user_key_allowed2(struct passwd *pw, Key *key, char *file)
-{
- char line[8192];
- int found_key = 0;
- FILE *f;
- u_long linenum = 0;
- struct stat st;
- Key *found;
- char *fp;
-
- if (pw == NULL)
- return 0;
-
- /* Temporarily use the user's uid. */
- temporarily_use_uid(pw);
-
- debug("trying public key file %s", file);
-
- /* Fail quietly if file does not exist */
- if (stat(file, &st) < 0) {
- /* Restore the privileged uid. */
- restore_uid();
- return 0;
- }
- /* Open the file containing the authorized keys. */
- f = fopen(file, "r");
- if (!f) {
- /* Restore the privileged uid. */
- restore_uid();
- return 0;
- }
- if (options.strict_modes &&
- secure_filename(f, file, pw, line, sizeof(line)) != 0) {
- (void) fclose(f);
- log("Authentication refused: %s", line);
- restore_uid();
- return 0;
- }
-
- found_key = 0;
- found = key_new(key->type);
-
- while (fgets(line, sizeof(line), f)) {
- char *cp, *options = NULL;
- linenum++;
- /* Skip leading whitespace, empty and comment lines. */
- for (cp = line; *cp == ' ' || *cp == '\t'; cp++)
- ;
- if (!*cp || *cp == '\n' || *cp == '#')
- continue;
-
- if (key_read(found, &cp) != 1) {
- /* no key? check if there are options for this key */
- int quoted = 0;
- debug2("user_key_allowed: check options: '%s'", cp);
- options = cp;
- for (; *cp && (quoted || (*cp != ' ' && *cp != '\t')); cp++) {
- if (*cp == '\\' && cp[1] == '"')
- cp++; /* Skip both */
- else if (*cp == '"')
- quoted = !quoted;
- }
- /* Skip remaining whitespace. */
- for (; *cp == ' ' || *cp == '\t'; cp++)
- ;
- if (key_read(found, &cp) != 1) {
- debug2("user_key_allowed: advance: '%s'", cp);
- /* still no key? advance to next line*/
- continue;
- }
- }
- if (key_equal(found, key) &&
- auth_parse_options(pw, options, file, linenum) == 1) {
- found_key = 1;
- debug("matching key found: file %s, line %lu",
- file, linenum);
- fp = key_fingerprint(found, SSH_FP_MD5, SSH_FP_HEX);
- verbose("Found matching %s key: %s",
- key_type(found), fp);
- xfree(fp);
- break;
- }
- }
- restore_uid();
- (void) fclose(f);
- key_free(found);
- if (!found_key)
- debug2("key not found");
- return found_key;
-}
-
-/* check whether given key is in .ssh/authorized_keys* */
-int
-user_key_allowed(struct passwd *pw, Key *key)
-{
- int success;
- char *file;
-
- if (pw == NULL)
- return 0;
-
- file = authorized_keys_file(pw);
- success = user_key_allowed2(pw, key, file);
- xfree(file);
- if (success)
- return success;
-
- /* try suffix "2" for backward compat, too */
- file = authorized_keys_file2(pw);
- success = user_key_allowed2(pw, key, file);
- xfree(file);
- return success;
-}
-
-static
-void
-userauth_pubkey_abandon(Authctxt *authctxt, Authmethod *method)
-{
- if (!authctxt || !method)
- return;
-
- if (method->method_data) {
- method->abandons++;
- method->attempts++;
- key_free((Key *) method->method_data);
- method->method_data = NULL;
- }
-}
-
-Authmethod method_pubkey = {
- "publickey",
- &options.pubkey_authentication,
- userauth_pubkey,
- userauth_pubkey_abandon,
- NULL, NULL, /* method data and hist data */
- 0, /* not initial userauth */
- 0, 0, 0, /* counters */
- 0, 0, 0, 0, 0, 0 /* state */
-};
diff --git a/usr/src/cmd/ssh/sshd/auth2.c b/usr/src/cmd/ssh/sshd/auth2.c
deleted file mode 100644
index bc3f4284f3..0000000000
--- a/usr/src/cmd/ssh/sshd/auth2.c
+++ /dev/null
@@ -1,695 +0,0 @@
-/*
- * Copyright (c) 2000 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: auth2.c,v 1.95 2002/08/22 21:33:58 markus Exp $");
-
-#include "ssh2.h"
-#include "xmalloc.h"
-#include "packet.h"
-#include "log.h"
-#include "servconf.h"
-#include "compat.h"
-#include "misc.h"
-#include "auth.h"
-#include "dispatch.h"
-#include "sshlogin.h"
-#include "pathnames.h"
-
-#ifdef HAVE_BSM
-#include "bsmaudit.h"
-extern adt_session_data_t *ah;
-#endif /* HAVE_BSM */
-
-#ifdef GSSAPI
-#include "ssh-gss.h"
-#endif
-
-/* import */
-extern ServerOptions options;
-extern u_char *session_id2;
-extern int session_id2_len;
-
-Authctxt *x_authctxt = NULL;
-
-/* methods */
-
-extern Authmethod method_none;
-extern Authmethod method_pubkey;
-extern Authmethod method_passwd;
-extern Authmethod method_kbdint;
-extern Authmethod method_hostbased;
-extern Authmethod method_external;
-extern Authmethod method_gssapi;
-
-static Authmethod *authmethods[] = {
- &method_none,
-#ifdef GSSAPI
- &method_external,
- &method_gssapi,
-#endif
- &method_pubkey,
- &method_passwd,
- &method_kbdint,
- &method_hostbased,
- NULL
-};
-
-/* protocol */
-
-static void input_service_request(int, u_int32_t, void *);
-static void input_userauth_request(int, u_int32_t, void *);
-
-/* helper */
-static Authmethod *authmethod_lookup(const char *);
-static char *authmethods_get(void);
-static char *authmethods_check_abandonment(Authctxt *authctxt,
- Authmethod *method);
-static void authmethod_count_attempt(Authmethod *method);
-/*static char *authmethods_get_kbdint(void);*/
-int user_key_allowed(struct passwd *, Key *);
-int hostbased_key_allowed(struct passwd *, const char *, char *, Key *);
-static int userauth_method_can_run(Authmethod *method);
-static void userauth_reset_methods(void);
-
-/*
- * loop until authctxt->success == TRUE
- */
-
-Authctxt *
-do_authentication2(void)
-{
- Authctxt *authctxt = authctxt_new();
-
- x_authctxt = authctxt; /*XXX*/
-
-#ifdef HAVE_BSM
- fatal_add_cleanup(audit_failed_login_cleanup, authctxt);
-#endif /* HAVE_BSM */
-
- dispatch_init(&dispatch_protocol_error);
- dispatch_set(SSH2_MSG_SERVICE_REQUEST, &input_service_request);
- dispatch_run(DISPATCH_BLOCK, &authctxt->success, authctxt);
-
- return (authctxt);
-}
-
-static void
-input_service_request(int type, u_int32_t seq, void *ctxt)
-{
- Authctxt *authctxt = ctxt;
- u_int len;
- int acceptit = 0;
- char *service = packet_get_string(&len);
- packet_check_eom();
-
- if (authctxt == NULL)
- fatal("input_service_request: no authctxt");
-
- if (strcmp(service, "ssh-userauth") == 0) {
- if (!authctxt->success) {
- acceptit = 1;
- /* now we can handle user-auth requests */
- dispatch_set(SSH2_MSG_USERAUTH_REQUEST, &input_userauth_request);
- }
- }
- /* XXX all other service requests are denied */
-
- if (acceptit) {
- packet_start(SSH2_MSG_SERVICE_ACCEPT);
- packet_put_cstring(service);
- packet_send();
- packet_write_wait();
- } else {
- debug("bad service request %s", service);
- packet_disconnect("bad service request %s", service);
- }
- xfree(service);
-}
-
-static void
-input_userauth_request(int type, u_int32_t seq, void *ctxt)
-{
- Authctxt *authctxt = ctxt;
- Authmethod *m = NULL;
- char *user, *service, *method, *style = NULL;
- int valid_attempt;
-
- if (authctxt == NULL)
- fatal("input_userauth_request: no authctxt");
-
- user = packet_get_string(NULL);
- service = packet_get_string(NULL);
- method = packet_get_string(NULL);
- debug("userauth-request for user %s service %s method %s", user,
- service, method);
- debug("attempt %d initial attempt %d failures %d initial failures %d",
- authctxt->attempt, authctxt->init_attempt,
- authctxt->failures, authctxt->init_failures);
-
- m = authmethod_lookup(method);
-
- if ((style = strchr(user, ':')) != NULL)
- *style++ = 0;
-
- authctxt->attempt++;
- if (m != NULL && m->is_initial)
- authctxt->init_attempt++;
-
- if (options.pre_userauth_hook != NULL &&
- run_auth_hook(options.pre_userauth_hook, user, m->name) != 0) {
- valid_attempt = 0;
- } else {
- valid_attempt = 1;
- }
-
- if (authctxt->attempt == 1) {
- /* setup auth context */
- authctxt->pw = getpwnamallow(user);
- /* May want to abstract SSHv2 services someday */
- if (authctxt->pw && strcmp(service, "ssh-connection")==0) {
- /* enforced in userauth_finish() below */
- if (valid_attempt) {
- authctxt->valid = 1;
- }
- debug2("input_userauth_request: setting up authctxt for %s", user);
- } else {
- log("input_userauth_request: illegal user %s", user);
- }
- setproctitle("%s", authctxt->pw ? user : "unknown");
- authctxt->user = xstrdup(user);
- authctxt->service = xstrdup(service);
- authctxt->style = style ? xstrdup(style) : NULL;
- userauth_reset_methods();
- } else {
- char *abandoned;
-
- /*
- * Check for abandoned [multi-round-trip] userauths
- * methods (e.g., kbdint). Userauth method abandonment
- * should be treated as userauth method failure and
- * counted against max_auth_tries.
- */
- abandoned = authmethods_check_abandonment(authctxt, m);
-
- if (abandoned != NULL &&
- authctxt->failures > options.max_auth_tries) {
- /* userauth_finish() will now packet_disconnect() */
- userauth_finish(authctxt, abandoned);
- /* NOTREACHED */
- }
-
- /* Handle user|service changes, possibly packet_disconnect() */
- userauth_user_svc_change(authctxt, user, service);
- }
-
- authctxt->method = m;
-
- /* run userauth method, try to authenticate user */
- if (m != NULL && userauth_method_can_run(m)) {
- debug2("input_userauth_request: try method %s", method);
-
- m->postponed = 0;
- m->abandoned = 0;
- m->authenticated = 0;
-
- if (!m->is_initial ||
- authctxt->init_failures < options.max_init_auth_tries)
- m->userauth(authctxt);
-
- authmethod_count_attempt(m);
-
- if (authctxt->unwind_dispatch_loop) {
- /*
- * Method ran nested dispatch loop but was
- * abandoned. Cleanup and return without doing
- * anything else; we're just unwinding the stack.
- */
- authctxt->unwind_dispatch_loop = 0;
- goto done;
- }
-
- if (m->postponed)
- goto done; /* multi-round trip userauth not finished */
-
- if (m->abandoned) {
- /* multi-round trip userauth abandoned, log failure */
- auth_log(authctxt, 0, method, " ssh2");
- goto done;
- }
- }
-
- userauth_finish(authctxt, method);
-
-done:
- xfree(service);
- xfree(user);
- xfree(method);
-}
-
-void
-userauth_finish(Authctxt *authctxt, char *method)
-{
- int authenticated, partial;
-
- if (authctxt == NULL)
- fatal("%s: missing context", __func__);
-
- /* unknown method handling -- must elicit userauth failure msg */
- if (authctxt->method == NULL) {
- authenticated = 0;
- partial = 0;
- goto done_checking;
- }
-
-#ifndef USE_PAM
- /* Special handling for root (done elsewhere for PAM) */
- if (authctxt->method->authenticated &&
- authctxt->pw != NULL && authctxt->pw->pw_uid == 0 &&
- !auth_root_allowed(method))
- authctxt->method->authenticated = 0;
-#endif /* USE_PAM */
-
-#ifdef _UNICOS
- if (authctxt->method->authenticated &&
- cray_access_denied(authctxt->user)) {
- authctxt->method->authenticated = 0;
- fatal("Access denied for user %s.",authctxt->user);
- }
-#endif /* _UNICOS */
-
- partial = userauth_check_partial_failure(authctxt);
- authenticated = authctxt->method->authenticated;
-
-#ifdef USE_PAM
- /*
- * If the userauth method failed to complete PAM work then force
- * partial failure.
- */
- if (authenticated && !AUTHPAM_DONE(authctxt))
- partial = 1;
-#endif /* USE_PAM */
-
- /*
- * To properly support invalid userauth method names we set
- * authenticated=0, partial=0 above and know that
- * authctxt->method == NULL.
- *
- * No unguarded reference to authctxt->method allowed from here.
- * Checking authenticated != 0 is a valid guard; authctxt->method
- * MUST NOT be NULL if authenticated.
- */
-done_checking:
- if (!authctxt->valid && authenticated) {
- /*
- * We get here if the PreUserauthHook fails but the
- * user is otherwise valid.
- * An error in the PAM handling could also get us here
- * but we need not panic, just treat as a failure.
- */
- authctxt->method->authenticated = 0;
- authenticated = 0;
- log("Ignoring authenticated invalid user %s",
- authctxt->user);
- auth_log(authctxt, 0, method, " ssh2");
- }
-
- /* Log before sending the reply */
- auth_log(authctxt, authenticated, method, " ssh2");
-
- if (authenticated && !partial) {
-
- /* turn off userauth */
- dispatch_set(SSH2_MSG_USERAUTH_REQUEST, &dispatch_protocol_ignore);
- packet_start(SSH2_MSG_USERAUTH_SUCCESS);
- packet_send();
- packet_write_wait();
- /* now we can break out */
- authctxt->success = 1;
- } else {
- char *methods;
-
- if (authctxt->method && authctxt->method->is_initial)
- authctxt->init_failures++;
-
- authctxt->method = NULL;
-
-#ifdef USE_PAM
- /*
- * Keep track of last PAM error (or PERM_DENIED) for BSM
- * login failure auditing, which may run after the PAM
- * state has been cleaned up.
- */
- authctxt->pam_retval = AUTHPAM_ERROR(authctxt, PAM_PERM_DENIED);
-#endif /* USE_PAM */
-
- if (authctxt->failures++ > options.max_auth_tries) {
-#ifdef HAVE_BSM
- fatal_remove_cleanup(audit_failed_login_cleanup,
- authctxt);
- audit_sshd_login_failure(&ah, PAM_MAXTRIES,
- authctxt->user);
-#endif /* HAVE_BSM */
- packet_disconnect(AUTH_FAIL_MSG, authctxt->user);
- }
-
-#ifdef _UNICOS
- if (strcmp(method, "password") == 0)
- cray_login_failure(authctxt->user, IA_UDBERR);
-#endif /* _UNICOS */
- packet_start(SSH2_MSG_USERAUTH_FAILURE);
-
- /*
- * If (partial) then authmethods_get() will return only
- * required methods, likely only "keyboard-interactive;"
- * (methods == NULL) implies failure, even if (partial == 1)
- */
- methods = authmethods_get();
- packet_put_cstring(methods);
- packet_put_char((authenticated && partial && methods) ? 1 : 0);
- if (methods)
- xfree(methods);
- packet_send();
- packet_write_wait();
- }
-}
-
-/* get current user */
-
-struct passwd*
-auth_get_user(void)
-{
- return (x_authctxt != NULL && x_authctxt->valid) ? x_authctxt->pw : NULL;
-}
-
-#define DELIM ","
-
-#if 0
-static char *
-authmethods_get_kbdint(void)
-{
- Buffer b;
- int i;
-
- for (i = 0; authmethods[i] != NULL; i++) {
- if (strcmp(authmethods[i]->name, "keyboard-interactive") != 0)
- continue;
- return xstrdup(authmethods[i]->name);
- }
- return NULL;
-}
-#endif
-
-void
-userauth_user_svc_change(Authctxt *authctxt, char *user, char *service)
-{
- /*
- * NOTE:
- *
- * SSHv2 services should be abstracted and service changes during
- * userauth should be supported as per the userauth draft. In the PAM
- * case, support for multiple SSHv2 services means that we have to
- * format the PAM service name according to the SSHv2 service *and* the
- * SSHv2 userauth being attempted ("passwd", "kbdint" and "other").
- *
- * We'll cross that bridge when we come to it. For now disallow service
- * changes during userauth if using PAM, but allow username changes.
- */
-
- /* authctxt->service must == ssh-connection here */
- if (service != NULL && strcmp(service, authctxt->service) != 0) {
- packet_disconnect("Change of service not "
- "allowed: %s and %s",
- authctxt->service, service);
- }
- if (user != NULL && authctxt->user != NULL &&
- strcmp(user, authctxt->user) == 0)
- return;
-
- /* All good; update authctxt */
- xfree(authctxt->user);
- authctxt->user = xstrdup(user);
- pwfree(&authctxt->pw);
- authctxt->pw = getpwnamallow(user);
- authctxt->valid = (authctxt->pw != NULL);
-
- /* Forget method state; abandon postponed userauths */
- userauth_reset_methods();
-}
-
-int
-userauth_check_partial_failure(Authctxt *authctxt)
-{
- int i;
- int required = 0;
- int sufficient = 0;
-
- /*
- * v1 does not set authctxt->method
- * partial userauth failure is a v2 concept
- */
- if (authctxt->method == NULL)
- return 0;
-
- for (i = 0; authmethods[i] != NULL; i++) {
- if (authmethods[i]->required)
- required++;
- if (authmethods[i]->sufficient)
- sufficient++;
- }
-
- if (required == 0 && sufficient == 0)
- return !authctxt->method->authenticated;
-
- if (required == 1 && authctxt->method->required)
- return !authctxt->method->authenticated;
-
- if (sufficient && authctxt->method->sufficient)
- return !authctxt->method->authenticated;
-
- return 1;
-}
-
-int
-userauth_method_can_run(Authmethod *method)
-{
- if (method->not_again)
- return 0;
-
- return 1;
-}
-
-static
-void
-userauth_reset_methods(void)
-{
- int i;
-
- for (i = 0; authmethods[i] != NULL; i++) {
- /* note: counters not reset */
- authmethods[i]->required = 0;
- authmethods[i]->sufficient = 0;
- authmethods[i]->authenticated = 0;
- authmethods[i]->not_again = 0;
- authmethods[i]->postponed = 0;
- authmethods[i]->abandoned = 0;
- }
-}
-
-void
-userauth_force_kbdint(void)
-{
- int i;
-
- for (i = 0; authmethods[i] != NULL; i++) {
- authmethods[i]->required = 0;
- authmethods[i]->sufficient = 0;
- }
- method_kbdint.required = 1;
-}
-
-/*
- * Check to see if a previously run multi-round trip userauth method has
- * been abandoned and call its cleanup function.
- *
- * Abandoned userauth method invocations are counted as userauth failures.
- */
-static
-char *
-authmethods_check_abandonment(Authctxt *authctxt, Authmethod *method)
-{
- int i;
-
- /* optimization: check current method first */
- if (method && method->postponed) {
- method->postponed = 0;
- if (method->abandon)
- method->abandon(authctxt, method);
- else
- method->abandons++;
- authctxt->failures++; /* abandonment -> failure */
- if (method->is_initial)
- authctxt->init_failures++;
-
- /*
- * Since we check for abandonment whenever a userauth is
- * requested we know only one method could have been
- * in postponed state, so we can return now.
- */
- return (method->name);
- }
- for (i = 0; authmethods[i] != NULL; i++) {
- if (!authmethods[i]->postponed)
- continue;
-
- /* some method was postponed and a diff one is being started */
- if (method != authmethods[i]) {
- authmethods[i]->postponed = 0;
- if (authmethods[i]->abandon)
- authmethods[i]->abandon(authctxt,
- authmethods[i]);
- else
- authmethods[i]->abandons++;
- authctxt->failures++;
- if (authmethods[i]->is_initial)
- authctxt->init_failures++;
- return (authmethods[i]->name); /* see above */
- }
- }
-
- return NULL;
-}
-
-static char *
-authmethods_get(void)
-{
- Buffer b;
- char *list;
- int i;
- int sufficient = 0;
- int required = 0;
- int authenticated = 0;
- int partial = 0;
-
- /*
- * If at least one method succeeded partially then at least one
- * authmethod will be required and only required methods should
- * continue.
- */
- for (i = 0; authmethods[i] != NULL; i++) {
- if (authmethods[i]->authenticated)
- authenticated++;
- if (authmethods[i]->required)
- required++;
- if (authmethods[i]->sufficient)
- sufficient++;
- }
-
- partial = (required + sufficient) > 0;
-
- buffer_init(&b);
- for (i = 0; authmethods[i] != NULL; i++) {
- if (strcmp(authmethods[i]->name, "none") == 0)
- continue;
- if (required && !authmethods[i]->required)
- continue;
- if (sufficient && !required && !authmethods[i]->sufficient)
- continue;
- if (authmethods[i]->not_again)
- continue;
-
- if (authmethods[i]->required) {
- if (buffer_len(&b) > 0)
- buffer_append(&b, ",", 1);
- buffer_append(&b, authmethods[i]->name,
- strlen(authmethods[i]->name));
- continue;
- }
-
- /*
- * A method can be enabled (marked sufficient)
- * dynamically provided that at least one other method
- * has succeeded partially.
- */
- if ((partial && authmethods[i]->sufficient) ||
- (authmethods[i]->enabled != NULL &&
- *(authmethods[i]->enabled) != 0)) {
- if (buffer_len(&b) > 0)
- buffer_append(&b, ",", 1);
- buffer_append(&b, authmethods[i]->name,
- strlen(authmethods[i]->name));
- }
- }
- buffer_append(&b, "\0", 1);
- list = xstrdup(buffer_ptr(&b));
- buffer_free(&b);
- return list;
-}
-
-static Authmethod *
-authmethod_lookup(const char *name)
-{
- int i;
-
- /*
- * Method must be sufficient, required or enabled and must not
- * be marked as not able to run again
- */
- if (name != NULL)
- for (i = 0; authmethods[i] != NULL; i++)
- if (((authmethods[i]->sufficient ||
- authmethods[i]->required) ||
- (authmethods[i]->enabled != NULL &&
- *(authmethods[i]->enabled) != 0)) &&
- !authmethods[i]->not_again &&
- strcmp(name, authmethods[i]->name) == 0)
- return authmethods[i];
- debug2("Unrecognized authentication method name: %s",
- name ? name : "NULL");
- return NULL;
-}
-
-static void
-authmethod_count_attempt(Authmethod *method)
-{
- if (!method)
- fatal("Internal error in authmethod_count_attempt()");
-
- if (method->postponed)
- return;
-
- method->attempts++;
-
- if (method->abandoned)
- method->abandons++;
- else if (method->authenticated)
- method->successes++;
- else
- method->failures++;
-
- return;
-}
diff --git a/usr/src/cmd/ssh/sshd/bsmaudit.c b/usr/src/cmd/ssh/sshd/bsmaudit.c
deleted file mode 100644
index c46d295972..0000000000
--- a/usr/src/cmd/ssh/sshd/bsmaudit.c
+++ /dev/null
@@ -1,316 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- *
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- *
- * usr/src/cmd/ssh/sshd/bsmaudit.c
- *
- * Taken from the on81 usr/src/lib/libbsm/common/audit_login.c
- */
-#include "includes.h"
-
-#include <sys/systeminfo.h>
-#include <sys/param.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <sys/systeminfo.h>
-#include <sys/stat.h>
-#include <sys/wait.h>
-#include <netinet/in.h>
-#include <netdb.h>
-#include <signal.h>
-
-#include <stdarg.h>
-#include <pwd.h>
-#include <shadow.h>
-#include <utmpx.h>
-#include <unistd.h>
-#include <string.h>
-
-#include <locale.h>
-
-#include "log.h"
-#include "packet.h"
-#include "canohost.h"
-#include "servconf.h"
-#include "xmalloc.h"
-#include <errno.h>
-#include <bsm/adt.h>
-#include <bsm/adt_event.h>
-
-extern uint_t utmp_len; /* XXX - Yuck; we'll keep this for now */
-extern ServerOptions options;
- /*
- * XXX - Yuck; we should have a
- * get_client_name_or_ip that does the
- * right thing wrt reverse lookups
- */
-
-void
-audit_sshd_chauthtok(int pam_retval, uid_t uid, gid_t gid)
-{
- adt_session_data_t *ah = NULL;
- adt_event_data_t *event = NULL;
- const char *how = "couldn't start adt session";
- int saved_errno = 0;
-
- if (adt_start_session(&ah, NULL, 0) != 0) {
- saved_errno = errno;
- goto fail;
- }
- if (adt_set_user(ah, uid, gid, uid, gid, NULL, ADT_NEW) != 0) {
- saved_errno = errno;
- how = "couldn't set adt user";
- goto fail;
- }
-
- if ((event = adt_alloc_event(ah, ADT_passwd)) == NULL) {
- saved_errno = errno;
- how = "couldn't allocate adt event";
- goto fail;
- }
-
- if (pam_retval == PAM_SUCCESS) {
- if (adt_put_event(event, ADT_SUCCESS, ADT_SUCCESS) != 0) {
- saved_errno = errno;
- how = "couldn't put adt event";
- goto fail;
- }
- } else if (adt_put_event(event, ADT_FAILURE,
- ADT_FAIL_PAM + pam_retval) != 0) {
- saved_errno = errno;
- how = "couldn't put adt event";
- goto fail;
- }
-
- adt_free_event(event);
- (void) adt_end_session(ah);
- return;
-
-fail:
- adt_free_event(event);
- (void) adt_end_session(ah);
-
- fatal("Auditing of password change failed: %s (%s)",
- strerror(saved_errno), how);
-}
-
-void
-audit_sshd_login(adt_session_data_t **ah, pid_t pid)
-{
- adt_event_data_t *event = NULL;
- const char *how;
- int saved_errno = 0;
- ucred_t *ucred = NULL;
-
- if (ah == NULL) {
- how = "programmer error";
- saved_errno = EINVAL;
- goto fail;
- }
-
- if (adt_start_session(ah, NULL, 0) != 0) {
- saved_errno = errno;
- how = "couldn't start adt session";
- goto fail;
- }
-
- if ((ucred = ucred_get(pid)) == NULL) {
- saved_errno = errno;
- how = "ucred_get() failed to obtain user credential";
- goto fail;
- }
-
- if (adt_set_from_ucred(*ah, ucred, ADT_NEW)) {
- saved_errno = errno;
- how = "adt_set_from_ucred() failed to set user credential";
- goto fail;
- }
-
- if ((event = adt_alloc_event(*ah, ADT_ssh)) == NULL) {
- saved_errno = errno;
- how = "couldn't allocate adt event";
- goto fail;
- }
-
- if (adt_put_event(event, ADT_SUCCESS, ADT_SUCCESS) != 0) {
- saved_errno = errno;
- how = "couldn't put adt event";
- goto fail;
- }
-
- adt_free_event(event);
- ucred_free(ucred);
- /* Don't end adt session - leave for when logging out */
- return;
-
-fail:
- if (ucred != NULL)
- ucred_free(ucred);
- adt_free_event(event);
- (void) adt_end_session(*ah);
-
- fatal("Auditing of login failed: %s (%s)",
- strerror(saved_errno), how);
-}
-
-void
-audit_sshd_login_failure(adt_session_data_t **ah, int pam_retval, char *user)
-{
- adt_event_data_t *event = NULL;
- const char *how;
- int saved_errno = 0;
- struct passwd pwd;
- char *pwdbuf = NULL;
- size_t pwdbuf_len;
- long pwdbuf_len_max;
- uid_t uid = ADT_NO_ATTRIB;
- gid_t gid = ADT_NO_ATTRIB;
-
- if (ah == NULL) {
- how = "programmer error";
- saved_errno = EINVAL;
- goto fail;
- }
-
- if ((pwdbuf_len_max = sysconf(_SC_GETPW_R_SIZE_MAX)) == -1) {
- saved_errno = errno;
- how = "couldn't determine maximum size of password buffer";
- goto fail;
- }
-
- pwdbuf_len = (size_t)pwdbuf_len_max;
- pwdbuf = xmalloc(pwdbuf_len);
-
- if (adt_start_session(ah, NULL, ADT_USE_PROC_DATA) != 0) {
- saved_errno = errno;
- how = "couldn't start adt session";
- goto fail;
- }
-
- /*
- * Its possible to reach this point with user being invalid so
- * we check here to make sure that the user in question has a valid
- * password entry.
- */
- if ((user != NULL) &&
- (getpwnam_r(user, &pwd, pwdbuf, pwdbuf_len) != NULL)) {
- uid = pwd.pw_uid;
- gid = pwd.pw_gid;
- }
-
- if (adt_set_user(*ah, uid, gid, uid, gid, NULL, ADT_NEW) != 0) {
- saved_errno = errno;
- how = "couldn't set adt user";
- goto fail;
- }
-
- if ((event = adt_alloc_event(*ah, ADT_ssh)) == NULL) {
- saved_errno = errno;
- how = "couldn't allocate adt event";
- goto fail;
- }
-
- if (adt_put_event(event, ADT_FAILURE, ADT_FAIL_PAM + pam_retval) != 0) {
- saved_errno = errno;
- how = "couldn't put adt event";
- goto fail;
- }
-
- xfree(pwdbuf);
- adt_free_event(event);
- (void) adt_end_session(*ah);
- *ah = NULL;
- return;
-
-fail:
- if (pwdbuf != NULL)
- xfree(pwdbuf);
- adt_free_event(event);
- (void) adt_end_session(*ah);
-
- fatal("Auditing of login failed: %s (%s)",
- strerror(saved_errno), how);
-}
-
-void
-audit_sshd_logout(adt_session_data_t **ah)
-{
- adt_event_data_t *event = NULL;
- const char *how = "programmer error";
- int saved_errno = 0;
-
- if (!ah) {
- saved_errno = EINVAL;
- goto fail;
- }
-
- if ((event = adt_alloc_event(*ah, ADT_logout)) == NULL) {
- saved_errno = errno;
- how = "couldn't allocate adt event";
- goto fail;
- }
-
- if (adt_put_event(event, ADT_SUCCESS, ADT_SUCCESS) != 0) {
- saved_errno = errno;
- how = "couldn't put adt event";
- goto fail;
- }
-
- adt_free_event(event);
- (void) adt_end_session(*ah);
- *ah = NULL;
- return;
-
-fail:
- adt_free_event(event);
- (void) adt_end_session(*ah);
-
- fatal("Auditing of logout failed: %s (%s)",
- how, strerror(saved_errno));
-}
-
-/*
- * audit_sshd_settid stores the terminal id while it is still
- * available.
- *
- * The failure cases are lack of resources or incorrect permissions.
- * libbsm generates syslog messages, so there's no value doing more
- * here. ADT_NO_AUDIT leaves the auid at AU_NOAUDITID and will be
- * replaced when one of the above functions is called.
- */
-void
-audit_sshd_settid(int sock)
-{
- adt_session_data_t *ah;
- adt_termid_t *termid;
-
- if (adt_start_session(&ah, NULL, 0) == 0) {
- if (adt_load_termid(sock, &termid) == 0) {
- if (adt_set_user(ah, ADT_NO_AUDIT,
- ADT_NO_AUDIT, 0, ADT_NO_AUDIT,
- termid, ADT_SETTID) == 0)
- (void) adt_set_proc(ah);
- free(termid);
- }
- (void) adt_end_session(ah);
- }
-}
diff --git a/usr/src/cmd/ssh/sshd/groupaccess.c b/usr/src/cmd/ssh/sshd/groupaccess.c
deleted file mode 100644
index 2239832e1b..0000000000
--- a/usr/src/cmd/ssh/sshd/groupaccess.c
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * Copyright (c) 2001 Kevin Steves. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: groupaccess.c,v 1.5 2002/03/04 17:27:39 stevesk Exp $");
-
-#include "groupaccess.h"
-#include "xmalloc.h"
-#include "match.h"
-#include "log.h"
-#include <alloca.h>
-
-static int ngroups, ngroups_lim;
-static char **groups_byname;
-
-/*
- * Initialize group access list for user with primary (base) and
- * supplementary groups. Return the number of groups in the list.
- */
-int
-ga_init(const char *user, gid_t base)
-{
- gid_t *groups_bygid;
- int i, j;
- struct group *gr;
-
- if (ngroups_lim == 0) {
- /* Add one for the base gid */
- ngroups_lim = sysconf(_SC_NGROUPS_MAX) + 1;
- groups_byname = malloc(sizeof (char *) * ngroups_lim);
- } else if (ngroups > 0)
- ga_free();
-
- groups_bygid = alloca(ngroups_lim * sizeof (gid_t));
-
- ngroups = ngroups_lim;
- if (getgrouplist(user, base, groups_bygid, &ngroups) == -1)
- log("getgrouplist: groups list too small");
- for (i = 0, j = 0; i < ngroups; i++)
- if ((gr = getgrgid(groups_bygid[i])) != NULL)
- groups_byname[j++] = xstrdup(gr->gr_name);
- return (ngroups = j);
-}
-
-/*
- * Return 1 if one of user's groups is contained in groups.
- * Return 0 otherwise. Use match_pattern() for string comparison.
- */
-int
-ga_match(char * const *groups, int n)
-{
- int i, j;
-
- for (i = 0; i < ngroups; i++)
- for (j = 0; j < n; j++)
- if (match_pattern(groups_byname[i], groups[j]))
- return (1);
- return (0);
-}
-
-/*
- * Return 1 if one of user's groups matches group_pattern list.
- * Return 0 on negated or no match.
- */
-int
-ga_match_pattern_list(const char *group_pattern)
-{
- int i, found = 0;
- size_t len = strlen(group_pattern);
-
- for (i = 0; i < ngroups; i++) {
- switch (match_pattern_list(groups_byname[i],
- group_pattern, len, 0)) {
- case -1:
- return (0); /* Negated match wins */
- case 0:
- continue;
- case 1:
- found = 1;
- }
- }
- return (found);
-}
-
-/*
- * Free memory allocated for group access list.
- */
-void
-ga_free(void)
-{
- int i;
-
- if (ngroups > 0) {
- for (i = 0; i < ngroups; i++)
- xfree(groups_byname[i]);
- ngroups = 0;
- }
-}
diff --git a/usr/src/cmd/ssh/sshd/gss-serv.c b/usr/src/cmd/ssh/sshd/gss-serv.c
deleted file mode 100644
index 7ff525c306..0000000000
--- a/usr/src/cmd/ssh/sshd/gss-serv.c
+++ /dev/null
@@ -1,516 +0,0 @@
-/*
- * Copyright (c) 2001-2003 Simon Wilkinson. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR `AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include "includes.h"
-
-#ifdef GSSAPI
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "includes.h"
-#include "ssh.h"
-#include "ssh2.h"
-#include "xmalloc.h"
-#include "buffer.h"
-#include "bufaux.h"
-#include "packet.h"
-#include "compat.h"
-#include <openssl/evp.h>
-#include "cipher.h"
-#include "kex.h"
-#include "auth.h"
-#include "log.h"
-#include "channels.h"
-#include "session.h"
-#include "dispatch.h"
-#include "servconf.h"
-#include "uidswap.h"
-#include "compat.h"
-#include <pwd.h>
-
-#include "ssh-gss.h"
-
-extern char **environ;
-
-extern ServerOptions options;
-extern uchar_t *session_id2;
-extern int session_id2_len;
-
-Gssctxt *xxx_gssctxt;
-
-void
-ssh_gssapi_server_kex_hook(Kex *kex, char **proposal)
-{
- gss_OID_set mechs = GSS_C_NULL_OID_SET;
-
- if (kex == NULL || !kex->server)
- fatal("INTERNAL ERROR (%s)", __func__);
-
- ssh_gssapi_server_mechs(&mechs);
- ssh_gssapi_modify_kex(kex, mechs, proposal);
-}
-
-void
-ssh_gssapi_server_mechs(gss_OID_set *mechs)
-{
- static gss_OID_set supported = GSS_C_NULL_OID_SET;
- gss_OID_set s, acquired, indicated = GSS_C_NULL_OID_SET;
- gss_cred_id_t creds;
- OM_uint32 maj, min;
- int i;
-
- if (!mechs) {
- (void) gss_release_oid_set(&min, &supported);
- return;
- }
-
- if (supported != GSS_C_NULL_OID_SET) {
- *mechs = supported;
- return;
- }
-
- *mechs = GSS_C_NULL_OID_SET;
-
- maj = gss_create_empty_oid_set(&min, &s);
- if (GSS_ERROR(maj)) {
- debug("Could not allocate GSS-API resources (%s)",
- ssh_gssapi_last_error(NULL, &maj, &min));
- return;
- }
-
- maj = gss_indicate_mechs(&min, &indicated);
- if (GSS_ERROR(maj)) {
- debug("No GSS-API mechanisms are installed");
- return;
- }
-
- maj = gss_acquire_cred(&min, GSS_C_NO_NAME, 0, indicated,
- GSS_C_ACCEPT, &creds, &acquired, NULL);
-
- if (GSS_ERROR(maj))
- debug("Failed to acquire GSS-API credentials for any "
- "mechanisms (%s)", ssh_gssapi_last_error(NULL, &maj, &min));
-
- (void) gss_release_oid_set(&min, &indicated);
- (void) gss_release_cred(&min, &creds);
-
- if (acquired == GSS_C_NULL_OID_SET || acquired->count == 0)
- return;
-
- for (i = 0; i < acquired->count; i++) {
- if (ssh_gssapi_is_spnego(&acquired->elements[i]))
- continue;
-
- maj = gss_add_oid_set_member(&min, &acquired->elements[i], &s);
- if (GSS_ERROR(maj)) {
- debug("Could not allocate GSS-API resources (%s)",
- ssh_gssapi_last_error(NULL, &maj, &min));
- return;
- }
- }
- (void) gss_release_oid_set(&min, &acquired);
-
- if (s->count) {
- supported = s;
- *mechs = s;
- }
-}
-
-/*
- * Wrapper around accept_sec_context. Requires that the context contains:
- *
- * oid
- * credentials (from ssh_gssapi_acquire_cred)
- */
-/* Priviledged */
-OM_uint32
-ssh_gssapi_accept_ctx(Gssctxt *ctx, gss_buffer_t recv_tok,
- gss_buffer_t send_tok)
-{
- /*
- * Acquiring a cred for the ctx->desired_mech for GSS_C_NO_NAME
- * may well be probably better than using GSS_C_NO_CREDENTIAL
- * and then checking that ctx->desired_mech agrees with
- * ctx->actual_mech...
- */
- ctx->major = gss_accept_sec_context(&ctx->minor, &ctx->context,
- GSS_C_NO_CREDENTIAL, recv_tok, GSS_C_NO_CHANNEL_BINDINGS,
- &ctx->src_name, &ctx->actual_mech, send_tok, &ctx->flags,
- NULL, &ctx->deleg_creds);
-
- if (GSS_ERROR(ctx->major))
- ssh_gssapi_error(ctx, "accepting security context");
-
- if (ctx->major == GSS_S_CONTINUE_NEEDED && send_tok->length == 0)
- fatal("Zero length GSS context token output when "
- "continue needed");
- else if (GSS_ERROR(ctx->major) && send_tok->length == 0)
- debug2("Zero length GSS context error token output");
-
- if (ctx->major == GSS_S_COMPLETE &&
- ctx->desired_mech != GSS_C_NULL_OID &&
- (ctx->desired_mech->length != ctx->actual_mech->length ||
- memcmp(ctx->desired_mech->elements, ctx->actual_mech->elements,
- ctx->desired_mech->length) != 0)) {
-
- gss_OID_set supported;
- OM_uint32 min;
- int present = 0;
-
- debug("The client did not use the GSS-API mechanism it "
- "asked for");
-
- /* Let it slide as long as the mech is supported */
- ssh_gssapi_server_mechs(&supported);
- if (supported != GSS_C_NULL_OID_SET) {
- (void) gss_test_oid_set_member(&min, ctx->actual_mech,
- supported, &present);
- }
- if (!present)
- ctx->major = GSS_S_BAD_MECH;
- }
-
- if (ctx->deleg_creds)
- debug("Received delegated GSS credentials");
-
- if (ctx->major == GSS_S_COMPLETE) {
- ctx->major = gss_inquire_context(&ctx->minor, ctx->context,
- NULL, &ctx->dst_name, NULL, NULL, NULL, NULL,
- &ctx->established);
-
- if (GSS_ERROR(ctx->major)) {
- ssh_gssapi_error(ctx,
- "inquiring established sec context");
- return (ctx->major);
- }
-
- xxx_gssctxt = ctx;
- }
-
- return (ctx->major);
-}
-
-
-/* As user - called through fatal cleanup hook */
-void
-ssh_gssapi_cleanup_creds(Gssctxt *ctx)
-{
-#ifdef HAVE_GSS_STORE_CRED
- /* pam_setcred() will take care of this */
- return;
-#else
- return;
-/* #error "Portability broken in cleanup of stored creds" */
-#endif /* HAVE_GSS_STORE_CRED */
-}
-
-void
-ssh_gssapi_storecreds(Gssctxt *ctx, Authctxt *authctxt)
-{
-#ifdef USE_PAM
- char **penv, **tmp_env;
-#endif /* USE_PAM */
-
- if (authctxt == NULL) {
- error("Missing context while storing GSS-API credentials");
- return;
- }
-
- if (ctx == NULL && xxx_gssctxt == NULL)
- return;
-
- if (ctx == NULL)
- ctx = xxx_gssctxt;
-
- if (!options.gss_cleanup_creds ||
- ctx->deleg_creds == GSS_C_NO_CREDENTIAL) {
- debug3("Not storing delegated GSS credentials"
- " (none delegated)");
- return;
- }
-
- if (!authctxt->valid || authctxt->pw == NULL) {
- debug3("Not storing delegated GSS credentials"
- " for invalid user");
- return;
- }
-
- debug("Storing delegated GSS-API credentials");
-
- /*
- * The GSS-API has a flaw in that it does not provide a
- * mechanism by which delegated credentials can be made
- * available for acquisition by GSS_Acquire_cred() et. al.;
- * gss_store_cred() is the proposed GSS-API extension for
- * generically storing delegated credentials.
- *
- * gss_store_cred() does not speak to how credential stores are
- * referenced. Generically this may be done by switching to the
- * user context of the user in whose default credential store we
- * wish to place delegated credentials. But environment
- * variables could conceivably affect the choice of credential
- * store as well, and perhaps in a mechanism-specific manner.
- *
- * SUNW -- On Solaris the euid selects the current credential
- * store, but PAM modules could select alternate stores by
- * setting, for example, KRB5CCNAME, so we also use the PAM
- * environment temporarily.
- */
-
-#ifdef HAVE_GSS_STORE_CRED
-#ifdef USE_PAM
- /*
- * PAM may have set mechanism-specific variables (e.g.,
- * KRB5CCNAME). fetch_pam_environment() protects against LD_*
- * and other environment variables.
- */
- penv = fetch_pam_environment(authctxt);
- tmp_env = environ;
- environ = penv;
-#endif /* USE_PAM */
- if (authctxt->pw->pw_uid != geteuid()) {
- temporarily_use_uid(authctxt->pw);
- ctx->major = gss_store_cred(&ctx->minor, ctx->deleg_creds,
- GSS_C_INITIATE, GSS_C_NULL_OID, 0, ctx->default_creds,
- NULL, NULL);
- restore_uid();
- } else {
- /* only when logging in as the privileged user used by sshd */
- ctx->major = gss_store_cred(&ctx->minor, ctx->deleg_creds,
- GSS_C_INITIATE, GSS_C_NULL_OID, 0, ctx->default_creds,
- NULL, NULL);
- }
-#ifdef USE_PAM
- environ = tmp_env;
- free_pam_environment(penv);
-#endif /* USE_PAM */
- if (GSS_ERROR(ctx->major))
- ssh_gssapi_error(ctx, "storing delegated credentials");
-
-#else
-#ifdef KRB5_GSS
-#error "MIT/Heimdal krb5-specific code missing in ssh_gssapi_storecreds()"
- if (ssh_gssapi_is_krb5(ctx->mech))
- ssh_gssapi_krb5_storecreds(ctx);
-#endif /* KRB5_GSS */
-#ifdef GSI_GSS
-#error "GSI krb5-specific code missing in ssh_gssapi_storecreds()"
- if (ssh_gssapi_is_gsi(ctx->mech))
- ssh_gssapi_krb5_storecreds(ctx);
-#endif /* GSI_GSS */
-/* #error "Mechanism-specific code missing in ssh_gssapi_storecreds()" */
- return;
-#endif /* HAVE_GSS_STORE_CRED */
-}
-
-void
-ssh_gssapi_do_child(Gssctxt *ctx, char ***envp, uint_t *envsizep)
-{
- /*
- * MIT/Heimdal/GSI specific code goes here.
- *
- * On Solaris there's nothing to do here as the GSS store and
- * related environment variables are to be set by PAM, if at all
- * (no environment variables are needed to address the default
- * credential store -- the euid does that).
- */
-#ifdef KRB5_GSS
-#error "MIT/Heimdal krb5-specific code missing in ssh_gssapi_storecreds()"
-#endif /* KRB5_GSS */
-#ifdef GSI_GSS
-#error "GSI krb5-specific code missing in ssh_gssapi_storecreds()"
-#endif /* GSI_GSS */
-}
-
-int
-ssh_gssapi_userok(Gssctxt *ctx, char *user)
-{
- if (ctx == NULL) {
- debug3("INTERNAL ERROR: %s", __func__);
- return (0);
- }
-
- if (user == NULL || *user == '\0')
- return (0);
-
-#ifdef HAVE___GSS_USEROK
- {
- int user_ok = 0;
-
- ctx->major = __gss_userok(&ctx->minor, ctx->src_name, user,
- &user_ok);
- if (GSS_ERROR(ctx->major)) {
- debug2("__GSS_userok() failed");
- return (0);
- }
-
- if (user_ok)
- return (1);
-
- /* fall through */
- }
-#else
-#ifdef GSSAPI_SIMPLE_USEROK
- {
- /* Mechanism-generic */
- OM_uint32 min;
- gss_buffer_desc buf, ename1, ename2;
- gss_name_t iname, cname;
- int eql;
-
- buf.value = user;
- buf.length = strlen(user);
- ctx->major = gss_import_name(&ctx->minor, &buf,
- GSS_C_NULL_OID, &iname);
- if (GSS_ERROR(ctx->major)) {
- ssh_gssapi_error(ctx,
- "importing name for authorizing initiator");
- goto failed_simple_userok;
- }
-
- ctx->major = gss_canonicalize_name(&ctx->minor, iname,
- ctx->actual_mech, &cname);
- (void) gss_release_name(&min, &iname);
- if (GSS_ERROR(ctx->major)) {
- ssh_gssapi_error(ctx, "canonicalizing name");
- goto failed_simple_userok;
- }
-
- ctx->major = gss_export_name(&ctx->minor, cname, &ename1);
- (void) gss_release_name(&min, &cname);
- if (GSS_ERROR(ctx->major)) {
- ssh_gssapi_error(ctx, "exporting name");
- goto failed_simple_userok;
- }
-
- ctx->major = gss_export_name(&ctx->minor, ctx->src_name,
- &ename2);
- if (GSS_ERROR(ctx->major)) {
- ssh_gssapi_error(ctx,
- "exporting client principal name");
- (void) gss_release_buffer(&min, &ename1);
- goto failed_simple_userok;
- }
-
- eql = (ename1.length == ename2.length &&
- memcmp(ename1.value, ename2.value, ename1.length) == 0);
-
- (void) gss_release_buffer(&min, &ename1);
- (void) gss_release_buffer(&min, &ename2);
-
- if (eql)
- return (1);
- /* fall through */
- }
-failed_simple_userok:
-#endif /* GSSAPI_SIMPLE_USEROK */
-#ifdef HAVE_GSSCRED_API
- {
- /* Mechanism-generic, Solaris-specific */
- OM_uint32 maj;
- uid_t uid;
- struct passwd *pw;
-
- maj = gsscred_name_to_unix_cred(ctx->src_name,
- ctx->actual_mech, &uid, NULL, NULL, NULL);
-
- if (GSS_ERROR(maj))
- goto failed_simple_gsscred_userok;
-
- if ((pw = getpwnam(user)) == NULL)
- goto failed_simple_gsscred_userok;
-
- if (pw->pw_uid == uid)
- return (1);
- /* fall through */
- }
-
-failed_simple_gsscred_userok:
-#endif /* HAVE_GSSCRED_API */
-#ifdef KRB5_GSS
- if (ssh_gssapi_is_krb5(ctx->mech))
- if (ssh_gssapi_krb5_userok(ctx->src_name, user))
- return (1);
-#endif /* KRB5_GSS */
-#ifdef GSI_GSS
- if (ssh_gssapi_is_gsi(ctx->mech))
- if (ssh_gssapi_gsi_userok(ctx->src_name, user))
- return (1);
-#endif /* GSI_GSS */
-#endif /* HAVE___GSS_USEROK */
-
- /* default to not authorized */
- return (0);
-}
-
-char *
-ssh_gssapi_localname(Gssctxt *ctx)
-{
- if (ctx == NULL) {
- debug3("INTERNAL ERROR: %s", __func__);
- return (NULL);
- }
-
- debug2("Mapping initiator GSS-API principal to local username");
-#ifdef HAVE_GSSCRED_API
- {
- /* Mechanism-generic, Solaris-specific */
- OM_uint32 maj;
- uid_t uid;
- struct passwd *pw;
-
- if (ctx->src_name == GSS_C_NO_NAME)
- goto failed_gsscred_localname;
-
- maj = gsscred_name_to_unix_cred(ctx->src_name,
- ctx->actual_mech, &uid, NULL, NULL, NULL);
-
- if (GSS_ERROR(maj))
- goto failed_gsscred_localname;
-
- if ((pw = getpwuid(uid)) == NULL)
- goto failed_gsscred_localname;
-
- debug2("Mapped the initiator to: %s", pw->pw_name);
- return (xstrdup(pw->pw_name));
- }
-failed_gsscred_localname:
-#endif /* HAVE_GSSCRED_API */
-#ifdef KRB5_GSS
-#error "ssh_gssapi_krb5_localname() not implemented"
- if (ssh_gssapi_is_krb5(ctx->mech))
- return (ssh_gssapi_krb5_localname(ctx->src_name));
-#endif /* KRB5_GSS */
-#ifdef GSI_GSS
-#error "ssh_gssapi_gsi_localname() not implemented"
- if (ssh_gssapi_is_gsi(ctx->mech))
- return (ssh_gssapi_gsi_localname(ctx->src_name));
-#endif /* GSI_GSS */
- return (NULL);
-}
-#endif /* GSSAPI */
diff --git a/usr/src/cmd/ssh/sshd/loginrec.c b/usr/src/cmd/ssh/sshd/loginrec.c
deleted file mode 100644
index 33998b02b9..0000000000
--- a/usr/src/cmd/ssh/sshd/loginrec.c
+++ /dev/null
@@ -1,1533 +0,0 @@
-/*
- * Copyright (c) 2000 Andre Lucas. All rights reserved.
- * Portions copyright (c) 1998 Todd C. Miller
- * Portions copyright (c) 1996 Jason Downs
- * Portions copyright (c) 1996 Theo de Raadt
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by Markus Friedl.
- * 4. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/**
- ** loginrec.c: platform-independent login recording and lastlog retrieval
- **/
-
-/*
- The new login code explained
- ============================
-
- This code attempts to provide a common interface to login recording
- (utmp and friends) and last login time retrieval.
-
- Its primary means of achieving this is to use 'struct logininfo', a
- union of all the useful fields in the various different types of
- system login record structures one finds on UNIX variants.
-
- We depend on autoconf to define which recording methods are to be
- used, and which fields are contained in the relevant data structures
- on the local system. Many C preprocessor symbols affect which code
- gets compiled here.
-
- The code is designed to make it easy to modify a particular
- recording method, without affecting other methods nor requiring so
- many nested conditional compilation blocks as were commonplace in
- the old code.
-
- For login recording, we try to use the local system's libraries as
- these are clearly most likely to work correctly. For utmp systems
- this usually means login() and logout() or setutent() etc., probably
- in libutil, along with logwtmp() etc. On these systems, we fall back
- to writing the files directly if we have to, though this method
- requires very thorough testing so we do not corrupt local auditing
- information. These files and their access methods are very system
- specific indeed.
-
- For utmpx systems, the corresponding library functions are
- setutxent() etc. To the author's knowledge, all utmpx systems have
- these library functions and so no direct write is attempted. If such
- a system exists and needs support, direct analogues of the [uw]tmp
- code should suffice.
-
- Retrieving the time of last login ('lastlog') is in some ways even
- more problemmatic than login recording. Some systems provide a
- simple table of all users which we seek based on uid and retrieve a
- relatively standard structure. Others record the same information in
- a directory with a separate file, and others don't record the
- information separately at all. For systems in the latter category,
- we look backwards in the wtmp or wtmpx file for the last login entry
- for our user. Naturally this is slower and on busy systems could
- incur a significant performance penalty.
-
- Calling the new code
- --------------------
-
- In OpenSSH all login recording and retrieval is performed in
- login.c. Here you'll find working examples. Also, in the logintest.c
- program there are more examples.
-
- Internal handler calling method
- -------------------------------
-
- When a call is made to login_login() or login_logout(), both
- routines set a struct logininfo flag defining which action (log in,
- or log out) is to be taken. They both then call login_write(), which
- calls whichever of the many structure-specific handlers autoconf
- selects for the local system.
-
- The handlers themselves handle system data structure specifics. Both
- struct utmp and struct utmpx have utility functions (see
- construct_utmp*()) to try to make it simpler to add extra systems
- that introduce new features to either structure.
-
- While it may seem terribly wasteful to replicate so much similar
- code for each method, experience has shown that maintaining code to
- write both struct utmp and utmpx in one function, whilst maintaining
- support for all systems whether they have library support or not, is
- a difficult and time-consuming task.
-
- Lastlog support proceeds similarly. Functions login_get_lastlog()
- (and its OpenSSH-tuned friend login_get_lastlog_time()) call
- getlast_entry(), which tries one of three methods to find the last
- login time. It uses local system lastlog support if it can,
- otherwise it tries wtmp or wtmpx before giving up and returning 0,
- meaning "tilt".
-
- Maintenance
- -----------
-
- In many cases it's possible to tweak autoconf to select the correct
- methods for a particular platform, either by improving the detection
- code (best), or by presetting DISABLE_<method> or CONF_<method>_FILE
- symbols for the platform.
-
- Use logintest to check which symbols are defined before modifying
- configure.ac and loginrec.c. (You have to build logintest yourself
- with 'make logintest' as it's not built by default.)
-
- Otherwise, patches to the specific method(s) are very helpful!
-
-*/
-
-/**
- ** TODO:
- ** homegrown ttyslot()
- ** test, test, test
- **
- ** Platform status:
- ** ----------------
- **
- ** Known good:
- ** Linux (Redhat 6.2, Debian)
- ** Solaris
- ** HP-UX 10.20 (gcc only)
- ** IRIX
- ** NeXT - M68k/HPPA/Sparc (4.2/3.3)
- **
- ** Testing required: Please send reports!
- ** NetBSD
- ** HP-UX 11
- ** AIX
- **
- ** Platforms with known problems:
- ** Some variants of Slackware Linux
- **
- **/
-
-#include "includes.h"
-
-#include "ssh.h"
-#include "xmalloc.h"
-#include "loginrec.h"
-#include "log.h"
-#include "atomicio.h"
-
-RCSID("$Id: loginrec.c,v 1.44 2002/09/26 00:38:49 tim Exp $");
-
-#ifdef HAVE_UTIL_H
-# include <util.h>
-#endif
-
-#ifdef HAVE_LIBUTIL_H
-# include <libutil.h>
-#endif
-
-/**
- ** prototypes for helper functions in this file
- **/
-
-#if HAVE_UTMP_H
-void set_utmp_time(struct logininfo *li, struct utmp *ut);
-void construct_utmp(struct logininfo *li, struct utmp *ut);
-#endif
-
-#ifdef HAVE_UTMPX_H
-void set_utmpx_time(struct logininfo *li, struct utmpx *ut);
-void construct_utmpx(struct logininfo *li, struct utmpx *ut);
-#endif
-
-int utmp_write_entry(struct logininfo *li);
-int utmpx_write_entry(struct logininfo *li);
-int wtmp_write_entry(struct logininfo *li);
-int wtmpx_write_entry(struct logininfo *li);
-int lastlog_write_entry(struct logininfo *li);
-int syslogin_write_entry(struct logininfo *li);
-
-int getlast_entry(struct logininfo *li);
-int lastlog_get_entry(struct logininfo *li);
-int wtmp_get_entry(struct logininfo *li);
-int wtmpx_get_entry(struct logininfo *li);
-
-/* pick the shortest string */
-#define MIN_SIZEOF(s1,s2) ( sizeof(s1) < sizeof(s2) ? sizeof(s1) : sizeof(s2) )
-
-/**
- ** platform-independent login functions
- **/
-
-/* login_login(struct logininfo *) -Record a login
- *
- * Call with a pointer to a struct logininfo initialised with
- * login_init_entry() or login_alloc_entry()
- *
- * Returns:
- * >0 if successful
- * 0 on failure (will use OpenSSH's logging facilities for diagnostics)
- */
-int
-login_login (struct logininfo *li)
-{
- li->type = LTYPE_LOGIN;
- return login_write(li);
-}
-
-
-/* login_logout(struct logininfo *) - Record a logout
- *
- * Call as with login_login()
- *
- * Returns:
- * >0 if successful
- * 0 on failure (will use OpenSSH's logging facilities for diagnostics)
- */
-int
-login_logout(struct logininfo *li)
-{
- li->type = LTYPE_LOGOUT;
- return login_write(li);
-}
-
-/* login_get_lastlog_time(int) - Retrieve the last login time
- *
- * Retrieve the last login time for the given uid. Will try to use the
- * system lastlog facilities if they are available, but will fall back
- * to looking in wtmp/wtmpx if necessary
- *
- * Returns:
- * 0 on failure, or if user has never logged in
- * Time in seconds from the epoch if successful
- *
- * Useful preprocessor symbols:
- * DISABLE_LASTLOG: If set, *never* even try to retrieve lastlog
- * info
- * USE_LASTLOG: If set, indicates the presence of system lastlog
- * facilities. If this and DISABLE_LASTLOG are not set,
- * try to retrieve lastlog information from wtmp/wtmpx.
- */
-#if 0
-unsigned int
-login_get_lastlog_time(const int uid)
-{
- struct logininfo li;
-
- if (login_get_lastlog(&li, uid))
- return li.tv_sec;
- else
- return 0;
-}
-#endif
-
-/* login_get_lastlog(struct logininfo *, int) - Retrieve a lastlog entry
- *
- * Retrieve a logininfo structure populated (only partially) with
- * information from the system lastlog data, or from wtmp/wtmpx if no
- * system lastlog information exists.
- *
- * Note this routine must be given a pre-allocated logininfo.
- *
- * Returns:
- * >0: A pointer to your struct logininfo if successful
- * 0 on failure (will use OpenSSH's logging facilities for diagnostics)
- *
- */
-struct logininfo *
-login_get_lastlog(struct logininfo *li, const int uid)
-{
- struct passwd *pw;
-
- (void) memset(li, '\0', sizeof(*li));
- li->uid = uid;
-
- /*
- * If we don't have a 'real' lastlog, we need the username to
- * reliably search wtmp(x) for the last login (see
- * wtmp_get_entry().)
- */
- pw = getpwuid(uid);
- if (pw == NULL)
- fatal("login_get_lastlog: Cannot find account for uid %i", uid);
-
- /* No MIN_SIZEOF here - we absolutely *must not* truncate the
- * username */
- (void) strlcpy(li->username, pw->pw_name, sizeof(li->username));
-
- if (getlast_entry(li))
- return li;
- else
- return NULL;
-}
-
-
-/* login_alloc_entry() - Allocate and initialise a logininfo
- * structure
- *
- * This function creates a new struct logininfo, a data structure
- * meant to carry the information required to portably record login info.
- *
- * Returns a pointer to a newly created struct logininfo. If memory
- * allocation fails, the program halts.
- */
-struct
-logininfo *login_alloc_entry(int pid, const char *username,
- const char *hostname, const char *line,
- const char *progname)
-{
- struct logininfo *newli;
-
- newli = (struct logininfo *) xmalloc (sizeof(*newli));
- (void)login_init_entry(newli, pid, username, hostname, line, progname);
- return newli;
-}
-
-
-/* login_free_entry(struct logininfo *) - free struct memory */
-void
-login_free_entry(struct logininfo *li)
-{
- xfree(li);
-}
-
-
-/* login_init_entry()
- * - initialise a struct logininfo
- *
- * Populates a new struct logininfo, a data structure meant to carry
- * the information required to portably record login info.
- *
- * Returns: 1
- */
-int
-login_init_entry(struct logininfo *li, int pid, const char *username,
- const char *hostname, const char *line, const char *progname)
-{
- struct passwd *pw;
-
- (void) memset(li, 0, sizeof(*li));
-
- li->pid = pid;
-
- /* set the line information */
- if (line)
- (void) line_fullname(li->line, line, sizeof(li->line));
- else
- li->line_null = 1;
-
- if (progname)
- (void) strlcpy(li->progname, progname, sizeof(li->progname));
- else
- li->progname_null = 1;
-
- if (username) {
- (void) strlcpy(li->username, username, sizeof(li->username));
- pw = getpwnam(li->username);
- if (pw == NULL)
- fatal("login_init_entry: Cannot find user \"%s\"", li->username);
- li->uid = pw->pw_uid;
- }
-
- if (hostname)
- (void) strlcpy(li->hostname, hostname, sizeof(li->hostname));
-
- return 1;
-}
-
-/* login_set_current_time(struct logininfo *) - set the current time
- *
- * Set the current time in a logininfo structure. This function is
- * meant to eliminate the need to deal with system dependencies for
- * time handling.
- */
-void
-login_set_current_time(struct logininfo *li)
-{
- struct timeval tv;
-
- (void) gettimeofday(&tv, NULL);
-
- li->tv_sec = tv.tv_sec;
- li->tv_usec = tv.tv_usec;
-}
-
-/* copy a sockaddr_* into our logininfo */
-void
-login_set_addr(struct logininfo *li, const struct sockaddr *sa,
- const unsigned int sa_size)
-{
- unsigned int bufsize = sa_size;
-
- /* make sure we don't overrun our union */
- if (sizeof(li->hostaddr) < sa_size)
- bufsize = sizeof(li->hostaddr);
-
- (void) memcpy((void *)&(li->hostaddr.sa), (const void *)sa, bufsize);
-}
-
-
-/**
- ** login_write: Call low-level recording functions based on autoconf
- ** results
- **/
-int
-login_write (struct logininfo *li)
-{
-#ifndef HAVE_CYGWIN
- if ((int)geteuid() != 0) {
- log("Attempt to write login records by non-root user (aborting)");
- return 1;
- }
-#endif
-
- /* set the timestamp */
- login_set_current_time(li);
-#ifdef USE_LOGIN
- syslogin_write_entry(li);
-#endif
-#ifdef USE_LASTLOG
- if (li->type == LTYPE_LOGIN) {
- (void) lastlog_write_entry(li);
- }
-#endif
-#ifdef USE_UTMP
- utmp_write_entry(li);
-#endif
-#ifdef USE_WTMP
- wtmp_write_entry(li);
-#endif
-#ifdef USE_UTMPX
- (void) utmpx_write_entry(li);
-#endif
-#ifdef USE_WTMPX
- (void) wtmpx_write_entry(li);
-#endif
- return 0;
-}
-
-/**
- ** getlast_entry: Call low-level functions to retrieve the last login
- ** time.
- **/
-
-/* take the uid in li and return the last login time */
-int
-getlast_entry(struct logininfo *li)
-{
-#ifdef USE_LASTLOG
- return(lastlog_get_entry(li));
-#else /* !USE_LASTLOG */
-
-#ifdef DISABLE_LASTLOG
- /* On some systems we shouldn't even try to obtain last login
- * time, e.g. AIX */
- return 0;
-# else /* DISABLE_LASTLOG */
- /* Try to retrieve the last login time from wtmp */
-# if defined(USE_WTMP) && (defined(HAVE_TIME_IN_UTMP) || defined(HAVE_TV_IN_UTMP))
- /* retrieve last login time from utmp */
- return (wtmp_get_entry(li));
-# else /* defined(USE_WTMP) && (defined(HAVE_TIME_IN_UTMP) || defined(HAVE_TV_IN_UTMP)) */
- /* If wtmp isn't available, try wtmpx */
-# if defined(USE_WTMPX) && (defined(HAVE_TIME_IN_UTMPX) || defined(HAVE_TV_IN_UTMPX))
- /* retrieve last login time from utmpx */
- return (wtmpx_get_entry(li));
-# else
- /* Give up: No means of retrieving last login time */
- return 0;
-# endif /* USE_WTMPX && (HAVE_TIME_IN_UTMPX || HAVE_TV_IN_UTMPX) */
-# endif /* USE_WTMP && (HAVE_TIME_IN_UTMP || HAVE_TV_IN_UTMP) */
-# endif /* DISABLE_LASTLOG */
-#endif /* USE_LASTLOG */
-}
-
-
-
-/*
- * 'line' string utility functions
- *
- * These functions process the 'line' string into one of three forms:
- *
- * 1. The full filename (including '/dev')
- * 2. The stripped name (excluding '/dev')
- * 3. The abbreviated name (e.g. /dev/ttyp00 -> yp00
- * /dev/pts/1 -> ts/1 )
- *
- * Form 3 is used on some systems to identify a .tmp.? entry when
- * attempting to remove it. Typically both addition and removal is
- * performed by one application - say, sshd - so as long as the choice
- * uniquely identifies a terminal it's ok.
- */
-
-
-/* line_fullname(): add the leading '/dev/' if it doesn't exist make
- * sure dst has enough space, if not just copy src (ugh) */
-char *
-line_fullname(char *dst, const char *src, int dstsize)
-{
- (void) memset(dst, '\0', dstsize);
- /* "sshd" is special, like "ftp" */
- if (strcmp(src, "sshd") ||
- ((strncmp(src, "/dev/", 5) == 0) || (dstsize < (strlen(src) + 5)))) {
- (void) strlcpy(dst, src, dstsize);
- } else {
- (void) strlcpy(dst, "/dev/", dstsize);
- (void) strlcat(dst, src, dstsize);
- }
- return dst;
-}
-
-/* line_stripname(): strip the leading '/dev' if it exists, return dst */
-char *
-line_stripname(char *dst, const char *src, int dstsize)
-{
- (void) memset(dst, '\0', dstsize);
- if (strncmp(src, "/dev/", 5) == 0)
- (void) strlcpy(dst, src + 5, dstsize);
- else
- (void) strlcpy(dst, src, dstsize);
- return dst;
-}
-
-/* line_abbrevname(): Return the abbreviated (usually four-character)
- * form of the line (Just use the last <dstsize> characters of the
- * full name.)
- *
- * NOTE: use strncpy because we do NOT necessarily want zero
- * termination */
-char *
-line_abbrevname(char *dst, const char *src, int dstsize)
-{
- size_t len;
-
- (void) memset(dst, '\0', dstsize);
-
- /* Always skip prefix if present */
- if (strncmp(src, "/dev/", 5) == 0)
- src += 5;
-
-#ifdef WITH_ABBREV_NO_TTY
- if (strncmp(src, "tty", 3) == 0)
- src += 3;
-#endif
-
- len = strlen(src);
-
- if (len > 0) {
- if (((int)len - dstsize) > 0)
- src += ((int)len - dstsize);
-
- /* note: _don't_ change this to strlcpy */
- (void) strncpy(dst, src, (size_t)dstsize);
- }
-
- return dst;
-}
-
-/**
- ** utmp utility functions
- **
- ** These functions manipulate struct utmp, taking system differences
- ** into account.
- **/
-
-#if defined(USE_UTMP) || defined (USE_WTMP) || defined (USE_LOGIN)
-
-/* build the utmp structure */
-void
-set_utmp_time(struct logininfo *li, struct utmp *ut)
-{
-# ifdef HAVE_TV_IN_UTMP
- ut->ut_tv.tv_sec = li->tv_sec;
- ut->ut_tv.tv_usec = li->tv_usec;
-# else
-# ifdef HAVE_TIME_IN_UTMP
- ut->ut_time = li->tv_sec;
-# endif
-# endif
-}
-
-void
-construct_utmp(struct logininfo *li,
- struct utmp *ut)
-{
- (void) memset(ut, '\0', sizeof(*ut));
-
- /* First fill out fields used for both logins and logouts */
-
-# ifdef HAVE_ID_IN_UTMP
- (void) line_abbrevname(ut->ut_id, li->line, sizeof(ut->ut_id));
-# endif
-
-# ifdef HAVE_TYPE_IN_UTMP
- /* This is done here to keep utmp constants out of struct logininfo */
- switch (li->type) {
- case LTYPE_LOGIN:
- ut->ut_type = USER_PROCESS;
-#ifdef _UNICOS
- cray_set_tmpdir(ut);
-#endif
- break;
- case LTYPE_LOGOUT:
- ut->ut_type = DEAD_PROCESS;
-#ifdef _UNICOS
- cray_retain_utmp(ut, li->pid);
-#endif
- break;
- }
-# endif
- set_utmp_time(li, ut);
-
- (void) line_stripname(ut->ut_line, li->line, sizeof(ut->ut_line));
-
-# ifdef HAVE_PID_IN_UTMP
- ut->ut_pid = li->pid;
-# endif
-
- /* If we're logging out, leave all other fields blank */
- if (li->type == LTYPE_LOGOUT)
- return;
-
- /*
- * These fields are only used when logging in, and are blank
- * for logouts.
- */
-
- /* Use strncpy because we don't necessarily want null termination */
- (void) strncpy(ut->ut_name, li->username, MIN_SIZEOF(ut->ut_name, li->username));
-# ifdef HAVE_HOST_IN_UTMP
- (void) strncpy(ut->ut_host, li->hostname, MIN_SIZEOF(ut->ut_host, li->hostname));
-# endif
-# ifdef HAVE_ADDR_IN_UTMP
- /* this is just a 32-bit IP address */
- if (li->hostaddr.sa.sa_family == AF_INET)
- ut->ut_addr = li->hostaddr.sa_in.sin_addr.s_addr;
-# endif
-}
-#endif /* USE_UTMP || USE_WTMP || USE_LOGIN */
-
-/**
- ** utmpx utility functions
- **
- ** These functions manipulate struct utmpx, accounting for system
- ** variations.
- **/
-
-#if defined(USE_UTMPX) || defined (USE_WTMPX)
-/* build the utmpx structure */
-void
-set_utmpx_time(struct logininfo *li, struct utmpx *utx)
-{
-# ifdef HAVE_TV_IN_UTMPX
- utx->ut_tv.tv_sec = li->tv_sec;
- utx->ut_tv.tv_usec = li->tv_usec;
-# else /* HAVE_TV_IN_UTMPX */
-# ifdef HAVE_TIME_IN_UTMPX
- utx->ut_time = li->tv_sec;
-# endif /* HAVE_TIME_IN_UTMPX */
-# endif /* HAVE_TV_IN_UTMPX */
-}
-
-void
-construct_utmpx(struct logininfo *li, struct utmpx *utx)
-{
- (void) memset(utx, '\0', sizeof(*utx));
-# ifdef HAVE_ID_IN_UTMPX
- (void) line_abbrevname(utx->ut_id, li->line, sizeof(utx->ut_id));
-# endif
-
- /* this is done here to keep utmp constants out of loginrec.h */
- switch (li->type) {
- case LTYPE_LOGIN:
- utx->ut_type = USER_PROCESS;
- break;
- case LTYPE_LOGOUT:
- utx->ut_type = DEAD_PROCESS;
- break;
- }
- if (!li->line_null)
- (void) line_stripname(utx->ut_line, li->line, sizeof(utx->ut_line));
- else if (!li->progname_null)
- (void) line_stripname(utx->ut_line, li->progname, sizeof(utx->ut_line));
-
- set_utmpx_time(li, utx);
- utx->ut_pid = li->pid;
- /* strncpy(): Don't necessarily want null termination */
- (void) strncpy(utx->ut_name, li->username, MIN_SIZEOF(utx->ut_name, li->username));
-
- if (li->type == LTYPE_LOGOUT)
- return;
-
- /*
- * These fields are only used when logging in, and are blank
- * for logouts.
- */
-
-# ifdef HAVE_HOST_IN_UTMPX
- (void) strncpy(utx->ut_host, li->hostname, MIN_SIZEOF(utx->ut_host, li->hostname));
-# endif
-# ifdef HAVE_ADDR_IN_UTMPX
- /* this is just a 32-bit IP address */
- if (li->hostaddr.sa.sa_family == AF_INET)
- utx->ut_addr = li->hostaddr.sa_in.sin_addr.s_addr;
-# endif
-# ifdef HAVE_SYSLEN_IN_UTMPX
- /* ut_syslen is the length of the utx_host string */
- utx->ut_syslen = MIN(strlen(li->hostname), sizeof(utx->ut_host));
-# endif
-}
-#endif /* USE_UTMPX || USE_WTMPX */
-
-/**
- ** Low-level utmp functions
- **/
-
-/* FIXME: (ATL) utmp_write_direct needs testing */
-#ifdef USE_UTMP
-
-/* if we can, use pututline() etc. */
-# if !defined(DISABLE_PUTUTLINE) && defined(HAVE_SETUTENT) && \
- defined(HAVE_PUTUTLINE)
-# define UTMP_USE_LIBRARY
-# endif
-
-
-/* write a utmp entry with the system's help (pututline() and pals) */
-# ifdef UTMP_USE_LIBRARY
-static int
-utmp_write_library(struct logininfo *li, struct utmp *ut)
-{
- setutent();
- pututline(ut);
-
-# ifdef HAVE_ENDUTENT
- endutent();
-# endif
- return 1;
-}
-# else /* UTMP_USE_LIBRARY */
-
-/* write a utmp entry direct to the file */
-/* This is a slightly modification of code in OpenBSD's login.c */
-static int
-utmp_write_direct(struct logininfo *li, struct utmp *ut)
-{
- struct utmp old_ut;
- register int fd;
- int tty;
-
- /* FIXME: (ATL) ttyslot() needs local implementation */
-
-#if defined(HAVE_GETTTYENT)
- register struct ttyent *ty;
-
- tty=0;
-
- setttyent();
- while ((struct ttyent *)0 != (ty = getttyent())) {
- tty++;
- if (!strncmp(ty->ty_name, ut->ut_line, sizeof(ut->ut_line)))
- break;
- }
- endttyent();
-
- if((struct ttyent *)0 == ty) {
- log("utmp_write_entry: tty not found");
- return(1);
- }
-#else /* FIXME */
-
- tty = ttyslot(); /* seems only to work for /dev/ttyp? style names */
-
-#endif /* HAVE_GETTTYENT */
-
- if (tty > 0 && (fd = open(UTMP_FILE, O_RDWR|O_CREAT, 0644)) >= 0) {
- (void)lseek(fd, (off_t)(tty * sizeof(struct utmp)), SEEK_SET);
- /*
- * Prevent luser from zero'ing out ut_host.
- * If the new ut_line is empty but the old one is not
- * and ut_line and ut_name match, preserve the old ut_line.
- */
- if (atomicio(read, fd, &old_ut, sizeof(old_ut)) == sizeof(old_ut) &&
- (ut->ut_host[0] == '\0') && (old_ut.ut_host[0] != '\0') &&
- (strncmp(old_ut.ut_line, ut->ut_line, sizeof(ut->ut_line)) == 0) &&
- (strncmp(old_ut.ut_name, ut->ut_name, sizeof(ut->ut_name)) == 0)) {
- (void)memcpy(ut->ut_host, old_ut.ut_host, sizeof(ut->ut_host));
- }
-
- (void)lseek(fd, (off_t)(tty * sizeof(struct utmp)), SEEK_SET);
- if (atomicio(write, fd, ut, sizeof(*ut)) != sizeof(*ut))
- log("utmp_write_direct: error writing %s: %s",
- UTMP_FILE, strerror(errno));
-
- (void)close(fd);
- return 1;
- } else {
- return 0;
- }
-}
-# endif /* UTMP_USE_LIBRARY */
-
-static int
-utmp_perform_login(struct logininfo *li)
-{
- struct utmp ut;
-
- construct_utmp(li, &ut);
-# ifdef UTMP_USE_LIBRARY
- if (!utmp_write_library(li, &ut)) {
- log("utmp_perform_login: utmp_write_library() failed");
- return 0;
- }
-# else
- if (!utmp_write_direct(li, &ut)) {
- log("utmp_perform_login: utmp_write_direct() failed");
- return 0;
- }
-# endif
- return 1;
-}
-
-
-static int
-utmp_perform_logout(struct logininfo *li)
-{
- struct utmp ut;
-
- construct_utmp(li, &ut);
-# ifdef UTMP_USE_LIBRARY
- if (!utmp_write_library(li, &ut)) {
- log("utmp_perform_logout: utmp_write_library() failed");
- return 0;
- }
-# else
- if (!utmp_write_direct(li, &ut)) {
- log("utmp_perform_logout: utmp_write_direct() failed");
- return 0;
- }
-# endif
- return 1;
-}
-
-
-int
-utmp_write_entry(struct logininfo *li)
-{
- if (li->line_null) {
- debug3("not writing utmp entry");
- return 1;
- }
- debug3("writing utmp entry");
-
- switch(li->type) {
- case LTYPE_LOGIN:
- return utmp_perform_login(li);
-
- case LTYPE_LOGOUT:
- return utmp_perform_logout(li);
-
- default:
- log("utmp_write_entry: invalid type field");
- return 0;
- }
-}
-#endif /* USE_UTMP */
-
-
-/**
- ** Low-level utmpx functions
- **/
-
-/* not much point if we don't want utmpx entries */
-#ifdef USE_UTMPX
-
-/* if we have the wherewithall, use pututxline etc. */
-# if !defined(DISABLE_PUTUTXLINE) && defined(HAVE_SETUTXENT) && \
- defined(HAVE_PUTUTXLINE)
-# define UTMPX_USE_LIBRARY
-# endif
-
-
-/* write a utmpx entry with the system's help (pututxline() and pals) */
-# ifdef UTMPX_USE_LIBRARY
-static int
-utmpx_write_library(struct logininfo *li, struct utmpx *utx)
-{
- setutxent();
- (void) pututxline(utx);
-
-# ifdef HAVE_ENDUTXENT
- endutxent();
-# endif
- return 1;
-}
-
-# else /* UTMPX_USE_LIBRARY */
-
-/* write a utmp entry direct to the file */
-static int
-utmpx_write_direct(struct logininfo *li, struct utmpx *utx)
-{
- log("utmpx_write_direct: not implemented!");
- return 0;
-}
-# endif /* UTMPX_USE_LIBRARY */
-
-static int
-utmpx_perform_login(struct logininfo *li)
-{
- struct utmpx utx;
-
- construct_utmpx(li, &utx);
-# ifdef UTMPX_USE_LIBRARY
- if (!utmpx_write_library(li, &utx)) {
- log("tmpx_perform_login: utmp_write_library() failed");
- return 0;
- }
-# else
- if (!utmpx_write_direct(li, &ut)) {
- log("utmpx_perform_login: utmp_write_direct() failed");
- return 0;
- }
-# endif
- return 1;
-}
-
-
-static int
-utmpx_perform_logout(struct logininfo *li)
-{
- struct utmpx utx;
-
- construct_utmpx(li, &utx);
-# ifdef HAVE_ID_IN_UTMPX
- (void) line_abbrevname(utx.ut_id, li->line, sizeof(utx.ut_id));
-# endif
-# ifdef HAVE_TYPE_IN_UTMPX
- utx.ut_type = DEAD_PROCESS;
-# endif
-
-# ifdef UTMPX_USE_LIBRARY
- (void) utmpx_write_library(li, &utx);
-# else
- utmpx_write_direct(li, &utx);
-# endif
- return 1;
-}
-
-int
-utmpx_write_entry(struct logininfo *li)
-{
- if (li->line_null) {
- debug3("not writing utmpx entry");
- return 1;
- }
- debug3("writing utmpx entry");
-
- switch(li->type) {
- case LTYPE_LOGIN:
- return utmpx_perform_login(li);
- case LTYPE_LOGOUT:
- return utmpx_perform_logout(li);
- default:
- log("utmpx_write_entry: invalid type field");
- return 0;
- }
-}
-#endif /* USE_UTMPX */
-
-
-/**
- ** Low-level wtmp functions
- **/
-
-#ifdef USE_WTMP
-
-/* write a wtmp entry direct to the end of the file */
-/* This is a slight modification of code in OpenBSD's logwtmp.c */
-static int
-wtmp_write(struct logininfo *li, struct utmp *ut)
-{
- struct stat buf;
- int fd, ret = 1;
-
- if ((fd = open(WTMP_FILE, O_WRONLY|O_APPEND, 0)) < 0) {
- log("wtmp_write: problem writing %s: %s",
- WTMP_FILE, strerror(errno));
- return 0;
- }
- if (fstat(fd, &buf) == 0)
- if (atomicio(write, fd, ut, sizeof(*ut)) != sizeof(*ut)) {
- (void) ftruncate(fd, buf.st_size);
- log("wtmp_write: problem writing %s: %s",
- WTMP_FILE, strerror(errno));
- ret = 0;
- }
- (void)close(fd);
- return ret;
-}
-
-static int
-wtmp_perform_login(struct logininfo *li)
-{
- struct utmp ut;
-
- construct_utmp(li, &ut);
- return wtmp_write(li, &ut);
-}
-
-
-static int
-wtmp_perform_logout(struct logininfo *li)
-{
- struct utmp ut;
-
- construct_utmp(li, &ut);
- return wtmp_write(li, &ut);
-}
-
-
-int
-wtmp_write_entry(struct logininfo *li)
-{
- switch(li->type) {
- case LTYPE_LOGIN:
- return wtmp_perform_login(li);
- case LTYPE_LOGOUT:
- return wtmp_perform_logout(li);
- default:
- log("wtmp_write_entry: invalid type field");
- return 0;
- }
-}
-
-
-/* Notes on fetching login data from wtmp/wtmpx
- *
- * Logouts are usually recorded with (amongst other things) a blank
- * username on a given tty line. However, some systems (HP-UX is one)
- * leave all fields set, but change the ut_type field to DEAD_PROCESS.
- *
- * Since we're only looking for logins here, we know that the username
- * must be set correctly. On systems that leave it in, we check for
- * ut_type==USER_PROCESS (indicating a login.)
- *
- * Portability: Some systems may set something other than USER_PROCESS
- * to indicate a login process. I don't know of any as I write. Also,
- * it's possible that some systems may both leave the username in
- * place and not have ut_type.
- */
-
-/* return true if this wtmp entry indicates a login */
-static int
-wtmp_islogin(struct logininfo *li, struct utmp *ut)
-{
- if (strncmp(li->username, ut->ut_name,
- MIN_SIZEOF(li->username, ut->ut_name)) == 0) {
-# ifdef HAVE_TYPE_IN_UTMP
- if (ut->ut_type & USER_PROCESS)
- return 1;
-# else
- return 1;
-# endif
- }
- return 0;
-}
-
-int
-wtmp_get_entry(struct logininfo *li)
-{
- struct stat st;
- struct utmp ut;
- int fd, found=0;
-
- /* Clear the time entries in our logininfo */
- li->tv_sec = li->tv_usec = 0;
-
- if ((fd = open(WTMP_FILE, O_RDONLY)) < 0) {
- log("wtmp_get_entry: problem opening %s: %s",
- WTMP_FILE, strerror(errno));
- return 0;
- }
- if (fstat(fd, &st) != 0) {
- log("wtmp_get_entry: couldn't stat %s: %s",
- WTMP_FILE, strerror(errno));
- (void) close(fd);
- return 0;
- }
-
- /* Seek to the start of the last struct utmp */
- if (lseek(fd, -(off_t)sizeof(struct utmp), SEEK_END) == -1) {
- /* Looks like we've got a fresh wtmp file */
- (void) close(fd);
- return 0;
- }
-
- while (!found) {
- if (atomicio(read, fd, &ut, sizeof(ut)) != sizeof(ut)) {
- log("wtmp_get_entry: read of %s failed: %s",
- WTMP_FILE, strerror(errno));
- (void) close (fd);
- return 0;
- }
- if ( wtmp_islogin(li, &ut) ) {
- found = 1;
- /* We've already checked for a time in struct
- * utmp, in login_getlast(). */
-# ifdef HAVE_TIME_IN_UTMP
- li->tv_sec = ut.ut_time;
-# else
-# if HAVE_TV_IN_UTMP
- li->tv_sec = ut.ut_tv.tv_sec;
-# endif
-# endif
- (void) line_fullname(li->line, ut.ut_line,
- MIN_SIZEOF(li->line, ut.ut_line));
-# ifdef HAVE_HOST_IN_UTMP
- (void) strlcpy(li->hostname, ut.ut_host,
- MIN_SIZEOF(li->hostname, ut.ut_host));
-# endif
- continue;
- }
- /* Seek back 2 x struct utmp */
- if (lseek(fd, -(off_t)(2 * sizeof(struct utmp)), SEEK_CUR) == -1) {
- /* We've found the start of the file, so quit */
- (void) close (fd);
- return 0;
- }
- }
-
- /* We found an entry. Tidy up and return */
- (void) close(fd);
- return 1;
-}
-# endif /* USE_WTMP */
-
-
-/**
- ** Low-level wtmpx functions
- **/
-
-#ifdef USE_WTMPX
-/* write a wtmpx entry direct to the end of the file */
-/* This is a slight modification of code in OpenBSD's logwtmp.c */
-static int
-wtmpx_write(struct logininfo *li, struct utmpx *utx)
-{
- struct stat buf;
- int fd, ret = 1;
-
- if ((fd = open(WTMPX_FILE, O_WRONLY|O_APPEND, 0)) < 0) {
- log("wtmpx_write: problem opening %s: %s",
- WTMPX_FILE, strerror(errno));
- return 0;
- }
-
- if (fstat(fd, &buf) == 0)
- if (atomicio(write, fd, utx, sizeof(*utx)) != sizeof(*utx)) {
- (void) ftruncate(fd, buf.st_size);
- log("wtmpx_write: problem writing %s: %s",
- WTMPX_FILE, strerror(errno));
- ret = 0;
- }
- (void)close(fd);
-
- return ret;
-}
-
-
-static int
-wtmpx_perform_login(struct logininfo *li)
-{
- struct utmpx utx;
-
- construct_utmpx(li, &utx);
- return wtmpx_write(li, &utx);
-}
-
-
-static int
-wtmpx_perform_logout(struct logininfo *li)
-{
- struct utmpx utx;
-
- construct_utmpx(li, &utx);
- return wtmpx_write(li, &utx);
-}
-
-
-int
-wtmpx_write_entry(struct logininfo *li)
-{
- switch(li->type) {
- case LTYPE_LOGIN:
- return wtmpx_perform_login(li);
- case LTYPE_LOGOUT:
- return wtmpx_perform_logout(li);
- default:
- log("wtmpx_write_entry: invalid type field");
- return 0;
- }
-}
-
-/* Please see the notes above wtmp_islogin() for information about the
- next two functions */
-
-/* Return true if this wtmpx entry indicates a login */
-static int
-wtmpx_islogin(struct logininfo *li, struct utmpx *utx)
-{
- if ( strncmp(li->username, utx->ut_name,
- MIN_SIZEOF(li->username, utx->ut_name)) == 0 ) {
-# ifdef HAVE_TYPE_IN_UTMPX
- if (utx->ut_type == USER_PROCESS)
- return 1;
-# else
- return 1;
-# endif
- }
- return 0;
-}
-
-
-#if 0
-int
-wtmpx_get_entry(struct logininfo *li)
-{
- struct stat st;
- struct utmpx utx;
- int fd, found=0;
-
- /* Clear the time entries */
- li->tv_sec = li->tv_usec = 0;
-
- if ((fd = open(WTMPX_FILE, O_RDONLY)) < 0) {
- log("wtmpx_get_entry: problem opening %s: %s",
- WTMPX_FILE, strerror(errno));
- return 0;
- }
- if (fstat(fd, &st) != 0) {
- log("wtmpx_get_entry: couldn't stat %s: %s",
- WTMPX_FILE, strerror(errno));
- (void) close(fd);
- return 0;
- }
-
- /* Seek to the start of the last struct utmpx */
- if (lseek(fd, -(off_t)sizeof(struct utmpx), SEEK_END) == -1 ) {
- /* probably a newly rotated wtmpx file */
- (void) close(fd);
- return 0;
- }
-
- while (!found) {
- if (atomicio(read, fd, &utx, sizeof(utx)) != sizeof(utx)) {
- log("wtmpx_get_entry: read of %s failed: %s",
- WTMPX_FILE, strerror(errno));
- (void) close (fd);
- return 0;
- }
- /* Logouts are recorded as a blank username on a particular line.
- * So, we just need to find the username in struct utmpx */
- if ( wtmpx_islogin(li, &utx) ) {
- found = 1;
-# ifdef HAVE_TV_IN_UTMPX
- li->tv_sec = utx.ut_tv.tv_sec;
-# else
-# ifdef HAVE_TIME_IN_UTMPX
- li->tv_sec = utx.ut_time;
-# endif
-# endif
- (void) line_fullname(li->line, utx.ut_line, sizeof(li->line));
-# ifdef HAVE_HOST_IN_UTMPX
- (void) strlcpy(li->hostname, utx.ut_host,
- MIN_SIZEOF(li->hostname, utx.ut_host));
-# endif
- continue;
- }
- if (lseek(fd, -(off_t)(2 * sizeof(struct utmpx)), SEEK_CUR) == -1) {
- (void) close (fd);
- return 0;
- }
- }
-
- (void) close(fd);
- return 1;
-}
-#endif
-#endif /* USE_WTMPX */
-
-/**
- ** Low-level libutil login() functions
- **/
-
-#ifdef USE_LOGIN
-static int
-syslogin_perform_login(struct logininfo *li)
-{
- struct utmp *ut;
-
- if (! (ut = (struct utmp *)malloc(sizeof(*ut)))) {
- log("syslogin_perform_login: couldn't malloc()");
- return 0;
- }
- construct_utmp(li, ut);
- login(ut);
-
- return 1;
-}
-
-static int
-syslogin_perform_logout(struct logininfo *li)
-{
-# ifdef HAVE_LOGOUT
- char line[8];
-
- (void)line_stripname(line, li->line, sizeof(line));
-
- if (!logout(line)) {
- log("syslogin_perform_logout: logout() returned an error");
-# ifdef HAVE_LOGWTMP
- } else {
- logwtmp(line, "", "");
-# endif
- }
- /* FIXME: (ATL - if the need arises) What to do if we have
- * login, but no logout? what if logout but no logwtmp? All
- * routines are in libutil so they should all be there,
- * but... */
-# endif
- return 1;
-}
-
-int
-syslogin_write_entry(struct logininfo *li)
-{
- switch (li->type) {
- case LTYPE_LOGIN:
- return syslogin_perform_login(li);
- case LTYPE_LOGOUT:
- return syslogin_perform_logout(li);
- default:
- log("syslogin_write_entry: Invalid type field");
- return 0;
- }
-}
-#endif /* USE_LOGIN */
-
-/* end of file log-syslogin.c */
-
-/**
- ** Low-level lastlog functions
- **/
-
-#ifdef USE_LASTLOG
-#define LL_FILE 1
-#define LL_DIR 2
-#define LL_OTHER 3
-
-static void
-lastlog_construct(struct logininfo *li, struct lastlog *last)
-{
- /* clear the structure */
- (void) memset(last, '\0', sizeof(*last));
-
- (void)line_stripname(last->ll_line, li->line, sizeof(last->ll_line));
- (void) strlcpy(last->ll_host, li->hostname,
- MIN_SIZEOF(last->ll_host, li->hostname));
- last->ll_time = li->tv_sec;
-}
-
-static int
-lastlog_filetype(char *filename)
-{
- struct stat st;
-
- if (stat(LASTLOG_FILE, &st) != 0) {
- log("lastlog_perform_login: Couldn't stat %s: %s", LASTLOG_FILE,
- strerror(errno));
- return 0;
- }
- if (S_ISDIR(st.st_mode))
- return LL_DIR;
- else if (S_ISREG(st.st_mode))
- return LL_FILE;
- else
- return LL_OTHER;
-}
-
-
-/* open the file (using filemode) and seek to the login entry */
-static int
-lastlog_openseek(struct logininfo *li, int *fd, int filemode)
-{
- off_t offset;
- int type;
- char lastlog_file[1024];
-
- type = lastlog_filetype(LASTLOG_FILE);
- switch (type) {
- case LL_FILE:
- (void) strlcpy(lastlog_file, LASTLOG_FILE, sizeof(lastlog_file));
- break;
- case LL_DIR:
- (void) snprintf(lastlog_file, sizeof(lastlog_file), "%s/%s",
- LASTLOG_FILE, li->username);
- break;
- default:
- log("lastlog_openseek: %.100s is not a file or directory!",
- LASTLOG_FILE);
- return 0;
- }
-
- *fd = open(lastlog_file, filemode);
- if ( *fd < 0) {
- debug("lastlog_openseek: Couldn't open %s: %s",
- lastlog_file, strerror(errno));
- return 0;
- }
-
- if (type == LL_FILE) {
- /* find this uid's offset in the lastlog file */
- offset = (off_t) ((long)li->uid * sizeof(struct lastlog));
-
- if ( lseek(*fd, offset, SEEK_SET) != offset ) {
- log("lastlog_openseek: %s->lseek(): %s",
- lastlog_file, strerror(errno));
- return 0;
- }
- }
-
- return 1;
-}
-
-static int
-lastlog_perform_login(struct logininfo *li)
-{
- struct lastlog last;
- int fd;
-
- /* create our struct lastlog */
- lastlog_construct(li, &last);
-
- if (!lastlog_openseek(li, &fd, O_RDWR|O_CREAT))
- return(0);
-
- /* write the entry */
- if (atomicio(write, fd, &last, sizeof(last)) != sizeof(last)) {
- (void) close(fd);
- log("lastlog_write_filemode: Error writing to %s: %s",
- LASTLOG_FILE, strerror(errno));
- return 0;
- }
-
- (void) close(fd);
- return 1;
-}
-
-int
-lastlog_write_entry(struct logininfo *li)
-{
- switch(li->type) {
- case LTYPE_LOGIN:
- return lastlog_perform_login(li);
- default:
- log("lastlog_write_entry: Invalid type field");
- return 0;
- }
-}
-
-static void
-lastlog_populate_entry(struct logininfo *li, struct lastlog *last)
-{
- (void) line_fullname(li->line, last->ll_line, sizeof(li->line));
- (void) strlcpy(li->hostname, last->ll_host,
- MIN_SIZEOF(li->hostname, last->ll_host));
- li->tv_sec = last->ll_time;
-}
-
-int
-lastlog_get_entry(struct logininfo *li)
-{
- struct lastlog last;
- int fd, ret;
-
- if (!lastlog_openseek(li, &fd, O_RDONLY))
- return (0);
-
- ret = atomicio(read, fd, &last, sizeof(last));
- close(fd);
-
- switch (ret) {
- case 0:
- memset(&last, '\0', sizeof(last));
- /* FALLTHRU */
- case sizeof(last):
- lastlog_populate_entry(li, &last);
- return (1);
- case -1:
- error("%s: Error reading from %s: %s", __func__,
- LASTLOG_FILE, strerror(errno));
- return (0);
- default:
- error("%s: Error reading from %s: Expecting %d, got %d",
- __func__, LASTLOG_FILE, (int)sizeof(last), ret);
- return (0);
- }
-
- /* NOTREACHED */
- return (0);
-}
-#endif /* USE_LASTLOG */
diff --git a/usr/src/cmd/ssh/sshd/servconf.c b/usr/src/cmd/ssh/sshd/servconf.c
deleted file mode 100644
index 516466bbc1..0000000000
--- a/usr/src/cmd/ssh/sshd/servconf.c
+++ /dev/null
@@ -1,1494 +0,0 @@
-/*
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-/*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2013 Joyent, Inc. All rights reserved.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: servconf.c,v 1.115 2002/09/04 18:52:42 stevesk Exp $");
-
-#ifdef HAVE_DEFOPEN
-#include <deflt.h>
-#endif /* HAVE_DEFOPEN */
-
-#if defined(KRB4)
-#include <krb.h>
-#endif
-#if defined(KRB5)
-#ifdef HEIMDAL
-#include <krb.h>
-#else
-/* Bodge - but then, so is using the kerberos IV KEYFILE to get a Kerberos V
- * keytab */
-#define KEYFILE "/etc/krb5.keytab"
-#endif
-#endif
-#ifdef AFS
-#include <kafs.h>
-#endif
-
-#include "ssh.h"
-#include "log.h"
-#include "buffer.h"
-#include "servconf.h"
-#include "xmalloc.h"
-#include "compat.h"
-#include "pathnames.h"
-#include "tildexpand.h"
-#include "misc.h"
-#include "cipher.h"
-#include "kex.h"
-#include "mac.h"
-#include "auth.h"
-#include "match.h"
-#include "groupaccess.h"
-
-static void add_listen_addr(ServerOptions *, char *, u_short);
-static void add_one_listen_addr(ServerOptions *, char *, u_short);
-
-extern Buffer cfg;
-
-/* AF_UNSPEC or AF_INET or AF_INET6 */
-extern int IPv4or6;
-
-/*
- * Initializes the server options to their initial (unset) values. Some of those
- * that stay unset after the command line options and configuration files are
- * read are set to their default values in fill_default_server_options().
- */
-void
-initialize_server_options(ServerOptions *options)
-{
- (void) memset(options, 0, sizeof(*options));
-
- /* Standard Options */
- options->num_ports = 0;
- options->ports_from_cmdline = 0;
- options->listen_addrs = NULL;
- options->num_host_key_files = 0;
- options->pid_file = NULL;
- options->server_key_bits = -1;
- options->login_grace_time = -1;
- options->key_regeneration_time = -1;
- options->permit_root_login = PERMIT_NOT_SET;
- options->ignore_rhosts = -1;
- options->ignore_user_known_hosts = -1;
- options->print_motd = -1;
- options->print_lastlog = -1;
- options->x11_forwarding = -1;
- options->x11_display_offset = -1;
- options->x11_use_localhost = -1;
- options->xauth_location = NULL;
- options->strict_modes = -1;
- options->keepalives = -1;
- options->log_facility = SYSLOG_FACILITY_NOT_SET;
- options->log_level = SYSLOG_LEVEL_NOT_SET;
- options->rhosts_authentication = -1;
- options->rhosts_rsa_authentication = -1;
- options->hostbased_authentication = -1;
- options->hostbased_uses_name_from_packet_only = -1;
- options->rsa_authentication = -1;
- options->pubkey_authentication = -1;
-#ifdef GSSAPI
- options->gss_authentication = -1;
- options->gss_keyex = -1;
- options->gss_store_creds = -1;
- options->gss_use_session_ccache = -1;
- options->gss_cleanup_creds = -1;
-#endif
-#if defined(KRB4) || defined(KRB5)
- options->kerberos_authentication = -1;
- options->kerberos_or_local_passwd = -1;
- options->kerberos_ticket_cleanup = -1;
-#endif
-#if defined(AFS) || defined(KRB5)
- options->kerberos_tgt_passing = -1;
-#endif
-#ifdef AFS
- options->afs_token_passing = -1;
-#endif
- options->password_authentication = -1;
- options->kbd_interactive_authentication = -1;
- options->challenge_response_authentication = -1;
- options->pam_authentication_via_kbd_int = -1;
- options->permit_empty_passwd = -1;
- options->permit_user_env = -1;
- options->compression = -1;
- options->allow_tcp_forwarding = -1;
- options->num_allow_users = 0;
- options->num_deny_users = 0;
- options->num_allow_groups = 0;
- options->num_deny_groups = 0;
- options->ciphers = NULL;
- options->macs = NULL;
- options->protocol = SSH_PROTO_UNKNOWN;
- options->gateway_ports = -1;
- options->num_subsystems = 0;
- options->max_startups_begin = -1;
- options->max_startups_rate = -1;
- options->max_startups = -1;
- options->banner = NULL;
- options->verify_reverse_mapping = -1;
- options->client_alive_interval = -1;
- options->client_alive_count_max = -1;
- options->authorized_keys_file = NULL;
- options->authorized_keys_file2 = NULL;
-
- options->max_auth_tries = -1;
- options->max_auth_tries_log = -1;
-
- options->max_init_auth_tries = -1;
- options->max_init_auth_tries_log = -1;
-
- options->lookup_client_hostnames = -1;
- options->use_openssl_engine = -1;
- options->chroot_directory = NULL;
- options->pre_userauth_hook = NULL;
- options->pam_service_name = NULL;
- options->pam_service_prefix = NULL;
-}
-
-#ifdef HAVE_DEFOPEN
-/*
- * Reads /etc/default/login and defaults several ServerOptions:
- *
- * PermitRootLogin
- * PermitEmptyPasswords
- * LoginGraceTime
- *
- * CONSOLE=* -> PermitRootLogin=without-password
- * #CONSOLE=* -> PermitRootLogin=yes
- *
- * PASSREQ=YES -> PermitEmptyPasswords=no
- * PASSREQ=NO -> PermitEmptyPasswords=yes
- * #PASSREQ=* -> PermitEmptyPasswords=no
- *
- * TIMEOUT=<secs> -> LoginGraceTime=<secs>
- * #TIMEOUT=<secs> -> LoginGraceTime=300
- */
-static
-void
-deflt_fill_default_server_options(ServerOptions *options)
-{
- int flags;
- char *ptr;
-
- if (defopen(_PATH_DEFAULT_LOGIN))
- return;
-
- /* Ignore case */
- flags = defcntl(DC_GETFLAGS, 0);
- TURNOFF(flags, DC_CASE);
- (void) defcntl(DC_SETFLAGS, flags);
-
- if (options->permit_root_login == PERMIT_NOT_SET &&
- (ptr = defread("CONSOLE=")) != NULL)
- options->permit_root_login = PERMIT_NO_PASSWD;
-
- if (options->permit_empty_passwd == -1 &&
- (ptr = defread("PASSREQ=")) != NULL) {
- if (strcasecmp("YES", ptr) == 0)
- options->permit_empty_passwd = 0;
- else if (strcasecmp("NO", ptr) == 0)
- options->permit_empty_passwd = 1;
- }
-
- if (options->max_init_auth_tries == -1 &&
- (ptr = defread("RETRIES=")) != NULL) {
- options->max_init_auth_tries = atoi(ptr);
- }
-
- if (options->max_init_auth_tries_log == -1 &&
- (ptr = defread("SYSLOG_FAILED_LOGINS=")) != NULL) {
- options->max_init_auth_tries_log = atoi(ptr);
- }
-
- if (options->login_grace_time == -1) {
- if ((ptr = defread("TIMEOUT=")) != NULL)
- options->login_grace_time = (unsigned)atoi(ptr);
- else
- options->login_grace_time = 300;
- }
-
- (void) defopen((char *)NULL);
-}
-#endif /* HAVE_DEFOPEN */
-
-void
-fill_default_server_options(ServerOptions *options)
-{
-
-#ifdef HAVE_DEFOPEN
- deflt_fill_default_server_options(options);
-#endif /* HAVE_DEFOPEN */
-
- /* Standard Options */
- if (options->protocol == SSH_PROTO_UNKNOWN)
- options->protocol = SSH_PROTO_1|SSH_PROTO_2;
- if (options->num_host_key_files == 0) {
- /* fill default hostkeys for protocols */
- if (options->protocol & SSH_PROTO_1)
- options->host_key_files[options->num_host_key_files++] =
- _PATH_HOST_KEY_FILE;
-#ifndef GSSAPI
- /* With GSS keyex we can run v2 w/ no host keys */
- if (options->protocol & SSH_PROTO_2) {
- options->host_key_files[options->num_host_key_files++] =
- _PATH_HOST_RSA_KEY_FILE;
- options->host_key_files[options->num_host_key_files++] =
- _PATH_HOST_DSA_KEY_FILE;
- }
-#endif /* GSSAPI */
- }
- if (options->num_ports == 0)
- options->ports[options->num_ports++] = SSH_DEFAULT_PORT;
- if (options->listen_addrs == NULL)
- add_listen_addr(options, NULL, 0);
- if (options->pid_file == NULL)
- options->pid_file = _PATH_SSH_DAEMON_PID_FILE;
- if (options->server_key_bits == -1)
- options->server_key_bits = 768;
- if (options->login_grace_time == -1)
- options->login_grace_time = 120;
- if (options->key_regeneration_time == -1)
- options->key_regeneration_time = 3600;
- if (options->permit_root_login == PERMIT_NOT_SET)
- options->permit_root_login = PERMIT_YES;
- if (options->ignore_rhosts == -1)
- options->ignore_rhosts = 1;
- if (options->ignore_user_known_hosts == -1)
- options->ignore_user_known_hosts = 0;
- if (options->print_motd == -1)
- options->print_motd = 1;
- if (options->print_lastlog == -1)
- options->print_lastlog = 1;
- if (options->x11_forwarding == -1)
- options->x11_forwarding = 1;
- if (options->x11_display_offset == -1)
- options->x11_display_offset = 10;
- if (options->x11_use_localhost == -1)
- options->x11_use_localhost = 1;
- if (options->xauth_location == NULL)
- options->xauth_location = _PATH_XAUTH;
- if (options->strict_modes == -1)
- options->strict_modes = 1;
- if (options->keepalives == -1)
- options->keepalives = 1;
- if (options->log_facility == SYSLOG_FACILITY_NOT_SET)
- options->log_facility = SYSLOG_FACILITY_AUTH;
- if (options->log_level == SYSLOG_LEVEL_NOT_SET)
- options->log_level = SYSLOG_LEVEL_INFO;
- if (options->rhosts_authentication == -1)
- options->rhosts_authentication = 0;
- if (options->rhosts_rsa_authentication == -1)
- options->rhosts_rsa_authentication = 0;
- if (options->hostbased_authentication == -1)
- options->hostbased_authentication = 0;
- if (options->hostbased_uses_name_from_packet_only == -1)
- options->hostbased_uses_name_from_packet_only = 0;
- if (options->rsa_authentication == -1)
- options->rsa_authentication = 1;
- if (options->pubkey_authentication == -1)
- options->pubkey_authentication = 1;
-#ifdef GSSAPI
- if (options->gss_authentication == -1)
- options->gss_authentication = 1;
- if (options->gss_keyex == -1)
- options->gss_keyex = 1;
- if (options->gss_store_creds == -1)
- options->gss_store_creds = 1;
- if (options->gss_use_session_ccache == -1)
- options->gss_use_session_ccache = 1;
- if (options->gss_cleanup_creds == -1)
- options->gss_cleanup_creds = 1;
-#endif
-#if defined(KRB4) || defined(KRB5)
- if (options->kerberos_authentication == -1)
- options->kerberos_authentication = 0;
- if (options->kerberos_or_local_passwd == -1)
- options->kerberos_or_local_passwd = 1;
- if (options->kerberos_ticket_cleanup == -1)
- options->kerberos_ticket_cleanup = 1;
-#endif
-#if defined(AFS) || defined(KRB5)
- if (options->kerberos_tgt_passing == -1)
- options->kerberos_tgt_passing = 0;
-#endif
-#ifdef AFS
- if (options->afs_token_passing == -1)
- options->afs_token_passing = 0;
-#endif
- if (options->password_authentication == -1)
- options->password_authentication = 1;
- /*
- * options->pam_authentication_via_kbd_int has intentionally no default
- * value since we do not need it.
- */
- if (options->kbd_interactive_authentication == -1)
- options->kbd_interactive_authentication = 1;
- if (options->challenge_response_authentication == -1)
- options->challenge_response_authentication = 1;
- if (options->permit_empty_passwd == -1)
- options->permit_empty_passwd = 0;
- if (options->permit_user_env == -1)
- options->permit_user_env = 0;
- if (options->compression == -1)
- options->compression = 1;
- if (options->allow_tcp_forwarding == -1)
- options->allow_tcp_forwarding = 1;
- if (options->gateway_ports == -1)
- options->gateway_ports = 0;
- if (options->max_startups == -1)
- options->max_startups = 10;
- if (options->max_startups_rate == -1)
- options->max_startups_rate = 100; /* 100% */
- if (options->max_startups_begin == -1)
- options->max_startups_begin = options->max_startups;
- if (options->verify_reverse_mapping == -1)
- options->verify_reverse_mapping = 0;
- if (options->client_alive_interval == -1)
- options->client_alive_interval = 0;
- if (options->client_alive_count_max == -1)
- options->client_alive_count_max = 3;
- if (options->authorized_keys_file2 == NULL) {
- /* authorized_keys_file2 falls back to authorized_keys_file */
- if (options->authorized_keys_file != NULL)
- options->authorized_keys_file2 = options->authorized_keys_file;
- else
- options->authorized_keys_file2 = _PATH_SSH_USER_PERMITTED_KEYS2;
- }
- if (options->authorized_keys_file == NULL)
- options->authorized_keys_file = _PATH_SSH_USER_PERMITTED_KEYS;
-
- if (options->max_auth_tries == -1)
- options->max_auth_tries = AUTH_FAIL_MAX;
- if (options->max_auth_tries_log == -1)
- options->max_auth_tries_log = options->max_auth_tries / 2;
-
- if (options->max_init_auth_tries == -1)
- options->max_init_auth_tries = AUTH_FAIL_MAX;
- if (options->max_init_auth_tries_log == -1)
- options->max_init_auth_tries_log = options->max_init_auth_tries / 2;
-
- if (options->lookup_client_hostnames == -1)
- options->lookup_client_hostnames = 1;
- if (options->use_openssl_engine == -1)
- options->use_openssl_engine = 1;
- if (options->pam_service_prefix == NULL)
- options->pam_service_prefix = _SSH_PAM_SERVICE_PREFIX;
- if (options->pam_service_name == NULL)
- options->pam_service_name = NULL;
-}
-
-/* Keyword tokens. */
-typedef enum {
- sBadOption, /* == unknown option */
- /* Portable-specific options */
- sPAMAuthenticationViaKbdInt,
- /* Standard Options */
- sPort, sHostKeyFile, sServerKeyBits, sLoginGraceTime, sKeyRegenerationTime,
- sPermitRootLogin, sLogFacility, sLogLevel,
- sRhostsAuthentication, sRhostsRSAAuthentication, sRSAAuthentication,
-#ifdef GSSAPI
- sGssAuthentication, sGssKeyEx, sGssStoreDelegCreds,
- sGssUseSessionCredCache, sGssCleanupCreds,
-#endif /* GSSAPI */
-#if defined(KRB4) || defined(KRB5)
- sKerberosAuthentication, sKerberosOrLocalPasswd, sKerberosTicketCleanup,
-#endif
-#if defined(AFS) || defined(KRB5)
- sKerberosTgtPassing,
-#endif
-#ifdef AFS
- sAFSTokenPassing,
-#endif
- sChallengeResponseAuthentication,
- sPasswordAuthentication, sKbdInteractiveAuthentication, sListenAddress,
- sPrintMotd, sPrintLastLog, sIgnoreRhosts,
- sX11Forwarding, sX11DisplayOffset, sX11UseLocalhost,
- sStrictModes, sEmptyPasswd, sKeepAlives,
- sPermitUserEnvironment, sUseLogin, sAllowTcpForwarding, sCompression,
- sAllowUsers, sDenyUsers, sAllowGroups, sDenyGroups,
- sIgnoreUserKnownHosts, sCiphers, sMacs, sProtocol, sPidFile,
- sGatewayPorts, sPubkeyAuthentication, sXAuthLocation, sSubsystem, sMaxStartups,
- sBanner, sVerifyReverseMapping, sHostbasedAuthentication,
- sHostbasedUsesNameFromPacketOnly, sClientAliveInterval,
- sClientAliveCountMax, sAuthorizedKeysFile, sAuthorizedKeysFile2,
- sMaxAuthTries, sMaxAuthTriesLog, sUsePrivilegeSeparation,
- sLookupClientHostnames, sUseOpenSSLEngine, sChrootDirectory,
- sPreUserauthHook, sMatch, sPAMServicePrefix, sPAMServiceName,
- sDeprecated
-} ServerOpCodes;
-
-#define SSHCFG_GLOBAL 0x01 /* allowed in main section of sshd_config */
-#define SSHCFG_MATCH 0x02 /* allowed inside a Match section */
-#define SSHCFG_ALL (SSHCFG_GLOBAL|SSHCFG_MATCH)
-
-/* Textual representation of the tokens. */
-static struct {
- const char *name;
- ServerOpCodes opcode;
- u_int flags;
-} keywords[] = {
- /* Portable-specific options */
- { "PAMAuthenticationViaKbdInt", sPAMAuthenticationViaKbdInt, SSHCFG_GLOBAL },
- /* Standard Options */
- { "port", sPort, SSHCFG_GLOBAL },
- { "hostkey", sHostKeyFile, SSHCFG_GLOBAL },
- { "hostdsakey", sHostKeyFile, SSHCFG_GLOBAL }, /* alias */
- { "pidfile", sPidFile, SSHCFG_GLOBAL },
- { "serverkeybits", sServerKeyBits, SSHCFG_GLOBAL },
- { "logingracetime", sLoginGraceTime, SSHCFG_GLOBAL },
- { "keyregenerationinterval", sKeyRegenerationTime, SSHCFG_GLOBAL },
- { "permitrootlogin", sPermitRootLogin, SSHCFG_ALL },
- { "syslogfacility", sLogFacility, SSHCFG_GLOBAL },
- { "loglevel", sLogLevel, SSHCFG_GLOBAL },
- { "rhostsauthentication", sRhostsAuthentication, SSHCFG_GLOBAL },
- { "rhostsrsaauthentication", sRhostsRSAAuthentication, SSHCFG_ALL },
- { "hostbasedauthentication", sHostbasedAuthentication, SSHCFG_ALL },
- { "hostbasedusesnamefrompacketonly", sHostbasedUsesNameFromPacketOnly },
- { "rsaauthentication", sRSAAuthentication, SSHCFG_ALL },
- { "pubkeyauthentication", sPubkeyAuthentication, SSHCFG_ALL },
- { "dsaauthentication", sPubkeyAuthentication, SSHCFG_GLOBAL }, /* alias */
-#ifdef GSSAPI
- { "gssapiauthentication", sGssAuthentication, SSHCFG_ALL },
- { "gssapikeyexchange", sGssKeyEx, SSHCFG_GLOBAL },
- { "gssapistoredelegatedcredentials", sGssStoreDelegCreds, SSHCFG_GLOBAL },
- { "gssauthentication", sGssAuthentication, SSHCFG_GLOBAL }, /* alias */
- { "gsskeyex", sGssKeyEx, SSHCFG_GLOBAL }, /* alias */
- { "gssstoredelegcreds", sGssStoreDelegCreds, SSHCFG_GLOBAL }, /* alias */
-#ifndef SUNW_GSSAPI
- { "gssusesessionccache", sGssUseSessionCredCache, SSHCFG_GLOBAL },
- { "gssusesessioncredcache", sGssUseSessionCredCache, SSHCFG_GLOBAL },
- { "gsscleanupcreds", sGssCleanupCreds, SSHCFG_GLOBAL },
-#endif /* SUNW_GSSAPI */
-#endif
-#if defined(KRB4) || defined(KRB5)
- { "kerberosauthentication", sKerberosAuthentication, SSHCFG_ALL },
- { "kerberosorlocalpasswd", sKerberosOrLocalPasswd, SSHCFG_GLOBAL },
- { "kerberosticketcleanup", sKerberosTicketCleanup, SSHCFG_GLOBAL },
-#endif
-#if defined(AFS) || defined(KRB5)
- { "kerberostgtpassing", sKerberosTgtPassing, SSHCFG_GLOBAL },
-#endif
-#ifdef AFS
- { "afstokenpassing", sAFSTokenPassing, SSHCFG_GLOBAL },
-#endif
- { "passwordauthentication", sPasswordAuthentication, SSHCFG_ALL },
- { "kbdinteractiveauthentication", sKbdInteractiveAuthentication, SSHCFG_ALL },
- { "challengeresponseauthentication", sChallengeResponseAuthentication, SSHCFG_GLOBAL },
- { "skeyauthentication", sChallengeResponseAuthentication, SSHCFG_GLOBAL }, /* alias */
- { "checkmail", sDeprecated, SSHCFG_GLOBAL },
- { "listenaddress", sListenAddress, SSHCFG_GLOBAL },
- { "printmotd", sPrintMotd, SSHCFG_GLOBAL },
- { "printlastlog", sPrintLastLog, SSHCFG_GLOBAL },
- { "ignorerhosts", sIgnoreRhosts, SSHCFG_GLOBAL },
- { "ignoreuserknownhosts", sIgnoreUserKnownHosts, SSHCFG_GLOBAL },
- { "x11forwarding", sX11Forwarding, SSHCFG_ALL },
- { "x11displayoffset", sX11DisplayOffset, SSHCFG_ALL },
- { "x11uselocalhost", sX11UseLocalhost, SSHCFG_ALL },
- { "xauthlocation", sXAuthLocation, SSHCFG_GLOBAL },
- { "strictmodes", sStrictModes, SSHCFG_GLOBAL },
- { "permitemptypasswords", sEmptyPasswd, SSHCFG_ALL },
- { "permituserenvironment", sPermitUserEnvironment, SSHCFG_GLOBAL },
- { "uselogin", sUseLogin, SSHCFG_GLOBAL },
- { "compression", sCompression, SSHCFG_GLOBAL },
- { "tcpkeepalive", sKeepAlives, SSHCFG_GLOBAL },
- { "keepalive", sKeepAlives, SSHCFG_GLOBAL }, /* obsolete */
- { "allowtcpforwarding", sAllowTcpForwarding, SSHCFG_ALL },
- { "allowusers", sAllowUsers, SSHCFG_GLOBAL },
- { "denyusers", sDenyUsers, SSHCFG_GLOBAL },
- { "allowgroups", sAllowGroups, SSHCFG_GLOBAL },
- { "denygroups", sDenyGroups, SSHCFG_GLOBAL },
- { "ciphers", sCiphers, SSHCFG_GLOBAL },
- { "macs", sMacs, SSHCFG_GLOBAL},
- { "protocol", sProtocol,SSHCFG_GLOBAL },
- { "gatewayports", sGatewayPorts, SSHCFG_ALL },
- { "subsystem", sSubsystem, SSHCFG_GLOBAL},
- { "maxstartups", sMaxStartups, SSHCFG_GLOBAL },
- { "banner", sBanner, SSHCFG_ALL },
- { "verifyreversemapping", sVerifyReverseMapping, SSHCFG_GLOBAL },
- { "reversemappingcheck", sVerifyReverseMapping,SSHCFG_GLOBAL },
- { "clientaliveinterval", sClientAliveInterval, SSHCFG_GLOBAL },
- { "clientalivecountmax", sClientAliveCountMax, SSHCFG_GLOBAL },
- { "authorizedkeysfile", sAuthorizedKeysFile, SSHCFG_GLOBAL },
- { "authorizedkeysfile2", sAuthorizedKeysFile2, SSHCFG_GLOBAL },
- { "maxauthtries", sMaxAuthTries, SSHCFG_ALL },
- { "maxauthtrieslog", sMaxAuthTriesLog, SSHCFG_GLOBAL },
- { "useprivilegeseparation", sUsePrivilegeSeparation, SSHCFG_GLOBAL },
- { "lookupclienthostnames", sLookupClientHostnames, SSHCFG_GLOBAL },
- { "useopensslengine", sUseOpenSSLEngine, SSHCFG_GLOBAL },
- { "chrootdirectory", sChrootDirectory, SSHCFG_ALL },
- { "preuserauthhook", sPreUserauthHook, SSHCFG_ALL},
- { "match", sMatch, SSHCFG_ALL },
- { "pamserviceprefix", sPAMServicePrefix, SSHCFG_GLOBAL },
- { "pamservicename", sPAMServiceName, SSHCFG_GLOBAL },
-
- { NULL, sBadOption, 0 }
-};
-
-/*
- * Returns the number of the token pointed to by cp or sBadOption.
- */
-
-static ServerOpCodes
-parse_token(const char *cp, const char *filename,
- int linenum, u_int *flags)
-{
- u_int i;
-
- for (i = 0; keywords[i].name; i++)
- if (strcasecmp(cp, keywords[i].name) == 0) {
- *flags = keywords[i].flags;
- return keywords[i].opcode;
- }
-
- error("%s: line %d: Bad configuration option: %s",
- filename, linenum, cp);
- return sBadOption;
-}
-
-static void
-add_listen_addr(ServerOptions *options, char *addr, u_short port)
-{
- int i;
-
- if (options->num_ports == 0)
- options->ports[options->num_ports++] = SSH_DEFAULT_PORT;
- if (port == 0)
- for (i = 0; i < options->num_ports; i++)
- add_one_listen_addr(options, addr, options->ports[i]);
- else
- add_one_listen_addr(options, addr, port);
-}
-
-static void
-add_one_listen_addr(ServerOptions *options, char *addr, u_short port)
-{
- struct addrinfo hints, *ai, *aitop;
- char strport[NI_MAXSERV];
- int gaierr;
-
- (void) memset(&hints, 0, sizeof(hints));
- hints.ai_family = IPv4or6;
- hints.ai_socktype = SOCK_STREAM;
- hints.ai_flags = (addr == NULL) ? AI_PASSIVE : 0;
- (void) snprintf(strport, sizeof strport, "%u", port);
- if ((gaierr = getaddrinfo(addr, strport, &hints, &aitop)) != 0)
- fatal("bad addr or host: %s (%s)",
- addr ? addr : "<NULL>",
- gai_strerror(gaierr));
- for (ai = aitop; ai->ai_next; ai = ai->ai_next)
- ;
- ai->ai_next = options->listen_addrs;
- options->listen_addrs = aitop;
-}
-
-/*
- * The strategy for the Match blocks is that the config file is parsed twice.
- *
- * The first time is at startup. activep is initialized to 1 and the
- * directives in the global context are processed and acted on. Hitting a
- * Match directive unsets activep and the directives inside the block are
- * checked for syntax only.
- *
- * The second time is after a connection has been established but before
- * authentication. activep is initialized to 2 and global config directives
- * are ignored since they have already been processed. If the criteria in a
- * Match block is met, activep is set and the subsequent directives
- * processed and actioned until EOF or another Match block unsets it. Any
- * options set are copied into the main server config.
- *
- * Potential additions/improvements:
- * - Add Match support for pre-kex directives, eg Protocol, Ciphers.
- *
- * - Add a Tag directive (idea from David Leonard) ala pf, eg:
- * Match Address 192.168.0.*
- * Tag trusted
- * Match Group wheel
- * Tag trusted
- * Match Tag trusted
- * AllowTcpForwarding yes
- * GatewayPorts clientspecified
- * [...]
- *
- * - Add a PermittedChannelRequests directive
- * Match Group shell
- * PermittedChannelRequests session,forwarded-tcpip
- */
-
-static int
-match_cfg_line_group(const char *grps, int line, const char *user)
-{
- int result = 0;
- struct passwd *pw;
-
- if (user == NULL)
- goto out;
-
- if ((pw = getpwnam(user)) == NULL) {
- debug("Can't match group at line %d because user %.100s does "
- "not exist", line, user);
- } else if (ga_init(pw->pw_name, pw->pw_gid) == 0) {
- debug("Can't Match group because user %.100s not in any group "
- "at line %d", user, line);
- } else if (ga_match_pattern_list(grps) != 1) {
- debug("user %.100s does not match group list %.100s at line %d",
- user, grps, line);
- } else {
- debug("user %.100s matched group list %.100s at line %d", user,
- grps, line);
- result = 1;
- }
-out:
- ga_free();
- return result;
-}
-
-static int
-match_cfg_line(char **condition, int line, const char *user, const char *host,
- const char *address)
-{
- int result = 1;
- char *arg, *attrib, *cp = *condition;
- size_t len;
-
- if (user == NULL)
- debug3("checking syntax for 'Match %s'", cp);
- else
- debug3("checking match for '%s' user %s host %s addr %s", cp,
- user ? user : "(null)", host ? host : "(null)",
- address ? address : "(null)");
-
- while ((attrib = strdelim(&cp)) != NULL && *attrib != '\0') {
- if ((arg = strdelim(&cp)) == NULL || *arg == '\0') {
- error("Missing Match criteria for %s", attrib);
- return -1;
- }
- len = strlen(arg);
- if (strcasecmp(attrib, "user") == 0) {
- if (!user) {
- result = 0;
- continue;
- }
- if (match_pattern_list(user, arg, len, 0) != 1)
- result = 0;
- else
- debug("user %.100s matched 'User %.100s' at "
- "line %d", user, arg, line);
- } else if (strcasecmp(attrib, "group") == 0) {
- switch (match_cfg_line_group(arg, line, user)) {
- case -1:
- return -1;
- case 0:
- result = 0;
- }
- } else if (strcasecmp(attrib, "host") == 0) {
- if (!host) {
- result = 0;
- continue;
- }
- if (match_hostname(host, arg, len) != 1)
- result = 0;
- else
- debug("connection from %.100s matched 'Host "
- "%.100s' at line %d", host, arg, line);
- } else if (strcasecmp(attrib, "address") == 0) {
- switch (addr_match_list(address, arg)) {
- case 1:
- debug("connection from %.100s matched 'Address "
- "%.100s' at line %d", address, arg, line);
- break;
- case 0:
- case -1:
- result = 0;
- break;
- case -2:
- return -1;
- }
- } else {
- error("Unsupported Match attribute %s", attrib);
- return -1;
- }
- }
- if (user != NULL)
- debug3("match %sfound", result ? "" : "not ");
- *condition = cp;
- return result;
-}
-
-#define WHITESPACE " \t\r\n"
-
-int
-process_server_config_line(ServerOptions *options, char *line,
- const char *filename, int linenum, int *activep, const char *user,
- const char *host, const char *address)
-{
- char *cp, **charptr, *arg, *p;
- int cmdline = 0, *intptr, value, n;
- ServerOpCodes opcode;
- u_int i, flags = 0;
- size_t len;
-
- cp = line;
- arg = strdelim(&cp);
- /* Ignore leading whitespace */
- if (*arg == '\0')
- arg = strdelim(&cp);
- if (!arg || !*arg || *arg == '#')
- return 0;
- intptr = NULL;
- charptr = NULL;
- opcode = parse_token(arg, filename, linenum, &flags);
-
- if (activep == NULL) { /* We are processing a command line directive */
- cmdline = 1;
- activep = &cmdline;
- }
- if (*activep && opcode != sMatch)
- debug3("%s:%d setting %s %s", filename, linenum, arg, cp);
- if (*activep == 0 && !(flags & SSHCFG_MATCH)) {
- if (user == NULL) {
- fatal("%s line %d: Directive '%s' is not allowed "
- "within a Match block", filename, linenum, arg);
- } else { /* this is a directive we have already processed */
- while (arg)
- arg = strdelim(&cp);
- return 0;
- }
- }
-
- switch (opcode) {
- /* Portable-specific options */
- case sPAMAuthenticationViaKbdInt:
- log("%s line %d: PAMAuthenticationViaKbdInt has been "
- "deprecated. You should use KbdInteractiveAuthentication "
- "instead (which defaults to \"yes\").", filename, linenum);
- intptr = &options->pam_authentication_via_kbd_int;
- goto parse_flag;
-
- /* Standard Options */
- case sBadOption:
- return -1;
- case sPort:
- /* ignore ports from configfile if cmdline specifies ports */
- if (options->ports_from_cmdline)
- return 0;
- if (options->listen_addrs != NULL)
- fatal("%s line %d: ports must be specified before "
- "ListenAddress.", filename, linenum);
- if (options->num_ports >= MAX_PORTS)
- fatal("%s line %d: too many ports.",
- filename, linenum);
- arg = strdelim(&cp);
- if (!arg || *arg == '\0')
- fatal("%s line %d: missing port number.",
- filename, linenum);
- options->ports[options->num_ports++] = a2port(arg);
- if (options->ports[options->num_ports-1] == 0)
- fatal("%s line %d: Badly formatted port number.",
- filename, linenum);
- break;
-
- case sServerKeyBits:
- intptr = &options->server_key_bits;
-parse_int:
- arg = strdelim(&cp);
- if (!arg || *arg == '\0')
- fatal("%s line %d: missing integer value.",
- filename, linenum);
- value = atoi(arg);
- if (*activep && *intptr == -1)
- *intptr = value;
- break;
-
- case sLoginGraceTime:
- intptr = &options->login_grace_time;
-parse_time:
- arg = strdelim(&cp);
- if (!arg || *arg == '\0')
- fatal("%s line %d: missing time value.",
- filename, linenum);
- if ((value = convtime(arg)) == -1)
- fatal("%s line %d: invalid time value.",
- filename, linenum);
- if (*intptr == -1)
- *intptr = value;
- break;
-
- case sKeyRegenerationTime:
- intptr = &options->key_regeneration_time;
- goto parse_time;
-
- case sListenAddress:
- arg = strdelim(&cp);
- if (!arg || *arg == '\0' || strncmp(arg, "[]", 2) == 0)
- fatal("%s line %d: missing inet addr.",
- filename, linenum);
- if (*arg == '[') {
- if ((p = strchr(arg, ']')) == NULL)
- fatal("%s line %d: bad ipv6 inet addr usage.",
- filename, linenum);
- arg++;
- (void) memmove(p, p+1, strlen(p+1)+1);
- } else if (((p = strchr(arg, ':')) == NULL) ||
- (strchr(p+1, ':') != NULL)) {
- add_listen_addr(options, arg, 0);
- break;
- }
- if (*p == ':') {
- u_short port;
-
- p++;
- if (*p == '\0')
- fatal("%s line %d: bad inet addr:port usage.",
- filename, linenum);
- else {
- *(p-1) = '\0';
- if ((port = a2port(p)) == 0)
- fatal("%s line %d: bad port number.",
- filename, linenum);
- add_listen_addr(options, arg, port);
- }
- } else if (*p == '\0')
- add_listen_addr(options, arg, 0);
- else
- fatal("%s line %d: bad inet addr usage.",
- filename, linenum);
- break;
-
- case sHostKeyFile:
- intptr = &options->num_host_key_files;
- if (*intptr >= MAX_HOSTKEYS)
- fatal("%s line %d: too many host keys specified (max %d).",
- filename, linenum, MAX_HOSTKEYS);
- charptr = &options->host_key_files[*intptr];
-parse_filename:
- arg = strdelim(&cp);
- if (!arg || *arg == '\0')
- fatal("%s line %d: missing file name.",
- filename, linenum);
- if (*activep && *charptr == NULL) {
- *charptr = tilde_expand_filename(arg, getuid());
- /* increase optional counter */
- if (intptr != NULL)
- *intptr = *intptr + 1;
- }
- break;
-
- case sPidFile:
- charptr = &options->pid_file;
- goto parse_filename;
-
- case sPermitRootLogin:
- intptr = &options->permit_root_login;
- arg = strdelim(&cp);
- if (!arg || *arg == '\0')
- fatal("%s line %d: missing yes/"
- "without-password/forced-commands-only/no "
- "argument.", filename, linenum);
- value = 0; /* silence compiler */
- if (strcmp(arg, "without-password") == 0)
- value = PERMIT_NO_PASSWD;
- else if (strcmp(arg, "forced-commands-only") == 0)
- value = PERMIT_FORCED_ONLY;
- else if (strcmp(arg, "yes") == 0)
- value = PERMIT_YES;
- else if (strcmp(arg, "no") == 0)
- value = PERMIT_NO;
- else
- fatal("%s line %d: Bad yes/"
- "without-password/forced-commands-only/no "
- "argument: %s", filename, linenum, arg);
- if (*activep && *intptr == -1)
- *intptr = value;
- break;
-
- case sIgnoreRhosts:
- intptr = &options->ignore_rhosts;
-parse_flag:
- arg = strdelim(&cp);
- if (!arg || *arg == '\0')
- fatal("%s line %d: missing yes/no argument.",
- filename, linenum);
- value = 0; /* silence compiler */
- if (strcmp(arg, "yes") == 0)
- value = 1;
- else if (strcmp(arg, "no") == 0)
- value = 0;
- else
- fatal("%s line %d: Bad yes/no argument: %s",
- filename, linenum, arg);
- if (*activep && *intptr == -1)
- *intptr = value;
- break;
-
- case sIgnoreUserKnownHosts:
- intptr = &options->ignore_user_known_hosts;
- goto parse_flag;
-
- case sRhostsAuthentication:
- intptr = &options->rhosts_authentication;
- goto parse_flag;
-
- case sRhostsRSAAuthentication:
- intptr = &options->rhosts_rsa_authentication;
- goto parse_flag;
-
- case sHostbasedAuthentication:
- intptr = &options->hostbased_authentication;
- goto parse_flag;
-
- case sHostbasedUsesNameFromPacketOnly:
- intptr = &options->hostbased_uses_name_from_packet_only;
- goto parse_flag;
-
- case sRSAAuthentication:
- intptr = &options->rsa_authentication;
- goto parse_flag;
-
- case sPubkeyAuthentication:
- intptr = &options->pubkey_authentication;
- goto parse_flag;
-#ifdef GSSAPI
- case sGssAuthentication:
- intptr = &options->gss_authentication;
- goto parse_flag;
- case sGssKeyEx:
- intptr = &options->gss_keyex;
- goto parse_flag;
- case sGssStoreDelegCreds:
- intptr = &options->gss_keyex;
- goto parse_flag;
-#ifndef SUNW_GSSAPI
- case sGssUseSessionCredCache:
- intptr = &options->gss_use_session_ccache;
- goto parse_flag;
- case sGssCleanupCreds:
- intptr = &options->gss_cleanup_creds;
- goto parse_flag;
-#endif /* SUNW_GSSAPI */
-#endif /* GSSAPI */
-#if defined(KRB4) || defined(KRB5)
- case sKerberosAuthentication:
- intptr = &options->kerberos_authentication;
- goto parse_flag;
-
- case sKerberosOrLocalPasswd:
- intptr = &options->kerberos_or_local_passwd;
- goto parse_flag;
-
- case sKerberosTicketCleanup:
- intptr = &options->kerberos_ticket_cleanup;
- goto parse_flag;
-#endif
-#if defined(AFS) || defined(KRB5)
- case sKerberosTgtPassing:
- intptr = &options->kerberos_tgt_passing;
- goto parse_flag;
-#endif
-#ifdef AFS
- case sAFSTokenPassing:
- intptr = &options->afs_token_passing;
- goto parse_flag;
-#endif
-
- case sPasswordAuthentication:
- intptr = &options->password_authentication;
- goto parse_flag;
-
- case sKbdInteractiveAuthentication:
- intptr = &options->kbd_interactive_authentication;
- goto parse_flag;
-
- case sChallengeResponseAuthentication:
- intptr = &options->challenge_response_authentication;
- goto parse_flag;
-
- case sPrintMotd:
- intptr = &options->print_motd;
- goto parse_flag;
-
- case sPrintLastLog:
- intptr = &options->print_lastlog;
- goto parse_flag;
-
- case sX11Forwarding:
- intptr = &options->x11_forwarding;
- goto parse_flag;
-
- case sX11DisplayOffset:
- intptr = &options->x11_display_offset;
- goto parse_int;
-
- case sX11UseLocalhost:
- intptr = &options->x11_use_localhost;
- goto parse_flag;
-
- case sXAuthLocation:
- charptr = &options->xauth_location;
- goto parse_filename;
-
- case sStrictModes:
- intptr = &options->strict_modes;
- goto parse_flag;
-
- case sKeepAlives:
- intptr = &options->keepalives;
- goto parse_flag;
-
- case sEmptyPasswd:
- intptr = &options->permit_empty_passwd;
- goto parse_flag;
-
- case sPermitUserEnvironment:
- intptr = &options->permit_user_env;
- goto parse_flag;
-
- case sUseLogin:
- log("%s line %d: ignoring UseLogin option value."
- " This option is always off.", filename, linenum);
- while (arg)
- arg = strdelim(&cp);
- break;
-
- case sCompression:
- intptr = &options->compression;
- goto parse_flag;
-
- case sGatewayPorts:
- intptr = &options->gateway_ports;
- arg = strdelim(&cp);
- if (!arg || *arg == '\0')
- fatal("%s line %d: missing yes/no/clientspecified "
- "argument.", filename, linenum);
- value = 0; /* silence compiler */
- if (strcmp(arg, "clientspecified") == 0)
- value = 2;
- else if (strcmp(arg, "yes") == 0)
- value = 1;
- else if (strcmp(arg, "no") == 0)
- value = 0;
- else
- fatal("%s line %d: Bad yes/no/clientspecified "
- "argument: %s", filename, linenum, arg);
- if (*activep && *intptr == -1)
- *intptr = value;
- break;
-
- case sVerifyReverseMapping:
- intptr = &options->verify_reverse_mapping;
- goto parse_flag;
-
- case sLogFacility:
- intptr = (int *) &options->log_facility;
- arg = strdelim(&cp);
- value = log_facility_number(arg);
- if (value == SYSLOG_FACILITY_NOT_SET)
- fatal("%.200s line %d: unsupported log facility '%s'",
- filename, linenum, arg ? arg : "<NONE>");
- if (*intptr == -1)
- *intptr = (SyslogFacility) value;
- break;
-
- case sLogLevel:
- intptr = (int *) &options->log_level;
- arg = strdelim(&cp);
- value = log_level_number(arg);
- if (value == SYSLOG_LEVEL_NOT_SET)
- fatal("%.200s line %d: unsupported log level '%s'",
- filename, linenum, arg ? arg : "<NONE>");
- if (*intptr == -1)
- *intptr = (LogLevel) value;
- break;
-
- case sAllowTcpForwarding:
- intptr = &options->allow_tcp_forwarding;
- goto parse_flag;
-
- case sUsePrivilegeSeparation:
- log("%s line %d: ignoring UsePrivilegeSeparation option value."
- " This option is always on.", filename, linenum);
- while (arg)
- arg = strdelim(&cp);
- break;
-
- case sAllowUsers:
- while (((arg = strdelim(&cp)) != NULL) && *arg != '\0') {
- if (options->num_allow_users >= MAX_ALLOW_USERS)
- fatal("%s line %d: too many allow users.",
- filename, linenum);
- options->allow_users[options->num_allow_users++] =
- xstrdup(arg);
- }
- break;
-
- case sDenyUsers:
- while (((arg = strdelim(&cp)) != NULL) && *arg != '\0') {
- if (options->num_deny_users >= MAX_DENY_USERS)
- fatal( "%s line %d: too many deny users.",
- filename, linenum);
- options->deny_users[options->num_deny_users++] =
- xstrdup(arg);
- }
- break;
-
- case sAllowGroups:
- while (((arg = strdelim(&cp)) != NULL) && *arg != '\0') {
- if (options->num_allow_groups >= MAX_ALLOW_GROUPS)
- fatal("%s line %d: too many allow groups.",
- filename, linenum);
- options->allow_groups[options->num_allow_groups++] =
- xstrdup(arg);
- }
- break;
-
- case sDenyGroups:
- while (((arg = strdelim(&cp)) != NULL) && *arg != '\0') {
- if (options->num_deny_groups >= MAX_DENY_GROUPS)
- fatal("%s line %d: too many deny groups.",
- filename, linenum);
- options->deny_groups[options->num_deny_groups++] = xstrdup(arg);
- }
- break;
-
- case sCiphers:
- arg = strdelim(&cp);
- if (!arg || *arg == '\0')
- fatal("%s line %d: Missing argument.", filename, linenum);
- if (!ciphers_valid(arg))
- fatal("%s line %d: Bad SSH2 cipher spec '%s'.",
- filename, linenum, arg ? arg : "<NONE>");
- if (options->ciphers == NULL)
- options->ciphers = xstrdup(arg);
- break;
-
- case sMacs:
- arg = strdelim(&cp);
- if (!arg || *arg == '\0')
- fatal("%s line %d: Missing argument.", filename, linenum);
- if (!mac_valid(arg))
- fatal("%s line %d: Bad SSH2 mac spec '%s'.",
- filename, linenum, arg ? arg : "<NONE>");
- if (options->macs == NULL)
- options->macs = xstrdup(arg);
- break;
-
- case sProtocol:
- intptr = &options->protocol;
- arg = strdelim(&cp);
- if (!arg || *arg == '\0')
- fatal("%s line %d: Missing argument.", filename, linenum);
- value = proto_spec(arg);
- if (value == SSH_PROTO_UNKNOWN)
- fatal("%s line %d: Bad protocol spec '%s'.",
- filename, linenum, arg ? arg : "<NONE>");
- if (*intptr == SSH_PROTO_UNKNOWN)
- *intptr = value;
- break;
-
- case sSubsystem:
- if (options->num_subsystems >= MAX_SUBSYSTEMS) {
- fatal("%s line %d: too many subsystems defined.",
- filename, linenum);
- }
- arg = strdelim(&cp);
- if (!arg || *arg == '\0')
- fatal("%s line %d: Missing subsystem name.",
- filename, linenum);
- if (!*activep) {
- arg = strdelim(&cp);
- break;
- }
- for (i = 0; i < options->num_subsystems; i++)
- if (strcmp(arg, options->subsystem_name[i]) == 0)
- fatal("%s line %d: Subsystem '%s' already defined.",
- filename, linenum, arg);
- options->subsystem_name[options->num_subsystems] = xstrdup(arg);
- arg = strdelim(&cp);
- if (!arg || *arg == '\0')
- fatal("%s line %d: Missing subsystem command.",
- filename, linenum);
- options->subsystem_command[options->num_subsystems] = xstrdup(arg);
-
- /*
- * Collect arguments (separate to executable), including the
- * name of the executable, in a way that is easier to parse
- * later.
- */
- p = xstrdup(arg);
- len = strlen(p) + 1;
- while ((arg = strdelim(&cp)) != NULL && *arg != '\0') {
- len += 1 + strlen(arg);
- p = xrealloc(p, len);
- strlcat(p, " ", len);
- strlcat(p, arg, len);
- }
- options->subsystem_args[options->num_subsystems] = p;
- options->num_subsystems++;
- break;
-
- case sMaxStartups:
- arg = strdelim(&cp);
- if (!arg || *arg == '\0')
- fatal("%s line %d: Missing MaxStartups spec.",
- filename, linenum);
- if ((n = sscanf(arg, "%d:%d:%d",
- &options->max_startups_begin,
- &options->max_startups_rate,
- &options->max_startups)) == 3) {
- if (options->max_startups_begin >
- options->max_startups ||
- options->max_startups_rate > 100 ||
- options->max_startups_rate < 1)
- fatal("%s line %d: Illegal MaxStartups spec.",
- filename, linenum);
- } else if (n != 1)
- fatal("%s line %d: Illegal MaxStartups spec.",
- filename, linenum);
- else
- options->max_startups = options->max_startups_begin;
- break;
-
- case sBanner:
- charptr = &options->banner;
- goto parse_filename;
- /*
- * These options can contain %X options expanded at
- * connect time, so that you can specify paths like:
- *
- * AuthorizedKeysFile /etc/ssh_keys/%u
- */
- case sAuthorizedKeysFile:
- case sAuthorizedKeysFile2:
- charptr = (opcode == sAuthorizedKeysFile) ?
- &options->authorized_keys_file :
- &options->authorized_keys_file2;
- goto parse_filename;
-
- case sClientAliveInterval:
- intptr = &options->client_alive_interval;
- goto parse_time;
-
- case sClientAliveCountMax:
- intptr = &options->client_alive_count_max;
- goto parse_int;
-
- case sMaxAuthTries:
- intptr = &options->max_auth_tries;
- goto parse_int;
-
- case sMaxAuthTriesLog:
- intptr = &options->max_auth_tries_log;
- goto parse_int;
-
- case sLookupClientHostnames:
- intptr = &options->lookup_client_hostnames;
- goto parse_flag;
-
- case sUseOpenSSLEngine:
- intptr = &options->use_openssl_engine;
- goto parse_flag;
-
- case sChrootDirectory:
- charptr = &options->chroot_directory;
-
- arg = strdelim(&cp);
- if (arg == NULL || *arg == '\0')
- fatal("%s line %d: missing directory name for "
- "ChrootDirectory.", filename, linenum);
- if (*activep && *charptr == NULL)
- *charptr = xstrdup(arg);
- break;
-
- case sPreUserauthHook:
- charptr = &options->pre_userauth_hook;
- goto parse_filename;
-
- case sMatch:
- if (cmdline)
- fatal("Match directive not supported as a command-line "
- "option");
- value = match_cfg_line(&cp, linenum, user, host, address);
- if (value < 0)
- fatal("%s line %d: Bad Match condition", filename,
- linenum);
- *activep = value;
- break;
-
- case sDeprecated:
- log("%s line %d: Deprecated option %s",
- filename, linenum, arg);
- while (arg)
- arg = strdelim(&cp);
- break;
-
- case sPAMServicePrefix:
- arg = strdelim(&cp);
- if (!arg || *arg == '\0')
- fatal("%s line %d: Missing argument.",
- filename, linenum);
- if (options->pam_service_name != NULL)
- fatal("%s line %d: PAMServiceName and PAMServicePrefix "
- "are mutually exclusive.", filename, linenum);
- if (options->pam_service_prefix == NULL)
- options->pam_service_prefix = xstrdup(arg);
- break;
-
- case sPAMServiceName:
- arg = strdelim(&cp);
- if (!arg || *arg == '\0')
- fatal("%s line %d: Missing argument.",
- filename, linenum);
- if (options->pam_service_prefix != NULL)
- fatal("%s line %d: PAMServiceName and PAMServicePrefix "
- "are mutually exclusive.", filename, linenum);
- if (options->pam_service_name == NULL)
- options->pam_service_name = xstrdup(arg);
- break;
-
- default:
- fatal("%s line %d: Missing handler for opcode %s (%d)",
- filename, linenum, arg, opcode);
- }
- if ((arg = strdelim(&cp)) != NULL && *arg != '\0')
- fatal("%s line %d: garbage at end of line; \"%.200s\".",
- filename, linenum, arg);
- return 0;
-}
-
-
-/* Reads the server configuration file. */
-
-void
-load_server_config(const char *filename, Buffer *conf)
-{
- char line[1024], *cp;
- FILE *f;
-
- debug2("%s: filename %s", __func__, filename);
- if ((f = fopen(filename, "r")) == NULL) {
- perror(filename);
- exit(1);
- }
- buffer_clear(conf);
- while (fgets(line, sizeof(line), f)) {
- /*
- * Trim out comments and strip whitespace
- * NB - preserve newlines, they are needed to reproduce
- * line numbers later for error messages
- */
- if ((cp = strchr(line, '#')) != NULL)
- memcpy(cp, "\n", 2);
- cp = line + strspn(line, " \t\r");
-
- buffer_append(conf, cp, strlen(cp));
- }
- buffer_append(conf, "\0", 1);
- fclose(f);
- debug2("%s: done config len = %d", __func__, buffer_len(conf));
-}
-
-void
-parse_server_match_config(ServerOptions *options, const char *user,
- const char *host, const char *address)
-{
- ServerOptions mo;
-
- initialize_server_options(&mo);
- parse_server_config(&mo, "reprocess config", &cfg, user, host, address);
- copy_set_server_options(options, &mo, 0);
-}
-
-
-
-/* Helper macros */
-#define M_CP_INTOPT(n) do {\
- if (src->n != -1) \
- dst->n = src->n; \
-} while (0)
-#define M_CP_STROPT(n) do {\
- if (src->n != NULL) { \
- if (dst->n != NULL) \
- xfree(dst->n); \
- dst->n = src->n; \
- } \
-} while(0)
-
-/*
- * Copy any supported values that are set.
- *
- * If the preauth flag is set, we do not bother copying the the string or
- * array values that are not used pre-authentication, because any that we
- * do use must be explictly sent in mm_getpwnamallow().
- */
-void
-copy_set_server_options(ServerOptions *dst, ServerOptions *src, int preauth)
-{
- M_CP_INTOPT(password_authentication);
- M_CP_INTOPT(gss_authentication);
- M_CP_INTOPT(rsa_authentication);
- M_CP_INTOPT(pubkey_authentication);
- M_CP_INTOPT(hostbased_authentication);
- M_CP_INTOPT(kbd_interactive_authentication);
- M_CP_INTOPT(permit_root_login);
- M_CP_INTOPT(permit_empty_passwd);
- M_CP_INTOPT(allow_tcp_forwarding);
- M_CP_INTOPT(gateway_ports);
- M_CP_INTOPT(x11_display_offset);
- M_CP_INTOPT(x11_forwarding);
- M_CP_INTOPT(x11_use_localhost);
- M_CP_INTOPT(max_auth_tries);
- M_CP_STROPT(banner);
-
- if (preauth)
- return;
- M_CP_STROPT(chroot_directory);
-}
-
-#undef M_CP_INTOPT
-#undef M_CP_STROPT
-
-void
-parse_server_config(ServerOptions *options, const char *filename, Buffer *conf,
- const char *user, const char *host, const char *address)
-{
- int active, linenum, bad_options = 0;
- char *cp, *obuf, *cbuf;
-
- debug2("%s: config %s len %d", __func__, filename, buffer_len(conf));
-
- obuf = cbuf = xstrdup(buffer_ptr(conf));
- active = user ? 0 : 1;
- linenum = 1;
- while ((cp = strsep(&cbuf, "\n")) != NULL) {
- if (process_server_config_line(options, cp, filename,
- linenum++, &active, user, host, address) != 0)
- bad_options++;
- }
- xfree(obuf);
- if (bad_options > 0)
- fatal("%s: terminating, %d bad configuration options",
- filename, bad_options);
-}
-
-
-/*
- * Note that "none" is a special path having the same affect on sshd
- * configuration as not specifying ChrootDirectory at all.
- */
-int
-chroot_requested(char *chroot_directory)
-{
- return (chroot_directory != NULL &&
- strcasecmp(chroot_directory, "none") != 0);
-}
diff --git a/usr/src/cmd/ssh/sshd/serverloop.c b/usr/src/cmd/ssh/sshd/serverloop.c
deleted file mode 100644
index 3dae9b1840..0000000000
--- a/usr/src/cmd/ssh/sshd/serverloop.c
+++ /dev/null
@@ -1,1310 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * Server main loop for handling the interactive session.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- *
- * SSH2 support by Markus Friedl.
- * Copyright (c) 2000, 2001 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: serverloop.c,v 1.104 2002/09/19 16:03:15 stevesk Exp $");
-
-#include "xmalloc.h"
-#include "packet.h"
-#include "buffer.h"
-#include "log.h"
-#include "servconf.h"
-#include "canohost.h"
-#include "sshpty.h"
-#include "channels.h"
-#include "compat.h"
-#include "ssh1.h"
-#include "ssh2.h"
-#include "auth.h"
-#include "session.h"
-#include "dispatch.h"
-#include "auth-options.h"
-#include "serverloop.h"
-#include "misc.h"
-#include "kex.h"
-
-#ifdef ALTPRIVSEP
-#include "altprivsep.h"
-#endif /* ALTPRIVSEP*/
-
-extern ServerOptions options;
-
-/* XXX */
-extern Kex *xxx_kex;
-static Authctxt *xxx_authctxt;
-
-static Buffer stdin_buffer; /* Buffer for stdin data. */
-static Buffer stdout_buffer; /* Buffer for stdout data. */
-static Buffer stderr_buffer; /* Buffer for stderr data. */
-static int fdin; /* Descriptor for stdin (for writing) */
-static int fdout; /* Descriptor for stdout (for reading);
- May be same number as fdin. */
-static int fderr; /* Descriptor for stderr. May be -1. */
-static long stdin_bytes = 0; /* Number of bytes written to stdin. */
-static long stdout_bytes = 0; /* Number of stdout bytes sent to client. */
-static long stderr_bytes = 0; /* Number of stderr bytes sent to client. */
-static long fdout_bytes = 0; /* Number of stdout bytes read from program. */
-static int stdin_eof = 0; /* EOF message received from client. */
-static int fdout_eof = 0; /* EOF encountered reading from fdout. */
-static int fderr_eof = 0; /* EOF encountered readung from fderr. */
-static int fdin_is_tty = 0; /* fdin points to a tty. */
-static int connection_in; /* Connection to client (input). */
-static int connection_out; /* Connection to client (output). */
-static int connection_closed = 0; /* Connection to client closed. */
-static u_int buffer_high; /* "Soft" max buffer size. */
-static int client_alive_timeouts = 0;
-
-/*
- * This SIGCHLD kludge is used to detect when the child exits. The server
- * will exit after that, as soon as forwarded connections have terminated.
- */
-
-static volatile sig_atomic_t child_terminated = 0; /* The child has terminated. */
-
-/* prototypes */
-static void server_init_dispatch(void);
-
-/*
- * we write to this pipe if a SIGCHLD is caught in order to avoid
- * the race between select() and child_terminated
- */
-static int notify_pipe[2];
-static void
-notify_setup(void)
-{
- if (pipe(notify_pipe) < 0) {
- error("pipe(notify_pipe) failed %s", strerror(errno));
- } else if ((fcntl(notify_pipe[0], F_SETFD, FD_CLOEXEC) == -1) ||
- (fcntl(notify_pipe[1], F_SETFD, FD_CLOEXEC) == -1)) {
- error("fcntl(notify_pipe, F_SETFD) failed %s", strerror(errno));
- (void) close(notify_pipe[0]);
- (void) close(notify_pipe[1]);
- } else {
- set_nonblock(notify_pipe[0]);
- set_nonblock(notify_pipe[1]);
- return;
- }
- notify_pipe[0] = -1; /* read end */
- notify_pipe[1] = -1; /* write end */
-}
-static void
-notify_parent(void)
-{
- if (notify_pipe[1] != -1)
- (void) write(notify_pipe[1], "", 1);
-}
-static void
-notify_prepare(fd_set *readset)
-{
- if (notify_pipe[0] != -1)
- FD_SET(notify_pipe[0], readset);
-}
-static void
-notify_done(fd_set *readset)
-{
- char c;
-
- if (notify_pipe[0] != -1 && FD_ISSET(notify_pipe[0], readset))
- while (read(notify_pipe[0], &c, 1) != -1)
- debug2("notify_done: reading");
-}
-
-static void
-sigchld_handler(int sig)
-{
- int save_errno = errno;
- debug("Received SIGCHLD.");
- child_terminated = 1;
-#ifndef _UNICOS
- mysignal(SIGCHLD, sigchld_handler);
-#endif
- notify_parent();
- errno = save_errno;
-}
-
-/*
- * Make packets from buffered stderr data, and buffer it for sending
- * to the client.
- */
-static void
-make_packets_from_stderr_data(void)
-{
- int len;
-
- /* Send buffered stderr data to the client. */
- while (buffer_len(&stderr_buffer) > 0 &&
- packet_not_very_much_data_to_write()) {
- len = buffer_len(&stderr_buffer);
- if (packet_is_interactive()) {
- if (len > 512)
- len = 512;
- } else {
- /* Keep the packets at reasonable size. */
- if (len > packet_get_maxsize())
- len = packet_get_maxsize();
- }
- packet_start(SSH_SMSG_STDERR_DATA);
- packet_put_string(buffer_ptr(&stderr_buffer), len);
- packet_send();
- buffer_consume(&stderr_buffer, len);
- stderr_bytes += len;
- }
-}
-
-/*
- * Make packets from buffered stdout data, and buffer it for sending to the
- * client.
- */
-static void
-make_packets_from_stdout_data(void)
-{
- int len;
-
- /* Send buffered stdout data to the client. */
- while (buffer_len(&stdout_buffer) > 0 &&
- packet_not_very_much_data_to_write()) {
- len = buffer_len(&stdout_buffer);
- if (packet_is_interactive()) {
- if (len > 512)
- len = 512;
- } else {
- /* Keep the packets at reasonable size. */
- if (len > packet_get_maxsize())
- len = packet_get_maxsize();
- }
- packet_start(SSH_SMSG_STDOUT_DATA);
- packet_put_string(buffer_ptr(&stdout_buffer), len);
- packet_send();
- buffer_consume(&stdout_buffer, len);
- stdout_bytes += len;
- }
-}
-
-static void
-client_alive_check(void)
-{
- static int had_channel = 0;
- int id;
-
- id = channel_find_open();
- if (id == -1) {
- if (!had_channel)
- return;
- packet_disconnect("No open channels after timeout!");
- }
- had_channel = 1;
-
- /* timeout, check to see how many we have had */
- if (++client_alive_timeouts > options.client_alive_count_max)
- packet_disconnect("Timeout, your session not responding.");
-
- /*
- * send a bogus channel request with "wantreply",
- * we should get back a failure
- */
- channel_request_start(id, "keepalive@openssh.com", 1);
- packet_send();
-}
-
-/*
- * Sleep in select() until we can do something. This will initialize the
- * select masks. Upon return, the masks will indicate which descriptors
- * have data or can accept data. Optionally, a maximum time can be specified
- * for the duration of the wait (0 = infinite).
- */
-static void
-wait_until_can_do_something(fd_set **readsetp, fd_set **writesetp, int *maxfdp,
- int *nallocp, u_int max_time_milliseconds)
-{
- struct timeval tv, *tvp;
- int ret;
- int client_alive_scheduled = 0;
-
- /*
- * if using client_alive, set the max timeout accordingly,
- * and indicate that this particular timeout was for client
- * alive by setting the client_alive_scheduled flag.
- *
- * this could be randomized somewhat to make traffic
- * analysis more difficult, but we're not doing it yet.
- */
- if (compat20 &&
- max_time_milliseconds == 0 && options.client_alive_interval) {
- client_alive_scheduled = 1;
- max_time_milliseconds = options.client_alive_interval * 1000;
- }
-
- /* Allocate and update select() masks for channel descriptors. */
- channel_prepare_select(readsetp, writesetp, maxfdp, nallocp, 0);
-
- if (compat20) {
-#ifdef ALTPRIVSEP
- int pipe_fd;
-
- if ((pipe_fd = altprivsep_get_pipe_fd()) != -1) {
- *maxfdp = MAX(*maxfdp, pipe_fd);
- FD_SET(altprivsep_get_pipe_fd(), *readsetp);
- }
-#endif /* ALTPRIVSEP */
-#if 0
- /* wrong: bad condition XXX */
- if (channel_not_very_much_buffered_data())
-#endif
- FD_SET(connection_in, *readsetp);
- } else {
- /*
- * Read packets from the client unless we have too much
- * buffered stdin or channel data.
- */
- if (buffer_len(&stdin_buffer) < buffer_high &&
- channel_not_very_much_buffered_data())
- FD_SET(connection_in, *readsetp);
- /*
- * If there is not too much data already buffered going to
- * the client, try to get some more data from the program.
- */
- if (packet_not_very_much_data_to_write()) {
- if (!fdout_eof)
- FD_SET(fdout, *readsetp);
- if (!fderr_eof)
- FD_SET(fderr, *readsetp);
- }
- /*
- * If we have buffered data, try to write some of that data
- * to the program.
- */
- if (fdin != -1 && buffer_len(&stdin_buffer) > 0)
- FD_SET(fdin, *writesetp);
- }
- notify_prepare(*readsetp);
-
- /*
- * If we have buffered packet data going to the client, mark that
- * descriptor.
- */
- if (packet_have_data_to_write())
- FD_SET(connection_out, *writesetp);
-
- /*
- * If child has terminated and there is enough buffer space to read
- * from it, then read as much as is available and exit.
- */
- if (child_terminated && packet_not_very_much_data_to_write())
- if (max_time_milliseconds == 0 || client_alive_scheduled)
- max_time_milliseconds = 100;
-
- if (max_time_milliseconds == 0)
- tvp = NULL;
- else {
- tv.tv_sec = max_time_milliseconds / 1000;
- tv.tv_usec = 1000 * (max_time_milliseconds % 1000);
- tvp = &tv;
- }
-
- /* Wait for something to happen, or the timeout to expire. */
- ret = select((*maxfdp)+1, *readsetp, *writesetp, NULL, tvp);
-
- if (ret == -1) {
- memset(*readsetp, 0, *nallocp);
- memset(*writesetp, 0, *nallocp);
- if (errno != EINTR)
- error("select: %.100s", strerror(errno));
- } else if (ret == 0 && client_alive_scheduled)
- client_alive_check();
-
- notify_done(*readsetp);
-}
-
-/*
- * Processes input from the client and the program. Input data is stored
- * in buffers and processed later.
- */
-static void
-process_input(fd_set * readset)
-{
- int len;
- char buf[16384];
-
- /* Read and buffer any input data from the client. */
- if (FD_ISSET(connection_in, readset)) {
- len = read(connection_in, buf, sizeof(buf));
- if (len == 0) {
- if (packet_is_monitor()) {
- debug("child closed the communication pipe");
- } else {
- verbose("Connection closed by %.100s",
- get_remote_ipaddr());
- }
- connection_closed = 1;
- if (compat20)
- return;
- fatal_cleanup();
- } else if (len < 0) {
- if (errno != EINTR && errno != EAGAIN) {
- verbose("Read error from remote host "
- "%.100s: %.100s",
- get_remote_ipaddr(), strerror(errno));
- fatal_cleanup();
- }
- } else {
- /* Buffer any received data. */
- packet_process_incoming(buf, len);
- }
- }
- if (compat20)
- return;
-
- /* Read and buffer any available stdout data from the program. */
- if (!fdout_eof && FD_ISSET(fdout, readset)) {
- len = read(fdout, buf, sizeof(buf));
- if (len < 0 && (errno == EINTR || errno == EAGAIN)) {
- /* EMPTY */
- } else if (len <= 0) {
- fdout_eof = 1;
- } else {
- buffer_append(&stdout_buffer, buf, len);
- fdout_bytes += len;
- }
- }
- /* Read and buffer any available stderr data from the program. */
- if (!fderr_eof && FD_ISSET(fderr, readset)) {
- len = read(fderr, buf, sizeof(buf));
- if (len < 0 && (errno == EINTR || errno == EAGAIN)) {
- /* EMPTY */
- } else if (len <= 0) {
- fderr_eof = 1;
- } else {
- buffer_append(&stderr_buffer, buf, len);
- }
- }
-}
-
-/*
- * Sends data from internal buffers to client program stdin.
- */
-static void
-process_output(fd_set * writeset)
-{
- struct termios tio;
- u_char *data;
- u_int dlen;
- int len;
-
- /* Write buffered data to program stdin. */
- if (!compat20 && fdin != -1 && FD_ISSET(fdin, writeset)) {
- data = buffer_ptr(&stdin_buffer);
- dlen = buffer_len(&stdin_buffer);
- len = write(fdin, data, dlen);
- if (len < 0 && (errno == EINTR || errno == EAGAIN)) {
- /* EMPTY */
- } else if (len <= 0) {
- if (fdin != fdout)
- (void) close(fdin);
- else
- (void) shutdown(fdin, SHUT_WR); /* We will no longer send. */
- fdin = -1;
- } else {
- /* Successful write. */
- if (fdin_is_tty && dlen >= 1 && data[0] != '\r' &&
- tcgetattr(fdin, &tio) == 0 &&
- !(tio.c_lflag & ECHO) && (tio.c_lflag & ICANON)) {
- /*
- * Simulate echo to reduce the impact of
- * traffic analysis
- */
- packet_send_ignore(len);
- packet_send();
- }
- /* Consume the data from the buffer. */
- buffer_consume(&stdin_buffer, len);
- /* Update the count of bytes written to the program. */
- stdin_bytes += len;
- }
- }
- /* Send any buffered packet data to the client. */
- if (FD_ISSET(connection_out, writeset))
- packet_write_poll();
-}
-
-/*
- * Wait until all buffered output has been sent to the client.
- * This is used when the program terminates.
- */
-static void
-drain_output(void)
-{
- /* Send any buffered stdout data to the client. */
- if (buffer_len(&stdout_buffer) > 0) {
- packet_start(SSH_SMSG_STDOUT_DATA);
- packet_put_string(buffer_ptr(&stdout_buffer),
- buffer_len(&stdout_buffer));
- packet_send();
- /* Update the count of sent bytes. */
- stdout_bytes += buffer_len(&stdout_buffer);
- }
- /* Send any buffered stderr data to the client. */
- if (buffer_len(&stderr_buffer) > 0) {
- packet_start(SSH_SMSG_STDERR_DATA);
- packet_put_string(buffer_ptr(&stderr_buffer),
- buffer_len(&stderr_buffer));
- packet_send();
- /* Update the count of sent bytes. */
- stderr_bytes += buffer_len(&stderr_buffer);
- }
- /* Wait until all buffered data has been written to the client. */
- packet_write_wait();
-}
-
-static void
-process_buffered_input_packets(void)
-{
- dispatch_run(DISPATCH_NONBLOCK, NULL, compat20 ? xxx_kex : NULL);
-}
-
-/*
- * Performs the interactive session. This handles data transmission between
- * the client and the program. Note that the notion of stdin, stdout, and
- * stderr in this function is sort of reversed: this function writes to
- * stdin (of the child program), and reads from stdout and stderr (of the
- * child program).
- */
-void
-server_loop(pid_t pid, int fdin_arg, int fdout_arg, int fderr_arg)
-{
- fd_set *readset = NULL, *writeset = NULL;
- int max_fd = 0, nalloc = 0;
- int wait_status; /* Status returned by wait(). */
- pid_t wait_pid; /* pid returned by wait(). */
- int waiting_termination = 0; /* Have displayed waiting close message. */
- u_int max_time_milliseconds;
- u_int previous_stdout_buffer_bytes;
- u_int stdout_buffer_bytes;
- int type;
-
- debug("Entering interactive session.");
-
- /* Initialize the SIGCHLD kludge. */
- child_terminated = 0;
- mysignal(SIGCHLD, sigchld_handler);
-
- /* Initialize our global variables. */
- fdin = fdin_arg;
- fdout = fdout_arg;
- fderr = fderr_arg;
-
- /* nonblocking IO */
- set_nonblock(fdin);
- set_nonblock(fdout);
- /* we don't have stderr for interactive terminal sessions, see below */
- if (fderr != -1)
- set_nonblock(fderr);
-
- if (!(datafellows & SSH_BUG_IGNOREMSG) && isatty(fdin))
- fdin_is_tty = 1;
-
- connection_in = packet_get_connection_in();
- connection_out = packet_get_connection_out();
-
- notify_setup();
-
- previous_stdout_buffer_bytes = 0;
-
- /* Set approximate I/O buffer size. */
- if (packet_is_interactive())
- buffer_high = 4096;
- else
- buffer_high = 64 * 1024;
-
-#if 0
- /* Initialize max_fd to the maximum of the known file descriptors. */
- max_fd = MAX(connection_in, connection_out);
- max_fd = MAX(max_fd, fdin);
- max_fd = MAX(max_fd, fdout);
- if (fderr != -1)
- max_fd = MAX(max_fd, fderr);
-#endif
-
- /* Initialize Initialize buffers. */
- buffer_init(&stdin_buffer);
- buffer_init(&stdout_buffer);
- buffer_init(&stderr_buffer);
-
- /*
- * If we have no separate fderr (which is the case when we have a pty
- * - there we cannot make difference between data sent to stdout and
- * stderr), indicate that we have seen an EOF from stderr. This way
- * we don\'t need to check the descriptor everywhere.
- */
- if (fderr == -1)
- fderr_eof = 1;
-
- server_init_dispatch();
-
- /* Main loop of the server for the interactive session mode. */
- for (;;) {
-
- /* Process buffered packets from the client. */
- process_buffered_input_packets();
-
- /*
- * If we have received eof, and there is no more pending
- * input data, cause a real eof by closing fdin.
- */
- if (stdin_eof && fdin != -1 && buffer_len(&stdin_buffer) == 0) {
- if (fdin != fdout)
- (void) close(fdin);
- else
- (void) shutdown(fdin, SHUT_WR); /* We will no longer send. */
- fdin = -1;
- }
- /* Make packets from buffered stderr data to send to the client. */
- make_packets_from_stderr_data();
-
- /*
- * Make packets from buffered stdout data to send to the
- * client. If there is very little to send, this arranges to
- * not send them now, but to wait a short while to see if we
- * are getting more data. This is necessary, as some systems
- * wake up readers from a pty after each separate character.
- */
- max_time_milliseconds = 0;
- stdout_buffer_bytes = buffer_len(&stdout_buffer);
- if (stdout_buffer_bytes != 0 && stdout_buffer_bytes < 256 &&
- stdout_buffer_bytes != previous_stdout_buffer_bytes) {
- /* try again after a while */
- max_time_milliseconds = 10;
- } else {
- /* Send it now. */
- make_packets_from_stdout_data();
- }
- previous_stdout_buffer_bytes = buffer_len(&stdout_buffer);
-
- /* Send channel data to the client. */
- if (packet_not_very_much_data_to_write())
- channel_output_poll();
-
- /*
- * Bail out of the loop if the program has closed its output
- * descriptors, and we have no more data to send to the
- * client, and there is no pending buffered data.
- */
- if (fdout_eof && fderr_eof && !packet_have_data_to_write() &&
- buffer_len(&stdout_buffer) == 0 && buffer_len(&stderr_buffer) == 0) {
- if (!channel_still_open())
- break;
- if (!waiting_termination) {
- const char *s = "Waiting for forwarded connections to terminate...\r\n";
- char *cp;
- waiting_termination = 1;
- buffer_append(&stderr_buffer, s, strlen(s));
-
- /* Display list of open channels. */
- cp = channel_open_message();
- buffer_append(&stderr_buffer, cp, strlen(cp));
- xfree(cp);
- }
- }
- max_fd = MAX(connection_in, connection_out);
- max_fd = MAX(max_fd, fdin);
- max_fd = MAX(max_fd, fdout);
- max_fd = MAX(max_fd, fderr);
- max_fd = MAX(max_fd, notify_pipe[0]);
-
- /* Sleep in select() until we can do something. */
- wait_until_can_do_something(&readset, &writeset, &max_fd,
- &nalloc, max_time_milliseconds);
-
- /* Process any channel events. */
- channel_after_select(readset, writeset);
-
- /* Process input from the client and from program stdout/stderr. */
- process_input(readset);
-
- /* Process output to the client and to program stdin. */
- process_output(writeset);
- }
- if (readset)
- xfree(readset);
- if (writeset)
- xfree(writeset);
-
- /* Cleanup and termination code. */
-
- /* Wait until all output has been sent to the client. */
- drain_output();
-
- debug("End of interactive session; stdin %ld, stdout (read %ld, sent %ld), stderr %ld bytes.",
- stdin_bytes, fdout_bytes, stdout_bytes, stderr_bytes);
-
- /* Free and clear the buffers. */
- buffer_free(&stdin_buffer);
- buffer_free(&stdout_buffer);
- buffer_free(&stderr_buffer);
-
- /* Close the file descriptors. */
- if (fdout != -1)
- (void) close(fdout);
- fdout = -1;
- fdout_eof = 1;
- if (fderr != -1)
- (void) close(fderr);
- fderr = -1;
- fderr_eof = 1;
- if (fdin != -1)
- (void) close(fdin);
- fdin = -1;
-
- channel_free_all();
-
- /* We no longer want our SIGCHLD handler to be called. */
- mysignal(SIGCHLD, SIG_DFL);
-
- while ((wait_pid = waitpid(-1, &wait_status, 0)) < 0)
- if (errno != EINTR)
- packet_disconnect("wait: %.100s", strerror(errno));
- if (wait_pid != pid)
- error("Strange, wait returned pid %ld, expected %ld",
- (long)wait_pid, (long)pid);
-
- /* Check if it exited normally. */
- if (WIFEXITED(wait_status)) {
- /* Yes, normal exit. Get exit status and send it to the client. */
- debug("Command exited with status %d.", WEXITSTATUS(wait_status));
- packet_start(SSH_SMSG_EXITSTATUS);
- packet_put_int(WEXITSTATUS(wait_status));
- packet_send();
- packet_write_wait();
-
- /*
- * Wait for exit confirmation. Note that there might be
- * other packets coming before it; however, the program has
- * already died so we just ignore them. The client is
- * supposed to respond with the confirmation when it receives
- * the exit status.
- */
- do {
- type = packet_read();
- }
- while (type != SSH_CMSG_EXIT_CONFIRMATION);
-
- debug("Received exit confirmation.");
- return;
- }
- /* Check if the program terminated due to a signal. */
- if (WIFSIGNALED(wait_status))
- packet_disconnect("Command terminated on signal %d.",
- WTERMSIG(wait_status));
-
- /* Some weird exit cause. Just exit. */
- packet_disconnect("wait returned status %04x.", wait_status);
- /* NOTREACHED */
-}
-
-static void
-collect_children(void)
-{
- pid_t pid;
- sigset_t oset, nset;
- int status;
-
- /* block SIGCHLD while we check for dead children */
- (void) sigemptyset(&nset);
- (void) sigaddset(&nset, SIGCHLD);
- (void) sigprocmask(SIG_BLOCK, &nset, &oset);
- if (child_terminated) {
- while ((pid = waitpid(-1, &status, WNOHANG)) > 0 ||
- (pid < 0 && errno == EINTR))
- if (pid > 0)
- session_close_by_pid(pid, status);
- child_terminated = 0;
- }
- (void) sigprocmask(SIG_SETMASK, &oset, NULL);
-}
-
-#ifdef ALTPRIVSEP
-/*
- * For ALTPRIVSEP the wait_until_can_do_something function is very
- * simple: select() on the read side of the pipe, and if there's packets
- * to send, on the write side, and on the read side of the SIGCHLD
- * handler pipe. That's it.
- */
-static void
-aps_wait_until_can_do_something(fd_set **readsetp, fd_set **writesetp,
- int *maxfdp, int *nallocp, u_int max_time_milliseconds)
-{
- int ret;
-
- /*
- * Use channel_prepare_select() to make the fd sets.
- *
- * This is cheating, really, since because the last argument in
- * this call is '1' nothing related to channels will be done --
- * we're using this function only to callocate the fd sets.
- */
- channel_prepare_select(readsetp, writesetp, maxfdp, nallocp, 1);
-
- if ((connection_in = packet_get_connection_in()) >= 0 &&
- !connection_closed)
- FD_SET(connection_in, *readsetp);
-
- notify_prepare(*readsetp);
-
- if ((connection_out = packet_get_connection_out()) >= 0 &&
- packet_have_data_to_write() && !connection_closed)
- FD_SET(connection_out, *writesetp);
-
- /* Wait for something to happen, or the timeout to expire. */
- ret = select((*maxfdp)+1, *readsetp, *writesetp, NULL, NULL);
-
- if (ret == -1) {
- memset(*readsetp, 0, *nallocp);
- memset(*writesetp, 0, *nallocp);
- if (errno != EINTR)
- error("select: %.100s", strerror(errno));
- }
-
- notify_done(*readsetp);
-}
-
-/*
- * Slightly different than collect_children, aps_collect_child() has
- * only the unprivileged sshd to wait for, no sessions, no channells,
- * just one process.
- */
-static int
-aps_collect_child(pid_t child)
-{
- pid_t pid;
- sigset_t oset, nset;
- int status;
-
- /* block SIGCHLD while we check for dead children */
- (void) sigemptyset(&nset);
- (void) sigaddset(&nset, SIGCHLD);
- (void) sigprocmask(SIG_BLOCK, &nset, &oset);
- if (child_terminated) {
- while ((pid = waitpid(child, &status, WNOHANG)) > 0 ||
- (pid < 0 && errno == EINTR))
- if (pid == child) {
- (void) sigprocmask(SIG_SETMASK, &oset, NULL);
- return (1);
- }
- child_terminated = 0;
- }
- (void) sigprocmask(SIG_SETMASK, &oset, NULL);
- return (0);
-}
-
-static int killed = 0;
-
-static void
-aps_monitor_kill_handler(int sig)
-{
- int save_errno = errno;
- killed = 1;
- notify_parent();
- mysignal(sig, aps_monitor_kill_handler);
- errno = save_errno;
-}
-
-static void
-aps_monitor_sigchld_handler(int sig)
-{
- int save_errno = errno;
- debug("Monitor received SIGCHLD.");
- child_terminated = 1;
- mysignal(SIGCHLD, aps_monitor_sigchld_handler);
- notify_parent();
- errno = save_errno;
-}
-
-void
-aps_monitor_loop(Authctxt *authctxt, pid_t child_pid)
-{
- fd_set *readset = NULL, *writeset = NULL;
- int max_fd, nalloc = 0;
-
- debug("Entering monitor loop.");
-
- /*
- * Awful hack follows: fake compat20 == 1 to cause process_input()
- * and process_output() to behave as they would for SSHv2 because that's
- * the behaviour we need in SSHv2.
- *
- * This same hack is done in packet.c
- */
- compat20 = 1; /* causes process_input/output() to ignore stdio */
-
- mysignal(SIGHUP, aps_monitor_kill_handler);
- mysignal(SIGINT, aps_monitor_kill_handler);
- mysignal(SIGTERM, aps_monitor_kill_handler);
-
- child_terminated = 0;
- mysignal(SIGCHLD, aps_monitor_sigchld_handler);
-
- connection_in = packet_get_connection_in();
- connection_out = packet_get_connection_out();
-
- notify_setup();
-
- max_fd = MAX(connection_in, connection_out);
- max_fd = MAX(max_fd, notify_pipe[0]);
-
- dispatch_set(SSH2_MSG_KEXINIT, &kex_input_kexinit);
- dispatch_range(SSH2_MSG_USERAUTH_MIN, SSH2_MSG_MAX,
- &dispatch_protocol_error);
- dispatch_set(SSH2_PRIV_MSG_ALTPRIVSEP, &aps_input_altpriv_msg);
-
- for (;;) {
- process_buffered_input_packets();
-
- aps_wait_until_can_do_something(&readset, &writeset, &max_fd,
- &nalloc, 0);
-
- if (aps_collect_child(child_pid))
- break;
-
- if (killed) {
- /* fatal cleanups will kill child, audit logout */
- log("Monitor killed; exiting");
- fatal_cleanup();
- }
-
- /*
- * Unlike server_loop2() we don't care if connection_closed
- * since we still want to wait for the monitor's child.
- */
- process_input(readset);
- process_output(writeset);
- }
-
- packet_close();
-}
-#endif /* ALTPRIVSEP */
-
-/*
- * This server loop is for unprivileged child only. Our monitor runs its own
- * aps_monitor_loop() funtion.
- */
-void
-server_loop2(Authctxt *authctxt)
-{
- fd_set *readset = NULL, *writeset = NULL;
- int rekeying = 0, max_fd, nalloc = 0;
-
- debug("Entering interactive session for SSH2.");
-
- mysignal(SIGCHLD, sigchld_handler);
- child_terminated = 0;
- connection_in = packet_get_connection_in();
- connection_out = packet_get_connection_out();
-
- notify_setup();
-
- max_fd = MAX(connection_in, connection_out);
- max_fd = MAX(max_fd, notify_pipe[0]);
-
- xxx_authctxt = authctxt;
-
- server_init_dispatch();
-
- for (;;) {
- process_buffered_input_packets();
-
- rekeying = (xxx_kex != NULL && !xxx_kex->done);
-
- if (!rekeying && packet_not_very_much_data_to_write())
- channel_output_poll();
- wait_until_can_do_something(&readset, &writeset, &max_fd,
- &nalloc, 0);
-
- collect_children();
-
- if (!rekeying) {
- channel_after_select(readset, writeset);
- if (packet_need_rekeying()) {
- debug("rekey limit reached, need rekeying");
- xxx_kex->done = 0;
- debug("poking the monitor to start "
- "key re-exchange");
- altprivsep_start_rekex();
- }
- }
-#ifdef ALTPRIVSEP
- else
- altprivsep_process_input(readset);
-#endif /* ALTPRIVSEP */
-
- process_input(readset);
- if (connection_closed)
- break;
- process_output(writeset);
- }
- collect_children();
-
- if (readset)
- xfree(readset);
- if (writeset)
- xfree(writeset);
-
- /* free all channels, no more reads and writes */
- channel_free_all();
-
- /* free remaining sessions, e.g. remove wtmp entries */
- session_destroy_all(NULL);
-}
-
-static void
-server_input_channel_failure(int type, u_int32_t seq, void *ctxt)
-{
- debug("Got CHANNEL_FAILURE for keepalive");
- /*
- * reset timeout, since we got a sane answer from the client.
- * even if this was generated by something other than
- * the bogus CHANNEL_REQUEST we send for keepalives.
- */
- client_alive_timeouts = 0;
-}
-
-static void
-server_input_stdin_data(int type, u_int32_t seq, void *ctxt)
-{
- char *data;
- u_int data_len;
-
- /* Stdin data from the client. Append it to the buffer. */
- /* Ignore any data if the client has closed stdin. */
- if (fdin == -1)
- return;
- data = packet_get_string(&data_len);
- packet_check_eom();
- buffer_append(&stdin_buffer, data, data_len);
- memset(data, 0, data_len);
- xfree(data);
-}
-
-static void
-server_input_eof(int type, u_int32_t seq, void *ctxt)
-{
- /*
- * Eof from the client. The stdin descriptor to the
- * program will be closed when all buffered data has
- * drained.
- */
- debug("EOF received for stdin.");
- packet_check_eom();
- stdin_eof = 1;
-}
-
-static void
-server_input_window_size(int type, u_int32_t seq, void *ctxt)
-{
- int row = packet_get_int();
- int col = packet_get_int();
- int xpixel = packet_get_int();
- int ypixel = packet_get_int();
-
- debug("Window change received.");
- packet_check_eom();
- if (fdin != -1)
- pty_change_window_size(fdin, row, col, xpixel, ypixel);
-}
-
-static Channel *
-server_request_direct_tcpip(char *ctype)
-{
- Channel *c;
- int sock;
- char *target, *originator;
- int target_port, originator_port;
-
- target = packet_get_string(NULL);
- target_port = packet_get_int();
- originator = packet_get_string(NULL);
- originator_port = packet_get_int();
- packet_check_eom();
-
- debug("server_request_direct_tcpip: originator %s port %d, target %s port %d",
- originator, originator_port, target, target_port);
-
- /* XXX check permission */
- sock = channel_connect_to(target, target_port);
-
- xfree(target);
- xfree(originator);
- if (sock < 0)
- return NULL;
- c = channel_new(ctype, SSH_CHANNEL_CONNECTING,
- sock, sock, -1, CHAN_TCP_WINDOW_DEFAULT,
- CHAN_TCP_PACKET_DEFAULT, 0, xstrdup("direct-tcpip"), 1);
- return c;
-}
-
-static Channel *
-server_request_session(char *ctype)
-{
- Channel *c;
-
- debug("input_session_request");
- packet_check_eom();
- /*
- * A server session has no fd to read or write until a
- * CHANNEL_REQUEST for a shell is made, so we set the type to
- * SSH_CHANNEL_LARVAL. Additionally, a callback for handling all
- * CHANNEL_REQUEST messages is registered.
- */
- c = channel_new(ctype, SSH_CHANNEL_LARVAL,
- -1, -1, -1, /*window size*/0, CHAN_SES_PACKET_DEFAULT,
- 0, xstrdup("server-session"), 1);
- if (session_open(xxx_authctxt, c->self) != 1) {
- debug("session open failed, free channel %d", c->self);
- channel_free(c);
- return NULL;
- }
- channel_register_cleanup(c->self, session_close_by_channel);
- return c;
-}
-
-static void
-server_input_channel_open(int type, u_int32_t seq, void *ctxt)
-{
- Channel *c = NULL;
- char *ctype;
- int rchan;
- u_int rmaxpack, rwindow, len;
-
- ctype = packet_get_string(&len);
- rchan = packet_get_int();
- rwindow = packet_get_int();
- rmaxpack = packet_get_int();
-
- debug("server_input_channel_open: ctype %s rchan %d win %d max %d",
- ctype, rchan, rwindow, rmaxpack);
-
- if (strcmp(ctype, "session") == 0) {
- c = server_request_session(ctype);
- } else if (strcmp(ctype, "direct-tcpip") == 0) {
- c = server_request_direct_tcpip(ctype);
- }
- if (c != NULL) {
- debug("server_input_channel_open: confirm %s", ctype);
- c->remote_id = rchan;
- c->remote_window = rwindow;
- c->remote_maxpacket = rmaxpack;
- if (c->type != SSH_CHANNEL_CONNECTING) {
- packet_start(SSH2_MSG_CHANNEL_OPEN_CONFIRMATION);
- packet_put_int(c->remote_id);
- packet_put_int(c->self);
- packet_put_int(c->local_window);
- packet_put_int(c->local_maxpacket);
- packet_send();
- }
- } else {
- debug("server_input_channel_open: failure %s", ctype);
- packet_start(SSH2_MSG_CHANNEL_OPEN_FAILURE);
- packet_put_int(rchan);
- packet_put_int(SSH2_OPEN_ADMINISTRATIVELY_PROHIBITED);
- if (!(datafellows & SSH_BUG_OPENFAILURE)) {
- packet_put_utf8_cstring("open failed");
- packet_put_cstring("");
- }
- packet_send();
- }
- xfree(ctype);
-}
-
-static void
-server_input_global_request(int type, u_int32_t seq, void *ctxt)
-{
- char *rtype;
- int want_reply;
- int success = 0;
-
- rtype = packet_get_string(NULL);
- want_reply = packet_get_char();
- debug("server_input_global_request: rtype %s want_reply %d", rtype, want_reply);
-
- /* -R style forwarding */
- if (strcmp(rtype, "tcpip-forward") == 0) {
- struct passwd *pw;
- char *listen_address;
- u_short listen_port;
-
- pw = auth_get_user();
- if (pw == NULL)
- fatal("server_input_global_request: no user");
- listen_address = packet_get_string(NULL); /* XXX currently ignored */
- listen_port = (u_short)packet_get_int();
- debug("server_input_global_request: tcpip-forward listen %s port %d",
- listen_address, listen_port);
-
- /* check permissions */
- if (!options.allow_tcp_forwarding ||
- no_port_forwarding_flag
-#ifndef NO_IPPORT_RESERVED_CONCEPT
- || (listen_port < IPPORT_RESERVED && pw->pw_uid != 0)
-#endif
- ) {
- success = 0;
- packet_send_debug("Server has disabled port forwarding.");
- } else {
- /* Start listening on the port */
- success = channel_setup_remote_fwd_listener(
- listen_address, listen_port, options.gateway_ports);
- }
- xfree(listen_address);
- } else if (strcmp(rtype, "cancel-tcpip-forward") == 0) {
- char *cancel_address;
- u_short cancel_port;
-
- cancel_address = packet_get_string(NULL);
- cancel_port = (u_short)packet_get_int();
- debug("%s: cancel-tcpip-forward addr %s port %d", __func__,
- cancel_address, cancel_port);
-
- success = channel_cancel_rport_listener(cancel_address,
- cancel_port);
- xfree(cancel_address);
- }
- if (want_reply) {
- packet_start(success ?
- SSH2_MSG_REQUEST_SUCCESS : SSH2_MSG_REQUEST_FAILURE);
- packet_send();
- packet_write_wait();
- }
- xfree(rtype);
-}
-
-static void
-server_input_channel_req(int type, u_int32_t seq, void *ctxt)
-{
- Channel *c;
- int id, reply, success = 0;
- char *rtype;
-
- id = packet_get_int();
- rtype = packet_get_string(NULL);
- reply = packet_get_char();
-
- debug("server_input_channel_req: channel %d request %s reply %d",
- id, rtype, reply);
-
- if ((c = channel_lookup(id)) == NULL)
- packet_disconnect("server_input_channel_req: "
- "unknown channel %d", id);
- if (!strcmp(rtype, "eow@openssh.com")) {
- packet_check_eom();
- chan_rcvd_eow(c);
- } else if (c->type == SSH_CHANNEL_LARVAL || c->type == SSH_CHANNEL_OPEN)
- success = session_input_channel_req(c, rtype);
- if (reply) {
- packet_start(success ?
- SSH2_MSG_CHANNEL_SUCCESS : SSH2_MSG_CHANNEL_FAILURE);
- packet_put_int(c->remote_id);
- packet_send();
- }
- xfree(rtype);
-}
-
-static void
-server_init_dispatch_20(void)
-{
- debug("server_init_dispatch_20");
- dispatch_init(&dispatch_protocol_error);
- dispatch_set(SSH2_MSG_CHANNEL_CLOSE, &channel_input_oclose);
- dispatch_set(SSH2_MSG_CHANNEL_DATA, &channel_input_data);
- dispatch_set(SSH2_MSG_CHANNEL_EOF, &channel_input_ieof);
- dispatch_set(SSH2_MSG_CHANNEL_EXTENDED_DATA, &channel_input_extended_data);
- dispatch_set(SSH2_MSG_CHANNEL_OPEN, &server_input_channel_open);
- dispatch_set(SSH2_MSG_CHANNEL_OPEN_CONFIRMATION, &channel_input_open_confirmation);
- dispatch_set(SSH2_MSG_CHANNEL_OPEN_FAILURE, &channel_input_open_failure);
- dispatch_set(SSH2_MSG_CHANNEL_REQUEST, &server_input_channel_req);
- dispatch_set(SSH2_MSG_CHANNEL_WINDOW_ADJUST, &channel_input_window_adjust);
- dispatch_set(SSH2_MSG_GLOBAL_REQUEST, &server_input_global_request);
- /* client_alive */
- dispatch_set(SSH2_MSG_CHANNEL_FAILURE, &server_input_channel_failure);
- /* rekeying */
-
-#ifdef ALTPRIVSEP
- /* unprivileged sshd has a kex packet handler that must not be reset */
- debug3("server_init_dispatch_20 -- should we dispatch_set(KEXINIT) here? %d && !%d",
- packet_is_server(), packet_is_monitor());
- if (packet_is_server() && !packet_is_monitor()) {
- debug3("server_init_dispatch_20 -- skipping dispatch_set(KEXINIT) in unpriv proc");
- dispatch_range(SSH2_MSG_KEXINIT, SSH2_MSG_TRANSPORT_MAX,
- &altprivsep_rekey);
- return;
- }
-#endif /* ALTPRIVSEP */
- dispatch_set(SSH2_MSG_KEXINIT, &kex_input_kexinit);
-}
-static void
-server_init_dispatch_13(void)
-{
- debug("server_init_dispatch_13");
- dispatch_init(NULL);
- dispatch_set(SSH_CMSG_EOF, &server_input_eof);
- dispatch_set(SSH_CMSG_STDIN_DATA, &server_input_stdin_data);
- dispatch_set(SSH_CMSG_WINDOW_SIZE, &server_input_window_size);
- dispatch_set(SSH_MSG_CHANNEL_CLOSE, &channel_input_close);
- dispatch_set(SSH_MSG_CHANNEL_CLOSE_CONFIRMATION, &channel_input_close_confirmation);
- dispatch_set(SSH_MSG_CHANNEL_DATA, &channel_input_data);
- dispatch_set(SSH_MSG_CHANNEL_OPEN_CONFIRMATION, &channel_input_open_confirmation);
- dispatch_set(SSH_MSG_CHANNEL_OPEN_FAILURE, &channel_input_open_failure);
- dispatch_set(SSH_MSG_PORT_OPEN, &channel_input_port_open);
-}
-static void
-server_init_dispatch_15(void)
-{
- server_init_dispatch_13();
- debug("server_init_dispatch_15");
- dispatch_set(SSH_MSG_CHANNEL_CLOSE, &channel_input_ieof);
- dispatch_set(SSH_MSG_CHANNEL_CLOSE_CONFIRMATION, &channel_input_oclose);
-}
-static void
-server_init_dispatch(void)
-{
- if (compat20)
- server_init_dispatch_20();
- else if (compat13)
- server_init_dispatch_13();
- else
- server_init_dispatch_15();
-}
diff --git a/usr/src/cmd/ssh/sshd/session.c b/usr/src/cmd/ssh/sshd/session.c
deleted file mode 100644
index 871b06c758..0000000000
--- a/usr/src/cmd/ssh/sshd/session.c
+++ /dev/null
@@ -1,2641 +0,0 @@
-/*
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- *
- * SSH2 support by Markus Friedl.
- * Copyright (c) 2000, 2001 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: session.c,v 1.150 2002/09/16 19:55:33 stevesk Exp $");
-
-#ifdef HAVE_DEFOPEN
-#include <deflt.h>
-#include <ulimit.h>
-#endif /* HAVE_DEFOPEN */
-
-#ifdef HAVE_LIBGEN_H
-#include <libgen.h>
-#endif
-
-#include <priv.h>
-
-#include "ssh.h"
-#include "ssh1.h"
-#include "ssh2.h"
-#include "xmalloc.h"
-#include "sshpty.h"
-#include "packet.h"
-#include "buffer.h"
-#include "mpaux.h"
-#include "uidswap.h"
-#include "compat.h"
-#include "channels.h"
-#include "bufaux.h"
-#include "auth.h"
-#include "auth-options.h"
-#include "pathnames.h"
-#include "log.h"
-#include "servconf.h"
-#include "sshlogin.h"
-#include "serverloop.h"
-#include "canohost.h"
-#include "session.h"
-#include "tildexpand.h"
-#include "misc.h"
-#include "sftp.h"
-
-#ifdef USE_PAM
-#include <security/pam_appl.h>
-#endif /* USE_PAM */
-
-#ifdef GSSAPI
-#include "ssh-gss.h"
-#endif
-
-#ifdef ALTPRIVSEP
-#include "altprivsep.h"
-#endif /* ALTPRIVSEP */
-
-#ifdef HAVE_CYGWIN
-#include <windows.h>
-#include <sys/cygwin.h>
-#define is_winnt (GetVersion() < 0x80000000)
-#endif
-
-/* func */
-
-Session *session_new(void);
-void session_set_fds(Session *, int, int, int);
-void session_pty_cleanup(void *);
-void session_xauthfile_cleanup(void *s);
-void session_proctitle(Session *);
-int session_setup_x11fwd(Session *);
-void do_exec_pty(Session *, const char *);
-void do_exec_no_pty(Session *, const char *);
-void do_exec(Session *, const char *);
-void do_login(Session *, const char *);
-void do_child(Session *, const char *);
-void do_motd(void);
-int check_quietlogin(Session *, const char *);
-
-static void do_authenticated1(Authctxt *);
-static void do_authenticated2(Authctxt *);
-
-static int session_pty_req(Session *);
-static int session_env_req(Session *s);
-static void session_free_env(char ***envp);
-static void safely_chroot(const char *path, uid_t uid);
-static void drop_privs(uid_t uid);
-
-#ifdef USE_PAM
-static void session_do_pam(Session *, int);
-#endif /* USE_PAM */
-
-/* import */
-extern ServerOptions options;
-extern char *__progname;
-extern int log_stderr;
-extern int debug_flag;
-extern u_int utmp_len;
-extern void destroy_sensitive_data(void);
-
-#ifdef GSSAPI
-extern Gssctxt *xxx_gssctxt;
-#endif /* GSSAPI */
-
-/* original command from peer. */
-const char *original_command = NULL;
-
-/* data */
-#define MAX_SESSIONS 10
-Session sessions[MAX_SESSIONS];
-
-#define SUBSYSTEM_NONE 0
-#define SUBSYSTEM_EXT 1
-#define SUBSYSTEM_INT_SFTP 2
-
-#ifdef HAVE_LOGIN_CAP
-login_cap_t *lc;
-#endif
-
-/* Name and directory of socket for authentication agent forwarding. */
-static char *auth_sock_name = NULL;
-static char *auth_sock_dir = NULL;
-
-/* removes the agent forwarding socket */
-
-static void
-auth_sock_cleanup_proc(void *_pw)
-{
- struct passwd *pw = _pw;
-
- if (auth_sock_name != NULL) {
- temporarily_use_uid(pw);
- unlink(auth_sock_name);
- rmdir(auth_sock_dir);
- auth_sock_name = NULL;
- restore_uid();
- }
-}
-
-static int
-auth_input_request_forwarding(struct passwd * pw)
-{
- Channel *nc;
- int sock;
- struct sockaddr_un sunaddr;
-
- if (auth_sock_name != NULL) {
- error("authentication forwarding requested twice.");
- return 0;
- }
-
- /* Temporarily drop privileged uid for mkdir/bind. */
- temporarily_use_uid(pw);
-
- /* Allocate a buffer for the socket name, and format the name. */
- auth_sock_name = xmalloc(MAXPATHLEN);
- auth_sock_dir = xmalloc(MAXPATHLEN);
- strlcpy(auth_sock_dir, "/tmp/ssh-XXXXXXXX", MAXPATHLEN);
-
- /* Create private directory for socket */
- if (mkdtemp(auth_sock_dir) == NULL) {
- packet_send_debug("Agent forwarding disabled: "
- "mkdtemp() failed: %.100s", strerror(errno));
- restore_uid();
- xfree(auth_sock_name);
- xfree(auth_sock_dir);
- auth_sock_name = NULL;
- auth_sock_dir = NULL;
- return 0;
- }
- snprintf(auth_sock_name, MAXPATHLEN, "%s/agent.%ld",
- auth_sock_dir, (long) getpid());
-
- /* delete agent socket on fatal() */
- fatal_add_cleanup(auth_sock_cleanup_proc, pw);
-
- /* Create the socket. */
- sock = socket(AF_UNIX, SOCK_STREAM, 0);
- if (sock < 0)
- packet_disconnect("socket: %.100s", strerror(errno));
-
- /* Bind it to the name. */
- memset(&sunaddr, 0, sizeof(sunaddr));
- sunaddr.sun_family = AF_UNIX;
- strlcpy(sunaddr.sun_path, auth_sock_name, sizeof(sunaddr.sun_path));
-
- if (bind(sock, (struct sockaddr *) & sunaddr, sizeof(sunaddr)) < 0)
- packet_disconnect("bind: %.100s", strerror(errno));
-
- /* Restore the privileged uid. */
- restore_uid();
-
- /* Start listening on the socket. */
- if (listen(sock, 5) < 0)
- packet_disconnect("listen: %.100s", strerror(errno));
-
- /* Allocate a channel for the authentication agent socket. */
- nc = channel_new("auth socket",
- SSH_CHANNEL_AUTH_SOCKET, sock, sock, -1,
- CHAN_X11_WINDOW_DEFAULT, CHAN_X11_PACKET_DEFAULT,
- 0, xstrdup("auth socket"), 1);
- strlcpy(nc->path, auth_sock_name, sizeof(nc->path));
- return 1;
-}
-
-
-void
-do_authenticated(Authctxt *authctxt)
-{
- /* setup the channel layer */
- if (!no_port_forwarding_flag && options.allow_tcp_forwarding)
- channel_permit_all_opens();
-
- if (compat20)
- do_authenticated2(authctxt);
- else
- do_authenticated1(authctxt);
-
- /* remove agent socket */
- if (auth_sock_name != NULL)
- auth_sock_cleanup_proc(authctxt->pw);
-#ifdef KRB4
- if (options.kerberos_ticket_cleanup)
- krb4_cleanup_proc(authctxt);
-#endif
-#ifdef KRB5
- if (options.kerberos_ticket_cleanup)
- krb5_cleanup_proc(authctxt);
-#endif
-}
-
-/*
- * Prepares for an interactive session. This is called after the user has
- * been successfully authenticated. During this message exchange, pseudo
- * terminals are allocated, X11, TCP/IP, and authentication agent forwardings
- * are requested, etc.
- */
-static void
-do_authenticated1(Authctxt *authctxt)
-{
- Session *s;
- char *command;
- int success, type, screen_flag;
- int enable_compression_after_reply = 0;
- u_int proto_len, data_len, dlen, compression_level = 0;
-
- s = session_new();
- s->authctxt = authctxt;
- s->pw = authctxt->pw;
-
- /*
- * We stay in this loop until the client requests to execute a shell
- * or a command.
- */
- for (;;) {
- success = 0;
-
- /* Get a packet from the client. */
- type = packet_read();
-
- /* Process the packet. */
- switch (type) {
- case SSH_CMSG_REQUEST_COMPRESSION:
- compression_level = packet_get_int();
- packet_check_eom();
- if (compression_level < 1 || compression_level > 9) {
- packet_send_debug("Received illegal compression level %d.",
- compression_level);
- break;
- }
- if (!options.compression) {
- debug2("compression disabled");
- break;
- }
- /* Enable compression after we have responded with SUCCESS. */
- enable_compression_after_reply = 1;
- success = 1;
- break;
-
- case SSH_CMSG_REQUEST_PTY:
- success = session_pty_req(s);
- break;
-
- case SSH_CMSG_X11_REQUEST_FORWARDING:
- s->auth_proto = packet_get_string(&proto_len);
- s->auth_data = packet_get_string(&data_len);
-
- screen_flag = packet_get_protocol_flags() &
- SSH_PROTOFLAG_SCREEN_NUMBER;
- debug2("SSH_PROTOFLAG_SCREEN_NUMBER: %d", screen_flag);
-
- if (packet_remaining() == 4) {
- if (!screen_flag)
- debug2("Buggy client: "
- "X11 screen flag missing");
- s->screen = packet_get_int();
- } else {
- s->screen = 0;
- }
- packet_check_eom();
- success = session_setup_x11fwd(s);
- if (!success) {
- xfree(s->auth_proto);
- xfree(s->auth_data);
- s->auth_proto = NULL;
- s->auth_data = NULL;
- }
- break;
-
- case SSH_CMSG_AGENT_REQUEST_FORWARDING:
- if (no_agent_forwarding_flag || compat13) {
- debug("Authentication agent forwarding not permitted for this authentication.");
- break;
- }
- debug("Received authentication agent forwarding request.");
- success = auth_input_request_forwarding(s->pw);
- break;
-
- case SSH_CMSG_PORT_FORWARD_REQUEST:
- if (no_port_forwarding_flag) {
- debug("Port forwarding not permitted for this authentication.");
- break;
- }
- if (!options.allow_tcp_forwarding) {
- debug("Port forwarding not permitted.");
- break;
- }
- debug("Received TCP/IP port forwarding request.");
- channel_input_port_forward_request(s->pw->pw_uid == 0, options.gateway_ports);
- success = 1;
- break;
-
- case SSH_CMSG_MAX_PACKET_SIZE:
- if (packet_set_maxsize(packet_get_int()) > 0)
- success = 1;
- break;
-
-#if defined(AFS) || defined(KRB5)
- case SSH_CMSG_HAVE_KERBEROS_TGT:
- if (!options.kerberos_tgt_passing) {
- verbose("Kerberos TGT passing disabled.");
- } else {
- char *kdata = packet_get_string(&dlen);
- packet_check_eom();
-
- /* XXX - 0x41, see creds_to_radix version */
- if (kdata[0] != 0x41) {
-#ifdef KRB5
- krb5_data tgt;
- tgt.data = kdata;
- tgt.length = dlen;
-
- if (auth_krb5_tgt(s->authctxt, &tgt))
- success = 1;
- else
- verbose("Kerberos v5 TGT refused for %.100s", s->authctxt->user);
-#endif /* KRB5 */
- } else {
-#ifdef AFS
- if (auth_krb4_tgt(s->authctxt, kdata))
- success = 1;
- else
- verbose("Kerberos v4 TGT refused for %.100s", s->authctxt->user);
-#endif /* AFS */
- }
- xfree(kdata);
- }
- break;
-#endif /* AFS || KRB5 */
-
-#ifdef AFS
- case SSH_CMSG_HAVE_AFS_TOKEN:
- if (!options.afs_token_passing || !k_hasafs()) {
- verbose("AFS token passing disabled.");
- } else {
- /* Accept AFS token. */
- char *token = packet_get_string(&dlen);
- packet_check_eom();
-
- if (auth_afs_token(s->authctxt, token))
- success = 1;
- else
- verbose("AFS token refused for %.100s",
- s->authctxt->user);
- xfree(token);
- }
- break;
-#endif /* AFS */
-
- case SSH_CMSG_EXEC_SHELL:
- case SSH_CMSG_EXEC_CMD:
- if (type == SSH_CMSG_EXEC_CMD) {
- command = packet_get_string(&dlen);
- debug("Exec command '%.500s'", command);
- do_exec(s, command);
- xfree(command);
- } else {
- do_exec(s, NULL);
- }
- packet_check_eom();
- session_close(s);
- return;
-
- default:
- /*
- * Any unknown messages in this phase are ignored,
- * and a failure message is returned.
- */
- log("Unknown packet type received after authentication: %d", type);
- }
- packet_start(success ? SSH_SMSG_SUCCESS : SSH_SMSG_FAILURE);
- packet_send();
- packet_write_wait();
-
- /* Enable compression now that we have replied if appropriate. */
- if (enable_compression_after_reply) {
- enable_compression_after_reply = 0;
- packet_start_compression(compression_level);
- }
- }
-}
-
-/*
- * This is called to fork and execute a command when we have no tty. This
- * will call do_child from the child, and server_loop from the parent after
- * setting up file descriptors and such.
- */
-void
-do_exec_no_pty(Session *s, const char *command)
-{
- pid_t pid;
-
- int inout[2], err[2];
- /* Uses socket pairs to communicate with the program. */
- if (socketpair(AF_UNIX, SOCK_STREAM, 0, inout) < 0 ||
- socketpair(AF_UNIX, SOCK_STREAM, 0, err) < 0)
- packet_disconnect("Could not create socket pairs: %.100s",
- strerror(errno));
- if (s == NULL)
- fatal("do_exec_no_pty: no session");
-
- session_proctitle(s);
-
- /* Fork the child. */
- if ((pid = fork()) == 0) {
- fatal_remove_all_cleanups();
-
- /* Child. Reinitialize the log since the pid has changed. */
- log_init(__progname, options.log_level, options.log_facility, log_stderr);
-
- /*
- * Create a new session and process group since the 4.4BSD
- * setlogin() affects the entire process group.
- */
- if (setsid() < 0)
- error("setsid failed: %.100s", strerror(errno));
-
- /*
- * Redirect stdin, stdout, and stderr. Stdin and stdout will
- * use the same socket, as some programs (particularly rdist)
- * seem to depend on it.
- */
- close(inout[1]);
- close(err[1]);
- if (dup2(inout[0], 0) < 0) /* stdin */
- perror("dup2 stdin");
- if (dup2(inout[0], 1) < 0) /* stdout. Note: same socket as stdin. */
- perror("dup2 stdout");
- if (s->is_subsystem) {
- /*
- * Redirect the subsystem's stderr to /dev/null. We might send it
- * over to the other side but changing that might break existing
- * SSH clients.
- */
- close(err[0]);
- if ((err[0] = open(_PATH_DEVNULL, O_WRONLY)) == -1)
- fatal("Cannot open /dev/null: %.100s", strerror(errno));
- }
- if (dup2(err[0], 2) < 0) /* stderr */
- perror("dup2 stderr");
-
-#ifdef _UNICOS
- cray_init_job(s->pw); /* set up cray jid and tmpdir */
-#endif
-
- /* Do processing for the child (exec command etc). */
- do_child(s, command);
- /* NOTREACHED */
- }
-#ifdef _UNICOS
- signal(WJSIGNAL, cray_job_termination_handler);
-#endif /* _UNICOS */
-#ifdef HAVE_CYGWIN
- if (is_winnt)
- cygwin_set_impersonation_token(INVALID_HANDLE_VALUE);
-#endif
- if (pid < 0)
- packet_disconnect("fork failed: %.100s", strerror(errno));
-
- s->pid = pid;
- /* Set interactive/non-interactive mode. */
- packet_set_interactive(s->display != NULL);
-
- /* We are the parent. Close the child sides of the socket pairs. */
- close(inout[0]);
- close(err[0]);
-
- /*
- * Enter the interactive session. Note: server_loop must be able to
- * handle the case that fdin and fdout are the same.
- */
- if (compat20) {
- session_set_fds(s, inout[1], inout[1], s->is_subsystem ? -1 : err[1]);
- if (s->is_subsystem)
- close(err[1]);
- /* Don't close channel before sending exit-status! */
- channel_set_wait_for_exit(s->chanid, 1);
- } else {
- server_loop(pid, inout[1], inout[1], err[1]);
- /* server_loop has closed inout[1] and err[1]. */
- }
-}
-
-/*
- * This is called to fork and execute a command when we have a tty. This
- * will call do_child from the child, and server_loop from the parent after
- * setting up file descriptors, controlling tty, updating wtmp, utmp,
- * lastlog, and other such operations.
- */
-void
-do_exec_pty(Session *s, const char *command)
-{
- int fdout, ptyfd, ttyfd, ptymaster, pipe_fds[2];
- pid_t pid;
-
- if (s == NULL)
- fatal("do_exec_pty: no session");
- ptyfd = s->ptyfd;
- ttyfd = s->ttyfd;
-
-#ifdef USE_PAM
- session_do_pam(s, 1); /* pam_open_session() */
-#endif /* USE_PAM */
-
- /*
- * This pipe lets sshd wait for child to exec or exit. This is
- * particularly important for ALTPRIVSEP because the child is
- * the one to call the monitor to request a record_login() and
- * we don't want the child and the parent to compete for the
- * monitor's attention. But this is generic code and doesn't
- * hurt to have here even if ALTPRIVSEP is not used.
- */
- if (pipe(pipe_fds) != 0)
- packet_disconnect("pipe failed: %.100s", strerror(errno));
-
- (void) fcntl(pipe_fds[0], F_SETFD, FD_CLOEXEC);
- (void) fcntl(pipe_fds[1], F_SETFD, FD_CLOEXEC);
-
- /* Fork the child. */
- if ((pid = fork()) == 0) {
- (void) close(pipe_fds[0]);
-
- fatal_remove_all_cleanups();
-
- /* Child. Reinitialize the log because the pid has changed. */
- log_init(__progname, options.log_level, options.log_facility, log_stderr);
- /* Close the master side of the pseudo tty. */
- close(ptyfd);
-
- /* Make the pseudo tty our controlling tty. */
- pty_make_controlling_tty(&ttyfd, s->tty);
-
- /* Redirect stdin/stdout/stderr from the pseudo tty. */
- if (dup2(ttyfd, 0) < 0)
- error("dup2 stdin: %s", strerror(errno));
- if (dup2(ttyfd, 1) < 0)
- error("dup2 stdout: %s", strerror(errno));
- if (dup2(ttyfd, 2) < 0)
- error("dup2 stderr: %s", strerror(errno));
-
- /* Close the extra descriptor for the pseudo tty. */
- close(ttyfd);
-
- /* record login, etc. similar to login(1) */
- do_login(s, command);
-
- /*
- * Close the pipe to the parent so it can re-enter its event
- * loop and service the ptm; if enough debug messages get
- * written to the pty before this happens there will be a
- * deadlock.
- */
- close(pipe_fds[1]);
-
- /*
- * do_motd() was called originally in do_login(). However,
- * when the /etc/motd file is large, a deadlock would happen,
- * because
- * - The child is blocked at fputs() to pty, when pty buffer
- * is full.
- * - The parent can not consume the pty buffer, because it is
- * still blocked at read(pipe_fds[0]).
- *
- * To resolve the deadlock issue, we defer do_motd() after
- * close(pipe_fds[1]).
- */
- do_motd();
-
- /* Do common processing for the child, such as execing the command. */
- do_child(s, command);
- /* NOTREACHED */
- }
-
- /* Wait for child to exec() or exit() */
- (void) close(pipe_fds[1]);
- (void) read(pipe_fds[0], &pipe_fds[1], sizeof(int));
-
-#ifdef _UNICOS
- signal(WJSIGNAL, cray_job_termination_handler);
-#endif /* _UNICOS */
-#ifdef HAVE_CYGWIN
- if (is_winnt)
- cygwin_set_impersonation_token(INVALID_HANDLE_VALUE);
-#endif
- if (pid < 0)
- packet_disconnect("fork failed: %.100s", strerror(errno));
- s->pid = pid;
-
- /* Parent. Close the slave side of the pseudo tty. */
- close(ttyfd);
-
- /*
- * Create another descriptor of the pty master side for use as the
- * standard input. We could use the original descriptor, but this
- * simplifies code in server_loop. The descriptor is bidirectional.
- */
- fdout = dup(ptyfd);
- if (fdout < 0)
- packet_disconnect("dup #1 failed: %.100s", strerror(errno));
-
- /* we keep a reference to the pty master */
- ptymaster = dup(ptyfd);
- if (ptymaster < 0)
- packet_disconnect("dup #2 failed: %.100s", strerror(errno));
- s->ptymaster = ptymaster;
-
- /* Enter interactive session. */
- packet_set_interactive(1);
- if (compat20) {
- session_set_fds(s, ptyfd, fdout, -1);
- /* Don't close channel before sending exit-status! */
- channel_set_wait_for_exit(s->chanid, 1);
- } else {
- server_loop(pid, ptyfd, fdout, -1);
- /* server_loop _has_ closed ptyfd and fdout. */
- }
-}
-
-/*
- * This is called to fork and execute a command. If another command is
- * to be forced, execute that instead.
- */
-void
-do_exec(Session *s, const char *command)
-{
- if (command)
- s->command = xstrdup(command);
-
- if (forced_command) {
- original_command = command;
- command = forced_command;
- debug("Forced command '%.900s'", command);
- }
-
- if (s->ttyfd != -1)
- do_exec_pty(s, command);
- else
- do_exec_no_pty(s, command);
-
- original_command = NULL;
-}
-
-
-/* administrative, login(1)-like work */
-void
-do_login(Session *s, const char *command)
-{
- char *time_string;
-#ifndef ALTPRIVSEP
- struct passwd * pw = s->pw;
-#endif /* ALTPRIVSEP*/
- pid_t pid = getpid();
-
- /* Record that there was a login on that tty from the remote host. */
-#ifdef ALTPRIVSEP
- debug3("Recording SSHv2 channel login in utmpx/wtmpx");
- altprivsep_record_login(pid, s->tty);
-#endif /* ALTPRIVSEP*/
-
- if (check_quietlogin(s, command))
- return;
-
-#ifdef USE_PAM
- print_pam_messages();
-#endif /* USE_PAM */
-#ifdef WITH_AIXAUTHENTICATE
- if (aixloginmsg && *aixloginmsg)
- printf("%s\n", aixloginmsg);
-#endif /* WITH_AIXAUTHENTICATE */
-
-#ifndef NO_SSH_LASTLOG
- if (options.print_lastlog && s->last_login_time != 0) {
- time_string = ctime(&s->last_login_time);
- if (strchr(time_string, '\n'))
- *strchr(time_string, '\n') = 0;
- if (strcmp(s->hostname, "") == 0)
- printf("Last login: %s\r\n", time_string);
- else
- printf("Last login: %s from %s\r\n", time_string,
- s->hostname);
- }
-#endif /* NO_SSH_LASTLOG */
-
-}
-
-/*
- * Display the message of the day.
- */
-void
-do_motd(void)
-{
- FILE *f;
- char buf[256];
-
- if (options.print_motd) {
-#ifdef HAVE_LOGIN_CAP
- f = fopen(login_getcapstr(lc, "welcome", "/etc/motd",
- "/etc/motd"), "r");
-#else
- f = fopen("/etc/motd", "r");
-#endif
- if (f) {
- while (fgets(buf, sizeof(buf), f))
- fputs(buf, stdout);
- fclose(f);
- }
- }
-}
-
-
-/*
- * Check for quiet login, either .hushlogin or command given.
- */
-int
-check_quietlogin(Session *s, const char *command)
-{
- char buf[256];
- struct passwd *pw = s->pw;
- struct stat st;
-
- /* Return 1 if .hushlogin exists or a command given. */
- if (command != NULL)
- return 1;
- snprintf(buf, sizeof(buf), "%.200s/.hushlogin", pw->pw_dir);
-#ifdef HAVE_LOGIN_CAP
- if (login_getcapbool(lc, "hushlogin", 0) || stat(buf, &st) >= 0)
- return 1;
-#else
- if (stat(buf, &st) >= 0)
- return 1;
-#endif
- return 0;
-}
-
-/*
- * Sets the value of the given variable in the environment. If the variable
- * already exists, its value is overriden.
- */
-void
-child_set_env(char ***envp, u_int *envsizep, const char *name,
- const char *value)
-{
- debug3("child_set_env(%s, %s)", name, value);
- child_set_env_silent(envp, envsizep, name, value);
-}
-
-
-void
-child_set_env_silent(char ***envp, u_int *envsizep, const char *name,
- const char *value)
-{
- u_int i, namelen;
- char **env;
-
- /*
- * Find the slot where the value should be stored. If the variable
- * already exists, we reuse the slot; otherwise we append a new slot
- * at the end of the array, expanding if necessary.
- */
- env = *envp;
- namelen = strlen(name);
- for (i = 0; env[i]; i++)
- if (strncmp(env[i], name, namelen) == 0 && env[i][namelen] == '=')
- break;
- if (env[i]) {
- /* Reuse the slot. */
- xfree(env[i]);
- } else {
- /* New variable. Expand if necessary. */
- if (i >= (*envsizep) - 1) {
- if (*envsizep >= 1000)
- fatal("child_set_env: too many env vars,"
- " skipping: %.100s", name);
- (*envsizep) += 50;
- env = (*envp) = xrealloc(env, (*envsizep) * sizeof(char *));
- }
- /* Need to set the NULL pointer at end of array beyond the new slot. */
- env[i + 1] = NULL;
- }
-
- /* Allocate space and format the variable in the appropriate slot. */
- env[i] = xmalloc(strlen(name) + 1 + strlen(value) + 1);
- snprintf(env[i], strlen(name) + 1 + strlen(value) + 1, "%s=%s", name, value);
-}
-
-/*
- * Reads environment variables from the given file and adds/overrides them
- * into the environment. If the file does not exist, this does nothing.
- * Otherwise, it must consist of empty lines, comments (line starts with '#')
- * and assignments of the form name=value. No other forms are allowed.
- */
-static void
-read_environment_file(char ***env, u_int *envsize,
- const char *filename)
-{
- FILE *f;
- char buf[4096];
- char *cp, *value;
- u_int lineno = 0;
-
- f = fopen(filename, "r");
- if (!f)
- return;
-
- while (fgets(buf, sizeof(buf), f)) {
- if (++lineno > 1000)
- fatal("Too many lines in environment file %s", filename);
- for (cp = buf; *cp == ' ' || *cp == '\t'; cp++)
- ;
- if (!*cp || *cp == '#' || *cp == '\n')
- continue;
- if (strchr(cp, '\n'))
- *strchr(cp, '\n') = '\0';
- value = strchr(cp, '=');
- if (value == NULL) {
- fprintf(stderr, gettext("Bad line %u in %.100s\n"),
- lineno, filename);
- continue;
- }
- /*
- * Replace the equals sign by nul, and advance value to
- * the value string.
- */
- *value = '\0';
- value++;
- child_set_env(env, envsize, cp, value);
- }
- fclose(f);
-}
-
-void copy_environment(char **source, char ***env, u_int *envsize)
-{
- char *var_name, *var_val;
- int i;
-
- if (source == NULL)
- return;
-
- for(i = 0; source[i] != NULL; i++) {
- var_name = xstrdup(source[i]);
- if ((var_val = strstr(var_name, "=")) == NULL) {
- xfree(var_name);
- continue;
- }
- *var_val++ = '\0';
-
- debug3("Copy environment: %s=%s", var_name, var_val);
- child_set_env(env, envsize, var_name, var_val);
-
- xfree(var_name);
- }
-}
-
-#ifdef HAVE_DEFOPEN
-static
-void
-deflt_do_setup_env(Session *s, const char *shell, char ***env, u_int *envsize)
-{
- int flags;
- char *ptr;
- mode_t Umask = 022;
-
- if (defopen(_PATH_DEFAULT_LOGIN))
- return;
-
- /* Ignore case */
- flags = defcntl(DC_GETFLAGS, 0);
- TURNOFF(flags, DC_CASE);
- (void) defcntl(DC_SETFLAGS, flags);
-
- /* TZ & HZ */
- if ((ptr = defread("TIMEZONE=")) != NULL)
- child_set_env(env, envsize, "TZ", ptr);
- if ((ptr = defread("HZ=")) != NULL)
- child_set_env(env, envsize, "HZ", ptr);
-
- /* PATH */
- if (s->pw->pw_uid != 0 && (ptr = defread("PATH=")) != NULL)
- child_set_env(env, envsize, "PATH", ptr);
- if (s->pw->pw_uid == 0 && (ptr = defread("SUPATH=")) != NULL)
- child_set_env(env, envsize, "PATH", ptr);
-
- /* SHELL */
- if ((ptr = defread("ALTSHELL=")) != NULL) {
- if (strcasecmp("YES", ptr) == 0)
- child_set_env(env, envsize, "SHELL", shell);
- else
- child_set_env(env, envsize, "SHELL", "");
- }
-
- /* UMASK */
- if ((ptr = defread("UMASK=")) != NULL &&
- sscanf(ptr, "%lo", &Umask) == 1 &&
- Umask <= (mode_t)0777)
- (void) umask(Umask);
- else
- (void) umask(022);
-
- /* ULIMIT */
- if ((ptr = defread("ULIMIT=")) != NULL && atol(ptr) > 0L &&
- ulimit(UL_SETFSIZE, atol(ptr)) < 0L)
- error("Could not set ULIMIT to %ld from %s\n", atol(ptr),
- _PATH_DEFAULT_LOGIN);
-
- (void) defopen(NULL);
-}
-#endif /* HAVE_DEFOPEN */
-
-static char **
-do_setup_env(Session *s, const char *shell)
-{
- char buf[256];
- char path_maildir[] = _PATH_MAILDIR;
- u_int i, envsize, pm_len;
- char **env;
- struct passwd *pw = s->pw;
-
- /* Initialize the environment. */
- envsize = 100;
- env = xmalloc(envsize * sizeof(char *));
- env[0] = NULL;
-
-#ifdef HAVE_CYGWIN
- /*
- * The Windows environment contains some setting which are
- * important for a running system. They must not be dropped.
- */
- copy_environment(environ, &env, &envsize);
-#endif
-
-#ifdef GSSAPI
- /* Allow any GSSAPI methods that we've used to alter
- * the childs environment as they see fit
- */
- ssh_gssapi_do_child(xxx_gssctxt, &env,&envsize);
-#endif
-
- /* Set basic environment. */
- child_set_env(&env, &envsize, "USER", pw->pw_name);
- child_set_env(&env, &envsize, "LOGNAME", pw->pw_name);
- child_set_env(&env, &envsize, "HOME", pw->pw_dir);
-#ifdef HAVE_LOGIN_CAP
- if (setusercontext(lc, pw, pw->pw_uid, LOGIN_SETPATH) < 0)
- child_set_env(&env, &envsize, "PATH", _PATH_STDPATH);
- else
- child_set_env(&env, &envsize, "PATH", getenv("PATH"));
-#else /* HAVE_LOGIN_CAP */
-# ifndef HAVE_CYGWIN
- /*
- * There's no standard path on Windows. The path contains
- * important components pointing to the system directories,
- * needed for loading shared libraries. So the path better
- * remains intact here.
- */
-# ifdef SUPERUSER_PATH
- child_set_env(&env, &envsize, "PATH",
- s->pw->pw_uid == 0 ? SUPERUSER_PATH : _PATH_STDPATH);
-# else
- child_set_env(&env, &envsize, "PATH", _PATH_STDPATH);
-# endif /* SUPERUSER_PATH */
-# endif /* HAVE_CYGWIN */
-#endif /* HAVE_LOGIN_CAP */
-
- pm_len = strlen(path_maildir);
- if (path_maildir[pm_len - 1] == '/' && pm_len > 1)
- path_maildir[pm_len - 1] = NULL;
- snprintf(buf, sizeof buf, "%.200s/%.50s",
- path_maildir, pw->pw_name);
- child_set_env(&env, &envsize, "MAIL", buf);
-
- /* Normal systems set SHELL by default. */
- child_set_env(&env, &envsize, "SHELL", shell);
-
-#ifdef HAVE_DEFOPEN
- deflt_do_setup_env(s, shell, &env, &envsize);
-#endif /* HAVE_DEFOPEN */
-
-#define PASS_ENV(x) \
- if (getenv(x)) \
- child_set_env(&env, &envsize, x, getenv(x));
-
- if (getenv("TZ"))
- child_set_env(&env, &envsize, "TZ", getenv("TZ"));
-
- if (s->auth_file != NULL)
- child_set_env(&env, &envsize, "XAUTHORITY", s->auth_file);
-
- PASS_ENV("LANG")
- PASS_ENV("LC_ALL")
- PASS_ENV("LC_CTYPE")
- PASS_ENV("LC_COLLATE")
- PASS_ENV("LC_TIME")
- PASS_ENV("LC_NUMERIC")
- PASS_ENV("LC_MONETARY")
- PASS_ENV("LC_MESSAGES")
-
-#undef PASS_ENV
-
- if (s->env != NULL)
- copy_environment(s->env, &env, &envsize);
-
- /* Set custom environment options from RSA authentication. */
- while (custom_environment) {
- struct envstring *ce = custom_environment;
- char *str = ce->s;
-
- for (i = 0; str[i] != '=' && str[i]; i++)
- ;
- if (str[i] == '=') {
- str[i] = 0;
- child_set_env(&env, &envsize, str, str + i + 1);
- }
- custom_environment = ce->next;
- xfree(ce->s);
- xfree(ce);
- }
-
- /* SSH_CLIENT deprecated */
- snprintf(buf, sizeof buf, "%.50s %d %d",
- get_remote_ipaddr(), get_remote_port(), get_local_port());
- child_set_env(&env, &envsize, "SSH_CLIENT", buf);
-
- snprintf(buf, sizeof buf, "%.50s %d %.50s %d",
- get_remote_ipaddr(), get_remote_port(),
- get_local_ipaddr(packet_get_connection_in()), get_local_port());
- child_set_env(&env, &envsize, "SSH_CONNECTION", buf);
-
- if (s->ttyfd != -1)
- child_set_env(&env, &envsize, "SSH_TTY", s->tty);
- if (s->term)
- child_set_env(&env, &envsize, "TERM", s->term);
- if (s->display)
- child_set_env(&env, &envsize, "DISPLAY", s->display);
- if (original_command)
- child_set_env(&env, &envsize, "SSH_ORIGINAL_COMMAND",
- original_command);
-
-#ifdef _UNICOS
- if (cray_tmpdir[0] != '\0')
- child_set_env(&env, &envsize, "TMPDIR", cray_tmpdir);
-#endif /* _UNICOS */
-
-#ifdef _AIX
- {
- char *cp;
-
- if ((cp = getenv("AUTHSTATE")) != NULL)
- child_set_env(&env, &envsize, "AUTHSTATE", cp);
- if ((cp = getenv("KRB5CCNAME")) != NULL)
- child_set_env(&env, &envsize, "KRB5CCNAME", cp);
- read_environment_file(&env, &envsize, "/etc/environment");
- }
-#endif
-#ifdef KRB4
- if (s->authctxt->krb4_ticket_file)
- child_set_env(&env, &envsize, "KRBTKFILE",
- s->authctxt->krb4_ticket_file);
-#endif
-#ifdef KRB5
- if (s->authctxt->krb5_ticket_file)
- child_set_env(&env, &envsize, "KRB5CCNAME",
- s->authctxt->krb5_ticket_file);
-#endif
-#ifdef USE_PAM
- /*
- * Pull in any environment variables that may have
- * been set by PAM.
- */
- {
- char **p;
-
- p = fetch_pam_environment(s->authctxt);
- copy_environment(p, &env, &envsize);
- free_pam_environment(p);
- }
-#endif /* USE_PAM */
-
- if (auth_sock_name != NULL)
- child_set_env(&env, &envsize, SSH_AUTHSOCKET_ENV_NAME,
- auth_sock_name);
-
- /* read $HOME/.ssh/environment. */
- if (options.permit_user_env) {
- snprintf(buf, sizeof buf, "%.200s/.ssh/environment",
- strcmp(pw->pw_dir, "/") ? pw->pw_dir : "");
- read_environment_file(&env, &envsize, buf);
- }
- if (debug_flag) {
- /* dump the environment */
- fprintf(stderr, gettext("Environment:\n"));
- for (i = 0; env[i]; i++)
- fprintf(stderr, " %.200s\n", env[i]);
- }
- return env;
-}
-
-/*
- * Run $HOME/.ssh/rc, /etc/ssh/sshrc, or xauth (whichever is found
- * first in this order).
- */
-static void
-do_rc_files(Session *s, const char *shell)
-{
- FILE *f = NULL;
- char cmd[1024];
- int do_xauth;
- struct stat st;
-
- do_xauth =
- s->display != NULL && s->auth_proto != NULL && s->auth_data != NULL;
-
- /* ignore _PATH_SSH_USER_RC for subsystems */
- if (!s->is_subsystem && (stat(_PATH_SSH_USER_RC, &st) >= 0)) {
- snprintf(cmd, sizeof cmd, "%s -c '%s %s'",
- shell, _PATH_BSHELL, _PATH_SSH_USER_RC);
- if (debug_flag)
- fprintf(stderr, "Running %s\n", cmd);
- f = popen(cmd, "w");
- if (f) {
- if (do_xauth)
- fprintf(f, "%s %s\n", s->auth_proto,
- s->auth_data);
- pclose(f);
- } else
- fprintf(stderr, "Could not run %s\n",
- _PATH_SSH_USER_RC);
- } else if (stat(_PATH_SSH_SYSTEM_RC, &st) >= 0) {
- if (debug_flag)
- fprintf(stderr, "Running %s %s\n", _PATH_BSHELL,
- _PATH_SSH_SYSTEM_RC);
- f = popen(_PATH_BSHELL " " _PATH_SSH_SYSTEM_RC, "w");
- if (f) {
- if (do_xauth)
- fprintf(f, "%s %s\n", s->auth_proto,
- s->auth_data);
- pclose(f);
- } else
- fprintf(stderr, "Could not run %s\n",
- _PATH_SSH_SYSTEM_RC);
- } else if (do_xauth && options.xauth_location != NULL) {
- /* Add authority data to .Xauthority if appropriate. */
- if (debug_flag) {
- fprintf(stderr,
- "Running %.500s add "
- "%.100s %.100s %.100s\n",
- options.xauth_location, s->auth_display,
- s->auth_proto, s->auth_data);
- }
- snprintf(cmd, sizeof cmd, "%s -q -",
- options.xauth_location);
- f = popen(cmd, "w");
- if (f) {
- fprintf(f, "add %s %s %s\n",
- s->auth_display, s->auth_proto,
- s->auth_data);
- pclose(f);
- } else {
- fprintf(stderr, "Could not run %s\n",
- cmd);
- }
- }
-}
-
-/* Disallow logins if /etc/nologin exists. This does not apply to root. */
-static void
-do_nologin(struct passwd *pw)
-{
- FILE *f = NULL;
- char buf[1024];
- struct stat sb;
-
- if (pw->pw_uid == 0)
- return;
-
- if (stat(_PATH_NOLOGIN, &sb) == -1)
- return;
-
- /* /etc/nologin exists. Print its contents if we can and exit. */
- log("User %.100s not allowed because %s exists.", pw->pw_name,
- _PATH_NOLOGIN);
- if ((f = fopen(_PATH_NOLOGIN, "r")) != NULL) {
- while (fgets(buf, sizeof(buf), f))
- fputs(buf, stderr);
- fclose(f);
- }
- exit(254);
-}
-
-/* Chroot into ChrootDirectory if the option is set. */
-void
-chroot_if_needed(struct passwd *pw)
-{
- char *chroot_path, *tmp;
-
- if (chroot_requested(options.chroot_directory)) {
- tmp = tilde_expand_filename(options.chroot_directory,
- pw->pw_uid);
- chroot_path = percent_expand(tmp, "h", pw->pw_dir,
- "u", pw->pw_name, (char *)NULL);
- safely_chroot(chroot_path, pw->pw_uid);
- free(tmp);
- free(chroot_path);
- }
-}
-
-/*
- * Chroot into a directory after checking it for safety: all path components
- * must be root-owned directories with strict permissions.
- */
-static void
-safely_chroot(const char *path, uid_t uid)
-{
- const char *cp;
- char component[MAXPATHLEN];
- struct stat st;
-
- if (*path != '/')
- fatal("chroot path does not begin at root");
- if (strlen(path) >= sizeof(component))
- fatal("chroot path too long");
-
- /*
- * Descend the path, checking that each component is a
- * root-owned directory with strict permissions.
- */
- for (cp = path; cp != NULL;) {
- if ((cp = strchr(cp, '/')) == NULL)
- strlcpy(component, path, sizeof(component));
- else {
- cp++;
- memcpy(component, path, cp - path);
- component[cp - path] = '\0';
- }
-
- debug3("%s: checking '%s'", __func__, component);
-
- if (stat(component, &st) != 0)
- fatal("%s: stat(\"%s\"): %s", __func__,
- component, strerror(errno));
- if (st.st_uid != 0 || (st.st_mode & 022) != 0)
- fatal("bad ownership or modes for chroot "
- "directory %s\"%s\"",
- cp == NULL ? "" : "component ", component);
- if (!S_ISDIR(st.st_mode))
- fatal("chroot path %s\"%s\" is not a directory",
- cp == NULL ? "" : "component ", component);
- }
-
- if (chdir(path) == -1)
- fatal("Unable to chdir to chroot path \"%s\": "
- "%s", path, strerror(errno));
- if (chroot(path) == -1)
- fatal("chroot(\"%s\"): %s", path, strerror(errno));
- if (chdir("/") == -1)
- fatal("%s: chdir(/) after chroot: %s",
- __func__, strerror(errno));
- verbose("Changed root directory to \"%s\"", path);
-}
-
-static void
-launch_login(struct passwd *pw, const char *hostname)
-{
- /* Launch login(1). */
-
- execl(LOGIN_PROGRAM, "login", "-h", hostname,
-#ifdef xxxLOGIN_NEEDS_TERM
- (s->term ? s->term : "unknown"),
-#endif /* LOGIN_NEEDS_TERM */
-#ifdef LOGIN_NO_ENDOPT
- "-p", "-f", pw->pw_name, (char *)NULL);
-#else
- "-p", "-f", "--", pw->pw_name, (char *)NULL);
-#endif
-
- /* Login couldn't be executed, die. */
-
- perror("login");
- exit(1);
-}
-
-/*
- * Performs common processing for the child, such as setting up the
- * environment, closing extra file descriptors, setting the user and group
- * ids, and executing the command or shell.
- */
-#define ARGV_MAX 10
-void
-do_child(Session *s, const char *command)
-{
- extern char **environ;
- char **env;
- char *argv[ARGV_MAX];
- const char *shell, *shell0;
- struct passwd *pw = s->pw;
-
- /* remove hostkey from the child's memory */
- destroy_sensitive_data();
-
- do_nologin(pw);
- chroot_if_needed(pw);
-
- /*
- * Get the shell from the password data. An empty shell field is
- * legal, and means /bin/sh.
- */
- shell = (pw->pw_shell[0] == '\0') ? _PATH_BSHELL : pw->pw_shell;
-#ifdef HAVE_LOGIN_CAP
- shell = login_getcapstr(lc, "shell", (char *)shell, (char *)shell);
-#endif
-
- env = do_setup_env(s, shell);
-
- /*
- * Close the connection descriptors; note that this is the child, and
- * the server will still have the socket open, and it is important
- * that we do not shutdown it. Note that the descriptors cannot be
- * closed before building the environment, as we call
- * get_remote_ipaddr there.
- */
- if (packet_get_connection_in() == packet_get_connection_out())
- close(packet_get_connection_in());
- else {
- close(packet_get_connection_in());
- close(packet_get_connection_out());
- }
- /*
- * Close all descriptors related to channels. They will still remain
- * open in the parent.
- */
- /* XXX better use close-on-exec? -markus */
- channel_close_all();
-
- /*
- * Close any extra file descriptors. Note that there may still be
- * descriptors left by system functions. They will be closed later.
- */
- endpwent();
-
- /*
- * Must switch to the new environment variables so that .ssh/rc,
- * /etc/ssh/sshrc, and xauth are run in the proper environment.
- */
- environ = env;
-
- /*
- * New environment has been installed. We need to update locale
- * so that error messages beyond this point have the proper
- * character encoding.
- */
- (void) setlocale(LC_ALL, "");
-
- /*
- * Close any extra open file descriptors so that we don\'t have them
- * hanging around in clients. Note that we want to do this after
- * initgroups, because at least on Solaris 2.3 it leaves file
- * descriptors open.
- */
- closefrom(STDERR_FILENO + 1);
-
-#ifdef AFS
- /* Try to get AFS tokens for the local cell. */
- if (k_hasafs()) {
- char cell[64];
-
- if (k_afs_cell_of_file(pw->pw_dir, cell, sizeof(cell)) == 0)
- krb_afslog(cell, 0);
-
- krb_afslog(0, 0);
- }
-#endif /* AFS */
-
- /* Change current directory to the user's home directory. */
- if (chdir(pw->pw_dir) < 0) {
- /* Suppress missing homedir warning for chroot case */
- if (!chroot_requested(options.chroot_directory))
- fprintf(stderr, "Could not chdir to home "
- "directory %s: %s\n", pw->pw_dir,
- strerror(errno));
- }
-
- do_rc_files(s, shell);
-
- /* restore SIGPIPE for child */
- signal(SIGPIPE, SIG_DFL);
-
- if (s->is_subsystem == SUBSYSTEM_INT_SFTP) {
- int i;
- char *p, *args;
- extern int optind, optreset;
-
- /* This will set the E/P sets here, simulating exec(2). */
- drop_privs(pw->pw_uid);
-
- setproctitle("%s@internal-sftp-server", s->pw->pw_name);
- args = xstrdup(command ? command : "sftp-server");
-
- i = 0;
- for ((p = strtok(args, " ")); p != NULL; (p = strtok(NULL, " "))) {
- if (i < ARGV_MAX - 1)
- argv[i++] = p;
- }
-
- argv[i] = NULL;
- optind = optreset = 1;
- __progname = argv[0];
- exit(sftp_server_main(i, argv, s->pw));
- }
-
- /* Get the last component of the shell name. */
- if ((shell0 = strrchr(shell, '/')) != NULL)
- shell0++;
- else
- shell0 = shell;
-
- /*
- * If we have no command, execute the shell. In this case, the shell
- * name to be passed in argv[0] is preceded by '-' to indicate that
- * this is a login shell.
- */
- if (!command) {
- char argv0[256];
-
- /* Start the shell. Set initial character to '-'. */
- argv0[0] = '-';
-
- if (strlcpy(argv0 + 1, shell0, sizeof(argv0) - 1)
- >= sizeof(argv0) - 1) {
- errno = EINVAL;
- perror(shell);
- exit(1);
- }
-
- /* Execute the shell. */
- argv[0] = argv0;
- argv[1] = NULL;
- execve(shell, argv, env);
-
- /* Executing the shell failed. */
- perror(shell);
- exit(1);
- }
- /*
- * Execute the command using the user's shell. This uses the -c
- * option to execute the command.
- */
- argv[0] = (char *) shell0;
- argv[1] = "-c";
- argv[2] = (char *) command;
- argv[3] = NULL;
- execve(shell, argv, env);
- perror(shell);
- exit(1);
-}
-
-Session *
-session_new(void)
-{
- int i;
- static int did_init = 0;
- if (!did_init) {
- debug("session_new: init");
- for (i = 0; i < MAX_SESSIONS; i++) {
- sessions[i].used = 0;
- }
- did_init = 1;
- }
- for (i = 0; i < MAX_SESSIONS; i++) {
- Session *s = &sessions[i];
- if (! s->used) {
- memset(s, 0, sizeof(*s));
- s->chanid = -1;
- s->ptyfd = -1;
- s->ttyfd = -1;
- s->used = 1;
- s->self = i;
- s->env = NULL;
- debug("session_new: session %d", i);
- return s;
- }
- }
- return NULL;
-}
-
-static void
-session_dump(void)
-{
- int i;
- for (i = 0; i < MAX_SESSIONS; i++) {
- Session *s = &sessions[i];
- debug("dump: used %d session %d %p channel %d pid %ld",
- s->used,
- s->self,
- s,
- s->chanid,
- (long)s->pid);
- }
-}
-
-int
-session_open(Authctxt *authctxt, int chanid)
-{
- Session *s = session_new();
- debug("session_open: channel %d", chanid);
- if (s == NULL) {
- error("no more sessions");
- return 0;
- }
- s->authctxt = authctxt;
- s->pw = authctxt->pw;
- if (s->pw == NULL)
- fatal("no user for session %d", s->self);
- debug("session_open: session %d: link with channel %d", s->self, chanid);
- s->chanid = chanid;
- return 1;
-}
-
-#ifndef lint
-Session *
-session_by_tty(char *tty)
-{
- int i;
- for (i = 0; i < MAX_SESSIONS; i++) {
- Session *s = &sessions[i];
- if (s->used && s->ttyfd != -1 && strcmp(s->tty, tty) == 0) {
- debug("session_by_tty: session %d tty %s", i, tty);
- return s;
- }
- }
- debug("session_by_tty: unknown tty %.100s", tty);
- session_dump();
- return NULL;
-}
-#endif /* lint */
-
-static Session *
-session_by_channel(int id)
-{
- int i;
- for (i = 0; i < MAX_SESSIONS; i++) {
- Session *s = &sessions[i];
- if (s->used && s->chanid == id) {
- debug("session_by_channel: session %d channel %d", i, id);
- return s;
- }
- }
- debug("session_by_channel: unknown channel %d", id);
- session_dump();
- return NULL;
-}
-
-static Session *
-session_by_pid(pid_t pid)
-{
- int i;
- debug("session_by_pid: pid %ld", (long)pid);
- for (i = 0; i < MAX_SESSIONS; i++) {
- Session *s = &sessions[i];
- if (s->used && s->pid == pid)
- return s;
- }
- error("session_by_pid: unknown pid %ld", (long)pid);
- session_dump();
- return NULL;
-}
-
-static int
-session_window_change_req(Session *s)
-{
- s->col = packet_get_int();
- s->row = packet_get_int();
- s->xpixel = packet_get_int();
- s->ypixel = packet_get_int();
- packet_check_eom();
- pty_change_window_size(s->ptyfd, s->row, s->col, s->xpixel, s->ypixel);
- return 1;
-}
-
-static int
-session_pty_req(Session *s)
-{
- u_int len;
- int n_bytes;
-
- if (no_pty_flag) {
- debug("Allocating a pty not permitted for this authentication.");
- return 0;
- }
- if (s->ttyfd != -1) {
- packet_disconnect("Protocol error: you already have a pty.");
- return 0;
- }
- /* Get the time and hostname when the user last logged in. */
- if (options.print_lastlog) {
- s->hostname[0] = '\0';
- s->last_login_time = get_last_login_time(s->pw->pw_uid,
- s->pw->pw_name, s->hostname, sizeof(s->hostname));
-
- /*
- * PAM may update the last login date.
- *
- * Ideally PAM would also show the last login date as a
- * PAM_TEXT_INFO conversation message, and then we could just
- * always force the use of keyboard-interactive just so we can
- * pass any such PAM prompts and messages from the account and
- * session stacks, but skip pam_authenticate() if other userauth
- * has succeeded and the user's password isn't expired.
- *
- * Unfortunately this depends on support for keyboard-
- * interactive in the client, and support for lastlog messages
- * in some PAM module.
- *
- * As it is Solaris updates the lastlog in PAM, but does
- * not show the lastlog date in PAM. If and when this state of
- * affairs changes this hack can be reconsidered, and, maybe,
- * removed.
- *
- * So we're stuck with a crude hack: get the lastlog
- * time before calling pam_open_session() and store it
- * in the Authctxt and then use it here once. After
- * that, if the client opens any more pty sessions we'll
- * show the last lastlog entry since userauth.
- */
- if (s->authctxt != NULL && s->authctxt->last_login_time > 0) {
- s->last_login_time = s->authctxt->last_login_time;
- (void) strlcpy(s->hostname,
- s->authctxt->last_login_host,
- sizeof(s->hostname));
- s->authctxt->last_login_time = 0;
- s->authctxt->last_login_host[0] = '\0';
- }
- }
-
- s->term = packet_get_string(&len);
-
- if (compat20) {
- s->col = packet_get_int();
- s->row = packet_get_int();
- } else {
- s->row = packet_get_int();
- s->col = packet_get_int();
- }
- s->xpixel = packet_get_int();
- s->ypixel = packet_get_int();
-
- if (strcmp(s->term, "") == 0) {
- xfree(s->term);
- s->term = NULL;
- }
-
- /* Allocate a pty and open it. */
- debug("Allocating pty.");
- if (!pty_allocate(&s->ptyfd, &s->ttyfd, s->tty, sizeof(s->tty))) {
- if (s->term)
- xfree(s->term);
- s->term = NULL;
- s->ptyfd = -1;
- s->ttyfd = -1;
- error("session_pty_req: session %d alloc failed", s->self);
- return 0;
- }
- debug("session_pty_req: session %d alloc %s", s->self, s->tty);
-
- /* for SSH1 the tty modes length is not given */
- if (!compat20)
- n_bytes = packet_remaining();
- tty_parse_modes(s->ttyfd, &n_bytes);
-
- /*
- * Add a cleanup function to clear the utmp entry and record logout
- * time in case we call fatal() (e.g., the connection gets closed).
- */
- fatal_add_cleanup(session_pty_cleanup, (void *)s);
- pty_setowner(s->pw, s->tty);
-
- /* Set window size from the packet. */
- pty_change_window_size(s->ptyfd, s->row, s->col, s->xpixel, s->ypixel);
-
- packet_check_eom();
- session_proctitle(s);
- return 1;
-}
-
-static int
-session_subsystem_req(Session *s)
-{
- struct stat st;
- u_int len;
- int success = 0;
- char *prog, *cmd, *subsys = packet_get_string(&len);
- u_int i;
-
- packet_check_eom();
- log("subsystem request for %.100s", subsys);
-
- for (i = 0; i < options.num_subsystems; i++) {
- if (strcmp(subsys, options.subsystem_name[i]) == 0) {
- prog = options.subsystem_command[i];
- cmd = options.subsystem_args[i];
- if (strcmp(INTERNAL_SFTP_NAME, prog) == 0) {
- s->is_subsystem = SUBSYSTEM_INT_SFTP;
- /*
- * We must stat(2) the subsystem before we chroot in
- * order to be able to send a proper error message.
- */
- } else if (chroot_requested(options.chroot_directory)) {
- char chdirsub[MAXPATHLEN];
-
- strlcpy(chdirsub, options.chroot_directory,
- sizeof (chdirsub));
- strlcat(chdirsub, "/", sizeof (chdirsub));
- strlcat(chdirsub, prog, sizeof (chdirsub));
- if (stat(chdirsub, &st) < 0) {
- error("subsystem: cannot stat %s under "
- "chroot directory %s: %s", prog,
- options.chroot_directory,
- strerror(errno));
- if (strcmp(subsys, "sftp") == 0)
- error("subsystem: please see "
- "the Subsystem option in "
- "sshd_config(4) for an "
- "explanation of '%s'.",
- INTERNAL_SFTP_NAME);
- break;
- }
- } else if (stat(prog, &st) < 0) {
- error("subsystem: cannot stat %s: %s", prog,
- strerror(errno));
- break;
- } else {
- s->is_subsystem = SUBSYSTEM_EXT;
- }
- debug("subsystem: exec() %s", cmd);
- do_exec(s, cmd);
- success = 1;
- break;
- }
- }
-
- if (!success)
- log("subsystem request for %.100s failed, subsystem not found",
- subsys);
-
- xfree(subsys);
- return success;
-}
-
-/*
- * Serve "x11-req" channel request for X11 forwarding for the current session
- * channel.
- */
-static int
-session_x11_req(Session *s)
-{
- int success, fd;
- char xauthdir[] = "/tmp/ssh-xauth-XXXXXX";
-
- s->single_connection = packet_get_char();
- s->auth_proto = packet_get_string(NULL);
- s->auth_data = packet_get_string(NULL);
- s->screen = packet_get_int();
- packet_check_eom();
-
- success = session_setup_x11fwd(s);
- if (!success) {
- xfree(s->auth_proto);
- xfree(s->auth_data);
- s->auth_proto = NULL;
- s->auth_data = NULL;
- return (success);
- }
-
- /*
- * Create per session X authority file so that different sessions
- * don't contend for one common file. The reason for this is that
- * xauth(1) locking doesn't work too well over network filesystems.
- *
- * If mkdtemp() or open() fails then s->auth_file remains NULL which
- * means that we won't set XAUTHORITY variable in child's environment
- * and xauth(1) will use the default location for the authority file.
- */
- if (mkdtemp(xauthdir) != NULL) {
- s->auth_file = xmalloc(MAXPATHLEN);
- snprintf(s->auth_file, MAXPATHLEN, "%s/xauthfile",
- xauthdir);
- /*
- * we don't want that "creating new authority file" message to
- * be printed by xauth(1) so we must create that file
- * beforehand.
- */
- if ((fd = open(s->auth_file, O_CREAT | O_EXCL | O_RDONLY,
- S_IRUSR | S_IWUSR)) == -1) {
- error("failed to create the temporary X authority "
- "file %s: %.100s; will use the default one",
- s->auth_file, strerror(errno));
- xfree(s->auth_file);
- s->auth_file = NULL;
- if (rmdir(xauthdir) == -1) {
- error("cannot remove xauth directory %s: %.100s",
- xauthdir, strerror(errno));
- }
- } else {
- close(fd);
- debug("temporary X authority file %s created",
- s->auth_file);
-
- /*
- * add a cleanup function to remove the temporary
- * xauth file in case we call fatal() (e.g., the
- * connection gets closed).
- */
- fatal_add_cleanup(session_xauthfile_cleanup, (void *)s);
- }
- }
- else {
- error("failed to create a directory for the temporary X "
- "authority file: %.100s; will use the default xauth file",
- strerror(errno));
- }
-
- return (success);
-}
-
-static int
-session_shell_req(Session *s)
-{
- packet_check_eom();
- do_exec(s, NULL);
- return 1;
-}
-
-static int
-session_exec_req(Session *s)
-{
- u_int len;
- char *command = packet_get_string(&len);
- packet_check_eom();
- do_exec(s, command);
- xfree(command);
- return 1;
-}
-
-static int
-session_auth_agent_req(Session *s)
-{
- static int called = 0;
- packet_check_eom();
- if (no_agent_forwarding_flag) {
- debug("session_auth_agent_req: no_agent_forwarding_flag");
- return 0;
- }
- if (called) {
- return 0;
- } else {
- called = 1;
- return auth_input_request_forwarding(s->pw);
- }
-}
-
-static int
-session_loc_env_check(char *var, char *val)
-{
- char *current;
- int cat, ret;
-
- if (strcmp(var, "LANG") == 0)
- cat = LC_ALL;
- else if (strcmp(var, "LC_ALL") == 0)
- cat = LC_ALL;
- else if (strcmp(var, "LC_CTYPE") == 0)
- cat = LC_CTYPE;
- else if (strcmp(var, "LC_COLLATE") == 0)
- cat = LC_COLLATE;
- else if (strcmp(var, "LC_TIME") == 0)
- cat = LC_TIME;
- else if (strcmp(var, "LC_NUMERIC") == 0)
- cat = LC_NUMERIC;
- else if (strcmp(var, "LC_MONETARY") == 0)
- cat = LC_MONETARY;
- else if (strcmp(var, "LC_MESSAGES") == 0)
- cat = LC_MESSAGES;
-
- current = setlocale(cat, NULL);
-
- ret = (setlocale(cat, val) != NULL);
- (void) setlocale(cat, current);
- return (ret);
-}
-
-static int
-session_env_req(Session *s)
-{
- Channel *c;
- char *var, *val, *e;
- char **p;
- size_t len;
- int ret = 0;
-
- /* Get var/val from the rest of this packet */
- var = packet_get_string(NULL);
- val = packet_get_string(NULL);
-
- /*
- * We'll need the channel ID for the packet_send_debug messages,
- * so get it now.
- */
- if ((c = channel_lookup(s->chanid)) == NULL)
- goto done; /* shouldn't happen! */
-
- debug2("Received request for environment variable %s=%s", var, val);
-
- /* For now allow only LANG and LC_* */
- if (strcmp(var, "LANG") != 0 && strncmp(var, "LC_", 3) != 0) {
- debug2("Rejecting request for environment variable %s", var);
- goto done;
- }
-
- if (!session_loc_env_check(var, val)) {
- packet_send_debug(gettext("Missing locale support for %s=%s"),
- var, val);
- goto done;
- }
-
- packet_send_debug(gettext("Channel %d set: %s=%s"), c->remote_id,
- var, val);
-
- /*
- * Always append new environment variables without regard to old
- * ones being overriden. The way these are actually added to
- * the environment of the session process later settings
- * override earlier ones; see copy_environment().
- */
- if (s->env == NULL) {
- char **env;
-
- env = xmalloc(sizeof (char **) * 2);
- memset(env, 0, sizeof (char **) * 2);
-
- s->env = env;
- p = env;
- } else {
- for (p = s->env; *p != NULL ; p++);
-
- s->env = xrealloc(s->env, (p - s->env + 2) * sizeof (char **));
-
- for (p = s->env; *p != NULL ; p++);
- }
-
- len = snprintf(NULL, 0, "%s=%s", var, val);
- e = xmalloc(len + 1);
- (void) snprintf(e, len + 1, "%s=%s", var, val);
-
- (*p++) = e;
- *p = NULL;
-
- ret = 1;
-
-done:
- xfree(var);
- xfree(val);
-
- return (ret);
-}
-
-static void
-session_free_env(char ***envp)
-{
- char **env, **p;
-
- if (envp == NULL || *envp == NULL)
- return;
-
- env = *envp;
-
- *envp = NULL;
-
- for (p = env; *p != NULL; p++)
- xfree(*p);
-
- xfree(env);
-}
-
-int
-session_input_channel_req(Channel *c, const char *rtype)
-{
- int success = 0;
- Session *s;
-
- if ((s = session_by_channel(c->self)) == NULL) {
- log("session_input_channel_req: no session %d req %.100s",
- c->self, rtype);
- return 0;
- }
- debug("session_input_channel_req: session %d req %s", s->self, rtype);
-
- /*
- * a session is in LARVAL state until a shell, a command
- * or a subsystem is executed
- */
- if (c->type == SSH_CHANNEL_LARVAL) {
- if (strcmp(rtype, "shell") == 0) {
- success = session_shell_req(s);
- } else if (strcmp(rtype, "exec") == 0) {
- success = session_exec_req(s);
- } else if (strcmp(rtype, "pty-req") == 0) {
- success = session_pty_req(s);
- } else if (strcmp(rtype, "x11-req") == 0) {
- success = session_x11_req(s);
- } else if (strcmp(rtype, "auth-agent-req@openssh.com") == 0) {
- success = session_auth_agent_req(s);
- } else if (strcmp(rtype, "subsystem") == 0) {
- success = session_subsystem_req(s);
- } else if (strcmp(rtype, "env") == 0) {
- success = session_env_req(s);
- }
- }
- if (strcmp(rtype, "window-change") == 0) {
- success = session_window_change_req(s);
- }
- return success;
-}
-
-void
-session_set_fds(Session *s, int fdin, int fdout, int fderr)
-{
- if (!compat20)
- fatal("session_set_fds: called for proto != 2.0");
- /*
- * now that have a child and a pipe to the child,
- * we can activate our channel and register the fd's
- */
- if (s->chanid == -1)
- fatal("no channel for session %d", s->self);
- channel_set_fds(s->chanid,
- fdout, fdin, fderr,
- fderr == -1 ? CHAN_EXTENDED_IGNORE : CHAN_EXTENDED_READ,
- 1,
- CHAN_SES_WINDOW_DEFAULT);
-}
-
-/*
- * Function to perform pty cleanup. Also called if we get aborted abnormally
- * (e.g., due to a dropped connection).
- */
-void
-session_pty_cleanup2(void *session)
-{
- Session *s = session;
-
- if (s == NULL) {
- error("session_pty_cleanup: no session");
- return;
- }
- if (s->ttyfd == -1)
- return;
-
- debug("session_pty_cleanup: session %d release %s", s->self, s->tty);
-
-#ifdef USE_PAM
- session_do_pam(s, 0);
-#endif /* USE_PAM */
-
- /* Record that the user has logged out. */
- if (s->pid != 0) {
- debug3("Recording SSHv2 channel logout in utmpx/wtmpx");
-#ifdef ALTPRIVSEP
- altprivsep_record_logout(s->pid);
-#endif /* ALTPRIVSEP */
- }
-
- /* Release the pseudo-tty. */
- if (getuid() == 0)
- pty_release(s->tty);
-
- /*
- * Close the server side of the socket pairs. We must do this after
- * the pty cleanup, so that another process doesn't get this pty
- * while we're still cleaning up.
- */
- if (close(s->ptymaster) < 0)
- error("close(s->ptymaster/%d): %s", s->ptymaster, strerror(errno));
-
- /* unlink pty from session */
- s->ttyfd = -1;
-}
-
-void
-session_pty_cleanup(void *session)
-{
- session_pty_cleanup2(session);
-}
-
-/*
- * We use a different temporary X authority file per every session so we
- * should remove those files when fatal() is called.
- */
-void
-session_xauthfile_cleanup(void *session)
-{
- Session *s = session;
-
- if (s == NULL) {
- error("session_xauthfile_cleanup: no session");
- return;
- }
-
- debug("session_xauthfile_cleanup: session %d removing %s", s->self,
- s->auth_file);
-
- if (unlink(s->auth_file) == -1) {
- error("session_xauthfile_cleanup: cannot remove xauth file: "
- "%.100s", strerror(errno));
- return;
- }
-
- /* dirname() will modify s->auth_file but that's ok */
- if (rmdir(dirname(s->auth_file)) == -1) {
- error("session_xauthfile_cleanup: "
- "cannot remove xauth directory: %.100s", strerror(errno));
- return;
- }
-}
-
-static char *
-sig2name(int sig)
-{
-#define SSH_SIG(x) if (sig == SIG ## x) return #x
- SSH_SIG(ABRT);
- SSH_SIG(ALRM);
- SSH_SIG(FPE);
- SSH_SIG(HUP);
- SSH_SIG(ILL);
- SSH_SIG(INT);
- SSH_SIG(KILL);
- SSH_SIG(PIPE);
- SSH_SIG(QUIT);
- SSH_SIG(SEGV);
- SSH_SIG(TERM);
- SSH_SIG(USR1);
- SSH_SIG(USR2);
-#undef SSH_SIG
- return "SIG@openssh.com";
-}
-
-static void
-session_exit_message(Session *s, int status)
-{
- Channel *c;
-
- if ((c = channel_lookup(s->chanid)) == NULL)
- fatal("session_exit_message: session %d: no channel %d",
- s->self, s->chanid);
- debug("session_exit_message: session %d channel %d pid %ld",
- s->self, s->chanid, (long)s->pid);
-
- if (WIFEXITED(status)) {
- channel_request_start(s->chanid, "exit-status", 0);
- packet_put_int(WEXITSTATUS(status));
- packet_send();
- } else if (WIFSIGNALED(status)) {
- channel_request_start(s->chanid, "exit-signal", 0);
- packet_put_cstring(sig2name(WTERMSIG(status)));
-#ifdef WCOREDUMP
- packet_put_char(WCOREDUMP(status));
-#else /* WCOREDUMP */
- packet_put_char(0);
-#endif /* WCOREDUMP */
- packet_put_cstring("");
- packet_put_cstring("");
- packet_send();
- } else {
- /* Some weird exit cause. Just exit. */
- packet_disconnect("wait returned status %04x.", status);
- }
-
- /* Ok to close channel now */
- channel_set_wait_for_exit(s->chanid, 0);
-
- /* disconnect channel */
- debug("session_exit_message: release channel %d", s->chanid);
- channel_cancel_cleanup(s->chanid);
- /*
- * emulate a write failure with 'chan_write_failed', nobody will be
- * interested in data we write.
- * Note that we must not call 'chan_read_failed', since there could
- * be some more data waiting in the pipe.
- */
- if (c->ostate != CHAN_OUTPUT_CLOSED)
- chan_write_failed(c);
- s->chanid = -1;
-}
-
-void
-session_close(Session *s)
-{
- debug("session_close: session %d pid %ld", s->self, (long)s->pid);
- if (s->ttyfd != -1) {
- fatal_remove_cleanup(session_pty_cleanup, (void *)s);
- session_pty_cleanup(s);
- }
- if (s->auth_file != NULL) {
- fatal_remove_cleanup(session_xauthfile_cleanup, (void *)s);
- session_xauthfile_cleanup(s);
- xfree(s->auth_file);
- }
- if (s->term)
- xfree(s->term);
- if (s->display)
- xfree(s->display);
- if (s->auth_display)
- xfree(s->auth_display);
- if (s->auth_data)
- xfree(s->auth_data);
- if (s->auth_proto)
- xfree(s->auth_proto);
- if (s->command)
- xfree(s->command);
- session_free_env(&s->env);
- s->used = 0;
- session_proctitle(s);
-}
-
-void
-session_close_by_pid(pid_t pid, int status)
-{
- Session *s = session_by_pid(pid);
- if (s == NULL) {
- debug("session_close_by_pid: no session for pid %ld",
- (long)pid);
- return;
- }
- if (s->chanid != -1)
- session_exit_message(s, status);
- session_close(s);
-}
-
-/*
- * This is called when a channel dies before the session 'child' itself dies.
- * It can happen for example if we exit from an interactive shell before we
- * exit from forwarded X11 applications.
- */
-void
-session_close_by_channel(int id, void *arg)
-{
- Session *s = session_by_channel(id);
- if (s == NULL) {
- debug("session_close_by_channel: no session for id %d", id);
- return;
- }
- debug("session_close_by_channel: channel %d child %ld",
- id, (long)s->pid);
- if (s->pid != 0) {
- debug("session_close_by_channel: channel %d: has child", id);
- /*
- * delay detach of session, but release pty, since
- * the fd's to the child are already closed
- */
- if (s->ttyfd != -1) {
- fatal_remove_cleanup(session_pty_cleanup, (void *)s);
- session_pty_cleanup(s);
- }
- return;
- }
- /* detach by removing callback */
- channel_cancel_cleanup(s->chanid);
- s->chanid = -1;
- session_close(s);
-}
-
-void
-session_destroy_all(void (*closefunc)(Session *))
-{
- int i;
- for (i = 0; i < MAX_SESSIONS; i++) {
- Session *s = &sessions[i];
- if (s->used) {
- if (closefunc != NULL)
- closefunc(s);
- else
- session_close(s);
- }
- }
-}
-
-static char *
-session_tty_list(void)
-{
- static char buf[1024];
- int i;
- buf[0] = '\0';
- for (i = 0; i < MAX_SESSIONS; i++) {
- Session *s = &sessions[i];
- if (s->used && s->ttyfd != -1) {
- if (buf[0] != '\0')
- strlcat(buf, ",", sizeof buf);
- strlcat(buf, strrchr(s->tty, '/') + 1, sizeof buf);
- }
- }
- if (buf[0] == '\0')
- strlcpy(buf, "notty", sizeof buf);
- return buf;
-}
-
-void
-session_proctitle(Session *s)
-{
- if (s->pw == NULL)
- error("no user for session %d", s->self);
- else
- setproctitle("%s@%s", s->pw->pw_name, session_tty_list());
-}
-
-int
-session_setup_x11fwd(Session *s)
-{
- struct stat st;
- char display[512], auth_display[512];
- char hostname[MAXHOSTNAMELEN];
-
- if (no_x11_forwarding_flag) {
- packet_send_debug("X11 forwarding disabled in user configuration file.");
- return 0;
- }
- if (!options.x11_forwarding) {
- debug("X11 forwarding disabled in server configuration file.");
- return 0;
- }
- if (!options.xauth_location ||
- (stat(options.xauth_location, &st) == -1)) {
- packet_send_debug("No xauth program; cannot forward with spoofing.");
- return 0;
- }
- if (s->display != NULL) {
- debug("X11 display already set.");
- return 0;
- }
- if (x11_create_display_inet(options.x11_display_offset,
- options.x11_use_localhost, s->single_connection,
- &s->display_number) == -1) {
- debug("x11_create_display_inet failed.");
- return 0;
- }
-
- /* Set up a suitable value for the DISPLAY variable. */
- if (gethostname(hostname, sizeof(hostname)) < 0)
- fatal("gethostname: %.100s", strerror(errno));
- /*
- * auth_display must be used as the displayname when the
- * authorization entry is added with xauth(1). This will be
- * different than the DISPLAY string for localhost displays.
- */
- if (options.x11_use_localhost) {
- snprintf(display, sizeof display, "localhost:%u.%u",
- s->display_number, s->screen);
- snprintf(auth_display, sizeof auth_display, "unix:%u.%u",
- s->display_number, s->screen);
- s->display = xstrdup(display);
- s->auth_display = xstrdup(auth_display);
- } else {
-#ifdef IPADDR_IN_DISPLAY
- struct hostent *he;
- struct in_addr my_addr;
-
- he = gethostbyname(hostname);
- if (he == NULL) {
- error("Can't get IP address for X11 DISPLAY.");
- packet_send_debug("Can't get IP address for X11 DISPLAY.");
- return 0;
- }
- memcpy(&my_addr, he->h_addr_list[0], sizeof(struct in_addr));
- snprintf(display, sizeof display, "%.50s:%u.%u", inet_ntoa(my_addr),
- s->display_number, s->screen);
-#else
- snprintf(display, sizeof display, "%.400s:%u.%u", hostname,
- s->display_number, s->screen);
-#endif
- s->display = xstrdup(display);
- s->auth_display = xstrdup(display);
- }
-
- return 1;
-}
-
-#ifdef USE_PAM
-int session_do_pam_conv(int, struct pam_message **,
- struct pam_response **, void *);
-
-static struct pam_conv session_pam_conv = {
- session_do_pam_conv,
- NULL
-};
-
-static void
-session_do_pam(Session *s, int do_open)
-{
- int pam_retval;
- char *where, *old_tty, *old_tty_copy = NULL;
- struct pam_conv old_conv, *old_conv_ptr;
-
- if (!s || !s->authctxt || !s->authctxt->pam || !s->authctxt->pam->h)
- return;
-
- /* Save current PAM item values */
- where = "getting PAM_CONV";
- pam_retval = pam_get_item(s->authctxt->pam->h, PAM_CONV,
- (void **) &old_conv_ptr);
- if (pam_retval != PAM_SUCCESS)
- goto done;
- old_conv = *old_conv_ptr;
-
- where = "getting PAM_TTY";
- pam_retval = pam_get_item(s->authctxt->pam->h, PAM_TTY,
- (void **) &old_tty);
- if (pam_retval != PAM_SUCCESS)
- goto done;
- old_tty_copy = xstrdup(old_tty);
-
- /* Change PAM_TTY and PAM_CONV items */
- where = "setting PAM_TTY";
- pam_retval = pam_set_item(s->authctxt->pam->h, PAM_TTY, s->tty);
- if (pam_retval != PAM_SUCCESS)
- goto done;
-
- where = "setting PAM_CONV";
- session_pam_conv.appdata_ptr = s;
- pam_retval = pam_set_item(s->authctxt->pam->h,
- PAM_CONV, &session_pam_conv);
- if (pam_retval != PAM_SUCCESS)
- goto done;
-
- /* Call pam_open/close_session() */
- if (do_open) {
- where = "calling pam_open_session()";
- pam_retval = pam_open_session(s->authctxt->pam->h, 0);
- }
- else {
- where = "calling pam_close_session()";
- pam_retval = pam_close_session(s->authctxt->pam->h, 0);
- }
-
- /* Reset PAM_TTY and PAM_CONV items to previous values */
- where = "setting PAM_TTY";
- pam_retval = pam_set_item(s->authctxt->pam->h, PAM_TTY, old_tty_copy);
- if (pam_retval != PAM_SUCCESS)
- goto done;
-
- where = "setting PAM_CONV";
- pam_retval = pam_set_item(s->authctxt->pam->h, PAM_CONV, &old_conv);
- if (pam_retval != PAM_SUCCESS)
- goto done;
-
- session_pam_conv.appdata_ptr = NULL;
-
-done:
- if (old_tty_copy)
- xfree(old_tty_copy);
-
- if (pam_retval == PAM_SUCCESS)
- return;
-
- /* fatal()? probably not... */
- log("PAM failed[%d] while %s: %s", pam_retval, where,
- PAM_STRERROR(s->authctxt->pam->h, pam_retval));
-}
-
-int
-session_do_pam_conv(int num_prompts,
- struct pam_message **prompts,
- struct pam_response **resp,
- void *app_data)
-{
- Session *s = (Session *) app_data;
-
- struct pam_response *reply;
- int count;
- char *prompt;
-
- if (channel_lookup(s->chanid) == NULL)
- return PAM_CONV_ERR;
-
- /* PAM will free this later */
- reply = xmalloc(num_prompts * sizeof(*reply));
-
- (void) memset(reply, 0, num_prompts * sizeof(*reply));
- for (count = 0; count < num_prompts; count++) {
- switch(PAM_MSG_MEMBER(prompts, count, msg_style)) {
- case PAM_TEXT_INFO:
- /* Write to stdout of channel */
- prompt = PAM_MSG_MEMBER(prompts, count, msg);
- if (prompt != NULL && s->ttyfd != -1) {
- debug2("session_do_pam_conv: text info "
- "prompt: %s", prompt);
- (void) write(s->ttyfd, prompt, strlen(prompt));
- (void) write(s->ttyfd, "\n", 1);
- }
- reply[count].resp = xstrdup("");
- reply[count].resp_retcode = PAM_SUCCESS;
- break;
- case PAM_ERROR_MSG:
- /* Write to stderr of channel */
- prompt = PAM_MSG_MEMBER(prompts, count, msg);
- if (prompt != NULL && s->ttyfd != -1) {
- debug2("session_do_pam_conv: error "
- "prompt: %s", prompt);
- (void) write(s->ttyfd, prompt, strlen(prompt));
- (void) write(s->ttyfd, "\n", 1);
- }
- reply[count].resp = xstrdup("");
- reply[count].resp_retcode = PAM_SUCCESS;
- break;
- case PAM_PROMPT_ECHO_ON:
- case PAM_PROMPT_ECHO_OFF:
- /*
- * XXX Someday add support for echo on/off prompts
- * here on sessions with ttys.
- */
- default:
- xfree(reply);
- return PAM_CONV_ERR;
- }
- }
-
- *resp = reply;
-
- return PAM_SUCCESS;
-}
-#endif /* USE_PAM */
-
-static void
-do_authenticated2(Authctxt *authctxt)
-{
- server_loop2(authctxt);
-}
-
-/*
- * Drop the privileges. We need this for the in-process SFTP server only. For
- * the shell and the external subsystem the exec(2) call will do the P = E = I
- * assignment itself. Never change the privileges if the connecting user is
- * root. See privileges(5) if the terminology used here is not known to you.
- */
-static void
-drop_privs(uid_t uid)
-{
- priv_set_t *priv_inherit;
-
- /* If root is connecting we are done. */
- if (uid == 0)
- return;
-
- if ((priv_inherit = priv_allocset()) == NULL)
- fatal("priv_allocset: %s", strerror(errno));
- if (getppriv(PRIV_INHERITABLE, priv_inherit) != 0)
- fatal("getppriv: %s", strerror(errno));
-
- /*
- * This will limit E as well. Note that before this P was a
- * superset of I, see permanently_set_uid().
- */
- if (setppriv(PRIV_SET, PRIV_PERMITTED, priv_inherit) == -1)
- fatal("setppriv: %s", strerror(errno));
-
- priv_freeset(priv_inherit);
-
- /*
- * By manipulating the P set above we entered a PA mode which we
- * do not need to retain in.
- */
- if (setpflags(PRIV_AWARE, 0) == -1)
- fatal("setpflags: %s", strerror(errno));
-}
diff --git a/usr/src/cmd/ssh/sshd/sshd.c b/usr/src/cmd/ssh/sshd/sshd.c
deleted file mode 100644
index 3be0890a8c..0000000000
--- a/usr/src/cmd/ssh/sshd/sshd.c
+++ /dev/null
@@ -1,2051 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * This program is the ssh daemon. It listens for connections from clients,
- * and performs authentication, executes use commands or shell, and forwards
- * information to/from the application to the user client over an encrypted
- * connection. This can also handle forwarding of X11, TCP/IP, and
- * authentication agent connections.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- *
- * SSH2 implementation:
- * Privilege Separation:
- *
- * Copyright (c) 2000, 2001, 2002 Markus Friedl. All rights reserved.
- * Copyright (c) 2002 Niels Provos. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: sshd.c,v 1.260 2002/09/27 10:42:09 mickey Exp $");
-
-#include <openssl/dh.h>
-#include <openssl/bn.h>
-#include <openssl/md5.h>
-
-#include <openssl/rand.h>
-
-#include "ssh.h"
-#include "ssh1.h"
-#include "ssh2.h"
-#include "xmalloc.h"
-#include "rsa.h"
-#include "sshpty.h"
-#include "packet.h"
-#include "mpaux.h"
-#include "log.h"
-#include "servconf.h"
-#include "uidswap.h"
-#include "compat.h"
-#include "buffer.h"
-#include "cipher.h"
-#include "kex.h"
-#include "key.h"
-#include "dh.h"
-#include "myproposal.h"
-#include "authfile.h"
-#include "pathnames.h"
-#include "atomicio.h"
-#include "canohost.h"
-#include "auth.h"
-#include "misc.h"
-#include "dispatch.h"
-#include "channels.h"
-#include "session.h"
-#include "g11n.h"
-#include "sshlogin.h"
-#include "xlist.h"
-#include "engine.h"
-
-#ifdef HAVE_BSM
-#include "bsmaudit.h"
-#endif /* HAVE_BSM */
-
-#ifdef ALTPRIVSEP
-#include "altprivsep.h"
-#endif /* ALTPRIVSEP */
-
-#ifdef HAVE_SOLARIS_CONTRACTS
-#include <sys/ctfs.h>
-#include <sys/contract.h>
-#include <sys/contract/process.h>
-#include <libcontract.h>
-#endif /* HAVE_SOLARIS_CONTRACTS */
-
-#ifdef GSSAPI
-#include "ssh-gss.h"
-#endif /* GSSAPI */
-
-#ifdef LIBWRAP
-#include <tcpd.h>
-#include <syslog.h>
-#ifndef lint
-int allow_severity = LOG_INFO;
-int deny_severity = LOG_WARNING;
-#endif /* lint */
-#endif /* LIBWRAP */
-
-#ifndef O_NOCTTY
-#define O_NOCTTY 0
-#endif
-
-#ifdef HAVE___PROGNAME
-extern char *__progname;
-#else
-char *__progname;
-#endif
-
-/* Server configuration options. */
-ServerOptions options;
-
-/* Name of the server configuration file. */
-static char *config_file_name = _PATH_SERVER_CONFIG_FILE;
-
-/*
- * Flag indicating whether IPv4 or IPv6. This can be set on the command line.
- * Default value is AF_UNSPEC means both IPv4 and IPv6.
- */
-#ifdef IPV4_DEFAULT
-int IPv4or6 = AF_INET;
-#else
-int IPv4or6 = AF_UNSPEC;
-#endif
-
-/*
- * Debug mode flag. This can be set on the command line. If debug
- * mode is enabled, extra debugging output will be sent to the system
- * log, the daemon will not go to background, and will exit after processing
- * the first connection.
- */
-int debug_flag = 0;
-
-/* Flag indicating that the daemon should only test the configuration and keys. */
-static int test_flag = 0;
-
-/* Flag indicating that the daemon is being started from inetd. */
-static int inetd_flag = 0;
-
-/* Flag indicating that sshd should not detach and become a daemon. */
-static int no_daemon_flag = 0;
-
-/* debug goes to stderr unless inetd_flag is set */
-int log_stderr = 0;
-
-/* Saved arguments to main(). */
-static char **saved_argv;
-static int saved_argc;
-
-/*
- * The sockets that the server is listening; this is used in the SIGHUP
- * signal handler.
- */
-#define MAX_LISTEN_SOCKS 16
-static int listen_socks[MAX_LISTEN_SOCKS];
-static int num_listen_socks = 0;
-
-/*
- * the client's version string, passed by sshd2 in compat mode. if != NULL,
- * sshd will skip the version-number exchange
- */
-static char *client_version_string = NULL;
-static char *server_version_string = NULL;
-
-/* for rekeying XXX fixme */
-Kex *xxx_kex;
-
-/*
- * Any really sensitive data in the application is contained in this
- * structure. The idea is that this structure could be locked into memory so
- * that the pages do not get written into swap. However, there are some
- * problems. The private key contains BIGNUMs, and we do not (in principle)
- * have access to the internals of them, and locking just the structure is
- * not very useful. Currently, memory locking is not implemented.
- */
-static struct {
- Key *server_key; /* ephemeral server key */
- Key *ssh1_host_key; /* ssh1 host key */
- Key **host_keys; /* all private host keys */
- int have_ssh1_key;
- int have_ssh2_key;
- u_char ssh1_cookie[SSH_SESSION_KEY_LENGTH];
-} sensitive_data;
-
-/*
- * Flag indicating whether the RSA server key needs to be regenerated.
- * Is set in the SIGALRM handler and cleared when the key is regenerated.
- */
-static volatile sig_atomic_t key_do_regen = 0;
-
-/* This is set to true when a signal is received. */
-static volatile sig_atomic_t received_sighup = 0;
-static volatile sig_atomic_t received_sigterm = 0;
-
-/* session identifier, used by RSA-auth */
-u_char session_id[16];
-
-/* same for ssh2 */
-u_char *session_id2 = NULL;
-int session_id2_len = 0;
-
-/* record remote hostname or ip */
-u_int utmp_len = MAXHOSTNAMELEN;
-
-/* options.max_startup sized array of fd ints */
-static int *startup_pipes = NULL;
-static int startup_pipe = -1; /* in child */
-
-/* sshd_config buffer */
-Buffer cfg;
-
-#ifdef GSSAPI
-static gss_OID_set mechs = GSS_C_NULL_OID_SET;
-#endif /* GSSAPI */
-
-/* Prototypes for various functions defined later in this file. */
-void destroy_sensitive_data(void);
-static void demote_sensitive_data(void);
-
-static void do_ssh1_kex(void);
-static void do_ssh2_kex(void);
-
-/*
- * Close all listening sockets
- */
-static void
-close_listen_socks(void)
-{
- int i;
-
- for (i = 0; i < num_listen_socks; i++)
- (void) close(listen_socks[i]);
- num_listen_socks = -1;
-}
-
-static void
-close_startup_pipes(void)
-{
- int i;
-
- if (startup_pipes)
- for (i = 0; i < options.max_startups; i++)
- if (startup_pipes[i] != -1)
- (void) close(startup_pipes[i]);
-}
-
-/*
- * Signal handler for SIGHUP. Sshd execs itself when it receives SIGHUP;
- * the effect is to reread the configuration file (and to regenerate
- * the server key).
- */
-static void
-sighup_handler(int sig)
-{
- int save_errno = errno;
-
- received_sighup = 1;
- (void) signal(SIGHUP, sighup_handler);
- errno = save_errno;
-}
-
-/*
- * Called from the main program after receiving SIGHUP.
- * Restarts the server.
- */
-static void
-sighup_restart(void)
-{
- log("Received SIGHUP; restarting.");
- close_listen_socks();
- close_startup_pipes();
- (void) execv(saved_argv[0], saved_argv);
- log("RESTART FAILED: av[0]='%.100s', error: %.100s.", saved_argv[0],
- strerror(errno));
- exit(1);
-}
-
-/*
- * Generic signal handler for terminating signals in the master daemon.
- */
-static void
-sigterm_handler(int sig)
-{
- received_sigterm = sig;
-}
-
-/*
- * SIGCHLD handler. This is called whenever a child dies. This will then
- * reap any zombies left by exited children.
- */
-static void
-main_sigchld_handler(int sig)
-{
- int save_errno = errno;
- pid_t pid;
- int status;
-
- while ((pid = waitpid(-1, &status, WNOHANG)) > 0 ||
- (pid < 0 && errno == EINTR))
- ;
-
- (void) signal(SIGCHLD, main_sigchld_handler);
- errno = save_errno;
-}
-
-/*
- * Signal handler for the alarm after the login grace period has expired. This
- * is for the (soon-to-be) unprivileged child only. The monitor gets an event on
- * the communication pipe and exits as well.
- */
-static void
-grace_alarm_handler(int sig)
-{
- /* Log error and exit. */
- fatal("Timeout before authentication for %.200s", get_remote_ipaddr());
-}
-
-#ifdef HAVE_SOLARIS_CONTRACTS
-static int contracts_fd = -1;
-void
-contracts_pre_fork()
-{
- const char *during = "opening process contract template";
-
- /*
- * Failure should not be treated as fatal on the theory that
- * it's better to start with children in the same contract as
- * the master listener than not at all.
- */
-
- if (contracts_fd == -1) {
- if ((contracts_fd = open64(CTFS_ROOT "/process/template",
- O_RDWR)) == -1)
- goto cleanup;
-
- during = "setting sundry contract terms";
- if ((errno = ct_pr_tmpl_set_param(contracts_fd, CT_PR_PGRPONLY)))
- goto cleanup;
-
- if ((errno = ct_tmpl_set_informative(contracts_fd, CT_PR_EV_HWERR)))
- goto cleanup;
-
- if ((errno = ct_pr_tmpl_set_fatal(contracts_fd, CT_PR_EV_HWERR)))
- goto cleanup;
-
- if ((errno = ct_tmpl_set_critical(contracts_fd, 0)))
- goto cleanup;
- }
-
- during = "setting active template";
- if ((errno = ct_tmpl_activate(contracts_fd)))
- goto cleanup;
-
- debug3("Set active contract");
- return;
-
-cleanup:
- if (contracts_fd != -1)
- (void) close(contracts_fd);
-
- contracts_fd = -1;
-
- if (errno)
- debug2("Error while trying to set up active contract"
- " template: %s while %s", strerror(errno), during);
-}
-
-void
-contracts_post_fork_child()
-{
- /* Clear active template so fork() creates no new contracts. */
-
- if (contracts_fd == -1)
- return;
-
- if ((errno = (ct_tmpl_clear(contracts_fd))))
- debug2("Error while trying to clear active contract template"
- " (child): %s", strerror(errno));
- else
- debug3("Cleared active contract template (child)");
-
- (void) close(contracts_fd);
-
- contracts_fd = -1;
-}
-
-void
-contracts_post_fork_parent(int fork_succeeded)
-{
- char path[PATH_MAX];
- int cfd, n;
- ct_stathdl_t st;
- ctid_t latest;
-
- /* Clear active template, abandon latest contract. */
- if (contracts_fd == -1)
- return;
-
- if ((errno = ct_tmpl_clear(contracts_fd)))
- debug2("Error while clearing active contract template: %s",
- strerror(errno));
- else
- debug3("Cleared active contract template (parent)");
-
- if (!fork_succeeded)
- return;
-
- if ((cfd = open64(CTFS_ROOT "/process/latest", O_RDONLY)) == -1) {
- debug2("Error while getting latest contract: %s",
- strerror(errno));
- return;
- }
-
- if ((errno = ct_status_read(cfd, CTD_COMMON, &st)) != 0) {
- debug2("Error while getting latest contract ID: %s",
- strerror(errno));
- (void) close(cfd);
- return;
- }
-
- latest = ct_status_get_id(st);
- ct_status_free(st);
- (void) close(cfd);
-
- n = snprintf(path, PATH_MAX, CTFS_ROOT "/all/%ld/ctl", latest);
-
- if (n >= PATH_MAX) {
- debug2("Error while opening the latest contract ctl file: %s",
- strerror(ENAMETOOLONG));
- return;
- }
-
- if ((cfd = open64(path, O_WRONLY)) == -1) {
- debug2("Error while opening the latest contract ctl file: %s",
- strerror(errno));
- return;
- }
-
- if ((errno = ct_ctl_abandon(cfd)))
- debug2("Error while abandoning latest contract: %s",
- strerror(errno));
- else
- debug3("Abandoned latest contract");
-
- (void) close(cfd);
-}
-#endif /* HAVE_SOLARIS_CONTRACTS */
-
-/*
- * Signal handler for the key regeneration alarm. Note that this
- * alarm only occurs in the daemon waiting for connections, and it does not
- * do anything with the private key or random state before forking.
- * Thus there should be no concurrency control/asynchronous execution
- * problems.
- */
-static void
-generate_ephemeral_server_key(void)
-{
- u_int32_t rnd = 0;
- int i;
-
- verbose("Generating %s%d bit RSA key.",
- sensitive_data.server_key ? "new " : "", options.server_key_bits);
- if (sensitive_data.server_key != NULL)
- key_free(sensitive_data.server_key);
- sensitive_data.server_key = key_generate(KEY_RSA1,
- options.server_key_bits);
- verbose("RSA key generation complete.");
-
- for (i = 0; i < SSH_SESSION_KEY_LENGTH; i++) {
- if (i % 4 == 0)
- rnd = arc4random();
- sensitive_data.ssh1_cookie[i] = rnd & 0xff;
- rnd >>= 8;
- }
- arc4random_stir();
-}
-
-static void
-key_regeneration_alarm(int sig)
-{
- int save_errno = errno;
-
- (void) signal(SIGALRM, SIG_DFL);
- errno = save_errno;
- key_do_regen = 1;
-}
-
-static void
-sshd_exchange_identification(int sock_in, int sock_out)
-{
- int i, mismatch;
- int remote_major, remote_minor;
- int major, minor;
- char *s;
- char buf[256]; /* Must not be larger than remote_version. */
- char remote_version[256]; /* Must be at least as big as buf. */
-
- if ((options.protocol & SSH_PROTO_1) &&
- (options.protocol & SSH_PROTO_2)) {
- major = PROTOCOL_MAJOR_1;
- minor = 99;
- } else if (options.protocol & SSH_PROTO_2) {
- major = PROTOCOL_MAJOR_2;
- minor = PROTOCOL_MINOR_2;
- } else {
- major = PROTOCOL_MAJOR_1;
- minor = PROTOCOL_MINOR_1;
- }
- (void) snprintf(buf, sizeof buf, "SSH-%d.%d-%.100s\n", major, minor, SSH_VERSION);
- server_version_string = xstrdup(buf);
-
- if (client_version_string == NULL) {
- /* Send our protocol version identification. */
- if (atomicio(write, sock_out, server_version_string,
- strlen(server_version_string))
- != strlen(server_version_string)) {
- log("Could not write ident string to %s", get_remote_ipaddr());
- fatal_cleanup();
- }
-
- /* Read other sides version identification. */
- (void) memset(buf, 0, sizeof(buf));
- for (i = 0; i < sizeof(buf) - 1; i++) {
- if (atomicio(read, sock_in, &buf[i], 1) != 1) {
- log("Did not receive identification string from %s",
- get_remote_ipaddr());
- fatal_cleanup();
- }
- if (buf[i] == '\r') {
- buf[i] = 0;
- /* Kludge for F-Secure Macintosh < 1.0.2 */
- if (i == 12 &&
- strncmp(buf, "SSH-1.5-W1.0", 12) == 0)
- break;
- continue;
- }
- if (buf[i] == '\n') {
- buf[i] = 0;
- break;
- }
- }
- buf[sizeof(buf) - 1] = 0;
- client_version_string = xstrdup(buf);
- }
-
- /*
- * Check that the versions match. In future this might accept
- * several versions and set appropriate flags to handle them.
- */
- if (sscanf(client_version_string, "SSH-%d.%d-%[^\n]\n",
- &remote_major, &remote_minor, remote_version) != 3) {
- s = "Protocol mismatch.\n";
- (void) atomicio(write, sock_out, s, strlen(s));
- (void) close(sock_in);
- (void) close(sock_out);
- log("Bad protocol version identification '%.100s' from %s",
- client_version_string, get_remote_ipaddr());
- fatal_cleanup();
- }
- debug("Client protocol version %d.%d; client software version %.100s",
- remote_major, remote_minor, remote_version);
-
- compat_datafellows(remote_version);
-
- if (datafellows & SSH_BUG_PROBE) {
- log("probed from %s with %s. Don't panic.",
- get_remote_ipaddr(), client_version_string);
- fatal_cleanup();
- }
-
- if (datafellows & SSH_BUG_SCANNER) {
- log("scanned from %s with %s. Don't panic.",
- get_remote_ipaddr(), client_version_string);
- fatal_cleanup();
- }
-
- mismatch = 0;
- switch (remote_major) {
- case 1:
- if (remote_minor == 99) {
- if (options.protocol & SSH_PROTO_2)
- enable_compat20();
- else
- mismatch = 1;
- break;
- }
- if (!(options.protocol & SSH_PROTO_1)) {
- mismatch = 1;
- break;
- }
- if (remote_minor < 3) {
- packet_disconnect("Your ssh version is too old and "
- "is no longer supported. Please install a newer version.");
- } else if (remote_minor == 3) {
- /* note that this disables agent-forwarding */
- enable_compat13();
- }
- break;
- case 2:
- if (options.protocol & SSH_PROTO_2) {
- enable_compat20();
- break;
- }
- /* FALLTHROUGH */
- default:
- mismatch = 1;
- break;
- }
- chop(server_version_string);
- debug("Local version string %.200s", server_version_string);
-
- if (mismatch) {
- s = "Protocol major versions differ.\n";
- (void) atomicio(write, sock_out, s, strlen(s));
- (void) close(sock_in);
- (void) close(sock_out);
- log("Protocol major versions differ for %s: %.200s vs. %.200s",
- get_remote_ipaddr(),
- server_version_string, client_version_string);
- fatal_cleanup();
- }
-}
-
-/* Destroy the host and server keys. They will no longer be needed. */
-void
-destroy_sensitive_data(void)
-{
- int i;
-
- if (sensitive_data.server_key) {
- key_free(sensitive_data.server_key);
- sensitive_data.server_key = NULL;
- }
- for (i = 0; i < options.num_host_key_files; i++) {
- if (sensitive_data.host_keys[i]) {
- key_free(sensitive_data.host_keys[i]);
- sensitive_data.host_keys[i] = NULL;
- }
- }
- sensitive_data.ssh1_host_key = NULL;
- (void) memset(sensitive_data.ssh1_cookie, 0, SSH_SESSION_KEY_LENGTH);
-}
-
-/* Demote private to public keys for network child */
-static void
-demote_sensitive_data(void)
-{
- Key *tmp;
- int i;
-
- if (sensitive_data.server_key) {
- tmp = key_demote(sensitive_data.server_key);
- key_free(sensitive_data.server_key);
- sensitive_data.server_key = tmp;
- }
-
- for (i = 0; i < options.num_host_key_files; i++) {
- if (sensitive_data.host_keys[i]) {
- tmp = key_demote(sensitive_data.host_keys[i]);
- key_free(sensitive_data.host_keys[i]);
- sensitive_data.host_keys[i] = tmp;
- if (tmp->type == KEY_RSA1)
- sensitive_data.ssh1_host_key = tmp;
- }
- }
-
- /* We do not clear ssh1_host key and cookie. XXX - Okay Niels? */
-}
-
-static char *
-list_hostkey_types(void)
-{
- Buffer b;
- char *p;
- int i;
-
- buffer_init(&b);
- for (i = 0; i < options.num_host_key_files; i++) {
- Key *key = sensitive_data.host_keys[i];
- if (key == NULL)
- continue;
- switch (key->type) {
- case KEY_RSA:
- case KEY_DSA:
- if (buffer_len(&b) > 0)
- buffer_append(&b, ",", 1);
- p = key_ssh_name(key);
- buffer_append(&b, p, strlen(p));
- break;
- }
- }
- buffer_append(&b, "\0", 1);
- p = xstrdup(buffer_ptr(&b));
- buffer_free(&b);
- debug("list_hostkey_types: %s", p);
- return p;
-}
-
-#ifdef lint
-static
-#endif /* lint */
-Key *
-get_hostkey_by_type(int type)
-{
- int i;
-
- for (i = 0; i < options.num_host_key_files; i++) {
- Key *key = sensitive_data.host_keys[i];
- if (key != NULL && key->type == type)
- return key;
- }
- return NULL;
-}
-
-#ifdef lint
-static
-#endif /* lint */
-Key *
-get_hostkey_by_index(int ind)
-{
- if (ind < 0 || ind >= options.num_host_key_files)
- return (NULL);
- return (sensitive_data.host_keys[ind]);
-}
-
-#ifdef lint
-static
-#endif /* lint */
-int
-get_hostkey_index(Key *key)
-{
- int i;
-
- for (i = 0; i < options.num_host_key_files; i++) {
- if (key == sensitive_data.host_keys[i])
- return (i);
- }
- return (-1);
-}
-
-/*
- * returns 1 if connection should be dropped, 0 otherwise.
- * dropping starts at connection #max_startups_begin with a probability
- * of (max_startups_rate/100). the probability increases linearly until
- * all connections are dropped for startups > max_startups
- */
-static int
-drop_connection(int startups)
-{
- double p, r;
-
- if (startups < options.max_startups_begin)
- return 0;
- if (startups >= options.max_startups)
- return 1;
- if (options.max_startups_rate == 100)
- return 1;
-
- p = 100 - options.max_startups_rate;
- p *= startups - options.max_startups_begin;
- p /= (double) (options.max_startups - options.max_startups_begin);
- p += options.max_startups_rate;
- p /= 100.0;
- r = arc4random() / (double) UINT_MAX;
-
- debug("drop_connection: p %g, r %g", p, r);
- return (r < p) ? 1 : 0;
-}
-
-static void
-usage(void)
-{
- (void) fprintf(stderr, gettext("sshd version %s\n"), SSH_VERSION);
- (void) fprintf(stderr,
- gettext("Usage: %s [options]\n"
- "Options:\n"
- " -f file Configuration file (default %s)\n"
- " -d Debugging mode (multiple -d means more "
- "debugging)\n"
- " -i Started from inetd\n"
- " -D Do not fork into daemon mode\n"
- " -t Only test configuration file and keys\n"
- " -q Quiet (no logging)\n"
- " -p port Listen on the specified port (default: 22)\n"
- " -k seconds Regenerate server key every this many seconds "
- "(default: 3600)\n"
- " -g seconds Grace period for authentication (default: 600)\n"
- " -b bits Size of server RSA key (default: 768 bits)\n"
- " -h file File from which to read host key (default: %s)\n"
- " -4 Use IPv4 only\n"
- " -6 Use IPv6 only\n"
- " -o option Process the option as if it was read from "
- "a configuration file.\n"),
- __progname, _PATH_SERVER_CONFIG_FILE, _PATH_HOST_KEY_FILE);
- exit(1);
-}
-
-/*
- * Main program for the daemon.
- */
-int
-main(int ac, char **av)
-{
- extern char *optarg;
- extern int optind;
- int opt, j, i, fdsetsz, sock_in = 0, sock_out = 0, newsock = -1, on = 1;
- pid_t pid;
- socklen_t fromlen;
- fd_set *fdset;
- struct sockaddr_storage from;
- const char *remote_ip;
- int remote_port;
- FILE *f;
- struct addrinfo *ai;
- char ntop[NI_MAXHOST], strport[NI_MAXSERV];
- int listen_sock, maxfd;
- int startup_p[2];
- int startups = 0;
- Authctxt *authctxt = NULL;
- Key *key;
- int ret, key_used = 0;
-#ifdef HAVE_BSM
- au_id_t auid = AU_NOAUDITID;
-#endif /* HAVE_BSM */
- int mpipe;
-
- __progname = get_progname(av[0]);
-
- (void) g11n_setlocale(LC_ALL, "");
-
- init_rng();
-
- /* Save argv. */
- saved_argc = ac;
- saved_argv = av;
-
- /* Initialize configuration options to their default values. */
- initialize_server_options(&options);
-
- /* Parse command-line arguments. */
- while ((opt = getopt(ac, av, "f:p:b:k:h:g:V:u:o:dDeiqtQ46")) != -1) {
- switch (opt) {
- case '4':
- IPv4or6 = AF_INET;
- break;
- case '6':
- IPv4or6 = AF_INET6;
- break;
- case 'f':
- config_file_name = optarg;
- break;
- case 'd':
- if (0 == debug_flag) {
- debug_flag = 1;
- options.log_level = SYSLOG_LEVEL_DEBUG1;
- } else if (options.log_level < SYSLOG_LEVEL_DEBUG3) {
- options.log_level++;
- } else {
- (void) fprintf(stderr,
- gettext("Debug level too high.\n"));
- exit(1);
- }
- break;
- case 'D':
- no_daemon_flag = 1;
- break;
- case 'e':
- log_stderr = 1;
- break;
- case 'i':
- inetd_flag = 1;
- break;
- case 'Q':
- /* ignored */
- break;
- case 'q':
- options.log_level = SYSLOG_LEVEL_QUIET;
- break;
- case 'b':
- options.server_key_bits = atoi(optarg);
- break;
- case 'p':
- options.ports_from_cmdline = 1;
- if (options.num_ports >= MAX_PORTS) {
- (void) fprintf(stderr, gettext("too many ports.\n"));
- exit(1);
- }
- options.ports[options.num_ports++] = a2port(optarg);
- if (options.ports[options.num_ports-1] == 0) {
- (void) fprintf(stderr, gettext("Bad port number.\n"));
- exit(1);
- }
- break;
- case 'g':
- if ((options.login_grace_time = convtime(optarg)) == -1) {
- (void) fprintf(stderr,
- gettext("Invalid login grace time.\n"));
- exit(1);
- }
- break;
- case 'k':
- if ((options.key_regeneration_time = convtime(optarg)) == -1) {
- (void) fprintf(stderr,
- gettext("Invalid key regeneration "
- "interval.\n"));
- exit(1);
- }
- break;
- case 'h':
- if (options.num_host_key_files >= MAX_HOSTKEYS) {
- (void) fprintf(stderr,
- gettext("too many host keys.\n"));
- exit(1);
- }
- options.host_key_files[options.num_host_key_files++] = optarg;
- break;
- case 'V':
- client_version_string = optarg;
- /* only makes sense with inetd_flag, i.e. no listen() */
- inetd_flag = 1;
- break;
- case 't':
- test_flag = 1;
- break;
- case 'o':
- if (process_server_config_line(&options, optarg,
- "command-line", 0, NULL, NULL, NULL, NULL) != 0)
- exit(1);
- break;
- case '?':
- default:
- usage();
- break;
- }
- }
-
- /*
- * There is no need to use the PKCS#11 engine in the master SSH process.
- */
- SSLeay_add_all_algorithms();
- seed_rng();
- channel_set_af(IPv4or6);
-
- /*
- * Force logging to stderr until we have loaded the private host
- * key (unless started from inetd)
- */
- log_init(__progname,
- options.log_level == SYSLOG_LEVEL_NOT_SET ?
- SYSLOG_LEVEL_INFO : options.log_level,
- options.log_facility == SYSLOG_FACILITY_NOT_SET ?
- SYSLOG_FACILITY_AUTH : options.log_facility,
- !inetd_flag);
-
-#ifdef _UNICOS
- /* Cray can define user privs drop all prives now!
- * Not needed on PRIV_SU systems!
- */
- drop_cray_privs();
-#endif
-
- /* Fetch our configuration */
- buffer_init(&cfg);
- load_server_config(config_file_name, &cfg);
- parse_server_config(&options, config_file_name, &cfg, NULL, NULL, NULL);
-
- /*
- * ChallengeResponseAuthentication is deprecated for protocol 2 which is
- * the default setting on Solaris. Warn the user about it. Note that
- * ChallengeResponseAuthentication is on by default but the option is
- * not set until fill_default_server_options() is called. If the option
- * is already set now, the user must have set it manually.
- */
- if ((options.protocol & SSH_PROTO_2) &&
- !(options.protocol & SSH_PROTO_1) &&
- options.challenge_response_authentication != -1) {
- log("ChallengeResponseAuthentication has been "
- "deprecated for the SSH Protocol 2. You should use "
- "KbdInteractiveAuthentication instead (which defaults to "
- "\"yes\").");
- }
-
- /*
- * While PAMAuthenticationViaKbdInt was not documented, it was
- * previously set in our default sshd_config and also the only way to
- * switch off the keyboard-interactive authentication. To maintain
- * backward compatibility, if PAMAuthenticationViaKbdInt is manually set
- * to "no" and KbdInteractiveAuthentication is not set, switch off the
- * keyboard-interactive authentication method as before. As with the
- * challenge response auth situation dealt above, we have not called
- * fill_default_server_options() yet so if KbdInteractiveAuthentication
- * is already set to 1 here the admin must have set it manually and we
- * will honour it.
- */
- if (options.kbd_interactive_authentication != 1 &&
- options.pam_authentication_via_kbd_int == 0) {
- options.kbd_interactive_authentication = 0;
- }
-
- /* Fill in default values for those options not explicitly set. */
- fill_default_server_options(&options);
-
- utmp_len = options.lookup_client_hostnames ? utmp_len : 0;
-
- /* Check that there are no remaining arguments. */
- if (optind < ac) {
- (void) fprintf(stderr, gettext("Extra argument %s.\n"), av[optind]);
- exit(1);
- }
-
- debug("sshd version %.100s", SSH_VERSION);
-
- /* load private host keys */
- if (options.num_host_key_files > 0)
- sensitive_data.host_keys =
- xmalloc(options.num_host_key_files * sizeof(Key *));
- for (i = 0; i < options.num_host_key_files; i++)
- sensitive_data.host_keys[i] = NULL;
- sensitive_data.server_key = NULL;
- sensitive_data.ssh1_host_key = NULL;
- sensitive_data.have_ssh1_key = 0;
- sensitive_data.have_ssh2_key = 0;
-
- for (i = 0; i < options.num_host_key_files; i++) {
- key = key_load_private(options.host_key_files[i], "", NULL);
- sensitive_data.host_keys[i] = key;
- if (key == NULL) {
- error("Could not load host key: %s",
- options.host_key_files[i]);
- sensitive_data.host_keys[i] = NULL;
- continue;
- }
- switch (key->type) {
- case KEY_RSA1:
- sensitive_data.ssh1_host_key = key;
- sensitive_data.have_ssh1_key = 1;
- break;
- case KEY_RSA:
- case KEY_DSA:
- sensitive_data.have_ssh2_key = 1;
- break;
- }
- debug("private host key: #%d type %d %s", i, key->type,
- key_type(key));
- }
- if ((options.protocol & SSH_PROTO_1) && !sensitive_data.have_ssh1_key) {
- log("Disabling protocol version 1. Could not load host key");
- options.protocol &= ~SSH_PROTO_1;
- }
- if ((options.protocol & SSH_PROTO_2) &&
- !sensitive_data.have_ssh2_key) {
-#ifdef GSSAPI
- if (options.gss_keyex)
- ssh_gssapi_server_mechs(&mechs);
-
- if (mechs == GSS_C_NULL_OID_SET) {
- log("Disabling protocol version 2. Could not load host"
- "key or GSS-API mechanisms");
- options.protocol &= ~SSH_PROTO_2;
- }
-#else
- log("Disabling protocol version 2. Could not load host key");
- options.protocol &= ~SSH_PROTO_2;
-#endif /* GSSAPI */
- }
- if (!(options.protocol & (SSH_PROTO_1|SSH_PROTO_2))) {
- log("sshd: no hostkeys available -- exiting.");
- exit(1);
- }
-
- /* Check certain values for sanity. */
- if (options.protocol & SSH_PROTO_1) {
- if (options.server_key_bits < 512 ||
- options.server_key_bits > 32768) {
- (void) fprintf(stderr, gettext("Bad server key size.\n"));
- exit(1);
- }
- /*
- * Check that server and host key lengths differ sufficiently. This
- * is necessary to make double encryption work with rsaref. Oh, I
- * hate software patents. I dont know if this can go? Niels
- */
- if (options.server_key_bits >
- BN_num_bits(sensitive_data.ssh1_host_key->rsa->n) -
- SSH_KEY_BITS_RESERVED && options.server_key_bits <
- BN_num_bits(sensitive_data.ssh1_host_key->rsa->n) +
- SSH_KEY_BITS_RESERVED) {
- options.server_key_bits =
- BN_num_bits(sensitive_data.ssh1_host_key->rsa->n) +
- SSH_KEY_BITS_RESERVED;
- debug("Forcing server key to %d bits to make it differ from host key.",
- options.server_key_bits);
- }
- }
-
- /* Configuration looks good, so exit if in test mode. */
- if (test_flag)
- exit(0);
-
- /*
- * Clear out any supplemental groups we may have inherited. This
- * prevents inadvertent creation of files with bad modes (in the
- * portable version at least, it's certainly possible for PAM
- * to create a file, and we can't control the code in every
- * module which might be used).
- */
- if (setgroups(0, NULL) < 0)
- debug("setgroups() failed: %.200s", strerror(errno));
-
- /* Initialize the log (it is reinitialized below in case we forked). */
- if (debug_flag && !inetd_flag)
- log_stderr = 1;
- log_init(__progname, options.log_level, options.log_facility, log_stderr);
-
- /*
- * Solaris 9 and systems upgraded from it may have the Ciphers option
- * explicitly set to "aes128-cbc,blowfish-cbc,3des-cbc" in the
- * sshd_config. Since the default server cipher list completely changed
- * since then we rather notify the administator on startup. We do this
- * check after log_init() so that the message goes to syslogd and not to
- * stderr (unless the server is in the debug mode). Note that since
- * Solaris 10 we no longer ship sshd_config with explicit settings for
- * Ciphers or MACs. Do not try to augment the cipher list here since
- * that might end up in a very confusing situation.
- */
-#define OLD_DEFAULT_CIPHERS_LIST "aes128-cbc,blowfish-cbc,3des-cbc"
- if (options.ciphers != NULL &&
- strcmp(options.ciphers, OLD_DEFAULT_CIPHERS_LIST) == 0) {
- notice("Old default value \"%s\" for the \"Ciphers\" "
- "option found in use. In general it is prudent to let "
- "the server choose the defaults unless your environment "
- "specifically needs an explicit setting. See "
- "sshd_config(4) for more information.",
- OLD_DEFAULT_CIPHERS_LIST);
- }
-
-#ifdef HAVE_BSM
- (void) setauid(&auid);
-#endif /* HAVE_BSM */
-
- /*
- * If not in debugging mode, and not started from inetd, disconnect
- * from the controlling terminal, and fork. The original process
- * exits.
- */
- if (!(debug_flag || inetd_flag || no_daemon_flag)) {
-#ifdef TIOCNOTTY
- int fd;
-#endif /* TIOCNOTTY */
- if (daemon(0, 0) < 0)
- fatal("daemon() failed: %.200s", strerror(errno));
-
- /* Disconnect from the controlling tty. */
-#ifdef TIOCNOTTY
- fd = open(_PATH_TTY, O_RDWR | O_NOCTTY);
- if (fd >= 0) {
- (void) ioctl(fd, TIOCNOTTY, NULL);
- (void) close(fd);
- }
-#endif /* TIOCNOTTY */
- }
- /* Reinitialize the log (because of the fork above). */
- log_init(__progname, options.log_level, options.log_facility, log_stderr);
-
- /* Initialize the random number generator. */
- arc4random_stir();
-
- /* Chdir to the root directory so that the current disk can be
- unmounted if desired. */
- (void) chdir("/");
-
- /* ignore SIGPIPE */
- (void) signal(SIGPIPE, SIG_IGN);
-
- /* Start listening for a socket, unless started from inetd. */
- if (inetd_flag) {
- int s1;
- s1 = dup(0); /* Make sure descriptors 0, 1, and 2 are in use. */
- (void) dup(s1);
- sock_in = dup(0);
- sock_out = dup(1);
- startup_pipe = -1;
- /* we need this later for setting audit context */
- newsock = sock_in;
- /*
- * We intentionally do not close the descriptors 0, 1, and 2
- * as our code for setting the descriptors won\'t work if
- * ttyfd happens to be one of those.
- */
- debug("inetd sockets after dupping: %d, %d", sock_in, sock_out);
- if (options.protocol & SSH_PROTO_1)
- generate_ephemeral_server_key();
- } else {
- for (ai = options.listen_addrs; ai; ai = ai->ai_next) {
- if (ai->ai_family != AF_INET && ai->ai_family != AF_INET6)
- continue;
- if (num_listen_socks >= MAX_LISTEN_SOCKS)
- fatal("Too many listen sockets. "
- "Enlarge MAX_LISTEN_SOCKS");
- if (getnameinfo(ai->ai_addr, ai->ai_addrlen,
- ntop, sizeof(ntop), strport, sizeof(strport),
- NI_NUMERICHOST|NI_NUMERICSERV) != 0) {
- error("getnameinfo failed");
- continue;
- }
- /* Create socket for listening. */
- listen_sock = socket(ai->ai_family, SOCK_STREAM, 0);
- if (listen_sock < 0) {
- /* kernel may not support ipv6 */
- verbose("socket: %.100s", strerror(errno));
- continue;
- }
- if (fcntl(listen_sock, F_SETFL, O_NONBLOCK) < 0) {
- error("listen_sock O_NONBLOCK: %s", strerror(errno));
- (void) close(listen_sock);
- continue;
- }
- /*
- * Set socket options.
- * Allow local port reuse in TIME_WAIT.
- */
- if (setsockopt(listen_sock, SOL_SOCKET, SO_REUSEADDR,
- &on, sizeof(on)) == -1)
- error("setsockopt SO_REUSEADDR: %s", strerror(errno));
-
- debug("Bind to port %s on %s.", strport, ntop);
-
- /* Bind the socket to the desired port. */
- if (bind(listen_sock, ai->ai_addr, ai->ai_addrlen) < 0) {
- if (!ai->ai_next)
- error("Bind to port %s on %s failed: %.200s.",
- strport, ntop, strerror(errno));
- (void) close(listen_sock);
- continue;
- }
- listen_socks[num_listen_socks] = listen_sock;
- num_listen_socks++;
-
- /* Start listening on the port. */
- log("Server listening on %s port %s.", ntop, strport);
- if (listen(listen_sock, 5) < 0)
- fatal("listen: %.100s", strerror(errno));
-
- }
- freeaddrinfo(options.listen_addrs);
-
- if (!num_listen_socks)
- fatal("Cannot bind any address.");
-
- if (options.protocol & SSH_PROTO_1)
- generate_ephemeral_server_key();
-
- /*
- * Arrange to restart on SIGHUP. The handler needs
- * listen_sock.
- */
- (void) signal(SIGHUP, sighup_handler);
-
- (void) signal(SIGTERM, sigterm_handler);
- (void) signal(SIGQUIT, sigterm_handler);
-
- /* Arrange SIGCHLD to be caught. */
- (void) signal(SIGCHLD, main_sigchld_handler);
-
- /* Write out the pid file after the sigterm handler is setup */
- if (!debug_flag) {
- /*
- * Record our pid in /var/run/sshd.pid to make it
- * easier to kill the correct sshd. We don't want to
- * do this before the bind above because the bind will
- * fail if there already is a daemon, and this will
- * overwrite any old pid in the file.
- */
- f = fopen(options.pid_file, "wb");
- if (f) {
- (void) fprintf(f, "%ld\n", (long) getpid());
- (void) fclose(f);
- }
- }
-
- /* setup fd set for listen */
- fdset = NULL;
- maxfd = 0;
- for (i = 0; i < num_listen_socks; i++)
- if (listen_socks[i] > maxfd)
- maxfd = listen_socks[i];
- /* pipes connected to unauthenticated childs */
- startup_pipes = xmalloc(options.max_startups * sizeof(int));
- for (i = 0; i < options.max_startups; i++)
- startup_pipes[i] = -1;
-
- /*
- * Stay listening for connections until the system crashes or
- * the daemon is killed with a signal.
- */
- for (;;) {
- if (received_sighup)
- sighup_restart();
- if (fdset != NULL)
- xfree(fdset);
- fdsetsz = howmany(maxfd+1, NFDBITS) * sizeof(fd_mask);
- fdset = (fd_set *)xmalloc(fdsetsz);
- (void) memset(fdset, 0, fdsetsz);
-
- for (i = 0; i < num_listen_socks; i++)
- FD_SET(listen_socks[i], fdset);
- for (i = 0; i < options.max_startups; i++)
- if (startup_pipes[i] != -1)
- FD_SET(startup_pipes[i], fdset);
-
- /* Wait in select until there is a connection. */
- ret = select(maxfd+1, fdset, NULL, NULL, NULL);
- if (ret < 0 && errno != EINTR)
- error("select: %.100s", strerror(errno));
- if (received_sigterm) {
- log("Received signal %d; terminating.",
- (int) received_sigterm);
- close_listen_socks();
- (void) unlink(options.pid_file);
- exit(255);
- }
- if (key_used && key_do_regen) {
- generate_ephemeral_server_key();
- key_used = 0;
- key_do_regen = 0;
- }
- if (ret < 0)
- continue;
-
- for (i = 0; i < options.max_startups; i++)
- if (startup_pipes[i] != -1 &&
- FD_ISSET(startup_pipes[i], fdset)) {
- /*
- * the read end of the pipe is ready
- * if the child has closed the pipe
- * after successful authentication
- * or if the child has died
- */
- (void) close(startup_pipes[i]);
- startup_pipes[i] = -1;
- startups--;
- }
- for (i = 0; i < num_listen_socks; i++) {
- if (!FD_ISSET(listen_socks[i], fdset))
- continue;
- fromlen = sizeof(from);
- newsock = accept(listen_socks[i], (struct sockaddr *)&from,
- &fromlen);
- if (newsock < 0) {
- if (errno != EINTR && errno != EWOULDBLOCK)
- error("accept: %.100s", strerror(errno));
- continue;
- }
- if (fcntl(newsock, F_SETFL, 0) < 0) {
- error("newsock del O_NONBLOCK: %s", strerror(errno));
- (void) close(newsock);
- continue;
- }
- if (drop_connection(startups) == 1) {
- debug("drop connection #%d", startups);
- (void) close(newsock);
- continue;
- }
- if (pipe(startup_p) == -1) {
- (void) close(newsock);
- continue;
- }
-
- for (j = 0; j < options.max_startups; j++)
- if (startup_pipes[j] == -1) {
- startup_pipes[j] = startup_p[0];
- if (maxfd < startup_p[0])
- maxfd = startup_p[0];
- startups++;
- break;
- }
-
- /*
- * Got connection. Fork a child to handle it, unless
- * we are in debugging mode.
- */
- if (debug_flag) {
- /*
- * In debugging mode. Close the listening
- * socket, and start processing the
- * connection without forking.
- */
- debug("Server will not fork when running in debugging mode.");
- close_listen_socks();
- sock_in = newsock;
- sock_out = newsock;
- startup_pipe = -1;
- pid = getpid();
- break;
- } else {
- /*
- * Normal production daemon. Fork, and have
- * the child process the connection. The
- * parent continues listening.
- */
-#ifdef HAVE_SOLARIS_CONTRACTS
- /*
- * Setup Solaris contract template so
- * the child process is in a different
- * process contract than the parent;
- * prevents established connections from
- * being killed when the sshd master
- * listener service is stopped.
- */
- contracts_pre_fork();
-#endif /* HAVE_SOLARIS_CONTRACTS */
- if ((pid = fork()) == 0) {
- /*
- * Child. Close the listening and max_startup
- * sockets. Start using the accepted socket.
- * Reinitialize logging (since our pid has
- * changed). We break out of the loop to handle
- * the connection.
- */
-#ifdef HAVE_SOLARIS_CONTRACTS
- contracts_post_fork_child();
-#endif /* HAVE_SOLARIS_CONTRACTS */
- xfree(fdset);
- startup_pipe = startup_p[1];
- close_startup_pipes();
- close_listen_socks();
- sock_in = newsock;
- sock_out = newsock;
- log_init(__progname, options.log_level, options.log_facility, log_stderr);
- break;
- }
-
- /* Parent. Stay in the loop. */
- if (pid < 0)
- error("fork: %.100s", strerror(errno));
- else
- debug("Forked child %ld.", (long)pid);
-
-#ifdef HAVE_SOLARIS_CONTRACTS
- contracts_post_fork_parent((pid > 0));
-#endif /* HAVE_SOLARIS_CONTRACTS */
- }
-
- (void) close(startup_p[1]);
-
- /* Mark that the key has been used (it was "given" to the child). */
- if ((options.protocol & SSH_PROTO_1) &&
- key_used == 0) {
- /* Schedule server key regeneration alarm. */
- (void) signal(SIGALRM, key_regeneration_alarm);
- (void) alarm(options.key_regeneration_time);
- key_used = 1;
- }
-
- arc4random_stir();
-
- /*
- * Close the accepted socket since the child
- * will now take care of the new connection.
- */
- (void) close(newsock);
- }
- /* child process check (or debug mode) */
- if (num_listen_socks < 0)
- break;
- }
- }
-
- /*
- * This is the child processing a new connection, the SSH master process
- * stays in the ( ; ; ) loop above.
- */
-#ifdef HAVE_BSM
- audit_sshd_settid(newsock);
-#endif
- /*
- * Create a new session and process group since the 4.4BSD
- * setlogin() affects the entire process group. We don't
- * want the child to be able to affect the parent.
- */
-#if 0
- /* XXX: this breaks Solaris */
- if (!debug_flag && !inetd_flag && setsid() < 0)
- error("setsid: %.100s", strerror(errno));
-#endif
-
- /*
- * Disable the key regeneration alarm. We will not regenerate the
- * key since we are no longer in a position to give it to anyone. We
- * will not restart on SIGHUP since it no longer makes sense.
- */
- (void) alarm(0);
- (void) signal(SIGALRM, SIG_DFL);
- (void) signal(SIGHUP, SIG_DFL);
- (void) signal(SIGTERM, SIG_DFL);
- (void) signal(SIGQUIT, SIG_DFL);
- (void) signal(SIGCHLD, SIG_DFL);
- (void) signal(SIGINT, SIG_DFL);
-
- /* Set keepalives if requested. */
- if (options.keepalives &&
- setsockopt(sock_in, SOL_SOCKET, SO_KEEPALIVE, &on,
- sizeof(on)) < 0)
- debug2("setsockopt SO_KEEPALIVE: %.100s", strerror(errno));
-
- /*
- * Register our connection. This turns encryption off because we do
- * not have a key.
- */
- packet_set_connection(sock_in, sock_out);
-
- remote_port = get_remote_port();
- remote_ip = get_remote_ipaddr();
-
-#ifdef LIBWRAP
- /* Check whether logins are denied from this host. */
- {
- struct request_info req;
-
- (void) request_init(&req, RQ_DAEMON, __progname, RQ_FILE, sock_in, 0);
- fromhost(&req);
-
- if (!hosts_access(&req)) {
- debug("Connection refused by tcp wrapper");
- refuse(&req);
- /* NOTREACHED */
- fatal("libwrap refuse returns");
- }
- }
-#endif /* LIBWRAP */
-
- /* Log the connection. */
- verbose("Connection from %.500s port %d", remote_ip, remote_port);
-
- sshd_exchange_identification(sock_in, sock_out);
- /*
- * Check that the connection comes from a privileged port.
- * Rhosts-Authentication only makes sense from privileged
- * programs. Of course, if the intruder has root access on his local
- * machine, he can connect from any port. So do not use these
- * authentication methods from machines that you do not trust.
- */
- if (options.rhosts_authentication &&
- (remote_port >= IPPORT_RESERVED ||
- remote_port < IPPORT_RESERVED / 2)) {
- debug("Rhosts Authentication disabled, "
- "originating port %d not trusted.", remote_port);
- options.rhosts_authentication = 0;
- }
-#if defined(KRB4) && !defined(KRB5)
- if (!packet_connection_is_ipv4() &&
- options.kerberos_authentication) {
- debug("Kerberos Authentication disabled, only available for IPv4.");
- options.kerberos_authentication = 0;
- }
-#endif /* KRB4 && !KRB5 */
-#ifdef AFS
- /* If machine has AFS, set process authentication group. */
- if (k_hasafs()) {
- k_setpag();
- k_unlog();
- }
-#endif /* AFS */
-
- packet_set_nonblocking();
-
- /*
- * Start the monitor. That way both processes will have their own
- * PKCS#11 sessions. See the PKCS#11 standard for more information on
- * fork safety and packet.c for information about forking with the
- * engine.
- *
- * Note that the monitor stays in the function while the child is the
- * only one that returns.
- */
- altprivsep_start_and_do_monitor(options.use_openssl_engine,
- inetd_flag, newsock, startup_pipe);
-
- /*
- * We don't want to listen forever unless the other side successfully
- * authenticates itself. So we set up an alarm which is cleared after
- * successful authentication. A limit of zero indicates no limit. Note
- * that we don't set the alarm in debugging mode; it is just annoying to
- * have the server exit just when you are about to discover the bug.
- */
- (void) signal(SIGALRM, grace_alarm_handler);
- if (!debug_flag)
- (void) alarm(options.login_grace_time);
-
- /*
- * The child is about to start the first key exchange while the monitor
- * stays in altprivsep_start_and_do_monitor() function.
- */
- (void) pkcs11_engine_load(options.use_openssl_engine);
-
- /* perform the key exchange */
- /* authenticate user and start session */
- if (compat20) {
- do_ssh2_kex();
- authctxt = do_authentication2();
- } else {
- do_ssh1_kex();
- authctxt = do_authentication();
- }
-
- /* Authentication complete */
- (void) alarm(0);
- /* we no longer need an alarm handler */
- (void) signal(SIGALRM, SIG_DFL);
-
- if (startup_pipe != -1) {
- (void) close(startup_pipe);
- startup_pipe = -1;
- }
-
- /* ALTPRIVSEP Child */
-
- /*
- * Drop privileges, access to privileged resources.
- *
- * Destroy private host keys, if any.
- *
- * No need to release any GSS credentials -- sshd only acquires
- * creds to determine what mechs it can negotiate then releases
- * them right away and uses GSS_C_NO_CREDENTIAL to accept
- * contexts.
- */
- debug2("Unprivileged server process dropping privileges");
- permanently_set_uid(authctxt->pw, options.chroot_directory);
- destroy_sensitive_data();
-
- /* Just another safety check. */
- if (getuid() != authctxt->pw->pw_uid ||
- geteuid() != authctxt->pw->pw_uid) {
- fatal("Failed to set uids to %u.", (u_int)authctxt->pw->pw_uid);
- }
-
- ssh_gssapi_server_mechs(NULL); /* release cached mechs list */
- packet_set_server();
-
- /* now send the authentication context to the monitor */
- altprivsep_send_auth_context(authctxt);
-
- mpipe = altprivsep_get_pipe_fd();
- if (fcntl(mpipe, F_SETFL, O_NONBLOCK) < 0)
- error("fcntl O_NONBLOCK: %.100s", strerror(errno));
-
-#ifdef HAVE_BSM
- fatal_remove_cleanup(
- (void (*)(void *))audit_failed_login_cleanup,
- (void *)authctxt);
-#endif /* HAVE_BSM */
-
- if (compat20) {
- debug3("setting handler to forward re-key packets to the monitor");
- dispatch_range(SSH2_MSG_KEXINIT, SSH2_MSG_TRANSPORT_MAX,
- &altprivsep_rekey);
- }
-
- /* Logged-in session. */
- do_authenticated(authctxt);
-
- /* The connection has been terminated. */
- verbose("Closing connection to %.100s", remote_ip);
-
- packet_close();
-
-#ifdef USE_PAM
- finish_pam(authctxt);
-#endif /* USE_PAM */
-
- return (0);
-}
-
-/*
- * Decrypt session_key_int using our private server key and private host key
- * (key with larger modulus first).
- */
-int
-ssh1_session_key(BIGNUM *session_key_int)
-{
- int rsafail = 0;
-
- if (BN_cmp(sensitive_data.server_key->rsa->n, sensitive_data.ssh1_host_key->rsa->n) > 0) {
- /* Server key has bigger modulus. */
- if (BN_num_bits(sensitive_data.server_key->rsa->n) <
- BN_num_bits(sensitive_data.ssh1_host_key->rsa->n) + SSH_KEY_BITS_RESERVED) {
- fatal("do_connection: %s: server_key %d < host_key %d + SSH_KEY_BITS_RESERVED %d",
- get_remote_ipaddr(),
- BN_num_bits(sensitive_data.server_key->rsa->n),
- BN_num_bits(sensitive_data.ssh1_host_key->rsa->n),
- SSH_KEY_BITS_RESERVED);
- }
- if (rsa_private_decrypt(session_key_int, session_key_int,
- sensitive_data.server_key->rsa) <= 0)
- rsafail++;
- if (rsa_private_decrypt(session_key_int, session_key_int,
- sensitive_data.ssh1_host_key->rsa) <= 0)
- rsafail++;
- } else {
- /* Host key has bigger modulus (or they are equal). */
- if (BN_num_bits(sensitive_data.ssh1_host_key->rsa->n) <
- BN_num_bits(sensitive_data.server_key->rsa->n) + SSH_KEY_BITS_RESERVED) {
- fatal("do_connection: %s: host_key %d < server_key %d + SSH_KEY_BITS_RESERVED %d",
- get_remote_ipaddr(),
- BN_num_bits(sensitive_data.ssh1_host_key->rsa->n),
- BN_num_bits(sensitive_data.server_key->rsa->n),
- SSH_KEY_BITS_RESERVED);
- }
- if (rsa_private_decrypt(session_key_int, session_key_int,
- sensitive_data.ssh1_host_key->rsa) < 0)
- rsafail++;
- if (rsa_private_decrypt(session_key_int, session_key_int,
- sensitive_data.server_key->rsa) < 0)
- rsafail++;
- }
- return (rsafail);
-}
-/*
- * SSH1 key exchange
- */
-static void
-do_ssh1_kex(void)
-{
- int i, len;
- int rsafail = 0;
- BIGNUM *session_key_int;
- u_char session_key[SSH_SESSION_KEY_LENGTH];
- u_char cookie[8];
- u_int cipher_type, auth_mask, protocol_flags;
- u_int32_t rnd = 0;
-
- /*
- * Generate check bytes that the client must send back in the user
- * packet in order for it to be accepted; this is used to defy ip
- * spoofing attacks. Note that this only works against somebody
- * doing IP spoofing from a remote machine; any machine on the local
- * network can still see outgoing packets and catch the random
- * cookie. This only affects rhosts authentication, and this is one
- * of the reasons why it is inherently insecure.
- */
- for (i = 0; i < 8; i++) {
- if (i % 4 == 0)
- rnd = arc4random();
- cookie[i] = rnd & 0xff;
- rnd >>= 8;
- }
-
- /*
- * Send our public key. We include in the packet 64 bits of random
- * data that must be matched in the reply in order to prevent IP
- * spoofing.
- */
- packet_start(SSH_SMSG_PUBLIC_KEY);
- for (i = 0; i < 8; i++)
- packet_put_char(cookie[i]);
-
- /* Store our public server RSA key. */
- packet_put_int(BN_num_bits(sensitive_data.server_key->rsa->n));
- packet_put_bignum(sensitive_data.server_key->rsa->e);
- packet_put_bignum(sensitive_data.server_key->rsa->n);
-
- /* Store our public host RSA key. */
- packet_put_int(BN_num_bits(sensitive_data.ssh1_host_key->rsa->n));
- packet_put_bignum(sensitive_data.ssh1_host_key->rsa->e);
- packet_put_bignum(sensitive_data.ssh1_host_key->rsa->n);
-
- /* Put protocol flags. */
- packet_put_int(SSH_PROTOFLAG_HOST_IN_FWD_OPEN);
-
- /* Declare which ciphers we support. */
- packet_put_int(cipher_mask_ssh1(0));
-
- /* Declare supported authentication types. */
- auth_mask = 0;
- if (options.rhosts_authentication)
- auth_mask |= 1 << SSH_AUTH_RHOSTS;
- if (options.rhosts_rsa_authentication)
- auth_mask |= 1 << SSH_AUTH_RHOSTS_RSA;
- if (options.rsa_authentication)
- auth_mask |= 1 << SSH_AUTH_RSA;
-#if defined(KRB4) || defined(KRB5)
- if (options.kerberos_authentication)
- auth_mask |= 1 << SSH_AUTH_KERBEROS;
-#endif
-#if defined(AFS) || defined(KRB5)
- if (options.kerberos_tgt_passing)
- auth_mask |= 1 << SSH_PASS_KERBEROS_TGT;
-#endif
-#ifdef AFS
- if (options.afs_token_passing)
- auth_mask |= 1 << SSH_PASS_AFS_TOKEN;
-#endif
- if (options.challenge_response_authentication == 1)
- auth_mask |= 1 << SSH_AUTH_TIS;
- if (options.password_authentication)
- auth_mask |= 1 << SSH_AUTH_PASSWORD;
- packet_put_int(auth_mask);
-
- /* Send the packet and wait for it to be sent. */
- packet_send();
- packet_write_wait();
-
- debug("Sent %d bit server key and %d bit host key.",
- BN_num_bits(sensitive_data.server_key->rsa->n),
- BN_num_bits(sensitive_data.ssh1_host_key->rsa->n));
-
- /* Read clients reply (cipher type and session key). */
- packet_read_expect(SSH_CMSG_SESSION_KEY);
-
- /* Get cipher type and check whether we accept this. */
- cipher_type = packet_get_char();
-
- if (!(cipher_mask_ssh1(0) & (1 << cipher_type))) {
- packet_disconnect("Warning: client selects unsupported cipher.");
- }
-
- /* Get check bytes from the packet. These must match those we
- sent earlier with the public key packet. */
- for (i = 0; i < 8; i++) {
- if (cookie[i] != packet_get_char()) {
- packet_disconnect("IP Spoofing check bytes do not match.");
- }
- }
-
- debug("Encryption type: %.200s", cipher_name(cipher_type));
-
- /* Get the encrypted integer. */
- if ((session_key_int = BN_new()) == NULL)
- fatal("do_ssh1_kex: BN_new failed");
- packet_get_bignum(session_key_int);
-
- protocol_flags = packet_get_int();
- packet_set_protocol_flags(protocol_flags);
- packet_check_eom();
-
- /* Decrypt session_key_int using host/server keys */
- rsafail = ssh1_session_key(session_key_int);
-
- /*
- * Extract session key from the decrypted integer. The key is in the
- * least significant 256 bits of the integer; the first byte of the
- * key is in the highest bits.
- */
- if (!rsafail) {
- (void) BN_mask_bits(session_key_int, sizeof(session_key) * 8);
- len = BN_num_bytes(session_key_int);
- if (len < 0 || len > sizeof(session_key)) {
- error("do_connection: bad session key len from %s: "
- "session_key_int %d > sizeof(session_key) %lu",
- get_remote_ipaddr(), len, (u_long)sizeof(session_key));
- rsafail++;
- } else {
- (void) memset(session_key, 0, sizeof(session_key));
- (void) BN_bn2bin(session_key_int,
- session_key + sizeof(session_key) - len);
-
- compute_session_id(session_id, cookie,
- sensitive_data.ssh1_host_key->rsa->n,
- sensitive_data.server_key->rsa->n);
- /*
- * Xor the first 16 bytes of the session key with the
- * session id.
- */
- for (i = 0; i < 16; i++)
- session_key[i] ^= session_id[i];
- }
- }
- if (rsafail) {
- int bytes = BN_num_bytes(session_key_int);
- u_char *buf = xmalloc(bytes);
- MD5_CTX md;
-
- log("do_connection: generating a fake encryption key");
- (void) BN_bn2bin(session_key_int, buf);
- MD5_Init(&md);
- MD5_Update(&md, buf, bytes);
- MD5_Update(&md, sensitive_data.ssh1_cookie, SSH_SESSION_KEY_LENGTH);
- MD5_Final(session_key, &md);
- MD5_Init(&md);
- MD5_Update(&md, session_key, 16);
- MD5_Update(&md, buf, bytes);
- MD5_Update(&md, sensitive_data.ssh1_cookie, SSH_SESSION_KEY_LENGTH);
- MD5_Final(session_key + 16, &md);
- (void) memset(buf, 0, bytes);
- xfree(buf);
- for (i = 0; i < 16; i++)
- session_id[i] = session_key[i] ^ session_key[i + 16];
- }
- /* Destroy the private and public keys. No longer. */
- destroy_sensitive_data();
-
- /* Destroy the decrypted integer. It is no longer needed. */
- BN_clear_free(session_key_int);
-
- /* Set the session key. From this on all communications will be encrypted. */
- packet_set_encryption_key(session_key, SSH_SESSION_KEY_LENGTH, cipher_type);
-
- /* Destroy our copy of the session key. It is no longer needed. */
- (void) memset(session_key, 0, sizeof(session_key));
-
- debug("Received session key; encryption turned on.");
-
- /* Send an acknowledgment packet. Note that this packet is sent encrypted. */
- packet_start(SSH_SMSG_SUCCESS);
- packet_send();
- packet_write_wait();
-}
-
-/*
- * Prepare for SSH2 key exchange.
- */
-Kex *
-prepare_for_ssh2_kex(void)
-{
- Kex *kex;
- Kex_hook_func kex_hook = NULL;
- char **locales;
- static char **myproposal;
-
- myproposal = my_srv_proposal;
-
- if (options.ciphers != NULL) {
- myproposal[PROPOSAL_ENC_ALGS_CTOS] =
- myproposal[PROPOSAL_ENC_ALGS_STOC] = options.ciphers;
- }
- myproposal[PROPOSAL_ENC_ALGS_CTOS] =
- compat_cipher_proposal(myproposal[PROPOSAL_ENC_ALGS_CTOS]);
- myproposal[PROPOSAL_ENC_ALGS_STOC] =
- compat_cipher_proposal(myproposal[PROPOSAL_ENC_ALGS_STOC]);
-
- if (options.macs != NULL) {
- myproposal[PROPOSAL_MAC_ALGS_CTOS] =
- myproposal[PROPOSAL_MAC_ALGS_STOC] = options.macs;
- }
- if (!options.compression) {
- myproposal[PROPOSAL_COMP_ALGS_CTOS] =
- myproposal[PROPOSAL_COMP_ALGS_STOC] = "none";
- }
-
- /*
- * Prepare kex algs / hostkey algs (excluding GSS, which is
- * handled in the kex hook.
- *
- * XXX This should probably move to the kex hook as well, where
- * all non-constant kex offer material belongs.
- */
- myproposal[PROPOSAL_SERVER_HOST_KEY_ALGS] = list_hostkey_types();
-
- /* If we have no host key algs we can't offer KEXDH/KEX_DH_GEX */
- if (myproposal[PROPOSAL_SERVER_HOST_KEY_ALGS] == NULL ||
- *myproposal[PROPOSAL_SERVER_HOST_KEY_ALGS] == '\0')
- myproposal[PROPOSAL_KEX_ALGS] = "";
-
- if ((locales = g11n_getlocales()) != NULL) {
- /* Solaris 9 SSH expects a list of locales */
- if (datafellows & SSH_BUG_LOCALES_NOT_LANGTAGS)
- myproposal[PROPOSAL_LANG_STOC] = xjoin(locales, ',');
- else
- myproposal[PROPOSAL_LANG_STOC] =
- g11n_locales2langs(locales);
- }
-
- if (locales != NULL)
- g11n_freelist(locales);
-
- if ((myproposal[PROPOSAL_LANG_STOC] != NULL) &&
- (strcmp(myproposal[PROPOSAL_LANG_STOC], "")) != 0)
- myproposal[PROPOSAL_LANG_CTOS] =
- xstrdup(myproposal[PROPOSAL_LANG_STOC]);
-
-#ifdef GSSAPI
- if (options.gss_keyex)
- kex_hook = ssh_gssapi_server_kex_hook;
-#endif /* GSSAPI */
-
- kex = kex_setup(NULL, myproposal, kex_hook);
-
- /*
- * Note that the my_srv_proposal variable (ie., myproposal) is staticly
- * initialized with "" for the language fields; we must not xfree such
- * strings.
- */
- if (myproposal[PROPOSAL_LANG_STOC] != NULL &&
- strcmp(myproposal[PROPOSAL_LANG_STOC], "") != 0)
- xfree(myproposal[PROPOSAL_LANG_STOC]);
- if (myproposal[PROPOSAL_LANG_CTOS] != NULL &&
- strcmp(myproposal[PROPOSAL_LANG_STOC], "") != 0)
- xfree(myproposal[PROPOSAL_LANG_CTOS]);
-
- kex->kex[KEX_DH_GRP1_SHA1] = kexdh_server;
- kex->kex[KEX_DH_GEX_SHA1] = kexgex_server;
-#ifdef GSSAPI
- kex->kex[KEX_GSS_GRP1_SHA1] = kexgss_server;
-#endif /* GSSAPI */
- kex->server = 1;
- kex->client_version_string = client_version_string;
- kex->server_version_string = server_version_string;
- kex->load_host_key = &get_hostkey_by_type;
- kex->host_key_index = &get_hostkey_index;
-
- xxx_kex = kex;
- return (kex);
-}
-
-/*
- * Do SSH2 key exchange.
- */
-static void
-do_ssh2_kex(void)
-{
- Kex *kex;
-
- kex = prepare_for_ssh2_kex();
- kex_start(kex);
-
- dispatch_run(DISPATCH_BLOCK, &kex->done, kex);
-
- if (kex->name) {
- xfree(kex->name);
- kex->name = NULL;
- }
- session_id2 = kex->session_id;
- session_id2_len = kex->session_id_len;
-
-#ifdef DEBUG_KEXDH
- /* send 1st encrypted/maced/compressed message */
- packet_start(SSH2_MSG_IGNORE);
- packet_put_cstring("markus");
- packet_send();
- packet_write_wait();
-#endif
- debug("KEX done");
-}
diff --git a/usr/src/cmd/ssh/sshd/sshlogin.c b/usr/src/cmd/ssh/sshd/sshlogin.c
deleted file mode 100644
index c21877355c..0000000000
--- a/usr/src/cmd/ssh/sshd/sshlogin.c
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * This file performs some of the things login(1) normally does. We cannot
- * easily use something like login -p -h host -f user, because there are
- * several different logins around, and it is hard to determined what kind of
- * login the current system has. Also, we want to be able to execute commands
- * on a tty.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- *
- * Copyright (c) 1999 Theo de Raadt. All rights reserved.
- * Copyright (c) 1999 Markus Friedl. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: sshlogin.c,v 1.5 2002/08/29 15:57:25 stevesk Exp $");
-
-#include "loginrec.h"
-#include "log.h"
-#include "buffer.h"
-#include "servconf.h"
-#include "canohost.h"
-#include "packet.h"
-
-extern u_int utmp_len;
-extern ServerOptions options;
-
-/*
- * Returns the time when the user last logged in. Returns 0 if the
- * information is not available. This must be called before record_login.
- * The host the user logged in from will be returned in buf.
- */
-u_long
-get_last_login_time(uid_t uid, const char *logname,
- char *buf, u_int bufsize)
-{
- struct logininfo li;
-
- (void) login_get_lastlog(&li, uid);
- (void) strlcpy(buf, li.hostname, bufsize);
- return li.tv_sec;
-}
-
-/*
- * Records that the user has logged in. If only these parts of operating
- * systems were more standardized.
- */
-void
-record_login(pid_t pid, const char *ttyname, const char *progname,
- const char *user)
-{
- struct logininfo *li;
- static int initialized = 0;
- static socklen_t fromlen;
- static struct sockaddr_storage from;
- static const char *remote_name_or_ip;
-
- if (pid == 0)
- pid = getpid();
- /*
- * Get IP address of client. If the connection is not a socket, let
- * the address be 0.0.0.0.
- */
- if (!initialized) {
- (void) memset(&from, 0, sizeof(from));
- if (packet_connection_is_on_socket()) {
- fromlen = sizeof(from);
- if (getpeername(packet_get_connection_in(),
- (struct sockaddr *) &from, &fromlen) < 0) {
- debug("getpeername: %.100s", strerror(errno));
- fatal_cleanup();
- }
- }
- remote_name_or_ip = get_remote_name_or_ip(utmp_len,
- options.verify_reverse_mapping);
-
- initialized = 1;
- }
-
- li = login_alloc_entry(pid, user, remote_name_or_ip, ttyname, progname);
- login_set_addr(li, (struct sockaddr*) &from, sizeof(struct sockaddr));
- (void) login_login(li);
- login_free_entry(li);
-}
-
-/* Records that the user has logged out. */
-void
-record_logout(pid_t pid, const char *ttyname, const char *progname,
- const char *user)
-{
- struct logininfo *li;
-
- li = login_alloc_entry(pid, user, NULL, ttyname, progname);
- (void) login_logout(li);
- login_free_entry(li);
-}
diff --git a/usr/src/cmd/ssh/sshd/sshpty.c b/usr/src/cmd/ssh/sshd/sshpty.c
deleted file mode 100644
index b421798cb5..0000000000
--- a/usr/src/cmd/ssh/sshd/sshpty.c
+++ /dev/null
@@ -1,420 +0,0 @@
-/*
- * Author: Tatu Ylonen <ylo@cs.hut.fi>
- * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
- * All rights reserved
- * Allocating a pseudo-terminal, and making it the controlling tty.
- *
- * As far as I am concerned, the code I have written for this software
- * can be used freely for any purpose. Any derived versions of this
- * software must be clearly marked as such, and if the derived work is
- * incompatible with the protocol description in the RFC file, it must be
- * called by a name other than "ssh" or "Secure Shell".
- */
-
-#include "includes.h"
-RCSID("$OpenBSD: sshpty.c,v 1.7 2002/06/24 17:57:20 deraadt Exp $");
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#ifdef HAVE_UTIL_H
-# include <util.h>
-#endif /* HAVE_UTIL_H */
-
-#include "sshpty.h"
-#include "log.h"
-#include "misc.h"
-
-/* Pty allocated with _getpty gets broken if we do I_PUSH:es to it. */
-#if defined(HAVE__GETPTY) || defined(HAVE_OPENPTY)
-#undef HAVE_DEV_PTMX
-#endif
-
-#ifdef HAVE_PTY_H
-# include <pty.h>
-#endif
-#if defined(HAVE_DEV_PTMX) && defined(HAVE_SYS_STROPTS_H)
-# include <sys/stropts.h>
-#endif
-
-#ifndef O_NOCTTY
-#define O_NOCTTY 0
-#endif
-
-/*
- * Allocates and opens a pty. Returns 0 if no pty could be allocated, or
- * nonzero if a pty was successfully allocated. On success, open file
- * descriptors for the pty and tty sides and the name of the tty side are
- * returned (the buffer must be able to hold at least 64 characters).
- */
-
-int
-pty_allocate(int *ptyfd, int *ttyfd, char *namebuf, int namebuflen)
-{
-#if defined(HAVE_OPENPTY) || defined(BSD4_4)
- /* openpty(3) exists in OSF/1 and some other os'es */
- char *name;
- int i;
-
- i = openpty(ptyfd, ttyfd, NULL, NULL, NULL);
- if (i < 0) {
- error("openpty: %.100s", strerror(errno));
- return 0;
- }
- name = ttyname(*ttyfd);
- if (!name)
- fatal("openpty returns device for which ttyname fails.");
-
- strlcpy(namebuf, name, namebuflen); /* possible truncation */
- return 1;
-#else /* HAVE_OPENPTY */
-#ifdef HAVE__GETPTY
- /*
- * _getpty(3) exists in SGI Irix 4.x, 5.x & 6.x -- it generates more
- * pty's automagically when needed
- */
- char *slave;
-
- slave = _getpty(ptyfd, O_RDWR, 0622, 0);
- if (slave == NULL) {
- error("_getpty: %.100s", strerror(errno));
- return 0;
- }
- strlcpy(namebuf, slave, namebuflen);
- /* Open the slave side. */
- *ttyfd = open(namebuf, O_RDWR | O_NOCTTY);
- if (*ttyfd < 0) {
- error("%.200s: %.100s", namebuf, strerror(errno));
- close(*ptyfd);
- return 0;
- }
- return 1;
-#else /* HAVE__GETPTY */
-#if defined(HAVE_DEV_PTMX)
- /*
- * This code is used e.g. on Solaris 2.x. (Note that Solaris 2.3
- * also has bsd-style ptys, but they simply do not work.)
- */
- int ptm;
- char *pts;
- mysig_t old_signal;
-
- ptm = open("/dev/ptmx", O_RDWR | O_NOCTTY);
- if (ptm < 0) {
- error("/dev/ptmx: %.100s", strerror(errno));
- return 0;
- }
- old_signal = mysignal(SIGCHLD, SIG_DFL);
- if (grantpt(ptm) < 0) {
- error("grantpt: %.100s", strerror(errno));
- return 0;
- }
- mysignal(SIGCHLD, old_signal);
- if (unlockpt(ptm) < 0) {
- error("unlockpt: %.100s", strerror(errno));
- return 0;
- }
- pts = ptsname(ptm);
- if (pts == NULL)
- error("Slave pty side name could not be obtained.");
- strlcpy(namebuf, pts, namebuflen);
- *ptyfd = ptm;
-
- /* Open the slave side. */
- *ttyfd = open(namebuf, O_RDWR | O_NOCTTY);
- if (*ttyfd < 0) {
- error("%.100s: %.100s", namebuf, strerror(errno));
- close(*ptyfd);
- return 0;
- }
-#ifndef HAVE_CYGWIN
- /*
- * Push the appropriate streams modules, as described in Solaris pts(7).
- * HP-UX pts(7) doesn't have ttcompat module.
- */
- if (ioctl(*ttyfd, I_PUSH, "ptem") < 0)
- error("ioctl I_PUSH ptem: %.100s", strerror(errno));
- if (ioctl(*ttyfd, I_PUSH, "ldterm") < 0)
- error("ioctl I_PUSH ldterm: %.100s", strerror(errno));
-#ifndef __hpux
- if (ioctl(*ttyfd, I_PUSH, "ttcompat") < 0)
- error("ioctl I_PUSH ttcompat: %.100s", strerror(errno));
-#endif
-#endif
- return 1;
-#else /* HAVE_DEV_PTMX */
-#ifdef HAVE_DEV_PTS_AND_PTC
- /* AIX-style pty code. */
- const char *name;
-
- *ptyfd = open("/dev/ptc", O_RDWR | O_NOCTTY);
- if (*ptyfd < 0) {
- error("Could not open /dev/ptc: %.100s", strerror(errno));
- return 0;
- }
- name = ttyname(*ptyfd);
- if (!name)
- fatal("Open of /dev/ptc returns device for which ttyname fails.");
- strlcpy(namebuf, name, namebuflen);
- *ttyfd = open(name, O_RDWR | O_NOCTTY);
- if (*ttyfd < 0) {
- error("Could not open pty slave side %.100s: %.100s",
- name, strerror(errno));
- close(*ptyfd);
- return 0;
- }
- return 1;
-#else /* HAVE_DEV_PTS_AND_PTC */
-#ifdef _UNICOS
- char buf[64];
- int i;
- int highpty;
-
-#ifdef _SC_CRAY_NPTY
- highpty = sysconf(_SC_CRAY_NPTY);
- if (highpty == -1)
- highpty = 128;
-#else
- highpty = 128;
-#endif
-
- for (i = 0; i < highpty; i++) {
- snprintf(buf, sizeof(buf), "/dev/pty/%03d", i);
- *ptyfd = open(buf, O_RDWR|O_NOCTTY);
- if (*ptyfd < 0)
- continue;
- snprintf(namebuf, namebuflen, "/dev/ttyp%03d", i);
- /* Open the slave side. */
- *ttyfd = open(namebuf, O_RDWR|O_NOCTTY);
- if (*ttyfd < 0) {
- error("%.100s: %.100s", namebuf, strerror(errno));
- close(*ptyfd);
- return 0;
- }
- return 1;
- }
- return 0;
-#else
- /* BSD-style pty code. */
- char buf[64];
- int i;
- const char *ptymajors = "pqrstuvwxyzabcdefghijklmnoABCDEFGHIJKLMNOPQRSTUVWXYZ";
- const char *ptyminors = "0123456789abcdef";
- int num_minors = strlen(ptyminors);
- int num_ptys = strlen(ptymajors) * num_minors;
- struct termios tio;
-
- for (i = 0; i < num_ptys; i++) {
- snprintf(buf, sizeof buf, "/dev/pty%c%c", ptymajors[i / num_minors],
- ptyminors[i % num_minors]);
- snprintf(namebuf, namebuflen, "/dev/tty%c%c",
- ptymajors[i / num_minors], ptyminors[i % num_minors]);
-
- *ptyfd = open(buf, O_RDWR | O_NOCTTY);
- if (*ptyfd < 0) {
- /* Try SCO style naming */
- snprintf(buf, sizeof buf, "/dev/ptyp%d", i);
- snprintf(namebuf, namebuflen, "/dev/ttyp%d", i);
- *ptyfd = open(buf, O_RDWR | O_NOCTTY);
- if (*ptyfd < 0)
- continue;
- }
-
- /* Open the slave side. */
- *ttyfd = open(namebuf, O_RDWR | O_NOCTTY);
- if (*ttyfd < 0) {
- error("%.100s: %.100s", namebuf, strerror(errno));
- close(*ptyfd);
- return 0;
- }
- /* set tty modes to a sane state for broken clients */
- if (tcgetattr(*ptyfd, &tio) < 0)
- log("Getting tty modes for pty failed: %.100s", strerror(errno));
- else {
- tio.c_lflag |= (ECHO | ISIG | ICANON);
- tio.c_oflag |= (OPOST | ONLCR);
- tio.c_iflag |= ICRNL;
-
- /* Set the new modes for the terminal. */
- if (tcsetattr(*ptyfd, TCSANOW, &tio) < 0)
- log("Setting tty modes for pty failed: %.100s", strerror(errno));
- }
-
- return 1;
- }
- return 0;
-#endif /* CRAY */
-#endif /* HAVE_DEV_PTS_AND_PTC */
-#endif /* HAVE_DEV_PTMX */
-#endif /* HAVE__GETPTY */
-#endif /* HAVE_OPENPTY */
-}
-
-/* Releases the tty. Its ownership is returned to root, and permissions to 0666. */
-
-void
-pty_release(const char *ttyname)
-{
- if (chown(ttyname, (uid_t) 0, (gid_t) 0) < 0)
- error("chown %.100s 0 0 failed: %.100s", ttyname, strerror(errno));
- if (chmod(ttyname, (mode_t) 0666) < 0)
- error("chmod %.100s 0666 failed: %.100s", ttyname, strerror(errno));
-}
-
-/* Makes the tty the processes controlling tty and sets it to sane modes. */
-
-void
-pty_make_controlling_tty(int *ttyfd, const char *ttyname)
-{
- int fd;
-#ifdef USE_VHANGUP
- void *old;
-#endif /* USE_VHANGUP */
-
-#ifdef _UNICOS
- if (setsid() < 0)
- error("setsid: %.100s", strerror(errno));
-
- fd = open(ttyname, O_RDWR|O_NOCTTY);
- if (fd != -1) {
- mysignal(SIGHUP, SIG_IGN);
- ioctl(fd, TCVHUP, (char *)NULL);
- mysignal(SIGHUP, SIG_DFL);
- setpgid(0, 0);
- close(fd);
- } else {
- error("Failed to disconnect from controlling tty.");
- }
-
- debug("Setting controlling tty using TCSETCTTY.");
- ioctl(*ttyfd, TCSETCTTY, NULL);
- fd = open("/dev/tty", O_RDWR);
- if (fd < 0)
- error("%.100s: %.100s", ttyname, strerror(errno));
- close(*ttyfd);
- *ttyfd = fd;
-#else /* _UNICOS */
-
- /* First disconnect from the old controlling tty. */
-#ifdef TIOCNOTTY
- fd = open(_PATH_TTY, O_RDWR | O_NOCTTY);
- if (fd >= 0) {
- (void) ioctl(fd, TIOCNOTTY, NULL);
- close(fd);
- }
-#endif /* TIOCNOTTY */
- if (setsid() < 0)
- error("setsid: %.100s", strerror(errno));
-
- /*
- * Verify that we are successfully disconnected from the controlling
- * tty.
- */
- fd = open(_PATH_TTY, O_RDWR | O_NOCTTY);
- if (fd >= 0) {
- error("Failed to disconnect from controlling tty.");
- close(fd);
- }
- /* Make it our controlling tty. */
-#ifdef TIOCSCTTY
- debug("Setting controlling tty using TIOCSCTTY.");
- if (ioctl(*ttyfd, TIOCSCTTY, NULL) < 0)
- error("ioctl(TIOCSCTTY): %.100s", strerror(errno));
-#endif /* TIOCSCTTY */
-#ifdef HAVE_NEWS4
- if (setpgrp(0,0) < 0)
- error("SETPGRP %s",strerror(errno));
-#endif /* HAVE_NEWS4 */
-#ifdef USE_VHANGUP
- old = mysignal(SIGHUP, SIG_IGN);
- vhangup();
- mysignal(SIGHUP, old);
-#endif /* USE_VHANGUP */
- fd = open(ttyname, O_RDWR);
- if (fd < 0) {
- error("%.100s: %.100s", ttyname, strerror(errno));
- } else {
-#ifdef USE_VHANGUP
- close(*ttyfd);
- *ttyfd = fd;
-#else /* USE_VHANGUP */
- close(fd);
-#endif /* USE_VHANGUP */
- }
- /* Verify that we now have a controlling tty. */
- fd = open(_PATH_TTY, O_WRONLY);
- if (fd < 0)
- error("open /dev/tty failed - could not set controlling tty: %.100s",
- strerror(errno));
- else
- close(fd);
-#endif /* _UNICOS */
-}
-
-/* Changes the window size associated with the pty. */
-
-void
-pty_change_window_size(int ptyfd, int row, int col,
- int xpixel, int ypixel)
-{
- struct winsize w;
-
- w.ws_row = row;
- w.ws_col = col;
- w.ws_xpixel = xpixel;
- w.ws_ypixel = ypixel;
- (void) ioctl(ptyfd, TIOCSWINSZ, &w);
-}
-
-void
-pty_setowner(struct passwd *pw, const char *ttyname)
-{
- struct group *grp;
- gid_t gid;
- mode_t mode;
- struct stat st;
-
- /* Determine the group to make the owner of the tty. */
- grp = getgrnam("tty");
- if (grp) {
- gid = grp->gr_gid;
- mode = S_IRUSR | S_IWUSR | S_IWGRP;
- } else {
- gid = pw->pw_gid;
- mode = S_IRUSR | S_IWUSR | S_IWGRP | S_IWOTH;
- }
-
- /*
- * Change owner and mode of the tty as required.
- * Warn but continue if filesystem is read-only and the uids match/
- * tty is owned by root.
- */
- if (stat(ttyname, &st))
- fatal("stat(%.100s) failed: %.100s", ttyname,
- strerror(errno));
-
- if (st.st_uid != pw->pw_uid || st.st_gid != gid) {
- if (chown(ttyname, pw->pw_uid, gid) < 0) {
- if (errno == EROFS &&
- (st.st_uid == pw->pw_uid || st.st_uid == 0))
- error("chown(%.100s, %u, %u) failed: %.100s",
- ttyname, (u_int)pw->pw_uid, (u_int)gid,
- strerror(errno));
- else
- fatal("chown(%.100s, %u, %u) failed: %.100s",
- ttyname, (u_int)pw->pw_uid, (u_int)gid,
- strerror(errno));
- }
- }
-
- if ((st.st_mode & (S_IRWXU|S_IRWXG|S_IRWXO)) != mode) {
- if (chmod(ttyname, mode) < 0) {
- if (errno == EROFS &&
- (st.st_mode & (S_IRGRP | S_IROTH)) == 0)
- error("chmod(%.100s, 0%o) failed: %.100s",
- ttyname, (int)mode, strerror(errno));
- else
- fatal("chmod(%.100s, 0%o) failed: %.100s",
- ttyname, (int)mode, strerror(errno));
- }
- }
-}
diff --git a/usr/src/cmd/stat/Makefile b/usr/src/cmd/stat/Makefile
index 34149b2b37..faffc6a437 100644
--- a/usr/src/cmd/stat/Makefile
+++ b/usr/src/cmd/stat/Makefile
@@ -19,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2011, 2012, Joyent, Inc. All rights reserved.
# Use is subject to license terms.
#
# cmd/stat/Makefile
@@ -27,7 +27,14 @@
include ../Makefile.cmd
-SUBDIRS= arcstat iostat mpstat vmstat fsstat kstat
+SUBDIRS= arcstat \
+ fsstat \
+ iostat \
+ kstat \
+ mpstat \
+ vfsstat \
+ vmstat \
+ ziostat
all := TARGET = all
install := TARGET = install
diff --git a/usr/src/cmd/stat/arcstat/Makefile b/usr/src/cmd/stat/arcstat/Makefile
index 6ae60a8d3d..a98e2fee7e 100644
--- a/usr/src/cmd/stat/arcstat/Makefile
+++ b/usr/src/cmd/stat/arcstat/Makefile
@@ -11,6 +11,7 @@
#
# Copyright 2014 Adam Stevko. All rights reserved.
+# Copyright (c) 2011, Joyent, Inc. All rights reserved.
#
include $(SRC)/cmd/Makefile.cmd
diff --git a/usr/src/cmd/stat/arcstat/arcstat.pl b/usr/src/cmd/stat/arcstat/arcstat.pl
index 8f13221910..8f13221910 100755..100644
--- a/usr/src/cmd/stat/arcstat/arcstat.pl
+++ b/usr/src/cmd/stat/arcstat/arcstat.pl
diff --git a/usr/src/cmd/stat/vfsstat/Makefile b/usr/src/cmd/stat/vfsstat/Makefile
new file mode 100644
index 0000000000..04b5085243
--- /dev/null
+++ b/usr/src/cmd/stat/vfsstat/Makefile
@@ -0,0 +1,41 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2011, Joyent, Inc. All rights reserved.
+#
+
+include $(SRC)/cmd/Makefile.cmd
+
+PROG= vfsstat
+
+.KEEP_STATE:
+
+all: $(PROG)
+
+install: all .WAIT $(ROOTPROG)
+
+clean:
+
+$(ROOTBINPROG): $(PROG)
+ $(INS.file)
+
+lint:
+
+include $(SRC)/cmd/Makefile.targ
diff --git a/usr/src/cmd/stat/vfsstat/vfsstat.pl b/usr/src/cmd/stat/vfsstat/vfsstat.pl
new file mode 100644
index 0000000000..a3780b8e63
--- /dev/null
+++ b/usr/src/cmd/stat/vfsstat/vfsstat.pl
@@ -0,0 +1,227 @@
+#!/usr/perl5/bin/perl -w
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2011 Joyent, Inc.
+#
+# vfsstat - report VFS statistics per zone
+#
+# USAGE: vfsstat [-hIMrzZ] [interval [count]]
+# -h # help
+# -I # print results per interval (where applicable)
+# -M # print results in MB/s
+# -r # print data in comma-separated format
+# -z # hide zones with no VFS activity
+# -Z # print data for all zones
+#
+# eg, vfsstat # print summary since zone boot
+# vfsstat 1 # print continually every 1 second
+# vfsstat 1 5 # print 5 times, every 1 second
+#
+# NOTES:
+#
+# - The calculations and output fields emulate those from iostat(1M) as closely
+# as possible. When only one zone is actively performing disk I/O, the
+# results from iostat(1M) in the global zone and vfsstat in the local zone
+# should be almost identical. Note that many VFS read operations are handled
+# by the ARC, so vfsstat and iostat(1M) will be similar only when most
+# requests are missing in the ARC.
+#
+# - As with iostat(1M), a result of 100% for VFS read and write utilization does
+# not mean that the syscall layer is fully saturated. Instead, that
+# measurement just shows that at least one operation was pending over the last
+# quanta of time examined. Since the VFS layer can process more than one
+# operation concurrently, this measurement will frequently be 100% but the VFS
+# layer can still accept additional requests.
+#
+# - This script is based on Brendan Gregg's K9Toolkit examples:
+#
+# http://www.brendangregg.com/k9toolkit.html
+#
+
+use Getopt::Std;
+use Sun::Solaris::Kstat;
+my $Kstat = Sun::Solaris::Kstat->new();
+
+# Process command line args
+usage() if defined $ARGV[0] and $ARGV[0] eq "--help";
+getopts('hIMrzZ') or usage();
+usage() if defined $main::opt_h;
+$main::opt_h = 0;
+
+my $USE_MB = defined $main::opt_M ? $main::opt_M : 0;
+my $USE_INTERVAL = defined $main::opt_I ? $main::opt_I : 0;
+my $USE_COMMA = defined $main::opt_r ? $main::opt_r : 0;
+my $HIDE_ZEROES = defined $main::opt_z ? $main::opt_z : 0;
+my $ALL_ZONES = defined $main::opt_Z ? $main::opt_Z : 0;
+
+my ($interval, $count);
+if ( defined($ARGV[0]) ) {
+ $interval = $ARGV[0];
+ $count = defined ($ARGV[1]) ? $ARGV[1] : 2**32;
+ usage() if ($interval == 0);
+} else {
+ $interval = 1;
+ $count = 1;
+}
+
+my $HEADER_FMT = $USE_COMMA ?
+ "r/%s,w/%s,%sr/%s,%sw/%s,ractv,wactv,read_t,writ_t,%%r,%%w," .
+ "d/%s,del_t,zone\n" :
+ " r/%s w/%s %sr/%s %sw/%s ractv wactv read_t writ_t " .
+ "%%r %%w d/%s del_t zone\n";
+my $DATA_FMT = $USE_COMMA ?
+ "%.1f,%.1f,%.1f,%.1f,%.1f,%.1f,%.1f,%.1f,%d,%d,%.1f,%.1f,%s,%d\n" :
+ "%5.1f %5.1f %5.1f %5.1f %5.1f %5.1f %6.1f %6.1f %3d %3d " .
+ "%5.1f %6.1f %s (%d)\n";
+
+my $BYTES_PREFIX = $USE_MB ? "M" : "k";
+my $BYTES_DIVISOR = $USE_MB ? 1024 * 1024 : 1024;
+my $INTERVAL_SUFFIX = $USE_INTERVAL ? "i" : "s";
+my $NANOSEC = 1000000000;
+
+my @fields = ( 'reads', 'writes', 'nread', 'nwritten', 'rtime', 'wtime',
+ 'rlentime', 'wlentime', 'delay_cnt', 'delay_time', 'snaptime' );
+
+chomp(my $curzone = (`/sbin/zonename`));
+
+my %old = ();
+my $rows_printed = 0;
+
+for (my $ii = 0; $ii < $count; $ii++) {
+ # Read list of visible zones and their zone IDs
+ my @zones = ();
+ my %zoneids = ();
+ my $zoneadm = `zoneadm list -p | cut -d: -f1,2`;
+ @lines = split(/\n/, $zoneadm);
+ foreach $line (@lines) {
+ @tok = split(/:/, $line);
+ $zoneids->{$tok[1]} = $tok[0];
+ push(@zones, $tok[1]);
+ }
+
+ $Kstat->update();
+
+ # Print the column header every 20 rows
+ if ($rows_printed == 0 || $ALL_ZONES) {
+ printf($HEADER_FMT, $INTERVAL_SUFFIX, $INTERVAL_SUFFIX,
+ $BYTES_PREFIX, $INTERVAL_SUFFIX, $BYTES_PREFIX,
+ $INTERVAL_SUFFIX, $INTERVAL_SUFFIX);
+ }
+
+ $rows_printed = $rows_printed >= 20 ? 0 : $rows_printed + 1;
+
+ foreach $zone (@zones) {
+ if ((!$ALL_ZONES) && ($zone ne $curzone)) {
+ next;
+ }
+
+ if (! defined $old->{$zone}) {
+ $old->{$zone} = ();
+ foreach $field (@fields) { $old->{$zone}->{$field} = 0; }
+ }
+
+ #
+ # Kstats have a 30-character limit (KSTAT_STRLEN) on their
+ # names, so if the zone name exceeds that limit, use the first
+ # 30 characters.
+ #
+ my $trimmed_zone = substr($zone, 0, 30);
+ my $zoneid = $zoneids->{$zone};
+
+ print_stats($zone, $zoneid,
+ $Kstat->{'zone_vfs'}{$zoneid}{$trimmed_zone}, $old->{$zone});
+ }
+
+ sleep ($interval);
+}
+
+exit(0);
+
+sub print_stats {
+ my $zone = $_[0];
+ my $zoneid = $_[1];
+ my $data = $_[2];
+ my $old = $_[3];
+
+ my $etime = $data->{'snaptime'} -
+ ($old->{'snaptime'} > 0 ? $old->{'snaptime'} : $data->{'crtime'});
+
+ # Calculate basic statistics
+ my $rate_divisor = $USE_INTERVAL ? 1 : $etime;
+ my $reads = ($data->{'reads'} - $old->{'reads'}) / $rate_divisor;
+ my $writes = ($data->{'writes'} - $old->{'writes'}) / $rate_divisor;
+ my $nread = ($data->{'nread'} - $old->{'nread'}) /
+ $rate_divisor / $BYTES_DIVISOR;
+ my $nwritten = ($data->{'nwritten'} - $old->{'nwritten'}) /
+ $rate_divisor / $BYTES_DIVISOR;
+
+ # Calculate transactions per second
+ my $r_tps = ($data->{'reads'} - $old->{'reads'}) / $etime;
+ my $w_tps = ($data->{'writes'} - $old->{'writes'}) / $etime;
+
+ # Calculate average length of active queue
+ my $r_actv = (($data->{'rlentime'} - $old->{'rlentime'}) / $NANOSEC) /
+ $etime;
+ my $w_actv = (($data->{'wlentime'} - $old->{'wlentime'}) / $NANOSEC) /
+ $etime;
+
+ # Calculate average service time
+ # multiply by 1000 to convert to usecs for conssistency with del_t
+ my $read_t = ($r_tps > 0 ? $r_actv * (1000 / $r_tps) : 0.0) * 1000;
+ my $writ_t = ($w_tps > 0 ? $w_actv * (1000 / $w_tps) : 0.0) * 1000;
+
+ # Calculate I/O throttle delay metrics
+ my $delays = $data->{'delay_cnt'} - $old->{'delay_cnt'};
+ my $d_tps = $delays / $etime;
+ my $del_t = $delays > 0 ?
+ ($data->{'delay_time'} - $old->{'delay_time'}) / $delays : 0.0;
+
+ # Calculate the % time the VFS layer is active
+ my $r_b_pct = ((($data->{'rtime'} - $old->{'rtime'}) / $NANOSEC) /
+ $etime) * 100;
+ my $w_b_pct = ((($data->{'wtime'} - $old->{'wtime'}) / $NANOSEC) /
+ $etime) * 100;
+
+ if (! $HIDE_ZEROES || $reads != 0.0 || $writes != 0.0 ||
+ $nread != 0.0 || $nwritten != 0.0) {
+ printf($DATA_FMT, $reads, $writes, $nread, $nwritten, $r_actv,
+ $w_actv, $read_t, $writ_t, $r_b_pct, $w_b_pct,
+ $d_tps, $del_t, substr($zone, 0, 8), $zoneid);
+ }
+
+ # Save current calculations for next loop
+ foreach (@fields) { $old->{$_} = $data->{$_}; }
+}
+
+sub usage {
+ print STDERR <<END;
+USAGE: vfsstat [-hIMrzZ] [interval [count]]
+ eg, vfsstat # print summary since zone boot
+ vfsstat 1 # print continually every 1 second
+ vfsstat 1 5 # print 5 times, every 1 second
+ vfsstat -I # print results per interval (where applicable)
+ vfsstat -M # print results in MB/s
+ vfsstat -r # print results in comma-separated format
+ vfsstat -z # hide zones with no VFS activity
+ vfsstat -Z # print results for all zones
+END
+ exit 1;
+}
diff --git a/usr/src/cmd/stat/ziostat/Makefile b/usr/src/cmd/stat/ziostat/Makefile
new file mode 100644
index 0000000000..c338b59678
--- /dev/null
+++ b/usr/src/cmd/stat/ziostat/Makefile
@@ -0,0 +1,41 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2011, Joyent, Inc. All rights reserved.
+#
+
+include $(SRC)/cmd/Makefile.cmd
+
+PROG= ziostat
+
+.KEEP_STATE:
+
+all: $(PROG)
+
+install: all .WAIT $(ROOTPROG)
+
+clean:
+
+$(ROOTBINPROG): $(PROG)
+ $(INS.file)
+
+lint:
+
+include $(SRC)/cmd/Makefile.targ
diff --git a/usr/src/cmd/stat/ziostat/ziostat.pl b/usr/src/cmd/stat/ziostat/ziostat.pl
new file mode 100755
index 0000000000..cf95d2f5a5
--- /dev/null
+++ b/usr/src/cmd/stat/ziostat/ziostat.pl
@@ -0,0 +1,204 @@
+#!/usr/perl5/bin/perl -w
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2011 Joyent, Inc.
+#
+# ziostat - report I/O statistics per zone
+#
+# USAGE: ziostat [-hIMrzZ] [interval [count]]
+# -h # help
+# -I # print results per interval (where applicable)
+# -M # print results in MB/s
+# -r # print data in comma-separated format
+# -z # hide zones with no ZFS I/O activity
+# -Z # print data for all zones
+#
+# eg, ziostat # print summary since zone boot
+# ziostat 1 # print continually every 1 second
+# ziostat 1 5 # print 5 times, every 1 second
+#
+# NOTES:
+#
+# - The calculations and output fields emulate those from iostat(1M) as closely
+# as possible. When only one zone is actively performing disk I/O, the
+# results from iostat(1M) in the global zone and ziostat in the local zone
+# should be almost identical.
+#
+# - As with iostat(1M), a result of 100% for disk utilization does not mean that
+# the disk is fully saturated. Instead, that measurement just shows that at
+# least one operation was pending over the last quanta of time examined.
+# Since disk devices can process more than one operation concurrently, this
+# measurement will frequently be 100% but the disk can still offer higher
+# performance.
+#
+# - This script is based on Brendan Gregg's K9Toolkit examples:
+#
+# http://www.brendangregg.com/k9toolkit.html
+#
+
+use Getopt::Std;
+use Sun::Solaris::Kstat;
+my $Kstat = Sun::Solaris::Kstat->new();
+
+# Process command line args
+usage() if defined $ARGV[0] and $ARGV[0] eq "--help";
+getopts('hIMrzZ') or usage();
+usage() if defined $main::opt_h;
+$main::opt_h = 0;
+
+my $USE_MB = defined $main::opt_M ? $main::opt_M : 0;
+my $USE_INTERVAL = defined $main::opt_I ? $main::opt_I : 0;
+my $USE_COMMA = defined $main::opt_r ? $main::opt_r : 0;
+my $HIDE_ZEROES = defined $main::opt_z ? $main::opt_z : 0;
+my $ALL_ZONES = defined $main::opt_Z ? $main::opt_Z : 0;
+
+my ($interval, $count);
+if ( defined($ARGV[0]) ) {
+ $interval = $ARGV[0];
+ $count = defined ($ARGV[1]) ? $ARGV[1] : 2**32;
+ usage() if ($interval == 0);
+} else {
+ $interval = 1;
+ $count = 1;
+}
+
+my $HEADER_FMT = $USE_COMMA ?
+ "r/%s,%sr/%s,actv,wsvc_t,asvc_t,%%b,zone\n" :
+ " r/%s %sr/%s actv wsvc_t asvc_t %%b zone\n";
+my $DATA_FMT = $USE_COMMA ?
+ "%.1f,%.1f,%.1f,%.1f,%.1f,%d,%s,%d\n" :
+ " %6.1f %6.1f %6.1f %6.1f %6.1f %3d %s (%d)\n";
+
+my $BYTES_PREFIX = $USE_MB ? "M" : "k";
+my $BYTES_DIVISOR = $USE_MB ? 1024 * 1024 : 1024;
+my $INTERVAL_SUFFIX = $USE_INTERVAL ? "i" : "s";
+my $NANOSEC = 1000000000;
+
+my @fields = ( 'reads', 'nread', 'waittime', 'rtime', 'rlentime', 'snaptime' );
+
+chomp(my $curzone = (`/sbin/zonename`));
+
+# Read list of visible zones and their zone IDs
+my @zones = ();
+my %zoneids = ();
+my $zoneadm = `zoneadm list -p | cut -d: -f1,2`;
+@lines = split(/\n/, $zoneadm);
+foreach $line (@lines) {
+ @tok = split(/:/, $line);
+ $zoneids->{$tok[1]} = $tok[0];
+ push(@zones, $tok[1]);
+}
+
+my %old = ();
+my $rows_printed = 0;
+
+$Kstat->update();
+
+for (my $ii = 0; $ii < $count; $ii++) {
+ # Print the column header every 20 rows
+ if ($rows_printed == 0 || $ALL_ZONES) {
+ printf($HEADER_FMT, $INTERVAL_SUFFIX, $BYTES_PREFIX,
+ $INTERVAL_SUFFIX, $INTERVAL_SUFFIX);
+ }
+
+ $rows_printed = $rows_printed >= 20 ? 0 : $rows_printed + 1;
+
+ foreach $zone (@zones) {
+ if ((!$ALL_ZONES) && ($zone ne $curzone)) {
+ next;
+ }
+
+ if (! defined $old->{$zone}) {
+ $old->{$zone} = ();
+ foreach $field (@fields) { $old->{$zone}->{$field} = 0; }
+ }
+
+ #
+ # Kstats have a 30-character limit (KSTAT_STRLEN) on their
+ # names, so if the zone name exceeds that limit, use the first
+ # 30 characters.
+ #
+ my $trimmed_zone = substr($zone, 0, 30);
+ my $zoneid = $zoneids->{$zone};
+
+ print_stats($zone, $zoneid,
+ $Kstat->{'zone_zfs'}{$zoneid}{$trimmed_zone}, $old->{$zone});
+ }
+
+ sleep ($interval);
+ $Kstat->update();
+}
+
+sub print_stats {
+ my $zone = $_[0];
+ my $zoneid = $_[1];
+ my $data = $_[2];
+ my $old = $_[3];
+
+ my $etime = $data->{'snaptime'} -
+ ($old->{'snaptime'} > 0 ? $old->{'snaptime'} : $data->{'crtime'});
+
+ # Calculate basic statistics
+ my $rate_divisor = $USE_INTERVAL ? 1 : $etime;
+ my $reads = ($data->{'reads'} - $old->{'reads'}) / $rate_divisor;
+ my $nread = ($data->{'nread'} - $old->{'nread'}) /
+ $rate_divisor / $BYTES_DIVISOR;
+
+ # Calculate overall transactions per second
+ my $ops = $data->{'reads'} - $old->{'reads'};
+ my $tps = $ops / $etime;
+
+ # Calculate average length of disk run queue
+ my $actv = (($data->{'rlentime'} - $old->{'rlentime'}) / $NANOSEC) /
+ $etime;
+
+ # Calculate average disk wait and service times
+ my $wsvc = $ops > 0 ? (($data->{'waittime'} - $old->{'waittime'}) /
+ 1000000) / $ops : 0.0;
+ my $asvc = $tps > 0 ? $actv * (1000 / $tps) : 0.0;
+
+ # Calculate the % time the disk run queue is active
+ my $b_pct = ((($data->{'rtime'} - $old->{'rtime'}) / $NANOSEC) /
+ $etime) * 100;
+
+ if (! $HIDE_ZEROES || $reads != 0.0 || $nread != 0.0 ) {
+ printf($DATA_FMT, $reads, $nread, $actv, $wsvc, $asvc,
+ $b_pct, substr($zone, 0, 8), $zoneid);
+ }
+
+ # Save current calculations for next loop
+ foreach (@fields) { $old->{$_} = $data->{$_}; }
+}
+
+sub usage {
+ print STDERR <<END;
+USAGE: ziostat [-hIMrzZ] [interval [count]]
+ eg, ziostat # print summary since zone boot
+ ziostat 1 # print continually every 1 second
+ ziostat 1 5 # print 5 times, every 1 second
+ ziostat -I # print results per interval (where applicable)
+ ziostat -M # print results in MB/s
+ ziostat -r # print results in comma-separated format
+ ziostat -z # hide zones with no ZFS I/O activity
+ ziostat -Z # print results for all zones
+END
+ exit 1;
+}
diff --git a/usr/src/cmd/svc/configd/configd.h b/usr/src/cmd/svc/configd/configd.h
index ff2b450619..4ab5567556 100644
--- a/usr/src/cmd/svc/configd/configd.h
+++ b/usr/src/cmd/svc/configd/configd.h
@@ -24,6 +24,10 @@
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
+ */
+
#ifndef _CONFIGD_H
#define _CONFIGD_H
@@ -88,14 +92,9 @@ extern "C" {
#define CONFIGD_CORE "core.%f.%t.%p"
-#ifndef NDEBUG
#define bad_error(f, e) \
- uu_warn("%s:%d: %s() returned bad error %d. Aborting.\n", \
- __FILE__, __LINE__, f, e); \
- abort()
-#else
-#define bad_error(f, e) abort()
-#endif
+ uu_panic("%s:%d: %s() returned bad error %d. Aborting.\n", \
+ __FILE__, __LINE__, f, e);
typedef enum backend_type {
BACKEND_TYPE_NORMAL = 0,
diff --git a/usr/src/cmd/svc/configd/rc_node.c b/usr/src/cmd/svc/configd/rc_node.c
index 3cc30e3e67..149f2a6cb5 100644
--- a/usr/src/cmd/svc/configd/rc_node.c
+++ b/usr/src/cmd/svc/configd/rc_node.c
@@ -23,6 +23,9 @@
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/
+/*
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ */
/*
* rc_node.c - In-memory SCF object management
diff --git a/usr/src/cmd/svc/milestone/net-routing-setup b/usr/src/cmd/svc/milestone/net-routing-setup
index 6ab1a6c7f0..b4ee7d39ac 100644
--- a/usr/src/cmd/svc/milestone/net-routing-setup
+++ b/usr/src/cmd/svc/milestone/net-routing-setup
@@ -21,11 +21,15 @@
#
#
# Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+#
+# Copyright (c) 2012 Joyent, Inc. All rights reserved.
# This script configures IP routing.
. /lib/svc/share/smf_include.sh
+set -o xtrace
+
#
# In a shared-IP zone we need this service to be up, but all of the work
# it tries to do is irrelevant (and will actually lead to the service
@@ -156,7 +160,8 @@ fi
# however, as persistent daemon state is now controlled by SMF.
#
ipv4_routing_set=`/usr/bin/svcprop -p routeadm/ipv4-routing-set $SMF_FMRI`
-if [ -z "$defrouters" ]; then
+smartos_param=`/usr/bin/bootparams | grep "^smartos"`
+if [ -z "$defrouters" ] && [ "$smartos_param" != "" ]; then
#
# Set default value for ipv4-routing to enabled. If routeadm -e/-d
# has not yet been run by the administrator, we apply this default.
@@ -210,5 +215,21 @@ if [ -f /etc/inet/static_routes ]; then
done
fi
+#
+# Read /etc/inet/static_routes.vmadm and add each route.
+#
+if [ -f /etc/inet/static_routes.vmadm ]; then
+ echo "Adding vmadm persistent routes:"
+ /usr/bin/egrep -v "^(#|$)" /etc/inet/static_routes.vmadm | while read line; do
+ /usr/sbin/route add $line
+ done
+fi
+
+#
+# Log the result
+#
+echo "Routing setup complete:"
+/usr/bin/netstat -rn
+
# Clear exit status.
exit $SMF_EXIT_OK
diff --git a/usr/src/cmd/svc/milestone/network-location.xml b/usr/src/cmd/svc/milestone/network-location.xml
index aad337f42f..709e9df8f3 100644
--- a/usr/src/cmd/svc/milestone/network-location.xml
+++ b/usr/src/cmd/svc/milestone/network-location.xml
@@ -106,7 +106,7 @@
-->
<dependency
name='manifest-import'
- grouping='require_all'
+ grouping='optional_all'
restart_on='none'
type='service'>
<service_fmri value='svc:/system/manifest-import:default' />
diff --git a/usr/src/cmd/svc/milestone/network-routing-setup.xml b/usr/src/cmd/svc/milestone/network-routing-setup.xml
index b34d578e2a..85a74756da 100644
--- a/usr/src/cmd/svc/milestone/network-routing-setup.xml
+++ b/usr/src/cmd/svc/milestone/network-routing-setup.xml
@@ -40,11 +40,19 @@
<!-- loopback/physical network configuration is required -->
<dependency
- name='network'
- grouping='optional_all'
+ name='loopback'
+ grouping='require_all'
+ restart_on='none'
+ type='service'>
+ <service_fmri value='svc:/network/loopback' />
+ </dependency>
+
+ <dependency
+ name='physical'
+ grouping='require_all'
restart_on='none'
type='service'>
- <service_fmri value='svc:/milestone/network' />
+ <service_fmri value='svc:/network/physical' />
</dependency>
<!-- usr filesystem required to run routing-related commands -->
diff --git a/usr/src/cmd/svc/milestone/network.xml b/usr/src/cmd/svc/milestone/network.xml
index 75b5578f44..48386ebf73 100644
--- a/usr/src/cmd/svc/milestone/network.xml
+++ b/usr/src/cmd/svc/milestone/network.xml
@@ -54,6 +54,14 @@
<service_fmri value='svc:/network/physical' />
</dependency>
+ <dependency
+ name='routing-setup'
+ grouping='require_all'
+ restart_on='none'
+ type='service'>
+ <service_fmri value='svc:/network/routing-setup' />
+ </dependency>
+
<exec_method
type='method'
name='start'
diff --git a/usr/src/cmd/svc/milestone/single-user.xml b/usr/src/cmd/svc/milestone/single-user.xml
index 8797f13c47..579ecb5ddd 100644
--- a/usr/src/cmd/svc/milestone/single-user.xml
+++ b/usr/src/cmd/svc/milestone/single-user.xml
@@ -68,7 +68,7 @@
<dependency
name='manifests'
- grouping='require_all'
+ grouping='optional_all'
restart_on='none'
type='service'>
<service_fmri value='svc:/system/manifest-import' />
diff --git a/usr/src/cmd/svc/shell/smf_include.sh b/usr/src/cmd/svc/shell/smf_include.sh
index 77a1453cbb..429e3310f4 100644
--- a/usr/src/cmd/svc/shell/smf_include.sh
+++ b/usr/src/cmd/svc/shell/smf_include.sh
@@ -22,6 +22,7 @@
#
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
+# Copyright 2012 Joyent, Inc. All rights reserved.
# Copyright 2015 Nexenta Systems, Inc. All rights reserved.
#
@@ -235,7 +236,12 @@ smf_kill_contract() {
# SMF_EXIT_ERR_OTHER, although not defined, encompasses all non-zero
# exit status values.
#
+# The SMF_EXIT_NODAEMON exit status should be used when a method does not
+# need to run any persistent process. This indicates success, abandons the
+# contract, and allows dependencies to be met.
+#
SMF_EXIT_OK=0
+SMF_EXIT_NODAEMON=94
SMF_EXIT_ERR_FATAL=95
SMF_EXIT_ERR_CONFIG=96
SMF_EXIT_MON_DEGRADE=97
diff --git a/usr/src/cmd/svc/startd/graph.c b/usr/src/cmd/svc/startd/graph.c
index c831c99301..5a4e933220 100644
--- a/usr/src/cmd/svc/startd/graph.c
+++ b/usr/src/cmd/svc/startd/graph.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2013, Joyent, Inc. All rights reserved.
* Copyright (c) 2015, Syneto S.R.L. All rights reserved.
*/
@@ -142,6 +143,8 @@
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
#include <fm/libfmevent.h>
#include <libscf.h>
#include <libscf_priv.h>
@@ -4876,6 +4879,20 @@ vertex_subgraph_dependencies_shutdown(scf_handle_t *h, graph_vertex_t *v,
was_up = up_state(old_state);
now_up = up_state(v->gv_state);
+ if (halting != -1 && old_state == RESTARTER_STATE_DISABLED &&
+ v->gv_state != RESTARTER_STATE_DISABLED) {
+ /*
+ * We're halting and we have a svc which is transitioning to
+ * offline in parallel. This leads to a race condition where
+ * gt_enter_offline might re-enable the svc after we disabled
+ * it. Since we're halting, we want to ensure no svc ever
+ * transitions out of the disabled state. In this case, modify
+ * the flags to keep us on the halting path.
+ */
+ was_up = 0;
+ now_up = 0;
+ }
+
if (!was_up && now_up) {
++non_subgraph_svcs;
} else if (was_up && !now_up) {
@@ -6828,6 +6845,7 @@ repository_event_thread(void *unused)
char *fmri = startd_alloc(max_scf_fmri_size);
char *pg_name = startd_alloc(max_scf_value_size);
int r;
+ int fd;
h = libscf_handle_create_bound_loop();
@@ -6850,6 +6868,14 @@ retry:
goto retry;
}
+ if ((fd = open("/etc/svc/volatile/startd.ready", O_RDONLY | O_CREAT,
+ S_IRUSR)) < 0) {
+ log_error(LOG_WARNING, "Couldn't create startd.ready file\n",
+ SCF_GROUP_FRAMEWORK, scf_strerror(scf_error()));
+ } else {
+ (void) close(fd);
+ }
+
/*CONSTCOND*/
while (1) {
ssize_t res;
diff --git a/usr/src/cmd/svc/startd/method.c b/usr/src/cmd/svc/startd/method.c
index cc9ce6768c..c3cd0144c1 100644
--- a/usr/src/cmd/svc/startd/method.c
+++ b/usr/src/cmd/svc/startd/method.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2011 Joyent Inc.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
/*
@@ -100,34 +100,18 @@ static uint_t method_events[] = {
* method_record_start(restarter_inst_t *)
* Record a service start for rate limiting. Place the current time
* in the circular array of instance starts.
+ *
+ * Save the critical_failure_period and critical_failure_allowed with either
+ * the defaults or the svc properties startd/critical_failure_count and
+ * startd/critical_failure_period.
+ * ri_crit_fail_allowed is capped at RINST_START_TIMES.
*/
static void
method_record_start(restarter_inst_t *inst)
{
- int index = inst->ri_start_index++ % RINST_START_TIMES;
-
- inst->ri_start_time[index] = gethrtime();
-}
-
-/*
- * method_rate_critical(restarter_inst_t *)
- * Return true if the average start interval is less than the permitted
- * interval. The implicit interval defaults to RINST_FAILURE_RATE_NS and
- * RINST_START_TIMES but may be overridden with the svc properties
- * startd/critical_failure_count and startd/critical_failure_period
- * which represent the number of failures to consider and the amount of
- * time in seconds in which that number may occur, respectively. Note that
- * this time is measured as of the transition to 'enabled' rather than wall
- * clock time.
- * Implicit success if insufficient measurements for an average exist.
- */
-int
-method_rate_critical(restarter_inst_t *inst)
-{
+ int index;
+ uint_t critical_failure_allowed = RINST_START_TIMES;
hrtime_t critical_failure_period;
- uint_t critical_failure_count = RINST_START_TIMES;
- uint_t n = inst->ri_start_index;
- hrtime_t avg_ns = 0;
uint64_t scf_fr, scf_st;
scf_propvec_t *prop = NULL;
scf_propvec_t restart_critical[] = {
@@ -151,17 +135,48 @@ method_rate_critical(restarter_inst_t *inst)
* in seconds but tracked in ns
*/
critical_failure_period = (hrtime_t)scf_fr * NANOSEC;
- critical_failure_count = (uint_t)scf_st;
+ critical_failure_allowed = (uint_t)scf_st;
+
+ if (critical_failure_allowed > RINST_START_TIMES)
+ critical_failure_allowed = RINST_START_TIMES;
+ if (critical_failure_allowed < 1)
+ critical_failure_allowed = 1;
+
}
- if (inst->ri_start_index < critical_failure_count)
+
+ inst->ri_crit_fail_allowed = critical_failure_allowed;
+ inst->ri_crit_fail_period = critical_failure_period;
+
+ index = inst->ri_start_index++ % critical_failure_allowed;
+ inst->ri_start_time[index] = gethrtime();
+}
+
+/*
+ * method_rate_critical(restarter_inst_t *)
+ * Return true if the number of failures within the interval
+ * ri_crit_fail_period exceeds ri_crit_fail_allowed. The allowed failure
+ * count defaults to RINST_START_TIMES and the implicit interval defaults
+ * to RINST_FAILURE_RATE_NS but may be overridden with the svc properties
+ * startd/critical_failure_count and startd/critical_failure_period which
+ * represent the acceptable number of failures and the amount of time in
+ * seconds in which that number may occur, respectively. Note that this time
+ * is measured as of the transition to 'enabled' rather than wall clock
+ * time. Implicitly not critical if insufficient failures have occured.
+ */
+int
+method_rate_critical(restarter_inst_t *inst)
+{
+ uint_t n = inst->ri_start_index;
+ uint_t fail_allowed = inst->ri_crit_fail_allowed;
+ hrtime_t diff_ns;
+
+ if (n < fail_allowed)
return (0);
- avg_ns =
- (inst->ri_start_time[(n - 1) % critical_failure_count] -
- inst->ri_start_time[n % critical_failure_count]) /
- (critical_failure_count - 1);
+ diff_ns = inst->ri_start_time[(n - 1) % fail_allowed] -
+ inst->ri_start_time[n % fail_allowed];
- return (avg_ns < critical_failure_period);
+ return (diff_ns < inst->ri_crit_fail_period);
}
/*
@@ -989,7 +1004,8 @@ method_run(restarter_inst_t **instp, int type, int *exit_code)
goto contract_out;
}
- if (!WIFEXITED(ret_status)) {
+ if (!WIFEXITED(ret_status) &&
+ WEXITSTATUS(ret_status) != SMF_EXIT_NODAEMON) {
/*
* If method didn't exit itself (it was killed by an
* external entity, etc.), consider the entire
@@ -1018,7 +1034,7 @@ method_run(restarter_inst_t **instp, int type, int *exit_code)
}
*exit_code = WEXITSTATUS(ret_status);
- if (*exit_code != 0) {
+ if (*exit_code != 0 && *exit_code != SMF_EXIT_NODAEMON) {
log_error(LOG_WARNING,
"%s: Method \"%s\" failed with exit status %d.\n",
inst->ri_i.i_fmri, method, WEXITSTATUS(ret_status));
@@ -1027,6 +1043,7 @@ method_run(restarter_inst_t **instp, int type, int *exit_code)
log_instance(inst, B_TRUE, "Method \"%s\" exited with status "
"%d.", mname, *exit_code);
+ /* Note: we will take this path for SMF_EXIT_NODAEMON */
if (*exit_code != 0)
goto contract_out;
@@ -1073,7 +1090,10 @@ assured_kill:
}
contract_out:
- /* Abandon contracts for transient methods & methods that fail. */
+ /*
+ * Abandon contracts for transient methods, methods that exit with
+ * SMF_EXIT_NODAEMON & methods that fail.
+ */
transient = method_is_transient(inst, type);
if ((transient || *exit_code != 0 || result != 0) &&
(restarter_is_kill_method(method) < 0))
@@ -1169,7 +1189,7 @@ retry:
r = method_run(&inst, info->sf_method_type, &exit_code);
- if (r == 0 && exit_code == 0) {
+ if (r == 0 && (exit_code == 0 || exit_code == SMF_EXIT_NODAEMON)) {
/* Success! */
assert(inst->ri_i.i_next_state != RESTARTER_STATE_NONE);
@@ -1187,6 +1207,12 @@ retry:
else
method_remove_contract(inst, B_TRUE, B_TRUE);
}
+
+ /*
+ * For methods that exit with SMF_EXIT_NODAEMON, we already
+ * called method_remove_contract in method_run.
+ */
+
/*
* We don't care whether the handle was rebound because this is
* the last thing we do with it.
diff --git a/usr/src/cmd/svc/startd/startd.h b/usr/src/cmd/svc/startd/startd.h
index c1062e45e0..df3e98a27b 100644
--- a/usr/src/cmd/svc/startd/startd.h
+++ b/usr/src/cmd/svc/startd/startd.h
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
*/
#ifndef _STARTD_H
@@ -82,16 +82,9 @@ extern "C" {
#endif
-#ifndef NDEBUG
-#define bad_error(func, err) { \
- (void) fprintf(stderr, "%s:%d: %s() failed with unexpected " \
- "error %d. Aborting.\n", __FILE__, __LINE__, (func), (err)); \
- abort(); \
-}
-#else
-#define bad_error(func, err) abort()
-#endif
-
+#define bad_error(func, err) \
+ uu_panic("%s:%d: %s() failed with unexpected " \
+ "error %d. Aborting.\n", __FILE__, __LINE__, (func), (err));
#define min(a, b) (((a) < (b)) ? (a) : (b))
@@ -405,7 +398,7 @@ typedef enum {
#define RINST_RETAKE_MASK 0x0f000000
-#define RINST_START_TIMES 5 /* failures to consider */
+#define RINST_START_TIMES 10 /* up to 10 fails to consider */
#define RINST_FAILURE_RATE_NS 600000000000LL /* 1 failure/10 minutes */
#define RINST_WT_SVC_FAILURE_RATE_NS NANOSEC /* 1 failure/second */
@@ -427,6 +420,8 @@ typedef struct restarter_inst {
hrtime_t ri_start_time[RINST_START_TIMES];
uint_t ri_start_index; /* times started */
+ uint_t ri_crit_fail_allowed;
+ hrtime_t ri_crit_fail_period;
uu_list_node_t ri_link;
pthread_mutex_t ri_lock;
diff --git a/usr/src/cmd/svc/svcadm/Makefile b/usr/src/cmd/svc/svcadm/Makefile
index cc0cc160bf..1a6a0dd35c 100644
--- a/usr/src/cmd/svc/svcadm/Makefile
+++ b/usr/src/cmd/svc/svcadm/Makefile
@@ -21,6 +21,7 @@
#
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
+# Copyright 2012, Joyent, Inc. All rights reserved.
#
PROG = svcadm
@@ -49,7 +50,11 @@ $(PROG): $(OBJS)
$(POFILE): $(POFILES)
cat $(POFILES) > $(POFILE)
-install: all $(ROOTUSRSBINPROG)
+install: all $(ROOTSBINPROG) $(ROOTUSRSBINPROG)
+
+$(ROOTUSRSBINPROG):
+ -$(RM) $@
+ -$(SYMLINK) ../../sbin/$(PROG) $@
clean:
$(RM) $(OBJS)
diff --git a/usr/src/cmd/svc/svcadm/svcadm.c b/usr/src/cmd/svc/svcadm/svcadm.c
index 7cd61454c9..ac698930d4 100644
--- a/usr/src/cmd/svc/svcadm/svcadm.c
+++ b/usr/src/cmd/svc/svcadm/svcadm.c
@@ -24,7 +24,7 @@
*/
/*
- * Copyright 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc. All rights reserved.
*/
/*
@@ -71,16 +71,9 @@
*/
#define WAIT_INTERVAL 3
-#ifndef NDEBUG
-#define bad_error(func, err) { \
- pr_warn("%s:%d: %s() failed with unexpected error %d.\n", \
- __FILE__, __LINE__, (func), (err)); \
- abort(); \
-}
-#else
-#define bad_error(func, err) abort()
-#endif
-
+#define bad_error(func, err) \
+ uu_panic("%s:%d: %s() failed with unexpected error %d.\n", \
+ __FILE__, __LINE__, (func), (err));
struct ht_elt {
struct ht_elt *next;
diff --git a/usr/src/cmd/svc/svccfg/svccfg.h b/usr/src/cmd/svc/svccfg/svccfg.h
index 135d12bf33..9fdfba3221 100644
--- a/usr/src/cmd/svc/svccfg/svccfg.h
+++ b/usr/src/cmd/svc/svccfg/svccfg.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
*/
#ifndef _CMD_SVCCFG_H
@@ -319,15 +320,9 @@ typedef struct scf_callback {
*/
typedef struct tmpl_errors tmpl_errors_t;
-#ifndef NDEBUG
-#define bad_error(func, err) { \
- (void) fprintf(stderr, "%s:%d: %s() failed with unexpected " \
- "error %d. Aborting.\n", __FILE__, __LINE__, (func), (err)); \
- abort(); \
-}
-#else
-#define bad_error(func, err) abort()
-#endif
+#define bad_error(func, err) \
+ uu_panic("%s:%d: %s() failed with unexpected " \
+ "error %d. Aborting.\n", __FILE__, __LINE__, (func), (err));
#define SC_CMD_LINE 0x0
#define SC_CMD_FILE 0x1
diff --git a/usr/src/cmd/svc/svcs/Makefile b/usr/src/cmd/svc/svcs/Makefile
index 0e9fc52652..2ea89818c0 100644
--- a/usr/src/cmd/svc/svcs/Makefile
+++ b/usr/src/cmd/svc/svcs/Makefile
@@ -34,7 +34,7 @@ include ../../Makefile.cmd
include ../../Makefile.ctf
POFILE = $(PROG)_all.po
-LDLIBS += -lcontract -lscf -luutil -lumem -lnvpair -lzonecfg
+LDLIBS += -lcontract -lscf -luutil -lumem -lnvpair -lzonecfg -lsasl
CPPFLAGS += -I ../common
lint := LINTFLAGS = -mux
diff --git a/usr/src/cmd/svc/svcs/explain.c b/usr/src/cmd/svc/svcs/explain.c
index 42fca80172..41ba1b3b47 100644
--- a/usr/src/cmd/svc/svcs/explain.c
+++ b/usr/src/cmd/svc/svcs/explain.c
@@ -22,7 +22,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright (c) 2011, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
*/
/*
@@ -112,15 +112,9 @@
#define uu_list_append(lst, e) uu_list_insert_before(lst, NULL, e)
-#ifdef NDEBUG
-#define bad_error(func, err) abort()
-#else
#define bad_error(func, err) \
- (void) fprintf(stderr, "%s:%d: %s() failed with unknown error %d.\n", \
- __FILE__, __LINE__, func, err); \
- abort();
-#endif
-
+ uu_panic("%s:%d: %s() failed with unknown error %d.\n", \
+ __FILE__, __LINE__, func, err);
typedef struct {
const char *svcname;
@@ -200,6 +194,7 @@ static char *emsg_invalid_dep;
extern scf_handle_t *h;
extern char *g_zonename;
+extern char *g_zonealias;
/* ARGSUSED */
static int
@@ -2000,6 +1995,9 @@ print_service(inst_t *svcp, int verbose)
if (g_zonename != NULL)
(void) printf(gettext(" Zone: %s\n"), g_zonename);
+ if (g_zonealias != NULL)
+ (void) printf(gettext(" Alias: %s\n"), g_zonealias);
+
stime = svcp->stime.tv_sec;
tmp = localtime(&stime);
diff --git a/usr/src/cmd/svc/svcs/svcs.c b/usr/src/cmd/svc/svcs/svcs.c
index c54f4bd12d..b4882d1776 100644
--- a/usr/src/cmd/svc/svcs/svcs.c
+++ b/usr/src/cmd/svc/svcs/svcs.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/
/*
@@ -59,6 +59,7 @@
#include <sys/ctfs.h>
#include <sys/stat.h>
+#include <sasl/saslutil.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
@@ -134,6 +135,9 @@ static int first_paragraph = 1; /* For -l mode. */
static char *common_name_buf; /* Sized for maximal length value. */
char *locale; /* Current locale. */
char *g_zonename; /* zone being operated upon */
+char *g_zonealias; /* alias for zone, if any */
+static char g_aliasdec[MAXPATHLEN / 4 * 3]; /* decoded zone alias buffer */
+static char g_aliasbuf[MAXPATHLEN]; /* base64 encoded zone alias buffer */
/*
* Pathname storage for path generated from the fmri.
@@ -240,7 +244,25 @@ ht_free(void)
static void
ht_init(void)
{
- assert(ht_buckets == NULL);
+ if (ht_buckets != NULL) {
+ /*
+ * If we already have a hash table (e.g., because we are
+ * processing multiple zones), destroy it before creating
+ * a new one.
+ */
+ struct ht_elem *elem, *next;
+ int i;
+
+ for (i = 0; i < ht_buckets_num; i++) {
+ for (elem = ht_buckets[i]; elem != NULL; elem = next) {
+ next = elem->next;
+ free((char *)elem->fmri);
+ free(elem);
+ }
+ }
+
+ free(ht_buckets);
+ }
ht_buckets_num = 8;
ht_buckets = safe_malloc(sizeof (*ht_buckets) * ht_buckets_num);
@@ -553,7 +575,7 @@ get_restarter_time_prop(scf_instance_t *inst, const char *pname,
int r;
r = inst_get_single_val(inst, SCF_PG_RESTARTER, pname, SCF_TYPE_TIME,
- tvp, NULL, ok_if_empty ? EMPTY_OK : 0, 0, 1);
+ tvp, 0, ok_if_empty ? EMPTY_OK : 0, 0, 1);
return (r == 0 ? 0 : -1);
}
@@ -1650,7 +1672,7 @@ sprint_stime(char **buf, scf_walkinfo_t *wip)
SCF_PROPERTY_STATE_TIMESTAMP, &tv, 0);
} else {
r = pg_get_single_val(wip->pg, SCF_PROPERTY_STATE_TIMESTAMP,
- SCF_TYPE_TIME, &tv, NULL, 0);
+ SCF_TYPE_TIME, &tv, 0, 0);
}
if (r != 0) {
@@ -1702,7 +1724,7 @@ sortkey_stime(char *buf, int reverse, scf_walkinfo_t *wip)
SCF_PROPERTY_STATE_TIMESTAMP, &tv, 0);
else
r = pg_get_single_val(wip->pg, SCF_PROPERTY_STATE_TIMESTAMP,
- SCF_TYPE_TIME, &tv, NULL, 0);
+ SCF_TYPE_TIME, &tv, 0, 0);
if (r == 0) {
int64_t sec;
@@ -2514,7 +2536,7 @@ print_detailed(void *unused, scf_walkinfo_t *wip)
gettext("next_state"), buf);
if (pg_get_single_val(rpg, SCF_PROPERTY_STATE_TIMESTAMP,
- SCF_TYPE_TIME, &tv, NULL, 0) == 0) {
+ SCF_TYPE_TIME, &tv, 0, 0) == 0) {
stime = tv.tv_sec;
tmp = localtime(&stime);
for (tbsz = 50; ; tbsz *= 2) {
@@ -3663,6 +3685,24 @@ again:
assert(opt_zone == NULL || zids == NULL);
if (opt_zone == NULL) {
+ zone_status_t status;
+
+ if (zone_getattr(zids[zent], ZONE_ATTR_STATUS,
+ &status, sizeof (status)) < 0 ||
+ status != ZONE_IS_RUNNING) {
+ /*
+ * If this zone is not running or we cannot
+ * get its status, we do not want to attempt
+ * to bind an SCF handle to it, lest we
+ * accidentally interfere with a zone that
+ * is not yet running by looking up a door
+ * to its svc.configd (which could potentially
+ * block a mount with an EBUSY).
+ */
+ zent++;
+ goto nextzone;
+ }
+
if (getzonenamebyid(zids[zent++],
zonename, sizeof (zonename)) < 0) {
uu_warn(gettext("could not get name for "
@@ -3685,18 +3725,46 @@ again:
uu_die(gettext("invalid zone '%s'\n"), g_zonename);
scf_value_destroy(zone);
+
+ /*
+ * On SmartOS, there may be a base64-encoded string attribute
+ * named 'alias' associated with this zone. This alias is
+ * useful, so we attempt to make it available when we are
+ * displaying -xZ output. If it's not available or not
+ * decodable, we just ignore it.
+ */
+ if (g_zonename != NULL) {
+ unsigned len;
+ struct zone_attrtab zattrs;
+ zone_dochandle_t zhdl = zonecfg_init_handle();
+
+ bzero(&zattrs, sizeof (zattrs));
+ (void) strcpy(zattrs.zone_attr_name, "alias");
+
+ if (zhdl != NULL &&
+ zonecfg_get_handle(g_zonename, zhdl) == Z_OK &&
+ zonecfg_lookup_attr(zhdl, &zattrs) == Z_OK &&
+ zonecfg_get_attr_string(&zattrs, g_aliasbuf,
+ sizeof (g_aliasbuf)) == Z_OK &&
+ sasl_decode64(g_aliasbuf, strlen(g_aliasbuf),
+ g_aliasdec, sizeof (g_aliasdec), &len) == SASL_OK) {
+ g_aliasdec[len] = '\0';
+ g_zonealias = g_aliasdec;
+ } else {
+ g_zonealias = NULL;
+ }
+ zonecfg_fini_handle(zhdl);
+ }
}
if (scf_handle_bind(h) == -1) {
if (g_zonename != NULL) {
- uu_warn(gettext("Could not bind to repository "
+ if (show_zones)
+ goto nextzone;
+
+ uu_die(gettext("Could not bind to repository "
"server for zone %s: %s\n"), g_zonename,
scf_strerror(scf_error()));
-
- if (!show_zones)
- return (UU_EXIT_FATAL);
-
- goto nextzone;
}
uu_die(gettext("Could not bind to repository server: %s. "
@@ -3755,7 +3823,7 @@ again:
if (opt_mode == 'L') {
if ((err = scf_walk_fmri(h, argc, argv, SCF_WALK_MULTIPLE,
- print_log, NULL, &exit_status, uu_warn)) != 0) {
+ print_log, NULL, errarg, errfunc)) != 0) {
uu_warn(gettext("failed to iterate over "
"instances: %s\n"), scf_strerror(err));
exit_status = UU_EXIT_FATAL;
diff --git a/usr/src/cmd/svr4pkg/pkgadd/Makefile b/usr/src/cmd/svr4pkg/pkgadd/Makefile
index 66e6abb737..b1b4168a7c 100644
--- a/usr/src/cmd/svr4pkg/pkgadd/Makefile
+++ b/usr/src/cmd/svr4pkg/pkgadd/Makefile
@@ -35,7 +35,7 @@ ROOTLINKS= $(ROOTUSRSBIN)/pkgask
include $(SRC)/cmd/svr4pkg/Makefile.svr4pkg
LDLIBS += -lpkg -linstzones -ladm
-LDLIBS += -lcrypto -lwanboot
+LDLIBS += -lsunw_crypto -lwanboot
.KEEP_STATE:
diff --git a/usr/src/cmd/svr4pkg/pkgadm/Makefile b/usr/src/cmd/svr4pkg/pkgadm/Makefile
index 620e32cf0d..c4970c3b91 100644
--- a/usr/src/cmd/svr4pkg/pkgadm/Makefile
+++ b/usr/src/cmd/svr4pkg/pkgadm/Makefile
@@ -35,7 +35,7 @@ OBJS= addcert.o \
include $(SRC)/cmd/svr4pkg/Makefile.svr4pkg
-LDLIBS += -lpkg -ladm -lcrypto -lgen
+LDLIBS += -lpkg -ladm -lsunw_crypto -lgen
.KEEP_STATE:
all: $(PROG)
diff --git a/usr/src/cmd/tail/Makefile b/usr/src/cmd/tail/Makefile
index 293920cfd1..e660cedf2d 100644
--- a/usr/src/cmd/tail/Makefile
+++ b/usr/src/cmd/tail/Makefile
@@ -21,6 +21,7 @@ OBJS= forward.o misc.o read.o reverse.o tail.o
SRCS= $(OBJS:%.o=%.c)
include ../Makefile.cmd
+include ../Makefile.ctf
CLOBBERFILES= $(PROG)
CPPFLAGS += -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64
@@ -43,6 +44,10 @@ $(PROG): $(OBJS)
$(LINK.c) $(OBJS) -o $@ $(LDLIBS)
$(POST_PROCESS)
+%.o: %.c
+ $(COMPILE.c) $<
+ $(POST_PROCESS_O)
+
install: all .WAIT $(ROOTPROG) $(ROOTXPG4PROG)
$(ROOTXPG4PROG):
diff --git a/usr/src/cmd/truss/print.c b/usr/src/cmd/truss/print.c
index 72876bfdb8..ff896f5da4 100644
--- a/usr/src/cmd/truss/print.c
+++ b/usr/src/cmd/truss/print.c
@@ -873,7 +873,9 @@ prt_mc4(private_t *pri, int raw, long val) /* print memcntl() (4th) argument */
return;
case MC_SYNC:
- if ((val & ~(MS_SYNC|MS_ASYNC|MS_INVALIDATE)) == 0) {
+ if ((val &
+ ~(MS_SYNC|MS_ASYNC|MS_INVALIDATE|MS_INVALCURPROC))
+ == 0) {
*(s = pri->code_buf) = '\0';
if (val & MS_SYNC)
(void) strlcat(s, "|MS_SYNC", CBSIZE);
@@ -882,6 +884,9 @@ prt_mc4(private_t *pri, int raw, long val) /* print memcntl() (4th) argument */
if (val & MS_INVALIDATE)
(void) strlcat(s, "|MS_INVALIDATE",
CBSIZE);
+ if (val & MS_INVALCURPROC)
+ (void) strlcat(s, "|MS_INVALCURPROC",
+ CBSIZE);
}
break;
@@ -1998,6 +2003,8 @@ udp_optname(private_t *pri, long val)
case UDP_EXCLBIND: return ("UDP_EXCLBIND");
case UDP_RCVHDR: return ("UDP_RCVHDR");
case UDP_NAT_T_ENDPOINT: return ("UDP_NAT_T_ENDPOINT");
+ case UDP_SRCPORT_HASH: return ("UDP_SRCPORT_HASH");
+ case UDP_SND_TO_CONNECTED: return ("UDP_SND_TO_CONNECTED");
default: (void) snprintf(pri->code_buf,
sizeof (pri->code_buf), "0x%lx",
@@ -2446,7 +2453,10 @@ prt_zga(private_t *pri, int raw, long val)
case ZONE_ATTR_BOOTARGS: s = "ZONE_ATTR_BOOTARGS"; break;
case ZONE_ATTR_BRAND: s = "ZONE_ATTR_BRAND"; break;
case ZONE_ATTR_FLAGS: s = "ZONE_ATTR_FLAGS"; break;
- case ZONE_ATTR_PHYS_MCAP: s = "ZONE_ATTR_PHYS_MCAP"; break;
+ case ZONE_ATTR_DID: s = "ZONE_ATTR_DID"; break;
+ case ZONE_ATTR_PMCAP_NOVER: s = "ZONE_ATTR_PMCAP_NOVER"; break;
+ case ZONE_ATTR_PMCAP_PAGEOUT: s = "ZONE_ATTR_PMCAP_PAGEOUT";
+ break;
}
}
diff --git a/usr/src/cmd/truss/systable.c b/usr/src/cmd/truss/systable.c
index febd7d71f5..8c736a7044 100644
--- a/usr/src/cmd/truss/systable.c
+++ b/usr/src/cmd/truss/systable.c
@@ -29,6 +29,9 @@
/* Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved. */
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
@@ -1716,9 +1719,10 @@ const char * const afcodes[] = {
"POLICY", /* 29 */
"RDS", /* 30 */
"TRILL", /* 31 */
- "PACKET" /* 32 */
+ "PACKET", /* 32 */
+ "LX_NETLINK" /* 33 */
};
-#if MAX_AFCODES != 33
+#if MAX_AFCODES != 34
#error Need to update address-family table
#endif
diff --git a/usr/src/cmd/varpd/Makefile b/usr/src/cmd/varpd/Makefile
new file mode 100644
index 0000000000..215f23d29e
--- /dev/null
+++ b/usr/src/cmd/varpd/Makefile
@@ -0,0 +1,74 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+PROG= varpd
+OBJS = varpd.o
+SRCS = $(OBJS:%.o=../%.c)
+SVCMETHOD = svc-varpd
+MANIFEST = varpd.xml
+ROOTLIBVARPD = $(ROOTLIB)/varpd
+ROOTLIBVARPDPROG= $(PROG:%=$(ROOTLIBVARPD)/%)
+
+
+include ../Makefile.cmd
+include ../Makefile.ctf
+
+ROOTMANIFESTDIR= $(ROOTSVCNETWORK)
+
+CLEANFILES += $(OBJS)
+CPPFLAGS += -D_REENTRANT
+CFLAGS += $(CCVERBOSE)
+LDLIBS += -lvarpd -lumem -lscf
+$(NOT_RELEASE_BUILD)CPPFLAGS += -DDEBUG
+
+#
+# Our debug only umem related functions cause lint to get confused.
+#
+LINTFLAGS += -erroff=E_NAME_DEF_NOT_USED2
+
+C99MODE= -xc99=%all
+C99LMODE= -Xc99=%all
+
+.KEEP_STATE:
+
+$(PROG): $(OBJS)
+ $(LINK.c) -o $@ $(OBJS) $(LDLIBS)
+ $(POST_PROCESS)
+
+clean:
+ -$(RM) $(CLEANFILES)
+
+lint: lint_PROG
+
+%.o: ../%.c
+ $(COMPILE.c) $<
+ $(POST_PROCESS_O)
+
+check: $(CHKMANIFEST)
+
+clobber: clean
+ $(RM) $(PROG)
+
+install: $(PROG) $(ROOTLIBVARPDPROG) $(ROOTMANIFEST) $(ROOTSVCMETHOD)
+
+$(ROOTLIBVARPD):
+ $(INS.dir)
+
+$(ROOTLIBVARPD)/%: % $(ROOTLIBVARPD)
+ $(INS.file)
+
+FRC:
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/varpd/svc-varpd b/usr/src/cmd/varpd/svc-varpd
new file mode 100644
index 0000000000..c7483a033e
--- /dev/null
+++ b/usr/src/cmd/varpd/svc-varpd
@@ -0,0 +1,32 @@
+#!/usr/bin/sh
+#
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+. /lib/svc/share/smf_include.sh
+
+#
+# For the time being, we're going to manually make sure that the
+# overlay driver is loaded. We probably shouldn't do that in the long
+# run, but it helps for bootstrapping
+#
+add_drv overlay 2>/dev/null
+
+/usr/lib/varpd/varpd
+if [ $? = 0 ]; then
+ exit $SMF_EXIT_OK
+else
+ exit $SMF_EXIT_ERR_FATAL
+fi
diff --git a/usr/src/cmd/varpd/varpd.c b/usr/src/cmd/varpd/varpd.c
new file mode 100644
index 0000000000..8131604537
--- /dev/null
+++ b/usr/src/cmd/varpd/varpd.c
@@ -0,0 +1,526 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2015 Joyent, Inc.
+ */
+
+/*
+ * virtual arp daemon -- varpd
+ *
+ * The virtual arp daemon is the user land counterpart to the overlay driver. To
+ * truly understand its purpose and how it fits into things, you should read the
+ * overlay big theory statement in uts/common/io/overlay/overlay.c.
+ *
+ * varod's purpose it to provide a means for looking up the destination on the
+ * underlay network for a host on an overlay network and to also be a door
+ * server such that dladm(1M) via libdladm can configure and get useful status
+ * information. The heavy lifting is all done by libvarpd and the various lookup
+ * plugins.
+ *
+ * When varpd first starts up, we take of chdiring into /var/run/varpd, which is
+ * also where we create /var/run/varpd.door, our door server. After that we
+ * daemonize and only after we daemonize do we go ahead and load plugins. The
+ * reason that we don't load plugins before daemonizing is that they could very
+ * well be creating threads and thus lose them all. In general, we want to make
+ * things easier on our children and not require them to be fork safe.
+ *
+ * Once it's spun up, the main varpd thread sits in sigsuspend and really just
+ * hangs out waiting for something, libvarpd handles everything else.
+ */
+
+#include <libvarpd.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <libgen.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <paths.h>
+#include <limits.h>
+#include <sys/corectl.h>
+#include <signal.h>
+#include <strings.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <thread.h>
+#include <priv.h>
+#include <libscf.h>
+
+#define VARPD_EXIT_REQUESTED 0
+#define VARPD_EXIT_FATAL 1
+#define VARPD_EXIT_USAGE 2
+
+#define VARPD_RUNDIR "/var/run/varpd"
+#define VARPD_DEFAULT_DOOR "/var/run/varpd/varpd.door"
+
+#define VARPD_PG "varpd"
+#define VARPD_PROP_INC "include_path"
+
+static varpd_handle_t *varpd_handle;
+static const char *varpd_pname;
+static volatile boolean_t varpd_exit = B_FALSE;
+
+/*
+ * Debug builds are automatically wired up for umem debugging.
+ */
+#ifdef DEBUG
+const char *
+_umem_debug_init()
+{
+ return ("default,verbose");
+}
+
+const char *
+_umem_logging_init(void)
+{
+ return ("fail,contents");
+}
+#endif /* DEBUG */
+
+static void
+varpd_vwarn(FILE *out, const char *fmt, va_list ap)
+{
+ int error = errno;
+
+ (void) fprintf(out, "%s: ", varpd_pname);
+ (void) vfprintf(out, fmt, ap);
+
+ if (fmt[strlen(fmt) - 1] != '\n')
+ (void) fprintf(out, ": %s\n", strerror(error));
+}
+
+static void
+varpd_fatal(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ varpd_vwarn(stderr, fmt, ap);
+ va_end(ap);
+
+ exit(VARPD_EXIT_FATAL);
+}
+
+static void
+varpd_dfatal(int dfd, const char *fmt, ...)
+{
+ int status = VARPD_EXIT_FATAL;
+ va_list ap;
+
+ va_start(ap, fmt);
+ varpd_vwarn(stdout, fmt, ap);
+ va_end(ap);
+
+ /* Take a single shot at this */
+ (void) write(dfd, &status, sizeof (status));
+ exit(status);
+}
+
+/* ARGSUSED */
+static int
+varpd_plugin_walk_cb(varpd_handle_t *vph, const char *name, void *unused)
+{
+ (void) printf("loaded %s!\n", name);
+ return (0);
+}
+
+static int
+varpd_dir_setup(void)
+{
+ int fd;
+
+ if (mkdir(VARPD_RUNDIR, 0700) != 0) {
+ if (errno != EEXIST)
+ varpd_fatal("failed to create %s: %s", VARPD_RUNDIR,
+ strerror(errno));
+ }
+
+ fd = open(VARPD_RUNDIR, O_RDONLY);
+ if (fd < 0)
+ varpd_fatal("failed to open %s: %s", VARPD_RUNDIR,
+ strerror(errno));
+
+ if (fchown(fd, UID_NETADM, GID_NETADM) != 0)
+ varpd_fatal("failed to chown %s: %s\n", VARPD_RUNDIR,
+ strerror(errno));
+
+ return (fd);
+}
+
+/*
+ * Because varpd is generally run under SMF, we opt to keep its stdout and
+ * stderr to be whatever our parent set them up to be.
+ */
+static void
+varpd_fd_setup(void)
+{
+ int dupfd;
+
+ closefrom(STDERR_FILENO + 1);
+ dupfd = open(_PATH_DEVNULL, O_RDONLY);
+ if (dupfd < 0)
+ varpd_fatal("failed to open %s: %s", _PATH_DEVNULL,
+ strerror(errno));
+ if (dup2(dupfd, STDIN_FILENO) == -1)
+ varpd_fatal("failed to dup out stdin: %s", strerror(errno));
+}
+
+/*
+ * We borrow fmd's daemonization style. Basically, the parent waits for the
+ * child to successfully set up a door and recover all of the old configurations
+ * before we say that we're good to go.
+ */
+static int
+varpd_daemonize(int dirfd)
+{
+ char path[PATH_MAX];
+ struct rlimit rlim;
+ sigset_t set, oset;
+ int estatus, pfds[2];
+ pid_t child;
+ priv_set_t *pset;
+
+ /*
+ * Set a per-process core path to be inside of /var/run/varpd. Make sure
+ * that we aren't limited in our dump size.
+ */
+ (void) snprintf(path, sizeof (path),
+ "/var/run/varpd/core.%s.%%p", varpd_pname);
+ (void) core_set_process_path(path, strlen(path) + 1, getpid());
+
+ rlim.rlim_cur = RLIM_INFINITY;
+ rlim.rlim_max = RLIM_INFINITY;
+ (void) setrlimit(RLIMIT_CORE, &rlim);
+
+ /*
+ * Claim as many file descriptors as the system will let us.
+ */
+ if (getrlimit(RLIMIT_NOFILE, &rlim) == 0) {
+ rlim.rlim_cur = rlim.rlim_max;
+ (void) setrlimit(RLIMIT_NOFILE, &rlim);
+ }
+
+ /*
+ * chdir /var/run/varpd
+ */
+ if (fchdir(dirfd) != 0)
+ varpd_fatal("failed to chdir to %s", VARPD_RUNDIR);
+
+
+ /*
+ * At this point block all signals going in so we don't have the parent
+ * mistakingly exit when the child is running, but never block SIGABRT.
+ */
+ if (sigfillset(&set) != 0)
+ abort();
+ if (sigdelset(&set, SIGABRT) != 0)
+ abort();
+ if (sigprocmask(SIG_BLOCK, &set, &oset) != 0)
+ abort();
+
+ /*
+ * Do the fork+setsid dance.
+ */
+ if (pipe(pfds) != 0)
+ varpd_fatal("failed to create pipe for daemonizing");
+
+ if ((child = fork()) == -1)
+ varpd_fatal("failed to fork for daemonizing");
+
+ if (child != 0) {
+ /* We'll be exiting shortly, so allow for silent failure */
+ (void) close(pfds[1]);
+ if (read(pfds[0], &estatus, sizeof (estatus)) ==
+ sizeof (estatus))
+ _exit(estatus);
+
+ if (waitpid(child, &estatus, 0) == child && WIFEXITED(estatus))
+ _exit(WEXITSTATUS(estatus));
+
+ _exit(VARPD_EXIT_FATAL);
+ }
+
+ /*
+ * Drop privileges here.
+ *
+ * We should make sure we keep around PRIV_NET_PRIVADDR and
+ * PRIV_SYS_DLCONFIG, but drop everything else; however, keep basic
+ * privs and have our child drop them.
+ *
+ * We should also run as netadm:netadm and drop all of our groups.
+ */
+ if (setgroups(0, NULL) != 0)
+ abort();
+ if (setgid(GID_NETADM) == -1 || seteuid(UID_NETADM) == -1)
+ abort();
+ if ((pset = priv_allocset()) == NULL)
+ abort();
+ priv_basicset(pset);
+ if (priv_delset(pset, PRIV_PROC_EXEC) == -1 ||
+ priv_delset(pset, PRIV_PROC_INFO) == -1 ||
+ priv_delset(pset, PRIV_PROC_FORK) == -1 ||
+ priv_delset(pset, PRIV_PROC_SESSION) == -1 ||
+ priv_delset(pset, PRIV_FILE_LINK_ANY) == -1 ||
+ priv_addset(pset, PRIV_SYS_DL_CONFIG) == -1 ||
+ priv_addset(pset, PRIV_NET_PRIVADDR) == -1) {
+ abort();
+ }
+ /*
+ * Remove privs from the permitted set. That will cause them to be
+ * removed from the effective set. We want to make sure that in the case
+ * of a vulnerability, something can't get back in here and wreak more
+ * havoc. But if we want non-basic privs in the effective set, we have
+ * to request them explicitly.
+ */
+ if (setppriv(PRIV_SET, PRIV_PERMITTED, pset) == -1)
+ abort();
+ if (setppriv(PRIV_SET, PRIV_EFFECTIVE, pset) == -1)
+ abort();
+
+ priv_freeset(pset);
+
+ if (close(pfds[0]) != 0)
+ abort();
+ if (setsid() == -1)
+ abort();
+ if (sigprocmask(SIG_SETMASK, &oset, NULL) != 0)
+ abort();
+ (void) umask(0022);
+
+ return (pfds[1]);
+}
+
+static int
+varpd_setup_lookup_threads(void)
+{
+ int ret;
+ long i, ncpus = sysconf(_SC_NPROCESSORS_ONLN) * 2 + 1;
+
+ if (ncpus <= 0)
+ abort();
+ for (i = 0; i < ncpus; i++) {
+ thread_t thr;
+
+ ret = thr_create(NULL, 0,
+ (void *(*)(void *))libvarpd_overlay_lookup_run,
+ varpd_handle, THR_DETACHED | THR_DAEMON, &thr);
+ if (ret != 0)
+ return (ret);
+ }
+
+ return (0);
+}
+
+static void
+varpd_cleanup(void)
+{
+ varpd_exit = B_TRUE;
+}
+
+/*
+ * Load default information from SMF and apply any of if necessary. We recognize
+ * the following properties:
+ *
+ * varpd/include_path Treat these as a series of -i options.
+ *
+ * If we're not under SMF, just move on.
+ */
+static void
+varpd_load_smf(int dfd)
+{
+ char *fmri, *inc;
+ scf_simple_prop_t *prop;
+
+ if ((fmri = getenv("SMF_FMRI")) == NULL)
+ return;
+
+ if ((prop = scf_simple_prop_get(NULL, fmri, VARPD_PG,
+ VARPD_PROP_INC)) == NULL)
+ return;
+
+ while ((inc = scf_simple_prop_next_astring(prop)) != NULL) {
+ int err = libvarpd_plugin_load(varpd_handle, inc);
+ if (err != 0) {
+ varpd_dfatal(dfd, "failed to load from %s: %s\n",
+ inc, strerror(err));
+ }
+ }
+
+ scf_simple_prop_free(prop);
+}
+
+/*
+ * There are a bunch of things we need to do to be a proper daemon here.
+ *
+ * o Ensure that /var/run/varpd exists or create it
+ * o make stdin /dev/null (stdout?)
+ * o Ensure any other fds that we somehow inherited are closed, eg.
+ * closefrom()
+ * o Properly daemonize
+ * o Mask all signals except sigabrt before creating our first door -- all
+ * other doors will inherit from that.
+ * o Have the main thread sigsuspend looking for most things that are
+ * actionable...
+ */
+int
+main(int argc, char *argv[])
+{
+ int err, c, dirfd, dfd, i;
+ const char *doorpath = VARPD_DEFAULT_DOOR;
+ sigset_t set;
+ struct sigaction act;
+ int nincpath = 0, nextincpath = 0;
+ char **incpath = NULL;
+
+ varpd_pname = basename(argv[0]);
+
+ /*
+ * We want to clean up our file descriptors before we do anything else
+ * as we can't assume that libvarpd won't open file descriptors, etc.
+ */
+ varpd_fd_setup();
+
+ if ((err = libvarpd_create(&varpd_handle)) != 0) {
+ varpd_fatal("failed to open a libvarpd handle");
+ return (1);
+ }
+
+ while ((c = getopt(argc, argv, ":i:d:")) != -1) {
+ switch (c) {
+ case 'i':
+ if (nextincpath == nincpath) {
+ if (nincpath == 0)
+ nincpath = 16;
+ else
+ nincpath *= 2;
+ incpath = realloc(incpath, sizeof (char *) *
+ nincpath);
+ if (incpath == NULL) {
+ (void) fprintf(stderr, "failed to "
+ "allocate memory for the %dth "
+ "-I option: %s\n", nextincpath + 1,
+ strerror(errno));
+ }
+
+ }
+ incpath[nextincpath] = optarg;
+ nextincpath++;
+ break;
+ case 'd':
+ doorpath = optarg;
+ break;
+ default:
+ (void) fprintf(stderr, "unknown option: %c\n", c);
+ return (1);
+ }
+ }
+
+ dirfd = varpd_dir_setup();
+
+ (void) libvarpd_plugin_walk(varpd_handle, varpd_plugin_walk_cb, NULL);
+
+ dfd = varpd_daemonize(dirfd);
+
+ /*
+ * Now that we're in the child, go ahead and load all of our plug-ins.
+ * We do this, in part, because these plug-ins may need threads of their
+ * own and fork won't preserve those and we'd rather the plug-ins don't
+ * have to learn about fork-handlers.
+ */
+ for (i = 0; i < nextincpath; i++) {
+ err = libvarpd_plugin_load(varpd_handle, incpath[i]);
+ if (err != 0) {
+ varpd_dfatal(dfd, "failed to load from %s: %s\n",
+ incpath[i], strerror(err));
+ }
+ }
+
+ varpd_load_smf(dfd);
+
+ if ((err = libvarpd_persist_enable(varpd_handle, VARPD_RUNDIR)) != 0)
+ varpd_dfatal(dfd, "failed to enable varpd persistence: %s\n",
+ strerror(err));
+
+ if ((err = libvarpd_persist_restore(varpd_handle)) != 0)
+ varpd_dfatal(dfd, "failed to enable varpd persistence: %s\n",
+ strerror(err));
+
+ /*
+ * The ur-door thread will inherit from this signal mask. So set it to
+ * what we want before doing anything else. In addition, so will our
+ * threads that handle varpd lookups.
+ */
+ if (sigfillset(&set) != 0)
+ varpd_dfatal(dfd, "failed to fill a signal set...");
+
+ if (sigdelset(&set, SIGABRT) != 0)
+ varpd_dfatal(dfd, "failed to unmask SIGABRT");
+
+ if (sigprocmask(SIG_BLOCK, &set, NULL) != 0)
+ varpd_dfatal(dfd, "failed to set our door signal mask");
+
+ if ((err = varpd_setup_lookup_threads()) != 0)
+ varpd_dfatal(dfd, "failed to create lookup threads: %s\n",
+ strerror(err));
+
+ if ((err = libvarpd_door_server_create(varpd_handle, doorpath)) != 0)
+ varpd_dfatal(dfd, "failed to create door server at %s: %s\n",
+ doorpath, strerror(err));
+
+ /*
+ * At this point, finish up signal intialization and finally go ahead,
+ * notify the parent that we're okay, and enter the sigsuspend loop.
+ */
+ bzero(&act, sizeof (struct sigaction));
+ act.sa_handler = varpd_cleanup;
+ if (sigfillset(&act.sa_mask) != 0)
+ varpd_dfatal(dfd, "failed to fill sigaction mask");
+ act.sa_flags = 0;
+ if (sigaction(SIGHUP, &act, NULL) != 0)
+ varpd_dfatal(dfd, "failed to register HUP handler");
+ if (sigdelset(&set, SIGHUP) != 0)
+ varpd_dfatal(dfd, "failed to remove HUP from mask");
+ if (sigaction(SIGQUIT, &act, NULL) != 0)
+ varpd_dfatal(dfd, "failed to register QUIT handler");
+ if (sigdelset(&set, SIGQUIT) != 0)
+ varpd_dfatal(dfd, "failed to remove QUIT from mask");
+ if (sigaction(SIGINT, &act, NULL) != 0)
+ varpd_dfatal(dfd, "failed to register INT handler");
+ if (sigdelset(&set, SIGINT) != 0)
+ varpd_dfatal(dfd, "failed to remove INT from mask");
+ if (sigaction(SIGTERM, &act, NULL) != 0)
+ varpd_dfatal(dfd, "failed to register TERM handler");
+ if (sigdelset(&set, SIGTERM) != 0)
+ varpd_dfatal(dfd, "failed to remove TERM from mask");
+
+ err = 0;
+ (void) write(dfd, &err, sizeof (err));
+ (void) close(dfd);
+
+ for (;;) {
+ if (sigsuspend(&set) == -1)
+ if (errno == EFAULT)
+ abort();
+ if (varpd_exit == B_TRUE)
+ break;
+ }
+
+ libvarpd_door_server_destroy(varpd_handle);
+ libvarpd_destroy(varpd_handle);
+
+ return (VARPD_EXIT_REQUESTED);
+}
diff --git a/usr/src/cmd/varpd/varpd.xml b/usr/src/cmd/varpd/varpd.xml
new file mode 100644
index 0000000000..0a9201e90d
--- /dev/null
+++ b/usr/src/cmd/varpd/varpd.xml
@@ -0,0 +1,60 @@
+<?xml version="1.0"?>
+<!DOCTYPE service_bundle SYSTEM "/usr/share/lib/xml/dtd/service_bundle.dtd.1">
+<!--
+This file and its contents are supplied under the terms of the
+Common Development and Distribution License ("CDDL"), version 1.0.
+You may only use this file in accordance with the terms of version
+1.0 of the CDDL.
+
+A full copy of the text of the CDDL should have accompanied this
+source. A copy of the CDDL is also available via the Internet at
+http://www.illumos.org/license/CDDL.
+
+Copyright 2015, Joyent, Inc.
+-->
+
+<service_bundle type="manifest" name="illumos:varpd" >
+
+ <service name="network/varpd" type="service" version="1" >
+
+ <create_default_instance enabled="true" />
+
+ <single_instance/>
+
+ <dependency name="varpd-network-physical"
+ grouping="require_all"
+ restart_on="none"
+ type="service">
+ <service_fmri value="svc:/network/physical:default" />
+ </dependency>
+
+ <exec_method
+ type="method"
+ name="start"
+ exec="/lib/svc/method/svc-varpd"
+ timeout_seconds="60" />
+
+ <exec_method
+ type="method"
+ name="stop"
+ exec=":kill"
+ timeout_seconds="10" />
+
+ <property_group name='varpd' type='application'>
+ <property name='include_path' type='astring'>
+ <astring_list>
+ <value_node value='/usr/lib/varpd'/>
+ </astring_list>
+ </property>
+ </property_group>
+
+ <stability value='Unstable' />
+
+ <template>
+ <common_name>
+ <loctext xml:lang="C">virtual ARP daemon
+ </loctext>
+ </common_name>
+ </template>
+ </service>
+</service_bundle>
diff --git a/usr/src/cmd/vi/port/Makefile b/usr/src/cmd/vi/port/Makefile
index 268dd752a6..8e223d98e8 100644
--- a/usr/src/cmd/vi/port/Makefile
+++ b/usr/src/cmd/vi/port/Makefile
@@ -75,6 +75,9 @@ $(XPG6) := CFLAGS += -DXPG4 -DXPG6 -I$(SRC)/lib/libc/inc
CPPFLAGS += -DUSG -DSTDIO -DVMUNIX -DTABS=8 -DSINGLE -DTAG_STACK
+# vi intentionally uses foo[-1] as a sentinal value to q*column()
+$(__GNUC4)CERRWARN += -_gcc=-Wno-array-bounds
+
# vi maintains its own versions of various routines from libc and libcurses,
# so localize all symbols to avoid name space collisions.
LDFLAGS += $(MAPFILE.NGB:%=-M%)
diff --git a/usr/src/cmd/vi/port/ex_cmdsub.c b/usr/src/cmd/vi/port/ex_cmdsub.c
index 0260d334fe..00bdcefccb 100644
--- a/usr/src/cmd/vi/port/ex_cmdsub.c
+++ b/usr/src/cmd/vi/port/ex_cmdsub.c
@@ -1735,7 +1735,7 @@ char *prompt;
/* In ex mode, let the system hassle with setting no echo */
if (!inopen)
- return (unsigned char *)getpass(prompt);
+ return (unsigned char *)getpass((const char *)prompt);
viprintf("%s", prompt); flush();
for (p=pbuf; (c = getkey())!='\n' && c!=EOF && c!='\r';) {
if (p < &pbuf[8])
diff --git a/usr/src/cmd/vndadm/Makefile b/usr/src/cmd/vndadm/Makefile
new file mode 100644
index 0000000000..aa9c22d296
--- /dev/null
+++ b/usr/src/cmd/vndadm/Makefile
@@ -0,0 +1,65 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+PROG= vndadm
+OBJS = vndadm.o
+SRCS = $(OBJS:%.o=../%.c)
+
+
+include ../Makefile.cmd
+include ../Makefile.ctf
+
+CLEANFILES += $(OBJS)
+CFLAGS += $(CCVERBOSE)
+LDLIBS += -lvnd
+LINTFLAGS += -xerroff=E_NAME_DEF_NOT_USED2
+C99MODE= -xc99=%all
+C99LMODE= -Xc99=%all
+
+all := TARGET += all
+clean := TARGET += clean
+clobber := TARGET += clobber
+install := TARGET += install
+lint := TARGET += lint
+
+SUBDIRS = test
+
+.KEEP_STATE:
+
+$(PROG): $(OBJS)
+ $(LINK.c) -o $@ $(OBJS) $(LDLIBS)
+ $(POST_PROCESS)
+
+clean: $(SUBDIRS)
+ -$(RM) $(CLEANFILES)
+
+lint: lint_PROG $(SUBDIRS)
+
+%.o: ../%.c
+ $(COMPILE.c) $<
+ $(POST_PROCESS_O)
+
+clobber: clean $(SUBDIRS)
+ $(RM) $(PROG)
+
+install: $(PROG) $(ROOTUSRSBINPROG) $(SUBDIRS)
+
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/vndadm/test/Makefile b/usr/src/cmd/vndadm/test/Makefile
new file mode 100644
index 0000000000..12ef2c3a3c
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+SUBDIRS = scripts tst
+
+include Makefile.subdirs
+include Makefile.com
diff --git a/usr/src/cmd/vndadm/test/Makefile.com b/usr/src/cmd/vndadm/test/Makefile.com
new file mode 100644
index 0000000000..cb096952ca
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/Makefile.com
@@ -0,0 +1,43 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include $(SRC)/Makefile.master
+include $(SRC)/cmd/Makefile.cmd
+
+#
+# Force c99 for everything
+#
+C99MODE= -xc99=%all
+C99LMODE= -Xc99=%all
+
+#
+# Deal with odd lint bits.
+#
+LINTFLAGS += -xerroff=E_NAME_DEF_NOT_USED2
+
+#
+# Install related definitions
+#
+ROOTOPTPKG = $(ROOT)/opt/vndtest
+ROOTBIN = $(ROOTOPTPKG)/bin
+ROOTTST = $(ROOTOPTPKG)/tst
+ROOTTSTDIR = $(ROOTTST)/$(TSTDIR)
+ROOTTSTEXES = $(EXETESTS:%=$(ROOTTSTDIR)/%)
+ROOTTSTSH = $(SHTESTS:%=$(ROOTTSTDIR)/%)
+ROOTOUT = $(OUTFILES:%=$(ROOTTSTDIR)/%)
+ROOTTESTS = $(ROOTTSTEXES) $(ROOTTSTSH) $(ROOTOUT)
+FILEMODE = 0555
+LDLIBS = $(LDLIBS.cmd)
+LINTEXE = $(EXETESTS:%.exe=%.exe.ln)
diff --git a/usr/src/cmd/vndadm/test/Makefile.subdirs b/usr/src/cmd/vndadm/test/Makefile.subdirs
new file mode 100644
index 0000000000..957448c23b
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/Makefile.subdirs
@@ -0,0 +1,29 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+.KEEP_STATE:
+
+all := TARGET += all
+clean := TARGET += clean
+clobber := TARGET += clobber
+install := TARGET += install
+lint := TARGET += lint
+
+all clean clobber install lint: $(SUBDIRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/cmd/vndadm/test/Makefile.targ b/usr/src/cmd/vndadm/test/Makefile.targ
new file mode 100644
index 0000000000..bcbd3c8f35
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/Makefile.targ
@@ -0,0 +1,59 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+$(ROOTOPTPKG):
+ $(INS.dir)
+
+$(ROOTBIN): $(ROOTOPTPKG)
+ $(INS.dir)
+
+$(ROOTBIN)/%: %.ksh $(ROOTBIN)
+ $(INS.rename)
+
+$(ROOTTST): $(ROOTOPTPKG)
+ $(INS.dir)
+
+$(ROOTTSTDIR): $(ROOTTST)
+ $(INS.dir)
+
+$(ROOTTSTDIR)/%.ksh: %.ksh $(ROOTTSTDIR)
+ $(INS.file)
+
+$(ROOTTSTDIR)/%.out: %.out $(ROOTTSTDIR)
+ $(INS.file)
+
+%.o: %.c
+ $(COMPILE.c) $<
+ $(POST_PROCESS_O)
+
+%.exe: %.o $(SUPOBJS)
+ $(LINK.c) -o $@ $< $(SUPOBJS) $(LDLIBS)
+ $(POST_PROCESS)
+
+$(ROOTTSTDIR)/%.exe: %.exe $(ROOTTSTDIR)
+ $(INS.file)
+
+all: install
+
+%.exe.ln: %.c $(SUPOBJS)
+ $(LINT.c) $< $(LDLIBS)
+
+lint: $(LINTEXE)
+
+clean:
+ -$(RM) *.o $(CLEANFILES)
+
+clobber: clean
+ -$(RM) $(CLOBBERFILES)
diff --git a/usr/src/cmd/vndadm/test/scripts/Makefile b/usr/src/cmd/vndadm/test/scripts/Makefile
new file mode 100644
index 0000000000..d0f58918f9
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/scripts/Makefile
@@ -0,0 +1,28 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+
+SRCS = vndtest
+SCRIPTS = $(SRCS:%=$(ROOTBIN)/%)
+
+SCRIPTS := FILEMODE = 0555
+CLOBBERFILES = $(SCRIPTS)
+
+install: $(SCRIPTS)
+
+lint:
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/vndadm/test/scripts/vndtest.ksh b/usr/src/cmd/vndadm/test/scripts/vndtest.ksh
new file mode 100755
index 0000000000..1167a64802
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/scripts/vndtest.ksh
@@ -0,0 +1,300 @@
+#!/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014, Joyent, Inc.
+#
+
+#
+# vnd test suite driver
+#
+unalias -a
+
+vt_arg0=$(basename $0)
+vt_root="$(dirname $0)/.."
+vt_ksh="/usr/bin/ksh"
+vt_outdir=
+vt_keep=
+vt_all=
+vt_tests=
+vt_stub=
+vt_vnics="vndtest1 vndtest2 vndtest3 vndtest4 vndtest5"
+vt_tnum=0
+vt_tfail=0
+vt_tsuc=0
+
+function usage
+{
+ typeset msg="$*"
+ [[ -z "$msg" ]] || echo "$msg" 2>&1
+ cat <<USAGE >&2
+Usage: $vt_arg0 [ -o dir ] [ -k ] [ -a | test ... ]
+
+ -o dir Sets 'dir' as the output directory
+ -a Runs all tests, ignores tests passed in
+ -k Keep output from all tests, not just failures
+ -m mdb binary to test
+USAGE
+ exit 2
+}
+
+function fatal
+{
+ typeset msg="$*"
+ [[ -z "$msg" ]] && msg="failed"
+ echo "$vt_arg0: $msg" >&2
+ exit 1
+}
+
+function setup_outdir
+{
+ vt_outdir="$vt_outdir/$vt_arg0.$$"
+ mkdir -p $vt_outdir || fatal "failed to make output dir $vt_outdir"
+}
+
+function setup_etherstub
+{
+ vt_ether="vndstub$$"
+
+ dladm create-etherstub -t $vt_ether || \
+ fatal "failed to create etherstub"
+}
+
+function cleanup_vnd
+{
+ typeset over=$1
+ typeset vnddevs vn
+
+ vnddevs=$(vndadm list -p -d: -o datalink,name)
+ [[ $? -eq 0 ]] || fatal "failed to list vnics"
+ for v in $vnddevs; do
+ vn=$(echo $v | awk 'BEGIN{ FS=":"}
+ { if ($1 == targ) { print $2 } }' targ=$over)
+ [[ -z "$vn" ]] && continue
+ vndadm destroy $vn || fatal "failed to destroy $vn"
+ done
+}
+
+function create_vnics
+{
+ for n in $vt_vnics; do
+ dladm create-vnic -t -l $vt_ether $n || fatal \
+ "failed to create vnic $n over $vt_ether"
+ done
+}
+
+function cleanup_vnics
+{
+ typeset nics vn
+
+ nics=$(dladm show-vnic -p -o over,link)
+ [[ $? -eq 0 ]] || fatal "failed to list vnics"
+ for n in $nics; do
+ vn=$(echo $n | awk 'BEGIN{ FS=":"}
+ { if ($1 == targ) { print $2 } }' targ=$vt_ether )
+ [[ -z "$vn" ]] && continue
+ cleanup_vnd $vn
+ #
+ # There may or may not be an IP device on our nics...
+ #
+ ifconfig $vn down unplumb 2>/dev/null || /bin/true
+ dladm delete-vnic $vn || fatal "failed to delete vnic $n"
+ done
+
+}
+
+function cleanup_etherstub
+{
+ cleanup_vnics
+ dladm delete-etherstub -t $vt_ether || \
+ fatal "failed to delete etherstub"
+}
+
+function run_single
+{
+ typeset name=$1
+ typeset expect base ext exe command odir res reason
+ typeset iserr
+
+ [[ -z "$name" ]] && fail "missing test to run"
+ base=${name##*/}
+ ext=${base##*.}
+ expect=${base%%.*}
+ odir="$vt_outdir/current"
+ [[ -z "$ext" ]] && fatal "found test without ext: $name"
+ [[ -z "$expect" ]] && fatal "found test without prefix: $name"
+
+ [[ "$expect" == "create" || "$expect" == "ecreate" ]] && create_vnics
+ if [[ "$expect" == "err" || "$expect" == "ecreate" ]]; then
+ iserr="yup"
+ else
+ iserr=""
+ fi
+
+ case "$ext" in
+ "ksh")
+ command="$vt_ksh ./$base"
+ ;;
+ "exe")
+ command="./$base"
+ ;;
+ "out")
+ #
+ # This is the file format for checking output against.
+ #
+ return 0
+ ;;
+ *)
+ echo "skipping test $name (unknown extensino)"
+ return 0
+ ;;
+ esac
+
+ echo "Executing test $name ... \c"
+ mkdir -p "$odir" >/dev/null || fatal "can't make output directory"
+ cd $(dirname $name) || fatal "failed to enter test directory"
+ $command $vt_vnics > "$odir/stdout" 2>"$odir/stderr"
+ res=$?
+ cd - > /dev/null || fatal "failed to leave test directory"
+
+ if [[ -f "$name.out" ]] && \
+ ! diff "$name.out" "$odir/stdout" >/dev/null; then
+ cp $name.out $odir/$base.out
+ reason="stdout mismatch"
+ elif [[ -n "$iserr" && $res -eq 0 ]]; then
+ reason="test exited $res, not non-zero"
+ elif [[ -z "$iserr" && $res -ne 0 ]]; then
+ reason="test exited $res, not zero"
+ fi
+
+ if [[ -n "$reason" ]]; then
+ echo "$reason"
+ ((vt_tfail++))
+ mv "$odir" "$vt_outdir/failure.$vt_tfail" || fatal \
+ "failed to move test output directory"
+ cp "$name" "$vt_outdir/failure.$vt_tfail/$(basename $name)" || \
+ fatal "failed to copy test into output directory"
+ else
+ echo "passed"
+ ((vt_tsuc++))
+ mv "$odir" "$vt_outdir/success.$vt_tsuc" || fatal \
+ "failed to move test directory"
+ fi
+
+ [[ "$expect" == "create" || "$expect" == "ecreate" ]] && cleanup_vnics
+
+ ((vt_tnum++))
+}
+
+function run_all
+{
+ typeset tests t dir
+
+ cd $vt_root || fatal "failed to enter root test directory"
+ tests=$(ls -1 */*/@(ecreate|create|tst|err).*.@(ksh|exe))
+ cd - > /dev/null
+ for t in $tests; do
+ run_single $t
+ done
+}
+
+function welcome
+{
+ cat <<WELCOME
+Starting tests...
+output directory: $vt_outdir
+WELCOME
+}
+
+function cleanup
+{
+ [[ -n "$vt_keep" ]] && return
+ rm -rf "$vt_outdir"/success.* || fatal \
+ "failed to remove successful test cases"
+ if [[ $vt_tfail -eq 0 ]]; then
+ rmdir "$vt_outdir" || fatal \
+ "failed to remove test output directory"
+ fi
+}
+
+function goodbye
+{
+ cat <<EOF
+
+-------------
+Results
+-------------
+
+Tests passed: $vt_tsuc
+Tests failed: $vt_tfail
+Tests ran: $vt_tnum
+
+EOF
+ if [[ $vt_tfail -eq 0 ]]; then
+ echo "Congrats, vnd isn't completely broken, the tests pass".
+ else
+ echo "Some tests failed, you have some work to do."
+ fi
+}
+
+while getopts ":ahko:m:" c $@; do
+ case "$c" in
+ a)
+ vt_all="y"
+ ;;
+ k)
+ vt_keep="y"
+ ;;
+ o)
+ vt_outdir="$OPTARG"
+ ;;
+ h)
+ usage
+ ;;
+ :)
+ usage "option requires an argument -- $OPTARG"
+ ;;
+ *)
+ usage "invalid option -- $OPTARG"
+ ;;
+ esac
+done
+
+shift $((OPTIND-1))
+
+[[ $(zonename) != "global" ]] && fatal "vndtest only runs in the global zone"
+
+[[ -z "$vt_all" && $# == 0 ]] && usage "no tests to run"
+
+[[ -z "$vt_outdir" ]] && vt_outdir="$PWD"
+
+setup_outdir
+setup_etherstub
+welcome
+
+if [[ ! -z "$vt_all" ]]; then
+ run_all
+else
+ for t in $@; do
+ [[ -f $t ]] || fatal "cannot find test $t"
+ run_single $t
+ done
+fi
+
+cleanup_etherstub
+goodbye
+cleanup
+
+#
+# Exit 1 if we have tests that return non-zero
+#
+[[ $vt_tfai -eq 0 ]]
diff --git a/usr/src/cmd/vndadm/test/tst/Makefile b/usr/src/cmd/vndadm/test/tst/Makefile
new file mode 100644
index 0000000000..9b1ba29429
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+SUBDIRS = cmd dld ioctl lib
+
+include ../Makefile.subdirs
diff --git a/usr/src/cmd/vndadm/test/tst/cmd/Makefile b/usr/src/cmd/vndadm/test/tst/cmd/Makefile
new file mode 100644
index 0000000000..1ca20bf749
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/cmd/Makefile
@@ -0,0 +1,34 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+TSTDIR = cmd
+COMMONSH = cmd.common.ksh
+SHTESTS = $(COMMONSH) \
+ create.list.ksh \
+ create.sdev.ksh \
+ create.setbuf.ksh \
+ ecreate.destroy.ksh \
+ ecreate.setbadprop.ksh \
+ ecreate.setbadvalue.ksh \
+ ecreate.setbuftoobig.ksh \
+ ecreate.setrdonlyprop.ksh
+
+OUTFILES = create.list.ksh.out
+
+include ../../Makefile.com
+
+install: $(ROOTTESTS)
+
+include ../../Makefile.targ
diff --git a/usr/src/cmd/vndadm/test/tst/cmd/cmd.common.ksh b/usr/src/cmd/vndadm/test/tst/cmd/cmd.common.ksh
new file mode 100644
index 0000000000..31e4e8bf5c
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/cmd/cmd.common.ksh
@@ -0,0 +1,33 @@
+#
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Common ksh-based utilities
+#
+
+vt_arg0=$(basename $0)
+
+function fatal
+{
+ typeset msg="$*"
+ [[ -z "$msg" ]] && msg="failed"
+ echo "$vt_arg0: $msg" >&2
+ exit 1
+}
+
+[[ -z "$1" ]] && fatal "missing required vnic"
+[[ -z "$2" ]] && fatal "missing required vnic"
+[[ -z "$3" ]] && fatal "missing required vnic"
diff --git a/usr/src/cmd/vndadm/test/tst/cmd/create.list.ksh b/usr/src/cmd/vndadm/test/tst/cmd/create.list.ksh
new file mode 100644
index 0000000000..fdec9a85be
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/cmd/create.list.ksh
@@ -0,0 +1,30 @@
+#!/usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Basic device listing
+#
+
+. ./cmd.common.ksh
+
+#
+# Use what we hope is a relatively unique name
+#
+cl_name="triforceofcourage0"
+vndadm create -l $1 $cl_name || fatal "failed to create vnd device"
+vndadm list -p -o name,zone $cl_name
+vndadm list -p -d: -o zone,name $cl_name
+vndadm destroy $cl_name || fatal "failed to destroy vnd device"
diff --git a/usr/src/cmd/vndadm/test/tst/cmd/create.list.ksh.out b/usr/src/cmd/vndadm/test/tst/cmd/create.list.ksh.out
new file mode 100644
index 0000000000..d208b38aab
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/cmd/create.list.ksh.out
@@ -0,0 +1,2 @@
+triforceofcourage0 global
+global:triforceofcourage0
diff --git a/usr/src/cmd/vndadm/test/tst/cmd/create.sdev.ksh b/usr/src/cmd/vndadm/test/tst/cmd/create.sdev.ksh
new file mode 100644
index 0000000000..b816ade1de
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/cmd/create.sdev.ksh
@@ -0,0 +1,25 @@
+#!/usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Verify that our sdev links exist
+#
+
+. ./cmd.common.ksh
+
+vndadm create $1 || fatal "failed to bring up vnd"
+[[ -c /dev/vnd/$1 ]] || fatal "missing link"
+[[ -c /dev/vnd/zone/$(zonename)/$1 ]] || fatal "missing per-zone link"
diff --git a/usr/src/cmd/vndadm/test/tst/cmd/create.setbuf.ksh b/usr/src/cmd/vndadm/test/tst/cmd/create.setbuf.ksh
new file mode 100644
index 0000000000..d50edbead4
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/cmd/create.setbuf.ksh
@@ -0,0 +1,34 @@
+#!/usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Set and validate the buffer size properties. Valiate that we can set
+# the value using the various number analogues, eg. 1024K, etc.
+#
+set -o pipefail
+
+. ./cmd.common.ksh
+
+vndadm create $1 || fatal "failed to bring up vnd device"
+vndadm set $1 rxbuf=1M
+cur=$(vndadm get -p $1 rxbuf | nawk '{ print $4 }')
+[[ $? -eq 0 ]] || fatal "failed to get rxbuf"
+[[ $cur -eq 1048576 ]] || fatal "rxbuf is $cur, not 1M"
+
+vndadm set $1 txbuf=1024K
+cur=$(vndadm get -p $1 rxbuf | nawk '{ print $4 }')
+[[ $? -eq 0 ]] || fatal "failed to get txbuf"
+[[ $cur -eq 1048576 ]] || fatal "txbuf is $cur, not 1M"
diff --git a/usr/src/cmd/vndadm/test/tst/cmd/ecreate.destroy.ksh b/usr/src/cmd/vndadm/test/tst/cmd/ecreate.destroy.ksh
new file mode 100644
index 0000000000..e3c4931018
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/cmd/ecreate.destroy.ksh
@@ -0,0 +1,25 @@
+#!/usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Make sure that destroy on a previously destroyed link fails
+#
+
+. ./cmd.common.ksh
+
+vndadm create $1 || fatal "failed to bring up vnd device"
+vndadm destroy $1 || fatal "failed to destroy vnd device"
+vndadm destroy $1
diff --git a/usr/src/cmd/vndadm/test/tst/cmd/ecreate.setbadprop.ksh b/usr/src/cmd/vndadm/test/tst/cmd/ecreate.setbadprop.ksh
new file mode 100644
index 0000000000..30c27575b1
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/cmd/ecreate.setbadprop.ksh
@@ -0,0 +1,24 @@
+#!/usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Make sure that we can't set a non-existant proprety
+#
+
+. ./cmd.common.ksh
+
+vndadm create $1 || fatal "failed to bring up vnd device"
+vndadm set $1 ganon=ganondorf
diff --git a/usr/src/cmd/vndadm/test/tst/cmd/ecreate.setbadvalue.ksh b/usr/src/cmd/vndadm/test/tst/cmd/ecreate.setbadvalue.ksh
new file mode 100644
index 0000000000..056b24a817
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/cmd/ecreate.setbadvalue.ksh
@@ -0,0 +1,24 @@
+#!/usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Make sure that we can't set something to a garbage value
+#
+
+. ./cmd.common.ksh
+
+vndadm create $1 || fatal "failed to bring up vnd device"
+vndadm set $1 rxbuf=hello
diff --git a/usr/src/cmd/vndadm/test/tst/cmd/ecreate.setbuftoobig.ksh b/usr/src/cmd/vndadm/test/tst/cmd/ecreate.setbuftoobig.ksh
new file mode 100644
index 0000000000..551e20461c
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/cmd/ecreate.setbuftoobig.ksh
@@ -0,0 +1,24 @@
+#!/usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Make sure that we can't set a buffer value to a ridiculous size
+#
+
+. ./cmd.common.ksh
+
+vndadm create $1 || fatal "failed to bring up vnd device"
+vndadm set $1 rxsize=1T
diff --git a/usr/src/cmd/vndadm/test/tst/cmd/ecreate.setrdonlyprop.ksh b/usr/src/cmd/vndadm/test/tst/cmd/ecreate.setrdonlyprop.ksh
new file mode 100644
index 0000000000..4beb53e227
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/cmd/ecreate.setrdonlyprop.ksh
@@ -0,0 +1,24 @@
+#!/usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Make sure that we can't set a read only property.
+#
+
+. ./cmd.common.ksh
+
+vndadm create $1 || fatal "failed to bring up vnd device"
+vndadm set $1 mintu=100
diff --git a/usr/src/cmd/vndadm/test/tst/dld/Makefile b/usr/src/cmd/vndadm/test/tst/dld/Makefile
new file mode 100644
index 0000000000..3088812630
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/dld/Makefile
@@ -0,0 +1,27 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+TSTDIR = dld
+COMMONSH = dld.common.ksh
+SHTESTS = $(COMMONSH) \
+ ecreate.ipfirst.ksh \
+ ecreate.vndfirst.ksh \
+ create.reuse.ksh
+
+include ../../Makefile.com
+
+install: $(ROOTTESTS)
+
+include ../../Makefile.targ
diff --git a/usr/src/cmd/vndadm/test/tst/dld/create.reuse.ksh b/usr/src/cmd/vndadm/test/tst/dld/create.reuse.ksh
new file mode 100644
index 0000000000..bc2ffde7f6
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/dld/create.reuse.ksh
@@ -0,0 +1,31 @@
+#
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Make sure that we can reuse a data link
+#
+
+. ./dld.common.ksh
+
+dld_nic=$1
+[[ -z "$1" ]] && fatal "missing required vnic"
+
+vndadm create $dld_nic || fatal "failed to bring up vnd"
+vndadm destroy $dld_nic || fatal "failed to bring down vnd"
+ifconfig $dld_nic plumb up || fatal "failed to bring up IP"
+ifconfig $dld_nic down unplumb || fatal "failed to bring down IP"
+vndadm create $dld_nic || fatal "failed to bring up vnd"
+vndadm destroy $dld_nic || fatal "failed to bring down vnd"
diff --git a/usr/src/cmd/vndadm/test/tst/dld/dld.common.ksh b/usr/src/cmd/vndadm/test/tst/dld/dld.common.ksh
new file mode 100644
index 0000000000..7a2e0a8e2b
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/dld/dld.common.ksh
@@ -0,0 +1,29 @@
+#
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Common ksh-based utilities
+#
+
+vt_arg0=$(basename $0)
+
+function fatal
+{
+ typeset msg="$*"
+ [[ -z "$msg" ]] && msg="failed"
+ echo "$vt_arg0: $msg" >&2
+ exit 1
+}
diff --git a/usr/src/cmd/vndadm/test/tst/dld/ecreate.ipfirst.ksh b/usr/src/cmd/vndadm/test/tst/dld/ecreate.ipfirst.ksh
new file mode 100644
index 0000000000..e6409781cb
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/dld/ecreate.ipfirst.ksh
@@ -0,0 +1,27 @@
+#
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Make sure vnd fails to come up when IP is up
+#
+
+. ./dld.common.ksh
+
+dld_nic=$1
+[[ -z "$1" ]] && fatal "missing required vnic"
+
+ifconfig $dld_nic plumb up || fatal "failed to bring up IP"
+vndadm create $dld_nic
diff --git a/usr/src/cmd/vndadm/test/tst/dld/ecreate.vndfirst.ksh b/usr/src/cmd/vndadm/test/tst/dld/ecreate.vndfirst.ksh
new file mode 100644
index 0000000000..ee7a13c09c
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/dld/ecreate.vndfirst.ksh
@@ -0,0 +1,27 @@
+#
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Make sure IP fails to come up when vnd is up
+#
+
+. ./dld.common.ksh
+
+dld_nic=$1
+[[ -z "$1" ]] && fatal "missing required vnic"
+
+vndadm create $dld_nic || fatal "failed to bring up vnd"
+ifconfig $dld_nic plumb up
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/Makefile b/usr/src/cmd/vndadm/test/tst/ioctl/Makefile
new file mode 100644
index 0000000000..fe074f32b0
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/Makefile
@@ -0,0 +1,49 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+TSTDIR = ioctl
+EXETESTS = \
+ create.attach.exe \
+ create.attachnolink.exe \
+ create.badlinkname.exe \
+ create.doublelink.exe \
+ create.gioctlattach.exe \
+ create.link.exe \
+ create.linkexists.exe \
+ create.ngioctlfault.exe \
+ create.nopriv1.exe \
+ create.nopriv2.exe \
+ create.nopriv3.exe \
+ create.nopriv4.exe \
+ create.olink.exe \
+ create.olinknopriv.exe \
+ create.rmenolink.exe \
+ tst.attachrdonly.exe \
+ tst.basicopenctl.exe \
+ tst.badioctl.exe \
+ tst.gioctlfault.exe \
+ tst.gioctlnattach.exe \
+ tst.openctlbadflags.exe
+SHTESTS = \
+ tst.iocsize.ksh
+SUPBOBJS =
+
+CLOBBERFILES = $(EXETESTS)
+
+include ../../Makefile.com
+
+install: $(ROOTTESTS)
+
+include ../../Makefile.targ
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.attach.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.attach.c
new file mode 100644
index 0000000000..d7bca5cce3
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.attach.c
@@ -0,0 +1,63 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Simply attach a nic
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <strings.h>
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret;
+ vnd_ioc_attach_t via;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd > 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == 0);
+ assert(via.via_errno == 0);
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.attachnolink.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.attachnolink.c
new file mode 100644
index 0000000000..43c6c99af5
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.attachnolink.c
@@ -0,0 +1,67 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Try to attach to a non-existant vnic
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <strings.h>
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret;
+ vnd_ioc_attach_t via;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd > 0);
+
+ /*
+ * All datalink names have numbers, so we can pick a datalink which
+ * doesn't exist by not using numbers...
+ */
+ (void) strlcpy(via.via_name, "enolink", VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == -1);
+ assert(via.via_errno == VND_E_NODATALINK);
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.badlinkname.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.badlinkname.c
new file mode 100644
index 0000000000..e3a067d5ce
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.badlinkname.c
@@ -0,0 +1,119 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Test that we can't link a nic with invalid names
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <strings.h>
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+static const char *names[] = {
+ /* Reserved names */
+ "ctl",
+ "zone",
+ /* Invalid characters */
+ "The fight of the century",
+ "Link/Ganon",
+ "happens@7pm",
+ "#testing",
+ "asdf!!",
+ "power&courage&wisdom",
+ "over9000?",
+ "you're",
+ "100$",
+ "(function",
+ "x)",
+ "2^128",
+ "1++",
+ "No.",
+ "99%",
+ "*****",
+ "r|m",
+ "=0",
+ "`p0",
+ "goodbye~",
+ "however;",
+ "\"hesaid",
+ "shesaid\'",
+ /* emoji pile of poo */
+ "\xF0\x9F\x92\xA9",
+ NULL
+};
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret, i;
+ vnd_ioc_attach_t via;
+ vnd_ioc_link_t vil;
+ vnd_ioc_unlink_t viu;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd > 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == 0);
+ assert(via.via_errno == 0);
+
+ for (i = 0; names[i] != NULL; i++) {
+ (void) strlcpy(vil.vil_name, names[i], VND_NAMELEN);
+ (void) fprintf(stderr, "Trying to create [%s]\n", names[i]);
+ vil.vil_errno = 0;
+ ret = ioctl(fd, VND_IOC_LINK, &vil);
+ assert(ret == -1);
+ assert(vil.vil_errno == VND_E_BADNAME);
+ }
+
+ /* Finally, the missing null terminator */
+ for (i = 0; i < VND_NAMELEN; i++)
+ vil.vil_name[i] = 'a';
+ ret = ioctl(fd, VND_IOC_LINK, &vil);
+ assert(ret == -1);
+ assert(vil.vil_errno == VND_E_BADNAME);
+
+ viu.viu_errno = 0;
+ ret = ioctl(fd, VND_IOC_UNLINK, &viu);
+ assert(ret == -1);
+ assert(viu.viu_errno == VND_E_NOTLINKED);
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.doublelink.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.doublelink.c
new file mode 100644
index 0000000000..dcf4f311e9
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.doublelink.c
@@ -0,0 +1,82 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Link a nic, first should work, second will fail.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <strings.h>
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret;
+ vnd_ioc_attach_t via;
+ vnd_ioc_link_t vil;
+ vnd_ioc_unlink_t viu;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd > 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == 0);
+ assert(via.via_errno == 0);
+
+ (void) strlcpy(vil.vil_name, argv[1], VND_NAMELEN);
+ vil.vil_errno = 0;
+ ret = ioctl(fd, VND_IOC_LINK, &vil);
+ assert(ret == 0);
+ assert(vil.vil_errno == 0);
+
+ (void) strlcpy(vil.vil_name, "dup", VND_NAMELEN);
+ vil.vil_errno = 0;
+ ret = ioctl(fd, VND_IOC_LINK, &vil);
+ assert(ret == -1);
+ assert(vil.vil_errno == VND_E_LINKED);
+ viu.viu_errno = 0;
+
+ ret = ioctl(fd, VND_IOC_UNLINK, &viu);
+ assert(ret == 0);
+ assert(viu.viu_errno == 0);
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.gioctlattach.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.gioctlattach.c
new file mode 100644
index 0000000000..3d6f43377b
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.gioctlattach.c
@@ -0,0 +1,69 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Ensure that we can't run global ioctls on an attached handle
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <strings.h>
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret;
+ vnd_ioc_attach_t via;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd > 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == 0);
+ assert(via.via_errno == 0);
+
+ via.via_name[0] = 'a';
+ via.via_name[1] = '\0';
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == -1);
+ assert(via.via_errno == VND_E_ATTACHED);
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.link.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.link.c
new file mode 100644
index 0000000000..16569d58cd
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.link.c
@@ -0,0 +1,76 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Link a nic
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <strings.h>
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret;
+ vnd_ioc_attach_t via;
+ vnd_ioc_link_t vil;
+ vnd_ioc_unlink_t viu;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd > 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == 0);
+ assert(via.via_errno == 0);
+
+ (void) strlcpy(vil.vil_name, argv[1], VND_NAMELEN);
+ vil.vil_errno = 0;
+ ret = ioctl(fd, VND_IOC_LINK, &vil);
+ assert(ret == 0);
+ assert(vil.vil_errno == 0);
+
+ viu.viu_errno = 0;
+ ret = ioctl(fd, VND_IOC_UNLINK, &viu);
+ assert(ret == 0);
+ assert(viu.viu_errno == 0);
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.linkexists.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.linkexists.c
new file mode 100644
index 0000000000..4e3be0db5d
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.linkexists.c
@@ -0,0 +1,90 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Try to create two devices with the same link name.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <strings.h>
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, fd2, ret;
+ vnd_ioc_attach_t via;
+ vnd_ioc_link_t vil;
+ vnd_ioc_unlink_t viu;
+
+ if (argc < 3) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd > 0);
+ fd2 = open(VND_PATH, O_RDWR);
+ assert(fd2 > 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == 0);
+ assert(via.via_errno == 0);
+
+ (void) strlcpy(via.via_name, argv[2], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+ ret = ioctl(fd2, VND_IOC_ATTACH, &via);
+ assert(ret == 0);
+ assert(via.via_errno == 0);
+
+ (void) strlcpy(vil.vil_name, "dup", VND_NAMELEN);
+ vil.vil_errno = 0;
+ ret = ioctl(fd, VND_IOC_LINK, &vil);
+ assert(ret == 0);
+ assert(vil.vil_errno == 0);
+
+ (void) strlcpy(vil.vil_name, "dup", VND_NAMELEN);
+ vil.vil_errno = 0;
+ ret = ioctl(fd2, VND_IOC_LINK, &vil);
+ assert(ret == -1);
+ assert(vil.vil_errno == VND_E_LINKEXISTS);
+
+ viu.viu_errno = 0;
+ ret = ioctl(fd, VND_IOC_UNLINK, &viu);
+ assert(ret == 0);
+ assert(viu.viu_errno == 0);
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.ngioctlfault.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.ngioctlfault.c
new file mode 100644
index 0000000000..bf174f1a8f
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.ngioctlfault.c
@@ -0,0 +1,96 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Pass bad addresses to all of our non-global ioctls
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <strings.h>
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+static int requests[] = {
+ VND_IOC_LINK,
+ VND_IOC_UNLINK,
+ VND_IOC_GETRXBUF,
+ VND_IOC_SETRXBUF,
+ VND_IOC_GETTXBUF,
+ VND_IOC_SETTXBUF,
+ VND_IOC_GETMINTU,
+ VND_IOC_GETMAXTU,
+ VND_IOC_GETMAXBUF,
+ -1
+};
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret, i;
+ vnd_ioc_attach_t via;
+ vnd_ioc_link_t vil;
+ vnd_ioc_unlink_t viu;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd > 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == 0);
+ assert(via.via_errno == 0);
+
+ (void) strlcpy(vil.vil_name, argv[1], VND_NAMELEN);
+ vil.vil_errno = 0;
+ ret = ioctl(fd, VND_IOC_LINK, &vil);
+ assert(ret == 0);
+ assert(vil.vil_errno == 0);
+
+ for (i = 0; requests[i] != -1; i++) {
+ ret = ioctl(fd, requests[i], (void *)(uintptr_t)i);
+ assert(ret == -1);
+ assert(errno == EFAULT);
+ }
+
+
+ viu.viu_errno = 0;
+ ret = ioctl(fd, VND_IOC_UNLINK, &viu);
+ assert(ret == 0);
+ assert(viu.viu_errno == 0);
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv1.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv1.c
new file mode 100644
index 0000000000..6d5ad0eec2
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv1.c
@@ -0,0 +1,69 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Fail to attach a device without PRIV_NET_CONFIG
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <assert.h>
+#include <priv.h>
+#include <string.h>
+#include <unistd.h>
+#include <stropts.h>
+#include <stdio.h>
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret;
+ priv_set_t *ps;
+ vnd_ioc_attach_t via;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ ps = priv_allocset();
+ assert(ps != NULL);
+ assert(priv_addset(ps, PRIV_SYS_NET_CONFIG) == 0);
+ assert(setppriv(PRIV_OFF, PRIV_PERMITTED, ps) == 0);
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd >= 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == -1);
+ assert(errno == EPERM);
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv2.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv2.c
new file mode 100644
index 0000000000..6b38f159a0
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv2.c
@@ -0,0 +1,69 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Fail to attach a device without PRIV_NET_RAWACCESS
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <assert.h>
+#include <priv.h>
+#include <unistd.h>
+#include <stropts.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret;
+ priv_set_t *ps;
+ vnd_ioc_attach_t via;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ ps = priv_allocset();
+ assert(ps != NULL);
+ assert(priv_addset(ps, PRIV_NET_RAWACCESS) == 0);
+ assert(setppriv(PRIV_OFF, PRIV_PERMITTED, ps) == 0);
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd >= 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == -1);
+ assert(errno == EPERM);
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv3.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv3.c
new file mode 100644
index 0000000000..a8c43fc46d
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv3.c
@@ -0,0 +1,70 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Fail to attach a device without PRIV_NET_CONFIG and PRIV_NET_RAWACCESS
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <assert.h>
+#include <priv.h>
+#include <unistd.h>
+#include <stropts.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret;
+ priv_set_t *ps;
+ vnd_ioc_attach_t via;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ ps = priv_allocset();
+ assert(ps != NULL);
+ assert(priv_addset(ps, PRIV_SYS_NET_CONFIG) == 0);
+ assert(priv_addset(ps, PRIV_NET_RAWACCESS) == 0);
+ assert(setppriv(PRIV_OFF, PRIV_PERMITTED, ps) == 0);
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd >= 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == -1);
+ assert(errno == EPERM);
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv4.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv4.c
new file mode 100644
index 0000000000..aed0204544
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv4.c
@@ -0,0 +1,75 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Fail to link a device without PRIV_NET_CONFIG
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <assert.h>
+#include <priv.h>
+#include <unistd.h>
+#include <stropts.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret;
+ priv_set_t *ps;
+ vnd_ioc_attach_t via;
+ vnd_ioc_link_t vil;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd >= 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == 0);
+
+ ps = priv_allocset();
+ assert(ps != NULL);
+ assert(priv_addset(ps, PRIV_SYS_NET_CONFIG) == 0);
+ assert(setppriv(PRIV_OFF, PRIV_PERMITTED, ps) == 0);
+
+ (void) strlcpy(vil.vil_name, argv[1], VND_NAMELEN);
+ vil.vil_errno = 0;
+ ret = ioctl(fd, VND_IOC_LINK, &vil);
+ assert(ret == -1);
+ assert(errno == EPERM);
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv5.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv5.c
new file mode 100644
index 0000000000..2db8ecc95f
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.nopriv5.c
@@ -0,0 +1,77 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Fail to open a device without PRIV_NET_RAWACCESS
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <assert.h>
+#include <priv.h>
+#include <unistd.h>
+#include <stropts.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, fd2, ret;
+ priv_set_t *ps;
+ char *path;
+ vnd_ioc_attach_t via;
+ vnd_ioc_link_t vil;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd >= 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == 0);
+
+ ps = priv_allocset();
+ assert(ps != NULL);
+ assert(priv_addset(ps, PRIV_SYS_NET_RAWACCESS) == 0);
+ assert(setppriv(PRIV_OFF, PRIV_PERMITTED, ps) == 0);
+
+ (void) asprintf(&path, "/dev/vnd/%s", argv[1]);
+ assert(path != NULL);
+ fd2 = open(path, O_RDWR);
+ assert(fd2 == -1);
+ assert(errno == EPERM);
+
+ free(path);
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.olink.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.olink.c
new file mode 100644
index 0000000000..0f9292bbae
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.olink.c
@@ -0,0 +1,77 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Open a /dev/vnd/%s link
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <strings.h>
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret;
+ char *path;
+ vnd_ioc_attach_t via;
+ vnd_ioc_link_t vil;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd > 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == 0);
+ assert(via.via_errno == 0);
+
+ (void) strlcpy(vil.vil_name, argv[1], VND_NAMELEN);
+ vil.vil_errno = 0;
+ ret = ioctl(fd, VND_IOC_LINK, &vil);
+ assert(ret == 0);
+ assert(vil.vil_errno == 0);
+
+ ret = asprintf(&path, "/dev/vnd/%s", argv[1]);
+ assert(ret != -1);
+
+ ret = open(path, O_RDONLY);
+ assert(ret > 0);
+ assert(close(ret) == 0);
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.olinknopriv.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.olinknopriv.c
new file mode 100644
index 0000000000..338218e751
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.olinknopriv.c
@@ -0,0 +1,83 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Fail to open a /dev/vnd/%s without PRIV_NET_RAWACCESS
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <strings.h>
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <priv.h>
+
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret;
+ char *path;
+ priv_set_t *ps;
+ vnd_ioc_attach_t via;
+ vnd_ioc_link_t vil;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd > 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == 0);
+ assert(via.via_errno == 0);
+
+ (void) strlcpy(vil.vil_name, argv[1], VND_NAMELEN);
+ vil.vil_errno = 0;
+ ret = ioctl(fd, VND_IOC_LINK, &vil);
+ assert(ret == 0);
+ assert(vil.vil_errno == 0);
+
+ ret = asprintf(&path, "/dev/vnd/%s", argv[1]);
+ assert(ret != -1);
+
+ ps = priv_allocset();
+ assert(ps != NULL);
+ assert(priv_addset(ps, PRIV_NET_RAWACCESS) == 0);
+ assert(setppriv(PRIV_OFF, PRIV_PERMITTED, ps) == 0);
+
+ ret = open(path, O_RDWR);
+ assert(ret == -1);
+ assert(errno == EPERM);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/create.rmenolink.c b/usr/src/cmd/vndadm/test/tst/ioctl/create.rmenolink.c
new file mode 100644
index 0000000000..d44e6512a7
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/create.rmenolink.c
@@ -0,0 +1,69 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Verify that unlink fails when we're not linked.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <strings.h>
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret;
+ vnd_ioc_attach_t via;
+ vnd_ioc_unlink_t viu;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR);
+ assert(fd > 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == 0);
+ assert(via.via_errno == 0);
+
+ viu.viu_errno = 0;
+ ret = ioctl(fd, VND_IOC_UNLINK, &viu);
+ assert(ret == -1);
+ assert(viu.viu_errno == VND_E_NOTLINKED);
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/tst.attachrdonly.c b/usr/src/cmd/vndadm/test/tst/ioctl/tst.attachrdonly.c
new file mode 100644
index 0000000000..29def6182d
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/tst.attachrdonly.c
@@ -0,0 +1,63 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Fail to attach when /dev/vnd/ctl is opened read only.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <strings.h>
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(int argc, const char *argv[])
+{
+ int fd, ret;
+ vnd_ioc_attach_t via;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= VND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDONLY);
+ assert(fd > 0);
+
+ (void) strlcpy(via.via_name, argv[1], VND_NAMELEN);
+ via.via_zoneid = 0;
+ via.via_errno = 0;
+
+ ret = ioctl(fd, VND_IOC_ATTACH, &via);
+ assert(ret == -1);
+ assert(errno == EBADF);
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/tst.badioctl.c b/usr/src/cmd/vndadm/test/tst/ioctl/tst.badioctl.c
new file mode 100644
index 0000000000..f26722f035
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/tst.badioctl.c
@@ -0,0 +1,79 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Throw a bunch of bad ioctls at us and make sure that we get ENOTTY.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <strings.h>
+#include <unistd.h>
+#include <stropts.h>
+#include <limits.h>
+#include <assert.h>
+
+/*
+ * We're including a bunch of bad header files that have ioctl numbers that we
+ * know we shouldn't.
+ */
+#include <sys/ipd.h>
+#include <sys/dtrace.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+/*
+ * A series of bad requests
+ */
+static int requests[] = {
+ 0,
+ 1,
+ 42,
+ 169,
+ 4096,
+ INT_MAX,
+ IPDIOC_CORRUPT,
+ IPDIOC_REMOVE,
+ DTRACEIOC_CONF,
+ DTRACEIOC_REPLICATE,
+ -1
+};
+
+int
+main(void)
+{
+ int fd, i;
+
+ fd = open(VND_PATH, O_RDONLY);
+ if (fd < 0) {
+ (void) fprintf(stderr, "failed to open %s read only: %s\n",
+ VND_PATH, strerror(errno));
+ return (1);
+ }
+
+ for (i = 0; requests[i] != -1; i++) {
+ int ret;
+ ret = ioctl(fd, requests[i], NULL);
+ assert(ret == -1);
+ assert(errno == ENOTTY);
+ }
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/tst.basicopenctl.c b/usr/src/cmd/vndadm/test/tst/ioctl/tst.basicopenctl.c
new file mode 100644
index 0000000000..852ad5550f
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/tst.basicopenctl.c
@@ -0,0 +1,76 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Ensure that we can do a basic open of the device for read, write, and
+ * read/write.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <strings.h>
+#include <unistd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(void)
+{
+ int fd;
+
+ fd = open(VND_PATH, O_RDONLY);
+ if (fd < 0) {
+ (void) fprintf(stderr, "failed to open %s read only: %s\n",
+ VND_PATH, strerror(errno));
+ return (1);
+ }
+
+ if (close(fd) != 0) {
+ (void) fprintf(stderr, "failed to close vnd fd: %s\n",
+ strerror(errno));
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR);
+ if (fd < 0) {
+ (void) fprintf(stderr, "failed to open %s read/write: %s\n",
+ VND_PATH, strerror(errno));
+ return (1);
+ }
+
+ if (close(fd) != 0) {
+ (void) fprintf(stderr, "failed to close vnd fd: %s\n",
+ strerror(errno));
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_WRONLY);
+ if (fd < 0) {
+ (void) fprintf(stderr, "failed to open %s write only: %s\n",
+ VND_PATH, strerror(errno));
+ return (1);
+ }
+
+ if (close(fd) != 0) {
+ (void) fprintf(stderr, "failed to close vnd fd: %s\n",
+ strerror(errno));
+ return (1);
+ }
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/tst.gioctlfault.c b/usr/src/cmd/vndadm/test/tst/ioctl/tst.gioctlfault.c
new file mode 100644
index 0000000000..b581b5dd4c
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/tst.gioctlfault.c
@@ -0,0 +1,78 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Pass pointers to arbitrary addresses and make sure we properly get EFAULT for
+ * all the global ioctls.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <strings.h>
+#include <unistd.h>
+#include <stropts.h>
+#include <limits.h>
+#include <assert.h>
+
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(void)
+{
+ int fd, ret;
+ vnd_ioc_attach_t *via;
+ vnd_ioc_list_t *vil;
+ vnd_ioc_buf_t *vib;
+
+ fd = open(VND_PATH, O_RDWR);
+ if (fd < 0) {
+ (void) fprintf(stderr, "failed to open %s r/w: %s\n", VND_PATH,
+ strerror(errno));
+ return (1);
+ }
+
+ via = (vnd_ioc_attach_t *)(uintptr_t)23;
+ vil = (vnd_ioc_list_t *)(uintptr_t)42;
+ vib = (vnd_ioc_buf_t *)(uintptr_t)169;
+
+ ret = ioctl(fd, VND_IOC_ATTACH, NULL);
+ assert(ret == -1);
+ assert(errno == EFAULT);
+ ret = ioctl(fd, VND_IOC_LIST, NULL);
+ assert(ret == -1);
+ assert(errno == EFAULT);
+ ret = ioctl(fd, VND_IOC_GETMAXBUF, NULL);
+ assert(ret == -1);
+ assert(errno == EFAULT);
+
+ ret = ioctl(fd, VND_IOC_ATTACH, via);
+ assert(ret == -1);
+ assert(errno == EFAULT);
+ ret = ioctl(fd, VND_IOC_LIST, vil);
+ assert(ret == -1);
+ assert(errno == EFAULT);
+ ret = ioctl(fd, VND_IOC_GETMAXBUF, vib);
+ assert(ret == -1);
+ assert(errno == EFAULT);
+
+ assert(close(fd) == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/tst.gioctlnattach.c b/usr/src/cmd/vndadm/test/tst/ioctl/tst.gioctlnattach.c
new file mode 100644
index 0000000000..98acffa194
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/tst.gioctlnattach.c
@@ -0,0 +1,100 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Here we test that all the ioctls which require us to be on a local device
+ * fail to work. Specifically, the errno should be VND_E_NOTATTACHED
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <strings.h>
+#include <unistd.h>
+#include <stropts.h>
+#include <limits.h>
+#include <assert.h>
+#include <stdlib.h>
+
+#include <sys/vnd.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+static int vib_ioc[] = {
+ VND_IOC_GETRXBUF,
+ VND_IOC_SETRXBUF,
+ VND_IOC_GETTXBUF,
+ VND_IOC_SETTXBUF,
+ VND_IOC_GETMINTU,
+ VND_IOC_GETMAXTU,
+ -1
+};
+
+int
+main(void)
+{
+ int fd, ret, i;
+ vnd_ioc_link_t vil;
+ vnd_ioc_unlink_t viu;
+ vnd_ioc_buf_t vib;
+ frameio_t *fio;
+ char buf[1];
+
+ fd = open(VND_PATH, O_RDWR);
+ if (fd < 0) {
+ (void) fprintf(stderr, "failed to open %s r/w: %s\n", VND_PATH,
+ strerror(errno));
+ return (1);
+ }
+
+ bzero(&vil, sizeof (vnd_ioc_link_t));
+ vil.vil_name[0] = 'a';
+ bzero(&viu, sizeof (vnd_ioc_unlink_t));
+ bzero(&vib, sizeof (vnd_ioc_buf_t));
+ fio = malloc(sizeof (frameio_t) + sizeof (framevec_t));
+ assert(fio != NULL);
+ fio->fio_version = FRAMEIO_CURRENT_VERSION;
+ fio->fio_nvpf = 1;
+ fio->fio_nvecs = 1;
+ fio->fio_vecs[0].fv_buf = buf;
+ fio->fio_vecs[0].fv_buflen = 1;
+
+ ret = ioctl(fd, VND_IOC_LINK, &vil);
+ assert(vil.vil_errno == VND_E_NOTATTACHED);
+ ret = ioctl(fd, VND_IOC_UNLINK, &viu);
+ assert(viu.viu_errno == VND_E_NOTLINKED);
+
+ for (i = 0; vib_ioc[i] != -1; i++) {
+ bzero(&vib, sizeof (vib));
+ ret = ioctl(fd, vib_ioc[i], &vib);
+ assert(vib.vib_errno == VND_E_NOTATTACHED);
+ }
+
+ /* The frameio ioctls only use standard errnos */
+ ret = ioctl(fd, VND_IOC_FRAMEIO_READ, fio);
+ assert(ret == -1);
+ assert(errno == ENXIO);
+ ret = ioctl(fd, VND_IOC_FRAMEIO_WRITE, fio);
+ assert(ret == -1);
+ assert(errno == ENXIO);
+
+ free(fio);
+ assert(close(fd) == 0);
+
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/tst.iocsize.ksh b/usr/src/cmd/vndadm/test/tst/ioctl/tst.iocsize.ksh
new file mode 100644
index 0000000000..9b30043d47
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/tst.iocsize.ksh
@@ -0,0 +1,54 @@
+#
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Ensure structure sizes for both ILP32 and LP64 are the same
+#
+
+vt_arg0=$(basename $0)
+vt_structs="vnd_ioc_attach_t vnd_ioc_link_t vnd_ioc_unlink_t"
+vt_structs="$vt_structs vnd_ioc_nonblock_t vnd_ioc_buf_t vnd_ioc_info_t"
+
+vt_t32="/tmp/vnd.iocsize.32.$$"
+vt_t64="/tmp/vnd.iocsize.64.$$"
+
+function fatal
+{
+ typeset msg="$*"
+ [[ -z "$msg" ]] && msg="failed"
+ echo "$vt_arg0: $msg" >&2
+ exit 1
+}
+
+function dump_types
+{
+ typeset file=$1
+ typeset lib=$2
+ typeset t
+
+ for t in $vn_structs; do
+ mdb -e \'::print -at $t\' $lib >> $file || fatal \
+ "failed to dump type $t from $lib"
+ done
+}
+
+rm -f $vt_t32 $vt_t64 || fatal "failed to cleanup old temp files"
+touch $vt_t32 $vt_t64 || fatal "failed to create temp files"
+
+dump_types $vt_t32 /usr/lib/libvnd.so.1
+dump_types $vt_t64 /usr/lib/64/libvnd.so.1
+
+diff $vt_t32 $vt_t64
diff --git a/usr/src/cmd/vndadm/test/tst/ioctl/tst.openctlbadflags.c b/usr/src/cmd/vndadm/test/tst/ioctl/tst.openctlbadflags.c
new file mode 100644
index 0000000000..65e48029b7
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/ioctl/tst.openctlbadflags.c
@@ -0,0 +1,88 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Make sure that we can't open the vnd control device with invalid flags.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+
+#define VND_PATH "/dev/vnd/ctl"
+
+int
+main(void)
+{
+ int fd;
+
+ fd = open(VND_PATH, O_RDONLY | O_EXCL);
+ if (fd != -1) {
+ (void) fprintf(stderr, "somehow opened vnd O_EXCL!");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR | O_EXCL);
+ if (fd != -1) {
+ (void) fprintf(stderr, "somehow opened vnd O_EXCL!");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_WRONLY | O_EXCL);
+ if (fd != -1) {
+ (void) fprintf(stderr, "somehow opened vnd O_EXCL!");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDONLY | O_NDELAY);
+ if (fd != -1) {
+ (void) fprintf(stderr, "somehow opened vnd O_NDELAY!");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR | O_NDELAY);
+ if (fd != -1) {
+ (void) fprintf(stderr, "somehow opened vnd O_NDELAY!");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_WRONLY | O_NDELAY);
+ if (fd != -1) {
+ (void) fprintf(stderr, "somehow opened vnd O_NDELAY!");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDONLY | O_NDELAY | O_EXCL);
+ if (fd != -1) {
+ (void) fprintf(stderr, "somehow opened vnd O_NDELAY | O_EXCL!");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_RDWR | O_NDELAY | O_EXCL);
+ if (fd != -1) {
+ (void) fprintf(stderr, "somehow opened vnd O_NDELAY | O_EXCL!");
+ return (1);
+ }
+
+ fd = open(VND_PATH, O_WRONLY | O_NDELAY | O_EXCL);
+ if (fd != -1) {
+ (void) fprintf(stderr, "somehow opened vnd O_NDELAY | O_EXCL!");
+ return (1);
+ }
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/lib/Makefile b/usr/src/cmd/vndadm/test/tst/lib/Makefile
new file mode 100644
index 0000000000..d7a1ed8fa5
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/Makefile
@@ -0,0 +1,44 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+TSTDIR = lib
+EXETESTS = \
+ create.basic.exe \
+ create.badlink.exe \
+ create.badpropid.exe \
+ create.badpropsize.exe \
+ create.badzone.exe \
+ create.enomem.exe \
+ create.frameioeagain.exe \
+ create.open.exe \
+ create.propiter.exe \
+ create.proprdonly.exe \
+ err.badclose.exe \
+ tst.badopen.exe \
+ tst.strerror.exe \
+ tst.strsyserror.exe
+OUTFILES = tst.strerror.exe.out
+SHTESTS =
+SUPBOBJS =
+
+CLOBBERFILES = $(EXETESTS)
+
+include ../../Makefile.com
+
+LDLIBS += -lvnd
+
+install: $(ROOTTESTS)
+
+include ../../Makefile.targ
diff --git a/usr/src/cmd/vndadm/test/tst/lib/create.badlink.c b/usr/src/cmd/vndadm/test/tst/lib/create.badlink.c
new file mode 100644
index 0000000000..aefec3ed44
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/create.badlink.c
@@ -0,0 +1,39 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Make sure that we can't create something in the context of a datalink that
+ * doesn't exist.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <libvnd.h>
+
+int
+main(void)
+{
+ int syserr;
+ vnd_errno_t vnderr;
+ vnd_handle_t *vhp;
+
+ vhp = vnd_create(NULL, "foobar", "foobar", &vnderr, &syserr);
+ (void) printf("%d, %d\n", vnderr, syserr);
+ assert(vhp == NULL);
+ assert(vnderr == VND_E_NODATALINK);
+ assert(syserr == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/lib/create.badpropid.c b/usr/src/cmd/vndadm/test/tst/lib/create.badpropid.c
new file mode 100644
index 0000000000..15334fa31c
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/create.badpropid.c
@@ -0,0 +1,76 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Make sure that we can't get and set nonexisting properties.
+ */
+
+#include <stdio.h>
+#include <strings.h>
+#include <assert.h>
+#include <libvnd.h>
+
+int
+main(int argc, const char *argv[])
+{
+ int syserr, ret;
+ vnd_errno_t vnderr;
+ vnd_handle_t *vhp;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= LIBVND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ vhp = vnd_create(NULL, argv[1], argv[1], &vnderr, &syserr);
+ assert(vhp != NULL);
+ assert(vnderr == 0);
+ assert(syserr == 0);
+
+ ret = vnd_prop_get(vhp, VND_PROP_MAX, NULL, 0);
+ assert(ret == -1);
+ assert(vnd_errno(vhp) == VND_E_BADPROP);
+ assert(vnd_syserrno(vhp) == 0);
+
+ ret = vnd_prop_get(vhp, VND_PROP_MAX + 5, NULL, 0);
+ assert(ret == -1);
+ assert(vnd_errno(vhp) == VND_E_BADPROP);
+ assert(vnd_syserrno(vhp) == 0);
+
+ ret = vnd_prop_set(vhp, VND_PROP_MAX, NULL, 0);
+ assert(ret == -1);
+ assert(vnd_errno(vhp) == VND_E_BADPROP);
+ assert(vnd_syserrno(vhp) == 0);
+
+ ret = vnd_prop_set(vhp, VND_PROP_MAX + 5, NULL, 0);
+ assert(ret == -1);
+ assert(vnd_errno(vhp) == VND_E_BADPROP);
+ assert(vnd_syserrno(vhp) == 0);
+
+ ret = vnd_prop_writeable(VND_PROP_MAX, NULL);
+ assert(ret == -1);
+
+ ret = vnd_prop_writeable(VND_PROP_MAX + 5, NULL);
+ assert(ret == -1);
+
+ vnd_close(vhp);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/lib/create.badpropsize.c b/usr/src/cmd/vndadm/test/tst/lib/create.badpropsize.c
new file mode 100644
index 0000000000..d5fefd3764
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/create.badpropsize.c
@@ -0,0 +1,63 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Validate that we can't set properties with bogus sizes.
+ */
+
+#include <stdio.h>
+#include <strings.h>
+#include <assert.h>
+#include <limits.h>
+#include <libvnd.h>
+
+int
+main(int argc, const char *argv[])
+{
+ int syserr, ret, i;
+ vnd_errno_t vnderr;
+ vnd_handle_t *vhp;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= LIBVND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ vhp = vnd_create(NULL, argv[1], argv[1], &vnderr, &syserr);
+ assert(vhp != NULL);
+ assert(vnderr == 0);
+ assert(syserr == 0);
+
+ for (i = 0; i < VND_PROP_MAX; i++) {
+ ret = vnd_prop_get(vhp, i, NULL, INT32_MAX);
+ assert(ret == -1);
+ assert(vnd_errno(vhp) == VND_E_BADPROPSIZE);
+ assert(vnd_syserrno(vhp) == 0);
+
+ ret = vnd_prop_set(vhp, i, NULL, INT32_MAX);
+ assert(ret == -1);
+ assert(vnd_errno(vhp) == VND_E_BADPROPSIZE);
+ assert(vnd_syserrno(vhp) == 0);
+ }
+
+ vnd_close(vhp);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/lib/create.badzone.c b/usr/src/cmd/vndadm/test/tst/lib/create.badzone.c
new file mode 100644
index 0000000000..30f9612963
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/create.badzone.c
@@ -0,0 +1,43 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Make sure that we can't create something in the context of a zone that
+ * doesn't exist.
+ */
+
+#include <assert.h>
+#include <sys/zone.h>
+#include <string.h>
+#include <libvnd.h>
+
+int
+main(void)
+{
+ int syserr;
+ vnd_errno_t vnderr;
+ char zname[ZONENAME_MAX+4];
+ vnd_handle_t *vhp;
+
+ (void) memset(zname, 'a', sizeof (zname));
+ zname[ZONENAME_MAX+3] = '\0';
+
+ vhp = vnd_create(zname, "foobar", "foobar", &vnderr, &syserr);
+ assert(vhp == NULL);
+ assert(vnderr == VND_E_NOZONE);
+ assert(syserr == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/lib/create.basic.c b/usr/src/cmd/vndadm/test/tst/lib/create.basic.c
new file mode 100644
index 0000000000..5335f8cbb4
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/create.basic.c
@@ -0,0 +1,49 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Simple create and destroy.
+ */
+
+#include <stdio.h>
+#include <strings.h>
+#include <assert.h>
+#include <libvnd.h>
+
+int
+main(int argc, const char *argv[])
+{
+ int syserr;
+ vnd_errno_t vnderr;
+ vnd_handle_t *vhp;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= LIBVND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ vhp = vnd_create(NULL, argv[1], argv[1], &vnderr, &syserr);
+ assert(vhp != NULL);
+ assert(vnderr == 0);
+ assert(syserr == 0);
+ vnd_close(vhp);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/lib/create.enomem.c b/usr/src/cmd/vndadm/test/tst/lib/create.enomem.c
new file mode 100644
index 0000000000..9203e369ae
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/create.enomem.c
@@ -0,0 +1,91 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Verify that we can't allocate a handle when in an ENOMEM situation.
+ */
+
+#include <procfs.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/sysmacros.h>
+#include <assert.h>
+#include <strings.h>
+
+#include <libvnd.h>
+
+int
+main(int argc, const char *argv[])
+{
+ int fd;
+ int syserr;
+ vnd_errno_t vnderr;
+ vnd_handle_t *vhp;
+ pstatus_t status;
+ void *addr;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= LIBVND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ fd = open("/proc/self/status", O_RDONLY);
+ if (fd < 0)
+ exit(1);
+ if (read(fd, &status, sizeof (status)) != sizeof (status))
+ exit(1);
+
+ addr = mmap((caddr_t)P2ROUNDUP(status.pr_brkbase +
+ status.pr_brksize, 0x1000), 0x1000,
+ PROT_READ, MAP_ANON | MAP_FIXED | MAP_PRIVATE, -1, 0);
+ if (addr == (void *)-1) {
+ perror("mmap");
+ exit(1);
+ }
+
+ /* malloc an approximate size of the vnd_handle_t */
+ for (;;) {
+ void *buf;
+
+ buf = malloc(8);
+ if (buf == NULL)
+ break;
+ }
+
+ for (;;) {
+ void *buf;
+
+ buf = malloc(4);
+ if (buf == NULL)
+ break;
+ }
+
+ vhp = vnd_create(NULL, argv[1], argv[1], &vnderr, &syserr);
+ assert(vhp == NULL);
+ assert(vnderr == VND_E_NOMEM);
+ assert(syserr == 0);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/lib/create.frameioeagain.c b/usr/src/cmd/vndadm/test/tst/lib/create.frameioeagain.c
new file mode 100644
index 0000000000..6cb14fb7df
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/create.frameioeagain.c
@@ -0,0 +1,80 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Create a datalink, set it to non-blocking mode and ensure that we get EAGAIN
+ * from frame I/O calls. Note that if this test is not plumbed up over an
+ * etherstub, then it is likely that other traffic will appear on the device and
+ * this will fail. Note that the test suite always creates these devices over an
+ * etherstub.
+ */
+
+#include <stdio.h>
+#include <strings.h>
+#include <assert.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <libvnd.h>
+
+int
+main(int argc, const char *argv[])
+{
+ int syserr, ret, fd;
+ vnd_errno_t vnderr;
+ vnd_handle_t *vhp;
+ frameio_t *fio;
+ char buf[1520];
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= LIBVND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ vhp = vnd_create(NULL, argv[1], argv[1], &vnderr, &syserr);
+ assert(vhp != NULL);
+ assert(vnderr == 0);
+ assert(syserr == 0);
+
+ fd = vnd_pollfd(vhp);
+ ret = fcntl(fd, F_SETFL, O_NONBLOCK);
+ assert(ret == 0);
+
+ fio = malloc(sizeof (frameio_t) +
+ sizeof (framevec_t));
+ assert(fio != NULL);
+ fio->fio_version = FRAMEIO_CURRENT_VERSION;
+ fio->fio_nvpf = 1;
+ fio->fio_nvecs = 1;
+
+ fio->fio_vecs[0].fv_buf = buf;
+ fio->fio_vecs[0].fv_buflen = sizeof (buf);
+
+ ret = vnd_frameio_read(vhp, fio);
+ (void) printf("%d, %d\n", ret, errno);
+ assert(ret == -1);
+ assert(errno == EAGAIN);
+
+ vnd_close(vhp);
+ free(fio);
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/lib/create.open.c b/usr/src/cmd/vndadm/test/tst/lib/create.open.c
new file mode 100644
index 0000000000..9cb1d7e40e
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/create.open.c
@@ -0,0 +1,56 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Make sure we can open a created datalink.
+ */
+
+#include <stdio.h>
+#include <strings.h>
+#include <assert.h>
+#include <libvnd.h>
+
+int
+main(int argc, const char *argv[])
+{
+ int syserr;
+ vnd_errno_t vnderr;
+ vnd_handle_t *vhp, *vhp2;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= LIBVND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ vhp = vnd_create(NULL, argv[1], argv[1], &vnderr, &syserr);
+ assert(vhp != NULL);
+ assert(vnderr == 0);
+ assert(syserr == 0);
+
+ vhp2 = vnd_open(NULL, argv[1], &vnderr, &syserr);
+ assert(vhp2 != NULL);
+ assert(vnderr == 0);
+ assert(syserr == 0);
+
+ vnd_close(vhp2);
+ vnd_close(vhp);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/lib/create.propiter.c b/usr/src/cmd/vndadm/test/tst/lib/create.propiter.c
new file mode 100644
index 0000000000..a0b46180f7
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/create.propiter.c
@@ -0,0 +1,79 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Ensure that vnd_prop_iter sees all props;
+ */
+
+#include <stdio.h>
+#include <strings.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <libvnd.h>
+
+static boolean_t *g_props;
+
+/* ARGSUSED */
+static int
+prop_cb(vnd_handle_t *vhp, vnd_prop_t prop, void *unused)
+{
+ assert(prop < VND_PROP_MAX);
+ g_props[prop] = B_TRUE;
+
+ return (0);
+}
+
+int
+main(int argc, const char *argv[])
+{
+ int syserr, i, ret;
+ vnd_errno_t vnderr;
+ vnd_handle_t *vhp;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= LIBVND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ g_props = malloc(sizeof (boolean_t) * VND_PROP_MAX);
+ if (g_props == NULL) {
+ (void) fprintf(stderr, "failed to alloc memory for %d "
+ "boolean_t\n", VND_PROP_MAX);
+ return (1);
+ }
+ for (i = 0; i < VND_PROP_MAX; i++)
+ g_props[i] = B_FALSE;
+
+ vhp = vnd_create(NULL, argv[1], argv[1], &vnderr, &syserr);
+ assert(vhp != NULL);
+ assert(vnderr == 0);
+ assert(syserr == 0);
+
+ ret = vnd_prop_iter(vhp, prop_cb, NULL);
+ assert(ret == 0);
+
+ for (i = 0; i < VND_PROP_MAX; i++)
+ assert(g_props[i] == B_TRUE);
+
+ free(g_props);
+ vnd_close(vhp);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/lib/create.proprdonly.c b/usr/src/cmd/vndadm/test/tst/lib/create.proprdonly.c
new file mode 100644
index 0000000000..18b1f7d58d
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/create.proprdonly.c
@@ -0,0 +1,63 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Validate that we can't set read only properties
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <strings.h>
+#include <assert.h>
+#include <limits.h>
+#include <libvnd.h>
+
+int
+main(int argc, const char *argv[])
+{
+ int syserr, ret;
+ vnd_errno_t vnderr;
+ vnd_handle_t *vhp;
+ vnd_prop_buf_t vpb;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= LIBVND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ vhp = vnd_create(NULL, argv[1], argv[1], &vnderr, &syserr);
+ assert(vhp != NULL);
+ assert(vnderr == 0);
+ assert(syserr == 0);
+
+ ret = vnd_prop_get(vhp, VND_PROP_MINTU, &vpb,
+ sizeof (vnd_prop_buf_t));
+ assert(ret == 0);
+
+ ret = vnd_prop_set(vhp, VND_PROP_MINTU, &vpb,
+ sizeof (vnd_prop_buf_t));
+ assert(ret == -1);
+ assert(vnd_errno(vhp) == VND_E_PROPRDONLY);
+ assert(vnd_syserrno(vhp) == 0);
+
+ vnd_close(vhp);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/lib/err.badclose.c b/usr/src/cmd/vndadm/test/tst/lib/err.badclose.c
new file mode 100644
index 0000000000..8c832506a0
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/err.badclose.c
@@ -0,0 +1,33 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * This program should segfault.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <strings.h>
+#include <assert.h>
+#include <libvnd.h>
+
+int
+main(void)
+{
+ vnd_handle_t *vhp = (void *)0x42;
+ vnd_close(vhp);
+ /* This should not be reached */
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/lib/tst.badopen.c b/usr/src/cmd/vndadm/test/tst/lib/tst.badopen.c
new file mode 100644
index 0000000000..4f67ce79ed
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/tst.badopen.c
@@ -0,0 +1,49 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Make sure we can't open a vnd device that doesn't exist
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <strings.h>
+#include <assert.h>
+#include <libvnd.h>
+
+int
+main(int argc, const char *argv[])
+{
+ int syserr;
+ vnd_errno_t vnderr;
+ vnd_handle_t *vhp;
+
+ if (argc < 2) {
+ (void) fprintf(stderr, "missing arguments...\n");
+ return (1);
+ }
+
+ if (strlen(argv[1]) >= LIBVND_NAMELEN) {
+ (void) fprintf(stderr, "vnic name too long...\n");
+ return (1);
+ }
+
+ vhp = vnd_open(NULL, argv[1], &vnderr, &syserr);
+ assert(vhp == NULL);
+ assert(vnderr == VND_E_SYS);
+ assert(syserr == ENOENT);
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/lib/tst.strerror.c b/usr/src/cmd/vndadm/test/tst/lib/tst.strerror.c
new file mode 100644
index 0000000000..a99a9ecbf6
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/tst.strerror.c
@@ -0,0 +1,30 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Verify that all the error strings we care about match what we expect.
+ */
+
+#include <stdio.h>
+#include <libvnd.h>
+
+int
+main(void)
+{
+ int i;
+ for (i = 0; i <= VND_E_UNKNOWN + 1; i++)
+ (void) printf("[%s]\n", vnd_strerror(i));
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/test/tst/lib/tst.strerror.exe.out b/usr/src/cmd/vndadm/test/tst/lib/tst.strerror.exe.out
new file mode 100644
index 0000000000..83dbcdfdb4
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/tst.strerror.exe.out
@@ -0,0 +1,37 @@
+[no error]
+[not enough memory available]
+[no such datalink]
+[datalink not of type DL_ETHER]
+[unknown dlpi failure]
+[DL_ATTACH_REQ failed]
+[DL_BIND_REQ failed]
+[DL_PROMISCON_REQ failed]
+[DLD_CAPAB_DIRECT enable failed]
+[bad datalink capability]
+[bad datalink subcapability]
+[bad dld version]
+[failed to create kstats]
+[no such vnd link]
+[netstack doesn't exist]
+[device already associated]
+[device already attached]
+[device already linked]
+[invalid name]
+[permission denied]
+[no such zone]
+[failed to initialize vnd stream module]
+[device not attached]
+[device not linked]
+[another device has the same link name]
+[failed to create minor node]
+[requested buffer size is too large]
+[requested buffer size is too small]
+[unable to obtain exclusive access to dlpi link, link busy]
+[DLD direct capability not supported over data link]
+[invalid property size]
+[invalid property]
+[property is read only]
+[unexpected system error]
+[capabilities invalid, pass-through module detected]
+[unknown error]
+[unknown error]
diff --git a/usr/src/cmd/vndadm/test/tst/lib/tst.strsyserror.c b/usr/src/cmd/vndadm/test/tst/lib/tst.strsyserror.c
new file mode 100644
index 0000000000..b95e6372e4
--- /dev/null
+++ b/usr/src/cmd/vndadm/test/tst/lib/tst.strsyserror.c
@@ -0,0 +1,50 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Verify that the error message from libvnd's strsyserrno is the same as the
+ * underlying strerror function's. It should be. We'll just check an assortment
+ * of errnos.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <libvnd.h>
+
+int
+main(void)
+{
+ int i;
+ const char *vnd, *libc;
+ for (i = 0; i < 42; i++) {
+ vnd = vnd_strsyserror(i);
+ libc = strerror(i);
+ if ((vnd != NULL && libc == NULL) ||
+ (vnd == NULL && libc != NULL)) {
+ (void) fprintf(stderr, "errno %d, vnd: %p, libc: %p",
+ i, (void *)vnd, (void *)libc);
+ return (1);
+ }
+ if (vnd != NULL && strcmp(vnd, libc) != 0) {
+ (void) fprintf(stderr,
+ "errno %d: libc and vnd disagree.\n", i);
+ (void) fprintf(stderr, "vnd: %s\n", vnd);
+ (void) fprintf(stderr, "libc: %s\n", libc);
+ return (1);
+ }
+ }
+
+ return (0);
+}
diff --git a/usr/src/cmd/vndadm/vndadm.c b/usr/src/cmd/vndadm/vndadm.c
new file mode 100644
index 0000000000..6811663696
--- /dev/null
+++ b/usr/src/cmd/vndadm/vndadm.c
@@ -0,0 +1,872 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <errno.h>
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <assert.h>
+#include <libgen.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <zone.h>
+
+#include <libvnd.h>
+
+typedef int (*vndadm_print_t)(vnd_handle_t *, vnd_prop_t);
+typedef int (*vndadm_parse_t)(char *, void **, size_t *);
+
+typedef struct vndadm_proptbl {
+ const char *vp_name;
+ vndadm_print_t vp_print;
+ vndadm_parse_t vp_parse;
+} vndadm_proptbl_t;
+
+/*
+ * Forwards
+ */
+static int usage(const char *, ...);
+static int vndadm_print_size(vnd_handle_t *, vnd_prop_t);
+static int vndadm_print_number(vnd_handle_t *, vnd_prop_t);
+static int vndadm_parse_size(char *, void **, size_t *);
+
+/*
+ * Globals
+ */
+static char *vnd_pname;
+
+static void
+vnd_vwarn(vnd_errno_t verr, int syserr, const char *format, va_list alist)
+{
+ (void) fprintf(stderr, "%s: ", vnd_pname);
+ (void) vfprintf(stderr, format, alist);
+ if (strchr(format, '\n') == NULL) {
+ (void) fprintf(stderr, ": %s\n", verr != VND_E_SYS ?
+ vnd_strerror(verr) : vnd_strsyserror(syserr));
+ }
+}
+
+static void
+vnd_libwarn(vnd_errno_t verr, int syserr, const char *format, ...)
+{
+ va_list alist;
+
+ va_start(alist, format);
+ vnd_vwarn(verr, syserr, format, alist);
+ va_end(alist);
+}
+
+static void
+vnd_warn(const char *format, ...)
+{
+ va_list alist;
+
+ va_start(alist, format);
+ vnd_vwarn(0, 0, format, alist);
+ va_end(alist);
+}
+
+static vndadm_proptbl_t vndadm_propname_tbl[] = {
+ { "rxbuf", vndadm_print_size,
+ vndadm_parse_size }, /* VND_PROP_RXBUF */
+ { "txbuf", vndadm_print_size,
+ vndadm_parse_size }, /* VND_PROP_TXBUF */
+ { "maxsize", vndadm_print_size, NULL }, /* VND_PROP_MAXBUF */
+ { "mintu", vndadm_print_number, NULL }, /* VND_PROP_MINTU */
+ { "maxtu", vndadm_print_number, NULL }, /* VND_PROP_MAXTU */
+ NULL /* VND_PROP_MAX */
+};
+
+static const char *
+vndadm_prop_to_name(vnd_prop_t prop)
+{
+ if (prop > VND_PROP_MAX)
+ return (NULL);
+
+ return (vndadm_propname_tbl[prop].vp_name);
+}
+
+static vnd_prop_t
+vndadm_name_to_prop(const char *name)
+{
+ int i;
+
+ for (i = 0; i < VND_PROP_MAX; i++) {
+ if (strcmp(name, vndadm_propname_tbl[i].vp_name) == 0)
+ return (i);
+ }
+
+ return (VND_PROP_MAX);
+}
+
+static int
+vndadm_print_size(vnd_handle_t *vhp, vnd_prop_t prop)
+{
+ vnd_prop_buf_t buf;
+
+ if (vnd_prop_get(vhp, prop, &buf, sizeof (buf)) != 0) {
+ vnd_libwarn(vnd_errno(vhp), vnd_syserrno(vhp),
+ "failed to get property %s", vndadm_prop_to_name(prop));
+ return (1);
+ }
+
+ (void) printf("%lld", buf.vpb_size);
+ return (0);
+}
+
+static int
+vndadm_print_number(vnd_handle_t *vhp, vnd_prop_t prop)
+{
+ vnd_prop_buf_t buf;
+
+ if (vnd_prop_get(vhp, prop, &buf, sizeof (buf)) != 0) {
+ vnd_libwarn(vnd_errno(vhp), vnd_syserrno(vhp),
+ "failed to get property %s", vndadm_prop_to_name(prop));
+ return (1);
+ }
+
+ (void) printf("%lld", buf.vpb_size);
+ return (0);
+}
+
+static int
+vndadm_parse_size(char *str, void **bufp, size_t *sizep)
+{
+ char *end;
+ unsigned long long val, orig;
+ vnd_prop_buf_t *buf;
+
+ errno = 0;
+ val = strtoull(str, &end, 10);
+ if (errno != 0) {
+ vnd_warn("%s: not a number\n", str);
+ return (1);
+ }
+
+ orig = val;
+ switch (*end) {
+ case 'g':
+ case 'G':
+ val *= 1024;
+ if (val < orig)
+ goto overflow;
+ /*FALLTHRU*/
+ case 'm':
+ case 'M':
+ val *= 1024;
+ if (val < orig)
+ goto overflow;
+ /*FALLTHRU*/
+ case 'k':
+ case 'K':
+ val *= 1024;
+ if (val < orig)
+ goto overflow;
+ end++;
+ break;
+ default:
+ break;
+ }
+
+ if (*end == 'b' || *end == 'B')
+ end++;
+ if (*end != '\0') {
+ vnd_warn("%s: not a number", str);
+ return (1);
+ }
+
+ buf = malloc(sizeof (vnd_prop_buf_t));
+ if (buf == NULL) {
+ vnd_warn("failed to allocate memory for setting a property");
+ return (1);
+ }
+
+ buf->vpb_size = val;
+ *bufp = buf;
+ *sizep = sizeof (vnd_prop_buf_t);
+
+ return (0);
+
+overflow:
+ vnd_warn("value overflowed: %s\n", str);
+ return (1);
+}
+
+static void
+vndadm_create_usage(FILE *out)
+{
+ (void) fprintf(out, "\tcreate:\t\t[-z zonename] -l datalink name\n");
+}
+
+static int
+vndadm_create(int argc, char *argv[])
+{
+ int c, syserr;
+ vnd_errno_t vnderr;
+ const char *datalink = NULL;
+ const char *linkname = NULL;
+ const char *zonename = NULL;
+ vnd_handle_t *vhp;
+
+ optind = 0;
+ while ((c = getopt(argc, argv, ":z:l:")) != -1) {
+ switch (c) {
+ case 'l':
+ datalink = optarg;
+ break;
+ case 'z':
+ zonename = optarg;
+ break;
+ case ':':
+ return (usage("-%c requires an operand\n", optopt));
+ case '?':
+ return (usage("unknown option: -%c\n", optopt));
+ default:
+ abort();
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ if (argc < 1) {
+ return (usage("missing required link name\n"));
+ } else if (argc > 1) {
+ return (usage("create: too many arguments for link name, "
+ "pick one\n"));
+ }
+ linkname = argv[0];
+ if (datalink == NULL)
+ datalink = linkname;
+
+ vhp = vnd_create(zonename, datalink, linkname, &vnderr, &syserr);
+ if (vhp == NULL) {
+ vnd_libwarn(vnderr, syserr,
+ "failed to create datapath link %s", linkname);
+ return (1);
+ }
+
+ vnd_close(vhp);
+ return (0);
+}
+
+static void
+vndadm_destroy_usage(FILE *out)
+{
+ (void) fprintf(out, "\tdestroy:\t[-z zonename] [link]...\n");
+}
+
+static int
+vndadm_destroy(int argc, char *argv[])
+{
+ vnd_handle_t *vhp;
+ int c, syserr;
+ vnd_errno_t vnderr;
+ const char *zonename = NULL;
+
+ optind = 0;
+ while ((c = getopt(argc, argv, ":z:")) != -1) {
+ switch (c) {
+ case 'z':
+ zonename = optarg;
+ break;
+ case ':':
+ return (usage("-%c requires an operand\n", optopt));
+ case '?':
+ return (usage("unknown option: -%c\n", optopt));
+ default:
+ abort();
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ if (argc != 1) {
+ return (usage("extraneous arguments\n"));
+ }
+
+ vhp = vnd_open(zonename, argv[0], &vnderr, &syserr);
+ if (vhp == NULL) {
+ vnd_libwarn(vnderr, syserr, "failed to open link: %s", argv[0]);
+ return (1);
+ }
+
+ if (vnd_unlink(vhp) != 0) {
+ vnd_libwarn(vnd_errno(vhp), vnd_syserrno(vhp),
+ "failed to destroy link %s", argv[0]);
+ return (1);
+ }
+
+ vnd_close(vhp);
+ return (0);
+}
+
+static void
+vndadm_list_usage(FILE *out)
+{
+ (void) fprintf(out, "\tlist:\t\t[-p] [-d delim] [-o field,...] "
+ "[-z zonename] [link]...\n");
+}
+
+#define VNDADM_LIST_NFIELDS 3
+
+typedef struct vndadm_list_cb {
+ int vsc_argc;
+ char **vsc_argv;
+ int vsc_found;
+ boolean_t vsc_parse;
+ const char *vsc_delim;
+ int vsc_order[VNDADM_LIST_NFIELDS];
+ int vsc_last;
+ zoneid_t vsc_zid;
+} vndadm_list_cb_t;
+
+typedef struct vndadm_list_field {
+ const char *vlf_name;
+ const char *vlf_header;
+ int vlf_size;
+ void (*vlf_print)(struct vndadm_list_field *, vnd_info_t *, boolean_t);
+ void (*vlf_parse)(struct vndadm_list_field *, vnd_info_t *, boolean_t);
+} vndadm_list_field_t;
+
+static void
+vlf_print_link(vndadm_list_field_t *vlfp, vnd_info_t *viip,
+ boolean_t last)
+{
+ if (last == B_TRUE) {
+ (void) printf("%s", viip->vi_name);
+ } else {
+ (void) printf("%-*s", vlfp->vlf_size, viip->vi_name);
+ }
+}
+
+/* ARGSUSED */
+static void
+vlf_parse_link(vndadm_list_field_t *vlfp, vnd_info_t *viip,
+ boolean_t last)
+{
+ (void) printf("%s", viip->vi_name);
+}
+
+static void
+vlf_print_datalink(vndadm_list_field_t *vlfp, vnd_info_t *viip,
+ boolean_t last)
+{
+ if (last == B_TRUE) {
+ (void) printf("%s", viip->vi_datalink);
+ } else {
+ (void) printf("%-*s", vlfp->vlf_size, viip->vi_datalink);
+ }
+}
+
+/* ARGSUSED */
+static void
+vlf_parse_datalink(vndadm_list_field_t *vlfp, vnd_info_t *viip,
+ boolean_t last)
+{
+ (void) printf("%s", viip->vi_datalink);
+}
+
+static void
+vlf_print_zone(vndadm_list_field_t *vlfp, vnd_info_t *viip,
+ boolean_t last)
+{
+ char buf[ZONENAME_MAX];
+
+ if (getzonenamebyid(viip->vi_zone, buf, sizeof (buf)) <= 0)
+ (void) strlcpy(buf, "<unknown>", sizeof (buf));
+
+ if (last == B_TRUE) {
+ (void) printf("%s", buf);
+ } else {
+ (void) printf("%-*s", vlfp->vlf_size, buf);
+ }
+}
+
+/* ARGSUSED */
+static void
+vlf_parse_zone(vndadm_list_field_t *vlfp, vnd_info_t *viip,
+ boolean_t last)
+{
+ char buf[ZONENAME_MAX];
+
+ if (getzonenamebyid(viip->vi_zone, buf, sizeof (buf)) <= 0)
+ (void) strlcpy(buf, "<unknown>", sizeof (buf));
+
+ (void) printf("%s", buf);
+}
+
+static vndadm_list_field_t vlf_tbl[] = {
+ { "name", "NAME", 16, vlf_print_link, vlf_parse_link },
+ { "datalink", "DATALINK", 16, vlf_print_datalink, vlf_parse_datalink },
+ { "zone", "ZONENAME", 32, vlf_print_zone, vlf_parse_zone },
+ { NULL }
+};
+
+
+static int
+vndadm_list_f(vnd_info_t *viip, void *arg)
+{
+ int i;
+ boolean_t found;
+ vndadm_list_cb_t *vscp = arg;
+
+ if (vscp->vsc_zid != ALL_ZONES && vscp->vsc_zid != viip->vi_zone)
+ return (0);
+
+ if (vscp->vsc_argc != 0) {
+ found = B_FALSE;
+ for (i = 0; i < vscp->vsc_argc; i++) {
+ if (strcmp(viip->vi_name, vscp->vsc_argv[i]) == 0) {
+ found = B_TRUE;
+ break;
+ }
+ }
+ if (found == B_FALSE)
+ return (0);
+ vscp->vsc_found++;
+ }
+
+ for (i = 0; i < VNDADM_LIST_NFIELDS && vscp->vsc_order[i] != -1; i++) {
+ boolean_t last = i == vscp->vsc_last;
+ if (vscp->vsc_parse == B_TRUE)
+ vlf_tbl[vscp->vsc_order[i]].vlf_parse(
+ &vlf_tbl[vscp->vsc_order[i]], viip, last);
+ else
+ vlf_tbl[vscp->vsc_order[i]].vlf_print(
+ &vlf_tbl[vscp->vsc_order[i]], viip, last);
+
+ if (last == B_FALSE)
+ (void) printf("%s", vscp->vsc_delim);
+ }
+ (void) printf("\n");
+
+ return (0);
+}
+
+static int
+vndadm_list(int argc, char *argv[])
+{
+ int c, i, syserr;
+ vnd_errno_t vnderr;
+ boolean_t parse = B_FALSE;
+ const char *zonename = NULL, *delim = NULL;
+ char *fields = NULL;
+ vndadm_list_cb_t vsc;
+
+ optind = 0;
+ while ((c = getopt(argc, argv, ":pd:o:z:")) != -1) {
+ switch (c) {
+ case 'p':
+ parse = B_TRUE;
+ break;
+ case 'd':
+ delim = optarg;
+ break;
+ case 'o':
+ fields = optarg;
+ break;
+ case 'z':
+ zonename = optarg;
+ break;
+ case ':':
+ return (usage("-%c requires an operand\n", optopt));
+ case '?':
+ return (usage("unknown option: -%c\n", optopt));
+ default:
+ abort();
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ vsc.vsc_argc = argc;
+ vsc.vsc_argv = argv;
+ vsc.vsc_found = 0;
+ if (zonename != NULL) {
+ vsc.vsc_zid = getzoneidbyname(zonename);
+ if (vsc.vsc_zid == -1) {
+ vnd_warn("no such zone: %s\n", zonename);
+ return (1);
+ }
+ } else {
+ vsc.vsc_zid = ALL_ZONES;
+ }
+
+ /* Sanity check parseable related stuff */
+ if (delim != NULL && parse == B_FALSE) {
+ return (usage("-d cannot be used without -p\n"));
+ }
+
+ if (parse == B_TRUE && fields == NULL) {
+ return (usage("-p cannot be used without -o\n"));
+ }
+
+ /* validate our fields, if any */
+ if (fields != NULL) {
+ char *c, *n;
+ int floc = 0;
+
+ c = fields;
+ for (;;) {
+ if (floc >= VNDADM_LIST_NFIELDS) {
+ return (usage("too many fields specified "
+ "for -o\n"));
+ }
+
+ n = strchr(c, ',');
+ if (n != NULL)
+ *n = '\0';
+
+ for (i = 0; i < VNDADM_LIST_NFIELDS; i++) {
+ if (strcasecmp(c, vlf_tbl[i].vlf_name) == 0)
+ break;
+ }
+ if (i == VNDADM_LIST_NFIELDS) {
+ vnd_warn("invalid field for -o: %s\nvalid "
+ "fields are:", c);
+ for (i = 0; i < VNDADM_LIST_NFIELDS; i++)
+ vnd_warn(" %s", vlf_tbl[i].vlf_name);
+ vnd_warn("\n");
+ return (usage(NULL));
+ }
+ vsc.vsc_order[floc] = i;
+ floc++;
+
+ if (n == NULL)
+ break;
+ c = n + 1;
+ }
+
+ vsc.vsc_last = floc - 1;
+ while (floc < VNDADM_LIST_NFIELDS)
+ vsc.vsc_order[floc++] = -1;
+ } else {
+ vsc.vsc_order[0] = 0;
+ vsc.vsc_order[1] = 1;
+ vsc.vsc_order[2] = 2;
+ }
+
+ vsc.vsc_parse = parse;
+ vsc.vsc_delim = delim;
+ if (vsc.vsc_delim == NULL)
+ vsc.vsc_delim = " ";
+
+ if (vsc.vsc_parse != B_TRUE) {
+ for (i = 0; i < VNDADM_LIST_NFIELDS && vsc.vsc_order[i] != -1;
+ i++) {
+ if (i + 1 == VNDADM_LIST_NFIELDS) {
+ (void) printf("%s\n",
+ vlf_tbl[vsc.vsc_order[i]].vlf_header);
+ continue;
+ }
+ (void) printf("%-*s ",
+ vlf_tbl[vsc.vsc_order[i]].vlf_size,
+ vlf_tbl[vsc.vsc_order[i]].vlf_header);
+ }
+ }
+
+ if (vnd_walk(vndadm_list_f, &vsc, &vnderr, &syserr) != 0) {
+ vnd_libwarn(vnderr, syserr, "failed to walk vnd links");
+ return (1);
+ }
+
+ if (argc > 0 && vsc.vsc_found == 0) {
+ vnd_warn("no links matched requested names\n");
+ return (1);
+ }
+
+ return (0);
+}
+
+typedef struct vndadm_get {
+ boolean_t vg_parse;
+ const char *vg_delim;
+ const char *vg_link;
+ int vg_argc;
+ char **vg_argv;
+} vndadm_get_t;
+
+static int
+vndadm_get_cb(vnd_handle_t *vhp, vnd_prop_t prop, void *arg)
+{
+ boolean_t writeable;
+ const char *perm;
+ vndadm_get_t *vgp = arg;
+ const char *name = vndadm_prop_to_name(prop);
+
+ /* Verify if this is a prop we're supposed to print */
+ if (vgp->vg_argc > 0) {
+ int i;
+ boolean_t found = B_FALSE;
+ for (i = 0; i < vgp->vg_argc; i++) {
+ if (strcmp(name, vgp->vg_argv[i]) == 0) {
+ found = B_TRUE;
+ break;
+ }
+ }
+ if (found == B_FALSE)
+ return (0);
+ }
+
+ if (vnd_prop_writeable(prop, &writeable) != 0)
+ abort();
+
+ perm = writeable ? "rw" : "r-";
+
+ if (vgp->vg_parse == B_TRUE) {
+ (void) printf("%s%s%s%s%s%s", vgp->vg_link, vgp->vg_delim,
+ name, vgp->vg_delim, perm, vgp->vg_delim);
+ } else {
+ (void) printf("%-13s %-16s %-5s ", vgp->vg_link, name, perm);
+ }
+
+ if (vndadm_propname_tbl[prop].vp_print != NULL) {
+ if (vndadm_propname_tbl[prop].vp_print(vhp, prop) != 0)
+ return (1);
+ } else {
+ (void) printf("-");
+ }
+ (void) printf("\n");
+ return (0);
+}
+
+static int
+vndadm_get(int argc, char *argv[])
+{
+ vnd_handle_t *vhp;
+ boolean_t parse = B_FALSE;
+ vndadm_get_t vg;
+ int c, syserr;
+ vnd_errno_t vnderr;
+ const char *zonename = NULL, *delim = NULL;
+
+ if (argc <= 0) {
+ return (usage("get requires a link name\n"));
+ }
+
+ optind = 0;
+ while ((c = getopt(argc, argv, ":pd:z:")) != -1) {
+ switch (c) {
+ case 'p':
+ parse = B_TRUE;
+ break;
+ case 'd':
+ delim = optarg;
+ break;
+ case 'z':
+ zonename = optarg;
+ break;
+ case ':':
+ return (usage("-%c requires an operand\n", optopt));
+ case '?':
+ return (usage("unknown option: -%c\n", optopt));
+ default:
+ abort();
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ if (argc < 1) {
+ return (usage("missing required link\n"));
+ }
+
+ vhp = vnd_open(zonename, argv[0], &vnderr, &syserr);
+ if (vhp == NULL) {
+ vnd_libwarn(vnderr, syserr, "failed to open link: %s", argv[0]);
+ return (1);
+ }
+
+ vg.vg_argc = argc - 1;
+ vg.vg_argv = argv + 1;
+ vg.vg_link = argv[0];
+ vg.vg_parse = parse;
+ vg.vg_delim = delim != NULL ? delim : " ";
+ if (vg.vg_parse == B_FALSE)
+ (void) printf("%-13s %-16s %-5s %s\n", "LINK", "PROPERTY",
+ "PERM", "VALUE");
+
+ if (vnd_prop_iter(vhp, vndadm_get_cb, &vg) != 0)
+ return (1);
+
+ return (0);
+}
+
+static void
+vndadm_get_usage(FILE *out)
+{
+ (void) fprintf(out,
+ "\tget:\t\t[-p] [-d delim] [-z zonename] link [prop]...\n");
+}
+
+static int
+vndadm_set(int argc, char *argv[])
+{
+ vnd_handle_t *vhp;
+ int c, i, syserr;
+ vnd_errno_t vnderr;
+ const char *zonename = NULL;
+
+ optind = 0;
+ while ((c = getopt(argc, argv, ":z:")) != -1) {
+ switch (c) {
+ case 'z':
+ zonename = optarg;
+ break;
+ case ':':
+ return (usage("-%c requires an operand\n", optopt));
+ case '?':
+ return (usage("unknown option: -%c\n", optopt));
+ default:
+ abort();
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ if (argc < 2) {
+ return (usage("missing arguments to set\n"));
+ }
+
+ vhp = vnd_open(zonename, argv[0], &vnderr, &syserr);
+ if (vhp == NULL) {
+ vnd_libwarn(vnderr, syserr, "failed to open link: %s", argv[0]);
+ return (1);
+ }
+
+ for (i = 1; i < argc; i++) {
+ char *eq, *key, *value;
+ boolean_t writeable;
+ vnd_prop_t prop;
+ void *buf;
+ size_t psize;
+ int ret;
+
+ key = argv[i];
+ eq = strchr(key, '=');
+ if (eq == NULL) {
+ vnd_warn("invalid property name=value: %s\n", key);
+ return (1);
+ }
+ *eq = '\0';
+ value = eq + 1;
+ if (*value == '\0') {
+ vnd_warn("property value required for %s\n", key);
+ return (1);
+ }
+ prop = vndadm_name_to_prop(key);
+ if (prop == VND_PROP_MAX) {
+ vnd_warn("unknown property: %s\n", key);
+ return (1);
+ }
+
+ if (vnd_prop_writeable(prop, &writeable) != 0)
+ abort();
+ if (writeable != B_TRUE) {
+ vnd_warn("property %s is read-only\n", key);
+ return (1);
+ }
+ assert(vndadm_propname_tbl[prop].vp_parse != NULL);
+
+ /*
+ * vp_parse functions should say what explicitly is invalid. We
+ * should indicate that the property failed.
+ */
+ ret = vndadm_propname_tbl[prop].vp_parse(value, &buf, &psize);
+ if (ret != 0) {
+ vnd_warn("failed to set property %s\n", key);
+ return (1);
+ }
+
+ ret = vnd_prop_set(vhp, prop, buf, psize);
+ free(buf);
+ if (ret != 0) {
+ vnd_libwarn(vnd_errno(vhp), vnd_syserrno(vhp),
+ "failed to set property %s", key);
+ return (1);
+ }
+ }
+
+ return (0);
+}
+
+static void
+vndadm_set_usage(FILE *out)
+{
+ (void) fprintf(out, "\tset:\t\t[-z zonename] link prop=val...\n");
+}
+
+typedef struct vnd_cmdtab {
+ const char *vc_name;
+ int (*vc_op)(int, char *[]);
+ void (*vc_usage)(FILE *);
+} vnd_cmdtab_t;
+
+static vnd_cmdtab_t vnd_tab[] = {
+ { "create", vndadm_create, vndadm_create_usage },
+ { "destroy", vndadm_destroy, vndadm_destroy_usage },
+ { "list", vndadm_list, vndadm_list_usage },
+ { "get", vndadm_get, vndadm_get_usage },
+ { "set", vndadm_set, vndadm_set_usage },
+ { NULL, NULL }
+};
+
+static int
+usage(const char *format, ...)
+{
+ vnd_cmdtab_t *tab;
+ const char *help = "usage: %s <subcommand> <args> ...\n";
+
+ if (format != NULL) {
+ va_list alist;
+
+ va_start(alist, format);
+ (void) fprintf(stderr, "%s: ", vnd_pname);
+ (void) vfprintf(stderr, format, alist);
+ va_end(alist);
+ }
+ (void) fprintf(stderr, help, vnd_pname);
+ for (tab = vnd_tab; tab->vc_name != NULL; tab++)
+ tab->vc_usage(stderr);
+
+ return (2);
+}
+
+int
+main(int argc, char *argv[])
+{
+ vnd_cmdtab_t *tab;
+
+ vnd_pname = basename(argv[0]);
+ if (argc < 2) {
+ return (usage(NULL));
+ }
+
+ for (tab = vnd_tab; tab->vc_name != NULL; tab++) {
+ if (strcmp(argv[1], tab->vc_name) == 0) {
+ argc -= 2; argv += 2;
+ assert(argc >= 0);
+ return (tab->vc_op(argc, argv));
+ }
+ }
+
+ return (usage("unknown sub-command '%s'\n", argv[1]));
+}
diff --git a/usr/src/cmd/vndstat/Makefile b/usr/src/cmd/vndstat/Makefile
new file mode 100644
index 0000000000..c77eef3887
--- /dev/null
+++ b/usr/src/cmd/vndstat/Makefile
@@ -0,0 +1,33 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+PROG= vndstat
+
+include ../Makefile.cmd
+
+LDLIBS += -lkstat
+
+.KEEP_STATE:
+
+all: $(PROG)
+
+install: all $(ROOTPROG)
+
+clean:
+ $(RM) $(PROG)
+
+lint: lint_PROG
+
+include ../Makefile.targ
diff --git a/usr/src/cmd/vndstat/vndstat.c b/usr/src/cmd/vndstat/vndstat.c
new file mode 100644
index 0000000000..6f6c76fc12
--- /dev/null
+++ b/usr/src/cmd/vndstat/vndstat.c
@@ -0,0 +1,542 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/kstat.h>
+#include <kstat.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <strings.h>
+#include <alloca.h>
+#include <signal.h>
+#include <sys/varargs.h>
+#include <sys/int_limits.h>
+#include <sys/sysmacros.h>
+
+#define KSTAT_FIELD_USEINSTANCE 0x01
+#define KSTAT_FIELD_NODELTA 0x02
+#define KSTAT_FIELD_FILLER 0x04
+#define KSTAT_FIELD_STRING 0x08
+#define KSTAT_FIELD_UNIT 0x10
+#define KSTAT_FIELD_LJUST 0x20
+
+typedef struct kstat_field {
+ char *ksf_header; /* header for field */
+ char *ksf_name; /* name of stat, if any */
+ int ksf_width; /* width for field in output line */
+ uint32_t ksf_flags; /* flags for this field, if any */
+ char *ksf_suffix; /* optional suffix for units */
+ int ksf_hint; /* index hint for field in kstat */
+} kstat_field_t;
+
+typedef struct kstat_instance {
+ char ksi_name[KSTAT_STRLEN]; /* name of the underlying kstat */
+ int ksi_instance; /* instance identifer of this kstat */
+ kstat_t *ksi_ksp; /* pointer to the kstat */
+ uint64_t *ksi_data[2]; /* pointer to two generations of data */
+ hrtime_t ksi_snaptime[2]; /* hrtime for data generations */
+ int ksi_gen; /* current generation */
+ struct kstat_instance *ksi_next; /* next in instance list */
+} kstat_instance_t;
+
+const char *g_cmd = "vndstat";
+
+static void
+kstat_nicenum(uint64_t num, char *buf, size_t buflen)
+{
+ uint64_t n = num;
+ int index = 0;
+ char u;
+
+ while (n >= 1024) {
+ n /= 1024;
+ index++;
+ }
+
+ u = " KMGTPE"[index];
+
+ if (index == 0) {
+ (void) snprintf(buf, buflen, "%llu", n);
+ } else if ((num & ((1ULL << 10 * index) - 1)) == 0) {
+ /*
+ * If this is an even multiple of the base, always display
+ * without any decimal precision.
+ */
+ (void) snprintf(buf, buflen, "%llu%c", n, u);
+ } else {
+ /*
+ * We want to choose a precision that reflects the best choice
+ * for fitting in 5 characters. This can get rather tricky when
+ * we have numbers that are very close to an order of magnitude.
+ * For example, when displaying 10239 (which is really 9.999K),
+ * we want only a single place of precision for 10.0K. We could
+ * develop some complex heuristics for this, but it's much
+ * easier just to try each combination in turn.
+ */
+ int i;
+ for (i = 2; i >= 0; i--) {
+ if (snprintf(buf, buflen, "%.*f%c", i,
+ (double)num / (1ULL << 10 * index), u) <= 5)
+ break;
+ }
+ }
+}
+
+static void
+fatal(char *fmt, ...)
+{
+ va_list ap;
+ int error = errno;
+
+ va_start(ap, fmt);
+
+ (void) fprintf(stderr, "%s: ", g_cmd);
+ /*LINTED*/
+ (void) vfprintf(stderr, fmt, ap);
+
+ if (fmt[strlen(fmt) - 1] != '\n')
+ (void) fprintf(stderr, ": %s\n", strerror(error));
+
+ exit(EXIT_FAILURE);
+}
+
+int
+kstat_field_hint(kstat_t *ksp, kstat_field_t *field)
+{
+ kstat_named_t *nm = KSTAT_NAMED_PTR(ksp);
+ int i;
+
+ assert(ksp->ks_type == KSTAT_TYPE_NAMED);
+
+ for (i = 0; i < ksp->ks_ndata; i++) {
+ if (strcmp(field->ksf_name, nm[i].name) == 0)
+ return (field->ksf_hint = i);
+ }
+
+ fatal("could not find field '%s' in %s:%d\n",
+ field->ksf_name, ksp->ks_name, ksp->ks_instance);
+
+ return (0);
+}
+
+int
+kstat_instances_compare(const void *lhs, const void *rhs)
+{
+ kstat_instance_t *l = *((kstat_instance_t **)lhs);
+ kstat_instance_t *r = *((kstat_instance_t **)rhs);
+ int rval;
+
+ if ((rval = strcmp(l->ksi_name, r->ksi_name)) != 0)
+ return (rval);
+
+ if (l->ksi_instance < r->ksi_instance)
+ return (-1);
+
+ if (l->ksi_instance > r->ksi_instance)
+ return (1);
+
+ return (0);
+}
+
+void
+kstat_instances_update(kstat_ctl_t *kcp, kstat_instance_t **head,
+ boolean_t (*interested)(kstat_t *))
+{
+ int ninstances = 0, i;
+ kstat_instance_t **sorted, *ksi, *next;
+ kstat_t *ksp;
+ kid_t kid;
+
+ if ((kid = kstat_chain_update(kcp)) == 0 && *head != NULL)
+ return;
+
+ if (kid == -1)
+ fatal("failed to update kstat chain");
+
+ for (ksi = *head; ksi != NULL; ksi = ksi->ksi_next)
+ ksi->ksi_ksp = NULL;
+
+ for (ksp = kcp->kc_chain; ksp != NULL; ksp = ksp->ks_next) {
+ kstat_instance_t *last = NULL;
+
+ if (!interested(ksp))
+ continue;
+
+ /*
+ * Now look to see if we have this instance and name. (Yes,
+ * this is a linear search; we're assuming that this list is
+ * modest in size.)
+ */
+ for (ksi = *head; ksi != NULL; ksi = ksi->ksi_next) {
+ last = ksi;
+
+ if (ksi->ksi_instance != ksp->ks_instance)
+ continue;
+
+ if (strcmp(ksi->ksi_name, ksp->ks_name) != 0)
+ continue;
+
+ ksi->ksi_ksp = ksp;
+ ninstances++;
+ break;
+ }
+
+ if (ksi != NULL)
+ continue;
+
+ if ((ksi = malloc(sizeof (kstat_instance_t))) == NULL)
+ fatal("could not allocate memory for stat instance");
+
+ bzero(ksi, sizeof (kstat_instance_t));
+ (void) strlcpy(ksi->ksi_name, ksp->ks_name, KSTAT_STRLEN);
+ ksi->ksi_instance = ksp->ks_instance;
+ ksi->ksi_ksp = ksp;
+ ksi->ksi_next = NULL;
+
+ if (last == NULL) {
+ assert(*head == NULL);
+ *head = ksi;
+ } else {
+ last->ksi_next = ksi;
+ }
+
+ ninstances++;
+ }
+
+ /*
+ * Now we know how many instances we have; iterate back over them,
+ * pruning the stale ones and adding the active ones to a holding
+ * array in which to sort them.
+ */
+ sorted = (void *)alloca(ninstances * sizeof (kstat_instance_t *));
+ ninstances = 0;
+
+ for (ksi = *head; ksi != NULL; ksi = next) {
+ next = ksi->ksi_next;
+
+ if (ksi->ksi_ksp == NULL) {
+ free(ksi);
+ } else {
+ sorted[ninstances++] = ksi;
+ }
+ }
+
+ if (ninstances == 0) {
+ *head = NULL;
+ return;
+ }
+
+ qsort(sorted, ninstances, sizeof (kstat_instance_t *),
+ kstat_instances_compare);
+
+ *head = sorted[0];
+
+ for (i = 0; i < ninstances; i++) {
+ ksi = sorted[i];
+ ksi->ksi_next = i < ninstances - 1 ? sorted[i + 1] : NULL;
+ }
+}
+
+void
+kstat_instances_read(kstat_ctl_t *kcp, kstat_instance_t *instances,
+ kstat_field_t *fields)
+{
+ kstat_instance_t *ksi;
+ int i, nfields;
+
+ for (nfields = 0; fields[nfields].ksf_header != NULL; nfields++)
+ continue;
+
+ for (ksi = instances; ksi != NULL; ksi = ksi->ksi_next) {
+ kstat_t *ksp = ksi->ksi_ksp;
+
+ if (ksp == NULL)
+ continue;
+
+ if (kstat_read(kcp, ksp, NULL) == -1) {
+ if (errno == ENXIO) {
+ /*
+ * Our kstat has been removed since the update;
+ * NULL it out to prevent us from trying to read
+ * it again (and to indicate that it should not
+ * be displayed) and drive on.
+ */
+ ksi->ksi_ksp = NULL;
+ continue;
+ }
+
+ fatal("failed to read kstat %s:%d",
+ ksi->ksi_name, ksi->ksi_instance);
+ }
+
+ if (ksp->ks_type != KSTAT_TYPE_NAMED) {
+ fatal("%s:%d is not a named kstat", ksi->ksi_name,
+ ksi->ksi_instance);
+ }
+
+ if (ksi->ksi_data[0] == NULL) {
+ size_t size = nfields * sizeof (uint64_t) * 2;
+ uint64_t *data;
+
+ if ((data = malloc(size)) == NULL)
+ fatal("could not allocate memory");
+
+ bzero(data, size);
+ ksi->ksi_data[0] = data;
+ ksi->ksi_data[1] = &data[nfields];
+ }
+
+ for (i = 0; i < nfields; i++) {
+ kstat_named_t *nm = KSTAT_NAMED_PTR(ksp);
+ kstat_field_t *field = &fields[i];
+ int hint = field->ksf_hint;
+
+ if (field->ksf_name == NULL)
+ continue;
+
+ if (hint < 0 || hint >= ksp->ks_ndata ||
+ strcmp(field->ksf_name, nm[hint].name) != 0) {
+ hint = kstat_field_hint(ksp, field);
+ }
+
+ if (field->ksf_flags & KSTAT_FIELD_STRING)
+ ksi->ksi_data[ksi->ksi_gen][i] =
+ (uint64_t)(uintptr_t)
+ nm[hint].value.str.addr.ptr;
+ else
+ ksi->ksi_data[ksi->ksi_gen][i] =
+ nm[hint].value.ui64;
+ }
+
+ ksi->ksi_snaptime[ksi->ksi_gen] = ksp->ks_snaptime;
+ ksi->ksi_gen ^= 1;
+ }
+}
+
+uint64_t
+kstat_instances_delta(kstat_instance_t *ksi, int i)
+{
+ int gen = ksi->ksi_gen;
+ uint64_t delta = ksi->ksi_data[gen ^ 1][i] - ksi->ksi_data[gen][i];
+ uint64_t tdelta = ksi->ksi_snaptime[gen ^ 1] - ksi->ksi_snaptime[gen];
+
+ return (((delta * (uint64_t)NANOSEC) + (tdelta / 2)) / tdelta);
+}
+
+void
+kstat_instances_print(kstat_instance_t *instances, kstat_field_t *fields,
+ boolean_t header)
+{
+ kstat_instance_t *ksi = instances;
+ int i, nfields;
+
+ for (nfields = 0; fields[nfields].ksf_header != NULL; nfields++)
+ continue;
+
+ if (header) {
+ for (i = 0; i < nfields; i++) {
+ if (fields[i].ksf_flags & KSTAT_FIELD_LJUST) {
+ (void) printf("%s%c", fields[i].ksf_header,
+ i < nfields - 1 ? ' ' : '\n');
+ continue;
+ }
+ (void) printf("%*s%c", fields[i].ksf_width,
+ fields[i].ksf_header, i < nfields - 1 ? ' ' : '\n');
+ }
+ }
+
+ for (ksi = instances; ksi != NULL; ksi = ksi->ksi_next) {
+ if (ksi->ksi_snaptime[1] == 0 || ksi->ksi_ksp == NULL)
+ continue;
+
+ for (i = 0; i < nfields; i++) {
+ char trailer = i < nfields - 1 ? ' ' : '\n';
+
+ if (fields[i].ksf_flags & KSTAT_FIELD_FILLER) {
+ (void) printf("%*s%c", fields[i].ksf_width,
+ fields[i].ksf_header, trailer);
+ continue;
+ }
+
+ if (fields[i].ksf_flags & KSTAT_FIELD_STRING) {
+ (void) printf("%*s%c", fields[i].ksf_width,
+ (char *)(uintptr_t)ksi->ksi_data[
+ ksi->ksi_gen ^ 1][i],
+ trailer);
+ continue;
+ }
+
+ if (fields[i].ksf_flags & KSTAT_FIELD_UNIT) {
+ char buf[128];
+ size_t flen = fields[i].ksf_width + 1;
+ const char *suffix = "";
+
+ if (fields[i].ksf_suffix != NULL) {
+ suffix = fields[i].ksf_suffix;
+ flen -= strlen(fields[i].ksf_suffix);
+ }
+
+ kstat_nicenum(fields[i].ksf_flags &
+ KSTAT_FIELD_NODELTA ?
+ ksi->ksi_data[ksi->ksi_gen ^ 1][i] :
+ kstat_instances_delta(ksi, i), buf,
+ MIN(sizeof (buf), flen));
+ (void) printf("%*s%s%c", flen - 1, buf,
+ suffix, trailer);
+ continue;
+ }
+
+ (void) printf("%*lld%c", fields[i].ksf_width,
+ fields[i].ksf_flags & KSTAT_FIELD_USEINSTANCE ?
+ ksi->ksi_instance :
+ fields[i].ksf_flags & KSTAT_FIELD_NODELTA ?
+ ksi->ksi_data[ksi->ksi_gen ^ 1][i] :
+ kstat_instances_delta(ksi, i), trailer);
+ }
+ }
+}
+
+static boolean_t
+interested(kstat_t *ksp)
+{
+ const char *module = "vnd";
+ const char *class = "net";
+
+ if (strcmp(ksp->ks_module, module) != 0)
+ return (B_FALSE);
+
+ if (strcmp(ksp->ks_class, class) != 0)
+ return (B_FALSE);
+
+ return (B_TRUE);
+}
+
+/* BEGIN CSTYLED */
+char *g_usage = "Usage: vndstat [interval [count]]\n"
+ "\n"
+ " Displays statistics for active vnd devices, with one line per device.\n"
+ " All statistics are reported as per-second rates.\n"
+ "\n"
+ " The columns are as follows:\n"
+ "\n"
+ " zone => name of the zone with the device\n"
+ " name => name of the vnd device\n"
+ " rx => bytes received\n"
+ " tx => bytes transmitted\n"
+ " drops => number of dropped packets\n"
+ " txfc => number of transmit flow control events\n"
+ "\n";
+/* END CSTYLED */
+
+void
+usage()
+{
+ (void) fprintf(stderr, "%s", g_usage);
+ exit(EXIT_FAILURE);
+}
+
+/*ARGSUSED*/
+void
+intr(int sig)
+{}
+
+/*ARGSUSED*/
+int
+main(int argc, char **argv)
+{
+ kstat_ctl_t *kcp;
+ kstat_instance_t *instances = NULL;
+ int i = 0;
+ int interval = 1;
+ int count = INT32_MAX;
+ struct itimerval itimer;
+ struct sigaction act;
+ sigset_t set;
+ char *endp;
+
+ kstat_field_t fields[] = {
+ { "name", "linkname", 6, KSTAT_FIELD_STRING },
+ { "|", NULL, 1, KSTAT_FIELD_FILLER },
+ { "rx B/s", "rbytes", 8, KSTAT_FIELD_UNIT, "B/s" },
+ { "|", NULL, 1, KSTAT_FIELD_FILLER },
+ { "tx B/s", "obytes", 8, KSTAT_FIELD_UNIT, "B/s" },
+ { "|", NULL, 1, KSTAT_FIELD_FILLER },
+ { "drops", "total_drops", 5 },
+ { "txfc", "flowcontrol_events", 4 },
+ { "|", NULL, 1, KSTAT_FIELD_FILLER },
+ { "zone", "zonename", 36,
+ KSTAT_FIELD_STRING | KSTAT_FIELD_LJUST },
+ { NULL }
+ };
+
+ if (argc > 1) {
+ interval = strtol(argv[1], &endp, 10);
+
+ if (*endp != '\0' || interval <= 0)
+ usage();
+ }
+
+ if (argc > 2) {
+ count = strtol(argv[2], &endp, 10);
+
+ if (*endp != '\0' || count <= 0)
+ usage();
+ }
+
+ if ((kcp = kstat_open()) == NULL)
+ fatal("could not open /dev/kstat");
+
+ (void) sigemptyset(&act.sa_mask);
+ act.sa_flags = 0;
+ act.sa_handler = intr;
+ (void) sigaction(SIGALRM, &act, NULL);
+
+ (void) sigemptyset(&set);
+ (void) sigaddset(&set, SIGALRM);
+ (void) sigprocmask(SIG_BLOCK, &set, NULL);
+
+ bzero(&itimer, sizeof (itimer));
+ itimer.it_value.tv_sec = interval;
+ itimer.it_interval.tv_sec = interval;
+
+ if (setitimer(ITIMER_REAL, &itimer, NULL) != 0) {
+ fatal("could not set timer to %d second%s", interval,
+ interval == 1 ? "" : "s");
+ }
+
+ (void) sigemptyset(&set);
+
+ for (;;) {
+ kstat_instances_update(kcp, &instances, interested);
+ kstat_instances_read(kcp, instances, fields);
+
+ if (i++ > 0) {
+ kstat_instances_print(instances, fields,
+ instances != NULL && instances->ksi_next == NULL ?
+ (((i - 2) % 20) == 0) : B_TRUE);
+ }
+
+ if (i > count)
+ break;
+
+ (void) sigsuspend(&set);
+ }
+
+ /*NOTREACHED*/
+ return (0);
+}
diff --git a/usr/src/cmd/zfs/zfs_main.c b/usr/src/cmd/zfs/zfs_main.c
index 5710b19472..ff21aede8a 100644
--- a/usr/src/cmd/zfs/zfs_main.c
+++ b/usr/src/cmd/zfs/zfs_main.c
@@ -228,7 +228,7 @@ get_usage(zfs_help_t idx)
"<filesystem|volume>@<snap>[%<snap>][,...]\n"
"\tdestroy <filesystem|volume>#<bookmark>\n"));
case HELP_GET:
- return (gettext("\tget [-rHp] [-d max] "
+ return (gettext("\tget [-crHp] [-d max] "
"[-o \"all\" | field[,...]]\n"
"\t [-t type[,...]] [-s source[,...]]\n"
"\t <\"all\" | property[,...]> "
@@ -596,7 +596,7 @@ should_auto_mount(zfs_handle_t *zhp)
}
/*
- * zfs clone [-p] [-o prop=value] ... <snap> <fs | vol>
+ * zfs clone [-Fp] [-o prop=value] ... <snap> <fs | vol>
*
* Given an existing dataset, create a writable copy whose initial contents
* are the same as the source. The newly created dataset maintains a
@@ -604,12 +604,18 @@ should_auto_mount(zfs_handle_t *zhp)
* the clone exists.
*
* The '-p' flag creates all the non-existing ancestors of the target first.
+ *
+ * The '-F' flag retries the zfs_mount() operation as long as zfs_mount() is
+ * still returning EBUSY. Any callers which specify -F should be careful to
+ * ensure that no other process has a persistent hold on the mountpoint's
+ * directory.
*/
static int
zfs_do_clone(int argc, char **argv)
{
zfs_handle_t *zhp = NULL;
boolean_t parents = B_FALSE;
+ boolean_t keeptrying = B_FALSE;
nvlist_t *props;
int ret = 0;
int c;
@@ -618,8 +624,11 @@ zfs_do_clone(int argc, char **argv)
nomem();
/* check options */
- while ((c = getopt(argc, argv, "o:p")) != -1) {
+ while ((c = getopt(argc, argv, "Fo:p")) != -1) {
switch (c) {
+ case 'F':
+ keeptrying = B_TRUE;
+ break;
case 'o':
if (parseprop(props, optarg) != 0)
return (1);
@@ -686,11 +695,16 @@ zfs_do_clone(int argc, char **argv)
* step.
*/
if (should_auto_mount(clone)) {
- if ((ret = zfs_mount(clone, NULL, 0)) != 0) {
- (void) fprintf(stderr, gettext("clone "
- "successfully created, "
- "but not mounted\n"));
- } else if ((ret = zfs_share(clone)) != 0) {
+ while ((ret = zfs_mount(clone, NULL, 0)) != 0) {
+ if (!keeptrying || errno != EBUSY) {
+ (void) fprintf(stderr,
+ gettext("clone "
+ "successfully created, "
+ "but not mounted\n"));
+ break;
+ }
+ }
+ if (ret == 0 && (ret = zfs_share(clone)) != 0) {
(void) fprintf(stderr, gettext("clone "
"successfully created, "
"but not shared\n"));
@@ -916,12 +930,13 @@ badusage:
}
/*
- * zfs destroy [-rRf] <fs, vol>
+ * zfs destroy [-rRfF] <fs, vol>
* zfs destroy [-rRd] <snap>
*
* -r Recursively destroy all children
* -R Recursively destroy all dependents, including clones
* -f Force unmounting of any dependents
+ * -F Continue retrying on seeing EBUSY
* -d If we can't destroy now, mark for deferred destruction
*
* Destroys the given dataset. By default, it will unmount any filesystems,
@@ -931,6 +946,7 @@ badusage:
typedef struct destroy_cbdata {
boolean_t cb_first;
boolean_t cb_force;
+ boolean_t cb_wait;
boolean_t cb_recurse;
boolean_t cb_error;
boolean_t cb_doclones;
@@ -1014,13 +1030,18 @@ out:
static int
destroy_callback(zfs_handle_t *zhp, void *data)
{
- destroy_cbdata_t *cb = data;
+ destroy_cbdata_t *cbp = data;
+ struct timespec ts;
+ int err = 0;
+
+ ts.tv_sec = 0;
+ ts.tv_nsec = 500 * (NANOSEC / MILLISEC);
const char *name = zfs_get_name(zhp);
- if (cb->cb_verbose) {
- if (cb->cb_parsable) {
+ if (cbp->cb_verbose) {
+ if (cbp->cb_parsable) {
(void) printf("destroy\t%s\n", name);
- } else if (cb->cb_dryrun) {
+ } else if (cbp->cb_dryrun) {
(void) printf(gettext("would destroy %s\n"),
name);
} else {
@@ -1035,13 +1056,10 @@ destroy_callback(zfs_handle_t *zhp, void *data)
*/
if (strchr(zfs_get_name(zhp), '/') == NULL &&
zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) {
- zfs_close(zhp);
- return (0);
- }
- if (cb->cb_dryrun) {
- zfs_close(zhp);
- return (0);
+ goto out;
}
+ if (cbp->cb_dryrun)
+ goto out;
/*
* We batch up all contiguous snapshots (even of different
@@ -1050,23 +1068,66 @@ destroy_callback(zfs_handle_t *zhp, void *data)
* because we must delete a clone before its origin.
*/
if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT) {
- fnvlist_add_boolean(cb->cb_batchedsnaps, name);
- } else {
- int error = zfs_destroy_snaps_nvl(g_zfs,
- cb->cb_batchedsnaps, B_FALSE);
- fnvlist_free(cb->cb_batchedsnaps);
- cb->cb_batchedsnaps = fnvlist_alloc();
-
- if (error != 0 ||
- zfs_unmount(zhp, NULL, cb->cb_force ? MS_FORCE : 0) != 0 ||
- zfs_destroy(zhp, cb->cb_defer_destroy) != 0) {
- zfs_close(zhp);
- return (-1);
+ fnvlist_add_boolean(cbp->cb_batchedsnaps, name);
+ goto out;
+ }
+
+ if (cbp->cb_wait)
+ libzfs_print_on_error(g_zfs, B_FALSE);
+
+ /*
+ * Unless instructed to retry on EBUSY, bail out on the first error.
+ * When retrying, try every 500ms until either succeeding or seeing a
+ * non-EBUSY error code.
+ */
+ while ((err = zfs_destroy_snaps_nvl(g_zfs,
+ cbp->cb_batchedsnaps, B_FALSE)) != 0) {
+ if (cbp->cb_wait && libzfs_errno(g_zfs) == EZFS_BUSY) {
+ nanosleep(&ts, NULL);
+ continue;
}
+ (void) fprintf(stderr, "%s: %s\n",
+ libzfs_error_action(g_zfs),
+ libzfs_error_description(g_zfs));
+ break;
+ }
+
+ fnvlist_free(cbp->cb_batchedsnaps);
+ cbp->cb_batchedsnaps = fnvlist_alloc();
+
+ if (err != 0)
+ goto out;
+
+ while ((err = zfs_unmount(zhp, NULL,
+ cbp->cb_force ? MS_FORCE : 0)) != 0) {
+ if (cbp->cb_wait && libzfs_errno(g_zfs) == EZFS_BUSY) {
+ (void) nanosleep(&ts, NULL);
+ continue;
+ }
+ (void) fprintf(stderr, "%s: %s\n",
+ libzfs_error_action(g_zfs),
+ libzfs_error_description(g_zfs));
+ break;
+ }
+
+ if (err != 0)
+ goto out;
+
+ while ((err = zfs_destroy(zhp, cbp->cb_defer_destroy)) != 0) {
+ if (cbp->cb_wait && libzfs_errno(g_zfs) == EZFS_BUSY) {
+ (void) nanosleep(&ts, NULL);
+ continue;
+ }
+ (void) fprintf(stderr, "%s: %s\n",
+ libzfs_error_action(g_zfs),
+ libzfs_error_description(g_zfs));
+ break;
}
+out:
+ libzfs_print_on_error(g_zfs, B_TRUE);
zfs_close(zhp);
- return (0);
+ return (err);
}
static int
@@ -1222,7 +1283,7 @@ zfs_do_destroy(int argc, char **argv)
zfs_type_t type = ZFS_TYPE_DATASET;
/* check options */
- while ((c = getopt(argc, argv, "vpndfrR")) != -1) {
+ while ((c = getopt(argc, argv, "vpndfFrR")) != -1) {
switch (c) {
case 'v':
cb.cb_verbose = B_TRUE;
@@ -1241,6 +1302,9 @@ zfs_do_destroy(int argc, char **argv)
case 'f':
cb.cb_force = B_TRUE;
break;
+ case 'F':
+ cb.cb_wait = B_TRUE;
+ break;
case 'r':
cb.cb_recurse = B_TRUE;
break;
@@ -1611,8 +1675,11 @@ zfs_do_get(int argc, char **argv)
cb.cb_type = ZFS_TYPE_DATASET;
/* check options */
- while ((c = getopt(argc, argv, ":d:o:s:rt:Hp")) != -1) {
+ while ((c = getopt(argc, argv, ":d:o:s:rt:Hcp")) != -1) {
switch (c) {
+ case 'c':
+ libzfs_set_cachedprops(g_zfs, B_TRUE);
+ break;
case 'p':
cb.cb_literal = B_TRUE;
break;
@@ -3050,6 +3117,7 @@ zfs_do_list(int argc, char **argv)
int types = ZFS_TYPE_DATASET;
boolean_t types_specified = B_FALSE;
char *fields = NULL;
+ zprop_list_t *pl;
list_cbdata_t cb = { 0 };
char *value;
int limit = 0;
@@ -3168,6 +3236,18 @@ zfs_do_list(int argc, char **argv)
!= 0)
usage(B_FALSE);
+ /*
+ * The default set of properties contains only properties which can be
+ * retrieved from the set of cached properties. If any user-specfied
+ * properties cannot be retrieved from that set, unset the cachedprops
+ * flags on the ZFS handle.
+ */
+ libzfs_set_cachedprops(g_zfs, B_TRUE);
+ for (pl = cb.cb_proplist; pl != NULL; pl = pl->pl_next) {
+ if (zfs_prop_cacheable(pl->pl_prop))
+ libzfs_set_cachedprops(g_zfs, B_FALSE);
+ }
+
cb.cb_first = B_TRUE;
ret = zfs_for_each(argc, argv, flags, types, sortcol, &cb.cb_proplist,
diff --git a/usr/src/cmd/zlogin/zlogin.c b/usr/src/cmd/zlogin/zlogin.c
index 296c32be01..1934ba2595 100644
--- a/usr/src/cmd/zlogin/zlogin.c
+++ b/usr/src/cmd/zlogin/zlogin.c
@@ -22,11 +22,12 @@
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2013 DEY Storage Systems, Inc.
* Copyright (c) 2014 Gary Mills
+ * Copyright 2016 Joyent, Inc.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*/
/*
- * zlogin provides three types of login which allow users in the global
+ * zlogin provides five types of login which allow users in the global
* zone to access non-global zones.
*
* - "interactive login" is similar to rlogin(1); for example, the user could
@@ -42,12 +43,22 @@
* In this mode, zlogin sets up pipes as the communication channel, and
* 'su' is used to do the login setup work.
*
+ * - "interactive command" is a combination of the above two modes where
+ * a command is provide like the non-interactive case, but the -i option is
+ * also provided to make things interactive. For example, the user could
+ * issue 'zlogin -i my-zone /bin/sh'. In this mode neither 'login -c' nor
+ * 'su root -c' is prepended to the command invocation. Because of this
+ * there will be no wtmpx login record within the zone.
+ *
* - "console login" is the equivalent to accessing the tip line for a
* zone. For example, the user can issue 'zlogin -C my-zone'.
* In this mode, zlogin contacts the zoneadmd process via unix domain
* socket. If zoneadmd is not running, it starts it. This allows the
* console to be available anytime the zone is installed, regardless of
* whether it is running.
+ *
+ * - "standalone-processs interactive" is specified with -I and connects to
+ * the zone's stdin, stdout and stderr zfd(7D) devices.
*/
#include <sys/socket.h>
@@ -92,7 +103,8 @@
#include <auth_attr.h>
#include <secdb.h>
-static int masterfd;
+static int masterfd = -1;
+static int ctlfd = -1;
static struct termios save_termios;
static struct termios effective_termios;
static int save_fd;
@@ -101,12 +113,13 @@ static volatile int dead;
static volatile pid_t child_pid = -1;
static int interactive = 0;
static priv_set_t *dropprivs;
+static unsigned int connect_flags = 0;
static int nocmdchar = 0;
static int failsafe = 0;
-static int disconnect = 0;
static char cmdchar = '~';
static int quiet = 0;
+static char zonebrand[MAXNAMELEN];
static int pollerr = 0;
@@ -123,10 +136,14 @@ static boolean_t forced_login = B_FALSE;
#define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */
#endif
-#define SUPATH "/usr/bin/su"
+#define SUPATH1 "/usr/bin/su"
+#define SUPATH2 "/bin/su"
#define FAILSAFESHELL "/sbin/sh"
#define DEFAULTSHELL "/sbin/sh"
#define DEF_PATH "/usr/sbin:/usr/bin"
+#define LX_DEF_PATH "/bin:/usr/sbin:/usr/bin"
+
+#define MAX_RETRY 30
#define CLUSTER_BRAND_NAME "cluster"
@@ -153,7 +170,7 @@ static boolean_t forced_login = B_FALSE;
static void
usage(void)
{
- (void) fprintf(stderr, gettext("usage: %s [ -dnQCES ] [ -e cmdchar ] "
+ (void) fprintf(stderr, gettext("usage: %s [-dinCEINQS] [-e cmdchar] "
"[-l user] zonename [command [args ...] ]\n"), pname);
exit(2);
}
@@ -248,57 +265,60 @@ postfork_dropprivs()
}
}
-/*
- * Create the unix domain socket and call the zoneadmd server; handshake
- * with it to determine whether it will allow us to connect.
- */
static int
-get_console_master(const char *zname)
+connect_zone_sock(const char *zname, const char *suffix, boolean_t verbose)
{
int sockfd = -1;
struct sockaddr_un servaddr;
- char clientid[MAXPATHLEN];
- char handshake[MAXPATHLEN], c;
- int msglen;
- int i = 0, err = 0;
if ((sockfd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
- zperror(gettext("could not create socket"));
+ if (verbose)
+ zperror(gettext("could not create socket"));
return (-1);
}
bzero(&servaddr, sizeof (servaddr));
servaddr.sun_family = AF_UNIX;
(void) snprintf(servaddr.sun_path, sizeof (servaddr.sun_path),
- "%s/%s.console_sock", ZONES_TMPDIR, zname);
-
+ "%s/%s.%s", ZONES_TMPDIR, zname, suffix);
if (connect(sockfd, (struct sockaddr *)&servaddr,
sizeof (servaddr)) == -1) {
- zperror(gettext("Could not connect to zone console"));
- goto bad;
+ if (verbose)
+ zperror(gettext("Could not connect to zone"));
+ close(sockfd);
+ return (-1);
}
- masterfd = sockfd;
+ return (sockfd);
+}
- msglen = snprintf(clientid, sizeof (clientid), "IDENT %lu %s %d\n",
- getpid(), setlocale(LC_MESSAGES, NULL), disconnect);
+
+static int
+handshake_zone_sock(int sockfd, unsigned int flags)
+{
+ char clientid[MAXPATHLEN];
+ char handshake[MAXPATHLEN], c;
+ int msglen;
+ int i = 0, err = 0;
+
+ msglen = snprintf(clientid, sizeof (clientid), "IDENT %s %u\n",
+ setlocale(LC_MESSAGES, NULL), flags);
if (msglen >= sizeof (clientid) || msglen < 0) {
zerror("protocol error");
- goto bad;
+ return (-1);
}
- if (write(masterfd, clientid, msglen) != msglen) {
+ if (write(sockfd, clientid, msglen) != msglen) {
zerror("protocol error");
- goto bad;
+ return (-1);
}
- bzero(handshake, sizeof (handshake));
-
/*
* Take care not to accumulate more than our fill, and leave room for
* the NUL at the end.
*/
- while ((err = read(masterfd, &c, 1)) == 1) {
+ bzero(handshake, sizeof (handshake));
+ while ((err = read(sockfd, &c, 1)) == 1) {
if (i >= (sizeof (handshake) - 1))
break;
if (c == '\n')
@@ -308,26 +328,48 @@ get_console_master(const char *zname)
}
/*
- * If something went wrong during the handshake we bail; perhaps
- * the server died off.
+ * If something went wrong during the handshake we bail.
+ * Perhaps the server died off.
*/
if (err == -1) {
- zperror(gettext("Could not connect to zone console"));
- goto bad;
+ zperror(gettext("Could not connect to zone"));
+ return (-1);
}
- if (strncmp(handshake, "OK", sizeof (handshake)) == 0)
- return (0);
+ if (strncmp(handshake, "OK", sizeof (handshake)) != 0) {
+ zerror(gettext("Zone is already in use by process ID %s."),
+ handshake);
+ return (-1);
+ }
- zerror(gettext("Console is already in use by process ID %s."),
- handshake);
-bad:
- (void) close(sockfd);
- masterfd = -1;
- return (-1);
+ return (0);
}
-
+static int
+send_ctl_sock(const char *buf, size_t len)
+{
+ char rbuf[BUFSIZ];
+ int i;
+ if (ctlfd == -1) {
+ return (-1);
+ }
+ if (write(ctlfd, buf, len) != len) {
+ return (-1);
+ }
+ /* read the response */
+ for (i = 0; i < (BUFSIZ - 1); i++) {
+ char c;
+ if (read(ctlfd, &c, 1) != 1 || c == '\n' || c == '\0') {
+ break;
+ }
+ rbuf[i] = c;
+ }
+ rbuf[i+1] = '\0';
+ if (strncmp("OK", rbuf, BUFSIZ) != 0) {
+ return (-1);
+ }
+ return (0);
+}
/*
* Routines to handle pty creation upon zone entry and to shuttle I/O back
* and forth between the two terminals. We also compute and store the
@@ -516,8 +558,32 @@ sigwinch(int s)
{
struct winsize ws;
- if (ioctl(0, TIOCGWINSZ, &ws) == 0)
- (void) ioctl(masterfd, TIOCSWINSZ, &ws);
+ if (ioctl(0, TIOCGWINSZ, &ws) == 0) {
+ if (ctlfd != -1) {
+ char buf[BUFSIZ];
+ snprintf(buf, sizeof (buf), "TIOCSWINSZ %hu %hu\n",
+ ws.ws_row, ws.ws_col);
+ (void) send_ctl_sock(buf, strlen(buf));
+ } else {
+ (void) ioctl(masterfd, TIOCSWINSZ, &ws);
+ }
+ }
+}
+
+/*
+ * Toggle zfd EOF mode and notify zoneadmd
+ */
+/*ARGSUSED*/
+static void
+sigusr1(int s)
+{
+ connect_flags ^= ZLOGIN_ZFD_EOF;
+ if (ctlfd != -1) {
+ char buf[BUFSIZ];
+ snprintf(buf, sizeof (buf), "SETFLAGS %u\n",
+ connect_flags);
+ (void) send_ctl_sock(buf, strlen(buf));
+ }
}
static volatile int close_on_sig = -1;
@@ -862,28 +928,28 @@ doio(int stdin_fd, int appin_fd, int stdout_fd, int stderr_fd, int sig_fd,
break;
}
- /* event from master side stdout */
- if (pollfds[0].revents) {
- if (pollfds[0].revents &
+ /* event from master side stderr */
+ if (pollfds[1].revents) {
+ if (pollfds[1].revents &
(POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
- if (process_output(stdout_fd, STDOUT_FILENO)
+ if (process_output(stderr_fd, STDERR_FILENO)
!= 0)
break;
} else {
- pollerr = pollfds[0].revents;
+ pollerr = pollfds[1].revents;
break;
}
}
- /* event from master side stderr */
- if (pollfds[1].revents) {
- if (pollfds[1].revents &
+ /* event from master side stdout */
+ if (pollfds[0].revents) {
+ if (pollfds[0].revents &
(POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
- if (process_output(stderr_fd, STDERR_FILENO)
+ if (process_output(stdout_fd, STDOUT_FILENO)
!= 0)
break;
} else {
- pollerr = pollfds[1].revents;
+ pollerr = pollfds[0].revents;
break;
}
}
@@ -1053,7 +1119,7 @@ zone_login_cmd(brand_handle_t bh, const char *login)
* but we're going to be very simplistic about it and break stuff
* up based on spaces. We're not even going to support any kind
* of quoting or escape characters. It's truly amazing that
- * there is no library function in OpenSolaris to do this for us.
+ * there is no library function in Illumos to do this for us.
*/
/*
@@ -1092,69 +1158,155 @@ zone_login_cmd(brand_handle_t bh, const char *login)
}
/*
- * Prepare argv array for exec'd process; if we're passing commands to the
- * new process, then use su(1M) to do the invocation. Otherwise, use
+ * Prepare argv array for exec'd process. If commands are passed to the new
+ * process and su(1M) is avalable, use it for the invocation. Otherwise, use
* 'login -z <from_zonename> -f' (-z is an undocumented option which tells
* login that we're coming from another zone, and to disregard its CONSOLE
* checks).
*/
static char **
-prep_args(brand_handle_t bh, const char *login, char **argv)
+prep_args(brand_handle_t bh, char *zonename, const char *login, char **argv)
{
- int argc = 0, a = 0, i, n = -1;
- char **new_argv;
+ int argc = 0, i;
+ size_t subshell_len = 1;
+ char *subshell = NULL, *supath = NULL;
+ char **new_argv = NULL;
- if (argv != NULL) {
- size_t subshell_len = 1;
- char *subshell;
+ if (argv == NULL) {
+ if (failsafe) {
+ if ((new_argv = malloc(sizeof (char *) * 2)) == NULL)
+ return (NULL);
+ new_argv[0] = FAILSAFESHELL;
+ new_argv[1] = NULL;
+ } else {
+ new_argv = zone_login_cmd(bh, login);
+ }
+ return (new_argv);
+ }
- while (argv[argc] != NULL)
- argc++;
+ /*
+ * Attempt to locate a 'su' binary if not using the failsafe shell.
+ */
+ if (!failsafe) {
+ struct stat sb;
+ char zonepath[MAXPATHLEN];
+ char supath_check[MAXPATHLEN];
+
+ if (zone_get_zonepath(zonename, zonepath,
+ sizeof (zonepath)) != Z_OK) {
+ zerror(gettext("unable to determine zone "
+ "path"));
+ return (NULL);
+ }
- for (i = 0; i < argc; i++) {
- subshell_len += strlen(argv[i]) + 1;
+ (void) snprintf(supath_check, sizeof (supath), "%s/root/%s",
+ zonepath, SUPATH1);
+ if (stat(supath_check, &sb) == 0) {
+ supath = SUPATH1;
+ } else {
+ (void) snprintf(supath_check, sizeof (supath_check),
+ "%s/root/%s", zonepath, SUPATH2);
+ if (stat(supath_check, &sb) == 0) {
+ supath = SUPATH2;
+ }
}
- if ((subshell = calloc(1, subshell_len)) == NULL)
+ }
+
+ /*
+ * With no failsafe shell or supath to wrap the incoming command, the
+ * arguments are passed straight through.
+ */
+ if (!failsafe && supath == NULL) {
+ /*
+ * Such an outcome is not acceptable, however, if the caller
+ * expressed a desire to switch users.
+ */
+ if (strcmp(login, "root") != 0) {
+ zerror(gettext("unable to find 'su' command"));
return (NULL);
+ }
+ return (argv);
+ }
- for (i = 0; i < argc; i++) {
- (void) strcat(subshell, argv[i]);
+ /*
+ * Inventory arguments and allocate a buffer to escape them for the
+ * subshell.
+ */
+ while (argv[argc] != NULL) {
+ /*
+ * Allocate enough space for the delimiter and 2
+ * quotes which might be needed.
+ */
+ subshell_len += strlen(argv[argc]) + 3;
+ argc++;
+ }
+ if ((subshell = calloc(1, subshell_len)) == NULL) {
+ return (NULL);
+ }
+
+ /*
+ * The handling of quotes in the following block may seem unusual, but
+ * it is done this way for backward compatibility.
+ * When running a command, zlogin is documented as:
+ * zlogin zonename command args
+ * However, some code has come to depend on the following usage:
+ * zlogin zonename 'command args'
+ * This relied on the fact that the single argument would be re-parsed
+ * within the zone and excuted as a command with an argument. To remain
+ * compatible with this (incorrect) usage, if there is only a single
+ * argument, it is not quoted, even if it has embedded spaces.
+ *
+ * Here are two examples which both need to work:
+ * 1) zlogin foo 'echo hello'
+ * This has a single argv member with a space in it but will not be
+ * quoted on the command passed into the zone.
+ * 2) zlogin foo bash -c 'echo hello'
+ * This has 3 argv members. The 3rd arg has a space and must be
+ * quoted on the command passed into the zone.
+ */
+ for (i = 0; i < argc; i++) {
+ if (i > 0)
(void) strcat(subshell, " ");
+
+ if (argc > 1 && (strchr(argv[i], ' ') != NULL ||
+ strchr(argv[i], '\t') != NULL)) {
+ (void) strcat(subshell, "'");
+ (void) strcat(subshell, argv[i]);
+ (void) strcat(subshell, "'");
+ } else {
+ (void) strcat(subshell, argv[i]);
}
+ }
- if (failsafe) {
- n = 4;
- if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
- return (NULL);
+ if (failsafe) {
+ int a = 0, n = 4;
- new_argv[a++] = FAILSAFESHELL;
- } else {
- n = 5;
- if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
- return (NULL);
+ if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
+ return (NULL);
- new_argv[a++] = SUPATH;
- if (strcmp(login, "root") != 0) {
- new_argv[a++] = "-";
- n++;
- }
- new_argv[a++] = (char *)login;
- }
+ new_argv[a++] = FAILSAFESHELL;
new_argv[a++] = "-c";
new_argv[a++] = subshell;
new_argv[a++] = NULL;
assert(a == n);
} else {
- if (failsafe) {
- n = 2;
- if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
- return (NULL);
- new_argv[a++] = FAILSAFESHELL;
- new_argv[a++] = NULL;
- assert(n == a);
+ int a = 0, n = 6;
+
+ assert(supath != NULL);
+ if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
+ return (NULL);
+
+ new_argv[a++] = supath;
+ if (strcmp(login, "root") != 0) {
+ new_argv[a++] = "-";
} else {
- new_argv = zone_login_cmd(bh, login);
+ n--;
}
+ new_argv[a++] = (char *)login;
+ new_argv[a++] = "-c";
+ new_argv[a++] = subshell;
+ new_argv[a++] = NULL;
+ assert(a == n);
}
return (new_argv);
@@ -1185,6 +1337,7 @@ prep_env()
int e = 0, size = 1;
char **new_env, *estr;
char *term = getenv("TERM");
+ char *path;
size++; /* for $PATH */
if (term != NULL)
@@ -1201,7 +1354,12 @@ prep_env()
if ((new_env = malloc(sizeof (char *) * size)) == NULL)
return (NULL);
- if ((estr = add_env("PATH", DEF_PATH)) == NULL)
+ if (strcmp(zonebrand, "lx") == 0)
+ path = LX_DEF_PATH;
+ else
+ path = DEF_PATH;
+
+ if ((estr = add_env("PATH", path)) == NULL)
return (NULL);
new_env[e++] = estr;
@@ -1723,24 +1881,61 @@ get_username()
return (nptr->pw_name);
}
+static boolean_t
+zlog_mode_logging(char *zonename, boolean_t *found)
+{
+ boolean_t lm = B_FALSE;
+ zone_dochandle_t handle;
+ struct zone_attrtab attr;
+
+ *found = B_FALSE;
+ if ((handle = zonecfg_init_handle()) == NULL)
+ return (lm);
+
+ if (zonecfg_get_handle(zonename, handle) != Z_OK)
+ goto done;
+
+ if (zonecfg_setattrent(handle) != Z_OK)
+ goto done;
+ while (zonecfg_getattrent(handle, &attr) == Z_OK) {
+ if (strcmp("zlog-mode", attr.zone_attr_name) == 0) {
+ int len = strlen(attr.zone_attr_value);
+
+ *found = B_TRUE;
+ if (strncmp("log", attr.zone_attr_value, 3) == 0 ||
+ strncmp("nolog", attr.zone_attr_value, 5) == 0 ||
+ (len >= 3 && attr.zone_attr_value[len - 2] == '-'))
+ lm = B_TRUE;
+ break;
+ }
+ }
+ (void) zonecfg_endattrent(handle);
+
+done:
+ zonecfg_fini_handle(handle);
+ return (lm);
+}
+
int
main(int argc, char **argv)
{
- int arg, console = 0;
+ int arg, console = 0, imode = 0;
+ int estatus = 0;
zoneid_t zoneid;
zone_state_t st;
char *login = "root";
+ int iflag = 0;
int lflag = 0;
int nflag = 0;
char *zonename = NULL;
char **proc_args = NULL;
char **new_args, **new_env;
sigset_t block_cld;
+ siginfo_t si;
char devroot[MAXPATHLEN];
char *slavename, slaveshortname[MAXPATHLEN];
priv_set_t *privset;
int tmpl_fd;
- char zonebrand[MAXNAMELEN];
char default_brand[MAXNAMELEN];
struct stat sb;
char kernzone[ZONENAME_MAX];
@@ -1754,7 +1949,7 @@ main(int argc, char **argv)
(void) getpname(argv[0]);
username = get_username();
- while ((arg = getopt(argc, argv, "dnECR:Se:l:Q")) != EOF) {
+ while ((arg = getopt(argc, argv, "diNnECIR:Se:l:Q")) != EOF) {
switch (arg) {
case 'C':
console = 1;
@@ -1762,6 +1957,16 @@ main(int argc, char **argv)
case 'E':
nocmdchar = 1;
break;
+ case 'I':
+ /*
+ * interactive mode is just a slight variation on the
+ * console mode.
+ */
+ console = 1;
+ imode = 1;
+ /* The default is HUP, disconnect on EOF */
+ connect_flags ^= ZLOGIN_ZFD_EOF;
+ break;
case 'R': /* undocumented */
if (*optarg != '/') {
zerror(gettext("root path must be absolute."));
@@ -1781,15 +1986,22 @@ main(int argc, char **argv)
failsafe = 1;
break;
case 'd':
- disconnect = 1;
+ connect_flags |= ZLOGIN_DISCONNECT;
break;
case 'e':
set_cmdchar(optarg);
break;
+ case 'i':
+ iflag = 1;
+ break;
case 'l':
login = optarg;
lflag = 1;
break;
+ case 'N':
+ /* NOHUP - do not send EOF */
+ connect_flags ^= ZLOGIN_ZFD_EOF;
+ break;
case 'n':
nflag = 1;
break;
@@ -1800,6 +2012,12 @@ main(int argc, char **argv)
if (console != 0) {
+ /*
+ * The only connect option in console mode is ZLOGIN_DISCONNECT
+ */
+ if (imode == 0)
+ connect_flags &= ZLOGIN_DISCONNECT;
+
if (lflag != 0) {
zerror(gettext(
"-l may not be specified for console login"));
@@ -1826,17 +2044,27 @@ main(int argc, char **argv)
}
+ if (iflag != 0 && nflag != 0) {
+ zerror(gettext("-i and -n flags are incompatible"));
+ usage();
+ }
+
if (failsafe != 0 && lflag != 0) {
zerror(gettext("-l may not be specified for failsafe login"));
usage();
}
- if (!console && disconnect != 0) {
+ if (!console && (connect_flags & ZLOGIN_DISCONNECT) != 0) {
zerror(gettext(
"-d may only be specified with console login"));
usage();
}
+ if (imode == 0 && (connect_flags & ZLOGIN_ZFD_EOF) != 0) {
+ zerror(gettext("-N may only be specified with -I"));
+ usage();
+ }
+
if (optind == (argc - 1)) {
/*
* zone name, no process name; this should be an interactive
@@ -1859,7 +2087,8 @@ main(int argc, char **argv)
/* zone name and process name, and possibly some args */
zonename = argv[optind];
proc_args = &argv[optind + 1];
- interactive = 0;
+ if (iflag && isatty(STDIN_FILENO))
+ interactive = 1;
} else {
usage();
}
@@ -1945,10 +2174,31 @@ main(int argc, char **argv)
}
/*
- * The console is a separate case from the rest of the code; handle
- * it first.
+ * The console (or standalong interactive mode) is a separate case from
+ * the rest of the code; handle it first.
*/
if (console) {
+ int gz_stderr_fd = -1;
+ int retry;
+ boolean_t set_raw = B_TRUE;
+
+ if (imode) {
+ boolean_t has_zfd_config;
+
+ if (zlog_mode_logging(zonename, &has_zfd_config))
+ set_raw = B_FALSE;
+
+ /*
+ * Asked for standalone interactive mode but the
+ * zlog-mode attribute is not configured on the zone.
+ */
+ if (!has_zfd_config) {
+ zerror(gettext("'%s' is not configured on "
+ "the zone"), "zlog-mode");
+ return (1);
+ }
+ }
+
/*
* Ensure that zoneadmd for this zone is running.
*/
@@ -1957,16 +2207,56 @@ main(int argc, char **argv)
/*
* Make contact with zoneadmd.
+ *
+ * Handshake with the control socket first. We handle retries
+ * here since the relevant thread in zoneadmd might not have
+ * finished setting up yet.
*/
- if (get_console_master(zonename) == -1)
+ for (retry = 0; retry < MAX_RETRY; retry++) {
+ masterfd = connect_zone_sock(zonename,
+ (imode ? "server_ctl" : "console_sock"), B_FALSE);
+ if (masterfd != -1)
+ break;
+ sleep(1);
+ }
+
+ if (retry == MAX_RETRY) {
+ zerror(gettext("unable to connect for %d seconds"),
+ MAX_RETRY);
return (1);
+ }
- if (!quiet)
- (void) printf(
- gettext("[Connected to zone '%s' console]\n"),
- zonename);
+ if (handshake_zone_sock(masterfd, connect_flags) != 0) {
+ (void) close(masterfd);
+ return (1);
+ }
+
+ if (imode) {
+ ctlfd = masterfd;
+
+ /* Now open the io-related sockets */
+ masterfd = connect_zone_sock(zonename, "server_out",
+ B_TRUE);
+ gz_stderr_fd = connect_zone_sock(zonename,
+ "server_err", B_TRUE);
+ if (masterfd == -1 || gz_stderr_fd == -1) {
+ (void) close(ctlfd);
+ (void) close(masterfd);
+ (void) close(gz_stderr_fd);
+ return (1);
+ }
+ }
- if (set_tty_rawmode(STDIN_FILENO) == -1) {
+ if (!quiet) {
+ if (imode)
+ (void) printf(gettext("[Connected to zone '%s' "
+ "interactively]\n"), zonename);
+ else
+ (void) printf(gettext("[Connected to zone '%s' "
+ "console]\n"), zonename);
+ }
+
+ if (set_raw && set_tty_rawmode(STDIN_FILENO) == -1) {
reset_tty();
zperror(gettext("failed to set stdin pty to raw mode"));
return (1);
@@ -1975,15 +2265,25 @@ main(int argc, char **argv)
(void) sigset(SIGWINCH, sigwinch);
(void) sigwinch(0);
+ if (imode) {
+ /* Allow EOF mode toggling via SIGUSR1 */
+ (void) sigset(SIGUSR1, sigusr1);
+ }
+
/*
* Run the I/O loop until we get disconnected.
*/
- doio(masterfd, -1, masterfd, -1, -1, B_FALSE);
+ doio(masterfd, -1, masterfd, gz_stderr_fd, -1, B_FALSE);
reset_tty();
- if (!quiet)
- (void) printf(
- gettext("\n[Connection to zone '%s' console "
- "closed]\n"), zonename);
+ if (!quiet) {
+ if (imode)
+ (void) printf(gettext("\n[Interactive "
+ "connection to zone '%s' closed]\n"),
+ zonename);
+ else
+ (void) printf(gettext("\n[Connection to zone "
+ "'%s' console closed]\n"), zonename);
+ }
return (0);
}
@@ -2051,11 +2351,23 @@ main(int argc, char **argv)
return (1);
}
- if ((new_args = prep_args(bh, login, proc_args)) == NULL) {
- zperror(gettext("could not assemble new arguments"));
- brand_close(bh);
- return (1);
+ /*
+ * The 'interactive' parameter (-i option) indicates that we're running
+ * a command interactively. In this case we skip prep_args so that we
+ * don't prepend the 'su root -c' preamble to the command invocation
+ * since the 'su' command typically will execute a setpgrp which will
+ * disassociate the actual command from the controlling terminal that
+ * we (zlogin) setup.
+ */
+ if (!iflag) {
+ if ((new_args = prep_args(bh, zonename, login, proc_args))
+ == NULL) {
+ zperror(gettext("could not assemble new arguments"));
+ brand_close(bh);
+ return (1);
+ }
}
+
/*
* Get the brand specific user_cmd. This command is used to get
* a passwd(4) entry for login.
@@ -2201,6 +2513,8 @@ main(int argc, char **argv)
return (1);
}
+ /* Note: we're now inside the zone, can't use gettext anymore */
+
if (slavefd != STDERR_FILENO)
(void) close(STDERR_FILENO);
@@ -2242,8 +2556,18 @@ main(int argc, char **argv)
/*
* In failsafe mode, we don't use login(1), so don't try
* setting up a utmpx entry.
+ *
+ * A branded zone may have very different utmpx semantics.
+ * At the moment, we only have two brand types:
+ * Illumos-like (native, sn1) and Linux. In the Illumos
+ * case, we know exactly how to do the necessary utmpx
+ * setup. Fortunately for us, the Linux /bin/login is
+ * prepared to deal with a non-initialized utmpx entry, so
+ * we can simply skip it. If future brands don't fall into
+ * either category, we'll have to add a per-brand utmpx
+ * setup hook.
*/
- if (!failsafe)
+ if (!failsafe && (strcmp(zonebrand, "lx") != 0))
if (setup_utmpx(slaveshortname) == -1)
return (1);
@@ -2252,13 +2576,17 @@ main(int argc, char **argv)
* execute the brand's login program.
*/
if (setuid(0) == -1) {
- zperror(gettext("insufficient privilege"));
+ zperror("insufficient privilege");
return (1);
}
- (void) execve(new_args[0], new_args, new_env);
- zperror(gettext("exec failure"));
- return (1);
+ if (iflag) {
+ (void) execve(proc_args[0], proc_args, new_env);
+ } else {
+ (void) execve(new_args[0], new_args, new_env);
+ }
+ zperror("exec failure");
+ return (ENOEXEC);
}
(void) ct_tmpl_clear(tmpl_fd);
@@ -2283,8 +2611,19 @@ main(int argc, char **argv)
if (pollerr != 0) {
(void) fprintf(stderr, gettext("Error: connection closed due "
"to unexpected pollevents=0x%x.\n"), pollerr);
- return (1);
+ return (EPIPE);
}
- return (0);
+ /* reap child and get its status */
+ if (waitid(P_PID, child_pid, &si, WEXITED | WNOHANG) == -1) {
+ estatus = errno;
+ } else if (si.si_pid == 0) {
+ estatus = ECHILD;
+ } else if (si.si_code == CLD_EXITED) {
+ estatus = si.si_status;
+ } else {
+ estatus = ECONNABORTED;
+ }
+
+ return (estatus);
}
diff --git a/usr/src/cmd/zoneadm/Makefile b/usr/src/cmd/zoneadm/Makefile
index 2b01078aec..fba7809c71 100644
--- a/usr/src/cmd/zoneadm/Makefile
+++ b/usr/src/cmd/zoneadm/Makefile
@@ -21,14 +21,18 @@
#
# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, Joyent, Inc. All rights reserved.
#
PROG= zoneadm
+SCRIPTS=
MANIFEST= zones.xml resource-mgmt.xml
SVCMETHOD= svc-zones svc-resource-mgmt
include ../Makefile.cmd
+include ../Makefile.ctf
+ROOTUSRSBINSCRIPTS= $(SCRIPTS:%=$(ROOTUSRSBIN)/%)
ROOTMANIFESTDIR= $(ROOTSVCSYSTEM)
OBJS= zoneadm.o zfs.o
@@ -42,13 +46,14 @@ CERRWARN += -_gcc=-Wno-uninitialized
.KEEP_STATE:
-all: $(PROG)
+all: $(PROG) $(SCRIPTS)
$(PROG): $(OBJS)
$(LINK.c) -o $@ $(OBJS) $(LDLIBS)
$(POST_PROCESS)
-install: all $(ROOTUSRSBINPROG) $(ROOTMANIFEST) $(ROOTSVCMETHOD)
+install: all $(ROOTUSRSBINPROG) $(ROOTUSRSBINSCRIPTS) $(ROOTMANIFEST) \
+ $(ROOTSVCMETHOD)
check: $(PROG).c $(CHKMANIFEST)
$(CSTYLE) -pP $(SRCS:%=%)
@@ -58,7 +63,7 @@ $(POFILE): $(POFILES)
$(CAT) $(POFILES) > $@
clean:
- $(RM) $(OBJS) $(POFILES)
+ $(RM) $(OBJS) $(POFILES) $(SCRIPTS)
lint: lint_SRCS
diff --git a/usr/src/cmd/zoneadm/svc-zones b/usr/src/cmd/zoneadm/svc-zones
index 9d307835bd..30d54f5272 100644
--- a/usr/src/cmd/zoneadm/svc-zones
+++ b/usr/src/cmd/zoneadm/svc-zones
@@ -32,7 +32,7 @@
shutdown_zones()
{
zoneadm list -p | nawk -F: '{
- if ($2 != "global") {
+ if (($5 != "lx") && ($2 != "global")) {
print $2
}
}'
diff --git a/usr/src/cmd/zoneadm/zfs.c b/usr/src/cmd/zoneadm/zfs.c
index 2041d0f4b5..a318a06dfb 100644
--- a/usr/src/cmd/zoneadm/zfs.c
+++ b/usr/src/cmd/zoneadm/zfs.c
@@ -22,7 +22,7 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright (c) 2016 Martin Matuska. All rights reserved.
*/
@@ -968,6 +968,7 @@ create_zfs_zonepath(char *zonepath)
zfs_handle_t *zhp;
char zfs_name[MAXPATHLEN];
nvlist_t *props = NULL;
+ int i;
if (path2name(zonepath, zfs_name, sizeof (zfs_name)) != Z_OK)
return;
@@ -1003,9 +1004,20 @@ create_zfs_zonepath(char *zonepath)
nvlist_free(props);
- if (zfs_mount(zhp, NULL, 0) != 0) {
+ /*
+ * A monitoring tool might race with us and touch the mountpoint just
+ * as we're trying to mount, blocking the mount. We wait and retry a
+ * few times to workaround this race.
+ */
+ for (i = 0; i < 5; i++) {
+ if (zfs_mount(zhp, NULL, 0) == 0)
+ break;
(void) fprintf(stderr, gettext("cannot mount ZFS dataset %s: "
"%s\n"), zfs_name, libzfs_error_description(g_zfs));
+ (void) sleep(1);
+ }
+
+ if (i >= 5) {
(void) zfs_destroy(zhp, B_FALSE);
} else {
if (chmod(zonepath, S_IRWXU) != 0) {
diff --git a/usr/src/cmd/zoneadm/zoneadm.c b/usr/src/cmd/zoneadm/zoneadm.c
index 21bc9248f4..312159cabd 100644
--- a/usr/src/cmd/zoneadm/zoneadm.c
+++ b/usr/src/cmd/zoneadm/zoneadm.c
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2015, Joyent Inc. All rights reserved.
* Copyright (c) 2015 by Delphix. All rights reserved.
*/
@@ -101,13 +102,11 @@ typedef struct zone_entry {
char zroot[MAXPATHLEN];
char zuuid[UUID_PRINTABLE_STRING_LENGTH];
zone_iptype_t ziptype;
+ zoneid_t zdid;
} zone_entry_t;
#define CLUSTER_BRAND_NAME "cluster"
-static zone_entry_t *zents;
-static size_t nzents;
-
#define LOOPBACK_IF "lo0"
#define SOCKET_AF(af) (((af) == AF_UNSPEC) ? AF_INET : (af))
@@ -406,19 +405,6 @@ zerror(const char *fmt, ...)
va_end(alist);
}
-static void *
-safe_calloc(size_t nelem, size_t elsize)
-{
- void *r = calloc(nelem, elsize);
-
- if (r == NULL) {
- zerror(gettext("failed to allocate %lu bytes: %s"),
- (ulong_t)nelem * elsize, strerror(errno));
- exit(Z_ERR);
- }
- return (r);
-}
-
static void
zone_print(zone_entry_t *zent, boolean_t verbose, boolean_t parsable)
{
@@ -443,6 +429,7 @@ zone_print(zone_entry_t *zent, boolean_t verbose, boolean_t parsable)
}
if (!verbose) {
char *cp, *clim;
+ char zdid[80];
if (!parsable) {
(void) printf("%s\n", zent->zname);
@@ -458,8 +445,12 @@ zone_print(zone_entry_t *zent, boolean_t verbose, boolean_t parsable)
(void) printf("%.*s\\:", clim - cp, cp);
cp = clim + 1;
}
- (void) printf("%s:%s:%s:%s\n", cp, zent->zuuid, zent->zbrand,
- ip_type_str);
+ if (zent->zdid == -1)
+ zdid[0] = '\0';
+ else
+ (void) snprintf(zdid, sizeof (zdid), "%d", zent->zdid);
+ (void) printf("%s:%s:%s:%s:%s\n", cp, zent->zuuid, zent->zbrand,
+ ip_type_str, zdid);
return;
}
if (zent->zstate_str != NULL) {
@@ -485,6 +476,9 @@ lookup_zone_info(const char *zone_name, zoneid_t zid, zone_entry_t *zent)
(void) strlcpy(zent->zbrand, "???", sizeof (zent->zbrand));
zent->zstate_str = "???";
+ if (strcmp(zone_name, GLOBAL_ZONENAME) == 0)
+ zid = zent->zdid = GLOBAL_ZONEID;
+
zent->zid = zid;
if (zonecfg_get_uuid(zone_name, uuid) == Z_OK &&
@@ -529,8 +523,8 @@ lookup_zone_info(const char *zone_name, zoneid_t zid, zone_entry_t *zent)
zent->zstate_str = zone_state_str(zent->zstate_num);
/*
- * A zone's brand is only available in the .xml file describing it,
- * which is only visible to the global zone. This causes
+ * A zone's brand might only be available in the .xml file describing
+ * it, which is only visible to the global zone. This causes
* zone_get_brand() to fail when called from within a non-global
* zone. Fortunately we only do this on labeled systems, where we
* know all zones are native.
@@ -554,6 +548,22 @@ lookup_zone_info(const char *zone_name, zoneid_t zid, zone_entry_t *zent)
return (Z_OK);
}
+ if ((handle = zonecfg_init_handle()) == NULL) {
+ zperror2(zent->zname, gettext("could not init handle"));
+ return (Z_ERR);
+ }
+ if ((err = zonecfg_get_handle(zent->zname, handle)) != Z_OK) {
+ zperror2(zent->zname, gettext("could not get handle"));
+ zonecfg_fini_handle(handle);
+ return (Z_ERR);
+ }
+
+ if ((err = zonecfg_get_iptype(handle, &zent->ziptype)) != Z_OK) {
+ zperror2(zent->zname, gettext("could not get ip-type"));
+ zonecfg_fini_handle(handle);
+ return (Z_ERR);
+ }
+
/*
* There is a race condition where the zone could boot while
* we're walking the index file. In this case the zone state
@@ -574,189 +584,73 @@ lookup_zone_info(const char *zone_name, zoneid_t zid, zone_entry_t *zent)
zent->ziptype = ZS_EXCLUSIVE;
else
zent->ziptype = ZS_SHARED;
- return (Z_OK);
}
}
- if ((handle = zonecfg_init_handle()) == NULL) {
- zperror2(zent->zname, gettext("could not init handle"));
- return (Z_ERR);
- }
- if ((err = zonecfg_get_handle(zent->zname, handle)) != Z_OK) {
- zperror2(zent->zname, gettext("could not get handle"));
- zonecfg_fini_handle(handle);
- return (Z_ERR);
- }
+ zent->zdid = zonecfg_get_did(handle);
- if ((err = zonecfg_get_iptype(handle, &zent->ziptype)) != Z_OK) {
- zperror2(zent->zname, gettext("could not get ip-type"));
- zonecfg_fini_handle(handle);
- return (Z_ERR);
- }
zonecfg_fini_handle(handle);
return (Z_OK);
}
-/*
- * fetch_zents() calls zone_list(2) to find out how many zones are running
- * (which is stored in the global nzents), then calls zone_list(2) again
- * to fetch the list of running zones (stored in the global zents). This
- * function may be called multiple times, so if zents is already set, we
- * return immediately to save work.
- *
- * Note that the data about running zones can change while this function
- * is running, so its possible that the list of zones will have empty slots
- * at the end.
- */
-
-static int
-fetch_zents(void)
-{
- zoneid_t *zids = NULL;
- uint_t nzents_saved;
- int i, retv;
- FILE *fp;
- boolean_t inaltroot;
- zone_entry_t *zentp;
- const char *altroot;
-
- if (nzents > 0)
- return (Z_OK);
-
- if (zone_list(NULL, &nzents) != 0) {
- zperror(gettext("failed to get zoneid list"), B_FALSE);
- return (Z_ERR);
- }
-
-again:
- if (nzents == 0)
- return (Z_OK);
-
- zids = safe_calloc(nzents, sizeof (zoneid_t));
- nzents_saved = nzents;
-
- if (zone_list(zids, &nzents) != 0) {
- zperror(gettext("failed to get zone list"), B_FALSE);
- free(zids);
- return (Z_ERR);
- }
- if (nzents != nzents_saved) {
- /* list changed, try again */
- free(zids);
- goto again;
- }
-
- zents = safe_calloc(nzents, sizeof (zone_entry_t));
-
- inaltroot = zonecfg_in_alt_root();
- if (inaltroot) {
- fp = zonecfg_open_scratch("", B_FALSE);
- altroot = zonecfg_get_root();
- } else {
- fp = NULL;
- }
- zentp = zents;
- retv = Z_OK;
- for (i = 0; i < nzents; i++) {
- char name[ZONENAME_MAX];
- char altname[ZONENAME_MAX];
- char rev_altroot[MAXPATHLEN];
-
- if (getzonenamebyid(zids[i], name, sizeof (name)) < 0) {
- /*
- * There is a race condition where the zone may have
- * shutdown since we retrieved the number of running
- * zones above. This is not an error, there will be
- * an empty slot at the end of the list.
- */
- continue;
- }
- if (zonecfg_is_scratch(name)) {
- /* Ignore scratch zones by default */
- if (!inaltroot)
- continue;
- if (fp == NULL ||
- zonecfg_reverse_scratch(fp, name, altname,
- sizeof (altname), rev_altroot,
- sizeof (rev_altroot)) == -1) {
- zerror(gettext("could not resolve scratch "
- "zone %s"), name);
- retv = Z_ERR;
- continue;
- }
- /* Ignore zones in other alternate roots */
- if (strcmp(rev_altroot, altroot) != 0)
- continue;
- (void) strcpy(name, altname);
- } else {
- /* Ignore non-scratch when in an alternate root */
- if (inaltroot && strcmp(name, GLOBAL_ZONENAME) != 0)
- continue;
- }
- if (lookup_zone_info(name, zids[i], zentp) != Z_OK) {
- /*
- * There is a race condition where the zone may have
- * shutdown since we retrieved the number of running
- * zones above. This is not an error, there will be
- * an empty slot at the end of the list.
- */
- continue;
- }
- zentp++;
- }
- nzents = zentp - zents;
- if (fp != NULL)
- zonecfg_close_scratch(fp);
-
- free(zids);
- return (retv);
-}
-
static int
zone_print_list(zone_state_t min_state, boolean_t verbose, boolean_t parsable)
{
- int i;
zone_entry_t zent;
FILE *cookie;
- char *name;
+ struct zoneent *ze;
/*
- * First get the list of running zones from the kernel and print them.
- * If that is all we need, then return.
- */
- if ((i = fetch_zents()) != Z_OK) {
- /*
- * No need for error messages; fetch_zents() has already taken
- * care of this.
- */
- return (i);
- }
- for (i = 0; i < nzents; i++)
- zone_print(&zents[i], verbose, parsable);
- if (min_state >= ZONE_STATE_RUNNING)
- return (Z_OK);
- /*
- * Next, get the full list of zones from the configuration, skipping
- * any we have already printed.
+ * Get the full list of zones from the configuration.
*/
cookie = setzoneent();
- while ((name = getzoneent(cookie)) != NULL) {
- for (i = 0; i < nzents; i++) {
- if (strcmp(zents[i].zname, name) == 0)
- break;
- }
- if (i < nzents) {
- free(name);
- continue;
- }
- if (lookup_zone_info(name, ZONE_ID_UNDEFINED, &zent) != Z_OK) {
- free(name);
- continue;
+ while ((ze = getzoneent_private(cookie)) != NULL) {
+ char *name = ze->zone_name;
+ zoneid_t zid;
+
+ zid = getzoneidbyname(name);
+
+ if (ze->zone_brand[0] == '\0') {
+ /* old, incomplete index entry */
+ if (lookup_zone_info(name, zid, &zent) != Z_OK) {
+ free(ze);
+ continue;
+ }
+ } else {
+ /* new, full index entry */
+ (void) strlcpy(zent.zname, name, sizeof (zent.zname));
+ (void) strlcpy(zent.zroot, ze->zone_path,
+ sizeof (zent.zroot));
+ uuid_unparse(ze->zone_uuid, zent.zuuid);
+ (void) strlcpy(zent.zbrand, ze->zone_brand,
+ sizeof (zent.zbrand));
+ zent.ziptype = ze->zone_iptype;
+ zent.zdid = ze->zone_did;
+ zent.zid = zid;
+
+ if (zid != -1) {
+ int err;
+
+ err = zone_get_state(name,
+ (zone_state_t *)&ze->zone_state);
+ if (err != Z_OK) {
+ errno = err;
+ zperror2(name, gettext("could not get "
+ "state"));
+ free(ze);
+ continue;
+ }
+ }
+
+ zent.zstate_num = ze->zone_state;
+ zent.zstate_str = zone_state_str(zent.zstate_num);
}
- free(name);
+
if (zent.zstate_num >= min_state)
zone_print(&zent, verbose, parsable);
+
+ free(ze);
}
endzoneent(cookie);
return (Z_OK);
@@ -766,18 +660,22 @@ zone_print_list(zone_state_t min_state, boolean_t verbose, boolean_t parsable)
* Retrieve a zone entry by name. Returns NULL if no such zone exists.
*/
static zone_entry_t *
-lookup_running_zone(const char *str)
+lookup_running_zone(const char *name)
{
- int i;
+ zoneid_t zid;
+ zone_entry_t *zent;
+
+ if ((zid = getzoneidbyname(name)) == -1)
+ return (NULL);
- if (fetch_zents() != Z_OK)
+ if ((zent = malloc(sizeof (zone_entry_t))) == NULL)
return (NULL);
- for (i = 0; i < nzents; i++) {
- if (strcmp(str, zents[i].zname) == 0)
- return (&zents[i]);
+ if (lookup_zone_info(name, zid, zent) != Z_OK) {
+ free(zent);
+ return (NULL);
}
- return (NULL);
+ return (zent);
}
/*
@@ -1013,8 +911,12 @@ validate_zonepath(char *path, int cmd_num)
(void) printf(gettext("WARNING: %s is on a temporary "
"file system.\n"), rpath);
}
- if (crosscheck_zonepaths(rpath) != Z_OK)
- return (Z_ERR);
+ if (cmd_num != CMD_BOOT && cmd_num != CMD_REBOOT &&
+ cmd_num != CMD_READY) {
+ /* we checked when we installed, no need to check each boot */
+ if (crosscheck_zonepaths(rpath) != Z_OK)
+ return (Z_ERR);
+ }
/*
* Try to collect and report as many minor errors as possible
* before returning, so the user can learn everything that needs
@@ -1201,6 +1103,7 @@ static int
ready_func(int argc, char *argv[])
{
zone_cmd_arg_t zarg;
+ boolean_t debug = B_FALSE;
int arg;
if (zonecfg_in_alt_root()) {
@@ -1209,11 +1112,14 @@ ready_func(int argc, char *argv[])
}
optind = 0;
- if ((arg = getopt(argc, argv, "?")) != EOF) {
+ if ((arg = getopt(argc, argv, "?X")) != EOF) {
switch (arg) {
case '?':
sub_usage(SHELP_READY, CMD_READY);
return (optopt == '?' ? Z_OK : Z_USAGE);
+ case 'X':
+ debug = B_TRUE;
+ break;
default:
sub_usage(SHELP_READY, CMD_READY);
return (Z_USAGE);
@@ -1230,6 +1136,7 @@ ready_func(int argc, char *argv[])
return (Z_ERR);
zarg.cmd = Z_READY;
+ zarg.debug = debug;
if (zonecfg_call_zoneadmd(target_zone, &zarg, locale, B_TRUE) != 0) {
zerror(gettext("call to %s failed"), "zoneadmd");
return (Z_ERR);
@@ -1242,6 +1149,7 @@ boot_func(int argc, char *argv[])
{
zone_cmd_arg_t zarg;
boolean_t force = B_FALSE;
+ boolean_t debug = B_FALSE;
int arg;
if (zonecfg_in_alt_root()) {
@@ -1268,7 +1176,7 @@ boot_func(int argc, char *argv[])
* zoneadm -z myzone boot -- -s -v -m verbose.
*/
optind = 0;
- while ((arg = getopt(argc, argv, "?fs")) != EOF) {
+ while ((arg = getopt(argc, argv, "?fsX")) != EOF) {
switch (arg) {
case '?':
sub_usage(SHELP_BOOT, CMD_BOOT);
@@ -1280,6 +1188,9 @@ boot_func(int argc, char *argv[])
case 'f':
force = B_TRUE;
break;
+ case 'X':
+ debug = B_TRUE;
+ break;
default:
sub_usage(SHELP_BOOT, CMD_BOOT);
return (Z_USAGE);
@@ -1305,6 +1216,7 @@ boot_func(int argc, char *argv[])
if (verify_details(CMD_BOOT, argv) != Z_OK)
return (Z_ERR);
zarg.cmd = force ? Z_FORCEBOOT : Z_BOOT;
+ zarg.debug = debug;
if (zonecfg_call_zoneadmd(target_zone, &zarg, locale, B_TRUE) != 0) {
zerror(gettext("call to %s failed"), "zoneadmd");
return (Z_ERR);
@@ -1614,10 +1526,10 @@ auth_check(char *user, char *zone, int cmd_num)
* not already running (or ready).
*/
static int
-sanity_check(char *zone, int cmd_num, boolean_t running,
+sanity_check(char *zone, int cmd_num, boolean_t need_running,
boolean_t unsafe_when_running, boolean_t force)
{
- zone_entry_t *zent;
+ boolean_t is_running = B_FALSE;
priv_set_t *privset;
zone_state_t state, min_state;
char kernzone[ZONENAME_MAX];
@@ -1688,51 +1600,54 @@ sanity_check(char *zone, int cmd_num, boolean_t running,
}
if (!zonecfg_in_alt_root()) {
- zent = lookup_running_zone(zone);
- } else if ((fp = zonecfg_open_scratch("", B_FALSE)) == NULL) {
- zent = NULL;
- } else {
- if (zonecfg_find_scratch(fp, zone, zonecfg_get_root(),
- kernzone, sizeof (kernzone)) == 0)
- zent = lookup_running_zone(kernzone);
- else
- zent = NULL;
+ /* Avoid the xml read overhead of lookup_running_zone */
+ if (getzoneidbyname(zone) != -1)
+ is_running = B_TRUE;
+
+ } else if ((fp = zonecfg_open_scratch("", B_FALSE)) != NULL) {
+ if (zonecfg_find_scratch(fp, zone, zonecfg_get_root(), kernzone,
+ sizeof (kernzone)) == 0 && getzoneidbyname(kernzone) != -1)
+ is_running = B_TRUE;
+
zonecfg_close_scratch(fp);
}
/*
* Look up from the kernel for 'running' zones.
*/
- if (running && !force) {
- if (zent == NULL) {
+ if (need_running && !force) {
+ if (!is_running) {
zerror(gettext("not running"));
return (Z_ERR);
}
} else {
int err;
- if (unsafe_when_running && zent != NULL) {
+ err = zone_get_state(zone, &state);
+
+ if (unsafe_when_running && is_running) {
/* check whether the zone is ready or running */
- if ((err = zone_get_state(zent->zname,
- &zent->zstate_num)) != Z_OK) {
+ char *zstate_str;
+
+ if (err != Z_OK) {
errno = err;
- zperror2(zent->zname,
- gettext("could not get state"));
+ zperror2(zone, gettext("could not get state"));
/* can't tell, so hedge */
- zent->zstate_str = "ready/running";
+ zstate_str = "ready/running";
} else {
- zent->zstate_str =
- zone_state_str(zent->zstate_num);
+ zstate_str = zone_state_str(state);
}
zerror(gettext("%s operation is invalid for %s zones."),
- cmd_to_str(cmd_num), zent->zstate_str);
+ cmd_to_str(cmd_num), zstate_str);
return (Z_ERR);
}
- if ((err = zone_get_state(zone, &state)) != Z_OK) {
+
+ if (err != Z_OK) {
errno = err;
zperror2(zone, gettext("could not get state"));
return (Z_ERR);
}
+
switch (cmd_num) {
case CMD_UNINSTALL:
if (state == ZONE_STATE_CONFIGURED) {
@@ -1820,6 +1735,7 @@ static int
halt_func(int argc, char *argv[])
{
zone_cmd_arg_t zarg;
+ boolean_t debug = B_FALSE;
int arg;
if (zonecfg_in_alt_root()) {
@@ -1828,11 +1744,14 @@ halt_func(int argc, char *argv[])
}
optind = 0;
- if ((arg = getopt(argc, argv, "?")) != EOF) {
+ if ((arg = getopt(argc, argv, "?X")) != EOF) {
switch (arg) {
case '?':
sub_usage(SHELP_HALT, CMD_HALT);
return (optopt == '?' ? Z_OK : Z_USAGE);
+ case 'X':
+ debug = B_TRUE;
+ break;
default:
sub_usage(SHELP_HALT, CMD_HALT);
return (Z_USAGE);
@@ -1858,6 +1777,7 @@ halt_func(int argc, char *argv[])
return (Z_ERR);
zarg.cmd = Z_HALT;
+ zarg.debug = debug;
return ((zonecfg_call_zoneadmd(target_zone, &zarg, locale,
B_TRUE) == 0) ? Z_OK : Z_ERR);
}
@@ -1935,6 +1855,7 @@ static int
reboot_func(int argc, char *argv[])
{
zone_cmd_arg_t zarg;
+ boolean_t debug = B_FALSE;
int arg;
if (zonecfg_in_alt_root()) {
@@ -1943,11 +1864,14 @@ reboot_func(int argc, char *argv[])
}
optind = 0;
- if ((arg = getopt(argc, argv, "?")) != EOF) {
+ if ((arg = getopt(argc, argv, "?X")) != EOF) {
switch (arg) {
case '?':
sub_usage(SHELP_REBOOT, CMD_REBOOT);
return (optopt == '?' ? Z_OK : Z_USAGE);
+ case 'X':
+ debug = B_TRUE;
+ break;
default:
sub_usage(SHELP_REBOOT, CMD_REBOOT);
return (Z_USAGE);
@@ -1982,6 +1906,7 @@ reboot_func(int argc, char *argv[])
return (Z_ERR);
zarg.cmd = Z_REBOOT;
+ zarg.debug = debug;
return ((zonecfg_call_zoneadmd(target_zone, &zarg, locale, B_TRUE) == 0)
? Z_OK : Z_ERR);
}
@@ -2209,6 +2134,10 @@ verify_fs_special(struct zone_fstab *fstab)
if (strcmp(fstab->zone_fs_type, MNTTYPE_ZFS) == 0)
return (verify_fs_zfs(fstab));
+ if (strcmp(fstab->zone_fs_type, MNTTYPE_HYPRLOFS) == 0 &&
+ strcmp(fstab->zone_fs_special, "swap") == 0)
+ return (Z_OK);
+
if (stat64(fstab->zone_fs_special, &st) != 0) {
(void) fprintf(stderr, gettext("could not verify fs "
"%s: could not access %s: %s\n"), fstab->zone_fs_dir,
@@ -2612,7 +2541,6 @@ verify_handle(int cmd_num, zone_dochandle_t handle, char *argv[])
dladm_handle_t dh;
dladm_status_t status;
datalink_id_t linkid;
- char errmsg[DLADM_STRSIZE];
in_alt_root = zonecfg_in_alt_root();
if (in_alt_root)
@@ -2695,11 +2623,6 @@ verify_handle(int cmd_num, zone_dochandle_t handle, char *argv[])
dladm_close(dh);
}
if (status != DLADM_STATUS_OK) {
- (void) fprintf(stderr,
- gettext("WARNING: skipping network "
- "interface '%s': %s\n"),
- nwiftab.zone_nwif_physical,
- dladm_status2str(status, errmsg));
break;
}
dl_owner_zid = ALL_ZONES;
@@ -2783,6 +2706,74 @@ no_net:
return (return_code);
}
+/*
+ * Called when readying or booting a zone. We double check that the zone's
+ * debug ID is set and is unique. This covers the case of pre-existing zones
+ * with no ID. Also, its possible that a zone was migrated to this host
+ * and as a result it has a duplicate ID. In this case we preserve the ID
+ * of the first zone we match on in the index file (since it was there before
+ * the current zone) and we assign a new unique ID to the current zone.
+ * Return true if we assigned a new ID, indicating that the zone configuration
+ * needs to be saved.
+ */
+static boolean_t
+verify_fix_did(zone_dochandle_t handle)
+{
+ zoneid_t mydid;
+ struct zoneent *ze;
+ FILE *cookie;
+ boolean_t fix = B_FALSE;
+
+ mydid = zonecfg_get_did(handle);
+ if (mydid == -1) {
+ zonecfg_set_did(handle);
+ return (B_TRUE);
+ }
+
+ /* Get the full list of zones from the configuration. */
+ cookie = setzoneent();
+ while ((ze = getzoneent_private(cookie)) != NULL) {
+ char *name;
+ zoneid_t did;
+
+ name = ze->zone_name;
+ if (strcmp(name, GLOBAL_ZONENAME) == 0 ||
+ strcmp(name, target_zone) == 0) {
+ free(ze);
+ continue;
+ }
+
+ if (ze->zone_brand[0] == '\0') {
+ /* old, incomplete index entry */
+ zone_entry_t zent;
+
+ if (lookup_zone_info(name, ZONE_ID_UNDEFINED,
+ &zent) != Z_OK) {
+ free(ze);
+ continue;
+ }
+ did = zent.zdid;
+ } else {
+ /* new, full index entry */
+ did = ze->zone_did;
+ }
+ free(ze);
+
+ if (did == mydid) {
+ fix = B_TRUE;
+ break;
+ }
+ }
+ endzoneent(cookie);
+
+ if (fix) {
+ zonecfg_set_did(handle);
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
static int
verify_details(int cmd_num, char *argv[])
{
@@ -2842,6 +2833,18 @@ verify_details(int cmd_num, char *argv[])
if (verify_handle(cmd_num, handle, argv) != Z_OK)
return_code = Z_ERR;
+ if (cmd_num == CMD_READY || cmd_num == CMD_BOOT) {
+ int vcommit = 0, obscommit = 0;
+
+ vcommit = verify_fix_did(handle);
+ obscommit = zonecfg_fix_obsolete(handle);
+
+ if (vcommit || obscommit)
+ if (zonecfg_save(handle) != Z_OK)
+ (void) fprintf(stderr, gettext("Could not save "
+ "updated configuration.\n"));
+ }
+
zonecfg_fini_handle(handle);
if (return_code == Z_ERR)
(void) fprintf(stderr,
@@ -2927,6 +2930,7 @@ install_func(int argc, char *argv[])
int status;
boolean_t do_postinstall = B_FALSE;
boolean_t brand_help = B_FALSE;
+ boolean_t do_dataset = B_TRUE;
char opts[128];
if (target_zone == NULL) {
@@ -3002,6 +3006,12 @@ install_func(int argc, char *argv[])
}
/* Ignore unknown options - may be brand specific. */
break;
+ case 'x':
+ if (strcmp(optarg, "nodataset") == 0) {
+ do_dataset = B_FALSE;
+ continue; /* internal arg, don't pass thru */
+ }
+ break;
default:
/* Ignore unknown options - may be brand specific. */
break;
@@ -3054,7 +3064,8 @@ install_func(int argc, char *argv[])
goto done;
}
- create_zfs_zonepath(zonepath);
+ if (do_dataset)
+ create_zfs_zonepath(zonepath);
}
status = do_subproc(cmdbuf);
@@ -3865,10 +3876,10 @@ cleanup_zonepath(char *zonepath, boolean_t all)
* exist if the zone was force-attached after a
* migration.
*/
- char *std_entries[] = {"dev", "lu", "root",
- "SUNWattached.xml", NULL};
- /* (MAXPATHLEN * 3) is for the 3 std_entries dirs */
- char cmdbuf[sizeof (RMCOMMAND) + (MAXPATHLEN * 3) + 64];
+ char *std_entries[] = {"dev", "lastexited", "logs", "lu",
+ "root", "SUNWattached.xml", NULL};
+ /* (MAXPATHLEN * 5) is for the 5 std_entries dirs */
+ char cmdbuf[sizeof (RMCOMMAND) + (MAXPATHLEN * 5) + 64];
/*
* We shouldn't need these checks but lets be paranoid since we
@@ -5018,6 +5029,7 @@ uninstall_func(int argc, char *argv[])
if (zonecfg_ping_zoneadmd(target_zone) == Z_OK) {
zone_cmd_arg_t zarg;
zarg.cmd = Z_NOTE_UNINSTALLING;
+ zarg.debug = B_FALSE;
/* we don't care too much if this fails, just plow on */
(void) zonecfg_call_zoneadmd(target_zone, &zarg, locale,
B_TRUE);
@@ -5133,6 +5145,7 @@ mount_func(int argc, char *argv[])
return (Z_ERR);
zarg.cmd = force ? Z_FORCEMOUNT : Z_MOUNT;
+ zarg.debug = B_FALSE;
zarg.bootbuf[0] = '\0';
if (zonecfg_call_zoneadmd(target_zone, &zarg, locale, B_TRUE) != 0) {
zerror(gettext("call to %s failed"), "zoneadmd");
@@ -5154,6 +5167,7 @@ unmount_func(int argc, char *argv[])
return (Z_ERR);
zarg.cmd = Z_UNMOUNT;
+ zarg.debug = B_FALSE;
if (zonecfg_call_zoneadmd(target_zone, &zarg, locale, B_TRUE) != 0) {
zerror(gettext("call to %s failed"), "zoneadmd");
return (Z_ERR);
@@ -5375,7 +5389,7 @@ apply_func(int argc, char *argv[])
priv_set_t *privset;
zoneid_t zoneid;
zone_dochandle_t handle;
- struct zone_mcaptab mcap;
+ uint64_t mcap;
char pool_err[128];
zoneid = getzoneid();
@@ -5466,19 +5480,12 @@ apply_func(int argc, char *argv[])
}
/*
- * If a memory cap is configured, set the cap in the kernel using
- * zone_setattr() and make sure the rcapd SMF service is enabled.
+ * If a memory cap is configured, make sure the rcapd SMF service is
+ * enabled.
*/
- if (zonecfg_getmcapent(handle, &mcap) == Z_OK) {
- uint64_t num;
+ if (zonecfg_get_aliased_rctl(handle, ALIAS_MAXPHYSMEM, &mcap) == Z_OK) {
char smf_err[128];
- num = (uint64_t)strtoll(mcap.zone_physmem_cap, NULL, 10);
- if (zone_setattr(zoneid, ZONE_ATTR_PHYS_MCAP, &num, 0) == -1) {
- zerror(gettext("could not set zone memory cap"));
- res = Z_ERR;
- }
-
if (zonecfg_enable_rcapd(smf_err, sizeof (smf_err)) != Z_OK) {
zerror(gettext("enabling system/rcap service failed: "
"%s"), smf_err);
diff --git a/usr/src/cmd/zoneadm/zones.xml b/usr/src/cmd/zoneadm/zones.xml
index 9c8e305f89..1b6f8cd49b 100644
--- a/usr/src/cmd/zoneadm/zones.xml
+++ b/usr/src/cmd/zoneadm/zones.xml
@@ -54,11 +54,19 @@
<service_fmri value='svc:/milestone/multi-user-server' />
</dependency>
+ <dependency
+ name='metadata'
+ type='service'
+ grouping='require_all'
+ restart_on='none'>
+ <service_fmri value='svc:/system/smartdc/metadata' />
+ </dependency>
+
<exec_method
type='method'
name='start'
exec='/lib/svc/method/svc-zones %m'
- timeout_seconds='60'>
+ timeout_seconds='0'>
</exec_method>
<!--
diff --git a/usr/src/cmd/zoneadmd/Makefile b/usr/src/cmd/zoneadmd/Makefile
index 8324f7fefa..e81e4631aa 100644
--- a/usr/src/cmd/zoneadmd/Makefile
+++ b/usr/src/cmd/zoneadmd/Makefile
@@ -18,57 +18,54 @@
#
# CDDL HEADER END
-
-#
-
#
# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+# Copyright (c) 2011, Joyent, Inc. All rights reserved.
#
PROG= zoneadmd
include ../Makefile.cmd
+include ../Makefile.ctf
-ROOTCMDDIR= $(ROOTLIB)/zones
-
-OBJS= zoneadmd.o zcons.o vplat.o
-SRCS = $(OBJS:.o=.c)
-POFILE=zoneadmd_all.po
-POFILES= $(OBJS:%.o=%.po)
+$(64ONLY)SUBDIRS= $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
-CFLAGS += $(CCVERBOSE)
-CERRWARN += -_gcc=-Wno-switch
-CERRWARN += -_gcc=-Wno-parentheses
-CERRWARN += -_gcc=-Wno-uninitialized
+all := TARGET = all
+install := TARGET = install
+clean := TARGET = clean
+clobber := TARGET = clobber
+lint := TARGET = lint
-LDLIBS += -lsocket -lzonecfg -lnsl -ldevinfo -ldevice -lnvpair \
- -lgen -lbsm -lcontract -lzfs -luuid -lbrand -ldladm -ltsnet -ltsol \
- -linetutil -lscf
XGETFLAGS += -a -x zoneadmd.xcl
+ROOTUSRLIBZONES = $(ROOT)/usr/lib/zones
+
.KEEP_STATE:
.PARALLEL:
-all: $(PROG)
+all: $(SUBDIRS)
$(PROG): $(OBJS)
$(LINK.c) -o $@ $(OBJS) $(LDLIBS)
$(POST_PROCESS)
-install: all $(ROOTCMD)
-
-$(POFILE): $(POFILES)
- $(RM) $@
- $(CAT) $(POFILES) > $@
+install: $(SUBDIRS)
+ -$(RM) $(ROOTUSRLIBZONES)/$(PROG)
+ -$(LN) $(ISAEXEC) $(ROOTUSRLIBZONES)/$(PROG)
-clean:
- $(RM) $(OBJS)
+$(POFILE):
-lint: lint_SRCS
+clean clobebr lint: $(SUBDIRS)
check:
- $(CSTYLE) -p -P $(SRCS:%=%)
+ $(CSTYLE) -p -P *.c
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
include ../Makefile.targ
diff --git a/usr/src/cmd/zoneadmd/Makefile.com b/usr/src/cmd/zoneadmd/Makefile.com
new file mode 100644
index 0000000000..c8becc3e8c
--- /dev/null
+++ b/usr/src/cmd/zoneadmd/Makefile.com
@@ -0,0 +1,70 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+
+#
+# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2014, Joyent, Inc. All rights reserved.
+#
+
+PROG= zoneadmd
+
+include ../../Makefile.cmd
+include ../../Makefile.ctf
+
+ROOTCMDDIR= $(ROOTLIB)/zones
+
+OBJS= zoneadmd.o zcons.o zfd.o vplat.o mcap.o
+
+CFLAGS += $(CCVERBOSE)
+LDLIBS += -lsocket -lzonecfg -lnsl -ldevinfo -ldevice -lnvpair \
+ -lgen -lbsm -lcontract -lzfs -luuid -lbrand -ldladm -ltsnet -ltsol \
+ -linetutil -lproc -lscf
+
+.KEEP_STATE:
+
+%.o: ../%.c
+ $(COMPILE.c) $<
+ $(POST_PROCESS_O)
+
+ROOTUSRLIBZONES = $(ROOT)/usr/lib/zones
+ROOTUSRLIBZONES32 = $(ROOTUSRLIBZONES)/$(MACH32)
+ROOTUSRLIBZONES64 = $(ROOTUSRLIBZONES)/$(MACH64)
+ROOTUSRLIBZONESPROG32 = $(ROOTUSRLIBZONES32)/$(PROG)
+ROOTUSRLIBZONESPROG64 = $(ROOTUSRLIBZONES64)/$(PROG)
+$(ROOTUSRLIBZONES32)/%: $(ROOTUSRLIBZONES32) %
+ $(INS.file)
+$(ROOTUSRLIBZONES64)/%: $(ROOTUSRLIBZONES64) %
+ $(INS.file)
+$(ROOTUSRLIBZONES32):
+ $(INS.dir)
+
+all: $(PROG)
+
+$(PROG): $(OBJS)
+ $(LINK.c) -o $@ $(OBJS) $(LDLIBS)
+ $(POST_PROCESS)
+
+clean:
+ $(RM) $(OBJS)
+
+lint:
+ $(LINT.c) ../*.c $(LDLIBS)
+
+include ../../Makefile.targ
diff --git a/usr/src/cmd/zoneadmd/amd64/Makefile b/usr/src/cmd/zoneadmd/amd64/Makefile
new file mode 100644
index 0000000000..75ac51db32
--- /dev/null
+++ b/usr/src/cmd/zoneadmd/amd64/Makefile
@@ -0,0 +1,31 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2011, Joyent, Inc. All rights reserved.
+#
+
+.KEEP_STATE:
+
+include ../Makefile.com
+include ../../Makefile.cmd.64
+
+install: all $(ROOTUSRLIBZONES64) $(ROOTUSRLIBZONESPROG64)
diff --git a/usr/src/cmd/zoneadmd/i386/Makefile b/usr/src/cmd/zoneadmd/i386/Makefile
new file mode 100644
index 0000000000..a8764e0638
--- /dev/null
+++ b/usr/src/cmd/zoneadmd/i386/Makefile
@@ -0,0 +1,30 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2011, Joyent, Inc. All rights reserved.
+#
+
+.KEEP_STATE:
+
+include ../Makefile.com
+
+install: all $(ROOTUSRLIBZONES32) $(ROOTUSRLIBZONESPROG32)
diff --git a/usr/src/cmd/zoneadmd/mcap.c b/usr/src/cmd/zoneadmd/mcap.c
new file mode 100644
index 0000000000..16cd2dd07a
--- /dev/null
+++ b/usr/src/cmd/zoneadmd/mcap.c
@@ -0,0 +1,1182 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2014, Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * This file implements the code which runs a thread inside zoneadmd to cap
+ * the associated zone's physical memory. A thread to do this is started
+ * when the zone boots and is halted when the zone shuts down.
+ *
+ * Because of the way that the VM system is currently implemented, there is no
+ * way to go from the bottom up (page to process to zone). Thus, there is no
+ * obvious way to hook an rctl into the kernel's paging code to enforce a hard
+ * memory cap. Instead, we implement a soft physical memory cap which looks
+ * at the zone's overall rss and once it is over the cap, works from the top
+ * down (zone to process to page), looking at zone processes, to determine
+ * what to try to pageout to get the zone under its memory cap.
+ *
+ * The code uses the fast, cheap, but potentially very inaccurate sum of the
+ * rss values from psinfo_t to first approximate the zone's rss and will
+ * fallback to the vm_getusage syscall to determine the zone's rss if needed.
+ * It then checks the rss against the zone's zone.max-physical-memory rctl.
+ * Once the zone goes over its cap, then this thread will work through the
+ * zone's /proc process list, Pgrab-bing each process and stepping through the
+ * address space segments attempting to use pr_memcntl(...MS_INVALCURPROC...)
+ * to pageout pages, until the zone is again under its cap.
+ *
+ * Although zone memory capping is implemented as a soft cap by this user-level
+ * thread, the interfaces around memory caps that are exposed to the user are
+ * the standard ones; an rctl and kstats. This thread uses the rctl value
+ * to obtain the cap and works with the zone kernel code to update the kstats.
+ * If the implementation ever moves into the kernel, these exposed interfaces
+ * do not need to change.
+ *
+ * The thread adaptively sleeps, periodically checking the state of the
+ * zone. As the zone's rss gets closer to the cap, the thread will wake up
+ * more often to check the zone's status. Once the zone is over the cap,
+ * the thread will work to pageout until the zone is under the cap, as shown
+ * by updated vm_usage data.
+ *
+ * NOTE: The pagedata page maps (at least on x86) are not useful. Those flags
+ * are set by hrm_setbits() and on x86 that code path is only executed by
+ * segvn_pagelock -> hat_setstat -> hrm_setbits
+ * segvn_softunlock -^
+ * On SPARC there is an additional code path which may make this data
+ * useful (sfmmu_ttesync), but since it is not generic, we ignore the page
+ * maps. If we ever fix this issue, then we could generalize this mcap code to
+ * do more with the data on active pages.
+ *
+ * For debugging, touch the file {zonepath}/mcap_debug.log. This will
+ * cause the thread to start logging its actions into that file (it may take
+ * a minute or two if the thread is currently sleeping). Removing that
+ * file will cause logging to stop.
+ */
+
+#include <sys/mman.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libproc.h>
+#include <limits.h>
+#include <procfs.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/priocntl.h>
+#include <dirent.h>
+#include <zone.h>
+#include <libzonecfg.h>
+#include <thread.h>
+#include <values.h>
+#include <sys/vm_usage.h>
+#include <sys/resource.h>
+#include <sys/debug.h>
+#include <synch.h>
+#include <wait.h>
+#include <libcontract.h>
+#include <libcontract_priv.h>
+#include <sys/contract/process.h>
+#include "zoneadmd.h"
+
+ /* round up to next y = 2^n */
+#define ROUNDUP(x, y) (((x) + ((y) - 1)) & ~((y) - 1))
+
+#define CAP_REFRESH ((uint64_t)300 * NANOSEC) /* every 5 minutes */
+
+/*
+ * zonecfg attribute tunables for memory capping.
+ * phys-mcap-cmd
+ * type: string
+ * specifies a command that can be run when over the cap
+ * phys-mcap-no-vmusage
+ * type: boolean
+ * true disables vm_getusage and just uses zone's proc. rss sum
+ * phys-mcap-no-pageout
+ * type: boolean
+ * true disables pageout when over
+ * phys-mcap-no-pf-throttle
+ * type: boolean
+ * true disables page fault throttling when over
+ */
+#define TUNE_CMD "phys-mcap-cmd"
+#define TUNE_NVMU "phys-mcap-no-vmusage"
+#define TUNE_NPAGE "phys-mcap-no-pageout"
+#define TUNE_NPFTHROT "phys-mcap-no-pf-throttle"
+
+/*
+ * These are only used in get_mem_info but global. We always need scale_rss and
+ * prev_fast_rss to be persistent but we also have the other two global so we
+ * can easily see these with mdb.
+ */
+uint64_t scale_rss = 0;
+uint64_t prev_fast_rss = 0;
+uint64_t fast_rss = 0;
+uint64_t accurate_rss = 0;
+
+static char zoneproc[MAXPATHLEN];
+static char debug_log[MAXPATHLEN];
+static zoneid_t zid;
+static mutex_t shutdown_mx;
+static cond_t shutdown_cv;
+static int shutting_down = 0;
+static thread_t mcap_tid;
+static FILE *debug_log_fp = NULL;
+static uint64_t zone_rss_cap; /* RSS cap(KB) */
+static char over_cmd[2 * BUFSIZ]; /* same size as zone_attr_value */
+static boolean_t skip_vmusage = B_FALSE;
+static boolean_t skip_pageout = B_FALSE;
+static boolean_t skip_pf_throttle = B_FALSE;
+
+static zlog_t *logp;
+
+static int64_t check_suspend();
+static void get_mcap_tunables();
+
+/*
+ * Structure to hold current state about a process address space that we're
+ * working on.
+ */
+typedef struct {
+ int pr_curr; /* the # of the mapping we're working on */
+ int pr_nmap; /* number of mappings in address space */
+ prmap_t *pr_mapp; /* process's map array */
+} proc_map_t;
+
+typedef struct zsd_vmusage64 {
+ id_t vmu_zoneid;
+ uint_t vmu_type;
+ id_t vmu_id;
+ /*
+ * An amd64 kernel will align the following uint64_t members, but a
+ * 32bit i386 process will not without help.
+ */
+ int vmu_align_next_members_on_8_bytes;
+ uint64_t vmu_rss_all;
+ uint64_t vmu_rss_private;
+ uint64_t vmu_rss_shared;
+ uint64_t vmu_swap_all;
+ uint64_t vmu_swap_private;
+ uint64_t vmu_swap_shared;
+} zsd_vmusage64_t;
+
+/*
+ * Output a debug log message.
+ */
+/*PRINTFLIKE1*/
+static void
+debug(char *fmt, ...)
+{
+ va_list ap;
+
+ if (debug_log_fp == NULL)
+ return;
+
+ va_start(ap, fmt);
+ (void) vfprintf(debug_log_fp, fmt, ap);
+ va_end(ap);
+ (void) fflush(debug_log_fp);
+}
+
+/*
+ * Like sleep(3C) but can be interupted by cond_signal which is posted when
+ * we're shutting down the mcap thread.
+ */
+static void
+sleep_shutdown(int secs)
+{
+ timestruc_t to;
+
+ to.tv_sec = secs;
+ to.tv_nsec = 0;
+
+ (void) mutex_lock(&shutdown_mx);
+ if (!shutting_down)
+ (void) cond_reltimedwait(&shutdown_cv, &shutdown_mx, &to);
+ (void) mutex_unlock(&shutdown_mx);
+}
+
+static boolean_t
+proc_issystem(pid_t pid)
+{
+ char pc_clname[PC_CLNMSZ];
+
+ if (priocntl(P_PID, pid, PC_GETXPARMS, NULL, PC_KY_CLNAME, pc_clname,
+ PC_KY_NULL) != -1)
+ return (strcmp(pc_clname, "SYS") == 0);
+
+ return (B_TRUE);
+}
+
+/*
+ * Fork a child that enters the zone and runs the "phys-mcap-cmd" command.
+ */
+static void
+run_over_cmd()
+{
+ int ctfd;
+ int err;
+ pid_t childpid;
+ siginfo_t info;
+ ctid_t ct;
+
+ /*
+ * Before we enter the zone, we need to create a new process contract
+ * for the child, as required by zone_enter().
+ */
+ if ((ctfd = open64("/system/contract/process/template", O_RDWR)) == -1)
+ return;
+ if (ct_tmpl_set_critical(ctfd, 0) != 0 ||
+ ct_tmpl_set_informative(ctfd, 0) != 0 ||
+ ct_pr_tmpl_set_fatal(ctfd, CT_PR_EV_HWERR) != 0 ||
+ ct_pr_tmpl_set_param(ctfd, CT_PR_PGRPONLY) != 0 ||
+ ct_tmpl_activate(ctfd) != 0) {
+ (void) close(ctfd);
+ return;
+ }
+
+ childpid = fork();
+ switch (childpid) {
+ case -1:
+ (void) ct_tmpl_clear(ctfd);
+ (void) close(ctfd);
+ break;
+ case 0: /* Child */
+ (void) ct_tmpl_clear(ctfd);
+ (void) close(ctfd);
+ if (zone_enter(zid) == -1)
+ _exit(errno);
+ err = system(over_cmd);
+ _exit(err);
+ break;
+ default: /* Parent */
+ if (contract_latest(&ct) == -1)
+ ct = -1;
+ (void) ct_tmpl_clear(ctfd);
+ (void) close(ctfd);
+ err = waitid(P_PID, childpid, &info, WEXITED);
+ (void) contract_abandon_id(ct);
+ if (err == -1 || info.si_status != 0)
+ debug("over_cmd failed");
+ break;
+ }
+}
+
+/*
+ * Get the next mapping.
+ */
+static prmap_t *
+nextmapping(proc_map_t *pmp)
+{
+ if (pmp->pr_mapp == NULL || pmp->pr_curr >= pmp->pr_nmap)
+ return (NULL);
+
+ return (&pmp->pr_mapp[pmp->pr_curr++]);
+}
+
+/*
+ * Initialize the proc_map_t to access the first mapping of an address space.
+ */
+static prmap_t *
+init_map(proc_map_t *pmp, pid_t pid)
+{
+ int fd;
+ int res;
+ struct stat st;
+ char pathbuf[MAXPATHLEN];
+
+ bzero(pmp, sizeof (proc_map_t));
+ pmp->pr_nmap = -1;
+
+ (void) snprintf(pathbuf, sizeof (pathbuf), "%s/%d/map", zoneproc, pid);
+ if ((fd = open(pathbuf, O_RDONLY, 0)) < 0)
+ return (NULL);
+
+redo:
+ errno = 0;
+ if (fstat(fd, &st) != 0)
+ goto done;
+
+ if ((pmp->pr_mapp = malloc(st.st_size)) == NULL) {
+ debug("cannot malloc() %ld bytes for xmap", st.st_size);
+ goto done;
+ }
+ (void) bzero(pmp->pr_mapp, st.st_size);
+
+ errno = 0;
+ if ((res = pread(fd, pmp->pr_mapp, st.st_size, 0)) != st.st_size) {
+ free(pmp->pr_mapp);
+ pmp->pr_mapp = NULL;
+ if (res > 0 || errno == E2BIG) {
+ goto redo;
+ } else {
+ debug("pid %ld cannot read xmap\n", pid);
+ goto done;
+ }
+ }
+
+ pmp->pr_nmap = st.st_size / sizeof (prmap_t);
+
+done:
+ (void) close(fd);
+ return (nextmapping(pmp));
+}
+
+/*
+ * Attempt to invalidate the entire mapping from within the given process's
+ * address space. May return nonzero with errno as:
+ * ESRCH - process not found
+ * ENOMEM - segment not found
+ * EINVAL - mapping exceeds a single segment
+ */
+static int
+pageout_mapping(pid_t pid, prmap_t *pmp)
+{
+ int res;
+
+ if (pmp->pr_mflags & MA_ISM || pmp->pr_mflags & MA_SHM)
+ return (0);
+
+ errno = 0;
+ res = syscall(SYS_rusagesys, _RUSAGESYS_INVALMAP, pid, pmp->pr_vaddr,
+ pmp->pr_size);
+
+ return (res);
+}
+
+/*
+ * Work through a process paging out mappings until the whole address space was
+ * examined or the excess is < 0. Return our estimate of the updated excess.
+ */
+static int64_t
+pageout_process(pid_t pid, int64_t excess)
+{
+ int psfd;
+ prmap_t *pmap;
+ proc_map_t cur;
+ int res;
+ int64_t sum_d_rss, d_rss;
+ int64_t old_rss;
+ int map_cnt;
+ psinfo_t psinfo;
+ char pathbuf[MAXPATHLEN];
+
+ (void) snprintf(pathbuf, sizeof (pathbuf), "%s/%d/psinfo", zoneproc,
+ pid);
+ if ((psfd = open(pathbuf, O_RDONLY, 0000)) < 0)
+ return (excess);
+
+ cur.pr_mapp = NULL;
+
+ if (pread(psfd, &psinfo, sizeof (psinfo), 0) != sizeof (psinfo))
+ goto done;
+
+ old_rss = (int64_t)psinfo.pr_rssize;
+ map_cnt = 0;
+
+ /* If unscannable, skip it. */
+ if (psinfo.pr_nlwp == 0 || proc_issystem(pid)) {
+ debug("pid %ld: system process, skipping %s\n",
+ pid, psinfo.pr_psargs);
+ goto done;
+ }
+
+ /* If tiny RSS (16KB), skip it. */
+ if (old_rss <= 16) {
+ debug("pid %ld: skipping, RSS %lldKB %s\n",
+ pid, old_rss, psinfo.pr_psargs);
+ goto done;
+ }
+
+ /* Get segment residency information. */
+ pmap = init_map(&cur, pid);
+
+ /* Skip process if it has no mappings. */
+ if (pmap == NULL) {
+ debug("pid %ld: map unreadable; ignoring\n", pid);
+ goto done;
+ }
+
+ debug("pid %ld: nmap %d sz %dKB rss %lldKB %s\n",
+ pid, cur.pr_nmap, psinfo.pr_size, old_rss, psinfo.pr_psargs);
+
+ /*
+ * Within the process's address space, attempt to page out mappings.
+ */
+ sum_d_rss = 0;
+ while (excess > 0 && pmap != NULL && !shutting_down) {
+ /* invalidate the entire mapping */
+ if ((res = pageout_mapping(pid, pmap)) < 0)
+ debug("pid %ld: mapping 0x%p %ldkb unpageable (%d)\n",
+ pid, pmap->pr_vaddr, pmap->pr_size / 1024, errno);
+
+ map_cnt++;
+
+ /*
+ * Re-check the process rss and get the delta.
+ */
+ if (pread(psfd, &psinfo, sizeof (psinfo), 0)
+ != sizeof (psinfo)) {
+ excess -= old_rss;
+ goto done;
+ }
+
+ d_rss = (int64_t)psinfo.pr_rssize - old_rss;
+ old_rss = (int64_t)psinfo.pr_rssize;
+ sum_d_rss += d_rss;
+
+ /*
+ * d_rss hopefully should be negative (or 0 if nothing
+ * invalidated) but can be positive if more got paged in.
+ */
+ excess += d_rss;
+
+ if (excess <= 0) {
+ debug("pid %ld: (part.) nmap %d delta_rss %lldKB "
+ "excess %lldKB\n", pid, map_cnt,
+ (unsigned long long)sum_d_rss, (long long)excess);
+ map_cnt = 0;
+
+ /*
+ * If we're actually under, this will suspend checking
+ * in the middle of this process's address space.
+ */
+ excess = check_suspend();
+ if (shutting_down)
+ goto done;
+
+ /*
+ * since we might have suspended, re-read process's rss
+ */
+ if (pread(psfd, &psinfo, sizeof (psinfo), 0)
+ != sizeof (psinfo)) {
+ excess -= old_rss;
+ goto done;
+ }
+
+ old_rss = (int64_t)psinfo.pr_rssize;
+
+ debug("pid %ld: resume pageout; excess %lld\n", pid,
+ (long long)excess);
+ sum_d_rss = 0;
+ }
+
+ pmap = nextmapping(&cur);
+ }
+
+ debug("pid %ld: nmap %d delta_rss %lldKB excess %lldKB\n",
+ pid, map_cnt, (unsigned long long)sum_d_rss, (long long)excess);
+
+done:
+ if (cur.pr_mapp != NULL)
+ free(cur.pr_mapp);
+
+ (void) close(psfd);
+
+ if (shutting_down)
+ return (0);
+
+ return (excess);
+}
+
+/*
+ * Get the zone's RSS data.
+ */
+static uint64_t
+get_mem_info()
+{
+ uint64_t n = 1;
+ zsd_vmusage64_t buf;
+ uint64_t tmp_rss;
+ DIR *pdir = NULL;
+ struct dirent *dent;
+
+ /*
+ * Start by doing the fast, cheap RSS calculation using the rss value
+ * in psinfo_t. Because that's per-process, it can lead to double
+ * counting some memory and overestimating how much is being used, but
+ * as long as that's not over the cap, then we don't need do the
+ * expensive calculation.
+ *
+ * If we have to do the expensive calculation, we remember the scaling
+ * factor so that we can try to use that on subsequent iterations for
+ * the fast rss.
+ */
+ if (shutting_down)
+ return (0);
+
+ if ((pdir = opendir(zoneproc)) == NULL)
+ return (0);
+
+ accurate_rss = 0;
+ fast_rss = 0;
+ while (!shutting_down && (dent = readdir(pdir)) != NULL) {
+ pid_t pid;
+ int psfd;
+ int64_t rss;
+ char pathbuf[MAXPATHLEN];
+ psinfo_t psinfo;
+
+ if (strcmp(".", dent->d_name) == 0 ||
+ strcmp("..", dent->d_name) == 0)
+ continue;
+
+ pid = atoi(dent->d_name);
+ if (pid == 0 || pid == 1)
+ continue;
+
+ (void) snprintf(pathbuf, sizeof (pathbuf), "%s/%d/psinfo",
+ zoneproc, pid);
+
+ rss = 0;
+ if ((psfd = open(pathbuf, O_RDONLY, 0000)) != -1) {
+ if (pread(psfd, &psinfo, sizeof (psinfo), 0) ==
+ sizeof (psinfo))
+ rss = (int64_t)psinfo.pr_rssize;
+
+ (void) close(psfd);
+ }
+
+ fast_rss += rss;
+ }
+
+ (void) closedir(pdir);
+
+ if (shutting_down)
+ return (0);
+
+ debug("fast rss: %lluKB, scale: %llu, prev: %lluKB\n", fast_rss,
+ scale_rss, prev_fast_rss);
+
+ /* see if we can get by with a scaled fast rss */
+ tmp_rss = fast_rss;
+ if (scale_rss > 1 && prev_fast_rss > 0) {
+ /*
+ * Only scale the fast value if it hasn't ballooned too much
+ * to trust.
+ */
+ if (fast_rss / prev_fast_rss < 2) {
+ fast_rss /= scale_rss;
+ debug("scaled fast rss: %lluKB\n", fast_rss);
+ }
+ }
+
+ if (fast_rss <= zone_rss_cap || skip_vmusage) {
+ uint64_t zone_rss_bytes;
+
+ zone_rss_bytes = fast_rss * 1024;
+ /* Use the zone's approx. RSS in the kernel */
+ (void) zone_setattr(zid, ZONE_ATTR_RSS, &zone_rss_bytes, 0);
+ return (fast_rss);
+ }
+
+ buf.vmu_id = zid;
+
+ /* get accurate usage (cached data may be up to 5 seconds old) */
+ if (syscall(SYS_rusagesys, _RUSAGESYS_GETVMUSAGE, VMUSAGE_A_ZONE, 5,
+ (uintptr_t)&buf, (uintptr_t)&n) != 0) {
+ debug("vmusage failed\n");
+ (void) sleep_shutdown(1);
+ return (0);
+ }
+
+ if (n > 1) {
+ /* This should never happen */
+ debug("vmusage returned more than one result\n");
+ (void) sleep_shutdown(1);
+ return (0);
+ }
+
+ if (buf.vmu_id != zid) {
+ /* This should never happen */
+ debug("vmusage returned the incorrect zone\n");
+ (void) sleep_shutdown(1);
+ return (0);
+ }
+
+ accurate_rss = buf.vmu_rss_all / 1024;
+
+ /* calculate scaling factor to use for fast_rss from now on */
+ if (accurate_rss > 0) {
+ scale_rss = fast_rss / accurate_rss;
+ debug("new scaling factor: %llu\n", scale_rss);
+ /* remember the fast rss when we had to get the accurate rss */
+ prev_fast_rss = tmp_rss;
+ }
+
+ debug("accurate rss: %lluKB, scale: %llu, prev: %lluKB\n", accurate_rss,
+ scale_rss, prev_fast_rss);
+ return (accurate_rss);
+}
+
+/*
+ * Needed to read the zones physical-memory-cap rctl.
+ */
+static struct ps_prochandle *
+grab_zone_proc()
+{
+ DIR *dirp;
+ struct dirent *dentp;
+ struct ps_prochandle *ph = NULL;
+ int tmp;
+
+ if ((dirp = opendir(zoneproc)) == NULL)
+ return (NULL);
+
+ while (!shutting_down && (dentp = readdir(dirp))) {
+ int pid;
+
+ if (strcmp(".", dentp->d_name) == 0 ||
+ strcmp("..", dentp->d_name) == 0)
+ continue;
+
+ pid = atoi(dentp->d_name);
+ /* attempt to grab process */
+ if ((ph = Pgrab(pid, 0, &tmp)) != NULL) {
+ if (Psetflags(ph, PR_RLC) == 0) {
+ if (Pcreate_agent(ph) == 0) {
+ (void) closedir(dirp);
+ return (ph);
+ }
+ }
+ Prelease(ph, 0);
+ }
+ }
+
+ (void) closedir(dirp);
+ return (NULL);
+}
+
+static uint64_t
+get_zone_cap()
+{
+ rctlblk_t *rblk;
+ uint64_t mcap;
+ struct ps_prochandle *ph;
+
+ if ((rblk = (rctlblk_t *)malloc(rctlblk_size())) == NULL)
+ return (UINT64_MAX);
+
+ if ((ph = grab_zone_proc()) == NULL) {
+ free(rblk);
+ return (UINT64_MAX);
+ }
+
+ if (pr_getrctl(ph, "zone.max-physical-memory", NULL, rblk,
+ RCTL_FIRST)) {
+ Pdestroy_agent(ph);
+ Prelease(ph, 0);
+ free(rblk);
+ return (UINT64_MAX);
+ }
+
+ Pdestroy_agent(ph);
+ Prelease(ph, 0);
+
+ mcap = rctlblk_get_value(rblk);
+ free(rblk);
+ return (mcap);
+}
+
+/*
+ * check_suspend is invoked at the beginning of every pass through the process
+ * list or after we've paged out enough so that we think the excess is under
+ * the cap. The purpose is to periodically check the zone's rss and return
+ * the excess when the zone is over the cap. The rest of the time this
+ * function will sleep, periodically waking up to check the current rss.
+ *
+ * Depending on the percentage of penetration of the zone's rss into the
+ * cap we sleep for longer or shorter amounts. This reduces the impact of this
+ * work on the system, which is important considering that each zone will be
+ * monitoring its rss.
+ */
+static int64_t
+check_suspend()
+{
+ static hrtime_t last_cap_read = 0;
+ static uint64_t addon;
+ static uint64_t lo_thresh; /* Thresholds for how long to sleep */
+ static uint64_t hi_thresh; /* when under the cap (80% & 90%). */
+ static uint64_t prev_zone_rss = 0;
+ static uint32_t pfdelay = 0; /* usec page fault delay when over */
+
+ /* Wait a second to give the async pageout a chance to catch up. */
+ (void) sleep_shutdown(1);
+
+ while (!shutting_down) {
+ int64_t new_excess;
+ int sleep_time;
+ hrtime_t now;
+ struct stat st;
+ uint64_t zone_rss; /* total RSS(KB) */
+
+ /*
+ * Check if the debug log files exists and enable or disable
+ * debug.
+ */
+ if (debug_log_fp == NULL) {
+ if (stat(debug_log, &st) == 0)
+ debug_log_fp = fopen(debug_log, "w");
+ } else {
+ if (stat(debug_log, &st) == -1) {
+ (void) fclose(debug_log_fp);
+ debug_log_fp = NULL;
+ }
+ }
+
+ /*
+ * If the CAP_REFRESH interval has passed, re-get the current
+ * cap in case it has been dynamically updated.
+ */
+ now = gethrtime();
+ if (now - last_cap_read > CAP_REFRESH) {
+ uint64_t mcap;
+
+ last_cap_read = now;
+
+ mcap = get_zone_cap();
+ if (mcap != 0 && mcap != UINT64_MAX)
+ zone_rss_cap = ROUNDUP(mcap, 1024) / 1024;
+ else
+ zone_rss_cap = UINT64_MAX;
+
+ lo_thresh = (uint64_t)(zone_rss_cap * .8);
+ hi_thresh = (uint64_t)(zone_rss_cap * .9);
+ addon = (uint64_t)(zone_rss_cap * 0.05);
+
+ /*
+ * We allow the memory cap tunables to be changed on
+ * the fly.
+ */
+ get_mcap_tunables();
+
+ debug("%s: %s\n", TUNE_CMD, over_cmd);
+ debug("%s: %d\n", TUNE_NVMU, skip_vmusage);
+ debug("%s: %d\n", TUNE_NPAGE, skip_pageout);
+ debug("%s: %d\n", TUNE_NPFTHROT, skip_pf_throttle);
+ debug("current cap %lluKB lo %lluKB hi %lluKB\n",
+ zone_rss_cap, lo_thresh, hi_thresh);
+ }
+
+ /* No cap, nothing to do. */
+ if (zone_rss_cap == 0 || zone_rss_cap == UINT64_MAX) {
+ debug("no cap, sleep 120 seconds\n");
+ (void) sleep_shutdown(120);
+ continue;
+ }
+
+ zone_rss = get_mem_info();
+
+ /* calculate excess */
+ new_excess = zone_rss - zone_rss_cap;
+
+ debug("rss %lluKB, cap %lluKB, excess %lldKB\n",
+ zone_rss, zone_rss_cap, new_excess);
+
+ /*
+ * If necessary, updates stats.
+ */
+
+ /*
+ * If it looks like we did some paging out since last over the
+ * cap then update the kstat so we can approximate how much was
+ * paged out.
+ */
+ if (prev_zone_rss > zone_rss_cap && zone_rss < prev_zone_rss) {
+ uint64_t diff;
+
+ /* assume diff is num bytes we paged out */
+ diff = (prev_zone_rss - zone_rss) * 1024;
+
+ (void) zone_setattr(zid, ZONE_ATTR_PMCAP_PAGEOUT,
+ &diff, 0);
+ }
+ prev_zone_rss = zone_rss;
+
+ if (new_excess > 0) {
+ uint64_t n = 1;
+
+ /* Increment "nover" kstat. */
+ (void) zone_setattr(zid, ZONE_ATTR_PMCAP_NOVER, &n, 0);
+
+ if (!skip_pf_throttle) {
+ /*
+ * Tell the kernel to start throttling page
+ * faults by some number of usecs to help us
+ * catch up. If we are persistently over the
+ * cap the delay ramps up to a max of 2000usecs.
+ * Note that for delays less than 1 tick
+ * (i.e. all of these) we busy-wait in as_fault.
+ * delay faults/sec
+ * 125 8000
+ * 250 4000
+ * 500 2000
+ * 1000 1000
+ * 2000 500
+ */
+ if (pfdelay == 0)
+ pfdelay = 125;
+ else if (pfdelay < 2000)
+ pfdelay *= 2;
+
+ (void) zone_setattr(zid, ZONE_ATTR_PG_FLT_DELAY,
+ &pfdelay, 0);
+ }
+
+ /*
+ * Once we go over the cap, then we want to
+ * page out a little extra instead of stopping
+ * right at the cap. To do this we add 5% to
+ * the excess so that pageout_proces will work
+ * a little longer before stopping.
+ */
+ return ((int64_t)(new_excess + addon));
+ }
+
+ /*
+ * At this point we are under the cap.
+ *
+ * Tell the kernel to stop throttling page faults.
+ *
+ * Scale the amount of time we sleep before rechecking the
+ * zone's memory usage. Also, scale the accpetable age of
+ * cached results from vm_getusage. We do this based on the
+ * penetration into the capped limit.
+ */
+ if (pfdelay > 0) {
+ pfdelay = 0;
+ (void) zone_setattr(zid, ZONE_ATTR_PG_FLT_DELAY,
+ &pfdelay, 0);
+ }
+
+ if (zone_rss <= lo_thresh) {
+ sleep_time = 120;
+ } else if (zone_rss <= hi_thresh) {
+ sleep_time = 60;
+ } else {
+ sleep_time = 30;
+ }
+
+ debug("sleep %d seconds\n", sleep_time);
+ (void) sleep_shutdown(sleep_time);
+ }
+
+ /* Shutting down, tell the kernel so it doesn't throttle */
+ if (pfdelay > 0) {
+ pfdelay = 0;
+ (void) zone_setattr(zid, ZONE_ATTR_PG_FLT_DELAY, &pfdelay, 0);
+ }
+
+ return (0);
+}
+
+static void
+get_mcap_tunables()
+{
+ zone_dochandle_t handle;
+ struct zone_attrtab attr;
+
+ over_cmd[0] = '\0';
+ if ((handle = zonecfg_init_handle()) == NULL)
+ return;
+
+ if (zonecfg_get_handle(zone_name, handle) != Z_OK)
+ goto done;
+
+ /* Reset to defaults in case rebooting and settings have changed */
+ over_cmd[0] = '\0';
+ skip_vmusage = B_FALSE;
+ skip_pageout = B_FALSE;
+ skip_pf_throttle = B_FALSE;
+
+ if (zonecfg_setattrent(handle) != Z_OK)
+ goto done;
+ while (zonecfg_getattrent(handle, &attr) == Z_OK) {
+ if (strcmp(TUNE_CMD, attr.zone_attr_name) == 0) {
+ (void) strlcpy(over_cmd, attr.zone_attr_value,
+ sizeof (over_cmd));
+ } else if (strcmp(TUNE_NVMU, attr.zone_attr_name) == 0) {
+ if (strcmp("true", attr.zone_attr_value) == 0)
+ skip_vmusage = B_TRUE;
+ } else if (strcmp(TUNE_NPAGE, attr.zone_attr_name) == 0) {
+ if (strcmp("true", attr.zone_attr_value) == 0)
+ skip_pageout = B_TRUE;
+ } else if (strcmp(TUNE_NPFTHROT, attr.zone_attr_name) == 0) {
+ if (strcmp("true", attr.zone_attr_value) == 0)
+ skip_pf_throttle = B_TRUE;
+ }
+ }
+ (void) zonecfg_endattrent(handle);
+
+done:
+ zonecfg_fini_handle(handle);
+}
+
+/* ARGSUSED */
+static int
+chk_proc_fs(void *data, const char *spec, const char *dir,
+ const char *fstype, const char *opt)
+{
+ if (fstype != NULL && strcmp(fstype, "proc") == 0)
+ *((boolean_t *)data) = B_TRUE;
+
+ return (0);
+}
+
+static boolean_t
+has_proc()
+{
+ brand_handle_t bh;
+ boolean_t fnd = B_FALSE;
+
+ if ((bh = brand_open(brand_name)) != NULL) {
+ (void) brand_platform_iter_mounts(bh, chk_proc_fs, &fnd);
+ }
+
+ brand_close(bh);
+ return (fnd);
+}
+
+/*
+ * We run this loop for brands with no /proc to simply update the RSS, using
+ * the cheap GZ /proc data, every 5 minutes.
+ */
+static void
+no_procfs()
+{
+ DIR *pdir = NULL;
+ struct dirent *dent;
+ uint64_t zone_rss_bytes;
+
+ (void) sleep_shutdown(30);
+ while (!shutting_down) {
+ /*
+ * Just do the fast, cheap RSS calculation using the rss value
+ * in psinfo_t. Because that's per-process, it can lead to
+ * double counting some memory and overestimating how much is
+ * being used. Since there is no /proc in the zone, we use the
+ * GZ /proc and check for the correct zone.
+ */
+ if ((pdir = opendir("/proc")) == NULL)
+ return;
+
+ fast_rss = 0;
+ while (!shutting_down && (dent = readdir(pdir)) != NULL) {
+ pid_t pid;
+ int psfd;
+ int64_t rss;
+ char pathbuf[MAXPATHLEN];
+ psinfo_t psinfo;
+
+ if (strcmp(".", dent->d_name) == 0 ||
+ strcmp("..", dent->d_name) == 0)
+ continue;
+
+ pid = atoi(dent->d_name);
+ if (pid == 0 || pid == 1)
+ continue;
+
+ (void) snprintf(pathbuf, sizeof (pathbuf),
+ "/proc/%d/psinfo", pid);
+
+ rss = 0;
+ if ((psfd = open(pathbuf, O_RDONLY, 0000)) != -1) {
+ if (pread(psfd, &psinfo, sizeof (psinfo), 0) ==
+ sizeof (psinfo)) {
+ if (psinfo.pr_zoneid == zid)
+ rss = (int64_t)psinfo.pr_rssize;
+ }
+
+ (void) close(psfd);
+ }
+
+ fast_rss += rss;
+ }
+
+ (void) closedir(pdir);
+
+ if (shutting_down)
+ return;
+
+ zone_rss_bytes = fast_rss * 1024;
+ /* Use the zone's approx. RSS in the kernel */
+ (void) zone_setattr(zid, ZONE_ATTR_RSS, &zone_rss_bytes, 0);
+
+ (void) sleep_shutdown(300);
+ }
+}
+
+/*
+ * Thread that checks zone's memory usage and when over the cap, goes through
+ * the zone's process list trying to pageout processes to get under the cap.
+ */
+static void
+mcap_zone()
+{
+ DIR *pdir = NULL;
+ int64_t excess;
+
+ debug("thread startup\n");
+
+ get_mcap_tunables();
+
+ /*
+ * If the zone has no /proc filesystem, we can't use the fast algorithm
+ * to check RSS or pageout any processes. All we can do is periodically
+ * update it's RSS kstat using the expensive sycall.
+ */
+ if (!has_proc()) {
+ no_procfs();
+ debug("thread shutdown\n");
+ return;
+ }
+
+ /*
+ * When first starting it is likely lots of other zones are starting
+ * too because the system is booting. Since we just started the zone
+ * we're not worried about being over the cap right away, so we let
+ * things settle a bit and tolerate some older data here to minimize
+ * the load on the system.
+ */
+ (void) sleep_shutdown(15); /* wait 15 secs. so the zone can get going */
+
+ /* Wait until zone's /proc is mounted */
+ while (!shutting_down) {
+ struct stat st;
+
+ if (stat(zoneproc, &st) == 0 &&
+ strcmp(st.st_fstype, "proc") == 0)
+ break;
+ sleep_shutdown(5);
+ }
+
+ /* Open zone's /proc and walk entries. */
+ while (!shutting_down) {
+ if ((pdir = opendir(zoneproc)) != NULL)
+ break;
+ sleep_shutdown(5);
+ }
+
+ while (!shutting_down) {
+ struct dirent *dirent;
+
+ /* Wait until we've gone over the cap. */
+ excess = check_suspend();
+
+ debug("starting to scan, excess %lldk\n", (long long)excess);
+
+ if (over_cmd[0] != '\0') {
+ uint64_t zone_rss; /* total RSS(KB) */
+
+ debug("run phys_mcap_cmd: %s\n", over_cmd);
+ run_over_cmd();
+
+ zone_rss = get_mem_info();
+ excess = zone_rss - zone_rss_cap;
+ debug("rss %lluKB, cap %lluKB, excess %lldKB\n",
+ zone_rss, zone_rss_cap, excess);
+ if (excess <= 0)
+ continue;
+ }
+
+ while (!shutting_down && (dirent = readdir(pdir)) != NULL) {
+ pid_t pid;
+
+ if (strcmp(".", dirent->d_name) == 0 ||
+ strcmp("..", dirent->d_name) == 0)
+ continue;
+
+ pid = atoi(dirent->d_name);
+ if (pid == 0 || pid == 1)
+ continue;
+
+ if (skip_pageout)
+ (void) sleep_shutdown(2);
+ else
+ excess = pageout_process(pid, excess);
+
+ if (excess <= 0) {
+ debug("apparently under; excess %lld\n",
+ (long long)excess);
+ /* Double check the current excess */
+ excess = check_suspend();
+ }
+ }
+
+ debug("process pass done; excess %lld\n", (long long)excess);
+ rewinddir(pdir);
+
+ if (skip_pageout)
+ (void) sleep_shutdown(120);
+ }
+
+ if (pdir != NULL)
+ (void) closedir(pdir);
+ debug("thread shutdown\n");
+}
+
+void
+create_mcap_thread(zlog_t *zlogp, zoneid_t id)
+{
+ int res;
+
+ shutting_down = 0;
+ zid = id;
+ logp = zlogp;
+
+ /* all but the lx brand currently use /proc */
+ if (strcmp(brand_name, "lx") == 0) {
+ (void) snprintf(zoneproc, sizeof (zoneproc),
+ "%s/root/native/proc", zonepath);
+ } else {
+ (void) snprintf(zoneproc, sizeof (zoneproc), "%s/root/proc",
+ zonepath);
+ }
+
+ (void) snprintf(debug_log, sizeof (debug_log), "%s/mcap_debug.log",
+ zonepath);
+
+ res = thr_create(NULL, NULL, (void *(*)(void *))mcap_zone, NULL, NULL,
+ &mcap_tid);
+ if (res != 0) {
+ zerror(zlogp, B_FALSE, "error %d creating memory cap thread",
+ res);
+ mcap_tid = 0;
+ }
+}
+
+void
+destroy_mcap_thread()
+{
+ if (mcap_tid != 0) {
+ shutting_down = 1;
+ (void) cond_signal(&shutdown_cv);
+ (void) thr_join(mcap_tid, NULL, NULL);
+ mcap_tid = 0;
+ }
+}
diff --git a/usr/src/cmd/zoneadmd/vplat.c b/usr/src/cmd/zoneadmd/vplat.c
index 9d32485fad..cdfa7a8785 100644
--- a/usr/src/cmd/zoneadmd/vplat.c
+++ b/usr/src/cmd/zoneadmd/vplat.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, Joyent Inc. All rights reserved.
+ * Copyright 2016, Joyent Inc.
* Copyright (c) 2015 by Delphix. All rights reserved.
*/
@@ -138,6 +138,9 @@
#define ALT_MOUNT(mount_cmd) ((mount_cmd) != Z_MNT_BOOT)
+/* Number of times to retry unmounting if it fails */
+#define UMOUNT_RETRIES 30
+
/* a reasonable estimate for the number of lwps per process */
#define LWPS_PER_PROCESS 10
@@ -161,11 +164,25 @@ static priv_set_t *zprivs = NULL;
static const char *DFLT_FS_ALLOWED = "hsfs,smbfs,nfs,nfs3,nfs4,nfsdyn";
+typedef struct zone_proj_rctl_map {
+ char *zpr_zone_rctl;
+ char *zpr_project_rctl;
+} zone_proj_rctl_map_t;
+
+static zone_proj_rctl_map_t zone_proj_rctl_map[] = {
+ {"zone.max-msg-ids", "project.max-msg-ids"},
+ {"zone.max-sem-ids", "project.max-sem-ids"},
+ {"zone.max-shm-ids", "project.max-shm-ids"},
+ {"zone.max-shm-memory", "project.max-shm-memory"},
+ {NULL, NULL}
+};
+
/* from libsocket, not in any header file */
extern int getnetmaskbyaddr(struct in_addr, struct in_addr *);
/* from zoneadmd */
extern char query_hook[];
+extern char post_statechg_hook[];
/*
* For each "net" resource configured in zonecfg, we track a zone_addr_list_t
@@ -202,7 +219,7 @@ autofs_cleanup(zoneid_t zoneid)
/*
* Ask autofs to unmount all trigger nodes in the given zone.
*/
- return (_autofssys(AUTOFS_UNMOUNTALL, (void *)zoneid));
+ return (_autofssys(AUTOFS_UNMOUNTALL, (void *)((uintptr_t)zoneid)));
}
static void
@@ -593,6 +610,24 @@ root_to_lu(zlog_t *zlogp, char *zroot, size_t zrootlen, boolean_t isresolved)
}
/*
+ * Perform brand-specific cleanup if we are unable to unmount a FS.
+ */
+static void
+brand_umount_cleanup(zlog_t *zlogp, char *path)
+{
+ char cmdbuf[2 * MAXPATHLEN];
+
+ if (post_statechg_hook[0] == '\0')
+ return;
+
+ if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", post_statechg_hook,
+ ZONE_STATE_DOWN, Z_UNMOUNT, path) > sizeof (cmdbuf))
+ return;
+
+ (void) do_subproc(zlogp, cmdbuf, NULL, B_FALSE);
+}
+
+/*
* The general strategy for unmounting filesystems is as follows:
*
* - Remote filesystems may be dead, and attempting to contact them as
@@ -625,6 +660,7 @@ static int
unmount_filesystems(zlog_t *zlogp, zoneid_t zoneid, boolean_t unmount_cmd)
{
int error = 0;
+ int fail = 0;
FILE *mnttab;
struct mnttab *mnts;
uint_t nmnt;
@@ -712,18 +748,39 @@ unmount_filesystems(zlog_t *zlogp, zoneid_t zoneid, boolean_t unmount_cmd)
if (umount2(path, MS_FORCE) == 0) {
unmounted = B_TRUE;
stuck = B_FALSE;
+ fail = 0;
} else {
/*
- * The first failure indicates a
- * mount we won't be able to get
- * rid of automatically, so we
- * bail.
+ * We may hit a failure here if there
+ * is an app in the GZ with an open
+ * pipe into the zone (commonly into
+ * the zone's /var/run). This type
+ * of app will notice the closed
+ * connection and cleanup, but it may
+ * take a while and we have no easy
+ * way to notice that. To deal with
+ * this case, we will wait and retry
+ * a few times before we give up.
*/
- error++;
- zerror(zlogp, B_FALSE,
- "unable to unmount '%s'", path);
- free_mnttable(mnts, nmnt);
- goto out;
+ fail++;
+ if (fail < (UMOUNT_RETRIES - 1)) {
+ zerror(zlogp, B_FALSE,
+ "unable to unmount '%s', "
+ "retrying in 2 seconds",
+ path);
+ (void) sleep(2);
+ } else if (fail > UMOUNT_RETRIES) {
+ error++;
+ zerror(zlogp, B_FALSE,
+ "unmount of '%s' failed",
+ path);
+ free_mnttable(mnts, nmnt);
+ goto out;
+ } else {
+ /* Try the hook 2 times */
+ brand_umount_cleanup(zlogp,
+ path);
+ }
}
}
/*
@@ -1061,23 +1118,10 @@ mount_one_dev_symlink_cb(void *arg, const char *source, const char *target)
int
vplat_get_iptype(zlog_t *zlogp, zone_iptype_t *iptypep)
{
- zone_dochandle_t handle;
-
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE, "getting zone configuration handle");
- return (-1);
- }
- if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
- zerror(zlogp, B_FALSE, "invalid configuration");
- zonecfg_fini_handle(handle);
- return (-1);
- }
- if (zonecfg_get_iptype(handle, iptypep) != Z_OK) {
+ if (zonecfg_get_iptype(snap_hndl, iptypep) != Z_OK) {
zerror(zlogp, B_FALSE, "invalid ip-type configuration");
- zonecfg_fini_handle(handle);
return (-1);
}
- zonecfg_fini_handle(handle);
return (0);
}
@@ -1090,14 +1134,13 @@ static int
mount_one_dev(zlog_t *zlogp, char *devpath, zone_mnt_t mount_cmd)
{
char brand[MAXNAMELEN];
- zone_dochandle_t handle = NULL;
brand_handle_t bh = NULL;
struct zone_devtab ztab;
di_prof_t prof = NULL;
int err;
int retval = -1;
zone_iptype_t iptype;
- const char *curr_iptype;
+ const char *curr_iptype = NULL;
if (di_prof_init(devpath, &prof)) {
zerror(zlogp, B_TRUE, "failed to initialize profile");
@@ -1132,6 +1175,8 @@ mount_one_dev(zlog_t *zlogp, char *devpath, zone_mnt_t mount_cmd)
curr_iptype = "exclusive";
break;
}
+ if (curr_iptype == NULL)
+ abort();
if (brand_platform_iter_devices(bh, zone_name,
mount_one_dev_device_cb, prof, curr_iptype) != 0) {
@@ -1146,28 +1191,19 @@ mount_one_dev(zlog_t *zlogp, char *devpath, zone_mnt_t mount_cmd)
}
/* Add user-specified devices and directories */
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_FALSE, "can't initialize zone handle");
- goto cleanup;
- }
- if (err = zonecfg_get_handle(zone_name, handle)) {
- zerror(zlogp, B_FALSE, "can't get handle for zone "
- "%s: %s", zone_name, zonecfg_strerror(err));
- goto cleanup;
- }
- if (err = zonecfg_setdevent(handle)) {
+ if ((err = zonecfg_setdevent(snap_hndl)) != 0) {
zerror(zlogp, B_FALSE, "%s: %s", zone_name,
zonecfg_strerror(err));
goto cleanup;
}
- while (zonecfg_getdevent(handle, &ztab) == Z_OK) {
+ while (zonecfg_getdevent(snap_hndl, &ztab) == Z_OK) {
if (di_prof_add_dev(prof, ztab.zone_dev_match)) {
zerror(zlogp, B_TRUE, "failed to add "
"user-specified device");
goto cleanup;
}
}
- (void) zonecfg_enddevent(handle);
+ (void) zonecfg_enddevent(snap_hndl);
/* Send profile to kernel */
if (di_prof_commit(prof)) {
@@ -1180,8 +1216,6 @@ mount_one_dev(zlog_t *zlogp, char *devpath, zone_mnt_t mount_cmd)
cleanup:
if (bh != NULL)
brand_close(bh);
- if (handle != NULL)
- zonecfg_fini_handle(handle);
if (prof)
di_prof_fini(prof);
return (retval);
@@ -1671,12 +1705,10 @@ static int
mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd)
{
char rootpath[MAXPATHLEN];
- char zonepath[MAXPATHLEN];
char brand[MAXNAMELEN];
char luroot[MAXPATHLEN];
int i, num_fs = 0;
struct zone_fstab *fs_ptr = NULL;
- zone_dochandle_t handle = NULL;
zone_state_t zstate;
brand_handle_t bh;
plat_gmount_cb_data_t cb;
@@ -1690,22 +1722,12 @@ mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd)
goto bad;
}
- if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
- zerror(zlogp, B_TRUE, "unable to determine zone path");
- goto bad;
- }
-
if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) {
zerror(zlogp, B_TRUE, "unable to determine zone root");
goto bad;
}
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE, "getting zone configuration handle");
- goto bad;
- }
- if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK ||
- zonecfg_setfsent(handle) != Z_OK) {
+ if (zonecfg_setfsent(snap_hndl) != Z_OK) {
zerror(zlogp, B_FALSE, "invalid configuration");
goto bad;
}
@@ -1723,7 +1745,6 @@ mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd)
/* Get a handle to the brand info for this zone */
if ((bh = brand_open(brand)) == NULL) {
zerror(zlogp, B_FALSE, "unable to determine zone brand");
- zonecfg_fini_handle(handle);
return (-1);
}
@@ -1734,11 +1755,10 @@ mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd)
cb.pgcd_zlogp = zlogp;
cb.pgcd_fs_tab = &fs_ptr;
cb.pgcd_num_fs = &num_fs;
- if (brand_platform_iter_gmounts(bh, zonepath,
+ if (brand_platform_iter_gmounts(bh, zone_name, zonepath,
plat_gmount_cb, &cb) != 0) {
zerror(zlogp, B_FALSE, "unable to mount filesystems");
brand_close(bh);
- zonecfg_fini_handle(handle);
return (-1);
}
brand_close(bh);
@@ -1749,13 +1769,10 @@ mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd)
* higher level directories (e.g., /usr) get mounted before
* any beneath them (e.g., /usr/local).
*/
- if (mount_filesystems_fsent(handle, zlogp, &fs_ptr, &num_fs,
+ if (mount_filesystems_fsent(snap_hndl, zlogp, &fs_ptr, &num_fs,
mount_cmd) != 0)
goto bad;
- zonecfg_fini_handle(handle);
- handle = NULL;
-
/*
* Normally when we mount a zone all the zone filesystems
* get mounted relative to rootpath, which is usually
@@ -1834,8 +1851,6 @@ mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd)
return (0);
bad:
- if (handle != NULL)
- zonecfg_fini_handle(handle);
free_fs_data(fs_ptr, num_fs);
return (-1);
}
@@ -2191,13 +2206,7 @@ configure_one_interface(zlog_t *zlogp, zoneid_t zone_id,
if (ioctl(s, SIOCLIFADDIF, (caddr_t)&lifr) < 0) {
/*
* Here, we know that the interface can't be brought up.
- * A similar warning message was already printed out to
- * the console by zoneadm(1M) so instead we log the
- * message to syslog and continue.
*/
- zerror(&logsys, B_TRUE, "WARNING: skipping network interface "
- "'%s' which may not be present/plumbed in the "
- "global zone.", lifr.lifr_name);
(void) close(s);
return (Z_OK);
}
@@ -2410,7 +2419,6 @@ bad:
static int
configure_shared_network_interfaces(zlog_t *zlogp)
{
- zone_dochandle_t handle;
struct zone_nwiftab nwiftab, loopback_iftab;
zoneid_t zoneid;
@@ -2419,29 +2427,19 @@ configure_shared_network_interfaces(zlog_t *zlogp)
return (-1);
}
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE, "getting zone configuration handle");
- return (-1);
- }
- if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
- zerror(zlogp, B_FALSE, "invalid configuration");
- zonecfg_fini_handle(handle);
- return (-1);
- }
- if (zonecfg_setnwifent(handle) == Z_OK) {
+ if (zonecfg_setnwifent(snap_hndl) == Z_OK) {
for (;;) {
- if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK)
+ if (zonecfg_getnwifent(snap_hndl, &nwiftab) != Z_OK)
break;
+ nwifent_free_attrs(&nwiftab);
if (configure_one_interface(zlogp, zoneid, &nwiftab) !=
Z_OK) {
- (void) zonecfg_endnwifent(handle);
- zonecfg_fini_handle(handle);
+ (void) zonecfg_endnwifent(snap_hndl);
return (-1);
}
}
- (void) zonecfg_endnwifent(handle);
+ (void) zonecfg_endnwifent(snap_hndl);
}
- zonecfg_fini_handle(handle);
if (is_system_labeled()) {
/*
* Labeled zones share the loopback interface
@@ -2895,7 +2893,6 @@ free_ip_interface(zone_addr_list_t *zalist)
static int
configure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid)
{
- zone_dochandle_t handle;
struct zone_nwiftab nwiftab;
char rootpath[MAXPATHLEN];
char path[MAXPATHLEN];
@@ -2904,30 +2901,18 @@ configure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid)
boolean_t added = B_FALSE;
zone_addr_list_t *zalist = NULL, *new;
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE, "getting zone configuration handle");
- return (-1);
- }
- if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
- zerror(zlogp, B_FALSE, "invalid configuration");
- zonecfg_fini_handle(handle);
- return (-1);
- }
-
- if (zonecfg_setnwifent(handle) != Z_OK) {
- zonecfg_fini_handle(handle);
+ if (zonecfg_setnwifent(snap_hndl) != Z_OK)
return (0);
- }
for (;;) {
- if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK)
+ if (zonecfg_getnwifent(snap_hndl, &nwiftab) != Z_OK)
break;
+ nwifent_free_attrs(&nwiftab);
if (prof == NULL) {
if (zone_get_devroot(zone_name, rootpath,
sizeof (rootpath)) != Z_OK) {
- (void) zonecfg_endnwifent(handle);
- zonecfg_fini_handle(handle);
+ (void) zonecfg_endnwifent(snap_hndl);
zerror(zlogp, B_TRUE,
"unable to determine dev root");
return (-1);
@@ -2935,8 +2920,7 @@ configure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid)
(void) snprintf(path, sizeof (path), "%s%s", rootpath,
"/dev");
if (di_prof_init(path, &prof) != 0) {
- (void) zonecfg_endnwifent(handle);
- zonecfg_fini_handle(handle);
+ (void) zonecfg_endnwifent(snap_hndl);
zerror(zlogp, B_TRUE,
"failed to initialize profile");
return (-1);
@@ -2960,17 +2944,17 @@ configure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid)
nwiftab.zone_nwif_physical) == 0) {
added = B_TRUE;
} else {
- (void) zonecfg_endnwifent(handle);
- zonecfg_fini_handle(handle);
- zerror(zlogp, B_TRUE, "failed to add network device");
- return (-1);
+ /*
+ * Failed to add network device, but the brand hook
+ * might be doing this for us, so keep silent.
+ */
+ continue;
}
/* set up the new IP interface, and add them all later */
new = malloc(sizeof (*new));
if (new == NULL) {
zerror(zlogp, B_TRUE, "no memory for %s",
nwiftab.zone_nwif_physical);
- zonecfg_fini_handle(handle);
free_ip_interface(zalist);
}
bzero(new, sizeof (*new));
@@ -2980,16 +2964,14 @@ configure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid)
}
if (zalist != NULL) {
if ((errno = add_net(zlogp, zoneid, zalist)) != 0) {
- (void) zonecfg_endnwifent(handle);
- zonecfg_fini_handle(handle);
+ (void) zonecfg_endnwifent(snap_hndl);
zerror(zlogp, B_TRUE, "failed to add address");
free_ip_interface(zalist);
return (-1);
}
free_ip_interface(zalist);
}
- (void) zonecfg_endnwifent(handle);
- zonecfg_fini_handle(handle);
+ (void) zonecfg_endnwifent(snap_hndl);
if (prof != NULL && added) {
if (di_prof_commit(prof) != 0) {
@@ -3125,48 +3107,23 @@ remove_datalink_protect(zlog_t *zlogp, zoneid_t zoneid)
/* datalink does not belong to the GZ */
continue;
}
- if (dlstatus != DLADM_STATUS_OK) {
+ if (dlstatus != DLADM_STATUS_OK)
zerror(zlogp, B_FALSE,
+ "clear 'protection' link property: %s",
dladm_status2str(dlstatus, dlerr));
- free(dllinks);
- return (-1);
- }
+
dlstatus = dladm_set_linkprop(dld_handle, *dllink,
"allowed-ips", NULL, 0, DLADM_OPT_ACTIVE);
- if (dlstatus != DLADM_STATUS_OK) {
+ if (dlstatus != DLADM_STATUS_OK)
zerror(zlogp, B_FALSE,
+ "clear 'allowed-ips' link property: %s",
dladm_status2str(dlstatus, dlerr));
- free(dllinks);
- return (-1);
- }
}
free(dllinks);
return (0);
}
static int
-unconfigure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid)
-{
- int dlnum = 0;
-
- /*
- * The kernel shutdown callback for the dls module should have removed
- * all datalinks from this zone. If any remain, then there's a
- * problem.
- */
- if (zone_list_datalink(zoneid, &dlnum, NULL) != 0) {
- zerror(zlogp, B_TRUE, "unable to list network interfaces");
- return (-1);
- }
- if (dlnum != 0) {
- zerror(zlogp, B_FALSE,
- "datalinks remain in zone after shutdown");
- return (-1);
- }
- return (0);
-}
-
-static int
tcp_abort_conn(zlog_t *zlogp, zoneid_t zoneid,
const struct sockaddr_storage *local, const struct sockaddr_storage *remote)
{
@@ -3248,26 +3205,14 @@ static int
get_privset(zlog_t *zlogp, priv_set_t *privs, zone_mnt_t mount_cmd)
{
int error = -1;
- zone_dochandle_t handle;
char *privname = NULL;
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE, "getting zone configuration handle");
- return (-1);
- }
- if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
- zerror(zlogp, B_FALSE, "invalid configuration");
- zonecfg_fini_handle(handle);
- return (-1);
- }
-
if (ALT_MOUNT(mount_cmd)) {
zone_iptype_t iptype;
- const char *curr_iptype;
+ const char *curr_iptype = NULL;
- if (zonecfg_get_iptype(handle, &iptype) != Z_OK) {
+ if (zonecfg_get_iptype(snap_hndl, &iptype) != Z_OK) {
zerror(zlogp, B_TRUE, "unable to determine ip-type");
- zonecfg_fini_handle(handle);
return (-1);
}
@@ -3280,17 +3225,15 @@ get_privset(zlog_t *zlogp, priv_set_t *privs, zone_mnt_t mount_cmd)
break;
}
- if (zonecfg_default_privset(privs, curr_iptype) == Z_OK) {
- zonecfg_fini_handle(handle);
+ if (zonecfg_default_privset(privs, curr_iptype) == Z_OK)
return (0);
- }
+
zerror(zlogp, B_FALSE,
"failed to determine the zone's default privilege set");
- zonecfg_fini_handle(handle);
return (-1);
}
- switch (zonecfg_get_privset(handle, privs, &privname)) {
+ switch (zonecfg_get_privset(snap_hndl, privs, &privname)) {
case Z_OK:
error = 0;
break;
@@ -3313,10 +3256,22 @@ get_privset(zlog_t *zlogp, priv_set_t *privs, zone_mnt_t mount_cmd)
}
free(privname);
- zonecfg_fini_handle(handle);
return (error);
}
+static char *
+zone_proj_rctl(const char *name)
+{
+ int i;
+
+ for (i = 0; zone_proj_rctl_map[i].zpr_zone_rctl != NULL; i++) {
+ if (strcmp(name, zone_proj_rctl_map[i].zpr_zone_rctl) == 0) {
+ return (zone_proj_rctl_map[i].zpr_project_rctl);
+ }
+ }
+ return (NULL);
+}
+
static int
get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
{
@@ -3326,25 +3281,15 @@ get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
nvlist_t **nvlv = NULL;
int rctlcount = 0;
int error = -1;
- zone_dochandle_t handle;
struct zone_rctltab rctltab;
rctlblk_t *rctlblk = NULL;
uint64_t maxlwps;
uint64_t maxprocs;
+ int rproc, rlwp;
*bufp = NULL;
*bufsizep = 0;
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE, "getting zone configuration handle");
- return (-1);
- }
- if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
- zerror(zlogp, B_FALSE, "invalid configuration");
- zonecfg_fini_handle(handle);
- return (-1);
- }
-
rctltab.zone_rctl_valptr = NULL;
if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) {
zerror(zlogp, B_TRUE, "%s failed", "nvlist_alloc");
@@ -3353,22 +3298,31 @@ get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
/*
* Allow the administrator to control both the maximum number of
- * process table slots and the maximum number of lwps with just the
- * max-processes property. If only the max-processes property is set,
- * we add a max-lwps property with a limit derived from max-processes.
+ * process table slots, and the maximum number of lwps, with a single
+ * max-processes or max-lwps property. If only the max-processes
+ * property is set, we add a max-lwps property with a limit derived
+ * from max-processes. If only the max-lwps property is set, we add a
+ * max-processes property with the same limit as max-lwps.
*/
- if (zonecfg_get_aliased_rctl(handle, ALIAS_MAXPROCS, &maxprocs)
- == Z_OK &&
- zonecfg_get_aliased_rctl(handle, ALIAS_MAXLWPS, &maxlwps)
- == Z_NO_ENTRY) {
- if (zonecfg_set_aliased_rctl(handle, ALIAS_MAXLWPS,
+ rproc = zonecfg_get_aliased_rctl(snap_hndl, ALIAS_MAXPROCS, &maxprocs);
+ rlwp = zonecfg_get_aliased_rctl(snap_hndl, ALIAS_MAXLWPS, &maxlwps);
+ if (rproc == Z_OK && rlwp == Z_NO_ENTRY) {
+ if (zonecfg_set_aliased_rctl(snap_hndl, ALIAS_MAXLWPS,
maxprocs * LWPS_PER_PROCESS) != Z_OK) {
zerror(zlogp, B_FALSE, "unable to set max-lwps alias");
goto out;
}
+ } else if (rlwp == Z_OK && rproc == Z_NO_ENTRY) {
+ /* no scaling for max-proc value */
+ if (zonecfg_set_aliased_rctl(snap_hndl, ALIAS_MAXPROCS,
+ maxlwps) != Z_OK) {
+ zerror(zlogp, B_FALSE,
+ "unable to set max-processes alias");
+ goto out;
+ }
}
- if (zonecfg_setrctlent(handle) != Z_OK) {
+ if (zonecfg_setrctlent(snap_hndl) != Z_OK) {
zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setrctlent");
goto out;
}
@@ -3377,10 +3331,11 @@ get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
zerror(zlogp, B_TRUE, "memory allocation failed");
goto out;
}
- while (zonecfg_getrctlent(handle, &rctltab) == Z_OK) {
+ while (zonecfg_getrctlent(snap_hndl, &rctltab) == Z_OK) {
struct zone_rctlvaltab *rctlval;
uint_t i, count;
const char *name = rctltab.zone_rctl_name;
+ char *proj_nm;
/* zoneadm should have already warned about unknown rctls. */
if (!zonecfg_is_rctl(name)) {
@@ -3447,6 +3402,26 @@ get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
}
zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
rctltab.zone_rctl_valptr = NULL;
+
+ /*
+ * With no action on our part we will start zsched with the
+ * project rctl values for our (zoneadmd) current project. For
+ * brands running a variant of Illumos, that's not a problem
+ * since they will setup their own projects, but for a
+ * non-native brand like lx, where there are no projects, we
+ * want to start things up with the same project rctls as the
+ * corresponding zone rctls, since nothing within the zone will
+ * ever change the project rctls.
+ */
+ if ((proj_nm = zone_proj_rctl(name)) != NULL) {
+ if (nvlist_add_nvlist_array(nvl, proj_nm, nvlv, count)
+ != 0) {
+ zerror(zlogp, B_FALSE,
+ "nvlist_add_nvlist_arrays failed");
+ goto out;
+ }
+ }
+
if (nvlist_add_nvlist_array(nvl, (char *)name, nvlv, count)
!= 0) {
zerror(zlogp, B_FALSE, "%s failed",
@@ -3459,7 +3434,7 @@ get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
nvlv = NULL;
rctlcount++;
}
- (void) zonecfg_endrctlent(handle);
+ (void) zonecfg_endrctlent(snap_hndl);
if (rctlcount == 0) {
error = 0;
@@ -3483,8 +3458,6 @@ out:
nvlist_free(nvl);
if (nvlv != NULL)
free(nvlv);
- if (handle != NULL)
- zonecfg_fini_handle(handle);
return (error);
}
@@ -3500,7 +3473,7 @@ get_implicit_datasets(zlog_t *zlogp, char **retstr)
> sizeof (cmdbuf))
return (-1);
- if (do_subproc(zlogp, cmdbuf, retstr) != 0)
+ if (do_subproc(zlogp, cmdbuf, retstr, B_FALSE) != 0)
return (-1);
return (0);
@@ -3509,7 +3482,6 @@ get_implicit_datasets(zlog_t *zlogp, char **retstr)
static int
get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep)
{
- zone_dochandle_t handle;
struct zone_dstab dstab;
size_t total, offset, len;
int error = -1;
@@ -3520,30 +3492,20 @@ get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep)
*bufp = NULL;
*bufsizep = 0;
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE, "getting zone configuration handle");
- return (-1);
- }
- if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
- zerror(zlogp, B_FALSE, "invalid configuration");
- zonecfg_fini_handle(handle);
- return (-1);
- }
-
if (get_implicit_datasets(zlogp, &implicit_datasets) != 0) {
zerror(zlogp, B_FALSE, "getting implicit datasets failed");
goto out;
}
- if (zonecfg_setdsent(handle) != Z_OK) {
+ if (zonecfg_setdsent(snap_hndl) != Z_OK) {
zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent");
goto out;
}
total = 0;
- while (zonecfg_getdsent(handle, &dstab) == Z_OK)
+ while (zonecfg_getdsent(snap_hndl, &dstab) == Z_OK)
total += strlen(dstab.zone_dataset_name) + 1;
- (void) zonecfg_enddsent(handle);
+ (void) zonecfg_enddsent(snap_hndl);
if (implicit_datasets != NULL)
implicit_len = strlen(implicit_datasets);
@@ -3560,12 +3522,12 @@ get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep)
goto out;
}
- if (zonecfg_setdsent(handle) != Z_OK) {
+ if (zonecfg_setdsent(snap_hndl) != Z_OK) {
zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent");
goto out;
}
offset = 0;
- while (zonecfg_getdsent(handle, &dstab) == Z_OK) {
+ while (zonecfg_getdsent(snap_hndl, &dstab) == Z_OK) {
len = strlen(dstab.zone_dataset_name);
(void) strlcpy(str + offset, dstab.zone_dataset_name,
total - offset);
@@ -3573,7 +3535,7 @@ get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep)
if (offset < total - 1)
str[offset++] = ',';
}
- (void) zonecfg_enddsent(handle);
+ (void) zonecfg_enddsent(snap_hndl);
if (implicit_len > 0)
(void) strlcpy(str + offset, implicit_datasets, total - offset);
@@ -3585,8 +3547,6 @@ get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep)
out:
if (error != 0 && str != NULL)
free(str);
- if (handle != NULL)
- zonecfg_fini_handle(handle);
if (implicit_datasets != NULL)
free(implicit_datasets);
@@ -3596,40 +3556,26 @@ out:
static int
validate_datasets(zlog_t *zlogp)
{
- zone_dochandle_t handle;
struct zone_dstab dstab;
zfs_handle_t *zhp;
libzfs_handle_t *hdl;
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE, "getting zone configuration handle");
- return (-1);
- }
- if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
- zerror(zlogp, B_FALSE, "invalid configuration");
- zonecfg_fini_handle(handle);
- return (-1);
- }
-
- if (zonecfg_setdsent(handle) != Z_OK) {
+ if (zonecfg_setdsent(snap_hndl) != Z_OK) {
zerror(zlogp, B_FALSE, "invalid configuration");
- zonecfg_fini_handle(handle);
return (-1);
}
if ((hdl = libzfs_init()) == NULL) {
zerror(zlogp, B_FALSE, "opening ZFS library");
- zonecfg_fini_handle(handle);
return (-1);
}
- while (zonecfg_getdsent(handle, &dstab) == Z_OK) {
+ while (zonecfg_getdsent(snap_hndl, &dstab) == Z_OK) {
if ((zhp = zfs_open(hdl, dstab.zone_dataset_name,
ZFS_TYPE_FILESYSTEM)) == NULL) {
zerror(zlogp, B_FALSE, "cannot open ZFS dataset '%s'",
dstab.zone_dataset_name);
- zonecfg_fini_handle(handle);
libzfs_fini(hdl);
return (-1);
}
@@ -3644,7 +3590,6 @@ validate_datasets(zlog_t *zlogp)
zerror(zlogp, B_FALSE, "cannot set 'zoned' "
"property for ZFS dataset '%s'\n",
dstab.zone_dataset_name);
- zonecfg_fini_handle(handle);
zfs_close(zhp);
libzfs_fini(hdl);
return (-1);
@@ -3652,9 +3597,8 @@ validate_datasets(zlog_t *zlogp)
zfs_close(zhp);
}
- (void) zonecfg_enddsent(handle);
+ (void) zonecfg_enddsent(snap_hndl);
- zonecfg_fini_handle(handle);
libzfs_fini(hdl);
return (0);
@@ -3708,17 +3652,11 @@ validate_rootds_label(zlog_t *zlogp, char *rootpath, m_label_t *zone_sl)
zfs_handle_t *zhp;
libzfs_handle_t *hdl;
m_label_t ds_sl;
- char zonepath[MAXPATHLEN];
char ds_hexsl[MAXNAMELEN];
if (!is_system_labeled())
return (0);
- if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
- zerror(zlogp, B_TRUE, "unable to determine zone path");
- return (-1);
- }
-
if (!is_zonepath_zfs(zonepath))
return (0);
@@ -4389,62 +4327,52 @@ duplicate_reachable_path(zlog_t *zlogp, const char *rootpath)
}
/*
- * Set memory cap and pool info for the zone's resource management
- * configuration.
+ * Set pool info for the zone's resource management configuration.
*/
static int
setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid)
{
int res;
uint64_t tmp;
- struct zone_mcaptab mcap;
char sched[MAXNAMELEN];
- zone_dochandle_t handle = NULL;
char pool_err[128];
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE, "getting zone configuration handle");
- return (Z_BAD_HANDLE);
- }
-
- if ((res = zonecfg_get_snapshot_handle(zone_name, handle)) != Z_OK) {
- zerror(zlogp, B_FALSE, "invalid configuration");
- zonecfg_fini_handle(handle);
- return (res);
- }
-
- /*
- * If a memory cap is configured, set the cap in the kernel using
- * zone_setattr() and make sure the rcapd SMF service is enabled.
- */
- if (zonecfg_getmcapent(handle, &mcap) == Z_OK) {
- uint64_t num;
- char smf_err[128];
-
- num = (uint64_t)strtoull(mcap.zone_physmem_cap, NULL, 10);
- if (zone_setattr(zoneid, ZONE_ATTR_PHYS_MCAP, &num, 0) == -1) {
- zerror(zlogp, B_TRUE, "could not set zone memory cap");
- zonecfg_fini_handle(handle);
- return (Z_INVAL);
- }
-
- if (zonecfg_enable_rcapd(smf_err, sizeof (smf_err)) != Z_OK) {
- zerror(zlogp, B_FALSE, "enabling system/rcap service "
- "failed: %s", smf_err);
- zonecfg_fini_handle(handle);
- return (Z_INVAL);
- }
- }
-
/* Get the scheduling class set in the zone configuration. */
- if (zonecfg_get_sched_class(handle, sched, sizeof (sched)) == Z_OK &&
+ if (zonecfg_get_sched_class(snap_hndl, sched, sizeof (sched)) == Z_OK &&
strlen(sched) > 0) {
if (zone_setattr(zoneid, ZONE_ATTR_SCHED_CLASS, sched,
strlen(sched)) == -1)
zerror(zlogp, B_TRUE, "WARNING: unable to set the "
"default scheduling class");
- } else if (zonecfg_get_aliased_rctl(handle, ALIAS_SHARES, &tmp)
+ if (strcmp(sched, "FX") == 0) {
+ /*
+ * When FX is specified then by default all processes
+ * will start at the lowest priority level (0) and
+ * stay there. We support an optional attr which
+ * indicates that all the processes should be "high
+ * priority". We set this on the zone so that starting
+ * init will set the priority high.
+ */
+ struct zone_attrtab a;
+
+ bzero(&a, sizeof (a));
+ (void) strlcpy(a.zone_attr_name, "fixed-hi-prio",
+ sizeof (a.zone_attr_name));
+
+ if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK &&
+ strcmp(a.zone_attr_value, "true") == 0) {
+ boolean_t hi = B_TRUE;
+
+ if (zone_setattr(zoneid,
+ ZONE_ATTR_SCHED_FIXEDHI, (void *)hi,
+ sizeof (hi)) == -1)
+ zerror(zlogp, B_TRUE, "WARNING: unable "
+ "to set high priority");
+ }
+ }
+
+ } else if (zonecfg_get_aliased_rctl(snap_hndl, ALIAS_SHARES, &tmp)
== Z_OK) {
/*
* If the zone has the zone.cpu-shares rctl set then we want to
@@ -4455,7 +4383,7 @@ setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid)
*/
char class_name[PC_CLNMSZ];
- if (zonecfg_get_dflt_sched_class(handle, class_name,
+ if (zonecfg_get_dflt_sched_class(snap_hndl, class_name,
sizeof (class_name)) != Z_OK) {
zerror(zlogp, B_FALSE, "WARNING: unable to determine "
"the zone's scheduling class");
@@ -4488,7 +4416,7 @@ setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid)
* right thing in all cases (reuse or create) based on the current
* zonecfg.
*/
- if ((res = zonecfg_bind_tmp_pool(handle, zoneid, pool_err,
+ if ((res = zonecfg_bind_tmp_pool(snap_hndl, zoneid, pool_err,
sizeof (pool_err))) != Z_OK) {
if (res == Z_POOL || res == Z_POOL_CREATE || res == Z_POOL_BIND)
zerror(zlogp, B_FALSE, "%s: %s\ndedicated-cpu setting "
@@ -4497,14 +4425,13 @@ setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid)
else
zerror(zlogp, B_FALSE, "could not bind zone to "
"temporary pool: %s", zonecfg_strerror(res));
- zonecfg_fini_handle(handle);
return (Z_POOL_BIND);
}
/*
* Check if we need to warn about poold not being enabled.
*/
- if (zonecfg_warn_poold(handle)) {
+ if (zonecfg_warn_poold(snap_hndl)) {
zerror(zlogp, B_FALSE, "WARNING: A range of dedicated-cpus has "
"been specified\nbut the dynamic pool service is not "
"enabled.\nThe system will not dynamically adjust the\n"
@@ -4514,7 +4441,7 @@ setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid)
}
/* The following is a warning, not an error. */
- if ((res = zonecfg_bind_pool(handle, zoneid, pool_err,
+ if ((res = zonecfg_bind_pool(snap_hndl, zoneid, pool_err,
sizeof (pool_err))) != Z_OK) {
if (res == Z_POOL_BIND)
zerror(zlogp, B_FALSE, "WARNING: unable to bind to "
@@ -4528,10 +4455,9 @@ setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid)
}
/* Update saved pool name in case it has changed */
- (void) zonecfg_get_poolname(handle, zone_name, pool_name,
+ (void) zonecfg_get_poolname(snap_hndl, zone_name, pool_name,
sizeof (pool_name));
- zonecfg_fini_handle(handle);
return (Z_OK);
}
@@ -4632,33 +4558,28 @@ setup_zone_fs_allowed(zone_dochandle_t handle, zlog_t *zlogp, zoneid_t zoneid)
}
static int
-setup_zone_attrs(zlog_t *zlogp, char *zone_namep, zoneid_t zoneid)
+setup_zone_attrs(zlog_t *zlogp, zoneid_t zoneid)
{
- zone_dochandle_t handle;
int res = Z_OK;
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE, "getting zone configuration handle");
- return (Z_BAD_HANDLE);
- }
- if ((res = zonecfg_get_snapshot_handle(zone_namep, handle)) != Z_OK) {
- zerror(zlogp, B_FALSE, "invalid configuration");
- goto out;
- }
-
- if ((res = setup_zone_hostid(handle, zlogp, zoneid)) != Z_OK)
+ if ((res = setup_zone_hostid(snap_hndl, zlogp, zoneid)) != Z_OK)
goto out;
- if ((res = setup_zone_fs_allowed(handle, zlogp, zoneid)) != Z_OK)
+ if ((res = setup_zone_fs_allowed(snap_hndl, zlogp, zoneid)) != Z_OK)
goto out;
out:
- zonecfg_fini_handle(handle);
return (res);
}
+/*
+ * The zone_did is a persistent debug ID. Each zone should have a unique ID
+ * in the kernel. This is used for things like DTrace which want to monitor
+ * zones across reboots. They can't use the zoneid since that changes on
+ * each boot.
+ */
zoneid_t
-vplat_create(zlog_t *zlogp, zone_mnt_t mount_cmd)
+vplat_create(zlog_t *zlogp, zone_mnt_t mount_cmd, zoneid_t zone_did)
{
zoneid_t rval = -1;
priv_set_t *privs;
@@ -4674,7 +4595,7 @@ vplat_create(zlog_t *zlogp, zone_mnt_t mount_cmd)
tsol_zcent_t *zcent = NULL;
int match = 0;
int doi = 0;
- int flags;
+ int flags = -1;
zone_iptype_t iptype;
if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) {
@@ -4696,6 +4617,8 @@ vplat_create(zlog_t *zlogp, zone_mnt_t mount_cmd)
flags = ZCF_NET_EXCL;
break;
}
+ if (flags == -1)
+ abort();
if ((privs = priv_allocset()) == NULL) {
zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
@@ -4799,7 +4722,7 @@ vplat_create(zlog_t *zlogp, zone_mnt_t mount_cmd)
xerr = 0;
if ((zoneid = zone_create(kzone, rootpath, privs, rctlbuf,
rctlbufsz, zfsbuf, zfsbufsz, &xerr, match, doi, zlabel,
- flags)) == -1) {
+ flags, zone_did)) == -1) {
if (xerr == ZE_AREMOUNTS) {
if (zonecfg_find_mounts(rootpath, NULL, NULL) < 1) {
zerror(zlogp, B_FALSE,
@@ -4845,7 +4768,7 @@ vplat_create(zlog_t *zlogp, zone_mnt_t mount_cmd)
struct brand_attr attr;
char modname[MAXPATHLEN];
- if (setup_zone_attrs(zlogp, zone_name, zoneid) != Z_OK)
+ if (setup_zone_attrs(zlogp, zoneid) != Z_OK)
goto error;
if ((bh = brand_open(brand_name)) == NULL) {
@@ -4903,6 +4826,8 @@ error:
}
if (rctlbuf != NULL)
free(rctlbuf);
+ if (zfsbuf != NULL)
+ free(zfsbuf);
priv_freeset(privs);
if (fp != NULL)
zonecfg_close_scratch(fp);
@@ -4991,7 +4916,7 @@ write_index_file(zoneid_t zoneid)
int
vplat_bringup(zlog_t *zlogp, zone_mnt_t mount_cmd, zoneid_t zoneid)
{
- char zonepath[MAXPATHLEN];
+ char zpath[MAXPATHLEN];
if (mount_cmd == Z_MNT_BOOT && validate_datasets(zlogp) != 0) {
lofs_discard_mnttab();
@@ -5002,15 +4927,11 @@ vplat_bringup(zlog_t *zlogp, zone_mnt_t mount_cmd, zoneid_t zoneid)
* Before we try to mount filesystems we need to create the
* attribute backing store for /dev
*/
- if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
- lofs_discard_mnttab();
- return (-1);
- }
- resolve_lofs(zlogp, zonepath, sizeof (zonepath));
+ (void) strlcpy(zpath, zonepath, sizeof (zpath));
+ resolve_lofs(zlogp, zpath, sizeof (zpath));
/* Make /dev directory owned by root, grouped sys */
- if (make_one_dir(zlogp, zonepath, "/dev", DEFAULT_DIR_MODE,
- 0, 3) != 0) {
+ if (make_one_dir(zlogp, zpath, "/dev", DEFAULT_DIR_MODE, 0, 3) != 0) {
lofs_discard_mnttab();
return (-1);
}
@@ -5045,6 +4966,8 @@ vplat_bringup(zlog_t *zlogp, zone_mnt_t mount_cmd, zoneid_t zoneid)
return (-1);
}
break;
+ default:
+ abort();
}
}
@@ -5120,13 +5043,13 @@ unmounted:
}
int
-vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting)
+vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting,
+ boolean_t debug)
{
char *kzone;
zoneid_t zoneid;
int res;
char pool_err[128];
- char zpath[MAXPATHLEN];
char cmdbuf[MAXPATHLEN];
brand_handle_t bh = NULL;
dladm_status_t status;
@@ -5159,16 +5082,12 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting)
goto error;
}
- if (remove_datalink_pool(zlogp, zoneid) != 0) {
+ if (remove_datalink_pool(zlogp, zoneid) != 0)
zerror(zlogp, B_FALSE, "unable clear datalink pool property");
- goto error;
- }
- if (remove_datalink_protect(zlogp, zoneid) != 0) {
+ if (remove_datalink_protect(zlogp, zoneid) != 0)
zerror(zlogp, B_FALSE,
"unable clear datalink protect property");
- goto error;
- }
/*
* The datalinks assigned to the zone will be removed from the NGZ as
@@ -5182,12 +5101,6 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting)
goto error;
}
- /* Get the zonepath of this zone */
- if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
- zerror(zlogp, B_FALSE, "unable to determine zone path");
- goto error;
- }
-
/* Get a handle to the brand info for this zone */
if ((bh = brand_open(brand_name)) == NULL) {
zerror(zlogp, B_FALSE, "unable to determine zone brand");
@@ -5198,7 +5111,7 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting)
* brand a chance to cleanup any custom configuration.
*/
(void) strcpy(cmdbuf, EXEC_PREFIX);
- if (brand_get_halt(bh, zone_name, zpath, cmdbuf + EXEC_LEN,
+ if (brand_get_halt(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
sizeof (cmdbuf) - EXEC_LEN) < 0) {
brand_close(bh);
zerror(zlogp, B_FALSE, "unable to determine branded zone's "
@@ -5208,7 +5121,7 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting)
brand_close(bh);
if ((strlen(cmdbuf) > EXEC_LEN) &&
- (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) {
+ (do_subproc(zlogp, cmdbuf, NULL, debug) != Z_OK)) {
zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
goto error;
}
@@ -5240,12 +5153,6 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting)
}
break;
case ZS_EXCLUSIVE:
- if (unconfigure_exclusive_network_interfaces(zlogp,
- zoneid) != 0) {
- zerror(zlogp, B_FALSE, "unable to unconfigure "
- "network interfaces in zone");
- goto error;
- }
status = dladm_zone_halt(dld_handle, zoneid);
if (status != DLADM_STATUS_OK) {
zerror(zlogp, B_FALSE, "unable to notify "
@@ -5282,14 +5189,9 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting)
if (rebooting) {
struct zone_psettab pset_tab;
- zone_dochandle_t handle;
- if ((handle = zonecfg_init_handle()) != NULL &&
- zonecfg_get_handle(zone_name, handle) == Z_OK &&
- zonecfg_lookup_pset(handle, &pset_tab) == Z_OK)
+ if (zonecfg_lookup_pset(snap_hndl, &pset_tab) == Z_OK)
destroy_tmp_pool = B_FALSE;
-
- zonecfg_fini_handle(handle);
}
if (destroy_tmp_pool) {
diff --git a/usr/src/cmd/zoneadmd/zcons.c b/usr/src/cmd/zoneadmd/zcons.c
index 5f6fc4973c..af4fafe46a 100644
--- a/usr/src/cmd/zoneadmd/zcons.c
+++ b/usr/src/cmd/zoneadmd/zcons.c
@@ -22,7 +22,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright 2012 Joyent, Inc. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*/
@@ -118,9 +118,10 @@
#define CONSOLE_SOCKPATH ZONES_TMPDIR "/%s.console_sock"
+#define ZCONS_RETRY 10
+
static int serverfd = -1; /* console server unix domain socket fd */
char boot_args[BOOTARGS_MAX];
-char bad_boot_arg[BOOTARGS_MAX];
/*
* The eventstream is a simple one-directional flow of messages from the
@@ -130,7 +131,10 @@ char bad_boot_arg[BOOTARGS_MAX];
*/
static int eventstream[2];
-
+/* flag used to cope with race creating master zcons devlink */
+static boolean_t master_zcons_failed = B_FALSE;
+/* flag to track if we've seen a state change when there is no master zcons */
+static boolean_t state_changed = B_FALSE;
int
eventstream_init()
@@ -322,7 +326,7 @@ destroy_console_devs(zlog_t *zlogp)
* interfaces to instantiate a new zone console node. We do a lot of
* sanity checking, and are careful to reuse a console if one exists.
*
- * Once the device is in the device tree, we kick devfsadm via di_init_devs()
+ * Once the device is in the device tree, we kick devfsadm via di_devlink_init()
* to ensure that the appropriate symlinks (to the master and slave console
* devices) are placed in /dev in the global zone.
*/
@@ -408,43 +412,63 @@ devlinks:
* Open the master side of the console and issue the ZC_HOLDSLAVE ioctl,
* which will cause the master to retain a reference to the slave.
* This prevents ttymon from blowing through the slave's STREAMS anchor.
+ *
+ * In very rare cases the open returns ENOENT if devfs doesn't have
+ * everything setup yet due to heavy zone startup load. Wait for
+ * 1 sec. and retry a few times. Even if we can't setup the zone's
+ * console, we still go ahead and boot the zone.
*/
(void) snprintf(conspath, sizeof (conspath), "/dev/zcons/%s/%s",
zone_name, ZCONS_MASTER_NAME);
- if ((masterfd = open(conspath, O_RDWR | O_NOCTTY)) == -1) {
+ for (i = 0; i < ZCONS_RETRY; i++) {
+ masterfd = open(conspath, O_RDWR | O_NOCTTY);
+ if (masterfd >= 0 || errno != ENOENT)
+ break;
+ (void) sleep(1);
+ }
+ if (masterfd == -1) {
zerror(zlogp, B_TRUE, "ERROR: could not open master side of "
"zone console for %s to acquire slave handle", zone_name);
- goto error;
+ master_zcons_failed = B_TRUE;
}
+
(void) snprintf(conspath, sizeof (conspath), "/dev/zcons/%s/%s",
zone_name, ZCONS_SLAVE_NAME);
- if ((slavefd = open(conspath, O_RDWR | O_NOCTTY)) == -1) {
+ for (i = 0; i < ZCONS_RETRY; i++) {
+ slavefd = open(conspath, O_RDWR | O_NOCTTY);
+ if (slavefd >= 0 || errno != ENOENT)
+ break;
+ (void) sleep(1);
+ }
+ if (slavefd == -1)
zerror(zlogp, B_TRUE, "ERROR: could not open slave side of zone"
" console for %s to acquire slave handle", zone_name);
- (void) close(masterfd);
- goto error;
- }
+
/*
* This ioctl can occasionally return ENXIO if devfs doesn't have
* everything plumbed up yet due to heavy zone startup load. Wait for
* 1 sec. and retry a few times before we fail to boot the zone.
*/
- for (i = 0; i < 5; i++) {
- if (ioctl(masterfd, ZC_HOLDSLAVE, (caddr_t)(intptr_t)slavefd)
- == 0) {
- rv = 0;
- break;
- } else if (errno != ENXIO) {
- break;
+ if (masterfd != -1 && slavefd != -1) {
+ for (i = 0; i < ZCONS_RETRY; i++) {
+ if (ioctl(masterfd, ZC_HOLDSLAVE,
+ (caddr_t)(intptr_t)slavefd) == 0) {
+ rv = 0;
+ break;
+ } else if (errno != ENXIO) {
+ break;
+ }
+ (void) sleep(1);
}
- (void) sleep(1);
+ if (rv != 0)
+ zerror(zlogp, B_TRUE, "ERROR: error while acquiring "
+ "slave handle of zone console for %s", zone_name);
}
- if (rv != 0)
- zerror(zlogp, B_TRUE, "ERROR: error while acquiring slave "
- "handle of zone console for %s", zone_name);
- (void) close(slavefd);
- (void) close(masterfd);
+ if (slavefd != -1)
+ (void) close(slavefd);
+ if (masterfd != -1)
+ (void) close(masterfd);
error:
if (ddef_hdl)
@@ -517,6 +541,7 @@ get_client_ident(int clifd, pid_t *pid, char *locale, size_t locale_len,
size_t buflen = sizeof (buf);
char c = '\0';
int i = 0, r;
+ ucred_t *cred = NULL;
/* "eat up the ident string" case, for simplicity */
if (pid == NULL) {
@@ -550,18 +575,22 @@ get_client_ident(int clifd, pid_t *pid, char *locale, size_t locale_len,
break;
}
+ if (getpeerucred(clifd, &cred) == 0) {
+ *pid = ucred_getpid((const ucred_t *)cred);
+ ucred_free(cred);
+ } else {
+ return (-1);
+ }
+
/*
* Parse buffer for message of the form:
- * IDENT <pid> <locale> <disconnect flag>
+ * IDENT <locale> <disconnect flag>
*/
bufp = buf;
if (strncmp(bufp, "IDENT ", 6) != 0)
return (-1);
bufp += 6;
errno = 0;
- *pid = strtoll(bufp, &bufp, 10);
- if (errno != 0)
- return (-1);
while (*bufp != '\0' && isspace(*bufp))
bufp++;
@@ -667,14 +696,6 @@ event_message(int clifd, char *clilocale, zone_evt_t evt, int dflag)
else
str = "NOTICE: Zone boot failed";
break;
- case Z_EVT_ZONE_BADARGS:
- /*LINTED*/
- (void) snprintf(lmsg, sizeof (lmsg),
- localize_msg(clilocale,
- "WARNING: Ignoring invalid boot arguments: %s"),
- bad_boot_arg);
- lstr = lmsg;
- break;
default:
return;
}
@@ -878,7 +899,6 @@ init_console(zlog_t *zlogp)
if (init_console_dev(zlogp) == -1) {
zerror(zlogp, B_FALSE,
"console setup: device initialization failed");
- return (-1);
}
if ((serverfd = init_console_sock(zlogp)) == -1) {
@@ -890,6 +910,17 @@ init_console(zlog_t *zlogp)
}
/*
+ * Maintain a simple flag that tracks if we have seen at least one state
+ * change. This is currently only used to handle the special case where we are
+ * running without a console device, which is what normally drives shutdown.
+ */
+void
+zcons_statechanged()
+{
+ state_changed = B_TRUE;
+}
+
+/*
* serve_console() is the master loop for driving console I/O. It is also the
* routine which is ultimately responsible for "pulling the plug" on zoneadmd
* when it realizes that the daemon should shut down.
@@ -907,6 +938,7 @@ serve_console(zlog_t *zlogp)
int masterfd;
zone_state_t zstate;
char conspath[MAXPATHLEN];
+ static boolean_t cons_warned = B_FALSE;
(void) snprintf(conspath, sizeof (conspath),
"/dev/zcons/%s/%s", zone_name, ZCONS_MASTER_NAME);
@@ -914,6 +946,46 @@ serve_console(zlog_t *zlogp)
for (;;) {
masterfd = open(conspath, O_RDWR|O_NONBLOCK|O_NOCTTY);
if (masterfd == -1) {
+ if (master_zcons_failed) {
+ /*
+ * If we don't have a console and the zone is
+ * not shutting down, there may have been a
+ * race/failure with devfs while creating the
+ * console. In this case we want to leave the
+ * zone up, even without a console, so
+ * periodically recheck.
+ */
+ int i;
+
+ /*
+ * In the normal flow of this loop, we use
+ * do_console_io to give things a chance to get
+ * going first. However, in this case we can't
+ * use that, so we have to wait for at least
+ * one state change before checking the state.
+ */
+ for (i = 0; i < 60; i++) {
+ if (state_changed)
+ break;
+ (void) sleep(1);
+ }
+
+ if (i < 60 && zone_get_state(zone_name,
+ &zstate) == Z_OK &&
+ (zstate == ZONE_STATE_READY ||
+ zstate == ZONE_STATE_RUNNING)) {
+ if (!cons_warned) {
+ zerror(zlogp, B_FALSE,
+ "WARNING: missing zone "
+ "console for %s",
+ zone_name);
+ cons_warned = B_TRUE;
+ }
+ (void) sleep(ZCONS_RETRY);
+ continue;
+ }
+ }
+
zerror(zlogp, B_TRUE, "failed to open console master");
(void) mutex_lock(&lock);
goto death;
diff --git a/usr/src/cmd/zoneadmd/zfd.c b/usr/src/cmd/zoneadmd/zfd.c
new file mode 100644
index 0000000000..00278cd583
--- /dev/null
+++ b/usr/src/cmd/zoneadmd/zfd.c
@@ -0,0 +1,1428 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Zone file descriptor support is used as a mechanism for a process inside the
+ * zone to log messages to the GZ zoneadmd and also as a way to interact
+ * directly with the process (via zlogin -I). The zfd thread is modeled on
+ * the zcons thread so see the comment header in zcons.c for a general overview.
+ * Unlike with zcons, which has a single endpoint within the zone and a single
+ * endpoint used by zoneadmd, we setup multiple endpoints within the zone.
+ *
+ * The mode, which is controlled by the zone attribute "zlog-mode" is somewhat
+ * of a misnomer since its purpose has evolved. The attribute currently
+ * can have six values which are used to control:
+ * - how the zfd devices are used inside the zone
+ * - if the output on the device(s) is also teed into another stream within
+ * the zone
+ * - if we do logging in the GZ
+ * See the comment on get_mode_logmax() in this file, and the comment in
+ * uts/common/io/zfd.c for more details.
+ *
+ * Internally the zfd_mode_t struct holds the number of stdio devs (1 or 3),
+ * the number of additional devs corresponding to the zone attr value and the
+ * GZ logging flag.
+ *
+ * Note that although the mode indicates the number of devices needed, we always
+ * create all possible zfd devices for simplicity.
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/termios.h>
+#include <sys/zfd.h>
+#include <sys/mkdev.h>
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <stropts.h>
+#include <thread.h>
+#include <ucred.h>
+#include <unistd.h>
+#include <zone.h>
+#include <signal.h>
+#include <wchar.h>
+
+#include <libdevinfo.h>
+#include <libdevice.h>
+#include <libzonecfg.h>
+
+#include <syslog.h>
+#include <sys/modctl.h>
+
+#include "zoneadmd.h"
+
+static zlog_t *zlogp;
+static int shutting_down = 0;
+static thread_t logger_tid;
+static int logfd = -1;
+static size_t log_sz = 0;
+static size_t log_rot_sz = 0;
+
+static void rotate_log();
+
+/*
+ * The eventstream is a simple one-directional flow of messages implemented
+ * with a pipe. It is used to wake up the poller when it needs to shutdown.
+ */
+static int eventstream[2] = {-1, -1};
+
+#define LOGNAME "stdio.log"
+#define ZLOG_MODE "zlog-mode"
+#define LOG_MAXSZ "zlog-max-size"
+#define ZFDNEX_DEVTREEPATH "/pseudo/zfdnex@2"
+#define ZFDNEX_FILEPATH "/devices/pseudo/zfdnex@2"
+#define SERVER_SOCKPATH ZONES_TMPDIR "/%s.server_%s"
+#define ZTTY_RETRY 5
+
+#define NUM_ZFD_DEVS 5
+
+typedef struct zfd_mode {
+ uint_t zmode_n_stddevs;
+ uint_t zmode_n_addl_devs;
+ boolean_t zmode_gzlogging;
+} zfd_mode_t;
+static zfd_mode_t mode;
+
+/*
+ * cb_data is only used by destroy_cb.
+ */
+struct cb_data {
+ zlog_t *zlogp;
+ int killed;
+};
+
+/*
+ * destroy_zfd_devs() and its helper destroy_cb() tears down any zfd instances
+ * associated with this zone. If things went very wrong, we might have an
+ * incorrect number of instances hanging around. This routine hunts down and
+ * tries to remove all of them. Of course, if the fd is open, the instance will
+ * not detach, which is a potential issue.
+ */
+static int
+destroy_cb(di_node_t node, void *arg)
+{
+ struct cb_data *cb = (struct cb_data *)arg;
+ char *prop_data;
+ char *tmp;
+ char devpath[MAXPATHLEN];
+ devctl_hdl_t hdl;
+
+ if (di_prop_lookup_strings(DDI_DEV_T_ANY, node, "zfd_zname",
+ &prop_data) == -1)
+ return (DI_WALK_CONTINUE);
+
+ assert(prop_data != NULL);
+ if (strcmp(prop_data, zone_name) != 0) {
+ /* this is a zfd for a different zone */
+ return (DI_WALK_CONTINUE);
+ }
+
+ tmp = di_devfs_path(node);
+ (void) snprintf(devpath, sizeof (devpath), "/devices/%s", tmp);
+ di_devfs_path_free(tmp);
+
+ if ((hdl = devctl_device_acquire(devpath, 0)) == NULL) {
+ zerror(cb->zlogp, B_TRUE, "WARNING: zfd %s found, "
+ "but it could not be controlled.", devpath);
+ return (DI_WALK_CONTINUE);
+ }
+ if (devctl_device_remove(hdl) == 0) {
+ cb->killed++;
+ } else {
+ zerror(cb->zlogp, B_TRUE, "WARNING: zfd %s found, "
+ "but it could not be removed.", devpath);
+ }
+ devctl_release(hdl);
+ return (DI_WALK_CONTINUE);
+}
+
+static int
+destroy_zfd_devs(zlog_t *zlogp)
+{
+ di_node_t root;
+ struct cb_data cb;
+
+ bzero(&cb, sizeof (cb));
+ cb.zlogp = zlogp;
+
+ if ((root = di_init(ZFDNEX_DEVTREEPATH, DINFOCPYALL)) == DI_NODE_NIL) {
+ zerror(zlogp, B_TRUE, "di_init failed");
+ return (-1);
+ }
+
+ (void) di_walk_node(root, DI_WALK_CLDFIRST, (void *)&cb, destroy_cb);
+
+ di_fini(root);
+ return (0);
+}
+
+static void
+make_tty(zlog_t *zlogp, int id)
+{
+ int i;
+ int fd = -1;
+ char stdpath[MAXPATHLEN];
+
+ /*
+ * Open the master side of the dev and issue the ZFD_MAKETTY ioctl,
+ * which will cause the the various tty-related streams modules to be
+ * pushed when the slave opens the device.
+ *
+ * In very rare cases the open returns ENOENT if devfs doesn't have
+ * everything setup yet due to heavy zone startup load. Wait for
+ * 1 sec. and retry a few times. Even if we can't setup tty mode
+ * we still move on.
+ */
+ (void) snprintf(stdpath, sizeof (stdpath), "/dev/zfd/%s/master/%d",
+ zone_name, id);
+
+ for (i = 0; !shutting_down && i < ZTTY_RETRY; i++) {
+ fd = open(stdpath, O_RDWR | O_NOCTTY);
+ if (fd >= 0 || errno != ENOENT)
+ break;
+ (void) sleep(1);
+ }
+ if (fd == -1) {
+ zerror(zlogp, B_TRUE, "ERROR: could not open zfd %d for "
+ "zone %s to set tty mode", id, zone_name);
+ } else {
+ /*
+ * This ioctl can occasionally return ENXIO if devfs doesn't
+ * have everything plumbed up yet due to heavy zone startup
+ * load. Wait for 1 sec. and retry a few times before we give
+ * up.
+ */
+ for (i = 0; !shutting_down && i < ZTTY_RETRY; i++) {
+ if (ioctl(fd, ZFD_MAKETTY) == 0) {
+ break;
+ } else if (errno != ENXIO) {
+ break;
+ }
+ (void) sleep(1);
+ }
+ }
+
+ if (fd != -1)
+ (void) close(fd);
+}
+
+/*
+ * init_zfd_devs() drives the device-tree configuration of the zone fd devices.
+ * The general strategy is to use the libdevice (devctl) interfaces to
+ * instantiate all of new zone fd nodes. We do a lot of sanity checking, and
+ * are careful to reuse a dev if one exists.
+ *
+ * Once the devices are in the device tree, we kick devfsadm via
+ * di_devlink_init() to ensure that the appropriate symlinks (to the master and
+ * slave fd devices) are placed in /dev in the global zone.
+ */
+static int
+init_zfd_dev(zlog_t *zlogp, devctl_hdl_t bus_hdl, int id)
+{
+ int rv = -1;
+ devctl_ddef_t ddef_hdl = NULL;
+ devctl_hdl_t dev_hdl = NULL;
+
+ if ((ddef_hdl = devctl_ddef_alloc("zfd", 0)) == NULL) {
+ zerror(zlogp, B_TRUE, "failed to allocate ddef handle");
+ goto error;
+ }
+
+ /*
+ * Set four properties on this node; the name of the zone, the dev name
+ * seen inside the zone, a flag which lets pseudo know that it is OK to
+ * automatically allocate an instance # for this device, and the last
+ * one tells the device framework not to auto-detach this node - we
+ * need the node to still be there when we ask devfsadmd to make links,
+ * and when we need to open it.
+ */
+ if (devctl_ddef_string(ddef_hdl, "zfd_zname", zone_name) == -1) {
+ zerror(zlogp, B_TRUE, "failed to create zfd_zname property");
+ goto error;
+ }
+ if (devctl_ddef_int(ddef_hdl, "zfd_id", id) == -1) {
+ zerror(zlogp, B_TRUE, "failed to create zfd_id property");
+ goto error;
+ }
+ if (devctl_ddef_int(ddef_hdl, "auto-assign-instance", 1) == -1) {
+ zerror(zlogp, B_TRUE, "failed to create auto-assign-instance "
+ "property");
+ goto error;
+ }
+ if (devctl_ddef_int(ddef_hdl, "ddi-no-autodetach", 1) == -1) {
+ zerror(zlogp, B_TRUE, "failed to create ddi-no-auto-detach "
+ "property");
+ goto error;
+ }
+ if (devctl_bus_dev_create(bus_hdl, ddef_hdl, 0, &dev_hdl) == -1) {
+ zerror(zlogp, B_TRUE, "failed to create zfd node");
+ goto error;
+ }
+ rv = 0;
+
+error:
+ if (ddef_hdl)
+ devctl_ddef_free(ddef_hdl);
+ if (dev_hdl)
+ devctl_release(dev_hdl);
+ return (rv);
+}
+
+static int
+init_zfd_devs(zlog_t *zlogp, zfd_mode_t *mode)
+{
+ devctl_hdl_t bus_hdl = NULL;
+ di_devlink_handle_t dl = NULL;
+ int rv = -1;
+ int i;
+
+ /*
+ * Time to make the devices.
+ */
+ if ((bus_hdl = devctl_bus_acquire(ZFDNEX_FILEPATH, 0)) == NULL) {
+ zerror(zlogp, B_TRUE, "devctl_bus_acquire failed");
+ goto error;
+ }
+
+ for (i = 0; i < NUM_ZFD_DEVS; i++) {
+ if (init_zfd_dev(zlogp, bus_hdl, i) != 0)
+ goto error;
+ }
+
+ if ((dl = di_devlink_init("zfd", DI_MAKE_LINK)) == NULL) {
+ zerror(zlogp, B_TRUE, "failed to create devlinks");
+ goto error;
+ }
+
+ (void) di_devlink_fini(&dl);
+ rv = 0;
+
+ if (mode->zmode_n_stddevs == 1) {
+ /* We want the primary stream to look like a tty. */
+ make_tty(zlogp, 0);
+ }
+
+error:
+ if (bus_hdl)
+ devctl_release(bus_hdl);
+ return (rv);
+}
+
+static int
+init_server_sock(zlog_t *zlogp, int *servfd, char *nm)
+{
+ int resfd = -1;
+ struct sockaddr_un servaddr;
+
+ bzero(&servaddr, sizeof (servaddr));
+ servaddr.sun_family = AF_UNIX;
+ (void) snprintf(servaddr.sun_path, sizeof (servaddr.sun_path),
+ SERVER_SOCKPATH, zone_name, nm);
+
+ if ((resfd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
+ zerror(zlogp, B_TRUE, "server setup: could not create socket");
+ goto err;
+ }
+ (void) unlink(servaddr.sun_path);
+
+ if (bind(resfd, (struct sockaddr *)&servaddr, sizeof (servaddr))
+ == -1) {
+ zerror(zlogp, B_TRUE,
+ "server setup: could not bind to socket");
+ goto err;
+ }
+
+ if (listen(resfd, 4) == -1) {
+ zerror(zlogp, B_TRUE,
+ "server setup: could not listen on socket");
+ goto err;
+ }
+
+ *servfd = resfd;
+ return (0);
+
+err:
+ (void) unlink(servaddr.sun_path);
+ if (resfd != -1)
+ (void) close(resfd);
+ return (-1);
+}
+
+static void
+destroy_server_sock(int servfd, char *nm)
+{
+ char path[MAXPATHLEN];
+
+ (void) snprintf(path, sizeof (path), SERVER_SOCKPATH, zone_name, nm);
+ (void) unlink(path);
+ (void) shutdown(servfd, SHUT_RDWR);
+ (void) close(servfd);
+}
+
+/*
+ * Read the "ident" string from the client's descriptor; this routine also
+ * tolerates being called with pid=NULL, for times when you want to "eat"
+ * the ident string from a client without saving it.
+ */
+static int
+get_client_ident(int clifd, pid_t *pid, char *locale, size_t locale_len,
+ uint_t *flagsp)
+{
+ char buf[BUFSIZ], *bufp;
+ size_t buflen = sizeof (buf);
+ char c = '\0';
+ int i = 0, r;
+ ucred_t *cred = NULL;
+
+ /* "eat up the ident string" case, for simplicity */
+ if (pid == NULL) {
+ assert(locale == NULL && locale_len == 0);
+ while (read(clifd, &c, 1) == 1) {
+ if (c == '\n')
+ return (0);
+ }
+ }
+
+ bzero(buf, sizeof (buf));
+ while ((buflen > 1) && (r = read(clifd, &c, 1)) == 1) {
+ buflen--;
+ if (c == '\n')
+ break;
+
+ buf[i] = c;
+ i++;
+ }
+ if (r == -1)
+ return (-1);
+
+ /*
+ * We've filled the buffer, but still haven't seen \n. Keep eating
+ * until we find it; we don't expect this to happen, but this is
+ * defensive.
+ */
+ if (c != '\n') {
+ while ((r = read(clifd, &c, sizeof (c))) > 0)
+ if (c == '\n')
+ break;
+ }
+
+ /*
+ * Parse buffer for message of the form:
+ * IDENT <locale> <flags>
+ */
+ bufp = buf;
+ if (strncmp(bufp, "IDENT ", 6) != 0)
+ return (-1);
+ bufp += 6;
+
+ if (getpeerucred(clifd, &cred) == 0) {
+ *pid = ucred_getpid((const ucred_t *)cred);
+ ucred_free(cred);
+ } else {
+ return (-1);
+ }
+
+ while (*bufp != '\0' && isspace(*bufp))
+ bufp++;
+ buflen = strlen(bufp) - 1;
+ bufp[buflen - 1] = '\0';
+ (void) strlcpy(locale, bufp, locale_len);
+
+ *flagsp = atoi(&bufp[buflen]);
+
+ return (0);
+}
+
+static int
+accept_client(int servfd, pid_t *pid, char *locale, size_t locale_len,
+ uint_t *flagsp)
+{
+ int connfd;
+ struct sockaddr_un cliaddr;
+ socklen_t clilen;
+ int flags;
+
+ clilen = sizeof (cliaddr);
+ connfd = accept(servfd, (struct sockaddr *)&cliaddr, &clilen);
+ if (connfd == -1)
+ return (-1);
+ if (pid != NULL) {
+ if (get_client_ident(connfd, pid, locale, locale_len, flagsp)
+ == -1) {
+ (void) shutdown(connfd, SHUT_RDWR);
+ (void) close(connfd);
+ return (-1);
+ }
+ (void) write(connfd, "OK\n", 3);
+ }
+
+ flags = fcntl(connfd, F_GETFL, 0);
+ if (flags != -1)
+ (void) fcntl(connfd, F_SETFL, flags | O_NONBLOCK | FD_CLOEXEC);
+
+ return (connfd);
+}
+
+static void
+reject_client(int servfd, pid_t clientpid)
+{
+ int connfd;
+ struct sockaddr_un cliaddr;
+ socklen_t clilen;
+ char nak[MAXPATHLEN];
+
+ clilen = sizeof (cliaddr);
+ connfd = accept(servfd, (struct sockaddr *)&cliaddr, &clilen);
+
+ /*
+ * After getting its ident string, tell client to get lost.
+ */
+ if (get_client_ident(connfd, NULL, NULL, 0, NULL) == 0) {
+ (void) snprintf(nak, sizeof (nak), "%lu\n",
+ clientpid);
+ (void) write(connfd, nak, strlen(nak));
+ }
+ (void) shutdown(connfd, SHUT_RDWR);
+ (void) close(connfd);
+}
+
+static int
+accept_socket(int servfd, pid_t verpid)
+{
+ int connfd;
+ struct sockaddr_un cliaddr;
+ socklen_t clilen = sizeof (cliaddr);
+ ucred_t *cred = NULL;
+ pid_t rpid = -1;
+ int flags;
+
+ connfd = accept(servfd, (struct sockaddr *)&cliaddr, &clilen);
+ if (connfd == -1)
+ return (-1);
+
+ /* Confirm connecting process is who we expect */
+ if (getpeerucred(connfd, &cred) == 0) {
+ rpid = ucred_getpid((const ucred_t *)cred);
+ ucred_free(cred);
+ }
+ if (rpid == -1 || rpid != verpid) {
+ (void) shutdown(connfd, SHUT_RDWR);
+ (void) close(connfd);
+ return (-1);
+ }
+
+ flags = fcntl(connfd, F_GETFL, 0);
+ if (flags != -1)
+ (void) fcntl(connfd, F_SETFL, flags | O_NONBLOCK | FD_CLOEXEC);
+
+ return (connfd);
+}
+
+static void
+ctlcmd_process(int sockfd, int stdoutfd, unsigned int *flags)
+{
+ char buf[BUFSIZ];
+ int i;
+ for (i = 0; i < BUFSIZ-1; i++) {
+ char c;
+ if (read(sockfd, &c, 1) != 1 ||
+ c == '\n' || c == '\0') {
+ break;
+ }
+ buf[i] = c;
+ }
+ if (i == 0) {
+ goto fail;
+ }
+ buf[i+1] = '\0';
+
+ if (strncmp(buf, "TIOCSWINSZ ", 11) == 0) {
+ char *next = buf + 11;
+ struct winsize ws;
+ errno = 0;
+ ws.ws_row = strtol(next, &next, 10);
+ if (errno == EINVAL) {
+ goto fail;
+ }
+ ws.ws_col = strtol(next + 1, &next, 10);
+ if (errno == EINVAL) {
+ goto fail;
+ }
+ if (ioctl(stdoutfd, TIOCSWINSZ, &ws) == 0) {
+ (void) write(sockfd, "OK\n", 3);
+ return;
+ }
+ }
+ if (strncmp(buf, "SETFLAGS ", 9) == 0) {
+ char *next = buf + 9;
+ unsigned int result;
+ errno = 0;
+ result = strtoul(next, &next, 10);
+ if (errno == EINVAL) {
+ goto fail;
+ }
+ *flags = result;
+ (void) write(sockfd, "OK\n", 3);
+ return;
+ }
+fail:
+ (void) write(sockfd, "FAIL\n", 5);
+}
+
+/*
+ * Check to see if the client at the other end of the socket is still alive; we
+ * know it is not if it throws EPIPE at us when we try to write an otherwise
+ * harmless 0-length message to it.
+ */
+static int
+test_client(int clifd)
+{
+ if ((write(clifd, "", 0) == -1) && errno == EPIPE)
+ return (-1);
+ return (0);
+}
+
+/*
+ * Modify the input string with json escapes. Since the destination can thus
+ * be larger than the source, it may get truncated, although we do use a
+ * larger buffer.
+ */
+static void
+escape_json(char *sbuf, int slen, char *dbuf, int dlen)
+{
+ int i;
+ mbstate_t mbr;
+ wchar_t c;
+ size_t sz;
+
+ bzero(&mbr, sizeof (mbr));
+
+ sbuf[slen] = '\0';
+ i = 0;
+ while (i < dlen && (sz = mbrtowc(&c, sbuf, MB_CUR_MAX, &mbr)) > 0) {
+ switch (c) {
+ case '\\':
+ dbuf[i++] = '\\';
+ dbuf[i++] = '\\';
+ break;
+
+ case '"':
+ dbuf[i++] = '\\';
+ dbuf[i++] = '"';
+ break;
+
+ case '\b':
+ dbuf[i++] = '\\';
+ dbuf[i++] = 'b';
+ break;
+
+ case '\f':
+ dbuf[i++] = '\\';
+ dbuf[i++] = 'f';
+ break;
+
+ case '\n':
+ dbuf[i++] = '\\';
+ dbuf[i++] = 'n';
+ break;
+
+ case '\r':
+ dbuf[i++] = '\\';
+ dbuf[i++] = 'r';
+ break;
+
+ case '\t':
+ dbuf[i++] = '\\';
+ dbuf[i++] = 't';
+ break;
+
+ default:
+ if ((c >= 0x00 && c <= 0x1f) ||
+ (c > 0x7f && c <= 0xffff)) {
+
+ i += snprintf(&dbuf[i], (dlen - i), "\\u%04x",
+ (int)(0xffff & c));
+ } else if (c >= 0x20 && c <= 0x7f) {
+ dbuf[i++] = 0xff & c;
+ }
+
+ break;
+ }
+ sbuf += sz;
+ }
+
+ if (i == dlen)
+ dbuf[--i] = '\0';
+ else
+ dbuf[i] = '\0';
+}
+
+/*
+ * We output to the log file as json.
+ * ex. for string 'msg\n' on the zone's stdout:
+ * {"log":"msg\n","stream":"stdout","time":"2014-10-24T20:12:11.101973117Z"}
+ *
+ * We use ns in the last field of the timestamp for compatability.
+ *
+ * We keep track of the size of the log file and rotate it when we exceed
+ * the log size limit (if one is set).
+ */
+static void
+wr_log_msg(char *buf, int len, int from)
+{
+ struct timeval tv;
+ int olen;
+ char ts[64];
+ char nbuf[BUFSIZ * 2];
+ char obuf[BUFSIZ * 2];
+ static boolean_t log_wr_err = B_FALSE;
+
+ if (logfd == -1)
+ return;
+
+ escape_json(buf, len, nbuf, sizeof (nbuf));
+
+ if (gettimeofday(&tv, NULL) != 0)
+ return;
+ (void) strftime(ts, sizeof (ts), "%FT%T", gmtime(&tv.tv_sec));
+
+ olen = snprintf(obuf, sizeof (obuf),
+ "{\"log\":\"%s\",\"stream\":\"%s\",\"time\":\"%s.%ldZ\"}\n",
+ nbuf, (from == 1) ? "stdout" : "stderr", ts, tv.tv_usec * 1000);
+
+ if (write(logfd, obuf, olen) != olen) {
+ if (!log_wr_err) {
+ zerror(zlogp, B_TRUE, "log file write error");
+ log_wr_err = B_TRUE;
+ }
+ return;
+ }
+
+ log_sz += olen;
+ if (log_rot_sz > 0 && log_sz >= log_rot_sz)
+ rotate_log();
+}
+
+/*
+ * We want to sleep for a little while but need to be responsive if the zone is
+ * halting. We poll/sleep on the event stream so we can notice if we're halting.
+ * Return true if halting, otherwise false.
+ */
+static boolean_t
+halt_sleep(int slptime)
+{
+ struct pollfd evfd[1];
+
+ evfd[0].fd = eventstream[1];
+ evfd[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI;
+
+ if (poll(evfd, 1, slptime) > 0) {
+ /* zone halting */
+ return (B_TRUE);
+ }
+ return (B_FALSE);
+}
+
+/*
+ * This routine drives the logging and interactive I/O loop. It polls for
+ * input from the zone side of the fd (output to stdout/stderr), and from the
+ * client (input to the zone's stdin). Additionally, it polls on the server
+ * fd, and disconnects any clients that might try to hook up with the zone
+ * while the fd's are in use.
+ *
+ * Data from the zone's stdout and stderr is formatted in json and written to
+ * the log file whether an interactive client is connected or not.
+ *
+ * When the client first calls us up, it is expected to send a line giving its
+ * "identity"; this consists of the string 'IDENT <pid> <locale>'. This is so
+ * that we can report that the fd's are busy, along with some diagnostics
+ * about who has them busy; the locale is ignore here but kept for compatability
+ * with the zlogin code when running on the zone's console.
+ *
+ * We need to handle the case where there is no server within the zone (or
+ * the server gets stuck) and data that we're writing to the zone server's
+ * stdin fills the pipe. Because of the way the zfd device works writes can
+ * flow into the stream and simply be dropped, if there is no server, or writes
+ * could return -1 with EAGAIN if the server is stuck. Since we ignore errors
+ * on the write to stdin, we won't get blocked in that case but we'd like to
+ * avoid dropping initial input if the server within the zone hasn't started
+ * yet. To handle this we wait to read initial input until we detect that there
+ * is a server inside the zone. We have to poll for this so that we can
+ * re-run the ioctl to notice when a server shows up. This poll/wait is handled
+ * by halt_sleep() so that we can be responsive if the zone wants to halt.
+ * We only do this check to avoid dropping initial input so it is possible for
+ * the server within the zone to go away later. At that point zfd will just
+ * drop any new input flowing into the stream.
+ */
+static void
+do_zfd_io(int gzctlfd, int gzservfd, int gzerrfd, int stdinfd, int stdoutfd,
+ int stderrfd)
+{
+ struct pollfd pollfds[8];
+ char ibuf[BUFSIZ + 1];
+ int cc, ret;
+ int ctlfd = -1;
+ int clifd = -1;
+ int clierrfd = -1;
+ int pollerr = 0;
+ char clilocale[MAXPATHLEN];
+ pid_t clipid = 0;
+ uint_t flags = 0;
+ boolean_t stdin_ready = B_FALSE;
+ int slptime = 250; /* initial poll sleep time in ms */
+
+ /* client control socket, watch for read events */
+ pollfds[0].fd = ctlfd;
+ pollfds[0].events = POLLIN | POLLRDNORM | POLLRDBAND |
+ POLLPRI | POLLERR | POLLHUP | POLLNVAL;
+
+ /* client socket, watch for read events */
+ pollfds[1].fd = clifd;
+ pollfds[1].events = pollfds[0].events;
+
+ /* stdout, watch for read events */
+ pollfds[2].fd = stdoutfd;
+ pollfds[2].events = pollfds[0].events;
+
+ /* stderr, watch for read events */
+ pollfds[3].fd = stderrfd;
+ pollfds[3].events = pollfds[0].events;
+
+ /* the server control socket; watch for new connections */
+ pollfds[4].fd = gzctlfd;
+ pollfds[4].events = POLLIN | POLLRDNORM;
+
+ /* the server stdin/out socket; watch for new connections */
+ pollfds[5].fd = gzservfd;
+ pollfds[5].events = POLLIN | POLLRDNORM;
+
+ /* the server stderr socket; watch for new connections */
+ pollfds[6].fd = gzerrfd;
+ pollfds[6].events = POLLIN | POLLRDNORM;
+
+ /* the eventstream; any input means the zone is halting */
+ pollfds[7].fd = eventstream[1];
+ pollfds[7].events = pollfds[0].events;
+
+ while (!shutting_down) {
+ pollfds[0].revents = pollfds[1].revents = 0;
+ pollfds[2].revents = pollfds[3].revents = 0;
+ pollfds[4].revents = pollfds[5].revents = 0;
+ pollfds[6].revents = pollfds[7].revents = 0;
+
+ ret = poll(pollfds, 8, -1);
+ if (ret == -1 && errno != EINTR) {
+ zerror(zlogp, B_TRUE, "poll failed");
+ /* we are hosed, close connection */
+ break;
+ }
+
+ /* control events from client */
+ if (pollfds[0].revents &
+ (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
+ /* process control message */
+ ctlcmd_process(ctlfd, stdoutfd, &flags);
+ } else if (pollfds[0].revents) {
+ /* bail if any error occurs */
+ pollerr = pollfds[0].revents;
+ zerror(zlogp, B_FALSE, "closing connection "
+ "with control channel, pollerr %d\n", pollerr);
+ break;
+ }
+
+ /* event from client side */
+ if (pollfds[1].revents) {
+ if (stdin_ready) {
+ if (pollfds[1].revents & (POLLIN |
+ POLLRDNORM | POLLRDBAND | POLLPRI)) {
+ errno = 0;
+ cc = read(clifd, ibuf, BUFSIZ);
+ if (cc > 0) {
+ /*
+ * See comment for this
+ * function on what happens if
+ * there is no reader in the
+ * zone. EOF is handled below.
+ */
+ (void) write(stdinfd, ibuf, cc);
+ }
+ } else if (pollfds[1].revents & (POLLERR |
+ POLLNVAL)) {
+ pollerr = pollfds[1].revents;
+ zerror(zlogp, B_FALSE,
+ "closing connection "
+ "with client, pollerr %d\n",
+ pollerr);
+ break;
+ }
+
+ if (pollfds[1].revents & POLLHUP) {
+ if (flags & ZLOGIN_ZFD_EOF) {
+ /*
+ * Let the client know. We've
+ * already serviced any pending
+ * regular input. Let the
+ * stream clear since the EOF
+ * ioctl jumps to the head.
+ */
+ (void) ioctl(stdinfd, I_FLUSH);
+ if (halt_sleep(250))
+ break;
+ (void) ioctl(stdinfd, ZFD_EOF);
+ }
+ break;
+ }
+ } else {
+ if (ioctl(stdinfd, ZFD_HAS_SLAVE) == 0) {
+ stdin_ready = B_TRUE;
+ } else {
+ /*
+ * There is nothing in the zone to read
+ * our input. Presumably the user
+ * providing input expects something to
+ * show up, but that is no guarantee.
+ * Since we haven't serviced the pending
+ * input poll yet, we don't want to
+ * immediately loop around but we also
+ * need to be responsive if the zone is
+ * halting.
+ */
+ if (halt_sleep(slptime))
+ break;
+
+ if (slptime < 5000)
+ slptime += 250;
+ }
+ }
+ }
+
+ /* event from the zone's stdout */
+ if (pollfds[2].revents) {
+ if (pollfds[2].revents &
+ (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
+ errno = 0;
+ cc = read(stdoutfd, ibuf, BUFSIZ);
+ if (cc <= 0 && (errno != EINTR) &&
+ (errno != EAGAIN))
+ break;
+ if (cc > 0) {
+ wr_log_msg(ibuf, cc, 1);
+
+ /*
+ * Lose output if no one is listening,
+ * otherwise pass it on.
+ */
+ if (clifd != -1)
+ (void) write(clifd, ibuf, cc);
+ }
+ } else {
+ pollerr = pollfds[2].revents;
+ zerror(zlogp, B_FALSE,
+ "closing connection with stdout zfd, "
+ "pollerr %d\n", pollerr);
+ break;
+ }
+ }
+
+ /* event from the zone's stderr */
+ if (pollfds[3].revents) {
+ if (pollfds[3].revents &
+ (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
+ errno = 0;
+ cc = read(stderrfd, ibuf, BUFSIZ);
+ if (cc <= 0 && (errno != EINTR) &&
+ (errno != EAGAIN))
+ break;
+ if (cc > 0) {
+ wr_log_msg(ibuf, cc, 2);
+
+ /*
+ * Lose output if no one is listening,
+ * otherwise pass it on.
+ */
+ if (clierrfd != -1)
+ (void) write(clierrfd, ibuf,
+ cc);
+ }
+ } else {
+ pollerr = pollfds[3].revents;
+ zerror(zlogp, B_FALSE,
+ "closing connection with stderr zfd, "
+ "pollerr %d\n", pollerr);
+ break;
+ }
+ }
+
+ /* connect event from server control socket */
+ if (pollfds[4].revents) {
+ if (ctlfd != -1) {
+ /*
+ * Test the client to see if it is really
+ * still alive. If it has died but we
+ * haven't yet detected that, we might
+ * deny a legitimate connect attempt. If it
+ * is dead, we break out; once we tear down
+ * the old connection, the new connection
+ * will happen.
+ */
+ if (test_client(ctlfd) == -1) {
+ break;
+ }
+ /* we're already handling a client */
+ reject_client(gzctlfd, clipid);
+ } else {
+ ctlfd = accept_client(gzctlfd, &clipid,
+ clilocale, sizeof (clilocale), &flags);
+ if (ctlfd != -1) {
+ pollfds[0].fd = ctlfd;
+ } else {
+ break;
+ }
+ }
+ }
+
+ /* connect event from server stdin/out socket */
+ if (pollfds[5].revents) {
+ if (ctlfd == -1) {
+ /*
+ * This shouldn't happen since the client is
+ * expected to connect on the control socket
+ * first. If we see this, tear everything down
+ * and start over.
+ */
+ zerror(zlogp, B_FALSE, "GZ zfd stdin/stdout "
+ "connection attempt with no GZ control\n");
+ break;
+ }
+ assert(clifd == -1);
+ if ((clifd = accept_socket(gzservfd, clipid)) != -1) {
+ /* No need to watch for other new connections */
+ pollfds[5].fd = -1;
+ /* Client input is of interest, though */
+ pollfds[1].fd = clifd;
+ } else {
+ break;
+ }
+ }
+
+ /* connection event from server stderr socket */
+ if (pollfds[6].revents) {
+ if (ctlfd == -1) {
+ /*
+ * Same conditions apply to stderr as stdin/out.
+ */
+ zerror(zlogp, B_FALSE, "GZ zfd stderr "
+ "connection attempt with no GZ control\n");
+ break;
+ }
+ assert(clierrfd == -1);
+ if ((clierrfd = accept_socket(gzerrfd, clipid)) != -1) {
+ /* No need to watch for other new connections */
+ pollfds[6].fd = -1;
+ } else {
+ break;
+ }
+ }
+
+ /*
+ * Watch for events on the eventstream. This is how we get
+ * notified of the zone halting, etc. It provides us a
+ * "wakeup" from poll when important things happen, which
+ * is good.
+ */
+ if (pollfds[7].revents) {
+ break;
+ }
+ }
+
+ if (clifd != -1) {
+ (void) shutdown(clifd, SHUT_RDWR);
+ (void) close(clifd);
+ }
+
+ if (clierrfd != -1) {
+ (void) shutdown(clierrfd, SHUT_RDWR);
+ (void) close(clierrfd);
+ }
+}
+
+static int
+open_fd(zlog_t *zlogp, int id, int rw)
+{
+ int fd;
+ int flag = O_NONBLOCK | O_NOCTTY | O_CLOEXEC;
+ int retried = 0;
+ char stdpath[MAXPATHLEN];
+
+ (void) snprintf(stdpath, sizeof (stdpath), "/dev/zfd/%s/master/%d",
+ zone_name, id);
+ flag |= rw;
+
+ while (!shutting_down) {
+ if ((fd = open(stdpath, flag)) != -1) {
+ /*
+ * Setting RPROTDIS on the stream means that the
+ * control portion of messages received (which we don't
+ * care about) will be discarded by the stream head. If
+ * we allowed such messages, we wouldn't be able to use
+ * read(2), as it fails (EBADMSG) when a message with a
+ * control element is received.
+ */
+ if (ioctl(fd, I_SRDOPT, RNORM|RPROTDIS) == -1) {
+ zerror(zlogp, B_TRUE,
+ "failed to set options on zfd");
+ return (-1);
+ }
+ return (fd);
+ }
+
+ if (retried++ > 60)
+ break;
+
+ (void) sleep(1);
+ }
+
+ zerror(zlogp, B_TRUE, "failed to open zfd");
+ return (-1);
+}
+
+static void
+open_logfile()
+{
+ char logpath[MAXPATHLEN];
+
+ logfd = -1;
+ log_sz = 0;
+
+ (void) snprintf(logpath, sizeof (logpath), "%s/logs", zonepath);
+ (void) mkdir(logpath, 0700);
+
+ (void) snprintf(logpath, sizeof (logpath), "%s/logs/%s", zonepath,
+ LOGNAME);
+
+ if ((logfd = open(logpath, O_WRONLY | O_APPEND | O_CREAT,
+ 0600)) == -1) {
+ zerror(zlogp, B_TRUE, "failed to open log file");
+ } else {
+ struct stat64 sb;
+
+ if (fstat64(logfd, &sb) == 0)
+ log_sz = sb.st_size;
+ }
+}
+
+static void
+rotate_log()
+{
+ time_t t;
+ struct tm gtm;
+ char onm[MAXPATHLEN], rnm[MAXPATHLEN];
+
+ if ((t = time(NULL)) == (time_t)-1 || gmtime_r(&t, &gtm) == NULL) {
+ zerror(zlogp, B_TRUE, "failed to format time");
+ return;
+ }
+
+ (void) snprintf(rnm, sizeof (rnm),
+ "%s/logs/%s.%d%02d%02dT%02d%02d%02dZ",
+ zonepath, LOGNAME, gtm.tm_year + 1900, gtm.tm_mon + 1, gtm.tm_mday,
+ gtm.tm_hour, gtm.tm_min, gtm.tm_sec);
+ (void) snprintf(onm, sizeof (onm), "%s/logs/%s", zonepath, LOGNAME);
+
+ (void) close(logfd);
+ if (rename(onm, rnm) != 0)
+ zerror(zlogp, B_TRUE, "failed to rotate log file");
+ open_logfile();
+}
+
+
+/* ARGSUSED */
+void
+hup_handler(int i)
+{
+ if (logfd != -1) {
+ (void) close(logfd);
+ open_logfile();
+ }
+}
+
+/*
+ * Body of the worker thread to log the zfd's stdout and stderr to a log file
+ * and to perform interactive IO to the stdin, stdout and stderr zfd's.
+ *
+ * The stdin, stdout and stderr are from the perspective of the process inside
+ * the zone, so the zoneadmd view is opposite (i.e. we write to the stdin fd
+ * and read from the stdout/stderr fds).
+ */
+static void
+srvr(void *modearg)
+{
+ zfd_mode_t *mode = (zfd_mode_t *)modearg;
+ int gzctlfd = -1;
+ int gzoutfd = -1;
+ int stdinfd = -1;
+ int stdoutfd = -1;
+ sigset_t blockset;
+ int gzerrfd = -1;
+ int stderrfd = -1;
+ int flags;
+ int len;
+ char ibuf[BUFSIZ + 1];
+
+ if (!shutting_down && mode->zmode_gzlogging)
+ open_logfile();
+
+ /*
+ * This thread should receive SIGHUP so that it can close the log
+ * file, and reopen it, during log rotation.
+ */
+ sigset(SIGHUP, hup_handler);
+ (void) sigfillset(&blockset);
+ (void) sigdelset(&blockset, SIGHUP);
+ (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL);
+
+ if (!shutting_down) {
+ if (pipe(eventstream) != 0) {
+ zerror(zlogp, B_TRUE, "failed to open logger control "
+ "pipe");
+ return;
+ }
+ }
+
+ while (!shutting_down) {
+ if (init_server_sock(zlogp, &gzctlfd, "ctl") == -1) {
+ zerror(zlogp, B_FALSE,
+ "server setup: control socket init failed");
+ goto death;
+ }
+ if (init_server_sock(zlogp, &gzoutfd, "out") == -1) {
+ zerror(zlogp, B_FALSE,
+ "server setup: stdout socket init failed");
+ goto death;
+ }
+ if (init_server_sock(zlogp, &gzerrfd, "err") == -1) {
+ zerror(zlogp, B_FALSE,
+ "server setup: stderr socket init failed");
+ goto death;
+ }
+
+ if (mode->zmode_n_stddevs == 1) {
+ if ((stdinfd = open_fd(zlogp, 0, O_RDWR)) == -1) {
+ goto death;
+ }
+ stdoutfd = stdinfd;
+ } else {
+ if ((stdinfd = open_fd(zlogp, 0, O_WRONLY)) == -1 ||
+ (stdoutfd = open_fd(zlogp, 1, O_RDONLY)) == -1 ||
+ (stderrfd = open_fd(zlogp, 2, O_RDONLY)) == -1) {
+ goto death;
+ }
+ }
+
+ do_zfd_io(gzctlfd, gzoutfd, gzerrfd, stdinfd, stdoutfd,
+ stderrfd);
+death:
+ destroy_server_sock(gzctlfd, "ctl");
+ destroy_server_sock(gzoutfd, "out");
+ destroy_server_sock(gzerrfd, "err");
+
+ /* when shutting down, leave open until drained */
+ if (!shutting_down) {
+ (void) close(stdinfd);
+ if (mode->zmode_n_stddevs == 3) {
+ (void) close(stdoutfd);
+ (void) close(stderrfd);
+ }
+ }
+ }
+
+ /*
+ * Attempt to drain remaining log output from the zone prior to closing
+ * the file descriptors. This helps ensure that complete logs are
+ * captured during shutdown.
+ */
+ flags = fcntl(stdoutfd, F_GETFL, 0);
+ if (fcntl(stdoutfd, F_SETFL, flags | O_NONBLOCK) != -1) {
+ while ((len = read(stdoutfd, ibuf, BUFSIZ)) > 0)
+ wr_log_msg(ibuf, len, 1);
+ }
+ (void) close(stdoutfd);
+
+ if (mode->zmode_n_stddevs > 1) {
+ (void) close(stdinfd);
+ flags = fcntl(stderrfd, F_GETFL, 0);
+ if (fcntl(stderrfd, F_SETFL, flags | O_NONBLOCK) != -1) {
+ while ((len = read(stderrfd, ibuf, BUFSIZ)) > 0)
+ wr_log_msg(ibuf, len, 2);
+ }
+ (void) close(stderrfd);
+ }
+
+
+ (void) close(eventstream[0]);
+ eventstream[0] = -1;
+ (void) close(eventstream[1]);
+ eventstream[1] = -1;
+ if (logfd != -1)
+ (void) close(logfd);
+}
+
+/*
+ * The meaning of the original legacy values for the zlog-mode evolved over
+ * time, to the point where the old names no longer made sense. The current
+ * values are simply positional letters used to indicate various capabilities.
+ * The following table shows the meaning of the mode values, along with the
+ * legacy name which we continue to support for compatability. Any future
+ * capability can add a letter to the left and '-' is implied for existing
+ * strings.
+ *
+ * zlog-mode gz log - tty - ngz log
+ * --------- ------ --- -------
+ * gt- (int) y y n
+ * g-- (log) y n n
+ * gtn (nlint) y y y
+ * g-n (nolog) y n y
+ * -t- n y n
+ * --- n n n
+ *
+ * This function also obtains a maximum log size while it is reading the
+ * zone configuration.
+ */
+static void
+get_mode_logmax(zfd_mode_t *mode)
+{
+ zone_dochandle_t handle;
+ struct zone_attrtab attr;
+
+ bzero(mode, sizeof (zfd_mode_t));
+
+ if ((handle = zonecfg_init_handle()) == NULL)
+ return;
+
+ if (zonecfg_get_handle(zone_name, handle) != Z_OK)
+ goto done;
+
+ if (zonecfg_setattrent(handle) != Z_OK)
+ goto done;
+ while (zonecfg_getattrent(handle, &attr) == Z_OK) {
+ if (strcmp(ZLOG_MODE, attr.zone_attr_name) == 0) {
+ if (strcmp("g--", attr.zone_attr_value) == 0 ||
+ strncmp("log", attr.zone_attr_value, 3) == 0) {
+ mode->zmode_gzlogging = B_TRUE;
+ mode->zmode_n_stddevs = 3;
+ mode->zmode_n_addl_devs = 0;
+ } else if (strcmp("g-n", attr.zone_attr_value) == 0 ||
+ strncmp("nolog", attr.zone_attr_value, 5) == 0) {
+ mode->zmode_gzlogging = B_TRUE;
+ mode->zmode_n_stddevs = 3;
+ mode->zmode_n_addl_devs = 2;
+ } else if (strcmp("gt-", attr.zone_attr_value) == 0 ||
+ strncmp("int", attr.zone_attr_value, 3) == 0) {
+ mode->zmode_gzlogging = B_TRUE;
+ mode->zmode_n_stddevs = 1;
+ mode->zmode_n_addl_devs = 0;
+ } else if (strcmp("gtn", attr.zone_attr_value) == 0 ||
+ strncmp("nlint", attr.zone_attr_value, 5) == 0) {
+ mode->zmode_gzlogging = B_TRUE;
+ mode->zmode_n_stddevs = 1;
+ mode->zmode_n_addl_devs = 1;
+ } else if (strcmp("-t-", attr.zone_attr_value) == 0) {
+ mode->zmode_gzlogging = B_FALSE;
+ mode->zmode_n_stddevs = 1;
+ mode->zmode_n_addl_devs = 0;
+ } else if (strcmp("---", attr.zone_attr_value) == 0) {
+ mode->zmode_gzlogging = B_FALSE;
+ mode->zmode_n_stddevs = 3;
+ mode->zmode_n_addl_devs = 0;
+ }
+
+ } else if (strcmp(LOG_MAXSZ, attr.zone_attr_name) == 0) {
+ char *p;
+ long lval;
+
+ p = attr.zone_attr_value;
+ lval = strtol(p, &p, 10);
+ if (*p == '\0')
+ log_rot_sz = (size_t)lval;
+ }
+ }
+ (void) zonecfg_endattrent(handle);
+
+done:
+ zonecfg_fini_handle(handle);
+}
+
+void
+create_log_thread(zlog_t *logp, zoneid_t id)
+{
+ int res;
+
+ shutting_down = 0;
+ zlogp = logp;
+
+ get_mode_logmax(&mode);
+ if (mode.zmode_n_stddevs == 0)
+ return;
+
+ if (init_zfd_devs(zlogp, &mode) == -1) {
+ zerror(zlogp, B_FALSE,
+ "zfd setup: device initialization failed");
+ return;
+ }
+
+ res = thr_create(NULL, 0, (void * (*)(void *))srvr, (void *)&mode, 0,
+ &logger_tid);
+ if (res != 0) {
+ zerror(zlogp, B_FALSE, "error %d creating logger thread", res);
+ logger_tid = 0;
+ }
+}
+
+void
+destroy_log_thread()
+{
+ if (logger_tid != 0) {
+ int stop = 1;
+
+ shutting_down = 1;
+ /* break out of poll to shutdown */
+ if (eventstream[0] != -1)
+ (void) write(eventstream[0], &stop, sizeof (stop));
+ (void) thr_join(logger_tid, NULL, NULL);
+ logger_tid = 0;
+ }
+
+ (void) destroy_zfd_devs(zlogp);
+}
diff --git a/usr/src/cmd/zoneadmd/zoneadmd.c b/usr/src/cmd/zoneadmd/zoneadmd.c
index e2bbd20640..0a714fda38 100644
--- a/usr/src/cmd/zoneadmd/zoneadmd.c
+++ b/usr/src/cmd/zoneadmd/zoneadmd.c
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc. All rights reserved.
*/
/*
@@ -68,6 +69,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
+#include <sys/time.h>
#include <bsm/adt.h>
#include <bsm/adt_event.h>
@@ -108,6 +110,8 @@
static char *progname;
char *zone_name; /* zone which we are managing */
+zone_dochandle_t snap_hndl; /* handle for snapshot created when ready */
+char zonepath[MAXNAMELEN];
char pool_name[MAXNAMELEN];
char default_brand[MAXNAMELEN];
char brand_name[MAXNAMELEN];
@@ -116,10 +120,11 @@ boolean_t zone_iscluster;
boolean_t zone_islabeled;
boolean_t shutdown_in_progress;
static zoneid_t zone_id;
+static zoneid_t zone_did = 0;
dladm_handle_t dld_handle = NULL;
-static char pre_statechg_hook[2 * MAXPATHLEN];
-static char post_statechg_hook[2 * MAXPATHLEN];
+char pre_statechg_hook[2 * MAXPATHLEN];
+char post_statechg_hook[2 * MAXPATHLEN];
char query_hook[2 * MAXPATHLEN];
zlog_t logsys;
@@ -141,6 +146,9 @@ boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */
#define DEFAULT_LOCALE "C"
+#define RSRC_NET "net"
+#define RSRC_DEV "device"
+
static const char *
z_cmd_name(zone_cmd_t zcmd)
{
@@ -257,34 +265,31 @@ zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...)
}
/*
- * Emit a warning for any boot arguments which are unrecognized. Since
- * Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we
+ * Since Solaris boot arguments are getopt(3c) compatible (see kernel(1m)), we
* put the arguments into an argv style array, use getopt to process them,
- * and put the resultant argument string back into outargs.
+ * and put the resultant argument string back into outargs. Non-Solaris brands
+ * may support alternate forms of boot arguments so we must handle that as well.
*
* During the filtering, we pull out any arguments which are truly "boot"
* arguments, leaving only those which are to be passed intact to the
* progenitor process. The one we support at the moment is -i, which
* indicates to the kernel which program should be launched as 'init'.
*
- * A return of Z_INVAL indicates specifically that the arguments are
- * not valid; this is a non-fatal error. Except for Z_OK, all other return
- * values are treated as fatal.
+ * Except for Z_OK, all other return values are treated as fatal.
*/
static int
filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
- char *init_file, char *badarg)
+ char *init_file)
{
int argc = 0, argc_save;
int i;
- int err;
+ int err = Z_OK;
char *arg, *lasts, **argv = NULL, **argv_save;
char zonecfg_args[BOOTARGS_MAX];
char scratchargs[BOOTARGS_MAX], *sargs;
char c;
bzero(outargs, BOOTARGS_MAX);
- bzero(badarg, BOOTARGS_MAX);
/*
* If the user didn't specify transient boot arguments, check
@@ -292,25 +297,10 @@ filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
* and use them if applicable.
*/
if (inargs == NULL || inargs[0] == '\0') {
- zone_dochandle_t handle;
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE,
- "getting zone configuration handle");
- return (Z_BAD_HANDLE);
- }
- err = zonecfg_get_snapshot_handle(zone_name, handle);
- if (err != Z_OK) {
- zerror(zlogp, B_FALSE,
- "invalid configuration snapshot");
- zonecfg_fini_handle(handle);
- return (Z_BAD_HANDLE);
- }
-
bzero(zonecfg_args, sizeof (zonecfg_args));
- (void) zonecfg_get_bootargs(handle, zonecfg_args,
+ (void) zonecfg_get_bootargs(snap_hndl, zonecfg_args,
sizeof (zonecfg_args));
inargs = zonecfg_args;
- zonecfg_fini_handle(handle);
}
if (strlen(inargs) >= BOOTARGS_MAX) {
@@ -347,14 +337,22 @@ filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
}
/*
- * We preserve compatibility with the Solaris system boot behavior,
+ * We preserve compatibility with the illumos system boot behavior,
* which allows:
*
* # reboot kernel/unix -s -m verbose
*
- * In this example, kernel/unix tells the booter what file to
- * boot. We don't want reboot in a zone to be gratuitously different,
- * so we silently ignore the boot file, if necessary.
+ * In this example, kernel/unix tells the booter what file to boot. The
+ * original intent of this was that we didn't want reboot in a zone to
+ * be gratuitously different, so we would silently ignore the boot
+ * file, if necessary. However, this usage is archaic and has never
+ * been common, since it is impossible to boot a zone onto a different
+ * kernel. Ignoring the first argument breaks for non-native brands
+ * which pass boot arguments in a different style. e.g.
+ * systemd.log_level=debug
+ * Thus, for backward compatibility we only ignore the first argument
+ * if it appears to be in the illumos form and attempting to specify a
+ * kernel.
*/
if (argv[0] == NULL)
goto done;
@@ -362,7 +360,7 @@ filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
assert(argv[0][0] != ' ');
assert(argv[0][0] != '\t');
- if (argv[0][0] != '-' && argv[0][0] != '\0') {
+ if (strncmp(argv[0], "kernel/", 7) == 0) {
argv = &argv[1];
argc--;
}
@@ -390,36 +388,29 @@ filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
break;
case '?':
/*
- * We warn about unknown arguments but pass them
- * along anyway-- if someone wants to develop their
- * own init replacement, they can pass it whatever
- * args they want.
+ * If a brand has its own init, we need to pass along
+ * whatever the user provides. We use the entire
+ * unknown string here so that we correctly handle
+ * unknown long options (e.g. --debug).
*/
- err = Z_INVAL;
(void) snprintf(outargs, BOOTARGS_MAX,
- "%s -%c", outargs, optopt);
- (void) snprintf(badarg, BOOTARGS_MAX,
- "%s -%c", badarg, optopt);
+ "%s %s", outargs, argv[optind - 1]);
break;
}
}
/*
- * For Solaris Zones we warn about and discard non-option arguments.
- * Hence 'boot foo bar baz gub' --> 'boot'. However, to be similar
- * to the kernel, we concat up all the other remaining boot args.
- * and warn on them as a group.
+ * We need to pass along everything else since we don't know what
+ * the brand's init is expecting. For example, an argument list like:
+ * --confdir /foo --debug
+ * will cause the getopt parsing to stop at '/foo' but we need to pass
+ * that on, along with the '--debug'. This does mean that we require
+ * any of our known options (-ifms) to preceed the brand-specific ones.
*/
- if (optind < argc) {
- err = Z_INVAL;
- while (optind < argc) {
- (void) snprintf(badarg, BOOTARGS_MAX, "%s%s%s",
- badarg, strlen(badarg) > 0 ? " " : "",
- argv[optind]);
- optind++;
- }
- zerror(zlogp, B_FALSE, "WARNING: Unused or invalid boot "
- "arguments `%s'.", badarg);
+ while (optind < argc) {
+ (void) snprintf(outargs, BOOTARGS_MAX, "%s %s", outargs,
+ argv[optind]);
+ optind++;
}
done:
@@ -458,7 +449,7 @@ mkzonedir(zlog_t *zlogp)
* Run the brand's pre-state change callback, if it exists.
*/
static int
-brand_prestatechg(zlog_t *zlogp, int state, int cmd)
+brand_prestatechg(zlog_t *zlogp, int state, int cmd, boolean_t debug)
{
char cmdbuf[2 * MAXPATHLEN];
const char *altroot;
@@ -471,7 +462,7 @@ brand_prestatechg(zlog_t *zlogp, int state, int cmd)
state, cmd, altroot) > sizeof (cmdbuf))
return (-1);
- if (do_subproc(zlogp, cmdbuf, NULL) != 0)
+ if (do_subproc(zlogp, cmdbuf, NULL, debug) != 0)
return (-1);
return (0);
@@ -481,7 +472,7 @@ brand_prestatechg(zlog_t *zlogp, int state, int cmd)
* Run the brand's post-state change callback, if it exists.
*/
static int
-brand_poststatechg(zlog_t *zlogp, int state, int cmd)
+brand_poststatechg(zlog_t *zlogp, int state, int cmd, boolean_t debug)
{
char cmdbuf[2 * MAXPATHLEN];
const char *altroot;
@@ -494,7 +485,7 @@ brand_poststatechg(zlog_t *zlogp, int state, int cmd)
state, cmd, altroot) > sizeof (cmdbuf))
return (-1);
- if (do_subproc(zlogp, cmdbuf, NULL) != 0)
+ if (do_subproc(zlogp, cmdbuf, NULL, debug) != 0)
return (-1);
return (0);
@@ -533,35 +524,44 @@ notify_zonestatd(zoneid_t zoneid)
* subcommand.
*/
static int
-zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate)
+zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate, boolean_t debug)
{
int err;
+ boolean_t snapped = B_FALSE;
- if (brand_prestatechg(zlogp, zstate, Z_READY) != 0)
- return (-1);
-
+ if ((snap_hndl = zonecfg_init_handle()) == NULL) {
+ zerror(zlogp, B_TRUE, "getting zone configuration handle");
+ goto bad;
+ }
if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) {
zerror(zlogp, B_FALSE, "unable to create snapshot: %s",
zonecfg_strerror(err));
goto bad;
}
+ snapped = B_TRUE;
- if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) {
- if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
- zerror(zlogp, B_FALSE, "destroying snapshot: %s",
- zonecfg_strerror(err));
+ if (zonecfg_get_snapshot_handle(zone_name, snap_hndl) != Z_OK) {
+ zerror(zlogp, B_FALSE, "invalid configuration snapshot");
goto bad;
}
+
+ if (zone_did == 0)
+ zone_did = zone_get_did(zone_name);
+
+ if (brand_prestatechg(zlogp, zstate, Z_READY, debug) != 0)
+ goto bad;
+
+ if ((zone_id = vplat_create(zlogp, mount_cmd, zone_did)) == -1)
+ goto bad;
+
if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) {
bringup_failure_recovery = B_TRUE;
- (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE);
- if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
- zerror(zlogp, B_FALSE, "destroying snapshot: %s",
- zonecfg_strerror(err));
+ (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE,
+ debug);
goto bad;
}
- if (brand_poststatechg(zlogp, zstate, Z_READY) != 0)
+ if (brand_poststatechg(zlogp, zstate, Z_READY, debug) != 0)
goto bad;
return (0);
@@ -571,7 +571,13 @@ bad:
* If something goes wrong, we up the zones's state to the target
* state, READY, and then invoke the hook as if we're halting.
*/
- (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT);
+ (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT, debug);
+ if (snapped)
+ if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
+ zerror(zlogp, B_FALSE, "destroying snapshot: %s",
+ zonecfg_strerror(err));
+ zonecfg_fini_handle(snap_hndl);
+ snap_hndl = NULL;
return (-1);
}
@@ -623,15 +629,8 @@ mount_early_fs(void *data, const char *spec, const char *dir,
/* determine the zone rootpath */
if (mount_cmd) {
- char zonepath[MAXPATHLEN];
char luroot[MAXPATHLEN];
- if (zone_get_zonepath(zone_name,
- zonepath, sizeof (zonepath)) != Z_OK) {
- zerror(zlogp, B_FALSE, "unable to determine zone path");
- return (-1);
- }
-
(void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath);
resolve_lofs(zlogp, luroot, sizeof (luroot));
(void) strlcpy(rootpath, luroot, sizeof (rootpath));
@@ -686,6 +685,8 @@ mount_early_fs(void *data, const char *spec, const char *dir,
char opt_buf[MAX_MNTOPT_STR];
int optlen = 0;
int mflag = MS_DATA;
+ int i;
+ int ret;
(void) ct_tmpl_clear(tmpl_fd);
/*
@@ -713,9 +714,26 @@ mount_early_fs(void *data, const char *spec, const char *dir,
optlen = MAX_MNTOPT_STR;
mflag = MS_OPTIONSTR;
}
- if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0)
- _exit(errno);
- _exit(0);
+
+ /*
+ * There is an obscure race condition which can cause mount
+ * to return EBUSY. This happens for example on the mount
+ * of the zone's /etc/svc/volatile file system if there is
+ * a GZ process running svcs -Z, which will touch the
+ * mountpoint, just as we're trying to do the mount. To cope
+ * with this, we retry up to 3 times to let this transient
+ * process get out of the way.
+ */
+ for (i = 0; i < 3; i++) {
+ ret = 0;
+ if (mount(spec, dir, mflag, fstype, NULL, 0, opt,
+ optlen) != 0)
+ ret = errno;
+ if (ret != EBUSY)
+ break;
+ (void) sleep(1);
+ }
+ _exit(ret);
}
/* parent */
@@ -739,12 +757,151 @@ mount_early_fs(void *data, const char *spec, const char *dir,
}
/*
+ * env variable name format
+ * _ZONECFG_{resource name}_{identifying attr. name}_{property name}
+ * Any dashes (-) in the property names are replaced with underscore (_).
+ */
+static void
+set_zonecfg_env(char *rsrc, char *attr, char *name, char *val)
+{
+ char *p;
+ /* Enough for maximal name, rsrc + attr, & slop for ZONECFG & _'s */
+ char nm[2 * MAXNAMELEN + 32];
+
+ if (attr == NULL)
+ (void) snprintf(nm, sizeof (nm), "_ZONECFG_%s_%s", rsrc,
+ name);
+ else
+ (void) snprintf(nm, sizeof (nm), "_ZONECFG_%s_%s_%s", rsrc,
+ attr, name);
+
+ p = nm;
+ while ((p = strchr(p, '-')) != NULL)
+ *p++ = '_';
+
+ (void) setenv(nm, val, 1);
+}
+
+/*
+ * Export zonecfg network and device properties into environment for the boot
+ * and state change hooks.
+ * If debug is true, export the brand hook debug env. variable as well.
+ *
+ * We could export more of the config in the future, as necessary.
+ */
+static int
+setup_subproc_env(boolean_t debug)
+{
+ int res;
+ struct zone_nwiftab ntab;
+ struct zone_devtab dtab;
+ struct zone_attrtab atab;
+ char net_resources[MAXNAMELEN * 2];
+ char dev_resources[MAXNAMELEN * 2];
+
+ /* snap_hndl is null when called through the set_brand_env code path */
+ if (snap_hndl == NULL)
+ return (Z_OK);
+
+ net_resources[0] = '\0';
+ if ((res = zonecfg_setnwifent(snap_hndl)) != Z_OK)
+ goto done;
+
+ while (zonecfg_getnwifent(snap_hndl, &ntab) == Z_OK) {
+ struct zone_res_attrtab *rap;
+ char *phys;
+
+ phys = ntab.zone_nwif_physical;
+
+ (void) strlcat(net_resources, phys, sizeof (net_resources));
+ (void) strlcat(net_resources, " ", sizeof (net_resources));
+
+ set_zonecfg_env(RSRC_NET, phys, "physical", phys);
+
+ set_zonecfg_env(RSRC_NET, phys, "address",
+ ntab.zone_nwif_address);
+ set_zonecfg_env(RSRC_NET, phys, "allowed-address",
+ ntab.zone_nwif_allowed_address);
+ set_zonecfg_env(RSRC_NET, phys, "defrouter",
+ ntab.zone_nwif_defrouter);
+ set_zonecfg_env(RSRC_NET, phys, "global-nic",
+ ntab.zone_nwif_gnic);
+ set_zonecfg_env(RSRC_NET, phys, "mac-addr", ntab.zone_nwif_mac);
+ set_zonecfg_env(RSRC_NET, phys, "vlan-id",
+ ntab.zone_nwif_vlan_id);
+
+ for (rap = ntab.zone_nwif_attrp; rap != NULL;
+ rap = rap->zone_res_attr_next)
+ set_zonecfg_env(RSRC_NET, phys, rap->zone_res_attr_name,
+ rap->zone_res_attr_value);
+ nwifent_free_attrs(&ntab);
+ }
+
+ (void) setenv("_ZONECFG_net_resources", net_resources, 1);
+
+ (void) zonecfg_endnwifent(snap_hndl);
+
+ if ((res = zonecfg_setdevent(snap_hndl)) != Z_OK)
+ goto done;
+
+ while (zonecfg_getdevent(snap_hndl, &dtab) == Z_OK) {
+ struct zone_res_attrtab *rap;
+ char *match;
+
+ match = dtab.zone_dev_match;
+
+ (void) strlcat(dev_resources, match, sizeof (dev_resources));
+ (void) strlcat(dev_resources, " ", sizeof (dev_resources));
+
+ for (rap = dtab.zone_dev_attrp; rap != NULL;
+ rap = rap->zone_res_attr_next)
+ set_zonecfg_env(RSRC_DEV, match,
+ rap->zone_res_attr_name, rap->zone_res_attr_value);
+ }
+
+ (void) zonecfg_enddevent(snap_hndl);
+
+ if ((res = zonecfg_setattrent(snap_hndl)) != Z_OK)
+ goto done;
+
+ while (zonecfg_getattrent(snap_hndl, &atab) == Z_OK) {
+ set_zonecfg_env("attr", NULL, atab.zone_attr_name,
+ atab.zone_attr_value);
+ }
+
+ (void) zonecfg_endattrent(snap_hndl);
+
+ if (debug)
+ (void) setenv("_ZONEADMD_brand_debug", "1", 1);
+ else
+ (void) setenv("_ZONEADMD_brand_debug", "", 1);
+
+ res = Z_OK;
+
+done:
+ return (res);
+}
+
+void
+nwifent_free_attrs(struct zone_nwiftab *np)
+{
+ struct zone_res_attrtab *rap;
+
+ for (rap = np->zone_nwif_attrp; rap != NULL; ) {
+ struct zone_res_attrtab *tp = rap;
+
+ rap = rap->zone_res_attr_next;
+ free(tp);
+ }
+}
+
+/*
* If retstr is not NULL, the output of the subproc is returned in the str,
* otherwise it is output using zerror(). Any memory allocated for retstr
* should be freed by the caller.
*/
int
-do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr)
+do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr, boolean_t debug)
{
char buf[1024]; /* arbitrary large amount */
char *inbuf;
@@ -763,6 +920,11 @@ do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr)
inbuf = buf;
}
+ if (setup_subproc_env(debug) != Z_OK) {
+ zerror(zlogp, B_FALSE, "failed to setup environment");
+ return (-1);
+ }
+
file = popen(cmdbuf, "r");
if (file == NULL) {
zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf);
@@ -771,8 +933,13 @@ do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr)
while (fgets(inbuf, 1024, file) != NULL) {
if (retstr == NULL) {
- if (zlogp != &logsys)
+ if (zlogp != &logsys) {
+ int last = strlen(inbuf) - 1;
+
+ if (inbuf[last] == '\n')
+ inbuf[last] = '\0';
zerror(zlogp, B_FALSE, "%s", inbuf);
+ }
} else {
char *p;
@@ -802,24 +969,91 @@ do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr)
return (WEXITSTATUS(status));
}
+/*
+ * Get the path for this zone's init(1M) (or equivalent) process. First look
+ * for a zone-specific init-name attr, then get it from the brand.
+ */
static int
-zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
+get_initname(brand_handle_t bh, char *initname, int len)
+{
+ struct zone_attrtab a;
+
+ bzero(&a, sizeof (a));
+ (void) strlcpy(a.zone_attr_name, "init-name",
+ sizeof (a.zone_attr_name));
+
+ if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK) {
+ (void) strlcpy(initname, a.zone_attr_value, len);
+ return (0);
+ }
+
+ return (brand_get_initname(bh, initname, len));
+}
+
+/*
+ * Get the restart-init flag for this zone's init(1M) (or equivalent) process.
+ * First look for a zone-specific restart-init attr, then get it from the brand.
+ */
+static boolean_t
+restartinit(brand_handle_t bh)
+{
+ struct zone_attrtab a;
+
+ bzero(&a, sizeof (a));
+ (void) strlcpy(a.zone_attr_name, "restart-init",
+ sizeof (a.zone_attr_name));
+
+ if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK) {
+ if (strcmp(a.zone_attr_value, "false") == 0)
+ return (B_FALSE);
+ return (B_TRUE);
+ }
+
+ return (brand_restartinit(bh));
+}
+
+/*
+ * Get the app-svc-dependent flag for this zone's init process. This is a
+ * zone-specific attr which controls the type of contract we create for the
+ * zone's init. When true, the contract will include CT_PR_EV_EXIT in the fatal
+ * set, so that when any service which is in the same contract exits, the init
+ * application will be terminated.
+ */
+static boolean_t
+is_app_svc_dep(brand_handle_t bh)
+{
+ struct zone_attrtab a;
+
+ bzero(&a, sizeof (a));
+ (void) strlcpy(a.zone_attr_name, "app-svc-dependent",
+ sizeof (a.zone_attr_name));
+
+ if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK &&
+ strcmp(a.zone_attr_value, "true") == 0) {
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
+static int
+zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate, boolean_t debug)
{
zoneid_t zoneid;
struct stat st;
- char zpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN];
+ char rpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN];
char nbootargs[BOOTARGS_MAX];
char cmdbuf[MAXPATHLEN];
fs_callback_t cb;
brand_handle_t bh;
zone_iptype_t iptype;
- boolean_t links_loaded = B_FALSE;
dladm_status_t status;
char errmsg[DLADM_STRSIZE];
int err;
boolean_t restart_init;
+ boolean_t app_svc_dep;
- if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0)
+ if (brand_prestatechg(zlogp, zstate, Z_BOOT, debug) != 0)
return (-1);
if ((zoneid = getzoneidbyname(zone_name)) == -1) {
@@ -852,13 +1086,8 @@ zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
/*
* Get the brand's boot callback if it exists.
*/
- if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
- zerror(zlogp, B_FALSE, "unable to determine zone path");
- brand_close(bh);
- goto bad;
- }
(void) strcpy(cmdbuf, EXEC_PREFIX);
- if (brand_get_boot(bh, zone_name, zpath, cmdbuf + EXEC_LEN,
+ if (brand_get_boot(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
sizeof (cmdbuf) - EXEC_LEN) != 0) {
zerror(zlogp, B_FALSE,
"unable to determine branded zone's boot callback");
@@ -867,41 +1096,49 @@ zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
}
/* Get the path for this zone's init(1M) (or equivalent) process. */
- if (brand_get_initname(bh, init_file, MAXPATHLEN) != 0) {
+ if (get_initname(bh, init_file, MAXPATHLEN) != 0) {
zerror(zlogp, B_FALSE,
"unable to determine zone's init(1M) location");
brand_close(bh);
goto bad;
}
- /* See if this zone's brand should restart init if it dies. */
- restart_init = brand_restartinit(bh);
+ /* See if we should restart init if it dies. */
+ restart_init = restartinit(bh);
+
+ /*
+ * See if we need to setup contract dependencies between the zone's
+ * primary application and any of its services.
+ */
+ app_svc_dep = is_app_svc_dep(bh);
brand_close(bh);
- err = filter_bootargs(zlogp, bootargs, nbootargs, init_file,
- bad_boot_arg);
- if (err == Z_INVAL)
- eventstream_write(Z_EVT_ZONE_BADARGS);
- else if (err != Z_OK)
+ err = filter_bootargs(zlogp, bootargs, nbootargs, init_file);
+ if (err != Z_OK)
goto bad;
assert(init_file[0] != '\0');
- /* Try to anticipate possible problems: Make sure init is executable. */
- if (zone_get_rootpath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
+ /*
+ * Try to anticipate possible problems: If possible, make sure init is
+ * executable.
+ */
+ if (zone_get_rootpath(zone_name, rpath, sizeof (rpath)) != Z_OK) {
zerror(zlogp, B_FALSE, "unable to determine zone root");
goto bad;
}
- (void) snprintf(initpath, sizeof (initpath), "%s%s", zpath, init_file);
+ (void) snprintf(initpath, sizeof (initpath), "%s%s", rpath, init_file);
- if (stat(initpath, &st) == -1) {
+ if (lstat(initpath, &st) == -1) {
zerror(zlogp, B_TRUE, "could not stat %s", initpath);
goto bad;
}
- if ((st.st_mode & S_IXUSR) == 0) {
+ if ((st.st_mode & S_IFMT) == S_IFLNK) {
+ /* symlink, we'll have to wait and resolve when we boot */
+ } else if ((st.st_mode & S_IXUSR) == 0) {
zerror(zlogp, B_FALSE, "%s is not executable", initpath);
goto bad;
}
@@ -919,7 +1156,6 @@ zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
" %s", dladm_status2str(status, errmsg));
goto bad;
}
- links_loaded = B_TRUE;
}
/*
@@ -928,7 +1164,7 @@ zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
* is booted.
*/
if ((strlen(cmdbuf) > EXEC_LEN) &&
- (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) {
+ (do_subproc(zlogp, cmdbuf, NULL, debug) != Z_OK)) {
zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
goto bad;
}
@@ -949,6 +1185,12 @@ zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
goto bad;
}
+ if (app_svc_dep && zone_setattr(zoneid, ZONE_ATTR_APP_SVC_CT,
+ (void *)B_TRUE, sizeof (boolean_t)) == -1) {
+ zerror(zlogp, B_TRUE, "could not set zone app-die");
+ goto bad;
+ }
+
/*
* Inform zonestatd of a new zone so that it can install a door for
* the zone to contact it.
@@ -960,9 +1202,15 @@ zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
goto bad;
}
- if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0)
+ if (brand_poststatechg(zlogp, zstate, Z_BOOT, debug) != 0)
goto bad;
+ /* Startup a thread to perform zfd logging/tty svc for the zone. */
+ create_log_thread(zlogp, zone_id);
+
+ /* Startup a thread to perform memory capping for the zone. */
+ create_mcap_thread(zlogp, zone_id);
+
return (0);
bad:
@@ -970,32 +1218,42 @@ bad:
* If something goes wrong, we up the zones's state to the target
* state, RUNNING, and then invoke the hook as if we're halting.
*/
- (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT);
- if (links_loaded)
- (void) dladm_zone_halt(dld_handle, zoneid);
+ (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT, debug);
+
return (-1);
}
static int
-zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate)
+zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate,
+ boolean_t debug)
{
int err;
- if (brand_prestatechg(zlogp, zstate, Z_HALT) != 0)
+ if (brand_prestatechg(zlogp, zstate, Z_HALT, debug) != 0)
return (-1);
- if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) {
+ /* Shutting down, stop the memcap thread */
+ destroy_mcap_thread();
+
+ if (vplat_teardown(zlogp, unmount_cmd, rebooting, debug) != 0) {
if (!bringup_failure_recovery)
zerror(zlogp, B_FALSE, "unable to destroy zone");
+ destroy_log_thread();
return (-1);
}
+ /* Shut down is done, stop the log thread */
+ destroy_log_thread();
+
+ if (brand_poststatechg(zlogp, zstate, Z_HALT, debug) != 0)
+ return (-1);
+
if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
zerror(zlogp, B_FALSE, "destroying snapshot: %s",
zonecfg_strerror(err));
- if (brand_poststatechg(zlogp, zstate, Z_HALT) != 0)
- return (-1);
+ zonecfg_fini_handle(snap_hndl);
+ snap_hndl = NULL;
return (0);
}
@@ -1007,7 +1265,6 @@ zone_graceful_shutdown(zlog_t *zlogp)
pid_t child;
char cmdbuf[MAXPATHLEN];
brand_handle_t bh = NULL;
- char zpath[MAXPATHLEN];
ctid_t ct;
int tmpl_fd;
int child_status;
@@ -1028,18 +1285,12 @@ zone_graceful_shutdown(zlog_t *zlogp)
return (-1);
}
- if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
- zerror(zlogp, B_FALSE, "unable to determine zone path");
- brand_close(bh);
- return (-1);
- }
-
/*
* If there is a brand 'shutdown' callback, execute it now to give the
* brand a chance to cleanup any custom configuration.
*/
(void) strcpy(cmdbuf, EXEC_PREFIX);
- if (brand_get_shutdown(bh, zone_name, zpath, cmdbuf + EXEC_LEN,
+ if (brand_get_shutdown(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
sizeof (cmdbuf) - EXEC_LEN) != 0 || strlen(cmdbuf) <= EXEC_LEN) {
(void) strcat(cmdbuf, SHUTDOWN_DEFAULT);
}
@@ -1177,6 +1428,36 @@ audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val,
}
/*
+ * Log the exit time and status of the zone's init process into
+ * {zonepath}/lastexited. If the zone shutdown normally, the exit status will
+ * be -1, otherwise it will be the exit status as described in wait.3c.
+ * If the zone is configured to restart init, then nothing will be logged if
+ * init exits unexpectedly (the kernel will never upcall in this case).
+ */
+static void
+log_init_exit(int status)
+{
+ char p[MAXPATHLEN];
+ char buf[128];
+ struct timeval t;
+ int fd;
+
+ if (snprintf(p, sizeof (p), "%s/lastexited", zonepath) > sizeof (p))
+ return;
+ if (gettimeofday(&t, NULL) != 0)
+ return;
+ if (snprintf(buf, sizeof (buf), "%ld.%ld %d\n", t.tv_sec, t.tv_usec,
+ status) > sizeof (buf))
+ return;
+ if ((fd = open(p, O_WRONLY | O_CREAT | O_TRUNC, 0644)) < 0)
+ return;
+
+ (void) write(fd, buf, strlen(buf));
+
+ (void) close(fd);
+}
+
+/*
* The main routine for the door server that deals with zone state transitions.
*/
/* ARGSUSED */
@@ -1189,9 +1470,11 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
zone_state_t zstate;
zone_cmd_t cmd;
+ boolean_t debug;
+ int init_status;
zone_cmd_arg_t *zargp;
- boolean_t kernelcall;
+ boolean_t kernelcall = B_TRUE;
int rval = -1;
uint64_t uniqid;
@@ -1241,6 +1524,8 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
goto out;
}
cmd = zargp->cmd;
+ debug = zargp->debug;
+ init_status = zargp->status;
if (door_ucred(&uc) != 0) {
zerror(&logsys, B_TRUE, "door_ucred");
@@ -1347,23 +1632,25 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
case ZONE_STATE_INSTALLED:
switch (cmd) {
case Z_READY:
- rval = zone_ready(zlogp, Z_MNT_BOOT, zstate);
+ rval = zone_ready(zlogp, Z_MNT_BOOT, zstate, debug);
if (rval == 0)
eventstream_write(Z_EVT_ZONE_READIED);
+ zcons_statechanged();
break;
case Z_BOOT:
case Z_FORCEBOOT:
eventstream_write(Z_EVT_ZONE_BOOTING);
- if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
- == 0) {
+ if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate,
+ debug)) == 0) {
rval = zone_bootup(zlogp, zargp->bootbuf,
- zstate);
+ zstate, debug);
}
audit_put_record(zlogp, uc, rval, "boot");
+ zcons_statechanged();
if (rval != 0) {
bringup_failure_recovery = B_TRUE;
(void) zone_halt(zlogp, B_FALSE, B_FALSE,
- zstate);
+ zstate, debug);
eventstream_write(Z_EVT_ZONE_BOOTFAILED);
}
break;
@@ -1415,7 +1702,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
rval = zone_ready(zlogp,
strcmp(zargp->bootbuf, "-U") == 0 ?
- Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate);
+ Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate, debug);
if (rval != 0)
break;
@@ -1477,15 +1764,18 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
rval = 0;
break;
case Z_BOOT:
+ case Z_FORCEBOOT:
(void) strlcpy(boot_args, zargp->bootbuf,
sizeof (boot_args));
eventstream_write(Z_EVT_ZONE_BOOTING);
- rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
+ rval = zone_bootup(zlogp, zargp->bootbuf, zstate,
+ debug);
audit_put_record(zlogp, uc, rval, "boot");
+ zcons_statechanged();
if (rval != 0) {
bringup_failure_recovery = B_TRUE;
(void) zone_halt(zlogp, B_FALSE, B_TRUE,
- zstate);
+ zstate, debug);
eventstream_write(Z_EVT_ZONE_BOOTFAILED);
}
boot_args[0] = '\0';
@@ -1493,15 +1783,17 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
case Z_HALT:
if (kernelcall) /* Invalid; can't happen */
abort();
- if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
- != 0)
+ if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate,
+ debug)) != 0)
break;
+ zcons_statechanged();
eventstream_write(Z_EVT_ZONE_HALTED);
break;
case Z_SHUTDOWN:
case Z_REBOOT:
case Z_NOTE_UNINSTALLING:
case Z_MOUNT:
+ case Z_FORCEMOUNT:
case Z_UNMOUNT:
if (kernelcall) /* Invalid; can't happen */
abort();
@@ -1518,7 +1810,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
case Z_UNMOUNT:
if (kernelcall) /* Invalid; can't happen */
abort();
- rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate);
+ rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate, debug);
if (rval == 0) {
eventstream_write(Z_EVT_ZONE_HALTED);
(void) sema_post(&scratch_sem);
@@ -1540,15 +1832,18 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
case ZONE_STATE_DOWN:
switch (cmd) {
case Z_READY:
- if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
- != 0)
+ if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate,
+ debug)) != 0)
break;
- if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) == 0)
+ zcons_statechanged();
+ if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate,
+ debug)) == 0)
eventstream_write(Z_EVT_ZONE_READIED);
else
eventstream_write(Z_EVT_ZONE_HALTED);
break;
case Z_BOOT:
+ case Z_FORCEBOOT:
/*
* We could have two clients racing to boot this
* zone; the second client loses, but his request
@@ -1559,32 +1854,40 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
rval = 0;
break;
case Z_HALT:
- if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
- != 0)
+ if (kernelcall) {
+ log_init_exit(init_status);
+ } else {
+ log_init_exit(-1);
+ }
+ if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate,
+ debug)) != 0)
break;
eventstream_write(Z_EVT_ZONE_HALTED);
+ zcons_statechanged();
break;
case Z_REBOOT:
(void) strlcpy(boot_args, zargp->bootbuf,
sizeof (boot_args));
eventstream_write(Z_EVT_ZONE_REBOOTING);
- if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
- != 0) {
+ if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate,
+ debug)) != 0) {
eventstream_write(Z_EVT_ZONE_BOOTFAILED);
boot_args[0] = '\0';
break;
}
- if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
- != 0) {
+ zcons_statechanged();
+ if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate,
+ debug)) != 0) {
eventstream_write(Z_EVT_ZONE_BOOTFAILED);
boot_args[0] = '\0';
break;
}
- rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
+ rval = zone_bootup(zlogp, zargp->bootbuf, zstate,
+ debug);
audit_put_record(zlogp, uc, rval, "reboot");
if (rval != 0) {
(void) zone_halt(zlogp, B_FALSE, B_TRUE,
- zstate);
+ zstate, debug);
eventstream_write(Z_EVT_ZONE_BOOTFAILED);
}
boot_args[0] = '\0';
@@ -1596,6 +1899,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
break;
case Z_NOTE_UNINSTALLING:
case Z_MOUNT:
+ case Z_FORCEMOUNT:
case Z_UNMOUNT:
zerror(zlogp, B_FALSE, "%s operation is invalid "
"for zones in state '%s'", z_cmd_name(cmd),
@@ -1759,11 +2063,38 @@ top:
* state.
*/
if (zstate > ZONE_STATE_INSTALLED) {
+ static zoneid_t zid;
+
zerror(zlogp, B_FALSE,
"zone '%s': WARNING: zone is in state '%s', but "
"zoneadmd does not appear to be available; "
"restarted zoneadmd to recover.",
zone_name, zone_state_str(zstate));
+
+ /*
+ * Startup a thread to perform the zfd logging/tty svc
+ * and a thread to perform memory capping for the
+ * zone. zlogp won't be valid for much longer so use
+ * logsys.
+ */
+ if ((zid = getzoneidbyname(zone_name)) != -1) {
+ create_log_thread(&logsys, zid);
+ create_mcap_thread(&logsys, zid);
+ }
+
+ /* recover the global configuration snapshot */
+ if (snap_hndl == NULL) {
+ if ((snap_hndl = zonecfg_init_handle())
+ == NULL ||
+ zonecfg_create_snapshot(zone_name)
+ != Z_OK ||
+ zonecfg_get_snapshot_handle(zone_name,
+ snap_hndl) != Z_OK) {
+ zerror(zlogp, B_FALSE, "recovering "
+ "zone configuration handle");
+ goto out;
+ }
+ }
}
(void) fdetach(zone_door_path);
@@ -1777,21 +2108,62 @@ out:
}
/*
- * Setup the brand's pre and post state change callbacks, as well as the
- * query callback, if any of these exist.
+ * Run the query hook with the 'env' parameter. It should return a
+ * string of tab-delimited key-value pairs, each of which should be set
+ * in the environment.
+ *
+ * Because the env_vars string values become part of the environment, the
+ * string is static and we don't free it.
+ *
+ * This function is always called before zoneadmd forks and makes itself
+ * exclusive, so it is possible there could more than one instance of zoneadmd
+ * running in parallel at this point. Thus, we have no zonecfg snapshot and
+ * shouldn't take one yet (i.e. snap_hndl is NULL). Thats ok, since we don't
+ * need any zonecfg info to query for a brand-specific env value.
*/
static int
-brand_callback_init(brand_handle_t bh, char *zone_name)
+set_brand_env(zlog_t *zlogp)
{
- char zpath[MAXPATHLEN];
+ int ret = 0;
+ static char *env_vars = NULL;
+ char buf[2 * MAXPATHLEN];
+
+ if (query_hook[0] == '\0' || env_vars != NULL)
+ return (0);
- if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK)
+ if (snprintf(buf, sizeof (buf), "%s env", query_hook) > sizeof (buf))
return (-1);
+ if (do_subproc(zlogp, buf, &env_vars, B_FALSE) != 0)
+ return (-1);
+
+ if (env_vars != NULL) {
+ char *sp;
+
+ sp = strtok(env_vars, "\t");
+ while (sp != NULL) {
+ if (putenv(sp) != 0) {
+ ret = -1;
+ break;
+ }
+ sp = strtok(NULL, "\t");
+ }
+ }
+
+ return (ret);
+}
+
+/*
+ * Setup the brand's pre and post state change callbacks, as well as the
+ * query callback, if any of these exist.
+ */
+static int
+brand_callback_init(brand_handle_t bh, char *zone_name)
+{
(void) strlcpy(pre_statechg_hook, EXEC_PREFIX,
sizeof (pre_statechg_hook));
- if (brand_get_prestatechange(bh, zone_name, zpath,
+ if (brand_get_prestatechange(bh, zone_name, zonepath,
pre_statechg_hook + EXEC_LEN,
sizeof (pre_statechg_hook) - EXEC_LEN) != 0)
return (-1);
@@ -1802,7 +2174,7 @@ brand_callback_init(brand_handle_t bh, char *zone_name)
(void) strlcpy(post_statechg_hook, EXEC_PREFIX,
sizeof (post_statechg_hook));
- if (brand_get_poststatechange(bh, zone_name, zpath,
+ if (brand_get_poststatechange(bh, zone_name, zonepath,
post_statechg_hook + EXEC_LEN,
sizeof (post_statechg_hook) - EXEC_LEN) != 0)
return (-1);
@@ -1813,7 +2185,7 @@ brand_callback_init(brand_handle_t bh, char *zone_name)
(void) strlcpy(query_hook, EXEC_PREFIX,
sizeof (query_hook));
- if (brand_get_query(bh, zone_name, zpath, query_hook + EXEC_LEN,
+ if (brand_get_query(bh, zone_name, zonepath, query_hook + EXEC_LEN,
sizeof (query_hook) - EXEC_LEN) != 0)
return (-1);
@@ -1941,6 +2313,11 @@ main(int argc, char *argv[])
return (1);
}
+ if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
+ zerror(zlogp, B_FALSE, "unable to determine zone path");
+ return (-1);
+ }
+
if (zonecfg_default_brand(default_brand,
sizeof (default_brand)) != Z_OK) {
zerror(zlogp, B_FALSE, "unable to determine default brand");
@@ -2012,6 +2389,11 @@ main(int argc, char *argv[])
}
priv_freeset(privset);
+ if (set_brand_env(zlogp) != 0) {
+ zerror(zlogp, B_FALSE, "Unable to setup brand's environment");
+ return (1);
+ }
+
if (mkzonedir(zlogp) != 0)
return (1);
diff --git a/usr/src/cmd/zoneadmd/zoneadmd.h b/usr/src/cmd/zoneadmd/zoneadmd.h
index d784a303b3..7e5dcea432 100644
--- a/usr/src/cmd/zoneadmd/zoneadmd.h
+++ b/usr/src/cmd/zoneadmd/zoneadmd.h
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2014, Joyent, Inc. All rights reserved.
*/
#ifndef _ZONEADMD_H
@@ -90,17 +91,19 @@ extern mutex_t msglock;
extern boolean_t in_death_throes;
extern boolean_t bringup_failure_recovery;
extern char *zone_name;
+extern char zonepath[MAXNAMELEN];
+extern zone_dochandle_t snap_hndl;
extern char pool_name[MAXNAMELEN];
extern char brand_name[MAXNAMELEN];
extern char default_brand[MAXNAMELEN];
extern char boot_args[BOOTARGS_MAX];
-extern char bad_boot_arg[BOOTARGS_MAX];
extern boolean_t zone_isnative;
extern boolean_t zone_iscluster;
extern dladm_handle_t dld_handle;
extern void zerror(zlog_t *, boolean_t, const char *, ...);
extern char *localize_msg(char *locale, const char *msg);
+extern void nwifent_free_attrs(struct zone_nwiftab *);
/*
* Eventstream interfaces.
@@ -112,8 +115,7 @@ typedef enum {
Z_EVT_ZONE_HALTED,
Z_EVT_ZONE_READIED,
Z_EVT_ZONE_UNINSTALLING,
- Z_EVT_ZONE_BOOTFAILED,
- Z_EVT_ZONE_BADARGS
+ Z_EVT_ZONE_BOOTFAILED
} zone_evt_t;
extern int eventstream_init();
@@ -135,9 +137,9 @@ typedef enum {
/*
* Virtual platform interfaces.
*/
-extern zoneid_t vplat_create(zlog_t *, zone_mnt_t);
+extern zoneid_t vplat_create(zlog_t *, zone_mnt_t, zoneid_t);
extern int vplat_bringup(zlog_t *, zone_mnt_t, zoneid_t);
-extern int vplat_teardown(zlog_t *, boolean_t, boolean_t);
+extern int vplat_teardown(zlog_t *, boolean_t, boolean_t, boolean_t);
extern int vplat_get_iptype(zlog_t *, zone_iptype_t *);
/*
@@ -154,6 +156,19 @@ extern void resolve_lofs(zlog_t *zlogp, char *path, size_t pathlen);
*/
extern int init_console(zlog_t *);
extern void serve_console(zlog_t *);
+extern void zcons_statechanged();
+
+/*
+ * Memory capping thread creation.
+ */
+extern void create_mcap_thread(zlog_t *, zoneid_t);
+extern void destroy_mcap_thread();
+
+/*
+ * Zone FD log thread creation.
+ */
+extern void create_log_thread(zlog_t *, zoneid_t);
+extern void destroy_log_thread();
/*
* Contract handling.
@@ -163,7 +178,7 @@ extern int init_template(void);
/*
* Routine to manage child processes.
*/
-extern int do_subproc(zlog_t *, char *, char **);
+extern int do_subproc(zlog_t *, char *, char **, boolean_t);
#ifdef __cplusplus
}
diff --git a/usr/src/cmd/zonecfg/Makefile b/usr/src/cmd/zonecfg/Makefile
index ae8f5c11d1..e364a59679 100644
--- a/usr/src/cmd/zonecfg/Makefile
+++ b/usr/src/cmd/zonecfg/Makefile
@@ -27,6 +27,7 @@ PROG= zonecfg
OBJS= zonecfg.o zonecfg_lex.o zonecfg_grammar.tab.o
include ../Makefile.cmd
+include ../Makefile.ctf
# zonecfg has a name clash with main() and libl.so.1. However, zonecfg must
# still export a number of "yy*" (libl) interfaces. Reduce all other symbols
@@ -36,7 +37,8 @@ MAPOPTS = $(MAPFILES:%=-M%)
LFLAGS = -t
YFLAGS = -d -b zonecfg_grammar
-LDLIBS += -lzonecfg -ll -lnsl -ltecla -lzfs -lbrand -ldladm -linetutil
+LDLIBS += -lzonecfg -ll -lnsl -ltecla -lzfs -lbrand -ldladm -linetutil -luuid
+CFLAGS += -DYYLMAX=2048
CPPFLAGS += -I.
LDFLAGS += $(MAPOPTS)
CLEANFILES += zonecfg_lex.c zonecfg_grammar.tab.c zonecfg_grammar.tab.h
diff --git a/usr/src/cmd/zonecfg/zonecfg.c b/usr/src/cmd/zonecfg/zonecfg.c
index 3dbec383bf..8c318c547e 100644
--- a/usr/src/cmd/zonecfg/zonecfg.c
+++ b/usr/src/cmd/zonecfg/zonecfg.c
@@ -23,6 +23,7 @@
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
* Copyright 2014 Gary Mills
+ * Copyright 2016, Joyent Inc.
*/
/*
@@ -79,6 +80,7 @@
#include <libinetutil.h>
#include <pwd.h>
#include <inet/ip.h>
+#include <uuid/uuid.h>
#include <libzonecfg.h>
#include "zonecfg.h"
@@ -126,7 +128,7 @@ extern int lex_lineno;
#define SHELP_REMOVE "remove [-F] <resource-type> " \
"[ <property-name>=<property-value> ]*\n" \
"\t(global scope)\n" \
- "remove <property-name> <property-value>\n" \
+ "remove [-F] <property-name> <property-value>\n" \
"\t(resource scope)"
#define SHELP_REVERT "revert [-F]"
#define SHELP_SELECT "select <resource-type> { <property-name>=" \
@@ -187,6 +189,8 @@ char *res_types[] = {
"admin",
"fs-allowed",
ALIAS_MAXPROCS,
+ ALIAS_ZFSPRI,
+ "uuid",
NULL
};
@@ -234,6 +238,12 @@ char *prop_types[] = {
"fs-allowed",
ALIAS_MAXPROCS,
"allowed-address",
+ ALIAS_ZFSPRI,
+ "mac-addr",
+ "vlan-id",
+ "global-nic",
+ "property",
+ "uuid",
NULL
};
@@ -299,6 +309,7 @@ static const char *clear_cmds[] = {
"clear " ALIAS_MAXSEMIDS,
"clear " ALIAS_SHARES,
"clear " ALIAS_MAXPROCS,
+ "clear " ALIAS_ZFSPRI,
NULL
};
@@ -349,6 +360,8 @@ static const char *set_cmds[] = {
"set hostid=",
"set fs-allowed=",
"set " ALIAS_MAXPROCS "=",
+ "set " ALIAS_ZFSPRI "=",
+ "set uuid=",
NULL
};
@@ -381,6 +394,7 @@ static const char *info_cmds[] = {
"info admin",
"info fs-allowed",
"info max-processes",
+ "info uuid",
NULL
};
@@ -406,9 +420,20 @@ static const char *net_res_scope_cmds[] = {
"exit",
"help",
"info",
+ "add property ",
+ "clear allowed-address",
+ "clear defrouter",
+ "clear global-nic",
+ "clear mac-addr",
+ "clear vlan-id",
+ "remove property ",
"set address=",
- "set physical=",
+ "set allowed-address=",
"set defrouter=",
+ "set global-nic=",
+ "set mac-addr=",
+ "set physical=",
+ "set vlan-id=",
NULL
};
@@ -418,6 +443,7 @@ static const char *device_res_scope_cmds[] = {
"exit",
"help",
"info",
+ "add property ",
"set match=",
NULL
};
@@ -525,12 +551,14 @@ static zone_dochandle_t handle;
/* used all over the place */
static char zone[ZONENAME_MAX];
static char revert_zone[ZONENAME_MAX];
+static char new_uuid[UUID_PRINTABLE_STRING_LENGTH];
/* global brand operations */
static brand_handle_t brand;
/* set in modifying functions, checked in read_input() */
static boolean_t need_to_commit = B_FALSE;
+static boolean_t is_create = B_FALSE;
boolean_t saw_error;
/* set in yacc parser, checked in read_input() */
@@ -579,7 +607,6 @@ static struct zone_rctltab old_rctltab, in_progress_rctltab;
static struct zone_attrtab old_attrtab, in_progress_attrtab;
static struct zone_dstab old_dstab, in_progress_dstab;
static struct zone_psettab old_psettab, in_progress_psettab;
-static struct zone_mcaptab old_mcaptab, in_progress_mcaptab;
static struct zone_admintab old_admintab, in_progress_admintab;
static GetLine *gl; /* The gl_get_line() resource object */
@@ -973,7 +1000,8 @@ path_find(const char *name)
}
static FILE *
-pager_open(void) {
+pager_open(void)
+{
FILE *newfp;
char *pager, *space;
@@ -999,7 +1027,8 @@ pager_open(void) {
}
static void
-pager_close(FILE *fp) {
+pager_close(FILE *fp)
+{
int status;
status = pclose(fp);
@@ -1078,11 +1107,20 @@ usage(boolean_t verbose, uint_t flags)
(void) fprintf(fp, gettext("Valid commands:\n"));
(void) fprintf(fp, "\t%s %s=%s\n", cmd_to_str(CMD_SET),
pt_to_str(PT_ADDRESS), gettext("<IP-address>"));
+ (void) fprintf(fp, "\t%s %s (%s=<value>,%s=<value>)\n",
+ cmd_to_str(CMD_ADD), pt_to_str(PT_NPROP),
+ pt_to_str(PT_NAME), pt_to_str(PT_VALUE));
(void) fprintf(fp, "\t%s %s=%s\n", cmd_to_str(CMD_SET),
pt_to_str(PT_ALLOWED_ADDRESS),
gettext("<IP-address>"));
(void) fprintf(fp, "\t%s %s=%s\n", cmd_to_str(CMD_SET),
pt_to_str(PT_PHYSICAL), gettext("<interface>"));
+ (void) fprintf(fp, "\t%s %s=%s\n", cmd_to_str(CMD_SET),
+ pt_to_str(PT_MAC), gettext("<mac-address>"));
+ (void) fprintf(fp, "\t%s %s=%s\n", cmd_to_str(CMD_SET),
+ pt_to_str(PT_GNIC), gettext("<global zone NIC>"));
+ (void) fprintf(fp, "\t%s %s=%s\n", cmd_to_str(CMD_SET),
+ pt_to_str(PT_VLANID), gettext("<vlan ID>"));
(void) fprintf(fp, gettext("See ifconfig(1M) for "
"details of the <interface> string.\n"));
(void) fprintf(fp, gettext("%s %s is valid "
@@ -1090,10 +1128,12 @@ usage(boolean_t verbose, uint_t flags)
"must not be set.\n"),
cmd_to_str(CMD_SET), pt_to_str(PT_ADDRESS),
pt_to_str(PT_IPTYPE), gettext("shared"));
- (void) fprintf(fp, gettext("%s %s is valid "
- "if the %s property is set to %s, otherwise it "
- "must not be set.\n"),
- cmd_to_str(CMD_SET), pt_to_str(PT_ALLOWED_ADDRESS),
+ (void) fprintf(fp, gettext("%s (%s, %s, %s, %s) are "
+ "valid if the %s property is set to %s, otherwise "
+ "they must not be set.\n"),
+ cmd_to_str(CMD_SET),
+ pt_to_str(PT_ALLOWED_ADDRESS), pt_to_str(PT_MAC),
+ pt_to_str(PT_VLANID), pt_to_str(PT_GNIC),
pt_to_str(PT_IPTYPE), gettext("exclusive"));
(void) fprintf(fp, gettext("\t%s %s=%s\n%s %s "
"is valid if the %s or %s property is set, "
@@ -1109,6 +1149,9 @@ usage(boolean_t verbose, uint_t flags)
"used to configure a device node.\n"),
rt_to_str(resource_scope));
(void) fprintf(fp, gettext("Valid commands:\n"));
+ (void) fprintf(fp, "\t%s %s (%s=<value>,%s=<value>)\n",
+ cmd_to_str(CMD_ADD), pt_to_str(PT_NPROP),
+ pt_to_str(PT_NAME), pt_to_str(PT_VALUE));
(void) fprintf(fp, "\t%s %s=%s\n", cmd_to_str(CMD_SET),
pt_to_str(PT_MATCH), gettext("<device-path>"));
break;
@@ -1240,10 +1283,12 @@ usage(boolean_t verbose, uint_t flags)
if (flags & HELP_USAGE) {
(void) fprintf(fp, "%s:\t%s %s\n", gettext("usage"),
execname, cmd_to_str(CMD_HELP));
- (void) fprintf(fp, "\t%s -z <zone>\t\t\t(%s)\n",
+ (void) fprintf(fp, "\t%s {-z <zone>|-u <uuid>}\t\t\t(%s)\n",
execname, gettext("interactive"));
- (void) fprintf(fp, "\t%s -z <zone> <command>\n", execname);
- (void) fprintf(fp, "\t%s -z <zone> -f <command-file>\n",
+ (void) fprintf(fp, "\t%s {-z <zone>|-u <uuid>} <command>\n",
+ execname);
+ (void) fprintf(fp,
+ "\t%s {-z <zone>|-u <uuid>} -f <command-file>\n",
execname);
}
if (flags & HELP_SUBCMDS) {
@@ -1332,15 +1377,22 @@ usage(boolean_t verbose, uint_t flags)
pt_to_str(PT_MAXSEMIDS));
(void) fprintf(fp, "\t%s\t%s\n", gettext("(global)"),
pt_to_str(PT_SHARES));
+ (void) fprintf(fp, "\t%s\t%s\n", gettext("(global)"),
+ pt_to_str(PT_UUID));
+ (void) fprintf(fp, "\t%s\t%s\n", gettext("(global)"),
+ pt_to_str(PT_ZFSPRI));
(void) fprintf(fp, "\t%s\t\t%s, %s, %s, %s, %s\n",
rt_to_str(RT_FS), pt_to_str(PT_DIR),
pt_to_str(PT_SPECIAL), pt_to_str(PT_RAW),
pt_to_str(PT_TYPE), pt_to_str(PT_OPTIONS));
- (void) fprintf(fp, "\t%s\t\t%s, %s, %s|%s\n", rt_to_str(RT_NET),
+ (void) fprintf(fp, "\t%s\t\t%s, %s, %s, %s, %s, %s, %s %s\n",
+ rt_to_str(RT_NET),
pt_to_str(PT_ADDRESS), pt_to_str(PT_ALLOWED_ADDRESS),
- pt_to_str(PT_PHYSICAL), pt_to_str(PT_DEFROUTER));
- (void) fprintf(fp, "\t%s\t\t%s\n", rt_to_str(RT_DEVICE),
- pt_to_str(PT_MATCH));
+ pt_to_str(PT_GNIC), pt_to_str(PT_MAC),
+ pt_to_str(PT_PHYSICAL), pt_to_str(PT_NPROP),
+ pt_to_str(PT_VLANID), pt_to_str(PT_DEFROUTER));
+ (void) fprintf(fp, "\t%s\t\t%s, %s\n", rt_to_str(RT_DEVICE),
+ pt_to_str(PT_MATCH), pt_to_str(PT_NPROP));
(void) fprintf(fp, "\t%s\t\t%s, %s\n", rt_to_str(RT_RCTL),
pt_to_str(PT_NAME), pt_to_str(PT_VALUE));
(void) fprintf(fp, "\t%s\t\t%s, %s, %s\n", rt_to_str(RT_ATTR),
@@ -1395,6 +1447,9 @@ initialize(boolean_t handle_expected)
if (zonecfg_check_handle(handle) != Z_OK) {
if ((err = zonecfg_get_handle(zone, handle)) == Z_OK) {
got_handle = B_TRUE;
+
+ (void) zonecfg_fix_obsolete(handle);
+
if (zonecfg_get_brand(handle, brandname,
sizeof (brandname)) != Z_OK) {
zerr("Zone %s is inconsistent: missing "
@@ -1662,6 +1717,7 @@ create_func(cmd_t *cmd)
boolean_t force = B_FALSE;
boolean_t attach = B_FALSE;
boolean_t arg_err = B_FALSE;
+ uuid_t uuid;
assert(cmd != NULL);
@@ -1669,7 +1725,7 @@ create_func(cmd_t *cmd)
(void) strlcpy(zone_template, "SUNWdefault", sizeof (zone_template));
optind = 0;
- while ((arg = getopt(cmd->cmd_argc, cmd->cmd_argv, "?a:bFt:"))
+ while ((arg = getopt(cmd->cmd_argc, cmd->cmd_argv, "?a:bFt:X"))
!= EOF) {
switch (arg) {
case '?':
@@ -1695,6 +1751,17 @@ create_func(cmd_t *cmd)
(void) strlcpy(zone_template, optarg,
sizeof (zone_template));
break;
+ case 'X':
+ (void) snprintf(zone_template, sizeof (zone_template),
+ "%s/%s.xml", ZONE_CONFIG_ROOT, zone);
+ err = zonecfg_get_xml_handle(zone_template, handle);
+ if (err != Z_OK) {
+ zone_perror(execname, err, B_TRUE);
+ exit(Z_ERR);
+ }
+ got_handle = B_TRUE;
+ need_to_commit = B_TRUE;
+ return;
default:
short_usage(CMD_CREATE);
arg_err = B_TRUE;
@@ -1748,9 +1815,14 @@ create_func(cmd_t *cmd)
}
need_to_commit = B_TRUE;
+ is_create = B_TRUE;
zonecfg_fini_handle(handle);
handle = tmphandle;
got_handle = B_TRUE;
+
+ /* Allocate a new uuid for this new zone */
+ uuid_generate(uuid);
+ uuid_unparse(uuid, new_uuid);
}
/*
@@ -1797,8 +1869,8 @@ export_func(cmd_t *cmd)
struct zone_rctltab rctltab;
struct zone_dstab dstab;
struct zone_psettab psettab;
- struct zone_mcaptab mcaptab;
struct zone_rctlvaltab *valptr;
+ struct zone_res_attrtab *rap;
struct zone_admintab admintab;
int err, arg;
char zonepath[MAXPATHLEN], outfile[MAXPATHLEN], pool[MAXNAMELEN];
@@ -1811,6 +1883,7 @@ export_func(cmd_t *cmd)
FILE *of;
boolean_t autoboot;
zone_iptype_t iptype;
+ uuid_t uuid;
boolean_t need_to_close = B_FALSE;
boolean_t arg_err = B_FALSE;
@@ -1921,6 +1994,14 @@ export_func(cmd_t *cmd)
pt_to_str(PT_FS_ALLOWED), fsallowedp);
}
+ if (zonecfg_get_uuid(zone, uuid) == Z_OK && !uuid_is_null(uuid)) {
+ char suuid[UUID_PRINTABLE_STRING_LENGTH];
+
+ uuid_unparse(uuid, suuid);
+ (void) fprintf(of, "%s %s=%s\n", cmd_to_str(CMD_SET),
+ pt_to_str(PT_UUID), suuid);
+ }
+
if ((err = zonecfg_setfsent(handle)) != Z_OK) {
zone_perror(zone, err, B_FALSE);
goto done;
@@ -1968,7 +2049,17 @@ export_func(cmd_t *cmd)
export_prop(of, PT_ALLOWED_ADDRESS,
nwiftab.zone_nwif_allowed_address);
export_prop(of, PT_PHYSICAL, nwiftab.zone_nwif_physical);
+ export_prop(of, PT_MAC, nwiftab.zone_nwif_mac);
+ export_prop(of, PT_VLANID, nwiftab.zone_nwif_vlan_id);
+ export_prop(of, PT_GNIC, nwiftab.zone_nwif_gnic);
export_prop(of, PT_DEFROUTER, nwiftab.zone_nwif_defrouter);
+ for (rap = nwiftab.zone_nwif_attrp; rap != NULL;
+ rap = rap->zone_res_attr_next) {
+ fprintf(of, "%s %s (%s=%s,%s=\"%s\")\n",
+ cmd_to_str(CMD_ADD), pt_to_str(PT_NPROP),
+ pt_to_str(PT_NAME), rap->zone_res_attr_name,
+ pt_to_str(PT_VALUE), rap->zone_res_attr_value);
+ }
(void) fprintf(of, "%s\n", cmd_to_str(CMD_END));
}
(void) zonecfg_endnwifent(handle);
@@ -1981,21 +2072,17 @@ export_func(cmd_t *cmd)
(void) fprintf(of, "%s %s\n", cmd_to_str(CMD_ADD),
rt_to_str(RT_DEVICE));
export_prop(of, PT_MATCH, devtab.zone_dev_match);
+ for (rap = devtab.zone_dev_attrp; rap != NULL;
+ rap = rap->zone_res_attr_next) {
+ fprintf(of, "%s %s (%s=%s,%s=\"%s\")\n",
+ cmd_to_str(CMD_ADD), pt_to_str(PT_NPROP),
+ pt_to_str(PT_NAME), rap->zone_res_attr_name,
+ pt_to_str(PT_VALUE), rap->zone_res_attr_value);
+ }
(void) fprintf(of, "%s\n", cmd_to_str(CMD_END));
}
(void) zonecfg_enddevent(handle);
- if (zonecfg_getmcapent(handle, &mcaptab) == Z_OK) {
- char buf[128];
-
- (void) fprintf(of, "%s %s\n", cmd_to_str(CMD_ADD),
- rt_to_str(RT_MCAP));
- bytes_to_units(mcaptab.zone_physmem_cap, buf, sizeof (buf));
- (void) fprintf(of, "%s %s=%s\n", cmd_to_str(CMD_SET),
- pt_to_str(PT_PHYSICAL), buf);
- (void) fprintf(of, "%s\n", cmd_to_str(CMD_END));
- }
-
if ((err = zonecfg_setrctlent(handle)) != Z_OK) {
zone_perror(zone, err, B_FALSE);
goto done;
@@ -2149,7 +2236,6 @@ add_resource(cmd_t *cmd)
{
int type;
struct zone_psettab tmp_psettab;
- struct zone_mcaptab tmp_mcaptab;
uint64_t tmp;
uint64_t tmp_mcap;
char pool[MAXNAMELEN];
@@ -2241,9 +2327,10 @@ add_resource(cmd_t *cmd)
* Make sure there isn't already a mem-cap entry or max-swap
* or max-locked rctl.
*/
- if (zonecfg_lookup_mcap(handle, &tmp_mcaptab) == Z_OK ||
- zonecfg_get_aliased_rctl(handle, ALIAS_MAXSWAP, &tmp_mcap)
- == Z_OK ||
+ if (zonecfg_get_aliased_rctl(handle, ALIAS_MAXSWAP,
+ &tmp_mcap) == Z_OK ||
+ zonecfg_get_aliased_rctl(handle, ALIAS_MAXPHYSMEM,
+ &tmp_mcap) == Z_OK ||
zonecfg_get_aliased_rctl(handle, ALIAS_MAXLOCKEDMEM,
&tmp_mcap) == Z_OK) {
zerr(gettext("The %s resource or a related resource "
@@ -2256,7 +2343,6 @@ add_resource(cmd_t *cmd)
"to even the root user; "
"this could render the system impossible\n"
"to administer. Please use caution."));
- bzero(&in_progress_mcaptab, sizeof (in_progress_mcaptab));
return;
case RT_ADMIN:
bzero(&in_progress_admintab, sizeof (in_progress_admintab));
@@ -2359,6 +2445,79 @@ bad:
zonecfg_free_rctl_value_list(rctlvaltab);
}
+/*
+ * Resource attribute ("property" resource embedded on net or dev resource)
+ */
+static void
+do_res_attr(struct zone_res_attrtab **headp, complex_property_ptr_t cpp)
+{
+ complex_property_ptr_t cp;
+ struct zone_res_attrtab *np;
+ int err;
+ boolean_t seen_name = B_FALSE, seen_value = B_FALSE;
+
+ if ((np = calloc(1, sizeof (struct zone_res_attrtab))) == NULL) {
+ zone_perror(zone, Z_NOMEM, B_TRUE);
+ exit(Z_ERR);
+ }
+
+ for (cp = cpp; cp != NULL; cp = cp->cp_next) {
+ switch (cp->cp_type) {
+ case PT_NAME:
+ if (seen_name) {
+ zerr(gettext("%s already specified"),
+ pt_to_str(PT_NAME));
+ goto bad;
+ }
+ if (strlcpy(np->zone_res_attr_name, cp->cp_value,
+ sizeof (np->zone_res_attr_name)) >=
+ sizeof (np->zone_res_attr_name)) {
+ zerr(gettext("Input for %s is too long"),
+ pt_to_str(PT_NAME));
+ goto bad;
+ }
+ seen_name = B_TRUE;
+ break;
+ case PT_VALUE:
+ if (seen_value) {
+ zerr(gettext("%s already specified"),
+ pt_to_str(PT_VALUE));
+ goto bad;
+ }
+ if (strlcpy(np->zone_res_attr_value, cp->cp_value,
+ sizeof (np->zone_res_attr_value)) >=
+ sizeof (np->zone_res_attr_value)) {
+ zerr(gettext("Input for %s is too long"),
+ pt_to_str(PT_VALUE));
+ goto bad;
+ }
+
+ seen_value = B_TRUE;
+ break;
+ default:
+ zone_perror(pt_to_str(PT_NPROP), Z_NO_PROPERTY_TYPE,
+ B_TRUE);
+ long_usage(CMD_ADD, B_TRUE);
+ usage(B_FALSE, HELP_PROPS);
+ zonecfg_free_res_attr_list(np);
+ return;
+ }
+ }
+
+ if (!seen_name)
+ zerr(gettext("%s not specified"), pt_to_str(PT_NAME));
+ if (!seen_value)
+ zerr(gettext("%s not specified"), pt_to_str(PT_VALUE));
+
+ err = zonecfg_add_res_attr(headp, np);
+ if (err != Z_OK)
+ zone_perror(pt_to_str(PT_NPROP), err, B_TRUE);
+ return;
+
+bad:
+ zonecfg_free_res_attr_list(np);
+}
+
static void
add_property(cmd_t *cmd)
{
@@ -2426,6 +2585,44 @@ add_property(cmd_t *cmd)
}
}
return;
+ case RT_NET:
+ if (prop_type != PT_NPROP) {
+ zone_perror(pt_to_str(prop_type), Z_NO_PROPERTY_TYPE,
+ B_TRUE);
+ long_usage(CMD_ADD, B_TRUE);
+ usage(B_FALSE, HELP_PROPS);
+ return;
+ }
+ pp = cmd->cmd_property_ptr[0];
+ if (pp->pv_type != PROP_VAL_COMPLEX) {
+ zerr(gettext("A %s value was expected here."),
+ pvt_to_str(PROP_VAL_COMPLEX));
+ saw_error = B_TRUE;
+ return;
+ }
+
+ do_res_attr(&(in_progress_nwiftab.zone_nwif_attrp),
+ pp->pv_complex);
+ return;
+ case RT_DEVICE:
+ if (prop_type != PT_NPROP) {
+ zone_perror(pt_to_str(prop_type), Z_NO_PROPERTY_TYPE,
+ B_TRUE);
+ long_usage(CMD_ADD, B_TRUE);
+ usage(B_FALSE, HELP_PROPS);
+ return;
+ }
+ pp = cmd->cmd_property_ptr[0];
+ if (pp->pv_type != PROP_VAL_COMPLEX) {
+ zerr(gettext("A %s value was expected here."),
+ pvt_to_str(PROP_VAL_COMPLEX));
+ saw_error = B_TRUE;
+ return;
+ }
+
+ do_res_attr(&(in_progress_devtab.zone_dev_attrp),
+ pp->pv_complex);
+ return;
case RT_RCTL:
if (prop_type != PT_VALUE) {
zone_perror(pt_to_str(prop_type), Z_NO_PROPERTY_TYPE,
@@ -2470,7 +2667,7 @@ static boolean_t
gz_invalid_rt_property(int type)
{
return (global_zone && (type == RT_ZONENAME || type == RT_ZONEPATH ||
- type == RT_AUTOBOOT || type == RT_LIMITPRIV ||
+ type == RT_AUTOBOOT || type == RT_LIMITPRIV || type == RT_UUID ||
type == RT_BOOTARGS || type == RT_BRAND || type == RT_SCHED ||
type == RT_IPTYPE || type == RT_HOSTID || type == RT_FS_ALLOWED));
}
@@ -2479,7 +2676,7 @@ static boolean_t
gz_invalid_property(int type)
{
return (global_zone && (type == PT_ZONENAME || type == PT_ZONEPATH ||
- type == PT_AUTOBOOT || type == PT_LIMITPRIV ||
+ type == PT_AUTOBOOT || type == PT_LIMITPRIV || type == PT_UUID ||
type == PT_BOOTARGS || type == PT_BRAND || type == PT_SCHED ||
type == PT_IPTYPE || type == PT_HOSTID || type == PT_FS_ALLOWED));
}
@@ -2530,8 +2727,9 @@ add_func(cmd_t *cmd)
resource_scope = cmd->cmd_res_type;
end_op = CMD_ADD;
add_resource(cmd);
- } else
+ } else {
add_property(cmd);
+ }
}
/*
@@ -2696,6 +2894,32 @@ fill_in_fstab(cmd_t *cmd, struct zone_fstab *fstab, boolean_t fill_in_only)
return (zonecfg_lookup_filesystem(handle, fstab));
}
+/*
+ * Turn an addr that looks like f:2:0:44:5:6C into 0f:02:00:44:05:6c
+ * We're expecting a dst of at least MAXMACADDRLEN size here.
+ */
+static void
+normalize_mac_addr(char *dst, const char *src, int len)
+{
+ char *p, *e, *sep = "";
+ long n;
+ char buf[MAXMACADDRLEN], tmp[4];
+
+ *dst = '\0';
+ (void) strlcpy(buf, src, sizeof (buf));
+ p = strtok(buf, ":");
+ while (p != NULL) {
+ n = strtol(p, &e, 16);
+ if (*e != NULL || n > 0xff)
+ return;
+ (void) snprintf(tmp, sizeof (tmp), "%s%02x", sep, n);
+ (void) strlcat(dst, tmp, len);
+
+ sep = ":";
+ p = strtok(NULL, ":");
+ }
+}
+
static int
fill_in_nwiftab(cmd_t *cmd, struct zone_nwiftab *nwiftab,
boolean_t fill_in_only)
@@ -2729,6 +2953,21 @@ fill_in_nwiftab(cmd_t *cmd, struct zone_nwiftab *nwiftab,
pp->pv_simple,
sizeof (nwiftab->zone_nwif_physical));
break;
+ case PT_MAC:
+ normalize_mac_addr(nwiftab->zone_nwif_mac,
+ pp->pv_simple,
+ sizeof (nwiftab->zone_nwif_mac));
+ break;
+ case PT_VLANID:
+ (void) strlcpy(nwiftab->zone_nwif_vlan_id,
+ pp->pv_simple,
+ sizeof (nwiftab->zone_nwif_vlan_id));
+ break;
+ case PT_GNIC:
+ (void) strlcpy(nwiftab->zone_nwif_gnic,
+ pp->pv_simple,
+ sizeof (nwiftab->zone_nwif_gnic));
+ break;
case PT_DEFROUTER:
(void) strlcpy(nwiftab->zone_nwif_defrouter,
pp->pv_simple,
@@ -2979,6 +3218,8 @@ prompt_remove_resource(cmd_t *cmd, char *rsrc)
num = zonecfg_num_resources(handle, rsrc);
if (num == 0) {
+ if (force)
+ return (B_TRUE);
z_cmd_rt_perror(CMD_REMOVE, cmd->cmd_res_type, Z_NO_ENTRY,
B_TRUE);
return (B_FALSE);
@@ -3007,7 +3248,7 @@ prompt_remove_resource(cmd_t *cmd, char *rsrc)
}
static void
-remove_fs(cmd_t *cmd)
+remove_fs(cmd_t *cmd, boolean_t force)
{
int err;
@@ -3016,13 +3257,16 @@ remove_fs(cmd_t *cmd)
struct zone_fstab fstab;
if ((err = fill_in_fstab(cmd, &fstab, B_FALSE)) != Z_OK) {
- z_cmd_rt_perror(CMD_REMOVE, RT_FS, err, B_TRUE);
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_FS, err, B_TRUE);
return;
}
- if ((err = zonecfg_delete_filesystem(handle, &fstab)) != Z_OK)
- z_cmd_rt_perror(CMD_REMOVE, RT_FS, err, B_TRUE);
- else
+ if ((err = zonecfg_delete_filesystem(handle, &fstab)) != Z_OK) {
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_FS, err, B_TRUE);
+ } else {
need_to_commit = B_TRUE;
+ }
zonecfg_free_fs_option_list(fstab.zone_fs_options);
return;
}
@@ -3041,7 +3285,7 @@ remove_fs(cmd_t *cmd)
}
static void
-remove_net(cmd_t *cmd)
+remove_net(cmd_t *cmd, boolean_t force)
{
int err;
@@ -3050,13 +3294,18 @@ remove_net(cmd_t *cmd)
struct zone_nwiftab nwiftab;
if ((err = fill_in_nwiftab(cmd, &nwiftab, B_FALSE)) != Z_OK) {
- z_cmd_rt_perror(CMD_REMOVE, RT_NET, err, B_TRUE);
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_NET, err,
+ B_TRUE);
return;
}
- if ((err = zonecfg_delete_nwif(handle, &nwiftab)) != Z_OK)
- z_cmd_rt_perror(CMD_REMOVE, RT_NET, err, B_TRUE);
- else
+ if ((err = zonecfg_delete_nwif(handle, &nwiftab)) != Z_OK) {
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_NET, err,
+ B_TRUE);
+ } else {
need_to_commit = B_TRUE;
+ }
return;
}
@@ -3074,7 +3323,7 @@ remove_net(cmd_t *cmd)
}
static void
-remove_device(cmd_t *cmd)
+remove_device(cmd_t *cmd, boolean_t force)
{
int err;
@@ -3083,13 +3332,18 @@ remove_device(cmd_t *cmd)
struct zone_devtab devtab;
if ((err = fill_in_devtab(cmd, &devtab, B_FALSE)) != Z_OK) {
- z_cmd_rt_perror(CMD_REMOVE, RT_DEVICE, err, B_TRUE);
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_DEVICE, err,
+ B_TRUE);
return;
}
- if ((err = zonecfg_delete_dev(handle, &devtab)) != Z_OK)
- z_cmd_rt_perror(CMD_REMOVE, RT_DEVICE, err, B_TRUE);
- else
+ if ((err = zonecfg_delete_dev(handle, &devtab)) != Z_OK) {
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_DEVICE, err,
+ B_TRUE);
+ } else {
need_to_commit = B_TRUE;
+ }
return;
}
@@ -3107,7 +3361,7 @@ remove_device(cmd_t *cmd)
}
static void
-remove_attr(cmd_t *cmd)
+remove_attr(cmd_t *cmd, boolean_t force)
{
int err;
@@ -3116,13 +3370,18 @@ remove_attr(cmd_t *cmd)
struct zone_attrtab attrtab;
if ((err = fill_in_attrtab(cmd, &attrtab, B_FALSE)) != Z_OK) {
- z_cmd_rt_perror(CMD_REMOVE, RT_ATTR, err, B_TRUE);
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_ATTR, err,
+ B_TRUE);
return;
}
- if ((err = zonecfg_delete_attr(handle, &attrtab)) != Z_OK)
- z_cmd_rt_perror(CMD_REMOVE, RT_ATTR, err, B_TRUE);
- else
+ if ((err = zonecfg_delete_attr(handle, &attrtab)) != Z_OK) {
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_ATTR, err,
+ B_TRUE);
+ } else {
need_to_commit = B_TRUE;
+ }
return;
}
@@ -3140,7 +3399,7 @@ remove_attr(cmd_t *cmd)
}
static void
-remove_dataset(cmd_t *cmd)
+remove_dataset(cmd_t *cmd, boolean_t force)
{
int err;
@@ -3149,13 +3408,18 @@ remove_dataset(cmd_t *cmd)
struct zone_dstab dstab;
if ((err = fill_in_dstab(cmd, &dstab, B_FALSE)) != Z_OK) {
- z_cmd_rt_perror(CMD_REMOVE, RT_DATASET, err, B_TRUE);
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_DATASET, err,
+ B_TRUE);
return;
}
- if ((err = zonecfg_delete_ds(handle, &dstab)) != Z_OK)
- z_cmd_rt_perror(CMD_REMOVE, RT_DATASET, err, B_TRUE);
- else
+ if ((err = zonecfg_delete_ds(handle, &dstab)) != Z_OK) {
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_DATASET, err,
+ B_TRUE);
+ } else {
need_to_commit = B_TRUE;
+ }
return;
}
@@ -3173,7 +3437,7 @@ remove_dataset(cmd_t *cmd)
}
static void
-remove_rctl(cmd_t *cmd)
+remove_rctl(cmd_t *cmd, boolean_t force)
{
int err;
@@ -3182,13 +3446,18 @@ remove_rctl(cmd_t *cmd)
struct zone_rctltab rctltab;
if ((err = fill_in_rctltab(cmd, &rctltab, B_FALSE)) != Z_OK) {
- z_cmd_rt_perror(CMD_REMOVE, RT_RCTL, err, B_TRUE);
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_RCTL, err,
+ B_TRUE);
return;
}
- if ((err = zonecfg_delete_rctl(handle, &rctltab)) != Z_OK)
- z_cmd_rt_perror(CMD_REMOVE, RT_RCTL, err, B_TRUE);
- else
+ if ((err = zonecfg_delete_rctl(handle, &rctltab)) != Z_OK) {
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_RCTL, err,
+ B_TRUE);
+ } else {
need_to_commit = B_TRUE;
+ }
zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
return;
}
@@ -3207,72 +3476,90 @@ remove_rctl(cmd_t *cmd)
}
static void
-remove_pset()
+remove_pset(boolean_t force)
{
int err;
struct zone_psettab psettab;
if ((err = zonecfg_lookup_pset(handle, &psettab)) != Z_OK) {
- z_cmd_rt_perror(CMD_REMOVE, RT_DCPU, err, B_TRUE);
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_DCPU, err, B_TRUE);
return;
}
- if ((err = zonecfg_delete_pset(handle)) != Z_OK)
- z_cmd_rt_perror(CMD_REMOVE, RT_DCPU, err, B_TRUE);
- else
+ if ((err = zonecfg_delete_pset(handle)) != Z_OK) {
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_DCPU, err, B_TRUE);
+ } else {
need_to_commit = B_TRUE;
+ }
}
static void
-remove_pcap()
+remove_pcap(boolean_t force)
{
int err;
uint64_t tmp;
if (zonecfg_get_aliased_rctl(handle, ALIAS_CPUCAP, &tmp) != Z_OK) {
- zerr("%s %s: %s", cmd_to_str(CMD_REMOVE), rt_to_str(RT_PCAP),
- zonecfg_strerror(Z_NO_RESOURCE_TYPE));
- saw_error = B_TRUE;
+ if (!force) {
+ zerr("%s %s: %s", cmd_to_str(CMD_REMOVE),
+ rt_to_str(RT_PCAP),
+ zonecfg_strerror(Z_NO_RESOURCE_TYPE));
+ saw_error = B_TRUE;
+ }
return;
}
- if ((err = zonecfg_rm_aliased_rctl(handle, ALIAS_CPUCAP)) != Z_OK)
- z_cmd_rt_perror(CMD_REMOVE, RT_PCAP, err, B_TRUE);
- else
+ if ((err = zonecfg_rm_aliased_rctl(handle, ALIAS_CPUCAP)) != Z_OK) {
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_PCAP, err, B_TRUE);
+ } else {
need_to_commit = B_TRUE;
+ }
}
static void
-remove_mcap()
+remove_mcap(boolean_t force)
{
int err, res1, res2, res3;
uint64_t tmp;
- struct zone_mcaptab mcaptab;
boolean_t revert = B_FALSE;
- res1 = zonecfg_lookup_mcap(handle, &mcaptab);
+ res1 = zonecfg_get_aliased_rctl(handle, ALIAS_MAXPHYSMEM, &tmp);
res2 = zonecfg_get_aliased_rctl(handle, ALIAS_MAXSWAP, &tmp);
res3 = zonecfg_get_aliased_rctl(handle, ALIAS_MAXLOCKEDMEM, &tmp);
/* if none of these exist, there is no resource to remove */
if (res1 != Z_OK && res2 != Z_OK && res3 != Z_OK) {
- zerr("%s %s: %s", cmd_to_str(CMD_REMOVE), rt_to_str(RT_MCAP),
- zonecfg_strerror(Z_NO_RESOURCE_TYPE));
- saw_error = B_TRUE;
+ if (!force) {
+ zerr("%s %s: %s", cmd_to_str(CMD_REMOVE),
+ rt_to_str(RT_MCAP),
+ zonecfg_strerror(Z_NO_RESOURCE_TYPE));
+ saw_error = B_TRUE;
+ }
return;
}
if (res1 == Z_OK) {
- if ((err = zonecfg_delete_mcap(handle)) != Z_OK) {
- z_cmd_rt_perror(CMD_REMOVE, RT_MCAP, err, B_TRUE);
- revert = B_TRUE;
+ if ((err = zonecfg_rm_aliased_rctl(handle, ALIAS_MAXPHYSMEM))
+ != Z_OK) {
+ if (!force) {
+ z_cmd_rt_perror(CMD_REMOVE, RT_MCAP, err,
+ B_TRUE);
+ revert = B_TRUE;
+ }
} else {
need_to_commit = B_TRUE;
}
}
+
if (res2 == Z_OK) {
if ((err = zonecfg_rm_aliased_rctl(handle, ALIAS_MAXSWAP))
!= Z_OK) {
- z_cmd_rt_perror(CMD_REMOVE, RT_MCAP, err, B_TRUE);
- revert = B_TRUE;
+ if (!force) {
+ z_cmd_rt_perror(CMD_REMOVE, RT_MCAP, err,
+ B_TRUE);
+ revert = B_TRUE;
+ }
} else {
need_to_commit = B_TRUE;
}
@@ -3280,8 +3567,11 @@ remove_mcap()
if (res3 == Z_OK) {
if ((err = zonecfg_rm_aliased_rctl(handle, ALIAS_MAXLOCKEDMEM))
!= Z_OK) {
- z_cmd_rt_perror(CMD_REMOVE, RT_MCAP, err, B_TRUE);
- revert = B_TRUE;
+ if (!force) {
+ z_cmd_rt_perror(CMD_REMOVE, RT_MCAP, err,
+ B_TRUE);
+ revert = B_TRUE;
+ }
} else {
need_to_commit = B_TRUE;
}
@@ -3292,7 +3582,7 @@ remove_mcap()
}
static void
-remove_admin(cmd_t *cmd)
+remove_admin(cmd_t *cmd, boolean_t force)
{
int err;
@@ -3301,34 +3591,33 @@ remove_admin(cmd_t *cmd)
struct zone_admintab admintab;
if ((err = fill_in_admintab(cmd, &admintab, B_FALSE)) != Z_OK) {
- z_cmd_rt_perror(CMD_REMOVE, RT_ADMIN,
- err, B_TRUE);
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_ADMIN, err,
+ B_TRUE);
return;
}
if ((err = zonecfg_delete_admin(handle, &admintab,
- zone))
- != Z_OK)
- z_cmd_rt_perror(CMD_REMOVE, RT_ADMIN,
- err, B_TRUE);
- else
+ zone)) != Z_OK) {
+ if (!force)
+ z_cmd_rt_perror(CMD_REMOVE, RT_ADMIN, err,
+ B_TRUE);
+ } else {
need_to_commit = B_TRUE;
+ }
return;
- } else {
- /*
- * unqualified admin removal.
- * remove all admins but prompt if more
- * than one.
- */
- if (!prompt_remove_resource(cmd, "admin"))
- return;
-
- if ((err = zonecfg_delete_admins(handle, zone))
- != Z_OK)
- z_cmd_rt_perror(CMD_REMOVE, RT_ADMIN,
- err, B_TRUE);
- else
- need_to_commit = B_TRUE;
}
+
+ /*
+ * unqualified admin removal.
+ * remove all admins but prompt if more than one.
+ */
+ if (!prompt_remove_resource(cmd, "admin"))
+ return;
+
+ if ((err = zonecfg_delete_admins(handle, zone)) != Z_OK)
+ z_cmd_rt_perror(CMD_REMOVE, RT_ADMIN, err, B_TRUE);
+ else
+ need_to_commit = B_TRUE;
}
static void
@@ -3337,6 +3626,7 @@ remove_resource(cmd_t *cmd)
int type;
int arg;
boolean_t arg_err = B_FALSE;
+ boolean_t force = B_FALSE;
if ((type = cmd->cmd_res_type) == RT_UNKNOWN) {
long_usage(CMD_REMOVE, B_TRUE);
@@ -3351,6 +3641,7 @@ remove_resource(cmd_t *cmd)
arg_err = B_TRUE;
break;
case 'F':
+ force = B_TRUE;
break;
default:
short_usage(CMD_REMOVE);
@@ -3366,34 +3657,34 @@ remove_resource(cmd_t *cmd)
switch (type) {
case RT_FS:
- remove_fs(cmd);
+ remove_fs(cmd, force);
return;
case RT_NET:
- remove_net(cmd);
+ remove_net(cmd, force);
return;
case RT_DEVICE:
- remove_device(cmd);
+ remove_device(cmd, force);
return;
case RT_RCTL:
- remove_rctl(cmd);
+ remove_rctl(cmd, force);
return;
case RT_ATTR:
- remove_attr(cmd);
+ remove_attr(cmd, force);
return;
case RT_DATASET:
- remove_dataset(cmd);
+ remove_dataset(cmd, force);
return;
case RT_DCPU:
- remove_pset();
+ remove_pset(force);
return;
case RT_PCAP:
- remove_pcap();
+ remove_pcap(force);
return;
case RT_MCAP:
- remove_mcap();
+ remove_mcap(force);
return;
case RT_ADMIN:
- remove_admin(cmd);
+ remove_admin(cmd, force);
return;
default:
zone_perror(rt_to_str(type), Z_NO_RESOURCE_TYPE, B_TRUE);
@@ -3410,7 +3701,27 @@ remove_property(cmd_t *cmd)
int err, res_type, prop_type;
property_value_ptr_t pp;
struct zone_rctlvaltab *rctlvaltab;
+ struct zone_res_attrtab *np;
complex_property_ptr_t cx;
+ int arg;
+ boolean_t force = B_FALSE;
+ boolean_t arg_err = B_FALSE;
+
+ optind = 0;
+ while ((arg = getopt(cmd->cmd_argc, cmd->cmd_argv, "F")) != EOF) {
+ switch (arg) {
+ case 'F':
+ force = B_TRUE;
+ break;
+ default:
+ arg_err = B_TRUE;
+ break;
+ }
+ }
+ if (arg_err) {
+ saw_error = B_TRUE;
+ return;
+ }
res_type = resource_scope;
prop_type = cmd->cmd_prop_name[0];
@@ -3452,7 +3763,7 @@ remove_property(cmd_t *cmd)
prop_id = pp->pv_simple;
err = zonecfg_remove_fs_option(&in_progress_fstab,
prop_id);
- if (err != Z_OK)
+ if (err != Z_OK && !force)
zone_perror(pt_to_str(prop_type), err, B_TRUE);
} else {
list_property_ptr_t list;
@@ -3464,12 +3775,62 @@ remove_property(cmd_t *cmd)
break;
err = zonecfg_remove_fs_option(
&in_progress_fstab, prop_id);
- if (err != Z_OK)
+ if (err != Z_OK && !force)
zone_perror(pt_to_str(prop_type), err,
B_TRUE);
}
}
return;
+ case RT_NET: /* FALLTHRU */
+ case RT_DEVICE:
+ if (prop_type != PT_NPROP) {
+ zone_perror(pt_to_str(prop_type), Z_NO_PROPERTY_TYPE,
+ B_TRUE);
+ long_usage(CMD_REMOVE, B_TRUE);
+ usage(B_FALSE, HELP_PROPS);
+ return;
+ }
+ pp = cmd->cmd_property_ptr[0];
+ if (pp->pv_type != PROP_VAL_COMPLEX) {
+ zerr(gettext("A %s value was expected here."),
+ pvt_to_str(PROP_VAL_COMPLEX));
+ saw_error = B_TRUE;
+ return;
+ }
+
+ np = alloca(sizeof (struct zone_res_attrtab));
+ for (cx = pp->pv_complex; cx != NULL; cx = cx->cp_next) {
+ switch (cx->cp_type) {
+ case PT_NAME:
+ (void) strlcpy(np->zone_res_attr_name,
+ cx->cp_value,
+ sizeof (np->zone_res_attr_name));
+ break;
+ case PT_VALUE:
+ (void) strlcpy(np->zone_res_attr_value,
+ cx->cp_value,
+ sizeof (np->zone_res_attr_value));
+ break;
+ default:
+ zone_perror(pt_to_str(prop_type),
+ Z_NO_PROPERTY_TYPE, B_TRUE);
+ long_usage(CMD_REMOVE, B_TRUE);
+ usage(B_FALSE, HELP_PROPS);
+ return;
+ }
+ }
+ np->zone_res_attr_next = NULL;
+
+ if (res_type == RT_NET) {
+ err = zonecfg_remove_res_attr(
+ &(in_progress_nwiftab.zone_nwif_attrp), np);
+ } else { /* RT_DEVICE */
+ err = zonecfg_remove_res_attr(
+ &(in_progress_devtab.zone_dev_attrp), np);
+ }
+ if (err != Z_OK && !force)
+ zone_perror(pt_to_str(prop_type), err, B_TRUE);
+ return;
case RT_RCTL:
if (prop_type != PT_VALUE) {
zone_perror(pt_to_str(prop_type), Z_NO_PROPERTY_TYPE,
@@ -3518,22 +3879,10 @@ remove_property(cmd_t *cmd)
rctlvaltab->zone_rctlval_next = NULL;
err = zonecfg_remove_rctl_value(&in_progress_rctltab,
rctlvaltab);
- if (err != Z_OK)
+ if (err != Z_OK && !force)
zone_perror(pt_to_str(prop_type), err, B_TRUE);
zonecfg_free_rctl_value_list(rctlvaltab);
return;
- case RT_NET:
- if (prop_type != PT_DEFROUTER) {
- zone_perror(pt_to_str(prop_type), Z_NO_PROPERTY_TYPE,
- B_TRUE);
- long_usage(CMD_REMOVE, B_TRUE);
- usage(B_FALSE, HELP_PROPS);
- return;
- } else {
- bzero(&in_progress_nwiftab.zone_nwif_defrouter,
- sizeof (in_progress_nwiftab.zone_nwif_defrouter));
- return;
- }
default:
zone_perror(rt_to_str(res_type), Z_NO_RESOURCE_TYPE, B_TRUE);
long_usage(CMD_REMOVE, B_TRUE);
@@ -3596,8 +3945,7 @@ clear_property(cmd_t *cmd)
case RT_MCAP:
switch (prop_type) {
case PT_PHYSICAL:
- in_progress_mcaptab.zone_physmem_cap[0] = '\0';
- need_to_commit = B_TRUE;
+ remove_aliased_rctl(PT_PHYSICAL, ALIAS_MAXPHYSMEM);
return;
case PT_SWAP:
remove_aliased_rctl(PT_SWAP, ALIAS_MAXSWAP);
@@ -3607,6 +3955,30 @@ clear_property(cmd_t *cmd)
return;
}
break;
+ case RT_NET:
+ switch (prop_type) {
+ case PT_ALLOWED_ADDRESS:
+ in_progress_nwiftab.zone_nwif_allowed_address[0] = '\0';
+ need_to_commit = B_TRUE;
+ return;
+ case PT_DEFROUTER:
+ in_progress_nwiftab.zone_nwif_defrouter[0] = '\0';
+ need_to_commit = B_TRUE;
+ return;
+ case PT_GNIC:
+ in_progress_nwiftab.zone_nwif_gnic[0] = '\0';
+ need_to_commit = B_TRUE;
+ return;
+ case PT_MAC:
+ in_progress_nwiftab.zone_nwif_mac[0] = '\0';
+ need_to_commit = B_TRUE;
+ return;
+ case PT_VLANID:
+ in_progress_nwiftab.zone_nwif_vlan_id[0] = '\0';
+ need_to_commit = B_TRUE;
+ return;
+ }
+ break;
default:
break;
}
@@ -3632,6 +4004,8 @@ clear_global(cmd_t *cmd)
/* FALLTHRU */
case PT_ZONEPATH:
/* FALLTHRU */
+ case PT_UUID:
+ /* FALLTHRU */
case PT_BRAND:
zone_perror(pt_to_str(type), Z_CLEAR_DISALLOW, B_TRUE);
return;
@@ -3694,6 +4068,9 @@ clear_global(cmd_t *cmd)
case PT_SHARES:
remove_aliased_rctl(PT_SHARES, ALIAS_SHARES);
return;
+ case PT_ZFSPRI:
+ remove_aliased_rctl(PT_ZFSPRI, ALIAS_ZFSPRI);
+ return;
case PT_HOSTID:
if ((err = zonecfg_set_hostid(handle, NULL)) != Z_OK)
z_cmd_rt_perror(CMD_CLEAR, RT_HOSTID, err, B_TRUE);
@@ -3739,7 +4116,7 @@ clear_func(cmd_t *cmd)
void
select_func(cmd_t *cmd)
{
- int type, err, res;
+ int type, err;
uint64_t limit;
uint64_t tmp;
@@ -3834,7 +4211,8 @@ select_func(cmd_t *cmd)
return;
case RT_MCAP:
/* if none of these exist, there is no resource to select */
- if ((res = zonecfg_lookup_mcap(handle, &old_mcaptab)) != Z_OK &&
+ if (zonecfg_get_aliased_rctl(handle, ALIAS_MAXPHYSMEM, &limit)
+ != Z_OK &&
zonecfg_get_aliased_rctl(handle, ALIAS_MAXSWAP, &limit)
!= Z_OK &&
zonecfg_get_aliased_rctl(handle, ALIAS_MAXLOCKEDMEM, &limit)
@@ -3843,12 +4221,6 @@ select_func(cmd_t *cmd)
B_TRUE);
global_scope = B_TRUE;
}
- if (res == Z_OK)
- bcopy(&old_mcaptab, &in_progress_mcaptab,
- sizeof (struct zone_mcaptab));
- else
- bzero(&in_progress_mcaptab,
- sizeof (in_progress_mcaptab));
return;
case RT_ADMIN:
if ((err = fill_in_admintab(cmd, &old_admintab, B_FALSE))
@@ -4115,9 +4487,8 @@ set_func(cmd_t *cmd)
boolean_t autoboot;
zone_iptype_t iptype;
boolean_t force_set = B_FALSE;
- size_t physmem_size = sizeof (in_progress_mcaptab.zone_physmem_cap);
uint64_t mem_cap, mem_limit;
- float cap;
+ double cap;
char *unitp;
struct zone_psettab tmp_psettab;
boolean_t arg_err = B_FALSE;
@@ -4190,6 +4561,10 @@ set_func(cmd_t *cmd)
res_type = RT_HOSTID;
} else if (prop_type == PT_FS_ALLOWED) {
res_type = RT_FS_ALLOWED;
+ } else if (prop_type == PT_ZFSPRI) {
+ res_type = RT_ZFSPRI;
+ } else if (prop_type == PT_UUID) {
+ res_type = RT_UUID;
} else {
zerr(gettext("Cannot set a resource-specific property "
"from the global scope."));
@@ -4219,10 +4594,12 @@ set_func(cmd_t *cmd)
* A nasty expression but not that complicated:
* 1. fs options are simple or list (tested below)
* 2. rctl value's are complex or list (tested below)
+ * 3. net attr's are complex (tested below)
* Anything else should be simple.
*/
if (!(res_type == RT_FS && prop_type == PT_OPTIONS) &&
!(res_type == RT_RCTL && prop_type == PT_VALUE) &&
+ !(res_type == RT_NET && prop_type == PT_NPROP) &&
(pp->pv_type != PROP_VAL_SIMPLE ||
(prop_id = pp->pv_simple) == NULL)) {
zerr(gettext("A %s value was expected here."),
@@ -4395,6 +4772,9 @@ set_func(cmd_t *cmd)
case RT_SHARES:
set_aliased_rctl(ALIAS_SHARES, prop_type, prop_id);
return;
+ case RT_ZFSPRI:
+ set_aliased_rctl(ALIAS_ZFSPRI, prop_type, prop_id);
+ return;
case RT_HOSTID:
if ((err = zonecfg_set_hostid(handle, prop_id)) != Z_OK) {
if (err == Z_TOO_BIG) {
@@ -4408,6 +4788,15 @@ set_func(cmd_t *cmd)
}
need_to_commit = B_TRUE;
return;
+ case RT_UUID:
+ /*
+ * We can't set here. We have to wait until commit since the
+ * uuid will be updating the index file and we may not have
+ * created the zone yet.
+ */
+ (void) strlcpy(new_uuid, prop_id, sizeof (new_uuid));
+ need_to_commit = B_TRUE;
+ return;
case RT_FS_ALLOWED:
if ((err = zonecfg_set_fs_allowed(handle, prop_id)) != Z_OK)
zone_perror(zone, err, B_TRUE);
@@ -4482,6 +4871,21 @@ set_func(cmd_t *cmd)
prop_id,
sizeof (in_progress_nwiftab.zone_nwif_physical));
break;
+ case PT_MAC:
+ normalize_mac_addr(in_progress_nwiftab.zone_nwif_mac,
+ prop_id,
+ sizeof (in_progress_nwiftab.zone_nwif_mac));
+ break;
+ case PT_VLANID:
+ (void) strlcpy(in_progress_nwiftab.zone_nwif_vlan_id,
+ prop_id,
+ sizeof (in_progress_nwiftab.zone_nwif_vlan_id));
+ break;
+ case PT_GNIC:
+ (void) strlcpy(in_progress_nwiftab.zone_nwif_gnic,
+ prop_id,
+ sizeof (in_progress_nwiftab.zone_nwif_gnic));
+ break;
case PT_DEFROUTER:
if (validate_net_address_syntax(prop_id, B_TRUE)
!= Z_OK) {
@@ -4492,6 +4896,20 @@ set_func(cmd_t *cmd)
prop_id,
sizeof (in_progress_nwiftab.zone_nwif_defrouter));
break;
+ case PT_NPROP:
+ if (pp->pv_type != PROP_VAL_COMPLEX) {
+ zerr(gettext("A %s value was expected here."),
+ pvt_to_str(PROP_VAL_COMPLEX));
+ saw_error = B_TRUE;
+ return;
+ }
+ zonecfg_free_res_attr_list(
+ in_progress_nwiftab.zone_nwif_attrp);
+ in_progress_nwiftab.zone_nwif_attrp = NULL;
+ if (!(pp->pv_type == PROP_VAL_LIST &&
+ pp->pv_list == NULL))
+ add_property(cmd);
+ break;
default:
zone_perror(pt_to_str(prop_type), Z_NO_PROPERTY_TYPE,
B_TRUE);
@@ -4507,6 +4925,20 @@ set_func(cmd_t *cmd)
prop_id,
sizeof (in_progress_devtab.zone_dev_match));
break;
+ case PT_NPROP:
+ if (pp->pv_type != PROP_VAL_COMPLEX) {
+ zerr(gettext("A %s value was expected here."),
+ pvt_to_str(PROP_VAL_COMPLEX));
+ saw_error = B_TRUE;
+ return;
+ }
+ zonecfg_free_res_attr_list(
+ in_progress_devtab.zone_dev_attrp);
+ in_progress_devtab.zone_dev_attrp = NULL;
+ if (!(pp->pv_type == PROP_VAL_LIST &&
+ pp->pv_list == NULL))
+ add_property(cmd);
+ break;
default:
zone_perror(pt_to_str(prop_type), Z_NO_PROPERTY_TYPE,
B_TRUE);
@@ -4650,35 +5082,50 @@ set_func(cmd_t *cmd)
* the add_resource() function.
*/
- if ((cap = strtof(prop_id, &unitp)) <= 0 || *unitp != '\0' ||
- (int)(cap * 100) < 1) {
+ if ((cap = strtod(prop_id, &unitp)) <= 0 || *unitp != '\0' ||
+ (cap * 100.0) < 1) {
zerr(gettext("%s property is out of range."),
pt_to_str(PT_NCPUS));
saw_error = B_TRUE;
return;
}
+ cap *= 100.0;
+ /* To avoid rounding issues add .5 to force correct value. */
if ((err = zonecfg_set_aliased_rctl(handle, ALIAS_CPUCAP,
- (int)(cap * 100))) != Z_OK)
+ (uint_t)(cap + 0.5))) != Z_OK) {
zone_perror(zone, err, B_TRUE);
- else
+ } else {
need_to_commit = B_TRUE;
+ }
return;
case RT_MCAP:
switch (prop_type) {
case PT_PHYSICAL:
+ /*
+ * We have to check if an rctl is allowed here since
+ * there might already be a rctl defined that blocks
+ * the alias.
+ */
+ if (!zonecfg_aliased_rctl_ok(handle,
+ ALIAS_MAXPHYSMEM)) {
+ zone_perror(pt_to_str(PT_LOCKED),
+ Z_ALIAS_DISALLOW, B_FALSE);
+ saw_error = B_TRUE;
+ return;
+ }
+
if (!zonecfg_valid_memlimit(prop_id, &mem_cap)) {
- zerr(gettext("A positive number with a "
+ zerr(gettext("A non-negative number with a "
"required scale suffix (K, M, G or T) was "
- "expected here."));
- saw_error = B_TRUE;
- } else if (mem_cap < ONE_MB) {
- zerr(gettext("%s value is too small. It must "
- "be at least 1M."), pt_to_str(PT_PHYSICAL));
+ "expected\nhere."));
saw_error = B_TRUE;
} else {
- snprintf(in_progress_mcaptab.zone_physmem_cap,
- physmem_size, "%llu", mem_cap);
+ if ((err = zonecfg_set_aliased_rctl(handle,
+ ALIAS_MAXPHYSMEM, mem_cap)) != Z_OK)
+ zone_perror(zone, err, B_TRUE);
+ else
+ need_to_commit = B_TRUE;
}
break;
case PT_SWAP:
@@ -4950,6 +5397,23 @@ info_hostid(zone_dochandle_t handle, FILE *fp)
}
static void
+info_uuid(FILE *fp)
+{
+ uuid_t uuid;
+ char suuid[UUID_PRINTABLE_STRING_LENGTH];
+
+ if (new_uuid[0] != '\0') {
+ (void) fprintf(fp, "%s: %s\n", pt_to_str(PT_UUID), new_uuid);
+ } else if (zonecfg_get_uuid(zone, uuid) == Z_OK &&
+ !uuid_is_null(uuid)) {
+ uuid_unparse(uuid, suuid);
+ (void) fprintf(fp, "%s: %s\n", pt_to_str(PT_UUID), suuid);
+ } else {
+ (void) fprintf(fp, "%s:\n", pt_to_str(PT_UUID));
+ }
+}
+
+static void
info_fs_allowed(zone_dochandle_t handle, FILE *fp)
{
char fsallowedp[ZONE_FS_ALLOWED_MAX];
@@ -5031,12 +5495,25 @@ loopend:
static void
output_net(FILE *fp, struct zone_nwiftab *nwiftab)
{
+ struct zone_res_attrtab *np;
+
(void) fprintf(fp, "%s:\n", rt_to_str(RT_NET));
output_prop(fp, PT_ADDRESS, nwiftab->zone_nwif_address, B_TRUE);
output_prop(fp, PT_ALLOWED_ADDRESS,
nwiftab->zone_nwif_allowed_address, B_TRUE);
- output_prop(fp, PT_PHYSICAL, nwiftab->zone_nwif_physical, B_TRUE);
output_prop(fp, PT_DEFROUTER, nwiftab->zone_nwif_defrouter, B_TRUE);
+ output_prop(fp, PT_GNIC, nwiftab->zone_nwif_gnic, B_TRUE);
+ output_prop(fp, PT_MAC, nwiftab->zone_nwif_mac, B_TRUE);
+ output_prop(fp, PT_PHYSICAL, nwiftab->zone_nwif_physical, B_TRUE);
+ output_prop(fp, PT_VLANID, nwiftab->zone_nwif_vlan_id, B_TRUE);
+
+ for (np = nwiftab->zone_nwif_attrp; np != NULL;
+ np = np->zone_res_attr_next) {
+ fprintf(fp, "\t%s: (%s=%s,%s=\"%s\")\n",
+ pt_to_str(PT_NPROP),
+ pt_to_str(PT_NAME), np->zone_res_attr_name,
+ pt_to_str(PT_VALUE), np->zone_res_attr_value);
+ }
}
static void
@@ -5079,8 +5556,18 @@ info_net(zone_dochandle_t handle, FILE *fp, cmd_t *cmd)
static void
output_dev(FILE *fp, struct zone_devtab *devtab)
{
+ struct zone_res_attrtab *np;
+
(void) fprintf(fp, "%s:\n", rt_to_str(RT_DEVICE));
output_prop(fp, PT_MATCH, devtab->zone_dev_match, B_TRUE);
+
+ for (np = devtab->zone_dev_attrp; np != NULL;
+ np = np->zone_res_attr_next) {
+ fprintf(fp, "\t%s: (%s=%s,%s=\"%s\")\n",
+ pt_to_str(PT_NPROP),
+ pt_to_str(PT_NAME), np->zone_res_attr_name,
+ pt_to_str(PT_VALUE), np->zone_res_attr_value);
+ }
}
static void
@@ -5339,15 +5826,18 @@ bytes_to_units(char *str, char *buf, int bufsize)
}
static void
-output_mcap(FILE *fp, struct zone_mcaptab *mcaptab, int showswap,
+output_mcap(FILE *fp, int showphys, uint64_t maxphys, int showswap,
uint64_t maxswap, int showlocked, uint64_t maxlocked)
{
char buf[128];
(void) fprintf(fp, "%s:\n", rt_to_str(RT_MCAP));
- if (mcaptab->zone_physmem_cap[0] != '\0') {
- bytes_to_units(mcaptab->zone_physmem_cap, buf, sizeof (buf));
- output_prop(fp, PT_PHYSICAL, buf, B_TRUE);
+
+ if (showphys == Z_OK) {
+ (void) snprintf(buf, sizeof (buf), "%llu", maxphys);
+ bytes_to_units(buf, buf, sizeof (buf));
+ /* Print directly since "physical" also is a net property. */
+ (void) fprintf(fp, "\t[%s: %s]\n", pt_to_str(PT_PHYSICAL), buf);
}
if (showswap == Z_OK) {
@@ -5369,16 +5859,16 @@ info_mcap(zone_dochandle_t handle, FILE *fp)
int res1, res2, res3;
uint64_t swap_limit;
uint64_t locked_limit;
- struct zone_mcaptab lookup;
+ uint64_t phys_limit;
- bzero(&lookup, sizeof (lookup));
- res1 = zonecfg_getmcapent(handle, &lookup);
+ res1 = zonecfg_get_aliased_rctl(handle, ALIAS_MAXPHYSMEM, &phys_limit);
res2 = zonecfg_get_aliased_rctl(handle, ALIAS_MAXSWAP, &swap_limit);
res3 = zonecfg_get_aliased_rctl(handle, ALIAS_MAXLOCKEDMEM,
&locked_limit);
if (res1 == Z_OK || res2 == Z_OK || res3 == Z_OK)
- output_mcap(fp, &lookup, res2, swap_limit, res3, locked_limit);
+ output_mcap(fp, res1, phys_limit, res2, swap_limit,
+ res3, locked_limit);
}
static void
@@ -5429,9 +5919,11 @@ info_func(cmd_t *cmd)
FILE *fp = stdout;
boolean_t need_to_close = B_FALSE;
int type;
- int res1, res2;
+ int res1, res2, res3;
uint64_t swap_limit;
uint64_t locked_limit;
+ uint64_t phys_limit;
+ struct stat statbuf;
assert(cmd != NULL);
@@ -5479,7 +5971,9 @@ info_func(cmd_t *cmd)
&swap_limit);
res2 = zonecfg_get_aliased_rctl(handle,
ALIAS_MAXLOCKEDMEM, &locked_limit);
- output_mcap(fp, &in_progress_mcaptab, res1, swap_limit,
+ res3 = zonecfg_get_aliased_rctl(handle,
+ ALIAS_MAXPHYSMEM, &phys_limit);
+ output_mcap(fp, res3, phys_limit, res1, swap_limit,
res2, locked_limit);
break;
case RT_ADMIN:
@@ -5519,6 +6013,7 @@ info_func(cmd_t *cmd)
info_iptype(handle, fp);
info_hostid(handle, fp);
info_fs_allowed(handle, fp);
+ info_uuid(fp);
}
info_aliased_rctl(handle, fp, ALIAS_MAXLWPS);
info_aliased_rctl(handle, fp, ALIAS_MAXPROCS);
@@ -5527,6 +6022,7 @@ info_func(cmd_t *cmd)
info_aliased_rctl(handle, fp, ALIAS_MAXMSGIDS);
info_aliased_rctl(handle, fp, ALIAS_MAXSEMIDS);
info_aliased_rctl(handle, fp, ALIAS_SHARES);
+ info_aliased_rctl(handle, fp, ALIAS_ZFSPRI);
if (!global_zone) {
info_fs(handle, fp, cmd);
info_net(handle, fp, cmd);
@@ -5590,6 +6086,9 @@ info_func(cmd_t *cmd)
case RT_SHARES:
info_aliased_rctl(handle, fp, ALIAS_SHARES);
break;
+ case RT_ZFSPRI:
+ info_aliased_rctl(handle, fp, ALIAS_ZFSPRI);
+ break;
case RT_FS:
info_fs(handle, fp, cmd);
break;
@@ -5620,6 +6119,9 @@ info_func(cmd_t *cmd)
case RT_HOSTID:
info_hostid(handle, fp);
break;
+ case RT_UUID:
+ info_uuid(fp);
+ break;
case RT_ADMIN:
info_auth(handle, fp, cmd);
break;
@@ -6101,10 +6603,33 @@ verify_func(cmd_t *cmd)
if (save) {
if (ret_val == Z_OK) {
+ /*
+ * If the zone doesn't yet have a debug ID, set one now.
+ */
+ if (zonecfg_get_did(handle) == -1)
+ zonecfg_set_did(handle);
+
if ((ret_val = zonecfg_save(handle)) == Z_OK) {
need_to_commit = B_FALSE;
(void) strlcpy(revert_zone, zone,
sizeof (revert_zone));
+
+ if (is_create) {
+ zonecfg_notify_create(handle);
+ is_create = B_FALSE;
+ }
+ }
+
+ /*
+ * Commit a new uuid at this point since we now know the
+ * zone index entry will exist.
+ */
+ if (new_uuid[0] != '\0') {
+ if ((err = zonecfg_set_uuid(zone, zonepath,
+ new_uuid)) != Z_OK)
+ zone_perror(zone, err, B_FALSE);
+ else
+ new_uuid[0] = '\0';
}
} else {
zerr(gettext("Zone %s failed to verify"), zone);
@@ -6275,6 +6800,7 @@ end_func(cmd_t *cmd)
int err, arg, res1, res2, res3;
uint64_t swap_limit;
uint64_t locked_limit;
+ uint64_t phys_limit;
uint64_t proc_cap;
assert(cmd != NULL);
@@ -6578,8 +7104,8 @@ end_func(cmd_t *cmd)
break;
case RT_MCAP:
/* Make sure everything was filled in. */
- res1 = strlen(in_progress_mcaptab.zone_physmem_cap) == 0 ?
- Z_ERR : Z_OK;
+ res1 = zonecfg_get_aliased_rctl(handle, ALIAS_MAXPHYSMEM,
+ &phys_limit);
res2 = zonecfg_get_aliased_rctl(handle, ALIAS_MAXSWAP,
&swap_limit);
res3 = zonecfg_get_aliased_rctl(handle, ALIAS_MAXLOCKEDMEM,
@@ -6595,11 +7121,6 @@ end_func(cmd_t *cmd)
/* if phys & locked are both set, verify locked <= phys */
if (res1 == Z_OK && res3 == Z_OK) {
- uint64_t phys_limit;
- char *endp;
-
- phys_limit = strtoull(
- in_progress_mcaptab.zone_physmem_cap, &endp, 10);
if (phys_limit < locked_limit) {
zerr(gettext("The %s cap must be less than or "
"equal to the %s cap."),
@@ -6611,23 +7132,6 @@ end_func(cmd_t *cmd)
}
err = Z_OK;
- if (res1 == Z_OK) {
- /*
- * We could be ending from either an add operation
- * or a select operation. Since all of the properties
- * within this resource are optional, we always use
- * modify on the mcap entry. zonecfg_modify_mcap()
- * will handle both adding and modifying a memory cap.
- */
- err = zonecfg_modify_mcap(handle, &in_progress_mcaptab);
- } else if (end_op == CMD_SELECT) {
- /*
- * If we're ending from a select and the physical
- * memory cap is empty then the user could have cleared
- * the physical cap value, so try to delete the entry.
- */
- (void) zonecfg_delete_mcap(handle);
- }
break;
case RT_ADMIN:
/* First make sure everything was filled in. */
@@ -7176,8 +7680,10 @@ get_execbasename(char *execfullname)
int
main(int argc, char *argv[])
{
- int err, arg;
+ int err, arg, uflag = 0, zflag = 0;
struct stat st;
+ uuid_t uuidin;
+ char zonename[ZONENAME_MAX + 1];
/* This must be before anything goes to stdout. */
setbuf(stdout, NULL);
@@ -7204,7 +7710,7 @@ main(int argc, char *argv[])
exit(Z_OK);
}
- while ((arg = getopt(argc, argv, "?f:R:z:")) != EOF) {
+ while ((arg = getopt(argc, argv, "?f:R:z:u:")) != EOF) {
switch (arg) {
case '?':
if (optopt == '?')
@@ -7231,6 +7737,21 @@ main(int argc, char *argv[])
}
zonecfg_set_root(optarg);
break;
+ case 'u':
+ if (uuid_parse((char *)optarg, uuidin) == -1)
+ return (Z_INVALID_PROPERTY);
+
+ if (zonecfg_get_name_by_uuid(uuidin, zonename,
+ ZONENAME_MAX) != Z_OK) {
+ zone_perror(optarg, Z_BOGUS_ZONE_NAME, B_TRUE);
+ usage(B_FALSE, HELP_SYNTAX);
+ exit(Z_USAGE);
+ }
+
+ (void) strlcpy(zone, zonename, sizeof (zone));
+ (void) strlcpy(revert_zone, zonename, sizeof (zone));
+ uflag = 1;
+ break;
case 'z':
if (strcmp(optarg, GLOBAL_ZONENAME) == 0) {
global_zone = B_TRUE;
@@ -7241,6 +7762,7 @@ main(int argc, char *argv[])
}
(void) strlcpy(zone, optarg, sizeof (zone));
(void) strlcpy(revert_zone, optarg, sizeof (zone));
+ zflag = 1;
break;
default:
usage(B_FALSE, HELP_USAGE);
@@ -7248,7 +7770,7 @@ main(int argc, char *argv[])
}
}
- if (optind > argc || strcmp(zone, "") == 0) {
+ if (optind > argc || strcmp(zone, "") == 0 || (uflag && zflag)) {
usage(B_FALSE, HELP_USAGE);
exit(Z_USAGE);
}
diff --git a/usr/src/cmd/zonecfg/zonecfg.h b/usr/src/cmd/zonecfg/zonecfg.h
index d8f8b14ce8..f8c78437ad 100644
--- a/usr/src/cmd/zonecfg/zonecfg.h
+++ b/usr/src/cmd/zonecfg/zonecfg.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent Inc. All rights reserved.
*/
#ifndef _ZONECFG_H
@@ -90,9 +91,11 @@ extern "C" {
#define RT_ADMIN 26
#define RT_FS_ALLOWED 27
#define RT_MAXPROCS 28 /* really a rctl alias property, but for info */
+#define RT_ZFSPRI 29 /* really a rctl alias property, but for info */
+#define RT_UUID 30 /* really a property, but for info */
#define RT_MIN RT_UNKNOWN
-#define RT_MAX RT_MAXPROCS
+#define RT_MAX RT_UUID
/* property types: increment PT_MAX when expanding this list */
#define PT_UNKNOWN 0
@@ -137,9 +140,15 @@ extern "C" {
#define PT_FS_ALLOWED 39
#define PT_MAXPROCS 40
#define PT_ALLOWED_ADDRESS 41
+#define PT_ZFSPRI 42
+#define PT_MAC 43
+#define PT_VLANID 44
+#define PT_GNIC 45
+#define PT_NPROP 46
+#define PT_UUID 47
#define PT_MIN PT_UNKNOWN
-#define PT_MAX PT_ALLOWED_ADDRESS
+#define PT_MAX PT_UUID
#define MAX_EQ_PROP_PAIRS 3
diff --git a/usr/src/cmd/zonecfg/zonecfg_grammar.y b/usr/src/cmd/zonecfg/zonecfg_grammar.y
index d7f11b6a46..13a17876b7 100644
--- a/usr/src/cmd/zonecfg/zonecfg_grammar.y
+++ b/usr/src/cmd/zonecfg/zonecfg_grammar.y
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2013, Joyent Inc. All rights reserved.
*/
/*
@@ -136,6 +137,7 @@ complex_piece_func(int cp_type, const char *str, complex_property_ptr_t cp_next)
%token OPEN_PAREN CLOSE_PAREN COMMA DATASET LIMITPRIV BOOTARGS BRAND PSET PCAP
%token MCAP NCPUS IMPORTANCE SHARES MAXLWPS MAXSHMMEM MAXSHMIDS MAXMSGIDS
%token MAXSEMIDS LOCKED SWAP SCHED CLEAR DEFROUTER ADMIN USER AUTHS MAXPROCS
+%token ZFSPRI MAC VLANID GNIC NPROP UUID
%type <strval> TOKEN EQUAL OPEN_SQ_BRACKET CLOSE_SQ_BRACKET
property_value OPEN_PAREN CLOSE_PAREN COMMA simple_prop_val
@@ -145,7 +147,7 @@ complex_piece_func(int cp_type, const char *str, complex_property_ptr_t cp_next)
%type <ival> property_name SPECIAL RAW DIR OPTIONS TYPE ADDRESS PHYSICAL NAME
MATCH ZONENAME ZONEPATH AUTOBOOT POOL LIMITPRIV BOOTARGS VALUE PRIV LIMIT
ACTION BRAND SCHED IPTYPE DEFROUTER HOSTID USER AUTHS FS_ALLOWED
- ALLOWED_ADDRESS
+ ALLOWED_ADDRESS MAC VLANID GNIC NPROP UUID
%type <cmd> command
%type <cmd> add_command ADD
%type <cmd> cancel_command CANCEL
@@ -650,6 +652,24 @@ info_command: INFO
$$->cmd_res_type = RT_FS_ALLOWED;
$$->cmd_prop_nv_pairs = 0;
}
+ | INFO UUID
+ {
+ if (($$ = alloc_cmd()) == NULL)
+ YYERROR;
+ cmd = $$;
+ $$->cmd_handler = &info_func;
+ $$->cmd_res_type = RT_UUID;
+ $$->cmd_prop_nv_pairs = 0;
+ }
+ | INFO ZFSPRI
+ {
+ if (($$ = alloc_cmd()) == NULL)
+ YYERROR;
+ cmd = $$;
+ $$->cmd_handler = &info_func;
+ $$->cmd_res_type = RT_ZFSPRI;
+ $$->cmd_prop_nv_pairs = 0;
+ }
| INFO resource_type property_name EQUAL property_value
{
if (($$ = alloc_cmd()) == NULL)
@@ -734,6 +754,19 @@ remove_command: REMOVE
$$->cmd_prop_name[0] = $2;
$$->cmd_property_ptr[0] = &property[0];
}
+ | REMOVE TOKEN property_name property_value
+ {
+ if (($$ = alloc_cmd()) == NULL)
+ YYERROR;
+ cmd = $$;
+ $$->cmd_handler = &remove_func;
+ $$->cmd_argc = 1;
+ $$->cmd_argv[0] = claim_token($2);
+ $$->cmd_argv[1] = NULL;
+ $$->cmd_prop_nv_pairs = 1;
+ $$->cmd_prop_name[0] = $3;
+ $$->cmd_property_ptr[0] = &property[0];
+ }
| REMOVE resource_type property_name EQUAL property_value
{
if (($$ = alloc_cmd()) == NULL)
@@ -745,6 +778,20 @@ remove_command: REMOVE
$$->cmd_prop_name[0] = $3;
$$->cmd_property_ptr[0] = &property[0];
}
+ | REMOVE TOKEN resource_type property_name EQUAL property_value
+ {
+ if (($$ = alloc_cmd()) == NULL)
+ YYERROR;
+ cmd = $$;
+ $$->cmd_handler = &remove_func;
+ $$->cmd_res_type = $3;
+ $$->cmd_argc = 1;
+ $$->cmd_argv[0] = claim_token($2);
+ $$->cmd_argv[1] = NULL;
+ $$->cmd_prop_nv_pairs = 1;
+ $$->cmd_prop_name[0] = $4;
+ $$->cmd_property_ptr[0] = &property[0];
+ }
| REMOVE resource_type property_name EQUAL property_value property_name EQUAL property_value
{
if (($$ = alloc_cmd()) == NULL)
@@ -758,6 +805,22 @@ remove_command: REMOVE
$$->cmd_prop_name[1] = $6;
$$->cmd_property_ptr[1] = &property[1];
}
+ | REMOVE TOKEN resource_type property_name EQUAL property_value property_name EQUAL property_value
+ {
+ if (($$ = alloc_cmd()) == NULL)
+ YYERROR;
+ cmd = $$;
+ $$->cmd_handler = &remove_func;
+ $$->cmd_res_type = $3;
+ $$->cmd_argc = 1;
+ $$->cmd_argv[0] = claim_token($2);
+ $$->cmd_argv[1] = NULL;
+ $$->cmd_prop_nv_pairs = 2;
+ $$->cmd_prop_name[0] = $4;
+ $$->cmd_property_ptr[0] = &property[0];
+ $$->cmd_prop_name[1] = $7;
+ $$->cmd_property_ptr[1] = &property[1];
+ }
| REMOVE resource_type property_name EQUAL property_value property_name EQUAL property_value property_name EQUAL property_value
{
if (($$ = alloc_cmd()) == NULL)
@@ -773,6 +836,24 @@ remove_command: REMOVE
$$->cmd_prop_name[2] = $9;
$$->cmd_property_ptr[2] = &property[2];
}
+ | REMOVE TOKEN resource_type property_name EQUAL property_value property_name EQUAL property_value property_name EQUAL property_value
+ {
+ if (($$ = alloc_cmd()) == NULL)
+ YYERROR;
+ cmd = $$;
+ $$->cmd_handler = &remove_func;
+ $$->cmd_res_type = $3;
+ $$->cmd_argc = 1;
+ $$->cmd_argv[0] = claim_token($2);
+ $$->cmd_argv[1] = NULL;
+ $$->cmd_prop_nv_pairs = 3;
+ $$->cmd_prop_name[0] = $4;
+ $$->cmd_property_ptr[0] = &property[0];
+ $$->cmd_prop_name[1] = $7;
+ $$->cmd_property_ptr[1] = &property[1];
+ $$->cmd_prop_name[2] = $10;
+ $$->cmd_property_ptr[2] = &property[2];
+ }
revert_command: REVERT
{
@@ -976,6 +1057,10 @@ property_name: SPECIAL { $$ = PT_SPECIAL; }
| ALLOWED_ADDRESS { $$ = PT_ALLOWED_ADDRESS; }
| PHYSICAL { $$ = PT_PHYSICAL; }
| DEFROUTER { $$ = PT_DEFROUTER; }
+ | MAC { $$ = PT_MAC; }
+ | VLANID { $$ = PT_VLANID; }
+ | GNIC { $$ = PT_GNIC; }
+ | NPROP { $$ = PT_NPROP; }
| NAME { $$ = PT_NAME; }
| VALUE { $$ = PT_VALUE; }
| MATCH { $$ = PT_MATCH; }
@@ -999,6 +1084,8 @@ property_name: SPECIAL { $$ = PT_SPECIAL; }
| USER { $$ = PT_USER; }
| AUTHS { $$ = PT_AUTHS; }
| FS_ALLOWED { $$ = PT_FS_ALLOWED; }
+ | UUID { $$ = PT_UUID; }
+ | ZFSPRI { $$ = PT_ZFSPRI; }
/*
* The grammar builds data structures from the bottom up. Thus various
diff --git a/usr/src/cmd/zonecfg/zonecfg_lex.l b/usr/src/cmd/zonecfg/zonecfg_lex.l
index 6a0b577b75..328a75c922 100644
--- a/usr/src/cmd/zonecfg/zonecfg_lex.l
+++ b/usr/src/cmd/zonecfg/zonecfg_lex.l
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent Inc. All rights reserved.
*/
#include <assert.h>
@@ -57,10 +58,11 @@ extern void yyerror(char *s);
static char *create_token(char *s);
%}
-%a 7000
+%a 8000
%p 5000
%e 2000
%n 1000
+%o 13000
%{
/*
@@ -236,6 +238,18 @@ static char *create_token(char *s);
<TSTATE>defrouter { return DEFROUTER; }
<CSTATE>defrouter { return DEFROUTER; }
+<TSTATE>mac-addr { return MAC; }
+<CSTATE>mac-addr { return MAC; }
+
+<TSTATE>vlan-id { return VLANID; }
+<CSTATE>vlan-id { return VLANID; }
+
+<TSTATE>global-nic { return GNIC; }
+<CSTATE>global-nic { return GNIC; }
+
+<TSTATE>property { return NPROP; }
+<CSTATE>property { return NPROP; }
+
<TSTATE>dir { return DIR; }
<CSTATE>dir { return DIR; }
@@ -308,6 +322,12 @@ static char *create_token(char *s);
<TSTATE>fs-allowed { return FS_ALLOWED; }
<CSTATE>fs-allowed { return FS_ALLOWED; }
+<TSTATE>uuid { return UUID; }
+<CSTATE>uuid { return UUID; }
+
+<TSTATE>zfs-io-priority { return ZFSPRI; }
+<CSTATE>zfs-io-priority { return ZFSPRI; }
+
<TSTATE>= { return EQUAL; }
<LSTATE>= { return EQUAL; }
<CSTATE>= { return EQUAL; }
@@ -357,6 +377,13 @@ static char *create_token(char *s);
return TOKEN;
}
+<CSTATE>\"[^\"\n]*[\"\n] {
+ yylval.strval = create_token(yytext + 1);
+ if (yylval.strval[yyleng - 2] == '"')
+ yylval.strval[yyleng - 2] = 0;
+ return TOKEN;
+ }
+
<TSTATE>\"[^\"\n]*[\"\n] {
yylval.strval = create_token(yytext + 1);
if (yylval.strval[yyleng - 2] == '"')
diff --git a/usr/src/cmd/zonename/Makefile b/usr/src/cmd/zonename/Makefile
index 566e893a67..3a51952455 100644
--- a/usr/src/cmd/zonename/Makefile
+++ b/usr/src/cmd/zonename/Makefile
@@ -28,8 +28,10 @@
#
PROG= zonename
+OBJS= zonename.o
include ../Makefile.cmd
+include ../Makefile.ctf
LDLIBS += -lzonecfg
@@ -37,6 +39,10 @@ LDLIBS += -lzonecfg
all: $(PROG)
+$(PROG): $(OBJS)
+ $(LINK.c) -o $@ $(OBJS) $(LDLIBS)
+ $(POST_PROCESS)
+
install: all $(ROOTSBINPROG)
$(RM) $(ROOTPROG)
$(SYMLINK) ../../sbin/$(PROG) $(ROOTPROG)
@@ -44,6 +50,10 @@ install: all $(ROOTSBINPROG)
check: $(PROG).c
$(CSTYLE) -pP $(PROG).c
+%.o: %.c
+ $(COMPILE.c) $<
+ $(POST_PROCESS_O)
+
clean:
lint: lint_PROG
diff --git a/usr/src/cmd/zonestat/zonestatd/zonestatd.c b/usr/src/cmd/zonestat/zonestatd/zonestatd.c
index b764551131..6c293bcc0e 100644
--- a/usr/src/cmd/zonestat/zonestatd/zonestatd.c
+++ b/usr/src/cmd/zonestat/zonestatd/zonestatd.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent, Inc. All rights reserved.
*/
#include <alloca.h>
#include <assert.h>
@@ -2190,7 +2191,7 @@ zsd_get_zone_rctl_usage(char *name)
return (rctlblk_get_value(rblk));
}
-#define ZSD_NUM_RCTL_VALS 19
+#define ZSD_NUM_RCTL_VALS 20
/*
* Fetch the limit information for a zone. This uses zone_enter() as the
@@ -2237,12 +2238,6 @@ zsd_get_zone_caps(zsd_ctl_t *ctl, zsd_zone_t *zone, uint64_t *cpu_shares,
*msgids = 0;
*lofi = 0;
- /* Get the ram cap first since it is a zone attr */
- ret = zone_getattr(zone->zsz_id, ZONE_ATTR_PHYS_MCAP,
- ram_cap, sizeof (*ram_cap));
- if (ret < 0 || *ram_cap == 0)
- *ram_cap = ZS_LIMIT_NONE;
-
/* Get the zone's default scheduling class */
ret = zone_getattr(zone->zsz_id, ZONE_ATTR_SCHED_CLASS,
class, sizeof (class));
@@ -2298,6 +2293,7 @@ zsd_get_zone_caps(zsd_ctl_t *ctl, zsd_zone_t *zone, uint64_t *cpu_shares,
vals[i++] = zsd_get_zone_rctl_usage("zone.max-msg-ids");
vals[i++] = zsd_get_zone_rctl_limit("zone.max-lofi");
vals[i++] = zsd_get_zone_rctl_usage("zone.max-lofi");
+ vals[i++] = zsd_get_zone_rctl_usage("zone.max-physical-memory");
if (write(p[1], vals, ZSD_NUM_RCTL_VALS * sizeof (uint64_t)) !=
ZSD_NUM_RCTL_VALS * sizeof (uint64_t)) {
@@ -2342,6 +2338,7 @@ zsd_get_zone_caps(zsd_ctl_t *ctl, zsd_zone_t *zone, uint64_t *cpu_shares,
*msgids = vals[i++];
*lofi_cap = vals[i++];
*lofi = vals[i++];
+ *ram_cap = vals[i++];
/* Interpret maximum values as no cap */
if (*cpu_cap == UINT32_MAX || *cpu_cap == 0)
diff --git a/usr/src/cmd/zpool/zpool_main.c b/usr/src/cmd/zpool/zpool_main.c
index 82b9672a44..58a70917a2 100644
--- a/usr/src/cmd/zpool/zpool_main.c
+++ b/usr/src/cmd/zpool/zpool_main.c
@@ -24,6 +24,7 @@
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright (c) 2012 by Frederik Wessels. All rights reserved.
* Copyright (c) 2013 by Prasad Joshi (sTec). All rights reserved.
+ * Copyright (c) 2013 Joyent, Inc. All rights reserved.
* Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>.
* Copyright 2016 Nexenta Systems, Inc.
*/
diff --git a/usr/src/common/brand/lx/lx_auxv.c b/usr/src/common/brand/lx/lx_auxv.c
new file mode 100644
index 0000000000..8994ea31a5
--- /dev/null
+++ b/usr/src/common/brand/lx/lx_auxv.c
@@ -0,0 +1,85 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <sys/auxv.h>
+#include <sys/lx_brand.h>
+
+
+int
+lx_auxv_stol(const auxv_t *ap, auxv_t *oap, const lx_elf_data_t *edp)
+{
+ switch (ap->a_type) {
+ case AT_BASE:
+ oap->a_un.a_val = edp->ed_base;
+ break;
+ case AT_ENTRY:
+ oap->a_un.a_val = edp->ed_entry;
+ break;
+ case AT_PHDR:
+ oap->a_un.a_val = edp->ed_phdr;
+ break;
+ case AT_PHENT:
+ oap->a_un.a_val = edp->ed_phent;
+ break;
+ case AT_PHNUM:
+ oap->a_un.a_val = edp->ed_phnum;
+ break;
+ case AT_SUN_BRAND_LX_SYSINFO_EHDR:
+ oap->a_type = AT_SYSINFO_EHDR;
+ oap->a_un.a_val = ap->a_un.a_val;
+ return (0);
+ case AT_SUN_BRAND_LX_CLKTCK:
+ oap->a_type = AT_CLKTCK;
+ oap->a_un.a_val = ap->a_un.a_val;
+ return (0);
+ case AT_SUN_AUXFLAGS:
+ if ((ap->a_un.a_val & AF_SUN_SETUGID) != 0) {
+ oap->a_type = AT_SECURE;
+ oap->a_un.a_val = 1;
+ return (0);
+ } else {
+ return (1);
+ }
+ case AT_SUN_GID:
+ oap->a_type = AT_LX_EGID;
+ oap->a_un.a_val = ap->a_un.a_val;
+ return (0);
+ case AT_SUN_RGID:
+ oap->a_type = AT_LX_GID;
+ oap->a_un.a_val = ap->a_un.a_val;
+ return (0);
+ case AT_SUN_UID:
+ oap->a_type = AT_LX_EUID;
+ oap->a_un.a_val = ap->a_un.a_val;
+ return (0);
+ case AT_SUN_RUID:
+ oap->a_type = AT_LX_UID;
+ oap->a_un.a_val = ap->a_un.a_val;
+ return (0);
+ case AT_EXECFD:
+ case AT_PAGESZ:
+ case AT_FLAGS:
+ case AT_RANDOM:
+ case AT_NULL:
+ /* No translate needed */
+ oap->a_un.a_val = ap->a_un.a_val;
+ break;
+ default:
+ /* All other unrecognized entries are ignored */
+ return (1);
+ }
+ oap->a_type = ap->a_type;
+ return (0);
+}
diff --git a/usr/src/common/brand/lx/lx_auxv.h b/usr/src/common/brand/lx/lx_auxv.h
new file mode 100644
index 0000000000..190d939f35
--- /dev/null
+++ b/usr/src/common/brand/lx/lx_auxv.h
@@ -0,0 +1,32 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _LX_AUXV_H
+#define _LX_AUXV_H
+
+#include <sys/auxv.h>
+#include <sys/lx_brand.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int lx_auxv_stol(const auxv_t *, auxv_t *, const lx_elf_data_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_AUXV_H */
diff --git a/usr/src/common/brand/lx/lx_errno.c b/usr/src/common/brand/lx/lx_errno.c
new file mode 100644
index 0000000000..269ed470dc
--- /dev/null
+++ b/usr/src/common/brand/lx/lx_errno.c
@@ -0,0 +1,206 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * This file contains a mapping table and lookup function for converting
+ * illumos native error numbers into error numbers appropriate for Linux
+ * emulation.
+ *
+ * The translation table is generated by the "gen_errno", built from and
+ * documented in "usr/src/common/brand/lx/tools".
+ */
+
+#include <sys/debug.h>
+
+const int
+lx_stol_errno[] = {
+ 0, /* 0: No Error */
+ 1, /* 1: EPERM --> 1: EPERM */
+ 2, /* 2: ENOENT --> 2: ENOENT */
+ 3, /* 3: ESRCH --> 3: ESRCH */
+ 4, /* 4: EINTR --> 4: EINTR */
+ 5, /* 5: EIO --> 5: EIO */
+ 6, /* 6: ENXIO --> 6: ENXIO */
+ 7, /* 7: E2BIG --> 7: E2BIG */
+ 8, /* 8: ENOEXEC --> 8: ENOEXEC */
+ 9, /* 9: EBADF --> 9: EBADF */
+ 10, /* 10: ECHILD --> 10: ECHILD */
+ 11, /* 11: EAGAIN --> 11: EAGAIN */
+ 12, /* 12: ENOMEM --> 12: ENOMEM */
+ 13, /* 13: EACCES --> 13: EACCES */
+ 14, /* 14: EFAULT --> 14: EFAULT */
+ 15, /* 15: ENOTBLK --> 15: ENOTBLK */
+ 16, /* 16: EBUSY --> 16: EBUSY */
+ 17, /* 17: EEXIST --> 17: EEXIST */
+ 18, /* 18: EXDEV --> 18: EXDEV */
+ 19, /* 19: ENODEV --> 19: ENODEV */
+ 20, /* 20: ENOTDIR --> 20: ENOTDIR */
+ 21, /* 21: EISDIR --> 21: EISDIR */
+ 22, /* 22: EINVAL --> 22: EINVAL */
+ 23, /* 23: ENFILE --> 23: ENFILE */
+ 24, /* 24: EMFILE --> 24: EMFILE */
+ 25, /* 25: ENOTTY --> 25: ENOTTY */
+ 26, /* 26: ETXTBSY --> 26: ETXTBSY */
+ 27, /* 27: EFBIG --> 27: EFBIG */
+ 28, /* 28: ENOSPC --> 28: ENOSPC */
+ 29, /* 29: ESPIPE --> 29: ESPIPE */
+ 30, /* 30: EROFS --> 30: EROFS */
+ 31, /* 31: EMLINK --> 31: EMLINK */
+ 32, /* 32: EPIPE --> 32: EPIPE */
+ 33, /* 33: EDOM --> 33: EDOM */
+ 34, /* 34: ERANGE --> 34: ERANGE */
+ 42, /* 35: ENOMSG --> 42: ENOMSG */
+ 43, /* 36: EIDRM --> 43: EIDRM */
+ 44, /* 37: ECHRNG --> 44: ECHRNG */
+ 45, /* 38: EL2NSYNC --> 45: EL2NSYNC */
+ 46, /* 39: EL3HLT --> 46: EL3HLT */
+ 47, /* 40: EL3RST --> 47: EL3RST */
+ 48, /* 41: ELNRNG --> 48: ELNRNG */
+ 49, /* 42: EUNATCH --> 49: EUNATCH */
+ 50, /* 43: ENOCSI --> 50: ENOCSI */
+ 51, /* 44: EL2HLT --> 51: EL2HLT */
+ 35, /* 45: EDEADLK --> 35: EDEADLK */
+ 37, /* 46: ENOLCK --> 37: ENOLCK */
+ 125, /* 47: ECANCELED --> 125: ECANCELED */
+ 38, /* 48: ENOTSUP --> 38: ENOSYS */
+ 122, /* 49: EDQUOT --> 122: EDQUOT */
+ 52, /* 50: EBADE --> 52: EBADE */
+ 53, /* 51: EBADR --> 53: EBADR */
+ 54, /* 52: EXFULL --> 54: EXFULL */
+ 55, /* 53: ENOANO --> 55: ENOANO */
+ 56, /* 54: EBADRQC --> 56: EBADRQC */
+ 57, /* 55: EBADSLT --> 57: EBADSLT */
+ 35, /* 56: EDEADLOCK --> 35: EDEADLK */
+ 59, /* 57: EBFONT --> 59: EBFONT */
+ 130, /* 58: EOWNERDEAD --> 130: EOWNERDEAD */
+ 131, /* 59: ENOTRECOVERABLE --> 131: ENOTRECOVERABLE */
+ 60, /* 60: ENOSTR --> 60: ENOSTR */
+ 61, /* 61: ENODATA --> 61: ENODATA */
+ 62, /* 62: ETIME --> 62: ETIME */
+ 63, /* 63: ENOSR --> 63: ENOSR */
+ 64, /* 64: ENONET --> 64: ENONET */
+ 65, /* 65: ENOPKG --> 65: ENOPKG */
+ 66, /* 66: EREMOTE --> 66: EREMOTE */
+ 67, /* 67: ENOLINK --> 67: ENOLINK */
+ 68, /* 68: EADV --> 68: EADV */
+ 69, /* 69: ESRMNT --> 69: ESRMNT */
+ 70, /* 70: ECOMM --> 70: ECOMM */
+ 71, /* 71: EPROTO --> 71: EPROTO */
+ -2, /* 72: ELOCKUNMAPPED --> -2: No Analogue */
+ -2, /* 73: ENOTACTIVE --> -2: No Analogue */
+ 72, /* 74: EMULTIHOP --> 72: EMULTIHOP */
+ -1, /* 75: Unused Number */
+ -1, /* 76: Unused Number */
+ 74, /* 77: EBADMSG --> 74: EBADMSG */
+ 36, /* 78: ENAMETOOLONG --> 36: ENAMETOOLONG */
+ 75, /* 79: EOVERFLOW --> 75: EOVERFLOW */
+ 76, /* 80: ENOTUNIQ --> 76: ENOTUNIQ */
+ 77, /* 81: EBADFD --> 77: EBADFD */
+ 78, /* 82: EREMCHG --> 78: EREMCHG */
+ 79, /* 83: ELIBACC --> 79: ELIBACC */
+ 80, /* 84: ELIBBAD --> 80: ELIBBAD */
+ 81, /* 85: ELIBSCN --> 81: ELIBSCN */
+ 82, /* 86: ELIBMAX --> 82: ELIBMAX */
+ 83, /* 87: ELIBEXEC --> 83: ELIBEXEC */
+ 84, /* 88: EILSEQ --> 84: EILSEQ */
+ 38, /* 89: ENOSYS --> 38: ENOSYS */
+ 40, /* 90: ELOOP --> 40: ELOOP */
+ 85, /* 91: ERESTART --> 85: ERESTART */
+ 86, /* 92: ESTRPIPE --> 86: ESTRPIPE */
+ 39, /* 93: ENOTEMPTY --> 39: ENOTEMPTY */
+ 87, /* 94: EUSERS --> 87: EUSERS */
+ 88, /* 95: ENOTSOCK --> 88: ENOTSOCK */
+ 89, /* 96: EDESTADDRREQ --> 89: EDESTADDRREQ */
+ 90, /* 97: EMSGSIZE --> 90: EMSGSIZE */
+ 91, /* 98: EPROTOTYPE --> 91: EPROTOTYPE */
+ 92, /* 99: ENOPROTOOPT --> 92: ENOPROTOOPT */
+ -1, /* 100: Unused Number */
+ -1, /* 101: Unused Number */
+ -1, /* 102: Unused Number */
+ -1, /* 103: Unused Number */
+ -1, /* 104: Unused Number */
+ -1, /* 105: Unused Number */
+ -1, /* 106: Unused Number */
+ -1, /* 107: Unused Number */
+ -1, /* 108: Unused Number */
+ -1, /* 109: Unused Number */
+ -1, /* 110: Unused Number */
+ -1, /* 111: Unused Number */
+ -1, /* 112: Unused Number */
+ -1, /* 113: Unused Number */
+ -1, /* 114: Unused Number */
+ -1, /* 115: Unused Number */
+ -1, /* 116: Unused Number */
+ -1, /* 117: Unused Number */
+ -1, /* 118: Unused Number */
+ -1, /* 119: Unused Number */
+ 93, /* 120: EPROTONOSUPPORT --> 93: EPROTONOSUPPORT */
+ 94, /* 121: ESOCKTNOSUPPORT --> 94: ESOCKTNOSUPPORT */
+ 95, /* 122: EOPNOTSUPP --> 95: EOPNOTSUPP */
+ 96, /* 123: EPFNOSUPPORT --> 96: EPFNOSUPPORT */
+ 97, /* 124: EAFNOSUPPORT --> 97: EAFNOSUPPORT */
+ 98, /* 125: EADDRINUSE --> 98: EADDRINUSE */
+ 99, /* 126: EADDRNOTAVAIL --> 99: EADDRNOTAVAIL */
+ 100, /* 127: ENETDOWN --> 100: ENETDOWN */
+ 101, /* 128: ENETUNREACH --> 101: ENETUNREACH */
+ 102, /* 129: ENETRESET --> 102: ENETRESET */
+ 103, /* 130: ECONNABORTED --> 103: ECONNABORTED */
+ 104, /* 131: ECONNRESET --> 104: ECONNRESET */
+ 105, /* 132: ENOBUFS --> 105: ENOBUFS */
+ 106, /* 133: EISCONN --> 106: EISCONN */
+ 107, /* 134: ENOTCONN --> 107: ENOTCONN */
+ -1, /* 135: Unused Number */
+ -1, /* 136: Unused Number */
+ -1, /* 137: Unused Number */
+ -1, /* 138: Unused Number */
+ -1, /* 139: Unused Number */
+ -1, /* 140: Unused Number */
+ -1, /* 141: Unused Number */
+ -1, /* 142: Unused Number */
+ 108, /* 143: ESHUTDOWN --> 108: ESHUTDOWN */
+ 109, /* 144: ETOOMANYREFS --> 109: ETOOMANYREFS */
+ 110, /* 145: ETIMEDOUT --> 110: ETIMEDOUT */
+ 111, /* 146: ECONNREFUSED --> 111: ECONNREFUSED */
+ 112, /* 147: EHOSTDOWN --> 112: EHOSTDOWN */
+ 113, /* 148: EHOSTUNREACH --> 113: EHOSTUNREACH */
+ 114, /* 149: EALREADY --> 114: EALREADY */
+ 115, /* 150: EINPROGRESS --> 115: EINPROGRESS */
+ 116 /* 151: ESTALE --> 116: ESTALE */
+};
+
+/*
+ * Convert an illumos native error number to a Linux error number and return
+ * it. If no valid conversion is possible, the function fails back to the
+ * value of "defval". In userland, passing a default error number of "-1"
+ * will abort the program if the error number could not be converted.
+ */
+int
+lx_errno(int native_errno, int defval)
+{
+#ifdef _KERNEL
+ VERIFY3S(defval, >=, 0);
+#endif
+
+ if (native_errno < 0 || native_errno >= (sizeof (lx_stol_errno) /
+ sizeof (lx_stol_errno[0]))) {
+#ifndef _KERNEL
+ VERIFY3S(defval, >=, 0);
+#endif
+
+ return (defval);
+ }
+
+ return (lx_stol_errno[native_errno]);
+}
diff --git a/usr/src/common/brand/lx/lx_errno.h b/usr/src/common/brand/lx/lx_errno.h
new file mode 100644
index 0000000000..10b6b3066c
--- /dev/null
+++ b/usr/src/common/brand/lx/lx_errno.h
@@ -0,0 +1,29 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _LX_ERRNO_H
+#define _LX_ERRNO_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int lx_errno(int, int);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_ERRNO_H */
diff --git a/usr/src/common/brand/lx/lx_signum.c b/usr/src/common/brand/lx/lx_signum.c
new file mode 100644
index 0000000000..9c861c282a
--- /dev/null
+++ b/usr/src/common/brand/lx/lx_signum.c
@@ -0,0 +1,339 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <sys/signal.h>
+#include <sys/lx_siginfo.h>
+#include <lx_signum.h>
+#include <sys/debug.h>
+
+/*
+ * Delivering signals to a Linux process is complicated by differences in
+ * signal numbering, stack structure and contents, and the action taken when a
+ * signal handler exits. In addition, many signal-related structures, such as
+ * sigset_ts, vary between Solaris and Linux.
+ *
+ * The simplest transformation that must be done when sending signals is to
+ * translate between Linux and Solaris signal numbers.
+ *
+ * These are the major signal number differences between Linux and Solaris:
+ *
+ * ====================================
+ * | Number | Linux | Solaris |
+ * | ====== | ========= | ========== |
+ * | 7 | SIGBUS | SIGEMT |
+ * | 10 | SIGUSR1 | SIGBUS |
+ * | 12 | SIGUSR2 | SIGSYS |
+ * | 16 | SIGSTKFLT | SIGUSR1 |
+ * | 17 | SIGCHLD | SIGUSR2 |
+ * | 18 | SIGCONT | SIGCHLD |
+ * | 19 | SIGSTOP | SIGPWR |
+ * | 20 | SIGTSTP | SIGWINCH |
+ * | 21 | SIGTTIN | SIGURG |
+ * | 22 | SIGTTOU | SIGPOLL |
+ * | 23 | SIGURG | SIGSTOP |
+ * | 24 | SIGXCPU | SIGTSTP |
+ * | 25 | SIGXFSZ | SIGCONT |
+ * | 26 | SIGVTALARM | SIGTTIN |
+ * | 27 | SIGPROF | SIGTTOU |
+ * | 28 | SIGWINCH | SIGVTALARM |
+ * | 29 | SIGPOLL | SIGPROF |
+ * | 30 | SIGPWR | SIGXCPU |
+ * | 31 | SIGSYS | SIGXFSZ |
+ * ====================================
+ *
+ * Not every Linux signal maps to a Solaris signal, nor does every Solaris
+ * signal map to a Linux counterpart. However, when signals do map, the
+ * mapping is unique.
+ *
+ * One mapping issue is that Linux supports 33 real time signals, with SIGRTMIN
+ * typically starting at or near 32 (SIGRTMIN) and proceeding to 64 (SIGRTMAX)
+ * (SIGRTMIN is "at or near" 32 because glibc usually "steals" one ore more of
+ * these signals for its own internal use, adjusting SIGRTMIN and SIGRTMAX as
+ * needed.) Conversely, Solaris actively uses signals 32-40 for other purposes
+ * and supports exactly 32 real time signals, in the range 41 (SIGRTMIN)
+ * to 72 (SIGRTMAX).
+ *
+ * At present, attempting to translate a Linux signal equal to 63
+ * will generate an error (we allow SIGRTMAX because a program
+ * should be able to send SIGRTMAX without getting an EINVAL, though obviously
+ * anything that loops through the signals from SIGRTMIN to SIGRTMAX will
+ * fail.)
+ *
+ * Similarly, attempting to translate a native Solaris signal in the range
+ * 32-40 will also generate an error as we don't want to support the receipt of
+ * those signals from the Solaris global zone.
+ */
+
+/*
+ * Linux to Solaris signal map
+ *
+ * Usage: solaris_signal = ltos_signum[lx_signal];
+ */
+const int
+ltos_signo[LX_NSIG + 1] = {
+ 0,
+ SIGHUP,
+ SIGINT,
+ SIGQUIT,
+ SIGILL,
+ SIGTRAP,
+ SIGABRT,
+ SIGBUS,
+ SIGFPE,
+ SIGKILL,
+ SIGUSR1,
+ SIGSEGV,
+ SIGUSR2,
+ SIGPIPE,
+ SIGALRM,
+ SIGTERM,
+ SIGEMT, /* 16: Linux SIGSTKFLT; use Solaris SIGEMT */
+ SIGCHLD,
+ SIGCONT,
+ SIGSTOP,
+ SIGTSTP,
+ SIGTTIN,
+ SIGTTOU,
+ SIGURG,
+ SIGXCPU,
+ SIGXFSZ,
+ SIGVTALRM,
+ SIGPROF,
+ SIGWINCH,
+ SIGPOLL,
+ SIGPWR,
+ SIGSYS,
+ _SIGRTMIN, /* 32: Linux SIGRTMIN */
+ _SIGRTMIN + 1,
+ _SIGRTMIN + 2,
+ _SIGRTMIN + 3,
+ _SIGRTMIN + 4,
+ _SIGRTMIN + 5,
+ _SIGRTMIN + 6,
+ _SIGRTMIN + 7,
+ _SIGRTMIN + 8,
+ _SIGRTMIN + 9,
+ _SIGRTMIN + 10,
+ _SIGRTMIN + 11,
+ _SIGRTMIN + 12,
+ _SIGRTMIN + 13,
+ _SIGRTMIN + 14,
+ _SIGRTMIN + 15,
+ _SIGRTMIN + 16,
+ _SIGRTMIN + 17,
+ _SIGRTMIN + 18,
+ _SIGRTMIN + 19,
+ _SIGRTMIN + 20,
+ _SIGRTMIN + 21,
+ _SIGRTMIN + 22,
+ _SIGRTMIN + 23,
+ _SIGRTMIN + 24,
+ _SIGRTMIN + 25,
+ _SIGRTMIN + 26,
+ _SIGRTMIN + 27,
+ _SIGRTMIN + 28,
+ _SIGRTMIN + 29,
+ _SIGRTMIN + 30,
+ _SIGRTMIN + 31,
+ _SIGRTMAX, /* 64: Linux SIGRTMAX */
+};
+
+/*
+ * Solaris to Linux signal map
+ *
+ * Usage: lx_signal = stol_signo[solaris_signal];
+ */
+const int
+stol_signo[NSIG] = {
+ 0,
+ LX_SIGHUP,
+ LX_SIGINT,
+ LX_SIGQUIT,
+ LX_SIGILL,
+ LX_SIGTRAP,
+ LX_SIGABRT,
+ LX_SIGSTKFLT, /* 7: Solaris SIGEMT; use for LX_SIGSTKFLT */
+ LX_SIGFPE,
+ LX_SIGKILL,
+ LX_SIGBUS,
+ LX_SIGSEGV,
+ LX_SIGSYS,
+ LX_SIGPIPE,
+ LX_SIGALRM,
+ LX_SIGTERM,
+ LX_SIGUSR1,
+ LX_SIGUSR2,
+ LX_SIGCHLD,
+ LX_SIGPWR,
+ LX_SIGWINCH,
+ LX_SIGURG,
+ LX_SIGPOLL,
+ LX_SIGSTOP,
+ LX_SIGTSTP,
+ LX_SIGCONT,
+ LX_SIGTTIN,
+ LX_SIGTTOU,
+ LX_SIGVTALRM,
+ LX_SIGPROF,
+ LX_SIGXCPU,
+ LX_SIGXFSZ,
+ -1, /* 32: Solaris SIGWAITING */
+ -1, /* 33: Solaris SIGLWP */
+ -1, /* 34: Solaris SIGFREEZE */
+ -1, /* 35: Solaris SIGTHAW */
+ -1, /* 36: Solaris SIGCANCEL */
+ -1, /* 37: Solaris SIGLOST */
+ -1, /* 38: Solaris SIGXRES */
+ -1, /* 39: Solaris SIGJVM1 */
+ -1, /* 40: Solaris SIGJVM2 */
+ -1, /* 41: Solaris SIGINFO */
+ LX_SIGRTMIN, /* 42: Solaris _SIGRTMIN */
+ LX_SIGRTMIN + 1,
+ LX_SIGRTMIN + 2,
+ LX_SIGRTMIN + 3,
+ LX_SIGRTMIN + 4,
+ LX_SIGRTMIN + 5,
+ LX_SIGRTMIN + 6,
+ LX_SIGRTMIN + 7,
+ LX_SIGRTMIN + 8,
+ LX_SIGRTMIN + 9,
+ LX_SIGRTMIN + 10,
+ LX_SIGRTMIN + 11,
+ LX_SIGRTMIN + 12,
+ LX_SIGRTMIN + 13,
+ LX_SIGRTMIN + 14,
+ LX_SIGRTMIN + 15,
+ LX_SIGRTMIN + 16,
+ LX_SIGRTMIN + 17,
+ LX_SIGRTMIN + 18,
+ LX_SIGRTMIN + 19,
+ LX_SIGRTMIN + 20,
+ LX_SIGRTMIN + 21,
+ LX_SIGRTMIN + 22,
+ LX_SIGRTMIN + 23,
+ LX_SIGRTMIN + 24,
+ LX_SIGRTMIN + 25,
+ LX_SIGRTMIN + 26,
+ LX_SIGRTMIN + 27,
+ LX_SIGRTMIN + 28,
+ LX_SIGRTMIN + 29,
+ LX_SIGRTMIN + 30,
+ LX_SIGRTMIN + 31,
+ LX_SIGRTMAX, /* 74: Solaris _SIGRTMAX */
+};
+
+/*
+ * Convert an illumos native signal number to a Linux signal number and return
+ * it. If no valid conversion is possible, the function fails back to the
+ * value of "defsig". In userland, passing a default signal number of "-1"
+ * will abort the program if the signal number could not be converted.
+ */
+int
+lx_stol_signo(int signo, int defsig)
+{
+ int rval;
+
+#ifdef _KERNEL
+ VERIFY3S(defsig, >=, 0);
+#endif
+
+ if (signo < 0 || signo >= NSIG || (rval = stol_signo[signo]) < 1) {
+#ifndef _KERNEL
+ VERIFY3S(defsig, >=, 0);
+#endif
+ return (defsig);
+ }
+
+ return (rval);
+}
+
+
+/*
+ * Convert a Linux signal number to an illumos signal number and return it.
+ * Error behavior is identical to lx_stol_signo.
+ */
+int
+lx_ltos_signo(int signo, int defsig)
+{
+#ifdef _KERNEL
+ VERIFY3S(defsig, >=, 0);
+#endif
+
+ if (signo < 1 || signo >= NSIG) {
+#ifndef _KERNEL
+ VERIFY3S(defsig, >=, 0);
+#endif
+ return (defsig);
+ }
+
+ return (ltos_signo[signo]);
+}
+
+/*
+ * Convert the "status" field of a SIGCLD siginfo_t. We need to extract the
+ * illumos signal number and convert it to a Linux signal number while leaving
+ * the ptrace(2) event bits intact. In userland, passing a default signal
+ * number of "-1" will abort the program if the signal number could not be
+ * converted, as for lx_stol_signo().
+ */
+int
+lx_stol_status(int s, int defsig)
+{
+ /*
+ * We mask out the top bit here in case PTRACE_O_TRACESYSGOOD
+ * is in use and 0x80 has been ORed with the signal number.
+ */
+ int stat = lx_stol_signo(s & 0x7f, defsig);
+
+ /*
+ * We must mix in the ptrace(2) event which may be stored in
+ * the second byte of the status code. We also re-include the
+ * PTRACE_O_TRACESYSGOOD bit.
+ */
+ return ((s & 0xff80) | stat);
+}
+
+int
+lx_stol_sigcode(int code)
+{
+ switch (code) {
+ case SI_USER:
+ return (LX_SI_USER);
+ case SI_LWP:
+ return (LX_SI_TKILL);
+ case SI_QUEUE:
+ return (LX_SI_QUEUE);
+ case SI_TIMER:
+ return (LX_SI_TIMER);
+ case SI_ASYNCIO:
+ return (LX_SI_ASYNCIO);
+ case SI_MESGQ:
+ return (LX_SI_MESGQ);
+ default:
+ return (code);
+ }
+}
diff --git a/usr/src/common/brand/lx/lx_signum.h b/usr/src/common/brand/lx/lx_signum.h
new file mode 100644
index 0000000000..b6c5f32731
--- /dev/null
+++ b/usr/src/common/brand/lx/lx_signum.h
@@ -0,0 +1,114 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _LX_SIGNUM_H
+#define _LX_SIGNUM_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LX_SIGHUP 1
+#define LX_SIGINT 2
+#define LX_SIGQUIT 3
+#define LX_SIGILL 4
+#define LX_SIGTRAP 5
+#define LX_SIGABRT 6
+#define LX_SIGIOT 6
+#define LX_SIGBUS 7
+#define LX_SIGFPE 8
+#define LX_SIGKILL 9
+#define LX_SIGUSR1 10
+#define LX_SIGSEGV 11
+#define LX_SIGUSR2 12
+#define LX_SIGPIPE 13
+#define LX_SIGALRM 14
+#define LX_SIGTERM 15
+#define LX_SIGSTKFLT 16
+#define LX_SIGCHLD 17
+#define LX_SIGCONT 18
+#define LX_SIGSTOP 19
+#define LX_SIGTSTP 20
+#define LX_SIGTTIN 21
+#define LX_SIGTTOU 22
+#define LX_SIGURG 23
+#define LX_SIGXCPU 24
+#define LX_SIGXFSZ 25
+#define LX_SIGVTALRM 26
+#define LX_SIGPROF 27
+#define LX_SIGWINCH 28
+#define LX_SIGIO 29
+#define LX_SIGPOLL LX_SIGIO
+#define LX_SIGPWR 30
+#define LX_SIGSYS 31
+#define LX_SIGUNUSED 31
+
+#define LX_NSIG 64 /* Linux _NSIG */
+
+#define LX_SIGRTMIN 32
+#define LX_SIGRTMAX LX_NSIG
+
+extern const int ltos_signo[];
+extern const int stol_signo[];
+
+extern int lx_stol_signo(int, int);
+extern int lx_ltos_signo(int, int);
+extern int lx_stol_status(int, int);
+extern int lx_stol_sigcode(int);
+
+/*
+ * NOTE: Linux uses different definitions for 'sigset_t's and 'sigaction_t's
+ * depending on whether the definition is for user space or the kernel.
+ *
+ * The definitions below MUST correspond to the Linux kernel versions,
+ * as glibc will do the necessary translation from the Linux user
+ * versions.
+ */
+#if defined(_LP64)
+#define LX_NSIG_WORDS 1
+#define LX_WSHIFT 6
+#elif defined(_ILP32)
+#define LX_NSIG_WORDS 2
+#define LX_WSHIFT 5
+#else
+#error "LX only supports LP64 and ILP32"
+#endif
+
+typedef struct {
+ ulong_t __bits[LX_NSIG_WORDS];
+} lx_sigset_t;
+
+#define LX_NBITS (sizeof (ulong_t) * NBBY)
+#define lx_sigmask(n) (1UL << (((n) - 1) % LX_NBITS))
+#define lx_sigword(n) (((ulong_t)((n) - 1)) >> LX_WSHIFT)
+#define lx_sigismember(s, n) (lx_sigmask(n) & (s)->__bits[lx_sigword(n)])
+#define lx_sigaddset(s, n) ((s)->__bits[lx_sigword(n)] |= lx_sigmask(n))
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_SIGNUM_H */
diff --git a/usr/src/common/brand/lx/lx_syscall.h b/usr/src/common/brand/lx/lx_syscall.h
new file mode 100644
index 0000000000..54fb196b5a
--- /dev/null
+++ b/usr/src/common/brand/lx/lx_syscall.h
@@ -0,0 +1,95 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _LX_SYSCALL_H
+#define _LX_SYSCALL_H
+
+#include <sys/lx_brand.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * The br_scall_args field of lx_lwp_data is going to be populated with
+ * pointers to structs. The types of these structs should be defined in this
+ * header file. These are Linux specific arguments to system calls that don't
+ * exist in illumos. Each section should be labelled with which system call it
+ * belongs to.
+ */
+
+/* arguments for waitpid(2) */
+/* see comments in usr/src/lib/brand/lx/lx_brand/common/wait.c */
+#define LX_WNOTHREAD 0x20000000 /* Do not wait on siblings' children */
+#define LX_WALL 0x40000000 /* Wait on all children */
+#define LX_WCLONE 0x80000000 /* Wait only on clone children */
+
+/* For arch_prctl(2) */
+#define LX_ARCH_SET_GS 0x1001
+#define LX_ARCH_SET_FS 0x1002
+#define LX_ARCH_GET_FS 0x1003
+#define LX_ARCH_GET_GS 0x1004
+
+/*
+ * For ptrace(2):
+ */
+#define LX_PTRACE_TRACEME 0
+#define LX_PTRACE_PEEKTEXT 1
+#define LX_PTRACE_PEEKDATA 2
+#define LX_PTRACE_PEEKUSER 3
+#define LX_PTRACE_POKETEXT 4
+#define LX_PTRACE_POKEDATA 5
+#define LX_PTRACE_POKEUSER 6
+#define LX_PTRACE_CONT 7
+#define LX_PTRACE_KILL 8
+#define LX_PTRACE_SINGLESTEP 9
+#define LX_PTRACE_GETREGS 12
+#define LX_PTRACE_SETREGS 13
+#define LX_PTRACE_GETFPREGS 14
+#define LX_PTRACE_SETFPREGS 15
+#define LX_PTRACE_ATTACH 16
+#define LX_PTRACE_DETACH 17
+#define LX_PTRACE_GETFPXREGS 18
+#define LX_PTRACE_SETFPXREGS 19
+#define LX_PTRACE_SYSCALL 24
+#define LX_PTRACE_SETOPTIONS 0x4200
+#define LX_PTRACE_GETEVENTMSG 0x4201
+#define LX_PTRACE_GETSIGINFO 0x4202
+
+/*
+ * For clone(2):
+ */
+#define LX_CSIGNAL 0x000000ff
+#define LX_CLONE_VM 0x00000100
+#define LX_CLONE_FS 0x00000200
+#define LX_CLONE_FILES 0x00000400
+#define LX_CLONE_SIGHAND 0x00000800
+#define LX_CLONE_PID 0x00001000
+#define LX_CLONE_PTRACE 0x00002000
+#define LX_CLONE_VFORK 0x00004000
+#define LX_CLONE_PARENT 0x00008000
+#define LX_CLONE_THREAD 0x00010000
+#define LX_CLONE_SYSVSEM 0x00040000
+#define LX_CLONE_SETTLS 0x00080000
+#define LX_CLONE_PARENT_SETTID 0x00100000
+#define LX_CLONE_CHILD_CLEARTID 0x00200000
+#define LX_CLONE_DETACH 0x00400000
+#define LX_CLONE_CHILD_SETTID 0x01000000
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_SYSCALL_H */
diff --git a/usr/src/common/brand/lx/tools/Makefile b/usr/src/common/brand/lx/tools/Makefile
new file mode 100644
index 0000000000..5ad1240c55
--- /dev/null
+++ b/usr/src/common/brand/lx/tools/Makefile
@@ -0,0 +1,42 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+PROG = gen_errno
+
+include ../../../../cmd/Makefile.cmd
+
+OBJS = gen_errno.o
+
+CLOBBERFILES += $(PROG)
+
+NATIVECC_CFLAGS += $(CFLAGS) $(CCVERBOSE)
+NATIVECC_LDLIBS += -lcmdutils -lnvpair
+
+.KEEP_STATE:
+
+all: $(PROG)
+
+install: all
+
+lint: lint_PROG
+
+clean:
+ $(RM) $(OBJS)
+
+$(PROG): $(OBJS)
+ $(NATIVECC) $(NATIVECC_CFLAGS) $(NATIVECC_LDLIBS) $(OBJS) -o $@
+ $(POST_PROCESS)
+
+include ../../../../cmd/Makefile.targ
diff --git a/usr/src/common/brand/lx/tools/README.md b/usr/src/common/brand/lx/tools/README.md
new file mode 100644
index 0000000000..5e4976f200
--- /dev/null
+++ b/usr/src/common/brand/lx/tools/README.md
@@ -0,0 +1,39 @@
+# Updating Error Number Translations
+
+To create an updated error number translation table, you can use the
+`gen_errno` tool. This tool requires, as input:
+
+* the illumos native `errno.h` file
+* a set of foreign operating system `errno.h` files
+
+The output is a set of translation table entries suitable for inclusion in a
+cstyled C array. The index of the array is the native error number and the
+value at each index is the translated error number for use with the foreign
+operating system.
+
+## Example
+
+To generate a translation table for the LX Brand, you will require two files
+from the current Linux source:
+
+* `include/uapi/asm-generic/errno-base.h` (low-valued, or base, error numbers)
+* `include/uapi/asm-generic/errno.h` (extended error numbers)
+
+Assuming the files are in the current directory, you should run the tool as
+follows:
+
+ $ dmake
+ ...
+ $ ./gen_errno -F errno-base.h -F errno.h \
+ -N $SRC/uts/common/sys/errno.h
+ 0, /* 0: No Error */
+ 1, /* 1: EPERM --> 1: EPERM */
+ 2, /* 2: ENOENT --> 2: ENOENT */
+ 3, /* 3: ESRCH --> 3: ESRCH */
+ 4, /* 4: EINTR --> 4: EINTR */
+ 5, /* 5: EIO --> 5: EIO */
+ 6, /* 6: ENXIO --> 6: ENXIO */
+ 7, /* 7: E2BIG --> 7: E2BIG */
+ ...
+
+The output may be used in the `$SRC/common/brand/lx/lx_errno.c` file.
diff --git a/usr/src/common/brand/lx/tools/gen_errno.c b/usr/src/common/brand/lx/tools/gen_errno.c
new file mode 100644
index 0000000000..52b2d76d24
--- /dev/null
+++ b/usr/src/common/brand/lx/tools/gen_errno.c
@@ -0,0 +1,444 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * Take the error number definitions from a foreign system and generate a
+ * translation table that converts illumos native error numbers to foreign
+ * system error numbers.
+ */
+
+#include <ctype.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <err.h>
+#include <sys/sysmacros.h>
+#include <libcmdutils.h>
+#include <libnvpair.h>
+
+nvlist_t *native_errors;
+nvlist_t *foreign_errors;
+
+struct override {
+ const char *ovr_from;
+ const char *ovr_to;
+} overrides[] = {
+ { "ENOTSUP", "ENOSYS" },
+ { 0 }
+};
+
+static const char *
+lookup_override(const char *from)
+{
+ int i;
+
+ for (i = 0; overrides[i].ovr_from != NULL; i++) {
+ if (strcmp(overrides[i].ovr_from, from) == 0) {
+ return (overrides[i].ovr_to);
+ }
+ }
+
+ return (NULL);
+}
+
+static int
+parse_int(const char *number, int *rval)
+{
+ long n;
+ char *endpos;
+
+ errno = 0;
+ if ((n = strtol(number, &endpos, 10)) == 0 && errno != 0) {
+ return (-1);
+ }
+
+ if (endpos != NULL && *endpos != '\0') {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ if (n > INT_MAX || n < INT_MIN) {
+ errno = EOVERFLOW;
+ return (-1);
+ }
+
+ *rval = (int)n;
+ return (0);
+}
+
+static int
+errnum_add(nvlist_t *nvl, const char *name, const char *number)
+{
+ int val;
+
+ if (nvlist_exists(nvl, name)) {
+ (void) fprintf(stderr, "ERROR: duplicate definition: %s -> "
+ "%s\n", name, number);
+ errno = EEXIST;
+ return (-1);
+ }
+
+ /*
+ * Try and parse the error number:
+ */
+ if (parse_int(number, &val) == 0) {
+ /*
+ * The name refers to a number.
+ */
+ if (nvlist_add_int32(nvl, name, val) != 0) {
+ (void) fprintf(stderr, "ERROR: nvlist_add_int32: %s\n",
+ strerror(errno));
+ return (-1);
+ }
+ } else {
+ /*
+ * The name refers to another definition.
+ */
+ if (nvlist_add_string(nvl, name, number) != 0) {
+ (void) fprintf(stderr, "ERROR: nvlist_add_string: %s\n",
+ strerror(errno));
+ return (-1);
+ }
+ }
+
+ return (0);
+}
+
+static int
+errnum_max(nvlist_t *nvl)
+{
+ int max = 0;
+ nvpair_t *nvp = NULL;
+
+ while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
+ if (nvpair_type(nvp) != DATA_TYPE_INT32) {
+ continue;
+ }
+
+ max = MAX(fnvpair_value_int32(nvp), max);
+ }
+
+ return (max);
+}
+
+static int
+errname_by_num(nvlist_t *nvl, int num, const char **name)
+{
+ nvpair_t *nvp = NULL;
+
+ while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
+ if (nvpair_type(nvp) != DATA_TYPE_INT32) {
+ continue;
+ }
+
+ if (fnvpair_value_int32(nvp) == num) {
+ *name = nvpair_name(nvp);
+ return (0);
+ }
+ }
+
+ errno = ENOENT;
+ return (-1);
+}
+
+static int
+errno_by_name(nvlist_t *nvl, const char *name, int *rval, const char **rname)
+{
+ nvpair_t *nvp = NULL;
+
+ if (nvlist_lookup_nvpair(nvl, name, &nvp) != 0) {
+ errno = ENOENT;
+ return (-1);
+ }
+
+ if (nvpair_type(nvp) == DATA_TYPE_STRING) {
+ return (errno_by_name(nvl, fnvpair_value_string(nvp), rval,
+ rname));
+ } else {
+ *rval = fnvpair_value_int32(nvp);
+ if (rname != NULL) {
+ *rname = name;
+ }
+ return (0);
+ }
+}
+
+static int
+process_line(const char *line, nvlist_t *nvl)
+{
+ custr_t *nam = NULL, *num = NULL;
+ const char *c = line;
+
+ if (custr_alloc(&nam) != 0 || custr_alloc(&num) != 0) {
+ int en = errno;
+
+ custr_free(nam);
+ custr_free(num);
+
+ errno = en;
+ return (-1);
+ }
+
+ /*
+ * Valid lines begin with "#define":
+ */
+ if (*c++ != '#' || *c++ != 'd' || *c++ != 'e' || *c++ != 'f' ||
+ *c++ != 'i' || *c++ != 'n' || *c++ != 'e') {
+ return (0);
+ }
+
+ /*
+ * Eat whitespace:
+ */
+ for (;;) {
+ if (*c == '\0') {
+ return (0);
+ }
+
+ if (*c != ' ' && *c != '\t') {
+ break;
+ }
+
+ c++;
+ }
+
+ /*
+ * Read error number token:
+ */
+ for (;;) {
+ if (*c == '\0') {
+ return (0);
+ }
+
+ if (*c == ' ' || *c == '\t') {
+ break;
+ }
+
+ if (custr_appendc(nam, *c) != 0) {
+ return (-1);
+ }
+
+ c++;
+ }
+
+ /*
+ * Eat whitespace:
+ */
+ for (;;) {
+ if (*c == '\0') {
+ return (0);
+ }
+
+ if (*c != ' ' && *c != '\t') {
+ break;
+ }
+
+ c++;
+ }
+
+ /*
+ * Read error number token:
+ */
+ for (;;) {
+ if (*c == '\0') {
+ break;
+ }
+
+ if (*c == ' ' || *c == '\t') {
+ break;
+ }
+
+ if (custr_appendc(num, *c) != 0) {
+ return (-1);
+ }
+
+ c++;
+ }
+
+ return (errnum_add(nvl, custr_cstr(nam), custr_cstr(num)));
+}
+
+static int
+read_file_into_list(const char *path, nvlist_t *nvl)
+{
+ int rval = 0, en = 0;
+ FILE *f;
+ custr_t *cu = NULL;
+
+ if (custr_alloc(&cu) != 0) {
+ return (-1);
+ }
+
+ if ((f = fopen(path, "r")) == NULL) {
+ custr_free(cu);
+ return (-1);
+ }
+
+ for (;;) {
+ int c;
+
+ errno = 0;
+ switch (c = fgetc(f)) {
+ case '\n':
+ case EOF:
+ if (errno != 0) {
+ en = errno;
+ rval = -1;
+ goto out;
+ }
+ if (process_line(custr_cstr(cu), nvl) != 0) {
+ en = errno;
+ rval = -1;
+ goto out;
+ }
+ custr_reset(cu);
+ if (c == EOF) {
+ goto out;
+ }
+ break;
+
+ case '\r':
+ case '\0':
+ /*
+ * Ignore these characters.
+ */
+ break;
+
+ default:
+ if (custr_appendc(cu, c) != 0) {
+ en = errno;
+ rval = -1;
+ goto out;
+ }
+ break;
+ }
+ }
+
+out:
+ (void) fclose(f);
+ custr_free(cu);
+ errno = en;
+ return (rval);
+}
+
+int
+main(int argc, char **argv)
+{
+ int max;
+ int fval;
+ int c;
+
+ if (nvlist_alloc(&native_errors, NV_UNIQUE_NAME, 0) != 0 ||
+ nvlist_alloc(&foreign_errors, NV_UNIQUE_NAME, 0) != 0) {
+ err(1, "could not allocate memory");
+ }
+
+ while ((c = getopt(argc, argv, ":N:F:")) != -1) {
+ switch (c) {
+ case 'N':
+ if (read_file_into_list(optarg, native_errors) != 0) {
+ err(1, "could not read file: %s", optarg);
+ }
+ break;
+
+ case 'F':
+ if (read_file_into_list(optarg, foreign_errors) != 0) {
+ err(1, "could not read file: %s", optarg);
+ }
+ break;
+
+ case ':':
+ errx(1, "option -%c requires an operand", c);
+ break;
+
+ case '?':
+ errx(1, "option -%c unrecognised", c);
+ break;
+ }
+ }
+
+ /*
+ * Print an array entry for each error number:
+ */
+ max = errnum_max(native_errors);
+ for (fval = 0; fval <= max; fval++) {
+ const char *fname;
+ const char *tname = NULL;
+ int32_t tval;
+ const char *msg = NULL;
+ const char *comma = (fval != max) ? "," : "";
+
+ if (errname_by_num(native_errors, fval, &fname) == -1) {
+ fname = NULL;
+ }
+
+ if (fval == 0) {
+ /*
+ * The error number "0" is special: it means no worries.
+ */
+ msg = "No Error";
+ tval = 0;
+ } else if (fname == NULL) {
+ /*
+ * There is no defined name for this error number; it
+ * is unused.
+ */
+ msg = "Unused Number";
+ tval = -1;
+ } else {
+ /*
+ * Check if we want to override the name of this error
+ * in the foreign error number lookup:
+ */
+ const char *oname = lookup_override(fname);
+
+ /*
+ * Do the lookup:
+ */
+ if (errno_by_name(foreign_errors, oname != NULL ?
+ oname : fname, &tval, &tname) != 0) {
+ /*
+ * There was no foreign error number by that
+ * name.
+ */
+ tname = "No Analogue";
+ tval = -2;
+ }
+ }
+
+ if (msg == NULL) {
+ size_t flen = strlen(fname);
+ size_t tlen = strlen(tname);
+ const char *t = flen > 7 ? "\t" : "\t\t";
+ const char *tt = tlen < 7 ? "\t\t\t" : tlen < 15 ?
+ "\t\t" : "\t";
+
+ (void) fprintf(stdout, "\t%d%s\t/* %3d: %s%s--> %3d: "
+ "%s%s*/\n", tval, comma, fval, fname, t, tval,
+ tname, tt);
+ } else {
+ const char *t = "\t\t\t\t\t";
+
+ (void) fprintf(stdout, "\t%d%s\t/* %3d: %s%s*/\n", tval,
+ comma, fval, msg, t);
+ }
+ }
+
+ (void) nvlist_free(native_errors);
+ (void) nvlist_free(foreign_errors);
+
+ return (0);
+}
diff --git a/usr/src/common/ctf/ctf_create.c b/usr/src/common/ctf/ctf_create.c
index 239d166f44..4857070db1 100644
--- a/usr/src/common/ctf/ctf_create.c
+++ b/usr/src/common/ctf/ctf_create.c
@@ -25,7 +25,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc.
*/
#include <sys/sysmacros.h>
@@ -86,6 +86,48 @@ ctf_create(int *errp)
return (fp);
}
+ctf_file_t *
+ctf_fdcreate(int fd, int *errp)
+{
+ ctf_file_t *fp;
+ static const ctf_header_t hdr = { { CTF_MAGIC, CTF_VERSION, 0 } };
+
+ const ulong_t hashlen = 128;
+ ctf_dtdef_t **hash;
+ ctf_sect_t cts;
+
+ if (fd == -1)
+ return (ctf_create(errp));
+
+ hash = ctf_alloc(hashlen * sizeof (ctf_dtdef_t *));
+
+ if (hash == NULL)
+ return (ctf_set_open_errno(errp, EAGAIN));
+
+ cts.cts_name = _CTF_SECTION;
+ cts.cts_type = SHT_PROGBITS;
+ cts.cts_flags = 0;
+ cts.cts_data = &hdr;
+ cts.cts_size = sizeof (hdr);
+ cts.cts_entsize = 1;
+ cts.cts_offset = 0;
+
+ if ((fp = ctf_fdcreate_int(fd, errp, &cts)) == NULL) {
+ ctf_free(hash, hashlen * sizeof (ctf_dtdef_t *));
+ return (NULL);
+ }
+
+ fp->ctf_flags |= LCTF_RDWR;
+ fp->ctf_dthashlen = hashlen;
+ bzero(hash, hashlen * sizeof (ctf_dtdef_t *));
+ fp->ctf_dthash = hash;
+ fp->ctf_dtstrlen = sizeof (_CTF_STRTAB_TEMPLATE);
+ fp->ctf_dtnextid = 1;
+ fp->ctf_dtoldid = 0;
+
+ return (fp);
+}
+
static uchar_t *
ctf_copy_smembers(ctf_dtdef_t *dtd, uint_t soff, uchar_t *t)
{
@@ -236,14 +278,24 @@ int
ctf_update(ctf_file_t *fp)
{
ctf_file_t ofp, *nfp;
- ctf_header_t hdr;
+ ctf_header_t hdr, *bhdr;
ctf_dtdef_t *dtd;
- ctf_sect_t cts;
+ ctf_dsdef_t *dsd;
+ ctf_dldef_t *dld;
+ ctf_sect_t cts, *symp, *strp;
uchar_t *s, *s0, *t;
- size_t size;
+ ctf_lblent_t *label;
+ uint16_t *obj, *func;
+ size_t size, objsize, funcsize, labelsize, plen;
void *buf;
int err;
+ ulong_t i;
+ const char *plabel;
+ const char *sname;
+
+ uintptr_t symbase = (uintptr_t)fp->ctf_symtab.cts_data;
+ uintptr_t strbase = (uintptr_t)fp->ctf_strtab.cts_data;
if (!(fp->ctf_flags & LCTF_RDWR))
return (ctf_set_errno(fp, ECTF_RDONLY));
@@ -261,8 +313,26 @@ ctf_update(ctf_file_t *fp)
hdr.cth_magic = CTF_MAGIC;
hdr.cth_version = CTF_VERSION;
- if (fp->ctf_flags & LCTF_CHILD)
- hdr.cth_parname = 1; /* i.e. _CTF_STRTAB_TEMPLATE[1] */
+ if (fp->ctf_flags & LCTF_CHILD) {
+ if (fp->ctf_parname == NULL) {
+ plen = 0;
+ hdr.cth_parname = 1; /* i.e. _CTF_STRTAB_TEMPLATE[1] */
+ plabel = NULL;
+ } else {
+ plen = strlen(fp->ctf_parname) + 1;
+ plabel = ctf_label_topmost(fp->ctf_parent);
+ }
+ } else {
+ plabel = NULL;
+ plen = 0;
+ }
+
+ /*
+ * Iterate over the labels that we have.
+ */
+ for (labelsize = 0, dld = ctf_list_next(&fp->ctf_dldefs);
+ dld != NULL; dld = ctf_list_next(dld))
+ labelsize += sizeof (ctf_lblent_t);
/*
* Iterate through the dynamic type definition list and compute the
@@ -304,25 +374,121 @@ ctf_update(ctf_file_t *fp)
}
/*
+ * An entry for each object must exist in the data section. However, if
+ * the symbol is SHN_UNDEF, then it is skipped. For objects, the storage
+ * is just the size of the 2-byte id. For functions it's always 2 bytes,
+ * plus 2 bytes per argument and the return type.
+ */
+ dsd = ctf_list_next(&fp->ctf_dsdefs);
+ for (objsize = 0, funcsize = 0, i = 0; i < fp->ctf_nsyms; i++) {
+ int type;
+
+ if (fp->ctf_symtab.cts_entsize == sizeof (Elf32_Sym)) {
+ const Elf32_Sym *symp = (Elf32_Sym *)symbase + i;
+
+ type = ELF32_ST_TYPE(symp->st_info);
+ if (ctf_sym_valid(strbase, type, symp->st_shndx,
+ symp->st_value, symp->st_name) == B_FALSE)
+ continue;
+ } else {
+ const Elf64_Sym *symp = (Elf64_Sym *)symbase + i;
+
+ type = ELF64_ST_TYPE(symp->st_info);
+ if (ctf_sym_valid(strbase, type, symp->st_shndx,
+ symp->st_value, symp->st_name) == B_FALSE)
+ continue;
+ }
+
+ while (dsd != NULL && i > dsd->dts_symidx)
+ dsd = ctf_list_next(dsd);
+ if (type == STT_OBJECT) {
+ objsize += sizeof (uint16_t);
+ } else {
+ /* Every function has a uint16_t info no matter what */
+ if (dsd == NULL || i < dsd->dts_symidx) {
+ funcsize += sizeof (uint16_t);
+ } else {
+ funcsize += sizeof (uint16_t) *
+ (dsd->dts_nargs + 2);
+ }
+ }
+ }
+
+ /*
+ * The objtoff and funcoffset must be 2-byte aligned. We're guaranteed
+ * that this is always true for the objtoff because labels are always 8
+ * bytes large. Similarly, because objects are always two bytes of data,
+ * this will always be true for funcoff.
+ */
+ hdr.cth_objtoff = hdr.cth_lbloff + labelsize;
+ hdr.cth_funcoff = hdr.cth_objtoff + objsize;
+
+ /*
+ * The type offset must be 4 byte aligned.
+ */
+ hdr.cth_typeoff = hdr.cth_funcoff + funcsize;
+ if (hdr.cth_typeoff & 3)
+ hdr.cth_typeoff += 4 - (hdr.cth_typeoff & 3);
+ ASSERT((hdr.cth_typeoff & 3) == 0);
+
+ /*
* Fill in the string table offset and size, compute the size of the
* entire CTF buffer we need, and then allocate a new buffer and
* bcopy the finished header to the start of the buffer.
*/
hdr.cth_stroff = hdr.cth_typeoff + size;
- hdr.cth_strlen = fp->ctf_dtstrlen;
+ hdr.cth_strlen = fp->ctf_dtstrlen + plen;
size = sizeof (ctf_header_t) + hdr.cth_stroff + hdr.cth_strlen;
+ ctf_dprintf("lbloff: %d\nobjtoff: %d\nfuncoff: %d\n"
+ "typeoff: %d\nstroff: %d\nstrlen: %d\n",
+ hdr.cth_lbloff, hdr.cth_objtoff, hdr.cth_funcoff,
+ hdr.cth_typeoff, hdr.cth_stroff, hdr.cth_strlen);
if ((buf = ctf_data_alloc(size)) == MAP_FAILED)
return (ctf_set_errno(fp, EAGAIN));
bcopy(&hdr, buf, sizeof (ctf_header_t));
- t = (uchar_t *)buf + sizeof (ctf_header_t);
+ bhdr = buf;
+ label = (ctf_lblent_t *)((uintptr_t)buf + sizeof (ctf_header_t));
+ t = (uchar_t *)buf + sizeof (ctf_header_t) + hdr.cth_typeoff;
s = s0 = (uchar_t *)buf + sizeof (ctf_header_t) + hdr.cth_stroff;
+ obj = (uint16_t *)((uintptr_t)buf + sizeof (ctf_header_t) +
+ hdr.cth_objtoff);
+ func = (uint16_t *)((uintptr_t)buf + sizeof (ctf_header_t) +
+ hdr.cth_funcoff);
bcopy(_CTF_STRTAB_TEMPLATE, s, sizeof (_CTF_STRTAB_TEMPLATE));
s += sizeof (_CTF_STRTAB_TEMPLATE);
/*
+ * We have an actual parent name and we're a child container, therefore
+ * we should make sure to note our parent's name here.
+ */
+ if (plen != 0) {
+ VERIFY(s + plen - s0 <= hdr.cth_strlen);
+ bcopy(fp->ctf_parname, s, plen);
+ bhdr->cth_parname = s - s0;
+ s += plen;
+ }
+
+ /*
+ * First pass over the labels and copy them out.
+ */
+ for (dld = ctf_list_next(&fp->ctf_dldefs); dld != NULL;
+ dld = ctf_list_next(dld), label++) {
+ size_t len = strlen(dld->dld_name) + 1;
+
+ VERIFY(s + len - s0 <= hdr.cth_strlen);
+ bcopy(dld->dld_name, s, len);
+ label->ctl_typeidx = dld->dld_type;
+ label->ctl_label = s - s0;
+ s += len;
+
+ if (plabel != NULL && strcmp(plabel, dld->dld_name) == 0)
+ bhdr->cth_parlabel = label->ctl_label;
+ }
+
+ /*
* We now take a final lap through the dynamic type definition list and
* copy the appropriate type records and strings to the output buffer.
*/
@@ -339,6 +505,7 @@ ctf_update(ctf_file_t *fp)
if (dtd->dtd_name != NULL) {
dtd->dtd_data.ctt_name = (uint_t)(s - s0);
len = strlen(dtd->dtd_name) + 1;
+ VERIFY(s + len - s0 <= hdr.cth_strlen);
bcopy(dtd->dtd_name, s, len);
s += len;
} else
@@ -411,6 +578,61 @@ ctf_update(ctf_file_t *fp)
}
/*
+ * Now we fill in our dynamic data and function sections. We use the
+ * same criteria as above, but also consult the dsd list.
+ */
+ dsd = ctf_list_next(&fp->ctf_dsdefs);
+ for (i = 0; i < fp->ctf_nsyms; i++) {
+ int type;
+ if (fp->ctf_symtab.cts_entsize == sizeof (Elf32_Sym)) {
+ const Elf32_Sym *symp = (Elf32_Sym *)symbase + i;
+ type = ELF32_ST_TYPE(symp->st_info);
+
+ if (ctf_sym_valid(strbase, type, symp->st_shndx,
+ symp->st_value, symp->st_name) == B_FALSE)
+ continue;
+ } else {
+ const Elf64_Sym *symp = (Elf64_Sym *)symbase + i;
+ type = ELF64_ST_TYPE(symp->st_info);
+ if (ctf_sym_valid(strbase, type, symp->st_shndx,
+ symp->st_value, symp->st_name) == B_FALSE)
+ continue;
+ }
+
+ while (dsd != NULL && i > dsd->dts_symidx) {
+ dsd = ctf_list_next(dsd);
+ }
+ if (type == STT_OBJECT) {
+ if (dsd == NULL || i < dsd->dts_symidx) {
+ *obj = 0;
+ } else {
+ *obj = dsd->dts_tid;
+ }
+ obj++;
+ VERIFY((uintptr_t)obj <= (uintptr_t)func);
+ } else {
+ if (dsd == NULL || i < dsd->dts_symidx) {
+ ushort_t data = CTF_TYPE_INFO(CTF_K_UNKNOWN,
+ 0, 0);
+ *func = data;
+ func++;
+ } else {
+ int j;
+ ushort_t data = CTF_TYPE_INFO(CTF_K_FUNCTION, 0,
+ dsd->dts_nargs);
+
+ *func = data;
+ func++;
+ *func = dsd->dts_tid;
+ func++;
+ for (j = 0; j < dsd->dts_nargs; j++)
+ func[j] = dsd->dts_argc[j];
+ func += dsd->dts_nargs;
+ }
+ }
+ }
+
+ /*
* Finally, we are ready to ctf_bufopen() the new container. If this
* is successful, we then switch nfp and fp and free the old container.
*/
@@ -423,7 +645,15 @@ ctf_update(ctf_file_t *fp)
cts.cts_entsize = 1;
cts.cts_offset = 0;
- if ((nfp = ctf_bufopen(&cts, NULL, NULL, &err)) == NULL) {
+ if (fp->ctf_nsyms == 0) {
+ symp = NULL;
+ strp = NULL;
+ } else {
+ symp = &fp->ctf_symtab;
+ strp = &fp->ctf_strtab;
+ }
+
+ if ((nfp = ctf_bufopen(&cts, symp, strp, &err)) == NULL) {
ctf_data_free(buf, size);
return (ctf_set_errno(fp, err));
}
@@ -433,10 +663,11 @@ ctf_update(ctf_file_t *fp)
nfp->ctf_refcnt = fp->ctf_refcnt;
nfp->ctf_flags |= fp->ctf_flags & ~LCTF_DIRTY;
- nfp->ctf_data.cts_data = NULL; /* force ctf_data_free() on close */
nfp->ctf_dthash = fp->ctf_dthash;
nfp->ctf_dthashlen = fp->ctf_dthashlen;
nfp->ctf_dtdefs = fp->ctf_dtdefs;
+ nfp->ctf_dsdefs = fp->ctf_dsdefs;
+ nfp->ctf_dldefs = fp->ctf_dldefs;
nfp->ctf_dtstrlen = fp->ctf_dtstrlen;
nfp->ctf_dtnextid = fp->ctf_dtnextid;
nfp->ctf_dtoldid = fp->ctf_dtnextid - 1;
@@ -445,6 +676,23 @@ ctf_update(ctf_file_t *fp)
fp->ctf_dthash = NULL;
fp->ctf_dthashlen = 0;
bzero(&fp->ctf_dtdefs, sizeof (ctf_list_t));
+ bzero(&fp->ctf_dsdefs, sizeof (ctf_list_t));
+ bzero(&fp->ctf_dldefs, sizeof (ctf_list_t));
+
+ /*
+ * Because the various containers share the data sections, we don't want
+ * to have ctf_close free it all. However, the name of the section is in
+ * fact unique to the ctf_sect_t. Thus we save the names of the symbol
+ * and string sections around the bzero() and restore them afterwards,
+ * ensuring that we don't result in a memory leak.
+ */
+ sname = fp->ctf_symtab.cts_name;
+ bzero(&fp->ctf_symtab, sizeof (ctf_sect_t));
+ fp->ctf_symtab.cts_name = sname;
+
+ sname = fp->ctf_strtab.cts_name;
+ bzero(&fp->ctf_strtab, sizeof (ctf_sect_t));
+ fp->ctf_strtab.cts_name = sname;
bcopy(fp, &ofp, sizeof (ctf_file_t));
bcopy(nfp, fp, sizeof (ctf_file_t));
@@ -563,6 +811,101 @@ ctf_dtd_lookup(ctf_file_t *fp, ctf_id_t type)
return (dtd);
}
+ctf_dsdef_t *
+ctf_dsd_lookup(ctf_file_t *fp, ulong_t idx)
+{
+ ctf_dsdef_t *dsd;
+
+ for (dsd = ctf_list_next(&fp->ctf_dsdefs); dsd != NULL;
+ dsd = ctf_list_next(dsd)) {
+ if (dsd->dts_symidx == idx)
+ return (dsd);
+ }
+
+ return (NULL);
+}
+
+/*
+ * We order the ctf_dsdef_t by symbol index to make things better for updates.
+ */
+void
+ctf_dsd_insert(ctf_file_t *fp, ctf_dsdef_t *dsd)
+{
+ ctf_dsdef_t *i;
+
+ for (i = ctf_list_next(&fp->ctf_dsdefs); i != NULL;
+ i = ctf_list_next(i)) {
+ if (i->dts_symidx > dsd->dts_symidx)
+ break;
+ }
+
+ if (i == NULL) {
+ ctf_list_append(&fp->ctf_dsdefs, dsd);
+ return;
+ }
+
+ ctf_list_insert_before(&fp->ctf_dsdefs, i, dsd);
+}
+
+/* ARGSUSED */
+void
+ctf_dsd_delete(ctf_file_t *fp, ctf_dsdef_t *dsd)
+{
+ if (dsd->dts_nargs > 0)
+ ctf_free(dsd->dts_argc,
+ sizeof (ctf_id_t) * dsd->dts_nargs);
+ ctf_list_delete(&fp->ctf_dsdefs, dsd);
+ ctf_free(dsd, sizeof (ctf_dsdef_t));
+}
+
+ctf_dldef_t *
+ctf_dld_lookup(ctf_file_t *fp, const char *name)
+{
+ ctf_dldef_t *dld;
+
+ for (dld = ctf_list_next(&fp->ctf_dldefs); dld != NULL;
+ dld = ctf_list_next(dld)) {
+ if (strcmp(name, dld->dld_name) == 0)
+ return (dld);
+ }
+
+ return (NULL);
+}
+
+void
+ctf_dld_insert(ctf_file_t *fp, ctf_dldef_t *dld, uint_t pos)
+{
+ ctf_dldef_t *l;
+
+ if (pos == 0) {
+ ctf_list_prepend(&fp->ctf_dldefs, dld);
+ return;
+ }
+
+ for (l = ctf_list_next(&fp->ctf_dldefs); pos != 0 && dld != NULL;
+ l = ctf_list_next(l), pos--)
+ ;
+
+ if (l == NULL)
+ ctf_list_append(&fp->ctf_dldefs, dld);
+ else
+ ctf_list_insert_before(&fp->ctf_dsdefs, l, dld);
+}
+
+void
+ctf_dld_delete(ctf_file_t *fp, ctf_dldef_t *dld)
+{
+ ctf_list_delete(&fp->ctf_dldefs, dld);
+
+ if (dld->dld_name != NULL) {
+ size_t len = strlen(dld->dld_name) + 1;
+ ctf_free(dld->dld_name, len);
+ fp->ctf_dtstrlen -= len;
+ }
+
+ ctf_free(dld, sizeof (ctf_dldef_t));
+}
+
/*
* Discard all of the dynamic type definitions that have been added to the
* container since the last call to ctf_update(). We locate such types by
@@ -583,10 +926,10 @@ ctf_discard(ctf_file_t *fp)
return (0); /* no update required */
for (dtd = ctf_list_prev(&fp->ctf_dtdefs); dtd != NULL; dtd = ntd) {
+ ntd = ctf_list_prev(dtd);
if (dtd->dtd_type <= fp->ctf_dtoldid)
continue; /* skip types that have been committed */
- ntd = ctf_list_prev(dtd);
ctf_dtd_delete(fp, dtd);
}
@@ -637,26 +980,7 @@ ctf_add_generic(ctf_file_t *fp, uint_t flag, const char *name, ctf_dtdef_t **rp)
return (type);
}
-/*
- * When encoding integer sizes, we want to convert a byte count in the range
- * 1-8 to the closest power of 2 (e.g. 3->4, 5->8, etc). The clp2() function
- * is a clever implementation from "Hacker's Delight" by Henry Warren, Jr.
- */
-static size_t
-clp2(size_t x)
-{
- x--;
-
- x |= (x >> 1);
- x |= (x >> 2);
- x |= (x >> 4);
- x |= (x >> 8);
- x |= (x >> 16);
-
- return (x + 1);
-}
-
-static ctf_id_t
+ctf_id_t
ctf_add_encoded(ctf_file_t *fp, uint_t flag,
const char *name, const ctf_encoding_t *ep, uint_t kind)
{
@@ -670,14 +994,22 @@ ctf_add_encoded(ctf_file_t *fp, uint_t flag,
return (CTF_ERR); /* errno is set for us */
dtd->dtd_data.ctt_info = CTF_TYPE_INFO(kind, flag, 0);
- dtd->dtd_data.ctt_size = clp2(P2ROUNDUP(ep->cte_bits, NBBY) / NBBY);
+
+ /*
+ * If the type's size is not an even number of bytes, then we should
+ * round up the type size to the nearest byte.
+ */
+ dtd->dtd_data.ctt_size = ep->cte_bits / NBBY;
+ if ((ep->cte_bits % NBBY) != 0)
+ dtd->dtd_data.ctt_size++;
dtd->dtd_u.dtu_enc = *ep;
return (type);
}
-static ctf_id_t
-ctf_add_reftype(ctf_file_t *fp, uint_t flag, ctf_id_t ref, uint_t kind)
+ctf_id_t
+ctf_add_reftype(ctf_file_t *fp, uint_t flag,
+ const char *name, ctf_id_t ref, uint_t kind)
{
ctf_dtdef_t *dtd;
ctf_id_t type;
@@ -685,7 +1017,7 @@ ctf_add_reftype(ctf_file_t *fp, uint_t flag, ctf_id_t ref, uint_t kind)
if (ref == CTF_ERR || ref < 0 || ref > CTF_MAX_TYPE)
return (ctf_set_errno(fp, EINVAL));
- if ((type = ctf_add_generic(fp, flag, NULL, &dtd)) == CTF_ERR)
+ if ((type = ctf_add_generic(fp, flag, name, &dtd)) == CTF_ERR)
return (CTF_ERR); /* errno is set for us */
ctf_ref_inc(fp, ref);
@@ -711,9 +1043,9 @@ ctf_add_float(ctf_file_t *fp, uint_t flag,
}
ctf_id_t
-ctf_add_pointer(ctf_file_t *fp, uint_t flag, ctf_id_t ref)
+ctf_add_pointer(ctf_file_t *fp, uint_t flag, const char *name, ctf_id_t ref)
{
- return (ctf_add_reftype(fp, flag, ref, CTF_K_POINTER));
+ return (ctf_add_reftype(fp, flag, name, ref, CTF_K_POINTER));
}
ctf_id_t
@@ -728,13 +1060,17 @@ ctf_add_array(ctf_file_t *fp, uint_t flag, const ctf_arinfo_t *arp)
fpd = fp;
if (ctf_lookup_by_id(&fpd, arp->ctr_contents) == NULL &&
- ctf_dtd_lookup(fp, arp->ctr_contents) == NULL)
+ ctf_dtd_lookup(fp, arp->ctr_contents) == NULL) {
+ ctf_dprintf("bad contents for array: %d\n", arp->ctr_contents);
return (ctf_set_errno(fp, ECTF_BADID));
+ }
fpd = fp;
if (ctf_lookup_by_id(&fpd, arp->ctr_index) == NULL &&
- ctf_dtd_lookup(fp, arp->ctr_index) == NULL)
+ ctf_dtd_lookup(fp, arp->ctr_index) == NULL) {
+ ctf_dprintf("bad index for array: %d\n", arp->ctr_index);
return (ctf_set_errno(fp, ECTF_BADID));
+ }
if ((type = ctf_add_generic(fp, flag, NULL, &dtd)) == CTF_ERR)
return (CTF_ERR); /* errno is set for us */
@@ -781,7 +1117,7 @@ ctf_set_array(ctf_file_t *fp, ctf_id_t type, const ctf_arinfo_t *arp)
}
ctf_id_t
-ctf_add_function(ctf_file_t *fp, uint_t flag,
+ctf_add_funcptr(ctf_file_t *fp, uint_t flag,
const ctf_funcinfo_t *ctc, const ctf_id_t *argv)
{
ctf_dtdef_t *dtd;
@@ -842,20 +1178,34 @@ ctf_add_struct(ctf_file_t *fp, uint_t flag, const char *name)
{
ctf_hash_t *hp = &fp->ctf_structs;
ctf_helem_t *hep = NULL;
- ctf_dtdef_t *dtd;
- ctf_id_t type;
+ ctf_dtdef_t *dtd = NULL;
+ ctf_id_t type = CTF_ERR;
if (name != NULL)
hep = ctf_hash_lookup(hp, fp, name, strlen(name));
- if (hep != NULL && ctf_type_kind(fp, hep->h_type) == CTF_K_FORWARD)
- dtd = ctf_dtd_lookup(fp, type = hep->h_type);
- else if ((type = ctf_add_generic(fp, flag, name, &dtd)) == CTF_ERR)
- return (CTF_ERR); /* errno is set for us */
+ if (hep != NULL && ctf_type_kind(fp, hep->h_type) == CTF_K_FORWARD) {
+ type = hep->h_type;
+ dtd = ctf_dtd_lookup(fp, type);
+ if (CTF_INFO_KIND(dtd->dtd_data.ctt_info) != CTF_K_FORWARD)
+ dtd = NULL;
+ }
+ if (dtd == NULL) {
+ type = ctf_add_generic(fp, flag, name, &dtd);
+ if (type == CTF_ERR)
+ return (CTF_ERR); /* errno is set for us */
+ }
+
+ VERIFY(type != CTF_ERR);
dtd->dtd_data.ctt_info = CTF_TYPE_INFO(CTF_K_STRUCT, flag, 0);
dtd->dtd_data.ctt_size = 0;
+ /*
+ * Always dirty in case we modified a forward.
+ */
+ fp->ctf_flags |= LCTF_DIRTY;
+
return (type);
}
@@ -864,20 +1214,34 @@ ctf_add_union(ctf_file_t *fp, uint_t flag, const char *name)
{
ctf_hash_t *hp = &fp->ctf_unions;
ctf_helem_t *hep = NULL;
- ctf_dtdef_t *dtd;
- ctf_id_t type;
+ ctf_dtdef_t *dtd = NULL;
+ ctf_id_t type = CTF_ERR;
if (name != NULL)
hep = ctf_hash_lookup(hp, fp, name, strlen(name));
- if (hep != NULL && ctf_type_kind(fp, hep->h_type) == CTF_K_FORWARD)
- dtd = ctf_dtd_lookup(fp, type = hep->h_type);
- else if ((type = ctf_add_generic(fp, flag, name, &dtd)) == CTF_ERR)
- return (CTF_ERR); /* errno is set for us */
+ if (hep != NULL && ctf_type_kind(fp, hep->h_type) == CTF_K_FORWARD) {
+ type = hep->h_type;
+ dtd = ctf_dtd_lookup(fp, type);
+ if (CTF_INFO_KIND(dtd->dtd_data.ctt_info) != CTF_K_FORWARD)
+ dtd = NULL;
+ }
+ if (dtd == NULL) {
+ type = ctf_add_generic(fp, flag, name, &dtd);
+ if (type == CTF_ERR)
+ return (CTF_ERR); /* errno is set for us */
+ }
+
+ VERIFY(type != CTF_ERR);
dtd->dtd_data.ctt_info = CTF_TYPE_INFO(CTF_K_UNION, flag, 0);
dtd->dtd_data.ctt_size = 0;
+ /*
+ * Always dirty in case we modified a forward.
+ */
+ fp->ctf_flags |= LCTF_DIRTY;
+
return (type);
}
@@ -886,20 +1250,34 @@ ctf_add_enum(ctf_file_t *fp, uint_t flag, const char *name)
{
ctf_hash_t *hp = &fp->ctf_enums;
ctf_helem_t *hep = NULL;
- ctf_dtdef_t *dtd;
- ctf_id_t type;
+ ctf_dtdef_t *dtd = NULL;
+ ctf_id_t type = CTF_ERR;
if (name != NULL)
hep = ctf_hash_lookup(hp, fp, name, strlen(name));
- if (hep != NULL && ctf_type_kind(fp, hep->h_type) == CTF_K_FORWARD)
- dtd = ctf_dtd_lookup(fp, type = hep->h_type);
- else if ((type = ctf_add_generic(fp, flag, name, &dtd)) == CTF_ERR)
- return (CTF_ERR); /* errno is set for us */
+ if (hep != NULL && ctf_type_kind(fp, hep->h_type) == CTF_K_FORWARD) {
+ type = hep->h_type;
+ dtd = ctf_dtd_lookup(fp, type);
+ if (CTF_INFO_KIND(dtd->dtd_data.ctt_info) != CTF_K_FORWARD)
+ dtd = NULL;
+ }
+ if (dtd == NULL) {
+ type = ctf_add_generic(fp, flag, name, &dtd);
+ if (type == CTF_ERR)
+ return (CTF_ERR); /* errno is set for us */
+ }
+
+ VERIFY(type != CTF_ERR);
dtd->dtd_data.ctt_info = CTF_TYPE_INFO(CTF_K_ENUM, flag, 0);
dtd->dtd_data.ctt_size = fp->ctf_dmodel->ctd_int;
+ /*
+ * Always dirty in case we modified a forward.
+ */
+ fp->ctf_flags |= LCTF_DIRTY;
+
return (type);
}
@@ -965,21 +1343,21 @@ ctf_add_typedef(ctf_file_t *fp, uint_t flag, const char *name, ctf_id_t ref)
}
ctf_id_t
-ctf_add_volatile(ctf_file_t *fp, uint_t flag, ctf_id_t ref)
+ctf_add_volatile(ctf_file_t *fp, uint_t flag, const char *name, ctf_id_t ref)
{
- return (ctf_add_reftype(fp, flag, ref, CTF_K_VOLATILE));
+ return (ctf_add_reftype(fp, flag, name, ref, CTF_K_VOLATILE));
}
ctf_id_t
-ctf_add_const(ctf_file_t *fp, uint_t flag, ctf_id_t ref)
+ctf_add_const(ctf_file_t *fp, uint_t flag, const char *name, ctf_id_t ref)
{
- return (ctf_add_reftype(fp, flag, ref, CTF_K_CONST));
+ return (ctf_add_reftype(fp, flag, name, ref, CTF_K_CONST));
}
ctf_id_t
-ctf_add_restrict(ctf_file_t *fp, uint_t flag, ctf_id_t ref)
+ctf_add_restrict(ctf_file_t *fp, uint_t flag, const char *name, ctf_id_t ref)
{
- return (ctf_add_reftype(fp, flag, ref, CTF_K_RESTRICT));
+ return (ctf_add_reftype(fp, flag, name, ref, CTF_K_RESTRICT));
}
int
@@ -1012,8 +1390,10 @@ ctf_add_enumerator(ctf_file_t *fp, ctf_id_t enid, const char *name, int value)
for (dmd = ctf_list_next(&dtd->dtd_u.dtu_members);
dmd != NULL; dmd = ctf_list_next(dmd)) {
- if (strcmp(dmd->dmd_name, name) == 0)
+ if (strcmp(dmd->dmd_name, name) == 0) {
+ ctf_dprintf("encountered dupliacte member %s\n", name);
return (ctf_set_errno(fp, ECTF_DUPMEMBER));
+ }
}
if ((dmd = ctf_alloc(sizeof (ctf_dmdef_t))) == NULL)
@@ -1039,13 +1419,16 @@ ctf_add_enumerator(ctf_file_t *fp, ctf_id_t enid, const char *name, int value)
}
int
-ctf_add_member(ctf_file_t *fp, ctf_id_t souid, const char *name, ctf_id_t type)
+ctf_add_member(ctf_file_t *fp, ctf_id_t souid, const char *name, ctf_id_t type,
+ ulong_t offset)
{
ctf_dtdef_t *dtd = ctf_dtd_lookup(fp, souid);
ctf_dmdef_t *dmd;
+ ulong_t mbitsz;
ssize_t msize, malign, ssize;
uint_t kind, vlen, root;
+ int mkind;
char *s = NULL;
if (!(fp->ctf_flags & LCTF_RDWR))
@@ -1064,19 +1447,58 @@ ctf_add_member(ctf_file_t *fp, ctf_id_t souid, const char *name, ctf_id_t type)
if (vlen == CTF_MAX_VLEN)
return (ctf_set_errno(fp, ECTF_DTFULL));
- if (name != NULL) {
+ /*
+ * Structures may have members which are anonymous. If they have two of
+ * these, then the duplicte member detection would find it due to the
+ * string of "", so we skip it.
+ */
+ if (name != NULL && *name != '\0') {
for (dmd = ctf_list_next(&dtd->dtd_u.dtu_members);
dmd != NULL; dmd = ctf_list_next(dmd)) {
if (dmd->dmd_name != NULL &&
- strcmp(dmd->dmd_name, name) == 0)
+ strcmp(dmd->dmd_name, name) == 0) {
return (ctf_set_errno(fp, ECTF_DUPMEMBER));
+ }
}
}
if ((msize = ctf_type_size(fp, type)) == CTF_ERR ||
- (malign = ctf_type_align(fp, type)) == CTF_ERR)
+ (malign = ctf_type_align(fp, type)) == CTF_ERR ||
+ (mkind = ctf_type_kind(fp, type)) == CTF_ERR)
return (CTF_ERR); /* errno is set for us */
+ /*
+ * ctf_type_size returns sizes in bytes. However, for bitfields, that
+ * means that it may misrepresent and actually rounds it up to a power
+ * of two and store that in bytes. So instead we have to get the
+ * Integers encoding and rely on that.
+ */
+ if (mkind == CTF_K_INTEGER) {
+ ctf_encoding_t e;
+
+ if (ctf_type_encoding(fp, type, &e) == CTF_ERR)
+ return (CTF_ERR); /* errno is set for us */
+ mbitsz = e.cte_bits;
+ } else if (mkind == CTF_K_FORWARD) {
+ /*
+ * This is a rather rare case. In general one cannot add a
+ * forward to a structure. However, the CTF tools traditionally
+ * tried to add a forward to the struct cpu as the last member.
+ * Therefore, if we find one here, we're going to verify the
+ * size and make sure it's zero. It's certainly odd, but that's
+ * life.
+ *
+ * Further, if it's not an absolute position being specified,
+ * then we refuse to add it.
+ */
+ if (offset == ULONG_MAX)
+ return (ctf_set_errno(fp, EINVAL));
+ VERIFY(msize == 0);
+ mbitsz = msize;
+ } else {
+ mbitsz = msize * 8;
+ }
+
if ((dmd = ctf_alloc(sizeof (ctf_dmdef_t))) == NULL)
return (ctf_set_errno(fp, EAGAIN));
@@ -1092,29 +1514,36 @@ ctf_add_member(ctf_file_t *fp, ctf_id_t souid, const char *name, ctf_id_t type)
if (kind == CTF_K_STRUCT && vlen != 0) {
ctf_dmdef_t *lmd = ctf_list_prev(&dtd->dtd_u.dtu_members);
ctf_id_t ltype = ctf_type_resolve(fp, lmd->dmd_type);
- size_t off = lmd->dmd_offset;
-
- ctf_encoding_t linfo;
- ssize_t lsize;
-
- if (ctf_type_encoding(fp, ltype, &linfo) != CTF_ERR)
- off += linfo.cte_bits;
- else if ((lsize = ctf_type_size(fp, ltype)) != CTF_ERR)
- off += lsize * NBBY;
-
- /*
- * Round up the offset of the end of the last member to the
- * next byte boundary, convert 'off' to bytes, and then round
- * it up again to the next multiple of the alignment required
- * by the new member. Finally, convert back to bits and store
- * the result in dmd_offset. Technically we could do more
- * efficient packing if the new member is a bit-field, but
- * we're the "compiler" and ANSI says we can do as we choose.
- */
- off = roundup(off, NBBY) / NBBY;
- off = roundup(off, MAX(malign, 1));
- dmd->dmd_offset = off * NBBY;
- ssize = off + msize;
+ size_t off;
+
+ if (offset == ULONG_MAX) {
+ ctf_encoding_t linfo;
+ ssize_t lsize;
+
+ off = lmd->dmd_offset;
+ if (ctf_type_encoding(fp, ltype, &linfo) != CTF_ERR)
+ off += linfo.cte_bits;
+ else if ((lsize = ctf_type_size(fp, ltype)) != CTF_ERR)
+ off += lsize * NBBY;
+
+ /*
+ * Round up the offset of the end of the last member to
+ * the next byte boundary, convert 'off' to bytes, and
+ * then round it up again to the next multiple of the
+ * alignment required by the new member. Finally,
+ * convert back to bits and store the result in
+ * dmd_offset. Technically we could do more efficient
+ * packing if the new member is a bit-field, but we're
+ * the "compiler" and ANSI says we can do as we choose.
+ */
+ off = roundup(off, NBBY) / NBBY;
+ off = roundup(off, MAX(malign, 1));
+ dmd->dmd_offset = off * NBBY;
+ ssize = off + msize;
+ } else {
+ dmd->dmd_offset = offset;
+ ssize = (offset + mbitsz) / NBBY;
+ }
} else {
dmd->dmd_offset = 0;
ssize = ctf_get_ctt_size(fp, &dtd->dtd_data, NULL, NULL);
@@ -1380,7 +1809,7 @@ ctf_add_type(ctf_file_t *dst_fp, ctf_file_t *src_fp, ctf_id_t src_type)
if (src_type == CTF_ERR)
return (CTF_ERR); /* errno is set for us */
- dst_type = ctf_add_reftype(dst_fp, flag, src_type, kind);
+ dst_type = ctf_add_reftype(dst_fp, flag, NULL, src_type, kind);
break;
case CTF_K_ARRAY:
@@ -1415,7 +1844,7 @@ ctf_add_type(ctf_file_t *dst_fp, ctf_file_t *src_fp, ctf_id_t src_type)
if (ctc.ctc_return == CTF_ERR)
return (CTF_ERR); /* errno is set for us */
- dst_type = ctf_add_function(dst_fp, flag, &ctc, NULL);
+ dst_type = ctf_add_funcptr(dst_fp, flag, &ctc, NULL);
break;
case CTF_K_STRUCT:
@@ -1540,3 +1969,227 @@ ctf_add_type(ctf_file_t *dst_fp, ctf_file_t *src_fp, ctf_id_t src_type)
return (dst_type);
}
+
+int
+ctf_add_function(ctf_file_t *fp, ulong_t idx, const ctf_funcinfo_t *fip,
+ const ctf_id_t *argc)
+{
+ int i;
+ ctf_dsdef_t *dsd;
+ ctf_file_t *afp;
+ uintptr_t symbase = (uintptr_t)fp->ctf_symtab.cts_data;
+
+ if (!(fp->ctf_flags & LCTF_RDWR))
+ return (ctf_set_errno(fp, ECTF_RDONLY));
+
+ if (ctf_dsd_lookup(fp, idx) != NULL)
+ return (ctf_set_errno(fp, ECTF_CONFLICT));
+
+ if (symbase == NULL)
+ return (ctf_set_errno(fp, ECTF_STRTAB));
+
+ if (idx > fp->ctf_nsyms)
+ return (ctf_set_errno(fp, ECTF_NOTDATA));
+
+ if (fp->ctf_symtab.cts_entsize == sizeof (Elf32_Sym)) {
+ const Elf32_Sym *symp = (Elf32_Sym *)symbase + idx;
+ if (ELF32_ST_TYPE(symp->st_info) != STT_FUNC)
+ return (ctf_set_errno(fp, ECTF_NOTFUNC));
+ } else {
+ const Elf64_Sym *symp = (Elf64_Sym *)symbase + idx;
+ if (ELF64_ST_TYPE(symp->st_info) != STT_FUNC)
+ return (ctf_set_errno(fp, ECTF_NOTFUNC));
+ }
+
+ afp = fp;
+ if (ctf_lookup_by_id(&afp, fip->ctc_return) == NULL)
+ return (CTF_ERR); /* errno is set for us */
+
+ for (i = 0; i < fip->ctc_argc; i++) {
+ afp = fp;
+ if (ctf_lookup_by_id(&afp, argc[i]) == NULL)
+ return (CTF_ERR); /* errno is set for us */
+ }
+
+ dsd = ctf_alloc(sizeof (ctf_dsdef_t));
+ if (dsd == NULL)
+ return (ctf_set_errno(fp, ENOMEM));
+ dsd->dts_nargs = fip->ctc_argc;
+ if (fip->ctc_flags & CTF_FUNC_VARARG)
+ dsd->dts_nargs++;
+ if (dsd->dts_nargs != 0) {
+ dsd->dts_argc = ctf_alloc(sizeof (ctf_id_t) * dsd->dts_nargs);
+ if (dsd->dts_argc == NULL) {
+ ctf_free(dsd, sizeof (ctf_dsdef_t));
+ return (ctf_set_errno(fp, ENOMEM));
+ }
+ bcopy(argc, dsd->dts_argc, sizeof (ctf_id_t) * fip->ctc_argc);
+ if (fip->ctc_flags & CTF_FUNC_VARARG)
+ dsd->dts_argc[fip->ctc_argc] = 0;
+ }
+ dsd->dts_symidx = idx;
+ dsd->dts_tid = fip->ctc_return;
+
+ ctf_dsd_insert(fp, dsd);
+ fp->ctf_flags |= LCTF_DIRTY;
+
+ return (0);
+}
+
+int
+ctf_add_object(ctf_file_t *fp, ulong_t idx, ctf_id_t type)
+{
+ ctf_dsdef_t *dsd;
+ ctf_file_t *afp;
+ uintptr_t symbase = (uintptr_t)fp->ctf_symtab.cts_data;
+
+ if (!(fp->ctf_flags & LCTF_RDWR))
+ return (ctf_set_errno(fp, ECTF_RDONLY));
+
+ if (!(fp->ctf_flags & LCTF_RDWR))
+ return (ctf_set_errno(fp, ECTF_RDONLY));
+
+ if (ctf_dsd_lookup(fp, idx) != NULL)
+ return (ctf_set_errno(fp, ECTF_CONFLICT));
+
+ if (symbase == NULL)
+ return (ctf_set_errno(fp, ECTF_STRTAB));
+
+ if (idx > fp->ctf_nsyms)
+ return (ctf_set_errno(fp, ECTF_NOTDATA));
+
+ if (fp->ctf_symtab.cts_entsize == sizeof (Elf32_Sym)) {
+ const Elf32_Sym *symp = (Elf32_Sym *)symbase + idx;
+ if (ELF32_ST_TYPE(symp->st_info) != STT_OBJECT)
+ return (ctf_set_errno(fp, ECTF_NOTDATA));
+ } else {
+ const Elf64_Sym *symp = (Elf64_Sym *)symbase + idx;
+ if (ELF64_ST_TYPE(symp->st_info) != STT_OBJECT)
+ return (ctf_set_errno(fp, ECTF_NOTDATA));
+ }
+
+ afp = fp;
+ if (ctf_lookup_by_id(&afp, type) == NULL)
+ return (CTF_ERR); /* errno is set for us */
+
+ dsd = ctf_alloc(sizeof (ctf_dsdef_t));
+ if (dsd == NULL)
+ return (ctf_set_errno(fp, ENOMEM));
+ dsd->dts_symidx = idx;
+ dsd->dts_tid = type;
+ dsd->dts_argc = NULL;
+
+ ctf_dsd_insert(fp, dsd);
+ fp->ctf_flags |= LCTF_DIRTY;
+
+ return (0);
+}
+
+void
+ctf_dataptr(ctf_file_t *fp, const void **addrp, size_t *sizep)
+{
+ if (addrp != NULL)
+ *addrp = fp->ctf_base;
+ if (sizep != NULL)
+ *sizep = fp->ctf_size;
+}
+
+int
+ctf_add_label(ctf_file_t *fp, const char *name, ctf_id_t type, uint_t position)
+{
+ ctf_file_t *fpd;
+ ctf_dldef_t *dld;
+
+ if (name == NULL)
+ return (ctf_set_errno(fp, EINVAL));
+
+ if (!(fp->ctf_flags & LCTF_RDWR))
+ return (ctf_set_errno(fp, ECTF_RDONLY));
+
+ fpd = fp;
+ if (type != 0 && ctf_lookup_by_id(&fpd, type) == NULL)
+ return (CTF_ERR); /* errno is set for us */
+
+ if (type != 0 && (fp->ctf_flags & LCTF_CHILD) &&
+ CTF_TYPE_ISPARENT(type))
+ return (ctf_set_errno(fp, ECTF_NOPARENT));
+
+ if (ctf_dld_lookup(fp, name) != NULL)
+ return (ctf_set_errno(fp, ECTF_LABELEXISTS));
+
+ if ((dld = ctf_alloc(sizeof (ctf_dldef_t))) == NULL)
+ return (ctf_set_errno(fp, EAGAIN));
+
+ if ((dld->dld_name = ctf_strdup(name)) == NULL) {
+ ctf_free(dld, sizeof (ctf_dldef_t));
+ return (ctf_set_errno(fp, EAGAIN));
+ }
+
+ dld->dld_type = type;
+ fp->ctf_dtstrlen += strlen(name) + 1;
+ ctf_dld_insert(fp, dld, position);
+ fp->ctf_flags |= LCTF_DIRTY;
+
+ return (0);
+}
+
+/*
+ * Update the size of a structure or union. Note that we don't allow this to
+ * shrink the size of a struct or union, only to increase it. This is useful for
+ * cases when you have a structure whose actual size is larger than the sum of
+ * its members due to padding for natuaral alignment.
+ */
+int
+ctf_set_size(ctf_file_t *fp, ctf_id_t id, const ulong_t newsz)
+{
+ ctf_dtdef_t *dtd = ctf_dtd_lookup(fp, id);
+ uint_t kind;
+ size_t oldsz;
+
+ if (!(fp->ctf_flags & LCTF_RDWR))
+ return (ctf_set_errno(fp, ECTF_RDONLY));
+
+ if (dtd == NULL)
+ return (ctf_set_errno(fp, ECTF_BADID));
+
+ kind = CTF_INFO_KIND(dtd->dtd_data.ctt_info);
+
+ if (kind != CTF_K_STRUCT && kind != CTF_K_UNION)
+ return (ctf_set_errno(fp, ECTF_NOTSOU));
+
+ if ((oldsz = dtd->dtd_data.ctt_size) == CTF_LSIZE_SENT)
+ oldsz = CTF_TYPE_LSIZE(&dtd->dtd_data);
+
+ if (newsz < oldsz)
+ return (ctf_set_errno(fp, EINVAL));
+
+ if (newsz > CTF_MAX_SIZE) {
+ dtd->dtd_data.ctt_size = CTF_LSIZE_SENT;
+ dtd->dtd_data.ctt_lsizehi = CTF_SIZE_TO_LSIZE_HI(newsz);
+ dtd->dtd_data.ctt_lsizelo = CTF_SIZE_TO_LSIZE_LO(newsz);
+ } else {
+ dtd->dtd_data.ctt_size = (ushort_t)newsz;
+ }
+
+ fp->ctf_flags |= LCTF_DIRTY;
+ return (0);
+}
+
+int
+ctf_set_root(ctf_file_t *fp, ctf_id_t id, const boolean_t vis)
+{
+ ctf_dtdef_t *dtd = ctf_dtd_lookup(fp, id);
+ uint_t kind, vlen;
+
+ if (!(fp->ctf_flags & LCTF_RDWR))
+ return (ctf_set_errno(fp, ECTF_RDONLY));
+
+ if (dtd == NULL)
+ return (ctf_set_errno(fp, ECTF_BADID));
+
+ kind = CTF_INFO_KIND(dtd->dtd_data.ctt_info);
+ vlen = CTF_INFO_VLEN(dtd->dtd_data.ctt_info);
+
+ dtd->dtd_data.ctt_info = CTF_TYPE_INFO(kind, vis, vlen);
+ return (0);
+}
diff --git a/usr/src/common/ctf/ctf_error.c b/usr/src/common/ctf/ctf_error.c
index fe3d0de0cb..1765b77b54 100644
--- a/usr/src/common/ctf/ctf_error.c
+++ b/usr/src/common/ctf/ctf_error.c
@@ -24,7 +24,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2012, Joyent, Inc.
+ * Copyright (c) 2015, Joyent, Inc.
*/
#include <ctf_impl.h>
@@ -75,7 +75,15 @@ static const char *const _ctf_errlist[] = {
"Duplicate member name definition", /* ECTF_DUPMEMBER */
"Conflicting type is already defined", /* ECTF_CONFLICT */
"Type has outstanding references", /* ECTF_REFERENCED */
- "Type is not a dynamic type" /* ECTF_NOTDYN */
+ "Type is not a dynamic type", /* ECTF_NOTDYN */
+ "Elf library failure", /* ECTF_ELF */
+ "Cannot merge child container", /* ECTF_MCHILD */
+ "Label already exists", /* ECTF_LABEL */
+ "Merged labels conflict", /* ECTF_LCONFLICT */
+ "Zlib library failure", /* ECTF_ZLIB */
+ "CTF conversion backend error", /* ECTF_CONVBKERR */
+ "No C source to convert from", /* ECTF_CONVNOCSRC */
+ "No applicable conversion backend" /* ECTF_NOCONVBKEND */
};
static const int _ctf_nerr = sizeof (_ctf_errlist) / sizeof (_ctf_errlist[0]);
diff --git a/usr/src/common/ctf/ctf_hash.c b/usr/src/common/ctf/ctf_hash.c
index b10a7618f6..0c5a71a5ac 100644
--- a/usr/src/common/ctf/ctf_hash.c
+++ b/usr/src/common/ctf/ctf_hash.c
@@ -25,9 +25,8 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <ctf_impl.h>
+#include <sys/debug.h>
static const ushort_t _CTF_EMPTY[1] = { 0 };
diff --git a/usr/src/common/ctf/ctf_impl.h b/usr/src/common/ctf/ctf_impl.h
index f56fa6a005..04b12418ae 100644
--- a/usr/src/common/ctf/ctf_impl.h
+++ b/usr/src/common/ctf/ctf_impl.h
@@ -25,7 +25,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
*/
#ifndef _CTF_IMPL_H
@@ -41,6 +41,8 @@
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/varargs.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
#define isspace(c) \
((c) == ' ' || (c) == '\t' || (c) == '\n' || \
@@ -56,6 +58,7 @@
#include <stdio.h>
#include <limits.h>
#include <ctype.h>
+#include <stddef.h>
#endif /* _KERNEL */
@@ -77,6 +80,10 @@ typedef struct ctf_hash {
uint_t h_free; /* index of next free hash element */
} ctf_hash_t;
+struct ctf_idhash_iter {
+ int cii_id; /* Current iteration id */
+};
+
typedef struct ctf_strs {
const char *cts_strs; /* base address of string table */
size_t cts_len; /* size of string table in bytes */
@@ -159,6 +166,20 @@ typedef struct ctf_dtdef {
} dtd_u;
} ctf_dtdef_t;
+typedef struct ctf_dsdef {
+ ctf_list_t dts_list; /* list forward/back pointers */
+ ulong_t dts_symidx; /* symbol id */
+ ctf_id_t dts_tid; /* type for obj, 0 if function */
+ uint_t dts_nargs;
+ ctf_id_t *dts_argc; /* function argv */
+} ctf_dsdef_t;
+
+typedef struct ctf_dldef {
+ ctf_list_t dld_list; /* list forward/back pointers */
+ char *dld_name; /* name of the label */
+ ctf_id_t dld_type; /* type ID associated with the label */
+} ctf_dldef_t;
+
typedef struct ctf_bundle {
ctf_file_t *ctb_file; /* CTF container handle */
ctf_id_t ctb_type; /* CTF type identifier */
@@ -211,6 +232,9 @@ struct ctf_file {
ulong_t ctf_dtnextid; /* next dynamic type id to assign */
ulong_t ctf_dtoldid; /* oldest id that has been committed */
void *ctf_specific; /* data for ctf_get/setspecific */
+ ctf_list_t ctf_dsdefs; /* list of dynamic obj/func definitions */
+ ctf_list_t ctf_dldefs; /* list of dynamic labels */
+ uint_t ctf_hflags; /* original flags on the header */
};
#define LCTF_INDEX_TO_TYPEPTR(fp, i) \
@@ -225,62 +249,15 @@ struct ctf_file {
#define LCTF_RDWR 0x0004 /* CTF container is writable */
#define LCTF_DIRTY 0x0008 /* CTF container has been modified */
-#define ECTF_BASE 1000 /* base value for libctf errnos */
-
-enum {
- ECTF_FMT = ECTF_BASE, /* file is not in CTF or ELF format */
- ECTF_ELFVERS, /* ELF version is more recent than libctf */
- ECTF_CTFVERS, /* CTF version is more recent than libctf */
- ECTF_ENDIAN, /* data is different endian-ness than lib */
- ECTF_SYMTAB, /* symbol table uses invalid entry size */
- ECTF_SYMBAD, /* symbol table data buffer invalid */
- ECTF_STRBAD, /* string table data buffer invalid */
- ECTF_CORRUPT, /* file data corruption detected */
- ECTF_NOCTFDATA, /* ELF file does not contain CTF data */
- ECTF_NOCTFBUF, /* buffer does not contain CTF data */
- ECTF_NOSYMTAB, /* symbol table data is not available */
- ECTF_NOPARENT, /* parent CTF container is not available */
- ECTF_DMODEL, /* data model mismatch */
- ECTF_MMAP, /* failed to mmap a data section */
- ECTF_ZMISSING, /* decompression library not installed */
- ECTF_ZINIT, /* failed to initialize decompression library */
- ECTF_ZALLOC, /* failed to allocate decompression buffer */
- ECTF_DECOMPRESS, /* failed to decompress CTF data */
- ECTF_STRTAB, /* string table for this string is missing */
- ECTF_BADNAME, /* string offset is corrupt w.r.t. strtab */
- ECTF_BADID, /* invalid type ID number */
- ECTF_NOTSOU, /* type is not a struct or union */
- ECTF_NOTENUM, /* type is not an enum */
- ECTF_NOTSUE, /* type is not a struct, union, or enum */
- ECTF_NOTINTFP, /* type is not an integer or float */
- ECTF_NOTARRAY, /* type is not an array */
- ECTF_NOTREF, /* type does not reference another type */
- ECTF_NAMELEN, /* buffer is too small to hold type name */
- ECTF_NOTYPE, /* no type found corresponding to name */
- ECTF_SYNTAX, /* syntax error in type name */
- ECTF_NOTFUNC, /* symtab entry does not refer to a function */
- ECTF_NOFUNCDAT, /* no func info available for function */
- ECTF_NOTDATA, /* symtab entry does not refer to a data obj */
- ECTF_NOTYPEDAT, /* no type info available for object */
- ECTF_NOLABEL, /* no label found corresponding to name */
- ECTF_NOLABELDATA, /* file does not contain any labels */
- ECTF_NOTSUP, /* feature not supported */
- ECTF_NOENUMNAM, /* enum element name not found */
- ECTF_NOMEMBNAM, /* member name not found */
- ECTF_RDONLY, /* CTF container is read-only */
- ECTF_DTFULL, /* CTF type is full (no more members allowed) */
- ECTF_FULL, /* CTF container is full */
- ECTF_DUPMEMBER, /* duplicate member name definition */
- ECTF_CONFLICT, /* conflicting type definition present */
- ECTF_REFERENCED, /* type has outstanding references */
- ECTF_NOTDYN /* type is not a dynamic type */
-};
+#define CTF_ELF_SCN_NAME ".SUNW_ctf"
extern ssize_t ctf_get_ctt_size(const ctf_file_t *, const ctf_type_t *,
ssize_t *, ssize_t *);
extern const ctf_type_t *ctf_lookup_by_id(ctf_file_t **, ctf_id_t);
+extern ctf_file_t *ctf_fdcreate_int(int, int *, ctf_sect_t *);
+
extern int ctf_hash_create(ctf_hash_t *, ulong_t);
extern int ctf_hash_insert(ctf_hash_t *, ctf_file_t *, ushort_t, uint_t);
extern int ctf_hash_define(ctf_hash_t *, ctf_file_t *, ushort_t, uint_t);
@@ -294,12 +271,16 @@ extern void ctf_hash_destroy(ctf_hash_t *);
extern void ctf_list_append(ctf_list_t *, void *);
extern void ctf_list_prepend(ctf_list_t *, void *);
+extern void ctf_list_insert_before(ctf_list_t *, void *, void *);
extern void ctf_list_delete(ctf_list_t *, void *);
extern void ctf_dtd_insert(ctf_file_t *, ctf_dtdef_t *);
extern void ctf_dtd_delete(ctf_file_t *, ctf_dtdef_t *);
extern ctf_dtdef_t *ctf_dtd_lookup(ctf_file_t *, ctf_id_t);
+extern void ctf_dsd_delete(ctf_file_t *, ctf_dsdef_t *);
+extern void ctf_dld_delete(ctf_file_t *, ctf_dldef_t *);
+
extern void ctf_decl_init(ctf_decl_t *, char *, size_t);
extern void ctf_decl_fini(ctf_decl_t *);
extern void ctf_decl_push(ctf_decl_t *, ctf_file_t *, ctf_id_t);
@@ -327,6 +308,13 @@ extern void ctf_dprintf(const char *, ...);
extern void *ctf_zopen(int *);
+extern ctf_id_t ctf_add_encoded(ctf_file_t *, uint_t, const char *,
+ const ctf_encoding_t *, uint_t);
+extern ctf_id_t ctf_add_reftype(ctf_file_t *, uint_t, const char *, ctf_id_t,
+ uint_t);
+extern boolean_t ctf_sym_valid(uintptr_t, int, uint16_t, uint64_t,
+ uint32_t);
+
extern const char _CTF_SECTION[]; /* name of CTF ELF section */
extern const char _CTF_NULLSTR[]; /* empty string */
diff --git a/usr/src/common/ctf/ctf_open.c b/usr/src/common/ctf/ctf_open.c
index 001cf5c591..82b396e825 100644
--- a/usr/src/common/ctf/ctf_open.c
+++ b/usr/src/common/ctf/ctf_open.c
@@ -25,7 +25,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
*/
#include <ctf_impl.h>
@@ -550,6 +550,7 @@ ctf_bufopen(const ctf_sect_t *ctfsect, const ctf_sect_t *symsect,
void *buf, *base;
size_t size, hdrsz;
int err;
+ uint_t hflags;
if (ctfsect == NULL || ((symsect == NULL) != (strsect == NULL)))
return (ctf_set_open_errno(errp, EINVAL));
@@ -631,6 +632,7 @@ ctf_bufopen(const ctf_sect_t *ctfsect, const ctf_sect_t *symsect,
* the CTF data buffer if it is compressed. Otherwise we just put
* the data section's buffer pointer into ctf_buf, below.
*/
+ hflags = hp.cth_flags;
if (hp.cth_flags & CTF_F_COMPRESS) {
size_t srclen, dstlen;
const void *src;
@@ -680,6 +682,7 @@ ctf_bufopen(const ctf_sect_t *ctfsect, const ctf_sect_t *symsect,
bzero(fp, sizeof (ctf_file_t));
fp->ctf_version = hp.cth_version;
fp->ctf_fileops = &ctf_fileops[hp.cth_version];
+ fp->ctf_hflags = hflags;
bcopy(ctfsect, &fp->ctf_data, sizeof (ctf_sect_t));
if (symsect != NULL) {
@@ -883,6 +886,8 @@ void
ctf_close(ctf_file_t *fp)
{
ctf_dtdef_t *dtd, *ntd;
+ ctf_dsdef_t *dsd, *nsd;
+ ctf_dldef_t *dld, *nld;
if (fp == NULL)
return; /* allow ctf_close(NULL) to simplify caller code */
@@ -906,10 +911,25 @@ ctf_close(ctf_file_t *fp)
ctf_dtd_delete(fp, dtd);
}
+ for (dsd = ctf_list_prev(&fp->ctf_dsdefs); dsd != NULL; dsd = nsd) {
+ nsd = ctf_list_prev(dsd);
+ ctf_dsd_delete(fp, dsd);
+ }
+
+ for (dld = ctf_list_prev(&fp->ctf_dldefs); dld != NULL; dld = nld) {
+ nld = ctf_list_prev(dld);
+ ctf_dld_delete(fp, dld);
+ }
+
ctf_free(fp->ctf_dthash, fp->ctf_dthashlen * sizeof (ctf_dtdef_t *));
if (fp->ctf_flags & LCTF_MMAP) {
- if (fp->ctf_data.cts_data != NULL)
+ /*
+ * Writeable containers shouldn't necessairily have the CTF
+ * section freed.
+ */
+ if (fp->ctf_data.cts_data != NULL &&
+ !(fp->ctf_flags & LCTF_RDWR))
ctf_sect_munmap(&fp->ctf_data);
if (fp->ctf_symtab.cts_data != NULL)
ctf_sect_munmap(&fp->ctf_symtab);
@@ -980,6 +1000,16 @@ ctf_parent_name(ctf_file_t *fp)
}
/*
+ * Return the label of the parent CTF container, if one exists. Otherwise return
+ * NULL.
+ */
+const char *
+ctf_parent_label(ctf_file_t *fp)
+{
+ return (fp->ctf_parlabel);
+}
+
+/*
* Import the types from the specified parent container by storing a pointer
* to it in ctf_parent and incrementing its reference count. Only one parent
* is allowed: if a parent already exists, it is replaced by the new parent.
@@ -1043,3 +1073,9 @@ ctf_getspecific(ctf_file_t *fp)
{
return (fp->ctf_specific);
}
+
+uint_t
+ctf_flags(ctf_file_t *fp)
+{
+ return (fp->ctf_hflags);
+}
diff --git a/usr/src/common/ctf/ctf_types.c b/usr/src/common/ctf/ctf_types.c
index ab1b9ff14b..2ef4f42d6b 100644
--- a/usr/src/common/ctf/ctf_types.c
+++ b/usr/src/common/ctf/ctf_types.c
@@ -24,8 +24,12 @@
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2015, Joyent, Inc.
+ */
#include <ctf_impl.h>
+#include <sys/debug.h>
ssize_t
ctf_get_ctt_size(const ctf_file_t *fp, const ctf_type_t *tp, ssize_t *sizep,
@@ -138,19 +142,21 @@ ctf_enum_iter(ctf_file_t *fp, ctf_id_t type, ctf_enum_f *func, void *arg)
}
/*
- * Iterate over every root (user-visible) type in the given CTF container.
- * We pass the type ID of each type to the specified callback function.
+ * Iterate over every type in the given CTF container. If the user doesn't ask
+ * for all types, then we only give them the user visible, aka root, types. We
+ * pass the type ID of each type to the specified callback function.
*/
int
-ctf_type_iter(ctf_file_t *fp, ctf_type_f *func, void *arg)
+ctf_type_iter(ctf_file_t *fp, boolean_t nonroot, ctf_type_f *func, void *arg)
{
ctf_id_t id, max = fp->ctf_typemax;
int rc, child = (fp->ctf_flags & LCTF_CHILD);
for (id = 1; id <= max; id++) {
const ctf_type_t *tp = LCTF_INDEX_TO_TYPEPTR(fp, id);
- if (CTF_INFO_ISROOT(tp->ctt_info) &&
- (rc = func(CTF_INDEX_TO_TYPE(id, child), arg)) != 0)
+ if ((nonroot || CTF_INFO_ISROOT(tp->ctt_info)) &&
+ (rc = func(CTF_INDEX_TO_TYPE(id, child),
+ CTF_INFO_ISROOT(tp->ctt_info), arg)) != 0)
return (rc);
}
@@ -361,6 +367,9 @@ ctf_type_size(ctf_file_t *fp, ctf_id_t type)
case CTF_K_FUNCTION:
return (0); /* function size is only known by symtab */
+ case CTF_K_FORWARD:
+ return (0);
+
case CTF_K_ENUM:
return (fp->ctf_dmodel->ctd_int);
@@ -380,7 +389,22 @@ ctf_type_size(ctf_file_t *fp, ctf_id_t type)
return (-1); /* errno is set for us */
return (size * ar.ctr_nelems);
-
+ case CTF_K_STRUCT:
+ case CTF_K_UNION:
+ /*
+ * If we have a zero size, we may be in the process of adding a
+ * structure or union but having not called ctf_update() to deal
+ * with the circular dependencies in such structures and unions.
+ * To handle that case, if we get a size of zero from the ctt,
+ * we look up the dtdef and use its size instead.
+ */
+ size = ctf_get_ctt_size(fp, tp, NULL, NULL);
+ if (size == 0) {
+ ctf_dtdef_t *dtd = ctf_dtd_lookup(fp, type);
+ if (dtd != NULL)
+ return (dtd->dtd_data.ctt_size);
+ }
+ return (size);
default:
return (ctf_get_ctt_size(fp, tp, NULL, NULL));
}
@@ -868,3 +892,296 @@ ctf_type_visit(ctf_file_t *fp, ctf_id_t type, ctf_visit_f *func, void *arg)
{
return (ctf_type_rvisit(fp, type, func, arg, "", 0, 0));
}
+
+int
+ctf_func_info_by_id(ctf_file_t *fp, ctf_id_t type, ctf_funcinfo_t *fip)
+{
+ ctf_file_t *ofp = fp;
+ const ctf_type_t *tp;
+ const ushort_t *dp;
+ int nargs;
+ ssize_t increment;
+
+ if ((tp = ctf_lookup_by_id(&fp, type)) == NULL)
+ return (CTF_ERR); /* errno is set for us */
+
+ if (LCTF_INFO_KIND(fp, tp->ctt_info) != CTF_K_FUNCTION)
+ return (ctf_set_errno(ofp, ECTF_NOTFUNC));
+
+ fip->ctc_return = tp->ctt_type;
+ nargs = LCTF_INFO_VLEN(fp, tp->ctt_info);
+ fip->ctc_argc = nargs;
+ fip->ctc_flags = 0;
+
+ /* dp should now point to the first argument */
+ if (nargs != 0) {
+ (void) ctf_get_ctt_size(fp, tp, NULL, &increment);
+ dp = (ushort_t *)((uintptr_t)fp->ctf_buf +
+ fp->ctf_txlate[CTF_TYPE_TO_INDEX(type)] + increment);
+ if (dp[nargs - 1] == 0) {
+ fip->ctc_flags |= CTF_FUNC_VARARG;
+ fip->ctc_argc--;
+ }
+ }
+
+ return (0);
+}
+
+int
+ctf_func_args_by_id(ctf_file_t *fp, ctf_id_t type, uint_t argc, ctf_id_t *argv)
+{
+ ctf_file_t *ofp = fp;
+ const ctf_type_t *tp;
+ const ushort_t *dp;
+ int nargs;
+ ssize_t increment;
+
+ if ((tp = ctf_lookup_by_id(&fp, type)) == NULL)
+ return (CTF_ERR); /* errno is set for us */
+
+ if (LCTF_INFO_KIND(fp, tp->ctt_info) != CTF_K_FUNCTION)
+ return (ctf_set_errno(ofp, ECTF_NOTFUNC));
+
+ nargs = LCTF_INFO_VLEN(fp, tp->ctt_info);
+ (void) ctf_get_ctt_size(fp, tp, NULL, &increment);
+ dp = (ushort_t *)((uintptr_t)fp->ctf_buf +
+ fp->ctf_txlate[CTF_TYPE_TO_INDEX(type)] +
+ increment);
+ if (nargs != 0 && dp[nargs - 1] == 0)
+ nargs--;
+
+ for (nargs = MIN(argc, nargs); nargs != 0; nargs--)
+ *argv++ = *dp++;
+
+ return (0);
+}
+
+int
+ctf_object_iter(ctf_file_t *fp, ctf_object_f *func, void *arg)
+{
+ int i, ret;
+ ctf_id_t id;
+ uintptr_t symbase = (uintptr_t)fp->ctf_symtab.cts_data;
+ uintptr_t strbase = (uintptr_t)fp->ctf_strtab.cts_data;
+
+ if (fp->ctf_symtab.cts_data == NULL)
+ return (ctf_set_errno(fp, ECTF_NOSYMTAB));
+
+ for (i = 0; i < fp->ctf_nsyms; i++) {
+ char *name;
+ if (fp->ctf_sxlate[i] == -1u)
+ continue;
+ id = *(ushort_t *)((uintptr_t)fp->ctf_buf +
+ fp->ctf_sxlate[i]);
+
+ /*
+ * Validate whether or not we're looking at a data object as
+ * oposed to a function.
+ */
+ if (fp->ctf_symtab.cts_entsize == sizeof (Elf32_Sym)) {
+ const Elf32_Sym *symp = (Elf32_Sym *)symbase + i;
+ if (ELF32_ST_TYPE(symp->st_info) != STT_OBJECT)
+ continue;
+ if (fp->ctf_strtab.cts_data != NULL &&
+ symp->st_name != 0)
+ name = (char *)(strbase + symp->st_name);
+ else
+ name = NULL;
+ } else {
+ const Elf64_Sym *symp = (Elf64_Sym *)symbase + i;
+ if (ELF64_ST_TYPE(symp->st_info) != STT_OBJECT)
+ continue;
+ if (fp->ctf_strtab.cts_data != NULL &&
+ symp->st_name != 0)
+ name = (char *)(strbase + symp->st_name);
+ else
+ name = NULL;
+ }
+
+ if ((ret = func(name, id, i, arg)) != 0)
+ return (ret);
+ }
+
+ return (0);
+}
+
+int
+ctf_function_iter(ctf_file_t *fp, ctf_function_f *func, void *arg)
+{
+ int i, ret;
+ uintptr_t symbase = (uintptr_t)fp->ctf_symtab.cts_data;
+ uintptr_t strbase = (uintptr_t)fp->ctf_strtab.cts_data;
+
+ if (fp->ctf_symtab.cts_data == NULL)
+ return (ctf_set_errno(fp, ECTF_NOSYMTAB));
+
+ for (i = 0; i < fp->ctf_nsyms; i++) {
+ char *name;
+ ushort_t info, *dp;
+ ctf_funcinfo_t fi;
+ if (fp->ctf_sxlate[i] == -1u)
+ continue;
+
+ dp = (ushort_t *)((uintptr_t)fp->ctf_buf +
+ fp->ctf_sxlate[i]);
+ info = *dp;
+ if (info == 0)
+ continue;
+
+ /*
+ * This may be a function or it may be a data object. We have to
+ * consult the symbol table to be certain. Functions are encoded
+ * with their info, data objects with their actual type.
+ */
+ if (fp->ctf_symtab.cts_entsize == sizeof (Elf32_Sym)) {
+ const Elf32_Sym *symp = (Elf32_Sym *)symbase + i;
+ if (ELF32_ST_TYPE(symp->st_info) != STT_FUNC)
+ continue;
+ if (fp->ctf_strtab.cts_data != NULL)
+ name = (char *)(strbase + symp->st_name);
+ else
+ name = NULL;
+ } else {
+ const Elf64_Sym *symp = (Elf64_Sym *)symbase + i;
+ if (ELF64_ST_TYPE(symp->st_info) != STT_FUNC)
+ continue;
+ if (fp->ctf_strtab.cts_data != NULL)
+ name = (char *)(strbase + symp->st_name);
+ else
+ name = NULL;
+ }
+
+ if (LCTF_INFO_KIND(fp, info) != CTF_K_FUNCTION)
+ continue;
+ dp++;
+ fi.ctc_return = *dp;
+ dp++;
+ fi.ctc_argc = LCTF_INFO_VLEN(fp, info);
+ fi.ctc_flags = 0;
+
+ if (fi.ctc_argc != 0 && dp[fi.ctc_argc - 1] == 0) {
+ fi.ctc_flags |= CTF_FUNC_VARARG;
+ fi.ctc_argc--;
+ }
+
+ if ((ret = func(name, i, &fi, arg)) != 0)
+ return (ret);
+
+ }
+
+ return (0);
+}
+
+char *
+ctf_symbol_name(ctf_file_t *fp, ulong_t idx, char *buf, size_t len)
+{
+ const char *name;
+ uintptr_t symbase = (uintptr_t)fp->ctf_symtab.cts_data;
+ uintptr_t strbase = (uintptr_t)fp->ctf_strtab.cts_data;
+
+ if (fp->ctf_symtab.cts_data == NULL) {
+ (void) ctf_set_errno(fp, ECTF_NOSYMTAB);
+ return (NULL);
+ }
+
+ if (fp->ctf_strtab.cts_data == NULL) {
+ (void) ctf_set_errno(fp, ECTF_STRTAB);
+ return (NULL);
+ }
+
+ if (idx > fp->ctf_nsyms) {
+ (void) ctf_set_errno(fp, ECTF_NOTDATA);
+ return (NULL);
+ }
+
+ if (fp->ctf_symtab.cts_entsize == sizeof (Elf32_Sym)) {
+ const Elf32_Sym *symp = (Elf32_Sym *)symbase + idx;
+ if (ELF32_ST_TYPE(symp->st_info) != STT_OBJECT &&
+ ELF32_ST_TYPE(symp->st_info) != STT_FUNC) {
+ (void) ctf_set_errno(fp, ECTF_NOTDATA);
+ return (NULL);
+ }
+ if (symp->st_name == 0) {
+ (void) ctf_set_errno(fp, ENOENT);
+ return (NULL);
+ }
+ name = (const char *)(strbase + symp->st_name);
+ } else {
+ const Elf64_Sym *symp = (Elf64_Sym *)symbase + idx;
+ if (ELF64_ST_TYPE(symp->st_info) != STT_FUNC &&
+ ELF64_ST_TYPE(symp->st_info) != STT_OBJECT) {
+ (void) ctf_set_errno(fp, ECTF_NOTDATA);
+ return (NULL);
+ }
+ if (symp->st_name == 0) {
+ (void) ctf_set_errno(fp, ENOENT);
+ return (NULL);
+ }
+ name = (const char *)(strbase + symp->st_name);
+ }
+
+ (void) strlcpy(buf, name, len);
+
+ return (buf);
+}
+
+int
+ctf_string_iter(ctf_file_t *fp, ctf_string_f *func, void *arg)
+{
+ int rc;
+ const char *strp = fp->ctf_str[CTF_STRTAB_0].cts_strs;
+ size_t strl = fp->ctf_str[CTF_STRTAB_0].cts_len;
+
+ while (strl > 0) {
+ size_t len;
+
+ if ((rc = func(strp, arg)) != 0)
+ return (rc);
+
+ len = strlen(strp) + 1;
+ strl -= len;
+ strp += len;
+ }
+
+ return (0);
+}
+
+/*
+ * fp isn't strictly necesasry at the moment. However, if we ever rev the file
+ * format, the valid values for kind will change.
+ */
+const char *
+ctf_kind_name(ctf_file_t *fp, int kind)
+{
+ switch (kind) {
+ case CTF_K_INTEGER:
+ return ("integer");
+ case CTF_K_FLOAT:
+ return ("float");
+ case CTF_K_POINTER:
+ return ("pointer");
+ case CTF_K_ARRAY:
+ return ("array");
+ case CTF_K_FUNCTION:
+ return ("function");
+ case CTF_K_STRUCT:
+ return ("struct");
+ case CTF_K_UNION:
+ return ("union");
+ case CTF_K_ENUM:
+ return ("enum");
+ case CTF_K_FORWARD:
+ return ("forward");
+ case CTF_K_TYPEDEF:
+ return ("typedef");
+ case CTF_K_VOLATILE:
+ return ("volatile");
+ case CTF_K_CONST:
+ return ("const");
+ case CTF_K_RESTRICT:
+ return ("restrict");
+ case CTF_K_UNKNOWN:
+ default:
+ return ("unknown");
+ }
+}
diff --git a/usr/src/common/ctf/ctf_util.c b/usr/src/common/ctf/ctf_util.c
index 740d403e8c..550195b5e1 100644
--- a/usr/src/common/ctf/ctf_util.c
+++ b/usr/src/common/ctf/ctf_util.c
@@ -23,10 +23,12 @@
* Copyright 2005 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2015, Joyent, Inc.
+ */
#include <ctf_impl.h>
+#include <sys/debug.h>
/*
* Simple doubly-linked list append routine. This implementation assumes that
@@ -71,6 +73,24 @@ ctf_list_prepend(ctf_list_t *lp, void *new)
lp->l_prev = p;
}
+void
+ctf_list_insert_before(ctf_list_t *head, void *item, void *nitem)
+{
+ ctf_list_t *lp = item;
+ ctf_list_t *new = nitem;
+ ctf_list_t *prev = lp->l_prev;
+
+ lp->l_prev = new;
+ new->l_next = lp;
+ new->l_prev = prev;
+ if (prev != NULL) {
+ prev->l_next = new;
+ } else {
+ ASSERT(head->l_next == lp);
+ head->l_next = new;
+ }
+}
+
/*
* Delete the specified existing element from the given ctf_list_t. The
* existing pointer should be pointing at a struct with embedded ctf_list_t.
@@ -150,3 +170,22 @@ ctf_set_errno(ctf_file_t *fp, int err)
fp->ctf_errno = err;
return (CTF_ERR);
}
+
+boolean_t
+ctf_sym_valid(uintptr_t strbase, int type, uint16_t shndx, uint64_t val,
+ uint32_t noff)
+{
+ const char *name;
+
+ if (type != STT_OBJECT && type != STT_FUNC)
+ return (B_FALSE);
+ if (shndx == SHN_UNDEF || noff == 0)
+ return (B_FALSE);
+ if (type == STT_OBJECT && shndx == SHN_ABS && val == 0)
+ return (B_FALSE);
+ name = (char *)(strbase + noff);
+ if (strcmp(name, "_START_") == 0 || strcmp(name, "_END_") == 0)
+ return (B_FALSE);
+
+ return (B_TRUE);
+}
diff --git a/usr/src/common/dis/i386/dis_tables.c b/usr/src/common/dis/i386/dis_tables.c
index 9e63143ef6..c21c392d77 100644
--- a/usr/src/common/dis/i386/dis_tables.c
+++ b/usr/src/common/dis/i386/dis_tables.c
@@ -237,7 +237,8 @@ enum {
VMxo, /* VMx instruction with optional prefix */
SVM, /* AMD SVM instructions */
BLS, /* BLSR, BLSMSK, BLSI */
- FMA /* FMA instructions, all VEX_RMrX */
+ FMA, /* FMA instructions, all VEX_RMrX */
+ ADX /* ADX instructions, support REX.w, mod_rm->mod_reg */
};
/*
@@ -569,7 +570,7 @@ const instable_t dis_op0FC7[8] = {
const instable_t dis_op0FC7m3[8] = {
/* [0] */ INVALID, INVALID, INVALID, INVALID,
-/* [4] */ INVALID, INVALID, TNS("rdrand",MG9), INVALID,
+/* [4] */ INVALID, INVALID, TNS("rdrand",MG9), TNS("rdseed", MG9),
};
/*
@@ -1405,6 +1406,15 @@ const instable_t dis_op0F38F1[2] = {
TS("movbe",MOVBE),
};
+/*
+ * The following table is used to distinguish between adox and adcx which share
+ * the same opcodes.
+ */
+const instable_t dis_op0F38F6[2] = {
+/* [00] */ TNS("adcx",ADX),
+ TNS("adox",ADX),
+};
+
const instable_t dis_op0F38[256] = {
/* [00] */ TNSZ("pshufb",XMM_66o,16),TNSZ("phaddw",XMM_66o,16),TNSZ("phaddd",XMM_66o,16),TNSZ("phaddsw",XMM_66o,16),
/* [04] */ TNSZ("pmaddubsw",XMM_66o,16),TNSZ("phsubw",XMM_66o,16), TNSZ("phsubd",XMM_66o,16),TNSZ("phsubsw",XMM_66o,16),
@@ -1481,7 +1491,7 @@ const instable_t dis_op0F38[256] = {
/* [E8] */ INVALID, INVALID, INVALID, INVALID,
/* [EC] */ INVALID, INVALID, INVALID, INVALID,
/* [F0] */ IND(dis_op0F38F0), IND(dis_op0F38F1), INVALID, INVALID,
-/* [F4] */ INVALID, INVALID, INVALID, INVALID,
+/* [F4] */ INVALID, INVALID, IND(dis_op0F38F6), INVALID,
/* [F8] */ INVALID, INVALID, INVALID, INVALID,
/* [FC] */ INVALID, INVALID, INVALID, INVALID,
};
@@ -1532,8 +1542,6 @@ const instable_t dis_opAVX660F38[256] = {
/* [88] */ INVALID, INVALID, INVALID, INVALID,
/* [8C] */ TSaZ("vpmaskmov",VEX_RMrX,16),INVALID, TSaZ("vpmaskmov",VEX_RRM,16),INVALID,
-/* XXX All of the gather things are a bit wrong. We're not properly changing the last character */
-
/* [90] */ TNSZ("vpgatherd",VEX_SbVM,16),TNSZ("vpgatherq",VEX_SbVM,16),TNSZ("vgatherdp",VEX_SbVM,16),TNSZ("vgatherqp",VEX_SbVM,16),
/* [94] */ INVALID, INVALID, TNSZ("vfmaddsub132p",FMA,16),TNSZ("vfmsubadd132p",FMA,16),
/* [98] */ TNSZ("vfmadd132p",FMA,16),TNSZ("vfmadd132s",FMA,16),TNSZ("vfmsub132p",FMA,16),TNSZ("vfmsub132s",FMA,16),
@@ -1734,6 +1742,15 @@ const instable_t dis_opAVX660F3A[256] = {
};
/*
+ * Decode table for 0x0F0D which uses the first byte of the mod_rm to
+ * indicate a sub-code.
+ */
+const instable_t dis_op0F0D[8] = {
+/* [00] */ INVALID, TNS("prefetchw",PREF), TNS("prefetchwt1",PREF),INVALID,
+/* [04] */ INVALID, INVALID, INVALID, INVALID,
+};
+
+/*
* Decode table for 0x0F opcodes
*/
@@ -1742,7 +1759,7 @@ const instable_t dis_op0F[16][16] = {
/* [00] */ IND(dis_op0F00), IND(dis_op0F01), TNS("lar",MR), TNS("lsl",MR),
/* [04] */ INVALID, TNS("syscall",NORM), TNS("clts",NORM), TNS("sysret",NORM),
/* [08] */ TNS("invd",NORM), TNS("wbinvd",NORM), INVALID, TNS("ud2",NORM),
-/* [0C] */ INVALID, INVALID, INVALID, INVALID,
+/* [0C] */ INVALID, IND(dis_op0F0D), INVALID, INVALID,
}, {
/* [10] */ TNSZ("movups",XMMO,16), TNSZ("movups",XMMOS,16),TNSZ("movlps",XMMO,8), TNSZ("movlps",XMMOS,8),
/* [14] */ TNSZ("unpcklps",XMMO,16),TNSZ("unpckhps",XMMO,16),TNSZ("movhps",XMMOM,8),TNSZ("movhps",XMMOMS,8),
@@ -3310,11 +3327,49 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode)
dp++;
}
}
+
+ /*
+ * The adx family of instructions (adcx and adox)
+ * continue the classic Intel tradition of abusing
+ * arbitrary prefixes without actually meaning the
+ * prefix bit. Therefore, if we find either the
+ * opnd_size_prefix or rep_prefix we end up zeroing it
+ * out after making our determination so as to ensure
+ * that we don't get confused and accidentally print
+ * repz prefixes and the like on these instructions.
+ *
+ * In addition, these instructions are actually much
+ * closer to AVX instructions in semantics. Importantly,
+ * they always default to having 32-bit operands.
+ * However, if the CPU is in 64-bit mode, then and only
+ * then, does it use REX.w promotes things to 64-bits
+ * and REX.r allows 64-bit mode to use register r8-r15.
+ */
+ if (dp->it_indirect == (instable_t *)dis_op0F38F6) {
+ dp = dp->it_indirect;
+ if (opnd_size_prefix == 0 &&
+ rep_prefix == 0xf3) {
+ /* It is adox */
+ dp++;
+ } else if (opnd_size_prefix != 0x66 &&
+ rep_prefix != 0) {
+ /* It isn't adcx */
+ goto error;
+ }
+ opnd_size_prefix = 0;
+ rep_prefix = 0;
+ opnd_size = SIZE32;
+ if (rex_prefix & REX_W)
+ opnd_size = SIZE64;
+ }
+
#ifdef DIS_TEXT
if (strcmp(dp->it_name, "INVALID") == 0)
goto error;
#endif
switch (dp->it_adrmode) {
+ case ADX:
+ break;
case RM_66r:
case XMM_66r:
case XMMM_66r:
@@ -3719,6 +3774,7 @@ dtrace_disx86(dis86_t *x, uint_t cpu_mode)
/* memory or register operand to register, with 'w' bit */
case MRw:
+ case ADX:
wbit = WBIT(opcode2);
STANDARD_MODRM(x, mode, reg, r_m, rex_prefix, wbit, 0);
break;
@@ -4018,6 +4074,18 @@ just_mem:
#endif
NOMEM;
break;
+ } else if (r_m == 2) {
+#ifdef DIS_TEXT
+ (void) strncpy(x->d86_mnem, "clac", OPLEN);
+#endif
+ NOMEM;
+ break;
+ } else if (r_m == 3) {
+#ifdef DIS_TEXT
+ (void) strncpy(x->d86_mnem, "stac", OPLEN);
+#endif
+ NOMEM;
+ break;
} else {
goto error;
}
diff --git a/usr/src/common/elfcap/elfcap.c b/usr/src/common/elfcap/elfcap.c
index 25cef1e307..d63db8e981 100644
--- a/usr/src/common/elfcap/elfcap.c
+++ b/usr/src/common/elfcap/elfcap.c
@@ -332,6 +332,14 @@ static const elfcap_desc_t hw2_386[ELFCAP_NUM_HW2_386] = {
AV_386_2_AVX2, STRDESC("AV_386_2_AVX2"),
STRDESC("AVX2"), STRDESC("avx2"),
},
+ { /* 0x00000040 */
+ AV_386_2_ADX, STRDESC("AV_386_2_ADX"),
+ STRDESC("ADX"), STRDESC("adx"),
+ },
+ { /* 0x00000080 */
+ AV_386_2_RDSEED, STRDESC("AV_386_2_RDSEED"),
+ STRDESC("RDSEED"), STRDESC("rdseed"),
+ }
};
/*
diff --git a/usr/src/common/elfcap/elfcap.h b/usr/src/common/elfcap/elfcap.h
index fc2101ac87..f3e29a6a97 100644
--- a/usr/src/common/elfcap/elfcap.h
+++ b/usr/src/common/elfcap/elfcap.h
@@ -115,7 +115,7 @@ typedef enum {
#define ELFCAP_NUM_SF1 3
#define ELFCAP_NUM_HW1_SPARC 17
#define ELFCAP_NUM_HW1_386 32
-#define ELFCAP_NUM_HW2_386 6
+#define ELFCAP_NUM_HW2_386 8
/*
diff --git a/usr/src/uts/common/os/id_space.c b/usr/src/common/idspace/id_space.c
index 2dad0cb940..7d28a8f533 100644
--- a/usr/src/uts/common/os/id_space.c
+++ b/usr/src/common/idspace/id_space.c
@@ -53,6 +53,10 @@
* reservation, in which an ID is allocated but placed in a internal
* dictionary for later use, should be added when a consuming subsystem
* arrives.)
+ *
+ * This code is also shared with userland. In userland, we don't have the same
+ * ability to have sleeping variants, so we effectively turn the normal
+ * versions without _nosleep into _nosleep.
*/
#define ID_TO_ADDR(id) ((void *)(uintptr_t)(id + 1))
@@ -60,16 +64,22 @@
/*
* Create an arena to represent the range [low, high).
- * Caller must be in a context in which VM_SLEEP is legal.
+ * Caller must be in a context in which VM_SLEEP is legal,
+ * for the kernel. Always VM_NOSLEEP in userland.
*/
id_space_t *
id_space_create(const char *name, id_t low, id_t high)
{
+#ifdef _KERNEL
+ int flag = VM_SLEEP;
+#else
+ int flag = VM_NOSLEEP;
+#endif
ASSERT(low >= 0);
ASSERT(low < high);
return (vmem_create(name, ID_TO_ADDR(low), high - low, 1,
- NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER));
+ NULL, NULL, NULL, 0, flag | VMC_IDENTIFIER));
}
/*
@@ -85,7 +95,12 @@ id_space_destroy(id_space_t *isp)
void
id_space_extend(id_space_t *isp, id_t low, id_t high)
{
- (void) vmem_add(isp, ID_TO_ADDR(low), high - low, VM_SLEEP);
+#ifdef _KERNEL
+ int flag = VM_SLEEP;
+#else
+ int flag = VM_NOSLEEP;
+#endif
+ (void) vmem_add(isp, ID_TO_ADDR(low), high - low, flag);
}
/*
@@ -95,7 +110,12 @@ id_space_extend(id_space_t *isp, id_t low, id_t high)
id_t
id_alloc(id_space_t *isp)
{
- return (ADDR_TO_ID(vmem_alloc(isp, 1, VM_SLEEP | VM_NEXTFIT)));
+#ifdef _KERNEL
+ int flag = VM_SLEEP;
+#else
+ int flag = VM_NOSLEEP;
+#endif
+ return (ADDR_TO_ID(vmem_alloc(isp, 1, flag | VM_NEXTFIT)));
}
/*
@@ -116,7 +136,12 @@ id_alloc_nosleep(id_space_t *isp)
id_t
id_allocff(id_space_t *isp)
{
- return (ADDR_TO_ID(vmem_alloc(isp, 1, VM_SLEEP | VM_FIRSTFIT)));
+#ifdef _KERNEL
+ int flag = VM_SLEEP;
+#else
+ int flag = VM_NOSLEEP;
+#endif
+ return (ADDR_TO_ID(vmem_alloc(isp, 1, flag | VM_FIRSTFIT)));
}
/*
diff --git a/usr/src/common/inet/inet_hash.c b/usr/src/common/inet/inet_hash.c
new file mode 100644
index 0000000000..3a511fe588
--- /dev/null
+++ b/usr/src/common/inet/inet_hash.c
@@ -0,0 +1,359 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
+ */
+
+/*
+ * Common routines usable by any part of the networking stack for hashing
+ * packets. The hashing logic originally was part of MAC, but it has more
+ * utility being usable by the rest of the broader system.
+ */
+
+#include <sys/types.h>
+#include <sys/mac.h>
+#include <sys/strsubr.h>
+#include <sys/strsun.h>
+#include <sys/vlan.h>
+#include <inet/ip.h>
+#include <inet/ip_impl.h>
+#include <inet/ip6.h>
+#include <sys/dlpi.h>
+#include <sys/sunndi.h>
+#include <inet/ipsec_impl.h>
+#include <inet/sadb.h>
+#include <inet/ipsecesp.h>
+#include <inet/ipsecah.h>
+#include <inet/inet_hash.h>
+
+/*
+ * Determines the IPv6 header length accounting for all the optional IPv6
+ * headers (hop-by-hop, destination, routing and fragment). The header length
+ * and next header value (a transport header) is captured.
+ *
+ * Returns B_FALSE if all the IP headers are not in the same mblk otherwise
+ * returns B_TRUE.
+ */
+static boolean_t
+inet_pkthash_ip_hdr_length_v6(ip6_t *ip6h, uint8_t *endptr,
+ uint16_t *hdr_length, uint8_t *next_hdr, ip6_frag_t **fragp)
+{
+ uint16_t length;
+ uint_t ehdrlen;
+ uint8_t *whereptr;
+ uint8_t *nexthdrp;
+ ip6_dest_t *desthdr;
+ ip6_rthdr_t *rthdr;
+ ip6_frag_t *fraghdr;
+
+ if (((uchar_t *)ip6h + IPV6_HDR_LEN) > endptr)
+ return (B_FALSE);
+ ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
+ length = IPV6_HDR_LEN;
+ whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
+
+ if (fragp != NULL)
+ *fragp = NULL;
+
+ nexthdrp = &ip6h->ip6_nxt;
+ while (whereptr < endptr) {
+ /* Is there enough left for len + nexthdr? */
+ if (whereptr + MIN_EHDR_LEN > endptr)
+ break;
+
+ switch (*nexthdrp) {
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_DSTOPTS:
+ /* Assumes the headers are identical for hbh and dst */
+ desthdr = (ip6_dest_t *)whereptr;
+ ehdrlen = 8 * (desthdr->ip6d_len + 1);
+ if ((uchar_t *)desthdr + ehdrlen > endptr)
+ return (B_FALSE);
+ nexthdrp = &desthdr->ip6d_nxt;
+ break;
+ case IPPROTO_ROUTING:
+ rthdr = (ip6_rthdr_t *)whereptr;
+ ehdrlen = 8 * (rthdr->ip6r_len + 1);
+ if ((uchar_t *)rthdr + ehdrlen > endptr)
+ return (B_FALSE);
+ nexthdrp = &rthdr->ip6r_nxt;
+ break;
+ case IPPROTO_FRAGMENT:
+ fraghdr = (ip6_frag_t *)whereptr;
+ ehdrlen = sizeof (ip6_frag_t);
+ if ((uchar_t *)&fraghdr[1] > endptr)
+ return (B_FALSE);
+ nexthdrp = &fraghdr->ip6f_nxt;
+ if (fragp != NULL)
+ *fragp = fraghdr;
+ break;
+ case IPPROTO_NONE:
+ /* No next header means we're finished */
+ default:
+ *hdr_length = length;
+ *next_hdr = *nexthdrp;
+ return (B_TRUE);
+ }
+ length += ehdrlen;
+ whereptr += ehdrlen;
+ *hdr_length = length;
+ *next_hdr = *nexthdrp;
+ }
+ switch (*nexthdrp) {
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_DSTOPTS:
+ case IPPROTO_ROUTING:
+ case IPPROTO_FRAGMENT:
+ /*
+ * If any known extension headers are still to be processed,
+ * the packet's malformed (or at least all the IP header(s) are
+ * not in the same mblk - and that should never happen.
+ */
+ return (B_FALSE);
+
+ default:
+ /*
+ * If we get here, we know that all of the IP headers were in
+ * the same mblk, even if the ULP header is in the next mblk.
+ */
+ *hdr_length = length;
+ *next_hdr = *nexthdrp;
+ return (B_TRUE);
+ }
+}
+
+#define PKT_HASH_2BYTES(x) ((x)[0] ^ (x)[1])
+#define PKT_HASH_4BYTES(x) ((x)[0] ^ (x)[1] ^ (x)[2] ^ (x)[3])
+#define PKT_HASH_MAC(x) ((x)[0] ^ (x)[1] ^ (x)[2] ^ (x)[3] ^ (x)[4] ^ (x)[5])
+uint64_t
+inet_pkt_hash(uint_t media, mblk_t *mp, uint8_t policy)
+{
+ struct ether_header *ehp;
+ uint64_t hash = 0;
+ uint16_t sap;
+ uint_t skip_len;
+ uint8_t proto;
+ boolean_t ip_fragmented;
+
+ /*
+ * We may want to have one of these per MAC type plugin in the
+ * future. For now supports only ethernet.
+ */
+ if (media != DL_ETHER)
+ return (0L);
+
+ /* for now we support only outbound packets */
+ ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)));
+ ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
+
+ /* compute L2 hash */
+
+ ehp = (struct ether_header *)mp->b_rptr;
+
+ if ((policy & INET_PKT_HASH_L2) != 0) {
+ uchar_t *mac_src = ehp->ether_shost.ether_addr_octet;
+ uchar_t *mac_dst = ehp->ether_dhost.ether_addr_octet;
+ hash = PKT_HASH_MAC(mac_src) ^ PKT_HASH_MAC(mac_dst);
+ policy &= ~INET_PKT_HASH_L2;
+ }
+
+ if (policy == 0)
+ goto done;
+
+ /* skip ethernet header */
+
+ sap = ntohs(ehp->ether_type);
+ if (sap == ETHERTYPE_VLAN) {
+ struct ether_vlan_header *evhp;
+ mblk_t *newmp = NULL;
+
+ skip_len = sizeof (struct ether_vlan_header);
+ if (MBLKL(mp) < skip_len) {
+ /* the vlan tag is the payload, pull up first */
+ newmp = msgpullup(mp, -1);
+ if ((newmp == NULL) || (MBLKL(newmp) < skip_len)) {
+ goto done;
+ }
+ evhp = (struct ether_vlan_header *)newmp->b_rptr;
+ } else {
+ evhp = (struct ether_vlan_header *)mp->b_rptr;
+ }
+
+ sap = ntohs(evhp->ether_type);
+ freemsg(newmp);
+ } else {
+ skip_len = sizeof (struct ether_header);
+ }
+
+ /* if ethernet header is in its own mblk, skip it */
+ if (MBLKL(mp) <= skip_len) {
+ skip_len -= MBLKL(mp);
+ mp = mp->b_cont;
+ if (mp == NULL)
+ goto done;
+ }
+
+ sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap;
+
+ /* compute IP src/dst addresses hash and skip IPv{4,6} header */
+
+ switch (sap) {
+ case ETHERTYPE_IP: {
+ ipha_t *iphp;
+
+ /*
+ * If the header is not aligned or the header doesn't fit
+ * in the mblk, bail now. Note that this may cause packet
+ * reordering.
+ */
+ iphp = (ipha_t *)(mp->b_rptr + skip_len);
+ if (((unsigned char *)iphp + sizeof (ipha_t) > mp->b_wptr) ||
+ !OK_32PTR((char *)iphp))
+ goto done;
+
+ proto = iphp->ipha_protocol;
+ skip_len += IPH_HDR_LENGTH(iphp);
+
+ /* Check if the packet is fragmented. */
+ ip_fragmented = ntohs(iphp->ipha_fragment_offset_and_flags) &
+ IPH_OFFSET;
+
+ /*
+ * For fragmented packets, use addresses in addition to
+ * the frag_id to generate the hash inorder to get
+ * better distribution.
+ */
+ if (ip_fragmented || (policy & INET_PKT_HASH_L3) != 0) {
+ uint8_t *ip_src = (uint8_t *)&(iphp->ipha_src);
+ uint8_t *ip_dst = (uint8_t *)&(iphp->ipha_dst);
+
+ hash ^= (PKT_HASH_4BYTES(ip_src) ^
+ PKT_HASH_4BYTES(ip_dst));
+ policy &= ~INET_PKT_HASH_L3;
+ }
+
+ if (ip_fragmented) {
+ uint8_t *identp = (uint8_t *)&iphp->ipha_ident;
+ hash ^= PKT_HASH_2BYTES(identp);
+ goto done;
+ }
+ break;
+ }
+ case ETHERTYPE_IPV6: {
+ ip6_t *ip6hp;
+ ip6_frag_t *frag = NULL;
+ uint16_t hdr_length;
+
+ /*
+ * If the header is not aligned or the header doesn't fit
+ * in the mblk, bail now. Note that this may cause packets
+ * reordering.
+ */
+
+ ip6hp = (ip6_t *)(mp->b_rptr + skip_len);
+ if (((unsigned char *)ip6hp + IPV6_HDR_LEN > mp->b_wptr) ||
+ !OK_32PTR((char *)ip6hp))
+ goto done;
+
+ if (!inet_pkthash_ip_hdr_length_v6(ip6hp, mp->b_wptr,
+ &hdr_length, &proto, &frag))
+ goto done;
+ skip_len += hdr_length;
+
+ /*
+ * For fragmented packets, use addresses in addition to
+ * the frag_id to generate the hash inorder to get
+ * better distribution.
+ */
+ if (frag != NULL || (policy & INET_PKT_HASH_L3) != 0) {
+ uint8_t *ip_src = &(ip6hp->ip6_src.s6_addr8[12]);
+ uint8_t *ip_dst = &(ip6hp->ip6_dst.s6_addr8[12]);
+
+ hash ^= (PKT_HASH_4BYTES(ip_src) ^
+ PKT_HASH_4BYTES(ip_dst));
+ policy &= ~INET_PKT_HASH_L3;
+ }
+
+ if (frag != NULL) {
+ uint8_t *identp = (uint8_t *)&frag->ip6f_ident;
+ hash ^= PKT_HASH_4BYTES(identp);
+ goto done;
+ }
+ break;
+ }
+ default:
+ goto done;
+ }
+
+ if (policy == 0)
+ goto done;
+
+ /* if ip header is in its own mblk, skip it */
+ if (MBLKL(mp) <= skip_len) {
+ skip_len -= MBLKL(mp);
+ mp = mp->b_cont;
+ if (mp == NULL)
+ goto done;
+ }
+
+ /* parse ULP header */
+again:
+ switch (proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ case IPPROTO_ESP:
+ case IPPROTO_SCTP:
+ /*
+ * These Internet Protocols are intentionally designed
+ * for hashing from the git-go. Port numbers are in the first
+ * word for transports, SPI is first for ESP.
+ */
+ if (mp->b_rptr + skip_len + 4 > mp->b_wptr)
+ goto done;
+ hash ^= PKT_HASH_4BYTES((mp->b_rptr + skip_len));
+ break;
+
+ case IPPROTO_AH: {
+ ah_t *ah = (ah_t *)(mp->b_rptr + skip_len);
+ uint_t ah_length = AH_TOTAL_LEN(ah);
+
+ if ((unsigned char *)ah + sizeof (ah_t) > mp->b_wptr)
+ goto done;
+
+ proto = ah->ah_nexthdr;
+ skip_len += ah_length;
+
+ /* if AH header is in its own mblk, skip it */
+ if (MBLKL(mp) <= skip_len) {
+ skip_len -= MBLKL(mp);
+ mp = mp->b_cont;
+ if (mp == NULL)
+ goto done;
+ }
+
+ goto again;
+ }
+ }
+
+done:
+ return (hash);
+}
diff --git a/usr/src/common/zfs/zfs_prop.c b/usr/src/common/zfs/zfs_prop.c
index b1413aa596..68b2bba2ef 100644
--- a/usr/src/common/zfs/zfs_prop.c
+++ b/usr/src/common/zfs/zfs_prop.c
@@ -460,6 +460,23 @@ zfs_prop_delegatable(zfs_prop_t prop)
return (pd->pd_attr != PROP_READONLY);
}
+boolean_t
+zfs_prop_cacheable(zfs_prop_t prop)
+{
+ /*
+ * It'd be nice if each prop had a flags field which could have flag
+ * like PROP_CACHEABLE, but since zprop_attr_t is an enum and this
+ * setting is orthogonal to the concepts of PROP_READONLY, etc., we have
+ * this function.
+ */
+ return (prop == ZFS_PROP_VERSION ||
+ prop == ZFS_PROP_NORMALIZE ||
+ prop == ZFS_PROP_UTF8ONLY ||
+ prop == ZFS_PROP_CASE ||
+ prop == ZFS_PROP_VOLSIZE ||
+ prop == ZFS_PROP_VOLBLOCKSIZE);
+}
+
/*
* Given a zfs dataset property name, returns the corresponding property ID.
*/
diff --git a/usr/src/common/zfs/zfs_prop.h b/usr/src/common/zfs/zfs_prop.h
index a63262311b..1796642c68 100644
--- a/usr/src/common/zfs/zfs_prop.h
+++ b/usr/src/common/zfs/zfs_prop.h
@@ -21,6 +21,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#ifndef _ZFS_PROP_H
@@ -86,6 +87,7 @@ typedef struct {
void zfs_prop_init(void);
zprop_type_t zfs_prop_get_type(zfs_prop_t);
boolean_t zfs_prop_delegatable(zfs_prop_t prop);
+boolean_t zfs_prop_cacheable(zfs_prop_t prop);
zprop_desc_t *zfs_prop_get_table(void);
/*
diff --git a/usr/src/grub/Makefile b/usr/src/grub/Makefile
index 28c2eca2ff..c31e9a802c 100644
--- a/usr/src/grub/Makefile
+++ b/usr/src/grub/Makefile
@@ -29,6 +29,7 @@ INST_TARGETS += $(ROOT_BOOT_GRUB)/$(GRUB_MENU)
INST_TARGETS += $(ROOT_BOOT_GRUB)/$(INSTALL_MENU)
INST_TARGETS += $(ROOT_BOOT_GRUB)/$(GRUB_DEFAULT)
INST_TARGETS += $(ROOT_BOOT_GRUB)/$(CAPABILITY)
+INST_TARGETS += $(ROOT_USR_SBIN)/grub
$(ROOT_BOOT_GRUB)/$(GRUB_DEFAULT) := FILEMODE = 444
$(ROOT_BOOT_GRUB)/$(CAPABILITY) := FILEMODE = 444
@@ -45,9 +46,14 @@ $(GRUB): FRC
$(ROOT_BOOT_GRUB)/%: $(ROOT_BOOT_GRUB) %
$(INS.file)
+$(ROOT_USR_SBIN)/%: $(GRUB)/grub/grub $(ROOT_USR_SBIN)
+ $(INS.file)
+
$(ROOT_BOOT_GRUB):
$(INS.dir)
+$(ROOT_USR_SBIN):
+ $(INS.dir)
clean clobber: $(SUBDIRS)
diff --git a/usr/src/grub/Makefile.grub b/usr/src/grub/Makefile.grub
index 18354324ae..99942fa2ed 100644
--- a/usr/src/grub/Makefile.grub
+++ b/usr/src/grub/Makefile.grub
@@ -10,3 +10,4 @@ PLATFORM = i86pc
ROOT_BOOT_GRUB = $(ROOT)/boot/grub
ROOT_PLAT_GRUB = $(ROOT)/platform/$(PLATFORM)/boot/grub
ROOT_SRC = $(ROOT)/usr/share/src/grub
+ROOT_USR_SBIN = $(ROOT)/usr/sbin
diff --git a/usr/src/grub/grub-0.97/stage2/boot.c b/usr/src/grub/grub-0.97/stage2/boot.c
index cfc2336a4c..027de7709b 100644
--- a/usr/src/grub/grub-0.97/stage2/boot.c
+++ b/usr/src/grub/grub-0.97/stage2/boot.c
@@ -25,6 +25,8 @@
#include "imgact_aout.h"
#include "i386-elf.h"
+#define SAFE_LOAD_BASE 0xc800000
+
static int cur_addr;
entry_func entry_addr;
static struct mod_list mll[99];
@@ -773,6 +775,17 @@ load_module (char *module, char *arg)
{
int len;
+ /*
+ * XXX Workaround for RICHMOND-16: on some systems, the region
+ * [c700000, c800000) is corrupted by an unknown external (off-CPU) actor(s)
+ * during boot. To be on the safe side, we will simply ensure that every
+ * module is loaded above this region. Note that this means this particular
+ * boot loader supports only systems with at least 200 MB of DRAM plus the
+ * amount of space used by any modules.
+ */
+ if (cur_addr < SAFE_LOAD_BASE)
+ cur_addr = SAFE_LOAD_BASE;
+
/* if we are supposed to load on 4K boundaries */
cur_addr = (cur_addr + 0xFFF) & 0xFFFFF000;
diff --git a/usr/src/grub/grub-0.97/stage2/cmdline.c b/usr/src/grub/grub-0.97/stage2/cmdline.c
index 46c5fda027..6d5591e1de 100644
--- a/usr/src/grub/grub-0.97/stage2/cmdline.c
+++ b/usr/src/grub/grub-0.97/stage2/cmdline.c
@@ -212,8 +212,27 @@ run_script (char *script, char *heap)
intervention. */
if (fallback_entryno < 0)
{
- grub_printf ("\nPress any key to continue...");
- (void) getkey ();
+ int time1, time2 = -1;
+
+ grub_printf (
+ "\nRebooting in 2 minutes (press any key to continue)...");
+ grub_timeout = 120;
+
+ /* using RT clock now, need to initialize value */
+ while ((time1 = getrtsecs()) == 0xFF);
+
+ while (grub_timeout >= 0) {
+ if ((time1 = getrtsecs()) != time2 && time1 != 0xFF) {
+ time2 = time1;
+ grub_timeout--;
+ }
+
+ if (checkkey() >= 0)
+ break;
+ }
+
+ grub_printf ("\nresetting...");
+ grub_reboot();
}
return 1;
diff --git a/usr/src/head/Makefile b/usr/src/head/Makefile
index 030a9d51f8..d61732ce36 100644
--- a/usr/src/head/Makefile
+++ b/usr/src/head/Makefile
@@ -156,6 +156,7 @@ HDRS= $($(MACH)_HDRS) $(ATTRDB_HDRS) \
regex.h \
regexp.h \
resolv.h \
+ resolv_joy.h \
rje.h \
rtld_db.h \
sac.h \
@@ -220,6 +221,7 @@ HDRS= $($(MACH)_HDRS) $(ATTRDB_HDRS) \
xlocale.h \
xti.h \
xti_inet.h \
+ zdoor.h \
zone.h
ISOHDRS = \
diff --git a/usr/src/head/libzonecfg.h b/usr/src/head/libzonecfg.h
index d41dbb0520..b519a0cdcf 100644
--- a/usr/src/head/libzonecfg.h
+++ b/usr/src/head/libzonecfg.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#ifndef _LIBZONECFG_H
@@ -42,6 +43,7 @@ extern "C" {
#include <netinet/in.h>
#include <sys/socket.h>
#include <net/if.h>
+#include <sys/mac.h>
#include <stdio.h>
#include <rctl.h>
#include <zone.h>
@@ -118,6 +120,8 @@ extern "C" {
#define ZONE_STATE_MAXSTRLEN 14
+#define ZONE_PROP_MAXSTRLEN 1024
+
#define LIBZONECFG_PATH "libzonecfg.so.1"
#define ZONE_CONFIG_ROOT "/etc/zones"
@@ -127,6 +131,8 @@ extern "C" {
#define MAXAUTHS 4096
#define ZONE_MGMT_PROF "Zone Management"
+#define ZONE_INT32SZ 11 /* string to hold 32bit int. */
+
/* Owner, group, and mode (defined by packaging) for the config directory */
#define ZONE_CONFIG_UID 0 /* root */
#define ZONE_CONFIG_GID 3 /* sys */
@@ -150,9 +156,11 @@ extern "C" {
#define ALIAS_MAXSEMIDS "max-sem-ids"
#define ALIAS_MAXLOCKEDMEM "locked"
#define ALIAS_MAXSWAP "swap"
+#define ALIAS_MAXPHYSMEM "physical"
#define ALIAS_SHARES "cpu-shares"
#define ALIAS_CPUCAP "cpu-cap"
#define ALIAS_MAXPROCS "max-processes"
+#define ALIAS_ZFSPRI "zfs-io-priority"
/* Default name for zone detached manifest */
#define ZONE_DETACHED "SUNWdetached.xml"
@@ -162,6 +170,11 @@ extern "C" {
*/
#define ZONE_DRY_RUN 0x01
+typedef enum zone_iptype {
+ ZS_SHARED,
+ ZS_EXCLUSIVE
+} zone_iptype_t;
+
/*
* The integer field expresses the current values on a get.
* On a put, it represents the new values if >= 0 or "don't change" if < 0.
@@ -172,6 +185,9 @@ struct zoneent {
char zone_path[MAXPATHLEN]; /* path to zone storage */
uuid_t zone_uuid; /* unique ID for zone */
char zone_newname[ZONENAME_MAX]; /* for doing renames */
+ char zone_brand[MAXNAMELEN]; /* zone's brand */
+ zone_iptype_t zone_iptype; /* zone's IP type */
+ zoneid_t zone_did; /* persistent debug ID */
};
typedef struct zone_dochandle *zone_dochandle_t; /* opaque handle */
@@ -191,15 +207,30 @@ struct zone_fstab {
char zone_fs_raw[MAXPATHLEN]; /* device to fsck */
};
+/*
+ * Generic resource attribute list.
+ * Key/value resource that can be attached to net or device.
+ */
+struct zone_res_attrtab {
+ char zone_res_attr_name[MAXNAMELEN];
+ char zone_res_attr_value[ZONE_PROP_MAXSTRLEN];
+ struct zone_res_attrtab *zone_res_attr_next;
+};
+
struct zone_nwiftab {
char zone_nwif_address[INET6_ADDRSTRLEN]; /* shared-ip only */
char zone_nwif_allowed_address[INET6_ADDRSTRLEN]; /* excl-ip only */
char zone_nwif_physical[LIFNAMSIZ];
+ char zone_nwif_mac[MAXMACADDRLEN]; /* excl-ip only */
+ char zone_nwif_vlan_id[ZONE_INT32SZ]; /* excl-ip only */
+ char zone_nwif_gnic[LIFNAMSIZ]; /* excl-ip only */
char zone_nwif_defrouter[INET6_ADDRSTRLEN];
+ struct zone_res_attrtab *zone_nwif_attrp;
};
struct zone_devtab {
char zone_dev_match[MAXPATHLEN];
+ struct zone_res_attrtab *zone_dev_attrp;
};
struct zone_rctlvaltab {
@@ -230,10 +261,6 @@ struct zone_psettab {
char zone_importance[MAXNAMELEN];
};
-struct zone_mcaptab {
- char zone_physmem_cap[MAXNAMELEN];
-};
-
struct zone_pkgtab {
char zone_pkg_name[MAXNAMELEN];
char zone_pkg_version[ZONE_PKG_VERSMAX];
@@ -264,11 +291,6 @@ typedef struct {
char *zpe_vers;
} zone_pkg_entry_t;
-typedef enum zone_iptype {
- ZS_SHARED,
- ZS_EXCLUSIVE
-} zone_iptype_t;
-
/*
* Basic configuration management routines.
*/
@@ -296,6 +318,7 @@ extern boolean_t zonecfg_valid_importance(char *);
extern int zonecfg_str_to_bytes(char *, uint64_t *);
extern boolean_t zonecfg_valid_memlimit(char *, uint64_t *);
extern boolean_t zonecfg_valid_alias_limit(char *, char *, uint64_t *);
+extern void zonecfg_notify_create(zone_dochandle_t);
/*
* Zone name, path to zone directory, autoboot setting, pool, boot
@@ -317,6 +340,8 @@ extern int zonecfg_set_bootargs(zone_dochandle_t, char *);
extern int zonecfg_get_sched_class(zone_dochandle_t, char *, size_t);
extern int zonecfg_set_sched(zone_dochandle_t, char *);
extern int zonecfg_get_dflt_sched_class(zone_dochandle_t, char *, int);
+extern zoneid_t zonecfg_get_did(zone_dochandle_t);
+extern void zonecfg_set_did(zone_dochandle_t);
/*
* Set/retrieve the brand for the zone
@@ -341,6 +366,15 @@ extern int zonecfg_find_mounts(char *, int(*)(const struct mnttab *,
void *), void *);
/*
+ * Resource key/value attributes (properties).
+ */
+extern int zonecfg_add_res_attr(struct zone_res_attrtab **,
+ struct zone_res_attrtab *);
+extern void zonecfg_free_res_attr_list(struct zone_res_attrtab *);
+extern int zonecfg_remove_res_attr(struct zone_res_attrtab **,
+ struct zone_res_attrtab *);
+
+/*
* Network interface configuration.
*/
extern int zonecfg_add_nwif(zone_dochandle_t, struct zone_nwiftab *);
@@ -422,13 +456,6 @@ extern int zonecfg_modify_pset(zone_dochandle_t, struct zone_psettab *);
extern int zonecfg_lookup_pset(zone_dochandle_t, struct zone_psettab *);
/*
- * mem-cap configuration.
- */
-extern int zonecfg_delete_mcap(zone_dochandle_t);
-extern int zonecfg_modify_mcap(zone_dochandle_t, struct zone_mcaptab *);
-extern int zonecfg_lookup_mcap(zone_dochandle_t, struct zone_mcaptab *);
-
-/*
* Temporary pool support functions.
*/
extern int zonecfg_destroy_tmp_pool(char *, char *, int);
@@ -485,7 +512,6 @@ extern int zonecfg_setdsent(zone_dochandle_t);
extern int zonecfg_getdsent(zone_dochandle_t, struct zone_dstab *);
extern int zonecfg_enddsent(zone_dochandle_t);
extern int zonecfg_getpsetent(zone_dochandle_t, struct zone_psettab *);
-extern int zonecfg_getmcapent(zone_dochandle_t, struct zone_mcaptab *);
extern int zonecfg_getpkgdata(zone_dochandle_t, uu_avl_pool_t *,
uu_avl_t *);
extern int zonecfg_setdevperment(zone_dochandle_t);
@@ -509,6 +535,7 @@ extern int zonecfg_set_limitpriv(zone_dochandle_t, char *);
* Higher-level routines.
*/
extern int zone_get_brand(char *, char *, size_t);
+extern zoneid_t zone_get_did(char *);
extern int zone_get_rootpath(char *, char *, size_t);
extern int zone_get_devroot(char *, char *, size_t);
extern int zone_get_zonepath(char *, char *, size_t);
@@ -517,7 +544,9 @@ extern int zone_set_state(char *, zone_state_t);
extern char *zone_state_str(zone_state_t);
extern int zonecfg_get_name_by_uuid(const uuid_t, char *, size_t);
extern int zonecfg_get_uuid(const char *, uuid_t);
+extern int zonecfg_set_uuid(const char *, const char *, const char *);
extern int zonecfg_default_brand(char *, size_t);
+extern int zonecfg_fix_obsolete(zone_dochandle_t);
/*
* Iterator for configured zones.
diff --git a/usr/src/head/limits.h b/usr/src/head/limits.h
index a6cba76cf8..17aee19837 100644
--- a/usr/src/head/limits.h
+++ b/usr/src/head/limits.h
@@ -24,6 +24,7 @@
*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1988 AT&T */
@@ -36,6 +37,7 @@
#include <sys/feature_tests.h>
#include <sys/isa_defs.h>
#include <iso/limits_iso.h>
+#include <sys/limits.h>
/*
* Include fixed width type limits as proposed by the ISO/JTC1/SC22/WG14 C
@@ -237,8 +239,6 @@ extern "C" {
#define _XOPEN_NAME_MAX 255 /* max # bytes in filename excluding null */
#define _XOPEN_PATH_MAX 1024 /* max # bytes in a pathname */
-#define IOV_MAX _XOPEN_IOV_MAX
-
#if defined(__EXTENSIONS__) || \
(!defined(_STRICT_STDC) && !defined(__XOPEN_OR_POSIX))
diff --git a/usr/src/head/regexp.h b/usr/src/head/regexp.h
index 5336c81677..e98cd25e16 100644
--- a/usr/src/head/regexp.h
+++ b/usr/src/head/regexp.h
@@ -394,12 +394,12 @@ advance(const char *lp, const char *ep)
/*FALLTHRU*/
case CBRA:
- braslist[*ep++] = (char *)lp;
+ braslist[(int)*ep++] = (char *)lp;
continue;
/*FALLTHRU*/
case CKET:
- braelist[*ep++] = (char *)lp;
+ braelist[(int)*ep++] = (char *)lp;
continue;
/*FALLTHRU*/
@@ -477,8 +477,8 @@ advance(const char *lp, const char *ep)
/*FALLTHRU*/
case CBACK:
- bbeg = braslist[*ep];
- ct = braelist[*ep++] - bbeg;
+ bbeg = braslist[(int)*ep];
+ ct = braelist[(int)*ep++] - bbeg;
if (ecmp(bbeg, lp, ct)) {
lp += ct;
@@ -488,8 +488,8 @@ advance(const char *lp, const char *ep)
/*FALLTHRU*/
case CBACK | STAR:
- bbeg = braslist[*ep];
- ct = braelist[*ep++] - bbeg;
+ bbeg = braslist[(int)*ep];
+ ct = braelist[(int)*ep++] - bbeg;
curlp = lp;
while (ecmp(bbeg, lp, ct))
lp += ct;
diff --git a/usr/src/head/resolv_joy.h b/usr/src/head/resolv_joy.h
new file mode 100644
index 0000000000..262cf6121d
--- /dev/null
+++ b/usr/src/head/resolv_joy.h
@@ -0,0 +1,472 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T
+ * All Rights Reserved
+ *
+ * Portions of this source code were derived from Berkeley
+ * 4.3 BSD under license from the regents of the University of
+ * California.
+ */
+
+/*
+ * BIND 4.9.4:
+ */
+
+/*
+ * Portions Copyright (c) 1993 by Digital Equipment Corporation.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies, and that
+ * the name of Digital Equipment Corporation not be used in advertising or
+ * publicity pertaining to distribution of the document or software without
+ * specific, written prior permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT
+ * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ * --Copyright--
+ *
+ * End BIND 4.9.4
+ */
+
+/*
+ * Copyright (c) 1983, 1987, 1989
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Portions Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
+ * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
+ * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+
+/*
+ * @(#)resolv.h 8.1 (Berkeley) 6/2/93
+ * $Id: resolv.h,v 8.52 2003/04/29 02:27:03 marka Exp $
+ */
+
+#ifndef _RESOLV_JOY_H
+#define _RESOLV_JOY_H
+
+#ifdef _RESOLV_H_
+#error "resolv.h and resolv_joy.h should not be used together"
+#endif
+
+#include <sys/param.h>
+
+#include <stdio.h>
+#include <arpa/nameser.h>
+#include <sys/socket.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Revision information. This is the release date in YYYYMMDD format.
+ * It can change every day so the right thing to do with it is use it
+ * in preprocessor commands such as "#if (__RES > 19931104)". Do not
+ * compare for equality; rather, use it to determine whether your resolver
+ * is new enough to contain a certain feature.
+ */
+
+#define __RES 20090302
+
+#pragma redefine_extname dn_expand joy_dn_expand
+#pragma redefine_extname res_nsearch joy_res_nsearch
+#pragma redefine_extname res_ninit joy_res_ninit
+#pragma redefine_extname res_ndestroy joy_res_ndestroy
+#pragma redefine_extname res_gethostbyaddr joy_res_gethostbyaddr
+#pragma redefine_extname res_gethostbyname joy_res_gethostbyname
+#pragma redefine_extname res_gethostbyname2 joy_res_gethostbyname2
+#pragma redefine_extname res_endhostent joy_res_endhostent
+#pragma redefine_extname res_sethostent joy_res_sethostent
+#pragma redefine_extname __res_override_retry __joy_res_override_retry
+#pragma redefine_extname __res_unset_no_hosts_fallback \
+ __joy_res_unset_no_hosts_fallback
+#pragma redefine_extname __res_set_no_hosts_fallback \
+ __joy_res_set_no_hosts_fallback
+#pragma redefine_extname __h_errno __joy_h_errno
+#pragma redefine_extname __ns_get16 __joy_ns_get16
+#pragma redefine_extname __ns_get32 __joy_ns_get32
+
+
+#define RES_SET_H_ERRNO(r, x) __h_errno_set(r, x)
+struct __res_state; /* forward */
+
+void __h_errno_set(struct __res_state *res, int err);
+
+/*
+ * Resolver configuration file.
+ * Normally not present, but may contain the address of the
+ * initial name server(s) to query and the domain search list.
+ */
+
+#ifndef _PATH_RESCONF
+#define _PATH_RESCONF "/etc/resolv.conf"
+#endif
+
+#ifdef __STDC__
+#ifndef __P
+#define __P(x) x
+#endif
+#else
+#ifndef __P
+#define __P(x) ()
+#endif
+#endif /* __STDC__ */
+
+typedef enum { res_goahead, res_nextns, res_modified, res_done, res_error }
+ res_sendhookact;
+
+typedef res_sendhookact (*res_send_qhook)__P((struct sockaddr * const *ns,
+ const uchar_t **query,
+ int *querylen,
+ uchar_t *ans,
+ int anssiz,
+ int *resplen));
+
+typedef res_sendhookact (*res_send_rhook)__P((const struct sockaddr *ns,
+ const uchar_t *query,
+ int querylen,
+ uchar_t *ans,
+ int anssiz,
+ int *resplen));
+
+struct res_sym {
+ int number; /* Identifying number, like T_MX */
+ const char *name; /* Its symbolic name, like "MX" */
+ const char *humanname; /* Its fun name, like "mail exchanger" */
+};
+
+/*
+ * Global defines and variables for resolver stub.
+ */
+/* ADDRSORT and MAXADDR retained for compatibility; not used */
+#define ADDRSORT 1 /* enable the address-sorting option */
+#define MAXADDR 10 /* max # addresses to sort by */
+
+#define MAXNS 32 /* max # name servers we'll track */
+#define MAXDFLSRCH 3 /* # default domain levels to try */
+#define MAXDNSRCH 6 /* max # domains in search path */
+#define LOCALDOMAINPARTS 2 /* min levels in name that is "local" */
+
+#define RES_TIMEOUT 5 /* min. seconds between retries */
+#define MAXRESOLVSORT 10 /* number of net to sort on */
+#define RES_MAXNDOTS 15 /* should reflect bit field size */
+#define RES_MAXRETRANS 30 /* only for resolv.conf/RES_OPTIONS */
+#define RES_MAXRETRY 5 /* only for resolv.conf/RES_OPTIONS */
+#define RES_DFLRETRY 2 /* Default #/tries. */
+#define RES_MAXTIME 65535 /* Infinity, in milliseconds. */
+
+struct __res_state_ext;
+
+struct __res_state {
+ int retrans; /* retransmission time interval */
+ int retry; /* number of times to retransmit */
+#ifdef __sun
+ uint_t options; /* option flags - see below. */
+#else
+ ulong_t options; /* option flags - see below. */
+#endif
+ int nscount; /* number of name servers */
+ struct sockaddr_in
+ nsaddr_list[MAXNS]; /* address of name server */
+#define nsaddr nsaddr_list[0] /* for backward compatibility */
+ ushort_t id; /* current packet id */
+ char *dnsrch[MAXDNSRCH+1]; /* components of domain to search */
+ char defdname[256]; /* default domain (deprecated) */
+#ifdef __sun
+ uint_t pfcode; /* RES_PRF_ flags - see below. */
+#else
+ ulong_t pfcode; /* RES_PRF_ flags - see below. */
+#endif
+ unsigned ndots:4; /* threshold for initial abs. query */
+ unsigned nsort:4; /* number of elements in sort_list[] */
+ char unused[3];
+ struct {
+ struct in_addr addr;
+ unsigned int mask;
+ } sort_list[MAXRESOLVSORT];
+ res_send_qhook qhook; /* query hook */
+ res_send_rhook rhook; /* response hook */
+ int res_h_errno; /* last one set for this context */
+ int _vcsock; /* PRIVATE: for res_send VC i/o */
+ uint_t _flags; /* PRIVATE: see below */
+ uint_t _pad; /* make _u 64 bit aligned */
+ union {
+ /* On an 32-bit arch this means 512b total. */
+ char pad[72 - 4*sizeof (int) - 2*sizeof (void *)];
+ struct {
+ uint16_t nscount;
+ uint16_t nstimes[MAXNS]; /* ms. */
+ int nssocks[MAXNS];
+ struct __res_state_ext *ext; /* extention for IPv6 */
+ uchar_t _rnd[16]; /* PRIVATE: random state */
+ } _ext;
+ } _u;
+};
+
+typedef struct __res_state *res_state;
+
+union res_sockaddr_union {
+ struct sockaddr_in sin;
+#ifdef IN6ADDR_ANY_INIT
+ struct sockaddr_in6 sin6;
+#endif
+#ifdef ISC_ALIGN64
+ int64_t __align64; /* 64bit alignment */
+#else
+ int32_t __align32; /* 32bit alignment */
+#endif
+ char __space[128]; /* max size */
+};
+
+/*
+ * Resolver flags (used to be discrete per-module statics ints).
+ */
+#define RES_F_VC 0x00000001 /* socket is TCP */
+#define RES_F_CONN 0x00000002 /* socket is connected */
+#define RES_F_EDNS0ERR 0x00000004 /* EDNS0 caused errors */
+#define RES_F__UNUSED 0x00000008 /* (unused) */
+#define RES_F_LASTMASK 0x000000F0 /* ordinal server of last res_nsend */
+#define RES_F_LASTSHIFT 4 /* bit position of LASTMASK "flag" */
+#define RES_GETLAST(res) (((res)._flags & RES_F_LASTMASK) >> RES_F_LASTSHIFT)
+
+/* res_findzonecut2() options */
+#define RES_EXHAUSTIVE 0x00000001 /* always do all queries */
+#define RES_IPV4ONLY 0x00000002 /* IPv4 only */
+#define RES_IPV6ONLY 0x00000004 /* IPv6 only */
+
+/*
+ * Resolver options (keep these in synch with res_debug.c, please)
+ */
+#define RES_INIT 0x00000001 /* address initialized */
+#define RES_DEBUG 0x00000002 /* print debug messages */
+#define RES_AAONLY 0x00000004 /* authoritative answers only (!IMPL) */
+#define RES_USEVC 0x00000008 /* use virtual circuit */
+#define RES_PRIMARY 0x00000010 /* query primary server only (!IMPL) */
+#define RES_IGNTC 0x00000020 /* ignore trucation errors */
+#define RES_RECURSE 0x00000040 /* recursion desired */
+#define RES_DEFNAMES 0x00000080 /* use default domain name */
+#define RES_STAYOPEN 0x00000100 /* Keep TCP socket open */
+#define RES_DNSRCH 0x00000200 /* search up local domain tree */
+#define RES_INSECURE1 0x00000400 /* type 1 security disabled */
+#define RES_INSECURE2 0x00000800 /* type 2 security disabled */
+#define RES_NOALIASES 0x00001000 /* shuts off HOSTALIASES feature */
+#define RES_USE_INET6 0x00002000 /* use/map IPv6 in gethostbyname() */
+#define RES_ROTATE 0x00004000 /* rotate ns list after each query */
+#define RES_NOCHECKNAME 0x00008000 /* do not check names for sanity. */
+#define RES_KEEPTSIG 0x00010000 /* do not strip TSIG records */
+#define RES_BLAST 0x00020000 /* blast all recursive servers */
+#define RES_NO_NIBBLE 0x00040000 /* disable IPv6 nibble mode reverse */
+#define RES_NO_BITSTRING 0x00080000 /* disable IPv6 bitstring mode revrse */
+#define RES_NOTLDQUERY 0x00100000 /* don't unqualified name as a tld */
+#define RES_USE_DNSSEC 0x00200000 /* use DNSSEC using OK bit in OPT */
+/* KAME extensions: use higher bit to avoid conflict with ISC use */
+#define RES_USE_DNAME 0x10000000 /* use DNAME */
+#define RES_USE_EDNS0 0x40000000 /* use EDNS0 if configured */
+#define RES_NO_NIBBLE2 0x80000000 /* disable alternate nibble lookup */
+
+#define RES_DEFAULT (RES_RECURSE | RES_DEFNAMES | RES_DNSRCH)
+
+/*
+ * Resolver "pfcode" values. Used by dig.
+ */
+#define RES_PRF_STATS 0x00000001
+#define RES_PRF_UPDATE 0x00000002
+#define RES_PRF_CLASS 0x00000004
+#define RES_PRF_CMD 0x00000008
+#define RES_PRF_QUES 0x00000010
+#define RES_PRF_ANS 0x00000020
+#define RES_PRF_AUTH 0x00000040
+#define RES_PRF_ADD 0x00000080
+#define RES_PRF_HEAD1 0x00000100
+#define RES_PRF_HEAD2 0x00000200
+#define RES_PRF_TTLID 0x00000400
+#define RES_PRF_HEADX 0x00000800
+#define RES_PRF_QUERY 0x00001000
+#define RES_PRF_REPLY 0x00002000
+#define RES_PRF_INIT 0x00004000
+#define RES_PRF_TRUNC 0x00008000
+/* 0x00010000 */
+
+/* Things involving an internal (static) resolver context. */
+#ifdef _REENTRANT
+extern struct __res_state *__res_state(void);
+#define _res (*__res_state())
+#else
+#ifndef __BIND_NOSTATIC
+extern struct __res_state _res;
+#endif
+#endif
+
+#ifndef __BIND_NOSTATIC
+void fp_nquery __P((const uchar_t *, int, FILE *));
+void fp_query __P((const uchar_t *, FILE *));
+const char *hostalias __P((const char *));
+void p_query __P((const uchar_t *));
+void res_close __P((void));
+int res_init __P((void));
+int res_isourserver __P((const struct sockaddr_in *));
+int res_mkquery __P((int, const char *, int, int, const uchar_t *,
+ int, const uchar_t *, uchar_t *, int));
+int res_query __P((const char *, int, int, uchar_t *, int));
+int res_querydomain __P((const char *, const char *, int, int,
+ uchar_t *, int));
+int res_search __P((const char *, int, int, uchar_t *, int));
+int res_send __P((const uchar_t *, int, uchar_t *, int));
+int res_sendsigned __P((const uchar_t *, int, ns_tsig_key *,
+ uchar_t *, int));
+#endif /* __BIND_NOSTATIC */
+
+extern const struct res_sym __p_key_syms[];
+extern const struct res_sym __p_cert_syms[];
+extern const struct res_sym __p_class_syms[];
+extern const struct res_sym __p_type_syms[];
+extern const struct res_sym __p_rcode_syms[];
+
+int res_hnok __P((const char *));
+int res_ownok __P((const char *));
+int res_mailok __P((const char *));
+int res_dnok __P((const char *));
+int sym_ston __P((const struct res_sym *, const char *, int *));
+const char *sym_ntos __P((const struct res_sym *, int, int *));
+const char *sym_ntop __P((const struct res_sym *, int, int *));
+int b64_ntop __P((uchar_t const *, size_t, char *, size_t));
+int b64_pton __P((char const *, uchar_t *, size_t));
+int loc_aton __P((const char *ascii, uchar_t *binary));
+const char *loc_ntoa __P((const uchar_t *binary, char *ascii));
+int dn_skipname __P((const uchar_t *, const uchar_t *));
+void putlong __P((unsigned int, uchar_t *));
+void putshort __P((unsigned short, uchar_t *));
+const char *p_class __P((int));
+const char *p_time __P((unsigned int));
+const char *p_type __P((int));
+const char *p_rcode __P((int));
+const char *p_sockun __P((union res_sockaddr_union, char *, size_t));
+const uchar_t *p_cdnname __P((const uchar_t *, const uchar_t *, int,
+ FILE *));
+const uchar_t *p_cdname __P((const uchar_t *, const uchar_t *, FILE *));
+const uchar_t *p_fqnname __P((const uchar_t *cp, const uchar_t *msg,
+ int, char *, int));
+const uchar_t *p_fqname __P((const uchar_t *, const uchar_t *, FILE *));
+const char *p_option __P((uint_t option));
+char *p_secstodate __P((uint_t));
+int dn_count_labels __P((const char *));
+int dn_comp __P((const char *, uchar_t *, int,
+ uchar_t **, uchar_t **));
+int dn_expand __P((const uchar_t *, const uchar_t *,
+ const uchar_t *, char *, int));
+void res_rndinit __P((res_state));
+uint_t res_randomid __P((void));
+uint_t res_nrandomid __P((res_state));
+int res_nameinquery __P((const char *, int, int,
+ const uchar_t *, const uchar_t *));
+int res_queriesmatch __P((const uchar_t *, const uchar_t *,
+ const uchar_t *, const uchar_t *));
+const char *p_section __P((int section, int opcode));
+
+
+/* Things involving a resolver context. */
+int res_ninit __P((res_state));
+int res_nisourserver __P((const res_state,
+ const struct sockaddr_in *));
+void fp_resstat __P((const res_state, FILE *));
+void res_pquery __P((const res_state, const uchar_t *, int, FILE *));
+const char *res_hostalias __P((const res_state, const char *,
+ char *, size_t));
+int res_nquery __P((res_state,
+ const char *, int, int, uchar_t *, int));
+int res_nsearch __P((res_state, const char *, int,
+ int, uchar_t *, int));
+int res_nquerydomain __P((res_state,
+ const char *, const char *, int, int,
+ uchar_t *, int));
+int res_nmkquery __P((res_state,
+ int, const char *, int, int, const uchar_t *,
+ int, const uchar_t *, uchar_t *, int));
+int res_nsend __P((res_state, const uchar_t *, int, uchar_t *,
+ int));
+int res_nsendsigned __P((res_state, const uchar_t *, int,
+ ns_tsig_key *, uchar_t *, int));
+int res_findzonecut __P((res_state, const char *, ns_class, int,
+ char *, size_t, struct in_addr *, int));
+int res_findzonecut2 __P((res_state, const char *, ns_class, int,
+ char *, size_t, union res_sockaddr_union *,
+ int));
+void res_nclose __P((res_state));
+int res_nopt __P((res_state, int, uchar_t *, int, int));
+int res_nopt_rdata __P((res_state, int, uchar_t *, int, uchar_t *,
+ ushort_t, ushort_t, uchar_t *));
+void res_send_setqhook __P((res_send_qhook hook));
+void res_send_setrhook __P((res_send_rhook hook));
+int __res_vinit __P((res_state, int));
+void res_destroyservicelist __P((void));
+const char *res_servicename __P((uint16_t port, const char *proto));
+const char *res_protocolname __P((int num));
+void res_destroyprotolist __P((void));
+void res_buildprotolist __P((void));
+const char *res_get_nibblesuffix __P((res_state));
+const char *res_get_nibblesuffix2 __P((res_state));
+void res_ndestroy __P((res_state));
+uint16_t res_nametoclass __P((const char *buf, int *success));
+uint16_t res_nametotype __P((const char *buf, int *success));
+void res_setservers __P((res_state,
+ const union res_sockaddr_union *, int));
+int res_getservers __P((res_state,
+ union res_sockaddr_union *, int));
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !_RESOLV_JOY_H */
diff --git a/usr/src/head/zdoor.h b/usr/src/head/zdoor.h
new file mode 100644
index 0000000000..f2d204042d
--- /dev/null
+++ b/usr/src/head/zdoor.h
@@ -0,0 +1,74 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Joyent, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _ZDOOR_H
+#define _ZDOOR_H
+
+#include <sys/types.h>
+#include <zone.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct zdoor_handle *zdoor_handle_t;
+
+typedef struct zdoor_cookie {
+ char *zdc_zonename;
+ char *zdc_service;
+ void *zdc_biscuit;
+} zdoor_cookie_t;
+
+typedef struct zdoor_result {
+ char *zdr_data;
+ size_t zdr_size;
+} zdoor_result_t;
+
+typedef zdoor_result_t *(*zdoor_callback) (zdoor_cookie_t *cookie,
+ char *argp, size_t arpg_sz);
+
+#define ZDOOR_OK 0
+#define ZDOOR_ERROR -1
+#define ZDOOR_NOT_GLOBAL_ZONE -2
+#define ZDOOR_ZONE_NOT_RUNNING -3
+#define ZDOOR_ZONE_FORBIDDEN -4
+#define ZDOOR_ARGS_ERROR -5
+#define ZDOOR_OUT_OF_MEMORY -6
+
+extern zdoor_handle_t zdoor_handle_init();
+
+extern int zdoor_open(zdoor_handle_t handle, const char *zonename,
+ const char *service, void *biscuit, zdoor_callback callback);
+
+extern void * zdoor_close(zdoor_handle_t handle, const char *zonename,
+ const char *service);
+
+extern void zdoor_handle_destroy(zdoor_handle_t handle);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZDOOR_H */
diff --git a/usr/src/head/zone.h b/usr/src/head/zone.h
index 34528a27f5..f9ea8d4f82 100644
--- a/usr/src/head/zone.h
+++ b/usr/src/head/zone.h
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015 Joyent Inc.
*/
#ifndef _ZONE_H
@@ -46,17 +47,20 @@ extern ssize_t getzonenamebyid(zoneid_t, char *, size_t);
* NOTE
*
* The remaining contents of this file are private to the implementation
- * of Solaris and are subject to change at any time without notice,
+ * of Illumos and are subject to change at any time without notice,
* Applications using these interfaces may fail to run on future releases.
*/
+#define ZLOGIN_DISCONNECT 0x1 /* disconnect on zone halt */
+#define ZLOGIN_ZFD_EOF 0x2 /* EOF on ZFD */
+
extern int zonept(int, zoneid_t);
extern int zone_get_id(const char *, zoneid_t *);
/* System call API */
extern zoneid_t zone_create(const char *, const char *,
const struct priv_set *, const char *, size_t, const char *, size_t, int *,
- int, int, const bslabel_t *, int);
+ int, int, const bslabel_t *, int, zoneid_t);
extern int zone_boot(zoneid_t);
extern int zone_destroy(zoneid_t);
extern ssize_t zone_getattr(zoneid_t, int, void *, size_t);
@@ -69,6 +73,7 @@ extern int zone_add_datalink(zoneid_t, datalink_id_t);
extern int zone_remove_datalink(zoneid_t, datalink_id_t);
extern int zone_check_datalink(zoneid_t *, datalink_id_t);
extern int zone_list_datalink(zoneid_t, int *, datalink_id_t *);
+extern const char *zone_get_nroot(void);
#ifdef __cplusplus
}
diff --git a/usr/src/lib/Makefile b/usr/src/lib/Makefile
index 5af6a5dbf3..9c2ae475c9 100644
--- a/usr/src/lib/Makefile
+++ b/usr/src/lib/Makefile
@@ -22,7 +22,7 @@
#
# Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2012 by Delphix. All rights reserved.
-# Copyright (c) 2012, Joyent, Inc. All rights reserved.
+# Copyright 2015, Joyent, Inc.
# Copyright (c) 2013 Gary Mills
# Copyright 2014 Garrett D'Amore <garrett@damore.org>
# Copyright (c) 2015 Gary Mills
@@ -91,6 +91,7 @@ SUBDIRS += \
libbrand \
libbsdmalloc \
libbsm \
+ libbunyan \
libc_db \
libcfgadm \
libcmd \
@@ -117,6 +118,7 @@ SUBDIRS += \
libdscfg \
libdtrace \
libdtrace_jni \
+ libdwarf \
libefi \
libelfsign \
libeti \
@@ -133,6 +135,7 @@ SUBDIRS += \
libgss \
libhotplug \
libidmap \
+ libidspace \
libilb \
libima \
libinetsvc \
@@ -180,9 +183,11 @@ SUBDIRS += \
libraidcfg \
librcm \
librdc \
+ librename \
libreparse \
libresolv \
libresolv2 \
+ libresolv2_joy \
librestart \
librpcsvc \
librsm \
@@ -200,6 +205,7 @@ SUBDIRS += \
libsip \
libsldap \
libslp \
+ libsmartsshd \
libsmbfs \
libsmbios \
libsmedia \
@@ -223,6 +229,7 @@ SUBDIRS += \
libunistat \
libuuid \
libuutil \
+ libvnd \
libvolmgt \
libvrrpadm \
libvscan \
@@ -233,6 +240,7 @@ SUBDIRS += \
libxcurses \
libxcurses2 \
libxnet \
+ libzdoor \
libzfs \
libzfs_core \
libzfs_jni \
@@ -240,7 +248,6 @@ SUBDIRS += \
libzoneinfo \
libzonestat \
libzpool \
- lvm \
madv \
mpapi \
mpss \
@@ -252,9 +259,6 @@ SUBDIRS += \
pkcs11 \
policykit \
print \
- pylibbe \
- pysolaris \
- pyzfs \
raidcfg_plugins \
rpcsec_gss \
sasl_plugins \
@@ -264,6 +268,7 @@ SUBDIRS += \
sun_fc \
sun_sas \
udapl \
+ varpd \
watchmalloc \
$($(MACH)_SUBDIRS)
@@ -290,8 +295,7 @@ sparc_SUBDIRS= \
#
NOWAIT_SUBDIRS= $(SUBDIRS:.WAIT=)
-DCSUBDIRS = \
- lvm
+DCSUBDIRS =
MSGSUBDIRS= \
abi \
@@ -337,6 +341,7 @@ MSGSUBDIRS= \
libshell \
libsldap \
libslp \
+ libsmartsshd \
libsmbfs \
libsmedia \
libsum \
@@ -348,11 +353,10 @@ MSGSUBDIRS= \
libwanbootutil \
libzfs \
libzonecfg \
- lvm \
+ libzdoor \
madv \
mpss \
pam_modules \
- pyzfs \
rpcsec_gss \
$($(MACH)_MSGSUBDIRS)
@@ -372,6 +376,7 @@ HDRSUBDIRS= \
libast \
libbrand \
libbsm \
+ libbunyan \
libc \
libcmd \
libcmdutils \
@@ -388,6 +393,7 @@ HDRSUBDIRS= \
libdhcputil \
libdisasm \
libdiskmgt \
+ libdwarf \
libdladm \
libdll \
libdlpi \
@@ -404,6 +410,7 @@ HDRSUBDIRS= \
libgen \
libgrubmgmt \
libidmap \
+ libidspace \
libilb \
libima \
libinetsvc \
@@ -439,6 +446,7 @@ HDRSUBDIRS= \
libraidcfg \
librcm \
librdc \
+ librename \
libreparse \
librestart \
librpcsvc \
@@ -472,6 +480,7 @@ HDRSUBDIRS= \
libumem \
libunistat \
libuutil \
+ libvnd \
libvolmgt \
libvrrpadm \
libvscan \
@@ -484,7 +493,6 @@ HDRSUBDIRS= \
libzfs_jni \
libzoneinfo \
libzonestat \
- lvm \
mpapi \
passwdutil \
pkcs11 \
@@ -567,7 +575,7 @@ basedeps := TARGET=install
# libc libm libmd libmp libnsl libnvpair libsocket
abi: libctf libmapmalloc libproc
auditd_plugins: libbsm libsecdb libgss libmtmalloc
-brand: libzonecfg libmapmalloc
+brand: libzonecfg libmapmalloc libipadm libcmdutils libproc librpcsvc
cfgadm_plugins: libdevice libdevinfo libhotplug librcm hbaapi libkstat libscf
fm: libexacct libipmi libzfs scsi libdevinfo libdevid libcfgadm \
libcontract libsysevent ../cmd/sgs/libelf
@@ -581,11 +589,13 @@ libadt_jni: libbsm
libadutils: libldap5 libresolv2
libbe: libzfs libinstzones libuuid libgen libdevinfo libefi
libbsm: libinetutil libscf libsecdb libtsol
+libbunyan: libnvpair
libcfgadm: libdevinfo
libcmd: libsum libast
libcmdutils: libavl
libcpc: libpctx
libcrypt: libgen
+libctf: libdwarf
libdevid: libdevinfo
libdevinfo: libsec libgen
libdhcpagent: libdhcputil libuuid libdlpi libcontract
@@ -593,7 +603,7 @@ libdhcputil: libgen libinetutil libdlpi
libdiskmgt: libdevid libdevinfo libadm libefi libkstat libsysevent
$(INTEL_BLD)libdiskmgt: libfdisk
libdladm: libdevinfo libinetutil libscf librcm libexacct libkstat \
- libpool
+ libpool varpd
libdll: libast
libdlpi: libinetutil libdladm
libds: libsysevent
@@ -611,6 +621,7 @@ libfsmgt: libkstat
libgrubmgmt: libdevinfo libzfs libfstyp libefi
$(INTEL_BLD)libgrubmgmt: libfdisk
libidmap: libavl libuutil
+libidspace: libumem
libinetsvc: libscf
libinstzones: libzonecfg libcontract
libipadm: libinetutil libdlpi libdhcpagent libdladm libsecdb
@@ -645,6 +656,7 @@ libshare: libscf libzfs libuuid libfsmgt libsecdb libumem libsmbfs
libshell: libast libcmd libdll libsecdb
libsip: libmd5
libsldap: libldap5 libscf
+libsmartsshd: libc libcontract
libsmbfs: libkrb5 libsec libidmap pkcs11
libsrpt: libstmf
libstmf: libscf
@@ -664,6 +676,7 @@ libvolmgt: libadm
libvrrpadm: libdladm libscf
libvscan: libscf libsecdb
libwanboot: libresolv2 libdevinfo libinetutil libdhcputil
+libzdoor: libc libzonecfg libcontract
libzfs: libdevid libgen libuutil libadm libavl libefi libidmap \
libumem libtsol libzfs_core
libzfs_jni: libdiskmgt libzfs
@@ -675,7 +688,8 @@ lvm: libadm libdevid libdevinfo libefi libgen libscf
madv: libgen
mpapi: libpthread libdevinfo libsysevent
mpss: libgen
-nsswitch: libadutils libidmap libdns_sd libscf libldap5 libsldap
+nsswitch: libadutils libidmap libdns_sd libscf libldap5 libsldap \
+ libresolv2_joy
pam_modules: libproject passwdutil smbsrv libtsnet libpam libbsm libsecdb
passwdutil: libsldap
pkcs11: libcryptoutil libgen libuuid
@@ -695,6 +709,8 @@ storage: libdevice libdevinfo libdevid
sun_fc: libdevinfo libsysevent
sun_sas: libdevinfo libsysevent libkstat libdevid
udapl: libdevinfo libdladm
+varpd: libavl libidspace libumem libnsl libnvpair libmd5 librename \
+ libbunyan libcmdutils
#
# The reason this rule checks for the existence of the
diff --git a/usr/src/lib/Makefile.astmsg b/usr/src/lib/Makefile.astmsg
index 096805e825..ff8202d03a 100644
--- a/usr/src/lib/Makefile.astmsg
+++ b/usr/src/lib/Makefile.astmsg
@@ -47,8 +47,8 @@ MSGLIBNAME= $(LIBRARY:.a=)
ASTMSGCATALOG= $(ROOT)/usr/lib/locale/C/LC_MESSAGES/$(MSGLIBNAME)
$(DO_BUILD_AST_CATALOGS)ASTMSGCC= \
- PATH="/usr/ast/bin/:/bin:/usr/bin" \
- /usr/bin/ksh93 /usr/ast/bin/msgcc >>msgcc.out 2>&1
+ PATH="$(ASTBINDIR):/bin:/usr/bin" \
+ /usr/bin/ksh93 $(MSGCC) >>msgcc.out 2>&1
ASTMSGS= $(OBJECTS:%.o=msgs/%.mso)
diff --git a/usr/src/lib/Makefile.lib b/usr/src/lib/Makefile.lib
index d891e418a7..79db7f9bec 100644
--- a/usr/src/lib/Makefile.lib
+++ b/usr/src/lib/Makefile.lib
@@ -21,6 +21,7 @@
# Copyright 2015 Gary Mills
# Copyright 2015 Igor Kozhukhov <ikozhukhov@gmail.com>
# Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2015, Joyent, Inc.
#
#
# Definitions common to libraries.
@@ -135,6 +136,23 @@ SONAME= $(DYNLIB)
# combining relocations into one relocation table reduces startup costs.
# All options are tunable to allow overload/omission from lower makefiles.
+#
+# DTrace related rules
+#
+# These allow for multiple USDT providers to be specified by a library.
+# If a library needs to break up the set of objects that are passed to
+# the dtrace -G invocation, then they can place the following in heir
+# Makefile.com:
+#
+# pics/<provider>.o := USDT_G_PICS = <files>
+#
+# <provider> should be replaced with the basename of one of the USDT
+# providers specified in USDT_PROVIDERS in their Makefile.com.
+#
+USDT_HEADERS= $(USDT_PROVIDERS:%.d=%_impl.h)
+USDT_PICS= $(USDT_PROVIDERS:%.d=pics/%.o)
+USDT_G_PICS= $(PICS)
+
HSONAME= -h$(SONAME)
DYNFLAGS= $(HSONAME) $(ZTEXT) $(ZDEFS) $(BDIRECT) \
@@ -157,7 +175,7 @@ SRCS= $(OBJECTS:%.o=$(SRCDIR)/%.c)
# overridden locally when extra processing is needed
BUILD.AR= $(AR) $(ARFLAGS) $@ $(AROBJS)
BUILD.SO= $(CC) $(CFLAGS) -o $@ $(GSHARED) $(DYNFLAGS) \
- $(PICS) $(EXTPICS) $(LDLIBS)
+ $(PICS) $(EXTPICS) $(USDT_PICS) $(LDLIBS)
BUILDCCC.SO= $(CCC) $(CCFLAGS) -o $@ $(GSHARED) $(DYNFLAGS) \
$(PICS) $(EXTPICS) $(LDLIBS)
@@ -254,3 +272,15 @@ TARGETMACH= $(MACH)
# shouldn't override this - they should override $(CLOBBERFILES) instead.
#
CLOBBERTARGFILES= $(LIBS) $(DYNLIB) $(CLOBBERFILES)
+
+#
+# Define the default ctfdiff invocation used to check a list of types
+# supplied by a user of a library. The goal is to validate that a given
+# series of types is the same in both a 32-bit and 64-bit artifact. This
+# is only defined if we have a 64-bit build to do.
+#
+TYPECHECK_LIB32 = $(TYPECHECK_LIB:%=$(MACH)/%)
+TYPECHECK_LIB64 = $(TYPECHECK_LIB:%=$(MACH64)/%)
+TYPECHECK_LIST= $(TYPELIST:%=-T %)
+$(BUILD64)TYPECHECK.lib = $(CTFDIFF) -t -I $(TYPECHECK_LIST) $(TYPECHECK_LIB32) $(TYPECHECK_LIB64)
+TYPECHECK = $(TYPECHECK_LIB:%=%.typecheck)
diff --git a/usr/src/lib/Makefile.targ b/usr/src/lib/Makefile.targ
index 4769c64d54..2798192343 100644
--- a/usr/src/lib/Makefile.targ
+++ b/usr/src/lib/Makefile.targ
@@ -20,6 +20,7 @@
#
#
# Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2014, Joyent, Inc.
#
#
@@ -90,7 +91,7 @@ $(LIBRARY): objs .WAIT $$(OBJS)
$(DYNLIB): $$(MAPFILES)
-$(DYNLIB): pics .WAIT $$(PICS) $$(ALTPICS) $$(EXTPICS)
+$(DYNLIB): pics $(USDT_HEADERS) .WAIT $$(PICS) $$(ALTPICS) $$(EXTPICS) .WAIT $(USDT_PICS)
$(BUILD.SO)
$(POST_PROCESS_SO)
@@ -104,9 +105,12 @@ $(LINTLIB): $$(SRCS)
lintcheck: $$(SRCS)
$(LINT.c) $(LINTCHECKFLAGS) $(SRCS) $(LDLIBS)
+$(TYPECHECK): $(TYPECHECK_LIB32) $(TYPECHECK_LIB64)
+ $(TYPECHECK.lib)
+
clobber: clean
-$(RM) $(CLOBBERTARGFILES)
clean:
- -$(RM) $(OBJS)
+ -$(RM) $(OBJS) $(USDT_HEADERS) $(USDT_PICS)
-$(RM) $(PICS) $(DUPLICATE_SRC) $(LINTOUT) $(LINTLIB) $(CLEANFILES)
diff --git a/usr/src/lib/Makefile.usdt b/usr/src/lib/Makefile.usdt
new file mode 100644
index 0000000000..17140161ad
--- /dev/null
+++ b/usr/src/lib/Makefile.usdt
@@ -0,0 +1,28 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# This makefile contains the necessary targets for USDT providers; it should
+# be included after Makefile.targ. (It is in a separate file rather than in
+# Makefile.targ because the dependency on $(USDT_G_PICS) is incompatible with
+# libraries that dynamically define $(OBJECTS).)
+#
+pics/%.o: $(SRCDIR)/%.d $(USDT_G_PICS)
+ $(COMPILE.d) -o $@ -s $< $(USDT_G_PICS)
+ $(POST_PROCESS_O)
+
+%_impl.h: $(SRCDIR)/%.d
+ $(DTRACE) -h -o $@ -s $<
+
diff --git a/usr/src/lib/brand/Makefile b/usr/src/lib/brand/Makefile
index 05bfc54764..cd19a0bf2c 100644
--- a/usr/src/lib/brand/Makefile
+++ b/usr/src/lib/brand/Makefile
@@ -30,6 +30,9 @@ include ../../Makefile.master
# Build everything in parallel; use .WAIT for dependencies
.PARALLEL:
+i386_SUBDIRS= lx
+i386_MSGSUBDIRS= lx
+
SUBDIRS= shared .WAIT sn1 solaris10 ipkg labeled $($(MACH)_SUBDIRS)
MSGSUBDIRS= solaris10 shared $($(MACH)_MSGSUBDIRS)
diff --git a/usr/src/cmd/ssh/libssh/Makefile b/usr/src/lib/brand/lx/Makefile
index dfc3be5088..7e01b43dba 100644
--- a/usr/src/cmd/ssh/libssh/Makefile
+++ b/usr/src/lib/brand/lx/Makefile
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -19,37 +18,38 @@
#
# CDDL HEADER END
#
-# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
+
#
-# ident "%Z%%M% %I% %E% SMI"
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+# Copyright 2016 Joyent, Inc.
#
-# cmd/ssh/libssh/Makefile
-include $(SRC)/lib/Makefile.lib
-include ../Makefile.ssh-common
+default: all
+
+include Makefile.lx
-SUBDIRS= $(MACH)
-# Need to override the TEXT_DOMAIN here because we included a Makefile from
-# usr/src/lib but this is a private library which is part of SUNW_OST_OSCMD
-TEXT_DOMAIN= SUNW_OST_OSCMD
+# Build everything in parallel; use .WAIT for dependencies
+.PARALLEL:
-POFILE=_messages.po
+SUBDIRS= librtld_db lx_support lx_init lx_brand netfiles \
+ zone lx_vdso testing .WAIT
+MSGSUBDIRS= lx_brand lx_support zone
all := TARGET= all
+install := TARGET= install
clean := TARGET= clean
clobber := TARGET= clobber
-install := TARGET= install
lint := TARGET= lint
-$(POFILE) := TARGET= $(POFILE)
+_msg := TARGET= _msg
.KEEP_STATE:
-all clean clobber install lint $(POFILE): $(SUBDIRS)
+all install clean clobber lint: $(SUBDIRS)
-$(SUBDIRS): FRC
+_msg: $(MSGSUBDIRS)
+
+$(SUBDIRS): FRC
@cd $@; pwd; $(MAKE) $(TARGET)
FRC:
-
-include $(SRC)/lib/Makefile.targ
diff --git a/usr/src/cmd/ssh/etc/ssh-askpass b/usr/src/lib/brand/lx/Makefile.lx
index f2735a9103..4db4679cef 100644
--- a/usr/src/cmd/ssh/etc/ssh-askpass
+++ b/usr/src/lib/brand/lx/Makefile.lx
@@ -1,4 +1,3 @@
-#! /usr/bin/ksh
#
# CDDL HEADER START
#
@@ -19,13 +18,17 @@
#
# CDDL HEADER END
#
-
#
-# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# lib/brand/lx/Makefile.lx
+#
+# include global definitions
+
+BRAND= lx
+
+include $(SRC)/lib/brand/Makefile.brand
-prompt=$(echo $1 | sed s/_/__/g)
-ICON=/usr/share/pixmaps/blueprint-keyring.png
-exec /usr/bin/zenity --entry --title "ssh-askpass" \
- --text="$prompt" --hide-text --window-icon=$ICON
diff --git a/usr/src/cmd/ssh/ssh-add/Makefile b/usr/src/lib/brand/lx/librtld_db/Makefile
index 0d3d48eaf5..2fc0a818f6 100644
--- a/usr/src/cmd/ssh/ssh-add/Makefile
+++ b/usr/src/lib/brand/lx/librtld_db/Makefile
@@ -18,48 +18,37 @@
#
# CDDL HEADER END
#
+
+#
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# cmd/ssh/ssh-add/Makefile
-
-PROG= ssh-add
+# ident "%Z%%M% %I% %E% SMI"
+#
-OBJS = \
- ssh-add.o
-SRCS = $(OBJS:.o=.c)
+default: all
-include ../../Makefile.cmd
-include ../Makefile.ssh-common
+include $(SRC)/lib/Makefile.lib
-DIRS= $(ROOTLIBSUNSSH)
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
-LDLIBS += $(SSH_COMMON_LDLIBS) -lsocket -lcrypto
+LINT_SUBDIRS= $(MACH)
+$(BUILD64)LINT_SUBDIRS += $(MACH64)
-POFILE_DIR= ..
+all := TARGET= all
+clean := TARGET= clean
+clobber := TARGET= clobber
+install := TARGET= install
+lint := TARGET= lint
.KEEP_STATE:
-.PARALLEL: $(OBJS)
-
-all: $(PROG)
-
-$(PROG): $(OBJS) ../libssh/$(MACH)/libssh.a ../libopenbsd-compat/$(MACH)/libopenbsd-compat.a
- $(LINK.c) $(OBJS) -o $@ $(LDLIBS) $(DYNFLAGS)
- $(POST_PROCESS)
-
-$(DIRS):
- $(INS.dir)
-
-$(ROOTBIN)/ssh-add: $(ROOTLIBSUNSSH)/ssh-add
- -$(RM) $@; $(SYMLINK) ../lib/sunssh/ssh-add $@
-
-install: all $(DIRS) $(ROOTPROG)
+all install clean clobber: $(SUBDIRS)
-clean:
- $(RM) -f $(OBJS) $(PROG)
+lint: $(LINT_SUBDIRS)
-lint: lint_SRCS
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
-include ../Makefile.msg.targ
-include ../../Makefile.targ
+FRC:
diff --git a/usr/src/lib/brand/lx/librtld_db/Makefile.com b/usr/src/lib/brand/lx/librtld_db/Makefile.com
new file mode 100644
index 0000000000..202cc0fe7b
--- /dev/null
+++ b/usr/src/lib/brand/lx/librtld_db/Makefile.com
@@ -0,0 +1,83 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+LIBRARY = lx_librtld_db.a
+VERS = .1
+COBJS = lx_librtld_db.o
+OBJECTS = $(COBJS) $(COBJS64)
+
+include $(SRC)/lib/Makefile.lib
+include ../../Makefile.lx
+
+CSRCS = $(COBJS:%o=../common/%c)
+SRCS = $(CSRCS)
+
+SRCDIR = ../common
+UTSBASE = $(SRC)/uts
+
+#
+# ATTENTION:
+# Librtl_db brand plugin libraries should NOT directly invoke any
+# libproc.so interfaces or be linked against libproc. If a librtl_db
+# brand plugin library uses libproc.so interfaces then it may break
+# any other librtld_db consumers (like mdb) that tries to attach
+# to a branded process. The only safe interfaces that the a librtld_db
+# brand plugin library can use to access a target process are the
+# proc_service(3PROC) apis.
+#
+DYNFLAGS += $(VERSREF) -M../common/mapfile-vers
+LIBS = $(DYNLIB)
+LDLIBS += -lc -lrtld_db
+CFLAGS += $(CCVERBOSE)
+CPPFLAGS += -D_REENTRANT -I../ -I$(UTSBASE)/common/brand/lx \
+ -I$(SRC)/cmd/sgs/librtld_db/common \
+ -I$(SRC)/cmd/sgs/include \
+ -I$(SRC)/cmd/sgs/include/$(MACH)
+
+ROOTLIBDIR = $(ROOT)/usr/lib/brand/lx
+ROOTLIBDIR64 = $(ROOT)/usr/lib/brand/lx/$(MACH64)
+
+#
+# The top level Makefiles define define TEXT_DOMAIN. But librtld_db.so.1
+# isn't internationalized and this library won't be either. The only
+# messages that this library can generate are messages used for debugging
+# the operation of the library itself.
+#
+DTEXTDOM =
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+pics/%64.o: ../common/%.c
+ $(COMPILE.c) -D_ELF64 $(PICFLAGS) -o $@ $<
+ $(POST_PROCESS_O)
+
+include $(SRC)/lib/Makefile.targ
diff --git a/usr/src/lib/brand/lx/librtld_db/amd64/Makefile b/usr/src/lib/brand/lx/librtld_db/amd64/Makefile
new file mode 100644
index 0000000000..726e7ef6d3
--- /dev/null
+++ b/usr/src/lib/brand/lx/librtld_db/amd64/Makefile
@@ -0,0 +1,38 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+COBJS64 = lx_librtld_db64.o
+
+include ../Makefile.com
+include $(SRC)/lib/Makefile.lib.64
+
+DYNFLAGS += -Mmapfile-vers
+
+CLOBBERFILES = $(ROOTLIBDIR64)/$(DYNLIB)
+
+install: all $(ROOTLIBS64)
diff --git a/usr/src/cmd/ssh/Makefile.msg.targ b/usr/src/lib/brand/lx/librtld_db/amd64/mapfile-vers
index df56774c25..4893b02998 100644
--- a/usr/src/cmd/ssh/Makefile.msg.targ
+++ b/usr/src/lib/brand/lx/librtld_db/amd64/mapfile-vers
@@ -18,22 +18,27 @@
#
# CDDL HEADER END
#
+
+#
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-POFILE = _messages.po
-XGETFLAGS = -o $(POFILE_DIR)/$(POFILE) --foreign-user --strict -j -n -E \
- --width=72 \
- --omit-header \
- --keyword=fatal \
- --keyword=error \
- --keyword=verbose \
- --keyword=packet_send_debug \
- --keyword=packet_disconnect
-$(POFILE):
- $(RM) $@
- $(COMPILE.cpp) $(SRCS) > $(POFILE).i.c
- $(XGETTEXT) $(XGETFLAGS) $(POFILE).i.c
- $(RM) $(POFILE).i.c
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+SUNWprivate_1.1 {
+ global:
+ rtld_db_brand_ops64;
+};
diff --git a/usr/src/lib/brand/lx/librtld_db/common/lx_librtld_db.c b/usr/src/lib/brand/lx/librtld_db/common/lx_librtld_db.c
new file mode 100644
index 0000000000..ddea4f910d
--- /dev/null
+++ b/usr/src/lib/brand/lx/librtld_db/common/lx_librtld_db.c
@@ -0,0 +1,852 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc. All rights reserved.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <sys/types.h>
+#include <sys/link.h>
+#include <libproc.h>
+#include <proc_service.h>
+#include <rtld_db.h>
+#include <synch.h>
+
+#include <sys/lx_brand.h>
+
+/*
+ * Overview of this library derived from the original "BrandZ" PSARC design
+ * document.
+ *
+ * Since Linux binaries are standard ELF objects, Illumos debug tools (i.e. mdb
+ * or ptools) are able to process them in essentially the same way that Illumos
+ * binaries are processed. The main objective is to retrieve symbols and
+ * thereby aid debugging and observability. Unfortunately, most Linux
+ * distributions strip(1) their binaries as a misguided "optimization" so the
+ * majority of the useful debugging information is lost.
+ *
+ * The debug tools use interfaces provided by librtld_db to debug live
+ * processes and core files. librtld_db discovers ELF objects which have been
+ * mapped into the target's address space and reports these back to the tool.
+ * The librtld_db library understands enough of the internals of the Illumos
+ * runtime linker to iterate over the linker's private link maps and process
+ * the objects it finds. librtld_db allows our tools to debug the Illumos
+ * portions of a branded process (e.g. the brand library, libc, etc.) but they
+ * can't understand any Linux objects that are mapped into the address space
+ * because the Illumos linker only has Illumos objects on its link maps.
+ *
+ * In order to give the tools visibility into Linux binaries, a brand helper
+ * framework is implemented in librtld_db. When librtld_db is asked to examine
+ * a branded target process or core file, it uses the AT_SUN_BRANDNAME aux
+ * vector to get our brand name (lx). It then dlopen-s this lx_librtld_db.so
+ * helper library.
+ *
+ * Once loaded, this helper library is responsible for finding any lx-specific
+ * information it needs, such as the Linux equivalent LDDATA aux entry and
+ * preparing to return details about the objects loaded into the address space
+ * by the Linux linker.
+ *
+ * When a debug tool asks to know what objects are loaded in the target,
+ * librtld_db walks the Illumos link maps and iterates over each object it
+ * finds there, handing information about each to the tool. It then calls down
+ * into this helper library, which does the same for the brand-specific objects
+ * used by the target.
+ *
+ * This debug-helper code contains a bunch of helpful ps_plog calls. To enable
+ * this output with the ptools (e.g. pmap) set LIBPROC_DEBUG=1 in your
+ * environment. To enable it with mdb set MDB_DEBUG=psvc in your environment.
+ */
+
+/*
+ * ATTENTION:
+ * Librtl_db brand plugin libraries should NOT directly invoke any
+ * libproc.so interfaces or be linked against libproc. If a librtl_db
+ * brand plugin library uses libproc.so interfaces then it may break
+ * any other librtld_db consumers (like mdb) that tries to attach
+ * to a branded process. The only safe interfaces that the a librtld_db
+ * brand plugin library can use to access a target process are the
+ * proc_service(3PROC) apis.
+ */
+
+/*
+ * M_DATA comes from some streams header file but is also redifined in
+ * _rtld_db.h, so nuke the old streams definition here.
+ */
+#ifdef M_DATA
+#undef M_DATA
+#endif /* M_DATA */
+
+/*
+ * For 32-bit versions of this library, this file gets compiled once.
+ * For 64-bit versions of this library, this file gets compiled twice,
+ * once with _ELF64 defined and once without. The expectation is that
+ * the 64-bit version of the library can properly deal with both 32-bit
+ * and 64-bit elf files, hence in the 64-bit library there are two copies
+ * of all the interfaces in this file, one set named *32 and one named *64.
+ *
+ * This also means that we need to be careful when declaring local pointers
+ * that point to objects in another processes address space, since these
+ * pointers may not match the current processes pointer width. Basically,
+ * we should avoid using data types that change size between 32 and 64 bit
+ * modes like: long, void *, uintptr_t, caddr_t, psaddr_t, size_t, etc.
+ * Instead we should declare all pointers as uint32_t. Then when we
+ * are compiled to deal with 64-bit targets we'll re-define uint32_t
+ * to be a uint64_t.
+ */
+#ifdef _LP64
+#ifdef _ELF64
+#define lx_ldb_get_dyns32 lx_ldb_get_dyns64
+#define lx_ldb_init32 lx_ldb_init64
+#define lx_ldb_fini32 lx_ldb_fini64
+#define lx_ldb_loadobj_iter32 lx_ldb_loadobj_iter64
+#define lx_ldb_getauxval32 lx_ldb_getauxval64
+#define lx_elf_props32 lx_elf_props64
+#define _rd_get_dyns32 _rd_get_dyns64
+#define _rd_get_ehdr32 _rd_get_ehdr64
+#define uint32_t uint64_t
+#define Elf32_Dyn Elf64_Dyn
+#define Elf32_Ehdr Elf64_Ehdr
+#define Elf32_Phdr Elf64_Phdr
+#define Elf32_Sym Elf64_Sym
+#endif /* _ELF64 */
+#endif /* _LP64 */
+
+/* Included from usr/src/cmd/sgs/librtld_db/common */
+#include <_rtld_db.h>
+
+typedef struct lx_rd {
+ rd_agent_t *lr_rap;
+ struct ps_prochandle *lr_php; /* proc handle pointer */
+ uint32_t lr_rdebug; /* address of lx r_debug */
+ uint32_t lr_exec; /* base address of executable */
+} lx_rd_t;
+
+typedef struct lx_link_map {
+ uint32_t lxm_addr; /* Base address shared object is loaded at. */
+ uint32_t lxm_name; /* Absolute file name object was found in. */
+ uint32_t lxm_ld; /* Dynamic section of the shared object. */
+ uint32_t lxm_next; /* Chain of loaded objects. */
+} lx_link_map_t;
+
+typedef struct lx_r_debug {
+ int r_version; /* Version number for this protocol. */
+ uint32_t r_map; /* Head of the chain of loaded objects. */
+
+ /*
+ * This is the address of a function internal to the run-time linker,
+ * that will always be called when the linker begins to map in a
+ * library or unmap it, and again when the mapping change is complete.
+ * The debugger can set a breakpoint at this address if it wants to
+ * notice shared object mapping changes.
+ */
+ uint32_t r_brk;
+ r_state_e r_state; /* defined the same way between lx/solaris */
+ uint32_t r_ldbase; /* Base address the linker is loaded at. */
+} lx_r_debug_t;
+
+static uint32_t
+lx_ldb_getauxval32(struct ps_prochandle *php, int type)
+{
+ const auxv_t *auxvp = NULL;
+
+ if (ps_pauxv(php, &auxvp) != PS_OK)
+ return ((uint32_t)-1);
+
+ while (auxvp->a_type != AT_NULL) {
+ if (auxvp->a_type == type)
+ return ((uint32_t)(uintptr_t)auxvp->a_un.a_ptr);
+ auxvp++;
+ }
+ return ((uint32_t)-1);
+}
+
+/*
+ * A key difference between the linux linker and ours' is that the linux
+ * linker adds the base address of segments to certain values in the
+ * segments' ELF header. As an example, look at the address of the
+ * DT_HASH hash table in a Solaris section - it is a relative address
+ * which locates the start of the hash table, relative to the beginning
+ * of the ELF file. However, when the linux linker loads a section, it
+ * modifies the in-memory ELF image by changing address of the hash
+ * table to be an absolute address. This is only done for libraries - not for
+ * executables.
+ *
+ * Solaris tools expect the relative address to remain relative, so
+ * here we will modify the in-memory ELF image so that it once again
+ * contains relative addresses.
+ *
+ * To accomplish this, we walk through all sections in the target.
+ * Linux sections are identified by pointing to the linux linker or libc in the
+ * DT_NEEDED section. For all matching sections, we subtract the segment
+ * base address to get back to relative addresses.
+ */
+static rd_err_e
+lx_ldb_get_dyns32(rd_helper_data_t rhd,
+ psaddr_t addr, void **dynpp, size_t *dynpp_sz)
+{
+ lx_rd_t *lx_rd = (lx_rd_t *)rhd;
+ rd_agent_t *rap = lx_rd->lr_rap;
+ Elf32_Ehdr ehdr;
+ Elf32_Dyn *dynp = NULL;
+ size_t dynp_sz;
+ uint_t ndyns;
+ int i;
+
+ ps_plog("lx_ldb_get_dyns: invoked for object at 0x%p", addr);
+
+ /* Read in a copy of the ehdr */
+ if (_rd_get_ehdr32(rap, addr, &ehdr, NULL) != RD_OK) {
+ ps_plog("lx_ldb_get_dyns: _rd_get_ehdr() failed");
+ return (RD_ERR);
+ }
+
+ /* read out the PT_DYNAMIC elements for this object */
+ if (_rd_get_dyns32(rap, addr, &dynp, &dynp_sz) != RD_OK) {
+ ps_plog("lx_ldb_get_dyns: _rd_get_dyns() failed");
+ return (RD_ERR);
+ }
+
+ /*
+ * From here on out if we encounter an error we'll just return
+ * success and pass back the unmolested dynamic elements that
+ * we've already obtained.
+ */
+ if (dynpp != NULL)
+ *dynpp = dynp;
+ if (dynpp_sz != NULL)
+ *dynpp_sz = dynp_sz;
+ ndyns = dynp_sz / sizeof (Elf32_Dyn);
+
+ /* If this isn't a dynamic object, there's nothing left todo */
+ if (ehdr.e_type != ET_DYN) {
+ ps_plog("lx_ldb_get_dyns: done: not a shared object");
+ return (RD_OK);
+ }
+
+ /*
+ * Before we blindly start changing dynamic section addresses
+ * we need to figure out if the current object that we're looking
+ * at is a linux object or a solaris object. To do this first
+ * we need to find the string tab dynamic section element.
+ */
+ for (i = 0; i < ndyns; i++) {
+ if (dynp[i].d_tag == DT_STRTAB)
+ break;
+ }
+ if (i == ndyns) {
+ ps_plog("lx_ldb_get_dyns: "
+ "failed to find string tab in the dynamic section");
+ return (RD_OK);
+ }
+
+ /*
+ * Check if the strtab value looks like an offset or an address.
+ * It's an offset if the value is less then the base address that
+ * the object is loaded at, or if the value is less than the offset
+ * of the section headers in the same elf object. This check isn't
+ * perfect, but in practice it's good enough.
+ */
+ if ((dynp[i].d_un.d_ptr < addr) ||
+ (dynp[i].d_un.d_ptr < ehdr.e_shoff)) {
+ ps_plog("lx_ldb_get_dyns: "
+ "doesn't appear to be an lx object");
+ return (RD_OK);
+ }
+
+ /*
+ * This seems to be a a linux object, so we'll patch up the dynamic
+ * section addresses
+ */
+ ps_plog("lx_ldb_get_dyns: "
+ "patching up lx object dynamic section addresses");
+ for (i = 0; i < ndyns; i++) {
+ switch (dynp[i].d_tag) {
+ case DT_PLTGOT:
+ case DT_HASH:
+ case DT_STRTAB:
+ case DT_SYMTAB:
+ case DT_RELA:
+ case DT_REL:
+ case DT_DEBUG:
+ case DT_JMPREL:
+ case DT_VERSYM:
+ if (dynp[i].d_un.d_val > addr) {
+ dynp[i].d_un.d_ptr -= addr;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ return (RD_OK);
+}
+
+static void
+lx_ldb_fini32(rd_helper_data_t rhd)
+{
+ lx_rd_t *lx_rd = (lx_rd_t *)rhd;
+ ps_plog("lx_ldb_fini: cleaning up lx helper");
+ free(lx_rd);
+}
+
+/*
+ * The linux linker has an r_debug structure somewhere in its data section that
+ * contains the address of the head of the link map list. To find this, we will
+ * use the DT_DEBUG token in the executable's dynamic section. The linux linker
+ * wrote the address of its r_debug structure to the DT_DEBUG dynamic entry. We
+ * get the address of the executable's program headers from the
+ * AT_SUN_BRAND_LX_PHDR aux vector entry. From there, we calculate the
+ * address of the Elf header, and from there we can easily get to the DT_DEBUG
+ * entry.
+ */
+static rd_helper_data_t
+lx_ldb_init32(rd_agent_t *rap, struct ps_prochandle *php)
+{
+ lx_rd_t *lx_rd;
+ uint32_t addr, phdr_addr, dyn_addr;
+ uint32_t symtab, strtab, offs;
+ uint32_t vaddr, memsz;
+ caddr_t mem;
+ Elf32_Dyn *dyn;
+ Elf32_Phdr phdr, *ph, *dph, *phdrs;
+ Elf32_Ehdr ehdr;
+ Elf32_Sym *sym;
+ int i, dyn_count;
+
+ lx_rd = calloc(sizeof (lx_rd_t), 1);
+ if (lx_rd == NULL) {
+ ps_plog("lx_ldb_init: cannot allocate memory");
+ return (NULL);
+ }
+ lx_rd->lr_rap = rap;
+ lx_rd->lr_php = php;
+
+ phdr_addr = lx_ldb_getauxval32(php, AT_SUN_BRAND_LX_PHDR);
+ if (phdr_addr == (uint32_t)-1) {
+ ps_plog("lx_ldb_init: no LX_PHDR found in aux vector");
+ return (NULL);
+ }
+ ps_plog("lx_ldb_init: found LX_PHDR auxv phdr at: 0x%p",
+ phdr_addr);
+
+ if (ps_pread(php, phdr_addr, &phdr, sizeof (phdr)) != PS_OK) {
+ ps_plog("lx_ldb_init: couldn't read phdr at 0x%p",
+ phdr_addr);
+ free(lx_rd);
+ return (NULL);
+ }
+
+ /* The ELF header should be before the program header in memory */
+ lx_rd->lr_exec = addr = phdr_addr - phdr.p_offset;
+ if (ps_pread(php, addr, &ehdr, sizeof (ehdr)) != PS_OK) {
+ ps_plog("lx_ldb_init: couldn't read ehdr at 0x%p",
+ lx_rd->lr_exec);
+ free(lx_rd);
+ return (NULL);
+ }
+ ps_plog("lx_ldb_init: read ehdr at: 0x%p", addr);
+ ps_plog("lx_ldb_init: ehdr t %d ent 0x%p poff 0x%p psize %d pnum %d",
+ ehdr.e_type, ehdr.e_entry, ehdr.e_phoff, ehdr.e_phentsize,
+ ehdr.e_phnum);
+
+ if ((phdrs = malloc(ehdr.e_phnum * ehdr.e_phentsize)) == NULL) {
+ ps_plog("lx_ldb_init: couldn't alloc phdrs memory");
+ free(lx_rd);
+ return (NULL);
+ }
+
+ if (ps_pread(php, phdr_addr, phdrs, ehdr.e_phnum * ehdr.e_phentsize) !=
+ PS_OK) {
+ ps_plog("lx_ldb_init: couldn't read phdrs at 0x%p",
+ phdr_addr);
+ free(lx_rd);
+ free(phdrs);
+ return (NULL);
+ }
+
+ /* program headers */
+ ps_plog("lx_ldb_init: read %d phdrs at: 0x%p",
+ ehdr.e_phnum, phdr_addr);
+
+ for (i = 0, ph = phdrs; i < ehdr.e_phnum; i++,
+ /*LINTED */
+ ph = (Elf32_Phdr *)((char *)ph + ehdr.e_phentsize)) {
+ ps_plog("lx_ldb_init: ph[%d] 0x%p type %d", i,
+ (phdr_addr + ((char *)ph - (char *)phdrs)), ph->p_type);
+ if (ph->p_type == PT_DYNAMIC)
+ break;
+ }
+ if (i == ehdr.e_phnum) {
+ ps_plog("lx_ldb_init: no PT_DYNAMIC in executable");
+ free(lx_rd);
+ free(phdrs);
+ return (NULL);
+ }
+ ps_plog("lx_ldb_init: found PT_DYNAMIC phdr[%d] at: 0x%p",
+ i, (phdr_addr + ((char *)ph - (char *)phdrs)));
+ ps_plog("lx_ldb_init: ph t 0x%x f 0x%x o 0x%p v 0x%p s %d",
+ ph->p_type, ph->p_flags, ph->p_offset, ph->p_vaddr, ph->p_filesz);
+
+ if ((dyn = malloc(ph->p_filesz)) == NULL) {
+ ps_plog("lx_ldb_init: couldn't alloc for PT_DYNAMIC");
+ free(lx_rd);
+ free(phdrs);
+ return (NULL);
+ }
+
+ /*
+ * Unclear why the dyn_addr works sometimes with one value and
+ * sometimes for the other, so we handle both cases.
+ */
+
+ dyn_count = ph->p_filesz / sizeof (Elf32_Dyn);
+ ps_plog("lx_ldb_init: dyn_count %d %d", dyn_count, sizeof (Elf32_Dyn));
+ dyn_addr = addr + ph->p_vaddr;
+ ps_plog("lx_ldb_init: dyn_addr 0x%p 0x%x = 0x%p",
+ addr, ph->p_offset, dyn_addr);
+ if (ps_pread(php, dyn_addr, dyn, ph->p_filesz) != PS_OK) {
+ ps_plog("lx_ldb_init: couldn't read dynamic at 0x%p, "
+ "trying dyn_addr 0x%p",
+ dyn_addr, ph->p_vaddr);
+
+ dyn_addr = ph->p_vaddr;
+ if (ps_pread(php, dyn_addr, dyn, ph->p_filesz) != PS_OK) {
+ ps_plog("lx_ldb_init: couldn't read dynamic at 0x%p",
+ dyn_addr);
+
+ free(lx_rd);
+ free(phdrs);
+ free(dyn);
+ return (NULL);
+ }
+ }
+ ps_plog("lx_ldb_init: read %d dynamic headers at: 0x%p",
+ dyn_count, dyn_addr);
+
+ for (i = 0; i < dyn_count; i++) {
+ if (dyn[i].d_tag == DT_DEBUG) {
+ lx_rd->lr_rdebug = dyn[i].d_un.d_ptr;
+ break;
+ }
+ }
+ free(phdrs);
+ free(dyn);
+
+ if (lx_rd->lr_rdebug != 0) {
+ ps_plog("lx_ldb_init: found DT_DEBUG: 0x%p", lx_rd->lr_rdebug);
+ return ((rd_helper_data_t)lx_rd);
+ }
+
+ ps_plog("lx_ldb_init: no DT_DEBUG found in exe; looking for r_debug");
+
+ /*
+ * If we didn't find DT_DEBUG, we're going to employ the same fallback
+ * as gdb: pawing through the dynamic linker's symbol table looking
+ * for the r_debug symbol.
+ */
+ addr = lx_ldb_getauxval32(php, AT_SUN_BRAND_LX_INTERP);
+
+ if (addr == (uint32_t)-1) {
+ ps_plog("lx_ldb_init: no interpreter; failing");
+ free(lx_rd);
+ return (NULL);
+ }
+
+ ps_plog("lx_ldb_init: reading interp ehdr at 0x%p", addr);
+
+ if (ps_pread(php, addr, &ehdr, sizeof (ehdr)) != PS_OK) {
+ ps_plog("lx_ldb_init: couldn't read interp ehdr at 0x%p", addr);
+ free(lx_rd);
+ return (NULL);
+ }
+
+ if (ehdr.e_type != ET_DYN) {
+ ps_plog("lx_ldb_init: interp ehdr not of type ET_DYN");
+ free(lx_rd);
+ return (NULL);
+ }
+
+ phdr_addr = addr + ehdr.e_phoff;
+
+ if ((phdrs = malloc(ehdr.e_phnum * ehdr.e_phentsize)) == NULL) {
+ ps_plog("lx_ldb_init: couldn't alloc interp phdrs memory");
+ free(lx_rd);
+ return (NULL);
+ }
+
+ if (ps_pread(php, phdr_addr, phdrs,
+ ehdr.e_phnum * ehdr.e_phentsize) != PS_OK) {
+ ps_plog("lx_ldb_init: couldn't read interp phdrs at 0x%p",
+ phdr_addr);
+ free(lx_rd);
+ free(phdrs);
+ return (NULL);
+ }
+
+ ps_plog("lx_ldb_init: read %d interp phdrs at: 0x%p",
+ ehdr.e_phnum, phdr_addr);
+
+ vaddr = (uint32_t)-1;
+ memsz = 0;
+
+ for (i = 0, ph = phdrs, dph = NULL; i < ehdr.e_phnum; i++,
+ /*LINTED */
+ ph = (Elf32_Phdr *)((char *)ph + ehdr.e_phentsize)) {
+ /*
+ * Keep track of our lowest PT_LOAD address, as this segment
+ * contains the DT_SYMTAB and DT_STRTAB.
+ */
+ if (ph->p_type == PT_LOAD && ph->p_vaddr < vaddr) {
+ vaddr = ph->p_vaddr;
+ memsz = ph->p_memsz;
+ }
+
+ if (ph->p_type == PT_DYNAMIC)
+ dph = ph;
+ }
+
+ if (vaddr == (uint32_t)-1 || memsz == 0) {
+ ps_plog("lx_ldb_init: no PT_LOAD section in interp");
+ free(lx_rd);
+ free(phdrs);
+ return (NULL);
+ }
+
+ ps_plog("lx_ldb_init: found interp PT_LOAD to be %d bytes at 0x%p",
+ memsz, vaddr);
+
+ if ((ph = dph) == NULL) {
+ ps_plog("lx_ldb_init: no PT_DYNAMIC in interp");
+ free(lx_rd);
+ free(phdrs);
+ return (NULL);
+ }
+
+ ps_plog("lx_ldb_init: found interp PT_DYNAMIC phdr[%d] at: 0x%p",
+ i, (phdr_addr + ((char *)ph - (char *)phdrs)));
+
+ if ((dyn = malloc(ph->p_filesz)) == NULL) {
+ ps_plog("lx_ldb_init: couldn't alloc for interp PT_DYNAMIC");
+ free(lx_rd);
+ free(phdrs);
+ return (NULL);
+ }
+
+ dyn_addr = addr + ph->p_offset;
+ dyn_count = ph->p_filesz / sizeof (Elf32_Dyn);
+
+ if (ps_pread(php, dyn_addr, dyn, ph->p_filesz) != PS_OK) {
+ ps_plog("lx_ldb_init: couldn't read interp dynamic at 0x%p",
+ dyn_addr);
+ free(lx_rd);
+ free(phdrs);
+ free(dyn);
+ return (NULL);
+ }
+
+ free(phdrs);
+
+ ps_plog("lx_ldb_init: read %d interp dynamic headers at: 0x%p",
+ dyn_count, dyn_addr);
+
+ /*
+ * As noted in lx_ldb_get_dyns32(), in Linux, the PT_DYNAMIC table
+ * is adjusted to represent absolute addresses instead of offsets.
+ * This is not true for the interpreter, however -- where the values
+ * will be represented as offsets from the lowest PT_LOAD p_vaddr.
+ */
+ symtab = strtab = (uint32_t)-1;
+
+ for (i = 0; i < dyn_count; i++) {
+ if (dyn[i].d_tag == DT_STRTAB)
+ strtab = dyn[i].d_un.d_ptr - vaddr;
+
+ if (dyn[i].d_tag == DT_SYMTAB)
+ symtab = dyn[i].d_un.d_ptr - vaddr;
+ }
+
+ free(dyn);
+
+ if (strtab == (uint32_t)-1 || strtab > memsz) {
+ ps_plog("lx_ldb_init: didn't find valid interp strtab");
+ free(lx_rd);
+ return (NULL);
+ }
+
+ if (symtab == (uint32_t)-1 || symtab > memsz) {
+ ps_plog("lx_ldb_init: didn't find valid interp symtab");
+ free(lx_rd);
+ return (NULL);
+ }
+
+ ps_plog("lx_ldb_init: strtab is 0x%p, symtab is 0x%p",
+ addr + strtab, addr + symtab);
+
+ if ((mem = malloc(memsz)) == NULL) {
+ ps_plog("lx_ldb_init: couldn't allocate interp "
+ "buffer of 0x%p bytes", memsz);
+ free(lx_rd);
+ return (NULL);
+ }
+
+ if (ps_pread(php, addr, mem, memsz) != PS_OK) {
+ ps_plog("lx_ldb_init: couldn't read interp at 0x%p", addr);
+ free(lx_rd);
+ free(mem);
+ return (NULL);
+ }
+
+ /*
+ * We make an assumption that is made elsewhere in the Linux linker:
+ * that the DT_SYMTAB immediately precedes the DT_STRTAB.
+ */
+ for (offs = symtab; offs < strtab; offs += sizeof (Elf32_Sym)) {
+ uint32_t p;
+
+ sym = (Elf32_Sym *)&mem[offs];
+
+ if (sym->st_name > memsz) {
+ ps_plog("lx_ldb_init: invalid st_name at sym 0x%p",
+ addr + offs);
+ continue;
+ }
+
+ p = sym->st_name;
+
+ if (strtab + p > memsz) {
+ ps_plog("lx_ldb_init: invalid symbol address 0x%p, "
+ "memsz 0x%p", strtab + p, memsz);
+ continue;
+ }
+
+ if (mem[strtab + p] == '\0')
+ continue;
+
+ /* Sometimes we're pointing into the middle of a symbol? */
+ while ((strtab + p) > 0 && (strtab + p) < memsz &&
+ mem[strtab + p] != '\0')
+ p--;
+ p++;
+
+ ps_plog("lx_ldb_init: interp symbol (0x%p) %s",
+ strtab + p, &mem[strtab + p]);
+
+ if (strcmp(&mem[strtab + p], "_r_debug") == 0)
+ break;
+ }
+
+ if (offs >= strtab) {
+ ps_plog("lx_ldb_init: no _r_debug found in interpreter");
+ free(mem);
+ free(lx_rd);
+ return (NULL);
+ }
+
+ lx_rd->lr_rdebug = (sym->st_value - vaddr) + addr;
+ ps_plog("lx_ldb_init: found _r_debug at 0x%p", lx_rd->lr_rdebug);
+ free(mem);
+
+ return ((rd_helper_data_t)lx_rd);
+}
+
+/*
+ * Given the address of an ELF object in the target, return its size and
+ * the proper link map ID.
+ */
+static size_t
+lx_elf_props32(struct ps_prochandle *php, uint32_t addr, psaddr_t *data_addr)
+{
+ Elf32_Ehdr ehdr;
+ Elf32_Phdr *phdrs, *ph;
+ int i;
+ uint32_t min = (uint32_t)-1;
+ uint32_t max = 0;
+ size_t sz = NULL;
+
+ if (ps_pread(php, addr, &ehdr, sizeof (ehdr)) != PS_OK) {
+ ps_plog("lx_elf_props: Couldn't read ELF header at 0x%p",
+ addr);
+ return (0);
+ }
+
+ if ((phdrs = malloc(ehdr.e_phnum * ehdr.e_phentsize)) == NULL)
+ return (0);
+
+ if (ps_pread(php, addr + ehdr.e_phoff, phdrs, ehdr.e_phnum *
+ ehdr.e_phentsize) != PS_OK) {
+ ps_plog("lx_elf_props: Couldn't read program headers at 0x%p",
+ addr + ehdr.e_phoff);
+ return (0);
+ }
+
+ for (i = 0, ph = phdrs; i < ehdr.e_phnum; i++,
+ /*LINTED */
+ ph = (Elf32_Phdr *)((char *)ph + ehdr.e_phentsize)) {
+
+ if (ph->p_type != PT_LOAD)
+ continue;
+
+ if ((ph->p_flags & (PF_W | PF_R)) == (PF_W | PF_R)) {
+ *data_addr = ph->p_vaddr;
+ if (ehdr.e_type == ET_DYN)
+ *data_addr += addr;
+ if (*data_addr & (ph->p_align - 1))
+ *data_addr = *data_addr & (~(ph->p_align -1));
+ }
+
+ if (ph->p_vaddr < min)
+ min = ph->p_vaddr;
+
+ if (ph->p_vaddr > max) {
+ max = ph->p_vaddr;
+ sz = ph->p_memsz + max - min;
+ if (sz & (ph->p_align - 1))
+ sz = (sz & (~(ph->p_align - 1))) + ph->p_align;
+ }
+ }
+
+ free(phdrs);
+ return (sz);
+}
+
+static int
+lx_ldb_loadobj_iter32(rd_helper_data_t rhd, rl_iter_f *cb, void *client_data)
+{
+ lx_rd_t *lx_rd = (lx_rd_t *)rhd;
+ struct ps_prochandle *php = lx_rd->lr_php;
+ lx_r_debug_t r_debug;
+ lx_link_map_t map;
+ uint32_t p = NULL;
+ int rc;
+ rd_loadobj_t exec;
+
+ if ((rc = ps_pread(php, (psaddr_t)lx_rd->lr_rdebug, &r_debug,
+ sizeof (r_debug))) != PS_OK) {
+ ps_plog("lx_ldb_loadobj_iter: "
+ "Couldn't read linux r_debug at 0x%p", lx_rd->lr_rdebug);
+ return (rc);
+ }
+
+ p = r_debug.r_map;
+
+ /*
+ * The first item on the link map list is for the executable, but it
+ * doesn't give us any useful information about it. We need to
+ * synthesize a rd_loadobj_t for the client.
+ *
+ * Linux doesn't give us the executable name, so we'll get it from
+ * the AT_EXECNAME entry instead.
+ */
+ if ((rc = ps_pread(php, (psaddr_t)p, &map, sizeof (map))) != PS_OK) {
+ ps_plog("lx_ldb_loadobj_iter: "
+ "Couldn't read linux link map at 0x%p", p);
+ return (rc);
+ }
+
+ bzero(&exec, sizeof (exec));
+ exec.rl_base = lx_rd->lr_exec;
+ exec.rl_dynamic = map.lxm_ld;
+ exec.rl_nameaddr = lx_ldb_getauxval32(php, AT_SUN_EXECNAME);
+ exec.rl_lmident = LM_ID_BASE;
+
+ exec.rl_bend = exec.rl_base +
+ lx_elf_props32(php, lx_rd->lr_exec, &exec.rl_data_base);
+
+ if ((*cb)(&exec, client_data) == 0) {
+ ps_plog("lx_ldb_loadobj_iter: "
+ "client callb failed for executable");
+ return (PS_ERR);
+ }
+ ps_plog("lx_ldb_loadobj_iter: exec base 0x%p dyn 0x%p",
+ exec.rl_base, exec.rl_dynamic);
+
+ for (p = map.lxm_next; p != NULL; p = map.lxm_next) {
+ rd_loadobj_t obj;
+
+ if ((rc = ps_pread(php, (psaddr_t)p, &map, sizeof (map))) !=
+ PS_OK) {
+ ps_plog("lx_ldb_loadobj_iter: "
+ "Couldn't read lk map at %p", p);
+ return (rc);
+ }
+
+ /*
+ * The linux link map has less information than the Solaris one.
+ * We need to go fetch the missing information from the ELF
+ * headers.
+ */
+
+ obj.rl_nameaddr = (psaddr_t)map.lxm_name;
+ obj.rl_base = map.lxm_addr;
+ obj.rl_refnameaddr = (psaddr_t)map.lxm_name;
+ obj.rl_plt_base = NULL;
+ obj.rl_plt_size = 0;
+ obj.rl_lmident = LM_ID_BASE;
+
+ ps_plog("lx_ldb_loadobj_iter: map base 0x%p 0x%p",
+ obj.rl_base, obj.rl_nameaddr);
+
+ /*
+ * Ugh - we have to walk the ELF stuff, find the PT_LOAD
+ * sections, and calculate the end of the file's mappings
+ * ourselves.
+ */
+
+ obj.rl_bend = map.lxm_addr +
+ lx_elf_props32(php, map.lxm_addr, &obj.rl_data_base);
+ obj.rl_padstart = obj.rl_base;
+ obj.rl_padend = obj.rl_bend;
+ obj.rl_dynamic = map.lxm_ld;
+ obj.rl_tlsmodid = 0;
+
+ ps_plog("lx_ldb_loadobj_iter: 0x%p to 0x%p",
+ obj.rl_base, obj.rl_bend);
+
+ if ((*cb)(&obj, client_data) == 0) {
+ ps_plog("lx_ldb_loadobj_iter: "
+ "Client callback failed on %s", map.lxm_name);
+ return (rc);
+ }
+ }
+ return (RD_OK);
+}
+
+/*
+ * Librtld_db plugin linkage struct.
+ *
+ * When we get loaded by librtld_db, it will look for the symbol below
+ * to find our plugin entry points.
+ */
+rd_helper_ops_t RTLD_DB_BRAND_OPS = {
+ LM_ID_BRAND,
+ lx_ldb_init32,
+ lx_ldb_fini32,
+ lx_ldb_loadobj_iter32,
+ lx_ldb_get_dyns32
+};
diff --git a/usr/src/tools/ctf/dump/Makefile.com b/usr/src/lib/brand/lx/librtld_db/common/mapfile-vers
index 9877fa06a3..5e328d6075 100644
--- a/usr/src/tools/ctf/dump/Makefile.com
+++ b/usr/src/lib/brand/lx/librtld_db/common/mapfile-vers
@@ -18,50 +18,41 @@
#
# CDDL HEADER END
#
+
#
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-.KEEP_STATE:
-.SUFFIXES:
-
-PROG = ctfdump
-SRCS = dump.c utils.c symbol.c
-
-include ../../Makefile.ctf
-
-LDFLAGS += -L$(NATIVE_ADJUNCT)/lib
-LDLIBS += -lelf -lz
-
-OBJS = $(SRCS:%.c=%.o)
-LINTFILES = $(SRCS:%.c=%.ln)
-
-CERRWARN += -_gcc=-Wno-uninitialized
-
-.NO_PARALLEL:
-.PARALLEL: $(OBJS) $(LINTFILES)
-
-all: $(PROG)
-
-$(PROG): $(OBJS)
- $(LINK.c) $(OBJS) -o $@ $(LDLIBS)
- $(POST_PROCESS)
-
-%.o: ../%.c
- $(COMPILE.c) $<
-
-$(ROOTONBLDMACHPROG): $(PROG)
-
-install: $(ROOTONBLDMACHPROG)
-
-clean:
- $(RM) $(OBJS) $(LINTFILES)
-
-%.ln: ../%.c
- $(LINT.c) -c $<
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
-lint: $(LINTFILES)
- $(LINT) $(LINTFLAGS) $(LINTFILES) $(LDLIBS)
+{
+ global:
+ rtld_db_brand_ops32;
+ local:
+ *;
+};
-include ../../Makefile.ctf.targ
+#Externally defined symbols
+{
+ global:
+ ps_pauxv = NODIRECT PARENT;
+ ps_pdmodel = NODIRECT PARENT;
+ ps_pglobal_lookup = NODIRECT PARENT;
+ ps_pglobal_sym = NODIRECT PARENT;
+ ps_plog = NODIRECT PARENT;
+ ps_pread = NODIRECT PARENT;
+ ps_pwrite = NODIRECT PARENT;
+};
diff --git a/usr/src/lib/brand/lx/librtld_db/i386/Makefile b/usr/src/lib/brand/lx/librtld_db/i386/Makefile
new file mode 100644
index 0000000000..b5f780c072
--- /dev/null
+++ b/usr/src/lib/brand/lx/librtld_db/i386/Makefile
@@ -0,0 +1,33 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../Makefile.com
+
+CLOBBERFILES = $(ROOTLIBDIR)/$(DYNLIB)
+
+install: all $(ROOTLIBS)
diff --git a/usr/src/lib/brand/lx/lx_brand/Makefile b/usr/src/lib/brand/lx/lx_brand/Makefile
new file mode 100644
index 0000000000..fc217b672c
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/Makefile
@@ -0,0 +1,49 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+# Copyright 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../../../Makefile.lib
+
+default: all
+
+SUBDIRS= $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+all := TARGET= all
+clean := TARGET= clean
+clobber := TARGET= clobber
+install := TARGET= install
+lint := TARGET= lint
+_msg := TARGET= _msg
+
+.KEEP_STATE:
+
+all install clean clobber lint _msg: $(SUBDIRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/lib/brand/lx/lx_brand/Makefile.com b/usr/src/lib/brand/lx/lx_brand/Makefile.com
new file mode 100644
index 0000000000..12218fd3ba
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/Makefile.com
@@ -0,0 +1,103 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+# Copyright 2016 Joyent, Inc.
+#
+
+LX_CMN = $(SRC)/common/brand/lx
+
+LIBRARY = lx_brand.a
+VERS = .1
+COBJS = aio.o \
+ capabilities.o \
+ clock.o \
+ clone.o \
+ debug.o \
+ dir.o \
+ file.o \
+ fcntl.o \
+ fork.o \
+ id.o \
+ lx_brand.o \
+ mem.o \
+ misc.o \
+ module.o \
+ mount.o \
+ mount_nfs.o \
+ pgrp.o \
+ priority.o \
+ ptrace.o \
+ sched.o \
+ sendfile.o \
+ signal.o \
+ socket.o \
+ stack.o \
+ statfs.o \
+ sysctl.o \
+ sysv_ipc.o \
+ time.o \
+ truncate.o
+
+CMNOBJS = lx_auxv.o \
+ lx_errno.o \
+ lx_signum.o
+ASOBJS = lx_handler.o lx_crt.o
+OBJECTS = $(CMNOBJS) $(COBJS) $(ASOBJS)
+
+USDT_PROVIDERS = lx_provider.d
+
+include ../../Makefile.lx
+include ../../../../Makefile.lib
+
+CSRCS = $(COBJS:%o=../common/%c) $(CMNOBJS:%o=$(LX_CMN)/%c)
+ASSRCS = $(ASOBJS:%o=$(ISASRCDIR)/%s)
+SRCS = $(CSRCS) $(ASSRCS)
+
+SRCDIR = ../common
+UTSBASE = ../../../../../uts
+
+LIBS = $(DYNLIB)
+LDLIBS += -lc -lsocket -lmapmalloc -lproc -lrtld_db -lrpcsvc -lnsl
+DYNFLAGS += $(DYNFLAGS_$(CLASS))
+DYNFLAGS += $(BLOCAL) $(ZNOVERSION) -Wl,-e_start -M../common/mapfile
+CFLAGS += $(CCVERBOSE)
+CPPFLAGS += -D_REENTRANT -I. -I../ -I$(UTSBASE)/common/brand/lx -I$(LX_CMN)
+ASFLAGS = -P $(ASFLAGS_$(CURTYPE)) -D_ASM -I../ \
+ -I$(UTSBASE)/common/brand/lx
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+include ../../../../Makefile.targ
+include ../../../../Makefile.usdt
+
+pics/%.o: $(ISASRCDIR)/%.s
+ $(COMPILE.s) -o $@ $<
+ $(POST_PROCESS_O)
+
+pics/%.o: $(LX_CMN)/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
diff --git a/usr/src/lib/brand/lx/lx_brand/amd64/Makefile b/usr/src/lib/brand/lx/lx_brand/amd64/Makefile
new file mode 100644
index 0000000000..a1db90cd38
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/amd64/Makefile
@@ -0,0 +1,42 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+# Copyright 2015 Joyent, Inc.
+#
+#
+# lib/brand/lx/amd64/Makefile
+
+ISASRCDIR=.
+
+ASFLAGS += -P -D_ASM
+
+include ../Makefile.com
+include $(SRC)/lib/Makefile.lib.64
+
+DYNFLAGS += -Wl,-I/native/lib/64/ld.so.1
+CPPFLAGS += -D_SYSCALL32
+
+POFILE= lx_brand.po
+MSGFILES= $(CSRCS)
+
+ASSYMDEP_OBJS = lx_handler.o
+
+install: all $(ROOTLIBS64)
+
+$(POFILE): $(MSGFILES)
+ $(BUILDPO.msgfiles)
+
+_msg: $(MSGDOMAINPOFILE)
+
+include $(SRC)/Makefile.msg.targ
diff --git a/usr/src/lib/brand/lx/lx_brand/amd64/lx_crt.s b/usr/src/lib/brand/lx/lx_brand/amd64/lx_crt.s
new file mode 100644
index 0000000000..13a88a3d50
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/amd64/lx_crt.s
@@ -0,0 +1,62 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/asm_linkage.h>
+
+#if defined(lint)
+
+void
+_start(void)
+{
+}
+
+#else /* lint */
+
+ /*
+ * C language startup routine for the lx brand shared library.
+ *
+ * That routine expects to be called with the following arguments:
+ * brand_init(int argc, char *argv[], char *envp[])
+ *
+ * There are no arguments explicitly passed to this entry point,
+ * routine, but we do know how our initial stack has been setup by
+ * the kernel. The stack format is documented in:
+ * usr/src/cmd/sgs/rtld/amd64/boot.s
+ *
+ * So this routine will troll through the stack to setup the argument
+ * values for the common brand library startup routine and then invoke
+ * it. This routine is modeled after the default crt1.s`_start()
+ * routines.
+ */
+ ENTRY_NP(_start)
+ pushq $0 / Build a stack frame. retpc = NULL
+ pushq $0 / fp = NULL
+ movq %rsp, %rbp / first stack frame
+
+ /*
+ * Calculate the location of the envp array by adding the size of
+ * the argv array to the start of the argv array.
+ */
+ movq 16(%rbp), %rdi / argc in %rdi (1st param)
+ leaq 24(%rbp), %rsi / &argv[0] in %rsi (2nd param)
+ leaq 32(%rbp,%rdi,8), %rdx / envp in %rdx (3rd param)
+ call lx_init
+
+ /* lx_init will never return. */
+ /*NOTREACHED*/
+ SET_SIZE(_start)
+#endif /* lint */
diff --git a/usr/src/lib/brand/lx/lx_brand/amd64/lx_handler.s b/usr/src/lib/brand/lx/lx_brand/amd64/lx_handler.s
new file mode 100644
index 0000000000..a01d66554d
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/amd64/lx_handler.s
@@ -0,0 +1,53 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <sys/asm_linkage.h>
+#include <sys/regset.h>
+#include <sys/segments.h>
+#include <sys/syscall.h>
+#include <sys/lx_brand.h>
+
+#if defined(_ASM)
+#include <sys/lx_signal.h>
+#include <sys/lx_syscall.h>
+#endif /* _ASM */
+
+/* 64-bit signal syscall numbers */
+#define LX_SYS_rt_sigreturn 15
+
+#if defined(lint)
+
+#include <sys/types.h>
+#include <sys/regset.h>
+#include <sys/signal.h>
+
+void
+lx_rt_sigreturn_tramp(void)
+{}
+
+#else /* lint */
+
+ /*
+ * Trampoline code is called by the return at the end of a Linux
+ * signal handler to return control to the interrupted application
+ * via the lx_rt_sigreturn() syscall.
+ */
+ ENTRY_NP(lx_rt_sigreturn_tramp)
+ movq $LX_SYS_rt_sigreturn, %rax
+ syscall
+ SET_SIZE(lx_rt_sigreturn_tramp)
+
+#endif /* lint */
diff --git a/usr/src/lib/brand/lx/lx_brand/common/aio.c b/usr/src/lib/brand/lx/lx_brand/common/aio.c
new file mode 100644
index 0000000000..9a008b66b9
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/aio.c
@@ -0,0 +1,556 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/mman.h>
+#include <thread.h>
+#include <synch.h>
+#include <port.h>
+#include <aio.h>
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <strings.h>
+#include <stdlib.h>
+#include <sys/lx_types.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_aio.h>
+
+/*
+ * We implement the Linux asynchronous I/O system calls by using the POSIX
+ * asynchronous I/O facilities together with event port notification. This
+ * approach allows us to broadly approximate Linux semantics, but see
+ * lx_io_cancel() for some limitations.
+ *
+ * NOTE:
+ * The Linux implementation of the io_* syscalls is not exposed via glibc.
+ * These syscalls are documented to use an aio_context_t for the context
+ * parameter. On Linux this is a ulong_t. On Linux the contexts live in the
+ * kernel address space and are looked up using the aio_context_t parameter.
+ * The Linux libaio interface uses a different type for the context_t parameter.
+ *
+ * Our implementation assumes the lx_aio_context_t can be treated as a
+ * pointer. This works fortuitously because a ulong_t is the same size as a
+ * pointer. Our implementation maps the contexts into the program's address
+ * space so the aio_context_t we pass back and forth will be valid as a
+ * pointer for the program. This is similar to the native aio implementation.
+ */
+
+typedef struct lx_aiocb {
+ struct aiocb lxaiocb_cb; /* POSIX AIO control block */
+ struct lx_aiocb *lxaiocb_next; /* next outstanding/free I/O */
+ struct lx_aiocb *lxaiocb_prev; /* prev outstanding I/O */
+ uintptr_t lxaiocb_iocbp; /* pointer to lx_iocb_t */
+ uintptr_t lxaiocb_data; /* data payload */
+} lx_aiocb_t;
+
+typedef struct lx_aio_ctxt {
+ mutex_t lxaio_lock; /* lock protecting context */
+ boolean_t lxaio_destroying; /* boolean: being destroyed */
+ cond_t lxaio_destroyer; /* destroyer's condvar */
+ int lxaio_waiters; /* number of waiters */
+ size_t lxaio_size; /* total size of mapping */
+ int lxaio_port; /* port for completion */
+ lx_aiocb_t *lxaio_outstanding; /* outstanding I/O */
+ lx_aiocb_t *lxaio_free; /* free I/O control blocks */
+ int lxaio_nevents; /* max number of events */
+} lx_aio_ctxt_t;
+
+int lx_aio_max_nr = 65536;
+
+long
+lx_io_setup(unsigned int nr_events, lx_aio_context_t *cidp)
+{
+ lx_aio_ctxt_t *ctx;
+ intptr_t tp;
+ lx_aiocb_t *lxcbs;
+ uintptr_t check;
+ size_t size;
+ int i;
+
+ if (uucopy(cidp, &check, sizeof (cidp)) != 0)
+ return (-EFAULT);
+
+ if (check != NULL || nr_events == 0 || nr_events > lx_aio_max_nr)
+ return (-EINVAL);
+
+ /*
+ * We're saved from complexity in no small measure by the fact that the
+ * cap on the number of concurrent events must be specified a priori;
+ * we use that to determine the amount of memory we need and mmap() it
+ * upfront.
+ */
+ size = sizeof (lx_aio_ctxt_t) + nr_events * sizeof (lx_aiocb_t);
+
+ if ((tp = (intptr_t)mmap(0, size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON, -1, 0)) == -1) {
+ return (-ENOMEM);
+ }
+ ctx = (lx_aio_ctxt_t *)tp;
+
+ ctx->lxaio_size = size;
+ ctx->lxaio_nevents = nr_events;
+
+ if ((ctx->lxaio_port = port_create()) == -1) {
+ munmap((caddr_t)ctx, ctx->lxaio_size);
+ return (-EAGAIN);
+ }
+
+ (void) mutex_init(&ctx->lxaio_lock, USYNC_THREAD, NULL);
+
+ /*
+ * Link up the free list.
+ */
+ lxcbs = (lx_aiocb_t *)((uintptr_t)ctx + sizeof (lx_aio_ctxt_t));
+
+ for (i = 0; i < nr_events - 1; i++)
+ lxcbs[i].lxaiocb_next = &lxcbs[i + 1];
+
+ ctx->lxaio_free = &lxcbs[0];
+
+ if (uucopy(&ctx, cidp, sizeof (cidp)) != 0) {
+ (void) close(ctx->lxaio_port);
+ munmap((caddr_t)ctx, ctx->lxaio_size);
+ return (-EFAULT);
+ }
+
+ return (0);
+}
+
+long
+lx_io_submit(lx_aio_context_t cid, long nr, uintptr_t **bpp)
+{
+ int processed = 0, err = 0, i;
+ port_notify_t notify;
+ lx_aiocb_t *lxcb;
+ lx_iocb_t **iocbpp, iocb, *iocbp = &iocb;
+ struct aiocb *aiocb;
+ lx_aio_ctxt_t *ctx = (lx_aio_ctxt_t *)cid;
+
+ if (nr <= 0 || ctx == NULL)
+ return (-EINVAL);
+
+ if ((iocbpp = (lx_iocb_t **)malloc(nr * sizeof (uintptr_t))) == NULL)
+ return (-EAGAIN);
+
+ if (uucopy(bpp, iocbpp, nr * sizeof (uintptr_t)) != 0) {
+ free(iocbpp);
+ return (-EFAULT);
+ }
+
+ mutex_lock(&ctx->lxaio_lock);
+
+ for (i = 0; i < nr; i++) {
+ if ((lxcb = ctx->lxaio_free) == NULL) {
+ err = EAGAIN;
+ break;
+ }
+
+ if (uucopy(iocbpp[i], &iocb, sizeof (iocb)) != 0) {
+ err = EFAULT;
+ break;
+ }
+
+ lxcb->lxaiocb_iocbp = (uintptr_t)iocbpp[i];
+ lxcb->lxaiocb_data = iocbp->lxiocb_data;
+
+ /*
+ * We don't currently support eventfd-based notification.
+ */
+ if (iocbp->lxiocb_flags & LX_IOCB_FLAG_RESFD) {
+ err = ENOSYS;
+ break;
+ }
+
+ notify.portnfy_port = ctx->lxaio_port;
+ notify.portnfy_user = lxcb;
+
+ aiocb = &lxcb->lxaiocb_cb;
+ aiocb->aio_fildes = iocbp->lxiocb_fd;
+ aiocb->aio_sigevent.sigev_notify = SIGEV_PORT;
+ aiocb->aio_sigevent.sigev_value.sival_ptr = &notify;
+
+ switch (iocbp->lxiocb_op) {
+ case LX_IOCB_CMD_FSYNC:
+ case LX_IOCB_CMD_FDSYNC:
+ err = aio_fsync(iocbp->lxiocb_op == LX_IOCB_CMD_FSYNC ?
+ O_SYNC : O_DSYNC, aiocb);
+ break;
+
+ case LX_IOCB_CMD_PREAD:
+ case LX_IOCB_CMD_PWRITE:
+ aiocb->aio_offset = iocbp->lxiocb_offset;
+
+ if (aiocb->aio_nbytes > LONG_MAX) {
+ err = EINVAL;
+ break;
+ }
+
+ aiocb->aio_nbytes = iocbp->lxiocb_nbytes;
+
+ if ((uintptr_t)iocbp->lxiocb_buf > ULONG_MAX) {
+ err = EINVAL;
+ break;
+ }
+
+ aiocb->aio_buf = (void *)(uintptr_t)iocbp->lxiocb_buf;
+ aiocb->aio_reqprio = 0;
+
+ if (iocbp->lxiocb_op == LX_IOCB_CMD_PREAD) {
+ err = aio_read(aiocb);
+ } else {
+ err = aio_write(aiocb);
+ }
+
+ break;
+
+ case LX_IOCB_CMD_NOOP:
+ /*
+ * Yet another whodunit in Adventure Playground: why
+ * does Linux define an operation -- IOCB_CMD_NOOP --
+ * for which it always returns EINVAL?! And what
+ * could a "no-op" possibly mean for asynchronous I/O
+ * anyway?! Do nothing... later?!
+ */
+ err = EINVAL;
+ break;
+
+ case LX_IOCB_CMD_PREADV:
+ case LX_IOCB_CMD_PWRITEV:
+ /*
+ * We don't support asynchronous preadv and pwritev
+ * (an asynchronous scatter/gather being a somewhat odd
+ * notion to begin with); we return EINVAL in this
+ * case, which the caller should be able to deal with.
+ */
+ err = EINVAL;
+ break;
+
+ default:
+ err = EINVAL;
+ break;
+ }
+
+ if (err == -1)
+ err = errno;
+
+ if (err != 0)
+ break;
+
+ /*
+ * We successfully enqueued I/O. Take our control block off
+ * of the free list and transition it to our list of
+ * outstanding I/O.
+ */
+ ctx->lxaio_free = lxcb->lxaiocb_next;
+ lxcb->lxaiocb_next = ctx->lxaio_outstanding;
+
+ if (ctx->lxaio_outstanding != NULL)
+ ctx->lxaio_outstanding->lxaiocb_prev = lxcb;
+
+ ctx->lxaio_outstanding = lxcb;
+ processed++;
+ }
+
+ mutex_unlock(&ctx->lxaio_lock);
+
+ free(iocbpp);
+ if (processed == 0)
+ return (-err);
+
+ return (processed);
+}
+
+long
+lx_io_getevents(lx_aio_context_t cid, long min_nr, long nr,
+ lx_io_event_t *events, struct timespec *timeout)
+{
+ port_event_t *list;
+ lx_io_event_t *out;
+ unsigned int nget = min_nr;
+ int rval, i, err;
+ uint32_t max = nr;
+ lx_aio_ctxt_t *ctx = (lx_aio_ctxt_t *)cid;
+
+ if (nr > ctx->lxaio_nevents)
+ return (-EINVAL);
+
+ /*
+ * We can't return ENOMEM from this syscall so EINTR is the closest
+ * we can come.
+ */
+ if ((list = malloc(nr * sizeof (port_event_t))) == NULL)
+ return (-EINTR);
+
+ /*
+ * Grab the lock associated with the context to bump the number of
+ * waiters. This is needed in case this context is destroyed while
+ * we're still waiting on it.
+ */
+ mutex_lock(&ctx->lxaio_lock);
+
+ if (ctx->lxaio_destroying) {
+ mutex_unlock(&ctx->lxaio_lock);
+ free(list);
+ return (-EINVAL);
+ }
+
+ ctx->lxaio_waiters++;
+ mutex_unlock(&ctx->lxaio_lock);
+
+ rval = port_getn(ctx->lxaio_port, list, max, &nget, timeout);
+ err = errno;
+
+ mutex_lock(&ctx->lxaio_lock);
+
+ assert(ctx->lxaio_waiters > 0);
+ ctx->lxaio_waiters--;
+
+ if ((rval == -1 && err != ETIME) || nget == 0 ||
+ (nget == 1 && list[0].portev_source == PORT_SOURCE_ALERT)) {
+ /*
+ * If we're being destroyed, kick our waiter and clear out with
+ * EINVAL -- this is effectively an application-level race.
+ */
+ if (ctx->lxaio_destroying) {
+ cond_signal(&ctx->lxaio_destroyer);
+ err = EINVAL;
+ }
+
+ mutex_unlock(&ctx->lxaio_lock);
+
+ free(list);
+ return (nget == 0 ? 0 : -err);
+ }
+
+ if ((out = malloc(nget * sizeof (lx_io_event_t))) == NULL) {
+ mutex_unlock(&ctx->lxaio_lock);
+ free(list);
+ return (-EINTR);
+ }
+
+ /*
+ * For each returned event, translate it into the Linux event in our
+ * stack-based buffer. As we're doing this, we also free the lxcb by
+ * moving it from the outstanding list to the free list.
+ */
+ for (i = 0; i < nget; i++) {
+ port_event_t *pe = &list[i];
+ lx_io_event_t *lxe = &out[i];
+ struct aiocb *aiocb;
+ lx_aiocb_t *lxcb;
+
+ lxcb = pe->portev_user;
+ aiocb = (struct aiocb *)pe->portev_object;
+
+ assert(pe->portev_source == PORT_SOURCE_AIO);
+ assert(aiocb == &lxcb->lxaiocb_cb);
+
+ lxe->lxioe_data = lxcb->lxaiocb_data;
+ lxe->lxioe_object = lxcb->lxaiocb_iocbp;
+ lxe->lxioe_res = aio_return(aiocb);
+ lxe->lxioe_res2 = 0;
+
+ if (lxcb->lxaiocb_next != NULL)
+ lxcb->lxaiocb_next->lxaiocb_prev = lxcb->lxaiocb_prev;
+
+ if (lxcb->lxaiocb_prev != NULL) {
+ lxcb->lxaiocb_prev->lxaiocb_next = lxcb->lxaiocb_next;
+ } else {
+ assert(ctx->lxaio_outstanding == lxcb);
+ ctx->lxaio_outstanding = lxcb->lxaiocb_next;
+ }
+
+ lxcb->lxaiocb_prev = NULL;
+ lxcb->lxaiocb_next = ctx->lxaio_free;
+ ctx->lxaio_free = lxcb;
+ }
+
+ free(list);
+
+ /*
+ * Perform one final check for a shutdown -- it's possible that we
+ * raced with the port transitioning into alert mode, in which case we
+ * have a blocked destroyer that we need to kick. (Note that we do
+ * this after having properly cleaned up the completed I/O.)
+ */
+ if (ctx->lxaio_destroying) {
+ cond_signal(&ctx->lxaio_destroyer);
+ mutex_unlock(&ctx->lxaio_lock);
+ free(out);
+ return (-EINVAL);
+ }
+
+ mutex_unlock(&ctx->lxaio_lock);
+
+ if (uucopy(out, events, nget * sizeof (lx_io_event_t)) != 0) {
+ free(out);
+ return (-EFAULT);
+ }
+
+ free(out);
+ return (nget);
+}
+
+/*
+ * Cancellation is unfortunately problematic for us as the POSIX semantics for
+ * AIO cancellation differ slightly from the Linux semantics: on Linux,
+ * io_cancel() regrettably does not use the same mechanism for event
+ * consumption (that is, as an event retrievable via io_getevents()), but
+ * rather returns the cancellation event directly from io_cancel(). This is
+ * in contrast to POSIX AIO cancellation, which does not actually alter the
+ * notification mechanism: the cancellation is still received via its
+ * specified notification (i.e., an event port or signal). The unfortunate
+ * Linux semantics leave us with several (suboptimal) choices:
+ *
+ * (1) Cancel the I/O via aio_cancel(), and then somehow attempt to block on
+ * the asynchronous cancellation notification without otherwise disturbing
+ * other events that may be pending.
+ *
+ * (2) Cancel the I/O via aio_cancel() but ignore (and later, discard) the
+ * asynchronous cancellation notification.
+ *
+ * (3) Explicitly fail to cancel any asynchronous I/O by having io_cancel()
+ * always return EAGAIN.
+ *
+ * While the third option is the least satisfying from an engineering
+ * perspective, it is also entirely within the rights of the interface (which
+ * may return EAGAIN to merely denote that the specified I/O "was not
+ * canceled") and has the added advantage of being entirely honest. (This is
+ * in stark contrast to the first two options, each of which tries to tell
+ * small lies that seem to sure to end in elaborate webs of deceit.) Honesty
+ * is the best policy; after checking that the specified I/O is outstanding,
+ * we fail with EAGAIN.
+ */
+/*ARGSUSED*/
+long
+lx_io_cancel(lx_aio_context_t cid, lx_iocb_t *iocbp, lx_io_event_t *result)
+{
+ lx_iocb_t iocb;
+ lx_aiocb_t *lxcb;
+ lx_aio_ctxt_t *ctx = (lx_aio_ctxt_t *)cid;
+
+ if (uucopy(iocbp, &iocb, sizeof (lx_iocb_t)) != 0)
+ return (-EFAULT);
+
+ mutex_lock(&ctx->lxaio_lock);
+
+ if (ctx->lxaio_destroying) {
+ mutex_unlock(&ctx->lxaio_lock);
+ return (-EINVAL);
+ }
+
+ for (lxcb = ctx->lxaio_outstanding; lxcb != NULL &&
+ lxcb->lxaiocb_iocbp != (uintptr_t)iocbp; lxcb = lxcb->lxaiocb_next)
+ continue;
+
+ mutex_unlock(&ctx->lxaio_lock);
+
+ if (lxcb == NULL)
+ return (-EINVAL);
+
+ /*
+ * Congratulations on your hard-won EAGAIN!
+ */
+ return (-EAGAIN);
+}
+
+/*
+ * As is often the case, the destruction case makes everything a lot more
+ * complicated. In this case, io_destroy() is defined to block on the
+ * completion of all outstanding operations. To effect this, we throw the
+ * event port into the rarely-used alert mode -- invented long ago for just
+ * this purpose -- thereby kicking any waiters out of their port_get().
+ */
+long
+lx_io_destroy(lx_aio_context_t cid)
+{
+ lx_aiocb_t *lxcb;
+ unsigned int nget = 0, i;
+ int port;
+ lx_aio_ctxt_t *ctx = (lx_aio_ctxt_t *)cid;
+
+ port = ctx->lxaio_port;
+ mutex_lock(&ctx->lxaio_lock);
+
+ if (ctx->lxaio_destroying) {
+ mutex_unlock(&ctx->lxaio_lock);
+ return (-EINVAL);
+ }
+
+ ctx->lxaio_destroying = B_TRUE;
+
+ if (ctx->lxaio_waiters) {
+ /*
+ * If we have waiters, put the port into alert mode.
+ */
+ (void) port_alert(port, PORT_ALERT_SET, B_TRUE, NULL);
+
+ while (ctx->lxaio_waiters)
+ cond_wait(&ctx->lxaio_destroyer, &ctx->lxaio_lock);
+
+ /*
+ * Transition the port out of alert mode: we will need to
+ * block on the port ourselves for any outstanding I/O.
+ */
+ (void) port_alert(port, PORT_ALERT_SET, B_FALSE, NULL);
+ }
+
+ /*
+ * We have no waiters and we never will again -- we can be assured
+ * that our list of outstanding I/Os is now completely static and it's
+ * now safe to iterate over our outstanding I/Os and aio_cancel() them.
+ */
+ for (lxcb = ctx->lxaio_outstanding; lxcb != NULL;
+ lxcb = lxcb->lxaiocb_next) {
+ struct aiocb *aiocb = &lxcb->lxaiocb_cb;
+
+ /*
+ * Surely a new bureaucratic low even for POSIX that we must
+ * specify both the file descriptor and the structure that
+ * must contain the file desctiptor...
+ */
+ (void) aio_cancel(aiocb->aio_fildes, aiocb);
+ nget++;
+ }
+
+ /*
+ * Drain one at a time using port_get (vs. port_getn) so that we don't
+ * have to malloc a port_event list, which might fail.
+ */
+ for (i = 0; i < nget; i++) {
+ port_event_t pe;
+ int rval;
+
+ do {
+ rval = port_get(port, &pe, NULL);
+ } while (rval == -1 && errno == EINTR);
+
+ assert(rval == 0);
+ }
+
+ /*
+ * I/Os are either cancelled or completed. We can safely close our
+ * port and nuke the mapping that contains our context.
+ */
+ (void) close(ctx->lxaio_port);
+ munmap((caddr_t)ctx, ctx->lxaio_size);
+
+ return (0);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/capabilities.c b/usr/src/lib/brand/lx/lx_brand/common/capabilities.c
new file mode 100644
index 0000000000..f80afc9ea0
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/capabilities.c
@@ -0,0 +1,507 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * LX Brand emulation of capget/capset syscalls
+ */
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/lx_types.h>
+#include <sys/lx_syscall.h>
+#include <sys/syscall.h>
+#include <alloca.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/lx_misc.h>
+#include <priv.h>
+
+typedef struct {
+ uint32_t version;
+ int pid;
+} lx_cap_user_header_t;
+
+typedef struct {
+ uint32_t effective;
+ uint32_t permitted;
+ uint32_t inheritable;
+} lx_cap_user_data_t;
+
+typedef struct {
+ priv_set_t *p_effective;
+ priv_set_t *p_permitted;
+ priv_set_t *p_inheritable;
+} lx_cap_privs_t;
+
+#define LX_CAP_UPDATE_PERMITTED 0x1
+#define LX_CAP_UPDATE_EFFECTIVE 0x2
+#define LX_CAP_UPDATE_INHERITABLE 0x4
+
+
+#define LX_CAP_MAXLEN 2
+
+typedef struct {
+ uint32_t effective[LX_CAP_MAXLEN];
+ uint32_t permitted[LX_CAP_MAXLEN];
+ uint32_t inheritable[LX_CAP_MAXLEN];
+} lx_cap_data_t;
+
+#define LX_CAP_VERSION_1 0x19980330
+#define LX_CAP_VERSION_2 0x20071026 /* deprecated by Linux */
+#define LX_CAP_VERSION_3 0x20080522
+
+#define LX_CAP_SETPCAP 8
+
+/*
+ * Even though we lack mappings for capabilities higher than LX_CAP_MAX_VALID,
+ * it's valuable to test all the way out to the end of the second field. This
+ * ensures that new capabilities we lack support for are not silently accepted.
+ */
+#define LX_CAP_MAX_CHECK 63
+
+#define LX_CAP_CAPISSET(id, cap) \
+ (((id < 32) && (((0x1 << id) & cap[0]) != 0)) || \
+ ((id >= 32) && (((0x1 << (id - 32) & cap[1]) != 0))))
+
+#define LX_CAP_CAPSET(id, cap) \
+ if (id < 32) { cap[0] |= (0x1 << id); } \
+ else { cap[1] |= (0x1 << (id - 32)); }
+
+static const char *lx_cap_map_chown[] = {
+ PRIV_FILE_CHOWN,
+ PRIV_FILE_CHOWN_SELF,
+ NULL
+};
+static const char *lx_cap_map_dac_override[] = {
+ PRIV_FILE_DAC_READ,
+ PRIV_FILE_DAC_WRITE,
+ PRIV_FILE_DAC_EXECUTE,
+ NULL
+};
+static const char *lx_cap_map_dac_read_search[] = {
+ PRIV_FILE_DAC_SEARCH,
+ PRIV_FILE_DAC_READ,
+ NULL
+};
+static const char *lx_cap_map_fowner[] = { PRIV_FILE_OWNER, NULL };
+static const char *lx_cap_map_fsetid[] = { PRIV_FILE_SETID, NULL };
+static const char *lx_cap_map_kill[] = { PRIV_PROC_OWNER, NULL };
+/*
+ * One way that Linux capabilities(7) differs from Illumos privileges(5) is
+ * that it distinguishes between setuid and setgroups rights. This will be a
+ * problem if an lx-branded process requests to drop only CAP_SETUID but not
+ * CAP_SETGID.
+ *
+ * In that case, CAP_SETUID will be maintained.
+ */
+static const char *lx_cap_map_setgid[] = { PRIV_PROC_SETID, NULL };
+static const char *lx_cap_map_setuid[] = { PRIV_PROC_SETID, NULL };
+static const char *lx_cap_map_linux_immutable[] = { PRIV_FILE_FLAG_SET, NULL };
+static const char *lx_cap_map_bind_service[] = { PRIV_NET_PRIVADDR, NULL };
+static const char *lx_cap_map_net_admin[] = {
+ PRIV_SYS_IPC_CONFIG,
+ PRIV_SYS_DL_CONFIG,
+ NULL
+};
+static const char *lx_cap_map_net_raw[] = {
+ PRIV_NET_RAWACCESS,
+ PRIV_NET_ICMPACCESS,
+ NULL
+};
+static const char *lx_cap_map_ipc_lock[] = { PRIV_PROC_LOCK_MEMORY, NULL };
+static const char *lx_cap_map_ipc_owner[] = {
+ PRIV_IPC_DAC_READ,
+ PRIV_IPC_DAC_WRITE,
+ PRIV_IPC_OWNER,
+ NULL
+};
+static const char *lx_cap_map_sys_chroot[] = { PRIV_PROC_CHROOT, NULL };
+static const char *lx_cap_map_sys_admin[] = {
+ PRIV_SYS_MOUNT,
+ PRIV_SYS_ADMIN,
+ NULL
+};
+static const char *lx_cap_map_sys_nice[] = { PRIV_PROC_PRIOUP, NULL };
+static const char *lx_cap_map_sys_resource[] = { PRIV_SYS_RESOURCE, NULL };
+static const char *lx_cap_map_audit_write[] = { PRIV_PROC_AUDIT, NULL };
+static const char *lx_cap_map_audit_control[] = { PRIV_SYS_AUDIT, NULL };
+
+/*
+ * Mapping of Linux capabilities -> Illumos privileges
+ * The ID definitions can be found in the Linux sources here:
+ * include/uapi/linux/capability.h
+ *
+ * Order is critical.
+ */
+static const char ** lx_cap_mapping[LX_CAP_MAX_VALID + 1] = {
+ lx_cap_map_chown, /* CAP_CHOWN */
+ lx_cap_map_dac_override, /* CAP_DAC_OVERRIDE */
+ lx_cap_map_dac_read_search, /* CAP_DAC_READ_SEARCH */
+ lx_cap_map_fowner, /* CAP_FOWNER */
+ lx_cap_map_fsetid, /* CAP_FSETID */
+ lx_cap_map_kill, /* CAP_KILL */
+ lx_cap_map_setgid, /* CAP_SETGID */
+ lx_cap_map_setuid, /* CAP_SETUID */
+ NULL, /* CAP_SETPCAP */
+ lx_cap_map_linux_immutable, /* CAP_LINUX_IMMUTABLE */
+ lx_cap_map_bind_service, /* CAP_BIND_SERVICE */
+ NULL, /* CAP_BROADCAST */
+ lx_cap_map_net_admin, /* CAP_NET_ADMIN */
+ lx_cap_map_net_raw, /* CAP_NET_RAW */
+ lx_cap_map_ipc_lock, /* CAP_IPC_LOCK */
+ lx_cap_map_ipc_owner, /* CAP_IPC_OWNER */
+ NULL, /* CAP_MODULE */
+ NULL, /* CAP_RAWIO */
+ lx_cap_map_sys_chroot, /* CAP_SYS_CHROOT */
+ NULL, /* CAP_PTRACE */
+ NULL, /* CAP_PACCT */
+ lx_cap_map_sys_admin, /* CAP_SYS_ADMIN */
+ NULL, /* CAP_BOOT */
+ lx_cap_map_sys_nice, /* CAP_SYS_NICE */
+ lx_cap_map_sys_resource, /* CAP_SYS_RESOURCE */
+ NULL, /* CAP_SYS_TIME */
+ NULL, /* CAP_SYS_TTY_CONFIG */
+ NULL, /* CAP_MKNOD */
+ NULL, /* CAP_LEASE */
+ lx_cap_map_audit_write, /* CAP_AUDIT_WRITE */
+ lx_cap_map_audit_control, /* CAP_AUDIT_CONTROL */
+ NULL, /* CAP_SETFCAP */
+ NULL, /* CAP_MAC_OVERRIDE */
+ NULL, /* CAP_MAC_ADMIN */
+ NULL, /* CAP_SYSLOG */
+ NULL, /* CAP_WAKE_ALARM */
+ NULL /* CAP_BLOCK_SUSPEND */
+};
+
+/* track priv_set_t size, set on entry to lx_capset/lx_capget */
+static unsigned int lx_cap_priv_size = 0;
+
+/* safely allocate priv_set_t triplet on the stack */
+#define LX_CAP_ALLOC_PRIVS(ptr) \
+ do { \
+ ptr = SAFE_ALLOCA(sizeof (lx_cap_privs_t) + \
+ (3 * lx_cap_priv_size)); \
+ if (ptr != NULL) { \
+ ptr->p_effective = (void *) ptr + \
+ sizeof (lx_cap_privs_t); \
+ ptr->p_permitted = (void *) ptr + \
+ sizeof (lx_cap_privs_t) + \
+ lx_cap_priv_size;\
+ ptr->p_inheritable = (void *) ptr + \
+ sizeof (lx_cap_privs_t) + \
+ 2 * lx_cap_priv_size; \
+ } \
+ } while (0)
+
+static long
+lx_cap_update_priv(priv_set_t *priv, const uint32_t cap[])
+{
+ int i, j;
+ boolean_t cap_set;
+ boolean_t priv_set;
+ boolean_t updated = B_FALSE;
+ for (i = 0; i <= LX_CAP_MAX_CHECK; i++) {
+ cap_set = LX_CAP_CAPISSET(i, cap);
+ if (lx_cap_mapping[i] == NULL || i > LX_CAP_MAX_VALID) {
+ /* don't allow setting unsupported caps */
+ if (cap_set) {
+ /*
+ * CAP_SETPCAP is a special capability, with
+ * varying behavior, that can be used to
+ * control if the process can change other
+ * process's capabilities, or to control moving
+ * capabilities between sets. For now we ignore
+ * this if its passed in.
+ */
+ if (i == LX_CAP_SETPCAP) {
+ continue;
+ }
+ lx_unsupported("set unsupported capability %d",
+ i);
+ return (-1);
+ } else {
+ continue;
+ }
+ }
+ for (j = 0; lx_cap_mapping[i][j] != NULL; j++) {
+ priv_set = priv_ismember(priv, lx_cap_mapping[i][j]);
+ if (priv_set && !cap_set) {
+ priv_delset(priv, lx_cap_mapping[i][j]);
+ updated = B_TRUE;
+ } else if (!priv_set && cap_set) {
+ priv_addset(priv, lx_cap_mapping[i][j]);
+ updated = B_TRUE;
+ }
+ }
+ }
+ if (updated)
+ return (1);
+ else
+ return (0);
+}
+
+static long
+lx_cap_to_priv(lx_cap_data_t *cap, lx_cap_privs_t *priv)
+{
+ long changes = 0;
+ long result;
+
+ result = lx_cap_update_priv(priv->p_permitted, cap->permitted);
+ if (result < 0)
+ return (-1);
+ else if (result > 0)
+ changes |= LX_CAP_UPDATE_PERMITTED;
+
+ result = lx_cap_update_priv(priv->p_effective, cap->effective);
+ if (result < 0)
+ return (-1);
+ else if (result > 0)
+ changes |= LX_CAP_UPDATE_EFFECTIVE;
+
+ result = lx_cap_update_priv(priv->p_inheritable, cap->inheritable);
+ if (result < 0)
+ return (-1);
+ else if (result > 0)
+ changes |= LX_CAP_UPDATE_INHERITABLE;
+
+ return (changes);
+}
+
+static void
+lx_cap_from_priv(const priv_set_t *priv, uint32_t cap[])
+{
+ int i, j;
+ boolean_t valid;
+ memset(cap, '\0', sizeof (uint32_t) * LX_CAP_MAXLEN);
+ for (i = 0; i <= LX_CAP_MAX_VALID; i++) {
+ if (lx_cap_mapping[i] == NULL) {
+ continue;
+ }
+ valid = B_TRUE;
+ for (j = 0; lx_cap_mapping[i][j] != NULL; j++) {
+ if (!priv_ismember(priv,
+ lx_cap_mapping[i][j])) {
+ valid = B_FALSE;
+ }
+ }
+ if (valid) {
+ LX_CAP_CAPSET(i, cap);
+ }
+ }
+}
+
+static long
+lx_cap_read_cap(const lx_cap_user_header_t *uhp, const lx_cap_user_data_t *udp,
+ lx_cap_data_t *cd)
+{
+ lx_cap_user_header_t uh;
+ lx_cap_user_data_t ud_buf;
+ int cap_count;
+ int i;
+
+ if (uucopy(uhp, &uh, sizeof (uh)) != 0)
+ return (-errno);
+
+ switch (uh.version) {
+ case LX_CAP_VERSION_1:
+ cap_count = 1;
+ break;
+ case LX_CAP_VERSION_2:
+ case LX_CAP_VERSION_3:
+ cap_count = 2;
+ break;
+ default:
+ return (-EINVAL);
+ }
+
+ /* Only allow capset on calling process */
+ if (uh.pid != 0 && uh.pid != getpid())
+ return (-EPERM);
+
+ /* zero the struct in case cap_count < 2 */
+ memset(cd, '\0', sizeof (lx_cap_data_t));
+
+ for (i = 0; i < cap_count; i++) {
+ if (uucopy(udp + i, &ud_buf, sizeof (ud_buf)) != 0)
+ return (-errno);
+ cd->permitted[i] = ud_buf.permitted;
+ cd->effective[i] = ud_buf.effective;
+ cd->inheritable[i] = ud_buf.inheritable;
+ }
+ return (0);
+}
+
+long
+lx_capget(uintptr_t p1, uintptr_t p2)
+{
+ const priv_impl_info_t *impl;
+ lx_cap_user_header_t *uhp = (lx_cap_user_header_t *)p1;
+ lx_cap_user_data_t *udp = (lx_cap_user_data_t *)p2;
+ lx_cap_user_header_t uh;
+ lx_cap_privs_t *privs;
+ lx_cap_data_t cd_result;
+ lx_cap_user_data_t cd_buf;
+ int cap_count;
+ int i;
+
+ if (lx_cap_priv_size == 0) {
+ impl = getprivimplinfo();
+ lx_cap_priv_size = sizeof (priv_chunk_t) * impl->priv_setsize;
+ }
+
+ if (uucopy(uhp, &uh, sizeof (uh)) != 0)
+ return (-errno);
+
+ switch (uh.version) {
+ case LX_CAP_VERSION_1:
+ cap_count = 1;
+ break;
+ case LX_CAP_VERSION_2:
+ case LX_CAP_VERSION_3:
+ cap_count = 2;
+ break;
+ default:
+ /*
+ * As per the man page: call will fail with EINVAL and set the
+ * version field of the header to the kernel preferred version
+ * when an unsupported version value is provided.
+ */
+ uh.version = LX_CAP_VERSION_3;
+ if (uucopy(&uh, uhp, sizeof (uh)) != 0)
+ return (-errno);
+ return (-EINVAL);
+ }
+
+ /*
+ * Only allow capget on the calling process.
+ * If a pid is specified, lie about being able to locate it.
+ */
+ if (uh.pid > 0 && uh.pid != getpid())
+ return (-ESRCH);
+ if (uh.pid < 0)
+ return (-EINVAL);
+
+ LX_CAP_ALLOC_PRIVS(privs);
+ if (privs == NULL)
+ return (-ENOMEM);
+
+ if (getppriv(PRIV_PERMITTED, privs->p_permitted) != 0)
+ return (-errno);
+ if (getppriv(PRIV_EFFECTIVE, privs->p_effective) != 0)
+ return (-errno);
+ if (getppriv(PRIV_INHERITABLE, privs->p_inheritable) != 0)
+ return (-errno);
+
+ lx_cap_from_priv(privs->p_permitted, cd_result.permitted);
+ lx_cap_from_priv(privs->p_effective, cd_result.effective);
+ lx_cap_from_priv(privs->p_inheritable, cd_result.inheritable);
+
+ /* convert to output format */
+ for (i = 0; i < cap_count; i++) {
+ cd_buf.effective = cd_result.effective[i];
+ cd_buf.permitted = cd_result.permitted[i];
+ cd_buf.inheritable = cd_result.inheritable[i];
+ if (uucopy(&cd_buf, udp + i, sizeof (cd_buf)) != 0)
+ return (-errno);
+ }
+
+ return (0);
+}
+
+long
+lx_capset(uintptr_t p1, uintptr_t p2)
+{
+ const priv_impl_info_t *impl;
+ lx_cap_data_t cd;
+ lx_cap_privs_t *privs;
+ long result;
+
+ if (lx_cap_priv_size == 0) {
+ impl = getprivimplinfo();
+ lx_cap_priv_size = sizeof (priv_chunk_t) * impl->priv_setsize;
+ }
+
+ /* verify header and read in desired capabilities */
+ result = lx_cap_read_cap((lx_cap_user_header_t *)p1,
+ (lx_cap_user_data_t *)p2, &cd);
+ if (result != 0)
+ return (result);
+
+ LX_CAP_ALLOC_PRIVS(privs);
+ if (privs == NULL)
+ return (-ENOMEM);
+
+ /* fetch current privs to compare against */
+ if (getppriv(PRIV_PERMITTED, privs->p_permitted) != 0)
+ return (-errno);
+ if (getppriv(PRIV_EFFECTIVE, privs->p_effective) != 0)
+ return (-errno);
+ if (getppriv(PRIV_INHERITABLE, privs->p_inheritable) != 0)
+ return (-errno);
+
+
+ result = lx_cap_to_priv(&cd, privs);
+ if (result < 0)
+ return (-EPERM);
+
+ /* report success if no changes needed */
+ if (result == 0)
+ return (0);
+
+ /* Ensure the effective/inheritable caps aren't > permitted */
+ if (!priv_issubset(privs->p_effective, privs->p_permitted) ||
+ !priv_issubset(privs->p_inheritable, privs->p_permitted))
+ return (-EPERM);
+
+ /*
+ * Here is where things become racy. Linux updates all three
+ * capability sets simultaneously in the capset syscall. In order to
+ * emulate capabilities via privileges, three setppriv operations are
+ * required in sequence. If one or two should fail, there is not a
+ * mechanism to convey the incomplete operation to the caller.
+ *
+ * We do two things to make this less risky:
+ * 1. Verify that both the desired effective and inheritable
+ * sets are subsets of the desired permitted set.
+ * 2. Perform the setppriv of the permitted set first.
+ *
+ * Should the setppriv(permitted) fail, we can safely bail out with an
+ * error. If it succeeds, the setppriv of effective and inheritable
+ * are likely to succeed given that they've been verified legal.
+ *
+ * If the partial error does happen, we'll be forced to report failure
+ * even though the privileges were altered.
+ */
+
+ if ((result & LX_CAP_UPDATE_PERMITTED) != 0) {
+ /* failure here is totally safe */
+ if (setppriv(PRIV_SET, PRIV_PERMITTED, privs->p_permitted) != 0)
+ return (-errno);
+ }
+ if ((result & LX_CAP_UPDATE_EFFECTIVE) != 0) {
+ /* failure here is a bummer */
+ if (setppriv(PRIV_SET, PRIV_EFFECTIVE, privs->p_effective) != 0)
+ return (-errno);
+ }
+ if ((result & LX_CAP_UPDATE_EFFECTIVE) != 0) {
+ /* failure here is a major bummer */
+ if (setppriv(PRIV_SET, PRIV_INHERITABLE,
+ privs->p_inheritable) != 0)
+ return (-errno);
+ }
+
+ return (0);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/clock.c b/usr/src/lib/brand/lx/lx_brand/common/clock.c
new file mode 100644
index 0000000000..0473ac65b2
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/clock.c
@@ -0,0 +1,344 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <sys/timerfd.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_syscall.h>
+#include <lx_signum.h>
+
+/*
+ * Translating from the Linux clock types to the illumos types is a bit of a
+ * mess.
+ *
+ * Linux uses different values for it clock identifiers, so we have to do basic
+ * translations between the two. Thankfully, both Linux and illumos implement
+ * the same POSIX SUSv3 clock types, so the semantics should be identical.
+ *
+ * However, CLOCK_REALTIME and CLOCK_HIGHRES (CLOCK_MONOTONIC) are the only two
+ * clock backends currently implemented on illumos. Functions in the kernel
+ * that use the CLOCK_BACKEND macro will return an error for any clock type
+ * that does not exist in the clock_backend array. These functions are
+ * clock_settime, clock_gettime, clock_getres and timer_create.
+ *
+ * For reference, the kernel's clock_backend array looks like this:
+ *
+ * clock_backend[CLOCK_MAX] (6 entries)
+ * 0 __CLOCK_REALTIME0 valid ptr. (obs. same as CLOCK_REALTIME)
+ * 1 CLOCK_VIRTUAL NULL
+ * 2 CLOCK_THREAD_CPUTIME_ID NULL
+ * 3 CLOCK_REALTIME valid ptr.
+ * 4 CLOCK_MONOTONIC (CLOCK_HIGHRES) valid ptr.
+ * 5 CLOCK_PROCESS_CPUTIME_ID NULL
+ */
+
+#define CLOCK_RT_SLOT 0
+
+#define LX_CLOCK_REALTIME 0
+#define LX_CLOCK_MONOTONIC 1
+
+/*
+ * Limits for a minimum interval are enforced when creating timers from the
+ * CLOCK_HIGHRES source. Values below this minimum will be clamped if the
+ * process lacks the proc_clock_highres privilege.
+ */
+static int ltos_clock[] = {
+ CLOCK_REALTIME, /* LX_CLOCK_REALTIME */
+ CLOCK_HIGHRES, /* LX_CLOCK_MONOTONIC */
+ CLOCK_PROCESS_CPUTIME_ID, /* LX_CLOCK_PROCESS_CPUTIME_ID */
+ CLOCK_THREAD_CPUTIME_ID, /* LX_CLOCK_THREAD_CPUTIME_ID */
+ CLOCK_HIGHRES, /* LX_CLOCK_MONOTONIC_RAW */
+ CLOCK_REALTIME, /* LX_CLOCK_REALTIME_COARSE */
+ CLOCK_HIGHRES /* LX_CLOCK_MONOTONIC_COARSE */
+};
+
+#define LX_CLOCK_MAX (sizeof (ltos_clock) / sizeof (ltos_clock[0]))
+
+#define LX_SIGEV_PAD_SIZE ((64 - \
+ (sizeof (int) * 2 + sizeof (union sigval))) / sizeof (int))
+
+typedef struct {
+ union sigval lx_sigev_value; /* same layout for both */
+ int lx_sigev_signo;
+ int lx_sigev_notify;
+ union {
+ int lx_pad[LX_SIGEV_PAD_SIZE];
+ int lx_tid;
+ struct {
+ void (*lx_notify_function)(union sigval);
+ void *lx_notify_attribute;
+ } lx_sigev_thread;
+ } lx_sigev_un;
+} lx_sigevent_t;
+
+/* sigevent sigev_notify conversion table */
+static int ltos_sigev[] = {
+ SIGEV_SIGNAL,
+ SIGEV_NONE,
+ SIGEV_THREAD,
+ 0, /* Linux skips event 3 */
+ SIGEV_THREAD /* Linux SIGEV_THREAD_ID -- see lx_sigev_thread_id() */
+};
+
+#define LX_SIGEV_MAX (sizeof (ltos_sigev) / sizeof (ltos_sigev[0]))
+#define LX_SIGEV_THREAD_ID 4
+
+long
+lx_clock_nanosleep(int clock, int flags, struct timespec *rqtp,
+ struct timespec *rmtp)
+{
+ int ret = 0;
+ int err;
+ struct timespec rqt, rmt;
+
+ if (clock < 0 || clock >= LX_CLOCK_MAX)
+ return (-EINVAL);
+
+ if (uucopy(rqtp, &rqt, sizeof (struct timespec)) < 0)
+ return (-EFAULT);
+
+ /* the TIMER_RELTIME and TIMER_ABSTIME flags are the same on Linux */
+ if ((err = clock_nanosleep(ltos_clock[clock], flags, &rqt, &rmt))
+ != 0) {
+ if (err != EINTR)
+ return (-err);
+ ret = -EINTR;
+ /*
+ * We fall through in case we have to pass back the remaining
+ * time.
+ */
+ }
+
+ /*
+ * Only copy values to rmtp if the timer is TIMER_RELTIME and rmtp is
+ * non-NULL.
+ */
+ if (((flags & TIMER_RELTIME) == TIMER_RELTIME) && (rmtp != NULL) &&
+ (uucopy(&rmt, rmtp, sizeof (struct timespec)) < 0))
+ return (-EFAULT);
+
+ return (ret);
+}
+
+/*ARGSUSED*/
+long
+lx_adjtimex(void *tp)
+{
+ return (-EPERM);
+}
+
+/*
+ * Notification function for use with native SIGEV_THREAD in order to
+ * emulate Linux SIGEV_THREAD_ID. Native SIGEV_THREAD is used as the
+ * timer mechanism and B_SIGEV_THREAD_ID performs the actual event
+ * delivery to the appropriate lx tid.
+ */
+static void
+lx_sigev_thread_id(union sigval sival)
+{
+ lx_sigevent_t *lev = (lx_sigevent_t *)sival.sival_ptr;
+ syscall(SYS_brand, B_SIGEV_THREAD_ID, lev->lx_sigev_un.lx_tid,
+ lev->lx_sigev_signo, lev->lx_sigev_value.sival_ptr);
+ free(lev);
+}
+
+
+/*
+ * The illumos timer_create man page says it accepts the following clocks:
+ * CLOCK_REALTIME (3) wall clock
+ * CLOCK_VIRTUAL (1) user CPU usage clock - No Backend
+ * CLOCK_PROF (2) user and system CPU usage clock - No Backend
+ * CLOCK_HIGHRES (4) non-adjustable, high-resolution clock
+ * However, in reality the illumos timer_create only accepts CLOCK_REALTIME
+ * and CLOCK_HIGHRES.
+ *
+ * Linux has complicated support for clock IDs. For example, the
+ * clock_getcpuclockid() function can return a negative clock_id. See the Linux
+ * source and the comment in include/linux/posix-timers.h (above CLOCKFD) which
+ * describes clock file descriptors and shows how they map to a virt. or sched.
+ * clock ID. A process can pass one of these negative IDs to timer_create so we
+ * need to convert it and we currently only allow CLOCK_PROCESS_CPUTIME_ID
+ * against the current process as the input.
+ */
+long
+lx_timer_create(int clock, struct sigevent *lx_sevp, timer_t *tid)
+{
+ lx_sigevent_t lev;
+ struct sigevent sev;
+
+ if (clock < 0) {
+ if (clock != 0xfffffffe)
+ return (-EINVAL);
+ clock = CLOCK_RT_SLOT; /* force our use of CLOCK_REALTIME */
+ }
+
+ if (clock >= LX_CLOCK_MAX)
+ return (-EINVAL);
+
+ /* We have to convert the Linux sigevent layout to the illumos layout */
+ if (uucopy(lx_sevp, &lev, sizeof (lev)) < 0)
+ return (-EFAULT);
+
+ if (lev.lx_sigev_notify < 0 || lev.lx_sigev_notify > LX_SIGEV_MAX)
+ return (-EINVAL);
+
+ sev.sigev_notify = ltos_sigev[lev.lx_sigev_notify];
+ sev.sigev_signo = lx_ltos_signo(lev.lx_sigev_signo, 0);
+ sev.sigev_value = lev.lx_sigev_value;
+
+ /*
+ * The signal number is meaningless in SIGEV_NONE, Linux
+ * accepts any value. We convert invalid signals to 0 so other
+ * parts of lx signal handling don't break.
+ */
+ if ((sev.sigev_notify != SIGEV_NONE) && (sev.sigev_signo == 0))
+ return (-EINVAL);
+
+ /*
+ * Assume all Linux libc implementations map SIGEV_THREAD to
+ * SIGEV_THREAD_ID and ignore passed-in attributes.
+ */
+ sev.sigev_notify_attributes = NULL;
+
+ if (lev.lx_sigev_notify == LX_SIGEV_THREAD_ID) {
+ pid_t caller_pid = getpid();
+ pid_t target_pid;
+ lwpid_t ignore;
+ lx_sigevent_t *lev_copy;
+
+ if (lx_lpid_to_spair(lev.lx_sigev_un.lx_tid,
+ &target_pid, &ignore) != 0)
+ return (-EINVAL);
+
+ /*
+ * The caller of SIGEV_THREAD_ID must be in the same
+ * process as the target thread.
+ */
+ if (caller_pid != target_pid)
+ return (-EINVAL);
+
+ /*
+ * Pass the original lx sigevent_t to the native
+ * notify function so that it may pass it to the lx
+ * helper thread. It is the responsibility of
+ * lx_sigev_thread_id() to free lev_copy after the
+ * information is relayed to lx.
+ *
+ * If the calling process is forked without an exec
+ * after this copy but before the timer fires then
+ * lev_copy will leak in the child. This is acceptable
+ * given the rarity of this event, the miniscule
+ * amount leaked, and the fact that the memory is
+ * reclaimed when the proc dies. It is firmly in the
+ * land of "good enough".
+ */
+ lev_copy = malloc(sizeof (lx_sigevent_t));
+ if (lev_copy == NULL)
+ return (-ENOMEM);
+
+ if (uucopy(&lev, lev_copy, sizeof (lx_sigevent_t)) < 0) {
+ free(lev_copy);
+ return (-EFAULT);
+ }
+
+ sev.sigev_notify_function = lx_sigev_thread_id;
+ sev.sigev_value.sival_ptr = lev_copy;
+ }
+
+ return ((timer_create(ltos_clock[clock], &sev, tid) < 0) ? -errno : 0);
+}
+
+long
+lx_timer_settime(timer_t tid, int flags, struct itimerspec *new_val,
+ struct itimerspec *old_val)
+{
+ return ((timer_settime(tid, flags, new_val, old_val) < 0) ? -errno : 0);
+}
+
+long
+lx_timer_gettime(timer_t tid, struct itimerspec *val)
+{
+ return ((timer_gettime(tid, val) < 0) ? -errno : 0);
+}
+
+long
+lx_timer_getoverrun(timer_t tid)
+{
+ int val;
+
+ val = timer_getoverrun(tid);
+ return ((val < 0) ? -errno : val);
+}
+
+long
+lx_timer_delete(timer_t tid)
+{
+ return ((timer_delete(tid) < 0) ? -errno : 0);
+}
+
+long
+lx_timerfd_create(int clockid, int flags)
+{
+ int r;
+
+ /* These are the only two valid values. LTP tests for this. */
+ if (clockid != LX_CLOCK_REALTIME && clockid != LX_CLOCK_MONOTONIC)
+ return (-EINVAL);
+
+ r = timerfd_create(ltos_clock[clockid], flags);
+ /*
+ * As with the eventfd case, we return a slightly less jarring
+ * error condition if we cannot open /dev/timerfd.
+ */
+ if (r == -1 && errno == ENOENT)
+ return (-ENOTSUP);
+
+ return (r == -1 ? -errno : r);
+}
+
+long
+lx_timerfd_settime(int fd, int flags, const struct itimerspec *value,
+ struct itimerspec *ovalue)
+{
+ int r = timerfd_settime(fd, flags, value, ovalue);
+
+ return (r == -1 ? -errno : r);
+}
+
+long
+lx_timerfd_gettime(int fd, struct itimerspec *value)
+{
+ int r = timerfd_gettime(fd, value);
+
+ return (r == -1 ? -errno : r);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/clone.c b/usr/src/lib/brand/lx/lx_brand/common/clone.c
new file mode 100644
index 0000000000..59037a38b8
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/clone.c
@@ -0,0 +1,702 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <ucontext.h>
+#include <thread.h>
+#include <strings.h>
+#include <libintl.h>
+#include <sys/regset.h>
+#include <sys/syscall.h>
+#include <sys/inttypes.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/segments.h>
+#include <signal.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_types.h>
+#include <sys/lx_signal.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_thread.h>
+#include <sys/fork.h>
+#include <sys/mman.h>
+#include <sys/debug.h>
+#include <lx_syscall.h>
+
+
+#define SHARED_AS \
+ (LX_CLONE_VM | LX_CLONE_FS | LX_CLONE_FILES | LX_CLONE_SIGHAND \
+ | LX_CLONE_THREAD)
+#define CLONE_VFORK (LX_CLONE_VM | LX_CLONE_VFORK)
+#define CLONE_TD (LX_CLONE_THREAD|LX_CLONE_DETACH)
+
+#define IS_FORK(f) (((f) & SHARED_AS) == 0)
+#define IS_VFORK(f) (((f) & CLONE_VFORK) == CLONE_VFORK)
+
+/*
+ * This is dicey. This seems to be an internal glibc structure, and not
+ * part of any external interface. Thus, it is subject to change without
+ * notice. FWIW, clone(2) itself seems to be an internal (or at least
+ * unstable) interface, since strace(1) shows it differently than the man
+ * page.
+ */
+struct lx_desc
+{
+ uint32_t entry_number;
+ uint32_t base_addr;
+ uint32_t limit;
+ uint32_t seg_32bit:1;
+ uint32_t contents:2;
+ uint32_t read_exec_only:1;
+ uint32_t limit_in_pages:1;
+ uint32_t seg_not_present:1;
+ uint32_t useable:1;
+ uint32_t empty:25;
+};
+
+struct clone_state {
+ void *c_retaddr; /* instr after clone()'s int80 */
+ int c_flags; /* flags to clone(2) */
+ int c_sig; /* signal to send on thread exit */
+ void *c_stk; /* %esp of new thread */
+ void *c_ptidp;
+ struct lx_desc *c_ldtinfo; /* thread-specific segment */
+ void *c_ctidp;
+ ucontext_t c_uc; /* original register state/sigmask */
+ lx_affmask_t c_affmask; /* CPU affinity mask */
+ volatile int *c_clone_res; /* pid/error returned to cloner */
+ int c_ptrace_event; /* ptrace(2) event for child stop */
+ void *c_ntv_stk; /* native stack for this thread */
+ size_t c_ntv_stk_sz; /* native stack size */
+ lx_tsd_t *c_lx_tsd; /* tsd area for thread */
+};
+
+long
+lx_exit(uintptr_t p1)
+{
+ int status = (int)p1;
+ lx_tsd_t *lx_tsd = lx_get_tsd();
+
+ /*
+ * If we are a vfork(2)ed child, we need to exit as quickly and
+ * cleanly as possible to avoid corrupting our parent.
+ */
+ if (lx_tsd->lxtsd_is_vforked != 0) {
+ _exit(status);
+ }
+
+ lx_tsd->lxtsd_exit = LX_ET_EXIT;
+ lx_tsd->lxtsd_exit_status = status;
+
+ lx_ptrace_stop_if_option(LX_PTRACE_O_TRACEEXIT, B_FALSE,
+ (ulong_t)status, NULL);
+
+ /*
+ * This thread is exiting. Restore the state of the thread to
+ * what it was before we started running linux code.
+ */
+ (void) setcontext(&lx_tsd->lxtsd_exit_context);
+
+ /*
+ * If we returned from the setcontext(2), something is very wrong.
+ */
+ lx_err_fatal("exit: unable to set exit context: %s", strerror(errno));
+
+ /*NOTREACHED*/
+ return (0);
+}
+
+long
+lx_group_exit(uintptr_t p1)
+{
+ int status = (int)p1;
+ lx_tsd_t *lx_tsd = lx_get_tsd();
+
+ /*
+ * If we are a vfork(2)ed child, we need to exit as quickly and
+ * cleanly as possible to avoid corrupting our parent.
+ */
+ if (lx_tsd->lxtsd_is_vforked != 0) {
+ _exit(status);
+ }
+
+ lx_tsd->lxtsd_exit = LX_ET_EXIT_GROUP;
+ lx_tsd->lxtsd_exit_status = status;
+
+ /*
+ * This thread is exiting. Restore the state of the thread to
+ * what it was before we started running linux code.
+ */
+ (void) setcontext(&lx_tsd->lxtsd_exit_context);
+
+ /*
+ * If we returned from the setcontext(2), something is very wrong.
+ */
+ lx_err_fatal("group_exit: unable to set exit context: %s",
+ strerror(errno));
+
+ /*NOTREACHED*/
+ return (0);
+}
+
+static void *
+clone_start(void *arg)
+{
+ int rval;
+ struct clone_state *cs = (struct clone_state *)arg;
+ lx_tsd_t *lxtsd;
+
+ /*
+ * Let the kernel finish setting up all the needed state for this
+ * new thread.
+ *
+ * We already created the thread using the thr_create(3C) library
+ * call, so most of the work required to emulate lx_clone(2) has
+ * been done by the time we get to this point.
+ */
+ lx_debug("\tre-vectoring to lx kernel module to complete lx_clone()");
+ lx_debug("\tB_HELPER_CLONE(0x%x, 0x%p, 0x%p, 0x%p)",
+ cs->c_flags, cs->c_ptidp, cs->c_ldtinfo, cs->c_ctidp);
+
+ rval = syscall(SYS_brand, B_HELPER_CLONE, cs->c_flags, cs->c_ptidp,
+ cs->c_ldtinfo, cs->c_ctidp);
+
+ /*
+ * At this point the parent is waiting for cs->c_clone_res to go
+ * non-zero to indicate the thread has been cloned. The value set
+ * in cs->c_clone_res will be used for the return value from
+ * clone().
+ */
+ if (rval < 0) {
+ *(cs->c_clone_res) = -errno;
+ lx_debug("\tkernel clone failed, errno %d\n", errno);
+ free(cs->c_lx_tsd);
+ free(cs);
+ return (NULL);
+ }
+
+ if (lx_sched_setaffinity(0, sizeof (cs->c_affmask),
+ (uintptr_t)&cs->c_affmask) != 0) {
+ *(cs->c_clone_res) = -errno;
+
+ lx_err_fatal("Unable to set affinity mask in child thread: %s",
+ strerror(errno));
+ }
+
+ /*
+ * Initialize the thread specific data for this thread.
+ */
+ lxtsd = cs->c_lx_tsd;
+ lx_init_tsd(lxtsd);
+ lxtsd->lxtsd_clone_state = cs;
+
+ /*
+ * Install the emulation stack for this thread. Register the
+ * thread-specific data structure with the stack list so that it may be
+ * freed at thread exit or fork(2).
+ */
+ lx_install_stack(cs->c_ntv_stk, cs->c_ntv_stk_sz, lxtsd);
+
+ /*
+ * Let the parent know that the clone has (effectively) been
+ * completed.
+ */
+ *(cs->c_clone_res) = rval;
+
+ /*
+ * We want to load the general registers from this context, restore the
+ * original signal mask, and switch to the BRAND stack. The original
+ * signal mask was saved to the context by lx_clone().
+ */
+ cs->c_uc.uc_flags = UC_CPU | UC_SIGMASK;
+ cs->c_uc.uc_brand_data[0] = (void *)LX_UC_STACK_BRAND;
+
+ /*
+ * New threads will not link into the existing context chain.
+ */
+ cs->c_uc.uc_link = NULL;
+
+ /*
+ * Set stack pointer and entry point for new thread:
+ */
+ LX_REG(&cs->c_uc, REG_SP) = (uintptr_t)cs->c_stk;
+ LX_REG(&cs->c_uc, REG_PC) = (uintptr_t)cs->c_retaddr;
+
+ /*
+ * Return 0 to the child:
+ */
+ LX_REG(&cs->c_uc, REG_R0) = (uintptr_t)0;
+
+ /*
+ * Fire the ptrace(2) event stop in the new thread:
+ */
+ lx_ptrace_stop_if_option(cs->c_ptrace_event, B_TRUE, 0, &cs->c_uc);
+
+ /*
+ * Jump to the Linux process. This call cannot return.
+ */
+ lx_jump_to_linux(&cs->c_uc);
+}
+
+/*
+ * The way Linux handles stopping for FORK vs. CLONE does not map exactly to
+ * which syscall was used. Instead, it has to do with which signal is set in
+ * the low byte of the clone flag. The only time the CLONE event is emitted is
+ * if the clone signal (the low byte of the flags argument) is set to something
+ * other than SIGCHLD (see the Linux src in kernel/fork.c do_fork() for the
+ * actual code).
+ */
+static int
+ptrace_clone_event(int flags)
+{
+ if (flags & LX_CLONE_VFORK)
+ return (LX_PTRACE_O_TRACEVFORK);
+
+ if ((flags & LX_CSIGNAL) != LX_SIGCHLD)
+ return (LX_PTRACE_O_TRACECLONE);
+
+ return (LX_PTRACE_O_TRACEFORK);
+}
+
+/*
+ * See glibc sysdeps/unix/sysv/linux/x86_64/clone.S code for x64 argument order
+ * and the Linux kernel/fork.c code for the various ways arguments can be passed
+ * to the clone syscall (CONFIG_CLONE_BACKWARDS, et al).
+ */
+long
+lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, uintptr_t p5)
+{
+ struct clone_state *cs;
+ int flags = (int)p1;
+ void *cldstk = (void *)p2;
+ void *ptidp = (void *)p3;
+#if defined(_LP64)
+ void *ctidp = (void *)p4;
+ struct lx_desc *ldtinfo = (void *)p5;
+#else /* is 32bit */
+ struct lx_desc *ldtinfo = (void *)p4;
+ void *ctidp = (void *)p5;
+#endif
+ thread_t tid;
+ volatile int clone_res;
+ int sig;
+ int rval;
+ int pid;
+ ucontext_t *ucp;
+ sigset_t sigmask, osigmask;
+ int fork_flags = 0;
+ int ptrace_event;
+ int error = 0;
+ lx_tsd_t *lx_tsd = lx_get_tsd();
+
+ if (flags & LX_CLONE_SETTLS) {
+ lx_debug("lx_clone(flags=0x%x stk=0x%p ptidp=0x%p ldt=0x%p "
+ "ctidp=0x%p", flags, cldstk, ptidp, ldtinfo, ctidp);
+ } else {
+ lx_debug("lx_clone(flags=0x%x stk=0x%p ptidp=0x%p)",
+ flags, cldstk, ptidp);
+ }
+
+ /*
+ * Only supported for pid 0 on Linux after version 2.3.21, and
+ * apparently not at all since 2.5.16.
+ */
+ if (flags & LX_CLONE_PID)
+ return (-EINVAL);
+
+ /*
+ * CLONE_THREAD requires CLONE_SIGHAND.
+ *
+ * CLONE_THREAD and CLONE_DETACHED must both be either set or cleared
+ * in kernel 2.4 and prior.
+ * In kernel 2.6 (and later) CLONE_DETACHED was dropped completely, so
+ * we no longer have this requirement.
+ */
+
+ if (flags & CLONE_TD) {
+ if (!(flags & LX_CLONE_SIGHAND))
+ return (-EINVAL);
+ if (strncmp(lx_release, "2.4", 3) == 0 &&
+ (flags & CLONE_TD) != CLONE_TD)
+ return (-EINVAL);
+ }
+
+ ucp = lx_syscall_regs();
+
+ /* test if pointer passed by user are writable */
+ if (flags & LX_CLONE_PARENT_SETTID) {
+ if (uucopy(ptidp, &pid, sizeof (int)) != 0)
+ return (-EFAULT);
+ if (uucopy(&pid, ptidp, sizeof (int)) != 0)
+ return (-EFAULT);
+ }
+ if (flags & LX_CLONE_CHILD_SETTID) {
+ if (uucopy(ctidp, &pid, sizeof (int)) != 0)
+ return (-EFAULT);
+ if (uucopy(&pid, ctidp, sizeof (int)) != 0)
+ return (-EFAULT);
+ }
+
+ ptrace_event = ptrace_clone_event(flags);
+
+ /*
+ * Inform the in-kernel ptrace(2) subsystem that we are about to
+ * emulate a fork(2), vfork(2) or clone(2) system call.
+ */
+ lx_ptrace_clone_begin(ptrace_event, !!(flags & LX_CLONE_PTRACE));
+
+ /*
+ * Handle a fork(2) operation here. If this is not a fork, a new
+ * thread will be created after this block.
+ */
+ if (IS_FORK(flags) || IS_VFORK(flags)) {
+ if (flags & LX_CLONE_PARENT) {
+ lx_unsupported("clone(2) only supports CLONE_PARENT "
+ "for threads.\n");
+ return (-ENOTSUP);
+ }
+
+ if ((flags & LX_CSIGNAL) == 0)
+ fork_flags |= FORK_NOSIGCHLD;
+
+ /*
+ * Suspend signal delivery, run the stack management prefork
+ * handler and perform the actual fork(2) operation.
+ */
+ _sigoff();
+ lx_stack_prefork();
+ if (flags & LX_CLONE_VFORK) {
+ lx_sighandlers_t saved;
+
+ /*
+ * Because we keep our signal disposition at user-land
+ * (and in memory), we must prevent it from being
+ * clobbered should our vforked child change the
+ * disposition (e.g., via sigaction()) before releasing
+ * the address space. We preserve our disposition by
+ * taking a snapshot of it before the vfork and
+ * restoring it afterwards -- which we can get away
+ * with because we know that we aren't executing
+ * concurrently with our child.
+ */
+ lx_sighandlers_save(&saved);
+ lx_tsd->lxtsd_is_vforked++;
+ rval = vforkx(fork_flags);
+ if (rval != 0) {
+ lx_tsd->lxtsd_is_vforked--;
+ lx_sighandlers_restore(&saved);
+ }
+ } else {
+ rval = forkx(fork_flags);
+ }
+
+ /*
+ * The parent process returns through the regular system call
+ * path here.
+ */
+ if (rval != 0) {
+ /*
+ * Run the stack management postfork handler in the
+ * parent. In the CLONE_VFORK case, where it only
+ * needs to be performed once due to the shared address
+ * space, it is critical that this step is performed in
+ * the parent and not the child. The latter can result
+ * in un-woken threads blocked on lx_stack_list_lock.
+ */
+ lx_stack_postfork();
+
+ /*
+ * Since we've already forked, we can't do much if
+ * uucopy fails, so we just ignore failure. Failure is
+ * unlikely since we've tested the memory before we did
+ * the fork.
+ */
+ if (rval > 0 && (flags & LX_CLONE_PARENT_SETTID)) {
+ (void) uucopy(&rval, ptidp, sizeof (int));
+ }
+
+ if (rval > 0) {
+ lx_ptrace_stop_if_option(ptrace_event, B_FALSE,
+ (ulong_t)rval, NULL);
+ }
+
+ /*
+ * Re-enable signal delivery in the parent process.
+ */
+ _sigon();
+
+ return ((rval < 0) ? -errno : rval);
+ }
+
+ /*
+ * The rest of this block runs only within the new child
+ * process.
+ */
+
+ if (!IS_VFORK(flags)) {
+ /*
+ * For non-vfork children run the stack management
+ * postfork handler.
+ */
+ lx_stack_postfork();
+
+ /*
+ * We must free the stacks and thread-specific data
+ * objects for every thread except the one duplicated
+ * from the parent by forkx().
+ */
+ lx_free_other_stacks();
+ }
+
+ if (rval == 0 && (flags & LX_CLONE_CHILD_SETTID)) {
+ /*
+ * lx_getpid should not fail, and if it does, there's
+ * not much we can do about it since we've already
+ * forked, so on failure, we just don't copy the
+ * memory.
+ */
+ pid = syscall(SYS_brand, B_GETPID);
+ if (pid >= 0)
+ (void) uucopy(&pid, ctidp, sizeof (int));
+ }
+
+ /*
+ * Set up additional data in the lx_proc_data structure as
+ * necessary.
+ */
+ if ((rval = syscall(SYS_brand, B_HELPER_CLONE, flags, ptidp,
+ ldtinfo, ctidp)) < 0) {
+ return (rval);
+ }
+
+ if (IS_VFORK(flags)) {
+ ucontext_t vforkuc;
+
+ /*
+ * The vfork(2) interface is somewhat less than ideal.
+ * The unfortunate notion of borrowing the address
+ * space of the parent process requires us to jump
+ * through several hoops to prevent corrupting parent
+ * emulation state.
+ *
+ * When returning in the child, we make a copy of the
+ * system call return context and discard three pages
+ * of the native stack. Returning normally would
+ * clobber the native stack frame in which the brand
+ * library in the parent process is presently waiting.
+ *
+ * The calling program is expected to correctly use
+ * this dusty, underspecified relic. Neglecting to
+ * immediately call execve(2) or exit(2) is not
+ * cricket; this stack space will be permanently lost,
+ * not to mention myriad other undefined behaviour.
+ */
+ bcopy(ucp, &vforkuc, sizeof (vforkuc));
+ vforkuc.uc_brand_data[1] -= LX_NATIVE_STACK_VFORK_GAP;
+ vforkuc.uc_link = NULL;
+
+ lx_debug("\tvfork native stack sp %p",
+ vforkuc.uc_brand_data[1]);
+
+ /*
+ * If provided, the child needs its new stack set up.
+ */
+ if (cldstk != 0) {
+ lx_debug("\tvfork cldstk %p", cldstk);
+ LX_REG(&vforkuc, REG_SP) = (uintptr_t)cldstk;
+ }
+
+ /*
+ * Stop for ptrace if required.
+ */
+ lx_ptrace_stop_if_option(ptrace_event, B_TRUE, 0, NULL);
+
+ /*
+ * Return to the child via the specially constructed
+ * vfork(2) context.
+ */
+ LX_EMULATE_RETURN(&vforkuc, LX_SYS_clone, 0, 0);
+ (void) syscall(SYS_brand, B_EMULATION_DONE, &vforkuc,
+ LX_SYS_clone, 0, 0);
+
+ assert(0);
+ }
+
+ /*
+ * If provided, the child needs its new stack set up.
+ */
+ if (cldstk != 0) {
+ lx_debug("\tcldstk %p", cldstk);
+ LX_REG(ucp, REG_SP) = (uintptr_t)cldstk;
+ }
+
+ /*
+ * Stop for ptrace if required.
+ */
+ lx_ptrace_stop_if_option(ptrace_event, B_TRUE, 0, NULL);
+
+ /*
+ * Re-enable signal delivery in the child process.
+ */
+ _sigon();
+
+ /*
+ * The child process returns via the regular emulated system
+ * call path:
+ */
+ return (0);
+ }
+
+ /*
+ * We have very restricted support.... only exactly these flags are
+ * supported
+ */
+ if (((flags & SHARED_AS) != SHARED_AS)) {
+ lx_unsupported("clone(2) requires that all or none of "
+ "CLONE_VM/FS/FILES/THREAD/SIGHAND be set. (flags:0x%08X)\n",
+ flags);
+ return (-ENOTSUP);
+ }
+
+ if (cldstk == NULL) {
+ lx_unsupported("clone(2) requires the caller to allocate the "
+ "child's stack.\n");
+ return (-ENOTSUP);
+ }
+
+ /*
+ * If we want a signal-on-exit, ensure that the signal is valid.
+ */
+ if ((sig = ltos_signo[flags & LX_CSIGNAL]) == -1) {
+ lx_unsupported("clone(2) passed unsupported signal: %d", sig);
+ return (-ENOTSUP);
+ }
+
+ /*
+ * Initialise the state structure we pass as an argument to the new
+ * thread:
+ */
+ if ((cs = malloc(sizeof (*cs))) == NULL) {
+ lx_debug("could not allocate clone_state: %s", strerror(errno));
+ return (-ENOMEM);
+ }
+ cs->c_flags = flags;
+ cs->c_sig = sig;
+ cs->c_stk = cldstk;
+ cs->c_ptidp = ptidp;
+ cs->c_ldtinfo = ldtinfo;
+ cs->c_ctidp = ctidp;
+ cs->c_clone_res = &clone_res;
+ cs->c_ptrace_event = ptrace_event;
+ /*
+ * We want the new thread to return directly to the call site for
+ * the system call.
+ */
+ cs->c_retaddr = (void *)LX_REG(ucp, REG_PC);
+ /*
+ * Copy the saved context for the clone(2) system call so that the
+ * new thread may use it to initialise registers.
+ */
+ bcopy(ucp, &cs->c_uc, sizeof (cs->c_uc));
+ if ((cs->c_lx_tsd = malloc(sizeof (*cs->c_lx_tsd))) == NULL) {
+ free(cs);
+ return (-ENOMEM);
+ }
+
+ if (lx_sched_getaffinity(0, sizeof (cs->c_affmask),
+ (uintptr_t)&cs->c_affmask) == -1) {
+ lx_err_fatal("Unable to get affinity mask for parent "
+ "thread: %s", strerror(errno));
+ }
+
+ clone_res = 0;
+
+ /*
+ * Block all signals because the thread we create won't be able to
+ * properly handle them until it's fully set up.
+ */
+ VERIFY0(sigfillset(&sigmask));
+ if (sigprocmask(SIG_BLOCK, &sigmask, &osigmask) < 0) {
+ lx_debug("lx_clone sigprocmask() failed: %s", strerror(errno));
+ free(cs->c_lx_tsd);
+ free(cs);
+ return (-errno);
+ }
+ cs->c_uc.uc_sigmask = osigmask;
+
+ /*
+ * Allocate the native stack for this new thread now, so that we
+ * can return failure gracefully as ENOMEM.
+ */
+ if (lx_alloc_stack(&cs->c_ntv_stk, &cs->c_ntv_stk_sz) != 0) {
+ free(cs->c_lx_tsd);
+ free(cs);
+ return (-ENOMEM);
+ }
+
+ rval = thr_create(NULL, NULL, clone_start, cs, THR_DETACHED, &tid);
+
+ /*
+ * If the thread did not start, free the resources we allocated:
+ */
+ if (rval == -1) {
+ error = errno;
+ (void) munmap(cs->c_ntv_stk, cs->c_ntv_stk_sz);
+ free(cs->c_lx_tsd);
+ free(cs);
+ }
+
+ /*
+ * Release any pending signals
+ */
+ (void) sigprocmask(SIG_SETMASK, &osigmask, NULL);
+
+ /*
+ * Wait for the child to be created and have its tid assigned.
+ */
+ if (rval == 0) {
+ while (clone_res == 0)
+ ;
+
+ rval = clone_res;
+ lx_ptrace_stop_if_option(ptrace_event, B_FALSE, (ulong_t)rval,
+ NULL);
+
+ return (rval);
+ } else {
+ /*
+ * Return the error from thr_create(3C).
+ */
+ return (-error);
+ }
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/debug.c b/usr/src/lib/brand/lx/lx_brand/common/debug.c
new file mode 100644
index 0000000000..7e58a0f941
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/debug.c
@@ -0,0 +1,179 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <thread.h>
+#include <unistd.h>
+
+#include <sys/modctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <sys/lx_brand.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_misc.h>
+
+/* internal debugging state */
+static char *lx_debug_path = NULL; /* debug output file path */
+static char lx_debug_path_buf[MAXPATHLEN];
+
+int lx_dtrace_lazyload = 1; /* patchable; see below */
+
+void
+lx_debug_enable(void)
+{
+ /* send all debugging output to /dev/tty */
+ lx_debug_path = "/dev/tty";
+ lx_debug("lx_debug: debugging output enabled: %s", lx_debug_path);
+}
+
+void
+lx_debug_init(void)
+{
+ /*
+ * Our DTrace USDT provider is loaded in our .init section, which is
+ * not run by our e_entry ELF entry point (_start, which calls into
+ * lx_init()). We exploit this to only actually load our USDT provider
+ * if LX_DTRACE is set, assuring that we don't compromise fork()
+ * performance in the (common) case that DTrace of lx_brand.so.1 itself
+ * isn't enabled or desired. (As with all USDT providers, it can always
+ * be loaded by explicitly specifying the full provider name). Note
+ * that we also allow this behavior to be set via a manual override,
+ * lx_dtrace_lazyload -- allowing for USDT probes to be automatically
+ * provided in situations where setting an environment variable is
+ * tedious or otherwise impossible.
+ */
+ if (getenv("LX_DTRACE") != NULL || !lx_dtrace_lazyload) {
+ extern void _init(void);
+ _init();
+ }
+
+ if (getenv("LX_DEBUG") == NULL)
+ return;
+
+ /*
+ * It's OK to use this value without any locking, as all callers can
+ * use the return value to decide whether extra work should be done
+ * before calling lx_debug().
+ *
+ * If debugging is disabled after a routine calls this function it
+ * doesn't really matter as lx_debug() will see debugging is disabled
+ * and will not output anything.
+ */
+ lx_debug_enabled = 1;
+
+ /* check if there's a debug log file specified */
+ lx_debug_path = getenv("LX_DEBUG_FILE");
+ if (lx_debug_path == NULL) {
+ /* send all debugging output to /dev/tty */
+ lx_debug_path = "/dev/tty";
+ }
+
+ (void) strlcpy(lx_debug_path_buf, lx_debug_path,
+ sizeof (lx_debug_path_buf));
+ lx_debug_path = lx_debug_path_buf;
+
+ lx_debug("lx_debug: debugging output ENABLED to path: \"%s\"",
+ lx_debug_path);
+}
+
+void
+lx_debug(const char *msg, ...)
+{
+ va_list ap;
+ char *buf;
+ int rv, fd, n;
+ int errno_backup;
+ int size = LX_MSG_MAXLEN + 1;
+
+ if (lx_debug_enabled == 0 && !LX_DEBUG_ENABLED())
+ return;
+
+ /*
+ * If debugging is not enabled, we do not wish to have a large stack
+ * footprint. The buffer allocation is thus done conditionally,
+ * rather than as regular automatic storage.
+ */
+ if ((buf = SAFE_ALLOCA(size)) == NULL)
+ return;
+
+ errno_backup = errno;
+
+ /* prefix the message with pid/tid */
+ if ((n = snprintf(buf, size, "%u/%u: ", getpid(), thr_self())) == -1) {
+ errno = errno_backup;
+ return;
+ }
+
+ /* format the message */
+ va_start(ap, msg);
+ rv = vsnprintf(&buf[n], size - n, msg, ap);
+ va_end(ap);
+ if (rv == -1) {
+ errno = errno_backup;
+ return;
+ }
+
+ /* add a carrige return if there isn't one already */
+ if ((buf[strlen(buf) - 1] != '\n') &&
+ (strlcat(buf, "\n", size) >= size)) {
+ errno = errno_backup;
+ return;
+ }
+
+ LX_DEBUG(buf);
+
+ if (!lx_debug_enabled)
+ return;
+
+ /*
+ * Open the debugging output file. note that we don't protect
+ * ourselves against exec or fork1 here. if an mt process were
+ * to exec/fork1 while we're doing this they'd end up with an
+ * extra open desciptor in their fd space. a'well. shouldn't
+ * really matter.
+ */
+ if ((fd = open(lx_debug_path,
+ O_WRONLY|O_APPEND|O_CREAT|O_NDELAY|O_NOCTTY, 0666)) == -1) {
+ return;
+ }
+ (void) fchmod(fd, 0666);
+
+ /* we retry in case of EINTR */
+ do {
+ rv = write(fd, buf, strlen(buf));
+ } while ((rv == -1) && (errno == EINTR));
+ (void) fsync(fd);
+
+ (void) close(fd);
+ errno = errno_backup;
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/dir.c b/usr/src/lib/brand/lx/lx_brand/common/dir.c
new file mode 100644
index 0000000000..ed10dfb822
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/dir.c
@@ -0,0 +1,82 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <string.h>
+#include <stddef.h>
+#include <errno.h>
+#include <unistd.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/dirent.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_syscall.h>
+
+#define LX_NAMEMAX 256
+
+struct lx_old_dirent {
+ long d_ino; /* not l_ino_t */
+ long d_off;
+ ushort_t d_reclen;
+ char d_name[LX_NAMEMAX];
+};
+
+/*
+ * Read in one dirent structure from fd into dirp.
+ * p3 (count) is ignored.
+ */
+/*ARGSUSED*/
+long
+lx_readdir(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ int fd = (int)p1;
+ struct lx_old_dirent *dirp = (struct lx_old_dirent *)p2;
+ uint_t count = sizeof (struct lx_old_dirent);
+ int rc = 0;
+ struct lx_old_dirent _ld;
+ struct dirent *sd = (struct dirent *)&_ld;
+
+ /*
+ * The return value from getdents is not applicable, as
+ * it might have squeezed more than one dirent in the buffer
+ * we provided.
+ *
+ * getdents() will deal with the case of dirp == NULL
+ */
+ if ((rc = getdents(fd, sd, count)) < 0)
+ return (-errno);
+
+ /*
+ * Set rc 1 (pass), or 0 (end of directory).
+ */
+ rc = (sd->d_reclen == 0) ? 0 : 1;
+
+ if (uucopy(sd, dirp, count) != 0)
+ return (-errno);
+
+ return (rc);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/fcntl.c b/usr/src/lib/brand/lx/lx_brand/common/fcntl.c
new file mode 100644
index 0000000000..7b6cd4d5f2
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/fcntl.c
@@ -0,0 +1,165 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/filio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stropts.h>
+#include <libintl.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/stat.h>
+
+#include <sys/lx_fcntl.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_syscall.h>
+
+long
+lx_dup2(uintptr_t p1, uintptr_t p2)
+{
+ int oldfd = (int)p1;
+ int newfd = (int)p2;
+ int rc;
+
+ rc = fcntl(oldfd, F_DUP2FD, newfd);
+ return ((rc == -1) ? -errno : rc);
+}
+
+long
+lx_dup3(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ int oldfd = (int)p1;
+ int newfd = (int)p2;
+ int flags = (int)p3;
+ int rc;
+
+ /* The only valid flag is O_CLOEXEC. */
+ if (flags & ~LX_O_CLOEXEC)
+ return (-EINVAL);
+
+ if (oldfd == newfd)
+ return (-EINVAL);
+
+ rc = fcntl(oldfd, (flags == 0) ? F_DUP2FD : F_DUP2FD_CLOEXEC, newfd);
+ return ((rc == -1) ? -errno : rc);
+}
+
+/*
+ * flock() applies or removes an advisory lock on the file
+ * associated with the file descriptor fd.
+ *
+ * operation is: LX_LOCK_SH, LX_LOCK_EX, LX_LOCK_UN, LX_LOCK_NB
+ */
+long
+lx_flock(uintptr_t p1, uintptr_t p2)
+{
+ int fd = (int)p1;
+ int operation = (int)p2;
+ struct flock fl;
+ int cmd;
+ int ret;
+
+ if (operation & LX_LOCK_NB) {
+ cmd = F_FLOCK;
+ operation &= ~LX_LOCK_NB; /* turn off this bit */
+ } else {
+ cmd = F_FLOCKW;
+ }
+
+ switch (operation) {
+ case LX_LOCK_UN:
+ fl.l_type = F_UNLCK;
+ break;
+ case LX_LOCK_SH:
+ fl.l_type = F_RDLCK;
+ break;
+ case LX_LOCK_EX:
+ fl.l_type = F_WRLCK;
+ break;
+ default:
+ return (-EINVAL);
+ }
+
+ fl.l_whence = 0;
+ fl.l_start = 0;
+ fl.l_len = 0;
+ fl.l_sysid = 0;
+ fl.l_pid = 0;
+
+ ret = fcntl(fd, cmd, &fl);
+
+ return ((ret == -1) ? -errno : ret);
+}
+
+/*
+ * Based on Illumos posix_fadvise which does nothing. The only difference is
+ * that on Linux an fd refering to a pipe or FIFO returns EINVAL.
+ * The Linux POSIX_FADV_* values are the same as the Illumos values.
+ * See how glibc calls fadvise64; the offeset is a 64bit value, but the length
+ * is not, whereas fadvise64_64 passes both the offset and length as 64bit
+ * values.
+ */
+/* ARGSUSED */
+long
+lx_fadvise64(uintptr_t p1, off64_t p2, uintptr_t p3, uintptr_t p4)
+{
+ int fd = (int)p1;
+ int advice = (int)p4;
+ int32_t len = (int32_t)p3;
+ struct stat64 statb;
+
+ switch (advice) {
+ case POSIX_FADV_NORMAL:
+ case POSIX_FADV_RANDOM:
+ case POSIX_FADV_SEQUENTIAL:
+ case POSIX_FADV_WILLNEED:
+ case POSIX_FADV_DONTNEED:
+ case POSIX_FADV_NOREUSE:
+ break;
+ default:
+ return (-EINVAL);
+ }
+ if (len < 0)
+ return (-EINVAL);
+ if (fstat64(fd, &statb) != 0)
+ return (-EBADF);
+ if (S_ISFIFO(statb.st_mode))
+ return (-ESPIPE);
+ return (0);
+}
+
+long
+lx_fadvise64_64(uintptr_t p1, off64_t p2, off64_t p3, uintptr_t p4)
+{
+
+ if (p3 < 0)
+ return (-EINVAL);
+
+ return (lx_fadvise64(p1, p2, 0, p4));
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/file.c b/usr/src/lib/brand/lx/lx_brand/common/file.c
new file mode 100644
index 0000000000..6fc7a5e5dd
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/file.c
@@ -0,0 +1,582 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <sys/fstyp.h>
+#include <sys/fsid.h>
+
+#include <errno.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/vnode.h>
+#include <fcntl.h>
+#include <string.h>
+#include <utime.h>
+#include <atomic.h>
+#include <sys/syscall.h>
+
+#include <sys/lx_syscall.h>
+#include <sys/lx_types.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_fcntl.h>
+
+#define LX_UTIME_NOW ((1l << 30) - 1l)
+#define LX_UTIME_OMIT ((1l << 30) - 2l)
+
+static int
+install_checkpath(uintptr_t p1)
+{
+ int saved_errno = errno;
+ char path[MAXPATHLEN];
+
+ /*
+ * The "dev" RPM package wants to modify /dev/pts, but /dev/pts is a
+ * lofs mounted copy of /native/dev/pts, so that won't work.
+ *
+ * Instead, if we're trying to modify /dev/pts from install mode, just
+ * act as if it succeded.
+ */
+ if (uucopystr((void *)p1, path, MAXPATHLEN) == -1)
+ return (-errno);
+
+ if (strcmp(path, "/dev/pts") == 0)
+ return (0);
+
+ errno = saved_errno;
+ return (-errno);
+}
+
+/*
+ * Convert linux LX_AT_* flags to solaris AT_* flags but skip verifying allowed
+ * flags have been passed. This also allows EACCESS/REMOVEDIR to be translated
+ * correctly since on linux they have the same value.
+ *
+ * Some code can actually pass in other bits in the flag. We may have to simply
+ * ignore these, as indicated by the enforce parameter. See lx_fchmodat for
+ * another example of this type of behavior.
+ */
+static int
+ltos_at_flag(int lflag, int allow, boolean_t enforce)
+{
+ int sflag = 0;
+
+ if ((lflag & LX_AT_EACCESS) && (allow & AT_EACCESS)) {
+ lflag &= ~LX_AT_EACCESS;
+ sflag |= AT_EACCESS;
+ }
+
+ if ((lflag & LX_AT_REMOVEDIR) && (allow & AT_REMOVEDIR)) {
+ lflag &= ~LX_AT_REMOVEDIR;
+ sflag |= AT_REMOVEDIR;
+ }
+
+ if ((lflag & LX_AT_SYMLINK_NOFOLLOW) && (allow & AT_SYMLINK_NOFOLLOW)) {
+ lflag &= ~LX_AT_SYMLINK_NOFOLLOW;
+ sflag |= AT_SYMLINK_NOFOLLOW;
+ }
+
+ /* right now solaris doesn't have a _FOLLOW flag, so use a fake one */
+ if ((lflag & LX_AT_SYMLINK_FOLLOW) && (allow & LX_AT_SYMLINK_FOLLOW)) {
+ lflag &= ~LX_AT_SYMLINK_FOLLOW;
+ sflag |= LX_AT_SYMLINK_FOLLOW;
+ }
+
+ /* If lflag is not zero than some flags did not hit the above code. */
+ if (enforce && lflag)
+ return (-EINVAL);
+
+ return (sflag);
+}
+
+
+/*
+ * Miscellaneous file-related system calls.
+ */
+
+/*
+ * On Linux, an unlink of a directory returns EISDIR, not EPERM.
+ */
+long
+lx_unlink(uintptr_t p)
+{
+ char *pathname = (char *)p;
+ struct stat64 statbuf;
+
+ if ((lstat64(pathname, &statbuf) == 0) && S_ISDIR(statbuf.st_mode))
+ return (-EISDIR);
+
+ /*
+ * Some versions of the zone's /dev/log setup code (e.g. syslog-ng) get
+ * cranky if they can't cleanup /dev/log so we lie and tell them they
+ * succeeded.
+ */
+ if (pathname != NULL) {
+ char p[MAXPATHLEN];
+
+ if (uucopystr((void *)pathname, p, sizeof (p)) < 0)
+ return (-errno);
+ if (strcmp(p, "/dev/log") == 0)
+ return (0);
+ }
+
+ return (unlink(pathname) ? -errno : 0);
+}
+
+long
+lx_unlinkat(uintptr_t ext1, uintptr_t p1, uintptr_t p2)
+{
+ int atfd = (int)ext1;
+ char *pathname = (char *)p1;
+ int flag = (int)p2;
+ struct stat64 statbuf;
+
+ if (atfd == LX_AT_FDCWD)
+ atfd = AT_FDCWD;
+
+ flag = ltos_at_flag(flag, AT_REMOVEDIR, B_TRUE);
+ if (flag < 0)
+ return (-EINVAL);
+
+ if (!(flag & AT_REMOVEDIR)) {
+ /* Behave like unlink() */
+ if ((fstatat64(atfd, pathname, &statbuf, AT_SYMLINK_NOFOLLOW) ==
+ 0) && S_ISDIR(statbuf.st_mode))
+ return (-EISDIR);
+
+ if (pathname != NULL) {
+ char p[MAXPATHLEN];
+
+ if (uucopystr((void *)pathname, p, sizeof (p)) < 0)
+ return (-errno);
+ if (strcmp(p, "/dev/log") == 0)
+ return (0);
+ }
+ }
+
+ return (unlinkat(atfd, pathname, flag) ? -errno : 0);
+}
+
+/*
+ * fsync() and fdatasync() - On Illumos, these calls translate into a common
+ * fdsync() syscall with a different parameter. fsync is handled in the
+ * fsync wrapper.
+ */
+long
+lx_fsync(uintptr_t fd)
+{
+ int fildes = (int)fd;
+ struct stat64 statbuf;
+
+ if ((fstat64(fildes, &statbuf) == 0) &&
+ (S_ISCHR(statbuf.st_mode) || S_ISFIFO(statbuf.st_mode)))
+ return (-EINVAL);
+
+ return (fsync((int)fd) ? -errno : 0);
+}
+
+long
+lx_fdatasync(uintptr_t fd)
+{
+ int fildes = (int)fd;
+ struct stat64 statbuf;
+
+ if ((fstat64(fildes, &statbuf) == 0) &&
+ (S_ISCHR(statbuf.st_mode) || S_ISFIFO(statbuf.st_mode)))
+ return (-EINVAL);
+
+ return (fdatasync((int)fd) ? -errno : 0);
+}
+
+long
+lx_utime(uintptr_t p1, uintptr_t p2)
+{
+ int ret;
+
+ ret = utime((const char *)p1, (const struct utimbuf *)p2);
+
+ if (ret < 0) {
+ /*
+ * If utime() failed and we're in install mode, return success
+ * if the the reason we failed was because the source file
+ * didn't actually exist or if we're trying to modify /dev/pts.
+ */
+ if ((lx_install != 0) &&
+ ((errno == ENOENT) || (install_checkpath(p1) == 0)))
+ return (0);
+
+ return (-errno);
+ }
+
+ return (0);
+}
+
+#if defined(_ILP32)
+/*
+ * llseek() - The Linux implementation takes an additional parameter, which is
+ * the resulting position in the file.
+ */
+long
+lx_llseek(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
+ uintptr_t p5)
+{
+ offset_t ret;
+ offset_t *res = (offset_t *)p4;
+
+ /* SEEK_DATA and SEEK_HOLE are only valid in Solaris */
+ if ((int)p5 > SEEK_END)
+ return (-EINVAL);
+
+ if ((ret = llseek((int)p1, LX_32TO64(p3, p2), p5)) < 0)
+ return (-errno);
+
+ *res = ret;
+ return (0);
+}
+#endif
+
+/*
+ * seek() - For 32-bit lx, when the resultant file offset cannot be represented
+ * in 32 bits, Linux performs the seek but Illumos doesn't, though both set
+ * EOVERFLOW. We call llseek() and then check to see if we need to return
+ * EOVERFLOW.
+ */
+long
+lx_lseek(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ offset_t offset = (offset_t)(off_t)(p2); /* sign extend */
+ offset_t ret;
+#if defined(_ILP32)
+ off_t ret32;
+#endif
+
+ /* SEEK_DATA and SEEK_HOLE are only valid in Illumos */
+ if ((int)p3 > SEEK_END)
+ return (-EINVAL);
+
+ if ((ret = llseek((int)p1, offset, p3)) < 0)
+ return (-errno);
+
+#if defined(_LP64)
+ return (ret);
+#else
+ ret32 = (off_t)ret;
+ if ((offset_t)ret32 == ret)
+ return (ret32);
+ else
+ return (-EOVERFLOW);
+#endif
+}
+
+/*
+ * Neither Illumos nor Linux actually returns anything to the caller, but glibc
+ * expects to see SOME value returned, so placate it and return 0.
+ */
+long
+lx_sync(void)
+{
+ sync();
+ return (0);
+}
+
+long
+lx_rmdir(uintptr_t p1)
+{
+ int r;
+ char *nm = (char *)p1;
+
+ r = rmdir(nm);
+ if (r < 0) {
+ int terr = errno;
+
+ /*
+ * On both Illumos and Linux rmdir returns EINVAL if the last
+ * component of the path is '.', but on Illumos we also return
+ * this errno if we're trying to remove the CWD. Unfortunately,
+ * at least the LTP test suite assumes that it can rmdir the
+ * CWD, so we need handle this. We try to get out of the
+ * directory we're trying to remove.
+ */
+ if (terr == EINVAL) {
+ int l;
+
+ l = strlen(nm);
+ if (l >= 2 && !(nm[l - 2] == '/' && nm[l - 1] == '.')) {
+ if (chdir("..") == 0 && rmdir(nm) == 0) {
+ return (0);
+ }
+ }
+ }
+
+ return ((terr == EEXIST) ? -ENOTEMPTY : -terr);
+ }
+ return (0);
+}
+
+/*
+ * Exactly the same as Illumos' sysfs(2), except Linux numbers their fs indices
+ * starting at 0, and Illumos starts at 1.
+ */
+long
+lx_sysfs(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ int option = (int)p1;
+ int res;
+
+ /*
+ * Linux actually doesn't have #defines for these; their sysfs(2)
+ * man page literally defines the "option" field as being 1, 2 or 3,
+ * corresponding to Solaris' GETFSIND, GETFSTYP and GETNFSTYP,
+ * respectively.
+ */
+ switch (option) {
+ case 1:
+ if ((res = sysfs(GETFSIND, (const char *)p2)) < 0)
+ return (-errno);
+
+ return (res - 1);
+
+ case 2:
+ if ((res = sysfs(GETFSTYP, (int)p2 + 1,
+ (char *)p3)) < 0)
+ return (-errno);
+
+ return (0);
+
+ case 3:
+ if ((res = sysfs(GETNFSTYP)) < 0)
+ return (-errno);
+
+ return (res);
+
+ default:
+ break;
+ }
+
+ return (-EINVAL);
+}
+
+long
+lx_futimesat(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ int atfd = (int)p1;
+ char *path = (char *)p2;
+ struct timeval *times = (struct timeval *)p3;
+
+ if (atfd == LX_AT_FDCWD)
+ atfd = AT_FDCWD;
+
+ return (futimesat(atfd, path, times) ? -errno : 0);
+}
+
+/*
+ * From the utimensat man page:
+ * On Linux, futimens() is a library function implemented on top of the
+ * utimensat() system call. To support this, the Linux utimensat() system
+ * call implements a nonstandard feature: if pathname is NULL, then the
+ * call modifies the timestamps of the file referred to by the file
+ * descriptor dirfd (which may refer to any type of file). Using this
+ * feature, the call futimens(fd, times) is implemented as:
+ *
+ * utimensat(fd, NULL, times, 0);
+ *
+ * Some of the returns fail here. Linux allows the time to be modified if:
+ *
+ * the caller must have write access to the file
+ * or
+ * the caller's effective user ID must match the owner of the file
+ * or
+ * the caller must have appropriate privileges
+ *
+ * We behave differently. We fail with EPERM if:
+ *
+ * the calling process's euid has write access to the file but does not match
+ * the owner of the file and the calling process does not have the
+ * appropriate privileges
+ *
+ * This causes some of the LTP utimensat tests to fail because they expect an
+ * unprivileged process can update the time on a file it can write but does not
+ * own. There are also other LTP failures when the test uses attributes
+ * (e.g. chattr a+) and expects a failure, but we succeed.
+ */
+long
+lx_utimensat(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
+{
+ int fd = (int)p1;
+ const char *path = (const char *)p2;
+ const timespec_t *times = (const timespec_t *)p3;
+ timespec_t ts[2];
+ int flag = (int)p4;
+
+ if (times != NULL) {
+ if (uucopy((void *)p3, ts, sizeof (ts)) == -1)
+ return (-errno);
+
+ if (ts[0].tv_nsec == LX_UTIME_NOW)
+ ts[0].tv_nsec = UTIME_NOW;
+ if (ts[1].tv_nsec == LX_UTIME_NOW)
+ ts[1].tv_nsec = UTIME_NOW;
+
+ if (ts[0].tv_nsec == LX_UTIME_OMIT)
+ ts[0].tv_nsec = UTIME_OMIT;
+ if (ts[1].tv_nsec == LX_UTIME_OMIT)
+ ts[1].tv_nsec = UTIME_OMIT;
+
+ times = (const timespec_t *)ts;
+ }
+
+ if (flag == LX_AT_SYMLINK_NOFOLLOW)
+ flag = AT_SYMLINK_NOFOLLOW;
+
+ if (fd == LX_AT_FDCWD)
+ fd = AT_FDCWD;
+
+ if (path == NULL) {
+ return (futimens(fd, times) ? -errno : 0);
+ } else {
+ return (utimensat(fd, path, times, flag) ? -errno : 0);
+ }
+}
+
+/*
+ * Constructs an absolute path string in buf from the path of fd and the
+ * relative path string pointed to by "p1". This is required for emulating
+ * *at() system calls.
+ * Example:
+ * If the path of fd is "/foo/bar" and path is "etc" the string returned is
+ * "/foo/bar/etc", if the fd is a file fd then it fails with ENOTDIR.
+ * If path is absolute then no modifcations are made to it when copied.
+ */
+static int
+getpathat(int fd, uintptr_t p1, char *outbuf, size_t outbuf_size)
+{
+ char pathbuf[MAXPATHLEN];
+ char fdpathbuf[MAXPATHLEN];
+ char *fdpath;
+ struct stat64 statbuf;
+
+ if (uucopystr((void *)p1, pathbuf, MAXPATHLEN) == -1)
+ return (-errno);
+
+ /* If the path is absolute then we can early out */
+ if ((pathbuf[0] == '/') || (fd == LX_AT_FDCWD)) {
+ (void) strlcpy(outbuf, pathbuf, outbuf_size);
+ return (0);
+ }
+
+ fdpath = lx_fd_to_path(fd, fdpathbuf, sizeof (fdpathbuf));
+ if (fdpath == NULL)
+ return (-EBADF);
+
+ if ((fstat64(fd, &statbuf) < 0))
+ return (-EBADF);
+
+ if (!S_ISDIR(statbuf.st_mode))
+ return (-ENOTDIR);
+
+ if (snprintf(outbuf, outbuf_size, "%s/%s", fdpath, pathbuf) >
+ (outbuf_size-1))
+ return (-ENAMETOOLONG);
+
+ return (0);
+}
+
+long
+lx_mknodat(uintptr_t ext1, uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ int atfd = (int)ext1;
+ char pathbuf[MAXPATHLEN];
+ int ret;
+
+ ret = getpathat(atfd, p1, pathbuf, sizeof (pathbuf));
+ if (ret < 0)
+ return (ret);
+
+ return (lx_mknod((uintptr_t)pathbuf, p2, p3));
+}
+
+long
+lx_symlinkat(uintptr_t p1, uintptr_t ext1, uintptr_t p2)
+{
+ int atfd = (int)ext1;
+ char pathbuf[MAXPATHLEN];
+ int ret;
+
+ ret = getpathat(atfd, p2, pathbuf, sizeof (pathbuf));
+ if (ret < 0) {
+ if (ret == -EBADF) {
+ /*
+ * Try to figure out correct Linux errno. We know path
+ * is relative. Check if we have a fd for a dir which
+ * has been removed.
+ */
+ if (atfd != -1 && lx_fd_to_path(atfd, pathbuf,
+ sizeof (pathbuf)) == NULL)
+ ret = -ENOENT;
+ }
+ return (ret);
+ }
+
+ return (symlink((char *)p1, pathbuf) ? -errno : 0);
+}
+
+long
+lx_readlink(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ int ret;
+
+ if ((size_t)p3 <= 0)
+ return (-EINVAL);
+
+ ret = readlink((char *)p1, (char *)p2, (size_t)p3);
+ if (ret < 0)
+ return (-errno);
+
+ return (ret);
+}
+
+long
+lx_readlinkat(uintptr_t ext1, uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ int atfd = (int)ext1;
+ char pathbuf[MAXPATHLEN];
+ int ret;
+
+ if ((size_t)p3 <= 0)
+ return (-EINVAL);
+
+ ret = getpathat(atfd, p1, pathbuf, sizeof (pathbuf));
+ if (ret < 0)
+ return (ret);
+
+ ret = readlink(pathbuf, (char *)p2, (size_t)p3);
+ if (ret < 0)
+ return (-errno);
+
+ return (ret);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/fork.c b/usr/src/lib/brand/lx/lx_brand/common/fork.c
new file mode 100644
index 0000000000..19dc1bd5ca
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/fork.c
@@ -0,0 +1,183 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc. All rights reserved.
+ */
+
+#include <errno.h>
+#include <unistd.h>
+#include <sys/fork.h>
+#include <sys/syscall.h>
+#include <sys/debug.h>
+#include <strings.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_syscall.h>
+
+/*
+ * fork() and vfork()
+ *
+ * These cannot be pass thru system calls because we need libc to do its own
+ * initialization or else bad things will happen (i.e. ending up with a bad
+ * schedctl page). On Linux, there is no such thing as forkall(), so we use
+ * fork1() here.
+ */
+
+long
+lx_fork(void)
+{
+ int ret;
+
+ /*
+ * Inform the in-kernel ptrace(2) subsystem that we are about to
+ * emulate fork(2).
+ */
+ lx_ptrace_clone_begin(LX_PTRACE_O_TRACEFORK, B_FALSE);
+
+ /*
+ * Suspend signal delivery, run the stack management prefork handler
+ * and perform the fork operation.
+ */
+ _sigoff();
+ lx_stack_prefork();
+ ret = fork1();
+ lx_stack_postfork();
+
+ switch (ret) {
+ case -1:
+ _sigon();
+ return (-errno);
+
+ case 0:
+ /*
+ * Returning in the new child. We must free the stacks and
+ * thread-specific data objects for the threads we did not
+ * duplicate; i.e. every other thread.
+ */
+ lx_free_other_stacks();
+
+ lx_ptrace_stop_if_option(LX_PTRACE_O_TRACEFORK, B_TRUE, 0,
+ NULL);
+
+ /*
+ * Re-enable signal delivery in the child and return to the
+ * new process.
+ */
+ _sigon();
+ return (0);
+
+ default:
+ lx_ptrace_stop_if_option(LX_PTRACE_O_TRACEFORK, B_FALSE,
+ (ulong_t)ret, NULL);
+
+ /*
+ * Re-enable signal delivery in the parent and return from
+ * the emulated system call.
+ */
+ _sigon();
+ return (ret);
+ }
+}
+
+long
+lx_vfork(void)
+{
+ int ret;
+ lx_sighandlers_t saved;
+ ucontext_t vforkuc;
+ ucontext_t *ucp;
+ lx_tsd_t *lx_tsd = lx_get_tsd();
+
+ ucp = lx_syscall_regs();
+
+ /*
+ * Inform the in-kernel ptrace(2) subsystem that we are about to
+ * emulate vfork(2).
+ */
+ lx_ptrace_clone_begin(LX_PTRACE_O_TRACEVFORK, B_FALSE);
+
+ /*
+ * Suspend signal delivery, run the stack management prefork handler
+ * and perform the vfork operation. We use the same approach as in
+ * lx_clone for signal handling and child return across vfork. See
+ * the comments in lx_clone for more detail.
+ */
+
+ _sigoff();
+ lx_stack_prefork();
+ lx_sighandlers_save(&saved);
+ lx_tsd->lxtsd_is_vforked++;
+ ret = vfork();
+ if (ret != 0) {
+ /* parent/error */
+ lx_tsd->lxtsd_is_vforked--;
+ lx_sighandlers_restore(&saved);
+ }
+
+ switch (ret) {
+ case -1:
+ lx_stack_postfork();
+ _sigon();
+ return (-errno);
+
+ case 0:
+ /*
+ * child
+ * Unlike the regular fork case where the child also calls
+ * lx_stack_postfork(), we only do that in the parent once it
+ * resumes execution. This is required there to wake any other
+ * threads in that process that are blocked on the lock taken
+ * in lx_stack_prefork().
+ */
+ bcopy(ucp, &vforkuc, sizeof (vforkuc));
+ vforkuc.uc_brand_data[1] -= LX_NATIVE_STACK_VFORK_GAP;
+ vforkuc.uc_link = NULL;
+
+ lx_debug("\tvfork native stack sp %p",
+ vforkuc.uc_brand_data[1]);
+
+ /* Stop for ptrace if required. */
+ lx_ptrace_stop_if_option(LX_PTRACE_O_TRACEVFORK, B_TRUE, 0,
+ NULL);
+
+ /*
+ * Return to the child via the specially constructed vfork(2)
+ * context.
+ */
+ LX_EMULATE_RETURN(&vforkuc, LX_SYS_vfork, 0, 0);
+ (void) syscall(SYS_brand, B_EMULATION_DONE, &vforkuc,
+ LX_SYS_vfork, 0, 0);
+
+ VERIFY(0);
+ return (0);
+
+ default:
+ /* parent - child should have exited or exec-ed by now */
+ lx_stack_postfork();
+ lx_ptrace_stop_if_option(LX_PTRACE_O_TRACEVFORK, B_FALSE,
+ (ulong_t)ret, NULL);
+ _sigon();
+ return (ret);
+ }
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/id.c b/usr/src/lib/brand/lx/lx_brand/common/id.c
new file mode 100644
index 0000000000..cd5baefa7d
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/id.c
@@ -0,0 +1,271 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <fcntl.h>
+#include <procfs.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/zone.h>
+#include <sys/lx_types.h>
+#include <sys/lx_syscall.h>
+#include <sys/cred_impl.h>
+#include <sys/policy.h>
+#include <sys/ucred.h>
+#include <sys/syscall.h>
+#include <alloca.h>
+#include <errno.h>
+#include <ucred.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/lx_misc.h>
+
+
+long
+lx_setuid16(uintptr_t uid)
+{
+ return ((setuid(LX_UID16_TO_UID32((lx_uid16_t)uid))) ? -errno : 0);
+}
+
+long
+lx_getuid16(void)
+{
+ return ((int)LX_UID32_TO_UID16(getuid()));
+}
+
+long
+lx_setgid16(uintptr_t gid)
+{
+ return ((setgid(LX_GID16_TO_GID32((lx_gid16_t)gid))) ? -errno : 0);
+}
+
+long
+lx_getgid16(void)
+{
+ return ((int)LX_GID32_TO_GID16(getgid()));
+}
+
+long
+lx_geteuid16(void)
+{
+ return ((int)LX_UID32_TO_UID16(geteuid()));
+}
+
+long
+lx_getegid16(void)
+{
+ return ((int)LX_GID32_TO_GID16(getegid()));
+}
+
+long
+lx_geteuid(void)
+{
+ return ((int)geteuid());
+}
+
+long
+lx_getegid(void)
+{
+ return ((int)getegid());
+}
+
+long
+lx_getresuid(uintptr_t ruid, uintptr_t euid, uintptr_t suid)
+{
+ lx_uid_t lx_ruid, lx_euid, lx_suid;
+ ucred_t *cr;
+ size_t sz;
+
+ /*
+ * We allocate a ucred_t ourselves rather than call ucred_get(3C)
+ * because ucred_get() calls malloc(3C), which the brand library cannot
+ * use. Because we allocate the space with SAFE_ALLOCA(), there's
+ * no need to free it when we're done.
+ */
+ sz = ucred_size();
+ cr = (ucred_t *)SAFE_ALLOCA(sz);
+ if (cr == NULL)
+ return (-ENOMEM);
+
+ if (syscall(SYS_ucredsys, UCREDSYS_UCREDGET, P_MYID, cr) != 0)
+ return (-errno);
+
+ if (((lx_ruid = (lx_uid_t)ucred_getruid(cr)) == (lx_uid_t)-1) ||
+ ((lx_euid = (lx_uid_t)ucred_geteuid(cr)) == (lx_uid_t)-1) ||
+ ((lx_suid = (lx_uid_t)ucred_getsuid(cr)) == (lx_uid_t)-1)) {
+ return (-errno);
+ }
+
+ if (uucopy(&lx_ruid, (void *)ruid, sizeof (lx_uid_t)) != 0)
+ return (-errno);
+
+ if (uucopy(&lx_euid, (void *)euid, sizeof (lx_uid_t)) != 0)
+ return (-errno);
+
+ return ((uucopy(&lx_suid, (void *)suid, sizeof (lx_uid_t)) != 0)
+ ? -errno : 0);
+}
+
+long
+lx_getresuid16(uintptr_t ruid16, uintptr_t euid16, uintptr_t suid16)
+{
+ lx_uid_t lx_ruid, lx_euid, lx_suid;
+ lx_uid16_t lx_ruid16, lx_euid16, lx_suid16;
+ int rv;
+
+ if ((rv = lx_getresuid((uintptr_t)&lx_ruid, (uintptr_t)&lx_euid,
+ (uintptr_t)&lx_suid)) != 0)
+ return (rv);
+
+ lx_ruid16 = LX_UID32_TO_UID16(lx_ruid);
+ lx_euid16 = LX_UID32_TO_UID16(lx_euid);
+ lx_suid16 = LX_UID32_TO_UID16(lx_suid);
+
+ if (uucopy(&lx_ruid16, (void *)ruid16, sizeof (lx_uid16_t)) != 0)
+ return (-errno);
+
+ if (uucopy(&lx_euid16, (void *)euid16, sizeof (lx_uid16_t)) != 0)
+ return (-errno);
+
+ return ((uucopy(&lx_suid16, (void *)suid16, sizeof (lx_uid16_t)) != 0)
+ ? -errno : 0);
+}
+
+long
+lx_getresgid(uintptr_t rgid, uintptr_t egid, uintptr_t sgid)
+{
+ ucred_t *cr;
+ lx_gid_t lx_rgid, lx_egid, lx_sgid;
+ size_t sz;
+
+ /*
+ * We allocate a ucred_t ourselves rather than call ucred_get(3C)
+ * because ucred_get() calls malloc(3C), which the brand library cannot
+ * use. Because we allocate the space with SAFE_ALLOCA(), there's
+ * no need to free it when we're done.
+ */
+ sz = ucred_size();
+ cr = (ucred_t *)SAFE_ALLOCA(sz);
+ if (cr == NULL)
+ return (-ENOMEM);
+
+ if (syscall(SYS_ucredsys, UCREDSYS_UCREDGET, P_MYID, cr) != 0)
+ return (-errno);
+
+ if (((lx_rgid = (lx_gid_t)ucred_getrgid(cr)) == (lx_gid_t)-1) ||
+ ((lx_egid = (lx_gid_t)ucred_getegid(cr)) == (lx_gid_t)-1) ||
+ ((lx_sgid = (lx_gid_t)ucred_getsgid(cr)) == (lx_gid_t)-1)) {
+ return (-errno);
+ }
+
+ if (uucopy(&lx_rgid, (void *)rgid, sizeof (lx_gid_t)) != 0)
+ return (-errno);
+
+ if (uucopy(&lx_egid, (void *)egid, sizeof (lx_gid_t)) != 0)
+ return (-errno);
+
+ return ((uucopy(&lx_sgid, (void *)sgid, sizeof (lx_gid_t)) != 0)
+ ? -errno : 0);
+}
+
+long
+lx_getresgid16(uintptr_t rgid16, uintptr_t egid16, uintptr_t sgid16)
+{
+ lx_gid_t lx_rgid, lx_egid, lx_sgid;
+ lx_gid16_t lx_rgid16, lx_egid16, lx_sgid16;
+ int rv;
+
+ if ((rv = lx_getresgid((uintptr_t)&lx_rgid, (uintptr_t)&lx_egid,
+ (uintptr_t)&lx_sgid)) != 0)
+ return (rv);
+
+ lx_rgid16 = LX_UID32_TO_UID16(lx_rgid);
+ lx_egid16 = LX_UID32_TO_UID16(lx_egid);
+ lx_sgid16 = LX_UID32_TO_UID16(lx_sgid);
+
+ if (uucopy(&lx_rgid16, (void *)rgid16, sizeof (lx_gid16_t)) != 0)
+ return (-errno);
+
+ if (uucopy(&lx_egid16, (void *)egid16, sizeof (lx_gid16_t)) != 0)
+ return (-errno);
+
+ return ((uucopy(&lx_sgid16, (void *)sgid16, sizeof (lx_gid16_t)) != 0)
+ ? -errno : 0);
+}
+
+long
+lx_setreuid16(uintptr_t ruid, uintptr_t euid)
+{
+ return ((setreuid(LX_UID16_TO_UID32((lx_uid16_t)ruid),
+ LX_UID16_TO_UID32((lx_uid16_t)euid))) ? -errno : 0);
+}
+
+long
+lx_setregid16(uintptr_t rgid, uintptr_t egid)
+{
+ return ((setregid(LX_UID16_TO_UID32((lx_gid16_t)rgid),
+ LX_UID16_TO_UID32((lx_gid16_t)egid))) ? -errno : 0);
+}
+
+/*
+ * The lx brand cannot support the setfs[ug]id16/setfs[ug]id calls as that
+ * would require significant rework of Solaris' privilege mechanisms, so
+ * instead return the current effective [ug]id.
+ *
+ * In Linux, fsids track effective IDs, so returning the effective IDs works
+ * as a substitute; returning the current value also denotes failure of the
+ * call if the caller had specified something different. We don't need to
+ * worry about setting error codes because the Linux calls don't set any.
+ */
+/*ARGSUSED*/
+long
+lx_setfsuid16(uintptr_t fsuid16)
+{
+ return (lx_geteuid16());
+}
+
+/*ARGSUSED*/
+long
+lx_setfsgid16(uintptr_t fsgid16)
+{
+ return (lx_getegid16());
+}
+
+/*ARGSUSED*/
+long
+lx_setfsuid(uintptr_t fsuid)
+{
+ return (geteuid());
+}
+
+/*ARGSUSED*/
+long
+lx_setfsgid(uintptr_t fsgid)
+{
+ return (getegid());
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c b/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c
new file mode 100644
index 0000000000..75fba80b07
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c
@@ -0,0 +1,1618 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/syscall.h>
+#include <sys/utsname.h>
+#include <sys/inttypes.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <sys/fstyp.h>
+#include <sys/fsid.h>
+#include <sys/systm.h>
+#include <sys/auxv.h>
+#include <sys/frame.h>
+#include <zone.h>
+#include <sys/brand.h>
+#include <sys/epoll.h>
+#include <sys/stack.h>
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <errno.h>
+#include <syslog.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <synch.h>
+#include <libelf.h>
+#include <libgen.h>
+#include <pthread.h>
+#include <utime.h>
+#include <dirent.h>
+#include <ucontext.h>
+#include <libintl.h>
+#include <locale.h>
+
+#include <sys/lx_misc.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_types.h>
+#include <sys/lx_statfs.h>
+#include <sys/lx_signal.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_thread.h>
+#include <sys/lx_aio.h>
+#include <lx_auxv.h>
+
+/*
+ * There is a block comment in "uts/common/brand/lx/os/lx_brand.c" that
+ * describes the functioning of the LX brand in some detail.
+ *
+ * *** Setting errno
+ *
+ * This emulation library is loaded onto a seperate link map from the
+ * application whose address space we're running in. The Linux libc errno is
+ * independent of our native libc errno. To pass back an error the emulation
+ * function should return -errno back to the Linux caller.
+ */
+
+char lx_release[LX_KERN_RELEASE_MAX];
+char lx_cmd_name[MAXNAMLEN];
+
+/*
+ * Map a linux locale ending string to the solaris equivalent.
+ */
+struct lx_locale_ending {
+ const char *linux_end; /* linux ending string */
+ const char *solaris_end; /* to transform with this string */
+ int le_size; /* linux ending string length */
+ int se_size; /* solaris ending string length */
+};
+
+#define l2s_locale(lname, sname) \
+ {(lname), (sname), sizeof ((lname)) - 1, sizeof ((sname)) - 1}
+
+#define MAXLOCALENAMELEN 30
+#if !defined(TEXT_DOMAIN) /* should be defined by cc -D */
+#define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */
+#endif
+
+/*
+ * Most syscalls return an int but some return something else, typically a
+ * ssize_t. This can be either an int or a long, depending on if we're compiled
+ * for 32-bit or 64-bit. To correctly propagate the -errno return code in the
+ * 64-bit case, we declare all emulation wrappers will return a long. Thus,
+ * when we save the return value into the %eax or %rax register and return to
+ * Linux, we will have the right size value in both the 32 and 64 bit cases.
+ */
+
+typedef long (*lx_syscall_handler_t)();
+
+static lx_syscall_handler_t lx_handlers[LX_NSYSCALLS + 1];
+
+static uintptr_t stack_size;
+
+#if defined(_LP64)
+long lx_fsb;
+long lx_fs;
+#endif
+int lx_install = 0; /* install mode enabled if non-zero */
+int lx_verbose = 0; /* verbose mode enabled if non-zero */
+int lx_debug_enabled = 0; /* debugging output enabled if non-zero */
+
+pid_t zoneinit_pid; /* zone init PID */
+
+thread_key_t lx_tsd_key;
+
+int
+uucopy_unsafe(const void *src, void *dst, size_t n)
+{
+ bcopy(src, dst, n);
+ return (0);
+}
+
+int
+uucopystr_unsafe(const void *src, void *dst, size_t n)
+{
+ (void) strncpy((char *)src, dst, n);
+ return (0);
+}
+
+static void
+i_lx_msg(int fd, char *msg, va_list ap)
+{
+ int i;
+ char buf[LX_MSG_MAXLEN];
+
+ /* LINTED [possible expansion issues] */
+ i = vsnprintf(buf, sizeof (buf), msg, ap);
+ buf[LX_MSG_MAXLEN - 1] = '\0';
+ if (i == -1)
+ return;
+
+ /* if debugging is enabled, send this message to debug output */
+ if (LX_DEBUG_ISENABLED)
+ lx_debug(buf);
+
+ if (fd == 2) {
+ /*
+ * We let the user choose whether or not to see these
+ * messages on the console.
+ */
+ if (lx_verbose == 0)
+ return;
+ }
+
+ /* we retry in case of EINTR */
+ do {
+ i = write(fd, buf, strlen(buf));
+ } while ((i == -1) && (errno == EINTR));
+}
+
+/*PRINTFLIKE1*/
+void
+lx_err(char *msg, ...)
+{
+ va_list ap;
+
+ assert(msg != NULL);
+
+ va_start(ap, msg);
+ i_lx_msg(STDERR_FILENO, msg, ap);
+ va_end(ap);
+}
+
+/*
+ * This is just a non-zero exit value which also isn't one that would allow
+ * us to easily detect if a branded process exited because of a recursive
+ * fatal error.
+ */
+#define LX_ERR_FATAL 42
+
+/*
+ * Our own custom version of abort(), this routine will be used in place
+ * of the one located in libc. The primary difference is that this version
+ * will first reset the signal handler for SIGABRT to SIG_DFL, ensuring the
+ * SIGABRT sent causes us to dump core and is not caught by a user program.
+ */
+void
+abort(void)
+{
+ static int aborting = 0;
+
+ struct sigaction sa;
+ sigset_t sigmask;
+
+ /* watch out for recursive calls to this function */
+ if (aborting != 0)
+ exit(LX_ERR_FATAL);
+
+ aborting = 1;
+
+ /*
+ * Block all signals here to avoid taking any signals while exiting
+ * in an effort to avoid any strange user interaction with our death.
+ */
+ (void) sigfillset(&sigmask);
+ (void) sigprocmask(SIG_BLOCK, &sigmask, NULL);
+
+ /*
+ * Our own version of abort(3C) that we know will never call
+ * a user-installed SIGABRT handler first. We WANT to die.
+ *
+ * Do this by resetting the handler to SIG_DFL, and releasing any
+ * held SIGABRTs.
+ *
+ * If no SIGABRTs are pending, send ourselves one.
+ *
+ * The while loop is a bit of overkill, but abort(3C) does it to
+ * assure it never returns so we will as well.
+ */
+ (void) sigemptyset(&sa.sa_mask);
+ sa.sa_sigaction = SIG_DFL;
+ sa.sa_flags = 0;
+
+ for (;;) {
+ (void) sigaction(SIGABRT, &sa, NULL);
+ (void) sigrelse(SIGABRT);
+ (void) thr_kill(thr_self(), SIGABRT);
+ }
+
+ /*NOTREACHED*/
+}
+
+/*PRINTFLIKE1*/
+void
+lx_msg(char *msg, ...)
+{
+ va_list ap;
+
+ assert(msg != NULL);
+ va_start(ap, msg);
+ i_lx_msg(STDOUT_FILENO, msg, ap);
+ va_end(ap);
+}
+
+/*PRINTFLIKE1*/
+void
+lx_err_fatal(char *msg, ...)
+{
+ va_list ap;
+
+ assert(msg != NULL);
+
+ va_start(ap, msg);
+ i_lx_msg(STDERR_FILENO, msg, ap);
+ va_end(ap);
+ abort();
+}
+
+/*
+ * See if it is safe to alloca() sz bytes. Return 1 for yes, 0 for no.
+ * We can't be certain we won't blow the stack since we don't know where it
+ * starts, but since the stack is only two pages we know any allocation bigger
+ * than that will blow the stack. Fortunately most allocations are small (e.g.
+ * 128 bytes).
+ */
+int
+lx_check_alloca(size_t sz)
+{
+ uintptr_t sp = (uintptr_t)&sz;
+ uintptr_t end = sp - sz;
+
+ return ((end < sp) && (sz < stack_size));
+}
+
+/*PRINTFLIKE1*/
+void
+lx_unsupported(char *msg, ...)
+{
+ va_list ap;
+ char dmsg[256];
+ int lastc;
+
+ assert(msg != NULL);
+
+ /* make a brand call so we can easily dtrace unsupported actions */
+ va_start(ap, msg);
+ /* LINTED [possible expansion issues] */
+ (void) vsnprintf(dmsg, sizeof (dmsg), msg, ap);
+ dmsg[255] = '\0';
+ lastc = strlen(dmsg) - 1;
+ if (dmsg[lastc] == '\n')
+ dmsg[lastc] = '\0';
+ (void) syscall(SYS_brand, B_UNSUPPORTED, dmsg);
+ va_end(ap);
+
+ /* send the msg to the error stream */
+ va_start(ap, msg);
+ i_lx_msg(STDERR_FILENO, msg, ap);
+ va_end(ap);
+}
+
+int lx_init(int argc, char *argv[], char *envp[]);
+
+lx_tsd_t *
+lx_get_tsd(void)
+{
+ int ret;
+ lx_tsd_t *lx_tsd;
+
+ if ((ret = thr_getspecific(lx_tsd_key, (void **)&lx_tsd)) != 0) {
+ lx_err_fatal("lx_get_tsd: unable to read "
+ "thread-specific data: %s", strerror(ret));
+ }
+
+ assert(lx_tsd != 0);
+
+ return (lx_tsd);
+}
+
+/*
+ * This function is called from the kernel like a signal handler. Each
+ * function call is a request to provide emulation for a system call that, on
+ * illumos, is implemented in userland. The system call number selection and
+ * argument parsing have already been done by the kernel.
+ */
+void
+lx_emulate(ucontext_t *ucp, int syscall_num, uintptr_t *args)
+{
+ long emu_ret;
+ int emu_errno = 0;
+
+ LX_EMULATE_ENTER(ucp, syscall_num, args);
+ lx_debug("lx_emulate(%p, %d, [%p, %p, %p, %p, %p, %p])\n",
+ ucp, syscall_num, args[0], args[1], args[2], args[3], args[4],
+ args[5]);
+
+ /*
+ * The kernel should have saved us a context that will not restore the
+ * previous signal mask. Some emulated system calls alter the signal
+ * mask; restoring it after the emulation would cancel that out.
+ */
+ assert(!(ucp->uc_flags & UC_SIGMASK));
+
+ /*
+ * The kernel ensures that the syscall_num is sane; Use it as is.
+ */
+ assert(syscall_num >= 0);
+ assert(syscall_num < (sizeof (lx_handlers) / sizeof (lx_handlers[0])));
+ if (lx_handlers[syscall_num] == NULL) {
+ lx_err_fatal("lx_emulate: kernel sent us a call we cannot "
+ "emulate (%d)", syscall_num);
+ }
+
+ /*
+ * Call our handler function:
+ */
+ emu_ret = lx_handlers[syscall_num](args[0], args[1], args[2], args[3],
+ args[4], args[5]);
+
+ /*
+ * If the return value is between -1 and -4095 then it's an errno.
+ * The kernel will translate it to the Linux equivalent for us.
+ */
+ if (emu_ret < 0 && emu_ret > -4096) {
+ emu_errno = (int)-emu_ret;
+ }
+
+ /*
+ * Return to the context we were passed
+ */
+ LX_EMULATE_RETURN(ucp, syscall_num, emu_ret, emu_errno);
+ lx_debug("\tlx_emulate(%d) done (ret %ld / 0x%p ; errno %d)",
+ syscall_num, emu_ret, emu_ret, emu_errno);
+ (void) syscall(SYS_brand, B_EMULATION_DONE, ucp, syscall_num, emu_ret,
+ emu_errno);
+
+ assert(!"cannot be returned here");
+}
+
+static void
+lx_close_fh(FILE *file)
+{
+ int fd, fd_new;
+
+ if (file == NULL)
+ return;
+
+ if ((fd = fileno(file)) < 0)
+ return;
+
+ fd_new = dup(fd);
+ if (fd_new == -1)
+ return;
+
+ (void) fclose(file);
+ (void) dup2(fd_new, fd);
+ (void) close(fd_new);
+}
+
+
+extern int set_l10n_alternate_root(char *path);
+
+/*
+ * Initialize the thread specific data for this thread.
+ */
+void
+lx_init_tsd(lx_tsd_t *lxtsd)
+{
+ int err;
+
+ bzero(lxtsd, sizeof (*lxtsd));
+ lxtsd->lxtsd_exit = LX_ET_NONE;
+
+ /*
+ * The Linux alternate signal stack is initially disabled:
+ */
+ lxtsd->lxtsd_sigaltstack.ss_flags = LX_SS_DISABLE;
+
+ /*
+ * Create a per-thread exit context from the current register and
+ * native/brand stack state. Replace the saved program counter value
+ * with the address of lx_exit_common(); we wish to revector there when
+ * the thread or process is exiting.
+ */
+ if (getcontext(&lxtsd->lxtsd_exit_context) != 0) {
+ lx_err_fatal("Unable to initialize thread-specific exit "
+ "context: %s", strerror(errno));
+ }
+ LX_REG(&lxtsd->lxtsd_exit_context, REG_PC) = (uintptr_t)lx_exit_common;
+
+ /*
+ * Align the stack pointer and clear the frame pointer.
+ */
+ LX_REG(&lxtsd->lxtsd_exit_context, REG_FP) = 0;
+ LX_REG(&lxtsd->lxtsd_exit_context, REG_SP) &= ~(STACK_ALIGN - 1UL);
+#if defined(_LP64)
+#if (STACK_ENTRY_ALIGN != 8) && (STACK_ALIGN != 16)
+#error "lx_init_tsd: unexpected STACK_[ENTRY_]ALIGN values"
+#endif
+ /*
+ * The AMD64 ABI requires that, on entry to a function, the stack
+ * pointer must be 8-byte aligned, but _not_ 16-byte aligned. When
+ * the frame pointer is pushed, the alignment will then be correct.
+ */
+ LX_REG(&lxtsd->lxtsd_exit_context, REG_SP) -= STACK_ENTRY_ALIGN;
+#endif
+
+ /*
+ * Block all signals in the exit context to avoid taking any signals
+ * (to the degree possible) while exiting.
+ */
+ (void) sigfillset(&lxtsd->lxtsd_exit_context.uc_sigmask);
+
+ if ((err = thr_setspecific(lx_tsd_key, lxtsd)) != 0) {
+ lx_err_fatal("Unable to initialize thread-specific data: %s",
+ strerror(err));
+ }
+}
+
+void
+lx_jump_to_linux(ucontext_t *ucp)
+{
+ extern void setcontext_sigmask(ucontext_t *);
+
+ /*
+ * Call into this private libc interface to allow us to use only the
+ * signal mask handling part of a regular setcontext() operation.
+ */
+ setcontext_sigmask(ucp);
+
+ if (syscall(SYS_brand, B_JUMP_TO_LINUX, ucp) != 0) {
+ lx_err_fatal("B_JUMP_TO_LINUX failed: %s", strerror(errno));
+ }
+
+ /*
+ * This system call should not return.
+ */
+ abort();
+}
+
+static void
+lx_start(uintptr_t sp, uintptr_t entry)
+{
+ ucontext_t jump_uc;
+
+ if (getcontext(&jump_uc) != 0) {
+ lx_err_fatal("Unable to getcontext for program start: %s",
+ strerror(errno));
+ }
+
+ /*
+ * We want to load the general registers from this
+ * context, and switch to the BRAND stack.
+ */
+ jump_uc.uc_flags = UC_CPU;
+ jump_uc.uc_brand_data[0] = (void *)LX_UC_STACK_BRAND;
+
+ LX_REG(&jump_uc, REG_FP) = NULL;
+ LX_REG(&jump_uc, REG_SP) = sp;
+ LX_REG(&jump_uc, REG_PC) = entry;
+
+ /*
+ * The AMD64 ABI states that at process entry, %rdx contains "a
+ * function pointer that the application should register with
+ * atexit()". This behavior has been observed in statically linked
+ * i386 programs as well. As a precaution, all of the registers are
+ * zeroed prior to initial execution.
+ */
+#if defined(_LP64)
+ LX_REG(&jump_uc, REG_RAX) = NULL;
+ LX_REG(&jump_uc, REG_RCX) = NULL;
+ LX_REG(&jump_uc, REG_RDX) = NULL;
+ LX_REG(&jump_uc, REG_RBX) = NULL;
+ LX_REG(&jump_uc, REG_RBP) = NULL;
+ LX_REG(&jump_uc, REG_RSI) = NULL;
+ LX_REG(&jump_uc, REG_RDI) = NULL;
+ LX_REG(&jump_uc, REG_R8) = NULL;
+ LX_REG(&jump_uc, REG_R9) = NULL;
+ LX_REG(&jump_uc, REG_R10) = NULL;
+ LX_REG(&jump_uc, REG_R11) = NULL;
+ LX_REG(&jump_uc, REG_R12) = NULL;
+ LX_REG(&jump_uc, REG_R13) = NULL;
+ LX_REG(&jump_uc, REG_R14) = NULL;
+ LX_REG(&jump_uc, REG_R15) = NULL;
+#else
+ LX_REG(&jump_uc, EAX) = NULL;
+ LX_REG(&jump_uc, ECX) = NULL;
+ LX_REG(&jump_uc, EDX) = NULL;
+ LX_REG(&jump_uc, EBX) = NULL;
+ LX_REG(&jump_uc, EBP) = NULL;
+ LX_REG(&jump_uc, ESI) = NULL;
+ LX_REG(&jump_uc, EDI) = NULL;
+#endif /* defined(_LP64) */
+
+ lx_debug("starting Linux program sp %p ldentry %p", sp, entry);
+ lx_jump_to_linux(&jump_uc);
+}
+
+/*ARGSUSED*/
+int
+lx_init(int argc, char *argv[], char *envp[])
+{
+ char *rele, *vers;
+ auxv_t *ap, *oap;
+ long *p;
+ int err;
+ lx_elf_data_t edp;
+ lx_brand_registration_t reg;
+ lx_tsd_t *lxtsd;
+
+ bzero(&reg, sizeof (reg));
+ stack_size = 2 * sysconf(_SC_PAGESIZE);
+
+ /*
+ * We need to shutdown all libc stdio. libc stdio normally goes to
+ * file descriptors, but since we're actually part of a linux
+ * process we don't own these file descriptors and we can't make
+ * any assumptions about their state.
+ */
+ lx_close_fh(stdin);
+ lx_close_fh(stdout);
+ lx_close_fh(stderr);
+
+ lx_debug_init();
+
+ rele = getenv("LX_RELEASE");
+ vers = getenv("LX_VERSION");
+ if (rele == NULL) {
+ if (zone_getattr(getzoneid(), LX_ATTR_KERN_RELEASE,
+ lx_release, sizeof (lx_release)) <= 0)
+ (void) strlcpy(lx_release, "2.4.21",
+ LX_KERN_RELEASE_MAX);
+ } else {
+ (void) strlcpy(lx_release, rele, LX_KERN_RELEASE_MAX);
+ }
+
+ if (syscall(SYS_brand, B_OVERRIDE_KERN_VER, rele, vers) != 0) {
+ lx_debug("failed to override kernel release/version");
+ }
+ lx_debug("lx_release: %s\n", lx_release);
+
+
+ /*
+ * Should we kill an application that attempts an unimplemented
+ * system call?
+ */
+ if (getenv("LX_STRICT") != NULL) {
+ reg.lxbr_flags |= LX_PROC_STRICT_MODE;
+ lx_debug("STRICT mode enabled.\n");
+ }
+
+ /*
+ * Are we in install mode?
+ */
+ if (getenv("LX_INSTALL") != NULL) {
+ reg.lxbr_flags |= LX_PROC_INSTALL_MODE;
+ lx_install = 1;
+ lx_debug("INSTALL mode enabled.\n");
+ }
+
+ /*
+ * Should we attempt to send messages to the screen?
+ */
+ if (getenv("LX_VERBOSE") != NULL) {
+ lx_verbose = 1;
+ lx_debug("VERBOSE mode enabled.\n");
+ }
+
+ (void) strlcpy(lx_cmd_name, basename(argv[0]), sizeof (lx_cmd_name));
+ lx_debug("executing linux process: %s", argv[0]);
+ lx_debug("branding myself and setting handler to 0x%p",
+ (void *)lx_emulate);
+
+ reg.lxbr_version = LX_VERSION;
+ reg.lxbr_handler = (void *)&lx_emulate;
+
+ /*
+ * Register the address of the user-space handler with the lx brand
+ * module. As a side-effect this leaves the thread in native syscall
+ * mode so that it's ok to continue to make syscalls during setup. We
+ * need to switch to Linux mode at the end of initialization.
+ */
+ if (syscall(SYS_brand, B_REGISTER, &reg))
+ lx_err_fatal("failed to brand the process");
+
+ /* Look up the PID that serves as init for this zone */
+ if ((err = lx_lpid_to_spid(1, &zoneinit_pid)) < 0)
+ lx_err_fatal("Unable to find PID for zone init process: %s",
+ strerror(err));
+
+ /*
+ * Upload data about the lx executable from the kernel.
+ */
+ if (syscall(SYS_brand, B_ELFDATA, (void *)&edp))
+ lx_err_fatal("failed to get required ELF data from the kernel");
+
+ if (lx_statfs_init() != 0)
+ lx_err_fatal("failed to setup the statfs translator");
+
+ /*
+ * Find the aux vector on the stack.
+ */
+ p = (long *)envp;
+ while (*p != NULL)
+ p++;
+ /*
+ * p is now pointing at the 0 word after the environ pointers. After
+ * that is the aux vectors.
+ */
+ p++;
+ for (ap = (auxv_t *)p, oap = ap; ap->a_type != AT_NULL; ap++) {
+ if (lx_auxv_stol(ap, oap, &edp) == 0) {
+ /*
+ * Copy only auxv entries which Linux programs will
+ * understand. Other entries will be skipped.
+ */
+ oap++;
+ }
+ }
+ /* NULL out skipped entries */
+ while (oap < ap) {
+ oap->a_type = AT_NULL;
+ oap->a_un.a_val = 0;
+ oap++;
+ }
+
+ /* Setup signal handler information. */
+ if (lx_siginit()) {
+ lx_err_fatal("failed to initialize lx signals for the "
+ "branded process");
+ }
+
+ /* Setup thread-specific data area for managing linux threads. */
+ if ((err = thr_keycreate(&lx_tsd_key, NULL)) != 0) {
+ lx_err_fatal("thr_keycreate(lx_tsd_key) failed: %s",
+ strerror(err));
+ }
+
+ lx_debug("thr_keycreate created lx_tsd_key (%d)", lx_tsd_key);
+
+ /*
+ * Initialize the thread specific data for this thread.
+ */
+ if ((lxtsd = malloc(sizeof (*lxtsd))) == NULL) {
+ lx_err_fatal("failed to allocate tsd for main thread: %s",
+ strerror(errno));
+ }
+ lx_debug("lx tsd allocated @ %p", lxtsd);
+ lx_init_tsd(lxtsd);
+
+ /*
+ * Allocate the brand emulation stack for the main process thread.
+ * Register the thread-specific data structure with the stack list so
+ * that it may be freed at thread exit or fork(2).
+ */
+ lx_install_stack(NULL, 0, lxtsd);
+
+ /*
+ * The brand linker expects the stack pointer to point to
+ * "argc", which is just before &argv[0].
+ */
+ lx_start((uintptr_t)argv - sizeof (void *), edp.ed_ldentry);
+
+ /*NOTREACHED*/
+ abort();
+ return (0);
+}
+
+/*
+ * We "return" to this function via a context hand-crafted by
+ * "lx_init_tsd()"; see that function for more detail.
+ *
+ * NOTE: Our call frame is on the main thread stack, not the alternate native
+ * stack -- it is safe to release the latter here. The frame does not have a
+ * valid return address, so this function MUST NOT return.
+ */
+void
+lx_exit_common(void)
+{
+ lx_tsd_t *lxtsd = lx_get_tsd();
+ int ev = (0xff & lxtsd->lxtsd_exit_status);
+
+ switch (lxtsd->lxtsd_exit) {
+ case LX_ET_EXIT:
+ lx_debug("lx_exit_common(LX_ET_EXIT, %d, %d)\n", thr_self(),
+ ev);
+
+ if (thr_self() == 1) {
+ /*
+ * Modern versions of glibc will call the exit_group
+ * syscall when exit(3) is called, but if the primary
+ * thread explicitly invokes the exit syscall we now
+ * need to exit with the proper value.
+ */
+ exit(ev);
+ } else {
+ /*
+ * If the thread is exiting, but not the entire process,
+ * we must free the stack we allocated for usermode
+ * emulation. This is safe to do here because the
+ * setcontext() put us back on the BRAND stack for this
+ * process. This function also frees the
+ * thread-specific data object for this thread.
+ */
+ lx_free_stack();
+
+ /*
+ * The native thread return value is never seen so we
+ * pass NULL.
+ */
+ thr_exit(NULL);
+ }
+ break;
+
+ case LX_ET_EXIT_GROUP:
+ lx_debug("lx_exit_common(LX_ET_EXIT_GROUP, %d)\n", ev);
+ exit(ev);
+ break;
+
+ default:
+ abort();
+ }
+
+ abort();
+}
+
+const ucontext_t *
+lx_find_brand_uc(void)
+{
+ ucontext_t *ucp = NULL;
+
+ /*
+ * Ask for the current emulation (or signal handling) ucontext_t...
+ */
+ assert(syscall(SYS_brand, B_GET_CURRENT_CONTEXT, &ucp) == 0);
+
+ for (;;) {
+ uintptr_t flags;
+
+ lx_debug("lx_find_brand_uc: inspect ucp %p...\n", ucp);
+ assert(ucp != NULL);
+
+ flags = (uintptr_t)ucp->uc_brand_data[0];
+
+ if (flags & LX_UC_STACK_BRAND) {
+ lx_debug("lx_find_brand_uc: ucp %p\n", ucp);
+
+ return (ucp);
+ }
+
+ lx_debug("lx_find_brand_uc: skip non-BRAND ucp %p\n", ucp);
+
+ /*
+ * Walk up the context chain to find the most recently stored
+ * brand register state.
+ */
+ ucp = ucp->uc_link;
+ }
+}
+
+uintptr_t
+lx_find_brand_sp(void)
+{
+ const ucontext_t *ucp = lx_find_brand_uc();
+ uintptr_t sp = LX_REG(ucp, REG_SP);
+
+ lx_debug("lx_find_brand_sp: ucp %p sp %p\n", ucp, sp);
+
+ return (sp);
+}
+
+ucontext_t *
+lx_syscall_regs(void)
+{
+ ucontext_t *ucp = NULL;
+ uintptr_t flags;
+
+ /*
+ * Ask for the current emulation (or signal handling) ucontext_t...
+ */
+ assert(syscall(SYS_brand, B_GET_CURRENT_CONTEXT, &ucp) == 0);
+ assert(ucp != NULL);
+
+ /*
+ * Use of the lx_syscall_regs() function implies that the topmost (i.e.
+ * current) context is for a system call emulation request from the
+ * kernel, rather than a signal handling frame.
+ */
+ flags = (uintptr_t)ucp->uc_brand_data[0];
+ assert(flags & LX_UC_FRAME_IS_SYSCALL);
+
+ lx_debug("lx_syscall_regs: ucp %p\n", ucp);
+
+ return (ucp);
+}
+
+int
+lx_lpid_to_spair(pid_t lpid, pid_t *spid, lwpid_t *slwp)
+{
+ pid_t pid;
+ lwpid_t tid;
+
+ if (lpid == 0) {
+ pid = getpid();
+ tid = thr_self();
+ } else {
+ if (syscall(SYS_brand, B_LPID_TO_SPAIR, lpid, &pid, &tid) < 0)
+ return (-errno);
+
+ /*
+ * If the returned pid is -1, that indicates we tried to
+ * look up the PID for init, but that process no longer
+ * exists.
+ */
+ if (pid == -1)
+ return (-ESRCH);
+ }
+
+ if (uucopy(&pid, spid, sizeof (pid_t)) != 0)
+ return (-errno);
+
+ if (uucopy(&tid, slwp, sizeof (lwpid_t)) != 0)
+ return (-errno);
+
+ return (0);
+}
+
+int
+lx_lpid_to_spid(pid_t lpid, pid_t *spid)
+{
+ lwpid_t slwp;
+
+ return (lx_lpid_to_spair(lpid, spid, &slwp));
+}
+
+char *
+lx_fd_to_path(int fd, char *buf, int buf_size)
+{
+ char path_proc[MAXPATHLEN];
+ pid_t pid;
+ int n;
+
+ assert((buf != NULL) && (buf_size >= 0));
+
+ if (fd < 0)
+ return (NULL);
+
+ if ((pid = getpid()) == -1)
+ return (NULL);
+
+ (void) snprintf(path_proc, MAXPATHLEN,
+ "/native/proc/%d/path/%d", pid, fd);
+
+ if ((n = readlink(path_proc, buf, buf_size - 1)) == -1)
+ return (NULL);
+ buf[n] = '\0';
+
+ return (buf);
+}
+
+#if defined(_LP64)
+/* The following is the 64-bit syscall table */
+
+static lx_syscall_handler_t lx_handlers[] = {
+ NULL, /* 0: read */
+ NULL, /* 1: write */
+ NULL, /* 2: open */
+ lx_close, /* 3: close */
+ NULL, /* 4: stat */
+ NULL, /* 5: fstat */
+ NULL, /* 6: lstat */
+ NULL, /* 7: poll */
+ lx_lseek, /* 8: lseek */
+ lx_mmap, /* 9: mmap */
+ lx_mprotect, /* 10: mprotect */
+ lx_munmap, /* 11: munmap */
+ NULL, /* 12: brk */
+ lx_rt_sigaction, /* 13: rt_sigaction */
+ lx_rt_sigprocmask, /* 14: rt_sigprocmask */
+ lx_rt_sigreturn, /* 15: rt_sigreturn */
+ NULL, /* 16: ioctl */
+ NULL, /* 17: pread64 */
+ NULL, /* 18: pwrite64 */
+ NULL, /* 19: readv */
+ NULL, /* 20: writev */
+ NULL, /* 21: access */
+ NULL, /* 22: pipe */
+ NULL, /* 23: select */
+ NULL, /* 24: sched_yield */
+ lx_remap, /* 25: mremap */
+ lx_msync, /* 26: msync */
+ lx_mincore, /* 27: mincore */
+ lx_madvise, /* 28: madvise */
+ lx_shmget, /* 29: shmget */
+ lx_shmat, /* 30: shmat */
+ lx_shmctl, /* 31: shmctl */
+ lx_dup, /* 32: dup */
+ lx_dup2, /* 33: dup2 */
+ lx_pause, /* 34: pause */
+ NULL, /* 35: nanosleep */
+ lx_getitimer, /* 36: getitimer */
+ lx_alarm, /* 37: alarm */
+ lx_setitimer, /* 38: setitimer */
+ NULL, /* 39: getpid */
+ lx_sendfile64, /* 40: sendfile */
+ NULL, /* 41: socket */
+ NULL, /* 42: connect */
+ NULL, /* 43: accept */
+ NULL, /* 44: sendto */
+ NULL, /* 45: recvfrom */
+ NULL, /* 46: sendmsg */
+ NULL, /* 47: recvmsg */
+ lx_shutdown, /* 48: shutdown */
+ NULL, /* 49: bind */
+ lx_listen, /* 50: listen */
+ NULL, /* 51: getsockname */
+ NULL, /* 52: getpeername */
+ lx_socketpair, /* 53: socketpair */
+ NULL, /* 54: setsockopt */
+ NULL, /* 55: getsockopt */
+ lx_clone, /* 56: clone */
+ lx_fork, /* 57: fork */
+ lx_vfork, /* 58: vfork */
+ lx_execve, /* 59: execve */
+ lx_exit, /* 60: exit */
+ NULL, /* 61: wait4 */
+ NULL, /* 62: kill */
+ NULL, /* 63: uname */
+ lx_semget, /* 64: semget */
+ lx_semop, /* 65: semop */
+ lx_semctl, /* 66: semctl */
+ lx_shmdt, /* 67: shmdt */
+ lx_msgget, /* 68: msgget */
+ lx_msgsnd, /* 69: msgsnd */
+ lx_msgrcv, /* 70: msgrcv */
+ lx_msgctl, /* 71: msgctl */
+ NULL, /* 72: fcntl */
+ lx_flock, /* 73: flock */
+ lx_fsync, /* 74: fsync */
+ lx_fdatasync, /* 75: fdatasync */
+ lx_truncate, /* 76: truncate */
+ lx_ftruncate, /* 77: ftruncate */
+ NULL, /* 78: getdents */
+ NULL, /* 79: getcwd */
+ lx_chdir, /* 80: chdir */
+ lx_fchdir, /* 81: fchdir */
+ lx_rename, /* 82: rename */
+ NULL, /* 83: mkdir */
+ lx_rmdir, /* 84: rmdir */
+ lx_creat, /* 85: creat */
+ NULL, /* 86: link */
+ lx_unlink, /* 87: unlink */
+ lx_symlink, /* 88: symlink */
+ lx_readlink, /* 89: readlink */
+ NULL, /* 90: chmod */
+ NULL, /* 91: fchmod */
+ NULL, /* 92: chown */
+ NULL, /* 93: fchown */
+ NULL, /* 94: lchown */
+ lx_umask, /* 95: umask */
+ NULL, /* 96: gettimeofday */
+ NULL, /* 97: getrlimit */
+ lx_getrusage, /* 98: getrusage */
+ NULL, /* 99: sysinfo */
+ lx_times, /* 100: times */
+ NULL, /* 101: ptrace */
+ lx_getuid, /* 102: getuid */
+ lx_syslog, /* 103: syslog */
+ lx_getgid, /* 104: getgid */
+ lx_setuid, /* 105: setuid */
+ lx_setgid, /* 106: setgid */
+ lx_geteuid, /* 107: geteuid */
+ lx_getegid, /* 108: getegid */
+ lx_setpgid, /* 109: setpgid */
+ NULL, /* 110: getppid */
+ lx_getpgrp, /* 111: getpgrp */
+ lx_setsid, /* 112: setsid */
+ lx_setreuid, /* 113: setreuid */
+ lx_setregid, /* 114: setregid */
+ lx_getgroups, /* 115: getgroups */
+ lx_setgroups, /* 116: setgroups */
+ NULL, /* 117: setresuid */
+ lx_getresuid, /* 118: getresuid */
+ NULL, /* 119: setresgid */
+ lx_getresgid, /* 120: getresgid */
+ lx_getpgid, /* 121: getpgid */
+ lx_setfsuid, /* 122: setfsuid */
+ lx_setfsgid, /* 123: setfsgid */
+ lx_getsid, /* 124: getsid */
+ lx_capget, /* 125: capget */
+ lx_capset, /* 126: capset */
+ lx_rt_sigpending, /* 127: rt_sigpending */
+ lx_rt_sigtimedwait, /* 128: rt_sigtimedwait */
+ lx_rt_sigqueueinfo, /* 129: rt_sigqueueinfo */
+ lx_rt_sigsuspend, /* 130: rt_sigsuspend */
+ lx_sigaltstack, /* 131: sigaltstack */
+ lx_utime, /* 132: utime */
+ lx_mknod, /* 133: mknod */
+ NULL, /* 134: uselib */
+ NULL, /* 135: personality */
+ NULL, /* 136: ustat */
+ lx_statfs, /* 137: statfs */
+ lx_fstatfs, /* 138: fstatfs */
+ lx_sysfs, /* 139: sysfs */
+ lx_getpriority, /* 140: getpriority */
+ lx_setpriority, /* 141: setpriority */
+ lx_sched_setparam, /* 142: sched_setparam */
+ lx_sched_getparam, /* 143: sched_getparam */
+ lx_sched_setscheduler, /* 144: sched_setscheduler */
+ lx_sched_getscheduler, /* 145: sched_getscheduler */
+ lx_sched_get_priority_max, /* 146: sched_get_priority_max */
+ lx_sched_get_priority_min, /* 147: sched_get_priority_min */
+ lx_sched_rr_get_interval, /* 148: sched_rr_get_interval */
+ lx_mlock, /* 149: mlock */
+ lx_munlock, /* 150: munlock */
+ lx_mlockall, /* 151: mlockall */
+ lx_munlockall, /* 152: munlockall */
+ lx_vhangup, /* 153: vhangup */
+ NULL, /* 154: modify_ldt */
+ NULL, /* 155: pivot_root */
+ lx_sysctl, /* 156: sysctl */
+ NULL, /* 157: prctl */
+ NULL, /* 158: arch_prctl */
+ lx_adjtimex, /* 159: adjtimex */
+ NULL, /* 160: setrlimit */
+ lx_chroot, /* 161: chroot */
+ lx_sync, /* 162: sync */
+ NULL, /* 163: acct */
+ lx_settimeofday, /* 164: settimeofday */
+ lx_mount, /* 165: mount */
+ lx_umount2, /* 166: umount2 */
+ NULL, /* 167: swapon */
+ NULL, /* 168: swapoff */
+ lx_reboot, /* 169: reboot */
+ lx_sethostname, /* 170: sethostname */
+ lx_setdomainname, /* 171: setdomainname */
+ NULL, /* 172: iopl */
+ NULL, /* 173: ioperm */
+ NULL, /* 174: create_module */
+ NULL, /* 175: init_module */
+ NULL, /* 176: delete_module */
+ NULL, /* 177: get_kernel_syms */
+ lx_query_module, /* 178: query_module */
+ NULL, /* 179: quotactl */
+ NULL, /* 180: nfsservctl */
+ NULL, /* 181: getpmsg */
+ NULL, /* 182: putpmsg */
+ NULL, /* 183: afs_syscall */
+ NULL, /* 184: tux */
+ NULL, /* 185: security */
+ NULL, /* 186: gettid */
+ NULL, /* 187: readahead */
+ NULL, /* 188: setxattr */
+ NULL, /* 189: lsetxattr */
+ NULL, /* 190: fsetxattr */
+ NULL, /* 191: getxattr */
+ NULL, /* 192: lgetxattr */
+ NULL, /* 193: fgetxattr */
+ NULL, /* 194: listxattr */
+ NULL, /* 195: llistxattr */
+ NULL, /* 196: flistxattr */
+ NULL, /* 197: removexattr */
+ NULL, /* 198: lremovexattr */
+ NULL, /* 199: fremovexattr */
+ NULL, /* 200: tkill */
+ NULL, /* 201: time */
+ NULL, /* 202: futex */
+ lx_sched_setaffinity, /* 203: sched_setaffinity */
+ lx_sched_getaffinity, /* 204: sched_getaffinity */
+ NULL, /* 205: set_thread_area */
+ lx_io_setup, /* 206: io_setup */
+ lx_io_destroy, /* 207: io_destroy */
+ lx_io_getevents, /* 208: io_getevents */
+ lx_io_submit, /* 209: io_submit */
+ lx_io_cancel, /* 210: io_cancel */
+ NULL, /* 211: get_thread_area */
+ NULL, /* 212: lookup_dcookie */
+ NULL, /* 213: epoll_create */
+ NULL, /* 214: epoll_ctl_old */
+ NULL, /* 215: epoll_wait_old */
+ NULL, /* 216: remap_file_pages */
+ NULL, /* 217: getdents64 */
+ NULL, /* 218: set_tid_address */
+ NULL, /* 219: restart_syscall */
+ lx_semtimedop, /* 220: semtimedop */
+ lx_fadvise64_64, /* 221: fadvise64 */
+ lx_timer_create, /* 222: timer_create */
+ lx_timer_settime, /* 223: timer_settime */
+ lx_timer_gettime, /* 224: timer_gettime */
+ lx_timer_getoverrun, /* 225: timer_getoverrun */
+ lx_timer_delete, /* 226: timer_delete */
+ NULL, /* 227: clock_settime */
+ NULL, /* 228: clock_gettime */
+ NULL, /* 229: clock_getres */
+ lx_clock_nanosleep, /* 230: clock_nanosleep */
+ lx_group_exit, /* 231: exit_group */
+ NULL, /* 232: epoll_wait */
+ NULL, /* 233: epoll_ctl */
+ NULL, /* 234: tgkill */
+ lx_utimes, /* 235: utimes */
+ NULL, /* 236: vserver */
+ NULL, /* 237: mbind */
+ NULL, /* 238: set_mempolicy */
+ NULL, /* 239: get_mempolicy */
+ NULL, /* 240: mq_open */
+ NULL, /* 241: mq_unlink */
+ NULL, /* 242: mq_timedsend */
+ NULL, /* 243: mq_timedreceive */
+ NULL, /* 244: mq_notify */
+ NULL, /* 245: mq_getsetattr */
+ NULL, /* 246: kexec_load */
+ NULL, /* 247: waitid */
+ NULL, /* 248: add_key */
+ NULL, /* 249: request_key */
+ NULL, /* 250: keyctl */
+ NULL, /* 251: ioprio_set */
+ NULL, /* 252: ioprio_get */
+ lx_inotify_init, /* 253: inotify_init */
+ lx_inotify_add_watch, /* 254: inotify_add_watch */
+ lx_inotify_rm_watch, /* 255: inotify_rm_watch */
+ NULL, /* 256: migrate_pages */
+ NULL, /* 257: openat */
+ NULL, /* 258: mkdirat */
+ lx_mknodat, /* 259: mknodat */
+ NULL, /* 260: fchownat */
+ lx_futimesat, /* 261: futimesat */
+ NULL, /* 262: fstatat64 */
+ lx_unlinkat, /* 263: unlinkat */
+ lx_renameat, /* 264: renameat */
+ NULL, /* 265: linkat */
+ lx_symlinkat, /* 266: symlinkat */
+ lx_readlinkat, /* 267: readlinkat */
+ NULL, /* 268: fchmodat */
+ NULL, /* 269: faccessat */
+ NULL, /* 270: pselect6 */
+ NULL, /* 271: ppoll */
+ NULL, /* 272: unshare */
+ NULL, /* 273: set_robust_list */
+ NULL, /* 274: get_robust_list */
+ NULL, /* 275: splice */
+ NULL, /* 276: tee */
+ NULL, /* 277: sync_file_range */
+ NULL, /* 278: vmsplice */
+ NULL, /* 279: move_pages */
+ lx_utimensat, /* 280: utimensat */
+ NULL, /* 281: epoll_pwait */
+ lx_signalfd, /* 282: signalfd */
+ lx_timerfd_create, /* 283: timerfd_create */
+ lx_eventfd, /* 284: eventfd */
+ NULL, /* 285: fallocate */
+ lx_timerfd_settime, /* 286: timerfd_settime */
+ lx_timerfd_gettime, /* 287: timerfd_gettime */
+ NULL, /* 288: accept4 */
+ lx_signalfd4, /* 289: signalfd4 */
+ lx_eventfd2, /* 290: eventfd2 */
+ NULL, /* 291: epoll_create1 */
+ lx_dup3, /* 292: dup3 */
+ NULL, /* 293: pipe2 */
+ lx_inotify_init1, /* 294: inotify_init1 */
+ NULL, /* 295: preadv */
+ NULL, /* 296: pwritev */
+ lx_rt_tgsigqueueinfo, /* 297: rt_tgsigqueueinfo */
+ NULL, /* 298: perf_event_open */
+ NULL, /* 299: recvmmsg */
+ NULL, /* 300: fanotify_init */
+ NULL, /* 301: fanotify_mark */
+ NULL, /* 302: prlimit64 */
+ NULL, /* 303: name_to_handle_at */
+ NULL, /* 304: open_by_handle_at */
+ NULL, /* 305: clock_adjtime */
+ NULL, /* 306: syncfs */
+ NULL, /* 307: sendmmsg */
+ NULL, /* 309: setns */
+ NULL, /* 309: getcpu */
+ NULL, /* 310: process_vm_readv */
+ NULL, /* 311: process_vm_writev */
+ NULL, /* 312: kcmp */
+ NULL, /* 313: finit_module */
+ NULL, /* 314: sched_setattr */
+ NULL, /* 315: sched_getattr */
+ NULL, /* 316: renameat2 */
+ NULL, /* 317: seccomp */
+ NULL, /* 318: getrandom */
+ NULL, /* 319: memfd_create */
+ NULL, /* 320: kexec_file_load */
+ NULL, /* 321: bpf */
+ NULL, /* 322: execveat */
+
+ /* XXX TBD gap then x32 syscalls from 512 - 544 */
+};
+
+#else
+/* The following is the 32-bit syscall table */
+
+static lx_syscall_handler_t lx_handlers[] = {
+ NULL, /* 0: nosys */
+ lx_exit, /* 1: exit */
+ lx_fork, /* 2: fork */
+ NULL, /* 3: read */
+ NULL, /* 4: write */
+ NULL, /* 5: open */
+ lx_close, /* 6: close */
+ NULL, /* 7: waitpid */
+ lx_creat, /* 8: creat */
+ NULL, /* 9: link */
+ lx_unlink, /* 10: unlink */
+ lx_execve, /* 11: execve */
+ lx_chdir, /* 12: chdir */
+ NULL, /* 13: time */
+ lx_mknod, /* 14: mknod */
+ NULL, /* 15: chmod */
+ NULL, /* 16: lchown16 */
+ NULL, /* 17: break */
+ NULL, /* 18: stat */
+ lx_lseek, /* 19: lseek */
+ NULL, /* 20: getpid */
+ lx_mount, /* 21: mount */
+ lx_umount, /* 22: umount */
+ lx_setuid16, /* 23: setuid16 */
+ lx_getuid16, /* 24: getuid16 */
+ lx_stime, /* 25: stime */
+ NULL, /* 26: ptrace */
+ lx_alarm, /* 27: alarm */
+ NULL, /* 28: fstat */
+ lx_pause, /* 29: pause */
+ lx_utime, /* 30: utime */
+ NULL, /* 31: stty */
+ NULL, /* 32: gtty */
+ NULL, /* 33: access */
+ lx_nice, /* 34: nice */
+ NULL, /* 35: ftime */
+ lx_sync, /* 36: sync */
+ NULL, /* 37: kill */
+ lx_rename, /* 38: rename */
+ NULL, /* 39: mkdir */
+ lx_rmdir, /* 40: rmdir */
+ lx_dup, /* 41: dup */
+ NULL, /* 42: pipe */
+ lx_times, /* 43: times */
+ NULL, /* 44: prof */
+ NULL, /* 45: brk */
+ lx_setgid16, /* 46: setgid16 */
+ lx_getgid16, /* 47: getgid16 */
+ lx_signal, /* 48: signal */
+ lx_geteuid16, /* 49: geteuid16 */
+ lx_getegid16, /* 50: getegid16 */
+ NULL, /* 51: acct */
+ lx_umount2, /* 52: umount2 */
+ NULL, /* 53: lock */
+ NULL, /* 54: ioctl */
+ NULL, /* 55: fcntl */
+ NULL, /* 56: mpx */
+ lx_setpgid, /* 57: setpgid */
+ NULL, /* 58: ulimit */
+ NULL, /* 59: olduname */
+ lx_umask, /* 60: umask */
+ lx_chroot, /* 61: chroot */
+ NULL, /* 62: ustat */
+ lx_dup2, /* 63: dup2 */
+ NULL, /* 64: getppid */
+ lx_getpgrp, /* 65: getpgrp */
+ lx_setsid, /* 66: setsid */
+ lx_sigaction, /* 67: sigaction */
+ NULL, /* 68: sgetmask */
+ NULL, /* 69: ssetmask */
+ lx_setreuid16, /* 70: setreuid16 */
+ lx_setregid16, /* 71: setregid16 */
+ lx_sigsuspend, /* 72: sigsuspend */
+ lx_sigpending, /* 73: sigpending */
+ lx_sethostname, /* 74: sethostname */
+ NULL, /* 75: setrlimit */
+ NULL, /* 76: getrlimit */
+ lx_getrusage, /* 77: getrusage */
+ NULL, /* 78: gettimeofday */
+ lx_settimeofday, /* 79: settimeofday */
+ lx_getgroups16, /* 80: getgroups16 */
+ lx_setgroups16, /* 81: setgroups16 */
+ NULL, /* 82: select */
+ lx_symlink, /* 83: symlink */
+ NULL, /* 84: oldlstat */
+ lx_readlink, /* 85: readlink */
+ NULL, /* 86: uselib */
+ NULL, /* 87: swapon */
+ lx_reboot, /* 88: reboot */
+ lx_readdir, /* 89: readdir */
+ lx_mmap, /* 90: mmap */
+ lx_munmap, /* 91: munmap */
+ lx_truncate, /* 92: truncate */
+ lx_ftruncate, /* 93: ftruncate */
+ NULL, /* 94: fchmod */
+ NULL, /* 95: fchown16 */
+ lx_getpriority, /* 96: getpriority */
+ lx_setpriority, /* 97: setpriority */
+ NULL, /* 98: profil */
+ lx_statfs, /* 99: statfs */
+ lx_fstatfs, /* 100: fstatfs */
+ NULL, /* 101: ioperm */
+ lx_socketcall, /* 102: socketcall */
+ lx_syslog, /* 103: syslog */
+ lx_setitimer, /* 104: setitimer */
+ lx_getitimer, /* 105: getitimer */
+ NULL, /* 106: stat */
+ NULL, /* 107: lstat */
+ NULL, /* 108: fstat */
+ NULL, /* 109: uname */
+ NULL, /* 110: oldiopl */
+ lx_vhangup, /* 111: vhangup */
+ NULL, /* 112: idle */
+ NULL, /* 113: vm86old */
+ NULL, /* 114: wait4 */
+ NULL, /* 115: swapoff */
+ NULL, /* 116: sysinfo */
+ lx_ipc, /* 117: ipc */
+ lx_fsync, /* 118: fsync */
+ lx_sigreturn, /* 119: sigreturn */
+ lx_clone, /* 120: clone */
+ lx_setdomainname, /* 121: setdomainname */
+ NULL, /* 122: uname */
+ NULL, /* 123: modify_ldt */
+ lx_adjtimex, /* 124: adjtimex */
+ lx_mprotect, /* 125: mprotect */
+ lx_sigprocmask, /* 126: sigprocmask */
+ NULL, /* 127: create_module */
+ NULL, /* 128: init_module */
+ NULL, /* 129: delete_module */
+ NULL, /* 130: get_kernel_syms */
+ NULL, /* 131: quotactl */
+ lx_getpgid, /* 132: getpgid */
+ lx_fchdir, /* 133: fchdir */
+ NULL, /* 134: bdflush */
+ lx_sysfs, /* 135: sysfs */
+ NULL, /* 136: personality */
+ NULL, /* 137: afs_syscall */
+ lx_setfsuid16, /* 138: setfsuid16 */
+ lx_setfsgid16, /* 139: setfsgid16 */
+ lx_llseek, /* 140: llseek */
+ NULL, /* 141: getdents */
+ NULL, /* 142: select */
+ lx_flock, /* 143: flock */
+ lx_msync, /* 144: msync */
+ NULL, /* 145: readv */
+ NULL, /* 146: writev */
+ lx_getsid, /* 147: getsid */
+ lx_fdatasync, /* 148: fdatasync */
+ lx_sysctl, /* 149: sysctl */
+ lx_mlock, /* 150: mlock */
+ lx_munlock, /* 151: munlock */
+ lx_mlockall, /* 152: mlockall */
+ lx_munlockall, /* 153: munlockall */
+ lx_sched_setparam, /* 154: sched_setparam */
+ lx_sched_getparam, /* 155: sched_getparam */
+ lx_sched_setscheduler, /* 156: sched_setscheduler */
+ lx_sched_getscheduler, /* 157: sched_getscheduler */
+ NULL, /* 158: sched_yield */
+ lx_sched_get_priority_max, /* 159: sched_get_priority_max */
+ lx_sched_get_priority_min, /* 160: sched_get_priority_min */
+ lx_sched_rr_get_interval, /* 161: sched_rr_get_interval */
+ NULL, /* 162: nanosleep */
+ lx_remap, /* 163: mremap */
+ NULL, /* 164: setresuid16 */
+ lx_getresuid16, /* 165: getresuid16 */
+ NULL, /* 166: vm86 */
+ lx_query_module, /* 167: query_module */
+ NULL, /* 168: poll */
+ NULL, /* 169: nfsservctl */
+ NULL, /* 170: setresgid16 */
+ lx_getresgid16, /* 171: getresgid16 */
+ NULL, /* 172: prctl */
+ lx_rt_sigreturn, /* 173: rt_sigreturn */
+ lx_rt_sigaction, /* 174: rt_sigaction */
+ lx_rt_sigprocmask, /* 175: rt_sigprocmask */
+ lx_rt_sigpending, /* 176: rt_sigpending */
+ lx_rt_sigtimedwait, /* 177: rt_sigtimedwait */
+ lx_rt_sigqueueinfo, /* 178: rt_sigqueueinfo */
+ lx_rt_sigsuspend, /* 179: rt_sigsuspend */
+ NULL, /* 180: pread64 */
+ NULL, /* 181: pwrite64 */
+ NULL, /* 182: chown16 */
+ NULL, /* 183: getcwd */
+ lx_capget, /* 184: capget */
+ lx_capset, /* 185: capset */
+ lx_sigaltstack, /* 186: sigaltstack */
+ lx_sendfile, /* 187: sendfile */
+ NULL, /* 188: getpmsg */
+ NULL, /* 189: putpmsg */
+ lx_vfork, /* 190: vfork */
+ NULL, /* 191: getrlimit */
+ lx_mmap2, /* 192: mmap2 */
+ lx_truncate64, /* 193: truncate64 */
+ lx_ftruncate64, /* 194: ftruncate64 */
+ NULL, /* 195: stat64 */
+ NULL, /* 196: lstat64 */
+ NULL, /* 197: fstat64 */
+ NULL, /* 198: lchown */
+ lx_getuid, /* 199: getuid */
+ lx_getgid, /* 200: getgid */
+ lx_geteuid, /* 201: geteuid */
+ lx_getegid, /* 202: getegid */
+ lx_setreuid, /* 203: setreuid */
+ lx_setregid, /* 204: setregid */
+ lx_getgroups, /* 205: getgroups */
+ lx_setgroups, /* 206: setgroups */
+ NULL, /* 207: fchown */
+ NULL, /* 208: setresuid */
+ lx_getresuid, /* 209: getresuid */
+ NULL, /* 210: setresgid */
+ lx_getresgid, /* 211: getresgid */
+ NULL, /* 212: chown */
+ lx_setuid, /* 213: setuid */
+ lx_setgid, /* 214: setgid */
+ lx_setfsuid, /* 215: setfsuid */
+ lx_setfsgid, /* 216: setfsgid */
+ NULL, /* 217: pivot_root */
+ lx_mincore, /* 218: mincore */
+ lx_madvise, /* 219: madvise */
+ NULL, /* 220: getdents64 */
+ NULL, /* 221: fcntl64 */
+ NULL, /* 222: tux */
+ NULL, /* 223: security */
+ NULL, /* 224: gettid */
+ NULL, /* 225: readahead */
+ NULL, /* 226: setxattr */
+ NULL, /* 227: lsetxattr */
+ NULL, /* 228: fsetxattr */
+ NULL, /* 229: getxattr */
+ NULL, /* 230: lgetxattr */
+ NULL, /* 231: fgetxattr */
+ NULL, /* 232: listxattr */
+ NULL, /* 233: llistxattr */
+ NULL, /* 234: flistxattr */
+ NULL, /* 235: removexattr */
+ NULL, /* 236: lremovexattr */
+ NULL, /* 237: fremovexattr */
+ NULL, /* 238: tkill */
+ lx_sendfile64, /* 239: sendfile64 */
+ NULL, /* 240: futex */
+ lx_sched_setaffinity, /* 241: sched_setaffinity */
+ lx_sched_getaffinity, /* 242: sched_getaffinity */
+ NULL, /* 243: set_thread_area */
+ NULL, /* 244: get_thread_area */
+ lx_io_setup, /* 245: io_setup */
+ lx_io_destroy, /* 246: io_destroy */
+ lx_io_getevents, /* 247: io_getevents */
+ lx_io_submit, /* 248: io_submit */
+ lx_io_cancel, /* 249: io_cancel */
+ lx_fadvise64, /* 250: fadvise64 */
+ NULL, /* 251: nosys */
+ lx_group_exit, /* 252: group_exit */
+ NULL, /* 253: lookup_dcookie */
+ NULL, /* 254: epoll_create */
+ NULL, /* 255: epoll_ctl */
+ NULL, /* 256: epoll_wait */
+ NULL, /* 257: remap_file_pages */
+ NULL, /* 258: set_tid_address */
+ lx_timer_create, /* 259: timer_create */
+ lx_timer_settime, /* 260: timer_settime */
+ lx_timer_gettime, /* 261: timer_gettime */
+ lx_timer_getoverrun, /* 262: timer_getoverrun */
+ lx_timer_delete, /* 263: timer_delete */
+ NULL, /* 264: clock_settime */
+ NULL, /* 265: clock_gettime */
+ NULL, /* 266: clock_getres */
+ lx_clock_nanosleep, /* 267: clock_nanosleep */
+ lx_statfs64, /* 268: statfs64 */
+ lx_fstatfs64, /* 269: fstatfs64 */
+ NULL, /* 270: tgkill */
+ lx_utimes, /* 271: utimes */
+ lx_fadvise64_64, /* 272: fadvise64_64 */
+ NULL, /* 273: vserver */
+ NULL, /* 274: mbind */
+ NULL, /* 275: get_mempolicy */
+ NULL, /* 276: set_mempolicy */
+ NULL, /* 277: mq_open */
+ NULL, /* 278: mq_unlink */
+ NULL, /* 279: mq_timedsend */
+ NULL, /* 280: mq_timedreceive */
+ NULL, /* 281: mq_notify */
+ NULL, /* 282: mq_getsetattr */
+ NULL, /* 283: kexec_load */
+ NULL, /* 284: waitid */
+ NULL, /* 285: sys_setaltroot */
+ NULL, /* 286: add_key */
+ NULL, /* 287: request_key */
+ NULL, /* 288: keyctl */
+ NULL, /* 289: ioprio_set */
+ NULL, /* 290: ioprio_get */
+ lx_inotify_init, /* 291: inotify_init */
+ lx_inotify_add_watch, /* 292: inotify_add_watch */
+ lx_inotify_rm_watch, /* 293: inotify_rm_watch */
+ NULL, /* 294: migrate_pages */
+ NULL, /* 295: openat */
+ NULL, /* 296: mkdirat */
+ lx_mknodat, /* 297: mknodat */
+ NULL, /* 298: fchownat */
+ lx_futimesat, /* 299: futimesat */
+ NULL, /* 300: fstatat64 */
+ lx_unlinkat, /* 301: unlinkat */
+ lx_renameat, /* 302: renameat */
+ NULL, /* 303: linkat */
+ lx_symlinkat, /* 304: symlinkat */
+ lx_readlinkat, /* 305: readlinkat */
+ NULL, /* 306: fchmodat */
+ NULL, /* 307: faccessat */
+ NULL, /* 308: pselect6 */
+ NULL, /* 309: ppoll */
+ NULL, /* 310: unshare */
+ NULL, /* 311: set_robust_list */
+ NULL, /* 312: get_robust_list */
+ NULL, /* 313: splice */
+ NULL, /* 314: sync_file_range */
+ NULL, /* 315: tee */
+ NULL, /* 316: vmsplice */
+ NULL, /* 317: move_pages */
+ NULL, /* 318: getcpu */
+ NULL, /* 319: epoll_pwait */
+ lx_utimensat, /* 320: utimensat */
+ lx_signalfd, /* 321: signalfd */
+ lx_timerfd_create, /* 322: timerfd_create */
+ lx_eventfd, /* 323: eventfd */
+ NULL, /* 324: fallocate */
+ lx_timerfd_settime, /* 325: timerfd_settime */
+ lx_timerfd_gettime, /* 326: timerfd_gettime */
+ lx_signalfd4, /* 327: signalfd4 */
+ lx_eventfd2, /* 328: eventfd2 */
+ NULL, /* 329: epoll_create1 */
+ lx_dup3, /* 330: dup3 */
+ NULL, /* 331: pipe2 */
+ lx_inotify_init1, /* 332: inotify_init1 */
+ NULL, /* 333: preadv */
+ NULL, /* 334: pwritev */
+ lx_rt_tgsigqueueinfo, /* 335: rt_tgsigqueueinfo */
+ NULL, /* 336: perf_event_open */
+ NULL, /* 337: recvmmsg */
+ NULL, /* 338: fanotify_init */
+ NULL, /* 339: fanotify_mark */
+ NULL, /* 340: prlimit64 */
+ NULL, /* 341: name_to_handle_at */
+ NULL, /* 342: open_by_handle_at */
+ NULL, /* 343: clock_adjtime */
+ NULL, /* 344: syncfs */
+ NULL, /* 345: sendmmsg */
+ NULL, /* 346: setns */
+ NULL, /* 347: process_vm_readv */
+ NULL, /* 348: process_vm_writev */
+ NULL, /* 349: kcmp */
+ NULL, /* 350: finit_module */
+ NULL, /* 351: sched_setattr */
+ NULL, /* 352: sched_getattr */
+ NULL, /* 353: renameat2 */
+ NULL, /* 354: seccomp */
+ NULL, /* 355: getrandom */
+ NULL, /* 356: memfd_create */
+ NULL, /* 357: bpf */
+ NULL, /* 358: execveat */
+};
+#endif
diff --git a/usr/src/lib/brand/lx/lx_brand/common/lx_provider.d b/usr/src/lib/brand/lx/lx_brand/common/lx_provider.d
new file mode 100644
index 0000000000..14326e8f56
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/lx_provider.d
@@ -0,0 +1,39 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+provider lx {
+ probe debug(char *buf);
+ probe sigdeliver(int sig, void *lx_sigaction, void *lx_sigstack);
+ probe sigreturn(void *lx_ucontext, void *ucontext, uintptr_t sp);
+
+ probe signal__delivery__frame__create(void *lx_sigdeliver_frame);
+ probe signal__delivery__frame__found(void *lx_sigdeliver_frame);
+ probe signal__delivery__frame__corrupt(void *lx_sigdeliver_frame);
+
+ probe signal__post__handler(uintptr_t old_sp, uintptr_t new_sp);
+
+ probe signal__altstack__enable(uintptr_t alt_sp);
+ probe signal__altstack__disable();
+
+ probe emulate__enter(void *ucp, int syscall_num, uintptr_t *args);
+ probe emulate__return(void *ucp, int syscall_num, uintptr_t ret,
+ uintptr_t errn);
+};
+
+#pragma D attributes Evolving/Evolving/ISA provider lx provider
+#pragma D attributes Private/Private/Unknown provider lx module
+#pragma D attributes Private/Private/Unknown provider lx function
+#pragma D attributes Private/Private/ISA provider lx name
+#pragma D attributes Private/Private/ISA provider lx args
diff --git a/usr/src/lib/brand/lx/lx_brand/common/mapfile b/usr/src/lib/brand/lx/lx_brand/common/mapfile
new file mode 100644
index 0000000000..0663f4bc19
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/mapfile
@@ -0,0 +1,47 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+#
+# Scope everything local -- our .init section is our only public interface.
+#
+{
+ local:
+ *;
+};
diff --git a/usr/src/lib/brand/lx/lx_brand/common/mapfile-vers b/usr/src/lib/brand/lx/lx_brand/common/mapfile-vers
new file mode 100644
index 0000000000..0663f4bc19
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/mapfile-vers
@@ -0,0 +1,47 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+#
+# Scope everything local -- our .init section is our only public interface.
+#
+{
+ local:
+ *;
+};
diff --git a/usr/src/lib/brand/lx/lx_brand/common/mem.c b/usr/src/lib/brand/lx/lx_brand/common/mem.c
new file mode 100644
index 0000000000..4bccfc6032
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/mem.c
@@ -0,0 +1,814 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <procfs.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <sys/mman.h>
+#include <sys/param.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_syscall.h>
+
+/*
+ * There are two forms of mmap, mmap() and mmap2(). The only difference is that
+ * the final argument to mmap2() specifies the number of pages, not bytes.
+ * Linux has a number of additional flags, but they are all deprecated. We also
+ * ignore the MAP_GROWSDOWN flag, which has no equivalent on Solaris.
+ *
+ * The Linux mmap() returns ENOMEM in some cases where Solaris returns
+ * EOVERFLOW, so we translate the errno as necessary.
+ */
+
+int pagesize; /* needed for mmap2() */
+
+#define LX_MAP_ANONYMOUS 0x00020
+#define LX_MAP_LOCKED 0x02000
+#define LX_MAP_NORESERVE 0x04000
+#define LX_MAP_32BIT 0x00040
+
+#define LX_MADV_REMOVE 9
+#define LX_MADV_DONTFORK 10
+#define LX_MADV_DOFORK 11
+#define LX_MADV_MERGEABLE 12
+#define LX_MADV_UNMERGEABLE 13
+#define LX_MADV_HUGEPAGE 14
+#define LX_MADV_NOHUGEPAGE 15
+#define LX_MADV_DONTDUMP 16
+#define LX_MADV_DODUMP 17
+
+#define TWO_GB 0x80000000
+
+static void lx_remap_anoncache_invalidate(uintptr_t, size_t);
+
+static int
+ltos_mmap_flags(int flags)
+{
+ int new_flags;
+
+ new_flags = flags & (MAP_TYPE | MAP_FIXED);
+
+ if (flags & LX_MAP_ANONYMOUS)
+ new_flags |= MAP_ANONYMOUS;
+ if (flags & LX_MAP_NORESERVE)
+ new_flags |= MAP_NORESERVE;
+
+#if defined(_LP64)
+ if (flags & LX_MAP_32BIT)
+ new_flags |= MAP_32BIT;
+#endif
+
+ return (new_flags);
+}
+
+static void *
+mmap_common(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
+ uintptr_t p5, off64_t p6)
+{
+ void *addr = (void *)p1;
+ size_t len = p2;
+ int prot = p3;
+ int flags = p4;
+ int fd = p5;
+ off64_t off = p6;
+ void *ret;
+
+ if (LX_DEBUG_ISENABLED) {
+ char *path, path_buf[MAXPATHLEN];
+
+ path = lx_fd_to_path(fd, path_buf, sizeof (path_buf));
+ if (path == NULL)
+ path = "?";
+
+ lx_debug("\tmmap_common(): fd = %d - %s", fd, path);
+ }
+
+ /*
+ * Under Linux, the file descriptor is ignored when mapping zfod
+ * anonymous memory, On Solaris, we want the fd set to -1 for the
+ * same functionality.
+ */
+ if (flags & LX_MAP_ANONYMOUS)
+ fd = -1;
+
+ /*
+ * We refuse, as a matter of principle, to overcommit memory.
+ * Unfortunately, several bits of important and popular software expect
+ * to be able to pre-allocate large amounts of virtual memory but then
+ * probably never use it. One particularly bad example of this
+ * practice is golang.
+ *
+ * In the interest of running software, unsafe or not, we fudge
+ * something vaguely similar to overcommit by permanently enabling
+ * MAP_NORESERVE unless MAP_LOCKED was requested:
+ */
+ if (!(flags & LX_MAP_LOCKED)) {
+ flags |= LX_MAP_NORESERVE;
+ }
+
+ /*
+ * This is totally insane. The NOTES section in the linux mmap(2) man
+ * page claims that on some architectures, read protection may
+ * automatically include exec protection. It has been observed on a
+ * native linux system that the /proc/<pid>/maps file does indeed
+ * show that segments mmap'd from userland (such as libraries mapped in
+ * by the dynamic linker) all have exec the permission set, even for
+ * data segments.
+ *
+ * This insanity is tempered by the fact that the behavior is disabled
+ * for ELF binaries bearing a PT_GNU_STACK header which lacks PF_X
+ * (which most do). Such a header will clear the READ_IMPLIES_EXEC
+ * flag from the process personality.
+ */
+ if (prot & PROT_READ) {
+ unsigned int personality;
+
+ personality = syscall(SYS_brand, B_GET_PERSONALITY);
+ if ((personality & LX_PER_READ_IMPLIES_EXEC) != 0) {
+ prot |= PROT_EXEC;
+ }
+ }
+
+ ret = mmap64(addr, len, prot, ltos_mmap_flags(flags), fd, off);
+
+ if (ret == MAP_FAILED)
+ return ((void *)(long)(errno == EOVERFLOW ? -ENOMEM : -errno));
+
+ if (flags & LX_MAP_LOCKED)
+ (void) mlock(ret, len);
+
+ /*
+ * We have a new mapping; invalidate any cached anonymous regions that
+ * overlap(ped) with it.
+ */
+ lx_remap_anoncache_invalidate((uintptr_t)ret, len);
+
+ return (ret);
+}
+
+long
+lx_mmap(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
+ uintptr_t p5, uintptr_t p6)
+{
+ return ((ssize_t)mmap_common(p1, p2, p3, p4, p5, (off64_t)p6));
+}
+
+long
+lx_mmap2(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
+ uintptr_t p5, uintptr_t p6)
+{
+ if (pagesize == 0)
+ pagesize = sysconf(_SC_PAGESIZE);
+
+ return ((ssize_t)mmap_common(p1, p2, p3, p4, p5,
+ (off64_t)p6 * pagesize));
+}
+
+
+/*
+ * The locking family of system calls, as well as msync(), are identical. On
+ * Solaris, they are layered on top of the memcntl syscall, so they cannot be
+ * pass-thru.
+ */
+long
+lx_mlock(uintptr_t addr, uintptr_t len)
+{
+ uintptr_t addr1 = addr & PAGEMASK;
+ uintptr_t len1 = len + (addr & PAGEOFFSET);
+
+ return (mlock((void *)addr1, (size_t)len1) ? -errno : 0);
+}
+
+long
+lx_mlockall(uintptr_t flags)
+{
+ return (mlockall(flags) ? -errno : 0);
+}
+
+long
+lx_munlock(uintptr_t addr, uintptr_t len)
+{
+ uintptr_t addr1 = addr & PAGEMASK;
+ uintptr_t len1 = len + (addr & PAGEOFFSET);
+
+ return (munlock((void *)addr1, (size_t)len1) ? -errno : 0);
+}
+
+long
+lx_munlockall(void)
+{
+ return (munlockall() ? -errno : 0);
+}
+
+long
+lx_msync(uintptr_t addr, uintptr_t len, uintptr_t flags)
+{
+ return (msync((void *)addr, (size_t)len, flags) ? -errno : 0);
+}
+
+/*
+ * Illumos and Linux overlap on the basic flags, and are disjoint on the rest.
+ * Linux also allows the length to be zero, while Illumos does not.
+ */
+long
+lx_madvise(uintptr_t start, uintptr_t len, uintptr_t advice)
+{
+ int ret;
+
+ if (len == 0)
+ return (0);
+
+ /* approximately similar */
+ if (advice == LX_MADV_REMOVE)
+ advice = MADV_FREE;
+
+ switch (advice) {
+ case MADV_NORMAL:
+ case MADV_RANDOM:
+ case MADV_SEQUENTIAL:
+ case MADV_WILLNEED:
+ case MADV_FREE:
+ case MADV_DONTNEED:
+ if (advice == MADV_DONTNEED) {
+ /*
+ * On Linux, MADV_DONTNEED implies an immediate purge
+ * of the specified region. This is spuriously
+ * different from (nearly) every other Unix, having
+ * apparently been done to mimic the semantics on
+ * Digital Unix (!). This is bad enough (MADV_FREE
+ * both has better semantics and results in better
+ * performance), but it gets worse: Linux applications
+ * (and notably, jemalloc) have managed to depend on
+ * the busted semantics of MADV_DONTNEED on Linux. We
+ * implement these semantics via MADV_PURGE -- and
+ * we translate our advice accordingly.
+ */
+ advice = MADV_PURGE;
+ }
+
+ ret = madvise((void *)start, len, advice);
+ if (ret == -1) {
+ if (errno == EBUSY) {
+ if (advice != MADV_PURGE)
+ return (-EINVAL);
+
+ /*
+ * If we got an EBUSY from a MADV_PURGE, we
+ * will now try again with a MADV_DONTNEED:
+ * there are conditions (namely, with locked
+ * mappings that haven't yet been faulted in)
+ * where MADV_PURGE will fail but MADV_DONTNEED
+ * will succeed. If this succeeds, we'll call
+ * the operation successful; if not, we'll kick
+ * back EINVAL.
+ */
+ advice = MADV_DONTNEED;
+
+ if (madvise((void *)start, len, advice) == 0)
+ return (0);
+
+ return (-EINVAL);
+ }
+
+ return (-errno);
+ } else {
+ return (0);
+ }
+
+ /* harmless to pretend these work */
+ case LX_MADV_DONTFORK:
+ case LX_MADV_DOFORK:
+ case LX_MADV_HUGEPAGE:
+ case LX_MADV_NOHUGEPAGE:
+ case LX_MADV_DONTDUMP:
+ case LX_MADV_DODUMP:
+ return (0);
+
+ /* we'll return an error for the rest of the Linux flags */
+ default:
+ return (-EINVAL);
+ }
+}
+
+/*
+ * mprotect() is identical except that we ignore the Linux flags PROT_GROWSDOWN
+ * and PROT_GROWSUP, which have no equivalent on Solaris.
+ */
+#define LX_PROT_GROWSDOWN 0x01000000
+#define LX_PROT_GROWSUP 0x02000000
+
+long
+lx_mprotect(uintptr_t start, uintptr_t len, uintptr_t prot)
+{
+ prot &= ~(LX_PROT_GROWSUP | LX_PROT_GROWSDOWN);
+
+ return (mprotect((void *)start, len, prot) ? -errno : 0);
+}
+
+#define LX_MREMAP_MAYMOVE 1 /* mapping can be moved */
+#define LX_MREMAP_FIXED 2 /* address is fixed */
+
+/*
+ * Unfortunately, the Linux mremap() manpage contains a statement that is, at
+ * best, grossly oversimplified: that mremap() "can be used to implement a
+ * very efficient realloc(3)." To the degree this is true at all, it is only
+ * true narrowly (namely, when large buffers are being expanded but can't be
+ * expanded in place due to virtual address space restrictions) -- but
+ * apparently, someone took this very literally, because variants of glibc
+ * appear to simply implement realloc() in terms of mremap(). This is
+ * unfortunate because absent intelligent usage, it forces realloc() to have
+ * an unncessary interaction with the VM system for small expansions -- and if
+ * realloc() is itself abused (e.g., if a consumer repeatedly expands and
+ * contracts the same memory buffer), the net result can be less efficient
+ * than a much more naive realloc() implementation. And if native Linux is
+ * suboptimal in this case, we are deeply pathological, having not
+ * historically supported mremap() for anonymous mappings at all. To make
+ * this at least palatable, we not only support remap for anonymous mappings
+ * (see lx_remap_anon(), below), we also cache the metadata associated with
+ * these mappings to save both the reads from /proc and the libmapmalloc
+ * alloc/free. We implement the anonymous metadata cache with
+ * lx_remap_anoncache, an LRU cache of prmap_t's that correspond to anonymous
+ * segments that have been resized.
+ */
+#define LX_REMAP_ANONCACHE_NENTRIES 4
+
+static prmap_t lx_remap_anoncache[LX_REMAP_ANONCACHE_NENTRIES];
+static int lx_remap_anoncache_nentries = LX_REMAP_ANONCACHE_NENTRIES;
+static offset_t lx_remap_anoncache_generation;
+static mutex_t lx_remap_anoncache_lock = DEFAULTMUTEX;
+
+static void
+lx_remap_anoncache_invalidate(uintptr_t addr, size_t size)
+{
+ int i;
+
+ if (lx_remap_anoncache_generation == 0)
+ return;
+
+ mutex_lock(&lx_remap_anoncache_lock);
+
+ for (i = 0; i < LX_REMAP_ANONCACHE_NENTRIES; i++) {
+ prmap_t *map = &lx_remap_anoncache[i];
+
+ /*
+ * If the ranges overlap at all, we zap it by clearing the
+ * pr_vaddr.
+ */
+ if (addr < map->pr_vaddr + map->pr_size &&
+ map->pr_vaddr < addr + size) {
+ map->pr_vaddr = 0;
+ }
+ }
+
+ mutex_unlock(&lx_remap_anoncache_lock);
+}
+
+static void
+lx_remap_anoncache_evict(prmap_t *map)
+{
+ if (map >= &lx_remap_anoncache[0] &&
+ map < &lx_remap_anoncache[LX_REMAP_ANONCACHE_NENTRIES]) {
+ /*
+ * We're already in the cache; we just need to zap our pr_vaddr
+ * to indicate that this has been evicted.
+ */
+ map->pr_vaddr = 0;
+ } else {
+ /*
+ * We need to invalidate this by address and size.
+ */
+ lx_remap_anoncache_invalidate(map->pr_vaddr, map->pr_size);
+ }
+}
+
+static void
+lx_remap_anoncache_load(prmap_t *map, size_t size)
+{
+ offset_t oldest = 0;
+ prmap_t *evict = NULL;
+ int i;
+
+ if (map >= &lx_remap_anoncache[0] &&
+ map < &lx_remap_anoncache[LX_REMAP_ANONCACHE_NENTRIES]) {
+ /*
+ * We're already in the cache -- we just need to update
+ * our LRU field (pr_offset) to reflect the hit.
+ */
+ map->pr_offset = lx_remap_anoncache_generation++;
+ map->pr_size = size;
+ return;
+ }
+
+ mutex_lock(&lx_remap_anoncache_lock);
+
+ for (i = 0; i < lx_remap_anoncache_nentries; i++) {
+ if (lx_remap_anoncache[i].pr_vaddr == 0) {
+ evict = &lx_remap_anoncache[i];
+ break;
+ }
+
+ if (oldest == 0 || lx_remap_anoncache[i].pr_offset < oldest) {
+ oldest = lx_remap_anoncache[i].pr_offset;
+ evict = &lx_remap_anoncache[i];
+ }
+ }
+
+ if (evict != NULL) {
+ *evict = *map;
+ evict->pr_offset = lx_remap_anoncache_generation++;
+ evict->pr_size = size;
+ }
+
+ mutex_unlock(&lx_remap_anoncache_lock);
+}
+
+/*
+ * As part of lx_remap() (see below) and to accommodate heavy realloc() use
+ * cases (see the discussion of the lx_remap_anoncache, above), we allow
+ * anonymous segments to be "remapped" in that we are willing to truncate them
+ * or append to them (as much as that's allowed by virtual address space
+ * usage). If we fall out of these cases, we take the more expensive option
+ * of actually copying the data to a new segment -- but we locate the address
+ * in a portion of the address space that should give us plenty of VA space to
+ * expand.
+ */
+static long
+lx_remap_anon(prmap_t *map, prmap_t *maps, int nmap,
+ uintptr_t new_size, uintptr_t flags, uintptr_t new_address)
+{
+ int mflags = MAP_ANON;
+ int prot = 0, i;
+ void *addr, *hint = NULL;
+
+ /*
+ * If our new size is less than our old size and we're either not
+ * being ordered to move it or the address we're being ordered to
+ * move it to is our current address, we can just act as Procrustes
+ * and chop off anything larger than the new size.
+ */
+ if (new_size < map->pr_size && (!(flags & LX_MREMAP_FIXED) ||
+ new_address == map->pr_vaddr)) {
+ if (munmap((void *)(map->pr_vaddr + new_size),
+ map->pr_size - new_size) != 0) {
+ return (-EINVAL);
+ }
+
+ lx_remap_anoncache_load(map, new_size);
+
+ return (map->pr_vaddr);
+ }
+
+ if (map->pr_mflags & (MA_SHM | MA_ISM))
+ return (-EINVAL);
+
+ if (map->pr_mflags & MA_WRITE)
+ prot |= PROT_WRITE;
+
+ if (map->pr_mflags & MA_READ)
+ prot |= PROT_READ;
+
+ if (map->pr_mflags & MA_EXEC)
+ prot |= PROT_EXEC;
+
+ mflags |= (map->pr_mflags & MA_SHARED) ? MAP_SHARED : MAP_PRIVATE;
+
+ if (map->pr_mflags & MA_NORESERVE)
+ mflags |= MAP_NORESERVE;
+
+ /*
+ * If we're not being told where to move it, or the address matches
+ * where we already are, let's try to expand our mapping in place
+ * by adding a fixed mapping after it.
+ */
+ if (!(flags & LX_MREMAP_FIXED) || new_address == map->pr_vaddr) {
+ addr = mmap((void *)(map->pr_vaddr + map->pr_size),
+ new_size - map->pr_size, prot, mflags, -1, 0);
+
+ if (addr == (void *)-1)
+ return (-EINVAL);
+
+ if (addr == (void *)(map->pr_vaddr + map->pr_size)) {
+ lx_remap_anoncache_load(map, new_size);
+ return (map->pr_vaddr);
+ }
+
+ /*
+ * Our advisory address was not followed -- which, as a
+ * practical matter, means that the range conflicted with an
+ * extant mapping. Unmap wherever we landed, and drop into
+ * the relocation case.
+ */
+ (void) munmap(addr, new_size - map->pr_size);
+ }
+
+ lx_remap_anoncache_evict(map);
+
+ /*
+ * If we're here, we actually need to move this mapping -- so if we
+ * can't move it, we're done.
+ */
+ if (!(flags & LX_MREMAP_MAYMOVE))
+ return (-ENOMEM);
+
+ /*
+ * If this is a shared private mapping, we can't remap it.
+ */
+ if (map->pr_mflags & MA_SHARED)
+ return (-EINVAL);
+
+ if (new_address != NULL && (flags & LX_MREMAP_FIXED)) {
+ mflags |= MAP_FIXED;
+ hint = (void *)new_address;
+ } else {
+ /*
+ * We're going to start at the bottom of the address space;
+ * once we hit an address above 2G, we'll treat that as the
+ * bottom of the top of the address space, and set our address
+ * hint below that. To give ourselves plenty of room for
+ * further mremap() expansion, we'll multiply our new size by
+ * 16 and leave that much room between our lowest high address
+ * and our hint.
+ */
+ for (i = 0; i < nmap; i++) {
+ if (maps[i].pr_vaddr < TWO_GB)
+ continue;
+
+ hint = (void *)(maps[i].pr_vaddr - (new_size << 4UL));
+ break;
+ }
+ }
+
+ if ((addr = mmap(hint, new_size, prot, mflags, -1, 0)) == (void *)-1)
+ return (-errno);
+
+ bcopy((void *)map->pr_vaddr, addr, map->pr_size);
+ (void) munmap((void *)map->pr_vaddr, map->pr_size);
+
+ return ((long)addr);
+}
+
+/*
+ * We don't have a native mremap() (and nor do we particularly want one), so
+ * we emulate it strictly in user-land. The idea is simple: we just want to
+ * mmap() the underlying object with the new size and rip down the old mapping.
+ * However, this is problematic because we don't actually have the file
+ * descriptor that corresponds to the resized mapping (and indeed, the mapped
+ * file may not exist in any file system name space). So to get a file
+ * descriptor, we find the (or rather, a) path to the mapped object via its
+ * entry in /proc/self/path and attempt to open it. Assuming that this
+ * succeeds, we then mmap() it and rip down the original mapping. There are
+ * clearly many reasons why this might fail; absent a more apt errno (e.g.,
+ * ENOMEM in some cases), we return EINVAL to denote these cases.
+ */
+long
+lx_remap(uintptr_t old_address, uintptr_t old_size,
+ uintptr_t new_size, uintptr_t flags, uintptr_t new_address)
+{
+ int prot = 0, oflags, mflags = 0, len, fd = -1, i, nmap;
+ prmap_t *map = NULL, *maps;
+ long rval;
+ char path[256], buf[MAXPATHLEN + 1];
+ struct stat st;
+ ssize_t n;
+ static uintptr_t pagesize = 0;
+
+ if (pagesize == 0)
+ pagesize = sysconf(_SC_PAGESIZE);
+
+ if ((flags & (LX_MREMAP_MAYMOVE | LX_MREMAP_FIXED)) == LX_MREMAP_FIXED)
+ return (-EINVAL);
+
+ if (old_address & (pagesize - 1))
+ return (-EINVAL);
+
+ if (new_size == 0)
+ return (-EINVAL);
+
+ if ((flags & LX_MREMAP_FIXED) && (new_address & (pagesize - 1)))
+ return (-EINVAL);
+
+ if (new_size == old_size && !(flags & LX_MREMAP_FIXED))
+ return (old_address);
+
+ /*
+ * First consult the anoncache; if we find the segment there, we'll
+ * drop straight into lx_remap_anon() and save ourself the pain of
+ * the /proc reads.
+ */
+ mutex_lock(&lx_remap_anoncache_lock);
+
+ for (i = 0; i < lx_remap_anoncache_nentries; i++) {
+ map = &lx_remap_anoncache[i];
+
+ if (map->pr_vaddr != old_address)
+ continue;
+
+ if (map->pr_size != old_size)
+ continue;
+
+ if (lx_remap_anon(map, NULL,
+ 0, new_size, 0, new_address) == old_address) {
+ mutex_unlock(&lx_remap_anoncache_lock);
+ return (old_address);
+ }
+
+ break;
+ }
+
+ mutex_unlock(&lx_remap_anoncache_lock);
+
+ /*
+ * We need to search the mappings to find our specified mapping. Note
+ * that to perform this search, we use /proc/self/rmap instead of
+ * /proc/self/map. This is to accommodate the case where an mmap()'d
+ * and then ftruncate()'d file is being mremap()'d: rmap will report
+ * the size of the mapping (which we need to validate old_size) where
+ * map will report the smaller of the size of the mapping and the
+ * size of the object. (The "r" in "rmap" denotes "reserved".)
+ */
+ if ((fd = open("/native/proc/self/rmap", O_RDONLY)) == -1 ||
+ fstat(fd, &st) != 0) {
+ if (fd >= 0) {
+ (void) close(fd);
+ }
+ return (-EINVAL);
+ }
+
+ /*
+ * Determine the number of mappings we need to read and allocate
+ * a buffer:
+ */
+ nmap = st.st_size / sizeof (prmap_t);
+ if ((maps = malloc((nmap + 1) * sizeof (prmap_t))) == NULL) {
+ (void) close(fd);
+ return (-EINVAL);
+ }
+
+ /*
+ * Read mappings from the kernel and determine how many complete
+ * mappings were read:
+ */
+ if ((n = read(fd, maps, (nmap + 1) * sizeof (prmap_t))) < 0) {
+ lx_debug("\rread of /proc/self/map failed: %s",
+ strerror(errno));
+ (void) close(fd);
+ rval = -EINVAL;
+ goto out;
+ }
+ (void) close(fd);
+
+ nmap = n / sizeof (prmap_t);
+ lx_debug("\tfound %d mappings", nmap);
+
+ /*
+ * Check if any mappings match our arguments:
+ */
+ for (i = 0; i < nmap; i++) {
+ if (maps[i].pr_vaddr == old_address &&
+ maps[i].pr_size == old_size) {
+ map = &maps[i];
+ break;
+ }
+
+ if (maps[i].pr_vaddr <= old_address &&
+ old_address + old_size < maps[i].pr_vaddr +
+ maps[i].pr_size) {
+ /*
+ * We have a mismatch, but our specified range is
+ * a subset of the actual segment; this is EINVAL.
+ */
+ rval = -EINVAL;
+ goto out;
+ }
+ }
+
+ if (i == nmap) {
+ lx_debug("\tno matching mapping found");
+ rval = -EFAULT;
+ goto out;
+ }
+
+ if (map->pr_mflags & (MA_ISM | MA_SHM)) {
+ /*
+ * If this is either ISM or System V shared memory, we're not
+ * going to remap it.
+ */
+ rval = -EINVAL;
+ goto out;
+ }
+
+ oflags = (map->pr_mflags & MA_WRITE) ? O_RDWR : O_RDONLY;
+
+ if (map->pr_mflags & MA_ANON) {
+ /*
+ * This is an anonymous mapping -- which is the one case in
+ * which we perform something that approaches a true remap.
+ */
+ rval = lx_remap_anon(map, maps, nmap,
+ new_size, flags, new_address);
+ goto out;
+ }
+
+ if (!(flags & LX_MREMAP_MAYMOVE)) {
+ /*
+ * If we're not allowed to move this mapping, we're going to
+ * act as if we can't expand it.
+ */
+ rval = -ENOMEM;
+ goto out;
+ }
+
+ if (!(map->pr_mflags & MA_SHARED)) {
+ /*
+ * If this is a private mapping, we're not going to remap it.
+ */
+ rval = -EINVAL;
+ goto out;
+ }
+
+ (void) snprintf(path, sizeof (path),
+ "/native/proc/self/path/%s", map->pr_mapname);
+
+ if ((len = readlink(path, buf, sizeof (buf))) == -1 ||
+ len == sizeof (buf)) {
+ /*
+ * If we failed to read the link, the path might not exist.
+ */
+ rval = -EINVAL;
+ goto out;
+ }
+
+ buf[len] = '\0';
+
+ if ((fd = open(buf, oflags)) == -1) {
+ /*
+ * If we failed to open the object, it may be because it's
+ * not named (i.e., it's anonymous) or because we somehow
+ * don't have permissions. Either way, we're going to kick
+ * it back with EINVAL.
+ */
+ rval = -EINVAL;
+ goto out;
+ }
+
+ if (map->pr_mflags & MA_WRITE)
+ prot |= PROT_WRITE;
+
+ if (map->pr_mflags & MA_READ)
+ prot |= PROT_READ;
+
+ if (map->pr_mflags & MA_EXEC)
+ prot |= PROT_EXEC;
+
+ mflags = MAP_SHARED;
+
+ if (new_address != NULL && (flags & LX_MREMAP_FIXED)) {
+ mflags |= MAP_FIXED;
+ } else {
+ new_address = NULL;
+ }
+
+ rval = (long)mmap((void *)new_address, new_size,
+ prot, mflags, fd, map->pr_offset);
+ (void) close(fd);
+
+ if ((void *)rval == MAP_FAILED) {
+ rval = -ENOMEM;
+ goto out;
+ }
+
+ /*
+ * Our mapping succeeded; we're now going to rip down the old mapping.
+ */
+ (void) munmap((void *)old_address, old_size);
+out:
+ free(maps);
+ return (rval);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/misc.c b/usr/src/lib/brand/lx/lx_brand/common/misc.c
new file mode 100644
index 0000000000..e8d60c4696
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/misc.c
@@ -0,0 +1,795 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <stdlib.h>
+#include <assert.h>
+#include <alloca.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <strings.h>
+#include <macros.h>
+#include <sys/brand.h>
+#include <sys/reboot.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/sysmacros.h>
+#include <sys/systeminfo.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/shm.h>
+#include <sys/socket.h>
+#include <sys/lx_types.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_fcntl.h>
+#include <sys/lx_thread.h>
+#include <sys/inotify.h>
+#include <sys/eventfd.h>
+#include <thread.h>
+#include <unistd.h>
+#include <libintl.h>
+#include <zone.h>
+#include <priv.h>
+#include <procfs.h>
+#include <lx_syscall.h>
+#include <sys/lx_socket.h>
+
+extern int sethostname(char *, int);
+
+/* ARGUSED */
+long
+lx_rename(uintptr_t p1, uintptr_t p2)
+{
+ int ret;
+
+ ret = rename((const char *)p1, (const char *)p2);
+
+ if (ret < 0) {
+ /*
+ * If rename(2) failed and we're in install mode, return
+ * success if the the reason we failed was either because the
+ * source file didn't actually exist or if it was because we
+ * tried to rename it to be the name of a device currently in
+ * use (resulting in an EBUSY.)
+ *
+ * To help install along further, if the failure was due
+ * to an EBUSY, delete the original file so we don't leave
+ * extra files lying around.
+ */
+ if (lx_install != 0) {
+ if (errno == ENOENT)
+ return (0);
+
+ if (errno == EBUSY) {
+ (void) unlink((const char *)p1);
+ return (0);
+ }
+ }
+
+ return (-errno);
+ }
+
+ return (0);
+}
+
+long
+lx_renameat(uintptr_t ext1, uintptr_t p1, uintptr_t ext2, uintptr_t p2)
+{
+ int ret;
+ int atfd1 = (int)ext1;
+ int atfd2 = (int)ext2;
+
+ if (atfd1 == LX_AT_FDCWD)
+ atfd1 = AT_FDCWD;
+
+ if (atfd2 == LX_AT_FDCWD)
+ atfd2 = AT_FDCWD;
+
+ ret = renameat(atfd1, (const char *)p1, atfd2, (const char *)p2);
+
+ if (ret < 0) {
+ /* see lx_rename() for why we check lx_install */
+ if (lx_install != 0) {
+ if (errno == ENOENT)
+ return (0);
+
+ if (errno == EBUSY) {
+ (void) unlinkat(ext1, (const char *)p1, 0);
+ return (0);
+ }
+ }
+
+ return (-errno);
+ }
+
+ return (0);
+}
+
+/*ARGSUSED*/
+long
+lx_reboot(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
+{
+ int magic = (int)p1;
+ int magic2 = (int)p2;
+ uint_t flag = (int)p3;
+ int rc;
+
+ if (magic != LINUX_REBOOT_MAGIC1)
+ return (-EINVAL);
+ if (magic2 != LINUX_REBOOT_MAGIC2 && magic2 != LINUX_REBOOT_MAGIC2A &&
+ magic2 != LINUX_REBOOT_MAGIC2B && magic2 != LINUX_REBOOT_MAGIC2C &&
+ magic2 != LINUX_REBOOT_MAGIC2D)
+ return (-EINVAL);
+
+ if (geteuid() != 0)
+ return (-EPERM);
+
+ switch (flag) {
+ case LINUX_REBOOT_CMD_CAD_ON:
+ case LINUX_REBOOT_CMD_CAD_OFF:
+ /* ignored */
+ rc = 0;
+ break;
+ case LINUX_REBOOT_CMD_POWER_OFF:
+ case LINUX_REBOOT_CMD_HALT:
+ rc = reboot(RB_HALT, NULL);
+ break;
+ case LINUX_REBOOT_CMD_RESTART:
+ case LINUX_REBOOT_CMD_RESTART2:
+ /* RESTART2 may need more work */
+ lx_msg("Restarting system.\n");
+ rc = reboot(RB_AUTOBOOT, NULL);
+ break;
+ default:
+ return (-EINVAL);
+ }
+
+ return ((rc == -1) ? -errno : rc);
+}
+
+/*
+ * {get,set}groups16() - Handle the conversion between 16-bit Linux gids and
+ * 32-bit illumos gids.
+ */
+long
+lx_getgroups16(uintptr_t p1, uintptr_t p2)
+{
+ int count = (int)p1;
+ lx_gid16_t *grouplist = (lx_gid16_t *)p2;
+ gid_t *grouplist32;
+ int ret;
+ int i;
+
+ if (count < 0)
+ return (-EINVAL);
+
+ grouplist32 = malloc(count * sizeof (gid_t));
+ if (grouplist32 == NULL && count > 0) {
+ free(grouplist32);
+ return (-ENOMEM);
+ }
+ if ((ret = getgroups(count, grouplist32)) < 0) {
+ free(grouplist32);
+ return (-errno);
+ }
+
+ /* we must not modify the list if the incoming count was 0 */
+ if (count > 0) {
+ for (i = 0; i < ret; i++)
+ grouplist[i] = LX_GID32_TO_GID16(grouplist32[i]);
+ }
+
+ free(grouplist32);
+ return (ret);
+}
+
+long
+lx_setgroups16(uintptr_t p1, uintptr_t p2)
+{
+ long rv;
+ int count = (int)p1;
+ lx_gid16_t *grouplist = NULL;
+ gid_t *grouplist32 = NULL;
+ int i;
+
+ if ((grouplist = malloc(count * sizeof (lx_gid16_t))) == NULL) {
+ return (-ENOMEM);
+ }
+ if (uucopy((void *)p2, grouplist, count * sizeof (lx_gid16_t)) != 0) {
+ free(grouplist);
+ return (-EFAULT);
+ }
+
+ grouplist32 = malloc(count * sizeof (gid_t));
+ if (grouplist32 == NULL) {
+ free(grouplist);
+ return (-ENOMEM);
+ }
+ for (i = 0; i < count; i++)
+ grouplist32[i] = LX_GID16_TO_GID32(grouplist[i]);
+
+ /* order matters here to get the correct errno back */
+ if (count > NGROUPS_MAX_DEFAULT) {
+ free(grouplist);
+ free(grouplist32);
+ return (-EINVAL);
+ }
+
+ rv = setgroups(count, grouplist32);
+
+ free(grouplist);
+ free(grouplist32);
+
+ return (rv != 0 ? -errno : 0);
+}
+
+/*
+ * mknod() - Since we don't have the SYS_CONFIG privilege within a zone, the
+ * only mode we have to support is S_IFIFO. We also have to distinguish between
+ * an invalid type and insufficient privileges.
+ */
+#define LX_S_IFMT 0170000
+#define LX_S_IFDIR 0040000
+#define LX_S_IFCHR 0020000
+#define LX_S_IFBLK 0060000
+#define LX_S_IFREG 0100000
+#define LX_S_IFIFO 0010000
+#define LX_S_IFLNK 0120000
+#define LX_S_IFSOCK 0140000
+
+/*ARGSUSED*/
+long
+lx_mknod(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ char *path = (char *)p1;
+ lx_dev_t lx_dev = (lx_dev_t)p3;
+ struct sockaddr_un sockaddr;
+ struct stat statbuf;
+ mode_t mode, type;
+ dev_t dev;
+ int fd;
+
+ type = ((mode_t)p2 & LX_S_IFMT);
+ mode = ((mode_t)p2 & 07777);
+
+ switch (type) {
+ case 0:
+ case LX_S_IFREG:
+ /* create a regular file */
+ if (stat(path, &statbuf) == 0)
+ return (-EEXIST);
+
+ if (errno != ENOENT)
+ return (-errno);
+
+ if ((fd = creat(path, mode)) < 0)
+ return (-errno);
+
+ (void) close(fd);
+ return (0);
+
+ case LX_S_IFSOCK:
+ /*
+ * Create a UNIX domain socket.
+ *
+ * Most programmers aren't even aware you can do this.
+ *
+ * Note you can also do this via illumos' mknod(2), but
+ * Linux allows anyone who can create a UNIX domain
+ * socket via bind(2) to create one via mknod(2);
+ * illumos requires the caller to be privileged.
+ */
+ if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0)
+ return (-errno);
+
+ if (stat(path, &statbuf) == 0)
+ return (-EEXIST);
+
+ if (errno != ENOENT)
+ return (-errno);
+
+ if (uucopy(path, &sockaddr.sun_path,
+ sizeof (sockaddr.sun_path)) < 0)
+ return (-errno);
+
+ /* assure NULL termination of sockaddr.sun_path */
+ sockaddr.sun_path[sizeof (sockaddr.sun_path) - 1] = '\0';
+ sockaddr.sun_family = AF_UNIX;
+
+ if (bind(fd, (struct sockaddr *)&sockaddr,
+ strlen(sockaddr.sun_path) +
+ sizeof (sockaddr.sun_family)) < 0)
+ return (-errno);
+
+ (void) close(fd);
+ return (0);
+
+ case LX_S_IFIFO:
+ dev = 0;
+ break;
+
+ case LX_S_IFCHR:
+ case LX_S_IFBLK:
+ /*
+ * The "dev" RPM package wants to create all possible Linux
+ * device nodes, so just report its mknod()s as having
+ * succeeded if we're in install mode.
+ */
+ if (lx_install != 0) {
+ lx_debug("lx_mknod: install mode spoofed creation of "
+ "Linux device [%lld, %lld]\n",
+ LX_GETMAJOR(lx_dev), LX_GETMINOR(lx_dev));
+
+ return (0);
+ }
+
+ dev = makedevice(LX_GETMAJOR(lx_dev), LX_GETMINOR(lx_dev));
+ break;
+
+ default:
+ return (-EINVAL);
+ }
+
+ return (mknod(path, mode | type, dev) ? -errno : 0);
+}
+
+long
+lx_sethostname(uintptr_t p1, uintptr_t p2)
+{
+ char *name = (char *)p1;
+ int len = (size_t)p2;
+
+ return (sethostname(name, len) ? -errno : 0);
+}
+
+long
+lx_setdomainname(uintptr_t p1, uintptr_t p2)
+{
+ char *name = (char *)p1;
+ int len = (size_t)p2;
+ long rval;
+
+ if (len < 0 || len >= LX_SYS_UTS_LN)
+ return (-EINVAL);
+
+ rval = sysinfo(SI_SET_SRPC_DOMAIN, name, len);
+
+ return ((rval < 0) ? -errno : 0);
+}
+
+long
+lx_execve(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ char *filename = (char *)p1;
+ char **argv = (char **)p2;
+ char **envp = (char **)p3;
+ char *nullist[] = { NULL };
+
+ if (argv == NULL)
+ argv = nullist;
+
+ /*
+ * Emulate PR_SET_KEEPCAPS which is reset on execve. If this is not done
+ * the emulated capabilities could be reduced more than expected.
+ */
+ (void) setpflags(PRIV_AWARE_RESET, 1);
+
+ /* This is a normal exec call. */
+ (void) execve(filename, argv, envp);
+
+ return (-errno);
+}
+
+long
+lx_setgroups(uintptr_t p1, uintptr_t p2)
+{
+ int ng = (int)p1;
+ gid_t *glist = NULL;
+ int i, r;
+
+ lx_debug("\tlx_setgroups(%d, 0x%p", ng, p2);
+
+ if (ng > 0) {
+ if ((glist = (gid_t *)malloc(ng * sizeof (gid_t))) == NULL)
+ return (-ENOMEM);
+
+ if (uucopy((void *)p2, glist, ng * sizeof (gid_t)) != 0) {
+ free(glist);
+ return (-errno);
+ }
+
+ /*
+ * Linux doesn't check the validity of the group IDs, but
+ * illumos does. Change any invalid group IDs to a known, valid
+ * value (yuck).
+ */
+ for (i = 0; i < ng; i++) {
+ if (glist[i] > MAXUID)
+ glist[i] = MAXUID;
+ }
+ }
+
+ /* order matters here to get the correct errno back */
+ if (ng > NGROUPS_MAX_DEFAULT) {
+ free(glist);
+ return (-EINVAL);
+ }
+
+ r = syscall(SYS_brand, B_HELPER_SETGROUPS, ng, glist);
+
+ free(glist);
+ return ((r == -1) ? -errno : r);
+}
+
+/*
+ * For syslog(), as there is no kernel and nothing to log, we simply emulate a
+ * kernel cyclic buffer (LOG_BUF_LEN) of 0 bytes, only handling errors for bad
+ * input. All actions except 3 and 10 require CAP_SYS_ADMIN or CAP_SYSLOG, in
+ * lieu of full capabilities support for now we just perform an euid check.
+ */
+long
+lx_syslog(int type, char *bufp, int len)
+{
+ if (type < 0 || type > 10)
+ return (-EINVAL);
+
+ if ((type != 3 && type != 10) && (geteuid() != 0))
+ return (-EPERM);
+
+ if ((type >= 2 && type <= 4) && (bufp == NULL || len < 0))
+ return (-EINVAL);
+
+ if ((type == 8) && (len < 1 || len > 8))
+ return (-EINVAL);
+
+ return (0);
+}
+
+/*
+ * The following are pass-through functions but we need to return the correct
+ * long so that the errno propagates back to the Linux code correctly.
+ */
+
+long
+lx_alarm(unsigned int seconds)
+{
+ int r;
+
+ r = alarm(seconds);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_close(int fildes)
+{
+ int r;
+
+ r = close(fildes);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_chdir(const char *path)
+{
+ int r;
+
+ r = chdir(path);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_chroot(const char *path)
+{
+ int r;
+
+ r = chroot(path);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_creat(const char *path, mode_t mode)
+{
+ int r;
+
+ r = creat(path, mode);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_dup(int fildes)
+{
+ int r;
+
+ r = dup(fildes);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_fchdir(int fildes)
+{
+ int r;
+
+ r = fchdir(fildes);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_getgid(void)
+{
+ int r;
+
+ r = getgid();
+ return (r);
+}
+
+long
+lx_getgroups(int gidsetsize, gid_t *grouplist)
+{
+ int r;
+
+ r = getgroups(gidsetsize, grouplist);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_getitimer(int which, struct itimerval *value)
+{
+ int r;
+
+ r = getitimer(which, value);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_getuid(void)
+{
+ int r;
+
+ r = getuid();
+ return (r);
+}
+
+long
+lx_inotify_add_watch(int fd, const char *pathname, uint32_t mask)
+{
+ int r;
+
+ r = inotify_add_watch(fd, pathname, mask);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_inotify_init(void)
+{
+ int r;
+
+ r = inotify_init();
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_inotify_init1(int flags)
+{
+ int r;
+
+ r = inotify_init1(flags);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_inotify_rm_watch(int fd, int wd)
+{
+ int r;
+
+ r = inotify_rm_watch(fd, wd);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_mincore(caddr_t addr, size_t len, char *vec)
+{
+ int r;
+
+ r = mincore(addr, len, vec);
+ if (r == -1 && errno == EINVAL) {
+ /*
+ * LTP mincore01 expects mincore with a huge len to fail with
+ * ENOMEM on a modern kernel but on Linux 2.6.11 and earlier it
+ * returns EINVAL.
+ */
+ if (strcmp(lx_release, "2.6.11") > 0 && (long)len < 0)
+ errno = ENOMEM;
+ }
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_munmap(void *addr, size_t len)
+{
+ int r;
+
+ r = munmap(addr, len);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_nice(int incr)
+{
+ int r;
+
+ r = nice(incr);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_pause(void)
+{
+ int r;
+
+ r = pause();
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_setgid(gid_t gid)
+{
+ int r;
+
+ r = setgid(gid);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_setuid(uid_t uid)
+{
+ int r;
+
+ r = setuid(uid);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_setregid(gid_t rgid, gid_t egid)
+{
+ int r;
+
+ r = setregid(rgid, egid);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_setreuid(uid_t ruid, uid_t euid)
+{
+ int r;
+
+ r = setreuid(ruid, euid);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_shmdt(char *shmaddr)
+{
+ int r;
+
+ r = shmdt(shmaddr);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_stime(const time_t *tp)
+{
+ int r;
+
+ r = stime(tp);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_symlink(const char *name1, const char *name2)
+{
+ int r;
+
+ r = symlink(name1, name2);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_umask(mode_t cmask)
+{
+ int r;
+
+ r = umask(cmask);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_utimes(const char *path, const struct timeval times[2])
+{
+ int r;
+
+ r = utimes(path, times);
+ return ((r == -1) ? -errno : r);
+}
+
+long
+lx_vhangup(void)
+{
+ if (geteuid() != 0)
+ return (-EPERM);
+
+ vhangup();
+
+ return (0);
+}
+
+long
+lx_eventfd(unsigned int initval)
+{
+ return (lx_eventfd2(initval, 0));
+}
+
+long
+lx_eventfd2(unsigned int initval, int flags)
+{
+ int r = eventfd(initval, flags);
+
+ /*
+ * eventfd(3C) may fail with ENOENT if /dev/eventfd is not available.
+ * It is less jarring to Linux programs to tell them that the system
+ * call is not supported than to report an error number they are not
+ * expecting.
+ */
+ if (r == -1 && errno == ENOENT)
+ return (-ENOTSUP);
+
+ return (r == -1 ? -errno : r);
+}
+
+/*
+ * We lucked out here. Linux and Solaris have exactly the same
+ * rusage structures.
+ */
+long
+lx_getrusage(uintptr_t p1, uintptr_t p2)
+{
+ int who = (int)p1;
+ struct rusage *rup = (struct rusage *)p2;
+ int rv, swho;
+
+ if (who == LX_RUSAGE_SELF)
+ swho = RUSAGE_SELF;
+ else if (who == LX_RUSAGE_CHILDREN)
+ swho = RUSAGE_CHILDREN;
+ else if (who == LX_RUSAGE_THREAD)
+ swho = RUSAGE_LWP;
+ else
+ return (-EINVAL);
+
+ rv = getrusage(swho, rup);
+
+ return (rv < 0 ? -errno : 0);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/module.c b/usr/src/lib/brand/lx/lx_brand/common/module.c
new file mode 100644
index 0000000000..78a593712f
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/module.c
@@ -0,0 +1,90 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * We don't support Linux modules, but we have to emulate enough of the system
+ * calls to show that we don't have any modules installed.
+ */
+
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_syscall.h>
+
+/*
+ * For query_module(), we provide an empty list of modules, and return ENOENT
+ * on any request for a specific module.
+ */
+#define LX_QM_MODULES 1
+#define LX_QM_DEPS 2
+#define LX_QM_REFS 3
+#define LX_QM_SYMBOLS 4
+#define LX_QM_INFO 5
+
+/*ARGSUSED*/
+long
+lx_query_module(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
+ uintptr_t p5)
+{
+ /*
+ * parameter p1 is the 'name' argument.
+ */
+ int which = (int)p2;
+ char *buf = (char *)p3;
+ size_t bufsize = (size_t)p4;
+ size_t *ret = (size_t *)p5;
+
+ switch (which) {
+ case 0:
+ /*
+ * Special case: always return 0
+ */
+ return (0);
+
+ case LX_QM_MODULES:
+ /*
+ * Generate an empty list of modules.
+ */
+ if (bufsize && buf)
+ buf[0] = '\0';
+ if (ret)
+ *ret = 0;
+ return (0);
+
+ case LX_QM_DEPS:
+ case LX_QM_REFS:
+ case LX_QM_SYMBOLS:
+ case LX_QM_INFO:
+ /*
+ * Any requests for specific module information return ENOENT.
+ */
+ return (-ENOENT);
+
+ default:
+ return (-EINVAL);
+ }
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/mount.c b/usr/src/lib/brand/lx/lx_brand/common/mount.c
new file mode 100644
index 0000000000..2b2fc951e5
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/mount.c
@@ -0,0 +1,1071 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <alloca.h>
+#include <assert.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <signal.h>
+#include <string.h>
+#include <strings.h>
+#include <nfs/mount.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+#include <sys/lx_autofs.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_mount.h>
+
+/*
+ * support definitions
+ */
+union fh_buffer {
+ struct nfs_fid fh2;
+ struct nfs_fh3 fh3;
+ char fh_data[NFS3_FHSIZE + 2];
+};
+
+typedef enum mount_opt_type {
+ MOUNT_OPT_INVALID = 0,
+ MOUNT_OPT_NORMAL = 1, /* option value: none */
+ MOUNT_OPT_UINT = 2, /* option value: unsigned int */
+ MOUNT_OPT_BYTESIZE = 3 /* option value: byte size, e.g. 25m */
+} mount_opt_type_t;
+
+typedef struct mount_opt {
+ char *mo_name;
+ mount_opt_type_t mo_type;
+} mount_opt_t;
+
+
+/*
+ * Globals
+ */
+mount_opt_t lofs_options[] = {
+ { NULL, MOUNT_OPT_INVALID }
+};
+
+mount_opt_t lx_proc_options[] = {
+ { NULL, MOUNT_OPT_INVALID }
+};
+
+mount_opt_t lx_sysfs_options[] = {
+ { NULL, MOUNT_OPT_INVALID }
+};
+
+mount_opt_t lx_tmpfs_options[] = {
+ { "size", MOUNT_OPT_BYTESIZE },
+ { "mode", MOUNT_OPT_UINT },
+ { "uid", MOUNT_OPT_UINT },
+ { "gid", MOUNT_OPT_UINT },
+ { NULL, MOUNT_OPT_INVALID }
+};
+
+mount_opt_t lx_autofs_options[] = {
+ { LX_MNTOPT_FD, MOUNT_OPT_UINT },
+ { LX_MNTOPT_PGRP, MOUNT_OPT_UINT },
+ { LX_MNTOPT_MINPROTO, MOUNT_OPT_UINT },
+ { LX_MNTOPT_MAXPROTO, MOUNT_OPT_UINT },
+ { LX_MNTOPT_INDIRECT, MOUNT_OPT_NORMAL },
+ { LX_MNTOPT_DIRECT, MOUNT_OPT_NORMAL },
+ { LX_MNTOPT_OFFSET, MOUNT_OPT_NORMAL },
+ { NULL, MOUNT_OPT_INVALID }
+};
+
+
+/*
+ * i_lx_opt_verify() - Check the mount options.
+ *
+ * You might wonder why we're being so strict about the mount options
+ * we allow. The reason is that normally all mount option verification
+ * is done by the Solaris userland mount command. Once mount options
+ * are passed to the kernel, invalid options are simply ignored. So
+ * if we actually want to catch requests for functionality that we
+ * don't support, or if we want to make sure that we don't randomly
+ * enable options that we haven't check to make sure they have the
+ * same syntax on Linux and Solaris, we need to reject any options
+ * we don't know to be ok here.
+ */
+static int
+i_lx_opt_verify(char *opts, mount_opt_t *mop)
+{
+ int opts_len = strlen(opts);
+ char *opts_tmp, *opt;
+ int opt_len, i;
+
+ assert((opts != NULL) && (mop != NULL));
+
+ /* If no options were specified, there's no problem. */
+ if (opts_len == 0) {
+ errno = 0;
+ return (0);
+ }
+
+ /* If no options are allowed, fail. */
+ if (mop[0].mo_name == NULL) {
+ errno = ENOTSUP;
+ return (-1);
+ }
+
+ /* Don't accept leading or trailing ','. */
+ if ((opts[0] == ',') || (opts[opts_len] == ',')) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ /* Don't accept sequential ','. */
+ for (i = 1; i < opts_len; i++) {
+ if ((opts[i - 1] == ',') && (opts[i] == ',')) {
+ errno = EINVAL;
+ return (-1);
+ }
+ }
+
+ /*
+ * We're going to use strtok() which modifies the target
+ * string so make a temporary copy.
+ */
+ opts_tmp = SAFE_ALLOCA(opts_len);
+ if (opts_tmp == NULL) {
+ errno = ENOMEM;
+ return (-1);
+ }
+ bcopy(opts, opts_tmp, opts_len + 1);
+
+ /* Verify each prop one at a time. */
+ opt = strtok(opts_tmp, ",");
+ opt_len = strlen(opt);
+ for (;;) {
+
+ /* Check for matching option/value pair. */
+ for (i = 0; mop[i].mo_name != NULL; i++) {
+ char *ovalue;
+ int ovalue_len, mo_len;
+
+ /* If the options is too short don't bother comparing */
+ mo_len = strlen(mop[i].mo_name);
+ if (opt_len < mo_len) {
+ /* Keep trying to find a match. */
+ continue;
+ }
+
+ /* Compare the option to an allowed option. */
+ if (strncmp(mop[i].mo_name, opt, mo_len) != 0) {
+ /* Keep trying to find a match. */
+ continue;
+ }
+
+ if (mop[i].mo_type == MOUNT_OPT_NORMAL) {
+ /* The option doesn't take a value. */
+ if (opt_len == mo_len) {
+ /* This option is ok. */
+ break;
+ } else {
+ /* Keep trying to find a match. */
+ continue;
+ }
+ }
+
+ /* This options takes a value. */
+ if ((opt_len == mo_len) || (opt[mo_len] != '=')) {
+ /* Keep trying to find a match. */
+ continue;
+ }
+
+ /* We have an option match. Verify option value. */
+ ovalue = &opt[mo_len] + 1;
+ ovalue_len = strlen(ovalue);
+
+ /* Value can't be zero length string. */
+ if (ovalue_len == 0) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ if (mop[i].mo_type == MOUNT_OPT_UINT) {
+ int j;
+ /* Verify that value is an unsigned int. */
+ for (j = 0; j < ovalue_len; j++) {
+ if (!isdigit(ovalue[j])) {
+ errno = EINVAL;
+ return (-1);
+ }
+ }
+ } else if (mop[i].mo_type == MOUNT_OPT_BYTESIZE) {
+ int j;
+ int stage = 0;
+ int suffix;
+
+ /*
+ * Verify that the value is an unsigned integer
+ * that ends in a magnitude suffix (i.e. case
+ * insensitive 'k' 'm' or 'g') or a '%'
+ * character.
+ */
+ for (j = 0; j < ovalue_len; j++) {
+ switch (stage) {
+ case 0:
+ /*
+ * Look for at least one digit.
+ */
+ if (!isdigit(ovalue[j])) {
+ errno = EINVAL;
+ return (-1);
+ }
+ stage = 1;
+ break;
+ case 1:
+ /*
+ * Allow an unlimited number of
+ * digits.
+ */
+ if (isdigit(ovalue[j])) {
+ break;
+ }
+ /*
+ * Allow one (valid) byte
+ * magnitude character.
+ */
+ suffix = tolower(ovalue[j]);
+ if (suffix == 'k' ||
+ suffix == 'm' ||
+ suffix == 'g' ||
+ suffix == '\%') {
+ stage = 2;
+ break;
+ }
+ errno = EINVAL;
+ return (-1);
+ case 2:
+ /*
+ * Invalid trailing characters.
+ */
+ errno = EINVAL;
+ return (-1);
+ }
+ }
+
+ if (stage < 1) {
+ errno = EINVAL;
+ return (-1);
+ }
+ } else {
+ /* Unknown option type specified. */
+ assert(0);
+ }
+
+ /* The option is ok. */
+ break;
+ }
+
+ /* If there were no matches this is an unsupported option. */
+ if (mop[i].mo_name == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ /* This option is ok, move onto the next option. */
+ if ((opt = strtok(NULL, ",")) == NULL)
+ break;
+ opt_len = strlen(opt);
+ };
+
+ /* We verified all the options. */
+ return (0);
+}
+
+/*
+ * Remove an option from the string and save it in the provided buffer.
+ * The option string should have already been verified as valid.
+ * Return 0 if not present, -1 if error, and 1 if present and fine.
+ */
+static int
+opt_rm(char *opts, char *rmopt, char *retstr, int retlen)
+{
+ int opts_len = strlen(opts);
+ char *optstart, *optend;
+ int optlen;
+
+ assert((opts != NULL) && (rmopt != NULL));
+
+ retstr[0] = '\0';
+
+ /* If no options were specified, there's no problem. */
+ if (opts_len == 0)
+ return (0);
+
+ if ((optstart = strstr(opts, rmopt)) == NULL)
+ return (0);
+
+ for (optend = optstart; *optend != ',' && *optend != '\0'; optend++)
+ ;
+
+ optlen = optend - optstart;
+ if (optlen >= retlen)
+ return (-1);
+ strncpy(retstr, optstart, optlen);
+ retstr[optlen] = '\0';
+
+ if (*optend == ',')
+ optend++;
+
+ optlen = strlen(optend) + 1;
+ bcopy(optend, optstart, optlen);
+
+ if (*optstart == '\0' && optstart != opts) {
+ /* removed last opt and it had a preceeding opt, remove comma */
+ *(optstart - 1) = '\0';
+ }
+
+ return (1);
+}
+
+static int
+opt_id_val(char *opt, int *valp)
+{
+ char *vp;
+ long lval;
+
+ if ((vp = strchr(opt, '=')) == NULL)
+ return (-1);
+
+ vp++;
+ if (!isdigit(*vp))
+ return (-1);
+
+ lval = strtol(vp, &vp, 10);
+ if (*vp != '\0' || lval > INT_MAX)
+ return (-1);
+
+ *valp = (int)lval;
+ return (0);
+}
+
+static int
+i_add_option(char *option, char *buf, size_t buf_size)
+{
+ char *fmt_str = NULL;
+
+ assert((option != NULL) && (strlen(option) > 0));
+ assert((buf != NULL) && (buf_size > 0));
+
+ if (buf[0] == '\0') {
+ fmt_str = "%s";
+ } else {
+ fmt_str = ",%s";
+ }
+
+ buf_size -= strlen(buf);
+ buf += strlen(buf);
+
+ /*LINTED*/
+ if (snprintf(buf, buf_size, fmt_str, option) > (buf_size - 1))
+ return (-EOVERFLOW);
+ return (0);
+}
+
+static int
+i_add_option_int(char *option, int val, char *buf, size_t buf_size)
+{
+ char *fmt_str = NULL;
+
+ assert((option != NULL) && (strlen(option) > 0));
+ assert((buf != NULL) && (buf_size > 0));
+
+ if (buf[0] == '\0') {
+ fmt_str = "%s=%d";
+ } else {
+ fmt_str = ",%s=%d";
+ }
+
+ buf_size -= strlen(buf);
+ buf += strlen(buf);
+
+ /*LINTED*/
+ if (snprintf(buf, buf_size, fmt_str, option, val) > (buf_size - 1))
+ return (-EOVERFLOW);
+ return (0);
+}
+
+static int
+i_make_nfs_args(lx_nfs_mount_data_t *lx_nmd, struct nfs_args *nfs_args,
+ struct netbuf *nfs_args_addr, struct knetconfig *nfs_args_knconf,
+ union fh_buffer *nfs_args_fh, struct sec_data *nfs_args_secdata,
+ char *fstype, char *options, int options_size)
+{
+ struct stat statbuf;
+ int i, rv, use_tcp;
+
+ /* Sanity check the incomming Linux request. */
+ if ((lx_nmd->nmd_rsize < 0) || (lx_nmd->nmd_wsize < 0) ||
+ (lx_nmd->nmd_timeo < 0) || (lx_nmd->nmd_retrans < 0) ||
+ (lx_nmd->nmd_acregmin < 0) || (lx_nmd->nmd_acregmax < 0) ||
+ (lx_nmd->nmd_acdirmax < 0)) {
+ return (-EINVAL);
+ }
+
+ /*
+ * Additional sanity checks of incomming request.
+ *
+ * Some of the sanity checks below should probably return
+ * EINVAL (or some other error code) instead or ENOTSUP,
+ * but without experiminting on Linux to see how it
+ * deals with certain strange values there is no way
+ * to really know what we should return, hence we return
+ * ENOTSUP to tell us that eventually if we see some
+ * application hitting the problem we can go to a real
+ * Linux system, figure out how it deals with the situation
+ * and update our code to handle it in the same fashion.
+ */
+ if (lx_nmd->nmd_version != 4) {
+ lx_unsupported("unsupported nfs mount request, "
+ "unrecognized NFS mount structure: %d\n",
+ lx_nmd->nmd_version);
+ return (-ENOTSUP);
+ }
+ if ((lx_nmd->nmd_flags & ~LX_NFS_MOUNT_SUPPORTED) != 0) {
+ lx_unsupported("unsupported nfs mount request, "
+ "flags: 0x%x\n", lx_nmd->nmd_flags);
+ return (-ENOTSUP);
+ }
+ if (lx_nmd->nmd_addr.sin_family != AF_INET) {
+ lx_unsupported("unsupported nfs mount request, "
+ "transport address family: 0x%x\n",
+ lx_nmd->nmd_addr.sin_family);
+ return (-ENOTSUP);
+ }
+ for (i = 0; i < LX_NMD_MAXHOSTNAMELEN; i++) {
+ if (lx_nmd->nmd_hostname[i] == '\0')
+ break;
+ }
+ if (i == 0) {
+ lx_unsupported("unsupported nfs mount request, "
+ "no hostname specified\n");
+ return (-ENOTSUP);
+ }
+ if (i == LX_NMD_MAXHOSTNAMELEN) {
+ lx_unsupported("unsupported nfs mount request, "
+ "hostname not terminated\n");
+ return (-ENOTSUP);
+ }
+ if (lx_nmd->nmd_namlen < i) {
+ lx_unsupported("unsupported nfs mount request, "
+ "invalid namlen value: 0x%x\n", lx_nmd->nmd_namlen);
+ return (-ENOTSUP);
+ }
+ if (lx_nmd->nmd_bsize != 0) {
+ lx_unsupported("unsupported nfs mount request, "
+ "bsize value: 0x%x\n", lx_nmd->nmd_bsize);
+ return (-ENOTSUP);
+ }
+
+ /* Initialize and clear the output structure pointers passed in. */
+ bzero(nfs_args, sizeof (*nfs_args));
+ bzero(nfs_args_addr, sizeof (*nfs_args_addr));
+ bzero(nfs_args_knconf, sizeof (*nfs_args_knconf));
+ bzero(nfs_args_fh, sizeof (*nfs_args_fh));
+ bzero(nfs_args_secdata, sizeof (*nfs_args_secdata));
+ nfs_args->addr = nfs_args_addr;
+ nfs_args->knconf = nfs_args_knconf;
+ nfs_args->fh = (caddr_t)nfs_args_fh;
+ nfs_args->nfs_ext_u.nfs_extB.secdata = nfs_args_secdata;
+
+ /* Check if we're using tcp. */
+ use_tcp = (lx_nmd->nmd_flags & LX_NFS_MOUNT_TCP) ? 1 : 0;
+
+ /*
+ * These seem to be the default flags used by Solaris for v2 and v3
+ * nfs mounts.
+ *
+ * Don't bother with NFSMNT_TRYRDMA since we always specify a
+ * transport (either udp or tcp).
+ */
+ nfs_args->flags = NFSMNT_NEWARGS | NFSMNT_KNCONF | NFSMNT_INT |
+ NFSMNT_HOSTNAME;
+
+ /* Translate some Linux mount flags into Solaris mount flags. */
+ if (lx_nmd->nmd_flags & LX_NFS_MOUNT_SOFT)
+ nfs_args->flags |= NFSMNT_SOFT;
+ if (lx_nmd->nmd_flags & LX_NFS_MOUNT_INTR)
+ nfs_args->flags |= NFSMNT_INT;
+ if (lx_nmd->nmd_flags & LX_NFS_MOUNT_POSIX)
+ nfs_args->flags |= NFSMNT_POSIX;
+ if (lx_nmd->nmd_flags & LX_NFS_MOUNT_NOCTO)
+ nfs_args->flags |= NFSMNT_NOCTO;
+ if (lx_nmd->nmd_flags & LX_NFS_MOUNT_NOAC)
+ nfs_args->flags |= NFSMNT_NOAC;
+ if (lx_nmd->nmd_flags & LX_NFS_MOUNT_NONLM)
+ nfs_args->flags |= NFSMNT_LLOCK;
+
+ if ((lx_nmd->nmd_flags & LX_NFS_MOUNT_VER3) != 0) {
+ (void) strcpy(fstype, "nfs3");
+ if ((rv = i_add_option_int("vers", 3,
+ options, options_size)) != 0)
+ return (rv);
+
+ if (lx_nmd->nmd_root.lx_fh3_length >
+ sizeof (nfs_args_fh->fh3.fh3_u.data)) {
+ lx_unsupported("unsupported nfs mount request, "
+ "nfs file handle length: 0x%x\n",
+ lx_nmd->nmd_root.lx_fh3_length);
+ return (-ENOTSUP);
+ }
+
+ /* Set the v3 file handle info. */
+ nfs_args_fh->fh3.fh3_length = lx_nmd->nmd_root.lx_fh3_length;
+ bcopy(&lx_nmd->nmd_root.lx_fh3_data,
+ nfs_args_fh->fh3.fh3_u.data,
+ lx_nmd->nmd_root.lx_fh3_length);
+ } else {
+ /*
+ * Assume nfs v2. Note that this could also be a v1
+ * mount request but there doesn't seem to be any difference
+ * in the parameters passed to the Linux mount system
+ * call for v1 or v2 mounts so there is no way of really
+ * knowing.
+ */
+ (void) strcpy(fstype, "nfs");
+ if ((rv = i_add_option_int("vers", 2,
+ options, options_size)) != 0)
+ return (rv);
+
+ /* Solaris seems to add this flag when using v2. */
+ nfs_args->flags |= NFSMNT_SECDEFAULT;
+
+ /* Set the v2 file handle info. */
+ bcopy(&lx_nmd->nmd_old_root,
+ nfs_args_fh, sizeof (nfs_args_fh->fh2));
+ }
+
+ /*
+ * We can't use getnetconfig() here because there is no netconfig
+ * database in linux.
+ */
+ nfs_args_knconf->knc_protofmly = "inet";
+ if (use_tcp) {
+ /*
+ * TCP uses NC_TPI_COTS_ORD semantics.
+ * See /etc/netconfig.
+ */
+ nfs_args_knconf->knc_semantics = NC_TPI_COTS_ORD;
+ nfs_args_knconf->knc_proto = "tcp";
+ if ((rv = i_add_option("proto=tcp",
+ options, options_size)) != 0)
+ return (rv);
+ if (stat("/dev/tcp", &statbuf) != 0)
+ return (-errno);
+ nfs_args_knconf->knc_rdev = statbuf.st_rdev;
+ } else {
+ /*
+ * Assume UDP. UDP uses NC_TPI_CLTS semantics.
+ * See /etc/netconfig.
+ */
+ nfs_args_knconf->knc_semantics = NC_TPI_CLTS;
+ nfs_args_knconf->knc_proto = "udp";
+ if ((rv = i_add_option("proto=udp",
+ options, options_size)) != 0)
+ return (rv);
+ if (stat("/dev/udp", &statbuf) != 0)
+ return (-errno);
+ nfs_args_knconf->knc_rdev = statbuf.st_rdev;
+ }
+
+ /* Set the server address. */
+ nfs_args_addr->maxlen = nfs_args_addr->len =
+ sizeof (struct sockaddr_in);
+ nfs_args_addr->buf = (char *)&lx_nmd->nmd_addr;
+
+ /* Set the server hostname string. */
+ nfs_args->hostname = lx_nmd->nmd_hostname;
+
+ /* Translate Linux nfs mount parameters into Solaris mount options. */
+ if (lx_nmd->nmd_rsize != LX_NMD_DEFAULT_RSIZE) {
+ if ((rv = i_add_option_int("rsize", lx_nmd->nmd_rsize,
+ options, options_size)) != 0)
+ return (rv);
+ nfs_args->rsize = lx_nmd->nmd_rsize;
+ nfs_args->flags |= NFSMNT_RSIZE;
+ }
+ if (lx_nmd->nmd_wsize != LX_NMD_DEFAULT_WSIZE) {
+ if ((rv = i_add_option_int("wsize", lx_nmd->nmd_wsize,
+ options, options_size)) != 0)
+ return (rv);
+ nfs_args->wsize = lx_nmd->nmd_wsize;
+ nfs_args->flags |= NFSMNT_WSIZE;
+ }
+ if ((rv = i_add_option_int("timeo", lx_nmd->nmd_timeo,
+ options, options_size)) != 0)
+ return (rv);
+ nfs_args->timeo = lx_nmd->nmd_timeo;
+ nfs_args->flags |= NFSMNT_TIMEO;
+ if ((rv = i_add_option_int("retrans", lx_nmd->nmd_retrans,
+ options, options_size)) != 0)
+ return (rv);
+ nfs_args->retrans = lx_nmd->nmd_retrans;
+ nfs_args->flags |= NFSMNT_RETRANS;
+ if ((rv = i_add_option_int("acregmin", lx_nmd->nmd_acregmin,
+ options, options_size)) != 0)
+ return (rv);
+ nfs_args->acregmin = lx_nmd->nmd_acregmin;
+ nfs_args->flags |= NFSMNT_ACREGMIN;
+ if ((rv = i_add_option_int("acregmax", lx_nmd->nmd_acregmax,
+ options, options_size)) != 0)
+ return (rv);
+ nfs_args->acregmax = lx_nmd->nmd_acregmax;
+ nfs_args->flags |= NFSMNT_ACREGMAX;
+ if ((rv = i_add_option_int("acdirmin", lx_nmd->nmd_acdirmin,
+ options, options_size)) != 0)
+ return (rv);
+ nfs_args->acdirmin = lx_nmd->nmd_acdirmin;
+ nfs_args->flags |= NFSMNT_ACDIRMIN;
+ if ((rv = i_add_option_int("acdirmax", lx_nmd->nmd_acdirmax,
+ options, options_size)) != 0)
+ return (rv);
+ nfs_args->acdirmax = lx_nmd->nmd_acdirmax;
+ nfs_args->flags |= NFSMNT_ACDIRMAX;
+
+ /* We only support nfs with a security type of AUTH_SYS. */
+ nfs_args->nfs_args_ext = NFS_ARGS_EXTB;
+ nfs_args_secdata->secmod = AUTH_SYS;
+ nfs_args_secdata->rpcflavor = AUTH_SYS;
+ nfs_args_secdata->flags = 0;
+ nfs_args_secdata->uid = 0;
+ nfs_args_secdata->data = NULL;
+ nfs_args->nfs_ext_u.nfs_extB.next = NULL;
+
+ /*
+ * The Linux nfs mount command seems to pass an open socket fd
+ * to the kernel during the mount system call. We don't need
+ * this fd on Solaris so just close it.
+ */
+ (void) close(lx_nmd->nmd_fd);
+
+ return (0);
+}
+
+long
+lx_mount(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
+ uintptr_t p5)
+{
+ /* Linux input arguments. */
+ const char *sourcep = (const char *)p1;
+ const char *targetp = (const char *)p2;
+ const char *fstypep = (const char *)p3;
+ unsigned int flags = (unsigned int)p4;
+ const void *datap = (const void *)p5;
+
+ /* Variables needed for all mounts. */
+ char source[MAXPATHLEN + LX_NMD_MAXHOSTNAMELEN + 1];
+ char target[MAXPATHLEN];
+ char fstype[MAXPATHLEN], options[MAX_MNTOPT_STR];
+ int sflags, rv;
+ long res;
+ boolean_t is_tmpfs = B_FALSE;
+
+ /* Variable for tmpfs mounts. */
+ int uid = -1;
+ int gid = -1;
+
+ /* Variables needed for nfs mounts. */
+ lx_nfs_mount_data_t lx_nmd;
+ struct nfs_args nfs_args;
+ struct netbuf nfs_args_addr;
+ struct knetconfig nfs_args_knconf;
+ union fh_buffer nfs_args_fh;
+ struct sec_data nfs_args_secdata;
+ char *sdataptr = NULL;
+ int sdatalen = 0;
+
+ /* Initialize Solaris mount arguments. */
+ sflags = MS_OPTIONSTR;
+ options[0] = '\0';
+ sdatalen = 0;
+
+ /* Copy in parameters that are always present. */
+ rv = uucopystr((void *)sourcep, &source, sizeof (source));
+ if ((rv == -1) || (rv == sizeof (source)))
+ return (-EFAULT);
+
+ rv = uucopystr((void *)targetp, &target, sizeof (target));
+ if ((rv == -1) || (rv == sizeof (target)))
+ return (-EFAULT);
+
+ rv = uucopystr((void *)fstypep, &fstype, sizeof (fstype));
+ if ((rv == -1) || (rv == sizeof (fstype)))
+ return (-EFAULT);
+
+ lx_debug("\tlinux mount source: %s", source);
+ lx_debug("\tlinux mount target: %s", target);
+ lx_debug("\tlinux mount fstype: %s", fstype);
+
+ /* Make sure we support the requested mount flags. */
+ if ((flags & ~LX_MS_SUPPORTED) != 0) {
+ lx_unsupported("unsupported mount flags: 0x%x", flags);
+ return (-ENOTSUP);
+ }
+
+ /* Do filesystem specific mount work. */
+ if (flags & LX_MS_BIND) {
+
+ /* If MS_BIND is set, we turn this into a lofs mount. */
+ (void) strcpy(fstype, "lofs");
+
+ /* Copy in Linux mount options. */
+ if (datap != NULL) {
+ rv = uucopystr((void *)datap,
+ options, sizeof (options));
+ if ((rv == -1) || (rv == sizeof (options)))
+ return (-EFAULT);
+ }
+ lx_debug("\tlinux mount options: \"%s\"", options);
+
+ /* Verify Linux mount options. */
+ if (i_lx_opt_verify(options, lofs_options) != 0) {
+ lx_unsupported("unsupported lofs mount options: %s",
+ options);
+ return (-errno);
+ }
+ } else if (strcmp(fstype, "tmpfs") == 0) {
+ char idstr[64];
+
+ /* Copy in Linux mount options. */
+ if (datap != NULL) {
+ rv = uucopystr((void *)datap,
+ options, sizeof (options));
+ if ((rv == -1) || (rv == sizeof (options)))
+ return (-EFAULT);
+ }
+ lx_debug("\tlinux mount options: \"%s\"", options);
+
+ /* Verify Linux mount options. */
+ if (i_lx_opt_verify(options, lx_tmpfs_options) != 0) {
+ lx_unsupported("unsupported tmpfs mount options: %s",
+ options);
+ return (-errno);
+ }
+
+ /*
+ * Linux defaults to mode=1777 for tmpfs mounts.
+ */
+ if (strstr(options, "mode=") == NULL) {
+ if (options[0] != '\0')
+ (void) strlcat(options, ",", sizeof (options));
+ (void) strlcat(options, "mode=1777", sizeof (options));
+ }
+
+ switch (opt_rm(options, "uid=", idstr, sizeof (idstr))) {
+ case 0:
+ uid = -1;
+ break;
+ case 1:
+ if (opt_id_val(idstr, &uid) < 0)
+ return (-EINVAL);
+ break;
+ default:
+ return (-E2BIG);
+ }
+ switch (opt_rm(options, "gid=", idstr, sizeof (idstr))) {
+ case 0:
+ gid = -1;
+ break;
+ case 1:
+ if (opt_id_val(idstr, &gid) < 0)
+ return (-EINVAL);
+ break;
+ default:
+ return (-E2BIG);
+ }
+
+ /*
+ * Linux seems to always allow overlay mounts. We allow this
+ * everywhere except under /dev where it interferes with device
+ * emulation.
+ */
+ if (strcmp(targetp, "/dev") != 0 &&
+ strncmp(targetp, "/dev/", 5) != 0)
+ sflags |= MS_OVERLAY;
+
+ is_tmpfs = B_TRUE;
+
+ } else if (strcmp(fstype, "proc") == 0) {
+ struct stat64 sb;
+
+ /* Translate proc mount requests to lx_proc requests. */
+ (void) strcpy(fstype, "lx_proc");
+
+ /* Copy in Linux mount options. */
+ if (datap != NULL) {
+ rv = uucopystr((void *)datap,
+ options, sizeof (options));
+ if ((rv == -1) || (rv == sizeof (options)))
+ return (-EFAULT);
+ }
+ lx_debug("\tlinux mount options: \"%s\"", options);
+
+ /* Verify Linux mount options. */
+ if (i_lx_opt_verify(options, lx_proc_options) != 0) {
+ lx_unsupported("unsupported proc mount options: %s",
+ options);
+ return (-errno);
+ }
+
+ /* If mounting proc over itself, just return ok */
+ if (stat64(target, &sb) == 0 &&
+ strcmp(sb.st_fstype, "lx_proc") == 0) {
+ return (0);
+ }
+ } else if (strcmp(fstype, "sysfs") == 0) {
+ /* Translate sysfs mount requests to lx_sysfs requests. */
+ (void) strcpy(fstype, "lx_sysfs");
+
+ /* Copy in Linux mount options. */
+ if (datap != NULL) {
+ rv = uucopystr((void *)datap,
+ options, sizeof (options));
+ if ((rv == -1) || (rv == sizeof (options)))
+ return (-EFAULT);
+ }
+ lx_debug("\tlinux mount options: \"%s\"", options);
+
+ /* Verify Linux mount options. */
+ if (i_lx_opt_verify(options, lx_sysfs_options) != 0) {
+ lx_unsupported("unsupported sysfs mount options: %s",
+ options);
+ return (-errno);
+ }
+ } else if (strcmp(fstype, "cgroup") == 0) {
+ /* Translate cgroup mount requests to lx_cgroup requests. */
+ (void) strcpy(fstype, "lx_cgroup");
+
+ /* Copy in Linux mount options. */
+ if (datap != NULL) {
+ rv = uucopystr((void *)datap,
+ options, sizeof (options));
+ if ((rv == -1) || (rv == sizeof (options)))
+ return (-EFAULT);
+ }
+ lx_debug("\tlinux mount options: \"%s\"", options);
+
+ /*
+ * Currently don't verify Linux mount options since we can
+ * have asubsystem string provided.
+ */
+
+ } else if (strcmp(fstype, "autofs") == 0) {
+
+ /* Translate autofs mount requests to lxautofs requests. */
+ (void) strcpy(fstype, LX_AUTOFS_NAME);
+
+ /* Copy in Linux mount options. */
+ if (datap != NULL) {
+ rv = uucopystr((void *)datap,
+ options, sizeof (options));
+ if ((rv == -1) || (rv == sizeof (options)))
+ return (-EFAULT);
+ }
+ lx_debug("\tlinux mount options: \"%s\"", options);
+
+ /* Verify Linux mount options. */
+ if (i_lx_opt_verify(options, lx_autofs_options) != 0) {
+ lx_unsupported("unsupported autofs mount options: %s",
+ options);
+ return (-errno);
+ }
+
+ /* Linux seems to always allow overlay mounts */
+ sflags |= MS_OVERLAY;
+
+ } else if (strcmp(fstype, "nfs") == 0) {
+
+ /*
+ * Copy in Linux mount options. Note that for older Linux
+ * kernels (pre 2.6.23) the mount options pointer (which
+ * normally points to a string) points to a structure which
+ * is populated by the user-level code after it has done the
+ * preliminary RPCs (similar to how our NFS mount cmd works).
+ * For newer kernels the options pointer is just a string of
+ * options. We're unlikely to actually emulate a kernel that
+ * uses the old style but support is kept and handled in
+ * i_make_nfs_args(). The new style handling is implemented in
+ * nfs_pre_mount(). The user-level mount caller is in charge of
+ * determining the format in which it passes the data parameter.
+ */
+ int vers;
+
+ if (datap == NULL)
+ return (-EINVAL);
+ if (uucopy((void *)datap, &vers, sizeof (int)) < 0)
+ return (-errno);
+
+ /*
+ * As described above, the data parameter might be a versioned
+ * lx_nmd structure or (most likely) it is just a string.
+ */
+ switch (vers) {
+ case 1:
+ case 2:
+ case 3:
+ case 5:
+ case 6:
+ lx_unsupported("unsupported nfs mount request "
+ "version: %d\n", vers);
+ return (-ENOTSUP);
+
+ case 4:
+ if (uucopy((void *)datap, &lx_nmd, sizeof (lx_nmd)) < 0)
+ return (-errno);
+
+ /*
+ * For Illumos nfs mounts, the kernel expects a special
+ * structure, but a pointer to this structure is passed
+ * in via an extra parameter (sdataptr below.)
+ */
+ if ((rv = i_make_nfs_args(&lx_nmd, &nfs_args,
+ &nfs_args_addr, &nfs_args_knconf, &nfs_args_fh,
+ &nfs_args_secdata, fstype, options,
+ sizeof (options))) != 0)
+ return (rv);
+
+ break;
+
+ default:
+ /*
+ * Handle new style with options as a string, make
+ * the preliminary RPC calls and do the native mount
+ * all within lx_nfs_mount().
+ */
+ if (uucopystr((void *)datap, options,
+ sizeof (options)) < 0)
+ return (-errno);
+ return (lx_nfs_mount(source, target, fstype, flags,
+ options));
+ break;
+ }
+
+ /*
+ * For nfs mounts we need to tell the mount system call
+ * to expect extra parameters.
+ */
+ sflags |= MS_DATA;
+ sdataptr = (char *)&nfs_args;
+ sdatalen = sizeof (nfs_args);
+
+ /* Linux seems to always allow overlay mounts */
+ sflags |= MS_OVERLAY;
+
+ } else {
+ lx_unsupported("unsupported mount filesystem type: %s", fstype);
+ return (-ENODEV);
+ }
+
+ /* Convert some Linux flags to Illumos flags. */
+ if (flags & LX_MS_RDONLY)
+ sflags |= MS_RDONLY;
+ if (flags & LX_MS_NOSUID)
+ sflags |= MS_NOSUID;
+ if (flags & LX_MS_REMOUNT)
+ sflags |= MS_REMOUNT;
+
+ /*
+ * Convert some Linux flags to Illumos option strings.
+ */
+ if (flags & LX_MS_STRICTATIME) {
+ /*
+ * The "strictatime" mount option ensures that none of the
+ * weaker atime-related mode options are in effect.
+ */
+ flags &= ~(LX_MS_RELATIME | LX_MS_NOATIME);
+ }
+ if ((flags & LX_MS_NODEV) &&
+ ((rv = i_add_option("nodev", options, sizeof (options))) != 0))
+ return (rv);
+ if ((flags & LX_MS_NOEXEC) &&
+ ((rv = i_add_option("noexec", options, sizeof (options))) != 0))
+ return (rv);
+ if ((flags & LX_MS_NOATIME) &&
+ ((rv = i_add_option("noatime", options, sizeof (options))) != 0))
+ return (rv);
+
+ lx_debug("\tsolaris mount fstype: %s", fstype);
+ lx_debug("\tsolaris mount options: \"%s\"", options);
+
+ res = mount(source, target, sflags, fstype, sdataptr, sdatalen,
+ options, sizeof (options));
+
+ if (res == 0) {
+ if (is_tmpfs) {
+ /* Handle uid/gid mount options. */
+ if (uid != -1 || gid != -1)
+ (void) chown(target, uid, gid);
+ return (0);
+
+ } else {
+ return (0);
+ }
+ } else {
+ return (-errno);
+ }
+}
+
+/*
+ * umount() is identical, though it is implemented on top of umount2() in
+ * Solaris so it cannot be a pass-thru system call.
+ */
+long
+lx_umount(uintptr_t p1)
+{
+ return (umount((char *)p1) ? -errno : 0);
+}
+
+/*
+ * The Linux umount2() system call is identical but has a different value for
+ * MNT_FORCE (the logical equivalent to MS_FORCE).
+ */
+#define LX_MNT_FORCE 0x1
+
+long
+lx_umount2(uintptr_t p1, uintptr_t p2)
+{
+ char *path = (char *)p1;
+ int flags = 0;
+
+ if (p2 & ~LX_MNT_FORCE)
+ return (-EINVAL);
+
+ if (p2 & LX_MNT_FORCE)
+ flags |= MS_FORCE;
+
+ return (umount2(path, flags) ? -errno : 0);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/mount_nfs.c b/usr/src/lib/brand/lx/lx_brand/common/mount_nfs.c
new file mode 100644
index 0000000000..86bec83d08
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/mount_nfs.c
@@ -0,0 +1,2317 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * NFS mount syscall support
+ *
+ * All of the Linux NFS mount RPC support is handled within the kernel whereas
+ * on Illumos the NFS mount command performs the initial RPC calls to contact
+ * the server's mountd, get the file handle, and negotiate security before
+ * making the actual 'mount' syscall. Thus we emulate the Linux in-kernel
+ * RPC behavior here using code that is partially based on the code from our
+ * user-level NFS mount command. This code also includes the nullproc RPC
+ * function calls
+ *
+ * In addition to the code described above we also have brand-specific code to
+ * convert the Linux mount arguments into our native format.
+ *
+ * Because libnsl (which we need to make RPCs) depends on the netconfig table
+ * (which won't exist inside an lx zone) we provide a built-in default
+ * netconfig table which we hook into libnsl via the brand callbacks. See
+ * _nsl_brand_set_hooks(lx_nsl_set_sz_func, lx_get_ent_func)
+ * in lx_nfs_mount().
+ *
+ * Finally, in most of the functions below, when the code refers to the
+ * hostname we're really working with the IP addr that the Linux user-level
+ * mount command passed in to us.
+ */
+
+/*
+ * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
+/* All Rights Reserved */
+
+/*
+ * University Copyright- Copyright (c) 1982, 1986, 1988
+ * The Regents of the University of California
+ * All Rights Reserved
+ *
+ * University Acknowledgment- Portions of this document are derived from
+ * software developed by the University of California, Berkeley, and its
+ * contributors.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#define NFSCLIENT
+#include <string.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <sys/param.h>
+#include <rpc/rpc.h>
+#include <errno.h>
+#include <netdb.h>
+#include <sys/mount.h>
+#include <sys/mntent.h>
+#include <sys/mnttab.h>
+#include <nfs/nfs.h>
+#include <nfs/mount.h>
+#include <rpcsvc/mount.h>
+#include <sys/pathconf.h>
+#include <netdir.h>
+#include <netconfig.h>
+#include <sys/sockio.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <nfs/nfs_sec.h>
+#include <rpcsvc/daemon_utils.h>
+#include <rpcsvc/nfs4_prot.h>
+#include <limits.h>
+#include <nfs/nfssys.h>
+#include <strings.h>
+#include <assert.h>
+#include <sys/lx_mount.h>
+#include <sys/lx_misc.h>
+
+#ifndef NFS_VERSMAX
+#define NFS_VERSMAX 4
+#endif
+#ifndef NFS_VERSMIN
+#define NFS_VERSMIN 2
+#endif
+
+#define RET_OK 0
+#define RET_RETRY 32
+#define RET_ERR 33
+#define RET_PROTOUNSUPP 34
+#define RET_MNTERR 1000
+#define ERR_PROTO_NONE 0
+#define ERR_PROTO_INVALID 901
+#define ERR_PROTO_UNSUPP 902
+#define ERR_NETPATH 903
+#define ERR_NOHOST 904
+#define ERR_RPCERROR 905
+
+typedef struct err_ret {
+ int error_type;
+ int error_value;
+} err_ret_t;
+
+#define SET_ERR_RET(errst, etype, eval) \
+ { \
+ (errst)->error_type = etype; \
+ (errst)->error_value = eval; \
+ }
+
+/*
+ * Built-in netconfig table.
+ */
+#define N_NETCONF_ENTS 4
+static struct netconfig nca[N_NETCONF_ENTS] = {
+ {"udp6", NC_TPI_CLTS, 1, "inet6", "udp", "/dev/udp6", 0, NULL},
+ {"tcp6", NC_TPI_COTS_ORD, 1, "inet6", "tcp", "/dev/tcp6", 0, NULL},
+ {"udp", NC_TPI_CLTS, 1, "inet", "udp", "/dev/udp", 0, NULL},
+ {"tcp", NC_TPI_COTS_ORD, 1, "inet", "tcp", "/dev/tcp", 0, NULL}
+};
+
+/*
+ * Mapping table of Linux NFS mount options to the corresponding Illumos
+ * option. The nmo_argtyp field tells us how to handle the argument.
+ */
+typedef enum map_mount_opt_type {
+ MOUNT_OPT_INVALID = 0,
+ MOUNT_OPT_PASTHRU = 1,
+ MOUNT_OPT_IGNORE = 2,
+ MOUNT_OPT_TOKEN = 3,
+ MOUNT_OPT_HAS_ARG = 4
+} map_mount_opt_type_t;
+
+typedef struct nfs_map_opt {
+ char *nmo_lx_opt;
+ char *nmo_il_opt;
+ map_mount_opt_type_t nmo_argtyp;
+} nfs_map_opt_t;
+
+static nfs_map_opt_t nmo_tab[] = {
+ {"ac", NULL, MOUNT_OPT_IGNORE},
+ {"acdirmax", NULL, MOUNT_OPT_PASTHRU},
+ {"acdirmin", NULL, MOUNT_OPT_PASTHRU},
+ {"acl", NULL, MOUNT_OPT_INVALID},
+ {"acregmax", NULL, MOUNT_OPT_PASTHRU},
+ {"acregmin", NULL, MOUNT_OPT_PASTHRU},
+ {"actimeo", NULL, MOUNT_OPT_PASTHRU},
+ {"bg", NULL, MOUNT_OPT_PASTHRU},
+ {"cto", NULL, MOUNT_OPT_IGNORE},
+ {"fg", NULL, MOUNT_OPT_PASTHRU},
+ {"fsc", NULL, MOUNT_OPT_IGNORE},
+ {"hard", NULL, MOUNT_OPT_PASTHRU},
+ {"intr", NULL, MOUNT_OPT_PASTHRU},
+ {"lock", NULL, MOUNT_OPT_IGNORE},
+ {"lookupcache", NULL, MOUNT_OPT_INVALID},
+ {"local_lock=%s", NULL, MOUNT_OPT_INVALID },
+ {"migration", NULL, MOUNT_OPT_INVALID},
+ {"minorversion", NULL, MOUNT_OPT_INVALID},
+ {"mountaddr", NULL, MOUNT_OPT_INVALID},
+ {"mounthost", NULL, MOUNT_OPT_INVALID},
+ {"mountport", NULL, MOUNT_OPT_PASTHRU},
+ {"mountproto", NULL, MOUNT_OPT_PASTHRU},
+ {"mountvers", NULL, MOUNT_OPT_PASTHRU},
+ {"namlen", NULL, MOUNT_OPT_INVALID},
+ {"nfsvers", NULL, MOUNT_OPT_INVALID},
+ {"noac", NULL, MOUNT_OPT_PASTHRU},
+ {"noacl", NULL, MOUNT_OPT_INVALID},
+ {"nocto", NULL, MOUNT_OPT_PASTHRU},
+ {"nofsc", NULL, MOUNT_OPT_IGNORE},
+ {"nointr", NULL, MOUNT_OPT_PASTHRU},
+ {"nolock", "llock", MOUNT_OPT_TOKEN},
+ {"nomigration", NULL, MOUNT_OPT_INVALID},
+ {"noposix", NULL, MOUNT_OPT_IGNORE},
+ {"nordirplus", NULL, MOUNT_OPT_IGNORE},
+ {"noresvport", NULL, MOUNT_OPT_INVALID},
+ {"nosharecache", NULL, MOUNT_OPT_IGNORE},
+ {"port", NULL, MOUNT_OPT_PASTHRU},
+ {"posix", NULL, MOUNT_OPT_PASTHRU},
+ {"proto", NULL, MOUNT_OPT_PASTHRU},
+ {"rdirplus", NULL, MOUNT_OPT_IGNORE},
+ {"rdma", "proto=rdma", MOUNT_OPT_TOKEN},
+ {"resvport", NULL, MOUNT_OPT_IGNORE},
+ {"retrans", NULL, MOUNT_OPT_PASTHRU},
+ {"retry", NULL, MOUNT_OPT_PASTHRU},
+ {"rsize", NULL, MOUNT_OPT_PASTHRU},
+ {"sec", NULL, MOUNT_OPT_PASTHRU},
+ {"sharecache", NULL, MOUNT_OPT_IGNORE},
+ {"sloppy", NULL, MOUNT_OPT_IGNORE},
+ {"soft", NULL, MOUNT_OPT_PASTHRU},
+ {"tcp", "proto=tcp", MOUNT_OPT_TOKEN},
+ {"timeo", NULL, MOUNT_OPT_PASTHRU},
+ {"udp", "proto=udp", MOUNT_OPT_TOKEN},
+ {"vers", NULL, MOUNT_OPT_PASTHRU},
+ {"wsize", NULL, MOUNT_OPT_PASTHRU},
+ {NULL, NULL, MOUNT_OPT_INVALID}
+};
+
+/*
+ * This struct is used to keep track of misc. variables which are set deep
+ * in one function then referenced someplace else. We pass this around to
+ * avoid the use of global variables as is done the the NFS mount command.
+ *
+ * The nfsvers variables control the NFS version number to be used.
+ *
+ * nmd_nfsvers defaults to 0 which means to use the highest number that
+ * both the client and the server support. It can also be set to
+ * a particular value, either 2, 3, or 4 to indicate the version
+ * number of choice. If the server (or the client) do not support
+ * the version indicated, then the mount attempt will be failed.
+ *
+ * nmd_nfsvers_to_use is the actual version number found to use. It
+ * is determined in get_fh by pinging the various versions of the
+ * NFS service on the server to see which responds positively.
+ *
+ * nmd_nfsretry_vers is the version number set when we retry the mount
+ * command with the version decremented from nmd_nfsvers_to_use.
+ * nmd_nfsretry_vers is set from nmd_nfsvers_to_use when we retry the mount
+ * for errors other than RPC errors; it helps us know why we are
+ * retrying. It is an indication that the retry is due to non-RPC errors.
+ */
+typedef struct nfs_mnt_data {
+ int nmd_bg;
+ int nmd_posix;
+ int nmd_retries;
+ ushort_t nmd_nfs_port;
+ char *nmd_nfs_proto;
+ char *nmd_fstype;
+ seconfig_t nmd_nfs_sec;
+ int nmd_sec_opt; /* any security option ? */
+ rpcvers_t nmd_nfsvers;
+ rpcvers_t nmd_nfsvers_to_use;
+ rpcvers_t nmd_nfsretry_vers;
+} nfs_mnt_data_t;
+
+/* number of transports to try */
+#define MNT_PREF_LISTLEN 2
+#define FIRST_TRY 1
+#define SECOND_TRY 2
+
+#define BIGRETRY 10000
+
+/* maximum length of RPC header for NFS messages */
+#define NFS_RPC_HDR 432
+
+#define NFS_ARGS_EXTB_secdata(args, secdata) \
+ { (args)->nfs_args_ext = NFS_ARGS_EXTB, \
+ (args)->nfs_ext_u.nfs_extB.secdata = secdata; }
+
+extern int __clnt_bindresvport(CLIENT *);
+
+static int retry(struct mnttab *, int, nfs_mnt_data_t *);
+static int set_args(int *, struct nfs_args *, char *, struct mnttab *,
+ nfs_mnt_data_t *);
+static int get_fh(struct nfs_args *, char *, char *, int *,
+ struct netconfig **, ushort_t, nfs_mnt_data_t *);
+static int make_secure(struct nfs_args *, char *, struct netconfig *,
+ rpcvers_t, nfs_mnt_data_t *);
+static int mount_nfs(struct mnttab *, int, err_ret_t *, nfs_mnt_data_t *);
+static int getaddr_nfs(struct nfs_args *, char *, struct netconfig **,
+ char *, ushort_t, err_ret_t *, bool_t, nfs_mnt_data_t *);
+static struct netbuf *get_addr(char *, rpcprog_t, rpcvers_t,
+ struct netconfig **, char *, ushort_t, struct t_info *,
+ caddr_t *, char *, err_ret_t *);
+static struct netbuf *get_the_addr(char *, rpcprog_t, rpcvers_t,
+ struct netconfig *, ushort_t, struct t_info *, caddr_t *,
+ char *, err_ret_t *);
+
+static int lx_nsl_set_sz_func(void);
+static struct netconfig *lx_get_ent_func(int);
+
+/*
+ * These are the defaults (range) for the client when determining
+ * which NFS version to use when probing the server (see above).
+ * These will only be used when the vers mount option is not used.
+ */
+#define vers_max_default NFS_VERSMAX_DEFAULT
+#define vers_min_default NFS_VERSMIN_DEFAULT
+
+/*
+ * The wnl/WNL* definitions come from cmd/fs.d/nfs/mount/webnfs.h. We
+ * incorporate those here since the cmd src tree hierarchy is not built when
+ * we're compiling the lib portion of the src tree and since these definitions
+ * are a fundamental part of the protocol spec, there is no risk of these
+ * changing (i.e. we're just like the Linux kernel here, which has these
+ * built-in). We only need the bare minimum set of definitions to make RPCs to
+ * the NFS server to negotiate the mount.
+ */
+
+/* The timeout for our mount null proc pings is always 5 seconds. */
+static struct timeval TIMEOUT = { 5, 0 };
+#define WNLPROC_NULL 0
+#define WNLPROC3_NULL 0
+#define WNLPROC4_NULL 0
+#define WNL_FHSIZE 32
+
+static enum clnt_stat
+wnlproc_null_2(void *argp, void *clnt_res, CLIENT *clnt)
+{
+ return (clnt_call(clnt, WNLPROC_NULL, (xdrproc_t)xdr_void,
+ (caddr_t)argp, (xdrproc_t)xdr_void, (caddr_t)clnt_res, TIMEOUT));
+}
+
+static enum clnt_stat
+wnlproc3_null_3(void *argp, void *clnt_res, CLIENT *clnt)
+{
+ return (clnt_call(clnt, WNLPROC3_NULL, (xdrproc_t)xdr_void,
+ (caddr_t)argp, (xdrproc_t)xdr_void, (caddr_t)clnt_res, TIMEOUT));
+}
+
+static enum clnt_stat
+wnlproc4_null_4(void *argp, void *clnt_res, CLIENT *clnt)
+{
+ return (clnt_call(clnt, WNLPROC4_NULL, (xdrproc_t)xdr_void,
+ (caddr_t)argp, (xdrproc_t)xdr_void, (caddr_t)clnt_res, TIMEOUT));
+}
+
+static void
+log_err(const char *fmt, ...)
+{
+ va_list ap;
+ char buf[128];
+ int fd;
+
+ va_start(ap, fmt);
+ (void) vsnprintf(buf, sizeof (buf), fmt, ap);
+ va_end(ap);
+
+ if ((fd = open("/dev/conslog", O_WRONLY)) != -1) {
+ (void) write(fd, buf, strlen(buf));
+ (void) close(fd);
+ }
+}
+
+static int
+i_add_option(char *option, char *buf, size_t buf_size)
+{
+ int len;
+ char *fmt_str = NULL;
+
+ if (buf[0] == '\0') {
+ fmt_str = "%s";
+ } else {
+ fmt_str = ",%s";
+ }
+
+ len = strlen(buf);
+ buf_size -= len;
+ buf += len;
+
+ /*LINTED*/
+ if (snprintf(buf, buf_size, fmt_str, option) > (buf_size - 1))
+ return (-EOVERFLOW);
+ return (0);
+}
+
+/*
+ * We can return a negative error value which is the errno we need to return to
+ * lx or we can return a positive error value which is primarily used to
+ * indicate retry (otherwise we map to -EINVAL in the caller). Returning 0
+ * indicates success.
+ */
+static int
+mount_nfs(struct mnttab *mntp, int mntflags, err_ret_t *retry_error,
+ nfs_mnt_data_t *nmdp)
+{
+ struct nfs_args *argp = NULL;
+ struct netconfig *nconf = NULL;
+ int vers = 0;
+ int r = 0;
+ ushort_t port;
+ char *colonp;
+ char *path;
+ char *host;
+ char spec_buf[MAXPATHLEN + LX_NMD_MAXHOSTNAMELEN + 1];
+
+ mntp->mnt_fstype = MNTTYPE_NFS;
+
+ (void) strlcpy(spec_buf, mntp->mnt_special, sizeof (spec_buf));
+ colonp = strchr(spec_buf, ':');
+ if (colonp == NULL)
+ return (-EINVAL);
+
+ *colonp = '\0';
+ host = spec_buf;
+ path = colonp + 1;
+
+ argp = (struct nfs_args *)malloc(sizeof (*argp));
+ if (argp == NULL)
+ return (-ENOMEM);
+
+ memset(argp, 0, sizeof (*argp));
+ memset(&nmdp->nmd_nfs_sec, 0, sizeof (seconfig_t));
+ nmdp->nmd_sec_opt = 0;
+ port = 0;
+
+ /* returns a negative errno */
+ if ((r = set_args(&mntflags, argp, host, mntp, nmdp)) != 0)
+ goto out;
+
+ if (port == 0) {
+ port = nmdp->nmd_nfs_port;
+ } else if (nmdp->nmd_nfs_port != 0 && nmdp->nmd_nfs_port != port) {
+ r = -EINVAL;
+ goto out;
+ }
+
+ /* returns a negative errno or positive EAGAIN for retry */
+ r = get_fh(argp, host, path, &vers, &nconf, port, nmdp);
+ if (r != 0) {
+ /* All attempts failed */
+ goto out;
+ }
+
+ /*
+ * Call to get_fh() above may have obtained the netconfig info and NULL
+ * proc'd the server. This would be the case with v4
+ */
+ if (!(argp->flags & NFSMNT_KNCONF)) {
+ nconf = NULL;
+ /* returns a negative errno or positive EAGAIN for retry */
+ r = getaddr_nfs(argp, host, &nconf, path, port,
+ retry_error, TRUE, nmdp);
+ if (r != 0) {
+ goto out;
+ }
+ }
+
+ if (make_secure(argp, host, nconf, vers, nmdp) < 0) {
+ r = -EAGAIN;
+ goto out;
+ }
+
+ mntflags |= MS_DATA | MS_OPTIONSTR;
+
+ r = mount(mntp->mnt_special, mntp->mnt_mountp, mntflags,
+ nmdp->nmd_fstype, argp, sizeof (*argp), mntp->mnt_mntopts,
+ MAX_MNTOPT_STR);
+ if (r != 0)
+ r = -errno;
+out:
+ if (argp->fh)
+ free(argp->fh);
+ if (argp->pathconf)
+ free(argp->pathconf);
+ if (argp->knconf)
+ free(argp->knconf);
+ if (argp->addr) {
+ free(argp->addr->buf);
+ free(argp->addr);
+ }
+ nfs_free_secdata(argp->nfs_ext_u.nfs_extB.secdata);
+ if (argp->syncaddr) {
+ free(argp->syncaddr->buf);
+ free(argp->syncaddr);
+ }
+ if (argp->netname)
+ free(argp->netname);
+ free(argp);
+
+ return (r);
+}
+
+/*
+ * These options are duplicated in uts/common/fs/nfs/nfs_dlinet.c
+ * Changes must be made to both lists.
+ */
+static char *optlist[] = {
+#define OPT_RO 0
+ MNTOPT_RO,
+#define OPT_RW 1
+ MNTOPT_RW,
+#define OPT_QUOTA 2
+ MNTOPT_QUOTA,
+#define OPT_NOQUOTA 3
+ MNTOPT_NOQUOTA,
+#define OPT_SOFT 4
+ MNTOPT_SOFT,
+#define OPT_HARD 5
+ MNTOPT_HARD,
+#define OPT_SUID 6
+ MNTOPT_SUID,
+#define OPT_NOSUID 7
+ MNTOPT_NOSUID,
+#define OPT_GRPID 8
+ MNTOPT_GRPID,
+#define OPT_REMOUNT 9
+ MNTOPT_REMOUNT,
+#define OPT_NOSUB 10
+ MNTOPT_NOSUB,
+#define OPT_INTR 11
+ MNTOPT_INTR,
+#define OPT_NOINTR 12
+ MNTOPT_NOINTR,
+#define OPT_PORT 13
+ MNTOPT_PORT,
+#define OPT_SECURE 14
+ MNTOPT_SECURE,
+#define OPT_RSIZE 15
+ MNTOPT_RSIZE,
+#define OPT_WSIZE 16
+ MNTOPT_WSIZE,
+#define OPT_TIMEO 17
+ MNTOPT_TIMEO,
+#define OPT_RETRANS 18
+ MNTOPT_RETRANS,
+#define OPT_ACTIMEO 19
+ MNTOPT_ACTIMEO,
+#define OPT_ACREGMIN 20
+ MNTOPT_ACREGMIN,
+#define OPT_ACREGMAX 21
+ MNTOPT_ACREGMAX,
+#define OPT_ACDIRMIN 22
+ MNTOPT_ACDIRMIN,
+#define OPT_ACDIRMAX 23
+ MNTOPT_ACDIRMAX,
+#define OPT_BG 24
+ MNTOPT_BG,
+#define OPT_FG 25
+ MNTOPT_FG,
+#define OPT_RETRY 26
+ MNTOPT_RETRY,
+#define OPT_NOAC 27
+ MNTOPT_NOAC,
+#define OPT_NOCTO 28
+ MNTOPT_NOCTO,
+#define OPT_LLOCK 29
+ MNTOPT_LLOCK,
+#define OPT_POSIX 30
+ MNTOPT_POSIX,
+#define OPT_VERS 31
+ MNTOPT_VERS,
+#define OPT_PROTO 32
+ MNTOPT_PROTO,
+#define OPT_SEMISOFT 33
+ MNTOPT_SEMISOFT,
+#define OPT_NOPRINT 34
+ MNTOPT_NOPRINT,
+#define OPT_SEC 35
+ MNTOPT_SEC,
+#define OPT_LARGEFILES 36
+ MNTOPT_LARGEFILES,
+#define OPT_NOLARGEFILES 37
+ MNTOPT_NOLARGEFILES,
+#define OPT_PUBLIC 38
+ MNTOPT_PUBLIC,
+#define OPT_DIRECTIO 39
+ MNTOPT_FORCEDIRECTIO,
+#define OPT_NODIRECTIO 40
+ MNTOPT_NOFORCEDIRECTIO,
+#define OPT_XATTR 41
+ MNTOPT_XATTR,
+#define OPT_NOXATTR 42
+ MNTOPT_NOXATTR,
+#define OPT_DEVICES 43
+ MNTOPT_DEVICES,
+#define OPT_NODEVICES 44
+ MNTOPT_NODEVICES,
+#define OPT_SETUID 45
+ MNTOPT_SETUID,
+#define OPT_NOSETUID 46
+ MNTOPT_NOSETUID,
+#define OPT_EXEC 47
+ MNTOPT_EXEC,
+#define OPT_NOEXEC 48
+ MNTOPT_NOEXEC,
+ NULL
+};
+
+static int
+convert_int(int *val, char *str)
+{
+ long lval;
+
+ if (str == NULL || !isdigit(*str))
+ return (-1);
+
+ lval = strtol(str, &str, 10);
+ if (*str != '\0' || lval > INT_MAX)
+ return (-2);
+
+ *val = (int)lval;
+ return (0);
+}
+
+static int
+set_args(int *mntflags, struct nfs_args *args, char *fshost, struct mnttab *mnt,
+ nfs_mnt_data_t *nmdp)
+{
+ char *saveopt, *optstr, *opts, *newopts, *val;
+ int num;
+ int largefiles = 0;
+ int invalid = 0;
+ int attrpref = 0;
+ int optlen;
+
+ args->flags = NFSMNT_INT; /* default is "intr" */
+ args->flags |= NFSMNT_HOSTNAME;
+ args->flags |= NFSMNT_NEWARGS; /* using extented nfs_args structure */
+ args->hostname = fshost;
+
+ optstr = opts = strdup(mnt->mnt_mntopts);
+ /* sizeof (MNTOPT_XXX) includes one extra byte we may need for "," */
+ optlen = strlen(mnt->mnt_mntopts) + sizeof (MNTOPT_XATTR) + 1;
+ if (optlen > MAX_MNTOPT_STR)
+ return (-EINVAL);
+
+ newopts = malloc(optlen);
+ if (opts == NULL || newopts == NULL) {
+ if (opts)
+ free(opts);
+ if (newopts)
+ free(newopts);
+ return (-EINVAL);
+ }
+ newopts[0] = '\0';
+
+ while (*opts) {
+ invalid = 0;
+ saveopt = opts;
+ switch (getsubopt(&opts, optlist, &val)) {
+ case OPT_RO:
+ *mntflags |= MS_RDONLY;
+ break;
+ case OPT_RW:
+ *mntflags &= ~(MS_RDONLY);
+ break;
+ case OPT_QUOTA:
+ case OPT_NOQUOTA:
+ break;
+ case OPT_SOFT:
+ args->flags |= NFSMNT_SOFT;
+ args->flags &= ~(NFSMNT_SEMISOFT);
+ break;
+ case OPT_SEMISOFT:
+ args->flags |= NFSMNT_SOFT;
+ args->flags |= NFSMNT_SEMISOFT;
+ break;
+ case OPT_HARD:
+ args->flags &= ~(NFSMNT_SOFT);
+ args->flags &= ~(NFSMNT_SEMISOFT);
+ break;
+ case OPT_SUID:
+ *mntflags &= ~(MS_NOSUID);
+ break;
+ case OPT_NOSUID:
+ *mntflags |= MS_NOSUID;
+ break;
+ case OPT_GRPID:
+ args->flags |= NFSMNT_GRPID;
+ break;
+ case OPT_REMOUNT:
+ *mntflags |= MS_REMOUNT;
+ break;
+ case OPT_INTR:
+ args->flags |= NFSMNT_INT;
+ break;
+ case OPT_NOINTR:
+ args->flags &= ~(NFSMNT_INT);
+ break;
+ case OPT_NOAC:
+ args->flags |= NFSMNT_NOAC;
+ break;
+ case OPT_PORT:
+ if (convert_int(&num, val) != 0)
+ goto badopt;
+ nmdp->nmd_nfs_port = htons((ushort_t)num);
+ break;
+
+ case OPT_NOCTO:
+ args->flags |= NFSMNT_NOCTO;
+ break;
+
+ case OPT_RSIZE:
+ if (convert_int(&args->rsize, val) != 0)
+ goto badopt;
+ args->flags |= NFSMNT_RSIZE;
+ break;
+ case OPT_WSIZE:
+ if (convert_int(&args->wsize, val) != 0)
+ goto badopt;
+ args->flags |= NFSMNT_WSIZE;
+ break;
+ case OPT_TIMEO:
+ if (convert_int(&args->timeo, val) != 0)
+ goto badopt;
+ args->flags |= NFSMNT_TIMEO;
+ break;
+ case OPT_RETRANS:
+ if (convert_int(&args->retrans, val) != 0)
+ goto badopt;
+ args->flags |= NFSMNT_RETRANS;
+ break;
+ case OPT_ACTIMEO:
+ if (convert_int(&args->acregmax, val) != 0)
+ goto badopt;
+ args->acdirmin = args->acregmin = args->acdirmax
+ = args->acregmax;
+ args->flags |= NFSMNT_ACDIRMAX;
+ args->flags |= NFSMNT_ACREGMAX;
+ args->flags |= NFSMNT_ACDIRMIN;
+ args->flags |= NFSMNT_ACREGMIN;
+ break;
+ case OPT_ACREGMIN:
+ if (convert_int(&args->acregmin, val) != 0)
+ goto badopt;
+ args->flags |= NFSMNT_ACREGMIN;
+ break;
+ case OPT_ACREGMAX:
+ if (convert_int(&args->acregmax, val) != 0)
+ goto badopt;
+ args->flags |= NFSMNT_ACREGMAX;
+ break;
+ case OPT_ACDIRMIN:
+ if (convert_int(&args->acdirmin, val) != 0)
+ goto badopt;
+ args->flags |= NFSMNT_ACDIRMIN;
+ break;
+ case OPT_ACDIRMAX:
+ if (convert_int(&args->acdirmax, val) != 0)
+ goto badopt;
+ args->flags |= NFSMNT_ACDIRMAX;
+ break;
+ case OPT_BG:
+ nmdp->nmd_bg++;
+ break;
+ case OPT_FG:
+ nmdp->nmd_bg = 0;
+ break;
+ case OPT_RETRY:
+ if (convert_int(&nmdp->nmd_retries, val) != 0)
+ goto badopt;
+ break;
+ case OPT_LLOCK:
+ args->flags |= NFSMNT_LLOCK;
+ break;
+ case OPT_POSIX:
+ nmdp->nmd_posix = 1;
+ break;
+ case OPT_VERS:
+ if (convert_int(&num, val) != 0)
+ goto badopt;
+ nmdp->nmd_nfsvers = (rpcvers_t)num;
+ break;
+ case OPT_PROTO:
+ if (val == NULL)
+ goto badopt;
+
+ nmdp->nmd_nfs_proto = (char *)malloc(strlen(val)+1);
+ if (!nmdp->nmd_nfs_proto)
+ return (-EINVAL);
+
+ (void) strncpy(nmdp->nmd_nfs_proto, val, strlen(val)+1);
+ break;
+
+ case OPT_NOPRINT:
+ args->flags |= NFSMNT_NOPRINT;
+ break;
+
+ case OPT_LARGEFILES:
+ largefiles = 1;
+ break;
+
+ case OPT_NOLARGEFILES:
+ free(optstr);
+ return (-EINVAL);
+
+ case OPT_SEC:
+ if (val == NULL)
+ return (-EINVAL);
+ /*
+ * We initialize the nfs_sec struct as if we had the
+ * basic /etc/nffssec.conf file.
+ */
+ if (strcmp(val, "none") == 0) {
+ (void) strlcpy(nmdp->nmd_nfs_sec.sc_name,
+ "none", MAX_NAME_LEN);
+ nmdp->nmd_nfs_sec.sc_nfsnum =
+ nmdp->nmd_nfs_sec.sc_rpcnum = 0;
+ } else if (strcmp(val, "sys") == 0) {
+ (void) strlcpy(nmdp->nmd_nfs_sec.sc_name,
+ "sys", MAX_NAME_LEN);
+ nmdp->nmd_nfs_sec.sc_nfsnum =
+ nmdp->nmd_nfs_sec.sc_rpcnum = 1;
+ } else if (strcmp(val, "dh") == 0) {
+ (void) strlcpy(nmdp->nmd_nfs_sec.sc_name,
+ "dh", MAX_NAME_LEN);
+ nmdp->nmd_nfs_sec.sc_nfsnum =
+ nmdp->nmd_nfs_sec.sc_rpcnum = 3;
+ } else {
+ return (-EINVAL);
+ }
+ nmdp->nmd_sec_opt++;
+ break;
+
+ case OPT_DIRECTIO:
+ args->flags |= NFSMNT_DIRECTIO;
+ break;
+
+ case OPT_NODIRECTIO:
+ args->flags &= ~(NFSMNT_DIRECTIO);
+ break;
+
+ case OPT_XATTR:
+ case OPT_NOXATTR:
+ /*
+ * VFS options; just need to get them into the
+ * new mount option string and note we've seen them
+ */
+ attrpref = 1;
+ break;
+ default:
+ invalid = 1;
+ break;
+ }
+ if (!invalid) {
+ if (newopts[0])
+ strcat(newopts, ",");
+ strcat(newopts, saveopt);
+ }
+ }
+ /* Default is to turn extended attrs on */
+ if (!attrpref) {
+ if (newopts[0])
+ strcat(newopts, ",");
+ strcat(newopts, MNTOPT_XATTR);
+ }
+ strcpy(mnt->mnt_mntopts, newopts);
+ free(newopts);
+ free(optstr);
+
+ /* ensure that only one secure mode is requested */
+ if (nmdp->nmd_sec_opt > 1)
+ return (-EINVAL);
+
+ /* ensure that the user isn't trying to get large files over V2 */
+ if (nmdp->nmd_nfsvers == NFS_VERSION && largefiles)
+ return (-EINVAL);
+
+ if (nmdp->nmd_nfsvers == NFS_V4 && nmdp->nmd_nfs_proto != NULL &&
+ strncasecmp(nmdp->nmd_nfs_proto, NC_UDP, strlen(NC_UDP)) == 0)
+ return (-EINVAL);
+
+ return (0);
+
+badopt:
+ free(optstr);
+ return (-EINVAL);
+}
+
+/*
+ * NFS project private API.
+ *
+ * Free an sec_data structure.
+ * Free the parts that nfs_clnt_secdata allocates.
+ */
+void
+nfs_free_secdata(sec_data_t *secdata)
+{
+ dh_k4_clntdata_t *dkdata;
+
+ if (!secdata)
+ return;
+
+ switch (secdata->rpcflavor) {
+ case AUTH_UNIX:
+ case AUTH_NONE:
+ break;
+
+ case AUTH_DES:
+ /* LINTED pointer alignment */
+ dkdata = (dh_k4_clntdata_t *)secdata->data;
+ if (dkdata) {
+ if (dkdata->netname)
+ free(dkdata->netname);
+ if (dkdata->syncaddr.buf)
+ free(dkdata->syncaddr.buf);
+ free(dkdata);
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ free(secdata);
+}
+
+/*
+ * Make an client side sec_data structure and fill in appropriate value
+ * based on its rpc security flavor.
+ *
+ * It is caller's responsibility to allocate space for seconfig_t,
+ * and this routine will allocate space for the sec_data structure
+ * and related data field.
+ *
+ * Return the sec_data_t on success.
+ * If fail, return NULL pointer.
+ */
+sec_data_t *
+nfs_clnt_secdata(seconfig_t *secp, char *hostname, struct knetconfig *knconf,
+ struct netbuf *syncaddr, int flags)
+{
+ char netname[MAXNETNAMELEN+1];
+ sec_data_t *secdata;
+ dh_k4_clntdata_t *dkdata;
+
+ secdata = malloc(sizeof (sec_data_t));
+ if (!secdata)
+ return (NULL);
+
+ (void) memset(secdata, 0, sizeof (sec_data_t));
+
+ secdata->secmod = secp->sc_nfsnum;
+ secdata->rpcflavor = secp->sc_rpcnum;
+ secdata->uid = secp->sc_uid;
+ secdata->flags = flags;
+
+ /*
+ * Now, fill in the information for client side secdata :
+ *
+ * For AUTH_UNIX, AUTH_DES
+ * hostname can be in the form of
+ * nodename or
+ * nodename.domain
+ */
+ switch (secp->sc_rpcnum) {
+ case AUTH_UNIX:
+ case AUTH_NONE:
+ secdata->data = NULL;
+ break;
+
+ case AUTH_DES:
+ if (!host2netname(netname, hostname, NULL))
+ goto err_out;
+
+ dkdata = malloc(sizeof (dh_k4_clntdata_t));
+ if (!dkdata)
+ goto err_out;
+
+ (void) memset((char *)dkdata, 0,
+ sizeof (dh_k4_clntdata_t));
+ if ((dkdata->netname = strdup(netname)) == NULL)
+ goto err_out;
+
+ dkdata->netnamelen = strlen(netname);
+ dkdata->knconf = knconf;
+ dkdata->syncaddr = *syncaddr;
+ dkdata->syncaddr.buf = malloc(syncaddr->len);
+ if (dkdata->syncaddr.buf == NULL)
+ goto err_out;
+
+ (void) memcpy(dkdata->syncaddr.buf, syncaddr->buf,
+ syncaddr->len);
+ secdata->data = (caddr_t)dkdata;
+ break;
+
+ default:
+ goto err_out;
+ }
+
+ return (secdata);
+
+err_out:
+ free(secdata);
+ return (NULL);
+}
+
+static int
+make_secure(struct nfs_args *args, char *hostname, struct netconfig *nconf,
+ rpcvers_t vers, nfs_mnt_data_t *nmdp)
+{
+ sec_data_t *secdata;
+ int flags;
+ struct netbuf *syncaddr = NULL;
+ struct nd_addrlist *retaddrs = NULL;
+ char netname[MAXNETNAMELEN+1];
+
+ /*
+ * check to see if any secure mode is requested.
+ * if not, use default security mode.
+ */
+ if (!nmdp->nmd_sec_opt) {
+ /* AUTH_UNIX is the default. */
+ strlcpy(nmdp->nmd_nfs_sec.sc_name, "sys", MAX_NAME_LEN);
+ nmdp->nmd_nfs_sec.sc_nfsnum = 1;
+ args->flags |= NFSMNT_SECDEFAULT;
+ }
+
+ /*
+ * Get the network address for the time service on the server.
+ * If an RPC based time service is not available then try the
+ * IP time service.
+ *
+ * This is for AUTH_DH processing. We will also pass down syncaddr
+ * and netname for NFS V4 even if AUTH_DH is not requested right now.
+ * NFS V4 does security negotiation in the kernel via SECINFO.
+ * This information might be needed later in the kernel.
+ */
+ flags = 0;
+ syncaddr = NULL;
+
+ if (nmdp->nmd_nfs_sec.sc_rpcnum == AUTH_DH || vers == NFS_V4) {
+ /*
+ * If using nfsv4, we will not contact the remote RPCBINDer,
+ * since it is possibly behind a firewall.
+ */
+ if (vers != NFS_V4)
+ syncaddr = get_the_addr(hostname, RPCBPROG, RPCBVERS,
+ nconf, 0, NULL, NULL, NULL, NULL);
+
+ if (syncaddr != NULL) {
+ /* for flags in sec_data */
+ flags |= AUTH_F_RPCTIMESYNC;
+ } else {
+ /*
+ * TBD:
+ * For AUTH_DH (AUTH_DES) netdir_getbyname wants to
+ * lookup the timeserver entry in the /etc/services
+ * file (but our libnsl to do this won't work in Linux).
+ * That entry is:
+ * timed 525/udp timeserver
+ * Since we haven't implemented the emulation for that
+ * aspect of netdir_getbyname yet, we'll simply return
+ * an error.
+ */
+ struct nd_hostserv hs;
+ int error;
+
+ hs.h_host = hostname;
+ hs.h_serv = "timserver";
+
+ if (nmdp->nmd_nfs_sec.sc_rpcnum == AUTH_DH)
+ return (-1);
+
+ error = netdir_getbyname(nconf, &hs, &retaddrs);
+
+ if (error != ND_OK &&
+ (nmdp->nmd_nfs_sec.sc_rpcnum == AUTH_DH))
+ return (-1);
+
+ if (error == ND_OK)
+ syncaddr = retaddrs->n_addrs;
+
+ /*
+ * For NFS_V4 if AUTH_DH is negotiated later in the
+ * kernel thru SECINFO, it will need syncaddr
+ * and netname data.
+ */
+ if (vers == NFS_V4 && syncaddr &&
+ host2netname(netname, hostname, NULL)) {
+ args->syncaddr = malloc(sizeof (struct netbuf));
+ args->syncaddr->buf = malloc(syncaddr->len);
+ (void) memcpy(args->syncaddr->buf,
+ syncaddr->buf, syncaddr->len);
+ args->syncaddr->len = syncaddr->len;
+ args->syncaddr->maxlen = syncaddr->maxlen;
+ args->netname = strdup(netname);
+ args->flags |= NFSMNT_SECURE;
+ }
+ }
+ }
+
+ /*
+ * For the initial chosen flavor (any flavor defined in nfssec.conf),
+ * the data will be stored in the sec_data structure via
+ * nfs_clnt_secdata() and be passed to the kernel via nfs_args_*
+ * extended data structure.
+ */
+ if (!(secdata = nfs_clnt_secdata(&nmdp->nmd_nfs_sec, hostname,
+ args->knconf, syncaddr, flags))) {
+ if (flags & AUTH_F_RPCTIMESYNC) {
+ free(syncaddr->buf);
+ free(syncaddr);
+ } else if (retaddrs)
+ netdir_free((void *)retaddrs, ND_ADDRLIST);
+ return (-1);
+ }
+
+ NFS_ARGS_EXTB_secdata(args, secdata);
+ if (flags & AUTH_F_RPCTIMESYNC) {
+ free(syncaddr->buf);
+ free(syncaddr);
+ } else if (retaddrs)
+ netdir_free((void *)retaddrs, ND_ADDRLIST);
+ return (0);
+}
+
+/*
+ * Get the network address on "hostname" for program "prog"
+ * with version "vers" by using the nconf configuration data
+ * passed in.
+ *
+ * If the address of a netconfig pointer is null then
+ * information is not sufficient and no netbuf will be returned.
+ *
+ * Finally, ping the null procedure of that service.
+ *
+ * A similar routine is also defined in ../../autofs/autod_nfs.c.
+ * This is a potential routine to move to ../lib for common usage.
+ */
+static struct netbuf *
+get_the_addr(char *hostname, rpcprog_t prog, rpcvers_t vers,
+ struct netconfig *nconf, ushort_t port, struct t_info *tinfo,
+ caddr_t *fhp, char *fspath, err_ret_t *error)
+{
+ struct netbuf *nb = NULL;
+ struct t_bind *tbind = NULL;
+ CLIENT *cl = NULL;
+ int fd = -1;
+ AUTH *ah = NULL;
+ AUTH *new_ah = NULL;
+ struct rpc_err r_err;
+ enum clnt_stat rc;
+
+ if (nconf == NULL)
+ return (NULL);
+
+ if ((fd = t_open(nconf->nc_device, O_RDWR, tinfo)) == -1)
+ goto done;
+
+ /* LINTED pointer alignment */
+ if ((tbind = (struct t_bind *)t_alloc(fd, T_BIND, T_ADDR)) == NULL)
+ goto done;
+
+ if (vers == NFS_V4) {
+ struct nd_hostserv hs;
+ struct nd_addrlist *retaddrs;
+ int retval;
+ hs.h_host = hostname;
+
+ /* NFS where vers==4 does not support UDP */
+ if (strncasecmp(nconf->nc_proto, NC_UDP,
+ strlen(NC_UDP)) == 0) {
+ SET_ERR_RET(error, ERR_PROTO_UNSUPP, 0);
+ goto done;
+ }
+
+ if (port == 0)
+ hs.h_serv = "nfs";
+ else
+ hs.h_serv = NULL;
+
+ if ((retval = netdir_getbyname(nconf, &hs, &retaddrs))
+ != ND_OK) {
+ /*
+ * Carefully set the error value here. Want to signify
+ * that the error was an unknown host.
+ */
+ if (retval == ND_NOHOST) {
+ SET_ERR_RET(error, ERR_NOHOST, retval);
+ }
+
+ goto done;
+ }
+ memcpy(tbind->addr.buf, retaddrs->n_addrs->buf,
+ retaddrs->n_addrs->len);
+ tbind->addr.len = retaddrs->n_addrs->len;
+ netdir_free((void *)retaddrs, ND_ADDRLIST);
+ (void) netdir_options(nconf, ND_SET_RESERVEDPORT, fd, NULL);
+
+ } else {
+ if (rpcb_getaddr(prog, vers, nconf, &tbind->addr,
+ hostname) == FALSE) {
+ goto done;
+ }
+ }
+
+ if (port) {
+ /* LINTED pointer alignment */
+ if (strcmp(nconf->nc_protofmly, NC_INET) == 0)
+ ((struct sockaddr_in *)tbind->addr.buf)->sin_port
+ = port;
+ else if (strcmp(nconf->nc_protofmly, NC_INET6) == 0)
+ ((struct sockaddr_in6 *)tbind->addr.buf)->sin6_port
+ = port;
+
+ }
+
+ cl = clnt_tli_create(fd, nconf, &tbind->addr, prog, vers, 0, 0);
+ if (cl == NULL) {
+ /*
+ * clnt_tli_create() returns either RPC_SYSTEMERROR,
+ * RPC_UNKNOWNPROTO or RPC_TLIERROR. The RPC_TLIERROR translates
+ * to "Misc. TLI error". This is not too helpful. Most likely
+ * the connection to the remote server timed out, so this
+ * error is at least less perplexing.
+ * See: usr/src/cmd/rpcinfo/rpcinfo.c
+ */
+ if (rpc_createerr.cf_stat == RPC_TLIERROR) {
+ SET_ERR_RET(error, ERR_RPCERROR, RPC_PMAPFAILURE);
+ } else {
+ SET_ERR_RET(error, ERR_RPCERROR, rpc_createerr.cf_stat);
+ }
+ goto done;
+ }
+
+ ah = authsys_create_default();
+ if (ah != NULL)
+ cl->cl_auth = ah;
+
+ /*
+ * NULL procedures need not have an argument or
+ * result param.
+ */
+ if (vers == NFS_VERSION)
+ rc = wnlproc_null_2(NULL, NULL, cl);
+ else if (vers == NFS_V3)
+ rc = wnlproc3_null_3(NULL, NULL, cl);
+ else
+ rc = wnlproc4_null_4(NULL, NULL, cl);
+
+ if (rc != RPC_SUCCESS) {
+ clnt_geterr(cl, &r_err);
+ if (strcmp(nconf->nc_protofmly, NC_LOOPBACK) == 0) {
+ switch (r_err.re_status) {
+ case RPC_TLIERROR:
+ case RPC_CANTRECV:
+ case RPC_CANTSEND:
+ r_err.re_status = RPC_PROGVERSMISMATCH;
+ break;
+ default:
+ break;
+ }
+ }
+ SET_ERR_RET(error, ERR_RPCERROR, r_err.re_status);
+ goto done;
+ }
+
+ /*
+ * Make a copy of the netbuf to return
+ */
+ nb = (struct netbuf *)malloc(sizeof (*nb));
+ if (nb == NULL)
+ goto done;
+
+ *nb = tbind->addr;
+ nb->buf = (char *)malloc(nb->maxlen);
+ if (nb->buf == NULL) {
+ free(nb);
+ nb = NULL;
+ goto done;
+ }
+ (void) memcpy(nb->buf, tbind->addr.buf, tbind->addr.len);
+
+done:
+ if (cl) {
+ if (ah != NULL) {
+ if (new_ah != NULL)
+ AUTH_DESTROY(ah);
+ AUTH_DESTROY(cl->cl_auth);
+ cl->cl_auth = NULL;
+ }
+ clnt_destroy(cl);
+ cl = NULL;
+ }
+ if (tbind) {
+ t_free((char *)tbind, T_BIND);
+ tbind = NULL;
+ }
+ if (fd >= 0)
+ (void) t_close(fd);
+ return (nb);
+}
+
+static int
+check_nconf(struct netconfig *nconf, int nthtry, int *valid_proto)
+{
+ int try_test = 0;
+ char *proto = NULL;
+
+ if (nthtry == FIRST_TRY) {
+ try_test = ((nconf->nc_semantics == NC_TPI_COTS_ORD) ||
+ (nconf->nc_semantics == NC_TPI_COTS));
+ proto = NC_TCP;
+ } else if (nthtry == SECOND_TRY) {
+ try_test = (nconf->nc_semantics == NC_TPI_CLTS);
+ proto = NC_UDP;
+ }
+
+ if (proto &&
+ (strcmp(nconf->nc_protofmly, NC_INET) == 0 ||
+ strcmp(nconf->nc_protofmly, NC_INET6) == 0) &&
+ (strcmp(nconf->nc_proto, proto) == 0))
+ *valid_proto = TRUE;
+ else
+ *valid_proto = FALSE;
+
+ return (try_test);
+}
+
+static struct netconfig *
+netconfig_dup(struct netconfig *netconfigp)
+{
+ struct netconfig *nconf;
+
+ nconf = calloc(1, sizeof (struct netconfig));
+ if (nconf == NULL)
+ goto nomem;
+
+ if ((nconf->nc_netid = strdup(netconfigp->nc_netid)) == NULL)
+ goto nomem;
+
+ if ((nconf->nc_protofmly = strdup(netconfigp->nc_protofmly)) == NULL)
+ goto nomem;
+
+ if ((nconf->nc_proto = strdup(netconfigp->nc_proto)) == NULL)
+ goto nomem;
+
+ if ((nconf->nc_device = strdup(netconfigp->nc_device)) == NULL)
+ goto nomem;
+
+ nconf->nc_lookups = NULL;
+ nconf->nc_nlookups = netconfigp->nc_nlookups;
+ nconf->nc_flag = netconfigp->nc_flag;
+ nconf->nc_semantics = netconfigp->nc_semantics;
+ return (nconf);
+
+nomem:
+ if (nconf != NULL) {
+ free(nconf->nc_netid);
+ free(nconf->nc_protofmly);
+ free(nconf->nc_proto);
+ free(nconf->nc_device);
+ free(nconf);
+ }
+ return (NULL);
+}
+
+/*
+ * Get a network address on "hostname" for program "prog"
+ * with version "vers". If the port number is specified (non zero)
+ * then try for a TCP/UDP transport and set the port number of the
+ * resulting IP address.
+ *
+ * If the address of a netconfig pointer was passed and
+ * if it's not null, use it as the netconfig otherwise
+ * assign the address of the netconfig that was used to
+ * establish contact with the service.
+ *
+ * "error" refers to a more descriptive term when get_addr fails
+ * and returns NULL: ERR_PROTO_NONE if no error introduced by
+ * -o proto option, ERR_NETPATH if error found in NETPATH
+ * environment variable, ERR_PROTO_INVALID if an unrecognized
+ * protocol is specified by user, and ERR_PROTO_UNSUPP for a
+ * recognized but invalid protocol (eg. ticlts, ticots, etc.).
+ * "error" is ignored if get_addr returns non-NULL result.
+ *
+ */
+static struct netbuf *
+get_addr(char *hostname, rpcprog_t prog, rpcvers_t vers,
+ struct netconfig **nconfp, char *proto, ushort_t port,
+ struct t_info *tinfo, caddr_t *fhp, char *fspath, err_ret_t *error)
+{
+ struct netbuf *nb = NULL;
+ struct netconfig *nconf = NULL;
+ int nci;
+ int nthtry = FIRST_TRY;
+ err_ret_t errsave_nohost, errsave_rpcerr;
+
+ SET_ERR_RET(&errsave_nohost, ERR_PROTO_NONE, 0);
+ SET_ERR_RET(&errsave_rpcerr, ERR_PROTO_NONE, 0);
+
+ SET_ERR_RET(error, ERR_PROTO_NONE, 0);
+
+ if (nconfp && *nconfp)
+ return (get_the_addr(hostname, prog, vers, *nconfp, port,
+ tinfo, fhp, fspath, error));
+ /*
+ * No nconf passed in.
+ *
+ * First search for COTS, second for CLTS unless proto
+ * is specified. When we retry, we reset the
+ * netconfig list so that we would search the whole list
+ * all over again.
+ */
+
+ /*
+ * If proto is specified, then only search for the match,
+ * otherwise try COTS first, if failed, try CLTS.
+ */
+ if (proto) {
+ /* no matching proto name */
+ SET_ERR_RET(error, ERR_PROTO_INVALID, 0);
+
+ for (nci = 0; nci < N_NETCONF_ENTS; nci++) {
+ nconf = &nca[nci];
+ if (strcmp(nconf->nc_netid, proto) != 0)
+ continue;
+
+ /* may be unsupported */
+ SET_ERR_RET(error, ERR_PROTO_UNSUPP, 0);
+
+ nb = get_the_addr(hostname, prog, vers, nconf, port,
+ tinfo, fhp, fspath, error);
+ if (nb != NULL)
+ break;
+
+ /* nb is NULL - deal with errors */
+ if (error) {
+ if (error->error_type == ERR_NOHOST) {
+ SET_ERR_RET(&errsave_nohost,
+ error->error_type,
+ error->error_value);
+ }
+ if (error->error_type == ERR_RPCERROR) {
+ SET_ERR_RET(&errsave_rpcerr,
+ error->error_type,
+ error->error_value);
+ }
+ }
+
+ /* continue with same protocol selection */
+ continue;
+ } /* end of while */
+
+ if (nci >= N_NETCONF_ENTS)
+ goto done;
+
+ if (nb == NULL &&
+ (nb = get_the_addr(hostname, prog, vers, nconf, port,
+ tinfo, fhp, fspath, error)) == NULL)
+ goto done;
+ } else {
+retry:
+ SET_ERR_RET(error, ERR_NETPATH, 0);
+ for (nci = 0; nci < N_NETCONF_ENTS; nci++) {
+ int valid_proto;
+
+ nconf = &nca[nci];
+ SET_ERR_RET(error, ERR_PROTO_NONE, 0);
+
+ if (check_nconf(nconf, nthtry, &valid_proto)) {
+ if (port != 0 && valid_proto != TRUE)
+ continue;
+
+ nb = get_the_addr(hostname, prog, vers, nconf,
+ port, tinfo, fhp, fspath, error);
+ if (nb != NULL)
+ break;
+
+ /* nb is NULL - deal with errors */
+ if (error) {
+ if (error->error_type == ERR_NOHOST) {
+ SET_ERR_RET(&errsave_nohost,
+ error->error_type,
+ error->error_value);
+ }
+
+ if (error->error_type == ERR_RPCERROR) {
+ SET_ERR_RET(&errsave_rpcerr,
+ error->error_type,
+ error->error_value);
+ }
+ }
+
+ /*
+ * Continue the same search path in the
+ * netconfig db until no more matched
+ * nconf.
+ */
+ }
+ }
+
+ if (nci >= N_NETCONF_ENTS) {
+ if (++nthtry <= MNT_PREF_LISTLEN)
+ goto retry;
+ goto done;
+ }
+
+ }
+ SET_ERR_RET(error, ERR_PROTO_NONE, 0);
+
+ /*
+ * Got nconf and nb. Now dup the netconfig structure
+ * and return it thru nconfp.
+ */
+ *nconfp = netconfig_dup(nconf);
+ if (*nconfp == NULL) {
+ free(nb);
+ nb = NULL;
+ }
+done:
+ if (nb == NULL) {
+ /*
+ * Check the saved errors. The RPC error has
+ * precedence over the no host error.
+ */
+ if (errsave_nohost.error_type != ERR_PROTO_NONE) {
+ SET_ERR_RET(error, errsave_nohost.error_type,
+ errsave_nohost.error_value);
+ }
+
+ if (errsave_rpcerr.error_type != ERR_PROTO_NONE) {
+ SET_ERR_RET(error, errsave_rpcerr.error_type,
+ errsave_rpcerr.error_value);
+ }
+ }
+
+ return (nb);
+}
+
+static int
+lx_nsl_set_sz_func()
+{
+ return (N_NETCONF_ENTS);
+}
+
+static struct netconfig *
+lx_get_ent_func(int pos)
+{
+ struct netconfig *nconf;
+
+ if (pos < 0 || pos >= N_NETCONF_ENTS)
+ return (NULL);
+
+ nconf = &nca[pos];
+ return (nconf);
+}
+
+/*
+ * Roughly based on the NFSv4 try_failover_table but used here for generic
+ * errno translation.
+ */
+static int
+rpcerr2errno(int rpcerr)
+{
+ switch (rpcerr) {
+ case RPC_INTR:
+ return (EINTR);
+ case RPC_TIMEDOUT:
+ return (ETIMEDOUT);
+ case RPC_VERSMISMATCH:
+ case RPC_PROGVERSMISMATCH:
+ case RPC_PROGUNAVAIL:
+ case RPC_PROCUNAVAIL:
+ case RPC_PMAPFAILURE:
+ case RPC_PROGNOTREGISTERED:
+ return (EPROTONOSUPPORT);
+ case RPC_AUTHERROR:
+ return (EACCES);
+ case RPC_UNKNOWNPROTO:
+ case RPC_UNKNOWNHOST:
+ return (EHOSTUNREACH);
+ case RPC_CANTENCODEARGS:
+ case RPC_CANTDECODERES:
+ case RPC_CANTDECODEARGS:
+ case RPC_CANTSEND:
+ case RPC_CANTRECV:
+ return (ECOMM);
+ case RPC_SYSTEMERROR:
+ return (ENOSR);
+ default:
+ return (EIO);
+ }
+}
+
+static int
+err2errno(int err)
+{
+ assert(err != ERR_RPCERROR);
+ switch (err) {
+ case ERR_PROTO_NONE:
+ case ERR_PROTO_INVALID:
+ case ERR_PROTO_UNSUPP:
+ return (EPROTONOSUPPORT);
+ case ERR_NETPATH:
+ return (ENOSR);
+ case ERR_NOHOST:
+ return (EHOSTUNREACH);
+ default:
+ return (EIO);
+ }
+}
+
+/*
+ * get fhandle of remote path from server's mountd
+ *
+ * Return a positive EAGAIN if the caller should retry and a -EAGAIN to
+ * indicate a fatal (Linux-oriented) error condition. Return other negative
+ * errno values to indicate different fatal errors.
+ */
+static int
+get_fh(struct nfs_args *args, char *fshost, char *fspath, int *versp,
+ struct netconfig **nconfp, ushort_t port, nfs_mnt_data_t *nmdp)
+{
+ struct fhstatus fhs;
+ struct mountres3 mountres3;
+ struct pathcnf p;
+ nfs_fh3 *fh3p;
+ struct timeval timeout = { 25, 0};
+ CLIENT *cl;
+ enum clnt_stat rpc_stat;
+ rpcvers_t outvers = 0;
+ rpcvers_t vers_to_try;
+ rpcvers_t vers_min = vers_min_default;
+ int count, i, *auths;
+
+ bzero(&fhs, sizeof (fhs));
+ bzero(&mountres3, sizeof (mountres3));
+ bzero(&p, sizeof (p));
+
+ switch (nmdp->nmd_nfsvers) {
+ case 2: /* version 2 specified try that only */
+ vers_to_try = MOUNTVERS_POSIX;
+ vers_min = MOUNTVERS;
+ break;
+ case 3: /* version 3 specified try that only */
+ vers_to_try = MOUNTVERS3;
+ vers_min = MOUNTVERS3;
+ break;
+ case 4: /* version 4 specified try that only */
+ /*
+ * This assignment is in the wrong version sequence.
+ * The above are MOUNT program and this is NFS
+ * program. However, it happens to work out since the
+ * two don't collide for NFSv4.
+ */
+ vers_to_try = NFS_V4;
+ vers_min = NFS_V4;
+ break;
+ default: /* no version specified, start with default */
+ /*
+ * If the retry version is set, use that. This will
+ * be set if the last mount attempt returned any other
+ * besides an RPC error.
+ */
+ if (nmdp->nmd_nfsretry_vers)
+ vers_to_try = nmdp->nmd_nfsretry_vers;
+ else {
+ vers_to_try = vers_max_default;
+ vers_min = vers_min_default;
+ }
+
+ break;
+ }
+
+ /*
+ * In the case of version 4, just NULL proc the server since
+ * there is no MOUNT program. If this fails, then decrease
+ * vers_to_try and continue on with regular MOUNT program
+ * processing.
+ */
+ if (vers_to_try == NFS_V4) {
+ int savevers = nmdp->nmd_nfsvers_to_use;
+ err_ret_t error;
+ int retval;
+ SET_ERR_RET(&error, ERR_PROTO_NONE, 0);
+
+ /* Let's hope for the best */
+ nmdp->nmd_nfsvers_to_use = NFS_V4;
+ retval = getaddr_nfs(args, fshost, nconfp,
+ fspath, port, &error, vers_min == NFS_V4, nmdp);
+
+ if (retval == RET_OK) {
+ *versp = nmdp->nmd_nfsvers_to_use = NFS_V4;
+ nmdp->nmd_fstype = MNTTYPE_NFS4;
+ args->fh = strdup(fspath);
+ if (args->fh == NULL) {
+ *versp = nmdp->nmd_nfsvers_to_use = savevers;
+ return (-EAGAIN);
+ }
+ return (0);
+ }
+ nmdp->nmd_nfsvers_to_use = savevers;
+
+ if (retval == RET_ERR && error.error_type == ERR_RPCERROR &&
+ error.error_value == RPC_PROGVERSMISMATCH &&
+ nmdp->nmd_nfsvers != 0) {
+ /*
+ * We had an explicit vers=N mount request which locked
+ * us in to that version, however the server does not
+ * support that version (and responded to tell us that).
+ */
+ return (-EPROTONOSUPPORT);
+ }
+
+ vers_to_try--;
+ /* If no more versions to try, let the user know. */
+ if (vers_to_try < vers_min) {
+ if (error.error_value == 0)
+ return (-EPROTONOSUPPORT);
+ return (-rpcerr2errno(error.error_value));
+ }
+
+ /*
+ * If we are here, there are more versions to try but
+ * there has been an error of some sort. If it is not
+ * an RPC error (e.g. host unknown), we just stop and
+ * return the error since the other versions would see
+ * the same error as well.
+ */
+ if (retval == RET_ERR && error.error_type != ERR_RPCERROR)
+ return (-err2errno(error.error_type));
+ }
+
+ while ((cl = clnt_create_vers(fshost, MOUNTPROG, &outvers,
+ vers_min, vers_to_try, NULL)) == NULL) {
+ if (rpc_createerr.cf_stat == RPC_UNKNOWNHOST)
+ return (-EAGAIN);
+
+ /*
+ * We don't want to downgrade version on lost packets
+ */
+ if ((rpc_createerr.cf_stat == RPC_TIMEDOUT) ||
+ (rpc_createerr.cf_stat == RPC_PMAPFAILURE))
+ return (EAGAIN);
+
+ /*
+ * back off and try the previous version - patch to the
+ * problem of version numbers not being contigous and
+ * clnt_create_vers failing (SunOS4.1 clients & SGI servers)
+ * The problem happens with most non-Sun servers who
+ * don't support mountd protocol #2. So, in case the
+ * call fails, we re-try the call anyway.
+ */
+ vers_to_try--;
+ if (vers_to_try < vers_min) {
+ if (rpc_createerr.cf_stat == RPC_PROGVERSMISMATCH)
+ return (-EAGAIN);
+
+ return (EAGAIN);
+ }
+ }
+ if (nmdp->nmd_posix && outvers < MOUNTVERS_POSIX) {
+ clnt_destroy(cl);
+ return (-EAGAIN);
+ }
+
+ if (__clnt_bindresvport(cl) < 0) {
+ clnt_destroy(cl);
+ return (EAGAIN);
+ }
+
+ if ((cl->cl_auth = authsys_create_default()) == NULL) {
+ clnt_destroy(cl);
+ return (EAGAIN);
+ }
+
+ switch (outvers) {
+ case MOUNTVERS:
+ case MOUNTVERS_POSIX:
+ *versp = nmdp->nmd_nfsvers_to_use = NFS_VERSION;
+ rpc_stat = clnt_call(cl, MOUNTPROC_MNT, xdr_dirpath,
+ (caddr_t)&fspath, xdr_fhstatus, (caddr_t)&fhs, timeout);
+ if (rpc_stat != RPC_SUCCESS) {
+ log_err("%s:%s: server not responding %s\n",
+ fshost, fspath, clnt_sperror(cl, ""));
+ clnt_destroy(cl);
+ return (EAGAIN);
+ }
+
+ if ((errno = fhs.fhs_status) != MNT_OK) {
+ clnt_destroy(cl);
+ return (-fhs.fhs_status);
+ }
+ args->fh = malloc(sizeof (fhs.fhstatus_u.fhs_fhandle));
+ if (args->fh == NULL)
+ return (-EAGAIN);
+
+ memcpy((caddr_t)args->fh, (caddr_t)&fhs.fhstatus_u.fhs_fhandle,
+ sizeof (fhs.fhstatus_u.fhs_fhandle));
+ if (!errno && nmdp->nmd_posix) {
+ rpc_stat = clnt_call(cl, MOUNTPROC_PATHCONF,
+ xdr_dirpath, (caddr_t)&fspath, xdr_ppathcnf,
+ (caddr_t)&p, timeout);
+ if (rpc_stat != RPC_SUCCESS) {
+ log_err("%s:%s: server not responding %s\n",
+ fshost, fspath, clnt_sperror(cl, ""));
+ free(args->fh);
+ clnt_destroy(cl);
+ return (EAGAIN);
+ }
+ if (_PC_ISSET(_PC_ERROR, p.pc_mask)) {
+ free(args->fh);
+ clnt_destroy(cl);
+ return (-EAGAIN);
+ }
+ args->flags |= NFSMNT_POSIX;
+ args->pathconf = malloc(sizeof (p));
+ if (args->pathconf == NULL) {
+ free(args->fh);
+ clnt_destroy(cl);
+ return (-EAGAIN);
+ }
+ memcpy((caddr_t)args->pathconf, (caddr_t)&p,
+ sizeof (p));
+ }
+ break;
+
+ case MOUNTVERS3:
+ *versp = nmdp->nmd_nfsvers_to_use = NFS_V3;
+ rpc_stat = clnt_call(cl, MOUNTPROC_MNT, xdr_dirpath,
+ (caddr_t)&fspath, xdr_mountres3, (caddr_t)&mountres3,
+ timeout);
+ if (rpc_stat != RPC_SUCCESS) {
+ log_err("%s:%s: server not responding %s\n",
+ fshost, fspath, clnt_sperror(cl, ""));
+ clnt_destroy(cl);
+ return (EAGAIN);
+ }
+
+ /*
+ * Assume here that most of the MNT3ERR_*
+ * codes map into E* errors. See the nfsstat enum for values.
+ */
+ if ((errno = mountres3.fhs_status) != MNT_OK) {
+ clnt_destroy(cl);
+ return (-mountres3.fhs_status);
+ }
+
+ fh3p = (nfs_fh3 *)malloc(sizeof (*fh3p));
+ if (fh3p == NULL)
+ return (-EAGAIN);
+
+ fh3p->fh3_length =
+ mountres3.mountres3_u.mountinfo.fhandle.fhandle3_len;
+ (void) memcpy(fh3p->fh3_u.data,
+ mountres3.mountres3_u.mountinfo.fhandle.fhandle3_val,
+ fh3p->fh3_length);
+ args->fh = (caddr_t)fh3p;
+ nmdp->nmd_fstype = MNTTYPE_NFS3;
+
+ /*
+ * Check the security flavor to be used.
+ *
+ * If "secure" or "sec=flavor" is a mount
+ * option, check if the server supports the "flavor".
+ * If the server does not support the flavor, return
+ * error.
+ *
+ * If no mount option is given then look for default auth
+ * (default auth entry in /etc/nfssec.conf) in the auth list
+ * returned from server. If default auth not found, then use
+ * the first supported security flavor (by the client) in the
+ * auth list returned from the server.
+ *
+ */
+ auths =
+ mountres3.mountres3_u.mountinfo.auth_flavors
+ .auth_flavors_val;
+ count =
+ mountres3.mountres3_u.mountinfo.auth_flavors
+ .auth_flavors_len;
+
+ if (count <= 0) {
+ clnt_destroy(cl);
+ return (-EAGAIN);
+ }
+
+ if (nmdp->nmd_sec_opt) {
+ for (i = 0; i < count; i++) {
+ if (auths[i] == nmdp->nmd_nfs_sec.sc_nfsnum)
+ break;
+ }
+ if (i == count)
+ goto autherr;
+ } else {
+ /* AUTH_UNIX is the default. */
+ strlcpy(nmdp->nmd_nfs_sec.sc_name, "sys", MAX_NAME_LEN);
+ nmdp->nmd_nfs_sec.sc_nfsnum = 1;
+ }
+ break;
+ default:
+ clnt_destroy(cl);
+ return (-EAGAIN);
+ }
+
+ clnt_destroy(cl);
+ return (0);
+
+autherr:
+ clnt_destroy(cl);
+ return (-EAGAIN);
+}
+
+/*
+ * Fill in the address for the server's NFS service and fill in a knetconfig
+ * structure for the transport that the service is available on.
+ *
+ * Return a positive EAGAIN if the caller should retry and a -EAGAIN to
+ * indicate a fatal (Linux-oriented) error condition. Return other negative
+ * errno values to indicate different fatal errors.
+ */
+static int
+getaddr_nfs(struct nfs_args *args, char *fshost, struct netconfig **nconfp,
+ char *fspath, ushort_t port, err_ret_t *error, bool_t print_rpcerror,
+ nfs_mnt_data_t *nmdp)
+{
+ struct stat sb;
+ struct netconfig *nconf;
+ struct knetconfig *knconfp;
+ struct t_info tinfo;
+ err_ret_t addr_error;
+
+ SET_ERR_RET(error, ERR_PROTO_NONE, 0);
+ SET_ERR_RET(&addr_error, ERR_PROTO_NONE, 0);
+
+ args->addr = get_addr(fshost, NFS_PROGRAM, nmdp->nmd_nfsvers_to_use,
+ nconfp, nmdp->nmd_nfs_proto, port, &tinfo, &args->fh, fspath,
+ &addr_error);
+
+ if (args->addr == NULL) {
+ switch (addr_error.error_type) {
+ case 0:
+ break;
+ case ERR_RPCERROR:
+ if (!print_rpcerror)
+ /* no error print at this time */
+ break;
+ log_err("%s NFS service not available %s\n", fshost,
+ clnt_sperrno(addr_error.error_value));
+ break;
+ case ERR_NETPATH:
+ log_err("%s: Error in NETPATH.\n", fshost);
+ break;
+ case ERR_PROTO_INVALID:
+ log_err("%s: NFS service does not recognize "
+ "protocol: %s.\n", fshost, nmdp->nmd_nfs_proto);
+ break;
+ case ERR_PROTO_UNSUPP:
+ if (nmdp->nmd_nfsvers ||
+ nmdp->nmd_nfsvers_to_use == NFS_VERSMIN) {
+ log_err("%s: NFS service does"
+ " not support protocol: %s.\n",
+ fshost, nmdp->nmd_nfs_proto);
+ }
+ break;
+ case ERR_NOHOST:
+ log_err("%s: %s\n", fshost, "Unknown host");
+ break;
+ default:
+ /* case ERR_PROTO_NONE falls through */
+ log_err("%s: NFS service not responding\n", fshost);
+ break;
+ }
+
+ SET_ERR_RET(error,
+ addr_error.error_type, addr_error.error_value);
+ if (addr_error.error_type == ERR_PROTO_NONE) {
+ return (EAGAIN);
+ } else if (addr_error.error_type == ERR_RPCERROR &&
+ !IS_UNRECOVERABLE_RPC(addr_error.error_value)) {
+ return (EAGAIN);
+ } else if (nmdp->nmd_nfsvers == 0 &&
+ addr_error.error_type == ERR_PROTO_UNSUPP &&
+ nmdp->nmd_nfsvers_to_use != NFS_VERSMIN) {
+ /*
+ * If no version is specified, and the error is due
+ * to an unsupported transport, then decrement the
+ * version and retry.
+ */
+ return (EAGAIN);
+ } else if (addr_error.error_type != ERR_RPCERROR) {
+ return (-err2errno(addr_error.error_type));
+ } else {
+ return (-rpcerr2errno(addr_error.error_value));
+ }
+ }
+ nconf = *nconfp;
+
+ if (stat(nconf->nc_device, &sb) < 0)
+ return (-ENOSR);
+
+ knconfp = (struct knetconfig *)malloc(sizeof (*knconfp));
+ if (!knconfp)
+ return (-ENOMEM);
+
+ knconfp->knc_semantics = nconf->nc_semantics;
+ knconfp->knc_protofmly = nconf->nc_protofmly;
+ knconfp->knc_proto = nconf->nc_proto;
+ knconfp->knc_rdev = sb.st_rdev;
+
+ /* make sure we don't overload the transport */
+ if (tinfo.tsdu > 0 && tinfo.tsdu < NFS_MAXDATA + NFS_RPC_HDR) {
+ args->flags |= (NFSMNT_RSIZE | NFSMNT_WSIZE);
+ if (args->rsize == 0 || args->rsize > tinfo.tsdu - NFS_RPC_HDR)
+ args->rsize = tinfo.tsdu - NFS_RPC_HDR;
+ if (args->wsize == 0 || args->wsize > tinfo.tsdu - NFS_RPC_HDR)
+ args->wsize = tinfo.tsdu - NFS_RPC_HDR;
+ }
+
+ args->flags |= NFSMNT_KNCONF;
+ args->knconf = knconfp;
+ return (0);
+}
+
+static int
+retry(struct mnttab *mntp, int mntflags, nfs_mnt_data_t *nmdp)
+{
+ int delay = 5;
+ int count = nmdp->nmd_retries;
+ int r = -EAGAIN;
+ char *p;
+
+ if (nmdp->nmd_bg) {
+ if (fork() > 0)
+ return (0);
+ } else {
+ p = strchr(mntp->mnt_special, ':');
+ if (p != NULL)
+ *p = '\0';
+ log_err("%s: server not responding\n", mntp->mnt_special);
+ if (p != NULL)
+ *p = ':';
+ }
+
+ while (count--) {
+ err_ret_t retry_error;
+
+ if ((r = mount_nfs(mntp, mntflags, &retry_error, nmdp)) == 0)
+ return (0);
+
+ if (r != EAGAIN)
+ break;
+
+ if (count > 0) {
+ (void) sleep(delay);
+ delay *= 2;
+ if (delay > 120)
+ delay = 120;
+ }
+ p = strchr(mntp->mnt_special, ':');
+ if (p != NULL)
+ *p = '\0';
+ log_err("%s: server not responding\n", mntp->mnt_special);
+ if (p != NULL)
+ *p = ':';
+ }
+
+ if (!nmdp->nmd_nfsretry_vers)
+ log_err("giving up on: %s\n", mntp->mnt_mountp);
+
+ if (r > 0)
+ r = -EAGAIN;
+ return (r);
+}
+
+static int
+append_opt(char *optstr, int len, char *k, char *v)
+{
+ int i;
+
+ for (i = 0; nmo_tab[i].nmo_lx_opt != NULL; i++) {
+ if (strcmp(k, nmo_tab[i].nmo_lx_opt) == 0) {
+ switch (nmo_tab[i].nmo_argtyp) {
+ case MOUNT_OPT_INVALID:
+ lx_unsupported("invalid NFS mount option: %s",
+ k);
+ return (-EINVAL);
+
+ case MOUNT_OPT_PASTHRU:
+ if (*optstr != '\0')
+ (void) strlcat(optstr, ",", len);
+ if (v == NULL) {
+ (void) strlcat(optstr, k, len);
+ } else {
+ (void) strlcat(optstr, k, len);
+ (void) strlcat(optstr, "=", len);
+ (void) strlcat(optstr, v, len);
+ }
+ break;
+
+ case MOUNT_OPT_IGNORE:
+ break;
+
+ case MOUNT_OPT_TOKEN:
+ if (*optstr != '\0')
+ (void) strlcat(optstr, ",", len);
+ (void) strlcat(optstr,
+ nmo_tab[i].nmo_il_opt, len);
+ break;
+
+ case MOUNT_OPT_HAS_ARG:
+ if (*optstr != '\0')
+ (void) strlcat(optstr, ",", len);
+ (void) strlcat(optstr,
+ nmo_tab[i].nmo_il_opt, len);
+ (void) strlcat(optstr, "=", len);
+ (void) strlcat(optstr, v, len);
+ break;
+ }
+ break;
+ }
+ }
+
+ return (0);
+}
+
+static int
+get_nfs_kv(char *vs, char **kp, char **vp)
+{
+ char *p;
+
+ p = strchr(vs, '=');
+ if (p == NULL) {
+ *kp = vs;
+ return (1);
+ }
+
+ *vp = p + 1;
+ *p = '\0';
+ *kp = vs;
+ return (0);
+}
+
+/*
+ * Convert the Linux-specific opt string into an Illumos opt string. We also
+ * fix up the special string (host:/path) to use the address that the
+ * user-level mount code has looked up. This overwrites both the srcp special
+ * string and the mntopts string.
+ *
+ * example input string, given 'nolock' as the only user-level option:
+ * nolock,vers=4,addr=127.0.0.1,clientaddr=0.0.0.0
+ *
+ * opt string (all one line) from a Centos 6 distro given 'nolock,vers=3' as
+ * the explicit options:
+ * nolock,addr=127.0.0.1,vers=3,proto=tcp,mountvers=3,mountproto=tcp,
+ * mountport=1892
+ *
+ * This is an example emitted by the Ubuntu 14.04 automounter for an explicit
+ * v3 mount:
+ * timeo=60,soft,intr,sloppy,addr=10.88.88.200,vers=3,proto=tcp,
+ * mountvers=3,mountproto=tcp,mountport=63484
+ */
+static int
+convert_nfs_arg_str(char *srcp, char *mntopts)
+{
+ char *key, *val, *p;
+ char tmpbuf[MAX_MNTOPT_STR];
+ char *tbp = tmpbuf;
+ boolean_t no_sec = B_TRUE;
+
+ (void) strlcpy(tmpbuf, mntopts, sizeof (tmpbuf));
+ *mntopts = '\0';
+
+ while ((p = strsep(&tbp, ",")) != NULL) {
+ int tok;
+
+ tok = get_nfs_kv(p, &key, &val);
+
+ if (tok == 0) {
+ if (strcmp(key, "addr") == 0) {
+ /*
+ * The Linux user-level code looked up the
+ * address of the NFS server. We need to
+ * substitute that into the special string.
+ */
+ char *pp;
+ char spec[MAXPATHLEN + LX_NMD_MAXHOSTNAMELEN
+ + 1];
+
+ strlcpy(spec, srcp, sizeof (spec));
+ pp = strchr(spec, ':');
+ if (pp == NULL)
+ return (-EINVAL);
+
+ pp++;
+ (void) snprintf(srcp,
+ MAXPATHLEN + LX_NMD_MAXHOSTNAMELEN + 1,
+ "%s:%s", val, pp);
+ } else if (strcmp(key, "clientaddr") == 0) {
+ /*
+ * Ignore, this is an artifact of the
+ * user-level lx mount code.
+ */
+ } else if (strcmp(key, "vers") == 0) {
+ /*
+ * This may be implicitly or explicitly passed.
+ * Check for the versions we want to support.
+ */
+ int r;
+ int v = atoi(val);
+
+ if (v != 3 && v != 4)
+ return (-EINVAL);
+
+ r = append_opt(mntopts, MAX_MNTOPT_STR,
+ key, val);
+ if (r != 0)
+ return (r);
+
+ } else if (strcmp(key, "sec") == 0) {
+ no_sec = B_FALSE;
+ } else {
+ int r;
+
+ r = append_opt(mntopts, MAX_MNTOPT_STR,
+ key, val);
+ if (r != 0)
+ return (r);
+ }
+ } else {
+ int r;
+
+ r = append_opt(mntopts, MAX_MNTOPT_STR, key, NULL);
+ if (r != 0)
+ return (r);
+ }
+ }
+
+ if (no_sec) {
+ /*
+ * XXX Temporarily work around missing DES auth by defaulting
+ * to sec=sys.
+ */
+ int r;
+
+ r = append_opt(mntopts, MAX_MNTOPT_STR, "sec", "sys");
+ if (r != 0)
+ return (r);
+ }
+
+ return (0);
+}
+
+int
+lx_nfs_mount(char *srcp, char *mntp, char *fst, int lx_flags, char *opts)
+{
+ struct mnttab mnt;
+ int r;
+ int il_flags = 0;
+ err_ret_t retry_error;
+ nfs_mnt_data_t nmd;
+
+ _nsl_brand_set_hooks(lx_nsl_set_sz_func, lx_get_ent_func);
+
+ bzero(&nmd, sizeof (nmd));
+ nmd.nmd_retries = BIGRETRY;
+ nmd.nmd_fstype = MNTTYPE_NFS;
+
+ /*
+ * This will modify the special string so that the hostname passed
+ * in will be replaced with the host address that the user-land code
+ * looked up. Thus the rest of the code down the mount_nfs path will
+ * be working with that IP address in places were it references the
+ * 'hostname'. This also converts the opts string so that we'll be
+ * dealing with Illumos options after this.
+ */
+ convert_nfs_arg_str(srcp, opts);
+
+ /* Linux seems to always allow overlay mounts */
+ il_flags |= MS_OVERLAY;
+
+ /* Convert some Linux flags to Illumos flags. */
+ if (lx_flags & LX_MS_RDONLY)
+ il_flags |= MS_RDONLY;
+ if (lx_flags & LX_MS_NOSUID)
+ il_flags |= MS_NOSUID;
+ if (lx_flags & LX_MS_REMOUNT)
+ il_flags |= MS_REMOUNT;
+
+ /*
+ * Convert some Linux flags to Illumos option strings.
+ */
+ if (lx_flags & LX_MS_STRICTATIME) {
+ /*
+ * The "strictatime" mount option ensures that none of the
+ * weaker atime-related mode options are in effect.
+ */
+ lx_flags &= ~(LX_MS_RELATIME | LX_MS_NOATIME);
+ }
+ if ((lx_flags & LX_MS_NODEV) &&
+ ((r = i_add_option("nodev", opts, MAX_MNTOPT_STR)) != 0))
+ return (r);
+ if ((lx_flags & LX_MS_NOEXEC) &&
+ ((r = i_add_option("noexec", opts, MAX_MNTOPT_STR)) != 0))
+ return (r);
+ if ((lx_flags & LX_MS_NOATIME) &&
+ ((r = i_add_option("noatime", opts, MAX_MNTOPT_STR)) != 0))
+ return (r);
+
+ mnt.mnt_special = srcp;
+ mnt.mnt_mountp = mntp;
+ mnt.mnt_mntopts = opts;
+
+ SET_ERR_RET(&retry_error, ERR_PROTO_NONE, 0);
+ r = mount_nfs(&mnt, il_flags, &retry_error, &nmd);
+
+ /* A negative errno return means we're done. */
+ if (r < 0)
+ return (r);
+
+ if (r == EAGAIN && nmd.nmd_retries) {
+ /*
+ * Check the error code from the last mount attempt. If it was
+ * an RPC error, then retry as is. Otherwise we retry with the
+ * nmd_nfsretry_vers set. It is set by decrementing
+ * nmd_nfsvers_to_use.
+ */
+ if (retry_error.error_type != 0) {
+ if (retry_error.error_type != ERR_RPCERROR) {
+ nmd.nmd_nfsretry_vers =
+ nmd.nmd_nfsvers_to_use =
+ nmd.nmd_nfsvers_to_use - 1;
+ if (nmd.nmd_nfsretry_vers < NFS_VERSMIN)
+ return (-EAGAIN);
+ }
+ }
+
+ r = retry(&mnt, il_flags, &nmd);
+ }
+
+ /* Convert positve EAGAIN into a valid errno. */
+ if (r > 0)
+ return (-EAGAIN);
+
+ return (0);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/pgrp.c b/usr/src/lib/brand/lx/lx_brand/common/pgrp.c
new file mode 100644
index 0000000000..33b4bb7667
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/pgrp.c
@@ -0,0 +1,172 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_syscall.h>
+
+long
+lx_getpgrp(void)
+{
+ int ret;
+
+ ret = getpgrp();
+
+ /*
+ * If the pgrp is that of the init process, return the value Linux
+ * expects.
+ */
+ if (ret == zoneinit_pid)
+ return (LX_INIT_PGID);
+
+ return ((ret == -1) ? -errno : ret);
+}
+
+long
+lx_getpgid(uintptr_t p1)
+{
+ pid_t spid;
+ int pid = (int)p1;
+ int ret;
+
+ if (pid < 0)
+ return (-ESRCH);
+
+ /*
+ * If the supplied pid matches that of the init process, return
+ * the pgid Linux expects.
+ */
+ if (pid == zoneinit_pid)
+ return (LX_INIT_PGID);
+
+ if ((ret = lx_lpid_to_spid(pid, &spid)) < 0)
+ return (ret);
+
+ ret = getpgid(spid);
+
+ /*
+ * If the pgid is that of the init process, return the value Linux
+ * expects.
+ */
+ if (ret == zoneinit_pid)
+ return (LX_INIT_PGID);
+
+ return ((ret == -1) ? -errno : ret);
+}
+
+long
+lx_setpgid(uintptr_t p1, uintptr_t p2)
+{
+ pid_t pid = (pid_t)p1;
+ pid_t pgid = (pid_t)p2;
+ pid_t spid, spgid;
+ int ret;
+
+ if (pid < 0)
+ return (-ESRCH);
+
+ if (pgid < 0)
+ return (-EINVAL);
+
+ if ((ret = lx_lpid_to_spid(pid, &spid)) < 0)
+ return (ret);
+
+ if (pgid == 0)
+ spgid = spid;
+ else if ((ret = lx_lpid_to_spid(pgid, &spgid)) < 0)
+ return (ret);
+
+ ret = setpgid(spid, spgid);
+
+ if (ret != 0 && errno == EPERM) {
+ /*
+ * On Linux, calling setpgid with a desired pgid that is equal
+ * to the current pgid of the process, no error is emitted.
+ * This differs slightly from illumos which will return EPERM.
+ *
+ * To emulate the Linux behavior, we check specifically for
+ * matching pgids if an EPERM is encountered.
+ */
+ if (spgid == getpgid(spid))
+ return (0);
+ else
+ return (-EPERM);
+ }
+
+ return ((ret == 0) ? 0 : -errno);
+}
+
+long
+lx_getsid(uintptr_t p1)
+{
+ pid_t spid;
+ int pid = (int)p1;
+ int ret;
+
+ if (pid < 0)
+ return (-ESRCH);
+
+ /*
+ * If the supplied matches that of the init process, return the value
+ * Linux expects.
+ */
+ if (pid == zoneinit_pid)
+ return (LX_INIT_SID);
+
+ if ((ret = lx_lpid_to_spid(pid, &spid)) < 0)
+ return (ret);
+
+ ret = getsid(spid);
+
+ /*
+ * If the sid is that of the init process, return the value Linux
+ * expects.
+ */
+ if (ret == zoneinit_pid)
+ return (LX_INIT_SID);
+
+ return ((ret == -1) ? -errno : ret);
+}
+
+long
+lx_setsid(void)
+{
+ int ret;
+
+ ret = setsid();
+
+ /*
+ * If the pgid is that of the init process, return the value Linux
+ * expects.
+ */
+ if (ret == zoneinit_pid)
+ return (LX_INIT_SID);
+
+ return ((ret == -1) ? -errno : ret);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/priority.c b/usr/src/lib/brand/lx/lx_brand/common/priority.c
new file mode 100644
index 0000000000..5974abe40e
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/priority.c
@@ -0,0 +1,117 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc. All rights reserved.
+ */
+
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_types.h>
+#include <sys/resource.h>
+#include <sys/lx_misc.h>
+#include <sched.h>
+
+/*
+ * The Linux syscall returns priorities in the range (lowest) 40-1 (highest)
+ * and then glibc adjusts these to the range -20 - 19.
+ */
+long
+lx_getpriority(uintptr_t p1, uintptr_t p2)
+{
+ int which = (int)p1;
+ id_t who = (id_t)p2;
+ int ret;
+
+ /*
+ * The only valid values for 'which' are positive integers, and unlike
+ * Solaris, linux doesn't support anything past PRIO_USER.
+ */
+ if (which < 0 || which > PRIO_USER)
+ return (-EINVAL);
+
+ lx_debug("\tgetpriority(%d, %d)", which, who);
+
+ errno = 0;
+
+ if ((which == PRIO_PROCESS) && (who == 1))
+ who = zoneinit_pid;
+
+ ret = getpriority(which, who);
+ if (ret == -1 && errno != 0) {
+ pid_t mypid = getpid();
+
+ if (which == PRIO_PROCESS &&
+ (who == mypid || who == 0 || who == P_MYID) &&
+ sched_getscheduler(mypid) == SCHED_RR) {
+ /*
+ * The getpriority kernel handling will always return
+ * an error if we're in the RT class. The zone itself
+ * won't be able to put itself or any of its processes
+ * into RT but if we put the whole zone into RT via
+ * the scheduling-class property, then getpriority will
+ * always fail. This breaks pam and prevents any login.
+ * Just pretend to be the highest priority.
+ */
+ return (1);
+ }
+
+ /*
+ * Linux does not return EINVAL for invalid 'who' values, it
+ * returns ESRCH instead. We already validated 'which' above.
+ */
+ if (errno == EINVAL)
+ errno = ESRCH;
+ return (-errno);
+ }
+
+ /*
+ * The return value of the getpriority syscall is biased by 20 to avoid
+ * returning negative values when successful.
+ */
+ return (20 - ret);
+}
+
+long
+lx_setpriority(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ int which = (int)p1;
+ id_t who = (id_t)p2;
+ int prio = (int)p3;
+ int rval;
+
+ if (which > PRIO_USER)
+ return (-EINVAL);
+
+ lx_debug("\tsetpriority(%d, %d, %d)", which, who, prio);
+
+ if ((which == PRIO_PROCESS) && (who == 1))
+ who = zoneinit_pid;
+
+ rval = setpriority(which, who, prio);
+
+ return ((rval == -1) ? -errno : rval);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/ptrace.c b/usr/src/lib/brand/lx/lx_brand/common/ptrace.c
new file mode 100644
index 0000000000..bb6e52a112
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/ptrace.c
@@ -0,0 +1,105 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc. All rights reserved.
+ */
+
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_signal.h>
+#include <sys/lx_thread.h>
+#include <sys/lwp.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <procfs.h>
+#include <sys/frame.h>
+#include <strings.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <sys/wait.h>
+#include <sys/auxv.h>
+#include <thread.h>
+#include <pthread.h>
+#include <synch.h>
+#include <elf.h>
+#include <ieeefp.h>
+#include <assert.h>
+#include <libintl.h>
+#include <lx_syscall.h>
+
+/*
+ * Much of the Linux ptrace(2) emulation is performed in the kernel, and there
+ * is a block comment in "lx_ptrace.c" that describes the facility in some
+ * detail.
+ */
+
+
+void
+lx_ptrace_stop_if_option(int option, boolean_t child, ulong_t msg,
+ ucontext_t *ucp)
+{
+ /*
+ * We call into the kernel to see if we need to stop for specific
+ * ptrace(2) events.
+ */
+ lx_debug("lx_ptrace_stop_if_option(%d, %s, %lu, %p)", option,
+ child ? "TRUE [child]" : "FALSE [parent]", msg, ucp);
+ if (ucp == NULL) {
+ ucp = (ucontext_t *)lx_find_brand_uc();
+ lx_debug("\tucp = %p", ucp);
+ }
+ if (syscall(SYS_brand, B_PTRACE_STOP_FOR_OPT, option, child, msg,
+ ucp) != 0) {
+ if (errno != ESRCH) {
+ /*
+ * This should _only_ fail if we are not traced, or do
+ * not have this option set.
+ */
+ lx_err_fatal("B_PTRACE_STOP_FOR_OPT failed: %s",
+ strerror(errno));
+ }
+ }
+}
+
+/*
+ * Signal to the in-kernel ptrace(2) subsystem that the next native fork() or
+ * thr_create() is part of an emulated fork(2) or clone(2). If PTRACE_CLONE
+ * was passed to clone(2), inherit_flag should be B_TRUE.
+ */
+void
+lx_ptrace_clone_begin(int option, boolean_t inherit_flag)
+{
+ lx_debug("lx_ptrace_clone_begin(%d, %sPTRACE_CLONE)", option,
+ inherit_flag ? "" : "!");
+ if (syscall(SYS_brand, B_PTRACE_CLONE_BEGIN, option,
+ inherit_flag) != 0) {
+ lx_err_fatal("B_PTRACE_CLONE_BEGIN failed: %s",
+ strerror(errno));
+ }
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/sched.c b/usr/src/lib/brand/lx/lx_brand/common/sched.c
new file mode 100644
index 0000000000..97d4625824
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/sched.c
@@ -0,0 +1,622 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/cred_impl.h>
+#include <sys/ucred.h>
+#include <ucred.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <errno.h>
+#include <sched.h>
+#include <strings.h>
+#include <pthread.h>
+#include <time.h>
+#include <thread.h>
+#include <alloca.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_sched.h>
+
+/* Linux only has three valid policies, SCHED_FIFO, SCHED_RR and SCHED_OTHER */
+static int
+validate_policy(int policy)
+{
+ switch (policy) {
+ case LX_SCHED_FIFO:
+ return (SCHED_FIFO);
+
+ case LX_SCHED_RR:
+ return (SCHED_RR);
+
+ case LX_SCHED_OTHER:
+ return (SCHED_OTHER);
+
+ default:
+ lx_debug("validate_policy: illegal policy: %d", policy);
+ return (-EINVAL);
+ }
+}
+
+/*
+ * Check to see if we have the permissions to set scheduler parameters and
+ * policy, based on Linux' demand that such commands fail with errno set to
+ * EPERM if the current euid is not the euid or ruid of the process in
+ * question.
+ */
+static int
+check_schedperms(pid_t pid)
+{
+ size_t sz;
+ ucred_t *cr;
+ uid_t euid;
+
+ euid = geteuid();
+
+ if (pid == getpid()) {
+ /*
+ * If we're the process to be checked, simply check the euid
+ * against our ruid.
+ */
+ if (euid != getuid())
+ return (-EPERM);
+
+ return (0);
+ }
+
+ /*
+ * We allocate a ucred_t ourselves rather than call ucred_get(3C)
+ * because ucred_get() calls malloc(3C), which the brand library cannot
+ * use. Because we allocate the space with SAFE_ALLOCA(), there's
+ * no need to free it when we're done.
+ */
+ sz = ucred_size();
+ cr = (ucred_t *)SAFE_ALLOCA(sz);
+
+ if (cr == NULL)
+ return (-ENOMEM);
+
+ /*
+ * If we can't access the process' credentials, fail with errno EPERM
+ * as the call would not have succeeded anyway.
+ */
+ if (syscall(SYS_ucredsys, UCREDSYS_UCREDGET, pid, cr) != 0)
+ return ((errno == EACCES) ? -EPERM : -errno);
+
+ if ((euid != ucred_geteuid(cr)) && (euid != ucred_getruid(cr)))
+ return (-EPERM);
+
+ return (0);
+}
+
+static int
+ltos_sparam(int policy, struct lx_sched_param *lsp, struct sched_param *sp)
+{
+ struct lx_sched_param ls;
+ int smin = sched_get_priority_min(policy);
+ int smax = sched_get_priority_max(policy);
+
+ if (uucopy(lsp, &ls, sizeof (struct lx_sched_param)) != 0)
+ return (-errno);
+
+ bzero(sp, sizeof (struct sched_param));
+
+ /*
+ * Linux has a fixed priority range, 0 - 99, which we need to convert to
+ * Solaris's dynamic range. Linux considers lower numbers to be
+ * higher priority, so we'll invert the priority within Solaris's range.
+ *
+ * The formula to convert between ranges is:
+ *
+ * L * (smax - smin)
+ * S = ----------------- + smin
+ * (lmax - lmin)
+ *
+ * where S is the Solaris equivalent of the linux priority L.
+ *
+ * To invert the priority, we use:
+ * S' = smax - S + smin
+ *
+ * Together, these two formulas become:
+ *
+ * L * (smax - smin)
+ * S = smax - ----------------- + 2smin
+ * 99
+ */
+ sp->sched_priority = smax -
+ ((ls.lx_sched_prio * (smax - smin)) / LX_PRI_MAX) + 2*smin;
+
+ lx_debug("ltos_sparam: linux prio %d = Solaris prio %d "
+ "(Solaris range %d,%d)\n", ls.lx_sched_prio, sp->sched_priority,
+ smin, smax);
+
+ return (0);
+}
+
+static int
+stol_sparam(int policy, struct sched_param *sp, struct lx_sched_param *lsp)
+{
+ struct lx_sched_param ls;
+ int smin = sched_get_priority_min(policy);
+ int smax = sched_get_priority_max(policy);
+
+ if (policy == SCHED_OTHER) {
+ /*
+ * In Linux, the only valid SCHED_OTHER scheduler priority is 0
+ */
+ ls.lx_sched_prio = 0;
+ } else {
+ /*
+ * Convert Solaris's dynamic, inverted priority range to the
+ * fixed Linux range of 1 - 99.
+ *
+ * The formula is (see above):
+ *
+ * (smax - s + 2smin) * 99
+ * l = -----------------------
+ * smax - smin
+ */
+ ls.lx_sched_prio = ((smax - sp->sched_priority + 2*smin) *
+ LX_PRI_MAX) / (smax - smin);
+ }
+
+ lx_debug("stol_sparam: policy %d: Solaris prio %d = linux prio %d "
+ "(Solaris range %d,%d)\n", policy,
+ sp->sched_priority, ls.lx_sched_prio, smin, smax);
+
+ return ((uucopy(&ls, lsp, sizeof (struct lx_sched_param)) != 0)
+ ? -errno : 0);
+}
+
+#define BITINDEX(ind) (ind / (sizeof (uint_t) * 8))
+#define BITSHIFT(ind) (1 << (ind % (sizeof (uint_t) * 8)))
+
+/* ARGSUSED */
+long
+lx_sched_getaffinity(uintptr_t pid, uintptr_t len, uintptr_t maskp)
+{
+ int sz;
+ uint_t *lmask, *zmask;
+ int i;
+
+ sz = syscall(SYS_brand, B_GET_AFFINITY_MASK, pid, len, maskp);
+ if (sz == -1)
+ return (-errno);
+
+ /*
+ * If the target LWP hasn't ever had an affinity mask set, the kernel
+ * will return a mask of all 0's. If that is the case we must build a
+ * default mask that has all valid bits turned on.
+ */
+ lmask = SAFE_ALLOCA(sz);
+ zmask = SAFE_ALLOCA(sz);
+ if (lmask == NULL || zmask == NULL)
+ return (-ENOMEM);
+
+ bzero(zmask, sz);
+
+ if (uucopy((void *)maskp, lmask, sz) != 0)
+ return (-EFAULT);
+
+ if (bcmp(lmask, zmask, sz) != 0)
+ return (sz);
+
+ for (i = 0; i < sz * 8; i++) {
+ if (p_online(i, P_STATUS) != -1) {
+ lmask[BITINDEX(i)] |= BITSHIFT(i);
+ }
+ }
+
+ if (uucopy(lmask, (void *)maskp, sz) != 0)
+ return (-EFAULT);
+
+ return (sz);
+}
+
+/* ARGSUSED */
+long
+lx_sched_setaffinity(uintptr_t pid, uintptr_t len, uintptr_t maskp)
+{
+ int ret;
+ int sz;
+ int i;
+ int found;
+ uint_t *lmask;
+ pid_t s_pid;
+ lwpid_t s_tid;
+ processorid_t cpuid = NULL;
+
+ if ((pid_t)pid < 0)
+ return (-EINVAL);
+
+ if (lx_lpid_to_spair(pid, &s_pid, &s_tid) < 0)
+ return (-ESRCH);
+
+ /*
+ * We only support setting affinity masks for threads in
+ * the calling process.
+ */
+ if (s_pid != getpid())
+ return (-EPERM);
+
+ /*
+ * First, get the minimum bitmask size from the kernel.
+ */
+ sz = syscall(SYS_brand, B_GET_AFFINITY_MASK, 0, 0, 0);
+ if (sz == -1)
+ return (-errno);
+
+ lmask = SAFE_ALLOCA(sz);
+ if (lmask == NULL)
+ return (-ENOMEM);
+
+ if (uucopy((void *)maskp, lmask, sz) != 0)
+ return (-EFAULT);
+
+ /*
+ * Make sure the mask contains at least one processor that is
+ * physically on the system. Reduce the user's mask to the set of
+ * physically present CPUs. Keep track of how many valid
+ * bits are set in the user's mask.
+ */
+
+ for (found = 0, i = 0; i < sz * 8; i++) {
+ if (p_online(i, P_STATUS) == -1) {
+ /*
+ * This CPU doesn't exist, so clear this bit from
+ * the user's mask.
+ */
+ lmask[BITINDEX(i)] &= ~BITSHIFT(i);
+ continue;
+ }
+
+ if ((lmask[BITINDEX(i)] & BITSHIFT(i)) == BITSHIFT(i)) {
+ found++;
+ cpuid = i;
+ }
+ }
+
+ if (found == 0) {
+ lx_debug("\tlx_sched_setaffinity: mask has no present CPUs\n");
+ return (-EINVAL);
+ }
+
+ /*
+ * If only one bit is set, bind the thread to that procesor;
+ * otherwise, clear the binding.
+ */
+ if (found == 1) {
+ lx_debug("\tlx_sched_setaffinity: binding thread %d to cpu%d\n",
+ s_tid, cpuid);
+ if (processor_bind(P_LWPID, s_tid, cpuid, NULL) != 0)
+ /*
+ * It could be that the requested processor is offline,
+ * so we'll just abandon our good-natured attempt to
+ * bind to it.
+ */
+ lx_debug("couldn't bind LWP %d to cpu %d: %s\n", s_tid,
+ cpuid, strerror(errno));
+ } else {
+ lx_debug("\tlx_sched_setaffinity: clearing thr %d binding\n",
+ s_tid);
+ if (processor_bind(P_LWPID, s_tid, PBIND_NONE, NULL) != 0) {
+ lx_debug("couldn't clear CPU binding for LWP %d: %s\n",
+ s_tid, strerror(errno));
+ }
+ }
+
+ /*
+ * Finally, ask the kernel to make a note of our current (though fairly
+ * meaningless) affinity mask.
+ */
+ ret = syscall(SYS_brand, B_SET_AFFINITY_MASK, pid, sz, lmask);
+
+ return ((ret == 0) ? 0 : -errno);
+}
+
+long
+lx_sched_getparam(uintptr_t pid, uintptr_t param)
+{
+ int policy, ret;
+ pid_t s_pid;
+ lwpid_t s_tid;
+
+ struct sched_param sp;
+
+ if (((pid_t)pid < 0) || (param == NULL))
+ return (-EINVAL);
+
+ if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
+ return (-ESRCH);
+
+ /*
+ * If we're attempting to get information on our own process, we can
+ * get data on a per-thread basis; if not, punt and use the specified
+ * pid.
+ */
+ if (s_pid == getpid()) {
+ if ((ret = pthread_getschedparam(s_tid, &policy, &sp)) != 0)
+ return (-ret);
+ } else {
+ if (sched_getparam(s_pid, &sp) == -1)
+ return (-errno);
+
+ if ((policy = sched_getscheduler(s_pid)) < 0)
+ return (-errno);
+ }
+
+ /*
+ * Make sure that any non-SCHED_FIFO non-SCHED_RR scheduler is mapped
+ * onto SCHED_OTHER.
+ */
+ if (policy != SCHED_FIFO && policy != SCHED_RR)
+ policy = SCHED_OTHER;
+
+ return (stol_sparam(policy, &sp, (struct lx_sched_param *)param));
+}
+
+long
+lx_sched_setparam(uintptr_t pid, uintptr_t param)
+{
+ int err, policy;
+ pid_t s_pid;
+ lwpid_t s_tid;
+ struct lx_sched_param lp;
+ struct sched_param sp;
+
+ if (((pid_t)pid < 0) || (param == NULL))
+ return (-EINVAL);
+
+ if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
+ return (-ESRCH);
+
+ if (s_pid == getpid()) {
+ struct sched_param dummy;
+
+ if ((err = pthread_getschedparam(s_tid, &policy, &dummy)) != 0)
+ return (-err);
+ } else
+ if ((policy = sched_getscheduler(s_pid)) < 0)
+ return (-errno);
+
+ lx_debug("sched_setparam(): current policy %d", policy);
+
+ if (uucopy((void *)param, &lp, sizeof (lp)) != 0)
+ return (-errno);
+
+ /*
+ * In Linux, the only valid SCHED_OTHER scheduler priority is 0
+ */
+ if ((policy == SCHED_OTHER) && (lp.lx_sched_prio != 0))
+ return (-EINVAL);
+
+ if ((err = ltos_sparam(policy, (struct lx_sched_param *)&lp,
+ &sp)) != 0)
+ return (err);
+
+ /*
+ * Check if we're allowed to change the scheduler for the process.
+ *
+ * If we're operating on a thread, we can't just call
+ * pthread_setschedparam() because as all threads reside within a
+ * single Solaris process, Solaris will allow the modification
+ *
+ * If we're operating on a process, we can't just call sched_setparam()
+ * because Solaris will allow the call to succeed if the scheduler
+ * parameters do not differ from those being installed, but Linux wants
+ * the call to fail.
+ */
+ if ((err = check_schedperms(s_pid)) != 0)
+ return (err);
+
+ if (s_pid == getpid())
+ return (((err = pthread_setschedparam(s_tid, policy, &sp)) != 0)
+ ? -err : 0);
+
+ return ((sched_setparam(s_pid, &sp) == -1) ? -errno : 0);
+}
+
+long
+lx_sched_rr_get_interval(uintptr_t pid, uintptr_t ts)
+{
+ pid_t s_pid;
+
+ if ((pid_t)pid < 0)
+ return (-EINVAL);
+
+ if (lx_lpid_to_spid((pid_t)pid, &s_pid) < 0)
+ return (-ESRCH);
+
+ if (sched_rr_get_interval(s_pid, (struct timespec *)ts) == 0)
+ return (0);
+ else
+ return (-errno);
+}
+
+long
+lx_sched_getscheduler(uintptr_t pid)
+{
+ int policy, rv;
+ pid_t s_pid;
+ lwpid_t s_tid;
+
+ if ((pid_t)pid < 0)
+ return (-EINVAL);
+
+ if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
+ return (-ESRCH);
+
+ if (s_pid == getpid()) {
+ struct sched_param dummy;
+
+ if ((rv = pthread_getschedparam(s_tid, &policy, &dummy)) != 0)
+ return (-rv);
+ } else
+ if ((policy = sched_getscheduler(s_pid)) < 0)
+ return (-errno);
+
+ /*
+ * Linux only supports certain policies; avoid confusing apps with
+ * alien policies.
+ */
+ switch (policy) {
+ case SCHED_FIFO:
+ return (LX_SCHED_FIFO);
+ case SCHED_OTHER:
+ return (LX_SCHED_OTHER);
+ case SCHED_RR:
+ return (LX_SCHED_RR);
+ default:
+ break;
+ }
+
+ return (LX_SCHED_OTHER);
+}
+
+long
+lx_sched_setscheduler(uintptr_t pid, uintptr_t policy, uintptr_t param)
+{
+ int rt_pol;
+ int rv;
+ pid_t s_pid;
+ lwpid_t s_tid;
+ struct lx_sched_param lp;
+
+ struct sched_param sp;
+
+ if (((pid_t)pid < 0) || (param == NULL))
+ return (-EINVAL);
+
+ if ((rt_pol = validate_policy((int)policy)) < 0)
+ return (rt_pol);
+
+ if ((rv = ltos_sparam(policy, (struct lx_sched_param *)param,
+ &sp)) != 0)
+ return (rv);
+
+ if (uucopy((void *)param, &lp, sizeof (lp)) != 0)
+ return (-errno);
+
+ if (rt_pol == LX_SCHED_OTHER) {
+ /*
+ * In Linux, the only valid SCHED_OTHER scheduler priority is 0
+ */
+ if (lp.lx_sched_prio != 0)
+ return (-EINVAL);
+
+ /*
+ * If we're already SCHED_OTHER, there's nothing else to do.
+ */
+ if (lx_sched_getscheduler(pid) == LX_SCHED_OTHER)
+ return (0);
+ }
+
+ if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
+ return (-ESRCH);
+
+ /*
+ * Check if we're allowed to change the scheduler for the process.
+ *
+ * If we're operating on a thread, we can't just call
+ * pthread_setschedparam() because as all threads reside within a
+ * single Solaris process, Solaris will allow the modification.
+ *
+ * If we're operating on a process, we can't just call
+ * sched_setscheduler() because Solaris will allow the call to succeed
+ * if the scheduler and scheduler parameters do not differ from those
+ * being installed, but Linux wants the call to fail.
+ */
+ if ((rv = check_schedperms(s_pid)) != 0)
+ return (rv);
+
+ if (s_pid == getpid()) {
+ struct sched_param param;
+ int pol;
+
+ if ((pol = sched_getscheduler(s_pid)) == -1)
+ return (-errno);
+
+ /*
+ * sched_setscheduler() returns the previous scheduling policy
+ * on success, so call pthread_getschedparam() to get the
+ * current thread's scheduling policy and return that if the
+ * call to pthread_setschedparam() succeeds.
+ */
+ if ((rv = pthread_getschedparam(s_tid, &pol, &param)) != 0)
+ return (-rv);
+
+ return (((rv = pthread_setschedparam(s_tid, rt_pol, &sp)) != 0)
+ ? -rv : pol);
+ }
+
+ return (((rv = sched_setscheduler(s_pid, rt_pol, &sp)) == -1)
+ ? -errno : rv);
+}
+
+long
+lx_sched_get_priority_min(uintptr_t policy)
+{
+ /*
+ * In Linux, the only valid SCHED_OTHER scheduler priority is 0.
+ * Linux scheduling priorities are not alterable, so there is no
+ * Solaris translation necessary.
+ */
+ switch (policy) {
+ case LX_SCHED_FIFO:
+ case LX_SCHED_RR:
+ return (LX_SCHED_PRIORITY_MIN_RRFIFO);
+ case LX_SCHED_OTHER:
+ return (LX_SCHED_PRIORITY_MIN_OTHER);
+ default:
+ break;
+ }
+ return (-EINVAL);
+}
+
+long
+lx_sched_get_priority_max(uintptr_t policy)
+{
+ /*
+ * In Linux, the only valid SCHED_OTHER scheduler priority is 0
+ * Linux scheduling priorities are not alterable, so there is no
+ * Solaris translation necessary.
+ */
+ switch (policy) {
+ case LX_SCHED_FIFO:
+ case LX_SCHED_RR:
+ return (LX_SCHED_PRIORITY_MAX_RRFIFO);
+ case LX_SCHED_OTHER:
+ return (LX_SCHED_PRIORITY_MAX_OTHER);
+ default:
+ break;
+ }
+ return (-EINVAL);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/sendfile.c b/usr/src/lib/brand/lx/lx_brand/common/sendfile.c
new file mode 100644
index 0000000000..1d7547b051
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/sendfile.c
@@ -0,0 +1,145 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * lx_sendfile() and lx_sendfile64() are primarily branded versions of the
+ * library calls available in the Solaris libsendfile (see sendfile(3EXT)).
+ */
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/sendfile.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_syscall.h>
+
+#if defined(_ILP32)
+long
+lx_sendfile(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
+{
+ sysret_t rval;
+ off_t off = 0;
+ off_t *offp = (off_t *)p3;
+ int error;
+ struct sendfilevec sfv;
+ size_t xferred = 0;
+ size_t sz = (size_t)p4;
+
+ if (offp == NULL) {
+ /* If no offp, we must use the current offset */
+ if ((off = lseek((int)p2, 0, SEEK_CUR)) == -1)
+ return (-errno);
+ } else {
+ if (sz > 0 && uucopy(offp, &off, sizeof (off)) != 0)
+ return (-errno);
+ if (off < 0)
+ return (-EINVAL);
+ }
+
+ sfv.sfv_fd = p2;
+ sfv.sfv_flag = 0;
+ sfv.sfv_off = off;
+ sfv.sfv_len = sz;
+ error = __systemcall(&rval, SYS_sendfilev, SENDFILEV, p1, &sfv,
+ 1, &xferred);
+
+ /* Suppress EAGAIN errors if we were able to write any data at all. */
+ if (error == EAGAIN && xferred > 0) {
+ error = 0;
+ rval.sys_rval1 = xferred;
+ }
+
+ if (error == 0 && xferred > 0) {
+ if (offp == NULL) {
+ /* If no offp, we must adjust current offset */
+ if (lseek((int)p2, xferred, SEEK_CUR) == -1)
+ return (-errno);
+ } else {
+ off += xferred;
+ if (uucopy(&off, offp, sizeof (off)) != 0) {
+ return (-EFAULT);
+ }
+ }
+ }
+
+ return (error ? -error : (int)rval.sys_rval1);
+}
+#endif
+
+long
+lx_sendfile64(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
+{
+ sysret_t rval;
+ off64_t off = 0;
+ off64_t *offp = (off64_t *)p3;
+ size_t sz = (size_t)p4;
+ int error;
+ struct sendfilevec64 sfv;
+ size_t xferred;
+
+ if (offp == NULL) {
+ /* If no offp, we must use the current offset */
+ if ((off = lseek((int)p2, 0, SEEK_CUR)) == -1)
+ return (-errno);
+ } else {
+ if (sz > 0 && uucopy(offp, &off, sizeof (off)) != 0)
+ return (-errno);
+ if (off < 0)
+ return (-EINVAL);
+ }
+
+ sfv.sfv_fd = p2;
+ sfv.sfv_flag = 0;
+ sfv.sfv_off = off;
+ sfv.sfv_len = sz;
+ xferred = 0;
+ error = __systemcall(&rval, SYS_sendfilev, SENDFILEV64, p1, &sfv,
+ 1, &xferred);
+
+ /* Suppress EAGAIN errors if we were able to write any data at all. */
+ if (error == EAGAIN && xferred > 0) {
+ error = 0;
+ rval.sys_rval1 = xferred;
+ }
+
+ if (error == 0 && xferred > 0) {
+ if (offp == NULL) {
+ /* If no offp, we must adjust current offset */
+ if (lseek((int)p2, xferred, SEEK_CUR) == -1)
+ return (-errno);
+ } else {
+ off += xferred;
+ if (uucopy(&off, offp, sizeof (off)) != 0) {
+ return (-EFAULT);
+ }
+ }
+ }
+
+ return (error ? -error : (int)rval.sys_rval1);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/signal.c b/usr/src/lib/brand/lx/lx_brand/common/signal.c
new file mode 100644
index 0000000000..80f1bf09a1
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/signal.c
@@ -0,0 +1,2381 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/segments.h>
+#include <sys/lx_types.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_poll.h>
+#include <sys/lx_signal.h>
+#include <sys/lx_sigstack.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_thread.h>
+#include <sys/syscall.h>
+#include <lx_provider_impl.h>
+#include <sys/stack.h>
+#include <assert.h>
+#include <errno.h>
+#include <poll.h>
+#include <rctl.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <thread.h>
+#include <ucontext.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <libintl.h>
+#include <ieeefp.h>
+#include <sys/signalfd.h>
+
+#if defined(_ILP32)
+extern int pselect_large_fdset(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0,
+ const timespec_t *tsp, const sigset_t *sp);
+#endif
+
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+/*
+ * Delivering signals to a Linux process is complicated by differences in
+ * signal numbering, stack structure and contents, and the action taken when a
+ * signal handler exits. In addition, many signal-related structures, such as
+ * sigset_ts, vary between Illumos and Linux.
+ *
+ * To support user-level signal handlers, the brand uses a double layer of
+ * indirection to process and deliver signals to branded threads.
+ *
+ * When a Linux process sends a signal using the kill(2) system call, we must
+ * translate the signal into the Illumos equivalent before handing control off
+ * to the standard signalling mechanism. When a signal is delivered to a Linux
+ * process, we translate the signal number from Illumos to back to Linux.
+ * Translating signals both at generation and delivery time ensures both that
+ * Illumos signals are sent properly to Linux applications and that signals'
+ * default behavior works as expected.
+ *
+ * In a normal Illumos process, signal delivery is interposed on for any thread
+ * registering a signal handler by libc. Libc needs to do various bits of magic
+ * to provide thread-safe critical regions, so it registers its own handler,
+ * named sigacthandler(), using the sigaction(2) system call. When a signal is
+ * received, sigacthandler() is called, and after some processing, libc turns
+ * around and calls the user's signal handler via a routine named
+ * call_user_handler().
+ *
+ * Adding a Linux branded thread to the mix complicates things somewhat.
+ *
+ * First, when a thread receives a signal, it may either be running in an
+ * emulated Linux context or a native illumos context. In either case, the
+ * in-kernel brand module is responsible for preserving the register state
+ * from the interrupted context, regardless of whether emulated or native
+ * software was running at the time. The kernel is also responsible for
+ * ensuring that the illumos native sigacthandler() is called with register
+ * values appropriate for native code. Of particular note is the %gs segment
+ * selector for 32-bit code, and the %fsbase segment base register for 64-bit
+ * code; these are used by libc to locate per-thread data structures.
+ *
+ * Second, the signal number translation referenced above must take place.
+ * Finally, when we hand control to the Linux signal handler we must do so
+ * on the brand stack, and with registers configured appropriately for the
+ * Linux application.
+ *
+ * This need to translate signal numbers (and manipulate the signal handling
+ * context) means that with standard Illumos libc, following a signal from
+ * generation to delivery looks something like:
+ *
+ * kernel ->
+ * sigacthandler() ->
+ * call_user_handler() ->
+ * user signal handler
+ *
+ * but for the brand's Linux threads, this would look like:
+ *
+ * kernel ->
+ * sigacthandler() ->
+ * call_user_handler() ->
+ * lx_call_user_handler() ->
+ * lx_sigdeliver() ->
+ * syscall(B_JUMP_TO_LINUX, ...) ->
+ * Linux user signal handler
+ *
+ * The new addtions are:
+ *
+ * lx_call_user_handler
+ * ====================
+ * This routine is responsible for translating Illumos signal numbers to
+ * their Linux equivalents, building a Linux signal stack based on the
+ * information Illumos has provided, and passing the stack to the
+ * registered Linux signal handler. It is, in effect, the Linux thread
+ * equivalent to libc's call_user_handler().
+ *
+ * lx_sigdeliver
+ * =============
+ *
+ * Note that none of this interposition is necessary unless a Linux thread
+ * registers a user signal handler, as the default action for all signals is the
+ * same between Illumos and Linux save for one signal, SIGPWR. For this reason,
+ * the brand ALWAYS installs its own internal signal handler for SIGPWR that
+ * translates the action to the Linux default, to terminate the process.
+ * (Illumos' default action is to ignore SIGPWR.)
+ *
+ * A notable behavior of lx_sigdeliver is that it must replace the stack
+ * pointer in the context that will be handed to the Linux signal handler.
+ * There is at least one application (mono) which inspects the SP in the
+ * context it receives and which fails when the SP is not within the thread's
+ * stack range. There is not much else within the context that a signal
+ * handler could depend on, so we only ensure that the SP is from the Linux
+ * stack and not the alternate stack. lx_sigdeliver will restore the correct
+ * SP when setcontext returns into this function as part of returning from
+ * the signal handler.
+ *
+ * It is also important to note that when signals are not translated, the brand
+ * relies upon code interposing upon the wait(2) system call to translate
+ * signals to their proper values for any Linux threads retrieving the status
+ * of others. So while the Illumos signal number for a particular signal is set
+ * in a process' data structures (and would be returned as the result of say,
+ * WTERMSIG()), the brand's interposiiton upon wait(2) is responsible for
+ * translating the value WTERMSIG() would return from a Illumos signal number
+ * to the appropriate Linux value.
+ *
+ * lx_call_user_handler() calls lx_sigdeliver() with a helper function
+ * (typically lx_build_signal_frame) which builds a stack frame for the 32-bit
+ * Linux signal handler, or populates a local (on the stack) structure for the
+ * 64-bit Linux signal handler. The stack at that time looks like this:
+ *
+ * =========================================================
+ * | | lx_sigdeliver_frame_t -- includes LX_SIGRT_MAGIC and |
+ * | | a return context for the eventual sigreturn(2) call |
+ * | =========================================================
+ * | | Linux signal frame (32-bit) or local data |
+ * V | (64-bit) built by stack_builder() |
+ * =========================================================
+ *
+ * The process of returning to an interrupted thread of execution from a user
+ * signal handler is entirely different between Illumos and Linux. While
+ * Illumos generally expects to set the context to the interrupted one on a
+ * normal return from a signal handler, in the normal case Linux instead calls
+ * code that calls a specific Linux system call, rt_sigreturn(2) (or it also
+ * can call sigreturn(2) in 32-bit code). Thus when a Linux signal handler
+ * completes execution, instead of returning through what would in libc be a
+ * call to setcontext(2), the rt_sigreturn(2) Linux system call is responsible
+ * for accomplishing much the same thing. It's for this reason that the stack
+ * frame we build has the lx_(rt_)sigreturn_tramp code on the top of the
+ * stack. The code looks like this:
+ *
+ * 32-bit 64-bit
+ * -------------------------------- -----------------------------
+ * mov LX_SYS_rt_sigreturn, %eax movq LX_SYS_rt_sigreturn, %rax
+ * int $0x80 syscall
+ *
+ * We also use these same functions (lx_rt_sigreturn_tramp or
+ * lx_sigreturn_tramp) to actually return from the signal handler.
+ *
+ * (Note that this trampoline code actually lives in a proper executable segment
+ * and not on the stack, but gdb checks for the exact code sequence of the
+ * trampoline code on the stack to determine whether it is in a signal stack
+ * frame or not. Really.)
+ *
+ * When the 32-bit Linux user signal handler is eventually called, the brand
+ * stack frame looks like this (in the case of a "modern" signal stack; see
+ * the lx_sigstack structure definition):
+ *
+ * =========================================================
+ * | | lx_sigdeliver_frame_t |
+ * | =========================================================
+ * | | Trampoline code (marker for gdb, not really executed) |
+ * | =========================================================
+ * | | Linux struct _fpstate |
+ * | =========================================================
+ * V | Linux ucontext_t | <--+
+ * ========================================================= |
+ * | Linux siginfo_t | <--|-----+
+ * ========================================================= | |
+ * | Pointer to Linux ucontext_t (or NULL) (sigaction arg2)| ---+ |
+ * ========================================================= |
+ * | Pointer to Linux siginfo_t (or NULL) (sigaction arg1)| ---------+
+ * =========================================================
+ * | Linux signal number (sigaction arg0)|
+ * =========================================================
+ * | Pointer to signal return code (trampoline code) |
+ * =========================================================
+ *
+ * The 64-bit stack-local data looks like this:
+ *
+ * =========================================================
+ * | | lx_sigdeliver_frame_t |
+ * | =========================================================
+ * | | Trampoline code (marker for gdb, not really executed) |
+ * | =========================================================
+ * | | Linux struct _fpstate |
+ * | =========================================================
+ * V | Linux ucontext_t | %rdx arg2
+ * =========================================================
+ * | Linux siginfo_t | %rsi arg1
+ * =========================================================
+ * | Pointer to signal return code (trampoline code) |
+ * =========================================================
+ *
+ * As usual in 64-bit code, %rdi is arg0 which is the signal number.
+ *
+ * The *sigreturn(2) family of emulated system call handlers locates the
+ * "lx_sigdeliver_frame_t" struct on the Linux stack as part of processing
+ * the system call. This object contains a guard value (LX_SIGRT_MAGIC) to
+ * detect stack smashing or an incorrect stack pointer. It also contains a
+ * "return" context, which we use to get back to the "lx_sigdeliver()" frame
+ * on the native stack that originally dispatched to the Linux signal
+ * handler. The lx_sigdeliver() function is then able to return to the
+ * native libc signal handler in the usual way. This results in a further
+ * setcontext() back to whatever was running when we took the signal.
+ *
+ * There are some edge cases where the "return" context cannot be located
+ * by inspection of the Linux stack; e.g. if the guard value has been
+ * corrupted, or the emulated program has relocated parts of the signal
+ * delivery stack frame. If this case is detected, a fallback mechanism is
+ * used to attempt to find the return context. A chain of "lx_sigbackup_t"
+ * objects is maintained in signal interposer call frames, with the current
+ * head stored in the thread-specific "lx_tsd_t". This mechanism is
+ * similar in principle to the "lwp_oldcontext" member of the "klwp_t" used
+ * by the native signal handling infrastructure. This backup chain is used
+ * by the sigreturn(2) family of emulated system calls in the event that
+ * the Linux stack did not correctly reference a return context.
+ */
+
+typedef struct lx_sigdeliver_frame {
+ uintptr_t lxsdf_magic;
+ ucontext_t *lxsdf_retucp;
+ ucontext_t *lxsdf_sigucp;
+ lx_sigbackup_t *lxsdf_sigbackup;
+} lx_sigdeliver_frame_t;
+
+struct lx_oldsigstack {
+ void (*retaddr)(); /* address of real lx_sigreturn code */
+ int sig; /* signal number */
+ lx_sigcontext_t sigc; /* saved user context */
+ lx_fpstate_t fpstate; /* saved FP state */
+ int sig_extra; /* signal mask for signals [32 .. NSIG - 1] */
+ char trampoline[8]; /* code for trampoline to lx_sigreturn() */
+};
+
+/*
+ * The lx_sighandlers structure needs to be a global due to the semantics of
+ * clone().
+ *
+ * If CLONE_SIGHAND is set, the calling process and child share signal
+ * handlers, and if either calls sigaction(2) it should change the behavior
+ * in the other thread. Each thread does, however, have its own signal mask
+ * and set of pending signals.
+ *
+ * If CLONE_SIGHAND is not set, the child process should inherit a copy of
+ * the signal handlers at the time of the clone() but later calls to
+ * sigaction(2) should only affect the individual thread calling it.
+ *
+ * This maps perfectly to a thr_create(3C) thread semantic in the first
+ * case and a fork(2)-type semantic in the second case. By making
+ * lx_sighandlers global, we automatically get the correct behavior.
+ */
+static lx_sighandlers_t lx_sighandlers;
+
+/*
+ * Setting LX_NO_ABORT_HANDLER in the environment will prevent the emulated
+ * Linux program from modifying the signal handling disposition for SIGSEGV or
+ * SIGABRT. Useful for debugging programs which fall over themselves to
+ * prevent useful core files being generated.
+ */
+static int lx_no_abort_handler = 0;
+
+static void lx_sigdeliver(int, siginfo_t *, ucontext_t *, size_t, void (*)(),
+ void (*)(), struct lx_sigaction *);
+
+/*
+ * stol_stack() and ltos_stack() convert between Illumos and Linux stack_t
+ * structures.
+ *
+ * These routines are needed because although the two structures have the same
+ * contents, their contents are declared in a different order, so the content
+ * of the structures cannot be copied with a simple bcopy().
+ */
+static void
+stol_stack(stack_t *fr, lx_stack_t *to)
+{
+ to->ss_sp = fr->ss_sp;
+ to->ss_flags = fr->ss_flags;
+ to->ss_size = fr->ss_size;
+}
+
+static void
+ltos_stack(lx_stack_t *fr, stack_t *to)
+{
+ to->ss_sp = fr->ss_sp;
+ to->ss_flags = fr->ss_flags;
+ to->ss_size = fr->ss_size;
+}
+
+static int
+ltos_sigset(lx_sigset_t *lx_sigsetp, sigset_t *s_sigsetp)
+{
+ lx_sigset_t l;
+ int lx_sig, sig;
+
+ if (uucopy(lx_sigsetp, &l, sizeof (lx_sigset_t)) != 0)
+ return (-errno);
+
+ (void) sigemptyset(s_sigsetp);
+
+ for (lx_sig = 1; lx_sig <= LX_NSIG; lx_sig++) {
+ if (lx_sigismember(&l, lx_sig) &&
+ ((sig = ltos_signo[lx_sig]) > 0))
+ (void) sigaddset(s_sigsetp, sig);
+ }
+
+ return (0);
+}
+
+static int
+stol_sigset(sigset_t *s_sigsetp, lx_sigset_t *lx_sigsetp)
+{
+ lx_sigset_t l;
+ int sig, lx_sig;
+
+ bzero(&l, sizeof (lx_sigset_t));
+
+ for (sig = 1; sig < NSIG; sig++) {
+ if (sigismember(s_sigsetp, sig) &&
+ ((lx_sig = stol_signo[sig]) > 0))
+ lx_sigaddset(&l, lx_sig);
+ }
+
+ return ((uucopy(&l, lx_sigsetp, sizeof (lx_sigset_t)) != 0)
+ ? -errno : 0);
+}
+
+#if defined(_ILP32)
+static int
+ltos_osigset(lx_osigset_t *lx_osigsetp, sigset_t *s_sigsetp)
+{
+ lx_osigset_t lo;
+ int lx_sig, sig;
+
+ if (uucopy(lx_osigsetp, &lo, sizeof (lx_osigset_t)) != 0)
+ return (-errno);
+
+ (void) sigemptyset(s_sigsetp);
+
+ for (lx_sig = 1; lx_sig <= OSIGSET_NBITS; lx_sig++)
+ if ((lo & OSIGSET_BITSET(lx_sig)) &&
+ ((sig = ltos_signo[lx_sig]) > 0))
+ (void) sigaddset(s_sigsetp, sig);
+
+ return (0);
+}
+
+static int
+stol_osigset(sigset_t *s_sigsetp, lx_osigset_t *lx_osigsetp)
+{
+ lx_osigset_t lo = 0;
+ int lx_sig, sig;
+
+ /*
+ * Note that an lx_osigset_t can only represent the signals from
+ * [1 .. OSIGSET_NBITS], so even though a signal may be present in the
+ * Illumos sigset_t, it may not be representable as a bit in the
+ * lx_osigset_t.
+ */
+ for (sig = 1; sig < NSIG; sig++)
+ if (sigismember(s_sigsetp, sig) &&
+ ((lx_sig = stol_signo[sig]) > 0) &&
+ (lx_sig <= OSIGSET_NBITS))
+ lo |= OSIGSET_BITSET(lx_sig);
+
+ return ((uucopy(&lo, lx_osigsetp, sizeof (lx_osigset_t)) != 0)
+ ? -errno : 0);
+}
+#endif
+
+static int
+ltos_sigcode(int si_code)
+{
+ switch (si_code) {
+ case LX_SI_USER:
+ return (SI_USER);
+ case LX_SI_TKILL:
+ return (SI_LWP);
+ case LX_SI_QUEUE:
+ return (SI_QUEUE);
+ case LX_SI_TIMER:
+ return (SI_TIMER);
+ case LX_SI_ASYNCIO:
+ return (SI_ASYNCIO);
+ case LX_SI_MESGQ:
+ return (SI_MESGQ);
+ default:
+ return (LX_SI_CODE_NOT_EXIST);
+ }
+}
+
+int
+stol_siginfo(siginfo_t *siginfop, lx_siginfo_t *lx_siginfop)
+{
+ int ret = 0;
+ lx_siginfo_t lx_siginfo;
+
+ bzero(&lx_siginfo, sizeof (*lx_siginfop));
+
+ if ((lx_siginfo.lsi_signo = stol_signo[siginfop->si_signo]) <= 0) {
+ /*
+ * Depending on the caller we may still need to get a usable
+ * converted siginfo struct.
+ */
+ lx_siginfo.lsi_signo = LX_SIGKILL;
+ errno = EINVAL;
+ ret = -1;
+ }
+
+ lx_siginfo.lsi_code = lx_stol_sigcode(siginfop->si_code);
+ lx_siginfo.lsi_errno = siginfop->si_errno;
+
+ switch (lx_siginfo.lsi_signo) {
+ /*
+ * Semantics ARE defined for SIGKILL, but since
+ * we can't catch it, we can't translate it. :-(
+ */
+ case LX_SIGPOLL:
+ lx_siginfo.lsi_band = siginfop->si_band;
+ lx_siginfo.lsi_fd = siginfop->si_fd;
+ break;
+
+ case LX_SIGCHLD:
+ lx_siginfo.lsi_pid = siginfop->si_pid;
+ if (siginfop->si_code <= 0 || siginfop->si_code ==
+ CLD_EXITED) {
+ lx_siginfo.lsi_status = siginfop->si_status;
+ } else {
+ lx_siginfo.lsi_status = lx_stol_status(
+ siginfop->si_status, -1);
+ }
+ lx_siginfo.lsi_utime = siginfop->si_utime;
+ lx_siginfo.lsi_stime = siginfop->si_stime;
+ break;
+
+ case LX_SIGILL:
+ case LX_SIGBUS:
+ case LX_SIGFPE:
+ case LX_SIGSEGV:
+ lx_siginfo.lsi_addr = siginfop->si_addr;
+ break;
+
+ default:
+ lx_siginfo.lsi_pid = siginfop->si_pid;
+ lx_siginfo.lsi_uid =
+ LX_UID32_TO_UID16(siginfop->si_uid);
+ lx_siginfo.lsi_value = siginfop->si_value;
+ break;
+ }
+
+ if (uucopy(&lx_siginfo, lx_siginfop, sizeof (lx_siginfo_t)) != 0)
+ return (-errno);
+ return ((ret != 0) ? -errno : 0);
+}
+
+static void
+stol_fpstate(fpregset_t *fpr, lx_fpstate_t *lfpr)
+{
+ size_t copy_len;
+
+#if defined(_LP64)
+ /*
+ * The 64-bit Illumos struct fpregset_t and lx_fpstate_t are identical
+ * so just bcopy() those entries (see usr/src/uts/intel/sys/regset.h
+ * for __amd64's struct fpu).
+ */
+ copy_len = sizeof (fpr->fp_reg_set.fpchip_state);
+ bcopy(fpr, lfpr, copy_len);
+
+#else /* is _ILP32 */
+ struct _fpstate *fpsp = (struct _fpstate *)fpr;
+
+ /*
+ * The Illumos struct _fpstate and lx_fpstate_t are identical from the
+ * beginning of the structure to the lx_fpstate_t "magic" field, so
+ * just bcopy() those entries.
+ */
+ copy_len = (size_t)&(((lx_fpstate_t *)0)->magic);
+ bcopy(fpsp, lfpr, copy_len);
+
+ /*
+ * These fields are all only significant for the first 16 bits.
+ */
+ lfpr->cw &= 0xffff; /* x87 control word */
+ lfpr->tag &= 0xffff; /* x87 tag word */
+ lfpr->cssel &= 0xffff; /* cs selector */
+ lfpr->datasel &= 0xffff; /* ds selector */
+
+ /*
+ * Linux wants the x87 status word field to contain the value of the
+ * x87 saved exception status word.
+ */
+ lfpr->sw = lfpr->status & 0xffff; /* x87 status word */
+
+ lfpr->mxcsr = fpsp->mxcsr;
+
+ if (fpsp->mxcsr != 0) {
+ /*
+ * Linux uses the "magic" field to denote whether the XMM
+ * registers contain legal data or not. Since we can't get to
+ * %cr4 from userland to check the status of the OSFXSR bit,
+ * check the mxcsr field to see if it's 0, which it should
+ * never be on a system with the OXFXSR bit enabled.
+ */
+ lfpr->magic = LX_X86_FXSR_MAGIC;
+ bcopy(fpsp->xmm, lfpr->_xmm, sizeof (lfpr->_xmm));
+ } else {
+ lfpr->magic = LX_X86_FXSR_NONE;
+ }
+#endif
+}
+
+static void
+ltos_fpstate(lx_fpstate_t *lfpr, fpregset_t *fpr)
+{
+ size_t copy_len;
+
+#if defined(_LP64)
+ /*
+ * The 64-bit Illumos struct fpregset_t and lx_fpstate_t are identical
+ * so just bcopy() those entries (see usr/src/uts/intel/sys/regset.h
+ * for __amd64's struct fpu).
+ */
+ copy_len = sizeof (fpr->fp_reg_set.fpchip_state);
+ bcopy(lfpr, fpr, copy_len);
+
+#else /* is _ILP32 */
+ struct _fpstate *fpsp = (struct _fpstate *)fpr;
+
+ /*
+ * The lx_fpstate_t and Illumos struct _fpstate are identical from the
+ * beginning of the structure to the struct _fpstate "mxcsr" field, so
+ * just bcopy() those entries.
+ *
+ * Note that we do NOT have to propogate changes the user may have made
+ * to the "status" word back to the "sw" word, unlike the way we have
+ * to deal with processing the ESP and UESP register values on return
+ * from a signal handler.
+ */
+ copy_len = (size_t)&(((struct _fpstate *)0)->mxcsr);
+ bcopy(lfpr, fpsp, copy_len);
+
+ /*
+ * These fields are all only significant for the first 16 bits.
+ */
+ fpsp->cw &= 0xffff; /* x87 control word */
+ fpsp->sw &= 0xffff; /* x87 status word */
+ fpsp->tag &= 0xffff; /* x87 tag word */
+ fpsp->cssel &= 0xffff; /* cs selector */
+ fpsp->datasel &= 0xffff; /* ds selector */
+ fpsp->status &= 0xffff; /* saved status */
+
+ fpsp->mxcsr = lfpr->mxcsr;
+
+ if (lfpr->magic == LX_X86_FXSR_MAGIC)
+ bcopy(lfpr->_xmm, fpsp->xmm, sizeof (fpsp->xmm));
+#endif
+}
+
+/*
+ * We do not use the system sigaltstack() infrastructure as that would conflict
+ * with our handling of both system call emulation and native signals on the
+ * native stack. Instead, we track the Linux stack structure in our
+ * thread-specific data. This function is modeled on the behaviour of the
+ * native sigaltstack system call handler.
+ */
+long
+lx_sigaltstack(uintptr_t ssp, uintptr_t oss)
+{
+ lx_tsd_t *lxtsd = lx_get_tsd();
+ lx_stack_t ss;
+
+ if (ssp != NULL) {
+ if (lxtsd->lxtsd_sigaltstack.ss_flags & LX_SS_ONSTACK) {
+ /*
+ * If we are currently using the installed alternate
+ * stack for signal handling, the user may not modify
+ * the stack for this thread.
+ */
+ return (-EPERM);
+ }
+
+ if (uucopy((void *)ssp, &ss, sizeof (ss)) != 0) {
+ return (-EFAULT);
+ }
+
+ if (ss.ss_flags & ~LX_SS_DISABLE) {
+ /*
+ * The user may not specify a value for flags other
+ * than 0 or SS_DISABLE.
+ */
+ return (-EINVAL);
+ }
+
+ if (!(ss.ss_flags & LX_SS_DISABLE) && ss.ss_size <
+ LX_MINSIGSTKSZ) {
+ return (-ENOMEM);
+ }
+
+ if ((ss.ss_flags & LX_SS_DISABLE) != 0) {
+ ss.ss_sp = NULL;
+ ss.ss_size = 0;
+ }
+ }
+
+ if (oss != NULL) {
+ /*
+ * User provided old and new stack_t pointers may point to
+ * the same location. Copy out before we modify.
+ */
+ if (uucopy(&lxtsd->lxtsd_sigaltstack, (void *)oss,
+ sizeof (lxtsd->lxtsd_sigaltstack)) != 0) {
+ return (-EFAULT);
+ }
+ }
+
+ if (ssp != NULL) {
+ lxtsd->lxtsd_sigaltstack = ss;
+ }
+
+ return (0);
+}
+
+#if defined(_ILP32)
+/*
+ * The following routines are needed because sigset_ts and siginfo_ts are
+ * different in format between Linux and Illumos.
+ *
+ * Note that there are two different lx_sigset structures, lx_sigset_ts and
+ * lx_osigset_ts:
+ *
+ * + An lx_sigset_t is the equivalent of a Illumos sigset_t and supports
+ * more than 32 signals.
+ *
+ * + An lx_osigset_t is simply a uint32_t, so it by definition only supports
+ * 32 signals.
+ *
+ * When there are two versions of a routine, one prefixed with lx_rt_ and
+ * one prefixed with lx_ alone, in GENERAL the lx_rt_ routines deal with
+ * lx_sigset_ts while the lx_ routines deal with lx_osigset_ts. Unfortunately,
+ * this is not always the case (e.g. lx_sigreturn() vs. lx_rt_sigreturn())
+ */
+long
+lx_sigpending(uintptr_t sigpend)
+{
+ sigset_t sigpendset;
+
+ if (sigpending(&sigpendset) != 0)
+ return (-errno);
+
+ return (stol_osigset(&sigpendset, (lx_osigset_t *)sigpend));
+}
+#endif
+
+long
+lx_rt_sigpending(uintptr_t sigpend, uintptr_t setsize)
+{
+ sigset_t sigpendset;
+
+ if ((size_t)setsize != sizeof (lx_sigset_t))
+ return (-EINVAL);
+
+ if (sigpending(&sigpendset) != 0)
+ return (-errno);
+
+ return (stol_sigset(&sigpendset, (lx_sigset_t *)sigpend));
+}
+
+/*
+ * Create a common routine to encapsulate all of the sigprocmask code,
+ * as the only difference between lx_sigprocmask() and lx_rt_sigprocmask()
+ * is the usage of lx_osigset_ts vs. lx_sigset_ts, as toggled in the code by
+ * the setting of the "sigset_type" flag.
+ */
+static int
+lx_sigprocmask_common(uintptr_t how, uintptr_t l_setp, uintptr_t l_osetp,
+ uintptr_t sigset_type)
+{
+ int err = 0;
+ sigset_t set, oset;
+ sigset_t *s_setp = NULL;
+ sigset_t *s_osetp;
+
+ if (l_setp) {
+ switch (how) {
+ case LX_SIG_BLOCK:
+ how = SIG_BLOCK;
+ break;
+
+ case LX_SIG_UNBLOCK:
+ how = SIG_UNBLOCK;
+ break;
+
+ case LX_SIG_SETMASK:
+ how = SIG_SETMASK;
+ break;
+
+ default:
+ return (-EINVAL);
+ }
+
+ s_setp = &set;
+
+ /* Only 32-bit code passes other than USE_SIGSET */
+ if (sigset_type == USE_SIGSET)
+ err = ltos_sigset((lx_sigset_t *)l_setp, s_setp);
+#if defined(_ILP32)
+ else
+ err = ltos_osigset((lx_osigset_t *)l_setp, s_setp);
+#endif
+
+ if (err != 0)
+ return (err);
+
+ }
+
+ s_osetp = (l_osetp ? &oset : NULL);
+
+ /*
+ * In a multithreaded environment, a call to sigprocmask(2) should
+ * only affect the current thread's signal mask so we don't need to
+ * explicitly call thr_sigsetmask(3C) here.
+ */
+ if (sigprocmask(how, s_setp, s_osetp) != 0)
+ return (-errno);
+
+ if (l_osetp) {
+ if (sigset_type == USE_SIGSET)
+ err = stol_sigset(s_osetp, (lx_sigset_t *)l_osetp);
+#if defined(_ILP32)
+ else
+ err = stol_osigset(s_osetp, (lx_osigset_t *)l_osetp);
+#endif
+
+ if (err != 0) {
+ /*
+ * Encountered a fault while writing to the old signal
+ * mask buffer, so unwind the signal mask change made
+ * above.
+ */
+ (void) sigprocmask(how, s_osetp, (sigset_t *)NULL);
+ return (err);
+ }
+ }
+
+ return (0);
+}
+
+#if defined(_ILP32)
+long
+lx_sigprocmask(uintptr_t how, uintptr_t setp, uintptr_t osetp)
+{
+ return (lx_sigprocmask_common(how, setp, osetp, USE_OSIGSET));
+}
+#endif
+
+long
+lx_rt_sigprocmask(uintptr_t how, uintptr_t setp, uintptr_t osetp,
+ uintptr_t setsize)
+{
+ if ((size_t)setsize != sizeof (lx_sigset_t))
+ return (-EINVAL);
+
+ return (lx_sigprocmask_common(how, setp, osetp, USE_SIGSET));
+}
+
+#if defined(_ILP32)
+long
+lx_sigsuspend(uintptr_t set)
+{
+ sigset_t s_set;
+
+ if (ltos_osigset((lx_osigset_t *)set, &s_set) != 0)
+ return (-errno);
+
+ return ((sigsuspend(&s_set) == -1) ? -errno : 0);
+}
+#endif
+
+long
+lx_rt_sigsuspend(uintptr_t set, uintptr_t setsize)
+{
+ sigset_t s_set;
+
+ if ((size_t)setsize != sizeof (lx_sigset_t))
+ return (-EINVAL);
+
+ if (ltos_sigset((lx_sigset_t *)set, &s_set) != 0)
+ return (-errno);
+
+ return ((sigsuspend(&s_set) == -1) ? -errno : 0);
+}
+
+long
+lx_rt_sigwaitinfo(uintptr_t set, uintptr_t sinfo, uintptr_t setsize)
+{
+ sigset_t s_set;
+ siginfo_t s_sinfo, *s_sinfop;
+ int rc;
+
+ lx_sigset_t *setp = (lx_sigset_t *)set;
+ lx_siginfo_t *sinfop = (lx_siginfo_t *)sinfo;
+
+ if ((size_t)setsize != sizeof (lx_sigset_t))
+ return (-EINVAL);
+
+ if (ltos_sigset(setp, &s_set) != 0)
+ return (-errno);
+
+ s_sinfop = (sinfop == NULL) ? NULL : &s_sinfo;
+
+ if ((rc = sigwaitinfo(&s_set, s_sinfop)) == -1)
+ return (-errno);
+
+ if (s_sinfop == NULL)
+ return (stol_signo[rc]);
+
+ return ((stol_siginfo(s_sinfop, sinfop) != 0)
+ ? -errno : stol_signo[rc]);
+}
+
+long
+lx_rt_sigtimedwait(uintptr_t set, uintptr_t sinfo, uintptr_t toutp,
+ uintptr_t setsize)
+{
+ sigset_t s_set;
+ siginfo_t s_sinfo, *s_sinfop;
+ int rc;
+
+ lx_sigset_t *setp = (lx_sigset_t *)set;
+ lx_siginfo_t *sinfop = (lx_siginfo_t *)sinfo;
+
+ if ((size_t)setsize != sizeof (lx_sigset_t))
+ return (-EINVAL);
+
+ if (ltos_sigset(setp, &s_set) != 0)
+ return (-errno);
+
+ s_sinfop = (sinfop == NULL) ? NULL : &s_sinfo;
+
+ /*
+ * "If timeout is the NULL pointer, the behavior is unspecified."
+ * Match what LTP expects.
+ */
+ if ((rc = sigtimedwait(&s_set, s_sinfop,
+ (struct timespec *)toutp)) == -1)
+ return (toutp == NULL ? -EINTR : -errno);
+
+ if (s_sinfop == NULL)
+ return (stol_signo[rc]);
+
+ return ((stol_siginfo(s_sinfop, sinfop) != 0)
+ ? -errno : stol_signo[rc]);
+}
+
+static void
+lx_sigreturn_find_native_context(const char *caller, ucontext_t **sigucp,
+ ucontext_t **retucp, uintptr_t sp)
+{
+ lx_tsd_t *lxtsd = lx_get_tsd();
+ lx_sigdeliver_frame_t *lxsdfp = (lx_sigdeliver_frame_t *)sp;
+ lx_sigdeliver_frame_t lxsdf;
+ boolean_t copy_ok;
+
+ lx_debug("%s: reading lx_sigdeliver_frame_t @ %p\n", caller, lxsdfp);
+ if (uucopy(lxsdfp, &lxsdf, sizeof (lxsdf)) != 0) {
+ lx_debug("%s: failed to read lx_sigdeliver_frame_t @ %p\n",
+ lxsdfp);
+
+ copy_ok = B_FALSE;
+ } else {
+ lx_debug("%s: lxsdf: magic %p retucp %p sigucp %p\n", caller,
+ lxsdf.lxsdf_magic, lxsdf.lxsdf_retucp, lxsdf.lxsdf_sigucp);
+
+ copy_ok = B_TRUE;
+ }
+
+ /*
+ * lx_sigdeliver() pushes a lx_sigdeliver_frame_t onto the stack
+ * before it creates the struct lx_oldsigstack.
+ */
+ if (copy_ok && lxsdf.lxsdf_magic == LX_SIGRT_MAGIC) {
+ LX_SIGNAL_DELIVERY_FRAME_FOUND(lxsdfp);
+
+ /*
+ * The guard value is intact; use the context pointers stored
+ * in the signal delivery frame:
+ */
+ *sigucp = lxsdf.lxsdf_sigucp;
+ *retucp = lxsdf.lxsdf_retucp;
+
+ /*
+ * Ensure that the backup signal delivery chain is in sync with
+ * the frame we are returning via:
+ */
+ lxtsd->lxtsd_sigbackup = lxsdf.lxsdf_sigbackup;
+ } else {
+ /*
+ * The guard value was not intact. Either the program smashed
+ * the stack unintentionally, or worse: intentionally moved
+ * some parts of the signal delivery frame we constructed to
+ * another location before calling rt_sigreturn(2).
+ */
+ LX_SIGNAL_DELIVERY_FRAME_CORRUPT(lxsdfp);
+
+ if (lxtsd->lxtsd_sigbackup == NULL) {
+ /*
+ * There was no backup context to use, so we must
+ * kill the process.
+ */
+ if (copy_ok) {
+ lx_err_fatal("%s: sp 0x%p, expected 0x%x, "
+ "found 0x%x!", caller, sp, LX_SIGRT_MAGIC,
+ lxsdf.lxsdf_magic);
+ } else {
+ lx_err_fatal("%s: sp 0x%p, could not read "
+ "magic", caller, sp);
+ }
+ }
+
+ /*
+ * Attempt to recover by using the backup signal delivery
+ * chain:
+ */
+ lx_debug("%s: SIGRT_MAGIC not found @ sp %p; using backup "
+ "@ %p\n", caller, (void *)sp, lxtsd->lxtsd_sigbackup);
+ *sigucp = lxtsd->lxtsd_sigbackup->lxsb_sigucp;
+ *retucp = lxtsd->lxtsd_sigbackup->lxsb_retucp;
+ }
+}
+
+#if defined(_ILP32)
+/*
+ * Intercept the Linux sigreturn() syscall to turn it into the return through
+ * the libc call stack that Illumos expects.
+ *
+ * When control returns to libc's call_user_handler() routine, a setcontext(2)
+ * will be done that returns thread execution to the point originally
+ * interrupted by receipt of the signal.
+ *
+ * This is only used by 32-bit code.
+ */
+long
+lx_sigreturn(void)
+{
+ struct lx_oldsigstack *lx_ossp;
+ lx_sigset_t lx_sigset;
+ ucontext_t *ucp;
+ ucontext_t *sigucp;
+ ucontext_t *retucp;
+ uintptr_t sp;
+
+ ucp = lx_syscall_regs();
+
+ /*
+ * NOTE: The sp saved in the context is eight bytes off of where we
+ * need it to be (either due to trampoline or the copying of
+ * sp = uesp, not clear which).
+ */
+ sp = LX_REG(ucp, REG_SP) - 8;
+
+ /*
+ * At this point, the stack pointer should point to the struct
+ * lx_oldsigstack that lx_build_old_signal_frame() constructed and
+ * placed on the stack. We need to reference it a bit later, so
+ * save a pointer to it before incrementing our copy of the sp.
+ */
+ lx_ossp = (struct lx_oldsigstack *)sp;
+ sp += SA(sizeof (struct lx_oldsigstack));
+
+ lx_sigreturn_find_native_context(__func__, &sigucp, &retucp, sp);
+
+ /*
+ * We need to copy machine registers the Linux signal handler may have
+ * modified back to the Illumos ucontext_t.
+ *
+ * General registers copy across as-is, except Linux expects that
+ * changes made to uc_mcontext.gregs[ESP] will be reflected when the
+ * interrupted thread resumes execution after the signal handler. To
+ * emulate this behavior, we must modify uc_mcontext.gregs[UESP] to
+ * match uc_mcontext.gregs[ESP] as Illumos will restore the UESP
+ * value to ESP.
+ */
+ lx_ossp->sigc.sc_esp_at_signal = lx_ossp->sigc.sc_esp;
+ bcopy(&lx_ossp->sigc, &sigucp->uc_mcontext, sizeof (gregset_t));
+
+ LX_SIGRETURN(NULL, sigucp, sp);
+
+ /* copy back FP regs if present */
+ if (lx_ossp->sigc.sc_fpstate != NULL)
+ ltos_fpstate(&lx_ossp->fpstate, &sigucp->uc_mcontext.fpregs);
+
+ /* convert Linux signal mask back to its Illumos equivalent */
+ bzero(&lx_sigset, sizeof (lx_sigset_t));
+ lx_sigset.__bits[0] = lx_ossp->sigc.sc_mask;
+ lx_sigset.__bits[1] = lx_ossp->sig_extra;
+ (void) ltos_sigset(&lx_sigset, &sigucp->uc_sigmask);
+
+ /*
+ * For signal mask handling to be done properly, this call needs to
+ * return to the libc routine that originally called the signal handler
+ * rather than directly set the context back to the place the signal
+ * interrupted execution as the original Linux code would do.
+ */
+ lx_debug("lx_sigreturn: calling setcontext; retucp %p flags %lx "
+ "link %p\n", retucp, retucp->uc_flags, retucp->uc_link);
+ setcontext(retucp);
+ assert(0);
+
+ /*NOTREACHED*/
+ return (0);
+}
+#endif
+
+/*
+ * This signal return syscall is used by both 32-bit and 64-bit code.
+ */
+long
+lx_rt_sigreturn(void)
+{
+ struct lx_sigstack *lx_ssp;
+ lx_ucontext_t *lx_ucp;
+ ucontext_t *ucp;
+ ucontext_t *sigucp;
+ ucontext_t *retucp;
+ uintptr_t sp;
+
+ /* Get the registers at the emulated Linux rt_sigreturn syscall */
+ ucp = lx_syscall_regs();
+
+#if defined(_ILP32)
+ lx_debug("lx_rt_sigreturn: ESP %p UESP %p\n", LX_REG(ucp, ESP),
+ LX_REG(ucp, UESP));
+ /*
+ * For 32-bit
+ *
+ * NOTE: Because of the silly compatibility measures done in the
+ * signal trampoline code to make sure the stack holds the
+ * _exact same_ instruction sequence Linux does, we have to
+ * manually "pop" some extra instructions off the stack here
+ * before passing the stack address to the syscall because the
+ * trampoline code isn't allowed to do it due to the gdb
+ * compatability issues.
+ *
+ * No, I'm not kidding.
+ *
+ * The sp saved in the context is eight bytes off of where we
+ * need it to be (either due to trampoline or the copying of
+ * sp = uesp, not clear which but looks like the uesp case), so
+ * the need to pop the extra four byte instruction means we need
+ * to subtract a net four bytes from the sp before "popping" the
+ * struct lx_sigstack off the stack.
+ *
+ * This will yield the value the stack pointer had before
+ * lx_sigdeliver() created the stack frame for the Linux signal
+ * handler.
+ */
+ sp = (uintptr_t)LX_REG(ucp, REG_SP) - 4;
+#else
+ /*
+ * We need to make an adjustment for 64-bit code as well. Since 64-bit
+ * does not use the trampoline, it's probably for the same reason as
+ * alluded to above.
+ */
+ sp = (uintptr_t)LX_REG(ucp, REG_SP) - 8;
+#endif
+
+ /*
+ * At this point, the stack pointer should point to the struct
+ * lx_sigstack that lx_build_signal_frame() constructed and
+ * placed on the stack. We need to reference it a bit later, so
+ * save a pointer to it before incrementing our copy of the sp.
+ */
+ lx_ssp = (struct lx_sigstack *)sp;
+ sp += SA(sizeof (struct lx_sigstack));
+
+#if defined(_LP64)
+ /*
+ * The 64-bit lx_sigdeliver() inserts 8 bytes of padding between
+ * the lx_sigstack_t and the delivery frame to maintain ABI stack
+ * alignment.
+ */
+ sp += 8;
+#endif
+
+ lx_sigreturn_find_native_context(__func__, &sigucp, &retucp, sp);
+
+ /*
+ * We need to copy machine registers the Linux signal handler may have
+ * modified back to the Illumos version.
+ */
+#if defined(_LP64)
+ lx_ucp = &lx_ssp->uc;
+
+ /*
+ * General register layout is completely different.
+ */
+ LX_REG(sigucp, REG_R15) = lx_ucp->uc_sigcontext.sc_r15;
+ LX_REG(sigucp, REG_R14) = lx_ucp->uc_sigcontext.sc_r14;
+ LX_REG(sigucp, REG_R13) = lx_ucp->uc_sigcontext.sc_r13;
+ LX_REG(sigucp, REG_R12) = lx_ucp->uc_sigcontext.sc_r12;
+ LX_REG(sigucp, REG_R11) = lx_ucp->uc_sigcontext.sc_r11;
+ LX_REG(sigucp, REG_R10) = lx_ucp->uc_sigcontext.sc_r10;
+ LX_REG(sigucp, REG_R9) = lx_ucp->uc_sigcontext.sc_r9;
+ LX_REG(sigucp, REG_R8) = lx_ucp->uc_sigcontext.sc_r8;
+ LX_REG(sigucp, REG_RDI) = lx_ucp->uc_sigcontext.sc_rdi;
+ LX_REG(sigucp, REG_RSI) = lx_ucp->uc_sigcontext.sc_rsi;
+ LX_REG(sigucp, REG_RBP) = lx_ucp->uc_sigcontext.sc_rbp;
+ LX_REG(sigucp, REG_RBX) = lx_ucp->uc_sigcontext.sc_rbx;
+ LX_REG(sigucp, REG_RDX) = lx_ucp->uc_sigcontext.sc_rdx;
+ LX_REG(sigucp, REG_RCX) = lx_ucp->uc_sigcontext.sc_rcx;
+ LX_REG(sigucp, REG_RAX) = lx_ucp->uc_sigcontext.sc_rax;
+ LX_REG(sigucp, REG_TRAPNO) = lx_ucp->uc_sigcontext.sc_trapno;
+ LX_REG(sigucp, REG_ERR) = lx_ucp->uc_sigcontext.sc_err;
+ LX_REG(sigucp, REG_RIP) = lx_ucp->uc_sigcontext.sc_rip;
+ LX_REG(sigucp, REG_CS) = lx_ucp->uc_sigcontext.sc_cs;
+ LX_REG(sigucp, REG_RFL) = lx_ucp->uc_sigcontext.sc_eflags;
+ LX_REG(sigucp, REG_RSP) = lx_ucp->uc_sigcontext.sc_rsp;
+ LX_REG(sigucp, REG_SS) = lx_ucp->uc_sigcontext.sc_pad0;
+ LX_REG(sigucp, REG_FS) = lx_ucp->uc_sigcontext.sc_fs;
+ LX_REG(sigucp, REG_GS) = lx_ucp->uc_sigcontext.sc_gs;
+
+#else /* is _ILP32 */
+ lx_ucp = &lx_ssp->uc;
+
+ /*
+ * Illumos and Linux both follow the SysV i386 ABI layout for the
+ * mcontext.
+ *
+ * General registers copy across as-is, except Linux expects that
+ * changes made to uc_mcontext.gregs[ESP] will be reflected when the
+ * interrupted thread resumes execution after the signal handler. To
+ * emulate this behavior, we must modify uc_mcontext.gregs[UESP] to
+ * match uc_mcontext.gregs[ESP] as Illumos will restore the UESP value
+ * to ESP.
+ */
+ lx_ucp->uc_sigcontext.sc_esp_at_signal = lx_ucp->uc_sigcontext.sc_esp;
+
+ bcopy(&lx_ucp->uc_sigcontext, &sigucp->uc_mcontext.gregs,
+ sizeof (gregset_t));
+#endif
+
+ LX_SIGRETURN(lx_ucp, sigucp, sp);
+
+ if (lx_ucp->uc_sigcontext.sc_fpstate != NULL) {
+ ltos_fpstate(lx_ucp->uc_sigcontext.sc_fpstate,
+ &sigucp->uc_mcontext.fpregs);
+ }
+
+ /*
+ * Convert the Linux signal mask and stack back to their
+ * Illumos equivalents.
+ */
+ (void) ltos_sigset(&lx_ucp->uc_sigmask, &sigucp->uc_sigmask);
+ ltos_stack(&lx_ucp->uc_stack, &sigucp->uc_stack);
+
+ /*
+ * For signal mask handling to be done properly, this call needs to
+ * return to the libc routine that originally called the signal handler
+ * rather than directly set the context back to the place the signal
+ * interrupted execution as the original Linux code would do.
+ */
+ lx_debug("lx_rt_sigreturn: calling setcontext; retucp %p\n", retucp);
+ setcontext(retucp);
+ assert(0);
+
+ /*NOTREACHED*/
+ return (0);
+}
+
+
+#if defined(_ILP32)
+/*
+ * Build signal frame for processing for "old" (legacy) Linux signals
+ * This stack-builder function is only used by 32-bit code.
+ */
+static void
+lx_build_old_signal_frame(int lx_sig, siginfo_t *sip, void *p, void *sp,
+ uintptr_t *hargs)
+{
+ extern void lx_sigreturn_tramp();
+
+ lx_sigset_t lx_sigset;
+ ucontext_t *ucp = (ucontext_t *)p;
+ struct lx_sigaction *lxsap;
+ struct lx_oldsigstack *lx_ossp = sp;
+
+ lx_debug("building old signal frame for lx sig %d at 0x%p", lx_sig, sp);
+
+ lx_ossp->sig = lx_sig;
+ lxsap = &lx_sighandlers.lx_sa[lx_sig];
+ lx_debug("lxsap @ 0x%p", lxsap);
+
+ if (lxsap && (lxsap->lxsa_flags & LX_SA_RESTORER) &&
+ lxsap->lxsa_restorer) {
+ lx_ossp->retaddr = lxsap->lxsa_restorer;
+ lx_debug("lxsa_restorer exists @ 0x%p", lx_ossp->retaddr);
+ } else {
+ lx_ossp->retaddr = lx_sigreturn_tramp;
+ lx_debug("lx_ossp->retaddr set to 0x%p", lx_sigreturn_tramp);
+ }
+
+ lx_debug("osf retaddr = 0x%p", lx_ossp->retaddr);
+
+ /* convert Illumos signal mask and stack to their Linux equivalents */
+ (void) stol_sigset(&ucp->uc_sigmask, &lx_sigset);
+ lx_ossp->sigc.sc_mask = lx_sigset.__bits[0];
+ lx_ossp->sig_extra = lx_sigset.__bits[1];
+
+ /*
+ * General registers copy across as-is, except Linux expects that
+ * uc_mcontext.gregs[ESP] == uc_mcontext.gregs[UESP] on receipt of a
+ * signal.
+ */
+ bcopy(&ucp->uc_mcontext, &lx_ossp->sigc, sizeof (gregset_t));
+ lx_ossp->sigc.sc_esp = lx_ossp->sigc.sc_esp_at_signal;
+
+ /*
+ * cr2 contains the faulting address, and Linux only sets cr2 for a
+ * a segmentation fault.
+ */
+ lx_ossp->sigc.sc_cr2 = (((lx_sig == LX_SIGSEGV) && (sip)) ?
+ (uintptr_t)sip->si_addr : 0);
+
+ /* convert FP regs if present */
+ if (ucp->uc_flags & UC_FPU) {
+ stol_fpstate(&ucp->uc_mcontext.fpregs, &lx_ossp->fpstate);
+ lx_ossp->sigc.sc_fpstate = &lx_ossp->fpstate;
+ } else {
+ lx_ossp->sigc.sc_fpstate = NULL;
+ }
+
+ /*
+ * Believe it or not, gdb wants to SEE the trampoline code on the
+ * bottom of the stack to determine whether the stack frame belongs to
+ * a signal handler, even though this code is no longer actually
+ * called.
+ *
+ * You can't make this stuff up.
+ */
+ bcopy((void *)lx_sigreturn_tramp, lx_ossp->trampoline,
+ sizeof (lx_ossp->trampoline));
+}
+#endif
+
+/*
+ * Build stack frame (32-bit) or stack local data (64-bit) for processing for
+ * modern Linux signals. This is the only stack-builder function for 64-bit
+ * code (32-bit code also calls this when using "modern" signals).
+ */
+static void
+lx_build_signal_frame(int lx_sig, siginfo_t *sip, void *p, void *sp,
+ uintptr_t *hargs)
+{
+ extern void lx_rt_sigreturn_tramp();
+
+ lx_ucontext_t *lx_ucp;
+ ucontext_t *ucp = (ucontext_t *)p;
+ struct lx_sigstack *lx_ssp = sp;
+ struct lx_sigaction *lxsap;
+
+ lx_debug("building signal frame for lx sig %d at 0x%p", lx_sig, sp);
+
+ lx_ucp = &lx_ssp->uc;
+#if defined(_ILP32)
+ /*
+ * Arguments are passed to the 32-bit signal handler on the stack.
+ */
+ lx_ssp->ucp = lx_ucp;
+ lx_ssp->sip = sip != NULL ? &lx_ssp->si : NULL;
+ lx_ssp->sig = lx_sig;
+#else
+ /*
+ * Arguments to the 64-bit signal handler are passed in registers:
+ * hdlr(int sig, siginfo_t *sip, void *ucp);
+ */
+ hargs[0] = lx_sig;
+ hargs[1] = sip != NULL ? (uintptr_t)&lx_ssp->si : NULL;
+ hargs[2] = (uintptr_t)lx_ucp;
+#endif
+
+ lxsap = &lx_sighandlers.lx_sa[lx_sig];
+ lx_debug("lxsap @ 0x%p", lxsap);
+
+ if (lxsap && (lxsap->lxsa_flags & LX_SA_RESTORER) &&
+ lxsap->lxsa_restorer) {
+ /*
+ * lxsa_restorer is explicitly set by sigaction in 32-bit code
+ * but it can also be implicitly set for both 32 and 64 bit
+ * code via lx_sigaction_common when we bcopy the user-supplied
+ * lx_sigaction element into the proper slot in the sighandler
+ * array.
+ */
+ lx_ssp->retaddr = lxsap->lxsa_restorer;
+ lx_debug("lxsa_restorer exists @ 0x%p", lx_ssp->retaddr);
+ } else {
+ lx_ssp->retaddr = lx_rt_sigreturn_tramp;
+ lx_debug("lx_ssp->retaddr set to 0x%p", lx_rt_sigreturn_tramp);
+ }
+
+ /* Linux has these fields but always clears them to 0 */
+ lx_ucp->uc_flags = 0;
+ lx_ucp->uc_link = NULL;
+
+ /* convert Illumos signal mask and stack to their Linux equivalents */
+ (void) stol_sigset(&ucp->uc_sigmask, &lx_ucp->uc_sigmask);
+ stol_stack(&ucp->uc_stack, &lx_ucp->uc_stack);
+
+#if defined(_LP64)
+ /*
+ * General register layout is completely different.
+ */
+ lx_ucp->uc_sigcontext.sc_r8 = LX_REG(ucp, REG_R8);
+ lx_ucp->uc_sigcontext.sc_r9 = LX_REG(ucp, REG_R9);
+ lx_ucp->uc_sigcontext.sc_r10 = LX_REG(ucp, REG_R10);
+ lx_ucp->uc_sigcontext.sc_r11 = LX_REG(ucp, REG_R11);
+ lx_ucp->uc_sigcontext.sc_r12 = LX_REG(ucp, REG_R12);
+ lx_ucp->uc_sigcontext.sc_r13 = LX_REG(ucp, REG_R13);
+ lx_ucp->uc_sigcontext.sc_r14 = LX_REG(ucp, REG_R14);
+ lx_ucp->uc_sigcontext.sc_r15 = LX_REG(ucp, REG_R15);
+ lx_ucp->uc_sigcontext.sc_rdi = LX_REG(ucp, REG_RDI);
+ lx_ucp->uc_sigcontext.sc_rsi = LX_REG(ucp, REG_RSI);
+ lx_ucp->uc_sigcontext.sc_rbp = LX_REG(ucp, REG_RBP);
+ lx_ucp->uc_sigcontext.sc_rbx = LX_REG(ucp, REG_RBX);
+ lx_ucp->uc_sigcontext.sc_rdx = LX_REG(ucp, REG_RDX);
+ lx_ucp->uc_sigcontext.sc_rax = LX_REG(ucp, REG_RAX);
+ lx_ucp->uc_sigcontext.sc_rcx = LX_REG(ucp, REG_RCX);
+ lx_ucp->uc_sigcontext.sc_rsp = LX_REG(ucp, REG_RSP);
+ lx_ucp->uc_sigcontext.sc_rip = LX_REG(ucp, REG_RIP);
+ lx_ucp->uc_sigcontext.sc_eflags = LX_REG(ucp, REG_RFL);
+ lx_ucp->uc_sigcontext.sc_cs = LX_REG(ucp, REG_CS);
+ lx_ucp->uc_sigcontext.sc_gs = LX_REG(ucp, REG_GS);
+ lx_ucp->uc_sigcontext.sc_fs = LX_REG(ucp, REG_FS);
+ lx_ucp->uc_sigcontext.sc_pad0 = LX_REG(ucp, REG_SS);
+ lx_ucp->uc_sigcontext.sc_err = LX_REG(ucp, REG_ERR);
+ lx_ucp->uc_sigcontext.sc_trapno = LX_REG(ucp, REG_TRAPNO);
+
+#else /* is _ILP32 */
+ /*
+ * General registers copy across as-is, except Linux expects that
+ * uc_mcontext.gregs[ESP] == uc_mcontext.gregs[UESP] on receipt of a
+ * signal.
+ */
+ bcopy(&ucp->uc_mcontext, &lx_ucp->uc_sigcontext, sizeof (gregset_t));
+ lx_ucp->uc_sigcontext.sc_esp = lx_ucp->uc_sigcontext.sc_esp_at_signal;
+#endif
+
+ /*
+ * cr2 contains the faulting address, which Linux only sets for a
+ * a segmentation fault.
+ */
+ lx_ucp->uc_sigcontext.sc_cr2 = ((lx_sig == LX_SIGSEGV) && (sip)) ?
+ (uintptr_t)sip->si_addr : 0;
+
+ /*
+ * This should only return an error if the signum is invalid but that
+ * also gets converted into a LX_SIGKILL by this function.
+ */
+ if (sip != NULL)
+ (void) stol_siginfo(sip, &lx_ssp->si);
+ else
+ bzero(&lx_ssp->si, sizeof (lx_siginfo_t));
+
+ /* convert FP regs if present */
+ if (ucp->uc_flags & UC_FPU) {
+ /*
+ * Copy FP regs to the appropriate place in the the lx_sigstack
+ * structure.
+ */
+ stol_fpstate(&ucp->uc_mcontext.fpregs, &lx_ssp->fpstate);
+ lx_ucp->uc_sigcontext.sc_fpstate = &lx_ssp->fpstate;
+ } else {
+ lx_ucp->uc_sigcontext.sc_fpstate = NULL;
+ }
+
+#if defined(_ILP32)
+ /*
+ * Believe it or not, gdb wants to SEE the sigreturn code on the
+ * top of the stack to determine whether the stack frame belongs to
+ * a signal handler, even though this code is not actually called.
+ *
+ * You can't make this stuff up.
+ */
+ bcopy((void *)lx_rt_sigreturn_tramp, lx_ssp->trampoline,
+ sizeof (lx_ssp->trampoline));
+#endif
+}
+
+/*
+ * This is the interposition handler for Linux signals.
+ */
+static void
+lx_call_user_handler(int sig, siginfo_t *sip, void *p)
+{
+ void (*user_handler)();
+ void (*stk_builder)();
+ struct lx_sigaction *lxsap;
+ ucontext_t *ucp = (ucontext_t *)p;
+ size_t stksize;
+ int lx_sig;
+
+ /*
+ * If Illumos signal has no Linux equivalent, effectively ignore it.
+ */
+ if ((lx_sig = stol_signo[sig]) == -1) {
+ lx_unsupported("caught Illumos signal %d, no Linux equivalent",
+ sig);
+ return;
+ }
+
+ lx_debug("interpose caught Illumos signal %d, translating to Linux "
+ "signal %d", sig, lx_sig);
+
+ lxsap = &lx_sighandlers.lx_sa[lx_sig];
+ lx_debug("lxsap @ 0x%p", lxsap);
+
+ if ((sig == SIGPWR) && (lxsap->lxsa_handler == SIG_DFL)) {
+ /*
+ * Linux SIG_DFL for SIGPWR is to terminate. The lx wait
+ * emulation will translate SIGPWR to LX_SIGPWR.
+ */
+ (void) syscall(SYS_brand, B_EXIT_AS_SIG, SIGPWR);
+ /* This should never return */
+ assert(0);
+ }
+
+ if (lxsap->lxsa_handler == SIG_DFL || lxsap->lxsa_handler == SIG_IGN)
+ lx_err_fatal("lxsa_handler set to %s? How?!?!?",
+ (lxsap->lxsa_handler == SIG_DFL) ? "SIG_DFL" : "SIG_IGN");
+
+#if defined(_LP64)
+ stksize = sizeof (struct lx_sigstack);
+ stk_builder = lx_build_signal_frame;
+#else
+ if (lxsap->lxsa_flags & LX_SA_SIGINFO) {
+ stksize = sizeof (struct lx_sigstack);
+ stk_builder = lx_build_signal_frame;
+ } else {
+ stksize = sizeof (struct lx_oldsigstack);
+ stk_builder = lx_build_old_signal_frame;
+ }
+#endif
+
+ user_handler = lxsap->lxsa_handler;
+
+ lx_debug("delivering %d (lx %d) to handler at 0x%p", sig, lx_sig,
+ lxsap->lxsa_handler);
+
+ if (lxsap->lxsa_flags & LX_SA_RESETHAND)
+ lxsap->lxsa_handler = SIG_DFL;
+
+ lx_sigdeliver(lx_sig, sip, ucp, stksize, stk_builder, user_handler,
+ lxsap);
+
+ /*
+ * We need to handle restarting system calls if requested by the
+ * program for this signal type:
+ */
+ if (lxsap->lxsa_flags & LX_SA_RESTART) {
+ uintptr_t flags = (uintptr_t)ucp->uc_brand_data[0];
+ long ret = (long)LX_REG(ucp, REG_R0);
+ boolean_t interrupted = (ret == -lx_errno(EINTR, -1));
+
+ /*
+ * If the system call returned EINTR, and the system
+ * call handler set "br_syscall_restart" when returning,
+ * we modify the context to try the system call again
+ * when we return from this signal handler.
+ */
+ if ((flags & LX_UC_RESTART_SYSCALL) && interrupted) {
+ int syscall_num = (int)(uintptr_t)ucp->uc_brand_data[2];
+
+ lx_debug("restarting interrupted system call %d",
+ syscall_num);
+
+ /*
+ * Both the "int 0x80" and the "syscall" instruction
+ * are two bytes long. Wind the program counter back
+ * to the start of this instruction.
+ *
+ * The system call we interrupted is preserved in the
+ * brand-specific data in the ucontext_t when the
+ * LX_UC_RESTART_SYSCALL flag is set. This is
+ * analogous to the "orig_[er]ax" field in the Linux
+ * "user_regs_struct".
+ */
+ LX_REG(ucp, REG_PC) -= 2;
+ LX_REG(ucp, REG_R0) = syscall_num;
+ }
+ }
+}
+
+/*
+ * The "lx_sigdeliver()" function is responsible for constructing the emulated
+ * signal delivery frame on the brand stack for this LWP. A context is saved
+ * on the stack which will be used by the "sigreturn(2)" family of emulated
+ * system calls to get us back here after the Linux signal handler returns.
+ * This function is modelled on the in-kernel "sendsig()" signal delivery
+ * mechanism.
+ */
+void
+lx_sigdeliver(int lx_sig, siginfo_t *sip, ucontext_t *ucp, size_t stacksz,
+ void (*stack_builder)(), void (*user_handler)(),
+ struct lx_sigaction *lxsap)
+{
+ lx_sigbackup_t sigbackup;
+ ucontext_t uc;
+ lx_tsd_t *lxtsd = lx_get_tsd();
+ int totsz = 0;
+ uintptr_t flags;
+ uintptr_t hargs[3];
+ uintptr_t orig_sp = 0;
+
+ /*
+ * These variables must be "volatile", as they are modified after the
+ * getcontext() stores the register state:
+ */
+ volatile boolean_t signal_delivered = B_FALSE;
+ volatile boolean_t sp_modified = B_FALSE;
+ volatile uintptr_t lxfp = 0;
+ volatile uintptr_t old_tsd_sp = 0;
+ volatile int newstack = 0;
+
+ /*
+ * This function involves modifying the Linux process stack for this
+ * thread. To do so without corruption requires us to exclude other
+ * signal handlers (or emulated system calls called from within those
+ * handlers) from running while we reserve space on that stack. We
+ * defer the execution of further instances of lx_call_user_handler()
+ * until we have completed this operation.
+ */
+ _sigoff();
+
+ /*
+ * Clear register arguments vector.
+ */
+ bzero(hargs, sizeof (hargs));
+
+ /* Save our SP so we can restore it after coming back in. */
+ orig_sp = LX_REG(ucp, REG_SP);
+
+ /*
+ * We save a context here so that we can be returned later to complete
+ * handling the signal.
+ */
+ lx_debug("lx_sigdeliver: STORING RETURN CONTEXT @ %p\n", &uc);
+ assert(getcontext(&uc) == 0);
+ lx_debug("lx_sigdeliver: RETURN CONTEXT %p LINK %p FLAGS %lx\n",
+ &uc, uc.uc_link, uc.uc_flags);
+ if (signal_delivered) {
+ /*
+ * If the "signal_delivered" flag is set, we are returned here
+ * via setcontext() as called by the emulated Linux signal
+ * return system call.
+ */
+ lx_debug("lx_sigdeliver: WE ARE BACK, VIA UC @ %p!\n", &uc);
+
+ if (sp_modified) {
+ /*
+ * Restore the original stack pointer, which we saved
+ * on our alt. stack, back into the context.
+ */
+ LX_REG(ucp, REG_SP) = orig_sp;
+ }
+
+ goto after_signal_handler;
+ }
+ signal_delivered = B_TRUE;
+
+ /*
+ * Preserve the current tsd value of the Linux process stack pointer,
+ * even if it is zero. We will restore it when we are returned here
+ * via setcontext() after the Linux process has completed execution of
+ * its signal handler.
+ */
+ old_tsd_sp = lxtsd->lxtsd_lx_sp;
+
+ /*
+ * Figure out whether we will be handling this signal on an alternate
+ * stack specified by the user.
+ */
+ newstack = (lxsap->lxsa_flags & LX_SA_ONSTACK) &&
+ !(lxtsd->lxtsd_sigaltstack.ss_flags & (LX_SS_ONSTACK |
+ LX_SS_DISABLE));
+
+ /*
+ * Find the first unused region of the Linux process stack, where
+ * we will assemble our signal delivery frame.
+ */
+ flags = (uintptr_t)ucp->uc_brand_data[0];
+ if (newstack) {
+ /*
+ * We are moving to the user-provided alternate signal
+ * stack.
+ */
+ lxfp = SA((uintptr_t)lxtsd->lxtsd_sigaltstack.ss_sp) +
+ SA(lxtsd->lxtsd_sigaltstack.ss_size) - STACK_ALIGN;
+ lx_debug("lx_sigdeliver: moving to ALTSTACK sp %p\n", lxfp);
+ LX_SIGNAL_ALTSTACK_ENABLE(lxfp);
+ } else if (flags & LX_UC_STACK_BRAND) {
+ /*
+ * We interrupted the Linux process to take this signal. The
+ * stack pointer is the one saved in this context.
+ */
+ lxfp = LX_REG(ucp, REG_SP);
+ } else {
+ /*
+ * We interrupted a native (emulation) routine, so we must get
+ * the current stack pointer from either the tsd (if one is
+ * stored there) or via the context chain.
+ *
+ */
+ lxfp = lx_find_brand_sp();
+ if (lxtsd->lxtsd_lx_sp != 0) {
+ /*
+ * We must also make room for the possibility of nested
+ * signal delivery -- we may be pre-empting the
+ * in-progress handling of another signal.
+ *
+ * Note that if we were already on the alternate stack,
+ * any emulated Linux system calls would be betwixt
+ * that original signal frame and this new one on the
+ * one contiguous stack, so this logic holds either
+ * way:
+ */
+ lxfp = MIN(lxtsd->lxtsd_lx_sp, lxfp);
+ }
+
+ /* Replace the context SP with the one from the Linux context */
+ LX_REG(ucp, REG_SP) = lxfp;
+ sp_modified = B_TRUE;
+ }
+
+ /*
+ * Account for a reserved stack region (for amd64, this is 128 bytes),
+ * and align the stack:
+ */
+ lxfp -= STACK_RESERVE;
+ lxfp &= ~(STACK_ALIGN - 1);
+
+ /*
+ * Allocate space on the Linux process stack for our delivery frame,
+ * including:
+ *
+ * ----------------------------------------------------- old %sp
+ * - lx_sigdeliver_frame_t
+ * - (ucontext_t pointers and stack magic)
+ * -----------------------------------------------------
+ * - (amd64-only 8-byte alignment gap)
+ * -----------------------------------------------------
+ * - frame of size "stacksz" from the stack builder
+ * ----------------------------------------------------- new %sp
+ */
+#if defined(_LP64)
+ /*
+ * The AMD64 ABI requires us to align the stack such that when the
+ * called function pushes the base pointer, the stack is 16 byte
+ * aligned. The stack must, therefore, be 8- but _not_ 16-byte
+ * aligned.
+ */
+#if (STACK_ALIGN != 16) || (STACK_ENTRY_ALIGN != 8)
+#error "lx_sigdeliver() did not find expected stack alignment"
+#endif
+ totsz = SA(sizeof (lx_sigdeliver_frame_t)) + SA(stacksz) + 8;
+ assert((totsz & (STACK_ENTRY_ALIGN - 1)) == 0);
+ assert((totsz & (STACK_ALIGN - 1)) == 8);
+#else
+ totsz = SA(sizeof (lx_sigdeliver_frame_t)) + SA(stacksz);
+ assert((totsz & (STACK_ALIGN - 1)) == 0);
+#endif
+
+ /*
+ * Copy our return frame into place:
+ */
+ lxfp -= SA(sizeof (lx_sigdeliver_frame_t));
+ lx_debug("lx_sigdeliver: lx_sigdeliver_frame_t @ %p\n", lxfp);
+ {
+ lx_sigdeliver_frame_t frm;
+
+ frm.lxsdf_magic = LX_SIGRT_MAGIC;
+ frm.lxsdf_retucp = &uc;
+ frm.lxsdf_sigucp = ucp;
+ frm.lxsdf_sigbackup = &sigbackup;
+
+ lx_debug("lx_sigdeliver: retucp %p sigucp %p\n",
+ frm.lxsdf_retucp, frm.lxsdf_sigucp);
+
+ if (uucopy(&frm, (void *)lxfp, sizeof (frm)) != 0) {
+ /*
+ * We could not modify the stack of the emulated Linux
+ * program. Act like the kernel and terminate the
+ * program with a segmentation violation.
+ */
+ (void) syscall(SYS_brand, B_EXIT_AS_SIG, SIGSEGV);
+ }
+
+ LX_SIGNAL_DELIVERY_FRAME_CREATE((void *)lxfp);
+
+ /*
+ * Populate a backup copy of signal linkage to use in case
+ * the Linux program completely destroys (or relocates) the
+ * delivery frame.
+ *
+ * This is necessary for programs that have flown so far off
+ * the architectural rails that they believe it is
+ * acceptable to make assumptions about the precise size and
+ * layout of the signal handling frame assembled by the
+ * kernel.
+ */
+ sigbackup.lxsb_retucp = frm.lxsdf_retucp;
+ sigbackup.lxsb_sigucp = frm.lxsdf_sigucp;
+ sigbackup.lxsb_sigdeliver_frame = lxfp;
+ sigbackup.lxsb_previous = lxtsd->lxtsd_sigbackup;
+ lxtsd->lxtsd_sigbackup = &sigbackup;
+
+ lx_debug("lx_sigdeliver: installed sigbackup %p; prev %p\n",
+ &sigbackup, sigbackup.lxsb_previous);
+ }
+
+ /*
+ * Build the Linux signal handling frame:
+ */
+#if defined(_LP64)
+ lxfp -= SA(stacksz) + 8;
+#else
+ lxfp -= SA(stacksz);
+#endif
+ lx_debug("lx_sigdeliver: Linux sig frame @ %p\n", lxfp);
+ stack_builder(lx_sig, sip, ucp, lxfp, hargs);
+
+ /*
+ * Record our reservation so that any nested signal handlers
+ * can see it.
+ */
+ lx_debug("lx_sigdeliver: Linux tsd sp %p -> %p\n", lxtsd->lxtsd_lx_sp,
+ lxfp);
+ lxtsd->lxtsd_lx_sp = lxfp;
+
+ if (newstack) {
+ lxtsd->lxtsd_sigaltstack.ss_flags |= LX_SS_ONSTACK;
+ }
+
+ LX_SIGDELIVER(lx_sig, lxsap, (void *)lxfp);
+
+ /*
+ * Re-enable signal delivery. If a signal was queued while we were
+ * in the critical section, it will be delivered immediately.
+ */
+ _sigon();
+
+ /*
+ * Pass control to the Linux signal handler:
+ */
+ lx_debug("lx_sigdeliver: JUMPING TO LINUX (sig %d sp %p eip %p)\n",
+ lx_sig, lxfp, user_handler);
+ {
+ ucontext_t jump_uc;
+
+ bcopy(lx_find_brand_uc(), &jump_uc, sizeof (jump_uc));
+
+ /*
+ * We want to load the general registers from this context, and
+ * switch to the BRAND stack. We do _not_ want to restore the
+ * uc_link value from this synthetic context, as that would
+ * break the signal handling context chain.
+ */
+ jump_uc.uc_flags = UC_CPU;
+ jump_uc.uc_brand_data[0] = (void *)(LX_UC_STACK_BRAND |
+ LX_UC_IGNORE_LINK);
+
+ LX_REG(&jump_uc, REG_FP) = 0;
+ LX_REG(&jump_uc, REG_SP) = lxfp;
+ LX_REG(&jump_uc, REG_PC) = (uintptr_t)user_handler;
+
+#if defined(_LP64)
+ /*
+ * Pass signal handler arguments by registers on AMD64.
+ */
+ LX_REG(&jump_uc, REG_RDI) = hargs[0];
+ LX_REG(&jump_uc, REG_RSI) = hargs[1];
+ LX_REG(&jump_uc, REG_RDX) = hargs[2];
+#endif
+
+ lx_jump_to_linux(&jump_uc);
+ }
+
+ assert(0);
+ abort();
+
+after_signal_handler:
+ /*
+ * Ensure all nested signal handlers have completed correctly
+ * and then remove our stack reservation.
+ */
+ _sigoff();
+ LX_SIGNAL_POST_HANDLER(lxfp, old_tsd_sp);
+ assert(lxtsd->lxtsd_lx_sp == lxfp);
+ lx_debug("lx_sigdeliver: after; Linux tsd sp %p -> %p\n", lxfp,
+ old_tsd_sp);
+ lxtsd->lxtsd_lx_sp = old_tsd_sp;
+ if (newstack) {
+ LX_SIGNAL_ALTSTACK_DISABLE();
+ lx_debug("lx_sigdeliver: disabling ALTSTACK sp %p\n", lxfp);
+ lxtsd->lxtsd_sigaltstack.ss_flags &= ~LX_SS_ONSTACK;
+ }
+ /*
+ * Restore backup signal tracking chain pointer to previous value:
+ */
+ if (lxtsd->lxtsd_sigbackup != NULL) {
+ lx_sigbackup_t *bprev = lxtsd->lxtsd_sigbackup->lxsb_previous;
+
+ lx_debug("lx_sigdeliver: restoring sigbackup %p to %p\n",
+ lxtsd->lxtsd_sigbackup, bprev);
+
+ lxtsd->lxtsd_sigbackup = bprev;
+ }
+ _sigon();
+
+ /*
+ * Here we return to libc so that it may clean up and restore the
+ * context originally interrupted by this signal.
+ */
+}
+
+/*
+ * Common routine to modify sigaction characteristics of a thread.
+ *
+ * We shouldn't need any special locking code here as we actually use our copy
+ * of libc's sigaction() to do all the real work, so its thread locking should
+ * take care of any issues for us.
+ */
+static int
+lx_sigaction_common(int lx_sig, struct lx_sigaction *lxsp,
+ struct lx_sigaction *olxsp)
+{
+ struct lx_sigaction *lxsap;
+ struct sigaction sa;
+
+ if (lx_sig <= 0 || lx_sig > LX_NSIG)
+ return (-EINVAL);
+
+ lxsap = &lx_sighandlers.lx_sa[lx_sig];
+ lx_debug("&lx_sighandlers.lx_sa[%d] = 0x%p", lx_sig, lxsap);
+
+ if ((olxsp != NULL) &&
+ ((uucopy(lxsap, olxsp, sizeof (struct lx_sigaction))) != 0))
+ return (-errno);
+
+ if (lxsp != NULL) {
+ int err, sig;
+ struct lx_sigaction lxsa;
+ sigset_t new_set, oset;
+
+ if (uucopy(lxsp, &lxsa, sizeof (struct lx_sigaction)) != 0)
+ return (-errno);
+
+ if ((sig = ltos_signo[lx_sig]) != -1) {
+ if (lx_no_abort_handler != 0) {
+ /*
+ * If LX_NO_ABORT_HANDLER has been set, we will
+ * not allow the emulated program to do
+ * anything hamfisted with SIGSEGV or SIGABRT
+ * signals.
+ */
+ if (sig == SIGSEGV || sig == SIGABRT) {
+ return (0);
+ }
+ }
+
+ /*
+ * Block this signal while messing with its dispostion
+ */
+ (void) sigemptyset(&new_set);
+ (void) sigaddset(&new_set, sig);
+
+ if (sigprocmask(SIG_BLOCK, &new_set, &oset) < 0) {
+ err = errno;
+ lx_debug("unable to block signal %d: %s", sig,
+ strerror(err));
+ return (-err);
+ }
+
+ /*
+ * We don't really need the old signal disposition at
+ * this point, but this weeds out signals that would
+ * cause sigaction() to return an error before we change
+ * anything other than the current signal mask.
+ */
+ if (sigaction(sig, NULL, &sa) < 0) {
+ err = errno;
+ lx_debug("sigaction() to get old "
+ "disposition for signal %d failed: "
+ "%s", sig, strerror(err));
+ (void) sigprocmask(SIG_SETMASK, &oset, NULL);
+ return (-err);
+ }
+
+ if ((lxsa.lxsa_handler != SIG_DFL) &&
+ (lxsa.lxsa_handler != SIG_IGN)) {
+ sa.sa_handler = lx_call_user_handler;
+
+ /*
+ * The interposition signal handler needs the
+ * information provided via the SA_SIGINFO flag.
+ */
+ sa.sa_flags = SA_SIGINFO;
+
+ /*
+ * When translating from Linux to illumos
+ * sigaction(2) flags, we explicitly do not
+ * pass SA_ONSTACK to the kernel. The
+ * alternate stack for Linux signal handling is
+ * handled entirely by the emulation code.
+ */
+ if (lxsa.lxsa_flags & LX_SA_NOCLDSTOP)
+ sa.sa_flags |= SA_NOCLDSTOP;
+ if (lxsa.lxsa_flags & LX_SA_NOCLDWAIT)
+ sa.sa_flags |= SA_NOCLDWAIT;
+ if (lxsa.lxsa_flags & LX_SA_RESTART)
+ sa.sa_flags |= SA_RESTART;
+ if (lxsa.lxsa_flags & LX_SA_NODEFER)
+ sa.sa_flags |= SA_NODEFER;
+
+ /*
+ * RESETHAND cannot be used be passed through
+ * for SIGPWR due to different default actions
+ * between Linux and Illumos.
+ */
+ if ((sig != SIGPWR) &&
+ (lxsa.lxsa_flags & LX_SA_RESETHAND))
+ sa.sa_flags |= SA_RESETHAND;
+
+ if (ltos_sigset(&lxsa.lxsa_mask,
+ &sa.sa_mask) != 0) {
+ err = errno;
+ (void) sigprocmask(SIG_SETMASK, &oset,
+ NULL);
+ return (-err);
+ }
+
+ lx_debug("interposing handler @ 0x%p for "
+ "signal %d (lx %d), flags 0x%x",
+ lxsa.lxsa_handler, sig, lx_sig,
+ lxsa.lxsa_flags);
+
+ if (sigaction(sig, &sa, NULL) < 0) {
+ err = errno;
+ lx_debug("sigaction() to set new "
+ "disposition for signal %d failed: "
+ "%s", sig, strerror(err));
+ (void) sigprocmask(SIG_SETMASK, &oset,
+ NULL);
+ return (-err);
+ }
+ } else if ((sig != SIGPWR) ||
+ ((sig == SIGPWR) &&
+ (lxsa.lxsa_handler == SIG_IGN))) {
+ /*
+ * There's no need to interpose for SIG_DFL or
+ * SIG_IGN so just call our copy of libc's
+ * sigaction(), but don't allow SIG_DFL for
+ * SIGPWR due to differing default actions
+ * between Linux and Illumos.
+ *
+ * Get the previous disposition first so things
+ * like sa_mask and sa_flags are preserved over
+ * a transition to SIG_DFL or SIG_IGN, which is
+ * what Linux expects.
+ */
+
+ sa.sa_handler = lxsa.lxsa_handler;
+
+ if (sigaction(sig, &sa, NULL) < 0) {
+ err = errno;
+ lx_debug("sigaction(%d, %s) failed: %s",
+ sig, ((sa.sa_handler == SIG_DFL) ?
+ "SIG_DFL" : "SIG_IGN"),
+ strerror(err));
+ (void) sigprocmask(SIG_SETMASK, &oset,
+ NULL);
+ return (-err);
+ }
+ }
+ } else {
+ lx_debug("Linux signal with no kill support "
+ "specified: %d", lx_sig);
+ }
+
+ /*
+ * Save the new disposition for the signal in the global
+ * lx_sighandlers structure.
+ */
+ bcopy(&lxsa, lxsap, sizeof (struct lx_sigaction));
+
+ /*
+ * Reset the signal mask to what we came in with if
+ * we were modifying a kill-supported signal.
+ */
+ if (sig != -1)
+ (void) sigprocmask(SIG_SETMASK, &oset, NULL);
+ }
+
+ return (0);
+}
+
+#if defined(_ILP32)
+/*
+ * sigaction is only used in 32-bit code.
+ */
+long
+lx_sigaction(uintptr_t lx_sig, uintptr_t actp, uintptr_t oactp)
+{
+ int val;
+ struct lx_sigaction sa, osa;
+ struct lx_sigaction *sap, *osap;
+ struct lx_osigaction *osp;
+
+ sap = (actp ? &sa : NULL);
+ osap = (oactp ? &osa : NULL);
+
+ /*
+ * If we have a source pointer, convert source lxsa_mask from
+ * lx_osigset_t to lx_sigset_t format.
+ */
+ if (sap) {
+ osp = (struct lx_osigaction *)actp;
+ sap->lxsa_handler = osp->lxsa_handler;
+
+ bzero(&sap->lxsa_mask, sizeof (lx_sigset_t));
+
+ for (val = 1; val <= OSIGSET_NBITS; val++)
+ if (osp->lxsa_mask & OSIGSET_BITSET(val))
+ (void) lx_sigaddset(&sap->lxsa_mask, val);
+
+ sap->lxsa_flags = osp->lxsa_flags;
+ sap->lxsa_restorer = osp->lxsa_restorer;
+ }
+
+ if ((val = lx_sigaction_common(lx_sig, sap, osap)))
+ return (val);
+
+ /*
+ * If we have a save pointer, convert the old lxsa_mask from
+ * lx_sigset_t to lx_osigset_t format.
+ */
+ if (osap) {
+ osp = (struct lx_osigaction *)oactp;
+
+ osp->lxsa_handler = osap->lxsa_handler;
+
+ bzero(&osp->lxsa_mask, sizeof (osp->lxsa_mask));
+ for (val = 1; val <= OSIGSET_NBITS; val++)
+ if (lx_sigismember(&osap->lxsa_mask, val))
+ osp->lxsa_mask |= OSIGSET_BITSET(val);
+
+ osp->lxsa_flags = osap->lxsa_flags;
+ osp->lxsa_restorer = osap->lxsa_restorer;
+ }
+
+ return (0);
+}
+#endif
+
+long
+lx_rt_sigaction(uintptr_t lx_sig, uintptr_t actp, uintptr_t oactp,
+ uintptr_t setsize)
+{
+ /*
+ * The "new" rt_sigaction call checks the setsize
+ * parameter.
+ */
+ if ((size_t)setsize != sizeof (lx_sigset_t))
+ return (-EINVAL);
+
+ return (lx_sigaction_common(lx_sig, (struct lx_sigaction *)actp,
+ (struct lx_sigaction *)oactp));
+}
+
+#if defined(_ILP32)
+/*
+ * Convert signal syscall to a call to the lx_sigaction() syscall
+ * Only used in 32-bit code.
+ */
+long
+lx_signal(uintptr_t lx_sig, uintptr_t handler)
+{
+ struct sigaction act;
+ struct sigaction oact;
+ int rc;
+
+ /*
+ * Use sigaction to mimic SYSV signal() behavior; glibc will
+ * actually call sigaction(2) itself, so we're really reaching
+ * back for signal(2) semantics here.
+ */
+ bzero(&act, sizeof (act));
+ act.sa_handler = (void (*)())handler;
+ act.sa_flags = SA_RESETHAND | SA_NODEFER;
+
+ rc = lx_sigaction(lx_sig, (uintptr_t)&act, (uintptr_t)&oact);
+ return ((rc == 0) ? ((ssize_t)oact.sa_handler) : rc);
+}
+#endif
+
+void
+lx_sighandlers_save(lx_sighandlers_t *saved)
+{
+ bcopy(&lx_sighandlers, saved, sizeof (lx_sighandlers_t));
+}
+
+void
+lx_sighandlers_restore(lx_sighandlers_t *saved)
+{
+ bcopy(saved, &lx_sighandlers, sizeof (lx_sighandlers_t));
+}
+
+int
+lx_siginit(void)
+{
+ extern void set_setcontext_enforcement(int);
+ extern void set_escaped_context_cleanup(int);
+
+ struct sigaction sa;
+ sigset_t new_set, oset;
+ int lx_sig, sig;
+
+ if (getenv("LX_NO_ABORT_HANDLER") != NULL) {
+ lx_no_abort_handler = 1;
+ }
+
+ /*
+ * Block all signals possible while setting up the signal imposition
+ * mechanism.
+ */
+ (void) sigfillset(&new_set);
+
+ if (sigprocmask(SIG_BLOCK, &new_set, &oset) < 0)
+ lx_err_fatal("unable to block signals while setting up "
+ "imposition mechanism: %s", strerror(errno));
+
+ /*
+ * Ignore any signals that have no Linux analog so that those
+ * signals cannot be sent to Linux processes from the global zone
+ */
+ for (sig = 1; sig < NSIG; sig++)
+ if (stol_signo[sig] < 0)
+ (void) sigignore(sig);
+
+ /*
+ * Mark any signals that are ignored as ignored in our interposition
+ * handler array
+ */
+ for (lx_sig = 1; lx_sig <= LX_NSIG; lx_sig++) {
+ if (((sig = ltos_signo[lx_sig]) != -1) &&
+ (sigaction(sig, NULL, &sa) < 0))
+ lx_err_fatal("unable to determine previous disposition "
+ "for signal %d: %s", sig, strerror(errno));
+
+ if (sa.sa_handler == SIG_IGN) {
+ lx_debug("marking signal %d (lx %d) as SIG_IGN",
+ sig, lx_sig);
+ lx_sighandlers.lx_sa[lx_sig].lxsa_handler = SIG_IGN;
+ }
+ }
+
+ /*
+ * Have our interposition handler handle SIGPWR to start with,
+ * as it has a default action of terminating the process in Linux
+ * but its default is to be ignored in Illumos.
+ */
+ (void) sigemptyset(&sa.sa_mask);
+ sa.sa_sigaction = lx_call_user_handler;
+ sa.sa_flags = SA_SIGINFO;
+
+ if (sigaction(SIGPWR, &sa, NULL) < 0)
+ lx_err_fatal("sigaction(SIGPWR) failed: %s", strerror(errno));
+
+ /*
+ * Illumos' libc forces certain register values in the ucontext_t
+ * used to restore a post-signal user context to be those Illumos
+ * expects; however that is not what we want to happen if the signal
+ * was taken while branded code was executing, so we must disable
+ * that behavior.
+ */
+ set_setcontext_enforcement(0);
+
+ /*
+ * The illumos libc attempts to clean up dangling uc_link pointers in
+ * signal handling contexts when libc believes us to have escaped a
+ * signal handler incorrectly in the past. We want to disable this
+ * behaviour, so that the system call emulation context saved by the
+ * kernel brand module for lx_emulate() may be part of the context
+ * chain without itself being used for signal handling.
+ */
+ set_escaped_context_cleanup(0);
+
+ /*
+ * Reset the signal mask to what we came in with.
+ */
+ (void) sigprocmask(SIG_SETMASK, &oset, NULL);
+
+ lx_debug("interposition handler setup for SIGPWR");
+ return (0);
+}
+
+/*
+ * The first argument is the pid (Linux tgid) to send the signal to, second
+ * argument is the signal to send (an lx signal), and third is the siginfo_t
+ * with extra information. We translate the code and signal only from the
+ * siginfo_t, and leave everything else the same as it gets passed through the
+ * signalling system. This is enough to get sigqueue working. See Linux man
+ * page rt_sigqueueinfo(2).
+ */
+long
+lx_rt_sigqueueinfo(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ pid_t tgid = (pid_t)p1;
+ int lx_sig = (int)p2;
+ int sig;
+ lx_siginfo_t lx_siginfo;
+ siginfo_t siginfo;
+ int s_code;
+ pid_t s_pid;
+
+ if (uucopy((void *)p3, &lx_siginfo, sizeof (lx_siginfo_t)) != 0)
+ return (-EFAULT);
+ s_code = ltos_sigcode(lx_siginfo.lsi_code);
+ if (s_code == LX_SI_CODE_NOT_EXIST)
+ return (-EINVAL);
+ if (lx_sig < 0 || lx_sig > LX_NSIG || (sig = ltos_signo[lx_sig]) < 0) {
+ return (-EINVAL);
+ }
+ /*
+ * This case (when trying to kill pid 0) just has a different errno
+ * returned in illumos than in Linux.
+ */
+ if (tgid == 0)
+ return (-ESRCH);
+ if (lx_lpid_to_spid(tgid, &s_pid) != 0)
+ return (-ESRCH);
+ if (SI_CANQUEUE(s_code)) {
+ return ((syscall(SYS_sigqueue, s_pid, sig,
+ lx_siginfo.lsi_value, s_code, 0) == -1) ?
+ (-errno): 0);
+ } else {
+ /*
+ * This case is unlikely, as the main entry point is through
+ * sigqueue, which always has a queuable si_code.
+ */
+ siginfo.si_signo = sig;
+ siginfo.si_code = s_code;
+ siginfo.si_pid = lx_siginfo.lsi_pid;
+ siginfo.si_value = lx_siginfo.lsi_value;
+ siginfo.si_uid = lx_siginfo.lsi_uid;
+ return ((syscall(SYS_brand, B_HELPER_SIGQUEUE,
+ tgid, sig, &siginfo)) ? (-errno) : 0);
+ }
+}
+
+/*
+ * Adds an additional argument for which thread within a thread group to send
+ * the signal to (added as the second argument).
+ */
+long
+lx_rt_tgsigqueueinfo(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
+{
+ pid_t tgid = (pid_t)p1;
+ pid_t tid = (pid_t)p2;
+ int lx_sig = (int)p3;
+ int sig;
+ lx_siginfo_t lx_siginfo;
+ siginfo_t siginfo;
+ int si_code;
+
+ if (uucopy((void *)p4, &lx_siginfo, sizeof (lx_siginfo_t)) != 0)
+ return (-EFAULT);
+ if (lx_sig < 0 || lx_sig > LX_NSIG || (sig = ltos_signo[lx_sig]) < 0) {
+ return (-EINVAL);
+ }
+ si_code = ltos_sigcode(lx_siginfo.lsi_code);
+ if (si_code == LX_SI_CODE_NOT_EXIST)
+ return (-EINVAL);
+ /*
+ * Check for invalid tgid and tids. That appears to be only negatives
+ * and 0 values. Everything else that doesn't exist is instead ESRCH.
+ */
+ if (tgid <= 0 || tid <= 0)
+ return (-EINVAL);
+ siginfo.si_signo = sig;
+ siginfo.si_code = si_code;
+ siginfo.si_pid = lx_siginfo.lsi_pid;
+ siginfo.si_value = lx_siginfo.lsi_value;
+ siginfo.si_uid = lx_siginfo.lsi_uid;
+
+ return ((syscall(SYS_brand, B_HELPER_TGSIGQUEUE, tgid, tid, sig,
+ &siginfo)) ? (-errno) : 0);
+}
+
+long
+lx_signalfd(int fd, uintptr_t mask, size_t msize)
+{
+ return (lx_signalfd4(fd, mask, msize, 0));
+}
+
+long
+lx_signalfd4(int fd, uintptr_t mask, size_t msize, int flags)
+{
+ sigset_t s_set;
+ int r;
+
+ if (msize != sizeof (int64_t))
+ return (-EINVAL);
+
+ if (ltos_sigset((lx_sigset_t *)mask, &s_set) != 0)
+ return (-errno);
+
+ r = signalfd(fd, &s_set, flags);
+
+ /*
+ * signalfd(3C) may fail with ENOENT if /dev/signalfd is not available.
+ * It is less jarring to Linux programs to tell them that internal
+ * allocation failed than to report an error number they are not
+ * expecting.
+ */
+ if (r == -1 && errno == ENOENT)
+ return (-ENODEV);
+
+ return (r == -1 ? -errno : r);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/socket.c b/usr/src/lib/brand/lx/lx_brand/common/socket.c
new file mode 100644
index 0000000000..e8659cf9ba
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/socket.c
@@ -0,0 +1,349 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <libintl.h>
+#include <strings.h>
+#include <alloca.h>
+#include <ucred.h>
+#include <limits.h>
+
+#include <sys/param.h>
+#include <sys/brand.h>
+#include <sys/syscall.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/un.h>
+#include <netinet/tcp.h>
+#include <netinet/igmp.h>
+#include <netinet/icmp6.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_socket.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_misc.h>
+#include <netpacket/packet.h>
+
+#ifdef __i386
+
+static int lx_listen32(ulong_t *);
+static int lx_socketpair32(ulong_t *);
+static int lx_shutdown32(ulong_t *);
+static int lx_recvmmsg32(ulong_t *);
+static int lx_sendmmsg32(ulong_t *);
+
+typedef int (*sockfn_t)(ulong_t *);
+
+static struct {
+ sockfn_t s_fn; /* Function implementing the subcommand */
+ int s_nargs; /* Number of arguments the function takes */
+} sockfns[] = {
+ NULL, 3,
+ NULL, 3,
+ NULL, 3,
+ lx_listen32, 2,
+ NULL, 3,
+ NULL, 3,
+ NULL, 3,
+ lx_socketpair32, 4,
+ NULL, 4,
+ NULL, 4,
+ NULL, 6,
+ NULL, 6,
+ lx_shutdown32, 2,
+ NULL, 5,
+ NULL, 5,
+ NULL, 3,
+ NULL, 3,
+ NULL, 4,
+ lx_recvmmsg32, 5,
+ lx_sendmmsg32, 4
+};
+#endif /* __i386 */
+
+/*
+ * What follows are a series of tables we use to translate Linux constants
+ * into equivalent Illumos constants and back again. I wish this were
+ * cleaner, more programmatic, and generally nicer. Sadly, life is messy,
+ * and Unix networking even more so.
+ */
+static const int ltos_family[LX_AF_MAX + 1] = {
+ AF_UNSPEC, AF_UNIX, AF_INET, AF_NOTSUPPORTED, AF_NOTSUPPORTED,
+ AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED,
+ AF_NOTSUPPORTED, AF_INET6, AF_NOTSUPPORTED, AF_NOTSUPPORTED,
+ AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_LX_NETLINK,
+ AF_PACKET, AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED,
+ AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED,
+ AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED,
+ AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED,
+ AF_NOTSUPPORTED
+};
+
+#define LX_AF_INET6 10
+#define LX_AF_NETLINK 16
+#define LX_AF_PACKET 17
+
+static const int stol_family[LX_AF_MAX + 1] = {
+ AF_UNSPEC, AF_UNIX, AF_INET, AF_NOTSUPPORTED, AF_NOTSUPPORTED,
+ AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED,
+ AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED,
+ AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED,
+ AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED,
+ AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED,
+ AF_NOTSUPPORTED, LX_AF_INET6, AF_NOTSUPPORTED, AF_NOTSUPPORTED,
+ AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED, LX_AF_PACKET,
+ LX_AF_NETLINK
+};
+
+#define LTOS_FAMILY(d) ((d) <= LX_AF_MAX ? ltos_family[(d)] : AF_INVAL)
+#define STOL_FAMILY(d) ((d) <= LX_AF_MAX ? stol_family[(d)] : AF_INVAL)
+
+static const int ltos_socktype[LX_SOCK_PACKET + 1] = {
+ SOCK_NOTSUPPORTED, SOCK_STREAM, SOCK_DGRAM, SOCK_RAW,
+ SOCK_RDM, SOCK_SEQPACKET, SOCK_NOTSUPPORTED, SOCK_NOTSUPPORTED,
+ SOCK_NOTSUPPORTED, SOCK_NOTSUPPORTED, SOCK_NOTSUPPORTED
+};
+
+#define LTOS_SOCKTYPE(t) \
+ ((t) <= LX_SOCK_PACKET ? ltos_socktype[(t)] : SOCK_INVAL)
+
+static const int stol_socktype[SOCK_SEQPACKET + 1] = {
+ SOCK_NOTSUPPORTED, LX_SOCK_DGRAM, LX_SOCK_STREAM, SOCK_NOTSUPPORTED,
+ LX_SOCK_RAW, LX_SOCK_RDM, LX_SOCK_SEQPACKET
+};
+
+typedef struct {
+ sa_family_t nl_family;
+ unsigned short nl_pad;
+ uint32_t nl_pid;
+ uint32_t nl_groups;
+} lx_sockaddr_nl_t;
+
+typedef struct {
+ sa_family_t sin6_family;
+ in_port_t sin6_port;
+ uint32_t sin6_flowinfo;
+ struct in6_addr sin6_addr;
+ uint32_t sin6_scope_id; /* Depends on scope of sin6_addr */
+ /* one 32-bit field shorter than illumos */
+} lx_sockaddr_in6_t;
+
+static int
+convert_pkt_proto(int protocol)
+{
+ switch (ntohs(protocol)) {
+ case LX_ETH_P_802_2:
+ return (ETH_P_802_2);
+ case LX_ETH_P_IP:
+ return (ETH_P_IP);
+ case LX_ETH_P_ARP:
+ return (ETH_P_ARP);
+ case LX_ETH_P_IPV6:
+ return (ETH_P_IPV6);
+ case LX_ETH_P_ALL:
+ case LX_ETH_P_802_3:
+ return (ETH_P_ALL);
+ default:
+ return (-1);
+ }
+}
+
+static int
+convert_sock_args(int in_dom, int in_type, int in_protocol, int *out_dom,
+ int *out_type, int *out_options, int *out_protocol)
+{
+ int domain, type, options;
+
+ if (in_dom < 0 || in_type < 0 || in_protocol < 0)
+ return (-EINVAL);
+
+ domain = LTOS_FAMILY(in_dom);
+ if (domain == AF_NOTSUPPORTED || domain == AF_UNSPEC)
+ return (-EAFNOSUPPORT);
+ if (domain == AF_INVAL)
+ return (-EINVAL);
+
+ type = LTOS_SOCKTYPE(in_type & LX_SOCK_TYPE_MASK);
+ if (type == SOCK_NOTSUPPORTED)
+ return (-ESOCKTNOSUPPORT);
+ if (type == SOCK_INVAL)
+ return (-EINVAL);
+
+ /*
+ * Linux does not allow the app to specify IP Protocol for raw
+ * sockets. Illumos does, so bail out here.
+ */
+ if (domain == AF_INET && type == SOCK_RAW && in_protocol == IPPROTO_IP)
+ return (-ESOCKTNOSUPPORT);
+
+ options = 0;
+ if (in_type & LX_SOCK_NONBLOCK)
+ options |= SOCK_NONBLOCK;
+ if (in_type & LX_SOCK_CLOEXEC)
+ options |= SOCK_CLOEXEC;
+
+ /*
+ * The protocol definitions for PF_PACKET differ between Linux and
+ * illumos.
+ */
+ if (domain == PF_PACKET &&
+ (in_protocol = convert_pkt_proto(in_protocol)) < 0)
+ return (EINVAL);
+
+ *out_dom = domain;
+ *out_type = type;
+ *out_options = options;
+ *out_protocol = in_protocol;
+ return (0);
+}
+
+long
+lx_listen(int sockfd, int backlog)
+{
+ int r;
+
+ lx_debug("\tlisten(%d, %d)", sockfd, backlog);
+ r = listen(sockfd, backlog);
+
+ return ((r < 0) ? -errno : r);
+}
+
+long
+lx_socketpair(int domain, int type, int protocol, int *sv)
+{
+ int options;
+ int fds[2];
+ int r;
+
+ r = convert_sock_args(domain, type, protocol, &domain, &type, &options,
+ &protocol);
+ if (r != 0)
+ return (r);
+
+ lx_debug("\tsocketpair(%d, %d, %d, 0x%p)", domain, type, protocol, sv);
+
+ r = socketpair(domain, type | options, protocol, fds);
+
+ if (r == 0) {
+ if (uucopy(fds, sv, sizeof (fds)) != 0) {
+ r = errno;
+ (void) close(fds[0]);
+ (void) close(fds[1]);
+ return (-r);
+ }
+ return (0);
+ }
+
+ if (errno == EPROTONOSUPPORT)
+ return (-ESOCKTNOSUPPORT);
+
+ return (-errno);
+}
+
+long
+lx_shutdown(int sockfd, int how)
+{
+ int r;
+
+ lx_debug("\tshutdown(%d, %d)", sockfd, how);
+ r = shutdown(sockfd, how);
+
+ return ((r < 0) ? -errno : r);
+}
+
+#ifdef __i386
+
+static int
+lx_listen32(ulong_t *args)
+{
+ return (lx_listen((int)args[0], (int)args[1]));
+}
+
+static int
+lx_socketpair32(ulong_t *args)
+{
+ return (lx_socketpair((int)args[0], (int)args[1], (int)args[2],
+ (int *)args[3]));
+}
+
+static int
+lx_shutdown32(ulong_t *args)
+{
+ return (lx_shutdown((int)args[0], (int)args[1]));
+}
+
+static int
+lx_recvmmsg32(ulong_t *args)
+{
+ lx_unsupported("Unsupported socketcall: recvmmsg\n.");
+ return (-EINVAL);
+}
+
+static int
+lx_sendmmsg32(ulong_t *args)
+{
+ lx_unsupported("Unsupported socketcall: sendmmsg\n.");
+ return (-EINVAL);
+}
+
+long
+lx_socketcall(uintptr_t p1, uintptr_t p2)
+{
+ int subcmd = (int)p1 - 1; /* subcommands start at 1 - not 0 */
+ ulong_t args[6];
+ int r;
+
+ if (subcmd < 0 || subcmd >= LX_SENDMMSG)
+ return (-EINVAL);
+
+ /* Bail out if we are trying to call an IKE function */
+ if (sockfns[subcmd].s_fn == NULL) {
+ lx_err_fatal("lx_socketcall: deprecated subcmd: %d", subcmd);
+ }
+
+ /*
+ * Copy the arguments to the subcommand in from the app's address
+ * space, returning EFAULT if we get a bogus pointer.
+ */
+ if (uucopy((void *)p2, args,
+ sockfns[subcmd].s_nargs * sizeof (ulong_t)))
+ return (-errno);
+
+ r = (sockfns[subcmd].s_fn)(args);
+
+ return (r);
+}
+
+#endif /* __i386 */
diff --git a/usr/src/lib/brand/lx/lx_brand/common/stack.c b/usr/src/lib/brand/lx/lx_brand/common/stack.c
new file mode 100644
index 0000000000..daa40f3cc6
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/stack.c
@@ -0,0 +1,338 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+/*
+ * Manage the native/emulation stack for LX-branded LWPs.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <errno.h>
+
+#include <thread.h>
+#include <sys/mman.h>
+#include <sys/brand.h>
+#include <sys/syscall.h>
+#include <sys/debug.h>
+
+#include <sys/lx_brand.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_thread.h>
+
+
+typedef struct lx_stack_list_ent {
+ thread_t sle_tid;
+ void *sle_stack;
+ size_t sle_stack_size;
+ lx_tsd_t *sle_tsd;
+} lx_stack_list_ent_t;
+
+static mutex_t lx_stack_list_lock = ERRORCHECKMUTEX;
+lx_stack_list_ent_t *lx_stack_list = NULL;
+unsigned int lx_stack_list_elems = 0;
+
+/*
+ * Usermode emulation alternate stack size, expressed as a page count:
+ */
+int lx_native_stack_page_count = LX_NATIVE_STACK_PAGE_COUNT;
+
+/*
+ * We use these private functions from libc to suspend signal delivery in
+ * critical sections:
+ */
+extern void _sigon(void);
+extern void _sigoff(void);
+
+void
+lx_stack_prefork(void)
+{
+ lx_tsd_t *lx_tsd = lx_get_tsd();
+
+ /*
+ * The "lx_stack_list_lock" mutex is used to protect access to the list
+ * of per-thread native stacks. Management of native stacks is
+ * generally performed while servicing an emulated fork(2), vfork(2) or
+ * clone(2) system call.
+ *
+ * Multiple threads may be attempting to create new threads or
+ * processes concurrently, but in the case of fork(2) only the
+ * currently executing thread is duplicated in the child process. We
+ * require that the stack list lock be taken before the native fork1()
+ * or forkx(), and released in both the parent and the child once the
+ * operation is complete. For vfork() the lock must only be released in
+ * the parent (once it resumes execution) since the child is borrowing
+ * the parent's thread. The _sigoff/_sigon dance will also only take
+ * place in the parent.
+ *
+ * Holding this mutex prevents the forked child from containing a
+ * copy-on-write copy of a locked mutex without the thread that would
+ * later unlock it. We also suspend signal delivery while entering
+ * this critical section to ensure async signal safety.
+ *
+ * Unfortunately some Linux applications (e.g. busybox) will call vfork
+ * and then call fork (without the expected intervening exec). We
+ * avoid the mutex deadlock by skipping the call since we know this
+ * thread has borrowed the parent's address space and the parent cannot
+ * execute until we exit/exec.
+ */
+ _sigoff();
+ if (lx_tsd->lxtsd_is_vforked == 0)
+ VERIFY0(mutex_lock(&lx_stack_list_lock));
+}
+
+void
+lx_stack_postfork(void)
+{
+ lx_tsd_t *lx_tsd = lx_get_tsd();
+
+ if (lx_tsd->lxtsd_is_vforked == 0)
+ VERIFY0(mutex_unlock(&lx_stack_list_lock));
+ _sigon();
+}
+
+/*
+ * Free the alternate stack for this thread.
+ */
+void
+lx_free_stack(void)
+{
+ thread_t me = thr_self();
+ int i;
+
+ _sigoff();
+ VERIFY0(mutex_lock(&lx_stack_list_lock));
+
+ /*
+ * Find this thread's stack in the list of stacks.
+ */
+ for (i = 0; i < lx_stack_list_elems; i++) {
+ if (lx_stack_list[i].sle_tid != me) {
+ continue;
+ }
+
+ (void) munmap(lx_stack_list[i].sle_stack,
+ lx_stack_list[i].sle_stack_size);
+
+ /*
+ * Free the thread-specific data structure for this thread.
+ */
+ if (lx_stack_list[i].sle_tsd != NULL) {
+ free(lx_stack_list[i].sle_tsd->lxtsd_clone_state);
+ free(lx_stack_list[i].sle_tsd);
+ }
+
+ /*
+ * Free up this stack list entry:
+ */
+ bzero(&lx_stack_list[i], sizeof (lx_stack_list[i]));
+
+ VERIFY0(mutex_unlock(&lx_stack_list_lock));
+ _sigon();
+ return;
+ }
+
+ /*
+ * Did not find the stack in the list.
+ */
+ assert(0);
+}
+
+/*
+ * After fork1(), we must unmap the stack of every thread other than the
+ * one copied into the child process.
+ */
+void
+lx_free_other_stacks(void)
+{
+ int i, this_stack = -1;
+ thread_t me = thr_self();
+ lx_tsd_t *lx_tsd = lx_get_tsd();
+
+ _sigoff();
+
+ /*
+ * We don't need to check or take the lx_stack_list_lock here because
+ * we are the only thread in this process, but if we got here via an
+ * evil vfork->fork path then we must drop the lock for the new child
+ * and reset our "is_vforked" counter.
+ */
+ if (lx_tsd->lxtsd_is_vforked != 0) {
+ VERIFY0(mutex_unlock(&lx_stack_list_lock));
+ lx_tsd->lxtsd_is_vforked = 0;
+ }
+
+ for (i = 0; i < lx_stack_list_elems; i++) {
+ if (lx_stack_list[i].sle_tid == me) {
+ /*
+ * Do not unmap the stack for this LWP.
+ */
+ this_stack = i;
+ continue;
+ } else if (lx_stack_list[i].sle_tid == 0) {
+ /*
+ * Skip any holes in the list.
+ */
+ continue;
+ }
+
+ /*
+ * Free the thread-specific data structure for this thread.
+ */
+ if (lx_stack_list[i].sle_tsd != NULL) {
+ free(lx_stack_list[i].sle_tsd->lxtsd_clone_state);
+ free(lx_stack_list[i].sle_tsd);
+ }
+
+ /*
+ * Unmap the stack of every other LWP.
+ */
+ (void) munmap(lx_stack_list[i].sle_stack,
+ lx_stack_list[i].sle_stack_size);
+ }
+ /*
+ * Did not find the stack for this LWP in the list.
+ */
+ assert(this_stack != -1);
+
+ /*
+ * Ensure the stack data for this LWP is in the first slot and shrink
+ * the list.
+ */
+ if (this_stack != 0) {
+ lx_stack_list[0] = lx_stack_list[this_stack];
+ }
+ lx_stack_list_elems = 1;
+ lx_stack_list = realloc(lx_stack_list, lx_stack_list_elems *
+ sizeof (lx_stack_list[0]));
+ if (lx_stack_list == NULL) {
+ lx_err_fatal("failed to shrink stack list: %s",
+ strerror(errno));
+ }
+
+ _sigon();
+}
+
+/*
+ * Allocate an alternate stack for the execution of native emulation routines.
+ * This routine is based, in part, on find_stack() from libc.
+ */
+int
+lx_alloc_stack(void **nstack, size_t *nstack_size)
+{
+ static int pagesize = 0;
+ static int stackprot = 0;
+ int stacksize = 0;
+ void *stack;
+
+ /*
+ * Fetch configuration once:
+ */
+ if (pagesize == 0) {
+ pagesize = _sysconf(_SC_PAGESIZE);
+ assert(pagesize > 0);
+ }
+ if (stackprot == 0) {
+ long lprot = _sysconf(_SC_STACK_PROT);
+
+ stackprot = lprot > 0 ? lprot : (PROT_READ | PROT_WRITE);
+ }
+
+ stacksize = lx_native_stack_page_count * pagesize;
+
+ if ((stack = mmap(NULL, stacksize, stackprot, MAP_PRIVATE |
+ MAP_NORESERVE | MAP_ANON, -1, (off_t)0)) == MAP_FAILED) {
+ int en = errno;
+ lx_debug("lx_alloc_stack: failed to allocate stack: %s",
+ strerror(errno));
+ errno = en;
+ return (-1);
+ }
+
+#if DEBUG
+ /*
+ * Write a recognisable pattern into the allocated stack pages.
+ */
+ for (pos = 0; pos < ((stacksize - 1) / 4); pos++) {
+ ((uint32_t *)stack)[pos] = 0x0facade0;
+ }
+#endif
+
+ *nstack = stack;
+ *nstack_size = stacksize;
+
+ return (0);
+}
+
+/*
+ * Configure the in-kernel brand-specific LWP data with the native stack
+ * pointer for this thread. If a stack is not passed, allocate one first.
+ */
+void
+lx_install_stack(void *stack, size_t stacksize, lx_tsd_t *tsd)
+{
+ thread_t me = thr_self();
+ int i;
+ uintptr_t stack_top;
+
+ if (stack == NULL) {
+ /*
+ * If we were not passed a stack, then allocate one:
+ */
+ if (lx_alloc_stack(&stack, &stacksize) == -1) {
+ lx_err_fatal("failed to allocate stack for thread "
+ "%d: %s", me, strerror(errno));
+ }
+ }
+
+ /*
+ * Install the stack in the global list of thread stacks.
+ */
+ _sigoff();
+ VERIFY0(mutex_lock(&lx_stack_list_lock));
+
+ for (i = 0; i < lx_stack_list_elems; i++) {
+ assert(lx_stack_list[i].sle_tid != me);
+ if (lx_stack_list[i].sle_tid == 0)
+ break;
+ }
+ if (i >= lx_stack_list_elems) {
+ lx_stack_list_elems++;
+ lx_stack_list = realloc(lx_stack_list, lx_stack_list_elems *
+ sizeof (lx_stack_list[0]));
+ if (lx_stack_list == NULL) {
+ lx_err_fatal("failed to extend stack list: %s",
+ strerror(errno));
+ }
+ }
+ lx_stack_list[i].sle_tid = me;
+ lx_stack_list[i].sle_stack = stack;
+ lx_stack_list[i].sle_stack_size = stacksize;
+ lx_stack_list[i].sle_tsd = tsd;
+
+ VERIFY0(mutex_unlock(&lx_stack_list_lock));
+ _sigon();
+
+ /*
+ * Inform the kernel of the location of the brand emulation
+ * stack for this LWP:
+ */
+ stack_top = (uintptr_t)stack + stacksize;
+ lx_debug("stack %p stack_top %p\n", stack, stack_top);
+ if (syscall(SYS_brand, B_SET_NATIVE_STACK, stack_top) != 0) {
+ lx_err_fatal("unable to set native stack: %s", strerror(errno));
+ }
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/statfs.c b/usr/src/lib/brand/lx/lx_brand/common/statfs.c
new file mode 100644
index 0000000000..942aaf9882
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/statfs.c
@@ -0,0 +1,346 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc. All rights reserved.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <libintl.h>
+#include <string.h>
+#include <strings.h>
+#include <sys/types.h>
+#include <sys/statvfs.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+
+#include <sys/lx_debug.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_statfs.h>
+#include <sys/lx_syscall.h>
+
+/*
+ * these defines must exist before we include regexp.h, see regexp(5)
+ */
+#define RE_SIZE 1024
+#define INIT char *sp = instring;
+#define GETC() (*sp++)
+#define PEEKC() (*sp)
+#define UNGETC(c) (--sp)
+#define RETURN(c) return (NULL);
+#define ERROR(c) return ((char *)c);
+
+/*
+ * for regular expressions we're using regexp(5).
+ *
+ * we'd really prefer to use some other nicer regular expressions
+ * interfaces (like regcmp(3c), regcomp(3c), or re_comp(3c)) but we
+ * can't because all these other interfaces rely on the ability
+ * to allocate memory via libc malloc()/calloc() calls, which
+ * we can't really do here.
+ *
+ * we could optionally use regexpr(3gen) but we don't since the
+ * interfaces there are incredibly similar to the regexp(5)
+ * interfaces we're already using and we'd have the added
+ * requirement of linking against libgen.
+ *
+ * another option that was considered is fnmatch(3c) but the
+ * limited pattern expansion capability of this interface would
+ * force us to include more patterns to check against.
+ */
+#include <regexp.h>
+
+static struct lx_ftype_path {
+ char *lfp_path;
+ char lfp_re[RE_SIZE];
+ int lfp_magic;
+ char *lfp_magic_str;
+} ftype_path_list[] = {
+ { "^/dev/pts$", "",
+ LX_DEVPTS_SUPER_MAGIC, "LX_DEVPTS_SUPER_MAGIC" },
+ { "^/dev/pts/$", "",
+ LX_DEVPTS_SUPER_MAGIC, "LX_DEVPTS_SUPER_MAGIC" },
+ { "^/dev/pts/[0-9][0-9]*$", "",
+ LX_DEVPTS_SUPER_MAGIC, "LX_DEVPTS_SUPER_MAGIC" },
+ { NULL, "",
+ 0, NULL }
+};
+
+/*
+ * For lack of linux equivalents, we present lofs and zfs as being ext2. Yes,
+ * this is a total lie, but Linux can't handle the truth of ZFS -- and ext2's
+ * small surface area seems to make it the most tenable lie to tell.
+ */
+static struct lx_ftype_name {
+ const char *lfn_name;
+ int lfn_magic;
+ char *lfn_magic_str;
+} ftype_name_list[] = {
+ { "hsfs", LX_ISOFS_SUPER_MAGIC, "LX_ISOFS_SUPER_MAGIC" },
+ { "nfs", LX_NFS_SUPER_MAGIC, "LX_NFS_SUPER_MAGIC" },
+ { "pcfs", LX_MSDOS_SUPER_MAGIC, "LX_MSDOS_SUPER_MAGIC" },
+ { "lx_proc", LX_PROC_SUPER_MAGIC, "LX_PROC_SUPER_MAGIC" },
+ { "tmpfs", LX_TMPFS_SUPER_MAGIC, "LX_TMPFS_SUPER_MAGIC" },
+ { "ufs", LX_UFS_MAGIC, "LX_UFS_MAGIC" },
+ { "lofs", LX_EXT2_SUPER_MAGIC, "LX_EXT2_SUPER_MAGIC" },
+ { "zfs", LX_EXT2_SUPER_MAGIC, "LX_EXT2_SUPER_MAGIC" },
+ { "lxautofs", LX_AUTOFS_SUPER_MAGIC, "LX_AUTOFS_SUPER_MAGIC" },
+ { NULL, 0, NULL }
+};
+
+int
+lx_statfs_init()
+{
+ int i;
+ char *rv;
+
+ for (i = 0; ftype_path_list[i].lfp_path != NULL; i++) {
+ rv = compile(
+ ftype_path_list[i].lfp_path,
+ ftype_path_list[i].lfp_re,
+ ftype_path_list[i].lfp_re + RE_SIZE, '\0');
+ if (rv == NULL)
+ continue;
+
+ lx_debug("lx_statfs_init compile(\"%s\") failed",
+ ftype_path_list[i].lfp_path);
+ return (1);
+ }
+ return (0);
+}
+
+static int
+stol_type(const char *path, const char *name)
+{
+ int i;
+ lx_debug("\tstol_type(\"%s\", \"%s\")\n", path == NULL ? "NULL" : path,
+ name == NULL ? "NULL" : name);
+
+ if (path != NULL) {
+ char userpath[MAXPATHLEN];
+
+ if (uucopystr(path, userpath, MAXPATHLEN) == -1)
+ return (-errno);
+
+ for (i = 0; ftype_path_list[i].lfp_path != NULL; i++) {
+ if (step(userpath, ftype_path_list[i].lfp_re) == 0)
+ continue;
+
+ /* got a match on the fs path */
+ lx_debug("\ttranslated f_type to 0x%x - %s",
+ ftype_path_list[i].lfp_magic,
+ ftype_path_list[i].lfp_magic_str);
+ return (ftype_path_list[i].lfp_magic);
+ }
+ }
+
+ assert(name != NULL);
+ for (i = 0; ftype_name_list[i].lfn_name != NULL; i++) {
+ if (strcmp(name, ftype_name_list[i].lfn_name) == 0) {
+
+ /* got a match on the fs name */
+ lx_debug("\ttranslated f_type to 0x%x - %s",
+ ftype_name_list[i].lfn_magic,
+ ftype_name_list[i].lfn_magic_str);
+ return (ftype_name_list[i].lfn_magic);
+ }
+ }
+
+ /* we don't know what the fs type is so just set it to 0 */
+ return (0);
+}
+
+/*
+ * The Linux statfs() is similar to the Solaris statvfs() call, the main
+ * difference being the use of a numeric 'f_type' identifier instead of the
+ * 'f_basetype' string.
+ */
+static int
+stol_statfs(const char *path, struct lx_statfs *l, struct statvfs *s)
+{
+ int type;
+
+ if ((type = stol_type(path, s->f_basetype)) < 0)
+ return (type);
+
+ l->f_type = type;
+ l->f_bsize = s->f_frsize; /* other fields depend on frsize */
+ l->f_blocks = s->f_blocks;
+ l->f_bfree = s->f_bfree;
+ l->f_bavail = s->f_bavail;
+ l->f_files = s->f_files;
+ l->f_ffree = s->f_ffree;
+ l->f_fsid = s->f_fsid;
+ l->f_namelen = s->f_namemax;
+ l->f_frsize = s->f_frsize;
+ bzero(&(l->f_spare), sizeof (l->f_spare));
+
+ return (0);
+}
+
+static int
+stol_statfs64(const char *path, struct lx_statfs64 *l, struct statvfs64 *s)
+{
+ int type;
+
+ if ((type = stol_type(path, s->f_basetype)) < 0)
+ return (type);
+
+ l->f_type = type;
+ l->f_bsize = s->f_frsize; /* other fields depend on frsize */
+ l->f_blocks = s->f_blocks;
+ l->f_bfree = s->f_bfree;
+ l->f_bavail = s->f_bavail;
+ l->f_files = s->f_files;
+ l->f_ffree = s->f_ffree;
+ l->f_fsid = s->f_fsid;
+ l->f_namelen = s->f_namemax;
+ l->f_frsize = s->f_frsize;
+ bzero(&(l->f_spare), sizeof (l->f_spare));
+
+ return (0);
+}
+
+long
+lx_statfs(uintptr_t p1, uintptr_t p2)
+{
+ const char *path = (const char *)p1;
+ struct lx_statfs lxfs, *fs = (struct lx_statfs *)p2;
+ struct statvfs vfs;
+ int err;
+
+ lx_debug("\tstatvfs(%s, 0x%p)", path, fs);
+ if (statvfs(path, &vfs) != 0)
+ return (-errno);
+
+ if ((err = stol_statfs(path, &lxfs, &vfs)) != 0)
+ return (err);
+
+ if (uucopy(&lxfs, fs, sizeof (struct lx_statfs)) != 0)
+ return (-errno);
+
+ return (0);
+}
+
+long
+lx_fstatfs(uintptr_t p1, uintptr_t p2)
+{
+ struct lx_statfs lxfs, *fs = (struct lx_statfs *)p2;
+ struct stat64 sb;
+ struct statvfs vfs;
+ char *path, path_buf[MAXPATHLEN];
+ int fd = (int)p1;
+ int err;
+
+ lx_debug("\tfstatvfs(%d, 0x%p)", fd, fs);
+
+ /*
+ * fstatfs emulation for a pipe.
+ */
+ if (fstat64(fd, &sb) == 0 && S_ISFIFO(sb.st_mode)) {
+ lxfs.f_type = LX_PIPEFS_MAGIC;
+ lxfs.f_bsize = 4096;
+ lxfs.f_blocks = 0;
+ lxfs.f_bfree = 0;
+ lxfs.f_bavail = 0;
+ lxfs.f_files = 0;
+ lxfs.f_ffree = 0;
+ lxfs.f_fsid = 0;
+ lxfs.f_namelen = 255;
+ lxfs.f_frsize = 4096;
+ } else {
+ if (fstatvfs(fd, &vfs) != 0)
+ return (-errno);
+
+ path = lx_fd_to_path(fd, path_buf, sizeof (path_buf));
+
+ if ((err = stol_statfs(path, &lxfs, &vfs)) != 0)
+ return (err);
+ }
+
+ if (uucopy(&lxfs, fs, sizeof (struct lx_statfs)) != 0)
+ return (-errno);
+
+ return (0);
+}
+
+/* ARGSUSED */
+long
+lx_statfs64(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ const char *path = (const char *)p1;
+ struct lx_statfs64 lxfs, *fs = (struct lx_statfs64 *)p3;
+ struct statvfs64 vfs;
+ int err;
+
+ lx_debug("\tstatvfs64(%s, %d, 0x%p)", path, p2, fs);
+ if (statvfs64(path, &vfs) != 0)
+ return (-errno);
+
+ if ((err = stol_statfs64(path, &lxfs, &vfs)) != 0)
+ return (err);
+
+ if (uucopy(&lxfs, fs, sizeof (struct lx_statfs64)) != 0)
+ return (-errno);
+
+ return (0);
+}
+
+/* ARGSUSED */
+long
+lx_fstatfs64(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ struct lx_statfs64 lxfs, *fs = (struct lx_statfs64 *)p3;
+ struct stat64 sb;
+ struct statvfs64 vfs;
+ char *path, path_buf[MAXPATHLEN];
+ int fd = (int)p1;
+ int err;
+
+ lx_debug("\tfstatvfs64(%d, %d, 0x%p)", fd, p2, fs);
+ if (fstat64(fd, &sb) == 0 && S_ISFIFO(sb.st_mode)) {
+ lxfs.f_type = LX_PIPEFS_MAGIC;
+ lxfs.f_bsize = 4096;
+ lxfs.f_blocks = 0;
+ lxfs.f_bfree = 0;
+ lxfs.f_bavail = 0;
+ lxfs.f_files = 0;
+ lxfs.f_ffree = 0;
+ lxfs.f_fsid = 0;
+ lxfs.f_namelen = 255;
+ lxfs.f_frsize = 4096;
+ } else {
+ if (fstatvfs64(fd, &vfs) != 0)
+ return (-errno);
+
+ path = lx_fd_to_path(fd, path_buf, sizeof (path_buf));
+
+ if ((err = stol_statfs64(path, &lxfs, &vfs)) != 0)
+ return (err);
+ }
+
+ if (uucopy(&lxfs, fs, sizeof (struct lx_statfs64)) != 0)
+ return (-errno);
+
+ return (0);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/sysctl.c b/usr/src/lib/brand/lx/lx_brand/common/sysctl.c
new file mode 100644
index 0000000000..262a9c3830
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/sysctl.c
@@ -0,0 +1,137 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <alloca.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_debug.h>
+
+/*
+ * sysctl() implementation. The full set of possible values is incredibly
+ * large; we only implement the bare minimum here, namely basic kernel
+ * information.
+ *
+ * For the moment, we also print out debugging messages if the application
+ * attempts to write or access any other values, so we can tell if we are not
+ * supporting something we should be.
+ */
+
+struct lx_sysctl_args {
+ int *name;
+ int nlen;
+ void *oldval;
+ size_t *oldlenp;
+ void *newval;
+ size_t newlen;
+};
+
+#define LX_CTL_KERN 1
+
+#define LX_KERN_OSTYPE 1
+#define LX_KERN_OSRELEASE 2
+#define LX_KERN_OSREV 3
+#define LX_KERN_VERSION 4
+
+long
+lx_sysctl(uintptr_t raw)
+{
+ struct lx_sysctl_args args;
+ int name[2];
+ size_t oldlen;
+ char *namebuf;
+
+ if (uucopy((void *)raw, &args, sizeof (args)) < 0)
+ return (-EFAULT);
+
+ /*
+ * We only allow [ CTL_KERN, KERN_* ] pairs, so reject anything that
+ * doesn't have exactly two values starting with LX_CTL_KERN.
+ */
+ if (args.nlen != 2)
+ return (-ENOTDIR);
+
+ if (uucopy(args.name, name, sizeof (name)) < 0)
+ return (-EFAULT);
+
+ if (name[0] != LX_CTL_KERN) {
+ lx_debug("sysctl: read of [%d, %d] unsupported",
+ name[0], name[1]);
+ return (-ENOTDIR);
+ }
+
+ /* We don't support writing new sysctl values. */
+ if ((args.newval != NULL) || (args.newlen != 0)) {
+ lx_debug("sysctl: write of [%d, %d] unsupported",
+ name[0], name[1]);
+ return (-EPERM);
+ }
+
+ /*
+ * It may seem silly, but passing in a NULL oldval pointer and not
+ * writing any new values is a perfectly legal thing to do and should
+ * succeed.
+ */
+ if (args.oldval == NULL)
+ return (0);
+
+ /*
+ * Likewise, Linux specifies that setting a non-NULL oldval but a
+ * zero *oldlenp should result in an errno of EFAULT.
+ */
+ if ((uucopy(args.oldlenp, &oldlen, sizeof (oldlen)) < 0) ||
+ (oldlen == 0))
+ return (-EFAULT);
+
+ namebuf = SAFE_ALLOCA(oldlen);
+ if (namebuf == NULL)
+ return (-ENOMEM);
+
+ switch (name[1]) {
+ case LX_KERN_OSTYPE:
+ (void) strlcpy(namebuf, LX_UNAME_SYSNAME, oldlen);
+ break;
+ case LX_KERN_OSRELEASE:
+ (void) strlcpy(namebuf, lx_release, oldlen);
+ break;
+ case LX_KERN_VERSION:
+ (void) strlcpy(namebuf, LX_UNAME_VERSION, oldlen);
+ break;
+ default:
+ lx_debug("sysctl: read of [CTL_KERN, %d] unsupported", name[1]);
+ return (-ENOTDIR);
+ }
+
+ oldlen = strlen(namebuf);
+
+ if ((uucopy(namebuf, args.oldval, oldlen) < 0) ||
+ (uucopy(&oldlen, args.oldlenp, sizeof (oldlen)) < 0))
+ return (-EFAULT);
+
+ return (0);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/sysv_ipc.c b/usr/src/lib/brand/lx/lx_brand/common/sysv_ipc.c
new file mode 100644
index 0000000000..99cc12704e
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/sysv_ipc.c
@@ -0,0 +1,975 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc. All rights reserved.
+ */
+
+#include <errno.h>
+#include <unistd.h>
+#include <strings.h>
+#include <rctl.h>
+#include <alloca.h>
+#include <values.h>
+#include <sys/syscall.h>
+#include <sys/msg.h>
+#include <sys/ipc.h>
+#include <sys/sem.h>
+#include <sys/shm.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/lx_debug.h>
+#include <sys/lx_types.h>
+#include <sys/lx_sysv_ipc.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_syscall.h>
+
+#define SLOT_SEM 0
+#define SLOT_SHM 1
+#define SLOT_MSG 2
+
+/* Use private SHM_RMID interface for IPC_RMID */
+#define SHM_RMID 5
+
+static int
+get_rctlval(rctlblk_t *rblk, char *name, ulong_t limit, uint64_t *val)
+{
+ rctl_qty_t r;
+
+ if (getrctl(name, NULL, rblk, RCTL_FIRST) == -1)
+ return (-errno);
+
+ r = rctlblk_get_value(rblk);
+ if (r > limit)
+ return (-EOVERFLOW);
+
+ *val = r;
+ return (0);
+}
+
+/*
+ * Given a slot number and a maximum number of ids to extract from the
+ * kernel, return the msgid in the provided slot.
+ */
+static int
+slot_to_id(int type, int slot)
+{
+ uint_t nids, max;
+ int *idbuf = NULL;
+ int r = 0;
+
+ nids = 0;
+ for (;;) {
+ switch (type) {
+ case SLOT_SEM:
+ r = semids(idbuf, nids, &max);
+ break;
+ case SLOT_SHM:
+ r = shmids(idbuf, nids, &max);
+ break;
+ case SLOT_MSG:
+ r = msgids(idbuf, nids, &max);
+ break;
+ }
+
+ if (r < 0)
+ return (-errno);
+
+ if (max == 0)
+ return (-EINVAL);
+
+ if (max <= nids)
+ return (idbuf[slot]);
+
+ nids = max;
+ if ((idbuf = (int *)SAFE_ALLOCA(sizeof (int) * nids)) == NULL)
+ return (-ENOMEM);
+ }
+}
+
+/*
+ * Semaphore operations.
+ */
+long
+lx_semget(key_t key, int nsems, int semflg)
+{
+ int sol_flag;
+ int r;
+
+ lx_debug("\nsemget(%d, %d, %d)\n", key, nsems, semflg);
+ sol_flag = semflg & S_IAMB;
+ if (semflg & LX_IPC_CREAT)
+ sol_flag |= IPC_CREAT;
+ if (semflg & LX_IPC_EXCL)
+ sol_flag |= IPC_EXCL;
+
+ r = semget(key, nsems, sol_flag);
+ return ((r < 0) ? -errno : r);
+}
+
+long
+lx_semop(int semid, void *p1, size_t nsops)
+{
+ int r;
+ struct sembuf *sops = (struct sembuf *)p1;
+
+ lx_debug("\nsemop(%d, 0x%p, %u)\n", semid, sops, nsops);
+ if (nsops == 0)
+ return (-EINVAL);
+
+ r = semop(semid, sops, nsops);
+ return ((r < 0) ? -errno : r);
+}
+
+long
+lx_semtimedop(int semid, void *p1, size_t nsops, struct timespec *timeout)
+{
+ int r;
+ struct sembuf *sops = (struct sembuf *)p1;
+
+ lx_debug("\nsemtimedop(%d, 0x%p, %u, 0x%p)\n", semid, sops, nsops,
+ timeout);
+ if (nsops == 0)
+ return (-EINVAL);
+
+ r = semtimedop(semid, sops, nsops, timeout);
+ return ((r < 0) ? -errno : r);
+}
+
+static int
+lx_semctl_ipcset(int semid, void *buf)
+{
+ struct lx_semid_ds semds;
+ struct semid_ds sol_semds;
+ int r;
+
+ if (uucopy(buf, &semds, sizeof (semds)))
+ return (-errno);
+
+ bzero(&sol_semds, sizeof (sol_semds));
+ sol_semds.sem_perm.uid = semds.sem_perm.uid;
+ sol_semds.sem_perm.gid = semds.sem_perm.gid;
+ sol_semds.sem_perm.mode = semds.sem_perm.mode;
+
+ r = semctl(semid, 0, IPC_SET, &sol_semds);
+ return ((r < 0) ? -errno : r);
+}
+
+static int
+lx_semctl_ipcstat(int semid, void *buf)
+{
+ struct lx_semid_ds semds;
+ struct semid_ds sol_semds;
+
+ if (semctl(semid, 0, IPC_STAT, &sol_semds) != 0)
+ return (-errno);
+
+ bzero(&semds, sizeof (semds));
+ semds.sem_perm.key = sol_semds.sem_perm.key;
+ semds.sem_perm.seq = sol_semds.sem_perm.seq;
+ semds.sem_perm.uid = sol_semds.sem_perm.uid;
+ semds.sem_perm.gid = sol_semds.sem_perm.gid;
+ semds.sem_perm.cuid = sol_semds.sem_perm.cuid;
+ semds.sem_perm.cgid = sol_semds.sem_perm.cgid;
+
+ /* Linux only uses the bottom 9 bits */
+ semds.sem_perm.mode = sol_semds.sem_perm.mode & S_IAMB;
+ semds.sem_otime = sol_semds.sem_otime;
+ semds.sem_ctime = sol_semds.sem_ctime;
+ semds.sem_nsems = sol_semds.sem_nsems;
+
+ if (uucopy(&semds, buf, sizeof (semds)))
+ return (-errno);
+
+ return (0);
+}
+
+static int
+lx_semctl_ipcinfo(void *buf)
+{
+ struct lx_seminfo i;
+ rctlblk_t *rblk;
+ int rblksz;
+ uint_t nids;
+ int idbuf;
+ int err;
+ uint64_t val;
+
+ rblksz = rctlblk_size();
+ if ((rblk = (rctlblk_t *)SAFE_ALLOCA(rblksz)) == NULL)
+ return (-ENOMEM);
+
+ bzero(&i, sizeof (i));
+ err = get_rctlval(rblk, "project.max-sem-ids", (ulong_t)MAXINT, &val);
+ if (err < 0)
+ return (err);
+ i.semmni = (int)val;
+ err = get_rctlval(rblk, "process.max-sem-nsems", (ulong_t)MAXINT, &val);
+ if (err < 0)
+ return (err);
+ i.semmsl = (int)val;
+ err = get_rctlval(rblk, "process.max-sem-ops", (ulong_t)MAXINT, &val);
+ if (err < 0)
+ return (err);
+ i.semopm = (int)val;
+
+ /*
+ * We don't have corresponding rctls for these fields. The values
+ * are taken from the formulas used to derive the defaults listed
+ * in the Linux header file. We're lying, but trying to be
+ * coherent about it.
+ */
+ i.semmap = i.semmni;
+ i.semmns = i.semmni * i.semmsl;
+ i.semmnu = INT_MAX;
+ i.semume = INT_MAX;
+ i.semvmx = LX_SEMVMX;
+ if (semids(&idbuf, 0, &nids) < 0)
+ return (-errno);
+ i.semusz = nids;
+ i.semaem = INT_MAX;
+
+ if (uucopy(&i, buf, sizeof (i)) != 0)
+ return (-errno);
+
+ return (nids);
+}
+
+static int
+lx_semctl_semstat(int slot, void *buf)
+{
+ int r, semid;
+
+ semid = slot_to_id(SLOT_SEM, slot);
+ if (semid < 0)
+ return (semid);
+
+ r = lx_semctl_ipcstat(semid, buf);
+ return (r < 0 ? r : semid);
+}
+
+/*
+ * For the SETALL operation, we have to examine each of the semaphore
+ * values to be sure it is legal.
+ */
+static int
+lx_semctl_setall(int semid, ushort_t *arg)
+{
+ struct semid_ds semds;
+ ushort_t *vals;
+ int i, sz, r;
+
+ /*
+ * Find out how many semaphores are involved, reserve enough
+ * memory for an internal copy of the array, and then copy it in
+ * from the process.
+ */
+ if (semctl(semid, 0, IPC_STAT, &semds) != 0)
+ return (-errno);
+ sz = semds.sem_nsems * sizeof (ushort_t);
+ if ((vals = SAFE_ALLOCA(sz)) == NULL)
+ return (-ENOMEM);
+ if (uucopy(arg, vals, sz))
+ return (-errno);
+
+ /* Validate each of the values. */
+ for (i = 0; i < semds.sem_nsems; i++)
+ if (vals[i] > LX_SEMVMX)
+ return (-ERANGE);
+
+ r = semctl(semid, 0, SETALL, arg);
+
+ return ((r < 0) ? -errno : r);
+}
+
+long
+lx_semctl(int semid, int semnum, int cmd, void *ptr)
+{
+#if defined(_ILP32)
+ union lx_semun arg;
+#endif
+ int rval;
+ int opt = cmd & ~LX_IPC_64;
+ int use_errno = 0;
+ uint_t val;
+
+ lx_debug("\nsemctl(%d, %d, %d, 0x%p)\n", semid, semnum, cmd, ptr);
+
+#if defined(_ILP32)
+ /*
+ * The final arg to semctl() is a pointer to a union. For some
+ * commands we can hand that pointer directly to the kernel. For
+ * these commands, we need to extract an argument from the union
+ * before calling into the kernel.
+ */
+ if (opt == LX_SETVAL || opt == LX_SETALL || opt == LX_GETALL ||
+ opt == LX_IPC_SET || opt == LX_IPC_STAT || opt == LX_SEM_STAT ||
+ opt == LX_IPC_INFO || opt == LX_SEM_INFO)
+ if (uucopy(ptr, &arg, sizeof (arg)))
+ return (-errno);
+#endif
+
+ switch (opt) {
+ case LX_GETVAL:
+ use_errno = 1;
+ rval = semctl(semid, semnum, GETVAL, NULL);
+ break;
+ case LX_SETVAL:
+#if defined(_ILP32)
+ val = arg.val;
+#else
+ val = (uint_t)(uintptr_t)ptr;
+#endif
+ if (val > LX_SEMVMX) {
+ return (-ERANGE);
+ }
+ use_errno = 1;
+ rval = semctl(semid, semnum, SETVAL, val);
+ break;
+ case LX_GETPID:
+ use_errno = 1;
+ rval = semctl(semid, semnum, GETPID, NULL);
+ break;
+ case LX_GETNCNT:
+ use_errno = 1;
+ rval = semctl(semid, semnum, GETNCNT, NULL);
+ break;
+ case LX_GETZCNT:
+ use_errno = 1;
+ rval = semctl(semid, semnum, GETZCNT, NULL);
+ break;
+ case LX_GETALL:
+ use_errno = 1;
+#if defined(_ILP32)
+ rval = semctl(semid, semnum, GETALL, arg.sems);
+#else
+ rval = semctl(semid, semnum, GETALL, ptr);
+#endif
+ break;
+ case LX_SETALL:
+#if defined(_ILP32)
+ rval = lx_semctl_setall(semid, arg.sems);
+#else
+ rval = lx_semctl_setall(semid, ptr);
+#endif
+ break;
+ case LX_IPC_RMID:
+ use_errno = 1;
+ rval = semctl(semid, semnum, IPC_RMID, NULL);
+ break;
+ case LX_SEM_STAT:
+#if defined(_ILP32)
+ rval = lx_semctl_semstat(semid, arg.semds);
+#else
+ rval = lx_semctl_semstat(semid, ptr);
+#endif
+ break;
+ case LX_IPC_STAT:
+#if defined(_ILP32)
+ rval = lx_semctl_ipcstat(semid, arg.semds);
+#else
+ rval = lx_semctl_ipcstat(semid, ptr);
+#endif
+ break;
+
+ case LX_IPC_SET:
+#if defined(_ILP32)
+ rval = lx_semctl_ipcset(semid, arg.semds);
+#else
+ rval = lx_semctl_ipcset(semid, ptr);
+#endif
+ break;
+
+ case LX_IPC_INFO:
+ case LX_SEM_INFO:
+#if defined(_ILP32)
+ rval = lx_semctl_ipcinfo(arg.semds);
+#else
+ rval = lx_semctl_ipcinfo(ptr);
+#endif
+ break;
+
+ default:
+ return (-EINVAL);
+ }
+
+ if (use_errno == 1 && rval < 0)
+ return (-errno);
+ return (rval);
+}
+
+/*
+ * msg operations.
+ */
+long
+lx_msgget(key_t key, int flag)
+{
+ int sol_flag;
+ int r;
+
+ lx_debug("\tlx_msgget(%d, %d)\n", key, flag);
+
+ sol_flag = flag & S_IAMB;
+ if (flag & LX_IPC_CREAT)
+ sol_flag |= IPC_CREAT;
+ if (flag & LX_IPC_EXCL)
+ sol_flag |= IPC_EXCL;
+
+ r = msgget(key, sol_flag);
+ return (r < 0 ? -errno : r);
+}
+
+long
+lx_msgsnd(int id, void *p1, size_t sz, int flag)
+{
+ int sol_flag = 0;
+ int r;
+ struct msgbuf *buf = (struct msgbuf *)p1;
+
+ lx_debug("\tlx_msgsnd(%d, 0x%p, %d, %d)\n", id, buf, sz, flag);
+
+ if (flag & LX_IPC_NOWAIT)
+ sol_flag |= IPC_NOWAIT;
+
+ if (((ssize_t)sz < 0) || (sz > LX_MSGMAX))
+ return (-EINVAL);
+
+ r = msgsnd(id, buf, sz, sol_flag);
+ return (r < 0 ? -errno : r);
+}
+
+long
+lx_msgrcv(int id, void *msgp, size_t sz, long msgtype, int flag)
+{
+ int sol_flag = 0;
+ ssize_t r;
+
+ lx_debug("\tlx_msgrcv(%d, 0x%p, %d, %d, %ld, %d)\n",
+ id, msgp, sz, msgtype, flag);
+
+ /*
+ * Check for a negative sz parameter.
+ *
+ * Unlike msgsnd(2), the Linux man page does not specify that
+ * msgrcv(2) should return EINVAL if (sz > MSGMAX), only if (sz < 0).
+ */
+ if ((ssize_t)sz < 0)
+ return (-EINVAL);
+
+ if (flag & LX_MSG_NOERROR)
+ sol_flag |= MSG_NOERROR;
+ if (flag & LX_IPC_NOWAIT)
+ sol_flag |= IPC_NOWAIT;
+
+ r = msgrcv(id, msgp, sz, msgtype, sol_flag);
+ return (r < 0 ? -errno : r);
+}
+
+static int
+lx_msgctl_ipcstat(int msgid, void *buf)
+{
+ struct lx_msqid_ds msgids;
+ struct msqid_ds sol_msgids;
+ int r;
+
+ r = msgctl(msgid, IPC_STAT, &sol_msgids);
+ if (r < 0)
+ return (-errno);
+
+ bzero(&msgids, sizeof (msgids));
+ msgids.msg_perm.key = sol_msgids.msg_perm.key;
+ msgids.msg_perm.seq = sol_msgids.msg_perm.seq;
+ msgids.msg_perm.uid = sol_msgids.msg_perm.uid;
+ msgids.msg_perm.gid = sol_msgids.msg_perm.gid;
+ msgids.msg_perm.cuid = sol_msgids.msg_perm.cuid;
+ msgids.msg_perm.cgid = sol_msgids.msg_perm.cgid;
+
+ /* Linux only uses the bottom 9 bits */
+ msgids.msg_perm.mode = sol_msgids.msg_perm.mode & S_IAMB;
+
+ msgids.msg_stime = sol_msgids.msg_stime;
+ msgids.msg_rtime = sol_msgids.msg_rtime;
+ msgids.msg_ctime = sol_msgids.msg_ctime;
+ msgids.msg_qbytes = sol_msgids.msg_qbytes;
+ msgids.msg_cbytes = sol_msgids.msg_cbytes;
+ msgids.msg_qnum = sol_msgids.msg_qnum;
+ msgids.msg_lspid = sol_msgids.msg_lspid;
+ msgids.msg_lrpid = sol_msgids.msg_lrpid;
+
+ if (uucopy(&msgids, buf, sizeof (msgids)))
+ return (-errno);
+
+ return (0);
+}
+
+static int
+lx_msgctl_ipcinfo(int cmd, void *buf)
+{
+ struct lx_msginfo m;
+ rctlblk_t *rblk;
+ int idbuf, rblksz, msgseg, maxmsgs;
+ uint_t nids;
+ int rval;
+ int err;
+ uint64_t val;
+
+ rblksz = rctlblk_size();
+ if ((rblk = (rctlblk_t *)SAFE_ALLOCA(rblksz)) == NULL)
+ return (-ENOMEM);
+
+ bzero(&m, sizeof (m));
+ err = get_rctlval(rblk, "project.max-msg-ids", (ulong_t)MAXINT, &val);
+ if (err < 0)
+ return (err);
+ m.msgmni = (int)val;
+ err = get_rctlval(rblk, "process.max-msg-qbytes", (ulong_t)MAXINT,
+ &val);
+ if (err < 0)
+ return (err);
+ m.msgmnb = (int)val;
+
+ if (cmd == LX_IPC_INFO) {
+ err = get_rctlval(rblk, "process.max-msg-messages",
+ (ulong_t)MAXINT, &val);
+ if (err < 0)
+ return (err);
+ maxmsgs = (int)val;
+ m.msgtql = maxmsgs * m.msgmni;
+ m.msgmap = m.msgmnb;
+ m.msgpool = m.msgmax * m.msgmnb;
+ rval = 0;
+ } else {
+ if (msgids(&idbuf, 0, &nids) < 0)
+ return (-errno);
+ m.msgpool = nids;
+
+ /*
+ * For these fields, we can't even come up with a good fake
+ * approximation. These are listed as 'obsolete' or
+ * 'unused' in the header files, so hopefully nobody is
+ * relying on them anyway.
+ */
+ m.msgtql = INT_MAX;
+ m.msgmap = INT_MAX;
+ rval = nids;
+ }
+
+ /*
+ * We don't have corresponding rctls for these fields. The values
+ * are taken from the formulas used to derive the defaults listed
+ * in the Linux header file. We're lying, but trying to be
+ * coherent about it.
+ */
+ m.msgmax = m.msgmnb;
+ m.msgssz = 16;
+ msgseg = (m.msgpool * 1024) / m.msgssz;
+ m.msgseg = (msgseg > 0xffff) ? 0xffff : msgseg;
+
+ if (uucopy(&m, buf, sizeof (m)))
+ return (-errno);
+ return (rval);
+}
+
+static int
+lx_msgctl_ipcset(int msgid, void *buf)
+{
+ struct lx_msqid_ds msgids;
+ struct msqid_ds sol_msgids;
+ int r;
+
+ if (uucopy(buf, &msgids, sizeof (msgids)))
+ return (-errno);
+
+ bzero(&sol_msgids, sizeof (sol_msgids));
+ sol_msgids.msg_perm.uid = LX_UID16_TO_UID32(msgids.msg_perm.uid);
+ sol_msgids.msg_perm.gid = LX_UID16_TO_UID32(msgids.msg_perm.gid);
+
+ /* Linux only uses the bottom 9 bits */
+ sol_msgids.msg_perm.mode = msgids.msg_perm.mode & S_IAMB;
+ sol_msgids.msg_qbytes = msgids.msg_qbytes;
+
+ r = msgctl(msgid, IPC_SET, &sol_msgids);
+ return (r < 0 ? -errno : r);
+}
+
+static int
+lx_msgctl_msgstat(int slot, void *buf)
+{
+ int r, msgid;
+
+ lx_debug("msgstat(%d, 0x%p)\n", slot, buf);
+
+ msgid = slot_to_id(SLOT_MSG, slot);
+
+ if (msgid < 0)
+ return (msgid);
+
+ r = lx_msgctl_ipcstat(msgid, buf);
+ return (r < 0 ? r : msgid);
+}
+
+/*
+ * Split off the various msgctl's here
+ */
+long
+lx_msgctl(int msgid, int cmd, void *buf)
+{
+ int r;
+
+ lx_debug("\tlx_msgctl(%d, %d, 0x%p)\n", msgid, cmd, buf);
+ switch (cmd & ~LX_IPC_64) {
+ case LX_IPC_RMID:
+ r = msgctl(msgid, IPC_RMID, NULL);
+ if (r < 0)
+ r = -errno;
+ break;
+ case LX_IPC_SET:
+ r = lx_msgctl_ipcset(msgid, buf);
+ break;
+ case LX_IPC_STAT:
+ r = lx_msgctl_ipcstat(msgid, buf);
+ break;
+ case LX_MSG_STAT:
+ r = lx_msgctl_msgstat(msgid, buf);
+ break;
+
+ case LX_IPC_INFO:
+ case LX_MSG_INFO:
+ r = lx_msgctl_ipcinfo(cmd, buf);
+ break;
+
+ default:
+ r = -EINVAL;
+ break;
+ }
+
+ return (r);
+}
+
+/*
+ * shm-related operations.
+ */
+long
+lx_shmget(key_t key, size_t size, int flag)
+{
+ int sol_flag;
+ int r;
+
+ lx_debug("\tlx_shmget(%d, %d, %d)\n", key, size, flag);
+
+ sol_flag = flag & S_IAMB;
+ if (flag & LX_IPC_CREAT)
+ sol_flag |= IPC_CREAT;
+ if (flag & LX_IPC_EXCL)
+ sol_flag |= IPC_EXCL;
+
+ r = shmget(key, size, sol_flag);
+ return (r < 0 ? -errno : r);
+}
+
+long
+lx_shmat(int shmid, void *addr, int flags)
+{
+ int sol_flags;
+ void *ptr;
+
+ lx_debug("\tlx_shmat(%d, 0x%p, %d)\n", shmid, addr, flags);
+
+ sol_flags = 0;
+ if (flags & LX_SHM_RDONLY)
+ sol_flags |= SHM_RDONLY;
+ if (flags & LX_SHM_RND)
+ sol_flags |= SHM_RND;
+ if ((flags & LX_SHM_REMAP) && (addr == NULL))
+ return (-EINVAL);
+
+ ptr = shmat(shmid, addr, sol_flags);
+ if (ptr == (void *)-1)
+ return (-errno);
+
+ return ((ssize_t)ptr);
+}
+
+static int
+lx_shmctl_ipcinfo(void *buf)
+{
+ struct lx_shminfo s;
+ rctlblk_t *rblk;
+ int rblksz;
+ int err;
+ uint64_t val;
+
+ rblksz = rctlblk_size();
+ if ((rblk = (rctlblk_t *)SAFE_ALLOCA(rblksz)) == NULL)
+ return (-ENOMEM);
+
+ bzero(&s, sizeof (s));
+ err = get_rctlval(rblk, "project.max-shm-ids", ULONG_MAX, &val);
+ if (err < 0)
+ return (err);
+ s.shmmni = val;
+ err = get_rctlval(rblk, "project.max-shm-memory", ULONG_MAX, &val);
+ if (err < 0)
+ return (err);
+ s.shmmax = val;
+
+ /*
+ * We don't have corresponding rctls for these fields. The values
+ * are taken from the formulas used to derive the defaults listed
+ * in the Linux header file. We're lying, but trying to be
+ * coherent about it.
+ */
+ s.shmmin = 1;
+ s.shmseg = ULONG_MAX;
+ s.shmall = s.shmmax / getpagesize();
+
+ if (uucopy(&s, buf, sizeof (s)))
+ return (-errno);
+
+ return (0);
+}
+
+static int
+lx_shmctl_ipcstat(int shmid, void *buf)
+{
+ struct lx_shmid_ds shmds;
+ struct shmid_ds sol_shmds;
+
+ if (shmctl(shmid, IPC_STAT, &sol_shmds) != 0)
+ return (-errno);
+
+ bzero(&shmds, sizeof (shmds));
+ shmds.shm_perm.key = sol_shmds.shm_perm.key;
+ shmds.shm_perm.seq = sol_shmds.shm_perm.seq;
+ shmds.shm_perm.uid = sol_shmds.shm_perm.uid;
+ shmds.shm_perm.gid = sol_shmds.shm_perm.gid;
+ shmds.shm_perm.cuid = sol_shmds.shm_perm.cuid;
+ shmds.shm_perm.cgid = sol_shmds.shm_perm.cgid;
+ shmds.shm_perm.mode = sol_shmds.shm_perm.mode & S_IAMB;
+ if (sol_shmds.shm_lkcnt > 0)
+ shmds.shm_perm.mode |= LX_SHM_LOCKED;
+ shmds.shm_segsz = sol_shmds.shm_segsz;
+ shmds.shm_atime = sol_shmds.shm_atime;
+ shmds.shm_dtime = sol_shmds.shm_dtime;
+ shmds.shm_ctime = sol_shmds.shm_ctime;
+ shmds.shm_cpid = sol_shmds.shm_cpid;
+ shmds.shm_lpid = sol_shmds.shm_lpid;
+ shmds.shm_nattch = (ushort_t)sol_shmds.shm_nattch;
+
+ if (uucopy(&shmds, buf, sizeof (shmds)))
+ return (-errno);
+
+ return (0);
+}
+
+static int
+lx_shmctl_ipcset(int shmid, void *buf)
+{
+ struct lx_shmid_ds shmds;
+ struct shmid_ds sol_shmds;
+ int r;
+
+ if (uucopy(buf, &shmds, sizeof (shmds)))
+ return (-errno);
+
+ bzero(&sol_shmds, sizeof (sol_shmds));
+ sol_shmds.shm_perm.uid = shmds.shm_perm.uid;
+ sol_shmds.shm_perm.gid = shmds.shm_perm.gid;
+ sol_shmds.shm_perm.mode = shmds.shm_perm.mode & S_IAMB;
+
+ r = shmctl(shmid, IPC_SET, &sol_shmds);
+ return (r < 0 ? -errno : r);
+}
+
+/*
+ * Build and return a shm_info structure. We only return the bare
+ * essentials required by ipcs. The rest of the info is not readily
+ * available.
+ */
+static int
+lx_shmctl_shminfo(void *buf)
+{
+ struct lx_shm_info shminfo;
+ uint_t nids;
+ int idbuf;
+
+ bzero(&shminfo, sizeof (shminfo));
+
+ if (shmids(&idbuf, 0, &nids) < 0)
+ return (-errno);
+
+ shminfo.used_ids = nids;
+ if (uucopy(&shminfo, buf, sizeof (shminfo)) != 0)
+ return (-errno);
+
+ return (nids);
+}
+
+static int
+lx_shmctl_shmstat(int slot, void *buf)
+{
+ int r, shmid;
+
+ lx_debug("shmctl_shmstat(%d, 0x%p)\n", slot, buf);
+ shmid = slot_to_id(SLOT_SHM, slot);
+ if (shmid < 0)
+ return (shmid);
+
+ r = lx_shmctl_ipcstat(shmid, buf);
+ return (r < 0 ? r : shmid);
+}
+
+long
+lx_shmctl(int shmid, int cmd, void *buf)
+{
+ int r;
+ int use_errno = 0;
+
+ lx_debug("\tlx_shmctl(%d, %d, 0x%p)\n", shmid, cmd, buf);
+ switch (cmd & ~LX_IPC_64) {
+ case LX_IPC_RMID:
+ use_errno = 1;
+ r = shmctl(shmid, SHM_RMID, NULL); /* lx-private cmd */
+ break;
+
+ case LX_IPC_SET:
+ r = lx_shmctl_ipcset(shmid, buf);
+ break;
+
+ case LX_IPC_STAT:
+ r = lx_shmctl_ipcstat(shmid, buf);
+ break;
+
+ case LX_IPC_INFO:
+ r = lx_shmctl_ipcinfo(buf);
+ break;
+
+ case LX_SHM_LOCK:
+ use_errno = 1;
+ r = shmctl(shmid, SHM_LOCK, NULL);
+ break;
+
+ case LX_SHM_UNLOCK:
+ use_errno = 1;
+ r = shmctl(shmid, SHM_UNLOCK, NULL);
+ break;
+
+ case LX_SHM_INFO:
+ r = lx_shmctl_shminfo(buf);
+ break;
+
+ case LX_SHM_STAT:
+ r = lx_shmctl_shmstat(shmid, buf);
+ break;
+ default:
+ r = -EINVAL;
+ break;
+ }
+
+ if (use_errno == 1 && r < 0)
+ return (-errno);
+
+ return (r);
+}
+
+/*
+ * Under 32-bit Linux, glibc funnels all of the sysv IPC operations into this
+ * single ipc(2) system call. We need to blow that up and filter the
+ * remnants into the proper Solaris system calls.
+ */
+long
+lx_ipc(uintptr_t cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
+ uintptr_t arg4)
+{
+ int r;
+ void *bufptr = (void *)arg4;
+
+ lx_debug("lx_ipc(%d, %d, %d, %d, 0x%p, %d)\n",
+ cmd, arg1, arg2, arg3, bufptr, arg4);
+
+ switch (cmd) {
+ case LX_MSGGET:
+ r = lx_msgget((key_t)arg1, (int)arg2);
+ break;
+ case LX_MSGSND:
+ r = lx_msgsnd((int)arg1, bufptr, (size_t)arg2, (int)arg3);
+ break;
+ case LX_MSGRCV:
+ {
+ struct {
+ void *msgp;
+ long msgtype;
+ } args;
+
+ /*
+ * Rather than passing 5 args into ipc(2) directly,
+ * glibc passes 4 args and uses the buf argument to
+ * point to a structure containing two args: a pointer
+ * to the message and the message type.
+ */
+ if (uucopy(bufptr, &args, sizeof (args)))
+ return (-errno);
+ r = lx_msgrcv((int)arg1, args.msgp, (size_t)arg2,
+ args.msgtype, (int)arg3);
+ }
+ break;
+ case LX_MSGCTL:
+ r = lx_msgctl((int)arg1, (int)arg2, bufptr);
+ break;
+ case LX_SEMCTL:
+ r = lx_semctl((int)arg1, (size_t)arg2, (int)arg3, bufptr);
+ break;
+ case LX_SEMOP:
+ /*
+ * 'struct sembuf' is the same on Linux and Solaris, so we
+ * pass bufptr straight through.
+ */
+ r = lx_semop((int)arg1, bufptr, (size_t)arg2);
+ break;
+ case LX_SEMGET:
+ r = lx_semget((int)arg1, (size_t)arg2, (int)arg3);
+ break;
+ case LX_SHMAT:
+ r = lx_shmat((int)arg1, bufptr, (size_t)arg2);
+ if (r >= 0 || r <= -4096) {
+ if (uucopy(&r, (void *)arg3, sizeof (r)) != 0)
+ r = -errno;
+ }
+ break;
+ case LX_SHMDT:
+ r = shmdt(bufptr);
+ if (r < 0)
+ r = -errno;
+ break;
+ case LX_SHMGET:
+ r = lx_shmget((int)arg1, (size_t)arg2, (int)arg3);
+ break;
+ case LX_SHMCTL:
+ r = lx_shmctl((int)arg1, (int)arg2, bufptr);
+ break;
+
+ default:
+ r = -EINVAL;
+ }
+
+ return (r);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/time.c b/usr/src/lib/brand/lx/lx_brand/common/time.c
new file mode 100644
index 0000000000..c810aa33f1
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/time.c
@@ -0,0 +1,132 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <errno.h>
+#include <time.h>
+#include <string.h>
+#include <strings.h>
+#include <sys/times.h>
+#include <sys/lx_syscall.h>
+#include <sys/lx_misc.h>
+
+/*
+ * times() - The Linux implementation avoids writing to NULL, while Illumos
+ * returns EFAULT.
+ */
+long
+lx_times(uintptr_t p1)
+{
+ clock_t ret;
+ struct tms buf, *tp = (struct tms *)p1;
+
+ ret = times(&buf);
+
+ if ((ret == -1) ||
+ ((tp != NULL) && uucopy((void *)&buf, tp, sizeof (buf)) != 0))
+ return (-errno);
+
+ return ((ret == -1) ? -errno : ret);
+}
+
+/*
+ * setitimer() - the Linux implementation can handle tv_usec values greater
+ * than 1,000,000 where Illumos would return EINVAL.
+ *
+ * There's still an issue here where Linux can handle a
+ * tv_sec value greater than 100,000,000 but Illumos cannot,
+ * but that would also mean setting an interval timer to fire
+ * over _three years_ in the future so it's unlikely anything
+ * other than Linux test suites will trip over it.
+ */
+long
+lx_setitimer(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ struct itimerval itv;
+ struct itimerval *itp = (struct itimerval *)p2;
+
+ if (itp != NULL) {
+ if (uucopy(itp, &itv, sizeof (itv)) != 0)
+ return (-errno);
+
+ /*
+ * Adjust any tv_usec fields >= 1,000,000 by adding any whole
+ * seconds so indicated to tv_sec and leaving tv_usec as the
+ * remainder.
+ */
+ if (itv.it_interval.tv_usec >= MICROSEC) {
+ itv.it_interval.tv_sec +=
+ itv.it_interval.tv_usec / MICROSEC;
+
+ itv.it_interval.tv_usec %= MICROSEC;
+ }
+ if (itv.it_value.tv_usec >= MICROSEC) {
+ itv.it_value.tv_sec +=
+ itv.it_value.tv_usec / MICROSEC;
+
+ itv.it_value.tv_usec %= MICROSEC;
+ }
+
+ itp = &itv;
+ }
+
+ return ((setitimer((int)p1, itp, (struct itimerval *)p3) != 0) ?
+ -errno : 0);
+}
+
+/*
+ * NOTE: The Linux man pages state this structure is obsolete and is
+ * unsupported, so it is declared here for sizing purposes only.
+ */
+struct lx_timezone {
+ int tz_minuteswest; /* minutes W of Greenwich */
+ int tz_dsttime; /* type of dst correction */
+};
+
+long
+lx_settimeofday(uintptr_t p1, uintptr_t p2)
+{
+ struct timeval tv;
+ struct lx_timezone tz;
+
+ if ((p1 != NULL) &&
+ (uucopy((struct timeval *)p1, &tv, sizeof (tv)) < 0))
+ return (-errno);
+
+ /*
+ * The Linux man page states use of the second parameter is obsolete,
+ * but settimeofday(2) should still return EFAULT if it is set
+ * to a bad non-NULL pointer (sigh...)
+ */
+ if ((p2 != NULL) &&
+ (uucopy((struct lx_timezone *)p2, &tz, sizeof (tz)) < 0))
+ return (-errno);
+
+ if ((p1 != NULL) && (settimeofday(&tv, NULL) < 0))
+ return (-errno);
+
+ return (0);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/common/truncate.c b/usr/src/lib/brand/lx/lx_brand/common/truncate.c
new file mode 100644
index 0000000000..ba3e452408
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/common/truncate.c
@@ -0,0 +1,173 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/resource.h>
+#include <sys/lx_types.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_syscall.h>
+
+/*
+ * ZFS does not enforce the process.max-file-size rctl on a file which is
+ * grown in size via truncate/ftruncate, since that is simply metadata which
+ * does not consume any additional space. However, LTP truncate03 depends on
+ * this behavior so we enforce it here.
+ */
+static boolean_t
+p_fsize_excd(const char *path, off_t length)
+{
+ struct stat64 sb;
+ struct rlimit rl;
+
+ if (stat64(path, &sb) == 0 && sb.st_size < length) {
+ /* We are growing the file, check the rlimit */
+ if (getrlimit(RLIMIT_FSIZE, &rl) == 0 && length > rl.rlim_cur)
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
+static boolean_t
+f_fsize_excd(int fd, off_t length)
+{
+ struct stat64 sb;
+ struct rlimit rl;
+
+ if (fstat64(fd, &sb) == 0 && sb.st_size < length) {
+ /* We are growing the file, check the rlimit */
+ if (getrlimit(RLIMIT_FSIZE, &rl) == 0 && length > rl.rlim_cur)
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
+/*
+ * On Illumos, truncate() and ftruncate() are implemented in libc, so these are
+ * layered on those interfaces.
+ */
+
+long
+lx_truncate(uintptr_t path, uintptr_t length)
+{
+#if defined(_ILP32)
+ if ((off_t)length >= 0xffffffffUL)
+ return (-EFBIG);
+#endif
+
+ if (length > 0 && p_fsize_excd((const char *)path, (off_t)length))
+ return (-EFBIG);
+
+ return (truncate((const char *)path, (off_t)length) == 0 ? 0 : -errno);
+}
+
+long
+lx_ftruncate(uintptr_t fd, uintptr_t length)
+{
+ int r;
+
+#if defined(_ILP32)
+ if ((off_t)length >= 0xffffffffUL)
+ return (-EFBIG);
+#endif
+
+ if (length > 0 && f_fsize_excd((int)fd, (off_t)length))
+ return (-EFBIG);
+
+ r = ftruncate((int)fd, (off_t)length);
+ /*
+ * On Linux, truncating a file opened read-only returns EINVAL whereas
+ * Illumos returns EBADF.
+ */
+ if (r != 0) {
+ if (errno == EBADF) {
+ int mode;
+
+ if ((mode = fcntl(fd, F_GETFL, 0)) != -1 &&
+ (mode & O_ACCMODE) == O_RDONLY)
+ r = -EINVAL;
+ else
+ r = -EBADF; /* keep existing errno */
+ } else {
+ r = -errno;
+ }
+ }
+ return (r);
+}
+
+long
+lx_truncate64(uintptr_t path, uintptr_t length_lo, uintptr_t length_hi)
+{
+ uint64_t len = LX_32TO64(length_lo, length_hi);
+
+ if (len >= 0x7fffffffffffffffULL)
+ return (-EFBIG);
+
+ if (len > 0 && p_fsize_excd((const char *)path, (off_t)len))
+ return (-EFBIG);
+
+ return (truncate64((const char *)path, len) == 0 ? 0 : -errno);
+}
+
+long
+lx_ftruncate64(uintptr_t fd, uintptr_t length_lo, uintptr_t length_hi)
+{
+ int r;
+ uint64_t len = LX_32TO64(length_lo, length_hi);
+
+ if (len >= 0x7fffffffffffffffULL)
+ return (-EFBIG);
+
+ if (len > 0 && f_fsize_excd((int)fd, (off_t)len))
+ return (-EFBIG);
+
+ r = ftruncate64((int)fd, len);
+ /*
+ * On Linux, truncating a file opened read-only returns EINVAL whereas
+ * Illumos returns EBADF.
+ */
+ if (r != 0) {
+ if (errno == EBADF) {
+ int mode;
+
+ if ((mode = fcntl(fd, F_GETFL, 0)) != -1 &&
+ (mode & O_ACCMODE) == O_RDONLY)
+ r = -EINVAL;
+ else
+ r = -EBADF; /* keep existing errno */
+ } else {
+ r = -errno;
+ }
+ }
+
+ return (r);
+}
diff --git a/usr/src/lib/brand/lx/lx_brand/i386/Makefile b/usr/src/lib/brand/lx/lx_brand/i386/Makefile
new file mode 100644
index 0000000000..8723f64292
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/i386/Makefile
@@ -0,0 +1,48 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+# Copyright 2015 Joyent, Inc.
+#
+# lib/brand/lx/i386/Makefile
+
+ISASRCDIR=.
+
+ASFLAGS += -P -D_ASM
+
+include ../Makefile.com
+
+DYNFLAGS += -Wl,-I/native/lib/ld.so.1
+
+POFILE= lx_brand.po
+MSGFILES= $(CSRCS)
+
+ASSYMDEP_OBJS = lx_handler.o
+
+install: all $(ROOTLIBS)
+
+$(POFILE): $(MSGFILES)
+ $(BUILDPO.msgfiles)
+
+_msg: $(MSGDOMAINPOFILE)
+
+include $(SRC)/Makefile.msg.targ
diff --git a/usr/src/lib/brand/lx/lx_brand/i386/lx_crt.s b/usr/src/lib/brand/lx/lx_brand/i386/lx_crt.s
new file mode 100644
index 0000000000..c457c1c209
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/i386/lx_crt.s
@@ -0,0 +1,65 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/asm_linkage.h>
+
+#if defined(lint)
+
+void
+_start(void)
+{
+}
+
+#else /* lint */
+
+ /*
+ * C language startup routine for the lx brand shared library.
+ */
+ ENTRY_NP(_start)
+ pushl $0 / Build a stack frame. retpc = NULL
+ pushl $0 / fp = NULL
+ movl %esp, %ebp / first stack frame
+
+ /*
+ * Calculate the location of the envp array by adding the size of
+ * the argv array to the start of the argv array.
+ */
+ movl 8(%ebp), %eax / argc in %eax
+ leal 16(%ebp,%eax,4), %edx / envp in %edx
+ andl $-16, %esp
+ pushl %edx / push envp
+ leal 12(%ebp),%edx / compute &argv[0]
+ pushl %edx / push argv
+ pushl %eax / push argc
+ call lx_init
+ /*
+ * lx_init will never return.
+ */
+ SET_SIZE(_start)
+
+#endif /* lint */
diff --git a/usr/src/lib/brand/lx/lx_brand/i386/lx_handler.s b/usr/src/lib/brand/lx/lx_brand/i386/lx_handler.s
new file mode 100644
index 0000000000..7afb9c4b7f
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/i386/lx_handler.s
@@ -0,0 +1,91 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <sys/asm_linkage.h>
+#include <sys/regset.h>
+#include <sys/segments.h>
+#include <sys/syscall.h>
+#include <sys/lx_brand.h>
+
+#if defined(_ASM)
+#include <sys/lx_signal.h>
+#include <sys/lx_syscall.h>
+#endif /* _ASM */
+
+/* 32-bit syscall numbers */
+#define LX_SYS_sigreturn 119
+#define LX_SYS_rt_sigreturn 173
+
+#if defined(lint)
+
+#include <sys/types.h>
+#include <sys/regset.h>
+#include <sys/signal.h>
+
+void
+lx_sigreturn_tramp(void)
+{}
+
+void
+lx_rt_sigreturn_tramp(void)
+{}
+
+#else /* lint */
+
+ ENTRY_NP(lx_swap_gs)
+ push %eax /* save the current eax value */
+ movl 0xc(%esp),%eax /* 2nd param is a pointer */
+ movw %gs,(%eax) /* use the pointer to save current gs */
+ movl 0x8(%esp),%eax /* first parameter is the new gs value */
+ movw %ax, %gs /* switch to the new gs value */
+ pop %eax /* restore eax */
+ ret
+ SET_SIZE(lx_swap_gs)
+
+ /*
+ * Trampoline code is called by the return at the end of a Linux
+ * signal handler to return control to the interrupted application
+ * via the lx_sigreturn() or lx_rt_sigreturn() syscalls.
+ *
+ * (lx_sigreturn() is called for legacy signal handling, and
+ * lx_rt_sigreturn() is called for "new"-style signals.)
+ *
+ * These two routines must consist of the EXACT code sequences below
+ * as gdb looks at the sequence of instructions a routine will return
+ * to determine whether it is in a signal handler or not.
+ * See the Linux code setup_signal_stack_sc() in arch/x86/um/signal.c.
+ */
+ ENTRY_NP(lx_sigreturn_tramp)
+ popl %eax
+ movl $LX_SYS_sigreturn, %eax
+ int $0x80
+ SET_SIZE(lx_sigreturn_tramp)
+
+ ENTRY_NP(lx_rt_sigreturn_tramp)
+ movl $LX_SYS_rt_sigreturn, %eax
+ int $0x80
+ SET_SIZE(lx_rt_sigreturn_tramp)
+#endif /* lint */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_aio.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_aio.h
new file mode 100644
index 0000000000..825447c79f
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_aio.h
@@ -0,0 +1,80 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#ifndef _SYS_LX_AIO_H
+#define _SYS_LX_AIO_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LX_IOCB_FLAG_RESFD 0x0001
+
+#define LX_IOCB_CMD_PREAD 0
+#define LX_IOCB_CMD_PWRITE 1
+#define LX_IOCB_CMD_FSYNC 2
+#define LX_IOCB_CMD_FDSYNC 3
+#define LX_IOCB_CMD_PREADX 4
+#define LX_IOCB_CMD_POLL 5
+#define LX_IOCB_CMD_NOOP 6
+#define LX_IOCB_CMD_PREADV 7
+#define LX_IOCB_CMD_PWRITEV 8
+
+#define LX_KIOCB_KEY 0
+
+typedef struct lx_io_event lx_io_event_t;
+typedef struct lx_iocb lx_iocb_t;
+typedef ulong_t lx_aio_context_t;
+
+/*
+ * Linux binary definition of an I/O event.
+ */
+struct lx_io_event {
+ uint64_t lxioe_data; /* data payload */
+ uint64_t lxioe_object; /* object of origin */
+ int64_t lxioe_res; /* result code */
+ int64_t lxioe_res2; /* "secondary" result (WTF?) */
+};
+
+/*
+ * Linux binary definition of an I/O control block.
+ */
+struct lx_iocb {
+ uint64_t lxiocb_data; /* data payload */
+ uint32_t lxiocb_key; /* must be LX_KIOCB_KEY (!) */
+ uint32_t lxiocb_reserved1;
+ uint16_t lxiocb_op; /* operation */
+ int16_t lxiocb_reqprio; /* request priority */
+ uint32_t lxiocb_fd; /* file descriptor */
+ uint64_t lxiocb_buf; /* data buffer */
+ uint64_t lxiocb_nbytes; /* number of bytes */
+ int64_t lxiocb_offset; /* offset in file */
+ uint64_t lxiocb_reserved2;
+ uint32_t lxiocb_flags; /* LX_IOCB_FLAG_* flags */
+ uint32_t lxiocb_resfd; /* eventfd fd, if any */
+};
+
+extern long lx_io_setup(unsigned int, lx_aio_context_t *);
+extern long lx_io_submit(lx_aio_context_t, long nr, uintptr_t **);
+extern long lx_io_getevents(lx_aio_context_t, long, long,
+ lx_io_event_t *, struct timespec *);
+extern long lx_io_cancel(lx_aio_context_t, lx_iocb_t *, lx_io_event_t *);
+extern long lx_io_destroy(lx_aio_context_t);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_AIO_H */
diff --git a/usr/src/cmd/ssh/sshd/bsmaudit.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_debug.h
index 72f599a240..57692cfcd1 100644
--- a/usr/src/cmd/ssh/sshd/bsmaudit.h
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_debug.h
@@ -17,32 +17,38 @@
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
- *
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _LX_DEBUG_H
+#define _LX_DEBUG_H
-#ifndef _BSMAUDIT_H
-#define _BSMAUDIT_H
+#include <lx_provider_impl.h>
#ifdef __cplusplus
extern "C" {
#endif
-#include <bsm/adt.h>
-#include <bsm/adt_event.h>
-#include <pwd.h>
+/* initialize the debugging subsystem */
+extern void lx_debug_init(void);
+
+/* printf() style debug message functionality */
+extern void lx_debug(const char *, ...);
+
+extern int lx_debug_enabled;
-void audit_sshd_chauthtok(int pam_retval, uid_t uid, gid_t gid);
-void audit_sshd_login(adt_session_data_t **ah, pid_t pid);
-void audit_sshd_login_failure(adt_session_data_t **ah, int pam_retval,
- char *user);
-void audit_sshd_logout(adt_session_data_t **ah);
-void audit_sshd_settid(int);
+#define LX_DEBUG_ISENABLED \
+ (lx_debug_enabled || LX_DEBUG_ENABLED())
#ifdef __cplusplus
}
#endif
-#endif /* _BSMAUDIT_H */
+#endif /* _LX_DEBUG_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h
new file mode 100644
index 0000000000..35235887ad
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_misc.h
@@ -0,0 +1,206 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_LX_H
+#define _SYS_LX_H
+
+#include <stdio.h>
+#include <alloca.h>
+#include <dirent.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/lwp.h>
+
+#include <sys/lx_brand.h>
+#include <sys/lx_thread.h>
+
+#include <lx_errno.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern char lx_release[LX_KERN_RELEASE_MAX];
+extern char lx_cmd_name[MAXNAMLEN];
+extern pid_t zoneinit_pid;
+extern int lx_is_vforked;
+
+/*
+ * Values Linux expects for init
+ */
+#define LX_INIT_PGID 1
+#define LX_INIT_SID 1
+#define LX_INIT_PID 1
+
+/*
+ * Codes to reboot(2).
+ */
+#define LINUX_REBOOT_MAGIC1 0xfee1dead
+#define LINUX_REBOOT_MAGIC2 672274793
+#define LINUX_REBOOT_MAGIC2A 85072278
+#define LINUX_REBOOT_MAGIC2B 369367448
+#define LINUX_REBOOT_MAGIC2C 537993216
+
+/*
+ * This was observed as coming from Red Hat's init process, but it's not in
+ * their reboot(2) man page.
+ */
+#define LINUX_REBOOT_MAGIC2D 0x28121969
+
+#define LINUX_REBOOT_CMD_RESTART 0x1234567
+#define LINUX_REBOOT_CMD_HALT 0xcdef0123
+#define LINUX_REBOOT_CMD_POWER_OFF 0x4321fedc
+#define LINUX_REBOOT_CMD_RESTART2 0xa1b2c3d4
+#define LINUX_REBOOT_CMD_CAD_ON 0x89abcdef
+#define LINUX_REBOOT_CMD_CAD_OFF 0
+
+/*
+ * the maximum length of messages to be output with lx_msg(), lx_err(),
+ * lx_debug(), or lx_unsupported().
+ */
+#define LX_MSG_MAXLEN (128 + MAXPATHLEN)
+
+/*
+ * Linux scheduler priority ranges.
+ */
+#define LX_SCHED_PRIORITY_MIN_OTHER 0
+#define LX_SCHED_PRIORITY_MAX_OTHER 0
+#define LX_SCHED_PRIORITY_MIN_RRFIFO 1
+#define LX_SCHED_PRIORITY_MAX_RRFIFO 99
+
+/*
+ * Constants to indicate who getrusage() should return information about.
+ */
+#define LX_RUSAGE_SELF 0
+#define LX_RUSAGE_CHILDREN (-1)
+#define LX_RUSAGE_BOTH (-2)
+#define LX_RUSAGE_THREAD 1
+
+/*
+ * Based on code from brand_misc.h, but use of that is incompatible with the
+ * lx brand.
+ *
+ * These macros invoke a brandsys subcommand, B_TRUSS_POINT, which makes it
+ * easy to debug with DTrace.
+ */
+#define B_TRUSS_POINT 6
+
+#define B_TRACE_POINT_5(a0, a1, a2, a3, a4) \
+ (void) syscall(SYS_brand, B_TRUSS_POINT, (a0), (a1), (a2), (a3), (a4))
+
+#define B_TRACE_POINT_4(a0, a1, a2, a3) \
+ B_TRACE_POINT_5((a0), (a1), (a2), (a3), 0)
+
+#define B_TRACE_POINT_3(a0, a1, a2) \
+ B_TRACE_POINT_5((a0), (a1), (a2), 0, 0)
+
+#define B_TRACE_POINT_2(a0, a1) \
+ B_TRACE_POINT_5((a0), (a1), 0, 0, 0)
+
+#define B_TRACE_POINT_1(a0) \
+ B_TRACE_POINT_5((a0), 0, 0, 0, 0)
+
+#define B_TRACE_POINT_0() \
+ B_TRACE_POINT_5(0, 0, 0, 0, 0)
+
+/*
+ * Macros to access register state within a ucontext_t:
+ */
+#define LX_REG(ucp, r) ((ucp)->uc_mcontext.gregs[(r)])
+
+/*
+ * normally we never want to write to stderr or stdout because it's unsafe
+ * to make assumptions about the underlying file descriptors. to protect
+ * against writes to these file descriptors we go ahead and close them
+ * our brand process initalization code. but there are still occasions
+ * where we are willing to make assumptions about our file descriptors
+ * and write to them. at thes times we should use one lx_msg() or
+ * lx_msg_error()
+ */
+extern void lx_msg(char *, ...);
+extern void lx_err(char *, ...);
+extern void lx_err_fatal(char *, ...);
+extern void lx_unsupported(char *, ...);
+
+struct ucontext;
+
+extern ucontext_t *lx_syscall_regs(void);
+extern uintptr_t lx_find_brand_sp(void);
+extern const ucontext_t *lx_find_brand_uc(void);
+
+extern void lx_jump_to_linux(ucontext_t *) __NORETURN;
+
+extern char *lx_fd_to_path(int fd, char *buf, int buf_size);
+extern int lx_lpid_to_spair(pid_t, pid_t *, lwpid_t *);
+extern int lx_lpid_to_spid(pid_t, pid_t *);
+
+extern void lx_ptrace_init();
+extern int lx_ptrace_wait(siginfo_t *);
+extern void lx_ptrace_fork(void);
+extern void lx_ptrace_stop_if_option(int, boolean_t, ulong_t msg, ucontext_t *);
+extern void lx_ptrace_clone_begin(int, boolean_t);
+
+extern int lx_check_alloca(size_t);
+#define SAFE_ALLOCA(sz) (lx_check_alloca(sz) ? alloca(sz) : NULL)
+
+extern void lx_init_tsd(lx_tsd_t *);
+extern int lx_alloc_stack(void **, size_t *);
+extern void lx_install_stack(void *, size_t, lx_tsd_t *);
+extern void lx_free_stack(void);
+extern void lx_free_other_stacks(void);
+extern void lx_stack_prefork(void);
+extern void lx_stack_postfork(void);
+
+/*
+ * NO_UUCOPY disables calls to the uucopy* system calls to help with
+ * debugging brand library accesses to linux application memory.
+ */
+#ifdef NO_UUCOPY
+
+int uucopy_unsafe(const void *src, void *dst, size_t n);
+int uucopystr_unsafe(const void *src, void *dst, size_t n);
+
+#define uucopy(src, dst, n) uucopy_unsafe((src), (dst), (n))
+#define uucopystr(src, dst, n) uucopystr_unsafe((src), (dst), (n))
+
+#endif /* NO_UUCOPY */
+
+/*
+ * We use these Private libc interfaces to defer signals during critical
+ * sections.
+ */
+extern void _sigon(void);
+extern void _sigoff(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_mount.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_mount.h
new file mode 100644
index 0000000000..074a6017c6
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_mount.h
@@ -0,0 +1,163 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#ifndef _LX_MOUNT_H
+#define _LX_MOUNT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rpc/rpc.h>
+#include <nfs/nfs.h>
+
+extern int lx_nfs_mount(char *, char *, char *, int, char *);
+
+/*
+ * mount() is significantly different between Linux and Solaris. The main
+ * difference is between the set of flags. Some flags on Linux can be
+ * translated to a Solaris equivalent, some are converted to a
+ * filesystem-specific option, while others have no equivalent whatsoever.
+ */
+#define LX_MS_MGC_VAL 0xC0ED0000
+#define LX_MS_RDONLY 0x00000001
+#define LX_MS_NOSUID 0x00000002
+#define LX_MS_NODEV 0x00000004
+#define LX_MS_NOEXEC 0x00000008
+#define LX_MS_SYNCHRONOUS 0x00000010
+#define LX_MS_REMOUNT 0x00000020
+#define LX_MS_MANDLOCK 0x00000040
+#define LX_MS_NOATIME 0x00000400
+#define LX_MS_NODIRATIME 0x00000800
+#define LX_MS_BIND 0x00001000
+#define LX_MS_MOVE 0x00002000
+#define LX_MS_REC 0x00004000
+#define LX_MS_SILENT 0x00008000
+#define LX_MS_POSIXACL 0x00010000
+#define LX_MS_UNBINDABLE 0x00020000
+#define LX_MS_PRIVATE 0x00040000
+#define LX_MS_SLAVE 0x00080000
+#define LX_MS_SHARED 0x00100000
+#define LX_MS_RELATIME 0x00200000
+#define LX_MS_KERNMOUNT 0x00400000
+#define LX_MS_I_VERSION 0x00800000
+#define LX_MS_STRICTATIME 0x01000000
+#define LX_MS_LAZYTIME 0x02000000
+
+/* internal flags - ignored if passed in */
+#define LX_MS_NOSEC 0x10000000
+#define LX_MS_BORN 0x20000000
+#define LX_MS_ACTIVE 0x40000000
+#define LX_MS_NOUSER 0x80000000
+
+#define LX_MS_SUPPORTED (LX_MS_MGC_VAL | \
+ LX_MS_RDONLY | LX_MS_NOSUID | \
+ LX_MS_NODEV | LX_MS_NOEXEC | \
+ LX_MS_REMOUNT | LX_MS_NOATIME | \
+ LX_MS_BIND | LX_MS_SILENT | \
+ LX_MS_STRICTATIME | LX_MS_NOSEC | \
+ LX_MS_BORN | LX_MS_ACTIVE | LX_MS_NOUSER)
+
+/*
+ * support for nfs mounts
+ */
+#define LX_NMD_MAXHOSTNAMELEN 256
+
+#define LX_NFS_MOUNT_SOFT 0x00000001
+#define LX_NFS_MOUNT_INTR 0x00000002
+#define LX_NFS_MOUNT_SECURE 0x00000004
+#define LX_NFS_MOUNT_POSIX 0x00000008
+#define LX_NFS_MOUNT_NOCTO 0x00000010
+#define LX_NFS_MOUNT_NOAC 0x00000020
+#define LX_NFS_MOUNT_TCP 0x00000040
+#define LX_NFS_MOUNT_VER3 0x00000080
+#define LX_NFS_MOUNT_KERBEROS 0x00000100
+#define LX_NFS_MOUNT_NONLM 0x00000200
+#define LX_NFS_MOUNT_BROKEN_SUID 0x00000400
+#define LX_NFS_MOUNT_SUPPORTED (LX_NFS_MOUNT_SOFT | \
+ LX_NFS_MOUNT_INTR | \
+ LX_NFS_MOUNT_POSIX | \
+ LX_NFS_MOUNT_NOCTO | \
+ LX_NFS_MOUNT_NOAC | \
+ LX_NFS_MOUNT_TCP | \
+ LX_NFS_MOUNT_VER3 | \
+ LX_NFS_MOUNT_NONLM)
+
+#define LX_NMD_DEFAULT_RSIZE 0
+#define LX_NMD_DEFAULT_WSIZE 0
+
+/*
+ * the nfs v3 file handle structure definitions are _almost_ the same
+ * on linux and solaris. the key difference are:
+ *
+ * 1) on linux fh3_length is an unsigned short where as on solaris it's
+ * an int.
+ *
+ * 2) on linux the file handle data doesn't 32 bit members, so the structure
+ * is not 32 bit aligned. (where as on solaris it is.)
+ *
+ * so rather than defining a structure that would allow us to intrepret
+ * all the contents of the nfs v3 file handle here, we decide to treate
+ * the file handle as an array of chars. this works just fine since it
+ * avoids the alignment issues and the actual file handle handle contects
+ * are defined by the nfs specification so they are common across solaris
+ * and linux. we do the same thing for nfs v2 file handles.
+ */
+struct lx_nfs_fh2 {
+ unsigned char lx_fh_data[NFS_FHSIZE];
+} lx_nfs_fh2;
+
+struct lx_nfs_fh3 {
+ unsigned short lx_fh3_length;
+ unsigned char lx_fh3_data[NFS3_FHSIZE];
+} lx_nfs_fh3;
+
+typedef struct lx_nfs_mount_data {
+ int nmd_version;
+ int nmd_fd;
+ struct lx_nfs_fh2 nmd_old_root;
+ int nmd_flags;
+ int nmd_rsize;
+ int nmd_wsize;
+ int nmd_timeo;
+ int nmd_retrans;
+ int nmd_acregmin;
+ int nmd_acregmax;
+ int nmd_acdirmin;
+ int nmd_acdirmax;
+ struct sockaddr_in nmd_addr;
+ char nmd_hostname[LX_NMD_MAXHOSTNAMELEN];
+ int nmd_namlen;
+ uint_t nmd_bsize;
+ struct lx_nfs_fh3 nmd_root;
+} lx_nfs_mount_data_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_MOUNT_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_poll.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_poll.h
new file mode 100644
index 0000000000..99abdbbf46
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_poll.h
@@ -0,0 +1,65 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LX_POLL_H
+#define _SYS_LX_POLL_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * These events are identical between Linux and Solaris
+ */
+#define LX_POLLIN 0x001
+#define LX_POLLPRI 0x002
+#define LX_POLLOUT 0x004
+#define LX_POLLERR 0x008
+#define LX_POLLHUP 0x010
+#define LX_POLLNVAL 0x020
+#define LX_POLLRDNORM 0x040
+#define LX_POLLRDBAND 0x080
+
+#define LX_POLL_COMMON_EVENTS (LX_POLLIN | LX_POLLPRI | LX_POLLOUT | \
+ LX_POLLERR | LX_POLLHUP | LX_POLLNVAL | LX_POLLRDNORM | LX_POLLRDBAND)
+
+/*
+ * These events differ between Linux and Solaris
+ */
+#define LX_POLLWRNORM 0x0100
+#define LX_POLLWRBAND 0x0200
+#define LX_POLLRDHUP 0x2000
+
+
+#define LX_POLL_SUPPORTED_EVENTS \
+ (LX_POLL_COMMON_EVENTS | LX_POLLWRNORM | LX_POLLWRBAND | LX_POLLRDHUP)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_POLL_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h
new file mode 100644
index 0000000000..9d77524410
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h
@@ -0,0 +1,289 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _SYS_LX_SIGNAL_H
+#define _SYS_LX_SIGNAL_H
+
+#if !defined(_ASM)
+#include <sys/lx_types.h>
+#include <sys/ucontext.h>
+#include <sys/lx_siginfo.h>
+#include <lx_signum.h>
+
+#endif /* !defined(_ASM) */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Linux sigaction flags
+ */
+#define LX_SA_NOCLDSTOP 0x00000001
+#define LX_SA_NOCLDWAIT 0x00000002
+#define LX_SA_SIGINFO 0x00000004
+#define LX_SA_RESTORER 0x04000000
+#define LX_SA_ONSTACK 0x08000000
+#define LX_SA_RESTART 0x10000000
+#define LX_SA_NODEFER 0x40000000
+#define LX_SA_RESETHAND 0x80000000
+#define LX_SA_NOMASK LX_SA_NODEFER
+#define LX_SA_ONESHOT LX_SA_RESETHAND
+
+#define LX_SIG_BLOCK 0
+#define LX_SIG_UNBLOCK 1
+#define LX_SIG_SETMASK 2
+
+#define LX_MINSIGSTKSZ 2048
+#define LX_SS_ONSTACK 1
+#define LX_SS_DISABLE 2
+
+#define LX_SIGRT_MAGIC 0xdeadf00d
+
+#if !defined(_ASM)
+
+typedef struct lx_sigaction {
+ void (*lxsa_handler)();
+ int lxsa_flags;
+ void (*lxsa_restorer)(void);
+ lx_sigset_t lxsa_mask;
+} lx_sigaction_t;
+
+#if defined(_ILP32)
+typedef uint32_t lx_osigset_t;
+
+#define OSIGSET_NBITS (sizeof (lx_osigset_t) * NBBY)
+#define OSIGSET_BITSET(sig) (1U << (((sig) - 1) % OSIGSET_NBITS))
+
+typedef struct lx_osigaction {
+ void (*lxsa_handler)();
+ lx_osigset_t lxsa_mask;
+ int lxsa_flags;
+ void (*lxsa_restorer)(void);
+} lx_osigaction_t;
+#endif
+
+/*
+ * Flag settings to determine whether common routines should operate on
+ * lx_sigset_ts or lx_osigset_ts.
+ */
+#define USE_OSIGSET 0
+#define USE_SIGSET 1
+
+typedef struct lx_sighandlers {
+ struct lx_sigaction lx_sa[LX_NSIG + 1];
+} lx_sighandlers_t;
+
+typedef struct lx_sigaltstack {
+ void *ss_sp;
+ int ss_flags;
+ size_t ss_size;
+} lx_stack_t;
+
+/*
+ * _fpreg, _fpxreg, _xmmreg and _fpstate are defined in Linux src in:
+ * arch/x86/include/uapi/asm/sigcontext.h
+ */
+#define LX_X86_FXSR_MAGIC 0x0000
+#define LX_X86_FXSR_NONE 0xffff
+
+#if defined(_LP64)
+
+typedef struct lx_fpstate {
+ ushort_t cwd;
+ ushort_t swd;
+ ushort_t twd; /* Note this is not the same as the 32bit/x87/FSAVE twd */
+ ushort_t fop;
+ uint64_t rip;
+ uint64_t rdp;
+ uint32_t mxcsr;
+ uint32_t mxcsr_mask;
+ uint32_t st_space[32]; /* 8 * 16 bytes for each FP-reg */
+ uint32_t xmm_space[64]; /* 16 * 16 bytes for each XMM-reg */
+ uint32_t reserved2[12];
+ uint32_t reserved3[12];
+} lx_fpstate_t;
+
+/*
+ * The Linux layout is defined in the Linux src tree in:
+ * arch/x86/include/asm/sigcontext.h
+ * and the user-level def (which is what we want) at:
+ * arch/x86/include/uapi/asm/sigcontext.h
+ *
+ * The Illumos offsets of the registers in the context are defined in:
+ * usr/src/uts/intel/sys/regset.h
+ * this is an mcontext_t from uc_mcontext.
+ *
+ * For the 64-bit case the register layout is completely different in the
+ * context.
+ */
+typedef struct lx_sigcontext {
+ ulong_t sc_r8;
+ ulong_t sc_r9;
+ ulong_t sc_r10;
+ ulong_t sc_r11;
+ ulong_t sc_r12;
+ ulong_t sc_r13;
+ ulong_t sc_r14;
+ ulong_t sc_r15;
+ ulong_t sc_rdi;
+ ulong_t sc_rsi;
+ ulong_t sc_rbp;
+ ulong_t sc_rbx;
+ ulong_t sc_rdx;
+ ulong_t sc_rax;
+ ulong_t sc_rcx;
+ ulong_t sc_rsp;
+ ulong_t sc_rip;
+ ulong_t sc_eflags;
+ ushort_t sc_cs;
+ ushort_t sc_gs;
+ ushort_t sc_fs;
+ ushort_t sc_pad0;
+ ulong_t sc_err;
+ ulong_t sc_trapno;
+
+ ulong_t sc_mask;
+ ulong_t sc_cr2;
+ lx_fpstate_t *sc_fpstate;
+
+ ulong_t reserved[8];
+} lx_sigcontext_t;
+
+#else /* is _ILP32 */
+
+struct lx_fpreg {
+ ushort_t significand[4];
+ ushort_t exponent;
+};
+
+struct lx_fpxreg {
+ ushort_t significand[4];
+ ushort_t exponent;
+ ushort_t padding[3];
+};
+
+struct lx_xmmreg {
+ uint32_t element[4];
+};
+
+typedef struct lx_fpstate {
+ /* Regular FPU environment */
+ ulong_t cw;
+ ulong_t sw;
+ ulong_t tag;
+ ulong_t ipoff;
+ ulong_t cssel;
+ ulong_t dataoff;
+ ulong_t datasel;
+ struct lx_fpreg _st[8];
+ ushort_t status;
+ ushort_t magic; /* 0xffff = regular FPU data */
+
+ /* FXSR FPU environment */
+ ulong_t _fxsr_env[6]; /* env is ignored */
+ ulong_t mxcsr;
+ ulong_t reserved;
+ struct lx_fpxreg _fxsr_st[8]; /* reg data is ignored */
+ struct lx_xmmreg _xmm[8];
+ ulong_t padding[56];
+} lx_fpstate_t;
+
+/*
+ * The Linux layout is defined in the Linux src tree in:
+ * arch/x86/include/asm/sigcontext.h
+ * and the user-level def (which is what we want) at:
+ * arch/x86/include/uapi/asm/sigcontext.h
+ *
+ * The Illumos offsets of the registers in the context are defined by the
+ * i386 ABI (see usr/src/uts/intel/sys/regset.h).
+ *
+ * Both Illumos and Linux match up here.
+ */
+typedef struct lx_sigcontext {
+ ulong_t sc_gs;
+ ulong_t sc_fs;
+ ulong_t sc_es;
+ ulong_t sc_ds;
+ ulong_t sc_edi;
+ ulong_t sc_esi;
+ ulong_t sc_ebp;
+ ulong_t sc_esp;
+ ulong_t sc_ebx;
+ ulong_t sc_edx;
+ ulong_t sc_ecx;
+ ulong_t sc_eax;
+ ulong_t sc_trapno;
+ ulong_t sc_err;
+ ulong_t sc_eip;
+ ulong_t sc_cs;
+ ulong_t sc_eflags;
+ ulong_t sc_esp_at_signal;
+ ulong_t sc_ss;
+
+ lx_fpstate_t *sc_fpstate;
+ ulong_t sc_mask;
+ ulong_t sc_cr2;
+} lx_sigcontext_t;
+#endif
+
+typedef struct lx_ucontext {
+ ulong_t uc_flags; /* Linux always sets this to 0 */
+ struct lx_ucontext *uc_link; /* Linux always sets this to NULL */
+ lx_stack_t uc_stack;
+ lx_sigcontext_t uc_sigcontext;
+ lx_sigset_t uc_sigmask;
+} lx_ucontext_t;
+
+typedef struct lx_sigbackup lx_sigbackup_t;
+struct lx_sigbackup {
+ ucontext_t *lxsb_retucp;
+ ucontext_t *lxsb_sigucp;
+ uintptr_t lxsb_sigdeliver_frame;
+ lx_sigbackup_t *lxsb_previous;
+};
+
+extern const int ltos_signo[];
+extern const int stol_signo[];
+
+extern void setsigacthandler(void (*)(int, siginfo_t *, void *),
+ void (**)(int, siginfo_t *, void *),
+ int (*)(const ucontext_t *));
+
+extern int lx_siginit(void);
+extern void lx_sighandlers_save(lx_sighandlers_t *);
+extern void lx_sighandlers_restore(lx_sighandlers_t *);
+
+extern int stol_siginfo(siginfo_t *siginfop, lx_siginfo_t *lx_siginfop);
+extern int stol_status(int);
+
+#endif /* !defined(_ASM) */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_SIGNAL_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_sigstack.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_sigstack.h
new file mode 100644
index 0000000000..fbbf462389
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_sigstack.h
@@ -0,0 +1,78 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _SYS_LX_SIGSTACK_H
+#define _SYS_LX_SIGSTACK_H
+
+#if !defined(_ASM)
+#include <sys/lx_types.h>
+#include <sys/ucontext.h>
+#include <sys/lx_signal.h>
+#include <lx_signum.h>
+
+#endif /* !defined(_ASM) */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Two flavors of Linux signal stacks:
+ *
+ * lx_sigstack - used for "modern" signal handlers, in practice those
+ * that have the sigaction(2) flag SA_SIGINFO set
+ *
+ * lx_oldsigstack - used for legacy signal handlers, those that do not have
+ * the sigaction(2) flag SA_SIGINFO set or that were setup via
+ * the signal(2) call.
+ *
+ * NOTE: Since these structures will be placed on the stack and stack math will
+ * be done with their sizes, for the 32-bit code they must be word
+ * aligned in size (4 bytes) so the stack remains word aligned per the
+ * i386 ABI, or, for 64-bit code they must be 16 byte aligned as per the
+ * AMD64 ABI.
+ *
+ * The precise layout of these stack frames is also potentially
+ * depended on by particularly esoteric (or broken) software, and
+ * should be preserved. The Linux structures (rt_sigframe, et al)
+ * are defined in "arch/x86/include/asm/sigframe.h".
+ */
+#if defined(_LP64)
+typedef struct lx_sigstack {
+ void (*retaddr)(); /* address of real lx_rt_sigreturn code */
+ lx_ucontext_t uc; /* saved user context */
+ lx_siginfo_t si; /* saved signal information */
+ lx_fpstate_t fpstate; /* saved FP state */
+ char pad[2]; /* stack alignment */
+} lx_sigstack_t;
+#else
+struct lx_sigstack {
+ void (*retaddr)(); /* address of real lx_rt_sigreturn code */
+ int sig; /* signal number */
+ lx_siginfo_t *sip; /* points to "si" if valid, NULL if not */
+ lx_ucontext_t *ucp; /* points to "uc" */
+ lx_siginfo_t si; /* saved signal information */
+ lx_ucontext_t uc; /* saved user context */
+ lx_fpstate_t fpstate; /* saved FP state */
+ char trampoline[8]; /* code for trampoline to lx_rt_sigreturn() */
+};
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_SIGSTACK_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_socket.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_socket.h
new file mode 100644
index 0000000000..2bbc31a234
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_socket.h
@@ -0,0 +1,147 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_LX_SOCKET_H
+#define _SYS_LX_SOCKET_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/lx_types.h>
+
+/*
+ * Linux address family definitions
+ * Some of these are not supported
+ */
+#define LX_AF_UNSPEC 0 /* Unspecified */
+#define LX_AF_UNIX 1 /* local file/pipe name */
+#define LX_AF_INET 2 /* IP protocol family */
+#define LX_AF_AX25 3 /* Amateur Radio AX.25 */
+#define LX_AF_IPX 4 /* Novell Internet Protocol */
+#define LX_AF_APPLETALK 5 /* Appletalk */
+#define LX_AF_NETROM 6 /* Amateur radio */
+#define LX_AF_BRIDGE 7 /* Multiprotocol bridge */
+#define LX_AF_ATMPVC 8 /* ATM PVCs */
+#define LX_AF_X25 9 /* X.25 */
+#define LX_AF_INET6 10 /* IPV 6 */
+#define LX_AF_ROSE 11 /* Amateur Radio X.25 */
+#define LX_AF_DECnet 12 /* DECnet */
+#define LX_AF_NETBEUI 13 /* 802.2LLC */
+#define LX_AF_SECURITY 14 /* Security callback */
+#define LX_AF_KEY 15 /* key management */
+#define LX_AF_ROUTE 16 /* Alias to emulate 4.4BSD */
+#define LX_AF_PACKET 17 /* Packet family */
+#define LX_AF_ASH 18 /* Ash ? */
+#define LX_AF_ECONET 19 /* Acorn Econet */
+#define LX_AF_ATMSVC 20 /* ATM SVCs */
+#define LX_AF_SNA 22 /* Linux SNA */
+#define LX_AF_IRDA 23 /* IRDA sockets */
+#define LX_AF_PPPOX 24 /* PPPoX sockets */
+#define LX_AF_WANPIPE 25 /* Wanpipe API sockets */
+#define LX_AF_BLUETOOTH 31 /* Bluetooth sockets */
+#define LX_AF_MAX 33 /* MAX socket type */
+
+#define AF_NOTSUPPORTED -1
+#define AF_INVAL -2
+
+/*
+ * Linux ARP protocol hardware identifiers
+ */
+#define LX_ARPHRD_ETHER 1 /* Ethernet */
+#define LX_ARPHRD_LOOPBACK 772 /* Loopback */
+#define LX_ARPHRD_VOID 0xffff /* Unknown */
+
+/*
+ * Linux socket type definitions
+ */
+#define LX_SOCK_STREAM 1 /* Connection-based byte streams */
+#define LX_SOCK_DGRAM 2 /* Connectionless, datagram */
+#define LX_SOCK_RAW 3 /* Raw protocol interface */
+#define LX_SOCK_RDM 4 /* Reliably-delivered message */
+#define LX_SOCK_SEQPACKET 5 /* Sequenced packet stream */
+#define LX_SOCK_PACKET 10 /* Linux specific */
+#define LX_SOCK_MAX 11
+
+/*
+ * The Linux socket type can be or-ed with other flags (e.g. SOCK_CLOEXEC).
+ */
+#define LX_SOCK_TYPE_MASK 0xf
+
+/*
+ * Linux flags for socket, socketpair and accept4. These are or-ed into the
+ * socket type value. In the Linux net.h header these come from fcntl.h (note
+ * that they are in octal in the Linux header).
+ */
+#define LX_SOCK_CLOEXEC 0x80000
+#define LX_SOCK_NONBLOCK 0x800
+
+#define SOCK_NOTSUPPORTED -1
+#define SOCK_INVAL -2
+
+/*
+ * PF_PACKET protocol definitions.
+ */
+#define LX_ETH_P_802_3 0x0001
+#define LX_ETH_P_ALL 0x0003
+#define LX_ETH_P_802_2 0x0004
+#define LX_ETH_P_IP 0x0800
+#define LX_ETH_P_ARP 0x0806
+#define LX_ETH_P_IPV6 0x86DD
+
+/*
+ * Linux socketcall indices.
+ * These constitute all 17 socket related system calls
+ *
+ * These system calls are called via a single system call socketcall().
+ * The first arg being the endex of the system call type
+ */
+#define LX_SOCKET 1
+#define LX_BIND 2
+#define LX_CONNECT 3
+#define LX_LISTEN 4
+#define LX_ACCEPT 5
+#define LX_GETSOCKNAME 6
+#define LX_GETPEERNAME 7
+#define LX_SOCKETPAIR 8
+#define LX_SEND 9
+#define LX_RECV 10
+#define LX_SENDTO 11
+#define LX_RECVFROM 12
+#define LX_SHUTDOWN 13
+#define LX_SETSOCKOPT 14
+#define LX_GETSOCKOPT 15
+#define LX_SENDMSG 16
+#define LX_RECVMSG 17
+#define LX_ACCEPT4 18
+#define LX_RECVMMSG 19
+#define LX_SENDMMSG 20
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_SOCKET_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_statfs.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_statfs.h
new file mode 100644
index 0000000000..dd046398c4
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_statfs.h
@@ -0,0 +1,83 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _LX_STATFS_H
+#define _LX_STATFS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int lx_statfs_init(void);
+
+struct lx_statfs {
+ size_t f_type;
+ size_t f_bsize;
+ ulong_t f_blocks;
+ ulong_t f_bfree;
+ ulong_t f_bavail;
+ ulong_t f_files;
+ ulong_t f_ffree;
+ u_longlong_t f_fsid;
+ size_t f_namelen;
+ size_t f_frsize;
+ size_t f_spare[5];
+};
+
+struct lx_statfs64 {
+ int f_type;
+ int f_bsize;
+ u_longlong_t f_blocks;
+ u_longlong_t f_bfree;
+ u_longlong_t f_bavail;
+ u_longlong_t f_files;
+ u_longlong_t f_ffree;
+ u_longlong_t f_fsid;
+ int f_namelen;
+ int f_frsize;
+ int f_spare[5];
+};
+
+/*
+ * These magic values are taken mostly from statfs(2) or magic.h
+ */
+#define LX_AUTOFS_SUPER_MAGIC 0x0187
+#define LX_DEVFS_SUPER_MAGIC 0x1373
+#define LX_DEVPTS_SUPER_MAGIC 0x1cd1
+#define LX_EXT2_SUPER_MAGIC 0xEF53
+#define LX_ISOFS_SUPER_MAGIC 0x9660
+#define LX_MSDOS_SUPER_MAGIC 0x4d44
+#define LX_NFS_SUPER_MAGIC 0x6969
+#define LX_PROC_SUPER_MAGIC 0x9fa0
+#define LX_TMPFS_SUPER_MAGIC 0x01021994
+#define LX_UFS_MAGIC 0x00011954
+#define LX_PIPEFS_MAGIC 0x50495045
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_STATFS_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h
new file mode 100644
index 0000000000..6e43300e35
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h
@@ -0,0 +1,301 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#ifndef _SYS_LX_SYSCALL_H
+#define _SYS_LX_SYSCALL_H
+
+#include <sys/lx_brand.h>
+
+#if !defined(_ASM)
+
+#include <sys/types.h>
+#include <sys/procset.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int lx_install;
+
+extern long lx_mknodat(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_futimesat(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_utimensat(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_fstatat64(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_unlinkat(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_renameat(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_symlinkat(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_readlinkat(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+
+extern long lx_stat(uintptr_t, uintptr_t);
+extern long lx_fstat(uintptr_t, uintptr_t);
+extern long lx_lstat(uintptr_t, uintptr_t);
+extern long lx_stat64(uintptr_t, uintptr_t);
+extern long lx_fstat64(uintptr_t, uintptr_t);
+extern long lx_lstat64(uintptr_t, uintptr_t);
+extern long lx_fcntl(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_fcntl64(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_flock(uintptr_t, uintptr_t);
+extern long lx_readlink(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_readdir(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_execve(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_dup2(uintptr_t, uintptr_t);
+extern long lx_dup3(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_ioctl(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_vhangup(void);
+extern long lx_fadvise64(uintptr_t, off64_t, uintptr_t, uintptr_t);
+extern long lx_fadvise64_64(uintptr_t, off64_t, off64_t, uintptr_t);
+
+extern long lx_socketcall(uintptr_t, uintptr_t);
+extern long lx_listen(int, int);
+extern long lx_shutdown(int, int);
+extern long lx_socketpair(int, int, int, int *);
+
+extern long lx_settimeofday(uintptr_t, uintptr_t);
+extern long lx_getrusage(uintptr_t, uintptr_t);
+extern long lx_mknod(uintptr_t, uintptr_t, uintptr_t);
+
+extern long lx_getpgrp(void);
+extern long lx_getpgid(uintptr_t);
+extern long lx_setpgid(uintptr_t, uintptr_t);
+extern long lx_getsid(uintptr_t);
+extern long lx_setsid(void);
+
+extern long lx_getuid16(void);
+extern long lx_getgid16(void);
+extern long lx_geteuid16(void);
+extern long lx_getegid16(void);
+extern long lx_geteuid(void);
+extern long lx_getegid(void);
+extern long lx_getresuid16(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_getresgid16(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_getresuid(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_getresgid(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_capget(uintptr_t, uintptr_t);
+extern long lx_capset(uintptr_t, uintptr_t);
+
+extern long lx_setuid16(uintptr_t);
+extern long lx_setreuid16(uintptr_t, uintptr_t);
+extern long lx_setregid16(uintptr_t, uintptr_t);
+extern long lx_setgid16(uintptr_t);
+extern long lx_setfsuid16(uintptr_t);
+extern long lx_setfsgid16(uintptr_t);
+
+extern long lx_setfsuid(uintptr_t);
+extern long lx_setfsgid(uintptr_t);
+
+extern long lx_clock_nanosleep(int, int flags, struct timespec *,
+ struct timespec *);
+extern long lx_adjtimex(void *);
+extern long lx_timer_create(int, struct sigevent *, timer_t *);
+extern long lx_timer_settime(timer_t, int, struct itimerspec *,
+ struct itimerspec *);
+extern long lx_timer_gettime(timer_t, struct itimerspec *);
+extern long lx_timer_getoverrun(timer_t);
+extern long lx_timer_delete(timer_t);
+
+extern long lx_truncate(uintptr_t, uintptr_t);
+extern long lx_ftruncate(uintptr_t, uintptr_t);
+extern long lx_truncate64(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_ftruncate64(uintptr_t, uintptr_t, uintptr_t);
+
+extern long lx_sysctl(uintptr_t);
+extern long lx_fsync(uintptr_t);
+extern long lx_fdatasync(uintptr_t);
+extern long lx_unlink(uintptr_t);
+extern long lx_rmdir(uintptr_t);
+extern long lx_rename(uintptr_t, uintptr_t);
+extern long lx_utime(uintptr_t, uintptr_t);
+extern long lx_llseek(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_lseek(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_sysfs(uintptr_t, uintptr_t, uintptr_t);
+
+extern long lx_uname(uintptr_t);
+extern long lx_reboot(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_getgroups16(uintptr_t, uintptr_t);
+extern long lx_setgroups(uintptr_t, uintptr_t);
+extern long lx_setgroups16(uintptr_t, uintptr_t);
+
+extern long lx_query_module(uintptr_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t);
+
+extern long lx_times(uintptr_t);
+extern long lx_setitimer(uintptr_t, uintptr_t, uintptr_t);
+
+extern long lx_clone(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_exit(uintptr_t);
+extern long lx_group_exit(uintptr_t);
+
+extern long lx_mlock(uintptr_t, uintptr_t);
+extern long lx_mlockall(uintptr_t);
+extern long lx_munlock(uintptr_t, uintptr_t);
+extern long lx_munlockall(void);
+extern long lx_msync(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_madvise(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_mprotect(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_mmap(uintptr_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t, uintptr_t);
+extern long lx_mmap2(uintptr_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t, uintptr_t);
+extern long lx_remap(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+
+extern long lx_mount(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_umount(uintptr_t);
+extern long lx_umount2(uintptr_t, uintptr_t);
+
+extern long lx_statfs(uintptr_t, uintptr_t);
+extern long lx_fstatfs(uintptr_t, uintptr_t);
+extern long lx_statfs64(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_fstatfs64(uintptr_t, uintptr_t, uintptr_t);
+
+extern long lx_sigreturn(void);
+extern long lx_rt_sigreturn(void);
+extern long lx_signal(uintptr_t, uintptr_t);
+extern long lx_sigaction(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_rt_sigaction(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_sigaltstack(uintptr_t, uintptr_t);
+extern long lx_sigpending(uintptr_t);
+extern long lx_rt_sigpending(uintptr_t, uintptr_t);
+extern long lx_sigprocmask(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_rt_sigprocmask(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_sigsuspend(uintptr_t);
+extern long lx_rt_sigsuspend(uintptr_t, uintptr_t);
+extern long lx_rt_sigwaitinfo(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_rt_sigtimedwait(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_rt_sigqueueinfo(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_rt_tgsigqueueinfo(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+
+extern long lx_sync(void);
+
+extern long lx_futex(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t);
+
+extern long lx_tkill(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t);
+extern long lx_tgkill(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t);
+
+extern long lx_sethostname(uintptr_t, uintptr_t);
+extern long lx_setdomainname(uintptr_t, uintptr_t);
+
+extern long lx_sendfile(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_sendfile64(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+
+extern long lx_fork(void);
+extern long lx_vfork(void);
+extern long lx_exec(uintptr_t, uintptr_t, uintptr_t);
+
+extern long lx_getpriority(uintptr_t, uintptr_t);
+extern long lx_setpriority(uintptr_t, uintptr_t, uintptr_t);
+
+extern long lx_ptrace(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+
+extern long lx_sched_getaffinity(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_sched_setaffinity(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_sched_getparam(uintptr_t, uintptr_t);
+extern long lx_sched_setparam(uintptr_t, uintptr_t);
+extern long lx_sched_rr_get_interval(uintptr_t pid, uintptr_t);
+extern long lx_sched_getscheduler(uintptr_t);
+extern long lx_sched_setscheduler(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_sched_get_priority_min(uintptr_t);
+extern long lx_sched_get_priority_max(uintptr_t);
+
+extern long lx_xattr2(uintptr_t, uintptr_t);
+extern long lx_xattr3(uintptr_t, uintptr_t, uintptr_t);
+extern long lx_xattr4(uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+
+extern long lx_keyctl(void);
+
+extern long lx_ipc(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+extern long lx_msgget(key_t, int);
+extern long lx_msgsnd(int, void *, size_t, int);
+extern long lx_msgrcv(int, void *, size_t, long, int);
+extern long lx_msgctl(int, int, void *);
+extern long lx_semget(key_t, int, int);
+extern long lx_semop(int, void *, size_t);
+extern long lx_semtimedop(int, void *, size_t, struct timespec *);
+extern long lx_semctl(int, int, int, void *);
+extern long lx_shmget(key_t, size_t, int);
+extern long lx_shmat(int, void *, int);
+extern long lx_shmctl(int, int, void *);
+
+extern long lx_alarm(unsigned int);
+extern long lx_close(int);
+extern long lx_chdir(const char *);
+extern long lx_chroot(const char *);
+extern long lx_creat(const char *, mode_t);
+extern long lx_dup(int);
+extern long lx_eventfd(unsigned int);
+extern long lx_eventfd2(unsigned int, int);
+extern long lx_fchdir(int);
+extern long lx_getgid(void);
+extern long lx_getgroups(int, gid_t *);
+extern long lx_getitimer(int, struct itimerval *);
+extern long lx_getuid(void);
+extern long lx_inotify_add_watch(int, const char *, uint32_t);
+extern long lx_inotify_init(void);
+extern long lx_inotify_init1(int);
+extern long lx_inotify_rm_watch(int, int);
+extern long lx_mincore(caddr_t, size_t, char *);
+extern long lx_munmap(void *, size_t);
+extern long lx_nice(int);
+extern long lx_pause(void);
+extern long lx_setgid(gid_t);
+extern long lx_setuid(uid_t);
+extern long lx_setregid(gid_t, gid_t);
+extern long lx_setreuid(uid_t, uid_t);
+extern long lx_shmdt(char *);
+extern long lx_signalfd(int, uintptr_t, size_t);
+extern long lx_signalfd4(int, uintptr_t, size_t, int);
+extern long lx_stime(const time_t *);
+extern long lx_symlink(const char *, const char *);
+extern long lx_syslog(int, char *, int);
+extern long lx_timerfd_create(int, int);
+extern long lx_timerfd_settime(int, int,
+ const struct itimerspec *, struct itimerspec *);
+extern long lx_timerfd_gettime(int, struct itimerspec *);
+extern long lx_umask(mode_t);
+extern long lx_utimes(const char *, const struct timeval *);
+
+#endif /* !defined(_ASM) */
+
+
+#if defined(_LP64)
+#define LX_SYS_clone 56
+#define LX_SYS_vfork 58
+#else
+#define LX_SYS_clone 120
+#define LX_SYS_vfork 190
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_SYSCALL_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_sysv_ipc.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_sysv_ipc.h
new file mode 100644
index 0000000000..f9f49598b7
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_sysv_ipc.h
@@ -0,0 +1,222 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _LX_SYSV_IPC_H
+#define _LX_SYSV_IPC_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * msg-related definitions.
+ */
+#define LX_IPC_CREAT 00001000
+#define LX_IPC_EXCL 00002000
+#define LX_IPC_NOWAIT 00004000
+
+#define LX_IPC_RMID 0
+#define LX_IPC_SET 1
+#define LX_IPC_STAT 2
+#define LX_IPC_INFO 3
+
+#define LX_IPC_64 0x0100
+
+#define LX_SEMOP 1
+#define LX_SEMGET 2
+#define LX_SEMCTL 3
+#define LX_MSGSND 11
+#define LX_MSGRCV 12
+#define LX_MSGGET 13
+#define LX_MSGCTL 14
+#define LX_SHMAT 21
+#define LX_SHMDT 22
+#define LX_SHMGET 23
+#define LX_SHMCTL 24
+
+#define LX_MSG_STAT 11
+#define LX_MSG_INFO 12
+
+#define LX_MSG_NOERROR 010000
+
+/*
+ * Linux hard codes the maximum msgbuf length to be 8192 bytes. Really.
+ */
+#define LX_MSGMAX 8192
+
+struct lx_ipc_perm {
+ key_t key;
+ uid_t uid;
+ uid_t gid;
+ uid_t cuid;
+ uid_t cgid;
+ ushort_t mode;
+ ushort_t _pad1;
+ ushort_t seq;
+ ushort_t _pad2;
+ ulong_t _unused1;
+ ulong_t _unused2;
+};
+
+struct lx_msqid_ds {
+ struct lx_ipc_perm msg_perm;
+ time_t msg_stime;
+#if defined(_ILP32)
+ ulong_t _unused1;
+#endif
+ time_t msg_rtime;
+#if defined(_ILP32)
+ ulong_t _unused2;
+#endif
+ time_t msg_ctime;
+#if defined(_ILP32)
+ ulong_t _unused3;
+#endif
+ ulong_t msg_cbytes;
+ ulong_t msg_qnum;
+ ulong_t msg_qbytes;
+ pid_t msg_lspid;
+ pid_t msg_lrpid;
+ ulong_t _unused4;
+ ulong_t _unused5;
+};
+
+struct lx_msginfo {
+ int msgpool;
+ int msgmap;
+ int msgmax;
+ int msgmnb;
+ int msgmni;
+ int msgssz;
+ int msgtql;
+ ushort_t msgseg;
+};
+
+/*
+ * semaphore-related definitions.
+ */
+#define LX_GETPID 11
+#define LX_GETVAL 12
+#define LX_GETALL 13
+#define LX_GETNCNT 14
+#define LX_GETZCNT 15
+#define LX_SETVAL 16
+#define LX_SETALL 17
+#define LX_SEM_STAT 18
+#define LX_SEM_INFO 19
+#define LX_SEM_UNDO 0x1000
+#define LX_SEMVMX 32767
+
+struct lx_semid_ds {
+ struct lx_ipc_perm sem_perm;
+ time_t sem_otime;
+ ulong_t _unused1;
+ time_t sem_ctime;
+ ulong_t _unused2;
+ ulong_t sem_nsems;
+ ulong_t _unused3;
+ ulong_t _unused4;
+};
+
+struct lx_seminfo {
+ int semmap;
+ int semmni;
+ int semmns;
+ int semmnu;
+ int semmsl;
+ int semopm;
+ int semume;
+ int semusz;
+ int semvmx;
+ int semaem;
+};
+
+union lx_semun {
+ int val;
+ struct lx_semid_ds *semds;
+ ushort_t *sems;
+ struct lx_seminfo *info;
+ uintptr_t dummy;
+};
+
+/*
+ * shm-related definitions
+ */
+#define LX_SHM_LOCKED 02000
+#define LX_SHM_RDONLY 010000
+#define LX_SHM_RND 020000
+#define LX_SHM_REMAP 040000
+
+#define LX_SHM_LOCK 11
+#define LX_SHM_UNLOCK 12
+#define LX_SHM_STAT 13
+#define LX_SHM_INFO 14
+
+struct lx_shmid_ds {
+ struct lx_ipc_perm shm_perm;
+ size_t shm_segsz;
+ time_t shm_atime;
+#if defined(_ILP32)
+ ulong_t _unused1;
+#endif
+ time_t shm_dtime;
+#if defined(_ILP32)
+ ulong_t _unused2;
+#endif
+ time_t shm_ctime;
+#if defined(_ILP32)
+ ulong_t _unused3;
+#endif
+ pid_t shm_cpid;
+ pid_t shm_lpid;
+ ushort_t shm_nattch;
+ ulong_t _unused4;
+ ulong_t _unused5;
+};
+
+struct lx_shm_info {
+ int used_ids;
+ ulong_t shm_tot;
+ ulong_t shm_rss;
+ ulong_t shm_swp;
+ ulong_t swap_attempts;
+ ulong_t swap_successes;
+};
+
+struct lx_shminfo {
+ ulong_t shmmax;
+ ulong_t shmmin;
+ ulong_t shmmni;
+ ulong_t shmseg;
+ ulong_t shmall;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_SYSV_IPC_H */
diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h
new file mode 100644
index 0000000000..0a17705dd1
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_thread.h
@@ -0,0 +1,80 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc
+ */
+
+#ifndef _SYS_LX_THREAD_H
+#define _SYS_LX_THREAD_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/lx_signal.h>
+#include <thread.h>
+
+typedef enum lx_exit_type {
+ LX_ET_NONE = 0,
+ LX_ET_EXIT,
+ LX_ET_EXIT_GROUP
+} lx_exit_type_t;
+
+typedef struct lx_tsd {
+ /* per-thread flag set on parent vfork, cleared on thread resume */
+ int lxtsd_is_vforked;
+ lx_exit_type_t lxtsd_exit;
+ int lxtsd_exit_status;
+ ucontext_t lxtsd_exit_context;
+
+ /*
+ * If this value is non-zero, we use it in lx_sigdeliver() to represent
+ * the in-use extent of the Linux (i.e. BRAND) stack for this thread.
+ * Access to this value must be protected by _sigoff()/_sigon().
+ */
+ uintptr_t lxtsd_lx_sp;
+
+ /*
+ * Alternate stack for Linux sigaltstack emulation:
+ */
+ lx_stack_t lxtsd_sigaltstack;
+
+ void *lxtsd_clone_state;
+
+ lx_sigbackup_t *lxtsd_sigbackup;
+} lx_tsd_t;
+
+extern thread_key_t lx_tsd_key;
+
+extern void lx_swap_gs(long, long *);
+
+extern void lx_exit_common(void) __NORETURN;
+
+extern lx_tsd_t *lx_get_tsd(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_THREAD_H */
diff --git a/usr/src/cmd/ssh/ssh-keygen/Makefile b/usr/src/lib/brand/lx/lx_init/Makefile
index c28cb942c8..796e89f9a8 100644
--- a/usr/src/cmd/ssh/ssh-keygen/Makefile
+++ b/usr/src/lib/brand/lx/lx_init/Makefile
@@ -18,48 +18,54 @@
#
# CDDL HEADER END
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
#
-# cmd/ssh/ssh-keygen/Makefile
-
-PROG= ssh-keygen
+# Copyright 2015 Joyent, Inc.
+#
-OBJS = \
- ssh-keygen.o
-SRCS = $(OBJS:.o=.c)
+PROG = lxinit
-include ../../Makefile.cmd
-include ../Makefile.ssh-common
+PROG_OBJS = lxinit.o pipe_stream.o run_command.o
+LIST_OBJS = list.o
-DIRS = $(ROOTLIBSUNSSH)
+OBJS = $(PROG_OBJS) \
+ $(LIST_OBJS)
+SRCS = $(PROG_OBJS:%.o=%.c) \
+ $(LIST_OBJS:%.o=$(SRC)/common/list/%.c)
-LDLIBS += $(SSH_COMMON_LDLIBS) -lcrypto -lsocket
+all: $(PROG)
-POFILE_DIR= ..
+include ../Makefile.lx
+include $(SRC)/cmd/Makefile.cmd
-.KEEP_STATE:
+# override the install directory
+ROOTBIN = $(ROOTBRANDDIR)
+CLOBBERFILES = $(OBJS) $(ROOTPROG)
-.PARALLEL: $(OBJS)
+UTSBASE = $(SRC)/uts
-all: $(PROG)
+CFLAGS += $(CCVERBOSE)
+CPPFLAGS += -D_REENTRANT -I$(UTSBASE)/common/brand/lx
+LDLIBS += -lzonecfg -lipadm -lsocket -linetutil -lnsl -lcmdutils -ldhcpagent
-$(PROG): $(OBJS) ../libssh/$(MACH)/libssh.a ../libopenbsd-compat/$(MACH)/libopenbsd-compat.a
- $(LINK.c) $(OBJS) -o $@ $(LDLIBS) $(DYNFLAGS)
- $(POST_PROCESS)
+.KEEP_STATE:
install: all $(ROOTPROG)
-$(DIRS):
- $(INS.dir)
+clean:
+ $(RM) $(PROG) $(OBJS)
-$(ROOTBIN)/ssh-keygen: $(ROOTLIBSUNSSH)/ssh-keygen
- -$(RM) $@; $(SYMLINK) ../lib/sunssh/ssh-keygen $@
+lint: lint_PROG lint_SRCS
-clean:
- $(RM) -f $(OBJS) $(PROG)
+$(PROG): $(OBJS)
+ $(LINK.c) -o $@ $(OBJS) $(LDLIBS)
+ $(POST_PROCESS)
+
+%.o: %.c
+ $(COMPILE.c) $<
+ $(POST_PROCESS_O)
-lint: lint_SRCS
+%.o: $(SRC)/common/list/%.c
+ $(COMPILE.c) $<
+ $(POST_PROCESS_O)
-include ../Makefile.msg.targ
-include ../../Makefile.targ
+include $(SRC)/cmd/Makefile.targ
diff --git a/usr/src/lib/brand/lx/lx_init/lxinit.c b/usr/src/lib/brand/lx/lx_init/lxinit.c
new file mode 100644
index 0000000000..8d1b8d2376
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_init/lxinit.c
@@ -0,0 +1,872 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * lxinit performs zone-specific initialization prior to handing control to the
+ * guest Linux init. This primarily consists of:
+ *
+ * - Starting ipmgmtd
+ * - Configuring network interfaces
+ * - Adding a default route
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <limits.h>
+#include <net/if.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <stropts.h>
+#include <sys/ioccom.h>
+#include <sys/stat.h>
+#include <sys/systeminfo.h>
+#include <sys/sockio.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/varargs.h>
+#include <unistd.h>
+#include <libintl.h>
+#include <locale.h>
+#include <libcmdutils.h>
+
+#include <netinet/dhcp.h>
+#include <dhcpagent_util.h>
+#include <dhcpagent_ipc.h>
+
+#include <arpa/inet.h>
+#include <net/route.h>
+#include <libipadm.h>
+#include <libzonecfg.h>
+#include <libinetutil.h>
+#include <sys/lx_brand.h>
+
+#include "run_command.h"
+
+static void lxi_err(char *msg, ...) __NORETURN;
+static void lxi_err(char *msg, ...);
+
+#define IPMGMTD_PATH "/lib/inet/ipmgmtd"
+#define IN_NDPD_PATH "/usr/lib/inet/in.ndpd"
+
+#define PREFIX_LOG_WARN "lx_init warn: "
+#define PREFIX_LOG_ERR "lx_init err: "
+
+#define RTMBUFSZ (sizeof (struct rt_msghdr) + \
+ (3 * sizeof (struct sockaddr_in)))
+
+ipadm_handle_t iph;
+
+static void
+lxi_err(char *msg, ...)
+{
+ char buf[1024];
+ int len;
+ va_list ap;
+
+ va_start(ap, msg);
+ /*LINTED*/
+ len = vsnprintf(buf, sizeof (buf), msg, ap);
+ va_end(ap);
+
+ (void) write(1, PREFIX_LOG_ERR, strlen(PREFIX_LOG_ERR));
+ (void) write(1, buf, len);
+ (void) write(1, "\n", 1);
+
+ /*
+ * Since a non-zero exit will cause the zone to reboot, a pause here
+ * will prevent a mis-configured zone from spinning in a reboot loop.
+ */
+ pause();
+ exit(1);
+ /*NOTREACHED*/
+}
+
+static void
+lxi_warn(char *msg, ...)
+{
+ char buf[1024];
+ int len;
+ va_list ap;
+
+ va_start(ap, msg);
+ /*LINTED*/
+ len = vsnprintf(buf, sizeof (buf), msg, ap);
+ va_end(ap);
+
+ (void) write(1, PREFIX_LOG_WARN, strlen(PREFIX_LOG_WARN));
+ (void) write(1, buf, len);
+ (void) write(1, "\n", 1);
+}
+
+static void
+lxi_log_open()
+{
+ int fd = open("/dev/console", O_WRONLY);
+
+ if (fd < 0) {
+ /* hard to log at this point... */
+ exit(1);
+ } else if (fd != 1) {
+ /*
+ * Use stdout as the log fd. Init should start with no files
+ * open, so we should be required to perform this relocation
+ * every time.
+ */
+ if (dup2(fd, 1) != 1) {
+ exit(1);
+ }
+ }
+}
+
+static void
+lxi_log_close()
+{
+ (void) close(0);
+ (void) close(1);
+}
+
+static zone_dochandle_t
+lxi_config_open()
+{
+ zoneid_t zoneid;
+ char zonename[ZONENAME_MAX];
+ zone_dochandle_t handle;
+ zone_iptype_t iptype;
+ int res;
+
+ zoneid = getzoneid();
+ if (getzonenamebyid(zoneid, zonename, sizeof (zonename)) < 0) {
+ lxi_err("could not determine zone name");
+ }
+
+ if ((handle = zonecfg_init_handle()) == NULL)
+ lxi_err("internal libzonecfg.so.1 error", 0);
+
+ if ((res = zonecfg_get_handle(zonename, handle)) != Z_OK) {
+ zonecfg_fini_handle(handle);
+ lxi_err("could not locate zone config %d", res);
+ }
+
+ /*
+ * Only exclusive stack is supported.
+ */
+ if (zonecfg_get_iptype(handle, &iptype) != Z_OK ||
+ iptype != ZS_EXCLUSIVE) {
+ zonecfg_fini_handle(handle);
+ lxi_err("lx zones do not support shared IP stacks");
+ }
+
+ return (handle);
+
+}
+
+static int
+zone_find_attr(struct zone_res_attrtab *attrs, const char *name,
+ const char **result)
+{
+ while (attrs != NULL) {
+ if (strncmp(attrs->zone_res_attr_name, name,
+ MAXNAMELEN) == 0) {
+ *result = attrs->zone_res_attr_value;
+ return (0);
+ }
+ attrs = attrs->zone_res_attr_next;
+ }
+ return (-1);
+}
+
+void
+lxi_svc_start(char *name, char *path, char *fmri)
+{
+ pid_t pid;
+ int status;
+ char *const argv[] = {
+ name,
+ NULL
+ };
+ char *const envp[] = {
+ fmri,
+ NULL
+ };
+
+ pid = fork();
+ if (pid == -1) {
+ lxi_err("fork() failed: %s", strerror(errno));
+ }
+
+ if (pid == 0) {
+ /* child */
+ const char *zroot = zone_get_nroot();
+ char cmd[MAXPATHLEN];
+
+ /*
+ * Construct the full path to the binary, including the native
+ * system root (e.g. "/native") if in use for this zone:
+ */
+ (void) snprintf(cmd, sizeof (cmd), "%s%s", zroot != NULL ?
+ zroot : "", path);
+
+ execve(cmd, argv, envp);
+
+ lxi_err("execve(%s) failed: %s", cmd, strerror(errno));
+ /* NOTREACHED */
+ }
+
+ /* parent */
+ while (wait(&status) != pid) {
+ /* EMPTY */;
+ }
+
+ if (WIFEXITED(status)) {
+ if (WEXITSTATUS(status) != 0) {
+ lxi_err("%s[%d] exited: %d", name,
+ (int)pid, WEXITSTATUS(status));
+ }
+ } else if (WIFSIGNALED(status)) {
+ lxi_err("%s[%d] died on signal: %d", name,
+ (int)pid, WTERMSIG(status));
+ } else {
+ lxi_err("%s[%d] failed in unknown way", name,
+ (int)pid);
+ }
+}
+
+void
+lxi_net_ipmgmtd_start()
+{
+ lxi_svc_start("ipmgmtd", IPMGMTD_PATH,
+ "SMF_FMRI=svc:/network/ip-interface-management:default");
+}
+
+void
+lxi_net_ndpd_start()
+{
+ lxi_svc_start("in.ndpd", IN_NDPD_PATH,
+ "SMF_FMRI=svc:/network/routing/ndp:default");
+}
+
+
+static void
+lxi_net_ipadm_open()
+{
+ ipadm_status_t status;
+
+ if ((status = ipadm_open(&iph, IPH_LEGACY)) != IPADM_SUCCESS) {
+ lxi_err("Error opening ipadm handle: %s",
+ ipadm_status2str(status));
+ }
+}
+
+static void
+lxi_net_ipadm_close()
+{
+ ipadm_close(iph);
+}
+
+void
+lxi_net_plumb(const char *iface)
+{
+ ipadm_status_t status;
+ char ifbuf[LIFNAMSIZ];
+
+ /* ipadm_create_if stomps on ifbuf, so create a copy: */
+ (void) strncpy(ifbuf, iface, sizeof (ifbuf));
+
+ if ((status = ipadm_create_if(iph, ifbuf, AF_INET, IPADM_OPT_ACTIVE))
+ != IPADM_SUCCESS) {
+ lxi_err("ipadm_create_if error %d: %s/v4: %s",
+ status, iface, ipadm_status2str(status));
+ }
+
+ if ((status = ipadm_create_if(iph, ifbuf, AF_INET6, IPADM_OPT_ACTIVE))
+ != IPADM_SUCCESS) {
+ lxi_err("ipadm_create_if error %d: %s/v6: %s",
+ status, iface, ipadm_status2str(status));
+ }
+}
+
+static int
+lxi_getif(int af, char *iface, int len, boolean_t first_ipv4_configured)
+{
+ struct lifreq lifr;
+ int s = socket(af, SOCK_DGRAM, 0);
+ if (s < 0) {
+ lxi_warn("socket error %d: bringing up %s: %s",
+ errno, iface, strerror(errno));
+ return (-1);
+ }
+
+ /*
+ * We need a new logical interface for every IP address we add, except
+ * for the very first IPv4 address.
+ */
+ if (af == AF_INET6 || first_ipv4_configured) {
+ (void) strncpy(lifr.lifr_name, iface, sizeof (lifr.lifr_name));
+ (void) memset(&lifr.lifr_addr, 0, sizeof (lifr.lifr_addr));
+ if (ioctl(s, SIOCLIFADDIF, (caddr_t)&lifr) < 0) {
+ if (close(s) != 0) {
+ lxi_warn("failed to close socket: %s\n",
+ strerror(errno));
+ }
+ return (-1);
+ }
+ (void) strncpy(iface, lifr.lifr_name, len);
+ }
+
+ if (close(s) != 0) {
+ lxi_warn("failed to close socket: %s\n",
+ strerror(errno));
+ }
+ return (0);
+}
+
+static int
+lxi_iface_ip(const char *origiface, const char *addr,
+ boolean_t *first_ipv4_configured)
+{
+ static int addrnum = 0;
+ ipadm_status_t status;
+ ipadm_addrobj_t ipaddr = NULL;
+ char iface[LIFNAMSIZ];
+ char aobjname[IPADM_AOBJSIZ];
+ int af, err = 0;
+
+ (void) strncpy(iface, origiface, sizeof (iface));
+
+ af = strstr(addr, ":") == NULL ? AF_INET : AF_INET6;
+ if (lxi_getif(af, iface, sizeof (iface), *first_ipv4_configured) != 0) {
+ lxi_warn("failed to create new logical interface "
+ "on %s: %s", origiface, strerror(errno));
+ return (-1);
+ }
+
+ (void) snprintf(aobjname, IPADM_AOBJSIZ, "%s/addr%d", iface,
+ addrnum++);
+
+ if ((status = ipadm_create_addrobj(IPADM_ADDR_STATIC, aobjname,
+ &ipaddr)) != IPADM_SUCCESS) {
+ lxi_warn("ipadm_create_addrobj error %d: addr %s, "
+ "interface %s: %s\n", status, addr, iface,
+ ipadm_status2str(status));
+ return (-2);
+ }
+
+ if ((status = ipadm_set_addr(ipaddr, addr, AF_UNSPEC))
+ != IPADM_SUCCESS) {
+ lxi_warn("ipadm_set_addr error %d: addr %s"
+ ", interface %s: %s\n", status, addr,
+ iface, ipadm_status2str(status));
+ err = -3;
+ goto done;
+ }
+
+ if ((status = ipadm_create_addr(iph, ipaddr,
+ IPADM_OPT_ACTIVE | IPADM_OPT_UP)) != IPADM_SUCCESS) {
+ lxi_warn("ipadm_create_addr error for %s: %s\n", iface,
+ ipadm_status2str(status));
+ err = -4;
+ goto done;
+ }
+
+ if (af == AF_INET) {
+ *first_ipv4_configured = B_TRUE;
+ }
+
+done:
+ ipadm_destroy_addrobj(ipaddr);
+ return (err);
+}
+
+static int
+lxi_iface_dhcp(const char *origiface, boolean_t *first_ipv4_configured)
+{
+ dhcp_ipc_request_t *dhcpreq = NULL;
+ dhcp_ipc_reply_t *dhcpreply = NULL;
+ int err = 0, timeout = 5;
+ char iface[LIFNAMSIZ];
+
+ (void) strncpy(iface, origiface, sizeof (iface));
+
+ if (lxi_getif(AF_INET, iface, sizeof (iface), *first_ipv4_configured)
+ != 0) {
+ lxi_warn("failed to create new logical interface "
+ "on %s: %s", origiface, strerror(errno));
+ return (-1);
+ }
+
+ if (dhcp_start_agent(timeout) != 0) {
+ lxi_err("Failed to start dhcpagent\n");
+ /* NOTREACHED */
+ }
+
+ dhcpreq = dhcp_ipc_alloc_request(DHCP_START, iface,
+ NULL, 0, DHCP_TYPE_NONE);
+ if (dhcpreq == NULL) {
+ lxi_warn("Unable to allocate memory "
+ "to start DHCP on %s\n", iface);
+ return (-1);
+ }
+
+ err = dhcp_ipc_make_request(dhcpreq, &dhcpreply, timeout);
+ if (err != 0) {
+ free(dhcpreq);
+ lxi_warn("Failed to start DHCP on %s: %s\n", iface,
+ dhcp_ipc_strerror(err));
+ return (-1);
+ }
+ err = dhcpreply->return_code;
+ if (err != 0) {
+ lxi_warn("Failed to start DHCP on %s: %s\n", iface,
+ dhcp_ipc_strerror(err));
+ goto done;
+ }
+
+ *first_ipv4_configured = B_TRUE;
+
+done:
+ free(dhcpreq);
+ free(dhcpreply);
+ return (err);
+}
+
+/*
+ * Initialize an IPv6 link-local address on a given interface
+ */
+static int
+lxi_iface_ipv6_link_local(const char *iface)
+{
+ struct lifreq lifr;
+ int s;
+
+ s = socket(AF_INET6, SOCK_DGRAM, 0);
+ if (s == -1) {
+ lxi_warn("socket error %d: bringing up %s: %s",
+ errno, iface, strerror(errno));
+ }
+
+ (void) strncpy(lifr.lifr_name, iface, sizeof (lifr.lifr_name));
+ if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) {
+ lxi_warn("SIOCGLIFFLAGS error %d: bringing up %s: %s",
+ errno, iface, strerror(errno));
+ return (-1);
+ }
+
+ lifr.lifr_flags |= IFF_UP;
+ if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) {
+ lxi_warn("SIOCSLIFFLAGS error %d: bringing up %s: %s",
+ errno, iface, strerror(errno));
+ return (-1);
+ }
+
+ (void) close(s);
+ return (0);
+}
+
+static int
+lxi_iface_gateway(const char *iface, const char *dst, int dstpfx,
+ const char *gwaddr)
+{
+ int idx, len, sockfd;
+ char rtbuf[RTMBUFSZ];
+ struct rt_msghdr *rtm = (struct rt_msghdr *)rtbuf;
+ struct sockaddr_in *dst_sin = (struct sockaddr_in *)
+ (rtbuf + sizeof (struct rt_msghdr));
+ struct sockaddr_in *gw_sin = (struct sockaddr_in *)(dst_sin + 1);
+ struct sockaddr_in *netmask_sin = (struct sockaddr_in *)(gw_sin + 1);
+
+ (void) bzero(rtm, RTMBUFSZ);
+ rtm->rtm_addrs = RTA_DST | RTA_GATEWAY | RTA_NETMASK;
+ rtm->rtm_flags = RTF_UP | RTF_STATIC | RTF_GATEWAY;
+ rtm->rtm_msglen = sizeof (rtbuf);
+ rtm->rtm_pid = getpid();
+ rtm->rtm_type = RTM_ADD;
+ rtm->rtm_version = RTM_VERSION;
+
+
+ /*
+ * The destination and netmask components have already been zeroed,
+ * which represents the default gateway. If we were passed a more
+ * specific destination network, use that instead.
+ */
+ dst_sin->sin_family = AF_INET;
+ netmask_sin->sin_family = AF_INET;
+ if (dst != NULL) {
+ struct sockaddr *mask = (struct sockaddr *)netmask_sin;
+
+ if ((inet_pton(AF_INET, dst, &(dst_sin->sin_addr))) != 1 ||
+ plen2mask(dstpfx, AF_INET, mask) != 0) {
+ lxi_warn("bad destination network %s/%d: %s", dst,
+ dstpfx, strerror(errno));
+ return (-1);
+ }
+ }
+
+ if ((inet_pton(AF_INET, gwaddr, &(gw_sin->sin_addr))) != 1) {
+ lxi_warn("bad gateway %s: %s", gwaddr, strerror(errno));
+ return (-1);
+ }
+
+ if (iface != NULL) {
+ if ((idx = if_nametoindex(iface)) == 0) {
+ lxi_warn("unable to get interface index for %s: %s\n",
+ iface, strerror(errno));
+ return (-1);
+ }
+ rtm->rtm_index = idx;
+ }
+
+ if ((sockfd = socket(PF_ROUTE, SOCK_RAW, AF_INET)) < 0) {
+ lxi_warn("socket(PF_ROUTE): %s\n", strerror(errno));
+ return (-1);
+ }
+
+ if ((len = write(sockfd, rtbuf, rtm->rtm_msglen)) < 0) {
+ lxi_warn("could not write rtmsg: %s", strerror(errno));
+ close(sockfd);
+ return (-1);
+ } else if (len < rtm->rtm_msglen) {
+ lxi_warn("write() rtmsg incomplete");
+ close(sockfd);
+ return (-1);
+ }
+
+ close(sockfd);
+ return (0);
+}
+
+static void
+lxi_net_loopback()
+{
+ const char *iface = "lo0";
+ boolean_t first_ipv4_configured = B_FALSE;
+
+ lxi_net_plumb(iface);
+ (void) lxi_iface_ip(iface, "127.0.0.1/8", &first_ipv4_configured);
+ (void) lxi_iface_ipv6_link_local(iface);
+}
+
+
+/*
+ * This function is used when the "ips" property doesn't exist in a zone's
+ * configuration. It may be an older configuration, so we should search for
+ * "ip" and "netmask" and convert them into the new format.
+ */
+static int
+lxi_get_old_ip(struct zone_res_attrtab *attrs, const char **ipaddrs,
+ char *cidraddr, int len)
+{
+
+ const char *netmask;
+ int prefixlen;
+ struct sockaddr_in mask_sin;
+
+ lxi_warn("Could not find \"ips\" property for zone. Looking "
+ "for older \"ip\" and \"netmask\" properties, instead.");
+
+ if (zone_find_attr(attrs, "ip", ipaddrs) != 0) {
+ return (-1);
+ }
+
+ if (strcmp(*ipaddrs, "dhcp") == 0) {
+ return (0);
+ }
+
+ if (zone_find_attr(attrs, "netmask", &netmask) != 0) {
+ lxi_err("could not find netmask for interface");
+ /* NOTREACHED */
+ }
+
+ /* Convert the netmask to a number */
+ mask_sin.sin_family = AF_INET;
+ if (inet_pton(AF_INET, netmask, &mask_sin.sin_addr) != 1) {
+ lxi_err("invalid netmask address: %s\n",
+ strerror(errno));
+ /* NOTREACHED */
+ }
+ prefixlen = mask2plen((struct sockaddr *)&mask_sin);
+
+ /*
+ * Write out the IP address in the new format and use
+ * that instead
+ */
+ (void) snprintf(cidraddr, len, "%s/%d", *ipaddrs, prefixlen);
+
+ *ipaddrs = cidraddr;
+ return (0);
+}
+
+static void
+lxi_net_setup(zone_dochandle_t handle)
+{
+ struct zone_nwiftab lookup;
+ boolean_t do_addrconf = B_FALSE;
+
+ if (zonecfg_setnwifent(handle) != Z_OK)
+ return;
+ while (zonecfg_getnwifent(handle, &lookup) == Z_OK) {
+ const char *iface = lookup.zone_nwif_physical;
+ struct zone_res_attrtab *attrs = lookup.zone_nwif_attrp;
+ const char *ipaddrs, *primary, *gateway;
+ char ipaddrs_copy[MAXNAMELEN], cidraddr[BUFSIZ],
+ *ipaddr, *tmp, *lasts;
+ boolean_t first_ipv4_configured = B_FALSE;
+ boolean_t *ficp = &first_ipv4_configured;
+
+ lxi_net_plumb(iface);
+ if (zone_find_attr(attrs, "ips", &ipaddrs) != 0 &&
+ lxi_get_old_ip(attrs, &ipaddrs, cidraddr, BUFSIZ) != 0) {
+ lxi_warn("Could not find a valid network configuration "
+ "for the %s interface", iface);
+ continue;
+ }
+
+ if (lxi_iface_ipv6_link_local(iface) != 0) {
+ lxi_warn("unable to bring up link-local address on "
+ "interface %s", iface);
+ }
+
+ /*
+ * If we're going to be doing DHCP, we have to do it first since
+ * dhcpagent doesn't like to operate on non-zero logical
+ * interfaces.
+ */
+ if (strstr(ipaddrs, "dhcp") != NULL &&
+ lxi_iface_dhcp(iface, ficp) != 0) {
+ lxi_warn("Failed to start DHCP on %s\n", iface);
+ }
+
+ /*
+ * Copy the ipaddrs string, since strtok_r will write NUL
+ * characters into it.
+ */
+ (void) strlcpy(ipaddrs_copy, ipaddrs, MAXNAMELEN);
+ tmp = ipaddrs_copy;
+
+ /*
+ * Iterate over each IP and then set it up on the interface.
+ */
+ while ((ipaddr = strtok_r(tmp, ",", &lasts)) != NULL) {
+ tmp = NULL;
+ if (strcmp(ipaddr, "addrconf") == 0) {
+ do_addrconf = B_TRUE;
+ } else if (strcmp(ipaddr, "dhcp") == 0) {
+ continue;
+ } else if (lxi_iface_ip(iface, ipaddr, ficp) < 0) {
+ lxi_warn("Unable to add new IP address (%s) "
+ "to interface %s", ipaddr, iface);
+ }
+ }
+
+ if (zone_find_attr(attrs, "primary", &primary) == 0 &&
+ strncmp(primary, "true", MAXNAMELEN) == 0 &&
+ zone_find_attr(attrs, "gateway", &gateway) == 0) {
+ lxi_iface_gateway(iface, NULL, 0, gateway);
+ }
+ }
+
+ if (do_addrconf) {
+ lxi_net_ndpd_start();
+ }
+
+ (void) zonecfg_endnwifent(handle);
+}
+
+static void
+lxi_net_static_route(const char *line)
+{
+ /*
+ * Each static route line is a string of the form:
+ *
+ * "10.77.77.2|10.1.1.0/24|false"
+ *
+ * i.e. gateway address, destination network, and whether this is
+ * a "link local" route or a next hop route.
+ */
+ custr_t *cu = NULL;
+ char *gw = NULL, *dst = NULL;
+ int pfx = -1;
+ int i;
+
+ if (custr_alloc(&cu) != 0) {
+ lxi_err("custr_alloc failure");
+ }
+
+ for (i = 0; line[i] != '\0'; i++) {
+ if (gw == NULL) {
+ if (line[i] == '|') {
+ if ((gw = strdup(custr_cstr(cu))) == NULL) {
+ lxi_err("strdup failure");
+ }
+ custr_reset(cu);
+ } else {
+ if (custr_appendc(cu, line[i]) != 0) {
+ lxi_err("custr_appendc failure");
+ }
+ }
+ continue;
+ }
+
+ if (dst == NULL) {
+ if (line[i] == '/') {
+ if ((dst = strdup(custr_cstr(cu))) == NULL) {
+ lxi_err("strdup failure");
+ }
+ custr_reset(cu);
+ } else {
+ if (custr_appendc(cu, line[i]) != 0) {
+ lxi_err("custr_appendc failure");
+ }
+ }
+ continue;
+ }
+
+ if (pfx == -1) {
+ if (line[i] == '|') {
+ pfx = atoi(custr_cstr(cu));
+ custr_reset(cu);
+ } else {
+ if (custr_appendc(cu, line[i]) != 0) {
+ lxi_err("custr_appendc failure");
+ }
+ }
+ continue;
+ }
+
+ if (custr_appendc(cu, line[i]) != 0) {
+ lxi_err("custr_appendc failure");
+ }
+ }
+
+ /*
+ * We currently only support "next hop" routes, so ensure that
+ * "linklocal" is false:
+ */
+ if (strcmp(custr_cstr(cu), "false") != 0) {
+ lxi_warn("invalid static route: %s", line);
+ }
+
+ if (lxi_iface_gateway(NULL, dst, pfx, gw) != 0) {
+ lxi_err("failed to add route: %s/%d -> %s", dst, pfx, gw);
+ }
+
+ custr_free(cu);
+ free(gw);
+ free(dst);
+}
+
+static void
+lxi_net_static_routes(void)
+{
+ const char *cmd = "/native/usr/lib/brand/lx/routeinfo";
+ char *const argv[] = { "routeinfo", NULL };
+ char *const envp[] = { NULL };
+ int code;
+ struct stat st;
+ char errbuf[512];
+
+ if (stat(cmd, &st) != 0 || !S_ISREG(st.st_mode)) {
+ /*
+ * This binary is (potentially) shipped from another
+ * consolidation. If it does not exist, then the platform does
+ * not currently support static routes for LX-branded zones.
+ */
+ return;
+ }
+
+ /*
+ * Run the command, firing the callback for each line that it
+ * outputs. When this function returns, static route processing
+ * is complete.
+ */
+ if (run_command(cmd, argv, envp, errbuf, sizeof (errbuf),
+ lxi_net_static_route, &code) != 0 || code != 0) {
+ lxi_err("failed to run \"%s\": %s", cmd, errbuf);
+ }
+}
+
+static void
+lxi_config_close(zone_dochandle_t handle)
+{
+ zonecfg_fini_handle(handle);
+}
+
+static void
+lxi_init_exec(char **argv)
+{
+ const char *cmd = "/sbin/init";
+ char *const envp[] = { "container=zone", NULL };
+ int e;
+
+ argv[0] = "init";
+
+ /*
+ * systemd uses the 'container' env var to determine it is running
+ * inside a container. It only supports a few well-known types and
+ * treats anything else as 'other' but this is enough to make it
+ * behave better inside a zone. See 'detect_container' in systemd.
+ */
+ execve(cmd, argv, envp);
+ e = errno;
+
+ /*
+ * Because stdout was closed prior to exec, it must be opened again in
+ * the face of failure to log the error.
+ */
+ lxi_log_open();
+ lxi_err("execve(%s) failed: %s", cmd, strerror(e));
+}
+
+/*ARGSUSED*/
+int
+main(int argc, char *argv[])
+{
+ zone_dochandle_t handle;
+
+ lxi_log_open();
+
+ lxi_net_ipmgmtd_start();
+ lxi_net_ipadm_open();
+
+ handle = lxi_config_open();
+ lxi_net_loopback();
+ lxi_net_setup(handle);
+ lxi_config_close(handle);
+
+ lxi_net_static_routes();
+
+ lxi_net_ipadm_close();
+
+ lxi_log_close();
+
+ lxi_init_exec(argv);
+
+ /* NOTREACHED */
+ return (0);
+}
diff --git a/usr/src/lib/brand/lx/lx_init/pipe_stream.c b/usr/src/lib/brand/lx/lx_init/pipe_stream.c
new file mode 100644
index 0000000000..8f06a07906
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_init/pipe_stream.c
@@ -0,0 +1,326 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <port.h>
+#include <poll.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/debug.h>
+#include <sys/list.h>
+
+#include "pipe_stream.h"
+
+struct pipe_stream {
+ pipe_stream_loop_t *pis_loop;
+
+ boolean_t pis_finished;
+ boolean_t pis_associated;
+
+ void *pis_arg0;
+ void *pis_arg1;
+
+ int pis_fd_write;
+ int pis_fd_read;
+ list_node_t pis_linkage;
+};
+
+struct pipe_stream_loop {
+ int psl_port;
+
+ uint8_t *psl_buf;
+ size_t psl_buf_cap;
+ size_t psl_buf_occ;
+
+ list_t psl_pipes;
+
+ pipe_stream_data_cb *psl_cb_data;
+ pipe_stream_eof_cb *psl_cb_eof;
+ pipe_stream_error_cb *psl_cb_error;
+};
+
+
+int
+pipe_stream_loop_fini(pipe_stream_loop_t *psl)
+{
+ if (psl == NULL) {
+ return (0);
+ }
+
+ VERIFY0(close(psl->psl_port));
+
+ while (!list_is_empty(&psl->psl_pipes)) {
+ pipe_stream_fini(list_head(&psl->psl_pipes));
+ }
+
+ list_destroy(&psl->psl_pipes);
+ free(psl);
+
+ return (0);
+}
+
+int
+pipe_stream_loop_init(pipe_stream_loop_t **pslp, size_t bufsize,
+ pipe_stream_data_cb *data_cb, pipe_stream_eof_cb *eof_cb,
+ pipe_stream_error_cb *error_cb)
+{
+ pipe_stream_loop_t *psl;
+
+ if ((psl = calloc(1, sizeof (*psl))) == NULL) {
+ return (-1);
+ }
+
+ psl->psl_buf_cap = bufsize;
+ psl->psl_buf_occ = 0;
+ if ((psl->psl_buf = calloc(1, bufsize)) == NULL) {
+ free(psl);
+ return (-1);
+ }
+
+ if ((psl->psl_port = port_create()) == -1) {
+ free(psl->psl_buf);
+ free(psl);
+ return (-1);
+ }
+
+ psl->psl_cb_data = data_cb;
+ psl->psl_cb_eof = eof_cb;
+ psl->psl_cb_error = error_cb;
+
+ list_create(&psl->psl_pipes, sizeof (pipe_stream_t),
+ offsetof(pipe_stream_t, pis_linkage));
+
+ *pslp = psl;
+ return (0);
+}
+
+boolean_t
+pipe_stream_loop_should_run(pipe_stream_loop_t *psl)
+{
+ pipe_stream_t *pis;
+
+ for (pis = list_head(&psl->psl_pipes); pis != NULL;
+ pis = list_next(&psl->psl_pipes, pis)) {
+ if (!pis->pis_finished) {
+ return (B_TRUE);
+ }
+ }
+
+ return (B_FALSE);
+}
+
+int
+pipe_stream_loop_run(pipe_stream_loop_t *psl)
+{
+ pipe_stream_t *pis;
+ port_event_t pev;
+ ssize_t sz;
+
+ for (pis = list_head(&psl->psl_pipes); pis != NULL;
+ pis = list_next(&psl->psl_pipes, pis)) {
+ if (pis->pis_finished || pis->pis_associated) {
+ /*
+ * Skip streams that are already finished, as well as
+ * those that have already been associated with the
+ * port.
+ */
+ continue;
+ }
+
+ if (port_associate(psl->psl_port, PORT_SOURCE_FD,
+ pis->pis_fd_read, POLLIN, pis) != 0) {
+ return (-1);
+ }
+ }
+
+again:
+ if (port_get(psl->psl_port, &pev, NULL) != 0) {
+ switch (errno) {
+ case ETIME:
+ /*
+ * Timeout expired; return to caller.
+ */
+ return (0);
+
+ case EINTR:
+ /*
+ * Interrupted by signal. Try again.
+ */
+ goto again;
+
+ default:
+ return (-1);
+ }
+ }
+
+ VERIFY(pev.portev_source == PORT_SOURCE_FD);
+ pis = (pipe_stream_t *)pev.portev_user;
+ VERIFY((int)pev.portev_object == pis->pis_fd_read);
+ pis->pis_associated = B_FALSE;
+
+read_again:
+ if ((sz = read(pis->pis_fd_read, psl->psl_buf,
+ psl->psl_buf_cap)) == -1) {
+ if (errno == EINTR) {
+ goto read_again;
+ }
+
+ if (psl->psl_cb_error != NULL) {
+ psl->psl_cb_error(errno, pis->pis_arg0, pis->pis_arg1);
+ }
+
+ VERIFY0(close(pis->pis_fd_read));
+ pis->pis_fd_read = -1;
+ pis->pis_finished = B_TRUE;
+ }
+ psl->psl_buf_occ = sz;
+
+ if (sz == 0) {
+ /*
+ * Stream EOF.
+ */
+ pis->pis_finished = B_TRUE;
+ VERIFY0(close(pis->pis_fd_read));
+ pis->pis_fd_read = -1;
+ if (psl->psl_cb_eof != NULL) {
+ psl->psl_cb_eof(pis->pis_arg0, pis->pis_arg1);
+ }
+ return (0);
+ }
+
+ if (psl->psl_cb_data != NULL) {
+ int cbr = psl->psl_cb_data(psl->psl_buf, psl->psl_buf_occ,
+ pis->pis_arg0, pis->pis_arg1);
+
+ if (cbr != 0) {
+ /*
+ * Callback failure: close file descriptor.
+ */
+ pis->pis_finished = B_TRUE;
+ VERIFY0(close(pis->pis_fd_read));
+ pis->pis_fd_read = -1;
+ if (psl->psl_cb_eof != NULL) {
+ psl->psl_cb_eof(pis->pis_arg0, pis->pis_arg1);
+ }
+ }
+
+ return (0);
+ }
+
+ return (0);
+}
+
+int
+pipe_stream_init(pipe_stream_loop_t *psl, pipe_stream_t **pisp, void *arg0,
+ void *arg1)
+{
+ int e = 0;
+ pipe_stream_t *pis;
+ int fds[2] = { -1, -1 };
+
+ if ((pis = calloc(1, sizeof (*pis))) == NULL) {
+ return (-1);
+ }
+
+ if (pipe(fds) != 0) {
+ e = errno;
+ goto fail;
+ }
+
+ pis->pis_fd_read = fds[0];
+ pis->pis_fd_write = fds[1];
+
+ pis->pis_arg0 = arg0;
+ pis->pis_arg1 = arg1;
+
+ pis->pis_finished = B_FALSE;
+ pis->pis_associated = B_FALSE;
+
+ pis->pis_loop = psl;
+ list_insert_tail(&psl->psl_pipes, pis);
+
+ *pisp = pis;
+ return (0);
+
+fail:
+ if (fds[0] != -1) {
+ VERIFY0(close(fds[0]));
+ }
+ if (fds[1] != -1) {
+ VERIFY0(close(fds[1]));
+ }
+ free(pis);
+ errno = e;
+ return (-1);
+}
+
+int
+pipe_stream_fini(pipe_stream_t *pis)
+{
+ if (pis == NULL) {
+ return (0);
+ }
+
+ if (pis->pis_fd_read != -1) {
+ VERIFY0(close(pis->pis_fd_read));
+ }
+ if (pis->pis_fd_write != -1) {
+ VERIFY0(close(pis->pis_fd_write));
+ }
+
+ list_remove(&pis->pis_loop->psl_pipes, pis);
+
+ free(pis);
+ return (0);
+}
+
+/*
+ * Called in the parent, after forking, to close the "write" end of the pipe.
+ */
+void
+pipe_stream_parent_afterfork(pipe_stream_t *pis)
+{
+ if (pis->pis_fd_write != -1) {
+ (void) close(pis->pis_fd_write);
+ pis->pis_fd_write = -1;
+ }
+}
+
+/*
+ * Called in the child, after forking, to close the "read" end of the
+ * pipe, and to dup the file descriptor into the right place.
+ */
+int
+pipe_stream_child_afterfork(pipe_stream_t *pis, int dup_to)
+{
+ int e = 0;
+
+ if (dup_to != -1) {
+ if (dup2(pis->pis_fd_write, dup_to) == -1) {
+ e = errno;
+ }
+ VERIFY0(close(pis->pis_fd_write));
+ pis->pis_fd_write = dup_to;
+ }
+
+ (void) close(pis->pis_fd_read);
+ pis->pis_fd_read = -1;
+
+ errno = e;
+ return (e == 0 ? 0 : -1);
+}
diff --git a/usr/src/lib/brand/lx/lx_init/pipe_stream.h b/usr/src/lib/brand/lx/lx_init/pipe_stream.h
new file mode 100644
index 0000000000..140ef18f8c
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_init/pipe_stream.h
@@ -0,0 +1,48 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _PIPE_STREAM_H
+#define _PIPE_STREAM_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef int pipe_stream_data_cb(const uint8_t *, size_t, void *, void *);
+typedef void pipe_stream_eof_cb(void *, void *);
+typedef void pipe_stream_error_cb(int, void *, void *);
+
+typedef struct pipe_stream pipe_stream_t;
+typedef struct pipe_stream_loop pipe_stream_loop_t;
+
+extern int pipe_stream_loop_fini(pipe_stream_loop_t *);
+extern int pipe_stream_loop_init(pipe_stream_loop_t **, size_t,
+ pipe_stream_data_cb *, pipe_stream_eof_cb *, pipe_stream_error_cb *);
+
+extern int pipe_stream_init(pipe_stream_loop_t *, pipe_stream_t **, void *,
+ void *);
+extern int pipe_stream_fini(pipe_stream_t *);
+
+extern void pipe_stream_parent_afterfork(pipe_stream_t *);
+extern int pipe_stream_child_afterfork(pipe_stream_t *, int);
+
+extern boolean_t pipe_stream_loop_should_run(pipe_stream_loop_t *);
+extern int pipe_stream_loop_run(pipe_stream_loop_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _PIPE_STREAM_H */
diff --git a/usr/src/lib/brand/lx/lx_init/run_command.c b/usr/src/lib/brand/lx/lx_init/run_command.c
new file mode 100644
index 0000000000..ad00b60482
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_init/run_command.c
@@ -0,0 +1,281 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <err.h>
+#include <strings.h>
+#include <unistd.h>
+#include <wait.h>
+#include <sys/types.h>
+#include <sys/debug.h>
+#include <libcmdutils.h>
+
+#include "run_command.h"
+#include "pipe_stream.h"
+
+typedef struct cmd {
+ int cmd_pid;
+ int cmd_wstatus;
+ pipe_stream_t *cmd_pipe[2];
+ custr_t *cmd_err;
+ custr_t *cmd_out;
+ run_command_line_cb *cmd_func;
+ boolean_t cmd_cancel;
+} cmd_t;
+
+static int cb_data(const uint8_t *, size_t, void *, void *);
+static void cb_eof(void *, void *);
+static void cb_error(int, void *, void *);
+
+void
+post_error(cmd_t *cmd, const char *estr)
+{
+ if (cmd->cmd_cancel) {
+ return;
+ }
+ cmd->cmd_cancel = B_TRUE;
+
+ custr_reset(cmd->cmd_err);
+ (void) custr_append(cmd->cmd_err, estr);
+}
+
+int
+run_command(const char *path, char *const argv[], char *const envp[],
+ char *errbuf, size_t errlen, run_command_line_cb *func, int *status)
+{
+ pipe_stream_loop_t *psl = NULL;
+ int e = 0;
+ cmd_t cmd;
+ pid_t wpid;
+
+ bzero(&cmd, sizeof (cmd));
+
+ cmd.cmd_func = func;
+
+ /*
+ * Allocate string buffers for stdout line buffering and error
+ * messages:
+ */
+ if (custr_alloc_buf(&cmd.cmd_err, errbuf, errlen) != 0 ||
+ custr_alloc(&cmd.cmd_out) != 0) {
+ e = errno;
+ goto out;
+ }
+
+ /*
+ * Initialise pipe stream event loop:
+ */
+ if (pipe_stream_loop_init(&psl, 256, cb_data, cb_eof, cb_error) != 0) {
+ e = errno;
+ post_error(&cmd, "could not init pipe stream loop");
+ goto out;
+ }
+
+ /*
+ * Create pipe streams for stdout and stderr communication with
+ * child process:
+ */
+ if (pipe_stream_init(psl, &cmd.cmd_pipe[0], &cmd,
+ (void *)STDOUT_FILENO) != 0 ||
+ pipe_stream_init(psl, &cmd.cmd_pipe[1], &cmd,
+ (void *)STDERR_FILENO) != 0) {
+ e = errno;
+ post_error(&cmd, "could not init pipe streams");
+ goto out;
+ }
+
+ /*
+ * Fork a child process:
+ */
+ if ((cmd.cmd_pid = fork()) == -1) {
+ e = errno;
+ post_error(&cmd, "could not fork");
+ goto out;
+ }
+
+ if (cmd.cmd_pid == 0) {
+ /*
+ * This is the child process. Clean up file descriptors, and
+ * connect stdio to the pipes we allocated:
+ */
+ VERIFY0(close(STDIN_FILENO));
+ VERIFY0(pipe_stream_child_afterfork(cmd.cmd_pipe[0],
+ STDOUT_FILENO));
+ VERIFY0(pipe_stream_child_afterfork(cmd.cmd_pipe[1],
+ STDERR_FILENO));
+ closefrom(3);
+
+ execve(path, argv, envp);
+ err(127, "exec(%s) failed", path);
+ }
+
+ /*
+ * Back in the parent. Close the remote end of the stdio pipes:
+ */
+ pipe_stream_parent_afterfork(cmd.cmd_pipe[0]);
+ pipe_stream_parent_afterfork(cmd.cmd_pipe[1]);
+
+ /*
+ * Run the pipe event loop until all streams are completely
+ * consumed:
+ */
+ while (pipe_stream_loop_should_run(psl)) {
+ if (pipe_stream_loop_run(psl) != 0) {
+ e = errno;
+ post_error(&cmd, "pipe stream loop run failure");
+ goto out;
+ }
+ }
+
+ /*
+ * Collect exit status of child process:
+ */
+ while ((wpid = waitpid(cmd.cmd_pid, &cmd.cmd_wstatus, 0)) !=
+ cmd.cmd_pid) {
+ if (wpid == -1 && errno != EINTR) {
+ e = errno;
+ post_error(&cmd, "waitpid failure");
+ goto out;
+ }
+ }
+
+ /*
+ * If the child died on a signal, fail the whole operation:
+ */
+ if (WIFSIGNALED(cmd.cmd_wstatus)) {
+ e = ENXIO;
+ post_error(&cmd, "child process died on signal");
+ (void) custr_append_printf(cmd.cmd_err, " (pid %d signal %d)",
+ cmd.cmd_pid, WTERMSIG(cmd.cmd_wstatus));
+ goto out;
+ }
+
+ /*
+ * If the child did not appear to exit, fail the whole operation:
+ */
+ if (!WIFEXITED(cmd.cmd_wstatus)) {
+ e = ENXIO;
+ post_error(&cmd, "child process did not exit");
+ (void) custr_append_printf(cmd.cmd_err, " (pid %d status %x)",
+ cmd.cmd_pid, cmd.cmd_wstatus);
+ goto out;
+ }
+
+ /*
+ * Report exit status to the caller:
+ */
+ *status = WEXITSTATUS(cmd.cmd_wstatus);
+ e = 0;
+
+out:
+ VERIFY0(pipe_stream_loop_fini(psl));
+ /*
+ * Note that freeing the static error custr_t does not touch the
+ * underlying storage; we use this property to return the error
+ * message (if one exists) to the caller.
+ */
+ custr_free(cmd.cmd_err);
+ custr_free(cmd.cmd_out);
+ errno = e;
+ return (e == 0 ? 0 : -1);
+}
+
+static int
+cb_data(const uint8_t *buf, size_t sz, void *arg0, void *arg1)
+{
+ cmd_t *cmd = arg0;
+ int fd = (int)arg1;
+ unsigned int i;
+
+ if (cmd->cmd_cancel) {
+ return (-1);
+ }
+
+ switch (fd) {
+ case STDOUT_FILENO:
+ for (i = 0; i < sz; i++) {
+ if (buf[i] == '\0' || buf[i] == '\r') {
+ continue;
+ }
+
+ if (buf[i] == '\n') {
+ cmd->cmd_func(custr_cstr(cmd->cmd_out));
+ custr_reset(cmd->cmd_out);
+ continue;
+ }
+
+ if (custr_appendc(cmd->cmd_out, buf[i]) != 0) {
+ /*
+ * Failed to allocate memory; returning
+ * -1 here will abort the stream.
+ */
+ post_error(cmd, "custr_appendc failure");
+ return (-1);
+ }
+ }
+ break;
+
+ case STDERR_FILENO:
+ /*
+ * Collect as much stderr output as will fit in our static
+ * buffer.
+ */
+ for (i = 0; i < sz; i++) {
+ if (buf[i] == '\0') {
+ continue;
+ }
+
+ (void) custr_appendc(cmd->cmd_err, buf[i]);
+ }
+ break;
+
+ default:
+ abort();
+ }
+
+ return (0);
+}
+
+static void
+cb_eof(void *arg0, void *arg1)
+{
+ cmd_t *cmd = arg0;
+ int fd = (int)arg1;
+
+ if (cmd->cmd_cancel) {
+ return;
+ }
+
+ if (fd == STDOUT_FILENO && custr_len(cmd->cmd_out) > 0) {
+ cmd->cmd_func(custr_cstr(cmd->cmd_out));
+ custr_reset(cmd->cmd_out);
+ }
+}
+
+static void
+cb_error(int e, void *arg0, void *arg1)
+{
+ cmd_t *cmd = arg0;
+ int fd = (int)arg1;
+
+ if (cmd->cmd_cancel) {
+ return;
+ }
+
+ post_error(cmd, "stream read failure");
+ (void) custr_append_printf(cmd->cmd_err, " (pid %d fd %d): %s",
+ cmd->cmd_pid, fd, strerror(e));
+}
diff --git a/usr/src/lib/brand/lx/lx_init/run_command.h b/usr/src/lib/brand/lx/lx_init/run_command.h
new file mode 100644
index 0000000000..3484b265b6
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_init/run_command.h
@@ -0,0 +1,32 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _RUN_COMMAND_H
+#define _RUN_COMMAND_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void run_command_line_cb(const char *);
+
+extern int run_command(const char *, char *const [], char *const [], char *,
+ size_t, run_command_line_cb *, int *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RUN_COMMAND_H */
diff --git a/usr/src/cmd/ssh/sftp-server/Makefile b/usr/src/lib/brand/lx/lx_support/Makefile
index c2bdf26c1e..e7c958e13a 100644
--- a/usr/src/cmd/ssh/sftp-server/Makefile
+++ b/usr/src/lib/brand/lx/lx_support/Makefile
@@ -18,39 +18,37 @@
#
# CDDL HEADER END
#
+#
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# cmd/ssh/sftp-server/Makefile
-
-PROG = sftp-server
-OBJS = sftp-server.o sftp-server-main.o
-SRCS = $(OBJS:.o=.c)
+PROG = lx_support
+PROGS = $(PROG)
+OBJS = lx_support
-include ../../Makefile.cmd
-include ../Makefile.ssh-common
+all: $(PROG)
-LDLIBS += $(SSH_COMMON_LDLIBS) -lsocket -lcrypto
+include ../Makefile.lx
+include $(SRC)/cmd/Makefile.cmd
-POFILE_DIR = ..
+# override the install directory
+ROOTBIN = $(ROOTBRANDDIR)
+CLOBBERFILES = $(OBJS) $(ROOTPROGS)
-.KEEP_STATE:
+UTSBASE = $(SRC)/uts
-.PARALLEL: $(OBJS)
+CFLAGS += $(CCVERBOSE)
+CPPFLAGS += -D_REENTRANT -I$(UTSBASE)/common/brand/lx
+LDLIBS += -lzonecfg
-all: $(PROG)
-
-$(PROG): $(OBJS) ../libssh/$(MACH)/libssh.a ../libopenbsd-compat/$(MACH)/libopenbsd-compat.a
- $(LINK.c) $(OBJS) -o $@ $(LDLIBS) $(DYNFLAGS)
- $(POST_PROCESS)
+.KEEP_STATE:
-install: all $(ROOTLIBSSHPROG) $(ROOTLIBSSH)
+install: all $(ROOTPROGS)
clean:
- $(RM) -f $(OBJS) $(PROG)
+ $(RM) $(PROG) $(OBJS)
-lint: lint_SRCS
+lint: lint_PROG
-include ../Makefile.msg.targ
-include ../../Makefile.targ
+include $(SRC)/cmd/Makefile.targ
diff --git a/usr/src/lib/brand/lx/lx_support/lx_support.c b/usr/src/lib/brand/lx/lx_support/lx_support.c
new file mode 100644
index 0000000000..fa4bbffc4a
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_support/lx_support.c
@@ -0,0 +1,368 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * lx_support is a small cli utility used to perform some brand-specific
+ * tasks when booting, halting, or verifying a zone. This utility is not
+ * intended to be called by users - it is intended to be invoked by the
+ * zones utilities.
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <stropts.h>
+#include <sys/ioccom.h>
+#include <sys/stat.h>
+#include <sys/systeminfo.h>
+#include <sys/types.h>
+#include <sys/varargs.h>
+#include <unistd.h>
+#include <libintl.h>
+#include <locale.h>
+
+#include <libzonecfg.h>
+#include <sys/lx_brand.h>
+
+static void lxs_err(char *msg, ...) __NORETURN;
+static void usage(void) __NORETURN;
+
+#define CP_CMD "/usr/bin/cp"
+#define MOUNT_CMD "/sbin/mount"
+
+static char *bname = NULL;
+static char *zonename = NULL;
+static char *zoneroot = NULL;
+
+#if !defined(TEXT_DOMAIN) /* should be defined by cc -D */
+#define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */
+#endif
+
+static void
+lxs_err(char *msg, ...)
+{
+ char buf[1024];
+ va_list ap;
+
+ va_start(ap, msg);
+ /*LINTED*/
+ (void) vsnprintf(buf, sizeof (buf), msg, ap);
+ va_end(ap);
+
+ (void) printf("%s error: %s\n", bname, buf);
+
+ exit(1);
+ /*NOTREACHED*/
+}
+
+/*
+ * The Linux init(1M) command requires communication over the /dev/initctl
+ * FIFO. Since any attempt to create a file in /dev will fail, we must
+ * create it here.
+ */
+static void
+lxs_make_initctl()
+{
+ char cmdbuf[ARG_MAX];
+ char path[MAXPATHLEN];
+ char special[MAXPATHLEN];
+ struct stat buf;
+ int err;
+
+ if (snprintf(special, sizeof (special), "%s/dev/initctl", zoneroot) >=
+ sizeof (special))
+ lxs_err("%s: %s", gettext("Failed to create /dev/initctl"),
+ gettext("zoneroot is too long"));
+
+ if (snprintf(path, sizeof (path), "%s/root/dev/initctl", zoneroot) >=
+ sizeof (path))
+ lxs_err("%s: %s", gettext("Failed to create /dev/initctl"),
+ gettext("zoneroot is too long"));
+
+ /* create the actual fifo as <zoneroot>/dev/initctl */
+ if (stat(special, &buf) != 0) {
+ err = errno;
+ if (err != ENOENT)
+ lxs_err("%s: %s",
+ gettext("Failed to create /dev/initctl"),
+ strerror(err));
+ if (mkfifo(special, 0644) < 0) {
+ err = errno;
+ lxs_err("%s: %s",
+ gettext("Failed to create /dev/initctl"),
+ strerror(err));
+ }
+ } else {
+ if ((buf.st_mode & S_IFIFO) == 0)
+ lxs_err("%s: %s",
+ gettext("Failed to create /dev/initctl"),
+ gettext("It already exists, and is not a FIFO."));
+ }
+
+ /*
+ * now lofs mount the <zoneroot>/dev/initctl fifo onto
+ * <zoneroot>/root/dev/initctl
+ */
+ if (snprintf(cmdbuf, sizeof (cmdbuf), "%s -F lofs %s %s", MOUNT_CMD,
+ special, path) >= sizeof (cmdbuf))
+ lxs_err("%s: %s", gettext("Failed to lofs mount /dev/initctl"),
+ gettext("zoneroot is too long"));
+
+ if (system(cmdbuf) < 0) {
+ err = errno;
+ lxs_err("%s: %s", gettext("Failed to lofs mount /dev/initctl"),
+ strerror(err));
+ }
+}
+
+/*
+ * fsck gets really confused when run inside a zone. Removing this file
+ * prevents it from running
+ */
+static void
+lxs_remove_autofsck()
+{
+ char path[MAXPATHLEN];
+ int err;
+
+ if (snprintf(path, MAXPATHLEN, "%s/root/.autofsck", zoneroot) >=
+ MAXPATHLEN)
+ lxs_err("%s: %s", gettext("Failed to remove /.autofsck"),
+ gettext("zoneroot is too long"));
+
+ if (unlink(path) < 0) {
+ err = errno;
+ if (err != ENOENT)
+ lxs_err("%s: %s",
+ gettext("Failed to remove /.autofsck"),
+ strerror(err));
+ }
+}
+
+/*
+ * Extract any lx-supported attributes from the zone configuration file.
+ */
+static void
+lxs_getattrs(zone_dochandle_t zdh, char **krelease)
+{
+ struct zone_attrtab attrtab;
+ int err;
+
+ /* initialize the attribute iterator */
+ if (zonecfg_setattrent(zdh) != Z_OK) {
+ zonecfg_fini_handle(zdh);
+ lxs_err(gettext("error accessing zone configuration"));
+ }
+
+ *krelease = (char *)malloc(LX_KERN_RELEASE_MAX);
+ if (*krelease == NULL)
+ lxs_err(gettext("out of memory"));
+
+ bzero(*krelease, LX_KERN_RELEASE_MAX);
+ while ((err = zonecfg_getattrent(zdh, &attrtab)) == Z_OK) {
+ if ((strcmp(attrtab.zone_attr_name, "kernel-version") == 0) &&
+ (zonecfg_get_attr_string(&attrtab, *krelease,
+ LX_KERN_RELEASE_MAX) != Z_OK))
+ lxs_err(gettext("invalid type for zone attribute: %s"),
+ attrtab.zone_attr_name);
+ }
+
+ if (strlen(*krelease) == 0) {
+ free(*krelease);
+ *krelease = NULL;
+ }
+
+ /* some kind of error while looking up attributes */
+ if (err != Z_NO_ENTRY)
+ lxs_err(gettext("error accessing zone configuration"));
+}
+
+static int
+lxs_boot()
+{
+ zoneid_t zoneid;
+ zone_dochandle_t zdh;
+ char *krelease;
+
+ lxs_make_initctl();
+ lxs_remove_autofsck();
+
+ if ((zdh = zonecfg_init_handle()) == NULL)
+ lxs_err(gettext("unable to initialize zone handle"));
+
+ if (zonecfg_get_handle((char *)zonename, zdh) != Z_OK) {
+ zonecfg_fini_handle(zdh);
+ lxs_err(gettext("unable to load zone configuration"));
+ }
+
+ /* Extract any relevant attributes from the config file. */
+ lxs_getattrs(zdh, &krelease);
+ zonecfg_fini_handle(zdh);
+
+ /*
+ * Let the kernel know whether or not this zone's init process
+ * should be automatically restarted on its death.
+ */
+ if ((zoneid = getzoneidbyname(zonename)) < 0)
+ lxs_err(gettext("unable to get zoneid"));
+
+ if (krelease != NULL) {
+ /* Backward compatability with incomplete version attr */
+ if (strcmp(krelease, "2.4") == 0) {
+ krelease = "2.4.21";
+ } else if (strcmp(krelease, "2.6") == 0) {
+ krelease = "2.6.18";
+ }
+
+ if (zone_setattr(zoneid, LX_ATTR_KERN_RELEASE, krelease,
+ strlen(krelease)) < 0)
+ lxs_err(gettext("unable to set kernel version"));
+ }
+
+ return (0);
+}
+
+static int
+lxs_halt()
+{
+ return (0);
+}
+
+static int
+lxs_verify(char *xmlfile)
+{
+ zone_dochandle_t handle;
+ char *krelease;
+ char hostidp[HW_HOSTID_LEN];
+ zone_iptype_t iptype;
+
+ if ((handle = zonecfg_init_handle()) == NULL)
+ lxs_err(gettext("internal libzonecfg.so.1 error"), 0);
+
+ if (zonecfg_get_xml_handle(xmlfile, handle) != Z_OK) {
+ zonecfg_fini_handle(handle);
+ lxs_err(gettext("zonecfg provided an invalid XML file"));
+ }
+
+ /*
+ * Check to see whether the zone has hostid emulation enabled.
+ */
+ if (zonecfg_get_hostid(handle, hostidp, sizeof (hostidp)) == Z_OK) {
+ zonecfg_fini_handle(handle);
+ lxs_err(gettext("lx zones do not support hostid emulation"));
+ }
+
+ /*
+ * Only exclusive stack is supported.
+ */
+ if (zonecfg_get_iptype(handle, &iptype) != Z_OK ||
+ iptype != ZS_EXCLUSIVE) {
+ zonecfg_fini_handle(handle);
+ lxs_err(gettext("lx zones do not support shared IP stacks"));
+ }
+
+ /* Extract any relevant attributes from the config file. */
+ lxs_getattrs(handle, &krelease);
+ zonecfg_fini_handle(handle);
+
+ if (krelease) {
+ char *pdot, *ep;
+ long major_ver;
+
+ pdot = strchr(krelease, '.');
+ if (pdot != NULL)
+ *pdot = '\0';
+ errno = 0;
+ major_ver = strtol(krelease, &ep, 10);
+ if (major_ver < 2 || errno != 0 || *ep != '\0')
+ lxs_err(gettext("invalid value for zone attribute: %s"),
+ "kernel-version");
+ if (pdot != NULL)
+ *pdot = '.';
+
+ }
+ return (0);
+}
+
+static void
+usage()
+{
+
+ (void) fprintf(stderr,
+ gettext("usage:\t%s boot <zoneroot> <zonename>\n"), bname);
+ (void) fprintf(stderr,
+ gettext(" \t%s halt <zoneroot> <zonename>\n"), bname);
+ (void) fprintf(stderr,
+ gettext(" \t%s verify <xml file>\n\n"), bname);
+ exit(1);
+}
+
+int
+main(int argc, char *argv[])
+{
+ (void) setlocale(LC_ALL, "");
+ (void) textdomain(TEXT_DOMAIN);
+
+ bname = basename(argv[0]);
+
+ if (argc < 3)
+ usage();
+
+ if (strcmp(argv[1], "boot") == 0) {
+ if (argc != 4)
+ lxs_err(gettext("usage: %s %s <zoneroot> <zonename>"),
+ bname, argv[1]);
+ zoneroot = argv[2];
+ zonename = argv[3];
+ return (lxs_boot());
+ }
+
+ if (strcmp(argv[1], "halt") == 0) {
+ if (argc != 4)
+ lxs_err(gettext("usage: %s %s <zoneroot> <zonename>"),
+ bname, argv[1]);
+ zoneroot = argv[2];
+ zonename = argv[3];
+ return (lxs_halt());
+ }
+
+ if (strcmp(argv[1], "verify") == 0) {
+ if (argc != 3)
+ lxs_err(gettext("usage: %s verify <xml file>"),
+ bname);
+ return (lxs_verify(argv[2]));
+ }
+
+ usage();
+ /*NOTREACHED*/
+}
diff --git a/usr/src/lib/brand/lx/lx_vdso/Makefile b/usr/src/lib/brand/lx/lx_vdso/Makefile
new file mode 100644
index 0000000000..56dd0a7a3c
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_vdso/Makefile
@@ -0,0 +1,39 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2016 Joyent, Inc.
+#
+
+include ../../../Makefile.lib
+
+SUBDIRS = tools $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+all := TARGET= all
+clean := TARGET= clean
+clobber := TARGET= clobber
+install := TARGET= install
+lint := TARGET= lint
+
+.KEEP_STATE:
+
+all install clean clobber: $(SUBDIRS)
+
+lint: $(LINT_SUBDIRS)
+
+$(MACH): tools
+$(MACH64): tools
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/lib/brand/lx/lx_vdso/Makefile.com b/usr/src/lib/brand/lx/lx_vdso/Makefile.com
new file mode 100644
index 0000000000..0cdb1aaf70
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_vdso/Makefile.com
@@ -0,0 +1,85 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2016 Joyent, Inc.
+#
+
+LIBRARY = lx_vdso.a
+VERS = .1
+
+include $(SRC)/lib/commpage/Makefile.shared.com
+
+COBJS = vdso_main.o vdso_subr.o
+OBJECTS = $(COBJS) $(COMMPAGE_OBJS)
+
+include ../../../../Makefile.lib
+include ../../Makefile.lx
+
+#
+# Since our name doesn't start with "lib", Makefile.lib incorrectly
+# calculates LIBNAME. Therefore, we set it here.
+#
+LIBNAME = lx_vdso
+
+MAPFILES = ../common/mapfile-vers
+MAPOPTS = $(MAPFILES:%=-M%)
+
+ASOBJS = vdso_subr.o
+COBJS = vdso_main.o
+OBJECTS = $(ASOBJS) $(COBJS) $(COMMPAGE_OBJS)
+
+SRCDIR = ../common
+
+ASSRCS = $(ASOBJS:%.o=$(ISASRCDIR)/%.s)
+CSRCS = $(COBJS:%.o=$(SRCDIR)/%.c)
+SRCS = $(ASSRCS) $(CSRCS)
+
+LIBS = $(DYNLIB)
+DYNFLAGS += $(DYNFLAGS_$(CLASS))
+DYNFLAGS += $(MAPOPTS)
+LDLIBS +=
+ASFLAGS = -P $(ASFLAGS_$(CURTYPE)) -D_ASM
+
+
+LIBS = $(DYNLIB)
+
+CLEANFILES = $(DYNLIB)
+ROOTLIBDIR = $(ROOT)/usr/lib/brand/lx
+ROOTLIBDIR64 = $(ROOT)/usr/lib/brand/lx/$(MACH64)
+
+VDSO_TOOL = ../tools/vdso_tool
+
+.KEEP_STATE:
+
+#
+# While $(VDSO_TOOL) performs most of the transformations required to
+# construct a correct VDSO object, we still make use of $(ELFEDIT). To
+# remove the $(ELFEDIT) requirement would mean shouldering the burden of
+# becoming a link-editor; this dark lore is best left to the linker aliens.
+#
+all: $(LIBS)
+ $(ELFEDIT) -e "dyn:value -add VERSYM $$($(ELFEDIT) \
+ -e 'shdr:dump .SUNW_versym' $(DYNLIB) | \
+ $(AWK) '{ if ($$1 == "sh_addr:") { print $$2 } }')" $(DYNLIB)
+ $(VDSO_TOOL) -f $(DYNLIB)
+
+lint: $(LINTLIB) lintcheck
+
+include ../../../../Makefile.targ
+include $(SRC)/lib/commpage/Makefile.shared.targ
+
+pics/%.o: $(ISASRCDIR)/%.s
+ $(COMPILE.s) -o $@ $<
+ $(POST_PROCESS_O)
+
+pics/vdso_main.o := CPPFLAGS += $(COMMPAGE_CPPFLAGS)
+pics/vdso_subr.o := ASFLAGS += -I$(SRC)/uts/common/brand/lx
diff --git a/usr/src/lib/brand/lx/lx_vdso/amd64/Makefile b/usr/src/lib/brand/lx/lx_vdso/amd64/Makefile
new file mode 100644
index 0000000000..f1c17dcd91
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_vdso/amd64/Makefile
@@ -0,0 +1,39 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2016 Joyent, Inc.
+#
+
+ISASRCDIR=.
+TARGET_ARCH=$(MACH64)
+
+include ../Makefile.com
+include $(SRC)/lib/Makefile.lib.64
+
+ASFLAGS += -D__$(MACH64)
+
+SONAME = linux-vdso.so.1
+
+#
+# You might ask, why aren't we overriding BUILD.SO in Makefile.com.
+# That's a sad story. The answer is that Makefile.lib.64 includes
+# Makefile.master.64 which redefines BUILD.SO, leaving us in an
+# unfortunate jumble. Therefore we have to redefine it in the
+# lower-level Makefile.
+#
+BUILD.SO = $(LD) -o $@ $(GSHARED) $(DYNFLAGS) $(PICS) $(LDLIBS)
+
+ASSYMDEP_OBJS = lx_vdso.o
+
+CLOBBERFILES = $(ROOTLIBDIR64)/$(DYNLIB) $(ROOTLIBDIR64)/$(LINTLIB)
+
+install: all $(ROOTLIBS64)
diff --git a/usr/src/lib/brand/lx/lx_vdso/amd64/vdso_subr.s b/usr/src/lib/brand/lx/lx_vdso/amd64/vdso_subr.s
new file mode 100644
index 0000000000..bf066600aa
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_vdso/amd64/vdso_subr.s
@@ -0,0 +1,68 @@
+/*
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+
+#include <sys/asm_linkage.h>
+#include <sys/lx_syscalls.h>
+
+
+#if defined(lint)
+
+comm_page_t *
+__vdso_find_commpage()
+{}
+
+long
+__vdso_sys_clock_gettime(uint_t clock_id, timespec_t *tp)
+{}
+
+int
+__vdso_sys_gettimeofday(timespec_t *tp, struct lx_timezone *tz)
+{}
+
+time_t
+__vdso_sys_time(timespec_t *tp)
+{}
+
+#else /* lint */
+
+ ENTRY_NP(__vdso_find_commpage)
+ leaq 0x0(%rip), %rax
+ andq $LX_VDSO_ADDR_MASK, %rax
+ addq $LX_VDSO_SIZE, %rax
+ ret
+ SET_SIZE(__vdso_find_commpage)
+
+ ENTRY_NP(__vdso_sys_clock_gettime)
+ movl $LX_SYS_clock_gettime, %eax
+ syscall
+ ret
+ SET_SIZE(__vdso_sys_clock_gettime)
+
+ ENTRY_NP(__vdso_sys_gettimeofday)
+ movl $LX_SYS_gettimeofday, %eax
+ syscall
+ ret
+ SET_SIZE(__vdso_sys_gettimeofday)
+
+ ENTRY_NP(__vdso_sys_time)
+ movl $LX_SYS_time, %eax
+ syscall
+ ret
+ SET_SIZE(__vdso_sys_time)
+
+#endif /* lint */
diff --git a/usr/src/lib/brand/lx/lx_vdso/common/mapfile-vers b/usr/src/lib/brand/lx/lx_vdso/common/mapfile-vers
new file mode 100644
index 0000000000..11690ce7d6
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_vdso/common/mapfile-vers
@@ -0,0 +1,59 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_VERSION LINUX_2.6 {
+ global:
+ __vdso_gettimeofday;
+ __vdso_clock_gettime;
+ __vdso_getcpu;
+ __vdso_time;
+ local:
+ *;
+};
+
+#
+# The vDSO module in GNU/Linux must only have a single PT_LOAD section.
+# Further, we should not have any data sections at all. Therefore, we go
+# through and explicitly disable several of the writeable sections that
+# might commonly show up.
+#
+LOAD_SEGMENT data {
+ disable;
+};
+
+LOAD_SEGMENT ldata {
+ disable;
+};
+
+LOAD_SEGMENT bss {
+ disable;
+};
+
diff --git a/usr/src/lib/brand/lx/lx_vdso/common/vdso_main.c b/usr/src/lib/brand/lx/lx_vdso/common/vdso_main.c
new file mode 100644
index 0000000000..2fe7adffc8
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_vdso/common/vdso_main.c
@@ -0,0 +1,159 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <cp_defs.h>
+
+
+struct lx_timezone {
+ int tz_minuteswest; /* minutes W of Greenwich */
+ int tz_dsttime; /* type of dst correction */
+};
+
+extern comm_page_t *__vdso_find_commpage();
+extern int __vdso_sys_gettimeofday(timespec_t *, struct lx_timezone *);
+extern time_t __vdso_sys_time(time_t *);
+extern long __vdso_sys_clock_gettime(uint_t, timespec_t *);
+
+#define LX_CLOCK_REALTIME 0 /* CLOCK_REALTIME */
+#define LX_CLOCK_MONOTONIC 1 /* CLOCK_HIGHRES */
+#define LX_CLOCK_PROCESS_CPUTIME_ID 2 /* Emulated */
+#define LX_CLOCK_THREAD_CPUTIME_ID 3 /* Emulated */
+#define LX_CLOCK_MONOTONIC_RAW 4 /* CLOCK_HIGHRES */
+#define LX_CLOCK_REALTIME_COARSE 5 /* CLOCK_REALTIME */
+#define LX_CLOCK_MONOTONIC_COARSE 6 /* CLOCK_HIGHRES */
+
+
+void
+__hrt2ts(hrtime_t hrt, timespec_t *tsp)
+{
+ uint32_t sec, nsec, tmp;
+
+ tmp = (uint32_t)(hrt >> 30);
+ sec = tmp - (tmp >> 2);
+ sec = tmp - (sec >> 5);
+ sec = tmp + (sec >> 1);
+ sec = tmp - (sec >> 6) + 7;
+ sec = tmp - (sec >> 3);
+ sec = tmp + (sec >> 1);
+ sec = tmp + (sec >> 3);
+ sec = tmp + (sec >> 4);
+ tmp = (sec << 7) - sec - sec - sec;
+ tmp = (tmp << 7) - tmp - tmp - tmp;
+ tmp = (tmp << 7) - tmp - tmp - tmp;
+ nsec = (uint32_t)hrt - (tmp << 9);
+ while (nsec >= NANOSEC) {
+ nsec -= NANOSEC;
+ sec++;
+ }
+ tsp->tv_sec = (time_t)sec;
+ tsp->tv_nsec = nsec;
+}
+
+int
+__vdso_gettimeofday(timespec_t *tp, struct lx_timezone *tz)
+{
+ comm_page_t *cp = __vdso_find_commpage();
+
+ if (__cp_can_gettime(cp) != 0) {
+ return (__vdso_sys_gettimeofday(tp, tz));
+ }
+
+ if (tp != NULL) {
+ long usec, nsec;
+
+ __cp_clock_gettime_realtime(cp, tp);
+
+ nsec = tp->tv_nsec;
+ usec = nsec + (nsec >> 2);
+ usec = nsec + (usec >> 1);
+ usec = nsec + (usec >> 2);
+ usec = nsec + (usec >> 4);
+ usec = nsec - (usec >> 3);
+ usec = nsec + (usec >> 2);
+ usec = nsec + (usec >> 3);
+ usec = nsec + (usec >> 4);
+ usec = nsec + (usec >> 1);
+ usec = nsec + (usec >> 6);
+ usec = usec >> 10;
+ tp->tv_nsec = usec;
+ }
+
+ if (tz != NULL) {
+ tz->tz_minuteswest = 0;
+ tz->tz_dsttime = 0;
+ }
+
+ return (0);
+}
+
+time_t
+__vdso_time(time_t *tp)
+{
+ comm_page_t *cp = __vdso_find_commpage();
+ timespec_t ts;
+
+ if (__cp_can_gettime(cp) != 0) {
+ return (__vdso_sys_time(tp));
+ }
+
+ __cp_clock_gettime_realtime(cp, &ts);
+ if (tp != NULL) {
+ *tp = ts.tv_sec;
+ }
+ return (ts.tv_sec);
+}
+
+long
+__vdso_clock_gettime(uint_t clock_id, timespec_t *tp)
+{
+ comm_page_t *cp = __vdso_find_commpage();
+
+ if (__cp_can_gettime(cp) != 0) {
+ return (__vdso_sys_clock_gettime(clock_id, tp));
+ }
+
+ switch (clock_id) {
+ case LX_CLOCK_REALTIME:
+ case LX_CLOCK_REALTIME_COARSE:
+ __cp_clock_gettime_realtime(cp, tp);
+ return (0);
+
+ case LX_CLOCK_MONOTONIC:
+ case LX_CLOCK_MONOTONIC_RAW:
+ case LX_CLOCK_MONOTONIC_COARSE:
+ __hrt2ts(__cp_gethrtime(cp), tp);
+ return (0);
+
+ case LX_CLOCK_PROCESS_CPUTIME_ID:
+ case LX_CLOCK_THREAD_CPUTIME_ID:
+ default:
+ break;
+ }
+ return (__vdso_sys_clock_gettime(clock_id, tp));
+}
+
+long
+__vdso_getcpu(uint_t *cpu, uint_t *node, void *tcache)
+{
+ comm_page_t *cp = __vdso_find_commpage();
+
+ if (cpu != NULL) {
+ *cpu = __cp_getcpu(cp);
+ }
+ if (node != NULL) {
+ *node = 0;
+ }
+ return (0);
+}
diff --git a/usr/src/lib/brand/lx/lx_vdso/i386/Makefile b/usr/src/lib/brand/lx/lx_vdso/i386/Makefile
new file mode 100644
index 0000000000..7f9a6a13e6
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_vdso/i386/Makefile
@@ -0,0 +1,43 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2016 Joyent, Inc.
+#
+
+ISASRCDIR=.
+TARGET_ARCH=$(MACH)
+
+include ../Makefile.com
+
+ASFLAGS += -D__$(MACH)
+
+SONAME = linux-gate.so.1
+
+#
+# You might ask, why aren't we overriding BUILD.SO in Makefile.com.
+# See the amd64 Makefile for more answers
+#
+BUILD.SO = $(LD) -o $@ $(GSHARED) $(DYNFLAGS) $(PICS) $(LDLIBS)
+
+ASSYMDEP_OBJS = lx_vdso.o
+
+CLOBBERFILES = $(ROOTLIBDIR)/$(DYNLIB) $(ROOTLIBDIR)/$(LINTLIB)
+
+# Set the object entry point for __vsyscall-ers
+entryfix: $(DYNLIB)
+ $(ELFEDIT) -e "ehdr:e_entry \
+ $$($(ELFEDIT) -re 'sym:st_value -osimple __vsyscall' $(DYNLIB))" \
+ $(DYNLIB)
+
+all: entryfix
+
+install: all $(ROOTLIBS)
diff --git a/usr/src/lib/brand/lx/lx_vdso/i386/vdso_subr.s b/usr/src/lib/brand/lx/lx_vdso/i386/vdso_subr.s
new file mode 100644
index 0000000000..ed7be8bb23
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_vdso/i386/vdso_subr.s
@@ -0,0 +1,92 @@
+/*
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+
+#include <sys/asm_linkage.h>
+#include <sys/lx_syscalls.h>
+
+
+#if defined(lint)
+
+comm_page_t *
+__vdso_find_commpage()
+{}
+
+long
+__vdso_sys_clock_gettime(uint_t clock_id, timespec_t *tp)
+{}
+
+int
+__vdso_sys_gettimeofday(timespec_t *tp, struct lx_timezone *tz)
+{}
+
+time_t
+__vdso_sys_time(timespec_t *tp)
+{}
+
+#else /* lint */
+
+ ENTRY_NP(__vdso_find_commpage)
+ call 1f
+1: popl %eax
+ andl $LX_VDSO_ADDR_MASK, %eax
+ addl $LX_VDSO_SIZE, %eax
+ ret
+ SET_SIZE(__vdso_find_commpage)
+
+ ENTRY_NP(__vdso_sys_clock_gettime)
+ movl $LX_SYS_clock_gettime, %eax
+ movl 0x4(%esp), %ebx
+ movl 0x8(%esp), %ecx
+ int $0x80
+ ret
+ SET_SIZE(__vdso_sys_clock_gettime)
+
+ ENTRY_NP(__vdso_sys_gettimeofday)
+ movl $LX_SYS_gettimeofday, %eax
+ movl 0x4(%esp), %ebx
+ movl 0x8(%esp), %ecx
+ int $0x80
+ ret
+ SET_SIZE(__vdso_sys_gettimeofday)
+
+ ENTRY_NP(__vdso_sys_time)
+ movl $LX_SYS_time, %eax
+ movl 0x4(%esp), %ebx
+ int $0x80
+ ret
+ SET_SIZE(__vdso_sys_time)
+
+ ENTRY_NP(__vsyscall)
+ /*
+ * On 32-bit Linux, the VDSO entry point (specified by e_entry)
+ * provides a potentially accelerated means to vector into the kernel.
+ * Normally this means using 'sysenter' with a Linux-custom calling
+ * convention so programs expecting int80 behavior are not required to
+ * change how arguments are passed.
+ *
+ * The SunOS sysenter entry point does _not_ tolerate such a departure
+ * from convention, so if this function is updated to use sysenter, it
+ * must properly marshal arguments onto the stack from the int80 style.
+ * Such an enhancement can only occur once sysenter receives the same
+ * branding hooks as syscall and int80.
+ */
+ int $0x80
+ ret
+ SET_SIZE(__vsyscall)
+
+#endif /* lint */
diff --git a/usr/src/lib/brand/lx/lx_vdso/tools/Makefile b/usr/src/lib/brand/lx/lx_vdso/tools/Makefile
new file mode 100644
index 0000000000..7907aa67c4
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_vdso/tools/Makefile
@@ -0,0 +1,42 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+PROG = vdso_tool
+
+include ../../../../../cmd/Makefile.cmd
+
+OBJS = vdso_tool.o
+
+CLOBBERFILES += $(PROG)
+
+NATIVECC_CFLAGS += $(CFLAGS) $(CCVERBOSE)
+NATIVECC_LDLIBS += -lelf
+
+.KEEP_STATE:
+
+all: $(PROG)
+
+install: all
+
+lint: lint_PROG
+
+clean:
+ $(RM) $(OBJS)
+
+$(PROG): $(OBJS)
+ $(NATIVECC) $(NATIVECC_CFLAGS) $(NATIVECC_LDLIBS) $(OBJS) -o $@
+ $(POST_PROCESS)
+
+include ../../../../../cmd/Makefile.targ
diff --git a/usr/src/lib/brand/lx/lx_vdso/tools/vdso_tool.c b/usr/src/lib/brand/lx/lx_vdso/tools/vdso_tool.c
new file mode 100644
index 0000000000..0e1d99f9da
--- /dev/null
+++ b/usr/src/lib/brand/lx/lx_vdso/tools/vdso_tool.c
@@ -0,0 +1,388 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * vdso_tool: a build-time tool for adjusting properties of the "lx_vdso.so.1"
+ * object we build for VDSO emulation in the LX brand.
+ *
+ * This tool ensures that the shared object contains only one loadable program
+ * header (PT_LOAD), and extends the size of that program header to induce the
+ * loading of all sections into memory. It also sets a few attributes in the
+ * ELF header.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <strings.h>
+#include <fcntl.h>
+#include <err.h>
+#include <errno.h>
+#include <libelf.h>
+#include <gelf.h>
+
+#define PROG "vdso_tool"
+
+typedef enum vdso_flags {
+ VDSO_UNLINK = 0x0001,
+ VDSO_UPDATE = 0x0002
+} vdso_flags_t;
+
+typedef struct vdso {
+ int v_fd;
+ char *v_path;
+ Elf *v_elf;
+ vdso_flags_t v_flags;
+ int v_ptload_phdr;
+ Elf64_Off v_max_offset;
+} vdso_t;
+
+static int
+open_vdso(vdso_t **vp, char *path)
+{
+ vdso_t *v;
+
+ if ((v = calloc(1, sizeof (vdso_t))) == NULL ||
+ (v->v_path = strdup(path)) == NULL) {
+ err(1, "could not allocate memory");
+ }
+ v->v_ptload_phdr = -1;
+ v->v_fd = -1;
+ *vp = v;
+
+ /*
+ * Open shared object file.
+ */
+ if ((v->v_fd = open(v->v_path, O_RDWR)) == -1) {
+ (void) fprintf(stderr, "could not open: %s: %s\n", v->v_path,
+ strerror(errno));
+ return (-1);
+ }
+
+ /*
+ * Attach libelf.
+ */
+ if ((v->v_elf = elf_begin(v->v_fd, ELF_C_RDWR, NULL)) == NULL) {
+ (void) fprintf(stderr, "could not attach libelf: %s\n",
+ elf_errmsg(-1));
+ return (-1);
+ }
+
+ if (elf_kind(v->v_elf) != ELF_K_ELF) {
+ (void) fprintf(stderr, "wrong elf type\n");
+ return (-1);
+ }
+
+ return (0);
+}
+
+static int
+close_vdso(vdso_t *v)
+{
+ int rval = 0;
+
+ if (v == NULL) {
+ return (0);
+ }
+
+ if (v->v_elf != NULL) {
+ /*
+ * If we want to write to the file, do so now.
+ */
+ if (v->v_flags & VDSO_UPDATE) {
+ if (elf_update(v->v_elf, ELF_C_WRITE) == -1) {
+ (void) fprintf(stderr, "ERROR: elf_update "
+ "failed: %s\n", elf_errmsg(-1));
+ v->v_flags |= VDSO_UNLINK;
+ rval = -1;
+ }
+ }
+
+ /*
+ * Close the libelf handle for this file.
+ */
+ if (elf_end(v->v_elf) == -1) {
+ (void) fprintf(stderr, "ERROR: elf_end failed: %s\n",
+ elf_errmsg(-1));
+ v->v_flags |= VDSO_UNLINK;
+ rval = -1;
+ }
+ }
+
+ if (v->v_fd > 0) {
+ (void) close(v->v_fd);
+ }
+
+ if (v->v_flags & VDSO_UNLINK) {
+ (void) fprintf(stderr, "unlinking file: %s\n", v->v_path);
+ if (unlink(v->v_path) != 0) {
+ (void) fprintf(stderr, "unlink failed: %s\n",
+ strerror(errno));
+ rval = -1;
+ }
+ }
+
+ free(v->v_path);
+ free(v);
+
+ return (rval);
+}
+
+static int
+adjust_elf_ehdr(vdso_t *v)
+{
+ GElf_Ehdr ehdr;
+ boolean_t dirty = B_FALSE;
+
+ if (gelf_getehdr(v->v_elf, &ehdr) == NULL) {
+ (void) fprintf(stderr, "could not get ehdr: %s\n",
+ elf_errmsg(-1));
+ goto errout;
+ }
+
+ if (ehdr.e_ident[EI_OSABI] != ELFOSABI_NONE) {
+ (void) fprintf(stdout, "set EI_OSABI = ELFOSABI_NONE\n");
+ ehdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
+ dirty = B_TRUE;
+ }
+
+ if (ehdr.e_ident[EI_ABIVERSION] != 0) {
+ (void) fprintf(stdout, "set EI_ABIVERSION = 0\n");
+ ehdr.e_ident[EI_ABIVERSION] = 0;
+ dirty = B_TRUE;
+ }
+
+ if (dirty && gelf_update_ehdr(v->v_elf, &ehdr) == 0) {
+ (void) fprintf(stderr, "could not update ehdr: %s\n",
+ elf_errmsg(-1));
+ goto errout;
+ }
+
+ v->v_flags |= VDSO_UPDATE;
+ return (0);
+
+errout:
+ v->v_flags |= VDSO_UNLINK;
+ return (-1);
+}
+
+static int
+find_pt_load_phdr(vdso_t *v)
+{
+ size_t nphdr, nloadable = 0;
+ int i;
+
+ if (elf_getphdrnum(v->v_elf, &nphdr) != 0) {
+ (void) fprintf(stderr, "could not get phdr count: %s\n",
+ elf_errmsg(-1));
+ goto errout;
+ }
+ (void) fprintf(stdout, "phdr count: %d\n", nphdr);
+
+ for (i = 0; i < nphdr; i++) {
+ GElf_Phdr phdr;
+
+ if (gelf_getphdr(v->v_elf, i, &phdr) == NULL) {
+ (void) fprintf(stderr, "could not get phdr[%d] count: "
+ "%s\n", i, elf_errmsg(-1));
+ goto errout;
+ }
+
+ if (phdr.p_type == PT_LOAD) {
+ if (nloadable++ != 0) {
+ (void) fprintf(stderr, "multiple PT_LOAD "
+ "phdrs\n");
+ goto errout;
+ }
+
+ (void) fprintf(stdout, "PT_LOAD header is phdr[%d]\n",
+ i);
+ v->v_ptload_phdr = i;
+
+ if (phdr.p_filesz != phdr.p_memsz) {
+ (void) fprintf(stderr, "mismatched filesz "
+ "(%llx) and memsz (%llx)\n", phdr.p_filesz,
+ phdr.p_memsz);
+ goto errout;
+ }
+
+ if (phdr.p_filesz == 0) {
+ (void) fprintf(stderr, "filesz was zero\n");
+ goto errout;
+ }
+ }
+ }
+
+ return (0);
+
+errout:
+ v->v_flags |= VDSO_UNLINK;
+ return (-1);
+}
+
+static int
+find_maximum_offset(vdso_t *v)
+{
+ size_t nshdr;
+ int i;
+
+ if (elf_getshdrnum(v->v_elf, &nshdr) != 0) {
+ (void) fprintf(stderr, "could not get shdr count: %s\n",
+ elf_errmsg(-1));
+ v->v_flags |= VDSO_UNLINK;
+ return (-1);
+ }
+ (void) fprintf(stdout, "shdr count: %d\n", nshdr);
+
+ for (i = 0; i < nshdr; i++) {
+ Elf_Scn *scn = elf_getscn(v->v_elf, i);
+ GElf_Shdr shdr;
+ Elf64_Off end;
+
+ if (gelf_getshdr(scn, &shdr) == NULL) {
+ (void) fprintf(stderr, "could not get shdr[%d] "
+ "count: %s\n", i, elf_errmsg(-1));
+ goto errout;
+ }
+
+ end = shdr.sh_offset + shdr.sh_size;
+
+ if (end > v->v_max_offset) {
+ v->v_max_offset = end;
+ }
+ }
+
+ (void) fprintf(stdout, "maximum offset: %llx\n", v->v_max_offset);
+
+ return (0);
+
+errout:
+ v->v_flags |= VDSO_UNLINK;
+ return (-1);
+}
+
+static int
+update_pt_load_size(vdso_t *v)
+{
+ GElf_Phdr phdr;
+
+ if (gelf_getphdr(v->v_elf, v->v_ptload_phdr, &phdr) == NULL) {
+ (void) fprintf(stderr, "could not get phdr[%d] count: %s\n",
+ v->v_ptload_phdr, elf_errmsg(-1));
+ goto errout;
+ }
+
+ (void) fprintf(stdout, "PT_LOAD size is currently %llx\n",
+ phdr.p_filesz);
+ if (phdr.p_filesz < v->v_max_offset) {
+ (void) fprintf(stdout, "extending PT_LOAD size from %llx "
+ "to %llx\n", phdr.p_filesz, v->v_max_offset);
+
+ phdr.p_memsz = phdr.p_filesz = v->v_max_offset;
+
+ if (gelf_update_phdr(v->v_elf, v->v_ptload_phdr, &phdr) ==
+ NULL) {
+ (void) fprintf(stderr, "could not update PT_LOAD "
+ "phdr: %s", elf_errmsg(-1));
+ goto errout;
+ }
+
+ v->v_flags |= VDSO_UPDATE;
+ }
+
+ return (0);
+
+errout:
+ v->v_flags |= VDSO_UNLINK;
+ return (-1);
+}
+
+int
+main(int argc, char **argv)
+{
+ vdso_t *v;
+ char *filen = NULL;
+ int errflg = 0;
+ int c;
+ int status = 0;
+ boolean_t do_update = B_TRUE;
+
+ while ((c = getopt(argc, argv, ":nf:")) != -1) {
+ switch (c) {
+ case 'n':
+ do_update = B_FALSE;
+ break;
+ case 'f':
+ filen = optarg;
+ break;
+ case ':':
+ (void) fprintf(stderr, "option -%c requires an "
+ "operand\n", optopt);
+ errflg++;
+ break;
+ case '?':
+ (void) fprintf(stderr, "unrecognised option: -%c\n",
+ optopt);
+ errflg++;
+ break;
+ }
+ }
+
+ if (errflg != 0 || optind != argc || filen == NULL) {
+ (void) fprintf(stderr, "usage: %s -f <vdso.so>\n", PROG);
+ return (1);
+ }
+
+ (void) fprintf(stdout, "vdso file: %s\n", filen);
+
+ if (elf_version(EV_CURRENT) == EV_NONE) {
+ (void) fprintf(stderr, "libelf mismatch: %s\n", elf_errmsg(-1));
+ return (2);
+ }
+
+ status = 3;
+ if (open_vdso(&v, filen) == -1) {
+ goto out;
+ }
+
+ status++;
+ if (adjust_elf_ehdr(v) == -1) {
+ goto out;
+ }
+
+ status++;
+ if (find_pt_load_phdr(v) == -1) {
+ goto out;
+ }
+
+ status++;
+ if (find_maximum_offset(v) == -1) {
+ goto out;
+ }
+
+ status++;
+ if (do_update && update_pt_load_size(v) == -1) {
+ goto out;
+ }
+
+out:
+ status++;
+ if (close_vdso(v) == 0) {
+ status = 0;
+ }
+
+ return (status);
+}
diff --git a/usr/src/lib/brand/lx/netfiles/Makefile b/usr/src/lib/brand/lx/netfiles/Makefile
new file mode 100644
index 0000000000..1d15d69850
--- /dev/null
+++ b/usr/src/lib/brand/lx/netfiles/Makefile
@@ -0,0 +1,47 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+# Copyright 2015 Joyent, Inc.
+#
+
+TXTS = etc_default_nfs
+NFS_DFL = ../../../../cmd/fs.d/nfs/etc/nfs.dfl
+
+all: $(TXTS)
+
+include ../Makefile.lx
+
+lint:
+
+install: $(ROOTTXTS)
+
+clean:
+ -$(RM) etc_default_nfs
+
+clobber: clean
+ -$(RM) $(ROOTXMLDOCS) $(ROOTTXTS)
+
+etc_default_nfs: $(NFS_DFL)
+ $(RM) $@
+ $(CP) $(NFS_DFL) $@
diff --git a/usr/src/lib/brand/lx/testing/Makefile b/usr/src/lib/brand/lx/testing/Makefile
new file mode 100644
index 0000000000..ce5c2a47ff
--- /dev/null
+++ b/usr/src/lib/brand/lx/testing/Makefile
@@ -0,0 +1,36 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc. All rights reserved.
+#
+
+TXTS = ltp_skiplist ltp_tests
+
+all:
+
+include $(SRC)/cmd/Makefile.cmd
+include ../Makefile.lx
+
+all := TARGET= all
+install := TARGET= install
+clobber := TARGET= clobber
+
+install: $(ROOTTXTS)
+
+_msg:
+
+lint:
+
+clean:
+
+clobber:
+ -$(RM) $(ROOTTXTS)
diff --git a/usr/src/lib/brand/lx/testing/Readme_ltp b/usr/src/lib/brand/lx/testing/Readme_ltp
new file mode 100644
index 0000000000..0cadd130f5
--- /dev/null
+++ b/usr/src/lib/brand/lx/testing/Readme_ltp
@@ -0,0 +1,291 @@
+The Linux Test Project (LTP) provides the basis for testing the lx API
+implementation. The project webpage is at http://linux-test-project.github.io/
+and the source is available on GitHub at
+https://github.com/linux-test-project/ltp.git.
+
+LTP should be built and run from within an lx zone.
+
+To build
+--------
+As root first make sure you have the tools installed:
+ apt-get install build-essential autoconf automake git quota
+
+Additional prerequisites are required activate some tests:
+ apt-get install attr-dev libaio-dev
+
+As a normal user:
+ git clone https://github.com/linux-test-project/ltp.git
+ cd ltp
+ make autotools
+ ./configure
+ make all
+
+As root:
+ make install
+
+The tests can be built in both a zone that has been installed with a 32-bit
+version of Linux and another zone that has been installed with a 64-bit version
+of the same release. When testing the 64-bit zone, a copy of the 32-bit build
+can be run in the 64-bit zone to ensure that 32-bit applications work
+correctly on a 64-bit install.
+
+Running the tests
+-----------------
+The LTP source tree provides detailed documentation on using the test suite, so
+this readme only give a short summary.
+
+Because many of the tests are targetted at kernel functionality which does not
+apply to Illumos, or test capabilities which are not available from within a
+zone, or test system call functionality which has not yet been completed, a
+skip list is used during the test run to bypass tests which are known to fail.
+
+The skip list lives in this directory and is delivered on the system. It is
+available from within the lx zone as /native/usr/lib/brand/lx/ltp_skiplist. As
+new functionality is completed, the skip list should be updated to remove tests
+which now work.
+
+As root:
+ cd /opt/ltp
+ /opt/ltp/runltp -f `cat /native/usr/lib/brand/lx/ltp_tests` \
+ -S /native/usr/lib/brand/lx/ltp_skiplist -p >/tmp/test.log
+
+When the test run has finished, the results will be logged in a date/time
+stamped file under /opt/ltp/results. The summary at the end of the log file
+should show "Total Failures: 0". If not, something is wrong.
+
+Running tests for development
+-----------------------------
+The source for the tests can be found under the testcases directory. The
+largest and most useful set for lx live under testcases/kernel/syscalls.
+
+For development purposes, an individual test (or tests) can be run by listing
+them in a command file, one per line. For example, with a command file named
+~/tcmds, to run the read01 test you setup the file so it looks like this:
+ read01 read01
+
+You can run that specific test as follows:
+ /opt/ltp/runltp -f ~/tcmds -p -l ~/read.log
+
+Test status
+-----------
+This section provides a short summary of the rationale for why tests are being
+skipped.
+
+LTP groups tests into command files (i.e. syscalls, nptl, etc. provided with
+the -f option in the runltp command shown above). A complete list of the groups
+can be seen in LTP source tree under the runtest directory. Some of these
+groups are obviously not applicable when running in an lx zone. The remaining
+groups still need work before they can be run. The groups shown in the runltp
+command above are expected to work when the skip list is used. The 'syscalls'
+command file runs the majority of the actual system call tests which we are
+interested in.
+
+The following table indicates why specific subtests are being skipped. Also
+note that the following tests pass in a 64-bit lx zone, but fail in a zone
+installed with a 32-bit Linux build: mmap15, open12, openat02 and sendfile09.
+
+ Legend:
+ x = never in a zone
+ * = fails on kvm and bare metal too
+ # = emulation not implemented yet
+ - = could enable with a test zone config change
+
+- access06 wants a block device
+x acct01 enables bsd process accounting
+# add_key01
+# add_key02
+x adjtimex01
+x adjtimex02
+x bdflush01
+x cacheflush01
+x chmod03 need PRIV_SYS_CONFIG to set sticky bit on reg file
+- chmod06 needs dev
+x chmod07 need PRIV_SYS_CONFIG to set sticky bit on reg file
+- chown04 needs dev
+- chown04_16 needs dev
+# clone02
+# clone08
+- creat06 wants to mount a ro fs
+x creat07 we don't behave this way for ETXTBSY
+x creat08 sets euid to 'nobody', loses PRIV_FILE_SETID to set sgid
+x execve04 we don't behave this way for ETXTBSY
+# fallocate01
+# fallocate02
+# fallocate03
+x fchmod02 need PRIV_SYS_CONFIG to set sticky bit on reg file
+x fchmod03 need PRIV_SYS_CONFIG to set sticky bit on reg file
+- fchmod06 needs dev
+# fchown04 mounts
+# fchown04_16
+# fcntl06 not supported on linux
+# fcntl06_64
+# fcntl23 leases not implemented
+# fcntl23_64 "
+# fcntl24 "
+# fcntl24_64 "
+# fcntl25 "
+# fcntl25_64 "
+# fcntl26 "
+# fcntl26_64 "
+# fcntl30
+# fcntl30_64
+# fcntl31 setown/getown not impl
+# fcntl31_64
+# fcntl32 F_SETLEASE not impl
+# fcntl32_64
+# fcntl33 F_SETLEASE not impl
+# fcntl33_64
+# fork05 asm into %fs reg
+- fork09 needs a swap cap of ~9GB
+# fork13 decided not to support this
+# fork14 "
+# ftruncate04 need a mnt with mandatory locking
+# ftruncate04_64
+# getdents02 wrong errno on test 4 - perf. impact too high
+# getdents02_64
+# get_mempolicy01
+x getrusage03 we don't fill in the ru_maxrss field
+- getxattr01 need attr/xattr.h at build time
+- getxattr02
+- getxattr03
+# ioctl03 needs /dev/net/tun
+# io_cancel01 libaio stuff not done
+# io_destroy01
+# io_getevents01
+# io_setup01
+# io_submit01
+- inotify03 needs dev
+# fanotify01 don't have fanotify
+# fanotify02
+# fanotify03
+# fanotify04
+# fanotify05
+# keyctl01 no kernel keyring support
+- lchown03 needs to mount ro fs
+- lchown03_16
+- linkat02 needs dev
+- link08 needs dev
+x mem01 crashme test which expects OOM killer to save the day
+- mkdir03 needs dev
+- mkdirat02 needs dev
+x mknod01 makes block and chr devs
+- mknod07 needs dev
+- mknodat02 needs dev
+# mmap13 expects "invalid access" to SIGBUS
+- mount01 needs dev
+- mount02 needs dev
+x mount03 mounts ext2
+- mount04 needs dev
+x mount05 mounts ext2
+x mount06 mounts ext2
+# mq_notify01
+# mq_notify02
+# mq_open01
+# mq_timedreceive01
+# mq_timedsend01
+# mq_unlink01
+x mremap01
+x mremap02
+x mremap03
+x mremap04
+x mremap05
+# msgctl12 uses MSG_STAT
+# msgrcv07 MSG_EXCEPT subtest - not avail.
+x open01 need PRIV_SYS_CONFIG to set sticky bit on reg file
+# open02 expects NOATIME to cause err for unpriv user
+# open10 setgid on sgid subdir behavior
+x open11 makes device
+# ppoll01
+# process_vm_readv01
+# process_vm_readv02
+# process_vm_readv03
+# process_vm_writev01
+# process_vm_writev02
+# prot_hsymlinks /proc/sys/fs/protected_hardlinks
+x ptrace04 not supp on our arch
+# ptrace05 OS-3307
+# read02 checks errno for O_DIRECT
+# readahead01
+# readahead02
+# readdir21 dir perf. issue
+- rename11 needs dev
+- renameat01 needs dev
+- rmdir02 needs dev
+x sched_getparam01 assumes Linux SCHED_OTHER return value
+x sched_getparam02 assumes Linux SCHED_OTHER return value
+# sched_rr_get_interval01
+# sched_rr_get_interval02
+# sched_rr_get_interval03
+x sched_setparam02 tries to set Linux policies
+x sched_setparam03 tries to set Linux policies
+# sched_getscheduler01
+# sched_getscheduler02
+x sched_setscheduler01
+x sched_setaffinity01
+x sched_getaffinity01
+# semctl01 all pass but SEM_STAT - linux specific
+# semop02 last test fails errno - expensive
+# sendfile02 OS-3296
+# sendfile02_64 "
+# sendfile04 "
+# sendfile04_64 "
+# sendfile06 "
+# sendfile06_64 "
+# sendfile07 "
+# sendfile07_64 "
+sendmsg01 OS-3295 - tests actually pass
+x setfsuid04 no real equiv. and only for NFS server
+x setfsuid04_16
+# sgetmask01 obsolete
+# setgroups04_16 expects sig11 for certain err
+# setns01
+# setns02
+# setpgid02 all pass but one, expects pid 0 to be there
+* setregid02 fails on bare metal, expects to lookup group "nobody" which
+* setregid02_16 doesn't exist. it is "nogroup" on ubuntu at least
+# setrlimit01 all pass but one, expects to set proc limit
+x settimeofday01
+# setxattr01
+# setxattr02
+# setxattr03
+# shmget05 OS-3326
+# splice01
+# splice02
+# splice03
+# tee01
+# tee02
+# ssetmask01 obsolete
+x stime01
+x switch01
+# sync_file_range01
+# sysconf01 most pass but see OS-3305
+# sysctl01 the build compiled this out,
+# sysctl03 this syscall is basically obsolete
+# sysctl04 obsolete
+# sysctl05
+# syslog01
+# syslog02
+# syslog03
+# syslog04
+# syslog05
+# syslog06
+# syslog07
+# syslog08
+# syslog09
+# syslog10
+# syslog11
+# syslog12
+# unshare01
+# unshare02
+- umount01 needs dev
+- umount02 needs dev
+- umount03 needs dev
+x ustat01 obsolete call to stat FS
+x ustat02 obsolete call to stat FS
+- utime06 needs dev
+- utimes01 needs dev
+# utimensat01 many subtests pass but see OS-3328 for the rest
+# vmsplice01
+# vmsplice02
+# perf_event_open01
+# perf_event_open02
diff --git a/usr/src/lib/brand/lx/testing/ltp_skiplist b/usr/src/lib/brand/lx/testing/ltp_skiplist
new file mode 100644
index 0000000000..b887763d06
--- /dev/null
+++ b/usr/src/lib/brand/lx/testing/ltp_skiplist
@@ -0,0 +1,258 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+# Copyright 2016 Joyent, Inc.
+
+# Broken tests
+poll02 # OS-3997
+
+# tests functionality not allowed in a zone
+accept4_01
+acct01
+adjtimex01
+adjtimex02
+bdflush01
+cacheflush01
+chmod03
+chmod07
+creat07
+creat08
+execve04
+fchmod02
+fchmod03
+getrusage03
+ioperm01
+ioperm02
+iopl01
+iopl02
+isofs
+mbind01
+mem01
+migrate_pages01
+migrate_pages02
+mknod01
+mlockall02
+mlockall03
+mlock02
+mmap12
+mount03
+mount05
+mount06
+modify_ldt01
+modify_ldt02
+move_pages01
+move_pages02
+move_pages03
+move_pages04
+move_pages05
+move_pages06
+move_pages07
+move_pages08
+move_pages09
+move_pages10
+move_pages11
+mremap01
+mremap02
+mremap03
+mremap04
+mremap05
+open01
+open11
+ptrace04
+quotactl01
+quotactl02
+remap_file_pages01
+remap_file_pages02
+sched_getparam01
+sched_getparam02
+sched_setscheduler01
+sched_setaffinity01
+sched_getaffinity01
+sched_setparam02
+sched_setparam03
+setfsuid04
+setfsuid04_16
+settimeofday01
+stime01
+swapoff01
+swapoff02
+swapon01
+swapon02
+swapon03
+switch01
+ustat01
+ustat02
+
+# needs a config with a dev or mounting
+chmod06
+chown04
+chown04_16
+creat06
+fchmod06
+fchown04
+fchown04_16
+inotify03
+lchown03
+lchown03_16
+linkat02
+link08
+mkdir03
+mkdirat02
+mknod07
+mknodat02
+mount01
+mount02
+mount04
+rename11
+renameat01
+rmdir02
+umount01
+umount02
+umount03
+utime06
+utimes01
+
+# These tests are broken on 32-bit (including on "real" Linux)
+preadv01
+preadv02
+pwritev01
+pwritev02
+
+# tests functionality not implemented yet
+access06
+add_key01
+add_key02
+clone02
+clone08
+crash02
+fallocate04 # needs SEEK_HOLE
+fanotify01
+fanotify02
+fanotify03
+fanotify04
+fanotify05
+fcntl06
+fcntl23
+fcntl23_64
+fcntl24
+fcntl24_64
+fcntl25
+fcntl25_64
+fcntl26
+fcntl26_64
+fcntl31
+fcntl31_64
+fcntl32
+fcntl32_64
+fcntl33
+fcntl33_64
+fork05
+fork09
+fork13
+fork14
+ftruncate04
+ftruncate04_64
+futex_wake04 # OS-4471
+getdents02
+getdents02_64
+get_mempolicy01
+ioctl03
+io_cancel01
+io_destroy01
+io_getevents01
+io_setup01
+io_submit01
+keyctl01
+mmap13
+mq_notify01
+mq_notify02
+mq_open01
+mq_timedreceive01
+mq_timedsend01
+mq_unlink01
+msgctl12
+msgrcv07
+open02
+open10
+perf_event_open01
+perf_event_open02
+ppoll01
+process_vm_readv01
+process_vm_readv02
+process_vm_readv03
+process_vm_writev01
+process_vm_writev02
+prot_hsymlinks
+read02
+readahead01
+readahead02
+readdir2
+readdir21
+sched_rr_get_interval01
+sched_rr_get_interval02
+sched_rr_get_interval03
+sched_getscheduler01
+sched_getscheduler02
+sched_getattr01
+sched_getattr02
+sched_setattr01
+semctl01
+semop02
+sendfile02 # OS-3296
+sendfile02_64
+sendfile04
+sendfile04_64
+sendfile06
+sendfile06_64
+sendfile07
+sendfile07_64
+sendmsg01
+setgroups04_16
+setns01
+setns02
+setpgid02
+setregid02
+setregid02_16
+setrlimit01
+setxattr01
+setxattr02
+setxattr03
+sgetmask01
+shmget05 # OS-3326
+splice01
+splice02
+splice03
+ssetmask01
+sync_file_range01
+sysconf01 # OS-3305
+sysctl01
+sysctl03
+sysctl04
+sysctl05
+syslog01
+syslog02
+syslog03
+syslog04
+syslog05
+syslog06
+syslog07
+syslog08
+syslog09
+syslog10
+syslog11
+syslog12
+tee01
+tee02
+unshare01
+utimensat01 # OS-3328
+unshare02
+vmsplice01
+vmsplice02
diff --git a/usr/src/lib/brand/lx/testing/ltp_tests b/usr/src/lib/brand/lx/testing/ltp_tests
new file mode 100644
index 0000000000..86e9725638
--- /dev/null
+++ b/usr/src/lib/brand/lx/testing/ltp_tests
@@ -0,0 +1 @@
+syscalls,nptl,ipc,pipes,pty,crashme,math
diff --git a/usr/src/lib/brand/lx/zone/Makefile b/usr/src/lib/brand/lx/zone/Makefile
new file mode 100644
index 0000000000..4d681a1b45
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/Makefile
@@ -0,0 +1,70 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2015 Joyent, Inc. All rights reserved.
+#
+
+PROGS = lx_boot
+PROGS += lx_boot_zone_redhat lx_boot_zone_ubuntu lx_boot_zone_debian
+PROGS += lx_boot_zone_busybox
+XMLDOCS = config.xml platform.xml
+TEMPLATES = SUNWlx.xml SUNWlx26.xml
+
+all: $(PROGS)
+
+include $(SRC)/cmd/Makefile.cmd
+include ../Makefile.lx
+
+all := TARGET= all
+install := TARGET= install
+clobber := TARGET= clobber
+
+POFILES= $(PROGS:%=%.po)
+POFILE= lx_zone.po
+
+$(POFILE): $(POFILES)
+ $(RM) $@
+ $(BUILDPO.pofiles)
+
+_msg: $(MSGDOMAINPOFILE)
+
+install: $(PROGS) $(ROOTXMLDOCS) $(ROOTTEMPLATES) $(ROOTPROGS)
+ mkdir -p $(ROOT)/usr/lib/brand/lx/ld/64
+ crle -c $(ROOT)/usr/lib/brand/lx/ld/ld.config \
+ -l /native/lib:/native/usr/lib \
+ -s /native/lib/secure:/native/usr/lib/secure
+ crle -64 -c $(ROOT)/usr/lib/brand/lx/ld/64/ld.config \
+ -l /native/lib/64:/native/usr/lib/64 \
+ -s /native/lib/secure/64:/native/usr/lib/secure/64
+
+lint:
+
+clean:
+ -$(RM) $(PROGS)
+
+clobber: clean
+ -$(RM) $(ROOTXMLDOCS) $(ROOTPROGS) $(ROOTTEMPLATES)
+
+FRC:
+
+include $(SRC)/Makefile.msg.targ
diff --git a/usr/src/lib/brand/lx/zone/SUNWlx.xml b/usr/src/lib/brand/lx/zone/SUNWlx.xml
new file mode 100644
index 0000000000..04c38873de
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/SUNWlx.xml
@@ -0,0 +1,34 @@
+<?xml version="1.0"?>
+
+<!--
+ Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ Use is subject to license terms.
+
+ CDDL HEADER START
+
+ The contents of this file are subject to the terms of the
+ Common Development and Distribution License (the "License").
+ You may not use this file except in compliance with the License.
+
+ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ or http://www.opensolaris.org/os/licensing.
+ See the License for the specific language governing permissions
+ and limitations under the License.
+
+ When distributing Covered Code, include this CDDL HEADER in each
+ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ If applicable, add the following below this CDDL HEADER, with the
+ fields enclosed by brackets "[]" replaced with your own identifying
+ information: Portions Copyright [yyyy] [name of copyright owner]
+
+ CDDL HEADER END
+
+ ident "%Z%%M% %I% %E% SMI"
+
+ DO NOT EDIT THIS FILE. Use zonecfg(1M) instead.
+-->
+
+<!DOCTYPE zone PUBLIC "-//Sun Microsystems Inc//DTD Zones//EN" "file:///usr/share/lib/xml/dtd/zonecfg.dtd.1">
+
+<zone name="default" zonepath="" autoboot="false" brand="lx">
+</zone>
diff --git a/usr/src/lib/brand/lx/zone/SUNWlx26.xml b/usr/src/lib/brand/lx/zone/SUNWlx26.xml
new file mode 100644
index 0000000000..9bd8af4d92
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/SUNWlx26.xml
@@ -0,0 +1,35 @@
+<?xml version="1.0"?>
+
+<!--
+ Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ Use is subject to license terms.
+
+ CDDL HEADER START
+
+ The contents of this file are subject to the terms of the
+ Common Development and Distribution License (the "License").
+ You may not use this file except in compliance with the License.
+
+ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ or http://www.opensolaris.org/os/licensing.
+ See the License for the specific language governing permissions
+ and limitations under the License.
+
+ When distributing Covered Code, include this CDDL HEADER in each
+ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ If applicable, add the following below this CDDL HEADER, with the
+ fields enclosed by brackets "[]" replaced with your own identifying
+ information: Portions Copyright [yyyy] [name of copyright owner]
+
+ CDDL HEADER END
+
+ ident "%Z%%M% %I% %E% SMI"
+
+ DO NOT EDIT THIS FILE. Use zonecfg(1M) instead.
+-->
+
+<!DOCTYPE zone PUBLIC "-//Sun Microsystems Inc//DTD Zones//EN" "file:///usr/share/lib/xml/dtd/zonecfg.dtd.1">
+
+<zone name="default" zonepath="" autoboot="false" brand="lx">
+ <attr name="kernel-version" type="string" value="2.6"/>
+</zone>
diff --git a/usr/src/lib/brand/lx/zone/config.xml b/usr/src/lib/brand/lx/zone/config.xml
new file mode 100644
index 0000000000..4a90247938
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/config.xml
@@ -0,0 +1,104 @@
+<?xml version="1.0"?>
+
+<!--
+ CDDL HEADER START
+
+ The contents of this file are subject to the terms of the
+ Common Development and Distribution License (the "License").
+ You may not use this file except in compliance with the License.
+
+ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ or http://www.opensolaris.org/os/licensing.
+ See the License for the specific language governing permissions
+ and limitations under the License.
+
+ When distributing Covered Code, include this CDDL HEADER in each
+ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ If applicable, add the following below this CDDL HEADER, with the
+ fields enclosed by brackets "[]" replaced with your own identifying
+ information: Portions Copyright [yyyy] [name of copyright owner]
+
+ CDDL HEADER END
+
+ Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ Copyright 2015, Joyent, Inc.
+
+ DO NOT EDIT THIS FILE.
+-->
+
+<!DOCTYPE brand PUBLIC "-//Sun Microsystems Inc//DTD Brands//EN"
+ "file:///usr/share/lib/xml/dtd/brand.dtd.1">
+
+<brand name="lx">
+ <modname>lx_brand</modname>
+
+ <initname>/native/usr/lib/brand/lx/lxinit</initname>
+ <login_cmd>/bin/login -h zone:%Z %u</login_cmd>
+ <forcedlogin_cmd>/bin/login -h zone:%Z -f %u</forcedlogin_cmd>
+ <user_cmd>/usr/bin/getent passwd %u</user_cmd>
+
+ <install>/usr/lib/brand/lx/lx_install %z %R</install>
+ <installopts>d:hsvX</installopts>
+ <boot>/usr/lib/brand/lx/lx_boot %z %R</boot>
+ <halt>/usr/lib/brand/lx/lx_support halt %R %z</halt>
+ <verify_cfg>/usr/lib/brand/lx/lx_support verify</verify_cfg>
+ <verify_adm></verify_adm>
+ <postclone></postclone>
+ <postinstall></postinstall>
+
+ <privilege set="default" name="contract_event" />
+ <privilege set="default" name="contract_identity" />
+ <privilege set="default" name="contract_observer" />
+ <privilege set="default" name="dtrace_proc" />
+ <privilege set="default" name="dtrace_user" />
+ <privilege set="default" name="file_chown" />
+ <privilege set="default" name="file_chown_self" />
+ <privilege set="default" name="file_dac_execute" />
+ <privilege set="default" name="file_dac_read" />
+ <privilege set="default" name="file_dac_search" />
+ <privilege set="default" name="file_dac_write" />
+ <privilege set="default" name="file_owner" />
+ <privilege set="default" name="file_setid" />
+ <privilege set="default" name="ipc_dac_read" />
+ <privilege set="default" name="ipc_dac_write" />
+ <privilege set="default" name="ipc_owner" />
+ <privilege set="default" name="net_bindmlp" />
+ <privilege set="default" name="net_icmpaccess" />
+ <privilege set="default" name="net_mac_aware" />
+ <privilege set="default" name="net_privaddr" />
+ <privilege set="default" name="net_rawaccess" ip-type="exclusive" />
+ <privilege set="default" name="proc_chroot" />
+ <privilege set="default" name="sys_audit" />
+ <privilege set="default" name="proc_audit" />
+ <privilege set="default" name="proc_lock_memory" />
+ <privilege set="default" name="proc_owner" />
+ <privilege set="default" name="proc_setid" />
+ <privilege set="default" name="proc_prioup" />
+ <privilege set="default" name="proc_taskid" />
+ <privilege set="default" name="sys_acct" />
+ <privilege set="default" name="sys_admin" />
+ <privilege set="default" name="sys_ip_config" ip-type="exclusive" />
+ <privilege set="default" name="sys_iptun_config" ip-type="exclusive" />
+ <privilege set="default" name="sys_mount" />
+ <privilege set="default" name="sys_nfs" />
+ <privilege set="default" name="sys_resource" />
+ <privilege set="default" name="sys_ppp_config" ip-type="exclusive" />
+
+ <privilege set="prohibited" name="dtrace_kernel" />
+ <privilege set="prohibited" name="proc_zone" />
+ <privilege set="prohibited" name="sys_config" />
+ <privilege set="prohibited" name="sys_devices" />
+ <privilege set="prohibited" name="sys_ip_config" ip-type="shared" />
+ <privilege set="prohibited" name="sys_linkdir" />
+ <privilege set="prohibited" name="sys_net_config" />
+ <privilege set="prohibited" name="sys_ppp_config" ip-type="shared" />
+ <privilege set="prohibited" name="sys_res_config" />
+ <privilege set="prohibited" name="sys_suser_compat" />
+ <privilege set="prohibited" name="xvm_control" />
+ <privilege set="prohibited" name="virt_manage" />
+
+ <privilege set="required" name="proc_exec" />
+ <privilege set="required" name="proc_fork" />
+ <privilege set="required" name="sys_ip_config" ip-type="exclusive" />
+ <privilege set="required" name="sys_mount" />
+</brand>
diff --git a/usr/src/lib/brand/lx/zone/lx_boot.ksh b/usr/src/lib/brand/lx/zone/lx_boot.ksh
new file mode 100644
index 0000000000..9f4746e20a
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/lx_boot.ksh
@@ -0,0 +1,92 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2015, Joyent, Inc.
+#
+# lx boot script.
+#
+# The arguments to this script are the zone name and the zonepath.
+#
+
+. /usr/lib/brand/shared/common.ksh
+
+ZONENAME=$1
+ZONEPATH=$2
+ZONEROOT=$ZONEPATH/root
+
+w_missing=$(gettext "Warning: \"%s\" is not installed in the global zone")
+
+arch=`uname -p`
+if [ "$arch" = "i386" ]; then
+ ARCH32=i86
+ ARCH64=amd64
+else
+ echo "Unsupported architecture: $arch"
+ exit 2
+fi
+
+#
+# Run the lx_support boot hook.
+#
+/usr/lib/brand/lx/lx_support boot $ZONEPATH $ZONENAME
+if (( $? != 0 )) ; then
+ exit 1
+fi
+
+BRANDDIR=/native/usr/lib/brand/lx;
+EXIT_CODE=1
+
+#
+# Before we boot we validate and fix, if necessary, the required files within
+# the zone. These modifications can be lost if a patch or upgrade is applied
+# within the zone, so we validate and fix the zone every time it boots.
+#
+
+#
+# Determine the distro.
+#
+distro=""
+if [[ $(zonecfg -z $ZONENAME info attr name=docker) =~ "value: true" ]]; then
+ distro="docker"
+elif [[ -f $ZONEROOT/etc/redhat-release ]]; then
+ distro="redhat"
+elif [[ -f $ZONEROOT/etc/lsb-release ]]; then
+ if egrep -s Ubuntu $ZONEROOT/etc/lsb-release; then
+ distro="ubuntu"
+ elif [[ -f $ZONEROOT/etc/debian_version ]]; then
+ distro="debian"
+ fi
+elif [[ -f $ZONEROOT/etc/debian_version ]]; then
+ distro="debian"
+elif [[ -f $ZONEROOT/etc/alpine-release ]]; then
+ distro="busybox"
+fi
+
+[[ -z $distro ]] && fatal "Unsupported distribution!"
+
+#
+# Perform distro-specific customization.
+#
+. $(dirname $0)/lx_boot_zone_${distro}
+
+exit 0
diff --git a/usr/src/lib/brand/lx/zone/lx_boot_zone_busybox.ksh b/usr/src/lib/brand/lx/zone/lx_boot_zone_busybox.ksh
new file mode 100644
index 0000000000..1ad83902bc
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/lx_boot_zone_busybox.ksh
@@ -0,0 +1,168 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+#
+# Customisation for busybox-init-based distributions. Assumes to have been
+# sourced from lx_boot.
+#
+
+tmpfile=/tmp/lx-busybox.$$
+
+# Check that the directories we're writing to aren't symlinks outside the zone
+safe_dir /etc
+safe_dir /etc/init.d
+
+# Generate network setup script
+#
+cat > $tmpfile <<EOF
+#!/sbin/runscript
+depend() {
+ need localmount
+ after bootmisc hwdrivers modules
+ provide net
+ keyword nojail noprefix novserver
+}
+start() {
+ if [ ! -e /etc/resolv.conf ]; then
+ echo "# AUTOMATIC ZONE CONFIG" > /etc/resolv.conf
+EOF
+zonecfg -z $ZONENAME info attr name=resolvers |
+awk '
+ {
+ if ($1 == "value:") {
+ nres = split($2, resolvers, ",")
+ }
+ }
+ END {
+ for (i = 1; i <= nres; i++) {
+ printf(" echo \"nameserver %s\" >> %s\n", resolvers[i],
+ "/etc/resolv.conf")
+ }
+ }
+' >> $tmpfile
+zonecfg -z $ZONENAME info attr name=dns-domain |
+awk '
+ {
+ if ($1 == "value:") {
+ dom = $2
+ }
+ }
+ END {
+ printf(" echo \"search %s\" >> %s\n", dom, "/etc/resolv.conf")
+ }
+' >> $tmpfile
+cat >> $tmpfile <<EOF
+ fi
+ return 0
+}
+stop() {
+ return 0
+}
+EOF
+fnm=$ZONEROOT/etc/init.d/networking
+if [[ -f $fnm || -h $fnm ]]; then
+ mv -f $tmpfile $fnm
+ chmod 755 $fnm
+fi
+
+
+#
+# The default /etc/inittab might spawn mingetty on each of the virtual consoles
+# as well as xdm on the X console. Since we don't have virtual consoles nor
+# an X console, spawn a single mingetty on /dev/console instead.
+#
+# Don't bother changing the file if it looks like we already did.
+#
+fnm=$ZONEROOT/etc/inittab
+if ! egrep -s "Modified by lx brand" $fnm; then
+ sed 's/^tty[1-6]:/# Disabled by lx brand: &/' \
+ $fnm > $tmpfile
+ echo "console::respawn:/sbin/getty 38400 console" >> $tmpfile
+ echo "# Modified by lx brand" >> $tmpfile
+
+ if [[ ! -h $fnm ]]; then
+ mv -f $tmpfile $fnm
+ chmod 644 $fnm
+ fi
+fi
+
+#
+# The following scripts attempt to start services or otherwise configure the
+# system in ways incompatible with zones, so replace them with stubs.
+#
+
+fnm=$ZONEROOT/etc/init.d/fsck
+[[ ! -h $fnm && -f $fnm ]] && cat <<DONE > $fnm
+#!/sbin/runscript
+
+depend() {
+ use dev clock modules
+}
+
+start() {
+ return 0
+}
+DONE
+
+fnm=$ZONEROOT/etc/init.d/hwclock
+[[ ! -h $fnm && -f $fnm ]] && cat <<DONE > $fnm
+#!/sbin/runscript
+
+depend() {
+ provide clock
+}
+
+start() {
+ return 0
+}
+DONE
+
+fnm=$ZONEROOT/etc/init.d/klogd
+[[ ! -h $fnm && -f $fnm ]] && cat <<DONE > $fnm
+#!/sbin/runscript
+
+depend() {
+ need clock hostname localmount
+ before net
+}
+
+start() {
+ return 0
+}
+DONE
+
+fnm=$ZONEROOT/etc/init.d/sysfs
+[[ ! -h $fnm && -f $fnm ]] && cat <<DONE > $fnm
+#!/sbin/runscript
+
+depend() {
+}
+
+start() {
+ return 0
+}
+DONE
+
+#
+# Setup for the /dev/shm mount.
+#
+fnm=$ZONEROOT/etc/fstab
+entry=$(awk '{if ($2 == "/dev/shm") print $2}' $fnm)
+if [[ -z "$entry" && ! -h $fnm ]]; then
+ echo "swapfs /dev/shm tmpfs defaults 0 0" >> $fnm
+fi
+
+# Hand control back to lx_boot
diff --git a/usr/src/lib/brand/lx/zone/lx_boot_zone_debian.ksh b/usr/src/lib/brand/lx/zone/lx_boot_zone_debian.ksh
new file mode 100644
index 0000000000..35ae59c19c
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/lx_boot_zone_debian.ksh
@@ -0,0 +1,172 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+#
+# Customisation for Debian-based distributions. Assumes to have been
+# sourced from lx_boot.
+#
+
+tmpfile=/tmp/lx-debian.$$
+
+
+# Check that the directories we're writing to aren't symlinks outside the zone
+safe_dir /etc
+safe_dir /etc/init.d
+safe_dir /etc/network
+safe_dir /etc/rc0.d
+safe_dir /etc/rc1.d
+safe_dir /etc/rc2.d
+safe_dir /etc/rc3.d
+safe_dir /etc/rc4.d
+safe_dir /etc/rc5.d
+safe_dir /etc/rc6.d
+safe_dir /etc/rcS.d
+safe_opt_dir /etc/selinux
+
+# Populate resolve.conf setup files
+zonecfg -z $ZONENAME info attr name=resolvers | awk '
+BEGIN {
+ print("# AUTOMATIC ZONE CONFIG")
+}
+$1 == "value:" {
+ nres = split($2, resolvers, ",");
+ for (i = 1; i <= nres; i++) {
+ print("nameserver", resolvers[i]);
+ }
+}
+' > $tmpfile
+zonecfg -z $ZONENAME info attr name=dns-domain | awk '
+$1 == "value:" {
+ dom = $2
+}
+END {
+ print("search", dom);
+}
+' >> $tmpfile
+fnm=$ZONEROOT/etc/resolv.conf
+if [[ -f $fnm || -h $fnm ]]; then
+ mv -f $tmpfile $fnm
+fi
+
+# Override network configuration
+zonecfg -z $ZONENAME info net | awk '
+BEGIN {
+ print("# AUTOMATIC ZONE CONFIG")
+ print("iface lo inet manual");
+}
+$1 == "physical:" {
+ print("iface", $2, "inet manual");
+}
+' > $tmpfile
+fnm=$ZONEROOT/etc/network/interfaces
+if [[ -f $fnm || -h $fnm ]]; then
+ mv -f $tmpfile $fnm
+fi
+
+#
+# The default /etc/inittab might spawn mingetty on each of the virtual consoles
+# as well as xdm on the X console. Since we don't have virtual consoles nor
+# an X console, spawn a single mingetty on /dev/console instead.
+#
+# Don't bother changing the file if it looks like we already did.
+#
+fnm=$ZONEROOT/etc/inittab
+if ! egrep -s "Modified by lx brand" $fnm; then
+ sed 's/^[1-6]:/# Disabled by lx brand: &/' \
+ $fnm > $tmpfile
+ echo "1:2345:respawn:/sbin/getty 38400 console" >> $tmpfile
+ echo "# Modified by lx brand" >> $tmpfile
+
+ if [[ ! -h $fnm ]]; then
+ mv -f $tmpfile $fnm
+ chmod 644 $fnm
+ fi
+fi
+
+# The Debian init uses a combination of traditional rc-style service
+# definitions and upstart-style definitions.
+
+#
+# The following rc-style scripts attempt to start services or otherwise
+# configure the system in ways incompatible with zones, so don't execute them
+# at boot time.
+#
+unsupported_rc_services="
+ checkfs.sh
+ checkroot.sh
+ hwclock.sh
+ kmod
+ mtab.sh
+ procps
+ udev
+ udev-mtab
+"
+
+for file in $unsupported_rc_services; do
+ rm -f $ZONEROOT/etc/init.d/$file
+
+ rc_files="$(echo $ZONEROOT/etc/rc[0-6S].d/[SK]+([0-9])$file)"
+
+ if [[ "$rc_files" != \
+ "$ZONEROOT/etc/rc[0-6S].d/[SK]+([0-9])$file" ]]; then
+ for file in $rc_files; do
+ rm -f "$file"
+ done
+ fi
+done
+
+disable_svc()
+{
+ fnm=$ZONEROOT/etc/init/$1.override
+ [[ -h $fnm || -f $fnm ]] && return
+ echo "manual" > $fnm
+}
+
+
+#
+# Now customize upstart
+#
+
+RMSVCS="
+ network-interface-security
+ udev
+ udevmonitor
+ udevtrigger
+ udev-fallback-graphics
+ udev-finish
+"
+for f in $RMSVCS
+do
+ disable_svc $f
+done
+
+#
+# We need to setup for the /dev/shm mount. Unlike some other distros, Debian
+# can handle it as either /dev/shm or /run/shm. For simplicity we create an
+# fstab entry to force it into the /dev/shm style.
+#
+fnm=$ZONEROOT/etc/fstab
+entry=$(awk '{if ($2 == "/dev/shm") print $2}' $fnm)
+if [[ -z "$entry" && ! -h $fnm ]]; then
+ echo "swapfs /dev/shm tmpfs defaults 0 0" >> $fnm
+fi
+
+#
+# upstart modifications are complete
+#
+rm -f $tmpfile
+
+# Hand control back to lx_boot
diff --git a/usr/src/lib/brand/lx/zone/lx_boot_zone_redhat.ksh b/usr/src/lib/brand/lx/zone/lx_boot_zone_redhat.ksh
new file mode 100644
index 0000000000..566b401c18
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/lx_boot_zone_redhat.ksh
@@ -0,0 +1,361 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+# Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved.
+#
+
+#
+# Since CentOS, Red Hat Enterprise Linux, and Fedora all use approximately
+# the same source, this file should be good for all three.
+#
+# Currently, this file assumed a pre-systemd existence, so this should be
+# CentOS 6.x or earlier. Testing has been done on CentOS 6.6.
+#
+
+tmpfile=/tmp/lx-redhat.$$
+
+
+# Before doing anything else, make sure some Centos-specific dirs are safe.
+# /etc/init.d is normally a symlink so we can't easily tell if it's safe so
+# check rc.d/init.d instead.
+
+safe_dir /etc/sysconfig
+safe_dir /etc/rc.d
+safe_dir /etc/rc.d/init.d
+safe_dir /etc/rc.d/rc0.d
+safe_dir /etc/rc.d/rc1.d
+safe_dir /etc/rc.d/rc2.d
+safe_dir /etc/rc.d/rc3.d
+safe_dir /etc/rc.d/rc4.d
+safe_dir /etc/rc.d/rc5.d
+safe_dir /etc/rc.d/rc6.d
+safe_opt_dir /etc/selinux
+
+# Generate the /etc/rc.d/init.d/network rc script
+cat > $tmpfile <<EOF
+#!/bin/bash
+# network Bring up/down networking
+#
+### BEGIN INIT INFO
+# Provides: \$network
+# Short-Description: Bring up/down networking
+# Description: Bring up/down networking
+### END INIT INFO
+
+case "\$1" in
+ start)
+ [ "\$EUID" != "0" ] && exit 4
+
+ if [ ! -e /etc/resolv.conf ]; then
+ if [ -h /etc/resolv.conf ]; then
+ rm -f /etc/resolv.conf
+ fi
+ echo "# AUTOMATIC ZONE CONFIG" > /etc/resolv.conf
+EOF
+zonecfg -z $ZONENAME info attr name=resolvers |
+awk '
+ {
+ if ($1 == "value:") {
+ nres = split($2, resolvers, ",")
+ }
+ }
+ END {
+ for (i = 1; i <= nres; i++) {
+ printf(" echo \"nameserver %s\" >> %s\n", resolvers[i],
+ "/etc/resolv.conf")
+ }
+ }
+' >> $tmpfile
+zonecfg -z $ZONENAME info attr name=dns-domain |
+awk '
+ {
+ if ($1 == "value:") {
+ dom = $2
+ }
+ }
+ END {
+ printf(" echo \"search %s\" >> %s\n", dom, "/etc/resolv.conf")
+ }
+' >> $tmpfile
+cat >> $tmpfile <<EOF
+ fi
+ touch /var/lock/subsys/network
+ rc=0
+ ;;
+ stop)
+ [ "\$EUID" != "0" ] && exit 4
+
+ rm -f /var/lock/subsys/network
+ rc=0
+ ;;
+ status)
+ echo "Configured devices:"
+ echo "lo \$(cd /dev/net; ls)"
+ echo "Currently active devices:"
+ echo \$(/sbin/ip -o link show up | awk -F ": " '{ print \$2 }')
+ rc=0
+ ;;
+ restart|reload|force-reload)
+ cd "\$CWD"
+ \$0 stop
+ \$0 start
+ rc=\$?
+ ;;
+ *)
+ echo "Usage: \$0 {start|stop|status|restart|reload|force-reload}"
+ exit 2
+esac
+
+exit \$rc
+EOF
+fnm=$ZONEROOT/etc/rc.d/init.d/network
+if [[ -f $fnm || -h $fnm ]]; then
+ mv -f $tmpfile $fnm
+ chmod 755 $fnm
+fi
+
+# This is specific to a systemd-based image
+sysdir=$ZONEROOT/etc/systemd/system
+if [[ -d $ZONEROOT/etc && ! -h $ZONEROOT/etc && -d $ZONEROOT/etc/systemd &&
+ ! -h $ZONEROOT/etc/systemd && -d $sysdir && ! -h $sysdir ]]; then
+ # don't use NetworkManager
+ rm -f $sysdir/dbus-org.freedesktop.nm-dispatcher.service
+ rm -f $sysdir/multi-user.target.wants/NetworkManager.service
+ rm -f $sysdir/dbus-org.freedesktop.NetworkManager.service
+ # our network setup needs to run
+ fnm=$sysdir/multi-user.target.wants/network.service
+ if [[ ! -f $fnm ]]; then
+ ln -s /etc/rc.d/init.d/network \
+ $sysdir/multi-user.target.wants/network.service
+ fi
+fi
+
+#
+# The default /etc/inittab only sets the runlevel. Make sure it's runlevel 3
+# and not runlevel 5 (X11).
+# Don't bother changing the file if it looks like we already did.
+#
+fnm=$ZONEROOT/etc/inittab
+if ! egrep -s "Modified by lx brand" $fnm; then
+ sed 's/^id:5:initdefault:/id:3:initdefault: &/' \
+ $fnm > $tmpfile
+ echo "# Modified by lx brand" >> $tmpfile
+
+ if [[ ! -h $fnm ]]; then
+ mv -f $tmpfile $fnm
+ chmod 644 $fnm
+ fi
+fi
+
+#
+# Ensure svcs depending on $network will start.
+#
+fnm=$ZONEROOT/etc/sysconfig/network
+if ! egrep -s "NETWORKING=yes" $fnm; then
+ cfghnm=$(zonecfg -z $ZONENAME info attr name=hostname | \
+ awk '{if ($1 == "value:") print $2}')
+ if [[ -z "$cfghnm" ]]; then
+ cfghnm=$ZONENAME
+ fi
+ if [[ ! -h $fnm ]]; then
+ cat > $fnm <<- EOF
+ NETWORKING=yes
+ HOSTNAME=$cfghnm
+ EOF
+ fi
+fi
+
+#
+# SELinux must be disabled otherwise we won't get past init.
+#
+fnm=$ZONEROOT/etc/selinux/config
+if egrep -s "^SELINUX=enforcing|^SELINUX=permissive" $fnm; then
+ tmpfile=/tmp/selinux_config.$$
+
+ sed 's/^SELINUX=.*$/SELINUX=disabled/' $fnm > $tmpfile
+ if [[ ! -h $fnm ]]; then
+ mv -f $tmpfile $fnm
+ chmod 644 $fnm
+ fi
+fi
+
+#
+# /etc/rc.d/init.d/keytable tries to load a physical keyboard map, which won't
+# work in a zone. If we remove etc/sysconfig/keyboard, it won't try this at all.
+#
+fnm=$ZONEROOT/etc/sysconfig/keyboard
+if [[ ! -h $fnm ]]; then
+ rm -f $ZONEROOT/etc/sysconfig/keyboard
+fi
+
+# The Centos init uses a combination of traditional rc-style service
+# definitions and upstart-style definitions.
+
+#
+# The following rc-style scripts attempt to start services or otherwise
+# configure the system in ways incompatible with zones, so don't execute them
+# at boot time.
+#
+unsupported_rc_services="
+ acpid
+ auditd
+ gpm
+ hpoj
+ ip6tables
+ iptables
+ irda
+ irqbalance
+ iscsi
+ isdn
+ kdump
+ kudzu
+ mdmpd
+ mdmonitor
+ microcode_ctl
+ netdump
+ ntpd
+ ntpdate
+ pcmcia
+ psacct
+ quota_nld
+ random
+ rawdevices
+ smartd
+"
+
+for file in $unsupported_rc_services; do
+ rm -f $ZONEROOT/etc/rc.d/init.d/$file
+
+ rc_files="$(echo $ZONEROOT/etc/rc.d/rc[0-6].d/[SK]+([0-9])$file)"
+
+ if [[ "$rc_files" != \
+ "$ZONEROOT/etc/rc.d/rc[0-6].d/[SK]+([0-9])$file" ]]; then
+ for file in $rc_files; do
+ rm -f "$file"
+ done
+ fi
+done
+
+disable_svc()
+{
+ # XXX - TBD does this work like on Ubuntu?
+ #
+ fnm=$ZONEROOT/etc/init/$1.override
+ [[ -h $fnm || -f $fnm ]] && return
+ echo "manual" > $fnm
+
+ # fnm=$ZONEROOT/etc/init/$1.conf
+ # rm -f $fnm
+}
+
+RMSVCS="control-alt-delete
+ ttyS0"
+
+#
+# Now customize upstart services
+#
+
+for f in $RMSVCS
+do
+ disable_svc $f
+done
+
+if [[ ! -f $ZONEROOT/etc/init/tty.override ]]; then
+ cat > $ZONEROOT/etc/init/tty.override <<- EOF
+ # tty - getty
+ #
+ # This service maintains a getty on the console.
+
+ stop on runlevel [S016]
+
+ respawn
+ instance console
+ exec /sbin/mingetty console
+ EOF
+fi
+
+if [[ ! -f $ZONEROOT/etc/init/start-ttys.override ]]; then
+ cat > $ZONEROOT/etc/init/start-ttys.override <<- EOF
+ # This service starts the configured number of gettys.
+ #
+
+ start on stopped rc RUNLEVEL=[2345]
+
+ task
+ script
+ initctl start tty
+ end script
+ EOF
+fi
+
+#
+# There is a lot of stuff in the standard halt and reboot scripts that we
+# have no business running in a zone. Fortunately, the stuff we want to
+# skip is all in one contiguous chunk.
+#
+# Don't bother to modify the file if it looks like we already did.
+#
+fnm=$ZONEROOT/etc/rc.d/init.d/halt
+if ! egrep -s "Disabled by lx brand" $fnm; then
+ awk 'BEGIN {skip = ""}
+ /^# Save mixer/ {skip = "# Disabled by lx brand: "}
+ /halt.local/ {skip = ""}
+ /./ {print skip $0}' $fnm > /tmp/halt.$$
+
+ if [[ $? -eq 0 && ! -h $fnm ]]; then
+ mv -f /tmp/halt.$$ $fnm
+ chmod 755 $fnm
+ fi
+fi
+
+#
+# Fix up /etc/rc.d/rc.sysinit:
+#
+# 1) /sbin/hwclock requires the iopl() system call, which BrandZ won't support.
+# Since the hardware clock cannot be set from within a zone, we comment out
+# the line.
+#
+# 2) Disable dmesg commands, since we don't implement klogctl
+#
+# 3) Disable initlog and the mount of /dev/pts
+#
+# 4) Don't touch /dev/tty* in order to start virtual terminals, as that won't
+# work from within a zone.
+#
+# 5) Don't try to check the root filesystem (/) as there is no associated
+# physical device, and any attempt to run fsck will fail.
+#
+fnm=$ZONEROOT/etc/rc.d/rc.sysinit
+tmpfile=/tmp/lx_rc.sysinit.$$
+
+sed 's@^/sbin/hwclock@# lx: &@
+ s@^/bin/dmesg -n@# lx: &@
+ s@^dmesg -s@# lx: &@
+ s@^initlog -c \"fsck@# lx: &@
+ s@^mount -n -o remount /dev/shm @mount -t tmpfs tmpfs /dev/shm @
+ s@^mount .* /dev/pts@# lx: &@
+ /^#remount \/dev\/shm/d' \
+ $fnm > $tmpfile
+
+if [[ ! -h $fnm ]]; then
+ mv -f $tmpfile $fnm
+ chmod 755 $fnm
+fi
+
+#
+# sysinit modifications are complete
+#
+rm -f $tmpfile
+
+# Hand control back to lx_boot
diff --git a/usr/src/lib/brand/lx/zone/lx_boot_zone_ubuntu.ksh b/usr/src/lib/brand/lx/zone/lx_boot_zone_ubuntu.ksh
new file mode 100644
index 0000000000..2d664be89d
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/lx_boot_zone_ubuntu.ksh
@@ -0,0 +1,146 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2016 Joyent, Inc.
+#
+
+#
+# Customisation for Ubuntu-based distributions. Assumes to have been
+# sourced from lx_boot.
+#
+tmpfile=/tmp/lx-ubuntu.$$
+
+# Check that the directories we're writing to aren't symlinks outside the zone
+safe_dir /etc
+safe_dir /etc/init
+safe_dir /etc/resolvconf
+safe_dir /etc/resolvconf/resolv.conf.d
+safe_dir /etc/network
+safe_dir /etc/network/interfaces.d
+safe_dir /etc/network/interfaces.d/smartos
+
+ZPOOL=`df $ZONEROOT | awk -F '[()]' '{split($2, field, "/"); print field[1]; }'`
+if [ -z "$ZPOOL" ]; then
+ ROOTDEV="/"
+else
+ ROOTDEV="/dev/$ZPOOL"
+fi
+
+# Populate resolve.conf setup files
+zonecfg -z $ZONENAME info attr name=resolvers | awk '
+BEGIN {
+ print("# AUTOMATIC ZONE CONFIG")
+}
+$1 == "value:" {
+ nres = split($2, resolvers, ",");
+ for (i = 1; i <= nres; i++) {
+ print("nameserver", resolvers[i]);
+ }
+}
+' > $tmpfile
+zonecfg -z $ZONENAME info attr name=dns-domain | awk '
+$1 == "value:" {
+ dom = $2
+}
+END {
+ print("search", dom);
+}
+' >> $tmpfile
+fnm=$ZONEROOT/etc/resolvconf/resolv.conf.d/tail
+if [[ -f $fnm || -h $fnm || ! -e $fnm ]]; then
+ mv -f $tmpfile $fnm
+fi
+
+# Override network configuration
+zonecfg -z $ZONENAME info net | awk '
+BEGIN {
+ print("# AUTOMATIC ZONE CONFIG")
+}
+$1 == "physical:" {
+ print("iface", $2, "inet manual");
+}
+' > $tmpfile
+fnm=$ZONEROOT/etc/network/interfaces.d/smartos
+if [[ -f $fnm || -h $fnm ]]; then
+ mv -f $tmpfile $fnm
+fi
+
+src_fnm=$ZONEROOT/etc/init/console.conf
+tgt_fnm=$ZONEROOT/etc/init/console.override
+if [[ -f $src_fnm && ! -f $tgt_fnm && ! -h $tgt_fnm ]] then
+ sed -e 's/lxc/smartos/' $src_fnm > /tmp/console.conf.$$
+ mv /tmp/console.conf.$$ $tgt_fnm
+fi
+
+fnm=$ZONEROOT/etc/init/container-detect.override
+if [[ ! -f $fnm && ! -h $fnm ]] then
+ cat <<'DONE' > $fnm
+description "Track if upstart is running in a container"
+
+start on mounted MOUNTPOINT=/run
+
+env container
+env LIBVIRT_LXC_UUID
+
+emits container
+
+pre-start script
+ container=smartos
+ echo "$container" > /run/container_type || true
+ initctl emit --no-wait container CONTAINER=$container
+ exit 0
+end script
+DONE
+fi
+
+# XXX need to add real mounting into this svc definition
+
+fnm=$ZONEROOT/etc/init/mountall.override
+if [[ ! -h $fnm ]] then
+ cat <<DONE > $fnm
+description "Mount filesystems on boot"
+
+start on startup
+
+task
+
+emits virtual-filesystems
+emits local-filesystems
+emits remote-filesystems
+emits all-swaps
+emits filesystem
+emits mounted
+
+script
+ echo "$ROOTDEV / zfs rw 0 0" > /etc/mtab
+ echo "proc /proc proc rw,noexec,nosuid,nodev 0 0" >> /etc/mtab
+
+ /sbin/initctl emit --no-wait virtual-filesystems
+ /bin/mount -t tmpfs tmpfs /dev/shm || true
+ /bin/mount -t tmpfs tmpfs /run || true
+ /bin/mkdir -p /run/lock || true
+ /bin/ln -s /dev/shm /run/shm || true
+ /sbin/initctl emit --no-wait mounted MOUNTPOINT=/run TYPE=tmpfs
+ /sbin/initctl emit --no-wait local-filesystems
+ /sbin/initctl emit --no-wait all-swaps
+ /sbin/initctl emit --no-wait filesystem
+end script
+DONE
+fi
+
+#
+# upstart modifications are complete
+#
+rm -f $tmpfile
+
+# Hand control back to lx_boot
diff --git a/usr/src/lib/brand/lx/zone/lx_install.ksh b/usr/src/lib/brand/lx/zone/lx_install.ksh
new file mode 100644
index 0000000000..c31b8355ac
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/lx_install.ksh
@@ -0,0 +1,194 @@
+#!/bin/ksh -p
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# Copyright 2016 Joyent, Inc. All rights reserved.
+#
+
+#
+# This is only an example install script. It is not currently used for anything.
+#
+
+PATH=/bin:/usr/bin:/usr/sbin
+export PATH
+
+fullpath()
+{
+ typeset path="$1"
+
+ echo $path | egrep -s "^/" || path="${PWD:=$(pwd)}/$path"
+ echo $path
+}
+
+makedir()
+{
+ typeset dirname=$(fullpath "$1")
+ typeset mode=""
+
+ [[ $# -eq 2 ]] && mode="-m $2"
+
+ [[ -d "$dirname" ]] && return
+
+ if ! mkdir $mode -p "$dirname"; then
+ echo $(gettext "Aborting installation...")
+ exit 255
+ fi
+}
+
+symlink()
+{
+ typeset src="$1"
+ typeset dst=$(fullpath "$2")
+
+ [[ -e "$dst" || -h "$dst" ]] && rm -f "$dst"
+
+ if ! ln -s "$src" "$dst"; then
+ echo $(gettext "Aborting installation...")
+ exit 255
+ fi
+}
+
+install_ln()
+{
+ typeset source="$1"
+ typeset target=$(fullpath "$2")
+
+ log " Installing \"$target\""
+
+ mv -f "$target" "$target.$tag" 2>/dev/null
+
+ if ! ln -s "$source" "$target"; then
+ return 1
+ fi
+
+ return 0
+}
+
+# If we weren't passed 3 arguments, exit now.
+[[ $# -lt 3 ]] && exit 254
+
+# Extract the brand directory name from the path.
+branddir=$(dirname "$0")
+zonename="$1"
+zoneroot="$2"
+install_src="3"
+install_root="$zoneroot/root"
+ZPOOL=`df $ZONEROOT | awk -F '[()]' '{split($2, field, "/"); print field[1]; }'`
+if [ -z "$ZPOOL" ]; then
+ ROOTDEV="none"
+else
+ ROOTDEV="/dev/$ZPOOL"
+fi
+
+if [[ ! -f "$install_src" ]]; then
+ echo "$install_src: file not found\n"
+ exit 254
+fi
+
+if [[ ! -d "$install_root" ]]; then
+ if ! mkdir -p "$install_root" 2>/dev/null; then
+ echo "Could not create install directory $install_root"
+ exit 254
+ fi
+fi
+
+if ! ( cd "$install_root" && gtar -xzf "$install_src" ) ; then
+ echo "Error: extraction from tar archive failed"
+ exit 255
+fi
+
+tag="lxsave_$(date +%m.%d.%Y@%T)"
+
+if [[ ! -d "$install_root" ]]; then
+ exit 255
+fi
+
+cd "$install_root"
+
+makedir native/dev
+makedir native/etc/default
+makedir native/etc/svc/volatile
+makedir native/lib
+makedir native/proc
+makedir native/tmp 1777
+makedir native/usr
+makedir native/var
+
+makedir mnt
+makedir opt
+makedir usr/local/bin
+makedir usr/local/include
+makedir usr/local/lib
+makedir usr/local/sbin
+makedir usr/local/share
+makedir usr/local/src
+
+makedir dev 0755
+makedir tmp 1777
+makedir proc 0555
+makedir boot 0755
+
+symlink /bin/sh sbin/sh
+symlink /bin/su usr/bin/su
+symlink /native/usr/lib/ld.so.1 usr/lib/ld.so.1
+
+libpam_so="$(echo lib/libpam.so.0.*)"
+libpam_misc="$(echo lib/libpam_misc.so.0.*)"
+libpamc_so="$(echo lib/libpamc.so.0.*)"
+
+symlink "/$libpam_so" lib/libpam.so.0
+symlink "/$libpam_misc" lib/libpam_misc.so.0
+symlink "/$libpamc_so" lib/libpamc.so.0
+
+makedir var/ld
+
+if ! crle -c var/ld/ld.config -l /native/lib:/native/usr/lib \
+ -s /native/lib/secure:/native/usr/lib/secure; then
+ exit 255
+fi
+
+mv -f etc/fstab etc/fstab.$tag 2>/dev/null
+
+cat > etc/fstab <<- EOF
+ $ROOTDEV / zfs defaults 1 1
+ proc /proc proc defaults 0 0
+EOF
+
+if [[ $? -ne 0 ]]; then
+ exit 255
+fi
+
+if [[ ! -e "$install_root/etc/hosts" ]]; then
+ cat > "$install_root/etc/hosts" <<-_EOF_
+ 127.0.0.1 localhost
+ _EOF_
+fi
+
+#
+# Perform distribution-specific changes.
+#
+distro=""
+if [[ -f etc/redhat-release ]]; then
+ distro="redhat"
+elif [[ -f etc/lsb-release ]]; then
+ if egrep -s Ubuntu etc/lsb-release; then
+ distro="ubuntu"
+ elif [[ -f etc/debian_version ]]; then
+ distro="debian"
+ fi
+elif [[ -f etc/debian_version ]]; then
+ distro="debian"
+fi
+
+if [[ -z $distro ]]; then
+ exit 255
+fi
+
+exit 0
diff --git a/usr/src/lib/brand/lx/zone/platform.xml b/usr/src/lib/brand/lx/zone/platform.xml
new file mode 100644
index 0000000000..ccb6ab3ba1
--- /dev/null
+++ b/usr/src/lib/brand/lx/zone/platform.xml
@@ -0,0 +1,159 @@
+<?xml version="1.0"?>
+
+<!--
+ CDDL HEADER START
+
+ The contents of this file are subject to the terms of the
+ Common Development and Distribution License (the "License").
+ You may not use this file except in compliance with the License.
+
+ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ or http://www.opensolaris.org/os/licensing.
+ See the License for the specific language governing permissions
+ and limitations under the License.
+
+ When distributing Covered Code, include this CDDL HEADER in each
+ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ If applicable, add the following below this CDDL HEADER, with the
+ fields enclosed by brackets "[]" replaced with your own identifying
+ information: Portions Copyright [yyyy] [name of copyright owner]
+
+ CDDL HEADER END
+
+ Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ Use is subject to license terms.
+ Copyright 2016 Joyent, Inc.
+
+ DO NOT EDIT THIS FILE.
+-->
+
+<!DOCTYPE platform PUBLIC "-//Sun Microsystems Inc//Zones Platform//EN"
+ "file:///usr/share/lib/xml/dtd/zone_platform.dtd.1">
+
+<platform name="lx" allow-exclusive-ip="true">
+ <!-- Global filesystems to mount when booting the zone -->
+ <global_mount special="/dev" directory="/native/dev" type="dev"
+ opt="attrdir=%R/dev" />
+
+ <!--
+ Local filesystems to mount when booting the zone. The /native/sbin
+ entry is needed so that we can replace ifconfig with its native
+ counterpart (this is required for exclusive-stack zones to work).
+ We also need dladm from /native/sbin.
+ -->
+ <global_mount special="/lib" directory="/native/lib"
+ opt="ro" type="lofs" />
+ <global_mount special="/usr" directory="/native/usr"
+ opt="ro" type="lofs" />
+ <global_mount special="/usr/lib/brand/lx/etc_default_nfs"
+ directory="/native/etc/default/nfs" type="lofs" opt="ro" />
+ <global_mount special="/etc/default/dhcpagent"
+ directory="/native/etc/default/dhcpagent" type="lofs" opt="ro" />
+ <global_mount special="/etc/netconfig"
+ directory="/native/etc/netconfig" type="lofs" opt="ro" />
+ <global_mount special="/sbin"
+ directory="/native/sbin" opt="ro,nodevices" type="lofs" />
+ <global_mount special="/usr/lib/brand/lx/ld" directory="/var/ld"
+ opt="ro" type="lofs" />
+ <global_mount special="/etc/zones/%z.xml"
+ directory="/native/etc/zones/%z.xml" opt="ro" type="lofs" />
+
+ <!-- Local filesystems to mount when booting the zone -->
+ <mount special="/native/dev" directory="/dev" type="lx_devfs" />
+ <mount special="proc" directory="/native/proc" type="proc" />
+ <mount special="swap" directory="/native/etc/svc/volatile"
+ type="tmpfs" />
+ <mount special="swap" directory="/native/tmp" type="tmpfs" />
+ <mount special="objfs" directory="/system/object" type="objfs" />
+ <mount special="ctfs" directory="/system/contract" type="ctfs" />
+ <mount special="mnttab" directory="/etc/mnttab" type="mntfs" />
+ <mount special="lxproc" directory="/proc" type="lx_proc" />
+
+ <!-- Devices to create under /dev -->
+ <device match="arp" />
+ <device match="dtrace/*" />
+ <device match="dtrace/provider/*" />
+ <device match="eventfd" />
+ <device match="inotify" />
+ <device match="ipnet" />
+ <device match="kstat" />
+ <device match="lo0" />
+ <device match="null" />
+ <device match="poll" />
+ <device match="pts/*" />
+ <device match="random" />
+ <device match="signalfd" />
+ <device match="tcp" />
+ <device match="tcp6" />
+ <device match="timerfd" />
+ <device match="tty" />
+ <device match="udp" />
+ <device match="udp6" />
+ <device match="urandom" />
+ <device match="zero" />
+ <device match="zfs" />
+ <device match="zvol/dsk/%P/%z/*" />
+ <device match="zvol/rdsk/%P/%z/*" />
+
+ <!-- Devices to create in exclusive IP zone only -->
+ <device match="dld" ip-type="exclusive" />
+ <device match="icmp" ip-type="exclusive" />
+ <device match="icmp6" ip-type="exclusive" />
+ <device match="ip" ip-type="exclusive" />
+ <device match="ip6" ip-type="exclusive" />
+ <device match="ipauth" ip-type="exclusive" />
+ <device match="ipf" ip-type="exclusive" />
+ <device match="ipl" ip-type="exclusive" />
+ <device match="iplookup" ip-type="exclusive" />
+ <device match="ipmpstub" ip-type="exclusive" />
+ <device match="ipnat" ip-type="exclusive" />
+ <device match="ipscan" ip-type="exclusive" />
+ <device match="ipsecah" ip-type="exclusive" />
+ <device match="ipsecesp" ip-type="exclusive" />
+ <device match="ipstate" ip-type="exclusive" />
+ <device match="ipsync" ip-type="exclusive" />
+ <device match="keysock" ip-type="exclusive" />
+ <device match="net/*" ip-type="exclusive" />
+ <device match="rawip" ip-type="exclusive" />
+ <device match="rawip6" ip-type="exclusive" />
+ <device match="rts" ip-type="exclusive" />
+ <device match="sad/admin" ip-type="exclusive" />
+ <device match="sctp" ip-type="exclusive" />
+ <device match="sctp6" ip-type="exclusive" />
+ <device match="spdsock" ip-type="exclusive" />
+ <device match="sppp" ip-type="exclusive" />
+ <device match="sppptun" ip-type="exclusive" />
+ <device match="vni" ip-type="exclusive" />
+
+ <!-- Renamed devices to create under /dev -->
+ <device match="brand/lx/autofs" name="autofs" />
+ <device match="brand/lx/ptmx" name="ptmx" />
+ <device match="zcons/%z/zoneconsole" name="console" />
+ <device match="zfd/%z/slave/0" name="zfd/0" />
+ <device match="zfd/%z/slave/1" name="zfd/1" />
+ <device match="zfd/%z/slave/2" name="zfd/2" />
+ <device match="zfd/%z/slave/3" name="zfd/3" />
+ <device match="zfd/%z/slave/4" name="zfd/4" />
+
+ <!-- Audio devices to create under /dev -->
+ <device match="brand/lx/dsp" name="dsp" />
+ <device match="brand/lx/mixer" name="mixer" />
+
+ <!-- Symlinks to create under /dev -->
+ <symlink source="fd" target="../proc/self/fd" />
+ <symlink source="stderr" target="../proc/self/fd/2" />
+ <symlink source="stdin" target="../proc/self/fd/0" />
+ <symlink source="stdout" target="../proc/self/fd/1" />
+ <symlink source="systty" target="console" />
+ <symlink source="kmsg" target="console" />
+ <symlink source="conslog" target="console" />
+
+ <!-- Create a mount point for the /dev/initctl fifo -->
+ <device match="null" name="initctl" />
+ <!-- Create a mount point for /dev/shm tmpfs (see shm_overview(7)) -->
+ <!-- We need to force a dir for the Linux mount to work ok -->
+ <symlink source="shm/loop" target="." />
+ <!-- Create a dummy /dev/xconsole -->
+ <device match="null" name="xconsole" />
+
+</platform>
diff --git a/usr/src/lib/brand/shared/zone/common.ksh b/usr/src/lib/brand/shared/zone/common.ksh
index 52441d814a..0f87686414 100644
--- a/usr/src/lib/brand/shared/zone/common.ksh
+++ b/usr/src/lib/brand/shared/zone/common.ksh
@@ -19,6 +19,7 @@
# CDDL HEADER END
#
# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2014, Joyent, Inc. All rights reserved.
#
#
@@ -96,9 +97,27 @@ vlog()
safe_dir()
{
typeset dir="$1"
+ typeset pwd_dir=""
- if [[ -h $ZONEROOT/$dir || ! -d $ZONEROOT/$dir ]]; then
- fatal "$e_baddir" "$dir"
+ if [[ -d $ZONEROOT/$dir ]]; then
+ if [[ -h $ZONEROOT/$dir ]]; then
+ #
+ # When dir is a symlink to a directory, we 'cd' to that
+ # directory to ensure that's under $ZONEROOT. We use pwd
+ # from /usr/bin instead of built-in because they give
+ # different results.
+ #
+ pwd_dir=$(cd $ZONEROOT/$dir && /usr/bin/pwd)
+ if [[ $pwd_dir =~ "^$ZONEROOT" ]]; then
+ return;
+ else
+ fatal \
+ "$e_baddir: symlink out of zoneroot" "$dir"
+ fi
+ else
+ # it's a dir and not a symlink, so that's ok.
+ return
+ fi
fi
}
@@ -109,9 +128,7 @@ safe_opt_dir()
[[ ! -e $ZONEROOT/$dir ]] && return
- if [[ -h $ZONEROOT/$dir || ! -d $ZONEROOT/$dir ]]; then
- fatal "$e_baddir" "$dir"
- fi
+ safe_dir $dir
}
# Only make a copy if we haven't already done so.
@@ -187,7 +204,7 @@ safe_replace()
fi
cat <<-END >$filename || exit 1
- #!/bin/sh -p
+ #!/bin/sh
#
# Solaris Brand Replacement
#
@@ -333,10 +350,12 @@ post_unpack()
#
# Check if the image was created with a valid libc.so.1.
#
- hwcap=`moe -v -32 $ZONEROOT/lib/libc.so.1 2>&1`
- if (( $? != 0 )); then
- vlog "$f_hwcap_info" "$hwcap"
- fail_fatal "$f_sanity_hwcap"
+ if [[ -f $ZONEROOT/lib/libc.so.1 ]]; then
+ hwcap=`moe -v -32 $ZONEROOT/lib/libc.so.1 2>&1`
+ if (( $? != 0 )); then
+ vlog "$f_hwcap_info" "$hwcap"
+ fail_fatal "$f_sanity_hwcap"
+ fi
fi
( cd "$ZONEROOT" && \
@@ -1003,41 +1022,36 @@ install_image()
return 0
}
-# Setup i18n output
-TEXTDOMAIN="SUNW_OST_OSCMD"
-export TEXTDOMAIN
-
-e_cannot_wrap=$(gettext "%s: error: wrapper file already exists")
-e_baddir=$(gettext "Invalid '%s' directory within the zone")
-e_badfile=$(gettext "Invalid '%s' file within the zone")
-e_path_abs=$(gettext "Pathname specified to -a '%s' must be absolute.")
-e_not_found=$(gettext "%s: error: file or directory not found.")
-e_install_abort=$(gettext "Installation aborted.")
-e_not_readable=$(gettext "Cannot read directory '%s'")
-e_not_dir=$(gettext "Error: must be a directory")
-e_unknown_archive=$(gettext "Error: Unknown archive format. Must be a flash archive, a cpio archive (can also be gzipped or bzipped), a pax XUSTAR archive, or a level 0 ufsdump archive.")
-e_absolute_archive=$(gettext "Error: archive contains absolute paths instead of relative paths.")
-e_mismatch_archive=$(gettext "Error: the archive top-level directory (%s) does not match the zonepath (%s).")
-e_tmpfile=$(gettext "Unable to create temporary file")
-e_root_full=$(gettext "Zonepath root %s exists and contains data; remove or move aside prior to install.")
-f_mkdir=$(gettext "Unable to create directory %s.")
-f_chmod=$(gettext "Unable to chmod directory %s.")
-f_chown=$(gettext "Unable to chown directory %s.")
-f_hwcap_info=$(gettext "HWCAP: %s\n")
-f_sanity_hwcap=$(gettext \
-"The image was created with an incompatible libc.so.1 hwcap lofs mount.\n"\
+e_cannot_wrap="%s: error: wrapper file already exists"
+e_baddir="Invalid '%s' directory within the zone"
+e_badfile="Invalid '%s' file within the zone"
+e_path_abs="Pathname specified to -a '%s' must be absolute."
+e_not_found="%s: error: file or directory not found."
+e_install_abort="Installation aborted."
+e_not_readable="Cannot read directory '%s'"
+e_not_dir="Error: must be a directory"
+e_unknown_archive="Error: Unknown archive format. Must be a flash archive, a cpio archive (can also be gzipped or bzipped), a pax XUSTAR archive, or a level 0 ufsdump archive."
+e_absolute_archive="Error: archive contains absolute paths instead of relative paths."
+e_mismatch_archive="Error: the archive top-level directory (%s) does not match the zonepath (%s)."
+e_tmpfile="Unable to create temporary file"
+e_root_full="Zonepath root %s exists and contains data; remove or move aside prior to install."
+f_mkdir="Unable to create directory %s."
+f_chmod="Unable to chmod directory %s."
+f_chown="Unable to chown directory %s."
+f_hwcap_info="HWCAP: %s\n"
+f_sanity_hwcap="The image was created with an incompatible libc.so.1 hwcap lofs mount.\n"\
" The zone will not boot on this platform. See the zone's\n"\
-" documentation for the recommended way to create the archive.")
+" documentation for the recommended way to create the archive."
-m_analyse_archive=$(gettext "Analysing the archive")
+m_analyse_archive="Analysing the archive"
-not_readable=$(gettext "Cannot read file '%s'")
-not_flar=$(gettext "Input is not a flash archive")
-bad_flar=$(gettext "Flash archive is a corrupt")
-bad_zfs_flar=$(gettext "Flash archive contains a ZFS send stream.\n\tRecreate the flar using the -L option with cpio or pax.")
-f_unpack_failed=$(gettext "Unpacking the archive failed")
-unknown_archiver=$(gettext "Archiver %s is not supported")
-cmd_not_exec=$(gettext "Required command '%s' not executable!")
+not_readable="Cannot read file '%s'"
+not_flar="Input is not a flash archive"
+bad_flar="Flash archive is a corrupt"
+bad_zfs_flar="Flash archive contains a ZFS send stream.\n\tRecreate the flar using the -L option with cpio or pax."
+f_unpack_failed="Unpacking the archive failed"
+unknown_archiver="Archiver %s is not supported"
+cmd_not_exec="Required command '%s' not executable!"
#
# Exit values used by the script, as #defined in <sys/zone.h>
diff --git a/usr/src/lib/brand/shared/zone/uninstall.ksh b/usr/src/lib/brand/shared/zone/uninstall.ksh
index 468a7ed92f..923363c864 100644
--- a/usr/src/lib/brand/shared/zone/uninstall.ksh
+++ b/usr/src/lib/brand/shared/zone/uninstall.ksh
@@ -35,31 +35,31 @@ bname=`basename $0`
#
# error messages
#
-m_usage=$(gettext "Usage: %s: [-hFn]")
-
-m_1_zfs_promote=$(gettext "promoting '%s'.")
-m_1_zfs_destroy=$(gettext "destroying '%s'.")
-m_2_zfs_rename=$(gettext "renaming '%s' to '%s'.")
-m_3_zfs_set=$(gettext "setting property %s='%s' for '%s'.")
-m_rm_r=$(gettext "recursively deleting '%s'.")
-m_rm=$(gettext "deleting '%s'.")
-
-w_no_ds=$(gettext "Warning: no zonepath dataset found.")
-
-f_usage_err=$(gettext "Error: invalid usage")
-f_abort=$(gettext "Error: internal error detected, aborting.")
-f_1_zfs_promote=$(gettext "Error: promoting ZFS dataset '%s'.")
-f_2_zfs_rename=$(gettext "Error: renaming ZFS dataset '%s' to '%s'.")
-f_3_zfs_set=$(gettext "Error: setting ZFS propery %s='%s' for '%s'.")
-f_1_zfs_destroy=$(gettext "Error: destroying ZFS dataset.")
-f_2_zfs_get=$(gettext "Error: reading ZFS dataset property '%s' from '%s'.")
-f_user_snap=$(gettext "Error: user snapshot(s) detected.")
-f_stray_snap=$(gettext "Error: uncloned snapshot(s) detected.")
-f_stray_clone=$(gettext "Error: cloned zone datasets found outsize of zone.")
-f_rm_snap=$(gettext "Error: please delete snapshot(s) and retry uninstall.")
-f_rm_clone=$(gettext "Error: please delete clone(s) and retry uninstall.")
-f_iu_clone=$(gettext "Error: cloned zone dataset(s) in use.")
-f_dis_clone=$(gettext "Error: please stop using clone(s) and retry uninstall.")
+m_usage="Usage: %s: [-hFn]"
+
+m_1_zfs_promote="promoting '%s'."
+m_1_zfs_destroy="destroying '%s'."
+m_2_zfs_rename="renaming '%s' to '%s'."
+m_3_zfs_set="setting property %s='%s' for '%s'."
+m_rm_r="recursively deleting '%s'."
+m_rm="deleting '%s'."
+
+w_no_ds="Warning: no zonepath dataset found."
+
+f_usage_err="Error: invalid usage"
+f_abort="Error: internal error detected, aborting."
+f_1_zfs_promote="Error: promoting ZFS dataset '%s'."
+f_2_zfs_rename="Error: renaming ZFS dataset '%s' to '%s'."
+f_3_zfs_set="Error: setting ZFS propery %s='%s' for '%s'."
+f_1_zfs_destroy="Error: destroying ZFS dataset."
+f_2_zfs_get="Error: reading ZFS dataset property '%s' from '%s'."
+f_user_snap="Error: user snapshot(s) detected."
+f_stray_snap="Error: uncloned snapshot(s) detected."
+f_stray_clone="Error: cloned zone datasets found outsize of zone."
+f_rm_snap="Error: please delete snapshot(s) and retry uninstall."
+f_rm_clone="Error: please delete clone(s) and retry uninstall."
+f_iu_clone="Error: cloned zone dataset(s) in use."
+f_dis_clone="Error: please stop using clone(s) and retry uninstall."
#
# functions
diff --git a/usr/src/lib/common/i386/crtn.s b/usr/src/lib/common/i386/crtn.s
index 6e37e25a8f..413c102006 100644
--- a/usr/src/lib/common/i386/crtn.s
+++ b/usr/src/lib/common/i386/crtn.s
@@ -33,7 +33,6 @@
*
* For further details - see bug#4433015
*/
- .ident "%Z%%M% %I% %E% SMI"
.file "crtn.s"
/*
diff --git a/usr/src/lib/commpage/Makefile.shared.com b/usr/src/lib/commpage/Makefile.shared.com
new file mode 100644
index 0000000000..29cd826706
--- /dev/null
+++ b/usr/src/lib/commpage/Makefile.shared.com
@@ -0,0 +1,27 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2016 Joyent, Inc.
+#
+
+#
+# This Makefile is shared between both libc and other consumers
+#
+
+COMMPAGE_OBJS = \
+ cp_subr.o \
+ cp_main.o
+
+pics/cp_main.o := CPPFLAGS += -I$(SRC)/uts/i86pc
+pics/cp_subr.o := ASFLAGS += -I$(SRC)/uts/i86pc
+
+COMMPAGE_CPPFLAGS = -I$(SRC)/lib/commpage/common
diff --git a/usr/src/lib/commpage/Makefile.shared.targ b/usr/src/lib/commpage/Makefile.shared.targ
new file mode 100644
index 0000000000..667634cafa
--- /dev/null
+++ b/usr/src/lib/commpage/Makefile.shared.targ
@@ -0,0 +1,26 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2016 Joyent, Inc.
+#
+
+#
+# This Makefile is shared between both libc and other consumers
+#
+
+pics/%.o: $(SRC)/lib/commpage/common/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
+
+pics/%.o: $(SRC)/lib/commpage/$(TARGET_ARCH)/%.s
+ $(COMPILE.s) -o $@ $<
+ $(POST_PROCESS_O)
diff --git a/usr/src/lib/commpage/amd64/cp_subr.s b/usr/src/lib/commpage/amd64/cp_subr.s
new file mode 100644
index 0000000000..ceb946acf0
--- /dev/null
+++ b/usr/src/lib/commpage/amd64/cp_subr.s
@@ -0,0 +1,140 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <sys/asm_linkage.h>
+#include <sys/segments.h>
+#include <sys/time_impl.h>
+#include <sys/tsc.h>
+
+#define GETCPU_GDT_OFFSET SEL_GDT(GDT_CPUID, SEL_UPL)
+
+ .file "cp_subr.s"
+
+/*
+ * hrtime_t
+ * __cp_tsc_read(uint_t cp_tsc_type)
+ */
+ ENTRY_NP(__cp_tsc_read)
+ cmpl $TSC_TSCP, %edi
+ je 1f
+ cmpl $TSC_RDTSC_MFENCE, %edi
+ je 2f
+ cmpl $TSC_RDTSC_LFENCE, %edi
+ je 3f
+ cmpl $TSC_RDTSC_CPUID, %edi
+ je 4f
+ ud2a /* abort with SIGILL */
+1:
+ rdtscp
+ jmp 5f
+2:
+ mfence
+ rdtsc
+ jmp 5f
+3:
+ lfence
+ rdtsc
+ jmp 5f
+4:
+ movq %rbx, %r11
+ xorl %eax, %eax
+ cpuid
+ rdtsc
+ movq %r11, %rbx
+5:
+ shlq $0x20, %rdx
+ orq %rdx, %rax
+ ret
+ SET_SIZE(__cp_tsc_read)
+
+
+/*
+ * hrtime_t
+ * __cp_tsc_readcpu(uint_t cp_tsc_type, uint_t *cpu_id)
+ */
+ ENTRY_NP(__cp_tsc_readcpu)
+ /*
+ * Both time and cpu_id can be queried quickly (using few registers) on
+ * systems which support RDTSCP. On each cpu, the cpu_id is stored in
+ * the TSC_AUX MSR by the kernel.
+ */
+ cmpl $TSC_TSCP, %edi
+ jne 1f
+ rdtscp
+ movl %ecx, (%rsi)
+ shlq $0x20, %rdx
+ orq %rdx, %rax
+ ret
+1:
+ mov $GETCPU_GDT_OFFSET, %eax
+ lsl %ax, %eax
+ movq %rax, %r11
+ cmpl $TSC_RDTSC_MFENCE, %edi
+ je 2f
+ cmpl $TSC_RDTSC_LFENCE, %edi
+ je 3f
+ cmpl $TSC_RDTSC_CPUID, %edi
+ je 4f
+ ud2a /* abort with SIGILL */
+2:
+ mfence
+ rdtsc
+ jmp 5f
+3:
+ lfence
+ rdtsc
+ jmp 5f
+4:
+ movq %rbx, %r10
+ xorl %eax, %eax
+ cpuid
+ rdtsc
+ movq %r10, %rbx
+5:
+ shlq %rdx
+ orq %rax, %rdx
+ /*
+ * With a TSC reading in-hand, confirm that the thread has not migrated
+ * since the cpu_id was first checked.
+ */
+ mov $GETCPU_GDT_OFFSET, %eax
+ lsl %ax, %eax
+ cmpq %rax, %r11
+ jne 1b
+ movl %eax, (%rsi)
+ movq %rdx, %rax
+ ret
+ SET_SIZE(__cp_tsc_readcpu)
+
+
+/*
+ * uint_t
+ * __cp_do_getcpu(uint_t cp_tsc_type)
+ */
+ ENTRY_NP(__cp_do_getcpu)
+ /*
+ * If RDTSCP is available, it is a quick way to grab the cpu_id which
+ * is stored in the TSC_AUX MSR by the kernel.
+ */
+ cmpl $TSC_TSCP, %edi
+ jne 1f
+ rdtscp
+ movl %ecx, %eax
+ ret
+1:
+ mov $GETCPU_GDT_OFFSET, %eax
+ lsl %ax, %eax
+ ret
+ SET_SIZE(__cp_do_getcpu)
diff --git a/usr/src/lib/commpage/common/cp_defs.h b/usr/src/lib/commpage/common/cp_defs.h
new file mode 100644
index 0000000000..e2a6cbca45
--- /dev/null
+++ b/usr/src/lib/commpage/common/cp_defs.h
@@ -0,0 +1,36 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#ifndef _CP_DEFS_H_
+#define _CP_DEFS_H_
+
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct comm_page;
+typedef struct comm_page comm_page_t;
+
+extern uint_t __cp_can_gettime(comm_page_t *);
+extern hrtime_t __cp_gethrtime(comm_page_t *);
+extern void __cp_clock_gettime_realtime(comm_page_t *, timespec_t *);
+extern uint_t __cp_getcpu(comm_page_t *cp);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* _CP_DEFS_H_ */
diff --git a/usr/src/lib/commpage/common/cp_main.c b/usr/src/lib/commpage/common/cp_main.c
new file mode 100644
index 0000000000..8386767695
--- /dev/null
+++ b/usr/src/lib/commpage/common/cp_main.c
@@ -0,0 +1,185 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+
+#include <sys/comm_page.h>
+#include <sys/tsc.h>
+
+/*
+ * ASM-defined functions.
+ */
+extern hrtime_t __cp_tsc_read(uint_t);
+extern hrtime_t __cp_tsc_readcpu(uint_t, uint_t *);
+extern uint_t __cp_do_getcpu(uint_t);
+
+/*
+ * These are cloned from TSC and time related code in the kernel. The should
+ * be kept in sync in the case that the source values are changed.
+ */
+#define NSEC_SHIFT 5
+#define ADJ_SHIFT 4
+#define NANOSEC 1000000000LL
+
+#define TSC_CONVERT_AND_ADD(tsc, hrt, scale) do { \
+ uint32_t *_l = (uint32_t *)&(tsc); \
+ uint64_t sc = (uint32_t)(scale); \
+ (hrt) += (uint64_t)(_l[1] * sc) << NSEC_SHIFT; \
+ (hrt) += (uint64_t)(_l[0] * sc) >> (32 - NSEC_SHIFT); \
+} while (0)
+
+
+/*
+ * Userspace version of tsc_gethrtime.
+ * See: uts/i86pc/os/timestamp.c
+ */
+hrtime_t
+__cp_gethrtime(comm_page_t *cp)
+{
+ uint32_t old_hres_lock;
+ hrtime_t tsc, hrt, tsc_last;
+
+ /*
+ * Several precautions must be taken when collecting the data necessary
+ * to perform an accurate gethrtime calculation.
+ *
+ * While much of the TSC state stored in the comm page is unchanging
+ * after boot, portions of it are periodically updated during OS ticks.
+ * Changes to hres_lock during the course of the copy indicates a
+ * potentially inconsistent snapshot, necessitating a loop.
+ *
+ * Even more complicated is the handling for TSCs which require sync
+ * offsets between different CPUs. Since userspace lacks the luxury of
+ * disabling interrupts, a validation loop checking for CPU migrations
+ * is used. Pathological scheduling could, in theory, "outwit"
+ * this check. Such a possibility is considered an acceptable risk.
+ *
+ */
+ if (cp->cp_tsc_ncpu == 0) {
+ /*
+ * No per-CPU offset data, use the simple hres_lock loop.
+ */
+ do {
+ old_hres_lock = cp->cp_hres_lock;
+ tsc_last = cp->cp_tsc_last;
+ hrt = cp->cp_tsc_hrtime_base;
+ tsc = __cp_tsc_read(cp->cp_tsc_type);
+ } while ((old_hres_lock & ~1) != cp->cp_hres_lock);
+ } else {
+ /*
+ * Per-CPU offset data is needed for an accurate TSC reading.
+ */
+ do {
+ uint_t cpu_id;
+
+ old_hres_lock = cp->cp_hres_lock;
+ tsc_last = cp->cp_tsc_last;
+ hrt = cp->cp_tsc_hrtime_base;
+ /*
+ * When collecting the TSC and cpu_id, cp_tsc_readcpu
+ * will accurately detect CPU migrations in all but
+ * the most pathological scheduling conditions.
+ */
+ tsc = __cp_tsc_readcpu(cp->cp_tsc_type, &cpu_id);
+ tsc += cp->cp_tsc_sync_tick_delta[cpu_id];
+ } while ((old_hres_lock & ~1) != cp->cp_hres_lock);
+ }
+
+ if (tsc >= tsc_last) {
+ tsc -= tsc_last;
+ } else if (tsc >= tsc_last - (2 * cp->cp_tsc_max_delta)) {
+ tsc = 0;
+ } else if (tsc > cp->cp_tsc_resume_cap) {
+ tsc = cp->cp_tsc_resume_cap;
+ }
+ TSC_CONVERT_AND_ADD(tsc, hrt, cp->cp_nsec_scale);
+
+ return (hrt);
+}
+
+/*
+ * Userspace version of pc_gethrestime.
+ * See: uts/i86pc/os/machdep.c
+ */
+void
+__cp_clock_gettime_realtime(comm_page_t *cp, timespec_t *tp)
+{
+ int lock_prev, nslt, adj;
+ timespec_t now;
+ int64_t hres_adj;
+
+loop:
+ lock_prev = cp->cp_hres_lock;
+ now.tv_sec = cp->cp_hrestime[0];
+ now.tv_nsec = cp->cp_hrestime[1];
+ nslt = (int)(__cp_gethrtime(cp) - cp->cp_hres_last_tick);
+ hres_adj = cp->cp_hrestime_adj;
+ if (nslt < 0) {
+ /*
+ * Tick came between sampling hrtime and hres_last_tick;
+ */
+ goto loop;
+ }
+ now.tv_nsec += nslt;
+ if (hres_adj != 0) {
+ if (hres_adj > 0) {
+ adj = (nslt >> ADJ_SHIFT);
+ if (adj > hres_adj)
+ adj = (int)hres_adj;
+ } else {
+ adj = -(nslt >> ADJ_SHIFT);
+ if (adj < hres_adj)
+ adj = (int)hres_adj;
+ }
+ now.tv_nsec += adj;
+ }
+ while ((unsigned long)now.tv_nsec >= NANOSEC) {
+ /*
+ * Rope in tv_nsec from any excessive adjustments.
+ */
+ now.tv_nsec -= NANOSEC;
+ now.tv_sec++;
+ }
+ if ((cp->cp_hres_lock & ~1) != lock_prev)
+ goto loop;
+
+ *tp = now;
+}
+
+/*
+ * Interrogate if querying the clock via the comm page is possible.
+ */
+int
+__cp_can_gettime(comm_page_t *cp)
+{
+ switch (cp->cp_tsc_type) {
+ case TSC_TSCP:
+ case TSC_RDTSC_MFENCE:
+ case TSC_RDTSC_LFENCE:
+ case TSC_RDTSC_CPUID:
+ return (0);
+ default:
+ break;
+ }
+ return (1);
+}
+
+/*
+ * Query which CPU this LWP is running on.
+ */
+uint_t
+__cp_getcpu(comm_page_t *cp)
+{
+ return (__cp_do_getcpu(cp->cp_tsc_type));
+}
diff --git a/usr/src/lib/commpage/i386/cp_subr.s b/usr/src/lib/commpage/i386/cp_subr.s
new file mode 100644
index 0000000000..3dda195c12
--- /dev/null
+++ b/usr/src/lib/commpage/i386/cp_subr.s
@@ -0,0 +1,153 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <sys/asm_linkage.h>
+#include <sys/segments.h>
+#include <sys/time_impl.h>
+#include <sys/tsc.h>
+
+#define GETCPU_GDT_OFFSET SEL_GDT(GDT_CPUID, SEL_UPL)
+
+ .file "cp_subr.s"
+
+/*
+ * hrtime_t
+ * __cp_tsc_read(uint_t cp_tsc_type)
+ */
+ ENTRY_NP(__cp_tsc_read)
+ movl 4(%esp), %eax
+ cmpl $TSC_TSCP, %eax
+ je 1f
+ cmpl $TSC_RDTSC_MFENCE, %eax
+ je 2f
+ cmpl $TSC_RDTSC_LFENCE, %eax
+ je 3f
+ cmpl $TSC_RDTSC_CPUID, %eax
+ je 4f
+ ud2a /* abort with SIGILL */
+1:
+ rdtscp
+ ret
+2:
+ mfence
+ rdtsc
+ ret
+3:
+ lfence
+ rdtsc
+ ret
+4:
+ pushl %ebx
+ xorl %eax, %eax
+ cpuid
+ rdtsc
+ popl %ebx
+ ret
+ SET_SIZE(__cp_tsc_read)
+
+/*
+ * hrtime_t
+ * __cp_tsc_readcpu(uint_t cp_tsc_type, uint_t *cpu_id)
+ */
+ ENTRY_NP(__cp_tsc_readcpu)
+ /*
+ * Both time and cpu_id can be queried quickly (using few registers) on
+ * systems which support RDTSCP. On each cpu, the cpu_id is stored in
+ * the TSC_AUX MSR by the kernel.
+ */
+ movl 4(%esp), %eax
+ cmpl $TSC_TSCP, %eax
+ jne 1f
+ rdtscp
+ pushl %eax
+ movl 0xc(%esp), %eax
+ movl %ecx, (%eax)
+ popl %eax
+ ret
+1:
+ /*
+ * Since the other methods of querying the TSC and cpu_id are
+ * vulnurable to CPU migrations, build a proper stack frame so a more
+ * complicated and thorough check and be performed.
+ */
+ pushl %ebp
+ movl %esp, %ebp
+ pushl %edi
+ pushl %esi
+ movl %eax, %edi
+2:
+ mov $GETCPU_GDT_OFFSET, %eax
+ lsl %ax, %eax
+ movl %eax, %esi
+ cmpl $TSC_RDTSC_MFENCE, %edi
+ je 3f
+ cmpl $TSC_RDTSC_LFENCE, %edi
+ je 4f
+ cmpl $TSC_RDTSC_CPUID, %edi
+ je 5f
+ ud2a /* abort with SIGILL */
+3:
+ mfence
+ rdtsc
+ jmp 6f
+4:
+ lfence
+ rdtsc
+ jmp 6f
+5:
+ pushl %ebx
+ xorl %eax, %eax
+ cpuid
+ rdtsc
+ popl %ebx
+6:
+ /*
+ * With a TSC reading in-hand, confirm that the thread has not migrated
+ * since the cpu_id was first checked.
+ */
+ pushl %eax
+ mov $GETCPU_GDT_OFFSET, %eax
+ lsl %ax, %eax
+ cmpl %eax, %esi
+ jne 2b
+ movl 0xc(%ebp), %edi
+ mov %eax, (%edi)
+ popl %eax
+ popl %esi
+ popl %edi
+ leave
+ ret
+ SET_SIZE(__cp_tsc_readcpu)
+
+/*
+ * uint_t
+ * __cp_do_getcpu(uint_t cp_tsc_type)
+ */
+ ENTRY_NP(__cp_do_getcpu)
+ /*
+ * If RDTSCP is available, it is a quick way to grab the cpu_id which
+ * is stored in the TSC_AUX MSR by the kernel.
+ */
+ movl 4(%esp), %eax
+ cmpl $TSC_TSCP, %eax
+ jne 1f
+ rdtscp
+ movl %ecx, %eax
+ ret
+1:
+ mov $GETCPU_GDT_OFFSET, %eax
+ lsl %ax, %eax
+ ret
+ SET_SIZE(__cp_do_getcpu)
diff --git a/usr/src/lib/fm/topo/libtopo/common/libtopo.h b/usr/src/lib/fm/topo/libtopo/common/libtopo.h
index 3ea35cdddd..e0adb6e0ab 100644
--- a/usr/src/lib/fm/topo/libtopo/common/libtopo.h
+++ b/usr/src/lib/fm/topo/libtopo/common/libtopo.h
@@ -22,6 +22,9 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
*/
+/*
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ */
#ifndef _LIBTOPO_H
#define _LIBTOPO_H
@@ -389,6 +392,8 @@ extern void topo_hdl_free(topo_hdl_t *, void *, size_t);
extern int topo_hdl_nvalloc(topo_hdl_t *, nvlist_t **, uint_t);
extern int topo_hdl_nvdup(topo_hdl_t *, nvlist_t *, nvlist_t **);
extern char *topo_hdl_strdup(topo_hdl_t *, const char *);
+extern char *topo_hdl_strsplit(topo_hdl_t *, const char *, const char *,
+ char **);
/*
* Interfaces for converting sensor/indicator types, units, states, etc to
diff --git a/usr/src/lib/fm/topo/libtopo/common/mapfile-vers b/usr/src/lib/fm/topo/libtopo/common/mapfile-vers
index b81f4fd7c6..c6ff800951 100644
--- a/usr/src/lib/fm/topo/libtopo/common/mapfile-vers
+++ b/usr/src/lib/fm/topo/libtopo/common/mapfile-vers
@@ -76,6 +76,7 @@ SYMBOL_VERSION SUNWprivate {
topo_hdl_prominfo;
topo_hdl_strdup;
topo_hdl_strfree;
+ topo_hdl_strsplit;
topo_hdl_zalloc;
topo_led_state_name;
topo_led_type_name;
@@ -118,6 +119,7 @@ SYMBOL_VERSION SUNWprivate {
topo_mod_str2nvl;
topo_mod_strdup;
topo_mod_strfree;
+ topo_mod_strsplit;
topo_mod_unload;
topo_mod_unregister;
topo_mod_walk_init;
diff --git a/usr/src/lib/fm/topo/libtopo/common/topo_mod.h b/usr/src/lib/fm/topo/libtopo/common/topo_mod.h
index 2a137ad388..e6dda440a0 100644
--- a/usr/src/lib/fm/topo/libtopo/common/topo_mod.h
+++ b/usr/src/lib/fm/topo/libtopo/common/topo_mod.h
@@ -22,6 +22,9 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
*/
+/*
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ */
#ifndef _TOPO_MOD_H
#define _TOPO_MOD_H
@@ -215,6 +218,8 @@ extern void *topo_mod_zalloc(topo_mod_t *, size_t);
extern void topo_mod_free(topo_mod_t *, void *, size_t);
extern char *topo_mod_strdup(topo_mod_t *, const char *);
extern void topo_mod_strfree(topo_mod_t *, char *);
+extern char *topo_mod_strsplit(topo_mod_t *, const char *, const char *,
+ char **);
extern int topo_mod_nvalloc(topo_mod_t *, nvlist_t **, uint_t);
extern int topo_mod_nvdup(topo_mod_t *, nvlist_t *, nvlist_t **);
diff --git a/usr/src/lib/fm/topo/libtopo/common/topo_string.c b/usr/src/lib/fm/topo/libtopo/common/topo_string.c
index 2c1f451770..2f59013e9e 100644
--- a/usr/src/lib/fm/topo/libtopo/common/topo_string.c
+++ b/usr/src/lib/fm/topo/libtopo/common/topo_string.c
@@ -23,8 +23,9 @@
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ */
#include <strings.h>
#include <ctype.h>
@@ -56,6 +57,58 @@ topo_hdl_strfree(topo_hdl_t *thp, char *s)
}
char *
+topo_hdl_strsplit(topo_hdl_t *hdl, const char *input, const char *sep,
+ char **lastp)
+{
+ size_t seplen = strlen(sep);
+ const char *scanstart;
+ char *token;
+ char *ret;
+
+ if (input != NULL) {
+ /*
+ * Start scanning at beginning of input:
+ */
+ scanstart = input;
+ } else if (*lastp == NULL) {
+ /*
+ * If we have already finished scanning, return NULL.
+ */
+ return (NULL);
+ } else {
+ /*
+ * Otherwise, start scanning where we left off:
+ */
+ scanstart = *lastp;
+ }
+
+ token = strstr(scanstart, sep);
+ if (token != NULL) {
+ /*
+ * We still have a separator, so advance the next-start
+ * pointer past it:
+ */
+ *lastp = token + seplen;
+ /*
+ * Copy out this element. The buffer must fit the string
+ * exactly, so that topo_hdl_strfree() can determine its
+ * size with strlen().
+ */
+ ret = topo_hdl_alloc(hdl, token - scanstart + 1);
+ (void) strncpy(ret, scanstart, token - scanstart);
+ ret[token - scanstart] = '\0';
+ } else {
+ /*
+ * We have no separator, so this is the last element:
+ */
+ *lastp = NULL;
+ ret = topo_hdl_strdup(hdl, scanstart);
+ }
+
+ return (ret);
+}
+
+char *
topo_mod_strdup(topo_mod_t *mod, const char *s)
{
return (topo_hdl_strdup(mod->tm_hdl, s));
@@ -67,6 +120,13 @@ topo_mod_strfree(topo_mod_t *mod, char *s)
topo_hdl_strfree(mod->tm_hdl, s);
}
+char *
+topo_mod_strsplit(topo_mod_t *mod, const char *input, const char *sep,
+ char **lastp)
+{
+ return (topo_hdl_strsplit(mod->tm_hdl, input, sep, lastp));
+}
+
const char *
topo_strbasename(const char *s)
{
diff --git a/usr/src/lib/fm/topo/maps/Joyent,Joyent-Compute-Platform-1101/Joyent-Compute-Platform-1101-disk-hc-topology.xmlgenksh b/usr/src/lib/fm/topo/maps/Joyent,Joyent-Compute-Platform-1101/Joyent-Compute-Platform-1101-disk-hc-topology.xmlgenksh
index 000742db71..874bad142f 100644..100755
--- a/usr/src/lib/fm/topo/maps/Joyent,Joyent-Compute-Platform-1101/Joyent-Compute-Platform-1101-disk-hc-topology.xmlgenksh
+++ b/usr/src/lib/fm/topo/maps/Joyent,Joyent-Compute-Platform-1101/Joyent-Compute-Platform-1101-disk-hc-topology.xmlgenksh
@@ -69,17 +69,19 @@ EOF
enclosure=1
bay=0
slot=0
-devctl='/devices/pci@0,0/pci8086,3c02@1/pci15d9,691@0:devctl'
+devctl0='/devices/pci@0,0/pci8086,3c02@1/pci15d9,691@0:devctl'
+devctl1='/devices/pci@0,0/pci8086,e02@1/pci15d9,691@0:devctl'
while (( slot <= 7 )); do
- do_node $bay "Front Disk $bay" "$devctl" $enclosure $slot
+ do_node $bay "Front Disk $bay" "$devctl0|$devctl1" $enclosure $slot
(( bay = bay + 1 ))
(( slot = slot + 1 ))
done
slot=0
-devctl='/devices/pci@0,0/pci8086,3c06@2,2/pci15d9,691@0:devctl'
+devctl0='/devices/pci@0,0/pci8086,3c06@2,2/pci15d9,691@0:devctl'
+devctl1='/devices/pci@0,0/pci8086,e06@2,2/pci15d9,691@0:devctl'
while (( slot <= 7 )); do
- do_node $bay "Front Disk $bay" "$devctl" $enclosure $slot
+ do_node $bay "Front Disk $bay" "$devctl0|$devctl1" $enclosure $slot
(( bay = bay + 1 ))
(( slot = slot + 1 ))
done
diff --git a/usr/src/lib/fm/topo/maps/i86pc/i86pc-legacy-hc-topology.xml b/usr/src/lib/fm/topo/maps/i86pc/i86pc-legacy-hc-topology.xml
index fda43bca6c..7d8f85fd88 100644
--- a/usr/src/lib/fm/topo/maps/i86pc/i86pc-legacy-hc-topology.xml
+++ b/usr/src/lib/fm/topo/maps/i86pc/i86pc-legacy-hc-topology.xml
@@ -2,7 +2,7 @@
<!DOCTYPE topology SYSTEM "/usr/share/lib/xml/dtd/topology.dtd.1">
<!--
Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
-Copyright (c) 2013, Joyent, Inc. All rights reserved.
+Copyright (c) 2014, Joyent, Inc. All rights reserved.
CDDL HEADER START
@@ -152,7 +152,7 @@ Copyright (c) 2013, Joyent, Inc. All rights reserved.
</range>
</set>
- <set type='product' setlist='Joyent-Compute-Platform-1101|Joyent-Compute-Platform-1102'>
+ <set type='product' setlist='Joyent-Compute-Platform-1101|Joyent-Compute-Platform-1102|Joyent-Compute-Platform-2101|Joyent-Compute-Platform-2102'>
<range name='psu' min='0' max='1'>
<enum-method name='ipmi' version='1' />
</range>
diff --git a/usr/src/lib/fm/topo/modules/common/disk/disk_mptsas.c b/usr/src/lib/fm/topo/modules/common/disk/disk_mptsas.c
index db853c6695..beae89f9f1 100644
--- a/usr/src/lib/fm/topo/modules/common/disk/disk_mptsas.c
+++ b/usr/src/lib/fm/topo/modules/common/disk/disk_mptsas.c
@@ -32,11 +32,18 @@
#include "disk.h"
#include "disk_drivers.h"
+/*
+ * Request the SAS address of the disk (if any) attached to this mpt_sas
+ * instance at (Enclosure Number, Slot Number). The function returns
+ * -1 on error and sets errno to ENOENT _only_ if the /devices node
+ * (*devctl) does not exist.
+ */
static int
get_sas_address(topo_mod_t *mod, char *devctl, uint32_t enclosure,
uint32_t slot, char **sas_address)
{
- int fd, err, i;
+ int ret = -1, en = ENXIO;
+ int fd, i;
mptsas_get_disk_info_t gdi;
mptsas_disk_info_t *di;
size_t disz;
@@ -44,16 +51,19 @@ get_sas_address(topo_mod_t *mod, char *devctl, uint32_t enclosure,
bzero(&gdi, sizeof (gdi));
if ((fd = open(devctl, O_RDWR)) == -1) {
+ en = errno;
topo_mod_dprintf(mod, "could not open '%s' for ioctl: %s\n",
devctl, strerror(errno));
+ errno = en;
return (-1);
}
if (ioctl(fd, MPTIOCTL_GET_DISK_INFO, &gdi) == -1) {
+ if (errno != ENOENT)
+ en = errno;
topo_mod_dprintf(mod, "ioctl 1 on '%s' failed: %s\n", devctl,
strerror(errno));
- (void) close(fd);
- return (-1);
+ goto out;
}
gdi.DiskInfoArraySize = disz = sizeof (mptsas_disk_info_t) *
@@ -61,19 +71,19 @@ get_sas_address(topo_mod_t *mod, char *devctl, uint32_t enclosure,
gdi.PtrDiskInfoArray = di = topo_mod_alloc(mod, disz);
if (di == NULL) {
topo_mod_dprintf(mod, "memory allocation failed\n");
- (void) close(fd);
- return (-1);
+ en = ENOMEM;
+ goto out;
}
if (ioctl(fd, MPTIOCTL_GET_DISK_INFO, &gdi) == -1) {
+ if (errno != ENOENT)
+ en = errno;
topo_mod_dprintf(mod, "ioctl 2 on '%s' failed: %s\n", devctl,
strerror(errno));
topo_mod_free(mod, di, disz);
- (void) close(fd);
- return (-1);
+ goto out;
}
- err = -1;
for (i = 0; i < gdi.DiskCount; i++) {
if (di[i].Enclosure == enclosure && di[i].Slot == slot) {
char sas[17]; /* 16 hex digits and NUL */
@@ -81,14 +91,16 @@ get_sas_address(topo_mod_t *mod, char *devctl, uint32_t enclosure,
topo_mod_dprintf(mod, "found mpt_sas disk (%d/%d) "
"with adddress %s\n", enclosure, slot, sas);
*sas_address = topo_mod_strdup(mod, sas);
- err = 0;
+ en = ret = 0;
break;
}
}
topo_mod_free(mod, di, disz);
+out:
(void) close(fd);
- return (err);
+ errno = en;
+ return (ret);
}
int
@@ -97,6 +109,8 @@ disk_mptsas_find_disk(topo_mod_t *mod, tnode_t *baynode, char **sas_address)
char *devctl = NULL;
uint32_t enclosure, slot;
int err;
+ char *elem, *lastp;
+ int ret = -1;
/*
* Get the required properties from the node. These come from
@@ -115,6 +129,35 @@ disk_mptsas_find_disk(topo_mod_t *mod, tnode_t *baynode, char **sas_address)
return (-1);
}
- return (get_sas_address(mod, devctl, enclosure, slot, sas_address));
+ /*
+ * devctl is a (potentially) pipe-separated list of different device
+ * paths to try.
+ */
+ if ((elem = topo_mod_strsplit(mod, devctl, "|", &lastp)) != NULL) {
+ boolean_t done = B_FALSE;
+ do {
+ topo_mod_dprintf(mod, "trying mpt_sas instance at %s\n",
+ elem);
+
+ ret = get_sas_address(mod, elem, enclosure,
+ slot, sas_address);
+
+ /*
+ * Only try further devctl paths from the list if this
+ * one was not found:
+ */
+ if (ret == 0 || errno != ENOENT) {
+ done = B_TRUE;
+ } else {
+ topo_mod_dprintf(mod, "instance not found\n");
+ }
+
+ topo_mod_strfree(mod, elem);
+
+ } while (!done && (elem = topo_mod_strsplit(mod, NULL, "|",
+ &lastp)) != NULL);
+ }
+ topo_mod_strfree(mod, devctl);
+ return (ret);
}
diff --git a/usr/src/lib/fm/topo/modules/common/fac_prov_mptsas/fac_prov_mptsas.c b/usr/src/lib/fm/topo/modules/common/fac_prov_mptsas/fac_prov_mptsas.c
index a49a131811..115e3b801d 100644
--- a/usr/src/lib/fm/topo/modules/common/fac_prov_mptsas/fac_prov_mptsas.c
+++ b/usr/src/lib/fm/topo/modules/common/fac_prov_mptsas/fac_prov_mptsas.c
@@ -72,6 +72,12 @@ _topo_fini(topo_mod_t *mod)
topo_mod_unregister(mod);
}
+/*
+ * Get or set LED state for a particular target attached to an mpt_sas
+ * instance at (Enclosure Number, Slot Number). The function returns
+ * -1 on error and sets errno to ENOENT _only_ if the /devices node
+ * (*devctl) does not exist.
+ */
static int
do_led_control(topo_mod_t *mod, char *devctl, uint16_t enclosure,
uint16_t slot, uint8_t led, uint32_t *ledmode, boolean_t set)
@@ -88,8 +94,10 @@ do_led_control(topo_mod_t *mod, char *devctl, uint16_t enclosure,
lc.LedStatus = *ledmode;
if ((fd = open(devctl, (set ? O_RDWR : O_RDONLY))) == -1) {
+ int en = errno;
topo_mod_dprintf(mod, "devctl open failed: %s",
strerror(errno));
+ errno = en;
return (-1);
}
@@ -103,9 +111,11 @@ do_led_control(topo_mod_t *mod, char *devctl, uint16_t enclosure,
*/
lc.LedStatus = 0;
} else {
+ int en = errno;
topo_mod_dprintf(mod, "led control ioctl failed: %s",
strerror(errno));
(void) close(fd);
+ errno = en;
return (-1);
}
}
@@ -113,6 +123,7 @@ do_led_control(topo_mod_t *mod, char *devctl, uint16_t enclosure,
*ledmode = lc.LedStatus ? TOPO_LED_STATE_ON : TOPO_LED_STATE_OFF;
(void) close(fd);
+ errno = 0;
return (0);
}
@@ -127,7 +138,8 @@ mptsas_led_mode(topo_mod_t *mod, tnode_t *node, topo_version_t vers,
char *driver = NULL, *devctl = NULL;
uint32_t enclosure, slot;
uint8_t mptsas_led;
- boolean_t set;
+ boolean_t set, done;
+ char *elem, *lastp;
if (vers > TOPO_METH_MPTSAS_LED_MODE_VERSION)
return (topo_mod_seterrno(mod, ETOPO_METHOD_VERNEW));
@@ -197,8 +209,41 @@ mptsas_led_mode(topo_mod_t *mod, tnode_t *node, topo_version_t vers,
topo_mod_dprintf(mod, "%s: Getting LED mode\n", __func__);
}
- if (do_led_control(mod, devctl, enclosure, slot, mptsas_led, &ledmode,
- set) != 0) {
+ /*
+ * devctl is a (potentially) pipe-separated list of different device
+ * paths to try.
+ */
+ if ((elem = topo_mod_strsplit(mod, devctl, "|", &lastp)) == NULL) {
+ topo_mod_dprintf(mod, "%s: could not parse devctl list",
+ __func__);
+ ret = topo_mod_seterrno(mod, EMOD_UNKNOWN);
+ goto out;
+ }
+ done = B_FALSE;
+ do {
+ topo_mod_dprintf(mod, "%s: trying mpt_sas instance at %s\n",
+ __func__, elem);
+
+ ret = do_led_control(mod, elem, enclosure, slot,
+ mptsas_led, &ledmode, set);
+
+ /*
+ * Only try further devctl paths from the list if this one
+ * was not found:
+ */
+ if (ret == 0 || errno != ENOENT) {
+ done = B_TRUE;
+ } else {
+ topo_mod_dprintf(mod, "%s: instance not found\n",
+ __func__);
+ }
+
+ topo_mod_strfree(mod, elem);
+
+ } while (!done && (elem = topo_mod_strsplit(mod, NULL, "|",
+ &lastp)) != NULL);
+
+ if (ret != 0) {
topo_mod_dprintf(mod, "%s: do_led_control failed", __func__);
ret = topo_mod_seterrno(mod, EMOD_UNKNOWN);
goto out;
diff --git a/usr/src/lib/krb5/plugins/preauth/pkinit/Makefile.com b/usr/src/lib/krb5/plugins/preauth/pkinit/Makefile.com
index 8449d22e24..07fcb2bbf7 100644
--- a/usr/src/lib/krb5/plugins/preauth/pkinit/Makefile.com
+++ b/usr/src/lib/krb5/plugins/preauth/pkinit/Makefile.com
@@ -70,7 +70,7 @@ CERRWARN += -_gcc=-Wno-unused-function
CFLAGS += $(CCVERBOSE) -I..
DYNFLAGS += $(KRUNPATH) $(KMECHLIB) -znodelete
-LDLIBS += -L $(ROOTLIBDIR) -lcrypto -lc
+LDLIBS += -L $(ROOTLIBDIR) -lsunw_crypto -lc
ROOTLIBDIR= $(ROOT)/usr/lib/krb5/plugins/preauth
diff --git a/usr/src/lib/libbc/inc/include/sys/socket.h b/usr/src/lib/libbc/inc/include/sys/socket.h
index 03961f805b..6607721e62 100644
--- a/usr/src/lib/libbc/inc/include/sys/socket.h
+++ b/usr/src/lib/libbc/inc/include/sys/socket.h
@@ -3,8 +3,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Copyright (c) 1982, 1985, 1986 Regents of the University of California.
* All rights reserved. The Berkeley software License Agreement
@@ -169,6 +167,4 @@ struct msghdr {
#define MSG_PEEK 0x2 /* peek at incoming message */
#define MSG_DONTROUTE 0x4 /* send without using routing tables */
-#define MSG_MAXIOVLEN 16
-
#endif /*!_sys_socket_h*/
diff --git a/usr/src/lib/libbe/common/be_list.c b/usr/src/lib/libbe/common/be_list.c
index fe002a2ce1..2534b7c32c 100644
--- a/usr/src/lib/libbe/common/be_list.c
+++ b/usr/src/lib/libbe/common/be_list.c
@@ -25,6 +25,7 @@
/*
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2011 Joyent, Inc. All rights reserved.
* Copyright 2015 Toomas Soome <tsoome@me.com>
* Copyright 2015 Gary Mills
* Copyright (c) 2016 Martin Matuska. All rights reserved.
diff --git a/usr/src/lib/libbrand/common/libbrand.c b/usr/src/lib/libbrand/common/libbrand.c
index 7d34df9e58..dc406d60a0 100644
--- a/usr/src/lib/libbrand/common/libbrand.c
+++ b/usr/src/lib/libbrand/common/libbrand.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc.
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
*/
@@ -323,6 +323,7 @@ i_substitute_tokens(const char *sbuf, char *dbuf, int dbuf_size,
const char *curr_zone)
{
int dst, src;
+ static char *env_pool = NULL;
/*
* Walk through the characters, substituting values as needed.
@@ -339,6 +340,13 @@ i_substitute_tokens(const char *sbuf, char *dbuf, int dbuf_size,
case '%':
dst += strlcpy(dbuf + dst, "%", dbuf_size - dst);
break;
+ case 'P':
+ if (env_pool == NULL)
+ env_pool = getenv("_ZONEADMD_ZPOOL");
+ if (env_pool == NULL)
+ break;
+ dst += strlcpy(dbuf + dst, env_pool, dbuf_size - dst);
+ break;
case 'R':
if (zonepath == NULL)
break;
@@ -810,13 +818,14 @@ brand_config_iter_privilege(brand_handle_t bh,
}
static int
-i_brand_platform_iter_mounts(struct brand_handle *bhp, const char *zonepath,
- int (*func)(void *, const char *, const char *, const char *,
- const char *), void *data, const xmlChar *mount_type)
+i_brand_platform_iter_mounts(struct brand_handle *bhp, const char *zonename,
+ const char *zonepath, int (*func)(void *, const char *, const char *,
+ const char *, const char *), void *data, const xmlChar *mount_type)
{
xmlNodePtr node;
xmlChar *special, *dir, *type, *opt;
char special_exp[MAXPATHLEN];
+ char dir_exp[MAXPATHLEN];
char opt_exp[MAXPATHLEN];
int ret;
@@ -841,7 +850,11 @@ i_brand_platform_iter_mounts(struct brand_handle *bhp, const char *zonepath,
/* Substitute token values as needed. */
if ((ret = i_substitute_tokens((char *)special,
special_exp, sizeof (special_exp),
- NULL, zonepath, NULL, NULL)) != 0)
+ zonename, zonepath, NULL, NULL)) != 0)
+ goto next;
+ if ((ret = i_substitute_tokens((char *)dir,
+ dir_exp, sizeof (dir_exp),
+ zonename, zonepath, NULL, NULL)) != 0)
goto next;
/* opt might not be defined */
@@ -851,11 +864,11 @@ i_brand_platform_iter_mounts(struct brand_handle *bhp, const char *zonepath,
} else {
if ((ret = i_substitute_tokens((char *)opt,
opt_exp, sizeof (opt_exp),
- NULL, zonepath, NULL, NULL)) != 0)
+ zonename, zonepath, NULL, NULL)) != 0)
goto next;
}
- ret = func(data, (char *)special_exp, (char *)dir,
+ ret = func(data, (char *)special_exp, (char *)dir_exp,
(char *)type, ((opt != NULL) ? opt_exp : NULL));
next:
@@ -885,13 +898,13 @@ next:
* %R Zonepath of zone
*/
int
-brand_platform_iter_gmounts(brand_handle_t bh, const char *zonepath,
- int (*func)(void *, const char *, const char *, const char *,
- const char *), void *data)
+brand_platform_iter_gmounts(brand_handle_t bh, const char *zonename,
+ const char *zonepath, int (*func)(void *, const char *, const char *,
+ const char *, const char *), void *data)
{
struct brand_handle *bhp = (struct brand_handle *)bh;
- return (i_brand_platform_iter_mounts(bhp, zonepath, func, data,
- DTD_ELEM_GLOBAL_MOUNT));
+ return (i_brand_platform_iter_mounts(bhp, zonename, zonepath, func,
+ data, DTD_ELEM_GLOBAL_MOUNT));
}
/*
@@ -905,7 +918,7 @@ brand_platform_iter_mounts(brand_handle_t bh, int (*func)(void *,
const char *, const char *, const char *, const char *), void *data)
{
struct brand_handle *bhp = (struct brand_handle *)bh;
- return (i_brand_platform_iter_mounts(bhp, NULL, func, data,
+ return (i_brand_platform_iter_mounts(bhp, NULL, NULL, func, data,
DTD_ELEM_MOUNT));
}
diff --git a/usr/src/lib/libbrand/common/libbrand.h b/usr/src/lib/libbrand/common/libbrand.h
index 5c5fb1b42e..30accf95ba 100644
--- a/usr/src/lib/libbrand/common/libbrand.h
+++ b/usr/src/lib/libbrand/common/libbrand.h
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc.
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
*/
@@ -106,8 +106,8 @@ extern int brand_config_iter_privilege(brand_handle_t,
extern int brand_platform_iter_devices(brand_handle_t, const char *,
int (*)(void *, const char *, const char *), void *, const char *);
extern int brand_platform_iter_gmounts(brand_handle_t, const char *,
- int (*)(void *, const char *, const char *, const char *, const char *),
- void *);
+ const char *, int (*)(void *, const char *, const char *, const char *,
+ const char *), void *);
extern int brand_platform_iter_link(brand_handle_t, int (*)(void *,
const char *, const char *), void *);
extern int brand_platform_iter_mounts(brand_handle_t, int (*)(void *,
diff --git a/usr/src/lib/libbsm/common/adt.c b/usr/src/lib/libbsm/common/adt.c
index 8c7b299e32..4cf0dd7566 100644
--- a/usr/src/lib/libbsm/common/adt.c
+++ b/usr/src/lib/libbsm/common/adt.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2013, Joyent, Inc. All rights reserved.
*/
#include <bsm/adt.h>
@@ -702,7 +703,37 @@ adt_get_hostIP(const char *hostname, au_tid_addr_t *p_term)
int tries = 3;
char msg[512];
int eai_err;
+ struct ifaddrlist al;
+ int family;
+ boolean_t found = B_FALSE;
+ /*
+ * getaddrinfo can take a long time to timeout if it can't map the
+ * hostname to an IP address so try to get an IP address from a local
+ * interface first.
+ */
+ family = AF_INET6;
+ if (adt_get_local_address(family, &al) == 0) {
+ found = B_TRUE;
+ } else {
+ family = AF_INET;
+ if (adt_get_local_address(family, &al) == 0)
+ found = B_TRUE;
+ }
+
+ if (found) {
+ if (family == AF_INET) {
+ p_term->at_type = AU_IPv4;
+ (void) memcpy(p_term->at_addr, &al.addr.addr, AU_IPv4);
+ } else {
+ p_term->at_type = AU_IPv6;
+ (void) memcpy(p_term->at_addr, &al.addr.addr6, AU_IPv6);
+ }
+
+ return (0);
+ }
+
+ /* Now try getaddrinfo */
while ((tries-- > 0) &&
((eai_err = getaddrinfo(hostname, NULL, NULL, &ai)) != 0)) {
/*
@@ -739,7 +770,9 @@ adt_get_hostIP(const char *hostname, au_tid_addr_t *p_term)
}
freeaddrinfo(ai);
return (0);
- } else if (auditstate & (AUC_AUDITING | AUC_NOSPACE)) {
+ }
+
+ if (auditstate & (AUC_AUDITING | AUC_NOSPACE)) {
auditinfo_addr_t audit_info;
/*
@@ -747,58 +780,23 @@ adt_get_hostIP(const char *hostname, au_tid_addr_t *p_term)
* kernel audit context
*/
if (auditon(A_GETKAUDIT, (caddr_t)&audit_info,
- sizeof (audit_info)) < 0) {
- adt_write_syslog("unable to get kernel audit context",
- errno);
- goto try_interface;
+ sizeof (audit_info)) >= 0) {
+ adt_write_syslog("setting Audit IP address to kernel",
+ 0);
+ *p_term = audit_info.ai_termid;
+ return (0);
}
- adt_write_syslog("setting Audit IP address to kernel", 0);
- *p_term = audit_info.ai_termid;
- return (0);
+ adt_write_syslog("unable to get kernel audit context", errno);
}
-try_interface:
- {
- struct ifaddrlist al;
- int family;
- char ntop[INET6_ADDRSTRLEN];
-
- /*
- * getaddrinfo has failed to map the hostname
- * to an IP address, try to get an IP address
- * from a local interface. If none up, default
- * to loopback.
- */
- family = AF_INET6;
- if (adt_get_local_address(family, &al) != 0) {
- family = AF_INET;
-
- if (adt_get_local_address(family, &al) != 0) {
- adt_write_syslog("adt_get_local_address "
- "failed, no Audit IP address available, "
- "faking loopback and error",
- errno);
- IN_SET_LOOPBACK_ADDR(
- (struct sockaddr_in *)&(al.addr.addr));
- (void) memcpy(p_term->at_addr, &al.addr.addr,
- AU_IPv4);
- p_term->at_type = AU_IPv4;
- return (-1);
- }
- }
- if (family == AF_INET) {
- p_term->at_type = AU_IPv4;
- (void) memcpy(p_term->at_addr, &al.addr.addr, AU_IPv4);
- } else {
- p_term->at_type = AU_IPv6;
- (void) memcpy(p_term->at_addr, &al.addr.addr6, AU_IPv6);
- }
- (void) snprintf(msg, sizeof (msg), "mapping %s to %s",
- hostname, inet_ntop(family, &(al.addr), ntop,
- sizeof (ntop)));
- adt_write_syslog(msg, 0);
- return (0);
- }
+ /* No mapping, default to loopback. */
+ errno = ENETDOWN;
+ adt_write_syslog("adt_get_local_address failed, no Audit IP address "
+ "available, faking loopback and error", errno);
+ IN_SET_LOOPBACK_ADDR((struct sockaddr_in *)&(al.addr.addr));
+ (void) memcpy(p_term->at_addr, &al.addr.addr, AU_IPv4);
+ p_term->at_type = AU_IPv4;
+ return (-1);
}
/*
@@ -2093,8 +2091,8 @@ adt_selected(struct adt_event_state *event, au_event_t actual_id, int status)
}
/*
- * Can't map the host name to an IP address in
- * adt_get_hostIP. Get something off an interface
+ * Before trying to map the host name to an IP address in
+ * adt_get_hostIP, get something off an interface
* to act as the hosts IP address for auditing.
*/
diff --git a/usr/src/lib/libbunyan/Makefile b/usr/src/lib/libbunyan/Makefile
new file mode 100644
index 0000000000..a59de91113
--- /dev/null
+++ b/usr/src/lib/libbunyan/Makefile
@@ -0,0 +1,42 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc.
+#
+
+include ../Makefile.lib
+
+HDRS = bunyan.h
+HDRDIR = common
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+install := TARGET = install
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber install lint: $(SUBDIRS)
+
+install_h: $(ROOTHDRS)
+
+check: $(CHECKHDRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../Makefile.targ
diff --git a/usr/src/lib/libbunyan/Makefile.com b/usr/src/lib/libbunyan/Makefile.com
new file mode 100644
index 0000000000..5214915c56
--- /dev/null
+++ b/usr/src/lib/libbunyan/Makefile.com
@@ -0,0 +1,36 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+LIBRARY = libbunyan.a
+VERS = .1
+OBJECTS = bunyan.o
+USDT_PROVIDERS = bunyan_provider.d
+
+include ../../Makefile.lib
+
+LIBS = $(DYNLIB) $(LINTLIB)
+LDLIBS += -lc -lumem -lnvpair -lnsl
+CPPFLAGS += -I../common -I. -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64
+
+SRCDIR = ../common
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+include ../../Makefile.targ
+include ../../Makefile.usdt
diff --git a/usr/src/lib/libbunyan/amd64/Makefile b/usr/src/lib/libbunyan/amd64/Makefile
new file mode 100644
index 0000000000..15d904c616
--- /dev/null
+++ b/usr/src/lib/libbunyan/amd64/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+include ../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/libbunyan/common/bunyan.c b/usr/src/lib/libbunyan/common/bunyan.c
new file mode 100644
index 0000000000..149702a38f
--- /dev/null
+++ b/usr/src/lib/libbunyan/common/bunyan.c
@@ -0,0 +1,913 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+#include <errno.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <stdarg.h>
+#include <umem.h>
+#include <netdb.h>
+#include <string.h>
+#include <strings.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <sys/sysmacros.h>
+#include <thread.h>
+#include <sys/debug.h>
+
+#include <bunyan.h>
+#include <bunyan_provider_impl.h>
+
+struct bunyan_key;
+struct bunyan_stream;
+struct bunyan;
+
+typedef struct bunyan_stream {
+ struct bunyan_stream *bs_next;
+ char *bs_name;
+ bunyan_level_t bs_level;
+ bunyan_stream_f bs_func;
+ void *bs_arg;
+ uint_t bs_count;
+} bunyan_stream_t;
+
+typedef struct bunyan_key {
+ struct bunyan_key *bk_next;
+ char *bk_name;
+ bunyan_type_t bk_type;
+ void *bk_data;
+ size_t bk_len;
+} bunyan_key_t;
+
+typedef struct bunyan {
+ pthread_mutex_t bun_lock;
+ bunyan_key_t *bun_keys;
+ bunyan_stream_t *bun_streams;
+ char *bun_name;
+ char bun_host[MAXHOSTNAMELEN+1];
+} bunyan_t;
+
+#define ISO_TIMELEN 25
+static const int bunyan_version = 0;
+
+static void
+bunyan_key_fini(bunyan_key_t *bkp)
+{
+ size_t nlen = strlen(bkp->bk_name) + 1;
+ umem_free(bkp->bk_data, bkp->bk_len);
+ umem_free(bkp->bk_name, nlen);
+ umem_free(bkp, sizeof (bunyan_key_t));
+}
+
+static void
+bunyan_stream_fini(bunyan_stream_t *bsp)
+{
+ size_t nlen = strlen(bsp->bs_name) + 1;
+ umem_free(bsp->bs_name, nlen);
+ umem_free(bsp, sizeof (bunyan_stream_t));
+}
+
+int
+bunyan_init(const char *name, bunyan_logger_t **bhp)
+{
+ int ret;
+ bunyan_t *b;
+ size_t nlen = strlen(name) + 1;
+
+ b = umem_zalloc(sizeof (bunyan_t), UMEM_DEFAULT);
+ if (b == NULL)
+ return (ENOMEM);
+
+ b->bun_name = umem_alloc(nlen, UMEM_DEFAULT);
+ if (b->bun_name == NULL) {
+ umem_free(b, sizeof (bunyan_t));
+ return (ENOMEM);
+ }
+ bcopy(name, b->bun_name, nlen);
+
+ if ((ret = pthread_mutex_init(&b->bun_lock, NULL)) != 0) {
+ umem_free(b->bun_name, nlen);
+ umem_free(b, sizeof (bunyan_t));
+ return (ret);
+ }
+
+ VERIFY(gethostname(b->bun_host, sizeof (b->bun_host)) == 0);
+ b->bun_host[MAXHOSTNAMELEN] = '\0';
+
+ *bhp = (bunyan_logger_t *)b;
+ return (0);
+}
+
+void
+bunyan_fini(bunyan_logger_t *bhp)
+{
+ bunyan_t *b = (bunyan_t *)bhp;
+ bunyan_key_t *bkp;
+ bunyan_stream_t *bsp;
+
+ while ((bkp = b->bun_keys) != NULL) {
+ b->bun_keys = bkp->bk_next;
+ bunyan_key_fini(bkp);
+ }
+
+ while ((bsp = b->bun_streams) != NULL) {
+ b->bun_streams = bsp->bs_next;
+ bunyan_stream_fini(bsp);
+ }
+
+ if (b->bun_name != NULL)
+ umem_free(b->bun_name, strlen(b->bun_name) + 1);
+
+ VERIFY(pthread_mutex_destroy(&b->bun_lock) == 0);
+ umem_free(b, sizeof (bunyan_t));
+}
+
+/* ARGSUSED */
+int
+bunyan_stream_fd(nvlist_t *nvl, const char *js, void *arg)
+{
+ uintptr_t fd = (uintptr_t)arg;
+ size_t jslen = strlen(js);
+ off_t off = 0;
+ ssize_t ret = 0;
+ static int maxbuf = -1;
+
+ if (maxbuf == -1)
+ maxbuf = getpagesize();
+
+ while (off != jslen) {
+ /*
+ * Write up to a page of data at a time. If for some reason an
+ * individual write fails, move on and try to still write a new
+ * line at least...
+ */
+ ret = write(fd, js + off, MIN(jslen - off, maxbuf));
+ if (ret < 0)
+ break;
+ off += ret;
+ }
+
+ if (ret < 0) {
+ (void) write(fd, "\n", 1);
+ } else {
+ ret = write(fd, "\n", 1);
+ }
+ return (ret < 0 ? 1: 0);
+}
+
+int
+bunyan_stream_add(bunyan_logger_t *bhp, const char *name, int level,
+ bunyan_stream_f func, void *arg)
+{
+ bunyan_stream_t *bs, *cur;
+ size_t nlen = strlen(name) + 1;
+ bunyan_t *b = (bunyan_t *)bhp;
+
+ if (level != BUNYAN_L_TRACE &&
+ level != BUNYAN_L_DEBUG &&
+ level != BUNYAN_L_INFO &&
+ level != BUNYAN_L_WARN &&
+ level != BUNYAN_L_ERROR &&
+ level != BUNYAN_L_FATAL)
+ return (EINVAL);
+
+ bs = umem_alloc(sizeof (bunyan_stream_t), UMEM_DEFAULT);
+ if (bs == NULL)
+ return (ENOMEM);
+
+ bs->bs_name = umem_alloc(nlen, UMEM_DEFAULT);
+ if (bs->bs_name == NULL) {
+ umem_free(bs, sizeof (bunyan_stream_t));
+ return (ENOMEM);
+ }
+ bcopy(name, bs->bs_name, nlen);
+ bs->bs_level = level;
+ bs->bs_func = func;
+ bs->bs_arg = arg;
+ bs->bs_count = 0;
+ (void) pthread_mutex_lock(&b->bun_lock);
+ cur = b->bun_streams;
+ while (cur != NULL) {
+ if (strcmp(name, cur->bs_name) == 0) {
+ (void) pthread_mutex_unlock(&b->bun_lock);
+ umem_free(bs->bs_name, nlen);
+ umem_free(bs, sizeof (bunyan_stream_t));
+ return (EEXIST);
+ }
+ cur = cur->bs_next;
+ }
+ bs->bs_next = b->bun_streams;
+ b->bun_streams = bs;
+ (void) pthread_mutex_unlock(&b->bun_lock);
+
+ return (0);
+}
+
+int
+bunyan_stream_remove(bunyan_logger_t *bhp, const char *name)
+{
+ bunyan_stream_t *cur, *prev;
+ bunyan_t *b = (bunyan_t *)bhp;
+
+ (void) pthread_mutex_lock(&b->bun_lock);
+ prev = NULL;
+ cur = b->bun_streams;
+ while (cur != NULL) {
+ if (strcmp(name, cur->bs_name) == 0)
+ break;
+ prev = cur;
+ cur = cur->bs_next;
+ }
+ if (cur == NULL) {
+ (void) pthread_mutex_unlock(&b->bun_lock);
+ return (ENOENT);
+ }
+ if (prev == NULL)
+ b->bun_streams = cur->bs_next;
+ else
+ prev->bs_next = cur->bs_next;
+ cur->bs_next = NULL;
+ (void) pthread_mutex_unlock(&b->bun_lock);
+
+ bunyan_stream_fini(cur);
+
+ return (0);
+}
+
+static int
+bunyan_key_add_one(bunyan_t *b, const char *name, bunyan_type_t type,
+ const void *arg)
+{
+ bunyan_key_t *bkp, *cur, *prev;
+ size_t nlen = strlen(name) + 1;
+ size_t blen;
+
+ bkp = umem_alloc(sizeof (bunyan_key_t), UMEM_DEFAULT);
+ if (bkp == NULL)
+ return (ENOMEM);
+ bkp->bk_name = umem_alloc(nlen, UMEM_DEFAULT);
+ if (bkp->bk_name == NULL) {
+ umem_free(bkp, sizeof (bunyan_key_t));
+ return (ENOMEM);
+ }
+ bcopy(name, bkp->bk_name, nlen);
+
+ switch (type) {
+ case BUNYAN_T_STRING:
+ blen = strlen(arg) + 1;
+ break;
+ case BUNYAN_T_POINTER:
+ blen = sizeof (uintptr_t);
+ break;
+ case BUNYAN_T_IP:
+ blen = sizeof (struct in_addr);
+ break;
+ case BUNYAN_T_IP6:
+ blen = sizeof (struct in6_addr);
+ break;
+ case BUNYAN_T_BOOLEAN:
+ blen = sizeof (boolean_t);
+ break;
+ case BUNYAN_T_INT32:
+ blen = sizeof (int32_t);
+ break;
+ case BUNYAN_T_INT64:
+ case BUNYAN_T_INT64STR:
+ blen = sizeof (int64_t);
+ break;
+ case BUNYAN_T_UINT32:
+ blen = sizeof (uint32_t);
+ break;
+ case BUNYAN_T_UINT64:
+ case BUNYAN_T_UINT64STR:
+ blen = sizeof (uint64_t);
+ break;
+ case BUNYAN_T_DOUBLE:
+ blen = sizeof (double);
+ break;
+ default:
+ umem_free(bkp->bk_name, nlen);
+ umem_free(bkp, sizeof (bunyan_key_t));
+ return (EINVAL);
+ }
+
+ bkp->bk_data = umem_alloc(blen, UMEM_DEFAULT);
+ if (bkp->bk_data == NULL) {
+ umem_free(bkp->bk_name, nlen);
+ umem_free(bkp, sizeof (bunyan_key_t));
+ return (ENOMEM);
+ }
+ bcopy(arg, bkp->bk_data, blen);
+ bkp->bk_len = blen;
+ bkp->bk_type = type;
+
+ (void) pthread_mutex_lock(&b->bun_lock);
+ prev = NULL;
+ cur = b->bun_keys;
+ while (cur != NULL) {
+ if (strcmp(name, cur->bk_name) == 0)
+ break;
+ prev = cur;
+ cur = cur->bk_next;
+ }
+ if (cur != NULL) {
+ if (prev == NULL)
+ b->bun_keys = cur->bk_next;
+ else
+ prev->bk_next = cur->bk_next;
+ bunyan_key_fini(cur);
+ }
+ bkp->bk_next = b->bun_keys;
+ b->bun_keys = bkp;
+ (void) pthread_mutex_unlock(&b->bun_lock);
+
+ return (0);
+}
+
+static int
+bunyan_key_vadd(bunyan_t *b, va_list *ap)
+{
+ int type, ret;
+ void *data;
+ boolean_t bt;
+ int32_t i32;
+ int64_t i64;
+ uint32_t ui32;
+ uint64_t ui64;
+ double d;
+ uintptr_t ptr;
+
+ while ((type = va_arg(*ap, int)) != BUNYAN_T_END) {
+ const char *name = va_arg(*ap, char *);
+
+ switch (type) {
+ case BUNYAN_T_STRING:
+ data = va_arg(*ap, char *);
+ break;
+ case BUNYAN_T_POINTER:
+ ptr = (uintptr_t)va_arg(*ap, void *);
+ data = &ptr;
+ break;
+ case BUNYAN_T_IP:
+ case BUNYAN_T_IP6:
+ data = va_arg(*ap, void *);
+ break;
+ case BUNYAN_T_BOOLEAN:
+ bt = va_arg(*ap, boolean_t);
+ data = &bt;
+ break;
+ case BUNYAN_T_INT32:
+ i32 = va_arg(*ap, int32_t);
+ data = &i32;
+ break;
+ case BUNYAN_T_INT64:
+ case BUNYAN_T_INT64STR:
+ i64 = va_arg(*ap, int64_t);
+ data = &i64;
+ break;
+ case BUNYAN_T_UINT32:
+ ui32 = va_arg(*ap, uint32_t);
+ data = &ui32;
+ break;
+ case BUNYAN_T_UINT64:
+ case BUNYAN_T_UINT64STR:
+ ui64 = va_arg(*ap, uint64_t);
+ data = &ui64;
+ break;
+ case BUNYAN_T_DOUBLE:
+ d = va_arg(*ap, double);
+ data = &d;
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ if ((ret = bunyan_key_add_one(b, name, type, data)) != 0)
+ return (ret);
+ }
+
+ return (0);
+}
+
+int
+bunyan_key_add(bunyan_logger_t *bhp, ...)
+{
+ int ret;
+ va_list ap;
+ bunyan_t *b = (bunyan_t *)bhp;
+
+ va_start(ap, bhp);
+ ret = bunyan_key_vadd(b, &ap);
+ va_end(ap);
+
+ return (ret);
+}
+
+int
+bunyan_key_remove(bunyan_logger_t *bhp, const char *name)
+{
+ bunyan_t *b = (bunyan_t *)bhp;
+ bunyan_key_t *cur, *prev;
+
+ (void) pthread_mutex_lock(&b->bun_lock);
+ prev = NULL;
+ cur = b->bun_keys;
+ while (cur != NULL) {
+ if (strcmp(name, cur->bk_name) == 0)
+ break;
+ prev = cur;
+ cur = cur->bk_next;
+ }
+
+ if (cur == NULL) {
+ (void) pthread_mutex_unlock(&b->bun_lock);
+ return (ENOENT);
+ }
+
+ if (prev == NULL)
+ b->bun_keys = cur->bk_next;
+ else
+ prev->bk_next = cur->bk_next;
+ (void) pthread_mutex_unlock(&b->bun_lock);
+
+ bunyan_key_fini(cur);
+ return (0);
+}
+
+static bunyan_key_t *
+bunyan_key_dup(const bunyan_key_t *bkp)
+{
+ bunyan_key_t *nkp;
+ size_t nlen = strlen(bkp->bk_name) + 1;
+
+ nkp = umem_alloc(sizeof (bunyan_key_t), UMEM_DEFAULT);
+ if (nkp == NULL)
+ return (NULL);
+ nkp->bk_next = NULL;
+ nkp->bk_name = umem_alloc(nlen, UMEM_DEFAULT);
+ if (nkp->bk_name == NULL) {
+ umem_free(nkp, sizeof (bunyan_key_t));
+ return (NULL);
+ }
+ bcopy(bkp->bk_name, nkp->bk_name, nlen);
+ nkp->bk_type = bkp->bk_type;
+ nkp->bk_data = umem_alloc(bkp->bk_len, UMEM_DEFAULT);
+ if (nkp->bk_data == NULL) {
+ umem_free(nkp->bk_name, nlen);
+ umem_free(nkp, sizeof (bunyan_key_t));
+ return (NULL);
+ }
+ bcopy(bkp->bk_data, nkp->bk_data, bkp->bk_len);
+ nkp->bk_len = bkp->bk_len;
+
+ return (nkp);
+}
+
+static bunyan_stream_t *
+bunyan_stream_dup(const bunyan_stream_t *bsp)
+{
+ bunyan_stream_t *nsp;
+ size_t nlen = strlen(bsp->bs_name) + 1;
+
+ nsp = umem_alloc(sizeof (bunyan_stream_t), UMEM_DEFAULT);
+ if (nsp == NULL)
+ return (NULL);
+
+ nsp->bs_next = NULL;
+ nsp->bs_name = umem_alloc(nlen, UMEM_DEFAULT);
+ if (nsp->bs_name == NULL) {
+ umem_free(nsp, sizeof (bunyan_stream_t));
+ return (NULL);
+ }
+ bcopy(bsp->bs_name, nsp->bs_name, nlen);
+ nsp->bs_level = bsp->bs_level;
+ nsp->bs_func = bsp->bs_func;
+ nsp->bs_arg = bsp->bs_arg;
+ nsp->bs_count = 0;
+
+ return (nsp);
+}
+
+static bunyan_t *
+bunyan_dup(const bunyan_t *b)
+{
+ bunyan_t *n;
+ const bunyan_key_t *bkp;
+ const bunyan_stream_t *bsp;
+ size_t nlen;
+
+ n = umem_zalloc(sizeof (bunyan_t), UMEM_DEFAULT);
+ if (n == NULL)
+ return (NULL);
+
+ if (pthread_mutex_init(&n->bun_lock, NULL) != 0) {
+ umem_free(n, sizeof (bunyan_t));
+ return (NULL);
+ }
+
+ for (bkp = b->bun_keys; bkp != NULL; bkp = bkp->bk_next) {
+ bunyan_key_t *nkp;
+ nkp = bunyan_key_dup(bkp);
+ if (nkp == NULL) {
+ bunyan_fini((bunyan_logger_t *)n);
+ return (NULL);
+ }
+
+ nkp->bk_next = n->bun_keys;
+ n->bun_keys = nkp;
+ }
+
+ for (bsp = b->bun_streams; bsp != NULL; bsp = bsp->bs_next) {
+ bunyan_stream_t *nsp;
+ nsp = bunyan_stream_dup(bsp);
+ if (bsp == NULL) {
+ bunyan_fini((bunyan_logger_t *)n);
+ return (NULL);
+ }
+
+ nsp->bs_next = n->bun_streams;
+ n->bun_streams = nsp;
+ }
+
+ nlen = strlen(b->bun_name) + 1;
+ n->bun_name = umem_alloc(nlen, UMEM_DEFAULT);
+ if (n->bun_name == NULL) {
+ bunyan_fini((bunyan_logger_t *)n);
+ return (NULL);
+ }
+ bcopy(b->bun_name, n->bun_name, nlen);
+ bcopy(b->bun_host, n->bun_host, MAXHOSTNAMELEN+1);
+
+ return (n);
+}
+
+int
+bunyan_child(const bunyan_logger_t *bhp, bunyan_logger_t **outp, ...)
+{
+ bunyan_t *b = (bunyan_t *)bhp;
+ bunyan_t *n;
+ va_list ap;
+ int ret;
+
+ n = bunyan_dup(b);
+ if (n == NULL)
+ return (ENOMEM);
+
+ va_start(ap, outp);
+ ret = bunyan_key_vadd(b, &ap);
+ va_end(ap);
+
+ if (ret != 0)
+ bunyan_fini((bunyan_logger_t *)n);
+ else
+ *outp = (bunyan_logger_t *)n;
+
+ return (ret);
+}
+
+static int
+bunyan_iso_time(char *buf)
+{
+ struct timeval tv;
+ struct tm tm;
+
+ if (gettimeofday(&tv, NULL) != 0)
+ return (errno);
+
+ if (gmtime_r(&tv.tv_sec, &tm) == NULL)
+ return (errno);
+
+ VERIFY(strftime(buf, ISO_TIMELEN, "%FT%T", &tm) == 19);
+
+ (void) snprintf(&buf[19], 6, ".%03dZ", (int)(tv.tv_usec / 1000));
+
+ return (0);
+}
+
+/*
+ * Note, these fields are all required, so even if a user attempts to use one of
+ * them in their own fields, we'll override them and therefore, have it be the
+ * last one.
+ */
+static int
+bunyan_vlog_defaults(nvlist_t *nvl, bunyan_t *b, bunyan_level_t level,
+ const char *msg)
+{
+ int ret;
+ char tbuf[ISO_TIMELEN];
+
+ if ((ret = bunyan_iso_time(tbuf)) != 0)
+ return (ret);
+
+ if ((ret = nvlist_add_int32(nvl, "v", bunyan_version)) != 0 ||
+ (ret = nvlist_add_int32(nvl, "level", level) != 0) ||
+ (ret = nvlist_add_string(nvl, "name", b->bun_name) != 0) ||
+ (ret = nvlist_add_string(nvl, "hostname", b->bun_host) != 0) ||
+ (ret = nvlist_add_int32(nvl, "pid", getpid()) != 0) ||
+ (ret = nvlist_add_uint32(nvl, "tid", thr_self()) != 0) ||
+ (ret = nvlist_add_string(nvl, "time", tbuf) != 0) ||
+ (ret = nvlist_add_string(nvl, "msg", msg) != 0))
+ return (ret);
+
+ return (0);
+}
+
+static int
+bunyan_vlog_add(nvlist_t *nvl, const char *key, bunyan_type_t type, void *arg)
+{
+ int ret;
+ uintptr_t *up;
+ struct in_addr *v4;
+ struct in6_addr *v6;
+
+ /*
+ * Our buffer needs to hold the string forms of pointers, IPv6 strings,
+ * etc. INET6_ADDRSTRLEN is large enough for all of these.
+ */
+ char buf[INET6_ADDRSTRLEN];
+
+ switch (type) {
+ case BUNYAN_T_STRING:
+ ret = nvlist_add_string(nvl, key, (char *)arg);
+ break;
+ case BUNYAN_T_POINTER:
+ up = arg;
+ (void) snprintf(buf, sizeof (buf), "0x%p", *up);
+ ret = nvlist_add_string(nvl, key, buf);
+ break;
+ case BUNYAN_T_IP:
+ v4 = arg;
+ VERIFY(inet_ntop(AF_INET, v4, buf, sizeof (buf)) != NULL);
+ ret = nvlist_add_string(nvl, key, buf);
+ break;
+ case BUNYAN_T_IP6:
+ v6 = arg;
+ VERIFY(inet_ntop(AF_INET6, v6, buf, sizeof (buf)) != NULL);
+ ret = nvlist_add_string(nvl, key, buf);
+ break;
+ case BUNYAN_T_BOOLEAN:
+ ret = nvlist_add_boolean_value(nvl, key, *(boolean_t *)arg);
+ break;
+ case BUNYAN_T_INT32:
+ ret = nvlist_add_int32(nvl, key, *(int32_t *)arg);
+ break;
+ case BUNYAN_T_INT64:
+ ret = nvlist_add_int64(nvl, key, *(int64_t *)arg);
+ break;
+ case BUNYAN_T_UINT32:
+ ret = nvlist_add_uint32(nvl, key, *(uint32_t *)arg);
+ break;
+ case BUNYAN_T_UINT64:
+ ret = nvlist_add_uint64(nvl, key, *(uint32_t *)arg);
+ break;
+ case BUNYAN_T_DOUBLE:
+ ret = nvlist_add_double(nvl, key, *(double *)arg);
+ break;
+ case BUNYAN_T_INT64STR:
+ (void) snprintf(buf, sizeof (buf), "%lld", *(int64_t *)arg);
+ ret = nvlist_add_string(nvl, key, buf);
+ break;
+ case BUNYAN_T_UINT64STR:
+ (void) snprintf(buf, sizeof (buf), "%llu", *(uint64_t *)arg);
+ ret = nvlist_add_string(nvl, key, buf);
+ break;
+ default:
+ ret = EINVAL;
+ break;
+ }
+
+ return (ret);
+}
+
+static int
+bunyan_vlog(bunyan_logger_t *bhp, bunyan_level_t level, const char *msg,
+ va_list *ap)
+{
+ nvlist_t *nvl = NULL;
+ int ret, type;
+ bunyan_key_t *bkp;
+ bunyan_stream_t *bsp;
+ char *buf = NULL;
+ bunyan_t *b = (bunyan_t *)bhp;
+
+ if (msg == NULL)
+ return (EINVAL);
+
+ (void) pthread_mutex_lock(&b->bun_lock);
+
+ if ((ret = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0)) != 0) {
+ (void) pthread_mutex_unlock(&b->bun_lock);
+ return (ret);
+ }
+
+ /*
+ * We add pre-defined keys, then go through and process the users keys,
+ * and finally go ahead and our defaults. If all that succeeds, then we
+ * can go ahead and call all the built-in logs.
+ */
+ for (bkp = b->bun_keys; bkp != NULL; bkp = bkp->bk_next) {
+ if ((ret = bunyan_vlog_add(nvl, bkp->bk_name, bkp->bk_type,
+ bkp->bk_data)) != 0)
+ goto out;
+ }
+
+ while ((type = va_arg(*ap, int)) != BUNYAN_T_END) {
+ void *data;
+ boolean_t bt;
+ int32_t i32;
+ int64_t i64;
+ uint32_t ui32;
+ uint64_t ui64;
+ double d;
+ uintptr_t ptr;
+ const char *key = va_arg(*ap, char *);
+
+ switch (type) {
+ case BUNYAN_T_STRING:
+ data = va_arg(*ap, char *);
+ break;
+ case BUNYAN_T_POINTER:
+ ptr = (uintptr_t)va_arg(*ap, void *);
+ data = &ptr;
+ break;
+ case BUNYAN_T_IP:
+ case BUNYAN_T_IP6:
+ data = va_arg(*ap, void *);
+ break;
+ case BUNYAN_T_BOOLEAN:
+ bt = va_arg(*ap, boolean_t);
+ data = &bt;
+ break;
+ case BUNYAN_T_INT32:
+ i32 = va_arg(*ap, int32_t);
+ data = &i32;
+ break;
+ case BUNYAN_T_INT64:
+ case BUNYAN_T_INT64STR:
+ i64 = va_arg(*ap, int64_t);
+ data = &i64;
+ break;
+ case BUNYAN_T_UINT32:
+ ui32 = va_arg(*ap, uint32_t);
+ data = &ui32;
+ break;
+ case BUNYAN_T_UINT64:
+ case BUNYAN_T_UINT64STR:
+ ui64 = va_arg(*ap, uint64_t);
+ data = &ui64;
+ break;
+ case BUNYAN_T_DOUBLE:
+ d = va_arg(*ap, double);
+ data = &d;
+ break;
+ default:
+ ret = EINVAL;
+ goto out;
+ }
+
+ if ((ret = bunyan_vlog_add(nvl, key, type, data)) != 0)
+ goto out;
+
+ }
+ /*
+ * This must be the last thing we do before we log to ensure that all of
+ * our defaults always make it out.
+ */
+ if ((ret = bunyan_vlog_defaults(nvl, b, level, msg)) != 0)
+ goto out;
+
+ if (nvlist_dump_json(nvl, &buf) < 0) {
+ ret = errno;
+ goto out;
+ }
+
+ /* Fire DTrace probes */
+ switch (level) {
+ case BUNYAN_L_TRACE:
+ BUNYAN_LOG_TRACE(buf);
+ break;
+ case BUNYAN_L_DEBUG:
+ BUNYAN_LOG_DEBUG(buf);
+ break;
+ case BUNYAN_L_INFO:
+ BUNYAN_LOG_INFO(buf);
+ break;
+ case BUNYAN_L_WARN:
+ BUNYAN_LOG_WARN(buf);
+ break;
+ case BUNYAN_L_ERROR:
+ BUNYAN_LOG_ERROR(buf);
+ break;
+ case BUNYAN_L_FATAL:
+ BUNYAN_LOG_FATAL(buf);
+ break;
+ }
+
+ for (bsp = b->bun_streams; bsp != NULL; bsp = bsp->bs_next) {
+ if (bsp->bs_level <= level)
+ if (bsp->bs_func(nvl, buf, bsp->bs_arg) != 0)
+ bsp->bs_count++;
+ }
+ ret = 0;
+out:
+ (void) pthread_mutex_unlock(&b->bun_lock);
+ if (buf != NULL)
+ nvlist_dump_json_free(nvl, buf);
+ if (nvl != NULL)
+ nvlist_free(nvl);
+ return (ret);
+}
+
+int
+bunyan_trace(bunyan_logger_t *bhp, const char *msg, ...)
+{
+ va_list va;
+ int ret;
+
+ va_start(va, msg);
+ ret = bunyan_vlog(bhp, BUNYAN_L_TRACE, msg, &va);
+ va_end(va);
+
+ return (ret);
+}
+
+int
+bunyan_debug(bunyan_logger_t *bhp, const char *msg, ...)
+{
+ va_list va;
+ int ret;
+
+ va_start(va, msg);
+ ret = bunyan_vlog(bhp, BUNYAN_L_DEBUG, msg, &va);
+ va_end(va);
+
+ return (ret);
+}
+
+int
+bunyan_info(bunyan_logger_t *bhp, const char *msg, ...)
+{
+ va_list va;
+ int ret;
+
+ va_start(va, msg);
+ ret = bunyan_vlog(bhp, BUNYAN_L_INFO, msg, &va);
+ va_end(va);
+
+ return (ret);
+}
+
+int
+bunyan_warn(bunyan_logger_t *bhp, const char *msg, ...)
+{
+ va_list va;
+ int ret;
+
+ va_start(va, msg);
+ ret = bunyan_vlog(bhp, BUNYAN_L_WARN, msg, &va);
+ va_end(va);
+
+ return (ret);
+}
+
+int
+bunyan_error(bunyan_logger_t *bhp, const char *msg, ...)
+{
+ va_list va;
+ int ret;
+
+ va_start(va, msg);
+ ret = bunyan_vlog(bhp, BUNYAN_L_ERROR, msg, &va);
+ va_end(va);
+
+ return (ret);
+}
+
+
+int
+bunyan_fatal(bunyan_logger_t *bhp, const char *msg, ...)
+{
+ va_list va;
+ int ret;
+
+ va_start(va, msg);
+ ret = bunyan_vlog(bhp, BUNYAN_L_FATAL, msg, &va);
+ va_end(va);
+
+ return (ret);
+}
diff --git a/usr/src/lib/libbunyan/common/bunyan.h b/usr/src/lib/libbunyan/common/bunyan.h
new file mode 100644
index 0000000000..9a01f6f6cd
--- /dev/null
+++ b/usr/src/lib/libbunyan/common/bunyan.h
@@ -0,0 +1,88 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc.
+ */
+
+#ifndef _BUNYAN_H
+#define _BUNYAN_H
+
+/*
+ * C version of the bunyan logging format.
+ */
+
+#include <limits.h>
+#include <libnvpair.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct bunyan_logger bunyan_logger_t;
+
+typedef enum bunyan_level {
+ BUNYAN_L_TRACE = 10,
+ BUNYAN_L_DEBUG = 20,
+ BUNYAN_L_INFO = 30,
+ BUNYAN_L_WARN = 40,
+ BUNYAN_L_ERROR = 50,
+ BUNYAN_L_FATAL = 60
+} bunyan_level_t;
+
+typedef enum bunyan_type {
+ BUNYAN_T_END = 0x0,
+ BUNYAN_T_STRING,
+ BUNYAN_T_POINTER,
+ BUNYAN_T_IP,
+ BUNYAN_T_IP6,
+ BUNYAN_T_BOOLEAN,
+ BUNYAN_T_INT32,
+ BUNYAN_T_INT64,
+ BUNYAN_T_UINT32,
+ BUNYAN_T_UINT64,
+ BUNYAN_T_DOUBLE,
+ BUNYAN_T_INT64STR,
+ BUNYAN_T_UINT64STR
+} bunyan_type_t;
+
+/*
+ * A handle is MT-safe, but not fork-safe.
+ */
+extern int bunyan_init(const char *, bunyan_logger_t **);
+extern int bunyan_child(const bunyan_logger_t *, bunyan_logger_t **, ...);
+extern void bunyan_fini(bunyan_logger_t *);
+
+/*
+ * Bunyan stream callbacks are guaranteed to be serialized.
+ */
+typedef int (*bunyan_stream_f)(nvlist_t *, const char *, void *);
+extern int bunyan_stream_fd(nvlist_t *, const char *, void *);
+
+extern int bunyan_stream_add(bunyan_logger_t *, const char *, int,
+ bunyan_stream_f, void *);
+extern int bunyan_stream_remove(bunyan_logger_t *, const char *);
+
+extern int bunyan_key_add(bunyan_logger_t *, ...);
+extern int bunyan_key_remove(bunyan_logger_t *, const char *);
+
+extern int bunyan_trace(bunyan_logger_t *, const char *msg, ...);
+extern int bunyan_debug(bunyan_logger_t *, const char *msg, ...);
+extern int bunyan_info(bunyan_logger_t *, const char *msg, ...);
+extern int bunyan_warn(bunyan_logger_t *, const char *msg, ...);
+extern int bunyan_error(bunyan_logger_t *, const char *msg, ...);
+extern int bunyan_fatal(bunyan_logger_t *, const char *msg, ...);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _BUNYAN_H */
diff --git a/usr/src/lib/libbunyan/common/bunyan_provider.d b/usr/src/lib/libbunyan/common/bunyan_provider.d
new file mode 100644
index 0000000000..d47ea75733
--- /dev/null
+++ b/usr/src/lib/libbunyan/common/bunyan_provider.d
@@ -0,0 +1,32 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc.
+ */
+
+/*
+ * Bunyan DTrace provider
+ */
+provider bunyan {
+ probe log__trace(char *);
+ probe log__debug(char *);
+ probe log__info(char *);
+ probe log__warn(char *);
+ probe log__error(char *);
+ probe log__fatal(char *);
+};
+
+#pragma D attributes Stable/Stable/ISA provider bunyan provider
+#pragma D attributes Private/Private/Unknown provider bunyan module
+#pragma D attributes Private/Private/Unknown provider bunyan function
+#pragma D attributes Stable/Stable/ISA provider bunyan name
+#pragma D attributes Stable/Stable/ISA provider bunyan args
diff --git a/usr/src/lib/libbunyan/common/llib-lbunyan b/usr/src/lib/libbunyan/common/llib-lbunyan
new file mode 100644
index 0000000000..31f6a52aba
--- /dev/null
+++ b/usr/src/lib/libbunyan/common/llib-lbunyan
@@ -0,0 +1,19 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc.
+ */
+
+/* LINTLIBRARY */
+/* PROTOLIB1 */
+
+#include <bunyan.h>
diff --git a/usr/src/lib/libbunyan/common/mapfile-vers b/usr/src/lib/libbunyan/common/mapfile-vers
new file mode 100644
index 0000000000..775af4ab45
--- /dev/null
+++ b/usr/src/lib/libbunyan/common/mapfile-vers
@@ -0,0 +1,53 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_VERSION ILLUMOS_1.0 {
+ global:
+ bunyan_init;
+ bunyan_child;
+ bunyan_fini;
+ bunyan_stream_fd;
+ bunyan_stream_add;
+ bunyan_stream_remove;
+ bunyan_key_add;
+ bunyan_key_remove;
+ bunyan_trace;
+ bunyan_debug;
+ bunyan_info;
+ bunyan_warn;
+ bunyan_error;
+ bunyan_fatal;
+};
+
+SYMBOL_VERSION ILLUMOSprivate {
+ local:
+ *;
+};
diff --git a/usr/src/lib/libbunyan/i386/Makefile b/usr/src/lib/libbunyan/i386/Makefile
new file mode 100644
index 0000000000..41e699e8f8
--- /dev/null
+++ b/usr/src/lib/libbunyan/i386/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/libbunyan/sparc/Makefile b/usr/src/lib/libbunyan/sparc/Makefile
new file mode 100644
index 0000000000..41e699e8f8
--- /dev/null
+++ b/usr/src/lib/libbunyan/sparc/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/libbunyan/sparcv9/Makefile b/usr/src/lib/libbunyan/sparcv9/Makefile
new file mode 100644
index 0000000000..15d904c616
--- /dev/null
+++ b/usr/src/lib/libbunyan/sparcv9/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+include ../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/libc/Makefile.targ b/usr/src/lib/libc/Makefile.targ
index 9f9f6d2dfb..bb9ccf467d 100644
--- a/usr/src/lib/libc/Makefile.targ
+++ b/usr/src/lib/libc/Makefile.targ
@@ -21,6 +21,7 @@
#
# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
+# Copyright 2016 Joyent, Inc.
#
#
@@ -93,6 +94,10 @@ pics/%.o: $(LIBCBASE)/../$(MACH)/sys/%.s
$(BUILD.s)
$(POST_PROCESS_O)
+pics/%.o: $(LIBCBASE)/../$(MACH)/sys/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
+
pics/%.o: $(LIBCBASE)/../$(TARGET_ARCH)/sys/%.c
$(COMPILE.c) -o $@ $<
$(POST_PROCESS_O)
diff --git a/usr/src/lib/libc/amd64/Makefile b/usr/src/lib/libc/amd64/Makefile
index 4580972482..a2803faedf 100644
--- a/usr/src/lib/libc/amd64/Makefile
+++ b/usr/src/lib/libc/amd64/Makefile
@@ -36,6 +36,10 @@ VERS= .1
CPP= /usr/lib/cpp
TARGET_ARCH= amd64
+# include comm page definitions
+include $(SRC)/lib/commpage/Makefile.shared.targ
+include $(SRC)/lib/commpage/Makefile.shared.com
+
# objects are grouped by source directory
# local objects
@@ -106,6 +110,7 @@ COMOBJS= \
strtoull.o
GENOBJS= \
+ $(COMMPAGE_OBJS) \
_getsp.o \
abs.o \
alloca.o \
@@ -281,6 +286,7 @@ COMSYSOBJS= \
SYSOBJS= \
__clock_gettime.o \
+ __clock_gettime_sys.o \
__getcontext.o \
__uadmin.o \
_lwp_mutex_unlock.o \
@@ -876,6 +882,7 @@ PORTSYS= \
fcntl.o \
getpagesizes.o \
getpeerucred.o \
+ inotify.o \
inst_sync.o \
issetugid.o \
label.o \
@@ -1183,6 +1190,8 @@ $(PORTI18N_COND:%=pics/%) := \
pics/arc4random.o := CPPFLAGS += -I$(SRC)/common/crypto/chacha
+pics/__clock_gettime.o := CPPFLAGS += $(COMMPAGE_CPPFLAGS)
+
.KEEP_STATE:
all: $(LIBS) $(LIB_PIC)
diff --git a/usr/src/lib/libc/amd64/gen/siginfolst.c b/usr/src/lib/libc/amd64/gen/siginfolst.c
index 8451dfbb4f..8b8a1b4669 100644
--- a/usr/src/lib/libc/amd64/gen/siginfolst.c
+++ b/usr/src/lib/libc/amd64/gen/siginfolst.c
@@ -22,6 +22,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015, Joyent, Inc.
*/
/* Copyright (c) 1988 AT&T */
@@ -188,6 +189,7 @@ static const struct siginfolist _sys_siginfolist_data[NSIG-1] = {
0, 0,
0, 0,
0, 0, /* SIGRTMIN+15 */
+ 0, 0, /* SIGRTMIN+16 */
0, 0, /* SIGRTMAX-15 */
0, 0,
0, 0,
diff --git a/usr/src/lib/libc/amd64/sys/__clock_gettime.s b/usr/src/lib/libc/amd64/sys/__clock_gettime_sys.s
index 0a98de28a9..bef188c18e 100644
--- a/usr/src/lib/libc/amd64/sys/__clock_gettime.s
+++ b/usr/src/lib/libc/amd64/sys/__clock_gettime_sys.s
@@ -21,25 +21,28 @@
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
*/
- .file "__clock_gettime.s"
+ .file "__clock_gettime_sys.s"
+#include <sys/asm_linkage.h>
#include <sys/time_impl.h>
#include "SYS.h"
+
/*
* int
- * __clock_gettime(clockid_t clock_id, timespec_t *tp)
+ * __clock_gettime_sys(clockid_t clock_id, timespec_t *tp)
*/
- ENTRY(__clock_gettime)
+ ENTRY(__clock_gettime_sys)
cmpl $__CLOCK_REALTIME0, %edi /* if (clock_id) */
- je 2f /* equal to __CLOCK_REALTIME0 */
+ je 2f /* equals __CLOCK_REALTIME0 */
cmpl $CLOCK_REALTIME, %edi /* or if (clock_id) */
- jne 1f /* equal to CLOCK_REALTIME */
+ jne 1f /* equals CLOCK_REALTIME */
2:
- pushq %rsi /* preserve the timespec_t pointer */
+ pushq %rsi /* preserve timespec_t ptr */
SYSFASTTRAP(GETHRESTIME)
popq %rsi
movq %rax, (%rsi)
@@ -49,4 +52,4 @@
SYSTRAP_RVAL1(clock_gettime)
SYSCERROR
RETC
- SET_SIZE(__clock_gettime)
+ SET_SIZE(__clock_gettime_sys)
diff --git a/usr/src/lib/libc/i386/Makefile.com b/usr/src/lib/libc/i386/Makefile.com
index c73b03879f..7c768d1019 100644
--- a/usr/src/lib/libc/i386/Makefile.com
+++ b/usr/src/lib/libc/i386/Makefile.com
@@ -34,6 +34,10 @@ VERS= .1
CPP= /usr/lib/cpp
TARGET_ARCH= i386
+# include comm page definitions
+include $(SRC)/lib/commpage/Makefile.shared.targ
+include $(SRC)/lib/commpage/Makefile.shared.com
+
VALUES= values-Xa.o
# objects are grouped by source directory
@@ -110,6 +114,7 @@ DTRACEOBJS= \
dtrace_data.o
GENOBJS= \
+ $(COMMPAGE_OBJS) \
_div64.o \
_divdi3.o \
_getsp.o \
@@ -305,6 +310,7 @@ COMSYSOBJS= \
SYSOBJS= \
__clock_gettime.o \
+ __clock_gettime_sys.o \
__getcontext.o \
__uadmin.o \
_lwp_mutex_unlock.o \
@@ -916,6 +922,7 @@ PORTSYS= \
fcntl.o \
getpagesizes.o \
getpeerucred.o \
+ inotify.o \
inst_sync.o \
issetugid.o \
label.o \
@@ -1251,6 +1258,8 @@ $(PORTI18N_COND:%=pics/%) := \
pics/arc4random.o := CPPFLAGS += -I$(SRC)/common/crypto/chacha
+pics/__clock_gettime.o := CPPFLAGS += $(COMMPAGE_CPPFLAGS)
+
.KEEP_STATE:
all: $(LIBS) $(LIB_PIC)
diff --git a/usr/src/lib/libc/i386/gen/siginfolst.c b/usr/src/lib/libc/i386/gen/siginfolst.c
index 8451dfbb4f..8b8a1b4669 100644
--- a/usr/src/lib/libc/i386/gen/siginfolst.c
+++ b/usr/src/lib/libc/i386/gen/siginfolst.c
@@ -22,6 +22,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015, Joyent, Inc.
*/
/* Copyright (c) 1988 AT&T */
@@ -188,6 +189,7 @@ static const struct siginfolist _sys_siginfolist_data[NSIG-1] = {
0, 0,
0, 0,
0, 0, /* SIGRTMIN+15 */
+ 0, 0, /* SIGRTMIN+16 */
0, 0, /* SIGRTMAX-15 */
0, 0,
0, 0,
diff --git a/usr/src/lib/libc/i386/sys/__clock_gettime.c b/usr/src/lib/libc/i386/sys/__clock_gettime.c
new file mode 100644
index 0000000000..cdc40f7747
--- /dev/null
+++ b/usr/src/lib/libc/i386/sys/__clock_gettime.c
@@ -0,0 +1,45 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+
+
+#include "thr_uberdata.h"
+#include <cp_defs.h>
+
+extern int __clock_gettime_sys(clockid_t, timespec_t *);
+
+int
+__clock_gettime(clockid_t clock_id, timespec_t *tp)
+{
+ comm_page_t *cp = (comm_page_t *)__uberdata.ub_comm_page;
+
+ if (cp != NULL && __cp_can_gettime(cp) == 0) {
+ switch (clock_id) {
+ case __CLOCK_REALTIME0:
+ case CLOCK_REALTIME:
+ __cp_clock_gettime_realtime(cp, tp);
+ return (0);
+
+ case CLOCK_MONOTONIC:
+ hrt2ts(__cp_gethrtime(cp), tp);
+ return (0);
+
+ default:
+ /* Fallback */
+ break;
+ }
+ }
+ return (__clock_gettime_sys(clock_id, tp));
+}
diff --git a/usr/src/lib/libc/i386/sys/__clock_gettime.s b/usr/src/lib/libc/i386/sys/__clock_gettime_sys.s
index cf9ee306ff..57c831843a 100644
--- a/usr/src/lib/libc/i386/sys/__clock_gettime.s
+++ b/usr/src/lib/libc/i386/sys/__clock_gettime_sys.s
@@ -21,24 +21,26 @@
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
*/
- .file "__clock_gettime.s"
+ .file "__clock_gettime_sys.s"
+#include <sys/asm_linkage.h>
#include <sys/time_impl.h>
#include "SYS.h"
/*
* int
- * __clock_gettime(clockid_t clock_id, timespec_t *tp)
+ * __sys_clock_gettime(clockid_t clock_id, timespec_t *tp)
*/
- ENTRY(__clock_gettime)
+ ENTRY(__clock_gettime_sys)
movl 4(%esp), %eax
- cmpl $__CLOCK_REALTIME0, %eax / if (clock_id)
- je 2f / equal to __CLOCK_REALTIME0
- cmpl $CLOCK_REALTIME, %eax / or if (clock_id)
- jne 1f / equal to CLOCK_REALTIME
+ cmpl $__CLOCK_REALTIME0, %eax /* if (clock_id) */
+ je 2f /* equal to __CLOCK_REALTIME0 */
+ cmpl $CLOCK_REALTIME, %eax /* or if (clock_id) */
+ jne 1f /* equal to CLOCK_REALTIME */
2:
SYSFASTTRAP(GETHRESTIME)
movl 8(%esp), %ecx
@@ -49,4 +51,4 @@
SYSTRAP_RVAL1(clock_gettime)
SYSCERROR
RETC
- SET_SIZE(__clock_gettime)
+ SET_SIZE(__clock_gettime_sys)
diff --git a/usr/src/lib/libc/inc/thr_inlines.h b/usr/src/lib/libc/inc/thr_inlines.h
index f7cdc6a6bd..66d811f25b 100644
--- a/usr/src/lib/libc/inc/thr_inlines.h
+++ b/usr/src/lib/libc/inc/thr_inlines.h
@@ -47,17 +47,19 @@
extern __GNU_INLINE ulwp_t *
_curthread(void)
{
-#if defined(__amd64)
ulwp_t *__value;
- __asm__ __volatile__("movq %%fs:0, %0" : "=r" (__value));
+ __asm__ __volatile__(
+#if defined(__amd64)
+ "movq %%fs:0, %0\n\t"
#elif defined(__i386)
- ulwp_t *__value;
- __asm__ __volatile__("movl %%gs:0, %0" : "=r" (__value));
+ "movl %%gs:0, %0\n\t"
#elif defined(__sparc)
- register ulwp_t *__value __asm__("g7");
+ ".register %%g7, #scratch\n\t"
+ "mov %%g7, %0\n\t"
#else
#error "port me"
#endif
+ : "=r" (__value));
return (__value);
}
diff --git a/usr/src/lib/libc/inc/thr_uberdata.h b/usr/src/lib/libc/inc/thr_uberdata.h
index 384a23582d..ff18c8e9f4 100644
--- a/usr/src/lib/libc/inc/thr_uberdata.h
+++ b/usr/src/lib/libc/inc/thr_uberdata.h
@@ -956,6 +956,7 @@ typedef struct uberdata {
int ndaemons; /* total number of THR_DAEMON threads/lwps */
pid_t pid; /* the current process's pid */
void (*sigacthandler)(int, siginfo_t *, void *);
+ int (*setctxt)(const ucontext_t *);
ulwp_t *lwp_stacks;
ulwp_t *lwp_laststack;
int nfreestack;
@@ -968,6 +969,8 @@ typedef struct uberdata {
robust_t **robustlocks; /* table of registered robust locks */
robust_t *robustlist; /* list of registered robust locks */
char *progname; /* the basename of the program, from argv[0] */
+ char *ub_broot; /* the root of the native code in the brand */
+ void *ub_comm_page; /* arch-specific comm page of kernel data */
struct uberdata **tdb_bootstrap;
tdb_t tdb; /* thread debug interfaces (for libc_db) */
} uberdata_t;
@@ -1169,6 +1172,7 @@ typedef struct uberdata32 {
int ndaemons;
int pid;
caddr32_t sigacthandler;
+ caddr32_t setctxt;
caddr32_t lwp_stacks;
caddr32_t lwp_laststack;
int nfreestack;
@@ -1181,6 +1185,7 @@ typedef struct uberdata32 {
caddr32_t robustlocks;
caddr32_t robustlist;
caddr32_t progname;
+ caddr32_t ub_comm_page;
caddr32_t tdb_bootstrap;
tdb32_t tdb;
} uberdata32_t;
@@ -1270,6 +1275,7 @@ extern void rwl_free(ulwp_t *);
extern void heldlock_exit(void);
extern void heldlock_free(ulwp_t *);
extern void sigacthandler(int, siginfo_t *, void *);
+extern int setctxt(const ucontext_t *);
extern void signal_init(void);
extern int sigequalset(const sigset_t *, const sigset_t *);
extern void mutex_setup(void);
diff --git a/usr/src/lib/libc/port/gen/getauxv.c b/usr/src/lib/libc/port/gen/getauxv.c
index 500675719c..4356a01392 100644
--- a/usr/src/lib/libc/port/gen/getauxv.c
+++ b/usr/src/lib/libc/port/gen/getauxv.c
@@ -24,9 +24,8 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include "lint.h"
+#include "thr_uberdata.h"
#include <libc.h>
#include <fcntl.h>
#include <stdlib.h>
@@ -38,6 +37,7 @@
#include <thread.h>
#include <synch.h>
#include <atomic.h>
+#include <limits.h>
static mutex_t auxlock = DEFAULTMUTEX;
@@ -59,11 +59,20 @@ _getaux(int type)
if (auxb == NULL) {
lmutex_lock(&auxlock);
if (auxb == NULL) {
+ uberdata_t *udp = curthread->ul_uberdata;
struct stat statb;
auxv_t *buf = NULL;
+ char *path = "/proc/self/auxv";
+ char pbuf[PATH_MAX];
int fd;
- if ((fd = open("/proc/self/auxv", O_RDONLY)) != -1 &&
+ if (udp->ub_broot != NULL) {
+ (void) snprintf(pbuf, sizeof (pbuf),
+ "%s/proc/self/auxv", udp->ub_broot);
+ path = pbuf;
+ }
+
+ if ((fd = open(path, O_RDONLY)) != -1 &&
fstat(fd, &statb) != -1)
buf = libc_malloc(
statb.st_size + sizeof (auxv_t));
diff --git a/usr/src/lib/libc/port/gen/getenv.c b/usr/src/lib/libc/port/gen/getenv.c
index c345226d0c..bd13a749ed 100644
--- a/usr/src/lib/libc/port/gen/getenv.c
+++ b/usr/src/lib/libc/port/gen/getenv.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
*/
/* Copyright (c) 1988 AT&T */
@@ -336,6 +337,16 @@ addtoenv(char *string, int overwrite)
int
putenv(char *string)
{
+ /*
+ * Historically a call to putenv() with no '=' in the string would work
+ * great until someone called getenv() on that particular environment
+ * variable again. As we've always treated this as valid, rather than
+ * teaching the rest of the environment code how to handle something
+ * without an '=' sign, it instead just calls unsetenv().
+ */
+ if (strchr(string, '=') == NULL)
+ return (unsetenv(string));
+
return (addtoenv(string, 1));
}
diff --git a/usr/src/lib/libc/port/gen/psiginfo.c b/usr/src/lib/libc/port/gen/psiginfo.c
index a648c81094..e7cf46abef 100644
--- a/usr/src/lib/libc/port/gen/psiginfo.c
+++ b/usr/src/lib/libc/port/gen/psiginfo.c
@@ -51,16 +51,23 @@ psiginfo(const siginfo_t *sip, const char *s)
{
char buf[256];
char *c;
+ size_t l = 0;
const struct siginfolist *listp;
- if (sip == 0)
+ if (sip == NULL)
return;
+ if (s != NULL && *s != '\0') {
+ l = snprintf(buf, sizeof (buf), _libc_gettext("%s : "), s);
+ if (l > sizeof (buf))
+ l = sizeof (buf);
+ }
+
if (sip->si_code <= 0) {
- (void) snprintf(buf, sizeof (buf),
- _libc_gettext("%s : %s ( from process %d )\n"),
- s, strsignal(sip->si_signo), sip->si_pid);
+ (void) snprintf(buf + l, sizeof (buf) - l,
+ _libc_gettext("%s ( from process %d )\n"),
+ strsignal(sip->si_signo), sip->si_pid);
} else if (((listp = &_sys_siginfolist[sip->si_signo-1]) != NULL) &&
sip->si_code <= listp->nsiginfo) {
c = _libc_gettext(listp->vsiginfo[sip->si_code-1]);
@@ -69,21 +76,20 @@ psiginfo(const siginfo_t *sip, const char *s)
case SIGBUS:
case SIGILL:
case SIGFPE:
- (void) snprintf(buf, sizeof (buf),
- _libc_gettext("%s : %s ( [%p] %s)\n"),
- s, strsignal(sip->si_signo),
+ (void) snprintf(buf + l, sizeof (buf) - l,
+ _libc_gettext("%s ( [%p] %s)\n"),
+ strsignal(sip->si_signo),
sip->si_addr, c);
break;
default:
- (void) snprintf(buf, sizeof (buf),
- _libc_gettext("%s : %s (%s)\n"),
- s, strsignal(sip->si_signo), c);
+ (void) snprintf(buf + l, sizeof (buf) - l,
+ _libc_gettext("%s (%s)\n"),
+ strsignal(sip->si_signo), c);
break;
}
} else {
- (void) snprintf(buf, sizeof (buf),
- _libc_gettext("%s : %s\n"),
- s, strsignal(sip->si_signo));
+ (void) snprintf(buf + l, sizeof (buf) - l,
+ _libc_gettext("%s\n"), strsignal(sip->si_signo));
}
(void) write(2, buf, strlen(buf));
}
diff --git a/usr/src/lib/libc/port/gen/psignal.c b/usr/src/lib/libc/port/gen/psignal.c
index 201beaacd7..2a61cd9610 100644
--- a/usr/src/lib/libc/port/gen/psignal.c
+++ b/usr/src/lib/libc/port/gen/psignal.c
@@ -37,8 +37,6 @@
* contributors.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Print the name of the signal indicated by "sig", along with the
* supplied message
@@ -61,14 +59,15 @@ void
psignal(int sig, const char *s)
{
char *c;
- size_t n;
+ size_t n = 0;
char buf[256];
if (sig < 0 || sig >= NSIG)
sig = 0;
c = strsignal(sig);
- n = strlen(s);
- if (n) {
+ if (s != NULL)
+ n = strlen(s);
+ if (n != 0) {
(void) snprintf(buf, sizeof (buf), "%s: %s\n", s, c);
} else {
(void) snprintf(buf, sizeof (buf), "%s\n", c);
diff --git a/usr/src/lib/libc/port/gen/sh_locks.c b/usr/src/lib/libc/port/gen/sh_locks.c
index 6583efbc9c..cf879195c6 100644
--- a/usr/src/lib/libc/port/gen/sh_locks.c
+++ b/usr/src/lib/libc/port/gen/sh_locks.c
@@ -24,8 +24,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include "lint.h"
#include <mtlib.h>
#include <sys/types.h>
diff --git a/usr/src/lib/libc/port/gen/siglist.c b/usr/src/lib/libc/port/gen/siglist.c
index 441cc4c2c5..bc6dc1b731 100644
--- a/usr/src/lib/libc/port/gen/siglist.c
+++ b/usr/src/lib/libc/port/gen/siglist.c
@@ -22,6 +22,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015, Joyent, Inc.
*/
/* Copyright (c) 1988 AT&T */
@@ -128,6 +129,7 @@ static const char *_sys_siglist_data[NSIG] = {
"Fourteenth Realtime Signal", /* SIGRTMIN+13 */
"Fifteenth Realtime Signal", /* SIGRTMIN+14 */
"Sixteenth Realtime Signal", /* SIGRTMIN+15 */
+ "Seventeenth Realtime Signal", /* SIGRTMIN+16 */
"Sixteenth Last Realtime Signal", /* SIGRTMAX-15 */
"Fifteenth Last Realtime Signal", /* SIGRTMAX-14 */
"Fourteenth Last Realtime Signal", /* SIGRTMAX-13 */
diff --git a/usr/src/lib/libc/port/gen/str2sig.c b/usr/src/lib/libc/port/gen/str2sig.c
index e0c4e89d68..02c6f3cb65 100644
--- a/usr/src/lib/libc/port/gen/str2sig.c
+++ b/usr/src/lib/libc/port/gen/str2sig.c
@@ -22,7 +22,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
/* Copyright (c) 1988 AT&T */
@@ -102,6 +102,7 @@ static signame_t signames[] = {
{ "RTMIN+13", _SIGRTMIN+13 },
{ "RTMIN+14", _SIGRTMIN+14 },
{ "RTMIN+15", _SIGRTMIN+15 },
+ { "RTMIN+16", _SIGRTMIN+16 },
{ "RTMAX-15", _SIGRTMAX-15 },
{ "RTMAX-14", _SIGRTMAX-14 },
{ "RTMAX-13", _SIGRTMAX-13 },
diff --git a/usr/src/lib/libc/port/llib-lc b/usr/src/lib/libc/port/llib-lc
index b86f23280c..d887a44f37 100644
--- a/usr/src/lib/libc/port/llib-lc
+++ b/usr/src/lib/libc/port/llib-lc
@@ -25,6 +25,7 @@
* Copyright 2013 OmniTI Computer Consulting, Inc. All rights reserved.
* Copyright (c) 2013 Gary Mills
* Copyright 2014 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2015 Joyent, Inc.
* Copyright 2015 Circonus, Inc. All rights reserved.
* Copyright 2015 Joyent, Inc.
*/
diff --git a/usr/src/lib/libc/port/mapfile-vers b/usr/src/lib/libc/port/mapfile-vers
index afb07a4bb9..c278c6a8ee 100644
--- a/usr/src/lib/libc/port/mapfile-vers
+++ b/usr/src/lib/libc/port/mapfile-vers
@@ -2986,6 +2986,10 @@ $endif
__idmap_unreg;
__init_daemon_priv;
__init_suid_priv;
+ inotify_init;
+ inotify_init1;
+ inotify_add_watch;
+ inotify_rm_watch;
_insert;
inst_sync;
_iswctype;
@@ -3066,7 +3070,9 @@ $endif
scrwidth;
semctl64;
_semctl64;
+ set_escaped_context_cleanup;
set_setcontext_enforcement;
+ setcontext_sigmask;
_setbufend;
__set_errno;
setprojrctl;
@@ -3183,6 +3189,7 @@ $endif
zone_list;
zone_list_datalink;
zonept;
+ zone_get_nroot;
zone_remove_datalink;
zone_setattr;
zone_shutdown;
diff --git a/usr/src/lib/libc/port/sys/inotify.c b/usr/src/lib/libc/port/sys/inotify.c
new file mode 100644
index 0000000000..90d04b5dd3
--- /dev/null
+++ b/usr/src/lib/libc/port/sys/inotify.c
@@ -0,0 +1,142 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/inotify.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <strings.h>
+#include <dirent.h>
+
+int
+inotify_init()
+{
+ return (open("/dev/inotify", O_RDWR));
+}
+
+int
+inotify_init1(int flags)
+{
+ int oflags = O_RDWR;
+
+ if (flags & IN_NONBLOCK)
+ oflags |= O_NONBLOCK;
+
+ if (flags & IN_CLOEXEC)
+ oflags |= O_CLOEXEC;
+
+ return (open("/dev/inotify", oflags));
+}
+
+int
+inotify_add_watch(int fd, const char *pathname, uint32_t mask)
+{
+ inotify_addwatch_t ioc;
+ inotify_addchild_t cioc;
+ struct stat buf;
+ int dirfd, wd;
+ DIR *dir;
+ struct dirent *dp;
+ int oflags = O_RDONLY;
+
+ if (mask & IN_DONT_FOLLOW)
+ oflags |= O_NOFOLLOW;
+
+ if ((dirfd = open(pathname, oflags)) < 0)
+ return (-1);
+
+ if (fstat(dirfd, &buf) != 0) {
+ (void) close(dirfd);
+ return (-1);
+ }
+
+ if ((mask & IN_ONLYDIR) && !(buf.st_mode & S_IFDIR)) {
+ (void) close(dirfd);
+ errno = ENOTDIR;
+ return (-1);
+ }
+
+ bzero(&ioc, sizeof (ioc));
+ ioc.inaw_fd = dirfd;
+ ioc.inaw_mask = mask;
+
+ if ((wd = ioctl(fd, INOTIFYIOC_ADD_WATCH, &ioc)) < 0) {
+ (void) close(dirfd);
+ return (-1);
+ }
+
+ if (!(buf.st_mode & S_IFDIR) || !(mask & IN_CHILD_EVENTS)) {
+ (void) close(dirfd);
+ (void) ioctl(fd, INOTIFYIOC_ACTIVATE, wd);
+ return (wd);
+ }
+
+ /*
+ * If we have a directory and we have a mask that denotes child events,
+ * we need to manually add a child watch to every directory entry.
+ * (Because our watch is in place, it will automatically be added to
+ * files that are newly created after this point.)
+ */
+ if ((dir = fdopendir(dirfd)) == NULL) {
+ (void) inotify_rm_watch(fd, wd);
+ (void) close(dirfd);
+ return (-1);
+ }
+
+ bzero(&cioc, sizeof (cioc));
+ cioc.inac_fd = dirfd;
+
+ while ((dp = readdir(dir)) != NULL) {
+ if (strcmp(dp->d_name, ".") == 0)
+ continue;
+
+ if (strcmp(dp->d_name, "..") == 0)
+ continue;
+
+ cioc.inac_name = dp->d_name;
+
+ if (ioctl(fd, INOTIFYIOC_ADD_CHILD, &cioc) != 0) {
+ /*
+ * If we get an error that indicates clear internal
+ * malfunctioning, we propagate the error. Otherwise
+ * we eat it: this could be a file that no longer
+ * exists or a symlink or something else that we
+ * can't lookup.
+ */
+ switch (errno) {
+ case ENXIO:
+ case EFAULT:
+ case EBADF:
+ (void) closedir(dir);
+ (void) inotify_rm_watch(fd, wd);
+ return (-1);
+ default:
+ break;
+ }
+ }
+ }
+
+ (void) closedir(dir);
+ (void) ioctl(fd, INOTIFYIOC_ACTIVATE, wd);
+
+ return (wd);
+}
+
+int
+inotify_rm_watch(int fd, int wd)
+{
+ return (ioctl(fd, INOTIFYIOC_RM_WATCH, wd));
+}
diff --git a/usr/src/lib/libc/port/sys/zone.c b/usr/src/lib/libc/port/sys/zone.c
index 4a4c70043d..8cf28c3ccf 100644
--- a/usr/src/lib/libc/port/sys/zone.c
+++ b/usr/src/lib/libc/port/sys/zone.c
@@ -22,9 +22,11 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011 Joyent Inc. All rights reserved.
*/
#include "lint.h"
+#include "thr_uberdata.h"
#include <sys/types.h>
#include <sys/syscall.h>
#include <sys/zone.h>
@@ -39,7 +41,8 @@
zoneid_t
zone_create(const char *name, const char *root, const struct priv_set *privs,
const char *rctls, size_t rctlsz, const char *zfs, size_t zfssz,
- int *extended_error, int match, int doi, const bslabel_t *label, int flags)
+ int *extended_error, int match, int doi, const bslabel_t *label, int flags,
+ zoneid_t req_zoneid)
{
zone_def zd;
priv_data_t *d;
@@ -59,6 +62,7 @@ zone_create(const char *name, const char *root, const struct priv_set *privs,
zd.doi = doi;
zd.label = label;
zd.flags = flags;
+ zd.zoneid = req_zoneid;
return ((zoneid_t)syscall(SYS_zone, ZONE_CREATE, &zd));
}
@@ -241,3 +245,10 @@ zone_list_datalink(zoneid_t zoneid, int *dlnump, datalink_id_t *linkids)
{
return (syscall(SYS_zone, ZONE_LIST_DATALINK, zoneid, dlnump, linkids));
}
+
+const char *
+zone_get_nroot()
+{
+ uberdata_t *udp = curthread->ul_uberdata;
+ return (udp->ub_broot);
+}
diff --git a/usr/src/lib/libc/port/threads/sigaction.c b/usr/src/lib/libc/port/threads/sigaction.c
index 571e211f97..6a283be33b 100644
--- a/usr/src/lib/libc/port/threads/sigaction.c
+++ b/usr/src/lib/libc/port/threads/sigaction.c
@@ -22,6 +22,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
*/
#include "lint.h"
@@ -284,6 +285,24 @@ take_deferred_signal(int sig)
thr_panic("take_deferred_signal(): __sigresend() failed");
}
+/*
+ * sigacthandler() attempts to clean up dangling uc_link pointers in
+ * signal handling contexts when libc believes us to have escaped
+ * a signal handler incorrectly in the past.
+ *
+ * Branded processes have a legitimate use for a chain including contexts
+ * other than those used for signal handling when tracking emulation
+ * requests from the kernel. We allow them to disable this cleanup
+ * behaviour.
+ */
+static int escaped_context_cleanup = 1;
+
+void
+set_escaped_context_cleanup(int on)
+{
+ escaped_context_cleanup = on;
+}
+
void
sigacthandler(int sig, siginfo_t *sip, void *uvp)
{
@@ -306,7 +325,7 @@ sigacthandler(int sig, siginfo_t *sip, void *uvp)
* we are actually executing at main level (self->ul_siglink == NULL).
* See the code for setjmp()/longjmp() for more details.
*/
- if (self->ul_siglink == NULL)
+ if (escaped_context_cleanup && self->ul_siglink == NULL)
ucp->uc_link = NULL;
/*
@@ -458,11 +477,12 @@ sigaction(int sig, const struct sigaction *nact, struct sigaction *oact)
}
/*
- * This is a private interface for the linux brand interface.
+ * This is a private interface for the lx brand.
*/
void
setsigacthandler(void (*nsigacthandler)(int, siginfo_t *, void *),
- void (**osigacthandler)(int, siginfo_t *, void *))
+ void (**osigacthandler)(int, siginfo_t *, void *),
+ int (*brsetctxt)(const ucontext_t *))
{
ulwp_t *self = curthread;
uberdata_t *udp = self->ul_uberdata;
@@ -471,6 +491,9 @@ setsigacthandler(void (*nsigacthandler)(int, siginfo_t *, void *),
*osigacthandler = udp->sigacthandler;
udp->sigacthandler = nsigacthandler;
+
+ if (brsetctxt != NULL)
+ udp->setctxt = brsetctxt;
}
/*
@@ -517,11 +540,39 @@ set_setcontext_enforcement(int on)
setcontext_enforcement = on;
}
+/*
+ * The LX brand emulation library implements an operation that is analogous to
+ * setcontext(), but takes a different path in to the kernel. So that it can
+ * correctly restore a signal mask, we expose just the signal mask handling
+ * part of the regular setcontext() routine as a private interface.
+ */
+void
+setcontext_sigmask(ucontext_t *ucp)
+{
+ ulwp_t *self = curthread;
+
+ if (ucp->uc_flags & UC_SIGMASK) {
+ block_all_signals(self);
+ delete_reserved_signals(&ucp->uc_sigmask);
+ self->ul_sigmask = ucp->uc_sigmask;
+ if (self->ul_cursig) {
+ /*
+ * We have a deferred signal present.
+ * The signal mask will be set when the
+ * signal is taken in take_deferred_signal().
+ */
+ ASSERT(self->ul_critical + self->ul_sigdefer != 0);
+ ucp->uc_flags &= ~UC_SIGMASK;
+ }
+ }
+}
+
#pragma weak _setcontext = setcontext
int
setcontext(const ucontext_t *ucp)
{
ulwp_t *self = curthread;
+ uberdata_t *udp = self->ul_uberdata;
int ret;
ucontext_t uc;
@@ -536,20 +587,7 @@ setcontext(const ucontext_t *ucp)
/*
* Restore previous signal mask and context link.
*/
- if (uc.uc_flags & UC_SIGMASK) {
- block_all_signals(self);
- delete_reserved_signals(&uc.uc_sigmask);
- self->ul_sigmask = uc.uc_sigmask;
- if (self->ul_cursig) {
- /*
- * We have a deferred signal present.
- * The signal mask will be set when the
- * signal is taken in take_deferred_signal().
- */
- ASSERT(self->ul_critical + self->ul_sigdefer != 0);
- uc.uc_flags &= ~UC_SIGMASK;
- }
- }
+ setcontext_sigmask(&uc);
self->ul_siglink = uc.uc_link;
/*
@@ -578,7 +616,7 @@ setcontext(const ucontext_t *ucp)
*/
set_parking_flag(self, 0);
self->ul_sp = 0;
- ret = __setcontext(&uc);
+ ret = udp->setctxt(&uc);
/*
* It is OK for setcontext() to return if the user has not specified
diff --git a/usr/src/lib/libc/port/threads/thr.c b/usr/src/lib/libc/port/threads/thr.c
index 88ce377f21..fcfdf6b93a 100644
--- a/usr/src/lib/libc/port/threads/thr.c
+++ b/usr/src/lib/libc/port/threads/thr.c
@@ -125,6 +125,7 @@ uberdata_t __uberdata = {
0, /* ndaemons */
0, /* pid */
sigacthandler, /* sigacthandler */
+ __setcontext, /* setctxt */
NULL, /* lwp_stacks */
NULL, /* lwp_laststack */
0, /* nfreestack */
@@ -137,6 +138,8 @@ uberdata_t __uberdata = {
NULL, /* robustlocks */
NULL, /* robustlist */
NULL, /* progname */
+ NULL, /* ub_broot */
+ NULL, /* ub_comm_page */
NULL, /* __tdb_bootstrap */
{ /* tdb */
NULL, /* tdb_sync_addr_hash */
@@ -1220,6 +1223,29 @@ extern void atfork_init(void);
extern void __proc64id(void);
#endif
+static void
+init_auxv_data(uberdata_t *udp)
+{
+ Dl_argsinfo_t args;
+
+ udp->ub_broot = NULL;
+ udp->ub_comm_page = NULL;
+ if (dlinfo(RTLD_SELF, RTLD_DI_ARGSINFO, &args) < 0)
+ return;
+
+ while (args.dla_auxv->a_type != AT_NULL) {
+ switch (args.dla_auxv->a_type) {
+ case AT_SUN_BRAND_NROOT:
+ udp->ub_broot = args.dla_auxv->a_un.a_ptr;
+ break;
+ case AT_SUN_COMMPAGE:
+ udp->ub_comm_page = args.dla_auxv->a_un.a_ptr;
+ break;
+ }
+ args.dla_auxv++;
+ }
+}
+
/*
* libc_init() is called by ld.so.1 for library initialization.
* We perform minimal initialization; enough to work with the main thread.
@@ -1256,6 +1282,14 @@ libc_init(void)
(void) _atexit(__cleanup);
/*
+ * Every libc, regardless of link map, needs to go through and check
+ * its aux vectors. Doing so will indicate whether or not this has
+ * been given a brand root (used to qualify various other data) or a
+ * comm page (to optimize certain system actions).
+ */
+ init_auxv_data(udp);
+
+ /*
* We keep our uberdata on one of (a) the first alternate link map
* or (b) the primary link map. We switch to the primary link map
* and stay there once we see it. All intermediate link maps are
diff --git a/usr/src/lib/libc/sparc/Makefile.com b/usr/src/lib/libc/sparc/Makefile.com
index 062949b3f9..c525f28b3d 100644
--- a/usr/src/lib/libc/sparc/Makefile.com
+++ b/usr/src/lib/libc/sparc/Makefile.com
@@ -950,6 +950,7 @@ PORTSYS= \
fcntl.o \
getpagesizes.o \
getpeerucred.o \
+ inotify.o \
inst_sync.o \
issetugid.o \
label.o \
diff --git a/usr/src/lib/libc/sparc/gen/siginfolst.c b/usr/src/lib/libc/sparc/gen/siginfolst.c
index 8451dfbb4f..8b8a1b4669 100644
--- a/usr/src/lib/libc/sparc/gen/siginfolst.c
+++ b/usr/src/lib/libc/sparc/gen/siginfolst.c
@@ -22,6 +22,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015, Joyent, Inc.
*/
/* Copyright (c) 1988 AT&T */
@@ -188,6 +189,7 @@ static const struct siginfolist _sys_siginfolist_data[NSIG-1] = {
0, 0,
0, 0,
0, 0, /* SIGRTMIN+15 */
+ 0, 0, /* SIGRTMIN+16 */
0, 0, /* SIGRTMAX-15 */
0, 0,
0, 0,
diff --git a/usr/src/lib/libc/sparcv9/Makefile.com b/usr/src/lib/libc/sparcv9/Makefile.com
index 596d349dfb..ff896639e2 100644
--- a/usr/src/lib/libc/sparcv9/Makefile.com
+++ b/usr/src/lib/libc/sparcv9/Makefile.com
@@ -886,6 +886,7 @@ PORTSYS= \
chmod.o \
chown.o \
corectl.o \
+ epoll.o \
eventfd.o \
epoll.o \
exacctsys.o \
diff --git a/usr/src/lib/libc/sparcv9/gen/siginfolst.c b/usr/src/lib/libc/sparcv9/gen/siginfolst.c
index 8451dfbb4f..8b8a1b4669 100644
--- a/usr/src/lib/libc/sparcv9/gen/siginfolst.c
+++ b/usr/src/lib/libc/sparcv9/gen/siginfolst.c
@@ -22,6 +22,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015, Joyent, Inc.
*/
/* Copyright (c) 1988 AT&T */
@@ -188,6 +189,7 @@ static const struct siginfolist _sys_siginfolist_data[NSIG-1] = {
0, 0,
0, 0,
0, 0, /* SIGRTMIN+15 */
+ 0, 0, /* SIGRTMIN+16 */
0, 0, /* SIGRTMAX-15 */
0, 0,
0, 0,
diff --git a/usr/src/lib/libctf/Makefile.com b/usr/src/lib/libctf/Makefile.com
index 4d1e01d4eb..0169c2a367 100644
--- a/usr/src/lib/libctf/Makefile.com
+++ b/usr/src/lib/libctf/Makefile.com
@@ -23,43 +23,9 @@
# Use is subject to license terms.
#
-LIBRARY = libctf.a
-VERS = .1
-
-COMMON_OBJS = \
- ctf_create.o \
- ctf_decl.o \
- ctf_error.o \
- ctf_hash.o \
- ctf_labels.o \
- ctf_lookup.o \
- ctf_open.o \
- ctf_types.o \
- ctf_util.o
-
-LIB_OBJS = \
- ctf_lib.o \
- ctf_subr.o
-
-OBJECTS = $(COMMON_OBJS) $(LIB_OBJS)
-
-include ../../Makefile.lib
+include ../Makefile.shared.com
include ../../Makefile.rootfs
-SRCS = $(COMMON_OBJS:%.o=../../../common/ctf/%.c) $(LIB_OBJS:%.o=../common/%.c)
-LIBS = $(DYNLIB) $(LINTLIB)
-
-SRCDIR = ../common
-
-CPPFLAGS += -I../common -I../../../common/ctf -DCTF_OLD_VERSIONS
-CFLAGS += $(CCVERBOSE)
-
-CERRWARN += -_gcc=-Wno-uninitialized
-
-LDLIBS += -lc
-
-$(LINTLIB) := SRCS = $(SRCDIR)/$(LINTSRC)
-
.KEEP_STATE:
all: $(LIBS)
@@ -67,7 +33,4 @@ all: $(LIBS)
lint: lintcheck
include ../../Makefile.targ
-
-objs/%.o pics/%.o: ../../../common/ctf/%.c
- $(COMPILE.c) -o $@ $<
- $(POST_PROCESS_O)
+include ../Makefile.shared.targ
diff --git a/usr/src/lib/libctf/Makefile.shared.com b/usr/src/lib/libctf/Makefile.shared.com
new file mode 100644
index 0000000000..55f090e7f8
--- /dev/null
+++ b/usr/src/lib/libctf/Makefile.shared.com
@@ -0,0 +1,90 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# Copyright (c) 2015, Joyent, Inc. All rights reserved.
+#
+
+#
+# This Makefile is shared between the libctf native build in tools and
+# the libctf build here for the system.
+#
+LIBRARY = libctf.a
+VERS = .1
+
+COMMON_OBJS = \
+ ctf_create.o \
+ ctf_decl.o \
+ ctf_dwarf.o \
+ ctf_error.o \
+ ctf_hash.o \
+ ctf_labels.o \
+ ctf_lookup.o \
+ ctf_open.o \
+ ctf_types.o \
+ ctf_util.o
+
+MERGEQ_OBJS = \
+ mergeq.o \
+ workq.o
+
+LIST_OBJS = \
+ list.o
+
+LIB_OBJS = \
+ ctf_convert.o \
+ ctf_elfwrite.o \
+ ctf_diff.o \
+ ctf_lib.o \
+ ctf_merge.o \
+ ctf_subr.o
+
+OBJECTS = $(COMMON_OBJS) $(LIB_OBJS) $(LIST_OBJS) $(MERGEQ_OBJS)
+MAPFILEDIR = $(SRC)/lib/libctf
+
+include $(SRC)/lib/Makefile.lib
+
+SRCS = \
+ $(COMMON_OBJS:%.o=$(SRC)/common/ctf/%.c) \
+ $(LIB_OBJS:%.o=$(SRC)/lib/libctf/common/%.c) \
+ $(LIST_OBJS:%.o=$(SRC)/common/list/%.c) \
+ $(MERGEQ_OBJS:%.o=$(SRC)/lib/mergeq/%.c)
+
+LIBS = $(DYNLIB) $(LINTLIB)
+LDLIBS += -lc -lelf -ldwarf -lavl
+
+C99MODE= -xc99=%all
+C99LMODE= -Xc99=%all
+
+SRCDIR = $(SRC)/lib/libctf/common
+
+CPPFLAGS += -I$(SRC)/lib/libctf/common \
+ -I$(SRC)/common/ctf \
+ -I$(SRC)/lib/libdwarf/common \
+ -I$(SRC)/lib/mergeq \
+ -DCTF_OLD_VERSIONS
+CFLAGS += $(CCVERBOSE)
+
+CERRWARN += -_gcc=-Wno-uninitialized
+
+$(LINTLIB) := SRCS = $(SRCDIR)/$(LINTSRC)
diff --git a/usr/src/lib/libctf/Makefile.shared.targ b/usr/src/lib/libctf/Makefile.shared.targ
new file mode 100644
index 0000000000..b6520f2366
--- /dev/null
+++ b/usr/src/lib/libctf/Makefile.shared.targ
@@ -0,0 +1,30 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2015, Joyent, Inc. All rights reserved.
+#
+
+#
+# This Makefile is shared between both the tools and the normal library build.
+#
+
+pics/%.o: $(SRC)/common/ctf/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
+
+pics/%.o: $(SRC)/common/list/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
+
+pics/%.o: $(SRC)/lib/mergeq/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
diff --git a/usr/src/lib/libctf/common/ctf_convert.c b/usr/src/lib/libctf/common/ctf_convert.c
new file mode 100644
index 0000000000..1a433d17db
--- /dev/null
+++ b/usr/src/lib/libctf/common/ctf_convert.c
@@ -0,0 +1,210 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * Main conversion entry points. This has been designed such that there can be
+ * any number of different conversion backends. Currently we only have one that
+ * understands DWARFv2 (and bits of DWARFv4). Each backend should be placed in
+ * the ctf_converters list and each will be tried in turn.
+ */
+
+#include <libctf_impl.h>
+#include <gelf.h>
+
+ctf_convert_f ctf_converters[] = {
+ ctf_dwarf_convert
+};
+
+#define NCONVERTS (sizeof (ctf_converters) / sizeof (ctf_convert_f))
+
+typedef enum ctf_convert_source {
+ CTFCONV_SOURCE_NONE = 0x0,
+ CTFCONV_SOURCE_UNKNOWN = 0x01,
+ CTFCONV_SOURCE_C = 0x02,
+ CTFCONV_SOURCE_S = 0x04
+} ctf_convert_source_t;
+
+static void
+ctf_convert_ftypes(Elf *elf, ctf_convert_source_t *types)
+{
+ int i;
+ Elf_Scn *scn = NULL, *strscn;
+ *types = CTFCONV_SOURCE_NONE;
+ GElf_Shdr shdr;
+ Elf_Data *data, *strdata;
+
+ while ((scn = elf_nextscn(elf, scn)) != NULL) {
+
+ if (gelf_getshdr(scn, &shdr) == NULL)
+ return;
+
+ if (shdr.sh_type == SHT_SYMTAB)
+ break;
+ }
+
+ if (scn == NULL)
+ return;
+
+ if ((strscn = elf_getscn(elf, shdr.sh_link)) == NULL)
+ return;
+
+ if ((data = elf_getdata(scn, NULL)) == NULL)
+ return;
+
+ if ((strdata = elf_getdata(strscn, NULL)) == NULL)
+ return;
+
+ for (i = 0; i < shdr.sh_size / shdr.sh_entsize; i++) {
+ GElf_Sym sym;
+ const char *file;
+ size_t len;
+
+ if (gelf_getsym(data, i, &sym) == NULL)
+ return;
+
+ if (GELF_ST_TYPE(sym.st_info) != STT_FILE)
+ continue;
+
+ file = (const char *)((uintptr_t)strdata->d_buf + sym.st_name);
+ len = strlen(file);
+ if (len < 2 || file[len - 2] != '.') {
+ *types |= CTFCONV_SOURCE_UNKNOWN;
+ continue;
+ }
+
+ switch (file[len - 1]) {
+ case 'c':
+ *types |= CTFCONV_SOURCE_C;
+ break;
+ case 'h':
+ /* We traditionally ignore header files... */
+ break;
+ case 's':
+ *types |= CTFCONV_SOURCE_S;
+ break;
+ default:
+ *types |= CTFCONV_SOURCE_UNKNOWN;
+ break;
+ }
+ }
+}
+
+ctf_file_t *
+ctf_elfconvert(int fd, Elf *elf, const char *label, uint_t nthrs, uint_t flags,
+ int *errp, char *errbuf, size_t errlen)
+{
+ int err, i;
+ ctf_file_t *fp = NULL;
+ boolean_t notsup = B_TRUE;
+ ctf_convert_source_t type;
+
+ if (errp == NULL)
+ errp = &err;
+
+ if (elf == NULL) {
+ *errp = EINVAL;
+ return (NULL);
+ }
+
+ if (flags & ~CTF_CONVERT_F_IGNNONC) {
+ *errp = EINVAL;
+ return (NULL);
+ }
+
+ if (elf_kind(elf) != ELF_K_ELF) {
+ *errp = ECTF_FMT;
+ return (NULL);
+ }
+
+ ctf_convert_ftypes(elf, &type);
+ ctf_dprintf("got types: %d\n", type);
+ if (flags & CTF_CONVERT_F_IGNNONC) {
+ if (type == CTFCONV_SOURCE_NONE ||
+ (type & CTFCONV_SOURCE_UNKNOWN)) {
+ *errp = ECTF_CONVNOCSRC;
+ return (NULL);
+ }
+ }
+
+ for (i = 0; i < NCONVERTS; i++) {
+ ctf_conv_status_t cs;
+
+ fp = NULL;
+ cs = ctf_converters[i](fd, elf, nthrs, errp, &fp, errbuf,
+ errlen);
+ if (cs == CTF_CONV_SUCCESS) {
+ notsup = B_FALSE;
+ break;
+ }
+ if (cs == CTF_CONV_ERROR) {
+ fp = NULL;
+ notsup = B_FALSE;
+ break;
+ }
+ }
+
+ if (notsup == B_TRUE) {
+ if ((flags & CTF_CONVERT_F_IGNNONC) != 0 &&
+ (type & CTFCONV_SOURCE_C) == 0) {
+ *errp = ECTF_CONVNOCSRC;
+ return (NULL);
+ }
+ *errp = ECTF_NOCONVBKEND;
+ return (NULL);
+ }
+
+ /*
+ * Succsesful conversion.
+ */
+ if (fp != NULL) {
+ if (label == NULL)
+ label = "";
+ if (ctf_add_label(fp, label, fp->ctf_typemax, 0) == CTF_ERR) {
+ *errp = ctf_errno(fp);
+ ctf_close(fp);
+ return (NULL);
+ }
+ if (ctf_update(fp) == CTF_ERR) {
+ *errp = ctf_errno(fp);
+ ctf_close(fp);
+ return (NULL);
+ }
+ }
+
+ return (fp);
+}
+
+ctf_file_t *
+ctf_fdconvert(int fd, const char *label, uint_t nthrs, uint_t flags, int *errp,
+ char *errbuf, size_t errlen)
+{
+ int err;
+ Elf *elf;
+ ctf_file_t *fp;
+
+ if (errp == NULL)
+ errp = &err;
+
+ elf = elf_begin(fd, ELF_C_READ, NULL);
+ if (elf == NULL) {
+ *errp = ECTF_FMT;
+ return (NULL);
+ }
+
+ fp = ctf_elfconvert(fd, elf, label, nthrs, flags, errp, errbuf, errlen);
+
+ (void) elf_end(elf);
+ return (fp);
+}
diff --git a/usr/src/lib/libctf/common/ctf_diff.c b/usr/src/lib/libctf/common/ctf_diff.c
new file mode 100644
index 0000000000..d070488bbb
--- /dev/null
+++ b/usr/src/lib/libctf/common/ctf_diff.c
@@ -0,0 +1,1362 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2015 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * The following ia a basic overview of how we diff types in containers (the
+ * generally interesting part of diff, and what's used by merge). We maintain
+ * two mapping tables, a table of forward mappings (src->dest), and a reverse
+ * mapping (dest->src). Both are initialized to contain no mapping, and can also
+ * be updated to contain a negative mapping.
+ *
+ * What we do first is iterate over each type in the src container, and compare
+ * it with a type in the destination container. This may involve doing recursive
+ * comparisons -- which can involve cycles. To deal with this, whenever we
+ * encounter something which may be cyclic, we insert a guess. In other words,
+ * we assume that it may be true. This is necessary for the classic case of the
+ * following structure:
+ *
+ * struct foo {
+ * struct foo *foo_next;
+ * };
+ *
+ * If it turns out that we were wrong, we discard our guesses.
+ *
+ * If we find that a given type in src has no corresponding entry in dst, we
+ * then mark its map as CTF_ERR (-1) to indicate that it has *no* match, as
+ * opposed to the default value of 0, which indicates an unknown match.
+ * Once we've done the first iteration through src, we know at that point in
+ * time whether everything in dst is similar or not and can simply walk over it
+ * and don't have to do any additional checks.
+ */
+
+#include <libctf.h>
+#include <ctf_impl.h>
+#include <sys/debug.h>
+
+typedef struct ctf_diff_func {
+ const char *cdf_name;
+ ulong_t cdf_symidx;
+ ulong_t cdf_matchidx;
+} ctf_diff_func_t;
+
+typedef struct ctf_diff_obj {
+ const char *cdo_name;
+ ulong_t cdo_symidx;
+ ctf_id_t cdo_id;
+ ulong_t cdo_matchidx;
+} ctf_diff_obj_t;
+
+typedef struct ctf_diff_guess {
+ struct ctf_diff_guess *cdg_next;
+ ctf_id_t cdg_iid;
+ ctf_id_t cdg_oid;
+} ctf_diff_guess_t;
+
+/* typedef in libctf.h */
+struct ctf_diff {
+ uint_t cds_flags;
+ boolean_t cds_tvalid; /* types valid */
+ ctf_file_t *cds_ifp;
+ ctf_file_t *cds_ofp;
+ ctf_id_t *cds_forward;
+ ctf_id_t *cds_reverse;
+ size_t cds_fsize;
+ size_t cds_rsize;
+ ctf_diff_type_f cds_func;
+ ctf_diff_guess_t *cds_guess;
+ void *cds_arg;
+ uint_t cds_nifuncs;
+ uint_t cds_nofuncs;
+ uint_t cds_nextifunc;
+ uint_t cds_nextofunc;
+ ctf_diff_func_t *cds_ifuncs;
+ ctf_diff_func_t *cds_ofuncs;
+ boolean_t cds_ffillip;
+ boolean_t cds_fvalid;
+ uint_t cds_niobj;
+ uint_t cds_noobj;
+ uint_t cds_nextiobj;
+ uint_t cds_nextoobj;
+ ctf_diff_obj_t *cds_iobj;
+ ctf_diff_obj_t *cds_oobj;
+ boolean_t cds_ofillip;
+ boolean_t cds_ovalid;
+};
+
+#define TINDEX(tid) (tid - 1)
+
+/*
+ * Team Diff
+ */
+static int ctf_diff_type(ctf_diff_t *, ctf_file_t *, ctf_id_t, ctf_file_t *,
+ ctf_id_t);
+
+static int
+ctf_diff_name(ctf_file_t *ifp, ctf_id_t iid, ctf_file_t *ofp, ctf_id_t oid)
+{
+ const char *iname, *oname;
+ const ctf_type_t *itp, *otp;
+
+ if ((itp = ctf_lookup_by_id(&ifp, iid)) == NULL)
+ return (CTF_ERR);
+
+ if ((otp = ctf_lookup_by_id(&ofp, oid)) == NULL)
+ return (ctf_set_errno(ifp, iid));
+
+ iname = ctf_strptr(ifp, itp->ctt_name);
+ oname = ctf_strptr(ofp, otp->ctt_name);
+
+ if ((iname == NULL || oname == NULL) && (iname != oname))
+ return (B_TRUE);
+
+ /* Two anonymous names are the same */
+ if (iname == NULL && oname == NULL)
+ return (B_FALSE);
+
+ return (strcmp(iname, oname) == 0 ? B_FALSE: B_TRUE);
+}
+
+/*
+ * For floats and ints
+ */
+static int
+ctf_diff_number(ctf_file_t *ifp, ctf_id_t iid, ctf_file_t *ofp, ctf_id_t oid)
+{
+ ctf_encoding_t ien, den;
+
+ if (ctf_type_encoding(ifp, iid, &ien) != 0)
+ return (CTF_ERR);
+
+ if (ctf_type_encoding(ofp, oid, &den) != 0)
+ return (ctf_set_errno(ifp, iid));
+
+ if (bcmp(&ien, &den, sizeof (ctf_encoding_t)) != 0)
+ return (B_TRUE);
+
+ return (B_FALSE);
+}
+
+/*
+ * Two typedefs are equivalent, if after we resolve a chain of typedefs, they
+ * point to equivalent types. This means that if a size_t is defined as follows:
+ *
+ * size_t -> ulong_t -> unsigned long
+ * size_t -> unsigned long
+ *
+ * That we'll ultimately end up treating them the same.
+ */
+static int
+ctf_diff_typedef(ctf_diff_t *cds, ctf_file_t *ifp, ctf_id_t iid,
+ ctf_file_t *ofp, ctf_id_t oid)
+{
+ ctf_id_t iref = CTF_ERR, oref = CTF_ERR;
+
+ while (ctf_type_kind(ifp, iid) == CTF_K_TYPEDEF) {
+ iref = ctf_type_reference(ifp, iid);
+ if (iref == CTF_ERR)
+ return (CTF_ERR);
+ iid = iref;
+ }
+
+ while (ctf_type_kind(ofp, oid) == CTF_K_TYPEDEF) {
+ oref = ctf_type_reference(ofp, oid);
+ if (oref == CTF_ERR)
+ return (CTF_ERR);
+ oid = oref;
+ }
+
+ VERIFY(iref != CTF_ERR && oref != CTF_ERR);
+ return (ctf_diff_type(cds, ifp, iref, ofp, oref));
+}
+
+/*
+ * Two qualifiers are equivalent iff they point to two equivalent types.
+ */
+static int
+ctf_diff_qualifier(ctf_diff_t *cds, ctf_file_t *ifp, ctf_id_t iid,
+ ctf_file_t *ofp, ctf_id_t oid)
+{
+ ctf_id_t iref, oref;
+
+ iref = ctf_type_reference(ifp, iid);
+ if (iref == CTF_ERR)
+ return (CTF_ERR);
+
+ oref = ctf_type_reference(ofp, oid);
+ if (oref == CTF_ERR)
+ return (ctf_set_errno(ifp, ctf_errno(ofp)));
+
+ return (ctf_diff_type(cds, ifp, iref, ofp, oref));
+}
+
+/*
+ * Two arrays are the same iff they have the same type for contents, the same
+ * type for the index, and the same number of elements.
+ */
+static int
+ctf_diff_array(ctf_diff_t *cds, ctf_file_t *ifp, ctf_id_t iid, ctf_file_t *ofp,
+ ctf_id_t oid)
+{
+ int ret;
+ ctf_arinfo_t iar, oar;
+
+ if (ctf_array_info(ifp, iid, &iar) == CTF_ERR)
+ return (CTF_ERR);
+
+ if (ctf_array_info(ofp, oid, &oar) == CTF_ERR)
+ return (ctf_set_errno(ifp, ctf_errno(ofp)));
+
+ ret = ctf_diff_type(cds, ifp, iar.ctr_contents, ofp, oar.ctr_contents);
+ if (ret != B_FALSE)
+ return (ret);
+
+ if (iar.ctr_nelems != oar.ctr_nelems)
+ return (B_TRUE);
+
+ /*
+ * If we're ignoring integer types names, then we're trying to do a bit
+ * of a logical diff and we don't really care about the fact that the
+ * index element might not be the same here, what we care about are the
+ * number of elements and that they're the same type.
+ */
+ if ((cds->cds_flags & CTF_DIFF_F_IGNORE_INTNAMES) == 0) {
+ ret = ctf_diff_type(cds, ifp, iar.ctr_index, ofp,
+ oar.ctr_index);
+ if (ret != B_FALSE)
+ return (ret);
+ }
+
+ return (B_FALSE);
+}
+
+/*
+ * Two function pointers are the same if the following is all true:
+ *
+ * o They have the same return type
+ * o They have the same number of arguments
+ * o The arguments are of the same type
+ * o They have the same flags
+ */
+static int
+ctf_diff_fptr(ctf_diff_t *cds, ctf_file_t *ifp, ctf_id_t iid, ctf_file_t *ofp,
+ ctf_id_t oid)
+{
+ int ret, i;
+ ctf_funcinfo_t ifunc, ofunc;
+ ctf_id_t *iids, *oids;
+
+ if (ctf_func_info_by_id(ifp, iid, &ifunc) == CTF_ERR)
+ return (CTF_ERR);
+
+ if (ctf_func_info_by_id(ofp, oid, &ofunc) == CTF_ERR)
+ return (ctf_set_errno(ifp, ctf_errno(ofp)));
+
+ if (ifunc.ctc_argc != ofunc.ctc_argc)
+ return (B_TRUE);
+
+ if (ifunc.ctc_flags != ofunc.ctc_flags)
+ return (B_TRUE);
+
+ ret = ctf_diff_type(cds, ifp, ifunc.ctc_return, ofp, ofunc.ctc_return);
+ if (ret != B_FALSE)
+ return (ret);
+
+ iids = ctf_alloc(sizeof (ctf_id_t) * ifunc.ctc_argc);
+ if (iids == NULL)
+ return (ctf_set_errno(ifp, ENOMEM));
+
+ oids = ctf_alloc(sizeof (ctf_id_t) * ifunc.ctc_argc);
+ if (oids == NULL) {
+ ctf_free(iids, sizeof (ctf_id_t) * ifunc.ctc_argc);
+ return (ctf_set_errno(ifp, ENOMEM));
+ }
+
+ if (ctf_func_args_by_id(ifp, iid, ifunc.ctc_argc, iids) == CTF_ERR) {
+ ret = CTF_ERR;
+ goto out;
+ }
+
+ if (ctf_func_args_by_id(ofp, oid, ofunc.ctc_argc, oids) == CTF_ERR) {
+ ret = ctf_set_errno(ifp, ctf_errno(ofp));
+ goto out;
+ }
+
+ ret = B_TRUE;
+ for (i = 0; i < ifunc.ctc_argc; i++) {
+ ret = ctf_diff_type(cds, ifp, iids[i], ofp, oids[i]);
+ if (ret != B_FALSE)
+ goto out;
+ }
+ ret = B_FALSE;
+
+out:
+ ctf_free(iids, sizeof (ctf_id_t) * ifunc.ctc_argc);
+ ctf_free(oids, sizeof (ctf_id_t) * ofunc.ctc_argc);
+ return (ret);
+}
+
+/*
+ * Two structures are the same if every member is identical to its corresponding
+ * type, at the same offset, and has the same name, as well as them having the
+ * same overall size.
+ */
+static int
+ctf_diff_struct(ctf_diff_t *cds, ctf_file_t *ifp, ctf_id_t iid, ctf_file_t *ofp,
+ ctf_id_t oid)
+{
+ ctf_file_t *oifp;
+ const ctf_type_t *itp, *otp;
+ ssize_t isize, iincr, osize, oincr;
+ const ctf_member_t *imp, *omp;
+ const ctf_lmember_t *ilmp, *olmp;
+ int n;
+ ctf_diff_guess_t *cdg;
+
+ oifp = ifp;
+
+ if ((itp = ctf_lookup_by_id(&ifp, iid)) == NULL)
+ return (CTF_ERR);
+
+ if ((otp = ctf_lookup_by_id(&ofp, oid)) == NULL)
+ return (ctf_set_errno(oifp, ctf_errno(ofp)));
+
+ if (ctf_type_size(ifp, iid) != ctf_type_size(ofp, oid))
+ return (B_TRUE);
+
+ if (LCTF_INFO_VLEN(ifp, itp->ctt_info) !=
+ LCTF_INFO_VLEN(ofp, otp->ctt_info))
+ return (B_TRUE);
+
+ (void) ctf_get_ctt_size(ifp, itp, &isize, &iincr);
+ (void) ctf_get_ctt_size(ofp, otp, &osize, &oincr);
+
+ if (ifp->ctf_version == CTF_VERSION_1 || isize < CTF_LSTRUCT_THRESH) {
+ imp = (const ctf_member_t *)((uintptr_t)itp + iincr);
+ ilmp = NULL;
+ } else {
+ imp = NULL;
+ ilmp = (const ctf_lmember_t *)((uintptr_t)itp + iincr);
+ }
+
+ if (ofp->ctf_version == CTF_VERSION_1 || osize < CTF_LSTRUCT_THRESH) {
+ omp = (const ctf_member_t *)((uintptr_t)otp + oincr);
+ olmp = NULL;
+ } else {
+ omp = NULL;
+ olmp = (const ctf_lmember_t *)((uintptr_t)otp + oincr);
+ }
+
+ /*
+ * Insert our assumption that they're equal for the moment.
+ */
+ cdg = ctf_alloc(sizeof (ctf_diff_guess_t));
+ if (cdg == NULL)
+ return (ctf_set_errno(ifp, ENOMEM));
+ cdg->cdg_iid = iid;
+ cdg->cdg_oid = oid;
+ cdg->cdg_next = cds->cds_guess;
+ cds->cds_guess = cdg;
+ cds->cds_forward[TINDEX(iid)] = oid;
+ cds->cds_reverse[TINDEX(oid)] = iid;
+
+ for (n = LCTF_INFO_VLEN(ifp, itp->ctt_info); n != 0; n--) {
+ const char *iname, *oname;
+ ulong_t ioff, ooff;
+ ctf_id_t itype, otype;
+ int ret;
+
+ if (imp != NULL) {
+ iname = ctf_strptr(ifp, imp->ctm_name);
+ ioff = imp->ctm_offset;
+ itype = imp->ctm_type;
+ } else {
+ iname = ctf_strptr(ifp, ilmp->ctlm_name);
+ ioff = CTF_LMEM_OFFSET(ilmp);
+ itype = ilmp->ctlm_type;
+ }
+
+ if (omp != NULL) {
+ oname = ctf_strptr(ofp, omp->ctm_name);
+ ooff = omp->ctm_offset;
+ otype = omp->ctm_type;
+ } else {
+ oname = ctf_strptr(ofp, olmp->ctlm_name);
+ ooff = CTF_LMEM_OFFSET(olmp);
+ otype = olmp->ctlm_type;
+ }
+
+ if (ioff != ooff) {
+ return (B_TRUE);
+ }
+ if (strcmp(iname, oname) != 0) {
+ return (B_TRUE);
+ }
+ ret = ctf_diff_type(cds, ifp, itype, ofp, otype);
+ if (ret != B_FALSE) {
+ return (ret);
+ }
+
+ /* Advance our pointers */
+ if (imp != NULL)
+ imp++;
+ if (ilmp != NULL)
+ ilmp++;
+ if (omp != NULL)
+ omp++;
+ if (olmp != NULL)
+ olmp++;
+ }
+
+ return (B_FALSE);
+}
+
+/*
+ * Two unions are the same if they have the same set of members. This is similar
+ * to, but slightly different from a struct. The offsets of members don't
+ * matter. However, their is no guarantee of ordering so we have to fall back to
+ * doing an O(N^2) scan.
+ */
+typedef struct ctf_diff_union_member {
+ ctf_diff_t *cdum_cds;
+ ctf_file_t *cdum_fp;
+ ctf_file_t *cdum_iterfp;
+ const char *cdum_name;
+ ctf_id_t cdum_type;
+ int cdum_ret;
+} ctf_diff_union_member_t;
+
+typedef struct ctf_diff_union_fp {
+ ctf_diff_t *cduf_cds;
+ ctf_file_t *cduf_curfp;
+ ctf_file_t *cduf_altfp;
+ ctf_id_t cduf_type;
+ int cduf_ret;
+} ctf_diff_union_fp_t;
+
+/* ARGSUSED */
+static int
+ctf_diff_union_check_member(const char *name, ctf_id_t id, ulong_t off,
+ void *arg)
+{
+ int ret;
+ ctf_diff_union_member_t *cdump = arg;
+
+ if (strcmp(name, cdump->cdum_name) != 0)
+ return (0);
+
+ ret = ctf_diff_type(cdump->cdum_cds, cdump->cdum_fp, cdump->cdum_type,
+ cdump->cdum_iterfp, id);
+ if (ret == CTF_ERR) {
+ cdump->cdum_ret = CTF_ERR;
+ return (1);
+ }
+
+ if (ret == B_FALSE) {
+ cdump->cdum_ret = B_FALSE;
+ /* Return non-zero to stop iteration as we have a match */
+ return (1);
+ }
+
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+ctf_diff_union_check_fp(const char *name, ctf_id_t id, ulong_t off, void *arg)
+{
+ int ret;
+ ctf_diff_union_member_t cdum;
+ ctf_diff_union_fp_t *cdufp = arg;
+
+ cdum.cdum_cds = cdufp->cduf_cds;
+ cdum.cdum_fp = cdufp->cduf_curfp;
+ cdum.cdum_iterfp = cdufp->cduf_altfp;
+ cdum.cdum_name = name;
+ cdum.cdum_type = id;
+ cdum.cdum_ret = B_TRUE;
+
+ ret = ctf_member_iter(cdum.cdum_iterfp, cdufp->cduf_type,
+ ctf_diff_union_check_member, &cdum);
+ if (ret == 0 || cdum.cdum_ret == CTF_ERR) {
+ /* No match found or error, terminate now */
+ cdufp->cduf_ret = cdum.cdum_ret;
+ return (1);
+ } else if (ret == CTF_ERR) {
+ (void) ctf_set_errno(cdum.cdum_fp, ctf_errno(cdum.cdum_iterfp));
+ cdufp->cduf_ret = CTF_ERR;
+ return (1);
+ } else {
+ ASSERT(cdum.cdum_ret == B_FALSE);
+ cdufp->cduf_ret = cdum.cdum_ret;
+ return (0);
+ }
+}
+
+static int
+ctf_diff_union(ctf_diff_t *cds, ctf_file_t *ifp, ctf_id_t iid, ctf_file_t *ofp,
+ ctf_id_t oid)
+{
+ ctf_file_t *oifp;
+ const ctf_type_t *itp, *otp;
+ ctf_diff_union_fp_t cduf;
+ ctf_diff_guess_t *cdg;
+ int ret;
+
+ oifp = ifp;
+ if ((itp = ctf_lookup_by_id(&ifp, iid)) == NULL)
+ return (CTF_ERR);
+ if ((otp = ctf_lookup_by_id(&ofp, oid)) == NULL)
+ return (ctf_set_errno(oifp, ctf_errno(ofp)));
+
+ if (LCTF_INFO_VLEN(ifp, itp->ctt_info) !=
+ LCTF_INFO_VLEN(ofp, otp->ctt_info))
+ return (B_TRUE);
+
+ cdg = ctf_alloc(sizeof (ctf_diff_guess_t));
+ if (cdg == NULL)
+ return (ctf_set_errno(ifp, ENOMEM));
+ cdg->cdg_iid = iid;
+ cdg->cdg_oid = oid;
+ cdg->cdg_next = cds->cds_guess;
+ cds->cds_guess = cdg;
+ cds->cds_forward[TINDEX(iid)] = oid;
+ cds->cds_reverse[TINDEX(oid)] = iid;
+
+ cduf.cduf_cds = cds;
+ cduf.cduf_curfp = ifp;
+ cduf.cduf_altfp = ofp;
+ cduf.cduf_type = oid;
+ cduf.cduf_ret = B_TRUE;
+ ret = ctf_member_iter(ifp, iid, ctf_diff_union_check_fp, &cduf);
+ if (ret != CTF_ERR)
+ ret = cduf.cduf_ret;
+
+ return (ret);
+}
+
+/*
+ * Two enums are equivalent if they share the same underlying type and they have
+ * the same set of members.
+ */
+static int
+ctf_diff_enum(ctf_file_t *ifp, ctf_id_t iid, ctf_file_t *ofp, ctf_id_t oid)
+{
+ ctf_file_t *oifp;
+ const ctf_type_t *itp, *otp;
+ ssize_t iincr, oincr;
+ const ctf_enum_t *iep, *oep;
+ int n;
+
+ oifp = ifp;
+ if ((itp = ctf_lookup_by_id(&ifp, iid)) == NULL)
+ return (CTF_ERR);
+ if ((otp = ctf_lookup_by_id(&ofp, oid)) == NULL)
+ return (ctf_set_errno(oifp, ctf_errno(ofp)));
+
+ if (LCTF_INFO_VLEN(ifp, itp->ctt_info) !=
+ LCTF_INFO_VLEN(ofp, otp->ctt_info))
+ return (B_TRUE);
+
+ (void) ctf_get_ctt_size(ifp, itp, NULL, &iincr);
+ (void) ctf_get_ctt_size(ofp, otp, NULL, &oincr);
+ iep = (const ctf_enum_t *)((uintptr_t)itp + iincr);
+ oep = (const ctf_enum_t *)((uintptr_t)otp + oincr);
+
+ for (n = LCTF_INFO_VLEN(ifp, itp->ctt_info); n != 0;
+ n--, iep++, oep++) {
+ if (strcmp(ctf_strptr(ifp, iep->cte_name),
+ ctf_strptr(ofp, oep->cte_name)) != 0)
+ return (B_TRUE);
+
+ if (iep->cte_value != oep->cte_value)
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
+/*
+ * Two forwards are equivalent in one of two cases. If both are forwards, than
+ * they are the same. Otherwise, they're equivalent if one is a struct or union
+ * and the other is a forward.
+ */
+static int
+ctf_diff_forward(ctf_file_t *ifp, ctf_id_t iid, ctf_file_t *ofp, ctf_id_t oid)
+{
+ int ikind, okind;
+
+ ikind = ctf_type_kind(ifp, iid);
+ okind = ctf_type_kind(ofp, oid);
+
+ if (ikind == okind) {
+ ASSERT(ikind == CTF_K_FORWARD);
+ return (B_FALSE);
+ } else if (ikind == CTF_K_FORWARD) {
+ return (okind != CTF_K_UNION && okind != CTF_K_STRUCT);
+ } else {
+ return (ikind != CTF_K_UNION && ikind != CTF_K_STRUCT);
+ }
+}
+
+/*
+ * Are two types equivalent?
+ */
+int
+ctf_diff_type(ctf_diff_t *cds, ctf_file_t *ifp, ctf_id_t iid, ctf_file_t *ofp,
+ ctf_id_t oid)
+{
+ int ret, ikind, okind;
+
+ /* Do a quick short circuit */
+ if (ifp == ofp && iid == oid)
+ return (B_FALSE);
+
+ /*
+ * Check if it's something we've already encountered in a forward
+ * reference or forward negative table. Also double check the reverse
+ * table.
+ */
+ if (cds->cds_forward[TINDEX(iid)] == oid)
+ return (B_FALSE);
+ if (cds->cds_forward[TINDEX(iid)] != 0)
+ return (B_TRUE);
+ if (cds->cds_reverse[TINDEX(oid)] == iid)
+ return (B_FALSE);
+ if ((cds->cds_flags & CTF_DIFF_F_IGNORE_INTNAMES) == 0 &&
+ cds->cds_reverse[TINDEX(oid)] != 0)
+ return (B_TRUE);
+
+ ikind = ctf_type_kind(ifp, iid);
+ okind = ctf_type_kind(ofp, oid);
+
+ if (ikind != okind &&
+ ikind != CTF_K_FORWARD && okind != CTF_K_FORWARD)
+ return (B_TRUE);
+
+ /* Check names */
+ if ((ret = ctf_diff_name(ifp, iid, ofp, oid)) != B_FALSE) {
+ if (ikind != okind || ikind != CTF_K_INTEGER ||
+ (cds->cds_flags & CTF_DIFF_F_IGNORE_INTNAMES) == 0)
+ return (ret);
+ }
+
+ if (ikind == CTF_K_FORWARD || okind == CTF_K_FORWARD)
+ return (ctf_diff_forward(ifp, iid, ofp, oid));
+
+ switch (ikind) {
+ case CTF_K_INTEGER:
+ case CTF_K_FLOAT:
+ ret = ctf_diff_number(ifp, iid, ofp, oid);
+ break;
+ case CTF_K_ARRAY:
+ ret = ctf_diff_array(cds, ifp, iid, ofp, oid);
+ break;
+ case CTF_K_FUNCTION:
+ ret = ctf_diff_fptr(cds, ifp, iid, ofp, oid);
+ break;
+ case CTF_K_STRUCT:
+ ret = ctf_diff_struct(cds, ifp, iid, ofp, oid);
+ break;
+ case CTF_K_UNION:
+ ret = ctf_diff_union(cds, ifp, iid, ofp, oid);
+ break;
+ case CTF_K_ENUM:
+ ret = ctf_diff_enum(ifp, iid, ofp, oid);
+ break;
+ case CTF_K_FORWARD:
+ ret = ctf_diff_forward(ifp, iid, ofp, oid);
+ break;
+ case CTF_K_TYPEDEF:
+ ret = ctf_diff_typedef(cds, ifp, iid, ofp, oid);
+ break;
+ case CTF_K_POINTER:
+ case CTF_K_VOLATILE:
+ case CTF_K_CONST:
+ case CTF_K_RESTRICT:
+ ret = ctf_diff_qualifier(cds, ifp, iid, ofp, oid);
+ break;
+ case CTF_K_UNKNOWN:
+ /*
+ * The current CTF tools use CTF_K_UNKNOWN as a padding type. We
+ * always declare two instances of CTF_K_UNKNOWN as different,
+ * even though this leads to additional diff noise.
+ */
+ ret = B_TRUE;
+ break;
+ default:
+ abort();
+ }
+
+ return (ret);
+}
+
+/*
+ * Walk every type in the first container and try to find a match in the second.
+ * If there is a match, then update both the forward and reverse mapping tables.
+ *
+ * The self variable tells us whether or not we should be comparing the input
+ * ctf container with itself or not.
+ */
+static int
+ctf_diff_pass1(ctf_diff_t *cds, boolean_t self)
+{
+ int i, j, diff;
+ int istart, iend, jstart, jend;
+
+ if (cds->cds_ifp->ctf_flags & LCTF_CHILD) {
+ istart = 0x8001;
+ iend = cds->cds_ifp->ctf_typemax + 0x8000;
+ } else {
+ istart = 1;
+ iend = cds->cds_ifp->ctf_typemax;
+ }
+
+ if (cds->cds_ofp->ctf_flags & LCTF_CHILD) {
+ jstart = 0x8001;
+ jend = cds->cds_ofp->ctf_typemax + 0x8000;
+ } else {
+ jstart = 1;
+ jend = cds->cds_ofp->ctf_typemax;
+ }
+
+ for (i = istart; i <= iend; i++) {
+ diff = B_TRUE;
+
+ /*
+ * If we're doing a self diff for dedup purposes, then we want
+ * to ensure that we compare a type i with every type in the
+ * range, [ 1, i ). Yes, this does mean that when i equals 1,
+ * we won't compare anything.
+ */
+ if (self == B_TRUE) {
+ jstart = istart;
+ jend = i - 1;
+ }
+ for (j = jstart; j <= jend; j++) {
+ ctf_diff_guess_t *cdg, *tofree;
+
+ ASSERT(cds->cds_guess == NULL);
+ diff = ctf_diff_type(cds, cds->cds_ifp, i,
+ cds->cds_ofp, j);
+ if (diff == CTF_ERR)
+ return (CTF_ERR);
+
+ /* Clean up our guesses */
+ cdg = cds->cds_guess;
+ cds->cds_guess = NULL;
+ while (cdg != NULL) {
+ if (diff == B_TRUE) {
+ cds->cds_forward[TINDEX(cdg->cdg_iid)] =
+ 0;
+ cds->cds_reverse[TINDEX(cdg->cdg_oid)] =
+ 0;
+ }
+ tofree = cdg;
+ cdg = cdg->cdg_next;
+ ctf_free(tofree, sizeof (ctf_diff_guess_t));
+ }
+
+ /* Found a hit, update the tables */
+ if (diff == B_FALSE) {
+ cds->cds_forward[TINDEX(i)] = j;
+ if (cds->cds_reverse[TINDEX(j)] == 0)
+ cds->cds_reverse[TINDEX(j)] = i;
+ break;
+ }
+ }
+
+ /* Call the callback at this point */
+ if (diff == B_TRUE) {
+ cds->cds_forward[TINDEX(i)] = CTF_ERR;
+ cds->cds_func(cds->cds_ifp, i, B_FALSE, NULL, CTF_ERR,
+ cds->cds_arg);
+ } else {
+ cds->cds_func(cds->cds_ifp, i, B_TRUE, cds->cds_ofp, j,
+ cds->cds_arg);
+ }
+ }
+
+ return (0);
+}
+
+/*
+ * Now we need to walk the second container and emit anything that we didn't
+ * find as common in the first pass.
+ */
+static int
+ctf_diff_pass2(ctf_diff_t *cds)
+{
+ int i, start, end;
+
+ start = 0x1;
+ end = cds->cds_ofp->ctf_typemax;
+ if (cds->cds_ofp->ctf_flags & LCTF_CHILD) {
+ start += 0x8000;
+ end += 0x8000;
+ }
+
+ for (i = start; i <= end; i++) {
+ if (cds->cds_reverse[TINDEX(i)] != 0)
+ continue;
+ cds->cds_func(cds->cds_ofp, i, B_FALSE, NULL, CTF_ERR,
+ cds->cds_arg);
+ }
+
+ return (0);
+}
+
+int
+ctf_diff_init(ctf_file_t *ifp, ctf_file_t *ofp, ctf_diff_t **cdsp)
+{
+ ctf_diff_t *cds;
+ size_t fsize, rsize;
+
+ cds = ctf_alloc(sizeof (ctf_diff_t));
+ if (cds == NULL)
+ return (ctf_set_errno(ifp, ENOMEM));
+
+ bzero(cds, sizeof (ctf_diff_t));
+ cds->cds_ifp = ifp;
+ cds->cds_ofp = ofp;
+
+ fsize = sizeof (ctf_id_t) * ifp->ctf_typemax;
+ rsize = sizeof (ctf_id_t) * ofp->ctf_typemax;
+ if (ifp->ctf_flags & LCTF_CHILD)
+ fsize += 0x8000 * sizeof (ctf_id_t);
+ if (ofp->ctf_flags & LCTF_CHILD)
+ rsize += 0x8000 * sizeof (ctf_id_t);
+
+ cds->cds_forward = ctf_alloc(fsize);
+ if (cds->cds_forward == NULL) {
+ ctf_free(cds, sizeof (ctf_diff_t));
+ return (ctf_set_errno(ifp, ENOMEM));
+ }
+ cds->cds_fsize = fsize;
+ cds->cds_reverse = ctf_alloc(rsize);
+ if (cds->cds_reverse == NULL) {
+ ctf_free(cds->cds_forward, fsize);
+ ctf_free(cds, sizeof (ctf_diff_t));
+ return (ctf_set_errno(ifp, ENOMEM));
+ }
+ cds->cds_rsize = rsize;
+ bzero(cds->cds_forward, fsize);
+ bzero(cds->cds_reverse, rsize);
+
+ cds->cds_ifp->ctf_refcnt++;
+ cds->cds_ofp->ctf_refcnt++;
+ *cdsp = cds;
+ return (0);
+}
+
+int
+ctf_diff_types(ctf_diff_t *cds, ctf_diff_type_f cb, void *arg)
+{
+ int ret;
+
+ cds->cds_func = cb;
+ cds->cds_arg = arg;
+
+ ret = ctf_diff_pass1(cds, B_FALSE);
+ if (ret == 0)
+ ret = ctf_diff_pass2(cds);
+
+ cds->cds_func = NULL;
+ cds->cds_arg = NULL;
+ cds->cds_tvalid = B_TRUE;
+ return (ret);
+}
+
+/*
+ * Do a diff where we're comparing a container with itself. In other words we'd
+ * like to know what types are actually duplicates of existing types in the
+ * container.
+ *
+ * Note this should remain private to libctf and not be exported in the public
+ * mapfile for the time being.
+ */
+int
+ctf_diff_self(ctf_diff_t *cds, ctf_diff_type_f cb, void *arg)
+{
+ if (cds->cds_ifp != cds->cds_ofp)
+ return (EINVAL);
+
+ cds->cds_func = cb;
+ cds->cds_arg = arg;
+
+ return (ctf_diff_pass1(cds, B_TRUE));
+}
+
+
+void
+ctf_diff_fini(ctf_diff_t *cds)
+{
+ ctf_diff_guess_t *cdg;
+ size_t fsize, rsize;
+
+ if (cds == NULL)
+ return;
+
+ cds->cds_ifp->ctf_refcnt--;
+ cds->cds_ofp->ctf_refcnt--;
+
+ fsize = sizeof (ctf_id_t) * cds->cds_ifp->ctf_typemax;
+ rsize = sizeof (ctf_id_t) * cds->cds_ofp->ctf_typemax;
+ if (cds->cds_ifp->ctf_flags & LCTF_CHILD)
+ fsize += 0x8000 * sizeof (ctf_id_t);
+ if (cds->cds_ofp->ctf_flags & LCTF_CHILD)
+ rsize += 0x8000 * sizeof (ctf_id_t);
+
+ if (cds->cds_ifuncs != NULL)
+ ctf_free(cds->cds_ifuncs,
+ sizeof (ctf_diff_func_t) * cds->cds_nifuncs);
+ if (cds->cds_ofuncs != NULL)
+ ctf_free(cds->cds_ofuncs,
+ sizeof (ctf_diff_func_t) * cds->cds_nofuncs);
+ if (cds->cds_iobj != NULL)
+ ctf_free(cds->cds_iobj,
+ sizeof (ctf_diff_obj_t) * cds->cds_niobj);
+ if (cds->cds_oobj != NULL)
+ ctf_free(cds->cds_oobj,
+ sizeof (ctf_diff_obj_t) * cds->cds_noobj);
+ cdg = cds->cds_guess;
+ while (cdg != NULL) {
+ ctf_diff_guess_t *tofree = cdg;
+ cdg = cdg->cdg_next;
+ ctf_free(tofree, sizeof (ctf_diff_guess_t));
+ }
+ if (cds->cds_forward != NULL)
+ ctf_free(cds->cds_forward, cds->cds_fsize);
+ if (cds->cds_reverse != NULL)
+ ctf_free(cds->cds_reverse, cds->cds_rsize);
+ ctf_free(cds, sizeof (ctf_diff_t));
+}
+
+uint_t
+ctf_diff_getflags(ctf_diff_t *cds)
+{
+ return (cds->cds_flags);
+}
+
+int
+ctf_diff_setflags(ctf_diff_t *cds, uint_t flags)
+{
+ if ((flags & ~CTF_DIFF_F_IGNORE_INTNAMES) != 0)
+ return (ctf_set_errno(cds->cds_ifp, EINVAL));
+
+ cds->cds_flags = flags;
+ return (0);
+}
+
+static boolean_t
+ctf_diff_symid(ctf_diff_t *cds, ctf_id_t iid, ctf_id_t oid)
+{
+ ctf_file_t *ifp, *ofp;
+
+ ifp = cds->cds_ifp;
+ ofp = cds->cds_ofp;
+
+ /*
+ * If we have parent containers on the scene here, we need to go through
+ * and do a full diff check because while a diff for types will not
+ * actually go through and check types in the parent container.
+ */
+ if (iid == 0 || oid == 0)
+ return (iid == oid ? B_FALSE: B_TRUE);
+
+ if (!(ifp->ctf_flags & LCTF_CHILD) && !(ofp->ctf_flags & LCTF_CHILD)) {
+ if (cds->cds_forward[TINDEX(iid)] != oid)
+ return (B_TRUE);
+ return (B_FALSE);
+ }
+
+ return (ctf_diff_type(cds, ifp, iid, ofp, oid));
+}
+
+/* ARGSUSED */
+static void
+ctf_diff_void_cb(ctf_file_t *ifp, ctf_id_t iid, boolean_t same, ctf_file_t *ofp,
+ ctf_id_t oid, void *arg)
+{
+}
+
+/* ARGSUSED */
+static int
+ctf_diff_func_count(const char *name, ulong_t symidx, ctf_funcinfo_t *fip,
+ void *arg)
+{
+ uint32_t *ip = arg;
+
+ *ip = *ip + 1;
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+ctf_diff_func_fill_cb(const char *name, ulong_t symidx, ctf_funcinfo_t *fip,
+ void *arg)
+{
+ uint_t *next, max;
+ ctf_diff_func_t *funcptr;
+ ctf_diff_t *cds = arg;
+
+ if (cds->cds_ffillip == B_TRUE) {
+ max = cds->cds_nifuncs;
+ next = &cds->cds_nextifunc;
+ funcptr = cds->cds_ifuncs + *next;
+ } else {
+ max = cds->cds_nofuncs;
+ next = &cds->cds_nextofunc;
+ funcptr = cds->cds_ofuncs + *next;
+
+ }
+
+ VERIFY(*next < max);
+ funcptr->cdf_name = name;
+ funcptr->cdf_symidx = symidx;
+ funcptr->cdf_matchidx = ULONG_MAX;
+ *next = *next + 1;
+
+ return (0);
+}
+
+int
+ctf_diff_func_fill(ctf_diff_t *cds)
+{
+ int ret;
+ uint32_t ifcount, ofcount, idcnt, cti;
+ ulong_t i, j;
+ ctf_id_t *iids, *oids;
+
+ ifcount = 0;
+ ofcount = 0;
+ idcnt = 0;
+ iids = NULL;
+ oids = NULL;
+
+ ret = ctf_function_iter(cds->cds_ifp, ctf_diff_func_count, &ifcount);
+ if (ret != 0)
+ return (ret);
+ ret = ctf_function_iter(cds->cds_ofp, ctf_diff_func_count, &ofcount);
+ if (ret != 0)
+ return (ret);
+
+ cds->cds_ifuncs = ctf_alloc(sizeof (ctf_diff_func_t) * ifcount);
+ if (cds->cds_ifuncs == NULL)
+ return (ctf_set_errno(cds->cds_ifp, ENOMEM));
+
+ cds->cds_nifuncs = ifcount;
+ cds->cds_nextifunc = 0;
+
+ cds->cds_ofuncs = ctf_alloc(sizeof (ctf_diff_func_t) * ofcount);
+ if (cds->cds_ofuncs == NULL)
+ return (ctf_set_errno(cds->cds_ifp, ENOMEM));
+
+ cds->cds_nofuncs = ofcount;
+ cds->cds_nextofunc = 0;
+
+ cds->cds_ffillip = B_TRUE;
+ if ((ret = ctf_function_iter(cds->cds_ifp, ctf_diff_func_fill_cb,
+ cds)) != 0)
+ return (ret);
+
+ cds->cds_ffillip = B_FALSE;
+ if ((ret = ctf_function_iter(cds->cds_ofp, ctf_diff_func_fill_cb,
+ cds)) != 0)
+ return (ret);
+
+ /*
+ * Everything is initialized to not match. This could probably be faster
+ * with something that used a hash. But this part of the diff isn't used
+ * by merge.
+ */
+ for (i = 0; i < cds->cds_nifuncs; i++) {
+ for (j = 0; j < cds->cds_nofuncs; j++) {
+ ctf_diff_func_t *ifd, *ofd;
+ ctf_funcinfo_t ifip, ofip;
+ boolean_t match;
+
+ ifd = &cds->cds_ifuncs[i];
+ ofd = &cds->cds_ofuncs[j];
+ if (strcmp(ifd->cdf_name, ofd->cdf_name) != 0)
+ continue;
+
+ ret = ctf_func_info(cds->cds_ifp, ifd->cdf_symidx,
+ &ifip);
+ if (ret != 0)
+ goto out;
+ ret = ctf_func_info(cds->cds_ofp, ofd->cdf_symidx,
+ &ofip);
+ if (ret != 0) {
+ ret = ctf_set_errno(cds->cds_ifp,
+ ctf_errno(cds->cds_ofp));
+ goto out;
+ }
+
+ if (ifip.ctc_argc != ofip.ctc_argc &&
+ ifip.ctc_flags != ofip.ctc_flags)
+ continue;
+
+ /* Validate return type and arguments are the same */
+ if (ctf_diff_symid(cds, ifip.ctc_return,
+ ofip.ctc_return))
+ continue;
+
+ if (ifip.ctc_argc > idcnt) {
+ if (iids != NULL)
+ ctf_free(iids,
+ sizeof (ctf_id_t) * idcnt);
+ if (oids != NULL)
+ ctf_free(oids,
+ sizeof (ctf_id_t) * idcnt);
+ iids = oids = NULL;
+ idcnt = ifip.ctc_argc;
+ iids = ctf_alloc(sizeof (ctf_id_t) * idcnt);
+ if (iids == NULL) {
+ ret = ctf_set_errno(cds->cds_ifp,
+ ENOMEM);
+ goto out;
+ }
+ oids = ctf_alloc(sizeof (ctf_id_t) * idcnt);
+ if (iids == NULL) {
+ ret = ctf_set_errno(cds->cds_ifp,
+ ENOMEM);
+ goto out;
+ }
+ }
+
+ if ((ret = ctf_func_args(cds->cds_ifp, ifd->cdf_symidx,
+ ifip.ctc_argc, iids)) != 0)
+ goto out;
+ if ((ret = ctf_func_args(cds->cds_ofp, ofd->cdf_symidx,
+ ofip.ctc_argc, oids)) != 0)
+ goto out;
+
+ match = B_TRUE;
+ for (cti = 0; cti < ifip.ctc_argc; cti++) {
+ if (ctf_diff_symid(cds, iids[cti], oids[cti])) {
+ match = B_FALSE;
+ break;
+ }
+ }
+
+ if (match == B_FALSE)
+ continue;
+
+ ifd->cdf_matchidx = j;
+ ofd->cdf_matchidx = i;
+ break;
+ }
+ }
+
+ ret = 0;
+
+out:
+ if (iids != NULL)
+ ctf_free(iids, sizeof (ctf_id_t) * idcnt);
+ if (oids != NULL)
+ ctf_free(oids, sizeof (ctf_id_t) * idcnt);
+
+ return (ret);
+}
+
+/*
+ * In general, two functions are the same, if they have the same name and their
+ * arguments have the same types, including the return type. Like types, we
+ * basically have to do this in two passes. In the first phase we walk every
+ * type in the first container and try to find a match in the second.
+ */
+int
+ctf_diff_functions(ctf_diff_t *cds, ctf_diff_func_f cb, void *arg)
+{
+ int ret;
+ ulong_t i;
+
+ if (cds->cds_tvalid == B_FALSE) {
+ if ((ret = ctf_diff_types(cds, ctf_diff_void_cb, NULL)) != 0)
+ return (ret);
+ }
+
+ if (cds->cds_fvalid == B_FALSE) {
+ if ((ret = ctf_diff_func_fill(cds)) != 0)
+ return (ret);
+ cds->cds_fvalid = B_TRUE;
+ }
+
+ for (i = 0; i < cds->cds_nifuncs; i++) {
+ if (cds->cds_ifuncs[i].cdf_matchidx == ULONG_MAX) {
+ cb(cds->cds_ifp, cds->cds_ifuncs[i].cdf_symidx,
+ B_FALSE, NULL, ULONG_MAX, arg);
+ } else {
+ ulong_t idx = cds->cds_ifuncs[i].cdf_matchidx;
+ cb(cds->cds_ifp, cds->cds_ifuncs[i].cdf_symidx, B_TRUE,
+ cds->cds_ofp, cds->cds_ofuncs[idx].cdf_symidx, arg);
+ }
+ }
+
+ for (i = 0; i < cds->cds_nofuncs; i++) {
+ if (cds->cds_ofuncs[i].cdf_matchidx != ULONG_MAX)
+ continue;
+ cb(cds->cds_ofp, cds->cds_ofuncs[i].cdf_symidx, B_FALSE,
+ NULL, ULONG_MAX, arg);
+ }
+
+ return (0);
+}
+
+static int
+ctf_diff_obj_fill_cb(const char *name, ctf_id_t id, ulong_t symidx, void *arg)
+{
+ uint_t *next, max;
+ ctf_diff_obj_t *objptr;
+ ctf_diff_t *cds = arg;
+
+ if (cds->cds_ofillip == B_TRUE) {
+ max = cds->cds_niobj;
+ next = &cds->cds_nextiobj;
+ objptr = cds->cds_iobj + *next;
+ } else {
+ max = cds->cds_noobj;
+ next = &cds->cds_nextoobj;
+ objptr = cds->cds_oobj+ *next;
+
+ }
+
+ VERIFY(*next < max);
+ objptr->cdo_name = name;
+ objptr->cdo_symidx = symidx;
+ objptr->cdo_id = id;
+ objptr->cdo_matchidx = ULONG_MAX;
+ *next = *next + 1;
+
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+ctf_diff_obj_count(const char *name, ctf_id_t id, ulong_t symidx, void *arg)
+{
+ uint32_t *count = arg;
+
+ *count = *count + 1;
+
+ return (0);
+}
+
+
+static int
+ctf_diff_obj_fill(ctf_diff_t *cds)
+{
+ int ret;
+ uint32_t iocount, oocount;
+ ulong_t i, j;
+
+ iocount = 0;
+ oocount = 0;
+
+ ret = ctf_object_iter(cds->cds_ifp, ctf_diff_obj_count, &iocount);
+ if (ret != 0)
+ return (ret);
+
+ ret = ctf_object_iter(cds->cds_ofp, ctf_diff_obj_count, &oocount);
+ if (ret != 0)
+ return (ret);
+
+ cds->cds_iobj = ctf_alloc(sizeof (ctf_diff_obj_t) * iocount);
+ if (cds->cds_iobj == NULL)
+ return (ctf_set_errno(cds->cds_ifp, ENOMEM));
+ cds->cds_niobj = iocount;
+ cds->cds_nextiobj = 0;
+
+ cds->cds_oobj = ctf_alloc(sizeof (ctf_diff_obj_t) * oocount);
+ if (cds->cds_oobj == NULL)
+ return (ctf_set_errno(cds->cds_ifp, ENOMEM));
+ cds->cds_noobj = oocount;
+ cds->cds_nextoobj = 0;
+
+ cds->cds_ofillip = B_TRUE;
+ if ((ret = ctf_object_iter(cds->cds_ifp, ctf_diff_obj_fill_cb,
+ cds)) != 0)
+ return (ret);
+
+ cds->cds_ofillip = B_FALSE;
+ if ((ret = ctf_object_iter(cds->cds_ofp, ctf_diff_obj_fill_cb,
+ cds)) != 0)
+ return (ret);
+
+ for (i = 0; i < cds->cds_niobj; i++) {
+ for (j = 0; j < cds->cds_noobj; j++) {
+ ctf_diff_obj_t *id, *od;
+
+ id = &cds->cds_iobj[i];
+ od = &cds->cds_oobj[j];
+
+ if (id->cdo_name == NULL || od->cdo_name == NULL)
+ continue;
+ if (strcmp(id->cdo_name, od->cdo_name) != 0)
+ continue;
+
+ if (ctf_diff_symid(cds, id->cdo_id, od->cdo_id)) {
+ continue;
+ }
+
+ id->cdo_matchidx = j;
+ od->cdo_matchidx = i;
+ break;
+ }
+ }
+
+ return (0);
+}
+
+int
+ctf_diff_objects(ctf_diff_t *cds, ctf_diff_obj_f cb, void *arg)
+{
+ int ret;
+ ulong_t i;
+
+ if (cds->cds_tvalid == B_FALSE) {
+ if ((ret = ctf_diff_types(cds, ctf_diff_void_cb, NULL)) != 0)
+ return (ret);
+ }
+
+ if (cds->cds_ovalid == B_FALSE) {
+ if ((ret = ctf_diff_obj_fill(cds)) != 0)
+ return (ret);
+ cds->cds_ovalid = B_TRUE;
+ }
+
+ for (i = 0; i < cds->cds_niobj; i++) {
+ ctf_diff_obj_t *o = &cds->cds_iobj[i];
+
+ if (cds->cds_iobj[i].cdo_matchidx == ULONG_MAX) {
+ cb(cds->cds_ifp, o->cdo_symidx, o->cdo_id, B_FALSE,
+ NULL, ULONG_MAX, CTF_ERR, arg);
+ } else {
+ ctf_diff_obj_t *alt = &cds->cds_oobj[o->cdo_matchidx];
+ cb(cds->cds_ifp, o->cdo_symidx, o->cdo_id, B_TRUE,
+ cds->cds_ofp, alt->cdo_symidx, alt->cdo_id, arg);
+ }
+ }
+
+ for (i = 0; i < cds->cds_noobj; i++) {
+ ctf_diff_obj_t *o = &cds->cds_oobj[i];
+ if (o->cdo_matchidx != ULONG_MAX)
+ continue;
+ cb(cds->cds_ofp, o->cdo_symidx, o->cdo_id, B_FALSE, NULL,
+ ULONG_MAX, CTF_ERR, arg);
+ }
+
+ return (0);
+}
diff --git a/usr/src/lib/libctf/common/ctf_dwarf.c b/usr/src/lib/libctf/common/ctf_dwarf.c
new file mode 100644
index 0000000000..1e684641fe
--- /dev/null
+++ b/usr/src/lib/libctf/common/ctf_dwarf.c
@@ -0,0 +1,2956 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright 2012 Jason King. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * CTF DWARF conversion theory.
+ *
+ * DWARF data contains a series of compilation units. Each compilation unit
+ * generally refers to an object file or what once was, in the case of linked
+ * binaries and shared objects. Each compilation unit has a series of what DWARF
+ * calls a DIE (Debugging Information Entry). The set of entries that we care
+ * about have type information stored in a series of attributes. Each DIE also
+ * has a tag that identifies the kind of attributes that it has.
+ *
+ * A given DIE may itself have children. For example, a DIE that represents a
+ * structure has children which represent members. Whenever we encounter a DIE
+ * that has children or other values or types associated with it, we recursively
+ * process those children first so that way we can then refer to the generated
+ * CTF type id while processing its parent. This reduces the amount of unknowns
+ * and fixups that we need. It also ensures that we don't accidentally add types
+ * that an overzealous compiler might add to the DWARF data but aren't used by
+ * anything in the system.
+ *
+ * Once we do a conversion, we store a mapping in an AVL tree that goes from the
+ * DWARF's die offset, which is relative to the given compilation unit), to a
+ * ctf_id_t.
+ *
+ * Unfortunately, some compilers actually will emit duplicate entries for a
+ * given type that look similar, but aren't quite. To that end, we go through
+ * and do a variant on a merge once we're done processing a single compilation
+ * unit which deduplicates all of the types that are in the unit.
+ *
+ * Finally, if we encounter an object that has multiple compilation units, then
+ * we'll convert all of the compilation units separately and then do a merge, so
+ * that way we can result in one single ctf_file_t that represents everything
+ * for the object.
+ *
+ * Conversion Steps
+ * ----------------
+ *
+ * Because a given object we've been given to convert may have multiple
+ * compilation units, we break the work into two halves. The first half
+ * processes each compilation unit (potentially in parallel) and then the second
+ * half optionally merges all of the dies in the first half. First, we'll cover
+ * what's involved in converting a single ctf_die_t's dwarf to CTF. This covers
+ * the work done in ctf_dwarf_convert_one().
+ *
+ * An individual ctf_die_t, which represents a compilation unit, is converted to
+ * CTF in a series of multiple passes.
+ *
+ * Pass 1: During the first pass we walk all of the dies and if we find a
+ * function, variable, struct, union, enum or typedef, we recursively transform
+ * all of its types. We don't recurse or process everything, because we don't
+ * want to add some of the types that compilers may add which are effectively
+ * unused.
+ *
+ * During pass 1, if we encounter any structures or unions we mark them for
+ * fixing up later. This is necessary because we may not be able to determine
+ * the full size of a structure at the beginning of time. This will happen if
+ * the DWARF attribute DW_AT_byte_size is not present for a member. Because of
+ * this possibility we defer adding members to structures or even converting
+ * them during pass 1 and save that for pass 2. Adding all of the base
+ * structures without any of their members helps deal with any circular
+ * dependencies that we might encounter.
+ *
+ * Pass 2: This pass is used to do the first half of fixing up structures and
+ * unions. Rather than walk the entire type space again, we actually walk the
+ * list of structures and unions that we marked for later fixing up. Here, we
+ * iterate over every structure and add members to the underlying ctf_file_t,
+ * but not to the structs themselves. One might wonder why we don't, and the
+ * main reason is that libctf requires a ctf_update() be done before adding the
+ * members to structures or unions.
+ *
+ * Pass 3: This pass is used to do the second half of fixing up structures and
+ * unions. During this part we always go through and add members to structures
+ * and unions that we added to the container in the previous pass. In addition,
+ * we set the structure and union's actual size, which may have additional
+ * padding added by the compiler, it isn't simply the last offset. DWARF always
+ * guarantees an attribute exists for this. Importantly no ctf_id_t's change
+ * during pass 2.
+ *
+ * Pass 4: The next phase is to add CTF entries for all of the symbols and
+ * variables that are present in this die. During pass 1 we added entries to a
+ * map for each variable and function. During this pass, we iterate over the
+ * symbol table and when we encounter a symbol that we have in our lists of
+ * translated information which matches, we then add it to the ctf_file_t.
+ *
+ * Pass 5: Here we go and look for any weak symbols and functions and see if
+ * they match anything that we recognize. If so, then we add type information
+ * for them at this point based on the matching type.
+ *
+ * Pass 6: This pass is actually a variant on a merge. The traditional merge
+ * process expects there to be no duplicate types. As such, at the end of
+ * conversion, we do a dedup on all of the types in the system. The
+ * deduplication process is described in lib/libctf/common/ctf_merge.c.
+ *
+ * Once pass 6 is done, we've finished processing the individual compilation
+ * unit.
+ *
+ * The following steps reflect the general process of doing a conversion.
+ *
+ * 1) Walk the dwarf section and determine the number of compilation units
+ * 2) Create a ctf_die_t for each compilation unit
+ * 3) Add all ctf_die_t's to a workq
+ * 4) Have the workq process each die with ctf_dwarf_convert_one. This itself
+ * is comprised of several steps, which were already enumerated.
+ * 5) If we have multiple dies, we do a ctf merge of all the dies. The mechanics
+ * of the merge are discussed in lib/libctf/common/ctf_merge.c.
+ * 6) Free everything up and return a ctf_file_t to the user. If we only had a
+ * single compilation unit, then we give that to the user. Otherwise, we
+ * return the merged ctf_file_t.
+ *
+ * Threading
+ * ---------
+ *
+ * The process has been designed to be amenable to threading. Each compilation
+ * unit has its own type stream, therefore the logical place to divide and
+ * conquer is at the compilation unit. Each ctf_die_t has been built to be able
+ * to be processed independently of the others. It has its own libdwarf handle,
+ * as a given libdwarf handle may only be used by a single thread at a time.
+ * This allows the various ctf_die_t's to be processed in parallel by different
+ * threads.
+ *
+ * All of the ctf_die_t's are loaded into a workq which allows for a number of
+ * threads to be specified and used as a thread pool to process all of the
+ * queued work. We set the number of threads to use in the workq equal to the
+ * number of threads that the user has specified.
+ *
+ * After all of the compilation units have been drained, we use the same number
+ * of threads when performing a merge of multiple compilation units, if they
+ * exist.
+ *
+ * While all of these different parts do support and allow for multiple threads,
+ * it's important that when only a single thread is specified, that it be the
+ * calling thread. This allows the conversion routines to be used in a context
+ * that doesn't allow additional threads, such as rtld.
+ *
+ * Common DWARF Mechanics and Notes
+ * --------------------------------
+ *
+ * At this time, we really only support DWARFv2, though support for DWARFv4 is
+ * mostly there. There is no intent to support DWARFv3.
+ *
+ * Generally types for something are stored in the DW_AT_type attribute. For
+ * example, a function's return type will be stored in the local DW_AT_type
+ * attribute while the arguments will be in child DIEs. There are also various
+ * times when we don't have any DW_AT_type. In that case, the lack of a type
+ * implies, at least for C, that it's C type is void. Because DWARF doesn't emit
+ * one, we have a synthetic void type that we create and manipulate instead and
+ * pass it off to consumers on an as-needed basis. If nothing has a void type,
+ * it will not be emitted.
+ *
+ * Architecture Specific Parts
+ * ---------------------------
+ *
+ * The CTF tooling encodes various information about the various architectures
+ * in the system. Importantly, the tool assumes that every architecture has a
+ * data model where long and pointer are the same size. This is currently the
+ * case, as the two data models illumos supports are ILP32 and LP64.
+ *
+ * In addition, we encode the mapping of various floating point sizes to various
+ * types for each architecture. If a new architecture is being added, it should
+ * be added to the list. The general design of the ctf conversion tools is to be
+ * architecture independent. eg. any of the tools here should be able to convert
+ * any architecture's DWARF into ctf; however, this has not been rigorously
+ * tested and more importantly, the ctf routines don't currently write out the
+ * data in an endian-aware form, they only use that of the currently running
+ * library.
+ */
+
+#include <libctf_impl.h>
+#include <sys/avl.h>
+#include <sys/debug.h>
+#include <gelf.h>
+#include <libdwarf.h>
+#include <dwarf.h>
+#include <libgen.h>
+#include <workq.h>
+#include <errno.h>
+
+#define DWARF_VERSION_TWO 2
+#define DWARF_VARARGS_NAME "..."
+
+/*
+ * Dwarf may refer recursively to other types that we've already processed. To
+ * see if we've already converted them, we look them up in an AVL tree that's
+ * sorted by the DWARF id.
+ */
+typedef struct ctf_dwmap {
+ avl_node_t cdm_avl;
+ Dwarf_Off cdm_off;
+ Dwarf_Die cdm_die;
+ ctf_id_t cdm_id;
+ boolean_t cdm_fix;
+} ctf_dwmap_t;
+
+typedef struct ctf_dwvar {
+ ctf_list_t cdv_list;
+ char *cdv_name;
+ ctf_id_t cdv_type;
+ boolean_t cdv_global;
+} ctf_dwvar_t;
+
+typedef struct ctf_dwfunc {
+ ctf_list_t cdf_list;
+ char *cdf_name;
+ ctf_funcinfo_t cdf_fip;
+ ctf_id_t *cdf_argv;
+ boolean_t cdf_global;
+} ctf_dwfunc_t;
+
+typedef struct ctf_dwbitf {
+ ctf_list_t cdb_list;
+ ctf_id_t cdb_base;
+ uint_t cdb_nbits;
+ ctf_id_t cdb_id;
+} ctf_dwbitf_t;
+
+/*
+ * The ctf_die_t represents a single top-level DWARF die unit. While generally,
+ * the typical object file hs only a single die, if we're asked to convert
+ * something that's been linked from multiple sources, multiple dies will exist.
+ */
+typedef struct ctf_die {
+ Elf *cd_elf; /* shared libelf handle */
+ char *cd_name; /* basename of the DIE */
+ ctf_merge_t *cd_cmh; /* merge handle */
+ ctf_list_t cd_vars; /* List of variables */
+ ctf_list_t cd_funcs; /* List of functions */
+ ctf_list_t cd_bitfields; /* Bit field members */
+ Dwarf_Debug cd_dwarf; /* shared libdwarf handle */
+ Dwarf_Die cd_cu; /* libdwarf compilation unit */
+ Dwarf_Off cd_cuoff; /* cu's offset */
+ Dwarf_Off cd_maxoff; /* maximum offset */
+ ctf_file_t *cd_ctfp; /* output CTF file */
+ avl_tree_t cd_map; /* map die offsets to CTF types */
+ char *cd_errbuf; /* error message buffer */
+ size_t cd_errlen; /* error message buffer length */
+ size_t cd_ptrsz; /* object's pointer size */
+ boolean_t cd_bigend; /* is it big endian */
+ boolean_t cd_doweaks; /* should we convert weak symbols? */
+ uint_t cd_mach; /* machine type */
+ ctf_id_t cd_voidtid; /* void pointer */
+ ctf_id_t cd_longtid; /* id for a 'long' */
+} ctf_die_t;
+
+static int ctf_dwarf_offset(ctf_die_t *, Dwarf_Die, Dwarf_Off *);
+static int ctf_dwarf_convert_die(ctf_die_t *, Dwarf_Die);
+static int ctf_dwarf_convert_type(ctf_die_t *, Dwarf_Die, ctf_id_t *, int);
+
+static int ctf_dwarf_function_count(ctf_die_t *, Dwarf_Die, ctf_funcinfo_t *,
+ boolean_t);
+static int ctf_dwarf_convert_fargs(ctf_die_t *, Dwarf_Die, ctf_funcinfo_t *,
+ ctf_id_t *);
+
+typedef int (ctf_dwarf_symtab_f)(ctf_die_t *, const GElf_Sym *, ulong_t,
+ const char *, const char *, void *);
+
+/*
+ * This is a generic way to set a CTF Conversion backend error depending on what
+ * we were doing. Unless it was one of a specific set of errors that don't
+ * indicate a programming / translation bug, eg. ENOMEM, then we transform it
+ * into a CTF backend error and fill in the error buffer.
+ */
+static int
+ctf_dwarf_error(ctf_die_t *cdp, ctf_file_t *cfp, int err, const char *fmt, ...)
+{
+ va_list ap;
+ int ret;
+ size_t off = 0;
+ ssize_t rem = cdp->cd_errlen;
+ if (cfp != NULL)
+ err = ctf_errno(cfp);
+
+ if (err == ENOMEM)
+ return (err);
+
+ ret = snprintf(cdp->cd_errbuf, rem, "die %s: ", cdp->cd_name);
+ if (ret < 0)
+ goto err;
+ off += ret;
+ rem = MAX(rem - ret, 0);
+
+ va_start(ap, fmt);
+ ret = vsnprintf(cdp->cd_errbuf + off, rem, fmt, ap);
+ va_end(ap);
+ if (ret < 0)
+ goto err;
+
+ off += ret;
+ rem = MAX(rem - ret, 0);
+ if (fmt[strlen(fmt) - 1] != '\n') {
+ (void) snprintf(cdp->cd_errbuf + off, rem,
+ ": %s\n", ctf_errmsg(err));
+ }
+ va_end(ap);
+ return (ECTF_CONVBKERR);
+
+err:
+ cdp->cd_errbuf[0] = '\0';
+ return (ECTF_CONVBKERR);
+}
+
+/*
+ * DWARF often ops to put no explicit type to describe a void type. eg. if we
+ * have a reference type whose DW_AT_type member doesn't exist, then we should
+ * instead assume it points to void. Because this isn't represented, we
+ * instead cause it to come into existence.
+ */
+static ctf_id_t
+ctf_dwarf_void(ctf_die_t *cdp)
+{
+ if (cdp->cd_voidtid == CTF_ERR) {
+ ctf_encoding_t enc = { CTF_INT_SIGNED, 0, 0 };
+ cdp->cd_voidtid = ctf_add_integer(cdp->cd_ctfp, CTF_ADD_ROOT,
+ "void", &enc);
+ if (cdp->cd_voidtid == CTF_ERR) {
+ (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
+ "failed to create void type: %s\n",
+ ctf_errmsg(ctf_errno(cdp->cd_ctfp)));
+ }
+ }
+
+ return (cdp->cd_voidtid);
+}
+
+/*
+ * There are many different forms that an array index may take. However, we just
+ * always force it to be of a type long no matter what. Therefore we use this to
+ * have a single instance of long across everything.
+ */
+static ctf_id_t
+ctf_dwarf_long(ctf_die_t *cdp)
+{
+ if (cdp->cd_longtid == CTF_ERR) {
+ ctf_encoding_t enc;
+
+ enc.cte_format = CTF_INT_SIGNED;
+ enc.cte_offset = 0;
+ /* All illumos systems are LP */
+ enc.cte_bits = cdp->cd_ptrsz * 8;
+ cdp->cd_longtid = ctf_add_integer(cdp->cd_ctfp, CTF_ADD_NONROOT,
+ "long", &enc);
+ if (cdp->cd_longtid == CTF_ERR) {
+ (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
+ "failed to create long type: %s\n",
+ ctf_errmsg(ctf_errno(cdp->cd_ctfp)));
+ }
+
+ }
+
+ return (cdp->cd_longtid);
+}
+
+static int
+ctf_dwmap_comp(const void *a, const void *b)
+{
+ const ctf_dwmap_t *ca = a;
+ const ctf_dwmap_t *cb = b;
+
+ if (ca->cdm_off > cb->cdm_off)
+ return (1);
+ if (ca->cdm_off < cb->cdm_off)
+ return (-1);
+ return (0);
+}
+
+static int
+ctf_dwmap_add(ctf_die_t *cdp, ctf_id_t id, Dwarf_Die die, boolean_t fix)
+{
+ int ret;
+ avl_index_t index;
+ ctf_dwmap_t *dwmap;
+ Dwarf_Off off;
+
+ VERIFY(id > 0 && id < CTF_MAX_TYPE);
+
+ if ((ret = ctf_dwarf_offset(cdp, die, &off)) != 0)
+ return (ret);
+
+ if ((dwmap = ctf_alloc(sizeof (ctf_dwmap_t))) == NULL)
+ return (ENOMEM);
+
+ dwmap->cdm_die = die;
+ dwmap->cdm_off = off;
+ dwmap->cdm_id = id;
+ dwmap->cdm_fix = fix;
+
+ ctf_dprintf("dwmap: %p %x->%d\n", dwmap, (uint32_t)off, id);
+ VERIFY(avl_find(&cdp->cd_map, dwmap, &index) == NULL);
+ avl_insert(&cdp->cd_map, dwmap, index);
+ return (0);
+}
+
+static int
+ctf_dwarf_attribute(ctf_die_t *cdp, Dwarf_Die die, Dwarf_Half name,
+ Dwarf_Attribute *attrp)
+{
+ int ret;
+ Dwarf_Error derr;
+
+ if ((ret = dwarf_attr(die, name, attrp, &derr)) == DW_DLV_OK)
+ return (0);
+ if (ret == DW_DLV_NO_ENTRY) {
+ *attrp = NULL;
+ return (ENOENT);
+ }
+ (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
+ "failed to get attribute for type: %s\n",
+ dwarf_errmsg(derr));
+ return (ECTF_CONVBKERR);
+}
+
+static int
+ctf_dwarf_ref(ctf_die_t *cdp, Dwarf_Die die, Dwarf_Half name, Dwarf_Off *refp)
+{
+ int ret;
+ Dwarf_Attribute attr;
+ Dwarf_Error derr;
+
+ if ((ret = ctf_dwarf_attribute(cdp, die, name, &attr)) != 0)
+ return (ret);
+
+ if (dwarf_formref(attr, refp, &derr) == DW_DLV_OK) {
+ dwarf_dealloc(cdp->cd_dwarf, attr, DW_DLA_ATTR);
+ return (0);
+ }
+
+ (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
+ "failed to get unsigned attribute for type: %s\n",
+ dwarf_errmsg(derr));
+ return (ECTF_CONVBKERR);
+}
+
+static int
+ctf_dwarf_refdie(ctf_die_t *cdp, Dwarf_Die die, Dwarf_Half name,
+ Dwarf_Die *diep)
+{
+ int ret;
+ Dwarf_Off off;
+ Dwarf_Error derr;
+
+ if ((ret = ctf_dwarf_ref(cdp, die, DW_AT_type, &off)) != 0)
+ return (ret);
+
+ off += cdp->cd_cuoff;
+ if ((ret = dwarf_offdie(cdp->cd_dwarf, off, diep, &derr)) !=
+ DW_DLV_OK) {
+ (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
+ "failed to get die from offset %llu: %s\n",
+ off, dwarf_errmsg(derr));
+ return (ECTF_CONVBKERR);
+ }
+
+ return (0);
+}
+
+static int
+ctf_dwarf_signed(ctf_die_t *cdp, Dwarf_Die die, Dwarf_Half name,
+ Dwarf_Signed *valp)
+{
+ int ret;
+ Dwarf_Attribute attr;
+ Dwarf_Error derr;
+
+ if ((ret = ctf_dwarf_attribute(cdp, die, name, &attr)) != 0)
+ return (ret);
+
+ if (dwarf_formsdata(attr, valp, &derr) == DW_DLV_OK) {
+ dwarf_dealloc(cdp->cd_dwarf, attr, DW_DLA_ATTR);
+ return (0);
+ }
+
+ (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
+ "failed to get unsigned attribute for type: %s\n",
+ dwarf_errmsg(derr));
+ return (ECTF_CONVBKERR);
+}
+
+static int
+ctf_dwarf_unsigned(ctf_die_t *cdp, Dwarf_Die die, Dwarf_Half name,
+ Dwarf_Unsigned *valp)
+{
+ int ret;
+ Dwarf_Attribute attr;
+ Dwarf_Error derr;
+
+ if ((ret = ctf_dwarf_attribute(cdp, die, name, &attr)) != 0)
+ return (ret);
+
+ if (dwarf_formudata(attr, valp, &derr) == DW_DLV_OK) {
+ dwarf_dealloc(cdp->cd_dwarf, attr, DW_DLA_ATTR);
+ return (0);
+ }
+
+ (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
+ "failed to get unsigned attribute for type: %s\n",
+ dwarf_errmsg(derr));
+ return (ECTF_CONVBKERR);
+}
+
+static int
+ctf_dwarf_boolean(ctf_die_t *cdp, Dwarf_Die die, Dwarf_Half name,
+ Dwarf_Bool *val)
+{
+ int ret;
+ Dwarf_Attribute attr;
+ Dwarf_Error derr;
+
+ if ((ret = ctf_dwarf_attribute(cdp, die, name, &attr)) != 0)
+ return (ret);
+
+ if (dwarf_formflag(attr, val, &derr) == DW_DLV_OK) {
+ dwarf_dealloc(cdp->cd_dwarf, attr, DW_DLA_ATTR);
+ return (0);
+ }
+
+ (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
+ "failed to get boolean attribute for type: %s\n",
+ dwarf_errmsg(derr));
+
+ return (ECTF_CONVBKERR);
+}
+
+static int
+ctf_dwarf_string(ctf_die_t *cdp, Dwarf_Die die, Dwarf_Half name, char **strp)
+{
+ int ret;
+ char *s;
+ Dwarf_Attribute attr;
+ Dwarf_Error derr;
+
+ *strp = NULL;
+ if ((ret = ctf_dwarf_attribute(cdp, die, name, &attr)) != 0)
+ return (ret);
+
+ if (dwarf_formstring(attr, &s, &derr) == DW_DLV_OK) {
+ if ((*strp = ctf_strdup(s)) == NULL)
+ ret = ENOMEM;
+ else
+ ret = 0;
+ dwarf_dealloc(cdp->cd_dwarf, attr, DW_DLA_ATTR);
+ return (ret);
+ }
+
+ (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
+ "failed to get string attribute for type: %s\n",
+ dwarf_errmsg(derr));
+ return (ECTF_CONVBKERR);
+}
+
+static int
+ctf_dwarf_member_location(ctf_die_t *cdp, Dwarf_Die die, Dwarf_Unsigned *valp)
+{
+ int ret;
+ Dwarf_Error derr;
+ Dwarf_Attribute attr;
+ Dwarf_Locdesc *loc;
+ Dwarf_Signed locnum;
+
+ if ((ret = ctf_dwarf_attribute(cdp, die, DW_AT_data_member_location,
+ &attr)) != 0)
+ return (ret);
+
+ if (dwarf_loclist(attr, &loc, &locnum, &derr) != DW_DLV_OK) {
+ (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
+ "failed to obtain location list for member offset: %s",
+ dwarf_errmsg(derr));
+ dwarf_dealloc(cdp->cd_dwarf, attr, DW_DLA_ATTR);
+ return (ECTF_CONVBKERR);
+ }
+ dwarf_dealloc(cdp->cd_dwarf, attr, DW_DLA_ATTR);
+
+ if (locnum != 1 || loc->ld_s->lr_atom != DW_OP_plus_uconst) {
+ (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
+ "failed to parse location structure for member");
+ dwarf_dealloc(cdp->cd_dwarf, loc->ld_s, DW_DLA_LOC_BLOCK);
+ dwarf_dealloc(cdp->cd_dwarf, loc, DW_DLA_LOCDESC);
+ return (ECTF_CONVBKERR);
+ }
+
+ *valp = loc->ld_s->lr_number;
+
+ dwarf_dealloc(cdp->cd_dwarf, loc->ld_s, DW_DLA_LOC_BLOCK);
+ dwarf_dealloc(cdp->cd_dwarf, loc, DW_DLA_LOCDESC);
+ return (0);
+}
+
+
+static int
+ctf_dwarf_offset(ctf_die_t *cdp, Dwarf_Die die, Dwarf_Off *offsetp)
+{
+ Dwarf_Error derr;
+
+ if (dwarf_dieoffset(die, offsetp, &derr) == DW_DLV_OK)
+ return (0);
+
+ (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
+ "failed to get die offset: %s\n",
+ dwarf_errmsg(derr));
+ return (ECTF_CONVBKERR);
+}
+
+static int
+ctf_dwarf_tag(ctf_die_t *cdp, Dwarf_Die die, Dwarf_Half *tagp)
+{
+ Dwarf_Error derr;
+
+ if (dwarf_tag(die, tagp, &derr) == DW_DLV_OK)
+ return (0);
+
+ (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
+ "failed to get tag type: %s\n",
+ dwarf_errmsg(derr));
+ return (ECTF_CONVBKERR);
+}
+
+static int
+ctf_dwarf_sib(ctf_die_t *cdp, Dwarf_Die base, Dwarf_Die *sibp)
+{
+ Dwarf_Error derr;
+ int ret;
+
+ *sibp = NULL;
+ ret = dwarf_siblingof(cdp->cd_dwarf, base, sibp, &derr);
+ if (ret == DW_DLV_OK || ret == DW_DLV_NO_ENTRY)
+ return (0);
+
+ (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
+ "failed to sibling from die: %s\n",
+ dwarf_errmsg(derr));
+ return (ECTF_CONVBKERR);
+}
+
+static int
+ctf_dwarf_child(ctf_die_t *cdp, Dwarf_Die base, Dwarf_Die *childp)
+{
+ Dwarf_Error derr;
+ int ret;
+
+ *childp = NULL;
+ ret = dwarf_child(base, childp, &derr);
+ if (ret == DW_DLV_OK || ret == DW_DLV_NO_ENTRY)
+ return (0);
+
+ (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
+ "failed to child from die: %s\n",
+ dwarf_errmsg(derr));
+ return (ECTF_CONVBKERR);
+}
+
+/*
+ * Compilers disagree on what to do to determine if something has global
+ * visiblity. Traditionally gcc has used DW_AT_external to indicate this while
+ * Studio has used DW_AT_visibility. We check DW_AT_visibility first and then
+ * fall back to DW_AT_external. Lack of DW_AT_external implies that it is not.
+ */
+static int
+ctf_dwarf_isglobal(ctf_die_t *cdp, Dwarf_Die die, boolean_t *igp)
+{
+ int ret;
+ Dwarf_Signed vis;
+ Dwarf_Bool ext;
+
+ if ((ret = ctf_dwarf_signed(cdp, die, DW_AT_visibility, &vis)) == 0) {
+ *igp = vis == DW_VIS_exported;
+ return (0);
+ } else if (ret != ENOENT) {
+ return (ret);
+ }
+
+ if ((ret = ctf_dwarf_boolean(cdp, die, DW_AT_external, &ext)) != 0) {
+ if (ret == ENOENT) {
+ *igp = B_FALSE;
+ return (0);
+ }
+ return (ret);
+ }
+ *igp = ext != 0 ? B_TRUE : B_FALSE;
+ return (0);
+}
+
+static int
+ctf_dwarf_die_elfenc(Elf *elf, ctf_die_t *cdp, char *errbuf, size_t errlen)
+{
+ GElf_Ehdr ehdr;
+
+ if (gelf_getehdr(elf, &ehdr) == NULL) {
+ (void) snprintf(errbuf, errlen,
+ "failed to get ELF header: %s\n",
+ elf_errmsg(elf_errno()));
+ return (ECTF_CONVBKERR);
+ }
+
+ cdp->cd_mach = ehdr.e_machine;
+
+ if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) {
+ cdp->cd_ptrsz = 4;
+ VERIFY(ctf_setmodel(cdp->cd_ctfp, CTF_MODEL_ILP32) == 0);
+ } else if (ehdr.e_ident[EI_CLASS] == ELFCLASS64) {
+ cdp->cd_ptrsz = 8;
+ VERIFY(ctf_setmodel(cdp->cd_ctfp, CTF_MODEL_LP64) == 0);
+ } else {
+ (void) snprintf(errbuf, errlen,
+ "unknown ELF class %d", ehdr.e_ident[EI_CLASS]);
+ return (ECTF_CONVBKERR);
+ }
+
+ if (ehdr.e_ident[EI_DATA] == ELFDATA2LSB) {
+ cdp->cd_bigend = B_FALSE;
+ } else if (ehdr.e_ident[EI_DATA] == ELFDATA2MSB) {
+ cdp->cd_bigend = B_TRUE;
+ } else {
+ (void) snprintf(errbuf, errlen,
+ "unknown ELF data encoding: %d", ehdr.e_ident[EI_DATA]);
+ return (ECTF_CONVBKERR);
+ }
+
+ return (0);
+}
+
+typedef struct ctf_dwarf_fpent {
+ size_t cdfe_size;
+ uint_t cdfe_enc[3];
+} ctf_dwarf_fpent_t;
+
+typedef struct ctf_dwarf_fpmap {
+ uint_t cdf_mach;
+ ctf_dwarf_fpent_t cdf_ents[4];
+} ctf_dwarf_fpmap_t;
+
+static const ctf_dwarf_fpmap_t ctf_dwarf_fpmaps[] = {
+ { EM_SPARC, {
+ { 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
+ { 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
+ { 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
+ { 0, { 0 } }
+ } },
+ { EM_SPARC32PLUS, {
+ { 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
+ { 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
+ { 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
+ { 0, { 0 } }
+ } },
+ { EM_SPARCV9, {
+ { 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
+ { 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
+ { 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
+ { 0, { 0 } }
+ } },
+ { EM_386, {
+ { 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
+ { 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
+ { 12, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
+ { 0, { 0 } }
+ } },
+ { EM_X86_64, {
+ { 4, { CTF_FP_SINGLE, CTF_FP_CPLX, CTF_FP_IMAGRY } },
+ { 8, { CTF_FP_DOUBLE, CTF_FP_DCPLX, CTF_FP_DIMAGRY } },
+ { 16, { CTF_FP_LDOUBLE, CTF_FP_LDCPLX, CTF_FP_LDIMAGRY } },
+ { 0, { 0 } }
+ } },
+ { EM_NONE }
+};
+
+static int
+ctf_dwarf_float_base(ctf_die_t *cdp, Dwarf_Signed type, ctf_encoding_t *enc)
+{
+ const ctf_dwarf_fpmap_t *map = &ctf_dwarf_fpmaps[0];
+ const ctf_dwarf_fpent_t *ent;
+ uint_t col = 0, mult = 1;
+
+ for (map = &ctf_dwarf_fpmaps[0]; map->cdf_mach != EM_NONE; map++) {
+ if (map->cdf_mach == cdp->cd_mach)
+ break;
+ }
+
+ if (map->cdf_mach == EM_NONE) {
+ (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
+ "Unsupported machine type: %d\n", cdp->cd_mach);
+ return (ENOTSUP);
+ }
+
+ if (type == DW_ATE_complex_float) {
+ mult = 2;
+ col = 1;
+ } else if (type == DW_ATE_imaginary_float ||
+ type == DW_ATE_SUN_imaginary_float) {
+ col = 2;
+ }
+
+ ent = &map->cdf_ents[0];
+ for (ent = &map->cdf_ents[0]; ent->cdfe_size != 0; ent++) {
+ if (ent->cdfe_size * mult * 8 == enc->cte_bits) {
+ enc->cte_format = ent->cdfe_enc[col];
+ return (0);
+ }
+ }
+
+ (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
+ "failed to find valid fp mapping for encoding %d, size %d bits\n",
+ type, enc->cte_bits);
+ return (EINVAL);
+}
+
+static int
+ctf_dwarf_dwarf_base(ctf_die_t *cdp, Dwarf_Die die, int *kindp,
+ ctf_encoding_t *enc)
+{
+ int ret;
+ Dwarf_Signed type;
+
+ if ((ret = ctf_dwarf_signed(cdp, die, DW_AT_encoding, &type)) != 0)
+ return (ret);
+
+ switch (type) {
+ case DW_ATE_unsigned:
+ case DW_ATE_address:
+ *kindp = CTF_K_INTEGER;
+ enc->cte_format = 0;
+ break;
+ case DW_ATE_unsigned_char:
+ *kindp = CTF_K_INTEGER;
+ enc->cte_format = CTF_INT_CHAR;
+ break;
+ case DW_ATE_signed:
+ *kindp = CTF_K_INTEGER;
+ enc->cte_format = CTF_INT_SIGNED;
+ break;
+ case DW_ATE_signed_char:
+ *kindp = CTF_K_INTEGER;
+ enc->cte_format = CTF_INT_SIGNED | CTF_INT_CHAR;
+ break;
+ case DW_ATE_boolean:
+ *kindp = CTF_K_INTEGER;
+ enc->cte_format = CTF_INT_SIGNED | CTF_INT_BOOL;
+ break;
+ case DW_ATE_float:
+ case DW_ATE_complex_float:
+ case DW_ATE_imaginary_float:
+ case DW_ATE_SUN_imaginary_float:
+ case DW_ATE_SUN_interval_float:
+ *kindp = CTF_K_FLOAT;
+ if ((ret = ctf_dwarf_float_base(cdp, type, enc)) != 0)
+ return (ret);
+ break;
+ default:
+ (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
+ "encountered unkown DWARF encoding: %d", type);
+ return (ECTF_CONVBKERR);
+ }
+
+ return (0);
+}
+
+/*
+ * Different compilers (at least GCC and Studio) use different names for types.
+ * This parses the types and attempts to unify them. If this fails, we just fall
+ * back to using the DWARF itself.
+ */
+static int
+ctf_dwarf_parse_base(const char *name, int *kindp, ctf_encoding_t *enc,
+ char **newnamep)
+{
+ char buf[256];
+ char *base, *c;
+ int nlong = 0, nshort = 0, nchar = 0, nint = 0;
+ int sign = 1;
+
+ if (strlen(name) + 1 > sizeof (buf))
+ return (EINVAL);
+
+ (void) strlcpy(buf, name, sizeof (buf));
+ for (c = strtok(buf, " "); c != NULL; c = strtok(NULL, " ")) {
+ if (strcmp(c, "signed") == 0) {
+ sign = 1;
+ } else if (strcmp(c, "unsigned") == 0) {
+ sign = 0;
+ } else if (strcmp(c, "long") == 0) {
+ nlong++;
+ } else if (strcmp(c, "char") == 0) {
+ nchar++;
+ } else if (strcmp(c, "short") == 0) {
+ nshort++;
+ } else if (strcmp(c, "int") == 0) {
+ nint++;
+ } else {
+ /*
+ * If we don't recognize any of the tokens, we'll tell
+ * the caller to fall back to the dwarf-provided
+ * encoding information.
+ */
+ return (EINVAL);
+ }
+ }
+
+ if (nchar > 1 || nshort > 1 || nint > 1 || nlong > 2)
+ return (EINVAL);
+
+ if (nchar > 0) {
+ if (nlong > 0 || nshort > 0 || nint > 0)
+ return (EINVAL);
+ base = "char";
+ } else if (nshort > 0) {
+ if (nlong > 0)
+ return (EINVAL);
+ base = "short";
+ } else if (nlong > 0) {
+ base = "long";
+ } else {
+ base = "int";
+ }
+
+ if (nchar > 0)
+ enc->cte_format = CTF_INT_CHAR;
+ else
+ enc->cte_format = 0;
+
+ if (sign > 0)
+ enc->cte_format |= CTF_INT_SIGNED;
+
+ (void) snprintf(buf, sizeof (buf), "%s%s%s",
+ (sign ? "" : "unsigned "),
+ (nlong > 1 ? "long " : ""),
+ base);
+
+ *newnamep = ctf_strdup(buf);
+ if (*newnamep == NULL)
+ return (ENOMEM);
+ *kindp = CTF_K_INTEGER;
+ return (0);
+}
+
+static int
+ctf_dwarf_create_base(ctf_die_t *cdp, Dwarf_Die die, ctf_id_t *idp, int isroot,
+ Dwarf_Off off)
+{
+ int ret;
+ char *name, *nname;
+ Dwarf_Unsigned sz;
+ int kind;
+ ctf_encoding_t enc;
+ ctf_id_t id;
+
+ if ((ret = ctf_dwarf_string(cdp, die, DW_AT_name, &name)) != 0)
+ return (ret);
+ if ((ret = ctf_dwarf_unsigned(cdp, die, DW_AT_byte_size, &sz)) != 0) {
+ goto out;
+ }
+ ctf_dprintf("Creating base type %s from off %llu, size: %d\n", name,
+ off, sz);
+
+ bzero(&enc, sizeof (ctf_encoding_t));
+ enc.cte_bits = sz * 8;
+ if ((ret = ctf_dwarf_parse_base(name, &kind, &enc, &nname)) == 0) {
+ ctf_free(name, strlen(name) + 1);
+ name = nname;
+ } else {
+ if (ret != EINVAL)
+ return (ret);
+ ctf_dprintf("falling back to dwarf for base type %s\n", name);
+ if ((ret = ctf_dwarf_dwarf_base(cdp, die, &kind, &enc)) != 0)
+ return (ret);
+ }
+
+ id = ctf_add_encoded(cdp->cd_ctfp, isroot, name, &enc, kind);
+ if (id == CTF_ERR) {
+ ret = ctf_errno(cdp->cd_ctfp);
+ } else {
+ *idp = id;
+ ret = ctf_dwmap_add(cdp, id, die, B_FALSE);
+ }
+out:
+ ctf_free(name, strlen(name) + 1);
+ return (ret);
+}
+
+/*
+ * Getting a member's offset is a surprisingly intricate dance. It works as
+ * follows:
+ *
+ * 1) If we're in DWARFv4, then we either have a DW_AT_data_bit_offset or we
+ * have a DW_AT_data_member_location. We won't have both. Thus we check first
+ * for DW_AT_data_bit_offset, and if it exists, we're set.
+ *
+ * Next, if we have a bitfield and we don't ahve a DW_AT_data_bit_offset, then
+ * we have to grab the data location and use the following dance:
+ *
+ * 2) Gather the set of DW_AT_byte_size, DW_AT_bit_offset, and DW_AT_bit_size.
+ * Of course, the DW_AT_byte_size may be omitted, even though it isn't always.
+ * When it's been omitted, we then have to say that the size is that of the
+ * underlying type, which forces that to be after a ctf_update(). Here, we have
+ * to do different things based on whether or not we're using big endian or
+ * little endian to obtain the proper offset.
+ */
+static int
+ctf_dwarf_member_offset(ctf_die_t *cdp, Dwarf_Die die, ctf_id_t mid,
+ ulong_t *offp)
+{
+ int ret;
+ Dwarf_Unsigned loc, bitsz, bytesz, bitoff;
+ size_t off, tsz;
+
+ if ((ret = ctf_dwarf_unsigned(cdp, die, DW_AT_data_bit_offset,
+ &loc)) == 0) {
+ *offp = loc;
+ return (0);
+ } else if (ret != ENOENT) {
+ return (ret);
+ }
+
+ if ((ret = ctf_dwarf_member_location(cdp, die, &loc)) != 0)
+ return (ret);
+ off = loc * 8;
+
+ if ((ret = ctf_dwarf_unsigned(cdp, die, DW_AT_bit_offset,
+ &bitoff)) != 0) {
+ if (ret != ENOENT)
+ return (ret);
+ *offp = off;
+ return (0);
+ }
+
+ /* At this point we have to have DW_AT_bit_size */
+ if ((ret = ctf_dwarf_unsigned(cdp, die, DW_AT_bit_size, &bitsz)) != 0)
+ return (ret);
+
+ if ((ret = ctf_dwarf_unsigned(cdp, die, DW_AT_byte_size,
+ &bytesz)) != 0) {
+ if (ret != ENOENT)
+ return (ret);
+ if ((tsz = ctf_type_size(cdp->cd_ctfp, mid)) == CTF_ERR) {
+ int e = ctf_errno(cdp->cd_ctfp);
+ (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
+ "failed to get type size: %s", ctf_errmsg(e));
+ return (ECTF_CONVBKERR);
+ }
+ } else {
+ tsz = bytesz;
+ }
+ tsz *= 8;
+ if (cdp->cd_bigend == B_TRUE) {
+ *offp = off + bitoff;
+ } else {
+ *offp = off + tsz - bitoff - bitsz;
+ }
+
+ return (0);
+}
+
+/*
+ * We need to determine if the member in question is a bitfield. If it is, then
+ * we need to go through and create a new type that's based on the actual base
+ * type, but has a different size. We also rename the type as a result to help
+ * deal with future collisions.
+ *
+ * Here we need to look and see if we have a DW_AT_bit_size value. If we have a
+ * bit size member and it does not equal the byte size member, then we need to
+ * create a bitfield type based on this.
+ *
+ * Note: When we support DWARFv4, there may be a chance that we ned to also
+ * search for the DW_AT_byte_size if we don't have a DW_AT_bit_size member.
+ */
+static int
+ctf_dwarf_member_bitfield(ctf_die_t *cdp, Dwarf_Die die, ctf_id_t *idp)
+{
+ int ret;
+ Dwarf_Unsigned bitsz;
+ ctf_encoding_t e;
+ ctf_dwbitf_t *cdb;
+ ctf_dtdef_t *dtd;
+ ctf_id_t base = *idp;
+ int kind;
+
+ if ((ret = ctf_dwarf_unsigned(cdp, die, DW_AT_bit_size, &bitsz)) != 0) {
+ if (ret == ENOENT)
+ return (0);
+ return (ret);
+ }
+
+ ctf_dprintf("Trying to deal with bitfields on %d:%d\n", base, bitsz);
+ /*
+ * Given that we now have a bitsize, time to go do something about it.
+ * We're going to create a new type based on the current one, but first
+ * we need to find the base type. This means we need to traverse any
+ * typedef's, consts, and volatiles until we get to what should be
+ * something of type integer or enumeration.
+ */
+ VERIFY(bitsz < UINT32_MAX);
+ dtd = ctf_dtd_lookup(cdp->cd_ctfp, base);
+ VERIFY(dtd != NULL);
+ kind = CTF_INFO_KIND(dtd->dtd_data.ctt_info);
+ while (kind == CTF_K_TYPEDEF || kind == CTF_K_CONST ||
+ kind == CTF_K_VOLATILE) {
+ dtd = ctf_dtd_lookup(cdp->cd_ctfp, dtd->dtd_data.ctt_type);
+ VERIFY(dtd != NULL);
+ kind = CTF_INFO_KIND(dtd->dtd_data.ctt_info);
+ }
+ ctf_dprintf("got kind %d\n", kind);
+ VERIFY(kind == CTF_K_INTEGER || kind == CTF_K_ENUM);
+
+ /*
+ * As surprising as it may be, it is strictly possible to create a
+ * bitfield that is based on an enum. Of course, the C standard leaves
+ * enums sizing as an ABI concern more or less. To that effect, today on
+ * all illumos platforms the size of an enum is generally that of an
+ * int as our supported data models and ABIs all agree on that. So what
+ * we'll do is fake up a CTF enconding here to use. In this case, we'll
+ * treat it as an unsigned value of whatever size the underlying enum
+ * currently has (which is in the ctt_size member of its dynamic type
+ * data).
+ */
+ if (kind == CTF_K_INTEGER) {
+ e = dtd->dtd_u.dtu_enc;
+ } else {
+ bzero(&e, sizeof (ctf_encoding_t));
+ e.cte_bits = dtd->dtd_data.ctt_size * NBBY;
+ }
+
+ for (cdb = ctf_list_next(&cdp->cd_bitfields); cdb != NULL;
+ cdb = ctf_list_next(cdb)) {
+ if (cdb->cdb_base == base && cdb->cdb_nbits == bitsz)
+ break;
+ }
+
+ /*
+ * Create a new type if none exists. We name all types in a way that is
+ * guaranteed not to conflict with the corresponding C type. We do this
+ * by using the ':' operator.
+ */
+ if (cdb == NULL) {
+ size_t namesz;
+ char *name;
+
+ e.cte_bits = bitsz;
+ namesz = snprintf(NULL, 0, "%s:%d", dtd->dtd_name,
+ (uint32_t)bitsz);
+ name = ctf_alloc(namesz + 1);
+ if (name == NULL)
+ return (ENOMEM);
+ cdb = ctf_alloc(sizeof (ctf_dwbitf_t));
+ if (cdb == NULL) {
+ ctf_free(name, namesz + 1);
+ return (ENOMEM);
+ }
+ (void) snprintf(name, namesz + 1, "%s:%d", dtd->dtd_name,
+ (uint32_t)bitsz);
+
+ cdb->cdb_base = base;
+ cdb->cdb_nbits = bitsz;
+ cdb->cdb_id = ctf_add_integer(cdp->cd_ctfp, CTF_ADD_NONROOT,
+ name, &e);
+ if (cdb->cdb_id == CTF_ERR) {
+ (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
+ "failed to get add bitfield type %s: %s", name,
+ ctf_errmsg(ctf_errno(cdp->cd_ctfp)));
+ ctf_free(name, namesz + 1);
+ ctf_free(cdb, sizeof (ctf_dwbitf_t));
+ return (ECTF_CONVBKERR);
+ }
+ ctf_free(name, namesz + 1);
+ ctf_list_append(&cdp->cd_bitfields, cdb);
+ }
+
+ *idp = cdb->cdb_id;
+
+ return (0);
+}
+
+static int
+ctf_dwarf_fixup_sou(ctf_die_t *cdp, Dwarf_Die die, ctf_id_t base, boolean_t add)
+{
+ int ret, kind;
+ Dwarf_Die child, memb;
+ Dwarf_Unsigned size;
+ ulong_t nsz;
+
+ kind = ctf_type_kind(cdp->cd_ctfp, base);
+ VERIFY(kind != CTF_ERR);
+ VERIFY(kind == CTF_K_STRUCT || kind == CTF_K_UNION);
+
+ /*
+ * Members are in children. However, gcc also allows empty ones.
+ */
+ if ((ret = ctf_dwarf_child(cdp, die, &child)) != 0)
+ return (ret);
+ if (child == NULL)
+ return (0);
+
+ memb = child;
+ while (memb != NULL) {
+ Dwarf_Die sib, tdie;
+ Dwarf_Half tag;
+ ctf_id_t mid;
+ char *mname;
+ ulong_t memboff = 0;
+
+ if ((ret = ctf_dwarf_tag(cdp, memb, &tag)) != 0)
+ return (ret);
+
+ if (tag != DW_TAG_member)
+ continue;
+
+ if ((ret = ctf_dwarf_refdie(cdp, memb, DW_AT_type, &tdie)) != 0)
+ return (ret);
+
+ if ((ret = ctf_dwarf_convert_type(cdp, tdie, &mid,
+ CTF_ADD_NONROOT)) != 0)
+ return (ret);
+ ctf_dprintf("Got back type id: %d\n", mid);
+
+ /*
+ * If we're not adding a member, just go ahead and return.
+ */
+ if (add == B_FALSE) {
+ if ((ret = ctf_dwarf_member_bitfield(cdp, memb,
+ &mid)) != 0)
+ return (ret);
+ goto next;
+ }
+
+ if ((ret = ctf_dwarf_string(cdp, memb, DW_AT_name,
+ &mname)) != 0 && ret != ENOENT)
+ return (ret);
+ if (ret == ENOENT)
+ mname = NULL;
+
+ if (kind == CTF_K_UNION) {
+ memboff = 0;
+ } else if ((ret = ctf_dwarf_member_offset(cdp, memb, mid,
+ &memboff)) != 0) {
+ if (mname != NULL)
+ ctf_free(mname, strlen(mname) + 1);
+ return (ret);
+ }
+
+ if ((ret = ctf_dwarf_member_bitfield(cdp, memb, &mid)) != 0)
+ return (ret);
+
+ ret = ctf_add_member(cdp->cd_ctfp, base, mname, mid, memboff);
+ if (ret == CTF_ERR) {
+ (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
+ "failed to add member %s: %s",
+ mname, ctf_errmsg(ctf_errno(cdp->cd_ctfp)));
+ if (mname != NULL)
+ ctf_free(mname, strlen(mname) + 1);
+ return (ECTF_CONVBKERR);
+ }
+
+ if (mname != NULL)
+ ctf_free(mname, strlen(mname) + 1);
+
+next:
+ if ((ret = ctf_dwarf_sib(cdp, memb, &sib)) != 0)
+ return (ret);
+ memb = sib;
+ }
+
+ /*
+ * If we're not adding members, then we don't know the final size of the
+ * structure, so end here.
+ */
+ if (add == B_FALSE)
+ return (0);
+
+ /* Finally set the size of the structure to the actual byte size */
+ if ((ret = ctf_dwarf_unsigned(cdp, die, DW_AT_byte_size, &size)) != 0)
+ return (ret);
+ nsz = size;
+ if ((ctf_set_size(cdp->cd_ctfp, base, nsz)) == CTF_ERR) {
+ int e = ctf_errno(cdp->cd_ctfp);
+ (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
+ "failed to set type size for %d to 0x%x: %s", base,
+ (uint32_t)size, ctf_errmsg(e));
+ return (ECTF_CONVBKERR);
+ }
+
+ return (0);
+}
+
+static int
+ctf_dwarf_create_sou(ctf_die_t *cdp, Dwarf_Die die, ctf_id_t *idp,
+ int kind, int isroot)
+{
+ int ret;
+ char *name;
+ ctf_id_t base;
+ Dwarf_Die child;
+ Dwarf_Bool decl;
+
+ /*
+ * Deal with the terribly annoying case of anonymous structs and unions.
+ * If they don't have a name, set the name to the empty string.
+ */
+ if ((ret = ctf_dwarf_string(cdp, die, DW_AT_name, &name)) != 0 &&
+ ret != ENOENT)
+ return (ret);
+ if (ret == ENOENT)
+ name = NULL;
+
+ /*
+ * We need to check if we just have a declaration here. If we do, then
+ * instead of creating an actual structure or union, we're just going to
+ * go ahead and create a forward. During a dedup or merge, the forward
+ * will be replaced with the real thing.
+ */
+ if ((ret = ctf_dwarf_boolean(cdp, die, DW_AT_declaration,
+ &decl)) != 0) {
+ if (ret != ENOENT)
+ return (ret);
+ decl = 0;
+ }
+
+ if (decl != 0) {
+ base = ctf_add_forward(cdp->cd_ctfp, isroot, name, kind);
+ } else if (kind == CTF_K_STRUCT) {
+ base = ctf_add_struct(cdp->cd_ctfp, isroot, name);
+ } else {
+ base = ctf_add_union(cdp->cd_ctfp, isroot, name);
+ }
+ ctf_dprintf("added sou %s (%d) (%d)\n", name, kind, base);
+ if (name != NULL)
+ ctf_free(name, strlen(name) + 1);
+ if (base == CTF_ERR)
+ return (ctf_errno(cdp->cd_ctfp));
+ *idp = base;
+
+ /*
+ * If it's just a declaration, we're not going to mark it for fix up or
+ * do anything else.
+ */
+ if (decl == B_TRUE)
+ return (ctf_dwmap_add(cdp, base, die, B_FALSE));
+ if ((ret = ctf_dwmap_add(cdp, base, die, B_TRUE)) != 0)
+ return (ret);
+
+ /*
+ * Members are in children. However, gcc also allows empty ones.
+ */
+ if ((ret = ctf_dwarf_child(cdp, die, &child)) != 0)
+ return (ret);
+ if (child == NULL)
+ return (0);
+
+ return (0);
+}
+
+static int
+ctf_dwarf_create_array_range(ctf_die_t *cdp, Dwarf_Die range, ctf_id_t *idp,
+ ctf_id_t base, int isroot)
+{
+ int ret;
+ Dwarf_Die sib;
+ Dwarf_Unsigned val;
+ Dwarf_Signed sval;
+ ctf_arinfo_t ar;
+
+ ctf_dprintf("creating array range\n");
+
+ if ((ret = ctf_dwarf_sib(cdp, range, &sib)) != 0)
+ return (ret);
+ if (sib != NULL) {
+ ctf_id_t id;
+ if ((ret = ctf_dwarf_create_array_range(cdp, sib, &id,
+ base, CTF_ADD_NONROOT)) != 0)
+ return (ret);
+ ar.ctr_contents = id;
+ } else {
+ ar.ctr_contents = base;
+ }
+
+ if ((ar.ctr_index = ctf_dwarf_long(cdp)) == CTF_ERR)
+ return (ctf_errno(cdp->cd_ctfp));
+
+ /*
+ * Array bounds can be signed or unsigned, but there are several kinds
+ * of signless forms (data1, data2, etc) that take their sign from the
+ * routine that is trying to interpret them. That is, data1 can be
+ * either signed or unsigned, depending on whether you use the signed or
+ * unsigned accessor function. GCC will use the signless forms to store
+ * unsigned values which have their high bit set, so we need to try to
+ * read them first as unsigned to get positive values. We could also
+ * try signed first, falling back to unsigned if we got a negative
+ * value.
+ */
+ if ((ret = ctf_dwarf_unsigned(cdp, range, DW_AT_upper_bound,
+ &val)) == 0) {
+ ar.ctr_nelems = val + 1;
+ } else if (ret != ENOENT) {
+ return (ret);
+ } else if ((ret = ctf_dwarf_signed(cdp, range, DW_AT_upper_bound,
+ &sval)) == 0) {
+ ar.ctr_nelems = sval + 1;
+ } else if (ret != ENOENT) {
+ return (ret);
+ } else {
+ ar.ctr_nelems = 0;
+ }
+
+ if ((*idp = ctf_add_array(cdp->cd_ctfp, isroot, &ar)) == CTF_ERR)
+ return (ctf_errno(cdp->cd_ctfp));
+
+ return (0);
+}
+
+/*
+ * Try and create an array type. First, the kind of the array is specified in
+ * the DW_AT_type entry. Next, the number of entries is stored in a more
+ * complicated form, we should have a child that has the DW_TAG_subrange type.
+ */
+static int
+ctf_dwarf_create_array(ctf_die_t *cdp, Dwarf_Die die, ctf_id_t *idp, int isroot)
+{
+ int ret;
+ Dwarf_Die tdie, rdie;
+ ctf_id_t tid;
+ Dwarf_Half rtag;
+ ctf_arinfo_t ar;
+
+ if ((ret = ctf_dwarf_refdie(cdp, die, DW_AT_type, &tdie)) != 0)
+ return (ret);
+ if ((ret = ctf_dwarf_convert_type(cdp, tdie, &tid,
+ CTF_ADD_NONROOT)) != 0)
+ return (ret);
+
+ ar.ctr_contents = tid;
+
+ if ((ret = ctf_dwarf_child(cdp, die, &rdie)) != 0)
+ return (ret);
+ if ((ret = ctf_dwarf_tag(cdp, rdie, &rtag)) != 0)
+ return (ret);
+ if (rtag != DW_TAG_subrange_type) {
+ (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
+ "encountered array without DW_TAG_subrange_type child\n");
+ return (ECTF_CONVBKERR);
+ }
+
+ /*
+ * The compiler may opt to describe a multi-dimensional array as one
+ * giant array or it may opt to instead encode it as a series of
+ * subranges. If it's the latter, then for each subrange we introduce a
+ * type. We can always use the base type.
+ */
+ if ((ret = ctf_dwarf_create_array_range(cdp, rdie, idp, tid,
+ isroot)) != 0)
+ return (ret);
+ ctf_dprintf("Got back id %d\n", *idp);
+ return (ctf_dwmap_add(cdp, *idp, die, B_FALSE));
+}
+
+static int
+ctf_dwarf_create_reference(ctf_die_t *cdp, Dwarf_Die die, ctf_id_t *idp,
+ int kind, int isroot)
+{
+ int ret;
+ ctf_id_t id;
+ Dwarf_Die tdie;
+ char *name;
+ size_t namelen;
+
+ if ((ret = ctf_dwarf_string(cdp, die, DW_AT_name, &name)) != 0 &&
+ ret != ENOENT)
+ return (ret);
+ if (ret == ENOENT) {
+ name = NULL;
+ namelen = 0;
+ } else {
+ namelen = strlen(name);
+ }
+
+ ctf_dprintf("reference kind %d %s\n", kind, name != NULL ? name : "<>");
+
+ if ((ret = ctf_dwarf_refdie(cdp, die, DW_AT_type, &tdie)) != 0) {
+ if (ret != ENOENT) {
+ ctf_free(name, namelen);
+ return (ret);
+ }
+ if ((id = ctf_dwarf_void(cdp)) == CTF_ERR) {
+ ctf_free(name, namelen);
+ return (ctf_errno(cdp->cd_ctfp));
+ }
+ } else {
+ if ((ret = ctf_dwarf_convert_type(cdp, tdie, &id,
+ CTF_ADD_NONROOT)) != 0) {
+ ctf_free(name, namelen);
+ return (ret);
+ }
+ }
+
+ if ((*idp = ctf_add_reftype(cdp->cd_ctfp, isroot, name, id, kind)) ==
+ CTF_ERR) {
+ ctf_free(name, namelen);
+ return (ctf_errno(cdp->cd_ctfp));
+ }
+
+ ctf_free(name, namelen);
+ return (ctf_dwmap_add(cdp, *idp, die, B_FALSE));
+}
+
+static int
+ctf_dwarf_create_enum(ctf_die_t *cdp, Dwarf_Die die, ctf_id_t *idp, int isroot)
+{
+ int ret;
+ ctf_id_t id;
+ Dwarf_Die child;
+ char *name;
+
+ if ((ret = ctf_dwarf_string(cdp, die, DW_AT_name, &name)) != 0 &&
+ ret != ENOENT)
+ return (ret);
+ if (ret == ENOENT)
+ name = NULL;
+ id = ctf_add_enum(cdp->cd_ctfp, isroot, name);
+ ctf_dprintf("added enum %s (%d)\n", name, id);
+ if (name != NULL)
+ ctf_free(name, strlen(name) + 1);
+ if (id == CTF_ERR)
+ return (ctf_errno(cdp->cd_ctfp));
+ *idp = id;
+ if ((ret = ctf_dwmap_add(cdp, id, die, B_FALSE)) != 0)
+ return (ret);
+
+
+ if ((ret = ctf_dwarf_child(cdp, die, &child)) != 0) {
+ if (ret == ENOENT)
+ ret = 0;
+ return (ret);
+ }
+
+ while (child != NULL) {
+ Dwarf_Half tag;
+ Dwarf_Signed sval;
+ Dwarf_Unsigned uval;
+ Dwarf_Die arg = child;
+ int eval;
+
+ if ((ret = ctf_dwarf_sib(cdp, arg, &child)) != 0)
+ return (ret);
+
+ if ((ret = ctf_dwarf_tag(cdp, arg, &tag)) != 0)
+ return (ret);
+
+ if (tag != DW_TAG_enumerator) {
+ if ((ret = ctf_dwarf_convert_type(cdp, arg, NULL,
+ CTF_ADD_NONROOT)) != 0)
+ return (ret);
+ continue;
+ }
+
+ if ((ret = ctf_dwarf_signed(cdp, arg, DW_AT_const_value,
+ &sval)) == 0) {
+ eval = sval;
+ } else if (ret != ENOENT) {
+ return (ret);
+ } else if ((ret = ctf_dwarf_unsigned(cdp, arg,
+ DW_AT_const_value, &uval)) == 0) {
+ eval = (int)uval;
+ } else {
+ (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
+ "encountered enumration without constant value\n");
+ return (ECTF_CONVBKERR);
+ }
+
+ /*
+ * DWARF v4 section 5.7 tells us we'll always have names.
+ */
+ if ((ret = ctf_dwarf_string(cdp, arg, DW_AT_name,
+ &name)) != 0)
+ return (ret);
+
+ ret = ctf_add_enumerator(cdp->cd_ctfp, id, name, eval);
+ if (ret == CTF_ERR) {
+ (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
+ "failed to add enumarator %s (%d) to %d\n",
+ name, eval, id);
+ ctf_free(name, strlen(name) + 1);
+ return (ctf_errno(cdp->cd_ctfp));
+ }
+ ctf_free(name, strlen(name) + 1);
+ }
+
+ return (0);
+}
+
+/*
+ * For a function pointer, walk over and process all of its children, unless we
+ * encounter one that's just a declaration. In which case, we error on it.
+ */
+static int
+ctf_dwarf_create_fptr(ctf_die_t *cdp, Dwarf_Die die, ctf_id_t *idp, int isroot)
+{
+ int ret;
+ Dwarf_Bool b;
+ ctf_funcinfo_t fi;
+ Dwarf_Die retdie;
+ ctf_id_t *argv = NULL;
+
+ bzero(&fi, sizeof (ctf_funcinfo_t));
+
+ if ((ret = ctf_dwarf_boolean(cdp, die, DW_AT_declaration, &b)) != 0) {
+ if (ret != ENOENT)
+ return (ret);
+ } else {
+ if (b != 0)
+ return (EPROTOTYPE);
+ }
+
+ /*
+ * Return type is in DW_AT_type, if none, it returns void.
+ */
+ if ((ret = ctf_dwarf_refdie(cdp, die, DW_AT_type, &retdie)) != 0) {
+ if (ret != ENOENT)
+ return (ret);
+ if ((fi.ctc_return = ctf_dwarf_void(cdp)) == CTF_ERR)
+ return (ctf_errno(cdp->cd_ctfp));
+ } else {
+ if ((ret = ctf_dwarf_convert_type(cdp, retdie, &fi.ctc_return,
+ CTF_ADD_NONROOT)) != 0)
+ return (ret);
+ }
+
+ if ((ret = ctf_dwarf_function_count(cdp, die, &fi, B_TRUE)) != 0) {
+ return (ret);
+ }
+
+ if (fi.ctc_argc != 0) {
+ argv = ctf_alloc(sizeof (ctf_id_t) * fi.ctc_argc);
+ if (argv == NULL)
+ return (ENOMEM);
+
+ if ((ret = ctf_dwarf_convert_fargs(cdp, die, &fi, argv)) != 0) {
+ ctf_free(argv, sizeof (ctf_id_t) * fi.ctc_argc);
+ return (ret);
+ }
+ }
+
+ if ((*idp = ctf_add_funcptr(cdp->cd_ctfp, isroot, &fi, argv)) ==
+ CTF_ERR) {
+ ctf_free(argv, sizeof (ctf_id_t) * fi.ctc_argc);
+ return (ctf_errno(cdp->cd_ctfp));
+ }
+
+ ctf_free(argv, sizeof (ctf_id_t) * fi.ctc_argc);
+ return (ctf_dwmap_add(cdp, *idp, die, B_FALSE));
+}
+
+static int
+ctf_dwarf_convert_type(ctf_die_t *cdp, Dwarf_Die die, ctf_id_t *idp,
+ int isroot)
+{
+ int ret;
+ Dwarf_Off offset;
+ Dwarf_Half tag;
+ ctf_dwmap_t lookup, *map;
+ ctf_id_t id;
+
+ if (idp == NULL)
+ idp = &id;
+
+ if ((ret = ctf_dwarf_offset(cdp, die, &offset)) != 0)
+ return (ret);
+
+ if (offset > cdp->cd_maxoff) {
+ (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
+ "die offset %llu beyond maximum for header %llu\n",
+ offset, cdp->cd_maxoff);
+ return (ECTF_CONVBKERR);
+ }
+
+ /*
+ * If we've already added an entry for this offset, then we're done.
+ */
+ lookup.cdm_off = offset;
+ if ((map = avl_find(&cdp->cd_map, &lookup, NULL)) != NULL) {
+ *idp = map->cdm_id;
+ return (0);
+ }
+
+ if ((ret = ctf_dwarf_tag(cdp, die, &tag)) != 0)
+ return (ret);
+
+ ret = ENOTSUP;
+ switch (tag) {
+ case DW_TAG_base_type:
+ ctf_dprintf("base\n");
+ ret = ctf_dwarf_create_base(cdp, die, idp, isroot, offset);
+ break;
+ case DW_TAG_array_type:
+ ctf_dprintf("array\n");
+ ret = ctf_dwarf_create_array(cdp, die, idp, isroot);
+ break;
+ case DW_TAG_enumeration_type:
+ ctf_dprintf("enum\n");
+ ret = ctf_dwarf_create_enum(cdp, die, idp, isroot);
+ break;
+ case DW_TAG_pointer_type:
+ ctf_dprintf("pointer\n");
+ ret = ctf_dwarf_create_reference(cdp, die, idp, CTF_K_POINTER,
+ isroot);
+ break;
+ case DW_TAG_structure_type:
+ ctf_dprintf("struct\n");
+ ret = ctf_dwarf_create_sou(cdp, die, idp, CTF_K_STRUCT,
+ isroot);
+ break;
+ case DW_TAG_subroutine_type:
+ ctf_dprintf("fptr\n");
+ ret = ctf_dwarf_create_fptr(cdp, die, idp, isroot);
+ break;
+ case DW_TAG_typedef:
+ ctf_dprintf("typedef\n");
+ ret = ctf_dwarf_create_reference(cdp, die, idp, CTF_K_TYPEDEF,
+ isroot);
+ break;
+ case DW_TAG_union_type:
+ ctf_dprintf("union\n");
+ ret = ctf_dwarf_create_sou(cdp, die, idp, CTF_K_UNION,
+ isroot);
+ break;
+ case DW_TAG_const_type:
+ ctf_dprintf("const\n");
+ ret = ctf_dwarf_create_reference(cdp, die, idp, CTF_K_CONST,
+ isroot);
+ break;
+ case DW_TAG_volatile_type:
+ ctf_dprintf("volatile\n");
+ ret = ctf_dwarf_create_reference(cdp, die, idp, CTF_K_VOLATILE,
+ isroot);
+ break;
+ case DW_TAG_restrict_type:
+ ctf_dprintf("restrict\n");
+ ret = ctf_dwarf_create_reference(cdp, die, idp, CTF_K_RESTRICT,
+ isroot);
+ break;
+ default:
+ ctf_dprintf("ignoring tag type %x\n", tag);
+ ret = 0;
+ break;
+ }
+ ctf_dprintf("ctf_dwarf_convert_type tag specific handler returned %d\n",
+ ret);
+
+ return (ret);
+}
+
+static int
+ctf_dwarf_walk_lexical(ctf_die_t *cdp, Dwarf_Die die)
+{
+ int ret;
+ Dwarf_Die child;
+
+ if ((ret = ctf_dwarf_child(cdp, die, &child)) != 0)
+ return (ret);
+
+ if (child == NULL)
+ return (0);
+
+ return (ctf_dwarf_convert_die(cdp, die));
+}
+
+static int
+ctf_dwarf_function_count(ctf_die_t *cdp, Dwarf_Die die, ctf_funcinfo_t *fip,
+ boolean_t fptr)
+{
+ int ret;
+ Dwarf_Die child, sib, arg;
+
+ if ((ret = ctf_dwarf_child(cdp, die, &child)) != 0)
+ return (ret);
+
+ arg = child;
+ while (arg != NULL) {
+ Dwarf_Half tag;
+
+ if ((ret = ctf_dwarf_tag(cdp, arg, &tag)) != 0)
+ return (ret);
+
+ /*
+ * We have to check for a varargs type decleration. This will
+ * happen in one of two ways. If we have a function pointer
+ * type, then it'll be done with a tag of type
+ * DW_TAG_unspecified_parameters. However, it only means we have
+ * a variable number of arguments, if we have more than one
+ * argument found so far. Otherwise, when we have a function
+ * type, it instead uses a formal parameter whose name is '...'
+ * to indicate a variable arguments member.
+ *
+ * Also, if we have a function pointer, then we have to expect
+ * that we might not get a name at all.
+ */
+ if (tag == DW_TAG_formal_parameter && fptr == B_FALSE) {
+ char *name;
+ if ((ret = ctf_dwarf_string(cdp, die, DW_AT_name,
+ &name)) != 0)
+ return (ret);
+ if (strcmp(name, DWARF_VARARGS_NAME) == 0)
+ fip->ctc_flags |= CTF_FUNC_VARARG;
+ else
+ fip->ctc_argc++;
+ ctf_free(name, strlen(name) + 1);
+ } else if (tag == DW_TAG_formal_parameter) {
+ fip->ctc_argc++;
+ } else if (tag == DW_TAG_unspecified_parameters &&
+ fip->ctc_argc > 0) {
+ fip->ctc_flags |= CTF_FUNC_VARARG;
+ }
+ if ((ret = ctf_dwarf_sib(cdp, arg, &sib)) != 0)
+ return (ret);
+ arg = sib;
+ }
+
+ return (0);
+}
+
+static int
+ctf_dwarf_convert_fargs(ctf_die_t *cdp, Dwarf_Die die, ctf_funcinfo_t *fip,
+ ctf_id_t *argv)
+{
+ int ret;
+ int i = 0;
+ Dwarf_Die child, sib, arg;
+
+ if ((ret = ctf_dwarf_child(cdp, die, &child)) != 0)
+ return (ret);
+
+ arg = child;
+ while (arg != NULL) {
+ Dwarf_Half tag;
+
+ if ((ret = ctf_dwarf_tag(cdp, arg, &tag)) != 0)
+ return (ret);
+ if (tag == DW_TAG_formal_parameter) {
+ Dwarf_Die tdie;
+
+ if ((ret = ctf_dwarf_refdie(cdp, arg, DW_AT_type,
+ &tdie)) != 0)
+ return (ret);
+
+ if ((ret = ctf_dwarf_convert_type(cdp, tdie, &argv[i],
+ CTF_ADD_ROOT)) != 0)
+ return (ret);
+ i++;
+
+ /*
+ * Once we hit argc entries, we're done. This ensures we
+ * don't accidentally hit a varargs which should be the
+ * least entry.
+ */
+ if (i == fip->ctc_argc)
+ break;
+ }
+
+ if ((ret = ctf_dwarf_sib(cdp, arg, &sib)) != 0)
+ return (ret);
+ arg = sib;
+ }
+
+ return (0);
+}
+
+static int
+ctf_dwarf_convert_function(ctf_die_t *cdp, Dwarf_Die die)
+{
+ int ret;
+ char *name;
+ ctf_dwfunc_t *cdf;
+ Dwarf_Die tdie;
+
+ /*
+ * Functions that don't have a name are generally functions that have
+ * been inlined and thus most information about them has been lost. If
+ * we can't get a name, then instead of returning ENOENT, we silently
+ * swallow the error.
+ */
+ if ((ret = ctf_dwarf_string(cdp, die, DW_AT_name, &name)) != 0) {
+ if (ret == ENOENT)
+ return (0);
+ return (ret);
+ }
+
+ ctf_dprintf("beginning work on function %s\n", name);
+ if ((cdf = ctf_alloc(sizeof (ctf_dwfunc_t))) == NULL) {
+ ctf_free(name, strlen(name) + 1);
+ return (ENOMEM);
+ }
+ bzero(cdf, sizeof (ctf_dwfunc_t));
+ cdf->cdf_name = name;
+
+ if ((ret = ctf_dwarf_refdie(cdp, die, DW_AT_type, &tdie)) == 0) {
+ if ((ret = ctf_dwarf_convert_type(cdp, tdie,
+ &(cdf->cdf_fip.ctc_return), CTF_ADD_ROOT)) != 0) {
+ ctf_free(name, strlen(name) + 1);
+ ctf_free(cdf, sizeof (ctf_dwfunc_t));
+ return (ret);
+ }
+ } else if (ret != ENOENT) {
+ ctf_free(name, strlen(name) + 1);
+ ctf_free(cdf, sizeof (ctf_dwfunc_t));
+ return (ret);
+ } else {
+ if ((cdf->cdf_fip.ctc_return = ctf_dwarf_void(cdp)) ==
+ CTF_ERR) {
+ ctf_free(name, strlen(name) + 1);
+ ctf_free(cdf, sizeof (ctf_dwfunc_t));
+ return (ctf_errno(cdp->cd_ctfp));
+ }
+ }
+
+ /*
+ * A function has a number of children, some of which may not be ones we
+ * care about. Children that we care about have a type of
+ * DW_TAG_formal_parameter. We're going to do two passes, the first to
+ * count the arguments, the second to process them. Afterwards, we
+ * should be good to go ahead and add this function.
+ *
+ * Note, we already got the return type by going in and grabbing it out
+ * of the DW_AT_type.
+ */
+ if ((ret = ctf_dwarf_function_count(cdp, die, &cdf->cdf_fip,
+ B_FALSE)) != 0) {
+ ctf_free(name, strlen(name) + 1);
+ ctf_free(cdf, sizeof (ctf_dwfunc_t));
+ return (ret);
+ }
+
+ ctf_dprintf("beginning to convert function arguments %s\n", name);
+ if (cdf->cdf_fip.ctc_argc != 0) {
+ uint_t argc = cdf->cdf_fip.ctc_argc;
+ cdf->cdf_argv = ctf_alloc(sizeof (ctf_id_t) * argc);
+ if (cdf->cdf_argv == NULL) {
+ ctf_free(name, strlen(name) + 1);
+ ctf_free(cdf, sizeof (ctf_dwfunc_t));
+ return (ENOMEM);
+ }
+ if ((ret = ctf_dwarf_convert_fargs(cdp, die,
+ &cdf->cdf_fip, cdf->cdf_argv)) != 0) {
+ ctf_free(cdf->cdf_argv, sizeof (ctf_id_t) * argc);
+ ctf_free(name, strlen(name) + 1);
+ ctf_free(cdf, sizeof (ctf_dwfunc_t));
+ return (ret);
+ }
+ } else {
+ cdf->cdf_argv = NULL;
+ }
+
+ if ((ret = ctf_dwarf_isglobal(cdp, die, &cdf->cdf_global)) != 0) {
+ ctf_free(cdf->cdf_argv, sizeof (ctf_id_t) *
+ cdf->cdf_fip.ctc_argc);
+ ctf_free(name, strlen(name) + 1);
+ ctf_free(cdf, sizeof (ctf_dwfunc_t));
+ return (ret);
+ }
+
+ ctf_list_append(&cdp->cd_funcs, cdf);
+ return (ret);
+}
+
+/*
+ * Convert variables, but only if they're not prototypes and have names.
+ */
+static int
+ctf_dwarf_convert_variable(ctf_die_t *cdp, Dwarf_Die die)
+{
+ int ret;
+ char *name;
+ Dwarf_Bool b;
+ Dwarf_Die tdie;
+ ctf_id_t id;
+ ctf_dwvar_t *cdv;
+
+ if ((ret = ctf_dwarf_boolean(cdp, die, DW_AT_declaration, &b)) != 0) {
+ if (ret != ENOENT)
+ return (ret);
+ } else if (b != 0) {
+ return (0);
+ }
+
+ if ((ret = ctf_dwarf_string(cdp, die, DW_AT_name, &name)) != 0 &&
+ ret != ENOENT)
+ return (ret);
+ if (ret == ENOENT)
+ return (0);
+
+ if ((ret = ctf_dwarf_refdie(cdp, die, DW_AT_type, &tdie)) != 0) {
+ ctf_free(name, strlen(name) + 1);
+ return (ret);
+ }
+
+ if ((ret = ctf_dwarf_convert_type(cdp, tdie, &id,
+ CTF_ADD_ROOT)) != 0)
+ return (ret);
+
+ if ((cdv = ctf_alloc(sizeof (ctf_dwvar_t))) == NULL) {
+ ctf_free(name, strlen(name) + 1);
+ return (ENOMEM);
+ }
+
+ cdv->cdv_name = name;
+ cdv->cdv_type = id;
+
+ if ((ret = ctf_dwarf_isglobal(cdp, die, &cdv->cdv_global)) != 0) {
+ ctf_free(cdv, sizeof (ctf_dwvar_t));
+ ctf_free(name, strlen(name) + 1);
+ return (ret);
+ }
+
+ ctf_list_append(&cdp->cd_vars, cdv);
+ return (0);
+}
+
+/*
+ * Walk through our set of top-level types and process them.
+ */
+static int
+ctf_dwarf_walk_toplevel(ctf_die_t *cdp, Dwarf_Die die)
+{
+ int ret;
+ Dwarf_Off offset;
+ Dwarf_Half tag;
+
+ if ((ret = ctf_dwarf_offset(cdp, die, &offset)) != 0)
+ return (ret);
+
+ if (offset > cdp->cd_maxoff) {
+ (void) snprintf(cdp->cd_errbuf, cdp->cd_errlen,
+ "die offset %llu beyond maximum for header %llu\n",
+ offset, cdp->cd_maxoff);
+ return (ECTF_CONVBKERR);
+ }
+
+ if ((ret = ctf_dwarf_tag(cdp, die, &tag)) != 0)
+ return (ret);
+
+ ret = 0;
+ switch (tag) {
+ case DW_TAG_subprogram:
+ ctf_dprintf("top level func\n");
+ ret = ctf_dwarf_convert_function(cdp, die);
+ break;
+ case DW_TAG_variable:
+ ctf_dprintf("top level var\n");
+ ret = ctf_dwarf_convert_variable(cdp, die);
+ break;
+ case DW_TAG_lexical_block:
+ ctf_dprintf("top level block\n");
+ ret = ctf_dwarf_walk_lexical(cdp, die);
+ break;
+ case DW_TAG_enumeration_type:
+ case DW_TAG_structure_type:
+ case DW_TAG_typedef:
+ case DW_TAG_union_type:
+ ctf_dprintf("top level type\n");
+ ret = ctf_dwarf_convert_type(cdp, die, NULL, B_TRUE);
+ break;
+ default:
+ break;
+ }
+
+ return (ret);
+}
+
+
+/*
+ * We're given a node. At this node we need to convert it and then proceed to
+ * convert any siblings that are associaed with this die.
+ */
+static int
+ctf_dwarf_convert_die(ctf_die_t *cdp, Dwarf_Die die)
+{
+ while (die != NULL) {
+ int ret;
+ Dwarf_Die sib;
+
+ if ((ret = ctf_dwarf_walk_toplevel(cdp, die)) != 0)
+ return (ret);
+
+ if ((ret = ctf_dwarf_sib(cdp, die, &sib)) != 0)
+ return (ret);
+ die = sib;
+ }
+ return (0);
+}
+
+static int
+ctf_dwarf_fixup_die(ctf_die_t *cdp, boolean_t addpass)
+{
+ ctf_dwmap_t *map;
+
+ for (map = avl_first(&cdp->cd_map); map != NULL;
+ map = AVL_NEXT(&cdp->cd_map, map)) {
+ int ret;
+ if (map->cdm_fix == B_FALSE)
+ continue;
+ if ((ret = ctf_dwarf_fixup_sou(cdp, map->cdm_die, map->cdm_id,
+ addpass)) != 0)
+ return (ret);
+ }
+
+ return (0);
+}
+
+static ctf_dwfunc_t *
+ctf_dwarf_match_func(ctf_die_t *cdp, const char *file, const char *name,
+ int bind)
+{
+ ctf_dwfunc_t *cdf;
+
+ if (bind == STB_WEAK)
+ return (NULL);
+
+ /* Nothing we can do if we can't find a name to compare it to. */
+ if (bind == STB_LOCAL && (file == NULL || cdp->cd_name == NULL))
+ return (NULL);
+
+ for (cdf = ctf_list_next(&cdp->cd_funcs); cdf != NULL;
+ cdf = ctf_list_next(cdf)) {
+ if (bind == STB_GLOBAL && cdf->cdf_global == B_FALSE)
+ continue;
+ if (bind == STB_LOCAL && cdf->cdf_global == B_TRUE)
+ continue;
+ if (strcmp(name, cdf->cdf_name) != 0)
+ continue;
+ if (bind == STB_LOCAL && strcmp(file, cdp->cd_name) != 0)
+ continue;
+ return (cdf);
+ }
+
+ return (NULL);
+}
+static ctf_dwvar_t *
+ctf_dwarf_match_var(ctf_die_t *cdp, const char *file, const char *name,
+ int bind)
+{
+ ctf_dwvar_t *cdv;
+
+ /* Nothing we can do if we can't find a name to compare it to. */
+ if (bind == STB_LOCAL && (file == NULL || cdp->cd_name == NULL))
+ return (NULL);
+ ctf_dprintf("Still considering %s\n", name);
+
+ for (cdv = ctf_list_next(&cdp->cd_vars); cdv != NULL;
+ cdv = ctf_list_next(cdv)) {
+ if (bind == STB_GLOBAL && cdv->cdv_global == B_FALSE)
+ continue;
+ if (bind == STB_LOCAL && cdv->cdv_global == B_TRUE)
+ continue;
+ if (strcmp(name, cdv->cdv_name) != 0)
+ continue;
+ if (bind == STB_LOCAL && strcmp(file, cdp->cd_name) != 0)
+ continue;
+ return (cdv);
+ }
+
+ return (NULL);
+}
+
+static int
+ctf_dwarf_symtab_iter(ctf_die_t *cdp, ctf_dwarf_symtab_f *func, void *arg)
+{
+ int ret;
+ ulong_t i;
+ ctf_file_t *fp = cdp->cd_ctfp;
+ const char *file = NULL;
+ uintptr_t symbase = (uintptr_t)fp->ctf_symtab.cts_data;
+ uintptr_t strbase = (uintptr_t)fp->ctf_strtab.cts_data;
+
+ for (i = 0; i < fp->ctf_nsyms; i++) {
+ const char *name;
+ int type;
+ GElf_Sym gsym;
+ const GElf_Sym *gsymp;
+
+ if (fp->ctf_symtab.cts_entsize == sizeof (Elf32_Sym)) {
+ const Elf32_Sym *symp = (Elf32_Sym *)symbase + i;
+ type = ELF32_ST_TYPE(symp->st_info);
+ if (type == STT_FILE) {
+ file = (char *)(strbase + symp->st_name);
+ continue;
+ }
+ if (type != STT_OBJECT && type != STT_FUNC)
+ continue;
+ if (ctf_sym_valid(strbase, type, symp->st_shndx,
+ symp->st_value, symp->st_name) == B_FALSE)
+ continue;
+ name = (char *)(strbase + symp->st_name);
+ gsym.st_name = symp->st_name;
+ gsym.st_value = symp->st_value;
+ gsym.st_size = symp->st_size;
+ gsym.st_info = symp->st_info;
+ gsym.st_other = symp->st_other;
+ gsym.st_shndx = symp->st_shndx;
+ gsymp = &gsym;
+ } else {
+ const Elf64_Sym *symp = (Elf64_Sym *)symbase + i;
+ type = ELF64_ST_TYPE(symp->st_info);
+ if (type == STT_FILE) {
+ file = (char *)(strbase + symp->st_name);
+ continue;
+ }
+ if (type != STT_OBJECT && type != STT_FUNC)
+ continue;
+ if (ctf_sym_valid(strbase, type, symp->st_shndx,
+ symp->st_value, symp->st_name) == B_FALSE)
+ continue;
+ name = (char *)(strbase + symp->st_name);
+ gsymp = symp;
+ }
+
+ ret = func(cdp, gsymp, i, file, name, arg);
+ if (ret != 0)
+ return (ret);
+ }
+
+ return (0);
+}
+
+static int
+ctf_dwarf_conv_funcvars_cb(ctf_die_t *cdp, const GElf_Sym *symp, ulong_t idx,
+ const char *file, const char *name, void *arg)
+{
+ int ret, bind, type;
+
+ bind = GELF_ST_BIND(symp->st_info);
+ type = GELF_ST_TYPE(symp->st_info);
+
+ /*
+ * Come back to weak symbols in another pass
+ */
+ if (bind == STB_WEAK)
+ return (0);
+
+ if (type == STT_OBJECT) {
+ ctf_dwvar_t *cdv = ctf_dwarf_match_var(cdp, file, name,
+ bind);
+ ctf_dprintf("match for %s (%d): %p\n", name, idx, cdv);
+ if (cdv == NULL)
+ return (0);
+ ret = ctf_add_object(cdp->cd_ctfp, idx, cdv->cdv_type);
+ ctf_dprintf("added object %s\n", name);
+ } else {
+ ctf_dwfunc_t *cdf = ctf_dwarf_match_func(cdp, file, name,
+ bind);
+ if (cdf == NULL)
+ return (0);
+ ret = ctf_add_function(cdp->cd_ctfp, idx, &cdf->cdf_fip,
+ cdf->cdf_argv);
+ }
+
+ if (ret == CTF_ERR) {
+ return (ctf_errno(cdp->cd_ctfp));
+ }
+
+ return (0);
+}
+
+static int
+ctf_dwarf_conv_funcvars(ctf_die_t *cdp)
+{
+ return (ctf_dwarf_symtab_iter(cdp, ctf_dwarf_conv_funcvars_cb, NULL));
+}
+
+/*
+ * Note, this comment comes from the original version of the CTF tools.
+ *
+ * If we have a weak symbol, attempt to find the strong symbol it will
+ * resolve to. Note: the code where this actually happens is in
+ * sym_process() in cmd/sgs/libld/common/syms.c
+ *
+ * Finding the matching symbol is unfortunately not trivial. For a
+ * symbol to be a candidate, it must:
+ *
+ * - have the same type (function, object)
+ * - have the same value (address)
+ * - have the same size
+ * - not be another weak symbol
+ * - belong to the same section (checked via section index)
+ *
+ * If such a candidate is global, then we assume we've found it. The
+ * linker generates the symbol table such that the curfile might be
+ * incorrect; this is OK for global symbols, since find_iidesc() doesn't
+ * need to check for the source file for the symbol.
+ *
+ * We might have found a strong local symbol, where the curfile is
+ * accurate and matches that of the weak symbol. We assume this is a
+ * reasonable match.
+ *
+ * If we've got a local symbol with a non-matching curfile, there are
+ * two possibilities. Either this is a completely different symbol, or
+ * it's a once-global symbol that was scoped to local via a mapfile. In
+ * the latter case, curfile is likely inaccurate since the linker does
+ * not preserve the needed curfile in the order of the symbol table (see
+ * the comments about locally scoped symbols in libld's update_osym()).
+ * As we can't tell this case from the former one, we use this symbol
+ * iff no other matching symbol is found.
+ *
+ * What we really need here is a SUNW section containing weak<->strong
+ * mappings that we can consume.
+ */
+typedef struct ctf_dwarf_weak_arg {
+ const GElf_Sym *cweak_symp;
+ const char *cweak_file;
+ boolean_t cweak_candidate;
+ ulong_t cweak_idx;
+} ctf_dwarf_weak_arg_t;
+
+static int
+ctf_dwarf_conv_check_weak(ctf_die_t *cdp, const GElf_Sym *symp,
+ ulong_t idx, const char *file, const char *name, void *arg)
+{
+ ctf_dwarf_weak_arg_t *cweak = arg;
+ const GElf_Sym *wsymp = cweak->cweak_symp;
+
+ ctf_dprintf("comparing weak to %s\n", name);
+
+ if (GELF_ST_BIND(symp->st_info) == STB_WEAK) {
+ return (0);
+ }
+
+ if (GELF_ST_TYPE(wsymp->st_info) != GELF_ST_TYPE(symp->st_info)) {
+ return (0);
+ }
+
+ if (wsymp->st_value != symp->st_value) {
+ return (0);
+ }
+
+ if (wsymp->st_size != symp->st_size) {
+ return (0);
+ }
+
+ if (wsymp->st_shndx != symp->st_shndx) {
+ return (0);
+ }
+
+ /*
+ * Check if it's a weak candidate.
+ */
+ if (GELF_ST_BIND(symp->st_info) == STB_LOCAL &&
+ (file == NULL || cweak->cweak_file == NULL ||
+ strcmp(file, cweak->cweak_file) != 0)) {
+ cweak->cweak_candidate = B_TRUE;
+ cweak->cweak_idx = idx;
+ return (0);
+ }
+
+ /*
+ * Found a match, break.
+ */
+ cweak->cweak_idx = idx;
+ return (1);
+}
+
+static int
+ctf_dwarf_duplicate_sym(ctf_die_t *cdp, ulong_t idx, ulong_t matchidx)
+{
+ ctf_id_t id = ctf_lookup_by_symbol(cdp->cd_ctfp, matchidx);
+
+ /*
+ * If we matched something that for some reason didn't have type data,
+ * we don't consider that a fatal error and silently swallow it.
+ */
+ if (id == CTF_ERR) {
+ if (ctf_errno(cdp->cd_ctfp) == ECTF_NOTYPEDAT)
+ return (0);
+ else
+ return (ctf_errno(cdp->cd_ctfp));
+ }
+
+ if (ctf_add_object(cdp->cd_ctfp, idx, id) == CTF_ERR)
+ return (ctf_errno(cdp->cd_ctfp));
+
+ return (0);
+}
+
+static int
+ctf_dwarf_duplicate_func(ctf_die_t *cdp, ulong_t idx, ulong_t matchidx)
+{
+ int ret;
+ ctf_funcinfo_t fip;
+ ctf_id_t *args = NULL;
+
+ if (ctf_func_info(cdp->cd_ctfp, matchidx, &fip) == CTF_ERR) {
+ if (ctf_errno(cdp->cd_ctfp) == ECTF_NOFUNCDAT)
+ return (0);
+ else
+ return (ctf_errno(cdp->cd_ctfp));
+ }
+
+ if (fip.ctc_argc != 0) {
+ args = ctf_alloc(sizeof (ctf_id_t) * fip.ctc_argc);
+ if (args == NULL)
+ return (ENOMEM);
+
+ if (ctf_func_args(cdp->cd_ctfp, matchidx, fip.ctc_argc, args) ==
+ CTF_ERR) {
+ ctf_free(args, sizeof (ctf_id_t) * fip.ctc_argc);
+ return (ctf_errno(cdp->cd_ctfp));
+ }
+ }
+
+ ret = ctf_add_function(cdp->cd_ctfp, idx, &fip, args);
+ if (args != NULL)
+ ctf_free(args, sizeof (ctf_id_t) * fip.ctc_argc);
+ if (ret == CTF_ERR)
+ return (ctf_errno(cdp->cd_ctfp));
+
+ return (0);
+}
+
+static int
+ctf_dwarf_conv_weaks_cb(ctf_die_t *cdp, const GElf_Sym *symp,
+ ulong_t idx, const char *file, const char *name, void *arg)
+{
+ int ret, type;
+ ctf_dwarf_weak_arg_t cweak;
+
+ /*
+ * We only care about weak symbols.
+ */
+ if (GELF_ST_BIND(symp->st_info) != STB_WEAK)
+ return (0);
+
+ type = GELF_ST_TYPE(symp->st_info);
+ ASSERT(type == STT_OBJECT || type == STT_FUNC);
+
+ /*
+ * For each weak symbol we encounter, we need to do a second iteration
+ * to try and find a match. We should probably think about other
+ * techniques to try and save us time in the future.
+ */
+ cweak.cweak_symp = symp;
+ cweak.cweak_file = file;
+ cweak.cweak_candidate = B_FALSE;
+ cweak.cweak_idx = 0;
+
+ ctf_dprintf("Trying to find weak equiv for %s\n", name);
+
+ ret = ctf_dwarf_symtab_iter(cdp, ctf_dwarf_conv_check_weak, &cweak);
+ VERIFY(ret == 0 || ret == 1);
+
+ /*
+ * Nothing was ever found, we're not going to add anything for this
+ * entry.
+ */
+ if (ret == 0 && cweak.cweak_candidate == B_FALSE) {
+ ctf_dprintf("found no weak match for %s\n", name);
+ return (0);
+ }
+
+ /*
+ * Now, finally go and add the type based on the match.
+ */
+ if (type == STT_OBJECT) {
+ ret = ctf_dwarf_duplicate_sym(cdp, idx, cweak.cweak_idx);
+ } else {
+ ret = ctf_dwarf_duplicate_func(cdp, idx, cweak.cweak_idx);
+ }
+
+ return (ret);
+}
+
+static int
+ctf_dwarf_conv_weaks(ctf_die_t *cdp)
+{
+ return (ctf_dwarf_symtab_iter(cdp, ctf_dwarf_conv_weaks_cb, NULL));
+}
+
+/* ARGSUSED */
+static int
+ctf_dwarf_convert_one(void *arg, void *unused)
+{
+ int ret;
+ ctf_file_t *dedup;
+ ctf_die_t *cdp = arg;
+
+ ctf_dprintf("converting die: %s\n", cdp->cd_name);
+ ctf_dprintf("max offset: %x\n", cdp->cd_maxoff);
+ VERIFY(cdp != NULL);
+
+ ret = ctf_dwarf_convert_die(cdp, cdp->cd_cu);
+ ctf_dprintf("ctf_dwarf_convert_die (%s) returned %d\n", cdp->cd_name,
+ ret);
+ if (ret != 0) {
+ return (ret);
+ }
+ if (ctf_update(cdp->cd_ctfp) != 0) {
+ return (ctf_dwarf_error(cdp, cdp->cd_ctfp, 0,
+ "failed to update output ctf container"));
+ }
+
+ ret = ctf_dwarf_fixup_die(cdp, B_FALSE);
+ ctf_dprintf("ctf_dwarf_fixup_die (%s) returned %d\n", cdp->cd_name,
+ ret);
+ if (ret != 0) {
+ return (ret);
+ }
+ if (ctf_update(cdp->cd_ctfp) != 0) {
+ return (ctf_dwarf_error(cdp, cdp->cd_ctfp, 0,
+ "failed to update output ctf container"));
+ }
+
+ ret = ctf_dwarf_fixup_die(cdp, B_TRUE);
+ ctf_dprintf("ctf_dwarf_fixup_die (%s) returned %d\n", cdp->cd_name,
+ ret);
+ if (ret != 0) {
+ return (ret);
+ }
+ if (ctf_update(cdp->cd_ctfp) != 0) {
+ return (ctf_dwarf_error(cdp, cdp->cd_ctfp, 0,
+ "failed to update output ctf container"));
+ }
+
+
+ if ((ret = ctf_dwarf_conv_funcvars(cdp)) != 0) {
+ return (ctf_dwarf_error(cdp, NULL, ret,
+ "failed to convert strong functions and variables"));
+ }
+
+ if (ctf_update(cdp->cd_ctfp) != 0) {
+ return (ctf_dwarf_error(cdp, cdp->cd_ctfp, 0,
+ "failed to update output ctf container"));
+ }
+
+ if (cdp->cd_doweaks == B_TRUE) {
+ if ((ret = ctf_dwarf_conv_weaks(cdp)) != 0) {
+ return (ctf_dwarf_error(cdp, NULL, ret,
+ "failed to convert weak functions and variables"));
+ }
+
+ if (ctf_update(cdp->cd_ctfp) != 0) {
+ return (ctf_dwarf_error(cdp, cdp->cd_ctfp, 0,
+ "failed to update output ctf container"));
+ }
+ }
+
+ ctf_phase_dump(cdp->cd_ctfp, "pre-dedup");
+ ctf_dprintf("adding inputs for dedup\n");
+ if ((ret = ctf_merge_add(cdp->cd_cmh, cdp->cd_ctfp)) != 0) {
+ return (ctf_dwarf_error(cdp, NULL, ret,
+ "failed to add inputs for merge"));
+ }
+
+ ctf_dprintf("starting merge\n");
+ if ((ret = ctf_merge_dedup(cdp->cd_cmh, &dedup)) != 0) {
+ return (ctf_dwarf_error(cdp, NULL, ret,
+ "failed to deduplicate die"));
+ }
+ ctf_close(cdp->cd_ctfp);
+ cdp->cd_ctfp = dedup;
+
+ return (0);
+}
+
+/*
+ * Note, we expect that if we're returning a ctf_file_t from one of the dies,
+ * say in the single node case, it's been saved and the entry here has been set
+ * to NULL, which ctf_close happily ignores.
+ */
+static void
+ctf_dwarf_free_die(ctf_die_t *cdp)
+{
+ ctf_dwfunc_t *cdf, *ndf;
+ ctf_dwvar_t *cdv, *ndv;
+ ctf_dwbitf_t *cdb, *ndb;
+ ctf_dwmap_t *map;
+ void *cookie;
+ Dwarf_Error derr;
+
+ ctf_dprintf("Beginning to free die: %p\n", cdp);
+ cdp->cd_elf = NULL;
+ ctf_dprintf("Trying to free name: %p\n", cdp->cd_name);
+ if (cdp->cd_name != NULL)
+ ctf_free(cdp->cd_name, strlen(cdp->cd_name) + 1);
+ ctf_dprintf("Trying to free merge handle: %p\n", cdp->cd_cmh);
+ if (cdp->cd_cmh != NULL) {
+ ctf_merge_fini(cdp->cd_cmh);
+ cdp->cd_cmh = NULL;
+ }
+
+ ctf_dprintf("Trying to free functions\n");
+ for (cdf = ctf_list_next(&cdp->cd_funcs); cdf != NULL; cdf = ndf) {
+ ndf = ctf_list_next(cdf);
+ ctf_free(cdf->cdf_name, strlen(cdf->cdf_name) + 1);
+ if (cdf->cdf_fip.ctc_argc != 0) {
+ ctf_free(cdf->cdf_argv,
+ sizeof (ctf_id_t) * cdf->cdf_fip.ctc_argc);
+ }
+ ctf_free(cdf, sizeof (ctf_dwfunc_t));
+ }
+
+ ctf_dprintf("Trying to free variables\n");
+ for (cdv = ctf_list_next(&cdp->cd_vars); cdv != NULL; cdv = ndv) {
+ ndv = ctf_list_next(cdv);
+ ctf_free(cdv->cdv_name, strlen(cdv->cdv_name) + 1);
+ ctf_free(cdv, sizeof (ctf_dwvar_t));
+ }
+
+ ctf_dprintf("Trying to free bitfields\n");
+ for (cdb = ctf_list_next(&cdp->cd_bitfields); cdb != NULL; cdb = ndb) {
+ ndb = ctf_list_next(cdb);
+ ctf_free(cdb, sizeof (ctf_dwbitf_t));
+ }
+
+ /* How do we clean up die usage? */
+ ctf_dprintf("Trying to clean up dwarf_t: %p\n", cdp->cd_dwarf);
+ (void) dwarf_finish(cdp->cd_dwarf, &derr);
+ cdp->cd_dwarf = NULL;
+ ctf_close(cdp->cd_ctfp);
+
+ cookie = NULL;
+ while ((map = avl_destroy_nodes(&cdp->cd_map, &cookie)) != NULL) {
+ ctf_free(map, sizeof (ctf_dwmap_t));
+ }
+ avl_destroy(&cdp->cd_map);
+ cdp->cd_errbuf = NULL;
+}
+
+static void
+ctf_dwarf_free_dies(ctf_die_t *cdies, int ndies)
+{
+ int i;
+
+ ctf_dprintf("Beginning to free dies\n");
+ for (i = 0; i < ndies; i++) {
+ ctf_dwarf_free_die(&cdies[i]);
+ }
+
+ ctf_free(cdies, sizeof (ctf_die_t) * ndies);
+}
+
+static int
+ctf_dwarf_count_dies(Dwarf_Debug dw, Dwarf_Error *derr, int *ndies,
+ char *errbuf, size_t errlen)
+{
+ int ret;
+ Dwarf_Half vers;
+ Dwarf_Unsigned nexthdr;
+
+ while ((ret = dwarf_next_cu_header(dw, NULL, &vers, NULL, NULL,
+ &nexthdr, derr)) != DW_DLV_NO_ENTRY) {
+ if (ret != DW_DLV_OK) {
+ (void) snprintf(errbuf, errlen,
+ "file does not contain valid DWARF data: %s\n",
+ dwarf_errmsg(*derr));
+ return (ECTF_CONVBKERR);
+ }
+
+ if (vers != DWARF_VERSION_TWO) {
+ (void) snprintf(errbuf, errlen,
+ "unsupported DWARF version: %d\n", vers);
+ return (ECTF_CONVBKERR);
+ }
+ *ndies = *ndies + 1;
+ }
+
+ if (*ndies == 0) {
+ (void) snprintf(errbuf, errlen,
+ "file does not contain valid DWARF data: %s\n",
+ dwarf_errmsg(*derr));
+ return (ECTF_CONVBKERR);
+ }
+
+ return (0);
+}
+
+/*
+ * Iterate over all of the dies and create a ctf_die_t for each of them. This is
+ * used to determine if we have zero, one, or multiple dies to convert. If we
+ * have zero, that's an error. If there's only one die, that's the simple case.
+ * No merge needed and only a single Dwarf_Debug as well.
+ */
+static int
+ctf_dwarf_init_die(int fd, Elf *elf, ctf_die_t *cdp, int ndie, char *errbuf,
+ size_t errlen)
+{
+ int ret;
+ Dwarf_Unsigned hdrlen, abboff, nexthdr;
+ Dwarf_Half addrsz;
+ Dwarf_Unsigned offset = 0;
+ Dwarf_Error derr;
+
+ while ((ret = dwarf_next_cu_header(cdp->cd_dwarf, &hdrlen, NULL,
+ &abboff, &addrsz, &nexthdr, &derr)) != DW_DLV_NO_ENTRY) {
+ char *name;
+ Dwarf_Die cu, child;
+
+ /* Based on the counting above, we should be good to go */
+ VERIFY(ret == DW_DLV_OK);
+ if (ndie > 0) {
+ ndie--;
+ offset = nexthdr;
+ continue;
+ }
+
+ /*
+ * Compilers are apparently inconsistent. Some emit no DWARF for
+ * empty files and others emit empty compilation unit.
+ */
+ cdp->cd_voidtid = CTF_ERR;
+ cdp->cd_longtid = CTF_ERR;
+ cdp->cd_elf = elf;
+ cdp->cd_maxoff = nexthdr - 1;
+ cdp->cd_ctfp = ctf_fdcreate(fd, &ret);
+ if (cdp->cd_ctfp == NULL) {
+ ctf_free(cdp, sizeof (ctf_die_t));
+ return (ret);
+ }
+ avl_create(&cdp->cd_map, ctf_dwmap_comp, sizeof (ctf_dwmap_t),
+ offsetof(ctf_dwmap_t, cdm_avl));
+ cdp->cd_errbuf = errbuf;
+ cdp->cd_errlen = errlen;
+ bzero(&cdp->cd_vars, sizeof (ctf_list_t));
+ bzero(&cdp->cd_funcs, sizeof (ctf_list_t));
+ bzero(&cdp->cd_bitfields, sizeof (ctf_list_t));
+
+ if ((ret = ctf_dwarf_die_elfenc(elf, cdp, errbuf,
+ errlen)) != 0) {
+ avl_destroy(&cdp->cd_map);
+ ctf_free(cdp, sizeof (ctf_die_t));
+ return (ret);
+ }
+
+ if ((ret = ctf_dwarf_sib(cdp, NULL, &cu)) != 0) {
+ avl_destroy(&cdp->cd_map);
+ ctf_free(cdp, sizeof (ctf_die_t));
+ return (ret);
+ }
+ if (cu == NULL) {
+ (void) snprintf(errbuf, errlen,
+ "file does not contain DWARF data\n");
+ avl_destroy(&cdp->cd_map);
+ ctf_free(cdp, sizeof (ctf_die_t));
+ return (ECTF_CONVBKERR);
+ }
+
+ if ((ret = ctf_dwarf_child(cdp, cu, &child)) != 0) {
+ avl_destroy(&cdp->cd_map);
+ ctf_free(cdp, sizeof (ctf_die_t));
+ return (ret);
+ }
+ if (child == NULL) {
+ (void) snprintf(errbuf, errlen,
+ "file does not contain DWARF data\n");
+ avl_destroy(&cdp->cd_map);
+ ctf_free(cdp, sizeof (ctf_die_t));
+ return (ECTF_CONVBKERR);
+ }
+
+ cdp->cd_cuoff = offset;
+ cdp->cd_cu = child;
+
+ if ((cdp->cd_cmh = ctf_merge_init(fd, &ret)) == NULL) {
+ avl_destroy(&cdp->cd_map);
+ ctf_free(cdp, sizeof (ctf_die_t));
+ return (ret);
+ }
+
+ if (ctf_dwarf_string(cdp, cu, DW_AT_name, &name) == 0) {
+ size_t len = strlen(name) + 1;
+ char *b = basename(name);
+ cdp->cd_name = strdup(b);
+ ctf_free(name, len);
+ }
+ break;
+ }
+
+ return (0);
+}
+
+
+ctf_conv_status_t
+ctf_dwarf_convert(int fd, Elf *elf, uint_t nthrs, int *errp, ctf_file_t **fpp,
+ char *errmsg, size_t errlen)
+{
+ int err, ret, ndies, i;
+ Dwarf_Debug dw;
+ Dwarf_Error derr;
+ ctf_die_t *cdies = NULL, *cdp;
+ workq_t *wqp = NULL;
+
+ if (errp == NULL)
+ errp = &err;
+ *errp = 0;
+ *fpp = NULL;
+
+ ret = dwarf_elf_init(elf, DW_DLC_READ, NULL, NULL, &dw, &derr);
+ if (ret != DW_DLV_OK) {
+ /*
+ * The old CTF tools used to check if we expected DWARF data
+ * here. In this case, if we actually have some amount of DWARF,
+ * but no section, for now, just go ahead and create an empty
+ * CTF file.
+ */
+ if (ret == DW_DLV_NO_ENTRY ||
+ dwarf_errno(derr) == DW_DLE_DEBUG_INFO_NULL) {
+ *fpp = ctf_create(errp);
+ return (*fpp != NULL ? CTF_CONV_SUCCESS :
+ CTF_CONV_ERROR);
+ }
+ (void) snprintf(errmsg, errlen,
+ "failed to initialize DWARF: %s\n",
+ dwarf_errmsg(derr));
+ *errp = ECTF_CONVBKERR;
+ return (CTF_CONV_ERROR);
+ }
+
+ ndies = 0;
+ ret = ctf_dwarf_count_dies(dw, &derr, &ndies, errmsg, errlen);
+ if (ret != 0) {
+ *errp = ret;
+ goto out;
+ }
+
+ (void) dwarf_finish(dw, &derr);
+ cdies = ctf_alloc(sizeof (ctf_die_t) * ndies);
+ if (cdies == NULL) {
+ *errp = ENOMEM;
+ return (CTF_CONV_ERROR);
+ }
+
+ for (i = 0; i < ndies; i++) {
+ cdp = &cdies[i];
+ ret = dwarf_elf_init(elf, DW_DLC_READ, NULL, NULL,
+ &cdp->cd_dwarf, &derr);
+ if (ret != 0) {
+ ctf_free(cdies, sizeof (ctf_die_t) * ndies);
+ (void) snprintf(errmsg, errlen,
+ "failed to initialize DWARF: %s\n",
+ dwarf_errmsg(derr));
+ *errp = ECTF_CONVBKERR;
+ return (CTF_CONV_ERROR);
+ }
+
+ ret = ctf_dwarf_init_die(fd, elf, &cdies[i], i, errmsg, errlen);
+ if (ret != 0) {
+ *errp = ret;
+ goto out;
+ }
+ cdp->cd_doweaks = ndies > 1 ? B_FALSE : B_TRUE;
+ }
+
+ ctf_dprintf("found %d DWARF die(s)\n", ndies);
+
+ /*
+ * If we only have one die, there's no reason to use multiple threads,
+ * even if the user requested them. After all, they just gave us an
+ * upper bound.
+ */
+ if (ndies == 1)
+ nthrs = 1;
+
+ if (workq_init(&wqp, nthrs) == -1) {
+ *errp = errno;
+ goto out;
+ }
+
+ for (i = 0; i < ndies; i++) {
+ cdp = &cdies[i];
+ ctf_dprintf("adding die %s: %p, %x %x\n", cdp->cd_name,
+ cdp->cd_cu, cdp->cd_cuoff, cdp->cd_maxoff);
+ if (workq_add(wqp, cdp) == -1) {
+ *errp = errno;
+ goto out;
+ }
+ }
+
+ ret = workq_work(wqp, ctf_dwarf_convert_one, NULL, errp);
+ if (ret == WORKQ_ERROR) {
+ *errp = errno;
+ goto out;
+ } else if (ret == WORKQ_UERROR) {
+ ctf_dprintf("internal convert failed: %s\n",
+ ctf_errmsg(*errp));
+ goto out;
+ }
+
+ ctf_dprintf("Determining next phase: have %d dies\n", ndies);
+ if (ndies != 1) {
+ ctf_merge_t *cmp;
+
+ cmp = ctf_merge_init(fd, &ret);
+ if (cmp == NULL) {
+ *errp = ret;
+ goto out;
+ }
+
+ ctf_dprintf("setting threads\n");
+ if ((ret = ctf_merge_set_nthreads(cmp, nthrs)) != 0) {
+ ctf_merge_fini(cmp);
+ *errp = ret;
+ goto out;
+ }
+
+ ctf_dprintf("adding dies\n");
+ for (i = 0; i < ndies; i++) {
+ cdp = &cdies[i];
+ if ((ret = ctf_merge_add(cmp, cdp->cd_ctfp)) != 0) {
+ ctf_merge_fini(cmp);
+ *errp = ret;
+ goto out;
+ }
+ }
+
+ ctf_dprintf("performing merge\n");
+ ret = ctf_merge_merge(cmp, fpp);
+ if (ret != 0) {
+ ctf_dprintf("failed merge!\n");
+ *fpp = NULL;
+ ctf_merge_fini(cmp);
+ *errp = ret;
+ goto out;
+ }
+ ctf_merge_fini(cmp);
+ *errp = 0;
+ ctf_dprintf("successfully converted!\n");
+ } else {
+ *errp = 0;
+ *fpp = cdies->cd_ctfp;
+ cdies->cd_ctfp = NULL;
+ ctf_dprintf("successfully converted!\n");
+ }
+
+out:
+ workq_fini(wqp);
+ ctf_dwarf_free_dies(cdies, ndies);
+ return (*fpp != NULL ? CTF_CONV_SUCCESS : CTF_CONV_ERROR);
+}
diff --git a/usr/src/lib/libctf/common/ctf_elfwrite.c b/usr/src/lib/libctf/common/ctf_elfwrite.c
new file mode 100644
index 0000000000..4d7c10aeec
--- /dev/null
+++ b/usr/src/lib/libctf/common/ctf_elfwrite.c
@@ -0,0 +1,422 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2015, Joyent, Inc.
+ */
+
+/*
+ * Routines for writing ctf data to elf files, originally from the ctf tools.
+ */
+
+#include <libctf_impl.h>
+#include <libctf.h>
+#include <gelf.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <libelf.h>
+
+static int
+ctf_write_elf(ctf_file_t *fp, Elf *src, Elf *dst, int flags)
+{
+ GElf_Ehdr sehdr, dehdr;
+ Elf_Scn *sscn, *dscn;
+ Elf_Data *sdata, *ddata;
+ GElf_Shdr shdr;
+ int symtab_idx = -1;
+ off_t new_offset = 0;
+ off_t ctfnameoff = 0;
+ int compress = (flags & CTF_ELFWRITE_F_COMPRESS);
+ int *secxlate = NULL;
+ int srcidx, dstidx, pad, i;
+ int curnmoff = 0;
+ int changing = 0;
+ int ret;
+ size_t nshdr, nphdr, strndx;
+ void *strdatabuf = NULL, *symdatabuf = NULL;
+ size_t strdatasz = 0, symdatasz = 0;
+
+ void *cdata = NULL;
+ size_t elfsize, asize;
+
+ if ((flags & ~(CTF_ELFWRITE_F_COMPRESS)) != 0) {
+ ret = ctf_set_errno(fp, EINVAL);
+ goto out;
+ }
+
+ if (gelf_newehdr(dst, gelf_getclass(src)) == NULL) {
+ ret = ctf_set_errno(fp, ECTF_ELF);
+ goto out;
+ }
+ if (gelf_getehdr(src, &sehdr) == NULL) {
+ ret = ctf_set_errno(fp, ECTF_ELF);
+ goto out;
+ }
+ (void) memcpy(&dehdr, &sehdr, sizeof (GElf_Ehdr));
+ if (gelf_update_ehdr(dst, &dehdr) == 0) {
+ ret = ctf_set_errno(fp, ECTF_ELF);
+ goto out;
+ }
+
+ /*
+ * Use libelf to get the number of sections and the string section to
+ * deal with ELF files that may have a large number of sections. We just
+ * always use this to make our live easier.
+ */
+ if (elf_getphdrnum(src, &nphdr) != 0) {
+ ret = ctf_set_errno(fp, ECTF_ELF);
+ goto out;
+ }
+ if (elf_getshdrnum(src, &nshdr) != 0) {
+ ret = ctf_set_errno(fp, ECTF_ELF);
+ goto out;
+ }
+ if (elf_getshdrstrndx(src, &strndx) != 0) {
+ ret = ctf_set_errno(fp, ECTF_ELF);
+ goto out;
+ }
+
+ /*
+ * Neither the existing debug sections nor the SUNW_ctf sections (new or
+ * existing) are SHF_ALLOC'd, so they won't be in areas referenced by
+ * program headers. As such, we can just blindly copy the program
+ * headers from the existing file to the new file.
+ */
+ if (nphdr != 0) {
+ (void) elf_flagelf(dst, ELF_C_SET, ELF_F_LAYOUT);
+ if (gelf_newphdr(dst, nphdr) == NULL) {
+ ret = ctf_set_errno(fp, ECTF_ELF);
+ goto out;
+ }
+
+ for (i = 0; i < nphdr; i++) {
+ GElf_Phdr phdr;
+
+ if (gelf_getphdr(src, i, &phdr) == NULL) {
+ ret = ctf_set_errno(fp, ECTF_ELF);
+ goto out;
+ }
+ if (gelf_update_phdr(dst, i, &phdr) == 0) {
+ ret = ctf_set_errno(fp, ECTF_ELF);
+ goto out;
+ }
+ }
+ }
+
+ secxlate = ctf_alloc(sizeof (int) * nshdr);
+ for (srcidx = dstidx = 0; srcidx < nshdr; srcidx++) {
+ Elf_Scn *scn = elf_getscn(src, srcidx);
+ GElf_Shdr shdr;
+ char *sname;
+
+ if (gelf_getshdr(scn, &shdr) == NULL) {
+ ret = ctf_set_errno(fp, ECTF_ELF);
+ goto out;
+ }
+ sname = elf_strptr(src, strndx, shdr.sh_name);
+ if (sname == NULL) {
+ ret = ctf_set_errno(fp, ECTF_ELF);
+ goto out;
+ }
+
+ if (strcmp(sname, CTF_ELF_SCN_NAME) == 0) {
+ secxlate[srcidx] = -1;
+ } else {
+ secxlate[srcidx] = dstidx++;
+ curnmoff += strlen(sname) + 1;
+ }
+
+ new_offset = (off_t)dehdr.e_phoff;
+ }
+
+ for (srcidx = 1; srcidx < nshdr; srcidx++) {
+ char *sname;
+
+ sscn = elf_getscn(src, srcidx);
+ if (gelf_getshdr(sscn, &shdr) == NULL) {
+ ret = ctf_set_errno(fp, ECTF_ELF);
+ goto out;
+ }
+
+ if (secxlate[srcidx] == -1) {
+ changing = 1;
+ continue;
+ }
+
+ dscn = elf_newscn(dst);
+ if (dscn == NULL) {
+ ret = ctf_set_errno(fp, ECTF_ELF);
+ goto out;
+ }
+
+ /*
+ * If this file has program headers, we need to explicitly lay
+ * out sections. If none of the sections prior to this one have
+ * been removed, then we can just use the existing location. If
+ * one or more sections have been changed, then we need to
+ * adjust this one to avoid holes.
+ */
+ if (changing && nphdr != 0) {
+ pad = new_offset % shdr.sh_addralign;
+
+ if (pad != 0)
+ new_offset += shdr.sh_addralign - pad;
+ shdr.sh_offset = new_offset;
+ }
+
+ shdr.sh_link = secxlate[shdr.sh_link];
+
+ if (shdr.sh_type == SHT_REL || shdr.sh_type == SHT_RELA)
+ shdr.sh_info = secxlate[shdr.sh_info];
+
+ sname = elf_strptr(src, strndx, shdr.sh_name);
+ if (sname == NULL) {
+ ret = ctf_set_errno(fp, ECTF_ELF);
+ goto out;
+ }
+ if ((sdata = elf_getdata(sscn, NULL)) == NULL) {
+ ret = ctf_set_errno(fp, ECTF_ELF);
+ goto out;
+ }
+ if ((ddata = elf_newdata(dscn)) == NULL) {
+ ret = ctf_set_errno(fp, ECTF_ELF);
+ goto out;
+ }
+ bcopy(sdata, ddata, sizeof (Elf_Data));
+
+ if (srcidx == strndx) {
+ char seclen = strlen(CTF_ELF_SCN_NAME);
+
+ strdatasz = ddata->d_size + shdr.sh_size +
+ seclen + 1;
+ ddata->d_buf = strdatabuf = ctf_alloc(strdatasz);
+ if (ddata->d_buf == NULL) {
+ ret = ctf_set_errno(fp, ECTF_ELF);
+ goto out;
+ }
+ bcopy(sdata->d_buf, ddata->d_buf, shdr.sh_size);
+ (void) strcpy((caddr_t)ddata->d_buf + shdr.sh_size,
+ CTF_ELF_SCN_NAME);
+ ctfnameoff = (off_t)shdr.sh_size;
+ shdr.sh_size += seclen + 1;
+ ddata->d_size += seclen + 1;
+
+ if (nphdr != 0)
+ changing = 1;
+ }
+
+ if (shdr.sh_type == SHT_SYMTAB && shdr.sh_entsize != 0) {
+ int nsym = shdr.sh_size / shdr.sh_entsize;
+
+ symtab_idx = secxlate[srcidx];
+
+ symdatasz = shdr.sh_size;
+ ddata->d_buf = symdatabuf = ctf_alloc(symdatasz);
+ if (ddata->d_buf == NULL) {
+ ret = ctf_set_errno(fp, ECTF_ELF);
+ goto out;
+ }
+ (void) bcopy(sdata->d_buf, ddata->d_buf, shdr.sh_size);
+
+ for (i = 0; i < nsym; i++) {
+ GElf_Sym sym;
+ short newscn;
+
+ (void) gelf_getsym(ddata, i, &sym);
+
+ if (sym.st_shndx >= SHN_LORESERVE)
+ continue;
+
+ if ((newscn = secxlate[sym.st_shndx]) !=
+ sym.st_shndx) {
+ sym.st_shndx =
+ (newscn == -1 ? 1 : newscn);
+
+ if (gelf_update_sym(ddata, i, &sym) ==
+ 0) {
+ ret = ctf_set_errno(fp,
+ ECTF_ELF);
+ goto out;
+ }
+ }
+ }
+ }
+
+ if (gelf_update_shdr(dscn, &shdr) == NULL) {
+ ret = ctf_set_errno(fp, ECTF_ELF);
+ goto out;
+ }
+
+ new_offset = (off_t)shdr.sh_offset;
+ if (shdr.sh_type != SHT_NOBITS)
+ new_offset += shdr.sh_size;
+ }
+
+ if (symtab_idx == -1) {
+ ret = ctf_set_errno(fp, ECTF_ELF);
+ goto out;
+ }
+
+ /* Add the ctf section */
+ if ((dscn = elf_newscn(dst)) == NULL) {
+ ret = ctf_set_errno(fp, ECTF_ELF);
+ goto out;
+ }
+ if (gelf_getshdr(dscn, &shdr) == NULL) {
+ ret = ctf_set_errno(fp, ECTF_ELF);
+ goto out;
+ }
+ shdr.sh_name = ctfnameoff;
+ shdr.sh_type = SHT_PROGBITS;
+ shdr.sh_size = fp->ctf_size;
+ shdr.sh_link = symtab_idx;
+ shdr.sh_addralign = 4;
+ if (changing && nphdr != 0) {
+ pad = new_offset % shdr.sh_addralign;
+
+ if (pad)
+ new_offset += shdr.sh_addralign - pad;
+
+ shdr.sh_offset = new_offset;
+ new_offset += shdr.sh_size;
+ }
+
+ if ((ddata = elf_newdata(dscn)) == NULL) {
+ ret = ctf_set_errno(fp, ECTF_ELF);
+ goto out;
+ }
+
+ if (compress != 0) {
+ int err;
+
+ if (ctf_zopen(&err) == NULL) {
+ ret = ctf_set_errno(fp, err);
+ goto out;
+ }
+
+ if ((err = ctf_compress(fp, &cdata, &asize, &elfsize)) != 0) {
+ ret = ctf_set_errno(fp, err);
+ goto out;
+ }
+ ddata->d_buf = cdata;
+ ddata->d_size = elfsize;
+ } else {
+ ddata->d_buf = (void *)fp->ctf_base;
+ ddata->d_size = fp->ctf_size;
+ }
+ ddata->d_align = shdr.sh_addralign;
+
+ if (gelf_update_shdr(dscn, &shdr) == 0) {
+ ret = ctf_set_errno(fp, ECTF_ELF);
+ goto out;
+ }
+
+ /* update the section header location */
+ if (nphdr != 0) {
+ size_t align = gelf_fsize(dst, ELF_T_ADDR, 1, EV_CURRENT);
+ size_t r = new_offset % align;
+
+ if (r)
+ new_offset += align - r;
+
+ dehdr.e_shoff = new_offset;
+ }
+
+ /* commit to disk */
+ if (sehdr.e_shstrndx == SHN_XINDEX)
+ dehdr.e_shstrndx = SHN_XINDEX;
+ else
+ dehdr.e_shstrndx = secxlate[sehdr.e_shstrndx];
+ if (gelf_update_ehdr(dst, &dehdr) == NULL) {
+ ret = ctf_set_errno(fp, ECTF_ELF);
+ goto out;
+ }
+ if (elf_update(dst, ELF_C_WRITE) < 0) {
+ ret = ctf_set_errno(fp, ECTF_ELF);
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (strdatabuf != NULL)
+ ctf_free(strdatabuf, strdatasz);
+ if (symdatabuf != NULL)
+ ctf_free(symdatabuf, symdatasz);
+ if (cdata != NULL)
+ ctf_data_free(cdata, fp->ctf_size);
+ if (secxlate != NULL)
+ ctf_free(secxlate, sizeof (int) * nshdr);
+
+ return (ret);
+}
+
+int
+ctf_elffdwrite(ctf_file_t *fp, int ifd, int ofd, int flags)
+{
+ int ret;
+ Elf *ielf, *oelf;
+
+ (void) elf_version(EV_CURRENT);
+ if ((ielf = elf_begin(ifd, ELF_C_READ, NULL)) == NULL)
+ return (ctf_set_errno(fp, ECTF_ELF));
+
+ if ((oelf = elf_begin(ofd, ELF_C_WRITE, NULL)) == NULL)
+ return (ctf_set_errno(fp, ECTF_ELF));
+
+ ret = ctf_write_elf(fp, ielf, oelf, flags);
+
+ (void) elf_end(ielf);
+ (void) elf_end(oelf);
+
+ return (ret);
+}
+
+int
+ctf_elfwrite(ctf_file_t *fp, const char *input, const char *output, int flags)
+{
+ struct stat st;
+ int ifd, ofd, ret;
+
+ if ((ifd = open(input, O_RDONLY)) < 0)
+ return (ctf_set_errno(fp, errno));
+
+ if (fstat(ifd, &st) < 0)
+ return (ctf_set_errno(fp, errno));
+
+ if ((ofd = open(output, O_RDWR | O_CREAT | O_TRUNC, st.st_mode)) < 0)
+ return (ctf_set_errno(fp, errno));
+
+ ret = ctf_elffdwrite(fp, ifd, ofd, flags);
+
+ if (close(ifd) != 0 && ret != 0)
+ ret = ctf_set_errno(fp, errno);
+ if (close(ofd) != 0 && ret != 0)
+ ret = ctf_set_errno(fp, errno);
+
+ return (ret);
+}
diff --git a/usr/src/lib/libctf/common/ctf_lib.c b/usr/src/lib/libctf/common/ctf_lib.c
index e71ebc6d9d..6b637ba663 100644
--- a/usr/src/lib/libctf/common/ctf_lib.c
+++ b/usr/src/lib/libctf/common/ctf_lib.c
@@ -23,6 +23,9 @@
* Copyright 2003 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2015, Joyent, Inc.
+ */
#include <sys/types.h>
#include <sys/stat.h>
@@ -33,6 +36,9 @@
#include <errno.h>
#include <dlfcn.h>
#include <gelf.h>
+#include <zlib.h>
+#include <zone.h>
+#include <sys/debug.h>
#ifdef _LP64
static const char *_libctf_zlib = "/usr/lib/64/libz.so.1";
@@ -42,6 +48,9 @@ static const char *_libctf_zlib = "/usr/lib/libz.so.1";
static struct {
int (*z_uncompress)(uchar_t *, ulong_t *, const uchar_t *, ulong_t);
+ int (*z_initcomp)(z_stream *, int, const char *, int);
+ int (*z_compress)(z_stream *, int);
+ int (*z_finicomp)(z_stream *);
const char *(*z_error)(int);
void *z_dlp;
} zlib;
@@ -49,6 +58,18 @@ static struct {
static size_t _PAGESIZE;
static size_t _PAGEMASK;
+static uint64_t ctf_phase = 0;
+
+#define CTF_COMPRESS_CHUNK (64*1024)
+
+typedef struct ctf_zdata {
+ void *czd_buf;
+ void *czd_next;
+ ctf_file_t *czd_ctfp;
+ size_t czd_allocsz;
+ z_stream czd_zstr;
+} ctf_zdata_t;
+
#pragma init(_libctf_init)
void
_libctf_init(void)
@@ -72,21 +93,47 @@ _libctf_init(void)
void *
ctf_zopen(int *errp)
{
- ctf_dprintf("decompressing CTF data using %s\n", _libctf_zlib);
+ char buf[MAXPATHLEN];
+ const char *path = _libctf_zlib, *zroot;
if (zlib.z_dlp != NULL)
return (zlib.z_dlp); /* library is already loaded */
- if (access(_libctf_zlib, R_OK) == -1)
+ /*
+ * Get the zone native root. For the tools build, we don't need
+ * this (it seems fair to impose that we always build the system in
+ * a native zone), and we want to allow build machines that are older
+ * that the notion of the native root, so we only actually make this
+ * call if we're not the tools build.
+ */
+#ifndef CTF_TOOLS_BUILD
+ zroot = zone_get_nroot();
+#else
+ zroot = NULL;
+#endif
+
+ if (zroot != NULL) {
+ (void) snprintf(buf, MAXPATHLEN, "%s/%s", zroot, _libctf_zlib);
+ path = buf;
+ }
+
+ ctf_dprintf("decompressing CTF data using %s\n", path);
+
+ if (access(path, R_OK) == -1)
return (ctf_set_open_errno(errp, ECTF_ZMISSING));
- if ((zlib.z_dlp = dlopen(_libctf_zlib, RTLD_LAZY | RTLD_LOCAL)) == NULL)
+ if ((zlib.z_dlp = dlopen(path, RTLD_LAZY | RTLD_LOCAL)) == NULL)
return (ctf_set_open_errno(errp, ECTF_ZINIT));
zlib.z_uncompress = (int (*)()) dlsym(zlib.z_dlp, "uncompress");
+ zlib.z_initcomp = (int (*)()) dlsym(zlib.z_dlp, "deflateInit_");
+ zlib.z_compress = (int (*)()) dlsym(zlib.z_dlp, "deflate");
+ zlib.z_finicomp = (int (*)()) dlsym(zlib.z_dlp, "deflateEnd");
zlib.z_error = (const char *(*)()) dlsym(zlib.z_dlp, "zError");
- if (zlib.z_uncompress == NULL || zlib.z_error == NULL) {
+ if (zlib.z_uncompress == NULL || zlib.z_error == NULL ||
+ zlib.z_initcomp == NULL|| zlib.z_compress == NULL ||
+ zlib.z_finicomp == NULL) {
(void) dlclose(zlib.z_dlp);
bzero(&zlib, sizeof (zlib));
return (ctf_set_open_errno(errp, ECTF_ZINIT));
@@ -111,6 +158,207 @@ z_strerror(int err)
return (zlib.z_error(err));
}
+static int
+ctf_zdata_init(ctf_zdata_t *czd, ctf_file_t *fp)
+{
+ int err;
+ ctf_header_t *cthp;
+
+ bzero(czd, sizeof (ctf_zdata_t));
+
+ czd->czd_allocsz = fp->ctf_size;
+ czd->czd_buf = ctf_data_alloc(czd->czd_allocsz);
+ if (czd->czd_buf == MAP_FAILED)
+ return (ctf_set_errno(fp, ENOMEM));
+
+ bcopy(fp->ctf_base, czd->czd_buf, sizeof (ctf_header_t));
+ czd->czd_ctfp = fp;
+ cthp = czd->czd_buf;
+ cthp->cth_flags |= CTF_F_COMPRESS;
+ czd->czd_next = (void *)((uintptr_t)czd->czd_buf +
+ sizeof (ctf_header_t));
+
+ if ((err = zlib.z_initcomp(&czd->czd_zstr, Z_BEST_COMPRESSION,
+ ZLIB_VERSION, sizeof (z_stream))) != Z_OK)
+ return (ctf_set_errno(fp, ECTF_ZLIB));
+
+ return (0);
+}
+
+static int
+ctf_zdata_grow(ctf_zdata_t *czd)
+{
+ size_t off;
+ size_t newsz;
+ void *ndata;
+
+ off = (uintptr_t)czd->czd_next - (uintptr_t)czd->czd_buf;
+ newsz = czd->czd_allocsz + CTF_COMPRESS_CHUNK;
+ ndata = ctf_data_alloc(newsz);
+ if (ndata == MAP_FAILED) {
+ return (ctf_set_errno(czd->czd_ctfp, ENOMEM));
+ }
+
+ bcopy(czd->czd_buf, ndata, off);
+ ctf_data_free(czd->czd_buf, czd->czd_allocsz);
+ czd->czd_allocsz = newsz;
+ czd->czd_buf = ndata;
+ czd->czd_next = (void *)((uintptr_t)ndata + off);
+
+ czd->czd_zstr.next_out = (Bytef *)czd->czd_next;
+ czd->czd_zstr.avail_out = CTF_COMPRESS_CHUNK;
+ return (0);
+}
+
+static int
+ctf_zdata_compress_buffer(ctf_zdata_t *czd, const void *buf, size_t bufsize)
+{
+ int err;
+
+ czd->czd_zstr.next_out = czd->czd_next;
+ czd->czd_zstr.avail_out = czd->czd_allocsz -
+ (czd->czd_next - czd->czd_buf);
+ czd->czd_zstr.next_in = (Bytef *)buf;
+ czd->czd_zstr.avail_in = bufsize;
+
+ while (czd->czd_zstr.avail_in != 0) {
+ if (czd->czd_zstr.avail_out == 0) {
+ czd->czd_next = czd->czd_zstr.next_out;
+ if ((err = ctf_zdata_grow(czd)) != 0) {
+ return (err);
+ }
+ }
+
+ if ((err = zlib.z_compress(&czd->czd_zstr, Z_NO_FLUSH)) != Z_OK)
+ return (ctf_set_errno(czd->czd_ctfp, ECTF_ZLIB));
+ }
+ czd->czd_next = czd->czd_zstr.next_out;
+
+ return (0);
+}
+
+static int
+ctf_zdata_flush(ctf_zdata_t *czd, boolean_t finish)
+{
+ int err;
+ int flag = finish == B_TRUE ? Z_FINISH : Z_FULL_FLUSH;
+ int bret = finish == B_TRUE ? Z_STREAM_END : Z_BUF_ERROR;
+
+ for (;;) {
+ if (czd->czd_zstr.avail_out == 0) {
+ czd->czd_next = czd->czd_zstr.next_out;
+ if ((err = ctf_zdata_grow(czd)) != 0) {
+ return (err);
+ }
+ }
+
+ err = zlib.z_compress(&czd->czd_zstr, flag);
+ if (err == bret) {
+ break;
+ }
+ if (err != Z_OK)
+ return (ctf_set_errno(czd->czd_ctfp, ECTF_ZLIB));
+
+ }
+
+ czd->czd_next = czd->czd_zstr.next_out;
+
+ return (0);
+}
+
+static int
+ctf_zdata_end(ctf_zdata_t *czd)
+{
+ int ret;
+
+ if ((ret = ctf_zdata_flush(czd, B_TRUE)) != 0)
+ return (ret);
+
+ if ((ret = zlib.z_finicomp(&czd->czd_zstr)) != 0)
+ return (ctf_set_errno(czd->czd_ctfp, ECTF_ZLIB));
+
+ return (0);
+}
+
+static void
+ctf_zdata_cleanup(ctf_zdata_t *czd)
+{
+ ctf_data_free(czd->czd_buf, czd->czd_allocsz);
+ (void) zlib.z_finicomp(&czd->czd_zstr);
+}
+
+/*
+ * Compress our CTF data and return both the size of the compressed data and the
+ * size of the allocation. These may be different due to the nature of
+ * compression.
+ *
+ * In addition, we flush the compression inbetween our two phases such that we
+ * maintain a different dictionary bbetween the CTF data and the string section.
+ */
+int
+ctf_compress(ctf_file_t *fp, void **buf, size_t *allocsz, size_t *elfsize)
+{
+ int err;
+ ctf_zdata_t czd;
+ ctf_header_t *cthp = (ctf_header_t *)fp->ctf_base;
+
+ if ((err = ctf_zdata_init(&czd, fp)) != 0)
+ return (err);
+
+ if ((err = ctf_zdata_compress_buffer(&czd, fp->ctf_buf,
+ cthp->cth_stroff)) != 0) {
+ ctf_zdata_cleanup(&czd);
+ return (err);
+ }
+
+ if ((err = ctf_zdata_flush(&czd, B_FALSE)) != 0) {
+ ctf_zdata_cleanup(&czd);
+ return (err);
+ }
+
+ if ((err = ctf_zdata_compress_buffer(&czd,
+ fp->ctf_buf + cthp->cth_stroff, cthp->cth_strlen)) != 0) {
+ ctf_zdata_cleanup(&czd);
+ return (err);
+ }
+
+ if ((err = ctf_zdata_end(&czd)) != 0) {
+ ctf_zdata_cleanup(&czd);
+ return (err);
+ }
+
+ *buf = czd.czd_buf;
+ *allocsz = czd.czd_allocsz;
+ *elfsize = (uintptr_t)(czd.czd_next - czd.czd_buf);
+
+ return (0);
+}
+
+int
+z_compress(void *dst, size_t *dstlen, const void *src, size_t srclen)
+{
+ z_stream zs;
+ int err;
+
+ bzero(&zs, sizeof (z_stream));
+ zs.next_in = (uchar_t *)src;
+ zs.avail_in = srclen;
+ zs.next_out = dst;
+ zs.avail_out = *dstlen;
+
+ if ((err = zlib.z_initcomp(&zs, Z_BEST_COMPRESSION, ZLIB_VERSION,
+ sizeof (z_stream))) != Z_OK)
+ return (err);
+
+ if ((err = zlib.z_compress(&zs, Z_FINISH)) != Z_STREAM_END) {
+ (void) zlib.z_finicomp(&zs);
+ return (err == Z_OK ? Z_BUF_ERROR : err);
+ }
+
+ *dstlen = zs.total_out;
+ return (zlib.z_finicomp(&zs));
+}
+
/*
* Convert a 32-bit ELF file header into GElf.
*/
@@ -189,7 +437,7 @@ ctf_sect_munmap(const ctf_sect_t *sp)
* responsible for closing the file descriptor when it is no longer needed.
*/
ctf_file_t *
-ctf_fdopen(int fd, int *errp)
+ctf_fdcreate_int(int fd, int *errp, ctf_sect_t *ctfp)
{
ctf_sect_t ctfsect, symsect, strsect;
ctf_file_t *fp = NULL;
@@ -221,6 +469,9 @@ ctf_fdopen(int fd, int *errp)
*/
if (nbytes >= sizeof (ctf_preamble_t) &&
hdr.ctf.ctp_magic == CTF_MAGIC) {
+ if (ctfp != NULL)
+ return (ctf_set_open_errno(errp, EINVAL));
+
if (hdr.ctf.ctp_version > CTF_VERSION)
return (ctf_set_open_errno(errp, ECTF_CTFVERS));
@@ -370,7 +621,8 @@ ctf_fdopen(int fd, int *errp)
continue; /* corrupt sh_name field */
if (shp->sh_type == SHT_PROGBITS &&
- strcmp(strs + shp->sh_name, _CTF_SECTION) == 0) {
+ strcmp(strs + shp->sh_name, _CTF_SECTION) == 0 &&
+ ctfp == NULL) {
ctfsect.cts_name = strs + shp->sh_name;
ctfsect.cts_type = shp->sh_type;
ctfsect.cts_flags = shp->sh_flags;
@@ -397,18 +649,22 @@ ctf_fdopen(int fd, int *errp)
free(sp); /* free section header array */
- if (ctfsect.cts_type == SHT_NULL) {
- (void) munmap(strs_map, strs_mapsz);
- return (ctf_set_open_errno(errp, ECTF_NOCTFDATA));
- }
+ if (ctfp == NULL) {
+ if (ctfsect.cts_type == SHT_NULL && ctfp == NULL) {
+ (void) munmap(strs_map, strs_mapsz);
+ return (ctf_set_open_errno(errp,
+ ECTF_NOCTFDATA));
+ }
- /*
- * Now mmap the CTF data, symtab, and strtab sections and
- * call ctf_bufopen() to do the rest of the work.
- */
- if (ctf_sect_mmap(&ctfsect, fd) == MAP_FAILED) {
- (void) munmap(strs_map, strs_mapsz);
- return (ctf_set_open_errno(errp, ECTF_MMAP));
+ /*
+ * Now mmap the CTF data, symtab, and strtab sections
+ * and call ctf_bufopen() to do the rest of the work.
+ */
+ if (ctf_sect_mmap(&ctfsect, fd) == MAP_FAILED) {
+ (void) munmap(strs_map, strs_mapsz);
+ return (ctf_set_open_errno(errp, ECTF_MMAP));
+ }
+ ctfp = &ctfsect;
}
if (symsect.cts_type != SHT_NULL &&
@@ -418,12 +674,13 @@ ctf_fdopen(int fd, int *errp)
(void) ctf_set_open_errno(errp, ECTF_MMAP);
goto bad; /* unmap all and abort */
}
- fp = ctf_bufopen(&ctfsect, &symsect, &strsect, errp);
+ fp = ctf_bufopen(ctfp, &symsect, &strsect, errp);
} else
- fp = ctf_bufopen(&ctfsect, NULL, NULL, errp);
+ fp = ctf_bufopen(ctfp, NULL, NULL, errp);
bad:
if (fp == NULL) {
- ctf_sect_munmap(&ctfsect);
+ if (ctfp == NULL)
+ ctf_sect_munmap(&ctfsect);
ctf_sect_munmap(&symsect);
ctf_sect_munmap(&strsect);
} else
@@ -436,6 +693,12 @@ bad:
return (ctf_set_open_errno(errp, ECTF_FMT));
}
+ctf_file_t *
+ctf_fdopen(int fd, int *errp)
+{
+ return (ctf_fdcreate_int(fd, errp, NULL));
+}
+
/*
* Open the specified file and return a pointer to a CTF container. The file
* can be either an ELF file or raw CTF file. This is just a convenient
@@ -502,3 +765,25 @@ ctf_version(int version)
return (_libctf_version);
}
+
+/*
+ * A utility function for folks debugging CTF conversion and merging.
+ */
+void
+ctf_phase_dump(ctf_file_t *fp, const char *phase)
+{
+ int fd;
+ static char *base;
+ char path[MAXPATHLEN];
+
+ if (base == NULL && (base = getenv("LIBCTF_WRITE_PHASES")) == NULL)
+ return;
+
+ (void) snprintf(path, sizeof (path), "%s/libctf.%s.%d.ctf", base,
+ phase != NULL ? phase : "",
+ ctf_phase);
+ if ((fd = open(path, O_CREAT | O_TRUNC | O_RDWR, 0777)) < 0)
+ return;
+ (void) ctf_write(fp, fd);
+ (void) close(fd);
+}
diff --git a/usr/src/lib/libctf/common/ctf_merge.c b/usr/src/lib/libctf/common/ctf_merge.c
new file mode 100644
index 0000000000..f23dbc232d
--- /dev/null
+++ b/usr/src/lib/libctf/common/ctf_merge.c
@@ -0,0 +1,1570 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2015 Joyent, Inc.
+ */
+
+/*
+ * To perform a merge of two CTF containers, we first diff the two containers
+ * types. For every type that's in the src container, but not in the dst
+ * container, we note it and add it to dst container. If there are any objects
+ * or functions associated with src, we go through and update the types that
+ * they refer to such that they all refer to types in the dst container.
+ *
+ * The bulk of the logic for the merge, after we've run the diff, occurs in
+ * ctf_merge_common().
+ *
+ * In terms of exported APIs, we don't really export a simple merge two
+ * containers, as the general way this is used, in something like ctfmerge(1),
+ * is to add all the containers and then let us figure out the best way to merge
+ * it. In the future we'll want to grow some control over the number of threads
+ * that we use to do this, if we end up wanting a muli-threaded merge. If we do,
+ * we should take care to do the merge in the same way every time.
+ */
+
+#include <libctf_impl.h>
+#include <sys/debug.h>
+#include <sys/list.h>
+#include <stddef.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <mergeq.h>
+#include <errno.h>
+
+typedef struct ctf_merge_tinfo {
+ uint16_t cmt_map; /* Map to the type in out */
+ boolean_t cmt_fixup;
+ boolean_t cmt_forward;
+ boolean_t cmt_missing;
+} ctf_merge_tinfo_t;
+
+/*
+ * State required for doing an individual merge of two containers.
+ */
+typedef struct ctf_merge_types {
+ ctf_file_t *cm_out; /* Output CTF file */
+ ctf_file_t *cm_src; /* Input CTF file */
+ ctf_merge_tinfo_t *cm_tmap; /* Type state information */
+ boolean_t cm_dedup; /* Are we doing a dedup? */
+ boolean_t cm_unique; /* are we doing a uniquify? */
+} ctf_merge_types_t;
+
+typedef struct ctf_merge_objmap {
+ list_node_t cmo_node;
+ const char *cmo_name; /* Symbol name */
+ ulong_t cmo_idx; /* Symbol ID */
+ ctf_id_t cmo_tid; /* Type ID */
+} ctf_merge_objmap_t;
+
+typedef struct ctf_merge_funcmap {
+ list_node_t cmf_node;
+ const char *cmf_name; /* Symbol name */
+ ulong_t cmf_idx; /* Symbol ID */
+ ctf_id_t cmf_rtid; /* Type ID */
+ uint_t cmf_flags; /* ctf_funcinfo_t ctc_flags */
+ uint_t cmf_argc; /* Number of arguments */
+ ctf_id_t cmf_args[]; /* Types of arguments */
+} ctf_merge_funcmap_t;
+
+typedef struct ctf_merge_input {
+ list_node_t cmi_node;
+ ctf_file_t *cmi_input;
+ list_t cmi_omap;
+ list_t cmi_fmap;
+ boolean_t cmi_created;
+} ctf_merge_input_t;
+
+struct ctf_merge_handle {
+ list_t cmh_inputs; /* Input list */
+ uint_t cmh_ninputs; /* Number of inputs */
+ uint_t cmh_nthreads; /* Number of threads to use */
+ ctf_file_t *cmh_unique; /* ctf to uniquify against */
+ boolean_t cmh_msyms; /* Should we merge symbols/funcs? */
+ int cmh_ofd; /* FD for output file */
+ int cmh_flags; /* Flags that control merge behavior */
+ char *cmh_label; /* Optional label */
+ char *cmh_pname; /* Parent name */
+};
+
+static int ctf_merge_add_type(ctf_merge_types_t *, ctf_id_t);
+
+static ctf_id_t
+ctf_merge_gettype(ctf_merge_types_t *cmp, ctf_id_t id)
+{
+ if (cmp->cm_dedup == B_FALSE) {
+ VERIFY(cmp->cm_tmap[id].cmt_map != 0);
+ return (cmp->cm_tmap[id].cmt_map);
+ }
+
+ while (cmp->cm_tmap[id].cmt_missing == B_FALSE) {
+ VERIFY(cmp->cm_tmap[id].cmt_map != 0);
+ id = cmp->cm_tmap[id].cmt_map;
+ }
+ VERIFY(cmp->cm_tmap[id].cmt_map != 0);
+ return (cmp->cm_tmap[id].cmt_map);
+}
+
+static void
+ctf_merge_diffcb(ctf_file_t *ifp, ctf_id_t iid, boolean_t same, ctf_file_t *ofp,
+ ctf_id_t oid, void *arg)
+{
+ ctf_merge_types_t *cmp = arg;
+ ctf_merge_tinfo_t *cmt = cmp->cm_tmap;
+
+ if (same == B_TRUE) {
+ if (ctf_type_kind(ifp, iid) == CTF_K_FORWARD &&
+ ctf_type_kind(ofp, oid) != CTF_K_FORWARD) {
+ VERIFY(cmt[oid].cmt_map == 0);
+
+ /*
+ * If we're uniquifying types, it's possible for the
+ * container that we're uniquifying against to have a
+ * forward which exists in the container being reduced.
+ * For example, genunix has the machcpu structure as a
+ * forward which is actually in unix and we uniquify
+ * unix against genunix. In such cases, we explicitly do
+ * not do any mapping of the forward information, lest
+ * we risk losing the real definition. Instead, mark
+ * that it's missing.
+ */
+ if (cmp->cm_unique == B_TRUE) {
+ cmt[oid].cmt_missing = B_TRUE;
+ return;
+ }
+
+ cmt[oid].cmt_map = iid;
+ cmt[oid].cmt_forward = B_TRUE;
+ ctf_dprintf("merge diff forward mapped %d->%d\n", oid,
+ iid);
+ return;
+ }
+
+ /*
+ * We could have multiple things that a given type ends up
+ * matching in the world of forwards and pointers to forwards.
+ * For now just take the first one...
+ */
+ if (cmt[oid].cmt_map != 0)
+ return;
+ cmt[oid].cmt_map = iid;
+ ctf_dprintf("merge diff mapped %d->%d\n", oid, iid);
+ } else if (ifp == cmp->cm_src) {
+ VERIFY(cmt[iid].cmt_map == 0);
+ cmt[iid].cmt_missing = B_TRUE;
+ ctf_dprintf("merge diff said %d is missing\n", iid);
+ }
+}
+
+static int
+ctf_merge_add_number(ctf_merge_types_t *cmp, ctf_id_t id)
+{
+ int ret, flags;
+ const ctf_type_t *tp;
+ const char *name;
+ ctf_encoding_t en;
+
+ if (ctf_type_encoding(cmp->cm_src, id, &en) != 0)
+ return (CTF_ERR);
+
+ tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
+ name = ctf_strraw(cmp->cm_src, tp->ctt_name);
+ if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
+ flags = CTF_ADD_ROOT;
+ else
+ flags = CTF_ADD_NONROOT;
+
+ ret = ctf_add_encoded(cmp->cm_out, flags, name, &en,
+ ctf_type_kind(cmp->cm_src, id));
+
+ if (ret == CTF_ERR)
+ return (ret);
+
+ VERIFY(cmp->cm_tmap[id].cmt_map == 0);
+ cmp->cm_tmap[id].cmt_map = ret;
+ return (0);
+}
+
+static int
+ctf_merge_add_array(ctf_merge_types_t *cmp, ctf_id_t id)
+{
+ int ret, flags;
+ const ctf_type_t *tp;
+ ctf_arinfo_t ar;
+
+ if (ctf_array_info(cmp->cm_src, id, &ar) == CTF_ERR)
+ return (CTF_ERR);
+
+ tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
+ if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
+ flags = CTF_ADD_ROOT;
+ else
+ flags = CTF_ADD_NONROOT;
+
+ if (cmp->cm_tmap[ar.ctr_contents].cmt_map == 0) {
+ ret = ctf_merge_add_type(cmp, ar.ctr_contents);
+ if (ret != 0)
+ return (ret);
+ ASSERT(cmp->cm_tmap[ar.ctr_contents].cmt_map != 0);
+ }
+ ar.ctr_contents = ctf_merge_gettype(cmp, ar.ctr_contents);
+
+ if (cmp->cm_tmap[ar.ctr_index].cmt_map == 0) {
+ ret = ctf_merge_add_type(cmp, ar.ctr_index);
+ if (ret != 0)
+ return (ret);
+ ASSERT(cmp->cm_tmap[ar.ctr_index].cmt_map != 0);
+ }
+ ar.ctr_index = ctf_merge_gettype(cmp, ar.ctr_index);
+
+ ret = ctf_add_array(cmp->cm_out, flags, &ar);
+ if (ret == CTF_ERR)
+ return (ret);
+
+ VERIFY(cmp->cm_tmap[id].cmt_map == 0);
+ cmp->cm_tmap[id].cmt_map = ret;
+
+ return (0);
+}
+
+static int
+ctf_merge_add_reftype(ctf_merge_types_t *cmp, ctf_id_t id)
+{
+ int ret, flags;
+ const ctf_type_t *tp;
+ ctf_id_t reftype;
+ const char *name;
+
+ tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
+ name = ctf_strraw(cmp->cm_src, tp->ctt_name);
+ if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
+ flags = CTF_ADD_ROOT;
+ else
+ flags = CTF_ADD_NONROOT;
+
+ reftype = ctf_type_reference(cmp->cm_src, id);
+ if (reftype == CTF_ERR)
+ return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src)));
+
+ if (cmp->cm_tmap[reftype].cmt_map == 0) {
+ ret = ctf_merge_add_type(cmp, reftype);
+ if (ret != 0)
+ return (ret);
+ ASSERT(cmp->cm_tmap[reftype].cmt_map != 0);
+ }
+ reftype = ctf_merge_gettype(cmp, reftype);
+
+ ret = ctf_add_reftype(cmp->cm_out, flags, name, reftype,
+ ctf_type_kind(cmp->cm_src, id));
+ if (ret == CTF_ERR)
+ return (ret);
+
+ VERIFY(cmp->cm_tmap[id].cmt_map == 0);
+ cmp->cm_tmap[id].cmt_map = ret;
+ return (0);
+}
+
+static int
+ctf_merge_add_typedef(ctf_merge_types_t *cmp, ctf_id_t id)
+{
+ int ret, flags;
+ const ctf_type_t *tp;
+ const char *name;
+ ctf_id_t reftype;
+
+ tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
+ name = ctf_strraw(cmp->cm_src, tp->ctt_name);
+ if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
+ flags = CTF_ADD_ROOT;
+ else
+ flags = CTF_ADD_NONROOT;
+
+ reftype = ctf_type_reference(cmp->cm_src, id);
+ if (reftype == CTF_ERR)
+ return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src)));
+
+ if (cmp->cm_tmap[reftype].cmt_map == 0) {
+ ret = ctf_merge_add_type(cmp, reftype);
+ if (ret != 0)
+ return (ret);
+ ASSERT(cmp->cm_tmap[reftype].cmt_map != 0);
+ }
+ reftype = ctf_merge_gettype(cmp, reftype);
+
+ ret = ctf_add_typedef(cmp->cm_out, flags, name, reftype);
+ if (ret == CTF_ERR)
+ return (ret);
+
+ VERIFY(cmp->cm_tmap[id].cmt_map == 0);
+ cmp->cm_tmap[id].cmt_map = ret;
+ return (0);
+}
+
+typedef struct ctf_merge_enum {
+ ctf_file_t *cme_fp;
+ ctf_id_t cme_id;
+} ctf_merge_enum_t;
+
+static int
+ctf_merge_add_enumerator(const char *name, int value, void *arg)
+{
+ ctf_merge_enum_t *cmep = arg;
+
+ return (ctf_add_enumerator(cmep->cme_fp, cmep->cme_id, name, value) ==
+ CTF_ERR);
+}
+
+static int
+ctf_merge_add_enum(ctf_merge_types_t *cmp, ctf_id_t id)
+{
+ int flags;
+ const ctf_type_t *tp;
+ const char *name;
+ ctf_id_t enumid;
+ ctf_merge_enum_t cme;
+
+ tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
+ name = ctf_strraw(cmp->cm_src, tp->ctt_name);
+ if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
+ flags = CTF_ADD_ROOT;
+ else
+ flags = CTF_ADD_NONROOT;
+
+ enumid = ctf_add_enum(cmp->cm_out, flags, name);
+ if (enumid == CTF_ERR)
+ return (enumid);
+
+ cme.cme_fp = cmp->cm_out;
+ cme.cme_id = enumid;
+ if (ctf_enum_iter(cmp->cm_src, id, ctf_merge_add_enumerator,
+ &cme) != 0)
+ return (CTF_ERR);
+
+ VERIFY(cmp->cm_tmap[id].cmt_map == 0);
+ cmp->cm_tmap[id].cmt_map = enumid;
+ return (0);
+}
+
+static int
+ctf_merge_add_func(ctf_merge_types_t *cmp, ctf_id_t id)
+{
+ int ret, flags, i;
+ const ctf_type_t *tp;
+ ctf_funcinfo_t ctc;
+ ctf_id_t *argv;
+
+ tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
+ if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
+ flags = CTF_ADD_ROOT;
+ else
+ flags = CTF_ADD_NONROOT;
+
+ if (ctf_func_info_by_id(cmp->cm_src, id, &ctc) == CTF_ERR)
+ return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src)));
+
+ argv = ctf_alloc(sizeof (ctf_id_t) * ctc.ctc_argc);
+ if (argv == NULL)
+ return (ctf_set_errno(cmp->cm_out, ENOMEM));
+ if (ctf_func_args_by_id(cmp->cm_src, id, ctc.ctc_argc, argv) ==
+ CTF_ERR) {
+ ctf_free(argv, sizeof (ctf_id_t) * ctc.ctc_argc);
+ return (ctf_set_errno(cmp->cm_out, ctf_errno(cmp->cm_src)));
+ }
+
+ if (cmp->cm_tmap[ctc.ctc_return].cmt_map == 0) {
+ ret = ctf_merge_add_type(cmp, ctc.ctc_return);
+ if (ret != 0)
+ return (ret);
+ ASSERT(cmp->cm_tmap[ctc.ctc_return].cmt_map != 0);
+ }
+ ctc.ctc_return = ctf_merge_gettype(cmp, ctc.ctc_return);
+
+ for (i = 0; i < ctc.ctc_argc; i++) {
+ if (cmp->cm_tmap[argv[i]].cmt_map == 0) {
+ ret = ctf_merge_add_type(cmp, argv[i]);
+ if (ret != 0)
+ return (ret);
+ ASSERT(cmp->cm_tmap[argv[i]].cmt_map != 0);
+ }
+ argv[i] = ctf_merge_gettype(cmp, argv[i]);
+ }
+
+ ret = ctf_add_funcptr(cmp->cm_out, flags, &ctc, argv);
+ ctf_free(argv, sizeof (ctf_id_t) * ctc.ctc_argc);
+ if (ret == CTF_ERR)
+ return (ret);
+
+ VERIFY(cmp->cm_tmap[id].cmt_map == 0);
+ cmp->cm_tmap[id].cmt_map = ret;
+ return (0);
+}
+
+static int
+ctf_merge_add_forward(ctf_merge_types_t *cmp, ctf_id_t id)
+{
+ int ret, flags;
+ const ctf_type_t *tp;
+ const char *name;
+
+ tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
+ name = ctf_strraw(cmp->cm_src, tp->ctt_name);
+ if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
+ flags = CTF_ADD_ROOT;
+ else
+ flags = CTF_ADD_NONROOT;
+
+ /*
+ * ctf_add_forward tries to check to see if a given forward already
+ * exists in one of its hash tables. If we're here then we know that we
+ * have a forward in a container that isn't present in another.
+ * Therefore, we choose a token hash table to satisfy the API choice
+ * here.
+ */
+ ret = ctf_add_forward(cmp->cm_out, flags, name, CTF_K_STRUCT);
+ if (ret == CTF_ERR)
+ return (CTF_ERR);
+
+ VERIFY(cmp->cm_tmap[id].cmt_map == 0);
+ cmp->cm_tmap[id].cmt_map = ret;
+ return (0);
+}
+
+typedef struct ctf_merge_su {
+ ctf_merge_types_t *cms_cm;
+ ctf_id_t cms_id;
+} ctf_merge_su_t;
+
+static int
+ctf_merge_add_member(const char *name, ctf_id_t type, ulong_t offset, void *arg)
+{
+ ctf_merge_su_t *cms = arg;
+
+ VERIFY(cms->cms_cm->cm_tmap[type].cmt_map != 0);
+ type = cms->cms_cm->cm_tmap[type].cmt_map;
+
+ ctf_dprintf("Trying to add member %s to %d\n", name, cms->cms_id);
+ return (ctf_add_member(cms->cms_cm->cm_out, cms->cms_id, name,
+ type, offset) == CTF_ERR);
+}
+
+/*
+ * During the first pass, we always add the generic structure and union but none
+ * of its members as they might not all have been mapped yet. Instead we just
+ * mark all structures and unions as needing to be fixed up.
+ */
+static int
+ctf_merge_add_sou(ctf_merge_types_t *cmp, ctf_id_t id, boolean_t forward)
+{
+ int flags, kind;
+ const ctf_type_t *tp;
+ const char *name;
+ ctf_id_t suid;
+
+ tp = LCTF_INDEX_TO_TYPEPTR(cmp->cm_src, id);
+ name = ctf_strraw(cmp->cm_src, tp->ctt_name);
+ if (CTF_INFO_ISROOT(tp->ctt_info) != 0)
+ flags = CTF_ADD_ROOT;
+ else
+ flags = CTF_ADD_NONROOT;
+ kind = ctf_type_kind(cmp->cm_src, id);
+
+ if (kind == CTF_K_STRUCT)
+ suid = ctf_add_struct(cmp->cm_out, flags, name);
+ else
+ suid = ctf_add_union(cmp->cm_out, flags, name);
+
+ if (suid == CTF_ERR)
+ return (suid);
+
+ /*
+ * If this is a forward reference then it's mapping should already
+ * exist.
+ */
+ if (forward == B_FALSE) {
+ VERIFY(cmp->cm_tmap[id].cmt_map == 0);
+ cmp->cm_tmap[id].cmt_map = suid;
+ ctf_dprintf("added sou \"%s\" as (%d) %d->%d\n", name, kind, id,
+ suid);
+ } else {
+ VERIFY(cmp->cm_tmap[id].cmt_map == suid);
+ }
+ cmp->cm_tmap[id].cmt_fixup = B_TRUE;
+
+ return (0);
+}
+
+static int
+ctf_merge_add_type(ctf_merge_types_t *cmp, ctf_id_t id)
+{
+ int kind, ret;
+
+ /*
+ * We may end up evaluating a type more than once as we may deal with it
+ * as we recursively evaluate some kind of reference and then we may see
+ * it normally.
+ */
+ if (cmp->cm_tmap[id].cmt_map != 0)
+ return (0);
+
+ kind = ctf_type_kind(cmp->cm_src, id);
+ switch (kind) {
+ case CTF_K_INTEGER:
+ case CTF_K_FLOAT:
+ ret = ctf_merge_add_number(cmp, id);
+ break;
+ case CTF_K_ARRAY:
+ ret = ctf_merge_add_array(cmp, id);
+ break;
+ case CTF_K_POINTER:
+ case CTF_K_VOLATILE:
+ case CTF_K_CONST:
+ case CTF_K_RESTRICT:
+ ret = ctf_merge_add_reftype(cmp, id);
+ break;
+ case CTF_K_TYPEDEF:
+ ret = ctf_merge_add_typedef(cmp, id);
+ break;
+ case CTF_K_ENUM:
+ ret = ctf_merge_add_enum(cmp, id);
+ break;
+ case CTF_K_FUNCTION:
+ ret = ctf_merge_add_func(cmp, id);
+ break;
+ case CTF_K_FORWARD:
+ ret = ctf_merge_add_forward(cmp, id);
+ break;
+ case CTF_K_STRUCT:
+ case CTF_K_UNION:
+ ret = ctf_merge_add_sou(cmp, id, B_FALSE);
+ break;
+ case CTF_K_UNKNOWN:
+ /*
+ * We don't add uknown types, and we later assert that nothing
+ * should reference them.
+ */
+ return (0);
+ default:
+ abort();
+ }
+
+ return (ret);
+}
+
+static int
+ctf_merge_fixup_sou(ctf_merge_types_t *cmp, ctf_id_t id)
+{
+ ctf_dtdef_t *dtd;
+ ctf_merge_su_t cms;
+ ctf_id_t mapid;
+ ssize_t size;
+
+ mapid = cmp->cm_tmap[id].cmt_map;
+ VERIFY(mapid != 0);
+ dtd = ctf_dtd_lookup(cmp->cm_out, mapid);
+ VERIFY(dtd != NULL);
+
+ ctf_dprintf("Trying to fix up sou %d\n", id);
+ cms.cms_cm = cmp;
+ cms.cms_id = mapid;
+ if (ctf_member_iter(cmp->cm_src, id, ctf_merge_add_member, &cms) != 0)
+ return (CTF_ERR);
+
+ if ((size = ctf_type_size(cmp->cm_src, id)) == CTF_ERR)
+ return (CTF_ERR);
+ if (ctf_set_size(cmp->cm_out, mapid, size) == CTF_ERR)
+ return (CTF_ERR);
+
+ return (0);
+}
+
+static int
+ctf_merge_fixup_type(ctf_merge_types_t *cmp, ctf_id_t id)
+{
+ int kind, ret;
+
+ kind = ctf_type_kind(cmp->cm_src, id);
+ switch (kind) {
+ case CTF_K_STRUCT:
+ case CTF_K_UNION:
+ ret = ctf_merge_fixup_sou(cmp, id);
+ break;
+ default:
+ VERIFY(0);
+ ret = CTF_ERR;
+ }
+
+ return (ret);
+}
+
+/*
+ * Now that we've successfully merged everything, we're going to clean
+ * up the merge type table. Traditionally if we had just two different
+ * files that we were working between, the types would be fully
+ * resolved. However, because we were comparing with ourself every step
+ * of the way and not our reduced self, we need to go through and update
+ * every mapped entry to what it now points to in the deduped file.
+ */
+static void
+ctf_merge_fixup_dedup_map(ctf_merge_types_t *cmp)
+{
+ int i;
+
+ for (i = 1; i < cmp->cm_src->ctf_typemax + 1; i++) {
+ ctf_id_t tid;
+
+ /*
+ * Missing types always have their id updated to exactly what it
+ * should be.
+ */
+ if (cmp->cm_tmap[i].cmt_missing == B_TRUE) {
+ VERIFY(cmp->cm_tmap[i].cmt_map != 0);
+ continue;
+ }
+
+ tid = i;
+ while (cmp->cm_tmap[tid].cmt_missing == B_FALSE) {
+ VERIFY(cmp->cm_tmap[tid].cmt_map != 0);
+ tid = cmp->cm_tmap[tid].cmt_map;
+ }
+ VERIFY(cmp->cm_tmap[tid].cmt_map != 0);
+ cmp->cm_tmap[i].cmt_map = cmp->cm_tmap[tid].cmt_map;
+ }
+}
+
+
+/*
+ * We're going to do three passes over the containers.
+ *
+ * Pass 1 checks for forward references in the output container that we know
+ * exist in the source container.
+ *
+ * Pass 2 adds all the missing types from the source container. As part of this
+ * we may be adding a type as a forward reference that doesn't exist yet.
+ * Any types that we encounter in this form, we need to add to a third pass.
+ *
+ * Pass 3 is the fixup pass. Here we go through and find all the types that were
+ * missing in the first.
+ *
+ * Importantly, we *must* call ctf_update between the second and third pass,
+ * otherwise several of the libctf functions will not properly find the data in
+ * the container. If we're doing a dedup we also fix up the type mapping.
+ */
+static int
+ctf_merge_common(ctf_merge_types_t *cmp)
+{
+ int ret, i;
+
+ ctf_phase_dump(cmp->cm_src, "merge-common-src");
+ ctf_phase_dump(cmp->cm_out, "merge-common-dest");
+
+ /* Pass 1 */
+ for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
+ if (cmp->cm_tmap[i].cmt_forward == B_TRUE) {
+ ret = ctf_merge_add_sou(cmp, i, B_TRUE);
+ if (ret != 0) {
+ return (ret);
+ }
+ }
+ }
+
+ /* Pass 2 */
+ for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
+ if (cmp->cm_tmap[i].cmt_missing == B_TRUE) {
+ ret = ctf_merge_add_type(cmp, i);
+ if (ret != 0) {
+ ctf_dprintf("Failed to merge type %d\n", i);
+ return (ret);
+ }
+ }
+ }
+
+ ret = ctf_update(cmp->cm_out);
+ if (ret != 0)
+ return (ret);
+
+ if (cmp->cm_dedup == B_TRUE) {
+ ctf_merge_fixup_dedup_map(cmp);
+ }
+
+ ctf_dprintf("Beginning merge pass 3\n");
+ /* Pass 3 */
+ for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
+ if (cmp->cm_tmap[i].cmt_fixup == B_TRUE) {
+ ret = ctf_merge_fixup_type(cmp, i);
+ if (ret != 0)
+ return (ret);
+ }
+ }
+
+ if (cmp->cm_dedup == B_TRUE) {
+ ctf_merge_fixup_dedup_map(cmp);
+ }
+
+ return (0);
+}
+
+/*
+ * Uniquification is slightly different from a stock merge. For starters, we
+ * don't need to replace any forward references in the output. In this case
+ * though, the types that already exist are in a parent container to the empty
+ * output container.
+ */
+static int
+ctf_merge_uniquify_types(ctf_merge_types_t *cmp)
+{
+ int i, ret;
+
+ for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
+ if (cmp->cm_tmap[i].cmt_missing == B_FALSE)
+ continue;
+ ret = ctf_merge_add_type(cmp, i);
+ if (ret != 0)
+ return (ret);
+ }
+
+ ret = ctf_update(cmp->cm_out);
+ if (ret != 0)
+ return (ret);
+
+ for (i = 1; i <= cmp->cm_src->ctf_typemax; i++) {
+ if (cmp->cm_tmap[i].cmt_fixup == B_FALSE)
+ continue;
+ ret = ctf_merge_fixup_type(cmp, i);
+ if (ret != 0)
+ return (ret);
+ }
+
+ return (0);
+}
+
+static int
+ctf_merge_types_init(ctf_merge_types_t *cmp)
+{
+ cmp->cm_tmap = ctf_alloc(sizeof (ctf_merge_tinfo_t) *
+ (cmp->cm_src->ctf_typemax + 1));
+ if (cmp->cm_tmap == NULL)
+ return (ctf_set_errno(cmp->cm_out, ENOMEM));
+ bzero(cmp->cm_tmap, sizeof (ctf_merge_tinfo_t) *
+ (cmp->cm_src->ctf_typemax + 1));
+ return (0);
+}
+
+static void
+ctf_merge_types_fini(ctf_merge_types_t *cmp)
+{
+ ctf_free(cmp->cm_tmap, sizeof (ctf_merge_tinfo_t) *
+ (cmp->cm_src->ctf_typemax + 1));
+}
+
+/*
+ * Merge the types contained inside of two input files. The second input file is
+ * always going to be the destination. We're guaranteed that it's always
+ * writeable.
+ */
+static int
+ctf_merge_types(void *arg, void *arg2, void **outp, void *unsued)
+{
+ int ret;
+ ctf_merge_types_t cm;
+ ctf_diff_t *cdp;
+ ctf_merge_objmap_t *cmo;
+ ctf_merge_funcmap_t *cmf;
+ ctf_merge_input_t *scmi = arg;
+ ctf_merge_input_t *dcmi = arg2;
+ ctf_file_t *out = dcmi->cmi_input;
+ ctf_file_t *source = scmi->cmi_input;
+
+ ctf_dprintf("merging %p->%p\n", source, out);
+
+ if (!(out->ctf_flags & LCTF_RDWR))
+ return (ctf_set_errno(out, ECTF_RDONLY));
+
+ if (ctf_getmodel(out) != ctf_getmodel(source))
+ return (ctf_set_errno(out, ECTF_DMODEL));
+
+ if ((ret = ctf_diff_init(out, source, &cdp)) != 0)
+ return (ret);
+
+ cm.cm_out = out;
+ cm.cm_src = source;
+ cm.cm_dedup = B_FALSE;
+ cm.cm_unique = B_FALSE;
+ ret = ctf_merge_types_init(&cm);
+ if (ret != 0) {
+ ctf_diff_fini(cdp);
+ return (ctf_set_errno(out, ret));
+ }
+
+ ret = ctf_diff_types(cdp, ctf_merge_diffcb, &cm);
+ if (ret != 0)
+ goto cleanup;
+ ret = ctf_merge_common(&cm);
+ ctf_dprintf("merge common returned with %d\n", ret);
+ if (ret == 0) {
+ ret = ctf_update(out);
+ ctf_dprintf("update returned with %d\n", ret);
+ } else {
+ goto cleanup;
+ }
+
+ /*
+ * Now we need to fix up the object and function maps.
+ */
+ for (cmo = list_head(&scmi->cmi_omap); cmo != NULL;
+ cmo = list_next(&scmi->cmi_omap, cmo)) {
+ if (cmo->cmo_tid == 0)
+ continue;
+ VERIFY(cm.cm_tmap[cmo->cmo_tid].cmt_map != 0);
+ cmo->cmo_tid = cm.cm_tmap[cmo->cmo_tid].cmt_map;
+ }
+
+ for (cmf = list_head(&scmi->cmi_fmap); cmf != NULL;
+ cmf = list_next(&scmi->cmi_fmap, cmf)) {
+ int i;
+
+ VERIFY(cm.cm_tmap[cmf->cmf_rtid].cmt_map != 0);
+ cmf->cmf_rtid = cm.cm_tmap[cmf->cmf_rtid].cmt_map;
+ for (i = 0; i < cmf->cmf_argc; i++) {
+ VERIFY(cm.cm_tmap[cmf->cmf_args[i]].cmt_map != 0);
+ cmf->cmf_args[i] = cm.cm_tmap[cmf->cmf_args[i]].cmt_map;
+ }
+ }
+
+ /*
+ * Now that we've fixed things up, we need to give our function and
+ * object maps to the destination, such that it can continue to update
+ * them going forward.
+ */
+ list_move_tail(&dcmi->cmi_fmap, &scmi->cmi_fmap);
+ list_move_tail(&dcmi->cmi_omap, &scmi->cmi_omap);
+
+cleanup:
+ if (ret == 0)
+ *outp = dcmi;
+ ctf_merge_types_fini(&cm);
+ ctf_diff_fini(cdp);
+ if (ret != 0)
+ return (ctf_errno(out));
+ return (0);
+}
+
+static int
+ctf_uniquify_types(ctf_merge_t *cmh, ctf_file_t *src, ctf_file_t **outp)
+{
+ int err, ret;
+ ctf_file_t *out;
+ ctf_merge_types_t cm;
+ ctf_diff_t *cdp;
+ ctf_merge_input_t *cmi;
+ ctf_file_t *parent = cmh->cmh_unique;
+
+ *outp = NULL;
+ out = ctf_fdcreate(cmh->cmh_ofd, &err);
+ if (out == NULL)
+ return (ctf_set_errno(src, err));
+
+ out->ctf_parname = cmh->cmh_pname;
+ if (ctf_setmodel(out, ctf_getmodel(parent)) != 0) {
+ (void) ctf_set_errno(src, ctf_errno(out));
+ ctf_close(out);
+ return (CTF_ERR);
+ }
+
+ if (ctf_import(out, parent) != 0) {
+ (void) ctf_set_errno(src, ctf_errno(out));
+ ctf_close(out);
+ return (CTF_ERR);
+ }
+
+ if ((ret = ctf_diff_init(parent, src, &cdp)) != 0) {
+ ctf_close(out);
+ return (ctf_set_errno(src, ctf_errno(parent)));
+ }
+
+ cm.cm_out = parent;
+ cm.cm_src = src;
+ cm.cm_dedup = B_FALSE;
+ cm.cm_unique = B_TRUE;
+ ret = ctf_merge_types_init(&cm);
+ if (ret != 0) {
+ ctf_close(out);
+ ctf_diff_fini(cdp);
+ return (ctf_set_errno(src, ret));
+ }
+
+ ret = ctf_diff_types(cdp, ctf_merge_diffcb, &cm);
+ if (ret == 0) {
+ cm.cm_out = out;
+ ret = ctf_merge_uniquify_types(&cm);
+ if (ret == 0)
+ ret = ctf_update(out);
+ }
+
+ if (ret != 0) {
+ ctf_merge_types_fini(&cm);
+ ctf_diff_fini(cdp);
+ return (ctf_set_errno(src, ctf_errno(cm.cm_out)));
+ }
+
+ for (cmi = list_head(&cmh->cmh_inputs); cmi != NULL;
+ cmi = list_next(&cmh->cmh_inputs, cmi)) {
+ ctf_merge_objmap_t *cmo;
+ ctf_merge_funcmap_t *cmf;
+
+ for (cmo = list_head(&cmi->cmi_omap); cmo != NULL;
+ cmo = list_next(&cmi->cmi_omap, cmo)) {
+ if (cmo->cmo_tid == 0)
+ continue;
+ VERIFY(cm.cm_tmap[cmo->cmo_tid].cmt_map != 0);
+ cmo->cmo_tid = cm.cm_tmap[cmo->cmo_tid].cmt_map;
+ }
+
+ for (cmf = list_head(&cmi->cmi_fmap); cmf != NULL;
+ cmf = list_next(&cmi->cmi_fmap, cmf)) {
+ int i;
+
+ VERIFY(cm.cm_tmap[cmf->cmf_rtid].cmt_map != 0);
+ cmf->cmf_rtid = cm.cm_tmap[cmf->cmf_rtid].cmt_map;
+ for (i = 0; i < cmf->cmf_argc; i++) {
+ VERIFY(cm.cm_tmap[cmf->cmf_args[i]].cmt_map !=
+ 0);
+ cmf->cmf_args[i] =
+ cm.cm_tmap[cmf->cmf_args[i]].cmt_map;
+ }
+ }
+ }
+
+ ctf_merge_types_fini(&cm);
+ ctf_diff_fini(cdp);
+ *outp = out;
+ return (0);
+}
+
+static void
+ctf_merge_fini_input(ctf_merge_input_t *cmi)
+{
+ ctf_merge_objmap_t *cmo;
+ ctf_merge_funcmap_t *cmf;
+
+ while ((cmo = list_remove_head(&cmi->cmi_omap)) != NULL)
+ ctf_free(cmo, sizeof (ctf_merge_objmap_t));
+
+ while ((cmf = list_remove_head(&cmi->cmi_fmap)) != NULL)
+ ctf_free(cmf, sizeof (ctf_merge_funcmap_t) +
+ sizeof (ctf_id_t) * cmf->cmf_argc);
+
+ if (cmi->cmi_created == B_TRUE && cmi->cmi_input != NULL)
+ ctf_close(cmi->cmi_input);
+
+ ctf_free(cmi, sizeof (ctf_merge_input_t));
+}
+
+void
+ctf_merge_fini(ctf_merge_t *cmh)
+{
+ size_t len;
+ ctf_merge_input_t *cmi;
+
+ if (cmh->cmh_label != NULL) {
+ len = strlen(cmh->cmh_label) + 1;
+ ctf_free(cmh->cmh_label, len);
+ }
+
+ if (cmh->cmh_pname != NULL) {
+ len = strlen(cmh->cmh_pname) + 1;
+ ctf_free(cmh->cmh_pname, len);
+ }
+
+ while ((cmi = list_remove_head(&cmh->cmh_inputs)) != NULL)
+ ctf_merge_fini_input(cmi);
+
+ ctf_free(cmh, sizeof (ctf_merge_t));
+}
+
+ctf_merge_t *
+ctf_merge_init(int fd, int *errp)
+{
+ int err;
+ ctf_merge_t *out;
+ struct stat st;
+
+ if (errp == NULL)
+ errp = &err;
+
+ if (fd != -1 && fstat(fd, &st) != 0) {
+ *errp = EINVAL;
+ return (NULL);
+ }
+
+ out = ctf_alloc(sizeof (ctf_merge_t));
+ if (out == NULL) {
+ *errp = ENOMEM;
+ return (NULL);
+ }
+
+ if (fd == -1) {
+ out->cmh_msyms = B_FALSE;
+ } else {
+ out->cmh_msyms = B_TRUE;
+ }
+
+ list_create(&out->cmh_inputs, sizeof (ctf_merge_input_t),
+ offsetof(ctf_merge_input_t, cmi_node));
+ out->cmh_ninputs = 0;
+ out->cmh_nthreads = 1;
+ out->cmh_unique = NULL;
+ out->cmh_ofd = fd;
+ out->cmh_flags = 0;
+ out->cmh_label = NULL;
+ out->cmh_pname = NULL;
+
+ return (out);
+}
+
+int
+ctf_merge_label(ctf_merge_t *cmh, const char *label)
+{
+ char *dup;
+
+ if (label == NULL)
+ return (EINVAL);
+
+ dup = ctf_strdup(label);
+ if (dup == NULL)
+ return (EAGAIN);
+
+ if (cmh->cmh_label != NULL) {
+ size_t len = strlen(cmh->cmh_label) + 1;
+ ctf_free(cmh->cmh_label, len);
+ }
+
+ cmh->cmh_label = dup;
+ return (0);
+}
+
+static int
+ctf_merge_add_funcs(const char *name, ulong_t idx, ctf_funcinfo_t *fip,
+ void *arg)
+{
+ ctf_merge_input_t *cmi = arg;
+ ctf_merge_funcmap_t *fmap;
+
+ fmap = ctf_alloc(sizeof (ctf_merge_funcmap_t) +
+ sizeof (ctf_id_t) * fip->ctc_argc);
+ if (fmap == NULL)
+ return (ENOMEM);
+
+ fmap->cmf_idx = idx;
+ fmap->cmf_rtid = fip->ctc_return;
+ fmap->cmf_flags = fip->ctc_flags;
+ fmap->cmf_argc = fip->ctc_argc;
+ fmap->cmf_name = name;
+
+ if (ctf_func_args(cmi->cmi_input, idx, fmap->cmf_argc,
+ fmap->cmf_args) != 0) {
+ ctf_free(fmap, sizeof (ctf_merge_funcmap_t) +
+ sizeof (ctf_id_t) * fip->ctc_argc);
+ return (ctf_errno(cmi->cmi_input));
+ }
+
+ list_insert_tail(&cmi->cmi_fmap, fmap);
+ return (0);
+}
+
+static int
+ctf_merge_add_objs(const char *name, ctf_id_t id, ulong_t idx, void *arg)
+{
+ ctf_merge_input_t *cmi = arg;
+ ctf_merge_objmap_t *cmo;
+
+ cmo = ctf_alloc(sizeof (ctf_merge_objmap_t));
+ if (cmo == NULL)
+ return (ENOMEM);
+
+ cmo->cmo_name = name;
+ cmo->cmo_idx = idx;
+ cmo->cmo_tid = id;
+ list_insert_tail(&cmi->cmi_omap, cmo);
+ return (0);
+}
+
+/*
+ * Whenever we create an entry to merge, we then go and add a second empty
+ * ctf_file_t which we use for the purposes of our merging. It's not the best,
+ * but it's the best that we've got at the moment.
+ */
+int
+ctf_merge_add(ctf_merge_t *cmh, ctf_file_t *input)
+{
+ int ret;
+ ctf_merge_input_t *cmi;
+ ctf_file_t *empty;
+
+ if (input->ctf_flags & LCTF_CHILD)
+ return (ECTF_MCHILD);
+
+ cmi = ctf_alloc(sizeof (ctf_merge_input_t));
+ if (cmi == NULL)
+ return (ENOMEM);
+
+ cmi->cmi_created = B_FALSE;
+ cmi->cmi_input = input;
+ list_create(&cmi->cmi_fmap, sizeof (ctf_merge_funcmap_t),
+ offsetof(ctf_merge_funcmap_t, cmf_node));
+ list_create(&cmi->cmi_omap, sizeof (ctf_merge_funcmap_t),
+ offsetof(ctf_merge_objmap_t, cmo_node));
+
+ if (cmh->cmh_msyms == B_TRUE) {
+ if ((ret = ctf_function_iter(input, ctf_merge_add_funcs,
+ cmi)) != 0) {
+ ctf_merge_fini_input(cmi);
+ return (ret);
+ }
+
+ if ((ret = ctf_object_iter(input, ctf_merge_add_objs,
+ cmi)) != 0) {
+ ctf_merge_fini_input(cmi);
+ return (ret);
+ }
+ }
+
+ list_insert_tail(&cmh->cmh_inputs, cmi);
+ cmh->cmh_ninputs++;
+
+ /* And now the empty one to merge into this */
+ cmi = ctf_alloc(sizeof (ctf_merge_input_t));
+ if (cmi == NULL)
+ return (ENOMEM);
+ list_create(&cmi->cmi_fmap, sizeof (ctf_merge_funcmap_t),
+ offsetof(ctf_merge_funcmap_t, cmf_node));
+ list_create(&cmi->cmi_omap, sizeof (ctf_merge_funcmap_t),
+ offsetof(ctf_merge_objmap_t, cmo_node));
+
+ empty = ctf_fdcreate(cmh->cmh_ofd, &ret);
+ if (empty == NULL)
+ return (ret);
+ cmi->cmi_input = empty;
+ cmi->cmi_created = B_TRUE;
+
+ if (ctf_setmodel(empty, ctf_getmodel(input)) == CTF_ERR) {
+ return (ctf_errno(empty));
+ }
+
+ list_insert_tail(&cmh->cmh_inputs, cmi);
+ cmh->cmh_ninputs++;
+ ctf_dprintf("added containers %p and %p\n", input, empty);
+ return (0);
+}
+
+int
+ctf_merge_uniquify(ctf_merge_t *cmh, ctf_file_t *u, const char *pname)
+{
+ char *dup;
+
+ if (u->ctf_flags & LCTF_CHILD)
+ return (ECTF_MCHILD);
+ if (pname == NULL)
+ return (EINVAL);
+ dup = ctf_strdup(pname);
+ if (dup == NULL)
+ return (EINVAL);
+ if (cmh->cmh_pname != NULL) {
+ size_t len = strlen(cmh->cmh_pname) + 1;
+ ctf_free(cmh->cmh_pname, len);
+ }
+ cmh->cmh_pname = dup;
+ cmh->cmh_unique = u;
+ return (0);
+}
+
+static int
+ctf_merge_symbols(ctf_merge_t *cmh, ctf_file_t *fp)
+{
+ int err;
+ ulong_t i;
+
+ uintptr_t symbase = (uintptr_t)fp->ctf_symtab.cts_data;
+ uintptr_t strbase = (uintptr_t)fp->ctf_strtab.cts_data;
+
+ for (i = 0; i < fp->ctf_nsyms; i++) {
+ const char *name;
+ ctf_merge_input_t *cmi;
+ ctf_merge_objmap_t *cmo;
+
+ if (fp->ctf_symtab.cts_entsize == sizeof (Elf32_Sym)) {
+ const Elf32_Sym *symp = (Elf32_Sym *)symbase + i;
+ int type = ELF32_ST_TYPE(symp->st_info);
+ if (type != STT_OBJECT)
+ continue;
+ if (ctf_sym_valid(strbase, type, symp->st_shndx,
+ symp->st_value, symp->st_name) == B_FALSE)
+ continue;
+ name = (char *)(strbase + symp->st_name);
+ } else {
+ const Elf64_Sym *symp = (Elf64_Sym *)symbase + i;
+ int type = ELF64_ST_TYPE(symp->st_info);
+ if (type != STT_OBJECT)
+ continue;
+ if (ctf_sym_valid(strbase, type, symp->st_shndx,
+ symp->st_value, symp->st_name) == B_FALSE)
+ continue;
+ name = (char *)(strbase + symp->st_name);
+ }
+
+ cmo = NULL;
+ for (cmi = list_head(&cmh->cmh_inputs); cmi != NULL;
+ cmi = list_next(&cmh->cmh_inputs, cmi)) {
+ for (cmo = list_head(&cmi->cmi_omap); cmo != NULL;
+ cmo = list_next(&cmi->cmi_omap, cmo)) {
+ if (strcmp(cmo->cmo_name, name) == 0)
+ goto found;
+ }
+ }
+found:
+ if (cmo != NULL) {
+ if (cmo->cmo_tid == 0)
+ continue;
+ if ((err = ctf_add_object(fp, i, cmo->cmo_tid)) != 0) {
+ ctf_dprintf("Failed to add symbol %s->%d: %s\n",
+ name, cmo->cmo_tid,
+ ctf_errmsg(ctf_errno(fp)));
+ return (err);
+ }
+ }
+ }
+
+ return (0);
+}
+
+static int
+ctf_merge_functions(ctf_merge_t *cmh, ctf_file_t *fp)
+{
+ int err;
+ ulong_t i;
+ ctf_funcinfo_t fi;
+
+ uintptr_t symbase = (uintptr_t)fp->ctf_symtab.cts_data;
+ uintptr_t strbase = (uintptr_t)fp->ctf_strtab.cts_data;
+
+ for (i = 0; i < fp->ctf_nsyms; i++) {
+ const char *name;
+ ctf_merge_input_t *cmi;
+ ctf_merge_funcmap_t *cmf;
+
+ if (fp->ctf_symtab.cts_entsize == sizeof (Elf32_Sym)) {
+ const Elf32_Sym *symp = (Elf32_Sym *)symbase + i;
+ int type = ELF32_ST_TYPE(symp->st_info);
+ if (ELF32_ST_TYPE(symp->st_info) != STT_FUNC)
+ continue;
+ if (ctf_sym_valid(strbase, type, symp->st_shndx,
+ symp->st_value, symp->st_name) == B_FALSE)
+ continue;
+ name = (char *)(strbase + symp->st_name);
+ } else {
+ const Elf64_Sym *symp = (Elf64_Sym *)symbase + i;
+ int type = ELF64_ST_TYPE(symp->st_info);
+ if (ELF64_ST_TYPE(symp->st_info) != STT_FUNC)
+ continue;
+ if (ctf_sym_valid(strbase, type, symp->st_shndx,
+ symp->st_value, symp->st_name) == B_FALSE)
+ continue;
+ name = (char *)(strbase + symp->st_name);
+ }
+
+ cmf = NULL;
+ for (cmi = list_head(&cmh->cmh_inputs); cmi != NULL;
+ cmi = list_next(&cmh->cmh_inputs, cmi)) {
+ for (cmf = list_head(&cmi->cmi_fmap); cmf != NULL;
+ cmf = list_next(&cmi->cmi_fmap, cmf)) {
+ if (strcmp(cmf->cmf_name, name) == 0)
+ goto found;
+ }
+ }
+found:
+ if (cmf != NULL) {
+ fi.ctc_return = cmf->cmf_rtid;
+ fi.ctc_argc = cmf->cmf_argc;
+ fi.ctc_flags = cmf->cmf_flags;
+ if ((err = ctf_add_function(fp, i, &fi,
+ cmf->cmf_args)) != 0)
+ return (err);
+ }
+ }
+
+ return (0);
+
+}
+
+int
+ctf_merge_merge(ctf_merge_t *cmh, ctf_file_t **outp)
+{
+ int err, merr;
+ ctf_merge_input_t *cmi;
+ ctf_id_t ltype;
+ mergeq_t *mqp;
+ ctf_merge_input_t *final;
+ ctf_file_t *out;
+
+ if (cmh->cmh_label != NULL && cmh->cmh_unique != NULL) {
+ const char *label = ctf_label_topmost(cmh->cmh_unique);
+ if (label == NULL)
+ return (ECTF_NOLABEL);
+ if (strcmp(label, cmh->cmh_label) != 0)
+ return (ECTF_LCONFLICT);
+ }
+
+ if (mergeq_init(&mqp, cmh->cmh_nthreads) == -1) {
+ return (errno);
+ }
+
+ /*
+ * We should consider doing a divide and conquer and parallel merge
+ * here. If we did, we'd want to use some number of threads to perform
+ * this operation.
+ */
+ VERIFY(cmh->cmh_ninputs % 2 == 0);
+ for (cmi = list_head(&cmh->cmh_inputs); cmi != NULL;
+ cmi = list_next(&cmh->cmh_inputs, cmi)) {
+ if (mergeq_add(mqp, cmi) == -1) {
+ err = errno;
+ mergeq_fini(mqp);
+ }
+ }
+
+ err = mergeq_merge(mqp, ctf_merge_types, NULL, (void **)&final, &merr);
+ mergeq_fini(mqp);
+
+ if (err == MERGEQ_ERROR) {
+ return (errno);
+ } else if (err == MERGEQ_UERROR) {
+ return (merr);
+ }
+
+ /*
+ * Disassociate the generated ctf_file_t from the original input. That
+ * way when the input gets cleaned up, we don't accidentally kill the
+ * final reference to the ctf_file_t. If it gets uniquified then we'll
+ * kill it.
+ */
+ VERIFY(final->cmi_input != NULL);
+ out = final->cmi_input;
+ final->cmi_input = NULL;
+
+ ctf_dprintf("preparing to uniquify against: %p\n", cmh->cmh_unique);
+ if (cmh->cmh_unique != NULL) {
+ ctf_file_t *u;
+ err = ctf_uniquify_types(cmh, out, &u);
+ if (err != 0) {
+ err = ctf_errno(out);
+ ctf_close(out);
+ return (err);
+ }
+ ctf_close(out);
+ out = u;
+ }
+
+ ltype = out->ctf_typemax;
+ if ((out->ctf_flags & LCTF_CHILD) && ltype != 0)
+ ltype += 0x8000;
+ ctf_dprintf("trying to add the label\n");
+ if (cmh->cmh_label != NULL &&
+ ctf_add_label(out, cmh->cmh_label, ltype, 0) != 0) {
+ ctf_close(out);
+ return (ctf_errno(out));
+ }
+
+ ctf_dprintf("merging symbols and the like\n");
+ if (cmh->cmh_msyms == B_TRUE) {
+ err = ctf_merge_symbols(cmh, out);
+ if (err != 0) {
+ ctf_close(out);
+ return (ctf_errno(out));
+ }
+
+ err = ctf_merge_functions(cmh, out);
+ if (err != 0) {
+ ctf_close(out);
+ return (ctf_errno(out));
+ }
+ }
+
+ err = ctf_update(out);
+ if (err != 0) {
+ ctf_close(out);
+ return (ctf_errno(out));
+ }
+
+ *outp = out;
+ return (0);
+}
+
+/*
+ * When we get told that something is unique, eg. same is B_FALSE, then that
+ * tells us that we need to add it to the output. If same is B_TRUE, then we'll
+ * want to record it in the mapping table so that we know how to redirect types
+ * to the extant ones.
+ */
+static void
+ctf_dedup_cb(ctf_file_t *ifp, ctf_id_t iid, boolean_t same, ctf_file_t *ofp,
+ ctf_id_t oid, void *arg)
+{
+ ctf_merge_types_t *cmp = arg;
+ ctf_merge_tinfo_t *cmt = cmp->cm_tmap;
+
+ if (same == B_TRUE) {
+ /*
+ * The output id here may itself map to something else.
+ * Therefore, we need to basically walk a chain and see what it
+ * points to until it itself points to a base type, eg. -1.
+ * Otherwise we'll dedup to something which no longer exists.
+ */
+ while (cmt[oid].cmt_missing == B_FALSE)
+ oid = cmt[oid].cmt_map;
+ cmt[iid].cmt_map = oid;
+ ctf_dprintf("%d->%d \n", iid, oid);
+ } else {
+ VERIFY(cmt[iid].cmt_map == 0);
+ cmt[iid].cmt_missing = B_TRUE;
+ ctf_dprintf("%d is missing\n", iid);
+ }
+}
+
+/*
+ * Dedup a CTF container.
+ *
+ * DWARF and other encoding formats that we use to create CTF data may create
+ * multiple copies of a given type. However, after doing a conversion, and
+ * before doing a merge, we'd prefer, if possible, to have every input container
+ * to be unique.
+ *
+ * Doing a deduplication is like a normal merge. However, when we diff the types
+ * in the container, rather than doing a normal diff, we instead want to diff
+ * against any already processed types. eg, for a given type i in a container,
+ * we want to diff it from 0 to i - 1.
+ */
+int
+ctf_merge_dedup(ctf_merge_t *cmp, ctf_file_t **outp)
+{
+ int ret;
+ ctf_diff_t *cdp = NULL;
+ ctf_merge_input_t *cmi, *cmc;
+ ctf_file_t *ifp, *ofp;
+ ctf_merge_objmap_t *cmo;
+ ctf_merge_funcmap_t *cmf;
+ ctf_merge_types_t cm;
+
+ if (cmp == NULL || outp == NULL)
+ return (EINVAL);
+
+ ctf_dprintf("encountered %d inputs\n", cmp->cmh_ninputs);
+ if (cmp->cmh_ninputs != 2)
+ return (EINVAL);
+
+ ctf_dprintf("passed argument sanity check\n");
+
+ cmi = list_head(&cmp->cmh_inputs);
+ VERIFY(cmi != NULL);
+ cmc = list_next(&cmp->cmh_inputs, cmi);
+ VERIFY(cmc != NULL);
+ ifp = cmi->cmi_input;
+ ofp = cmc->cmi_input;
+ VERIFY(ifp != NULL);
+ VERIFY(ofp != NULL);
+ cm.cm_src = ifp;
+ cm.cm_out = ofp;
+ cm.cm_dedup = B_TRUE;
+ cm.cm_unique = B_FALSE;
+
+ if ((ret = ctf_merge_types_init(&cm)) != 0) {
+ return (ret);
+ }
+
+ if ((ret = ctf_diff_init(ifp, ifp, &cdp)) != 0)
+ goto err;
+
+ ctf_dprintf("Successfully initialized dedup\n");
+ if ((ret = ctf_diff_self(cdp, ctf_dedup_cb, &cm)) != 0)
+ goto err;
+
+ ctf_dprintf("Successfully diffed types\n");
+ ret = ctf_merge_common(&cm);
+ ctf_dprintf("deduping types result: %d\n", ret);
+ if (ret == 0)
+ ret = ctf_update(cm.cm_out);
+ if (ret != 0)
+ goto err;
+
+ ctf_dprintf("Successfully deduped types\n");
+ ctf_phase_dump(cm.cm_out, "dedup-pre-syms");
+
+
+ /*
+ * Now we need to fix up the object and function maps.
+ */
+ for (cmo = list_head(&cmi->cmi_omap); cmo != NULL;
+ cmo = list_next(&cmi->cmi_omap, cmo)) {
+ if (cmo->cmo_tid == 0)
+ continue;
+ ctf_dprintf("mapped %s %d->%d\n", cmo->cmo_name,
+ cmo->cmo_tid, cm.cm_tmap[cmo->cmo_tid].cmt_map);
+ cmo->cmo_tid = cm.cm_tmap[cmo->cmo_tid].cmt_map;
+ }
+
+ for (cmf = list_head(&cmi->cmi_fmap); cmf != NULL;
+ cmf = list_next(&cmi->cmi_fmap, cmf)) {
+ int i;
+
+ VERIFY(cm.cm_tmap[cmf->cmf_rtid].cmt_map != 0);
+ cmf->cmf_rtid = cm.cm_tmap[cmf->cmf_rtid].cmt_map;
+ for (i = 0; i < cmf->cmf_argc; i++) {
+ VERIFY(cm.cm_tmap[cmf->cmf_args[i]].cmt_map != 0);
+ cmf->cmf_args[i] = cm.cm_tmap[cmf->cmf_args[i]].cmt_map;
+ }
+ }
+
+ if (cmp->cmh_msyms == B_TRUE) {
+ ret = ctf_merge_symbols(cmp, cm.cm_out);
+ if (ret != 0) {
+ ret = ctf_errno(cm.cm_out);
+ ctf_dprintf("failed to dedup symbols: %s\n",
+ ctf_errmsg(ret));
+ goto err;
+ }
+
+ ret = ctf_merge_functions(cmp, cm.cm_out);
+ if (ret != 0) {
+ ret = ctf_errno(cm.cm_out);
+ ctf_dprintf("failed to dedup functions: %s\n",
+ ctf_errmsg(ret));
+ goto err;
+ }
+ }
+
+ ret = ctf_update(cm.cm_out);
+ if (ret == 0) {
+ cmc->cmi_input = NULL;
+ *outp = cm.cm_out;
+ }
+err:
+ ctf_merge_types_fini(&cm);
+ ctf_diff_fini(cdp);
+ return (ret);
+}
+
+int
+ctf_merge_set_nthreads(ctf_merge_t *cmp, const uint_t nthrs)
+{
+ if (nthrs == 0)
+ return (EINVAL);
+ cmp->cmh_nthreads = nthrs;
+ return (0);
+}
diff --git a/usr/src/lib/libctf/common/ctf_subr.c b/usr/src/lib/libctf/common/ctf_subr.c
index 467b6a8181..26f7e8c4db 100644
--- a/usr/src/lib/libctf/common/ctf_subr.c
+++ b/usr/src/lib/libctf/common/ctf_subr.c
@@ -24,8 +24,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <ctf_impl.h>
#include <libctf.h>
#include <sys/mman.h>
@@ -56,6 +54,18 @@ ctf_alloc(size_t size)
return (malloc(size));
}
+void *
+mergeq_alloc(size_t size)
+{
+ return (malloc(size));
+}
+
+void *
+workq_alloc(size_t size)
+{
+ return (malloc(size));
+}
+
/*ARGSUSED*/
void
ctf_free(void *buf, size_t size)
@@ -63,6 +73,20 @@ ctf_free(void *buf, size_t size)
free(buf);
}
+/*ARGSUSED*/
+void
+mergeq_free(void *buf, size_t size)
+{
+ free(buf);
+}
+
+/*ARGSUSED*/
+void
+workq_free(void *buf, size_t size)
+{
+ free(buf);
+}
+
const char *
ctf_strerror(int err)
{
diff --git a/usr/src/lib/libctf/common/libctf.h b/usr/src/lib/libctf/common/libctf.h
index 3fd69318de..a5c5027048 100644
--- a/usr/src/lib/libctf/common/libctf.h
+++ b/usr/src/lib/libctf/common/libctf.h
@@ -23,6 +23,9 @@
* Copyright 2001-2003 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2015, Joyent, Inc.
+ */
/*
* This header file defines the interfaces available from the CTF debugger
@@ -32,7 +35,7 @@
* the fullness of time after we gain more experience with the interfaces.
*
* In the meantime, be aware that any program linked with libctf in this
- * release of Solaris is almost guaranteed to break in the next release.
+ * release of illumos is almost guaranteed to break in the next release.
*
* In short, do not user this header file or libctf for any purpose.
*/
@@ -40,9 +43,8 @@
#ifndef _LIBCTF_H
#define _LIBCTF_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/ctf_api.h>
+#include <libelf.h>
#ifdef __cplusplus
extern "C" {
@@ -53,6 +55,46 @@ extern "C" {
*/
extern int _libctf_debug;
+typedef enum ctf_diff_flag {
+ CTF_DIFF_F_IGNORE_INTNAMES = 0x01
+} ctf_diff_flag_t;
+
+typedef struct ctf_diff ctf_diff_t;
+typedef void (*ctf_diff_type_f)(ctf_file_t *, ctf_id_t, boolean_t, ctf_file_t *,
+ ctf_id_t, void *);
+typedef void (*ctf_diff_func_f)(ctf_file_t *, ulong_t, boolean_t, ctf_file_t *,
+ ulong_t, void *);
+typedef void (*ctf_diff_obj_f)(ctf_file_t *, ulong_t, ctf_id_t, boolean_t,
+ ctf_file_t *, ulong_t, ctf_id_t, void *);
+
+extern int ctf_diff_init(ctf_file_t *, ctf_file_t *, ctf_diff_t **);
+extern uint_t ctf_diff_getflags(ctf_diff_t *);
+extern int ctf_diff_setflags(ctf_diff_t *, uint_t);
+extern int ctf_diff_types(ctf_diff_t *, ctf_diff_type_f, void *);
+extern int ctf_diff_functions(ctf_diff_t *, ctf_diff_func_f, void *);
+extern int ctf_diff_objects(ctf_diff_t *, ctf_diff_obj_f, void *);
+extern void ctf_diff_fini(ctf_diff_t *);
+
+#define CTF_CONVERT_F_IGNNONC 0x01
+extern ctf_file_t *ctf_elfconvert(int, Elf *, const char *, uint_t, uint_t,
+ int *, char *, size_t);
+extern ctf_file_t *ctf_fdconvert(int, const char *, uint_t, uint_t, int *,
+ char *, size_t);
+
+typedef struct ctf_merge_handle ctf_merge_t;
+extern ctf_merge_t *ctf_merge_init(int, int *);
+extern int ctf_merge_add(ctf_merge_t *, ctf_file_t *);
+extern int ctf_merge_set_nthreads(ctf_merge_t *, const uint_t);
+extern int ctf_merge_label(ctf_merge_t *, const char *);
+extern int ctf_merge_uniquify(ctf_merge_t *, ctf_file_t *, const char *);
+extern int ctf_merge_merge(ctf_merge_t *, ctf_file_t **);
+extern int ctf_merge_dedup(ctf_merge_t *, ctf_file_t **);
+extern void ctf_merge_fini(ctf_merge_t *);
+
+#define CTF_ELFWRITE_F_COMPRESS 0x1
+extern int ctf_elffdwrite(ctf_file_t *, int, int, int);
+extern int ctf_elfwrite(ctf_file_t *, const char *, const char *, int);
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/lib/libctf/common/libctf_impl.h b/usr/src/lib/libctf/common/libctf_impl.h
new file mode 100644
index 0000000000..11193e97d0
--- /dev/null
+++ b/usr/src/lib/libctf/common/libctf_impl.h
@@ -0,0 +1,59 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _LIBCTF_IMPL_H
+#define _LIBCTF_IMPL_H
+
+/*
+ * Portions of libctf implementations that are only suitable for CTF's userland
+ * library, eg. converting and merging related routines.
+ */
+
+#include <libelf.h>
+#include <libctf.h>
+#include <ctf_impl.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum ctf_conv_status {
+ CTF_CONV_SUCCESS = 0,
+ CTF_CONV_ERROR = 1,
+ CTF_CONV_NOTSUP = 2
+} ctf_conv_status_t;
+
+typedef ctf_conv_status_t (*ctf_convert_f)(int, Elf *, uint_t, int *,
+ ctf_file_t **, char *, size_t);
+extern ctf_conv_status_t ctf_dwarf_convert(int, Elf *, uint_t, int *,
+ ctf_file_t **, char *, size_t);
+
+/*
+ * zlib compression routines
+ */
+extern int ctf_compress(ctf_file_t *fp, void **, size_t *, size_t *);
+
+extern int ctf_diff_self(ctf_diff_t *, ctf_diff_type_f, void *);
+
+/*
+ * Internal debugging aids
+ */
+extern void ctf_phase_dump(ctf_file_t *, const char *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBCTF_IMPL_H */
diff --git a/usr/src/lib/libctf/common/mapfile-vers b/usr/src/lib/libctf/common/mapfile-vers
index 5573e8db25..cfd2952bbe 100644
--- a/usr/src/lib/libctf/common/mapfile-vers
+++ b/usr/src/lib/libctf/common/mapfile-vers
@@ -23,7 +23,7 @@
#
#
-# Copyright (c) 2013, Joyent, Inc. All rights reserved.
+# Copyright (c) 2015, Joyent, Inc. All rights reserved.
#
#
@@ -53,10 +53,13 @@ SYMBOL_VERSION SUNWprivate_1.2 {
ctf_add_enumerator;
ctf_add_float;
ctf_add_forward;
+ ctf_add_funcptr;
ctf_add_function;
ctf_add_integer;
+ ctf_add_label;
ctf_add_member;
ctf_add_pointer;
+ ctf_add_object;
ctf_add_restrict;
ctf_add_struct;
ctf_add_type;
@@ -64,17 +67,48 @@ SYMBOL_VERSION SUNWprivate_1.2 {
ctf_add_union;
ctf_add_volatile;
ctf_create;
+ ctf_dataptr;
ctf_delete_type;
+ ctf_diff_init;
+ ctf_diff_fini;
+ ctf_diff_functions;
+ ctf_diff_getflags;
+ ctf_diff_objects;
+ ctf_diff_setflags;
+ ctf_diff_types;
ctf_discard;
ctf_dup;
+ ctf_elfconvert;
+ ctf_elffdwrite;
+ ctf_elfwrite;
ctf_enum_value;
+ ctf_fdconvert;
+ ctf_flags;
+ ctf_func_args_by_id;
+ ctf_func_info_by_id;
+ ctf_function_iter;
+ ctf_kind_name;
ctf_label_info;
ctf_label_iter;
ctf_label_topmost;
ctf_member_info;
+ ctf_merge_add;
+ ctf_merge_dedup;
+ ctf_merge_fini;
+ ctf_merge_init;
+ ctf_merge_label;
+ ctf_merge_merge;
+ ctf_merge_set_nthreads;
+ ctf_merge_uniquify;
+ ctf_object_iter;
ctf_parent_file;
+ ctf_parent_label;
ctf_parent_name;
ctf_set_array;
+ ctf_set_root;
+ ctf_set_size;
+ ctf_string_iter;
+ ctf_symbol_name;
ctf_type_align;
ctf_type_cmp;
ctf_type_compat;
diff --git a/usr/src/lib/libcurses/screen/setupterm.c b/usr/src/lib/libcurses/screen/setupterm.c
index c1f4bdeb04..f67e8751b7 100644
--- a/usr/src/lib/libcurses/screen/setupterm.c
+++ b/usr/src/lib/libcurses/screen/setupterm.c
@@ -37,8 +37,6 @@
* contributors.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*LINTLIBRARY*/
#include <stdio.h>
@@ -48,6 +46,7 @@
#include <string.h>
#include <unistd.h>
#include <errno.h>
+#include <zone.h>
#include "curses_inc.h"
#define TERMPATH "/usr/share/lib/terminfo/"
@@ -272,9 +271,11 @@ setupterm(char *term, int filenum, int *errret)
}
if (tfd < 0) {
+ const char *zroot = zone_get_nroot();
/* /usr/share/lib/terminfo/?/$TERM */
if (snprintf(fname, sizeof (fname),
- "%s/%c/%s", TERMPATH, *term, term) >= sizeof (fname)) {
+ "%s/%s/%c/%s", zroot == NULL ? "" : zroot, TERMPATH,
+ *term, term) >= sizeof (fname)) {
term_errno = TERMINFO_TOO_LONG;
goto out_err;
}
diff --git a/usr/src/lib/libdhcpagent/common/dhcpagent_util.c b/usr/src/lib/libdhcpagent/common/dhcpagent_util.c
index 1c381fa08f..7a44ae5249 100644
--- a/usr/src/lib/libdhcpagent/common/dhcpagent_util.c
+++ b/usr/src/lib/libdhcpagent/common/dhcpagent_util.c
@@ -36,6 +36,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <zone.h>
#include "dhcpagent_ipc.h"
#include "dhcpagent_util.h"
@@ -125,6 +126,12 @@ dhcp_start_agent(int timeout)
int ctfd;
pid_t childpid;
ctid_t ct;
+ char dhcpcmd[MAXPATHLEN];
+ const char *zroot = zone_get_nroot();
+
+ /* Prepend the root of the native code in the brand to the command */
+ (void) snprintf(dhcpcmd, sizeof (dhcpcmd), "%s%s", zroot != NULL ?
+ zroot : "", DHCP_AGENT_PATH);
/*
* just send a dummy request to the agent to find out if it's
@@ -160,7 +167,7 @@ dhcp_start_agent(int timeout)
goto fail;
case 0:
- (void) execl(DHCP_AGENT_PATH, DHCP_AGENT_PATH, (char *)0);
+ (void) execl(dhcpcmd, dhcpcmd, (char *)0);
_exit(EXIT_FAILURE);
default:
diff --git a/usr/src/lib/libdladm/Makefile b/usr/src/lib/libdladm/Makefile
index 92de14cc8a..a74b40f58d 100644
--- a/usr/src/lib/libdladm/Makefile
+++ b/usr/src/lib/libdladm/Makefile
@@ -20,6 +20,7 @@
#
#
# Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2015, Joyent, Inc.
#
#
@@ -29,7 +30,7 @@ HDRS = libdladm.h libdladm_impl.h libdllink.h libdlaggr.h \
libdlwlan.h libdlwlan_impl.h libdlvnic.h libdlvlan.h \
libdlmgmt.h libdlflow.h libdlflow_impl.h libdlstat.h \
libdlether.h libdlsim.h libdlbridge.h libdliptun.h \
- libdlib.h
+ libdlib.h libdloverlay.h
HDRDIR = common
@@ -50,6 +51,14 @@ MSGFILES = common/libdladm.c common/linkprop.c common/secobj.c \
XGETFLAGS = -a -x libdladm.xcl
+TYPECHECK_LIB = libdladm.so.1
+TYPELIST = overlay_ioc_create_t \
+ overlay_ioc_activate_t \
+ overlay_ioc_delete_t \
+ overlay_ioc_nprops_t \
+ overlay_ioc_propinfo_t \
+ overlay_ioc_prop_t
+
all := TARGET = all
clean := TARGET = clean
clobber := TARGET = clobber
@@ -62,7 +71,7 @@ all clean clobber install lint: $(SUBDIRS)
install_h: $(ROOTHDRS)
-check: $(CHECKHDRS)
+check: $(CHECKHDRS) $(TYPECHECK)
$(POFILE): pofile_MSGFILES
diff --git a/usr/src/lib/libdladm/Makefile.com b/usr/src/lib/libdladm/Makefile.com
index b6fa5cad2e..f9270f9769 100644
--- a/usr/src/lib/libdladm/Makefile.com
+++ b/usr/src/lib/libdladm/Makefile.com
@@ -20,6 +20,7 @@
#
#
# Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2015, Joyent, Inc.
#
LIBRARY = libdladm.a
@@ -27,7 +28,8 @@ VERS = .1
OBJECTS = libdladm.o secobj.o linkprop.o libdllink.o libdlaggr.o \
libdlwlan.o libdlvnic.o libdlmgmt.o libdlvlan.o libdlib.o\
flowattr.o flowprop.o propfuncs.o libdlflow.o libdlstat.o \
- usage.o libdlether.o libdlsim.o libdlbridge.o libdliptun.o
+ usage.o libdlether.o libdlsim.o libdlbridge.o libdliptun.o \
+ libdloverlay.o
include ../../Makefile.lib
@@ -36,7 +38,7 @@ include ../../Makefile.rootfs
LIBS = $(DYNLIB) $(LINTLIB)
LDLIBS += -ldevinfo -lc -linetutil -lsocket -lscf -lrcm -lnvpair \
- -lexacct -lnsl -lkstat -lpool
+ -lexacct -lnsl -lkstat -lpool -lvarpd
SRCDIR = ../common
$(LINTLIB) := SRCS = $(SRCDIR)/$(LINTSRC)
@@ -48,6 +50,9 @@ CERRWARN += -_gcc=-Wno-unused-label
CERRWARN += -_gcc=-Wno-uninitialized
CPPFLAGS += -I$(SRCDIR) -D_REENTRANT
+C99MODE= -xc99=%all
+C99LMODE= -Xc99=%all
+
.KEEP_STATE:
all: $(LIBS)
diff --git a/usr/src/lib/libdladm/common/libdladm.c b/usr/src/lib/libdladm/common/libdladm.c
index cf113e7357..37823ce913 100644
--- a/usr/src/lib/libdladm/common/libdladm.c
+++ b/usr/src/lib/libdladm/common/libdladm.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc.
*/
#include <unistd.h>
@@ -29,6 +30,9 @@
#include <strings.h>
#include <dirent.h>
#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#include <stdarg.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sys/param.h>
@@ -418,6 +422,9 @@ dladm_status2str(dladm_status_t status, char *buf)
case DLADM_STATUS_INVALID_MTU:
s = "MTU check failed, MTU outside of device's supported range";
break;
+ case DLADM_STATUS_BAD_ENCAP:
+ s = "invalid encapsulation protocol";
+ break;
default:
s = "<unknown error>";
break;
@@ -654,6 +661,9 @@ dladm_class2str(datalink_class_t class, char *buf)
case DATALINK_CLASS_PART:
s = "part";
break;
+ case DATALINK_CLASS_OVERLAY:
+ s = "overlay";
+ break;
default:
s = "unknown";
break;
@@ -1132,15 +1142,15 @@ dladm_strs2range(char **prop_val, uint_t val_cnt,
* Convert a mac_propval_range_t structure into an array of elements.
*/
dladm_status_t
-dladm_range2list(mac_propval_range_t *rangep, void *elem, uint_t *nelem)
+dladm_range2list(const mac_propval_range_t *rangep, void *elem, uint_t *nelem)
{
int i, j, k;
dladm_status_t status = DLADM_STATUS_OK;
switch (rangep->mpr_type) {
case MAC_PROPVAL_UINT32: {
- mac_propval_uint32_range_t *ur;
- uint32_t *elem32 = elem;
+ const mac_propval_uint32_range_t *ur;
+ uint32_t *elem32 = elem;
k = 0;
ur = &rangep->mpr_range_uint32[0];
@@ -1168,13 +1178,13 @@ dladm_range2list(mac_propval_range_t *rangep, void *elem, uint_t *nelem)
* of single elements or ranges.
*/
int
-dladm_range2strs(mac_propval_range_t *rangep, char **prop_val)
+dladm_range2strs(const mac_propval_range_t *rangep, char **prop_val)
{
int i;
switch (rangep->mpr_type) {
case MAC_PROPVAL_UINT32: {
- mac_propval_uint32_range_t *ur;
+ const mac_propval_uint32_range_t *ur;
/* Write ranges and individual elements */
ur = &rangep->mpr_range_uint32[0];
@@ -1191,6 +1201,20 @@ dladm_range2strs(mac_propval_range_t *rangep, char **prop_val)
}
return (0);
}
+ case MAC_PROPVAL_STR: {
+ const mac_propval_str_range_t *str;
+ size_t coff, len;
+
+ coff = 0;
+ str = &rangep->u.mpr_str;
+ for (i = 0; i < rangep->mpr_count; i++) {
+ len = strlen(&str->mpur_data[coff]);
+ (void) strlcpy(prop_val[i], &str->mpur_data[coff],
+ DLADM_PROP_VAL_MAX);
+ coff += len + 1;
+ }
+ return (0);
+ }
default:
break;
}
@@ -1268,3 +1292,54 @@ dladm_list2range(void *elem, uint_t nelem, mac_propval_type_t type,
return (status);
}
+
+void
+dladm_errlist_init(dladm_errlist_t *erl)
+{
+ bzero(erl, sizeof (dladm_errlist_t));
+}
+
+void
+dladm_errlist_reset(dladm_errlist_t *erl)
+{
+ uint_t i;
+
+ for (i = 0; i < erl->el_count; i++)
+ free(erl->el_errs[i]);
+ free(erl->el_errs);
+ dladm_errlist_init(erl);
+}
+
+dladm_status_t
+dladm_errlist_append(dladm_errlist_t *erl, const char *fmt, ...)
+{
+ int ret;
+ va_list ap;
+ char *m = NULL;
+
+ if (erl->el_count == erl->el_alloc) {
+ int alloc;
+ void *addr;
+ if (erl->el_alloc == 0) {
+ assert(erl->el_errs == NULL);
+ alloc = 32;
+ } else {
+ alloc = erl->el_alloc + 32;
+ }
+ addr = realloc(erl->el_errs, sizeof (char *) * alloc);
+ if (addr == NULL)
+ return (DLADM_STATUS_NOMEM);
+
+ erl->el_errs = addr;
+ erl->el_alloc = alloc;
+ }
+
+ va_start(ap, fmt);
+ ret = vasprintf(&m, fmt, ap);
+ va_end(ap);
+ if (ret == -1)
+ return (dladm_errno2status(errno));
+ erl->el_errs[erl->el_count] = m;
+ erl->el_count++;
+ return (DLADM_STATUS_OK);
+}
diff --git a/usr/src/lib/libdladm/common/libdladm.h b/usr/src/lib/libdladm/common/libdladm.h
index c2fceb25ab..e5da4e3b44 100644
--- a/usr/src/lib/libdladm/common/libdladm.h
+++ b/usr/src/lib/libdladm/common/libdladm.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#ifndef _LIBDLADM_H
@@ -41,7 +42,7 @@ extern "C" {
#endif
#define LINKID_STR_WIDTH 10
-#define DLADM_STRSIZE 256
+#define DLADM_STRSIZE 2048
/*
* option flags taken by the libdladm functions
@@ -71,6 +72,10 @@ extern "C" {
* - DLADM_OPT_BOOT:
* Bypass check functions during boot (used by pool property since pools
* can come up after link properties are set)
+ *
+ * - DLADM_OPT_TRANSIENT:
+ * Indicates that the link assigned to a zone is transient and will be
+ * removed when the zone shuts down.
*/
#define DLADM_OPT_ACTIVE 0x00000001
#define DLADM_OPT_PERSIST 0x00000002
@@ -81,6 +86,7 @@ extern "C" {
#define DLADM_OPT_VLAN 0x00000040
#define DLADM_OPT_NOREFRESH 0x00000080
#define DLADM_OPT_BOOT 0x00000100
+#define DLADM_OPT_TRANSIENT 0x00000200
#define DLADM_WALK_TERMINATE 0
#define DLADM_WALK_CONTINUE -1
@@ -173,7 +179,8 @@ typedef enum {
DLADM_STATUS_NO_IB_HW_RESOURCE,
DLADM_STATUS_INVALID_PKEY_TBL_SIZE,
DLADM_STATUS_PORT_NOPROTO,
- DLADM_STATUS_INVALID_MTU
+ DLADM_STATUS_INVALID_MTU,
+ DLADM_STATUS_BAD_ENCAP
} dladm_status_t;
typedef enum {
@@ -221,6 +228,12 @@ typedef struct dladm_arg_list {
char *al_buf;
} dladm_arg_list_t;
+typedef struct dladm_errlist {
+ uint_t el_count;
+ uint_t el_alloc;
+ char **el_errs;
+} dladm_errlist_t;
+
typedef enum {
DLADM_LOGTYPE_LINK = 1,
DLADM_LOGTYPE_FLOW
@@ -282,12 +295,15 @@ extern dladm_status_t dladm_zone_halt(dladm_handle_t, zoneid_t);
extern dladm_status_t dladm_strs2range(char **, uint_t, mac_propval_type_t,
mac_propval_range_t **);
-extern dladm_status_t dladm_range2list(mac_propval_range_t *, void*,
+extern dladm_status_t dladm_range2list(const mac_propval_range_t *, void *,
uint_t *);
-extern int dladm_range2strs(mac_propval_range_t *, char **);
+extern int dladm_range2strs(const mac_propval_range_t *, char **);
extern dladm_status_t dladm_list2range(void *, uint_t, mac_propval_type_t,
mac_propval_range_t **);
+extern void dladm_errlist_init(dladm_errlist_t *);
+extern void dladm_errlist_reset(dladm_errlist_t *);
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/lib/libdladm/common/libdladm_impl.h b/usr/src/lib/libdladm/common/libdladm_impl.h
index 13169285e3..e1a3177808 100644
--- a/usr/src/lib/libdladm/common/libdladm_impl.h
+++ b/usr/src/lib/libdladm/common/libdladm_impl.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#ifndef _LIBDLADM_IMPL_H
@@ -168,6 +169,12 @@ typedef struct resource_prop_s {
*/
#define FBRIDGE "bridge" /* string */
+/*
+ * For error lists
+ */
+extern dladm_status_t dladm_errlist_append(dladm_errlist_t *,
+ const char *, ...);
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/lib/libdladm/common/libdllink.c b/usr/src/lib/libdladm/common/libdllink.c
index 58bec0ad30..7e952b97e9 100644
--- a/usr/src/lib/libdladm/common/libdllink.c
+++ b/usr/src/lib/libdladm/common/libdllink.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -386,10 +387,14 @@ dladm_linkduplex2str(link_duplex_t duplex, char *buf)
/*
* Case 1: rename an existing link1 to a link2 that does not exist.
* Result: <linkid1, link2>
+ * The zonename parameter is used to allow us to create a VNIC in the global
+ * zone which is assigned to a non-global zone. Since there is a race condition
+ * in the create process if two VNICs have the same name, we need to rename it
+ * after it has been assigned to the zone.
*/
static dladm_status_t
i_dladm_rename_link_c1(dladm_handle_t handle, datalink_id_t linkid1,
- const char *link1, const char *link2, uint32_t flags)
+ const char *link1, const char *link2, uint32_t flags, const char *zonename)
{
dld_ioc_rename_t dir;
dladm_status_t status = DLADM_STATUS_OK;
@@ -402,6 +407,10 @@ i_dladm_rename_link_c1(dladm_handle_t handle, datalink_id_t linkid1,
dir.dir_linkid1 = linkid1;
dir.dir_linkid2 = DATALINK_INVALID_LINKID;
(void) strlcpy(dir.dir_link, link2, MAXLINKNAMELEN);
+ if (zonename != NULL)
+ dir.dir_zoneinit = B_TRUE;
+ else
+ dir.dir_zoneinit = B_FALSE;
if (ioctl(dladm_dld_fd(handle), DLDIOC_RENAME, &dir) < 0) {
status = dladm_errno2status(errno);
@@ -412,6 +421,7 @@ i_dladm_rename_link_c1(dladm_handle_t handle, datalink_id_t linkid1,
status = dladm_remap_datalink_id(handle, linkid1, link2);
if (status != DLADM_STATUS_OK && (flags & DLADM_OPT_ACTIVE)) {
(void) strlcpy(dir.dir_link, link1, MAXLINKNAMELEN);
+ dir.dir_zoneinit = B_FALSE;
(void) ioctl(dladm_dld_fd(handle), DLDIOC_RENAME, &dir);
}
return (status);
@@ -508,6 +518,7 @@ i_dladm_rename_link_c2(dladm_handle_t handle, datalink_id_t linkid1,
*/
dir.dir_linkid1 = linkid1;
dir.dir_linkid2 = linkid2;
+ dir.dir_zoneinit = B_FALSE;
if (ioctl(dladm_dld_fd(handle), DLDIOC_RENAME, &dir) < 0)
status = dladm_errno2status(errno);
@@ -616,7 +627,8 @@ done:
}
dladm_status_t
-dladm_rename_link(dladm_handle_t handle, const char *link1, const char *link2)
+dladm_rename_link(dladm_handle_t handle, const char *zonename,
+ const char *link1, const char *link2)
{
datalink_id_t linkid1 = DATALINK_INVALID_LINKID;
datalink_id_t linkid2 = DATALINK_INVALID_LINKID;
@@ -626,11 +638,11 @@ dladm_rename_link(dladm_handle_t handle, const char *link1, const char *link2)
boolean_t remphy2 = B_FALSE;
dladm_status_t status;
- (void) dladm_name2info(handle, link1, &linkid1, &flags1, &class1,
- &media1);
- if ((dladm_name2info(handle, link2, &linkid2, &flags2, &class2,
- &media2) == DLADM_STATUS_OK) && (class2 == DATALINK_CLASS_PHYS) &&
- (flags2 == DLADM_OPT_PERSIST)) {
+ (void) dladm_zname2info(handle, zonename, link1, &linkid1, &flags1,
+ &class1, &media1);
+ if ((dladm_zname2info(handle, zonename, link2, &linkid2, &flags2,
+ &class2, &media2) == DLADM_STATUS_OK) &&
+ (class2 == DATALINK_CLASS_PHYS) && (flags2 == DLADM_OPT_PERSIST)) {
/*
* see whether link2 is a removed physical link.
*/
@@ -644,7 +656,7 @@ dladm_rename_link(dladm_handle_t handle, const char *link1, const char *link2)
* does not exist.
*/
status = i_dladm_rename_link_c1(handle, linkid1, link1,
- link2, flags1);
+ link2, flags1, zonename);
} else if (remphy2) {
/*
* case 2: rename an available link to a REMOVED
diff --git a/usr/src/lib/libdladm/common/libdllink.h b/usr/src/lib/libdladm/common/libdllink.h
index a2830b5e37..a858e78aa3 100644
--- a/usr/src/lib/libdladm/common/libdllink.h
+++ b/usr/src/lib/libdladm/common/libdllink.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent Inc. All rights reserved.
*/
#ifndef _LIBDLLINK_H
@@ -121,7 +122,7 @@ extern dladm_status_t dladm_info(dladm_handle_t, datalink_id_t,
dladm_attr_t *);
extern dladm_status_t dladm_rename_link(dladm_handle_t, const char *,
- const char *);
+ const char *, const char *);
extern dladm_status_t dladm_set_linkprop(dladm_handle_t, datalink_id_t,
const char *, char **, uint_t, uint_t);
@@ -170,6 +171,9 @@ extern dladm_status_t dladm_up_datalink_id(dladm_handle_t, datalink_id_t);
extern dladm_status_t dladm_name2info(dladm_handle_t, const char *,
datalink_id_t *, uint32_t *, datalink_class_t *,
uint32_t *);
+extern dladm_status_t dladm_zname2info(dladm_handle_t, const char *,
+ const char *, datalink_id_t *, uint32_t *,
+ datalink_class_t *, uint32_t *);
extern dladm_status_t dladm_datalink_id2info(dladm_handle_t, datalink_id_t,
uint32_t *, datalink_class_t *, uint32_t *, char *,
size_t);
diff --git a/usr/src/lib/libdladm/common/libdlmgmt.c b/usr/src/lib/libdladm/common/libdlmgmt.c
index 4b0753417c..c9c7906934 100644
--- a/usr/src/lib/libdladm/common/libdlmgmt.c
+++ b/usr/src/lib/libdladm/common/libdlmgmt.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent Inc. All rights reserved.
*/
#include <door.h>
@@ -528,12 +529,24 @@ dladm_getnext_conf_linkprop(dladm_handle_t handle, dladm_conf_t conf,
}
/*
- * Get the link ID that is associated with the given name.
+ * Get the link ID that is associated with the given name in the current zone.
*/
dladm_status_t
dladm_name2info(dladm_handle_t handle, const char *link, datalink_id_t *linkidp,
uint32_t *flagp, datalink_class_t *classp, uint32_t *mediap)
{
+ return (dladm_zname2info(handle, NULL, link, linkidp, flagp, classp,
+ mediap));
+}
+
+/*
+ * Get the link ID that is associated with the given zone/name pair.
+ */
+dladm_status_t
+dladm_zname2info(dladm_handle_t handle, const char *zonename, const char *link,
+ datalink_id_t *linkidp, uint32_t *flagp, datalink_class_t *classp,
+ uint32_t *mediap)
+{
dlmgmt_door_getlinkid_t getlinkid;
dlmgmt_getlinkid_retval_t retval;
datalink_id_t linkid;
@@ -542,6 +555,10 @@ dladm_name2info(dladm_handle_t handle, const char *link, datalink_id_t *linkidp,
getlinkid.ld_cmd = DLMGMT_CMD_GETLINKID;
(void) strlcpy(getlinkid.ld_link, link, MAXLINKNAMELEN);
+ if (zonename != NULL)
+ getlinkid.ld_zoneid = getzoneidbyname(zonename);
+ else
+ getlinkid.ld_zoneid = -1;
if ((status = dladm_door_call(handle, &getlinkid, sizeof (getlinkid),
&retval, &sz)) != DLADM_STATUS_OK) {
diff --git a/usr/src/lib/libdladm/common/libdloverlay.c b/usr/src/lib/libdladm/common/libdloverlay.c
new file mode 100644
index 0000000000..3da9005183
--- /dev/null
+++ b/usr/src/lib/libdladm/common/libdloverlay.c
@@ -0,0 +1,887 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2015 Joyent, Inc.
+ */
+
+#include <libdladm_impl.h>
+#include <libdllink.h>
+#include <libdloverlay.h>
+#include <sys/dld.h>
+#include <sys/overlay.h>
+#include <strings.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <limits.h>
+#include <libvarpd_client.h>
+
+#define VARPD_PROPERTY_NAME "varpd/id"
+
+static const char *dladm_overlay_doorpath = "/var/run/varpd/varpd.door";
+
+typedef struct dladm_overlay_propinfo {
+ boolean_t dop_isvarpd;
+ union {
+ overlay_ioc_propinfo_t *dop_overlay;
+ varpd_client_prop_handle_t *dop_varpd;
+ } dop_un;
+} dladm_overlay_propinfo_t;
+
+dladm_status_t
+dladm_overlay_prop_info(dladm_overlay_propinfo_handle_t phdl,
+ const char **namep, uint_t *typep, uint_t *protp, const void **defp,
+ uint32_t *sizep, const mac_propval_range_t **possp)
+{
+ dladm_overlay_propinfo_t *infop = (dladm_overlay_propinfo_t *)phdl;
+ overlay_ioc_propinfo_t *oinfop = infop->dop_un.dop_overlay;
+
+ if (infop->dop_isvarpd == B_FALSE) {
+ if (namep != NULL)
+ *namep = oinfop->oipi_name;
+ if (typep != NULL)
+ *typep = oinfop->oipi_type;
+ if (protp != NULL)
+ *protp = oinfop->oipi_prot;
+ if (defp != NULL)
+ *defp = oinfop->oipi_default;
+ if (sizep != NULL)
+ *sizep = oinfop->oipi_defsize;
+ if (possp != NULL) {
+ /* LINTED: E_BAD_PTR_CAST_ALIGN */
+ *possp = (const mac_propval_range_t *)oinfop->oipi_poss;
+ }
+
+ } else {
+ int ret;
+ ret = libvarpd_c_prop_info(infop->dop_un.dop_varpd, namep,
+ typep, protp, defp, sizep, possp);
+ if (ret != 0)
+ return (dladm_errno2status(ret));
+
+ }
+
+ return (DLADM_STATUS_OK);
+}
+
+static dladm_status_t
+dladm_overlay_parse_prop(overlay_prop_type_t type, void *buf, uint32_t *sizep,
+ const char *val)
+{
+ int ret;
+ int64_t ival;
+ uint64_t uval;
+ char *eptr;
+ struct in6_addr ipv6;
+ struct in_addr ip;
+
+ switch (type) {
+ case OVERLAY_PROP_T_INT:
+ errno = 0;
+ ival = strtol(val, &eptr, 10);
+ if ((ival == 0 && errno == EINVAL) ||
+ ((ival == LONG_MAX || ival == LONG_MIN) &&
+ errno == ERANGE))
+ return (DLADM_STATUS_BADARG);
+ bcopy(&ival, buf, sizeof (int64_t));
+ *sizep = sizeof (int64_t);
+ break;
+ case OVERLAY_PROP_T_UINT:
+ errno = 0;
+ uval = strtol(val, &eptr, 10);
+ if ((uval == 0 && errno == EINVAL) ||
+ (uval == ULONG_MAX && errno == ERANGE))
+ return (DLADM_STATUS_BADARG);
+ bcopy(&uval, buf, sizeof (uint64_t));
+ *sizep = sizeof (uint64_t);
+ break;
+ case OVERLAY_PROP_T_STRING:
+ ret = strlcpy((char *)buf, val, OVERLAY_PROP_SIZEMAX);
+ if (ret >= OVERLAY_PROP_SIZEMAX)
+ return (DLADM_STATUS_BADARG);
+ *sizep = ret + 1;
+ break;
+ case OVERLAY_PROP_T_IP:
+ /*
+ * Always try to parse the IP as an IPv6 address. If that fails,
+ * try to interpret it as an IPv4 address and transform it into
+ * an IPv6 mapped IPv4 address.
+ */
+ if (inet_pton(AF_INET6, val, &ipv6) != 1) {
+ if (inet_pton(AF_INET, val, &ip) != 1)
+ return (DLADM_STATUS_BADARG);
+
+ IN6_INADDR_TO_V4MAPPED(&ip, &ipv6);
+ }
+ bcopy(&ipv6, buf, sizeof (struct in6_addr));
+ *sizep = sizeof (struct in6_addr);
+ break;
+ default:
+ abort();
+ }
+
+ return (DLADM_STATUS_OK);
+}
+
+/* ARGSUSED */
+static dladm_status_t
+dladm_overlay_varpd_setprop(dladm_handle_t handle, varpd_client_handle_t *chdl,
+ uint64_t inst, const char *name, char *const *valp, uint_t cnt)
+{
+ int ret;
+ uint32_t size;
+ uint8_t buf[LIBVARPD_PROP_SIZEMAX];
+ varpd_client_prop_handle_t *phdl;
+ uint_t type;
+ dladm_status_t status;
+
+ if ((ret = libvarpd_c_prop_handle_alloc(chdl, inst, &phdl)) != 0)
+ return (dladm_errno2status(ret));
+
+ if ((ret = libvarpd_c_prop_info_fill_by_name(phdl, name)) != 0) {
+ libvarpd_c_prop_handle_free(phdl);
+ return (dladm_errno2status(ret));
+ }
+
+ if ((ret = libvarpd_c_prop_info(phdl, NULL, &type, NULL, NULL, NULL,
+ NULL)) != 0) {
+ libvarpd_c_prop_handle_free(phdl);
+ return (dladm_errno2status(ret));
+ }
+
+ if ((status = dladm_overlay_parse_prop(type, buf, &size, valp[0])) !=
+ DLADM_STATUS_OK) {
+ libvarpd_c_prop_handle_free(phdl);
+ return (status);
+ }
+
+ ret = libvarpd_c_prop_set(phdl, buf, size);
+ libvarpd_c_prop_handle_free(phdl);
+
+ return (dladm_errno2status(ret));
+}
+
+dladm_status_t
+dladm_overlay_setprop(dladm_handle_t handle, datalink_id_t linkid,
+ const char *name, char *const *valp, uint_t cnt)
+{
+ int ret;
+ dladm_status_t status;
+ overlay_ioc_propinfo_t info;
+ overlay_ioc_prop_t prop;
+
+ if (linkid == DATALINK_INVALID_LINKID ||
+ name == NULL || valp == NULL || cnt != 1)
+ return (DLADM_STATUS_BADARG);
+
+ bzero(&info, sizeof (overlay_ioc_propinfo_t));
+ info.oipi_linkid = linkid;
+ info.oipi_id = -1;
+ if (strlcpy(info.oipi_name, name, OVERLAY_PROP_NAMELEN) >=
+ OVERLAY_PROP_NAMELEN)
+ return (DLADM_STATUS_BADARG);
+
+ status = DLADM_STATUS_OK;
+ ret = ioctl(dladm_dld_fd(handle), OVERLAY_IOC_PROPINFO, &info);
+ if (ret != 0)
+ status = dladm_errno2status(errno);
+
+ if (status != DLADM_STATUS_OK)
+ return (status);
+
+ prop.oip_linkid = linkid;
+ prop.oip_id = info.oipi_id;
+ prop.oip_name[0] = '\0';
+ if ((ret = dladm_overlay_parse_prop(info.oipi_type, prop.oip_value,
+ &prop.oip_size, valp[0])) != DLADM_STATUS_OK)
+ return (ret);
+
+ status = DLADM_STATUS_OK;
+ ret = ioctl(dladm_dld_fd(handle), OVERLAY_IOC_SETPROP, &prop);
+ if (ret != 0)
+ status = dladm_errno2status(errno);
+
+ return (ret);
+}
+
+/*
+ * Tell the user about any unset required properties.
+ */
+static int
+dladm_overlay_activate_cb(dladm_handle_t handle, datalink_id_t linkid,
+ dladm_overlay_propinfo_handle_t phdl, void *arg)
+{
+ dladm_status_t status;
+ uint8_t buf[DLADM_OVERLAY_PROP_SIZEMAX];
+ uint_t prot;
+ size_t size = sizeof (buf);
+ const char *name;
+ dladm_errlist_t *errs = arg;
+
+ if ((status = dladm_overlay_prop_info(phdl, &name, NULL, &prot, NULL,
+ NULL, NULL)) != DLADM_STATUS_OK)
+ return (status);
+
+ if ((prot & OVERLAY_PROP_PERM_REQ) == 0)
+ return (DLADM_WALK_CONTINUE);
+
+ if (dladm_overlay_get_prop(handle, linkid, phdl, buf, &size) !=
+ DLADM_STATUS_OK)
+ return (DLADM_WALK_CONTINUE);
+
+ if (size == 0)
+ (void) dladm_errlist_append(errs, "unset required property: %s",
+ name);
+
+ return (DLADM_WALK_CONTINUE);
+}
+
+/*
+ * We need to clean up the world here. The problem is that we may or may not
+ * actually have everything created. While in the normal case, we'd always have
+ * an overlay device, assigned datalink id, and a varpd instance, we might not
+ * have any of those, except for the datalink instance. Therefore, as long as
+ * the id refers to a valid overlay, we should try to clean up as much of the
+ * state as possible and most importantly, we need to make sure we delete the
+ * datalink id. If we fail to do that, then that name will become lost to time.
+ */
+dladm_status_t
+dladm_overlay_delete(dladm_handle_t handle, datalink_id_t linkid)
+{
+ datalink_class_t class;
+ overlay_ioc_delete_t oid;
+ varpd_client_handle_t *chdl;
+ int ret;
+ uint32_t flags;
+ uint64_t varpdid;
+
+ if (dladm_datalink_id2info(handle, linkid, &flags, &class, NULL,
+ NULL, 0) != DLADM_STATUS_OK)
+ return (DLADM_STATUS_BADARG);
+
+ if (class != DATALINK_CLASS_OVERLAY)
+ return (DLADM_STATUS_BADARG);
+
+ oid.oid_linkid = linkid;
+ ret = ioctl(dladm_dld_fd(handle), OVERLAY_IOC_DELETE, &oid);
+ if (ret != 0 && errno != ENOENT) {
+ return (dladm_errno2status(errno));
+ }
+
+ if ((ret = libvarpd_c_create(&chdl, dladm_overlay_doorpath)) != 0) {
+ return (dladm_errno2status(ret));
+ }
+
+ if ((ret = libvarpd_c_instance_lookup(chdl, linkid, &varpdid)) != 0) {
+ if (ret == ENOENT) {
+ goto finish;
+ }
+ (void) libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+ }
+
+ ret = libvarpd_c_instance_destroy(chdl, varpdid);
+finish:
+ (void) libvarpd_c_destroy(chdl);
+ (void) dladm_destroy_datalink_id(handle, linkid, flags);
+
+ return (dladm_errno2status(ret));
+}
+
+dladm_status_t
+dladm_overlay_get_prop(dladm_handle_t handle, datalink_id_t linkid,
+ dladm_overlay_propinfo_handle_t infohdl, void *buf, size_t *sizep)
+{
+ int ret;
+ overlay_ioc_prop_t oip;
+ dladm_overlay_propinfo_t *infop = (dladm_overlay_propinfo_t *)infohdl;
+
+ /*
+ * It'd be nice if we had a better or more specific error for this. If
+ * this kind of error becomes common place, let's get a better dladm
+ * error.
+ */
+ if (*sizep < DLADM_OVERLAY_PROP_SIZEMAX)
+ return (dladm_errno2status(ERANGE));
+
+ if (infop->dop_isvarpd == B_FALSE) {
+ bzero(&oip, sizeof (overlay_ioc_prop_t));
+ oip.oip_linkid = linkid;
+ oip.oip_id = infop->dop_un.dop_overlay->oipi_id;
+ ret = ioctl(dladm_dld_fd(handle), OVERLAY_IOC_GETPROP, &oip);
+ if (ret != 0)
+ return (dladm_errno2status(errno));
+ bcopy(oip.oip_value, buf, DLADM_OVERLAY_PROP_SIZEMAX);
+ *sizep = oip.oip_size;
+ } else {
+ uint32_t size = *sizep;
+
+ ret = libvarpd_c_prop_get(infop->dop_un.dop_varpd, buf, &size);
+ if (ret != 0)
+ return (dladm_errno2status(errno));
+ *sizep = size;
+ }
+
+ return (DLADM_STATUS_OK);
+}
+
+static dladm_status_t
+dladm_overlay_walk_varpd_prop(dladm_handle_t handle, datalink_id_t linkid,
+ uint64_t varpdid, dladm_overlay_prop_f func, void *arg)
+{
+ int ret, i;
+ varpd_client_handle_t *chdl;
+ varpd_client_prop_handle_t *phdl;
+ uint_t nprops;
+ dladm_status_t status;
+
+ if ((ret = libvarpd_c_create(&chdl, dladm_overlay_doorpath)) != 0)
+ return (dladm_errno2status(ret));
+
+ if ((ret = libvarpd_c_prop_handle_alloc(chdl, varpdid, &phdl)) != 0) {
+ (void) libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+ }
+
+ if ((ret = libvarpd_c_prop_nprops(chdl, varpdid, &nprops)) != 0) {
+ libvarpd_c_prop_handle_free(phdl);
+ (void) libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+ }
+
+ status = DLADM_STATUS_OK;
+ for (i = 0; i < nprops; i++) {
+ dladm_overlay_propinfo_t dop;
+
+ bzero(&dop, sizeof (dop));
+ dop.dop_isvarpd = B_TRUE;
+ dop.dop_un.dop_varpd = phdl;
+
+ if ((ret = libvarpd_c_prop_info_fill(phdl, i)) != 0) {
+ status = dladm_errno2status(ret);
+ break;
+ }
+
+ ret = func(handle, linkid,
+ (dladm_overlay_propinfo_handle_t)&dop, arg);
+ if (ret == DLADM_WALK_TERMINATE)
+ break;
+ }
+
+ libvarpd_c_prop_handle_free(phdl);
+ libvarpd_c_destroy(chdl);
+
+ return (status);
+}
+
+dladm_status_t
+dladm_overlay_walk_prop(dladm_handle_t handle, datalink_id_t linkid,
+ dladm_overlay_prop_f func, void *arg, dladm_errlist_t *errs)
+{
+ int i, ret;
+ datalink_class_t class;
+ overlay_ioc_nprops_t oin;
+ overlay_ioc_propinfo_t oipi;
+ dladm_overlay_propinfo_t dop;
+ uint64_t varpdid = UINT64_MAX;
+
+ if (dladm_datalink_id2info(handle, linkid, NULL, &class, NULL,
+ NULL, 0) != DLADM_STATUS_OK)
+ return (DLADM_STATUS_BADARG);
+
+ if (class != DATALINK_CLASS_OVERLAY)
+ return (DLADM_STATUS_BADARG);
+
+ bzero(&oin, sizeof (overlay_ioc_nprops_t));
+ oin.oipn_linkid = linkid;
+ ret = ioctl(dladm_dld_fd(handle), OVERLAY_IOC_NPROPS, &oin);
+ if (ret != 0)
+ return (dladm_errno2status(errno));
+
+ for (i = 0; i < oin.oipn_nprops; i++) {
+ bzero(&dop, sizeof (dladm_overlay_propinfo_t));
+ bzero(&oipi, sizeof (overlay_ioc_propinfo_t));
+ oipi.oipi_linkid = linkid;
+ oipi.oipi_id = i;
+ ret = ioctl(dladm_dld_fd(handle), OVERLAY_IOC_PROPINFO, &oipi);
+ if (ret != 0) {
+ (void) dladm_errlist_append(errs, "failed to get "
+ "propinfo for property %d: %s", i, strerror(errno));
+ return (dladm_errno2status(errno));
+ }
+
+ dop.dop_isvarpd = B_FALSE;
+ dop.dop_un.dop_overlay = &oipi;
+ ret = func(handle, linkid,
+ (dladm_overlay_propinfo_handle_t)&dop, arg);
+ if (ret == DLADM_WALK_TERMINATE)
+ break;
+
+ if (strcmp(oipi.oipi_name, VARPD_PROPERTY_NAME) == 0) {
+ uint8_t buf[DLADM_OVERLAY_PROP_SIZEMAX];
+ size_t bufsize = sizeof (buf);
+ uint64_t *vp;
+
+ if (dladm_overlay_get_prop(handle, linkid,
+ (dladm_overlay_propinfo_handle_t)&dop, buf,
+ &bufsize) != DLADM_STATUS_OK)
+ continue;
+
+ /* LINTED: E_BAD_PTR_CAST_ALIGN */
+ vp = (uint64_t *)buf;
+ varpdid = *vp;
+ }
+ }
+
+ /* Should this really be possible? */
+ if (varpdid == UINT64_MAX)
+ return (DLADM_STATUS_OK);
+
+ return (dladm_overlay_walk_varpd_prop(handle, linkid, varpdid, func,
+ arg));
+}
+
+dladm_status_t
+dladm_overlay_create(dladm_handle_t handle, const char *name,
+ const char *encap, const char *search, uint64_t vid,
+ dladm_arg_list_t *props, dladm_errlist_t *errs, uint32_t flags)
+{
+ int ret, i;
+ dladm_status_t status;
+ datalink_id_t linkid;
+ overlay_ioc_create_t oic;
+ overlay_ioc_activate_t oia;
+ size_t slen;
+ varpd_client_handle_t *vch;
+ uint64_t id;
+
+ status = dladm_create_datalink_id(handle, name, DATALINK_CLASS_OVERLAY,
+ DL_ETHER, flags, &linkid);
+ if (status != DLADM_STATUS_OK)
+ return (status);
+
+ bzero(&oic, sizeof (oic));
+ oic.oic_linkid = linkid;
+ oic.oic_vnetid = vid;
+ (void) strlcpy(oic.oic_encap, encap, MAXLINKNAMELEN);
+
+ status = DLADM_STATUS_OK;
+ ret = ioctl(dladm_dld_fd(handle), OVERLAY_IOC_CREATE, &oic);
+ if (ret != 0) {
+ /*
+ * It'd be nice if we had private errors so we could better
+ * distinguish between different classes of errors.
+ */
+ status = dladm_errno2status(errno);
+ }
+
+ if (status != DLADM_STATUS_OK) {
+ (void) dladm_destroy_datalink_id(handle, linkid, flags);
+ return (status);
+ }
+
+ slen = strlen(search);
+ for (i = 0; props != NULL && i < props->al_count; i++) {
+ dladm_arg_info_t *aip = &props->al_info[i];
+
+ /*
+ * If it's a property for the search plugin, eg. it has the
+ * prefix '<search>/', then we don't set the property on the
+ * overlay device and instead set it on the varpd instance.
+ */
+ if (strncmp(aip->ai_name, search, slen) == 0 &&
+ aip->ai_name[slen] == '/')
+ continue;
+ status = dladm_overlay_setprop(handle, linkid, aip->ai_name,
+ aip->ai_val, aip->ai_count);
+ if (status != DLADM_STATUS_OK) {
+ (void) dladm_errlist_append(errs,
+ "failed to set property %s",
+ aip->ai_name);
+ (void) dladm_overlay_delete(handle, linkid);
+ return (status);
+ }
+ }
+
+ if ((ret = libvarpd_c_create(&vch, dladm_overlay_doorpath)) != 0) {
+ (void) dladm_errlist_append(errs,
+ "failed to create libvarpd handle: %s", strerror(ret));
+ (void) dladm_overlay_delete(handle, linkid);
+ return (dladm_errno2status(ret));
+ }
+
+ if ((ret = libvarpd_c_instance_create(vch, linkid, search,
+ &id)) != 0) {
+ (void) dladm_errlist_append(errs,
+ "failed to create varpd instance: %s", strerror(ret));
+ libvarpd_c_destroy(vch);
+ (void) dladm_overlay_delete(handle, linkid);
+ return (dladm_errno2status(ret));
+ }
+
+ for (i = 0; props != NULL && i < props->al_count; i++) {
+ dladm_arg_info_t *aip = &props->al_info[i];
+
+ /*
+ * Skip arguments we've processed already.
+ */
+ if (strncmp(aip->ai_name, search, slen) != 0)
+ continue;
+
+ if (aip->ai_name[slen] != '/')
+ continue;
+
+ ret = dladm_overlay_varpd_setprop(handle, vch, id, aip->ai_name,
+ aip->ai_val, aip->ai_count);
+ if (ret != 0) {
+ (void) dladm_errlist_append(errs,
+ "failed to set varpd prop: %s\n",
+ aip->ai_name);
+ (void) libvarpd_c_instance_destroy(vch, id);
+ libvarpd_c_destroy(vch);
+ (void) dladm_overlay_delete(handle, linkid);
+ return (dladm_errno2status(ret));
+ }
+ }
+
+ if ((ret = libvarpd_c_instance_activate(vch, id)) != 0) {
+ (void) dladm_errlist_append(errs,
+ "failed to activate varpd instance: %s", strerror(ret));
+ (void) dladm_overlay_walk_varpd_prop(handle, linkid, id,
+ dladm_overlay_activate_cb, errs);
+ (void) libvarpd_c_instance_destroy(vch, id);
+ libvarpd_c_destroy(vch);
+ (void) dladm_overlay_delete(handle, linkid);
+ return (dladm_errno2status(ret));
+
+ }
+
+ bzero(&oia, sizeof (oia));
+ oia.oia_linkid = linkid;
+ status = DLADM_STATUS_OK;
+ ret = ioctl(dladm_dld_fd(handle), OVERLAY_IOC_ACTIVATE, &oia);
+ if (ret != 0) {
+ ret = errno;
+ (void) dladm_errlist_append(errs, "failed to activate "
+ "device: %s", strerror(ret));
+ (void) libvarpd_c_instance_destroy(vch, id);
+ (void) dladm_overlay_walk_prop(handle, linkid,
+ dladm_overlay_activate_cb, errs, errs);
+ status = dladm_errno2status(ret);
+ (void) libvarpd_c_instance_destroy(vch, id);
+ }
+
+ libvarpd_c_destroy(vch);
+ if (status != DLADM_STATUS_OK)
+ (void) dladm_overlay_delete(handle, linkid);
+
+ return (status);
+}
+
+
+
+typedef struct overlay_walk_cb {
+ dladm_handle_t owc_handle;
+ datalink_id_t owc_linkid;
+ void *owc_arg;
+ dladm_overlay_cache_f owc_func;
+ uint_t owc_mode;
+ uint_t owc_dest;
+} overlay_walk_cb_t;
+
+/* ARGSUSED */
+static int
+dladm_overlay_walk_cache_cb(varpd_client_handle_t *chdl, uint64_t varpdid,
+ const struct ether_addr *key, const varpd_client_cache_entry_t *entry,
+ void *arg)
+{
+ overlay_walk_cb_t *owc = arg;
+ dladm_overlay_point_t point;
+
+ bzero(&point, sizeof (dladm_overlay_point_t));
+ point.dop_dest = owc->owc_dest;
+ point.dop_mac = entry->vcp_mac;
+ point.dop_flags = entry->vcp_flags;
+ point.dop_ip = entry->vcp_ip;
+ point.dop_port = entry->vcp_port;
+
+ if (owc->owc_mode == OVERLAY_TARGET_POINT)
+ point.dop_flags |= DLADM_OVERLAY_F_DEFAULT;
+
+ if (owc->owc_func(owc->owc_handle, owc->owc_linkid, key, &point,
+ owc->owc_arg) == DLADM_WALK_TERMINATE)
+ return (1);
+ return (0);
+}
+
+dladm_status_t
+dladm_overlay_walk_cache(dladm_handle_t handle, datalink_id_t linkid,
+ dladm_overlay_cache_f func, void *arg)
+{
+ int ret;
+ uint_t mode, dest;
+ uint64_t varpdid;
+ varpd_client_handle_t *chdl;
+ overlay_walk_cb_t cbarg;
+
+ if ((ret = libvarpd_c_create(&chdl, dladm_overlay_doorpath)) != 0)
+ return (dladm_errno2status(ret));
+
+ if ((ret = libvarpd_c_instance_lookup(chdl, linkid, &varpdid)) != 0) {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+ }
+
+ if ((ret = libvarpd_c_instance_target_mode(chdl, varpdid,
+ &dest, &mode)) != 0) {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+ }
+
+ cbarg.owc_handle = handle;
+ cbarg.owc_linkid = linkid;
+ cbarg.owc_arg = arg;
+ cbarg.owc_func = func;
+ cbarg.owc_dest = dest;
+ cbarg.owc_mode = mode;
+ ret = libvarpd_c_instance_cache_walk(chdl, varpdid,
+ dladm_overlay_walk_cache_cb, &cbarg);
+ libvarpd_c_destroy(chdl);
+
+ return (dladm_errno2status(ret));
+}
+
+/* ARGSUSED */
+dladm_status_t
+dladm_overlay_cache_flush(dladm_handle_t handle, datalink_id_t linkid)
+{
+ int ret;
+ uint64_t varpdid;
+ varpd_client_handle_t *chdl;
+
+ if ((ret = libvarpd_c_create(&chdl, dladm_overlay_doorpath)) != 0)
+ return (dladm_errno2status(ret));
+
+ if ((ret = libvarpd_c_instance_lookup(chdl, linkid, &varpdid)) != 0) {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+ }
+
+ ret = libvarpd_c_instance_cache_flush(chdl, varpdid);
+ libvarpd_c_destroy(chdl);
+
+ return (dladm_errno2status(ret));
+}
+
+/* ARGSUSED */
+dladm_status_t
+dladm_overlay_cache_delete(dladm_handle_t handle, datalink_id_t linkid,
+ const struct ether_addr *key)
+{
+ int ret;
+ uint64_t varpdid;
+ varpd_client_handle_t *chdl;
+
+ if ((ret = libvarpd_c_create(&chdl, dladm_overlay_doorpath)) != 0)
+ return (dladm_errno2status(ret));
+
+ if ((ret = libvarpd_c_instance_lookup(chdl, linkid, &varpdid)) != 0) {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+ }
+
+ ret = libvarpd_c_instance_cache_delete(chdl, varpdid, key);
+ libvarpd_c_destroy(chdl);
+
+ return (dladm_errno2status(ret));
+}
+
+/* ARGSUSED */
+dladm_status_t
+dladm_overlay_cache_set(dladm_handle_t handle, datalink_id_t linkid,
+ const struct ether_addr *key, char *val)
+{
+ int ret;
+ uint_t dest;
+ uint64_t varpdid;
+ char *ip, *port = NULL;
+ varpd_client_handle_t *chdl;
+ varpd_client_cache_entry_t vcp;
+
+
+ if ((ret = libvarpd_c_create(&chdl, dladm_overlay_doorpath)) != 0)
+ return (dladm_errno2status(ret));
+
+ if ((ret = libvarpd_c_instance_lookup(chdl, linkid, &varpdid)) != 0) {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+ }
+
+ if ((ret = libvarpd_c_instance_target_mode(chdl, varpdid,
+ &dest, NULL)) != 0) {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+ }
+
+ /*
+ * Mode tells us what we should expect in val. It we have more than one
+ * thing listed, the canonical format of it right now is mac,ip:port.
+ */
+ bzero(&vcp, sizeof (varpd_client_cache_entry_t));
+
+ if (strcasecmp(val, "drop") == 0) {
+ vcp.vcp_flags = OVERLAY_TARGET_CACHE_DROP;
+ goto send;
+ }
+
+ if (dest & OVERLAY_PLUGIN_D_ETHERNET) {
+ if (ether_aton_r(val, &vcp.vcp_mac) == NULL) {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(EINVAL));
+ }
+ }
+
+ if (dest & OVERLAY_PLUGIN_D_IP) {
+ if (dest & OVERLAY_PLUGIN_D_ETHERNET) {
+ if ((ip = strchr(val, ',')) == NULL) {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+ }
+ ip++;
+ } else {
+ ip = val;
+ }
+
+ if (dest & OVERLAY_PLUGIN_D_PORT) {
+ if ((port = strchr(val, ':')) == NULL) {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+ }
+ *port = '\0';
+ port++;
+ }
+
+ /* Try v6, then fall back to v4 */
+ ret = inet_pton(AF_INET6, ip, &vcp.vcp_ip);
+ if (ret == -1)
+ abort();
+ if (ret == 0) {
+ struct in_addr v4;
+
+ ret = inet_pton(AF_INET, ip, &v4);
+ if (ret == -1)
+ abort();
+ if (ret == 0) {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+ }
+ IN6_INADDR_TO_V4MAPPED(&v4, &vcp.vcp_ip);
+ }
+ }
+
+ if (dest & OVERLAY_PLUGIN_D_PORT) {
+ char *eptr;
+ unsigned long l;
+ if (port == NULL && (dest & OVERLAY_PLUGIN_D_ETHERNET)) {
+ if ((port = strchr(val, ',')) == NULL) {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(EINVAL));
+ }
+ } else if (port == NULL)
+ port = val;
+
+ errno = 0;
+ l = strtoul(port, &eptr, 10);
+ if (errno != 0 || *eptr != '\0') {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(EINVAL));
+ }
+ if (l == 0 || l > UINT16_MAX) {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(EINVAL));
+ }
+ vcp.vcp_port = l;
+ }
+
+send:
+ ret = libvarpd_c_instance_cache_set(chdl, varpdid, key, &vcp);
+
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+}
+
+/* ARGSUSED */
+dladm_status_t
+dladm_overlay_cache_get(dladm_handle_t handle, datalink_id_t linkid,
+ const struct ether_addr *key, dladm_overlay_point_t *point)
+{
+ int ret;
+ uint_t dest, mode;
+ uint64_t varpdid;
+ varpd_client_handle_t *chdl;
+ varpd_client_cache_entry_t entry;
+
+ if ((ret = libvarpd_c_create(&chdl, dladm_overlay_doorpath)) != 0)
+ return (dladm_errno2status(ret));
+
+ if ((ret = libvarpd_c_instance_lookup(chdl, linkid, &varpdid)) != 0) {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+ }
+
+ if ((ret = libvarpd_c_instance_target_mode(chdl, varpdid,
+ &dest, &mode)) != 0) {
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+ }
+
+ ret = libvarpd_c_instance_cache_get(chdl, varpdid, key, &entry);
+ if (ret == 0) {
+ point->dop_dest = dest;
+ point->dop_mac = entry.vcp_mac;
+ point->dop_flags = entry.vcp_flags;
+ point->dop_ip = entry.vcp_ip;
+ point->dop_port = entry.vcp_port;
+ if (mode == OVERLAY_TARGET_POINT)
+ point->dop_flags |= DLADM_OVERLAY_F_DEFAULT;
+ }
+
+ libvarpd_c_destroy(chdl);
+ return (dladm_errno2status(ret));
+}
+
+dladm_status_t
+dladm_overlay_status(dladm_handle_t handle, datalink_id_t linkid,
+ dladm_overlay_status_f func, void *arg)
+{
+ int ret;
+ dladm_status_t status;
+ overlay_ioc_status_t ois;
+ dladm_overlay_status_t dos;
+
+ ois.ois_linkid = linkid;
+ status = DLADM_STATUS_OK;
+ ret = ioctl(dladm_dld_fd(handle), OVERLAY_IOC_STATUS, &ois);
+ if (ret != 0)
+ status = dladm_errno2status(errno);
+ if (status != DLADM_STATUS_OK)
+ return (status);
+
+ dos.dos_degraded = ois.ois_status == OVERLAY_I_DEGRADED ? B_TRUE :
+ B_FALSE;
+ (void) strlcpy(dos.dos_fmamsg, ois.ois_message,
+ sizeof (dos.dos_fmamsg));
+ func(handle, linkid, &dos, arg);
+ return (DLADM_STATUS_OK);
+}
diff --git a/usr/src/lib/libdladm/common/libdloverlay.h b/usr/src/lib/libdladm/common/libdloverlay.h
new file mode 100644
index 0000000000..39b01ccae3
--- /dev/null
+++ b/usr/src/lib/libdladm/common/libdloverlay.h
@@ -0,0 +1,107 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2015 Joyent, Inc.
+ */
+
+#ifndef _LIBDLOVERLAY_H
+#define _LIBDLOVERLAY_H
+
+/*
+ * libdladm Overlay device routines
+ */
+
+#include <libdladm.h>
+#include <libdladm_impl.h>
+#include <sys/overlay.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define DLADM_OVERLAY_F_DROP 0x0001
+#define DLADM_OVERLAY_F_DEFAULT 0xf000
+
+typedef struct dladm_overlay_point {
+ uint_t dop_dest;
+ struct ether_addr dop_mac;
+ uint16_t dop_flags;
+ struct in6_addr dop_ip;
+ uint16_t dop_port;
+} dladm_overlay_point_t;
+
+typedef struct dladm_overlay_status {
+ boolean_t dos_degraded;
+ char dos_fmamsg[256];
+} dladm_overlay_status_t;
+
+extern dladm_status_t dladm_overlay_create(dladm_handle_t, const char *,
+ const char *, const char *, uint64_t, dladm_arg_list_t *, dladm_errlist_t *,
+ uint32_t);
+extern dladm_status_t dladm_overlay_delete(dladm_handle_t, datalink_id_t);
+
+typedef void (*dladm_overlay_status_f)(dladm_handle_t, datalink_id_t,
+ dladm_overlay_status_t *, void *);
+extern dladm_status_t dladm_overlay_status(dladm_handle_t, datalink_id_t,
+ dladm_overlay_status_f, void *);
+
+extern dladm_status_t dladm_overlay_cache_flush(dladm_handle_t, datalink_id_t);
+extern dladm_status_t dladm_overlay_cache_delete(dladm_handle_t, datalink_id_t,
+ const struct ether_addr *);
+extern dladm_status_t dladm_overlay_cache_set(dladm_handle_t, datalink_id_t,
+ const struct ether_addr *, char *);
+extern dladm_status_t dladm_overlay_cache_get(dladm_handle_t, datalink_id_t,
+ const struct ether_addr *, dladm_overlay_point_t *);
+
+#define DLADM_OVERLAY_PROP_SIZEMAX 256
+#define DLADM_OVERLAY_PROP_NAMELEN 32
+
+typedef struct __dladm_overlay_propinfo *dladm_overlay_propinfo_handle_t;
+
+extern dladm_status_t dladm_overlay_prop_info(dladm_overlay_propinfo_handle_t,
+ const char **, uint_t *, uint_t *, const void **, uint32_t *,
+ const mac_propval_range_t **);
+extern dladm_status_t dladm_overlay_get_prop(dladm_handle_t, datalink_id_t,
+ dladm_overlay_propinfo_handle_t, void *buf, size_t *bufsize);
+
+typedef int (*dladm_overlay_prop_f)(dladm_handle_t, datalink_id_t,
+ dladm_overlay_propinfo_handle_t, void *);
+extern dladm_status_t dladm_overlay_walk_prop(dladm_handle_t, datalink_id_t,
+ dladm_overlay_prop_f, void *arg, dladm_errlist_t *);
+
+typedef int (*dladm_overlay_cache_f)(dladm_handle_t, datalink_id_t,
+ const struct ether_addr *, const dladm_overlay_point_t *, void *);
+extern dladm_status_t dladm_overlay_walk_cache(dladm_handle_t, datalink_id_t,
+ dladm_overlay_cache_f, void *);
+
+/*
+ * Some day we'll want to support being able to set properties after creation.
+ * If we do, the following strawman API might serve us well.
+ *
+ * extern dladm_status_t dladm_overlay_prop_lookup(dladm_handle_t,
+ * datalink_id_t, const char *, dladm_overlay_propinfo_handle_t *);
+ * extern void dladm_overlay_prop_handle_free(dladm_handle_t, datalink_id_t,
+ * dladm_overlay_propinfo_handle_t *);
+ * extern dladm_status_t dladm_overlay_set_prop(dladm_handle_t, datalink_id_t,
+ * dladm_propinfo_handle_t, void *buf, size_t *bufsize);
+ * extern dladm_status_t dladm_overlay_str_to_buf(dladm_handle_t, datalink_id_t,
+ * dladm_overlay_propinfo_handle_t *, const char *, void *, size_t *);
+ * extern dladm_status_t dladm_overlay_buf_to_str(dladm_handle_t, datalink_id_t,
+ * dladm_overlay_propinfo_handle_t *, const void *, const size_t, char *,
+ * size_t *);
+ */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBDLOVERLAY_H */
diff --git a/usr/src/lib/libdladm/common/libdlvlan.c b/usr/src/lib/libdladm/common/libdlvlan.c
index 943728dc03..34c1e6682d 100644
--- a/usr/src/lib/libdladm/common/libdlvlan.c
+++ b/usr/src/lib/libdladm/common/libdlvlan.c
@@ -64,7 +64,7 @@ dladm_vlan_create(dladm_handle_t handle, const char *vlan, datalink_id_t linkid,
{
return (dladm_vnic_create(handle, vlan, linkid,
VNIC_MAC_ADDR_TYPE_PRIMARY, NULL, 0, NULL, 0, vid, VRRP_VRID_NONE,
- AF_UNSPEC, vlan_id_out, proplist, flags | DLADM_OPT_VLAN));
+ AF_UNSPEC, vlan_id_out, proplist, NULL, flags | DLADM_OPT_VLAN));
}
/*
diff --git a/usr/src/lib/libdladm/common/libdlvnic.c b/usr/src/lib/libdladm/common/libdlvnic.c
index 44f8bb2726..47d007a1e2 100644
--- a/usr/src/lib/libdladm/common/libdlvnic.c
+++ b/usr/src/lib/libdladm/common/libdlvnic.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015, Joyent Inc.
*/
#include <stdio.h>
@@ -399,7 +400,7 @@ dladm_vnic_create(dladm_handle_t handle, const char *vnic, datalink_id_t linkid,
vnic_mac_addr_type_t mac_addr_type, uchar_t *mac_addr, uint_t mac_len,
int *mac_slot, uint_t mac_prefix_len, uint16_t vid, vrid_t vrid,
int af, datalink_id_t *vnic_id_out, dladm_arg_list_t *proplist,
- uint32_t flags)
+ dladm_errlist_t *errs, uint32_t flags)
{
dladm_vnic_attr_t attr;
datalink_id_t vnic_id;
@@ -539,27 +540,40 @@ dladm_vnic_create(dladm_handle_t handle, const char *vnic, datalink_id_t linkid,
vnic_created = B_TRUE;
/* Save vnic configuration and its properties */
- if (!(flags & DLADM_OPT_PERSIST))
- goto done;
+ if (flags & DLADM_OPT_PERSIST) {
+ status = dladm_vnic_persist_conf(handle, name, &attr, class);
+ if (status == DLADM_STATUS_OK)
+ conf_set = B_TRUE;
+ }
- status = dladm_vnic_persist_conf(handle, name, &attr, class);
- if (status != DLADM_STATUS_OK)
- goto done;
- conf_set = B_TRUE;
+done:
+ if (status == DLADM_STATUS_OK && proplist != NULL) {
+ uint32_t flg;
+
+ flg = flags & (DLADM_OPT_PERSIST | DLADM_OPT_ACTIVE);
- if (proplist != NULL) {
for (i = 0; i < proplist->al_count; i++) {
dladm_arg_info_t *aip = &proplist->al_info[i];
+ if (strcmp(aip->ai_name, "zone") == 0 &&
+ flags & DLADM_OPT_TRANSIENT)
+ flg |= DLADM_OPT_TRANSIENT;
+ else
+ flg &= ~DLADM_OPT_TRANSIENT;
+
status = dladm_set_linkprop(handle, vnic_id,
- aip->ai_name, aip->ai_val, aip->ai_count,
- DLADM_OPT_PERSIST);
- if (status != DLADM_STATUS_OK)
+ aip->ai_name, aip->ai_val, aip->ai_count, flg);
+ if (status != DLADM_STATUS_OK) {
+ char errmsg[DLADM_STRSIZE];
+ (void) dladm_errlist_append(errs,
+ "failed to set property %s: %s",
+ aip->ai_name,
+ dladm_status2str(status, errmsg));
break;
+ }
}
}
-done:
if (status != DLADM_STATUS_OK) {
if (conf_set)
(void) dladm_remove_conf(handle, vnic_id);
diff --git a/usr/src/lib/libdladm/common/libdlvnic.h b/usr/src/lib/libdladm/common/libdlvnic.h
index 94b656aadf..839b2de9f2 100644
--- a/usr/src/lib/libdladm/common/libdlvnic.h
+++ b/usr/src/lib/libdladm/common/libdlvnic.h
@@ -55,7 +55,8 @@ typedef struct dladm_vnic_attr {
extern dladm_status_t dladm_vnic_create(dladm_handle_t, const char *,
datalink_id_t, vnic_mac_addr_type_t, uchar_t *,
uint_t, int *, uint_t, uint16_t, vrid_t, int,
- datalink_id_t *, dladm_arg_list_t *, uint32_t);
+ datalink_id_t *, dladm_arg_list_t *,
+ dladm_errlist_t *, uint32_t);
extern dladm_status_t dladm_vnic_delete(dladm_handle_t, datalink_id_t,
uint32_t);
diff --git a/usr/src/lib/libdladm/common/linkprop.c b/usr/src/lib/libdladm/common/linkprop.c
index 171e23dc2c..5fdb082472 100644
--- a/usr/src/lib/libdladm/common/linkprop.c
+++ b/usr/src/lib/libdladm/common/linkprop.c
@@ -154,11 +154,13 @@ static pd_getf_t get_zone, get_autopush, get_rate_mod, get_rate,
get_bridge_pvid, get_protection, get_rxrings,
get_txrings, get_cntavail, get_secondary_macs,
get_allowedips, get_allowedcids, get_pool,
- get_rings_range, get_linkmode_prop;
+ get_rings_range, get_linkmode_prop,
+ get_promisc_filtered;
static pd_setf_t set_zone, set_rate, set_powermode, set_radio,
set_public_prop, set_resource, set_stp_prop,
- set_bridge_forward, set_bridge_pvid, set_secondary_macs;
+ set_bridge_forward, set_bridge_pvid, set_secondary_macs,
+ set_promisc_filtered;
static pd_checkf_t check_zone, check_autopush, check_rate, check_hoplimit,
check_encaplim, check_uint32, check_maxbw, check_cpus,
@@ -382,6 +384,8 @@ static link_attr_t link_attr[] = {
{ MAC_PROP_IB_LINKMODE, sizeof (uint32_t), "linkmode"},
+ { MAC_PROP_VN_PROMISC_FILTERED, sizeof (boolean_t), "promisc-filtered"},
+
{ MAC_PROP_SECONDARY_ADDRS, sizeof (mac_secondary_addr_t),
"secondary-macs"},
@@ -439,6 +443,11 @@ static val_desc_t link_protect_vals[] = {
{ "dhcp-nospoof", MPT_DHCPNOSPOOF },
};
+static val_desc_t link_promisc_filtered_vals[] = {
+ { "off", B_FALSE },
+ { "on", B_TRUE },
+};
+
static val_desc_t dladm_wlan_radio_vals[] = {
{ "on", DLADM_WLAN_RADIO_ON },
{ "off", DLADM_WLAN_RADIO_OFF }
@@ -755,6 +764,12 @@ static prop_desc_t prop_table[] = {
set_resource, NULL, get_protection, check_prop, 0,
DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE },
+ { "promisc-filtered", { "on", 1 },
+ link_promisc_filtered_vals, VALCNT(link_promisc_filtered_vals),
+ set_promisc_filtered, NULL, get_promisc_filtered, check_prop, 0,
+ DATALINK_CLASS_VNIC, DATALINK_ANY_MEDIATYPE },
+
+
{ "allowed-ips", { "--", 0 },
NULL, 0, set_resource, NULL,
get_allowedips, check_allowedips, PD_CHECK_ALLOC,
@@ -1586,6 +1601,9 @@ set_zone(dladm_handle_t handle, prop_desc_t *pdp, datalink_id_t linkid,
if (zid_new == zid_old)
return (DLADM_STATUS_OK);
+ if (flags & DLADM_OPT_TRANSIENT)
+ dzp->diz_transient = B_TRUE;
+
if ((status = set_public_prop(handle, pdp, linkid, vdp, val_cnt,
flags, media)) != DLADM_STATUS_OK)
return (status);
@@ -4827,3 +4845,50 @@ get_linkmode_prop(dladm_handle_t handle, prop_desc_t *pdp,
*val_cnt = 1;
return (DLADM_STATUS_OK);
}
+
+/*ARGSUSED*/
+static dladm_status_t
+get_promisc_filtered(dladm_handle_t handle, prop_desc_t *pdp,
+ datalink_id_t linkid, char **prop_val, uint_t *val_cnt,
+ datalink_media_t media, uint_t flags, uint_t *perm_flags)
+{
+ char *s;
+ dladm_status_t status;
+ boolean_t filt;
+
+ status = i_dladm_get_public_prop(handle, linkid, pdp->pd_name, flags,
+ perm_flags, &filt, sizeof (filt));
+ if (status != DLADM_STATUS_OK)
+ return (status);
+
+ if (filt != 0)
+ s = link_promisc_filtered_vals[1].vd_name;
+ else
+ s = link_promisc_filtered_vals[0].vd_name;
+ (void) snprintf(prop_val[0], DLADM_STRSIZE, "%s", s);
+
+ *val_cnt = 1;
+ return (DLADM_STATUS_OK);
+}
+
+/* ARGSUSED */
+static dladm_status_t
+set_promisc_filtered(dladm_handle_t handle, prop_desc_t *pdp,
+ datalink_id_t linkid, val_desc_t *vdp, uint_t val_cnt, uint_t flags,
+ datalink_media_t media)
+{
+ dld_ioc_macprop_t *dip;
+ dladm_status_t status = DLADM_STATUS_OK;
+
+ dip = i_dladm_buf_alloc_by_name(0, linkid, pdp->pd_name,
+ 0, &status);
+
+ if (dip == NULL)
+ return (status);
+
+ (void) memcpy(dip->pr_val, &vdp->vd_val, dip->pr_valsize);
+ status = i_dladm_macprop(handle, dip, B_TRUE);
+
+ free(dip);
+ return (status);
+}
diff --git a/usr/src/lib/libdladm/common/llib-ldladm b/usr/src/lib/libdladm/common/llib-ldladm
index 8e5eac0614..e5366fb92d 100644
--- a/usr/src/lib/libdladm/common/llib-ldladm
+++ b/usr/src/lib/libdladm/common/llib-ldladm
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
/*LINTLIBRARY*/
@@ -38,3 +39,4 @@
#include <libdlether.h>
#include <libdlsim.h>
#include <libdlbridge.h>
+#include <libdloverlay.h>
diff --git a/usr/src/lib/libdladm/common/mapfile-vers b/usr/src/lib/libdladm/common/mapfile-vers
index 63a86529fc..589bbf5330 100644
--- a/usr/src/lib/libdladm/common/mapfile-vers
+++ b/usr/src/lib/libdladm/common/mapfile-vers
@@ -20,6 +20,7 @@
#
#
# Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2015 Joyent, Inc.
#
#
@@ -134,6 +135,7 @@ SYMBOL_VERSION SUNWprivate_1.1 {
dladm_remap_datalink_id;
dladm_up_datalink_id;
dladm_name2info;
+ dladm_zname2info;
dladm_datalink_id2info;
dladm_walk_datalink_id;
dladm_create_conf;
@@ -269,6 +271,23 @@ SYMBOL_VERSION SUNWprivate_1.1 {
dladm_strs2range;
dladm_range2list;
dladm_list2range;
+
+ dladm_errlist_init;
+ dladm_errlist_reset;
+ dladm_errlist_append;
+
+ dladm_overlay_create;
+ dladm_overlay_delete;
+ dladm_overlay_status;
+ dladm_overlay_prop_info;
+ dladm_overlay_get_prop;
+ dladm_overlay_walk_prop;
+
+ dladm_overlay_cache_set;
+ dladm_overlay_cache_get;
+ dladm_overlay_cache_delete;
+ dladm_overlay_cache_flush;
+ dladm_overlay_walk_cache;
local:
*;
};
diff --git a/usr/src/lib/libdlpi/common/libdlpi.c b/usr/src/lib/libdlpi/common/libdlpi.c
index 1a639a5db9..aa97afee04 100644
--- a/usr/src/lib/libdlpi/common/libdlpi.c
+++ b/usr/src/lib/libdlpi/common/libdlpi.c
@@ -22,6 +22,9 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2014, Joyent, Inc.
+ */
/*
* Data-Link Provider Interface (Version 2)
@@ -51,7 +54,7 @@
#include "libdlpi_impl.h"
-static int i_dlpi_open(const char *, int *, uint_t, boolean_t);
+static int i_dlpi_open(const char *, const char *, int *, uint_t, boolean_t);
static int i_dlpi_style1_open(dlpi_impl_t *);
static int i_dlpi_style2_open(dlpi_impl_t *);
static int i_dlpi_checkstyle(dlpi_impl_t *, t_uscalar_t);
@@ -130,7 +133,8 @@ dlpi_walk(dlpi_walkfunc_t *fn, void *arg, uint_t flags)
}
int
-dlpi_open(const char *linkname, dlpi_handle_t *dhp, uint_t flags)
+dlpi_open_zone(const char *linkname, const char *zonename, dlpi_handle_t *dhp,
+ uint_t flags)
{
int retval, on = 1;
ifspec_t ifsp;
@@ -164,6 +168,16 @@ dlpi_open(const char *linkname, dlpi_handle_t *dhp, uint_t flags)
if (getenv("DLPI_DEVONLY") != NULL)
dip->dli_oflags |= DLPI_DEVONLY;
+ if (zonename == NULL) {
+ dip->dli_zonename[0] = '\0';
+ } else {
+ if (strlcpy(dip->dli_zonename, zonename,
+ sizeof (dip->dli_zonename)) >= sizeof (dip->dli_zonename)) {
+ free(dip);
+ return (DLPI_EZONENAMEINVAL);
+ }
+ }
+
/* Copy linkname provided to the function. */
if (strlcpy(dip->dli_linkname, linkname, sizeof (dip->dli_linkname)) >=
sizeof (dip->dli_linkname)) {
@@ -237,6 +251,12 @@ dlpi_open(const char *linkname, dlpi_handle_t *dhp, uint_t flags)
return (DLPI_SUCCESS);
}
+int
+dlpi_open(const char *linkname, dlpi_handle_t *dhp, uint_t flags)
+{
+ return (dlpi_open_zone(linkname, NULL, dhp, flags));
+}
+
void
dlpi_close(dlpi_handle_t dh)
{
@@ -1013,6 +1033,15 @@ dlpi_iftype(uint_t dlpitype)
* /dev - if DLPI_DEVONLY is specified, or if there is no
* data-link with the specified name (could be /dev/ip)
*
+ * If a zone's name has been specified, eg. via dlpi_open_zone, then we instead
+ * will check in:
+ *
+ * /dev/ipnet/zone/%z/ - if DLPI_DEVIPNET is specified
+ * /dev/net/zone/%z/ - if a data-link with the specified name exists.
+ *
+ * When a zone name is specified, all of the fallback procedures that we opt for
+ * in the normal case are not used.
+ *
* In particular, if DLPI_DEVIPNET is not specified, this function is used to
* open a data-link node, or "/dev/ip" node. It is usually be called firstly
* with style1 being B_TRUE, and if that fails and the return value is not
@@ -1040,7 +1069,8 @@ dlpi_iftype(uint_t dlpitype)
* the second style-2 open attempt.
*/
static int
-i_dlpi_open(const char *provider, int *fd, uint_t flags, boolean_t style1)
+i_dlpi_open(const char *provider, const char *zonename, int *fd, uint_t flags,
+ boolean_t style1)
{
char path[MAXPATHLEN];
int oflags;
@@ -1051,7 +1081,13 @@ i_dlpi_open(const char *provider, int *fd, uint_t flags, boolean_t style1)
oflags |= O_EXCL;
if (flags & DLPI_DEVIPNET) {
- (void) snprintf(path, sizeof (path), "/dev/ipnet/%s", provider);
+ if (*zonename != '\0') {
+ (void) snprintf(path, sizeof (path),
+ "/dev/ipnet/zone/%s/%s", zonename, provider);
+ } else {
+ (void) snprintf(path, sizeof (path), "/dev/ipnet/%s",
+ provider);
+ }
if ((*fd = open(path, oflags)) != -1)
return (DLPI_SUCCESS);
else
@@ -1070,7 +1106,13 @@ i_dlpi_open(const char *provider, int *fd, uint_t flags, boolean_t style1)
if (dlpi_parselink(provider, driver, &ppa) != DLPI_SUCCESS)
goto fallback;
- (void) snprintf(path, sizeof (path), "/dev/net/%s", provider);
+ if (*zonename != '\0') {
+ (void) snprintf(path, sizeof (path),
+ "/dev/net/zone/%s/%s", zonename, provider);
+ } else {
+ (void) snprintf(path, sizeof (path), "/dev/net/%s",
+ provider);
+ }
if ((*fd = open(path, oflags)) != -1)
return (DLPI_SUCCESS);
@@ -1130,7 +1172,8 @@ i_dlpi_style1_open(dlpi_impl_t *dip)
int retval, save_errno;
int fd;
- retval = i_dlpi_open(dip->dli_linkname, &fd, dip->dli_oflags, B_TRUE);
+ retval = i_dlpi_open(dip->dli_linkname, dip->dli_zonename, &fd,
+ dip->dli_oflags, B_TRUE);
if (retval != DLPI_SUCCESS)
return (retval);
dip->dli_fd = fd;
@@ -1153,7 +1196,8 @@ i_dlpi_style2_open(dlpi_impl_t *dip)
int fd;
int retval, save_errno;
- retval = i_dlpi_open(dip->dli_provider, &fd, dip->dli_oflags, B_FALSE);
+ retval = i_dlpi_open(dip->dli_provider, dip->dli_zonename, &fd,
+ dip->dli_oflags, B_FALSE);
if (retval != DLPI_SUCCESS)
return (retval);
dip->dli_fd = fd;
@@ -1571,7 +1615,8 @@ static const char *libdlpi_errlist[] = {
/* DLPI_ENOTENOTSUP */
"invalid DLPI notification type", /* DLPI_ENOTEINVAL */
"invalid DLPI notification id", /* DLPI_ENOTEIDINVAL */
- "DLPI_IPNETINFO not supported" /* DLPI_EIPNETINFONOTSUP */
+ "DLPI_IPNETINFO not supported", /* DLPI_EIPNETINFONOTSUP */
+ "invalid zone name" /* DLPI_EZONENAMEINVAL */
};
const char *
diff --git a/usr/src/lib/libdlpi/common/libdlpi.h b/usr/src/lib/libdlpi/common/libdlpi.h
index 993ac1b7a4..364413ee3a 100644
--- a/usr/src/lib/libdlpi/common/libdlpi.h
+++ b/usr/src/lib/libdlpi/common/libdlpi.h
@@ -93,6 +93,7 @@ enum {
DLPI_ENOTENOTSUP, /* DLPI notification not supported by link */
DLPI_ENOTEIDINVAL, /* invalid DLPI notification id */
DLPI_EIPNETINFONOTSUP, /* DLPI_IPNETINFO not supported */
+ DLPI_EZONENAMEINVAL, /* invalid zone name */
DLPI_ERRMAX /* Highest + 1 libdlpi error code */
};
@@ -184,6 +185,7 @@ typedef boolean_t dlpi_walkfunc_t(const char *, void *);
extern void dlpi_walk(dlpi_walkfunc_t *, void *, uint_t);
extern int dlpi_open(const char *, dlpi_handle_t *, uint_t);
+extern int dlpi_open_zone(const char *, const char *, dlpi_handle_t *, uint_t);
extern void dlpi_close(dlpi_handle_t);
extern int dlpi_info(dlpi_handle_t, dlpi_info_t *, uint_t);
extern int dlpi_bind(dlpi_handle_t, uint_t, uint_t *);
diff --git a/usr/src/lib/libdlpi/common/libdlpi_impl.h b/usr/src/lib/libdlpi/common/libdlpi_impl.h
index 70708ff5af..8969cce7cb 100644
--- a/usr/src/lib/libdlpi/common/libdlpi_impl.h
+++ b/usr/src/lib/libdlpi/common/libdlpi_impl.h
@@ -28,6 +28,7 @@
#include <libdlpi.h>
#include <sys/sysmacros.h>
+#include <sys/zone.h>
#ifdef __cplusplus
extern "C" {
@@ -112,6 +113,8 @@ typedef struct dlpi_impl_s {
/* full linkname including PPA */
char dli_provider[DLPI_LINKNAME_MAX];
/* only provider name */
+ char dli_zonename[ZONENAME_MAX];
+ /* optionally specified zone */
t_uscalar_t dli_style; /* style 1 or 2 */
uint_t dli_saplen; /* bound SAP length */
uint_t dli_sap; /* bound SAP value */
diff --git a/usr/src/lib/libdlpi/common/mapfile-vers b/usr/src/lib/libdlpi/common/mapfile-vers
index ed3231dc92..c818e5e660 100644
--- a/usr/src/lib/libdlpi/common/mapfile-vers
+++ b/usr/src/lib/libdlpi/common/mapfile-vers
@@ -67,6 +67,11 @@ SYMBOL_VERSION SUNW_1.1 { # first release of libdlpi, Solaris 11
SYMBOL_VERSION SUNWprivate {
global:
+ #
+ # dlpi_open_zone should be moved to a new public section once it is
+ # upstreamed into illumos-gate .
+ #
+ dlpi_open_zone;
dlpi_parselink;
dlpi_makelink;
dlpi_style;
diff --git a/usr/src/lib/libdns_sd/java/com/apple/dnssd/DNSSDRecordRegistrar.java b/usr/src/lib/libdns_sd/java/com/apple/dnssd/DNSSDRecordRegistrar.java
index 62c5330840..569c51dd3a 100644
--- a/usr/src/lib/libdns_sd/java/com/apple/dnssd/DNSSDRecordRegistrar.java
+++ b/usr/src/lib/libdns_sd/java/com/apple/dnssd/DNSSDRecordRegistrar.java
@@ -5,9 +5,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -23,7 +23,7 @@ package com.apple.dnssd;
public interface DNSSDRecordRegistrar extends DNSSDService
{
- /** Register an independent {@link DNSRecord}.<P>
+ /** Register an independent {@link DNSRecord}.<P>
@param flags
Possible values are SHARED or UNIQUE (see flag type definitions for details).
<P>
@@ -40,7 +40,7 @@ public interface DNSSDRecordRegistrar extends DNSSDService
as defined in nameser.h.
<P>
@param rrclass
- The class of the resource record, as defined in nameser.h
+ The class of the resource record, as defined in nameser.h
(usually 1 for the Internet class).
<P>
@param rdata
@@ -54,8 +54,8 @@ public interface DNSSDRecordRegistrar extends DNSSDService
@throws SecurityException If a security manager is present and denies <tt>RuntimePermission("getDNSSDInstance")</tt>.
@see RuntimePermission
*/
- public DNSRecord registerRecord( int flags, int ifIndex, String fullname, int rrtype,
+ public DNSRecord registerRecord( int flags, int ifIndex, String fullname, int rrtype,
int rrclass, byte[] rdata, int ttl)
throws DNSSDException;
-}
+}
diff --git a/usr/src/lib/libdtrace/Makefile.com b/usr/src/lib/libdtrace/Makefile.com
index f43915ec06..9f3002320a 100644
--- a/usr/src/lib/libdtrace/Makefile.com
+++ b/usr/src/lib/libdtrace/Makefile.com
@@ -86,6 +86,7 @@ DLIBSRCS += \
io.d \
ip.d \
iscsit.d \
+ mac.d \
net.d \
nfs.d \
nfssrv.d \
@@ -98,7 +99,8 @@ DLIBSRCS += \
sysevent.d \
tcp.d \
udp.d \
- unistd.d
+ unistd.d \
+ vnd.d
include ../../Makefile.lib
@@ -111,6 +113,7 @@ CLEANFILES += dt_lex.c dt_grammar.c dt_grammar.h y.output
CLEANFILES += ../common/procfs.sed ../common/procfs.d
CLEANFILES += ../common/io.sed ../common/io.d
CLEANFILES += ../common/ip.sed ../common/ip.d
+CLEANFILES += ../common/mac.sed ../common/mac.d
CLEANFILES += ../common/net.sed ../common/net.d
CLEANFILES += ../common/errno.d ../common/signal.d
CLEANFILES += ../common/dt_errtags.c ../common/dt_names.c
@@ -132,7 +135,7 @@ CERRWARN += -_gcc=-Wno-uninitialized
CERRWARN += -_gcc=-Wno-switch
YYCFLAGS =
-LDLIBS += -lgen -lproc -lrtld_db -lnsl -lsocket -lctf -lelf -lc
+LDLIBS += -lgen -lproc -lrtld_db -lnsl -lsocket -lctf -lelf -lc -lzonecfg
DRTILDLIBS = $(LDLIBS.lib) -lc
LIBDAUDITLIBS = $(LDLIBS.lib) -lmapmalloc -lc -lproc
@@ -204,6 +207,9 @@ pics/dt_lex.o pics/dt_grammar.o := CCVERBOSE =
../common/ip.d: ../common/ip.sed ../common/ip.d.in
sed -f ../common/ip.sed < ../common/ip.d.in > $@
+../common/mac.d: ../common/mac.sed ../common/mac.d.in
+ sed -f ../common/mac.sed < ../common/mac.d.in > $@
+
../common/net.d: ../common/net.sed ../common/net.d.in
sed -f ../common/net.sed < ../common/net.d.in > $@
diff --git a/usr/src/lib/libdtrace/common/dt_decl.c b/usr/src/lib/libdtrace/common/dt_decl.c
index bbb561d027..c9bd469ddb 100644
--- a/usr/src/lib/libdtrace/common/dt_decl.c
+++ b/usr/src/lib/libdtrace/common/dt_decl.c
@@ -22,7 +22,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
- * Copyright (c) 2013 Joyent, Inc. All rights reserved.
+ * Copyright (c) 2015 Joyent, Inc. All rights reserved.
*/
#include <strings.h>
@@ -652,7 +652,7 @@ dt_decl_member(dt_node_t *dnp)
}
if (ctf_add_member(dsp->ds_ctfp, dsp->ds_type,
- ident, dtt.dtt_type) == CTF_ERR) {
+ ident, dtt.dtt_type, ULONG_MAX) == CTF_ERR) {
xyerror(D_UNKNOWN, "failed to define member '%s': %s\n",
idname, ctf_errmsg(ctf_errno(dsp->ds_ctfp)));
}
diff --git a/usr/src/lib/libdtrace/common/dt_impl.h b/usr/src/lib/libdtrace/common/dt_impl.h
index c6ed8ceaac..2d824ba640 100644
--- a/usr/src/lib/libdtrace/common/dt_impl.h
+++ b/usr/src/lib/libdtrace/common/dt_impl.h
@@ -267,6 +267,7 @@ struct dtrace_hdl {
uint_t dt_droptags; /* boolean: set via -xdroptags */
uint_t dt_active; /* boolean: set once tracing is active */
uint_t dt_stopped; /* boolean: set once tracing is stopped */
+ uint_t dt_optset; /* boolean: set once options have been set */
processorid_t dt_beganon; /* CPU that executed BEGIN probe (if any) */
processorid_t dt_endedon; /* CPU that executed END probe (if any) */
uint_t dt_oflags; /* dtrace open-time options (see dtrace.h) */
diff --git a/usr/src/lib/libdtrace/common/dt_module.c b/usr/src/lib/libdtrace/common/dt_module.c
index 0288f329da..4c9a7ce66b 100644
--- a/usr/src/lib/libdtrace/common/dt_module.c
+++ b/usr/src/lib/libdtrace/common/dt_module.c
@@ -1372,6 +1372,77 @@ dtrace_lookup_by_addr(dtrace_hdl_t *dtp, GElf_Addr addr,
return (0);
}
+/*
+ * We've been asked to look up something inside a pid related module and it has
+ * been qualified with a library name. In that case, we may have to split this
+ * up into the library and the type itself, which will be separated by an '`'
+ * character. This is complicated further by the fact that the keyword for a
+ * struct, union, or enum, will precede the library. Hence we may have something
+ * that looks like "struct libsocket.so.1`msghdr" in name and we need to
+ * transform that into "libsocket.so.1" and "struct msghdr".
+ */
+int
+dtrace_lookup_fixup_pidtype(const char *name, char **libp, char **typep)
+{
+ int len, i;
+ char *split = NULL, *lib, *buf;
+ char *base;
+ char *keywords[] = { "struct ", "union ", "enum ", NULL };
+
+ if (name == NULL)
+ return (-1);
+
+ *libp = NULL;
+ *typep = NULL;
+ buf = strdup(name);
+ if (buf == NULL)
+ return (-1);
+
+ i = 0;
+ lib = buf;
+ while (keywords[i] != NULL) {
+ base = keywords[i];
+ len = strlen(base);
+ if (strncmp(name, base, len) == 0) {
+ lib += len;
+ break;
+ }
+ i++;
+ }
+
+ split = strchr(buf, '`');
+ assert(split != NULL);
+ *split = '\0';
+ split++;
+ if (lib == buf) {
+ *libp = strdup(buf);
+ *typep = strdup(split);
+ if (*libp == NULL || *typep == NULL)
+ goto err;
+ free(buf);
+ return (0);
+ } else {
+ assert(len > 0);
+ assert(base != NULL);
+
+ *libp = strdup(lib);
+ if (*libp == NULL)
+ goto err;
+ if (asprintf(typep, "%s%s", base, split) == -1)
+ goto err;
+ free(buf);
+ return (0);
+ }
+
+err:
+ free(buf);
+ free(*libp);
+ *libp = NULL;
+ free(*typep);
+ *typep = NULL;
+ return (-1);
+}
+
int
dtrace_lookup_by_type(dtrace_hdl_t *dtp, const char *object, const char *name,
dtrace_typeinfo_t *tip)
@@ -1383,7 +1454,6 @@ dtrace_lookup_by_type(dtrace_hdl_t *dtp, const char *object, const char *name,
uint_t n, i;
int justone;
ctf_file_t *fp;
- char *buf, *p, *q;
uint_t mask = 0; /* mask of dt_module flags to match */
uint_t bits = 0; /* flag bits that must be present */
@@ -1437,19 +1507,19 @@ dtrace_lookup_by_type(dtrace_hdl_t *dtp, const char *object, const char *name,
id = ctf_lookup_by_name(dmp->dm_ctfp, name);
fp = dmp->dm_ctfp;
} else {
- if ((p = strchr(name, '`')) != NULL) {
- buf = strdup(name);
- if (buf == NULL)
+ dt_dprintf("Trying to find userland type: %s\n", name);
+ if (strchr(name, '`') != NULL) {
+ char *lib, *type;
+ if (dtrace_lookup_fixup_pidtype(name, &lib,
+ &type) != 0) {
return (dt_set_errno(dtp, EDT_NOMEM));
- p = strchr(buf, '`');
- if ((q = strchr(p + 1, '`')) != NULL)
- p = q;
- *p = '\0';
- fp = dt_module_getctflib(dtp, dmp, buf);
+ }
+ fp = dt_module_getctflib(dtp, dmp, lib);
if (fp == NULL || (id = ctf_lookup_by_name(fp,
- p + 1)) == CTF_ERR)
+ type)) == CTF_ERR)
id = CTF_ERR;
- free(buf);
+ free(lib);
+ free(type);
} else {
for (i = 0; i < dmp->dm_nctflibs; i++) {
fp = dmp->dm_libctfp[i];
diff --git a/usr/src/lib/libdtrace/common/dt_open.c b/usr/src/lib/libdtrace/common/dt_open.c
index ffb2fe9a8f..74a9cccfef 100644
--- a/usr/src/lib/libdtrace/common/dt_open.c
+++ b/usr/src/lib/libdtrace/common/dt_open.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
@@ -40,6 +40,7 @@
#include <fcntl.h>
#include <errno.h>
#include <assert.h>
+#include <zone.h>
#define _POSIX_PTHREAD_SEMANTICS
#include <dirent.h>
@@ -683,8 +684,8 @@ const dtrace_pattr_t _dtrace_prvdesc = {
{ DTRACE_STABILITY_UNSTABLE, DTRACE_STABILITY_UNSTABLE, DTRACE_CLASS_COMMON },
};
-const char *_dtrace_defcpp = "/usr/ccs/lib/cpp"; /* default cpp(1) to invoke */
-const char *_dtrace_defld = "/usr/ccs/bin/ld"; /* default ld(1) to invoke */
+const char *_dtrace_defcpp = "/usr/lib/cpp"; /* default cpp(1) to invoke */
+const char *_dtrace_defld = "/usr/bin/ld"; /* default ld(1) to invoke */
const char *_dtrace_libdir = "/usr/lib/dtrace"; /* default library directory */
const char *_dtrace_provdir = "/dev/dtrace/provider"; /* provider directory */
@@ -824,6 +825,8 @@ dt_vopen(int version, int flags, int *errp,
dt_provmod_t *provmod = NULL;
int i, err;
struct rlimit rl;
+ const char *zroot;
+ char *libpath = NULL;
const dt_intrinsic_t *dinp;
const dt_typedef_t *dtyp;
@@ -956,11 +959,19 @@ alloc:
dtp->dt_provs = calloc(dtp->dt_provbuckets, sizeof (dt_provider_t *));
dt_proc_init(dtp);
dtp->dt_vmax = DT_VERS_LATEST;
- dtp->dt_cpp_path = strdup(_dtrace_defcpp);
+ zroot = zone_get_nroot();
+ if (zroot != NULL) {
+ (void) asprintf(&dtp->dt_ld_path, "%s/%s", zroot,
+ _dtrace_defld);
+ (void) asprintf(&dtp->dt_cpp_path, "%s/%s", zroot,
+ _dtrace_defcpp);
+ } else {
+ dtp->dt_ld_path = strdup(_dtrace_defld);
+ dtp->dt_cpp_path = strdup(_dtrace_defcpp);
+ }
dtp->dt_cpp_argv = malloc(sizeof (char *));
dtp->dt_cpp_argc = 1;
dtp->dt_cpp_args = 1;
- dtp->dt_ld_path = strdup(_dtrace_defld);
dtp->dt_provmod = provmod;
dtp->dt_vector = vector;
dtp->dt_varg = arg;
@@ -1134,13 +1145,13 @@ alloc:
* Add intrinsic pointer types that are needed to initialize printf
* format dictionary types (see table in dt_printf.c).
*/
- (void) ctf_add_pointer(dmp->dm_ctfp, CTF_ADD_ROOT,
+ (void) ctf_add_pointer(dmp->dm_ctfp, CTF_ADD_ROOT, NULL,
ctf_lookup_by_name(dmp->dm_ctfp, "void"));
- (void) ctf_add_pointer(dmp->dm_ctfp, CTF_ADD_ROOT,
+ (void) ctf_add_pointer(dmp->dm_ctfp, CTF_ADD_ROOT, NULL,
ctf_lookup_by_name(dmp->dm_ctfp, "char"));
- (void) ctf_add_pointer(dmp->dm_ctfp, CTF_ADD_ROOT,
+ (void) ctf_add_pointer(dmp->dm_ctfp, CTF_ADD_ROOT, NULL,
ctf_lookup_by_name(dmp->dm_ctfp, "int"));
if (ctf_update(dmp->dm_ctfp) != 0) {
@@ -1200,11 +1211,11 @@ alloc:
ctc.ctc_argc = 0;
ctc.ctc_flags = 0;
- dtp->dt_type_func = ctf_add_function(dmp->dm_ctfp,
+ dtp->dt_type_func = ctf_add_funcptr(dmp->dm_ctfp,
CTF_ADD_ROOT, &ctc, NULL);
- dtp->dt_type_fptr = ctf_add_pointer(dmp->dm_ctfp,
- CTF_ADD_ROOT, dtp->dt_type_func);
+ dtp->dt_type_fptr = ctf_add_pointer(dmp->dm_ctfp, CTF_ADD_ROOT, NULL,
+ dtp->dt_type_func);
/*
* We also insert CTF definitions for the special D intrinsic types
@@ -1305,9 +1316,15 @@ alloc:
* compile, and to provide better error reporting (because the full
* reporting of compiler errors requires dtrace_open() to succeed).
*/
- if (dtrace_setopt(dtp, "libdir", _dtrace_libdir) != 0)
+ if (zroot != NULL)
+ (void) asprintf(&libpath, "%s/%s", zroot, _dtrace_libdir);
+ if (dtrace_setopt(dtp, "libdir",
+ libpath != NULL ? libpath : _dtrace_libdir) != 0)
return (set_open_errno(dtp, errp, dtp->dt_errno));
+ if (libpath != NULL)
+ free(libpath);
+
return (dtp);
}
diff --git a/usr/src/lib/libdtrace/common/dt_options.c b/usr/src/lib/libdtrace/common/dt_options.c
index 201b50a177..be985d6dab 100644
--- a/usr/src/lib/libdtrace/common/dt_options.c
+++ b/usr/src/lib/libdtrace/common/dt_options.c
@@ -41,6 +41,8 @@
#include <alloca.h>
#include <errno.h>
#include <fcntl.h>
+#include <zone.h>
+#include <libzonecfg.h>
#include <dt_impl.h>
#include <dt_string.h>
@@ -854,6 +856,44 @@ dt_opt_bufresize(dtrace_hdl_t *dtp, const char *arg, uintptr_t option)
return (0);
}
+/*ARGSUSED*/
+static int
+dt_opt_zone(dtrace_hdl_t *dtp, const char *arg, uintptr_t option)
+{
+ zoneid_t z, did;
+
+ if (arg == NULL)
+ return (dt_set_errno(dtp, EDT_BADOPTVAL));
+
+ /*
+ * If the specified zone is currently running, we'll query the kernel
+ * for its debugger ID. If it doesn't appear to be running, we'll look
+ * for it for among all installed zones (thereby allowing a zdefs
+ * enabling against a halted zone).
+ */
+ if ((z = getzoneidbyname(arg)) != -1) {
+ if (zone_getattr(z, ZONE_ATTR_DID, &did, sizeof (did)) < 0)
+ return (dt_set_errno(dtp, EDT_BADOPTVAL));
+ } else {
+ zone_dochandle_t handle;
+
+ if ((handle = zonecfg_init_handle()) == NULL)
+ return (dt_set_errno(dtp, errno));
+
+ if (zonecfg_get_handle(arg, handle) != Z_OK) {
+ zonecfg_fini_handle(handle);
+ return (dt_set_errno(dtp, EDT_BADOPTVAL));
+ }
+
+ did = zonecfg_get_did(handle);
+ zonecfg_fini_handle(handle);
+ }
+
+ dtp->dt_options[DTRACEOPT_ZONE] = did;
+
+ return (0);
+}
+
int
dt_options_load(dtrace_hdl_t *dtp)
{
@@ -988,6 +1028,7 @@ static const dt_option_t _dtrace_rtoptions[] = {
{ "statusrate", dt_opt_rate, DTRACEOPT_STATUSRATE },
{ "strsize", dt_opt_strsize, DTRACEOPT_STRSIZE },
{ "ustackframes", dt_opt_runtime, DTRACEOPT_USTACKFRAMES },
+ { "zone", dt_opt_zone, DTRACEOPT_ZONE },
{ "temporal", dt_opt_runtime, DTRACEOPT_TEMPORAL },
{ NULL }
};
@@ -1068,9 +1109,41 @@ dtrace_setopt(dtrace_hdl_t *dtp, const char *opt, const char *val)
if (dtp->dt_active)
return (dt_set_errno(dtp, EDT_ACTIVE));
+ /*
+ * If our options had been previously ioctl'd down,
+ * clear dt_optset to indicate that a run-time option
+ * has since been set.
+ */
+ dtp->dt_optset = B_FALSE;
+
return (op->o_func(dtp, val, op->o_option));
}
}
return (dt_set_errno(dtp, EDT_BADOPTNAME));
}
+
+int
+dtrace_setopts(dtrace_hdl_t *dtp)
+{
+ void *dof;
+ int err;
+
+ if (dtp->dt_optset)
+ return (0);
+
+ if ((dof = dtrace_getopt_dof(dtp)) == NULL)
+ return (-1); /* dt_errno has been set for us */
+
+ if ((err = dt_ioctl(dtp, DTRACEIOC_ENABLE, dof)) == -1)
+ (void) dt_set_errno(dtp, errno);
+
+ dtrace_dof_destroy(dtp, dof);
+
+ if (err == -1)
+ return (-1);
+
+ dtp->dt_optset = B_TRUE;
+
+ return (0);
+}
diff --git a/usr/src/lib/libdtrace/common/dt_parser.c b/usr/src/lib/libdtrace/common/dt_parser.c
index 753009f857..4e7c0c8869 100644
--- a/usr/src/lib/libdtrace/common/dt_parser.c
+++ b/usr/src/lib/libdtrace/common/dt_parser.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, Joyent Inc. All rights reserved.
+ * Copyright (c) 2015, Joyent Inc. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
*/
@@ -288,7 +288,7 @@ dt_type_pointer(dtrace_typeinfo_t *tip)
return (dt_set_errno(dtp, EDT_CTF));
}
- ptr = ctf_add_pointer(dmp->dm_ctfp, CTF_ADD_ROOT, type);
+ ptr = ctf_add_pointer(dmp->dm_ctfp, CTF_ADD_ROOT, NULL, type);
if (ptr == CTF_ERR || ctf_update(dmp->dm_ctfp) == CTF_ERR) {
dtp->dt_ctferr = ctf_errno(dmp->dm_ctfp);
diff --git a/usr/src/lib/libdtrace/common/dt_program.c b/usr/src/lib/libdtrace/common/dt_program.c
index 7d725bd0af..e4f9d8dd1c 100644
--- a/usr/src/lib/libdtrace/common/dt_program.c
+++ b/usr/src/lib/libdtrace/common/dt_program.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent, Inc. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
*/
@@ -154,6 +155,14 @@ dtrace_program_exec(dtrace_hdl_t *dtp, dtrace_prog_t *pgp,
void *dof;
int n, err;
+ /*
+ * If we have not yet ioctl'd down our options DOF, we'll do that
+ * before enabling any probes (some options will affect which probes
+ * we match).
+ */
+ if (dtrace_setopts(dtp) != 0)
+ return (-1);
+
dtrace_program_info(dtp, pgp, pip);
if ((dof = dtrace_dof_create(dtp, pgp, DTRACE_D_STRIP)) == NULL)
diff --git a/usr/src/lib/libdtrace/common/dt_work.c b/usr/src/lib/libdtrace/common/dt_work.c
index 97a7f62d69..c330394027 100644
--- a/usr/src/lib/libdtrace/common/dt_work.c
+++ b/usr/src/lib/libdtrace/common/dt_work.c
@@ -25,7 +25,9 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2011, Joyent, Inc. All rights reserved.
+ */
#include <dt_impl.h>
#include <stddef.h>
@@ -164,13 +166,22 @@ dtrace_status(dtrace_hdl_t *dtp)
int
dtrace_go(dtrace_hdl_t *dtp)
{
- void *dof;
- int err;
-
if (dtp->dt_active)
return (dt_set_errno(dtp, EINVAL));
/*
+ * In most cases, we will have already ioctl'd down our options DOF
+ * by this point -- but if a libdtrace does a dtrace_setopt() after
+ * calling dtrace_program_exec() but before calling dtrace_go(),
+ * dt_optset will be cleared and we need to ioctl down the options
+ * DOF now.
+ */
+ if (dtrace_setopts(dtp) != 0 &&
+ (dtp->dt_errno != ENOTTY || dtp->dt_vector == NULL)) {
+ return (-1);
+ }
+
+ /*
* If a dtrace:::ERROR program and callback are registered, enable the
* program before we start tracing. If this fails for a vector open
* with ENOTTY, we permit dtrace_go() to succeed so that vector clients
@@ -178,19 +189,10 @@ dtrace_go(dtrace_hdl_t *dtp)
* though they do not provide support for the DTRACEIOC_ENABLE ioctl.
*/
if (dtp->dt_errprog != NULL &&
- dtrace_program_exec(dtp, dtp->dt_errprog, NULL) == -1 && (
- dtp->dt_errno != ENOTTY || dtp->dt_vector == NULL))
- return (-1); /* dt_errno has been set for us */
-
- if ((dof = dtrace_getopt_dof(dtp)) == NULL)
+ dtrace_program_exec(dtp, dtp->dt_errprog, NULL) == -1 &&
+ (dtp->dt_errno != ENOTTY || dtp->dt_vector == NULL))
return (-1); /* dt_errno has been set for us */
- err = dt_ioctl(dtp, DTRACEIOC_ENABLE, dof);
- dtrace_dof_destroy(dtp, dof);
-
- if (err == -1 && (errno != ENOTTY || dtp->dt_vector == NULL))
- return (dt_set_errno(dtp, errno));
-
if (dt_ioctl(dtp, DTRACEIOC_GO, &dtp->dt_beganon) == -1) {
if (errno == EACCES)
return (dt_set_errno(dtp, EDT_DESTRUCTIVE));
diff --git a/usr/src/lib/libdtrace/common/dtrace.h b/usr/src/lib/libdtrace/common/dtrace.h
index 065d99f300..8c65ff8bec 100644
--- a/usr/src/lib/libdtrace/common/dtrace.h
+++ b/usr/src/lib/libdtrace/common/dtrace.h
@@ -82,6 +82,7 @@ extern const char *dtrace_subrstr(dtrace_hdl_t *, int);
extern int dtrace_setopt(dtrace_hdl_t *, const char *, const char *);
extern int dtrace_getopt(dtrace_hdl_t *, const char *, dtrace_optval_t *);
+extern int dtrace_setopts(dtrace_hdl_t *);
extern void dtrace_update(dtrace_hdl_t *);
extern int dtrace_ctlfd(dtrace_hdl_t *);
diff --git a/usr/src/lib/libdtrace/common/mac.d.in b/usr/src/lib/libdtrace/common/mac.d.in
new file mode 100644
index 0000000000..6263d51bdd
--- /dev/null
+++ b/usr/src/lib/libdtrace/common/mac.d.in
@@ -0,0 +1,66 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#pragma D depends_on library ip.d
+
+inline int ETHERTYPE_PUP = @ETHERTYPE_PUP@;
+inline int ETHERTYPE_802_MIN = @ETHERTYPE_802_MIN@;
+inline int ETHERTYPE_IP = @ETHERTYPE_IP@;
+inline int ETHERTYPE_ARP = @ETHERTYPE_ARP@;
+inline int ETHERTYPE_REVARP = @ETHERTYPE_REVARP@;
+inline int ETHERTYPE_AT = @ETHERTYPE_AT@;
+inline int ETHERTYPE_AARP = @ETHERTYPE_AARP@;
+inline int ETHERTYPE_VLAN = @ETHERTYPE_VLAN@;
+inline int ETHERTYPE_IPV6 = @ETHERTYPE_IPV6@;
+inline int ETHERTYPE_SLOW = @ETHERTYPE_SLOW@;
+inline int ETHERTYPE_PPPOED = @ETHERTYPE_PPPOED@;
+inline int ETHERTYPE_PPPOES = @ETHERTYPE_PPPOES@;
+inline int ETHERTYPE_EAPOL = @ETHERTYPE_EAPOL@;
+inline int ETHERTYPE_RSN_PREAUTH = @ETHERTYPE_RSN_PREAUTH@;
+inline int ETHERTYPE_TRILL = @ETHERTYPE_TRILL@;
+inline int ETHERTYPE_FCOE = @ETHERTYPE_FCOE@;
+inline int ETHERTYPE_MAX = @ETHERTYPE_MAX@;
+
+
+typedef struct etherinfo {
+ uint8_t eth_dst[6]; /* Destination MAC addr */
+ uint8_t eth_src[6]; /* Source MAC addr */
+ uint16_t eth_type; /* Ethertype */
+ boolean_t eth_istagged; /* Is the VLAN tag present */
+ uint8_t eth_priority; /* Priority tag */
+ uint8_t eth_dei; /* drop eligible indicator */
+ uint16_t eth_vlanid; /* VLAN ID */
+ uintptr_t eth_header; /* Pointer to start of header */
+ uintptr_t eth_mblk; /* Pointer to the mblk containing header */
+} etherinfo_t;
+
+#pragma D binding "1.12.1" translator
+translator etherinfo_t < mblk_t *mp > {
+ eth_dst = mp->b_rptr;
+ eth_src = mp->b_rptr + 6;
+ eth_type = ntohs(*(uint16_t *)(mp->b_rptr + 12)) == ETHERTYPE_VLAN ?
+ ntohs(*(uint16_t *)(mp->b_rptr + 16)) :
+ ntohs(*(uint16_t *)(mp->b_rptr + 12));
+ eth_istagged = ntohs(*(uint16_t *)(mp->b_rptr + 12)) == ETHERTYPE_VLAN ?
+ 1 : 0;
+ eth_priority = ntohs(*(uint16_t *)(mp->b_rptr + 12)) == ETHERTYPE_VLAN ?
+ ntohs(*(uint16_t *)(mp->b_rptr + 14)) & 0xe000: 0;
+ eth_dei = ntohs(*(uint16_t *)(mp->b_rptr + 12)) == ETHERTYPE_VLAN ?
+ ntohs(*(uint16_t *)(mp->b_rptr + 14)) & 0x1000: 0;
+ eth_vlanid = ntohs(*(uint16_t *)(mp->b_rptr + 12)) == ETHERTYPE_VLAN ?
+ ntohs(*(uint16_t *)(mp->b_rptr + 14)) & 0x0fff: 0;
+ eth_header = (uintptr_t)mp->b_rptr;
+ eth_mblk = (uintptr_t)mp;
+};
diff --git a/usr/src/lib/libdtrace/common/mac.sed.in b/usr/src/lib/libdtrace/common/mac.sed.in
new file mode 100644
index 0000000000..00e149d000
--- /dev/null
+++ b/usr/src/lib/libdtrace/common/mac.sed.in
@@ -0,0 +1,45 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+
+/*
+ * This file is a sed script which is first preprocessed by cpp or cc -E to
+ * define a set of sed directives which replace #define tokens with their
+ * values. After preprocessing, the sed script is run over vnd.d.in to
+ * replace the #define tokens listed below to create the finished vnd.d.
+ * Refer to the rules in libdtrace/Makefile.com for more information.
+ */
+
+#include <sys/ethernet.h>
+
+#define SED_REPLACE(x) s/#x/x/g
+
+SED_REPLACE(ETHERTYPE_PUP)
+SED_REPLACE(ETHERTYPE_802_MIN)
+SED_REPLACE(ETHERTYPE_IP)
+SED_REPLACE(ETHERTYPE_ARP)
+SED_REPLACE(ETHERTYPE_REVARP)
+SED_REPLACE(ETHERTYPE_AT)
+SED_REPLACE(ETHERTYPE_AARP)
+SED_REPLACE(ETHERTYPE_VLAN)
+SED_REPLACE(ETHERTYPE_IPV6)
+SED_REPLACE(ETHERTYPE_SLOW)
+SED_REPLACE(ETHERTYPE_PPPOED)
+SED_REPLACE(ETHERTYPE_PPPOES)
+SED_REPLACE(ETHERTYPE_EAPOL)
+SED_REPLACE(ETHERTYPE_RSN_PREAUTH)
+SED_REPLACE(ETHERTYPE_TRILL)
+SED_REPLACE(ETHERTYPE_FCOE)
+SED_REPLACE(ETHERTYPE_MAX)
diff --git a/usr/src/lib/libdtrace/common/vnd.d b/usr/src/lib/libdtrace/common/vnd.d
new file mode 100644
index 0000000000..356c412150
--- /dev/null
+++ b/usr/src/lib/libdtrace/common/vnd.d
@@ -0,0 +1,28 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+
+#pragma D depends_on module vnd
+#pragma D depends_on provider vnd
+#pragma D depends_on library ip.d
+#pragma D depends_on library mac.d
+
+#pragma D binding "1.6.3" translator
+translator ifinfo_t < vnd_str_t *vsp > {
+ if_name = vsp != NULL ? stringof(vsp->vns_dev->vdd_lname) : "<null>";
+ if_local = 0;
+ if_ipstack = vsp != NULL ? vsp->vns_nsd->vpnd_nsid : 0;
+ if_addr = (uintptr_t)vsp;
+};
diff --git a/usr/src/lib/libdwarf/Makefile b/usr/src/lib/libdwarf/Makefile
new file mode 100644
index 0000000000..6b7ff6244b
--- /dev/null
+++ b/usr/src/lib/libdwarf/Makefile
@@ -0,0 +1,40 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+include ../Makefile.lib
+
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+install := TARGET = install
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber lint install_h: $(SUBDIRS)
+
+install: install_h $(SUBDIRS)
+
+check: $(CHECKHDRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../Makefile.targ
diff --git a/usr/src/lib/libdwarf/Makefile.com b/usr/src/lib/libdwarf/Makefile.com
new file mode 100644
index 0000000000..c737366af9
--- /dev/null
+++ b/usr/src/lib/libdwarf/Makefile.com
@@ -0,0 +1,92 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+LIBRARY= libdwarf.a
+VERS= .1
+
+OBJECTS=dwarf_abbrev.o \
+ dwarf_addr_finder.o \
+ dwarf_alloc.o \
+ dwarf_arange.o \
+ dwarf_die_deliv.o \
+ dwarf_elf_access.o \
+ dwarf_error.o \
+ dwarf_form.o \
+ dwarf_frame.o \
+ dwarf_frame2.o \
+ dwarf_frame3.o \
+ dwarf_funcs.o \
+ dwarf_global.o \
+ dwarf_harmless.o \
+ dwarf_init_finish.o \
+ dwarf_leb.o \
+ dwarf_line.o \
+ dwarf_line2.o \
+ dwarf_loc.o \
+ dwarf_macro.o \
+ dwarf_names.o \
+ dwarf_original_elf_init.o \
+ dwarf_print_lines.o \
+ dwarf_pubtypes.o \
+ dwarf_query.o \
+ dwarf_ranges.o \
+ dwarf_sort_line.o \
+ dwarf_string.o \
+ dwarf_stubs.o \
+ dwarf_types.o \
+ dwarf_util.o \
+ dwarf_vars.o \
+ dwarf_weaks.o \
+ malloc_check.o \
+ pro_alloc.o \
+ pro_arange.o \
+ pro_die.o \
+ pro_encode_nm.o \
+ pro_error.o \
+ pro_expr.o \
+ pro_finish.o \
+ pro_forms.o \
+ pro_frame.o \
+ pro_funcs.o \
+ pro_init.o \
+ pro_line.o \
+ pro_macinfo.o \
+ pro_pubnames.o \
+ pro_reloc.o \
+ pro_reloc_stream.o \
+ pro_reloc_symbolic.o \
+ pro_section.o \
+ pro_types.o \
+ pro_vars.o \
+ pro_weaks.o
+
+include ../../Makefile.lib
+include ../../Makefile.rootfs
+
+LIBS = $(DYNLIB)
+LDLIBS += -lelf -lc
+
+SRCDIR = ../common
+CPPFLAGS += -I$(SRCDIR) -DELF_TARGET_ALL=1
+CERRWARN += -_gcc=-Wno-unused
+CERRWARN += -_gcc=-Wno-implicit-function-declaration
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+include ../../Makefile.targ
diff --git a/usr/src/tools/ctf/dwarf/THIRDPARTYLICENSE b/usr/src/lib/libdwarf/THIRDPARTYLICENSE
index b9320c2d56..b9320c2d56 100644
--- a/usr/src/tools/ctf/dwarf/THIRDPARTYLICENSE
+++ b/usr/src/lib/libdwarf/THIRDPARTYLICENSE
diff --git a/usr/src/tools/ctf/dwarf/THIRDPARTYLICENSE.descrip b/usr/src/lib/libdwarf/THIRDPARTYLICENSE.descrip
index 73abaac973..73abaac973 100644
--- a/usr/src/tools/ctf/dwarf/THIRDPARTYLICENSE.descrip
+++ b/usr/src/lib/libdwarf/THIRDPARTYLICENSE.descrip
diff --git a/usr/src/lib/libdwarf/amd64/Makefile b/usr/src/lib/libdwarf/amd64/Makefile
new file mode 100644
index 0000000000..15a899f96f
--- /dev/null
+++ b/usr/src/lib/libdwarf/amd64/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+include ../Makefile.com
+include ../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64)
diff --git a/usr/src/tools/ctf/dwarf/common/cmplrs/dwarf_addr_finder.h b/usr/src/lib/libdwarf/common/cmplrs/dwarf_addr_finder.h
index 0eda6d1c44..0eda6d1c44 100644
--- a/usr/src/tools/ctf/dwarf/common/cmplrs/dwarf_addr_finder.h
+++ b/usr/src/lib/libdwarf/common/cmplrs/dwarf_addr_finder.h
diff --git a/usr/src/tools/ctf/dwarf/common/config.h b/usr/src/lib/libdwarf/common/config.h
index 42b286cfda..42b286cfda 100644
--- a/usr/src/tools/ctf/dwarf/common/config.h
+++ b/usr/src/lib/libdwarf/common/config.h
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf.h b/usr/src/lib/libdwarf/common/dwarf.h
index b064c4d86b..b064c4d86b 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf.h
+++ b/usr/src/lib/libdwarf/common/dwarf.h
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_abbrev.c b/usr/src/lib/libdwarf/common/dwarf_abbrev.c
index c2ae361f33..c2ae361f33 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_abbrev.c
+++ b/usr/src/lib/libdwarf/common/dwarf_abbrev.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_abbrev.h b/usr/src/lib/libdwarf/common/dwarf_abbrev.h
index b525924c83..b525924c83 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_abbrev.h
+++ b/usr/src/lib/libdwarf/common/dwarf_abbrev.h
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_addr_finder.c b/usr/src/lib/libdwarf/common/dwarf_addr_finder.c
index 2fadefc1ea..2fadefc1ea 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_addr_finder.c
+++ b/usr/src/lib/libdwarf/common/dwarf_addr_finder.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_alloc.c b/usr/src/lib/libdwarf/common/dwarf_alloc.c
index ddb423e841..ddb423e841 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_alloc.c
+++ b/usr/src/lib/libdwarf/common/dwarf_alloc.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_alloc.h b/usr/src/lib/libdwarf/common/dwarf_alloc.h
index 3a61c692c6..3a61c692c6 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_alloc.h
+++ b/usr/src/lib/libdwarf/common/dwarf_alloc.h
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_arange.c b/usr/src/lib/libdwarf/common/dwarf_arange.c
index e7ad8acc5e..e7ad8acc5e 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_arange.c
+++ b/usr/src/lib/libdwarf/common/dwarf_arange.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_arange.h b/usr/src/lib/libdwarf/common/dwarf_arange.h
index d6c537c452..d6c537c452 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_arange.h
+++ b/usr/src/lib/libdwarf/common/dwarf_arange.h
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_base_types.h b/usr/src/lib/libdwarf/common/dwarf_base_types.h
index 00e2700a81..00e2700a81 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_base_types.h
+++ b/usr/src/lib/libdwarf/common/dwarf_base_types.h
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_die_deliv.c b/usr/src/lib/libdwarf/common/dwarf_die_deliv.c
index 4ba9f2aded..4ba9f2aded 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_die_deliv.c
+++ b/usr/src/lib/libdwarf/common/dwarf_die_deliv.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_die_deliv.h b/usr/src/lib/libdwarf/common/dwarf_die_deliv.h
index f1ecb153ba..f1ecb153ba 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_die_deliv.h
+++ b/usr/src/lib/libdwarf/common/dwarf_die_deliv.h
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_elf_access.c b/usr/src/lib/libdwarf/common/dwarf_elf_access.c
index 6caa64a758..6caa64a758 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_elf_access.c
+++ b/usr/src/lib/libdwarf/common/dwarf_elf_access.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_elf_access.h b/usr/src/lib/libdwarf/common/dwarf_elf_access.h
index fd52c17938..fd52c17938 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_elf_access.h
+++ b/usr/src/lib/libdwarf/common/dwarf_elf_access.h
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_error.c b/usr/src/lib/libdwarf/common/dwarf_error.c
index 7327529820..7327529820 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_error.c
+++ b/usr/src/lib/libdwarf/common/dwarf_error.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_error.h b/usr/src/lib/libdwarf/common/dwarf_error.h
index 27acf70db0..27acf70db0 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_error.h
+++ b/usr/src/lib/libdwarf/common/dwarf_error.h
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_form.c b/usr/src/lib/libdwarf/common/dwarf_form.c
index fcdd64230c..fcdd64230c 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_form.c
+++ b/usr/src/lib/libdwarf/common/dwarf_form.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_frame.c b/usr/src/lib/libdwarf/common/dwarf_frame.c
index 3a825ee925..3a825ee925 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_frame.c
+++ b/usr/src/lib/libdwarf/common/dwarf_frame.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_frame.h b/usr/src/lib/libdwarf/common/dwarf_frame.h
index ceb686335b..ceb686335b 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_frame.h
+++ b/usr/src/lib/libdwarf/common/dwarf_frame.h
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_frame2.c b/usr/src/lib/libdwarf/common/dwarf_frame2.c
index 01b9ec497b..01b9ec497b 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_frame2.c
+++ b/usr/src/lib/libdwarf/common/dwarf_frame2.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_frame3.c b/usr/src/lib/libdwarf/common/dwarf_frame3.c
index 7bd8ec86d5..7bd8ec86d5 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_frame3.c
+++ b/usr/src/lib/libdwarf/common/dwarf_frame3.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_funcs.c b/usr/src/lib/libdwarf/common/dwarf_funcs.c
index 8d725ae33f..8d725ae33f 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_funcs.c
+++ b/usr/src/lib/libdwarf/common/dwarf_funcs.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_funcs.h b/usr/src/lib/libdwarf/common/dwarf_funcs.h
index bf91c32157..bf91c32157 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_funcs.h
+++ b/usr/src/lib/libdwarf/common/dwarf_funcs.h
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_global.c b/usr/src/lib/libdwarf/common/dwarf_global.c
index d1c090fa43..d1c090fa43 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_global.c
+++ b/usr/src/lib/libdwarf/common/dwarf_global.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_global.h b/usr/src/lib/libdwarf/common/dwarf_global.h
index c2bc2cdcc3..c2bc2cdcc3 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_global.h
+++ b/usr/src/lib/libdwarf/common/dwarf_global.h
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_harmless.c b/usr/src/lib/libdwarf/common/dwarf_harmless.c
index 16dbe4bc97..16dbe4bc97 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_harmless.c
+++ b/usr/src/lib/libdwarf/common/dwarf_harmless.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_harmless.h b/usr/src/lib/libdwarf/common/dwarf_harmless.h
index 3d4d910ce9..3d4d910ce9 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_harmless.h
+++ b/usr/src/lib/libdwarf/common/dwarf_harmless.h
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_incl.h b/usr/src/lib/libdwarf/common/dwarf_incl.h
index df2fbf334c..df2fbf334c 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_incl.h
+++ b/usr/src/lib/libdwarf/common/dwarf_incl.h
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_init_finish.c b/usr/src/lib/libdwarf/common/dwarf_init_finish.c
index 1ab9d5fd38..1ab9d5fd38 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_init_finish.c
+++ b/usr/src/lib/libdwarf/common/dwarf_init_finish.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_leb.c b/usr/src/lib/libdwarf/common/dwarf_leb.c
index b3b5d262f5..b3b5d262f5 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_leb.c
+++ b/usr/src/lib/libdwarf/common/dwarf_leb.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_line.c b/usr/src/lib/libdwarf/common/dwarf_line.c
index e7e15e7c1a..e7e15e7c1a 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_line.c
+++ b/usr/src/lib/libdwarf/common/dwarf_line.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_line.h b/usr/src/lib/libdwarf/common/dwarf_line.h
index 66d6062754..66d6062754 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_line.h
+++ b/usr/src/lib/libdwarf/common/dwarf_line.h
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_line2.c b/usr/src/lib/libdwarf/common/dwarf_line2.c
index 634b848167..634b848167 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_line2.c
+++ b/usr/src/lib/libdwarf/common/dwarf_line2.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_loc.c b/usr/src/lib/libdwarf/common/dwarf_loc.c
index f28b27b630..f28b27b630 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_loc.c
+++ b/usr/src/lib/libdwarf/common/dwarf_loc.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_loc.h b/usr/src/lib/libdwarf/common/dwarf_loc.h
index 685d199f29..685d199f29 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_loc.h
+++ b/usr/src/lib/libdwarf/common/dwarf_loc.h
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_macro.c b/usr/src/lib/libdwarf/common/dwarf_macro.c
index e1ff976d8c..e1ff976d8c 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_macro.c
+++ b/usr/src/lib/libdwarf/common/dwarf_macro.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_macro.h b/usr/src/lib/libdwarf/common/dwarf_macro.h
index 31ea2e6e67..31ea2e6e67 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_macro.h
+++ b/usr/src/lib/libdwarf/common/dwarf_macro.h
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_names.c b/usr/src/lib/libdwarf/common/dwarf_names.c
index 417e025690..417e025690 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_names.c
+++ b/usr/src/lib/libdwarf/common/dwarf_names.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_names.h b/usr/src/lib/libdwarf/common/dwarf_names.h
index 6edafa5fdd..6edafa5fdd 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_names.h
+++ b/usr/src/lib/libdwarf/common/dwarf_names.h
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_opaque.h b/usr/src/lib/libdwarf/common/dwarf_opaque.h
index b235a9c7b4..b235a9c7b4 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_opaque.h
+++ b/usr/src/lib/libdwarf/common/dwarf_opaque.h
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_original_elf_init.c b/usr/src/lib/libdwarf/common/dwarf_original_elf_init.c
index a6d943da0a..a6d943da0a 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_original_elf_init.c
+++ b/usr/src/lib/libdwarf/common/dwarf_original_elf_init.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_print_lines.c b/usr/src/lib/libdwarf/common/dwarf_print_lines.c
index 30c4889ee5..30c4889ee5 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_print_lines.c
+++ b/usr/src/lib/libdwarf/common/dwarf_print_lines.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_pubtypes.c b/usr/src/lib/libdwarf/common/dwarf_pubtypes.c
index 330c1c6adc..330c1c6adc 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_pubtypes.c
+++ b/usr/src/lib/libdwarf/common/dwarf_pubtypes.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_query.c b/usr/src/lib/libdwarf/common/dwarf_query.c
index 3f21abd039..3f21abd039 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_query.c
+++ b/usr/src/lib/libdwarf/common/dwarf_query.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_ranges.c b/usr/src/lib/libdwarf/common/dwarf_ranges.c
index ae6d5cf9b5..ae6d5cf9b5 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_ranges.c
+++ b/usr/src/lib/libdwarf/common/dwarf_ranges.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_sort_line.c b/usr/src/lib/libdwarf/common/dwarf_sort_line.c
index 3576614129..3576614129 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_sort_line.c
+++ b/usr/src/lib/libdwarf/common/dwarf_sort_line.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_string.c b/usr/src/lib/libdwarf/common/dwarf_string.c
index fafa5a097c..fafa5a097c 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_string.c
+++ b/usr/src/lib/libdwarf/common/dwarf_string.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_stubs.c b/usr/src/lib/libdwarf/common/dwarf_stubs.c
index f2c1f7fd45..f2c1f7fd45 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_stubs.c
+++ b/usr/src/lib/libdwarf/common/dwarf_stubs.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_types.c b/usr/src/lib/libdwarf/common/dwarf_types.c
index d547805289..d547805289 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_types.c
+++ b/usr/src/lib/libdwarf/common/dwarf_types.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_types.h b/usr/src/lib/libdwarf/common/dwarf_types.h
index ebd31c6c79..ebd31c6c79 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_types.h
+++ b/usr/src/lib/libdwarf/common/dwarf_types.h
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_util.c b/usr/src/lib/libdwarf/common/dwarf_util.c
index 01e0dd755d..01e0dd755d 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_util.c
+++ b/usr/src/lib/libdwarf/common/dwarf_util.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_util.h b/usr/src/lib/libdwarf/common/dwarf_util.h
index 4046bb2478..4046bb2478 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_util.h
+++ b/usr/src/lib/libdwarf/common/dwarf_util.h
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_vars.c b/usr/src/lib/libdwarf/common/dwarf_vars.c
index 24105289ba..24105289ba 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_vars.c
+++ b/usr/src/lib/libdwarf/common/dwarf_vars.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_vars.h b/usr/src/lib/libdwarf/common/dwarf_vars.h
index bd5f967e48..bd5f967e48 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_vars.h
+++ b/usr/src/lib/libdwarf/common/dwarf_vars.h
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_weaks.c b/usr/src/lib/libdwarf/common/dwarf_weaks.c
index 425916e62e..425916e62e 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_weaks.c
+++ b/usr/src/lib/libdwarf/common/dwarf_weaks.c
diff --git a/usr/src/tools/ctf/dwarf/common/dwarf_weaks.h b/usr/src/lib/libdwarf/common/dwarf_weaks.h
index d38f5f118a..d38f5f118a 100644
--- a/usr/src/tools/ctf/dwarf/common/dwarf_weaks.h
+++ b/usr/src/lib/libdwarf/common/dwarf_weaks.h
diff --git a/usr/src/tools/ctf/dwarf/common/libdwarf.h b/usr/src/lib/libdwarf/common/libdwarf.h
index 78627a96a6..78627a96a6 100644
--- a/usr/src/tools/ctf/dwarf/common/libdwarf.h
+++ b/usr/src/lib/libdwarf/common/libdwarf.h
diff --git a/usr/src/tools/ctf/dwarf/common/libdwarfdefs.h b/usr/src/lib/libdwarf/common/libdwarfdefs.h
index a564655b23..a564655b23 100644
--- a/usr/src/tools/ctf/dwarf/common/libdwarfdefs.h
+++ b/usr/src/lib/libdwarf/common/libdwarfdefs.h
diff --git a/usr/src/tools/ctf/dwarf/common/malloc_check.c b/usr/src/lib/libdwarf/common/malloc_check.c
index 1c6e7738e4..1c6e7738e4 100644
--- a/usr/src/tools/ctf/dwarf/common/malloc_check.c
+++ b/usr/src/lib/libdwarf/common/malloc_check.c
diff --git a/usr/src/tools/ctf/dwarf/common/malloc_check.h b/usr/src/lib/libdwarf/common/malloc_check.h
index ba1ad3da71..ba1ad3da71 100644
--- a/usr/src/tools/ctf/dwarf/common/malloc_check.h
+++ b/usr/src/lib/libdwarf/common/malloc_check.h
diff --git a/usr/src/tools/ctf/dwarf/common/mapfile-vers b/usr/src/lib/libdwarf/common/mapfile-vers
index c1a652a591..c1a652a591 100644
--- a/usr/src/tools/ctf/dwarf/common/mapfile-vers
+++ b/usr/src/lib/libdwarf/common/mapfile-vers
diff --git a/usr/src/tools/ctf/dwarf/common/pro_alloc.c b/usr/src/lib/libdwarf/common/pro_alloc.c
index 1ca7806239..1ca7806239 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_alloc.c
+++ b/usr/src/lib/libdwarf/common/pro_alloc.c
diff --git a/usr/src/tools/ctf/dwarf/common/pro_alloc.h b/usr/src/lib/libdwarf/common/pro_alloc.h
index b4da65325f..b4da65325f 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_alloc.h
+++ b/usr/src/lib/libdwarf/common/pro_alloc.h
diff --git a/usr/src/tools/ctf/dwarf/common/pro_arange.c b/usr/src/lib/libdwarf/common/pro_arange.c
index 4e5c37795c..4e5c37795c 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_arange.c
+++ b/usr/src/lib/libdwarf/common/pro_arange.c
diff --git a/usr/src/tools/ctf/dwarf/common/pro_arange.h b/usr/src/lib/libdwarf/common/pro_arange.h
index f0e7e84dff..f0e7e84dff 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_arange.h
+++ b/usr/src/lib/libdwarf/common/pro_arange.h
diff --git a/usr/src/tools/ctf/dwarf/common/pro_die.c b/usr/src/lib/libdwarf/common/pro_die.c
index 948b641146..948b641146 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_die.c
+++ b/usr/src/lib/libdwarf/common/pro_die.c
diff --git a/usr/src/tools/ctf/dwarf/common/pro_die.h b/usr/src/lib/libdwarf/common/pro_die.h
index 01c00e79bd..01c00e79bd 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_die.h
+++ b/usr/src/lib/libdwarf/common/pro_die.h
diff --git a/usr/src/tools/ctf/dwarf/common/pro_encode_nm.c b/usr/src/lib/libdwarf/common/pro_encode_nm.c
index d6215dc56b..d6215dc56b 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_encode_nm.c
+++ b/usr/src/lib/libdwarf/common/pro_encode_nm.c
diff --git a/usr/src/tools/ctf/dwarf/common/pro_encode_nm.h b/usr/src/lib/libdwarf/common/pro_encode_nm.h
index d08e4d5148..d08e4d5148 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_encode_nm.h
+++ b/usr/src/lib/libdwarf/common/pro_encode_nm.h
diff --git a/usr/src/tools/ctf/dwarf/common/pro_error.c b/usr/src/lib/libdwarf/common/pro_error.c
index d408a391e2..d408a391e2 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_error.c
+++ b/usr/src/lib/libdwarf/common/pro_error.c
diff --git a/usr/src/tools/ctf/dwarf/common/pro_error.h b/usr/src/lib/libdwarf/common/pro_error.h
index c37035301b..c37035301b 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_error.h
+++ b/usr/src/lib/libdwarf/common/pro_error.h
diff --git a/usr/src/tools/ctf/dwarf/common/pro_expr.c b/usr/src/lib/libdwarf/common/pro_expr.c
index 4c701748a6..4c701748a6 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_expr.c
+++ b/usr/src/lib/libdwarf/common/pro_expr.c
diff --git a/usr/src/tools/ctf/dwarf/common/pro_expr.h b/usr/src/lib/libdwarf/common/pro_expr.h
index 202f2d30d5..202f2d30d5 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_expr.h
+++ b/usr/src/lib/libdwarf/common/pro_expr.h
diff --git a/usr/src/tools/ctf/dwarf/common/pro_finish.c b/usr/src/lib/libdwarf/common/pro_finish.c
index bc43a5f0f4..bc43a5f0f4 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_finish.c
+++ b/usr/src/lib/libdwarf/common/pro_finish.c
diff --git a/usr/src/tools/ctf/dwarf/common/pro_forms.c b/usr/src/lib/libdwarf/common/pro_forms.c
index fec9a39c60..fec9a39c60 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_forms.c
+++ b/usr/src/lib/libdwarf/common/pro_forms.c
diff --git a/usr/src/tools/ctf/dwarf/common/pro_frame.c b/usr/src/lib/libdwarf/common/pro_frame.c
index bd1ef6a637..bd1ef6a637 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_frame.c
+++ b/usr/src/lib/libdwarf/common/pro_frame.c
diff --git a/usr/src/tools/ctf/dwarf/common/pro_frame.h b/usr/src/lib/libdwarf/common/pro_frame.h
index df60d369ed..df60d369ed 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_frame.h
+++ b/usr/src/lib/libdwarf/common/pro_frame.h
diff --git a/usr/src/tools/ctf/dwarf/common/pro_funcs.c b/usr/src/lib/libdwarf/common/pro_funcs.c
index 8ff05500bb..8ff05500bb 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_funcs.c
+++ b/usr/src/lib/libdwarf/common/pro_funcs.c
diff --git a/usr/src/tools/ctf/dwarf/common/pro_incl.h b/usr/src/lib/libdwarf/common/pro_incl.h
index 10bce470c2..10bce470c2 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_incl.h
+++ b/usr/src/lib/libdwarf/common/pro_incl.h
diff --git a/usr/src/tools/ctf/dwarf/common/pro_init.c b/usr/src/lib/libdwarf/common/pro_init.c
index d696113a67..d696113a67 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_init.c
+++ b/usr/src/lib/libdwarf/common/pro_init.c
diff --git a/usr/src/tools/ctf/dwarf/common/pro_line.c b/usr/src/lib/libdwarf/common/pro_line.c
index 69d3e339f0..69d3e339f0 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_line.c
+++ b/usr/src/lib/libdwarf/common/pro_line.c
diff --git a/usr/src/tools/ctf/dwarf/common/pro_line.h b/usr/src/lib/libdwarf/common/pro_line.h
index eed941239d..eed941239d 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_line.h
+++ b/usr/src/lib/libdwarf/common/pro_line.h
diff --git a/usr/src/tools/ctf/dwarf/common/pro_macinfo.c b/usr/src/lib/libdwarf/common/pro_macinfo.c
index cfa820aee6..cfa820aee6 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_macinfo.c
+++ b/usr/src/lib/libdwarf/common/pro_macinfo.c
diff --git a/usr/src/tools/ctf/dwarf/common/pro_macinfo.h b/usr/src/lib/libdwarf/common/pro_macinfo.h
index 852a0cec1f..852a0cec1f 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_macinfo.h
+++ b/usr/src/lib/libdwarf/common/pro_macinfo.h
diff --git a/usr/src/tools/ctf/dwarf/common/pro_opaque.h b/usr/src/lib/libdwarf/common/pro_opaque.h
index befc69faa6..befc69faa6 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_opaque.h
+++ b/usr/src/lib/libdwarf/common/pro_opaque.h
diff --git a/usr/src/tools/ctf/dwarf/common/pro_pubnames.c b/usr/src/lib/libdwarf/common/pro_pubnames.c
index e07fe35943..e07fe35943 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_pubnames.c
+++ b/usr/src/lib/libdwarf/common/pro_pubnames.c
diff --git a/usr/src/tools/ctf/dwarf/common/pro_reloc.c b/usr/src/lib/libdwarf/common/pro_reloc.c
index 66f16acbd0..66f16acbd0 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_reloc.c
+++ b/usr/src/lib/libdwarf/common/pro_reloc.c
diff --git a/usr/src/tools/ctf/dwarf/common/pro_reloc.h b/usr/src/lib/libdwarf/common/pro_reloc.h
index d2e6c67357..d2e6c67357 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_reloc.h
+++ b/usr/src/lib/libdwarf/common/pro_reloc.h
diff --git a/usr/src/tools/ctf/dwarf/common/pro_reloc_stream.c b/usr/src/lib/libdwarf/common/pro_reloc_stream.c
index 459779ceda..459779ceda 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_reloc_stream.c
+++ b/usr/src/lib/libdwarf/common/pro_reloc_stream.c
diff --git a/usr/src/tools/ctf/dwarf/common/pro_reloc_stream.h b/usr/src/lib/libdwarf/common/pro_reloc_stream.h
index 892ea5baf3..892ea5baf3 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_reloc_stream.h
+++ b/usr/src/lib/libdwarf/common/pro_reloc_stream.h
diff --git a/usr/src/tools/ctf/dwarf/common/pro_reloc_symbolic.c b/usr/src/lib/libdwarf/common/pro_reloc_symbolic.c
index 22080a00cd..22080a00cd 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_reloc_symbolic.c
+++ b/usr/src/lib/libdwarf/common/pro_reloc_symbolic.c
diff --git a/usr/src/tools/ctf/dwarf/common/pro_reloc_symbolic.h b/usr/src/lib/libdwarf/common/pro_reloc_symbolic.h
index 3d03a47863..3d03a47863 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_reloc_symbolic.h
+++ b/usr/src/lib/libdwarf/common/pro_reloc_symbolic.h
diff --git a/usr/src/tools/ctf/dwarf/common/pro_section.c b/usr/src/lib/libdwarf/common/pro_section.c
index 6503c2cf09..6503c2cf09 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_section.c
+++ b/usr/src/lib/libdwarf/common/pro_section.c
diff --git a/usr/src/tools/ctf/dwarf/common/pro_section.h b/usr/src/lib/libdwarf/common/pro_section.h
index b37ade44dc..b37ade44dc 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_section.h
+++ b/usr/src/lib/libdwarf/common/pro_section.h
diff --git a/usr/src/tools/ctf/dwarf/common/pro_types.c b/usr/src/lib/libdwarf/common/pro_types.c
index 1f3f93280c..1f3f93280c 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_types.c
+++ b/usr/src/lib/libdwarf/common/pro_types.c
diff --git a/usr/src/tools/ctf/dwarf/common/pro_types.h b/usr/src/lib/libdwarf/common/pro_types.h
index 817609215b..817609215b 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_types.h
+++ b/usr/src/lib/libdwarf/common/pro_types.h
diff --git a/usr/src/tools/ctf/dwarf/common/pro_util.h b/usr/src/lib/libdwarf/common/pro_util.h
index 56bde8bda6..56bde8bda6 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_util.h
+++ b/usr/src/lib/libdwarf/common/pro_util.h
diff --git a/usr/src/tools/ctf/dwarf/common/pro_vars.c b/usr/src/lib/libdwarf/common/pro_vars.c
index 3e75a9d9af..3e75a9d9af 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_vars.c
+++ b/usr/src/lib/libdwarf/common/pro_vars.c
diff --git a/usr/src/tools/ctf/dwarf/common/pro_weaks.c b/usr/src/lib/libdwarf/common/pro_weaks.c
index 8c74bf08ca..8c74bf08ca 100644
--- a/usr/src/tools/ctf/dwarf/common/pro_weaks.c
+++ b/usr/src/lib/libdwarf/common/pro_weaks.c
diff --git a/usr/src/lib/libdwarf/i386/Makefile b/usr/src/lib/libdwarf/i386/Makefile
new file mode 100644
index 0000000000..4398507523
--- /dev/null
+++ b/usr/src/lib/libdwarf/i386/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS)
diff --git a/usr/src/lib/libdwarf/sparc/Makefile b/usr/src/lib/libdwarf/sparc/Makefile
new file mode 100644
index 0000000000..4398507523
--- /dev/null
+++ b/usr/src/lib/libdwarf/sparc/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS)
diff --git a/usr/src/lib/libdwarf/sparcv9/Makefile b/usr/src/lib/libdwarf/sparcv9/Makefile
new file mode 100644
index 0000000000..4e7833710f
--- /dev/null
+++ b/usr/src/lib/libdwarf/sparcv9/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+include ../Makefile.com
+include ../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64)
diff --git a/usr/src/lib/libgrubmgmt/common/libgrub_fs.c b/usr/src/lib/libgrubmgmt/common/libgrub_fs.c
index aa5faa6470..92078bccee 100644
--- a/usr/src/lib/libgrubmgmt/common/libgrub_fs.c
+++ b/usr/src/lib/libgrubmgmt/common/libgrub_fs.c
@@ -21,6 +21,8 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright 2011 Joyent, Inc. All rights reserved.
*/
/*
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
diff --git a/usr/src/lib/libidspace/Makefile b/usr/src/lib/libidspace/Makefile
new file mode 100644
index 0000000000..44640eeddc
--- /dev/null
+++ b/usr/src/lib/libidspace/Makefile
@@ -0,0 +1,45 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.lib
+
+HDRS = libidspace.h
+HDRDIR = common
+
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+install := TARGET = install
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber install lint: $(SUBDIRS)
+
+install: install_h $(SUBDIRS)
+
+install_h: $(ROOTHDRS)
+
+check: $(CHECKHDRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../Makefile.targ
diff --git a/usr/src/lib/libidspace/Makefile.com b/usr/src/lib/libidspace/Makefile.com
new file mode 100644
index 0000000000..8cc60ffc4c
--- /dev/null
+++ b/usr/src/lib/libidspace/Makefile.com
@@ -0,0 +1,42 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+LIBRARY = libidspace.a
+VERS = .1
+OBJECTS = id_space.o \
+ libidspace.o
+COMDIR = $(SRC)/common/idspace
+
+include ../../Makefile.lib
+
+SRCDIR = ../common
+SRCS = ../../../common/idspace/id_space.c
+LIBS = $(DYNLIB) $(LINTLIB)
+
+LDLIBS += -lc -lumem
+
+$(LINTLIB) := SRCS = $(SRCDIR)/$(LINTSRC)
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+include ../../Makefile.targ
+
+objs/%.o pics/%.o: $(COMDIR)/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
diff --git a/usr/src/lib/libidspace/amd64/Makefile b/usr/src/lib/libidspace/amd64/Makefile
new file mode 100644
index 0000000000..15d904c616
--- /dev/null
+++ b/usr/src/lib/libidspace/amd64/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+include ../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/libidspace/common/libidspace.c b/usr/src/lib/libidspace/common/libidspace.c
new file mode 100644
index 0000000000..7a9f8acd67
--- /dev/null
+++ b/usr/src/lib/libidspace/common/libidspace.c
@@ -0,0 +1,25 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc.
+ */
+
+/*
+ * Wrappers around the common id_space code, for userland.
+ */
+#include <sys/id_space.h>
+
+id_t
+id_alloc_specific(id_space_t *idp, id_t id)
+{
+ return (id_alloc_specific_nosleep(idp, id));
+}
diff --git a/usr/src/lib/libidspace/common/libidspace.h b/usr/src/lib/libidspace/common/libidspace.h
new file mode 100644
index 0000000000..bb8690f19c
--- /dev/null
+++ b/usr/src/lib/libidspace/common/libidspace.h
@@ -0,0 +1,42 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _LIBIDSPACE_H
+#define _LIBIDSPACE_H
+
+/*
+ * libidspace public header
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+
+typedef struct id_space id_space_t;
+
+extern id_space_t *id_space_create(const char *, id_t, id_t);
+extern void id_space_destroy(id_space_t *);
+extern void id_space_extend(id_space_t *, id_t, id_t);
+extern id_t id_alloc(id_space_t *);
+extern id_t id_alloc_specific(id_space_t *, id_t);
+extern void id_free(id_space_t *, id_t);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBIDSPACE_H */
diff --git a/usr/src/lib/libidspace/common/llib-lidspace b/usr/src/lib/libidspace/common/llib-lidspace
new file mode 100644
index 0000000000..39c628da47
--- /dev/null
+++ b/usr/src/lib/libidspace/common/llib-lidspace
@@ -0,0 +1,19 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/* LINTLIBRARY */
+/* PROTOLIB1 */
+
+#include <libidspace.h>
diff --git a/usr/src/lib/libidspace/common/mapfile-vers b/usr/src/lib/libidspace/common/mapfile-vers
new file mode 100644
index 0000000000..61ae855ee0
--- /dev/null
+++ b/usr/src/lib/libidspace/common/mapfile-vers
@@ -0,0 +1,47 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_VERSION ILLUMOS_1.0 { # first release of libidspace
+ global:
+ id_alloc;
+ id_alloc_specific;
+ id_free;
+ id_space_create;
+ id_space_destroy;
+ id_space_extend;
+};
+
+
+SYMBOL_VERSION ILLUMOSprivate {
+ local:
+ *;
+};
+
diff --git a/usr/src/lib/libidspace/i386/Makefile b/usr/src/lib/libidspace/i386/Makefile
new file mode 100644
index 0000000000..41e699e8f8
--- /dev/null
+++ b/usr/src/lib/libidspace/i386/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/libidspace/sparc/Makefile b/usr/src/lib/libidspace/sparc/Makefile
new file mode 100644
index 0000000000..41e699e8f8
--- /dev/null
+++ b/usr/src/lib/libidspace/sparc/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/libidspace/sparcv9/Makefile b/usr/src/lib/libidspace/sparcv9/Makefile
new file mode 100644
index 0000000000..15d904c616
--- /dev/null
+++ b/usr/src/lib/libidspace/sparcv9/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+include ../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/libipadm/common/libipadm.c b/usr/src/lib/libipadm/common/libipadm.c
index 527f735e17..58297eda6b 100644
--- a/usr/src/lib/libipadm/common/libipadm.c
+++ b/usr/src/lib/libipadm/common/libipadm.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
*/
@@ -285,11 +286,19 @@ ipadm_close(ipadm_handle_t iph)
boolean_t
ipadm_check_auth(void)
{
+ int uid;
struct passwd pwd;
char buf[NSS_BUFLEN_PASSWD];
+ /*
+ * Branded zones may have different kinds of auth, but root always
+ * allowed.
+ */
+ if ((uid = getuid()) == 0)
+ return (B_TRUE);
+
/* get the password entry for the given user ID */
- if (getpwuid_r(getuid(), &pwd, buf, sizeof (buf)) == NULL)
+ if (getpwuid_r(uid, &pwd, buf, sizeof (buf)) == NULL)
return (B_FALSE);
/* check for presence of given authorization */
@@ -897,9 +906,21 @@ ipadm_door_call(ipadm_handle_t iph, void *arg, size_t asize, void **rbufp,
reopen:
(void) pthread_mutex_lock(&iph->iph_lock);
- /* The door descriptor is opened if it isn't already */
+ /*
+ * The door descriptor is opened if it isn't already.
+ */
if (iph->iph_door_fd == -1) {
- if ((iph->iph_door_fd = open(IPMGMT_DOOR, O_RDONLY)) < 0) {
+ char door[MAXPATHLEN];
+ const char *zroot = zone_get_nroot();
+
+ /*
+ * If this is a branded zone, make sure we use the "/native"
+ * prefix for the door path:
+ */
+ (void) snprintf(door, sizeof (door), "%s%s", zroot != NULL ?
+ zroot : "", IPMGMT_DOOR);
+
+ if ((iph->iph_door_fd = open(door, O_RDONLY)) < 0) {
err = errno;
(void) pthread_mutex_unlock(&iph->iph_lock);
return (err);
diff --git a/usr/src/lib/libkmf/plugins/kmf_openssl/Makefile.com b/usr/src/lib/libkmf/plugins/kmf_openssl/Makefile.com
index 9557d11286..72432160c7 100644
--- a/usr/src/lib/libkmf/plugins/kmf_openssl/Makefile.com
+++ b/usr/src/lib/libkmf/plugins/kmf_openssl/Makefile.com
@@ -37,8 +37,8 @@ KMFINC= -I../../../include -I../../../ber_der/inc
BERLIB= -lkmf -lkmfberder
BERLIB64= $(BERLIB)
-OPENSSLLIBS= $(BERLIB) -lcrypto -lcryptoutil -lc
-OPENSSLLIBS64= $(BERLIB64) -lcrypto -lcryptoutil -lc
+OPENSSLLIBS= $(BERLIB) -lsunw_crypto -lcryptoutil -lc
+OPENSSLLIBS64= $(BERLIB64) -lsunw_crypto -lcryptoutil -lc
LINTSSLLIBS = $(BERLIB) -lcrypto -lcryptoutil -lc
LINTSSLLIBS64 = $(BERLIB64) -lcrypto -lcryptoutil -lc
diff --git a/usr/src/lib/libnisdb/db_mindex3.cc b/usr/src/lib/libnisdb/db_mindex3.cc
index d3db240b3c..e70076f409 100644
--- a/usr/src/lib/libnisdb/db_mindex3.cc
+++ b/usr/src/lib/libnisdb/db_mindex3.cc
@@ -284,7 +284,7 @@ entriesFromLDAPthread(void *voidarg) {
/* Lock to prevent removal */
(void) __nis_lock_db_table(arg->tableName, 1, 0,
- "entriesFromLDAPthread");
+ (char *)"entriesFromLDAPthread");
/*
* It's possible that the db_mindex for the table has changed,
@@ -316,7 +316,7 @@ entriesFromLDAPthread(void *voidarg) {
(void) entriesFromLDAPreal(arg);
(void) __nis_ulock_db_table(arg->tableName, 1, 0,
- "entriesFromLDAPthread");
+ (char *)"entriesFromLDAPthread");
freeQuery(arg->q);
if (arg->dirObj != 0)
diff --git a/usr/src/lib/libnisdb/db_table.cc b/usr/src/lib/libnisdb/db_table.cc
index f4eb620237..216c7575bb 100644
--- a/usr/src/lib/libnisdb/db_table.cc
+++ b/usr/src/lib/libnisdb/db_table.cc
@@ -601,7 +601,7 @@ db_table::setEntryExp(entryp where, entry_obj *obj, int initialLoad) {
if (o != 0) {
__nis_buffer_t b = {0, 0};
- bp2buf(myself, &b, "%s.%s",
+ bp2buf(myself, &b, (char *)"%s.%s",
o->zo_name, o->zo_domain);
t = getObjMapping(b.buf, 0, 1, 0, 0);
sfree(b.buf);
@@ -969,7 +969,7 @@ db_table::setEnumMode(long enumNum) {
if (stat != DB_SUCCESS) {
enumMode.flag = 0;
enumCount.flag = 0;
- logmsg(MSG_NOTIMECHECK, LOG_ERR,
+ logmsg(MSG_NOTIMECHECK, LOG_ERR, (char *)
"%s: No memory for enum check array; entry removal disabled",
myself);
}
diff --git a/usr/src/lib/libnisdb/nis_db.cc b/usr/src/lib/libnisdb/nis_db.cc
index 1007a396bf..0aa1556c33 100644
--- a/usr/src/lib/libnisdb/nis_db.cc
+++ b/usr/src/lib/libnisdb/nis_db.cc
@@ -529,7 +529,7 @@ dbFindObject(char *objName, db_status *statP) {
/* If not the root dir, find the directory where the entry lives */
sfree(table);
- name = entryName(myself, objName, &table);
+ name = entryName((char *)myself, objName, &table);
if (name == 0 || table == 0) {
sfree(name);
RETSTAT(0, DB_MEMORY_LIMIT);
@@ -739,7 +739,7 @@ dbDeleteObj(char *objName) {
nod->objType = o->zo_data.zo_type;
nis_destroy_object(o);
- nod->objName = sdup(myself, T, objName);
+ nod->objName = sdup((char *)myself, T, objName);
if (nod->objName == 0) {
sfree(nod);
return (DB_MEMORY_LIMIT);
@@ -791,7 +791,7 @@ dbTouchObj(char *objName) {
sfree(table);
table = 0;
- ent = entryName(myself, objName, &table);
+ ent = entryName((char *)myself, objName, &table);
if (ent == 0 || table == 0) {
sfree(ent);
return (DB_MEMORY_LIMIT);
@@ -991,7 +991,7 @@ dbRefreshObj(char *name, nis_object *o) {
int lstat;
/* Find parent */
- ent = entryName(myself, objName, &table);
+ ent = entryName((char *)myself, objName, &table);
if (ent == 0 || table == 0) {
sfree(b.buf);
sfree(objTable);
diff --git a/usr/src/lib/libnsl/common/mapfile-vers b/usr/src/lib/libnsl/common/mapfile-vers
index c8fa67b625..21d6ca76c6 100644
--- a/usr/src/lib/libnsl/common/mapfile-vers
+++ b/usr/src/lib/libnsl/common/mapfile-vers
@@ -21,6 +21,7 @@
#
# Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+# Copyright 2015 Joyent, Inc.
#
#
@@ -467,6 +468,7 @@ $endif
SYMBOL_VERSION SUNWprivate_1.5 {
global:
+ _nsl_brand_set_hooks;
clnt_create_service_timed;
inet_matchaddr;
} SUNWprivate_1.4;
diff --git a/usr/src/lib/libnsl/netselect/netselect.c b/usr/src/lib/libnsl/netselect/netselect.c
index 41dfa4909a..8c21fc3d69 100644
--- a/usr/src/lib/libnsl/netselect/netselect.c
+++ b/usr/src/lib/libnsl/netselect/netselect.c
@@ -22,6 +22,7 @@
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
@@ -32,8 +33,6 @@
* under license from the Regents of the University of California.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include "mt.h"
#include "../rpc/rpc_mt.h" /* for MT declarations only */
#include <rpc/types.h>
@@ -45,6 +44,7 @@
#include <malloc.h>
#include <libintl.h>
#include <syslog.h>
+#include <zone.h>
#include "netcspace.h"
#define FAILURE (unsigned)(-1)
@@ -70,6 +70,9 @@ static struct netconfig *netconfig_dup(struct netconfig *);
extern const char __nsl_dom[];
+static int (*brand_get_sz)(void) = NULL;
+static struct netconfig *(*brand_get_net_ent)(int) = NULL;
+
/*
* Static global variables used by the library procedures:
*
@@ -255,6 +258,14 @@ freenetconfigent(struct netconfig *netp)
netconfig_free(netp);
}
+void
+_nsl_brand_set_hooks(int (*set_sz_func)(void),
+ struct netconfig *(*get_ent_func)(int))
+{
+ brand_get_sz = set_sz_func;
+ brand_get_net_ent = get_ent_func;
+}
+
/*
* getnetlist() reads the netconfig file and creates a
* NULL-terminated list of entries.
@@ -265,52 +276,82 @@ freenetconfigent(struct netconfig *netp)
static struct netconfig **
getnetlist(void)
{
- char line[BUFSIZ]; /* holds each line of NETCONFIG */
- FILE *fp; /* file stream for NETCONFIG */
+ FILE *fp = NULL; /* file stream for NETCONFIG */
struct netconfig **listpp; /* the beginning of the netconfig list */
struct netconfig **tpp; /* used to traverse the netconfig list */
int count; /* the number of entries in file */
+ char nc_path[MAXPATHLEN];
+ const char *zroot = zone_get_nroot();
- if ((fp = fopen(NETCONFIG, "rF")) == NULL) {
- nc_error = NC_OPENFAIL;
- return (NULL);
- }
+ /*
+ * If we are running in a branded zone, ensure we use the "/native"
+ * prefix when opening the netconfig file:
+ */
+ (void) snprintf(nc_path, sizeof (nc_path), "%s%s", zroot != NULL ?
+ zroot : "", NETCONFIG);
- count = 0;
- while (fgets(line, BUFSIZ, fp)) {
- if (!(blank(line) || comment(line))) {
- ++count;
+ if (brand_get_sz != NULL) {
+ count = brand_get_sz();
+ } else {
+ char line[BUFSIZ]; /* holds each line of NETCONFIG */
+
+ if ((fp = fopen(nc_path, "rF")) == NULL) {
+ nc_error = NC_OPENFAIL;
+ return (NULL);
}
+
+ count = 0;
+ while (fgets(line, BUFSIZ, fp)) {
+ if (!(blank(line) || comment(line))) {
+ ++count;
+ }
+ }
+ rewind(fp);
}
- rewind(fp);
if (count == 0) {
nc_error = NC_NOTFOUND;
- (void) fclose(fp);
+ if (fp != NULL)
+ (void) fclose(fp);
return (NULL);
}
+
if ((listpp = malloc((count + 1) *
sizeof (struct netconfig *))) == NULL) {
nc_error = NC_NOMEM;
- (void) fclose(fp);
+ if (fp != NULL)
+ (void) fclose(fp);
return (NULL);
}
- /*
- * The following loop fills in the list (loops until
- * fgetnetconfig() returns a NULL) and counts the
- * number of entries placed in the list. Note that
- * when the loop is completed, the last entry in the
- * list will contain a NULL (signifying the end of
- * the list).
- */
- linenum = 0;
- for (tpp = listpp; *tpp = fgetnetconfig(fp, NULL); tpp++)
- ;
- (void) fclose(fp);
+ if (brand_get_net_ent != NULL) {
+ int i;
+
+ tpp = listpp;
+ for (i = 0; i < count; i++) {
+ *tpp = brand_get_net_ent(i);
+ tpp++;
+ }
+ *tpp = NULL;
+ nc_error = NC_NOMOREENTRIES;
+ } else {
+ /*
+ * The following loop fills in the list (loops until
+ * fgetnetconfig() returns a NULL) and counts the
+ * number of entries placed in the list. Note that
+ * when the loop is completed, the last entry in the
+ * list will contain a NULL (signifying the end of
+ * the list).
+ */
+ linenum = 0;
+ for (tpp = listpp; *tpp = fgetnetconfig(fp, NULL); tpp++)
+ ;
+ (void) fclose(fp);
+
+ if (nc_error != NC_NOMOREENTRIES) /* Something is screwed up */
+ netlist_free(&listpp);
+ }
- if (nc_error != NC_NOMOREENTRIES) /* Something is screwed up */
- netlist_free(&listpp);
return (listpp);
}
diff --git a/usr/src/lib/libnvpair/libnvpair.h b/usr/src/lib/libnvpair/libnvpair.h
index b05669e506..197ec37f46 100644
--- a/usr/src/lib/libnvpair/libnvpair.h
+++ b/usr/src/lib/libnvpair/libnvpair.h
@@ -49,6 +49,8 @@ extern int nvpair_value_match_regex(nvpair_t *, int, char *, regex_t *,
extern void nvlist_print(FILE *, nvlist_t *);
extern int nvlist_print_json(FILE *, nvlist_t *);
extern void dump_nvlist(nvlist_t *, int);
+extern int nvlist_dump_json(nvlist_t *, char **);
+extern void nvlist_dump_json_free(nvlist_t *, char *);
/*
* Private nvlist printing interface that allows the caller some control
diff --git a/usr/src/lib/libnvpair/mapfile-vers b/usr/src/lib/libnvpair/mapfile-vers
index 0403964e05..9b1f048f75 100644
--- a/usr/src/lib/libnvpair/mapfile-vers
+++ b/usr/src/lib/libnvpair/mapfile-vers
@@ -244,6 +244,8 @@ SYMBOL_VERSION SUNWprivate_1.1 {
dump_nvlist;
nvlist_add_hrtime;
nvlist_lookup_hrtime;
+ nvlist_dump_json;
+ nvlist_dump_json_free;
nvlist_print;
nvlist_print_json;
nvlist_prt;
diff --git a/usr/src/lib/libnvpair/nvpair_json.c b/usr/src/lib/libnvpair/nvpair_json.c
index 5a317f5f94..7ebd1be7a0 100644
--- a/usr/src/lib/libnvpair/nvpair_json.c
+++ b/usr/src/lib/libnvpair/nvpair_json.c
@@ -17,16 +17,71 @@
#include <strings.h>
#include <wchar.h>
#include <sys/debug.h>
+#include <stdarg.h>
+#include <assert.h>
#include "libnvpair.h"
-#define FPRINTF(fp, ...) \
+#define FPRINTF(bufp, blen, offp, ...) \
do { \
- if (fprintf(fp, __VA_ARGS__) < 0) \
+ if (nvlist_rasnprintf(bufp, blen, offp, \
+ __VA_ARGS__) < 0) \
return (-1); \
} while (0)
/*
+ * A realloc-aware snprintf/asprintf like function.
+ */
+/*PRINTFLIKE4*/
+static int
+nvlist_rasnprintf(char **bufp, size_t *blen, off_t *boff, char *input, ...)
+{
+ int ret;
+ va_list ap;
+ size_t size;
+ char *b;
+
+ if (*bufp == NULL) {
+ assert(*blen == 0);
+ assert(*boff == 0);
+ /* Pick a reasonable starting point, let's say 1k */
+ *blen = 1024;
+ *bufp = malloc(*blen);
+ if (*bufp == NULL)
+ return (-1);
+ }
+
+ size = *blen - *boff;
+ va_start(ap, input);
+ /* E_SEC_PRINTF_VAR_FMT */
+ ret = vsnprintf(*bufp + *boff, size, input, ap);
+ va_end(ap);
+ if (ret < 0)
+ return (-1);
+
+ if (ret >= size) {
+ size_t asize = *blen;
+ while (ret + *boff >= asize)
+ asize += 1024;
+ if ((b = realloc(*bufp, asize)) == NULL)
+ return (-1);
+ *bufp = b;
+ *blen = asize;
+ size = *blen - *boff;
+ va_start(ap, input);
+ /* E_SEC_PRINTF_VAR_FMT */
+ ret = vsnprintf(*bufp + *boff, size, input, ap);
+ va_end(ap);
+ if (ret < 0)
+ return (-1);
+ assert(ret < size);
+ }
+ *boff += ret;
+
+ return (0);
+}
+
+/*
* When formatting a string for JSON output we must escape certain characters,
* as described in RFC4627. This applies to both member names and
* DATA_TYPE_STRING values.
@@ -43,7 +98,8 @@
* representable Unicode characters included in their escaped numeric form.
*/
static int
-nvlist_print_json_string(FILE *fp, const char *input)
+nvlist_print_json_string(const char *input, char **bufp, size_t *blen,
+ off_t *offp)
{
mbstate_t mbr;
wchar_t c;
@@ -51,29 +107,29 @@ nvlist_print_json_string(FILE *fp, const char *input)
bzero(&mbr, sizeof (mbr));
- FPRINTF(fp, "\"");
+ FPRINTF(bufp, blen, offp, "\"");
while ((sz = mbrtowc(&c, input, MB_CUR_MAX, &mbr)) > 0) {
switch (c) {
case '"':
- FPRINTF(fp, "\\\"");
+ FPRINTF(bufp, blen, offp, "\\\"");
break;
case '\n':
- FPRINTF(fp, "\\n");
+ FPRINTF(bufp, blen, offp, "\\n");
break;
case '\r':
- FPRINTF(fp, "\\r");
+ FPRINTF(bufp, blen, offp, "\\r");
break;
case '\\':
- FPRINTF(fp, "\\\\");
+ FPRINTF(bufp, blen, offp, "\\\\");
break;
case '\f':
- FPRINTF(fp, "\\f");
+ FPRINTF(bufp, blen, offp, "\\f");
break;
case '\t':
- FPRINTF(fp, "\\t");
+ FPRINTF(bufp, blen, offp, "\\t");
break;
case '\b':
- FPRINTF(fp, "\\b");
+ FPRINTF(bufp, blen, offp, "\\b");
break;
default:
if ((c >= 0x00 && c <= 0x1f) ||
@@ -83,13 +139,15 @@ nvlist_print_json_string(FILE *fp, const char *input)
* characters in the Basic Multilingual Plane
* as JSON-escaped multibyte characters.
*/
- FPRINTF(fp, "\\u%04x", (int)(0xffff & c));
+ FPRINTF(bufp, blen, offp, "\\u%04x",
+ (int)(0xffff & c));
} else if (c >= 0x20 && c <= 0x7f) {
/*
* Render other 7-bit ASCII characters directly
* and drop other, unrepresentable characters.
*/
- FPRINTF(fp, "%c", (int)(0xff & c));
+ FPRINTF(bufp, blen, offp, "%c",
+ (int)(0xff & c));
}
break;
}
@@ -104,98 +162,103 @@ nvlist_print_json_string(FILE *fp, const char *input)
return (-1);
}
- FPRINTF(fp, "\"");
+ FPRINTF(bufp, blen, offp, "\"");
return (0);
}
-/*
- * Dump a JSON-formatted representation of an nvlist to the provided FILE *.
- * This routine does not output any new-lines or additional whitespace other
- * than that contained in strings, nor does it call fflush(3C).
- */
-int
-nvlist_print_json(FILE *fp, nvlist_t *nvl)
+static int
+nvlist_do_json(nvlist_t *nvl, char **bufp, size_t *blen, off_t *offp)
{
nvpair_t *curr;
boolean_t first = B_TRUE;
- FPRINTF(fp, "{");
+ FPRINTF(bufp, blen, offp, "{");
for (curr = nvlist_next_nvpair(nvl, NULL); curr;
curr = nvlist_next_nvpair(nvl, curr)) {
data_type_t type = nvpair_type(curr);
if (!first)
- FPRINTF(fp, ",");
+ FPRINTF(bufp, blen, offp, ",");
else
first = B_FALSE;
- if (nvlist_print_json_string(fp, nvpair_name(curr)) == -1)
+ if (nvlist_print_json_string(nvpair_name(curr), bufp, blen,
+ offp) == -1)
return (-1);
- FPRINTF(fp, ":");
+ FPRINTF(bufp, blen, offp, ":");
switch (type) {
case DATA_TYPE_STRING: {
char *string = fnvpair_value_string(curr);
- if (nvlist_print_json_string(fp, string) == -1)
+ if (nvlist_print_json_string(string, bufp, blen,
+ offp) == -1)
return (-1);
break;
}
case DATA_TYPE_BOOLEAN: {
- FPRINTF(fp, "true");
+ FPRINTF(bufp, blen, offp, "true");
break;
}
case DATA_TYPE_BOOLEAN_VALUE: {
- FPRINTF(fp, "%s", fnvpair_value_boolean_value(curr) ==
- B_TRUE ? "true" : "false");
+ FPRINTF(bufp, blen, offp, "%s",
+ fnvpair_value_boolean_value(curr) == B_TRUE ?
+ "true" : "false");
break;
}
case DATA_TYPE_BYTE: {
- FPRINTF(fp, "%hhu", fnvpair_value_byte(curr));
+ FPRINTF(bufp, blen, offp, "%hhu",
+ fnvpair_value_byte(curr));
break;
}
case DATA_TYPE_INT8: {
- FPRINTF(fp, "%hhd", fnvpair_value_int8(curr));
+ FPRINTF(bufp, blen, offp, "%hhd",
+ fnvpair_value_int8(curr));
break;
}
case DATA_TYPE_UINT8: {
- FPRINTF(fp, "%hhu", fnvpair_value_uint8_t(curr));
+ FPRINTF(bufp, blen, offp, "%hhu",
+ fnvpair_value_uint8_t(curr));
break;
}
case DATA_TYPE_INT16: {
- FPRINTF(fp, "%hd", fnvpair_value_int16(curr));
+ FPRINTF(bufp, blen, offp, "%hd",
+ fnvpair_value_int16(curr));
break;
}
case DATA_TYPE_UINT16: {
- FPRINTF(fp, "%hu", fnvpair_value_uint16(curr));
+ FPRINTF(bufp, blen, offp, "%hu",
+ fnvpair_value_uint16(curr));
break;
}
case DATA_TYPE_INT32: {
- FPRINTF(fp, "%d", fnvpair_value_int32(curr));
+ FPRINTF(bufp, blen, offp, "%d",
+ fnvpair_value_int32(curr));
break;
}
case DATA_TYPE_UINT32: {
- FPRINTF(fp, "%u", fnvpair_value_uint32(curr));
+ FPRINTF(bufp, blen, offp, "%u",
+ fnvpair_value_uint32(curr));
break;
}
case DATA_TYPE_INT64: {
- FPRINTF(fp, "%lld",
+ FPRINTF(bufp, blen, offp, "%lld",
(long long)fnvpair_value_int64(curr));
break;
}
case DATA_TYPE_UINT64: {
- FPRINTF(fp, "%llu",
+ FPRINTF(bufp, blen, offp, "%llu",
(unsigned long long)fnvpair_value_uint64(curr));
break;
}
@@ -203,20 +266,21 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl)
case DATA_TYPE_HRTIME: {
hrtime_t val;
VERIFY0(nvpair_value_hrtime(curr, &val));
- FPRINTF(fp, "%llu", (unsigned long long)val);
+ FPRINTF(bufp, blen, offp, "%llu",
+ (unsigned long long)val);
break;
}
case DATA_TYPE_DOUBLE: {
double val;
VERIFY0(nvpair_value_double(curr, &val));
- FPRINTF(fp, "%f", val);
+ FPRINTF(bufp, blen, offp, "%f", val);
break;
}
case DATA_TYPE_NVLIST: {
- if (nvlist_print_json(fp,
- fnvpair_value_nvlist(curr)) == -1)
+ if (nvlist_do_json(fnvpair_value_nvlist(curr), bufp,
+ blen, offp) == -1)
return (-1);
break;
}
@@ -225,14 +289,15 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl)
char **val;
uint_t valsz, i;
VERIFY0(nvpair_value_string_array(curr, &val, &valsz));
- FPRINTF(fp, "[");
+ FPRINTF(bufp, blen, offp, "[");
for (i = 0; i < valsz; i++) {
if (i > 0)
- FPRINTF(fp, ",");
- if (nvlist_print_json_string(fp, val[i]) == -1)
+ FPRINTF(bufp, blen, offp, ",");
+ if (nvlist_print_json_string(val[i], bufp,
+ blen, offp) == -1)
return (-1);
}
- FPRINTF(fp, "]");
+ FPRINTF(bufp, blen, offp, "]");
break;
}
@@ -240,14 +305,15 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl)
nvlist_t **val;
uint_t valsz, i;
VERIFY0(nvpair_value_nvlist_array(curr, &val, &valsz));
- FPRINTF(fp, "[");
+ FPRINTF(bufp, blen, offp, "[");
for (i = 0; i < valsz; i++) {
if (i > 0)
- FPRINTF(fp, ",");
- if (nvlist_print_json(fp, val[i]) == -1)
+ FPRINTF(bufp, blen, offp, ",");
+ if (nvlist_do_json(val[i], bufp, blen,
+ offp) == -1)
return (-1);
}
- FPRINTF(fp, "]");
+ FPRINTF(bufp, blen, offp, "]");
break;
}
@@ -255,14 +321,14 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl)
boolean_t *val;
uint_t valsz, i;
VERIFY0(nvpair_value_boolean_array(curr, &val, &valsz));
- FPRINTF(fp, "[");
+ FPRINTF(bufp, blen, offp, "[");
for (i = 0; i < valsz; i++) {
if (i > 0)
- FPRINTF(fp, ",");
- FPRINTF(fp, val[i] == B_TRUE ?
+ FPRINTF(bufp, blen, offp, ",");
+ FPRINTF(bufp, blen, offp, val[i] == B_TRUE ?
"true" : "false");
}
- FPRINTF(fp, "]");
+ FPRINTF(bufp, blen, offp, "]");
break;
}
@@ -270,13 +336,13 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl)
uchar_t *val;
uint_t valsz, i;
VERIFY0(nvpair_value_byte_array(curr, &val, &valsz));
- FPRINTF(fp, "[");
+ FPRINTF(bufp, blen, offp, "[");
for (i = 0; i < valsz; i++) {
if (i > 0)
- FPRINTF(fp, ",");
- FPRINTF(fp, "%hhu", val[i]);
+ FPRINTF(bufp, blen, offp, ",");
+ FPRINTF(bufp, blen, offp, "%hhu", val[i]);
}
- FPRINTF(fp, "]");
+ FPRINTF(bufp, blen, offp, "]");
break;
}
@@ -284,13 +350,13 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl)
uint8_t *val;
uint_t valsz, i;
VERIFY0(nvpair_value_uint8_array(curr, &val, &valsz));
- FPRINTF(fp, "[");
+ FPRINTF(bufp, blen, offp, "[");
for (i = 0; i < valsz; i++) {
if (i > 0)
- FPRINTF(fp, ",");
- FPRINTF(fp, "%hhu", val[i]);
+ FPRINTF(bufp, blen, offp, ",");
+ FPRINTF(bufp, blen, offp, "%hhu", val[i]);
}
- FPRINTF(fp, "]");
+ FPRINTF(bufp, blen, offp, "]");
break;
}
@@ -298,13 +364,13 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl)
int8_t *val;
uint_t valsz, i;
VERIFY0(nvpair_value_int8_array(curr, &val, &valsz));
- FPRINTF(fp, "[");
+ FPRINTF(bufp, blen, offp, "[");
for (i = 0; i < valsz; i++) {
if (i > 0)
- FPRINTF(fp, ",");
- FPRINTF(fp, "%hd", val[i]);
+ FPRINTF(bufp, blen, offp, ",");
+ FPRINTF(bufp, blen, offp, "%hd", val[i]);
}
- FPRINTF(fp, "]");
+ FPRINTF(bufp, blen, offp, "]");
break;
}
@@ -312,13 +378,13 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl)
uint16_t *val;
uint_t valsz, i;
VERIFY0(nvpair_value_uint16_array(curr, &val, &valsz));
- FPRINTF(fp, "[");
+ FPRINTF(bufp, blen, offp, "[");
for (i = 0; i < valsz; i++) {
if (i > 0)
- FPRINTF(fp, ",");
- FPRINTF(fp, "%hu", val[i]);
+ FPRINTF(bufp, blen, offp, ",");
+ FPRINTF(bufp, blen, offp, "%hu", val[i]);
}
- FPRINTF(fp, "]");
+ FPRINTF(bufp, blen, offp, "]");
break;
}
@@ -326,13 +392,13 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl)
int16_t *val;
uint_t valsz, i;
VERIFY0(nvpair_value_int16_array(curr, &val, &valsz));
- FPRINTF(fp, "[");
+ FPRINTF(bufp, blen, offp, "[");
for (i = 0; i < valsz; i++) {
if (i > 0)
- FPRINTF(fp, ",");
- FPRINTF(fp, "%hd", val[i]);
+ FPRINTF(bufp, blen, offp, ",");
+ FPRINTF(bufp, blen, offp, "%hd", val[i]);
}
- FPRINTF(fp, "]");
+ FPRINTF(bufp, blen, offp, "]");
break;
}
@@ -340,13 +406,13 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl)
uint32_t *val;
uint_t valsz, i;
VERIFY0(nvpair_value_uint32_array(curr, &val, &valsz));
- FPRINTF(fp, "[");
+ FPRINTF(bufp, blen, offp, "[");
for (i = 0; i < valsz; i++) {
if (i > 0)
- FPRINTF(fp, ",");
- FPRINTF(fp, "%u", val[i]);
+ FPRINTF(bufp, blen, offp, ",");
+ FPRINTF(bufp, blen, offp, "%u", val[i]);
}
- FPRINTF(fp, "]");
+ FPRINTF(bufp, blen, offp, "]");
break;
}
@@ -354,13 +420,13 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl)
int32_t *val;
uint_t valsz, i;
VERIFY0(nvpair_value_int32_array(curr, &val, &valsz));
- FPRINTF(fp, "[");
+ FPRINTF(bufp, blen, offp, "[");
for (i = 0; i < valsz; i++) {
if (i > 0)
- FPRINTF(fp, ",");
- FPRINTF(fp, "%d", val[i]);
+ FPRINTF(bufp, blen, offp, ",");
+ FPRINTF(bufp, blen, offp, "%d", val[i]);
}
- FPRINTF(fp, "]");
+ FPRINTF(bufp, blen, offp, "]");
break;
}
@@ -368,14 +434,14 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl)
uint64_t *val;
uint_t valsz, i;
VERIFY0(nvpair_value_uint64_array(curr, &val, &valsz));
- FPRINTF(fp, "[");
+ FPRINTF(bufp, blen, offp, "[");
for (i = 0; i < valsz; i++) {
if (i > 0)
- FPRINTF(fp, ",");
- FPRINTF(fp, "%llu",
+ FPRINTF(bufp, blen, offp, ",");
+ FPRINTF(bufp, blen, offp, "%llu",
(unsigned long long)val[i]);
}
- FPRINTF(fp, "]");
+ FPRINTF(bufp, blen, offp, "]");
break;
}
@@ -383,13 +449,14 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl)
int64_t *val;
uint_t valsz, i;
VERIFY0(nvpair_value_int64_array(curr, &val, &valsz));
- FPRINTF(fp, "[");
+ FPRINTF(bufp, blen, offp, "[");
for (i = 0; i < valsz; i++) {
if (i > 0)
- FPRINTF(fp, ",");
- FPRINTF(fp, "%lld", (long long)val[i]);
+ FPRINTF(bufp, blen, offp, ",");
+ FPRINTF(bufp, blen, offp, "%lld",
+ (long long)val[i]);
}
- FPRINTF(fp, "]");
+ FPRINTF(bufp, blen, offp, "]");
break;
}
@@ -398,6 +465,41 @@ nvlist_print_json(FILE *fp, nvlist_t *nvl)
}
}
- FPRINTF(fp, "}");
+ FPRINTF(bufp, blen, offp, "}");
return (0);
}
+
+int
+nvlist_dump_json(nvlist_t *nvl, char **bufp)
+{
+ off_t off = 0;
+ size_t l = 0;
+
+ *bufp = NULL;
+ return (nvlist_do_json(nvl, bufp, &l, &off));
+}
+
+/* ARGSUSED */
+void
+nvlist_dump_json_free(nvlist_t *nvl, char *buf)
+{
+ free(buf);
+}
+
+/*
+ * Dump a JSON-formatted representation of an nvlist to the provided FILE *.
+ * This routine does not output any new-lines or additional whitespace other
+ * than that contained in strings, nor does it call fflush(3C).
+ */
+int
+nvlist_print_json(FILE *fp, nvlist_t *nvl)
+{
+ int ret;
+ char *buf;
+
+ if ((ret = nvlist_dump_json(nvl, &buf)) < 0)
+ return (ret);
+ ret = fprintf(fp, "%s", buf);
+ nvlist_dump_json_free(nvl, buf);
+ return (ret);
+}
diff --git a/usr/src/lib/libpkg/Makefile.com b/usr/src/lib/libpkg/Makefile.com
index 3e3b294c7e..c2a80e0a34 100644
--- a/usr/src/lib/libpkg/Makefile.com
+++ b/usr/src/lib/libpkg/Makefile.com
@@ -66,7 +66,7 @@ $(LINTLIB):= SRCS = $(SRCDIR)/$(LINTSRC)
LIBS = $(DYNLIB) $(LINTLIB)
-LDLIBS += -lc -lssl -lwanboot -lcrypto -lscf -ladm
+LDLIBS += -lc -lsunw_ssl -lwanboot -lsunw_crypto -lscf -ladm
CFLAGS += $(CCVERBOSE)
CERRWARN += -_gcc=-Wno-unused-label
diff --git a/usr/src/lib/libproc/common/Pcontrol.c b/usr/src/lib/libproc/common/Pcontrol.c
index 3b6ef35709..f9b1d9755b 100644
--- a/usr/src/lib/libproc/common/Pcontrol.c
+++ b/usr/src/lib/libproc/common/Pcontrol.c
@@ -338,10 +338,16 @@ static const ps_ops_t P_live_ops = {
void
_libproc_init(void)
{
+ const char *root;
+
_libproc_debug = getenv("LIBPROC_DEBUG") != NULL;
_libproc_no_qsort = getenv("LIBPROC_NO_QSORT") != NULL;
_libproc_incore_elf = getenv("LIBPROC_INCORE_ELF") != NULL;
+ if ((root = zone_get_nroot()) != NULL)
+ (void) snprintf(procfs_path, sizeof (procfs_path), "%s/proc",
+ root);
+
(void) sigfillset(&blockable_sigs);
(void) sigdelset(&blockable_sigs, SIGKILL);
(void) sigdelset(&blockable_sigs, SIGSTOP);
@@ -1760,6 +1766,9 @@ prldump(const char *caller, lwpstatus_t *lsp)
case PR_SUSPENDED:
dprintf("%s: SUSPENDED\n", caller);
break;
+ case PR_BRAND:
+ dprintf("%s: BRANDPRIVATE (%d)\n", caller, lsp->pr_what);
+ break;
default:
dprintf("%s: Unknown\n", caller);
break;
@@ -1939,6 +1948,7 @@ Pstopstatus(struct ps_prochandle *P,
case PR_FAULTED:
case PR_JOBCONTROL:
case PR_SUSPENDED:
+ case PR_BRAND:
break;
default:
errno = EPROTO;
@@ -3513,6 +3523,7 @@ Lstopstatus(struct ps_lwphandle *L,
case PR_FAULTED:
case PR_JOBCONTROL:
case PR_SUSPENDED:
+ case PR_BRAND:
break;
default:
errno = EPROTO;
diff --git a/usr/src/lib/libproc/common/Pcontrol.h b/usr/src/lib/libproc/common/Pcontrol.h
index 6697d5736b..5840409651 100644
--- a/usr/src/lib/libproc/common/Pcontrol.h
+++ b/usr/src/lib/libproc/common/Pcontrol.h
@@ -24,7 +24,7 @@
*/
/*
* Copyright 2012 DEY Storage Systems, Inc. All rights reserved.
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
*/
@@ -92,10 +92,11 @@ typedef struct sym_tbl { /* symbol table */
typedef struct file_info { /* symbol information for a mapped file */
plist_t file_list; /* linked list */
- char file_pname[PRMAPSZ]; /* name from prmap_t */
+ char file_pname[PATH_MAX]; /* name from prmap_t */
struct map_info *file_map; /* primary (text) mapping */
int file_ref; /* references from map_info_t structures */
int file_fd; /* file descriptor for the mapped file */
+ int file_dbgfile; /* file descriptor for the debug file */
int file_init; /* 0: initialization yet to be performed */
GElf_Half file_etype; /* ELF e_type from ehdr */
GElf_Half file_class; /* ELF e_ident[EI_CLASS] from ehdr */
@@ -105,6 +106,7 @@ typedef struct file_info { /* symbol information for a mapped file */
char *file_rname; /* resolved on-disk object pathname */
char *file_rbase; /* pointer to basename of file_rname */
Elf *file_elf; /* ELF handle so we can close */
+ Elf *file_dbgelf; /* Debug ELF handle so we can close */
void *file_elfmem; /* data for faked-up ELF handle */
sym_tbl_t file_symtab; /* symbol table */
sym_tbl_t file_dynsym; /* dynamic symbol table */
@@ -121,6 +123,7 @@ typedef struct file_info { /* symbol information for a mapped file */
size_t file_shstrsz; /* section header string table size */
uintptr_t *file_saddrs; /* section header addresses */
uint_t file_nsaddrs; /* number of section header addresses */
+ boolean_t file_cvt; /* Have we tried to convert this? */
} file_info_t;
typedef struct map_info { /* description of an address space mapping */
diff --git a/usr/src/lib/libproc/common/Pcore.c b/usr/src/lib/libproc/common/Pcore.c
index 4ebca85e1f..dfa4cc6600 100644
--- a/usr/src/lib/libproc/common/Pcore.c
+++ b/usr/src/lib/libproc/common/Pcore.c
@@ -2699,6 +2699,7 @@ Pfgrab_core(int core_fd, const char *aout_path, int *perr)
fp->file_ref = 1;
fp->file_fd = -1;
+ fp->file_dbgfile = -1;
fp->file_lo = malloc(sizeof (rd_loadobj_t));
fp->file_lname = strdup(execname);
diff --git a/usr/src/lib/libproc/common/Pidle.c b/usr/src/lib/libproc/common/Pidle.c
index 3191f4fa7e..c69bcaf860 100644
--- a/usr/src/lib/libproc/common/Pidle.c
+++ b/usr/src/lib/libproc/common/Pidle.c
@@ -226,6 +226,7 @@ Pgrab_file(const char *fname, int *perr)
}
fp->file_fd = fd;
+ fp->file_dbgfile = -1;
fp->file_lo->rl_lmident = LM_ID_BASE;
if ((fp->file_lname = strdup(fp->file_pname)) == NULL) {
*perr = G_STRANGE;
diff --git a/usr/src/lib/libproc/common/Psymtab.c b/usr/src/lib/libproc/common/Psymtab.c
index 62354f9a7b..aed64d7799 100644
--- a/usr/src/lib/libproc/common/Psymtab.c
+++ b/usr/src/lib/libproc/common/Psymtab.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
* Copyright (c) 2013 by Delphix. All rights reserved.
*/
@@ -43,6 +43,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
+#include <sys/crc32.h>
#include "libproc.h"
#include "Pcontrol.h"
@@ -61,6 +62,7 @@ static int read_ehdr32(struct ps_prochandle *, Elf32_Ehdr *, uint_t *,
static int read_ehdr64(struct ps_prochandle *, Elf64_Ehdr *, uint_t *,
uintptr_t);
#endif
+static uint32_t psym_crc32[] = { CRC32_TABLE };
#define DATA_TYPES \
((1 << STT_OBJECT) | (1 << STT_FUNC) | \
@@ -69,6 +71,22 @@ static int read_ehdr64(struct ps_prochandle *, Elf64_Ehdr *, uint_t *,
#define MA_RWX (MA_READ | MA_WRITE | MA_EXEC)
+/*
+ * Minimum and maximum length of a build-id that we'll accept. Generally it's a
+ * 20 byte SHA1 and it's expected that the first byte (which is two ascii
+ * characters) indicates a directory and the remaining bytes become the file
+ * name. Therefore, our minimum length is at least 2 bytes (one for the
+ * directory and one for the name) and the max is a bit over the minimum -- 64,
+ * just in case folks do something odd. The string length is three times the max
+ * length. This accounts for the fact that each byte is two characters, a null
+ * terminator, and the directory '/' character.
+ */
+#define MINBUILDID 2
+#define MAXBUILDID 64
+#define BUILDID_STRLEN (3*MAXBUILDID)
+#define BUILDID_NAME ".note.gnu.build-id"
+#define DBGLINK_NAME ".gnu_debuglink"
+
typedef enum {
PRO_NATURAL,
PRO_BYADDR,
@@ -184,6 +202,7 @@ file_info_new(struct ps_prochandle *P, map_info_t *mptr)
mptr->map_file = fptr;
fptr->file_ref = 1;
fptr->file_fd = -1;
+ fptr->file_dbgfile = -1;
P->num_files++;
/*
@@ -274,6 +293,10 @@ file_info_free(struct ps_prochandle *P, file_info_t *fptr)
free(fptr->file_elfmem);
if (fptr->file_fd >= 0)
(void) close(fptr->file_fd);
+ if (fptr->file_dbgelf)
+ (void) elf_end(fptr->file_dbgelf);
+ if (fptr->file_dbgfile >= 0)
+ (void) close(fptr->file_dbgfile);
if (fptr->file_ctfp) {
ctf_close(fptr->file_ctfp);
free(fptr->file_ctf_buf);
@@ -721,6 +744,58 @@ Pname_to_loadobj(struct ps_prochandle *P, const char *name)
return (Plmid_to_loadobj(P, PR_LMID_EVERY, name));
}
+/*
+ * We've been given a file_info_t which doesn't have any CTF. However, it may
+ * have information that's in a format that we could convert if on the fly.
+ * We'll first try to convert the alternate debug file, if present, and then
+ * move onto the default file. The reason we prefer the alternate debug file is
+ * that if both exist, then it likely has any usable debugging information.
+ */
+static ctf_file_t *
+Pconvert_file_ctf(file_info_t *fptr)
+{
+ int err;
+ ctf_file_t *fp;
+ char errmsg[1024];
+
+ /*
+ * Provide an opt in.
+ */
+ if (getenv("LIBPROC_CTFCONVERT") == NULL)
+ return (NULL);
+
+ /*
+ * If we've already attempted to call this, then that's it. No reason to
+ * pretend we'll be more successful again another time.
+ */
+ if (fptr->file_cvt == B_TRUE)
+ return (NULL);
+ fptr->file_cvt = B_TRUE;
+
+ fp = NULL;
+ if (fptr->file_dbgelf != NULL) {
+ fp = ctf_elfconvert(fptr->file_fd, fptr->file_dbgelf, NULL, 1,
+ 0, &err, errmsg, sizeof (errmsg));
+ if (fp == NULL) {
+ dprintf("failed to convert %s: %s\n", fptr->file_pname,
+ err == ECTF_CONVBKERR ? errmsg : ctf_errmsg(err));
+ }
+ }
+ if (fp == NULL) {
+ fp = ctf_elfconvert(fptr->file_fd, fptr->file_elf, NULL, 1,
+ 0, &err, errmsg, sizeof (errmsg));
+ if (fp == NULL) {
+ dprintf("failed to convert %s: %s\n", fptr->file_pname,
+ err == ECTF_CONVBKERR ? errmsg : ctf_errmsg(err));
+ }
+ }
+ if (fp != NULL) {
+ fptr->file_ctfp = fp;
+ }
+
+ return (NULL);
+}
+
ctf_file_t *
Pbuild_file_ctf(struct ps_prochandle *P, file_info_t *fptr)
{
@@ -733,8 +808,9 @@ Pbuild_file_ctf(struct ps_prochandle *P, file_info_t *fptr)
Pbuild_file_symtab(P, fptr);
- if (fptr->file_ctf_size == 0)
- return (NULL);
+ if (fptr->file_ctf_size == 0) {
+ return (Pconvert_file_ctf(fptr));
+ }
symp = fptr->file_ctf_dyn ? &fptr->file_dynsym : &fptr->file_symtab;
if (symp->sym_data_pri == NULL)
@@ -819,14 +895,28 @@ ctf_file_t *
Plmid_to_ctf(struct ps_prochandle *P, Lmid_t lmid, const char *name)
{
map_info_t *mptr;
- file_info_t *fptr;
+ file_info_t *fptr = NULL;
if (name == PR_OBJ_EVERY)
return (NULL);
- if ((mptr = object_name_to_map(P, lmid, name)) == NULL ||
- (fptr = mptr->map_file) == NULL)
- return (NULL);
+ /*
+ * While most idle files are all ELF objects, not all of them have
+ * mapping information available. There's nothing which would make
+ * sense to fake up for ET_REL. Instead, if we're being asked for their
+ * executable object and we know that the information is valid and they
+ * only have a single file, we jump straight to that file pointer.
+ */
+ if (P->state == PS_IDLE && name == PR_OBJ_EXEC && P->info_valid == 1 &&
+ P->num_files == 1 && P->mappings == NULL) {
+ fptr = list_next(&P->file_head);
+ }
+
+ if (fptr == NULL) {
+ if ((mptr = object_name_to_map(P, lmid, name)) == NULL ||
+ (fptr = mptr->map_file) == NULL)
+ return (NULL);
+ }
return (Pbuild_file_ctf(P, fptr));
}
@@ -1544,7 +1634,7 @@ optimize_symtab(sym_tbl_t *symtab)
static Elf *
build_fake_elf(struct ps_prochandle *P, file_info_t *fptr, GElf_Ehdr *ehdr,
- size_t *nshdrs, Elf_Data **shdata)
+ size_t *nshdrs, Elf_Data **shdata)
{
size_t shstrndx;
Elf_Scn *scn;
@@ -1567,6 +1657,225 @@ build_fake_elf(struct ps_prochandle *P, file_info_t *fptr, GElf_Ehdr *ehdr,
}
/*
+ * Try and find the file described by path in the file system and validate that
+ * it matches our CRC before we try and process it for symbol information. If we
+ * instead have an ELF data section, then that means we're checking a build-id
+ * section instead. In that case we just need to find and bcmp the corresponding
+ * section.
+ *
+ * Before we validate if it's a valid CRC or data section, we check to ensure
+ * that it's a normal file and not anything else.
+ */
+static boolean_t
+build_alt_debug(file_info_t *fptr, const char *path, uint32_t crc,
+ Elf_Data *data)
+{
+ int fd;
+ struct stat st;
+ Elf *elf;
+ Elf_Scn *scn;
+ GElf_Shdr symshdr, strshdr;
+ Elf_Data *symdata, *strdata;
+ boolean_t valid;
+ uint32_t c = -1U;
+
+ if ((fd = open(path, O_RDONLY)) < 0)
+ return (B_FALSE);
+
+ if (fstat(fd, &st) != 0) {
+ (void) close(fd);
+ return (B_FALSE);
+ }
+
+ if (S_ISREG(st.st_mode) == 0) {
+ (void) close(fd);
+ return (B_FALSE);
+ }
+
+ /*
+ * Only check the CRC if we've come here through a GNU debug link
+ * section as opposed to the build id. This is indicated by having the
+ * value of data be NULL.
+ */
+ if (data == NULL) {
+ for (;;) {
+ char buf[4096];
+ ssize_t ret = read(fd, buf, sizeof (buf));
+ if (ret == -1) {
+ if (ret == EINTR)
+ continue;
+ (void) close(fd);
+ return (B_FALSE);
+ }
+ if (ret == 0) {
+ c = ~c;
+ if (c != crc) {
+ dprintf("crc mismatch, found: 0x%x "
+ "expected 0x%x\n", c, crc);
+ (void) close(fd);
+ return (B_FALSE);
+ }
+ break;
+ }
+ CRC32(c, buf, ret, c, psym_crc32);
+ }
+ }
+
+ elf = elf_begin(fd, ELF_C_READ, NULL);
+ if (elf == NULL) {
+ (void) close(fd);
+ return (B_FALSE);
+ }
+
+ if (elf_kind(elf) != ELF_K_ELF) {
+ goto fail;
+ }
+
+ /*
+ * If we have a data section, that indicates we have a build-id which
+ * means we need to find the corresponding build-id section and compare
+ * it.
+ */
+ scn = NULL;
+ valid = B_FALSE;
+ for (scn = elf_nextscn(elf, scn); data != NULL && scn != NULL;
+ scn = elf_nextscn(elf, scn)) {
+ GElf_Shdr hdr;
+ Elf_Data *ntdata;
+
+ if (gelf_getshdr(scn, &hdr) == NULL)
+ goto fail;
+
+ if (hdr.sh_type != SHT_NOTE)
+ continue;
+
+ if ((ntdata = elf_getdata(scn, NULL)) == NULL)
+ goto fail;
+
+ /*
+ * First verify the data section sizes are equal, then the
+ * section name. If that's all true, then we can just do a bcmp.
+ */
+ if (data->d_size != ntdata->d_size)
+ continue;
+
+ dprintf("found corresponding section in alternate file\n");
+ if (bcmp(ntdata->d_buf, data->d_buf, data->d_size) != 0)
+ goto fail;
+
+ valid = B_TRUE;
+ break;
+ }
+ if (data != NULL && valid == B_FALSE) {
+ dprintf("failed to find a matching %s section in %s\n",
+ BUILDID_NAME, path);
+ goto fail;
+ }
+
+
+ /*
+ * Do two passes, first see if we have a symbol header, then see if we
+ * can find the corresponding linked string table.
+ */
+ scn = NULL;
+ for (scn = elf_nextscn(elf, scn); scn != NULL;
+ scn = elf_nextscn(elf, scn)) {
+
+ if (gelf_getshdr(scn, &symshdr) == NULL)
+ goto fail;
+
+ if (symshdr.sh_type != SHT_SYMTAB)
+ continue;
+
+ if ((symdata = elf_getdata(scn, NULL)) == NULL)
+ goto fail;
+
+ break;
+ }
+ if (scn == NULL)
+ goto fail;
+
+ if ((scn = elf_getscn(elf, symshdr.sh_link)) == NULL)
+ goto fail;
+
+ if (gelf_getshdr(scn, &strshdr) == NULL)
+ goto fail;
+
+ if ((strdata = elf_getdata(scn, NULL)) == NULL)
+ goto fail;
+
+ fptr->file_symtab.sym_data_pri = symdata;
+ fptr->file_symtab.sym_symn += symshdr.sh_size / symshdr.sh_entsize;
+ fptr->file_symtab.sym_strs = strdata->d_buf;
+ fptr->file_symtab.sym_strsz = strdata->d_size;
+ fptr->file_symtab.sym_hdr_pri = symshdr;
+ fptr->file_symtab.sym_strhdr = strshdr;
+
+ dprintf("successfully loaded additional debug symbols for %s from %s\n",
+ fptr->file_rname, path);
+
+ fptr->file_dbgfile = fd;
+ fptr->file_dbgelf = elf;
+ return (B_TRUE);
+fail:
+ (void) elf_end(elf);
+ (void) close(fd);
+ return (B_FALSE);
+}
+
+/*
+ * We're here because the object in question has no symbol information, that's a
+ * bit unfortunate. However, we've found that there's a .gnu_debuglink sitting
+ * around. By convention that means that given the current location of the
+ * object on disk, and the debug name that we found in the binary we need to
+ * search the following locations for a matching file.
+ *
+ * <dirname>/.debug/<debug-name>
+ * /usr/lib/debug/<dirname>/<debug-name>
+ *
+ * In the future, we should consider supporting looking in the prefix's
+ * lib/debug directory for a matching object or supporting an arbitrary user
+ * defined set of places to look.
+ */
+static void
+find_alt_debuglink(file_info_t *fptr, const char *name, uint32_t crc)
+{
+ boolean_t r;
+ char *dup = NULL, *path = NULL, *dname;
+
+ dprintf("find_alt_debug: looking for %s, crc 0x%x\n", name, crc);
+ if (fptr->file_rname == NULL) {
+ dprintf("find_alt_debug: encountered null file_rname\n");
+ return;
+ }
+
+ dup = strdup(fptr->file_rname);
+ if (dup == NULL)
+ return;
+
+ dname = dirname(dup);
+ if (asprintf(&path, "%s/.debug/%s", dname, name) != -1) {
+ dprintf("attempting to load alternate debug information "
+ "from %s\n", path);
+ r = build_alt_debug(fptr, path, crc, NULL);
+ free(path);
+ if (r == B_TRUE)
+ goto out;
+ }
+
+ if (asprintf(&path, "/usr/lib/debug/%s/%s", dname, name) != -1) {
+ dprintf("attempting to load alternate debug information "
+ "from %s\n", path);
+ r = build_alt_debug(fptr, path, crc, NULL);
+ free(path);
+ if (r == B_TRUE)
+ goto out;
+ }
+out:
+ free(dup);
+}
+
+/*
* Build the symbol table for the given mapped file.
*/
void
@@ -1587,7 +1896,8 @@ Pbuild_file_symtab(struct ps_prochandle *P, file_info_t *fptr)
GElf_Shdr c_shdr;
Elf_Data *c_data;
const char *c_name;
- } *cp, *cache = NULL, *dyn = NULL, *plt = NULL, *ctf = NULL;
+ } *cp, *cache = NULL, *dyn = NULL, *plt = NULL, *ctf = NULL,
+ *dbglink = NULL, *buildid = NULL;
if (fptr->file_init)
return; /* We've already processed this file */
@@ -1813,7 +2123,151 @@ Pbuild_file_symtab(struct ps_prochandle *P, file_info_t *fptr)
continue;
}
ctf = cp;
+ } else if (strcmp(cp->c_name, BUILDID_NAME) == 0) {
+ dprintf("Found a %s section for %s\n", BUILDID_NAME,
+ fptr->file_rname);
+ /* The ElfXX_Nhdr is 32/64-bit neutral */
+ if (cp->c_shdr.sh_type == SHT_NOTE &&
+ cp->c_data->d_buf != NULL &&
+ cp->c_data->d_size >= sizeof (Elf32_Nhdr)) {
+ Elf32_Nhdr *hdr = cp->c_data->d_buf;
+ if (hdr->n_type != 3)
+ continue;
+ if (hdr->n_namesz != 4)
+ continue;
+ if (hdr->n_descsz < MINBUILDID)
+ continue;
+ /* Set a reasonable upper bound */
+ if (hdr->n_descsz > MAXBUILDID) {
+ dprintf("Skipped %s as too large "
+ "(%ld)\n", BUILDID_NAME,
+ (unsigned long)hdr->n_descsz);
+ continue;
+ }
+
+ if (cp->c_data->d_size < sizeof (hdr) +
+ hdr->n_namesz + hdr->n_descsz)
+ continue;
+ buildid = cp;
+ }
+ } else if (strcmp(cp->c_name, DBGLINK_NAME) == 0) {
+ dprintf("found %s section for %s\n", DBGLINK_NAME,
+ fptr->file_rname);
+ /*
+ * Let's make sure of a few things before we do this.
+ */
+ if (cp->c_shdr.sh_type == SHT_PROGBITS &&
+ cp->c_data->d_buf != NULL &&
+ cp->c_data->d_size) {
+ dbglink = cp;
+ }
+ }
+ }
+
+ /*
+ * If we haven't found any symbol table information and we have found
+ * either a .note.gnu.build-id or a .gnu_debuglink, it's time to try and
+ * figure out where we might find this. Originally, GNU used the
+ * .gnu_debuglink solely, but then they added a .note.gnu.build-id. The
+ * build-id is some size, usually 16 or 20 bytes, often a SHA1 sum of
+ * the old, but not present file. All that you have to do to compare
+ * things is see if the sections are less, in theory saving you from
+ * doing lots of expensive I/O.
+ *
+ * For the .note.gnu.build-id, we're going to check a few things before
+ * using it, first that the name is 4 bytes, and is GNU and that the
+ * type is 3, which they say is the build-id identifier.
+ *
+ * To verify that the elf data for the .gnu_debuglink seems somewhat
+ * sane, eg. the elf data should be a string, so we want to verify we
+ * have a null-terminator.
+ */
+ if (fptr->file_symtab.sym_data_pri == NULL && buildid != NULL) {
+ int i, bo;
+ uint8_t *dp;
+ char buf[BUILDID_STRLEN], *path;
+ Elf32_Nhdr *hdr = buildid->c_data->d_buf;
+
+ /*
+ * This was checked for validity when assigning the buildid
+ * variable.
+ */
+ bzero(buf, sizeof (buf));
+ dp = (uint8_t *)((uintptr_t)hdr + sizeof (*hdr) +
+ hdr->n_namesz);
+ for (i = 0, bo = 0; i < hdr->n_descsz; i++, bo += 2, dp++) {
+ assert(sizeof (buf) - bo > 0);
+
+ /*
+ * Recall that the build-id is structured as a series of
+ * bytes. However, the first two characters are supposed
+ * to represent a directory. Hence, once we reach offset
+ * two, we insert a '/' character.
+ */
+ if (bo == 2) {
+ buf[bo] = '/';
+ bo++;
+ }
+ (void) snprintf(buf + bo, sizeof (buf) - bo, "%2x",
+ *dp);
}
+
+ if (asprintf(&path, "/usr/lib/debug/.build-id/%s.debug",
+ buf) != -1) {
+ boolean_t r;
+ dprintf("attempting to find build id alternate debug "
+ "file at %s\n", path);
+ r = build_alt_debug(fptr, path, 0, buildid->c_data);
+ dprintf("attempt %s\n", r == B_TRUE ?
+ "succeeded" : "failed");
+ free(path);
+ } else {
+ dprintf("failed to construct build id path: %s\n",
+ strerror(errno));
+ }
+ }
+
+ if (fptr->file_symtab.sym_data_pri == NULL && dbglink != NULL) {
+ char *c = dbglink->c_data->d_buf;
+ size_t i;
+ boolean_t found = B_FALSE;
+ Elf_Data *ed = dbglink->c_data;
+ uint32_t crc;
+
+ for (i = 0; i < ed->d_size; i++) {
+ if (c[i] == '\0') {
+ uintptr_t off;
+ dprintf("got .gnu_debuglink terminator at "
+ "offset %lu\n", (unsigned long)i);
+ /*
+ * After the null terminator, there should be
+ * padding, followed by a 4 byte CRC of the
+ * file. If we don't see this, we're going to
+ * assume this is bogus.
+ */
+ if ((i % sizeof (uint32_t)) == 0) {
+ i += 4;
+ } else {
+ i += sizeof (uint32_t) -
+ (i % sizeof (uint32_t));
+ }
+ if (i + sizeof (uint32_t) ==
+ dbglink->c_data->d_size) {
+ found = B_TRUE;
+ off = (uintptr_t)ed->d_buf + i;
+ crc = *(uint32_t *)off;
+ } else {
+ dprintf(".gnu_debuglink size mismatch, "
+ "expected: %lu, found: %lu\n",
+ (unsigned long)i,
+ (unsigned long)ed->d_size);
+ }
+ break;
+ }
+ }
+
+ if (found == B_TRUE)
+ find_alt_debuglink(fptr, dbglink->c_data->d_buf, crc);
}
/*
@@ -1943,7 +2397,13 @@ bad:
fptr->file_elfmem = NULL;
}
(void) close(fptr->file_fd);
+ if (fptr->file_dbgelf != NULL)
+ (void) elf_end(fptr->file_dbgelf);
+ fptr->file_dbgelf = NULL;
+ if (fptr->file_dbgfile >= 0)
+ (void) close(fptr->file_dbgfile);
fptr->file_fd = -1;
+ fptr->file_dbgfile = -1;
}
/*
@@ -2579,7 +3039,7 @@ Pxlookup_by_name(
*/
int
Plookup_by_name(struct ps_prochandle *P, const char *object,
- const char *symbol, GElf_Sym *symp)
+ const char *symbol, GElf_Sym *symp)
{
return (Pxlookup_by_name(P, PR_LMID_EVERY, object, symbol, symp, NULL));
}
@@ -2677,7 +3137,7 @@ Pobject_iter_resolved(struct ps_prochandle *P, proc_map_f *func, void *cd)
static char *
i_Pobjname(struct ps_prochandle *P, boolean_t lmresolve, uintptr_t addr,
- char *buffer, size_t bufsize)
+ char *buffer, size_t bufsize)
{
map_info_t *mptr;
file_info_t *fptr;
@@ -2710,7 +3170,7 @@ i_Pobjname(struct ps_prochandle *P, boolean_t lmresolve, uintptr_t addr,
*/
char *
Pobjname(struct ps_prochandle *P, uintptr_t addr,
- char *buffer, size_t bufsize)
+ char *buffer, size_t bufsize)
{
return (i_Pobjname(P, B_FALSE, addr, buffer, bufsize));
}
@@ -2725,7 +3185,7 @@ Pobjname(struct ps_prochandle *P, uintptr_t addr,
*/
char *
Pobjname_resolved(struct ps_prochandle *P, uintptr_t addr,
- char *buffer, size_t bufsize)
+ char *buffer, size_t bufsize)
{
return (i_Pobjname(P, B_TRUE, addr, buffer, bufsize));
}
diff --git a/usr/src/lib/librename/Makefile b/usr/src/lib/librename/Makefile
new file mode 100644
index 0000000000..222523d9a1
--- /dev/null
+++ b/usr/src/lib/librename/Makefile
@@ -0,0 +1,44 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.lib
+
+HDRS = librename.h
+HDRDIR = common
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+install := TARGET = install
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber lint: $(SUBDIRS)
+
+install: $(SUBDIRS) $(VARPD_MAPFILES) install_h
+
+install_h: $(ROOTHDRS)
+
+check: $(CHECKHDRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../Makefile.targ
diff --git a/usr/src/lib/librename/Makefile.com b/usr/src/lib/librename/Makefile.com
new file mode 100644
index 0000000000..f0a22f25ac
--- /dev/null
+++ b/usr/src/lib/librename/Makefile.com
@@ -0,0 +1,34 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+LIBRARY = librename.a
+VERS = .1
+OBJECTS = librename.o \
+
+include ../../Makefile.lib
+
+LIBS = $(DYNLIB) $(LINTLIB)
+LDLIBS += -lc
+CPPFLAGS += -I../common
+
+SRCDIR = ../common
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+include ../../Makefile.targ
diff --git a/usr/src/lib/librename/amd64/Makefile b/usr/src/lib/librename/amd64/Makefile
new file mode 100644
index 0000000000..15d904c616
--- /dev/null
+++ b/usr/src/lib/librename/amd64/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+include ../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/librename/common/librename.c b/usr/src/lib/librename/common/librename.c
new file mode 100644
index 0000000000..1e198a50a3
--- /dev/null
+++ b/usr/src/lib/librename/common/librename.c
@@ -0,0 +1,229 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Implementation of librename(3RENAME) interfaces.
+ */
+
+#include <librename.h>
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <synch.h>
+
+typedef enum librename_atomic_state {
+ LIBRENAME_ATOMIC_INITIAL = 0x0,
+ LIBRENAME_ATOMIC_FSYNC,
+ LIBRENAME_ATOMIC_RENAME,
+ LIBRENAME_ATOMIC_POSTSYNC,
+ LIBRENAME_ATOMIC_COMPLETED
+} librename_atomic_state_t;
+
+struct librename_atomic {
+ char *lra_fname; /* RO */
+ char *lra_altname; /* RO */
+ int lra_dirfd; /* RO */
+ int lra_tmpfd; /* RO */
+ mutex_t lra_lock;
+ librename_atomic_state_t lra_state; /* lra_lock */
+};
+
+int
+librename_atomic_fdinit(int fd, const char *file, const char *prefix,
+ int mode, int flags, librename_atomic_t **outp)
+{
+ int ret;
+ int oflags;
+ librename_atomic_t *lrap;
+ struct stat st;
+
+ if (fd < 0 || file == NULL || outp == NULL)
+ return (EINVAL);
+
+ if (flags & ~(LIBRENAME_ATOMIC_NOUNLINK | LIBRENAME_ATOMIC_CLOEXEC))
+ return (EINVAL);
+
+ if (strchr(file, '/') != NULL)
+ return (EINVAL);
+
+ if (prefix != NULL && strchr(prefix, '/') != NULL)
+ return (EINVAL);
+
+ *outp = NULL;
+ lrap = malloc(sizeof (librename_atomic_t));
+ if (lrap == NULL)
+ return (errno);
+
+ if (fstat(fd, &st) != 0) {
+ ret = errno;
+ free(lrap);
+ return (ret);
+ }
+
+ if (!S_ISDIR(st.st_mode)) {
+ free(lrap);
+ return (ENOTDIR);
+ }
+
+ if ((lrap->lra_dirfd = dup(fd)) == -1) {
+ ret = errno;
+ free(lrap);
+ return (ret);
+ }
+
+
+ lrap->lra_fname = strdup(file);
+ if (lrap->lra_fname == NULL) {
+ ret = errno;
+ if (close(lrap->lra_dirfd) != 0)
+ abort();
+ free(lrap);
+ return (ret);
+ }
+
+ if (prefix == NULL) {
+ ret = asprintf(&lrap->lra_altname, ".%d.%s", (int)getpid(),
+ file);
+ } else {
+ ret = asprintf(&lrap->lra_altname, "%s%s", prefix, file);
+ }
+ if (ret == -1) {
+ ret = errno;
+ free(lrap->lra_fname);
+ if (close(lrap->lra_dirfd) != 0)
+ abort();
+ free(lrap);
+ return (errno);
+ }
+
+ oflags = O_CREAT | O_TRUNC | O_RDWR | O_NOFOLLOW;
+ if (flags & LIBRENAME_ATOMIC_NOUNLINK)
+ oflags |= O_EXCL;
+
+ if (flags & LIBRENAME_ATOMIC_CLOEXEC)
+ oflags |= O_CLOEXEC;
+
+ lrap->lra_tmpfd = openat(lrap->lra_dirfd, lrap->lra_altname,
+ oflags, mode);
+ if (lrap->lra_tmpfd < 0) {
+ ret = errno;
+ free(lrap->lra_altname);
+ free(lrap->lra_fname);
+ if (close(lrap->lra_dirfd) != 0)
+ abort();
+ free(lrap);
+ return (ret);
+ }
+
+ if (mutex_init(&lrap->lra_lock, USYNC_THREAD, NULL) != 0)
+ abort();
+
+ lrap->lra_state = LIBRENAME_ATOMIC_INITIAL;
+ *outp = lrap;
+ return (0);
+}
+
+int
+librename_atomic_init(const char *dir, const char *file, const char *prefix,
+ int mode, int flags, librename_atomic_t **outp)
+{
+ int fd, ret;
+
+ if ((fd = open(dir, O_RDONLY)) < 0)
+ return (errno);
+
+ ret = librename_atomic_fdinit(fd, file, prefix, mode, flags, outp);
+ if (close(fd) != 0)
+ abort();
+
+ return (ret);
+}
+
+int
+librename_atomic_fd(librename_atomic_t *lrap)
+{
+ return (lrap->lra_tmpfd);
+}
+
+/*
+ * To atomically commit a file, we need to go through and do the following:
+ *
+ * o fsync the source
+ * o run rename
+ * o fsync the source again and the directory.
+ */
+int
+librename_atomic_commit(librename_atomic_t *lrap)
+{
+ int ret = 0;
+
+ if (mutex_lock(&lrap->lra_lock) != 0)
+ abort();
+ if (lrap->lra_state == LIBRENAME_ATOMIC_COMPLETED) {
+ ret = EINVAL;
+ goto out;
+ }
+
+ if (fsync(lrap->lra_tmpfd) != 0) {
+ ret = errno;
+ goto out;
+ }
+ lrap->lra_state = LIBRENAME_ATOMIC_FSYNC;
+
+ if (renameat(lrap->lra_dirfd, lrap->lra_altname, lrap->lra_dirfd,
+ lrap->lra_fname) != 0) {
+ ret = errno;
+ goto out;
+ }
+ lrap->lra_state = LIBRENAME_ATOMIC_RENAME;
+
+ if (fsync(lrap->lra_tmpfd) != 0) {
+ ret = errno;
+ goto out;
+ }
+ lrap->lra_state = LIBRENAME_ATOMIC_POSTSYNC;
+
+ if (fsync(lrap->lra_dirfd) != 0) {
+ ret = errno;
+ goto out;
+ }
+ lrap->lra_state = LIBRENAME_ATOMIC_COMPLETED;
+
+out:
+ if (mutex_unlock(&lrap->lra_lock) != 0)
+ abort();
+ return (ret);
+}
+
+void
+librename_atomic_fini(librename_atomic_t *lrap)
+{
+
+ free(lrap->lra_altname);
+ free(lrap->lra_fname);
+ if (close(lrap->lra_tmpfd) != 0)
+ abort();
+ if (close(lrap->lra_dirfd) != 0)
+ abort();
+ if (mutex_destroy(&lrap->lra_lock) != 0)
+ abort();
+ free(lrap);
+}
diff --git a/usr/src/lib/librename/common/librename.h b/usr/src/lib/librename/common/librename.h
new file mode 100644
index 0000000000..cb344f534c
--- /dev/null
+++ b/usr/src/lib/librename/common/librename.h
@@ -0,0 +1,43 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _LIBRENAME_H
+#define _LIBRENAME_H
+
+/*
+ * librename(3RENAME) public interfaces
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct librename_atomic librename_atomic_t;
+
+#define LIBRENAME_ATOMIC_NOUNLINK 0x01
+#define LIBRENAME_ATOMIC_CLOEXEC 0x02
+extern int librename_atomic_init(const char *, const char *, const char *,
+ int, int, librename_atomic_t **);
+extern int librename_atomic_fdinit(int, const char *, const char *, int, int,
+ librename_atomic_t **);
+extern int librename_atomic_fd(librename_atomic_t *);
+extern int librename_atomic_commit(librename_atomic_t *);
+extern void librename_atomic_fini(librename_atomic_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBRENAME_H */
diff --git a/usr/src/lib/librename/common/llib-lrename b/usr/src/lib/librename/common/llib-lrename
new file mode 100644
index 0000000000..6f1dd81a7b
--- /dev/null
+++ b/usr/src/lib/librename/common/llib-lrename
@@ -0,0 +1,19 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/* LINTLIBRARY */
+/* PROTOLIB1 */
+
+#include <librename.h>
diff --git a/usr/src/lib/librename/common/mapfile-vers b/usr/src/lib/librename/common/mapfile-vers
new file mode 100644
index 0000000000..af98188b42
--- /dev/null
+++ b/usr/src/lib/librename/common/mapfile-vers
@@ -0,0 +1,41 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_VERSION ILLUMOSprivate {
+ global:
+ librename_atomic_commit;
+ librename_atomic_fd;
+ librename_atomic_fdinit;
+ librename_atomic_fini;
+ librename_atomic_init;
+ local:
+ *;
+};
diff --git a/usr/src/lib/librename/i386/Makefile b/usr/src/lib/librename/i386/Makefile
new file mode 100644
index 0000000000..41e699e8f8
--- /dev/null
+++ b/usr/src/lib/librename/i386/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/librename/sparc/Makefile b/usr/src/lib/librename/sparc/Makefile
new file mode 100644
index 0000000000..41e699e8f8
--- /dev/null
+++ b/usr/src/lib/librename/sparc/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/librename/sparcv9/Makefile b/usr/src/lib/librename/sparcv9/Makefile
new file mode 100644
index 0000000000..15d904c616
--- /dev/null
+++ b/usr/src/lib/librename/sparcv9/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+include ../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/libresolv2_joy/Makefile b/usr/src/lib/libresolv2_joy/Makefile
new file mode 100644
index 0000000000..052dfdd092
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/Makefile
@@ -0,0 +1,76 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved.
+#
+
+
+include ../../Makefile.master
+include ../Makefile.lib
+
+$(ROOTSVCMETHOD) := FILEMODE = 0555
+
+SUBDIRS= include $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+all := TARGET= all
+clean := TARGET= clean
+clobber := TARGET= clobber
+install := TARGET= install
+lint := TARGET= lint
+_msg := TARGET= _msg
+
+LIBRARY= libresolv_joy.a
+TEXT_DOMAIN= SUNW_OST_OSLIB
+XGETFLAGS= -a
+POFILE= $(LIBRARY:.a=.po)
+POFILES= generic.po
+
+SED= sed
+GREP= grep
+
+.KEEP_STATE:
+
+all clean clobber lint: $(SUBDIRS)
+
+install: $(SUBDIRS)
+
+check: $(CHECKHDRS) $(CHKMANIFEST)
+
+_msg: $(MSGDOMAIN) $(POFILE)
+ $(RM) $(MSGDOMAIN)/$(POFILE)
+ $(CP) $(POFILE) $(MSGDOMAIN)
+
+$(POFILE): $(POFILES)
+ $(RM) $@
+ $(CAT) $(POFILES) > $@
+
+$(POFILES):
+ $(RM) messages.po
+ $(XGETTEXT) $(XGETFLAGS) *.[ch]* */*.[ch]*
+ $(SED) -e '/^# msg/d' -e '/^domain/d' messages.po > $@
+ $(RM) messages.po
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET); echo
+
+FRC:
+
diff --git a/usr/src/lib/libresolv2_joy/Makefile.com b/usr/src/lib/libresolv2_joy/Makefile.com
new file mode 100644
index 0000000000..bbabcbcede
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/Makefile.com
@@ -0,0 +1,162 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#
+
+LIBRARY= libresolv_joy.a
+VERS= .2
+
+BSDOBJS= daemon.o putenv.o strcasecmp.o strsep.o \
+ ftruncate.o readv.o strdup.o strtoul.o \
+ gettimeofday.o setenv.o strerror.o utimes.o \
+ mktemp.o setitimer.o strpbrk.o writev.o
+
+DSTOBJS= dst_api.o support.o hmac_link.o
+
+# inet_addr, inet_pton, inet_ntop, and inet_ntoa removed due to overlap with
+# libnsl
+INETOBJS= inet_net_pton.o inet_neta.o inet_lnaof.o \
+ inet_netof.o nsap_addr.o inet_makeaddr.o \
+ inet_network.o inet_net_ntop.o inet_cidr_ntop.o \
+ inet_cidr_pton.o inet_data.o
+
+# build only the IRS objects that the ISC libbind's make would
+IRSTHROBJS= gethostent_r.o getnetent_r.o getnetgrent_r.o \
+ getprotoent_r.o getservent_r.o
+IRSOBJS= ${IRSTHROBJS} \
+ dns.o dns_ho.o dns_nw.o dns_pr.o \
+ dns_sv.o gai_strerror.o gen.o gen_ho.o \
+ gen_ng.o gen_nw.o gen_pr.o gen_sv.o \
+ getaddrinfo.o gethostent.o getnameinfo.o getnetent.o \
+ getnetgrent.o getprotoent.o getservent.o hesiod.o \
+ irp.o irp_ho.o irp_ng.o irp_nw.o \
+ irp_pr.o irp_sv.o irpmarshall.o irs_data.o \
+ lcl.o lcl_ho.o lcl_ng.o lcl_nw.o \
+ lcl_pr.o lcl_sv.o nis.o nul_ng.o \
+ util.o
+
+ISCOBJS= assertions.o base64.o bitncmp.o ctl_clnt.o \
+ ctl_p.o ctl_srvr.o ev_connects.o ev_files.o \
+ ev_streams.o ev_timers.o ev_waits.o eventlib.o \
+ heap.o hex.o logging.o memcluster.o \
+ movefile.o tree.o
+
+NAMESEROBJS= ns_date.o ns_name.o ns_netint.o ns_parse.o \
+ ns_print.o ns_samedomain.o ns_sign.o ns_ttl.o \
+ ns_verify.o ns_rdata.o ns_newmsg.o
+
+RESOLVOBJS= herror.o mtctxres.o res_comp.o res_data.o \
+ res_debug.o res_findzonecut.o res_init.o \
+ res_mkquery.o res_mkupdate.o res_query.o res_send.o \
+ res_sendsigned.o res_update.o
+
+SUNWOBJS= sunw_mtctxres.o sunw_updrec.o sunw_wrappers.o
+
+OBJECTS= $(BSDOBJS) $(DSTOBJS) $(INETOBJS) $(IRSOBJS) $(ISCOBJS) \
+ $(NAMESEROBJS) $(RESOLVOBJS) $(SUNWOBJS)
+
+# include library definitions
+include ../../Makefile.lib
+
+# install this library in the root filesystem
+include ../../Makefile.rootfs
+
+# CC -v complains about things we aren't going to change in the ISC code
+CCVERBOSE=
+
+SRCDIR = ../common
+SRCS= $(BSDOBJS:%.o=../common/bsd/%.c) \
+ $(DSTOBJS:%.o=../common/dst/%.c) \
+ $(INETOBJS:%.o=../common/inet/%.c) \
+ $(IRSOBJS:%.o=../common/irs/%.c) \
+ $(ISCOBJS:%.o=../common/isc/%.c) \
+ $(NAMESEROBJS:%.o=../common/nameser/%.c) \
+ $(RESOLVOBJS:%.o=../common/resolv/%.c) \
+ $(SUNWOBJS:%.o=../common/sunw/%.c)
+
+LIBS = $(DYNLIB) $(LINTLIB)
+
+$(LINTLIB):= SRCS = ../common/llib-lresolv_joy
+
+# Local Libresolv definitions
+
+SOLCOMPAT = -Dsocket=_socket
+CRYPTFLAGS= -DHMAC_MD5 -DUSE_MD5
+
+LOCFLAGS += $(CRYPTFLAGS)
+LOCFLAGS += -D_SYS_STREAM_H -D_REENTRANT -DSVR4 -DSUNW_OPTIONS \
+ $(SOLCOMPAT) -I../include -I../../common/inc
+
+CPPFLAGS += $(LOCFLAGS)
+
+CERRWARN += -_gcc=-Wno-implicit-function-declaration
+
+DYNFLAGS += $(ZNODELETE)
+
+LDLIBS += -lsocket -lnsl -lc -lmd
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+# include library targets
+include ../../Makefile.targ
+
+pics/%.o: ../common/bsd/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
+
+pics/%.o: ../common/dst/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
+
+pics/%.o: ../common/inet/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
+
+pics/%.o: ../common/irs/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
+
+pics/%.o: ../common/isc/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
+
+pics/%.o: ../common/nameser/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
+
+pics/%.o: ../common/resolv/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
+
+pics/%.o: ../common/sunw/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
+
+# install rule for lint library target
+$(ROOTLINTDIR)/%: ../common/%
+ $(INS.file)
diff --git a/usr/src/lib/libresolv2_joy/THIRDPARTYLICENSE b/usr/src/lib/libresolv2_joy/THIRDPARTYLICENSE
new file mode 100644
index 0000000000..719d77fea2
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/THIRDPARTYLICENSE
@@ -0,0 +1,185 @@
+ * Copyright (c) 1995-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
+ * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
+ * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+
+ * Portions Copyright (c) 1995 by International Business Machines, Inc.
+ *
+ * International Business Machines, Inc. (hereinafter called IBM) grants
+ * permission under its copyrights to use, copy, modify, and distribute this
+ * Software with or without fee, provided that the above copyright notice and
+ * all paragraphs of this notice appear in all copies, and that the name of IBM
+ * not be used in connection with the marketing of any product incorporating
+ * the Software or modifications thereof, without specific, written prior
+ * permission.
+ *
+ * To the extent it has a right to do so, IBM grants an immunity from suit
+ * under its patents, if any, for the use, sale or manufacture of products to
+ * the extent that such products are used for performing Domain Name System
+ * dynamic updates in TCP/IP networks by means of the Software. No immunity is
+ * granted for any product per se or for any other function of any product.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", AND IBM DISCLAIMS ALL WARRANTIES,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE. IN NO EVENT SHALL IBM BE LIABLE FOR ANY SPECIAL,
+ * DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE, EVEN
+ * IF IBM IS APPRISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+ * Portions Copyright (c) 1993 by Digital Equipment Corporation.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies, and that
+ * the name of Digital Equipment Corporation not be used in advertising or
+ * publicity pertaining to distribution of the document or software without
+ * specific, written prior permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT
+ * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+
+ * Copyright (c) 1983, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+
+/* Copyright (C) RSA Data Security, Inc. created 1986-1987, 1990,
+ 1992-1994, 1996. This is an
+ unpublished work protected as such under copyright law. This work
+ contains proprietary, confidential, and trade secret information of
+ RSA Data Security, Inc. Use, disclosure or reproduction without the
+ express written authorization of RSA Data Security, Inc. is
+ prohibited.
+ */
+
+ DNSSAFE LICENSE TERMS
+
+ This BIND software includes the DNSsafe software from RSA Data
+ Security, Inc., which is copyrighted software that can only be
+ distributed under the terms of this license agreement.
+
+ The DNSsafe software cannot be used or distributed separately from the
+ BIND software. You only have the right to use it or distribute it as
+ a bundled, integrated product.
+
+ The DNSsafe software can ONLY be used to provide authentication for
+ resource records in the Domain Name System, as specified in RFC 2065
+ and successors. You cannot modify the BIND software to use the
+ DNSsafe software for other purposes, or to make its cryptographic
+ functions available to end-users for other uses.
+
+ If you modify the DNSsafe software itself, you cannot modify its
+ documented API, and you must grant RSA Data Security the right to use,
+ modify, and distribute your modifications, including the right to use
+ any patents or other intellectual property that your modifications
+ depend upon.
+
+ You must not remove, alter, or destroy any of RSA's copyright notices
+ or license information. When distributing the software to the Federal
+ Government, it must be licensed to them as "commercial computer
+ software" protected under 48 CFR 12.212 of the FAR, or 48 CFR
+ 227.7202.1 of the DFARS.
+
+ You must not violate United States export control laws by distributing
+ the DNSsafe software or information about it, when such distribution
+ is prohibited by law.
+
+ THE DNSSAFE SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY
+ WHATSOEVER. RSA HAS NO OBLIGATION TO SUPPORT, CORRECT, UPDATE OR
+ MAINTAIN THE RSA SOFTWARE. RSA DISCLAIMS ALL WARRANTIES, EXPRESS,
+ IMPLIED OR STATUTORY, AS TO ANY MATTER WHATSOEVER, INCLUDING ALL
+ IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+ PURPOSE AND NON-INFRINGEMENT OF THIRD PARTY RIGHTS.
+
+ If you desire to use DNSsafe in ways that these terms do not permit,
+ please contact RSA Data Security, Inc., 100 Marine Parkway, Redwood
+ City, California 94065, USA, to discuss alternate licensing
+ arrangements.
+
+ * Portions Copyright (c) 1995-1998 by Trusted Information Systems, Inc.
+ *
+ * Permission to use, copy modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND TRUSTED INFORMATION SYSTEMS
+ * DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
+ * TRUSTED INFORMATION SYSTEMS BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THE SOFTWARE.
+
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by WIDE Project and
+ * its contributors.
+ * 4. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
diff --git a/usr/src/lib/libresolv2_joy/THIRDPARTYLICENSE.descrip b/usr/src/lib/libresolv2_joy/THIRDPARTYLICENSE.descrip
new file mode 100644
index 0000000000..67315d8f33
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/THIRDPARTYLICENSE.descrip
@@ -0,0 +1 @@
+BIND SOFTWARE
diff --git a/usr/src/lib/libresolv2_joy/amd64/Makefile b/usr/src/lib/libresolv2_joy/amd64/Makefile
new file mode 100644
index 0000000000..2e8cdecf75
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/amd64/Makefile
@@ -0,0 +1,31 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../Makefile.com
+include ../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64)
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/rresvport.c b/usr/src/lib/libresolv2_joy/common/bsd/daemon.c
index bfb9a0a51c..54ff83b753 100644
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/rresvport.c
+++ b/usr/src/lib/libresolv2_joy/common/bsd/daemon.c
@@ -1,6 +1,10 @@
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)daemon.c 8.1 (Berkeley) 6/4/93";
+static const char rcsid[] = "$Id: daemon.c,v 1.2 2005/04/27 04:56:10 sra Exp $";
+#endif /* LIBC_SCCS and not lint */
+
/*
- * Copyright (c) 1995, 1996, 1998 Theo de Raadt. All rights reserved.
- * Copyright (c) 1983, 1993, 1994
+ * Copyright (c) 1990, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -15,7 +19,6 @@
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
- * This product includes software developed by Theo de Raadt.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
@@ -33,76 +36,46 @@
* SUCH DAMAGE.
*/
-#include "includes.h"
+#include "port_before.h"
-#ifndef HAVE_RRESVPORT_AF
+#include <fcntl.h>
+#include <paths.h>
+#include <unistd.h>
-#if defined(LIBC_SCCS) && !defined(lint)
-static char *rcsid = "$OpenBSD: rresvport.c,v 1.5 2000/01/26 03:43:20 deraadt Exp $";
-#endif /* LIBC_SCCS and not lint */
+#include "port_after.h"
-#include "includes.h"
+#ifndef NEED_DAEMON
+int __bind_daemon__;
+#else
-#if 0
int
-rresvport(alport)
- int *alport;
-{
- return rresvport_af(alport, AF_INET);
-}
-#endif
-
-int
-rresvport_af(int *alport, sa_family_t af)
-{
- struct sockaddr_storage ss;
- struct sockaddr *sa;
- u_int16_t *portp;
- int s;
- socklen_t salen;
-
- memset(&ss, '\0', sizeof ss);
- sa = (struct sockaddr *)&ss;
+daemon(int nochdir, int noclose) {
+ int fd;
- switch (af) {
- case AF_INET:
- salen = sizeof(struct sockaddr_in);
- portp = &((struct sockaddr_in *)sa)->sin_port;
- break;
- case AF_INET6:
- salen = sizeof(struct sockaddr_in6);
- portp = &((struct sockaddr_in6 *)sa)->sin6_port;
+ switch (fork()) {
+ case -1:
+ return (-1);
+ case 0:
break;
default:
- errno = EPFNOSUPPORT;
- return (-1);
+ _exit(0);
}
- sa->sa_family = af;
-
- s = socket(af, SOCK_STREAM, 0);
- if (s < 0)
+
+ if (setsid() == -1)
return (-1);
- *portp = htons(*alport);
- if (*alport < IPPORT_RESERVED - 1) {
- if (bind(s, sa, salen) >= 0)
- return (s);
- if (errno != EADDRINUSE) {
- (void)close(s);
- return (-1);
- }
- }
+ if (!nochdir)
+ (void)chdir("/");
- *portp = 0;
- sa->sa_family = af;
- if (bindresvport_sa(s, sa) == -1) {
- (void)close(s);
- return (-1);
+ if (!noclose && (fd = open(_PATH_DEVNULL, O_RDWR, 0)) != -1) {
+ (void)dup2(fd, STDIN_FILENO);
+ (void)dup2(fd, STDOUT_FILENO);
+ (void)dup2(fd, STDERR_FILENO);
+ if (fd > 2)
+ (void)close (fd);
}
- *alport = ntohs(*portp);
- return (s);
+ return (0);
}
+#endif
-#endif /* HAVE_RRESVPORT_AF */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/bsd/ftruncate.c b/usr/src/lib/libresolv2_joy/common/bsd/ftruncate.c
new file mode 100644
index 0000000000..5ac4ebac9b
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/bsd/ftruncate.c
@@ -0,0 +1,64 @@
+#ifndef LINT
+static const char rcsid[] = "$Id: ftruncate.c,v 1.3 2005/04/27 18:16:45 sra Exp $";
+#endif
+
+/*! \file
+ * \brief
+ * ftruncate - set file size, BSD Style
+ *
+ * shortens or enlarges the file as neeeded
+ * uses some undocumented locking call. It is known to work on SCO unix,
+ * other vendors should try.
+ * The #error directive prevents unsupported OSes
+ */
+
+#include "port_before.h"
+
+#if defined(M_UNIX)
+#define OWN_FTRUNCATE
+#include <stdio.h>
+#ifdef _XOPEN_SOURCE
+#undef _XOPEN_SOURCE
+#endif
+#ifdef _POSIX_SOURCE
+#undef _POSIX_SOURCE
+#endif
+
+#include <fcntl.h>
+
+#include "port_after.h"
+
+int
+__ftruncate(int fd, long wantsize) {
+ long cursize;
+
+ /* determine current file size */
+ if ((cursize = lseek(fd, 0L, 2)) == -1)
+ return (-1);
+
+ /* maybe lengthen... */
+ if (cursize < wantsize) {
+ if (lseek(fd, wantsize - 1, 0) == -1 ||
+ write(fd, "", 1) == -1) {
+ return (-1);
+ }
+ return (0);
+ }
+
+ /* maybe shorten... */
+ if (wantsize < cursize) {
+ struct flock fl;
+
+ fl.l_whence = 0;
+ fl.l_len = 0;
+ fl.l_start = wantsize;
+ fl.l_type = F_WRLCK;
+ return (fcntl(fd, F_FREESP, &fl));
+ }
+ return (0);
+}
+#endif
+
+#ifndef OWN_FTRUNCATE
+int __bindcompat_ftruncate;
+#endif
diff --git a/usr/src/lib/libresolv2_joy/common/bsd/gettimeofday.c b/usr/src/lib/libresolv2_joy/common/bsd/gettimeofday.c
new file mode 100644
index 0000000000..2926a3575e
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/bsd/gettimeofday.c
@@ -0,0 +1,62 @@
+#ifndef LINT
+static const char rcsid[] = "$Id: gettimeofday.c,v 1.4 2005/04/27 04:56:11 sra Exp $";
+#endif
+
+#include "port_before.h"
+#include <stdio.h>
+#include <syslog.h>
+#include <sys/time.h>
+#include "port_after.h"
+
+#if !defined(NEED_GETTIMEOFDAY)
+/*%
+ * gettimeofday() occasionally returns invalid tv_usec on some platforms.
+ */
+#define MILLION 1000000
+#undef gettimeofday
+
+int
+isc__gettimeofday(struct timeval *tp, struct timezone *tzp) {
+ int res;
+
+ res = gettimeofday(tp, tzp);
+ if (res < 0)
+ return (res);
+ if (tp == NULL)
+ return (res);
+ if (tp->tv_usec < 0) {
+ do {
+ tp->tv_usec += MILLION;
+ tp->tv_sec--;
+ } while (tp->tv_usec < 0);
+ goto log;
+ } else if (tp->tv_usec > MILLION) {
+ do {
+ tp->tv_usec -= MILLION;
+ tp->tv_sec++;
+ } while (tp->tv_usec > MILLION);
+ goto log;
+ }
+ return (res);
+ log:
+ syslog(LOG_ERR, "gettimeofday: tv_usec out of range\n");
+ return (res);
+}
+#else
+int
+gettimeofday(struct timeval *tvp, struct _TIMEZONE *tzp) {
+ time_t clock, time(time_t *);
+
+ if (time(&clock) == (time_t) -1)
+ return (-1);
+ if (tvp) {
+ tvp->tv_sec = clock;
+ tvp->tv_usec = 0;
+ }
+ if (tzp) {
+ tzp->tz_minuteswest = 0;
+ tzp->tz_dsttime = 0;
+ }
+ return (0);
+}
+#endif /*NEED_GETTIMEOFDAY*/
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/inet_aton.c b/usr/src/lib/libresolv2_joy/common/bsd/mktemp.c
index d98afff30d..001b24b58f 100644
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/inet_aton.c
+++ b/usr/src/lib/libresolv2_joy/common/bsd/mktemp.c
@@ -1,9 +1,10 @@
-/* $OpenBSD: inet_addr.c,v 1.6 1999/05/03 22:31:14 yanick Exp $ */
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)mktemp.c 8.1 (Berkeley) 6/4/93";
+static const char rcsid[] = "$Id: mktemp.c,v 1.2 2005/04/27 04:56:11 sra Exp $";
+#endif /* LIBC_SCCS and not lint */
/*
- * ++Copyright++ 1983, 1990, 1993
- * -
- * Copyright (c) 1983, 1990, 1993
+ * Copyright (c) 1987, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -33,7 +34,9 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- * -
+ */
+
+/*
* Portions Copyright (c) 1993 by Digital Equipment Corporation.
*
* Permission to use, copy, modify, and distribute this software for any
@@ -51,145 +54,103 @@
* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
- * -
- * --Copyright--
*/
-#include "includes.h"
+#include "port_before.h"
-#if !defined(HAVE_INET_ATON)
+#include <sys/types.h>
+#include <sys/stat.h>
-#if defined(LIBC_SCCS) && !defined(lint)
-#if 0
-static char sccsid[] = "@(#)inet_addr.c 8.1 (Berkeley) 6/17/93";
-static char rcsid[] = "$From: inet_addr.c,v 8.5 1996/08/05 08:31:35 vixie Exp $";
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+
+#include "port_after.h"
+
+#if (!defined(NEED_MKTEMP)) && (!defined(NEED_MKSTEMP))
+int __mktemp_unneeded__;
#else
-static char rcsid[] = "$OpenBSD: inet_addr.c,v 1.6 1999/05/03 22:31:14 yanick Exp $";
-#endif
-#endif /* LIBC_SCCS and not lint */
-#include <sys/types.h>
-#include <sys/param.h>
-#include <netinet/in.h>
-#include <arpa/inet.h>
-#include <ctype.h>
+static int gettemp(char *path, int *doopen);
-#if 0
-/*
- * Ascii internet address interpretation routine.
- * The value returned is in network order.
- */
-in_addr_t
-inet_addr(cp)
- register const char *cp;
-{
- struct in_addr val;
-
- if (inet_aton(cp, &val))
- return (val.s_addr);
- return (INADDR_NONE);
+#ifdef NEED_MKSTEMP
+mkstemp(char *path) {
+ int fd;
+
+ return (gettemp(path, &fd) ? fd : -1);
}
#endif
-/*
- * Check whether "cp" is a valid ascii representation
- * of an Internet address and convert to a binary address.
- * Returns 1 if the address is valid, 0 if not.
- * This replaces inet_addr, the return value from which
- * cannot distinguish between failure and a local broadcast address.
- */
-int
-inet_aton(const char *cp, struct in_addr *addr)
-{
- register u_int32_t val;
- register int base, n;
- register char c;
- unsigned int parts[4];
- register unsigned int *pp = parts;
-
- c = *cp;
+#ifdef NEED_MKTEMP
+char *
+mktemp(char *path) {
+ return(gettemp(path, (int *)NULL) ? path : (char *)NULL);
+}
+#endif
+
+static int
+gettemp(char *path, int *doopen) {
+ char *start, *trv;
+ struct stat sbuf;
+ u_int pid;
+
+ pid = getpid();
+ for (trv = path; *trv; ++trv); /*%< extra X's get set to 0's */
+ while (*--trv == 'X') {
+ *trv = (pid % 10) + '0';
+ pid /= 10;
+ }
+
+ /*
+ * check the target directory; if you have six X's and it
+ * doesn't exist this runs for a *very* long time.
+ */
+ for (start = trv + 1;; --trv) {
+ if (trv <= path)
+ break;
+ if (*trv == '/') {
+ *trv = '\0';
+ if (stat(path, &sbuf))
+ return(0);
+ if (!S_ISDIR(sbuf.st_mode)) {
+ errno = ENOTDIR;
+ return(0);
+ }
+ *trv = '/';
+ break;
+ }
+ }
+
for (;;) {
- /*
- * Collect number up to ``.''.
- * Values are specified as for C:
- * 0x=hex, 0=octal, isdigit=decimal.
- */
- if (!isdigit(c))
- return (0);
- val = 0; base = 10;
- if (c == '0') {
- c = *++cp;
- if (c == 'x' || c == 'X')
- base = 16, c = *++cp;
- else
- base = 8;
+ if (doopen) {
+ if ((*doopen =
+ open(path, O_CREAT|O_EXCL|O_RDWR, 0600)) >= 0)
+ return(1);
+ if (errno != EEXIST)
+ return(0);
}
- for (;;) {
- if (isascii(c) && isdigit(c)) {
- val = (val * base) + (c - '0');
- c = *++cp;
- } else if (base == 16 && isascii(c) && isxdigit(c)) {
- val = (val << 4) |
- (c + 10 - (islower(c) ? 'a' : 'A'));
- c = *++cp;
- } else
+ else if (stat(path, &sbuf))
+ return(errno == ENOENT ? 1 : 0);
+
+ /* tricky little algorithm for backward compatibility */
+ for (trv = start;;) {
+ if (!*trv)
+ return(0);
+ if (*trv == 'z')
+ *trv++ = 'a';
+ else {
+ if (isdigit(*trv))
+ *trv = 'a';
+ else
+ ++*trv;
break;
+ }
}
- if (c == '.') {
- /*
- * Internet format:
- * a.b.c.d
- * a.b.c (with c treated as 16 bits)
- * a.b (with b treated as 24 bits)
- */
- if (pp >= parts + 3)
- return (0);
- *pp++ = val;
- c = *++cp;
- } else
- break;
- }
- /*
- * Check for trailing characters.
- */
- if (c != '\0' && (!isascii(c) || !isspace(c)))
- return (0);
- /*
- * Concoct the address according to
- * the number of parts specified.
- */
- n = pp - parts + 1;
- switch (n) {
-
- case 0:
- return (0); /* initial nondigit */
-
- case 1: /* a -- 32 bits */
- break;
-
- case 2: /* a.b -- 8.24 bits */
- if ((val > 0xffffff) || (parts[0] > 0xff))
- return (0);
- val |= parts[0] << 24;
- break;
-
- case 3: /* a.b.c -- 8.8.16 bits */
- if ((val > 0xffff) || (parts[0] > 0xff) || (parts[1] > 0xff))
- return (0);
- val |= (parts[0] << 24) | (parts[1] << 16);
- break;
-
- case 4: /* a.b.c.d -- 8.8.8.8 bits */
- if ((val > 0xff) || (parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff))
- return (0);
- val |= (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8);
- break;
}
- if (addr)
- addr->s_addr = htonl(val);
- return (1);
+ /*NOTREACHED*/
}
-#endif /* !defined(HAVE_INET_ATON) */
+#endif /*NEED_MKTEMP*/
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/bsd/putenv.c b/usr/src/lib/libresolv2_joy/common/bsd/putenv.c
new file mode 100644
index 0000000000..2dcbc57e6c
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/bsd/putenv.c
@@ -0,0 +1,27 @@
+#ifndef LINT
+static const char rcsid[] = "$Id: putenv.c,v 1.2 2005/04/27 04:56:11 sra Exp $";
+#endif
+
+#include "port_before.h"
+#include "port_after.h"
+
+/*%
+ * To give a little credit to Sun, SGI,
+ * and many vendors in the SysV world.
+ */
+
+#if !defined(NEED_PUTENV)
+int __bindcompat_putenv;
+#else
+int
+putenv(char *str) {
+ char *tmp;
+
+ for (tmp = str; *tmp && (*tmp != '='); tmp++)
+ ;
+
+ return (setenv(str, tmp, 1));
+}
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/bsd/readv.c b/usr/src/lib/libresolv2_joy/common/bsd/readv.c
new file mode 100644
index 0000000000..5fa691a92f
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/bsd/readv.c
@@ -0,0 +1,39 @@
+#ifndef LINT
+static const char rcsid[] = "$Id: readv.c,v 1.2 2005/04/27 04:56:11 sra Exp $";
+#endif
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+
+#include "port_after.h"
+
+#ifndef NEED_READV
+int __bindcompat_readv;
+#else
+
+int
+__readv(fd, vp, vpcount)
+ int fd;
+ const struct iovec *vp;
+ int vpcount;
+{
+ int count = 0;
+
+ while (vpcount-- > 0) {
+ int bytes = read(fd, vp->iov_base, vp->iov_len);
+
+ if (bytes < 0)
+ return (-1);
+ count += bytes;
+ if (bytes != vp->iov_len)
+ break;
+ vp++;
+ }
+ return (count);
+}
+#endif /* NEED_READV */
+/*! \file */
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/setenv.c b/usr/src/lib/libresolv2_joy/common/bsd/setenv.c
index ed5b9525ce..baf00f6ff2 100644
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/setenv.c
+++ b/usr/src/lib/libresolv2_joy/common/bsd/setenv.c
@@ -1,6 +1,11 @@
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)setenv.c 8.1 (Berkeley) 6/4/93";
+static const char rcsid[] = "$Id: setenv.c,v 1.2 2005/04/27 04:56:11 sra Exp $";
+#endif /* LIBC_SCCS and not lint */
+
/*
- * Copyright (c) 1987 Regents of the University of California.
- * All rights reserved.
+ * Copyright (c) 1987, 1993
+ * The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -31,136 +36,116 @@
* SUCH DAMAGE.
*/
-#include "includes.h"
-#ifndef HAVE_SETENV
-
-#if defined(LIBC_SCCS) && !defined(lint)
-static char *rcsid = "$OpenBSD: setenv.c,v 1.4 2001/07/09 06:57:45 deraadt Exp $";
-#endif /* LIBC_SCCS and not lint */
+#include "port_before.h"
+#include <stddef.h>
#include <stdlib.h>
#include <string.h>
-/*
- * __findenv --
- * Returns pointer to value associated with name, if any, else NULL.
- * Sets offset to be the offset of the name/value combination in the
- * environmental array, for use by setenv(3) and unsetenv(3).
- * Explicitly removes '=' in argument name.
- *
- * This routine *should* be a static; don't use it.
- */
-char *
-__findenv(name, offset)
- register const char *name;
- int *offset;
-{
- extern char **environ;
- register int len, i;
- register const char *np;
- register char **p, *cp;
+#include "port_after.h"
- if (name == NULL || environ == NULL)
- return (NULL);
- for (np = name; *np && *np != '='; ++np)
- ;
- len = np - name;
- for (p = environ; (cp = *p) != NULL; ++p) {
- for (np = name, i = len; i && *cp; i--)
- if (*cp++ != *np++)
- break;
- if (i == 0 && *cp++ == '=') {
- *offset = p - environ;
- return (cp);
- }
- }
- return (NULL);
-}
+#if !defined(NEED_SETENV)
+int __bindcompat_setenv;
+#else
-/*
+extern char **environ;
+
+static char *findenv(const char *name, int *offset);
+
+/*%
* setenv --
* Set the value of the environmental variable "name" to be
* "value". If rewrite is set, replace any current value.
*/
-int
-setenv(name, value, rewrite)
- register const char *name;
- register const char *value;
- int rewrite;
-{
+setenv(const char *name, const char *value, int rewrite) {
extern char **environ;
- static int alloced; /* if allocated space before */
- register char *C;
+ static int alloced; /*%< if allocated space before */
+ char *c;
int l_value, offset;
- char *__findenv();
- if (*value == '=') /* no `=' in value */
+ if (*value == '=') /*%< no `=' in value */
++value;
l_value = strlen(value);
- if ((C = __findenv(name, &offset))) { /* find if already exists */
+ if ((c = findenv(name, &offset))) { /*%< find if already exists */
if (!rewrite)
return (0);
- if (strlen(C) >= l_value) { /* old larger; copy over */
- while ((*C++ = *value++))
- ;
+ if (strlen(c) >= l_value) { /*%< old larger; copy over */
+ while (*c++ = *value++);
return (0);
}
- } else { /* create new slot */
- register int cnt;
- register char **P;
+ } else { /*%< create new slot */
+ int cnt;
+ char **p;
- for (P = environ, cnt = 0; *P; ++P, ++cnt);
- if (alloced) { /* just increase size */
- P = (char **)realloc((void *)environ,
+ for (p = environ, cnt = 0; *p; ++p, ++cnt);
+ if (alloced) { /*%< just increase size */
+ environ = (char **)realloc((char *)environ,
(size_t)(sizeof(char *) * (cnt + 2)));
- if (!P)
+ if (!environ)
return (-1);
- environ = P;
}
- else { /* get new space */
- alloced = 1; /* copy old entries into it */
- P = (char **)malloc((size_t)(sizeof(char *) *
- (cnt + 2)));
- if (!P)
+ else { /*%< get new space */
+ alloced = 1; /*%< copy old entries into it */
+ p = malloc((size_t)(sizeof(char *) * (cnt + 2)));
+ if (!p)
return (-1);
- memmove(P, environ, cnt * sizeof(char *));
- environ = P;
+ memcpy(p, environ, cnt * sizeof(char *));
+ environ = p;
}
environ[cnt + 1] = NULL;
offset = cnt;
}
- for (C = (char *)name; *C && *C != '='; ++C); /* no `=' in name */
- if (!(environ[offset] = /* name + `=' + value */
- malloc((size_t)((int)(C - name) + l_value + 2))))
+ for (c = (char *)name; *c && *c != '='; ++c); /*%< no `=' in name */
+ if (!(environ[offset] = /*%< name + `=' + value */
+ malloc((size_t)((int)(c - name) + l_value + 2))))
return (-1);
- /* LINTED */
- for (C = environ[offset]; (*C = *name++) && *C != '='; ++C)
- ;
- for (*C++ = '='; (*C++ = *value++); )
- ;
+ for (c = environ[offset]; (*c = *name++) && *c != '='; ++c);
+ for (*c++ = '='; *c++ = *value++;);
return (0);
}
-/*
+/*%
* unsetenv(name) --
* Delete environmental variable "name".
*/
void
-unsetenv(name)
- const char *name;
-{
- extern char **environ;
- register char **P;
+unsetenv(const char *name) {
+ char **p;
int offset;
- char *__findenv();
- while (__findenv(name, &offset)) /* if set multiple times */
- for (P = &environ[offset];; ++P)
- /* LINTED */
- if (!(*P = *(P + 1)))
+ while (findenv(name, &offset)) /*%< if set multiple times */
+ for (p = &environ[offset];; ++p)
+ if (!(*p = *(p + 1)))
break;
}
-#endif /* HAVE_SETENV */
+/*%
+ * findenv --
+ * Returns pointer to value associated with name, if any, else NULL.
+ * Sets offset to be the offset of the name/value combination in the
+ * environmental array, for use by setenv(3) and unsetenv(3).
+ * Explicitly removes '=' in argument name.
+ *
+ * This routine *should* be a static; don't use it.
+ */
+static char *
+findenv(const char *name, int *offset) {
+ const char *np;
+ char **p, *c;
+ int len;
+
+ if (name == NULL || environ == NULL)
+ return (NULL);
+ for (np = name; *np && *np != '='; ++np)
+ continue;
+ len = np - name;
+ for (p = environ; (c = *p) != NULL; ++p)
+ if (strncmp(c, name, len) == 0 && c[len] == '=') {
+ *offset = p - environ;
+ return (c + len + 1);
+ }
+ return (NULL);
+}
+#endif
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/bsd/setitimer.c b/usr/src/lib/libresolv2_joy/common/bsd/setitimer.c
new file mode 100644
index 0000000000..67881d7ca8
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/bsd/setitimer.c
@@ -0,0 +1,29 @@
+#ifndef LINT
+static const char rcsid[] = "$Id: setitimer.c,v 1.2 2005/04/27 04:56:12 sra Exp $";
+#endif
+
+#include "port_before.h"
+
+#include <sys/time.h>
+
+#include "port_after.h"
+
+/*%
+ * Setitimer emulation routine.
+ */
+#ifndef NEED_SETITIMER
+int __bindcompat_setitimer;
+#else
+
+int
+__setitimer(int which, const struct itimerval *value,
+ struct itimerval *ovalue)
+{
+ if (alarm(value->it_value.tv_sec) >= 0)
+ return (0);
+ else
+ return (-1);
+}
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/bsd/strcasecmp.c b/usr/src/lib/libresolv2_joy/common/bsd/strcasecmp.c
new file mode 100644
index 0000000000..0c9f0dccf0
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/bsd/strcasecmp.c
@@ -0,0 +1,124 @@
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)strcasecmp.c 8.1 (Berkeley) 6/4/93";
+static const char rcsid[] = "$Id: strcasecmp.c,v 1.2 2005/04/27 04:56:12 sra Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/*
+ * Copyright (c) 1987, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "port_before.h"
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/cdefs.h>
+
+#include <string.h>
+
+#include "port_after.h"
+
+#ifndef NEED_STRCASECMP
+int __strcasecmp_unneeded__;
+#else
+
+/*%
+ * This array is designed for mapping upper and lower case letter
+ * together for a case independent comparison. The mappings are
+ * based upon ascii character sequences.
+ */
+static const u_char charmap[] = {
+ 0000, 0001, 0002, 0003, 0004, 0005, 0006, 0007,
+ 0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
+ 0020, 0021, 0022, 0023, 0024, 0025, 0026, 0027,
+ 0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037,
+ 0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
+ 0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
+ 0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
+ 0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
+ 0100, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
+ 0150, 0151, 0152, 0153, 0154, 0155, 0156, 0157,
+ 0160, 0161, 0162, 0163, 0164, 0165, 0166, 0167,
+ 0170, 0171, 0172, 0133, 0134, 0135, 0136, 0137,
+ 0140, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
+ 0150, 0151, 0152, 0153, 0154, 0155, 0156, 0157,
+ 0160, 0161, 0162, 0163, 0164, 0165, 0166, 0167,
+ 0170, 0171, 0172, 0173, 0174, 0175, 0176, 0177,
+ 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
+ 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
+ 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
+ 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
+ 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
+ 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
+ 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
+ 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
+ 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
+ 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
+ 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
+ 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
+ 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
+ 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
+ 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
+ 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
+};
+
+int
+strcasecmp(const char *s1, const char *s2) {
+ const u_char *cm = charmap,
+ *us1 = (const u_char *)s1,
+ *us2 = (const u_char *)s2;
+
+ while (cm[*us1] == cm[*us2++])
+ if (*us1++ == '\0')
+ return (0);
+ return (cm[*us1] - cm[*--us2]);
+}
+
+int
+strncasecmp(const char *s1, const char *s2, size_t n) {
+ if (n != 0) {
+ const u_char *cm = charmap,
+ *us1 = (const u_char *)s1,
+ *us2 = (const u_char *)s2;
+
+ do {
+ if (cm[*us1] != cm[*us2++])
+ return (cm[*us1] - cm[*--us2]);
+ if (*us1++ == '\0')
+ break;
+ } while (--n != 0);
+ }
+ return (0);
+}
+
+#endif /*NEED_STRCASECMP*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/bsd/strdup.c b/usr/src/lib/libresolv2_joy/common/bsd/strdup.c
new file mode 100644
index 0000000000..a8d31e9587
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/bsd/strdup.c
@@ -0,0 +1,20 @@
+#include "port_before.h"
+
+#include <stdlib.h>
+
+#include "port_after.h"
+
+#ifndef NEED_STRDUP
+int __bind_strdup_unneeded;
+#else
+char *
+strdup(const char *src) {
+ char *dst = malloc(strlen(src) + 1);
+
+ if (dst)
+ strcpy(dst, src);
+ return (dst);
+}
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/bsd/strerror.c b/usr/src/lib/libresolv2_joy/common/bsd/strerror.c
new file mode 100644
index 0000000000..5973e63d5b
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/bsd/strerror.c
@@ -0,0 +1,94 @@
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)strerror.c 8.1 (Berkeley) 6/4/93";
+static const char rcsid[] = "$Id: strerror.c,v 1.6 2008/02/18 03:49:08 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/*
+ * Copyright (c) 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "port_before.h"
+
+#include <sys/param.h>
+#include <sys/types.h>
+
+#include <string.h>
+
+#include "port_after.h"
+
+#ifndef NEED_STRERROR
+int __strerror_unneeded__;
+#else
+
+#ifdef USE_SYSERROR_LIST
+extern int sys_nerr;
+extern char *sys_errlist[];
+#endif
+
+const char *
+isc_strerror(int num) {
+#define UPREFIX "Unknown error: "
+ static char ebuf[40] = UPREFIX; /*%< 64-bit number + slop */
+ u_int errnum;
+ char *p, *t;
+#ifndef USE_SYSERROR_LIST
+ const char *ret;
+#endif
+ char tmp[40];
+
+ errnum = num; /*%< convert to unsigned */
+#ifdef USE_SYSERROR_LIST
+ if (errnum < (u_int)sys_nerr)
+ return (sys_errlist[errnum]);
+#else
+#undef strerror
+ ret = strerror(num); /*%< call strerror() in libc */
+ if (ret != NULL)
+ return(ret);
+#endif
+
+ /* Do this by hand, so we don't include stdio(3). */
+ t = tmp;
+ do {
+ *t++ = "0123456789"[errnum % 10];
+ } while (errnum /= 10);
+ for (p = ebuf + sizeof(UPREFIX) - 1;;) {
+ *p++ = *--t;
+ if (t <= tmp)
+ break;
+ }
+ return (ebuf);
+}
+
+#endif /*NEED_STRERROR*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/bsd/strpbrk.c b/usr/src/lib/libresolv2_joy/common/bsd/strpbrk.c
new file mode 100644
index 0000000000..4c12d88e1c
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/bsd/strpbrk.c
@@ -0,0 +1,70 @@
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)strpbrk.c 8.1 (Berkeley) 6/4/93";
+static const char rcsid[] = "$Id: strpbrk.c,v 1.2 2005/04/27 04:56:12 sra Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/*
+ * Copyright (c) 1985, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "port_before.h"
+
+#include <sys/param.h>
+#include <sys/cdefs.h>
+
+#include <string.h>
+
+#include "port_after.h"
+
+#ifndef NEED_STRPBRK
+int __strpbrk_unneeded__;
+#else
+
+/*%
+ * Find the first occurrence in s1 of a character in s2 (excluding NUL).
+ */
+char *
+strpbrk(const char *s1, const char *s2) {
+ const char *scanp;
+ int c, sc;
+
+ while ((c = *s1++) != 0) {
+ for (scanp = s2; (sc = *scanp++) != 0;)
+ if (sc == c)
+ return ((char *)(s1 - 1));
+ }
+ return (NULL);
+}
+
+#endif /*NEED_STRPBRK*/
+
+/*! \file */
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/getgrouplist.c b/usr/src/lib/libresolv2_joy/common/bsd/strsep.c
index bdd4c3a153..c7969f0028 100644
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/getgrouplist.c
+++ b/usr/src/lib/libresolv2_joy/common/bsd/strsep.c
@@ -1,5 +1,10 @@
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "strsep.c 8.1 (Berkeley) 6/4/93";
+static const char rcsid[] = "$Id: strsep.c,v 1.2 2005/04/27 04:56:12 sra Exp $";
+#endif /* LIBC_SCCS and not lint */
+
/*
- * Copyright (c) 1991, 1993
+ * Copyright (c) 1990, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -31,75 +36,53 @@
* SUCH DAMAGE.
*/
-#include "includes.h"
-
-#ifndef HAVE_GETGROUPLIST
-
-#if defined(LIBC_SCCS) && !defined(lint)
-static char rcsid[] = "$OpenBSD: getgrouplist.c,v 1.7 1997/08/19 19:13:27 deraadt Exp $";
-#endif /* LIBC_SCCS and not lint */
-
-/*
- * get credential
- */
-#include <sys/types.h>
+#include "port_before.h"
+#include <sys/cdefs.h>
#include <string.h>
-#include <grp.h>
+#include <stdio.h>
+#include "port_after.h"
-int
-getgrouplist(uname, agroup, groups, grpcnt)
- const char *uname;
- gid_t agroup;
- register gid_t *groups;
- int *grpcnt;
-{
- register struct group *grp;
- register int i, ngroups;
- int ret, maxgroups;
- int bail;
+#ifndef NEED_STRSEP
+int __strsep_unneeded__;
+#else
- ret = 0;
- ngroups = 0;
- maxgroups = *grpcnt;
-
- /*
- * install primary group
- */
- if (ngroups >= maxgroups) {
- *grpcnt = ngroups;
- return (-1);
- }
- groups[ngroups++] = agroup;
+/*%
+ * Get next token from string *stringp, where tokens are possibly-empty
+ * strings separated by characters from delim.
+ *
+ * Writes NULs into the string at *stringp to end tokens.
+ * delim need not remain constant from call to call.
+ * On return, *stringp points past the last NUL written (if there might
+ * be further tokens), or is NULL (if there are definitely no more tokens).
+ *
+ * If *stringp is NULL, strsep returns NULL.
+ */
+char *
+strsep(char **stringp, const char *delim) {
+ char *s;
+ const char *spanp;
+ int c, sc;
+ char *tok;
- /*
- * Scan the group file to find additional groups.
- */
- setgrent();
- while ((grp = getgrent())) {
- if (grp->gr_gid == agroup)
- continue;
- for (bail = 0, i = 0; bail == 0 && i < ngroups; i++)
- if (groups[i] == grp->gr_gid)
- bail = 1;
- if (bail)
- continue;
- for (i = 0; grp->gr_mem[i]; i++) {
- if (!strcmp(grp->gr_mem[i], uname)) {
- if (ngroups >= maxgroups) {
- ret = -1;
- goto out;
- }
- groups[ngroups++] = grp->gr_gid;
- break;
+ if ((s = *stringp) == NULL)
+ return (NULL);
+ for (tok = s;;) {
+ c = *s++;
+ spanp = delim;
+ do {
+ if ((sc = *spanp++) == c) {
+ if (c == 0)
+ s = NULL;
+ else
+ s[-1] = 0;
+ *stringp = s;
+ return (tok);
}
- }
+ } while (sc != 0);
}
-out:
- endgrent();
- *grpcnt = ngroups;
- return (ret);
+ /* NOTREACHED */
}
-#endif /* HAVE_GETGROUPLIST */
+#endif /*NEED_STRSEP*/
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/bsd/strtoul.c b/usr/src/lib/libresolv2_joy/common/bsd/strtoul.c
new file mode 100644
index 0000000000..b37ff72729
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/bsd/strtoul.c
@@ -0,0 +1,119 @@
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)strtoul.c 8.1 (Berkeley) 6/4/93";
+static const char rcsid[] = "$Id: strtoul.c,v 1.4 2008/02/18 03:49:08 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/*
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdlib.h>
+
+#include "port_after.h"
+
+#ifndef NEED_STRTOUL
+int __strtoul_unneeded__;
+#else
+
+/*%
+ * Convert a string to an unsigned long integer.
+ *
+ * Ignores `locale' stuff. Assumes that the upper and lower case
+ * alphabets and digits are each contiguous.
+ */
+u_long
+strtoul(const char *nptr, char **endptr, int base) {
+ const char *s = nptr;
+ u_long acc, cutoff;
+ int neg, c, any, cutlim;
+
+ neg = 0;
+
+ /*
+ * See strtol for comments as to the logic used.
+ */
+ do {
+ c = *(const unsigned char *)s++;
+ } while (isspace(c));
+ if (c == '-') {
+ neg = 1;
+ c = *s++;
+ } else if (c == '+')
+ c = *s++;
+ if ((base == 0 || base == 16) &&
+ c == '0' && (*s == 'x' || *s == 'X')) {
+ c = s[1];
+ s += 2;
+ base = 16;
+ }
+ if (base == 0)
+ base = c == '0' ? 8 : 10;
+ cutoff = (u_long)ULONG_MAX / (u_long)base;
+ cutlim = (u_long)ULONG_MAX % (u_long)base;
+ for (acc = 0, any = 0;; c = *(const unsigned char*)s++) {
+ if (isdigit(c))
+ c -= '0';
+ else if (isalpha(c))
+ c -= isupper(c) ? 'A' - 10 : 'a' - 10;
+ else
+ break;
+ if (c >= base)
+ break;
+ if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
+ any = -1;
+ else {
+ any = 1;
+ acc *= base;
+ acc += c;
+ }
+ }
+ if (any < 0) {
+ acc = ULONG_MAX;
+ errno = ERANGE;
+ } else if (neg)
+ acc = -acc;
+ if (endptr != 0)
+ DE_CONST((any ? s - 1 : nptr), *endptr);
+ return (acc);
+}
+
+#endif /*NEED_STRTOUL*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/bsd/utimes.c b/usr/src/lib/libresolv2_joy/common/bsd/utimes.c
new file mode 100644
index 0000000000..2f65cffe25
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/bsd/utimes.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1997,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <utime.h>
+
+#include "port_after.h"
+
+#ifndef NEED_UTIMES
+int __bind_utimes_unneeded;
+#else
+
+int
+__utimes(char *filename, struct timeval *tvp) {
+ struct utimbuf utb;
+
+ utb.actime = (time_t)tvp[0].tv_sec;
+ utb.modtime = (time_t)tvp[1].tv_sec;
+ return (utime(filename, &utb));
+}
+
+#endif /* NEED_UTIMES */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/bsd/writev.c b/usr/src/lib/libresolv2_joy/common/bsd/writev.c
new file mode 100644
index 0000000000..65baa71cfc
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/bsd/writev.c
@@ -0,0 +1,89 @@
+#ifndef LINT
+static const char rcsid[] = "$Id: writev.c,v 1.3 2005/04/27 04:56:13 sra Exp $";
+#endif
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+
+#include "port_after.h"
+
+#ifndef NEED_WRITEV
+int __bindcompat_writev;
+#else
+
+#ifdef _CRAY
+#define OWN_WRITEV
+int
+__writev(int fd, struct iovec *iov, int iovlen)
+{
+ struct stat statbuf;
+
+ if (fstat(fd, &statbuf) < 0)
+ return (-1);
+
+ /*
+ * Allow for atomic writes to network.
+ */
+ if (statbuf.st_mode & S_IFSOCK) {
+ struct msghdr mesg;
+
+ memset(&mesg, 0, sizeof(mesg));
+ mesg.msg_name = 0;
+ mesg.msg_namelen = 0;
+ mesg.msg_iov = iov;
+ mesg.msg_iovlen = iovlen;
+ mesg.msg_accrights = 0;
+ mesg.msg_accrightslen = 0;
+ return (sendmsg(fd, &mesg, 0));
+ } else {
+ struct iovec *tv;
+ int i, rcode = 0, count = 0;
+
+ for (i = 0, tv = iov; i <= iovlen; tv++) {
+ rcode = write(fd, tv->iov_base, tv->iov_len);
+
+ if (rcode < 0)
+ break;
+
+ count += rcode;
+ }
+
+ if (count == 0)
+ return (rcode);
+ else
+ return (count);
+ }
+}
+
+#else /*_CRAY*/
+
+int
+__writev(fd, vp, vpcount)
+ int fd;
+ const struct iovec *vp;
+ int vpcount;
+{
+ int count = 0;
+
+ while (vpcount-- > 0) {
+ int written = write(fd, vp->iov_base, vp->iov_len);
+
+ if (written < 0)
+ return (-1);
+ count += written;
+ if (written != vp->iov_len)
+ break;
+ vp++;
+ }
+ return (count);
+}
+
+#endif /*_CRAY*/
+
+#endif /*NEED_WRITEV*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/dst/dst_api.c b/usr/src/lib/libresolv2_joy/common/dst/dst_api.c
new file mode 100644
index 0000000000..a3e48077ad
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/dst/dst_api.c
@@ -0,0 +1,1048 @@
+#ifndef LINT
+static const char rcsid[] = "$Header: /proj/cvs/prod/libbind/dst/dst_api.c,v 1.17 2007/09/24 17:18:25 each Exp $";
+#endif
+
+/*
+ * Portions Copyright (c) 1995-1998 by Trusted Information Systems, Inc.
+ *
+ * Permission to use, copy modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND TRUSTED INFORMATION SYSTEMS
+ * DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
+ * TRUSTED INFORMATION SYSTEMS BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THE SOFTWARE.
+ */
+/*
+ * This file contains the interface between the DST API and the crypto API.
+ * This is the only file that needs to be changed if the crypto system is
+ * changed. Exported functions are:
+ * void dst_init() Initialize the toolkit
+ * int dst_check_algorithm() Function to determines if alg is suppored.
+ * int dst_compare_keys() Function to compare two keys for equality.
+ * int dst_sign_data() Incremental signing routine.
+ * int dst_verify_data() Incremental verify routine.
+ * int dst_generate_key() Function to generate new KEY
+ * DST_KEY *dst_read_key() Function to retrieve private/public KEY.
+ * void dst_write_key() Function to write out a key.
+ * DST_KEY *dst_dnskey_to_key() Function to convert DNS KEY RR to a DST
+ * KEY structure.
+ * int dst_key_to_dnskey() Function to return a public key in DNS
+ * format binary
+ * DST_KEY *dst_buffer_to_key() Converst a data in buffer to KEY
+ * int *dst_key_to_buffer() Writes out DST_KEY key matterial in buffer
+ * void dst_free_key() Releases all memory referenced by key structure
+ */
+
+#include "port_before.h"
+#include <stdio.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <memory.h>
+#include <ctype.h>
+#include <time.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+#include "dst_internal.h"
+#include "port_after.h"
+
+/* static variables */
+static int done_init = 0;
+dst_func *dst_t_func[DST_MAX_ALGS];
+const char *key_file_fmt_str = "Private-key-format: v%s\nAlgorithm: %d (%s)\n";
+const char *dst_path = "";
+
+/* internal I/O functions */
+static DST_KEY *dst_s_read_public_key(const char *in_name,
+ const u_int16_t in_id, int in_alg);
+static int dst_s_read_private_key_file(char *name, DST_KEY *pk_key,
+ u_int16_t in_id, int in_alg);
+static int dst_s_write_public_key(const DST_KEY *key);
+static int dst_s_write_private_key(const DST_KEY *key);
+
+/* internal function to set up data structure */
+static DST_KEY *dst_s_get_key_struct(const char *name, const int alg,
+ const int flags, const int protocol,
+ const int bits);
+
+/*%
+ * dst_init
+ * This function initializes the Digital Signature Toolkit.
+ * Right now, it just checks the DSTKEYPATH environment variable.
+ * Parameters
+ * none
+ * Returns
+ * none
+ */
+void
+dst_init()
+{
+ char *s;
+ int len;
+
+ if (done_init != 0)
+ return;
+ done_init = 1;
+
+ s = getenv("DSTKEYPATH");
+ len = 0;
+ if (s) {
+ struct stat statbuf;
+
+ len = strlen(s);
+ if (len > PATH_MAX) {
+ EREPORT(("%s is longer than %d characters, ignoring\n",
+ s, PATH_MAX));
+ } else if (stat(s, &statbuf) != 0 || !S_ISDIR(statbuf.st_mode)) {
+ EREPORT(("%s is not a valid directory\n", s));
+ } else {
+ char *tmp;
+ tmp = (char *) malloc(len + 2);
+ memcpy(tmp, s, len + 1);
+ if (tmp[strlen(tmp) - 1] != '/') {
+ tmp[strlen(tmp) + 1] = 0;
+ tmp[strlen(tmp)] = '/';
+ }
+ dst_path = tmp;
+ }
+ }
+ memset(dst_t_func, 0, sizeof(dst_t_func));
+ /* first one is selected */
+ dst_hmac_md5_init();
+}
+
+/*%
+ * dst_check_algorithm
+ * This function determines if the crypto system for the specified
+ * algorithm is present.
+ * Parameters
+ * alg 1 KEY_RSA
+ * 3 KEY_DSA
+ * 157 KEY_HMAC_MD5
+ * future algorithms TBD and registered with IANA.
+ * Returns
+ * 1 - The algorithm is available.
+ * 0 - The algorithm is not available.
+ */
+int
+dst_check_algorithm(const int alg)
+{
+ return (dst_t_func[alg] != NULL);
+}
+
+/*%
+ * dst_s_get_key_struct
+ * This function allocates key structure and fills in some of the
+ * fields of the structure.
+ * Parameters:
+ * name: the name of the key
+ * alg: the algorithm number
+ * flags: the dns flags of the key
+ * protocol: the dns protocol of the key
+ * bits: the size of the key
+ * Returns:
+ * NULL if error
+ * valid pointer otherwise
+ */
+static DST_KEY *
+dst_s_get_key_struct(const char *name, const int alg, const int flags,
+ const int protocol, const int bits)
+{
+ DST_KEY *new_key = NULL;
+
+ if (dst_check_algorithm(alg)) /*%< make sure alg is available */
+ new_key = (DST_KEY *) malloc(sizeof(*new_key));
+ if (new_key == NULL)
+ return (NULL);
+
+ memset(new_key, 0, sizeof(*new_key));
+ new_key->dk_key_name = strdup(name);
+ if (new_key->dk_key_name == NULL) {
+ free(new_key);
+ return (NULL);
+ }
+ new_key->dk_alg = alg;
+ new_key->dk_flags = flags;
+ new_key->dk_proto = protocol;
+ new_key->dk_KEY_struct = NULL;
+ new_key->dk_key_size = bits;
+ new_key->dk_func = dst_t_func[alg];
+ return (new_key);
+}
+
+/*%
+ * dst_compare_keys
+ * Compares two keys for equality.
+ * Parameters
+ * key1, key2 Two keys to be compared.
+ * Returns
+ * 0 The keys are equal.
+ * non-zero The keys are not equal.
+ */
+
+int
+dst_compare_keys(const DST_KEY *key1, const DST_KEY *key2)
+{
+ if (key1 == key2)
+ return (0);
+ if (key1 == NULL || key2 == NULL)
+ return (4);
+ if (key1->dk_alg != key2->dk_alg)
+ return (1);
+ if (key1->dk_key_size != key2->dk_key_size)
+ return (2);
+ if (key1->dk_id != key2->dk_id)
+ return (3);
+ return (key1->dk_func->compare(key1, key2));
+}
+
+/*%
+ * dst_sign_data
+ * An incremental signing function. Data is signed in steps.
+ * First the context must be initialized (SIG_MODE_INIT).
+ * Then data is hashed (SIG_MODE_UPDATE). Finally the signature
+ * itself is created (SIG_MODE_FINAL). This function can be called
+ * once with INIT, UPDATE and FINAL modes all set, or it can be
+ * called separately with a different mode set for each step. The
+ * UPDATE step can be repeated.
+ * Parameters
+ * mode A bit mask used to specify operation(s) to be performed.
+ * SIG_MODE_INIT 1 Initialize digest
+ * SIG_MODE_UPDATE 2 Add data to digest
+ * SIG_MODE_FINAL 4 Generate signature
+ * from signature
+ * SIG_MODE_ALL (SIG_MODE_INIT,SIG_MODE_UPDATE,SIG_MODE_FINAL
+ * data Data to be signed.
+ * len The length in bytes of data to be signed.
+ * in_key Contains a private key to sign with.
+ * KEY structures should be handled (created, converted,
+ * compared, stored, freed) by the DST.
+ * signature
+ * The location to which the signature will be written.
+ * sig_len Length of the signature field in bytes.
+ * Return
+ * 0 Successfull INIT or Update operation
+ * &gt;0 success FINAL (sign) operation
+ * &lt;0 failure
+ */
+
+int
+dst_sign_data(const int mode, DST_KEY *in_key, void **context,
+ const u_char *data, const int len,
+ u_char *signature, const int sig_len)
+{
+ DUMP(data, mode, len, "dst_sign_data()");
+
+ if (mode & SIG_MODE_FINAL &&
+ (in_key->dk_KEY_struct == NULL || signature == NULL))
+ return (MISSING_KEY_OR_SIGNATURE);
+
+ if (in_key->dk_func && in_key->dk_func->sign)
+ return (in_key->dk_func->sign(mode, in_key, context, data, len,
+ signature, sig_len));
+ return (UNKNOWN_KEYALG);
+}
+
+/*%
+ * dst_verify_data
+ * An incremental verify function. Data is verified in steps.
+ * First the context must be initialized (SIG_MODE_INIT).
+ * Then data is hashed (SIG_MODE_UPDATE). Finally the signature
+ * is verified (SIG_MODE_FINAL). This function can be called
+ * once with INIT, UPDATE and FINAL modes all set, or it can be
+ * called separately with a different mode set for each step. The
+ * UPDATE step can be repeated.
+ * Parameters
+ * mode Operations to perform this time.
+ * SIG_MODE_INIT 1 Initialize digest
+ * SIG_MODE_UPDATE 2 add data to digest
+ * SIG_MODE_FINAL 4 verify signature
+ * SIG_MODE_ALL
+ * (SIG_MODE_INIT,SIG_MODE_UPDATE,SIG_MODE_FINAL)
+ * data Data to pass through the hash function.
+ * len Length of the data in bytes.
+ * in_key Key for verification.
+ * signature Location of signature.
+ * sig_len Length of the signature in bytes.
+ * Returns
+ * 0 Verify success
+ * Non-Zero Verify Failure
+ */
+
+int
+dst_verify_data(const int mode, DST_KEY *in_key, void **context,
+ const u_char *data, const int len,
+ const u_char *signature, const int sig_len)
+{
+ DUMP(data, mode, len, "dst_verify_data()");
+ if (mode & SIG_MODE_FINAL &&
+ (in_key->dk_KEY_struct == NULL || signature == NULL))
+ return (MISSING_KEY_OR_SIGNATURE);
+
+ if (in_key->dk_func == NULL || in_key->dk_func->verify == NULL)
+ return (UNSUPPORTED_KEYALG);
+ return (in_key->dk_func->verify(mode, in_key, context, data, len,
+ signature, sig_len));
+}
+
+/*%
+ * dst_read_private_key
+ * Access a private key. First the list of private keys that have
+ * already been read in is searched, then the key accessed on disk.
+ * If the private key can be found, it is returned. If the key cannot
+ * be found, a null pointer is returned. The options specify required
+ * key characteristics. If the private key requested does not have
+ * these characteristics, it will not be read.
+ * Parameters
+ * in_keyname The private key name.
+ * in_id The id of the private key.
+ * options DST_FORCE_READ Read from disk - don't use a previously
+ * read key.
+ * DST_CAN_SIGN The key must be useable for signing.
+ * DST_NO_AUTHEN The key must be useable for authentication.
+ * DST_STANDARD Return any key
+ * Returns
+ * NULL If there is no key found in the current directory or
+ * this key has not been loaded before.
+ * !NULL Success - KEY structure returned.
+ */
+
+DST_KEY *
+dst_read_key(const char *in_keyname, const u_int16_t in_id,
+ const int in_alg, const int type)
+{
+ char keyname[PATH_MAX];
+ DST_KEY *dg_key = NULL, *pubkey = NULL;
+
+ if (!dst_check_algorithm(in_alg)) { /*%< make sure alg is available */
+ EREPORT(("dst_read_private_key(): Algorithm %d not suppored\n",
+ in_alg));
+ return (NULL);
+ }
+ if ((type & (DST_PUBLIC | DST_PRIVATE)) == 0)
+ return (NULL);
+ if (in_keyname == NULL) {
+ EREPORT(("dst_read_private_key(): Null key name passed in\n"));
+ return (NULL);
+ } else if (strlen(in_keyname) >= sizeof(keyname)) {
+ EREPORT(("dst_read_private_key(): keyname too big\n"));
+ return (NULL);
+ } else
+ strcpy(keyname, in_keyname);
+
+ /* before I read in the public key, check if it is allowed to sign */
+ if ((pubkey = dst_s_read_public_key(keyname, in_id, in_alg)) == NULL)
+ return (NULL);
+
+ if (type == DST_PUBLIC)
+ return pubkey;
+
+ if (!(dg_key = dst_s_get_key_struct(keyname, pubkey->dk_alg,
+ pubkey->dk_flags, pubkey->dk_proto,
+ 0)))
+ return (dg_key);
+ /* Fill in private key and some fields in the general key structure */
+ if (dst_s_read_private_key_file(keyname, dg_key, pubkey->dk_id,
+ pubkey->dk_alg) == 0)
+ dg_key = dst_free_key(dg_key);
+
+ (void)dst_free_key(pubkey);
+ return (dg_key);
+}
+
+int
+dst_write_key(const DST_KEY *key, const int type)
+{
+ int pub = 0, priv = 0;
+
+ if (key == NULL)
+ return (0);
+ if (!dst_check_algorithm(key->dk_alg)) { /*%< make sure alg is available */
+ EREPORT(("dst_write_key(): Algorithm %d not suppored\n",
+ key->dk_alg));
+ return (UNSUPPORTED_KEYALG);
+ }
+ if ((type & (DST_PRIVATE|DST_PUBLIC)) == 0)
+ return (0);
+
+ if (type & DST_PUBLIC)
+ if ((pub = dst_s_write_public_key(key)) < 0)
+ return (pub);
+ if (type & DST_PRIVATE)
+ if ((priv = dst_s_write_private_key(key)) < 0)
+ return (priv);
+ return (priv+pub);
+}
+
+/*%
+ * dst_write_private_key
+ * Write a private key to disk. The filename will be of the form:
+ * K&lt;key-&gt;dk_name&gt;+&lt;key-&gt;dk_alg+&gt;&lt;key-d&gt;k_id.&gt;&lt;private key suffix&gt;.
+ * If there is already a file with this name, an error is returned.
+ *
+ * Parameters
+ * key A DST managed key structure that contains
+ * all information needed about a key.
+ * Return
+ * &gt;= 0 Correct behavior. Returns length of encoded key value
+ * written to disk.
+ * &lt; 0 error.
+ */
+
+static int
+dst_s_write_private_key(const DST_KEY *key)
+{
+ u_char encoded_block[RAW_KEY_SIZE];
+ char file[PATH_MAX];
+ int len;
+ FILE *fp;
+
+ /* First encode the key into the portable key format */
+ if (key == NULL)
+ return (-1);
+ if (key->dk_KEY_struct == NULL)
+ return (0); /*%< null key has no private key */
+ if (key->dk_func == NULL || key->dk_func->to_file_fmt == NULL) {
+ EREPORT(("dst_write_private_key(): Unsupported operation %d\n",
+ key->dk_alg));
+ return (-5);
+ } else if ((len = key->dk_func->to_file_fmt(key, (char *)encoded_block,
+ sizeof(encoded_block))) <= 0) {
+ EREPORT(("dst_write_private_key(): Failed encoding private RSA bsafe key %d\n", len));
+ return (-8);
+ }
+ /* Now I can create the file I want to use */
+ dst_s_build_filename(file, key->dk_key_name, key->dk_id, key->dk_alg,
+ PRIVATE_KEY, PATH_MAX);
+
+ /* Do not overwrite an existing file */
+ if ((fp = dst_s_fopen(file, "w", 0600)) != NULL) {
+ int nn;
+ if ((nn = fwrite(encoded_block, 1, len, fp)) != len) {
+ EREPORT(("dst_write_private_key(): Write failure on %s %d != %d errno=%d\n",
+ file, len, nn, errno));
+ fclose(fp);
+ return (-5);
+ }
+ fclose(fp);
+ } else {
+ EREPORT(("dst_write_private_key(): Can not create file %s\n"
+ ,file));
+ return (-6);
+ }
+ memset(encoded_block, 0, len);
+ return (len);
+}
+
+/*%
+*
+ * dst_read_public_key
+ * Read a public key from disk and store in a DST key structure.
+ * Parameters
+ * in_name K&lt;in_name&gt;&lt;in_id&gt;.&lt;public key suffix&gt; is the
+ * filename of the key file to be read.
+ * Returns
+ * NULL If the key does not exist or no name is supplied.
+ * NON-NULL Initialized key structure if the key exists.
+ */
+
+static DST_KEY *
+dst_s_read_public_key(const char *in_name, const u_int16_t in_id, int in_alg)
+{
+ int flags, proto, alg, len, dlen;
+ int c;
+ char name[PATH_MAX], enckey[RAW_KEY_SIZE], *notspace;
+ u_char deckey[RAW_KEY_SIZE];
+ FILE *fp;
+
+ if (in_name == NULL) {
+ EREPORT(("dst_read_public_key(): No key name given\n"));
+ return (NULL);
+ }
+ if (dst_s_build_filename(name, in_name, in_id, in_alg, PUBLIC_KEY,
+ PATH_MAX) == -1) {
+ EREPORT(("dst_read_public_key(): Cannot make filename from %s, %d, and %s\n",
+ in_name, in_id, PUBLIC_KEY));
+ return (NULL);
+ }
+ /*
+ * Open the file and read it's formatted contents up to key
+ * File format:
+ * domain.name [ttl] [IN] KEY &lt;flags&gt; &lt;protocol&gt; &lt;algorithm&gt; &lt;key&gt;
+ * flags, proto, alg stored as decimal (or hex numbers FIXME).
+ * (FIXME: handle parentheses for line continuation.)
+ */
+ if ((fp = dst_s_fopen(name, "r", 0)) == NULL) {
+ EREPORT(("dst_read_public_key(): Public Key not found %s\n",
+ name));
+ return (NULL);
+ }
+ /* Skip domain name, which ends at first blank */
+ while ((c = getc(fp)) != EOF)
+ if (isspace(c))
+ break;
+ /* Skip blank to get to next field */
+ while ((c = getc(fp)) != EOF)
+ if (!isspace(c))
+ break;
+
+ /* Skip optional TTL -- if initial digit, skip whole word. */
+ if (isdigit(c)) {
+ while ((c = getc(fp)) != EOF)
+ if (isspace(c))
+ break;
+ while ((c = getc(fp)) != EOF)
+ if (!isspace(c))
+ break;
+ }
+ /* Skip optional "IN" */
+ if (c == 'I' || c == 'i') {
+ while ((c = getc(fp)) != EOF)
+ if (isspace(c))
+ break;
+ while ((c = getc(fp)) != EOF)
+ if (!isspace(c))
+ break;
+ }
+ /* Locate and skip "KEY" */
+ if (c != 'K' && c != 'k') {
+ EREPORT(("\"KEY\" doesn't appear in file: %s", name));
+ return NULL;
+ }
+ while ((c = getc(fp)) != EOF)
+ if (isspace(c))
+ break;
+ while ((c = getc(fp)) != EOF)
+ if (!isspace(c))
+ break;
+ ungetc(c, fp); /*%< return the charcter to the input field */
+ /* Handle hex!! FIXME. */
+
+ if (fscanf(fp, "%d %d %d", &flags, &proto, &alg) != 3) {
+ EREPORT(("dst_read_public_key(): Can not read flag/proto/alg field from %s\n"
+ ,name));
+ return (NULL);
+ }
+ /* read in the key string */
+ fgets(enckey, sizeof(enckey), fp);
+
+ /* If we aren't at end-of-file, something is wrong. */
+ while ((c = getc(fp)) != EOF)
+ if (!isspace(c))
+ break;
+ if (!feof(fp)) {
+ EREPORT(("Key too long in file: %s", name));
+ return NULL;
+ }
+ fclose(fp);
+
+ if ((len = strlen(enckey)) <= 0)
+ return (NULL);
+
+ /* discard \n */
+ enckey[--len] = '\0';
+
+ /* remove leading spaces */
+ for (notspace = (char *) enckey; isspace((*notspace)&0xff); len--)
+ notspace++;
+
+ dlen = b64_pton(notspace, deckey, sizeof(deckey));
+ if (dlen < 0) {
+ EREPORT(("dst_read_public_key: bad return from b64_pton = %d",
+ dlen));
+ return (NULL);
+ }
+ /* store key and info in a key structure that is returned */
+/* return dst_store_public_key(in_name, alg, proto, 666, flags, deckey,
+ dlen);*/
+ return dst_buffer_to_key(in_name, alg, flags, proto, deckey, dlen);
+}
+
+/*%
+ * dst_write_public_key
+ * Write a key to disk in DNS format.
+ * Parameters
+ * key Pointer to a DST key structure.
+ * Returns
+ * 0 Failure
+ * 1 Success
+ */
+
+static int
+dst_s_write_public_key(const DST_KEY *key)
+{
+ FILE *fp;
+ char filename[PATH_MAX];
+ u_char out_key[RAW_KEY_SIZE];
+ char enc_key[RAW_KEY_SIZE];
+ int len = 0;
+ int mode;
+
+ memset(out_key, 0, sizeof(out_key));
+ if (key == NULL) {
+ EREPORT(("dst_write_public_key(): No key specified \n"));
+ return (0);
+ } else if ((len = dst_key_to_dnskey(key, out_key, sizeof(out_key)))< 0)
+ return (0);
+
+ /* Make the filename */
+ if (dst_s_build_filename(filename, key->dk_key_name, key->dk_id,
+ key->dk_alg, PUBLIC_KEY, PATH_MAX) == -1) {
+ EREPORT(("dst_write_public_key(): Cannot make filename from %s, %d, and %s\n",
+ key->dk_key_name, key->dk_id, PUBLIC_KEY));
+ return (0);
+ }
+ /* XXX in general this should be a check for symmetric keys */
+ mode = (key->dk_alg == KEY_HMAC_MD5) ? 0600 : 0644;
+ /* create public key file */
+ if ((fp = dst_s_fopen(filename, "w+", mode)) == NULL) {
+ EREPORT(("DST_write_public_key: open of file:%s failed (errno=%d)\n",
+ filename, errno));
+ return (0);
+ }
+ /*write out key first base64 the key data */
+ if (key->dk_flags & DST_EXTEND_FLAG)
+ b64_ntop(&out_key[6], len - 6, enc_key, sizeof(enc_key));
+ else
+ b64_ntop(&out_key[4], len - 4, enc_key, sizeof(enc_key));
+ fprintf(fp, "%s IN KEY %d %d %d %s\n",
+ key->dk_key_name,
+ key->dk_flags, key->dk_proto, key->dk_alg, enc_key);
+ fclose(fp);
+ return (1);
+}
+
+/*%
+ * dst_dnskey_to_public_key
+ * This function converts the contents of a DNS KEY RR into a DST
+ * key structure.
+ * Paramters
+ * len Length of the RDATA of the KEY RR RDATA
+ * rdata A pointer to the the KEY RR RDATA.
+ * in_name Key name to be stored in key structure.
+ * Returns
+ * NULL Failure
+ * NON-NULL Success. Pointer to key structure.
+ * Caller's responsibility to free() it.
+ */
+
+DST_KEY *
+dst_dnskey_to_key(const char *in_name, const u_char *rdata, const int len)
+{
+ DST_KEY *key_st;
+ int alg ;
+ int start = DST_KEY_START;
+
+ if (rdata == NULL || len <= DST_KEY_ALG) /*%< no data */
+ return (NULL);
+ alg = (u_int8_t) rdata[DST_KEY_ALG];
+ if (!dst_check_algorithm(alg)) { /*%< make sure alg is available */
+ EREPORT(("dst_dnskey_to_key(): Algorithm %d not suppored\n",
+ alg));
+ return (NULL);
+ }
+
+ if (in_name == NULL)
+ return (NULL);
+
+ if ((key_st = dst_s_get_key_struct(in_name, alg, 0, 0, 0)) == NULL)
+ return (NULL);
+
+ key_st->dk_id = dst_s_dns_key_id(rdata, len);
+ key_st->dk_flags = dst_s_get_int16(rdata);
+ key_st->dk_proto = (u_int16_t) rdata[DST_KEY_PROT];
+ if (key_st->dk_flags & DST_EXTEND_FLAG) {
+ u_int32_t ext_flags;
+ ext_flags = (u_int32_t) dst_s_get_int16(&rdata[DST_EXT_FLAG]);
+ key_st->dk_flags = key_st->dk_flags | (ext_flags << 16);
+ start += 2;
+ }
+ /*
+ * now point to the begining of the data representing the encoding
+ * of the key
+ */
+ if (key_st->dk_func && key_st->dk_func->from_dns_key) {
+ if (key_st->dk_func->from_dns_key(key_st, &rdata[start],
+ len - start) > 0)
+ return (key_st);
+ } else
+ EREPORT(("dst_dnskey_to_public_key(): unsuppored alg %d\n",
+ alg));
+
+ SAFE_FREE(key_st);
+ return (key_st);
+}
+
+/*%
+ * dst_public_key_to_dnskey
+ * Function to encode a public key into DNS KEY wire format
+ * Parameters
+ * key Key structure to encode.
+ * out_storage Location to write the encoded key to.
+ * out_len Size of the output array.
+ * Returns
+ * <0 Failure
+ * >=0 Number of bytes written to out_storage
+ */
+
+int
+dst_key_to_dnskey(const DST_KEY *key, u_char *out_storage,
+ const int out_len)
+{
+ u_int16_t val;
+ int loc = 0;
+ int enc_len = 0;
+ if (key == NULL)
+ return (-1);
+
+ if (!dst_check_algorithm(key->dk_alg)) { /*%< make sure alg is available */
+ EREPORT(("dst_key_to_dnskey(): Algorithm %d not suppored\n",
+ key->dk_alg));
+ return (UNSUPPORTED_KEYALG);
+ }
+ memset(out_storage, 0, out_len);
+ val = (u_int16_t)(key->dk_flags & 0xffff);
+ dst_s_put_int16(out_storage, val);
+ loc += 2;
+
+ out_storage[loc++] = (u_char) key->dk_proto;
+ out_storage[loc++] = (u_char) key->dk_alg;
+
+ if (key->dk_flags > 0xffff) { /*%< Extended flags */
+ val = (u_int16_t)((key->dk_flags >> 16) & 0xffff);
+ dst_s_put_int16(&out_storage[loc], val);
+ loc += 2;
+ }
+ if (key->dk_KEY_struct == NULL)
+ return (loc);
+ if (key->dk_func && key->dk_func->to_dns_key) {
+ enc_len = key->dk_func->to_dns_key(key,
+ (u_char *) &out_storage[loc],
+ out_len - loc);
+ if (enc_len > 0)
+ return (enc_len + loc);
+ else
+ return (-1);
+ } else
+ EREPORT(("dst_key_to_dnskey(): Unsupported ALG %d\n",
+ key->dk_alg));
+ return (-1);
+}
+
+/*%
+ * dst_buffer_to_key
+ * Function to encode a string of raw data into a DST key
+ * Parameters
+ * alg The algorithm (HMAC only)
+ * key A pointer to the data
+ * keylen The length of the data
+ * Returns
+ * NULL an error occurred
+ * NON-NULL the DST key
+ */
+DST_KEY *
+dst_buffer_to_key(const char *key_name, /*!< name of the key */
+ const int alg, /*!< algorithm */
+ const int flags, /*!< dns flags */
+ const int protocol, /*!< dns protocol */
+ const u_char *key_buf, /*!< key in dns wire fmt */
+ const int key_len) /*!< size of key */
+{
+
+ DST_KEY *dkey = NULL;
+ int dnslen;
+ u_char dns[2048];
+
+ if (!dst_check_algorithm(alg)) { /*%< make sure alg is available */
+ EREPORT(("dst_buffer_to_key(): Algorithm %d not suppored\n", alg));
+ return (NULL);
+ }
+
+ dkey = dst_s_get_key_struct(key_name, alg, flags, protocol, -1);
+
+ if (dkey == NULL || dkey->dk_func == NULL ||
+ dkey->dk_func->from_dns_key == NULL)
+ return (dst_free_key(dkey));
+
+ if (dkey->dk_func->from_dns_key(dkey, key_buf, key_len) < 0) {
+ EREPORT(("dst_buffer_to_key(): dst_buffer_to_hmac failed\n"));
+ return (dst_free_key(dkey));
+ }
+
+ dnslen = dst_key_to_dnskey(dkey, dns, sizeof(dns));
+ dkey->dk_id = dst_s_dns_key_id(dns, dnslen);
+ return (dkey);
+}
+
+int
+dst_key_to_buffer(DST_KEY *key, u_char *out_buff, int buf_len)
+{
+ int len;
+ /* this function will extrac the secret of HMAC into a buffer */
+ if (key == NULL)
+ return (0);
+ if (key->dk_func != NULL && key->dk_func->to_dns_key != NULL) {
+ len = key->dk_func->to_dns_key(key, out_buff, buf_len);
+ if (len < 0)
+ return (0);
+ return (len);
+ }
+ return (0);
+}
+
+/*%
+ * dst_s_read_private_key_file
+ * Function reads in private key from a file.
+ * Fills out the KEY structure.
+ * Parameters
+ * name Name of the key to be read.
+ * pk_key Structure that the key is returned in.
+ * in_id Key identifier (tag)
+ * Return
+ * 1 if everthing works
+ * 0 if there is any problem
+ */
+
+static int
+dst_s_read_private_key_file(char *name, DST_KEY *pk_key, u_int16_t in_id,
+ int in_alg)
+{
+ int cnt, alg, len, major, minor, file_major, file_minor;
+ int ret, id;
+ char filename[PATH_MAX];
+ u_char in_buff[RAW_KEY_SIZE], *p;
+ FILE *fp;
+ int dnslen;
+ u_char dns[2048];
+
+ if (name == NULL || pk_key == NULL) {
+ EREPORT(("dst_read_private_key_file(): No key name given\n"));
+ return (0);
+ }
+ /* Make the filename */
+ if (dst_s_build_filename(filename, name, in_id, in_alg, PRIVATE_KEY,
+ PATH_MAX) == -1) {
+ EREPORT(("dst_read_private_key(): Cannot make filename from %s, %d, and %s\n",
+ name, in_id, PRIVATE_KEY));
+ return (0);
+ }
+ /* first check if we can find the key file */
+ if ((fp = dst_s_fopen(filename, "r", 0)) == NULL) {
+ EREPORT(("dst_s_read_private_key_file: Could not open file %s in directory %s\n",
+ filename, dst_path[0] ? dst_path :
+ (char *) getcwd(NULL, PATH_MAX - 1)));
+ return (0);
+ }
+ /* now read the header info from the file */
+ if ((cnt = fread(in_buff, 1, sizeof(in_buff), fp)) < 5) {
+ fclose(fp);
+ EREPORT(("dst_s_read_private_key_file: error reading file %s (empty file)\n",
+ filename));
+ return (0);
+ }
+ /* decrypt key */
+ fclose(fp);
+ if (memcmp(in_buff, "Private-key-format: v", 20) != 0)
+ goto fail;
+ len = cnt;
+ p = in_buff;
+
+ if (!dst_s_verify_str((const char **) (void *)&p,
+ "Private-key-format: v")) {
+ EREPORT(("dst_s_read_private_key_file(): Not a Key file/Decrypt failed %s\n", name));
+ goto fail;
+ }
+ /* read in file format */
+ sscanf((char *)p, "%d.%d", &file_major, &file_minor);
+ sscanf(KEY_FILE_FORMAT, "%d.%d", &major, &minor);
+ if (file_major < 1) {
+ EREPORT(("dst_s_read_private_key_file(): Unknown keyfile %d.%d version for %s\n",
+ file_major, file_minor, name));
+ goto fail;
+ } else if (file_major > major || file_minor > minor)
+ EREPORT((
+ "dst_s_read_private_key_file(): Keyfile %s version higher than mine %d.%d MAY FAIL\n",
+ name, file_major, file_minor));
+
+ while (*p++ != '\n') ; /*%< skip to end of line */
+
+ if (!dst_s_verify_str((const char **) (void *)&p, "Algorithm: "))
+ goto fail;
+
+ if (sscanf((char *)p, "%d", &alg) != 1)
+ goto fail;
+ while (*p++ != '\n') ; /*%< skip to end of line */
+
+ if (pk_key->dk_key_name && !strcmp(pk_key->dk_key_name, name))
+ SAFE_FREE2(pk_key->dk_key_name, strlen(pk_key->dk_key_name));
+ pk_key->dk_key_name = (char *) strdup(name);
+
+ /* allocate and fill in key structure */
+ if (pk_key->dk_func == NULL || pk_key->dk_func->from_file_fmt == NULL)
+ goto fail;
+
+ ret = pk_key->dk_func->from_file_fmt(pk_key, (char *)p, &in_buff[len] - p);
+ if (ret < 0)
+ goto fail;
+
+ dnslen = dst_key_to_dnskey(pk_key, dns, sizeof(dns));
+ id = dst_s_dns_key_id(dns, dnslen);
+
+ /* Make sure the actual key tag matches the input tag used in the filename
+ */
+ if (id != in_id) {
+ EREPORT(("dst_s_read_private_key_file(): actual tag of key read %d != input tag used to build filename %d.\n", id, in_id));
+ goto fail;
+ }
+ pk_key->dk_id = (u_int16_t) id;
+ pk_key->dk_alg = alg;
+ memset(in_buff, 0, cnt);
+ return (1);
+
+ fail:
+ memset(in_buff, 0, cnt);
+ return (0);
+}
+
+/*%
+ * Generate and store a public/private keypair.
+ * Keys will be stored in formatted files.
+ *
+ * Parameters
+ &
+ *\par name Name of the new key. Used to create key files
+ *\li K&lt;name&gt;+&lt;alg&gt;+&lt;id&gt;.public and K&lt;name&gt;+&lt;alg&gt;+&lt;id&gt;.private.
+ *\par bits Size of the new key in bits.
+ *\par exp What exponent to use:
+ *\li 0 use exponent 3
+ *\li non-zero use Fermant4
+ *\par flags The default value of the DNS Key flags.
+ *\li The DNS Key RR Flag field is defined in RFC2065,
+ * section 3.3. The field has 16 bits.
+ *\par protocol
+ *\li Default value of the DNS Key protocol field.
+ *\li The DNS Key protocol field is defined in RFC2065,
+ * section 3.4. The field has 8 bits.
+ *\par alg What algorithm to use. Currently defined:
+ *\li KEY_RSA 1
+ *\li KEY_DSA 3
+ *\li KEY_HMAC 157
+ *\par out_id The key tag is returned.
+ *
+ * Return
+ *\li NULL Failure
+ *\li non-NULL the generated key pair
+ * Caller frees the result, and its dk_name pointer.
+ */
+DST_KEY *
+dst_generate_key(const char *name, const int bits, const int exp,
+ const int flags, const int protocol, const int alg)
+{
+ DST_KEY *new_key = NULL;
+ int dnslen;
+ u_char dns[2048];
+
+ if (name == NULL)
+ return (NULL);
+
+ if (!dst_check_algorithm(alg)) { /*%< make sure alg is available */
+ EREPORT(("dst_generate_key(): Algorithm %d not suppored\n", alg));
+ return (NULL);
+ }
+
+ new_key = dst_s_get_key_struct(name, alg, flags, protocol, bits);
+ if (new_key == NULL)
+ return (NULL);
+ if (bits == 0) /*%< null key we are done */
+ return (new_key);
+ if (new_key->dk_func == NULL || new_key->dk_func->generate == NULL) {
+ EREPORT(("dst_generate_key_pair():Unsupported algorithm %d\n",
+ alg));
+ return (dst_free_key(new_key));
+ }
+ if (new_key->dk_func->generate(new_key, exp) <= 0) {
+ EREPORT(("dst_generate_key_pair(): Key generation failure %s %d %d %d\n",
+ new_key->dk_key_name, new_key->dk_alg,
+ new_key->dk_key_size, exp));
+ return (dst_free_key(new_key));
+ }
+
+ dnslen = dst_key_to_dnskey(new_key, dns, sizeof(dns));
+ if (dnslen != UNSUPPORTED_KEYALG)
+ new_key->dk_id = dst_s_dns_key_id(dns, dnslen);
+ else
+ new_key->dk_id = 0;
+
+ return (new_key);
+}
+
+/*%
+ * Release all data structures pointed to by a key structure.
+ *
+ * Parameters
+ *\li f_key Key structure to be freed.
+ */
+
+DST_KEY *
+dst_free_key(DST_KEY *f_key)
+{
+
+ if (f_key == NULL)
+ return (f_key);
+ if (f_key->dk_func && f_key->dk_func->destroy)
+ f_key->dk_KEY_struct =
+ f_key->dk_func->destroy(f_key->dk_KEY_struct);
+ else {
+ EREPORT(("dst_free_key(): Unknown key alg %d\n",
+ f_key->dk_alg));
+ }
+ if (f_key->dk_KEY_struct) {
+ free(f_key->dk_KEY_struct);
+ f_key->dk_KEY_struct = NULL;
+ }
+ if (f_key->dk_key_name)
+ SAFE_FREE(f_key->dk_key_name);
+ SAFE_FREE(f_key);
+ return (NULL);
+}
+
+/*%
+ * Return the maximim size of signature from the key specified in bytes
+ *
+ * Parameters
+ *\li key
+ *
+ * Returns
+ * \li bytes
+ */
+int
+dst_sig_size(DST_KEY *key) {
+ switch (key->dk_alg) {
+ case KEY_HMAC_MD5:
+ return (16);
+ case KEY_HMAC_SHA1:
+ return (20);
+ case KEY_RSA:
+ return (key->dk_key_size + 7) / 8;
+ case KEY_DSA:
+ return (40);
+ default:
+ EREPORT(("dst_sig_size(): Unknown key alg %d\n", key->dk_alg));
+ return -1;
+ }
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/dst/dst_internal.h b/usr/src/lib/libresolv2_joy/common/dst/dst_internal.h
new file mode 100644
index 0000000000..e9bc6fc08d
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/dst/dst_internal.h
@@ -0,0 +1,155 @@
+#ifndef DST_INTERNAL_H
+#define DST_INTERNAL_H
+
+/*
+ * Portions Copyright (c) 1995-1998 by Trusted Information Systems, Inc.
+ *
+ * Permission to use, copy modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND TRUSTED INFORMATION SYSTEMS
+ * DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
+ * TRUSTED INFORMATION SYSTEMS BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THE SOFTWARE.
+ */
+#include <limits.h>
+#include <sys/param.h>
+#if (!defined(BSD)) || (BSD < 199306)
+# include <sys/bitypes.h>
+#else
+# include <sys/types.h>
+#endif
+
+#ifndef PATH_MAX
+# ifdef POSIX_PATH_MAX
+# define PATH_MAX POSIX_PATH_MAX
+# else
+# define PATH_MAX 255 /*%< this is the value of POSIX_PATH_MAX */
+# endif
+#endif
+
+typedef struct dst_key {
+ char *dk_key_name; /*%< name of the key */
+ int dk_key_size; /*%< this is the size of the key in bits */
+ int dk_proto; /*%< what protocols this key can be used for */
+ int dk_alg; /*%< algorithm number from key record */
+ u_int32_t dk_flags; /*%< and the flags of the public key */
+ u_int16_t dk_id; /*%< identifier of the key */
+ void *dk_KEY_struct; /*%< pointer to key in crypto pkg fmt */
+ struct dst_func *dk_func; /*%< point to cryptto pgk specific function table */
+} DST_KEY;
+#define HAS_DST_KEY
+
+#include <isc/dst.h>
+/*
+ * define what crypto systems are supported for RSA,
+ * BSAFE is prefered over RSAREF; only one can be set at any time
+ */
+#if defined(BSAFE) && defined(RSAREF)
+# error "Cannot have both BSAFE and RSAREF defined"
+#endif
+
+/* Declare dst_lib specific constants */
+#define KEY_FILE_FORMAT "1.2"
+
+/* suffixes for key file names */
+#define PRIVATE_KEY "private"
+#define PUBLIC_KEY "key"
+
+/* error handling */
+#ifdef REPORT_ERRORS
+#define EREPORT(str) printf str
+#else
+#define EREPORT(str) (void)0
+#endif
+
+/* use our own special macro to FRRE memory */
+
+#ifndef SAFE_FREE
+#define SAFE_FREE(a) \
+do{if(a != NULL){memset(a,0, sizeof(*a)); free(a); a=NULL;}} while (0)
+#define SAFE_FREE2(a,s) if (a != NULL && (long)s > 0){memset(a,0, s);free(a); a=NULL;}
+#endif
+
+typedef struct dst_func {
+ int (*sign)(const int mode, DST_KEY *key, void **context,
+ const u_int8_t *data, const int len,
+ u_int8_t *signature, const int sig_len);
+ int (*verify)(const int mode, DST_KEY *key, void **context,
+ const u_int8_t *data, const int len,
+ const u_int8_t *signature, const int sig_len);
+ int (*compare)(const DST_KEY *key1, const DST_KEY *key2);
+ int (*generate)(DST_KEY *key, int parms);
+ void *(*destroy)(void *key);
+ /* conversion functions */
+ int (*to_dns_key)(const DST_KEY *key, u_int8_t *out,
+ const int out_len);
+ int (*from_dns_key)(DST_KEY *key, const u_int8_t *str,
+ const int str_len);
+ int (*to_file_fmt)(const DST_KEY *key, char *out,
+ const int out_len);
+ int (*from_file_fmt)(DST_KEY *key, const char *out,
+ const int out_len);
+
+} dst_func;
+
+extern dst_func *dst_t_func[DST_MAX_ALGS];
+extern const char *key_file_fmt_str;
+extern const char *dst_path;
+
+#ifndef DST_HASH_SIZE
+#define DST_HASH_SIZE 20 /*%< RIPEMD160 and SHA-1 are 20 bytes MD5 is 16 */
+#endif
+
+int dst_bsafe_init(void);
+
+int dst_rsaref_init(void);
+
+int dst_hmac_md5_init(void);
+
+int dst_cylink_init(void);
+
+int dst_eay_dss_init(void);
+
+/* from higher level support routines */
+int dst_s_calculate_bits( const u_int8_t *str, const int max_bits);
+int dst_s_verify_str( const char **buf, const char *str);
+
+
+/* conversion between dns names and key file names */
+size_t dst_s_filename_length( const char *name, const char *suffix);
+int dst_s_build_filename( char *filename, const char *name,
+ u_int16_t id, int alg, const char *suffix,
+ size_t filename_length);
+
+FILE *dst_s_fopen (const char *filename, const char *mode, int perm);
+
+/*%
+ * read and write network byte order into u_int?_t
+ * all of these should be retired
+ */
+u_int16_t dst_s_get_int16( const u_int8_t *buf);
+void dst_s_put_int16( u_int8_t *buf, const u_int16_t val);
+
+u_int32_t dst_s_get_int32( const u_int8_t *buf);
+void dst_s_put_int32( u_int8_t *buf, const u_int32_t val);
+
+#ifdef DUMP
+# undef DUMP
+# define DUMP(a,b,c,d) dst_s_dump(a,b,c,d)
+#else
+# define DUMP(a,b,c,d)
+#endif
+void
+dst_s_dump(const int mode, const u_char *data, const int size,
+ const char *msg);
+
+
+
+#endif /* DST_INTERNAL_H */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/dst/hmac_link.c b/usr/src/lib/libresolv2_joy/common/dst/hmac_link.c
new file mode 100644
index 0000000000..23925e4269
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/dst/hmac_link.c
@@ -0,0 +1,491 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+#ifdef HMAC_MD5
+#ifndef LINT
+static const char rcsid[] = "$Header: /proj/cvs/prod/libbind/dst/hmac_link.c,v 1.8 2007/09/24 17:18:25 each Exp $";
+#endif
+/*
+ * Portions Copyright (c) 1995-1998 by Trusted Information Systems, Inc.
+ *
+ * Permission to use, copy modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND TRUSTED INFORMATION SYSTEMS
+ * DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
+ * TRUSTED INFORMATION SYSTEMS BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THE SOFTWARE.
+ */
+
+/*%
+ * This file contains an implementation of the HMAC-MD5 algorithm.
+ */
+#include "port_before.h"
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <memory.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+#include "dst_internal.h"
+
+#ifdef USE_MD5
+# ifndef HAVE_MD5
+# include "md5.h"
+# else
+# ifdef SOLARIS2
+# include <sys/md5.h>
+# endif
+# endif
+# ifndef _MD5_H_
+# define _MD5_H_ 1 /*%< make sure we do not include rsaref md5.h file */
+# endif
+#endif
+
+#include "port_after.h"
+
+
+#define HMAC_LEN 64
+#define HMAC_IPAD 0x36
+#define HMAC_OPAD 0x5c
+#define MD5_LEN 16
+
+
+typedef struct hmackey {
+ u_char hk_ipad[64], hk_opad[64];
+} HMAC_Key;
+
+
+/**************************************************************************
+ * dst_hmac_md5_sign
+ * Call HMAC signing functions to sign a block of data.
+ * There are three steps to signing, INIT (initialize structures),
+ * UPDATE (hash (more) data), FINAL (generate a signature). This
+ * routine performs one or more of these steps.
+ * Parameters
+ * mode SIG_MODE_INIT, SIG_MODE_UPDATE and/or SIG_MODE_FINAL.
+ * priv_key key to use for signing.
+ * context the context to be used in this digest
+ * data data to be signed.
+ * len length in bytes of data.
+ * signature location to store signature.
+ * sig_len size of the signature location
+ * returns
+ * N Success on SIG_MODE_FINAL = returns signature length in bytes
+ * 0 Success on SIG_MODE_INIT and UPDATE
+ * <0 Failure
+ */
+
+static int
+dst_hmac_md5_sign(const int mode, DST_KEY *d_key, void **context,
+ const u_char *data, const int len,
+ u_char *signature, const int sig_len)
+{
+ HMAC_Key *key;
+ int sign_len = 0;
+ MD5_CTX *ctx = NULL;
+
+ if (d_key == NULL || d_key->dk_KEY_struct == NULL)
+ return (-1);
+
+ if (mode & SIG_MODE_INIT)
+ ctx = (MD5_CTX *) malloc(sizeof(*ctx));
+ else if (context)
+ ctx = (MD5_CTX *) *context;
+ if (ctx == NULL)
+ return (-1);
+
+ key = (HMAC_Key *) d_key->dk_KEY_struct;
+
+ if (mode & SIG_MODE_INIT) {
+ MD5Init(ctx);
+ MD5Update(ctx, key->hk_ipad, HMAC_LEN);
+ }
+
+ if ((mode & SIG_MODE_UPDATE) && (data && len > 0))
+ MD5Update(ctx, data, len);
+
+ if (mode & SIG_MODE_FINAL) {
+ if (signature == NULL || sig_len < MD5_LEN)
+ return (SIGN_FINAL_FAILURE);
+ MD5Final(signature, ctx);
+
+ /* perform outer MD5 */
+ MD5Init(ctx);
+ MD5Update(ctx, key->hk_opad, HMAC_LEN);
+ MD5Update(ctx, signature, MD5_LEN);
+ MD5Final(signature, ctx);
+ sign_len = MD5_LEN;
+ SAFE_FREE(ctx);
+ }
+ else {
+ if (context == NULL)
+ return (-1);
+ *context = (void *) ctx;
+ }
+ return (sign_len);
+}
+
+
+/**************************************************************************
+ * dst_hmac_md5_verify()
+ * Calls HMAC verification routines. There are three steps to
+ * verification, INIT (initialize structures), UPDATE (hash (more) data),
+ * FINAL (generate a signature). This routine performs one or more of
+ * these steps.
+ * Parameters
+ * mode SIG_MODE_INIT, SIG_MODE_UPDATE and/or SIG_MODE_FINAL.
+ * dkey key to use for verify.
+ * data data signed.
+ * len length in bytes of data.
+ * signature signature.
+ * sig_len length in bytes of signature.
+ * returns
+ * 0 Success
+ * <0 Failure
+ */
+
+static int
+dst_hmac_md5_verify(const int mode, DST_KEY *d_key, void **context,
+ const u_char *data, const int len,
+ const u_char *signature, const int sig_len)
+{
+ HMAC_Key *key;
+ MD5_CTX *ctx = NULL;
+
+ if (d_key == NULL || d_key->dk_KEY_struct == NULL)
+ return (-1);
+
+ if (mode & SIG_MODE_INIT)
+ ctx = (MD5_CTX *) malloc(sizeof(*ctx));
+ else if (context)
+ ctx = (MD5_CTX *) *context;
+ if (ctx == NULL)
+ return (-1);
+
+ key = (HMAC_Key *) d_key->dk_KEY_struct;
+ if (mode & SIG_MODE_INIT) {
+ MD5Init(ctx);
+ MD5Update(ctx, key->hk_ipad, HMAC_LEN);
+ }
+ if ((mode & SIG_MODE_UPDATE) && (data && len > 0))
+ MD5Update(ctx, data, len);
+
+ if (mode & SIG_MODE_FINAL) {
+ u_char digest[MD5_LEN];
+ if (signature == NULL || key == NULL || sig_len != MD5_LEN)
+ return (VERIFY_FINAL_FAILURE);
+ MD5Final(digest, ctx);
+
+ /* perform outer MD5 */
+ MD5Init(ctx);
+ MD5Update(ctx, key->hk_opad, HMAC_LEN);
+ MD5Update(ctx, digest, MD5_LEN);
+ MD5Final(digest, ctx);
+
+ SAFE_FREE(ctx);
+ if (memcmp(digest, signature, MD5_LEN) != 0)
+ return (VERIFY_FINAL_FAILURE);
+ }
+ else {
+ if (context == NULL)
+ return (-1);
+ *context = (void *) ctx;
+ }
+ return (0);
+}
+
+
+/**************************************************************************
+ * dst_buffer_to_hmac_md5
+ * Converts key from raw data to an HMAC Key
+ * This function gets in a pointer to the data
+ * Parameters
+ * hkey the HMAC key to be filled in
+ * key the key in raw format
+ * keylen the length of the key
+ * Return
+ * 0 Success
+ * <0 Failure
+ */
+static int
+dst_buffer_to_hmac_md5(DST_KEY *dkey, const u_char *key, const int keylen)
+{
+ int i;
+ HMAC_Key *hkey = NULL;
+ MD5_CTX ctx;
+ int local_keylen = keylen;
+ u_char tk[MD5_LEN];
+
+ if (dkey == NULL || key == NULL || keylen < 0)
+ return (-1);
+
+ if ((hkey = (HMAC_Key *) malloc(sizeof(HMAC_Key))) == NULL)
+ return (-2);
+
+ memset(hkey->hk_ipad, 0, sizeof(hkey->hk_ipad));
+ memset(hkey->hk_opad, 0, sizeof(hkey->hk_opad));
+
+ /* if key is longer than HMAC_LEN bytes reset it to key=MD5(key) */
+ if (keylen > HMAC_LEN) {
+ MD5Init(&ctx);
+ MD5Update(&ctx, key, keylen);
+ MD5Final(tk, &ctx);
+ memset((void *) &ctx, 0, sizeof(ctx));
+ key = tk;
+ local_keylen = MD5_LEN;
+ }
+ /* start out by storing key in pads */
+ memcpy(hkey->hk_ipad, key, local_keylen);
+ memcpy(hkey->hk_opad, key, local_keylen);
+
+ /* XOR key with hk_ipad and opad values */
+ for (i = 0; i < HMAC_LEN; i++) {
+ hkey->hk_ipad[i] ^= HMAC_IPAD;
+ hkey->hk_opad[i] ^= HMAC_OPAD;
+ }
+ dkey->dk_key_size = local_keylen;
+ dkey->dk_KEY_struct = (void *) hkey;
+ return (1);
+}
+
+
+/**************************************************************************
+ * dst_hmac_md5_key_to_file_format
+ * Encodes an HMAC Key into the portable file format.
+ * Parameters
+ * hkey HMAC KEY structure
+ * buff output buffer
+ * buff_len size of output buffer
+ * Return
+ * 0 Failure - null input hkey
+ * -1 Failure - not enough space in output area
+ * N Success - Length of data returned in buff
+ */
+
+static int
+dst_hmac_md5_key_to_file_format(const DST_KEY *dkey, char *buff,
+ const int buff_len)
+{
+ char *bp;
+ int len, i, key_len;
+ u_char key[HMAC_LEN];
+ HMAC_Key *hkey;
+
+ if (dkey == NULL || dkey->dk_KEY_struct == NULL)
+ return (0);
+ /*
+ * Using snprintf() would be so much simpler here.
+ */
+ if (buff == NULL ||
+ buff_len <= (int)(strlen(key_file_fmt_str) +
+ strlen(KEY_FILE_FORMAT) + 4))
+ return (-1); /*%< no OR not enough space in output area */
+ hkey = (HMAC_Key *) dkey->dk_KEY_struct;
+ memset(buff, 0, buff_len); /*%< just in case */
+ /* write file header */
+ sprintf(buff, key_file_fmt_str, KEY_FILE_FORMAT, KEY_HMAC_MD5, "HMAC");
+
+ bp = buff + strlen(buff);
+
+ memset(key, 0, HMAC_LEN);
+ for (i = 0; i < HMAC_LEN; i++)
+ key[i] = hkey->hk_ipad[i] ^ HMAC_IPAD;
+ for (i = HMAC_LEN - 1; i >= 0; i--)
+ if (key[i] != 0)
+ break;
+ key_len = i + 1;
+
+ if (buff_len - (bp - buff) < 6)
+ return (-1);
+ strcat(bp, "Key: ");
+ bp += strlen("Key: ");
+
+ len = b64_ntop(key, key_len, bp, buff_len - (bp - buff));
+ if (len < 0)
+ return (-1);
+ bp += len;
+ if (buff_len - (bp - buff) < 2)
+ return (-1);
+ *(bp++) = '\n';
+ *bp = '\0';
+
+ return (bp - buff);
+}
+
+
+/**************************************************************************
+ * dst_hmac_md5_key_from_file_format
+ * Converts contents of a key file into an HMAC key.
+ * Parameters
+ * hkey structure to put key into
+ * buff buffer containing the encoded key
+ * buff_len the length of the buffer
+ * Return
+ * n >= 0 Foot print of the key converted
+ * n < 0 Error in conversion
+ */
+
+static int
+dst_hmac_md5_key_from_file_format(DST_KEY *dkey, const char *buff,
+ const int buff_len)
+{
+ const char *p = buff, *eol;
+ u_char key[HMAC_LEN+1]; /* b64_pton needs more than 64 bytes do decode
+ * it should probably be fixed rather than doing
+ * this
+ */
+ u_char *tmp;
+ int key_len, len;
+
+ if (dkey == NULL)
+ return (-2);
+ if (buff == NULL || buff_len < 0)
+ return (-1);
+
+ memset(key, 0, sizeof(key));
+
+ if (!dst_s_verify_str(&p, "Key: "))
+ return (-3);
+
+ eol = strchr(p, '\n');
+ if (eol == NULL)
+ return (-4);
+ len = eol - p;
+ tmp = malloc(len + 2);
+ if (tmp == NULL)
+ return (-5);
+ memcpy(tmp, p, len);
+ *(tmp + len) = 0x0;
+ key_len = b64_pton((char *)tmp, key, HMAC_LEN+1); /*%< see above */
+ SAFE_FREE2(tmp, len + 2);
+
+ if (dst_buffer_to_hmac_md5(dkey, key, key_len) < 0) {
+ return (-6);
+ }
+ return (0);
+}
+
+/*%
+ * dst_hmac_md5_to_dns_key()
+ * function to extract hmac key from DST_KEY structure
+ * intput:
+ * in_key: HMAC-MD5 key
+ * output:
+ * out_str: buffer to write ot
+ * out_len: size of output buffer
+ * returns:
+ * number of bytes written to output buffer
+ */
+static int
+dst_hmac_md5_to_dns_key(const DST_KEY *in_key, u_char *out_str,
+ const int out_len)
+{
+
+ HMAC_Key *hkey;
+ int i;
+
+ if (in_key == NULL || in_key->dk_KEY_struct == NULL ||
+ out_len <= in_key->dk_key_size || out_str == NULL)
+ return (-1);
+
+ hkey = (HMAC_Key *) in_key->dk_KEY_struct;
+ for (i = 0; i < in_key->dk_key_size; i++)
+ out_str[i] = hkey->hk_ipad[i] ^ HMAC_IPAD;
+ return (i);
+}
+
+/**************************************************************************
+ * dst_hmac_md5_compare_keys
+ * Compare two keys for equality.
+ * Return
+ * 0 The keys are equal
+ * NON-ZERO The keys are not equal
+ */
+
+static int
+dst_hmac_md5_compare_keys(const DST_KEY *key1, const DST_KEY *key2)
+{
+ HMAC_Key *hkey1 = (HMAC_Key *) key1->dk_KEY_struct;
+ HMAC_Key *hkey2 = (HMAC_Key *) key2->dk_KEY_struct;
+ return memcmp(hkey1->hk_ipad, hkey2->hk_ipad, HMAC_LEN);
+}
+
+/**************************************************************************
+ * dst_hmac_md5_free_key_structure
+ * Frees all (none) dynamically allocated structures in hkey
+ */
+
+static void *
+dst_hmac_md5_free_key_structure(void *key)
+{
+ HMAC_Key *hkey = key;
+ SAFE_FREE(hkey);
+ return (NULL);
+}
+
+
+/***************************************************************************
+ * dst_hmac_md5_generate_key
+ * Creates a HMAC key of size size with a maximum size of 63 bytes
+ * generating a HMAC key larger than 63 bytes makes no sense as that key
+ * is digested before use.
+ */
+
+static int
+dst_hmac_md5_generate_key(DST_KEY *key, const int nothing)
+{
+ (void)key;
+ (void)nothing;
+ return (-1);
+}
+
+/*%
+ * dst_hmac_md5_init() Function to answer set up function pointers for HMAC
+ * related functions
+ */
+int
+dst_hmac_md5_init()
+{
+ if (dst_t_func[KEY_HMAC_MD5] != NULL)
+ return (1);
+ dst_t_func[KEY_HMAC_MD5] = malloc(sizeof(struct dst_func));
+ if (dst_t_func[KEY_HMAC_MD5] == NULL)
+ return (0);
+ memset(dst_t_func[KEY_HMAC_MD5], 0, sizeof(struct dst_func));
+ dst_t_func[KEY_HMAC_MD5]->sign = dst_hmac_md5_sign;
+ dst_t_func[KEY_HMAC_MD5]->verify = dst_hmac_md5_verify;
+ dst_t_func[KEY_HMAC_MD5]->compare = dst_hmac_md5_compare_keys;
+ dst_t_func[KEY_HMAC_MD5]->generate = dst_hmac_md5_generate_key;
+ dst_t_func[KEY_HMAC_MD5]->destroy = dst_hmac_md5_free_key_structure;
+ dst_t_func[KEY_HMAC_MD5]->to_dns_key = dst_hmac_md5_to_dns_key;
+ dst_t_func[KEY_HMAC_MD5]->from_dns_key = dst_buffer_to_hmac_md5;
+ dst_t_func[KEY_HMAC_MD5]->to_file_fmt = dst_hmac_md5_key_to_file_format;
+ dst_t_func[KEY_HMAC_MD5]->from_file_fmt = dst_hmac_md5_key_from_file_format;
+ return (1);
+}
+
+#else
+#define dst_hmac_md5_init __dst_hmac_md5_init
+
+int
+dst_hmac_md5_init(){
+ return (0);
+}
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/dst/support.c b/usr/src/lib/libresolv2_joy/common/dst/support.c
new file mode 100644
index 0000000000..8f827667d6
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/dst/support.c
@@ -0,0 +1,342 @@
+static const char rcsid[] = "$Header: /proj/cvs/prod/libbind/dst/support.c,v 1.6 2005/10/11 00:10:13 marka Exp $";
+
+
+/*
+ * Portions Copyright (c) 1995-1998 by Trusted Information Systems, Inc.
+ *
+ * Permission to use, copy modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND TRUSTED INFORMATION SYSTEMS
+ * DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
+ * TRUSTED INFORMATION SYSTEMS BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
+ * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+ * WITH THE USE OR PERFORMANCE OF THE SOFTWARE.
+ */
+
+#include "port_before.h"
+
+#include <stdio.h>
+#include <unistd.h>
+#include <memory.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+#include "dst_internal.h"
+
+#include "port_after.h"
+
+/*%
+ * dst_s_verify_str()
+ * Validate that the input string(*str) is at the head of the input
+ * buffer(**buf). If so, move the buffer head pointer (*buf) to
+ * the first byte of data following the string(*str).
+ * Parameters
+ * buf Input buffer.
+ * str Input string.
+ * Return
+ * 0 *str is not the head of **buff
+ * 1 *str is the head of **buff, *buf is is advanced to
+ * the tail of **buf.
+ */
+
+int
+dst_s_verify_str(const char **buf, const char *str)
+{
+ int b, s;
+ if (*buf == NULL) /*%< error checks */
+ return (0);
+ if (str == NULL || *str == '\0')
+ return (1);
+
+ b = strlen(*buf); /*%< get length of strings */
+ s = strlen(str);
+ if (s > b || strncmp(*buf, str, s)) /*%< check if same */
+ return (0); /*%< not a match */
+ (*buf) += s; /*%< advance pointer */
+ return (1);
+}
+
+/*%
+ * dst_s_calculate_bits
+ * Given a binary number represented in a u_char[], determine
+ * the number of significant bits used.
+ * Parameters
+ * str An input character string containing a binary number.
+ * max_bits The maximum possible significant bits.
+ * Return
+ * N The number of significant bits in str.
+ */
+
+int
+dst_s_calculate_bits(const u_char *str, const int max_bits)
+{
+ const u_char *p = str;
+ u_char i, j = 0x80;
+ int bits;
+ for (bits = max_bits; *p == 0x00 && bits > 0; p++)
+ bits -= 8;
+ for (i = *p; (i & j) != j; j >>= 1)
+ bits--;
+ return (bits);
+}
+
+/*%
+ * calculates a checksum used in dst for an id.
+ * takes an array of bytes and a length.
+ * returns a 16 bit checksum.
+ */
+u_int16_t
+dst_s_id_calc(const u_char *key, const int keysize)
+{
+ u_int32_t ac;
+ const u_char *kp = key;
+ int size = keysize;
+
+ if (!key || (keysize <= 0))
+ return (0xffffU);
+
+ for (ac = 0; size > 1; size -= 2, kp += 2)
+ ac += ((*kp) << 8) + *(kp + 1);
+
+ if (size > 0)
+ ac += ((*kp) << 8);
+ ac += (ac >> 16) & 0xffff;
+
+ return (ac & 0xffff);
+}
+
+/*%
+ * dst_s_dns_key_id() Function to calculate DNSSEC footprint from KEY record
+ * rdata
+ * Input:
+ * dns_key_rdata: the raw data in wire format
+ * rdata_len: the size of the input data
+ * Output:
+ * the key footprint/id calculated from the key data
+ */
+u_int16_t
+dst_s_dns_key_id(const u_char *dns_key_rdata, const int rdata_len)
+{
+ if (!dns_key_rdata)
+ return 0;
+
+ /* compute id */
+ if (dns_key_rdata[3] == KEY_RSA) /*%< Algorithm RSA */
+ return dst_s_get_int16((const u_char *)
+ &dns_key_rdata[rdata_len - 3]);
+ else if (dns_key_rdata[3] == KEY_HMAC_MD5)
+ /* compatibility */
+ return 0;
+ else
+ /* compute a checksum on the key part of the key rr */
+ return dst_s_id_calc(dns_key_rdata, rdata_len);
+}
+
+/*%
+ * dst_s_get_int16
+ * This routine extracts a 16 bit integer from a two byte character
+ * string. The character string is assumed to be in network byte
+ * order and may be unaligned. The number returned is in host order.
+ * Parameter
+ * buf A two byte character string.
+ * Return
+ * The converted integer value.
+ */
+
+u_int16_t
+dst_s_get_int16(const u_char *buf)
+{
+ register u_int16_t a = 0;
+ a = ((u_int16_t)(buf[0] << 8)) | ((u_int16_t)(buf[1]));
+ return (a);
+}
+
+/*%
+ * dst_s_get_int32
+ * This routine extracts a 32 bit integer from a four byte character
+ * string. The character string is assumed to be in network byte
+ * order and may be unaligned. The number returned is in host order.
+ * Parameter
+ * buf A four byte character string.
+ * Return
+ * The converted integer value.
+ */
+
+u_int32_t
+dst_s_get_int32(const u_char *buf)
+{
+ register u_int32_t a = 0;
+ a = ((u_int32_t)(buf[0] << 24)) | ((u_int32_t)(buf[1] << 16)) |
+ ((u_int32_t)(buf[2] << 8)) | ((u_int32_t)(buf[3]));
+ return (a);
+}
+
+/*%
+ * dst_s_put_int16
+ * Take a 16 bit integer and store the value in a two byte
+ * character string. The integer is assumed to be in network
+ * order and the string is returned in host order.
+ *
+ * Parameters
+ * buf Storage for a two byte character string.
+ * val 16 bit integer.
+ */
+
+void
+dst_s_put_int16(u_int8_t *buf, const u_int16_t val)
+{
+ buf[0] = (u_int8_t)(val >> 8);
+ buf[1] = (u_int8_t)(val);
+}
+
+/*%
+ * dst_s_put_int32
+ * Take a 32 bit integer and store the value in a four byte
+ * character string. The integer is assumed to be in network
+ * order and the string is returned in host order.
+ *
+ * Parameters
+ * buf Storage for a four byte character string.
+ * val 32 bit integer.
+ */
+
+void
+dst_s_put_int32(u_int8_t *buf, const u_int32_t val)
+{
+ buf[0] = (u_int8_t)(val >> 24);
+ buf[1] = (u_int8_t)(val >> 16);
+ buf[2] = (u_int8_t)(val >> 8);
+ buf[3] = (u_int8_t)(val);
+}
+
+/*%
+ * dst_s_filename_length
+ *
+ * This function returns the number of bytes needed to hold the
+ * filename for a key file. '/', '\' and ':' are not allowed.
+ * form: K&lt;keyname&gt;+&lt;alg&gt;+&lt;id&gt;.&lt;suffix&gt;
+ *
+ * Returns 0 if the filename would contain either '\', '/' or ':'
+ */
+size_t
+dst_s_filename_length(const char *name, const char *suffix)
+{
+ if (name == NULL)
+ return (0);
+ if (strrchr(name, '\\'))
+ return (0);
+ if (strrchr(name, '/'))
+ return (0);
+ if (strrchr(name, ':'))
+ return (0);
+ if (suffix == NULL)
+ return (0);
+ if (strrchr(suffix, '\\'))
+ return (0);
+ if (strrchr(suffix, '/'))
+ return (0);
+ if (strrchr(suffix, ':'))
+ return (0);
+ return (1 + strlen(name) + 6 + strlen(suffix));
+}
+
+/*%
+ * dst_s_build_filename ()
+ * Builds a key filename from the key name, it's id, and a
+ * suffix. '\', '/' and ':' are not allowed. fA filename is of the
+ * form: K&lt;keyname&gt;&lt;id&gt;.&lt;suffix&gt;
+ * form: K&lt;keyname&gt;+&lt;alg&gt;+&lt;id&gt;.&lt;suffix&gt;
+ *
+ * Returns -1 if the conversion fails:
+ * if the filename would be too long for space allotted
+ * if the filename would contain a '\', '/' or ':'
+ * Returns 0 on success
+ */
+
+int
+dst_s_build_filename(char *filename, const char *name, u_int16_t id,
+ int alg, const char *suffix, size_t filename_length)
+{
+ u_int32_t my_id;
+ if (filename == NULL)
+ return (-1);
+ memset(filename, 0, filename_length);
+ if (name == NULL)
+ return (-1);
+ if (suffix == NULL)
+ return (-1);
+ if (filename_length < 1 + strlen(name) + 4 + 6 + 1 + strlen(suffix))
+ return (-1);
+ my_id = id;
+ sprintf(filename, "K%s+%03d+%05d.%s", name, alg, my_id,
+ (const char *) suffix);
+ if (strrchr(filename, '/'))
+ return (-1);
+ if (strrchr(filename, '\\'))
+ return (-1);
+ if (strrchr(filename, ':'))
+ return (-1);
+ return (0);
+}
+
+/*%
+ * dst_s_fopen ()
+ * Open a file in the dst_path directory. If perm is specified, the
+ * file is checked for existence first, and not opened if it exists.
+ * Parameters
+ * filename File to open
+ * mode Mode to open the file (passed directly to fopen)
+ * perm File permission, if creating a new file.
+ * Returns
+ * NULL Failure
+ * NON-NULL (FILE *) of opened file.
+ */
+FILE *
+dst_s_fopen(const char *filename, const char *mode, int perm)
+{
+ FILE *fp;
+ char pathname[PATH_MAX];
+
+ if (strlen(filename) + strlen(dst_path) >= sizeof(pathname))
+ return (NULL);
+
+ if (*dst_path != '\0') {
+ strcpy(pathname, dst_path);
+ strcat(pathname, filename);
+ } else
+ strcpy(pathname, filename);
+
+ fp = fopen(pathname, mode);
+ if (perm)
+ chmod(pathname, perm);
+ return (fp);
+}
+
+void
+dst_s_dump(const int mode, const u_char *data, const int size,
+ const char *msg)
+{
+ UNUSED(data);
+
+ if (size > 0) {
+#ifdef LONG_TEST
+ static u_char scratch[1000];
+ int n ;
+ n = b64_ntop(data, scratch, size, sizeof(scratch));
+ printf("%s: %x %d %s\n", msg, mode, n, scratch);
+#else
+ printf("%s,%x %d\n", msg, mode, size);
+#endif
+ }
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/inet/inet_cidr_ntop.c b/usr/src/lib/libresolv2_joy/common/inet/inet_cidr_ntop.c
new file mode 100644
index 0000000000..bf960a8acc
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/inet/inet_cidr_ntop.c
@@ -0,0 +1,263 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1998,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: inet_cidr_ntop.c,v 1.7 2006/10/11 02:18:18 marka Exp $";
+#endif
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <arpa/inet.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "port_after.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) ((size_t)sprintf x)
+#endif
+
+static char *
+inet_cidr_ntop_ipv4(const u_char *src, int bits, char *dst, size_t size);
+static char *
+inet_cidr_ntop_ipv6(const u_char *src, int bits, char *dst, size_t size);
+
+/*%
+ * char *
+ * inet_cidr_ntop(af, src, bits, dst, size)
+ * convert network address from network to presentation format.
+ * "src"'s size is determined from its "af".
+ * return:
+ * pointer to dst, or NULL if an error occurred (check errno).
+ * note:
+ * 192.5.5.1/28 has a nonzero host part, which means it isn't a network
+ * as called for by inet_net_ntop() but it can be a host address with
+ * an included netmask.
+ * author:
+ * Paul Vixie (ISC), October 1998
+ */
+char *
+inet_cidr_ntop(int af, const void *src, int bits, char *dst, size_t size) {
+ switch (af) {
+ case AF_INET:
+ return (inet_cidr_ntop_ipv4(src, bits, dst, size));
+ case AF_INET6:
+ return (inet_cidr_ntop_ipv6(src, bits, dst, size));
+ default:
+ errno = EAFNOSUPPORT;
+ return (NULL);
+ }
+}
+
+static int
+decoct(const u_char *src, int bytes, char *dst, size_t size) {
+ char *odst = dst;
+ char *t;
+ int b;
+
+ for (b = 1; b <= bytes; b++) {
+ if (size < sizeof "255.")
+ return (0);
+ t = dst;
+ dst += SPRINTF((dst, "%u", *src++));
+ if (b != bytes) {
+ *dst++ = '.';
+ *dst = '\0';
+ }
+ size -= (size_t)(dst - t);
+ }
+ return (dst - odst);
+}
+
+/*%
+ * static char *
+ * inet_cidr_ntop_ipv4(src, bits, dst, size)
+ * convert IPv4 network address from network to presentation format.
+ * "src"'s size is determined from its "af".
+ * return:
+ * pointer to dst, or NULL if an error occurred (check errno).
+ * note:
+ * network byte order assumed. this means 192.5.5.240/28 has
+ * 0b11110000 in its fourth octet.
+ * author:
+ * Paul Vixie (ISC), October 1998
+ */
+static char *
+inet_cidr_ntop_ipv4(const u_char *src, int bits, char *dst, size_t size) {
+ char *odst = dst;
+ size_t len = 4;
+ size_t b;
+ size_t bytes;
+
+ if ((bits < -1) || (bits > 32)) {
+ errno = EINVAL;
+ return (NULL);
+ }
+
+ /* Find number of significant bytes in address. */
+ if (bits == -1)
+ len = 4;
+ else
+ for (len = 1, b = 1 ; b < 4U; b++)
+ if (*(src + b))
+ len = b + 1;
+
+ /* Format whole octets plus nonzero trailing octets. */
+ bytes = (((bits <= 0) ? 1 : bits) + 7) / 8;
+ if (len > bytes)
+ bytes = len;
+ b = decoct(src, bytes, dst, size);
+ if (b == 0U)
+ goto emsgsize;
+ dst += b;
+ size -= b;
+
+ if (bits != -1) {
+ /* Format CIDR /width. */
+ if (size < sizeof "/32")
+ goto emsgsize;
+ dst += SPRINTF((dst, "/%u", bits));
+ }
+
+ return (odst);
+
+ emsgsize:
+ errno = EMSGSIZE;
+ return (NULL);
+}
+
+static char *
+inet_cidr_ntop_ipv6(const u_char *src, int bits, char *dst, size_t size) {
+ /*
+ * Note that int32_t and int16_t need only be "at least" large enough
+ * to contain a value of the specified size. On some systems, like
+ * Crays, there is no such thing as an integer variable with 16 bits.
+ * Keep this in mind if you think this function should have been coded
+ * to use pointer overlays. All the world's not a VAX.
+ */
+ char tmp[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255/128"];
+ char *tp;
+ struct { int base, len; } best, cur;
+ u_int words[NS_IN6ADDRSZ / NS_INT16SZ];
+ int i;
+
+ if ((bits < -1) || (bits > 128)) {
+ errno = EINVAL;
+ return (NULL);
+ }
+
+ /*
+ * Preprocess:
+ * Copy the input (bytewise) array into a wordwise array.
+ * Find the longest run of 0x00's in src[] for :: shorthanding.
+ */
+ memset(words, '\0', sizeof words);
+ for (i = 0; i < NS_IN6ADDRSZ; i++)
+ words[i / 2] |= (src[i] << ((1 - (i % 2)) << 3));
+ best.base = -1;
+ best.len = 0;
+ cur.base = -1;
+ cur.len = 0;
+ for (i = 0; i < (NS_IN6ADDRSZ / NS_INT16SZ); i++) {
+ if (words[i] == 0) {
+ if (cur.base == -1)
+ cur.base = i, cur.len = 1;
+ else
+ cur.len++;
+ } else {
+ if (cur.base != -1) {
+ if (best.base == -1 || cur.len > best.len)
+ best = cur;
+ cur.base = -1;
+ }
+ }
+ }
+ if (cur.base != -1) {
+ if (best.base == -1 || cur.len > best.len)
+ best = cur;
+ }
+ if (best.base != -1 && best.len < 2)
+ best.base = -1;
+
+ /*
+ * Format the result.
+ */
+ tp = tmp;
+ for (i = 0; i < (NS_IN6ADDRSZ / NS_INT16SZ); i++) {
+ /* Are we inside the best run of 0x00's? */
+ if (best.base != -1 && i >= best.base &&
+ i < (best.base + best.len)) {
+ if (i == best.base)
+ *tp++ = ':';
+ continue;
+ }
+ /* Are we following an initial run of 0x00s or any real hex? */
+ if (i != 0)
+ *tp++ = ':';
+ /* Is this address an encapsulated IPv4? */
+ if (i == 6 && best.base == 0 && (best.len == 6 ||
+ (best.len == 7 && words[7] != 0x0001) ||
+ (best.len == 5 && words[5] == 0xffff))) {
+ int n;
+
+ if (src[15] || bits == -1 || bits > 120)
+ n = 4;
+ else if (src[14] || bits > 112)
+ n = 3;
+ else
+ n = 2;
+ n = decoct(src+12, n, tp, sizeof tmp - (tp - tmp));
+ if (n == 0) {
+ errno = EMSGSIZE;
+ return (NULL);
+ }
+ tp += strlen(tp);
+ break;
+ }
+ tp += SPRINTF((tp, "%x", words[i]));
+ }
+
+ /* Was it a trailing run of 0x00's? */
+ if (best.base != -1 && (best.base + best.len) ==
+ (NS_IN6ADDRSZ / NS_INT16SZ))
+ *tp++ = ':';
+ *tp = '\0';
+
+ if (bits != -1)
+ tp += SPRINTF((tp, "/%u", bits));
+
+ /*
+ * Check for overflow, copy, and we're done.
+ */
+ if ((size_t)(tp - tmp) > size) {
+ errno = EMSGSIZE;
+ return (NULL);
+ }
+ strcpy(dst, tmp);
+ return (dst);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/inet/inet_cidr_pton.c b/usr/src/lib/libresolv2_joy/common/inet/inet_cidr_pton.c
new file mode 100644
index 0000000000..07652af463
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/inet/inet_cidr_pton.c
@@ -0,0 +1,277 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1998,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: inet_cidr_pton.c,v 1.6 2005/04/27 04:56:19 sra Exp $";
+#endif
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <arpa/inet.h>
+
+#include <isc/assertions.h>
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "port_after.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) ((size_t)sprintf x)
+#endif
+
+static int inet_cidr_pton_ipv4 __P((const char *src, u_char *dst,
+ int *bits, int ipv6));
+static int inet_cidr_pton_ipv6 __P((const char *src, u_char *dst,
+ int *bits));
+
+static int getbits(const char *, int ipv6);
+
+/*%
+ * int
+ * inet_cidr_pton(af, src, dst, *bits)
+ * convert network address from presentation to network format.
+ * accepts inet_pton()'s input for this "af" plus trailing "/CIDR".
+ * "dst" is assumed large enough for its "af". "bits" is set to the
+ * /CIDR prefix length, which can have defaults (like /32 for IPv4).
+ * return:
+ * -1 if an error occurred (inspect errno; ENOENT means bad format).
+ * 0 if successful conversion occurred.
+ * note:
+ * 192.5.5.1/28 has a nonzero host part, which means it isn't a network
+ * as called for by inet_net_pton() but it can be a host address with
+ * an included netmask.
+ * author:
+ * Paul Vixie (ISC), October 1998
+ */
+int
+inet_cidr_pton(int af, const char *src, void *dst, int *bits) {
+ switch (af) {
+ case AF_INET:
+ return (inet_cidr_pton_ipv4(src, dst, bits, 0));
+ case AF_INET6:
+ return (inet_cidr_pton_ipv6(src, dst, bits));
+ default:
+ errno = EAFNOSUPPORT;
+ return (-1);
+ }
+}
+
+static const char digits[] = "0123456789";
+
+static int
+inet_cidr_pton_ipv4(const char *src, u_char *dst, int *pbits, int ipv6) {
+ const u_char *odst = dst;
+ int n, ch, tmp, bits;
+ size_t size = 4;
+
+ /* Get the mantissa. */
+ while (ch = *src++, (isascii(ch) && isdigit(ch))) {
+ tmp = 0;
+ do {
+ n = strchr(digits, ch) - digits;
+ INSIST(n >= 0 && n <= 9);
+ tmp *= 10;
+ tmp += n;
+ if (tmp > 255)
+ goto enoent;
+ } while ((ch = *src++) != '\0' && isascii(ch) && isdigit(ch));
+ if (size-- == 0U)
+ goto emsgsize;
+ *dst++ = (u_char) tmp;
+ if (ch == '\0' || ch == '/')
+ break;
+ if (ch != '.')
+ goto enoent;
+ }
+
+ /* Get the prefix length if any. */
+ bits = -1;
+ if (ch == '/' && dst > odst) {
+ bits = getbits(src, ipv6);
+ if (bits == -2)
+ goto enoent;
+ } else if (ch != '\0')
+ goto enoent;
+
+ /* Prefix length can default to /32 only if all four octets spec'd. */
+ if (bits == -1) {
+ if (dst - odst == 4)
+ bits = ipv6 ? 128 : 32;
+ else
+ goto enoent;
+ }
+
+ /* If nothing was written to the destination, we found no address. */
+ if (dst == odst)
+ goto enoent;
+
+ /* If prefix length overspecifies mantissa, life is bad. */
+ if (((bits - (ipv6 ? 96 : 0)) / 8) > (dst - odst))
+ goto enoent;
+
+ /* Extend address to four octets. */
+ while (size-- > 0U)
+ *dst++ = 0;
+
+ *pbits = bits;
+ return (0);
+
+ enoent:
+ errno = ENOENT;
+ return (-1);
+
+ emsgsize:
+ errno = EMSGSIZE;
+ return (-1);
+}
+
+static int
+inet_cidr_pton_ipv6(const char *src, u_char *dst, int *pbits) {
+ static const char xdigits_l[] = "0123456789abcdef",
+ xdigits_u[] = "0123456789ABCDEF";
+ u_char tmp[NS_IN6ADDRSZ], *tp, *endp, *colonp;
+ const char *xdigits, *curtok;
+ int ch, saw_xdigit;
+ u_int val;
+ int bits;
+
+ memset((tp = tmp), '\0', NS_IN6ADDRSZ);
+ endp = tp + NS_IN6ADDRSZ;
+ colonp = NULL;
+ /* Leading :: requires some special handling. */
+ if (*src == ':')
+ if (*++src != ':')
+ return (0);
+ curtok = src;
+ saw_xdigit = 0;
+ val = 0;
+ bits = -1;
+ while ((ch = *src++) != '\0') {
+ const char *pch;
+
+ if ((pch = strchr((xdigits = xdigits_l), ch)) == NULL)
+ pch = strchr((xdigits = xdigits_u), ch);
+ if (pch != NULL) {
+ val <<= 4;
+ val |= (pch - xdigits);
+ if (val > 0xffff)
+ return (0);
+ saw_xdigit = 1;
+ continue;
+ }
+ if (ch == ':') {
+ curtok = src;
+ if (!saw_xdigit) {
+ if (colonp)
+ return (0);
+ colonp = tp;
+ continue;
+ } else if (*src == '\0') {
+ return (0);
+ }
+ if (tp + NS_INT16SZ > endp)
+ return (0);
+ *tp++ = (u_char) (val >> 8) & 0xff;
+ *tp++ = (u_char) val & 0xff;
+ saw_xdigit = 0;
+ val = 0;
+ continue;
+ }
+ if (ch == '.' && ((tp + NS_INADDRSZ) <= endp) &&
+ inet_cidr_pton_ipv4(curtok, tp, &bits, 1) == 0) {
+ tp += NS_INADDRSZ;
+ saw_xdigit = 0;
+ break; /*%< '\\0' was seen by inet_pton4(). */
+ }
+ if (ch == '/') {
+ bits = getbits(src, 1);
+ if (bits == -2)
+ goto enoent;
+ break;
+ }
+ goto enoent;
+ }
+ if (saw_xdigit) {
+ if (tp + NS_INT16SZ > endp)
+ goto emsgsize;
+ *tp++ = (u_char) (val >> 8) & 0xff;
+ *tp++ = (u_char) val & 0xff;
+ }
+ if (colonp != NULL) {
+ /*
+ * Since some memmove()'s erroneously fail to handle
+ * overlapping regions, we'll do the shift by hand.
+ */
+ const int n = tp - colonp;
+ int i;
+
+ if (tp == endp)
+ goto enoent;
+ for (i = 1; i <= n; i++) {
+ endp[- i] = colonp[n - i];
+ colonp[n - i] = 0;
+ }
+ tp = endp;
+ }
+
+ memcpy(dst, tmp, NS_IN6ADDRSZ);
+
+ *pbits = bits;
+ return (0);
+
+ enoent:
+ errno = ENOENT;
+ return (-1);
+
+ emsgsize:
+ errno = EMSGSIZE;
+ return (-1);
+}
+
+static int
+getbits(const char *src, int ipv6) {
+ int bits = 0;
+ char *cp, ch;
+
+ if (*src == '\0') /*%< syntax */
+ return (-2);
+ do {
+ ch = *src++;
+ cp = strchr(digits, ch);
+ if (cp == NULL) /*%< syntax */
+ return (-2);
+ bits *= 10;
+ bits += cp - digits;
+ if (bits == 0 && *src != '\0') /*%< no leading zeros */
+ return (-2);
+ if (bits > (ipv6 ? 128 : 32)) /*%< range error */
+ return (-2);
+ } while (*src != '\0');
+
+ return (bits);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/inet/inet_data.c b/usr/src/lib/libresolv2_joy/common/inet/inet_data.c
new file mode 100644
index 0000000000..58b8c4342d
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/inet/inet_data.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1995-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char rcsid[] = "$Id: inet_data.c,v 1.4 2005/04/27 04:56:19 sra Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <ctype.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "port_after.h"
+
+const struct in6_addr isc_in6addr_any = IN6ADDR_ANY_INIT;
+const struct in6_addr isc_in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
+
+/*! \file */
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/inet_ntoa.c b/usr/src/lib/libresolv2_joy/common/inet/inet_lnaof.c
index e788c83ea0..70ac409512 100644
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/inet_ntoa.c
+++ b/usr/src/lib/libresolv2_joy/common/inet/inet_lnaof.c
@@ -31,36 +31,35 @@
* SUCH DAMAGE.
*/
-#include "includes.h"
-
-#if defined(BROKEN_INET_NTOA) || !defined(HAVE_INET_NTOA)
-
#if defined(LIBC_SCCS) && !defined(lint)
-static char rcsid[] = "$OpenBSD: inet_ntoa.c,v 1.3 2002/06/27 10:14:01 itojun Exp $";
+static const char sccsid[] = "@(#)inet_lnaof.c 8.1 (Berkeley) 6/4/93";
#endif /* LIBC_SCCS and not lint */
-/*
- * Convert network-format internet address
- * to base 256 d.d.d.d representation.
- */
-#include <sys/types.h>
+#include "port_before.h"
+
+#include <sys/param.h>
#include <netinet/in.h>
#include <arpa/inet.h>
-#include <stdio.h>
-#include "inet_ntoa.h"
-char *inet_ntoa(struct in_addr in)
+#include "port_after.h"
+
+/*%
+ * Return the local network address portion of an
+ * internet address; handles class a/b/c network
+ * number formats.
+ */
+u_long
+inet_lnaof(in)
+ struct in_addr in;
{
- static char b[18];
- register char *p;
+ register u_long i = ntohl(in.s_addr);
- p = (char *)&in;
-#define UC(b) (((int)b)&0xff)
- (void)snprintf(b, sizeof(b),
- "%u.%u.%u.%u", UC(p[0]), UC(p[1]), UC(p[2]), UC(p[3]));
- return (b);
+ if (IN_CLASSA(i))
+ return ((i)&IN_CLASSA_HOST);
+ else if (IN_CLASSB(i))
+ return ((i)&IN_CLASSB_HOST);
+ else
+ return ((i)&IN_CLASSC_HOST);
}
-#endif /* defined(BROKEN_INET_NTOA) || !defined(HAVE_INET_NTOA) */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/inet/inet_makeaddr.c b/usr/src/lib/libresolv2_joy/common/inet/inet_makeaddr.c
new file mode 100644
index 0000000000..c56cb3eaeb
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/inet/inet_makeaddr.c
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 1983, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)inet_makeaddr.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "port_before.h"
+
+#include <sys/param.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#include "port_after.h"
+
+/*%
+ * Formulate an Internet address from network + host. Used in
+ * building addresses stored in the ifnet structure.
+ */
+struct in_addr
+inet_makeaddr(net, host)
+ u_long net, host;
+{
+ struct in_addr a;
+
+ if (net < 128U)
+ a.s_addr = (net << IN_CLASSA_NSHIFT) | (host & IN_CLASSA_HOST);
+ else if (net < 65536U)
+ a.s_addr = (net << IN_CLASSB_NSHIFT) | (host & IN_CLASSB_HOST);
+ else if (net < 16777216L)
+ a.s_addr = (net << IN_CLASSC_NSHIFT) | (host & IN_CLASSC_HOST);
+ else
+ a.s_addr = net | host;
+ a.s_addr = htonl(a.s_addr);
+ return (a);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/inet/inet_net_ntop.c b/usr/src/lib/libresolv2_joy/common/inet/inet_net_ntop.c
new file mode 100644
index 0000000000..fb28e3cbe5
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/inet/inet_net_ntop.c
@@ -0,0 +1,279 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: inet_net_ntop.c,v 1.5 2006/06/20 02:50:14 marka Exp $";
+#endif
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "port_after.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) ((size_t)sprintf x)
+#endif
+
+static char * inet_net_ntop_ipv4 __P((const u_char *src, int bits,
+ char *dst, size_t size));
+static char * inet_net_ntop_ipv6 __P((const u_char *src, int bits,
+ char *dst, size_t size));
+
+/*%
+ * char *
+ * inet_net_ntop(af, src, bits, dst, size)
+ * convert network number from network to presentation format.
+ * generates CIDR style result always.
+ * return:
+ * pointer to dst, or NULL if an error occurred (check errno).
+ * author:
+ * Paul Vixie (ISC), July 1996
+ */
+char *
+inet_net_ntop(af, src, bits, dst, size)
+ int af;
+ const void *src;
+ int bits;
+ char *dst;
+ size_t size;
+{
+ switch (af) {
+ case AF_INET:
+ return (inet_net_ntop_ipv4(src, bits, dst, size));
+ case AF_INET6:
+ return (inet_net_ntop_ipv6(src, bits, dst, size));
+ default:
+ errno = EAFNOSUPPORT;
+ return (NULL);
+ }
+}
+
+/*%
+ * static char *
+ * inet_net_ntop_ipv4(src, bits, dst, size)
+ * convert IPv4 network number from network to presentation format.
+ * generates CIDR style result always.
+ * return:
+ * pointer to dst, or NULL if an error occurred (check errno).
+ * note:
+ * network byte order assumed. this means 192.5.5.240/28 has
+ * 0b11110000 in its fourth octet.
+ * author:
+ * Paul Vixie (ISC), July 1996
+ */
+static char *
+inet_net_ntop_ipv4(src, bits, dst, size)
+ const u_char *src;
+ int bits;
+ char *dst;
+ size_t size;
+{
+ char *odst = dst;
+ char *t;
+ u_int m;
+ int b;
+
+ if (bits < 0 || bits > 32) {
+ errno = EINVAL;
+ return (NULL);
+ }
+
+ if (bits == 0) {
+ if (size < sizeof "0")
+ goto emsgsize;
+ *dst++ = '0';
+ size--;
+ *dst = '\0';
+ }
+
+ /* Format whole octets. */
+ for (b = bits / 8; b > 0; b--) {
+ if (size <= sizeof "255.")
+ goto emsgsize;
+ t = dst;
+ dst += SPRINTF((dst, "%u", *src++));
+ if (b > 1) {
+ *dst++ = '.';
+ *dst = '\0';
+ }
+ size -= (size_t)(dst - t);
+ }
+
+ /* Format partial octet. */
+ b = bits % 8;
+ if (b > 0) {
+ if (size <= sizeof ".255")
+ goto emsgsize;
+ t = dst;
+ if (dst != odst)
+ *dst++ = '.';
+ m = ((1 << b) - 1) << (8 - b);
+ dst += SPRINTF((dst, "%u", *src & m));
+ size -= (size_t)(dst - t);
+ }
+
+ /* Format CIDR /width. */
+ if (size <= sizeof "/32")
+ goto emsgsize;
+ dst += SPRINTF((dst, "/%u", bits));
+ return (odst);
+
+ emsgsize:
+ errno = EMSGSIZE;
+ return (NULL);
+}
+
+/*%
+ * static char *
+ * inet_net_ntop_ipv6(src, bits, fakebits, dst, size)
+ * convert IPv6 network number from network to presentation format.
+ * generates CIDR style result always. Picks the shortest representation
+ * unless the IP is really IPv4.
+ * always prints specified number of bits (bits).
+ * return:
+ * pointer to dst, or NULL if an error occurred (check errno).
+ * note:
+ * network byte order assumed. this means 192.5.5.240/28 has
+ * 0x11110000 in its fourth octet.
+ * author:
+ * Vadim Kogan (UCB), June 2001
+ * Original version (IPv4) by Paul Vixie (ISC), July 1996
+ */
+
+static char *
+inet_net_ntop_ipv6(const u_char *src, int bits, char *dst, size_t size) {
+ u_int m;
+ int b;
+ int p;
+ int zero_s, zero_l, tmp_zero_s, tmp_zero_l;
+ int i;
+ int is_ipv4 = 0;
+ unsigned char inbuf[16];
+ char outbuf[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255/128")];
+ char *cp;
+ int words;
+ u_char *s;
+
+ if (bits < 0 || bits > 128) {
+ errno = EINVAL;
+ return (NULL);
+ }
+
+ cp = outbuf;
+
+ if (bits == 0) {
+ *cp++ = ':';
+ *cp++ = ':';
+ *cp = '\0';
+ } else {
+ /* Copy src to private buffer. Zero host part. */
+ p = (bits + 7) / 8;
+ memcpy(inbuf, src, p);
+ memset(inbuf + p, 0, 16 - p);
+ b = bits % 8;
+ if (b != 0) {
+ m = ~0 << (8 - b);
+ inbuf[p-1] &= m;
+ }
+
+ s = inbuf;
+
+ /* how many words need to be displayed in output */
+ words = (bits + 15) / 16;
+ if (words == 1)
+ words = 2;
+
+ /* Find the longest substring of zero's */
+ zero_s = zero_l = tmp_zero_s = tmp_zero_l = 0;
+ for (i = 0; i < (words * 2); i += 2) {
+ if ((s[i] | s[i+1]) == 0) {
+ if (tmp_zero_l == 0)
+ tmp_zero_s = i / 2;
+ tmp_zero_l++;
+ } else {
+ if (tmp_zero_l && zero_l < tmp_zero_l) {
+ zero_s = tmp_zero_s;
+ zero_l = tmp_zero_l;
+ tmp_zero_l = 0;
+ }
+ }
+ }
+
+ if (tmp_zero_l && zero_l < tmp_zero_l) {
+ zero_s = tmp_zero_s;
+ zero_l = tmp_zero_l;
+ }
+
+ if (zero_l != words && zero_s == 0 && ((zero_l == 6) ||
+ ((zero_l == 5 && s[10] == 0xff && s[11] == 0xff) ||
+ ((zero_l == 7 && s[14] != 0 && s[15] != 1)))))
+ is_ipv4 = 1;
+
+ /* Format whole words. */
+ for (p = 0; p < words; p++) {
+ if (zero_l != 0 && p >= zero_s && p < zero_s + zero_l) {
+ /* Time to skip some zeros */
+ if (p == zero_s)
+ *cp++ = ':';
+ if (p == words - 1)
+ *cp++ = ':';
+ s++;
+ s++;
+ continue;
+ }
+
+ if (is_ipv4 && p > 5 ) {
+ *cp++ = (p == 6) ? ':' : '.';
+ cp += SPRINTF((cp, "%u", *s++));
+ /* we can potentially drop the last octet */
+ if (p != 7 || bits > 120) {
+ *cp++ = '.';
+ cp += SPRINTF((cp, "%u", *s++));
+ }
+ } else {
+ if (cp != outbuf)
+ *cp++ = ':';
+ cp += SPRINTF((cp, "%x", *s * 256 + s[1]));
+ s += 2;
+ }
+ }
+ }
+ /* Format CIDR /width. */
+ sprintf(cp, "/%u", bits);
+ if (strlen(outbuf) + 1 > size)
+ goto emsgsize;
+ strcpy(dst, outbuf);
+
+ return (dst);
+
+emsgsize:
+ errno = EMSGSIZE;
+ return (NULL);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/inet/inet_net_pton.c b/usr/src/lib/libresolv2_joy/common/inet/inet_net_pton.c
new file mode 100644
index 0000000000..8d8bfb1f64
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/inet/inet_net_pton.c
@@ -0,0 +1,407 @@
+/*
+ * Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 1996, 1998, 1999, 2001, 2003 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: inet_net_pton.c,v 1.10 2008/11/14 02:36:51 marka Exp $";
+#endif
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <arpa/inet.h>
+
+#include <isc/assertions.h>
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "port_after.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) ((size_t)sprintf x)
+#endif
+
+/*%
+ * static int
+ * inet_net_pton_ipv4(src, dst, size)
+ * convert IPv4 network number from presentation to network format.
+ * accepts hex octets, hex strings, decimal octets, and /CIDR.
+ * "size" is in bytes and describes "dst".
+ * return:
+ * number of bits, either imputed classfully or specified with /CIDR,
+ * or -1 if some failure occurred (check errno). ENOENT means it was
+ * not an IPv4 network specification.
+ * note:
+ * network byte order assumed. this means 192.5.5.240/28 has
+ * 0b11110000 in its fourth octet.
+ * author:
+ * Paul Vixie (ISC), June 1996
+ */
+static int
+inet_net_pton_ipv4(const char *src, u_char *dst, size_t size) {
+ static const char xdigits[] = "0123456789abcdef";
+ static const char digits[] = "0123456789";
+ int n, ch, tmp = 0, dirty, bits;
+ const u_char *odst = dst;
+
+ ch = *src++;
+ if (ch == '0' && (src[0] == 'x' || src[0] == 'X')
+ && isascii((unsigned char)(src[1]))
+ && isxdigit((unsigned char)(src[1]))) {
+ /* Hexadecimal: Eat nybble string. */
+ if (size <= 0U)
+ goto emsgsize;
+ dirty = 0;
+ src++; /*%< skip x or X. */
+ while ((ch = *src++) != '\0' && isascii(ch) && isxdigit(ch)) {
+ if (isupper(ch))
+ ch = tolower(ch);
+ n = strchr(xdigits, ch) - xdigits;
+ INSIST(n >= 0 && n <= 15);
+ if (dirty == 0)
+ tmp = n;
+ else
+ tmp = (tmp << 4) | n;
+ if (++dirty == 2) {
+ if (size-- <= 0U)
+ goto emsgsize;
+ *dst++ = (u_char) tmp;
+ dirty = 0;
+ }
+ }
+ if (dirty) { /*%< Odd trailing nybble? */
+ if (size-- <= 0U)
+ goto emsgsize;
+ *dst++ = (u_char) (tmp << 4);
+ }
+ } else if (isascii(ch) && isdigit(ch)) {
+ /* Decimal: eat dotted digit string. */
+ for (;;) {
+ tmp = 0;
+ do {
+ n = strchr(digits, ch) - digits;
+ INSIST(n >= 0 && n <= 9);
+ tmp *= 10;
+ tmp += n;
+ if (tmp > 255)
+ goto enoent;
+ } while ((ch = *src++) != '\0' &&
+ isascii(ch) && isdigit(ch));
+ if (size-- <= 0U)
+ goto emsgsize;
+ *dst++ = (u_char) tmp;
+ if (ch == '\0' || ch == '/')
+ break;
+ if (ch != '.')
+ goto enoent;
+ ch = *src++;
+ if (!isascii(ch) || !isdigit(ch))
+ goto enoent;
+ }
+ } else
+ goto enoent;
+
+ bits = -1;
+ if (ch == '/' && isascii((unsigned char)(src[0])) &&
+ isdigit((unsigned char)(src[0])) && dst > odst) {
+ /* CIDR width specifier. Nothing can follow it. */
+ ch = *src++; /*%< Skip over the /. */
+ bits = 0;
+ do {
+ n = strchr(digits, ch) - digits;
+ INSIST(n >= 0 && n <= 9);
+ bits *= 10;
+ bits += n;
+ if (bits > 32)
+ goto enoent;
+ } while ((ch = *src++) != '\0' && isascii(ch) && isdigit(ch));
+ if (ch != '\0')
+ goto enoent;
+ }
+
+ /* Firey death and destruction unless we prefetched EOS. */
+ if (ch != '\0')
+ goto enoent;
+
+ /* If nothing was written to the destination, we found no address. */
+ if (dst == odst)
+ goto enoent;
+ /* If no CIDR spec was given, infer width from net class. */
+ if (bits == -1) {
+ if (*odst >= 240) /*%< Class E */
+ bits = 32;
+ else if (*odst >= 224) /*%< Class D */
+ bits = 8;
+ else if (*odst >= 192) /*%< Class C */
+ bits = 24;
+ else if (*odst >= 128) /*%< Class B */
+ bits = 16;
+ else /*%< Class A */
+ bits = 8;
+ /* If imputed mask is narrower than specified octets, widen. */
+ if (bits < ((dst - odst) * 8))
+ bits = (dst - odst) * 8;
+ /*
+ * If there are no additional bits specified for a class D
+ * address adjust bits to 4.
+ */
+ if (bits == 8 && *odst == 224)
+ bits = 4;
+ }
+ /* Extend network to cover the actual mask. */
+ while (bits > ((dst - odst) * 8)) {
+ if (size-- <= 0U)
+ goto emsgsize;
+ *dst++ = '\0';
+ }
+ return (bits);
+
+ enoent:
+ errno = ENOENT;
+ return (-1);
+
+ emsgsize:
+ errno = EMSGSIZE;
+ return (-1);
+}
+
+static int
+getbits(const char *src, int *bitsp) {
+ static const char digits[] = "0123456789";
+ int n;
+ int val;
+ char ch;
+
+ val = 0;
+ n = 0;
+ while ((ch = *src++) != '\0') {
+ const char *pch;
+
+ pch = strchr(digits, ch);
+ if (pch != NULL) {
+ if (n++ != 0 && val == 0) /*%< no leading zeros */
+ return (0);
+ val *= 10;
+ val += (pch - digits);
+ if (val > 128) /*%< range */
+ return (0);
+ continue;
+ }
+ return (0);
+ }
+ if (n == 0)
+ return (0);
+ *bitsp = val;
+ return (1);
+}
+
+static int
+getv4(const char *src, u_char *dst, int *bitsp) {
+ static const char digits[] = "0123456789";
+ u_char *odst = dst;
+ int n;
+ u_int val;
+ char ch;
+
+ val = 0;
+ n = 0;
+ while ((ch = *src++) != '\0') {
+ const char *pch;
+
+ pch = strchr(digits, ch);
+ if (pch != NULL) {
+ if (n++ != 0 && val == 0) /*%< no leading zeros */
+ return (0);
+ val *= 10;
+ val += (pch - digits);
+ if (val > 255) /*%< range */
+ return (0);
+ continue;
+ }
+ if (ch == '.' || ch == '/') {
+ if (dst - odst > 3) /*%< too many octets? */
+ return (0);
+ *dst++ = val;
+ if (ch == '/')
+ return (getbits(src, bitsp));
+ val = 0;
+ n = 0;
+ continue;
+ }
+ return (0);
+ }
+ if (n == 0)
+ return (0);
+ if (dst - odst > 3) /*%< too many octets? */
+ return (0);
+ *dst++ = val;
+ return (1);
+}
+
+static int
+inet_net_pton_ipv6(const char *src, u_char *dst, size_t size) {
+ static const char xdigits_l[] = "0123456789abcdef",
+ xdigits_u[] = "0123456789ABCDEF";
+ u_char tmp[NS_IN6ADDRSZ], *tp, *endp, *colonp;
+ const char *xdigits, *curtok;
+ int ch, saw_xdigit;
+ u_int val;
+ int digits;
+ int bits;
+ size_t bytes;
+ int words;
+ int ipv4;
+
+ memset((tp = tmp), '\0', NS_IN6ADDRSZ);
+ endp = tp + NS_IN6ADDRSZ;
+ colonp = NULL;
+ /* Leading :: requires some special handling. */
+ if (*src == ':')
+ if (*++src != ':')
+ goto enoent;
+ curtok = src;
+ saw_xdigit = 0;
+ val = 0;
+ digits = 0;
+ bits = -1;
+ ipv4 = 0;
+ while ((ch = *src++) != '\0') {
+ const char *pch;
+
+ if ((pch = strchr((xdigits = xdigits_l), ch)) == NULL)
+ pch = strchr((xdigits = xdigits_u), ch);
+ if (pch != NULL) {
+ val <<= 4;
+ val |= (pch - xdigits);
+ if (++digits > 4)
+ goto enoent;
+ saw_xdigit = 1;
+ continue;
+ }
+ if (ch == ':') {
+ curtok = src;
+ if (!saw_xdigit) {
+ if (colonp)
+ goto enoent;
+ colonp = tp;
+ continue;
+ } else if (*src == '\0')
+ goto enoent;
+ if (tp + NS_INT16SZ > endp)
+ return (0);
+ *tp++ = (u_char) (val >> 8) & 0xff;
+ *tp++ = (u_char) val & 0xff;
+ saw_xdigit = 0;
+ digits = 0;
+ val = 0;
+ continue;
+ }
+ if (ch == '.' && ((tp + NS_INADDRSZ) <= endp) &&
+ getv4(curtok, tp, &bits) > 0) {
+ tp += NS_INADDRSZ;
+ saw_xdigit = 0;
+ ipv4 = 1;
+ break; /*%< '\\0' was seen by inet_pton4(). */
+ }
+ if (ch == '/' && getbits(src, &bits) > 0)
+ break;
+ goto enoent;
+ }
+ if (saw_xdigit) {
+ if (tp + NS_INT16SZ > endp)
+ goto enoent;
+ *tp++ = (u_char) (val >> 8) & 0xff;
+ *tp++ = (u_char) val & 0xff;
+ }
+ if (bits == -1)
+ bits = 128;
+
+ words = (bits + 15) / 16;
+ if (words < 2)
+ words = 2;
+ if (ipv4)
+ words = 8;
+ endp = tmp + 2 * words;
+
+ if (colonp != NULL) {
+ /*
+ * Since some memmove()'s erroneously fail to handle
+ * overlapping regions, we'll do the shift by hand.
+ */
+ const int n = tp - colonp;
+ int i;
+
+ if (tp == endp)
+ goto enoent;
+ for (i = 1; i <= n; i++) {
+ endp[- i] = colonp[n - i];
+ colonp[n - i] = 0;
+ }
+ tp = endp;
+ }
+ if (tp != endp)
+ goto enoent;
+
+ bytes = (bits + 7) / 8;
+ if (bytes > size)
+ goto emsgsize;
+ memcpy(dst, tmp, bytes);
+ return (bits);
+
+ enoent:
+ errno = ENOENT;
+ return (-1);
+
+ emsgsize:
+ errno = EMSGSIZE;
+ return (-1);
+}
+
+/*%
+ * int
+ * inet_net_pton(af, src, dst, size)
+ * convert network number from presentation to network format.
+ * accepts hex octets, hex strings, decimal octets, and /CIDR.
+ * "size" is in bytes and describes "dst".
+ * return:
+ * number of bits, either imputed classfully or specified with /CIDR,
+ * or -1 if some failure occurred (check errno). ENOENT means it was
+ * not a valid network specification.
+ * author:
+ * Paul Vixie (ISC), June 1996
+ */
+int
+inet_net_pton(int af, const char *src, void *dst, size_t size) {
+ switch (af) {
+ case AF_INET:
+ return (inet_net_pton_ipv4(src, dst, size));
+ case AF_INET6:
+ return (inet_net_pton_ipv6(src, dst, size));
+ default:
+ errno = EAFNOSUPPORT;
+ return (-1);
+ }
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/inet/inet_neta.c b/usr/src/lib/libresolv2_joy/common/inet/inet_neta.c
new file mode 100644
index 0000000000..63a6c201a4
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/inet/inet_neta.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: inet_neta.c,v 1.3 2005/04/27 04:56:20 sra Exp $";
+#endif
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "port_after.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) ((size_t)sprintf x)
+#endif
+
+/*%
+ * char *
+ * inet_neta(src, dst, size)
+ * format a u_long network number into presentation format.
+ * return:
+ * pointer to dst, or NULL if an error occurred (check errno).
+ * note:
+ * format of ``src'' is as for inet_network().
+ * author:
+ * Paul Vixie (ISC), July 1996
+ */
+char *
+inet_neta(src, dst, size)
+ u_long src;
+ char *dst;
+ size_t size;
+{
+ char *odst = dst;
+ char *tp;
+
+ while (src & 0xffffffff) {
+ u_char b = (src & 0xff000000) >> 24;
+
+ src <<= 8;
+ if (b) {
+ if (size < sizeof "255.")
+ goto emsgsize;
+ tp = dst;
+ dst += SPRINTF((dst, "%u", b));
+ if (src != 0L) {
+ *dst++ = '.';
+ *dst = '\0';
+ }
+ size -= (size_t)(dst - tp);
+ }
+ }
+ if (dst == odst) {
+ if (size < sizeof "0.0.0.0")
+ goto emsgsize;
+ strcpy(dst, "0.0.0.0");
+ }
+ return (odst);
+
+ emsgsize:
+ errno = EMSGSIZE;
+ return (NULL);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/inet/inet_netof.c b/usr/src/lib/libresolv2_joy/common/inet/inet_netof.c
new file mode 100644
index 0000000000..c228e3d818
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/inet/inet_netof.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 1983, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)inet_netof.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "port_before.h"
+
+#include <sys/param.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#include "port_after.h"
+
+/*%
+ * Return the network number from an internet
+ * address; handles class a/b/c network #'s.
+ */
+u_long
+inet_netof(in)
+ struct in_addr in;
+{
+ register u_long i = ntohl(in.s_addr);
+
+ if (IN_CLASSA(i))
+ return (((i)&IN_CLASSA_NET) >> IN_CLASSA_NSHIFT);
+ else if (IN_CLASSB(i))
+ return (((i)&IN_CLASSB_NET) >> IN_CLASSB_NSHIFT);
+ else
+ return (((i)&IN_CLASSC_NET) >> IN_CLASSC_NSHIFT);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/inet/inet_network.c b/usr/src/lib/libresolv2_joy/common/inet/inet_network.c
new file mode 100644
index 0000000000..47976cff68
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/inet/inet_network.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 1983, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)inet_network.c 8.1 (Berkeley) 6/4/93";
+#endif /* LIBC_SCCS and not lint */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <ctype.h>
+
+#include "port_after.h"
+
+/*%
+ * Internet network address interpretation routine.
+ * The library routines call this routine to interpret
+ * network numbers.
+ */
+u_long
+inet_network(cp)
+ register const char *cp;
+{
+ register u_long val, base, n, i;
+ register char c;
+ u_long parts[4], *pp = parts;
+ int digit;
+
+again:
+ val = 0; base = 10; digit = 0;
+ if (*cp == '0')
+ digit = 1, base = 8, cp++;
+ if (*cp == 'x' || *cp == 'X')
+ base = 16, cp++;
+ while ((c = *cp) != 0) {
+ if (isdigit((unsigned char)c)) {
+ if (base == 8U && (c == '8' || c == '9'))
+ return (INADDR_NONE);
+ val = (val * base) + (c - '0');
+ cp++;
+ digit = 1;
+ continue;
+ }
+ if (base == 16U && isxdigit((unsigned char)c)) {
+ val = (val << 4) +
+ (c + 10 - (islower((unsigned char)c) ? 'a' : 'A'));
+ cp++;
+ digit = 1;
+ continue;
+ }
+ break;
+ }
+ if (!digit)
+ return (INADDR_NONE);
+ if (pp >= parts + 4 || val > 0xffU)
+ return (INADDR_NONE);
+ if (*cp == '.') {
+ *pp++ = val, cp++;
+ goto again;
+ }
+ if (*cp && !isspace(*cp&0xff))
+ return (INADDR_NONE);
+ *pp++ = val;
+ n = pp - parts;
+ if (n > 4U)
+ return (INADDR_NONE);
+ for (val = 0, i = 0; i < n; i++) {
+ val <<= 8;
+ val |= parts[i] & 0xff;
+ }
+ return (val);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/inet/nsap_addr.c b/usr/src/lib/libresolv2_joy/common/inet/nsap_addr.c
new file mode 100644
index 0000000000..a9972e6e32
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/inet/nsap_addr.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: nsap_addr.c,v 1.5 2005/07/28 06:51:48 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <ctype.h>
+#include <resolv_joy.h>
+#include <resolv_mt.h>
+
+#include "port_after.h"
+
+static char
+xtob(int c) {
+ return (c - (((c >= '0') && (c <= '9')) ? '0' : '7'));
+}
+
+u_int
+inet_nsap_addr(const char *ascii, u_char *binary, int maxlen) {
+ u_char c, nib;
+ u_int len = 0;
+
+ if (ascii[0] != '0' || (ascii[1] != 'x' && ascii[1] != 'X'))
+ return (0);
+ ascii += 2;
+
+ while ((c = *ascii++) != '\0' && len < (u_int)maxlen) {
+ if (c == '.' || c == '+' || c == '/')
+ continue;
+ if (!isascii(c))
+ return (0);
+ if (islower(c))
+ c = toupper(c);
+ if (isxdigit(c)) {
+ nib = xtob(c);
+ c = *ascii++;
+ if (c != '\0') {
+ c = toupper(c);
+ if (isxdigit(c)) {
+ *binary++ = (nib << 4) | xtob(c);
+ len++;
+ } else
+ return (0);
+ }
+ else
+ return (0);
+ }
+ else
+ return (0);
+ }
+ return (len);
+}
+
+char *
+inet_nsap_ntoa(int binlen, const u_char *binary, char *ascii) {
+ int nib;
+ int i;
+ char *tmpbuf = inet_nsap_ntoa_tmpbuf;
+ char *start;
+
+ if (ascii)
+ start = ascii;
+ else {
+ ascii = tmpbuf;
+ start = tmpbuf;
+ }
+
+ *ascii++ = '0';
+ *ascii++ = 'x';
+
+ if (binlen > 255)
+ binlen = 255;
+
+ for (i = 0; i < binlen; i++) {
+ nib = *binary >> 4;
+ *ascii++ = nib + (nib < 10 ? '0' : '7');
+ nib = *binary++ & 0x0f;
+ *ascii++ = nib + (nib < 10 ? '0' : '7');
+ if (((i % 2) == 0 && (i + 1) < binlen))
+ *ascii++ = '.';
+ }
+ *ascii = '\0';
+ return (start);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/dns.c b/usr/src/lib/libresolv2_joy/common/irs/dns.c
new file mode 100644
index 0000000000..7c50320ae7
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/dns.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: dns.c,v 1.5 2006/03/09 23:57:56 marka Exp $";
+#endif
+
+/*! \file
+ * \brief
+ * dns.c --- this is the top-level accessor function for the dns
+ */
+
+#include "port_before.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+#include <resolv_joy.h>
+
+#include <isc/memcluster.h>
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "hesiod.h"
+#include "dns_p.h"
+
+/* forward */
+
+static void dns_close(struct irs_acc *);
+static struct __res_state * dns_res_get(struct irs_acc *);
+static void dns_res_set(struct irs_acc *, struct __res_state *,
+ void (*)(void *));
+
+/* public */
+
+struct irs_acc *
+irs_dns_acc(const char *options) {
+ struct irs_acc *acc;
+ struct dns_p *dns;
+
+ UNUSED(options);
+
+ if (!(acc = memget(sizeof *acc))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(acc, 0x5e, sizeof *acc);
+ if (!(dns = memget(sizeof *dns))) {
+ errno = ENOMEM;
+ memput(acc, sizeof *acc);
+ return (NULL);
+ }
+ memset(dns, 0x5e, sizeof *dns);
+ dns->res = NULL;
+ dns->free_res = NULL;
+ if (hesiod_init(&dns->hes_ctx) < 0) {
+ /*
+ * We allow the dns accessor class to initialize
+ * despite hesiod failing to initialize correctly,
+ * since dns host queries don't depend on hesiod.
+ */
+ dns->hes_ctx = NULL;
+ }
+ acc->private = dns;
+#ifdef WANT_IRS_GR
+ acc->gr_map = irs_dns_gr;
+#else
+ acc->gr_map = NULL;
+#endif
+#ifdef WANT_IRS_PW
+ acc->pw_map = irs_dns_pw;
+#else
+ acc->pw_map = NULL;
+#endif
+ acc->sv_map = irs_dns_sv;
+ acc->pr_map = irs_dns_pr;
+ acc->ho_map = irs_dns_ho;
+ acc->nw_map = irs_dns_nw;
+ acc->ng_map = irs_nul_ng;
+ acc->res_get = dns_res_get;
+ acc->res_set = dns_res_set;
+ acc->close = dns_close;
+ return (acc);
+}
+
+/* methods */
+static struct __res_state *
+dns_res_get(struct irs_acc *this) {
+ struct dns_p *dns = (struct dns_p *)this->private;
+
+ if (dns->res == NULL) {
+ struct __res_state *res;
+ res = (struct __res_state *)malloc(sizeof *res);
+ if (res == NULL)
+ return (NULL);
+ memset(res, 0, sizeof *res);
+ dns_res_set(this, res, free);
+ }
+
+ if ((dns->res->options & RES_INIT) == 0U &&
+ res_ninit(dns->res) < 0)
+ return (NULL);
+
+ return (dns->res);
+}
+
+static void
+dns_res_set(struct irs_acc *this, struct __res_state *res,
+ void (*free_res)(void *)) {
+ struct dns_p *dns = (struct dns_p *)this->private;
+
+ if (dns->res && dns->free_res) {
+ res_nclose(dns->res);
+ (*dns->free_res)(dns->res);
+ }
+ dns->res = res;
+ dns->free_res = free_res;
+}
+
+static void
+dns_close(struct irs_acc *this) {
+ struct dns_p *dns;
+
+ dns = (struct dns_p *)this->private;
+ if (dns->res && dns->free_res)
+ (*dns->free_res)(dns->res);
+ if (dns->hes_ctx)
+ hesiod_end(dns->hes_ctx);
+ memput(dns, sizeof *dns);
+ memput(this, sizeof *this);
+}
+
diff --git a/usr/src/lib/libresolv2_joy/common/irs/dns_ho.c b/usr/src/lib/libresolv2_joy/common/irs/dns_ho.c
new file mode 100644
index 0000000000..67812717fb
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/dns_ho.c
@@ -0,0 +1,1143 @@
+/*
+ * Portions Copyright (C) 2004-2006, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (C) 1996-2003 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Copyright (c) 1985, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* from gethostnamadr.c 8.1 (Berkeley) 6/4/93 */
+/* BIND Id: gethnamaddr.c,v 8.15 1996/05/22 04:56:30 vixie Exp $ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: dns_ho.c,v 1.23 2008/11/14 02:36:51 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/* Imports. */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <string.h>
+#include <syslog.h>
+
+#include <isc/memcluster.h>
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "dns_p.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) sprintf x
+#endif
+
+/* Definitions. */
+
+#define MAXALIASES 35
+#define MAXADDRS 35
+
+#define MAXPACKET (65535) /*%< Maximum TCP message size */
+#define BOUNDS_CHECK(ptr, count) \
+ if ((ptr) + (count) > eom) { \
+ had_error++; \
+ continue; \
+ } else (void)0
+
+typedef union {
+ HEADER hdr;
+ u_char buf[MAXPACKET];
+} querybuf;
+
+struct dns_res_target {
+ struct dns_res_target *next;
+ querybuf qbuf; /*%< query buffer */
+ u_char *answer; /*%< buffer to put answer */
+ int anslen; /*%< size of answer buffer */
+ int qclass, qtype; /*%< class and type of query */
+ int action; /*%< condition whether query is really issued */
+ char qname[MAXDNAME +1]; /*%< domain name */
+#if 0
+ int n; /*%< result length */
+#endif
+};
+enum {RESTGT_DOALWAYS, RESTGT_AFTERFAILURE, RESTGT_IGNORE};
+enum {RESQRY_SUCCESS, RESQRY_FAIL};
+
+struct pvt {
+ struct hostent host;
+ char * h_addr_ptrs[MAXADDRS + 1];
+ char * host_aliases[MAXALIASES];
+ char hostbuf[8*1024];
+ u_char host_addr[16]; /*%< IPv4 or IPv6 */
+ struct __res_state *res;
+ void (*free_res)(void *);
+};
+
+typedef union {
+ int32_t al;
+ char ac;
+} align;
+
+static const u_char mapped[] = { 0,0, 0,0, 0,0, 0,0, 0,0, 0xff,0xff };
+static const u_char tunnelled[] = { 0,0, 0,0, 0,0, 0,0, 0,0, 0,0 };
+/* Note: the IPv6 loopback address is in the "tunnel" space */
+static const u_char v6local[] = { 0,0, 0,1 }; /*%< last 4 bytes of IPv6 addr */
+/* Forwards. */
+
+static void ho_close(struct irs_ho *this);
+static struct hostent * ho_byname(struct irs_ho *this, const char *name);
+static struct hostent * ho_byname2(struct irs_ho *this, const char *name,
+ int af);
+static struct hostent * ho_byaddr(struct irs_ho *this, const void *addr,
+ int len, int af);
+static struct hostent * ho_next(struct irs_ho *this);
+static void ho_rewind(struct irs_ho *this);
+static void ho_minimize(struct irs_ho *this);
+static struct __res_state * ho_res_get(struct irs_ho *this);
+static void ho_res_set(struct irs_ho *this,
+ struct __res_state *res,
+ void (*free_res)(void *));
+static struct addrinfo * ho_addrinfo(struct irs_ho *this, const char *name,
+ const struct addrinfo *pai);
+
+static void map_v4v6_hostent(struct hostent *hp, char **bp,
+ char *ep);
+static void addrsort(res_state, char **, int);
+static struct hostent * gethostans(struct irs_ho *this,
+ const u_char *ansbuf, int anslen,
+ const char *qname, int qtype,
+ int af, int size,
+ struct addrinfo **ret_aip,
+ const struct addrinfo *pai);
+static int add_hostent(struct pvt *pvt, char *bp, char **hap,
+ struct addrinfo *ai);
+static int init(struct irs_ho *this);
+
+/* Exports. */
+
+struct irs_ho *
+irs_dns_ho(struct irs_acc *this) {
+ struct irs_ho *ho;
+ struct pvt *pvt;
+
+ UNUSED(this);
+
+ if (!(pvt = memget(sizeof *pvt))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+
+ if (!(ho = memget(sizeof *ho))) {
+ memput(pvt, sizeof *pvt);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(ho, 0x5e, sizeof *ho);
+ ho->private = pvt;
+ ho->close = ho_close;
+ ho->byname = ho_byname;
+ ho->byname2 = ho_byname2;
+ ho->byaddr = ho_byaddr;
+ ho->next = ho_next;
+ ho->rewind = ho_rewind;
+ ho->minimize = ho_minimize;
+ ho->res_get = ho_res_get;
+ ho->res_set = ho_res_set;
+ ho->addrinfo = ho_addrinfo;
+ return (ho);
+}
+
+/* Methods. */
+
+static void
+ho_close(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ ho_minimize(this);
+ if (pvt->res && pvt->free_res)
+ (*pvt->free_res)(pvt->res);
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+static struct hostent *
+ho_byname(struct irs_ho *this, const char *name) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct hostent *hp;
+
+ if (init(this) == -1)
+ return (NULL);
+
+ if (pvt->res->options & RES_USE_INET6) {
+ hp = ho_byname2(this, name, AF_INET6);
+ if (hp)
+ return (hp);
+ }
+ return (ho_byname2(this, name, AF_INET));
+}
+
+static struct hostent *
+ho_byname2(struct irs_ho *this, const char *name, int af)
+{
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct hostent *hp = NULL;
+ int n, size;
+ char tmp[NS_MAXDNAME];
+ const char *cp;
+ struct addrinfo ai;
+ struct dns_res_target *q, *p;
+ int querystate = RESQRY_FAIL;
+
+ if (init(this) == -1)
+ return (NULL);
+
+ q = memget(sizeof(*q));
+ if (q == NULL) {
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ errno = ENOMEM;
+ goto cleanup;
+ }
+ memset(q, 0, sizeof(*q));
+
+ switch (af) {
+ case AF_INET:
+ size = INADDRSZ;
+ q->qclass = C_IN;
+ q->qtype = T_A;
+ q->answer = q->qbuf.buf;
+ q->anslen = sizeof(q->qbuf);
+ q->action = RESTGT_DOALWAYS;
+ break;
+ case AF_INET6:
+ size = IN6ADDRSZ;
+ q->qclass = C_IN;
+ q->qtype = T_AAAA;
+ q->answer = q->qbuf.buf;
+ q->anslen = sizeof(q->qbuf);
+ q->action = RESTGT_DOALWAYS;
+ break;
+ default:
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ errno = EAFNOSUPPORT;
+ hp = NULL;
+ goto cleanup;
+ }
+
+ /*
+ * if there aren't any dots, it could be a user-level alias.
+ * this is also done in res_nquery() since we are not the only
+ * function that looks up host names.
+ */
+ if (!strchr(name, '.') && (cp = res_hostalias(pvt->res, name,
+ tmp, sizeof tmp)))
+ name = cp;
+
+ for (p = q; p; p = p->next) {
+ switch(p->action) {
+ case RESTGT_DOALWAYS:
+ break;
+ case RESTGT_AFTERFAILURE:
+ if (querystate == RESQRY_SUCCESS)
+ continue;
+ break;
+ case RESTGT_IGNORE:
+ continue;
+ }
+
+ if ((n = res_nsearch(pvt->res, name, p->qclass, p->qtype,
+ p->answer, p->anslen)) < 0) {
+ querystate = RESQRY_FAIL;
+ continue;
+ }
+
+ memset(&ai, 0, sizeof(ai));
+ ai.ai_family = af;
+ if ((hp = gethostans(this, p->answer, n, name, p->qtype,
+ af, size, NULL,
+ (const struct addrinfo *)&ai)) != NULL)
+ goto cleanup; /*%< no more loop is necessary */
+ querystate = RESQRY_FAIL;
+ continue;
+ }
+
+ cleanup:
+ if (q != NULL)
+ memput(q, sizeof(*q));
+ return(hp);
+}
+
+static struct hostent *
+ho_byaddr(struct irs_ho *this, const void *addr, int len, int af)
+{
+ struct pvt *pvt = (struct pvt *)this->private;
+ const u_char *uaddr = addr;
+ char *qp;
+ struct hostent *hp = NULL;
+ struct addrinfo ai;
+ struct dns_res_target *q, *q2, *p;
+ int n, size, i;
+ int querystate = RESQRY_FAIL;
+
+ if (init(this) == -1)
+ return (NULL);
+
+ q = memget(sizeof(*q));
+ q2 = memget(sizeof(*q2));
+ if (q == NULL || q2 == NULL) {
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ errno = ENOMEM;
+ goto cleanup;
+ }
+ memset(q, 0, sizeof(*q));
+ memset(q2, 0, sizeof(*q2));
+
+ if (af == AF_INET6 && len == IN6ADDRSZ &&
+ (!memcmp(uaddr, mapped, sizeof mapped) ||
+ (!memcmp(uaddr, tunnelled, sizeof tunnelled) &&
+ memcmp(&uaddr[sizeof tunnelled], v6local, sizeof(v6local))))) {
+ /* Unmap. */
+ addr = (const char *)addr + sizeof mapped;
+ uaddr += sizeof mapped;
+ af = AF_INET;
+ len = INADDRSZ;
+ }
+ switch (af) {
+ case AF_INET:
+ size = INADDRSZ;
+ q->qclass = C_IN;
+ q->qtype = T_PTR;
+ q->answer = q->qbuf.buf;
+ q->anslen = sizeof(q->qbuf);
+ q->action = RESTGT_DOALWAYS;
+ break;
+ case AF_INET6:
+ size = IN6ADDRSZ;
+ q->qclass = C_IN;
+ q->qtype = T_PTR;
+ q->answer = q->qbuf.buf;
+ q->anslen = sizeof(q->qbuf);
+ q->next = q2;
+ q->action = RESTGT_DOALWAYS;
+ q2->qclass = C_IN;
+ q2->qtype = T_PTR;
+ q2->answer = q2->qbuf.buf;
+ q2->anslen = sizeof(q2->qbuf);
+ if ((pvt->res->options & RES_NO_NIBBLE2) != 0U)
+ q2->action = RESTGT_IGNORE;
+ else
+ q2->action = RESTGT_AFTERFAILURE;
+ break;
+ default:
+ errno = EAFNOSUPPORT;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ hp = NULL;
+ goto cleanup;
+ }
+ if (size > len) {
+ errno = EINVAL;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ hp = NULL;
+ goto cleanup;
+ }
+ switch (af) {
+ case AF_INET:
+ qp = q->qname;
+ (void) sprintf(qp, "%u.%u.%u.%u.in-addr.arpa",
+ (uaddr[3] & 0xff),
+ (uaddr[2] & 0xff),
+ (uaddr[1] & 0xff),
+ (uaddr[0] & 0xff));
+ break;
+ case AF_INET6:
+ if (q->action != RESTGT_IGNORE) {
+ const char *nibsuff = res_get_nibblesuffix(pvt->res);
+ qp = q->qname;
+ for (n = IN6ADDRSZ - 1; n >= 0; n--) {
+ i = SPRINTF((qp, "%x.%x.",
+ uaddr[n] & 0xf,
+ (uaddr[n] >> 4) & 0xf));
+ if (i != 4)
+ abort();
+ qp += i;
+ }
+ if (strlen(q->qname) + strlen(nibsuff) + 1 >
+ sizeof q->qname) {
+ errno = ENAMETOOLONG;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ hp = NULL;
+ goto cleanup;
+ }
+ strcpy(qp, nibsuff); /* (checked) */
+ }
+ if (q2->action != RESTGT_IGNORE) {
+ const char *nibsuff2 = res_get_nibblesuffix2(pvt->res);
+ qp = q2->qname;
+ for (n = IN6ADDRSZ - 1; n >= 0; n--) {
+ i = SPRINTF((qp, "%x.%x.",
+ uaddr[n] & 0xf,
+ (uaddr[n] >> 4) & 0xf));
+ if (i != 4)
+ abort();
+ qp += i;
+ }
+ if (strlen(q2->qname) + strlen(nibsuff2) + 1 >
+ sizeof q2->qname) {
+ errno = ENAMETOOLONG;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ hp = NULL;
+ goto cleanup;
+ }
+ strcpy(qp, nibsuff2); /* (checked) */
+ }
+ break;
+ default:
+ abort();
+ }
+
+ for (p = q; p; p = p->next) {
+ switch(p->action) {
+ case RESTGT_DOALWAYS:
+ break;
+ case RESTGT_AFTERFAILURE:
+ if (querystate == RESQRY_SUCCESS)
+ continue;
+ break;
+ case RESTGT_IGNORE:
+ continue;
+ }
+
+ if ((n = res_nquery(pvt->res, p->qname, p->qclass, p->qtype,
+ p->answer, p->anslen)) < 0) {
+ querystate = RESQRY_FAIL;
+ continue;
+ }
+
+ memset(&ai, 0, sizeof(ai));
+ ai.ai_family = af;
+ hp = gethostans(this, p->answer, n, p->qname, T_PTR, af, size,
+ NULL, (const struct addrinfo *)&ai);
+ if (!hp) {
+ querystate = RESQRY_FAIL;
+ continue;
+ }
+
+ memcpy(pvt->host_addr, addr, len);
+ pvt->h_addr_ptrs[0] = (char *)pvt->host_addr;
+ pvt->h_addr_ptrs[1] = NULL;
+ if (af == AF_INET && (pvt->res->options & RES_USE_INET6)) {
+ map_v4v6_address((char*)pvt->host_addr,
+ (char*)pvt->host_addr);
+ pvt->host.h_addrtype = AF_INET6;
+ pvt->host.h_length = IN6ADDRSZ;
+ }
+
+ RES_SET_H_ERRNO(pvt->res, NETDB_SUCCESS);
+ goto cleanup; /*%< no more loop is necessary. */
+ }
+ hp = NULL; /*%< H_ERRNO was set by subroutines */
+ cleanup:
+ if (q != NULL)
+ memput(q, sizeof(*q));
+ if (q2 != NULL)
+ memput(q2, sizeof(*q2));
+ return(hp);
+}
+
+static struct hostent *
+ho_next(struct irs_ho *this) {
+
+ UNUSED(this);
+
+ return (NULL);
+}
+
+static void
+ho_rewind(struct irs_ho *this) {
+
+ UNUSED(this);
+
+ /* NOOP */
+}
+
+static void
+ho_minimize(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->res)
+ res_nclose(pvt->res);
+}
+
+static struct __res_state *
+ho_res_get(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (!pvt->res) {
+ struct __res_state *res;
+ res = (struct __res_state *)malloc(sizeof *res);
+ if (!res) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(res, 0, sizeof *res);
+ ho_res_set(this, res, free);
+ }
+
+ return (pvt->res);
+}
+
+/* XXX */
+extern struct addrinfo *addr2addrinfo __P((const struct addrinfo *,
+ const char *));
+
+static struct addrinfo *
+ho_addrinfo(struct irs_ho *this, const char *name, const struct addrinfo *pai)
+{
+ struct pvt *pvt = (struct pvt *)this->private;
+ int n;
+ char tmp[NS_MAXDNAME];
+ const char *cp;
+ struct dns_res_target *q, *q2, *p;
+ struct addrinfo sentinel, *cur;
+ int querystate = RESQRY_FAIL;
+
+ if (init(this) == -1)
+ return (NULL);
+
+ memset(&sentinel, 0, sizeof(sentinel));
+ cur = &sentinel;
+
+ q = memget(sizeof(*q));
+ q2 = memget(sizeof(*q2));
+ if (q == NULL || q2 == NULL) {
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ errno = ENOMEM;
+ goto cleanup;
+ }
+ memset(q, 0, sizeof(*q2));
+ memset(q2, 0, sizeof(*q2));
+
+ switch (pai->ai_family) {
+ case AF_UNSPEC:
+ /* prefer IPv6 */
+ q->qclass = C_IN;
+ q->qtype = T_AAAA;
+ q->answer = q->qbuf.buf;
+ q->anslen = sizeof(q->qbuf);
+ q->next = q2;
+ q->action = RESTGT_DOALWAYS;
+ q2->qclass = C_IN;
+ q2->qtype = T_A;
+ q2->answer = q2->qbuf.buf;
+ q2->anslen = sizeof(q2->qbuf);
+ q2->action = RESTGT_DOALWAYS;
+ break;
+ case AF_INET:
+ q->qclass = C_IN;
+ q->qtype = T_A;
+ q->answer = q->qbuf.buf;
+ q->anslen = sizeof(q->qbuf);
+ q->action = RESTGT_DOALWAYS;
+ break;
+ case AF_INET6:
+ q->qclass = C_IN;
+ q->qtype = T_AAAA;
+ q->answer = q->qbuf.buf;
+ q->anslen = sizeof(q->qbuf);
+ q->action = RESTGT_DOALWAYS;
+ break;
+ default:
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY); /*%< better error? */
+ goto cleanup;
+ }
+
+ /*
+ * if there aren't any dots, it could be a user-level alias.
+ * this is also done in res_nquery() since we are not the only
+ * function that looks up host names.
+ */
+ if (!strchr(name, '.') && (cp = res_hostalias(pvt->res, name,
+ tmp, sizeof tmp)))
+ name = cp;
+
+ for (p = q; p; p = p->next) {
+ struct addrinfo *ai;
+
+ switch(p->action) {
+ case RESTGT_DOALWAYS:
+ break;
+ case RESTGT_AFTERFAILURE:
+ if (querystate == RESQRY_SUCCESS)
+ continue;
+ break;
+ case RESTGT_IGNORE:
+ continue;
+ }
+
+ if ((n = res_nsearch(pvt->res, name, p->qclass, p->qtype,
+ p->answer, p->anslen)) < 0) {
+ querystate = RESQRY_FAIL;
+ continue;
+ }
+ (void)gethostans(this, p->answer, n, name, p->qtype,
+ pai->ai_family, /*%< XXX: meaningless */
+ 0, &ai, pai);
+ if (ai) {
+ querystate = RESQRY_SUCCESS;
+ cur->ai_next = ai;
+ while (cur->ai_next)
+ cur = cur->ai_next;
+ } else
+ querystate = RESQRY_FAIL;
+ }
+
+ cleanup:
+ if (q != NULL)
+ memput(q, sizeof(*q));
+ if (q2 != NULL)
+ memput(q2, sizeof(*q2));
+ return(sentinel.ai_next);
+}
+
+static void
+ho_res_set(struct irs_ho *this, struct __res_state *res,
+ void (*free_res)(void *)) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->res && pvt->free_res) {
+ res_nclose(pvt->res);
+ (*pvt->free_res)(pvt->res);
+ }
+
+ pvt->res = res;
+ pvt->free_res = free_res;
+}
+
+/* Private. */
+
+static struct hostent *
+gethostans(struct irs_ho *this,
+ const u_char *ansbuf, int anslen, const char *qname, int qtype,
+ int af, int size, /*!< meaningless for addrinfo cases */
+ struct addrinfo **ret_aip, const struct addrinfo *pai)
+{
+ struct pvt *pvt = (struct pvt *)this->private;
+ int type, class, ancount, qdcount, n, haveanswer, had_error;
+ int error = NETDB_SUCCESS;
+ int (*name_ok)(const char *);
+ const HEADER *hp;
+ const u_char *eom;
+ const u_char *eor;
+ const u_char *cp;
+ const char *tname;
+ const char *hname;
+ char *bp, *ep, **ap, **hap;
+ char tbuf[MAXDNAME+1];
+ struct addrinfo sentinel, *cur, ai;
+
+ if (pai == NULL) abort();
+ if (ret_aip != NULL)
+ *ret_aip = NULL;
+ memset(&sentinel, 0, sizeof(sentinel));
+ cur = &sentinel;
+
+ tname = qname;
+ eom = ansbuf + anslen;
+ switch (qtype) {
+ case T_A:
+ case T_AAAA:
+ case T_ANY: /*%< use T_ANY only for T_A/T_AAAA lookup */
+ name_ok = res_hnok;
+ break;
+ case T_PTR:
+ name_ok = res_dnok;
+ break;
+ default:
+ abort();
+ }
+
+ pvt->host.h_addrtype = af;
+ pvt->host.h_length = size;
+ hname = pvt->host.h_name = NULL;
+
+ /*
+ * Find first satisfactory answer.
+ */
+ if (ansbuf + HFIXEDSZ > eom) {
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY);
+ return (NULL);
+ }
+ hp = (const HEADER *)ansbuf;
+ ancount = ntohs(hp->ancount);
+ qdcount = ntohs(hp->qdcount);
+ bp = pvt->hostbuf;
+ ep = pvt->hostbuf + sizeof(pvt->hostbuf);
+ cp = ansbuf + HFIXEDSZ;
+ if (qdcount != 1) {
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY);
+ return (NULL);
+ }
+ n = dn_expand(ansbuf, eom, cp, bp, ep - bp);
+ if (n < 0 || !maybe_ok(pvt->res, bp, name_ok)) {
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY);
+ return (NULL);
+ }
+ cp += n + QFIXEDSZ;
+ if (cp > eom) {
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY);
+ return (NULL);
+ }
+ if (qtype == T_A || qtype == T_AAAA || qtype == T_ANY) {
+ /* res_nsend() has already verified that the query name is the
+ * same as the one we sent; this just gets the expanded name
+ * (i.e., with the succeeding search-domain tacked on).
+ */
+ n = strlen(bp) + 1; /*%< for the \\0 */
+ if (n > MAXHOSTNAMELEN) {
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY);
+ return (NULL);
+ }
+ pvt->host.h_name = bp;
+ hname = bp;
+ bp += n;
+ /* The qname can be abbreviated, but hname is now absolute. */
+ qname = pvt->host.h_name;
+ }
+ ap = pvt->host_aliases;
+ *ap = NULL;
+ pvt->host.h_aliases = pvt->host_aliases;
+ hap = pvt->h_addr_ptrs;
+ *hap = NULL;
+ pvt->host.h_addr_list = pvt->h_addr_ptrs;
+ haveanswer = 0;
+ had_error = 0;
+ while (ancount-- > 0 && cp < eom && !had_error) {
+ n = dn_expand(ansbuf, eom, cp, bp, ep - bp);
+ if (n < 0 || !maybe_ok(pvt->res, bp, name_ok)) {
+ had_error++;
+ continue;
+ }
+ cp += n; /*%< name */
+ BOUNDS_CHECK(cp, 3 * INT16SZ + INT32SZ);
+ type = ns_get16(cp);
+ cp += INT16SZ; /*%< type */
+ class = ns_get16(cp);
+ cp += INT16SZ + INT32SZ; /*%< class, TTL */
+ n = ns_get16(cp);
+ cp += INT16SZ; /*%< len */
+ BOUNDS_CHECK(cp, n);
+ if (class != C_IN) {
+ cp += n;
+ continue;
+ }
+ eor = cp + n;
+ if ((qtype == T_A || qtype == T_AAAA || qtype == T_ANY) &&
+ type == T_CNAME) {
+ if (haveanswer) {
+ int level = LOG_CRIT;
+#ifdef LOG_SECURITY
+ level |= LOG_SECURITY;
+#endif
+ syslog(level,
+ "gethostans: possible attempt to exploit buffer overflow while looking up %s",
+ *qname ? qname : ".");
+ }
+ n = dn_expand(ansbuf, eor, cp, tbuf, sizeof tbuf);
+ if (n < 0 || !maybe_ok(pvt->res, tbuf, name_ok)) {
+ had_error++;
+ continue;
+ }
+ cp += n;
+ /* Store alias. */
+ if (ap >= &pvt->host_aliases[MAXALIASES-1])
+ continue;
+ *ap++ = bp;
+ n = strlen(bp) + 1; /*%< for the \\0 */
+ bp += n;
+ /* Get canonical name. */
+ n = strlen(tbuf) + 1; /*%< for the \\0 */
+ if (n > (ep - bp) || n > MAXHOSTNAMELEN) {
+ had_error++;
+ continue;
+ }
+ strcpy(bp, tbuf); /* (checked) */
+ pvt->host.h_name = bp;
+ hname = bp;
+ bp += n;
+ continue;
+ }
+ if (qtype == T_PTR && type == T_CNAME) {
+ n = dn_expand(ansbuf, eor, cp, tbuf, sizeof tbuf);
+ if (n < 0 || !maybe_dnok(pvt->res, tbuf)) {
+ had_error++;
+ continue;
+ }
+ cp += n;
+#ifdef RES_USE_DNAME
+ if ((pvt->res->options & RES_USE_DNAME) != 0U)
+#endif
+ {
+ /*
+ * We may be able to check this regardless
+ * of the USE_DNAME bit, but we add the check
+ * for now since the DNAME support is
+ * experimental.
+ */
+ if (ns_samename(tname, bp) != 1)
+ continue;
+ }
+ /* Get canonical name. */
+ n = strlen(tbuf) + 1; /*%< for the \\0 */
+ if (n > (ep - bp)) {
+ had_error++;
+ continue;
+ }
+ strcpy(bp, tbuf); /* (checked) */
+ tname = bp;
+ bp += n;
+ continue;
+ }
+ if (qtype == T_ANY) {
+ if (!(type == T_A || type == T_AAAA)) {
+ cp += n;
+ continue;
+ }
+ } else if (type != qtype) {
+ cp += n;
+ continue;
+ }
+ switch (type) {
+ case T_PTR:
+ if (ret_aip != NULL) {
+ /* addrinfo never needs T_PTR */
+ cp += n;
+ continue;
+ }
+ if (ns_samename(tname, bp) != 1) {
+ cp += n;
+ continue;
+ }
+ n = dn_expand(ansbuf, eor, cp, bp, ep - bp);
+ if (n < 0 || !maybe_hnok(pvt->res, bp) ||
+ n >= MAXHOSTNAMELEN) {
+ had_error++;
+ break;
+ }
+ cp += n;
+ if (!haveanswer) {
+ pvt->host.h_name = bp;
+ hname = bp;
+ }
+ else if (ap < &pvt->host_aliases[MAXALIASES-1])
+ *ap++ = bp;
+ else
+ n = -1;
+ if (n != -1) {
+ n = strlen(bp) + 1; /*%< for the \\0 */
+ bp += n;
+ }
+ break;
+ case T_A:
+ case T_AAAA:
+ if (ns_samename(hname, bp) != 1) {
+ cp += n;
+ continue;
+ }
+ if (type == T_A && n != INADDRSZ) {
+ cp += n;
+ continue;
+ }
+ if (type == T_AAAA && n != IN6ADDRSZ) {
+ cp += n;
+ continue;
+ }
+
+ /* make addrinfo. don't overwrite constant PAI */
+ ai = *pai;
+ ai.ai_family = (type == T_AAAA) ? AF_INET6 : AF_INET;
+ cur->ai_next = addr2addrinfo(
+ (const struct addrinfo *)&ai,
+ (const char *)cp);
+ if (cur->ai_next == NULL)
+ had_error++;
+
+ if (!haveanswer) {
+ int nn;
+
+ nn = strlen(bp) + 1; /*%< for the \\0 */
+ if (nn >= MAXHOSTNAMELEN) {
+ cp += n;
+ had_error++;
+ continue;
+ }
+ pvt->host.h_name = bp;
+ hname = bp;
+ bp += nn;
+ }
+ /* Ensure alignment. */
+ bp = (char *)(((u_long)bp + (sizeof(align) - 1)) &
+ ~(sizeof(align) - 1));
+ /* Avoid overflows. */
+ if (bp + n > &pvt->hostbuf[sizeof(pvt->hostbuf) - 1]) {
+ had_error++;
+ continue;
+ }
+ if (ret_aip) { /*%< need addrinfo. keep it. */
+ while (cur->ai_next)
+ cur = cur->ai_next;
+ } else if (cur->ai_next) { /*%< need hostent */
+ struct addrinfo *aip = cur->ai_next;
+
+ for (aip = cur->ai_next; aip;
+ aip = aip->ai_next) {
+ int m;
+
+ m = add_hostent(pvt, bp, hap, aip);
+ if (m < 0) {
+ had_error++;
+ break;
+ }
+ if (m == 0)
+ continue;
+ if (hap < &pvt->h_addr_ptrs[MAXADDRS])
+ hap++;
+ *hap = NULL;
+ bp += m;
+ }
+
+ freeaddrinfo(cur->ai_next);
+ cur->ai_next = NULL;
+ }
+ cp += n;
+ break;
+ default:
+ abort();
+ }
+ if (!had_error)
+ haveanswer++;
+ }
+ if (haveanswer) {
+ if (ret_aip == NULL) {
+ *ap = NULL;
+ *hap = NULL;
+
+ if (pvt->res->nsort && hap != pvt->h_addr_ptrs &&
+ qtype == T_A)
+ addrsort(pvt->res, pvt->h_addr_ptrs,
+ hap - pvt->h_addr_ptrs);
+ if (pvt->host.h_name == NULL) {
+ n = strlen(qname) + 1; /*%< for the \\0 */
+ if (n > (ep - bp) || n >= MAXHOSTNAMELEN)
+ goto no_recovery;
+ strcpy(bp, qname); /* (checked) */
+ pvt->host.h_name = bp;
+ bp += n;
+ }
+ if (pvt->res->options & RES_USE_INET6)
+ map_v4v6_hostent(&pvt->host, &bp, ep);
+ RES_SET_H_ERRNO(pvt->res, NETDB_SUCCESS);
+ return (&pvt->host);
+ } else {
+ if ((pai->ai_flags & AI_CANONNAME) != 0) {
+ if (pvt->host.h_name == NULL) {
+ sentinel.ai_next->ai_canonname =
+ strdup(qname);
+ }
+ else {
+ sentinel.ai_next->ai_canonname =
+ strdup(pvt->host.h_name);
+ }
+ }
+ *ret_aip = sentinel.ai_next;
+ return(NULL);
+ }
+ }
+ no_recovery:
+ if (sentinel.ai_next) {
+ /* this should be impossible, but check it for safety */
+ freeaddrinfo(sentinel.ai_next);
+ }
+ if (error == NETDB_SUCCESS)
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY);
+ else
+ RES_SET_H_ERRNO(pvt->res, error);
+ return(NULL);
+}
+
+static int
+add_hostent(struct pvt *pvt, char *bp, char **hap, struct addrinfo *ai)
+{
+ int addrlen;
+ char *addrp;
+ const char **tap;
+ char *obp = bp;
+
+ switch(ai->ai_addr->sa_family) {
+ case AF_INET6:
+ addrlen = IN6ADDRSZ;
+ addrp = (char *)&((struct sockaddr_in6 *)ai->ai_addr)->sin6_addr;
+ break;
+ case AF_INET:
+ addrlen = INADDRSZ;
+ addrp = (char *)&((struct sockaddr_in *)ai->ai_addr)->sin_addr;
+ break;
+ default:
+ return(-1); /*%< abort? */
+ }
+
+ /* Ensure alignment. */
+ bp = (char *)(((u_long)bp + (sizeof(align) - 1)) &
+ ~(sizeof(align) - 1));
+ /* Avoid overflows. */
+ if (bp + addrlen > &pvt->hostbuf[sizeof(pvt->hostbuf) - 1])
+ return(-1);
+ if (hap >= &pvt->h_addr_ptrs[MAXADDRS])
+ return(0); /*%< fail, but not treat it as an error. */
+ /* Suppress duplicates. */
+ for (tap = (const char **)pvt->h_addr_ptrs;
+ *tap != NULL;
+ tap++)
+ if (memcmp(*tap, addrp, addrlen) == 0)
+ break;
+ if (*tap != NULL)
+ return (0);
+
+ memcpy(*hap = bp, addrp, addrlen);
+ return((bp + addrlen) - obp);
+}
+
+static void
+map_v4v6_hostent(struct hostent *hp, char **bpp, char *ep) {
+ char **ap;
+
+ if (hp->h_addrtype != AF_INET || hp->h_length != INADDRSZ)
+ return;
+ hp->h_addrtype = AF_INET6;
+ hp->h_length = IN6ADDRSZ;
+ for (ap = hp->h_addr_list; *ap; ap++) {
+ int i = (u_long)*bpp % sizeof(align);
+
+ if (i != 0)
+ i = sizeof(align) - i;
+
+ if ((ep - *bpp) < (i + IN6ADDRSZ)) {
+ /* Out of memory. Truncate address list here. */
+ *ap = NULL;
+ return;
+ }
+ *bpp += i;
+ map_v4v6_address(*ap, *bpp);
+ *ap = *bpp;
+ *bpp += IN6ADDRSZ;
+ }
+}
+
+static void
+addrsort(res_state statp, char **ap, int num) {
+ int i, j, needsort = 0, aval[MAXADDRS];
+ char **p;
+
+ p = ap;
+ for (i = 0; i < num; i++, p++) {
+ for (j = 0 ; (unsigned)j < statp->nsort; j++)
+ if (statp->sort_list[j].addr.s_addr ==
+ (((struct in_addr *)(*p))->s_addr &
+ statp->sort_list[j].mask))
+ break;
+ aval[i] = j;
+ if (needsort == 0 && i > 0 && j < aval[i-1])
+ needsort = i;
+ }
+ if (!needsort)
+ return;
+
+ while (needsort < num) {
+ for (j = needsort - 1; j >= 0; j--) {
+ if (aval[j] > aval[j+1]) {
+ char *hp;
+
+ i = aval[j];
+ aval[j] = aval[j+1];
+ aval[j+1] = i;
+
+ hp = ap[j];
+ ap[j] = ap[j+1];
+ ap[j+1] = hp;
+
+ } else
+ break;
+ }
+ needsort++;
+ }
+}
+
+static int
+init(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (!pvt->res && !ho_res_get(this))
+ return (-1);
+ if (((pvt->res->options & RES_INIT) == 0U) &&
+ res_ninit(pvt->res) == -1)
+ return (-1);
+ return (0);
+}
diff --git a/usr/src/lib/libresolv2_joy/common/irs/dns_nw.c b/usr/src/lib/libresolv2_joy/common/irs/dns_nw.c
new file mode 100644
index 0000000000..e9acfd39c7
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/dns_nw.c
@@ -0,0 +1,591 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: dns_nw.c,v 1.12 2005/04/27 04:56:22 sra Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/* Imports. */
+
+#include "port_before.h"
+
+#include <sys/param.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <isc/memcluster.h>
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "dns_p.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) sprintf x
+#endif
+
+/* Definitions. */
+
+#define MAXALIASES 35
+
+#define MAXPACKET (64*1024)
+
+struct pvt {
+ struct nwent net;
+ char * ali[MAXALIASES];
+ char buf[BUFSIZ+1];
+ struct __res_state * res;
+ void (*free_res)(void *);
+};
+
+typedef union {
+ long al;
+ char ac;
+} align;
+
+enum by_what { by_addr, by_name };
+
+/* Forwards. */
+
+static void nw_close(struct irs_nw *);
+static struct nwent * nw_byname(struct irs_nw *, const char *, int);
+static struct nwent * nw_byaddr(struct irs_nw *, void *, int, int);
+static struct nwent * nw_next(struct irs_nw *);
+static void nw_rewind(struct irs_nw *);
+static void nw_minimize(struct irs_nw *);
+static struct __res_state * nw_res_get(struct irs_nw *this);
+static void nw_res_set(struct irs_nw *this,
+ struct __res_state *res,
+ void (*free_res)(void *));
+
+static struct nwent * get1101byaddr(struct irs_nw *, u_char *, int);
+static struct nwent * get1101byname(struct irs_nw *, const char *);
+static struct nwent * get1101answer(struct irs_nw *,
+ u_char *ansbuf, int anslen,
+ enum by_what by_what,
+ int af, const char *name,
+ const u_char *addr, int addrlen);
+static struct nwent * get1101mask(struct irs_nw *this, struct nwent *);
+static int make1101inaddr(const u_char *, int, char *, int);
+static void normalize_name(char *name);
+static int init(struct irs_nw *this);
+
+/* Exports. */
+
+struct irs_nw *
+irs_dns_nw(struct irs_acc *this) {
+ struct irs_nw *nw;
+ struct pvt *pvt;
+
+ UNUSED(this);
+
+ if (!(pvt = memget(sizeof *pvt))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ if (!(nw = memget(sizeof *nw))) {
+ memput(pvt, sizeof *pvt);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(nw, 0x5e, sizeof *nw);
+ nw->private = pvt;
+ nw->close = nw_close;
+ nw->byname = nw_byname;
+ nw->byaddr = nw_byaddr;
+ nw->next = nw_next;
+ nw->rewind = nw_rewind;
+ nw->minimize = nw_minimize;
+ nw->res_get = nw_res_get;
+ nw->res_set = nw_res_set;
+ return (nw);
+}
+
+/* Methods. */
+
+static void
+nw_close(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ nw_minimize(this);
+
+ if (pvt->res && pvt->free_res)
+ (*pvt->free_res)(pvt->res);
+
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+static struct nwent *
+nw_byname(struct irs_nw *this, const char *name, int af) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (init(this) == -1)
+ return (NULL);
+
+ switch (af) {
+ case AF_INET:
+ return (get1101byname(this, name));
+ default:
+ (void)NULL;
+ }
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ errno = EAFNOSUPPORT;
+ return (NULL);
+}
+
+static struct nwent *
+nw_byaddr(struct irs_nw *this, void *net, int len, int af) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (init(this) == -1)
+ return (NULL);
+
+ switch (af) {
+ case AF_INET:
+ return (get1101byaddr(this, net, len));
+ default:
+ (void)NULL;
+ }
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ errno = EAFNOSUPPORT;
+ return (NULL);
+}
+
+static struct nwent *
+nw_next(struct irs_nw *this) {
+
+ UNUSED(this);
+
+ return (NULL);
+}
+
+static void
+nw_rewind(struct irs_nw *this) {
+ UNUSED(this);
+ /* NOOP */
+}
+
+static void
+nw_minimize(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->res)
+ res_nclose(pvt->res);
+}
+
+static struct __res_state *
+nw_res_get(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (!pvt->res) {
+ struct __res_state *res;
+ res = (struct __res_state *)malloc(sizeof *res);
+ if (!res) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(res, 0, sizeof *res);
+ nw_res_set(this, res, free);
+ }
+
+ return (pvt->res);
+}
+
+static void
+nw_res_set(struct irs_nw *this, struct __res_state *res,
+ void (*free_res)(void *)) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->res && pvt->free_res) {
+ res_nclose(pvt->res);
+ (*pvt->free_res)(pvt->res);
+ }
+
+ pvt->res = res;
+ pvt->free_res = free_res;
+}
+
+/* Private. */
+
+static struct nwent *
+get1101byname(struct irs_nw *this, const char *name) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ u_char *ansbuf;
+ int anslen;
+ struct nwent *result;
+
+ ansbuf = memget(MAXPACKET);
+ if (ansbuf == NULL) {
+ errno = ENOMEM;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ return (NULL);
+ }
+ anslen = res_nsearch(pvt->res, name, C_IN, T_PTR, ansbuf, MAXPACKET);
+ if (anslen < 0) {
+ memput(ansbuf, MAXPACKET);
+ return (NULL);
+ }
+ result = get1101mask(this, get1101answer(this, ansbuf, anslen, by_name,
+ AF_INET, name, NULL, 0));
+ memput(ansbuf, MAXPACKET);
+ return (result);
+}
+
+static struct nwent *
+get1101byaddr(struct irs_nw *this, u_char *net, int len) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ char qbuf[sizeof "255.255.255.255.in-addr.arpa"];
+ struct nwent *result;
+ u_char *ansbuf;
+ int anslen;
+
+ if (len < 1 || len > 32) {
+ errno = EINVAL;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ return (NULL);
+ }
+ if (make1101inaddr(net, len, qbuf, sizeof qbuf) < 0)
+ return (NULL);
+ ansbuf = memget(MAXPACKET);
+ if (ansbuf == NULL) {
+ errno = ENOMEM;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ return (NULL);
+ }
+ anslen = res_nquery(pvt->res, qbuf, C_IN, T_PTR, ansbuf, MAXPACKET);
+ if (anslen < 0) {
+ memput(ansbuf, MAXPACKET);
+ return (NULL);
+ }
+ result = get1101mask(this, get1101answer(this, ansbuf, anslen, by_addr,
+ AF_INET, NULL, net, len));
+ memput(ansbuf, MAXPACKET);
+ return (result);
+}
+
+static struct nwent *
+get1101answer(struct irs_nw *this,
+ u_char *ansbuf, int anslen, enum by_what by_what,
+ int af, const char *name, const u_char *addr, int addrlen)
+{
+ struct pvt *pvt = (struct pvt *)this->private;
+ int type, class, ancount, qdcount, haveanswer;
+ char *bp, *ep, **ap;
+ u_char *cp, *eom;
+ HEADER *hp;
+
+ /* Initialize, and parse header. */
+ eom = ansbuf + anslen;
+ if (ansbuf + HFIXEDSZ > eom) {
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY);
+ return (NULL);
+ }
+ hp = (HEADER *)ansbuf;
+ cp = ansbuf + HFIXEDSZ;
+ qdcount = ntohs(hp->qdcount);
+ while (qdcount-- > 0) {
+ int n = dn_skipname(cp, eom);
+ cp += n + QFIXEDSZ;
+ if (n < 0 || cp > eom) {
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY);
+ return (NULL);
+ }
+ }
+ ancount = ntohs(hp->ancount);
+ if (!ancount) {
+ if (hp->aa)
+ RES_SET_H_ERRNO(pvt->res, HOST_NOT_FOUND);
+ else
+ RES_SET_H_ERRNO(pvt->res, TRY_AGAIN);
+ return (NULL);
+ }
+
+ /* Prepare a return structure. */
+ bp = pvt->buf;
+ ep = pvt->buf + sizeof(pvt->buf);
+ pvt->net.n_name = NULL;
+ pvt->net.n_aliases = pvt->ali;
+ pvt->net.n_addrtype = af;
+ pvt->net.n_addr = NULL;
+ pvt->net.n_length = addrlen;
+
+ /* Save input key if given. */
+ switch (by_what) {
+ case by_name:
+ if (name != NULL) {
+ int n = strlen(name) + 1;
+
+ if (n > (ep - bp)) {
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY);
+ return (NULL);
+ }
+ pvt->net.n_name = strcpy(bp, name); /* (checked) */
+ bp += n;
+ }
+ break;
+ case by_addr:
+ if (addr != NULL && addrlen != 0) {
+ int n = addrlen / 8 + ((addrlen % 8) != 0);
+
+ if (INADDRSZ > (ep - bp)) {
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY);
+ return (NULL);
+ }
+ memset(bp, 0, INADDRSZ);
+ memcpy(bp, addr, n);
+ pvt->net.n_addr = bp;
+ bp += INADDRSZ;
+ }
+ break;
+ default:
+ abort();
+ }
+
+ /* Parse the answer, collect aliases. */
+ ap = pvt->ali;
+ haveanswer = 0;
+ while (--ancount >= 0 && cp < eom) {
+ int n = dn_expand(ansbuf, eom, cp, bp, ep - bp);
+
+ cp += n; /*%< Owner */
+ if (n < 0 || !maybe_dnok(pvt->res, bp) ||
+ cp + 3 * INT16SZ + INT32SZ > eom) {
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY);
+ return (NULL);
+ }
+ GETSHORT(type, cp); /*%< Type */
+ GETSHORT(class, cp); /*%< Class */
+ cp += INT32SZ; /*%< TTL */
+ GETSHORT(n, cp); /*%< RDLENGTH */
+ if (class == C_IN && type == T_PTR) {
+ int nn;
+
+ nn = dn_expand(ansbuf, eom, cp, bp, ep - bp);
+ if (nn < 0 || !maybe_hnok(pvt->res, bp) || nn != n) {
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY);
+ return (NULL);
+ }
+ normalize_name(bp);
+ switch (by_what) {
+ case by_addr: {
+ if (pvt->net.n_name == NULL)
+ pvt->net.n_name = bp;
+ else if (ns_samename(pvt->net.n_name, bp) == 1)
+ break;
+ else
+ *ap++ = bp;
+ nn = strlen(bp) + 1;
+ bp += nn;
+ haveanswer++;
+ break;
+ }
+ case by_name: {
+ u_int b1, b2, b3, b4;
+
+ if (pvt->net.n_addr != NULL ||
+ sscanf(bp, "%u.%u.%u.%u.in-addr.arpa",
+ &b1, &b2, &b3, &b4) != 4)
+ break;
+ if ((ep - bp) < INADDRSZ) {
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY);
+ return (NULL);
+ }
+ pvt->net.n_addr = bp;
+ *bp++ = b4;
+ *bp++ = b3;
+ *bp++ = b2;
+ *bp++ = b1;
+ pvt->net.n_length = INADDRSZ * 8;
+ haveanswer++;
+ }
+ }
+ }
+ cp += n; /*%< RDATA */
+ }
+ if (!haveanswer) {
+ RES_SET_H_ERRNO(pvt->res, TRY_AGAIN);
+ return (NULL);
+ }
+ *ap = NULL;
+
+ return (&pvt->net);
+}
+
+static struct nwent *
+get1101mask(struct irs_nw *this, struct nwent *nwent) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ char qbuf[sizeof "255.255.255.255.in-addr.arpa"], owner[MAXDNAME];
+ int anslen, type, class, ancount, qdcount;
+ u_char *ansbuf, *cp, *eom;
+ HEADER *hp;
+
+ if (!nwent)
+ return (NULL);
+ if (make1101inaddr(nwent->n_addr, nwent->n_length, qbuf, sizeof qbuf)
+ < 0) {
+ /* "First, do no harm." */
+ return (nwent);
+ }
+
+ ansbuf = memget(MAXPACKET);
+ if (ansbuf == NULL) {
+ errno = ENOMEM;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ return (NULL);
+ }
+ /* Query for the A RR that would hold this network's mask. */
+ anslen = res_nquery(pvt->res, qbuf, C_IN, T_A, ansbuf, MAXPACKET);
+ if (anslen < HFIXEDSZ) {
+ memput(ansbuf, MAXPACKET);
+ return (nwent);
+ }
+
+ /* Initialize, and parse header. */
+ hp = (HEADER *)ansbuf;
+ cp = ansbuf + HFIXEDSZ;
+ eom = ansbuf + anslen;
+ qdcount = ntohs(hp->qdcount);
+ while (qdcount-- > 0) {
+ int n = dn_skipname(cp, eom);
+ cp += n + QFIXEDSZ;
+ if (n < 0 || cp > eom) {
+ memput(ansbuf, MAXPACKET);
+ return (nwent);
+ }
+ }
+ ancount = ntohs(hp->ancount);
+
+ /* Parse the answer, collect aliases. */
+ while (--ancount >= 0 && cp < eom) {
+ int n = dn_expand(ansbuf, eom, cp, owner, sizeof owner);
+
+ if (n < 0 || !maybe_dnok(pvt->res, owner))
+ break;
+ cp += n; /*%< Owner */
+ if (cp + 3 * INT16SZ + INT32SZ > eom)
+ break;
+ GETSHORT(type, cp); /*%< Type */
+ GETSHORT(class, cp); /*%< Class */
+ cp += INT32SZ; /*%< TTL */
+ GETSHORT(n, cp); /*%< RDLENGTH */
+ if (cp + n > eom)
+ break;
+ if (n == INADDRSZ && class == C_IN && type == T_A &&
+ ns_samename(qbuf, owner) == 1) {
+ /* This A RR indicates the actual netmask. */
+ int nn, mm;
+
+ nwent->n_length = 0;
+ for (nn = 0; nn < INADDRSZ; nn++)
+ for (mm = 7; mm >= 0; mm--)
+ if (cp[nn] & (1 << mm))
+ nwent->n_length++;
+ else
+ break;
+ }
+ cp += n; /*%< RDATA */
+ }
+ memput(ansbuf, MAXPACKET);
+ return (nwent);
+}
+
+static int
+make1101inaddr(const u_char *net, int bits, char *name, int size) {
+ int n, m;
+ char *ep;
+
+ ep = name + size;
+
+ /* Zero fill any whole bytes left out of the prefix. */
+ for (n = (32 - bits) / 8; n > 0; n--) {
+ if (ep - name < (int)(sizeof "0."))
+ goto emsgsize;
+ m = SPRINTF((name, "0."));
+ name += m;
+ }
+
+ /* Format the partial byte, if any, within the prefix. */
+ if ((n = bits % 8) != 0) {
+ if (ep - name < (int)(sizeof "255."))
+ goto emsgsize;
+ m = SPRINTF((name, "%u.",
+ net[bits / 8] & ~((1 << (8 - n)) - 1)));
+ name += m;
+ }
+
+ /* Format the whole bytes within the prefix. */
+ for (n = bits / 8; n > 0; n--) {
+ if (ep - name < (int)(sizeof "255."))
+ goto emsgsize;
+ m = SPRINTF((name, "%u.", net[n - 1]));
+ name += m;
+ }
+
+ /* Add the static text. */
+ if (ep - name < (int)(sizeof "in-addr.arpa"))
+ goto emsgsize;
+ (void) SPRINTF((name, "in-addr.arpa"));
+ return (0);
+
+ emsgsize:
+ errno = EMSGSIZE;
+ return (-1);
+}
+
+static void
+normalize_name(char *name) {
+ char *t;
+
+ /* Make lower case. */
+ for (t = name; *t; t++)
+ if (isascii((unsigned char)*t) && isupper((unsigned char)*t))
+ *t = tolower((*t)&0xff);
+
+ /* Remove trailing dots. */
+ while (t > name && t[-1] == '.')
+ *--t = '\0';
+}
+
+static int
+init(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (!pvt->res && !nw_res_get(this))
+ return (-1);
+ if (((pvt->res->options & RES_INIT) == 0U) &&
+ res_ninit(pvt->res) == -1)
+ return (-1);
+ return (0);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/dns_p.h b/usr/src/lib/libresolv2_joy/common/irs/dns_p.h
new file mode 100644
index 0000000000..d85ae2a238
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/dns_p.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: dns_p.h,v 1.4 2005/04/27 04:56:22 sra Exp $
+ */
+
+#ifndef _DNS_P_H_INCLUDED
+#define _DNS_P_H_INCLUDED
+
+#define maybe_ok(res, nm, ok) (((res)->options & RES_NOCHECKNAME) != 0U || \
+ (ok)(nm) != 0)
+#define maybe_hnok(res, hn) maybe_ok((res), (hn), res_hnok)
+#define maybe_dnok(res, dn) maybe_ok((res), (dn), res_dnok)
+
+/*%
+ * Object state.
+ */
+struct dns_p {
+ void *hes_ctx;
+ struct __res_state *res;
+ void (*free_res) __P((void *));
+};
+
+/*
+ * Methods.
+ */
+
+extern struct irs_gr * irs_dns_gr __P((struct irs_acc *));
+extern struct irs_pw * irs_dns_pw __P((struct irs_acc *));
+extern struct irs_sv * irs_dns_sv __P((struct irs_acc *));
+extern struct irs_pr * irs_dns_pr __P((struct irs_acc *));
+extern struct irs_ho * irs_dns_ho __P((struct irs_acc *));
+extern struct irs_nw * irs_dns_nw __P((struct irs_acc *));
+
+#endif /*_DNS_P_H_INCLUDED*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/dns_pr.c b/usr/src/lib/libresolv2_joy/common/irs/dns_pr.c
new file mode 100644
index 0000000000..c281be432c
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/dns_pr.c
@@ -0,0 +1,268 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: dns_pr.c,v 1.5 2005/04/27 04:56:22 sra Exp $";
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+#include <stdio.h>
+#include <string.h>
+#include <netdb.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include <isc/memcluster.h>
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "hesiod.h"
+#include "dns_p.h"
+
+/* Types. */
+
+struct pvt {
+ struct dns_p * dns;
+ struct protoent proto;
+ char * prbuf;
+};
+
+/* Forward. */
+
+static void pr_close(struct irs_pr *);
+static struct protoent * pr_byname(struct irs_pr *, const char *);
+static struct protoent * pr_bynumber(struct irs_pr *, int);
+static struct protoent * pr_next(struct irs_pr *);
+static void pr_rewind(struct irs_pr *);
+static void pr_minimize(struct irs_pr *);
+static struct __res_state * pr_res_get(struct irs_pr *);
+static void pr_res_set(struct irs_pr *,
+ struct __res_state *,
+ void (*)(void *));
+
+static struct protoent * parse_hes_list(struct irs_pr *, char **);
+
+/* Public. */
+
+struct irs_pr *
+irs_dns_pr(struct irs_acc *this) {
+ struct dns_p *dns = (struct dns_p *)this->private;
+ struct pvt *pvt;
+ struct irs_pr *pr;
+
+ if (!dns->hes_ctx) {
+ errno = ENODEV;
+ return (NULL);
+ }
+ if (!(pvt = memget(sizeof *pvt))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ if (!(pr = memget(sizeof *pr))) {
+ memput(pvt, sizeof *pvt);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pr, 0x5e, sizeof *pr);
+ pvt->dns = dns;
+ pr->private = pvt;
+ pr->byname = pr_byname;
+ pr->bynumber = pr_bynumber;
+ pr->next = pr_next;
+ pr->rewind = pr_rewind;
+ pr->close = pr_close;
+ pr->minimize = pr_minimize;
+ pr->res_get = pr_res_get;
+ pr->res_set = pr_res_set;
+ return (pr);
+}
+
+/* Methods. */
+
+static void
+pr_close(struct irs_pr *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->proto.p_aliases)
+ free(pvt->proto.p_aliases);
+ if (pvt->prbuf)
+ free(pvt->prbuf);
+
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+static struct protoent *
+pr_byname(struct irs_pr *this, const char *name) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct dns_p *dns = pvt->dns;
+ struct protoent *proto;
+ char **hes_list;
+
+ if (!(hes_list = hesiod_resolve(dns->hes_ctx, name, "protocol")))
+ return (NULL);
+
+ proto = parse_hes_list(this, hes_list);
+ hesiod_free_list(dns->hes_ctx, hes_list);
+ return (proto);
+}
+
+static struct protoent *
+pr_bynumber(struct irs_pr *this, int num) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct dns_p *dns = pvt->dns;
+ struct protoent *proto;
+ char numstr[16];
+ char **hes_list;
+
+ sprintf(numstr, "%d", num);
+ if (!(hes_list = hesiod_resolve(dns->hes_ctx, numstr, "protonum")))
+ return (NULL);
+
+ proto = parse_hes_list(this, hes_list);
+ hesiod_free_list(dns->hes_ctx, hes_list);
+ return (proto);
+}
+
+static struct protoent *
+pr_next(struct irs_pr *this) {
+ UNUSED(this);
+ errno = ENODEV;
+ return (NULL);
+}
+
+static void
+pr_rewind(struct irs_pr *this) {
+ UNUSED(this);
+ /* NOOP */
+}
+
+static void
+pr_minimize(struct irs_pr *this) {
+ UNUSED(this);
+ /* NOOP */
+}
+
+static struct __res_state *
+pr_res_get(struct irs_pr *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct dns_p *dns = pvt->dns;
+
+ return (__hesiod_res_get(dns->hes_ctx));
+}
+
+static void
+pr_res_set(struct irs_pr *this, struct __res_state * res,
+ void (*free_res)(void *)) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct dns_p *dns = pvt->dns;
+
+ __hesiod_res_set(dns->hes_ctx, res, free_res);
+}
+
+/* Private. */
+
+static struct protoent *
+parse_hes_list(struct irs_pr *this, char **hes_list) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ char *p, *cp, **cpp, **new;
+ int num = 0;
+ int max = 0;
+
+ for (cpp = hes_list; *cpp; cpp++) {
+ cp = *cpp;
+
+ /* Strip away comments, if any. */
+ if ((p = strchr(cp, '#')))
+ *p = 0;
+
+ /* Skip blank lines. */
+ p = cp;
+ while (*p && !isspace((unsigned char)*p))
+ p++;
+ if (!*p)
+ continue;
+
+ /* OK, we've got a live one. Let's parse it for real. */
+ if (pvt->prbuf)
+ free(pvt->prbuf);
+ pvt->prbuf = strdup(cp);
+
+ p = pvt->prbuf;
+ pvt->proto.p_name = p;
+ while (*p && !isspace((unsigned char)*p))
+ p++;
+ if (!*p)
+ continue;
+ *p++ = '\0';
+
+ pvt->proto.p_proto = atoi(p);
+ while (*p && !isspace((unsigned char)*p))
+ p++;
+ if (*p)
+ *p++ = '\0';
+
+ while (*p) {
+ if ((num + 1) >= max || !pvt->proto.p_aliases) {
+ max += 10;
+ new = realloc(pvt->proto.p_aliases,
+ max * sizeof(char *));
+ if (!new) {
+ errno = ENOMEM;
+ goto cleanup;
+ }
+ pvt->proto.p_aliases = new;
+ }
+ pvt->proto.p_aliases[num++] = p;
+ while (*p && !isspace((unsigned char)*p))
+ p++;
+ if (*p)
+ *p++ = '\0';
+ }
+ if (!pvt->proto.p_aliases)
+ pvt->proto.p_aliases = malloc(sizeof(char *));
+ if (!pvt->proto.p_aliases)
+ goto cleanup;
+ pvt->proto.p_aliases[num] = NULL;
+ return (&pvt->proto);
+ }
+
+ cleanup:
+ if (pvt->proto.p_aliases) {
+ free(pvt->proto.p_aliases);
+ pvt->proto.p_aliases = NULL;
+ }
+ if (pvt->prbuf) {
+ free(pvt->prbuf);
+ pvt->prbuf = NULL;
+ }
+ return (NULL);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/dns_sv.c b/usr/src/lib/libresolv2_joy/common/irs/dns_sv.c
new file mode 100644
index 0000000000..1001dc1051
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/dns_sv.c
@@ -0,0 +1,300 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: dns_sv.c,v 1.5 2005/04/27 04:56:23 sra Exp $";
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <netinet/in.h>
+
+#include <stdio.h>
+#include <string.h>
+#include <netdb.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+#include <isc/memcluster.h>
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "hesiod.h"
+#include "dns_p.h"
+
+/* Definitions */
+
+struct pvt {
+ struct dns_p * dns;
+ struct servent serv;
+ char * svbuf;
+ struct __res_state * res;
+ void (*free_res)(void *);
+};
+
+/* Forward. */
+
+static void sv_close(struct irs_sv *);
+static struct servent * sv_byname(struct irs_sv *,
+ const char *, const char *);
+static struct servent * sv_byport(struct irs_sv *, int, const char *);
+static struct servent * sv_next(struct irs_sv *);
+static void sv_rewind(struct irs_sv *);
+static void sv_minimize(struct irs_sv *);
+#ifdef SV_RES_SETGET
+static struct __res_state * sv_res_get(struct irs_sv *);
+static void sv_res_set(struct irs_sv *,
+ struct __res_state *,
+ void (*)(void *));
+#endif
+
+static struct servent * parse_hes_list(struct irs_sv *,
+ char **, const char *);
+
+/* Public */
+
+struct irs_sv *
+irs_dns_sv(struct irs_acc *this) {
+ struct dns_p *dns = (struct dns_p *)this->private;
+ struct irs_sv *sv;
+ struct pvt *pvt;
+
+ if (!dns || !dns->hes_ctx) {
+ errno = ENODEV;
+ return (NULL);
+ }
+ if (!(pvt = memget(sizeof *pvt))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ pvt->dns = dns;
+ if (!(sv = memget(sizeof *sv))) {
+ memput(pvt, sizeof *pvt);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(sv, 0x5e, sizeof *sv);
+ sv->private = pvt;
+ sv->byname = sv_byname;
+ sv->byport = sv_byport;
+ sv->next = sv_next;
+ sv->rewind = sv_rewind;
+ sv->close = sv_close;
+ sv->minimize = sv_minimize;
+#ifdef SV_RES_SETGET
+ sv->res_get = sv_res_get;
+ sv->res_set = sv_res_set;
+#else
+ sv->res_get = NULL; /*%< sv_res_get; */
+ sv->res_set = NULL; /*%< sv_res_set; */
+#endif
+ return (sv);
+}
+
+/* Methods */
+
+static void
+sv_close(struct irs_sv *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->serv.s_aliases)
+ free(pvt->serv.s_aliases);
+ if (pvt->svbuf)
+ free(pvt->svbuf);
+
+ if (pvt->res && pvt->free_res)
+ (*pvt->free_res)(pvt->res);
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+static struct servent *
+sv_byname(struct irs_sv *this, const char *name, const char *proto) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct dns_p *dns = pvt->dns;
+ struct servent *s;
+ char **hes_list;
+
+ if (!(hes_list = hesiod_resolve(dns->hes_ctx, name, "service")))
+ return (NULL);
+
+ s = parse_hes_list(this, hes_list, proto);
+ hesiod_free_list(dns->hes_ctx, hes_list);
+ return (s);
+}
+
+static struct servent *
+sv_byport(struct irs_sv *this, int port, const char *proto) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct dns_p *dns = pvt->dns;
+ struct servent *s;
+ char portstr[16];
+ char **hes_list;
+
+ sprintf(portstr, "%d", ntohs(port));
+ if (!(hes_list = hesiod_resolve(dns->hes_ctx, portstr, "port")))
+ return (NULL);
+
+ s = parse_hes_list(this, hes_list, proto);
+ hesiod_free_list(dns->hes_ctx, hes_list);
+ return (s);
+}
+
+static struct servent *
+sv_next(struct irs_sv *this) {
+ UNUSED(this);
+ errno = ENODEV;
+ return (NULL);
+}
+
+static void
+sv_rewind(struct irs_sv *this) {
+ UNUSED(this);
+ /* NOOP */
+}
+
+/* Private */
+
+static struct servent *
+parse_hes_list(struct irs_sv *this, char **hes_list, const char *proto) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ char *p, *cp, **cpp, **new;
+ int proto_len;
+ int num = 0;
+ int max = 0;
+
+ for (cpp = hes_list; *cpp; cpp++) {
+ cp = *cpp;
+
+ /* Strip away comments, if any. */
+ if ((p = strchr(cp, '#')))
+ *p = 0;
+
+ /* Check to make sure the protocol matches. */
+ p = cp;
+ while (*p && !isspace((unsigned char)*p))
+ p++;
+ if (!*p)
+ continue;
+ if (proto) {
+ proto_len = strlen(proto);
+ if (strncasecmp(++p, proto, proto_len) != 0)
+ continue;
+ if (p[proto_len] && !isspace(p[proto_len]&0xff))
+ continue;
+ }
+ /* OK, we've got a live one. Let's parse it for real. */
+ if (pvt->svbuf)
+ free(pvt->svbuf);
+ pvt->svbuf = strdup(cp);
+
+ p = pvt->svbuf;
+ pvt->serv.s_name = p;
+ while (*p && !isspace(*p&0xff))
+ p++;
+ if (!*p)
+ continue;
+ *p++ = '\0';
+
+ pvt->serv.s_proto = p;
+ while (*p && !isspace(*p&0xff))
+ p++;
+ if (!*p)
+ continue;
+ *p++ = '\0';
+
+ pvt->serv.s_port = htons((u_short) atoi(p));
+ while (*p && !isspace(*p&0xff))
+ p++;
+ if (*p)
+ *p++ = '\0';
+
+ while (*p) {
+ if ((num + 1) >= max || !pvt->serv.s_aliases) {
+ max += 10;
+ new = realloc(pvt->serv.s_aliases,
+ max * sizeof(char *));
+ if (!new) {
+ errno = ENOMEM;
+ goto cleanup;
+ }
+ pvt->serv.s_aliases = new;
+ }
+ pvt->serv.s_aliases[num++] = p;
+ while (*p && !isspace(*p&0xff))
+ p++;
+ if (*p)
+ *p++ = '\0';
+ }
+ if (!pvt->serv.s_aliases)
+ pvt->serv.s_aliases = malloc(sizeof(char *));
+ if (!pvt->serv.s_aliases)
+ goto cleanup;
+ pvt->serv.s_aliases[num] = NULL;
+ return (&pvt->serv);
+ }
+
+ cleanup:
+ if (pvt->serv.s_aliases) {
+ free(pvt->serv.s_aliases);
+ pvt->serv.s_aliases = NULL;
+ }
+ if (pvt->svbuf) {
+ free(pvt->svbuf);
+ pvt->svbuf = NULL;
+ }
+ return (NULL);
+}
+
+static void
+sv_minimize(struct irs_sv *this) {
+ UNUSED(this);
+ /* NOOP */
+}
+
+#ifdef SV_RES_SETGET
+static struct __res_state *
+sv_res_get(struct irs_sv *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct dns_p *dns = pvt->dns;
+
+ return (__hesiod_res_get(dns->hes_ctx));
+}
+
+static void
+sv_res_set(struct irs_sv *this, struct __res_state * res,
+ void (*free_res)(void *)) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct dns_p *dns = pvt->dns;
+
+ __hesiod_res_set(dns->hes_ctx, res, free_res);
+}
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/gai_strerror.c b/usr/src/lib/libresolv2_joy/common/irs/gai_strerror.c
new file mode 100644
index 0000000000..9ca1c4bfe1
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/gai_strerror.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 2001 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <port_before.h>
+#include <netdb.h>
+#include <port_after.h>
+
+#ifdef DO_PTHREADS
+#include <pthread.h>
+#include <stdlib.h>
+#endif
+
+static const char *gai_errlist[] = {
+ "no error",
+ "address family not supported for name",/*%< EAI_ADDRFAMILY */
+ "temporary failure", /*%< EAI_AGAIN */
+ "invalid flags", /*%< EAI_BADFLAGS */
+ "permanent failure", /*%< EAI_FAIL */
+ "address family not supported", /*%< EAI_FAMILY */
+ "memory failure", /*%< EAI_MEMORY */
+ "no address", /*%< EAI_NODATA */
+ "unknown name or service", /*%< EAI_NONAME */
+ "service not supported for socktype", /*%< EAI_SERVICE */
+ "socktype not supported", /*%< EAI_SOCKTYPE */
+ "system failure", /*%< EAI_SYSTEM */
+ "bad hints", /*%< EAI_BADHINTS */
+ "bad protocol", /*%< EAI_PROTOCOL */
+ "unknown error" /*%< Must be last. */
+};
+
+static const int gai_nerr = (sizeof(gai_errlist)/sizeof(*gai_errlist));
+
+#define EAI_BUFSIZE 128
+
+const char *
+gai_strerror(int ecode) {
+#ifndef DO_PTHREADS
+ static char buf[EAI_BUFSIZE];
+#else /* DO_PTHREADS */
+#ifndef LIBBIND_MUTEX_INITIALIZER
+#define LIBBIND_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
+#endif
+ static pthread_mutex_t lock = LIBBIND_MUTEX_INITIALIZER;
+ static pthread_key_t key;
+ static int once = 0;
+ char *buf;
+#endif
+
+ if (ecode >= 0 && ecode < (gai_nerr - 1))
+ return (gai_errlist[ecode]);
+
+#ifdef DO_PTHREADS
+ if (!once) {
+ if (pthread_mutex_lock(&lock) != 0)
+ goto unknown;
+ if (!once) {
+ if (pthread_key_create(&key, free) != 0) {
+ (void)pthread_mutex_unlock(&lock);
+ goto unknown;
+ }
+ once = 1;
+ }
+ if (pthread_mutex_unlock(&lock) != 0)
+ goto unknown;
+ }
+
+ buf = pthread_getspecific(key);
+ if (buf == NULL) {
+ buf = malloc(EAI_BUFSIZE);
+ if (buf == NULL)
+ goto unknown;
+ if (pthread_setspecific(key, buf) != 0) {
+ free(buf);
+ goto unknown;
+ }
+ }
+#endif
+ /*
+ * XXX This really should be snprintf(buf, EAI_BUFSIZE, ...).
+ * It is safe until message catalogs are used.
+ */
+ sprintf(buf, "%s: %d", gai_errlist[gai_nerr - 1], ecode);
+ return (buf);
+
+#ifdef DO_PTHREADS
+ unknown:
+ return ("unknown error");
+#endif
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/gen.c b/usr/src/lib/libresolv2_joy/common/irs/gen.c
new file mode 100644
index 0000000000..7da01f5b09
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/gen.c
@@ -0,0 +1,459 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: gen.c,v 1.7 2005/04/27 04:56:23 sra Exp $";
+#endif
+
+/*! \file
+ * \brief
+ * this is the top level dispatcher
+ *
+ * The dispatcher is implemented as an accessor class; it is an
+ * accessor class that calls other accessor classes, as controlled by a
+ * configuration file.
+ *
+ * A big difference between this accessor class and others is that the
+ * map class initializers are NULL, and the map classes are already
+ * filled in with method functions that will do the right thing.
+ */
+
+/* Imports */
+
+#include "port_before.h"
+
+#include <isc/assertions.h>
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+#include <isc/memcluster.h>
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "gen_p.h"
+
+#ifdef SUNW_HOSTS_FALLBACK
+extern int __res_no_hosts_fallback(void);
+#endif /* SUNW_HOSTS_FALLBACK */
+
+/* Definitions */
+
+struct nameval {
+ const char * name;
+ int val;
+};
+
+static const struct nameval acc_names[irs_nacc+1] = {
+ { "local", irs_lcl },
+ { "dns", irs_dns },
+ { "nis", irs_nis },
+ { "irp", irs_irp },
+ { NULL, irs_nacc }
+};
+
+typedef struct irs_acc *(*accinit) __P((const char *options));
+
+static const accinit accs[irs_nacc+1] = {
+ irs_lcl_acc,
+ irs_dns_acc,
+#ifdef WANT_IRS_NIS
+ irs_nis_acc,
+#else
+ NULL,
+#endif
+ irs_irp_acc,
+ NULL
+};
+
+static const struct nameval map_names[irs_nmap+1] = {
+ { "group", irs_gr },
+ { "passwd", irs_pw },
+ { "services", irs_sv },
+ { "protocols", irs_pr },
+ { "hosts", irs_ho },
+ { "networks", irs_nw },
+ { "netgroup", irs_ng },
+ { NULL, irs_nmap }
+};
+
+static const struct nameval option_names[] = {
+ { "merge", IRS_MERGE },
+ { "continue", IRS_CONTINUE },
+ { NULL, 0 }
+};
+
+/* Forward */
+
+static void gen_close(struct irs_acc *);
+static struct __res_state * gen_res_get(struct irs_acc *);
+static void gen_res_set(struct irs_acc *, struct __res_state *,
+ void (*)(void *));
+static int find_name(const char *, const struct nameval nv[]);
+static void init_map_rules(struct gen_p *, const char *conf_file);
+static struct irs_rule *release_rule(struct irs_rule *);
+static int add_rule(struct gen_p *,
+ enum irs_map_id, enum irs_acc_id,
+ const char *);
+
+/* Public */
+
+struct irs_acc *
+irs_gen_acc(const char *options, const char *conf_file) {
+ struct irs_acc *acc;
+ struct gen_p *irs;
+
+ if (!(acc = memget(sizeof *acc))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(acc, 0x5e, sizeof *acc);
+ if (!(irs = memget(sizeof *irs))) {
+ errno = ENOMEM;
+ memput(acc, sizeof *acc);
+ return (NULL);
+ }
+ memset(irs, 0x5e, sizeof *irs);
+ irs->options = strdup(options);
+ irs->res = NULL;
+ irs->free_res = NULL;
+ memset(irs->accessors, 0, sizeof irs->accessors);
+ memset(irs->map_rules, 0, sizeof irs->map_rules);
+ init_map_rules(irs, conf_file);
+ acc->private = irs;
+#ifdef WANT_IRS_GR
+ acc->gr_map = irs_gen_gr;
+#else
+ acc->gr_map = NULL;
+#endif
+#ifdef WANT_IRS_PW
+ acc->pw_map = irs_gen_pw;
+#else
+ acc->pw_map = NULL;
+#endif
+ acc->sv_map = irs_gen_sv;
+ acc->pr_map = irs_gen_pr;
+ acc->ho_map = irs_gen_ho;
+ acc->nw_map = irs_gen_nw;
+ acc->ng_map = irs_gen_ng;
+ acc->res_get = gen_res_get;
+ acc->res_set = gen_res_set;
+ acc->close = gen_close;
+ return (acc);
+}
+
+/* Methods */
+
+static struct __res_state *
+gen_res_get(struct irs_acc *this) {
+ struct gen_p *irs = (struct gen_p *)this->private;
+
+ if (irs->res == NULL) {
+ struct __res_state *res;
+ res = (struct __res_state *)malloc(sizeof *res);
+ if (res == NULL)
+ return (NULL);
+ memset(res, 0, sizeof *res);
+ gen_res_set(this, res, free);
+ }
+
+ if (((irs->res->options & RES_INIT) == 0U) && res_ninit(irs->res) < 0)
+ return (NULL);
+
+ return (irs->res);
+}
+
+static void
+gen_res_set(struct irs_acc *this, struct __res_state *res,
+ void (*free_res)(void *)) {
+ struct gen_p *irs = (struct gen_p *)this->private;
+#if 0
+ struct irs_rule *rule;
+ struct irs_ho *ho;
+ struct irs_nw *nw;
+#endif
+
+ if (irs->res && irs->free_res) {
+ res_nclose(irs->res);
+ (*irs->free_res)(irs->res);
+ }
+
+ irs->res = res;
+ irs->free_res = free_res;
+
+#if 0
+ for (rule = irs->map_rules[irs_ho]; rule; rule = rule->next) {
+ ho = rule->inst->ho;
+
+ (*ho->res_set)(ho, res, NULL);
+ }
+ for (rule = irs->map_rules[irs_nw]; rule; rule = rule->next) {
+ nw = rule->inst->nw;
+
+ (*nw->res_set)(nw, res, NULL);
+ }
+#endif
+}
+
+static void
+gen_close(struct irs_acc *this) {
+ struct gen_p *irs = (struct gen_p *)this->private;
+ int n;
+
+ /* Search rules. */
+ for (n = 0; n < irs_nmap; n++)
+ while (irs->map_rules[n] != NULL)
+ irs->map_rules[n] = release_rule(irs->map_rules[n]);
+
+ /* Access methods. */
+ for (n = 0; n < irs_nacc; n++) {
+ /* Map objects. */
+ if (irs->accessors[n].gr != NULL)
+ (*irs->accessors[n].gr->close)(irs->accessors[n].gr);
+ if (irs->accessors[n].pw != NULL)
+ (*irs->accessors[n].pw->close)(irs->accessors[n].pw);
+ if (irs->accessors[n].sv != NULL)
+ (*irs->accessors[n].sv->close)(irs->accessors[n].sv);
+ if (irs->accessors[n].pr != NULL)
+ (*irs->accessors[n].pr->close)(irs->accessors[n].pr);
+ if (irs->accessors[n].ho != NULL)
+ (*irs->accessors[n].ho->close)(irs->accessors[n].ho);
+ if (irs->accessors[n].nw != NULL)
+ (*irs->accessors[n].nw->close)(irs->accessors[n].nw);
+ if (irs->accessors[n].ng != NULL)
+ (*irs->accessors[n].ng->close)(irs->accessors[n].ng);
+ /* Enclosing accessor. */
+ if (irs->accessors[n].acc != NULL)
+ (*irs->accessors[n].acc->close)(irs->accessors[n].acc);
+ }
+
+ /* The options string was strdup'd. */
+ free((void*)irs->options);
+
+ if (irs->res && irs->free_res)
+ (*irs->free_res)(irs->res);
+
+ /* The private data container. */
+ memput(irs, sizeof *irs);
+
+ /* The object. */
+ memput(this, sizeof *this);
+}
+
+/* Private */
+
+static int
+find_name(const char *name, const struct nameval names[]) {
+ int n;
+
+ for (n = 0; names[n].name != NULL; n++)
+ if (strcmp(name, names[n].name) == 0)
+ return (names[n].val);
+ return (-1);
+}
+
+static struct irs_rule *
+release_rule(struct irs_rule *rule) {
+ struct irs_rule *next = rule->next;
+
+ memput(rule, sizeof *rule);
+ return (next);
+}
+
+static int
+add_rule(struct gen_p *irs,
+ enum irs_map_id map, enum irs_acc_id acc,
+ const char *options)
+{
+ struct irs_rule **rules, *last, *tmp, *new;
+ struct irs_inst *inst;
+ const char *cp;
+ int n;
+
+#ifndef WANT_IRS_GR
+ if (map == irs_gr)
+ return (-1);
+#endif
+#ifndef WANT_IRS_PW
+ if (map == irs_pw)
+ return (-1);
+#endif
+#ifndef WANT_IRS_NIS
+ if (acc == irs_nis)
+ return (-1);
+#endif
+ new = memget(sizeof *new);
+ if (new == NULL)
+ return (-1);
+ memset(new, 0x5e, sizeof *new);
+ new->next = NULL;
+
+ new->inst = &irs->accessors[acc];
+
+ new->flags = 0;
+ cp = options;
+ while (cp && *cp) {
+ char option[50], *next;
+
+ next = strchr(cp, ',');
+ if (next)
+ n = next++ - cp;
+ else
+ n = strlen(cp);
+ if ((size_t)n > sizeof option - 1)
+ n = sizeof option - 1;
+ strncpy(option, cp, n);
+ option[n] = '\0';
+
+ n = find_name(option, option_names);
+ if (n >= 0)
+ new->flags |= n;
+
+ cp = next;
+ }
+
+ rules = &irs->map_rules[map];
+ for (last = NULL, tmp = *rules;
+ tmp != NULL;
+ last = tmp, tmp = tmp->next)
+ (void)NULL;
+ if (last == NULL)
+ *rules = new;
+ else
+ last->next = new;
+
+ /* Try to instantiate map accessors for this if necessary & approp. */
+ inst = &irs->accessors[acc];
+ if (inst->acc == NULL && accs[acc] != NULL)
+ inst->acc = (*accs[acc])(irs->options);
+ if (inst->acc != NULL) {
+ if (inst->gr == NULL && inst->acc->gr_map != NULL)
+ inst->gr = (*inst->acc->gr_map)(inst->acc);
+ if (inst->pw == NULL && inst->acc->pw_map != NULL)
+ inst->pw = (*inst->acc->pw_map)(inst->acc);
+ if (inst->sv == NULL && inst->acc->sv_map != NULL)
+ inst->sv = (*inst->acc->sv_map)(inst->acc);
+ if (inst->pr == NULL && inst->acc->pr_map != NULL)
+ inst->pr = (*inst->acc->pr_map)(inst->acc);
+ if (inst->ho == NULL && inst->acc->ho_map != NULL)
+ inst->ho = (*inst->acc->ho_map)(inst->acc);
+ if (inst->nw == NULL && inst->acc->nw_map != NULL)
+ inst->nw = (*inst->acc->nw_map)(inst->acc);
+ if (inst->ng == NULL && inst->acc->ng_map != NULL)
+ inst->ng = (*inst->acc->ng_map)(inst->acc);
+ }
+
+ return (0);
+}
+
+static void
+default_map_rules(struct gen_p *irs) {
+ /* Install time honoured and proved BSD style rules as default. */
+ add_rule(irs, irs_gr, irs_lcl, "");
+ add_rule(irs, irs_pw, irs_lcl, "");
+ add_rule(irs, irs_sv, irs_lcl, "");
+ add_rule(irs, irs_pr, irs_lcl, "");
+#ifdef SUNW_HOSTS_FALLBACK
+ if (__res_no_hosts_fallback())
+ add_rule(irs, irs_ho, irs_dns, "");
+ else {
+ add_rule(irs, irs_ho, irs_dns, "continue");
+ add_rule(irs, irs_ho, irs_lcl, "");
+ }
+#else /* SUNW_HOSTS_FALLBACK */
+ add_rule(irs, irs_ho, irs_dns, "continue");
+ add_rule(irs, irs_ho, irs_lcl, "");
+#endif /* SUNW_HOSTS_FALLBACK */
+ add_rule(irs, irs_nw, irs_dns, "continue");
+ add_rule(irs, irs_nw, irs_lcl, "");
+ add_rule(irs, irs_ng, irs_lcl, "");
+}
+
+static void
+init_map_rules(struct gen_p *irs, const char *conf_file) {
+ char line[1024], pattern[40], mapname[20], accname[20], options[100];
+ FILE *conf;
+
+#ifdef SUNW_HOSTS_FALLBACK
+ if (__res_no_hosts_fallback()) {
+ default_map_rules(irs);
+ return;
+ }
+#endif /* SUNW_HOSTS_FALLBACK */
+
+ if (conf_file == NULL)
+ conf_file = _PATH_IRS_CONF ;
+
+ /* A conf file of "" means compiled in defaults. Irpd wants this */
+ if (conf_file[0] == '\0' || (conf = fopen(conf_file, "r")) == NULL) {
+ default_map_rules(irs);
+ return;
+ }
+ (void) sprintf(pattern, "%%%lus %%%lus %%%lus\n",
+ (unsigned long)sizeof mapname,
+ (unsigned long)sizeof accname,
+ (unsigned long)sizeof options);
+ while (fgets(line, sizeof line, conf)) {
+ enum irs_map_id map;
+ enum irs_acc_id acc;
+ char *tmp;
+ int n;
+
+ for (tmp = line;
+ isascii((unsigned char)*tmp) &&
+ isspace((unsigned char)*tmp);
+ tmp++)
+ (void)NULL;
+ if (*tmp == '#' || *tmp == '\n' || *tmp == '\0')
+ continue;
+ n = sscanf(tmp, pattern, mapname, accname, options);
+ if (n < 2)
+ continue;
+ if (n < 3)
+ options[0] = '\0';
+
+ n = find_name(mapname, map_names);
+ INSIST(n < irs_nmap);
+ if (n < 0)
+ continue;
+ map = (enum irs_map_id) n;
+
+ n = find_name(accname, acc_names);
+ INSIST(n < irs_nacc);
+ if (n < 0)
+ continue;
+ acc = (enum irs_acc_id) n;
+
+ add_rule(irs, map, acc, options);
+ }
+ fclose(conf);
+}
diff --git a/usr/src/lib/libresolv2_joy/common/irs/gen_ho.c b/usr/src/lib/libresolv2_joy/common/irs/gen_ho.c
new file mode 100644
index 0000000000..8e41d7d610
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/gen_ho.c
@@ -0,0 +1,391 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: gen_ho.c,v 1.5 2006/03/09 23:57:56 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/* Imports */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+
+#include <errno.h>
+#include <stdlib.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <isc/memcluster.h>
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "gen_p.h"
+
+/* Definitions */
+
+struct pvt {
+ struct irs_rule * rules;
+ struct irs_rule * rule;
+ struct irs_ho * ho;
+ struct __res_state * res;
+ void (*free_res)(void *);
+};
+
+/* Forwards */
+
+static void ho_close(struct irs_ho *this);
+static struct hostent * ho_byname(struct irs_ho *this, const char *name);
+static struct hostent * ho_byname2(struct irs_ho *this, const char *name,
+ int af);
+static struct hostent * ho_byaddr(struct irs_ho *this, const void *addr,
+ int len, int af);
+static struct hostent * ho_next(struct irs_ho *this);
+static void ho_rewind(struct irs_ho *this);
+static void ho_minimize(struct irs_ho *this);
+static struct __res_state * ho_res_get(struct irs_ho *this);
+static void ho_res_set(struct irs_ho *this,
+ struct __res_state *res,
+ void (*free_res)(void *));
+static struct addrinfo * ho_addrinfo(struct irs_ho *this, const char *name,
+ const struct addrinfo *pai);
+
+static int init(struct irs_ho *this);
+
+/* Exports */
+
+struct irs_ho *
+irs_gen_ho(struct irs_acc *this) {
+ struct gen_p *accpvt = (struct gen_p *)this->private;
+ struct irs_ho *ho;
+ struct pvt *pvt;
+
+ if (!(pvt = memget(sizeof *pvt))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ if (!(ho = memget(sizeof *ho))) {
+ memput(pvt, sizeof *pvt);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(ho, 0x5e, sizeof *ho);
+ pvt->rules = accpvt->map_rules[irs_ho];
+ pvt->rule = pvt->rules;
+ ho->private = pvt;
+ ho->close = ho_close;
+ ho->byname = ho_byname;
+ ho->byname2 = ho_byname2;
+ ho->byaddr = ho_byaddr;
+ ho->next = ho_next;
+ ho->rewind = ho_rewind;
+ ho->minimize = ho_minimize;
+ ho->res_get = ho_res_get;
+ ho->res_set = ho_res_set;
+ ho->addrinfo = ho_addrinfo;
+ return (ho);
+}
+
+/* Methods. */
+
+static void
+ho_close(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ ho_minimize(this);
+ if (pvt->res && pvt->free_res)
+ (*pvt->free_res)(pvt->res);
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+static struct hostent *
+ho_byname(struct irs_ho *this, const char *name) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+ struct hostent *rval;
+ struct irs_ho *ho;
+ int therrno = NETDB_INTERNAL;
+ int softerror = 0;
+
+ if (init(this) == -1)
+ return (NULL);
+
+ for (rule = pvt->rules; rule; rule = rule->next) {
+ ho = rule->inst->ho;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ errno = 0;
+ rval = (*ho->byname)(ho, name);
+ if (rval != NULL)
+ return (rval);
+ if (softerror == 0 &&
+ pvt->res->res_h_errno != HOST_NOT_FOUND &&
+ pvt->res->res_h_errno != NETDB_INTERNAL) {
+ softerror = 1;
+ therrno = pvt->res->res_h_errno;
+ }
+ if (rule->flags & IRS_CONTINUE)
+ continue;
+ /*
+ * The value TRY_AGAIN can mean that the service
+ * is not available, or just that this particular name
+ * cannot be resolved now. We use the errno ECONNREFUSED
+ * to distinguish. If a lookup sets that errno when
+ * H_ERRNO is TRY_AGAIN, we continue to try other lookup
+ * functions, otherwise we return the TRY_AGAIN error.
+ */
+ if (pvt->res->res_h_errno != TRY_AGAIN || errno != ECONNREFUSED)
+ break;
+ }
+ if (softerror != 0 && pvt->res->res_h_errno == HOST_NOT_FOUND)
+ RES_SET_H_ERRNO(pvt->res, therrno);
+ return (NULL);
+}
+
+static struct hostent *
+ho_byname2(struct irs_ho *this, const char *name, int af) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+ struct hostent *rval;
+ struct irs_ho *ho;
+ int therrno = NETDB_INTERNAL;
+ int softerror = 0;
+
+ if (init(this) == -1)
+ return (NULL);
+
+ for (rule = pvt->rules; rule; rule = rule->next) {
+ ho = rule->inst->ho;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ errno = 0;
+ rval = (*ho->byname2)(ho, name, af);
+ if (rval != NULL)
+ return (rval);
+ if (softerror == 0 &&
+ pvt->res->res_h_errno != HOST_NOT_FOUND &&
+ pvt->res->res_h_errno != NETDB_INTERNAL) {
+ softerror = 1;
+ therrno = pvt->res->res_h_errno;
+ }
+ if (rule->flags & IRS_CONTINUE)
+ continue;
+ /*
+ * See the comments in ho_byname() explaining
+ * the interpretation of TRY_AGAIN and ECONNREFUSED.
+ */
+ if (pvt->res->res_h_errno != TRY_AGAIN || errno != ECONNREFUSED)
+ break;
+ }
+ if (softerror != 0 && pvt->res->res_h_errno == HOST_NOT_FOUND)
+ RES_SET_H_ERRNO(pvt->res, therrno);
+ return (NULL);
+}
+
+static struct hostent *
+ho_byaddr(struct irs_ho *this, const void *addr, int len, int af) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+ struct hostent *rval;
+ struct irs_ho *ho;
+ int therrno = NETDB_INTERNAL;
+ int softerror = 0;
+
+
+ if (init(this) == -1)
+ return (NULL);
+
+ for (rule = pvt->rules; rule; rule = rule->next) {
+ ho = rule->inst->ho;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ errno = 0;
+ rval = (*ho->byaddr)(ho, addr, len, af);
+ if (rval != NULL)
+ return (rval);
+ if (softerror == 0 &&
+ pvt->res->res_h_errno != HOST_NOT_FOUND &&
+ pvt->res->res_h_errno != NETDB_INTERNAL) {
+ softerror = 1;
+ therrno = pvt->res->res_h_errno;
+ }
+
+ if (rule->flags & IRS_CONTINUE)
+ continue;
+ /*
+ * See the comments in ho_byname() explaining
+ * the interpretation of TRY_AGAIN and ECONNREFUSED.
+ */
+ if (pvt->res->res_h_errno != TRY_AGAIN || errno != ECONNREFUSED)
+ break;
+ }
+ if (softerror != 0 && pvt->res->res_h_errno == HOST_NOT_FOUND)
+ RES_SET_H_ERRNO(pvt->res, therrno);
+ return (NULL);
+}
+
+static struct hostent *
+ho_next(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct hostent *rval;
+ struct irs_ho *ho;
+
+ while (pvt->rule) {
+ ho = pvt->rule->inst->ho;
+ rval = (*ho->next)(ho);
+ if (rval)
+ return (rval);
+ if (!(pvt->rule->flags & IRS_CONTINUE))
+ break;
+ pvt->rule = pvt->rule->next;
+ if (pvt->rule) {
+ ho = pvt->rule->inst->ho;
+ (*ho->rewind)(ho);
+ }
+ }
+ return (NULL);
+}
+
+static void
+ho_rewind(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_ho *ho;
+
+ pvt->rule = pvt->rules;
+ if (pvt->rule) {
+ ho = pvt->rule->inst->ho;
+ (*ho->rewind)(ho);
+ }
+}
+
+static void
+ho_minimize(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+
+ if (pvt->res)
+ res_nclose(pvt->res);
+ for (rule = pvt->rules; rule != NULL; rule = rule->next) {
+ struct irs_ho *ho = rule->inst->ho;
+
+ (*ho->minimize)(ho);
+ }
+}
+
+static struct __res_state *
+ho_res_get(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (!pvt->res) {
+ struct __res_state *res;
+ res = (struct __res_state *)malloc(sizeof *res);
+ if (!res) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(res, 0, sizeof *res);
+ ho_res_set(this, res, free);
+ }
+
+ return (pvt->res);
+}
+
+static void
+ho_res_set(struct irs_ho *this, struct __res_state *res,
+ void (*free_res)(void *)) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+
+ if (pvt->res && pvt->free_res) {
+ res_nclose(pvt->res);
+ (*pvt->free_res)(pvt->res);
+ }
+
+ pvt->res = res;
+ pvt->free_res = free_res;
+
+ for (rule = pvt->rules; rule != NULL; rule = rule->next) {
+ struct irs_ho *ho = rule->inst->ho;
+
+ (*ho->res_set)(ho, pvt->res, NULL);
+ }
+}
+
+static struct addrinfo *
+ho_addrinfo(struct irs_ho *this, const char *name, const struct addrinfo *pai)
+{
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+ struct addrinfo *rval = NULL;
+ struct irs_ho *ho;
+ int therrno = NETDB_INTERNAL;
+ int softerror = 0;
+
+ if (init(this) == -1)
+ return (NULL);
+
+ for (rule = pvt->rules; rule; rule = rule->next) {
+ ho = rule->inst->ho;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ errno = 0;
+ if (ho->addrinfo == NULL) /*%< for safety */
+ continue;
+ rval = (*ho->addrinfo)(ho, name, pai);
+ if (rval != NULL)
+ return (rval);
+ if (softerror == 0 &&
+ pvt->res->res_h_errno != HOST_NOT_FOUND &&
+ pvt->res->res_h_errno != NETDB_INTERNAL) {
+ softerror = 1;
+ therrno = pvt->res->res_h_errno;
+ }
+ if (rule->flags & IRS_CONTINUE)
+ continue;
+ /*
+ * See the comments in ho_byname() explaining
+ * the interpretation of TRY_AGAIN and ECONNREFUSED.
+ */
+ if (pvt->res->res_h_errno != TRY_AGAIN ||
+ errno != ECONNREFUSED)
+ break;
+ }
+ if (softerror != 0 && pvt->res->res_h_errno == HOST_NOT_FOUND)
+ RES_SET_H_ERRNO(pvt->res, therrno);
+ return (NULL);
+}
+
+static int
+init(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (!pvt->res && !ho_res_get(this))
+ return (-1);
+
+ if (((pvt->res->options & RES_INIT) == 0U) &&
+ (res_ninit(pvt->res) == -1))
+ return (-1);
+
+ return (0);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/gen_ng.c b/usr/src/lib/libresolv2_joy/common/irs/gen_ng.c
new file mode 100644
index 0000000000..e3c874ee68
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/gen_ng.c
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: gen_ng.c,v 1.3 2005/04/27 04:56:23 sra Exp $";
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <isc/memcluster.h>
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "gen_p.h"
+
+/* Types */
+
+struct pvt {
+ struct irs_rule * rules;
+ struct irs_rule * rule;
+ char * curgroup;
+};
+
+/* Forward */
+
+static void ng_close(struct irs_ng *);
+static int ng_next(struct irs_ng *, const char **,
+ const char **, const char **);
+static int ng_test(struct irs_ng *, const char *,
+ const char *, const char *,
+ const char *);
+static void ng_rewind(struct irs_ng *, const char *);
+static void ng_minimize(struct irs_ng *);
+
+/* Public */
+
+struct irs_ng *
+irs_gen_ng(struct irs_acc *this) {
+ struct gen_p *accpvt = (struct gen_p *)this->private;
+ struct irs_ng *ng;
+ struct pvt *pvt;
+
+ if (!(ng = memget(sizeof *ng))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(ng, 0x5e, sizeof *ng);
+ if (!(pvt = memget(sizeof *pvt))) {
+ memput(ng, sizeof *ng);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ pvt->rules = accpvt->map_rules[irs_ng];
+ pvt->rule = pvt->rules;
+ ng->private = pvt;
+ ng->close = ng_close;
+ ng->next = ng_next;
+ ng->test = ng_test;
+ ng->rewind = ng_rewind;
+ ng->minimize = ng_minimize;
+ return (ng);
+}
+
+/* Methods */
+
+static void
+ng_close(struct irs_ng *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ ng_minimize(this);
+ if (pvt->curgroup)
+ free(pvt->curgroup);
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+static int
+ng_next(struct irs_ng *this, const char **host, const char **user,
+ const char **domain)
+{
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_ng *ng;
+
+ while (pvt->rule) {
+ ng = pvt->rule->inst->ng;
+ if ((*ng->next)(ng, host, user, domain) == 1)
+ return (1);
+ if (!(pvt->rule->flags & IRS_CONTINUE))
+ break;
+ pvt->rule = pvt->rule->next;
+ if (pvt->rule) {
+ ng = pvt->rule->inst->ng;
+ (*ng->rewind)(ng, pvt->curgroup);
+ }
+ }
+ return (0);
+}
+
+static int
+ng_test(struct irs_ng *this, const char *name,
+ const char *user, const char *host, const char *domain)
+{
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+ struct irs_ng *ng;
+ int rval;
+
+ rval = 0;
+ for (rule = pvt->rules; rule; rule = rule->next) {
+ ng = rule->inst->ng;
+ rval = (*ng->test)(ng, name, user, host, domain);
+ if (rval || !(rule->flags & IRS_CONTINUE))
+ break;
+ }
+ return (rval);
+}
+
+static void
+ng_rewind(struct irs_ng *this, const char *group) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_ng *ng;
+
+ pvt->rule = pvt->rules;
+ if (pvt->rule) {
+ if (pvt->curgroup)
+ free(pvt->curgroup);
+ pvt->curgroup = strdup(group);
+ ng = pvt->rule->inst->ng;
+ (*ng->rewind)(ng, pvt->curgroup);
+ }
+}
+
+static void
+ng_minimize(struct irs_ng *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+
+ for (rule = pvt->rules; rule != NULL; rule = rule->next) {
+ struct irs_ng *ng = rule->inst->ng;
+
+ (*ng->minimize)(ng);
+ }
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/gen_nw.c b/usr/src/lib/libresolv2_joy/common/irs/gen_nw.c
new file mode 100644
index 0000000000..12bd0e0f6d
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/gen_nw.c
@@ -0,0 +1,264 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: gen_nw.c,v 1.4 2005/04/27 04:56:23 sra Exp $";
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+
+#include <errno.h>
+#include <resolv_joy.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <isc/memcluster.h>
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "gen_p.h"
+
+/* Types */
+
+struct pvt {
+ struct irs_rule * rules;
+ struct irs_rule * rule;
+ struct __res_state * res;
+ void (*free_res)(void *);
+};
+
+/* Forward */
+
+static void nw_close(struct irs_nw*);
+static struct nwent * nw_next(struct irs_nw *);
+static struct nwent * nw_byname(struct irs_nw *, const char *, int);
+static struct nwent * nw_byaddr(struct irs_nw *, void *, int, int);
+static void nw_rewind(struct irs_nw *);
+static void nw_minimize(struct irs_nw *);
+static struct __res_state * nw_res_get(struct irs_nw *this);
+static void nw_res_set(struct irs_nw *this,
+ struct __res_state *res,
+ void (*free_res)(void *));
+
+static int init(struct irs_nw *this);
+
+/* Public */
+
+struct irs_nw *
+irs_gen_nw(struct irs_acc *this) {
+ struct gen_p *accpvt = (struct gen_p *)this->private;
+ struct irs_nw *nw;
+ struct pvt *pvt;
+
+ if (!(pvt = memget(sizeof *pvt))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ if (!(nw = memget(sizeof *nw))) {
+ memput(pvt, sizeof *pvt);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(nw, 0x5e, sizeof *nw);
+ pvt->rules = accpvt->map_rules[irs_nw];
+ pvt->rule = pvt->rules;
+ nw->private = pvt;
+ nw->close = nw_close;
+ nw->next = nw_next;
+ nw->byname = nw_byname;
+ nw->byaddr = nw_byaddr;
+ nw->rewind = nw_rewind;
+ nw->minimize = nw_minimize;
+ nw->res_get = nw_res_get;
+ nw->res_set = nw_res_set;
+ return (nw);
+}
+
+/* Methods */
+
+static void
+nw_close(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ nw_minimize(this);
+
+ if (pvt->res && pvt->free_res)
+ (*pvt->free_res)(pvt->res);
+
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+static struct nwent *
+nw_next(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct nwent *rval;
+ struct irs_nw *nw;
+
+ if (init(this) == -1)
+ return(NULL);
+
+ while (pvt->rule) {
+ nw = pvt->rule->inst->nw;
+ rval = (*nw->next)(nw);
+ if (rval)
+ return (rval);
+ if (!(pvt->rules->flags & IRS_CONTINUE))
+ break;
+ pvt->rule = pvt->rule->next;
+ if (pvt->rule) {
+ nw = pvt->rule->inst->nw;
+ (*nw->rewind)(nw);
+ }
+ }
+ return (NULL);
+}
+
+static struct nwent *
+nw_byname(struct irs_nw *this, const char *name, int type) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+ struct nwent *rval;
+ struct irs_nw *nw;
+
+ if (init(this) == -1)
+ return(NULL);
+
+ for (rule = pvt->rules; rule; rule = rule->next) {
+ nw = rule->inst->nw;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ rval = (*nw->byname)(nw, name, type);
+ if (rval != NULL)
+ return (rval);
+ if (pvt->res->res_h_errno != TRY_AGAIN &&
+ !(rule->flags & IRS_CONTINUE))
+ break;
+ }
+ return (NULL);
+}
+
+static struct nwent *
+nw_byaddr(struct irs_nw *this, void *net, int length, int type) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+ struct nwent *rval;
+ struct irs_nw *nw;
+
+ if (init(this) == -1)
+ return(NULL);
+
+ for (rule = pvt->rules; rule; rule = rule->next) {
+ nw = rule->inst->nw;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ rval = (*nw->byaddr)(nw, net, length, type);
+ if (rval != NULL)
+ return (rval);
+ if (pvt->res->res_h_errno != TRY_AGAIN &&
+ !(rule->flags & IRS_CONTINUE))
+ break;
+ }
+ return (NULL);
+}
+
+static void
+nw_rewind(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_nw *nw;
+
+ pvt->rule = pvt->rules;
+ if (pvt->rule) {
+ nw = pvt->rule->inst->nw;
+ (*nw->rewind)(nw);
+ }
+}
+
+static void
+nw_minimize(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+
+ if (pvt->res)
+ res_nclose(pvt->res);
+ for (rule = pvt->rules; rule != NULL; rule = rule->next) {
+ struct irs_nw *nw = rule->inst->nw;
+
+ (*nw->minimize)(nw);
+ }
+}
+
+static struct __res_state *
+nw_res_get(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (!pvt->res) {
+ struct __res_state *res;
+ res = (struct __res_state *)malloc(sizeof *res);
+ if (!res) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(res, 0, sizeof *res);
+ nw_res_set(this, res, free);
+ }
+
+ return (pvt->res);
+}
+
+static void
+nw_res_set(struct irs_nw *this, struct __res_state *res,
+ void (*free_res)(void *)) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+
+ if (pvt->res && pvt->free_res) {
+ res_nclose(pvt->res);
+ (*pvt->free_res)(pvt->res);
+ }
+
+ pvt->res = res;
+ pvt->free_res = free_res;
+
+ for (rule = pvt->rules; rule != NULL; rule = rule->next) {
+ struct irs_nw *nw = rule->inst->nw;
+
+ (*nw->res_set)(nw, pvt->res, NULL);
+ }
+}
+
+static int
+init(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (!pvt->res && !nw_res_get(this))
+ return (-1);
+ if (((pvt->res->options & RES_INIT) == 0U) &&
+ res_ninit(pvt->res) == -1)
+ return (-1);
+ return (0);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/gen_p.h b/usr/src/lib/libresolv2_joy/common/irs/gen_p.h
new file mode 100644
index 0000000000..1adc5909bb
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/gen_p.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: gen_p.h,v 1.3 2005/04/27 04:56:23 sra Exp $
+ */
+
+/*! \file
+ * Notes:
+ * We hope to create a complete set of thread-safe entry points someday,
+ * which will mean a set of getXbyY() functions that take as an argument
+ * a pointer to the map class, which will have a pointer to the private
+ * data, which will be used preferentially to the static variables that
+ * are necessary to support the "classic" interface. This "classic"
+ * interface will then be reimplemented as stubs on top of the thread
+ * safe modules, and will keep the map class pointers as their only
+ * static data. HOWEVER, we are not there yet. So while we will call
+ * the just-barely-converted map class methods with map class pointers,
+ * right now they probably all still use statics. We're not fooling
+ * anybody, and we're not trying to (yet).
+ */
+
+#ifndef _GEN_P_H_INCLUDED
+#define _GEN_P_H_INCLUDED
+
+/*%
+ * These are the access methods.
+ */
+enum irs_acc_id {
+ irs_lcl, /*%< Local. */
+ irs_dns, /*%< DNS or Hesiod. */
+ irs_nis, /*%< Sun NIS ("YP"). */
+ irs_irp, /*%< IR protocol. */
+ irs_nacc
+};
+
+/*%
+ * These are the map types.
+ */
+enum irs_map_id {
+ irs_gr, /*%< "group" */
+ irs_pw, /*%< "passwd" */
+ irs_sv, /*%< "services" */
+ irs_pr, /*%< "protocols" */
+ irs_ho, /*%< "hosts" */
+ irs_nw, /*%< "networks" */
+ irs_ng, /*%< "netgroup" */
+ irs_nmap
+};
+
+/*%
+ * This is an accessor instance.
+ */
+struct irs_inst {
+ struct irs_acc *acc;
+ struct irs_gr * gr;
+ struct irs_pw * pw;
+ struct irs_sv * sv;
+ struct irs_pr * pr;
+ struct irs_ho * ho;
+ struct irs_nw * nw;
+ struct irs_ng * ng;
+};
+
+/*%
+ * This is a search rule for some map type.
+ */
+struct irs_rule {
+ struct irs_rule * next;
+ struct irs_inst * inst;
+ int flags;
+};
+#define IRS_MERGE 0x0001 /*%< Don't stop if acc. has data? */
+#define IRS_CONTINUE 0x0002 /*%< Don't stop if acc. has no data? */
+/*
+ * This is the private data for a search access class.
+ */
+struct gen_p {
+ char * options;
+ struct irs_rule * map_rules[(int)irs_nmap];
+ struct irs_inst accessors[(int)irs_nacc];
+ struct __res_state * res;
+ void (*free_res) __P((void *));
+};
+
+/*
+ * Externs.
+ */
+
+extern struct irs_acc * irs_gen_acc __P((const char *, const char *conf_file));
+extern struct irs_gr * irs_gen_gr __P((struct irs_acc *));
+extern struct irs_pw * irs_gen_pw __P((struct irs_acc *));
+extern struct irs_sv * irs_gen_sv __P((struct irs_acc *));
+extern struct irs_pr * irs_gen_pr __P((struct irs_acc *));
+extern struct irs_ho * irs_gen_ho __P((struct irs_acc *));
+extern struct irs_nw * irs_gen_nw __P((struct irs_acc *));
+extern struct irs_ng * irs_gen_ng __P((struct irs_acc *));
+
+#endif /*_IRS_P_H_INCLUDED*/
diff --git a/usr/src/lib/libresolv2_joy/common/irs/gen_pr.c b/usr/src/lib/libresolv2_joy/common/irs/gen_pr.c
new file mode 100644
index 0000000000..9fd32c4dd9
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/gen_pr.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: gen_pr.c,v 1.3 2005/04/27 04:56:24 sra Exp $";
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+
+#include <errno.h>
+#include <resolv_joy.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <isc/memcluster.h>
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "gen_p.h"
+
+/* Types */
+
+struct pvt {
+ struct irs_rule * rules;
+ struct irs_rule * rule;
+ struct __res_state * res;
+ void (*free_res)(void *);
+};
+
+/* Forward */
+
+static void pr_close(struct irs_pr*);
+static struct protoent * pr_next(struct irs_pr *);
+static struct protoent * pr_byname(struct irs_pr *, const char *);
+static struct protoent * pr_bynumber(struct irs_pr *, int);
+static void pr_rewind(struct irs_pr *);
+static void pr_minimize(struct irs_pr *);
+static struct __res_state * pr_res_get(struct irs_pr *);
+static void pr_res_set(struct irs_pr *,
+ struct __res_state *,
+ void (*)(void *));
+
+/* Public */
+
+struct irs_pr *
+irs_gen_pr(struct irs_acc *this) {
+ struct gen_p *accpvt = (struct gen_p *)this->private;
+ struct irs_pr *pr;
+ struct pvt *pvt;
+
+ if (!(pr = memget(sizeof *pr))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pr, 0x5e, sizeof *pr);
+ if (!(pvt = memget(sizeof *pvt))) {
+ memput(pr, sizeof *pr);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ pvt->rules = accpvt->map_rules[irs_pr];
+ pvt->rule = pvt->rules;
+ pr->private = pvt;
+ pr->close = pr_close;
+ pr->next = pr_next;
+ pr->byname = pr_byname;
+ pr->bynumber = pr_bynumber;
+ pr->rewind = pr_rewind;
+ pr->minimize = pr_minimize;
+ pr->res_get = pr_res_get;
+ pr->res_set = pr_res_set;
+ return (pr);
+}
+
+/* Methods */
+
+static void
+pr_close(struct irs_pr *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+static struct protoent *
+pr_next(struct irs_pr *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct protoent *rval;
+ struct irs_pr *pr;
+
+ while (pvt->rule) {
+ pr = pvt->rule->inst->pr;
+ rval = (*pr->next)(pr);
+ if (rval)
+ return (rval);
+ if (!(pvt->rules->flags & IRS_CONTINUE))
+ break;
+ pvt->rule = pvt->rule->next;
+ if (pvt->rule) {
+ pr = pvt->rule->inst->pr;
+ (*pr->rewind)(pr);
+ }
+ }
+ return (NULL);
+}
+
+static struct protoent *
+pr_byname(struct irs_pr *this, const char *name) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+ struct protoent *rval;
+ struct irs_pr *pr;
+
+ rval = NULL;
+ for (rule = pvt->rules; rule; rule = rule->next) {
+ pr = rule->inst->pr;
+ rval = (*pr->byname)(pr, name);
+ if (rval || !(rule->flags & IRS_CONTINUE))
+ break;
+ }
+ return (rval);
+}
+
+static struct protoent *
+pr_bynumber(struct irs_pr *this, int proto) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+ struct protoent *rval;
+ struct irs_pr *pr;
+
+ rval = NULL;
+ for (rule = pvt->rules; rule; rule = rule->next) {
+ pr = rule->inst->pr;
+ rval = (*pr->bynumber)(pr, proto);
+ if (rval || !(rule->flags & IRS_CONTINUE))
+ break;
+ }
+ return (rval);
+}
+
+static void
+pr_rewind(struct irs_pr *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_pr *pr;
+
+ pvt->rule = pvt->rules;
+ if (pvt->rule) {
+ pr = pvt->rule->inst->pr;
+ (*pr->rewind)(pr);
+ }
+}
+
+static void
+pr_minimize(struct irs_pr *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+
+ for (rule = pvt->rules; rule != NULL; rule = rule->next) {
+ struct irs_pr *pr = rule->inst->pr;
+
+ (*pr->minimize)(pr);
+ }
+}
+
+static struct __res_state *
+pr_res_get(struct irs_pr *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (!pvt->res) {
+ struct __res_state *res;
+ res = (struct __res_state *)malloc(sizeof *res);
+ if (!res) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(res, 0, sizeof *res);
+ pr_res_set(this, res, free);
+ }
+
+ return (pvt->res);
+}
+
+static void
+pr_res_set(struct irs_pr *this, struct __res_state *res,
+ void (*free_res)(void *)) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+
+ if (pvt->res && pvt->free_res) {
+ res_nclose(pvt->res);
+ (*pvt->free_res)(pvt->res);
+ }
+
+ pvt->res = res;
+ pvt->free_res = free_res;
+
+ for (rule = pvt->rules; rule != NULL; rule = rule->next) {
+ struct irs_pr *pr = rule->inst->pr;
+
+ if (pr->res_set)
+ (*pr->res_set)(pr, pvt->res, NULL);
+ }
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/gen_sv.c b/usr/src/lib/libresolv2_joy/common/irs/gen_sv.c
new file mode 100644
index 0000000000..93b70e57ec
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/gen_sv.c
@@ -0,0 +1,229 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: gen_sv.c,v 1.3 2005/04/27 04:56:24 sra Exp $";
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <isc/memcluster.h>
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "gen_p.h"
+
+/* Types */
+
+struct pvt {
+ struct irs_rule * rules;
+ struct irs_rule * rule;
+ struct __res_state * res;
+ void (*free_res)(void *);
+};
+
+/* Forward */
+
+static void sv_close(struct irs_sv*);
+static struct servent * sv_next(struct irs_sv *);
+static struct servent * sv_byname(struct irs_sv *, const char *,
+ const char *);
+static struct servent * sv_byport(struct irs_sv *, int, const char *);
+static void sv_rewind(struct irs_sv *);
+static void sv_minimize(struct irs_sv *);
+static struct __res_state * sv_res_get(struct irs_sv *);
+static void sv_res_set(struct irs_sv *,
+ struct __res_state *,
+ void (*)(void *));
+
+/* Public */
+
+struct irs_sv *
+irs_gen_sv(struct irs_acc *this) {
+ struct gen_p *accpvt = (struct gen_p *)this->private;
+ struct irs_sv *sv;
+ struct pvt *pvt;
+
+ if (!(sv = memget(sizeof *sv))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(sv, 0x5e, sizeof *sv);
+ if (!(pvt = memget(sizeof *pvt))) {
+ memput(sv, sizeof *sv);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ pvt->rules = accpvt->map_rules[irs_sv];
+ pvt->rule = pvt->rules;
+ sv->private = pvt;
+ sv->close = sv_close;
+ sv->next = sv_next;
+ sv->byname = sv_byname;
+ sv->byport = sv_byport;
+ sv->rewind = sv_rewind;
+ sv->minimize = sv_minimize;
+ sv->res_get = sv_res_get;
+ sv->res_set = sv_res_set;
+ return (sv);
+}
+
+/* Methods */
+
+static void
+sv_close(struct irs_sv *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+static struct servent *
+sv_next(struct irs_sv *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct servent *rval;
+ struct irs_sv *sv;
+
+ while (pvt->rule) {
+ sv = pvt->rule->inst->sv;
+ rval = (*sv->next)(sv);
+ if (rval)
+ return (rval);
+ if (!(pvt->rule->flags & IRS_CONTINUE))
+ break;
+ pvt->rule = pvt->rule->next;
+ if (pvt->rule) {
+ sv = pvt->rule->inst->sv;
+ (*sv->rewind)(sv);
+ }
+ }
+ return (NULL);
+}
+
+static struct servent *
+sv_byname(struct irs_sv *this, const char *name, const char *proto) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+ struct servent *rval;
+ struct irs_sv *sv;
+
+ rval = NULL;
+ for (rule = pvt->rules; rule; rule = rule->next) {
+ sv = rule->inst->sv;
+ rval = (*sv->byname)(sv, name, proto);
+ if (rval || !(rule->flags & IRS_CONTINUE))
+ break;
+ }
+ return (rval);
+}
+
+static struct servent *
+sv_byport(struct irs_sv *this, int port, const char *proto) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+ struct servent *rval;
+ struct irs_sv *sv;
+
+ rval = NULL;
+ for (rule = pvt->rules; rule; rule = rule->next) {
+ sv = rule->inst->sv;
+ rval = (*sv->byport)(sv, port, proto);
+ if (rval || !(rule->flags & IRS_CONTINUE))
+ break;
+ }
+ return (rval);
+}
+
+static void
+sv_rewind(struct irs_sv *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_sv *sv;
+
+ pvt->rule = pvt->rules;
+ if (pvt->rule) {
+ sv = pvt->rule->inst->sv;
+ (*sv->rewind)(sv);
+ }
+}
+
+static void
+sv_minimize(struct irs_sv *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+
+ for (rule = pvt->rules; rule != NULL; rule = rule->next) {
+ struct irs_sv *sv = rule->inst->sv;
+
+ (*sv->minimize)(sv);
+ }
+}
+
+static struct __res_state *
+sv_res_get(struct irs_sv *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (!pvt->res) {
+ struct __res_state *res;
+ res = (struct __res_state *)malloc(sizeof *res);
+ if (!res) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(res, 0, sizeof *res);
+ sv_res_set(this, res, free);
+ }
+
+ return (pvt->res);
+}
+
+static void
+sv_res_set(struct irs_sv *this, struct __res_state *res,
+ void (*free_res)(void *)) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct irs_rule *rule;
+
+ if (pvt->res && pvt->free_res) {
+ res_nclose(pvt->res);
+ (*pvt->free_res)(pvt->res);
+ }
+
+ pvt->res = res;
+ pvt->free_res = free_res;
+
+ for (rule = pvt->rules; rule != NULL; rule = rule->next) {
+ struct irs_sv *sv = rule->inst->sv;
+
+ if (sv->res_set)
+ (*sv->res_set)(sv, pvt->res, NULL);
+ }
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/getaddrinfo.c b/usr/src/lib/libresolv2_joy/common/irs/getaddrinfo.c
new file mode 100644
index 0000000000..19bbbea854
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/getaddrinfo.c
@@ -0,0 +1,1253 @@
+/* $KAME: getaddrinfo.c,v 1.14 2001/01/06 09:41:15 jinmei Exp $ */
+
+/*
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*! \file
+ * Issues to be discussed:
+ *\li Thread safe-ness must be checked.
+ *\li Return values. There are nonstandard return values defined and used
+ * in the source code. This is because RFC2553 is silent about which error
+ * code must be returned for which situation.
+ *\li IPv4 classful (shortened) form. RFC2553 is silent about it. XNET 5.2
+ * says to use inet_aton() to convert IPv4 numeric to binary (allows
+ * classful form as a result).
+ * current code - disallow classful form for IPv4 (due to use of inet_pton).
+ *\li freeaddrinfo(NULL). RFC2553 is silent about it. XNET 5.2 says it is
+ * invalid.
+ * current code - SEGV on freeaddrinfo(NULL)
+ * Note:
+ *\li We use getipnodebyname() just for thread-safeness. There's no intent
+ * to let it do PF_UNSPEC (actually we never pass PF_UNSPEC to
+ * getipnodebyname().
+ *\li The code filters out AFs that are not supported by the kernel,
+ * when globbing NULL hostname (to loopback, or wildcard). Is it the right
+ * thing to do? What is the relationship with post-RFC2553 AI_ADDRCONFIG
+ * in ai_flags?
+ *\li (post-2553) semantics of AI_ADDRCONFIG itself is too vague.
+ * (1) what should we do against numeric hostname (2) what should we do
+ * against NULL hostname (3) what is AI_ADDRCONFIG itself. AF not ready?
+ * non-loopback address configured? global address configured?
+ * \par Additional Issue:
+ * To avoid search order issue, we have a big amount of code duplicate
+ * from gethnamaddr.c and some other places. The issues that there's no
+ * lower layer function to lookup "IPv4 or IPv6" record. Calling
+ * gethostbyname2 from getaddrinfo will end up in wrong search order, as
+ * follows:
+ * \li The code makes use of following calls when asked to resolver with
+ * ai_family = PF_UNSPEC:
+ *\code getipnodebyname(host, AF_INET6);
+ * getipnodebyname(host, AF_INET);
+ *\endcode
+ * \li This will result in the following queries if the node is configure to
+ * prefer /etc/hosts than DNS:
+ *\code
+ * lookup /etc/hosts for IPv6 address
+ * lookup DNS for IPv6 address
+ * lookup /etc/hosts for IPv4 address
+ * lookup DNS for IPv4 address
+ *\endcode
+ * which may not meet people's requirement.
+ * \li The right thing to happen is to have underlying layer which does
+ * PF_UNSPEC lookup (lookup both) and return chain of addrinfos.
+ * This would result in a bit of code duplicate with _dns_ghbyname() and
+ * friends.
+ */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/socket.h>
+
+#include <net/if.h>
+#include <netinet/in.h>
+
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+
+#include <stdarg.h>
+
+#include <irs.h>
+#include <isc/assertions.h>
+
+#include "port_after.h"
+
+#include "irs_data.h"
+
+#define SUCCESS 0
+#define ANY 0
+#define YES 1
+#define NO 0
+
+static const char in_addrany[] = { 0, 0, 0, 0 };
+static const char in6_addrany[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+static const char in_loopback[] = { 127, 0, 0, 1 };
+static const char in6_loopback[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1
+};
+
+static const struct afd {
+ int a_af;
+ int a_addrlen;
+ int a_socklen;
+ int a_off;
+ const char *a_addrany;
+ const char *a_loopback;
+ int a_scoped;
+} afdl [] = {
+ {PF_INET6, sizeof(struct in6_addr),
+ sizeof(struct sockaddr_in6),
+ offsetof(struct sockaddr_in6, sin6_addr),
+ in6_addrany, in6_loopback, 1},
+ {PF_INET, sizeof(struct in_addr),
+ sizeof(struct sockaddr_in),
+ offsetof(struct sockaddr_in, sin_addr),
+ in_addrany, in_loopback, 0},
+ {0, 0, 0, 0, NULL, NULL, 0},
+};
+
+struct explore {
+ int e_af;
+ int e_socktype;
+ int e_protocol;
+ const char *e_protostr;
+ int e_wild;
+#define WILD_AF(ex) ((ex)->e_wild & 0x01)
+#define WILD_SOCKTYPE(ex) ((ex)->e_wild & 0x02)
+#define WILD_PROTOCOL(ex) ((ex)->e_wild & 0x04)
+};
+
+static const struct explore explore[] = {
+#if 0
+ { PF_LOCAL, 0, ANY, ANY, NULL, 0x01 },
+#endif
+ { PF_INET6, SOCK_DGRAM, IPPROTO_UDP, "udp", 0x07 },
+ { PF_INET6, SOCK_STREAM, IPPROTO_TCP, "tcp", 0x07 },
+ { PF_INET6, SOCK_RAW, ANY, NULL, 0x05 },
+ { PF_INET, SOCK_DGRAM, IPPROTO_UDP, "udp", 0x07 },
+ { PF_INET, SOCK_STREAM, IPPROTO_TCP, "tcp", 0x07 },
+ { PF_INET, SOCK_RAW, ANY, NULL, 0x05 },
+ { -1, 0, 0, NULL, 0 },
+};
+
+#define PTON_MAX 16
+
+static int str_isnumber __P((const char *));
+static int explore_fqdn __P((const struct addrinfo *, const char *,
+ const char *, struct addrinfo **));
+static int explore_copy __P((const struct addrinfo *, const struct addrinfo *,
+ struct addrinfo **));
+static int explore_null __P((const struct addrinfo *,
+ const char *, struct addrinfo **));
+static int explore_numeric __P((const struct addrinfo *, const char *,
+ const char *, struct addrinfo **));
+static int explore_numeric_scope __P((const struct addrinfo *, const char *,
+ const char *, struct addrinfo **));
+static int get_canonname __P((const struct addrinfo *,
+ struct addrinfo *, const char *));
+static struct addrinfo *get_ai __P((const struct addrinfo *,
+ const struct afd *, const char *));
+static struct addrinfo *copy_ai __P((const struct addrinfo *));
+static int get_portmatch __P((const struct addrinfo *, const char *));
+static int get_port __P((const struct addrinfo *, const char *, int));
+static const struct afd *find_afd __P((int));
+static int addrconfig __P((int));
+static int ip6_str2scopeid __P((char *, struct sockaddr_in6 *,
+ u_int32_t *scopeidp));
+static struct net_data *init __P((void));
+
+struct addrinfo *hostent2addrinfo __P((struct hostent *,
+ const struct addrinfo *));
+struct addrinfo *addr2addrinfo __P((const struct addrinfo *,
+ const char *));
+
+#if 0
+static const char *ai_errlist[] = {
+ "Success",
+ "Address family for hostname not supported", /*%< EAI_ADDRFAMILY */
+ "Temporary failure in name resolution", /*%< EAI_AGAIN */
+ "Invalid value for ai_flags", /*%< EAI_BADFLAGS */
+ "Non-recoverable failure in name resolution", /*%< EAI_FAIL */
+ "ai_family not supported", /*%< EAI_FAMILY */
+ "Memory allocation failure", /*%< EAI_MEMORY */
+ "No address associated with hostname", /*%< EAI_NODATA */
+ "hostname nor servname provided, or not known", /*%< EAI_NONAME */
+ "servname not supported for ai_socktype", /*%< EAI_SERVICE */
+ "ai_socktype not supported", /*%< EAI_SOCKTYPE */
+ "System error returned in errno", /*%< EAI_SYSTEM */
+ "Invalid value for hints", /*%< EAI_BADHINTS */
+ "Resolved protocol is unknown", /*%< EAI_PROTOCOL */
+ "Unknown error", /*%< EAI_MAX */
+};
+#endif
+
+/* XXX macros that make external reference is BAD. */
+
+#define GET_AI(ai, afd, addr) \
+do { \
+ /* external reference: pai, error, and label free */ \
+ (ai) = get_ai(pai, (afd), (addr)); \
+ if ((ai) == NULL) { \
+ error = EAI_MEMORY; \
+ goto free; \
+ } \
+} while (/*CONSTCOND*/0)
+
+#define GET_PORT(ai, serv) \
+do { \
+ /* external reference: error and label free */ \
+ error = get_port((ai), (serv), 0); \
+ if (error != 0) \
+ goto free; \
+} while (/*CONSTCOND*/0)
+
+#define GET_CANONNAME(ai, str) \
+do { \
+ /* external reference: pai, error and label free */ \
+ error = get_canonname(pai, (ai), (str)); \
+ if (error != 0) \
+ goto free; \
+} while (/*CONSTCOND*/0)
+
+#ifndef SOLARIS2
+#define SETERROR(err) \
+do { \
+ /* external reference: error, and label bad */ \
+ error = (err); \
+ goto bad; \
+ /*NOTREACHED*/ \
+} while (/*CONSTCOND*/0)
+#else
+#define SETERROR(err) \
+do { \
+ /* external reference: error, and label bad */ \
+ error = (err); \
+ if (error == error) \
+ goto bad; \
+} while (/*CONSTCOND*/0)
+#endif
+
+
+#define MATCH_FAMILY(x, y, w) \
+ ((x) == (y) || (/*CONSTCOND*/(w) && ((x) == PF_UNSPEC || (y) == PF_UNSPEC)))
+#define MATCH(x, y, w) \
+ ((x) == (y) || (/*CONSTCOND*/(w) && ((x) == ANY || (y) == ANY)))
+
+#if 0 /*%< bind8 has its own version */
+char *
+gai_strerror(ecode)
+ int ecode;
+{
+ if (ecode < 0 || ecode > EAI_MAX)
+ ecode = EAI_MAX;
+ return ai_errlist[ecode];
+}
+#endif
+
+void
+freeaddrinfo(ai)
+ struct addrinfo *ai;
+{
+ struct addrinfo *next;
+
+ do {
+ next = ai->ai_next;
+ if (ai->ai_canonname)
+ free(ai->ai_canonname);
+ /* no need to free(ai->ai_addr) */
+ free(ai);
+ ai = next;
+ } while (ai);
+}
+
+static int
+str_isnumber(p)
+ const char *p;
+{
+ char *ep;
+
+ if (*p == '\0')
+ return NO;
+ ep = NULL;
+ errno = 0;
+ (void)strtoul(p, &ep, 10);
+ if (errno == 0 && ep && *ep == '\0')
+ return YES;
+ else
+ return NO;
+}
+
+int
+getaddrinfo(hostname, servname, hints, res)
+ const char *hostname, *servname;
+ const struct addrinfo *hints;
+ struct addrinfo **res;
+{
+ struct addrinfo sentinel;
+ struct addrinfo *cur;
+ int error = 0;
+ struct addrinfo ai, ai0, *afai = NULL;
+ struct addrinfo *pai;
+ const struct explore *ex;
+
+ memset(&sentinel, 0, sizeof(sentinel));
+ cur = &sentinel;
+ pai = &ai;
+ pai->ai_flags = 0;
+ pai->ai_family = PF_UNSPEC;
+ pai->ai_socktype = ANY;
+ pai->ai_protocol = ANY;
+#if defined(sun) && defined(_SOCKLEN_T) && defined(__sparcv9)
+ /*
+ * clear _ai_pad to preserve binary
+ * compatibility with previously compiled 64-bit
+ * applications in a pre-SUSv3 environment by
+ * guaranteeing the upper 32-bits are empty.
+ */
+ pai->_ai_pad = 0;
+#endif
+ pai->ai_addrlen = 0;
+ pai->ai_canonname = NULL;
+ pai->ai_addr = NULL;
+ pai->ai_next = NULL;
+
+ if (hostname == NULL && servname == NULL)
+ return EAI_NONAME;
+ if (hints) {
+ /* error check for hints */
+ if (hints->ai_addrlen || hints->ai_canonname ||
+ hints->ai_addr || hints->ai_next)
+ SETERROR(EAI_BADHINTS); /*%< xxx */
+ if (hints->ai_flags & ~AI_MASK)
+ SETERROR(EAI_BADFLAGS);
+ switch (hints->ai_family) {
+ case PF_UNSPEC:
+ case PF_INET:
+ case PF_INET6:
+ break;
+ default:
+ SETERROR(EAI_FAMILY);
+ }
+ memcpy(pai, hints, sizeof(*pai));
+
+#if defined(sun) && defined(_SOCKLEN_T) && defined(__sparcv9)
+ /*
+ * We need to clear _ai_pad to preserve binary
+ * compatibility. See prior comment.
+ */
+ pai->_ai_pad = 0;
+#endif
+ /*
+ * if both socktype/protocol are specified, check if they
+ * are meaningful combination.
+ */
+ if (pai->ai_socktype != ANY && pai->ai_protocol != ANY) {
+ for (ex = explore; ex->e_af >= 0; ex++) {
+ if (pai->ai_family != ex->e_af)
+ continue;
+ if (ex->e_socktype == ANY)
+ continue;
+ if (ex->e_protocol == ANY)
+ continue;
+ if (pai->ai_socktype == ex->e_socktype &&
+ pai->ai_protocol != ex->e_protocol) {
+ SETERROR(EAI_BADHINTS);
+ }
+ }
+ }
+ }
+
+ /*
+ * post-2553: AI_ALL and AI_V4MAPPED are effective only against
+ * AF_INET6 query. They needs to be ignored if specified in other
+ * occassions.
+ */
+ switch (pai->ai_flags & (AI_ALL | AI_V4MAPPED)) {
+ case AI_V4MAPPED:
+ case AI_ALL | AI_V4MAPPED:
+ if (pai->ai_family != AF_INET6)
+ pai->ai_flags &= ~(AI_ALL | AI_V4MAPPED);
+ break;
+ case AI_ALL:
+#if 1
+ /* illegal */
+ SETERROR(EAI_BADFLAGS);
+#else
+ pai->ai_flags &= ~(AI_ALL | AI_V4MAPPED);
+ break;
+#endif
+ }
+
+ /*
+ * check for special cases. (1) numeric servname is disallowed if
+ * socktype/protocol are left unspecified. (2) servname is disallowed
+ * for raw and other inet{,6} sockets.
+ */
+ if (MATCH_FAMILY(pai->ai_family, PF_INET, 1)
+#ifdef PF_INET6
+ || MATCH_FAMILY(pai->ai_family, PF_INET6, 1)
+#endif
+ ) {
+ ai0 = *pai; /* backup *pai */
+
+ if (pai->ai_family == PF_UNSPEC) {
+#ifdef PF_INET6
+ pai->ai_family = PF_INET6;
+#else
+ pai->ai_family = PF_INET;
+#endif
+ }
+ error = get_portmatch(pai, servname);
+ if (error)
+ SETERROR(error);
+
+ *pai = ai0;
+ }
+
+ ai0 = *pai;
+
+ /* NULL hostname, or numeric hostname */
+ for (ex = explore; ex->e_af >= 0; ex++) {
+ *pai = ai0;
+
+ if (!MATCH_FAMILY(pai->ai_family, ex->e_af, WILD_AF(ex)))
+ continue;
+ if (!MATCH(pai->ai_socktype, ex->e_socktype, WILD_SOCKTYPE(ex)))
+ continue;
+ if (!MATCH(pai->ai_protocol, ex->e_protocol, WILD_PROTOCOL(ex)))
+ continue;
+
+ if (pai->ai_family == PF_UNSPEC)
+ pai->ai_family = ex->e_af;
+ if (pai->ai_socktype == ANY && ex->e_socktype != ANY)
+ pai->ai_socktype = ex->e_socktype;
+ if (pai->ai_protocol == ANY && ex->e_protocol != ANY)
+ pai->ai_protocol = ex->e_protocol;
+
+ /*
+ * if the servname does not match socktype/protocol, ignore it.
+ */
+ if (get_portmatch(pai, servname) != 0)
+ continue;
+
+ if (hostname == NULL) {
+ /*
+ * filter out AFs that are not supported by the kernel
+ * XXX errno?
+ */
+ if (!addrconfig(pai->ai_family))
+ continue;
+ error = explore_null(pai, servname, &cur->ai_next);
+ } else
+ error = explore_numeric_scope(pai, hostname, servname,
+ &cur->ai_next);
+
+ if (error)
+ goto free;
+
+ while (cur && cur->ai_next)
+ cur = cur->ai_next;
+ }
+
+ /*
+ * XXX
+ * If numreic representation of AF1 can be interpreted as FQDN
+ * representation of AF2, we need to think again about the code below.
+ */
+ if (sentinel.ai_next)
+ goto good;
+
+ if (pai->ai_flags & AI_NUMERICHOST)
+ SETERROR(EAI_NONAME);
+ if (hostname == NULL)
+ SETERROR(EAI_NONAME);
+
+ /*
+ * hostname as alphabetical name.
+ * We'll make sure that
+ * - if returning addrinfo list is empty, return non-zero error
+ * value (already known one or EAI_NONAME).
+ * - otherwise,
+ * + if we haven't had any errors, return 0 (i.e. success).
+ * + if we've had an error, free the list and return the error.
+ * without any assumption on the behavior of explore_fqdn().
+ */
+
+ /* first, try to query DNS for all possible address families. */
+ *pai = ai0;
+ error = explore_fqdn(pai, hostname, servname, &afai);
+ if (error) {
+ if (afai != NULL)
+ freeaddrinfo(afai);
+ goto free;
+ }
+ if (afai == NULL) {
+ error = EAI_NONAME; /*%< we've had no errors. */
+ goto free;
+ }
+
+ /*
+ * we would like to prefer AF_INET6 than AF_INET, so we'll make an
+ * outer loop by AFs.
+ */
+ for (ex = explore; ex->e_af >= 0; ex++) {
+ *pai = ai0;
+
+ if (pai->ai_family == PF_UNSPEC)
+ pai->ai_family = ex->e_af;
+
+ if (!MATCH_FAMILY(pai->ai_family, ex->e_af, WILD_AF(ex)))
+ continue;
+ if (!MATCH(pai->ai_socktype, ex->e_socktype,
+ WILD_SOCKTYPE(ex))) {
+ continue;
+ }
+ if (!MATCH(pai->ai_protocol, ex->e_protocol,
+ WILD_PROTOCOL(ex))) {
+ continue;
+ }
+
+#ifdef AI_ADDRCONFIG
+ /*
+ * If AI_ADDRCONFIG is specified, check if we are
+ * expected to return the address family or not.
+ */
+ if ((pai->ai_flags & AI_ADDRCONFIG) != 0 &&
+ !addrconfig(pai->ai_family))
+ continue;
+#endif
+
+ if (pai->ai_family == PF_UNSPEC)
+ pai->ai_family = ex->e_af;
+ if (pai->ai_socktype == ANY && ex->e_socktype != ANY)
+ pai->ai_socktype = ex->e_socktype;
+ if (pai->ai_protocol == ANY && ex->e_protocol != ANY)
+ pai->ai_protocol = ex->e_protocol;
+
+ /*
+ * if the servname does not match socktype/protocol, ignore it.
+ */
+ if (get_portmatch(pai, servname) != 0)
+ continue;
+
+ if ((error = explore_copy(pai, afai, &cur->ai_next)) != 0) {
+ freeaddrinfo(afai);
+ goto free;
+ }
+
+ while (cur && cur->ai_next)
+ cur = cur->ai_next;
+ }
+
+ freeaddrinfo(afai); /*%< afai must not be NULL at this point. */
+
+ if (sentinel.ai_next) {
+good:
+ *res = sentinel.ai_next;
+ return(SUCCESS);
+ } else {
+ /*
+ * All the process succeeded, but we've had an empty list.
+ * This can happen if the given hints do not match our
+ * candidates.
+ */
+ error = EAI_NONAME;
+ }
+
+free:
+bad:
+ if (sentinel.ai_next)
+ freeaddrinfo(sentinel.ai_next);
+ *res = NULL;
+ return(error);
+}
+
+/*%
+ * FQDN hostname, DNS lookup
+ */
+static int
+explore_fqdn(pai, hostname, servname, res)
+ const struct addrinfo *pai;
+ const char *hostname;
+ const char *servname;
+ struct addrinfo **res;
+{
+ struct addrinfo *result;
+ struct addrinfo *cur;
+ struct net_data *net_data = init();
+ struct irs_ho *ho;
+ int error = 0;
+ char tmp[NS_MAXDNAME];
+ const char *cp;
+
+ INSIST(res != NULL && *res == NULL);
+
+ /*
+ * if the servname does not match socktype/protocol, ignore it.
+ */
+ if (get_portmatch(pai, servname) != 0)
+ return(0);
+
+ if (!net_data || !(ho = net_data->ho))
+ return(0);
+#if 0 /*%< XXX (notyet) */
+ if (net_data->ho_stayopen && net_data->ho_last &&
+ net_data->ho_last->h_addrtype == af) {
+ if (ns_samename(name, net_data->ho_last->h_name) == 1)
+ return (net_data->ho_last);
+ for (hap = net_data->ho_last->h_aliases; hap && *hap; hap++)
+ if (ns_samename(name, *hap) == 1)
+ return (net_data->ho_last);
+ }
+#endif
+ if (!strchr(hostname, '.') &&
+ (cp = res_hostalias(net_data->res, hostname,
+ tmp, sizeof(tmp))))
+ hostname = cp;
+ result = (*ho->addrinfo)(ho, hostname, pai);
+ if (!net_data->ho_stayopen) {
+ (*ho->minimize)(ho);
+ }
+ if (result == NULL) {
+ int e = h_errno;
+
+ switch(e) {
+ case NETDB_INTERNAL:
+ error = EAI_SYSTEM;
+ break;
+ case TRY_AGAIN:
+ error = EAI_AGAIN;
+ break;
+ case NO_RECOVERY:
+ error = EAI_FAIL;
+ break;
+ case HOST_NOT_FOUND:
+ case NO_DATA:
+ error = EAI_NONAME;
+ break;
+ default:
+ case NETDB_SUCCESS: /*%< should be impossible... */
+ error = EAI_NONAME;
+ break;
+ }
+ goto free;
+ }
+
+ for (cur = result; cur; cur = cur->ai_next) {
+ GET_PORT(cur, servname); /*%< XXX: redundant lookups... */
+ /* canonname should already be filled. */
+ }
+
+ *res = result;
+
+ return(0);
+
+free:
+ if (result)
+ freeaddrinfo(result);
+ return error;
+}
+
+static int
+explore_copy(pai, src0, res)
+ const struct addrinfo *pai; /*%< seed */
+ const struct addrinfo *src0; /*%< source */
+ struct addrinfo **res;
+{
+ int error;
+ struct addrinfo sentinel, *cur;
+ const struct addrinfo *src;
+
+ error = 0;
+ sentinel.ai_next = NULL;
+ cur = &sentinel;
+
+ for (src = src0; src != NULL; src = src->ai_next) {
+ if (src->ai_family != pai->ai_family)
+ continue;
+
+ cur->ai_next = copy_ai(src);
+ if (!cur->ai_next) {
+ error = EAI_MEMORY;
+ goto fail;
+ }
+
+ cur->ai_next->ai_socktype = pai->ai_socktype;
+ cur->ai_next->ai_protocol = pai->ai_protocol;
+ cur = cur->ai_next;
+ }
+
+ *res = sentinel.ai_next;
+ return 0;
+
+fail:
+ freeaddrinfo(sentinel.ai_next);
+ return error;
+}
+
+/*%
+ * hostname == NULL.
+ * passive socket -> anyaddr (0.0.0.0 or ::)
+ * non-passive socket -> localhost (127.0.0.1 or ::1)
+ */
+static int
+explore_null(pai, servname, res)
+ const struct addrinfo *pai;
+ const char *servname;
+ struct addrinfo **res;
+{
+ const struct afd *afd;
+ struct addrinfo *cur;
+ struct addrinfo sentinel;
+ int error;
+
+ *res = NULL;
+ sentinel.ai_next = NULL;
+ cur = &sentinel;
+
+ afd = find_afd(pai->ai_family);
+ if (afd == NULL)
+ return 0;
+
+ if (pai->ai_flags & AI_PASSIVE) {
+ GET_AI(cur->ai_next, afd, afd->a_addrany);
+ /* xxx meaningless?
+ * GET_CANONNAME(cur->ai_next, "anyaddr");
+ */
+ GET_PORT(cur->ai_next, servname);
+ } else {
+ GET_AI(cur->ai_next, afd, afd->a_loopback);
+ /* xxx meaningless?
+ * GET_CANONNAME(cur->ai_next, "localhost");
+ */
+ GET_PORT(cur->ai_next, servname);
+ }
+ cur = cur->ai_next;
+
+ *res = sentinel.ai_next;
+ return 0;
+
+free:
+ if (sentinel.ai_next)
+ freeaddrinfo(sentinel.ai_next);
+ return error;
+}
+
+/*%
+ * numeric hostname
+ */
+static int
+explore_numeric(pai, hostname, servname, res)
+ const struct addrinfo *pai;
+ const char *hostname;
+ const char *servname;
+ struct addrinfo **res;
+{
+ const struct afd *afd;
+ struct addrinfo *cur;
+ struct addrinfo sentinel;
+ int error;
+ char pton[PTON_MAX];
+
+ *res = NULL;
+ sentinel.ai_next = NULL;
+ cur = &sentinel;
+
+ afd = find_afd(pai->ai_family);
+ if (afd == NULL)
+ return 0;
+
+ switch (afd->a_af) {
+#if 0 /*X/Open spec*/
+ case AF_INET:
+ if (inet_aton(hostname, (struct in_addr *)pton) == 1) {
+ if (pai->ai_family == afd->a_af ||
+ pai->ai_family == PF_UNSPEC /*?*/) {
+ GET_AI(cur->ai_next, afd, pton);
+ GET_PORT(cur->ai_next, servname);
+ while (cur->ai_next)
+ cur = cur->ai_next;
+ } else
+ SETERROR(EAI_FAMILY); /*xxx*/
+ }
+ break;
+#endif
+ default:
+ if (inet_pton(afd->a_af, hostname, pton) == 1) {
+ if (pai->ai_family == afd->a_af ||
+ pai->ai_family == PF_UNSPEC /*?*/) {
+ GET_AI(cur->ai_next, afd, pton);
+ GET_PORT(cur->ai_next, servname);
+ while (cur->ai_next)
+ cur = cur->ai_next;
+ } else
+ SETERROR(EAI_FAMILY); /*xxx*/
+ }
+ break;
+ }
+
+ *res = sentinel.ai_next;
+ return 0;
+
+free:
+bad:
+ if (sentinel.ai_next)
+ freeaddrinfo(sentinel.ai_next);
+ return error;
+}
+
+/*%
+ * numeric hostname with scope
+ */
+static int
+explore_numeric_scope(pai, hostname, servname, res)
+ const struct addrinfo *pai;
+ const char *hostname;
+ const char *servname;
+ struct addrinfo **res;
+{
+#ifndef SCOPE_DELIMITER
+ return explore_numeric(pai, hostname, servname, res);
+#else
+ const struct afd *afd;
+ struct addrinfo *cur;
+ int error;
+ char *cp, *hostname2 = NULL, *scope, *addr;
+ struct sockaddr_in6 *sin6;
+
+ afd = find_afd(pai->ai_family);
+ if (afd == NULL)
+ return 0;
+
+ if (!afd->a_scoped)
+ return explore_numeric(pai, hostname, servname, res);
+
+ cp = strchr(hostname, SCOPE_DELIMITER);
+ if (cp == NULL)
+ return explore_numeric(pai, hostname, servname, res);
+
+ /*
+ * Handle special case of <scoped_address><delimiter><scope id>
+ */
+ hostname2 = strdup(hostname);
+ if (hostname2 == NULL)
+ return EAI_MEMORY;
+ /* terminate at the delimiter */
+ hostname2[cp - hostname] = '\0';
+ addr = hostname2;
+ scope = cp + 1;
+
+ error = explore_numeric(pai, addr, servname, res);
+ if (error == 0) {
+ u_int32_t scopeid = 0;
+
+ for (cur = *res; cur; cur = cur->ai_next) {
+ if (cur->ai_family != AF_INET6)
+ continue;
+ sin6 = (struct sockaddr_in6 *)(void *)cur->ai_addr;
+ if (!ip6_str2scopeid(scope, sin6, &scopeid)) {
+ free(hostname2);
+ return(EAI_NONAME); /*%< XXX: is return OK? */
+ }
+#ifdef HAVE_SIN6_SCOPE_ID
+ sin6->sin6_scope_id = scopeid;
+#endif
+ }
+ }
+
+ free(hostname2);
+
+ return error;
+#endif
+}
+
+static int
+get_canonname(pai, ai, str)
+ const struct addrinfo *pai;
+ struct addrinfo *ai;
+ const char *str;
+{
+ if ((pai->ai_flags & AI_CANONNAME) != 0) {
+ ai->ai_canonname = (char *)malloc(strlen(str) + 1);
+ if (ai->ai_canonname == NULL)
+ return EAI_MEMORY;
+ strcpy(ai->ai_canonname, str);
+ }
+ return 0;
+}
+
+static struct addrinfo *
+get_ai(pai, afd, addr)
+ const struct addrinfo *pai;
+ const struct afd *afd;
+ const char *addr;
+{
+ char *p;
+ struct addrinfo *ai;
+
+ ai = (struct addrinfo *)malloc(sizeof(struct addrinfo)
+ + (afd->a_socklen));
+ if (ai == NULL)
+ return NULL;
+
+ memcpy(ai, pai, sizeof(struct addrinfo));
+ ai->ai_addr = (struct sockaddr *)(void *)(ai + 1);
+ memset(ai->ai_addr, 0, (size_t)afd->a_socklen);
+#ifdef HAVE_SA_LEN
+ ai->ai_addr->sa_len = afd->a_socklen;
+#endif
+ ai->ai_addrlen = afd->a_socklen;
+ ai->ai_addr->sa_family = ai->ai_family = afd->a_af;
+ p = (char *)(void *)(ai->ai_addr);
+ memcpy(p + afd->a_off, addr, (size_t)afd->a_addrlen);
+ return ai;
+}
+
+/* XXX need to malloc() the same way we do from other functions! */
+static struct addrinfo *
+copy_ai(pai)
+ const struct addrinfo *pai;
+{
+ struct addrinfo *ai;
+ size_t l;
+
+ l = sizeof(*ai) + pai->ai_addrlen;
+ if ((ai = (struct addrinfo *)malloc(l)) == NULL)
+ return NULL;
+ memset(ai, 0, l);
+ memcpy(ai, pai, sizeof(*ai));
+ ai->ai_addr = (struct sockaddr *)(void *)(ai + 1);
+ memcpy(ai->ai_addr, pai->ai_addr, pai->ai_addrlen);
+
+ if (pai->ai_canonname) {
+ l = strlen(pai->ai_canonname) + 1;
+ if ((ai->ai_canonname = malloc(l)) == NULL) {
+ free(ai);
+ return NULL;
+ }
+ strcpy(ai->ai_canonname, pai->ai_canonname); /* (checked) */
+ } else {
+ /* just to make sure */
+ ai->ai_canonname = NULL;
+ }
+
+ ai->ai_next = NULL;
+
+ return ai;
+}
+
+static int
+get_portmatch(const struct addrinfo *ai, const char *servname) {
+
+ /* get_port does not touch first argument. when matchonly == 1. */
+ /* LINTED const cast */
+ return get_port((const struct addrinfo *)ai, servname, 1);
+}
+
+static int
+get_port(const struct addrinfo *ai, const char *servname, int matchonly) {
+ const char *proto;
+ struct servent *sp;
+ int port;
+ int allownumeric;
+
+ if (servname == NULL)
+ return 0;
+ switch (ai->ai_family) {
+ case AF_INET:
+#ifdef AF_INET6
+ case AF_INET6:
+#endif
+ break;
+ default:
+ return 0;
+ }
+
+ switch (ai->ai_socktype) {
+ case SOCK_RAW:
+ return EAI_SERVICE;
+ case SOCK_DGRAM:
+ case SOCK_STREAM:
+ allownumeric = 1;
+ break;
+ case ANY:
+ switch (ai->ai_family) {
+ case AF_INET:
+#ifdef AF_INET6
+ case AF_INET6:
+#endif
+ allownumeric = 1;
+ break;
+ default:
+ allownumeric = 0;
+ break;
+ }
+ break;
+ default:
+ return EAI_SOCKTYPE;
+ }
+
+ if (str_isnumber(servname)) {
+ if (!allownumeric)
+ return EAI_SERVICE;
+ port = atoi(servname);
+ if (port < 0 || port > 65535)
+ return EAI_SERVICE;
+ port = htons(port);
+ } else {
+ switch (ai->ai_socktype) {
+ case SOCK_DGRAM:
+ proto = "udp";
+ break;
+ case SOCK_STREAM:
+ proto = "tcp";
+ break;
+ default:
+ proto = NULL;
+ break;
+ }
+
+ if ((sp = getservbyname(servname, proto)) == NULL)
+ return EAI_SERVICE;
+ port = sp->s_port;
+ }
+
+ if (!matchonly) {
+ switch (ai->ai_family) {
+ case AF_INET:
+ ((struct sockaddr_in *)(void *)
+ ai->ai_addr)->sin_port = port;
+ break;
+ case AF_INET6:
+ ((struct sockaddr_in6 *)(void *)
+ ai->ai_addr)->sin6_port = port;
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static const struct afd *
+find_afd(af)
+ int af;
+{
+ const struct afd *afd;
+
+ if (af == PF_UNSPEC)
+ return NULL;
+ for (afd = afdl; afd->a_af; afd++) {
+ if (afd->a_af == af)
+ return afd;
+ }
+ return NULL;
+}
+
+/*%
+ * post-2553: AI_ADDRCONFIG check. if we use getipnodeby* as backend, backend
+ * will take care of it.
+ * the semantics of AI_ADDRCONFIG is not defined well. we are not sure
+ * if the code is right or not.
+ */
+static int
+addrconfig(af)
+ int af;
+{
+ int s;
+
+ /* XXX errno */
+ s = socket(af, SOCK_DGRAM, 0);
+ if (s < 0) {
+ if (errno != EMFILE)
+ return 0;
+ } else
+ close(s);
+ return 1;
+}
+
+/* convert a string to a scope identifier. XXX: IPv6 specific */
+static int
+ip6_str2scopeid(char *scope, struct sockaddr_in6 *sin6,
+ u_int32_t *scopeidp)
+{
+ u_int32_t scopeid;
+ u_long lscopeid;
+ struct in6_addr *a6 = &sin6->sin6_addr;
+ char *ep;
+
+ /* empty scopeid portion is invalid */
+ if (*scope == '\0')
+ return (0);
+
+#ifdef USE_IFNAMELINKID
+ if (IN6_IS_ADDR_LINKLOCAL(a6) || IN6_IS_ADDR_MC_LINKLOCAL(a6) ||
+ IN6_IS_ADDR_MC_NODELOCAL(a6)) {
+ /*
+ * Using interface names as link indices can be allowed
+ * only when we can assume a one-to-one mappings between
+ * links and interfaces. See comments in getnameinfo.c.
+ */
+ scopeid = if_nametoindex(scope);
+ if (scopeid == 0)
+ goto trynumeric;
+ *scopeidp = scopeid;
+ return (1);
+ }
+#endif
+
+ /* still unclear about literal, allow numeric only - placeholder */
+ if (IN6_IS_ADDR_SITELOCAL(a6) || IN6_IS_ADDR_MC_SITELOCAL(a6))
+ goto trynumeric;
+ if (IN6_IS_ADDR_MC_ORGLOCAL(a6))
+ goto trynumeric;
+ else
+ goto trynumeric; /*%< global */
+ /* try to convert to a numeric id as a last resort */
+trynumeric:
+ errno = 0;
+ lscopeid = strtoul(scope, &ep, 10);
+ scopeid = lscopeid & 0xffffffff;
+ if (errno == 0 && ep && *ep == '\0' && scopeid == lscopeid) {
+ *scopeidp = scopeid;
+ return (1);
+ } else
+ return (0);
+}
+
+struct addrinfo *
+hostent2addrinfo(hp, pai)
+ struct hostent *hp;
+ const struct addrinfo *pai;
+{
+ int i, af, error = 0;
+ char **aplist = NULL, *ap;
+ struct addrinfo sentinel, *cur;
+ const struct afd *afd;
+
+ af = hp->h_addrtype;
+ if (pai->ai_family != AF_UNSPEC && af != pai->ai_family)
+ return(NULL);
+
+ afd = find_afd(af);
+ if (afd == NULL)
+ return(NULL);
+
+ aplist = hp->h_addr_list;
+
+ memset(&sentinel, 0, sizeof(sentinel));
+ cur = &sentinel;
+
+ for (i = 0; (ap = aplist[i]) != NULL; i++) {
+#if 0 /*%< the trick seems too much */
+ af = hp->h_addr_list;
+ if (af == AF_INET6 &&
+ IN6_IS_ADDR_V4MAPPED((struct in6_addr *)ap)) {
+ af = AF_INET;
+ ap = ap + sizeof(struct in6_addr)
+ - sizeof(struct in_addr);
+ }
+ afd = find_afd(af);
+ if (afd == NULL)
+ continue;
+#endif /* 0 */
+
+ GET_AI(cur->ai_next, afd, ap);
+
+ /* GET_PORT(cur->ai_next, servname); */
+ if ((pai->ai_flags & AI_CANONNAME) != 0) {
+ /*
+ * RFC2553 says that ai_canonname will be set only for
+ * the first element. we do it for all the elements,
+ * just for convenience.
+ */
+ GET_CANONNAME(cur->ai_next, hp->h_name);
+ }
+ while (cur->ai_next) /*%< no need to loop, actually. */
+ cur = cur->ai_next;
+ continue;
+
+ free:
+ if (cur->ai_next)
+ freeaddrinfo(cur->ai_next);
+ cur->ai_next = NULL;
+ /* continue, without tht pointer CUR advanced. */
+ }
+
+ return(sentinel.ai_next);
+}
+
+struct addrinfo *
+addr2addrinfo(pai, cp)
+ const struct addrinfo *pai;
+ const char *cp;
+{
+ const struct afd *afd;
+
+ afd = find_afd(pai->ai_family);
+ if (afd == NULL)
+ return(NULL);
+
+ return(get_ai(pai, afd, cp));
+}
+
+static struct net_data *
+init()
+{
+ struct net_data *net_data;
+
+ if (!(net_data = net_data_init(NULL)))
+ goto error;
+ if (!net_data->ho) {
+ net_data->ho = (*net_data->irs->ho_map)(net_data->irs);
+ if (!net_data->ho || !net_data->res) {
+error:
+ errno = EIO;
+ if (net_data && net_data->res)
+ RES_SET_H_ERRNO(net_data->res, NETDB_INTERNAL);
+ return (NULL);
+ }
+
+ (*net_data->ho->res_set)(net_data->ho, net_data->res, NULL);
+ }
+
+ return (net_data);
+}
diff --git a/usr/src/lib/libresolv2_joy/common/irs/gethostent.c b/usr/src/lib/libresolv2_joy/common/irs/gethostent.c
new file mode 100644
index 0000000000..c4751c2c80
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/gethostent.c
@@ -0,0 +1,1096 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: gethostent.c,v 1.8 2006/01/10 05:06:00 marka Exp $";
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#if !defined(__BIND_NOSTATIC)
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <netinet/in.h>
+#include <net/if.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <irs.h>
+#include <isc/memcluster.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "irs_data.h"
+
+/* Definitions */
+
+struct pvt {
+ char * aliases[1];
+ char * addrs[2];
+ char addr[NS_IN6ADDRSZ];
+ char name[NS_MAXDNAME + 1];
+ struct hostent host;
+};
+
+/* Forward */
+
+static struct net_data *init(void);
+static void freepvt(struct net_data *);
+static struct hostent *fakeaddr(const char *, int, struct net_data *);
+
+#ifdef SUNW_OVERRIDE_RETRY
+extern int __res_retry(int);
+extern int __res_retry_reset(void);
+#endif /* SUNW_OVERRIDE_RETRY */
+
+
+/* Public */
+
+struct hostent *
+gethostbyname(const char *name) {
+ struct net_data *net_data = init();
+
+ return (gethostbyname_p(name, net_data));
+}
+
+struct hostent *
+gethostbyname2(const char *name, int af) {
+ struct net_data *net_data = init();
+
+ return (gethostbyname2_p(name, af, net_data));
+}
+
+struct hostent *
+gethostbyaddr(const char *addr, int len, int af) {
+ struct net_data *net_data = init();
+
+ return (gethostbyaddr_p(addr, len, af, net_data));
+}
+
+struct hostent *
+gethostent() {
+ struct net_data *net_data = init();
+
+ return (gethostent_p(net_data));
+}
+
+void
+sethostent(int stayopen) {
+ struct net_data *net_data = init();
+ sethostent_p(stayopen, net_data);
+}
+
+
+void
+endhostent() {
+ struct net_data *net_data = init();
+ endhostent_p(net_data);
+}
+
+/* Shared private. */
+
+struct hostent *
+gethostbyname_p(const char *name, struct net_data *net_data) {
+ struct hostent *hp;
+
+ if (!net_data)
+ return (NULL);
+
+ if (net_data->res->options & RES_USE_INET6) {
+ hp = gethostbyname2_p(name, AF_INET6, net_data);
+ if (hp)
+ return (hp);
+ }
+ return (gethostbyname2_p(name, AF_INET, net_data));
+}
+
+struct hostent *
+gethostbyname2_p(const char *name, int af, struct net_data *net_data) {
+ struct irs_ho *ho;
+ char tmp[NS_MAXDNAME];
+ struct hostent *hp;
+ const char *cp;
+ char **hap;
+
+ if (!net_data || !(ho = net_data->ho))
+ return (NULL);
+ if (net_data->ho_stayopen && net_data->ho_last &&
+ net_data->ho_last->h_addrtype == af) {
+ if (ns_samename(name, net_data->ho_last->h_name) == 1)
+ return (net_data->ho_last);
+ for (hap = net_data->ho_last->h_aliases; hap && *hap; hap++)
+ if (ns_samename(name, *hap) == 1)
+ return (net_data->ho_last);
+ }
+ if (!strchr(name, '.') && (cp = res_hostalias(net_data->res, name,
+ tmp, sizeof tmp)))
+ name = cp;
+ if ((hp = fakeaddr(name, af, net_data)) != NULL)
+ return (hp);
+#ifdef SUNW_OVERRIDE_RETRY
+ net_data->res->retry = __res_retry(net_data->res->retry);
+#endif /* SUNW_OVERRIDE_RETRY */
+ net_data->ho_last = (*ho->byname2)(ho, name, af);
+#ifdef SUNW_OVERRIDE_RETRY
+ net_data->res->retry = __res_retry_reset();
+#endif /* SUNW_OVERRIDE_RETRY */
+ if (!net_data->ho_stayopen)
+ endhostent();
+ return (net_data->ho_last);
+}
+
+struct hostent *
+gethostbyaddr_p(const char *addr, int len, int af, struct net_data *net_data) {
+ struct irs_ho *ho;
+ char **hap;
+
+ if (!net_data || !(ho = net_data->ho))
+ return (NULL);
+ if (net_data->ho_stayopen && net_data->ho_last &&
+ net_data->ho_last->h_length == len)
+ for (hap = net_data->ho_last->h_addr_list;
+ hap && *hap;
+ hap++)
+ if (!memcmp(addr, *hap, len))
+ return (net_data->ho_last);
+ net_data->ho_last = (*ho->byaddr)(ho, addr, len, af);
+ if (!net_data->ho_stayopen)
+ endhostent();
+ return (net_data->ho_last);
+}
+
+
+struct hostent *
+gethostent_p(struct net_data *net_data) {
+ struct irs_ho *ho;
+ struct hostent *hp;
+
+ if (!net_data || !(ho = net_data->ho))
+ return (NULL);
+ while ((hp = (*ho->next)(ho)) != NULL &&
+ hp->h_addrtype == AF_INET6 &&
+ (net_data->res->options & RES_USE_INET6) == 0U)
+ continue;
+ net_data->ho_last = hp;
+ return (net_data->ho_last);
+}
+
+
+void
+sethostent_p(int stayopen, struct net_data *net_data) {
+ struct irs_ho *ho;
+
+ if (!net_data || !(ho = net_data->ho))
+ return;
+ freepvt(net_data);
+ (*ho->rewind)(ho);
+ net_data->ho_stayopen = (stayopen != 0);
+ if (stayopen == 0)
+ net_data_minimize(net_data);
+}
+
+void
+endhostent_p(struct net_data *net_data) {
+ struct irs_ho *ho;
+
+ if ((net_data != NULL) && ((ho = net_data->ho) != NULL))
+ (*ho->minimize)(ho);
+}
+
+#ifndef IN6_IS_ADDR_V4COMPAT
+static const unsigned char in6addr_compat[12] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+#define IN6_IS_ADDR_V4COMPAT(x) (!memcmp((x)->s6_addr, in6addr_compat, 12) && \
+ ((x)->s6_addr[12] != 0 || \
+ (x)->s6_addr[13] != 0 || \
+ (x)->s6_addr[14] != 0 || \
+ ((x)->s6_addr[15] != 0 && \
+ (x)->s6_addr[15] != 1)))
+#endif
+#ifndef IN6_IS_ADDR_V4MAPPED
+#define IN6_IS_ADDR_V4MAPPED(x) (!memcmp((x)->s6_addr, in6addr_mapped, 12))
+#endif
+
+static const unsigned char in6addr_mapped[12] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff };
+
+static int scan_interfaces(int *, int *);
+static struct hostent *copyandmerge(struct hostent *, struct hostent *, int, int *);
+
+/*%
+ * Public functions
+ */
+
+/*%
+ * AI_V4MAPPED + AF_INET6
+ * If no IPv6 address then a query for IPv4 and map returned values.
+ *
+ * AI_ALL + AI_V4MAPPED + AF_INET6
+ * Return IPv6 and IPv4 mapped.
+ *
+ * AI_ADDRCONFIG
+ * Only return IPv6 / IPv4 address if there is an interface of that
+ * type active.
+ */
+
+struct hostent *
+getipnodebyname(const char *name, int af, int flags, int *error_num) {
+ int have_v4 = 1, have_v6 = 1;
+ struct in_addr in4;
+ struct in6_addr in6;
+ struct hostent he, *he1 = NULL, *he2 = NULL, *he3;
+ int v4 = 0, v6 = 0;
+ struct net_data *net_data = init();
+ u_long options;
+ int tmp_err;
+
+ if (net_data == NULL) {
+ *error_num = NO_RECOVERY;
+ return (NULL);
+ }
+
+ /* If we care about active interfaces then check. */
+ if ((flags & AI_ADDRCONFIG) != 0)
+ if (scan_interfaces(&have_v4, &have_v6) == -1) {
+ *error_num = NO_RECOVERY;
+ return (NULL);
+ }
+
+ /* Check for literal address. */
+ if ((v4 = inet_pton(AF_INET, name, &in4)) != 1)
+ v6 = inet_pton(AF_INET6, name, &in6);
+
+ /* Impossible combination? */
+
+ if ((af == AF_INET6 && (flags & AI_V4MAPPED) == 0 && v4 == 1) ||
+ (af == AF_INET && v6 == 1) ||
+ (have_v4 == 0 && v4 == 1) ||
+ (have_v6 == 0 && v6 == 1) ||
+ (have_v4 == 0 && af == AF_INET) ||
+ (have_v6 == 0 && af == AF_INET6)) {
+ *error_num = HOST_NOT_FOUND;
+ return (NULL);
+ }
+
+ /* Literal address? */
+ if (v4 == 1 || v6 == 1) {
+ char *addr_list[2];
+ char *aliases[1];
+
+ DE_CONST(name, he.h_name);
+ he.h_addr_list = addr_list;
+ he.h_addr_list[0] = (v4 == 1) ? (char *)&in4 : (char *)&in6;
+ he.h_addr_list[1] = NULL;
+ he.h_aliases = aliases;
+ he.h_aliases[0] = NULL;
+ he.h_length = (v4 == 1) ? INADDRSZ : IN6ADDRSZ;
+ he.h_addrtype = (v4 == 1) ? AF_INET : AF_INET6;
+ return (copyandmerge(&he, NULL, af, error_num));
+ }
+
+ options = net_data->res->options;
+ net_data->res->options &= ~RES_USE_INET6;
+
+ tmp_err = NO_RECOVERY;
+ if (have_v6 && af == AF_INET6) {
+ he2 = gethostbyname2_p(name, AF_INET6, net_data);
+ if (he2 != NULL) {
+ he1 = copyandmerge(he2, NULL, af, error_num);
+ if (he1 == NULL)
+ return (NULL);
+ he2 = NULL;
+ } else {
+ tmp_err = net_data->res->res_h_errno;
+ }
+ }
+
+ if (have_v4 &&
+ ((af == AF_INET) ||
+ (af == AF_INET6 && (flags & AI_V4MAPPED) != 0 &&
+ (he1 == NULL || (flags & AI_ALL) != 0)))) {
+ he2 = gethostbyname2_p(name, AF_INET, net_data);
+ if (he1 == NULL && he2 == NULL) {
+ *error_num = net_data->res->res_h_errno;
+ return (NULL);
+ }
+ } else
+ *error_num = tmp_err;
+
+ net_data->res->options = options;
+
+ he3 = copyandmerge(he1, he2, af, error_num);
+
+ if (he1 != NULL)
+ freehostent(he1);
+ return (he3);
+}
+
+struct hostent *
+getipnodebyaddr(const void *src, size_t len, int af, int *error_num) {
+ struct hostent *he1, *he2;
+ struct net_data *net_data = init();
+
+ /* Sanity Checks. */
+#ifdef ORIGINAL_ISC_CODE
+ if (src == NULL) {
+#else
+ /* this change was added circa May 2009, but not in ISC libbind 6.0 */
+ if (src == NULL|| net_data == NULL) {
+#endif /* ORIGINAL_ISC_CODE */
+ *error_num = NO_RECOVERY;
+ return (NULL);
+ }
+
+ switch (af) {
+ case AF_INET:
+ if (len != (size_t)INADDRSZ) {
+ *error_num = NO_RECOVERY;
+ return (NULL);
+ }
+ break;
+ case AF_INET6:
+ if (len != (size_t)IN6ADDRSZ) {
+ *error_num = NO_RECOVERY;
+ return (NULL);
+ }
+ break;
+ default:
+ *error_num = NO_RECOVERY;
+ return (NULL);
+ }
+
+ /*
+ * Lookup IPv4 and IPv4 mapped/compatible addresses
+ */
+ if ((af == AF_INET6 &&
+ IN6_IS_ADDR_V4COMPAT((const struct in6_addr *)src)) ||
+ (af == AF_INET6 &&
+ IN6_IS_ADDR_V4MAPPED((const struct in6_addr *)src)) ||
+ (af == AF_INET)) {
+ const char *cp = src;
+
+ if (af == AF_INET6)
+ cp += 12;
+ he1 = gethostbyaddr_p(cp, 4, AF_INET, net_data);
+ if (he1 == NULL) {
+ *error_num = net_data->res->res_h_errno;
+ return (NULL);
+ }
+ he2 = copyandmerge(he1, NULL, af, error_num);
+ if (he2 == NULL)
+ return (NULL);
+ /*
+ * Restore original address if mapped/compatible.
+ */
+ if (af == AF_INET6)
+ memcpy(he1->h_addr, src, len);
+ return (he2);
+ }
+
+ /*
+ * Lookup IPv6 address.
+ */
+ if (memcmp((const struct in6_addr *)src, &in6addr_any, 16) == 0) {
+ *error_num = HOST_NOT_FOUND;
+ return (NULL);
+ }
+
+ he1 = gethostbyaddr_p(src, 16, AF_INET6, net_data);
+ if (he1 == NULL) {
+ *error_num = net_data->res->res_h_errno;
+ return (NULL);
+ }
+ return (copyandmerge(he1, NULL, af, error_num));
+}
+
+void
+freehostent(struct hostent *he) {
+ char **cpp;
+ int names = 1;
+ int addresses = 1;
+
+ memput(he->h_name, strlen(he->h_name) + 1);
+
+ cpp = he->h_addr_list;
+ while (*cpp != NULL) {
+ memput(*cpp, (he->h_addrtype == AF_INET) ?
+ INADDRSZ : IN6ADDRSZ);
+ *cpp = NULL;
+ cpp++;
+ addresses++;
+ }
+
+ cpp = he->h_aliases;
+ while (*cpp != NULL) {
+ memput(*cpp, strlen(*cpp) + 1);
+ cpp++;
+ names++;
+ }
+
+ memput(he->h_aliases, sizeof(char *) * (names));
+ memput(he->h_addr_list, sizeof(char *) * (addresses));
+ memput(he, sizeof *he);
+}
+
+/*%
+ * Private
+ */
+
+/*%
+ * Scan the interface table and set have_v4 and have_v6 depending
+ * upon whether there are IPv4 and IPv6 interface addresses.
+ *
+ * Returns:
+ * 0 on success
+ * -1 on failure.
+ */
+
+#if defined(SIOCGLIFCONF) && defined(SIOCGLIFADDR) && \
+ !defined(IRIX_EMUL_IOCTL_SIOCGIFCONF)
+
+#ifdef __hpux
+#define lifc_len iflc_len
+#define lifc_buf iflc_buf
+#define lifc_req iflc_req
+#define LIFCONF if_laddrconf
+#else
+#define SETFAMILYFLAGS
+#define LIFCONF lifconf
+#endif
+
+#ifdef __hpux
+#define lifr_addr iflr_addr
+#define lifr_name iflr_name
+#define lifr_dstaddr iflr_dstaddr
+#define lifr_flags iflr_flags
+#define ss_family sa_family
+#define LIFREQ if_laddrreq
+#else
+#define LIFREQ lifreq
+#endif
+
+static void
+scan_interfaces6(int *have_v4, int *have_v6) {
+ struct LIFCONF lifc;
+ struct LIFREQ lifreq;
+ struct in_addr in4;
+ struct in6_addr in6;
+ char *buf = NULL, *cp, *cplim;
+ static unsigned int bufsiz = 4095;
+ int s, cpsize, n;
+
+ /* Get interface list from system. */
+ if ((s = socket(AF_INET6, SOCK_DGRAM, 0)) == -1)
+ goto cleanup;
+
+ /*
+ * Grow buffer until large enough to contain all interface
+ * descriptions.
+ */
+ for (;;) {
+ buf = memget(bufsiz);
+ if (buf == NULL)
+ goto cleanup;
+#ifdef SETFAMILYFLAGS
+ lifc.lifc_family = AF_UNSPEC; /*%< request all families */
+ lifc.lifc_flags = 0;
+#endif
+ lifc.lifc_len = bufsiz;
+ lifc.lifc_buf = buf;
+ if ((n = ioctl(s, SIOCGLIFCONF, (char *)&lifc)) != -1) {
+ /*
+ * Some OS's just return what will fit rather
+ * than set EINVAL if the buffer is too small
+ * to fit all the interfaces in. If
+ * lifc.lifc_len is too near to the end of the
+ * buffer we will grow it just in case and
+ * retry.
+ */
+ if (lifc.lifc_len + 2 * sizeof(lifreq) < bufsiz)
+ break;
+ }
+ if ((n == -1) && errno != EINVAL)
+ goto cleanup;
+
+ if (bufsiz > 1000000)
+ goto cleanup;
+
+ memput(buf, bufsiz);
+ bufsiz += 4096;
+ }
+
+ /* Parse system's interface list. */
+ cplim = buf + lifc.lifc_len; /*%< skip over if's with big ifr_addr's */
+ for (cp = buf;
+ (*have_v4 == 0 || *have_v6 == 0) && cp < cplim;
+ cp += cpsize) {
+ memcpy(&lifreq, cp, sizeof lifreq);
+#ifdef HAVE_SA_LEN
+#ifdef FIX_ZERO_SA_LEN
+ if (lifreq.lifr_addr.sa_len == 0)
+ lifreq.lifr_addr.sa_len = 16;
+#endif
+#ifdef HAVE_MINIMUM_IFREQ
+ cpsize = sizeof lifreq;
+ if (lifreq.lifr_addr.sa_len > sizeof (struct sockaddr))
+ cpsize += (int)lifreq.lifr_addr.sa_len -
+ (int)(sizeof (struct sockaddr));
+#else
+ cpsize = sizeof lifreq.lifr_name + lifreq.lifr_addr.sa_len;
+#endif /* HAVE_MINIMUM_IFREQ */
+#elif defined SIOCGIFCONF_ADDR
+ cpsize = sizeof lifreq;
+#else
+ cpsize = sizeof lifreq.lifr_name;
+ /* XXX maybe this should be a hard error? */
+ if (ioctl(s, SIOCGLIFADDR, (char *)&lifreq) < 0)
+ continue;
+#endif
+ switch (lifreq.lifr_addr.ss_family) {
+ case AF_INET:
+ if (*have_v4 == 0) {
+ memcpy(&in4,
+ &((struct sockaddr_in *)
+ &lifreq.lifr_addr)->sin_addr,
+ sizeof in4);
+ if (in4.s_addr == INADDR_ANY)
+ break;
+ n = ioctl(s, SIOCGLIFFLAGS, (char *)&lifreq);
+ if (n < 0)
+ break;
+ if ((lifreq.lifr_flags & IFF_UP) == 0)
+ break;
+ *have_v4 = 1;
+ }
+ break;
+ case AF_INET6:
+ if (*have_v6 == 0) {
+ memcpy(&in6,
+ &((struct sockaddr_in6 *)
+ &lifreq.lifr_addr)->sin6_addr, sizeof in6);
+ if (memcmp(&in6, &in6addr_any, sizeof in6) == 0)
+ break;
+ n = ioctl(s, SIOCGLIFFLAGS, (char *)&lifreq);
+ if (n < 0)
+ break;
+ if ((lifreq.lifr_flags & IFF_UP) == 0)
+ break;
+ *have_v6 = 1;
+ }
+ break;
+ }
+ }
+ if (buf != NULL)
+ memput(buf, bufsiz);
+ close(s);
+ /* printf("scan interface -> 4=%d 6=%d\n", *have_v4, *have_v6); */
+ return;
+ cleanup:
+ if (buf != NULL)
+ memput(buf, bufsiz);
+ if (s != -1)
+ close(s);
+ /* printf("scan interface -> 4=%d 6=%d\n", *have_v4, *have_v6); */
+ return;
+}
+#endif
+
+#if ( defined(__linux__) || defined(__linux) || defined(LINUX) )
+#ifndef IF_NAMESIZE
+# ifdef IFNAMSIZ
+# define IF_NAMESIZE IFNAMSIZ
+# else
+# define IF_NAMESIZE 16
+# endif
+#endif
+static void
+scan_linux6(int *have_v6) {
+ FILE *proc = NULL;
+ char address[33];
+ char name[IF_NAMESIZE+1];
+ int ifindex, prefix, flag3, flag4;
+
+ proc = fopen("/proc/net/if_inet6", "r");
+ if (proc == NULL)
+ return;
+
+ if (fscanf(proc, "%32[a-f0-9] %x %x %x %x %16s\n",
+ address, &ifindex, &prefix, &flag3, &flag4, name) == 6)
+ *have_v6 = 1;
+ fclose(proc);
+ return;
+}
+#endif
+
+static int
+scan_interfaces(int *have_v4, int *have_v6) {
+ struct ifconf ifc;
+ union {
+ char _pad[256]; /*%< leave space for IPv6 addresses */
+ struct ifreq ifreq;
+ } u;
+ struct in_addr in4;
+ struct in6_addr in6;
+ char *buf = NULL, *cp, *cplim;
+ static unsigned int bufsiz = 4095;
+ int s, n;
+ size_t cpsize;
+
+ /* Set to zero. Used as loop terminators below. */
+ *have_v4 = *have_v6 = 0;
+
+#if defined(SIOCGLIFCONF) && defined(SIOCGLIFADDR) && \
+ !defined(IRIX_EMUL_IOCTL_SIOCGIFCONF)
+ /*
+ * Try to scan the interfaces using IPv6 ioctls().
+ */
+ scan_interfaces6(have_v4, have_v6);
+ if (*have_v4 != 0 && *have_v6 != 0)
+ return (0);
+#endif
+#ifdef __linux
+ scan_linux6(have_v6);
+#endif
+
+ /* Get interface list from system. */
+ if ((s = socket(AF_INET, SOCK_DGRAM, 0)) == -1)
+ goto err_ret;
+
+ /*
+ * Grow buffer until large enough to contain all interface
+ * descriptions.
+ */
+ for (;;) {
+ buf = memget(bufsiz);
+ if (buf == NULL)
+ goto err_ret;
+ ifc.ifc_len = bufsiz;
+ ifc.ifc_buf = buf;
+#ifdef IRIX_EMUL_IOCTL_SIOCGIFCONF
+ /*
+ * This is a fix for IRIX OS in which the call to ioctl with
+ * the flag SIOCGIFCONF may not return an entry for all the
+ * interfaces like most flavors of Unix.
+ */
+ if (emul_ioctl(&ifc) >= 0)
+ break;
+#else
+ if ((n = ioctl(s, SIOCGIFCONF, (char *)&ifc)) != -1) {
+ /*
+ * Some OS's just return what will fit rather
+ * than set EINVAL if the buffer is too small
+ * to fit all the interfaces in. If
+ * ifc.ifc_len is too near to the end of the
+ * buffer we will grow it just in case and
+ * retry.
+ */
+ if (ifc.ifc_len + 2 * sizeof(u.ifreq) < bufsiz)
+ break;
+ }
+#endif
+ if ((n == -1) && errno != EINVAL)
+ goto err_ret;
+
+ if (bufsiz > 1000000)
+ goto err_ret;
+
+ memput(buf, bufsiz);
+ bufsiz += 4096;
+ }
+
+ /* Parse system's interface list. */
+ cplim = buf + ifc.ifc_len; /*%< skip over if's with big ifr_addr's */
+ for (cp = buf;
+ (*have_v4 == 0 || *have_v6 == 0) && cp < cplim;
+ cp += cpsize) {
+ memcpy(&u.ifreq, cp, sizeof u.ifreq);
+#ifdef HAVE_SA_LEN
+#ifdef FIX_ZERO_SA_LEN
+ if (u.ifreq.ifr_addr.sa_len == 0)
+ u.ifreq.ifr_addr.sa_len = 16;
+#endif
+#ifdef HAVE_MINIMUM_IFREQ
+ cpsize = sizeof u.ifreq;
+ if (u.ifreq.ifr_addr.sa_len > sizeof (struct sockaddr))
+ cpsize += (int)u.ifreq.ifr_addr.sa_len -
+ (int)(sizeof (struct sockaddr));
+#else
+ cpsize = sizeof u.ifreq.ifr_name + u.ifreq.ifr_addr.sa_len;
+#endif /* HAVE_MINIMUM_IFREQ */
+ if (cpsize > sizeof u.ifreq && cpsize <= sizeof u)
+ memcpy(&u.ifreq, cp, cpsize);
+#elif defined SIOCGIFCONF_ADDR
+ cpsize = sizeof u.ifreq;
+#else
+ cpsize = sizeof u.ifreq.ifr_name;
+ /* XXX maybe this should be a hard error? */
+ if (ioctl(s, SIOCGIFADDR, (char *)&u.ifreq) < 0)
+ continue;
+#endif
+ switch (u.ifreq.ifr_addr.sa_family) {
+ case AF_INET:
+ if (*have_v4 == 0) {
+ memcpy(&in4,
+ &((struct sockaddr_in *)
+ &u.ifreq.ifr_addr)->sin_addr,
+ sizeof in4);
+ if (in4.s_addr == INADDR_ANY)
+ break;
+ n = ioctl(s, SIOCGIFFLAGS, (char *)&u.ifreq);
+ if (n < 0)
+ break;
+ if ((u.ifreq.ifr_flags & IFF_UP) == 0)
+ break;
+ *have_v4 = 1;
+ }
+ break;
+ case AF_INET6:
+ if (*have_v6 == 0) {
+ memcpy(&in6,
+ &((struct sockaddr_in6 *)
+ &u.ifreq.ifr_addr)->sin6_addr,
+ sizeof in6);
+ if (memcmp(&in6, &in6addr_any, sizeof in6) == 0)
+ break;
+ n = ioctl(s, SIOCGIFFLAGS, (char *)&u.ifreq);
+ if (n < 0)
+ break;
+ if ((u.ifreq.ifr_flags & IFF_UP) == 0)
+ break;
+ *have_v6 = 1;
+ }
+ break;
+ }
+ }
+ if (buf != NULL)
+ memput(buf, bufsiz);
+ close(s);
+ /* printf("scan interface -> 4=%d 6=%d\n", *have_v4, *have_v6); */
+ return (0);
+ err_ret:
+ if (buf != NULL)
+ memput(buf, bufsiz);
+ if (s != -1)
+ close(s);
+ /* printf("scan interface -> 4=%d 6=%d\n", *have_v4, *have_v6); */
+ return (-1);
+}
+
+static struct hostent *
+copyandmerge(struct hostent *he1, struct hostent *he2, int af, int *error_num) {
+ struct hostent *he = NULL;
+ int addresses = 1; /*%< NULL terminator */
+ int names = 1; /*%< NULL terminator */
+ int len = 0;
+ char **cpp, **npp;
+
+ /*
+ * Work out array sizes;
+ */
+ if (he1 != NULL) {
+ cpp = he1->h_addr_list;
+ while (*cpp != NULL) {
+ addresses++;
+ cpp++;
+ }
+ cpp = he1->h_aliases;
+ while (*cpp != NULL) {
+ names++;
+ cpp++;
+ }
+ }
+
+ if (he2 != NULL) {
+ cpp = he2->h_addr_list;
+ while (*cpp != NULL) {
+ addresses++;
+ cpp++;
+ }
+ if (he1 == NULL) {
+ cpp = he2->h_aliases;
+ while (*cpp != NULL) {
+ names++;
+ cpp++;
+ }
+ }
+ }
+
+ if (addresses == 1) {
+ *error_num = NO_ADDRESS;
+ return (NULL);
+ }
+
+ he = memget(sizeof *he);
+ if (he == NULL)
+ goto no_recovery;
+
+ he->h_addr_list = memget(sizeof(char *) * (addresses));
+ if (he->h_addr_list == NULL)
+ goto cleanup0;
+ memset(he->h_addr_list, 0, sizeof(char *) * (addresses));
+
+ /* copy addresses */
+ npp = he->h_addr_list;
+ if (he1 != NULL) {
+ cpp = he1->h_addr_list;
+ while (*cpp != NULL) {
+ *npp = memget((af == AF_INET) ? INADDRSZ : IN6ADDRSZ);
+ if (*npp == NULL)
+ goto cleanup1;
+ /* convert to mapped if required */
+ if (af == AF_INET6 && he1->h_addrtype == AF_INET) {
+ memcpy(*npp, in6addr_mapped,
+ sizeof in6addr_mapped);
+ memcpy(*npp + sizeof in6addr_mapped, *cpp,
+ INADDRSZ);
+ } else {
+ memcpy(*npp, *cpp,
+ (af == AF_INET) ? INADDRSZ : IN6ADDRSZ);
+ }
+ cpp++;
+ npp++;
+ }
+ }
+
+ if (he2 != NULL) {
+ cpp = he2->h_addr_list;
+ while (*cpp != NULL) {
+ *npp = memget((af == AF_INET) ? INADDRSZ : IN6ADDRSZ);
+ if (*npp == NULL)
+ goto cleanup1;
+ /* convert to mapped if required */
+ if (af == AF_INET6 && he2->h_addrtype == AF_INET) {
+ memcpy(*npp, in6addr_mapped,
+ sizeof in6addr_mapped);
+ memcpy(*npp + sizeof in6addr_mapped, *cpp,
+ INADDRSZ);
+ } else {
+ memcpy(*npp, *cpp,
+ (af == AF_INET) ? INADDRSZ : IN6ADDRSZ);
+ }
+ cpp++;
+ npp++;
+ }
+ }
+
+ he->h_aliases = memget(sizeof(char *) * (names));
+ if (he->h_aliases == NULL)
+ goto cleanup1;
+ memset(he->h_aliases, 0, sizeof(char *) * (names));
+
+ /* copy aliases */
+ npp = he->h_aliases;
+ cpp = (he1 != NULL) ? he1->h_aliases : he2->h_aliases;
+ while (*cpp != NULL) {
+ len = strlen (*cpp) + 1;
+ *npp = memget(len);
+ if (*npp == NULL)
+ goto cleanup2;
+ strcpy(*npp, *cpp);
+ npp++;
+ cpp++;
+ }
+
+ /* copy hostname */
+ he->h_name = memget(strlen((he1 != NULL) ?
+ he1->h_name : he2->h_name) + 1);
+ if (he->h_name == NULL)
+ goto cleanup2;
+ strcpy(he->h_name, (he1 != NULL) ? he1->h_name : he2->h_name);
+
+ /* set address type and length */
+ he->h_addrtype = af;
+ he->h_length = (af == AF_INET) ? INADDRSZ : IN6ADDRSZ;
+ return(he);
+
+ cleanup2:
+ cpp = he->h_aliases;
+ while (*cpp != NULL) {
+ memput(*cpp, strlen(*cpp) + 1);
+ cpp++;
+ }
+ memput(he->h_aliases, sizeof(char *) * (names));
+
+ cleanup1:
+ cpp = he->h_addr_list;
+ while (*cpp != NULL) {
+ memput(*cpp, (af == AF_INET) ? INADDRSZ : IN6ADDRSZ);
+ *cpp = NULL;
+ cpp++;
+ }
+ memput(he->h_addr_list, sizeof(char *) * (addresses));
+
+ cleanup0:
+ memput(he, sizeof *he);
+
+ no_recovery:
+ *error_num = NO_RECOVERY;
+ return (NULL);
+}
+
+static struct net_data *
+init() {
+ struct net_data *net_data;
+
+ if (!(net_data = net_data_init(NULL)))
+ goto error;
+ if (!net_data->ho) {
+ net_data->ho = (*net_data->irs->ho_map)(net_data->irs);
+ if (!net_data->ho || !net_data->res) {
+ error:
+ errno = EIO;
+
+#ifdef SUNW_SETHERRNO
+ h_errno = NETDB_INTERNAL;
+#endif /* SUNW_SETHERRNO */
+ if (net_data && net_data->res)
+ RES_SET_H_ERRNO(net_data->res, NETDB_INTERNAL);
+ return (NULL);
+ }
+
+ (*net_data->ho->res_set)(net_data->ho, net_data->res, NULL);
+ }
+
+ return (net_data);
+}
+
+static void
+freepvt(struct net_data *net_data) {
+ if (net_data->ho_data) {
+ free(net_data->ho_data);
+ net_data->ho_data = NULL;
+ }
+}
+
+static struct hostent *
+fakeaddr(const char *name, int af, struct net_data *net_data) {
+ struct pvt *pvt;
+
+ freepvt(net_data);
+ net_data->ho_data = malloc(sizeof (struct pvt));
+ if (!net_data->ho_data) {
+ errno = ENOMEM;
+ RES_SET_H_ERRNO(net_data->res, NETDB_INTERNAL);
+ return (NULL);
+ }
+ pvt = net_data->ho_data;
+#ifndef __bsdi__
+ /*
+ * Unlike its forebear(inet_aton), our friendly inet_pton() is strict
+ * in its interpretation of its input, and it will only return "1" if
+ * the input string is a formally valid(and thus unambiguous with
+ * respect to host names) internet address specification for this AF.
+ *
+ * This means "telnet 0xdeadbeef" and "telnet 127.1" are dead now.
+ */
+ if (inet_pton(af, name, pvt->addr) != 1) {
+#else
+ /* BSDI XXX
+ * We put this back to inet_aton -- we really want the old behavior
+ * Long live 127.1...
+ */
+ if ((af != AF_INET ||
+ inet_aton(name, (struct in_addr *)pvt->addr) != 1) &&
+ inet_pton(af, name, pvt->addr) != 1) {
+#endif
+ RES_SET_H_ERRNO(net_data->res, HOST_NOT_FOUND);
+ return (NULL);
+ }
+ strncpy(pvt->name, name, NS_MAXDNAME);
+ pvt->name[NS_MAXDNAME] = '\0';
+ if (af == AF_INET && (net_data->res->options & RES_USE_INET6) != 0U) {
+ map_v4v6_address(pvt->addr, pvt->addr);
+ af = AF_INET6;
+ }
+ pvt->host.h_addrtype = af;
+ switch(af) {
+ case AF_INET:
+ pvt->host.h_length = NS_INADDRSZ;
+ break;
+ case AF_INET6:
+ pvt->host.h_length = NS_IN6ADDRSZ;
+ break;
+ default:
+ errno = EAFNOSUPPORT;
+ RES_SET_H_ERRNO(net_data->res, NETDB_INTERNAL);
+ return (NULL);
+ }
+ pvt->host.h_name = pvt->name;
+ pvt->host.h_aliases = pvt->aliases;
+ pvt->aliases[0] = NULL;
+ pvt->addrs[0] = (char *)pvt->addr;
+ pvt->addrs[1] = NULL;
+ pvt->host.h_addr_list = pvt->addrs;
+ RES_SET_H_ERRNO(net_data->res, NETDB_SUCCESS);
+ return (&pvt->host);
+}
+
+#ifdef grot /*%< for future use in gethostbyaddr(), for "SUNSECURITY" */
+ struct hostent *rhp;
+ char **haddr;
+ u_long old_options;
+ char hname2[MAXDNAME+1];
+
+ if (af == AF_INET) {
+ /*
+ * turn off search as the name should be absolute,
+ * 'localhost' should be matched by defnames
+ */
+ strncpy(hname2, hp->h_name, MAXDNAME);
+ hname2[MAXDNAME] = '\0';
+ old_options = net_data->res->options;
+ net_data->res->options &= ~RES_DNSRCH;
+ net_data->res->options |= RES_DEFNAMES;
+ if (!(rhp = gethostbyname(hname2))) {
+ net_data->res->options = old_options;
+ RES_SET_H_ERRNO(net_data->res, HOST_NOT_FOUND);
+ return (NULL);
+ }
+ net_data->res->options = old_options;
+ for (haddr = rhp->h_addr_list; *haddr; haddr++)
+ if (!memcmp(*haddr, addr, INADDRSZ))
+ break;
+ if (!*haddr) {
+ RES_SET_H_ERRNO(net_data->res, HOST_NOT_FOUND);
+ return (NULL);
+ }
+ }
+#endif /* grot */
+#endif /*__BIND_NOSTATIC*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/gethostent_r.c b/usr/src/lib/libresolv2_joy/common/irs/gethostent_r.c
new file mode 100644
index 0000000000..1971677c5d
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/gethostent_r.c
@@ -0,0 +1,278 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1998-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: gethostent_r.c,v 1.9 2005/09/03 12:41:37 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include <port_before.h>
+#if !defined(_REENTRANT) || !defined(DO_PTHREADS)
+ static int gethostent_r_not_required = 0;
+#else
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <sys/param.h>
+#include <port_after.h>
+
+#pragma redefine extname res_gethostbyname joy_res_gethostbyname
+#pragma redefine extname res_gethostbyaddr joy_res_gethostbyaddr
+
+#ifdef HOST_R_RETURN
+
+static HOST_R_RETURN
+copy_hostent(struct hostent *, struct hostent *, HOST_R_COPY_ARGS);
+
+HOST_R_RETURN
+gethostbyname_r(const char *name, struct hostent *hptr, HOST_R_ARGS) {
+ struct hostent *he = gethostbyname(name);
+#ifdef HOST_R_SETANSWER
+ int n = 0;
+#endif
+
+#ifdef HOST_R_ERRNO
+ HOST_R_ERRNO;
+#endif
+
+#ifdef HOST_R_SETANSWER
+ if (he == NULL || (n = copy_hostent(he, hptr, HOST_R_COPY)) != 0)
+ *answerp = NULL;
+ else
+ *answerp = hptr;
+
+ return (n);
+#else
+ if (he == NULL)
+ return (HOST_R_BAD);
+
+ return (copy_hostent(he, hptr, HOST_R_COPY));
+#endif
+}
+
+HOST_R_RETURN
+gethostbyaddr_r(const char *addr, int len, int type,
+ struct hostent *hptr, HOST_R_ARGS) {
+ struct hostent *he = gethostbyaddr(addr, len, type);
+#ifdef HOST_R_SETANSWER
+ int n = 0;
+#endif
+
+#ifdef HOST_R_ERRNO
+ HOST_R_ERRNO;
+#endif
+
+#ifdef HOST_R_SETANSWER
+ if (he == NULL || (n = copy_hostent(he, hptr, HOST_R_COPY)) != 0)
+ *answerp = NULL;
+ else
+ *answerp = hptr;
+
+ return (n);
+#else
+ if (he == NULL)
+ return (HOST_R_BAD);
+
+ return (copy_hostent(he, hptr, HOST_R_COPY));
+#endif
+}
+
+/*%
+ * These assume a single context is in operation per thread.
+ * If this is not the case we will need to call irs directly
+ * rather than through the base functions.
+ */
+
+HOST_R_RETURN
+gethostent_r(struct hostent *hptr, HOST_R_ARGS) {
+ struct hostent *he = gethostent();
+#ifdef HOST_R_SETANSWER
+ int n = 0;
+#endif
+
+#ifdef HOST_R_ERRNO
+ HOST_R_ERRNO;
+#endif
+
+#ifdef HOST_R_SETANSWER
+ if (he == NULL || (n = copy_hostent(he, hptr, HOST_R_COPY)) != 0)
+ *answerp = NULL;
+ else
+ *answerp = hptr;
+
+ return (n);
+#else
+ if (he == NULL)
+ return (HOST_R_BAD);
+
+ return (copy_hostent(he, hptr, HOST_R_COPY));
+#endif
+}
+
+HOST_R_SET_RETURN
+#ifdef HOST_R_ENT_ARGS
+sethostent_r(int stay_open, HOST_R_ENT_ARGS)
+#else
+sethostent_r(int stay_open)
+#endif
+{
+#ifdef HOST_R_ENT_ARGS
+ UNUSED(hdptr);
+#endif
+ sethostent(stay_open);
+#ifdef HOST_R_SET_RESULT
+ return (HOST_R_SET_RESULT);
+#endif
+}
+
+HOST_R_END_RETURN
+#ifdef HOST_R_ENT_ARGS
+endhostent_r(HOST_R_ENT_ARGS)
+#else
+endhostent_r(void)
+#endif
+{
+#ifdef HOST_R_ENT_ARGS
+ UNUSED(hdptr);
+#endif
+ endhostent();
+ HOST_R_END_RESULT(HOST_R_OK);
+}
+
+/* Private */
+
+#ifndef HOSTENT_DATA
+static HOST_R_RETURN
+copy_hostent(struct hostent *he, struct hostent *hptr, HOST_R_COPY_ARGS) {
+ char *cp;
+ char **ptr;
+ int i, n;
+ int nptr, len;
+
+ /* Find out the amount of space required to store the answer. */
+ nptr = 2; /*%< NULL ptrs */
+ len = (char *)ALIGN(buf) - buf;
+ for (i = 0; he->h_addr_list[i]; i++, nptr++) {
+ len += he->h_length;
+ }
+ for (i = 0; he->h_aliases[i]; i++, nptr++) {
+ len += strlen(he->h_aliases[i]) + 1;
+ }
+ len += strlen(he->h_name) + 1;
+ len += nptr * sizeof(char*);
+
+ if (len > buflen) {
+ errno = ERANGE;
+ return (HOST_R_BAD);
+ }
+
+ /* copy address size and type */
+ hptr->h_addrtype = he->h_addrtype;
+ n = hptr->h_length = he->h_length;
+
+ ptr = (char **)ALIGN(buf);
+ cp = (char *)ALIGN(buf) + nptr * sizeof(char *);
+
+ /* copy address list */
+ hptr->h_addr_list = ptr;
+ for (i = 0; he->h_addr_list[i]; i++ , ptr++) {
+ memcpy(cp, he->h_addr_list[i], n);
+ hptr->h_addr_list[i] = cp;
+ cp += n;
+ }
+ hptr->h_addr_list[i] = NULL;
+ ptr++;
+
+ /* copy official name */
+ n = strlen(he->h_name) + 1;
+ strcpy(cp, he->h_name);
+ hptr->h_name = cp;
+ cp += n;
+
+ /* copy aliases */
+ hptr->h_aliases = ptr;
+ for (i = 0 ; he->h_aliases[i]; i++) {
+ n = strlen(he->h_aliases[i]) + 1;
+ strcpy(cp, he->h_aliases[i]);
+ hptr->h_aliases[i] = cp;
+ cp += n;
+ }
+ hptr->h_aliases[i] = NULL;
+
+ return (HOST_R_OK);
+}
+#else /* !HOSTENT_DATA */
+static int
+copy_hostent(struct hostent *he, struct hostent *hptr, HOST_R_COPY_ARGS) {
+ char *cp, *eob;
+ int i, n;
+
+ /* copy address size and type */
+ hptr->h_addrtype = he->h_addrtype;
+ n = hptr->h_length = he->h_length;
+
+ /* copy up to first 35 addresses */
+ i = 0;
+ cp = hdptr->hostbuf;
+ eob = hdptr->hostbuf + sizeof(hdptr->hostbuf);
+ hptr->h_addr_list = hdptr->h_addr_ptrs;
+ while (he->h_addr_list[i] && i < (_MAXADDRS)) {
+ if (n < (eob - cp)) {
+ memcpy(cp, he->h_addr_list[i], n);
+ hptr->h_addr_list[i] = cp;
+ cp += n;
+ } else {
+ break;
+ }
+ i++;
+ }
+ hptr->h_addr_list[i] = NULL;
+
+ /* copy official name */
+ if ((n = strlen(he->h_name) + 1) < (eob - cp)) {
+ strcpy(cp, he->h_name);
+ hptr->h_name = cp;
+ cp += n;
+ } else {
+ return (-1);
+ }
+
+ /* copy aliases */
+ i = 0;
+ hptr->h_aliases = hdptr->host_aliases;
+ while (he->h_aliases[i] && i < (_MAXALIASES-1)) {
+ if ((n = strlen(he->h_aliases[i]) + 1) < (eob - cp)) {
+ strcpy(cp, he->h_aliases[i]);
+ hptr->h_aliases[i] = cp;
+ cp += n;
+ } else {
+ break;
+ }
+ i++;
+ }
+ hptr->h_aliases[i] = NULL;
+
+ return (HOST_R_OK);
+}
+#endif /* !HOSTENT_DATA */
+#else /* HOST_R_RETURN */
+ static int gethostent_r_unknown_system = 0;
+#endif /* HOST_R_RETURN */
+#endif /* !defined(_REENTRANT) || !defined(DO_PTHREADS) */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/getnameinfo.c b/usr/src/lib/libresolv2_joy/common/irs/getnameinfo.c
new file mode 100644
index 0000000000..360bda8bfe
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/getnameinfo.c
@@ -0,0 +1,334 @@
+/*
+ * Issues to be discussed:
+ * - Thread safe-ness must be checked
+ */
+
+#if ( defined(__linux__) || defined(__linux) || defined(LINUX) )
+#ifndef IF_NAMESIZE
+# ifdef IFNAMSIZ
+# define IF_NAMESIZE IFNAMSIZ
+# else
+# define IF_NAMESIZE 16
+# endif
+#endif
+#endif
+
+/*
+ * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by WIDE Project and
+ * its contributors.
+ * 4. Neither the name of the project nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <port_before.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <string.h>
+#include <stddef.h>
+
+#include <port_after.h>
+
+/*%
+ * Note that a_off will be dynamically adjusted so that to be consistent
+ * with the definition of sockaddr_in{,6}.
+ * The value presented below is just a guess.
+ */
+static struct afd {
+ int a_af;
+ int a_addrlen;
+ size_t a_socklen;
+ int a_off;
+} afdl [] = {
+ /* first entry is linked last... */
+ {PF_INET, sizeof(struct in_addr), sizeof(struct sockaddr_in),
+ offsetof(struct sockaddr_in, sin_addr)},
+ {PF_INET6, sizeof(struct in6_addr), sizeof(struct sockaddr_in6),
+ offsetof(struct sockaddr_in6, sin6_addr)},
+ {0, 0, 0, 0},
+};
+
+struct sockinet {
+#ifdef HAVE_SA_LEN
+ u_char si_len;
+#endif
+ u_char si_family;
+ u_short si_port;
+};
+
+static int ip6_parsenumeric __P((const struct sockaddr *, const char *, char *,
+ size_t, int));
+#ifdef HAVE_SIN6_SCOPE_ID
+static int ip6_sa2str __P((const struct sockaddr_in6 *, char *, size_t, int));
+#endif
+
+int
+getnameinfo(sa, salen, host, hostlen, serv, servlen, flags)
+ const struct sockaddr *sa;
+ size_t salen;
+ char *host;
+ size_t hostlen;
+ char *serv;
+ size_t servlen;
+ int flags;
+{
+ struct afd *afd;
+ struct servent *sp;
+ struct hostent *hp;
+ u_short port;
+#ifdef HAVE_SA_LEN
+ size_t len;
+#endif
+ int family, i;
+ const char *addr;
+ char *p;
+ char numserv[512];
+ char numaddr[512];
+ const struct sockaddr_in6 *sin6;
+
+ if (sa == NULL)
+ return EAI_FAIL;
+
+#ifdef HAVE_SA_LEN
+ len = sa->sa_len;
+ if (len != salen) return EAI_FAIL;
+#endif
+
+ family = sa->sa_family;
+ for (i = 0; afdl[i].a_af; i++)
+ if (afdl[i].a_af == family) {
+ afd = &afdl[i];
+ goto found;
+ }
+ return EAI_FAMILY;
+
+ found:
+ if (salen != afd->a_socklen) return EAI_FAIL;
+
+ port = ((const struct sockinet *)sa)->si_port; /*%< network byte order */
+ addr = (const char *)sa + afd->a_off;
+
+ if (serv == NULL || servlen == 0U) {
+ /*
+ * rfc2553bis says that serv == NULL or servlen == 0 means that
+ * the caller does not want the result.
+ */
+ } else if (flags & NI_NUMERICSERV) {
+ sprintf(numserv, "%d", ntohs(port));
+ if (strlen(numserv) > servlen)
+ return EAI_MEMORY;
+ strcpy(serv, numserv);
+ } else {
+ sp = getservbyport(port, (flags & NI_DGRAM) ? "udp" : "tcp");
+ if (sp) {
+ if (strlen(sp->s_name) + 1 > servlen)
+ return EAI_MEMORY;
+ strcpy(serv, sp->s_name);
+ } else
+ return EAI_NONAME;
+ }
+
+ switch (sa->sa_family) {
+ case AF_INET:
+ if (ntohl(*(const u_int32_t *)addr) >> IN_CLASSA_NSHIFT == 0)
+ flags |= NI_NUMERICHOST;
+ break;
+ case AF_INET6:
+ sin6 = (const struct sockaddr_in6 *)sa;
+ switch (sin6->sin6_addr.s6_addr[0]) {
+ case 0x00:
+ if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
+ ;
+ else if (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr))
+ ;
+ else
+ flags |= NI_NUMERICHOST;
+ break;
+ default:
+ if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
+ flags |= NI_NUMERICHOST;
+ else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
+ flags |= NI_NUMERICHOST;
+ break;
+ }
+ break;
+ }
+ if (host == NULL || hostlen == 0U) {
+ /*
+ * rfc2553bis says that host == NULL or hostlen == 0 means that
+ * the caller does not want the result.
+ */
+ } else if (flags & NI_NUMERICHOST) {
+ goto numeric;
+ } else {
+ hp = gethostbyaddr(addr, afd->a_addrlen, afd->a_af);
+
+ if (hp) {
+ if (flags & NI_NOFQDN) {
+ p = strchr(hp->h_name, '.');
+ if (p) *p = '\0';
+ }
+ if (strlen(hp->h_name) + 1 > hostlen)
+ return EAI_MEMORY;
+ strcpy(host, hp->h_name);
+ } else {
+ if (flags & NI_NAMEREQD)
+ return EAI_NONAME;
+ numeric:
+ switch(afd->a_af) {
+ case AF_INET6:
+ {
+ int error;
+
+ if ((error = ip6_parsenumeric(sa, addr, host,
+ hostlen,
+ flags)) != 0)
+ return(error);
+ break;
+ }
+
+ default:
+ if (inet_ntop(afd->a_af, addr, numaddr,
+ sizeof(numaddr)) == NULL)
+ return EAI_NONAME;
+ if (strlen(numaddr) + 1 > hostlen)
+ return EAI_MEMORY;
+ strcpy(host, numaddr);
+ }
+ }
+ }
+ return(0);
+}
+
+static int
+ip6_parsenumeric(const struct sockaddr *sa, const char *addr, char *host,
+ size_t hostlen, int flags)
+{
+ size_t numaddrlen;
+ char numaddr[512];
+
+#ifndef HAVE_SIN6_SCOPE_ID
+ UNUSED(sa);
+ UNUSED(flags);
+#endif
+
+ if (inet_ntop(AF_INET6, addr, numaddr, sizeof(numaddr))
+ == NULL)
+ return EAI_SYSTEM;
+
+ numaddrlen = strlen(numaddr);
+ if (numaddrlen + 1 > hostlen) /*%< don't forget terminator */
+ return EAI_MEMORY;
+ strcpy(host, numaddr);
+
+#ifdef HAVE_SIN6_SCOPE_ID
+ if (((const struct sockaddr_in6 *)sa)->sin6_scope_id) {
+ char scopebuf[MAXHOSTNAMELEN]; /*%< XXX */
+ int scopelen;
+
+ /* ip6_sa2str never fails */
+ scopelen = ip6_sa2str((const struct sockaddr_in6 *)sa,
+ scopebuf, sizeof(scopebuf), flags);
+
+ if (scopelen + 1 + numaddrlen + 1 > hostlen)
+ return EAI_MEMORY;
+
+ /* construct <numeric-addr><delim><scopeid> */
+ memcpy(host + numaddrlen + 1, scopebuf,
+ scopelen);
+ host[numaddrlen] = SCOPE_DELIMITER;
+ host[numaddrlen + 1 + scopelen] = '\0';
+ }
+#endif
+
+ return 0;
+}
+
+#ifdef HAVE_SIN6_SCOPE_ID
+/* ARGSUSED */
+static int
+ip6_sa2str(const struct sockaddr_in6 *sa6, char *buf,
+ size_t bufsiz, int flags)
+{
+#ifdef USE_IFNAMELINKID
+ unsigned int ifindex = (unsigned int)sa6->sin6_scope_id;
+ const struct in6_addr *a6 = &sa6->sin6_addr;
+#endif
+ char tmp[64];
+
+#ifdef NI_NUMERICSCOPE
+ if (flags & NI_NUMERICSCOPE) {
+ sprintf(tmp, "%u", sa6->sin6_scope_id);
+ if (bufsiz != 0U) {
+ strncpy(buf, tmp, bufsiz - 1);
+ buf[bufsiz - 1] = '\0';
+ }
+ return(strlen(tmp));
+ }
+#endif
+
+#ifdef USE_IFNAMELINKID
+ /*
+ * For a link-local address, convert the index to an interface
+ * name, assuming a one-to-one mapping between links and interfaces.
+ * Note, however, that this assumption is stronger than the
+ * specification of the scoped address architecture; the
+ * specficication says that more than one interfaces can belong to
+ * a single link.
+ */
+
+ /* if_indextoname() does not take buffer size. not a good api... */
+ if ((IN6_IS_ADDR_LINKLOCAL(a6) || IN6_IS_ADDR_MC_LINKLOCAL(a6)) &&
+ bufsiz >= IF_NAMESIZE) {
+ char *p = if_indextoname(ifindex, buf);
+ if (p) {
+ return(strlen(p));
+ }
+ }
+#endif
+
+ /* last resort */
+ sprintf(tmp, "%u", sa6->sin6_scope_id);
+ if (bufsiz != 0U) {
+ strncpy(buf, tmp, bufsiz - 1);
+ buf[bufsiz - 1] = '\0';
+ }
+ return(strlen(tmp));
+}
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/getnetent.c b/usr/src/lib/libresolv2_joy/common/irs/getnetent.c
new file mode 100644
index 0000000000..0d00699e81
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/getnetent.c
@@ -0,0 +1,345 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: getnetent.c,v 1.7 2005/04/27 04:56:25 sra Exp $";
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#if !defined(__BIND_NOSTATIC)
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <arpa/inet.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "irs_data.h"
+
+/* Definitions */
+
+struct pvt {
+ struct netent netent;
+ char * aliases[1];
+ char name[MAXDNAME + 1];
+};
+
+/* Forward */
+
+static struct net_data *init(void);
+static struct netent *nw_to_net(struct nwent *, struct net_data *);
+static void freepvt(struct net_data *);
+static struct netent *fakeaddr(const char *, int af, struct net_data *);
+
+/* Portability */
+
+#ifndef INADDR_NONE
+# define INADDR_NONE 0xffffffff
+#endif
+
+/* Public */
+
+struct netent *
+getnetent() {
+ struct net_data *net_data = init();
+
+ return (getnetent_p(net_data));
+}
+
+struct netent *
+getnetbyname(const char *name) {
+ struct net_data *net_data = init();
+
+ return (getnetbyname_p(name, net_data));
+}
+
+struct netent *
+getnetbyaddr(unsigned long net, int type) {
+ struct net_data *net_data = init();
+
+ return (getnetbyaddr_p(net, type, net_data));
+}
+
+void
+setnetent(int stayopen) {
+ struct net_data *net_data = init();
+
+ setnetent_p(stayopen, net_data);
+}
+
+
+void
+endnetent() {
+ struct net_data *net_data = init();
+
+ endnetent_p(net_data);
+}
+
+/* Shared private. */
+
+struct netent *
+getnetent_p(struct net_data *net_data) {
+ struct irs_nw *nw;
+
+ if (!net_data || !(nw = net_data->nw))
+ return (NULL);
+ net_data->nww_last = (*nw->next)(nw);
+ net_data->nw_last = nw_to_net(net_data->nww_last, net_data);
+ return (net_data->nw_last);
+}
+
+struct netent *
+getnetbyname_p(const char *name, struct net_data *net_data) {
+ struct irs_nw *nw;
+ struct netent *np;
+ char **nap;
+
+ if (!net_data || !(nw = net_data->nw))
+ return (NULL);
+ if (net_data->nw_stayopen && net_data->nw_last) {
+ if (!strcmp(net_data->nw_last->n_name, name))
+ return (net_data->nw_last);
+ for (nap = net_data->nw_last->n_aliases; nap && *nap; nap++)
+ if (!strcmp(name, *nap))
+ return (net_data->nw_last);
+ }
+ if ((np = fakeaddr(name, AF_INET, net_data)) != NULL)
+ return (np);
+ net_data->nww_last = (*nw->byname)(nw, name, AF_INET);
+ net_data->nw_last = nw_to_net(net_data->nww_last, net_data);
+ if (!net_data->nw_stayopen)
+ endnetent();
+ return (net_data->nw_last);
+}
+
+struct netent *
+getnetbyaddr_p(unsigned long net, int type, struct net_data *net_data) {
+ struct irs_nw *nw;
+ u_char addr[4];
+ int bits;
+
+ if (!net_data || !(nw = net_data->nw))
+ return (NULL);
+ if (net_data->nw_stayopen && net_data->nw_last)
+ if (type == net_data->nw_last->n_addrtype &&
+ net == net_data->nw_last->n_net)
+ return (net_data->nw_last);
+
+ /* cannonize net(host order) */
+ if (net < 256UL) {
+ net <<= 24;
+ bits = 8;
+ } else if (net < 65536UL) {
+ net <<= 16;
+ bits = 16;
+ } else if (net < 16777216UL) {
+ net <<= 8;
+ bits = 24;
+ } else
+ bits = 32;
+
+ /* convert to net order */
+ addr[0] = (0xFF000000 & net) >> 24;
+ addr[1] = (0x00FF0000 & net) >> 16;
+ addr[2] = (0x0000FF00 & net) >> 8;
+ addr[3] = (0x000000FF & net);
+
+ /* reduce bits to as close to natural number as possible */
+ if ((bits == 32) && (addr[0] < 224) && (addr[3] == 0)) {
+ if ((addr[0] < 192) && (addr[2] == 0)) {
+ if ((addr[0] < 128) && (addr[1] == 0))
+ bits = 8;
+ else
+ bits = 16;
+ } else {
+ bits = 24;
+ }
+ }
+
+ net_data->nww_last = (*nw->byaddr)(nw, addr, bits, AF_INET);
+ net_data->nw_last = nw_to_net(net_data->nww_last, net_data);
+ if (!net_data->nw_stayopen)
+ endnetent();
+ return (net_data->nw_last);
+}
+
+
+
+
+void
+setnetent_p(int stayopen, struct net_data *net_data) {
+ struct irs_nw *nw;
+
+ if (!net_data || !(nw = net_data->nw))
+ return;
+ freepvt(net_data);
+ (*nw->rewind)(nw);
+ net_data->nw_stayopen = (stayopen != 0);
+ if (stayopen == 0)
+ net_data_minimize(net_data);
+}
+
+void
+endnetent_p(struct net_data *net_data) {
+ struct irs_nw *nw;
+
+ if ((net_data != NULL) && ((nw = net_data->nw) != NULL))
+ (*nw->minimize)(nw);
+}
+
+/* Private */
+
+static struct net_data *
+init() {
+ struct net_data *net_data;
+
+ if (!(net_data = net_data_init(NULL)))
+ goto error;
+ if (!net_data->nw) {
+ net_data->nw = (*net_data->irs->nw_map)(net_data->irs);
+
+ if (!net_data->nw || !net_data->res) {
+ error:
+ errno = EIO;
+ return (NULL);
+ }
+ (*net_data->nw->res_set)(net_data->nw, net_data->res, NULL);
+ }
+
+ return (net_data);
+}
+
+static void
+freepvt(struct net_data *net_data) {
+ if (net_data->nw_data) {
+ free(net_data->nw_data);
+ net_data->nw_data = NULL;
+ }
+}
+
+static struct netent *
+fakeaddr(const char *name, int af, struct net_data *net_data) {
+ struct pvt *pvt;
+ const char *cp;
+ u_long tmp;
+
+ if (af != AF_INET) {
+ /* XXX should support IPv6 some day */
+ errno = EAFNOSUPPORT;
+ RES_SET_H_ERRNO(net_data->res, NETDB_INTERNAL);
+ return (NULL);
+ }
+ if (!isascii((unsigned char)(name[0])) ||
+ !isdigit((unsigned char)(name[0])))
+ return (NULL);
+ for (cp = name; *cp; ++cp)
+ if (!isascii(*cp) || (!isdigit((unsigned char)*cp) && *cp != '.'))
+ return (NULL);
+ if (*--cp == '.')
+ return (NULL);
+
+ /* All-numeric, no dot at the end. */
+
+ tmp = inet_network(name);
+ if (tmp == INADDR_NONE) {
+ RES_SET_H_ERRNO(net_data->res, HOST_NOT_FOUND);
+ return (NULL);
+ }
+
+ /* Valid network number specified.
+ * Fake up a netent as if we'd actually
+ * done a lookup.
+ */
+ freepvt(net_data);
+ net_data->nw_data = malloc(sizeof (struct pvt));
+ if (!net_data->nw_data) {
+ errno = ENOMEM;
+ RES_SET_H_ERRNO(net_data->res, NETDB_INTERNAL);
+ return (NULL);
+ }
+ pvt = net_data->nw_data;
+
+ strncpy(pvt->name, name, MAXDNAME);
+ pvt->name[MAXDNAME] = '\0';
+ pvt->netent.n_name = pvt->name;
+ pvt->netent.n_addrtype = AF_INET;
+ pvt->netent.n_aliases = pvt->aliases;
+ pvt->aliases[0] = NULL;
+ pvt->netent.n_net = tmp;
+
+ return (&pvt->netent);
+}
+
+static struct netent *
+nw_to_net(struct nwent *nwent, struct net_data *net_data) {
+ struct pvt *pvt;
+ u_long addr = 0;
+ int i;
+ int msbyte;
+
+ if (!nwent || nwent->n_addrtype != AF_INET)
+ return (NULL);
+ freepvt(net_data);
+ net_data->nw_data = malloc(sizeof (struct pvt));
+ if (!net_data->nw_data) {
+ errno = ENOMEM;
+ RES_SET_H_ERRNO(net_data->res, NETDB_INTERNAL);
+ return (NULL);
+ }
+ pvt = net_data->nw_data;
+ pvt->netent.n_name = nwent->n_name;
+ pvt->netent.n_aliases = nwent->n_aliases;
+ pvt->netent.n_addrtype = nwent->n_addrtype;
+
+/*%
+ * What this code does: Converts net addresses from network to host form.
+ *
+ * msbyte: the index of the most significant byte in the n_addr array.
+ *
+ * Shift bytes in significant order into addr. When all signicant
+ * bytes are in, zero out bits in the LSB that are not part of the network.
+ */
+ msbyte = nwent->n_length / 8 +
+ ((nwent->n_length % 8) != 0 ? 1 : 0) - 1;
+ for (i = 0; i <= msbyte; i++)
+ addr = (addr << 8) | ((unsigned char *)nwent->n_addr)[i];
+ i = (32 - nwent->n_length) % 8;
+ if (i != 0)
+ addr &= ~((1 << (i + 1)) - 1);
+ pvt->netent.n_net = addr;
+ return (&pvt->netent);
+}
+
+#endif /*__BIND_NOSTATIC*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/getnetent_r.c b/usr/src/lib/libresolv2_joy/common/irs/getnetent_r.c
new file mode 100644
index 0000000000..9fb52bc394
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/getnetent_r.c
@@ -0,0 +1,234 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1998-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: getnetent_r.c,v 1.6 2005/09/03 12:41:38 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include <port_before.h>
+#if !defined(_REENTRANT) || !defined(DO_PTHREADS)
+ static int getnetent_r_not_required = 0;
+#else
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <sys/param.h>
+#include <port_after.h>
+
+#ifdef NET_R_RETURN
+
+static NET_R_RETURN
+copy_netent(struct netent *, struct netent *, NET_R_COPY_ARGS);
+
+NET_R_RETURN
+getnetbyname_r(const char *name, struct netent *nptr, NET_R_ARGS) {
+ struct netent *ne = getnetbyname(name);
+#ifdef NET_R_SETANSWER
+ int n = 0;
+
+ if (ne == NULL || (n = copy_netent(ne, nptr, NET_R_COPY)) != 0)
+ *answerp = NULL;
+ else
+ *answerp = ne;
+ if (ne == NULL)
+ *h_errnop = h_errno;
+ return (n);
+#else
+ if (ne == NULL)
+ return (NET_R_BAD);
+
+ return (copy_netent(ne, nptr, NET_R_COPY));
+#endif
+}
+
+#ifndef GETNETBYADDR_ADDR_T
+#define GETNETBYADDR_ADDR_T long
+#endif
+NET_R_RETURN
+getnetbyaddr_r(GETNETBYADDR_ADDR_T addr, int type, struct netent *nptr, NET_R_ARGS) {
+ struct netent *ne = getnetbyaddr(addr, type);
+#ifdef NET_R_SETANSWER
+ int n = 0;
+
+ if (ne == NULL || (n = copy_netent(ne, nptr, NET_R_COPY)) != 0)
+ *answerp = NULL;
+ else
+ *answerp = ne;
+ if (ne == NULL)
+ *h_errnop = h_errno;
+ return (n);
+#else
+
+ if (ne == NULL)
+ return (NET_R_BAD);
+
+ return (copy_netent(ne, nptr, NET_R_COPY));
+#endif
+}
+
+/*%
+ * These assume a single context is in operation per thread.
+ * If this is not the case we will need to call irs directly
+ * rather than through the base functions.
+ */
+
+NET_R_RETURN
+getnetent_r(struct netent *nptr, NET_R_ARGS) {
+ struct netent *ne = getnetent();
+#ifdef NET_R_SETANSWER
+ int n = 0;
+
+ if (ne == NULL || (n = copy_netent(ne, nptr, NET_R_COPY)) != 0)
+ *answerp = NULL;
+ else
+ *answerp = ne;
+ if (ne == NULL)
+ *h_errnop = h_errno;
+ return (n);
+#else
+
+ if (ne == NULL)
+ return (NET_R_BAD);
+
+ return (copy_netent(ne, nptr, NET_R_COPY));
+#endif
+}
+
+NET_R_SET_RETURN
+#ifdef NET_R_ENT_ARGS
+setnetent_r(int stay_open, NET_R_ENT_ARGS)
+#else
+setnetent_r(int stay_open)
+#endif
+{
+#ifdef NET_R_ENT_ARGS
+ UNUSED(ndptr);
+#endif
+ setnetent(stay_open);
+#ifdef NET_R_SET_RESULT
+ return (NET_R_SET_RESULT);
+#endif
+}
+
+NET_R_END_RETURN
+#ifdef NET_R_ENT_ARGS
+endnetent_r(NET_R_ENT_ARGS)
+#else
+endnetent_r()
+#endif
+{
+#ifdef NET_R_ENT_ARGS
+ UNUSED(ndptr);
+#endif
+ endnetent();
+ NET_R_END_RESULT(NET_R_OK);
+}
+
+/* Private */
+
+#ifndef NETENT_DATA
+static NET_R_RETURN
+copy_netent(struct netent *ne, struct netent *nptr, NET_R_COPY_ARGS) {
+ char *cp;
+ int i, n;
+ int numptr, len;
+
+ /* Find out the amount of space required to store the answer. */
+ numptr = 1; /*%< NULL ptr */
+ len = (char *)ALIGN(buf) - buf;
+ for (i = 0; ne->n_aliases[i]; i++, numptr++) {
+ len += strlen(ne->n_aliases[i]) + 1;
+ }
+ len += strlen(ne->n_name) + 1;
+ len += numptr * sizeof(char*);
+
+ if (len > (int)buflen) {
+ errno = ERANGE;
+ return (NET_R_BAD);
+ }
+
+ /* copy net value and type */
+ nptr->n_addrtype = ne->n_addrtype;
+ nptr->n_net = ne->n_net;
+
+ cp = (char *)ALIGN(buf) + numptr * sizeof(char *);
+
+ /* copy official name */
+ n = strlen(ne->n_name) + 1;
+ strcpy(cp, ne->n_name);
+ nptr->n_name = cp;
+ cp += n;
+
+ /* copy aliases */
+ nptr->n_aliases = (char **)ALIGN(buf);
+ for (i = 0 ; ne->n_aliases[i]; i++) {
+ n = strlen(ne->n_aliases[i]) + 1;
+ strcpy(cp, ne->n_aliases[i]);
+ nptr->n_aliases[i] = cp;
+ cp += n;
+ }
+ nptr->n_aliases[i] = NULL;
+
+ return (NET_R_OK);
+}
+#else /* !NETENT_DATA */
+static int
+copy_netent(struct netent *ne, struct netent *nptr, NET_R_COPY_ARGS) {
+ char *cp, *eob;
+ int i, n;
+
+ /* copy net value and type */
+ nptr->n_addrtype = ne->n_addrtype;
+ nptr->n_net = ne->n_net;
+
+ /* copy official name */
+ cp = ndptr->line;
+ eob = ndptr->line + sizeof(ndptr->line);
+ if ((n = strlen(ne->n_name) + 1) < (eob - cp)) {
+ strcpy(cp, ne->n_name);
+ nptr->n_name = cp;
+ cp += n;
+ } else {
+ return (-1);
+ }
+
+ /* copy aliases */
+ i = 0;
+ nptr->n_aliases = ndptr->net_aliases;
+ while (ne->n_aliases[i] && i < (_MAXALIASES-1)) {
+ if ((n = strlen(ne->n_aliases[i]) + 1) < (eob - cp)) {
+ strcpy(cp, ne->n_aliases[i]);
+ nptr->n_aliases[i] = cp;
+ cp += n;
+ } else {
+ break;
+ }
+ i++;
+ }
+ nptr->n_aliases[i] = NULL;
+
+ return (NET_R_OK);
+}
+#endif /* !NETENT_DATA */
+#else /* NET_R_RETURN */
+ static int getnetent_r_unknown_system = 0;
+#endif /* NET_R_RETURN */
+#endif /* !defined(_REENTRANT) || !defined(DO_PTHREADS) */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/getnetgrent.c b/usr/src/lib/libresolv2_joy/common/irs/getnetgrent.c
new file mode 100644
index 0000000000..40b3e5a8ad
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/getnetgrent.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 1996-1999, 2001, 2003 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: getnetgrent.c,v 1.6 2008/11/14 02:36:51 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/* Imports */
+
+#include "port_before.h"
+
+#if !defined(__BIND_NOSTATIC)
+
+#include <sys/types.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+
+#include <errno.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_data.h"
+
+/* Forward */
+
+static struct net_data *init(void);
+
+
+/* Public */
+
+#ifndef SETNETGRENT_ARGS
+#define SETNETGRENT_ARGS const char *netgroup
+#endif
+void
+setnetgrent(SETNETGRENT_ARGS) {
+ struct net_data *net_data = init();
+
+ setnetgrent_p(netgroup, net_data);
+}
+
+void
+endnetgrent(void) {
+ struct net_data *net_data = init();
+
+ endnetgrent_p(net_data);
+}
+
+#ifndef INNETGR_ARGS
+#define INNETGR_ARGS const char *netgroup, const char *host, \
+ const char *user, const char *domain
+#endif
+int
+innetgr(INNETGR_ARGS) {
+ struct net_data *net_data = init();
+
+ return (innetgr_p(netgroup, host, user, domain, net_data));
+}
+
+int
+getnetgrent(NGR_R_CONST char **host, NGR_R_CONST char **user,
+ NGR_R_CONST char **domain)
+{
+ struct net_data *net_data = init();
+ const char *ch, *cu, *cd;
+ int ret;
+
+ ret = getnetgrent_p(&ch, &cu, &cd, net_data);
+ if (ret != 1)
+ return (ret);
+
+ DE_CONST(ch, *host);
+ DE_CONST(cu, *user);
+ DE_CONST(cd, *domain);
+ return (ret);
+}
+
+/* Shared private. */
+
+void
+setnetgrent_p(const char *netgroup, struct net_data *net_data) {
+ struct irs_ng *ng;
+
+ if ((net_data != NULL) && ((ng = net_data->ng) != NULL))
+ (*ng->rewind)(ng, netgroup);
+}
+
+void
+endnetgrent_p(struct net_data *net_data) {
+ struct irs_ng *ng;
+
+ if (!net_data)
+ return;
+ if ((ng = net_data->ng) != NULL)
+ (*ng->close)(ng);
+ net_data->ng = NULL;
+}
+
+int
+innetgr_p(const char *netgroup, const char *host,
+ const char *user, const char *domain,
+ struct net_data *net_data) {
+ struct irs_ng *ng;
+
+ if (!net_data || !(ng = net_data->ng))
+ return (0);
+ return ((*ng->test)(ng, netgroup, host, user, domain));
+}
+
+int
+getnetgrent_p(const char **host, const char **user, const char **domain,
+ struct net_data *net_data ) {
+ struct irs_ng *ng;
+
+ if (!net_data || !(ng = net_data->ng))
+ return (0);
+ return ((*ng->next)(ng, host, user, domain));
+}
+
+/* Private */
+
+static struct net_data *
+init(void) {
+ struct net_data *net_data;
+
+ if (!(net_data = net_data_init(NULL)))
+ goto error;
+ if (!net_data->ng) {
+ net_data->ng = (*net_data->irs->ng_map)(net_data->irs);
+ if (!net_data->ng) {
+ error:
+ errno = EIO;
+ return (NULL);
+ }
+ }
+
+ return (net_data);
+}
+
+#endif /*__BIND_NOSTATIC*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/getnetgrent_r.c b/usr/src/lib/libresolv2_joy/common/irs/getnetgrent_r.c
new file mode 100644
index 0000000000..aa8810320d
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/getnetgrent_r.c
@@ -0,0 +1,219 @@
+/*
+ * Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 1998, 1999, 2001, 2003 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: getnetgrent_r.c,v 1.14 2008/11/14 02:36:51 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include <port_before.h>
+#if !defined(_REENTRANT) || !defined(DO_PTHREADS)
+ static int getnetgrent_r_not_required = 0;
+#else
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <stdlib.h>
+#include <port_after.h>
+
+#ifdef NGR_R_RETURN
+#ifndef NGR_R_PRIVATE
+#define NGR_R_PRIVATE 0
+#endif
+
+static NGR_R_RETURN
+copy_protoent(NGR_R_CONST char **, NGR_R_CONST char **, NGR_R_CONST char **,
+ const char *, const char *, const char *, NGR_R_COPY_ARGS);
+
+NGR_R_RETURN
+innetgr_r(const char *netgroup, const char *host, const char *user,
+ const char *domain) {
+ char *ng, *ho, *us, *dom;
+
+ DE_CONST(netgroup, ng);
+ DE_CONST(host, ho);
+ DE_CONST(user, us);
+ DE_CONST(domain, dom);
+
+ return (innetgr(ng, ho, us, dom));
+}
+
+/*%
+ * These assume a single context is in operation per thread.
+ * If this is not the case we will need to call irs directly
+ * rather than through the base functions.
+ */
+
+NGR_R_RETURN
+getnetgrent_r(NGR_R_CONST char **machinep, NGR_R_CONST char **userp,
+ NGR_R_CONST char **domainp, NGR_R_ARGS)
+{
+ NGR_R_CONST char *mp, *up, *dp;
+ int res = getnetgrent(&mp, &up, &dp);
+
+ if (res != 1)
+ return (res);
+
+ return (copy_protoent(machinep, userp, domainp,
+ mp, up, dp, NGR_R_COPY));
+}
+
+#if NGR_R_PRIVATE == 2
+struct private {
+ char *buf;
+};
+
+#endif
+NGR_R_SET_RETURN
+#ifdef NGR_R_SET_ARGS
+setnetgrent_r(NGR_R_SET_CONST char *netgroup, NGR_R_SET_ARGS)
+#else
+setnetgrent_r(NGR_R_SET_CONST char *netgroup)
+#endif
+{
+#if NGR_R_PRIVATE == 2
+ struct private *p;
+#endif
+ char *tmp;
+#if defined(NGR_R_SET_ARGS) && NGR_R_PRIVATE == 0
+ UNUSED(buf);
+ UNUSED(buflen);
+#endif
+
+ DE_CONST(netgroup, tmp);
+ setnetgrent(tmp);
+
+#if NGR_R_PRIVATE == 1
+ *buf = NULL;
+#elif NGR_R_PRIVATE == 2
+ *buf = p = malloc(sizeof(struct private));
+ if (p == NULL)
+#ifdef NGR_R_SET_RESULT
+ return (NGR_R_BAD);
+#else
+ return;
+#endif
+ p->buf = NULL;
+#endif
+#ifdef NGR_R_SET_RESULT
+ return (NGR_R_SET_RESULT);
+#endif
+}
+
+NGR_R_END_RETURN
+#ifdef NGR_R_END_ARGS
+endnetgrent_r(NGR_R_END_ARGS)
+#else
+endnetgrent_r(void)
+#endif
+{
+#if NGR_R_PRIVATE == 2
+ struct private *p = buf;
+#endif
+#if defined(NGR_R_SET_ARGS) && NGR_R_PRIVATE == 0
+ UNUSED(buf);
+ UNUSED(buflen);
+#endif
+
+ endnetgrent();
+#if NGR_R_PRIVATE == 1
+ if (*buf != NULL)
+ free(*buf);
+ *buf = NULL;
+#elif NGR_R_PRIVATE == 2
+ if (p->buf != NULL)
+ free(p->buf);
+ free(p);
+#endif
+ NGR_R_END_RESULT(NGR_R_OK);
+}
+
+/* Private */
+
+static int
+copy_protoent(NGR_R_CONST char **machinep, NGR_R_CONST char **userp,
+ NGR_R_CONST char **domainp, const char *mp, const char *up,
+ const char *dp, NGR_R_COPY_ARGS)
+{
+#if NGR_R_PRIVATE == 2
+ struct private *p = buf;
+#endif
+ char *cp;
+ int n;
+ int len;
+
+ /* Find out the amount of space required to store the answer. */
+ len = 0;
+ if (mp != NULL) len += strlen(mp) + 1;
+ if (up != NULL) len += strlen(up) + 1;
+ if (dp != NULL) len += strlen(dp) + 1;
+
+#if NGR_R_PRIVATE == 1
+ if (*buf != NULL)
+ free(*buf);
+ *buf = malloc(len);
+ if (*buf == NULL)
+ return(NGR_R_BAD);
+ cp = *buf;
+#elif NGR_R_PRIVATE == 2
+ if (p->buf)
+ free(p->buf);
+ p->buf = malloc(len);
+ if (p->buf == NULL)
+ return(NGR_R_BAD);
+ cp = p->buf;
+#else
+ if (len > (int)buflen) {
+ errno = ERANGE;
+ return (NGR_R_BAD);
+ }
+ cp = buf;
+#endif
+
+ if (mp != NULL) {
+ n = strlen(mp) + 1;
+ strcpy(cp, mp);
+ *machinep = cp;
+ cp += n;
+ } else
+ *machinep = NULL;
+
+ if (up != NULL) {
+ n = strlen(up) + 1;
+ strcpy(cp, up);
+ *userp = cp;
+ cp += n;
+ } else
+ *userp = NULL;
+
+ if (dp != NULL) {
+ n = strlen(dp) + 1;
+ strcpy(cp, dp);
+ *domainp = cp;
+ cp += n;
+ } else
+ *domainp = NULL;
+
+ return (NGR_R_OK);
+}
+#else /* NGR_R_RETURN */
+ static int getnetgrent_r_unknown_system = 0;
+#endif /* NGR_R_RETURN */
+#endif /* !defined(_REENTRANT) || !defined(DO_PTHREADS) */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/getprotoent.c b/usr/src/lib/libresolv2_joy/common/irs/getprotoent.c
new file mode 100644
index 0000000000..32a1040916
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/getprotoent.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: getprotoent.c,v 1.4 2005/04/27 04:56:26 sra Exp $";
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#if !defined(__BIND_NOSTATIC)
+
+#include <sys/types.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+
+#include <errno.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_data.h"
+
+/* Forward */
+
+static struct net_data *init(void);
+
+/* Public */
+
+struct protoent *
+getprotoent() {
+ struct net_data *net_data = init();
+
+ return (getprotoent_p(net_data));
+}
+
+struct protoent *
+getprotobyname(const char *name) {
+ struct net_data *net_data = init();
+
+ return (getprotobyname_p(name, net_data));
+}
+
+struct protoent *
+getprotobynumber(int proto) {
+ struct net_data *net_data = init();
+
+ return (getprotobynumber_p(proto, net_data));
+}
+
+void
+setprotoent(int stayopen) {
+ struct net_data *net_data = init();
+
+ setprotoent_p(stayopen, net_data);
+}
+
+void
+endprotoent() {
+ struct net_data *net_data = init();
+
+ endprotoent_p(net_data);
+}
+
+/* Shared private. */
+
+struct protoent *
+getprotoent_p(struct net_data *net_data) {
+ struct irs_pr *pr;
+
+ if (!net_data || !(pr = net_data->pr))
+ return (NULL);
+ net_data->pr_last = (*pr->next)(pr);
+ return (net_data->pr_last);
+}
+
+struct protoent *
+getprotobyname_p(const char *name, struct net_data *net_data) {
+ struct irs_pr *pr;
+ char **pap;
+
+ if (!net_data || !(pr = net_data->pr))
+ return (NULL);
+ if (net_data->pr_stayopen && net_data->pr_last) {
+ if (!strcmp(net_data->pr_last->p_name, name))
+ return (net_data->pr_last);
+ for (pap = net_data->pr_last->p_aliases; pap && *pap; pap++)
+ if (!strcmp(name, *pap))
+ return (net_data->pr_last);
+ }
+ net_data->pr_last = (*pr->byname)(pr, name);
+ if (!net_data->pr_stayopen)
+ endprotoent();
+ return (net_data->pr_last);
+}
+
+struct protoent *
+getprotobynumber_p(int proto, struct net_data *net_data) {
+ struct irs_pr *pr;
+
+ if (!net_data || !(pr = net_data->pr))
+ return (NULL);
+ if (net_data->pr_stayopen && net_data->pr_last)
+ if (net_data->pr_last->p_proto == proto)
+ return (net_data->pr_last);
+ net_data->pr_last = (*pr->bynumber)(pr, proto);
+ if (!net_data->pr_stayopen)
+ endprotoent();
+ return (net_data->pr_last);
+}
+
+void
+setprotoent_p(int stayopen, struct net_data *net_data) {
+ struct irs_pr *pr;
+
+ if (!net_data || !(pr = net_data->pr))
+ return;
+ (*pr->rewind)(pr);
+ net_data->pr_stayopen = (stayopen != 0);
+ if (stayopen == 0)
+ net_data_minimize(net_data);
+}
+
+void
+endprotoent_p(struct net_data *net_data) {
+ struct irs_pr *pr;
+
+ if ((net_data != NULL) && ((pr = net_data->pr) != NULL))
+ (*pr->minimize)(pr);
+}
+
+/* Private */
+
+static struct net_data *
+init() {
+ struct net_data *net_data;
+
+ if (!(net_data = net_data_init(NULL)))
+ goto error;
+ if (!net_data->pr) {
+ net_data->pr = (*net_data->irs->pr_map)(net_data->irs);
+
+ if (!net_data->pr || !net_data->res) {
+ error:
+ errno = EIO;
+ return (NULL);
+ }
+ (*net_data->pr->res_set)(net_data->pr, net_data->res, NULL);
+ }
+
+ return (net_data);
+}
+
+#endif /*__BIND_NOSTATIC*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/getprotoent_r.c b/usr/src/lib/libresolv2_joy/common/irs/getprotoent_r.c
new file mode 100644
index 0000000000..d5d9ae53b6
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/getprotoent_r.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1998-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: getprotoent_r.c,v 1.6 2006/08/01 01:14:16 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include <port_before.h>
+#if !defined(_REENTRANT) || !defined(DO_PTHREADS)
+ static int getprotoent_r_not_required = 0;
+#else
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <port_after.h>
+
+#ifdef PROTO_R_RETURN
+
+static PROTO_R_RETURN
+copy_protoent(struct protoent *, struct protoent *, PROTO_R_COPY_ARGS);
+
+PROTO_R_RETURN
+getprotobyname_r(const char *name, struct protoent *pptr, PROTO_R_ARGS) {
+ struct protoent *pe = getprotobyname(name);
+#ifdef PROTO_R_SETANSWER
+ int n = 0;
+
+ if (pe == NULL || (n = copy_protoent(pe, pptr, PROTO_R_COPY)) != 0)
+ *answerp = NULL;
+ else
+ *answerp = pptr;
+
+ return (n);
+#else
+ if (pe == NULL)
+ return (PROTO_R_BAD);
+
+ return (copy_protoent(pe, pptr, PROTO_R_COPY));
+#endif
+}
+
+PROTO_R_RETURN
+getprotobynumber_r(int proto, struct protoent *pptr, PROTO_R_ARGS) {
+ struct protoent *pe = getprotobynumber(proto);
+#ifdef PROTO_R_SETANSWER
+ int n = 0;
+
+ if (pe == NULL || (n = copy_protoent(pe, pptr, PROTO_R_COPY)) != 0)
+ *answerp = NULL;
+ else
+ *answerp = pptr;
+
+ return (n);
+#else
+ if (pe == NULL)
+ return (PROTO_R_BAD);
+
+ return (copy_protoent(pe, pptr, PROTO_R_COPY));
+#endif
+}
+
+/*%
+ * These assume a single context is in operation per thread.
+ * If this is not the case we will need to call irs directly
+ * rather than through the base functions.
+ */
+
+PROTO_R_RETURN
+getprotoent_r(struct protoent *pptr, PROTO_R_ARGS) {
+ struct protoent *pe = getprotoent();
+#ifdef PROTO_R_SETANSWER
+ int n = 0;
+
+ if (pe == NULL || (n = copy_protoent(pe, pptr, PROTO_R_COPY)) != 0)
+ *answerp = NULL;
+ else
+ *answerp = pptr;
+
+ return (n);
+#else
+ if (pe == NULL)
+ return (PROTO_R_BAD);
+
+ return (copy_protoent(pe, pptr, PROTO_R_COPY));
+#endif
+}
+
+PROTO_R_SET_RETURN
+#ifdef PROTO_R_ENT_ARGS
+setprotoent_r(int stay_open, PROTO_R_ENT_ARGS)
+#else
+setprotoent_r(int stay_open)
+#endif
+{
+#ifdef PROTO_R_ENT_UNUSED
+ PROTO_R_ENT_UNUSED;
+#endif
+ setprotoent(stay_open);
+#ifdef PROTO_R_SET_RESULT
+ return (PROTO_R_SET_RESULT);
+#endif
+}
+
+PROTO_R_END_RETURN
+#ifdef PROTO_R_ENT_ARGS
+endprotoent_r(PROTO_R_ENT_ARGS)
+#else
+endprotoent_r()
+#endif
+{
+#ifdef PROTO_R_ENT_UNUSED
+ PROTO_R_ENT_UNUSED;
+#endif
+ endprotoent();
+ PROTO_R_END_RESULT(PROTO_R_OK);
+}
+
+/* Private */
+
+#ifndef PROTOENT_DATA
+static PROTO_R_RETURN
+copy_protoent(struct protoent *pe, struct protoent *pptr, PROTO_R_COPY_ARGS) {
+ char *cp;
+ int i, n;
+ int numptr, len;
+
+ /* Find out the amount of space required to store the answer. */
+ numptr = 1; /*%< NULL ptr */
+ len = (char *)ALIGN(buf) - buf;
+ for (i = 0; pe->p_aliases[i]; i++, numptr++) {
+ len += strlen(pe->p_aliases[i]) + 1;
+ }
+ len += strlen(pe->p_name) + 1;
+ len += numptr * sizeof(char*);
+
+ if (len > (int)buflen) {
+ errno = ERANGE;
+ return (PROTO_R_BAD);
+ }
+
+ /* copy protocol value*/
+ pptr->p_proto = pe->p_proto;
+
+ cp = (char *)ALIGN(buf) + numptr * sizeof(char *);
+
+ /* copy official name */
+ n = strlen(pe->p_name) + 1;
+ strcpy(cp, pe->p_name);
+ pptr->p_name = cp;
+ cp += n;
+
+ /* copy aliases */
+ pptr->p_aliases = (char **)ALIGN(buf);
+ for (i = 0 ; pe->p_aliases[i]; i++) {
+ n = strlen(pe->p_aliases[i]) + 1;
+ strcpy(cp, pe->p_aliases[i]);
+ pptr->p_aliases[i] = cp;
+ cp += n;
+ }
+ pptr->p_aliases[i] = NULL;
+
+ return (PROTO_R_OK);
+}
+#else /* !PROTOENT_DATA */
+static int
+copy_protoent(struct protoent *pe, struct protoent *pptr, PROTO_R_COPY_ARGS) {
+ char *cp, *eob;
+ int i, n;
+
+ /* copy protocol value */
+ pptr->p_proto = pe->p_proto;
+
+ /* copy official name */
+ cp = pdptr->line;
+ eob = pdptr->line + sizeof(pdptr->line);
+ if ((n = strlen(pe->p_name) + 1) < (eob - cp)) {
+ strcpy(cp, pe->p_name);
+ pptr->p_name = cp;
+ cp += n;
+ } else {
+ return (-1);
+ }
+
+ /* copy aliases */
+ i = 0;
+ pptr->p_aliases = pdptr->proto_aliases;
+ while (pe->p_aliases[i] && i < (_MAXALIASES-1)) {
+ if ((n = strlen(pe->p_aliases[i]) + 1) < (eob - cp)) {
+ strcpy(cp, pe->p_aliases[i]);
+ pptr->p_aliases[i] = cp;
+ cp += n;
+ } else {
+ break;
+ }
+ i++;
+ }
+ pptr->p_aliases[i] = NULL;
+
+ return (PROTO_R_OK);
+}
+#endif /* PROTOENT_DATA */
+#else /* PROTO_R_RETURN */
+ static int getprotoent_r_unknown_system = 0;
+#endif /* PROTO_R_RETURN */
+#endif /* !defined(_REENTRANT) || !defined(DO_PTHREADS) */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/getservent.c b/usr/src/lib/libresolv2_joy/common/irs/getservent.c
new file mode 100644
index 0000000000..31af67e659
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/getservent.c
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: getservent.c,v 1.4 2005/04/27 04:56:26 sra Exp $";
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#if !defined(__BIND_NOSTATIC)
+
+#include <sys/types.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+
+#include <errno.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_data.h"
+
+/* Forward */
+
+static struct net_data *init(void);
+
+/* Public */
+
+struct servent *
+getservent(void) {
+ struct net_data *net_data = init();
+
+ return (getservent_p(net_data));
+}
+
+struct servent *
+getservbyname(const char *name, const char *proto) {
+ struct net_data *net_data = init();
+
+ return (getservbyname_p(name, proto, net_data));
+}
+
+struct servent *
+getservbyport(int port, const char *proto) {
+ struct net_data *net_data = init();
+
+ return (getservbyport_p(port, proto, net_data));
+}
+
+void
+setservent(int stayopen) {
+ struct net_data *net_data = init();
+
+ setservent_p(stayopen, net_data);
+}
+
+void
+endservent() {
+ struct net_data *net_data = init();
+
+ endservent_p(net_data);
+}
+
+/* Shared private. */
+
+struct servent *
+getservent_p(struct net_data *net_data) {
+ struct irs_sv *sv;
+
+ if (!net_data || !(sv = net_data->sv))
+ return (NULL);
+ net_data->sv_last = (*sv->next)(sv);
+ return (net_data->sv_last);
+}
+
+struct servent *
+getservbyname_p(const char *name, const char *proto,
+ struct net_data *net_data) {
+ struct irs_sv *sv;
+ char **sap;
+
+ if (!net_data || !(sv = net_data->sv))
+ return (NULL);
+ if (net_data->sv_stayopen && net_data->sv_last)
+ if (!proto || !strcmp(net_data->sv_last->s_proto, proto)) {
+ if (!strcmp(net_data->sv_last->s_name, name))
+ return (net_data->sv_last);
+ for (sap = net_data->sv_last->s_aliases;
+ sap && *sap; sap++)
+ if (!strcmp(name, *sap))
+ return (net_data->sv_last);
+ }
+ net_data->sv_last = (*sv->byname)(sv, name, proto);
+ if (!net_data->sv_stayopen)
+ endservent();
+ return (net_data->sv_last);
+}
+
+struct servent *
+getservbyport_p(int port, const char *proto, struct net_data *net_data) {
+ struct irs_sv *sv;
+
+ if (!net_data || !(sv = net_data->sv))
+ return (NULL);
+ if (net_data->sv_stayopen && net_data->sv_last)
+ if (port == net_data->sv_last->s_port &&
+ ( !proto ||
+ !strcmp(net_data->sv_last->s_proto, proto)))
+ return (net_data->sv_last);
+ net_data->sv_last = (*sv->byport)(sv, port, proto);
+ return (net_data->sv_last);
+}
+
+void
+setservent_p(int stayopen, struct net_data *net_data) {
+ struct irs_sv *sv;
+
+ if (!net_data || !(sv = net_data->sv))
+ return;
+ (*sv->rewind)(sv);
+ net_data->sv_stayopen = (stayopen != 0);
+ if (stayopen == 0)
+ net_data_minimize(net_data);
+}
+
+void
+endservent_p(struct net_data *net_data) {
+ struct irs_sv *sv;
+
+ if ((net_data != NULL) && ((sv = net_data->sv) != NULL))
+ (*sv->minimize)(sv);
+}
+
+/* Private */
+
+static struct net_data *
+init() {
+ struct net_data *net_data;
+
+ if (!(net_data = net_data_init(NULL)))
+ goto error;
+ if (!net_data->sv) {
+ net_data->sv = (*net_data->irs->sv_map)(net_data->irs);
+
+ if (!net_data->sv || !net_data->res) {
+ error:
+ errno = EIO;
+ return (NULL);
+ }
+ (*net_data->sv->res_set)(net_data->sv, net_data->res, NULL);
+ }
+
+ return (net_data);
+}
+
+#endif /*__BIND_NOSTATIC*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/getservent_r.c b/usr/src/lib/libresolv2_joy/common/irs/getservent_r.c
new file mode 100644
index 0000000000..42d1e46163
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/getservent_r.c
@@ -0,0 +1,242 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1998-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: getservent_r.c,v 1.6 2006/08/01 01:14:16 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include <port_before.h>
+#if !defined(_REENTRANT) || !defined(DO_PTHREADS)
+ static int getservent_r_not_required = 0;
+#else
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <sys/param.h>
+#include <port_after.h>
+
+#ifdef SERV_R_RETURN
+
+static SERV_R_RETURN
+copy_servent(struct servent *, struct servent *, SERV_R_COPY_ARGS);
+
+SERV_R_RETURN
+getservbyname_r(const char *name, const char *proto,
+ struct servent *sptr, SERV_R_ARGS) {
+ struct servent *se = getservbyname(name, proto);
+#ifdef SERV_R_SETANSWER
+ int n = 0;
+
+ if (se == NULL || (n = copy_servent(se, sptr, SERV_R_COPY)) != 0)
+ *answerp = NULL;
+ else
+ *answerp = sptr;
+
+ return (n);
+#else
+ if (se == NULL)
+ return (SERV_R_BAD);
+
+ return (copy_servent(se, sptr, SERV_R_COPY));
+#endif
+}
+
+SERV_R_RETURN
+getservbyport_r(int port, const char *proto,
+ struct servent *sptr, SERV_R_ARGS) {
+ struct servent *se = getservbyport(port, proto);
+#ifdef SERV_R_SETANSWER
+ int n = 0;
+
+ if (se == NULL || (n = copy_servent(se, sptr, SERV_R_COPY)) != 0)
+ *answerp = NULL;
+ else
+ *answerp = sptr;
+
+ return (n);
+#else
+ if (se == NULL)
+ return (SERV_R_BAD);
+
+ return (copy_servent(se, sptr, SERV_R_COPY));
+#endif
+}
+
+/*%
+ * These assume a single context is in operation per thread.
+ * If this is not the case we will need to call irs directly
+ * rather than through the base functions.
+ */
+
+SERV_R_RETURN
+getservent_r(struct servent *sptr, SERV_R_ARGS) {
+ struct servent *se = getservent();
+#ifdef SERV_R_SETANSWER
+ int n = 0;
+
+ if (se == NULL || (n = copy_servent(se, sptr, SERV_R_COPY)) != 0)
+ *answerp = NULL;
+ else
+ *answerp = sptr;
+
+ return (n);
+#else
+ if (se == NULL)
+ return (SERV_R_BAD);
+
+ return (copy_servent(se, sptr, SERV_R_COPY));
+#endif
+}
+
+SERV_R_SET_RETURN
+#ifdef SERV_R_ENT_ARGS
+setservent_r(int stay_open, SERV_R_ENT_ARGS)
+#else
+setservent_r(int stay_open)
+#endif
+{
+#ifdef SERV_R_ENT_UNUSED
+ SERV_R_ENT_UNUSED;
+#endif
+ setservent(stay_open);
+#ifdef SERV_R_SET_RESULT
+ return (SERV_R_SET_RESULT);
+#endif
+}
+
+SERV_R_END_RETURN
+#ifdef SERV_R_ENT_ARGS
+endservent_r(SERV_R_ENT_ARGS)
+#else
+endservent_r()
+#endif
+{
+#ifdef SERV_R_ENT_UNUSED
+ SERV_R_ENT_UNUSED;
+#endif
+ endservent();
+ SERV_R_END_RESULT(SERV_R_OK);
+}
+
+/* Private */
+
+#ifndef SERVENT_DATA
+static SERV_R_RETURN
+copy_servent(struct servent *se, struct servent *sptr, SERV_R_COPY_ARGS) {
+ char *cp;
+ int i, n;
+ int numptr, len;
+
+ /* Find out the amount of space required to store the answer. */
+ numptr = 1; /*%< NULL ptr */
+ len = (char *)ALIGN(buf) - buf;
+ for (i = 0; se->s_aliases[i]; i++, numptr++) {
+ len += strlen(se->s_aliases[i]) + 1;
+ }
+ len += strlen(se->s_name) + 1;
+ len += strlen(se->s_proto) + 1;
+ len += numptr * sizeof(char*);
+
+ if (len > (int)buflen) {
+ errno = ERANGE;
+ return (SERV_R_BAD);
+ }
+
+ /* copy port value */
+ sptr->s_port = se->s_port;
+
+ cp = (char *)ALIGN(buf) + numptr * sizeof(char *);
+
+ /* copy official name */
+ n = strlen(se->s_name) + 1;
+ strcpy(cp, se->s_name);
+ sptr->s_name = cp;
+ cp += n;
+
+ /* copy aliases */
+ sptr->s_aliases = (char **)ALIGN(buf);
+ for (i = 0 ; se->s_aliases[i]; i++) {
+ n = strlen(se->s_aliases[i]) + 1;
+ strcpy(cp, se->s_aliases[i]);
+ sptr->s_aliases[i] = cp;
+ cp += n;
+ }
+ sptr->s_aliases[i] = NULL;
+
+ /* copy proto */
+ n = strlen(se->s_proto) + 1;
+ strcpy(cp, se->s_proto);
+ sptr->s_proto = cp;
+ cp += n;
+
+ return (SERV_R_OK);
+}
+#else /* !SERVENT_DATA */
+static int
+copy_servent(struct servent *se, struct servent *sptr, SERV_R_COPY_ARGS) {
+ char *cp, *eob;
+ int i, n;
+
+ /* copy port value */
+ sptr->s_port = se->s_port;
+
+ /* copy official name */
+ cp = sdptr->line;
+ eob = sdptr->line + sizeof(sdptr->line);
+ if ((n = strlen(se->s_name) + 1) < (eob - cp)) {
+ strcpy(cp, se->s_name);
+ sptr->s_name = cp;
+ cp += n;
+ } else {
+ return (-1);
+ }
+
+ /* copy aliases */
+ i = 0;
+ sptr->s_aliases = sdptr->serv_aliases;
+ while (se->s_aliases[i] && i < (_MAXALIASES-1)) {
+ if ((n = strlen(se->s_aliases[i]) + 1) < (eob - cp)) {
+ strcpy(cp, se->s_aliases[i]);
+ sptr->s_aliases[i] = cp;
+ cp += n;
+ } else {
+ break;
+ }
+ i++;
+ }
+ sptr->s_aliases[i] = NULL;
+
+ /* copy proto */
+ if ((n = strlen(se->s_proto) + 1) < (eob - cp)) {
+ strcpy(cp, se->s_proto);
+ sptr->s_proto = cp;
+ cp += n;
+ } else {
+ return (-1);
+ }
+
+ return (SERV_R_OK);
+}
+#endif /* !SERVENT_DATA */
+#else /*SERV_R_RETURN */
+ static int getservent_r_unknown_system = 0;
+#endif /*SERV_R_RETURN */
+#endif /* !defined(_REENTRANT) || !defined(DO_PTHREADS) */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/hesiod.c b/usr/src/lib/libresolv2_joy/common/irs/hesiod.c
new file mode 100644
index 0000000000..7641bf80ac
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/hesiod.c
@@ -0,0 +1,505 @@
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: hesiod.c,v 1.7 2005/07/28 06:51:48 marka Exp $";
+#endif
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+
+/*! \file
+ * \brief
+ * hesiod.c --- the core portion of the hesiod resolver.
+ *
+ * This file is derived from the hesiod library from Project Athena;
+ * It has been extensively rewritten by Theodore Ts'o to have a more
+ * thread-safe interface.
+ * \author
+ * This file is primarily maintained by &lt;tytso@mit.edu&gt; and &lt;ghudson@mit.edu&gt;.
+ */
+
+/* Imports */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+
+#include <errno.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "port_after.h"
+
+#include "pathnames.h"
+#include "hesiod.h"
+#include "hesiod_p.h"
+
+/* Forward */
+
+int hesiod_init(void **context);
+void hesiod_end(void *context);
+char * hesiod_to_bind(void *context, const char *name,
+ const char *type);
+char ** hesiod_resolve(void *context, const char *name,
+ const char *type);
+void hesiod_free_list(void *context, char **list);
+
+static int parse_config_file(struct hesiod_p *ctx, const char *filename);
+static char ** get_txt_records(struct hesiod_p *ctx, int class,
+ const char *name);
+static int init(struct hesiod_p *ctx);
+
+/* Public */
+
+/*%
+ * This function is called to initialize a hesiod_p.
+ */
+int
+hesiod_init(void **context) {
+ struct hesiod_p *ctx;
+ char *cp;
+
+ ctx = malloc(sizeof(struct hesiod_p));
+ if (ctx == 0) {
+ errno = ENOMEM;
+ return (-1);
+ }
+
+ memset(ctx, 0, sizeof (*ctx));
+
+ if (parse_config_file(ctx, _PATH_HESIOD_CONF) < 0) {
+#ifdef DEF_RHS
+ /*
+ * Use compiled in defaults.
+ */
+ ctx->LHS = malloc(strlen(DEF_LHS) + 1);
+ ctx->RHS = malloc(strlen(DEF_RHS) + 1);
+ if (ctx->LHS == NULL || ctx->RHS == NULL) {
+ errno = ENOMEM;
+ goto cleanup;
+ }
+ strcpy(ctx->LHS, DEF_LHS); /* (checked) */
+ strcpy(ctx->RHS, DEF_RHS); /* (checked) */
+#else
+ goto cleanup;
+#endif
+ }
+ /*
+ * The default RHS can be overridden by an environment
+ * variable.
+ */
+ if ((cp = getenv("HES_DOMAIN")) != NULL) {
+ size_t RHSlen = strlen(cp) + 2;
+ if (ctx->RHS)
+ free(ctx->RHS);
+ ctx->RHS = malloc(RHSlen);
+ if (!ctx->RHS) {
+ errno = ENOMEM;
+ goto cleanup;
+ }
+ if (cp[0] == '.') {
+ strcpy(ctx->RHS, cp); /* (checked) */
+ } else {
+ strcpy(ctx->RHS, "."); /* (checked) */
+ strcat(ctx->RHS, cp); /* (checked) */
+ }
+ }
+
+ /*
+ * If there is no default hesiod realm set, we return an
+ * error.
+ */
+ if (!ctx->RHS) {
+ errno = ENOEXEC;
+ goto cleanup;
+ }
+
+#if 0
+ if (res_ninit(ctx->res) < 0)
+ goto cleanup;
+#endif
+
+ *context = ctx;
+ return (0);
+
+ cleanup:
+ hesiod_end(ctx);
+ return (-1);
+}
+
+/*%
+ * This function deallocates the hesiod_p
+ */
+void
+hesiod_end(void *context) {
+ struct hesiod_p *ctx = (struct hesiod_p *) context;
+ int save_errno = errno;
+
+ if (ctx->res)
+ res_nclose(ctx->res);
+ if (ctx->RHS)
+ free(ctx->RHS);
+ if (ctx->LHS)
+ free(ctx->LHS);
+ if (ctx->res && ctx->free_res)
+ (*ctx->free_res)(ctx->res);
+ free(ctx);
+ errno = save_errno;
+}
+
+/*%
+ * This function takes a hesiod (name, type) and returns a DNS
+ * name which is to be resolved.
+ */
+char *
+hesiod_to_bind(void *context, const char *name, const char *type) {
+ struct hesiod_p *ctx = (struct hesiod_p *) context;
+ char *bindname;
+ char **rhs_list = NULL;
+ const char *RHS, *cp;
+
+ /* Decide what our RHS is, and set cp to the end of the actual name. */
+ if ((cp = strchr(name, '@')) != NULL) {
+ if (strchr(cp + 1, '.'))
+ RHS = cp + 1;
+ else if ((rhs_list = hesiod_resolve(context, cp + 1,
+ "rhs-extension")) != NULL)
+ RHS = *rhs_list;
+ else {
+ errno = ENOENT;
+ return (NULL);
+ }
+ } else {
+ RHS = ctx->RHS;
+ cp = name + strlen(name);
+ }
+
+ /*
+ * Allocate the space we need, including up to three periods and
+ * the terminating NUL.
+ */
+ if ((bindname = malloc((cp - name) + strlen(type) + strlen(RHS) +
+ (ctx->LHS ? strlen(ctx->LHS) : 0) + 4)) == NULL) {
+ errno = ENOMEM;
+ if (rhs_list)
+ hesiod_free_list(context, rhs_list);
+ return NULL;
+ }
+
+ /* Now put together the DNS name. */
+ memcpy(bindname, name, cp - name);
+ bindname[cp - name] = '\0';
+ strcat(bindname, ".");
+ strcat(bindname, type);
+ if (ctx->LHS) {
+ if (ctx->LHS[0] != '.')
+ strcat(bindname, ".");
+ strcat(bindname, ctx->LHS);
+ }
+ if (RHS[0] != '.')
+ strcat(bindname, ".");
+ strcat(bindname, RHS);
+
+ if (rhs_list)
+ hesiod_free_list(context, rhs_list);
+
+ return (bindname);
+}
+
+/*%
+ * This is the core function. Given a hesiod (name, type), it
+ * returns an array of strings returned by the resolver.
+ */
+char **
+hesiod_resolve(void *context, const char *name, const char *type) {
+ struct hesiod_p *ctx = (struct hesiod_p *) context;
+ char *bindname = hesiod_to_bind(context, name, type);
+ char **retvec;
+
+ if (bindname == NULL)
+ return (NULL);
+ if (init(ctx) == -1) {
+ free(bindname);
+ return (NULL);
+ }
+
+ if ((retvec = get_txt_records(ctx, C_IN, bindname))) {
+ free(bindname);
+ return (retvec);
+ }
+
+ if (errno != ENOENT)
+ return (NULL);
+
+ retvec = get_txt_records(ctx, C_HS, bindname);
+ free(bindname);
+ return (retvec);
+}
+
+void
+hesiod_free_list(void *context, char **list) {
+ char **p;
+
+ UNUSED(context);
+
+ for (p = list; *p; p++)
+ free(*p);
+ free(list);
+}
+
+/*%
+ * This function parses the /etc/hesiod.conf file
+ */
+static int
+parse_config_file(struct hesiod_p *ctx, const char *filename) {
+ char *key, *data, *cp, **cpp;
+ char buf[MAXDNAME+7];
+ FILE *fp;
+
+ /*
+ * Clear the existing configuration variable, just in case
+ * they're set.
+ */
+ if (ctx->RHS)
+ free(ctx->RHS);
+ if (ctx->LHS)
+ free(ctx->LHS);
+ ctx->RHS = ctx->LHS = 0;
+
+ /*
+ * Now open and parse the file...
+ */
+ if (!(fp = fopen(filename, "r")))
+ return (-1);
+
+ while (fgets(buf, sizeof(buf), fp) != NULL) {
+ cp = buf;
+ if (*cp == '#' || *cp == '\n' || *cp == '\r')
+ continue;
+ while(*cp == ' ' || *cp == '\t')
+ cp++;
+ key = cp;
+ while(*cp != ' ' && *cp != '\t' && *cp != '=')
+ cp++;
+ *cp++ = '\0';
+
+ while(*cp == ' ' || *cp == '\t' || *cp == '=')
+ cp++;
+ data = cp;
+ while(*cp != ' ' && *cp != '\n' && *cp != '\r')
+ cp++;
+ *cp++ = '\0';
+
+ if (strcmp(key, "lhs") == 0)
+ cpp = &ctx->LHS;
+ else if (strcmp(key, "rhs") == 0)
+ cpp = &ctx->RHS;
+ else
+ continue;
+
+ *cpp = malloc(strlen(data) + 1);
+ if (!*cpp) {
+ errno = ENOMEM;
+ goto cleanup;
+ }
+ strcpy(*cpp, data);
+ }
+ fclose(fp);
+ return (0);
+
+ cleanup:
+ fclose(fp);
+ if (ctx->RHS)
+ free(ctx->RHS);
+ if (ctx->LHS)
+ free(ctx->LHS);
+ ctx->RHS = ctx->LHS = 0;
+ return (-1);
+}
+
+/*%
+ * Given a DNS class and a DNS name, do a lookup for TXT records, and
+ * return a list of them.
+ */
+static char **
+get_txt_records(struct hesiod_p *ctx, int class, const char *name) {
+ struct {
+ int type; /*%< RR type */
+ int class; /*%< RR class */
+ int dlen; /*%< len of data section */
+ u_char *data; /*%< pointer to data */
+ } rr;
+ HEADER *hp;
+ u_char qbuf[MAX_HESRESP], abuf[MAX_HESRESP];
+ u_char *cp, *erdata, *eom;
+ char *dst, *edst, **list;
+ int ancount, qdcount;
+ int i, j, n, skip;
+
+ /*
+ * Construct the query and send it.
+ */
+ n = res_nmkquery(ctx->res, QUERY, name, class, T_TXT, NULL, 0,
+ NULL, qbuf, MAX_HESRESP);
+ if (n < 0) {
+ errno = EMSGSIZE;
+ return (NULL);
+ }
+ n = res_nsend(ctx->res, qbuf, n, abuf, MAX_HESRESP);
+ if (n < 0) {
+ errno = ECONNREFUSED;
+ return (NULL);
+ }
+ if (n < HFIXEDSZ) {
+ errno = EMSGSIZE;
+ return (NULL);
+ }
+
+ /*
+ * OK, parse the result.
+ */
+ hp = (HEADER *) abuf;
+ ancount = ntohs(hp->ancount);
+ qdcount = ntohs(hp->qdcount);
+ cp = abuf + sizeof(HEADER);
+ eom = abuf + n;
+
+ /* Skip query, trying to get to the answer section which follows. */
+ for (i = 0; i < qdcount; i++) {
+ skip = dn_skipname(cp, eom);
+ if (skip < 0 || cp + skip + QFIXEDSZ > eom) {
+ errno = EMSGSIZE;
+ return (NULL);
+ }
+ cp += skip + QFIXEDSZ;
+ }
+
+ list = malloc((ancount + 1) * sizeof(char *));
+ if (!list) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ j = 0;
+ for (i = 0; i < ancount; i++) {
+ skip = dn_skipname(cp, eom);
+ if (skip < 0) {
+ errno = EMSGSIZE;
+ goto cleanup;
+ }
+ cp += skip;
+ if (cp + 3 * INT16SZ + INT32SZ > eom) {
+ errno = EMSGSIZE;
+ goto cleanup;
+ }
+ rr.type = ns_get16(cp);
+ cp += INT16SZ;
+ rr.class = ns_get16(cp);
+ cp += INT16SZ + INT32SZ; /*%< skip the ttl, too */
+ rr.dlen = ns_get16(cp);
+ cp += INT16SZ;
+ if (cp + rr.dlen > eom) {
+ errno = EMSGSIZE;
+ goto cleanup;
+ }
+ rr.data = cp;
+ cp += rr.dlen;
+ if (rr.class != class || rr.type != T_TXT)
+ continue;
+ if (!(list[j] = malloc(rr.dlen)))
+ goto cleanup;
+ dst = list[j++];
+ edst = dst + rr.dlen;
+ erdata = rr.data + rr.dlen;
+ cp = rr.data;
+ while (cp < erdata) {
+ n = (unsigned char) *cp++;
+ if (cp + n > eom || dst + n > edst) {
+ errno = EMSGSIZE;
+ goto cleanup;
+ }
+ memcpy(dst, cp, n);
+ cp += n;
+ dst += n;
+ }
+ if (cp != erdata) {
+ errno = EMSGSIZE;
+ goto cleanup;
+ }
+ *dst = '\0';
+ }
+ list[j] = NULL;
+ if (j == 0) {
+ errno = ENOENT;
+ goto cleanup;
+ }
+ return (list);
+
+ cleanup:
+ for (i = 0; i < j; i++)
+ free(list[i]);
+ free(list);
+ return (NULL);
+}
+
+struct __res_state *
+__hesiod_res_get(void *context) {
+ struct hesiod_p *ctx = context;
+
+ if (!ctx->res) {
+ struct __res_state *res;
+ res = (struct __res_state *)malloc(sizeof *res);
+ if (res == NULL) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(res, 0, sizeof *res);
+ __hesiod_res_set(ctx, res, free);
+ }
+
+ return (ctx->res);
+}
+
+void
+__hesiod_res_set(void *context, struct __res_state *res,
+ void (*free_res)(void *)) {
+ struct hesiod_p *ctx = context;
+
+ if (ctx->res && ctx->free_res) {
+ res_nclose(ctx->res);
+ (*ctx->free_res)(ctx->res);
+ }
+
+ ctx->res = res;
+ ctx->free_res = free_res;
+}
+
+static int
+init(struct hesiod_p *ctx) {
+
+ if (!ctx->res && !__hesiod_res_get(ctx))
+ return (-1);
+
+ if (((ctx->res->options & RES_INIT) == 0U) &&
+ (res_ninit(ctx->res) == -1))
+ return (-1);
+
+ return (0);
+}
diff --git a/usr/src/lib/libresolv2_joy/common/irs/hesiod_p.h b/usr/src/lib/libresolv2_joy/common/irs/hesiod_p.h
new file mode 100644
index 0000000000..99da15d0cd
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/hesiod_p.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: hesiod_p.h,v 1.3 2005/04/27 04:56:27 sra Exp $
+ */
+
+#ifndef _HESIOD_P_H_INCLUDED
+#define _HESIOD_P_H_INCLUDED
+
+/** \file
+ * \brief
+ * hesiod_p.h -- private definitions for the hesiod library.
+ *
+ * \author
+ * This file is primarily maintained by tytso@mit.edu and ghudson@mit.edu.
+ */
+
+#define DEF_RHS ".Athena.MIT.EDU" /*%< Defaults if HESIOD_CONF */
+#define DEF_LHS ".ns" /*%< file is not */
+ /*%< present. */
+struct hesiod_p {
+ char * LHS; /*%< normally ".ns" */
+ char * RHS; /*%< AKA the default hesiod domain */
+ struct __res_state * res; /*%< resolver context */
+ void (*free_res)(void *);
+ void (*res_set)(struct hesiod_p *, struct __res_state *,
+ void (*)(void *));
+ struct __res_state * (*res_get)(struct hesiod_p *);
+};
+
+#define MAX_HESRESP 1024
+
+#endif /*_HESIOD_P_H_INCLUDED*/
diff --git a/usr/src/lib/libresolv2_joy/common/irs/irp.c b/usr/src/lib/libresolv2_joy/common/irs/irp.c
new file mode 100644
index 0000000000..ef10631c22
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/irp.c
@@ -0,0 +1,583 @@
+/*
+ * Copyright (C) 2004-2006, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 1996, 1998-2001, 2003 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: irp.c,v 1.12 2008/11/14 02:36:51 marka Exp $";
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#include <syslog.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <stdarg.h>
+#include <fcntl.h>
+#include <syslog.h>
+#include <ctype.h>
+#include <unistd.h>
+
+#include <isc/memcluster.h>
+
+#include <irs.h>
+#include <irp.h>
+
+#include "irs_p.h"
+#include "irp_p.h"
+
+#include "port_after.h"
+
+/* Forward. */
+
+static void irp_close(struct irs_acc *);
+
+#define LINEINCR 128
+
+#if !defined(SUN_LEN)
+#define SUN_LEN(su) \
+ (sizeof (*(su)) - sizeof ((su)->sun_path) + strlen((su)->sun_path))
+#endif
+
+
+/* Public */
+
+
+/* send errors to syslog if true. */
+int irp_log_errors = 1;
+
+/*%
+ * This module handles the irp module connection to irpd.
+ *
+ * The client expects a synchronous interface to functions like
+ * getpwnam(3), so we can't use the ctl_* i/o library on this end of
+ * the wire (it's used in the server).
+ */
+
+/*%
+ * irs_acc *irs_irp_acc(const char *options);
+ *
+ * Initialize the irp module.
+ */
+struct irs_acc *
+irs_irp_acc(const char *options) {
+ struct irs_acc *acc;
+ struct irp_p *irp;
+
+ UNUSED(options);
+
+ if (!(acc = memget(sizeof *acc))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(acc, 0x5e, sizeof *acc);
+ if (!(irp = memget(sizeof *irp))) {
+ errno = ENOMEM;
+ free(acc);
+ return (NULL);
+ }
+ irp->inlast = 0;
+ irp->incurr = 0;
+ irp->fdCxn = -1;
+ acc->private = irp;
+
+#ifdef WANT_IRS_GR
+ acc->gr_map = irs_irp_gr;
+#else
+ acc->gr_map = NULL;
+#endif
+#ifdef WANT_IRS_PW
+ acc->pw_map = irs_irp_pw;
+#else
+ acc->pw_map = NULL;
+#endif
+ acc->sv_map = irs_irp_sv;
+ acc->pr_map = irs_irp_pr;
+ acc->ho_map = irs_irp_ho;
+ acc->nw_map = irs_irp_nw;
+ acc->ng_map = irs_irp_ng;
+ acc->close = irp_close;
+ return (acc);
+}
+
+
+int
+irs_irp_connection_setup(struct irp_p *cxndata, int *warned) {
+ if (irs_irp_is_connected(cxndata)) {
+ return (0);
+ } else if (irs_irp_connect(cxndata) != 0) {
+ if (warned != NULL && !*warned) {
+ syslog(LOG_ERR, "irpd connection failed: %m\n");
+ (*warned)++;
+ }
+
+ return (-1);
+ }
+
+ return (0);
+}
+
+/*%
+ * int irs_irp_connect(void);
+ *
+ * Sets up the connection to the remote irpd server.
+ *
+ * Returns:
+ *
+ * 0 on success, -1 on failure.
+ *
+ */
+int
+irs_irp_connect(struct irp_p *pvt) {
+ int flags;
+ struct sockaddr *addr;
+ struct sockaddr_in iaddr;
+#ifndef NO_SOCKADDR_UN
+ struct sockaddr_un uaddr;
+#endif
+ long ipaddr;
+ const char *irphost;
+ int code;
+ char text[256];
+ int socklen = 0;
+
+ if (pvt->fdCxn != -1) {
+ perror("fd != 1");
+ return (-1);
+ }
+
+#ifndef NO_SOCKADDR_UN
+ memset(&uaddr, 0, sizeof uaddr);
+#endif
+ memset(&iaddr, 0, sizeof iaddr);
+
+ irphost = getenv(IRPD_HOST_ENV);
+ if (irphost == NULL) {
+ irphost = "127.0.0.1";
+ }
+
+#ifndef NO_SOCKADDR_UN
+ if (irphost[0] == '/') {
+ addr = (struct sockaddr *)&uaddr;
+ strncpy(uaddr.sun_path, irphost, sizeof uaddr.sun_path);
+ uaddr.sun_family = AF_UNIX;
+ socklen = SUN_LEN(&uaddr);
+#ifdef HAVE_SA_LEN
+ uaddr.sun_len = socklen;
+#endif
+ } else
+#endif
+ {
+ if (inet_pton(AF_INET, irphost, &ipaddr) != 1) {
+ errno = EADDRNOTAVAIL;
+ perror("inet_pton");
+ return (-1);
+ }
+
+ addr = (struct sockaddr *)&iaddr;
+ socklen = sizeof iaddr;
+#ifdef HAVE_SA_LEN
+ iaddr.sin_len = socklen;
+#endif
+ iaddr.sin_family = AF_INET;
+ iaddr.sin_port = htons(IRPD_PORT);
+ iaddr.sin_addr.s_addr = ipaddr;
+ }
+
+
+ pvt->fdCxn = socket(addr->sa_family, SOCK_STREAM, PF_UNSPEC);
+ if (pvt->fdCxn < 0) {
+ perror("socket");
+ return (-1);
+ }
+
+ if (connect(pvt->fdCxn, addr, socklen) != 0) {
+ perror("connect");
+ return (-1);
+ }
+
+ flags = fcntl(pvt->fdCxn, F_GETFL, 0);
+ if (flags < 0) {
+ close(pvt->fdCxn);
+ perror("close");
+ return (-1);
+ }
+
+#if 0
+ flags |= O_NONBLOCK;
+ if (fcntl(pvt->fdCxn, F_SETFL, flags) < 0) {
+ close(pvt->fdCxn);
+ perror("fcntl");
+ return (-1);
+ }
+#endif
+
+ code = irs_irp_read_response(pvt, text, sizeof text);
+ if (code != IRPD_WELCOME_CODE) {
+ if (irp_log_errors) {
+ syslog(LOG_WARNING, "Connection failed: %s", text);
+ }
+ irs_irp_disconnect(pvt);
+ return (-1);
+ }
+
+ return (0);
+}
+
+/*%
+ * int irs_irp_is_connected(struct irp_p *pvt);
+ *
+ * Returns:
+ *
+ * Non-zero if streams are setup to remote.
+ *
+ */
+
+int
+irs_irp_is_connected(struct irp_p *pvt) {
+ return (pvt->fdCxn >= 0);
+}
+
+/*%
+ * void
+ * irs_irp_disconnect(struct irp_p *pvt);
+ *
+ * Closes streams to remote.
+ */
+
+void
+irs_irp_disconnect(struct irp_p *pvt) {
+ if (pvt->fdCxn != -1) {
+ close(pvt->fdCxn);
+ pvt->fdCxn = -1;
+ }
+}
+
+
+
+int
+irs_irp_read_line(struct irp_p *pvt, char *buffer, int len) {
+ char *realstart = &pvt->inbuffer[0];
+ char *p, *start, *end;
+ int spare;
+ int i;
+ int buffpos = 0;
+ int left = len - 1;
+
+ while (left > 0) {
+ start = p = &pvt->inbuffer[pvt->incurr];
+ end = &pvt->inbuffer[pvt->inlast];
+
+ while (p != end && *p != '\n')
+ p++;
+
+ if (p == end) {
+ /* Found no newline so shift data down if necessary
+ * and append new data to buffer
+ */
+ if (start > realstart) {
+ memmove(realstart, start, end - start);
+ pvt->inlast = end - start;
+ start = realstart;
+ pvt->incurr = 0;
+ end = &pvt->inbuffer[pvt->inlast];
+ }
+
+ spare = sizeof (pvt->inbuffer) - pvt->inlast;
+
+ p = end;
+ i = read(pvt->fdCxn, end, spare);
+ if (i < 0) {
+ close(pvt->fdCxn);
+ pvt->fdCxn = -1;
+ return (buffpos > 0 ? buffpos : -1);
+ } else if (i == 0) {
+ return (buffpos);
+ }
+
+ end += i;
+ pvt->inlast += i;
+
+ while (p != end && *p != '\n')
+ p++;
+ }
+
+ if (p == end) {
+ /* full buffer and still no newline */
+ i = sizeof pvt->inbuffer;
+ } else {
+ /* include newline */
+ i = p - start + 1;
+ }
+
+ if (i > left)
+ i = left;
+ memcpy(buffer + buffpos, start, i);
+ pvt->incurr += i;
+ buffpos += i;
+ buffer[buffpos] = '\0';
+
+ if (p != end) {
+ left = 0;
+ } else {
+ left -= i;
+ }
+ }
+
+#if 0
+ fprintf(stderr, "read line: %s\n", buffer);
+#endif
+ return (buffpos);
+}
+
+/*%
+ * int irp_read_response(struct irp_p *pvt);
+ *
+ * Returns:
+ *
+ * The number found at the beginning of the line read from
+ * FP. 0 on failure(0 is not a legal response code). The
+ * rest of the line is discarded.
+ *
+ */
+
+int
+irs_irp_read_response(struct irp_p *pvt, char *text, size_t textlen) {
+ char line[1024];
+ int code;
+ char *p;
+
+ if (irs_irp_read_line(pvt, line, sizeof line) <= 0) {
+ return (0);
+ }
+
+ p = strchr(line, '\n');
+ if (p == NULL) {
+ return (0);
+ }
+
+ if (sscanf(line, "%d", &code) != 1) {
+ code = 0;
+ } else if (text != NULL && textlen > 0U) {
+ p = line;
+ while (isspace((unsigned char)*p)) p++;
+ while (isdigit((unsigned char)*p)) p++;
+ while (isspace((unsigned char)*p)) p++;
+ strncpy(text, p, textlen - 1);
+ p[textlen - 1] = '\0';
+ }
+
+ return (code);
+}
+
+/*%
+ * char *irp_read_body(struct irp_p *pvt, size_t *size);
+ *
+ * Read in the body of a response. Terminated by a line with
+ * just a dot on it. Lines should be terminated with a CR-LF
+ * sequence, but we're nt piccky if the CR is missing.
+ * No leading dot escaping is done as the protcol doesn't
+ * use leading dots anywhere.
+ *
+ * Returns:
+ *
+ * Pointer to null-terminated buffer allocated by memget.
+ * *SIZE is set to the length of the buffer.
+ *
+ */
+
+char *
+irs_irp_read_body(struct irp_p *pvt, size_t *size) {
+ char line[1024];
+ u_int linelen;
+ size_t len = LINEINCR;
+ char *buffer = memget(len);
+ int idx = 0;
+
+ if (buffer == NULL)
+ return (NULL);
+
+ for (;;) {
+ if (irs_irp_read_line(pvt, line, sizeof line) <= 0 ||
+ strchr(line, '\n') == NULL)
+ goto death;
+
+ linelen = strlen(line);
+
+ if (line[linelen - 1] != '\n')
+ goto death;
+
+ /* We're not strict about missing \r. Should we be?? */
+ if (linelen > 2 && line[linelen - 2] == '\r') {
+ line[linelen - 2] = '\n';
+ line[linelen - 1] = '\0';
+ linelen--;
+ }
+
+ if (linelen == 2 && line[0] == '.') {
+ *size = len;
+ buffer[idx] = '\0';
+
+ return (buffer);
+ }
+
+ if (linelen > (len - (idx + 1))) {
+ char *p = memget(len + LINEINCR);
+
+ if (p == NULL)
+ goto death;
+ memcpy(p, buffer, len);
+ memput(buffer, len);
+ buffer = p;
+ len += LINEINCR;
+ }
+
+ memcpy(buffer + idx, line, linelen);
+ idx += linelen;
+ }
+ death:
+ memput(buffer, len);
+ return (NULL);
+}
+
+/*%
+ * int irs_irp_get_full_response(struct irp_p *pvt, int *code,
+ * char **body, size_t *bodylen);
+ *
+ * Gets the response to a command. If the response indicates
+ * there's a body to follow(code % 10 == 1), then the
+ * body buffer is allcoated with memget and stored in
+ * *BODY. The length of the allocated body buffer is stored
+ * in *BODY. The caller must give the body buffer back to
+ * memput when done. The results code is stored in *CODE.
+ *
+ * Returns:
+ *
+ * 0 if a result was read. -1 on some sort of failure.
+ *
+ */
+
+int
+irs_irp_get_full_response(struct irp_p *pvt, int *code, char *text,
+ size_t textlen, char **body, size_t *bodylen) {
+ int result = irs_irp_read_response(pvt, text, textlen);
+
+ *body = NULL;
+
+ if (result == 0) {
+ return (-1);
+ }
+
+ *code = result;
+
+ /* Code that matches 2xx is a good result code.
+ * Code that matches xx1 means there's a response body coming.
+ */
+ if ((result / 100) == 2 && (result % 10) == 1) {
+ *body = irs_irp_read_body(pvt, bodylen);
+ if (*body == NULL) {
+ return (-1);
+ }
+ }
+
+ return (0);
+}
+
+/*%
+ * int irs_irp_send_command(struct irp_p *pvt, const char *fmt, ...);
+ *
+ * Sends command to remote connected via the PVT
+ * structure. FMT and args after it are fprintf-like
+ * arguments for formatting.
+ *
+ * Returns:
+ *
+ * 0 on success, -1 on failure.
+ */
+
+int
+irs_irp_send_command(struct irp_p *pvt, const char *fmt, ...) {
+ va_list ap;
+ char buffer[1024];
+ int pos = 0;
+ int i, todo;
+
+
+ if (pvt->fdCxn < 0) {
+ return (-1);
+ }
+
+ va_start(ap, fmt);
+ (void) vsprintf(buffer, fmt, ap);
+ todo = strlen(buffer);
+ va_end(ap);
+ if (todo > (int)sizeof(buffer) - 3) {
+ syslog(LOG_CRIT, "memory overrun in irs_irp_send_command()");
+ exit(1);
+ }
+ strcat(buffer, "\r\n");
+ todo = strlen(buffer);
+
+ while (todo > 0) {
+ i = write(pvt->fdCxn, buffer + pos, todo);
+#if 0
+ /* XXX brister */
+ fprintf(stderr, "Wrote: \"");
+ fwrite(buffer + pos, sizeof (char), todo, stderr);
+ fprintf(stderr, "\"\n");
+#endif
+ if (i < 0) {
+ close(pvt->fdCxn);
+ pvt->fdCxn = -1;
+ return (-1);
+ }
+ todo -= i;
+ }
+
+ return (0);
+}
+
+
+/* Methods */
+
+/*%
+ * void irp_close(struct irs_acc *this)
+ *
+ */
+
+static void
+irp_close(struct irs_acc *this) {
+ struct irp_p *irp = (struct irp_p *)this->private;
+
+ if (irp != NULL) {
+ irs_irp_disconnect(irp);
+ memput(irp, sizeof *irp);
+ }
+
+ memput(this, sizeof *this);
+}
+
+
+
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/irp_ho.c b/usr/src/lib/libresolv2_joy/common/irs/irp_ho.c
new file mode 100644
index 0000000000..b0de31dc89
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/irp_ho.c
@@ -0,0 +1,405 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (c) 1996,1998 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: irp_ho.c,v 1.3 2005/04/27 04:56:28 sra Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/* Imports. */
+
+#include "port_before.h"
+
+#include <syslog.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syslog.h>
+
+#include <irs.h>
+#include <irp.h>
+#include <isc/irpmarshall.h>
+#include <isc/memcluster.h>
+
+#include "irs_p.h"
+#include "dns_p.h"
+#include "irp_p.h"
+
+#include "port_after.h"
+
+/* Definitions. */
+
+#define MAXALIASES 35
+#define MAXADDRS 35
+#define Max(a,b) ((a) > (b) ? (a) : (b))
+
+
+struct pvt {
+ struct irp_p *girpdata;
+ int warned;
+ struct hostent host;
+};
+
+/* Forward. */
+
+static void ho_close(struct irs_ho *this);
+static struct hostent * ho_byname(struct irs_ho *this, const char *name);
+static struct hostent * ho_byname2(struct irs_ho *this, const char *name,
+ int af);
+static struct hostent * ho_byaddr(struct irs_ho *this, const void *addr,
+ int len, int af);
+static struct hostent * ho_next(struct irs_ho *this);
+static void ho_rewind(struct irs_ho *this);
+static void ho_minimize(struct irs_ho *this);
+
+static void free_host(struct hostent *ho);
+static struct addrinfo * ho_addrinfo(struct irs_ho *this, const char *name,
+ const struct addrinfo *pai);
+
+/* Public. */
+
+/*%
+ * struct irs_ho * irs_irp_ho(struct irs_acc *this)
+ *
+ * Notes:
+ *
+ * Initializes the irp_ho module.
+ *
+ */
+
+struct irs_ho *
+irs_irp_ho(struct irs_acc *this) {
+ struct irs_ho *ho;
+ struct pvt *pvt;
+
+ if (!(ho = memget(sizeof *ho))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(ho, 0x0, sizeof *ho);
+
+ if (!(pvt = memget(sizeof *pvt))) {
+ memput(ho, sizeof *ho);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ pvt->girpdata = this->private;
+
+ ho->private = pvt;
+ ho->close = ho_close;
+ ho->byname = ho_byname;
+ ho->byname2 = ho_byname2;
+ ho->byaddr = ho_byaddr;
+ ho->next = ho_next;
+ ho->rewind = ho_rewind;
+ ho->minimize = ho_minimize;
+ ho->addrinfo = ho_addrinfo;
+
+ return (ho);
+}
+
+/* Methods. */
+
+/*%
+ * Closes down the module.
+ *
+ */
+
+static void
+ho_close(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ ho_minimize(this);
+
+ free_host(&pvt->host);
+
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+
+
+/*
+ * struct hostent * ho_byname(struct irs_ho *this, const char *name)
+ *
+ */
+
+static struct hostent *
+ho_byname(struct irs_ho *this, const char *name) {
+ return (ho_byname2(this, name, AF_INET));
+}
+
+
+
+
+
+/*
+ * struct hostent * ho_byname2(struct irs_ho *this, const char *name, int af)
+ *
+ */
+
+static struct hostent *
+ho_byname2(struct irs_ho *this, const char *name, int af) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct hostent *ho = &pvt->host;
+ char *body = NULL;
+ size_t bodylen;
+ int code;
+ char text[256];
+
+ if (ho->h_name != NULL &&
+ strcmp(name, ho->h_name) == 0 &&
+ af == ho->h_addrtype) {
+ return (ho);
+ }
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return (NULL);
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "gethostbyname2 %s %s",
+ name, ADDR_T_STR(af)) != 0)
+ return (NULL);
+
+ if (irs_irp_get_full_response(pvt->girpdata, &code,
+ text, sizeof text,
+ &body, &bodylen) != 0) {
+ return (NULL);
+ }
+
+ if (code == IRPD_GETHOST_OK) {
+ free_host(ho);
+ if (irp_unmarshall_ho(ho, body) != 0) {
+ ho = NULL;
+ }
+ } else {
+ ho = NULL;
+ }
+
+ if (body != NULL) {
+ memput(body, bodylen);
+ }
+
+ return (ho);
+}
+
+
+
+/*
+ * struct hostent * ho_byaddr(struct irs_ho *this, const void *addr,
+ * int len, int af)
+ *
+ */
+
+static struct hostent *
+ho_byaddr(struct irs_ho *this, const void *addr, int len, int af) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct hostent *ho = &pvt->host;
+ char *body = NULL;
+ size_t bodylen;
+ int code;
+ char **p;
+ char paddr[MAXPADDRSIZE];
+ char text[256];
+
+ if (ho->h_name != NULL &&
+ af == ho->h_addrtype &&
+ len == ho->h_length) {
+ for (p = ho->h_addr_list ; *p != NULL ; p++) {
+ if (memcmp(*p, addr, len) == 0)
+ return (ho);
+ }
+ }
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return (NULL);
+ }
+
+ if (inet_ntop(af, addr, paddr, sizeof paddr) == NULL) {
+ return (NULL);
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "gethostbyaddr %s %s",
+ paddr, ADDR_T_STR(af)) != 0) {
+ return (NULL);
+ }
+
+ if (irs_irp_get_full_response(pvt->girpdata, &code,
+ text, sizeof text,
+ &body, &bodylen) != 0) {
+ return (NULL);
+ }
+
+ if (code == IRPD_GETHOST_OK) {
+ free_host(ho);
+ if (irp_unmarshall_ho(ho, body) != 0) {
+ ho = NULL;
+ }
+ } else {
+ ho = NULL;
+ }
+
+ if (body != NULL) {
+ memput(body, bodylen);
+ }
+
+ return (ho);
+}
+
+/*%
+ * The implementation for gethostent(3). The first time it's
+ * called all the data is pulled from the remote(i.e. what
+ * the maximum number of gethostent(3) calls would return)
+ * and that data is cached.
+ *
+ */
+
+static struct hostent *
+ho_next(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct hostent *ho = &pvt->host;
+ char *body;
+ size_t bodylen;
+ int code;
+ char text[256];
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return (NULL);
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "gethostent") != 0) {
+ return (NULL);
+ }
+
+ if (irs_irp_get_full_response(pvt->girpdata, &code,
+ text, sizeof text,
+ &body, &bodylen) != 0) {
+ return (NULL);
+ }
+
+ if (code == IRPD_GETHOST_OK) {
+ free_host(ho);
+ if (irp_unmarshall_ho(ho, body) != 0) {
+ ho = NULL;
+ }
+ } else {
+ ho = NULL;
+ }
+
+ if (body != NULL) {
+ memput(body, bodylen);
+ }
+
+ return (ho);
+}
+
+/*%
+ * void ho_rewind(struct irs_ho *this)
+ *
+ */
+
+static void
+ho_rewind(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ char text[256];
+ int code;
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return;
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "sethostent") != 0) {
+ return;
+ }
+
+ code = irs_irp_read_response(pvt->girpdata, text, sizeof text);
+ if (code != IRPD_GETHOST_SETOK) {
+ if (irp_log_errors) {
+ syslog(LOG_WARNING, "sethostent failed: %s", text);
+ }
+ }
+
+ return;
+}
+
+/*%
+ * void ho_minimize(struct irs_ho *this)
+ *
+ */
+
+static void
+ho_minimize(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ free_host(&pvt->host);
+
+ irs_irp_disconnect(pvt->girpdata);
+}
+
+/*%
+ * void free_host(struct hostent *ho)
+ *
+ */
+
+static void
+free_host(struct hostent *ho) {
+ char **p;
+
+ if (ho == NULL) {
+ return;
+ }
+
+ if (ho->h_name != NULL)
+ free(ho->h_name);
+
+ if (ho->h_aliases != NULL) {
+ for (p = ho->h_aliases ; *p != NULL ; p++)
+ free(*p);
+ free(ho->h_aliases);
+ }
+
+ if (ho->h_addr_list != NULL) {
+ for (p = ho->h_addr_list ; *p != NULL ; p++)
+ free(*p);
+ free(ho->h_addr_list);
+ }
+}
+
+/* dummy */
+static struct addrinfo *
+ho_addrinfo(struct irs_ho *this, const char *name, const struct addrinfo *pai)
+{
+ UNUSED(this);
+ UNUSED(name);
+ UNUSED(pai);
+ return(NULL);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/irp_ng.c b/usr/src/lib/libresolv2_joy/common/irs/irp_ng.c
new file mode 100644
index 0000000000..1af862cab4
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/irp_ng.c
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996, 1998 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: irp_ng.c,v 1.4 2006/12/07 04:46:27 marka Exp $";
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <syslog.h>
+
+#include <irs.h>
+#include <irp.h>
+#include <isc/memcluster.h>
+#include <isc/irpmarshall.h>
+
+#include "irs_p.h"
+#include "irp_p.h"
+
+#include "port_after.h"
+
+/* Definitions */
+
+struct pvt {
+ struct irp_p *girpdata;
+ int warned;
+};
+
+
+/* Forward */
+
+static void ng_rewind(struct irs_ng *, const char*);
+static void ng_close(struct irs_ng *);
+static int ng_next(struct irs_ng *, const char **, const char **,
+ const char **);
+static int ng_test(struct irs_ng *, const char *,
+ const char *, const char *,
+ const char *);
+static void ng_minimize(struct irs_ng *);
+
+
+/* Public */
+
+/*%
+ * Intialize the irp netgroup module.
+ *
+ */
+
+struct irs_ng *
+irs_irp_ng(struct irs_acc *this) {
+ struct irs_ng *ng;
+ struct pvt *pvt;
+
+ if (!(ng = memget(sizeof *ng))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(ng, 0x5e, sizeof *ng);
+
+ if (!(pvt = memget(sizeof *pvt))) {
+ memput(ng, sizeof *ng);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ pvt->girpdata = this->private;
+
+ ng->private = pvt;
+ ng->close = ng_close;
+ ng->next = ng_next;
+ ng->test = ng_test;
+ ng->rewind = ng_rewind;
+ ng->minimize = ng_minimize;
+ return (ng);
+}
+
+/* Methods */
+
+
+
+/*
+ * void ng_close(struct irs_ng *this)
+ *
+ */
+
+static void
+ng_close(struct irs_ng *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ ng_minimize(this);
+
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+
+
+
+/*
+ * void ng_rewind(struct irs_ng *this, const char *group)
+ *
+ *
+ */
+
+static void
+ng_rewind(struct irs_ng *this, const char *group) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ char text[256];
+ int code;
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return;
+ }
+
+ if (irs_irp_send_command(pvt->girpdata,
+ "setnetgrent %s", group) != 0) {
+ return;
+ }
+
+ code = irs_irp_read_response(pvt->girpdata, text, sizeof text);
+ if (code != IRPD_GETNETGR_SETOK) {
+ if (irp_log_errors) {
+ syslog(LOG_WARNING, "setnetgrent(%s) failed: %s",
+ group, text);
+ }
+ }
+
+ return;
+}
+
+/*
+ * Get the next netgroup item from the cache.
+ *
+ */
+
+static int
+ng_next(struct irs_ng *this, const char **host, const char **user,
+ const char **domain)
+{
+ struct pvt *pvt = (struct pvt *)this->private;
+ int code;
+ char *body = NULL;
+ size_t bodylen;
+ int rval = 0;
+ char text[256];
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return (0);
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "getnetgrent") != 0)
+ return (0);
+
+ if (irs_irp_get_full_response(pvt->girpdata, &code,
+ text, sizeof text,
+ &body, &bodylen) != 0) {
+ return (0);
+ }
+
+ if (code == IRPD_GETNETGR_OK) {
+ if (irp_unmarshall_ng(host, user, domain, body) == 0) {
+ rval = 1;
+ }
+ }
+
+ if (body != NULL) {
+ memput(body, bodylen);
+ }
+
+ return (rval);
+}
+
+/*
+ * Search for a match in a netgroup.
+ *
+ */
+
+static int
+ng_test(struct irs_ng *this, const char *name,
+ const char *host, const char *user, const char *domain)
+{
+ struct pvt *pvt = (struct pvt *)this->private;
+ char *body = NULL;
+ size_t bodylen = 0;
+ int code;
+ char text[256];
+ int rval = 0;
+
+ UNUSED(name);
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return (0);
+ }
+
+ if (irp_marshall_ng(host, user, domain, &body, &bodylen) != 0) {
+ return (0);
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "innetgr %s", body) == 0) {
+ code = irs_irp_read_response(pvt->girpdata, text, sizeof text);
+ if (code == IRPD_GETNETGR_MATCHES) {
+ rval = 1;
+ }
+ }
+
+ memput(body, bodylen);
+
+ return (rval);
+}
+
+
+
+
+/*
+ * void ng_minimize(struct irs_ng *this)
+ *
+ */
+
+static void
+ng_minimize(struct irs_ng *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ irs_irp_disconnect(pvt->girpdata);
+}
+
+
+
+
+/* Private */
+
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/irp_nw.c b/usr/src/lib/libresolv2_joy/common/irs/irp_nw.c
new file mode 100644
index 0000000000..3f2381f95d
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/irp_nw.c
@@ -0,0 +1,348 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (c) 1996,1998 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: irp_nw.c,v 1.4 2006/03/09 23:57:56 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#if 0
+
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#include <syslog.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syslog.h>
+
+#include <irs.h>
+#include <irp.h>
+#include <isc/irpmarshall.h>
+
+#include <isc/memcluster.h>
+#include <isc/misc.h>
+
+#include "irs_p.h"
+#include "lcl_p.h"
+#include "irp_p.h"
+
+#include "port_after.h"
+
+#define MAXALIASES 35
+#define MAXADDRSIZE 4
+
+struct pvt {
+ struct irp_p *girpdata;
+ int warned;
+ struct nwent net;
+};
+
+/* Forward */
+
+static void nw_close(struct irs_nw *);
+static struct nwent * nw_byname(struct irs_nw *, const char *, int);
+static struct nwent * nw_byaddr(struct irs_nw *, void *, int, int);
+static struct nwent * nw_next(struct irs_nw *);
+static void nw_rewind(struct irs_nw *);
+static void nw_minimize(struct irs_nw *);
+
+static void free_nw(struct nwent *nw);
+
+
+/* Public */
+
+/*%
+ * struct irs_nw * irs_irp_nw(struct irs_acc *this)
+ *
+ */
+
+struct irs_nw *
+irs_irp_nw(struct irs_acc *this) {
+ struct irs_nw *nw;
+ struct pvt *pvt;
+
+ if (!(pvt = memget(sizeof *pvt))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+
+ if (!(nw = memget(sizeof *nw))) {
+ memput(pvt, sizeof *pvt);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(nw, 0x0, sizeof *nw);
+ pvt->girpdata = this->private;
+
+ nw->private = pvt;
+ nw->close = nw_close;
+ nw->byname = nw_byname;
+ nw->byaddr = nw_byaddr;
+ nw->next = nw_next;
+ nw->rewind = nw_rewind;
+ nw->minimize = nw_minimize;
+ return (nw);
+}
+
+/* Methods */
+
+/*%
+ * void nw_close(struct irs_nw *this)
+ *
+ */
+
+static void
+nw_close(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ nw_minimize(this);
+
+ free_nw(&pvt->net);
+
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+/*%
+ * struct nwent * nw_byaddr(struct irs_nw *this, void *net,
+ * int length, int type)
+ *
+ */
+
+static struct nwent *
+nw_byaddr(struct irs_nw *this, void *net, int length, int type) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct nwent *nw = &pvt->net;
+ char *body = NULL;
+ size_t bodylen;
+ int code;
+ char paddr[24]; /*%< bigenough for ip4 w/ cidr spec. */
+ char text[256];
+
+ if (inet_net_ntop(type, net, length, paddr, sizeof paddr) == NULL) {
+ return (NULL);
+ }
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return (NULL);
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "getnetbyaddr %s %s",
+ paddr, ADDR_T_STR(type)) != 0)
+ return (NULL);
+
+ if (irs_irp_get_full_response(pvt->girpdata, &code,
+ text, sizeof text,
+ &body, &bodylen) != 0) {
+ return (NULL);
+ }
+
+ if (code == IRPD_GETNET_OK) {
+ free_nw(nw);
+ if (irp_unmarshall_nw(nw, body) != 0) {
+ nw = NULL;
+ }
+ } else {
+ nw = NULL;
+ }
+
+ if (body != NULL) {
+ memput(body, bodylen);
+ }
+
+ return (nw);
+}
+
+/*%
+ * struct nwent * nw_byname(struct irs_nw *this, const char *name, int type)
+ *
+ */
+
+static struct nwent *
+nw_byname(struct irs_nw *this, const char *name, int type) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct nwent *nw = &pvt->net;
+ char *body = NULL;
+ size_t bodylen;
+ int code;
+ char text[256];
+
+ if (nw->n_name != NULL &&
+ strcmp(name, nw->n_name) == 0 &&
+ nw->n_addrtype == type) {
+ return (nw);
+ }
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return (NULL);
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "getnetbyname %s", name) != 0)
+ return (NULL);
+
+ if (irs_irp_get_full_response(pvt->girpdata, &code,
+ text, sizeof text,
+ &body, &bodylen) != 0) {
+ return (NULL);
+ }
+
+ if (code == IRPD_GETNET_OK) {
+ free_nw(nw);
+ if (irp_unmarshall_nw(nw, body) != 0) {
+ nw = NULL;
+ }
+ } else {
+ nw = NULL;
+ }
+
+ if (body != NULL) {
+ memput(body, bodylen);
+ }
+
+ return (nw);
+}
+
+/*%
+ * void nw_rewind(struct irs_nw *this)
+ *
+ */
+
+static void
+nw_rewind(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ char text[256];
+ int code;
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return;
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "setnetent") != 0) {
+ return;
+ }
+
+ code = irs_irp_read_response(pvt->girpdata, text, sizeof text);
+ if (code != IRPD_GETNET_SETOK) {
+ if (irp_log_errors) {
+ syslog(LOG_WARNING, "setnetent failed: %s", text);
+ }
+ }
+
+ return;
+}
+
+/*%
+ * Prepares the cache if necessary and returns the first, or
+ * next item from it.
+ */
+
+static struct nwent *
+nw_next(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct nwent *nw = &pvt->net;
+ char *body;
+ size_t bodylen;
+ int code;
+ char text[256];
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return (NULL);
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "getnetent") != 0) {
+ return (NULL);
+ }
+
+ if (irs_irp_get_full_response(pvt->girpdata, &code,
+ text, sizeof text,
+ &body, &bodylen) != 0) {
+ return (NULL);
+ }
+
+ if (code == IRPD_GETNET_OK) {
+ free_nw(nw);
+ if (irp_unmarshall_nw(nw, body) != 0) {
+ nw = NULL;
+ }
+ } else {
+ nw = NULL;
+ }
+
+ if (body != NULL)
+ memput(body, bodylen);
+ return (nw);
+}
+
+/*%
+ * void nw_minimize(struct irs_nw *this)
+ *
+ */
+
+static void
+nw_minimize(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ irs_irp_disconnect(pvt->girpdata);
+}
+
+
+
+
+/* private. */
+
+/*%
+ * deallocate all the memory irp_unmarshall_pw allocated.
+ *
+ */
+
+static void
+free_nw(struct nwent *nw) {
+ char **p;
+
+ if (nw == NULL)
+ return;
+
+ if (nw->n_name != NULL)
+ free(nw->n_name);
+
+ if (nw->n_aliases != NULL) {
+ for (p = nw->n_aliases ; *p != NULL ; p++) {
+ free(*p);
+ }
+ free(nw->n_aliases);
+ }
+
+ if (nw->n_addr != NULL)
+ free(nw->n_addr);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/irp_p.h b/usr/src/lib/libresolv2_joy/common/irs/irp_p.h
new file mode 100644
index 0000000000..4f943f81bd
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/irp_p.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: irp_p.h,v 1.5 2005/04/27 04:56:28 sra Exp $
+ */
+
+#ifndef _IRP_P_H_INCLUDED
+#define _IRP_P_H_INCLUDED
+
+#include <stdio.h>
+
+struct irp_p {
+ char inbuffer[1024];
+ int inlast; /*%< index of one past the last char in buffer */
+ int incurr; /*%< index of the next char to be read from buffer */
+ int fdCxn;
+};
+
+/*
+ * Externs.
+ */
+
+extern struct irs_acc * irs_irp_acc __P((const char *));
+extern struct irs_gr * irs_irp_gr __P((struct irs_acc *));
+extern struct irs_pw * irs_irp_pw __P((struct irs_acc *));
+extern struct irs_sv * irs_irp_sv __P((struct irs_acc *));
+extern struct irs_pr * irs_irp_pr __P((struct irs_acc *));
+extern struct irs_ho * irs_irp_ho __P((struct irs_acc *));
+extern struct irs_nw * irs_irp_nw __P((struct irs_acc *));
+extern struct irs_ng * irs_irp_ng __P((struct irs_acc *));
+
+int irs_irp_connect(struct irp_p *pvt);
+int irs_irp_is_connected(struct irp_p *pvt);
+void irs_irp_disconnect(struct irp_p *pvt);
+int irs_irp_read_response(struct irp_p *pvt, char *text, size_t textlen);
+char *irs_irp_read_body(struct irp_p *pvt, size_t *size);
+int irs_irp_get_full_response(struct irp_p *pvt, int *code,
+ char *text, size_t textlen,
+ char **body, size_t *bodylen);
+
+extern int irp_log_errors;
+
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/irp_pr.c b/usr/src/lib/libresolv2_joy/common/irs/irp_pr.c
new file mode 100644
index 0000000000..ea876e8281
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/irp_pr.c
@@ -0,0 +1,327 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (c) 1996 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: irp_pr.c,v 1.3 2005/04/27 04:56:29 sra Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/* extern */
+
+#include "port_before.h"
+
+#include <syslog.h>
+#include <sys/types.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <netdb.h>
+#include <syslog.h>
+
+#include <irs.h>
+#include <irp.h>
+#include <isc/memcluster.h>
+#include <isc/irpmarshall.h>
+
+#include "irs_p.h"
+#include "lcl_p.h"
+#include "irp_p.h"
+
+#include "port_after.h"
+
+
+#define MAXALIASES 35
+
+/* Types */
+
+struct pvt {
+ struct irp_p *girpdata;
+ int warned;
+ struct protoent proto;
+};
+
+/* Forward */
+
+static void pr_close(struct irs_pr *);
+static struct protoent * pr_next(struct irs_pr *);
+static struct protoent * pr_byname(struct irs_pr *, const char *);
+static struct protoent * pr_bynumber(struct irs_pr *, int);
+static void pr_rewind(struct irs_pr *);
+static void pr_minimize(struct irs_pr *);
+
+static void free_proto(struct protoent *pr);
+
+/* Public */
+
+/*%
+ * struct irs_pr * irs_irp_pr(struct irs_acc *this)
+ *
+ */
+
+struct irs_pr *
+irs_irp_pr(struct irs_acc *this) {
+ struct irs_pr *pr;
+ struct pvt *pvt;
+
+ if (!(pr = memget(sizeof *pr))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pr, 0x0, sizeof *pr);
+
+ if (!(pvt = memget(sizeof *pvt))) {
+ memput(pr, sizeof *pr);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ pvt->girpdata = this->private;
+
+ pr->private = pvt;
+ pr->close = pr_close;
+ pr->byname = pr_byname;
+ pr->bynumber = pr_bynumber;
+ pr->next = pr_next;
+ pr->rewind = pr_rewind;
+ pr->minimize = pr_minimize;
+ return (pr);
+}
+
+/* Methods */
+
+/*%
+ * void pr_close(struct irs_pr *this)
+ *
+ */
+
+static void
+pr_close(struct irs_pr *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ pr_minimize(this);
+
+ free_proto(&pvt->proto);
+
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+/*%
+ * struct protoent * pr_byname(struct irs_pr *this, const char *name)
+ *
+ */
+
+static struct protoent *
+pr_byname(struct irs_pr *this, const char *name) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct protoent *pr = &pvt->proto;
+ char *body = NULL;
+ size_t bodylen;
+ int code;
+ int i;
+ char text[256];
+
+ if (pr->p_name != NULL && strcmp(name, pr->p_name) == 0) {
+ return (pr);
+ }
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return (NULL);
+ }
+
+ i = irs_irp_send_command(pvt->girpdata, "getprotobyname %s", name);
+ if (i != 0)
+ return (NULL);
+
+ if (irs_irp_get_full_response(pvt->girpdata, &code,
+ text, sizeof text,
+ &body, &bodylen) != 0) {
+ return (NULL);
+ }
+
+ if (code == IRPD_GETPROTO_OK) {
+ free_proto(pr);
+ if (irp_unmarshall_pr(pr, body) != 0) {
+ pr = NULL;
+ }
+ } else {
+ pr = NULL;
+ }
+
+ if (body != NULL) {
+ memput(body, bodylen);
+ }
+
+ return (pr);
+}
+
+/*%
+ * struct protoent * pr_bynumber(struct irs_pr *this, int proto)
+ *
+ */
+
+static struct protoent *
+pr_bynumber(struct irs_pr *this, int proto) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct protoent *pr = &pvt->proto;
+ char *body = NULL;
+ size_t bodylen;
+ int code;
+ int i;
+ char text[256];
+
+ if (pr->p_name != NULL && proto == pr->p_proto) {
+ return (pr);
+ }
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return (NULL);
+ }
+
+ i = irs_irp_send_command(pvt->girpdata, "getprotobynumber %d", proto);
+ if (i != 0)
+ return (NULL);
+
+ if (irs_irp_get_full_response(pvt->girpdata, &code,
+ text, sizeof text,
+ &body, &bodylen) != 0) {
+ return (NULL);
+ }
+
+ if (code == IRPD_GETPROTO_OK) {
+ free_proto(pr);
+ if (irp_unmarshall_pr(pr, body) != 0) {
+ pr = NULL;
+ }
+ } else {
+ pr = NULL;
+ }
+
+ if (body != NULL) {
+ memput(body, bodylen);
+ }
+
+ return (pr);
+}
+
+/*%
+ * void pr_rewind(struct irs_pr *this)
+ *
+ */
+
+static void
+pr_rewind(struct irs_pr *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ char text[256];
+ int code;
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return;
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "setprotoent") != 0) {
+ return;
+ }
+
+ code = irs_irp_read_response(pvt->girpdata, text, sizeof text);
+ if (code != IRPD_GETPROTO_SETOK) {
+ if (irp_log_errors) {
+ syslog(LOG_WARNING, "setprotoent failed: %s", text);
+ }
+ }
+
+ return;
+}
+
+/*%
+ * Prepares the cache if necessary and returns the next item in it.
+ *
+ */
+
+static struct protoent *
+pr_next(struct irs_pr *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct protoent *pr = &pvt->proto;
+ char *body;
+ size_t bodylen;
+ int code;
+ char text[256];
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return (NULL);
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "getprotoent") != 0) {
+ return (NULL);
+ }
+
+ if (irs_irp_get_full_response(pvt->girpdata, &code,
+ text, sizeof text,
+ &body, &bodylen) != 0) {
+ return (NULL);
+ }
+
+ if (code == IRPD_GETPROTO_OK) {
+ free_proto(pr);
+ if (irp_unmarshall_pr(pr, body) != 0) {
+ pr = NULL;
+ }
+ } else {
+ pr = NULL;
+ }
+
+ if (body != NULL) {
+ memput(body, bodylen);
+ }
+
+ return (pr);
+}
+
+/*%
+ * void pr_minimize(struct irs_pr *this)
+ *
+ */
+
+static void
+pr_minimize(struct irs_pr *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ irs_irp_disconnect(pvt->girpdata);
+}
+
+/*%
+ * Deallocate all the memory irp_unmarshall_pr allocated.
+ *
+ */
+
+static void
+free_proto(struct protoent *pr) {
+ char **p;
+
+ if (pr == NULL)
+ return;
+
+ if (pr->p_name != NULL)
+ free(pr->p_name);
+
+ for (p = pr->p_aliases ; p != NULL && *p != NULL ; p++)
+ free(*p);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/irp_sv.c b/usr/src/lib/libresolv2_joy/common/irs/irp_sv.c
new file mode 100644
index 0000000000..577e697fe6
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/irp_sv.c
@@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (c) 1996,1998 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: irp_sv.c,v 1.3 2005/04/27 04:56:29 sra Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/* extern */
+
+#include "port_before.h"
+
+#include <syslog.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#ifdef IRS_LCL_SV_DB
+#include <db.h>
+#endif
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <syslog.h>
+
+#include <irs.h>
+#include <irp.h>
+#include <isc/irpmarshall.h>
+#include <isc/memcluster.h>
+
+#include "irs_p.h"
+#include "lcl_p.h"
+#include "irp_p.h"
+
+#include "port_after.h"
+
+/* Types */
+
+struct pvt {
+ struct irp_p *girpdata;
+ int warned;
+ struct servent service;
+};
+
+/* Forward */
+
+static void sv_close(struct irs_sv*);
+static struct servent * sv_next(struct irs_sv *);
+static struct servent * sv_byname(struct irs_sv *, const char *,
+ const char *);
+static struct servent * sv_byport(struct irs_sv *, int, const char *);
+static void sv_rewind(struct irs_sv *);
+static void sv_minimize(struct irs_sv *);
+
+static void free_service(struct servent *sv);
+
+
+
+/* Public */
+
+/*%
+ * struct irs_sv * irs_irp_sv(struct irs_acc *this)
+ *
+ */
+
+struct irs_sv *
+irs_irp_sv(struct irs_acc *this) {
+ struct irs_sv *sv;
+ struct pvt *pvt;
+
+ if ((sv = memget(sizeof *sv)) == NULL) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(sv, 0x0, sizeof *sv);
+
+ if ((pvt = memget(sizeof *pvt)) == NULL) {
+ memput(sv, sizeof *sv);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ pvt->girpdata = this->private;
+
+ sv->private = pvt;
+ sv->close = sv_close;
+ sv->next = sv_next;
+ sv->byname = sv_byname;
+ sv->byport = sv_byport;
+ sv->rewind = sv_rewind;
+ sv->minimize = sv_minimize;
+
+ return (sv);
+}
+
+/* Methods */
+
+/*%
+ * void sv_close(struct irs_sv *this)
+ *
+ */
+
+static void
+sv_close(struct irs_sv *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ sv_minimize(this);
+
+ free_service(&pvt->service);
+
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+/*%
+ * Fills the cache if necessary and returns the next item from it.
+ *
+ */
+
+static struct servent *
+sv_next(struct irs_sv *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct servent *sv = &pvt->service;
+ char *body;
+ size_t bodylen;
+ int code;
+ char text[256];
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return (NULL);
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "getservent") != 0) {
+ return (NULL);
+ }
+
+ if (irs_irp_get_full_response(pvt->girpdata, &code,
+ text, sizeof text,
+ &body, &bodylen) != 0) {
+ return (NULL);
+ }
+
+ if (code == IRPD_GETSERVICE_OK) {
+ free_service(sv);
+ if (irp_unmarshall_sv(sv, body) != 0) {
+ sv = NULL;
+ }
+ } else {
+ sv = NULL;
+ }
+
+ if (body != NULL) {
+ memput(body, bodylen);
+ }
+
+ return (sv);
+}
+
+/*%
+ * struct servent * sv_byname(struct irs_sv *this, const char *name,
+ * const char *proto)
+ *
+ */
+
+static struct servent *
+sv_byname(struct irs_sv *this, const char *name, const char *proto) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct servent *sv = &pvt->service;
+ char *body;
+ char text[256];
+ size_t bodylen;
+ int code;
+
+ if (sv->s_name != NULL &&
+ strcmp(name, sv->s_name) == 0 &&
+ strcasecmp(proto, sv->s_proto) == 0) {
+ return (sv);
+ }
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return (NULL);
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "getservbyname %s %s",
+ name, proto) != 0)
+ return (NULL);
+
+ if (irs_irp_get_full_response(pvt->girpdata, &code,
+ text, sizeof text,
+ &body, &bodylen) != 0) {
+ return (NULL);
+ }
+
+ if (code == IRPD_GETSERVICE_OK) {
+ free_service(sv);
+ if (irp_unmarshall_sv(sv, body) != 0) {
+ sv = NULL;
+ }
+ } else {
+ sv = NULL;
+ }
+
+ if (body != NULL) {
+ memput(body, bodylen);
+ }
+
+ return (sv);
+}
+
+/*%
+ * struct servent * sv_byport(struct irs_sv *this, int port,
+ * const char *proto)
+ *
+ */
+
+static struct servent *
+sv_byport(struct irs_sv *this, int port, const char *proto) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct servent *sv = &pvt->service;
+ char *body;
+ size_t bodylen;
+ char text[256];
+ int code;
+
+ if (sv->s_name != NULL &&
+ port == sv->s_port &&
+ strcasecmp(proto, sv->s_proto) == 0) {
+ return (sv);
+ }
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return (NULL);
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "getservbyport %d %s",
+ ntohs((short)port), proto) != 0) {
+ return (NULL);
+ }
+
+ if (irs_irp_get_full_response(pvt->girpdata, &code,
+ text, sizeof text,
+ &body, &bodylen) != 0) {
+ return (NULL);
+ }
+
+ if (code == IRPD_GETSERVICE_OK) {
+ free_service(sv);
+ if (irp_unmarshall_sv(sv, body) != 0) {
+ sv = NULL;
+ }
+ } else {
+ sv = NULL;
+ }
+
+ if (body != NULL) {
+ memput(body, bodylen);
+ }
+
+ return (sv);
+}
+
+/*%
+ * void sv_rewind(struct irs_sv *this)
+ *
+ */
+
+static void
+sv_rewind(struct irs_sv *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ char text[256];
+ int code;
+
+ if (irs_irp_connection_setup(pvt->girpdata, &pvt->warned) != 0) {
+ return;
+ }
+
+ if (irs_irp_send_command(pvt->girpdata, "setservent") != 0) {
+ return;
+ }
+
+ code = irs_irp_read_response(pvt->girpdata, text, sizeof text);
+ if (code != IRPD_GETSERVICE_SETOK) {
+ if (irp_log_errors) {
+ syslog(LOG_WARNING, "setservent failed: %s", text);
+ }
+ }
+
+ return;
+}
+
+/*%
+ * void sv_minimize(struct irs_sv *this)
+ *
+ */
+
+static void
+sv_minimize(struct irs_sv *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ irs_irp_disconnect(pvt->girpdata);
+}
+
+
+
+
+
+
+static void
+free_service(struct servent *sv) {
+ char **p;
+
+ if (sv == NULL) {
+ return;
+ }
+
+ if (sv->s_name != NULL) {
+ free(sv->s_name);
+ }
+
+ for (p = sv->s_aliases ; p != NULL && *p != NULL ; p++) {
+ free(*p);
+ }
+
+ if (sv->s_proto != NULL) {
+ free(sv->s_proto);
+ }
+}
+
+
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/irpmarshall.c b/usr/src/lib/libresolv2_joy/common/irs/irpmarshall.c
new file mode 100644
index 0000000000..85ffff1866
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/irpmarshall.c
@@ -0,0 +1,2301 @@
+/*
+ * Copyright(c) 1989, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (c) 1996 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: irpmarshall.c,v 1.7 2006/03/09 23:57:56 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#if 0
+
+Check values are in approrpriate endian order.
+
+Double check memory allocations on unmarhsalling
+
+#endif
+
+
+/* Extern */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <stdio.h>
+#include <ctype.h>
+#include <pwd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syslog.h>
+#include <utmp.h>
+#include <unistd.h>
+#include <assert.h>
+#include <errno.h>
+
+#include <irs.h>
+#include <isc/memcluster.h>
+#include <isc/irpmarshall.h>
+
+#include "port_after.h"
+
+
+#ifndef HAVE_STRNDUP
+static char *strndup(const char *str, size_t len);
+#endif
+
+static char **splitarray(const char *buffer, const char *buffend, char delim);
+static int joinarray(char * const * argv, char *buffer, char delim);
+static char *getfield(char **res, size_t reslen, char **buffer, char delim);
+static size_t joinlength(char * const *argv);
+static void free_array(char **argv, size_t entries);
+
+#define ADDR_T_STR(x) (x == AF_INET ? "AF_INET" :\
+ (x == AF_INET6 ? "AF_INET6" : "UNKNOWN"))
+
+#define MAXPADDRSIZE (sizeof "255.255.255.255" + 1)
+
+static char COMMA = ',';
+
+static const char *COMMASTR = ",";
+static const char *COLONSTR = ":";
+
+
+
+/* See big comment at bottom of irpmarshall.h for description. */
+
+
+#ifdef WANT_IRS_PW
+/* +++++++++++++++++++++++++ struct passwd +++++++++++++++++++++++++ */
+
+/*%
+ * int irp_marshall_pw(const struct passwd *pw, char **buffer, size_t *len)
+ *
+ * notes: \li
+ *
+ * See irpmarshall.h
+ *
+ * return: \li
+ *
+ * 0 on sucess, -1 on failure.
+ *
+ */
+
+int
+irp_marshall_pw(const struct passwd *pw, char **buffer, size_t *len) {
+ size_t need = 1 ; /*%< for null byte */
+ char pwUid[24];
+ char pwGid[24];
+ char pwChange[24];
+ char pwExpire[24];
+ const char *pwClass;
+ const char *fieldsep = COLONSTR;
+
+ if (pw == NULL || len == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ sprintf(pwUid, "%ld", (long)pw->pw_uid);
+ sprintf(pwGid, "%ld", (long)pw->pw_gid);
+
+#ifdef HAVE_PW_CHANGE
+ sprintf(pwChange, "%ld", (long)pw->pw_change);
+#else
+ pwChange[0] = '0';
+ pwChange[1] = '\0';
+#endif
+
+#ifdef HAVE_PW_EXPIRE
+ sprintf(pwExpire, "%ld", (long)pw->pw_expire);
+#else
+ pwExpire[0] = '0';
+ pwExpire[1] = '\0';
+#endif
+
+#ifdef HAVE_PW_CLASS
+ pwClass = pw->pw_class;
+#else
+ pwClass = "";
+#endif
+
+ need += strlen(pw->pw_name) + 1; /*%< one for fieldsep */
+ need += strlen(pw->pw_passwd) + 1;
+ need += strlen(pwUid) + 1;
+ need += strlen(pwGid) + 1;
+ need += strlen(pwClass) + 1;
+ need += strlen(pwChange) + 1;
+ need += strlen(pwExpire) + 1;
+ need += strlen(pw->pw_gecos) + 1;
+ need += strlen(pw->pw_dir) + 1;
+ need += strlen(pw->pw_shell) + 1;
+
+ if (buffer == NULL) {
+ *len = need;
+ return (0);
+ }
+
+ if (*buffer != NULL && need > *len) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ if (*buffer == NULL) {
+ need += 2; /*%< for CRLF */
+ *buffer = memget(need);
+ if (*buffer == NULL) {
+ errno = ENOMEM;
+ return (-1);
+ }
+
+ *len = need;
+ }
+
+ strcpy(*buffer, pw->pw_name); strcat(*buffer, fieldsep);
+ strcat(*buffer, pw->pw_passwd); strcat(*buffer, fieldsep);
+ strcat(*buffer, pwUid); strcat(*buffer, fieldsep);
+ strcat(*buffer, pwGid); strcat(*buffer, fieldsep);
+ strcat(*buffer, pwClass); strcat(*buffer, fieldsep);
+ strcat(*buffer, pwChange); strcat(*buffer, fieldsep);
+ strcat(*buffer, pwExpire); strcat(*buffer, fieldsep);
+ strcat(*buffer, pw->pw_gecos); strcat(*buffer, fieldsep);
+ strcat(*buffer, pw->pw_dir); strcat(*buffer, fieldsep);
+ strcat(*buffer, pw->pw_shell); strcat(*buffer, fieldsep);
+
+ return (0);
+}
+
+/*%
+ * int irp_unmarshall_pw(struct passwd *pw, char *buffer)
+ *
+ * notes: \li
+ *
+ * See irpmarshall.h
+ *
+ * return: \li
+ *
+ * 0 on success, -1 on failure
+ *
+ */
+
+int
+irp_unmarshall_pw(struct passwd *pw, char *buffer) {
+ char *name, *pass, *class, *gecos, *dir, *shell;
+ uid_t pwuid;
+ gid_t pwgid;
+ time_t pwchange;
+ time_t pwexpire;
+ char *p;
+ long t;
+ char tmpbuf[24];
+ char *tb = &tmpbuf[0];
+ char fieldsep = ':';
+ int myerrno = EINVAL;
+
+ name = pass = class = gecos = dir = shell = NULL;
+ p = buffer;
+
+ /* pw_name field */
+ name = NULL;
+ if (getfield(&name, 0, &p, fieldsep) == NULL || strlen(name) == 0) {
+ goto error;
+ }
+
+ /* pw_passwd field */
+ pass = NULL;
+ if (getfield(&pass, 0, &p, fieldsep) == NULL) { /*%< field can be empty */
+ goto error;
+ }
+
+
+ /* pw_uid field */
+ tb = tmpbuf;
+ if (getfield(&tb, sizeof tmpbuf, &p, fieldsep) == NULL ||
+ strlen(tb) == 0) {
+ goto error;
+ }
+ t = strtol(tmpbuf, &tb, 10);
+ if (*tb) {
+ goto error; /*%< junk in value */
+ }
+ pwuid = (uid_t)t;
+ if ((long) pwuid != t) { /*%< value must have been too big. */
+ goto error;
+ }
+
+
+
+ /* pw_gid field */
+ tb = tmpbuf;
+ if (getfield(&tb, sizeof tmpbuf, &p, fieldsep) == NULL ||
+ strlen(tb) == 0) {
+ goto error;
+ }
+ t = strtol(tmpbuf, &tb, 10);
+ if (*tb) {
+ goto error; /*%< junk in value */
+ }
+ pwgid = (gid_t)t;
+ if ((long)pwgid != t) { /*%< value must have been too big. */
+ goto error;
+ }
+
+
+
+ /* pw_class field */
+ class = NULL;
+ if (getfield(&class, 0, &p, fieldsep) == NULL) {
+ goto error;
+ }
+
+
+
+ /* pw_change field */
+ tb = tmpbuf;
+ if (getfield(&tb, sizeof tmpbuf, &p, fieldsep) == NULL ||
+ strlen(tb) == 0) {
+ goto error;
+ }
+ t = strtol(tmpbuf, &tb, 10);
+ if (*tb) {
+ goto error; /*%< junk in value */
+ }
+ pwchange = (time_t)t;
+ if ((long)pwchange != t) { /*%< value must have been too big. */
+ goto error;
+ }
+
+
+
+ /* pw_expire field */
+ tb = tmpbuf;
+ if (getfield(&tb, sizeof tmpbuf, &p, fieldsep) == NULL ||
+ strlen(tb) == 0) {
+ goto error;
+ }
+ t = strtol(tmpbuf, &tb, 10);
+ if (*tb) {
+ goto error; /*%< junk in value */
+ }
+ pwexpire = (time_t)t;
+ if ((long) pwexpire != t) { /*%< value must have been too big. */
+ goto error;
+ }
+
+
+
+ /* pw_gecos field */
+ gecos = NULL;
+ if (getfield(&gecos, 0, &p, fieldsep) == NULL) {
+ goto error;
+ }
+
+
+
+ /* pw_dir field */
+ dir = NULL;
+ if (getfield(&dir, 0, &p, fieldsep) == NULL) {
+ goto error;
+ }
+
+
+
+ /* pw_shell field */
+ shell = NULL;
+ if (getfield(&shell, 0, &p, fieldsep) == NULL) {
+ goto error;
+ }
+
+
+
+ pw->pw_name = name;
+ pw->pw_passwd = pass;
+ pw->pw_uid = pwuid;
+ pw->pw_gid = pwgid;
+ pw->pw_gecos = gecos;
+ pw->pw_dir = dir;
+ pw->pw_shell = shell;
+
+#ifdef HAVE_PW_CHANGE
+ pw->pw_change = pwchange;
+#endif
+#ifdef HAVE_PW_CLASS
+ pw->pw_class = class;
+#endif
+#ifdef HAVE_PW_EXPIRE
+ pw->pw_expire = pwexpire;
+#endif
+
+ return (0);
+
+ error:
+ errno = myerrno;
+
+ if (name != NULL) free(name);
+ if (pass != NULL) free(pass);
+ if (gecos != NULL) free(gecos);
+ if (dir != NULL) free(dir);
+ if (shell != NULL) free(shell);
+
+ return (-1);
+}
+
+/* ------------------------- struct passwd ------------------------- */
+#endif /* WANT_IRS_PW */
+/* +++++++++++++++++++++++++ struct group +++++++++++++++++++++++++ */
+
+/*%
+ * int irp_marshall_gr(const struct group *gr, char **buffer, size_t *len)
+ *
+ * notes: \li
+ *
+ * See irpmarshall.h.
+ *
+ * return: \li
+ *
+ * 0 on success, -1 on failure
+ */
+
+int
+irp_marshall_gr(const struct group *gr, char **buffer, size_t *len) {
+ size_t need = 1; /*%< for null byte */
+ char grGid[24];
+ const char *fieldsep = COLONSTR;
+
+ if (gr == NULL || len == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ sprintf(grGid, "%ld", (long)gr->gr_gid);
+
+ need += strlen(gr->gr_name) + 1;
+#ifndef MISSING_GR_PASSWD
+ need += strlen(gr->gr_passwd) + 1;
+#else
+ need++;
+#endif
+ need += strlen(grGid) + 1;
+ need += joinlength(gr->gr_mem) + 1;
+
+ if (buffer == NULL) {
+ *len = need;
+ return (0);
+ }
+
+ if (*buffer != NULL && need > *len) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ if (*buffer == NULL) {
+ need += 2; /*%< for CRLF */
+ *buffer = memget(need);
+ if (*buffer == NULL) {
+ errno = ENOMEM;
+ return (-1);
+ }
+
+ *len = need;
+ }
+
+ strcpy(*buffer, gr->gr_name); strcat(*buffer, fieldsep);
+#ifndef MISSING_GR_PASSWD
+ strcat(*buffer, gr->gr_passwd);
+#endif
+ strcat(*buffer, fieldsep);
+ strcat(*buffer, grGid); strcat(*buffer, fieldsep);
+ joinarray(gr->gr_mem, *buffer, COMMA) ; strcat(*buffer, fieldsep);
+
+ return (0);
+}
+
+/*%
+ * int irp_unmarshall_gr(struct group *gr, char *buffer)
+ *
+ * notes: \li
+ *
+ * See irpmarshall.h
+ *
+ * return: \li
+ *
+ * 0 on success and -1 on failure.
+ *
+ */
+
+int
+irp_unmarshall_gr(struct group *gr, char *buffer) {
+ char *p, *q;
+ gid_t grgid;
+ long t;
+ char *name = NULL;
+ char *pass = NULL;
+ char **members = NULL;
+ char tmpbuf[24];
+ char *tb;
+ char fieldsep = ':';
+ int myerrno = EINVAL;
+
+ if (gr == NULL || buffer == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ p = buffer;
+
+ /* gr_name field */
+ name = NULL;
+ if (getfield(&name, 0, &p, fieldsep) == NULL || strlen(name) == 0U) {
+ goto error;
+ }
+
+
+ /* gr_passwd field */
+ pass = NULL;
+ if (getfield(&pass, 0, &p, fieldsep) == NULL) {
+ goto error;
+ }
+
+
+ /* gr_gid field */
+ tb = tmpbuf;
+ if (getfield(&tb, sizeof tmpbuf, &p, fieldsep) == NULL ||
+ strlen(tb) == 0U) {
+ goto error;
+ }
+ t = strtol(tmpbuf, &tb, 10);
+ if (*tb) {
+ goto error; /*%< junk in value */
+ }
+ grgid = (gid_t)t;
+ if ((long) grgid != t) { /*%< value must have been too big. */
+ goto error;
+ }
+
+
+ /* gr_mem field. Member names are separated by commas */
+ q = strchr(p, fieldsep);
+ if (q == NULL) {
+ goto error;
+ }
+ members = splitarray(p, q, COMMA);
+ if (members == NULL) {
+ myerrno = errno;
+ goto error;
+ }
+ p = q + 1;
+
+
+ gr->gr_name = name;
+#ifndef MISSING_GR_PASSWD
+ gr->gr_passwd = pass;
+#endif
+ gr->gr_gid = grgid;
+ gr->gr_mem = members;
+
+ return (0);
+
+ error:
+ errno = myerrno;
+
+ if (name != NULL) free(name);
+ if (pass != NULL) free(pass);
+
+ return (-1);
+}
+
+
+/* ------------------------- struct group ------------------------- */
+
+
+
+
+/* +++++++++++++++++++++++++ struct servent +++++++++++++++++++++++++ */
+
+/*%
+ * int irp_marshall_sv(const struct servent *sv, char **buffer, size_t *len)
+ *
+ * notes: \li
+ *
+ * See irpmarshall.h
+ *
+ * return: \li
+ *
+ * 0 on success, -1 on failure.
+ *
+ */
+
+int
+irp_marshall_sv(const struct servent *sv, char **buffer, size_t *len) {
+ size_t need = 1; /*%< for null byte */
+ char svPort[24];
+ const char *fieldsep = COLONSTR;
+ short realport;
+
+ if (sv == NULL || len == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ /* the int s_port field is actually a short in network order. We
+ want host order to make the marshalled data look correct */
+ realport = ntohs((short)sv->s_port);
+ sprintf(svPort, "%d", realport);
+
+ need += strlen(sv->s_name) + 1;
+ need += joinlength(sv->s_aliases) + 1;
+ need += strlen(svPort) + 1;
+ need += strlen(sv->s_proto) + 1;
+
+ if (buffer == NULL) {
+ *len = need;
+ return (0);
+ }
+
+ if (*buffer != NULL && need > *len) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ if (*buffer == NULL) {
+ need += 2; /*%< for CRLF */
+ *buffer = memget(need);
+ if (*buffer == NULL) {
+ errno = ENOMEM;
+ return (-1);
+ }
+
+ *len = need;
+ }
+
+ strcpy(*buffer, sv->s_name); strcat(*buffer, fieldsep);
+ joinarray(sv->s_aliases, *buffer, COMMA); strcat(*buffer, fieldsep);
+ strcat(*buffer, svPort); strcat(*buffer, fieldsep);
+ strcat(*buffer, sv->s_proto); strcat(*buffer, fieldsep);
+
+ return (0);
+}
+
+/*%
+ * int irp_unmarshall_sv(struct servent *sv, char *buffer)
+ *
+ * notes: \li
+ *
+ * See irpmarshall.h
+ *
+ * return: \li
+ *
+ * 0 on success, -1 on failure.
+ *
+ */
+
+int
+irp_unmarshall_sv(struct servent *sv, char *buffer) {
+ char *p, *q;
+ short svport;
+ long t;
+ char *name = NULL;
+ char *proto = NULL;
+ char **aliases = NULL;
+ char tmpbuf[24];
+ char *tb;
+ char fieldsep = ':';
+ int myerrno = EINVAL;
+
+ if (sv == NULL || buffer == NULL)
+ return (-1);
+
+ p = buffer;
+
+
+ /* s_name field */
+ name = NULL;
+ if (getfield(&name, 0, &p, fieldsep) == NULL || strlen(name) == 0U) {
+ goto error;
+ }
+
+
+ /* s_aliases field */
+ q = strchr(p, fieldsep);
+ if (q == NULL) {
+ goto error;
+ }
+ aliases = splitarray(p, q, COMMA);
+ if (aliases == NULL) {
+ myerrno = errno;
+ goto error;
+ }
+ p = q + 1;
+
+
+ /* s_port field */
+ tb = tmpbuf;
+ if (getfield(&tb, sizeof tmpbuf, &p, fieldsep) == NULL ||
+ strlen(tb) == 0U) {
+ goto error;
+ }
+ t = strtol(tmpbuf, &tb, 10);
+ if (*tb) {
+ goto error; /*%< junk in value */
+ }
+ svport = (short)t;
+ if ((long) svport != t) { /*%< value must have been too big. */
+ goto error;
+ }
+ svport = htons(svport);
+
+ /* s_proto field */
+ proto = NULL;
+ if (getfield(&proto, 0, &p, fieldsep) == NULL) {
+ goto error;
+ }
+
+ sv->s_name = name;
+ sv->s_aliases = aliases;
+ sv->s_port = svport;
+ sv->s_proto = proto;
+
+ return (0);
+
+ error:
+ errno = myerrno;
+
+ if (name != NULL) free(name);
+ if (proto != NULL) free(proto);
+ free_array(aliases, 0);
+
+ return (-1);
+}
+
+
+/* ------------------------- struct servent ------------------------- */
+
+/* +++++++++++++++++++++++++ struct protoent +++++++++++++++++++++++++ */
+
+/*%
+ * int irp_marshall_pr(struct protoent *pr, char **buffer, size_t *len)
+ *
+ * notes: \li
+ *
+ * See irpmarshall.h
+ *
+ * return: \li
+ *
+ * 0 on success and -1 on failure.
+ *
+ */
+
+int
+irp_marshall_pr(struct protoent *pr, char **buffer, size_t *len) {
+ size_t need = 1; /*%< for null byte */
+ char prProto[24];
+ const char *fieldsep = COLONSTR;
+
+ if (pr == NULL || len == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ sprintf(prProto, "%d", (int)pr->p_proto);
+
+ need += strlen(pr->p_name) + 1;
+ need += joinlength(pr->p_aliases) + 1;
+ need += strlen(prProto) + 1;
+
+ if (buffer == NULL) {
+ *len = need;
+ return (0);
+ }
+
+ if (*buffer != NULL && need > *len) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ if (*buffer == NULL) {
+ need += 2; /*%< for CRLF */
+ *buffer = memget(need);
+ if (*buffer == NULL) {
+ errno = ENOMEM;
+ return (-1);
+ }
+
+ *len = need;
+ }
+
+ strcpy(*buffer, pr->p_name); strcat(*buffer, fieldsep);
+ joinarray(pr->p_aliases, *buffer, COMMA); strcat(*buffer, fieldsep);
+ strcat(*buffer, prProto); strcat(*buffer, fieldsep);
+
+ return (0);
+
+}
+
+/*%
+ * int irp_unmarshall_pr(struct protoent *pr, char *buffer)
+ *
+ * notes: \li
+ *
+ * See irpmarshall.h
+ *
+ * return: \li
+ *
+ * 0 on success, -1 on failure
+ *
+ */
+
+int irp_unmarshall_pr(struct protoent *pr, char *buffer) {
+ char *p, *q;
+ int prproto;
+ long t;
+ char *name = NULL;
+ char **aliases = NULL;
+ char tmpbuf[24];
+ char *tb;
+ char fieldsep = ':';
+ int myerrno = EINVAL;
+
+ if (pr == NULL || buffer == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ p = buffer;
+
+ /* p_name field */
+ name = NULL;
+ if (getfield(&name, 0, &p, fieldsep) == NULL || strlen(name) == 0U) {
+ goto error;
+ }
+
+
+ /* p_aliases field */
+ q = strchr(p, fieldsep);
+ if (q == NULL) {
+ goto error;
+ }
+ aliases = splitarray(p, q, COMMA);
+ if (aliases == NULL) {
+ myerrno = errno;
+ goto error;
+ }
+ p = q + 1;
+
+
+ /* p_proto field */
+ tb = tmpbuf;
+ if (getfield(&tb, sizeof tmpbuf, &p, fieldsep) == NULL ||
+ strlen(tb) == 0U) {
+ goto error;
+ }
+ t = strtol(tmpbuf, &tb, 10);
+ if (*tb) {
+ goto error; /*%< junk in value */
+ }
+ prproto = (int)t;
+ if ((long) prproto != t) { /*%< value must have been too big. */
+ goto error;
+ }
+
+ pr->p_name = name;
+ pr->p_aliases = aliases;
+ pr->p_proto = prproto;
+
+ return (0);
+
+ error:
+ errno = myerrno;
+
+ if (name != NULL) free(name);
+ free_array(aliases, 0);
+
+ return (-1);
+}
+
+/* ------------------------- struct protoent ------------------------- */
+
+
+
+/* +++++++++++++++++++++++++ struct hostent +++++++++++++++++++++++++ */
+
+/*%
+ * int irp_marshall_ho(struct hostent *ho, char **buffer, size_t *len)
+ *
+ * notes: \li
+ *
+ * See irpmarshall.h.
+ *
+ * return: \li
+ *
+ * 0 on success, -1 on failure.
+ *
+ */
+
+int
+irp_marshall_ho(struct hostent *ho, char **buffer, size_t *len) {
+ size_t need = 1; /*%< for null byte */
+ char hoaddrtype[24];
+ char holength[24];
+ char **av;
+ char *p;
+ int addrlen;
+ int malloced = 0;
+ size_t remlen;
+ const char *fieldsep = "@";
+
+ if (ho == NULL || len == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ switch(ho->h_addrtype) {
+ case AF_INET:
+ strcpy(hoaddrtype, "AF_INET");
+ break;
+
+ case AF_INET6:
+ strcpy(hoaddrtype, "AF_INET6");
+ break;
+
+ default:
+ errno = EINVAL;
+ return (-1);
+ }
+
+ sprintf(holength, "%d", ho->h_length);
+
+ need += strlen(ho->h_name) + 1;
+ need += joinlength(ho->h_aliases) + 1;
+ need += strlen(hoaddrtype) + 1;
+ need += strlen(holength) + 1;
+
+ /* we determine an upper bound on the string length needed, not an
+ exact length. */
+ addrlen = (ho->h_addrtype == AF_INET ? 16 : 46) ; /*%< XX other AF's?? */
+ for (av = ho->h_addr_list; av != NULL && *av != NULL ; av++)
+ need += addrlen;
+
+ if (buffer == NULL) {
+ *len = need;
+ return (0);
+ }
+
+ if (*buffer != NULL && need > *len) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ if (*buffer == NULL) {
+ need += 2; /*%< for CRLF */
+ *buffer = memget(need);
+ if (*buffer == NULL) {
+ errno = ENOMEM;
+ return (-1);
+ }
+
+ *len = need;
+ malloced = 1;
+ }
+
+ strcpy(*buffer, ho->h_name); strcat(*buffer, fieldsep);
+ joinarray(ho->h_aliases, *buffer, COMMA); strcat(*buffer, fieldsep);
+ strcat(*buffer, hoaddrtype); strcat(*buffer, fieldsep);
+ strcat(*buffer, holength); strcat(*buffer, fieldsep);
+
+ p = *buffer + strlen(*buffer);
+ remlen = need - strlen(*buffer);
+ for (av = ho->h_addr_list ; av != NULL && *av != NULL ; av++) {
+ if (inet_ntop(ho->h_addrtype, *av, p, remlen) == NULL) {
+ goto error;
+ }
+ if (*(av + 1) != NULL)
+ strcat(p, COMMASTR);
+ remlen -= strlen(p);
+ p += strlen(p);
+ }
+ strcat(*buffer, fieldsep);
+
+ return (0);
+
+ error:
+ if (malloced) {
+ memput(*buffer, need);
+ }
+
+ return (-1);
+}
+
+/*%
+ * int irp_unmarshall_ho(struct hostent *ho, char *buffer)
+ *
+ * notes: \li
+ *
+ * See irpmarshall.h.
+ *
+ * return: \li
+ *
+ * 0 on success, -1 on failure.
+ *
+ */
+
+int
+irp_unmarshall_ho(struct hostent *ho, char *buffer) {
+ char *p, *q, *r;
+ int hoaddrtype;
+ int holength;
+ long t;
+ char *name;
+ char **aliases = NULL;
+ char **hohaddrlist = NULL;
+ size_t hoaddrsize;
+ char tmpbuf[24];
+ char *tb;
+ char **alist;
+ int addrcount;
+ char fieldsep = '@';
+ int myerrno = EINVAL;
+
+ if (ho == NULL || buffer == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ p = buffer;
+
+ /* h_name field */
+ name = NULL;
+ if (getfield(&name, 0, &p, fieldsep) == NULL || strlen(name) == 0U) {
+ goto error;
+ }
+
+
+ /* h_aliases field */
+ q = strchr(p, fieldsep);
+ if (q == NULL) {
+ goto error;
+ }
+ aliases = splitarray(p, q, COMMA);
+ if (aliases == NULL) {
+ myerrno = errno;
+ goto error;
+ }
+ p = q + 1;
+
+
+ /* h_addrtype field */
+ tb = tmpbuf;
+ if (getfield(&tb, sizeof tmpbuf, &p, fieldsep) == NULL ||
+ strlen(tb) == 0U) {
+ goto error;
+ }
+ if (strcmp(tmpbuf, "AF_INET") == 0)
+ hoaddrtype = AF_INET;
+ else if (strcmp(tmpbuf, "AF_INET6") == 0)
+ hoaddrtype = AF_INET6;
+ else
+ goto error;
+
+
+ /* h_length field */
+ tb = tmpbuf;
+ if (getfield(&tb, sizeof tmpbuf, &p, fieldsep) == NULL ||
+ strlen(tb) == 0U) {
+ goto error;
+ }
+ t = strtol(tmpbuf, &tb, 10);
+ if (*tb) {
+ goto error; /*%< junk in value */
+ }
+ holength = (int)t;
+ if ((long) holength != t) { /*%< value must have been too big. */
+ goto error;
+ }
+
+
+ /* h_addr_list field */
+ q = strchr(p, fieldsep);
+ if (q == NULL)
+ goto error;
+
+ /* count how many addresss are in there */
+ if (q > p + 1) {
+ for (addrcount = 1, r = p ; r != q ; r++) {
+ if (*r == COMMA)
+ addrcount++;
+ }
+ } else {
+ addrcount = 0;
+ }
+
+ hoaddrsize = (addrcount + 1) * sizeof (char *);
+ hohaddrlist = malloc(hoaddrsize);
+ if (hohaddrlist == NULL) {
+ myerrno = ENOMEM;
+ goto error;
+ }
+
+ memset(hohaddrlist, 0x0, hoaddrsize);
+
+ alist = hohaddrlist;
+ for (t = 0, r = p ; r != q ; p = r + 1, t++) {
+ char saved;
+ while (r != q && *r != COMMA) r++;
+ saved = *r;
+ *r = 0x0;
+
+ alist[t] = malloc(hoaddrtype == AF_INET ? 4 : 16);
+ if (alist[t] == NULL) {
+ myerrno = ENOMEM;
+ goto error;
+ }
+
+ if (inet_pton(hoaddrtype, p, alist[t]) == -1)
+ goto error;
+ *r = saved;
+ }
+ alist[t] = NULL;
+
+ ho->h_name = name;
+ ho->h_aliases = aliases;
+ ho->h_addrtype = hoaddrtype;
+ ho->h_length = holength;
+ ho->h_addr_list = hohaddrlist;
+
+ return (0);
+
+ error:
+ errno = myerrno;
+
+ if (name != NULL) free(name);
+ free_array(hohaddrlist, 0);
+ free_array(aliases, 0);
+
+ return (-1);
+}
+
+/* ------------------------- struct hostent------------------------- */
+
+
+
+/* +++++++++++++++++++++++++ struct netgrp +++++++++++++++++++++++++ */
+
+/*%
+ * int irp_marshall_ng(const char *host, const char *user,
+ * const char *domain, char *buffer, size_t *len)
+ *
+ * notes: \li
+ *
+ * See note for irp_marshall_ng_start
+ *
+ * return: \li
+ *
+ * 0 on success, 0 on failure.
+ *
+ */
+
+int
+irp_marshall_ng(const char *host, const char *user, const char *domain,
+ char **buffer, size_t *len) {
+ size_t need = 1; /*%< for nul byte */
+ const char *fieldsep = ",";
+
+ if (len == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ need += 4; /*%< two parens and two commas */
+ need += (host == NULL ? 0 : strlen(host));
+ need += (user == NULL ? 0 : strlen(user));
+ need += (domain == NULL ? 0 : strlen(domain));
+
+ if (buffer == NULL) {
+ *len = need;
+ return (0);
+ } else if (*buffer != NULL && need > *len) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ if (*buffer == NULL) {
+ need += 2; /*%< for CRLF */
+ *buffer = memget(need);
+ if (*buffer == NULL) {
+ errno = ENOMEM;
+ return (-1);
+ }
+
+ *len = need;
+ }
+
+ (*buffer)[0] = '(';
+ (*buffer)[1] = '\0';
+
+ if (host != NULL)
+ strcat(*buffer, host);
+ strcat(*buffer, fieldsep);
+
+ if (user != NULL)
+ strcat(*buffer, user);
+ strcat(*buffer, fieldsep);
+
+ if (domain != NULL)
+ strcat(*buffer, domain);
+ strcat(*buffer, ")");
+
+ return (0);
+}
+
+
+
+/* ---------- */
+
+/*%
+ * int irp_unmarshall_ng(const char **host, const char **user,
+ * const char **domain, char *buffer)
+ *
+ * notes: \li
+ *
+ * Unpacks the BUFFER into 3 character arrays it allocates and assigns
+ * to *HOST, *USER and *DOMAIN. If any field of the value is empty,
+ * then the corresponding paramater value will be set to NULL.
+ *
+ * return: \li
+ *
+ * 0 on success and -1 on failure.
+ */
+
+int
+irp_unmarshall_ng(const char **hostp, const char **userp, const char **domainp,
+ char *buffer)
+{
+ char *p, *q;
+ char fieldsep = ',';
+ int myerrno = EINVAL;
+ char *host, *user, *domain;
+
+ if (userp == NULL || hostp == NULL ||
+ domainp == NULL || buffer == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ host = user = domain = NULL;
+
+ p = buffer;
+ while (isspace((unsigned char)*p)) {
+ p++;
+ }
+ if (*p != '(') {
+ goto error;
+ }
+
+ q = p + 1;
+ while (*q && *q != fieldsep)
+ q++;
+ if (!*q) {
+ goto error;
+ } else if (q > p + 1) {
+ host = strndup(p, q - p);
+ }
+
+ p = q + 1;
+ if (!*p) {
+ goto error;
+ } else if (*p != fieldsep) {
+ q = p + 1;
+ while (*q && *q != fieldsep)
+ q++;
+ if (!*q) {
+ goto error;
+ }
+ user = strndup(p, q - p);
+ } else {
+ p++;
+ }
+
+ if (!*p) {
+ goto error;
+ } else if (*p != ')') {
+ q = p + 1;
+ while (*q && *q != ')')
+ q++;
+ if (!*q) {
+ goto error;
+ }
+ domain = strndup(p, q - p);
+ }
+ *hostp = host;
+ *userp = user;
+ *domainp = domain;
+
+ return (0);
+
+ error:
+ errno = myerrno;
+
+ if (host != NULL) free(host);
+ if (user != NULL) free(user);
+
+ return (-1);
+}
+
+/* ------------------------- struct netgrp ------------------------- */
+
+
+
+
+/* +++++++++++++++++++++++++ struct nwent +++++++++++++++++++++++++ */
+
+/*%
+ * int irp_marshall_nw(struct nwent *ne, char **buffer, size_t *len)
+ *
+ * notes: \li
+ *
+ * See at top.
+ *
+ * return: \li
+ *
+ * 0 on success and -1 on failure.
+ *
+ */
+
+int
+irp_marshall_nw(struct nwent *ne, char **buffer, size_t *len) {
+ size_t need = 1; /*%< for null byte */
+ char nAddrType[24];
+ char nNet[MAXPADDRSIZE];
+ const char *fieldsep = COLONSTR;
+
+ if (ne == NULL || len == NULL) {
+ return (-1);
+ }
+
+ strcpy(nAddrType, ADDR_T_STR(ne->n_addrtype));
+
+ if (inet_net_ntop(ne->n_addrtype, ne->n_addr, ne->n_length,
+ nNet, sizeof nNet) == NULL) {
+ return (-1);
+ }
+
+
+ need += strlen(ne->n_name) + 1;
+ need += joinlength(ne->n_aliases) + 1;
+ need += strlen(nAddrType) + 1;
+ need += strlen(nNet) + 1;
+
+ if (buffer == NULL) {
+ *len = need;
+ return (0);
+ }
+
+ if (*buffer != NULL && need > *len) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ if (*buffer == NULL) {
+ need += 2; /*%< for CRLF */
+ *buffer = memget(need);
+ if (*buffer == NULL) {
+ errno = ENOMEM;
+ return (-1);
+ }
+
+ *len = need;
+ }
+
+ strcpy(*buffer, ne->n_name); strcat(*buffer, fieldsep);
+ joinarray(ne->n_aliases, *buffer, COMMA) ; strcat(*buffer, fieldsep);
+ strcat(*buffer, nAddrType); strcat(*buffer, fieldsep);
+ strcat(*buffer, nNet); strcat(*buffer, fieldsep);
+
+ return (0);
+}
+
+/*%
+ * int irp_unmarshall_nw(struct nwent *ne, char *buffer)
+ *
+ * notes: \li
+ *
+ * See note up top.
+ *
+ * return: \li
+ *
+ * 0 on success and -1 on failure.
+ *
+ */
+
+int
+irp_unmarshall_nw(struct nwent *ne, char *buffer) {
+ char *p, *q;
+ int naddrtype;
+ long nnet;
+ int bits;
+ char *name = NULL;
+ char **aliases = NULL;
+ char tmpbuf[24];
+ char *tb;
+ char fieldsep = ':';
+ int myerrno = EINVAL;
+
+ if (ne == NULL || buffer == NULL) {
+ goto error;
+ }
+
+ p = buffer;
+
+ /* n_name field */
+ name = NULL;
+ if (getfield(&name, 0, &p, fieldsep) == NULL || strlen(name) == 0U) {
+ goto error;
+ }
+
+
+ /* n_aliases field. Aliases are separated by commas */
+ q = strchr(p, fieldsep);
+ if (q == NULL) {
+ goto error;
+ }
+ aliases = splitarray(p, q, COMMA);
+ if (aliases == NULL) {
+ myerrno = errno;
+ goto error;
+ }
+ p = q + 1;
+
+
+ /* h_addrtype field */
+ tb = tmpbuf;
+ if (getfield(&tb, sizeof tmpbuf, &p, fieldsep) == NULL ||
+ strlen(tb) == 0U) {
+ goto error;
+ }
+ if (strcmp(tmpbuf, "AF_INET") == 0)
+ naddrtype = AF_INET;
+ else if (strcmp(tmpbuf, "AF_INET6") == 0)
+ naddrtype = AF_INET6;
+ else
+ goto error;
+
+
+ /* n_net field */
+ tb = tmpbuf;
+ if (getfield(&tb, sizeof tmpbuf, &p, fieldsep) == NULL ||
+ strlen(tb) == 0U) {
+ goto error;
+ }
+ nnet = 0;
+ bits = inet_net_pton(naddrtype, tmpbuf, &nnet, sizeof nnet);
+ if (bits < 0) {
+ goto error;
+ }
+
+ /* nnet = ntohl(nnet); */ /* keep in network order for nwent */
+
+ ne->n_name = name;
+ ne->n_aliases = aliases;
+ ne->n_addrtype = naddrtype;
+ ne->n_length = bits;
+ ne->n_addr = malloc(sizeof nnet);
+ if (ne->n_addr == NULL) {
+ goto error;
+ }
+
+ memcpy(ne->n_addr, &nnet, sizeof nnet);
+
+ return (0);
+
+ error:
+ errno = myerrno;
+
+ if (name != NULL) free(name);
+ free_array(aliases, 0);
+
+ return (-1);
+}
+
+
+/* ------------------------- struct nwent ------------------------- */
+
+
+/* +++++++++++++++++++++++++ struct netent +++++++++++++++++++++++++ */
+
+/*%
+ * int irp_marshall_ne(struct netent *ne, char **buffer, size_t *len)
+ *
+ * notes: \li
+ *
+ * See at top.
+ *
+ * return: \li
+ *
+ * 0 on success and -1 on failure.
+ *
+ */
+
+int
+irp_marshall_ne(struct netent *ne, char **buffer, size_t *len) {
+ size_t need = 1; /*%< for null byte */
+ char nAddrType[24];
+ char nNet[MAXPADDRSIZE];
+ const char *fieldsep = COLONSTR;
+ long nval;
+
+ if (ne == NULL || len == NULL) {
+ return (-1);
+ }
+
+ strcpy(nAddrType, ADDR_T_STR(ne->n_addrtype));
+
+ nval = htonl(ne->n_net);
+ if (inet_ntop(ne->n_addrtype, &nval, nNet, sizeof nNet) == NULL) {
+ return (-1);
+ }
+
+ need += strlen(ne->n_name) + 1;
+ need += joinlength(ne->n_aliases) + 1;
+ need += strlen(nAddrType) + 1;
+ need += strlen(nNet) + 1;
+
+ if (buffer == NULL) {
+ *len = need;
+ return (0);
+ }
+
+ if (*buffer != NULL && need > *len) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ if (*buffer == NULL) {
+ need += 2; /*%< for CRLF */
+ *buffer = memget(need);
+ if (*buffer == NULL) {
+ errno = ENOMEM;
+ return (-1);
+ }
+
+ *len = need;
+ }
+
+ strcpy(*buffer, ne->n_name); strcat(*buffer, fieldsep);
+ joinarray(ne->n_aliases, *buffer, COMMA) ; strcat(*buffer, fieldsep);
+ strcat(*buffer, nAddrType); strcat(*buffer, fieldsep);
+ strcat(*buffer, nNet); strcat(*buffer, fieldsep);
+
+ return (0);
+}
+
+/*%
+ * int irp_unmarshall_ne(struct netent *ne, char *buffer)
+ *
+ * notes: \li
+ *
+ * See note up top.
+ *
+ * return: \li
+ *
+ * 0 on success and -1 on failure.
+ *
+ */
+
+int
+irp_unmarshall_ne(struct netent *ne, char *buffer) {
+ char *p, *q;
+ int naddrtype;
+ long nnet;
+ int bits;
+ char *name = NULL;
+ char **aliases = NULL;
+ char tmpbuf[24];
+ char *tb;
+ char fieldsep = ':';
+ int myerrno = EINVAL;
+
+ if (ne == NULL || buffer == NULL) {
+ goto error;
+ }
+
+ p = buffer;
+
+ /* n_name field */
+ name = NULL;
+ if (getfield(&name, 0, &p, fieldsep) == NULL || strlen(name) == 0U) {
+ goto error;
+ }
+
+
+ /* n_aliases field. Aliases are separated by commas */
+ q = strchr(p, fieldsep);
+ if (q == NULL) {
+ goto error;
+ }
+ aliases = splitarray(p, q, COMMA);
+ if (aliases == NULL) {
+ myerrno = errno;
+ goto error;
+ }
+ p = q + 1;
+
+
+ /* h_addrtype field */
+ tb = tmpbuf;
+ if (getfield(&tb, sizeof tmpbuf, &p, fieldsep) == NULL ||
+ strlen(tb) == 0U) {
+ goto error;
+ }
+ if (strcmp(tmpbuf, "AF_INET") == 0)
+ naddrtype = AF_INET;
+ else if (strcmp(tmpbuf, "AF_INET6") == 0)
+ naddrtype = AF_INET6;
+ else
+ goto error;
+
+
+ /* n_net field */
+ tb = tmpbuf;
+ if (getfield(&tb, sizeof tmpbuf, &p, fieldsep) == NULL ||
+ strlen(tb) == 0U) {
+ goto error;
+ }
+ bits = inet_net_pton(naddrtype, tmpbuf, &nnet, sizeof nnet);
+ if (bits < 0) {
+ goto error;
+ }
+ nnet = ntohl(nnet);
+
+ ne->n_name = name;
+ ne->n_aliases = aliases;
+ ne->n_addrtype = naddrtype;
+ ne->n_net = nnet;
+
+ return (0);
+
+ error:
+ errno = myerrno;
+
+ if (name != NULL) free(name);
+ free_array(aliases, 0);
+
+ return (-1);
+}
+
+
+/* ------------------------- struct netent ------------------------- */
+
+
+/* =========================================================================== */
+
+/*%
+ * static char ** splitarray(const char *buffer, const char *buffend, char delim)
+ *
+ * notes: \li
+ *
+ * Split a delim separated astring. Not allowed
+ * to have two delims next to each other. BUFFER points to begining of
+ * string, BUFFEND points to one past the end of the string
+ * (i.e. points at where the null byte would be if null
+ * terminated).
+ *
+ * return: \li
+ *
+ * Returns a malloced array of pointers, each pointer pointing to a
+ * malloced string. If BUFEER is an empty string, then return values is
+ * array of 1 pointer that is NULL. Returns NULL on failure.
+ *
+ */
+
+static char **
+splitarray(const char *buffer, const char *buffend, char delim) {
+ const char *p, *q;
+ int count = 0;
+ char **arr = NULL;
+ char **aptr;
+
+ if (buffend < buffer)
+ return (NULL);
+ else if (buffend > buffer && *buffer == delim)
+ return (NULL);
+ else if (buffend > buffer && *(buffend - 1) == delim)
+ return (NULL);
+
+ /* count the number of field and make sure none are empty */
+ if (buffend > buffer + 1) {
+ for (count = 1, q = buffer ; q != buffend ; q++) {
+ if (*q == delim) {
+ if (q > buffer && (*(q - 1) == delim)) {
+ errno = EINVAL;
+ return (NULL);
+ }
+ count++;
+ }
+ }
+ }
+
+ if (count > 0) {
+ count++ ; /*%< for NULL at end */
+ aptr = arr = malloc(count * sizeof (char *));
+ if (aptr == NULL) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+
+ memset(arr, 0x0, count * sizeof (char *));
+ for (p = buffer ; p < buffend ; p++) {
+ for (q = p ; *q != delim && q != buffend ; q++)
+ /* nothing */;
+ *aptr = strndup(p, q - p);
+
+ p = q;
+ aptr++;
+ }
+ *aptr = NULL;
+ } else {
+ arr = malloc(sizeof (char *));
+ if (arr == NULL) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+
+ *arr = NULL;
+ }
+
+ return (arr);
+}
+
+/*%
+ * static size_t joinlength(char * const *argv)
+ *
+ * return: \li
+ *
+ * the number of bytes in all the arrays pointed at
+ * by argv, including their null bytes(which will usually be turned
+ * into commas).
+ *
+ *
+ */
+
+static size_t
+joinlength(char * const *argv) {
+ int len = 0;
+
+ while (argv && *argv) {
+ len += (strlen(*argv) + 1);
+ argv++;
+ }
+
+ return (len);
+}
+
+/*%
+ * int joinarray(char * const *argv, char *buffer, char delim)
+ *
+ * notes: \li
+ *
+ * Copy all the ARGV strings into the end of BUFFER
+ * separating them with DELIM. BUFFER is assumed to have
+ * enough space to hold everything and to be already null-terminated.
+ *
+ * return: \li
+ *
+ * 0 unless argv or buffer is NULL.
+ *
+ *
+ */
+
+static int
+joinarray(char * const *argv, char *buffer, char delim) {
+ char * const *p;
+ char sep[2];
+
+ if (argv == NULL || buffer == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ sep[0] = delim;
+ sep[1] = 0x0;
+
+ for (p = argv ; *p != NULL ; p++) {
+ strcat(buffer, *p);
+ if (*(p + 1) != NULL) {
+ strcat(buffer, sep);
+ }
+ }
+
+ return (0);
+}
+
+/*%
+ * static char * getfield(char **res, size_t reslen, char **ptr, char delim)
+ *
+ * notes: \li
+ *
+ * Stores in *RES, which is a buffer of length RESLEN, a
+ * copy of the bytes from *PTR up to and including the first
+ * instance of DELIM. If *RES is NULL, then it will be
+ * assigned a malloced buffer to hold the copy. *PTR is
+ * modified to point at the found delimiter.
+ *
+ * return: \li
+ *
+ * If there was no delimiter, then NULL is returned,
+ * otherewise *RES is returned.
+ *
+ */
+
+static char *
+getfield(char **res, size_t reslen, char **ptr, char delim) {
+ char *q;
+
+ if (res == NULL || ptr == NULL || *ptr == NULL) {
+ errno = EINVAL;
+ return (NULL);
+ }
+
+ q = strchr(*ptr, delim);
+
+ if (q == NULL) {
+ errno = EINVAL;
+ return (NULL);
+ } else {
+ if (*res == NULL) {
+ *res = strndup(*ptr, q - *ptr);
+ } else {
+ if ((size_t)(q - *ptr + 1) > reslen) { /*%< to big for res */
+ errno = EINVAL;
+ return (NULL);
+ } else {
+ strncpy(*res, *ptr, q - *ptr);
+ (*res)[q - *ptr] = 0x0;
+ }
+ }
+ *ptr = q + 1;
+ }
+
+ return (*res);
+}
+
+
+
+
+
+#ifndef HAVE_STRNDUP
+/*
+ * static char * strndup(const char *str, size_t len)
+ *
+ * notes: \li
+ *
+ * like strdup, except do len bytes instead of the whole string. Always
+ * null-terminates.
+ *
+ * return: \li
+ *
+ * The newly malloced string.
+ *
+ */
+
+static char *
+strndup(const char *str, size_t len) {
+ char *p = malloc(len + 1);
+
+ if (p == NULL)
+ return (NULL);
+ strncpy(p, str, len);
+ p[len] = 0x0;
+ return (p);
+}
+#endif
+
+#if WANT_MAIN
+
+/*%
+ * static int strcmp_nws(const char *a, const char *b)
+ *
+ * notes: \li
+ *
+ * do a strcmp, except uneven lengths of whitespace compare the same
+ *
+ * return: \li
+ *
+ */
+
+static int
+strcmp_nws(const char *a, const char *b) {
+ while (*a && *b) {
+ if (isspace(*a) && isspace(*b)) {
+ do {
+ a++;
+ } while (isspace(*a));
+ do {
+ b++;
+ } while (isspace(*b));
+ }
+ if (*a < *b)
+ return (-1);
+ else if (*a > *b)
+ return (1);
+
+ a++;
+ b++;;
+ }
+
+ if (*a == *b)
+ return (0);
+ else if (*a > *b)
+ return (1);
+ else
+ return (-1);
+}
+
+#endif
+
+/*%
+ * static void free_array(char **argv, size_t entries)
+ *
+ * notes: \li
+ *
+ * Free argv and each of the pointers inside it. The end of
+ * the array is when a NULL pointer is found inside. If
+ * entries is > 0, then NULL pointers inside the array do
+ * not indicate the end of the array.
+ *
+ */
+
+static void
+free_array(char **argv, size_t entries) {
+ char **p = argv;
+ int useEntries = (entries > 0U);
+
+ if (argv == NULL)
+ return;
+
+ while ((useEntries && entries > 0U) || *p) {
+ if (*p)
+ free(*p);
+ p++;
+ if (useEntries)
+ entries--;
+ }
+ free(argv);
+}
+
+
+
+
+
+/* ************************************************** */
+
+#if WANT_MAIN
+
+/*% takes an option to indicate what sort of marshalling(read the code) and
+ an argument. If the argument looks like a marshalled buffer(has a ':'
+ embedded) then it's unmarshalled and the remarshalled and the new string
+ is compared to the old one.
+*/
+
+int
+main(int argc, char **argv) {
+ char buffer[1024];
+ char *b = &buffer[0];
+ size_t len = sizeof buffer;
+ char option;
+
+ if (argc < 2 || argv[1][0] != '-')
+ exit(1);
+
+ option = argv[1][1];
+ argv++;
+ argc--;
+
+
+#if 0
+ {
+ char buff[10];
+ char *p = argv[1], *q = &buff[0];
+
+ while (getfield(&q, sizeof buff, &p, ':') != NULL) {
+ printf("field: \"%s\"\n", q);
+ p++;
+ }
+ printf("p is now \"%s\"\n", p);
+ }
+#endif
+
+#if 0
+ {
+ char **x = splitarray(argv[1], argv[1] + strlen(argv[1]),
+ argv[2][0]);
+ char **p;
+
+ if (x == NULL)
+ printf("split failed\n");
+
+ for (p = x ; p != NULL && *p != NULL ; p++) {
+ printf("\"%s\"\n", *p);
+ }
+ }
+#endif
+
+#if 1
+ switch(option) {
+ case 'n': {
+ struct nwent ne;
+ int i;
+
+ if (strchr(argv[1], ':') != NULL) {
+ if (irp_unmarshall_nw(&ne, argv[1]) != 0) {
+ printf("Unmarhsalling failed\n");
+ exit(1);
+ }
+
+ printf("Name: \"%s\"\n", ne.n_name);
+ printf("Aliases:");
+ for (i = 0 ; ne.n_aliases[i] != NULL ; i++)
+ printf("\n\t\"%s\"", ne.n_aliases[i]);
+ printf("\nAddrtype: %s\n", ADDR_T_STR(ne.n_addrtype));
+ inet_net_ntop(ne.n_addrtype, ne.n_addr, ne.n_length,
+ buffer, sizeof buffer);
+ printf("Net: \"%s\"\n", buffer);
+ *((long*)ne.n_addr) = htonl(*((long*)ne.n_addr));
+ inet_net_ntop(ne.n_addrtype, ne.n_addr, ne.n_length,
+ buffer, sizeof buffer);
+ printf("Corrected Net: \"%s\"\n", buffer);
+ } else {
+ struct netent *np1 = getnetbyname(argv[1]);
+ ne.n_name = np1->n_name;
+ ne.n_aliases = np1->n_aliases;
+ ne.n_addrtype = np1->n_addrtype;
+ ne.n_addr = &np1->n_net;
+ ne.n_length = (IN_CLASSA(np1->n_net) ?
+ 8 :
+ (IN_CLASSB(np1->n_net) ?
+ 16 :
+ (IN_CLASSC(np1->n_net) ?
+ 24 : -1)));
+ np1->n_net = htonl(np1->n_net);
+ if (irp_marshall_nw(&ne, &b, &len) != 0) {
+ printf("Marshalling failed\n");
+ }
+ printf("%s\n", b);
+ }
+ break;
+ }
+
+
+ case 'r': {
+ char **hosts, **users, **domains;
+ size_t entries;
+ int i;
+ char *buff;
+ size_t size;
+ char *ngname;
+
+ if (strchr(argv[1], '(') != NULL) {
+ if (irp_unmarshall_ng(&ngname, &entries,
+ &hosts, &users, &domains,
+ argv[1]) != 0) {
+ printf("unmarshall failed\n");
+ exit(1);
+ }
+
+#define STRVAL(x) (x == NULL ? "*" : x)
+
+ printf("%s {\n", ngname);
+ for (i = 0 ; i < entries ; i++)
+ printf("\t\"%s\" : \"%s\" : \"%s\"\n",
+ STRVAL(hosts[i]),
+ STRVAL(users[i]),
+ STRVAL(domains[i]));
+ printf("}\n\n\n");
+
+
+ irp_marshall_ng_start(ngname, NULL, &size);
+ for (i = 0 ; i < entries ; i++)
+ irp_marshall_ng_next(hosts[i], users[i],
+ domains[i], NULL, &size);
+ irp_marshall_ng_end(NULL, &size);
+
+ buff = malloc(size);
+
+ irp_marshall_ng_start(ngname, buff, &size);
+ for (i = 0 ; i < entries ; i++) {
+ if (irp_marshall_ng_next(hosts[i], users[i],
+ domains[i], buff,
+ &size) != 0)
+ printf("next marshalling failed.\n");
+ }
+ irp_marshall_ng_end(buff, &size);
+
+ if (strcmp_nws(argv[1], buff) != 0) {
+ printf("compare failed:\n\t%s\n\t%s\n",
+ buffer, argv[1]);
+ } else {
+ printf("compare ok\n");
+ }
+ } else {
+ char *h, *u, *d, *buff;
+ size_t size;
+
+ /* run through two times. First to figure out how
+ much of a buffer we need. Second to do the
+ actual marshalling */
+
+ setnetgrent(argv[1]);
+ irp_marshall_ng_start(argv[1], NULL, &size);
+ while (getnetgrent(&h, &u, &d) == 1)
+ irp_marshall_ng_next(h, u, d, NULL, &size);
+ irp_marshall_ng_end(NULL, &size);
+ endnetgrent(argv[1]);
+
+ buff = malloc(size);
+
+ setnetgrent(argv[1]);
+ if (irp_marshall_ng_start(argv[1], buff, &size) != 0)
+ printf("Marshalling start failed\n");
+
+ while (getnetgrent(&h, &u, &d) == 1) {
+ if (irp_marshall_ng_next(h, u, d, buff, &size)
+ != 0) {
+ printf("Marshalling failed\n");
+ }
+ }
+
+ irp_marshall_ng_end(buff, &size);
+ endnetgrent();
+
+ printf("success: %s\n", buff);
+ }
+ break;
+ }
+
+
+
+ case 'h': {
+ struct hostent he, *hp;
+ int i;
+
+
+ if (strchr(argv[1], '@') != NULL) {
+ if (irp_unmarshall_ho(&he, argv[1]) != 0) {
+ printf("unmarshall failed\n");
+ exit(1);
+ }
+
+ printf("Host: \"%s\"\nAliases:", he.h_name);
+ for (i = 0 ; he.h_aliases[i] != NULL ; i++)
+ printf("\n\t\t\"%s\"", he.h_aliases[i]);
+ printf("\nAddr Type: \"%s\"\n",
+ ADDR_T_STR(he.h_addrtype));
+ printf("Length: %d\nAddresses:", he.h_length);
+ for (i = 0 ; he.h_addr_list[i] != 0 ; i++) {
+ inet_ntop(he.h_addrtype, he.h_addr_list[i],
+ buffer, sizeof buffer);
+ printf("\n\t\"%s\"\n", buffer);
+ }
+ printf("\n\n");
+
+ irp_marshall_ho(&he, &b, &len);
+ if (strcmp(argv[1], buffer) != 0) {
+ printf("compare failed:\n\t\"%s\"\n\t\"%s\"\n",
+ buffer, argv[1]);
+ } else {
+ printf("compare ok\n");
+ }
+ } else {
+ if ((hp = gethostbyname(argv[1])) == NULL) {
+ perror("gethostbyname");
+ printf("\"%s\"\n", argv[1]);
+ exit(1);
+ }
+
+ if (irp_marshall_ho(hp, &b, &len) != 0) {
+ printf("irp_marshall_ho failed\n");
+ exit(1);
+ }
+
+ printf("success: \"%s\"\n", buffer);
+ }
+ break;
+ }
+
+
+ case 's': {
+ struct servent *sv;
+ struct servent sv1;
+
+ if (strchr(argv[1], ':') != NULL) {
+ sv = &sv1;
+ memset(sv, 0xef, sizeof (struct servent));
+ if (irp_unmarshall_sv(sv, argv[1]) != 0) {
+ printf("unmarshall failed\n");
+
+ }
+
+ irp_marshall_sv(sv, &b, &len);
+ if (strcmp(argv[1], buffer) != 0) {
+ printf("compare failed:\n\t\"%s\"\n\t\"%s\"\n",
+ buffer, argv[1]);
+ } else {
+ printf("compare ok\n");
+ }
+ } else {
+ if ((sv = getservbyname(argv[1], argv[2])) == NULL) {
+ perror("getservent");
+ exit(1);
+ }
+
+ if (irp_marshall_sv(sv, &b, &len) != 0) {
+ printf("irp_marshall_sv failed\n");
+ exit(1);
+ }
+
+ printf("success: \"%s\"\n", buffer);
+ }
+ break;
+ }
+
+ case 'g': {
+ struct group *gr;
+ struct group gr1;
+
+ if (strchr(argv[1], ':') != NULL) {
+ gr = &gr1;
+ memset(gr, 0xef, sizeof (struct group));
+ if (irp_unmarshall_gr(gr, argv[1]) != 0) {
+ printf("unmarshall failed\n");
+
+ }
+
+ irp_marshall_gr(gr, &b, &len);
+ if (strcmp(argv[1], buffer) != 0) {
+ printf("compare failed:\n\t\"%s\"\n\t\"%s\"\n",
+ buffer, argv[1]);
+ } else {
+ printf("compare ok\n");
+ }
+ } else {
+ if ((gr = getgrnam(argv[1])) == NULL) {
+ perror("getgrnam");
+ exit(1);
+ }
+
+ if (irp_marshall_gr(gr, &b, &len) != 0) {
+ printf("irp_marshall_gr failed\n");
+ exit(1);
+ }
+
+ printf("success: \"%s\"\n", buffer);
+ }
+ break;
+ }
+
+
+ case 'p': {
+ struct passwd *pw;
+ struct passwd pw1;
+
+ if (strchr(argv[1], ':') != NULL) {
+ pw = &pw1;
+ memset(pw, 0xef, sizeof (*pw));
+ if (irp_unmarshall_pw(pw, argv[1]) != 0) {
+ printf("unmarshall failed\n");
+ exit(1);
+ }
+
+ printf("User: \"%s\"\nPasswd: \"%s\"\nUid: %ld\nGid: %ld\n",
+ pw->pw_name, pw->pw_passwd, (long)pw->pw_uid,
+ (long)pw->pw_gid);
+ printf("Class: \"%s\"\nChange: %ld\nGecos: \"%s\"\n",
+ pw->pw_class, (long)pw->pw_change, pw->pw_gecos);
+ printf("Shell: \"%s\"\nDirectory: \"%s\"\n",
+ pw->pw_shell, pw->pw_dir);
+
+ pw = getpwnam(pw->pw_name);
+ irp_marshall_pw(pw, &b, &len);
+ if (strcmp(argv[1], buffer) != 0) {
+ printf("compare failed:\n\t\"%s\"\n\t\"%s\"\n",
+ buffer, argv[1]);
+ } else {
+ printf("compare ok\n");
+ }
+ } else {
+ if ((pw = getpwnam(argv[1])) == NULL) {
+ perror("getpwnam");
+ exit(1);
+ }
+
+ if (irp_marshall_pw(pw, &b, &len) != 0) {
+ printf("irp_marshall_pw failed\n");
+ exit(1);
+ }
+
+ printf("success: \"%s\"\n", buffer);
+ }
+ break;
+ }
+
+ default:
+ printf("Wrong option: %c\n", option);
+ break;
+ }
+
+#endif
+
+ return (0);
+}
+
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/irs_data.c b/usr/src/lib/libresolv2_joy/common/irs/irs_data.c
new file mode 100644
index 0000000000..f12ca20f38
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/irs_data.c
@@ -0,0 +1,254 @@
+/*
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: irs_data.c,v 1.12 2007/08/27 03:32:26 marka Exp $";
+#endif
+
+#include "port_before.h"
+
+#ifndef __BIND_NOSTATIC
+
+#include <sys/types.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <string.h>
+#include <isc/memcluster.h>
+
+#ifdef DO_PTHREADS
+#include <pthread.h>
+#endif
+
+#include <irs.h>
+#include <stdlib.h>
+
+#include "port_after.h"
+
+#include "irs_data.h"
+#undef _res
+#if !(__GLIBC__ > 2 || __GLIBC__ == 2 && __GLIBC_MINOR__ >= 3)
+#undef h_errno
+extern int h_errno;
+#endif
+
+extern struct __res_state _res;
+
+#ifdef DO_PTHREADS
+static pthread_key_t key;
+static int once = 0;
+#else
+static struct net_data *net_data;
+#endif
+
+void
+irs_destroy(void) {
+#ifndef DO_PTHREADS
+ if (net_data != NULL)
+ net_data_destroy(net_data);
+ net_data = NULL;
+#endif
+}
+
+void
+net_data_destroy(void *p) {
+ struct net_data *net_data = p;
+
+ res_ndestroy(net_data->res);
+ if (net_data->gr != NULL) {
+ (*net_data->gr->close)(net_data->gr);
+ net_data->gr = NULL;
+ }
+ if (net_data->pw != NULL) {
+ (*net_data->pw->close)(net_data->pw);
+ net_data->pw = NULL;
+ }
+ if (net_data->sv != NULL) {
+ (*net_data->sv->close)(net_data->sv);
+ net_data->sv = NULL;
+ }
+ if (net_data->pr != NULL) {
+ (*net_data->pr->close)(net_data->pr);
+ net_data->pr = NULL;
+ }
+ if (net_data->ho != NULL) {
+ (*net_data->ho->close)(net_data->ho);
+ net_data->ho = NULL;
+ }
+ if (net_data->nw != NULL) {
+ (*net_data->nw->close)(net_data->nw);
+ net_data->nw = NULL;
+ }
+ if (net_data->ng != NULL) {
+ (*net_data->ng->close)(net_data->ng);
+ net_data->ng = NULL;
+ }
+ if (net_data->ho_data != NULL) {
+ free(net_data->ho_data);
+ net_data->ho_data = NULL;
+ }
+ if (net_data->nw_data != NULL) {
+ free(net_data->nw_data);
+ net_data->nw_data = NULL;
+ }
+
+ (*net_data->irs->close)(net_data->irs);
+ memput(net_data, sizeof *net_data);
+}
+
+/*%
+ * applications that need a specific config file other than
+ * _PATH_IRS_CONF should call net_data_init directly rather than letting
+ * the various wrapper functions make the first call. - brister
+ */
+
+struct net_data *
+net_data_init(const char *conf_file) {
+#ifdef DO_PTHREADS
+#ifndef LIBBIND_MUTEX_INITIALIZER
+#define LIBBIND_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
+#endif
+ static pthread_mutex_t keylock = LIBBIND_MUTEX_INITIALIZER;
+ struct net_data *net_data;
+
+ if (!once) {
+ if (pthread_mutex_lock(&keylock) != 0)
+ return (NULL);
+ if (!once) {
+ if (pthread_key_create(&key, net_data_destroy) != 0) {
+ (void)pthread_mutex_unlock(&keylock);
+ return (NULL);
+ }
+ once = 1;
+ }
+ if (pthread_mutex_unlock(&keylock) != 0)
+ return (NULL);
+ }
+ net_data = pthread_getspecific(key);
+#endif
+
+ if (net_data == NULL) {
+ net_data = net_data_create(conf_file);
+ if (net_data == NULL)
+ return (NULL);
+#ifdef DO_PTHREADS
+ if (pthread_setspecific(key, net_data) != 0) {
+ net_data_destroy(net_data);
+ return (NULL);
+ }
+#endif
+ }
+
+ return (net_data);
+}
+
+struct net_data *
+net_data_create(const char *conf_file) {
+ struct net_data *net_data;
+
+ net_data = memget(sizeof (struct net_data));
+ if (net_data == NULL)
+ return (NULL);
+ memset(net_data, 0, sizeof (struct net_data));
+
+ if ((net_data->irs = irs_gen_acc("", conf_file)) == NULL) {
+ memput(net_data, sizeof (struct net_data));
+ return (NULL);
+ }
+#ifndef DO_PTHREADS
+ (*net_data->irs->res_set)(net_data->irs, &_res, NULL);
+#endif
+
+ net_data->res = (*net_data->irs->res_get)(net_data->irs);
+ if (net_data->res == NULL) {
+ (*net_data->irs->close)(net_data->irs);
+ memput(net_data, sizeof (struct net_data));
+ return (NULL);
+ }
+
+ if ((net_data->res->options & RES_INIT) == 0U &&
+ res_ninit(net_data->res) == -1) {
+ (*net_data->irs->close)(net_data->irs);
+ memput(net_data, sizeof (struct net_data));
+ return (NULL);
+ }
+
+ return (net_data);
+}
+
+void
+net_data_minimize(struct net_data *net_data) {
+ res_nclose(net_data->res);
+}
+
+#ifdef _REENTRANT
+struct __res_state *
+__res_state(void) {
+ /* NULL param here means use the default config file. */
+ struct net_data *net_data = net_data_init(NULL);
+ if (net_data && net_data->res)
+ return (net_data->res);
+
+ return (&_res);
+}
+#else
+#ifdef __linux
+struct __res_state *
+__res_state(void) {
+ return (&_res);
+}
+#endif
+#endif
+
+int *
+__h_errno(void) {
+ /* NULL param here means use the default config file. */
+ struct net_data *net_data = net_data_init(NULL);
+ if (net_data && net_data->res)
+ return (&net_data->res->res_h_errno);
+#ifdef ORIGINAL_ISC_CODE
+#if !(__GLIBC__ > 2 || __GLIBC__ == 2 && __GLIBC_MINOR__ >= 3)
+ return(&_res.res_h_errno);
+#else
+ return (&h_errno);
+#endif
+#else
+ return (&h_errno);
+#endif /* ORIGINAL_ISC_CODE */
+}
+
+void
+__h_errno_set(struct __res_state *res, int err) {
+
+
+#if (__GLIBC__ > 2 || __GLIBC__ == 2 && __GLIBC_MINOR__ >= 3)
+ res->res_h_errno = err;
+#else
+ h_errno = res->res_h_errno = err;
+#endif
+}
+
+#endif /*__BIND_NOSTATIC*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/irs_data.h b/usr/src/lib/libresolv2_joy/common/irs/irs_data.h
new file mode 100644
index 0000000000..cb814fd8b1
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/irs_data.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: irs_data.h,v 1.3 2005/04/27 04:56:30 sra Exp $
+ */
+
+#ifndef __BIND_NOSTATIC
+
+#define net_data_init __net_data_init
+
+struct net_data {
+ struct irs_acc * irs;
+
+ struct irs_gr * gr;
+ struct irs_pw * pw;
+ struct irs_sv * sv;
+ struct irs_pr * pr;
+ struct irs_ho * ho;
+ struct irs_nw * nw;
+ struct irs_ng * ng;
+
+ struct group * gr_last;
+ struct passwd * pw_last;
+ struct servent * sv_last;
+ struct protoent * pr_last;
+ struct netent * nw_last; /*%< should have been ne_last */
+ struct nwent * nww_last;
+ struct hostent * ho_last;
+
+ unsigned int gr_stayopen :1;
+ unsigned int pw_stayopen :1;
+ unsigned int sv_stayopen :1;
+ unsigned int pr_stayopen :1;
+ unsigned int ho_stayopen :1;
+ unsigned int nw_stayopen :1;
+
+ void * nw_data;
+ void * ho_data;
+
+ struct __res_state * res; /*%< for gethostent.c */
+};
+
+extern struct net_data * net_data_init(const char *conf_file);
+extern void net_data_minimize(struct net_data *);
+
+#endif /*__BIND_NOSTATIC*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/irs_p.h b/usr/src/lib/libresolv2_joy/common/irs/irs_p.h
new file mode 100644
index 0000000000..2a0a933fce
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/irs_p.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: irs_p.h,v 1.3 2005/04/27 04:56:30 sra Exp $
+ */
+
+#ifndef _IRS_P_H_INCLUDED
+#define _IRS_P_H_INCLUDED
+
+#include <stdio.h>
+
+#include "pathnames.h"
+
+#define IRS_SV_MAXALIASES 35
+
+struct lcl_sv {
+ FILE * fp;
+ char line[BUFSIZ+1];
+ struct servent serv;
+ char * serv_aliases[IRS_SV_MAXALIASES];
+};
+
+#define irs_nul_ng __irs_nul_ng
+#define map_v4v6_address __map_v4v6_address
+#define make_group_list __make_group_list
+#define irs_lclsv_fnxt __irs_lclsv_fnxt
+
+extern void map_v4v6_address(const char *src, char *dst);
+extern int make_group_list(struct irs_gr *, const char *,
+ gid_t, gid_t *, int *);
+extern struct irs_ng * irs_nul_ng(struct irs_acc *);
+extern struct servent * irs_lclsv_fnxt(struct lcl_sv *);
+
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/lcl.c b/usr/src/lib/libresolv2_joy/common/irs/lcl.c
new file mode 100644
index 0000000000..9dd6967b87
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/lcl.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: lcl.c,v 1.4 2005/04/27 04:56:30 sra Exp $";
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+#include <isc/memcluster.h>
+
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "lcl_p.h"
+
+/* Forward. */
+
+static void lcl_close(struct irs_acc *);
+static struct __res_state * lcl_res_get(struct irs_acc *);
+static void lcl_res_set(struct irs_acc *, struct __res_state *,
+ void (*)(void *));
+
+/* Public */
+
+struct irs_acc *
+irs_lcl_acc(const char *options) {
+ struct irs_acc *acc;
+ struct lcl_p *lcl;
+
+ UNUSED(options);
+
+ if (!(acc = memget(sizeof *acc))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(acc, 0x5e, sizeof *acc);
+ if (!(lcl = memget(sizeof *lcl))) {
+ errno = ENOMEM;
+ free(acc);
+ return (NULL);
+ }
+ memset(lcl, 0x5e, sizeof *lcl);
+ lcl->res = NULL;
+ lcl->free_res = NULL;
+ acc->private = lcl;
+#ifdef WANT_IRS_GR
+ acc->gr_map = irs_lcl_gr;
+#else
+ acc->gr_map = NULL;
+#endif
+#ifdef WANT_IRS_PW
+ acc->pw_map = irs_lcl_pw;
+#else
+ acc->pw_map = NULL;
+#endif
+ acc->sv_map = irs_lcl_sv;
+ acc->pr_map = irs_lcl_pr;
+ acc->ho_map = irs_lcl_ho;
+ acc->nw_map = irs_lcl_nw;
+ acc->ng_map = irs_lcl_ng;
+ acc->res_get = lcl_res_get;
+ acc->res_set = lcl_res_set;
+ acc->close = lcl_close;
+ return (acc);
+}
+
+/* Methods */
+static struct __res_state *
+lcl_res_get(struct irs_acc *this) {
+ struct lcl_p *lcl = (struct lcl_p *)this->private;
+
+ if (lcl->res == NULL) {
+ struct __res_state *res;
+ res = (struct __res_state *)malloc(sizeof *res);
+ if (res == NULL)
+ return (NULL);
+ memset(res, 0, sizeof *res);
+ lcl_res_set(this, res, free);
+ }
+
+ if ((lcl->res->options & RES_INIT) == 0U &&
+ res_ninit(lcl->res) < 0)
+ return (NULL);
+
+ return (lcl->res);
+}
+
+static void
+lcl_res_set(struct irs_acc *this, struct __res_state *res,
+ void (*free_res)(void *)) {
+ struct lcl_p *lcl = (struct lcl_p *)this->private;
+
+ if (lcl->res && lcl->free_res) {
+ res_nclose(lcl->res);
+ (*lcl->free_res)(lcl->res);
+ }
+
+ lcl->res = res;
+ lcl->free_res = free_res;
+}
+
+static void
+lcl_close(struct irs_acc *this) {
+ struct lcl_p *lcl = (struct lcl_p *)this->private;
+
+ if (lcl) {
+ if (lcl->free_res)
+ (*lcl->free_res)(lcl->res);
+ memput(lcl, sizeof *lcl);
+ }
+ memput(this, sizeof *this);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/lcl_ho.c b/usr/src/lib/libresolv2_joy/common/irs/lcl_ho.c
new file mode 100644
index 0000000000..17c9a5e725
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/lcl_ho.c
@@ -0,0 +1,578 @@
+/*
+ * Copyright (c) 1985, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* from gethostnamadr.c 8.1 (Berkeley) 6/4/93 */
+/* BIND Id: gethnamaddr.c,v 8.15 1996/05/22 04:56:30 vixie Exp $ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: lcl_ho.c,v 1.5 2006/03/09 23:57:56 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/* Imports. */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <irs.h>
+#include <isc/memcluster.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "dns_p.h"
+#include "lcl_p.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) sprintf x
+#endif
+
+/* Definitions. */
+
+#define MAXALIASES 35
+#define MAXADDRS 35
+#define Max(a,b) ((a) > (b) ? (a) : (b))
+
+#if PACKETSZ > 1024
+#define MAXPACKET PACKETSZ
+#else
+#define MAXPACKET 1024
+#endif
+
+struct pvt {
+ FILE * fp;
+ struct hostent host;
+ char * h_addr_ptrs[MAXADDRS + 1];
+ char * host_aliases[MAXALIASES];
+ char hostbuf[8*1024];
+ u_char host_addr[16]; /*%< IPv4 or IPv6 */
+ struct __res_state *res;
+ void (*free_res)(void *);
+};
+
+typedef union {
+ int32_t al;
+ char ac;
+} align;
+
+static const u_char mapped[] = { 0,0, 0,0, 0,0, 0,0, 0,0, 0xff,0xff };
+static const u_char tunnelled[] = { 0,0, 0,0, 0,0, 0,0, 0,0, 0,0 };
+
+/* Forward. */
+
+static void ho_close(struct irs_ho *this);
+static struct hostent * ho_byname(struct irs_ho *this, const char *name);
+static struct hostent * ho_byname2(struct irs_ho *this, const char *name,
+ int af);
+static struct hostent * ho_byaddr(struct irs_ho *this, const void *addr,
+ int len, int af);
+static struct hostent * ho_next(struct irs_ho *this);
+static void ho_rewind(struct irs_ho *this);
+static void ho_minimize(struct irs_ho *this);
+static struct __res_state * ho_res_get(struct irs_ho *this);
+static void ho_res_set(struct irs_ho *this,
+ struct __res_state *res,
+ void (*free_res)(void *));
+static struct addrinfo * ho_addrinfo(struct irs_ho *this, const char *name,
+ const struct addrinfo *pai);
+
+static size_t ns_namelen(const char *);
+static int init(struct irs_ho *this);
+
+/* Portability. */
+
+#ifndef SEEK_SET
+# define SEEK_SET 0
+#endif
+
+/* Public. */
+
+struct irs_ho *
+irs_lcl_ho(struct irs_acc *this) {
+ struct irs_ho *ho;
+ struct pvt *pvt;
+
+ UNUSED(this);
+
+ if (!(pvt = memget(sizeof *pvt))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ if (!(ho = memget(sizeof *ho))) {
+ memput(pvt, sizeof *pvt);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(ho, 0x5e, sizeof *ho);
+ ho->private = pvt;
+ ho->close = ho_close;
+ ho->byname = ho_byname;
+ ho->byname2 = ho_byname2;
+ ho->byaddr = ho_byaddr;
+ ho->next = ho_next;
+ ho->rewind = ho_rewind;
+ ho->minimize = ho_minimize;
+ ho->res_get = ho_res_get;
+ ho->res_set = ho_res_set;
+ ho->addrinfo = ho_addrinfo;
+ return (ho);
+}
+
+/* Methods. */
+
+static void
+ho_close(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ ho_minimize(this);
+ if (pvt->fp)
+ (void) fclose(pvt->fp);
+ if (pvt->res && pvt->free_res)
+ (*pvt->free_res)(pvt->res);
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+static struct hostent *
+ho_byname(struct irs_ho *this, const char *name) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct hostent *hp;
+
+ if (init(this) == -1)
+ return (NULL);
+
+ if (pvt->res->options & RES_USE_INET6) {
+ hp = ho_byname2(this, name, AF_INET6);
+ if (hp)
+ return (hp);
+ }
+ return (ho_byname2(this, name, AF_INET));
+}
+
+static struct hostent *
+ho_byname2(struct irs_ho *this, const char *name, int af) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct hostent *hp;
+ char **hap;
+ size_t n;
+
+ if (init(this) == -1)
+ return (NULL);
+
+ ho_rewind(this);
+ n = ns_namelen(name);
+ while ((hp = ho_next(this)) != NULL) {
+ size_t nn;
+
+ if (hp->h_addrtype != af)
+ continue;
+ nn = ns_namelen(hp->h_name);
+ if (strncasecmp(hp->h_name, name, Max(n, nn)) == 0)
+ goto found;
+ for (hap = hp->h_aliases; *hap; hap++) {
+ nn = ns_namelen(*hap);
+ if (strncasecmp(*hap, name, Max(n, nn)) == 0)
+ goto found;
+ }
+ }
+ found:
+ if (!hp) {
+ RES_SET_H_ERRNO(pvt->res, HOST_NOT_FOUND);
+ return (NULL);
+ }
+ RES_SET_H_ERRNO(pvt->res, NETDB_SUCCESS);
+ return (hp);
+}
+
+static struct hostent *
+ho_byaddr(struct irs_ho *this, const void *addr, int len, int af) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ const u_char *uaddr = addr;
+ struct hostent *hp;
+ int size;
+
+ if (init(this) == -1)
+ return (NULL);
+
+ if (af == AF_INET6 && len == IN6ADDRSZ &&
+ (!memcmp(uaddr, mapped, sizeof mapped) ||
+ !memcmp(uaddr, tunnelled, sizeof tunnelled))) {
+ /* Unmap. */
+ addr = (const u_char *)addr + sizeof mapped;
+ uaddr += sizeof mapped;
+ af = AF_INET;
+ len = INADDRSZ;
+ }
+ switch (af) {
+ case AF_INET:
+ size = INADDRSZ;
+ break;
+ case AF_INET6:
+ size = IN6ADDRSZ;
+ break;
+ default:
+ errno = EAFNOSUPPORT;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ return (NULL);
+ }
+ if (size > len) {
+ errno = EINVAL;
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ return (NULL);
+ }
+
+ /*
+ * Do the search.
+ */
+ ho_rewind(this);
+ while ((hp = ho_next(this)) != NULL) {
+ char **hap;
+
+ for (hap = hp->h_addr_list; *hap; hap++) {
+ const u_char *taddr = (const u_char *)*hap;
+ int taf = hp->h_addrtype;
+ int tlen = hp->h_length;
+
+ if (taf == AF_INET6 && tlen == IN6ADDRSZ &&
+ (!memcmp(taddr, mapped, sizeof mapped) ||
+ !memcmp(taddr, tunnelled, sizeof tunnelled))) {
+ /* Unmap. */
+ taddr += sizeof mapped;
+ taf = AF_INET;
+ tlen = INADDRSZ;
+ }
+ if (taf == af && tlen == len &&
+ !memcmp(taddr, uaddr, tlen))
+ goto found;
+ }
+ }
+ found:
+ if (!hp) {
+ RES_SET_H_ERRNO(pvt->res, HOST_NOT_FOUND);
+ return (NULL);
+ }
+ RES_SET_H_ERRNO(pvt->res, NETDB_SUCCESS);
+ return (hp);
+}
+
+static struct hostent *
+ho_next(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ char *cp, **q, *p;
+ char *bufp, *ndbuf, *dbuf = NULL;
+ int c, af, len, bufsiz, offset;
+
+ if (init(this) == -1)
+ return (NULL);
+
+ if (!pvt->fp)
+ ho_rewind(this);
+ if (!pvt->fp) {
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ return (NULL);
+ }
+ bufp = pvt->hostbuf;
+ bufsiz = sizeof pvt->hostbuf;
+ offset = 0;
+ again:
+ if (!(p = fgets(bufp + offset, bufsiz - offset, pvt->fp))) {
+ RES_SET_H_ERRNO(pvt->res, HOST_NOT_FOUND);
+ if (dbuf)
+ free(dbuf);
+ return (NULL);
+ }
+ if (!strchr(p, '\n') && !feof(pvt->fp)) {
+#define GROWBUF 1024
+ /* allocate space for longer line */
+ if (dbuf == NULL) {
+ if ((ndbuf = malloc(bufsiz + GROWBUF)) != NULL)
+ strcpy(ndbuf, bufp);
+ } else
+ ndbuf = realloc(dbuf, bufsiz + GROWBUF);
+ if (ndbuf) {
+ dbuf = ndbuf;
+ bufp = dbuf;
+ bufsiz += GROWBUF;
+ offset = strlen(dbuf);
+ } else {
+ /* allocation failed; skip this long line */
+ while ((c = getc(pvt->fp)) != EOF)
+ if (c == '\n')
+ break;
+ if (c != EOF)
+ ungetc(c, pvt->fp);
+ }
+ goto again;
+ }
+
+ p -= offset;
+ offset = 0;
+
+ if (*p == '#')
+ goto again;
+ if ((cp = strpbrk(p, "#\n")) != NULL)
+ *cp = '\0';
+ if (!(cp = strpbrk(p, " \t")))
+ goto again;
+ *cp++ = '\0';
+ if (inet_pton(AF_INET6, p, pvt->host_addr) > 0) {
+ af = AF_INET6;
+ len = IN6ADDRSZ;
+ } else if (inet_aton(p, (struct in_addr *)pvt->host_addr) > 0) {
+ if (pvt->res->options & RES_USE_INET6) {
+ map_v4v6_address((char*)pvt->host_addr,
+ (char*)pvt->host_addr);
+ af = AF_INET6;
+ len = IN6ADDRSZ;
+ } else {
+ af = AF_INET;
+ len = INADDRSZ;
+ }
+ } else {
+ goto again;
+ }
+ pvt->h_addr_ptrs[0] = (char *)pvt->host_addr;
+ pvt->h_addr_ptrs[1] = NULL;
+ pvt->host.h_addr_list = pvt->h_addr_ptrs;
+ pvt->host.h_length = len;
+ pvt->host.h_addrtype = af;
+ while (*cp == ' ' || *cp == '\t')
+ cp++;
+ pvt->host.h_name = cp;
+ q = pvt->host.h_aliases = pvt->host_aliases;
+ if ((cp = strpbrk(cp, " \t")) != NULL)
+ *cp++ = '\0';
+ while (cp && *cp) {
+ if (*cp == ' ' || *cp == '\t') {
+ cp++;
+ continue;
+ }
+ if (q < &pvt->host_aliases[MAXALIASES - 1])
+ *q++ = cp;
+ if ((cp = strpbrk(cp, " \t")) != NULL)
+ *cp++ = '\0';
+ }
+ *q = NULL;
+ if (dbuf)
+ free(dbuf);
+ RES_SET_H_ERRNO(pvt->res, NETDB_SUCCESS);
+ return (&pvt->host);
+}
+
+static void
+ho_rewind(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->fp) {
+ if (fseek(pvt->fp, 0L, SEEK_SET) == 0)
+ return;
+ (void)fclose(pvt->fp);
+ }
+ if (!(pvt->fp = fopen(_PATH_HOSTS, "r")))
+ return;
+ if (fcntl(fileno(pvt->fp), F_SETFD, 1) < 0) {
+ (void)fclose(pvt->fp);
+ pvt->fp = NULL;
+ }
+}
+
+static void
+ho_minimize(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->fp != NULL) {
+ (void)fclose(pvt->fp);
+ pvt->fp = NULL;
+ }
+ if (pvt->res)
+ res_nclose(pvt->res);
+}
+
+static struct __res_state *
+ho_res_get(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (!pvt->res) {
+ struct __res_state *res;
+ res = (struct __res_state *)malloc(sizeof *res);
+ if (!res) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(res, 0, sizeof *res);
+ ho_res_set(this, res, free);
+ }
+
+ return (pvt->res);
+}
+
+static void
+ho_res_set(struct irs_ho *this, struct __res_state *res,
+ void (*free_res)(void *)) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->res && pvt->free_res) {
+ res_nclose(pvt->res);
+ (*pvt->free_res)(pvt->res);
+ }
+
+ pvt->res = res;
+ pvt->free_res = free_res;
+}
+
+struct lcl_res_target {
+ struct lcl_res_target *next;
+ int family;
+};
+
+/* XXX */
+extern struct addrinfo *hostent2addrinfo __P((struct hostent *,
+ const struct addrinfo *pai));
+
+static struct addrinfo *
+ho_addrinfo(struct irs_ho *this, const char *name, const struct addrinfo *pai)
+{
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct hostent *hp;
+ struct lcl_res_target q, q2, *p;
+ struct addrinfo sentinel, *cur;
+
+ memset(&q, 0, sizeof(q2));
+ memset(&q2, 0, sizeof(q2));
+ memset(&sentinel, 0, sizeof(sentinel));
+ cur = &sentinel;
+
+ switch(pai->ai_family) {
+ case AF_UNSPEC: /*%< INET6 then INET4 */
+ q.family = AF_INET6;
+ q.next = &q2;
+ q2.family = AF_INET;
+ break;
+ case AF_INET6:
+ q.family = AF_INET6;
+ break;
+ case AF_INET:
+ q.family = AF_INET;
+ break;
+ default:
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY); /*%< ??? */
+ return(NULL);
+ }
+
+ for (p = &q; p; p = p->next) {
+ struct addrinfo *ai;
+
+ hp = (*this->byname2)(this, name, p->family);
+ if (hp == NULL) {
+ /* byname2 should've set an appropriate error */
+ continue;
+ }
+ if ((hp->h_name == NULL) || (hp->h_name[0] == 0) ||
+ (hp->h_addr_list[0] == NULL)) {
+ RES_SET_H_ERRNO(pvt->res, NO_RECOVERY);
+ continue;
+ }
+
+ ai = hostent2addrinfo(hp, pai);
+ if (ai) {
+ cur->ai_next = ai;
+ while (cur->ai_next)
+ cur = cur->ai_next;
+ }
+ }
+
+ if (sentinel.ai_next == NULL)
+ RES_SET_H_ERRNO(pvt->res, HOST_NOT_FOUND);
+
+ return(sentinel.ai_next);
+}
+
+/* Private. */
+
+static size_t
+ns_namelen(const char *s) {
+ int i;
+
+ for (i = strlen(s); i > 0 && s[i-1] == '.'; i--)
+ (void)NULL;
+ return ((size_t) i);
+}
+
+static int
+init(struct irs_ho *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (!pvt->res && !ho_res_get(this))
+ return (-1);
+ if (((pvt->res->options & RES_INIT) == 0U) &&
+ res_ninit(pvt->res) == -1)
+ return (-1);
+ return (0);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/lcl_ng.c b/usr/src/lib/libresolv2_joy/common/irs/lcl_ng.c
new file mode 100644
index 0000000000..319725ce70
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/lcl_ng.c
@@ -0,0 +1,446 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: lcl_ng.c,v 1.3 2005/04/27 04:56:31 sra Exp $";
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <irs.h>
+#include <isc/memcluster.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "lcl_p.h"
+
+/* Definitions */
+
+#define NG_HOST 0 /*%< Host name */
+#define NG_USER 1 /*%< User name */
+#define NG_DOM 2 /*%< and Domain name */
+#define LINSIZ 1024 /*%< Length of netgroup file line */
+/*
+ * XXX Warning XXX
+ * This code is a hack-and-slash special. It realy needs to be
+ * rewritten with things like strdup, and realloc in mind.
+ * More reasonable data structures would not be a bad thing.
+ */
+
+/*%
+ * Static Variables and functions used by setnetgrent(), getnetgrent() and
+ * endnetgrent().
+ *
+ * There are two linked lists:
+ * \li linelist is just used by setnetgrent() to parse the net group file via.
+ * parse_netgrp()
+ * \li netgrp is the list of entries for the current netgroup
+ */
+struct linelist {
+ struct linelist *l_next; /*%< Chain ptr. */
+ int l_parsed; /*%< Flag for cycles */
+ char * l_groupname; /*%< Name of netgroup */
+ char * l_line; /*%< Netgroup entrie(s) to be parsed */
+};
+
+struct ng_old_struct {
+ struct ng_old_struct *ng_next; /*%< Chain ptr */
+ char * ng_str[3]; /*%< Field pointers, see below */
+};
+
+struct pvt {
+ FILE *fp;
+ struct linelist *linehead;
+ struct ng_old_struct *nextgrp;
+ struct {
+ struct ng_old_struct *gr;
+ char *grname;
+ } grouphead;
+};
+
+/* Forward */
+
+static void ng_rewind(struct irs_ng *, const char*);
+static void ng_close(struct irs_ng *);
+static int ng_next(struct irs_ng *, const char **,
+ const char **, const char **);
+static int ng_test(struct irs_ng *, const char *,
+ const char *, const char *,
+ const char *);
+static void ng_minimize(struct irs_ng *);
+
+static int parse_netgrp(struct irs_ng *, const char*);
+static struct linelist *read_for_group(struct irs_ng *, const char *);
+static void freelists(struct irs_ng *);
+
+/* Public */
+
+struct irs_ng *
+irs_lcl_ng(struct irs_acc *this) {
+ struct irs_ng *ng;
+ struct pvt *pvt;
+
+ UNUSED(this);
+
+ if (!(ng = memget(sizeof *ng))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(ng, 0x5e, sizeof *ng);
+ if (!(pvt = memget(sizeof *pvt))) {
+ memput(ng, sizeof *ng);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ ng->private = pvt;
+ ng->close = ng_close;
+ ng->next = ng_next;
+ ng->test = ng_test;
+ ng->rewind = ng_rewind;
+ ng->minimize = ng_minimize;
+ return (ng);
+}
+
+/* Methods */
+
+static void
+ng_close(struct irs_ng *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->fp != NULL)
+ fclose(pvt->fp);
+ freelists(this);
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+/*%
+ * Parse the netgroup file looking for the netgroup and build the list
+ * of netgrp structures. Let parse_netgrp() and read_for_group() do
+ * most of the work.
+ */
+static void
+ng_rewind(struct irs_ng *this, const char *group) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->fp != NULL && fseek(pvt->fp, SEEK_CUR, 0L) == -1) {
+ fclose(pvt->fp);
+ pvt->fp = NULL;
+ }
+
+ if (pvt->fp == NULL || pvt->grouphead.gr == NULL ||
+ strcmp(group, pvt->grouphead.grname)) {
+ freelists(this);
+ if (pvt->fp != NULL)
+ fclose(pvt->fp);
+ pvt->fp = fopen(_PATH_NETGROUP, "r");
+ if (pvt->fp != NULL) {
+ if (parse_netgrp(this, group))
+ freelists(this);
+ if (!(pvt->grouphead.grname = strdup(group)))
+ freelists(this);
+ fclose(pvt->fp);
+ pvt->fp = NULL;
+ }
+ }
+ pvt->nextgrp = pvt->grouphead.gr;
+}
+
+/*%
+ * Get the next netgroup off the list.
+ */
+static int
+ng_next(struct irs_ng *this, const char **host, const char **user,
+ const char **domain)
+{
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->nextgrp) {
+ *host = pvt->nextgrp->ng_str[NG_HOST];
+ *user = pvt->nextgrp->ng_str[NG_USER];
+ *domain = pvt->nextgrp->ng_str[NG_DOM];
+ pvt->nextgrp = pvt->nextgrp->ng_next;
+ return (1);
+ }
+ return (0);
+}
+
+/*%
+ * Search for a match in a netgroup.
+ */
+static int
+ng_test(struct irs_ng *this, const char *name,
+ const char *host, const char *user, const char *domain)
+{
+ const char *ng_host, *ng_user, *ng_domain;
+
+ ng_rewind(this, name);
+ while (ng_next(this, &ng_host, &ng_user, &ng_domain))
+ if ((host == NULL || ng_host == NULL ||
+ !strcmp(host, ng_host)) &&
+ (user == NULL || ng_user == NULL ||
+ !strcmp(user, ng_user)) &&
+ (domain == NULL || ng_domain == NULL ||
+ !strcmp(domain, ng_domain))) {
+ freelists(this);
+ return (1);
+ }
+ freelists(this);
+ return (0);
+}
+
+static void
+ng_minimize(struct irs_ng *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->fp != NULL) {
+ (void)fclose(pvt->fp);
+ pvt->fp = NULL;
+ }
+}
+
+/* Private */
+
+/*%
+ * endnetgrent() - cleanup
+ */
+static void
+freelists(struct irs_ng *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct linelist *lp, *olp;
+ struct ng_old_struct *gp, *ogp;
+
+ lp = pvt->linehead;
+ while (lp) {
+ olp = lp;
+ lp = lp->l_next;
+ free(olp->l_groupname);
+ free(olp->l_line);
+ free((char *)olp);
+ }
+ pvt->linehead = NULL;
+ if (pvt->grouphead.grname) {
+ free(pvt->grouphead.grname);
+ pvt->grouphead.grname = NULL;
+ }
+ gp = pvt->grouphead.gr;
+ while (gp) {
+ ogp = gp;
+ gp = gp->ng_next;
+ if (ogp->ng_str[NG_HOST])
+ free(ogp->ng_str[NG_HOST]);
+ if (ogp->ng_str[NG_USER])
+ free(ogp->ng_str[NG_USER]);
+ if (ogp->ng_str[NG_DOM])
+ free(ogp->ng_str[NG_DOM]);
+ free((char *)ogp);
+ }
+ pvt->grouphead.gr = NULL;
+}
+
+/*%
+ * Parse the netgroup file setting up the linked lists.
+ */
+static int
+parse_netgrp(struct irs_ng *this, const char *group) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ char *spos, *epos;
+ int len, strpos;
+ char *pos, *gpos;
+ struct ng_old_struct *grp;
+ struct linelist *lp = pvt->linehead;
+
+ /*
+ * First, see if the line has already been read in.
+ */
+ while (lp) {
+ if (!strcmp(group, lp->l_groupname))
+ break;
+ lp = lp->l_next;
+ }
+ if (lp == NULL &&
+ (lp = read_for_group(this, group)) == NULL)
+ return (1);
+ if (lp->l_parsed) {
+ /*fprintf(stderr, "Cycle in netgroup %s\n", lp->l_groupname);*/
+ return (1);
+ } else
+ lp->l_parsed = 1;
+ pos = lp->l_line;
+ while (*pos != '\0') {
+ if (*pos == '(') {
+ if (!(grp = malloc(sizeof (struct ng_old_struct)))) {
+ freelists(this);
+ errno = ENOMEM;
+ return (1);
+ }
+ memset(grp, 0, sizeof (struct ng_old_struct));
+ grp->ng_next = pvt->grouphead.gr;
+ pvt->grouphead.gr = grp;
+ pos++;
+ gpos = strsep(&pos, ")");
+ for (strpos = 0; strpos < 3; strpos++) {
+ if ((spos = strsep(&gpos, ","))) {
+ while (*spos == ' ' || *spos == '\t')
+ spos++;
+ if ((epos = strpbrk(spos, " \t"))) {
+ *epos = '\0';
+ len = epos - spos;
+ } else
+ len = strlen(spos);
+ if (len > 0) {
+ if(!(grp->ng_str[strpos]
+ = (char *)
+ malloc(len + 1))) {
+ freelists(this);
+ return (1);
+ }
+ memcpy(grp->ng_str[strpos],
+ spos,
+ len + 1);
+ }
+ } else
+ goto errout;
+ }
+ } else {
+ spos = strsep(&pos, ", \t");
+ if (spos != NULL && parse_netgrp(this, spos)) {
+ freelists(this);
+ return (1);
+ }
+ }
+ if (pos == NULL)
+ break;
+ while (*pos == ' ' || *pos == ',' || *pos == '\t')
+ pos++;
+ }
+ return (0);
+ errout:
+ /*fprintf(stderr, "Bad netgroup %s at ..%s\n", lp->l_groupname,
+ spos);*/
+ return (1);
+}
+
+/*%
+ * Read the netgroup file and save lines until the line for the netgroup
+ * is found. Return 1 if eof is encountered.
+ */
+static struct linelist *
+read_for_group(struct irs_ng *this, const char *group) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ char *pos, *spos, *linep = NULL, *olinep;
+ int len, olen, cont;
+ struct linelist *lp;
+ char line[LINSIZ + 1];
+
+ while (fgets(line, LINSIZ, pvt->fp) != NULL) {
+ pos = line;
+ if (*pos == '#')
+ continue;
+ while (*pos == ' ' || *pos == '\t')
+ pos++;
+ spos = pos;
+ while (*pos != ' ' && *pos != '\t' && *pos != '\n' &&
+ *pos != '\0')
+ pos++;
+ len = pos - spos;
+ while (*pos == ' ' || *pos == '\t')
+ pos++;
+ if (*pos != '\n' && *pos != '\0') {
+ if (!(lp = malloc(sizeof (*lp)))) {
+ freelists(this);
+ return (NULL);
+ }
+ lp->l_parsed = 0;
+ if (!(lp->l_groupname = malloc(len + 1))) {
+ free(lp);
+ freelists(this);
+ return (NULL);
+ }
+ memcpy(lp->l_groupname, spos, len);
+ *(lp->l_groupname + len) = '\0';
+ len = strlen(pos);
+ olen = 0;
+ olinep = NULL;
+
+ /*
+ * Loop around handling line continuations.
+ */
+ do {
+ if (*(pos + len - 1) == '\n')
+ len--;
+ if (*(pos + len - 1) == '\\') {
+ len--;
+ cont = 1;
+ } else
+ cont = 0;
+ if (len > 0) {
+ if (!(linep = malloc(olen + len + 1))){
+ if (olen > 0)
+ free(olinep);
+ free(lp->l_groupname);
+ free(lp);
+ freelists(this);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ if (olen > 0) {
+ memcpy(linep, olinep, olen);
+ free(olinep);
+ }
+ memcpy(linep + olen, pos, len);
+ olen += len;
+ *(linep + olen) = '\0';
+ olinep = linep;
+ }
+ if (cont) {
+ if (fgets(line, LINSIZ, pvt->fp)) {
+ pos = line;
+ len = strlen(pos);
+ } else
+ cont = 0;
+ }
+ } while (cont);
+ lp->l_line = linep;
+ lp->l_next = pvt->linehead;
+ pvt->linehead = lp;
+
+ /*
+ * If this is the one we wanted, we are done.
+ */
+ if (!strcmp(lp->l_groupname, group))
+ return (lp);
+ }
+ }
+ return (NULL);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/lcl_nw.c b/usr/src/lib/libresolv2_joy/common/irs/lcl_nw.c
new file mode 100644
index 0000000000..2e5cd55a6c
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/lcl_nw.c
@@ -0,0 +1,373 @@
+/*
+ * Copyright (c) 1989, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: lcl_nw.c,v 1.4 2005/04/27 04:56:31 sra Exp $";
+/* from getgrent.c 8.2 (Berkeley) 3/21/94"; */
+/* from BSDI Id: getgrent.c,v 2.8 1996/05/28 18:15:14 bostic Exp $ */
+#endif /* LIBC_SCCS and not lint */
+
+/* Imports */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <irs.h>
+#include <isc/memcluster.h>
+
+#include "port_after.h"
+
+#include <isc/misc.h>
+#include "irs_p.h"
+#include "lcl_p.h"
+
+#define MAXALIASES 35
+#define MAXADDRSIZE 4
+
+struct pvt {
+ FILE * fp;
+ char line[BUFSIZ+1];
+ struct nwent net;
+ char * aliases[MAXALIASES];
+ char addr[MAXADDRSIZE];
+ struct __res_state * res;
+ void (*free_res)(void *);
+};
+
+/* Forward */
+
+static void nw_close(struct irs_nw *);
+static struct nwent * nw_byname(struct irs_nw *, const char *, int);
+static struct nwent * nw_byaddr(struct irs_nw *, void *, int, int);
+static struct nwent * nw_next(struct irs_nw *);
+static void nw_rewind(struct irs_nw *);
+static void nw_minimize(struct irs_nw *);
+static struct __res_state * nw_res_get(struct irs_nw *this);
+static void nw_res_set(struct irs_nw *this,
+ struct __res_state *res,
+ void (*free_res)(void *));
+
+static int init(struct irs_nw *this);
+
+/* Portability. */
+
+#ifndef SEEK_SET
+# define SEEK_SET 0
+#endif
+
+/* Public */
+
+struct irs_nw *
+irs_lcl_nw(struct irs_acc *this) {
+ struct irs_nw *nw;
+ struct pvt *pvt;
+
+ UNUSED(this);
+
+ if (!(pvt = memget(sizeof *pvt))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ if (!(nw = memget(sizeof *nw))) {
+ memput(pvt, sizeof *pvt);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(nw, 0x5e, sizeof *nw);
+ nw->private = pvt;
+ nw->close = nw_close;
+ nw->byname = nw_byname;
+ nw->byaddr = nw_byaddr;
+ nw->next = nw_next;
+ nw->rewind = nw_rewind;
+ nw->minimize = nw_minimize;
+ nw->res_get = nw_res_get;
+ nw->res_set = nw_res_set;
+ return (nw);
+}
+
+/* Methods */
+
+static void
+nw_close(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ nw_minimize(this);
+ if (pvt->res && pvt->free_res)
+ (*pvt->free_res)(pvt->res);
+ if (pvt->fp)
+ (void)fclose(pvt->fp);
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+static struct nwent *
+nw_byaddr(struct irs_nw *this, void *net, int length, int type) {
+ struct nwent *p;
+
+ if (init(this) == -1)
+ return(NULL);
+
+ nw_rewind(this);
+ while ((p = nw_next(this)) != NULL)
+ if (p->n_addrtype == type && p->n_length == length)
+ if (bitncmp(p->n_addr, net, length) == 0)
+ break;
+ return (p);
+}
+
+static struct nwent *
+nw_byname(struct irs_nw *this, const char *name, int type) {
+ struct nwent *p;
+ char **ap;
+
+ if (init(this) == -1)
+ return(NULL);
+
+ nw_rewind(this);
+ while ((p = nw_next(this)) != NULL) {
+ if (ns_samename(p->n_name, name) == 1 &&
+ p->n_addrtype == type)
+ break;
+ for (ap = p->n_aliases; *ap; ap++)
+ if ((ns_samename(*ap, name) == 1) &&
+ (p->n_addrtype == type))
+ goto found;
+ }
+ found:
+ return (p);
+}
+
+static void
+nw_rewind(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->fp) {
+ if (fseek(pvt->fp, 0L, SEEK_SET) == 0)
+ return;
+ (void)fclose(pvt->fp);
+ }
+ if (!(pvt->fp = fopen(_PATH_NETWORKS, "r")))
+ return;
+ if (fcntl(fileno(pvt->fp), F_SETFD, 1) < 0) {
+ (void)fclose(pvt->fp);
+ pvt->fp = NULL;
+ }
+}
+
+static struct nwent *
+nw_next(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ struct nwent *ret = NULL;
+ char *p, *cp, **q;
+ char *bufp, *ndbuf, *dbuf = NULL;
+ int c, bufsiz, offset = 0;
+
+ if (init(this) == -1)
+ return(NULL);
+
+ if (pvt->fp == NULL)
+ nw_rewind(this);
+ if (pvt->fp == NULL) {
+ RES_SET_H_ERRNO(pvt->res, NETDB_INTERNAL);
+ return (NULL);
+ }
+ bufp = pvt->line;
+ bufsiz = sizeof(pvt->line);
+
+ again:
+ p = fgets(bufp + offset, bufsiz - offset, pvt->fp);
+ if (p == NULL)
+ goto cleanup;
+ if (!strchr(p, '\n') && !feof(pvt->fp)) {
+#define GROWBUF 1024
+ /* allocate space for longer line */
+ if (dbuf == NULL) {
+ if ((ndbuf = malloc(bufsiz + GROWBUF)) != NULL)
+ strcpy(ndbuf, bufp);
+ } else
+ ndbuf = realloc(dbuf, bufsiz + GROWBUF);
+ if (ndbuf) {
+ dbuf = ndbuf;
+ bufp = dbuf;
+ bufsiz += GROWBUF;
+ offset = strlen(dbuf);
+ } else {
+ /* allocation failed; skip this long line */
+ while ((c = getc(pvt->fp)) != EOF)
+ if (c == '\n')
+ break;
+ if (c != EOF)
+ ungetc(c, pvt->fp);
+ }
+ goto again;
+ }
+
+ p -= offset;
+ offset = 0;
+
+ if (*p == '#')
+ goto again;
+
+ cp = strpbrk(p, "#\n");
+ if (cp != NULL)
+ *cp = '\0';
+ pvt->net.n_name = p;
+ cp = strpbrk(p, " \t");
+ if (cp == NULL)
+ goto again;
+ *cp++ = '\0';
+ while (*cp == ' ' || *cp == '\t')
+ cp++;
+ p = strpbrk(cp, " \t");
+ if (p != NULL)
+ *p++ = '\0';
+ pvt->net.n_length = inet_net_pton(AF_INET, cp, pvt->addr,
+ sizeof pvt->addr);
+ if (pvt->net.n_length < 0)
+ goto again;
+ pvt->net.n_addrtype = AF_INET;
+ pvt->net.n_addr = pvt->addr;
+ q = pvt->net.n_aliases = pvt->aliases;
+ if (p != NULL) {
+ cp = p;
+ while (cp && *cp) {
+ if (*cp == ' ' || *cp == '\t') {
+ cp++;
+ continue;
+ }
+ if (q < &pvt->aliases[MAXALIASES - 1])
+ *q++ = cp;
+ cp = strpbrk(cp, " \t");
+ if (cp != NULL)
+ *cp++ = '\0';
+ }
+ }
+ *q = NULL;
+ ret = &pvt->net;
+
+ cleanup:
+ if (dbuf)
+ free(dbuf);
+
+ return (ret);
+}
+
+static void
+nw_minimize(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->res)
+ res_nclose(pvt->res);
+ if (pvt->fp != NULL) {
+ (void)fclose(pvt->fp);
+ pvt->fp = NULL;
+ }
+}
+
+static struct __res_state *
+nw_res_get(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (!pvt->res) {
+ struct __res_state *res;
+ res = (struct __res_state *)malloc(sizeof *res);
+ if (!res) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(res, 0, sizeof *res);
+ nw_res_set(this, res, free);
+ }
+
+ return (pvt->res);
+}
+
+static void
+nw_res_set(struct irs_nw *this, struct __res_state *res,
+ void (*free_res)(void *)) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->res && pvt->free_res) {
+ res_nclose(pvt->res);
+ (*pvt->free_res)(pvt->res);
+ }
+
+ pvt->res = res;
+ pvt->free_res = free_res;
+}
+
+static int
+init(struct irs_nw *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (!pvt->res && !nw_res_get(this))
+ return (-1);
+ if (((pvt->res->options & RES_INIT) == 0U) &&
+ res_ninit(pvt->res) == -1)
+ return (-1);
+ return (0);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/lcl_p.h b/usr/src/lib/libresolv2_joy/common/irs/lcl_p.h
new file mode 100644
index 0000000000..e3f4f009cb
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/lcl_p.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: lcl_p.h,v 1.3 2005/04/27 04:56:31 sra Exp $
+ */
+
+/*! \file
+ * \brief
+ * lcl_p.h - private include file for the local accessor functions.
+ */
+
+#ifndef _LCL_P_H_INCLUDED
+#define _LCL_P_H_INCLUDED
+
+/*%
+ * Object state.
+ */
+struct lcl_p {
+ struct __res_state * res;
+ void (*free_res) __P((void *));
+};
+
+/*
+ * Externs.
+ */
+
+extern struct irs_acc * irs_lcl_acc __P((const char *));
+extern struct irs_gr * irs_lcl_gr __P((struct irs_acc *));
+extern struct irs_pw * irs_lcl_pw __P((struct irs_acc *));
+extern struct irs_sv * irs_lcl_sv __P((struct irs_acc *));
+extern struct irs_pr * irs_lcl_pr __P((struct irs_acc *));
+extern struct irs_ho * irs_lcl_ho __P((struct irs_acc *));
+extern struct irs_nw * irs_lcl_nw __P((struct irs_acc *));
+extern struct irs_ng * irs_lcl_ng __P((struct irs_acc *));
+
+#endif /*_LCL_P_H_INCLUDED*/
diff --git a/usr/src/lib/libresolv2_joy/common/irs/lcl_pr.c b/usr/src/lib/libresolv2_joy/common/irs/lcl_pr.c
new file mode 100644
index 0000000000..e1538530eb
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/lcl_pr.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (c) 1989, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: lcl_pr.c,v 1.4 2006/03/09 23:57:56 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/* extern */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <irs.h>
+#include <isc/memcluster.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "lcl_p.h"
+
+#ifndef _PATH_PROTOCOLS
+#define _PATH_PROTOCOLS "/etc/protocols"
+#endif
+#define MAXALIASES 35
+
+/* Types */
+
+struct pvt {
+ FILE * fp;
+ char line[BUFSIZ+1];
+ char * dbuf;
+ struct protoent proto;
+ char * proto_aliases[MAXALIASES];
+};
+
+/* Forward */
+
+static void pr_close(struct irs_pr *);
+static struct protoent * pr_next(struct irs_pr *);
+static struct protoent * pr_byname(struct irs_pr *, const char *);
+static struct protoent * pr_bynumber(struct irs_pr *, int);
+static void pr_rewind(struct irs_pr *);
+static void pr_minimize(struct irs_pr *);
+
+/* Portability. */
+
+#ifndef SEEK_SET
+# define SEEK_SET 0
+#endif
+
+/* Public */
+
+struct irs_pr *
+irs_lcl_pr(struct irs_acc *this) {
+ struct irs_pr *pr;
+ struct pvt *pvt;
+
+ if (!(pr = memget(sizeof *pr))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ if (!(pvt = memget(sizeof *pvt))) {
+ memput(pr, sizeof *this);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ pr->private = pvt;
+ pr->close = pr_close;
+ pr->byname = pr_byname;
+ pr->bynumber = pr_bynumber;
+ pr->next = pr_next;
+ pr->rewind = pr_rewind;
+ pr->minimize = pr_minimize;
+ pr->res_get = NULL;
+ pr->res_set = NULL;
+ return (pr);
+}
+
+/* Methods */
+
+static void
+pr_close(struct irs_pr *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->fp)
+ (void) fclose(pvt->fp);
+ if (pvt->dbuf)
+ free(pvt->dbuf);
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+static struct protoent *
+pr_byname(struct irs_pr *this, const char *name) {
+
+ struct protoent *p;
+ char **cp;
+
+ pr_rewind(this);
+ while ((p = pr_next(this))) {
+ if (!strcmp(p->p_name, name))
+ goto found;
+ for (cp = p->p_aliases; *cp; cp++)
+ if (!strcmp(*cp, name))
+ goto found;
+ }
+ found:
+ return (p);
+}
+
+static struct protoent *
+pr_bynumber(struct irs_pr *this, int proto) {
+ struct protoent *p;
+
+ pr_rewind(this);
+ while ((p = pr_next(this)))
+ if (p->p_proto == proto)
+ break;
+ return (p);
+}
+
+static void
+pr_rewind(struct irs_pr *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->fp) {
+ if (fseek(pvt->fp, 0L, SEEK_SET) == 0)
+ return;
+ (void)fclose(pvt->fp);
+ }
+ if (!(pvt->fp = fopen(_PATH_PROTOCOLS, "r" )))
+ return;
+ if (fcntl(fileno(pvt->fp), F_SETFD, 1) < 0) {
+ (void)fclose(pvt->fp);
+ pvt->fp = NULL;
+ }
+}
+
+static struct protoent *
+pr_next(struct irs_pr *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+ char *p, *cp, **q;
+ char *bufp, *ndbuf, *dbuf = NULL;
+ int c, bufsiz, offset;
+
+ if (!pvt->fp)
+ pr_rewind(this);
+ if (!pvt->fp)
+ return (NULL);
+ if (pvt->dbuf) {
+ free(pvt->dbuf);
+ pvt->dbuf = NULL;
+ }
+ bufp = pvt->line;
+ bufsiz = BUFSIZ;
+ offset = 0;
+ again:
+ if ((p = fgets(bufp + offset, bufsiz - offset, pvt->fp)) == NULL) {
+ if (dbuf)
+ free(dbuf);
+ return (NULL);
+ }
+ if (!strchr(p, '\n') && !feof(pvt->fp)) {
+#define GROWBUF 1024
+ /* allocate space for longer line */
+ if (dbuf == NULL) {
+ if ((ndbuf = malloc(bufsiz + GROWBUF)) != NULL)
+ strcpy(ndbuf, bufp);
+ } else
+ ndbuf = realloc(dbuf, bufsiz + GROWBUF);
+ if (ndbuf) {
+ dbuf = ndbuf;
+ bufp = dbuf;
+ bufsiz += GROWBUF;
+ offset = strlen(dbuf);
+ } else {
+ /* allocation failed; skip this long line */
+ while ((c = getc(pvt->fp)) != EOF)
+ if (c == '\n')
+ break;
+ if (c != EOF)
+ ungetc(c, pvt->fp);
+ }
+ goto again;
+ }
+
+ p -= offset;
+ offset = 0;
+
+ if (*p == '#')
+ goto again;
+ cp = strpbrk(p, "#\n");
+ if (cp != NULL)
+ *cp = '\0';
+ pvt->proto.p_name = p;
+ cp = strpbrk(p, " \t");
+ if (cp == NULL)
+ goto again;
+ *cp++ = '\0';
+ while (*cp == ' ' || *cp == '\t')
+ cp++;
+ p = strpbrk(cp, " \t");
+ if (p != NULL)
+ *p++ = '\0';
+ pvt->proto.p_proto = atoi(cp);
+ q = pvt->proto.p_aliases = pvt->proto_aliases;
+ if (p != NULL) {
+ cp = p;
+ while (cp && *cp) {
+ if (*cp == ' ' || *cp == '\t') {
+ cp++;
+ continue;
+ }
+ if (q < &pvt->proto_aliases[MAXALIASES - 1])
+ *q++ = cp;
+ cp = strpbrk(cp, " \t");
+ if (cp != NULL)
+ *cp++ = '\0';
+ }
+ }
+ *q = NULL;
+ pvt->dbuf = dbuf;
+ return (&pvt->proto);
+}
+
+static void
+pr_minimize(struct irs_pr *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->fp != NULL) {
+ (void)fclose(pvt->fp);
+ pvt->fp = NULL;
+ }
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/lcl_sv.c b/usr/src/lib/libresolv2_joy/common/irs/lcl_sv.c
new file mode 100644
index 0000000000..ad6526430c
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/lcl_sv.c
@@ -0,0 +1,432 @@
+/*
+ * Copyright (c) 1989, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: lcl_sv.c,v 1.4 2005/04/27 04:56:31 sra Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/* extern */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+#ifdef IRS_LCL_SV_DB
+#include <db.h>
+#endif
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include <irs.h>
+#include <isc/memcluster.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "lcl_p.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) ((size_t)sprintf x)
+#endif
+
+/* Types */
+
+struct pvt {
+#ifdef IRS_LCL_SV_DB
+ DB * dbh;
+ int dbf;
+#endif
+ struct lcl_sv sv;
+};
+
+/* Forward */
+
+static void sv_close(struct irs_sv*);
+static struct servent * sv_next(struct irs_sv *);
+static struct servent * sv_byname(struct irs_sv *, const char *,
+ const char *);
+static struct servent * sv_byport(struct irs_sv *, int, const char *);
+static void sv_rewind(struct irs_sv *);
+static void sv_minimize(struct irs_sv *);
+/*global*/ struct servent * irs_lclsv_fnxt(struct lcl_sv *);
+#ifdef IRS_LCL_SV_DB
+static struct servent * sv_db_rec(struct lcl_sv *, DBT *, DBT *);
+#endif
+
+/* Portability */
+
+#ifndef SEEK_SET
+# define SEEK_SET 0
+#endif
+
+/* Public */
+
+struct irs_sv *
+irs_lcl_sv(struct irs_acc *this) {
+ struct irs_sv *sv;
+ struct pvt *pvt;
+
+ UNUSED(this);
+
+ if ((sv = memget(sizeof *sv)) == NULL) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(sv, 0x5e, sizeof *sv);
+ if ((pvt = memget(sizeof *pvt)) == NULL) {
+ memput(sv, sizeof *sv);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(pvt, 0, sizeof *pvt);
+ sv->private = pvt;
+ sv->close = sv_close;
+ sv->next = sv_next;
+ sv->byname = sv_byname;
+ sv->byport = sv_byport;
+ sv->rewind = sv_rewind;
+ sv->minimize = sv_minimize;
+ sv->res_get = NULL;
+ sv->res_set = NULL;
+#ifdef IRS_LCL_SV_DB
+ pvt->dbf = R_FIRST;
+#endif
+ return (sv);
+}
+
+/* Methods */
+
+static void
+sv_close(struct irs_sv *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+#ifdef IRS_LCL_SV_DB
+ if (pvt->dbh != NULL)
+ (*pvt->dbh->close)(pvt->dbh);
+#endif
+ if (pvt->sv.fp)
+ fclose(pvt->sv.fp);
+ memput(pvt, sizeof *pvt);
+ memput(this, sizeof *this);
+}
+
+static struct servent *
+sv_byname(struct irs_sv *this, const char *name, const char *proto) {
+#ifdef IRS_LCL_SV_DB
+ struct pvt *pvt = (struct pvt *)this->private;
+#endif
+ struct servent *p;
+ char **cp;
+
+ sv_rewind(this);
+#ifdef IRS_LCL_SV_DB
+ if (pvt->dbh != NULL) {
+ DBT key, data;
+
+ /* Note that (sizeof "/") == 2. */
+ if ((strlen(name) + sizeof "/" + proto ? strlen(proto) : 0)
+ > sizeof pvt->sv.line)
+ goto try_local;
+ key.data = pvt->sv.line;
+ key.size = SPRINTF((pvt->sv.line, "%s/%s", name,
+ proto ? proto : "")) + 1;
+ if (proto != NULL) {
+ if ((*pvt->dbh->get)(pvt->dbh, &key, &data, 0) != 0)
+ return (NULL);
+ } else if ((*pvt->dbh->seq)(pvt->dbh, &key, &data, R_CURSOR)
+ != 0)
+ return (NULL);
+ return (sv_db_rec(&pvt->sv, &key, &data));
+ }
+ try_local:
+#endif
+
+ while ((p = sv_next(this))) {
+ if (strcmp(name, p->s_name) == 0)
+ goto gotname;
+ for (cp = p->s_aliases; *cp; cp++)
+ if (strcmp(name, *cp) == 0)
+ goto gotname;
+ continue;
+ gotname:
+ if (proto == NULL || strcmp(p->s_proto, proto) == 0)
+ break;
+ }
+ return (p);
+}
+
+static struct servent *
+sv_byport(struct irs_sv *this, int port, const char *proto) {
+#ifdef IRS_LCL_SV_DB
+ struct pvt *pvt = (struct pvt *)this->private;
+#endif
+ struct servent *p;
+
+ sv_rewind(this);
+#ifdef IRS_LCL_SV_DB
+ if (pvt->dbh != NULL) {
+ DBT key, data;
+ u_short *ports;
+
+ ports = (u_short *)pvt->sv.line;
+ ports[0] = 0;
+ ports[1] = port;
+ key.data = ports;
+ key.size = sizeof(u_short) * 2;
+ if (proto && *proto) {
+ strncpy((char *)ports + key.size, proto,
+ BUFSIZ - key.size);
+ key.size += strlen((char *)ports + key.size) + 1;
+ if ((*pvt->dbh->get)(pvt->dbh, &key, &data, 0) != 0)
+ return (NULL);
+ } else {
+ if ((*pvt->dbh->seq)(pvt->dbh, &key, &data, R_CURSOR)
+ != 0)
+ return (NULL);
+ }
+ return (sv_db_rec(&pvt->sv, &key, &data));
+ }
+#endif
+ while ((p = sv_next(this))) {
+ if (p->s_port != port)
+ continue;
+ if (proto == NULL || strcmp(p->s_proto, proto) == 0)
+ break;
+ }
+ return (p);
+}
+
+static void
+sv_rewind(struct irs_sv *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+ if (pvt->sv.fp) {
+ if (fseek(pvt->sv.fp, 0L, SEEK_SET) == 0)
+ return;
+ (void)fclose(pvt->sv.fp);
+ pvt->sv.fp = NULL;
+ }
+#ifdef IRS_LCL_SV_DB
+ pvt->dbf = R_FIRST;
+ if (pvt->dbh != NULL)
+ return;
+ pvt->dbh = dbopen(_PATH_SERVICES_DB, O_RDONLY,O_RDONLY,DB_BTREE, NULL);
+ if (pvt->dbh != NULL) {
+ if (fcntl((*pvt->dbh->fd)(pvt->dbh), F_SETFD, 1) < 0) {
+ (*pvt->dbh->close)(pvt->dbh);
+ pvt->dbh = NULL;
+ }
+ return;
+ }
+#endif
+ if ((pvt->sv.fp = fopen(_PATH_SERVICES, "r")) == NULL)
+ return;
+ if (fcntl(fileno(pvt->sv.fp), F_SETFD, 1) < 0) {
+ (void)fclose(pvt->sv.fp);
+ pvt->sv.fp = NULL;
+ }
+}
+
+static struct servent *
+sv_next(struct irs_sv *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+#ifdef IRS_LCL_SV_DB
+ if (pvt->dbh == NULL && pvt->sv.fp == NULL)
+#else
+ if (pvt->sv.fp == NULL)
+#endif
+ sv_rewind(this);
+
+#ifdef IRS_LCL_SV_DB
+ if (pvt->dbh != NULL) {
+ DBT key, data;
+
+ while ((*pvt->dbh->seq)(pvt->dbh, &key, &data, pvt->dbf) == 0){
+ pvt->dbf = R_NEXT;
+ if (((char *)key.data)[0])
+ continue;
+ return (sv_db_rec(&pvt->sv, &key, &data));
+ }
+ }
+#endif
+
+ if (pvt->sv.fp == NULL)
+ return (NULL);
+ return (irs_lclsv_fnxt(&pvt->sv));
+}
+
+static void
+sv_minimize(struct irs_sv *this) {
+ struct pvt *pvt = (struct pvt *)this->private;
+
+#ifdef IRS_LCL_SV_DB
+ if (pvt->dbh != NULL) {
+ (*pvt->dbh->close)(pvt->dbh);
+ pvt->dbh = NULL;
+ }
+#endif
+ if (pvt->sv.fp != NULL) {
+ (void)fclose(pvt->sv.fp);
+ pvt->sv.fp = NULL;
+ }
+}
+
+/* Quasipublic. */
+
+struct servent *
+irs_lclsv_fnxt(struct lcl_sv *sv) {
+ char *p, *cp, **q;
+
+ again:
+ if ((p = fgets(sv->line, BUFSIZ, sv->fp)) == NULL)
+ return (NULL);
+ if (*p == '#')
+ goto again;
+ sv->serv.s_name = p;
+ while (*p && *p != '\n' && *p != ' ' && *p != '\t' && *p != '#')
+ ++p;
+ if (*p == '\0' || *p == '#' || *p == '\n')
+ goto again;
+ *p++ = '\0';
+ while (*p == ' ' || *p == '\t')
+ p++;
+ if (*p == '\0' || *p == '#' || *p == '\n')
+ goto again;
+ sv->serv.s_port = htons((u_short)strtol(p, &cp, 10));
+ if (cp == p || (*cp != '/' && *cp != ','))
+ goto again;
+ p = cp + 1;
+ sv->serv.s_proto = p;
+
+ q = sv->serv.s_aliases = sv->serv_aliases;
+
+ while (*p && *p != '\n' && *p != ' ' && *p != '\t' && *p != '#')
+ ++p;
+
+ while (*p == ' ' || *p == '\t') {
+ *p++ = '\0';
+ while (*p == ' ' || *p == '\t')
+ ++p;
+ if (*p == '\0' || *p == '#' || *p == '\n')
+ break;
+ if (q < &sv->serv_aliases[IRS_SV_MAXALIASES - 1])
+ *q++ = p;
+ while (*p && *p != '\n' && *p != ' ' && *p != '\t' && *p != '#')
+ ++p;
+ }
+
+ *p = '\0';
+ *q = NULL;
+ return (&sv->serv);
+}
+
+/* Private. */
+
+#ifdef IRS_LCL_SV_DB
+static struct servent *
+sv_db_rec(struct lcl_sv *sv, DBT *key, DBT *data) {
+ char *p, **q;
+ int n;
+
+ p = data->data;
+ p[data->size - 1] = '\0'; /*%< should be, but we depend on it */
+ if (((char *)key->data)[0] == '\0') {
+ if (key->size < sizeof(u_short)*2 || data->size < 2)
+ return (NULL);
+ sv->serv.s_port = ((u_short *)key->data)[1];
+ n = strlen(p) + 1;
+ if ((size_t)n > sizeof(sv->line)) {
+ n = sizeof(sv->line);
+ }
+ memcpy(sv->line, p, n);
+ sv->serv.s_name = sv->line;
+ if ((sv->serv.s_proto = strchr(sv->line, '/')) != NULL)
+ *(sv->serv.s_proto)++ = '\0';
+ p += n;
+ data->size -= n;
+ } else {
+ if (data->size < sizeof(u_short) + 1)
+ return (NULL);
+ if (key->size > sizeof(sv->line))
+ key->size = sizeof(sv->line);
+ ((char *)key->data)[key->size - 1] = '\0';
+ memcpy(sv->line, key->data, key->size);
+ sv->serv.s_name = sv->line;
+ if ((sv->serv.s_proto = strchr(sv->line, '/')) != NULL)
+ *(sv->serv.s_proto)++ = '\0';
+ sv->serv.s_port = *(u_short *)data->data;
+ p += sizeof(u_short);
+ data->size -= sizeof(u_short);
+ }
+ q = sv->serv.s_aliases = sv->serv_aliases;
+ while (data->size > 0 && q < &sv->serv_aliases[IRS_SV_MAXALIASES - 1]) {
+
+ *q++ = p;
+ n = strlen(p) + 1;
+ data->size -= n;
+ p += n;
+ }
+ *q = NULL;
+ return (&sv->serv);
+}
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/nis.c b/usr/src/lib/libresolv2_joy/common/irs/nis.c
new file mode 100644
index 0000000000..ac1796543c
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/nis.c
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: nis.c,v 1.3 2005/04/27 04:56:32 sra Exp $";
+#endif
+
+/* Imports */
+
+#include "port_before.h"
+
+#ifdef WANT_IRS_NIS
+
+#include <rpc/rpc.h>
+#include <rpc/xdr.h>
+#include <rpcsvc/yp_prot.h>
+#include <rpcsvc/ypclnt.h>
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#ifdef T_NULL
+#undef T_NULL /* Silence re-definition warning of T_NULL. */
+#endif
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+#include <isc/memcluster.h>
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "hesiod.h"
+#include "nis_p.h"
+
+/* Forward */
+
+static void nis_close(struct irs_acc *);
+static struct __res_state * nis_res_get(struct irs_acc *);
+static void nis_res_set(struct irs_acc *, struct __res_state *,
+ void (*)(void *));
+
+/* Public */
+
+struct irs_acc *
+irs_nis_acc(const char *options) {
+ struct nis_p *nis;
+ struct irs_acc *acc;
+ char *domain;
+
+ UNUSED(options);
+
+ if (yp_get_default_domain(&domain) != 0)
+ return (NULL);
+ if (!(nis = memget(sizeof *nis))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(nis, 0, sizeof *nis);
+ if (!(acc = memget(sizeof *acc))) {
+ memput(nis, sizeof *nis);
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(acc, 0x5e, sizeof *acc);
+ acc->private = nis;
+ nis->domain = strdup(domain);
+#ifdef WANT_IRS_GR
+ acc->gr_map = irs_nis_gr;
+#else
+ acc->gr_map = NULL;
+#endif
+#ifdef WANT_IRS_PW
+ acc->pw_map = irs_nis_pw;
+#else
+ acc->pw_map = NULL;
+#endif
+ acc->sv_map = irs_nis_sv;
+ acc->pr_map = irs_nis_pr;
+ acc->ho_map = irs_nis_ho;
+ acc->nw_map = irs_nis_nw;
+ acc->ng_map = irs_nis_ng;
+ acc->res_get = nis_res_get;
+ acc->res_set = nis_res_set;
+ acc->close = nis_close;
+ return (acc);
+}
+
+/* Methods */
+
+static struct __res_state *
+nis_res_get(struct irs_acc *this) {
+ struct nis_p *nis = (struct nis_p *)this->private;
+
+ if (nis->res == NULL) {
+ struct __res_state *res;
+ res = (struct __res_state *)malloc(sizeof *res);
+ if (res == NULL)
+ return (NULL);
+ memset(res, 0, sizeof *res);
+ nis_res_set(this, res, free);
+ }
+
+ if ((nis->res->options & RES_INIT) == 0 &&
+ res_ninit(nis->res) < 0)
+ return (NULL);
+
+ return (nis->res);
+}
+
+static void
+nis_res_set(struct irs_acc *this, struct __res_state *res,
+ void (*free_res)(void *)) {
+ struct nis_p *nis = (struct nis_p *)this->private;
+
+ if (nis->res && nis->free_res) {
+ res_nclose(nis->res);
+ (*nis->free_res)(nis->res);
+ }
+
+ nis->res = res;
+ nis->free_res = free_res;
+}
+
+static void
+nis_close(struct irs_acc *this) {
+ struct nis_p *nis = (struct nis_p *)this->private;
+
+ if (nis->res && nis->free_res)
+ (*nis->free_res)(nis->res);
+ free(nis->domain);
+ memput(nis, sizeof *nis);
+ memput(this, sizeof *this);
+}
+
+#endif /*WANT_IRS_NIS*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/nis_p.h b/usr/src/lib/libresolv2_joy/common/irs/nis_p.h
new file mode 100644
index 0000000000..70e2948d67
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/nis_p.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: nis_p.h,v 1.3 2005/04/27 04:56:33 sra Exp $
+ */
+
+/*! \file
+ * \brief
+ * nis_p.h - private include file for the NIS functions.
+ */
+
+/*%
+ * Object state.
+ */
+struct nis_p {
+ char * domain;
+ struct __res_state * res;
+ void (*free_res) __P((void *));
+};
+
+
+/*
+ * Methods.
+ */
+
+extern struct irs_gr * irs_nis_gr __P((struct irs_acc *));
+extern struct irs_pw * irs_nis_pw __P((struct irs_acc *));
+extern struct irs_sv * irs_nis_sv __P((struct irs_acc *));
+extern struct irs_pr * irs_nis_pr __P((struct irs_acc *));
+extern struct irs_ho * irs_nis_ho __P((struct irs_acc *));
+extern struct irs_nw * irs_nis_nw __P((struct irs_acc *));
+extern struct irs_ng * irs_nis_ng __P((struct irs_acc *));
diff --git a/usr/src/lib/libresolv2_joy/common/irs/nul_ng.c b/usr/src/lib/libresolv2_joy/common/irs/nul_ng.c
new file mode 100644
index 0000000000..504813bde1
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/nul_ng.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: nul_ng.c,v 1.3 2005/04/27 04:56:34 sra Exp $";
+#endif
+
+/*! \file
+ * \brief
+ * nul_ng.c - the netgroup accessor null map
+ */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+#include <stdio.h>
+#include <string.h>
+#include <netdb.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include <irs.h>
+#include <isc/memcluster.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+#include "hesiod.h"
+#include "dns_p.h"
+
+/* Forward. */
+
+static void ng_close(struct irs_ng *);
+static int ng_next(struct irs_ng *, const char **,
+ const char **, const char **);
+static int ng_test(struct irs_ng *,
+ const char *, const char *,
+ const char *, const char *);
+static void ng_rewind(struct irs_ng *, const char *);
+static void ng_minimize(struct irs_ng *);
+
+/* Public. */
+
+struct irs_ng *
+irs_nul_ng(struct irs_acc *this) {
+ struct irs_ng *ng;
+
+ UNUSED(this);
+
+ if (!(ng = memget(sizeof *ng))) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ memset(ng, 0x5e, sizeof *ng);
+ ng->private = NULL;
+ ng->close = ng_close;
+ ng->next = ng_next;
+ ng->test = ng_test;
+ ng->rewind = ng_rewind;
+ ng->minimize = ng_minimize;
+ return (ng);
+}
+
+/* Methods. */
+
+static void
+ng_close(struct irs_ng *this) {
+ memput(this, sizeof *this);
+}
+
+/* ARGSUSED */
+static int
+ng_next(struct irs_ng *this, const char **host, const char **user,
+ const char **domain)
+{
+ UNUSED(this);
+ UNUSED(host);
+ UNUSED(user);
+ UNUSED(domain);
+ errno = ENOENT;
+ return (-1);
+}
+
+static int
+ng_test(struct irs_ng *this, const char *name,
+ const char *user, const char *host, const char *domain)
+{
+ UNUSED(this);
+ UNUSED(name);
+ UNUSED(user);
+ UNUSED(host);
+ UNUSED(domain);
+ errno = ENODEV;
+ return (-1);
+}
+
+static void
+ng_rewind(struct irs_ng *this, const char *netgroup) {
+ UNUSED(this);
+ UNUSED(netgroup);
+ /* NOOP */
+}
+
+static void
+ng_minimize(struct irs_ng *this) {
+ UNUSED(this);
+ /* NOOP */
+}
diff --git a/usr/src/lib/libresolv2_joy/common/irs/pathnames.h b/usr/src/lib/libresolv2_joy/common/irs/pathnames.h
new file mode 100644
index 0000000000..1646842155
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/pathnames.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: pathnames.h,v 1.3 2005/04/27 04:56:34 sra Exp $
+ */
+
+#ifndef _PATH_IRS_CONF
+#define _PATH_IRS_CONF "/etc/irs.conf"
+#endif
+
+#ifndef _PATH_NETWORKS
+#define _PATH_NETWORKS "/etc/networks"
+#endif
+
+#ifndef _PATH_GROUP
+#define _PATH_GROUP "/etc/group"
+#endif
+
+#ifndef _PATH_NETGROUP
+#define _PATH_NETGROUP "/etc/netgroup"
+#endif
+
+#ifndef _PATH_SERVICES
+#define _PATH_SERVICES "/etc/services"
+#endif
+
+#ifdef IRS_LCL_SV_DB
+#ifndef _PATH_SERVICES_DB
+#define _PATH_SERVICES_DB _PATH_SERVICES ".db"
+#endif
+#endif
+
+#ifndef _PATH_HESIOD_CONF
+#define _PATH_HESIOD_CONF "/etc/hesiod.conf"
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/irs/util.c b/usr/src/lib/libresolv2_joy/common/irs/util.c
new file mode 100644
index 0000000000..9c70eabddc
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/irs/util.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: util.c,v 1.3 2005/04/27 04:56:34 sra Exp $";
+#endif
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include <irs.h>
+
+#include "port_after.h"
+
+#include "irs_p.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) sprintf x
+#endif
+
+void
+map_v4v6_address(const char *src, char *dst) {
+ u_char *p = (u_char *)dst;
+ char tmp[NS_INADDRSZ];
+ int i;
+
+ /* Stash a temporary copy so our caller can update in place. */
+ memcpy(tmp, src, NS_INADDRSZ);
+ /* Mark this ipv6 addr as a mapped ipv4. */
+ for (i = 0; i < 10; i++)
+ *p++ = 0x00;
+ *p++ = 0xff;
+ *p++ = 0xff;
+ /* Retrieve the saved copy and we're done. */
+ memcpy((void*)p, tmp, NS_INADDRSZ);
+}
+
+int
+make_group_list(struct irs_gr *this, const char *name,
+ gid_t basegid, gid_t *groups, int *ngroups)
+{
+ struct group *grp;
+ int i, ng;
+ int ret, maxgroups;
+
+ ret = -1;
+ ng = 0;
+ maxgroups = *ngroups;
+ /*
+ * When installing primary group, duplicate it;
+ * the first element of groups is the effective gid
+ * and will be overwritten when a setgid file is executed.
+ */
+ if (ng >= maxgroups)
+ goto done;
+ groups[ng++] = basegid;
+ if (ng >= maxgroups)
+ goto done;
+ groups[ng++] = basegid;
+ /*
+ * Scan the group file to find additional groups.
+ */
+ (*this->rewind)(this);
+ while ((grp = (*this->next)(this)) != NULL) {
+ if ((gid_t)grp->gr_gid == basegid)
+ continue;
+ for (i = 0; grp->gr_mem[i]; i++) {
+ if (!strcmp(grp->gr_mem[i], name)) {
+ if (ng >= maxgroups)
+ goto done;
+ groups[ng++] = grp->gr_gid;
+ break;
+ }
+ }
+ }
+ ret = 0;
+ done:
+ *ngroups = ng;
+ return (ret);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/assertions.c b/usr/src/lib/libresolv2_joy/common/isc/assertions.c
new file mode 100644
index 0000000000..b71e5a32d3
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/assertions.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 1997, 1999, 2001 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: assertions.c,v 1.5 2008/11/14 02:36:51 marka Exp $";
+#endif
+
+#include "port_before.h"
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <isc/assertions.h>
+
+#include "port_after.h"
+
+/*
+ * Forward.
+ */
+
+static void default_assertion_failed(const char *, int, assertion_type,
+ const char *, int);
+
+/*
+ * Public.
+ */
+
+assertion_failure_callback __assertion_failed = default_assertion_failed;
+
+void
+set_assertion_failure_callback(assertion_failure_callback f) {
+ if (f == NULL)
+ __assertion_failed = default_assertion_failed;
+ else
+ __assertion_failed = f;
+}
+
+const char *
+assertion_type_to_text(assertion_type type) {
+ const char *result;
+
+ switch (type) {
+ case assert_require:
+ result = "REQUIRE";
+ break;
+ case assert_ensure:
+ result = "ENSURE";
+ break;
+ case assert_insist:
+ result = "INSIST";
+ break;
+ case assert_invariant:
+ result = "INVARIANT";
+ break;
+ default:
+ result = NULL;
+ }
+ return (result);
+}
+
+/*
+ * Private.
+ */
+
+/* coverity[+kill] */
+static void
+default_assertion_failed(const char *file, int line, assertion_type type,
+ const char *cond, int print_errno)
+{
+ fprintf(stderr, "%s:%d: %s(%s)%s%s failed.\n",
+ file, line, assertion_type_to_text(type), cond,
+ (print_errno) ? ": " : "",
+ (print_errno) ? strerror(errno) : "");
+ abort();
+ /* NOTREACHED */
+}
+
+/*! \file */
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/common/base64.c b/usr/src/lib/libresolv2_joy/common/isc/base64.c
index 62776ce4e0..79c35722b1 100644
--- a/usr/src/cmd/ssh/libopenbsd-compat/common/base64.c
+++ b/usr/src/lib/libresolv2_joy/common/isc/base64.c
@@ -1,20 +1,24 @@
-/* $OpenBSD: base64.c,v 1.4 2002/01/02 23:00:10 deraadt Exp $ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
/*
- * Copyright (c) 1996 by Internet Software Consortium.
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium.
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
- * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
- * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
- * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
- * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
- * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
- * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/*
@@ -42,27 +46,33 @@
* IF IBM IS APPRISED OF THE POSSIBILITY OF SUCH DAMAGES.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include "includes.h"
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: base64.c,v 1.4 2005/04/27 04:56:34 sra Exp $";
+#endif /* not lint */
-#if !defined(HAVE_B64_NTOP) && !defined(HAVE___B64_NTOP)
+#include "port_before.h"
#include <sys/types.h>
#include <sys/param.h>
#include <sys/socket.h>
+
#include <netinet/in.h>
#include <arpa/inet.h>
+#include <arpa/nameser.h>
#include <ctype.h>
+#include <resolv_joy.h>
#include <stdio.h>
-
#include <stdlib.h>
#include <string.h>
-#include "base64.h"
+#include "port_after.h"
+
+#ifndef ORIGINAL_ISC_CODE
+#pragma weak __b64_ntop = b64_ntop
+#pragma weak __b64_pton = b64_pton
+#endif /* ORIGINAL_ISC_CODE */
-/* XXX abort illegal in library */
#define Assert(Cond) if (!(Cond)) abort()
static const char Base64[] =
@@ -70,7 +80,7 @@ static const char Base64[] =
static const char Pad64 = '=';
/* (From RFC1521 and draft-ietf-dnssec-secext-03.txt)
- The following encoding technique is taken from RFC 1521 by Borenstein
+ The following encoding technique is taken from RFC1521 by Borenstein
and Freed. It is reproduced here in a slightly edited form for
convenience.
@@ -133,14 +143,13 @@ static const char Pad64 = '=';
*/
int
-b64_ntop(u_char const *src, size_t srclength, char *target, size_t targsize)
-{
+b64_ntop(u_char const *src, size_t srclength, char *target, size_t targsize) {
size_t datalength = 0;
u_char input[3];
u_char output[4];
- int i;
+ size_t i;
- while (2 < srclength) {
+ while (2U < srclength) {
input[0] = *src++;
input[1] = *src++;
input[2] = *src++;
@@ -164,7 +173,7 @@ b64_ntop(u_char const *src, size_t srclength, char *target, size_t targsize)
}
/* Now we worry about padding. */
- if (0 != srclength) {
+ if (0U != srclength) {
/* Get what's left. */
input[0] = input[1] = input[2] = '\0';
for (i = 0; i < srclength; i++)
@@ -181,7 +190,7 @@ b64_ntop(u_char const *src, size_t srclength, char *target, size_t targsize)
return (-1);
target[datalength++] = Base64[output[0]];
target[datalength++] = Base64[output[1]];
- if (srclength == 1)
+ if (srclength == 1U)
target[datalength++] = Pad64;
else
target[datalength++] = Base64[output[2]];
@@ -189,7 +198,7 @@ b64_ntop(u_char const *src, size_t srclength, char *target, size_t targsize)
}
if (datalength >= targsize)
return (-1);
- target[datalength] = '\0'; /* Returned value doesn't count \0. */
+ target[datalength] = '\0'; /*%< Returned value doesn't count \\0. */
return (datalength);
}
@@ -200,7 +209,10 @@ b64_ntop(u_char const *src, size_t srclength, char *target, size_t targsize)
*/
int
-b64_pton(u_char const *src, u_char *target, size_t targsize)
+b64_pton(src, target, targsize)
+ char const *src;
+ u_char *target;
+ size_t targsize;
{
int tarindex, state, ch;
char *pos;
@@ -209,20 +221,20 @@ b64_pton(u_char const *src, u_char *target, size_t targsize)
tarindex = 0;
while ((ch = *src++) != '\0') {
- if (isspace(ch)) /* Skip whitespace anywhere. */
+ if (isspace(ch)) /*%< Skip whitespace anywhere. */
continue;
if (ch == Pad64)
break;
pos = strchr(Base64, ch);
- if (pos == 0) /* A non-base64 character. */
+ if (pos == 0) /*%< A non-base64 character. */
return (-1);
switch (state) {
case 0:
if (target) {
- if (tarindex >= targsize)
+ if ((size_t)tarindex >= targsize)
return (-1);
target[tarindex] = (pos - Base64) << 2;
}
@@ -230,7 +242,7 @@ b64_pton(u_char const *src, u_char *target, size_t targsize)
break;
case 1:
if (target) {
- if (tarindex + 1 >= targsize)
+ if ((size_t)tarindex + 1 >= targsize)
return (-1);
target[tarindex] |= (pos - Base64) >> 4;
target[tarindex+1] = ((pos - Base64) & 0x0f)
@@ -241,7 +253,7 @@ b64_pton(u_char const *src, u_char *target, size_t targsize)
break;
case 2:
if (target) {
- if (tarindex + 1 >= targsize)
+ if ((size_t)tarindex + 1 >= targsize)
return (-1);
target[tarindex] |= (pos - Base64) >> 2;
target[tarindex+1] = ((pos - Base64) & 0x03)
@@ -252,13 +264,15 @@ b64_pton(u_char const *src, u_char *target, size_t targsize)
break;
case 3:
if (target) {
- if (tarindex >= targsize)
+ if ((size_t)tarindex >= targsize)
return (-1);
target[tarindex] |= (pos - Base64);
}
tarindex++;
state = 0;
break;
+ default:
+ abort();
}
}
@@ -267,31 +281,31 @@ b64_pton(u_char const *src, u_char *target, size_t targsize)
* on a byte boundary, and/or with erroneous trailing characters.
*/
- if (ch == Pad64) { /* We got a pad char. */
- ch = *src++; /* Skip it, get next. */
+ if (ch == Pad64) { /*%< We got a pad char. */
+ ch = *src++; /*%< Skip it, get next. */
switch (state) {
- case 0: /* Invalid = in first position */
- case 1: /* Invalid = in second position */
+ case 0: /*%< Invalid = in first position */
+ case 1: /*%< Invalid = in second position */
return (-1);
- case 2: /* Valid, means one byte of info */
+ case 2: /*%< Valid, means one byte of info */
/* Skip any number of spaces. */
- for (; ch != '\0'; ch = *src++)
+ for ((void)NULL; ch != '\0'; ch = *src++)
if (!isspace(ch))
break;
/* Make sure there is another trailing = sign. */
if (ch != Pad64)
return (-1);
- ch = *src++; /* Skip the = */
+ ch = *src++; /*%< Skip the = */
/* Fall through to "single trailing =" case. */
/* FALLTHROUGH */
- case 3: /* Valid, means two bytes of info */
+ case 3: /*%< Valid, means two bytes of info */
/*
* We know this char is an =. Is there anything but
* whitespace after it?
*/
- for (; ch != '\0'; ch = *src++)
+ for ((void)NULL; ch != '\0'; ch = *src++)
if (!isspace(ch))
return (-1);
@@ -316,4 +330,4 @@ b64_pton(u_char const *src, u_char *target, size_t targsize)
return (tarindex);
}
-#endif /* !defined(HAVE_B64_NTOP) && !defined(HAVE___B64_NTOP) */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/bitncmp.c b/usr/src/lib/libresolv2_joy/common/isc/bitncmp.c
new file mode 100644
index 0000000000..efe5009292
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/bitncmp.c
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 1996, 1999, 2001 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: bitncmp.c,v 1.5 2008/11/14 02:36:51 marka Exp $";
+#endif
+
+#include "port_before.h"
+
+#include <sys/types.h>
+
+#include <string.h>
+
+#include "port_after.h"
+
+#include <isc/misc.h>
+
+/*%
+ * int
+ * bitncmp(l, r, n)
+ * compare bit masks l and r, for n bits.
+ * return:
+ * -1, 1, or 0 in the libc tradition.
+ * note:
+ * network byte order assumed. this means 192.5.5.240/28 has
+ * 0x11110000 in its fourth octet.
+ * author:
+ * Paul Vixie (ISC), June 1996
+ */
+int
+bitncmp(const void *l, const void *r, int n) {
+ u_int lb, rb;
+ int x, b;
+
+ b = n / 8;
+ x = memcmp(l, r, b);
+ if (x || (n % 8) == 0)
+ return (x);
+
+ lb = ((const u_char *)l)[b];
+ rb = ((const u_char *)r)[b];
+ for (b = n % 8; b > 0; b--) {
+ if ((lb & 0x80) != (rb & 0x80)) {
+ if (lb & 0x80)
+ return (1);
+ return (-1);
+ }
+ lb <<= 1;
+ rb <<= 1;
+ }
+ return (0);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/ctl_clnt.c b/usr/src/lib/libresolv2_joy/common/isc/ctl_clnt.c
new file mode 100644
index 0000000000..f71001a6d4
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/ctl_clnt.c
@@ -0,0 +1,620 @@
+/*
+ * Copyright (C) 2004, 2005, 2007, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 1998-2003 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(lint) && !defined(SABER)
+static const char rcsid[] = "$Id: ctl_clnt.c,v 1.11 2008/11/14 02:36:51 marka Exp $";
+#endif /* not lint */
+
+/* Extern. */
+
+#include "port_before.h"
+
+#include <sys/param.h>
+#include <sys/file.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <arpa/inet.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#ifdef HAVE_MEMORY_H
+#include <memory.h>
+#endif
+
+#include <isc/assertions.h>
+#include <isc/ctl.h>
+#include <isc/eventlib.h>
+#include <isc/list.h>
+#include <isc/memcluster.h>
+
+#include "ctl_p.h"
+
+#include "port_after.h"
+
+/* Constants. */
+
+
+/* Macros. */
+
+#define donefunc_p(ctx) ((ctx).donefunc != NULL)
+#define arpacode_p(line) (isdigit((unsigned char)(line[0])) && \
+ isdigit((unsigned char)(line[1])) && \
+ isdigit((unsigned char)(line[2])))
+#define arpacont_p(line) (line[3] == '-')
+#define arpadone_p(line) (line[3] == ' ' || line[3] == '\t' || \
+ line[3] == '\r' || line[3] == '\0')
+
+/* Types. */
+
+enum state {
+ initializing = 0, connecting, connected, destroyed
+};
+
+struct ctl_tran {
+ LINK(struct ctl_tran) link;
+ LINK(struct ctl_tran) wlink;
+ struct ctl_cctx * ctx;
+ struct ctl_buf outbuf;
+ ctl_clntdone donefunc;
+ void * uap;
+};
+
+struct ctl_cctx {
+ enum state state;
+ evContext ev;
+ int sock;
+ ctl_logfunc logger;
+ ctl_clntdone donefunc;
+ void * uap;
+ evConnID coID;
+ evTimerID tiID;
+ evFileID rdID;
+ evStreamID wrID;
+ struct ctl_buf inbuf;
+ struct timespec timeout;
+ LIST(struct ctl_tran) tran;
+ LIST(struct ctl_tran) wtran;
+};
+
+/* Forward. */
+
+static struct ctl_tran *new_tran(struct ctl_cctx *, ctl_clntdone, void *, int);
+static void start_write(struct ctl_cctx *);
+static void destroy(struct ctl_cctx *, int);
+static void error(struct ctl_cctx *);
+static void new_state(struct ctl_cctx *, enum state);
+static void conn_done(evContext, void *, int,
+ const void *, int,
+ const void *, int);
+static void write_done(evContext, void *, int, int);
+static void start_read(struct ctl_cctx *);
+static void stop_read(struct ctl_cctx *);
+static void readable(evContext, void *, int, int);
+static void start_timer(struct ctl_cctx *);
+static void stop_timer(struct ctl_cctx *);
+static void touch_timer(struct ctl_cctx *);
+static void timer(evContext, void *,
+ struct timespec, struct timespec);
+
+#ifndef HAVE_MEMCHR
+static void *
+memchr(const void *b, int c, size_t len) {
+ const unsigned char *p = b;
+ size_t i;
+
+ for (i = 0; i < len; i++, p++)
+ if (*p == (unsigned char)c)
+ return ((void *)p);
+ return (NULL);
+}
+#endif
+
+/* Private data. */
+
+static const char * const state_names[] = {
+ "initializing", "connecting", "connected", "destroyed"
+};
+
+/* Public. */
+
+/*%
+ * void
+ * ctl_client()
+ * create, condition, and connect to a listener on the control port.
+ */
+struct ctl_cctx *
+ctl_client(evContext lev, const struct sockaddr *cap, size_t cap_len,
+ const struct sockaddr *sap, size_t sap_len,
+ ctl_clntdone donefunc, void *uap,
+ u_int timeout, ctl_logfunc logger)
+{
+ static const char me[] = "ctl_client";
+ static const int on = 1;
+ struct ctl_cctx *ctx;
+ struct sockaddr *captmp;
+
+ if (logger == NULL)
+ logger = ctl_logger;
+ ctx = memget(sizeof *ctx);
+ if (ctx == NULL) {
+ (*logger)(ctl_error, "%s: getmem: %s", me, strerror(errno));
+ goto fatal;
+ }
+ ctx->state = initializing;
+ ctx->ev = lev;
+ ctx->logger = logger;
+ ctx->timeout = evConsTime(timeout, 0);
+ ctx->donefunc = donefunc;
+ ctx->uap = uap;
+ ctx->coID.opaque = NULL;
+ ctx->tiID.opaque = NULL;
+ ctx->rdID.opaque = NULL;
+ ctx->wrID.opaque = NULL;
+ buffer_init(ctx->inbuf);
+ INIT_LIST(ctx->tran);
+ INIT_LIST(ctx->wtran);
+ ctx->sock = socket(sap->sa_family, SOCK_STREAM, PF_UNSPEC);
+ if (ctx->sock > evHighestFD(ctx->ev)) {
+ ctx->sock = -1;
+ errno = ENOTSOCK;
+ }
+ if (ctx->sock < 0) {
+ (*ctx->logger)(ctl_error, "%s: socket: %s",
+ me, strerror(errno));
+ goto fatal;
+ }
+ if (cap != NULL) {
+ if (setsockopt(ctx->sock, SOL_SOCKET, SO_REUSEADDR,
+ (const char *)&on, sizeof on) != 0) {
+ (*ctx->logger)(ctl_warning,
+ "%s: setsockopt(REUSEADDR): %s",
+ me, strerror(errno));
+ }
+ DE_CONST(cap, captmp);
+ if (bind(ctx->sock, captmp, cap_len) < 0) {
+ (*ctx->logger)(ctl_error, "%s: bind: %s", me,
+ strerror(errno));
+ goto fatal;
+ }
+ }
+ if (evConnect(lev, ctx->sock, (const struct sockaddr *)sap, sap_len,
+ conn_done, ctx, &ctx->coID) < 0) {
+ (*ctx->logger)(ctl_error, "%s: evConnect(fd %d): %s",
+ me, ctx->sock, strerror(errno));
+ fatal:
+ if (ctx != NULL) {
+ if (ctx->sock >= 0)
+ close(ctx->sock);
+ memput(ctx, sizeof *ctx);
+ }
+ return (NULL);
+ }
+ new_state(ctx, connecting);
+ return (ctx);
+}
+
+/*%
+ * void
+ * ctl_endclient(ctx)
+ * close a client and release all of its resources.
+ */
+void
+ctl_endclient(struct ctl_cctx *ctx) {
+ if (ctx->state != destroyed)
+ destroy(ctx, 0);
+ memput(ctx, sizeof *ctx);
+}
+
+/*%
+ * int
+ * ctl_command(ctx, cmd, len, donefunc, uap)
+ * Queue a transaction, which will begin with sending cmd
+ * and complete by calling donefunc with the answer.
+ */
+int
+ctl_command(struct ctl_cctx *ctx, const char *cmd, size_t len,
+ ctl_clntdone donefunc, void *uap)
+{
+ struct ctl_tran *tran;
+ char *pc;
+ unsigned int n;
+
+ switch (ctx->state) {
+ case destroyed:
+ errno = ENOTCONN;
+ return (-1);
+ case connecting:
+ case connected:
+ break;
+ default:
+ abort();
+ }
+ if (len >= (size_t)MAX_LINELEN) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ tran = new_tran(ctx, donefunc, uap, 1);
+ if (tran == NULL)
+ return (-1);
+ if (ctl_bufget(&tran->outbuf, ctx->logger) < 0)
+ return (-1);
+ memcpy(tran->outbuf.text, cmd, len);
+ tran->outbuf.used = len;
+ for (pc = tran->outbuf.text, n = 0; n < tran->outbuf.used; pc++, n++)
+ if (!isascii((unsigned char)*pc) ||
+ !isprint((unsigned char)*pc))
+ *pc = '\040';
+ start_write(ctx);
+ return (0);
+}
+
+/* Private. */
+
+static struct ctl_tran *
+new_tran(struct ctl_cctx *ctx, ctl_clntdone donefunc, void *uap, int w) {
+ struct ctl_tran *new = memget(sizeof *new);
+
+ if (new == NULL)
+ return (NULL);
+ new->ctx = ctx;
+ buffer_init(new->outbuf);
+ new->donefunc = donefunc;
+ new->uap = uap;
+ INIT_LINK(new, link);
+ INIT_LINK(new, wlink);
+ APPEND(ctx->tran, new, link);
+ if (w)
+ APPEND(ctx->wtran, new, wlink);
+ return (new);
+}
+
+static void
+start_write(struct ctl_cctx *ctx) {
+ static const char me[] = "isc/ctl_clnt::start_write";
+ struct ctl_tran *tran;
+ struct iovec iov[2], *iovp = iov;
+ char * tmp;
+
+ REQUIRE(ctx->state == connecting || ctx->state == connected);
+ /* If there is a write in progress, don't try to write more yet. */
+ if (ctx->wrID.opaque != NULL)
+ return;
+ /* If there are no trans, make sure timer is off, and we're done. */
+ if (EMPTY(ctx->wtran)) {
+ if (ctx->tiID.opaque != NULL)
+ stop_timer(ctx);
+ return;
+ }
+ /* Pull it off the head of the write queue. */
+ tran = HEAD(ctx->wtran);
+ UNLINK(ctx->wtran, tran, wlink);
+ /* Since there are some trans, make sure timer is successfully "on". */
+ if (ctx->tiID.opaque != NULL)
+ touch_timer(ctx);
+ else
+ start_timer(ctx);
+ if (ctx->state == destroyed)
+ return;
+ /* Marshall a newline-terminated message and clock it out. */
+ *iovp++ = evConsIovec(tran->outbuf.text, tran->outbuf.used);
+ DE_CONST("\r\n", tmp);
+ *iovp++ = evConsIovec(tmp, 2);
+ if (evWrite(ctx->ev, ctx->sock, iov, iovp - iov,
+ write_done, tran, &ctx->wrID) < 0) {
+ (*ctx->logger)(ctl_error, "%s: evWrite: %s", me,
+ strerror(errno));
+ error(ctx);
+ return;
+ }
+ if (evTimeRW(ctx->ev, ctx->wrID, ctx->tiID) < 0) {
+ (*ctx->logger)(ctl_error, "%s: evTimeRW: %s", me,
+ strerror(errno));
+ error(ctx);
+ return;
+ }
+}
+
+static void
+destroy(struct ctl_cctx *ctx, int notify) {
+ struct ctl_tran *this, *next;
+
+ if (ctx->sock != -1) {
+ (void) close(ctx->sock);
+ ctx->sock = -1;
+ }
+ switch (ctx->state) {
+ case connecting:
+ REQUIRE(ctx->wrID.opaque == NULL);
+ REQUIRE(EMPTY(ctx->tran));
+ /*
+ * This test is nec'y since destroy() can be called from
+ * start_read() while the state is still "connecting".
+ */
+ if (ctx->coID.opaque != NULL) {
+ (void)evCancelConn(ctx->ev, ctx->coID);
+ ctx->coID.opaque = NULL;
+ }
+ break;
+ case connected:
+ REQUIRE(ctx->coID.opaque == NULL);
+ if (ctx->wrID.opaque != NULL) {
+ (void)evCancelRW(ctx->ev, ctx->wrID);
+ ctx->wrID.opaque = NULL;
+ }
+ if (ctx->rdID.opaque != NULL)
+ stop_read(ctx);
+ break;
+ case destroyed:
+ break;
+ default:
+ abort();
+ }
+ if (allocated_p(ctx->inbuf))
+ ctl_bufput(&ctx->inbuf);
+ for (this = HEAD(ctx->tran); this != NULL; this = next) {
+ next = NEXT(this, link);
+ if (allocated_p(this->outbuf))
+ ctl_bufput(&this->outbuf);
+ if (notify && this->donefunc != NULL)
+ (*this->donefunc)(ctx, this->uap, NULL, 0);
+ memput(this, sizeof *this);
+ }
+ if (ctx->tiID.opaque != NULL)
+ stop_timer(ctx);
+ new_state(ctx, destroyed);
+}
+
+static void
+error(struct ctl_cctx *ctx) {
+ REQUIRE(ctx->state != destroyed);
+ destroy(ctx, 1);
+}
+
+static void
+new_state(struct ctl_cctx *ctx, enum state new_state) {
+ static const char me[] = "isc/ctl_clnt::new_state";
+
+ (*ctx->logger)(ctl_debug, "%s: %s -> %s", me,
+ state_names[ctx->state], state_names[new_state]);
+ ctx->state = new_state;
+}
+
+static void
+conn_done(evContext ev, void *uap, int fd,
+ const void *la, int lalen,
+ const void *ra, int ralen)
+{
+ static const char me[] = "isc/ctl_clnt::conn_done";
+ struct ctl_cctx *ctx = uap;
+ struct ctl_tran *tran;
+
+ UNUSED(ev);
+ UNUSED(la);
+ UNUSED(lalen);
+ UNUSED(ra);
+ UNUSED(ralen);
+
+ ctx->coID.opaque = NULL;
+ if (fd < 0) {
+ (*ctx->logger)(ctl_error, "%s: evConnect: %s", me,
+ strerror(errno));
+ error(ctx);
+ return;
+ }
+ new_state(ctx, connected);
+ tran = new_tran(ctx, ctx->donefunc, ctx->uap, 0);
+ if (tran == NULL) {
+ (*ctx->logger)(ctl_error, "%s: new_tran failed: %s", me,
+ strerror(errno));
+ error(ctx);
+ return;
+ }
+ start_read(ctx);
+ if (ctx->state == destroyed) {
+ (*ctx->logger)(ctl_error, "%s: start_read failed: %s",
+ me, strerror(errno));
+ error(ctx);
+ return;
+ }
+}
+
+static void
+write_done(evContext lev, void *uap, int fd, int bytes) {
+ struct ctl_tran *tran = (struct ctl_tran *)uap;
+ struct ctl_cctx *ctx = tran->ctx;
+
+ UNUSED(lev);
+ UNUSED(fd);
+
+ ctx->wrID.opaque = NULL;
+ if (ctx->tiID.opaque != NULL)
+ touch_timer(ctx);
+ ctl_bufput(&tran->outbuf);
+ start_write(ctx);
+ if (bytes < 0)
+ destroy(ctx, 1);
+ else
+ start_read(ctx);
+}
+
+static void
+start_read(struct ctl_cctx *ctx) {
+ static const char me[] = "isc/ctl_clnt::start_read";
+
+ REQUIRE(ctx->state == connecting || ctx->state == connected);
+ REQUIRE(ctx->rdID.opaque == NULL);
+ if (evSelectFD(ctx->ev, ctx->sock, EV_READ, readable, ctx,
+ &ctx->rdID) < 0)
+ {
+ (*ctx->logger)(ctl_error, "%s: evSelect(fd %d): %s", me,
+ ctx->sock, strerror(errno));
+ error(ctx);
+ return;
+ }
+}
+
+static void
+stop_read(struct ctl_cctx *ctx) {
+ REQUIRE(ctx->coID.opaque == NULL);
+ REQUIRE(ctx->rdID.opaque != NULL);
+ (void)evDeselectFD(ctx->ev, ctx->rdID);
+ ctx->rdID.opaque = NULL;
+}
+
+static void
+readable(evContext ev, void *uap, int fd, int evmask) {
+ static const char me[] = "isc/ctl_clnt::readable";
+ struct ctl_cctx *ctx = uap;
+ struct ctl_tran *tran;
+ ssize_t n;
+ char *eos;
+
+ UNUSED(ev);
+
+ REQUIRE(ctx != NULL);
+ REQUIRE(fd >= 0);
+ REQUIRE(evmask == EV_READ);
+ REQUIRE(ctx->state == connected);
+ REQUIRE(!EMPTY(ctx->tran));
+ tran = HEAD(ctx->tran);
+ if (!allocated_p(ctx->inbuf) &&
+ ctl_bufget(&ctx->inbuf, ctx->logger) < 0) {
+ (*ctx->logger)(ctl_error, "%s: can't get an input buffer", me);
+ error(ctx);
+ return;
+ }
+ n = read(ctx->sock, ctx->inbuf.text + ctx->inbuf.used,
+ MAX_LINELEN - ctx->inbuf.used);
+ if (n <= 0) {
+ (*ctx->logger)(ctl_warning, "%s: read: %s", me,
+ (n == 0) ? "Unexpected EOF" : strerror(errno));
+ error(ctx);
+ return;
+ }
+ if (ctx->tiID.opaque != NULL)
+ touch_timer(ctx);
+ ctx->inbuf.used += n;
+ (*ctx->logger)(ctl_debug, "%s: read %d, used %d", me,
+ n, ctx->inbuf.used);
+ again:
+ eos = memchr(ctx->inbuf.text, '\n', ctx->inbuf.used);
+ if (eos != NULL && eos != ctx->inbuf.text && eos[-1] == '\r') {
+ int done = 0;
+
+ eos[-1] = '\0';
+ if (!arpacode_p(ctx->inbuf.text)) {
+ /* XXX Doesn't FTP do this sometimes? Is it legal? */
+ (*ctx->logger)(ctl_error, "%s: no arpa code (%s)", me,
+ ctx->inbuf.text);
+ error(ctx);
+ return;
+ }
+ if (arpadone_p(ctx->inbuf.text))
+ done = 1;
+ else if (arpacont_p(ctx->inbuf.text))
+ done = 0;
+ else {
+ /* XXX Doesn't FTP do this sometimes? Is it legal? */
+ (*ctx->logger)(ctl_error, "%s: no arpa flag (%s)", me,
+ ctx->inbuf.text);
+ error(ctx);
+ return;
+ }
+ (*tran->donefunc)(ctx, tran->uap, ctx->inbuf.text,
+ (done ? 0 : CTL_MORE));
+ ctx->inbuf.used -= ((eos - ctx->inbuf.text) + 1);
+ if (ctx->inbuf.used == 0U)
+ ctl_bufput(&ctx->inbuf);
+ else
+ memmove(ctx->inbuf.text, eos + 1, ctx->inbuf.used);
+ if (done) {
+ UNLINK(ctx->tran, tran, link);
+ memput(tran, sizeof *tran);
+ stop_read(ctx);
+ start_write(ctx);
+ return;
+ }
+ if (allocated_p(ctx->inbuf))
+ goto again;
+ return;
+ }
+ if (ctx->inbuf.used == (size_t)MAX_LINELEN) {
+ (*ctx->logger)(ctl_error, "%s: line too long (%-10s...)", me,
+ ctx->inbuf.text);
+ error(ctx);
+ }
+}
+
+/* Timer related stuff. */
+
+static void
+start_timer(struct ctl_cctx *ctx) {
+ static const char me[] = "isc/ctl_clnt::start_timer";
+
+ REQUIRE(ctx->tiID.opaque == NULL);
+ if (evSetIdleTimer(ctx->ev, timer, ctx, ctx->timeout, &ctx->tiID) < 0){
+ (*ctx->logger)(ctl_error, "%s: evSetIdleTimer: %s", me,
+ strerror(errno));
+ error(ctx);
+ return;
+ }
+}
+
+static void
+stop_timer(struct ctl_cctx *ctx) {
+ static const char me[] = "isc/ctl_clnt::stop_timer";
+
+ REQUIRE(ctx->tiID.opaque != NULL);
+ if (evClearIdleTimer(ctx->ev, ctx->tiID) < 0) {
+ (*ctx->logger)(ctl_error, "%s: evClearIdleTimer: %s", me,
+ strerror(errno));
+ error(ctx);
+ return;
+ }
+ ctx->tiID.opaque = NULL;
+}
+
+static void
+touch_timer(struct ctl_cctx *ctx) {
+ REQUIRE(ctx->tiID.opaque != NULL);
+
+ evTouchIdleTimer(ctx->ev, ctx->tiID);
+}
+
+static void
+timer(evContext ev, void *uap, struct timespec due, struct timespec itv) {
+ static const char me[] = "isc/ctl_clnt::timer";
+ struct ctl_cctx *ctx = uap;
+
+ UNUSED(ev);
+ UNUSED(due);
+ UNUSED(itv);
+
+ ctx->tiID.opaque = NULL;
+ (*ctx->logger)(ctl_error, "%s: timeout after %u seconds while %s", me,
+ ctx->timeout.tv_sec, state_names[ctx->state]);
+ error(ctx);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/ctl_p.c b/usr/src/lib/libresolv2_joy/common/isc/ctl_p.c
new file mode 100644
index 0000000000..7ab719a5e6
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/ctl_p.c
@@ -0,0 +1,188 @@
+#if !defined(lint) && !defined(SABER)
+static const char rcsid[] = "$Id: ctl_p.c,v 1.4 2005/04/27 04:56:35 sra Exp $";
+#endif /* not lint */
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1998,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* Extern. */
+
+#include "port_before.h"
+
+#include <sys/param.h>
+#include <sys/file.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <arpa/inet.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <isc/assertions.h>
+#include <isc/eventlib.h>
+#include <isc/logging.h>
+#include <isc/memcluster.h>
+#include <isc/ctl.h>
+
+#include "ctl_p.h"
+
+#include "port_after.h"
+
+/* Constants. */
+
+const char * const ctl_sevnames[] = {
+ "debug", "warning", "error"
+};
+
+/* Public. */
+
+/*%
+ * ctl_logger()
+ * if ctl_startup()'s caller didn't specify a logger, this one
+ * is used. this pollutes stderr with all kinds of trash so it will
+ * probably never be used in real applications.
+ */
+void
+ctl_logger(enum ctl_severity severity, const char *format, ...) {
+ va_list ap;
+ static const char me[] = "ctl_logger";
+
+ fprintf(stderr, "%s(%s): ", me, ctl_sevnames[severity]);
+ va_start(ap, format);
+ vfprintf(stderr, format, ap);
+ va_end(ap);
+ fputc('\n', stderr);
+}
+
+int
+ctl_bufget(struct ctl_buf *buf, ctl_logfunc logger) {
+ static const char me[] = "ctl_bufget";
+
+ REQUIRE(!allocated_p(*buf) && buf->used == 0U);
+ buf->text = memget(MAX_LINELEN);
+ if (!allocated_p(*buf)) {
+ (*logger)(ctl_error, "%s: getmem: %s", me, strerror(errno));
+ return (-1);
+ }
+ buf->used = 0;
+ return (0);
+}
+
+void
+ctl_bufput(struct ctl_buf *buf) {
+
+ REQUIRE(allocated_p(*buf));
+ memput(buf->text, MAX_LINELEN);
+ buf->text = NULL;
+ buf->used = 0;
+}
+
+const char *
+ctl_sa_ntop(const struct sockaddr *sa,
+ char *buf, size_t size,
+ ctl_logfunc logger)
+{
+ static const char me[] = "ctl_sa_ntop";
+ static const char punt[] = "[0].-1";
+ char tmp[INET6_ADDRSTRLEN];
+
+ switch (sa->sa_family) {
+ case AF_INET6: {
+ const struct sockaddr_in6 *in6 =
+ (const struct sockaddr_in6 *) sa;
+
+ if (inet_ntop(in6->sin6_family, &in6->sin6_addr, tmp, sizeof tmp)
+ == NULL) {
+ (*logger)(ctl_error, "%s: inet_ntop(%u %04x): %s",
+ me, in6->sin6_family,
+ in6->sin6_port, strerror(errno));
+ return (punt);
+ }
+ if (strlen(tmp) + sizeof "[].65535" > size) {
+ (*logger)(ctl_error, "%s: buffer overflow", me);
+ return (punt);
+ }
+ (void) sprintf(buf, "[%s].%u", tmp, ntohs(in6->sin6_port));
+ return (buf);
+ }
+ case AF_INET: {
+ const struct sockaddr_in *in =
+ (const struct sockaddr_in *) sa;
+
+ if (inet_ntop(in->sin_family, &in->sin_addr, tmp, sizeof tmp)
+ == NULL) {
+ (*logger)(ctl_error, "%s: inet_ntop(%u %04x %08x): %s",
+ me, in->sin_family,
+ in->sin_port, in->sin_addr.s_addr,
+ strerror(errno));
+ return (punt);
+ }
+ if (strlen(tmp) + sizeof "[].65535" > size) {
+ (*logger)(ctl_error, "%s: buffer overflow", me);
+ return (punt);
+ }
+ (void) sprintf(buf, "[%s].%u", tmp, ntohs(in->sin_port));
+ return (buf);
+ }
+#ifndef NO_SOCKADDR_UN
+ case AF_UNIX: {
+ const struct sockaddr_un *un =
+ (const struct sockaddr_un *) sa;
+ unsigned int x = sizeof un->sun_path;
+
+ if (x > size)
+ x = size;
+ strncpy(buf, un->sun_path, x - 1);
+ buf[x - 1] = '\0';
+ return (buf);
+ }
+#endif
+ default:
+ return (punt);
+ }
+}
+
+void
+ctl_sa_copy(const struct sockaddr *src, struct sockaddr *dst) {
+ switch (src->sa_family) {
+ case AF_INET6:
+ *((struct sockaddr_in6 *)dst) =
+ *((const struct sockaddr_in6 *)src);
+ break;
+ case AF_INET:
+ *((struct sockaddr_in *)dst) =
+ *((const struct sockaddr_in *)src);
+ break;
+#ifndef NO_SOCKADDR_UN
+ case AF_UNIX:
+ *((struct sockaddr_un *)dst) =
+ *((const struct sockaddr_un *)src);
+ break;
+#endif
+ default:
+ *dst = *src;
+ break;
+ }
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/ctl_p.h b/usr/src/lib/libresolv2_joy/common/isc/ctl_p.h
new file mode 100644
index 0000000000..18a52ae39c
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/ctl_p.h
@@ -0,0 +1,28 @@
+struct ctl_buf {
+ char * text;
+ size_t used;
+};
+
+#define MAX_LINELEN 990 /*%< Like SMTP. */
+#ifndef NO_SOCKADDR_UN
+#define MAX_NTOP PATH_MAX
+#else
+#define MAX_NTOP (sizeof "[255.255.255.255].65535")
+#endif
+
+#define allocated_p(Buf) ((Buf).text != NULL)
+#define buffer_init(Buf) ((Buf).text = 0, (Buf.used) = 0)
+
+#define ctl_bufget __ctl_bufget
+#define ctl_bufput __ctl_bufput
+#define ctl_sa_ntop __ctl_sa_ntop
+#define ctl_sa_copy __ctl_sa_copy
+
+int ctl_bufget(struct ctl_buf *, ctl_logfunc);
+void ctl_bufput(struct ctl_buf *);
+const char * ctl_sa_ntop(const struct sockaddr *, char *, size_t,
+ ctl_logfunc);
+void ctl_sa_copy(const struct sockaddr *,
+ struct sockaddr *);
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/ctl_srvr.c b/usr/src/lib/libresolv2_joy/common/isc/ctl_srvr.c
new file mode 100644
index 0000000000..8fd7a21ffa
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/ctl_srvr.c
@@ -0,0 +1,787 @@
+/*
+ * Copyright (C) 2004-2006, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 1998-2003 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(lint) && !defined(SABER)
+static const char rcsid[] = "$Id: ctl_srvr.c,v 1.10 2008/11/14 02:36:51 marka Exp $";
+#endif /* not lint */
+
+/* Extern. */
+
+#include "port_before.h"
+
+#include <sys/param.h>
+#include <sys/file.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <arpa/inet.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <fcntl.h>
+#ifdef HAVE_MEMORY_H
+#include <memory.h>
+#endif
+
+#include <isc/assertions.h>
+#include <isc/ctl.h>
+#include <isc/eventlib.h>
+#include <isc/list.h>
+#include <isc/logging.h>
+#include <isc/memcluster.h>
+
+#include "ctl_p.h"
+
+#include "port_after.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) ((size_t)sprintf x)
+#endif
+
+/* Macros. */
+
+#define lastverb_p(verb) (verb->name == NULL || verb->func == NULL)
+#define address_expr ctl_sa_ntop((struct sockaddr *)&sess->sa, \
+ tmp, sizeof tmp, ctx->logger)
+
+/* Types. */
+
+enum state {
+ available = 0, initializing, writing, reading, reading_data,
+ processing, idling, quitting, closing
+};
+
+union sa_un {
+ struct sockaddr_in in;
+#ifndef NO_SOCKADDR_UN
+ struct sockaddr_un un;
+#endif
+};
+
+struct ctl_sess {
+ LINK(struct ctl_sess) link;
+ struct ctl_sctx * ctx;
+ enum state state;
+ int sock;
+ union sa_un sa;
+ evFileID rdID;
+ evStreamID wrID;
+ evTimerID rdtiID;
+ evTimerID wrtiID;
+ struct ctl_buf inbuf;
+ struct ctl_buf outbuf;
+ const struct ctl_verb * verb;
+ u_int helpcode;
+ const void * respctx;
+ u_int respflags;
+ ctl_srvrdone donefunc;
+ void * uap;
+ void * csctx;
+};
+
+struct ctl_sctx {
+ evContext ev;
+ void * uctx;
+ u_int unkncode;
+ u_int timeoutcode;
+ const struct ctl_verb * verbs;
+ const struct ctl_verb * connverb;
+ int sock;
+ int max_sess;
+ int cur_sess;
+ struct timespec timeout;
+ ctl_logfunc logger;
+ evConnID acID;
+ LIST(struct ctl_sess) sess;
+};
+
+/* Forward. */
+
+static void ctl_accept(evContext, void *, int,
+ const void *, int,
+ const void *, int);
+static void ctl_close(struct ctl_sess *);
+static void ctl_new_state(struct ctl_sess *,
+ enum state,
+ const char *);
+static void ctl_start_read(struct ctl_sess *);
+static void ctl_stop_read(struct ctl_sess *);
+static void ctl_readable(evContext, void *, int, int);
+static void ctl_rdtimeout(evContext, void *,
+ struct timespec,
+ struct timespec);
+static void ctl_wrtimeout(evContext, void *,
+ struct timespec,
+ struct timespec);
+static void ctl_docommand(struct ctl_sess *);
+static void ctl_writedone(evContext, void *, int, int);
+static void ctl_morehelp(struct ctl_sctx *,
+ struct ctl_sess *,
+ const struct ctl_verb *,
+ const char *,
+ u_int, const void *, void *);
+static void ctl_signal_done(struct ctl_sctx *,
+ struct ctl_sess *);
+
+/* Private data. */
+
+static const char * state_names[] = {
+ "available", "initializing", "writing", "reading",
+ "reading_data", "processing", "idling", "quitting", "closing"
+};
+
+static const char space[] = " ";
+
+static const struct ctl_verb fakehelpverb = {
+ "fakehelp", ctl_morehelp , NULL
+};
+
+/* Public. */
+
+/*%
+ * void
+ * ctl_server()
+ * create, condition, and start a listener on the control port.
+ */
+struct ctl_sctx *
+ctl_server(evContext lev, const struct sockaddr *sap, size_t sap_len,
+ const struct ctl_verb *verbs,
+ u_int unkncode, u_int timeoutcode,
+ u_int timeout, int backlog, int max_sess,
+ ctl_logfunc logger, void *uctx)
+{
+ static const char me[] = "ctl_server";
+ static const int on = 1;
+ const struct ctl_verb *connverb;
+ struct ctl_sctx *ctx;
+ int save_errno;
+
+ if (logger == NULL)
+ logger = ctl_logger;
+ for (connverb = verbs;
+ connverb->name != NULL && connverb->func != NULL;
+ connverb++)
+ if (connverb->name[0] == '\0')
+ break;
+ if (connverb->func == NULL) {
+ (*logger)(ctl_error, "%s: no connection verb found", me);
+ return (NULL);
+ }
+ ctx = memget(sizeof *ctx);
+ if (ctx == NULL) {
+ (*logger)(ctl_error, "%s: getmem: %s", me, strerror(errno));
+ return (NULL);
+ }
+ ctx->ev = lev;
+ ctx->uctx = uctx;
+ ctx->unkncode = unkncode;
+ ctx->timeoutcode = timeoutcode;
+ ctx->verbs = verbs;
+ ctx->timeout = evConsTime(timeout, 0);
+ ctx->logger = logger;
+ ctx->connverb = connverb;
+ ctx->max_sess = max_sess;
+ ctx->cur_sess = 0;
+ INIT_LIST(ctx->sess);
+ ctx->sock = socket(sap->sa_family, SOCK_STREAM, PF_UNSPEC);
+ if (ctx->sock > evHighestFD(ctx->ev)) {
+ ctx->sock = -1;
+ errno = ENOTSOCK;
+ }
+ if (ctx->sock < 0) {
+ save_errno = errno;
+ (*ctx->logger)(ctl_error, "%s: socket: %s",
+ me, strerror(errno));
+ memput(ctx, sizeof *ctx);
+ errno = save_errno;
+ return (NULL);
+ }
+ if (ctx->sock > evHighestFD(lev)) {
+ close(ctx->sock);
+ (*ctx->logger)(ctl_error, "%s: file descriptor > evHighestFD");
+ errno = ENFILE;
+ memput(ctx, sizeof *ctx);
+ return (NULL);
+ }
+#ifdef NO_UNIX_REUSEADDR
+ if (sap->sa_family != AF_UNIX)
+#endif
+ if (setsockopt(ctx->sock, SOL_SOCKET, SO_REUSEADDR,
+ (const char *)&on, sizeof on) != 0) {
+ (*ctx->logger)(ctl_warning,
+ "%s: setsockopt(REUSEADDR): %s",
+ me, strerror(errno));
+ }
+ if (bind(ctx->sock, sap, sap_len) < 0) {
+ char tmp[MAX_NTOP];
+ save_errno = errno;
+ (*ctx->logger)(ctl_error, "%s: bind: %s: %s",
+ me, ctl_sa_ntop((const struct sockaddr *)sap,
+ tmp, sizeof tmp, ctx->logger),
+ strerror(save_errno));
+ close(ctx->sock);
+ memput(ctx, sizeof *ctx);
+ errno = save_errno;
+ return (NULL);
+ }
+ if (fcntl(ctx->sock, F_SETFD, 1) < 0) {
+ (*ctx->logger)(ctl_warning, "%s: fcntl: %s", me,
+ strerror(errno));
+ }
+ if (evListen(lev, ctx->sock, backlog, ctl_accept, ctx,
+ &ctx->acID) < 0) {
+ save_errno = errno;
+ (*ctx->logger)(ctl_error, "%s: evListen(fd %d): %s",
+ me, ctx->sock, strerror(errno));
+ close(ctx->sock);
+ memput(ctx, sizeof *ctx);
+ errno = save_errno;
+ return (NULL);
+ }
+ (*ctx->logger)(ctl_debug, "%s: new ctx %p, sock %d",
+ me, ctx, ctx->sock);
+ return (ctx);
+}
+
+/*%
+ * void
+ * ctl_endserver(ctx)
+ * if the control listener is open, close it. clean out all eventlib
+ * stuff. close all active sessions.
+ */
+void
+ctl_endserver(struct ctl_sctx *ctx) {
+ static const char me[] = "ctl_endserver";
+ struct ctl_sess *this, *next;
+
+ (*ctx->logger)(ctl_debug, "%s: ctx %p, sock %d, acID %p, sess %p",
+ me, ctx, ctx->sock, ctx->acID.opaque, ctx->sess);
+ if (ctx->acID.opaque != NULL) {
+ (void)evCancelConn(ctx->ev, ctx->acID);
+ ctx->acID.opaque = NULL;
+ }
+ if (ctx->sock != -1) {
+ (void) close(ctx->sock);
+ ctx->sock = -1;
+ }
+ for (this = HEAD(ctx->sess); this != NULL; this = next) {
+ next = NEXT(this, link);
+ ctl_close(this);
+ }
+ memput(ctx, sizeof *ctx);
+}
+
+/*%
+ * If body is non-NULL then it we add a "." line after it.
+ * Caller must have escaped lines with leading ".".
+ */
+void
+ctl_response(struct ctl_sess *sess, u_int code, const char *text,
+ u_int flags, const void *respctx, ctl_srvrdone donefunc,
+ void *uap, const char *body, size_t bodylen)
+{
+ static const char me[] = "ctl_response";
+ struct iovec iov[3], *iovp = iov;
+ struct ctl_sctx *ctx = sess->ctx;
+ char tmp[MAX_NTOP], *pc;
+ int n;
+
+ REQUIRE(sess->state == initializing ||
+ sess->state == processing ||
+ sess->state == reading_data ||
+ sess->state == writing);
+ REQUIRE(sess->wrtiID.opaque == NULL);
+ REQUIRE(sess->wrID.opaque == NULL);
+ ctl_new_state(sess, writing, me);
+ sess->donefunc = donefunc;
+ sess->uap = uap;
+ if (!allocated_p(sess->outbuf) &&
+ ctl_bufget(&sess->outbuf, ctx->logger) < 0) {
+ (*ctx->logger)(ctl_error, "%s: %s: cant get an output buffer",
+ me, address_expr);
+ goto untimely;
+ }
+ if (sizeof "000-\r\n" + strlen(text) > (size_t)MAX_LINELEN) {
+ (*ctx->logger)(ctl_error, "%s: %s: output buffer ovf, closing",
+ me, address_expr);
+ goto untimely;
+ }
+ sess->outbuf.used = SPRINTF((sess->outbuf.text, "%03d%c%s\r\n",
+ code, (flags & CTL_MORE) != 0 ? '-' : ' ',
+ text));
+ for (pc = sess->outbuf.text, n = 0;
+ n < (int)sess->outbuf.used-2; pc++, n++)
+ if (!isascii((unsigned char)*pc) ||
+ !isprint((unsigned char)*pc))
+ *pc = '\040';
+ *iovp++ = evConsIovec(sess->outbuf.text, sess->outbuf.used);
+ if (body != NULL) {
+ char *tmp;
+ DE_CONST(body, tmp);
+ *iovp++ = evConsIovec(tmp, bodylen);
+ DE_CONST(".\r\n", tmp);
+ *iovp++ = evConsIovec(tmp, 3);
+ }
+ (*ctx->logger)(ctl_debug, "%s: [%d] %s", me,
+ sess->outbuf.used, sess->outbuf.text);
+ if (evWrite(ctx->ev, sess->sock, iov, iovp - iov,
+ ctl_writedone, sess, &sess->wrID) < 0) {
+ (*ctx->logger)(ctl_error, "%s: %s: evWrite: %s", me,
+ address_expr, strerror(errno));
+ goto untimely;
+ }
+ if (evSetIdleTimer(ctx->ev, ctl_wrtimeout, sess, ctx->timeout,
+ &sess->wrtiID) < 0)
+ {
+ (*ctx->logger)(ctl_error, "%s: %s: evSetIdleTimer: %s", me,
+ address_expr, strerror(errno));
+ goto untimely;
+ }
+ if (evTimeRW(ctx->ev, sess->wrID, sess->wrtiID) < 0) {
+ (*ctx->logger)(ctl_error, "%s: %s: evTimeRW: %s", me,
+ address_expr, strerror(errno));
+ untimely:
+ ctl_signal_done(ctx, sess);
+ ctl_close(sess);
+ return;
+ }
+ sess->respctx = respctx;
+ sess->respflags = flags;
+}
+
+void
+ctl_sendhelp(struct ctl_sess *sess, u_int code) {
+ static const char me[] = "ctl_sendhelp";
+ struct ctl_sctx *ctx = sess->ctx;
+
+ sess->helpcode = code;
+ sess->verb = &fakehelpverb;
+ ctl_morehelp(ctx, sess, NULL, me, CTL_MORE,
+ (const void *)ctx->verbs, NULL);
+}
+
+void *
+ctl_getcsctx(struct ctl_sess *sess) {
+ return (sess->csctx);
+}
+
+void *
+ctl_setcsctx(struct ctl_sess *sess, void *csctx) {
+ void *old = sess->csctx;
+
+ sess->csctx = csctx;
+ return (old);
+}
+
+/* Private functions. */
+
+static void
+ctl_accept(evContext lev, void *uap, int fd,
+ const void *lav, int lalen,
+ const void *rav, int ralen)
+{
+ static const char me[] = "ctl_accept";
+ struct ctl_sctx *ctx = uap;
+ struct ctl_sess *sess = NULL;
+ char tmp[MAX_NTOP];
+
+ UNUSED(lev);
+ UNUSED(lalen);
+ UNUSED(ralen);
+
+ if (fd < 0) {
+ (*ctx->logger)(ctl_error, "%s: accept: %s",
+ me, strerror(errno));
+ return;
+ }
+ if (ctx->cur_sess == ctx->max_sess) {
+ (*ctx->logger)(ctl_error, "%s: %s: too many control sessions",
+ me, ctl_sa_ntop((const struct sockaddr *)rav,
+ tmp, sizeof tmp,
+ ctx->logger));
+ (void) close(fd);
+ return;
+ }
+ sess = memget(sizeof *sess);
+ if (sess == NULL) {
+ (*ctx->logger)(ctl_error, "%s: memget: %s", me,
+ strerror(errno));
+ (void) close(fd);
+ return;
+ }
+ if (fcntl(fd, F_SETFD, 1) < 0) {
+ (*ctx->logger)(ctl_warning, "%s: fcntl: %s", me,
+ strerror(errno));
+ }
+ ctx->cur_sess++;
+ INIT_LINK(sess, link);
+ APPEND(ctx->sess, sess, link);
+ sess->ctx = ctx;
+ sess->sock = fd;
+ sess->wrID.opaque = NULL;
+ sess->rdID.opaque = NULL;
+ sess->wrtiID.opaque = NULL;
+ sess->rdtiID.opaque = NULL;
+ sess->respctx = NULL;
+ sess->csctx = NULL;
+ if (((const struct sockaddr *)rav)->sa_family == AF_UNIX)
+ ctl_sa_copy((const struct sockaddr *)lav,
+ (struct sockaddr *)&sess->sa);
+ else
+ ctl_sa_copy((const struct sockaddr *)rav,
+ (struct sockaddr *)&sess->sa);
+ sess->donefunc = NULL;
+ buffer_init(sess->inbuf);
+ buffer_init(sess->outbuf);
+ sess->state = available;
+ ctl_new_state(sess, initializing, me);
+ sess->verb = ctx->connverb;
+ (*ctx->logger)(ctl_debug, "%s: %s: accepting (fd %d)",
+ me, address_expr, sess->sock);
+ (*ctx->connverb->func)(ctx, sess, ctx->connverb, "", 0,
+ (const struct sockaddr *)rav, ctx->uctx);
+}
+
+static void
+ctl_new_state(struct ctl_sess *sess, enum state new_state, const char *reason)
+{
+ static const char me[] = "ctl_new_state";
+ struct ctl_sctx *ctx = sess->ctx;
+ char tmp[MAX_NTOP];
+
+ (*ctx->logger)(ctl_debug, "%s: %s: %s -> %s (%s)",
+ me, address_expr,
+ state_names[sess->state],
+ state_names[new_state], reason);
+ sess->state = new_state;
+}
+
+static void
+ctl_close(struct ctl_sess *sess) {
+ static const char me[] = "ctl_close";
+ struct ctl_sctx *ctx = sess->ctx;
+ char tmp[MAX_NTOP];
+
+ REQUIRE(sess->state == initializing ||
+ sess->state == writing ||
+ sess->state == reading ||
+ sess->state == processing ||
+ sess->state == reading_data ||
+ sess->state == idling);
+ REQUIRE(sess->sock != -1);
+ if (sess->state == reading || sess->state == reading_data)
+ ctl_stop_read(sess);
+ else if (sess->state == writing) {
+ if (sess->wrID.opaque != NULL) {
+ (void) evCancelRW(ctx->ev, sess->wrID);
+ sess->wrID.opaque = NULL;
+ }
+ if (sess->wrtiID.opaque != NULL) {
+ (void) evClearIdleTimer(ctx->ev, sess->wrtiID);
+ sess->wrtiID.opaque = NULL;
+ }
+ }
+ ctl_new_state(sess, closing, me);
+ (void) close(sess->sock);
+ if (allocated_p(sess->inbuf))
+ ctl_bufput(&sess->inbuf);
+ if (allocated_p(sess->outbuf))
+ ctl_bufput(&sess->outbuf);
+ (*ctx->logger)(ctl_debug, "%s: %s: closed (fd %d)",
+ me, address_expr, sess->sock);
+ UNLINK(ctx->sess, sess, link);
+ memput(sess, sizeof *sess);
+ ctx->cur_sess--;
+}
+
+static void
+ctl_start_read(struct ctl_sess *sess) {
+ static const char me[] = "ctl_start_read";
+ struct ctl_sctx *ctx = sess->ctx;
+ char tmp[MAX_NTOP];
+
+ REQUIRE(sess->state == initializing ||
+ sess->state == writing ||
+ sess->state == processing ||
+ sess->state == idling);
+ REQUIRE(sess->rdtiID.opaque == NULL);
+ REQUIRE(sess->rdID.opaque == NULL);
+ sess->inbuf.used = 0;
+ if (evSetIdleTimer(ctx->ev, ctl_rdtimeout, sess, ctx->timeout,
+ &sess->rdtiID) < 0)
+ {
+ (*ctx->logger)(ctl_error, "%s: %s: evSetIdleTimer: %s", me,
+ address_expr, strerror(errno));
+ ctl_close(sess);
+ return;
+ }
+ if (evSelectFD(ctx->ev, sess->sock, EV_READ,
+ ctl_readable, sess, &sess->rdID) < 0) {
+ (*ctx->logger)(ctl_error, "%s: %s: evSelectFD: %s", me,
+ address_expr, strerror(errno));
+ return;
+ }
+ ctl_new_state(sess, reading, me);
+}
+
+static void
+ctl_stop_read(struct ctl_sess *sess) {
+ static const char me[] = "ctl_stop_read";
+ struct ctl_sctx *ctx = sess->ctx;
+
+ REQUIRE(sess->state == reading || sess->state == reading_data);
+ REQUIRE(sess->rdID.opaque != NULL);
+ (void) evDeselectFD(ctx->ev, sess->rdID);
+ sess->rdID.opaque = NULL;
+ if (sess->rdtiID.opaque != NULL) {
+ (void) evClearIdleTimer(ctx->ev, sess->rdtiID);
+ sess->rdtiID.opaque = NULL;
+ }
+ ctl_new_state(sess, idling, me);
+}
+
+static void
+ctl_readable(evContext lev, void *uap, int fd, int evmask) {
+ static const char me[] = "ctl_readable";
+ struct ctl_sess *sess = uap;
+ struct ctl_sctx *ctx;
+ char *eos, tmp[MAX_NTOP];
+ ssize_t n;
+
+ REQUIRE(sess != NULL);
+ REQUIRE(fd >= 0);
+ REQUIRE(evmask == EV_READ);
+ REQUIRE(sess->state == reading || sess->state == reading_data);
+
+ ctx = sess->ctx;
+ evTouchIdleTimer(lev, sess->rdtiID);
+ if (!allocated_p(sess->inbuf) &&
+ ctl_bufget(&sess->inbuf, ctx->logger) < 0) {
+ (*ctx->logger)(ctl_error, "%s: %s: cant get an input buffer",
+ me, address_expr);
+ ctl_close(sess);
+ return;
+ }
+ n = read(sess->sock, sess->inbuf.text + sess->inbuf.used,
+ MAX_LINELEN - sess->inbuf.used);
+ if (n <= 0) {
+ (*ctx->logger)(ctl_debug, "%s: %s: read: %s",
+ me, address_expr,
+ (n == 0) ? "Unexpected EOF" : strerror(errno));
+ ctl_close(sess);
+ return;
+ }
+ sess->inbuf.used += n;
+ eos = memchr(sess->inbuf.text, '\n', sess->inbuf.used);
+ if (eos != NULL && eos != sess->inbuf.text && eos[-1] == '\r') {
+ eos[-1] = '\0';
+ if ((sess->respflags & CTL_DATA) != 0) {
+ INSIST(sess->verb != NULL);
+ (*sess->verb->func)(sess->ctx, sess, sess->verb,
+ sess->inbuf.text,
+ CTL_DATA, sess->respctx,
+ sess->ctx->uctx);
+ } else {
+ ctl_stop_read(sess);
+ ctl_docommand(sess);
+ }
+ sess->inbuf.used -= ((eos - sess->inbuf.text) + 1);
+ if (sess->inbuf.used == 0U)
+ ctl_bufput(&sess->inbuf);
+ else
+ memmove(sess->inbuf.text, eos + 1, sess->inbuf.used);
+ return;
+ }
+ if (sess->inbuf.used == (size_t)MAX_LINELEN) {
+ (*ctx->logger)(ctl_error, "%s: %s: line too long, closing",
+ me, address_expr);
+ ctl_close(sess);
+ }
+}
+
+static void
+ctl_wrtimeout(evContext lev, void *uap,
+ struct timespec due,
+ struct timespec itv)
+{
+ static const char me[] = "ctl_wrtimeout";
+ struct ctl_sess *sess = uap;
+ struct ctl_sctx *ctx = sess->ctx;
+ char tmp[MAX_NTOP];
+
+ UNUSED(lev);
+ UNUSED(due);
+ UNUSED(itv);
+
+ REQUIRE(sess->state == writing);
+ sess->wrtiID.opaque = NULL;
+ (*ctx->logger)(ctl_warning, "%s: %s: write timeout, closing",
+ me, address_expr);
+ if (sess->wrID.opaque != NULL) {
+ (void) evCancelRW(ctx->ev, sess->wrID);
+ sess->wrID.opaque = NULL;
+ }
+ ctl_signal_done(ctx, sess);
+ ctl_new_state(sess, processing, me);
+ ctl_close(sess);
+}
+
+static void
+ctl_rdtimeout(evContext lev, void *uap,
+ struct timespec due,
+ struct timespec itv)
+{
+ static const char me[] = "ctl_rdtimeout";
+ struct ctl_sess *sess = uap;
+ struct ctl_sctx *ctx = sess->ctx;
+ char tmp[MAX_NTOP];
+
+ UNUSED(lev);
+ UNUSED(due);
+ UNUSED(itv);
+
+ REQUIRE(sess->state == reading);
+ sess->rdtiID.opaque = NULL;
+ (*ctx->logger)(ctl_warning, "%s: %s: timeout, closing",
+ me, address_expr);
+ if (sess->state == reading || sess->state == reading_data)
+ ctl_stop_read(sess);
+ ctl_signal_done(ctx, sess);
+ ctl_new_state(sess, processing, me);
+ ctl_response(sess, ctx->timeoutcode, "Timeout.", CTL_EXIT, NULL,
+ NULL, NULL, NULL, 0);
+}
+
+static void
+ctl_docommand(struct ctl_sess *sess) {
+ static const char me[] = "ctl_docommand";
+ char *name, *rest, tmp[MAX_NTOP];
+ struct ctl_sctx *ctx = sess->ctx;
+ const struct ctl_verb *verb;
+
+ REQUIRE(allocated_p(sess->inbuf));
+ (*ctx->logger)(ctl_debug, "%s: %s: \"%s\" [%u]",
+ me, address_expr,
+ sess->inbuf.text, (u_int)sess->inbuf.used);
+ ctl_new_state(sess, processing, me);
+ name = sess->inbuf.text + strspn(sess->inbuf.text, space);
+ rest = name + strcspn(name, space);
+ if (*rest != '\0') {
+ *rest++ = '\0';
+ rest += strspn(rest, space);
+ }
+ for (verb = ctx->verbs;
+ verb != NULL && verb->name != NULL && verb->func != NULL;
+ verb++)
+ if (verb->name[0] != '\0' && strcasecmp(name, verb->name) == 0)
+ break;
+ if (verb != NULL && verb->name != NULL && verb->func != NULL) {
+ sess->verb = verb;
+ (*verb->func)(ctx, sess, verb, rest, 0, NULL, ctx->uctx);
+ } else {
+ char buf[1100];
+
+ if (sizeof "Unrecognized command \"\" (args \"\")" +
+ strlen(name) + strlen(rest) > sizeof buf)
+ strcpy(buf, "Unrecognized command (buf ovf)");
+ else
+ sprintf(buf,
+ "Unrecognized command \"%s\" (args \"%s\")",
+ name, rest);
+ ctl_response(sess, ctx->unkncode, buf, 0, NULL, NULL, NULL,
+ NULL, 0);
+ }
+}
+
+static void
+ctl_writedone(evContext lev, void *uap, int fd, int bytes) {
+ static const char me[] = "ctl_writedone";
+ struct ctl_sess *sess = uap;
+ struct ctl_sctx *ctx = sess->ctx;
+ char tmp[MAX_NTOP];
+ int save_errno = errno;
+
+ UNUSED(lev);
+ UNUSED(uap);
+
+ REQUIRE(sess->state == writing);
+ REQUIRE(fd == sess->sock);
+ REQUIRE(sess->wrtiID.opaque != NULL);
+ sess->wrID.opaque = NULL;
+ (void) evClearIdleTimer(ctx->ev, sess->wrtiID);
+ sess->wrtiID.opaque = NULL;
+ if (bytes < 0) {
+ (*ctx->logger)(ctl_error, "%s: %s: %s",
+ me, address_expr, strerror(save_errno));
+ ctl_close(sess);
+ return;
+ }
+
+ INSIST(allocated_p(sess->outbuf));
+ ctl_bufput(&sess->outbuf);
+ if ((sess->respflags & CTL_EXIT) != 0) {
+ ctl_signal_done(ctx, sess);
+ ctl_close(sess);
+ return;
+ } else if ((sess->respflags & CTL_MORE) != 0) {
+ INSIST(sess->verb != NULL);
+ (*sess->verb->func)(sess->ctx, sess, sess->verb, "",
+ CTL_MORE, sess->respctx, sess->ctx->uctx);
+ } else {
+ ctl_signal_done(ctx, sess);
+ ctl_start_read(sess);
+ }
+}
+
+static void
+ctl_morehelp(struct ctl_sctx *ctx, struct ctl_sess *sess,
+ const struct ctl_verb *verb, const char *text,
+ u_int respflags, const void *respctx, void *uctx)
+{
+ const struct ctl_verb *this = respctx, *next = this + 1;
+
+ UNUSED(ctx);
+ UNUSED(verb);
+ UNUSED(text);
+ UNUSED(uctx);
+
+ REQUIRE(!lastverb_p(this));
+ REQUIRE((respflags & CTL_MORE) != 0);
+ if (lastverb_p(next))
+ respflags &= ~CTL_MORE;
+ ctl_response(sess, sess->helpcode, this->help, respflags, next,
+ NULL, NULL, NULL, 0);
+}
+
+static void
+ctl_signal_done(struct ctl_sctx *ctx, struct ctl_sess *sess) {
+ if (sess->donefunc != NULL) {
+ (*sess->donefunc)(ctx, sess, sess->uap);
+ sess->donefunc = NULL;
+ }
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/ev_connects.c b/usr/src/lib/libresolv2_joy/common/isc/ev_connects.c
new file mode 100644
index 0000000000..38dfdbe512
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/ev_connects.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1995-1999 by Internet Software Consortium
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* ev_connects.c - implement asynch connect/accept for the eventlib
+ * vix 16sep96 [initial]
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: ev_connects.c,v 1.8 2006/03/09 23:57:56 marka Exp $";
+#endif
+
+/* Import. */
+
+#include "port_before.h"
+#include "fd_setsize.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+
+#include <unistd.h>
+
+#include <isc/eventlib.h>
+#include <isc/assertions.h>
+#include "eventlib_p.h"
+
+#include "port_after.h"
+
+/* Macros. */
+
+#define GETXXXNAME(f, s, sa, len) ( \
+ (f((s), (&sa), (&len)) >= 0) ? 0 : \
+ (errno != EAFNOSUPPORT && errno != EOPNOTSUPP) ? -1 : ( \
+ memset(&(sa), 0, sizeof (sa)), \
+ (len) = sizeof (sa), \
+ (sa).sa_family = AF_UNIX, \
+ 0 \
+ ) \
+ )
+
+/* Forward. */
+
+static void listener(evContext ctx, void *uap, int fd, int evmask);
+static void connector(evContext ctx, void *uap, int fd, int evmask);
+
+/* Public. */
+
+int
+evListen(evContext opaqueCtx, int fd, int maxconn,
+ evConnFunc func, void *uap, evConnID *id)
+{
+ evContext_p *ctx = opaqueCtx.opaque;
+ evConn *new;
+ int mode;
+
+ OKNEW(new);
+ new->flags = EV_CONN_LISTEN;
+ OKFREE(mode = fcntl(fd, F_GETFL, NULL), new); /*%< side effect: validate fd. */
+ /*
+ * Remember the nonblocking status. We assume that either evSelectFD
+ * has not been done to this fd, or that if it has then the caller
+ * will evCancelConn before they evDeselectFD. If our assumptions
+ * are not met, then we might restore the old nonblocking status
+ * incorrectly.
+ */
+ if ((mode & PORT_NONBLOCK) == 0) {
+#ifdef USE_FIONBIO_IOCTL
+ int on = 1;
+ OKFREE(ioctl(fd, FIONBIO, (char *)&on), new);
+#else
+ OKFREE(fcntl(fd, F_SETFL, mode | PORT_NONBLOCK), new);
+#endif
+ new->flags |= EV_CONN_BLOCK;
+ }
+ OKFREE(listen(fd, maxconn), new);
+ if (evSelectFD(opaqueCtx, fd, EV_READ, listener, new, &new->file) < 0){
+ int save = errno;
+
+ FREE(new);
+ errno = save;
+ return (-1);
+ }
+ new->flags |= EV_CONN_SELECTED;
+ new->func = func;
+ new->uap = uap;
+ new->fd = fd;
+ if (ctx->conns != NULL)
+ ctx->conns->prev = new;
+ new->prev = NULL;
+ new->next = ctx->conns;
+ ctx->conns = new;
+ if (id)
+ id->opaque = new;
+ return (0);
+}
+
+int
+evConnect(evContext opaqueCtx, int fd, const void *ra, int ralen,
+ evConnFunc func, void *uap, evConnID *id)
+{
+ evContext_p *ctx = opaqueCtx.opaque;
+ evConn *new;
+
+ OKNEW(new);
+ new->flags = 0;
+ /* Do the select() first to get the socket into nonblocking mode. */
+ if (evSelectFD(opaqueCtx, fd, EV_MASK_ALL,
+ connector, new, &new->file) < 0) {
+ int save = errno;
+
+ FREE(new);
+ errno = save;
+ return (-1);
+ }
+ new->flags |= EV_CONN_SELECTED;
+ if (connect(fd, ra, ralen) < 0 &&
+ errno != EWOULDBLOCK &&
+ errno != EAGAIN &&
+ errno != EINPROGRESS) {
+ int save = errno;
+
+ (void) evDeselectFD(opaqueCtx, new->file);
+ FREE(new);
+ errno = save;
+ return (-1);
+ }
+ /* No error, or EWOULDBLOCK. select() tells when it's ready. */
+ new->func = func;
+ new->uap = uap;
+ new->fd = fd;
+ if (ctx->conns != NULL)
+ ctx->conns->prev = new;
+ new->prev = NULL;
+ new->next = ctx->conns;
+ ctx->conns = new;
+ if (id)
+ id->opaque = new;
+ return (0);
+}
+
+int
+evCancelConn(evContext opaqueCtx, evConnID id) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evConn *this = id.opaque;
+ evAccept *acc, *nxtacc;
+ int mode;
+
+ if ((this->flags & EV_CONN_SELECTED) != 0)
+ (void) evDeselectFD(opaqueCtx, this->file);
+ if ((this->flags & EV_CONN_BLOCK) != 0) {
+ mode = fcntl(this->fd, F_GETFL, NULL);
+ if (mode == -1) {
+ if (errno != EBADF)
+ return (-1);
+ } else {
+#ifdef USE_FIONBIO_IOCTL
+ int off = 0;
+ OK(ioctl(this->fd, FIONBIO, (char *)&off));
+#else
+ OK(fcntl(this->fd, F_SETFL, mode & ~PORT_NONBLOCK));
+#endif
+ }
+ }
+
+ /* Unlink from ctx->conns. */
+ if (this->prev != NULL)
+ this->prev->next = this->next;
+ else
+ ctx->conns = this->next;
+ if (this->next != NULL)
+ this->next->prev = this->prev;
+
+ /*
+ * Remove `this' from the ctx->accepts list (zero or more times).
+ */
+ for (acc = HEAD(ctx->accepts), nxtacc = NULL;
+ acc != NULL;
+ acc = nxtacc)
+ {
+ nxtacc = NEXT(acc, link);
+ if (acc->conn == this) {
+ UNLINK(ctx->accepts, acc, link);
+ close(acc->fd);
+ FREE(acc);
+ }
+ }
+
+ /* Wrap up and get out. */
+ FREE(this);
+ return (0);
+}
+
+int evHold(evContext opaqueCtx, evConnID id) {
+ evConn *this = id.opaque;
+
+ if ((this->flags & EV_CONN_LISTEN) == 0) {
+ errno = EINVAL;
+ return (-1);
+ }
+ if ((this->flags & EV_CONN_SELECTED) == 0)
+ return (0);
+ this->flags &= ~EV_CONN_SELECTED;
+ return (evDeselectFD(opaqueCtx, this->file));
+}
+
+int evUnhold(evContext opaqueCtx, evConnID id) {
+ evConn *this = id.opaque;
+ int ret;
+
+ if ((this->flags & EV_CONN_LISTEN) == 0) {
+ errno = EINVAL;
+ return (-1);
+ }
+ if ((this->flags & EV_CONN_SELECTED) != 0)
+ return (0);
+ ret = evSelectFD(opaqueCtx, this->fd, EV_READ, listener, this,
+ &this->file);
+ if (ret == 0)
+ this->flags |= EV_CONN_SELECTED;
+ return (ret);
+}
+
+int
+evTryAccept(evContext opaqueCtx, evConnID id, int *sys_errno) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evConn *conn = id.opaque;
+ evAccept *new;
+
+ if ((conn->flags & EV_CONN_LISTEN) == 0) {
+ errno = EINVAL;
+ return (-1);
+ }
+ OKNEW(new);
+ new->conn = conn;
+ new->ralen = sizeof new->ra;
+ new->fd = accept(conn->fd, &new->ra.sa, &new->ralen);
+ if (new->fd > ctx->highestFD) {
+ close(new->fd);
+ new->fd = -1;
+ new->ioErrno = ENOTSOCK;
+ }
+ if (new->fd >= 0) {
+ new->lalen = sizeof new->la;
+ if (GETXXXNAME(getsockname, new->fd, new->la.sa, new->lalen) < 0) {
+ new->ioErrno = errno;
+ (void) close(new->fd);
+ new->fd = -1;
+ } else
+ new->ioErrno = 0;
+ } else {
+ new->ioErrno = errno;
+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
+ FREE(new);
+ return (-1);
+ }
+ }
+ INIT_LINK(new, link);
+ APPEND(ctx->accepts, new, link);
+ *sys_errno = new->ioErrno;
+ return (0);
+}
+
+/* Private. */
+
+static void
+listener(evContext opaqueCtx, void *uap, int fd, int evmask) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evConn *conn = uap;
+ union {
+ struct sockaddr sa;
+ struct sockaddr_in in;
+#ifndef NO_SOCKADDR_UN
+ struct sockaddr_un un;
+#endif
+ } la, ra;
+ int new;
+ ISC_SOCKLEN_T lalen = 0, ralen;
+
+ REQUIRE((evmask & EV_READ) != 0);
+ ralen = sizeof ra;
+ new = accept(fd, &ra.sa, &ralen);
+ if (new > ctx->highestFD) {
+ close(new);
+ new = -1;
+ errno = ENOTSOCK;
+ }
+ if (new >= 0) {
+ lalen = sizeof la;
+ if (GETXXXNAME(getsockname, new, la.sa, lalen) < 0) {
+ int save = errno;
+
+ (void) close(new);
+ errno = save;
+ new = -1;
+ }
+ } else if (errno == EAGAIN || errno == EWOULDBLOCK)
+ return;
+ (*conn->func)(opaqueCtx, conn->uap, new, &la.sa, lalen, &ra.sa, ralen);
+}
+
+static void
+connector(evContext opaqueCtx, void *uap, int fd, int evmask) {
+ evConn *conn = uap;
+ union {
+ struct sockaddr sa;
+ struct sockaddr_in in;
+#ifndef NO_SOCKADDR_UN
+ struct sockaddr_un un;
+#endif
+ } la, ra;
+ ISC_SOCKLEN_T lalen, ralen;
+#ifndef NETREAD_BROKEN
+ char buf[1];
+#endif
+ void *conn_uap;
+ evConnFunc conn_func;
+ evConnID id;
+ int socket_errno = 0;
+ ISC_SOCKLEN_T optlen;
+
+ UNUSED(evmask);
+
+ lalen = sizeof la;
+ ralen = sizeof ra;
+ conn_uap = conn->uap;
+ conn_func = conn->func;
+ id.opaque = conn;
+#ifdef SO_ERROR
+ optlen = sizeof socket_errno;
+ if (fd < 0 &&
+ getsockopt(conn->fd, SOL_SOCKET, SO_ERROR, (char *)&socket_errno,
+ &optlen) < 0)
+ socket_errno = errno;
+ else
+ errno = socket_errno;
+#endif
+ if (evCancelConn(opaqueCtx, id) < 0 ||
+ socket_errno ||
+#ifdef NETREAD_BROKEN
+ 0 ||
+#else
+ read(fd, buf, 0) < 0 ||
+#endif
+ GETXXXNAME(getsockname, fd, la.sa, lalen) < 0 ||
+ GETXXXNAME(getpeername, fd, ra.sa, ralen) < 0) {
+ int save = errno;
+
+ (void) close(fd); /*%< XXX closing caller's fd */
+ errno = save;
+ fd = -1;
+ }
+ (*conn_func)(opaqueCtx, conn_uap, fd, &la.sa, lalen, &ra.sa, ralen);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/ev_files.c b/usr/src/lib/libresolv2_joy/common/isc/ev_files.c
new file mode 100644
index 0000000000..b12baf1aaa
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/ev_files.c
@@ -0,0 +1,277 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1995-1999 by Internet Software Consortium
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* ev_files.c - implement asynch file IO for the eventlib
+ * vix 11sep95 [initial]
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: ev_files.c,v 1.8 2005/07/28 06:51:48 marka Exp $";
+#endif
+
+#include "port_before.h"
+#include "fd_setsize.h"
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/ioctl.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <isc/eventlib.h>
+#include "eventlib_p.h"
+
+#include "port_after.h"
+
+static evFile *FindFD(const evContext_p *ctx, int fd, int eventmask);
+
+int
+evSelectFD(evContext opaqueCtx,
+ int fd,
+ int eventmask,
+ evFileFunc func,
+ void *uap,
+ evFileID *opaqueID
+) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evFile *id;
+ int mode;
+
+ evPrintf(ctx, 1,
+ "evSelectFD(ctx %p, fd %d, mask 0x%x, func %p, uap %p)\n",
+ ctx, fd, eventmask, func, uap);
+ if (eventmask == 0 || (eventmask & ~EV_MASK_ALL) != 0)
+ EV_ERR(EINVAL);
+#ifndef USE_POLL
+ if (fd > ctx->highestFD)
+ EV_ERR(EINVAL);
+#endif
+ OK(mode = fcntl(fd, F_GETFL, NULL)); /*%< side effect: validate fd. */
+ /*
+ * The first time we touch a file descriptor, we need to check to see
+ * if the application already had it in O_NONBLOCK mode and if so, all
+ * of our deselect()'s have to leave it in O_NONBLOCK. If not, then
+ * all but our last deselect() has to leave it in O_NONBLOCK.
+ */
+#ifdef USE_POLL
+ /* Make sure both ctx->pollfds[] and ctx->fdTable[] are large enough */
+ if (fd >= ctx->maxnfds && evPollfdRealloc(ctx, 1, fd) != 0)
+ EV_ERR(ENOMEM);
+#endif /* USE_POLL */
+ id = FindFD(ctx, fd, EV_MASK_ALL);
+ if (id == NULL) {
+ if (mode & PORT_NONBLOCK)
+ FD_SET(fd, &ctx->nonblockBefore);
+ else {
+#ifdef USE_FIONBIO_IOCTL
+ int on = 1;
+ OK(ioctl(fd, FIONBIO, (char *)&on));
+#else
+ OK(fcntl(fd, F_SETFL, mode | PORT_NONBLOCK));
+#endif
+ FD_CLR(fd, &ctx->nonblockBefore);
+ }
+ }
+
+ /*
+ * If this descriptor is already in use, search for it again to see
+ * if any of the eventmask bits we want to set are already captured.
+ * We cannot usefully capture the same fd event more than once in the
+ * same context.
+ */
+ if (id != NULL && FindFD(ctx, fd, eventmask) != NULL)
+ EV_ERR(ETOOMANYREFS);
+
+ /* Allocate and fill. */
+ OKNEW(id);
+ id->func = func;
+ id->uap = uap;
+ id->fd = fd;
+ id->eventmask = eventmask;
+
+ /*
+ * Insert at head. Order could be important for performance if we
+ * believe that evGetNext()'s accesses to the fd_sets will be more
+ * serial and therefore more cache-lucky if the list is ordered by
+ * ``fd.'' We do not believe these things, so we don't do it.
+ *
+ * The interesting sequence is where GetNext() has cached a select()
+ * result and the caller decides to evSelectFD() on some descriptor.
+ * Since GetNext() starts at the head, it can miss new entries we add
+ * at the head. This is not a serious problem since the event being
+ * evSelectFD()'d for has to occur before evSelectFD() is called for
+ * the file event to be considered "missed" -- a real corner case.
+ * Maintaining a "tail" pointer for ctx->files would fix this, but I'm
+ * not sure it would be ``more correct.''
+ */
+ if (ctx->files != NULL)
+ ctx->files->prev = id;
+ id->prev = NULL;
+ id->next = ctx->files;
+ ctx->files = id;
+
+ /* Insert into fd table. */
+ if (ctx->fdTable[fd] != NULL)
+ ctx->fdTable[fd]->fdprev = id;
+ id->fdprev = NULL;
+ id->fdnext = ctx->fdTable[fd];
+ ctx->fdTable[fd] = id;
+
+ /* Turn on the appropriate bits in the {rd,wr,ex}Next fd_set's. */
+ if (eventmask & EV_READ)
+ FD_SET(fd, &ctx->rdNext);
+ if (eventmask & EV_WRITE)
+ FD_SET(fd, &ctx->wrNext);
+ if (eventmask & EV_EXCEPT)
+ FD_SET(fd, &ctx->exNext);
+
+ /* Update fdMax. */
+ if (fd > ctx->fdMax)
+ ctx->fdMax = fd;
+
+ /* Remember the ID if the caller provided us a place for it. */
+ if (opaqueID)
+ opaqueID->opaque = id;
+
+ return (0);
+}
+
+int
+evDeselectFD(evContext opaqueCtx, evFileID opaqueID) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evFile *del = opaqueID.opaque;
+ evFile *cur;
+ int mode, eventmask;
+
+ if (!del) {
+ evPrintf(ctx, 11, "evDeselectFD(NULL) ignored\n");
+ errno = EINVAL;
+ return (-1);
+ }
+
+ evPrintf(ctx, 1, "evDeselectFD(fd %d, mask 0x%x)\n",
+ del->fd, del->eventmask);
+
+ /* Get the mode. Unless the file has been closed, errors are bad. */
+ mode = fcntl(del->fd, F_GETFL, NULL);
+ if (mode == -1 && errno != EBADF)
+ EV_ERR(errno);
+
+ /* Remove from the list of files. */
+ if (del->prev != NULL)
+ del->prev->next = del->next;
+ else
+ ctx->files = del->next;
+ if (del->next != NULL)
+ del->next->prev = del->prev;
+
+ /* Remove from the fd table. */
+ if (del->fdprev != NULL)
+ del->fdprev->fdnext = del->fdnext;
+ else
+ ctx->fdTable[del->fd] = del->fdnext;
+ if (del->fdnext != NULL)
+ del->fdnext->fdprev = del->fdprev;
+
+ /*
+ * If the file descriptor does not appear in any other select() entry,
+ * and if !EV_WASNONBLOCK, and if we got no EBADF when we got the mode
+ * earlier, then: restore the fd to blocking status.
+ */
+ if (!(cur = FindFD(ctx, del->fd, EV_MASK_ALL)) &&
+ !FD_ISSET(del->fd, &ctx->nonblockBefore) &&
+ mode != -1) {
+ /*
+ * Note that we won't return an error status to the caller if
+ * this fcntl() fails since (a) we've already done the work
+ * and (b) the caller didn't ask us anything about O_NONBLOCK.
+ */
+#ifdef USE_FIONBIO_IOCTL
+ int off = 0;
+ (void) ioctl(del->fd, FIONBIO, (char *)&off);
+#else
+ (void) fcntl(del->fd, F_SETFL, mode & ~PORT_NONBLOCK);
+#endif
+ }
+
+ /*
+ * Now find all other uses of this descriptor and OR together an event
+ * mask so that we don't turn off {rd,wr,ex}Next bits that some other
+ * file event is using. As an optimization, stop if the event mask
+ * fills.
+ */
+ eventmask = 0;
+ for ((void)NULL;
+ cur != NULL && eventmask != EV_MASK_ALL;
+ cur = cur->next)
+ if (cur->fd == del->fd)
+ eventmask |= cur->eventmask;
+
+ /* OK, now we know which bits we can clear out. */
+ if (!(eventmask & EV_READ)) {
+ FD_CLR(del->fd, &ctx->rdNext);
+ if (FD_ISSET(del->fd, &ctx->rdLast)) {
+ FD_CLR(del->fd, &ctx->rdLast);
+ ctx->fdCount--;
+ }
+ }
+ if (!(eventmask & EV_WRITE)) {
+ FD_CLR(del->fd, &ctx->wrNext);
+ if (FD_ISSET(del->fd, &ctx->wrLast)) {
+ FD_CLR(del->fd, &ctx->wrLast);
+ ctx->fdCount--;
+ }
+ }
+ if (!(eventmask & EV_EXCEPT)) {
+ FD_CLR(del->fd, &ctx->exNext);
+ if (FD_ISSET(del->fd, &ctx->exLast)) {
+ FD_CLR(del->fd, &ctx->exLast);
+ ctx->fdCount--;
+ }
+ }
+
+ /* If this was the maxFD, find the new one. */
+ if (del->fd == ctx->fdMax) {
+ ctx->fdMax = -1;
+ for (cur = ctx->files; cur; cur = cur->next)
+ if (cur->fd > ctx->fdMax)
+ ctx->fdMax = cur->fd;
+ }
+
+ /* If this was the fdNext, cycle that to the next entry. */
+ if (del == ctx->fdNext)
+ ctx->fdNext = del->next;
+
+ /* Couldn't free it before now since we were using fields out of it. */
+ FREE(del);
+
+ return (0);
+}
+
+static evFile *
+FindFD(const evContext_p *ctx, int fd, int eventmask) {
+ evFile *id;
+
+ for (id = ctx->fdTable[fd]; id != NULL; id = id->fdnext)
+ if (id->fd == fd && (id->eventmask & eventmask) != 0)
+ break;
+ return (id);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/ev_streams.c b/usr/src/lib/libresolv2_joy/common/isc/ev_streams.c
new file mode 100644
index 0000000000..5dad36d04a
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/ev_streams.c
@@ -0,0 +1,308 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* ev_streams.c - implement asynch stream file IO for the eventlib
+ * vix 04mar96 [initial]
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: ev_streams.c,v 1.5 2005/04/27 04:56:36 sra Exp $";
+#endif
+
+#include "port_before.h"
+#include "fd_setsize.h"
+
+#include <sys/types.h>
+#include <sys/uio.h>
+
+#include <errno.h>
+
+#include <isc/eventlib.h>
+#include <isc/assertions.h>
+#include "eventlib_p.h"
+
+#include "port_after.h"
+
+static int copyvec(evStream *str, const struct iovec *iov, int iocnt);
+static void consume(evStream *str, size_t bytes);
+static void done(evContext opaqueCtx, evStream *str);
+static void writable(evContext opaqueCtx, void *uap, int fd, int evmask);
+static void readable(evContext opaqueCtx, void *uap, int fd, int evmask);
+
+struct iovec
+evConsIovec(void *buf, size_t cnt) {
+ struct iovec ret;
+
+ memset(&ret, 0xf5, sizeof ret);
+ ret.iov_base = buf;
+ ret.iov_len = cnt;
+ return (ret);
+}
+
+int
+evWrite(evContext opaqueCtx, int fd, const struct iovec *iov, int iocnt,
+ evStreamFunc func, void *uap, evStreamID *id)
+{
+ evContext_p *ctx = opaqueCtx.opaque;
+ evStream *new;
+ int save;
+
+ OKNEW(new);
+ new->func = func;
+ new->uap = uap;
+ new->fd = fd;
+ new->flags = 0;
+ if (evSelectFD(opaqueCtx, fd, EV_WRITE, writable, new, &new->file) < 0)
+ goto free;
+ if (copyvec(new, iov, iocnt) < 0)
+ goto free;
+ new->prevDone = NULL;
+ new->nextDone = NULL;
+ if (ctx->streams != NULL)
+ ctx->streams->prev = new;
+ new->prev = NULL;
+ new->next = ctx->streams;
+ ctx->streams = new;
+ if (id != NULL)
+ id->opaque = new;
+ return (0);
+ free:
+ save = errno;
+ FREE(new);
+ errno = save;
+ return (-1);
+}
+
+int
+evRead(evContext opaqueCtx, int fd, const struct iovec *iov, int iocnt,
+ evStreamFunc func, void *uap, evStreamID *id)
+{
+ evContext_p *ctx = opaqueCtx.opaque;
+ evStream *new;
+ int save;
+
+ OKNEW(new);
+ new->func = func;
+ new->uap = uap;
+ new->fd = fd;
+ new->flags = 0;
+ if (evSelectFD(opaqueCtx, fd, EV_READ, readable, new, &new->file) < 0)
+ goto free;
+ if (copyvec(new, iov, iocnt) < 0)
+ goto free;
+ new->prevDone = NULL;
+ new->nextDone = NULL;
+ if (ctx->streams != NULL)
+ ctx->streams->prev = new;
+ new->prev = NULL;
+ new->next = ctx->streams;
+ ctx->streams = new;
+ if (id)
+ id->opaque = new;
+ return (0);
+ free:
+ save = errno;
+ FREE(new);
+ errno = save;
+ return (-1);
+}
+
+int
+evTimeRW(evContext opaqueCtx, evStreamID id, evTimerID timer) /*ARGSUSED*/ {
+ evStream *str = id.opaque;
+
+ UNUSED(opaqueCtx);
+
+ str->timer = timer;
+ str->flags |= EV_STR_TIMEROK;
+ return (0);
+}
+
+int
+evUntimeRW(evContext opaqueCtx, evStreamID id) /*ARGSUSED*/ {
+ evStream *str = id.opaque;
+
+ UNUSED(opaqueCtx);
+
+ str->flags &= ~EV_STR_TIMEROK;
+ return (0);
+}
+
+int
+evCancelRW(evContext opaqueCtx, evStreamID id) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evStream *old = id.opaque;
+
+ /*
+ * The streams list is doubly threaded. First, there's ctx->streams
+ * that's used by evDestroy() to find and cancel all streams. Second,
+ * there's ctx->strDone (head) and ctx->strLast (tail) which thread
+ * through the potentially smaller number of "IO completed" streams,
+ * used in evGetNext() to avoid scanning the entire list.
+ */
+
+ /* Unlink from ctx->streams. */
+ if (old->prev != NULL)
+ old->prev->next = old->next;
+ else
+ ctx->streams = old->next;
+ if (old->next != NULL)
+ old->next->prev = old->prev;
+
+ /*
+ * If 'old' is on the ctx->strDone list, remove it. Update
+ * ctx->strLast if necessary.
+ */
+ if (old->prevDone == NULL && old->nextDone == NULL) {
+ /*
+ * Either 'old' is the only item on the done list, or it's
+ * not on the done list. If the former, then we unlink it
+ * from the list. If the latter, we leave the list alone.
+ */
+ if (ctx->strDone == old) {
+ ctx->strDone = NULL;
+ ctx->strLast = NULL;
+ }
+ } else {
+ if (old->prevDone != NULL)
+ old->prevDone->nextDone = old->nextDone;
+ else
+ ctx->strDone = old->nextDone;
+ if (old->nextDone != NULL)
+ old->nextDone->prevDone = old->prevDone;
+ else
+ ctx->strLast = old->prevDone;
+ }
+
+ /* Deallocate the stream. */
+ if (old->file.opaque)
+ evDeselectFD(opaqueCtx, old->file);
+ memput(old->iovOrig, sizeof (struct iovec) * old->iovOrigCount);
+ FREE(old);
+ return (0);
+}
+
+/* Copy a scatter/gather vector and initialize a stream handler's IO. */
+static int
+copyvec(evStream *str, const struct iovec *iov, int iocnt) {
+ int i;
+
+ str->iovOrig = (struct iovec *)memget(sizeof(struct iovec) * iocnt);
+ if (str->iovOrig == NULL) {
+ errno = ENOMEM;
+ return (-1);
+ }
+ str->ioTotal = 0;
+ for (i = 0; i < iocnt; i++) {
+ str->iovOrig[i] = iov[i];
+ str->ioTotal += iov[i].iov_len;
+ }
+ str->iovOrigCount = iocnt;
+ str->iovCur = str->iovOrig;
+ str->iovCurCount = str->iovOrigCount;
+ str->ioDone = 0;
+ return (0);
+}
+
+/* Pull off or truncate lead iovec(s). */
+static void
+consume(evStream *str, size_t bytes) {
+ while (bytes > 0U) {
+ if (bytes < (size_t)str->iovCur->iov_len) {
+ str->iovCur->iov_len -= bytes;
+ str->iovCur->iov_base = (void *)
+ ((u_char *)str->iovCur->iov_base + bytes);
+ str->ioDone += bytes;
+ bytes = 0;
+ } else {
+ bytes -= str->iovCur->iov_len;
+ str->ioDone += str->iovCur->iov_len;
+ str->iovCur++;
+ str->iovCurCount--;
+ }
+ }
+}
+
+/* Add a stream to Done list and deselect the FD. */
+static void
+done(evContext opaqueCtx, evStream *str) {
+ evContext_p *ctx = opaqueCtx.opaque;
+
+ if (ctx->strLast != NULL) {
+ str->prevDone = ctx->strLast;
+ ctx->strLast->nextDone = str;
+ ctx->strLast = str;
+ } else {
+ INSIST(ctx->strDone == NULL);
+ ctx->strDone = ctx->strLast = str;
+ }
+ evDeselectFD(opaqueCtx, str->file);
+ str->file.opaque = NULL;
+ /* evDrop() will call evCancelRW() on us. */
+}
+
+/* Dribble out some bytes on the stream. (Called by evDispatch().) */
+static void
+writable(evContext opaqueCtx, void *uap, int fd, int evmask) {
+ evStream *str = uap;
+ int bytes;
+
+ UNUSED(evmask);
+
+ bytes = writev(fd, str->iovCur, str->iovCurCount);
+ if (bytes > 0) {
+ if ((str->flags & EV_STR_TIMEROK) != 0)
+ evTouchIdleTimer(opaqueCtx, str->timer);
+ consume(str, bytes);
+ } else {
+ if (bytes < 0 && errno != EINTR) {
+ str->ioDone = -1;
+ str->ioErrno = errno;
+ }
+ }
+ if (str->ioDone == -1 || str->ioDone == str->ioTotal)
+ done(opaqueCtx, str);
+}
+
+/* Scoop up some bytes from the stream. (Called by evDispatch().) */
+static void
+readable(evContext opaqueCtx, void *uap, int fd, int evmask) {
+ evStream *str = uap;
+ int bytes;
+
+ UNUSED(evmask);
+
+ bytes = readv(fd, str->iovCur, str->iovCurCount);
+ if (bytes > 0) {
+ if ((str->flags & EV_STR_TIMEROK) != 0)
+ evTouchIdleTimer(opaqueCtx, str->timer);
+ consume(str, bytes);
+ } else {
+ if (bytes == 0)
+ str->ioDone = 0;
+ else {
+ if (errno != EINTR) {
+ str->ioDone = -1;
+ str->ioErrno = errno;
+ }
+ }
+ }
+ if (str->ioDone <= 0 || str->ioDone == str->ioTotal)
+ done(opaqueCtx, str);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/ev_timers.c b/usr/src/lib/libresolv2_joy/common/isc/ev_timers.c
new file mode 100644
index 0000000000..12ac2cebca
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/ev_timers.c
@@ -0,0 +1,499 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1995-1999 by Internet Software Consortium
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* ev_timers.c - implement timers for the eventlib
+ * vix 09sep95 [initial]
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: ev_timers.c,v 1.6 2005/04/27 04:56:36 sra Exp $";
+#endif
+
+/* Import. */
+
+#include "port_before.h"
+#include "fd_setsize.h"
+
+#include <errno.h>
+
+#include <isc/assertions.h>
+#include <isc/eventlib.h>
+#include "eventlib_p.h"
+
+#include "port_after.h"
+
+/* Constants. */
+
+#define MILLION 1000000
+#define BILLION 1000000000
+
+/* Forward. */
+
+static int due_sooner(void *, void *);
+static void set_index(void *, int);
+static void free_timer(void *, void *);
+static void print_timer(void *, void *);
+static void idle_timeout(evContext, void *, struct timespec, struct timespec);
+
+/* Private type. */
+
+typedef struct {
+ evTimerFunc func;
+ void * uap;
+ struct timespec lastTouched;
+ struct timespec max_idle;
+ evTimer * timer;
+} idle_timer;
+
+/* Public. */
+
+struct timespec
+evConsTime(time_t sec, long nsec) {
+ struct timespec x;
+
+ x.tv_sec = sec;
+ x.tv_nsec = nsec;
+ return (x);
+}
+
+struct timespec
+evAddTime(struct timespec addend1, struct timespec addend2) {
+ struct timespec x;
+
+ x.tv_sec = addend1.tv_sec + addend2.tv_sec;
+ x.tv_nsec = addend1.tv_nsec + addend2.tv_nsec;
+ if (x.tv_nsec >= BILLION) {
+ x.tv_sec++;
+ x.tv_nsec -= BILLION;
+ }
+ return (x);
+}
+
+struct timespec
+evSubTime(struct timespec minuend, struct timespec subtrahend) {
+ struct timespec x;
+
+ x.tv_sec = minuend.tv_sec - subtrahend.tv_sec;
+ if (minuend.tv_nsec >= subtrahend.tv_nsec)
+ x.tv_nsec = minuend.tv_nsec - subtrahend.tv_nsec;
+ else {
+ x.tv_nsec = BILLION - subtrahend.tv_nsec + minuend.tv_nsec;
+ x.tv_sec--;
+ }
+ return (x);
+}
+
+int
+evCmpTime(struct timespec a, struct timespec b) {
+ long x = a.tv_sec - b.tv_sec;
+
+ if (x == 0L)
+ x = a.tv_nsec - b.tv_nsec;
+ return (x < 0L ? (-1) : x > 0L ? (1) : (0));
+}
+
+struct timespec
+evNowTime() {
+ struct timeval now;
+#ifdef CLOCK_REALTIME
+ struct timespec tsnow;
+ int m = CLOCK_REALTIME;
+
+#ifdef CLOCK_MONOTONIC
+ if (__evOptMonoTime)
+ m = CLOCK_MONOTONIC;
+#endif
+ if (clock_gettime(m, &tsnow) == 0)
+ return (tsnow);
+#endif
+ if (gettimeofday(&now, NULL) < 0)
+ return (evConsTime(0, 0));
+ return (evTimeSpec(now));
+}
+
+struct timespec
+evUTCTime() {
+ struct timeval now;
+#ifdef CLOCK_REALTIME
+ struct timespec tsnow;
+ if (clock_gettime(CLOCK_REALTIME, &tsnow) == 0)
+ return (tsnow);
+#endif
+ if (gettimeofday(&now, NULL) < 0)
+ return (evConsTime(0, 0));
+ return (evTimeSpec(now));
+}
+
+struct timespec
+evLastEventTime(evContext opaqueCtx) {
+ evContext_p *ctx = opaqueCtx.opaque;
+
+ return (ctx->lastEventTime);
+}
+
+struct timespec
+evTimeSpec(struct timeval tv) {
+ struct timespec ts;
+
+ ts.tv_sec = tv.tv_sec;
+ ts.tv_nsec = tv.tv_usec * 1000;
+ return (ts);
+}
+
+struct timeval
+evTimeVal(struct timespec ts) {
+ struct timeval tv;
+
+ tv.tv_sec = ts.tv_sec;
+ tv.tv_usec = ts.tv_nsec / 1000;
+ return (tv);
+}
+
+int
+evSetTimer(evContext opaqueCtx,
+ evTimerFunc func,
+ void *uap,
+ struct timespec due,
+ struct timespec inter,
+ evTimerID *opaqueID
+) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evTimer *id;
+
+ evPrintf(ctx, 1,
+"evSetTimer(ctx %p, func %p, uap %p, due %ld.%09ld, inter %ld.%09ld)\n",
+ ctx, func, uap,
+ (long)due.tv_sec, due.tv_nsec,
+ (long)inter.tv_sec, inter.tv_nsec);
+
+#ifdef __hpux
+ /*
+ * tv_sec and tv_nsec are unsigned.
+ */
+ if (due.tv_nsec >= BILLION)
+ EV_ERR(EINVAL);
+
+ if (inter.tv_nsec >= BILLION)
+ EV_ERR(EINVAL);
+#else
+ if (due.tv_sec < 0 || due.tv_nsec < 0 || due.tv_nsec >= BILLION)
+ EV_ERR(EINVAL);
+
+ if (inter.tv_sec < 0 || inter.tv_nsec < 0 || inter.tv_nsec >= BILLION)
+ EV_ERR(EINVAL);
+#endif
+
+ /* due={0,0} is a magic cookie meaning "now." */
+ if (due.tv_sec == (time_t)0 && due.tv_nsec == 0L)
+ due = evNowTime();
+
+ /* Allocate and fill. */
+ OKNEW(id);
+ id->func = func;
+ id->uap = uap;
+ id->due = due;
+ id->inter = inter;
+
+ if (heap_insert(ctx->timers, id) < 0)
+ return (-1);
+
+ /* Remember the ID if the caller provided us a place for it. */
+ if (opaqueID)
+ opaqueID->opaque = id;
+
+ if (ctx->debug > 7) {
+ evPrintf(ctx, 7, "timers after evSetTimer:\n");
+ (void) heap_for_each(ctx->timers, print_timer, (void *)ctx);
+ }
+
+ return (0);
+}
+
+int
+evClearTimer(evContext opaqueCtx, evTimerID id) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evTimer *del = id.opaque;
+
+ if (ctx->cur != NULL &&
+ ctx->cur->type == Timer &&
+ ctx->cur->u.timer.this == del) {
+ evPrintf(ctx, 8, "deferring delete of timer (executing)\n");
+ /*
+ * Setting the interval to zero ensures that evDrop() will
+ * clean up the timer.
+ */
+ del->inter = evConsTime(0, 0);
+ return (0);
+ }
+
+ if (heap_element(ctx->timers, del->index) != del)
+ EV_ERR(ENOENT);
+
+ if (heap_delete(ctx->timers, del->index) < 0)
+ return (-1);
+ FREE(del);
+
+ if (ctx->debug > 7) {
+ evPrintf(ctx, 7, "timers after evClearTimer:\n");
+ (void) heap_for_each(ctx->timers, print_timer, (void *)ctx);
+ }
+
+ return (0);
+}
+
+int
+evConfigTimer(evContext opaqueCtx,
+ evTimerID id,
+ const char *param,
+ int value
+) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evTimer *timer = id.opaque;
+ int result=0;
+
+ UNUSED(value);
+
+ if (heap_element(ctx->timers, timer->index) != timer)
+ EV_ERR(ENOENT);
+
+ if (strcmp(param, "rate") == 0)
+ timer->mode |= EV_TMR_RATE;
+ else if (strcmp(param, "interval") == 0)
+ timer->mode &= ~EV_TMR_RATE;
+ else
+ EV_ERR(EINVAL);
+
+ return (result);
+}
+
+int
+evResetTimer(evContext opaqueCtx,
+ evTimerID id,
+ evTimerFunc func,
+ void *uap,
+ struct timespec due,
+ struct timespec inter
+) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evTimer *timer = id.opaque;
+ struct timespec old_due;
+ int result=0;
+
+ if (heap_element(ctx->timers, timer->index) != timer)
+ EV_ERR(ENOENT);
+
+#ifdef __hpux
+ /*
+ * tv_sec and tv_nsec are unsigned.
+ */
+ if (due.tv_nsec >= BILLION)
+ EV_ERR(EINVAL);
+
+ if (inter.tv_nsec >= BILLION)
+ EV_ERR(EINVAL);
+#else
+ if (due.tv_sec < 0 || due.tv_nsec < 0 || due.tv_nsec >= BILLION)
+ EV_ERR(EINVAL);
+
+ if (inter.tv_sec < 0 || inter.tv_nsec < 0 || inter.tv_nsec >= BILLION)
+ EV_ERR(EINVAL);
+#endif
+
+ old_due = timer->due;
+
+ timer->func = func;
+ timer->uap = uap;
+ timer->due = due;
+ timer->inter = inter;
+
+ switch (evCmpTime(due, old_due)) {
+ case -1:
+ result = heap_increased(ctx->timers, timer->index);
+ break;
+ case 0:
+ result = 0;
+ break;
+ case 1:
+ result = heap_decreased(ctx->timers, timer->index);
+ break;
+ }
+
+ if (ctx->debug > 7) {
+ evPrintf(ctx, 7, "timers after evResetTimer:\n");
+ (void) heap_for_each(ctx->timers, print_timer, (void *)ctx);
+ }
+
+ return (result);
+}
+
+int
+evSetIdleTimer(evContext opaqueCtx,
+ evTimerFunc func,
+ void *uap,
+ struct timespec max_idle,
+ evTimerID *opaqueID
+) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ idle_timer *tt;
+
+ /* Allocate and fill. */
+ OKNEW(tt);
+ tt->func = func;
+ tt->uap = uap;
+ tt->lastTouched = ctx->lastEventTime;
+ tt->max_idle = max_idle;
+
+ if (evSetTimer(opaqueCtx, idle_timeout, tt,
+ evAddTime(ctx->lastEventTime, max_idle),
+ max_idle, opaqueID) < 0) {
+ FREE(tt);
+ return (-1);
+ }
+
+ tt->timer = opaqueID->opaque;
+
+ return (0);
+}
+
+int
+evClearIdleTimer(evContext opaqueCtx, evTimerID id) {
+ evTimer *del = id.opaque;
+ idle_timer *tt = del->uap;
+
+ FREE(tt);
+ return (evClearTimer(opaqueCtx, id));
+}
+
+int
+evResetIdleTimer(evContext opaqueCtx,
+ evTimerID opaqueID,
+ evTimerFunc func,
+ void *uap,
+ struct timespec max_idle
+) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evTimer *timer = opaqueID.opaque;
+ idle_timer *tt = timer->uap;
+
+ tt->func = func;
+ tt->uap = uap;
+ tt->lastTouched = ctx->lastEventTime;
+ tt->max_idle = max_idle;
+
+ return (evResetTimer(opaqueCtx, opaqueID, idle_timeout, tt,
+ evAddTime(ctx->lastEventTime, max_idle),
+ max_idle));
+}
+
+int
+evTouchIdleTimer(evContext opaqueCtx, evTimerID id) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evTimer *t = id.opaque;
+ idle_timer *tt = t->uap;
+
+ tt->lastTouched = ctx->lastEventTime;
+
+ return (0);
+}
+
+/* Public to the rest of eventlib. */
+
+heap_context
+evCreateTimers(const evContext_p *ctx) {
+
+ UNUSED(ctx);
+
+ return (heap_new(due_sooner, set_index, 2048));
+}
+
+void
+evDestroyTimers(const evContext_p *ctx) {
+ (void) heap_for_each(ctx->timers, free_timer, NULL);
+ (void) heap_free(ctx->timers);
+}
+
+/* Private. */
+
+static int
+due_sooner(void *a, void *b) {
+ evTimer *a_timer, *b_timer;
+
+ a_timer = a;
+ b_timer = b;
+ return (evCmpTime(a_timer->due, b_timer->due) < 0);
+}
+
+static void
+set_index(void *what, int index) {
+ evTimer *timer;
+
+ timer = what;
+ timer->index = index;
+}
+
+static void
+free_timer(void *what, void *uap) {
+ evTimer *t = what;
+
+ UNUSED(uap);
+
+ FREE(t);
+}
+
+static void
+print_timer(void *what, void *uap) {
+ evTimer *cur = what;
+ evContext_p *ctx = uap;
+
+ cur = what;
+ evPrintf(ctx, 7,
+ " func %p, uap %p, due %ld.%09ld, inter %ld.%09ld\n",
+ cur->func, cur->uap,
+ (long)cur->due.tv_sec, cur->due.tv_nsec,
+ (long)cur->inter.tv_sec, cur->inter.tv_nsec);
+}
+
+static void
+idle_timeout(evContext opaqueCtx,
+ void *uap,
+ struct timespec due,
+ struct timespec inter
+) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ idle_timer *this = uap;
+ struct timespec idle;
+
+ UNUSED(due);
+ UNUSED(inter);
+
+ idle = evSubTime(ctx->lastEventTime, this->lastTouched);
+ if (evCmpTime(idle, this->max_idle) >= 0) {
+ (this->func)(opaqueCtx, this->uap, this->timer->due,
+ this->max_idle);
+ /*
+ * Setting the interval to zero will cause the timer to
+ * be cleaned up in evDrop().
+ */
+ this->timer->inter = evConsTime(0, 0);
+ FREE(this);
+ } else {
+ /* evDrop() will reschedule the timer. */
+ this->timer->inter = evSubTime(this->max_idle, idle);
+ }
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/ev_waits.c b/usr/src/lib/libresolv2_joy/common/isc/ev_waits.c
new file mode 100644
index 0000000000..99da1526c7
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/ev_waits.c
@@ -0,0 +1,247 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* ev_waits.c - implement deferred function calls for the eventlib
+ * vix 05dec95 [initial]
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: ev_waits.c,v 1.4 2005/04/27 04:56:36 sra Exp $";
+#endif
+
+#include "port_before.h"
+#include "fd_setsize.h"
+
+#include <errno.h>
+
+#include <isc/eventlib.h>
+#include <isc/assertions.h>
+#include "eventlib_p.h"
+
+#include "port_after.h"
+
+/* Forward. */
+
+static void print_waits(evContext_p *ctx);
+static evWaitList * evNewWaitList(evContext_p *);
+static void evFreeWaitList(evContext_p *, evWaitList *);
+static evWaitList * evGetWaitList(evContext_p *, const void *, int);
+
+
+/* Public. */
+
+/*%
+ * Enter a new wait function on the queue.
+ */
+int
+evWaitFor(evContext opaqueCtx, const void *tag,
+ evWaitFunc func, void *uap, evWaitID *id)
+{
+ evContext_p *ctx = opaqueCtx.opaque;
+ evWait *new;
+ evWaitList *wl = evGetWaitList(ctx, tag, 1);
+
+ OKNEW(new);
+ new->func = func;
+ new->uap = uap;
+ new->tag = tag;
+ new->next = NULL;
+ if (wl->last != NULL)
+ wl->last->next = new;
+ else
+ wl->first = new;
+ wl->last = new;
+ if (id != NULL)
+ id->opaque = new;
+ if (ctx->debug >= 9)
+ print_waits(ctx);
+ return (0);
+}
+
+/*%
+ * Mark runnable all waiting functions having a certain tag.
+ */
+int
+evDo(evContext opaqueCtx, const void *tag) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evWaitList *wl = evGetWaitList(ctx, tag, 0);
+ evWait *first;
+
+ if (!wl) {
+ errno = ENOENT;
+ return (-1);
+ }
+
+ first = wl->first;
+ INSIST(first != NULL);
+
+ if (ctx->waitDone.last != NULL)
+ ctx->waitDone.last->next = first;
+ else
+ ctx->waitDone.first = first;
+ ctx->waitDone.last = wl->last;
+ evFreeWaitList(ctx, wl);
+
+ return (0);
+}
+
+/*%
+ * Remove a waiting (or ready to run) function from the queue.
+ */
+int
+evUnwait(evContext opaqueCtx, evWaitID id) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evWait *this, *prev;
+ evWaitList *wl;
+ int found = 0;
+
+ this = id.opaque;
+ INSIST(this != NULL);
+ wl = evGetWaitList(ctx, this->tag, 0);
+ if (wl != NULL) {
+ for (prev = NULL, this = wl->first;
+ this != NULL;
+ prev = this, this = this->next)
+ if (this == (evWait *)id.opaque) {
+ found = 1;
+ if (prev != NULL)
+ prev->next = this->next;
+ else
+ wl->first = this->next;
+ if (wl->last == this)
+ wl->last = prev;
+ if (wl->first == NULL)
+ evFreeWaitList(ctx, wl);
+ break;
+ }
+ }
+
+ if (!found) {
+ /* Maybe it's done */
+ for (prev = NULL, this = ctx->waitDone.first;
+ this != NULL;
+ prev = this, this = this->next)
+ if (this == (evWait *)id.opaque) {
+ found = 1;
+ if (prev != NULL)
+ prev->next = this->next;
+ else
+ ctx->waitDone.first = this->next;
+ if (ctx->waitDone.last == this)
+ ctx->waitDone.last = prev;
+ break;
+ }
+ }
+
+ if (!found) {
+ errno = ENOENT;
+ return (-1);
+ }
+
+ FREE(this);
+
+ if (ctx->debug >= 9)
+ print_waits(ctx);
+
+ return (0);
+}
+
+int
+evDefer(evContext opaqueCtx, evWaitFunc func, void *uap) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evWait *new;
+
+ OKNEW(new);
+ new->func = func;
+ new->uap = uap;
+ new->tag = NULL;
+ new->next = NULL;
+ if (ctx->waitDone.last != NULL)
+ ctx->waitDone.last->next = new;
+ else
+ ctx->waitDone.first = new;
+ ctx->waitDone.last = new;
+ if (ctx->debug >= 9)
+ print_waits(ctx);
+ return (0);
+}
+
+/* Private. */
+
+static void
+print_waits(evContext_p *ctx) {
+ evWaitList *wl;
+ evWait *this;
+
+ evPrintf(ctx, 9, "wait waiting:\n");
+ for (wl = ctx->waitLists; wl != NULL; wl = wl->next) {
+ INSIST(wl->first != NULL);
+ evPrintf(ctx, 9, " tag %p:", wl->first->tag);
+ for (this = wl->first; this != NULL; this = this->next)
+ evPrintf(ctx, 9, " %p", this);
+ evPrintf(ctx, 9, "\n");
+ }
+ evPrintf(ctx, 9, "wait done:");
+ for (this = ctx->waitDone.first; this != NULL; this = this->next)
+ evPrintf(ctx, 9, " %p", this);
+ evPrintf(ctx, 9, "\n");
+}
+
+static evWaitList *
+evNewWaitList(evContext_p *ctx) {
+ evWaitList *new;
+
+ NEW(new);
+ if (new == NULL)
+ return (NULL);
+ new->first = new->last = NULL;
+ new->prev = NULL;
+ new->next = ctx->waitLists;
+ if (new->next != NULL)
+ new->next->prev = new;
+ ctx->waitLists = new;
+ return (new);
+}
+
+static void
+evFreeWaitList(evContext_p *ctx, evWaitList *this) {
+
+ INSIST(this != NULL);
+
+ if (this->prev != NULL)
+ this->prev->next = this->next;
+ else
+ ctx->waitLists = this->next;
+ if (this->next != NULL)
+ this->next->prev = this->prev;
+ FREE(this);
+}
+
+static evWaitList *
+evGetWaitList(evContext_p *ctx, const void *tag, int should_create) {
+ evWaitList *this;
+
+ for (this = ctx->waitLists; this != NULL; this = this->next) {
+ if (this->first != NULL && this->first->tag == tag)
+ break;
+ }
+ if (this == NULL && should_create)
+ this = evNewWaitList(ctx);
+ return (this);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/eventlib.c b/usr/src/lib/libresolv2_joy/common/isc/eventlib.c
new file mode 100644
index 0000000000..be4a7848b9
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/eventlib.c
@@ -0,0 +1,933 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1995-1999 by Internet Software Consortium
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* eventlib.c - implement glue for the eventlib
+ * vix 09sep95 [initial]
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: eventlib.c,v 1.10 2006/03/09 23:57:56 marka Exp $";
+#endif
+
+#include "port_before.h"
+#include "fd_setsize.h"
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/stat.h>
+#ifdef SOLARIS2
+#include <limits.h>
+#endif /* SOLARIS2 */
+
+#include <errno.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <isc/eventlib.h>
+#include <isc/assertions.h>
+#include "eventlib_p.h"
+
+#include "port_after.h"
+
+int __evOptMonoTime;
+
+#ifdef USE_POLL
+#define pselect Pselect
+#endif /* USE_POLL */
+
+/* Forward. */
+
+#if defined(NEED_PSELECT) || defined(USE_POLL)
+static int pselect(int, void *, void *, void *,
+ struct timespec *,
+ const sigset_t *);
+#endif
+
+int __evOptMonoTime;
+
+/* Public. */
+
+int
+evCreate(evContext *opaqueCtx) {
+ evContext_p *ctx;
+
+ /* Make sure the memory heap is initialized. */
+ if (meminit(0, 0) < 0 && errno != EEXIST)
+ return (-1);
+
+ OKNEW(ctx);
+
+ /* Global. */
+ ctx->cur = NULL;
+
+ /* Debugging. */
+ ctx->debug = 0;
+ ctx->output = NULL;
+
+ /* Connections. */
+ ctx->conns = NULL;
+ INIT_LIST(ctx->accepts);
+
+ /* Files. */
+ ctx->files = NULL;
+#ifdef USE_POLL
+ ctx->pollfds = NULL;
+ ctx->maxnfds = 0;
+ ctx->firstfd = 0;
+ emulMaskInit(ctx, rdLast, EV_READ, 1);
+ emulMaskInit(ctx, rdNext, EV_READ, 0);
+ emulMaskInit(ctx, wrLast, EV_WRITE, 1);
+ emulMaskInit(ctx, wrNext, EV_WRITE, 0);
+ emulMaskInit(ctx, exLast, EV_EXCEPT, 1);
+ emulMaskInit(ctx, exNext, EV_EXCEPT, 0);
+ emulMaskInit(ctx, nonblockBefore, EV_WASNONBLOCKING, 0);
+#endif /* USE_POLL */
+ FD_ZERO(&ctx->rdNext);
+ FD_ZERO(&ctx->wrNext);
+ FD_ZERO(&ctx->exNext);
+ FD_ZERO(&ctx->nonblockBefore);
+ ctx->fdMax = -1;
+ ctx->fdNext = NULL;
+ ctx->fdCount = 0; /*%< Invalidate {rd,wr,ex}Last. */
+#ifndef USE_POLL
+ ctx->highestFD = FD_SETSIZE - 1;
+ memset(ctx->fdTable, 0, sizeof ctx->fdTable);
+#else
+ ctx->highestFD = INT_MAX / sizeof(struct pollfd);
+ ctx->fdTable = NULL;
+#endif /* USE_POLL */
+#ifdef EVENTLIB_TIME_CHECKS
+ ctx->lastFdCount = 0;
+#endif
+
+ /* Streams. */
+ ctx->streams = NULL;
+ ctx->strDone = NULL;
+ ctx->strLast = NULL;
+
+ /* Timers. */
+ ctx->lastEventTime = evNowTime();
+#ifdef EVENTLIB_TIME_CHECKS
+ ctx->lastSelectTime = ctx->lastEventTime;
+#endif
+ ctx->timers = evCreateTimers(ctx);
+ if (ctx->timers == NULL)
+ return (-1);
+
+ /* Waits. */
+ ctx->waitLists = NULL;
+ ctx->waitDone.first = ctx->waitDone.last = NULL;
+ ctx->waitDone.prev = ctx->waitDone.next = NULL;
+
+ opaqueCtx->opaque = ctx;
+ return (0);
+}
+
+void
+evSetDebug(evContext opaqueCtx, int level, FILE *output) {
+ evContext_p *ctx = opaqueCtx.opaque;
+
+ ctx->debug = level;
+ ctx->output = output;
+}
+
+int
+evDestroy(evContext opaqueCtx) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ int revs = 424242; /*%< Doug Adams. */
+ evWaitList *this_wl, *next_wl;
+ evWait *this_wait, *next_wait;
+
+ /* Connections. */
+ while (revs-- > 0 && ctx->conns != NULL) {
+ evConnID id;
+
+ id.opaque = ctx->conns;
+ (void) evCancelConn(opaqueCtx, id);
+ }
+ INSIST(revs >= 0);
+
+ /* Streams. */
+ while (revs-- > 0 && ctx->streams != NULL) {
+ evStreamID id;
+
+ id.opaque = ctx->streams;
+ (void) evCancelRW(opaqueCtx, id);
+ }
+
+ /* Files. */
+ while (revs-- > 0 && ctx->files != NULL) {
+ evFileID id;
+
+ id.opaque = ctx->files;
+ (void) evDeselectFD(opaqueCtx, id);
+ }
+ INSIST(revs >= 0);
+
+ /* Timers. */
+ evDestroyTimers(ctx);
+
+ /* Waits. */
+ for (this_wl = ctx->waitLists;
+ revs-- > 0 && this_wl != NULL;
+ this_wl = next_wl) {
+ next_wl = this_wl->next;
+ for (this_wait = this_wl->first;
+ revs-- > 0 && this_wait != NULL;
+ this_wait = next_wait) {
+ next_wait = this_wait->next;
+ FREE(this_wait);
+ }
+ FREE(this_wl);
+ }
+ for (this_wait = ctx->waitDone.first;
+ revs-- > 0 && this_wait != NULL;
+ this_wait = next_wait) {
+ next_wait = this_wait->next;
+ FREE(this_wait);
+ }
+
+ FREE(ctx);
+ return (0);
+}
+
+int
+evGetNext(evContext opaqueCtx, evEvent *opaqueEv, int options) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ struct timespec nextTime;
+ evTimer *nextTimer;
+ evEvent_p *new;
+ int x, pselect_errno, timerPast;
+#ifdef EVENTLIB_TIME_CHECKS
+ struct timespec interval;
+#endif
+
+ /* Ensure that exactly one of EV_POLL or EV_WAIT was specified. */
+ x = ((options & EV_POLL) != 0) + ((options & EV_WAIT) != 0);
+ if (x != 1)
+ EV_ERR(EINVAL);
+
+ /* Get the time of day. We'll do this again after select() blocks. */
+ ctx->lastEventTime = evNowTime();
+
+ again:
+ /* Finished accept()'s do not require a select(). */
+ if (!EMPTY(ctx->accepts)) {
+ OKNEW(new);
+ new->type = Accept;
+ new->u.accept.this = HEAD(ctx->accepts);
+ UNLINK(ctx->accepts, HEAD(ctx->accepts), link);
+ opaqueEv->opaque = new;
+ return (0);
+ }
+
+ /* Stream IO does not require a select(). */
+ if (ctx->strDone != NULL) {
+ OKNEW(new);
+ new->type = Stream;
+ new->u.stream.this = ctx->strDone;
+ ctx->strDone = ctx->strDone->nextDone;
+ if (ctx->strDone == NULL)
+ ctx->strLast = NULL;
+ opaqueEv->opaque = new;
+ return (0);
+ }
+
+ /* Waits do not require a select(). */
+ if (ctx->waitDone.first != NULL) {
+ OKNEW(new);
+ new->type = Wait;
+ new->u.wait.this = ctx->waitDone.first;
+ ctx->waitDone.first = ctx->waitDone.first->next;
+ if (ctx->waitDone.first == NULL)
+ ctx->waitDone.last = NULL;
+ opaqueEv->opaque = new;
+ return (0);
+ }
+
+ /* Get the status and content of the next timer. */
+ if ((nextTimer = heap_element(ctx->timers, 1)) != NULL) {
+ nextTime = nextTimer->due;
+ timerPast = (evCmpTime(nextTime, ctx->lastEventTime) <= 0);
+ } else
+ timerPast = 0; /*%< Make gcc happy. */
+ evPrintf(ctx, 9, "evGetNext: fdCount %d\n", ctx->fdCount);
+ if (ctx->fdCount == 0) {
+ static const struct timespec NoTime = {0, 0L};
+ enum { JustPoll, Block, Timer } m;
+ struct timespec t, *tp;
+
+ /* Are there any events at all? */
+ if ((options & EV_WAIT) != 0 && !nextTimer && ctx->fdMax == -1)
+ EV_ERR(ENOENT);
+
+ /* Figure out what select()'s timeout parameter should be. */
+ if ((options & EV_POLL) != 0) {
+ m = JustPoll;
+ t = NoTime;
+ tp = &t;
+ } else if (nextTimer == NULL) {
+ m = Block;
+ /* ``t'' unused. */
+ tp = NULL;
+ } else if (timerPast) {
+ m = JustPoll;
+ t = NoTime;
+ tp = &t;
+ } else {
+ m = Timer;
+ /* ``t'' filled in later. */
+ tp = &t;
+ }
+#ifdef EVENTLIB_TIME_CHECKS
+ if (ctx->debug > 0) {
+ interval = evSubTime(ctx->lastEventTime,
+ ctx->lastSelectTime);
+ if (interval.tv_sec > 0 || interval.tv_nsec > 0)
+ evPrintf(ctx, 1,
+ "time between pselect() %u.%09u count %d\n",
+ interval.tv_sec, interval.tv_nsec,
+ ctx->lastFdCount);
+ }
+#endif
+ do {
+#ifndef USE_POLL
+ /* XXX need to copy only the bits we are using. */
+ ctx->rdLast = ctx->rdNext;
+ ctx->wrLast = ctx->wrNext;
+ ctx->exLast = ctx->exNext;
+#else
+ /*
+ * The pollfd structure uses separate fields for
+ * the input and output events (corresponding to
+ * the ??Next and ??Last fd sets), so there's no
+ * need to copy one to the other.
+ */
+#endif /* USE_POLL */
+ if (m == Timer) {
+ INSIST(tp == &t);
+ t = evSubTime(nextTime, ctx->lastEventTime);
+ }
+
+ /* XXX should predict system's earliness and adjust. */
+ x = pselect(ctx->fdMax+1,
+ &ctx->rdLast, &ctx->wrLast, &ctx->exLast,
+ tp, NULL);
+ pselect_errno = errno;
+
+#ifndef USE_POLL
+ evPrintf(ctx, 4, "select() returns %d (err: %s)\n",
+ x, (x == -1) ? strerror(errno) : "none");
+#else
+ evPrintf(ctx, 4, "poll() returns %d (err: %s)\n",
+ x, (x == -1) ? strerror(errno) : "none");
+#endif /* USE_POLL */
+ /* Anything but a poll can change the time. */
+ if (m != JustPoll)
+ ctx->lastEventTime = evNowTime();
+
+ /* Select() likes to finish about 10ms early. */
+ } while (x == 0 && m == Timer &&
+ evCmpTime(ctx->lastEventTime, nextTime) < 0);
+#ifdef EVENTLIB_TIME_CHECKS
+ ctx->lastSelectTime = ctx->lastEventTime;
+#endif
+ if (x < 0) {
+ if (pselect_errno == EINTR) {
+ if ((options & EV_NULL) != 0)
+ goto again;
+ OKNEW(new);
+ new->type = Null;
+ /* No data. */
+ opaqueEv->opaque = new;
+ return (0);
+ }
+ if (pselect_errno == EBADF) {
+ for (x = 0; x <= ctx->fdMax; x++) {
+ struct stat sb;
+
+ if (FD_ISSET(x, &ctx->rdNext) == 0 &&
+ FD_ISSET(x, &ctx->wrNext) == 0 &&
+ FD_ISSET(x, &ctx->exNext) == 0)
+ continue;
+ if (fstat(x, &sb) == -1 &&
+ errno == EBADF)
+ evPrintf(ctx, 1, "EBADF: %d\n",
+ x);
+ }
+ abort();
+ }
+ EV_ERR(pselect_errno);
+ }
+ if (x == 0 && (nextTimer == NULL || !timerPast) &&
+ (options & EV_POLL))
+ EV_ERR(EWOULDBLOCK);
+ ctx->fdCount = x;
+#ifdef EVENTLIB_TIME_CHECKS
+ ctx->lastFdCount = x;
+#endif
+ }
+ INSIST(nextTimer || ctx->fdCount);
+
+ /* Timers go first since we'd like them to be accurate. */
+ if (nextTimer && !timerPast) {
+ /* Has anything happened since we blocked? */
+ timerPast = (evCmpTime(nextTime, ctx->lastEventTime) <= 0);
+ }
+ if (nextTimer && timerPast) {
+ OKNEW(new);
+ new->type = Timer;
+ new->u.timer.this = nextTimer;
+ opaqueEv->opaque = new;
+ return (0);
+ }
+
+ /* No timers, so there should be a ready file descriptor. */
+ x = 0;
+ while (ctx->fdCount > 0) {
+ evFile *fid;
+ int fd, eventmask;
+
+ if (ctx->fdNext == NULL) {
+ if (++x == 2) {
+ /*
+ * Hitting the end twice means that the last
+ * select() found some FD's which have since
+ * been deselected.
+ *
+ * On some systems, the count returned by
+ * selects is the total number of bits in
+ * all masks that are set, and on others it's
+ * the number of fd's that have some bit set,
+ * and on others, it's just broken. We
+ * always assume that it's the number of
+ * bits set in all masks, because that's what
+ * the man page says it should do, and
+ * the worst that can happen is we do an
+ * extra select().
+ */
+ ctx->fdCount = 0;
+ break;
+ }
+ ctx->fdNext = ctx->files;
+ }
+ fid = ctx->fdNext;
+ ctx->fdNext = fid->next;
+
+ fd = fid->fd;
+ eventmask = 0;
+ if (FD_ISSET(fd, &ctx->rdLast))
+ eventmask |= EV_READ;
+ if (FD_ISSET(fd, &ctx->wrLast))
+ eventmask |= EV_WRITE;
+ if (FD_ISSET(fd, &ctx->exLast))
+ eventmask |= EV_EXCEPT;
+ eventmask &= fid->eventmask;
+ if (eventmask != 0) {
+ if ((eventmask & EV_READ) != 0) {
+ FD_CLR(fd, &ctx->rdLast);
+ ctx->fdCount--;
+ }
+ if ((eventmask & EV_WRITE) != 0) {
+ FD_CLR(fd, &ctx->wrLast);
+ ctx->fdCount--;
+ }
+ if ((eventmask & EV_EXCEPT) != 0) {
+ FD_CLR(fd, &ctx->exLast);
+ ctx->fdCount--;
+ }
+ OKNEW(new);
+ new->type = File;
+ new->u.file.this = fid;
+ new->u.file.eventmask = eventmask;
+ opaqueEv->opaque = new;
+ return (0);
+ }
+ }
+ if (ctx->fdCount < 0) {
+ /*
+ * select()'s count is off on a number of systems, and
+ * can result in fdCount < 0.
+ */
+ evPrintf(ctx, 4, "fdCount < 0 (%d)\n", ctx->fdCount);
+ ctx->fdCount = 0;
+ }
+
+ /* We get here if the caller deselect()'s an FD. Gag me with a goto. */
+ goto again;
+}
+
+int
+evDispatch(evContext opaqueCtx, evEvent opaqueEv) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evEvent_p *ev = opaqueEv.opaque;
+#ifdef EVENTLIB_TIME_CHECKS
+ void *func;
+ struct timespec start_time;
+ struct timespec interval;
+#endif
+
+#ifdef EVENTLIB_TIME_CHECKS
+ if (ctx->debug > 0)
+ start_time = evNowTime();
+#endif
+ ctx->cur = ev;
+ switch (ev->type) {
+ case Accept: {
+ evAccept *this = ev->u.accept.this;
+
+ evPrintf(ctx, 5,
+ "Dispatch.Accept: fd %d -> %d, func %p, uap %p\n",
+ this->conn->fd, this->fd,
+ this->conn->func, this->conn->uap);
+ errno = this->ioErrno;
+ (this->conn->func)(opaqueCtx, this->conn->uap, this->fd,
+ &this->la, this->lalen,
+ &this->ra, this->ralen);
+#ifdef EVENTLIB_TIME_CHECKS
+ func = this->conn->func;
+#endif
+ break;
+ }
+ case File: {
+ evFile *this = ev->u.file.this;
+ int eventmask = ev->u.file.eventmask;
+
+ evPrintf(ctx, 5,
+ "Dispatch.File: fd %d, mask 0x%x, func %p, uap %p\n",
+ this->fd, this->eventmask, this->func, this->uap);
+ (this->func)(opaqueCtx, this->uap, this->fd, eventmask);
+#ifdef EVENTLIB_TIME_CHECKS
+ func = this->func;
+#endif
+ break;
+ }
+ case Stream: {
+ evStream *this = ev->u.stream.this;
+
+ evPrintf(ctx, 5,
+ "Dispatch.Stream: fd %d, func %p, uap %p\n",
+ this->fd, this->func, this->uap);
+ errno = this->ioErrno;
+ (this->func)(opaqueCtx, this->uap, this->fd, this->ioDone);
+#ifdef EVENTLIB_TIME_CHECKS
+ func = this->func;
+#endif
+ break;
+ }
+ case Timer: {
+ evTimer *this = ev->u.timer.this;
+
+ evPrintf(ctx, 5, "Dispatch.Timer: func %p, uap %p\n",
+ this->func, this->uap);
+ (this->func)(opaqueCtx, this->uap, this->due, this->inter);
+#ifdef EVENTLIB_TIME_CHECKS
+ func = this->func;
+#endif
+ break;
+ }
+ case Wait: {
+ evWait *this = ev->u.wait.this;
+
+ evPrintf(ctx, 5,
+ "Dispatch.Wait: tag %p, func %p, uap %p\n",
+ this->tag, this->func, this->uap);
+ (this->func)(opaqueCtx, this->uap, this->tag);
+#ifdef EVENTLIB_TIME_CHECKS
+ func = this->func;
+#endif
+ break;
+ }
+ case Null: {
+ /* No work. */
+#ifdef EVENTLIB_TIME_CHECKS
+ func = NULL;
+#endif
+ break;
+ }
+ default: {
+ abort();
+ }
+ }
+#ifdef EVENTLIB_TIME_CHECKS
+ if (ctx->debug > 0) {
+ interval = evSubTime(evNowTime(), start_time);
+ /*
+ * Complain if it took longer than 50 milliseconds.
+ *
+ * We call getuid() to make an easy to find mark in a kernel
+ * trace.
+ */
+ if (interval.tv_sec > 0 || interval.tv_nsec > 50000000)
+ evPrintf(ctx, 1,
+ "dispatch interval %u.%09u uid %d type %d func %p\n",
+ interval.tv_sec, interval.tv_nsec,
+ getuid(), ev->type, func);
+ }
+#endif
+ ctx->cur = NULL;
+ evDrop(opaqueCtx, opaqueEv);
+ return (0);
+}
+
+void
+evDrop(evContext opaqueCtx, evEvent opaqueEv) {
+ evContext_p *ctx = opaqueCtx.opaque;
+ evEvent_p *ev = opaqueEv.opaque;
+
+ switch (ev->type) {
+ case Accept: {
+ FREE(ev->u.accept.this);
+ break;
+ }
+ case File: {
+ /* No work. */
+ break;
+ }
+ case Stream: {
+ evStreamID id;
+
+ id.opaque = ev->u.stream.this;
+ (void) evCancelRW(opaqueCtx, id);
+ break;
+ }
+ case Timer: {
+ evTimer *this = ev->u.timer.this;
+ evTimerID opaque;
+
+ /* Check to see whether the user func cleared the timer. */
+ if (heap_element(ctx->timers, this->index) != this) {
+ evPrintf(ctx, 5, "Dispatch.Timer: timer rm'd?\n");
+ break;
+ }
+ /*
+ * Timer is still there. Delete it if it has expired,
+ * otherwise set it according to its next interval.
+ */
+ if (this->inter.tv_sec == (time_t)0 &&
+ this->inter.tv_nsec == 0L) {
+ opaque.opaque = this;
+ (void) evClearTimer(opaqueCtx, opaque);
+ } else {
+ opaque.opaque = this;
+ (void) evResetTimer(opaqueCtx, opaque, this->func,
+ this->uap,
+ evAddTime((this->mode & EV_TMR_RATE) ?
+ this->due :
+ ctx->lastEventTime,
+ this->inter),
+ this->inter);
+ }
+ break;
+ }
+ case Wait: {
+ FREE(ev->u.wait.this);
+ break;
+ }
+ case Null: {
+ /* No work. */
+ break;
+ }
+ default: {
+ abort();
+ }
+ }
+ FREE(ev);
+}
+
+int
+evMainLoop(evContext opaqueCtx) {
+ evEvent event;
+ int x;
+
+ while ((x = evGetNext(opaqueCtx, &event, EV_WAIT)) == 0)
+ if ((x = evDispatch(opaqueCtx, event)) < 0)
+ break;
+ return (x);
+}
+
+int
+evHighestFD(evContext opaqueCtx) {
+ evContext_p *ctx = opaqueCtx.opaque;
+
+ return (ctx->highestFD);
+}
+
+void
+evPrintf(const evContext_p *ctx, int level, const char *fmt, ...) {
+ va_list ap;
+
+ va_start(ap, fmt);
+ if (ctx->output != NULL && ctx->debug >= level) {
+ vfprintf(ctx->output, fmt, ap);
+ fflush(ctx->output);
+ }
+ va_end(ap);
+}
+
+int
+evSetOption(evContext *opaqueCtx, const char *option, int value) {
+ /* evContext_p *ctx = opaqueCtx->opaque; */
+
+ UNUSED(opaqueCtx);
+ UNUSED(value);
+#ifndef CLOCK_MONOTONIC
+ UNUSED(option);
+#endif
+
+#ifdef CLOCK_MONOTONIC
+ if (strcmp(option, "monotime") == 0) {
+ if (opaqueCtx != NULL)
+ errno = EINVAL;
+ if (value == 0 || value == 1) {
+ __evOptMonoTime = value;
+ return (0);
+ } else {
+ errno = EINVAL;
+ return (-1);
+ }
+ }
+#endif
+ errno = ENOENT;
+ return (-1);
+}
+
+int
+evGetOption(evContext *opaqueCtx, const char *option, int *value) {
+ /* evContext_p *ctx = opaqueCtx->opaque; */
+
+ UNUSED(opaqueCtx);
+#ifndef CLOCK_MONOTONIC
+ UNUSED(value);
+ UNUSED(option);
+#endif
+
+#ifdef CLOCK_MONOTONIC
+ if (strcmp(option, "monotime") == 0) {
+ if (opaqueCtx != NULL)
+ errno = EINVAL;
+ *value = __evOptMonoTime;
+ return (0);
+ }
+#endif
+ errno = ENOENT;
+ return (-1);
+}
+
+#if defined(NEED_PSELECT) || defined(USE_POLL)
+/* XXX needs to move to the porting library. */
+static int
+pselect(int nfds, void *rfds, void *wfds, void *efds,
+ struct timespec *tsp,
+ const sigset_t *sigmask)
+{
+ struct timeval tv, *tvp;
+ sigset_t sigs;
+ int n;
+#ifdef USE_POLL
+ int polltimeout = INFTIM;
+ evContext_p *ctx;
+ struct pollfd *fds;
+ nfds_t pnfds;
+
+ UNUSED(nfds);
+#endif /* USE_POLL */
+
+ if (tsp) {
+ tvp = &tv;
+ tv = evTimeVal(*tsp);
+#ifdef USE_POLL
+ polltimeout = 1000 * tv.tv_sec + tv.tv_usec / 1000;
+#endif /* USE_POLL */
+ } else
+ tvp = NULL;
+ if (sigmask)
+ sigprocmask(SIG_SETMASK, sigmask, &sigs);
+#ifndef USE_POLL
+ n = select(nfds, rfds, wfds, efds, tvp);
+#else
+ /*
+ * rfds, wfds, and efds should all be from the same evContext_p,
+ * so any of them will do. If they're all NULL, the caller is
+ * presumably calling us to block.
+ */
+ if (rfds != NULL)
+ ctx = ((__evEmulMask *)rfds)->ctx;
+ else if (wfds != NULL)
+ ctx = ((__evEmulMask *)wfds)->ctx;
+ else if (efds != NULL)
+ ctx = ((__evEmulMask *)efds)->ctx;
+ else
+ ctx = NULL;
+ if (ctx != NULL && ctx->fdMax != -1) {
+ fds = &(ctx->pollfds[ctx->firstfd]);
+ pnfds = ctx->fdMax - ctx->firstfd + 1;
+ } else {
+ fds = NULL;
+ pnfds = 0;
+ }
+ n = poll(fds, pnfds, polltimeout);
+ if (n > 0) {
+ int i, e;
+
+ INSIST(ctx != NULL);
+ for (e = 0, i = ctx->firstfd; i <= ctx->fdMax; i++) {
+ if (ctx->pollfds[i].fd < 0)
+ continue;
+ if (FD_ISSET(i, &ctx->rdLast))
+ e++;
+ if (FD_ISSET(i, &ctx->wrLast))
+ e++;
+ if (FD_ISSET(i, &ctx->exLast))
+ e++;
+ }
+ n = e;
+ }
+#endif /* USE_POLL */
+ if (sigmask)
+ sigprocmask(SIG_SETMASK, &sigs, NULL);
+ if (tsp)
+ *tsp = evTimeSpec(tv);
+ return (n);
+}
+#endif
+
+#ifdef USE_POLL
+int
+evPollfdRealloc(evContext_p *ctx, int pollfd_chunk_size, int fd) {
+
+ int i, maxnfds;
+ void *pollfds, *fdTable;
+
+ if (fd < ctx->maxnfds)
+ return (0);
+
+ /* Don't allow ridiculously small values for pollfd_chunk_size */
+ if (pollfd_chunk_size < 20)
+ pollfd_chunk_size = 20;
+
+ maxnfds = (1 + (fd/pollfd_chunk_size)) * pollfd_chunk_size;
+
+ pollfds = realloc(ctx->pollfds, maxnfds * sizeof(*ctx->pollfds));
+ if (pollfds != NULL)
+ ctx->pollfds = pollfds;
+ fdTable = realloc(ctx->fdTable, maxnfds * sizeof(*ctx->fdTable));
+ if (fdTable != NULL)
+ ctx->fdTable = fdTable;
+
+ if (pollfds == NULL || fdTable == NULL) {
+ evPrintf(ctx, 2, "pollfd() realloc (%ld) failed\n",
+ (long)maxnfds*sizeof(struct pollfd));
+ return (-1);
+ }
+
+ for (i = ctx->maxnfds; i < maxnfds; i++) {
+ ctx->pollfds[i].fd = -1;
+ ctx->pollfds[i].events = 0;
+ ctx->fdTable[i] = 0;
+ }
+
+ ctx->maxnfds = maxnfds;
+
+ return (0);
+}
+
+/* Find the appropriate 'events' or 'revents' field in the pollfds array */
+short *
+__fd_eventfield(int fd, __evEmulMask *maskp) {
+
+ evContext_p *ctx = (evContext_p *)maskp->ctx;
+
+ if (!maskp->result || maskp->type == EV_WASNONBLOCKING)
+ return (&(ctx->pollfds[fd].events));
+ else
+ return (&(ctx->pollfds[fd].revents));
+}
+
+/* Translate to poll(2) event */
+short
+__poll_event(__evEmulMask *maskp) {
+
+ switch ((maskp)->type) {
+ case EV_READ:
+ return (POLLRDNORM);
+ case EV_WRITE:
+ return (POLLWRNORM);
+ case EV_EXCEPT:
+ return (POLLRDBAND | POLLPRI | POLLWRBAND);
+ case EV_WASNONBLOCKING:
+ return (POLLHUP);
+ default:
+ return (0);
+ }
+}
+
+/*
+ * Clear the events corresponding to the specified mask. If this leaves
+ * the events mask empty (apart from the POLLHUP bit), set the fd field
+ * to -1 so that poll(2) will ignore this fd.
+ */
+void
+__fd_clr(int fd, __evEmulMask *maskp) {
+
+ evContext_p *ctx = maskp->ctx;
+
+ *__fd_eventfield(fd, maskp) &= ~__poll_event(maskp);
+ if ((ctx->pollfds[fd].events & ~POLLHUP) == 0) {
+ ctx->pollfds[fd].fd = -1;
+ if (fd == ctx->fdMax)
+ while (ctx->fdMax > ctx->firstfd &&
+ ctx->pollfds[ctx->fdMax].fd < 0)
+ ctx->fdMax--;
+ if (fd == ctx->firstfd)
+ while (ctx->firstfd <= ctx->fdMax &&
+ ctx->pollfds[ctx->firstfd].fd < 0)
+ ctx->firstfd++;
+ /*
+ * Do we have a empty set of descriptors?
+ */
+ if (ctx->firstfd > ctx->fdMax) {
+ ctx->fdMax = -1;
+ ctx->firstfd = 0;
+ }
+ }
+}
+
+/*
+ * Set the events bit(s) corresponding to the specified mask. If the events
+ * field has any other bits than POLLHUP set, also set the fd field so that
+ * poll(2) will watch this fd.
+ */
+void
+__fd_set(int fd, __evEmulMask *maskp) {
+
+ evContext_p *ctx = maskp->ctx;
+
+ *__fd_eventfield(fd, maskp) |= __poll_event(maskp);
+ if ((ctx->pollfds[fd].events & ~POLLHUP) != 0) {
+ ctx->pollfds[fd].fd = fd;
+ if (fd < ctx->firstfd || ctx->fdMax == -1)
+ ctx->firstfd = fd;
+ if (fd > ctx->fdMax)
+ ctx->fdMax = fd;
+ }
+}
+#endif /* USE_POLL */
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/eventlib_p.h b/usr/src/lib/libresolv2_joy/common/isc/eventlib_p.h
new file mode 100644
index 0000000000..0a3614ab23
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/eventlib_p.h
@@ -0,0 +1,281 @@
+/*
+ * Copyright (c) 2005 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1995-1999 by Internet Software Consortium
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*! \file
+ * \brief private interfaces for eventlib
+ * \author vix 09sep95 [initial]
+ *
+ * $Id: eventlib_p.h,v 1.9 2006/03/09 23:57:56 marka Exp $
+ */
+
+#ifndef _EVENTLIB_P_H
+#define _EVENTLIB_P_H
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <sys/un.h>
+
+#define EVENTLIB_DEBUG 1
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <isc/heap.h>
+#include <isc/list.h>
+#include <isc/memcluster.h>
+
+#define EV_MASK_ALL (EV_READ | EV_WRITE | EV_EXCEPT)
+#define EV_ERR(e) return (errno = (e), -1)
+#define OK(x) if ((x) < 0) EV_ERR(errno); else (void)NULL
+#define OKFREE(x, y) if ((x) < 0) { FREE((y)); EV_ERR(errno); } \
+ else (void)NULL
+
+#define NEW(p) if (((p) = memget(sizeof *(p))) != NULL) \
+ FILL(p); \
+ else \
+ (void)NULL;
+#define OKNEW(p) if (!((p) = memget(sizeof *(p)))) { \
+ errno = ENOMEM; \
+ return (-1); \
+ } else \
+ FILL(p)
+#define FREE(p) memput((p), sizeof *(p))
+
+#if EVENTLIB_DEBUG
+#define FILL(p) memset((p), 0xF5, sizeof *(p))
+#else
+#define FILL(p)
+#endif
+
+#ifdef USE_POLL
+#ifdef HAVE_STROPTS_H
+#include <stropts.h>
+#endif
+#include <poll.h>
+#endif /* USE_POLL */
+
+typedef struct evConn {
+ evConnFunc func;
+ void * uap;
+ int fd;
+ int flags;
+#define EV_CONN_LISTEN 0x0001 /*%< Connection is a listener. */
+#define EV_CONN_SELECTED 0x0002 /*%< evSelectFD(conn->file). */
+#define EV_CONN_BLOCK 0x0004 /*%< Listener fd was blocking. */
+ evFileID file;
+ struct evConn * prev;
+ struct evConn * next;
+} evConn;
+
+typedef struct evAccept {
+ int fd;
+ union {
+ struct sockaddr sa;
+ struct sockaddr_in in;
+#ifndef NO_SOCKADDR_UN
+ struct sockaddr_un un;
+#endif
+ } la;
+ ISC_SOCKLEN_T lalen;
+ union {
+ struct sockaddr sa;
+ struct sockaddr_in in;
+#ifndef NO_SOCKADDR_UN
+ struct sockaddr_un un;
+#endif
+ } ra;
+ ISC_SOCKLEN_T ralen;
+ int ioErrno;
+ evConn * conn;
+ LINK(struct evAccept) link;
+} evAccept;
+
+typedef struct evFile {
+ evFileFunc func;
+ void * uap;
+ int fd;
+ int eventmask;
+ int preemptive;
+ struct evFile * prev;
+ struct evFile * next;
+ struct evFile * fdprev;
+ struct evFile * fdnext;
+} evFile;
+
+typedef struct evStream {
+ evStreamFunc func;
+ void * uap;
+ evFileID file;
+ evTimerID timer;
+ int flags;
+#define EV_STR_TIMEROK 0x0001 /*%< IFF timer valid. */
+ int fd;
+ struct iovec * iovOrig;
+ int iovOrigCount;
+ struct iovec * iovCur;
+ int iovCurCount;
+ int ioTotal;
+ int ioDone;
+ int ioErrno;
+ struct evStream *prevDone, *nextDone;
+ struct evStream *prev, *next;
+} evStream;
+
+typedef struct evTimer {
+ evTimerFunc func;
+ void * uap;
+ struct timespec due, inter;
+ int index;
+ int mode;
+#define EV_TMR_RATE 1
+} evTimer;
+
+typedef struct evWait {
+ evWaitFunc func;
+ void * uap;
+ const void * tag;
+ struct evWait * next;
+} evWait;
+
+typedef struct evWaitList {
+ evWait * first;
+ evWait * last;
+ struct evWaitList * prev;
+ struct evWaitList * next;
+} evWaitList;
+
+typedef struct evEvent_p {
+ enum { Accept, File, Stream, Timer, Wait, Free, Null } type;
+ union {
+ struct { evAccept *this; } accept;
+ struct { evFile *this; int eventmask; } file;
+ struct { evStream *this; } stream;
+ struct { evTimer *this; } timer;
+ struct { evWait *this; } wait;
+ struct { struct evEvent_p *next; } free;
+ struct { const void *placeholder; } null;
+ } u;
+} evEvent_p;
+
+#ifdef USE_POLL
+typedef struct {
+ void *ctx; /* pointer to the evContext_p */
+ uint32_t type; /* READ, WRITE, EXCEPT, nonblk */
+ uint32_t result; /* 1 => revents, 0 => events */
+} __evEmulMask;
+
+#define emulMaskInit(ctx, field, ev, lastnext) \
+ ctx->field.ctx = ctx; \
+ ctx->field.type = ev; \
+ ctx->field.result = lastnext;
+
+extern short *__fd_eventfield(int fd, __evEmulMask *maskp);
+extern short __poll_event(__evEmulMask *maskp);
+extern void __fd_clr(int fd, __evEmulMask *maskp);
+extern void __fd_set(int fd, __evEmulMask *maskp);
+
+#undef FD_ZERO
+#define FD_ZERO(maskp)
+
+#undef FD_SET
+#define FD_SET(fd, maskp) \
+ __fd_set(fd, maskp)
+
+#undef FD_CLR
+#define FD_CLR(fd, maskp) \
+ __fd_clr(fd, maskp)
+
+#undef FD_ISSET
+#define FD_ISSET(fd, maskp) \
+ ((*__fd_eventfield(fd, maskp) & __poll_event(maskp)) != 0)
+
+#endif /* USE_POLL */
+
+typedef struct {
+ /* Global. */
+ const evEvent_p *cur;
+ /* Debugging. */
+ int debug;
+ FILE *output;
+ /* Connections. */
+ evConn *conns;
+ LIST(evAccept) accepts;
+ /* Files. */
+ evFile *files, *fdNext;
+#ifndef USE_POLL
+ fd_set rdLast, rdNext;
+ fd_set wrLast, wrNext;
+ fd_set exLast, exNext;
+ fd_set nonblockBefore;
+ int fdMax, fdCount, highestFD;
+ evFile *fdTable[FD_SETSIZE];
+#else
+ struct pollfd *pollfds; /* Allocated as needed */
+ evFile **fdTable; /* Ditto */
+ int maxnfds; /* # elements in above */
+ int firstfd; /* First active fd */
+ int fdMax; /* Last active fd */
+ int fdCount; /* # fd:s with I/O */
+ int highestFD; /* max fd allowed by OS */
+ __evEmulMask rdLast, rdNext;
+ __evEmulMask wrLast, wrNext;
+ __evEmulMask exLast, exNext;
+ __evEmulMask nonblockBefore;
+#endif /* USE_POLL */
+#ifdef EVENTLIB_TIME_CHECKS
+ struct timespec lastSelectTime;
+ int lastFdCount;
+#endif
+ /* Streams. */
+ evStream *streams;
+ evStream *strDone, *strLast;
+ /* Timers. */
+ struct timespec lastEventTime;
+ heap_context timers;
+ /* Waits. */
+ evWaitList *waitLists;
+ evWaitList waitDone;
+} evContext_p;
+
+/* eventlib.c */
+#define evPrintf __evPrintf
+void evPrintf(const evContext_p *ctx, int level, const char *fmt, ...)
+ ISC_FORMAT_PRINTF(3, 4);
+
+#ifdef USE_POLL
+extern int evPollfdRealloc(evContext_p *ctx, int pollfd_chunk_size, int fd);
+#endif /* USE_POLL */
+
+/* ev_timers.c */
+#define evCreateTimers __evCreateTimers
+heap_context evCreateTimers(const evContext_p *);
+#define evDestroyTimers __evDestroyTimers
+void evDestroyTimers(const evContext_p *);
+
+/* ev_waits.c */
+#define evFreeWait __evFreeWait
+evWait *evFreeWait(evContext_p *ctx, evWait *old);
+
+/* Global options */
+extern int __evOptMonoTime;
+
+#endif /*_EVENTLIB_P_H*/
diff --git a/usr/src/lib/libresolv2_joy/common/isc/heap.c b/usr/src/lib/libresolv2_joy/common/isc/heap.c
new file mode 100644
index 0000000000..3d22b6fc71
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/heap.c
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1997,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*%
+ * Heap implementation of priority queues adapted from the following:
+ *
+ * _Introduction to Algorithms_, Cormen, Leiserson, and Rivest,
+ * MIT Press / McGraw Hill, 1990, ISBN 0-262-03141-8, chapter 7.
+ *
+ * _Algorithms_, Second Edition, Sedgewick, Addison-Wesley, 1988,
+ * ISBN 0-201-06673-4, chapter 11.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: heap.c,v 1.4 2006/03/09 23:57:56 marka Exp $";
+#endif /* not lint */
+
+#include "port_before.h"
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include "port_after.h"
+
+#include <isc/heap.h>
+
+/*%
+ * Note: to make heap_parent and heap_left easy to compute, the first
+ * element of the heap array is not used; i.e. heap subscripts are 1-based,
+ * not 0-based.
+ */
+#define heap_parent(i) ((i) >> 1)
+#define heap_left(i) ((i) << 1)
+
+#define ARRAY_SIZE_INCREMENT 512
+
+heap_context
+heap_new(heap_higher_priority_func higher_priority, heap_index_func index,
+ int array_size_increment) {
+ heap_context ctx;
+
+ if (higher_priority == NULL)
+ return (NULL);
+
+ ctx = (heap_context)malloc(sizeof (struct heap_context));
+ if (ctx == NULL)
+ return (NULL);
+
+ ctx->array_size = 0;
+ if (array_size_increment == 0)
+ ctx->array_size_increment = ARRAY_SIZE_INCREMENT;
+ else
+ ctx->array_size_increment = array_size_increment;
+ ctx->heap_size = 0;
+ ctx->heap = NULL;
+ ctx->higher_priority = higher_priority;
+ ctx->index = index;
+ return (ctx);
+}
+
+int
+heap_free(heap_context ctx) {
+ if (ctx == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ if (ctx->heap != NULL)
+ free(ctx->heap);
+ free(ctx);
+
+ return (0);
+}
+
+static int
+heap_resize(heap_context ctx) {
+ void **new_heap;
+
+ ctx->array_size += ctx->array_size_increment;
+ new_heap = (void **)realloc(ctx->heap,
+ (ctx->array_size) * (sizeof (void *)));
+ if (new_heap == NULL) {
+ errno = ENOMEM;
+ return (-1);
+ }
+ ctx->heap = new_heap;
+ return (0);
+}
+
+static void
+float_up(heap_context ctx, int i, void *elt) {
+ int p;
+
+ for ( p = heap_parent(i);
+ i > 1 && ctx->higher_priority(elt, ctx->heap[p]);
+ i = p, p = heap_parent(i) ) {
+ ctx->heap[i] = ctx->heap[p];
+ if (ctx->index != NULL)
+ (ctx->index)(ctx->heap[i], i);
+ }
+ ctx->heap[i] = elt;
+ if (ctx->index != NULL)
+ (ctx->index)(ctx->heap[i], i);
+}
+
+static void
+sink_down(heap_context ctx, int i, void *elt) {
+ int j, size, half_size;
+
+ size = ctx->heap_size;
+ half_size = size / 2;
+ while (i <= half_size) {
+ /* find smallest of the (at most) two children */
+ j = heap_left(i);
+ if (j < size && ctx->higher_priority(ctx->heap[j+1],
+ ctx->heap[j]))
+ j++;
+ if (ctx->higher_priority(elt, ctx->heap[j]))
+ break;
+ ctx->heap[i] = ctx->heap[j];
+ if (ctx->index != NULL)
+ (ctx->index)(ctx->heap[i], i);
+ i = j;
+ }
+ ctx->heap[i] = elt;
+ if (ctx->index != NULL)
+ (ctx->index)(ctx->heap[i], i);
+}
+
+int
+heap_insert(heap_context ctx, void *elt) {
+ int i;
+
+ if (ctx == NULL || elt == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ i = ++ctx->heap_size;
+ if (ctx->heap_size >= ctx->array_size && heap_resize(ctx) < 0)
+ return (-1);
+
+ float_up(ctx, i, elt);
+
+ return (0);
+}
+
+int
+heap_delete(heap_context ctx, int i) {
+ void *elt;
+ int less;
+
+ if (ctx == NULL || i < 1 || i > ctx->heap_size) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ if (i == ctx->heap_size) {
+ ctx->heap_size--;
+ } else {
+ elt = ctx->heap[ctx->heap_size--];
+ less = ctx->higher_priority(elt, ctx->heap[i]);
+ ctx->heap[i] = elt;
+ if (less)
+ float_up(ctx, i, ctx->heap[i]);
+ else
+ sink_down(ctx, i, ctx->heap[i]);
+ }
+
+ return (0);
+}
+
+int
+heap_increased(heap_context ctx, int i) {
+ if (ctx == NULL || i < 1 || i > ctx->heap_size) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ float_up(ctx, i, ctx->heap[i]);
+
+ return (0);
+}
+
+int
+heap_decreased(heap_context ctx, int i) {
+ if (ctx == NULL || i < 1 || i > ctx->heap_size) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ sink_down(ctx, i, ctx->heap[i]);
+
+ return (0);
+}
+
+void *
+heap_element(heap_context ctx, int i) {
+ if (ctx == NULL || i < 1 || i > ctx->heap_size) {
+ errno = EINVAL;
+ return (NULL);
+ }
+
+ return (ctx->heap[i]);
+}
+
+int
+heap_for_each(heap_context ctx, heap_for_each_func action, void *uap) {
+ int i;
+
+ if (ctx == NULL || action == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ for (i = 1; i <= ctx->heap_size; i++)
+ (action)(ctx->heap[i], uap);
+ return (0);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/hex.c b/usr/src/lib/libresolv2_joy/common/isc/hex.c
new file mode 100644
index 0000000000..e43be4f3b5
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/hex.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 2001 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <port_before.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+#include <isc/misc.h>
+#include <port_after.h>
+
+static const char hex[17] = "0123456789abcdef";
+
+int
+isc_gethexstring(unsigned char *buf, size_t len, int count, FILE *fp,
+ int *multiline)
+{
+ int c, n;
+ unsigned char x;
+ char *s;
+ int result = count;
+
+ x = 0; /*%< silence compiler */
+ n = 0;
+ while (count > 0) {
+ c = fgetc(fp);
+
+ if ((c == EOF) ||
+ (c == '\n' && !*multiline) ||
+ (c == '(' && *multiline) ||
+ (c == ')' && !*multiline))
+ goto formerr;
+ /* comment */
+ if (c == ';') {
+ do {
+ c = fgetc(fp);
+ } while (c != EOF && c != '\n');
+ if (c == '\n' && *multiline)
+ continue;
+ goto formerr;
+ }
+ /* white space */
+ if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
+ continue;
+ /* multiline */
+ if ('(' == c || c == ')') {
+ *multiline = (c == '(' /*)*/);
+ continue;
+ }
+ if ((s = strchr(hex, tolower(c))) == NULL)
+ goto formerr;
+ x = (x<<4) | (s - hex);
+ if (++n == 2) {
+ if (len > 0U) {
+ *buf++ = x;
+ len--;
+ } else
+ result = -1;
+ count--;
+ n = 0;
+ }
+ }
+ return (result);
+
+ formerr:
+ if (c == '\n')
+ ungetc(c, fp);
+ return (-1);
+}
+
+void
+isc_puthexstring(FILE *fp, const unsigned char *buf, size_t buflen,
+ size_t len1, size_t len2, const char *sep)
+{
+ size_t i = 0;
+
+ if (len1 < 4U)
+ len1 = 4;
+ if (len2 < 4U)
+ len2 = 4;
+ while (buflen > 0U) {
+ fputc(hex[(buf[0]>>4)&0xf], fp);
+ fputc(hex[buf[0]&0xf], fp);
+ i += 2;
+ buflen--;
+ buf++;
+ if (i >= len1 && sep != NULL) {
+ fputs(sep, fp);
+ i = 0;
+ len1 = len2;
+ }
+ }
+}
+
+void
+isc_tohex(const unsigned char *buf, size_t buflen, char *t) {
+ while (buflen > 0U) {
+ *t++ = hex[(buf[0]>>4)&0xf];
+ *t++ = hex[buf[0]&0xf];
+ buf++;
+ buflen--;
+ }
+ *t = '\0';
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/logging.c b/usr/src/lib/libresolv2_joy/common/isc/logging.c
new file mode 100644
index 0000000000..8c2af2b9e3
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/logging.c
@@ -0,0 +1,716 @@
+/*
+ * Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 1996-1999, 2001, 2003 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: logging.c,v 1.9 2008/11/14 02:36:51 marka Exp $";
+#endif /* not lint */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/stat.h>
+
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <syslog.h>
+#include <errno.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <isc/assertions.h>
+#include <isc/logging.h>
+#include <isc/memcluster.h>
+#include <isc/misc.h>
+
+#include "port_after.h"
+
+#include "logging_p.h"
+
+static const int syslog_priority[] = { LOG_DEBUG, LOG_INFO, LOG_NOTICE,
+ LOG_WARNING, LOG_ERR, LOG_CRIT };
+
+static const char *months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
+ "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
+
+static const char *level_text[] = {
+ "info: ", "notice: ", "warning: ", "error: ", "critical: "
+};
+
+static void
+version_rename(log_channel chan) {
+ unsigned int ver;
+ char old_name[PATH_MAX+1];
+ char new_name[PATH_MAX+1];
+
+ ver = chan->out.file.versions;
+ if (ver < 1)
+ return;
+ if (ver > LOG_MAX_VERSIONS)
+ ver = LOG_MAX_VERSIONS;
+ /*
+ * Need to have room for '.nn' (XXX assumes LOG_MAX_VERSIONS < 100)
+ */
+ if (strlen(chan->out.file.name) > (size_t)(PATH_MAX-3))
+ return;
+ for (ver--; ver > 0; ver--) {
+ sprintf(old_name, "%s.%d", chan->out.file.name, ver-1);
+ sprintf(new_name, "%s.%d", chan->out.file.name, ver);
+ (void)isc_movefile(old_name, new_name);
+ }
+ sprintf(new_name, "%s.0", chan->out.file.name);
+ (void)isc_movefile(chan->out.file.name, new_name);
+}
+
+FILE *
+log_open_stream(log_channel chan) {
+ FILE *stream;
+ int fd, flags;
+ struct stat sb;
+ int regular;
+
+ if (chan == NULL || chan->type != log_file) {
+ errno = EINVAL;
+ return (NULL);
+ }
+
+ /*
+ * Don't open already open streams
+ */
+ if (chan->out.file.stream != NULL)
+ return (chan->out.file.stream);
+
+ if (stat(chan->out.file.name, &sb) < 0) {
+ if (errno != ENOENT) {
+ syslog(LOG_ERR,
+ "log_open_stream: stat of %s failed: %s",
+ chan->out.file.name, strerror(errno));
+ chan->flags |= LOG_CHANNEL_BROKEN;
+ return (NULL);
+ }
+ regular = 1;
+ } else
+ regular = (sb.st_mode & S_IFREG);
+
+ if (chan->out.file.versions) {
+ if (!regular) {
+ syslog(LOG_ERR,
+ "log_open_stream: want versions but %s isn't a regular file",
+ chan->out.file.name);
+ chan->flags |= LOG_CHANNEL_BROKEN;
+ errno = EINVAL;
+ return (NULL);
+ }
+ }
+
+ flags = O_WRONLY|O_CREAT|O_APPEND;
+
+ if ((chan->flags & LOG_TRUNCATE) != 0) {
+ if (regular) {
+ (void)unlink(chan->out.file.name);
+ flags |= O_EXCL;
+ } else {
+ syslog(LOG_ERR,
+ "log_open_stream: want truncation but %s isn't a regular file",
+ chan->out.file.name);
+ chan->flags |= LOG_CHANNEL_BROKEN;
+ errno = EINVAL;
+ return (NULL);
+ }
+ }
+
+ fd = open(chan->out.file.name, flags,
+ S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH);
+ if (fd < 0) {
+ syslog(LOG_ERR, "log_open_stream: open(%s) failed: %s",
+ chan->out.file.name, strerror(errno));
+ chan->flags |= LOG_CHANNEL_BROKEN;
+ return (NULL);
+ }
+ stream = fdopen(fd, "a");
+ if (stream == NULL) {
+ syslog(LOG_ERR, "log_open_stream: fdopen() failed");
+ chan->flags |= LOG_CHANNEL_BROKEN;
+ return (NULL);
+ }
+ (void) fchown(fd, chan->out.file.owner, chan->out.file.group);
+
+ chan->out.file.stream = stream;
+ return (stream);
+}
+
+int
+log_close_stream(log_channel chan) {
+ FILE *stream;
+
+ if (chan == NULL || chan->type != log_file) {
+ errno = EINVAL;
+ return (0);
+ }
+ stream = chan->out.file.stream;
+ chan->out.file.stream = NULL;
+ if (stream != NULL && fclose(stream) == EOF)
+ return (-1);
+ return (0);
+}
+
+void
+log_close_debug_channels(log_context lc) {
+ log_channel_list lcl;
+ int i;
+
+ for (i = 0; i < lc->num_categories; i++)
+ for (lcl = lc->categories[i]; lcl != NULL; lcl = lcl->next)
+ if (lcl->channel->type == log_file &&
+ lcl->channel->out.file.stream != NULL &&
+ lcl->channel->flags & LOG_REQUIRE_DEBUG)
+ (void)log_close_stream(lcl->channel);
+}
+
+FILE *
+log_get_stream(log_channel chan) {
+ if (chan == NULL || chan->type != log_file) {
+ errno = EINVAL;
+ return (NULL);
+ }
+ return (chan->out.file.stream);
+}
+
+char *
+log_get_filename(log_channel chan) {
+ if (chan == NULL || chan->type != log_file) {
+ errno = EINVAL;
+ return (NULL);
+ }
+ return (chan->out.file.name);
+}
+
+int
+log_check_channel(log_context lc, int level, log_channel chan) {
+ int debugging, chan_level;
+
+ REQUIRE(lc != NULL);
+
+ debugging = ((lc->flags & LOG_OPTION_DEBUG) != 0);
+
+ /*
+ * If not debugging, short circuit debugging messages very early.
+ */
+ if (level > 0 && !debugging)
+ return (0);
+
+ if ((chan->flags & (LOG_CHANNEL_BROKEN|LOG_CHANNEL_OFF)) != 0)
+ return (0);
+
+ /* Some channels only log when debugging is on. */
+ if ((chan->flags & LOG_REQUIRE_DEBUG) && !debugging)
+ return (0);
+
+ /* Some channels use the global level. */
+ if ((chan->flags & LOG_USE_CONTEXT_LEVEL) != 0) {
+ chan_level = lc->level;
+ } else
+ chan_level = chan->level;
+
+ if (level > chan_level)
+ return (0);
+
+ return (1);
+}
+
+int
+log_check(log_context lc, int category, int level) {
+ log_channel_list lcl;
+ int debugging;
+
+ REQUIRE(lc != NULL);
+
+ debugging = ((lc->flags & LOG_OPTION_DEBUG) != 0);
+
+ /*
+ * If not debugging, short circuit debugging messages very early.
+ */
+ if (level > 0 && !debugging)
+ return (0);
+
+ if (category < 0 || category > lc->num_categories)
+ category = 0; /*%< use default */
+ lcl = lc->categories[category];
+ if (lcl == NULL) {
+ category = 0;
+ lcl = lc->categories[0];
+ }
+
+ for ( /* nothing */; lcl != NULL; lcl = lcl->next) {
+ if (log_check_channel(lc, level, lcl->channel))
+ return (1);
+ }
+ return (0);
+}
+
+void
+log_vwrite(log_context lc, int category, int level, const char *format,
+ va_list args) {
+ log_channel_list lcl;
+ int pri, debugging, did_vsprintf = 0;
+ int original_category;
+ FILE *stream;
+ log_channel chan;
+ struct timeval tv;
+ struct tm *local_tm;
+#ifdef HAVE_TIME_R
+ struct tm tm_tmp;
+#endif
+ time_t tt;
+ const char *category_name;
+ const char *level_str;
+ char time_buf[256];
+ char level_buf[256];
+
+ REQUIRE(lc != NULL);
+
+ debugging = (lc->flags & LOG_OPTION_DEBUG);
+
+ /*
+ * If not debugging, short circuit debugging messages very early.
+ */
+ if (level > 0 && !debugging)
+ return;
+
+ if (category < 0 || category > lc->num_categories)
+ category = 0; /*%< use default */
+ original_category = category;
+ lcl = lc->categories[category];
+ if (lcl == NULL) {
+ category = 0;
+ lcl = lc->categories[0];
+ }
+
+ /*
+ * Get the current time and format it.
+ */
+ time_buf[0]='\0';
+ if (gettimeofday(&tv, NULL) < 0) {
+ syslog(LOG_INFO, "gettimeofday failed in log_vwrite()");
+ } else {
+ tt = tv.tv_sec;
+#ifdef HAVE_TIME_R
+ local_tm = localtime_r(&tt, &tm_tmp);
+#else
+ local_tm = localtime(&tt);
+#endif
+ if (local_tm != NULL) {
+ sprintf(time_buf, "%02d-%s-%4d %02d:%02d:%02d.%03ld ",
+ local_tm->tm_mday, months[local_tm->tm_mon],
+ local_tm->tm_year+1900, local_tm->tm_hour,
+ local_tm->tm_min, local_tm->tm_sec,
+ (long)tv.tv_usec/1000);
+ }
+ }
+
+ /*
+ * Make a string representation of the current category and level
+ */
+
+ if (lc->category_names != NULL &&
+ lc->category_names[original_category] != NULL)
+ category_name = lc->category_names[original_category];
+ else
+ category_name = "";
+
+ if (level >= log_critical) {
+ if (level >= 0) {
+ sprintf(level_buf, "debug %d: ", level);
+ level_str = level_buf;
+ } else
+ level_str = level_text[-level-1];
+ } else {
+ sprintf(level_buf, "level %d: ", level);
+ level_str = level_buf;
+ }
+
+ /*
+ * Write the message to channels.
+ */
+ for ( /* nothing */; lcl != NULL; lcl = lcl->next) {
+ chan = lcl->channel;
+
+ if (!log_check_channel(lc, level, chan))
+ continue;
+
+ if (!did_vsprintf) {
+ (void)vsprintf(lc->buffer, format, args);
+ if (strlen(lc->buffer) > (size_t)LOG_BUFFER_SIZE) {
+ syslog(LOG_CRIT,
+ "memory overrun in log_vwrite()");
+ exit(1);
+ }
+ did_vsprintf = 1;
+ }
+
+ switch (chan->type) {
+ case log_syslog:
+ if (level >= log_critical)
+ pri = (level >= 0) ? 0 : -level;
+ else
+ pri = -log_critical;
+ syslog(chan->out.facility|syslog_priority[pri],
+ "%s%s%s%s",
+ (chan->flags & LOG_TIMESTAMP) ? time_buf : "",
+ (chan->flags & LOG_PRINT_CATEGORY) ?
+ category_name : "",
+ (chan->flags & LOG_PRINT_LEVEL) ?
+ level_str : "",
+ lc->buffer);
+ break;
+ case log_file:
+ stream = chan->out.file.stream;
+ if (stream == NULL) {
+ stream = log_open_stream(chan);
+ if (stream == NULL)
+ break;
+ }
+ if (chan->out.file.max_size != ULONG_MAX) {
+ long pos;
+
+ pos = ftell(stream);
+ if (pos >= 0 &&
+ (unsigned long)pos >
+ chan->out.file.max_size) {
+ /*
+ * try to roll over the log files,
+ * ignoring all all return codes
+ * except the open (we don't want
+ * to write any more anyway)
+ */
+ log_close_stream(chan);
+ version_rename(chan);
+ stream = log_open_stream(chan);
+ if (stream == NULL)
+ break;
+ }
+ }
+ fprintf(stream, "%s%s%s%s\n",
+ (chan->flags & LOG_TIMESTAMP) ? time_buf : "",
+ (chan->flags & LOG_PRINT_CATEGORY) ?
+ category_name : "",
+ (chan->flags & LOG_PRINT_LEVEL) ?
+ level_str : "",
+ lc->buffer);
+ fflush(stream);
+ break;
+ case log_null:
+ break;
+ default:
+ syslog(LOG_ERR,
+ "unknown channel type in log_vwrite()");
+ }
+ }
+}
+
+void
+log_write(log_context lc, int category, int level, const char *format, ...) {
+ va_list args;
+
+ va_start(args, format);
+ log_vwrite(lc, category, level, format, args);
+ va_end(args);
+}
+
+/*%
+ * Functions to create, set, or destroy contexts
+ */
+
+int
+log_new_context(int num_categories, char **category_names, log_context *lc) {
+ log_context nlc;
+
+ nlc = memget(sizeof (struct log_context));
+ if (nlc == NULL) {
+ errno = ENOMEM;
+ return (-1);
+ }
+ nlc->num_categories = num_categories;
+ nlc->category_names = category_names;
+ nlc->categories = memget(num_categories * sizeof (log_channel_list));
+ if (nlc->categories == NULL) {
+ memput(nlc, sizeof (struct log_context));
+ errno = ENOMEM;
+ return (-1);
+ }
+ memset(nlc->categories, '\0',
+ num_categories * sizeof (log_channel_list));
+ nlc->flags = 0U;
+ nlc->level = 0;
+ *lc = nlc;
+ return (0);
+}
+
+void
+log_free_context(log_context lc) {
+ log_channel_list lcl, lcl_next;
+ log_channel chan;
+ int i;
+
+ REQUIRE(lc != NULL);
+
+ for (i = 0; i < lc->num_categories; i++)
+ for (lcl = lc->categories[i]; lcl != NULL; lcl = lcl_next) {
+ lcl_next = lcl->next;
+ chan = lcl->channel;
+ (void)log_free_channel(chan);
+ memput(lcl, sizeof (struct log_channel_list));
+ }
+ memput(lc->categories,
+ lc->num_categories * sizeof (log_channel_list));
+ memput(lc, sizeof (struct log_context));
+}
+
+int
+log_add_channel(log_context lc, int category, log_channel chan) {
+ log_channel_list lcl;
+
+ if (lc == NULL || category < 0 || category >= lc->num_categories) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ lcl = memget(sizeof (struct log_channel_list));
+ if (lcl == NULL) {
+ errno = ENOMEM;
+ return(-1);
+ }
+ lcl->channel = chan;
+ lcl->next = lc->categories[category];
+ lc->categories[category] = lcl;
+ chan->references++;
+ return (0);
+}
+
+int
+log_remove_channel(log_context lc, int category, log_channel chan) {
+ log_channel_list lcl, prev_lcl, next_lcl;
+ int found = 0;
+
+ if (lc == NULL || category < 0 || category >= lc->num_categories) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ for (prev_lcl = NULL, lcl = lc->categories[category];
+ lcl != NULL;
+ lcl = next_lcl) {
+ next_lcl = lcl->next;
+ if (lcl->channel == chan) {
+ log_free_channel(chan);
+ if (prev_lcl != NULL)
+ prev_lcl->next = next_lcl;
+ else
+ lc->categories[category] = next_lcl;
+ memput(lcl, sizeof (struct log_channel_list));
+ /*
+ * We just set found instead of returning because
+ * the channel might be on the list more than once.
+ */
+ found = 1;
+ } else
+ prev_lcl = lcl;
+ }
+ if (!found) {
+ errno = ENOENT;
+ return (-1);
+ }
+ return (0);
+}
+
+int
+log_option(log_context lc, int option, int value) {
+ if (lc == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+ switch (option) {
+ case LOG_OPTION_DEBUG:
+ if (value)
+ lc->flags |= option;
+ else
+ lc->flags &= ~option;
+ break;
+ case LOG_OPTION_LEVEL:
+ lc->level = value;
+ break;
+ default:
+ errno = EINVAL;
+ return (-1);
+ }
+ return (0);
+}
+
+int
+log_category_is_active(log_context lc, int category) {
+ if (lc == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+ if (category >= 0 && category < lc->num_categories &&
+ lc->categories[category] != NULL)
+ return (1);
+ return (0);
+}
+
+log_channel
+log_new_syslog_channel(unsigned int flags, int level, int facility) {
+ log_channel chan;
+
+ chan = memget(sizeof (struct log_channel));
+ if (chan == NULL) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ chan->type = log_syslog;
+ chan->flags = flags;
+ chan->level = level;
+ chan->out.facility = facility;
+ chan->references = 0;
+ return (chan);
+}
+
+log_channel
+log_new_file_channel(unsigned int flags, int level,
+ const char *name, FILE *stream, unsigned int versions,
+ unsigned long max_size) {
+ log_channel chan;
+
+ chan = memget(sizeof (struct log_channel));
+ if (chan == NULL) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ chan->type = log_file;
+ chan->flags = flags;
+ chan->level = level;
+ if (name != NULL) {
+ size_t len;
+
+ len = strlen(name);
+ /*
+ * Quantize length to a multiple of 256. There's space for the
+ * NUL, since if len is a multiple of 256, the size chosen will
+ * be the next multiple.
+ */
+ chan->out.file.name_size = ((len / 256) + 1) * 256;
+ chan->out.file.name = memget(chan->out.file.name_size);
+ if (chan->out.file.name == NULL) {
+ memput(chan, sizeof (struct log_channel));
+ errno = ENOMEM;
+ return (NULL);
+ }
+ /* This is safe. */
+ strcpy(chan->out.file.name, name);
+ } else {
+ chan->out.file.name_size = 0;
+ chan->out.file.name = NULL;
+ }
+ chan->out.file.stream = stream;
+ chan->out.file.versions = versions;
+ chan->out.file.max_size = max_size;
+ chan->out.file.owner = getuid();
+ chan->out.file.group = getgid();
+ chan->references = 0;
+ return (chan);
+}
+
+int
+log_set_file_owner(log_channel chan, uid_t owner, gid_t group) {
+ if (chan->type != log_file) {
+ errno = EBADF;
+ return (-1);
+ }
+ chan->out.file.owner = owner;
+ chan->out.file.group = group;
+ return (0);
+}
+
+log_channel
+log_new_null_channel() {
+ log_channel chan;
+
+ chan = memget(sizeof (struct log_channel));
+ if (chan == NULL) {
+ errno = ENOMEM;
+ return (NULL);
+ }
+ chan->type = log_null;
+ chan->flags = LOG_CHANNEL_OFF;
+ chan->level = log_info;
+ chan->references = 0;
+ return (chan);
+}
+
+int
+log_inc_references(log_channel chan) {
+ if (chan == NULL) {
+ errno = EINVAL;
+ return (-1);
+ }
+ chan->references++;
+ return (0);
+}
+
+int
+log_dec_references(log_channel chan) {
+ if (chan == NULL || chan->references <= 0) {
+ errno = EINVAL;
+ return (-1);
+ }
+ chan->references--;
+ return (0);
+}
+
+log_channel_type
+log_get_channel_type(log_channel chan) {
+ REQUIRE(chan != NULL);
+
+ return (chan->type);
+}
+
+int
+log_free_channel(log_channel chan) {
+ if (chan == NULL || chan->references <= 0) {
+ errno = EINVAL;
+ return (-1);
+ }
+ chan->references--;
+ if (chan->references == 0) {
+ if (chan->type == log_file) {
+ if ((chan->flags & LOG_CLOSE_STREAM) &&
+ chan->out.file.stream != NULL)
+ (void)fclose(chan->out.file.stream);
+ if (chan->out.file.name != NULL)
+ memput(chan->out.file.name,
+ chan->out.file.name_size);
+ }
+ memput(chan, sizeof (struct log_channel));
+ }
+ return (0);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/logging_p.h b/usr/src/lib/libresolv2_joy/common/isc/logging_p.h
new file mode 100644
index 0000000000..5e6314f190
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/logging_p.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef LOGGING_P_H
+#define LOGGING_P_H
+
+typedef struct log_file_desc {
+ char *name;
+ size_t name_size;
+ FILE *stream;
+ unsigned int versions;
+ unsigned long max_size;
+ uid_t owner;
+ gid_t group;
+} log_file_desc;
+
+typedef union log_output {
+ int facility;
+ log_file_desc file;
+} log_output;
+
+struct log_channel {
+ int level; /*%< don't log messages > level */
+ log_channel_type type;
+ log_output out;
+ unsigned int flags;
+ int references;
+};
+
+typedef struct log_channel_list {
+ log_channel channel;
+ struct log_channel_list *next;
+} *log_channel_list;
+
+#define LOG_BUFFER_SIZE 20480
+
+struct log_context {
+ int num_categories;
+ char **category_names;
+ log_channel_list *categories;
+ int flags;
+ int level;
+ char buffer[LOG_BUFFER_SIZE];
+};
+
+#endif /* !LOGGING_P_H */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/memcluster.c b/usr/src/lib/libresolv2_joy/common/isc/memcluster.c
new file mode 100644
index 0000000000..515793fd6a
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/memcluster.c
@@ -0,0 +1,588 @@
+/*
+ * Copyright (c) 2005 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1997,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+
+/* When this symbol is defined allocations via memget are made slightly
+ bigger and some debugging info stuck before and after the region given
+ back to the caller. */
+/* #define DEBUGGING_MEMCLUSTER */
+#define MEMCLUSTER_ATEND
+
+
+#if !defined(LINT) && !defined(CODECENTER)
+static const char rcsid[] = "$Id: memcluster.c,v 1.11 2006/08/30 23:34:38 marka Exp $";
+#endif /* not lint */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <isc/memcluster.h>
+#include <isc/assertions.h>
+
+#include "port_after.h"
+
+#ifdef MEMCLUSTER_RECORD
+#ifndef DEBUGGING_MEMCLUSTER
+#define DEBUGGING_MEMCLUSTER
+#endif
+#endif
+
+#define DEF_MAX_SIZE 1100
+#define DEF_MEM_TARGET 4096
+
+typedef u_int32_t fence_t;
+
+typedef struct {
+ void * next;
+#if defined(DEBUGGING_MEMCLUSTER)
+#if defined(MEMCLUSTER_RECORD)
+ const char * file;
+ int line;
+#endif
+ size_t size;
+ fence_t fencepost;
+#endif
+} memcluster_element;
+
+#define SMALL_SIZE_LIMIT sizeof(memcluster_element)
+#define P_SIZE sizeof(void *)
+#define FRONT_FENCEPOST 0xfebafeba
+#define BACK_FENCEPOST 0xabefabef
+#define FENCEPOST_SIZE 4
+
+#ifndef MEMCLUSTER_LITTLE_MALLOC
+#define MEMCLUSTER_BIG_MALLOC 1
+#define NUM_BASIC_BLOCKS 64
+#endif
+
+struct stats {
+ u_long gets;
+ u_long totalgets;
+ u_long blocks;
+ u_long freefrags;
+};
+
+#ifdef DO_PTHREADS
+#include <pthread.h>
+static pthread_mutex_t memlock = PTHREAD_MUTEX_INITIALIZER;
+#define MEMLOCK (void)pthread_mutex_lock(&memlock)
+#define MEMUNLOCK (void)pthread_mutex_unlock(&memlock)
+#else
+/*
+ * Catch bad lock usage in non threaded build.
+ */
+static unsigned int memlock = 0;
+#define MEMLOCK do { INSIST(memlock == 0); memlock = 1; } while (0)
+#define MEMUNLOCK do { INSIST(memlock == 1); memlock = 0; } while (0)
+#endif /* DO_PTHEADS */
+
+/* Private data. */
+
+static size_t max_size;
+static size_t mem_target;
+#ifndef MEMCLUSTER_BIG_MALLOC
+static size_t mem_target_half;
+static size_t mem_target_fudge;
+#endif
+static memcluster_element ** freelists;
+#ifdef MEMCLUSTER_RECORD
+static memcluster_element ** activelists;
+#endif
+#ifdef MEMCLUSTER_BIG_MALLOC
+static memcluster_element * basic_blocks;
+#endif
+static struct stats * stats;
+
+/* Forward. */
+
+static size_t quantize(size_t);
+#if defined(DEBUGGING_MEMCLUSTER)
+static void check(unsigned char *, int, size_t);
+#endif
+
+/* Public. */
+
+int
+meminit(size_t init_max_size, size_t target_size) {
+
+#if defined(DEBUGGING_MEMCLUSTER)
+ INSIST(sizeof(fence_t) == FENCEPOST_SIZE);
+#endif
+ if (freelists != NULL) {
+ errno = EEXIST;
+ return (-1);
+ }
+ if (init_max_size == 0U)
+ max_size = DEF_MAX_SIZE;
+ else
+ max_size = init_max_size;
+ if (target_size == 0U)
+ mem_target = DEF_MEM_TARGET;
+ else
+ mem_target = target_size;
+#ifndef MEMCLUSTER_BIG_MALLOC
+ mem_target_half = mem_target / 2;
+ mem_target_fudge = mem_target + mem_target / 4;
+#endif
+ freelists = malloc(max_size * sizeof (memcluster_element *));
+ stats = malloc((max_size+1) * sizeof (struct stats));
+ if (freelists == NULL || stats == NULL) {
+ errno = ENOMEM;
+ return (-1);
+ }
+ memset(freelists, 0,
+ max_size * sizeof (memcluster_element *));
+ memset(stats, 0, (max_size + 1) * sizeof (struct stats));
+#ifdef MEMCLUSTER_RECORD
+ activelists = malloc((max_size + 1) * sizeof (memcluster_element *));
+ if (activelists == NULL) {
+ errno = ENOMEM;
+ return (-1);
+ }
+ memset(activelists, 0,
+ (max_size + 1) * sizeof (memcluster_element *));
+#endif
+#ifdef MEMCLUSTER_BIG_MALLOC
+ basic_blocks = NULL;
+#endif
+ return (0);
+}
+
+void *
+__memget(size_t size) {
+ return (__memget_record(size, NULL, 0));
+}
+
+void *
+__memget_record(size_t size, const char *file, int line) {
+ size_t new_size = quantize(size);
+#if defined(DEBUGGING_MEMCLUSTER)
+ memcluster_element *e;
+ char *p;
+ fence_t fp = BACK_FENCEPOST;
+#endif
+ void *ret;
+
+ MEMLOCK;
+
+#if !defined(MEMCLUSTER_RECORD)
+ UNUSED(file);
+ UNUSED(line);
+#endif
+ if (freelists == NULL) {
+ if (meminit(0, 0) == -1) {
+ MEMUNLOCK;
+ return (NULL);
+ }
+ }
+ if (size == 0U) {
+ MEMUNLOCK;
+ errno = EINVAL;
+ return (NULL);
+ }
+ if (size >= max_size || new_size >= max_size) {
+ /* memget() was called on something beyond our upper limit. */
+ stats[max_size].gets++;
+ stats[max_size].totalgets++;
+#if defined(DEBUGGING_MEMCLUSTER)
+ e = malloc(new_size);
+ if (e == NULL) {
+ MEMUNLOCK;
+ errno = ENOMEM;
+ return (NULL);
+ }
+ e->next = NULL;
+ e->size = size;
+#ifdef MEMCLUSTER_RECORD
+ e->file = file;
+ e->line = line;
+ e->next = activelists[max_size];
+ activelists[max_size] = e;
+#endif
+ MEMUNLOCK;
+ e->fencepost = FRONT_FENCEPOST;
+ p = (char *)e + sizeof *e + size;
+ memcpy(p, &fp, sizeof fp);
+ return ((char *)e + sizeof *e);
+#else
+ MEMUNLOCK;
+ return (malloc(size));
+#endif
+ }
+
+ /*
+ * If there are no blocks in the free list for this size, get a chunk
+ * of memory and then break it up into "new_size"-sized blocks, adding
+ * them to the free list.
+ */
+ if (freelists[new_size] == NULL) {
+ int i, frags;
+ size_t total_size;
+ void *new;
+ char *curr, *next;
+
+#ifdef MEMCLUSTER_BIG_MALLOC
+ if (basic_blocks == NULL) {
+ new = malloc(NUM_BASIC_BLOCKS * mem_target);
+ if (new == NULL) {
+ MEMUNLOCK;
+ errno = ENOMEM;
+ return (NULL);
+ }
+ curr = new;
+ next = curr + mem_target;
+ for (i = 0; i < (NUM_BASIC_BLOCKS - 1); i++) {
+ ((memcluster_element *)curr)->next = next;
+ curr = next;
+ next += mem_target;
+ }
+ /*
+ * curr is now pointing at the last block in the
+ * array.
+ */
+ ((memcluster_element *)curr)->next = NULL;
+ basic_blocks = new;
+ }
+ total_size = mem_target;
+ new = basic_blocks;
+ basic_blocks = basic_blocks->next;
+#else
+ if (new_size > mem_target_half)
+ total_size = mem_target_fudge;
+ else
+ total_size = mem_target;
+ new = malloc(total_size);
+ if (new == NULL) {
+ MEMUNLOCK;
+ errno = ENOMEM;
+ return (NULL);
+ }
+#endif
+ frags = total_size / new_size;
+ stats[new_size].blocks++;
+ stats[new_size].freefrags += frags;
+ /* Set up a linked-list of blocks of size "new_size". */
+ curr = new;
+ next = curr + new_size;
+ for (i = 0; i < (frags - 1); i++) {
+#if defined (DEBUGGING_MEMCLUSTER)
+ memset(curr, 0xa5, new_size);
+#endif
+ ((memcluster_element *)curr)->next = next;
+ curr = next;
+ next += new_size;
+ }
+ /* curr is now pointing at the last block in the array. */
+#if defined (DEBUGGING_MEMCLUSTER)
+ memset(curr, 0xa5, new_size);
+#endif
+ ((memcluster_element *)curr)->next = freelists[new_size];
+ freelists[new_size] = new;
+ }
+
+ /* The free list uses the "rounded-up" size "new_size". */
+#if defined (DEBUGGING_MEMCLUSTER)
+ e = freelists[new_size];
+ ret = (char *)e + sizeof *e;
+ /*
+ * Check to see if this buffer has been written to while on free list.
+ */
+ check(ret, 0xa5, new_size - sizeof *e);
+ /*
+ * Mark memory we are returning.
+ */
+ memset(ret, 0xe5, size);
+#else
+ ret = freelists[new_size];
+#endif
+ freelists[new_size] = freelists[new_size]->next;
+#if defined(DEBUGGING_MEMCLUSTER)
+ e->next = NULL;
+ e->size = size;
+ e->fencepost = FRONT_FENCEPOST;
+#ifdef MEMCLUSTER_RECORD
+ e->file = file;
+ e->line = line;
+ e->next = activelists[size];
+ activelists[size] = e;
+#endif
+ p = (char *)e + sizeof *e + size;
+ memcpy(p, &fp, sizeof fp);
+#endif
+
+ /*
+ * The stats[] uses the _actual_ "size" requested by the
+ * caller, with the caveat (in the code above) that "size" >= the
+ * max. size (max_size) ends up getting recorded as a call to
+ * max_size.
+ */
+ stats[size].gets++;
+ stats[size].totalgets++;
+ stats[new_size].freefrags--;
+ MEMUNLOCK;
+#if defined(DEBUGGING_MEMCLUSTER)
+ return ((char *)e + sizeof *e);
+#else
+ return (ret);
+#endif
+}
+
+/*%
+ * This is a call from an external caller,
+ * so we want to count this as a user "put".
+ */
+void
+__memput(void *mem, size_t size) {
+ __memput_record(mem, size, NULL, 0);
+}
+
+void
+__memput_record(void *mem, size_t size, const char *file, int line) {
+ size_t new_size = quantize(size);
+#if defined (DEBUGGING_MEMCLUSTER)
+ memcluster_element *e;
+ memcluster_element *el;
+#ifdef MEMCLUSTER_RECORD
+ memcluster_element *prev;
+#endif
+ fence_t fp;
+ char *p;
+#endif
+
+ MEMLOCK;
+
+#if !defined (MEMCLUSTER_RECORD)
+ UNUSED(file);
+ UNUSED(line);
+#endif
+
+ REQUIRE(freelists != NULL);
+
+ if (size == 0U) {
+ MEMUNLOCK;
+ errno = EINVAL;
+ return;
+ }
+
+#if defined (DEBUGGING_MEMCLUSTER)
+ e = (memcluster_element *) ((char *)mem - sizeof *e);
+ INSIST(e->fencepost == FRONT_FENCEPOST);
+ INSIST(e->size == size);
+ p = (char *)e + sizeof *e + size;
+ memcpy(&fp, p, sizeof fp);
+ INSIST(fp == BACK_FENCEPOST);
+ INSIST(((u_long)mem % 4) == 0);
+#ifdef MEMCLUSTER_RECORD
+ prev = NULL;
+ if (size == max_size || new_size >= max_size)
+ el = activelists[max_size];
+ else
+ el = activelists[size];
+ while (el != NULL && el != e) {
+ prev = el;
+ el = el->next;
+ }
+ INSIST(el != NULL); /*%< double free */
+ if (prev == NULL) {
+ if (size == max_size || new_size >= max_size)
+ activelists[max_size] = el->next;
+ else
+ activelists[size] = el->next;
+ } else
+ prev->next = el->next;
+#endif
+#endif
+
+ if (size == max_size || new_size >= max_size) {
+ /* memput() called on something beyond our upper limit */
+#if defined(DEBUGGING_MEMCLUSTER)
+ free(e);
+#else
+ free(mem);
+#endif
+
+ INSIST(stats[max_size].gets != 0U);
+ stats[max_size].gets--;
+ MEMUNLOCK;
+ return;
+ }
+
+ /* The free list uses the "rounded-up" size "new_size": */
+#if defined(DEBUGGING_MEMCLUSTER)
+ memset(mem, 0xa5, new_size - sizeof *e); /*%< catch write after free */
+ e->size = 0; /*%< catch double memput() */
+#ifdef MEMCLUSTER_RECORD
+ e->file = file;
+ e->line = line;
+#endif
+#ifdef MEMCLUSTER_ATEND
+ e->next = NULL;
+ el = freelists[new_size];
+ while (el != NULL && el->next != NULL)
+ el = el->next;
+ if (el)
+ el->next = e;
+ else
+ freelists[new_size] = e;
+#else
+ e->next = freelists[new_size];
+ freelists[new_size] = (void *)e;
+#endif
+#else
+ ((memcluster_element *)mem)->next = freelists[new_size];
+ freelists[new_size] = (memcluster_element *)mem;
+#endif
+
+ /*
+ * The stats[] uses the _actual_ "size" requested by the
+ * caller, with the caveat (in the code above) that "size" >= the
+ * max. size (max_size) ends up getting recorded as a call to
+ * max_size.
+ */
+ INSIST(stats[size].gets != 0U);
+ stats[size].gets--;
+ stats[new_size].freefrags++;
+ MEMUNLOCK;
+}
+
+void *
+__memget_debug(size_t size, const char *file, int line) {
+ void *ptr;
+ ptr = __memget_record(size, file, line);
+ fprintf(stderr, "%s:%d: memget(%lu) -> %p\n", file, line,
+ (u_long)size, ptr);
+ return (ptr);
+}
+
+void
+__memput_debug(void *ptr, size_t size, const char *file, int line) {
+ fprintf(stderr, "%s:%d: memput(%p, %lu)\n", file, line, ptr,
+ (u_long)size);
+ __memput_record(ptr, size, file, line);
+}
+
+/*%
+ * Print the stats[] on the stream "out" with suitable formatting.
+ */
+void
+memstats(FILE *out) {
+ size_t i;
+#ifdef MEMCLUSTER_RECORD
+ memcluster_element *e;
+#endif
+
+ MEMLOCK;
+
+ if (freelists == NULL) {
+ MEMUNLOCK;
+ return;
+ }
+ for (i = 1; i <= max_size; i++) {
+ const struct stats *s = &stats[i];
+
+ if (s->totalgets == 0U && s->gets == 0U)
+ continue;
+ fprintf(out, "%s%5lu: %11lu gets, %11lu rem",
+ (i == max_size) ? ">=" : " ",
+ (unsigned long)i, s->totalgets, s->gets);
+ if (s->blocks != 0U)
+ fprintf(out, " (%lu bl, %lu ff)",
+ s->blocks, s->freefrags);
+ fputc('\n', out);
+ }
+#ifdef MEMCLUSTER_RECORD
+ fprintf(out, "Active Memory:\n");
+ for (i = 1; i <= max_size; i++) {
+ if ((e = activelists[i]) != NULL)
+ while (e != NULL) {
+ fprintf(out, "%s:%d %p:%lu\n",
+ e->file != NULL ? e->file :
+ "<UNKNOWN>", e->line,
+ (char *)e + sizeof *e,
+ (u_long)e->size);
+ e = e->next;
+ }
+ }
+#endif
+ MEMUNLOCK;
+}
+
+int
+memactive(void) {
+ size_t i;
+
+ if (stats == NULL)
+ return (0);
+ for (i = 1; i <= max_size; i++)
+ if (stats[i].gets != 0U)
+ return (1);
+ return (0);
+}
+
+/* Private. */
+
+/*%
+ * Round up size to a multiple of sizeof(void *). This guarantees that a
+ * block is at least sizeof void *, and that we won't violate alignment
+ * restrictions, both of which are needed to make lists of blocks.
+ */
+static size_t
+quantize(size_t size) {
+ int remainder;
+ /*
+ * If there is no remainder for the integer division of
+ *
+ * (rightsize/P_SIZE)
+ *
+ * then we already have a good size; if not, then we need
+ * to round up the result in order to get a size big
+ * enough to satisfy the request _and_ aligned on P_SIZE boundaries.
+ */
+ remainder = size % P_SIZE;
+ if (remainder != 0)
+ size += P_SIZE - remainder;
+#if defined(DEBUGGING_MEMCLUSTER)
+ return (size + SMALL_SIZE_LIMIT + sizeof (int));
+#else
+ return (size);
+#endif
+}
+
+#if defined(DEBUGGING_MEMCLUSTER)
+static void
+check(unsigned char *a, int value, size_t len) {
+ size_t i;
+ for (i = 0; i < len; i++)
+ INSIST(a[i] == value);
+}
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/movefile.c b/usr/src/lib/libresolv2_joy/common/isc/movefile.c
new file mode 100644
index 0000000000..0ffc7047e2
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/movefile.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 2000 by Internet Software Consortium, Inc.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+
+#include <port_before.h>
+#include <stdio.h>
+#include <isc/misc.h>
+#include <port_after.h>
+#ifndef HAVE_MOVEFILE
+/*
+ * rename() is lame (can't overwrite an existing file) on some systems.
+ * use movefile() instead, and let lame OS ports do what they need to.
+ */
+
+int
+isc_movefile(const char *oldname, const char *newname) {
+ return (rename(oldname, newname));
+}
+#else
+ static int os_port_has_isc_movefile = 1;
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/isc/tree.c b/usr/src/lib/libresolv2_joy/common/isc/tree.c
new file mode 100644
index 0000000000..8ba675fbe8
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/isc/tree.c
@@ -0,0 +1,534 @@
+#ifndef LINT
+static const char rcsid[] = "$Id: tree.c,v 1.4 2005/04/27 04:56:39 sra Exp $";
+#endif
+
+/*%
+ * tree - balanced binary tree library
+ *
+ * vix 05apr94 [removed vixie.h dependencies; cleaned up formatting, names]
+ * vix 22jan93 [revisited; uses RCS, ANSI, POSIX; has bug fixes]
+ * vix 23jun86 [added delete uar to add for replaced nodes]
+ * vix 20jun86 [added tree_delete per wirth a+ds (mod2 v.) p. 224]
+ * vix 06feb86 [added tree_mung()]
+ * vix 02feb86 [added tree balancing from wirth "a+ds=p" p. 220-221]
+ * vix 14dec85 [written]
+ */
+
+/*%
+ * This program text was created by Paul Vixie using examples from the book:
+ * "Algorithms & Data Structures," Niklaus Wirth, Prentice-Hall, 1986, ISBN
+ * 0-13-022005-1. Any errors in the conversion from Modula-2 to C are Paul
+ * Vixie's.
+ */
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*#define DEBUG "tree"*/
+
+#include "port_before.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "port_after.h"
+
+#include <isc/memcluster.h>
+#include <isc/tree.h>
+
+#ifdef DEBUG
+static int debugDepth = 0;
+static char *debugFuncs[256];
+# define ENTER(proc) { \
+ debugFuncs[debugDepth] = proc; \
+ fprintf(stderr, "ENTER(%d:%s.%s)\n", \
+ debugDepth, DEBUG, \
+ debugFuncs[debugDepth]); \
+ debugDepth++; \
+ }
+# define RET(value) { \
+ debugDepth--; \
+ fprintf(stderr, "RET(%d:%s.%s)\n", \
+ debugDepth, DEBUG, \
+ debugFuncs[debugDepth]); \
+ return (value); \
+ }
+# define RETV { \
+ debugDepth--; \
+ fprintf(stderr, "RETV(%d:%s.%s)\n", \
+ debugDepth, DEBUG, \
+ debugFuncs[debugDepth]); \
+ return; \
+ }
+# define MSG(msg) fprintf(stderr, "MSG(%s)\n", msg);
+#else
+# define ENTER(proc) ;
+# define RET(value) return (value);
+# define RETV return;
+# define MSG(msg) ;
+#endif
+
+#ifndef TRUE
+# define TRUE 1
+# define FALSE 0
+#endif
+
+static tree * sprout(tree **, tree_t, int *, int (*)(), void (*)());
+static int delete(tree **, int (*)(), tree_t, void (*)(), int *, int *);
+static void del(tree **, int *, tree **, void (*)(), int *);
+static void bal_L(tree **, int *);
+static void bal_R(tree **, int *);
+
+void
+tree_init(tree **ppr_tree) {
+ ENTER("tree_init")
+ *ppr_tree = NULL;
+ RETV
+}
+
+tree_t
+tree_srch(tree **ppr_tree, int (*pfi_compare)(tree_t, tree_t), tree_t p_user) {
+ ENTER("tree_srch")
+
+ if (*ppr_tree) {
+ int i_comp = (*pfi_compare)(p_user, (**ppr_tree).data);
+
+ if (i_comp > 0)
+ RET(tree_srch(&(**ppr_tree).right,
+ pfi_compare,
+ p_user))
+
+ if (i_comp < 0)
+ RET(tree_srch(&(**ppr_tree).left,
+ pfi_compare,
+ p_user))
+
+ /* not higher, not lower... this must be the one.
+ */
+ RET((**ppr_tree).data)
+ }
+
+ /* grounded. NOT found.
+ */
+ RET(NULL)
+}
+
+tree_t
+tree_add(tree **ppr_tree, int (*pfi_compare)(tree_t, tree_t),
+ tree_t p_user, void (*pfv_uar)())
+{
+ int i_balance = FALSE;
+
+ ENTER("tree_add")
+ if (!sprout(ppr_tree, p_user, &i_balance, pfi_compare, pfv_uar))
+ RET(NULL)
+ RET(p_user)
+}
+
+int
+tree_delete(tree **ppr_p, int (*pfi_compare)(tree_t, tree_t),
+ tree_t p_user, void (*pfv_uar)())
+{
+ int i_balance = FALSE, i_uar_called = FALSE;
+
+ ENTER("tree_delete");
+ RET(delete(ppr_p, pfi_compare, p_user, pfv_uar,
+ &i_balance, &i_uar_called))
+}
+
+int
+tree_trav(tree **ppr_tree, int (*pfi_uar)(tree_t)) {
+ ENTER("tree_trav")
+
+ if (!*ppr_tree)
+ RET(TRUE)
+
+ if (!tree_trav(&(**ppr_tree).left, pfi_uar))
+ RET(FALSE)
+ if (!(*pfi_uar)((**ppr_tree).data))
+ RET(FALSE)
+ if (!tree_trav(&(**ppr_tree).right, pfi_uar))
+ RET(FALSE)
+ RET(TRUE)
+}
+
+void
+tree_mung(tree **ppr_tree, void (*pfv_uar)(tree_t)) {
+ ENTER("tree_mung")
+ if (*ppr_tree) {
+ tree_mung(&(**ppr_tree).left, pfv_uar);
+ tree_mung(&(**ppr_tree).right, pfv_uar);
+ if (pfv_uar)
+ (*pfv_uar)((**ppr_tree).data);
+ memput(*ppr_tree, sizeof(tree));
+ *ppr_tree = NULL;
+ }
+ RETV
+}
+
+static tree *
+sprout(tree **ppr, tree_t p_data, int *pi_balance,
+ int (*pfi_compare)(tree_t, tree_t), void (*pfv_delete)(tree_t))
+{
+ tree *p1, *p2, *sub;
+ int cmp;
+
+ ENTER("sprout")
+
+ /* are we grounded? if so, add the node "here" and set the rebalance
+ * flag, then exit.
+ */
+ if (!*ppr) {
+ MSG("grounded. adding new node, setting h=true")
+ *ppr = (tree *) memget(sizeof(tree));
+ if (*ppr) {
+ (*ppr)->left = NULL;
+ (*ppr)->right = NULL;
+ (*ppr)->bal = 0;
+ (*ppr)->data = p_data;
+ *pi_balance = TRUE;
+ }
+ RET(*ppr);
+ }
+
+ /* compare the data using routine passed by caller.
+ */
+ cmp = (*pfi_compare)(p_data, (*ppr)->data);
+
+ /* if LESS, prepare to move to the left.
+ */
+ if (cmp < 0) {
+ MSG("LESS. sprouting left.")
+ sub = sprout(&(*ppr)->left, p_data, pi_balance,
+ pfi_compare, pfv_delete);
+ if (sub && *pi_balance) { /*%< left branch has grown */
+ MSG("LESS: left branch has grown")
+ switch ((*ppr)->bal) {
+ case 1:
+ /* right branch WAS longer; bal is ok now */
+ MSG("LESS: case 1.. bal restored implicitly")
+ (*ppr)->bal = 0;
+ *pi_balance = FALSE;
+ break;
+ case 0:
+ /* balance WAS okay; now left branch longer */
+ MSG("LESS: case 0.. balnce bad but still ok")
+ (*ppr)->bal = -1;
+ break;
+ case -1:
+ /* left branch was already too long. rebal */
+ MSG("LESS: case -1: rebalancing")
+ p1 = (*ppr)->left;
+ if (p1->bal == -1) { /*%< LL */
+ MSG("LESS: single LL")
+ (*ppr)->left = p1->right;
+ p1->right = *ppr;
+ (*ppr)->bal = 0;
+ *ppr = p1;
+ } else { /*%< double LR */
+ MSG("LESS: double LR")
+
+ p2 = p1->right;
+ p1->right = p2->left;
+ p2->left = p1;
+
+ (*ppr)->left = p2->right;
+ p2->right = *ppr;
+
+ if (p2->bal == -1)
+ (*ppr)->bal = 1;
+ else
+ (*ppr)->bal = 0;
+
+ if (p2->bal == 1)
+ p1->bal = -1;
+ else
+ p1->bal = 0;
+ *ppr = p2;
+ } /*else*/
+ (*ppr)->bal = 0;
+ *pi_balance = FALSE;
+ } /*switch*/
+ } /*if*/
+ RET(sub)
+ } /*if*/
+
+ /* if MORE, prepare to move to the right.
+ */
+ if (cmp > 0) {
+ MSG("MORE: sprouting to the right")
+ sub = sprout(&(*ppr)->right, p_data, pi_balance,
+ pfi_compare, pfv_delete);
+ if (sub && *pi_balance) {
+ MSG("MORE: right branch has grown")
+
+ switch ((*ppr)->bal) {
+ case -1:
+ MSG("MORE: balance was off, fixed implicitly")
+ (*ppr)->bal = 0;
+ *pi_balance = FALSE;
+ break;
+ case 0:
+ MSG("MORE: balance was okay, now off but ok")
+ (*ppr)->bal = 1;
+ break;
+ case 1:
+ MSG("MORE: balance was off, need to rebalance")
+ p1 = (*ppr)->right;
+ if (p1->bal == 1) { /*%< RR */
+ MSG("MORE: single RR")
+ (*ppr)->right = p1->left;
+ p1->left = *ppr;
+ (*ppr)->bal = 0;
+ *ppr = p1;
+ } else { /*%< double RL */
+ MSG("MORE: double RL")
+
+ p2 = p1->left;
+ p1->left = p2->right;
+ p2->right = p1;
+
+ (*ppr)->right = p2->left;
+ p2->left = *ppr;
+
+ if (p2->bal == 1)
+ (*ppr)->bal = -1;
+ else
+ (*ppr)->bal = 0;
+
+ if (p2->bal == -1)
+ p1->bal = 1;
+ else
+ p1->bal = 0;
+
+ *ppr = p2;
+ } /*else*/
+ (*ppr)->bal = 0;
+ *pi_balance = FALSE;
+ } /*switch*/
+ } /*if*/
+ RET(sub)
+ } /*if*/
+
+ /* not less, not more: this is the same key! replace...
+ */
+ MSG("FOUND: Replacing data value")
+ *pi_balance = FALSE;
+ if (pfv_delete)
+ (*pfv_delete)((*ppr)->data);
+ (*ppr)->data = p_data;
+ RET(*ppr)
+}
+
+static int
+delete(tree **ppr_p, int (*pfi_compare)(tree_t, tree_t), tree_t p_user,
+ void (*pfv_uar)(tree_t), int *pi_balance, int *pi_uar_called)
+{
+ tree *pr_q;
+ int i_comp, i_ret;
+
+ ENTER("delete")
+
+ if (*ppr_p == NULL) {
+ MSG("key not in tree")
+ RET(FALSE)
+ }
+
+ i_comp = (*pfi_compare)((*ppr_p)->data, p_user);
+ if (i_comp > 0) {
+ MSG("too high - scan left")
+ i_ret = delete(&(*ppr_p)->left, pfi_compare, p_user, pfv_uar,
+ pi_balance, pi_uar_called);
+ if (*pi_balance)
+ bal_L(ppr_p, pi_balance);
+ } else if (i_comp < 0) {
+ MSG("too low - scan right")
+ i_ret = delete(&(*ppr_p)->right, pfi_compare, p_user, pfv_uar,
+ pi_balance, pi_uar_called);
+ if (*pi_balance)
+ bal_R(ppr_p, pi_balance);
+ } else {
+ MSG("equal")
+ pr_q = *ppr_p;
+ if (pr_q->right == NULL) {
+ MSG("right subtree null")
+ *ppr_p = pr_q->left;
+ *pi_balance = TRUE;
+ } else if (pr_q->left == NULL) {
+ MSG("right subtree non-null, left subtree null")
+ *ppr_p = pr_q->right;
+ *pi_balance = TRUE;
+ } else {
+ MSG("neither subtree null")
+ del(&pr_q->left, pi_balance, &pr_q,
+ pfv_uar, pi_uar_called);
+ if (*pi_balance)
+ bal_L(ppr_p, pi_balance);
+ }
+ if (!*pi_uar_called && pfv_uar)
+ (*pfv_uar)(pr_q->data);
+ /* Thanks to wuth@castrov.cuc.ab.ca for the following stmt. */
+ memput(pr_q, sizeof(tree));
+ i_ret = TRUE;
+ }
+ RET(i_ret)
+}
+
+static void
+del(tree **ppr_r, int *pi_balance, tree **ppr_q,
+ void (*pfv_uar)(tree_t), int *pi_uar_called)
+{
+ ENTER("del")
+
+ if ((*ppr_r)->right != NULL) {
+ del(&(*ppr_r)->right, pi_balance, ppr_q,
+ pfv_uar, pi_uar_called);
+ if (*pi_balance)
+ bal_R(ppr_r, pi_balance);
+ } else {
+ if (pfv_uar)
+ (*pfv_uar)((*ppr_q)->data);
+ *pi_uar_called = TRUE;
+ (*ppr_q)->data = (*ppr_r)->data;
+ *ppr_q = *ppr_r;
+ *ppr_r = (*ppr_r)->left;
+ *pi_balance = TRUE;
+ }
+
+ RETV
+}
+
+static void
+bal_L(tree **ppr_p, int *pi_balance) {
+ tree *p1, *p2;
+ int b1, b2;
+
+ ENTER("bal_L")
+ MSG("left branch has shrunk")
+
+ switch ((*ppr_p)->bal) {
+ case -1:
+ MSG("was imbalanced, fixed implicitly")
+ (*ppr_p)->bal = 0;
+ break;
+ case 0:
+ MSG("was okay, is now one off")
+ (*ppr_p)->bal = 1;
+ *pi_balance = FALSE;
+ break;
+ case 1:
+ MSG("was already off, this is too much")
+ p1 = (*ppr_p)->right;
+ b1 = p1->bal;
+ if (b1 >= 0) {
+ MSG("single RR")
+ (*ppr_p)->right = p1->left;
+ p1->left = *ppr_p;
+ if (b1 == 0) {
+ MSG("b1 == 0")
+ (*ppr_p)->bal = 1;
+ p1->bal = -1;
+ *pi_balance = FALSE;
+ } else {
+ MSG("b1 != 0")
+ (*ppr_p)->bal = 0;
+ p1->bal = 0;
+ }
+ *ppr_p = p1;
+ } else {
+ MSG("double RL")
+ p2 = p1->left;
+ b2 = p2->bal;
+ p1->left = p2->right;
+ p2->right = p1;
+ (*ppr_p)->right = p2->left;
+ p2->left = *ppr_p;
+ if (b2 == 1)
+ (*ppr_p)->bal = -1;
+ else
+ (*ppr_p)->bal = 0;
+ if (b2 == -1)
+ p1->bal = 1;
+ else
+ p1->bal = 0;
+ *ppr_p = p2;
+ p2->bal = 0;
+ }
+ }
+ RETV
+}
+
+static void
+bal_R(tree **ppr_p, int *pi_balance) {
+ tree *p1, *p2;
+ int b1, b2;
+
+ ENTER("bal_R")
+ MSG("right branch has shrunk")
+ switch ((*ppr_p)->bal) {
+ case 1:
+ MSG("was imbalanced, fixed implicitly")
+ (*ppr_p)->bal = 0;
+ break;
+ case 0:
+ MSG("was okay, is now one off")
+ (*ppr_p)->bal = -1;
+ *pi_balance = FALSE;
+ break;
+ case -1:
+ MSG("was already off, this is too much")
+ p1 = (*ppr_p)->left;
+ b1 = p1->bal;
+ if (b1 <= 0) {
+ MSG("single LL")
+ (*ppr_p)->left = p1->right;
+ p1->right = *ppr_p;
+ if (b1 == 0) {
+ MSG("b1 == 0")
+ (*ppr_p)->bal = -1;
+ p1->bal = 1;
+ *pi_balance = FALSE;
+ } else {
+ MSG("b1 != 0")
+ (*ppr_p)->bal = 0;
+ p1->bal = 0;
+ }
+ *ppr_p = p1;
+ } else {
+ MSG("double LR")
+ p2 = p1->right;
+ b2 = p2->bal;
+ p1->right = p2->left;
+ p2->left = p1;
+ (*ppr_p)->left = p2->right;
+ p2->right = *ppr_p;
+ if (b2 == -1)
+ (*ppr_p)->bal = 1;
+ else
+ (*ppr_p)->bal = 0;
+ if (b2 == 1)
+ p1->bal = -1;
+ else
+ p1->bal = 0;
+ *ppr_p = p2;
+ p2->bal = 0;
+ }
+ }
+ RETV
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/llib-lresolv_joy b/usr/src/lib/libresolv2_joy/common/llib-lresolv_joy
new file mode 100644
index 0000000000..aedd06a0fa
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/llib-lresolv_joy
@@ -0,0 +1,59 @@
+/* LINTLIBRARY */
+/* PROTOLIB1 */
+
+/*
+ * Copyright (c) 1997-1999 by Sun Microsystems, Inc.
+ * All rights reserved.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <arpa/nameser.h>
+#include <resolv.h>
+
+/*
+ * usr/src/lib/libresolv2 routines
+ */
+
+int dn_skipname(const uchar_t *, const uchar_t *);
+void fp_query(const u_char *, FILE *);
+const uchar_t * p_cdname(const uchar_t *, const uchar_t *, FILE *);
+const char * p_class(int);
+void p_query(const u_char *);
+const char * p_time(unsigned int);
+const char * p_type(int);
+void putlong(unsigned int, uchar_t *);
+uint32_t _getlong(const u_char *);
+uint16_t _getshort(const u_char *);
+const char * hstrerror(int);
+int res_init(void);
+int res_mkquery(int, const char *, int, int, const u_char *,
+ int, const u_char *, u_char *, int);
+int res_query(const char *, int, int, u_char *, int);
+int res_querydomain(const char *, const char *, int, int,
+ u_char *, int);
+int res_search(const char *, int, int, u_char *, int);
+int res_send(const u_char *, int, u_char *, int);
+int res_update(ns_updrec *);
+int res_ninit(res_state);
+void fp_resstat(const res_state, FILE *);
+const char * res_hostalias(const res_state, const char *, char *, size_t);
+int res_nquery(res_state, const char *, int, int, u_char *, int);
+int res_nsearch(res_state, const char *, int, int, u_char *, int);
+int res_nquerydomain(res_state, const char *, const char *,
+ int, int, u_char *, int);
+int res_nmkquery(res_state, int, const char *, int, int,
+ const u_char *, int, const u_char *,
+ u_char *, int);
+int res_nsend(res_state, const u_char *, int, u_char *, int);
+int res_nmkupdate(res_state, ns_updrec *, u_char *, int);
+void res_nclose(res_state);
+int res_nsendsigned(res_state, const u_char *, int, ns_tsig_key *,
+ u_char *, int);
+int dn_comp(const char *, u_char *, int, u_char **, u_char **);
+int dn_expand(const u_char *, const u_char *, const u_char *,
+ char *, int);
diff --git a/usr/src/lib/libresolv2_joy/common/mapfile-vers b/usr/src/lib/libresolv2_joy/common/mapfile-vers
new file mode 100644
index 0000000000..e0c66a1c96
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/mapfile-vers
@@ -0,0 +1,60 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_VERSION SUNWprivate {
+ global:
+ joy_dn_expand;
+ joy_res_nsearch;
+ joy_res_ninit;
+ joy_res_ndestroy;
+ joy_res_gethostbyaddr;
+ joy_res_gethostbyname;
+ joy_res_gethostbyname2;
+ joy_res_sethostent;
+ joy_res_endhostent;
+ __joy_res_override_retry;
+ __joy_res_unset_no_hosts_fallback;
+ __joy_res_set_no_hosts_fallback;
+ __joy_h_errno;
+ __joy_ns_get16;
+ __joy_ns_get32;
+ local:
+ *;
+};
diff --git a/usr/src/lib/libresolv2_joy/common/nameser/ns_date.c b/usr/src/lib/libresolv2_joy/common/nameser/ns_date.c
new file mode 100644
index 0000000000..292375af63
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/nameser/ns_date.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef lint
+static const char rcsid[] = "$Id: ns_date.c,v 1.6 2005/04/27 04:56:39 sra Exp $";
+#endif
+
+/* Import. */
+
+#include "port_before.h"
+
+#include <arpa/nameser.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+
+#include "port_after.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) ((size_t)sprintf x)
+#endif
+
+/* Forward. */
+
+static int datepart(const char *, int, int, int, int *);
+
+/* Public. */
+
+/*%
+ * Convert a date in ASCII into the number of seconds since
+ * 1 January 1970 (GMT assumed). Format is yyyymmddhhmmss, all
+ * digits required, no spaces allowed.
+ */
+
+u_int32_t
+ns_datetosecs(const char *cp, int *errp) {
+ struct tm time;
+ u_int32_t result;
+ int mdays, i;
+ static const int days_per_month[12] =
+ {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
+
+ if (strlen(cp) != 14U) {
+ *errp = 1;
+ return (0);
+ }
+ *errp = 0;
+
+ memset(&time, 0, sizeof time);
+ time.tm_year = datepart(cp + 0, 4, 1990, 9999, errp) - 1900;
+ time.tm_mon = datepart(cp + 4, 2, 01, 12, errp) - 1;
+ time.tm_mday = datepart(cp + 6, 2, 01, 31, errp);
+ time.tm_hour = datepart(cp + 8, 2, 00, 23, errp);
+ time.tm_min = datepart(cp + 10, 2, 00, 59, errp);
+ time.tm_sec = datepart(cp + 12, 2, 00, 59, errp);
+ if (*errp) /*%< Any parse errors? */
+ return (0);
+
+ /*
+ * OK, now because timegm() is not available in all environments,
+ * we will do it by hand. Roll up sleeves, curse the gods, begin!
+ */
+
+#define SECS_PER_DAY ((u_int32_t)24*60*60)
+#define isleap(y) ((((y) % 4) == 0 && ((y) % 100) != 0) || ((y) % 400) == 0)
+
+ result = time.tm_sec; /*%< Seconds */
+ result += time.tm_min * 60; /*%< Minutes */
+ result += time.tm_hour * (60*60); /*%< Hours */
+ result += (time.tm_mday - 1) * SECS_PER_DAY; /*%< Days */
+ /* Months are trickier. Look without leaping, then leap */
+ mdays = 0;
+ for (i = 0; i < time.tm_mon; i++)
+ mdays += days_per_month[i];
+ result += mdays * SECS_PER_DAY; /*%< Months */
+ if (time.tm_mon > 1 && isleap(1900+time.tm_year))
+ result += SECS_PER_DAY; /*%< Add leapday for this year */
+ /* First figure years without leapdays, then add them in. */
+ /* The loop is slow, FIXME, but simple and accurate. */
+ result += (time.tm_year - 70) * (SECS_PER_DAY*365); /*%< Years */
+ for (i = 70; i < time.tm_year; i++)
+ if (isleap(1900+i))
+ result += SECS_PER_DAY; /*%< Add leapday for prev year */
+ return (result);
+}
+
+/* Private. */
+
+/*%
+ * Parse part of a date. Set error flag if any error.
+ * Don't reset the flag if there is no error.
+ */
+static int
+datepart(const char *buf, int size, int min, int max, int *errp) {
+ int result = 0;
+ int i;
+
+ for (i = 0; i < size; i++) {
+ if (!isdigit((unsigned char)(buf[i])))
+ *errp = 1;
+ result = (result * 10) + buf[i] - '0';
+ }
+ if (result < min)
+ *errp = 1;
+ if (result > max)
+ *errp = 1;
+ return (result);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/nameser/ns_name.c b/usr/src/lib/libresolv2_joy/common/nameser/ns_name.c
new file mode 100644
index 0000000000..f6b0accef1
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/nameser/ns_name.c
@@ -0,0 +1,1153 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef lint
+static const char rcsid[] = "$Id: ns_name.c,v 1.11 2009/01/23 19:59:16 each Exp $";
+#endif
+
+#include "port_before.h"
+
+#include <sys/types.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+
+#include <errno.h>
+#include <resolv_joy.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <limits.h>
+
+#include "port_after.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) ((size_t)sprintf x)
+#endif
+
+#define NS_TYPE_ELT 0x40 /*%< EDNS0 extended label type */
+#define DNS_LABELTYPE_BITSTRING 0x41
+
+/* Data. */
+
+static const char digits[] = "0123456789";
+
+static const char digitvalue[256] = {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /*16*/
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /*32*/
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /*48*/
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, /*64*/
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /*80*/
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /*96*/
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /*112*/
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /*128*/
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /*256*/
+};
+
+/* Forward. */
+
+static int special(int);
+static int printable(int);
+static int dn_find(const u_char *, const u_char *,
+ const u_char * const *,
+ const u_char * const *);
+static int encode_bitsring(const char **, const char *,
+ unsigned char **, unsigned char **,
+ unsigned const char *);
+static int labellen(const u_char *);
+static int decode_bitstring(const unsigned char **,
+ char *, const char *);
+
+/* Public. */
+
+/*%
+ * Convert an encoded domain name to printable ascii as per RFC1035.
+
+ * return:
+ *\li Number of bytes written to buffer, or -1 (with errno set)
+ *
+ * notes:
+ *\li The root is returned as "."
+ *\li All other domains are returned in non absolute form
+ */
+int
+ns_name_ntop(const u_char *src, char *dst, size_t dstsiz)
+{
+ const u_char *cp;
+ char *dn, *eom;
+ u_char c;
+ u_int n;
+ int l;
+
+ cp = src;
+ dn = dst;
+ eom = dst + dstsiz;
+
+ while ((n = *cp++) != 0) {
+ if ((n & NS_CMPRSFLGS) == NS_CMPRSFLGS) {
+ /* Some kind of compression pointer. */
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ if (dn != dst) {
+ if (dn >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ *dn++ = '.';
+ }
+ if ((l = labellen(cp - 1)) < 0) {
+ errno = EMSGSIZE; /*%< XXX */
+ return (-1);
+ }
+ if (dn + l >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ if ((n & NS_CMPRSFLGS) == NS_TYPE_ELT) {
+ int m;
+
+ if (n != DNS_LABELTYPE_BITSTRING) {
+ /* XXX: labellen should reject this case */
+ errno = EINVAL;
+ return (-1);
+ }
+ if ((m = decode_bitstring(&cp, dn, eom)) < 0)
+ {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ dn += m;
+ continue;
+ }
+ for ((void)NULL; l > 0; l--) {
+ c = *cp++;
+ if (special(c)) {
+ if (dn + 1 >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ *dn++ = '\\';
+ *dn++ = (char)c;
+ } else if (!printable(c)) {
+ if (dn + 3 >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ *dn++ = '\\';
+ *dn++ = digits[c / 100];
+ *dn++ = digits[(c % 100) / 10];
+ *dn++ = digits[c % 10];
+ } else {
+ if (dn >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ *dn++ = (char)c;
+ }
+ }
+ }
+ if (dn == dst) {
+ if (dn >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ *dn++ = '.';
+ }
+ if (dn >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ *dn++ = '\0';
+ return (dn - dst);
+}
+
+/*%
+ * Convert a ascii string into an encoded domain name as per RFC1035.
+ *
+ * return:
+ *
+ *\li -1 if it fails
+ *\li 1 if string was fully qualified
+ *\li 0 is string was not fully qualified
+ *
+ * notes:
+ *\li Enforces label and domain length limits.
+ */
+int
+ns_name_pton(const char *src, u_char *dst, size_t dstsiz) {
+ return (ns_name_pton2(src, dst, dstsiz, NULL));
+}
+
+/*
+ * ns_name_pton2(src, dst, dstsiz, *dstlen)
+ * Convert a ascii string into an encoded domain name as per RFC1035.
+ * return:
+ * -1 if it fails
+ * 1 if string was fully qualified
+ * 0 is string was not fully qualified
+ * side effects:
+ * fills in *dstlen (if non-NULL)
+ * notes:
+ * Enforces label and domain length limits.
+ */
+int
+ns_name_pton2(const char *src, u_char *dst, size_t dstsiz, size_t *dstlen) {
+ u_char *label, *bp, *eom;
+ int c, n, escaped, e = 0;
+ char *cp;
+
+ escaped = 0;
+ bp = dst;
+ eom = dst + dstsiz;
+ label = bp++;
+
+ while ((c = *src++) != 0) {
+ if (escaped) {
+ if (c == '[') { /*%< start a bit string label */
+ if ((cp = strchr(src, ']')) == NULL) {
+ errno = EINVAL; /*%< ??? */
+ return (-1);
+ }
+ if ((e = encode_bitsring(&src, cp + 2,
+ &label, &bp, eom))
+ != 0) {
+ errno = e;
+ return (-1);
+ }
+ escaped = 0;
+ label = bp++;
+ if ((c = *src++) == 0)
+ goto done;
+ else if (c != '.') {
+ errno = EINVAL;
+ return (-1);
+ }
+ continue;
+ }
+ else if ((cp = strchr(digits, c)) != NULL) {
+ n = (cp - digits) * 100;
+ if ((c = *src++) == 0 ||
+ (cp = strchr(digits, c)) == NULL) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ n += (cp - digits) * 10;
+ if ((c = *src++) == 0 ||
+ (cp = strchr(digits, c)) == NULL) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ n += (cp - digits);
+ if (n > 255) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ c = n;
+ }
+ escaped = 0;
+ } else if (c == '\\') {
+ escaped = 1;
+ continue;
+ } else if (c == '.') {
+ c = (bp - label - 1);
+ if ((c & NS_CMPRSFLGS) != 0) { /*%< Label too big. */
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ if (label >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ *label = c;
+ /* Fully qualified ? */
+ if (*src == '\0') {
+ if (c != 0) {
+ if (bp >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ *bp++ = '\0';
+ }
+ if ((bp - dst) > MAXCDNAME) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ if (dstlen != NULL)
+ *dstlen = (bp - dst);
+ return (1);
+ }
+ if (c == 0 || *src == '.') {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ label = bp++;
+ continue;
+ }
+ if (bp >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ *bp++ = (u_char)c;
+ }
+ c = (bp - label - 1);
+ if ((c & NS_CMPRSFLGS) != 0) { /*%< Label too big. */
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ done:
+ if (label >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ *label = c;
+ if (c != 0) {
+ if (bp >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ *bp++ = 0;
+ }
+ if ((bp - dst) > MAXCDNAME) { /*%< src too big */
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ if (dstlen != NULL)
+ *dstlen = (bp - dst);
+ return (0);
+}
+
+/*%
+ * Convert a network strings labels into all lowercase.
+ *
+ * return:
+ *\li Number of bytes written to buffer, or -1 (with errno set)
+ *
+ * notes:
+ *\li Enforces label and domain length limits.
+ */
+
+int
+ns_name_ntol(const u_char *src, u_char *dst, size_t dstsiz)
+{
+ const u_char *cp;
+ u_char *dn, *eom;
+ u_char c;
+ u_int n;
+ int l;
+
+ cp = src;
+ dn = dst;
+ eom = dst + dstsiz;
+
+ if (dn >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ while ((n = *cp++) != 0) {
+ if ((n & NS_CMPRSFLGS) == NS_CMPRSFLGS) {
+ /* Some kind of compression pointer. */
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ *dn++ = n;
+ if ((l = labellen(cp - 1)) < 0) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ if (dn + l >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ for ((void)NULL; l > 0; l--) {
+ c = *cp++;
+ if (isascii(c) && isupper(c))
+ *dn++ = tolower(c);
+ else
+ *dn++ = c;
+ }
+ }
+ *dn++ = '\0';
+ return (dn - dst);
+}
+
+/*%
+ * Unpack a domain name from a message, source may be compressed.
+ *
+ * return:
+ *\li -1 if it fails, or consumed octets if it succeeds.
+ */
+int
+ns_name_unpack(const u_char *msg, const u_char *eom, const u_char *src,
+ u_char *dst, size_t dstsiz)
+{
+ return (ns_name_unpack2(msg, eom, src, dst, dstsiz, NULL));
+}
+
+/*
+ * ns_name_unpack2(msg, eom, src, dst, dstsiz, *dstlen)
+ * Unpack a domain name from a message, source may be compressed.
+ * return:
+ * -1 if it fails, or consumed octets if it succeeds.
+ * side effect:
+ * fills in *dstlen (if non-NULL).
+ */
+int
+ns_name_unpack2(const u_char *msg, const u_char *eom, const u_char *src,
+ u_char *dst, size_t dstsiz, size_t *dstlen)
+{
+ const u_char *srcp, *dstlim;
+ u_char *dstp;
+ int n, len, checked, l;
+
+ len = -1;
+ checked = 0;
+ dstp = dst;
+ srcp = src;
+ dstlim = dst + dstsiz;
+ if (srcp < msg || srcp >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ /* Fetch next label in domain name. */
+ while ((n = *srcp++) != 0) {
+ /* Check for indirection. */
+ switch (n & NS_CMPRSFLGS) {
+ case 0:
+ case NS_TYPE_ELT:
+ /* Limit checks. */
+ if ((l = labellen(srcp - 1)) < 0) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ if (dstp + l + 1 >= dstlim || srcp + l >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ checked += l + 1;
+ *dstp++ = n;
+ memcpy(dstp, srcp, l);
+ dstp += l;
+ srcp += l;
+ break;
+
+ case NS_CMPRSFLGS:
+ if (srcp >= eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ if (len < 0)
+ len = srcp - src + 1;
+ srcp = msg + (((n & 0x3f) << 8) | (*srcp & 0xff));
+ if (srcp < msg || srcp >= eom) { /*%< Out of range. */
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ checked += 2;
+ /*
+ * Check for loops in the compressed name;
+ * if we've looked at the whole message,
+ * there must be a loop.
+ */
+ if (checked >= eom - msg) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ break;
+
+ default:
+ errno = EMSGSIZE;
+ return (-1); /*%< flag error */
+ }
+ }
+ *dstp++ = 0;
+ if (dstlen != NULL)
+ *dstlen = dstp - dst;
+ if (len < 0)
+ len = srcp - src;
+ return (len);
+}
+
+/*%
+ * Pack domain name 'domain' into 'comp_dn'.
+ *
+ * return:
+ *\li Size of the compressed name, or -1.
+ *
+ * notes:
+ *\li 'dnptrs' is an array of pointers to previous compressed names.
+ *\li dnptrs[0] is a pointer to the beginning of the message. The array
+ * ends with NULL.
+ *\li 'lastdnptr' is a pointer to the end of the array pointed to
+ * by 'dnptrs'.
+ *
+ * Side effects:
+ *\li The list of pointers in dnptrs is updated for labels inserted into
+ * the message as we compress the name. If 'dnptr' is NULL, we don't
+ * try to compress names. If 'lastdnptr' is NULL, we don't update the
+ * list.
+ */
+int
+ns_name_pack(const u_char *src, u_char *dst, int dstsiz,
+ const u_char **dnptrs, const u_char **lastdnptr)
+{
+ u_char *dstp;
+ const u_char **cpp, **lpp, *eob, *msg;
+ const u_char *srcp;
+ int n, l, first = 1;
+
+ srcp = src;
+ dstp = dst;
+ eob = dstp + dstsiz;
+ lpp = cpp = NULL;
+ if (dnptrs != NULL) {
+ if ((msg = *dnptrs++) != NULL) {
+ for (cpp = dnptrs; *cpp != NULL; cpp++)
+ (void)NULL;
+ lpp = cpp; /*%< end of list to search */
+ }
+ } else
+ msg = NULL;
+
+ /* make sure the domain we are about to add is legal */
+ l = 0;
+ do {
+ int l0;
+
+ n = *srcp;
+ if ((n & NS_CMPRSFLGS) == NS_CMPRSFLGS) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ if ((l0 = labellen(srcp)) < 0) {
+ errno = EINVAL;
+ return (-1);
+ }
+ l += l0 + 1;
+ if (l > MAXCDNAME) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ srcp += l0 + 1;
+ } while (n != 0);
+
+ /* from here on we need to reset compression pointer array on error */
+ srcp = src;
+ do {
+ /* Look to see if we can use pointers. */
+ n = *srcp;
+ if (n != 0 && msg != NULL) {
+ l = dn_find(srcp, msg, (const u_char * const *)dnptrs,
+ (const u_char * const *)lpp);
+ if (l >= 0) {
+ if (dstp + 1 >= eob) {
+ goto cleanup;
+ }
+ *dstp++ = (l >> 8) | NS_CMPRSFLGS;
+ *dstp++ = l % 256;
+ return (dstp - dst);
+ }
+ /* Not found, save it. */
+ if (lastdnptr != NULL && cpp < lastdnptr - 1 &&
+ (dstp - msg) < 0x4000 && first) {
+ *cpp++ = dstp;
+ *cpp = NULL;
+ first = 0;
+ }
+ }
+ /* copy label to buffer */
+ if ((n & NS_CMPRSFLGS) == NS_CMPRSFLGS) {
+ /* Should not happen. */
+ goto cleanup;
+ }
+ n = labellen(srcp);
+ if (dstp + 1 + n >= eob) {
+ goto cleanup;
+ }
+ memcpy(dstp, srcp, n + 1);
+ srcp += n + 1;
+ dstp += n + 1;
+ } while (n != 0);
+
+ if (dstp > eob) {
+cleanup:
+ if (msg != NULL)
+ *lpp = NULL;
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ return (dstp - dst);
+}
+
+/*%
+ * Expand compressed domain name to presentation format.
+ *
+ * return:
+ *\li Number of bytes read out of `src', or -1 (with errno set).
+ *
+ * note:
+ *\li Root domain returns as "." not "".
+ */
+int
+ns_name_uncompress(const u_char *msg, const u_char *eom, const u_char *src,
+ char *dst, size_t dstsiz)
+{
+ u_char tmp[NS_MAXCDNAME];
+ int n;
+
+ if ((n = ns_name_unpack(msg, eom, src, tmp, sizeof tmp)) == -1)
+ return (-1);
+ if (ns_name_ntop(tmp, dst, dstsiz) == -1)
+ return (-1);
+ return (n);
+}
+
+/*%
+ * Compress a domain name into wire format, using compression pointers.
+ *
+ * return:
+ *\li Number of bytes consumed in `dst' or -1 (with errno set).
+ *
+ * notes:
+ *\li 'dnptrs' is an array of pointers to previous compressed names.
+ *\li dnptrs[0] is a pointer to the beginning of the message.
+ *\li The list ends with NULL. 'lastdnptr' is a pointer to the end of the
+ * array pointed to by 'dnptrs'. Side effect is to update the list of
+ * pointers for labels inserted into the message as we compress the name.
+ *\li If 'dnptr' is NULL, we don't try to compress names. If 'lastdnptr'
+ * is NULL, we don't update the list.
+ */
+int
+ns_name_compress(const char *src, u_char *dst, size_t dstsiz,
+ const u_char **dnptrs, const u_char **lastdnptr)
+{
+ u_char tmp[NS_MAXCDNAME];
+
+ if (ns_name_pton(src, tmp, sizeof tmp) == -1)
+ return (-1);
+ return (ns_name_pack(tmp, dst, dstsiz, dnptrs, lastdnptr));
+}
+
+/*%
+ * Reset dnptrs so that there are no active references to pointers at or
+ * after src.
+ */
+void
+ns_name_rollback(const u_char *src, const u_char **dnptrs,
+ const u_char **lastdnptr)
+{
+ while (dnptrs < lastdnptr && *dnptrs != NULL) {
+ if (*dnptrs >= src) {
+ *dnptrs = NULL;
+ break;
+ }
+ dnptrs++;
+ }
+}
+
+/*%
+ * Advance *ptrptr to skip over the compressed name it points at.
+ *
+ * return:
+ *\li 0 on success, -1 (with errno set) on failure.
+ */
+int
+ns_name_skip(const u_char **ptrptr, const u_char *eom)
+{
+ const u_char *cp;
+ u_int n;
+ int l;
+
+ cp = *ptrptr;
+ while (cp < eom && (n = *cp++) != 0) {
+ /* Check for indirection. */
+ switch (n & NS_CMPRSFLGS) {
+ case 0: /*%< normal case, n == len */
+ cp += n;
+ continue;
+ case NS_TYPE_ELT: /*%< EDNS0 extended label */
+ if ((l = labellen(cp - 1)) < 0) {
+ errno = EMSGSIZE; /*%< XXX */
+ return (-1);
+ }
+ cp += l;
+ continue;
+ case NS_CMPRSFLGS: /*%< indirection */
+ cp++;
+ break;
+ default: /*%< illegal type */
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ break;
+ }
+ if (cp > eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ *ptrptr = cp;
+ return (0);
+}
+
+/* Find the number of octets an nname takes up, including the root label.
+ * (This is basically ns_name_skip() without compression-pointer support.)
+ * ((NOTE: can only return zero if passed-in namesiz argument is zero.))
+ */
+ssize_t
+ns_name_length(ns_nname_ct nname, size_t namesiz) {
+ ns_nname_ct orig = nname;
+ u_int n;
+
+ while (namesiz-- > 0 && (n = *nname++) != 0) {
+ if ((n & NS_CMPRSFLGS) != 0) {
+ errno = EISDIR;
+ return (-1);
+ }
+ if (n > namesiz) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ nname += n;
+ namesiz -= n;
+ }
+ return (nname - orig);
+}
+
+/* Compare two nname's for equality. Return -1 on error (setting errno).
+ */
+int
+ns_name_eq(ns_nname_ct a, size_t as, ns_nname_ct b, size_t bs) {
+ ns_nname_ct ae = a + as, be = b + bs;
+ int ac, bc;
+
+ while (ac = *a, bc = *b, ac != 0 && bc != 0) {
+ if ((ac & NS_CMPRSFLGS) != 0 || (bc & NS_CMPRSFLGS) != 0) {
+ errno = EISDIR;
+ return (-1);
+ }
+ if (a + ac >= ae || b + bc >= be) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ if (ac != bc || strncasecmp((const char *) ++a,
+ (const char *) ++b, ac) != 0)
+ return (0);
+ a += ac, b += bc;
+ }
+ return (ac == 0 && bc == 0);
+}
+
+/* Is domain "A" owned by (at or below) domain "B"?
+ */
+int
+ns_name_owned(ns_namemap_ct a, int an, ns_namemap_ct b, int bn) {
+ /* If A is shorter, it cannot be owned by B. */
+ if (an < bn)
+ return (0);
+
+ /* If they are unequal before the length of the shorter, A cannot... */
+ while (bn > 0) {
+ if (a->len != b->len ||
+ strncasecmp((const char *) a->base,
+ (const char *) b->base, a->len) != 0)
+ return (0);
+ a++, an--;
+ b++, bn--;
+ }
+
+ /* A might be longer or not, but either way, B owns it. */
+ return (1);
+}
+
+/* Build an array of <base,len> tuples from an nname, top-down order.
+ * Return the number of tuples (labels) thus discovered.
+ */
+int
+ns_name_map(ns_nname_ct nname, size_t namelen, ns_namemap_t map, int mapsize) {
+ u_int n;
+ int l;
+
+ n = *nname++;
+ namelen--;
+
+ /* Root zone? */
+ if (n == 0) {
+ /* Extra data follows name? */
+ if (namelen > 0) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ return (0);
+ }
+
+ /* Compression pointer? */
+ if ((n & NS_CMPRSFLGS) != 0) {
+ errno = EISDIR;
+ return (-1);
+ }
+
+ /* Label too long? */
+ if (n > namelen) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+
+ /* Recurse to get rest of name done first. */
+ l = ns_name_map(nname + n, namelen - n, map, mapsize);
+ if (l < 0)
+ return (-1);
+
+ /* Too many labels? */
+ if (l >= mapsize) {
+ errno = ENAMETOOLONG;
+ return (-1);
+ }
+
+ /* We're on our way back up-stack, store current map data. */
+ map[l].base = nname;
+ map[l].len = n;
+ return (l + 1);
+}
+
+/* Count the labels in a domain name. Root counts, so COM. has two. This
+ * is to make the result comparable to the result of ns_name_map().
+ */
+int
+ns_name_labels(ns_nname_ct nname, size_t namesiz) {
+ int ret = 0;
+ u_int n;
+
+ while (namesiz-- > 0 && (n = *nname++) != 0) {
+ if ((n & NS_CMPRSFLGS) != 0) {
+ errno = EISDIR;
+ return (-1);
+ }
+ if (n > namesiz) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ nname += n;
+ namesiz -= n;
+ ret++;
+ }
+ return (ret + 1);
+}
+
+/* Private. */
+
+/*%
+ * Thinking in noninternationalized USASCII (per the DNS spec),
+ * is this characted special ("in need of quoting") ?
+ *
+ * return:
+ *\li boolean.
+ */
+static int
+special(int ch) {
+ switch (ch) {
+ case 0x22: /*%< '"' */
+ case 0x2E: /*%< '.' */
+ case 0x3B: /*%< ';' */
+ case 0x5C: /*%< '\\' */
+ case 0x28: /*%< '(' */
+ case 0x29: /*%< ')' */
+ /* Special modifiers in zone files. */
+ case 0x40: /*%< '@' */
+ case 0x24: /*%< '$' */
+ return (1);
+ default:
+ return (0);
+ }
+}
+
+/*%
+ * Thinking in noninternationalized USASCII (per the DNS spec),
+ * is this character visible and not a space when printed ?
+ *
+ * return:
+ *\li boolean.
+ */
+static int
+printable(int ch) {
+ return (ch > 0x20 && ch < 0x7f);
+}
+
+/*%
+ * Thinking in noninternationalized USASCII (per the DNS spec),
+ * convert this character to lower case if it's upper case.
+ */
+static int
+mklower(int ch) {
+ if (ch >= 0x41 && ch <= 0x5A)
+ return (ch + 0x20);
+ return (ch);
+}
+
+/*%
+ * Search for the counted-label name in an array of compressed names.
+ *
+ * return:
+ *\li offset from msg if found, or -1.
+ *
+ * notes:
+ *\li dnptrs is the pointer to the first name on the list,
+ *\li not the pointer to the start of the message.
+ */
+static int
+dn_find(const u_char *domain, const u_char *msg,
+ const u_char * const *dnptrs,
+ const u_char * const *lastdnptr)
+{
+ const u_char *dn, *cp, *sp;
+ const u_char * const *cpp;
+ u_int n;
+
+ for (cpp = dnptrs; cpp < lastdnptr; cpp++) {
+ sp = *cpp;
+ /*
+ * terminate search on:
+ * root label
+ * compression pointer
+ * unusable offset
+ */
+ while (*sp != 0 && (*sp & NS_CMPRSFLGS) == 0 &&
+ (sp - msg) < 0x4000) {
+ dn = domain;
+ cp = sp;
+ while ((n = *cp++) != 0) {
+ /*
+ * check for indirection
+ */
+ switch (n & NS_CMPRSFLGS) {
+ case 0: /*%< normal case, n == len */
+ n = labellen(cp - 1); /*%< XXX */
+ if (n != *dn++)
+ goto next;
+
+ for ((void)NULL; n > 0; n--)
+ if (mklower(*dn++) !=
+ mklower(*cp++))
+ goto next;
+ /* Is next root for both ? */
+ if (*dn == '\0' && *cp == '\0')
+ return (sp - msg);
+ if (*dn)
+ continue;
+ goto next;
+ case NS_CMPRSFLGS: /*%< indirection */
+ cp = msg + (((n & 0x3f) << 8) | *cp);
+ break;
+
+ default: /*%< illegal type */
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ }
+ next: ;
+ sp += *sp + 1;
+ }
+ }
+ errno = ENOENT;
+ return (-1);
+}
+
+static int
+decode_bitstring(const unsigned char **cpp, char *dn, const char *eom)
+{
+ const unsigned char *cp = *cpp;
+ char *beg = dn, tc;
+ int b, blen, plen, i;
+
+ if ((blen = (*cp & 0xff)) == 0)
+ blen = 256;
+ plen = (blen + 3) / 4;
+ plen += sizeof("\\[x/]") + (blen > 99 ? 3 : (blen > 9) ? 2 : 1);
+ if (dn + plen >= eom)
+ return (-1);
+
+ cp++;
+ i = SPRINTF((dn, "\\[x"));
+ if (i < 0)
+ return (-1);
+ dn += i;
+ for (b = blen; b > 7; b -= 8, cp++) {
+ i = SPRINTF((dn, "%02x", *cp & 0xff));
+ if (i < 0)
+ return (-1);
+ dn += i;
+ }
+ if (b > 4) {
+ tc = *cp++;
+ i = SPRINTF((dn, "%02x", tc & (0xff << (8 - b))));
+ if (i < 0)
+ return (-1);
+ dn += i;
+ } else if (b > 0) {
+ tc = *cp++;
+ i = SPRINTF((dn, "%1x",
+ ((tc >> 4) & 0x0f) & (0x0f << (4 - b))));
+ if (i < 0)
+ return (-1);
+ dn += i;
+ }
+ i = SPRINTF((dn, "/%d]", blen));
+ if (i < 0)
+ return (-1);
+ dn += i;
+
+ *cpp = cp;
+ return (dn - beg);
+}
+
+static int
+encode_bitsring(const char **bp, const char *end, unsigned char **labelp,
+ unsigned char ** dst, unsigned const char *eom)
+{
+ int afterslash = 0;
+ const char *cp = *bp;
+ unsigned char *tp;
+ char c;
+ const char *beg_blen;
+ char *end_blen = NULL;
+ int value = 0, count = 0, tbcount = 0, blen = 0;
+
+ beg_blen = end_blen = NULL;
+
+ /* a bitstring must contain at least 2 characters */
+ if (end - cp < 2)
+ return (EINVAL);
+
+ /* XXX: currently, only hex strings are supported */
+ if (*cp++ != 'x')
+ return (EINVAL);
+ if (!isxdigit((*cp) & 0xff)) /*%< reject '\[x/BLEN]' */
+ return (EINVAL);
+
+ for (tp = *dst + 1; cp < end && tp < eom; cp++) {
+ switch((c = *cp)) {
+ case ']': /*%< end of the bitstring */
+ if (afterslash) {
+ if (beg_blen == NULL)
+ return (EINVAL);
+ blen = (int)strtol(beg_blen, &end_blen, 10);
+ if (*end_blen != ']')
+ return (EINVAL);
+ }
+ if (count)
+ *tp++ = ((value << 4) & 0xff);
+ cp++; /*%< skip ']' */
+ goto done;
+ case '/':
+ afterslash = 1;
+ break;
+ default:
+ if (afterslash) {
+ if (!isdigit(c&0xff))
+ return (EINVAL);
+ if (beg_blen == NULL) {
+
+ if (c == '0') {
+ /* blen never begings with 0 */
+ return (EINVAL);
+ }
+ beg_blen = cp;
+ }
+ } else {
+ if (!isxdigit(c&0xff))
+ return (EINVAL);
+ value <<= 4;
+ value += digitvalue[(int)c];
+ count += 4;
+ tbcount += 4;
+ if (tbcount > 256)
+ return (EINVAL);
+ if (count == 8) {
+ *tp++ = value;
+ count = 0;
+ }
+ }
+ break;
+ }
+ }
+ done:
+ if (cp >= end || tp >= eom)
+ return (EMSGSIZE);
+
+ /*
+ * bit length validation:
+ * If a <length> is present, the number of digits in the <bit-data>
+ * MUST be just sufficient to contain the number of bits specified
+ * by the <length>. If there are insignificant bits in a final
+ * hexadecimal or octal digit, they MUST be zero.
+ * RFC2673, Section 3.2.
+ */
+ if (blen > 0) {
+ int traillen;
+
+ if (((blen + 3) & ~3) != tbcount)
+ return (EINVAL);
+ traillen = tbcount - blen; /*%< between 0 and 3 */
+ if (((value << (8 - traillen)) & 0xff) != 0)
+ return (EINVAL);
+ }
+ else
+ blen = tbcount;
+ if (blen == 256)
+ blen = 0;
+
+ /* encode the type and the significant bit fields */
+ **labelp = DNS_LABELTYPE_BITSTRING;
+ **dst = blen;
+
+ *bp = cp;
+ *dst = tp;
+
+ return (0);
+}
+
+static int
+labellen(const u_char *lp)
+{
+ int bitlen;
+ u_char l = *lp;
+
+ if ((l & NS_CMPRSFLGS) == NS_CMPRSFLGS) {
+ /* should be avoided by the caller */
+ return (-1);
+ }
+
+ if ((l & NS_CMPRSFLGS) == NS_TYPE_ELT) {
+ if (l == DNS_LABELTYPE_BITSTRING) {
+ if ((bitlen = *(lp + 1)) == 0)
+ bitlen = 256;
+ return ((bitlen + 7 ) / 8 + 1);
+ }
+ return (-1); /*%< unknwon ELT */
+ }
+ return (l);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/nameser/ns_netint.c b/usr/src/lib/libresolv2_joy/common/nameser/ns_netint.c
new file mode 100644
index 0000000000..e196217f9b
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/nameser/ns_netint.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef lint
+static const char rcsid[] = "$Id: ns_netint.c,v 1.3 2005/04/27 04:56:40 sra Exp $";
+#endif
+
+/* Import. */
+
+#include "port_before.h"
+
+#include <arpa/nameser.h>
+
+#include "port_after.h"
+
+#pragma redefine_extname __ns_get16 __joy_ns_get16
+#pragma redefine_extname __ns_get32 __joy_ns_get32
+
+/* Public. */
+
+u_int
+ns_get16(const u_char *src) {
+ u_int dst;
+
+ NS_GET16(dst, src);
+ return (dst);
+}
+
+u_long
+ns_get32(const u_char *src) {
+ u_long dst;
+
+ NS_GET32(dst, src);
+ return (dst);
+}
+
+void
+ns_put16(u_int src, u_char *dst) {
+ NS_PUT16(src, dst);
+}
+
+void
+ns_put32(u_long src, u_char *dst) {
+ NS_PUT32(src, dst);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/nameser/ns_newmsg.c b/usr/src/lib/libresolv2_joy/common/nameser/ns_newmsg.c
new file mode 100644
index 0000000000..c18dd060e1
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/nameser/ns_newmsg.c
@@ -0,0 +1,273 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+/*
+ * Copyright (C) 2009 Internet Systems Consortium, Inc. ("ISC")
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef lint
+static const char rcsid[] = "$Id: ns_newmsg.c,v 1.3 2009/02/26 10:48:57 marka Exp $";
+#endif
+
+#include <port_before.h>
+
+#include <arpa/nameser.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <string.h>
+
+#include <port_after.h>
+
+static int rdcpy(ns_newmsg *, ns_type, const u_char *, size_t);
+
+/* Initialize a "newmsg" object to empty.
+ */
+int
+ns_newmsg_init(u_char *buffer, size_t bufsiz, ns_newmsg *handle) {
+ ns_msg *msg = &handle->msg;
+
+ memset(handle, 0, sizeof *handle);
+ msg->_msg = buffer;
+ msg->_eom = buffer + bufsiz;
+ msg->_sect = ns_s_qd;
+ msg->_rrnum = 0;
+ msg->_msg_ptr = buffer + NS_HFIXEDSZ;
+ handle->dnptrs[0] = msg->_msg;
+ handle->dnptrs[1] = NULL;
+ handle->lastdnptr = &handle->dnptrs[sizeof handle->dnptrs /
+ sizeof handle->dnptrs[0] - 1];
+ return (0);
+}
+
+/* Initialize a "newmsg" object by copying an existing parsed message.
+ */
+int
+ns_newmsg_copy(ns_newmsg *handle, ns_msg *msg) {
+ ns_flag flag;
+ ns_sect sect;
+
+ ns_newmsg_id(handle, ns_msg_id(*msg));
+ for (flag = ns_f_qr; flag < ns_f_max; flag++)
+ ns_newmsg_flag(handle, flag, ns_msg_getflag(*msg, flag));
+ for (sect = ns_s_qd; sect < ns_s_max; sect++) {
+ int i, count;
+
+ count = ns_msg_count(*msg, sect);
+ for (i = 0; i < count; i++) {
+ ns_rr2 rr;
+ int x;
+
+ if (ns_parserr2(msg, sect, i, &rr) < 0)
+ return (-1);
+ if (sect == ns_s_qd)
+ x = ns_newmsg_q(handle,
+ ns_rr_nname(rr),
+ ns_rr_type(rr),
+ ns_rr_class(rr));
+ else
+ x = ns_newmsg_rr(handle, sect,
+ ns_rr_nname(rr),
+ ns_rr_type(rr),
+ ns_rr_class(rr),
+ ns_rr_ttl(rr),
+ ns_rr_rdlen(rr),
+ ns_rr_rdata(rr));
+ if (x < 0)
+ return (-1);
+ }
+ }
+ return (0);
+}
+
+/* Set the message-ID in a "newmsg" object.
+ */
+void
+ns_newmsg_id(ns_newmsg *handle, u_int16_t id) {
+ ns_msg *msg = &handle->msg;
+
+ msg->_id = id;
+}
+
+/* Set a flag (including rcode or opcode) in a "newmsg" object.
+ */
+void
+ns_newmsg_flag(ns_newmsg *handle, ns_flag flag, u_int value) {
+ extern struct _ns_flagdata _ns_flagdata[16];
+ struct _ns_flagdata *fd = &_ns_flagdata[flag];
+ ns_msg *msg = &handle->msg;
+
+ assert(flag < ns_f_max);
+ msg->_flags &= (~fd->mask);
+ msg->_flags |= (value << fd->shift);
+}
+
+/* Add a question (or zone, if it's an update) to a "newmsg" object.
+ */
+int
+ns_newmsg_q(ns_newmsg *handle, ns_nname_ct qname,
+ ns_type qtype, ns_class qclass)
+{
+ ns_msg *msg = &handle->msg;
+ u_char *t;
+ int n;
+
+ if (msg->_sect != ns_s_qd) {
+ errno = ENODEV;
+ return (-1);
+ }
+ t = (u_char *) (unsigned long) msg->_msg_ptr;
+ if (msg->_rrnum == 0)
+ msg->_sections[ns_s_qd] = t;
+ n = ns_name_pack(qname, t, msg->_eom - t,
+ handle->dnptrs, handle->lastdnptr);
+ if (n < 0)
+ return (-1);
+ t += n;
+ if (t + QFIXEDSZ >= msg->_eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ NS_PUT16(qtype, t);
+ NS_PUT16(qclass, t);
+ msg->_msg_ptr = t;
+ msg->_counts[ns_s_qd] = ++msg->_rrnum;
+ return (0);
+}
+
+/* Add an RR to a "newmsg" object.
+ */
+int
+ns_newmsg_rr(ns_newmsg *handle, ns_sect sect,
+ ns_nname_ct name, ns_type type,
+ ns_class rr_class, u_int32_t ttl,
+ u_int16_t rdlen, const u_char *rdata)
+{
+ ns_msg *msg = &handle->msg;
+ u_char *t;
+ int n;
+
+ if (sect < msg->_sect) {
+ errno = ENODEV;
+ return (-1);
+ }
+ t = (u_char *) (unsigned long) msg->_msg_ptr;
+ if (sect > msg->_sect) {
+ msg->_sect = sect;
+ msg->_sections[sect] = t;
+ msg->_rrnum = 0;
+ }
+ n = ns_name_pack(name, t, msg->_eom - t,
+ handle->dnptrs, handle->lastdnptr);
+ if (n < 0)
+ return (-1);
+ t += n;
+ if (t + RRFIXEDSZ + rdlen >= msg->_eom) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ NS_PUT16(type, t);
+ NS_PUT16(rr_class, t);
+ NS_PUT32(ttl, t);
+ msg->_msg_ptr = t;
+ if (rdcpy(handle, type, rdata, rdlen) < 0)
+ return (-1);
+ msg->_counts[sect] = ++msg->_rrnum;
+ return (0);
+}
+
+/* Complete a "newmsg" object and return its size for use in write().
+ * (Note: the "newmsg" object is also made ready for ns_parserr() etc.)
+ */
+size_t
+ns_newmsg_done(ns_newmsg *handle) {
+ ns_msg *msg = &handle->msg;
+ ns_sect sect;
+ u_char *t;
+
+ t = (u_char *) (unsigned long) msg->_msg;
+ NS_PUT16(msg->_id, t);
+ NS_PUT16(msg->_flags, t);
+ for (sect = 0; sect < ns_s_max; sect++)
+ NS_PUT16(msg->_counts[sect], t);
+ msg->_eom = msg->_msg_ptr;
+ msg->_sect = ns_s_max;
+ msg->_rrnum = -1;
+ msg->_msg_ptr = NULL;
+ return (msg->_eom - msg->_msg);
+}
+
+/* Private. */
+
+/* Copy an RDATA, using compression pointers where RFC1035 permits.
+ */
+static int
+rdcpy(ns_newmsg *handle, ns_type type, const u_char *rdata, size_t rdlen) {
+ ns_msg *msg = &handle->msg;
+ u_char *p = (u_char *) (unsigned long) msg->_msg_ptr;
+ u_char *t = p + NS_INT16SZ;
+ u_char *s = t;
+ int n;
+
+ switch (type) {
+ case ns_t_soa:
+ /* MNAME. */
+ n = ns_name_pack(rdata, t, msg->_eom - t,
+ handle->dnptrs, handle->lastdnptr);
+ if (n < 0)
+ return (-1);
+ t += n;
+ if (ns_name_skip(&rdata, msg->_eom) < 0)
+ return (-1);
+
+ /* ANAME. */
+ n = ns_name_pack(rdata, t, msg->_eom - t,
+ handle->dnptrs, handle->lastdnptr);
+ if (n < 0)
+ return (-1);
+ t += n;
+ if (ns_name_skip(&rdata, msg->_eom) < 0)
+ return (-1);
+
+ /* Serial, Refresh, Retry, Expiry, and Minimum. */
+ if ((msg->_eom - t) < (NS_INT32SZ * 5)) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ memcpy(t, rdata, NS_INT32SZ * 5);
+ t += (NS_INT32SZ * 5);
+ break;
+ case ns_t_ptr:
+ case ns_t_cname:
+ case ns_t_ns:
+ /* PTRDNAME, CNAME, or NSDNAME. */
+ n = ns_name_pack(rdata, t, msg->_eom - t,
+ handle->dnptrs, handle->lastdnptr);
+ if (n < 0)
+ return (-1);
+ t += n;
+ break;
+ default:
+ memcpy(t, rdata, rdlen);
+ t += rdlen;
+ }
+ NS_PUT16(t - s, p);
+ msg->_msg_ptr = t;
+ return (0);
+}
+
diff --git a/usr/src/lib/libresolv2_joy/common/nameser/ns_parse.c b/usr/src/lib/libresolv2_joy/common/nameser/ns_parse.c
new file mode 100644
index 0000000000..ba11eea707
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/nameser/ns_parse.c
@@ -0,0 +1,275 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef lint
+static const char rcsid[] = "$Id: ns_parse.c,v 1.10 2009/01/23 19:59:16 each Exp $";
+#endif
+
+/* Import. */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+
+#include <errno.h>
+#include <resolv_joy.h>
+#include <string.h>
+
+#include "port_after.h"
+
+/* Forward. */
+
+static void setsection(ns_msg *msg, ns_sect sect);
+
+/* Macros. */
+
+#if !defined(SOLARIS2) || defined(__COVERITY__)
+#define RETERR(err) do { errno = (err); return (-1); } while (0)
+#else
+#define RETERR(err) \
+ do { errno = (err); if (errno == errno) return (-1); } while (0)
+#endif
+
+#define PARSE_FMT_PRESO 0 /* Parse using presentation-format names */
+#define PARSE_FMT_WIRE 1 /* Parse using network-format names */
+
+/* Public. */
+
+/* These need to be in the same order as the nres.h:ns_flag enum. */
+struct _ns_flagdata _ns_flagdata[16] = {
+ { 0x8000, 15 }, /*%< qr. */
+ { 0x7800, 11 }, /*%< opcode. */
+ { 0x0400, 10 }, /*%< aa. */
+ { 0x0200, 9 }, /*%< tc. */
+ { 0x0100, 8 }, /*%< rd. */
+ { 0x0080, 7 }, /*%< ra. */
+ { 0x0040, 6 }, /*%< z. */
+ { 0x0020, 5 }, /*%< ad. */
+ { 0x0010, 4 }, /*%< cd. */
+ { 0x000f, 0 }, /*%< rcode. */
+ { 0x0000, 0 }, /*%< expansion (1/6). */
+ { 0x0000, 0 }, /*%< expansion (2/6). */
+ { 0x0000, 0 }, /*%< expansion (3/6). */
+ { 0x0000, 0 }, /*%< expansion (4/6). */
+ { 0x0000, 0 }, /*%< expansion (5/6). */
+ { 0x0000, 0 }, /*%< expansion (6/6). */
+};
+
+int ns_msg_getflag(ns_msg handle, int flag) {
+ return(((handle)._flags & _ns_flagdata[flag].mask) >> _ns_flagdata[flag].shift);
+}
+
+int
+ns_skiprr(const u_char *ptr, const u_char *eom, ns_sect section, int count) {
+ const u_char *optr = ptr;
+
+ for ((void)NULL; count > 0; count--) {
+ int b, rdlength;
+
+ b = dn_skipname(ptr, eom);
+ if (b < 0)
+ RETERR(EMSGSIZE);
+ ptr += b/*Name*/ + NS_INT16SZ/*Type*/ + NS_INT16SZ/*Class*/;
+ if (section != ns_s_qd) {
+ if (ptr + NS_INT32SZ + NS_INT16SZ > eom)
+ RETERR(EMSGSIZE);
+ ptr += NS_INT32SZ/*TTL*/;
+ NS_GET16(rdlength, ptr);
+ ptr += rdlength/*RData*/;
+ }
+ }
+ if (ptr > eom)
+ RETERR(EMSGSIZE);
+ return (ptr - optr);
+}
+
+int
+ns_initparse(const u_char *msg, int msglen, ns_msg *handle) {
+ const u_char *eom = msg + msglen;
+ int i;
+
+ handle->_msg = msg;
+ handle->_eom = eom;
+ if (msg + NS_INT16SZ > eom)
+ RETERR(EMSGSIZE);
+ NS_GET16(handle->_id, msg);
+ if (msg + NS_INT16SZ > eom)
+ RETERR(EMSGSIZE);
+ NS_GET16(handle->_flags, msg);
+ for (i = 0; i < ns_s_max; i++) {
+ if (msg + NS_INT16SZ > eom)
+ RETERR(EMSGSIZE);
+ NS_GET16(handle->_counts[i], msg);
+ }
+ for (i = 0; i < ns_s_max; i++)
+ if (handle->_counts[i] == 0)
+ handle->_sections[i] = NULL;
+ else {
+ int b = ns_skiprr(msg, eom, (ns_sect)i,
+ handle->_counts[i]);
+
+ if (b < 0)
+ return (-1);
+ handle->_sections[i] = msg;
+ msg += b;
+ }
+ if (msg != eom)
+ RETERR(EMSGSIZE);
+ setsection(handle, ns_s_max);
+ return (0);
+}
+
+int
+ns_parserr(ns_msg *handle, ns_sect section, int rrnum, ns_rr *rr) {
+ int b;
+ int tmp;
+
+ /* Make section right. */
+ tmp = section;
+ if (tmp < 0 || section >= ns_s_max)
+ RETERR(ENODEV);
+ if (section != handle->_sect)
+ setsection(handle, section);
+
+ /* Make rrnum right. */
+ if (rrnum == -1)
+ rrnum = handle->_rrnum;
+ if (rrnum < 0 || rrnum >= handle->_counts[(int)section])
+ RETERR(ENODEV);
+ if (rrnum < handle->_rrnum)
+ setsection(handle, section);
+ if (rrnum > handle->_rrnum) {
+ b = ns_skiprr(handle->_msg_ptr, handle->_eom, section,
+ rrnum - handle->_rrnum);
+
+ if (b < 0)
+ return (-1);
+ handle->_msg_ptr += b;
+ handle->_rrnum = rrnum;
+ }
+
+ /* Do the parse. */
+ b = dn_expand(handle->_msg, handle->_eom,
+ handle->_msg_ptr, rr->name, NS_MAXDNAME);
+ if (b < 0)
+ return (-1);
+ handle->_msg_ptr += b;
+ if (handle->_msg_ptr + NS_INT16SZ + NS_INT16SZ > handle->_eom)
+ RETERR(EMSGSIZE);
+ NS_GET16(rr->type, handle->_msg_ptr);
+ NS_GET16(rr->rr_class, handle->_msg_ptr);
+ if (section == ns_s_qd) {
+ rr->ttl = 0;
+ rr->rdlength = 0;
+ rr->rdata = NULL;
+ } else {
+ if (handle->_msg_ptr + NS_INT32SZ + NS_INT16SZ > handle->_eom)
+ RETERR(EMSGSIZE);
+ NS_GET32(rr->ttl, handle->_msg_ptr);
+ NS_GET16(rr->rdlength, handle->_msg_ptr);
+ if (handle->_msg_ptr + rr->rdlength > handle->_eom)
+ RETERR(EMSGSIZE);
+ rr->rdata = handle->_msg_ptr;
+ handle->_msg_ptr += rr->rdlength;
+ }
+ if (++handle->_rrnum > handle->_counts[(int)section])
+ setsection(handle, (ns_sect)((int)section + 1));
+
+ /* All done. */
+ return (0);
+}
+
+/*
+ * This is identical to the above but uses network-format (uncompressed) names.
+ */
+int
+ns_parserr2(ns_msg *handle, ns_sect section, int rrnum, ns_rr2 *rr) {
+ int b;
+ int tmp;
+
+ /* Make section right. */
+ if ((tmp = section) < 0 || section >= ns_s_max)
+ RETERR(ENODEV);
+ if (section != handle->_sect)
+ setsection(handle, section);
+
+ /* Make rrnum right. */
+ if (rrnum == -1)
+ rrnum = handle->_rrnum;
+ if (rrnum < 0 || rrnum >= handle->_counts[(int)section])
+ RETERR(ENODEV);
+ if (rrnum < handle->_rrnum)
+ setsection(handle, section);
+ if (rrnum > handle->_rrnum) {
+ b = ns_skiprr(handle->_msg_ptr, handle->_eom, section,
+ rrnum - handle->_rrnum);
+
+ if (b < 0)
+ return (-1);
+ handle->_msg_ptr += b;
+ handle->_rrnum = rrnum;
+ }
+
+ /* Do the parse. */
+ b = ns_name_unpack2(handle->_msg, handle->_eom, handle->_msg_ptr,
+ rr->nname, NS_MAXNNAME, &rr->nnamel);
+ if (b < 0)
+ return (-1);
+ handle->_msg_ptr += b;
+ if (handle->_msg_ptr + NS_INT16SZ + NS_INT16SZ > handle->_eom)
+ RETERR(EMSGSIZE);
+ NS_GET16(rr->type, handle->_msg_ptr);
+ NS_GET16(rr->rr_class, handle->_msg_ptr);
+ if (section == ns_s_qd) {
+ rr->ttl = 0;
+ rr->rdlength = 0;
+ rr->rdata = NULL;
+ } else {
+ if (handle->_msg_ptr + NS_INT32SZ + NS_INT16SZ > handle->_eom)
+ RETERR(EMSGSIZE);
+ NS_GET32(rr->ttl, handle->_msg_ptr);
+ NS_GET16(rr->rdlength, handle->_msg_ptr);
+ if (handle->_msg_ptr + rr->rdlength > handle->_eom)
+ RETERR(EMSGSIZE);
+ rr->rdata = handle->_msg_ptr;
+ handle->_msg_ptr += rr->rdlength;
+ }
+ if (++handle->_rrnum > handle->_counts[(int)section])
+ setsection(handle, (ns_sect)((int)section + 1));
+
+ /* All done. */
+ return (0);
+}
+
+/* Private. */
+
+static void
+setsection(ns_msg *msg, ns_sect sect) {
+ msg->_sect = sect;
+ if (sect == ns_s_max) {
+ msg->_rrnum = -1;
+ msg->_msg_ptr = NULL;
+ } else {
+ msg->_rrnum = 0;
+ msg->_msg_ptr = msg->_sections[(int)sect];
+ }
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/nameser/ns_print.c b/usr/src/lib/libresolv2_joy/common/nameser/ns_print.c
new file mode 100644
index 0000000000..e70df376c1
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/nameser/ns_print.c
@@ -0,0 +1,1242 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef lint
+static const char rcsid[] = "$Id: ns_print.c,v 1.12 2009/03/03 05:29:58 each Exp $";
+#endif
+
+/* Import. */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <arpa/inet.h>
+
+#include <isc/assertions.h>
+#include <isc/dst.h>
+#include <errno.h>
+#include <resolv_joy.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "port_after.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) ((size_t)sprintf x)
+#endif
+
+/* Forward. */
+
+static size_t prune_origin(const char *name, const char *origin);
+static int charstr(const u_char *rdata, const u_char *edata,
+ char **buf, size_t *buflen);
+static int addname(const u_char *msg, size_t msglen,
+ const u_char **p, const char *origin,
+ char **buf, size_t *buflen);
+static void addlen(size_t len, char **buf, size_t *buflen);
+static int addstr(const char *src, size_t len,
+ char **buf, size_t *buflen);
+static int addtab(size_t len, size_t target, int spaced,
+ char **buf, size_t *buflen);
+
+/* Macros. */
+
+#define T(x) \
+ do { \
+ if ((x) < 0) \
+ return (-1); \
+ } while (0)
+
+static const char base32hex[] =
+ "0123456789ABCDEFGHIJKLMNOPQRSTUV=0123456789abcdefghijklmnopqrstuv";
+
+/* Public. */
+
+/*%
+ * Convert an RR to presentation format.
+ *
+ * return:
+ *\li Number of characters written to buf, or -1 (check errno).
+ */
+int
+ns_sprintrr(const ns_msg *handle, const ns_rr *rr,
+ const char *name_ctx, const char *origin,
+ char *buf, size_t buflen)
+{
+ int n;
+
+ n = ns_sprintrrf(ns_msg_base(*handle), ns_msg_size(*handle),
+ ns_rr_name(*rr), ns_rr_class(*rr), ns_rr_type(*rr),
+ ns_rr_ttl(*rr), ns_rr_rdata(*rr), ns_rr_rdlen(*rr),
+ name_ctx, origin, buf, buflen);
+ return (n);
+}
+
+/*%
+ * Convert the fields of an RR into presentation format.
+ *
+ * return:
+ *\li Number of characters written to buf, or -1 (check errno).
+ */
+int
+ns_sprintrrf(const u_char *msg, size_t msglen,
+ const char *name, ns_class class, ns_type type,
+ u_long ttl, const u_char *rdata, size_t rdlen,
+ const char *name_ctx, const char *origin,
+ char *buf, size_t buflen)
+{
+ const char *obuf = buf;
+ const u_char *edata = rdata + rdlen;
+ int spaced = 0;
+
+ const char *comment;
+ char tmp[100];
+ int len, x;
+
+ /*
+ * Owner.
+ */
+ if (name_ctx != NULL && ns_samename(name_ctx, name) == 1) {
+ T(addstr("\t\t\t", 3, &buf, &buflen));
+ } else {
+ len = prune_origin(name, origin);
+ if (*name == '\0') {
+ goto root;
+ } else if (len == 0) {
+ T(addstr("@\t\t\t", 4, &buf, &buflen));
+ } else {
+ T(addstr(name, len, &buf, &buflen));
+ /* Origin not used or not root, and no trailing dot? */
+ if (((origin == NULL || origin[0] == '\0') ||
+ (origin[0] != '.' && origin[1] != '\0' &&
+ name[len] == '\0')) && name[len - 1] != '.') {
+ root:
+ T(addstr(".", 1, &buf, &buflen));
+ len++;
+ }
+ T(spaced = addtab(len, 24, spaced, &buf, &buflen));
+ }
+ }
+
+ /*
+ * TTL, Class, Type.
+ */
+ T(x = ns_format_ttl(ttl, buf, buflen));
+ addlen(x, &buf, &buflen);
+ len = SPRINTF((tmp, " %s %s", p_class(class), p_type(type)));
+ T(addstr(tmp, len, &buf, &buflen));
+ T(spaced = addtab(x + len, 16, spaced, &buf, &buflen));
+
+ /*
+ * RData.
+ */
+ switch (type) {
+ case ns_t_a:
+ if (rdlen != (size_t)NS_INADDRSZ)
+ goto formerr;
+ (void) inet_ntop(AF_INET, rdata, buf, buflen);
+ addlen(strlen(buf), &buf, &buflen);
+ break;
+
+ case ns_t_cname:
+ case ns_t_mb:
+ case ns_t_mg:
+ case ns_t_mr:
+ case ns_t_ns:
+ case ns_t_ptr:
+ case ns_t_dname:
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+ break;
+
+ case ns_t_hinfo:
+ case ns_t_isdn:
+ /* First word. */
+ T(len = charstr(rdata, edata, &buf, &buflen));
+ if (len == 0)
+ goto formerr;
+ rdata += len;
+ T(addstr(" ", 1, &buf, &buflen));
+
+
+ /* Second word, optional in ISDN records. */
+ if (type == ns_t_isdn && rdata == edata)
+ break;
+
+ T(len = charstr(rdata, edata, &buf, &buflen));
+ if (len == 0)
+ goto formerr;
+ rdata += len;
+ break;
+
+ case ns_t_soa: {
+ u_long t;
+
+ /* Server name. */
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+ T(addstr(" ", 1, &buf, &buflen));
+
+ /* Administrator name. */
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+ T(addstr(" (\n", 3, &buf, &buflen));
+ spaced = 0;
+
+ if ((edata - rdata) != 5*NS_INT32SZ)
+ goto formerr;
+
+ /* Serial number. */
+ t = ns_get32(rdata); rdata += NS_INT32SZ;
+ T(addstr("\t\t\t\t\t", 5, &buf, &buflen));
+ len = SPRINTF((tmp, "%lu", t));
+ T(addstr(tmp, len, &buf, &buflen));
+ T(spaced = addtab(len, 16, spaced, &buf, &buflen));
+ T(addstr("; serial\n", 9, &buf, &buflen));
+ spaced = 0;
+
+ /* Refresh interval. */
+ t = ns_get32(rdata); rdata += NS_INT32SZ;
+ T(addstr("\t\t\t\t\t", 5, &buf, &buflen));
+ T(len = ns_format_ttl(t, buf, buflen));
+ addlen(len, &buf, &buflen);
+ T(spaced = addtab(len, 16, spaced, &buf, &buflen));
+ T(addstr("; refresh\n", 10, &buf, &buflen));
+ spaced = 0;
+
+ /* Retry interval. */
+ t = ns_get32(rdata); rdata += NS_INT32SZ;
+ T(addstr("\t\t\t\t\t", 5, &buf, &buflen));
+ T(len = ns_format_ttl(t, buf, buflen));
+ addlen(len, &buf, &buflen);
+ T(spaced = addtab(len, 16, spaced, &buf, &buflen));
+ T(addstr("; retry\n", 8, &buf, &buflen));
+ spaced = 0;
+
+ /* Expiry. */
+ t = ns_get32(rdata); rdata += NS_INT32SZ;
+ T(addstr("\t\t\t\t\t", 5, &buf, &buflen));
+ T(len = ns_format_ttl(t, buf, buflen));
+ addlen(len, &buf, &buflen);
+ T(spaced = addtab(len, 16, spaced, &buf, &buflen));
+ T(addstr("; expiry\n", 9, &buf, &buflen));
+ spaced = 0;
+
+ /* Minimum TTL. */
+ t = ns_get32(rdata); rdata += NS_INT32SZ;
+ T(addstr("\t\t\t\t\t", 5, &buf, &buflen));
+ T(len = ns_format_ttl(t, buf, buflen));
+ addlen(len, &buf, &buflen);
+ T(addstr(" )", 2, &buf, &buflen));
+ T(spaced = addtab(len, 16, spaced, &buf, &buflen));
+ T(addstr("; minimum\n", 10, &buf, &buflen));
+
+ break;
+ }
+
+ case ns_t_mx:
+ case ns_t_afsdb:
+ case ns_t_rt:
+ case ns_t_kx: {
+ u_int t;
+
+ if (rdlen < (size_t)NS_INT16SZ)
+ goto formerr;
+
+ /* Priority. */
+ t = ns_get16(rdata);
+ rdata += NS_INT16SZ;
+ len = SPRINTF((tmp, "%u ", t));
+ T(addstr(tmp, len, &buf, &buflen));
+
+ /* Target. */
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+
+ break;
+ }
+
+ case ns_t_px: {
+ u_int t;
+
+ if (rdlen < (size_t)NS_INT16SZ)
+ goto formerr;
+
+ /* Priority. */
+ t = ns_get16(rdata);
+ rdata += NS_INT16SZ;
+ len = SPRINTF((tmp, "%u ", t));
+ T(addstr(tmp, len, &buf, &buflen));
+
+ /* Name1. */
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+ T(addstr(" ", 1, &buf, &buflen));
+
+ /* Name2. */
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+
+ break;
+ }
+
+ case ns_t_x25:
+ T(len = charstr(rdata, edata, &buf, &buflen));
+ if (len == 0)
+ goto formerr;
+ rdata += len;
+ break;
+
+ case ns_t_txt:
+ case ns_t_spf:
+ while (rdata < edata) {
+ T(len = charstr(rdata, edata, &buf, &buflen));
+ if (len == 0)
+ goto formerr;
+ rdata += len;
+ if (rdata < edata)
+ T(addstr(" ", 1, &buf, &buflen));
+ }
+ break;
+
+ case ns_t_nsap: {
+ char t[2+255*3];
+
+ (void) inet_nsap_ntoa(rdlen, rdata, t);
+ T(addstr(t, strlen(t), &buf, &buflen));
+ break;
+ }
+
+ case ns_t_aaaa:
+ if (rdlen != (size_t)NS_IN6ADDRSZ)
+ goto formerr;
+ (void) inet_ntop(AF_INET6, rdata, buf, buflen);
+ addlen(strlen(buf), &buf, &buflen);
+ break;
+
+ case ns_t_loc: {
+ char t[255];
+
+ /* XXX protocol format checking? */
+ (void) loc_ntoa(rdata, t);
+ T(addstr(t, strlen(t), &buf, &buflen));
+ break;
+ }
+
+ case ns_t_naptr: {
+ u_int order, preference;
+ char t[50];
+
+ if (rdlen < 2U*NS_INT16SZ)
+ goto formerr;
+
+ /* Order, Precedence. */
+ order = ns_get16(rdata); rdata += NS_INT16SZ;
+ preference = ns_get16(rdata); rdata += NS_INT16SZ;
+ len = SPRINTF((t, "%u %u ", order, preference));
+ T(addstr(t, len, &buf, &buflen));
+
+ /* Flags. */
+ T(len = charstr(rdata, edata, &buf, &buflen));
+ if (len == 0)
+ goto formerr;
+ rdata += len;
+ T(addstr(" ", 1, &buf, &buflen));
+
+ /* Service. */
+ T(len = charstr(rdata, edata, &buf, &buflen));
+ if (len == 0)
+ goto formerr;
+ rdata += len;
+ T(addstr(" ", 1, &buf, &buflen));
+
+ /* Regexp. */
+ T(len = charstr(rdata, edata, &buf, &buflen));
+ if (len < 0)
+ return (-1);
+ if (len == 0)
+ goto formerr;
+ rdata += len;
+ T(addstr(" ", 1, &buf, &buflen));
+
+ /* Server. */
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+ break;
+ }
+
+ case ns_t_srv: {
+ u_int priority, weight, port;
+ char t[50];
+
+ if (rdlen < 3U*NS_INT16SZ)
+ goto formerr;
+
+ /* Priority, Weight, Port. */
+ priority = ns_get16(rdata); rdata += NS_INT16SZ;
+ weight = ns_get16(rdata); rdata += NS_INT16SZ;
+ port = ns_get16(rdata); rdata += NS_INT16SZ;
+ len = SPRINTF((t, "%u %u %u ", priority, weight, port));
+ T(addstr(t, len, &buf, &buflen));
+
+ /* Server. */
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+ break;
+ }
+
+ case ns_t_minfo:
+ case ns_t_rp:
+ /* Name1. */
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+ T(addstr(" ", 1, &buf, &buflen));
+
+ /* Name2. */
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+
+ break;
+
+ case ns_t_wks: {
+ int n, lcnt;
+
+ if (rdlen < 1U + NS_INT32SZ)
+ goto formerr;
+
+ /* Address. */
+ (void) inet_ntop(AF_INET, rdata, buf, buflen);
+ addlen(strlen(buf), &buf, &buflen);
+ rdata += NS_INADDRSZ;
+
+ /* Protocol. */
+ len = SPRINTF((tmp, " %u ( ", *rdata));
+ T(addstr(tmp, len, &buf, &buflen));
+ rdata += NS_INT8SZ;
+
+ /* Bit map. */
+ n = 0;
+ lcnt = 0;
+ while (rdata < edata) {
+ u_int c = *rdata++;
+ do {
+ if (c & 0200) {
+ if (lcnt == 0) {
+ T(addstr("\n\t\t\t\t", 5,
+ &buf, &buflen));
+ lcnt = 10;
+ spaced = 0;
+ }
+ len = SPRINTF((tmp, "%d ", n));
+ T(addstr(tmp, len, &buf, &buflen));
+ lcnt--;
+ }
+ c <<= 1;
+ } while (++n & 07);
+ }
+ T(addstr(")", 1, &buf, &buflen));
+
+ break;
+ }
+
+ case ns_t_key:
+ case ns_t_dnskey: {
+ char base64_key[NS_MD5RSA_MAX_BASE64];
+ u_int keyflags, protocol, algorithm, key_id;
+ const char *leader;
+ int n;
+
+ if (rdlen < 0U + NS_INT16SZ + NS_INT8SZ + NS_INT8SZ)
+ goto formerr;
+
+ /* Key flags, Protocol, Algorithm. */
+ key_id = dst_s_dns_key_id(rdata, edata-rdata);
+ keyflags = ns_get16(rdata); rdata += NS_INT16SZ;
+ protocol = *rdata++;
+ algorithm = *rdata++;
+ len = SPRINTF((tmp, "0x%04x %u %u",
+ keyflags, protocol, algorithm));
+ T(addstr(tmp, len, &buf, &buflen));
+
+ /* Public key data. */
+ len = b64_ntop(rdata, edata - rdata,
+ base64_key, sizeof base64_key);
+ if (len < 0)
+ goto formerr;
+ if (len > 15) {
+ T(addstr(" (", 2, &buf, &buflen));
+ leader = "\n\t\t";
+ spaced = 0;
+ } else
+ leader = " ";
+ for (n = 0; n < len; n += 48) {
+ T(addstr(leader, strlen(leader), &buf, &buflen));
+ T(addstr(base64_key + n, MIN(len - n, 48),
+ &buf, &buflen));
+ }
+ if (len > 15)
+ T(addstr(" )", 2, &buf, &buflen));
+ n = SPRINTF((tmp, " ; key_tag= %u", key_id));
+ T(addstr(tmp, n, &buf, &buflen));
+
+ break;
+ }
+
+ case ns_t_sig:
+ case ns_t_rrsig: {
+ char base64_key[NS_MD5RSA_MAX_BASE64];
+ u_int type, algorithm, labels, footprint;
+ const char *leader;
+ u_long t;
+ int n;
+
+ if (rdlen < 22U)
+ goto formerr;
+
+ /* Type covered, Algorithm, Label count, Original TTL. */
+ type = ns_get16(rdata); rdata += NS_INT16SZ;
+ algorithm = *rdata++;
+ labels = *rdata++;
+ t = ns_get32(rdata); rdata += NS_INT32SZ;
+ len = SPRINTF((tmp, "%s %d %d %lu ",
+ p_type(type), algorithm, labels, t));
+ T(addstr(tmp, len, &buf, &buflen));
+ if (labels > (u_int)dn_count_labels(name))
+ goto formerr;
+
+ /* Signature expiry. */
+ t = ns_get32(rdata); rdata += NS_INT32SZ;
+ len = SPRINTF((tmp, "%s ", p_secstodate(t)));
+ T(addstr(tmp, len, &buf, &buflen));
+
+ /* Time signed. */
+ t = ns_get32(rdata); rdata += NS_INT32SZ;
+ len = SPRINTF((tmp, "%s ", p_secstodate(t)));
+ T(addstr(tmp, len, &buf, &buflen));
+
+ /* Signature Footprint. */
+ footprint = ns_get16(rdata); rdata += NS_INT16SZ;
+ len = SPRINTF((tmp, "%u ", footprint));
+ T(addstr(tmp, len, &buf, &buflen));
+
+ /* Signer's name. */
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+
+ /* Signature. */
+ len = b64_ntop(rdata, edata - rdata,
+ base64_key, sizeof base64_key);
+ if (len > 15) {
+ T(addstr(" (", 2, &buf, &buflen));
+ leader = "\n\t\t";
+ spaced = 0;
+ } else
+ leader = " ";
+ if (len < 0)
+ goto formerr;
+ for (n = 0; n < len; n += 48) {
+ T(addstr(leader, strlen(leader), &buf, &buflen));
+ T(addstr(base64_key + n, MIN(len - n, 48),
+ &buf, &buflen));
+ }
+ if (len > 15)
+ T(addstr(" )", 2, &buf, &buflen));
+ break;
+ }
+
+ case ns_t_nxt: {
+ int n, c;
+
+ /* Next domain name. */
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+
+ /* Type bit map. */
+ n = edata - rdata;
+ for (c = 0; c < n*8; c++)
+ if (NS_NXT_BIT_ISSET(c, rdata)) {
+ len = SPRINTF((tmp, " %s", p_type(c)));
+ T(addstr(tmp, len, &buf, &buflen));
+ }
+ break;
+ }
+
+ case ns_t_cert: {
+ u_int c_type, key_tag, alg;
+ int n;
+ unsigned int siz;
+ char base64_cert[8192], tmp[40];
+ const char *leader;
+
+ c_type = ns_get16(rdata); rdata += NS_INT16SZ;
+ key_tag = ns_get16(rdata); rdata += NS_INT16SZ;
+ alg = (u_int) *rdata++;
+
+ len = SPRINTF((tmp, "%d %d %d ", c_type, key_tag, alg));
+ T(addstr(tmp, len, &buf, &buflen));
+ siz = (edata-rdata)*4/3 + 4; /* "+4" accounts for trailing \0 */
+ if (siz > sizeof(base64_cert) * 3/4) {
+ const char *str = "record too long to print";
+ T(addstr(str, strlen(str), &buf, &buflen));
+ }
+ else {
+ len = b64_ntop(rdata, edata-rdata, base64_cert, siz);
+
+ if (len < 0)
+ goto formerr;
+ else if (len > 15) {
+ T(addstr(" (", 2, &buf, &buflen));
+ leader = "\n\t\t";
+ spaced = 0;
+ }
+ else
+ leader = " ";
+
+ for (n = 0; n < len; n += 48) {
+ T(addstr(leader, strlen(leader),
+ &buf, &buflen));
+ T(addstr(base64_cert + n, MIN(len - n, 48),
+ &buf, &buflen));
+ }
+ if (len > 15)
+ T(addstr(" )", 2, &buf, &buflen));
+ }
+ break;
+ }
+
+ case ns_t_tkey: {
+ /* KJD - need to complete this */
+ u_long t;
+ int mode, err, keysize;
+
+ /* Algorithm name. */
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+ T(addstr(" ", 1, &buf, &buflen));
+
+ /* Inception. */
+ t = ns_get32(rdata); rdata += NS_INT32SZ;
+ len = SPRINTF((tmp, "%s ", p_secstodate(t)));
+ T(addstr(tmp, len, &buf, &buflen));
+
+ /* Experation. */
+ t = ns_get32(rdata); rdata += NS_INT32SZ;
+ len = SPRINTF((tmp, "%s ", p_secstodate(t)));
+ T(addstr(tmp, len, &buf, &buflen));
+
+ /* Mode , Error, Key Size. */
+ /* Priority, Weight, Port. */
+ mode = ns_get16(rdata); rdata += NS_INT16SZ;
+ err = ns_get16(rdata); rdata += NS_INT16SZ;
+ keysize = ns_get16(rdata); rdata += NS_INT16SZ;
+ len = SPRINTF((tmp, "%u %u %u ", mode, err, keysize));
+ T(addstr(tmp, len, &buf, &buflen));
+
+ /* XXX need to dump key, print otherdata length & other data */
+ break;
+ }
+
+ case ns_t_tsig: {
+ /* BEW - need to complete this */
+ int n;
+
+ T(len = addname(msg, msglen, &rdata, origin, &buf, &buflen));
+ T(addstr(" ", 1, &buf, &buflen));
+ rdata += 8; /*%< time */
+ n = ns_get16(rdata); rdata += INT16SZ;
+ rdata += n; /*%< sig */
+ n = ns_get16(rdata); rdata += INT16SZ; /*%< original id */
+ sprintf(buf, "%d", ns_get16(rdata));
+ rdata += INT16SZ;
+ addlen(strlen(buf), &buf, &buflen);
+ break;
+ }
+
+ case ns_t_a6: {
+ struct in6_addr a;
+ int pbyte, pbit;
+
+ /* prefix length */
+ if (rdlen == 0U) goto formerr;
+ len = SPRINTF((tmp, "%d ", *rdata));
+ T(addstr(tmp, len, &buf, &buflen));
+ pbit = *rdata;
+ if (pbit > 128) goto formerr;
+ pbyte = (pbit & ~7) / 8;
+ rdata++;
+
+ /* address suffix: provided only when prefix len != 128 */
+ if (pbit < 128) {
+ if (rdata + pbyte >= edata) goto formerr;
+ memset(&a, 0, sizeof(a));
+ memcpy(&a.s6_addr[pbyte], rdata, sizeof(a) - pbyte);
+ (void) inet_ntop(AF_INET6, &a, buf, buflen);
+ addlen(strlen(buf), &buf, &buflen);
+ rdata += sizeof(a) - pbyte;
+ }
+
+ /* prefix name: provided only when prefix len > 0 */
+ if (pbit == 0)
+ break;
+ if (rdata >= edata) goto formerr;
+ T(addstr(" ", 1, &buf, &buflen));
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+
+ break;
+ }
+
+ case ns_t_opt: {
+ len = SPRINTF((tmp, "%u bytes", class));
+ T(addstr(tmp, len, &buf, &buflen));
+ break;
+ }
+
+ case ns_t_ds:
+ case ns_t_dlv:
+ case ns_t_sshfp: {
+ u_int t;
+
+ if (type == ns_t_ds || type == ns_t_dlv) {
+ if (rdlen < 4U) goto formerr;
+ t = ns_get16(rdata);
+ rdata += NS_INT16SZ;
+ len = SPRINTF((tmp, "%u ", t));
+ T(addstr(tmp, len, &buf, &buflen));
+ } else
+ if (rdlen < 2U) goto formerr;
+
+ len = SPRINTF((tmp, "%u ", *rdata));
+ T(addstr(tmp, len, &buf, &buflen));
+ rdata++;
+
+ len = SPRINTF((tmp, "%u ", *rdata));
+ T(addstr(tmp, len, &buf, &buflen));
+ rdata++;
+
+ while (rdata < edata) {
+ len = SPRINTF((tmp, "%02X", *rdata));
+ T(addstr(tmp, len, &buf, &buflen));
+ rdata++;
+ }
+ break;
+ }
+
+ case ns_t_nsec3:
+ case ns_t_nsec3param: {
+ u_int t, w, l, j, k, c;
+
+ len = SPRINTF((tmp, "%u ", *rdata));
+ T(addstr(tmp, len, &buf, &buflen));
+ rdata++;
+
+ len = SPRINTF((tmp, "%u ", *rdata));
+ T(addstr(tmp, len, &buf, &buflen));
+ rdata++;
+
+ t = ns_get16(rdata);
+ rdata += NS_INT16SZ;
+ len = SPRINTF((tmp, "%u ", t));
+ T(addstr(tmp, len, &buf, &buflen));
+
+ t = *rdata++;
+ if (t == 0) {
+ T(addstr("-", 1, &buf, &buflen));
+ } else {
+ while (t-- > 0) {
+ len = SPRINTF((tmp, "%02X", *rdata));
+ T(addstr(tmp, len, &buf, &buflen));
+ rdata++;
+ }
+ }
+ if (type == ns_t_nsec3param)
+ break;
+ T(addstr(" ", 1, &buf, &buflen));
+
+ t = *rdata++;
+ while (t > 0) {
+ switch (t) {
+ case 1:
+ tmp[0] = base32hex[((rdata[0]>>3)&0x1f)];
+ tmp[1] = base32hex[((rdata[0]<<2)&0x1c)];
+ tmp[2] = tmp[3] = tmp[4] = '=';
+ tmp[5] = tmp[6] = tmp[7] = '=';
+ break;
+ case 2:
+ tmp[0] = base32hex[((rdata[0]>>3)&0x1f)];
+ tmp[1] = base32hex[((rdata[0]<<2)&0x1c)|
+ ((rdata[1]>>6)&0x03)];
+ tmp[2] = base32hex[((rdata[1]>>1)&0x1f)];
+ tmp[3] = base32hex[((rdata[1]<<4)&0x10)];
+ tmp[4] = tmp[5] = tmp[6] = tmp[7] = '=';
+ break;
+ case 3:
+ tmp[0] = base32hex[((rdata[0]>>3)&0x1f)];
+ tmp[1] = base32hex[((rdata[0]<<2)&0x1c)|
+ ((rdata[1]>>6)&0x03)];
+ tmp[2] = base32hex[((rdata[1]>>1)&0x1f)];
+ tmp[3] = base32hex[((rdata[1]<<4)&0x10)|
+ ((rdata[2]>>4)&0x0f)];
+ tmp[4] = base32hex[((rdata[2]<<1)&0x1e)];
+ tmp[5] = tmp[6] = tmp[7] = '=';
+ break;
+ case 4:
+ tmp[0] = base32hex[((rdata[0]>>3)&0x1f)];
+ tmp[1] = base32hex[((rdata[0]<<2)&0x1c)|
+ ((rdata[1]>>6)&0x03)];
+ tmp[2] = base32hex[((rdata[1]>>1)&0x1f)];
+ tmp[3] = base32hex[((rdata[1]<<4)&0x10)|
+ ((rdata[2]>>4)&0x0f)];
+ tmp[4] = base32hex[((rdata[2]<<1)&0x1e)|
+ ((rdata[3]>>7)&0x01)];
+ tmp[5] = base32hex[((rdata[3]>>2)&0x1f)];
+ tmp[6] = base32hex[(rdata[3]<<3)&0x18];
+ tmp[7] = '=';
+ break;
+ default:
+ tmp[0] = base32hex[((rdata[0]>>3)&0x1f)];
+ tmp[1] = base32hex[((rdata[0]<<2)&0x1c)|
+ ((rdata[1]>>6)&0x03)];
+ tmp[2] = base32hex[((rdata[1]>>1)&0x1f)];
+ tmp[3] = base32hex[((rdata[1]<<4)&0x10)|
+ ((rdata[2]>>4)&0x0f)];
+ tmp[4] = base32hex[((rdata[2]<<1)&0x1e)|
+ ((rdata[3]>>7)&0x01)];
+ tmp[5] = base32hex[((rdata[3]>>2)&0x1f)];
+ tmp[6] = base32hex[((rdata[3]<<3)&0x18)|
+ ((rdata[4]>>5)&0x07)];
+ tmp[7] = base32hex[(rdata[4]&0x1f)];
+ break;
+ }
+ T(addstr(tmp, 8, &buf, &buflen));
+ if (t >= 5) {
+ rdata += 5;
+ t -= 5;
+ } else {
+ rdata += t;
+ t -= t;
+ }
+ }
+
+ while (rdata < edata) {
+ w = *rdata++;
+ l = *rdata++;
+ for (j = 0; j < l; j++) {
+ if (rdata[j] == 0)
+ continue;
+ for (k = 0; k < 8; k++) {
+ if ((rdata[j] & (0x80 >> k)) == 0)
+ continue;
+ c = w * 256 + j * 8 + k;
+ len = SPRINTF((tmp, " %s", p_type(c)));
+ T(addstr(tmp, len, &buf, &buflen));
+ }
+ }
+ rdata += l;
+ }
+ break;
+ }
+
+ case ns_t_nsec: {
+ u_int w, l, j, k, c;
+
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+
+ while (rdata < edata) {
+ w = *rdata++;
+ l = *rdata++;
+ for (j = 0; j < l; j++) {
+ if (rdata[j] == 0)
+ continue;
+ for (k = 0; k < 8; k++) {
+ if ((rdata[j] & (0x80 >> k)) == 0)
+ continue;
+ c = w * 256 + j * 8 + k;
+ len = SPRINTF((tmp, " %s", p_type(c)));
+ T(addstr(tmp, len, &buf, &buflen));
+ }
+ }
+ rdata += l;
+ }
+ break;
+ }
+
+ case ns_t_dhcid: {
+ int n;
+ unsigned int siz;
+ char base64_dhcid[8192];
+ const char *leader;
+
+ siz = (edata-rdata)*4/3 + 4; /* "+4" accounts for trailing \0 */
+ if (siz > sizeof(base64_dhcid) * 3/4) {
+ const char *str = "record too long to print";
+ T(addstr(str, strlen(str), &buf, &buflen));
+ } else {
+ len = b64_ntop(rdata, edata-rdata, base64_dhcid, siz);
+
+ if (len < 0)
+ goto formerr;
+
+ else if (len > 15) {
+ T(addstr(" (", 2, &buf, &buflen));
+ leader = "\n\t\t";
+ spaced = 0;
+ }
+ else
+ leader = " ";
+
+ for (n = 0; n < len; n += 48) {
+ T(addstr(leader, strlen(leader),
+ &buf, &buflen));
+ T(addstr(base64_dhcid + n, MIN(len - n, 48),
+ &buf, &buflen));
+ }
+ if (len > 15)
+ T(addstr(" )", 2, &buf, &buflen));
+ }
+ }
+
+ case ns_t_ipseckey: {
+ int n;
+ unsigned int siz;
+ char base64_key[8192];
+ const char *leader;
+
+ if (rdlen < 2)
+ goto formerr;
+
+ switch (rdata[1]) {
+ case 0:
+ case 3:
+ if (rdlen < 3)
+ goto formerr;
+ break;
+ case 1:
+ if (rdlen < 7)
+ goto formerr;
+ break;
+ case 2:
+ if (rdlen < 19)
+ goto formerr;
+ break;
+ default:
+ comment = "unknown IPSECKEY gateway type";
+ goto hexify;
+ }
+
+ len = SPRINTF((tmp, "%u ", *rdata));
+ T(addstr(tmp, len, &buf, &buflen));
+ rdata++;
+
+ len = SPRINTF((tmp, "%u ", *rdata));
+ T(addstr(tmp, len, &buf, &buflen));
+ rdata++;
+
+ len = SPRINTF((tmp, "%u ", *rdata));
+ T(addstr(tmp, len, &buf, &buflen));
+ rdata++;
+
+ switch (rdata[-2]) {
+ case 0:
+ T(addstr(".", 1, &buf, &buflen));
+ break;
+ case 1:
+ (void) inet_ntop(AF_INET, rdata, buf, buflen);
+ addlen(strlen(buf), &buf, &buflen);
+ rdata += 4;
+ break;
+ case 2:
+ (void) inet_ntop(AF_INET6, rdata, buf, buflen);
+ addlen(strlen(buf), &buf, &buflen);
+ rdata += 16;
+ break;
+ case 3:
+ T(addname(msg, msglen, &rdata, origin, &buf, &buflen));
+ break;
+ }
+
+ if (rdata >= edata)
+ break;
+
+ siz = (edata-rdata)*4/3 + 4; /* "+4" accounts for trailing \0 */
+ if (siz > sizeof(base64_key) * 3/4) {
+ const char *str = "record too long to print";
+ T(addstr(str, strlen(str), &buf, &buflen));
+ } else {
+ len = b64_ntop(rdata, edata-rdata, base64_key, siz);
+
+ if (len < 0)
+ goto formerr;
+
+ else if (len > 15) {
+ T(addstr(" (", 2, &buf, &buflen));
+ leader = "\n\t\t";
+ spaced = 0;
+ }
+ else
+ leader = " ";
+
+ for (n = 0; n < len; n += 48) {
+ T(addstr(leader, strlen(leader),
+ &buf, &buflen));
+ T(addstr(base64_key + n, MIN(len - n, 48),
+ &buf, &buflen));
+ }
+ if (len > 15)
+ T(addstr(" )", 2, &buf, &buflen));
+ }
+ }
+
+ case ns_t_hip: {
+ unsigned int i, hip_len, algorithm, key_len;
+ char base64_key[NS_MD5RSA_MAX_BASE64];
+ unsigned int siz;
+ const char *leader = "\n\t\t\t\t\t";
+
+ hip_len = *rdata++;
+ algorithm = *rdata++;
+ key_len = ns_get16(rdata);
+ rdata += NS_INT16SZ;
+
+ siz = key_len*4/3 + 4; /* "+4" accounts for trailing \0 */
+ if (siz > sizeof(base64_key) * 3/4) {
+ const char *str = "record too long to print";
+ T(addstr(str, strlen(str), &buf, &buflen));
+ } else {
+ len = sprintf(tmp, "( %u ", algorithm);
+ T(addstr(tmp, len, &buf, &buflen));
+
+ for (i = 0; i < hip_len; i++) {
+ len = sprintf(tmp, "%02X", *rdata);
+ T(addstr(tmp, len, &buf, &buflen));
+ rdata++;
+ }
+ T(addstr(leader, strlen(leader), &buf, &buflen));
+
+ len = b64_ntop(rdata, key_len, base64_key, siz);
+ if (len < 0)
+ goto formerr;
+
+ T(addstr(base64_key, len, &buf, &buflen));
+
+ rdata += key_len;
+ while (rdata < edata) {
+ T(addstr(leader, strlen(leader), &buf, &buflen));
+ T(addname(msg, msglen, &rdata, origin,
+ &buf, &buflen));
+ }
+ T(addstr(" )", 2, &buf, &buflen));
+ }
+ break;
+ }
+
+ default:
+ comment = "unknown RR type";
+ goto hexify;
+ }
+ return (buf - obuf);
+ formerr:
+ comment = "RR format error";
+ hexify: {
+ int n, m;
+ char *p;
+
+ len = SPRINTF((tmp, "\\# %u%s\t; %s", (unsigned)(edata - rdata),
+ rdlen != 0U ? " (" : "", comment));
+ T(addstr(tmp, len, &buf, &buflen));
+ while (rdata < edata) {
+ p = tmp;
+ p += SPRINTF((p, "\n\t"));
+ spaced = 0;
+ n = MIN(16, edata - rdata);
+ for (m = 0; m < n; m++)
+ p += SPRINTF((p, "%02x ", rdata[m]));
+ T(addstr(tmp, p - tmp, &buf, &buflen));
+ if (n < 16) {
+ T(addstr(")", 1, &buf, &buflen));
+ T(addtab(p - tmp + 1, 48, spaced, &buf, &buflen));
+ }
+ p = tmp;
+ p += SPRINTF((p, "; "));
+ for (m = 0; m < n; m++)
+ *p++ = (isascii(rdata[m]) && isprint(rdata[m]))
+ ? rdata[m]
+ : '.';
+ T(addstr(tmp, p - tmp, &buf, &buflen));
+ rdata += n;
+ }
+ return (buf - obuf);
+ }
+}
+
+/* Private. */
+
+/*%
+ * size_t
+ * prune_origin(name, origin)
+ * Find out if the name is at or under the current origin.
+ * return:
+ * Number of characters in name before start of origin,
+ * or length of name if origin does not match.
+ * notes:
+ * This function should share code with samedomain().
+ */
+static size_t
+prune_origin(const char *name, const char *origin) {
+ const char *oname = name;
+
+ while (*name != '\0') {
+ if (origin != NULL && ns_samename(name, origin) == 1)
+ return (name - oname - (name > oname));
+ while (*name != '\0') {
+ if (*name == '\\') {
+ name++;
+ /* XXX need to handle \nnn form. */
+ if (*name == '\0')
+ break;
+ } else if (*name == '.') {
+ name++;
+ break;
+ }
+ name++;
+ }
+ }
+ return (name - oname);
+}
+
+/*%
+ * int
+ * charstr(rdata, edata, buf, buflen)
+ * Format a <character-string> into the presentation buffer.
+ * return:
+ * Number of rdata octets consumed
+ * 0 for protocol format error
+ * -1 for output buffer error
+ * side effects:
+ * buffer is advanced on success.
+ */
+static int
+charstr(const u_char *rdata, const u_char *edata, char **buf, size_t *buflen) {
+ const u_char *odata = rdata;
+ size_t save_buflen = *buflen;
+ char *save_buf = *buf;
+
+ if (addstr("\"", 1, buf, buflen) < 0)
+ goto enospc;
+ if (rdata < edata) {
+ int n = *rdata;
+
+ if (rdata + 1 + n <= edata) {
+ rdata++;
+ while (n-- > 0) {
+ if (strchr("\n\"\\", *rdata) != NULL)
+ if (addstr("\\", 1, buf, buflen) < 0)
+ goto enospc;
+ if (addstr((const char *)rdata, 1,
+ buf, buflen) < 0)
+ goto enospc;
+ rdata++;
+ }
+ }
+ }
+ if (addstr("\"", 1, buf, buflen) < 0)
+ goto enospc;
+ return (rdata - odata);
+ enospc:
+ errno = ENOSPC;
+ *buf = save_buf;
+ *buflen = save_buflen;
+ return (-1);
+}
+
+static int
+addname(const u_char *msg, size_t msglen,
+ const u_char **pp, const char *origin,
+ char **buf, size_t *buflen)
+{
+ size_t newlen, save_buflen = *buflen;
+ char *save_buf = *buf;
+ int n;
+
+ n = dn_expand(msg, msg + msglen, *pp, *buf, *buflen);
+ if (n < 0)
+ goto enospc; /*%< Guess. */
+ newlen = prune_origin(*buf, origin);
+ if (**buf == '\0') {
+ goto root;
+ } else if (newlen == 0U) {
+ /* Use "@" instead of name. */
+ if (newlen + 2 > *buflen)
+ goto enospc; /* No room for "@\0". */
+ (*buf)[newlen++] = '@';
+ (*buf)[newlen] = '\0';
+ } else {
+ if (((origin == NULL || origin[0] == '\0') ||
+ (origin[0] != '.' && origin[1] != '\0' &&
+ (*buf)[newlen] == '\0')) && (*buf)[newlen - 1] != '.') {
+ /* No trailing dot. */
+ root:
+ if (newlen + 2 > *buflen)
+ goto enospc; /* No room for ".\0". */
+ (*buf)[newlen++] = '.';
+ (*buf)[newlen] = '\0';
+ }
+ }
+ *pp += n;
+ addlen(newlen, buf, buflen);
+ **buf = '\0';
+ return (newlen);
+ enospc:
+ errno = ENOSPC;
+ *buf = save_buf;
+ *buflen = save_buflen;
+ return (-1);
+}
+
+static void
+addlen(size_t len, char **buf, size_t *buflen) {
+ INSIST(len <= *buflen);
+ *buf += len;
+ *buflen -= len;
+}
+
+static int
+addstr(const char *src, size_t len, char **buf, size_t *buflen) {
+ if (len >= *buflen) {
+ errno = ENOSPC;
+ return (-1);
+ }
+ memcpy(*buf, src, len);
+ addlen(len, buf, buflen);
+ **buf = '\0';
+ return (0);
+}
+
+static int
+addtab(size_t len, size_t target, int spaced, char **buf, size_t *buflen) {
+ size_t save_buflen = *buflen;
+ char *save_buf = *buf;
+ int t;
+
+ if (spaced || len >= target - 1) {
+ T(addstr(" ", 2, buf, buflen));
+ spaced = 1;
+ } else {
+ for (t = (target - len - 1) / 8; t >= 0; t--)
+ if (addstr("\t", 1, buf, buflen) < 0) {
+ *buflen = save_buflen;
+ *buf = save_buf;
+ return (-1);
+ }
+ spaced = 0;
+ }
+ return (spaced);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/nameser/ns_rdata.c b/usr/src/lib/libresolv2_joy/common/nameser/ns_rdata.c
new file mode 100644
index 0000000000..eac4052278
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/nameser/ns_rdata.c
@@ -0,0 +1,300 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+/*
+ * Copyright (C) 2009 Internet Systems Consortium, Inc. ("ISC")
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef lint
+static const char rcsid[] = "$Id: ns_rdata.c,v 1.2 2009/01/23 23:49:15 tbox Exp $";
+#endif
+
+#include "port_before.h"
+
+#if __OpenBSD__
+#include <sys/types.h>
+#endif
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+
+#include <errno.h>
+#include <resolv_joy.h>
+#include <string.h>
+
+#include "port_after.h"
+
+#define CONSUME_SRC \
+ do { \
+ rdata += n, rdlen -= n; \
+ } while (0)
+
+#define CONSUME_DST \
+ do { \
+ nrdata += n, nrdsiz -= n, nrdlen += n; \
+ } while (0)
+
+#define UNPACK_DNAME \
+ do { \
+ size_t t; \
+ \
+ if ((n = ns_name_unpack2(msg,eom,rdata,nrdata,nrdsiz,&t))<0) {\
+ errno = EMSGSIZE; \
+ return (-1); \
+ } \
+ CONSUME_SRC; \
+ n = t; \
+ CONSUME_DST; \
+ } while (0)
+
+#define UNPACK_SOME(x) \
+ do { \
+ n = (x); \
+ if ((size_t)n > rdlen || (size_t)n > nrdsiz) { \
+ errno = EMSGSIZE; \
+ return (-1); \
+ } \
+ memcpy(nrdata, rdata, n); \
+ CONSUME_SRC; CONSUME_DST; \
+ } while (0)
+
+#define UNPACK_REST(x) \
+ do { \
+ n = (x); \
+ if ((size_t)n != rdlen) { \
+ errno = EMSGSIZE; \
+ return (-1); \
+ } \
+ memcpy(nrdata, rdata, n); \
+ CONSUME_SRC; CONSUME_DST; \
+ } while (0)
+
+ssize_t
+ns_rdata_unpack(const u_char *msg, const u_char *eom,
+ ns_type type, const u_char *rdata, size_t rdlen,
+ u_char *nrdata, size_t nrdsiz)
+{
+ size_t nrdlen = 0;
+ int n;
+
+ switch (type) {
+ case ns_t_a:
+ UNPACK_REST(NS_INADDRSZ);
+ break;
+ case ns_t_aaaa:
+ UNPACK_REST(NS_IN6ADDRSZ);
+ break;
+ case ns_t_cname:
+ case ns_t_mb:
+ case ns_t_mg:
+ case ns_t_mr:
+ case ns_t_ns:
+ case ns_t_ptr:
+ case ns_t_dname:
+ UNPACK_DNAME;
+ break;
+ case ns_t_soa:
+ UNPACK_DNAME;
+ UNPACK_DNAME;
+ UNPACK_SOME(NS_INT32SZ * 5);
+ break;
+ case ns_t_mx:
+ case ns_t_afsdb:
+ case ns_t_rt:
+ UNPACK_SOME(NS_INT16SZ);
+ UNPACK_DNAME;
+ break;
+ case ns_t_px:
+ UNPACK_SOME(NS_INT16SZ);
+ UNPACK_DNAME;
+ UNPACK_DNAME;
+ break;
+ case ns_t_srv:
+ UNPACK_SOME(NS_INT16SZ * 3);
+ UNPACK_DNAME;
+ break;
+ case ns_t_minfo:
+ case ns_t_rp:
+ UNPACK_DNAME;
+ UNPACK_DNAME;
+ break;
+ default:
+ UNPACK_SOME(rdlen);
+ break;
+ }
+ if (rdlen > 0) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ return (nrdlen);
+}
+
+#define EQUAL_CONSUME \
+ do { \
+ rdata1 += n, rdlen1 -= n; \
+ rdata2 += n, rdlen2 -= n; \
+ } while (0)
+
+#define EQUAL_DNAME \
+ do { \
+ ssize_t n; \
+ \
+ if (rdlen1 != rdlen2) \
+ return (0); \
+ n = ns_name_eq(rdata1, rdlen1, rdata2, rdlen2); \
+ if (n <= 0) \
+ return (n); \
+ n = rdlen1; \
+ EQUAL_CONSUME; \
+ } while (0)
+
+#define EQUAL_SOME(x) \
+ do { \
+ size_t n = (x); \
+ \
+ if (n > rdlen1 || n > rdlen2) { \
+ errno = EMSGSIZE; \
+ return (-1); \
+ } \
+ if (memcmp(rdata1, rdata2, n) != 0) \
+ return (0); \
+ EQUAL_CONSUME; \
+ } while (0)
+
+int
+ns_rdata_equal(ns_type type,
+ const u_char *rdata1, size_t rdlen1,
+ const u_char *rdata2, size_t rdlen2)
+{
+ switch (type) {
+ case ns_t_cname:
+ case ns_t_mb:
+ case ns_t_mg:
+ case ns_t_mr:
+ case ns_t_ns:
+ case ns_t_ptr:
+ case ns_t_dname:
+ EQUAL_DNAME;
+ break;
+ case ns_t_soa:
+ /* "There can be only one." --Highlander */
+ break;
+ case ns_t_mx:
+ case ns_t_afsdb:
+ case ns_t_rt:
+ EQUAL_SOME(NS_INT16SZ);
+ EQUAL_DNAME;
+ break;
+ case ns_t_px:
+ EQUAL_SOME(NS_INT16SZ);
+ EQUAL_DNAME;
+ EQUAL_DNAME;
+ break;
+ case ns_t_srv:
+ EQUAL_SOME(NS_INT16SZ * 3);
+ EQUAL_DNAME;
+ break;
+ case ns_t_minfo:
+ case ns_t_rp:
+ EQUAL_DNAME;
+ EQUAL_DNAME;
+ break;
+ default:
+ EQUAL_SOME(rdlen1);
+ break;
+ }
+ if (rdlen1 != 0 || rdlen2 != 0)
+ return (0);
+ return (1);
+}
+
+#define REFERS_DNAME \
+ do { \
+ int n; \
+ \
+ n = ns_name_eq(rdata, rdlen, nname, NS_MAXNNAME); \
+ if (n < 0) \
+ return (-1); \
+ if (n > 0) \
+ return (1); \
+ n = dn_skipname(rdata, rdata + rdlen); \
+ if (n < 0) \
+ return (-1); \
+ CONSUME_SRC; \
+ } while (0)
+
+#define REFERS_SOME(x) \
+ do { \
+ size_t n = (x); \
+ \
+ if (n > rdlen) { \
+ errno = EMSGSIZE; \
+ return (-1); \
+ } \
+ CONSUME_SRC; \
+ } while (0)
+
+int
+ns_rdata_refers(ns_type type,
+ const u_char *rdata, size_t rdlen,
+ const u_char *nname)
+{
+ switch (type) {
+ case ns_t_cname:
+ case ns_t_mb:
+ case ns_t_mg:
+ case ns_t_mr:
+ case ns_t_ns:
+ case ns_t_ptr:
+ case ns_t_dname:
+ REFERS_DNAME;
+ break;
+ case ns_t_soa:
+ REFERS_DNAME;
+ REFERS_DNAME;
+ REFERS_SOME(NS_INT32SZ * 5);
+ break;
+ case ns_t_mx:
+ case ns_t_afsdb:
+ case ns_t_rt:
+ REFERS_SOME(NS_INT16SZ);
+ REFERS_DNAME;
+ break;
+ case ns_t_px:
+ REFERS_SOME(NS_INT16SZ);
+ REFERS_DNAME;
+ REFERS_DNAME;
+ break;
+ case ns_t_srv:
+ REFERS_SOME(NS_INT16SZ * 3);
+ REFERS_DNAME;
+ break;
+ case ns_t_minfo:
+ case ns_t_rp:
+ REFERS_DNAME;
+ REFERS_DNAME;
+ break;
+ default:
+ REFERS_SOME(rdlen);
+ break;
+ }
+ if (rdlen != 0) {
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ return (0);
+}
diff --git a/usr/src/lib/libresolv2_joy/common/nameser/ns_samedomain.c b/usr/src/lib/libresolv2_joy/common/nameser/ns_samedomain.c
new file mode 100644
index 0000000000..5e9f5cab54
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/nameser/ns_samedomain.c
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1995,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef lint
+static const char rcsid[] = "$Id: ns_samedomain.c,v 1.6 2005/04/27 04:56:40 sra Exp $";
+#endif
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <arpa/nameser.h>
+#include <errno.h>
+#include <string.h>
+
+#include "port_after.h"
+
+/*%
+ * Check whether a name belongs to a domain.
+ *
+ * Inputs:
+ *\li a - the domain whose ancestory is being verified
+ *\li b - the potential ancestor we're checking against
+ *
+ * Return:
+ *\li boolean - is a at or below b?
+ *
+ * Notes:
+ *\li Trailing dots are first removed from name and domain.
+ * Always compare complete subdomains, not only whether the
+ * domain name is the trailing string of the given name.
+ *
+ *\li "host.foobar.top" lies in "foobar.top" and in "top" and in ""
+ * but NOT in "bar.top"
+ */
+
+int
+ns_samedomain(const char *a, const char *b) {
+ size_t la, lb;
+ int diff, i, escaped;
+ const char *cp;
+
+ la = strlen(a);
+ lb = strlen(b);
+
+ /* Ignore a trailing label separator (i.e. an unescaped dot) in 'a'. */
+ if (la != 0U && a[la - 1] == '.') {
+ escaped = 0;
+ /* Note this loop doesn't get executed if la==1. */
+ for (i = la - 2; i >= 0; i--)
+ if (a[i] == '\\') {
+ if (escaped)
+ escaped = 0;
+ else
+ escaped = 1;
+ } else
+ break;
+ if (!escaped)
+ la--;
+ }
+
+ /* Ignore a trailing label separator (i.e. an unescaped dot) in 'b'. */
+ if (lb != 0U && b[lb - 1] == '.') {
+ escaped = 0;
+ /* note this loop doesn't get executed if lb==1 */
+ for (i = lb - 2; i >= 0; i--)
+ if (b[i] == '\\') {
+ if (escaped)
+ escaped = 0;
+ else
+ escaped = 1;
+ } else
+ break;
+ if (!escaped)
+ lb--;
+ }
+
+ /* lb == 0 means 'b' is the root domain, so 'a' must be in 'b'. */
+ if (lb == 0U)
+ return (1);
+
+ /* 'b' longer than 'a' means 'a' can't be in 'b'. */
+ if (lb > la)
+ return (0);
+
+ /* 'a' and 'b' being equal at this point indicates sameness. */
+ if (lb == la)
+ return (strncasecmp(a, b, lb) == 0);
+
+ /* Ok, we know la > lb. */
+
+ diff = la - lb;
+
+ /*
+ * If 'a' is only 1 character longer than 'b', then it can't be
+ * a subdomain of 'b' (because of the need for the '.' label
+ * separator).
+ */
+ if (diff < 2)
+ return (0);
+
+ /*
+ * If the character before the last 'lb' characters of 'b'
+ * isn't '.', then it can't be a match (this lets us avoid
+ * having "foobar.com" match "bar.com").
+ */
+ if (a[diff - 1] != '.')
+ return (0);
+
+ /*
+ * We're not sure about that '.', however. It could be escaped
+ * and thus not a really a label separator.
+ */
+ escaped = 0;
+ for (i = diff - 2; i >= 0; i--)
+ if (a[i] == '\\') {
+ if (escaped)
+ escaped = 0;
+ else
+ escaped = 1;
+ } else
+ break;
+ if (escaped)
+ return (0);
+
+ /* Now compare aligned trailing substring. */
+ cp = a + diff;
+ return (strncasecmp(cp, b, lb) == 0);
+}
+
+/*%
+ * is "a" a subdomain of "b"?
+ */
+int
+ns_subdomain(const char *a, const char *b) {
+ return (ns_samename(a, b) != 1 && ns_samedomain(a, b));
+}
+
+/*%
+ * make a canonical copy of domain name "src"
+ *
+ * notes:
+ * \code
+ * foo -> foo.
+ * foo. -> foo.
+ * foo.. -> foo.
+ * foo\. -> foo\..
+ * foo\\. -> foo\\.
+ * \endcode
+ */
+
+int
+ns_makecanon(const char *src, char *dst, size_t dstsize) {
+ size_t n = strlen(src);
+
+ if (n + sizeof "." > dstsize) { /*%< Note: sizeof == 2 */
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ strcpy(dst, src);
+ while (n >= 1U && dst[n - 1] == '.') /*%< Ends in "." */
+ if (n >= 2U && dst[n - 2] == '\\' && /*%< Ends in "\." */
+ (n < 3U || dst[n - 3] != '\\')) /*%< But not "\\." */
+ break;
+ else
+ dst[--n] = '\0';
+ dst[n++] = '.';
+ dst[n] = '\0';
+ return (0);
+}
+
+/*%
+ * determine whether domain name "a" is the same as domain name "b"
+ *
+ * return:
+ *\li -1 on error
+ *\li 0 if names differ
+ *\li 1 if names are the same
+ */
+
+int
+ns_samename(const char *a, const char *b) {
+ char ta[NS_MAXDNAME], tb[NS_MAXDNAME];
+
+ if (ns_makecanon(a, ta, sizeof ta) < 0 ||
+ ns_makecanon(b, tb, sizeof tb) < 0)
+ return (-1);
+ if (strcasecmp(ta, tb) == 0)
+ return (1);
+ else
+ return (0);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/nameser/ns_sign.c b/usr/src/lib/libresolv2_joy/common/nameser/ns_sign.c
new file mode 100644
index 0000000000..fc7dd19f7b
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/nameser/ns_sign.c
@@ -0,0 +1,387 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1999 by Internet Software Consortium, Inc.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef lint
+static const char rcsid[] = "$Id: ns_sign.c,v 1.6 2006/03/09 23:57:56 marka Exp $";
+#endif
+
+/* Import. */
+
+#include "port_before.h"
+#include "fd_setsize.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <arpa/inet.h>
+
+#include <errno.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <isc/dst.h>
+#include <isc/assertions.h>
+
+#include "port_after.h"
+
+#define BOUNDS_CHECK(ptr, count) \
+ do { \
+ if ((ptr) + (count) > eob) { \
+ errno = EMSGSIZE; \
+ return(NS_TSIG_ERROR_NO_SPACE); \
+ } \
+ } while (0)
+
+/*%
+ * ns_sign
+ *
+ * Parameters:
+ *\li msg message to be sent
+ *\li msglen input - length of message
+ * output - length of signed message
+ *\li msgsize length of buffer containing message
+ *\li error value to put in the error field
+ *\li key tsig key used for signing
+ *\li querysig (response), the signature in the query
+ *\li querysiglen (response), the length of the signature in the query
+ *\li sig a buffer to hold the generated signature
+ *\li siglen input - length of signature buffer
+ * output - length of signature
+ *
+ * Errors:
+ *\li - bad input data (-1)
+ *\li - bad key / sign failed (-BADKEY)
+ *\li - not enough space (NS_TSIG_ERROR_NO_SPACE)
+ */
+int
+ns_sign(u_char *msg, int *msglen, int msgsize, int error, void *k,
+ const u_char *querysig, int querysiglen, u_char *sig, int *siglen,
+ time_t in_timesigned)
+{
+ return(ns_sign2(msg, msglen, msgsize, error, k,
+ querysig, querysiglen, sig, siglen,
+ in_timesigned, NULL, NULL));
+}
+
+int
+ns_sign2(u_char *msg, int *msglen, int msgsize, int error, void *k,
+ const u_char *querysig, int querysiglen, u_char *sig, int *siglen,
+ time_t in_timesigned, u_char **dnptrs, u_char **lastdnptr)
+{
+ HEADER *hp = (HEADER *)msg;
+ DST_KEY *key = (DST_KEY *)k;
+ u_char *cp, *eob;
+ u_char *lenp;
+ u_char *alg;
+ int n;
+ time_t timesigned;
+ u_char name[NS_MAXCDNAME];
+
+ dst_init();
+ if (msg == NULL || msglen == NULL || sig == NULL || siglen == NULL)
+ return (-1);
+
+ cp = msg + *msglen;
+ eob = msg + msgsize;
+
+ /* Name. */
+ if (key != NULL && error != ns_r_badsig && error != ns_r_badkey) {
+ n = ns_name_pton(key->dk_key_name, name, sizeof name);
+ if (n != -1)
+ n = ns_name_pack(name, cp, eob - cp,
+ (const u_char **)dnptrs,
+ (const u_char **)lastdnptr);
+
+ } else {
+ n = ns_name_pton("", name, sizeof name);
+ if (n != -1)
+ n = ns_name_pack(name, cp, eob - cp, NULL, NULL);
+ }
+ if (n < 0)
+ return (NS_TSIG_ERROR_NO_SPACE);
+ cp += n;
+
+ /* Type, class, ttl, length (not filled in yet). */
+ BOUNDS_CHECK(cp, INT16SZ + INT16SZ + INT32SZ + INT16SZ);
+ PUTSHORT(ns_t_tsig, cp);
+ PUTSHORT(ns_c_any, cp);
+ PUTLONG(0, cp); /*%< TTL */
+ lenp = cp;
+ cp += 2;
+
+ /* Alg. */
+ if (key != NULL && error != ns_r_badsig && error != ns_r_badkey) {
+ if (key->dk_alg != KEY_HMAC_MD5)
+ return (-ns_r_badkey);
+ n = dn_comp(NS_TSIG_ALG_HMAC_MD5, cp, eob - cp, NULL, NULL);
+ }
+ else
+ n = dn_comp("", cp, eob - cp, NULL, NULL);
+ if (n < 0)
+ return (NS_TSIG_ERROR_NO_SPACE);
+ alg = cp;
+ cp += n;
+
+ /* Time. */
+ BOUNDS_CHECK(cp, INT16SZ + INT32SZ + INT16SZ);
+ PUTSHORT(0, cp);
+ timesigned = time(NULL);
+ if (error != ns_r_badtime)
+ PUTLONG(timesigned, cp);
+ else
+ PUTLONG(in_timesigned, cp);
+ PUTSHORT(NS_TSIG_FUDGE, cp);
+
+ /* Compute the signature. */
+ if (key != NULL && error != ns_r_badsig && error != ns_r_badkey) {
+ void *ctx;
+ u_char buf[NS_MAXCDNAME], *cp2;
+ int n;
+
+ dst_sign_data(SIG_MODE_INIT, key, &ctx, NULL, 0, NULL, 0);
+
+ /* Digest the query signature, if this is a response. */
+ if (querysiglen > 0 && querysig != NULL) {
+ u_int16_t len_n = htons(querysiglen);
+ dst_sign_data(SIG_MODE_UPDATE, key, &ctx,
+ (u_char *)&len_n, INT16SZ, NULL, 0);
+ dst_sign_data(SIG_MODE_UPDATE, key, &ctx,
+ querysig, querysiglen, NULL, 0);
+ }
+
+ /* Digest the message. */
+ dst_sign_data(SIG_MODE_UPDATE, key, &ctx, msg, *msglen,
+ NULL, 0);
+
+ /* Digest the key name. */
+ n = ns_name_ntol(name, buf, sizeof(buf));
+ INSIST(n > 0);
+ dst_sign_data(SIG_MODE_UPDATE, key, &ctx, buf, n, NULL, 0);
+
+ /* Digest the class and TTL. */
+ cp2 = buf;
+ PUTSHORT(ns_c_any, cp2);
+ PUTLONG(0, cp2);
+ dst_sign_data(SIG_MODE_UPDATE, key, &ctx, buf, cp2-buf,
+ NULL, 0);
+
+ /* Digest the algorithm. */
+ n = ns_name_ntol(alg, buf, sizeof(buf));
+ INSIST(n > 0);
+ dst_sign_data(SIG_MODE_UPDATE, key, &ctx, buf, n, NULL, 0);
+
+ /* Digest the time signed, fudge, error, and other data */
+ cp2 = buf;
+ PUTSHORT(0, cp2); /*%< Top 16 bits of time */
+ if (error != ns_r_badtime)
+ PUTLONG(timesigned, cp2);
+ else
+ PUTLONG(in_timesigned, cp2);
+ PUTSHORT(NS_TSIG_FUDGE, cp2);
+ PUTSHORT(error, cp2); /*%< Error */
+ if (error != ns_r_badtime)
+ PUTSHORT(0, cp2); /*%< Other data length */
+ else {
+ PUTSHORT(INT16SZ+INT32SZ, cp2); /*%< Other data length */
+ PUTSHORT(0, cp2); /*%< Top 16 bits of time */
+ PUTLONG(timesigned, cp2);
+ }
+ dst_sign_data(SIG_MODE_UPDATE, key, &ctx, buf, cp2-buf,
+ NULL, 0);
+
+ n = dst_sign_data(SIG_MODE_FINAL, key, &ctx, NULL, 0,
+ sig, *siglen);
+ if (n < 0)
+ return (-ns_r_badkey);
+ *siglen = n;
+ } else
+ *siglen = 0;
+
+ /* Add the signature. */
+ BOUNDS_CHECK(cp, INT16SZ + (*siglen));
+ PUTSHORT(*siglen, cp);
+ memcpy(cp, sig, *siglen);
+ cp += (*siglen);
+
+ /* The original message ID & error. */
+ BOUNDS_CHECK(cp, INT16SZ + INT16SZ);
+ PUTSHORT(ntohs(hp->id), cp); /*%< already in network order */
+ PUTSHORT(error, cp);
+
+ /* Other data. */
+ BOUNDS_CHECK(cp, INT16SZ);
+ if (error != ns_r_badtime)
+ PUTSHORT(0, cp); /*%< Other data length */
+ else {
+ PUTSHORT(INT16SZ+INT32SZ, cp); /*%< Other data length */
+ BOUNDS_CHECK(cp, INT32SZ+INT16SZ);
+ PUTSHORT(0, cp); /*%< Top 16 bits of time */
+ PUTLONG(timesigned, cp);
+ }
+
+ /* Go back and fill in the length. */
+ PUTSHORT(cp - lenp - INT16SZ, lenp);
+
+ hp->arcount = htons(ntohs(hp->arcount) + 1);
+ *msglen = (cp - msg);
+ return (0);
+}
+
+int
+ns_sign_tcp_init(void *k, const u_char *querysig, int querysiglen,
+ ns_tcp_tsig_state *state)
+{
+ dst_init();
+ if (state == NULL || k == NULL || querysig == NULL || querysiglen < 0)
+ return (-1);
+ state->counter = -1;
+ state->key = k;
+ if (state->key->dk_alg != KEY_HMAC_MD5)
+ return (-ns_r_badkey);
+ if (querysiglen > (int)sizeof(state->sig))
+ return (-1);
+ memcpy(state->sig, querysig, querysiglen);
+ state->siglen = querysiglen;
+ return (0);
+}
+
+int
+ns_sign_tcp(u_char *msg, int *msglen, int msgsize, int error,
+ ns_tcp_tsig_state *state, int done)
+{
+ return (ns_sign_tcp2(msg, msglen, msgsize, error, state,
+ done, NULL, NULL));
+}
+
+int
+ns_sign_tcp2(u_char *msg, int *msglen, int msgsize, int error,
+ ns_tcp_tsig_state *state, int done,
+ u_char **dnptrs, u_char **lastdnptr)
+{
+ u_char *cp, *eob, *lenp;
+ u_char buf[MAXDNAME], *cp2;
+ HEADER *hp = (HEADER *)msg;
+ time_t timesigned;
+ int n;
+
+ if (msg == NULL || msglen == NULL || state == NULL)
+ return (-1);
+
+ state->counter++;
+ if (state->counter == 0)
+ return (ns_sign2(msg, msglen, msgsize, error, state->key,
+ state->sig, state->siglen,
+ state->sig, &state->siglen, 0,
+ dnptrs, lastdnptr));
+
+ if (state->siglen > 0) {
+ u_int16_t siglen_n = htons(state->siglen);
+ dst_sign_data(SIG_MODE_INIT, state->key, &state->ctx,
+ NULL, 0, NULL, 0);
+ dst_sign_data(SIG_MODE_UPDATE, state->key, &state->ctx,
+ (u_char *)&siglen_n, INT16SZ, NULL, 0);
+ dst_sign_data(SIG_MODE_UPDATE, state->key, &state->ctx,
+ state->sig, state->siglen, NULL, 0);
+ state->siglen = 0;
+ }
+
+ dst_sign_data(SIG_MODE_UPDATE, state->key, &state->ctx, msg, *msglen,
+ NULL, 0);
+
+ if (done == 0 && (state->counter % 100 != 0))
+ return (0);
+
+ cp = msg + *msglen;
+ eob = msg + msgsize;
+
+ /* Name. */
+ n = dn_comp(state->key->dk_key_name, cp, eob - cp, dnptrs, lastdnptr);
+ if (n < 0)
+ return (NS_TSIG_ERROR_NO_SPACE);
+ cp += n;
+
+ /* Type, class, ttl, length (not filled in yet). */
+ BOUNDS_CHECK(cp, INT16SZ + INT16SZ + INT32SZ + INT16SZ);
+ PUTSHORT(ns_t_tsig, cp);
+ PUTSHORT(ns_c_any, cp);
+ PUTLONG(0, cp); /*%< TTL */
+ lenp = cp;
+ cp += 2;
+
+ /* Alg. */
+ n = dn_comp(NS_TSIG_ALG_HMAC_MD5, cp, eob - cp, NULL, NULL);
+ if (n < 0)
+ return (NS_TSIG_ERROR_NO_SPACE);
+ cp += n;
+
+ /* Time. */
+ BOUNDS_CHECK(cp, INT16SZ + INT32SZ + INT16SZ);
+ PUTSHORT(0, cp);
+ timesigned = time(NULL);
+ PUTLONG(timesigned, cp);
+ PUTSHORT(NS_TSIG_FUDGE, cp);
+
+ /*
+ * Compute the signature.
+ */
+
+ /* Digest the time signed and fudge. */
+ cp2 = buf;
+ PUTSHORT(0, cp2); /*%< Top 16 bits of time */
+ PUTLONG(timesigned, cp2);
+ PUTSHORT(NS_TSIG_FUDGE, cp2);
+
+ dst_sign_data(SIG_MODE_UPDATE, state->key, &state->ctx,
+ buf, cp2 - buf, NULL, 0);
+
+ n = dst_sign_data(SIG_MODE_FINAL, state->key, &state->ctx, NULL, 0,
+ state->sig, sizeof(state->sig));
+ if (n < 0)
+ return (-ns_r_badkey);
+ state->siglen = n;
+
+ /* Add the signature. */
+ BOUNDS_CHECK(cp, INT16SZ + state->siglen);
+ PUTSHORT(state->siglen, cp);
+ memcpy(cp, state->sig, state->siglen);
+ cp += state->siglen;
+
+ /* The original message ID & error. */
+ BOUNDS_CHECK(cp, INT16SZ + INT16SZ);
+ PUTSHORT(ntohs(hp->id), cp); /*%< already in network order */
+ PUTSHORT(error, cp);
+
+ /* Other data. */
+ BOUNDS_CHECK(cp, INT16SZ);
+ PUTSHORT(0, cp);
+
+ /* Go back and fill in the length. */
+ PUTSHORT(cp - lenp - INT16SZ, lenp);
+
+ hp->arcount = htons(ntohs(hp->arcount) + 1);
+ *msglen = (cp - msg);
+ return (0);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/nameser/ns_ttl.c b/usr/src/lib/libresolv2_joy/common/nameser/ns_ttl.c
new file mode 100644
index 0000000000..69c2f83f57
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/nameser/ns_ttl.c
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef lint
+static const char rcsid[] = "$Id: ns_ttl.c,v 1.4 2005/07/28 06:51:49 marka Exp $";
+#endif
+
+/* Import. */
+
+#include "port_before.h"
+
+#include <arpa/nameser.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "port_after.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) ((size_t)sprintf x)
+#endif
+
+/* Forward. */
+
+static int fmt1(int t, char s, char **buf, size_t *buflen);
+
+/* Macros. */
+
+#define T(x) if ((x) < 0) return (-1); else (void)NULL
+
+/* Public. */
+
+int
+ns_format_ttl(u_long src, char *dst, size_t dstlen) {
+ char *odst = dst;
+ int secs, mins, hours, days, weeks, x;
+ char *p;
+
+ secs = src % 60; src /= 60;
+ mins = src % 60; src /= 60;
+ hours = src % 24; src /= 24;
+ days = src % 7; src /= 7;
+ weeks = src; src = 0;
+
+ x = 0;
+ if (weeks) {
+ T(fmt1(weeks, 'W', &dst, &dstlen));
+ x++;
+ }
+ if (days) {
+ T(fmt1(days, 'D', &dst, &dstlen));
+ x++;
+ }
+ if (hours) {
+ T(fmt1(hours, 'H', &dst, &dstlen));
+ x++;
+ }
+ if (mins) {
+ T(fmt1(mins, 'M', &dst, &dstlen));
+ x++;
+ }
+ if (secs || !(weeks || days || hours || mins)) {
+ T(fmt1(secs, 'S', &dst, &dstlen));
+ x++;
+ }
+
+ if (x > 1) {
+ int ch;
+
+ for (p = odst; (ch = *p) != '\0'; p++)
+ if (isascii(ch) && isupper(ch))
+ *p = tolower(ch);
+ }
+
+ return (dst - odst);
+}
+
+int
+ns_parse_ttl(const char *src, u_long *dst) {
+ u_long ttl, tmp;
+ int ch, digits, dirty;
+
+ ttl = 0;
+ tmp = 0;
+ digits = 0;
+ dirty = 0;
+ while ((ch = *src++) != '\0') {
+ if (!isascii(ch) || !isprint(ch))
+ goto einval;
+ if (isdigit(ch)) {
+ tmp *= 10;
+ tmp += (ch - '0');
+ digits++;
+ continue;
+ }
+ if (digits == 0)
+ goto einval;
+ if (islower(ch))
+ ch = toupper(ch);
+ switch (ch) {
+ case 'W': tmp *= 7;
+ case 'D': tmp *= 24;
+ case 'H': tmp *= 60;
+ case 'M': tmp *= 60;
+ case 'S': break;
+ default: goto einval;
+ }
+ ttl += tmp;
+ tmp = 0;
+ digits = 0;
+ dirty = 1;
+ }
+ if (digits > 0) {
+ if (dirty)
+ goto einval;
+ else
+ ttl += tmp;
+ } else if (!dirty)
+ goto einval;
+ *dst = ttl;
+ return (0);
+
+ einval:
+ errno = EINVAL;
+ return (-1);
+}
+
+/* Private. */
+
+static int
+fmt1(int t, char s, char **buf, size_t *buflen) {
+ char tmp[50];
+ size_t len;
+
+ len = SPRINTF((tmp, "%d%c", t, s));
+ if (len + 1 > *buflen)
+ return (-1);
+ strcpy(*buf, tmp);
+ *buf += len;
+ *buflen -= len;
+ return (0);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/nameser/ns_verify.c b/usr/src/lib/libresolv2_joy/common/nameser/ns_verify.c
new file mode 100644
index 0000000000..b4b63d4641
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/nameser/ns_verify.c
@@ -0,0 +1,484 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1999 by Internet Software Consortium, Inc.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef lint
+static const char rcsid[] = "$Id: ns_verify.c,v 1.5 2006/03/09 23:57:56 marka Exp $";
+#endif
+
+/* Import. */
+
+#include "port_before.h"
+#include "fd_setsize.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <arpa/inet.h>
+
+#include <errno.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <isc/dst.h>
+
+#include "port_after.h"
+
+/* Private. */
+
+#define BOUNDS_CHECK(ptr, count) \
+ do { \
+ if ((ptr) + (count) > eom) { \
+ return (NS_TSIG_ERROR_FORMERR); \
+ } \
+ } while (0)
+
+/* Public. */
+
+u_char *
+ns_find_tsig(u_char *msg, u_char *eom) {
+ HEADER *hp = (HEADER *)msg;
+ int n, type;
+ u_char *cp = msg, *start;
+
+ if (msg == NULL || eom == NULL || msg > eom)
+ return (NULL);
+
+ if (cp + HFIXEDSZ >= eom)
+ return (NULL);
+
+ if (hp->arcount == 0)
+ return (NULL);
+
+ cp += HFIXEDSZ;
+
+ n = ns_skiprr(cp, eom, ns_s_qd, ntohs(hp->qdcount));
+ if (n < 0)
+ return (NULL);
+ cp += n;
+
+ n = ns_skiprr(cp, eom, ns_s_an, ntohs(hp->ancount));
+ if (n < 0)
+ return (NULL);
+ cp += n;
+
+ n = ns_skiprr(cp, eom, ns_s_ns, ntohs(hp->nscount));
+ if (n < 0)
+ return (NULL);
+ cp += n;
+
+ n = ns_skiprr(cp, eom, ns_s_ar, ntohs(hp->arcount) - 1);
+ if (n < 0)
+ return (NULL);
+ cp += n;
+
+ start = cp;
+ n = dn_skipname(cp, eom);
+ if (n < 0)
+ return (NULL);
+ cp += n;
+ if (cp + INT16SZ >= eom)
+ return (NULL);
+
+ GETSHORT(type, cp);
+ if (type != ns_t_tsig)
+ return (NULL);
+ return (start);
+}
+
+/* ns_verify
+ *
+ * Parameters:
+ *\li statp res stuff
+ *\li msg received message
+ *\li msglen length of message
+ *\li key tsig key used for verifying.
+ *\li querysig (response), the signature in the query
+ *\li querysiglen (response), the length of the signature in the query
+ *\li sig (query), a buffer to hold the signature
+ *\li siglen (query), input - length of signature buffer
+ * output - length of signature
+ *
+ * Errors:
+ *\li - bad input (-1)
+ *\li - invalid dns message (NS_TSIG_ERROR_FORMERR)
+ *\li - TSIG is not present (NS_TSIG_ERROR_NO_TSIG)
+ *\li - key doesn't match (-ns_r_badkey)
+ *\li - TSIG verification fails with BADKEY (-ns_r_badkey)
+ *\li - TSIG verification fails with BADSIG (-ns_r_badsig)
+ *\li - TSIG verification fails with BADTIME (-ns_r_badtime)
+ *\li - TSIG verification succeeds, error set to BAKEY (ns_r_badkey)
+ *\li - TSIG verification succeeds, error set to BADSIG (ns_r_badsig)
+ *\li - TSIG verification succeeds, error set to BADTIME (ns_r_badtime)
+ */
+int
+ns_verify(u_char *msg, int *msglen, void *k,
+ const u_char *querysig, int querysiglen, u_char *sig, int *siglen,
+ time_t *timesigned, int nostrip)
+{
+ HEADER *hp = (HEADER *)msg;
+ DST_KEY *key = (DST_KEY *)k;
+ u_char *cp = msg, *eom;
+ char name[MAXDNAME], alg[MAXDNAME];
+ u_char *recstart, *rdatastart;
+ u_char *sigstart, *otherstart;
+ int n;
+ int error;
+ u_int16_t type, length;
+ u_int16_t fudge, sigfieldlen, otherfieldlen;
+
+ dst_init();
+ if (msg == NULL || msglen == NULL || *msglen < 0)
+ return (-1);
+
+ eom = msg + *msglen;
+
+ recstart = ns_find_tsig(msg, eom);
+ if (recstart == NULL)
+ return (NS_TSIG_ERROR_NO_TSIG);
+
+ cp = recstart;
+
+ /* Read the key name. */
+ n = dn_expand(msg, eom, cp, name, MAXDNAME);
+ if (n < 0)
+ return (NS_TSIG_ERROR_FORMERR);
+ cp += n;
+
+ /* Read the type. */
+ BOUNDS_CHECK(cp, 2*INT16SZ + INT32SZ + INT16SZ);
+ GETSHORT(type, cp);
+ if (type != ns_t_tsig)
+ return (NS_TSIG_ERROR_NO_TSIG);
+
+ /* Skip the class and TTL, save the length. */
+ cp += INT16SZ + INT32SZ;
+ GETSHORT(length, cp);
+ if (eom - cp != length)
+ return (NS_TSIG_ERROR_FORMERR);
+
+ /* Read the algorithm name. */
+ rdatastart = cp;
+ n = dn_expand(msg, eom, cp, alg, MAXDNAME);
+ if (n < 0)
+ return (NS_TSIG_ERROR_FORMERR);
+ if (ns_samename(alg, NS_TSIG_ALG_HMAC_MD5) != 1)
+ return (-ns_r_badkey);
+ cp += n;
+
+ /* Read the time signed and fudge. */
+ BOUNDS_CHECK(cp, INT16SZ + INT32SZ + INT16SZ);
+ cp += INT16SZ;
+ GETLONG((*timesigned), cp);
+ GETSHORT(fudge, cp);
+
+ /* Read the signature. */
+ BOUNDS_CHECK(cp, INT16SZ);
+ GETSHORT(sigfieldlen, cp);
+ BOUNDS_CHECK(cp, sigfieldlen);
+ sigstart = cp;
+ cp += sigfieldlen;
+
+ /* Skip id and read error. */
+ BOUNDS_CHECK(cp, 2*INT16SZ);
+ cp += INT16SZ;
+ GETSHORT(error, cp);
+
+ /* Parse the other data. */
+ BOUNDS_CHECK(cp, INT16SZ);
+ GETSHORT(otherfieldlen, cp);
+ BOUNDS_CHECK(cp, otherfieldlen);
+ otherstart = cp;
+ cp += otherfieldlen;
+
+ if (cp != eom)
+ return (NS_TSIG_ERROR_FORMERR);
+
+ /* Verify that the key used is OK. */
+ if (key != NULL) {
+ if (key->dk_alg != KEY_HMAC_MD5)
+ return (-ns_r_badkey);
+ if (error != ns_r_badsig && error != ns_r_badkey) {
+ if (ns_samename(key->dk_key_name, name) != 1)
+ return (-ns_r_badkey);
+ }
+ }
+
+ hp->arcount = htons(ntohs(hp->arcount) - 1);
+
+ /*
+ * Do the verification.
+ */
+
+ if (key != NULL && error != ns_r_badsig && error != ns_r_badkey) {
+ void *ctx;
+ u_char buf[MAXDNAME];
+ u_char buf2[MAXDNAME];
+
+ /* Digest the query signature, if this is a response. */
+ dst_verify_data(SIG_MODE_INIT, key, &ctx, NULL, 0, NULL, 0);
+ if (querysiglen > 0 && querysig != NULL) {
+ u_int16_t len_n = htons(querysiglen);
+ dst_verify_data(SIG_MODE_UPDATE, key, &ctx,
+ (u_char *)&len_n, INT16SZ, NULL, 0);
+ dst_verify_data(SIG_MODE_UPDATE, key, &ctx,
+ querysig, querysiglen, NULL, 0);
+ }
+
+ /* Digest the message. */
+ dst_verify_data(SIG_MODE_UPDATE, key, &ctx, msg, recstart - msg,
+ NULL, 0);
+
+ /* Digest the key name. */
+ n = ns_name_pton(name, buf2, sizeof(buf2));
+ if (n < 0)
+ return (-1);
+ n = ns_name_ntol(buf2, buf, sizeof(buf));
+ if (n < 0)
+ return (-1);
+ dst_verify_data(SIG_MODE_UPDATE, key, &ctx, buf, n, NULL, 0);
+
+ /* Digest the class and TTL. */
+ dst_verify_data(SIG_MODE_UPDATE, key, &ctx,
+ recstart + dn_skipname(recstart, eom) + INT16SZ,
+ INT16SZ + INT32SZ, NULL, 0);
+
+ /* Digest the algorithm. */
+ n = ns_name_pton(alg, buf2, sizeof(buf2));
+ if (n < 0)
+ return (-1);
+ n = ns_name_ntol(buf2, buf, sizeof(buf));
+ if (n < 0)
+ return (-1);
+ dst_verify_data(SIG_MODE_UPDATE, key, &ctx, buf, n, NULL, 0);
+
+ /* Digest the time signed and fudge. */
+ dst_verify_data(SIG_MODE_UPDATE, key, &ctx,
+ rdatastart + dn_skipname(rdatastart, eom),
+ INT16SZ + INT32SZ + INT16SZ, NULL, 0);
+
+ /* Digest the error and other data. */
+ dst_verify_data(SIG_MODE_UPDATE, key, &ctx,
+ otherstart - INT16SZ - INT16SZ,
+ otherfieldlen + INT16SZ + INT16SZ, NULL, 0);
+
+ n = dst_verify_data(SIG_MODE_FINAL, key, &ctx, NULL, 0,
+ sigstart, sigfieldlen);
+
+ if (n < 0)
+ return (-ns_r_badsig);
+
+ if (sig != NULL && siglen != NULL) {
+ if (*siglen < sigfieldlen)
+ return (NS_TSIG_ERROR_NO_SPACE);
+ memcpy(sig, sigstart, sigfieldlen);
+ *siglen = sigfieldlen;
+ }
+ } else {
+ if (sigfieldlen > 0)
+ return (NS_TSIG_ERROR_FORMERR);
+ if (sig != NULL && siglen != NULL)
+ *siglen = 0;
+ }
+
+ /* Reset the counter, since we still need to check for badtime. */
+ hp->arcount = htons(ntohs(hp->arcount) + 1);
+
+ /* Verify the time. */
+ if (abs((*timesigned) - time(NULL)) > fudge)
+ return (-ns_r_badtime);
+
+ if (nostrip == 0) {
+ *msglen = recstart - msg;
+ hp->arcount = htons(ntohs(hp->arcount) - 1);
+ }
+
+ if (error != NOERROR)
+ return (error);
+
+ return (0);
+}
+
+int
+ns_verify_tcp_init(void *k, const u_char *querysig, int querysiglen,
+ ns_tcp_tsig_state *state)
+{
+ dst_init();
+ if (state == NULL || k == NULL || querysig == NULL || querysiglen < 0)
+ return (-1);
+ state->counter = -1;
+ state->key = k;
+ if (state->key->dk_alg != KEY_HMAC_MD5)
+ return (-ns_r_badkey);
+ if (querysiglen > (int)sizeof(state->sig))
+ return (-1);
+ memcpy(state->sig, querysig, querysiglen);
+ state->siglen = querysiglen;
+ return (0);
+}
+
+int
+ns_verify_tcp(u_char *msg, int *msglen, ns_tcp_tsig_state *state,
+ int required)
+{
+ HEADER *hp = (HEADER *)msg;
+ u_char *recstart, *sigstart;
+ unsigned int sigfieldlen, otherfieldlen;
+ u_char *cp, *eom, *cp2;
+ char name[MAXDNAME], alg[MAXDNAME];
+ u_char buf[MAXDNAME];
+ int n, type, length, fudge, error;
+ time_t timesigned;
+
+ if (msg == NULL || msglen == NULL || state == NULL)
+ return (-1);
+
+ eom = msg + *msglen;
+
+ state->counter++;
+ if (state->counter == 0)
+ return (ns_verify(msg, msglen, state->key,
+ state->sig, state->siglen,
+ state->sig, &state->siglen, &timesigned, 0));
+
+ if (state->siglen > 0) {
+ u_int16_t siglen_n = htons(state->siglen);
+
+ dst_verify_data(SIG_MODE_INIT, state->key, &state->ctx,
+ NULL, 0, NULL, 0);
+ dst_verify_data(SIG_MODE_UPDATE, state->key, &state->ctx,
+ (u_char *)&siglen_n, INT16SZ, NULL, 0);
+ dst_verify_data(SIG_MODE_UPDATE, state->key, &state->ctx,
+ state->sig, state->siglen, NULL, 0);
+ state->siglen = 0;
+ }
+
+ cp = recstart = ns_find_tsig(msg, eom);
+
+ if (recstart == NULL) {
+ if (required)
+ return (NS_TSIG_ERROR_NO_TSIG);
+ dst_verify_data(SIG_MODE_UPDATE, state->key, &state->ctx,
+ msg, *msglen, NULL, 0);
+ return (0);
+ }
+
+ hp->arcount = htons(ntohs(hp->arcount) - 1);
+ dst_verify_data(SIG_MODE_UPDATE, state->key, &state->ctx,
+ msg, recstart - msg, NULL, 0);
+
+ /* Read the key name. */
+ n = dn_expand(msg, eom, cp, name, MAXDNAME);
+ if (n < 0)
+ return (NS_TSIG_ERROR_FORMERR);
+ cp += n;
+
+ /* Read the type. */
+ BOUNDS_CHECK(cp, 2*INT16SZ + INT32SZ + INT16SZ);
+ GETSHORT(type, cp);
+ if (type != ns_t_tsig)
+ return (NS_TSIG_ERROR_NO_TSIG);
+
+ /* Skip the class and TTL, save the length. */
+ cp += INT16SZ + INT32SZ;
+ GETSHORT(length, cp);
+ if (eom - cp != length)
+ return (NS_TSIG_ERROR_FORMERR);
+
+ /* Read the algorithm name. */
+ n = dn_expand(msg, eom, cp, alg, MAXDNAME);
+ if (n < 0)
+ return (NS_TSIG_ERROR_FORMERR);
+ if (ns_samename(alg, NS_TSIG_ALG_HMAC_MD5) != 1)
+ return (-ns_r_badkey);
+ cp += n;
+
+ /* Verify that the key used is OK. */
+ if ((ns_samename(state->key->dk_key_name, name) != 1 ||
+ state->key->dk_alg != KEY_HMAC_MD5))
+ return (-ns_r_badkey);
+
+ /* Read the time signed and fudge. */
+ BOUNDS_CHECK(cp, INT16SZ + INT32SZ + INT16SZ);
+ cp += INT16SZ;
+ GETLONG(timesigned, cp);
+ GETSHORT(fudge, cp);
+
+ /* Read the signature. */
+ BOUNDS_CHECK(cp, INT16SZ);
+ GETSHORT(sigfieldlen, cp);
+ BOUNDS_CHECK(cp, sigfieldlen);
+ sigstart = cp;
+ cp += sigfieldlen;
+
+ /* Skip id and read error. */
+ BOUNDS_CHECK(cp, 2*INT16SZ);
+ cp += INT16SZ;
+ GETSHORT(error, cp);
+
+ /* Parse the other data. */
+ BOUNDS_CHECK(cp, INT16SZ);
+ GETSHORT(otherfieldlen, cp);
+ BOUNDS_CHECK(cp, otherfieldlen);
+ cp += otherfieldlen;
+
+ if (cp != eom)
+ return (NS_TSIG_ERROR_FORMERR);
+
+ /*
+ * Do the verification.
+ */
+
+ /* Digest the time signed and fudge. */
+ cp2 = buf;
+ PUTSHORT(0, cp2); /*%< Top 16 bits of time. */
+ PUTLONG(timesigned, cp2);
+ PUTSHORT(NS_TSIG_FUDGE, cp2);
+
+ dst_verify_data(SIG_MODE_UPDATE, state->key, &state->ctx,
+ buf, cp2 - buf, NULL, 0);
+
+ n = dst_verify_data(SIG_MODE_FINAL, state->key, &state->ctx, NULL, 0,
+ sigstart, sigfieldlen);
+ if (n < 0)
+ return (-ns_r_badsig);
+
+ if (sigfieldlen > sizeof(state->sig))
+ return (NS_TSIG_ERROR_NO_SPACE);
+
+ memcpy(state->sig, sigstart, sigfieldlen);
+ state->siglen = sigfieldlen;
+
+ /* Verify the time. */
+ if (abs(timesigned - time(NULL)) > fudge)
+ return (-ns_r_badtime);
+
+ *msglen = recstart - msg;
+
+ if (error != NOERROR)
+ return (error);
+
+ return (0);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/herror.c b/usr/src/lib/libresolv2_joy/common/resolv/herror.c
new file mode 100644
index 0000000000..568ce3ce68
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/herror.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 1987, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)herror.c 8.1 (Berkeley) 6/4/93";
+static const char rcsid[] = "$Id: herror.c,v 1.4 2005/04/27 04:56:41 sra Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/uio.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <string.h>
+#include <unistd.h>
+#include <irs.h>
+
+#include "port_after.h"
+
+const char *h_errlist[] = {
+ "Resolver Error 0 (no error)",
+ "Unknown host", /*%< 1 HOST_NOT_FOUND */
+ "Host name lookup failure", /*%< 2 TRY_AGAIN */
+ "Unknown server error", /*%< 3 NO_RECOVERY */
+ "No address associated with name", /*%< 4 NO_ADDRESS */
+};
+int h_nerr = { sizeof h_errlist / sizeof h_errlist[0] };
+
+#if !(__GLIBC__ > 2 || __GLIBC__ == 2 && __GLIBC_MINOR__ >= 3)
+#undef h_errno
+int h_errno;
+#endif
+
+/*%
+ * herror --
+ * print the error indicated by the h_errno value.
+ */
+void
+herror(const char *s) {
+ struct iovec iov[4], *v = iov;
+ char *t;
+
+ if (s != NULL && *s != '\0') {
+ DE_CONST(s, t);
+ v->iov_base = t;
+ v->iov_len = strlen(t);
+ v++;
+ DE_CONST(": ", t);
+ v->iov_base = t;
+ v->iov_len = 2;
+ v++;
+ }
+ DE_CONST(hstrerror(*__h_errno()), t);
+ v->iov_base = t;
+ v->iov_len = strlen(v->iov_base);
+ v++;
+ DE_CONST("\n", t);
+ v->iov_base = t;
+ v->iov_len = 1;
+ writev(STDERR_FILENO, iov, (v - iov) + 1);
+}
+
+/*%
+ * hstrerror --
+ * return the string associated with a given "host" errno value.
+ */
+const char *
+hstrerror(int err) {
+ if (err < 0)
+ return ("Resolver internal error");
+ else if (err < h_nerr)
+ return (h_errlist[err]);
+ return ("Unknown resolver error");
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/mtctxres.c b/usr/src/lib/libresolv2_joy/common/resolv/mtctxres.c
new file mode 100644
index 0000000000..2e79b1e7b4
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/mtctxres.c
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+#include <port_before.h>
+#ifdef DO_PTHREADS
+#include <pthread.h>
+#endif
+#include <errno.h>
+#include <netdb.h>
+#include <stdlib.h>
+#include <string.h>
+#include <resolv_mt.h>
+#include <irs.h>
+#include <port_after.h>
+
+#ifdef DO_PTHREADS
+static pthread_key_t key;
+static int mt_key_initialized = 0;
+
+static int __res_init_ctx(void);
+static void __res_destroy_ctx(void *);
+
+#if defined(sun) && !defined(__GNUC__)
+#pragma init (_mtctxres_init)
+#endif
+#endif
+
+static mtctxres_t sharedctx;
+
+#ifdef DO_PTHREADS
+/*
+ * Initialize the TSD key. By doing this at library load time, we're
+ * implicitly running without interference from other threads, so there's
+ * no need for locking.
+ */
+static void
+_mtctxres_init(void) {
+ int pthread_keycreate_ret;
+
+ pthread_keycreate_ret = pthread_key_create(&key, __res_destroy_ctx);
+ if (pthread_keycreate_ret == 0)
+ mt_key_initialized = 1;
+}
+#endif
+
+/*
+ * To support binaries that used the private MT-safe interface in
+ * Solaris 8, we still need to provide the __res_enable_mt()
+ * and __res_disable_mt() entry points. They're do-nothing routines.
+ */
+int
+__res_enable_mt(void) {
+ return (-1);
+}
+
+int
+__res_disable_mt(void) {
+ return (0);
+}
+
+#ifdef DO_PTHREADS
+static int
+__res_init_ctx(void) {
+
+ mtctxres_t *mt;
+ int ret;
+
+
+ if (pthread_getspecific(key) != 0) {
+ /* Already exists */
+ return (0);
+ }
+
+ if ((mt = malloc(sizeof (mtctxres_t))) == 0) {
+ errno = ENOMEM;
+ return (-1);
+ }
+
+ memset(mt, 0, sizeof (mtctxres_t));
+
+ if ((ret = pthread_setspecific(key, mt)) != 0) {
+ free(mt);
+ errno = ret;
+ return (-1);
+ }
+
+ return (0);
+}
+
+static void
+__res_destroy_ctx(void *value) {
+
+ mtctxres_t *mt = (mtctxres_t *)value;
+
+ if (mt != 0)
+ free(mt);
+}
+#endif
+
+mtctxres_t *
+___mtctxres(void) {
+#ifdef DO_PTHREADS
+ mtctxres_t *mt;
+
+ /*
+ * This if clause should only be executed if we are linking
+ * statically. When linked dynamically _mtctxres_init() should
+ * be called at binding time due the #pragma above.
+ */
+ if (!mt_key_initialized) {
+ static pthread_mutex_t keylock = PTHREAD_MUTEX_INITIALIZER;
+ if (pthread_mutex_lock(&keylock) == 0) {
+ _mtctxres_init();
+ (void) pthread_mutex_unlock(&keylock);
+ }
+ }
+
+ /*
+ * If we have already been called in this thread return the existing
+ * context. Otherwise recreat a new context and return it. If
+ * that fails return a global context.
+ */
+ if (mt_key_initialized) {
+ if (((mt = pthread_getspecific(key)) != 0) ||
+ (__res_init_ctx() == 0 &&
+ (mt = pthread_getspecific(key)) != 0)) {
+ return (mt);
+ }
+ }
+#endif
+ return (&sharedctx);
+}
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/res_comp.c b/usr/src/lib/libresolv2_joy/common/resolv/res_comp.c
new file mode 100644
index 0000000000..c82bf01eb8
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/res_comp.c
@@ -0,0 +1,287 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+/*
+ * Copyright (c) 1985, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Portions Copyright (c) 1993 by Digital Equipment Corporation.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies, and that
+ * the name of Digital Equipment Corporation not be used in advertising or
+ * publicity pertaining to distribution of the document or software without
+ * specific, written prior permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT
+ * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)res_comp.c 8.1 (Berkeley) 6/4/93";
+static const char rcsid[] = "$Id: res_comp.c,v 1.5 2005/07/28 06:51:50 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include "port_before.h"
+#include <sys/types.h>
+#include <sys/param.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <ctype.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include "port_after.h"
+
+#ifndef ORIGINAL_ISC_CODE
+#pragma weak __dn_skipname = dn_skipname
+#pragma weak __res_dnok = res_dnok
+#pragma weak __res_hnok = res_hnok
+#pragma weak __res_mailok = res_mailok
+#pragma weak __res_ownok = res_ownok
+#endif /* ORIGINAL_ISC_CODE */
+
+/*%
+ * Expand compressed domain name 'src' to full domain name.
+ *
+ * \li 'msg' is a pointer to the begining of the message,
+ * \li 'eom' points to the first location after the message,
+ * \li 'dst' is a pointer to a buffer of size 'dstsiz' for the result.
+ * \li Return size of compressed name or -1 if there was an error.
+ */
+int
+dn_expand(const u_char *msg, const u_char *eom, const u_char *src,
+ char *dst, int dstsiz)
+{
+ int n = ns_name_uncompress(msg, eom, src, dst, (size_t)dstsiz);
+
+ if (n > 0 && dst[0] == '.')
+ dst[0] = '\0';
+ return (n);
+}
+
+/*%
+ * Pack domain name 'exp_dn' in presentation form into 'comp_dn'.
+ *
+ * \li Return the size of the compressed name or -1.
+ * \li 'length' is the size of the array pointed to by 'comp_dn'.
+ */
+int
+dn_comp(const char *src, u_char *dst, int dstsiz,
+ u_char **dnptrs, u_char **lastdnptr)
+{
+ return (ns_name_compress(src, dst, (size_t)dstsiz,
+ (const u_char **)dnptrs,
+ (const u_char **)lastdnptr));
+}
+
+
+/*%
+ * Skip over a compressed domain name. Return the size or -1.
+ */
+int
+dn_skipname(const u_char *ptr, const u_char *eom) {
+ const u_char *saveptr = ptr;
+
+ if (ns_name_skip(&ptr, eom) == -1)
+ return (-1);
+ return (ptr - saveptr);
+}
+
+/*%
+ * Verify that a domain name uses an acceptable character set.
+ *
+ * Note the conspicuous absence of ctype macros in these definitions. On
+ * non-ASCII hosts, we can't depend on string literals or ctype macros to
+ * tell us anything about network-format data. The rest of the BIND system
+ * is not careful about this, but for some reason, we're doing it right here.
+ */
+#define PERIOD 0x2e
+#define hyphenchar(c) ((c) == 0x2d)
+#define bslashchar(c) ((c) == 0x5c)
+#ifdef SUNW_HNOK_UNDERSCORE
+#define underscorechar(c) ((c) == 0x5f)
+#endif /* SUNW_HNOK_UNDERSCORE */
+#define periodchar(c) ((c) == PERIOD)
+#define asterchar(c) ((c) == 0x2a)
+#define alphachar(c) (((c) >= 0x41 && (c) <= 0x5a) \
+ || ((c) >= 0x61 && (c) <= 0x7a))
+#define digitchar(c) ((c) >= 0x30 && (c) <= 0x39)
+
+#define borderchar(c) (alphachar(c) || digitchar(c))
+#ifdef SUNW_HNOK_UNDERSCORE
+#define middlechar(c) (borderchar(c) || hyphenchar(c) || underscorechar(c))
+#else
+#define middlechar(c) (borderchar(c) || hyphenchar(c))
+#endif /* SUNW_HNOK_UNDERSCORE */
+#define domainchar(c) ((c) > 0x20 && (c) < 0x7f)
+
+int
+res_hnok(const char *dn) {
+ int pch = PERIOD, ch = *dn++;
+
+ while (ch != '\0') {
+ int nch = *dn++;
+
+ if (periodchar(ch)) {
+ (void)NULL;
+ } else if (periodchar(pch)) {
+ if (!borderchar(ch))
+ return (0);
+ } else if (periodchar(nch) || nch == '\0') {
+ if (!borderchar(ch))
+ return (0);
+ } else {
+ if (!middlechar(ch))
+ return (0);
+ }
+ pch = ch, ch = nch;
+ }
+ return (1);
+}
+
+/*%
+ * hostname-like (A, MX, WKS) owners can have "*" as their first label
+ * but must otherwise be as a host name.
+ */
+int
+res_ownok(const char *dn) {
+ if (asterchar(dn[0])) {
+ if (periodchar(dn[1]))
+ return (res_hnok(dn+2));
+ if (dn[1] == '\0')
+ return (1);
+ }
+ return (res_hnok(dn));
+}
+
+/*%
+ * SOA RNAMEs and RP RNAMEs can have any printable character in their first
+ * label, but the rest of the name has to look like a host name.
+ */
+int
+res_mailok(const char *dn) {
+ int ch, escaped = 0;
+
+ /* "." is a valid missing representation */
+ if (*dn == '\0')
+ return (1);
+
+ /* otherwise <label>.<hostname> */
+ while ((ch = *dn++) != '\0') {
+ if (!domainchar(ch))
+ return (0);
+ if (!escaped && periodchar(ch))
+ break;
+ if (escaped)
+ escaped = 0;
+ else if (bslashchar(ch))
+ escaped = 1;
+ }
+ if (periodchar(ch))
+ return (res_hnok(dn));
+ return (0);
+}
+
+/*%
+ * This function is quite liberal, since RFC1034's character sets are only
+ * recommendations.
+ */
+int
+res_dnok(const char *dn) {
+ int ch;
+
+ while ((ch = *dn++) != '\0')
+ if (!domainchar(ch))
+ return (0);
+ return (1);
+}
+
+#ifdef BIND_4_COMPAT
+/*%
+ * This module must export the following externally-visible symbols:
+ * ___putlong
+ * ___putshort
+ * __getlong
+ * __getshort
+ * Note that one _ comes from C and the others come from us.
+ */
+
+#ifdef SOLARIS2
+#ifdef __putlong
+#undef __putlong
+#endif
+#ifdef __putshort
+#undef __putshort
+#endif
+#pragma weak putlong = __putlong
+#pragma weak putshort = __putshort
+#endif /* SOLARIS2 */
+
+void __putlong(u_int32_t src, u_char *dst) { ns_put32(src, dst); }
+void __putshort(u_int16_t src, u_char *dst) { ns_put16(src, dst); }
+#ifndef __ultrix__
+u_int32_t _getlong(const u_char *src) { return (ns_get32(src)); }
+u_int16_t _getshort(const u_char *src) { return (ns_get16(src)); }
+#endif /*__ultrix__*/
+#endif /*BIND_4_COMPAT*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/res_data.c b/usr/src/lib/libresolv2_joy/common/resolv/res_data.c
new file mode 100644
index 0000000000..f1e0dd030b
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/res_data.c
@@ -0,0 +1,322 @@
+/*
+ * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1995-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "$Id: res_data.c,v 1.7 2008/12/11 09:59:00 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <ctype.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <res_update.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "port_after.h"
+
+#ifndef ORIGINAL_ISC_CODE
+#pragma weak __fp_nquery = fp_nquery
+#pragma weak __fp_query = fp_query
+#pragma weak __p_query = p_query
+#pragma weak __hostalias = hostalias
+#pragma weak __res_randomid = res_randomid
+#endif
+
+const char *_res_opcodes[] = {
+ "QUERY",
+ "IQUERY",
+ "CQUERYM",
+ "CQUERYU", /*%< experimental */
+ "NOTIFY", /*%< experimental */
+ "UPDATE",
+ "6",
+ "7",
+ "8",
+ "9",
+ "10",
+ "11",
+ "12",
+ "13",
+ "ZONEINIT",
+ "ZONEREF",
+};
+
+#ifdef BIND_UPDATE
+const char *_res_sectioncodes[] = {
+ "ZONE",
+ "PREREQUISITES",
+ "UPDATE",
+ "ADDITIONAL",
+};
+#endif
+
+#undef _res
+#ifndef __BIND_NOSTATIC
+struct __res_state _res
+# if defined(__BIND_RES_TEXT)
+ = { RES_TIMEOUT, } /*%< Motorola, et al. */
+# endif
+ ;
+
+#ifdef ORIGINAL_ISC_CODE
+#if defined(DO_PTHREADS) || defined(__linux)
+#define _res (*__res_state())
+#endif
+#endif
+
+/* Proto. */
+
+int res_ourserver_p(const res_state, const struct sockaddr_in *);
+
+int
+res_init(void) {
+ extern int __res_vinit(res_state, int);
+
+ /*
+ * These three fields used to be statically initialized. This made
+ * it hard to use this code in a shared library. It is necessary,
+ * now that we're doing dynamic initialization here, that we preserve
+ * the old semantics: if an application modifies one of these three
+ * fields of _res before res_init() is called, res_init() will not
+ * alter them. Of course, if an application is setting them to
+ * _zero_ before calling res_init(), hoping to override what used
+ * to be the static default, we can't detect it and unexpected results
+ * will follow. Zero for any of these fields would make no sense,
+ * so one can safely assume that the applications were already getting
+ * unexpected results.
+ *
+ * _res.options is tricky since some apps were known to diddle the bits
+ * before res_init() was first called. We can't replicate that semantic
+ * with dynamic initialization (they may have turned bits off that are
+ * set in RES_DEFAULT). Our solution is to declare such applications
+ * "broken". They could fool us by setting RES_INIT but none do (yet).
+ */
+ if (!_res.retrans)
+ _res.retrans = RES_TIMEOUT;
+ if (!_res.retry)
+ _res.retry = 4;
+ if (!(_res.options & RES_INIT))
+ _res.options = RES_DEFAULT;
+
+ /*
+ * This one used to initialize implicitly to zero, so unless the app
+ * has set it to something in particular, we can randomize it now.
+ */
+ if (!_res.id)
+ _res.id = res_nrandomid(&_res);
+
+ return (__res_vinit(&_res, 1));
+}
+
+void
+p_query(const u_char *msg) {
+ fp_query(msg, stdout);
+}
+
+void
+fp_query(const u_char *msg, FILE *file) {
+ fp_nquery(msg, PACKETSZ, file);
+}
+
+void
+fp_nquery(const u_char *msg, int len, FILE *file) {
+ if ((_res.options & RES_INIT) == 0U && res_init() == -1)
+ return;
+
+ res_pquery(&_res, msg, len, file);
+}
+
+int
+res_mkquery(int op, /*!< opcode of query */
+ const char *dname, /*!< domain name */
+ int class, int type, /*!< class and type of query */
+ const u_char *data, /*!< resource record data */
+ int datalen, /*!< length of data */
+ const u_char *newrr_in, /*!< new rr for modify or append */
+ u_char *buf, /*!< buffer to put query */
+ int buflen) /*!< size of buffer */
+{
+ if ((_res.options & RES_INIT) == 0U && res_init() == -1) {
+ RES_SET_H_ERRNO(&_res, NETDB_INTERNAL);
+ return (-1);
+ }
+ return (res_nmkquery(&_res, op, dname, class, type,
+ data, datalen,
+ newrr_in, buf, buflen));
+}
+
+int
+res_mkupdate(ns_updrec *rrecp_in, u_char *buf, int buflen) {
+ if ((_res.options & RES_INIT) == 0U && res_init() == -1) {
+ RES_SET_H_ERRNO(&_res, NETDB_INTERNAL);
+ return (-1);
+ }
+
+ return (res_nmkupdate(&_res, rrecp_in, buf, buflen));
+}
+
+int
+res_query(const char *name, /*!< domain name */
+ int class, int type, /*!< class and type of query */
+ u_char *answer, /*!< buffer to put answer */
+ int anslen) /*!< size of answer buffer */
+{
+ if ((_res.options & RES_INIT) == 0U && res_init() == -1) {
+ RES_SET_H_ERRNO(&_res, NETDB_INTERNAL);
+ return (-1);
+ }
+ return (res_nquery(&_res, name, class, type, answer, anslen));
+}
+
+void
+res_send_setqhook(res_send_qhook hook) {
+ _res.qhook = hook;
+}
+
+void
+res_send_setrhook(res_send_rhook hook) {
+ _res.rhook = hook;
+}
+
+int
+res_isourserver(const struct sockaddr_in *inp) {
+ return (res_ourserver_p(&_res, inp));
+}
+
+int
+res_send(const u_char *buf, int buflen, u_char *ans, int anssiz) {
+ if ((_res.options & RES_INIT) == 0U && res_init() == -1) {
+ /* errno should have been set by res_init() in this case. */
+ return (-1);
+ }
+
+ return (res_nsend(&_res, buf, buflen, ans, anssiz));
+}
+
+int
+res_sendsigned(const u_char *buf, int buflen, ns_tsig_key *key,
+ u_char *ans, int anssiz)
+{
+ if ((_res.options & RES_INIT) == 0U && res_init() == -1) {
+ /* errno should have been set by res_init() in this case. */
+ return (-1);
+ }
+
+ return (res_nsendsigned(&_res, buf, buflen, key, ans, anssiz));
+}
+
+void
+res_close(void) {
+ res_nclose(&_res);
+}
+
+int
+res_update(ns_updrec *rrecp_in) {
+ if ((_res.options & RES_INIT) == 0U && res_init() == -1) {
+ RES_SET_H_ERRNO(&_res, NETDB_INTERNAL);
+ return (-1);
+ }
+
+ return (res_nupdate(&_res, rrecp_in, NULL));
+}
+
+int
+res_search(const char *name, /*!< domain name */
+ int class, int type, /*!< class and type of query */
+ u_char *answer, /*!< buffer to put answer */
+ int anslen) /*!< size of answer */
+{
+ if ((_res.options & RES_INIT) == 0U && res_init() == -1) {
+ RES_SET_H_ERRNO(&_res, NETDB_INTERNAL);
+ return (-1);
+ }
+
+ return (res_nsearch(&_res, name, class, type, answer, anslen));
+}
+
+int
+res_querydomain(const char *name,
+ const char *domain,
+ int class, int type, /*!< class and type of query */
+ u_char *answer, /*!< buffer to put answer */
+ int anslen) /*!< size of answer */
+{
+ if ((_res.options & RES_INIT) == 0U && res_init() == -1) {
+ RES_SET_H_ERRNO(&_res, NETDB_INTERNAL);
+ return (-1);
+ }
+
+ return (res_nquerydomain(&_res, name, domain,
+ class, type,
+ answer, anslen));
+}
+
+u_int
+res_randomid(void) {
+ if ((_res.options & RES_INIT) == 0U && res_init() == -1) {
+ RES_SET_H_ERRNO(&_res, NETDB_INTERNAL);
+ return (-1);
+ }
+
+ return (res_nrandomid(&_res));
+}
+
+const char *
+hostalias(const char *name) {
+ static char abuf[MAXDNAME];
+
+ return (res_hostalias(&_res, name, abuf, sizeof abuf));
+}
+
+#ifdef ultrix
+int
+local_hostname_length(const char *hostname) {
+ int len_host, len_domain;
+
+ if (!*_res.defdname)
+ res_init();
+ len_host = strlen(hostname);
+ len_domain = strlen(_res.defdname);
+ if (len_host > len_domain &&
+ !strcasecmp(hostname + len_host - len_domain, _res.defdname) &&
+ hostname[len_host - len_domain - 1] == '.')
+ return (len_host - len_domain - 1);
+ return (0);
+}
+#endif /*ultrix*/
+
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/res_debug.c b/usr/src/lib/libresolv2_joy/common/resolv/res_debug.c
new file mode 100644
index 0000000000..e11fb29612
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/res_debug.c
@@ -0,0 +1,1252 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+/*
+ * Portions Copyright (C) 2004, 2005, 2008, 2009 Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (C) 1996-2003 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Copyright (c) 1985
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Portions Copyright (c) 1993 by Digital Equipment Corporation.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies, and that
+ * the name of Digital Equipment Corporation not be used in advertising or
+ * publicity pertaining to distribution of the document or software without
+ * specific, written prior permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT
+ * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+
+/*
+ * Portions Copyright (c) 1995 by International Business Machines, Inc.
+ *
+ * International Business Machines, Inc. (hereinafter called IBM) grants
+ * permission under its copyrights to use, copy, modify, and distribute this
+ * Software with or without fee, provided that the above copyright notice and
+ * all paragraphs of this notice appear in all copies, and that the name of IBM
+ * not be used in connection with the marketing of any product incorporating
+ * the Software or modifications thereof, without specific, written prior
+ * permission.
+ *
+ * To the extent it has a right to do so, IBM grants an immunity from suit
+ * under its patents, if any, for the use, sale or manufacture of products to
+ * the extent that such products are used for performing Domain Name System
+ * dynamic updates in TCP/IP networks by means of the Software. No immunity is
+ * granted for any product per se or for any other function of any product.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", AND IBM DISCLAIMS ALL WARRANTIES,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE. IN NO EVENT SHALL IBM BE LIABLE FOR ANY SPECIAL,
+ * DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE, EVEN
+ * IF IBM IS APPRISED OF THE POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)res_debug.c 8.1 (Berkeley) 6/4/93";
+static const char rcsid[] = "$Id: res_debug.c,v 1.19 2009/02/26 11:20:20 tbox Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <math.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <resolv_mt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "port_after.h"
+
+#ifdef SPRINTF_CHAR
+# define SPRINTF(x) strlen(sprintf/**/x)
+#else
+# define SPRINTF(x) sprintf x
+#endif
+
+extern const char *_res_opcodes[];
+extern const char *_res_sectioncodes[];
+
+#ifndef ORIGINAL_ISC_CODE
+#pragma weak __dn_count_labels = dn_count_labels
+#pragma weak __fp_resstat = fp_resstat
+#pragma weak __loc_aton = loc_aton
+#pragma weak __loc_ntoa = loc_ntoa
+#pragma weak __p_cdname = p_cdname
+#pragma weak __p_class = p_class
+#pragma weak __p_section = p_section
+#pragma weak __p_time = p_time
+#pragma weak __p_type = p_type
+#pragma weak __sym_ntop = sym_ntop
+#pragma weak __sym_ntos = sym_ntos
+#pragma weak __sym_ston = sym_ston
+#endif /* ORIGINAL_ISC_CODE */
+
+/*%
+ * Print the current options.
+ */
+void
+fp_resstat(const res_state statp, FILE *file) {
+ u_long mask;
+
+ fprintf(file, ";; res options:");
+ for (mask = 1; mask != 0U; mask <<= 1)
+ if (statp->options & mask)
+ fprintf(file, " %s", p_option(mask));
+ putc('\n', file);
+}
+
+static void
+do_section(const res_state statp,
+ ns_msg *handle, ns_sect section,
+ int pflag, FILE *file)
+{
+ int n, sflag, rrnum;
+ static int buflen = 2048;
+ char *buf;
+ ns_opcode opcode;
+ ns_rr rr;
+
+ /*
+ * Print answer records.
+ */
+ sflag = (statp->pfcode & pflag);
+ if (statp->pfcode && !sflag)
+ return;
+
+ buf = malloc(buflen);
+ if (buf == NULL) {
+ fprintf(file, ";; memory allocation failure\n");
+ return;
+ }
+
+ opcode = (ns_opcode) ns_msg_getflag(*handle, ns_f_opcode);
+ rrnum = 0;
+ for (;;) {
+ if (ns_parserr(handle, section, rrnum, &rr)) {
+ if (errno != ENODEV)
+ fprintf(file, ";; ns_parserr: %s\n",
+ strerror(errno));
+ else if (rrnum > 0 && sflag != 0 &&
+ (statp->pfcode & RES_PRF_HEAD1))
+ putc('\n', file);
+ goto cleanup;
+ }
+ if (rrnum == 0 && sflag != 0 && (statp->pfcode & RES_PRF_HEAD1))
+ fprintf(file, ";; %s SECTION:\n",
+ p_section(section, opcode));
+ if (section == ns_s_qd)
+ fprintf(file, ";;\t%s, type = %s, class = %s\n",
+ ns_rr_name(rr),
+ p_type(ns_rr_type(rr)),
+ p_class(ns_rr_class(rr)));
+ else if (section == ns_s_ar && ns_rr_type(rr) == ns_t_opt) {
+ u_int16_t optcode, optlen, rdatalen = ns_rr_rdlen(rr);
+ u_int32_t ttl = ns_rr_ttl(rr);
+
+ fprintf(file,
+ "; EDNS: version: %u, udp=%u, flags=%04x\n",
+ (ttl>>16)&0xff, ns_rr_class(rr), ttl&0xffff);
+
+ while (rdatalen >= 4) {
+ const u_char *cp = ns_rr_rdata(rr);
+ int i;
+
+ GETSHORT(optcode, cp);
+ GETSHORT(optlen, cp);
+
+ if (optcode == NS_OPT_NSID) {
+ fputs("; NSID: ", file);
+ if (optlen == 0) {
+ fputs("; NSID\n", file);
+ } else {
+ fputs("; NSID: ", file);
+ for (i = 0; i < optlen; i++)
+ fprintf(file, "%02x ",
+ cp[i]);
+ fputs(" (",file);
+ for (i = 0; i < optlen; i++)
+ fprintf(file, "%c",
+ isprint(cp[i])?
+ cp[i] : '.');
+ fputs(")\n", file);
+ }
+ } else {
+ if (optlen == 0) {
+ fprintf(file, "; OPT=%u\n",
+ optcode);
+ } else {
+ fprintf(file, "; OPT=%u: ",
+ optcode);
+ for (i = 0; i < optlen; i++)
+ fprintf(file, "%02x ",
+ cp[i]);
+ fputs(" (",file);
+ for (i = 0; i < optlen; i++)
+ fprintf(file, "%c",
+ isprint(cp[i]) ?
+ cp[i] : '.');
+ fputs(")\n", file);
+ }
+ }
+ rdatalen -= 4 + optlen;
+ }
+ } else {
+ n = ns_sprintrr(handle, &rr, NULL, NULL,
+ buf, buflen);
+ if (n < 0) {
+ if (errno == ENOSPC) {
+ free(buf);
+ buf = NULL;
+ if (buflen < 131072)
+ buf = malloc(buflen += 1024);
+ if (buf == NULL) {
+ fprintf(file,
+ ";; memory allocation failure\n");
+ return;
+ }
+ continue;
+ }
+ fprintf(file, ";; ns_sprintrr: %s\n",
+ strerror(errno));
+ goto cleanup;
+ }
+ fputs(buf, file);
+ fputc('\n', file);
+ }
+ rrnum++;
+ }
+ cleanup:
+ if (buf != NULL)
+ free(buf);
+}
+
+/*%
+ * Print the contents of a query.
+ * This is intended to be primarily a debugging routine.
+ */
+void
+res_pquery(const res_state statp, const u_char *msg, int len, FILE *file) {
+ ns_msg handle;
+ int qdcount, ancount, nscount, arcount;
+ u_int opcode, rcode, id;
+
+ if (ns_initparse(msg, len, &handle) < 0) {
+ fprintf(file, ";; ns_initparse: %s\n", strerror(errno));
+ return;
+ }
+ opcode = ns_msg_getflag(handle, ns_f_opcode);
+ rcode = ns_msg_getflag(handle, ns_f_rcode);
+ id = ns_msg_id(handle);
+ qdcount = ns_msg_count(handle, ns_s_qd);
+ ancount = ns_msg_count(handle, ns_s_an);
+ nscount = ns_msg_count(handle, ns_s_ns);
+ arcount = ns_msg_count(handle, ns_s_ar);
+
+ /*
+ * Print header fields.
+ */
+ if ((!statp->pfcode) || (statp->pfcode & RES_PRF_HEADX) || rcode)
+ fprintf(file,
+ ";; ->>HEADER<<- opcode: %s, status: %s, id: %d\n",
+ _res_opcodes[opcode], p_rcode(rcode), id);
+ if ((!statp->pfcode) || (statp->pfcode & RES_PRF_HEADX))
+ putc(';', file);
+ if ((!statp->pfcode) || (statp->pfcode & RES_PRF_HEAD2)) {
+ fprintf(file, "; flags:");
+ if (ns_msg_getflag(handle, ns_f_qr))
+ fprintf(file, " qr");
+ if (ns_msg_getflag(handle, ns_f_aa))
+ fprintf(file, " aa");
+ if (ns_msg_getflag(handle, ns_f_tc))
+ fprintf(file, " tc");
+ if (ns_msg_getflag(handle, ns_f_rd))
+ fprintf(file, " rd");
+ if (ns_msg_getflag(handle, ns_f_ra))
+ fprintf(file, " ra");
+ if (ns_msg_getflag(handle, ns_f_z))
+ fprintf(file, " ??");
+ if (ns_msg_getflag(handle, ns_f_ad))
+ fprintf(file, " ad");
+ if (ns_msg_getflag(handle, ns_f_cd))
+ fprintf(file, " cd");
+ }
+ if ((!statp->pfcode) || (statp->pfcode & RES_PRF_HEAD1)) {
+ fprintf(file, "; %s: %d",
+ p_section(ns_s_qd, opcode), qdcount);
+ fprintf(file, ", %s: %d",
+ p_section(ns_s_an, opcode), ancount);
+ fprintf(file, ", %s: %d",
+ p_section(ns_s_ns, opcode), nscount);
+ fprintf(file, ", %s: %d",
+ p_section(ns_s_ar, opcode), arcount);
+ }
+ if ((!statp->pfcode) || (statp->pfcode &
+ (RES_PRF_HEADX | RES_PRF_HEAD2 | RES_PRF_HEAD1))) {
+ putc('\n',file);
+ }
+ /*
+ * Print the various sections.
+ */
+ do_section(statp, &handle, ns_s_qd, RES_PRF_QUES, file);
+ do_section(statp, &handle, ns_s_an, RES_PRF_ANS, file);
+ do_section(statp, &handle, ns_s_ns, RES_PRF_AUTH, file);
+ do_section(statp, &handle, ns_s_ar, RES_PRF_ADD, file);
+ if (qdcount == 0 && ancount == 0 &&
+ nscount == 0 && arcount == 0)
+ putc('\n', file);
+}
+
+const u_char *
+p_cdnname(const u_char *cp, const u_char *msg, int len, FILE *file) {
+ char name[MAXDNAME];
+ int n;
+
+ if ((n = dn_expand(msg, msg + len, cp, name, sizeof name)) < 0)
+ return (NULL);
+ if (name[0] == '\0')
+ putc('.', file);
+ else
+ fputs(name, file);
+ return (cp + n);
+}
+
+const u_char *
+p_cdname(const u_char *cp, const u_char *msg, FILE *file) {
+ return (p_cdnname(cp, msg, PACKETSZ, file));
+}
+
+/*%
+ * Return a fully-qualified domain name from a compressed name (with
+ length supplied). */
+
+const u_char *
+p_fqnname(cp, msg, msglen, name, namelen)
+ const u_char *cp, *msg;
+ int msglen;
+ char *name;
+ int namelen;
+{
+ int n, newlen;
+
+ if ((n = dn_expand(msg, cp + msglen, cp, name, namelen)) < 0)
+ return (NULL);
+ newlen = strlen(name);
+ if (newlen == 0 || name[newlen - 1] != '.') {
+ if (newlen + 1 >= namelen) /*%< Lack space for final dot */
+ return (NULL);
+ else
+ strcpy(name + newlen, ".");
+ }
+ return (cp + n);
+}
+
+/* XXX: the rest of these functions need to become length-limited, too. */
+
+const u_char *
+p_fqname(const u_char *cp, const u_char *msg, FILE *file) {
+ char name[MAXDNAME];
+ const u_char *n;
+
+ n = p_fqnname(cp, msg, MAXCDNAME, name, sizeof name);
+ if (n == NULL)
+ return (NULL);
+ fputs(name, file);
+ return (n);
+}
+
+/*%
+ * Names of RR classes and qclasses. Classes and qclasses are the same, except
+ * that C_ANY is a qclass but not a class. (You can ask for records of class
+ * C_ANY, but you can't have any records of that class in the database.)
+ */
+const struct res_sym __p_class_syms[] = {
+ {C_IN, "IN", (char *)0},
+ {C_CHAOS, "CH", (char *)0},
+ {C_CHAOS, "CHAOS", (char *)0},
+ {C_HS, "HS", (char *)0},
+ {C_HS, "HESIOD", (char *)0},
+ {C_ANY, "ANY", (char *)0},
+ {C_NONE, "NONE", (char *)0},
+ {C_IN, (char *)0, (char *)0}
+};
+
+/*%
+ * Names of message sections.
+ */
+const struct res_sym __p_default_section_syms[] = {
+ {ns_s_qd, "QUERY", (char *)0},
+ {ns_s_an, "ANSWER", (char *)0},
+ {ns_s_ns, "AUTHORITY", (char *)0},
+ {ns_s_ar, "ADDITIONAL", (char *)0},
+ {0, (char *)0, (char *)0}
+};
+
+const struct res_sym __p_update_section_syms[] = {
+ {S_ZONE, "ZONE", (char *)0},
+ {S_PREREQ, "PREREQUISITE", (char *)0},
+ {S_UPDATE, "UPDATE", (char *)0},
+ {S_ADDT, "ADDITIONAL", (char *)0},
+ {0, (char *)0, (char *)0}
+};
+
+const struct res_sym __p_key_syms[] = {
+ {NS_ALG_MD5RSA, "RSA", "RSA KEY with MD5 hash"},
+ {NS_ALG_DH, "DH", "Diffie Hellman"},
+ {NS_ALG_DSA, "DSA", "Digital Signature Algorithm"},
+ {NS_ALG_EXPIRE_ONLY, "EXPIREONLY", "No algorithm"},
+ {NS_ALG_PRIVATE_OID, "PRIVATE", "Algorithm obtained from OID"},
+ {0, NULL, NULL}
+};
+
+const struct res_sym __p_cert_syms[] = {
+ {cert_t_pkix, "PKIX", "PKIX (X.509v3) Certificate"},
+ {cert_t_spki, "SPKI", "SPKI certificate"},
+ {cert_t_pgp, "PGP", "PGP certificate"},
+ {cert_t_url, "URL", "URL Private"},
+ {cert_t_oid, "OID", "OID Private"},
+ {0, NULL, NULL}
+};
+
+/*%
+ * Names of RR types and qtypes. Types and qtypes are the same, except
+ * that T_ANY is a qtype but not a type. (You can ask for records of type
+ * T_ANY, but you can't have any records of that type in the database.)
+ */
+const struct res_sym __p_type_syms[] = {
+ {ns_t_a, "A", "address"},
+ {ns_t_ns, "NS", "name server"},
+ {ns_t_md, "MD", "mail destination (deprecated)"},
+ {ns_t_mf, "MF", "mail forwarder (deprecated)"},
+ {ns_t_cname, "CNAME", "canonical name"},
+ {ns_t_soa, "SOA", "start of authority"},
+ {ns_t_mb, "MB", "mailbox"},
+ {ns_t_mg, "MG", "mail group member"},
+ {ns_t_mr, "MR", "mail rename"},
+ {ns_t_null, "NULL", "null"},
+ {ns_t_wks, "WKS", "well-known service (deprecated)"},
+ {ns_t_ptr, "PTR", "domain name pointer"},
+ {ns_t_hinfo, "HINFO", "host information"},
+ {ns_t_minfo, "MINFO", "mailbox information"},
+ {ns_t_mx, "MX", "mail exchanger"},
+ {ns_t_txt, "TXT", "text"},
+ {ns_t_rp, "RP", "responsible person"},
+ {ns_t_afsdb, "AFSDB", "DCE or AFS server"},
+ {ns_t_x25, "X25", "X25 address"},
+ {ns_t_isdn, "ISDN", "ISDN address"},
+ {ns_t_rt, "RT", "router"},
+ {ns_t_nsap, "NSAP", "nsap address"},
+ {ns_t_nsap_ptr, "NSAP_PTR", "domain name pointer"},
+ {ns_t_sig, "SIG", "signature"},
+ {ns_t_key, "KEY", "key"},
+ {ns_t_px, "PX", "mapping information"},
+ {ns_t_gpos, "GPOS", "geographical position (withdrawn)"},
+ {ns_t_aaaa, "AAAA", "IPv6 address"},
+ {ns_t_loc, "LOC", "location"},
+ {ns_t_nxt, "NXT", "next valid name (unimplemented)"},
+ {ns_t_eid, "EID", "endpoint identifier (unimplemented)"},
+ {ns_t_nimloc, "NIMLOC", "NIMROD locator (unimplemented)"},
+ {ns_t_srv, "SRV", "server selection"},
+ {ns_t_atma, "ATMA", "ATM address (unimplemented)"},
+ {ns_t_naptr, "NAPTR", "naptr"},
+ {ns_t_kx, "KX", "key exchange"},
+ {ns_t_cert, "CERT", "certificate"},
+ {ns_t_a6, "A", "IPv6 address (experminental)"},
+ {ns_t_dname, "DNAME", "non-terminal redirection"},
+ {ns_t_opt, "OPT", "opt"},
+ {ns_t_apl, "apl", "apl"},
+ {ns_t_ds, "DS", "delegation signer"},
+ {ns_t_sshfp, "SSFP", "SSH fingerprint"},
+ {ns_t_ipseckey, "IPSECKEY", "IPSEC key"},
+ {ns_t_rrsig, "RRSIG", "rrsig"},
+ {ns_t_nsec, "NSEC", "nsec"},
+ {ns_t_dnskey, "DNSKEY", "DNS key"},
+ {ns_t_dhcid, "DHCID", "dynamic host configuration identifier"},
+ {ns_t_nsec3, "NSEC3", "nsec3"},
+ {ns_t_nsec3param, "NSEC3PARAM", "NSEC3 parameters"},
+ {ns_t_hip, "HIP", "host identity protocol"},
+ {ns_t_spf, "SPF", "sender policy framework"},
+ {ns_t_tkey, "TKEY", "tkey"},
+ {ns_t_tsig, "TSIG", "transaction signature"},
+ {ns_t_ixfr, "IXFR", "incremental zone transfer"},
+ {ns_t_axfr, "AXFR", "zone transfer"},
+ {ns_t_zxfr, "ZXFR", "compressed zone transfer"},
+ {ns_t_mailb, "MAILB", "mailbox-related data (deprecated)"},
+ {ns_t_maila, "MAILA", "mail agent (deprecated)"},
+ {ns_t_naptr, "NAPTR", "URN Naming Authority"},
+ {ns_t_kx, "KX", "Key Exchange"},
+ {ns_t_cert, "CERT", "Certificate"},
+ {ns_t_a6, "A6", "IPv6 Address"},
+ {ns_t_dname, "DNAME", "dname"},
+ {ns_t_sink, "SINK", "Kitchen Sink (experimental)"},
+ {ns_t_opt, "OPT", "EDNS Options"},
+ {ns_t_any, "ANY", "\"any\""},
+ {ns_t_dlv, "DLV", "DNSSEC look-aside validation"},
+ {0, NULL, NULL}
+};
+
+/*%
+ * Names of DNS rcodes.
+ */
+const struct res_sym __p_rcode_syms[] = {
+ {ns_r_noerror, "NOERROR", "no error"},
+ {ns_r_formerr, "FORMERR", "format error"},
+ {ns_r_servfail, "SERVFAIL", "server failed"},
+ {ns_r_nxdomain, "NXDOMAIN", "no such domain name"},
+ {ns_r_notimpl, "NOTIMP", "not implemented"},
+ {ns_r_refused, "REFUSED", "refused"},
+ {ns_r_yxdomain, "YXDOMAIN", "domain name exists"},
+ {ns_r_yxrrset, "YXRRSET", "rrset exists"},
+ {ns_r_nxrrset, "NXRRSET", "rrset doesn't exist"},
+ {ns_r_notauth, "NOTAUTH", "not authoritative"},
+ {ns_r_notzone, "NOTZONE", "Not in zone"},
+ {ns_r_max, "", ""},
+ {ns_r_badsig, "BADSIG", "bad signature"},
+ {ns_r_badkey, "BADKEY", "bad key"},
+ {ns_r_badtime, "BADTIME", "bad time"},
+ {0, NULL, NULL}
+};
+
+int
+sym_ston(const struct res_sym *syms, const char *name, int *success) {
+ for ((void)NULL; syms->name != 0; syms++) {
+ if (strcasecmp (name, syms->name) == 0) {
+ if (success)
+ *success = 1;
+ return (syms->number);
+ }
+ }
+ if (success)
+ *success = 0;
+ return (syms->number); /*%< The default value. */
+}
+
+const char *
+sym_ntos(const struct res_sym *syms, int number, int *success) {
+ char *unname = sym_ntos_unname;
+
+ for ((void)NULL; syms->name != 0; syms++) {
+ if (number == syms->number) {
+ if (success)
+ *success = 1;
+ return (syms->name);
+ }
+ }
+
+ sprintf(unname, "%d", number); /*%< XXX nonreentrant */
+ if (success)
+ *success = 0;
+ return (unname);
+}
+
+const char *
+sym_ntop(const struct res_sym *syms, int number, int *success) {
+ char *unname = sym_ntop_unname;
+
+ for ((void)NULL; syms->name != 0; syms++) {
+ if (number == syms->number) {
+ if (success)
+ *success = 1;
+ return (syms->humanname);
+ }
+ }
+ sprintf(unname, "%d", number); /*%< XXX nonreentrant */
+ if (success)
+ *success = 0;
+ return (unname);
+}
+
+/*%
+ * Return a string for the type.
+ */
+const char *
+p_type(int type) {
+ int success;
+ const char *result;
+ static char typebuf[20];
+
+ result = sym_ntos(__p_type_syms, type, &success);
+ if (success)
+ return (result);
+ if (type < 0 || type > 0xffff)
+ return ("BADTYPE");
+ sprintf(typebuf, "TYPE%d", type);
+ return (typebuf);
+}
+
+/*%
+ * Return a string for the type.
+ */
+const char *
+p_section(int section, int opcode) {
+ const struct res_sym *symbols;
+
+ switch (opcode) {
+ case ns_o_update:
+ symbols = __p_update_section_syms;
+ break;
+ default:
+ symbols = __p_default_section_syms;
+ break;
+ }
+ return (sym_ntos(symbols, section, (int *)0));
+}
+
+/*%
+ * Return a mnemonic for class.
+ */
+const char *
+p_class(int class) {
+ int success;
+ const char *result;
+ static char classbuf[20];
+
+ result = sym_ntos(__p_class_syms, class, &success);
+ if (success)
+ return (result);
+ if (class < 0 || class > 0xffff)
+ return ("BADCLASS");
+ sprintf(classbuf, "CLASS%d", class);
+ return (classbuf);
+}
+
+/*%
+ * Return a mnemonic for an option
+ */
+const char *
+p_option(u_long option) {
+ char *nbuf = p_option_nbuf;
+
+ switch (option) {
+ case RES_INIT: return "init";
+ case RES_DEBUG: return "debug";
+ case RES_AAONLY: return "aaonly(unimpl)";
+ case RES_USEVC: return "usevc";
+ case RES_PRIMARY: return "primry(unimpl)";
+ case RES_IGNTC: return "igntc";
+ case RES_RECURSE: return "recurs";
+ case RES_DEFNAMES: return "defnam";
+ case RES_STAYOPEN: return "styopn";
+ case RES_DNSRCH: return "dnsrch";
+ case RES_INSECURE1: return "insecure1";
+ case RES_INSECURE2: return "insecure2";
+ case RES_NOALIASES: return "noaliases";
+ case RES_USE_INET6: return "inet6";
+#ifdef RES_USE_EDNS0 /*%< KAME extension */
+ case RES_USE_EDNS0: return "edns0";
+ case RES_NSID: return "nsid";
+#endif
+#ifdef RES_USE_DNAME
+ case RES_USE_DNAME: return "dname";
+#endif
+#ifdef RES_USE_DNSSEC
+ case RES_USE_DNSSEC: return "dnssec";
+#endif
+#ifdef RES_NOTLDQUERY
+ case RES_NOTLDQUERY: return "no-tld-query";
+#endif
+#ifdef RES_NO_NIBBLE2
+ case RES_NO_NIBBLE2: return "no-nibble2";
+#endif
+ /* XXX nonreentrant */
+ default: sprintf(nbuf, "?0x%lx?", (u_long)option);
+ return (nbuf);
+ }
+}
+
+/*%
+ * Return a mnemonic for a time to live.
+ */
+const char *
+p_time(u_int32_t value) {
+ char *nbuf = p_time_nbuf;
+
+ if (ns_format_ttl(value, nbuf, sizeof nbuf) < 0)
+ sprintf(nbuf, "%u", value);
+ return (nbuf);
+}
+
+/*%
+ * Return a string for the rcode.
+ */
+const char *
+p_rcode(int rcode) {
+ return (sym_ntos(__p_rcode_syms, rcode, (int *)0));
+}
+
+/*%
+ * Return a string for a res_sockaddr_union.
+ */
+const char *
+p_sockun(union res_sockaddr_union u, char *buf, size_t size) {
+ char ret[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:123.123.123.123"];
+
+ switch (u.sin.sin_family) {
+ case AF_INET:
+ inet_ntop(AF_INET, &u.sin.sin_addr, ret, sizeof ret);
+ break;
+#ifdef HAS_INET6_STRUCTS
+ case AF_INET6:
+ inet_ntop(AF_INET6, &u.sin6.sin6_addr, ret, sizeof ret);
+ break;
+#endif
+ default:
+ sprintf(ret, "[af%d]", u.sin.sin_family);
+ break;
+ }
+ if (size > 0U) {
+ strncpy(buf, ret, size - 1);
+ buf[size - 1] = '0';
+ }
+ return (buf);
+}
+
+/*%
+ * routines to convert between on-the-wire RR format and zone file format.
+ * Does not contain conversion to/from decimal degrees; divide or multiply
+ * by 60*60*1000 for that.
+ */
+
+static unsigned int poweroften[10] = {1, 10, 100, 1000, 10000, 100000,
+ 1000000,10000000,100000000,1000000000};
+
+/*% takes an XeY precision/size value, returns a string representation. */
+static const char *
+precsize_ntoa(prec)
+ u_int8_t prec;
+{
+ char *retbuf = precsize_ntoa_retbuf;
+ unsigned long val;
+ int mantissa, exponent;
+
+ mantissa = (int)((prec >> 4) & 0x0f) % 10;
+ exponent = (int)((prec >> 0) & 0x0f) % 10;
+
+ val = mantissa * poweroften[exponent];
+
+ (void) sprintf(retbuf, "%lu.%.2lu", val/100, val%100);
+ return (retbuf);
+}
+
+/*% converts ascii size/precision X * 10**Y(cm) to 0xXY. moves pointer. */
+static u_int8_t
+precsize_aton(const char **strptr) {
+ unsigned int mval = 0, cmval = 0;
+ u_int8_t retval = 0;
+ const char *cp;
+ int exponent;
+ int mantissa;
+
+ cp = *strptr;
+
+ while (isdigit((unsigned char)*cp))
+ mval = mval * 10 + (*cp++ - '0');
+
+ if (*cp == '.') { /*%< centimeters */
+ cp++;
+ if (isdigit((unsigned char)*cp)) {
+ cmval = (*cp++ - '0') * 10;
+ if (isdigit((unsigned char)*cp)) {
+ cmval += (*cp++ - '0');
+ }
+ }
+ }
+ cmval = (mval * 100) + cmval;
+
+ for (exponent = 0; exponent < 9; exponent++)
+ if (cmval < poweroften[exponent+1])
+ break;
+
+ mantissa = cmval / poweroften[exponent];
+ if (mantissa > 9)
+ mantissa = 9;
+
+ retval = (mantissa << 4) | exponent;
+
+ *strptr = cp;
+
+ return (retval);
+}
+
+/*% converts ascii lat/lon to unsigned encoded 32-bit number. moves pointer. */
+static u_int32_t
+latlon2ul(const char **latlonstrptr, int *which) {
+ const char *cp;
+ u_int32_t retval;
+ int deg = 0, min = 0, secs = 0, secsfrac = 0;
+
+ cp = *latlonstrptr;
+
+ while (isdigit((unsigned char)*cp))
+ deg = deg * 10 + (*cp++ - '0');
+
+ while (isspace((unsigned char)*cp))
+ cp++;
+
+ if (!(isdigit((unsigned char)*cp)))
+ goto fndhemi;
+
+ while (isdigit((unsigned char)*cp))
+ min = min * 10 + (*cp++ - '0');
+
+ while (isspace((unsigned char)*cp))
+ cp++;
+
+ if (!(isdigit((unsigned char)*cp)))
+ goto fndhemi;
+
+ while (isdigit((unsigned char)*cp))
+ secs = secs * 10 + (*cp++ - '0');
+
+ if (*cp == '.') { /*%< decimal seconds */
+ cp++;
+ if (isdigit((unsigned char)*cp)) {
+ secsfrac = (*cp++ - '0') * 100;
+ if (isdigit((unsigned char)*cp)) {
+ secsfrac += (*cp++ - '0') * 10;
+ if (isdigit((unsigned char)*cp)) {
+ secsfrac += (*cp++ - '0');
+ }
+ }
+ }
+ }
+
+ while (!isspace((unsigned char)*cp)) /*%< if any trailing garbage */
+ cp++;
+
+ while (isspace((unsigned char)*cp))
+ cp++;
+
+ fndhemi:
+ switch (*cp) {
+ case 'N': case 'n':
+ case 'E': case 'e':
+ retval = ((unsigned)1<<31)
+ + (((((deg * 60) + min) * 60) + secs) * 1000)
+ + secsfrac;
+ break;
+ case 'S': case 's':
+ case 'W': case 'w':
+ retval = ((unsigned)1<<31)
+ - (((((deg * 60) + min) * 60) + secs) * 1000)
+ - secsfrac;
+ break;
+ default:
+ retval = 0; /*%< invalid value -- indicates error */
+ break;
+ }
+
+ switch (*cp) {
+ case 'N': case 'n':
+ case 'S': case 's':
+ *which = 1; /*%< latitude */
+ break;
+ case 'E': case 'e':
+ case 'W': case 'w':
+ *which = 2; /*%< longitude */
+ break;
+ default:
+ *which = 0; /*%< error */
+ break;
+ }
+
+ cp++; /*%< skip the hemisphere */
+ while (!isspace((unsigned char)*cp)) /*%< if any trailing garbage */
+ cp++;
+
+ while (isspace((unsigned char)*cp)) /*%< move to next field */
+ cp++;
+
+ *latlonstrptr = cp;
+
+ return (retval);
+}
+
+/*%
+ * converts a zone file representation in a string to an RDATA on-the-wire
+ * representation. */
+int
+loc_aton(ascii, binary)
+ const char *ascii;
+ u_char *binary;
+{
+ const char *cp, *maxcp;
+ u_char *bcp;
+
+ u_int32_t latit = 0, longit = 0, alt = 0;
+ u_int32_t lltemp1 = 0, lltemp2 = 0;
+ int altmeters = 0, altfrac = 0, altsign = 1;
+ u_int8_t hp = 0x16; /*%< default = 1e6 cm = 10000.00m = 10km */
+ u_int8_t vp = 0x13; /*%< default = 1e3 cm = 10.00m */
+ u_int8_t siz = 0x12; /*%< default = 1e2 cm = 1.00m */
+ int which1 = 0, which2 = 0;
+
+ cp = ascii;
+ maxcp = cp + strlen(ascii);
+
+ lltemp1 = latlon2ul(&cp, &which1);
+
+ lltemp2 = latlon2ul(&cp, &which2);
+
+ switch (which1 + which2) {
+ case 3: /*%< 1 + 2, the only valid combination */
+ if ((which1 == 1) && (which2 == 2)) { /*%< normal case */
+ latit = lltemp1;
+ longit = lltemp2;
+ } else if ((which1 == 2) && (which2 == 1)) { /*%< reversed */
+ longit = lltemp1;
+ latit = lltemp2;
+ } else { /*%< some kind of brokenness */
+ return (0);
+ }
+ break;
+ default: /*%< we didn't get one of each */
+ return (0);
+ }
+
+ /* altitude */
+ if (*cp == '-') {
+ altsign = -1;
+ cp++;
+ }
+
+ if (*cp == '+')
+ cp++;
+
+ while (isdigit((unsigned char)*cp))
+ altmeters = altmeters * 10 + (*cp++ - '0');
+
+ if (*cp == '.') { /*%< decimal meters */
+ cp++;
+ if (isdigit((unsigned char)*cp)) {
+ altfrac = (*cp++ - '0') * 10;
+ if (isdigit((unsigned char)*cp)) {
+ altfrac += (*cp++ - '0');
+ }
+ }
+ }
+
+ alt = (10000000 + (altsign * (altmeters * 100 + altfrac)));
+
+ while (!isspace((unsigned char)*cp) && (cp < maxcp)) /*%< if trailing garbage or m */
+ cp++;
+
+ while (isspace((unsigned char)*cp) && (cp < maxcp))
+ cp++;
+
+ if (cp >= maxcp)
+ goto defaults;
+
+ siz = precsize_aton(&cp);
+
+ while (!isspace((unsigned char)*cp) && (cp < maxcp)) /*%< if trailing garbage or m */
+ cp++;
+
+ while (isspace((unsigned char)*cp) && (cp < maxcp))
+ cp++;
+
+ if (cp >= maxcp)
+ goto defaults;
+
+ hp = precsize_aton(&cp);
+
+ while (!isspace((unsigned char)*cp) && (cp < maxcp)) /*%< if trailing garbage or m */
+ cp++;
+
+ while (isspace((unsigned char)*cp) && (cp < maxcp))
+ cp++;
+
+ if (cp >= maxcp)
+ goto defaults;
+
+ vp = precsize_aton(&cp);
+
+ defaults:
+
+ bcp = binary;
+ *bcp++ = (u_int8_t) 0; /*%< version byte */
+ *bcp++ = siz;
+ *bcp++ = hp;
+ *bcp++ = vp;
+ PUTLONG(latit,bcp);
+ PUTLONG(longit,bcp);
+ PUTLONG(alt,bcp);
+
+ return (16); /*%< size of RR in octets */
+}
+
+/*% takes an on-the-wire LOC RR and formats it in a human readable format. */
+const char *
+loc_ntoa(binary, ascii)
+ const u_char *binary;
+ char *ascii;
+{
+ static const char *error = "?";
+ static char tmpbuf[sizeof
+"1000 60 60.000 N 1000 60 60.000 W -12345678.00m 90000000.00m 90000000.00m 90000000.00m"];
+ const u_char *cp = binary;
+
+ int latdeg, latmin, latsec, latsecfrac;
+ int longdeg, longmin, longsec, longsecfrac;
+ char northsouth, eastwest;
+ const char *altsign;
+ int altmeters, altfrac;
+
+ const u_int32_t referencealt = 100000 * 100;
+
+ int32_t latval, longval, altval;
+ u_int32_t templ;
+ u_int8_t sizeval, hpval, vpval, versionval;
+
+ char *sizestr, *hpstr, *vpstr;
+
+ versionval = *cp++;
+
+ if (ascii == NULL)
+ ascii = tmpbuf;
+
+ if (versionval) {
+ (void) sprintf(ascii, "; error: unknown LOC RR version");
+ return (ascii);
+ }
+
+ sizeval = *cp++;
+
+ hpval = *cp++;
+ vpval = *cp++;
+
+ GETLONG(templ, cp);
+ latval = (templ - ((unsigned)1<<31));
+
+ GETLONG(templ, cp);
+ longval = (templ - ((unsigned)1<<31));
+
+ GETLONG(templ, cp);
+ if (templ < referencealt) { /*%< below WGS 84 spheroid */
+ altval = referencealt - templ;
+ altsign = "-";
+ } else {
+ altval = templ - referencealt;
+ altsign = "";
+ }
+
+ if (latval < 0) {
+ northsouth = 'S';
+ latval = -latval;
+ } else
+ northsouth = 'N';
+
+ latsecfrac = latval % 1000;
+ latval = latval / 1000;
+ latsec = latval % 60;
+ latval = latval / 60;
+ latmin = latval % 60;
+ latval = latval / 60;
+ latdeg = latval;
+
+ if (longval < 0) {
+ eastwest = 'W';
+ longval = -longval;
+ } else
+ eastwest = 'E';
+
+ longsecfrac = longval % 1000;
+ longval = longval / 1000;
+ longsec = longval % 60;
+ longval = longval / 60;
+ longmin = longval % 60;
+ longval = longval / 60;
+ longdeg = longval;
+
+ altfrac = altval % 100;
+ altmeters = (altval / 100);
+
+ sizestr = strdup(precsize_ntoa(sizeval));
+ hpstr = strdup(precsize_ntoa(hpval));
+ vpstr = strdup(precsize_ntoa(vpval));
+
+ sprintf(ascii,
+ "%d %.2d %.2d.%.3d %c %d %.2d %.2d.%.3d %c %s%d.%.2dm %sm %sm %sm",
+ latdeg, latmin, latsec, latsecfrac, northsouth,
+ longdeg, longmin, longsec, longsecfrac, eastwest,
+ altsign, altmeters, altfrac,
+ (sizestr != NULL) ? sizestr : error,
+ (hpstr != NULL) ? hpstr : error,
+ (vpstr != NULL) ? vpstr : error);
+
+ if (sizestr != NULL)
+ free(sizestr);
+ if (hpstr != NULL)
+ free(hpstr);
+ if (vpstr != NULL)
+ free(vpstr);
+
+ return (ascii);
+}
+
+
+/*% Return the number of DNS hierarchy levels in the name. */
+int
+dn_count_labels(const char *name) {
+ int i, len, count;
+
+ len = strlen(name);
+ for (i = 0, count = 0; i < len; i++) {
+ /* XXX need to check for \. or use named's nlabels(). */
+ if (name[i] == '.')
+ count++;
+ }
+
+ /* don't count initial wildcard */
+ if (name[0] == '*')
+ if (count)
+ count--;
+
+ /* don't count the null label for root. */
+ /* if terminating '.' not found, must adjust */
+ /* count to include last label */
+ if (len > 0 && name[len-1] != '.')
+ count++;
+ return (count);
+}
+
+/*%
+ * Make dates expressed in seconds-since-Jan-1-1970 easy to read.
+ * SIG records are required to be printed like this, by the Secure DNS RFC.
+ */
+char *
+p_secstodate (u_long secs) {
+ char *output = p_secstodate_output;
+ time_t clock = secs;
+ struct tm *time;
+#ifdef HAVE_TIME_R
+ struct tm res;
+
+ time = gmtime_r(&clock, &res);
+#else
+ time = gmtime(&clock);
+#endif
+ time->tm_year += 1900;
+ time->tm_mon += 1;
+ sprintf(output, "%04d%02d%02d%02d%02d%02d",
+ time->tm_year, time->tm_mon, time->tm_mday,
+ time->tm_hour, time->tm_min, time->tm_sec);
+ return (output);
+}
+
+u_int16_t
+res_nametoclass(const char *buf, int *successp) {
+ unsigned long result;
+ char *endptr;
+ int success;
+
+ result = sym_ston(__p_class_syms, buf, &success);
+ if (success)
+ goto done;
+
+ if (strncasecmp(buf, "CLASS", 5) != 0 ||
+ !isdigit((unsigned char)buf[5]))
+ goto done;
+ errno = 0;
+ result = strtoul(buf + 5, &endptr, 10);
+ if (errno == 0 && *endptr == '\0' && result <= 0xffffU)
+ success = 1;
+ done:
+ if (successp)
+ *successp = success;
+ return (result);
+}
+
+u_int16_t
+res_nametotype(const char *buf, int *successp) {
+ unsigned long result;
+ char *endptr;
+ int success;
+
+ result = sym_ston(__p_type_syms, buf, &success);
+ if (success)
+ goto done;
+
+ if (strncasecmp(buf, "type", 4) != 0 ||
+ !isdigit((unsigned char)buf[4]))
+ goto done;
+ errno = 0;
+ result = strtoul(buf + 4, &endptr, 10);
+ if (errno == 0 && *endptr == '\0' && result <= 0xffffU)
+ success = 1;
+ done:
+ if (successp)
+ *successp = success;
+ return (result);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/res_debug.h b/usr/src/lib/libresolv2_joy/common/resolv/res_debug.h
new file mode 100644
index 0000000000..c28171d7c8
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/res_debug.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _RES_DEBUG_H_
+#define _RES_DEBUG_H_
+
+#ifndef DEBUG
+# define Dprint(cond, args) /*empty*/
+# define DprintQ(cond, args, query, size) /*empty*/
+# define Aerror(statp, file, string, error, address) /*empty*/
+# define Perror(statp, file, string, error) /*empty*/
+#else
+# define Dprint(cond, args) if (cond) {fprintf args;} else {}
+# define DprintQ(cond, args, query, size) if (cond) {\
+ fprintf args;\
+ res_pquery(statp, query, size, stdout);\
+ } else {}
+#endif
+
+#endif /* _RES_DEBUG_H_ */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/res_findzonecut.c b/usr/src/lib/libresolv2_joy/common/resolv/res_findzonecut.c
new file mode 100644
index 0000000000..431c0262c1
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/res_findzonecut.c
@@ -0,0 +1,722 @@
+#if !defined(lint) && !defined(SABER)
+static const char rcsid[] = "$Id: res_findzonecut.c,v 1.10 2005/10/11 00:10:16 marka Exp $";
+#endif /* not lint */
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* Import. */
+
+#include "port_before.h"
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <errno.h>
+#include <limits.h>
+#include <netdb.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <isc/list.h>
+
+#include "port_after.h"
+
+#include <resolv_joy.h>
+
+/* Data structures. */
+
+typedef struct rr_a {
+ LINK(struct rr_a) link;
+ union res_sockaddr_union addr;
+} rr_a;
+typedef LIST(rr_a) rrset_a;
+
+typedef struct rr_ns {
+ LINK(struct rr_ns) link;
+ const char * name;
+ unsigned int flags;
+ rrset_a addrs;
+} rr_ns;
+typedef LIST(rr_ns) rrset_ns;
+
+#define RR_NS_HAVE_V4 0x01
+#define RR_NS_HAVE_V6 0x02
+
+/* Forward. */
+
+static int satisfy(res_state, const char *, rrset_ns *,
+ union res_sockaddr_union *, int);
+static int add_addrs(res_state, rr_ns *,
+ union res_sockaddr_union *, int);
+static int get_soa(res_state, const char *, ns_class, int,
+ char *, size_t, char *, size_t,
+ rrset_ns *);
+static int get_ns(res_state, const char *, ns_class, int, rrset_ns *);
+static int get_glue(res_state, ns_class, int, rrset_ns *);
+static int save_ns(res_state, ns_msg *, ns_sect,
+ const char *, ns_class, int, rrset_ns *);
+static int save_a(res_state, ns_msg *, ns_sect,
+ const char *, ns_class, int, rr_ns *);
+static void free_nsrrset(rrset_ns *);
+static void free_nsrr(rrset_ns *, rr_ns *);
+static rr_ns * find_ns(rrset_ns *, const char *);
+static int do_query(res_state, const char *, ns_class, ns_type,
+ u_char *, ns_msg *);
+static void res_dprintf(const char *, ...) ISC_FORMAT_PRINTF(1, 2);
+
+/* Macros. */
+
+#define DPRINTF(x) do {\
+ int save_errno = errno; \
+ if ((statp->options & RES_DEBUG) != 0U) res_dprintf x; \
+ errno = save_errno; \
+ } while (0)
+
+/* Public. */
+
+/*%
+ * find enclosing zone for a <dname,class>, and some server addresses
+ *
+ * parameters:
+ *\li res - resolver context to work within (is modified)
+ *\li dname - domain name whose enclosing zone is desired
+ *\li class - class of dname (and its enclosing zone)
+ *\li zname - found zone name
+ *\li zsize - allocated size of zname
+ *\li addrs - found server addresses
+ *\li naddrs - max number of addrs
+ *
+ * return values:
+ *\li < 0 - an error occurred (check errno)
+ *\li = 0 - zname is now valid, but addrs[] wasn't changed
+ *\li > 0 - zname is now valid, and return value is number of addrs[] found
+ *
+ * notes:
+ *\li this function calls res_nsend() which means it depends on correctly
+ * functioning recursive nameservers (usually defined in /etc/resolv.conf
+ * or its local equivilent).
+ *
+ *\li we start by asking for an SOA<dname,class>. if we get one as an
+ * answer, that just means <dname,class> is a zone top, which is fine.
+ * more than likely we'll be told to go pound sand, in the form of a
+ * negative answer.
+ *
+ *\li note that we are not prepared to deal with referrals since that would
+ * only come from authority servers and our correctly functioning local
+ * recursive server would have followed the referral and got us something
+ * more definite.
+ *
+ *\li if the authority section contains an SOA, this SOA should also be the
+ * closest enclosing zone, since any intermediary zone cuts would've been
+ * returned as referrals and dealt with by our correctly functioning local
+ * recursive name server. but an SOA in the authority section should NOT
+ * match our dname (since that would have been returned in the answer
+ * section). an authority section SOA has to be "above" our dname.
+ *
+ *\li however, since authority section SOA's were once optional, it's
+ * possible that we'll have to go hunting for the enclosing SOA by
+ * ripping labels off the front of our dname -- this is known as "doing
+ * it the hard way."
+ *
+ *\li ultimately we want some server addresses, which are ideally the ones
+ * pertaining to the SOA.MNAME, but only if there is a matching NS RR.
+ * so the second phase (after we find an SOA) is to go looking for the
+ * NS RRset for that SOA's zone.
+ *
+ *\li no answer section processed by this code is allowed to contain CNAME
+ * or DNAME RR's. for the SOA query this means we strip a label and
+ * keep going. for the NS and A queries this means we just give up.
+ */
+
+int
+res_findzonecut(res_state statp, const char *dname, ns_class class, int opts,
+ char *zname, size_t zsize, struct in_addr *addrs, int naddrs)
+{
+ int result, i;
+ union res_sockaddr_union *u;
+
+
+ opts |= RES_IPV4ONLY;
+ opts &= ~RES_IPV6ONLY;
+
+ u = calloc(naddrs, sizeof(*u));
+ if (u == NULL)
+ return(-1);
+
+ result = res_findzonecut2(statp, dname, class, opts, zname, zsize,
+ u, naddrs);
+
+ for (i = 0; i < result; i++) {
+ addrs[i] = u[i].sin.sin_addr;
+ }
+ free(u);
+ return (result);
+}
+
+int
+res_findzonecut2(res_state statp, const char *dname, ns_class class, int opts,
+ char *zname, size_t zsize, union res_sockaddr_union *addrs,
+ int naddrs)
+{
+ char mname[NS_MAXDNAME];
+ u_long save_pfcode;
+ rrset_ns nsrrs;
+ int n;
+
+ DPRINTF(("START dname='%s' class=%s, zsize=%ld, naddrs=%d",
+ dname, p_class(class), (long)zsize, naddrs));
+ save_pfcode = statp->pfcode;
+ statp->pfcode |= RES_PRF_HEAD2 | RES_PRF_HEAD1 | RES_PRF_HEADX |
+ RES_PRF_QUES | RES_PRF_ANS |
+ RES_PRF_AUTH | RES_PRF_ADD;
+ INIT_LIST(nsrrs);
+
+ DPRINTF(("get the soa, and see if it has enough glue"));
+ if ((n = get_soa(statp, dname, class, opts, zname, zsize,
+ mname, sizeof mname, &nsrrs)) < 0 ||
+ ((opts & RES_EXHAUSTIVE) == 0 &&
+ (n = satisfy(statp, mname, &nsrrs, addrs, naddrs)) > 0))
+ goto done;
+
+ DPRINTF(("get the ns rrset and see if it has enough glue"));
+ if ((n = get_ns(statp, zname, class, opts, &nsrrs)) < 0 ||
+ ((opts & RES_EXHAUSTIVE) == 0 &&
+ (n = satisfy(statp, mname, &nsrrs, addrs, naddrs)) > 0))
+ goto done;
+
+ DPRINTF(("get the missing glue and see if it's finally enough"));
+ if ((n = get_glue(statp, class, opts, &nsrrs)) >= 0)
+ n = satisfy(statp, mname, &nsrrs, addrs, naddrs);
+
+ done:
+ DPRINTF(("FINISH n=%d (%s)", n, (n < 0) ? strerror(errno) : "OK"));
+ free_nsrrset(&nsrrs);
+ statp->pfcode = save_pfcode;
+ return (n);
+}
+
+/* Private. */
+
+static int
+satisfy(res_state statp, const char *mname, rrset_ns *nsrrsp,
+ union res_sockaddr_union *addrs, int naddrs)
+{
+ rr_ns *nsrr;
+ int n, x;
+
+ n = 0;
+ nsrr = find_ns(nsrrsp, mname);
+ if (nsrr != NULL) {
+ x = add_addrs(statp, nsrr, addrs, naddrs);
+ addrs += x;
+ naddrs -= x;
+ n += x;
+ }
+ for (nsrr = HEAD(*nsrrsp);
+ nsrr != NULL && naddrs > 0;
+ nsrr = NEXT(nsrr, link))
+ if (ns_samename(nsrr->name, mname) != 1) {
+ x = add_addrs(statp, nsrr, addrs, naddrs);
+ addrs += x;
+ naddrs -= x;
+ n += x;
+ }
+ DPRINTF(("satisfy(%s): %d", mname, n));
+ return (n);
+}
+
+static int
+add_addrs(res_state statp, rr_ns *nsrr,
+ union res_sockaddr_union *addrs, int naddrs)
+{
+ rr_a *arr;
+ int n = 0;
+
+ for (arr = HEAD(nsrr->addrs); arr != NULL; arr = NEXT(arr, link)) {
+ if (naddrs <= 0)
+ return (0);
+ *addrs++ = arr->addr;
+ naddrs--;
+ n++;
+ }
+ DPRINTF(("add_addrs: %d", n));
+ return (n);
+}
+
+static int
+get_soa(res_state statp, const char *dname, ns_class class, int opts,
+ char *zname, size_t zsize, char *mname, size_t msize,
+ rrset_ns *nsrrsp)
+{
+ char tname[NS_MAXDNAME];
+ u_char *resp = NULL;
+ int n, i, ancount, nscount;
+ ns_sect sect;
+ ns_msg msg;
+ u_int rcode;
+
+ /*
+ * Find closest enclosing SOA, even if it's for the root zone.
+ */
+
+ /* First canonicalize dname (exactly one unescaped trailing "."). */
+ if (ns_makecanon(dname, tname, sizeof tname) < 0)
+ goto cleanup;
+ dname = tname;
+
+ resp = malloc(NS_MAXMSG);
+ if (resp == NULL)
+ goto cleanup;
+
+ /* Now grovel the subdomains, hunting for an SOA answer or auth. */
+ for (;;) {
+ /* Leading or inter-label '.' are skipped here. */
+ while (*dname == '.')
+ dname++;
+
+ /* Is there an SOA? */
+ n = do_query(statp, dname, class, ns_t_soa, resp, &msg);
+ if (n < 0) {
+ DPRINTF(("get_soa: do_query('%s', %s) failed (%d)",
+ dname, p_class(class), n));
+ goto cleanup;
+ }
+ if (n > 0) {
+ DPRINTF(("get_soa: CNAME or DNAME found"));
+ sect = ns_s_max, n = 0;
+ } else {
+ rcode = ns_msg_getflag(msg, ns_f_rcode);
+ ancount = ns_msg_count(msg, ns_s_an);
+ nscount = ns_msg_count(msg, ns_s_ns);
+ if (ancount > 0 && rcode == ns_r_noerror)
+ sect = ns_s_an, n = ancount;
+ else if (nscount > 0)
+ sect = ns_s_ns, n = nscount;
+ else
+ sect = ns_s_max, n = 0;
+ }
+ for (i = 0; i < n; i++) {
+ const char *t;
+ const u_char *rdata;
+ ns_rr rr;
+
+ if (ns_parserr(&msg, sect, i, &rr) < 0) {
+ DPRINTF(("get_soa: ns_parserr(%s, %d) failed",
+ p_section(sect, ns_o_query), i));
+ goto cleanup;
+ }
+ if (ns_rr_type(rr) == ns_t_cname ||
+ ns_rr_type(rr) == ns_t_dname)
+ break;
+ if (ns_rr_type(rr) != ns_t_soa ||
+ ns_rr_class(rr) != class)
+ continue;
+ t = ns_rr_name(rr);
+ switch (sect) {
+ case ns_s_an:
+ if (ns_samedomain(dname, t) == 0) {
+ DPRINTF(
+ ("get_soa: ns_samedomain('%s', '%s') == 0",
+ dname, t)
+ );
+ errno = EPROTOTYPE;
+ goto cleanup;
+ }
+ break;
+ case ns_s_ns:
+ if (ns_samename(dname, t) == 1 ||
+ ns_samedomain(dname, t) == 0) {
+ DPRINTF(
+ ("get_soa: ns_samename() || !ns_samedomain('%s', '%s')",
+ dname, t)
+ );
+ errno = EPROTOTYPE;
+ goto cleanup;
+ }
+ break;
+ default:
+ abort();
+ }
+ if (strlen(t) + 1 > zsize) {
+ DPRINTF(("get_soa: zname(%lu) too small (%lu)",
+ (unsigned long)zsize,
+ (unsigned long)strlen(t) + 1));
+ errno = EMSGSIZE;
+ goto cleanup;
+ }
+ strcpy(zname, t);
+ rdata = ns_rr_rdata(rr);
+ if (ns_name_uncompress(resp, ns_msg_end(msg), rdata,
+ mname, msize) < 0) {
+ DPRINTF(("get_soa: ns_name_uncompress failed")
+ );
+ goto cleanup;
+ }
+ if (save_ns(statp, &msg, ns_s_ns,
+ zname, class, opts, nsrrsp) < 0) {
+ DPRINTF(("get_soa: save_ns failed"));
+ goto cleanup;
+ }
+ free(resp);
+ return (0);
+ }
+
+ /* If we're out of labels, then not even "." has an SOA! */
+ if (*dname == '\0')
+ break;
+
+ /* Find label-terminating "."; top of loop will skip it. */
+ while (*dname != '.') {
+ if (*dname == '\\')
+ if (*++dname == '\0') {
+ errno = EMSGSIZE;
+ goto cleanup;
+ }
+ dname++;
+ }
+ }
+ DPRINTF(("get_soa: out of labels"));
+ errno = EDESTADDRREQ;
+ cleanup:
+ if (resp != NULL)
+ free(resp);
+ return (-1);
+}
+
+static int
+get_ns(res_state statp, const char *zname, ns_class class, int opts,
+ rrset_ns *nsrrsp)
+{
+ u_char *resp;
+ ns_msg msg;
+ int n;
+
+ resp = malloc(NS_MAXMSG);
+ if (resp == NULL)
+ return (-1);
+
+ /* Go and get the NS RRs for this zone. */
+ n = do_query(statp, zname, class, ns_t_ns, resp, &msg);
+ if (n != 0) {
+ DPRINTF(("get_ns: do_query('%s', %s) failed (%d)",
+ zname, p_class(class), n));
+ free(resp);
+ return (-1);
+ }
+
+ /* Remember the NS RRs and associated A RRs that came back. */
+ if (save_ns(statp, &msg, ns_s_an, zname, class, opts, nsrrsp) < 0) {
+ DPRINTF(("get_ns save_ns('%s', %s) failed",
+ zname, p_class(class)));
+ free(resp);
+ return (-1);
+ }
+
+ free(resp);
+ return (0);
+}
+
+static int
+get_glue(res_state statp, ns_class class, int opts, rrset_ns *nsrrsp) {
+ rr_ns *nsrr, *nsrr_n;
+ u_char *resp;
+
+ resp = malloc(NS_MAXMSG);
+ if (resp == NULL)
+ return(-1);
+
+ /* Go and get the A RRs for each empty NS RR on our list. */
+ for (nsrr = HEAD(*nsrrsp); nsrr != NULL; nsrr = nsrr_n) {
+ ns_msg msg;
+ int n;
+
+ nsrr_n = NEXT(nsrr, link);
+
+ if ((nsrr->flags & RR_NS_HAVE_V4) == 0) {
+ n = do_query(statp, nsrr->name, class, ns_t_a,
+ resp, &msg);
+ if (n < 0) {
+ DPRINTF(
+ ("get_glue: do_query('%s', %s') failed",
+ nsrr->name, p_class(class)));
+ goto cleanup;
+ }
+ if (n > 0) {
+ DPRINTF((
+ "get_glue: do_query('%s', %s') CNAME or DNAME found",
+ nsrr->name, p_class(class)));
+ }
+ if (save_a(statp, &msg, ns_s_an, nsrr->name, class,
+ opts, nsrr) < 0) {
+ DPRINTF(("get_glue: save_r('%s', %s) failed",
+ nsrr->name, p_class(class)));
+ goto cleanup;
+ }
+ }
+
+ if ((nsrr->flags & RR_NS_HAVE_V6) == 0) {
+ n = do_query(statp, nsrr->name, class, ns_t_aaaa,
+ resp, &msg);
+ if (n < 0) {
+ DPRINTF(
+ ("get_glue: do_query('%s', %s') failed",
+ nsrr->name, p_class(class)));
+ goto cleanup;
+ }
+ if (n > 0) {
+ DPRINTF((
+ "get_glue: do_query('%s', %s') CNAME or DNAME found",
+ nsrr->name, p_class(class)));
+ }
+ if (save_a(statp, &msg, ns_s_an, nsrr->name, class,
+ opts, nsrr) < 0) {
+ DPRINTF(("get_glue: save_r('%s', %s) failed",
+ nsrr->name, p_class(class)));
+ goto cleanup;
+ }
+ }
+
+ /* If it's still empty, it's just chaff. */
+ if (EMPTY(nsrr->addrs)) {
+ DPRINTF(("get_glue: removing empty '%s' NS",
+ nsrr->name));
+ free_nsrr(nsrrsp, nsrr);
+ }
+ }
+ free(resp);
+ return (0);
+
+ cleanup:
+ free(resp);
+ return (-1);
+}
+
+static int
+save_ns(res_state statp, ns_msg *msg, ns_sect sect,
+ const char *owner, ns_class class, int opts,
+ rrset_ns *nsrrsp)
+{
+ int i;
+
+ for (i = 0; i < ns_msg_count(*msg, sect); i++) {
+ char tname[MAXDNAME];
+ const u_char *rdata;
+ rr_ns *nsrr;
+ ns_rr rr;
+
+ if (ns_parserr(msg, sect, i, &rr) < 0) {
+ DPRINTF(("save_ns: ns_parserr(%s, %d) failed",
+ p_section(sect, ns_o_query), i));
+ return (-1);
+ }
+ if (ns_rr_type(rr) != ns_t_ns ||
+ ns_rr_class(rr) != class ||
+ ns_samename(ns_rr_name(rr), owner) != 1)
+ continue;
+ nsrr = find_ns(nsrrsp, ns_rr_name(rr));
+ if (nsrr == NULL) {
+ nsrr = malloc(sizeof *nsrr);
+ if (nsrr == NULL) {
+ DPRINTF(("save_ns: malloc failed"));
+ return (-1);
+ }
+ rdata = ns_rr_rdata(rr);
+ if (ns_name_uncompress(ns_msg_base(*msg),
+ ns_msg_end(*msg), rdata,
+ tname, sizeof tname) < 0) {
+ DPRINTF(("save_ns: ns_name_uncompress failed")
+ );
+ free(nsrr);
+ return (-1);
+ }
+ nsrr->name = strdup(tname);
+ if (nsrr->name == NULL) {
+ DPRINTF(("save_ns: strdup failed"));
+ free(nsrr);
+ return (-1);
+ }
+ INIT_LINK(nsrr, link);
+ INIT_LIST(nsrr->addrs);
+ nsrr->flags = 0;
+ APPEND(*nsrrsp, nsrr, link);
+ }
+ if (save_a(statp, msg, ns_s_ar,
+ nsrr->name, class, opts, nsrr) < 0) {
+ DPRINTF(("save_ns: save_r('%s', %s) failed",
+ nsrr->name, p_class(class)));
+ return (-1);
+ }
+ }
+ return (0);
+}
+
+static int
+save_a(res_state statp, ns_msg *msg, ns_sect sect,
+ const char *owner, ns_class class, int opts,
+ rr_ns *nsrr)
+{
+ int i;
+
+ for (i = 0; i < ns_msg_count(*msg, sect); i++) {
+ ns_rr rr;
+ rr_a *arr;
+
+ if (ns_parserr(msg, sect, i, &rr) < 0) {
+ DPRINTF(("save_a: ns_parserr(%s, %d) failed",
+ p_section(sect, ns_o_query), i));
+ return (-1);
+ }
+ if ((ns_rr_type(rr) != ns_t_a &&
+ ns_rr_type(rr) != ns_t_aaaa) ||
+ ns_rr_class(rr) != class ||
+ ns_samename(ns_rr_name(rr), owner) != 1 ||
+ ns_rr_rdlen(rr) != NS_INADDRSZ)
+ continue;
+ if ((opts & RES_IPV6ONLY) != 0 && ns_rr_type(rr) != ns_t_aaaa)
+ continue;
+ if ((opts & RES_IPV4ONLY) != 0 && ns_rr_type(rr) != ns_t_a)
+ continue;
+ arr = malloc(sizeof *arr);
+ if (arr == NULL) {
+ DPRINTF(("save_a: malloc failed"));
+ return (-1);
+ }
+ INIT_LINK(arr, link);
+ memset(&arr->addr, 0, sizeof(arr->addr));
+ switch (ns_rr_type(rr)) {
+ case ns_t_a:
+ arr->addr.sin.sin_family = AF_INET;
+#ifdef HAVE_SA_LEN
+ arr->addr.sin.sin_len = sizeof(arr->addr.sin);
+#endif
+ memcpy(&arr->addr.sin.sin_addr, ns_rr_rdata(rr),
+ NS_INADDRSZ);
+ arr->addr.sin.sin_port = htons(NAMESERVER_PORT);
+ nsrr->flags |= RR_NS_HAVE_V4;
+ break;
+ case ns_t_aaaa:
+ arr->addr.sin6.sin6_family = AF_INET6;
+#ifdef HAVE_SA_LEN
+ arr->addr.sin6.sin6_len = sizeof(arr->addr.sin6);
+#endif
+ memcpy(&arr->addr.sin6.sin6_addr, ns_rr_rdata(rr), 16);
+ arr->addr.sin.sin_port = htons(NAMESERVER_PORT);
+ nsrr->flags |= RR_NS_HAVE_V6;
+ break;
+ default:
+ abort();
+ }
+ APPEND(nsrr->addrs, arr, link);
+ }
+ return (0);
+}
+
+static void
+free_nsrrset(rrset_ns *nsrrsp) {
+ rr_ns *nsrr;
+
+ while ((nsrr = HEAD(*nsrrsp)) != NULL)
+ free_nsrr(nsrrsp, nsrr);
+}
+
+static void
+free_nsrr(rrset_ns *nsrrsp, rr_ns *nsrr) {
+ rr_a *arr;
+ char *tmp;
+
+ while ((arr = HEAD(nsrr->addrs)) != NULL) {
+ UNLINK(nsrr->addrs, arr, link);
+ free(arr);
+ }
+ DE_CONST(nsrr->name, tmp);
+ free(tmp);
+ UNLINK(*nsrrsp, nsrr, link);
+ free(nsrr);
+}
+
+static rr_ns *
+find_ns(rrset_ns *nsrrsp, const char *dname) {
+ rr_ns *nsrr;
+
+ for (nsrr = HEAD(*nsrrsp); nsrr != NULL; nsrr = NEXT(nsrr, link))
+ if (ns_samename(nsrr->name, dname) == 1)
+ return (nsrr);
+ return (NULL);
+}
+
+static int
+do_query(res_state statp, const char *dname, ns_class class, ns_type qtype,
+ u_char *resp, ns_msg *msg)
+{
+ u_char req[NS_PACKETSZ];
+ int i, n;
+
+ n = res_nmkquery(statp, ns_o_query, dname, class, qtype,
+ NULL, 0, NULL, req, NS_PACKETSZ);
+ if (n < 0) {
+ DPRINTF(("do_query: res_nmkquery failed"));
+ return (-1);
+ }
+ n = res_nsend(statp, req, n, resp, NS_MAXMSG);
+ if (n < 0) {
+ DPRINTF(("do_query: res_nsend failed"));
+ return (-1);
+ }
+ if (n == 0) {
+ DPRINTF(("do_query: res_nsend returned 0"));
+ errno = EMSGSIZE;
+ return (-1);
+ }
+ if (ns_initparse(resp, n, msg) < 0) {
+ DPRINTF(("do_query: ns_initparse failed"));
+ return (-1);
+ }
+ n = 0;
+ for (i = 0; i < ns_msg_count(*msg, ns_s_an); i++) {
+ ns_rr rr;
+
+ if (ns_parserr(msg, ns_s_an, i, &rr) < 0) {
+ DPRINTF(("do_query: ns_parserr failed"));
+ return (-1);
+ }
+ n += (ns_rr_class(rr) == class &&
+ (ns_rr_type(rr) == ns_t_cname ||
+ ns_rr_type(rr) == ns_t_dname));
+ }
+ return (n);
+}
+
+static void
+res_dprintf(const char *fmt, ...) {
+ va_list ap;
+
+ va_start(ap, fmt);
+ fputs(";; res_findzonecut: ", stderr);
+ vfprintf(stderr, fmt, ap);
+ fputc('\n', stderr);
+ va_end(ap);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/res_init.c b/usr/src/lib/libresolv2_joy/common/resolv/res_init.c
new file mode 100644
index 0000000000..10254d1931
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/res_init.c
@@ -0,0 +1,958 @@
+/*
+ * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+
+/*
+ * Copyright (c) 1985, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Portions Copyright (c) 1993 by Digital Equipment Corporation.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies, and that
+ * the name of Digital Equipment Corporation not be used in advertising or
+ * publicity pertaining to distribution of the document or software without
+ * specific, written prior permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT
+ * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)res_init.c 8.1 (Berkeley) 6/7/93";
+static const char rcsid[] = "$Id: res_init.c,v 1.26 2008/12/11 09:59:00 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <netdb.h>
+
+#ifndef HAVE_MD5
+# include "../dst/md5.h"
+#else
+# ifdef SOLARIS2
+# include <sys/md5.h>
+# endif
+#endif
+#ifndef _MD5_H_
+# define _MD5_H_ 1 /*%< make sure we do not include rsaref md5.h file */
+#endif
+
+
+#include "port_after.h"
+
+/* ensure that sockaddr_in6 and IN6ADDR_ANY_INIT are declared / defined */
+#include <resolv_joy.h>
+
+/* ISC purposely put port_after.h before <resolv.h> to force in6 stuff
+ * (above) so we explicitly include port_resolv.h here */
+#include "port_resolv.h"
+
+#include "res_private.h"
+
+/*% Options. Should all be left alone. */
+#define RESOLVSORT
+#define DEBUG
+
+#ifdef SUNW_INITCHKIF
+#include <net/if.h>
+#include <netinet/if_ether.h>
+#include <sys/sockio.h>
+#define MAXIFS 8192
+#endif /* SUNW_INITCHKIF */
+
+#ifdef SOLARIS2
+#include <sys/systeminfo.h>
+#endif
+
+static void res_setoptions __P((res_state, const char *, const char *));
+
+#ifdef RESOLVSORT
+static const char sort_mask[] = "/&";
+#define ISSORTMASK(ch) (strchr(sort_mask, ch) != NULL)
+static u_int32_t net_mask __P((struct in_addr));
+#endif
+
+#if !defined(isascii) /*%< XXX - could be a function */
+# define isascii(c) (!(c & 0200))
+#endif
+
+/*
+ * Resolver state default settings.
+ */
+
+/*%
+ * Set up default settings. If the configuration file exist, the values
+ * there will have precedence. Otherwise, the server address is set to
+ * INADDR_ANY and the default domain name comes from the gethostname().
+ *
+ * An interrim version of this code (BIND 4.9, pre-4.4BSD) used 127.0.0.1
+ * rather than INADDR_ANY ("0.0.0.0") as the default name server address
+ * since it was noted that INADDR_ANY actually meant ``the first interface
+ * you "ifconfig"'d at boot time'' and if this was a SLIP or PPP interface,
+ * it had to be "up" in order for you to reach your own name server. It
+ * was later decided that since the recommended practice is to always
+ * install local static routes through 127.0.0.1 for all your network
+ * interfaces, that we could solve this problem without a code change.
+ *
+ * The configuration file should always be used, since it is the only way
+ * to specify a default domain. If you are running a server on your local
+ * machine, you should say "nameserver 0.0.0.0" or "nameserver 127.0.0.1"
+ * in the configuration file.
+ *
+ * Return 0 if completes successfully, -1 on error
+ */
+int
+res_ninit(res_state statp) {
+ extern int __res_vinit(res_state, int);
+ return (__res_vinit(statp, 0));
+}
+
+/*% This function has to be reachable by res_data.c but not publically. */
+int
+__res_vinit(res_state statp, int preinit) {
+ register FILE *fp;
+ register char *cp, **pp;
+ register int n;
+ char buf[BUFSIZ];
+ int nserv = 0; /*%< number of nameserver records read from file */
+ int haveenv = 0;
+ int havesearch = 0;
+#ifdef RESOLVSORT
+ int nsort = 0;
+ char *net;
+#endif
+ int dots;
+ union res_sockaddr_union u[2];
+ int maxns = MAXNS;
+
+ RES_SET_H_ERRNO(statp, 0);
+ if (statp->_u._ext.ext != NULL)
+ res_ndestroy(statp);
+
+ if (!preinit) {
+ statp->retrans = RES_TIMEOUT;
+ statp->retry = RES_DFLRETRY;
+ statp->options = RES_DEFAULT;
+ res_rndinit(statp);
+ statp->id = res_nrandomid(statp);
+ }
+
+ memset(u, 0, sizeof(u));
+#ifdef USELOOPBACK
+ u[nserv].sin.sin_addr = inet_makeaddr(IN_LOOPBACKNET, 1);
+#else
+ u[nserv].sin.sin_addr.s_addr = INADDR_ANY;
+#endif
+ u[nserv].sin.sin_family = AF_INET;
+ u[nserv].sin.sin_port = htons(NAMESERVER_PORT);
+#ifdef HAVE_SA_LEN
+ u[nserv].sin.sin_len = sizeof(struct sockaddr_in);
+#endif
+ nserv++;
+#ifdef HAS_INET6_STRUCTS
+#ifdef USELOOPBACK
+ u[nserv].sin6.sin6_addr = in6addr_loopback;
+#else
+ u[nserv].sin6.sin6_addr = in6addr_any;
+#endif
+ u[nserv].sin6.sin6_family = AF_INET6;
+ u[nserv].sin6.sin6_port = htons(NAMESERVER_PORT);
+#ifdef HAVE_SA_LEN
+ u[nserv].sin6.sin6_len = sizeof(struct sockaddr_in6);
+#endif
+ nserv++;
+#endif
+ statp->nscount = 0;
+ statp->ndots = 1;
+ statp->pfcode = 0;
+ statp->_vcsock = -1;
+ statp->_flags = 0;
+ statp->qhook = NULL;
+ statp->rhook = NULL;
+ statp->_u._ext.nscount = 0;
+ statp->_u._ext.ext = malloc(sizeof(*statp->_u._ext.ext));
+ if (statp->_u._ext.ext != NULL) {
+ memset(statp->_u._ext.ext, 0, sizeof(*statp->_u._ext.ext));
+ statp->_u._ext.ext->nsaddrs[0].sin = statp->nsaddr;
+ strcpy(statp->_u._ext.ext->nsuffix, "ip6.arpa");
+ strcpy(statp->_u._ext.ext->nsuffix2, "ip6.int");
+ } else {
+ /*
+ * Historically res_init() rarely, if at all, failed.
+ * Examples and applications exist which do not check
+ * our return code. Furthermore several applications
+ * simply call us to get the systems domainname. So
+ * rather then immediately fail here we store the
+ * failure, which is returned later, in h_errno. And
+ * prevent the collection of 'nameserver' information
+ * by setting maxns to 0. Thus applications that fail
+ * to check our return code wont be able to make
+ * queries anyhow.
+ */
+ RES_SET_H_ERRNO(statp, NETDB_INTERNAL);
+ maxns = 0;
+ }
+#ifdef RESOLVSORT
+ statp->nsort = 0;
+#endif
+ res_setservers(statp, u, nserv);
+
+#ifdef SUNW_INITCHKIF
+/*
+ * Short circuit res_init() if no non-loopback interfaces are up. This is
+ * done to avoid boot delays if "dns" comes before "files" in nsswitch.conf.
+ * An additional fix has been added to this code, to count all external
+ * interfaces, which includes the IPv6 interfaces. If no external interfaces
+ * are found, an additional check is carried out to determine if any deprecated
+ * interfaces are up.
+ */
+ {
+ int s;
+ struct lifnum lifn;
+
+ if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
+ perror("res_init: socket");
+ goto freedata;
+ }
+ lifn.lifn_family = AF_UNSPEC;
+ lifn.lifn_flags = LIFC_EXTERNAL_SOURCE;
+ if (ioctl(s, SIOCGLIFNUM, (char *)&lifn) < 0) {
+ close(s);
+ goto freedata;
+ }
+ if (lifn.lifn_count == 0) {
+ /*
+ * Check if there are any deprecated interfaces up
+ */
+ struct lifconf lifc;
+ uchar_t *buf;
+ int buflen, i, int_up = 0;
+
+ lifn.lifn_flags = 0;
+ if ((ioctl(s, SIOCGLIFNUM, (char *)&lifn) < 0) ||
+ (lifn.lifn_count < 1)) {
+ close(s);
+ goto freedata;
+ }
+
+ buflen = lifn.lifn_count * sizeof (struct lifreq);
+ buf = (uchar_t *)malloc(buflen);
+ if (buf == NULL) {
+ close(s);
+ goto freedata;
+ }
+
+ lifc.lifc_family = AF_UNSPEC;
+ lifc.lifc_flags = 0;
+ lifc.lifc_len = buflen;
+ lifc.lifc_lifcu.lifcu_buf = (caddr_t)buf;
+ if (ioctl(s, SIOCGLIFCONF, (char *)&lifc) < 0) {
+ close(s);
+ free(buf);
+ goto freedata;
+ }
+
+ for (i = 0; i < lifn.lifn_count; ++i) {
+ struct lifreq *lreqp, lreq;
+
+ lreqp = (struct lifreq *)&lifc.lifc_req[i];
+ strlcpy(lreq.lifr_name, lreqp->lifr_name,
+ sizeof (lreq.lifr_name));
+ if (ioctl(s, SIOCGLIFFLAGS, &lreq) < 0) {
+ close(s);
+ free(buf);
+ goto freedata;
+ }
+ if ((lreq.lifr_flags & IFF_UP) &&
+ !(lreq.lifr_flags & IFF_NOLOCAL) &&
+ !(lreq.lifr_flags & IFF_NOXMIT) &&
+ !(lreq.lifr_flags & IFF_LOOPBACK)) {
+ int_up = 1;
+ break;
+ }
+ }
+ free(buf);
+
+ if (!int_up) {
+ close(s);
+ goto freedata;
+ }
+ }
+ close(s);
+ }
+#endif /* SUNW_INITCHKIF */
+
+#ifdef SOLARIS2
+ /*
+ * The old libresolv derived the defaultdomain from NIS/NIS+.
+ * We want to keep this behaviour
+ */
+ {
+ char buf[sizeof(statp->defdname)], *cp;
+ int ret;
+
+ if ((ret = sysinfo(SI_SRPC_DOMAIN, buf, sizeof(buf))) > 0 &&
+ (unsigned int)ret <= sizeof(buf)) {
+ if (buf[0] == '+')
+ buf[0] = '.';
+ cp = strchr(buf, '.');
+ cp = (cp == NULL) ? buf : (cp + 1);
+ strncpy(statp->defdname, cp,
+ sizeof(statp->defdname) - 1);
+ statp->defdname[sizeof(statp->defdname) - 1] = '\0';
+ }
+ }
+#endif /* SOLARIS2 */
+
+ /* Allow user to override the local domain definition */
+ if ((cp = getenv("LOCALDOMAIN")) != NULL) {
+ (void)strncpy(statp->defdname, cp, sizeof(statp->defdname) - 1);
+ statp->defdname[sizeof(statp->defdname) - 1] = '\0';
+ haveenv++;
+
+ /*
+ * Set search list to be blank-separated strings
+ * from rest of env value. Permits users of LOCALDOMAIN
+ * to still have a search list, and anyone to set the
+ * one that they want to use as an individual (even more
+ * important now that the rfc1535 stuff restricts searches)
+ */
+ cp = statp->defdname;
+ pp = statp->dnsrch;
+ *pp++ = cp;
+ for (n = 0; *cp && pp < statp->dnsrch + MAXDNSRCH; cp++) {
+ if (*cp == '\n') /*%< silly backwards compat */
+ break;
+ else if (*cp == ' ' || *cp == '\t') {
+ *cp = 0;
+ n = 1;
+ } else if (n) {
+ *pp++ = cp;
+ n = 0;
+ havesearch = 1;
+ }
+ }
+ /* null terminate last domain if there are excess */
+ while (*cp != '\0' && *cp != ' ' && *cp != '\t' && *cp != '\n')
+ cp++;
+ *cp = '\0';
+ *pp++ = 0;
+ }
+
+#define MATCH(line, name) \
+ (!strncmp(line, name, sizeof(name) - 1) && \
+ (line[sizeof(name) - 1] == ' ' || \
+ line[sizeof(name) - 1] == '\t'))
+
+ nserv = 0;
+ if ((fp = fopen(_PATH_RESCONF, "r")) != NULL) {
+ /* read the config file */
+ while (fgets(buf, sizeof(buf), fp) != NULL) {
+ /* skip comments */
+ if (*buf == ';' || *buf == '#')
+ continue;
+ /* read default domain name */
+ if (MATCH(buf, "domain")) {
+ if (haveenv) /*%< skip if have from environ */
+ continue;
+ cp = buf + sizeof("domain") - 1;
+ while (*cp == ' ' || *cp == '\t')
+ cp++;
+ if ((*cp == '\0') || (*cp == '\n'))
+ continue;
+ strncpy(statp->defdname, cp, sizeof(statp->defdname) - 1);
+ statp->defdname[sizeof(statp->defdname) - 1] = '\0';
+ if ((cp = strpbrk(statp->defdname, " \t\n")) != NULL)
+ *cp = '\0';
+ havesearch = 0;
+ continue;
+ }
+ /* set search list */
+ if (MATCH(buf, "search")) {
+ if (haveenv) /*%< skip if have from environ */
+ continue;
+ cp = buf + sizeof("search") - 1;
+ while (*cp == ' ' || *cp == '\t')
+ cp++;
+ if ((*cp == '\0') || (*cp == '\n'))
+ continue;
+ strncpy(statp->defdname, cp, sizeof(statp->defdname) - 1);
+ statp->defdname[sizeof(statp->defdname) - 1] = '\0';
+ if ((cp = strchr(statp->defdname, '\n')) != NULL)
+ *cp = '\0';
+ /*
+ * Set search list to be blank-separated strings
+ * on rest of line.
+ */
+ cp = statp->defdname;
+ pp = statp->dnsrch;
+ *pp++ = cp;
+ for (n = 0; *cp && pp < statp->dnsrch + MAXDNSRCH; cp++) {
+ if (*cp == ' ' || *cp == '\t') {
+ *cp = 0;
+ n = 1;
+ } else if (n) {
+ *pp++ = cp;
+ n = 0;
+ }
+ }
+ /* null terminate last domain if there are excess */
+ while (*cp != '\0' && *cp != ' ' && *cp != '\t')
+ cp++;
+ *cp = '\0';
+ *pp++ = 0;
+ havesearch = 1;
+ continue;
+ }
+ /* read nameservers to query */
+ if (MATCH(buf, "nameserver") && nserv < maxns) {
+ struct addrinfo hints, *ai;
+ char sbuf[NI_MAXSERV];
+ const size_t minsiz =
+ sizeof(statp->_u._ext.ext->nsaddrs[0]);
+
+ cp = buf + sizeof("nameserver") - 1;
+ while (*cp == ' ' || *cp == '\t')
+ cp++;
+ cp[strcspn(cp, ";# \t\n")] = '\0';
+ if ((*cp != '\0') && (*cp != '\n')) {
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = PF_UNSPEC;
+ hints.ai_socktype = SOCK_DGRAM; /*dummy*/
+ hints.ai_flags = AI_NUMERICHOST;
+ sprintf(sbuf, "%u", NAMESERVER_PORT);
+ if (getaddrinfo(cp, sbuf, &hints, &ai) == 0 &&
+ ai->ai_addrlen <= minsiz) {
+ if (statp->_u._ext.ext != NULL) {
+ memcpy(&statp->_u._ext.ext->nsaddrs[nserv],
+ ai->ai_addr, ai->ai_addrlen);
+ }
+ if (ai->ai_addrlen <=
+ sizeof(statp->nsaddr_list[nserv])) {
+ memcpy(&statp->nsaddr_list[nserv],
+ ai->ai_addr, ai->ai_addrlen);
+ } else
+ statp->nsaddr_list[nserv].sin_family = 0;
+ freeaddrinfo(ai);
+ nserv++;
+ }
+ }
+ continue;
+ }
+#ifdef RESOLVSORT
+ if (MATCH(buf, "sortlist")) {
+ struct in_addr a;
+
+ cp = buf + sizeof("sortlist") - 1;
+ while (nsort < MAXRESOLVSORT) {
+ while (*cp == ' ' || *cp == '\t')
+ cp++;
+ if (*cp == '\0' || *cp == '\n' || *cp == ';')
+ break;
+ net = cp;
+ while (*cp && !ISSORTMASK(*cp) && *cp != ';' &&
+ isascii(*cp) && !isspace((unsigned char)*cp))
+ cp++;
+ n = *cp;
+ *cp = 0;
+ if (inet_aton(net, &a)) {
+ statp->sort_list[nsort].addr = a;
+ if (ISSORTMASK(n)) {
+ *cp++ = n;
+ net = cp;
+ while (*cp && *cp != ';' &&
+ isascii(*cp) &&
+ !isspace((unsigned char)*cp))
+ cp++;
+ n = *cp;
+ *cp = 0;
+ if (inet_aton(net, &a)) {
+ statp->sort_list[nsort].mask = a.s_addr;
+ } else {
+ statp->sort_list[nsort].mask =
+ net_mask(statp->sort_list[nsort].addr);
+ }
+ } else {
+ statp->sort_list[nsort].mask =
+ net_mask(statp->sort_list[nsort].addr);
+ }
+ nsort++;
+ }
+ *cp = n;
+ }
+ continue;
+ }
+#endif
+ if (MATCH(buf, "options")) {
+ res_setoptions(statp, buf + sizeof("options") - 1, "conf");
+ continue;
+ }
+ }
+ if (nserv > 0)
+ statp->nscount = nserv;
+#ifdef RESOLVSORT
+ statp->nsort = nsort;
+#endif
+ (void) fclose(fp);
+ }
+/*
+ * Last chance to get a nameserver. This should not normally
+ * be necessary
+ */
+#ifdef NO_RESOLV_CONF
+ if(nserv == 0)
+ nserv = get_nameservers(statp);
+#endif
+
+ if (statp->defdname[0] == 0 &&
+ gethostname(buf, sizeof(statp->defdname) - 1) == 0 &&
+ (cp = strchr(buf, '.')) != NULL)
+ strcpy(statp->defdname, cp + 1);
+
+ /* find components of local domain that might be searched */
+ if (havesearch == 0) {
+ pp = statp->dnsrch;
+ *pp++ = statp->defdname;
+ *pp = NULL;
+
+ dots = 0;
+ for (cp = statp->defdname; *cp; cp++)
+ dots += (*cp == '.');
+
+ cp = statp->defdname;
+ while (pp < statp->dnsrch + MAXDFLSRCH) {
+ if (dots < LOCALDOMAINPARTS)
+ break;
+ cp = strchr(cp, '.') + 1; /*%< we know there is one */
+ *pp++ = cp;
+ dots--;
+ }
+ *pp = NULL;
+#ifdef DEBUG
+ if (statp->options & RES_DEBUG) {
+ printf(";; res_init()... default dnsrch list:\n");
+ for (pp = statp->dnsrch; *pp; pp++)
+ printf(";;\t%s\n", *pp);
+ printf(";;\t..END..\n");
+ }
+#endif
+ }
+
+ if ((cp = getenv("RES_OPTIONS")) != NULL)
+ res_setoptions(statp, cp, "env");
+ statp->options |= RES_INIT;
+ return (statp->res_h_errno);
+#ifdef SUNW_INITCHKIF
+freedata:
+ RES_SET_H_ERRNO(statp, NETDB_INTERNAL);
+ if (statp->_u._ext.ext != NULL) {
+ free(statp->_u._ext.ext);
+ statp->_u._ext.ext = NULL;
+ }
+ return (-1);
+#endif /* SUNW_INITCHKIF */
+
+}
+
+static void
+res_setoptions(res_state statp, const char *options, const char *source)
+{
+ const char *cp = options;
+ int i;
+ struct __res_state_ext *ext = statp->_u._ext.ext;
+
+#ifdef DEBUG
+ if (statp->options & RES_DEBUG)
+ printf(";; res_setoptions(\"%s\", \"%s\")...\n",
+ options, source);
+#endif
+ while (*cp) {
+ /* skip leading and inner runs of spaces */
+ while (*cp == ' ' || *cp == '\t')
+ cp++;
+ /* search for and process individual options */
+ if (!strncmp(cp, "ndots:", sizeof("ndots:") - 1)) {
+ i = atoi(cp + sizeof("ndots:") - 1);
+ if (i <= RES_MAXNDOTS)
+ statp->ndots = i;
+ else
+ statp->ndots = RES_MAXNDOTS;
+#ifdef DEBUG
+ if (statp->options & RES_DEBUG)
+ printf(";;\tndots=%d\n", statp->ndots);
+#endif
+ } else if (!strncmp(cp, "timeout:", sizeof("timeout:") - 1)) {
+ i = atoi(cp + sizeof("timeout:") - 1);
+ if (i <= RES_MAXRETRANS)
+ statp->retrans = i;
+ else
+ statp->retrans = RES_MAXRETRANS;
+#ifdef DEBUG
+ if (statp->options & RES_DEBUG)
+ printf(";;\ttimeout=%d\n", statp->retrans);
+#endif
+#ifdef SOLARIS2
+ } else if (!strncmp(cp, "retrans:", sizeof("retrans:") - 1)) {
+ /*
+ * For backward compatibility, 'retrans' is
+ * supported as an alias for 'timeout', though
+ * without an imposed maximum.
+ */
+ statp->retrans = atoi(cp + sizeof("retrans:") - 1);
+ } else if (!strncmp(cp, "retry:", sizeof("retry:") - 1)){
+ /*
+ * For backward compatibility, 'retry' is
+ * supported as an alias for 'attempts', though
+ * without an imposed maximum.
+ */
+ statp->retry = atoi(cp + sizeof("retry:") - 1);
+#endif /* SOLARIS2 */
+ } else if (!strncmp(cp, "attempts:", sizeof("attempts:") - 1)){
+ i = atoi(cp + sizeof("attempts:") - 1);
+ if (i <= RES_MAXRETRY)
+ statp->retry = i;
+ else
+ statp->retry = RES_MAXRETRY;
+#ifdef DEBUG
+ if (statp->options & RES_DEBUG)
+ printf(";;\tattempts=%d\n", statp->retry);
+#endif
+ } else if (!strncmp(cp, "debug", sizeof("debug") - 1)) {
+#ifdef DEBUG
+ if (!(statp->options & RES_DEBUG)) {
+ printf(";; res_setoptions(\"%s\", \"%s\")..\n",
+ options, source);
+ statp->options |= RES_DEBUG;
+ }
+ printf(";;\tdebug\n");
+#endif
+ } else if (!strncmp(cp, "no_tld_query",
+ sizeof("no_tld_query") - 1) ||
+ !strncmp(cp, "no-tld-query",
+ sizeof("no-tld-query") - 1)) {
+ statp->options |= RES_NOTLDQUERY;
+ } else if (!strncmp(cp, "inet6", sizeof("inet6") - 1)) {
+ statp->options |= RES_USE_INET6;
+ } else if (!strncmp(cp, "rotate", sizeof("rotate") - 1)) {
+ statp->options |= RES_ROTATE;
+ } else if (!strncmp(cp, "no-check-names",
+ sizeof("no-check-names") - 1)) {
+ statp->options |= RES_NOCHECKNAME;
+ }
+#ifdef RES_USE_EDNS0
+ else if (!strncmp(cp, "edns0", sizeof("edns0") - 1)) {
+ statp->options |= RES_USE_EDNS0;
+ }
+#endif
+ else if (!strncmp(cp, "dname", sizeof("dname") - 1)) {
+ statp->options |= RES_USE_DNAME;
+ }
+ else if (!strncmp(cp, "nibble:", sizeof("nibble:") - 1)) {
+ if (ext == NULL)
+ goto skip;
+ cp += sizeof("nibble:") - 1;
+ i = MIN(strcspn(cp, " \t"), sizeof(ext->nsuffix) - 1);
+ strncpy(ext->nsuffix, cp, i);
+ ext->nsuffix[i] = '\0';
+ }
+ else if (!strncmp(cp, "nibble2:", sizeof("nibble2:") - 1)) {
+ if (ext == NULL)
+ goto skip;
+ cp += sizeof("nibble2:") - 1;
+ i = MIN(strcspn(cp, " \t"), sizeof(ext->nsuffix2) - 1);
+ strncpy(ext->nsuffix2, cp, i);
+ ext->nsuffix2[i] = '\0';
+ }
+ else if (!strncmp(cp, "v6revmode:", sizeof("v6revmode:") - 1)) {
+ cp += sizeof("v6revmode:") - 1;
+ /* "nibble" and "bitstring" used to be valid */
+ if (!strncmp(cp, "single", sizeof("single") - 1)) {
+ statp->options |= RES_NO_NIBBLE2;
+ } else if (!strncmp(cp, "both", sizeof("both") - 1)) {
+ statp->options &=
+ ~RES_NO_NIBBLE2;
+ }
+ }
+ else {
+ /* XXX - print a warning here? */
+ }
+ skip:
+ /* skip to next run of spaces */
+ while (*cp && *cp != ' ' && *cp != '\t')
+ cp++;
+ }
+}
+
+#ifdef RESOLVSORT
+/* XXX - should really support CIDR which means explicit masks always. */
+static u_int32_t
+net_mask(in) /*!< XXX - should really use system's version of this */
+ struct in_addr in;
+{
+ register u_int32_t i = ntohl(in.s_addr);
+
+ if (IN_CLASSA(i))
+ return (htonl(IN_CLASSA_NET));
+ else if (IN_CLASSB(i))
+ return (htonl(IN_CLASSB_NET));
+ return (htonl(IN_CLASSC_NET));
+}
+#endif
+
+void
+res_rndinit(res_state statp)
+{
+ struct timeval now;
+ u_int32_t u32;
+ u_int16_t u16;
+
+ gettimeofday(&now, NULL);
+ u32 = now.tv_sec;
+ memcpy(statp->_u._ext._rnd, &u32, 4);
+ u32 = now.tv_usec;
+ memcpy(statp->_u._ext._rnd + 4, &u32, 4);
+ u32 += now.tv_sec;
+ memcpy(statp->_u._ext._rnd + 8, &u32, 4);
+ u16 = getpid();
+ memcpy(statp->_u._ext._rnd + 12, &u16, 2);
+
+}
+
+u_int
+res_nrandomid(res_state statp) {
+ struct timeval now;
+ u_int16_t u16;
+ MD5_CTX ctx;
+
+ gettimeofday(&now, NULL);
+ u16 = (u_int16_t) (now.tv_sec ^ now.tv_usec);
+
+ memcpy(statp->_u._ext._rnd + 14, &u16, 2);
+#ifndef HAVE_MD5
+ MD5_Init(&ctx);
+ MD5_Update(&ctx, statp->_u._ext._rnd, 16);
+ MD5_Final(statp->_u._ext._rnd, &ctx);
+#else
+ MD5Init(&ctx);
+ MD5Update(&ctx, statp->_u._ext._rnd, 16);
+ MD5Final(statp->_u._ext._rnd, &ctx);
+#endif
+ memcpy(&u16, statp->_u._ext._rnd + 14, 2);
+ return ((u_int) u16);
+}
+
+/*%
+ * This routine is for closing the socket if a virtual circuit is used and
+ * the program wants to close it. This provides support for endhostent()
+ * which expects to close the socket.
+ *
+ * This routine is not expected to be user visible.
+ */
+void
+res_nclose(res_state statp) {
+ int ns;
+
+ if (statp->_vcsock >= 0) {
+ (void) close(statp->_vcsock);
+ statp->_vcsock = -1;
+ statp->_flags &= ~(RES_F_VC | RES_F_CONN);
+ }
+ for (ns = 0; ns < statp->_u._ext.nscount; ns++) {
+ if (statp->_u._ext.nssocks[ns] != -1) {
+ (void) close(statp->_u._ext.nssocks[ns]);
+ statp->_u._ext.nssocks[ns] = -1;
+ }
+ }
+}
+
+void
+res_ndestroy(res_state statp) {
+ res_nclose(statp);
+ if (statp->_u._ext.ext != NULL)
+ free(statp->_u._ext.ext);
+ statp->options &= ~RES_INIT;
+ statp->_u._ext.ext = NULL;
+}
+
+const char *
+res_get_nibblesuffix(res_state statp) {
+ if (statp->_u._ext.ext)
+ return (statp->_u._ext.ext->nsuffix);
+ return ("ip6.arpa");
+}
+
+const char *
+res_get_nibblesuffix2(res_state statp) {
+ if (statp->_u._ext.ext)
+ return (statp->_u._ext.ext->nsuffix2);
+ return ("ip6.int");
+}
+
+void
+res_setservers(res_state statp, const union res_sockaddr_union *set, int cnt) {
+ int i, nserv;
+ size_t size;
+
+ /* close open servers */
+ res_nclose(statp);
+
+ /* cause rtt times to be forgotten */
+ statp->_u._ext.nscount = 0;
+
+ nserv = 0;
+ for (i = 0; i < cnt && nserv < MAXNS; i++) {
+ switch (set->sin.sin_family) {
+ case AF_INET:
+ size = sizeof(set->sin);
+ if (statp->_u._ext.ext)
+ memcpy(&statp->_u._ext.ext->nsaddrs[nserv],
+ &set->sin, size);
+ if (size <= sizeof(statp->nsaddr_list[nserv]))
+ memcpy(&statp->nsaddr_list[nserv],
+ &set->sin, size);
+ else
+ statp->nsaddr_list[nserv].sin_family = 0;
+ nserv++;
+ break;
+
+#ifdef HAS_INET6_STRUCTS
+ case AF_INET6:
+ size = sizeof(set->sin6);
+ if (statp->_u._ext.ext)
+ memcpy(&statp->_u._ext.ext->nsaddrs[nserv],
+ &set->sin6, size);
+ if (size <= sizeof(statp->nsaddr_list[nserv]))
+ memcpy(&statp->nsaddr_list[nserv],
+ &set->sin6, size);
+ else
+ statp->nsaddr_list[nserv].sin_family = 0;
+ nserv++;
+ break;
+#endif
+
+ default:
+ break;
+ }
+ set++;
+ }
+ statp->nscount = nserv;
+
+}
+
+int
+res_getservers(res_state statp, union res_sockaddr_union *set, int cnt) {
+ int i;
+ size_t size;
+ u_int16_t family;
+
+ for (i = 0; i < statp->nscount && i < cnt; i++) {
+ if (statp->_u._ext.ext)
+ family = statp->_u._ext.ext->nsaddrs[i].sin.sin_family;
+ else
+ family = statp->nsaddr_list[i].sin_family;
+
+ switch (family) {
+ case AF_INET:
+ size = sizeof(set->sin);
+ if (statp->_u._ext.ext)
+ memcpy(&set->sin,
+ &statp->_u._ext.ext->nsaddrs[i],
+ size);
+ else
+ memcpy(&set->sin, &statp->nsaddr_list[i],
+ size);
+ break;
+
+#ifdef HAS_INET6_STRUCTS
+ case AF_INET6:
+ size = sizeof(set->sin6);
+ if (statp->_u._ext.ext)
+ memcpy(&set->sin6,
+ &statp->_u._ext.ext->nsaddrs[i],
+ size);
+ else
+ memcpy(&set->sin6, &statp->nsaddr_list[i],
+ size);
+ break;
+#endif
+
+ default:
+ set->sin.sin_family = 0;
+ break;
+ }
+ set++;
+ }
+ return (statp->nscount);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/res_mkquery.c b/usr/src/lib/libresolv2_joy/common/resolv/res_mkquery.c
new file mode 100644
index 0000000000..cf36855e9d
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/res_mkquery.c
@@ -0,0 +1,386 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+/*
+ * Portions Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (C) 1996, 1997, 1988, 1999, 2001, 2003 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Copyright (c) 1985, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Portions Copyright (c) 1993 by Digital Equipment Corporation.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies, and that
+ * the name of Digital Equipment Corporation not be used in advertising or
+ * publicity pertaining to distribution of the document or software without
+ * specific, written prior permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT
+ * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)res_mkquery.c 8.1 (Berkeley) 6/4/93";
+static const char rcsid[] = "$Id: res_mkquery.c,v 1.10 2008/12/11 09:59:00 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include "port_before.h"
+#include <sys/types.h>
+#include <sys/param.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+
+#ifdef SUNW_CONFCHECK
+#include <sys/socket.h>
+#include <errno.h>
+#include <sys/stat.h>
+#endif /* SUNW_CONFCHECK */
+
+
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <string.h>
+#include "port_after.h"
+
+/* Options. Leave them on. */
+#define DEBUG
+
+extern const char *_res_opcodes[];
+
+#ifdef SUNW_CONFCHECK
+static int _confcheck(res_state statp);
+#endif /* SUNW_CONFCHECK */
+
+
+/*%
+ * Form all types of queries.
+ * Returns the size of the result or -1.
+ */
+int
+res_nmkquery(res_state statp,
+ int op, /*!< opcode of query */
+ const char *dname, /*!< domain name */
+ int class, int type, /*!< class and type of query */
+ const u_char *data, /*!< resource record data */
+ int datalen, /*!< length of data */
+ const u_char *newrr_in, /*!< new rr for modify or append */
+ u_char *buf, /*!< buffer to put query */
+ int buflen) /*!< size of buffer */
+{
+ register HEADER *hp;
+ register u_char *cp, *ep;
+ register int n;
+ u_char *dnptrs[20], **dpp, **lastdnptr;
+
+ UNUSED(newrr_in);
+
+#ifdef DEBUG
+ if (statp->options & RES_DEBUG)
+ printf(";; res_nmkquery(%s, %s, %s, %s)\n",
+ _res_opcodes[op], dname, p_class(class), p_type(type));
+#endif
+
+#ifdef SUNW_CONFCHECK
+ /*
+ * 1247019, 1265838, and 4034368: Check to see if we can
+ * bailout quickly.
+ */
+ if (_confcheck(statp) == -1) {
+ RES_SET_H_ERRNO(statp, NO_RECOVERY);
+ return(-1);
+ }
+#endif /* SUNW_CONFCHECK */
+
+ /*
+ * Initialize header fields.
+ */
+ if ((buf == NULL) || (buflen < HFIXEDSZ))
+ return (-1);
+ memset(buf, 0, HFIXEDSZ);
+ hp = (HEADER *) buf;
+ statp->id = res_nrandomid(statp);
+ hp->id = htons(statp->id);
+ hp->opcode = op;
+ hp->rd = (statp->options & RES_RECURSE) != 0U;
+ hp->rcode = NOERROR;
+ cp = buf + HFIXEDSZ;
+ ep = buf + buflen;
+ dpp = dnptrs;
+ *dpp++ = buf;
+ *dpp++ = NULL;
+ lastdnptr = dnptrs + sizeof dnptrs / sizeof dnptrs[0];
+ /*
+ * perform opcode specific processing
+ */
+ switch (op) {
+ case QUERY: /*FALLTHROUGH*/
+ case NS_NOTIFY_OP:
+ if (ep - cp < QFIXEDSZ)
+ return (-1);
+ if ((n = dn_comp(dname, cp, ep - cp - QFIXEDSZ, dnptrs,
+ lastdnptr)) < 0)
+ return (-1);
+ cp += n;
+ ns_put16(type, cp);
+ cp += INT16SZ;
+ ns_put16(class, cp);
+ cp += INT16SZ;
+ hp->qdcount = htons(1);
+ if (op == QUERY || data == NULL)
+ break;
+ /*
+ * Make an additional record for completion domain.
+ */
+ if ((ep - cp) < RRFIXEDSZ)
+ return (-1);
+ n = dn_comp((const char *)data, cp, ep - cp - RRFIXEDSZ,
+ dnptrs, lastdnptr);
+ if (n < 0)
+ return (-1);
+ cp += n;
+ ns_put16(T_NULL, cp);
+ cp += INT16SZ;
+ ns_put16(class, cp);
+ cp += INT16SZ;
+ ns_put32(0, cp);
+ cp += INT32SZ;
+ ns_put16(0, cp);
+ cp += INT16SZ;
+ hp->arcount = htons(1);
+ break;
+
+ case IQUERY:
+ /*
+ * Initialize answer section
+ */
+ if (ep - cp < 1 + RRFIXEDSZ + datalen)
+ return (-1);
+ *cp++ = '\0'; /*%< no domain name */
+ ns_put16(type, cp);
+ cp += INT16SZ;
+ ns_put16(class, cp);
+ cp += INT16SZ;
+ ns_put32(0, cp);
+ cp += INT32SZ;
+ ns_put16(datalen, cp);
+ cp += INT16SZ;
+ if (datalen) {
+ memcpy(cp, data, datalen);
+ cp += datalen;
+ }
+ hp->ancount = htons(1);
+ break;
+
+ default:
+ return (-1);
+ }
+ return (cp - buf);
+}
+
+#ifdef RES_USE_EDNS0
+/* attach OPT pseudo-RR, as documented in RFC2671 (EDNS0). */
+
+int
+res_nopt(res_state statp,
+ int n0, /*%< current offset in buffer */
+ u_char *buf, /*%< buffer to put query */
+ int buflen, /*%< size of buffer */
+ int anslen) /*%< UDP answer buffer size */
+{
+ register HEADER *hp;
+ register u_char *cp, *ep;
+ u_int16_t flags = 0;
+
+#ifdef DEBUG
+ if ((statp->options & RES_DEBUG) != 0U)
+ printf(";; res_nopt()\n");
+#endif
+
+ hp = (HEADER *) buf;
+ cp = buf + n0;
+ ep = buf + buflen;
+
+ if ((ep - cp) < 1 + RRFIXEDSZ)
+ return (-1);
+
+ *cp++ = 0; /*%< "." */
+ ns_put16(ns_t_opt, cp); /*%< TYPE */
+ cp += INT16SZ;
+ ns_put16(anslen & 0xffff, cp); /*%< CLASS = UDP payload size */
+ cp += INT16SZ;
+ *cp++ = NOERROR; /*%< extended RCODE */
+ *cp++ = 0; /*%< EDNS version */
+
+ if (statp->options & RES_USE_DNSSEC) {
+#ifdef DEBUG
+ if (statp->options & RES_DEBUG)
+ printf(";; res_opt()... ENDS0 DNSSEC\n");
+#endif
+ flags |= NS_OPT_DNSSEC_OK;
+ }
+ ns_put16(flags, cp);
+ cp += INT16SZ;
+
+ ns_put16(0U, cp); /*%< RDLEN */
+ cp += INT16SZ;
+
+ hp->arcount = htons(ntohs(hp->arcount) + 1);
+
+ return (cp - buf);
+}
+
+/*
+ * Construct variable data (RDATA) block for OPT psuedo-RR, append it
+ * to the buffer, then update the RDLEN field (previously set to zero by
+ * res_nopt()) with the new RDATA length.
+ */
+int
+res_nopt_rdata(res_state statp,
+ int n0, /*%< current offset in buffer */
+ u_char *buf, /*%< buffer to put query */
+ int buflen, /*%< size of buffer */
+ u_char *rdata, /*%< ptr to start of opt rdata */
+ u_short code, /*%< OPTION-CODE */
+ u_short len, /*%< OPTION-LENGTH */
+ u_char *data) /*%< OPTION_DATA */
+{
+ register u_char *cp, *ep;
+
+#ifdef DEBUG
+ if ((statp->options & RES_DEBUG) != 0U)
+ printf(";; res_nopt_rdata()\n");
+#endif
+
+ cp = buf + n0;
+ ep = buf + buflen;
+
+ if ((ep - cp) < (4 + len))
+ return (-1);
+
+ if (rdata < (buf + 2) || rdata >= ep)
+ return (-1);
+
+ ns_put16(code, cp);
+ cp += INT16SZ;
+
+ ns_put16(len, cp);
+ cp += INT16SZ;
+
+ memcpy(cp, data, len);
+ cp += len;
+
+ len = cp - rdata;
+ ns_put16(len, rdata - 2); /* Update RDLEN field */
+
+ return (cp - buf);
+}
+#endif
+
+#ifdef SUNW_CONFCHECK
+
+/*
+ * Time out quickly if there is no /etc/resolv.conf and a TCP connection
+ * to the local DNS server fails.
+ */
+static int _confcheck(res_state statp)
+{
+ int ns;
+ struct stat rc_stat;
+ struct sockaddr_in ns_sin;
+
+ /* First, we check to see if /etc/resolv.conf exists.
+ * If it doesn't, then it is likely that the localhost is
+ * the nameserver.
+ */
+ if (stat(_PATH_RESCONF, &rc_stat) == -1 && errno == ENOENT) {
+
+ /* Next, we check to see if _res.nsaddr is set to loopback.
+ * If it isn't, it has been altered by the application
+ * explicitly and we then want to bail with success.
+ */
+ if (statp->nsaddr.sin_addr.S_un.S_addr ==
+ htonl(INADDR_LOOPBACK)) {
+
+ /* Lastly, we try to connect to the TCP port of the
+ * nameserver. If this fails, then we know that
+ * DNS is misconfigured and we can quickly exit.
+ */
+ ns = socket(AF_INET, SOCK_STREAM, 0);
+ IN_SET_LOOPBACK_ADDR(&ns_sin);
+ ns_sin.sin_port = htons(NAMESERVER_PORT);
+ if (connect(ns, (struct sockaddr *) &ns_sin,
+ sizeof ns_sin) == -1) {
+ close(ns);
+ return(-1);
+ }
+ else {
+ close(ns);
+
+ return(0);
+ }
+ }
+
+ return(0);
+ }
+
+ return (0);
+}
+#endif /* SUNW_CONFCHECK */
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/res_mkupdate.c b/usr/src/lib/libresolv2_joy/common/resolv/res_mkupdate.c
new file mode 100644
index 0000000000..8f73e281d0
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/res_mkupdate.c
@@ -0,0 +1,1163 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*! \file
+ * \brief
+ * Based on the Dynamic DNS reference implementation by Viraj Bais
+ * &lt;viraj_bais@ccm.fm.intel.com>
+ */
+
+#if !defined(lint) && !defined(SABER)
+static const char rcsid[] = "$Id: res_mkupdate.c,v 1.10 2008/12/11 09:59:00 marka Exp $";
+#endif /* not lint */
+
+#include "port_before.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <arpa/inet.h>
+
+#include <errno.h>
+#include <limits.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <res_update.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <ctype.h>
+
+#include "port_after.h"
+
+/* Options. Leave them on. */
+#define DEBUG
+#define MAXPORT 1024
+
+static int getnum_str(u_char **, u_char *);
+static int gethexnum_str(u_char **, u_char *);
+static int getword_str(char *, int, u_char **, u_char *);
+static int getstr_str(char *, int, u_char **, u_char *);
+
+#define ShrinkBuffer(x) if ((buflen -= x) < 0) return (-2);
+
+/* Forward. */
+
+int res_protocolnumber(const char *);
+int res_servicenumber(const char *);
+
+/*%
+ * Form update packets.
+ * Returns the size of the resulting packet if no error
+ *
+ * On error,
+ * returns
+ *\li -1 if error in reading a word/number in rdata
+ * portion for update packets
+ *\li -2 if length of buffer passed is insufficient
+ *\li -3 if zone section is not the first section in
+ * the linked list, or section order has a problem
+ *\li -4 on a number overflow
+ *\li -5 unknown operation or no records
+ */
+int
+res_nmkupdate(res_state statp, ns_updrec *rrecp_in, u_char *buf, int buflen) {
+ ns_updrec *rrecp_start = rrecp_in;
+ HEADER *hp;
+ u_char *cp, *sp2, *startp, *endp;
+ int n, i, soanum, multiline;
+ ns_updrec *rrecp;
+ struct in_addr ina;
+ struct in6_addr in6a;
+ char buf2[MAXDNAME];
+ u_char buf3[MAXDNAME];
+ int section, numrrs = 0, counts[ns_s_max];
+ u_int16_t rtype, rclass;
+ u_int32_t n1, rttl;
+ u_char *dnptrs[20], **dpp, **lastdnptr;
+ int siglen, keylen, certlen;
+
+ /*
+ * Initialize header fields.
+ */
+ if ((buf == NULL) || (buflen < HFIXEDSZ))
+ return (-1);
+ memset(buf, 0, HFIXEDSZ);
+ hp = (HEADER *) buf;
+ statp->id = res_nrandomid(statp);
+ hp->id = htons(statp->id);
+ hp->opcode = ns_o_update;
+ hp->rcode = NOERROR;
+ cp = buf + HFIXEDSZ;
+ buflen -= HFIXEDSZ;
+ dpp = dnptrs;
+ *dpp++ = buf;
+ *dpp++ = NULL;
+ lastdnptr = dnptrs + sizeof dnptrs / sizeof dnptrs[0];
+
+ if (rrecp_start == NULL)
+ return (-5);
+ else if (rrecp_start->r_section != S_ZONE)
+ return (-3);
+
+ memset(counts, 0, sizeof counts);
+ for (rrecp = rrecp_start; rrecp; rrecp = NEXT(rrecp, r_glink)) {
+ numrrs++;
+ section = rrecp->r_section;
+ if (section < 0 || section >= ns_s_max)
+ return (-1);
+ counts[section]++;
+ for (i = section + 1; i < ns_s_max; i++)
+ if (counts[i])
+ return (-3);
+ rtype = rrecp->r_type;
+ rclass = rrecp->r_class;
+ rttl = rrecp->r_ttl;
+ /* overload class and type */
+ if (section == S_PREREQ) {
+ rttl = 0;
+ switch (rrecp->r_opcode) {
+ case YXDOMAIN:
+ rclass = C_ANY;
+ rtype = T_ANY;
+ rrecp->r_size = 0;
+ break;
+ case NXDOMAIN:
+ rclass = C_NONE;
+ rtype = T_ANY;
+ rrecp->r_size = 0;
+ break;
+ case NXRRSET:
+ rclass = C_NONE;
+ rrecp->r_size = 0;
+ break;
+ case YXRRSET:
+ if (rrecp->r_size == 0)
+ rclass = C_ANY;
+ break;
+ default:
+ fprintf(stderr,
+ "res_mkupdate: incorrect opcode: %d\n",
+ rrecp->r_opcode);
+ fflush(stderr);
+ return (-1);
+ }
+ } else if (section == S_UPDATE) {
+ switch (rrecp->r_opcode) {
+ case DELETE:
+ rclass = rrecp->r_size == 0 ? C_ANY : C_NONE;
+ break;
+ case ADD:
+ break;
+ default:
+ fprintf(stderr,
+ "res_mkupdate: incorrect opcode: %d\n",
+ rrecp->r_opcode);
+ fflush(stderr);
+ return (-1);
+ }
+ }
+
+ /*
+ * XXX appending default domain to owner name is omitted,
+ * fqdn must be provided
+ */
+ if ((n = dn_comp(rrecp->r_dname, cp, buflen, dnptrs,
+ lastdnptr)) < 0)
+ return (-1);
+ cp += n;
+ ShrinkBuffer(n + 2*INT16SZ);
+ PUTSHORT(rtype, cp);
+ PUTSHORT(rclass, cp);
+ if (section == S_ZONE) {
+ if (numrrs != 1 || rrecp->r_type != T_SOA)
+ return (-3);
+ continue;
+ }
+ ShrinkBuffer(INT32SZ + INT16SZ);
+ PUTLONG(rttl, cp);
+ sp2 = cp; /*%< save pointer to length byte */
+ cp += INT16SZ;
+ if (rrecp->r_size == 0) {
+ if (section == S_UPDATE && rclass != C_ANY)
+ return (-1);
+ else {
+ PUTSHORT(0, sp2);
+ continue;
+ }
+ }
+ startp = rrecp->r_data;
+ endp = startp + rrecp->r_size - 1;
+ /* XXX this should be done centrally. */
+ switch (rrecp->r_type) {
+ case T_A:
+ if (!getword_str(buf2, sizeof buf2, &startp, endp))
+ return (-1);
+ if (!inet_aton(buf2, &ina))
+ return (-1);
+ n1 = ntohl(ina.s_addr);
+ ShrinkBuffer(INT32SZ);
+ PUTLONG(n1, cp);
+ break;
+ case T_CNAME:
+ case T_MB:
+ case T_MG:
+ case T_MR:
+ case T_NS:
+ case T_PTR:
+ case ns_t_dname:
+ if (!getword_str(buf2, sizeof buf2, &startp, endp))
+ return (-1);
+ n = dn_comp(buf2, cp, buflen, dnptrs, lastdnptr);
+ if (n < 0)
+ return (-1);
+ cp += n;
+ ShrinkBuffer(n);
+ break;
+ case T_MINFO:
+ case T_SOA:
+ case T_RP:
+ for (i = 0; i < 2; i++) {
+ if (!getword_str(buf2, sizeof buf2, &startp,
+ endp))
+ return (-1);
+ n = dn_comp(buf2, cp, buflen,
+ dnptrs, lastdnptr);
+ if (n < 0)
+ return (-1);
+ cp += n;
+ ShrinkBuffer(n);
+ }
+ if (rrecp->r_type == T_SOA) {
+ ShrinkBuffer(5 * INT32SZ);
+ while (isspace(*startp) || !*startp)
+ startp++;
+ if (*startp == '(') {
+ multiline = 1;
+ startp++;
+ } else
+ multiline = 0;
+ /* serial, refresh, retry, expire, minimum */
+ for (i = 0; i < 5; i++) {
+ soanum = getnum_str(&startp, endp);
+ if (soanum < 0)
+ return (-1);
+ PUTLONG(soanum, cp);
+ }
+ if (multiline) {
+ while (isspace(*startp) || !*startp)
+ startp++;
+ if (*startp != ')')
+ return (-1);
+ }
+ }
+ break;
+ case T_MX:
+ case T_AFSDB:
+ case T_RT:
+ n = getnum_str(&startp, endp);
+ if (n < 0)
+ return (-1);
+ ShrinkBuffer(INT16SZ);
+ PUTSHORT(n, cp);
+ if (!getword_str(buf2, sizeof buf2, &startp, endp))
+ return (-1);
+ n = dn_comp(buf2, cp, buflen, dnptrs, lastdnptr);
+ if (n < 0)
+ return (-1);
+ cp += n;
+ ShrinkBuffer(n);
+ break;
+ case T_SRV:
+ n = getnum_str(&startp, endp);
+ if (n < 0)
+ return (-1);
+ ShrinkBuffer(INT16SZ);
+ PUTSHORT(n, cp);
+
+ n = getnum_str(&startp, endp);
+ if (n < 0)
+ return (-1);
+ ShrinkBuffer(INT16SZ);
+ PUTSHORT(n, cp);
+
+ n = getnum_str(&startp, endp);
+ if (n < 0)
+ return (-1);
+ ShrinkBuffer(INT16SZ);
+ PUTSHORT(n, cp);
+
+ if (!getword_str(buf2, sizeof buf2, &startp, endp))
+ return (-1);
+ n = dn_comp(buf2, cp, buflen, NULL, NULL);
+ if (n < 0)
+ return (-1);
+ cp += n;
+ ShrinkBuffer(n);
+ break;
+ case T_PX:
+ n = getnum_str(&startp, endp);
+ if (n < 0)
+ return (-1);
+ PUTSHORT(n, cp);
+ ShrinkBuffer(INT16SZ);
+ for (i = 0; i < 2; i++) {
+ if (!getword_str(buf2, sizeof buf2, &startp,
+ endp))
+ return (-1);
+ n = dn_comp(buf2, cp, buflen, dnptrs,
+ lastdnptr);
+ if (n < 0)
+ return (-1);
+ cp += n;
+ ShrinkBuffer(n);
+ }
+ break;
+ case T_WKS: {
+ char bm[MAXPORT/8];
+ unsigned int maxbm = 0;
+
+ if (!getword_str(buf2, sizeof buf2, &startp, endp))
+ return (-1);
+ if (!inet_aton(buf2, &ina))
+ return (-1);
+ n1 = ntohl(ina.s_addr);
+ ShrinkBuffer(INT32SZ);
+ PUTLONG(n1, cp);
+
+ if (!getword_str(buf2, sizeof buf2, &startp, endp))
+ return (-1);
+ if ((i = res_protocolnumber(buf2)) < 0)
+ return (-1);
+ ShrinkBuffer(1);
+ *cp++ = i & 0xff;
+
+ for (i = 0; i < MAXPORT/8 ; i++)
+ bm[i] = 0;
+
+ while (getword_str(buf2, sizeof buf2, &startp, endp)) {
+ if ((n = res_servicenumber(buf2)) <= 0)
+ return (-1);
+
+ if (n < MAXPORT) {
+ bm[n/8] |= (0x80>>(n%8));
+ if ((unsigned)n > maxbm)
+ maxbm = n;
+ } else
+ return (-1);
+ }
+ maxbm = maxbm/8 + 1;
+ ShrinkBuffer(maxbm);
+ memcpy(cp, bm, maxbm);
+ cp += maxbm;
+ break;
+ }
+ case T_HINFO:
+ for (i = 0; i < 2; i++) {
+ if ((n = getstr_str(buf2, sizeof buf2,
+ &startp, endp)) < 0)
+ return (-1);
+ if (n > 255)
+ return (-1);
+ ShrinkBuffer(n+1);
+ *cp++ = n;
+ memcpy(cp, buf2, n);
+ cp += n;
+ }
+ break;
+ case T_TXT:
+ for (;;) {
+ if ((n = getstr_str(buf2, sizeof buf2,
+ &startp, endp)) < 0) {
+ if (cp != (sp2 + INT16SZ))
+ break;
+ return (-1);
+ }
+ if (n > 255)
+ return (-1);
+ ShrinkBuffer(n+1);
+ *cp++ = n;
+ memcpy(cp, buf2, n);
+ cp += n;
+ }
+ break;
+ case T_X25:
+ /* RFC1183 */
+ if ((n = getstr_str(buf2, sizeof buf2, &startp,
+ endp)) < 0)
+ return (-1);
+ if (n > 255)
+ return (-1);
+ ShrinkBuffer(n+1);
+ *cp++ = n;
+ memcpy(cp, buf2, n);
+ cp += n;
+ break;
+ case T_ISDN:
+ /* RFC1183 */
+ if ((n = getstr_str(buf2, sizeof buf2, &startp,
+ endp)) < 0)
+ return (-1);
+ if ((n > 255) || (n == 0))
+ return (-1);
+ ShrinkBuffer(n+1);
+ *cp++ = n;
+ memcpy(cp, buf2, n);
+ cp += n;
+ if ((n = getstr_str(buf2, sizeof buf2, &startp,
+ endp)) < 0)
+ n = 0;
+ if (n > 255)
+ return (-1);
+ ShrinkBuffer(n+1);
+ *cp++ = n;
+ memcpy(cp, buf2, n);
+ cp += n;
+ break;
+ case T_NSAP:
+ if ((n = inet_nsap_addr((char *)startp, (u_char *)buf2, sizeof(buf2))) != 0) {
+ ShrinkBuffer(n);
+ memcpy(cp, buf2, n);
+ cp += n;
+ } else {
+ return (-1);
+ }
+ break;
+ case T_LOC:
+ if ((n = loc_aton((char *)startp, (u_char *)buf2)) != 0) {
+ ShrinkBuffer(n);
+ memcpy(cp, buf2, n);
+ cp += n;
+ } else
+ return (-1);
+ break;
+ case ns_t_sig:
+ {
+ int sig_type, success, dateerror;
+ u_int32_t exptime, timesigned;
+
+ /* type */
+ if ((n = getword_str(buf2, sizeof buf2,
+ &startp, endp)) < 0)
+ return (-1);
+ sig_type = sym_ston(__p_type_syms, buf2, &success);
+ if (!success || sig_type == ns_t_any)
+ return (-1);
+ ShrinkBuffer(INT16SZ);
+ PUTSHORT(sig_type, cp);
+ /* alg */
+ n = getnum_str(&startp, endp);
+ if (n < 0)
+ return (-1);
+ ShrinkBuffer(1);
+ *cp++ = n;
+ /* labels */
+ n = getnum_str(&startp, endp);
+ if (n <= 0 || n > 255)
+ return (-1);
+ ShrinkBuffer(1);
+ *cp++ = n;
+ /* ottl & expire */
+ if (!getword_str(buf2, sizeof buf2, &startp, endp))
+ return (-1);
+ exptime = ns_datetosecs(buf2, &dateerror);
+ if (!dateerror) {
+ ShrinkBuffer(INT32SZ);
+ PUTLONG(rttl, cp);
+ }
+ else {
+ char *ulendp;
+ u_int32_t ottl;
+
+ errno = 0;
+ ottl = strtoul(buf2, &ulendp, 10);
+ if (errno != 0 ||
+ (ulendp != NULL && *ulendp != '\0'))
+ return (-1);
+ ShrinkBuffer(INT32SZ);
+ PUTLONG(ottl, cp);
+ if (!getword_str(buf2, sizeof buf2, &startp,
+ endp))
+ return (-1);
+ exptime = ns_datetosecs(buf2, &dateerror);
+ if (dateerror)
+ return (-1);
+ }
+ /* expire */
+ ShrinkBuffer(INT32SZ);
+ PUTLONG(exptime, cp);
+ /* timesigned */
+ if (!getword_str(buf2, sizeof buf2, &startp, endp))
+ return (-1);
+ timesigned = ns_datetosecs(buf2, &dateerror);
+ if (!dateerror) {
+ ShrinkBuffer(INT32SZ);
+ PUTLONG(timesigned, cp);
+ }
+ else
+ return (-1);
+ /* footprint */
+ n = getnum_str(&startp, endp);
+ if (n < 0)
+ return (-1);
+ ShrinkBuffer(INT16SZ);
+ PUTSHORT(n, cp);
+ /* signer name */
+ if (!getword_str(buf2, sizeof buf2, &startp, endp))
+ return (-1);
+ n = dn_comp(buf2, cp, buflen, dnptrs, lastdnptr);
+ if (n < 0)
+ return (-1);
+ cp += n;
+ ShrinkBuffer(n);
+ /* sig */
+ if ((n = getword_str(buf2, sizeof buf2,
+ &startp, endp)) < 0)
+ return (-1);
+ siglen = b64_pton(buf2, buf3, sizeof(buf3));
+ if (siglen < 0)
+ return (-1);
+ ShrinkBuffer(siglen);
+ memcpy(cp, buf3, siglen);
+ cp += siglen;
+ break;
+ }
+ case ns_t_key:
+ /* flags */
+ n = gethexnum_str(&startp, endp);
+ if (n < 0)
+ return (-1);
+ ShrinkBuffer(INT16SZ);
+ PUTSHORT(n, cp);
+ /* proto */
+ n = getnum_str(&startp, endp);
+ if (n < 0)
+ return (-1);
+ ShrinkBuffer(1);
+ *cp++ = n;
+ /* alg */
+ n = getnum_str(&startp, endp);
+ if (n < 0)
+ return (-1);
+ ShrinkBuffer(1);
+ *cp++ = n;
+ /* key */
+ if ((n = getword_str(buf2, sizeof buf2,
+ &startp, endp)) < 0)
+ return (-1);
+ keylen = b64_pton(buf2, buf3, sizeof(buf3));
+ if (keylen < 0)
+ return (-1);
+ ShrinkBuffer(keylen);
+ memcpy(cp, buf3, keylen);
+ cp += keylen;
+ break;
+ case ns_t_nxt:
+ {
+ int success, nxt_type;
+ u_char data[32];
+ int maxtype;
+
+ /* next name */
+ if (!getword_str(buf2, sizeof buf2, &startp, endp))
+ return (-1);
+ n = dn_comp(buf2, cp, buflen, NULL, NULL);
+ if (n < 0)
+ return (-1);
+ cp += n;
+ ShrinkBuffer(n);
+ maxtype = 0;
+ memset(data, 0, sizeof data);
+ for (;;) {
+ if (!getword_str(buf2, sizeof buf2, &startp,
+ endp))
+ break;
+ nxt_type = sym_ston(__p_type_syms, buf2,
+ &success);
+ if (!success || !ns_t_rr_p(nxt_type))
+ return (-1);
+ NS_NXT_BIT_SET(nxt_type, data);
+ if (nxt_type > maxtype)
+ maxtype = nxt_type;
+ }
+ n = maxtype/NS_NXT_BITS+1;
+ ShrinkBuffer(n);
+ memcpy(cp, data, n);
+ cp += n;
+ break;
+ }
+ case ns_t_cert:
+ /* type */
+ n = getnum_str(&startp, endp);
+ if (n < 0)
+ return (-1);
+ ShrinkBuffer(INT16SZ);
+ PUTSHORT(n, cp);
+ /* key tag */
+ n = getnum_str(&startp, endp);
+ if (n < 0)
+ return (-1);
+ ShrinkBuffer(INT16SZ);
+ PUTSHORT(n, cp);
+ /* alg */
+ n = getnum_str(&startp, endp);
+ if (n < 0)
+ return (-1);
+ ShrinkBuffer(1);
+ *cp++ = n;
+ /* cert */
+ if ((n = getword_str(buf2, sizeof buf2,
+ &startp, endp)) < 0)
+ return (-1);
+ certlen = b64_pton(buf2, buf3, sizeof(buf3));
+ if (certlen < 0)
+ return (-1);
+ ShrinkBuffer(certlen);
+ memcpy(cp, buf3, certlen);
+ cp += certlen;
+ break;
+ case ns_t_aaaa:
+ if (!getword_str(buf2, sizeof buf2, &startp, endp))
+ return (-1);
+ if (inet_pton(AF_INET6, buf2, &in6a) <= 0)
+ return (-1);
+ ShrinkBuffer(NS_IN6ADDRSZ);
+ memcpy(cp, &in6a, NS_IN6ADDRSZ);
+ cp += NS_IN6ADDRSZ;
+ break;
+ case ns_t_naptr:
+ /* Order Preference Flags Service Replacement Regexp */
+ /* Order */
+ n = getnum_str(&startp, endp);
+ if (n < 0 || n > 65535)
+ return (-1);
+ ShrinkBuffer(INT16SZ);
+ PUTSHORT(n, cp);
+ /* Preference */
+ n = getnum_str(&startp, endp);
+ if (n < 0 || n > 65535)
+ return (-1);
+ ShrinkBuffer(INT16SZ);
+ PUTSHORT(n, cp);
+ /* Flags */
+ if ((n = getstr_str(buf2, sizeof buf2,
+ &startp, endp)) < 0) {
+ return (-1);
+ }
+ if (n > 255)
+ return (-1);
+ ShrinkBuffer(n+1);
+ *cp++ = n;
+ memcpy(cp, buf2, n);
+ cp += n;
+ /* Service Classes */
+ if ((n = getstr_str(buf2, sizeof buf2,
+ &startp, endp)) < 0) {
+ return (-1);
+ }
+ if (n > 255)
+ return (-1);
+ ShrinkBuffer(n+1);
+ *cp++ = n;
+ memcpy(cp, buf2, n);
+ cp += n;
+ /* Pattern */
+ if ((n = getstr_str(buf2, sizeof buf2,
+ &startp, endp)) < 0) {
+ return (-1);
+ }
+ if (n > 255)
+ return (-1);
+ ShrinkBuffer(n+1);
+ *cp++ = n;
+ memcpy(cp, buf2, n);
+ cp += n;
+ /* Replacement */
+ if (!getword_str(buf2, sizeof buf2, &startp, endp))
+ return (-1);
+ n = dn_comp(buf2, cp, buflen, NULL, NULL);
+ if (n < 0)
+ return (-1);
+ cp += n;
+ ShrinkBuffer(n);
+ break;
+ default:
+ return (-1);
+ } /*switch*/
+ n = (u_int16_t)((cp - sp2) - INT16SZ);
+ PUTSHORT(n, sp2);
+ } /*for*/
+
+ hp->qdcount = htons(counts[0]);
+ hp->ancount = htons(counts[1]);
+ hp->nscount = htons(counts[2]);
+ hp->arcount = htons(counts[3]);
+ return (cp - buf);
+}
+
+/*%
+ * Get a whitespace delimited word from a string (not file)
+ * into buf. modify the start pointer to point after the
+ * word in the string.
+ */
+static int
+getword_str(char *buf, int size, u_char **startpp, u_char *endp) {
+ char *cp;
+ int c;
+
+ for (cp = buf; *startpp <= endp; ) {
+ c = **startpp;
+ if (isspace(c) || c == '\0') {
+ if (cp != buf) /*%< trailing whitespace */
+ break;
+ else { /*%< leading whitespace */
+ (*startpp)++;
+ continue;
+ }
+ }
+ (*startpp)++;
+ if (cp >= buf+size-1)
+ break;
+ *cp++ = (u_char)c;
+ }
+ *cp = '\0';
+ return (cp != buf);
+}
+
+/*%
+ * get a white spae delimited string from memory. Process quoted strings
+ * and \\DDD escapes. Return length or -1 on error. Returned string may
+ * contain nulls.
+ */
+static char digits[] = "0123456789";
+static int
+getstr_str(char *buf, int size, u_char **startpp, u_char *endp) {
+ char *cp;
+ int c, c1 = 0;
+ int inquote = 0;
+ int seen_quote = 0;
+ int escape = 0;
+ int dig = 0;
+
+ for (cp = buf; *startpp <= endp; ) {
+ if ((c = **startpp) == '\0')
+ break;
+ /* leading white space */
+ if ((cp == buf) && !seen_quote && isspace(c)) {
+ (*startpp)++;
+ continue;
+ }
+
+ switch (c) {
+ case '\\':
+ if (!escape) {
+ escape = 1;
+ dig = 0;
+ c1 = 0;
+ (*startpp)++;
+ continue;
+ }
+ goto do_escape;
+ case '"':
+ if (!escape) {
+ inquote = !inquote;
+ seen_quote = 1;
+ (*startpp)++;
+ continue;
+ }
+ /* fall through */
+ default:
+ do_escape:
+ if (escape) {
+ switch (c) {
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ c1 = c1 * 10 +
+ (strchr(digits, c) - digits);
+
+ if (++dig == 3) {
+ c = c1 &0xff;
+ break;
+ }
+ (*startpp)++;
+ continue;
+ }
+ escape = 0;
+ } else if (!inquote && isspace(c))
+ goto done;
+ if (cp >= buf+size-1)
+ goto done;
+ *cp++ = (u_char)c;
+ (*startpp)++;
+ }
+ }
+ done:
+ *cp = '\0';
+ return ((cp == buf)? (seen_quote? 0: -1): (cp - buf));
+}
+
+/*%
+ * Get a whitespace delimited base 16 number from a string (not file) into buf
+ * update the start pointer to point after the number in the string.
+ */
+static int
+gethexnum_str(u_char **startpp, u_char *endp) {
+ int c, n;
+ int seendigit = 0;
+ int m = 0;
+
+ if (*startpp + 2 >= endp || strncasecmp((char *)*startpp, "0x", 2) != 0)
+ return getnum_str(startpp, endp);
+ (*startpp)+=2;
+ for (n = 0; *startpp <= endp; ) {
+ c = **startpp;
+ if (isspace(c) || c == '\0') {
+ if (seendigit) /*%< trailing whitespace */
+ break;
+ else { /*%< leading whitespace */
+ (*startpp)++;
+ continue;
+ }
+ }
+ if (c == ';') {
+ while ((*startpp <= endp) &&
+ ((c = **startpp) != '\n'))
+ (*startpp)++;
+ if (seendigit)
+ break;
+ continue;
+ }
+ if (!isxdigit(c)) {
+ if (c == ')' && seendigit) {
+ (*startpp)--;
+ break;
+ }
+ return (-1);
+ }
+ (*startpp)++;
+ if (isdigit(c))
+ n = n * 16 + (c - '0');
+ else
+ n = n * 16 + (tolower(c) - 'a' + 10);
+ seendigit = 1;
+ }
+ return (n + m);
+}
+
+/*%
+ * Get a whitespace delimited base 10 number from a string (not file) into buf
+ * update the start pointer to point after the number in the string.
+ */
+static int
+getnum_str(u_char **startpp, u_char *endp) {
+ int c, n;
+ int seendigit = 0;
+ int m = 0;
+
+ for (n = 0; *startpp <= endp; ) {
+ c = **startpp;
+ if (isspace(c) || c == '\0') {
+ if (seendigit) /*%< trailing whitespace */
+ break;
+ else { /*%< leading whitespace */
+ (*startpp)++;
+ continue;
+ }
+ }
+ if (c == ';') {
+ while ((*startpp <= endp) &&
+ ((c = **startpp) != '\n'))
+ (*startpp)++;
+ if (seendigit)
+ break;
+ continue;
+ }
+ if (!isdigit(c)) {
+ if (c == ')' && seendigit) {
+ (*startpp)--;
+ break;
+ }
+ return (-1);
+ }
+ (*startpp)++;
+ n = n * 10 + (c - '0');
+ seendigit = 1;
+ }
+ return (n + m);
+}
+
+/*%
+ * Allocate a resource record buffer & save rr info.
+ */
+ns_updrec *
+res_mkupdrec(int section, const char *dname,
+ u_int class, u_int type, u_long ttl) {
+ ns_updrec *rrecp = (ns_updrec *)calloc(1, sizeof(ns_updrec));
+
+ if (!rrecp || !(rrecp->r_dname = strdup(dname))) {
+ if (rrecp)
+ free((char *)rrecp);
+ return (NULL);
+ }
+ INIT_LINK(rrecp, r_link);
+ INIT_LINK(rrecp, r_glink);
+ rrecp->r_class = (ns_class)class;
+ rrecp->r_type = (ns_type)type;
+ rrecp->r_ttl = ttl;
+ rrecp->r_section = (ns_sect)section;
+ return (rrecp);
+}
+
+/*%
+ * Free a resource record buffer created by res_mkupdrec.
+ */
+void
+res_freeupdrec(ns_updrec *rrecp) {
+ /* Note: freeing r_dp is the caller's responsibility. */
+ if (rrecp->r_dname != NULL)
+ free(rrecp->r_dname);
+ free(rrecp);
+}
+
+struct valuelist {
+ struct valuelist * next;
+ struct valuelist * prev;
+ char * name;
+ char * proto;
+ int port;
+};
+static struct valuelist *servicelist, *protolist;
+
+static void
+res_buildservicelist() {
+ struct servent *sp;
+ struct valuelist *slp;
+
+#ifdef MAYBE_HESIOD
+ setservent(0);
+#else
+ setservent(1);
+#endif
+ while ((sp = getservent()) != NULL) {
+ slp = (struct valuelist *)malloc(sizeof(struct valuelist));
+ if (!slp)
+ break;
+ slp->name = strdup(sp->s_name);
+ slp->proto = strdup(sp->s_proto);
+ if ((slp->name == NULL) || (slp->proto == NULL)) {
+ if (slp->name) free(slp->name);
+ if (slp->proto) free(slp->proto);
+ free(slp);
+ break;
+ }
+ slp->port = ntohs((u_int16_t)sp->s_port); /*%< host byt order */
+ slp->next = servicelist;
+ slp->prev = NULL;
+ if (servicelist)
+ servicelist->prev = slp;
+ servicelist = slp;
+ }
+ endservent();
+}
+
+void
+res_destroyservicelist() {
+ struct valuelist *slp, *slp_next;
+
+ for (slp = servicelist; slp != NULL; slp = slp_next) {
+ slp_next = slp->next;
+ free(slp->name);
+ free(slp->proto);
+ free(slp);
+ }
+ servicelist = (struct valuelist *)0;
+}
+
+void
+res_buildprotolist(void) {
+ struct protoent *pp;
+ struct valuelist *slp;
+
+#ifdef MAYBE_HESIOD
+ setprotoent(0);
+#else
+ setprotoent(1);
+#endif
+ while ((pp = getprotoent()) != NULL) {
+ slp = (struct valuelist *)malloc(sizeof(struct valuelist));
+ if (!slp)
+ break;
+ slp->name = strdup(pp->p_name);
+ if (slp->name == NULL) {
+ free(slp);
+ break;
+ }
+ slp->port = pp->p_proto; /*%< host byte order */
+ slp->next = protolist;
+ slp->prev = NULL;
+ if (protolist)
+ protolist->prev = slp;
+ protolist = slp;
+ }
+ endprotoent();
+}
+
+void
+res_destroyprotolist(void) {
+ struct valuelist *plp, *plp_next;
+
+ for (plp = protolist; plp != NULL; plp = plp_next) {
+ plp_next = plp->next;
+ free(plp->name);
+ free(plp);
+ }
+ protolist = (struct valuelist *)0;
+}
+
+static int
+findservice(const char *s, struct valuelist **list) {
+ struct valuelist *lp = *list;
+ int n;
+
+ for (; lp != NULL; lp = lp->next)
+ if (strcasecmp(lp->name, s) == 0) {
+ if (lp != *list) {
+ lp->prev->next = lp->next;
+ if (lp->next)
+ lp->next->prev = lp->prev;
+ (*list)->prev = lp;
+ lp->next = *list;
+ *list = lp;
+ }
+ return (lp->port); /*%< host byte order */
+ }
+ if (sscanf(s, "%d", &n) != 1 || n <= 0)
+ n = -1;
+ return (n);
+}
+
+/*%
+ * Convert service name or (ascii) number to int.
+ */
+int
+res_servicenumber(const char *p) {
+ if (servicelist == (struct valuelist *)0)
+ res_buildservicelist();
+ return (findservice(p, &servicelist));
+}
+
+/*%
+ * Convert protocol name or (ascii) number to int.
+ */
+int
+res_protocolnumber(const char *p) {
+ if (protolist == (struct valuelist *)0)
+ res_buildprotolist();
+ return (findservice(p, &protolist));
+}
+
+static struct servent *
+cgetservbyport(u_int16_t port, const char *proto) { /*%< Host byte order. */
+ struct valuelist **list = &servicelist;
+ struct valuelist *lp = *list;
+ static struct servent serv;
+
+ port = ntohs(port);
+ for (; lp != NULL; lp = lp->next) {
+ if (port != (u_int16_t)lp->port) /*%< Host byte order. */
+ continue;
+ if (strcasecmp(lp->proto, proto) == 0) {
+ if (lp != *list) {
+ lp->prev->next = lp->next;
+ if (lp->next)
+ lp->next->prev = lp->prev;
+ (*list)->prev = lp;
+ lp->next = *list;
+ *list = lp;
+ }
+ serv.s_name = lp->name;
+ serv.s_port = htons((u_int16_t)lp->port);
+ serv.s_proto = lp->proto;
+ return (&serv);
+ }
+ }
+ return (0);
+}
+
+static struct protoent *
+cgetprotobynumber(int proto) { /*%< Host byte order. */
+ struct valuelist **list = &protolist;
+ struct valuelist *lp = *list;
+ static struct protoent prot;
+
+ for (; lp != NULL; lp = lp->next)
+ if (lp->port == proto) { /*%< Host byte order. */
+ if (lp != *list) {
+ lp->prev->next = lp->next;
+ if (lp->next)
+ lp->next->prev = lp->prev;
+ (*list)->prev = lp;
+ lp->next = *list;
+ *list = lp;
+ }
+ prot.p_name = lp->name;
+ prot.p_proto = lp->port; /*%< Host byte order. */
+ return (&prot);
+ }
+ return (0);
+}
+
+const char *
+res_protocolname(int num) {
+ static char number[8];
+ struct protoent *pp;
+
+ if (protolist == (struct valuelist *)0)
+ res_buildprotolist();
+ pp = cgetprotobynumber(num);
+ if (pp == 0) {
+ (void) sprintf(number, "%d", num);
+ return (number);
+ }
+ return (pp->p_name);
+}
+
+const char *
+res_servicename(u_int16_t port, const char *proto) { /*%< Host byte order. */
+ static char number[8];
+ struct servent *ss;
+
+ if (servicelist == (struct valuelist *)0)
+ res_buildservicelist();
+ ss = cgetservbyport(htons(port), proto);
+ if (ss == 0) {
+ (void) sprintf(number, "%d", port);
+ return (number);
+ }
+ return (ss->s_name);
+}
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/res_mkupdate.h b/usr/src/lib/libresolv2_joy/common/resolv/res_mkupdate.h
new file mode 100644
index 0000000000..96c452d89e
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/res_mkupdate.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1998,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _RES_MKUPDATE_H_
+#define _RES_MKUPDATE_H_
+
+__BEGIN_DECLS
+__END_DECLS
+
+#endif /* _RES_MKUPDATE_H_ */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/res_private.h b/usr/src/lib/libresolv2_joy/common/resolv/res_private.h
new file mode 100644
index 0000000000..4e98157ced
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/res_private.h
@@ -0,0 +1,22 @@
+#ifndef res_private_h
+#define res_private_h
+
+struct __res_state_ext {
+ union res_sockaddr_union nsaddrs[MAXNS];
+ struct sort_list {
+ int af;
+ union {
+ struct in_addr ina;
+ struct in6_addr in6a;
+ } addr, mask;
+ } sort_list[MAXRESOLVSORT];
+ char nsuffix[64];
+ char nsuffix2[64];
+};
+
+extern int
+res_ourserver_p(const res_state statp, const struct sockaddr *sa);
+
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/res_query.c b/usr/src/lib/libresolv2_joy/common/resolv/res_query.c
new file mode 100644
index 0000000000..09df3da1fc
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/res_query.c
@@ -0,0 +1,440 @@
+/*
+ * Portions Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (C) 1996-2001, 2003 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Copyright (c) 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Portions Copyright (c) 1993 by Digital Equipment Corporation.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies, and that
+ * the name of Digital Equipment Corporation not be used in advertising or
+ * publicity pertaining to distribution of the document or software without
+ * specific, written prior permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT
+ * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)res_query.c 8.1 (Berkeley) 6/4/93";
+static const char rcsid[] = "$Id: res_query.c,v 1.11 2008/11/14 02:36:51 marka Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include "port_before.h"
+#include <sys/types.h>
+#include <sys/param.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+#include <ctype.h>
+#include <errno.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "port_after.h"
+
+/* Options. Leave them on. */
+#define DEBUG
+
+#if PACKETSZ > 1024
+#define MAXPACKET PACKETSZ
+#else
+#define MAXPACKET 1024
+#endif
+
+/*%
+ * Formulate a normal query, send, and await answer.
+ * Returned answer is placed in supplied buffer "answer".
+ * Perform preliminary check of answer, returning success only
+ * if no error is indicated and the answer count is nonzero.
+ * Return the size of the response on success, -1 on error.
+ * Error number is left in H_ERRNO.
+ *
+ * Caller must parse answer and determine whether it answers the question.
+ */
+int
+res_nquery(res_state statp,
+ const char *name, /*%< domain name */
+ int class, int type, /*%< class and type of query */
+ u_char *answer, /*%< buffer to put answer */
+ int anslen) /*%< size of answer buffer */
+{
+ u_char buf[MAXPACKET];
+ HEADER *hp = (HEADER *) answer;
+ u_int oflags;
+ u_char *rdata;
+ int n;
+
+ oflags = statp->_flags;
+
+again:
+ hp->rcode = NOERROR; /*%< default */
+#ifdef DEBUG
+ if (statp->options & RES_DEBUG)
+ printf(";; res_query(%s, %d, %d)\n", name, class, type);
+#endif
+
+ n = res_nmkquery(statp, QUERY, name, class, type, NULL, 0, NULL,
+ buf, sizeof(buf));
+#ifdef RES_USE_EDNS0
+ if (n > 0 && (statp->_flags & RES_F_EDNS0ERR) == 0 &&
+ (statp->options & (RES_USE_EDNS0|RES_USE_DNSSEC|RES_NSID))) {
+ n = res_nopt(statp, n, buf, sizeof(buf), anslen);
+ rdata = &buf[n];
+ if (n > 0 && (statp->options & RES_NSID) != 0U) {
+ n = res_nopt_rdata(statp, n, buf, sizeof(buf), rdata,
+ NS_OPT_NSID, 0, NULL);
+ }
+ }
+#endif
+ if (n <= 0) {
+#ifdef DEBUG
+ if (statp->options & RES_DEBUG)
+ printf(";; res_query: mkquery failed\n");
+#endif
+ RES_SET_H_ERRNO(statp, NO_RECOVERY);
+ return (n);
+ }
+
+ n = res_nsend(statp, buf, n, answer, anslen);
+ if (n < 0) {
+#ifdef RES_USE_EDNS0
+ /* if the query choked with EDNS0, retry without EDNS0 */
+ if ((statp->options & (RES_USE_EDNS0|RES_USE_DNSSEC)) != 0U &&
+ ((oflags ^ statp->_flags) & RES_F_EDNS0ERR) != 0) {
+ statp->_flags |= RES_F_EDNS0ERR;
+ if (statp->options & RES_DEBUG)
+ printf(";; res_nquery: retry without EDNS0\n");
+ goto again;
+ }
+#endif
+#ifdef DEBUG
+ if (statp->options & RES_DEBUG)
+ printf(";; res_query: send error\n");
+#endif
+ RES_SET_H_ERRNO(statp, TRY_AGAIN);
+ return (n);
+ }
+
+ if (hp->rcode != NOERROR || ntohs(hp->ancount) == 0) {
+#ifdef DEBUG
+ if (statp->options & RES_DEBUG)
+ printf(";; rcode = (%s), counts = an:%d ns:%d ar:%d\n",
+ p_rcode(hp->rcode),
+ ntohs(hp->ancount),
+ ntohs(hp->nscount),
+ ntohs(hp->arcount));
+#endif
+ switch (hp->rcode) {
+ case NXDOMAIN:
+ RES_SET_H_ERRNO(statp, HOST_NOT_FOUND);
+ break;
+ case SERVFAIL:
+ RES_SET_H_ERRNO(statp, TRY_AGAIN);
+ break;
+ case NOERROR:
+ RES_SET_H_ERRNO(statp, NO_DATA);
+ break;
+ case FORMERR:
+ case NOTIMP:
+ case REFUSED:
+ default:
+ RES_SET_H_ERRNO(statp, NO_RECOVERY);
+ break;
+ }
+ return (-1);
+ }
+ return (n);
+}
+
+/*%
+ * Formulate a normal query, send, and retrieve answer in supplied buffer.
+ * Return the size of the response on success, -1 on error.
+ * If enabled, implement search rules until answer or unrecoverable failure
+ * is detected. Error code, if any, is left in H_ERRNO.
+ */
+int
+res_nsearch(res_state statp,
+ const char *name, /*%< domain name */
+ int class, int type, /*%< class and type of query */
+ u_char *answer, /*%< buffer to put answer */
+ int anslen) /*%< size of answer */
+{
+ const char *cp, * const *domain;
+ HEADER *hp = (HEADER *) answer;
+ char tmp[NS_MAXDNAME];
+ u_int dots;
+ int trailing_dot, ret, saved_herrno;
+ int got_nodata = 0, got_servfail = 0, root_on_list = 0;
+ int tried_as_is = 0;
+ int searched = 0;
+
+ errno = 0;
+ RES_SET_H_ERRNO(statp, HOST_NOT_FOUND); /*%< True if we never query. */
+ dots = 0;
+ for (cp = name; *cp != '\0'; cp++)
+ dots += (*cp == '.');
+ trailing_dot = 0;
+ if (cp > name && *--cp == '.')
+ trailing_dot++;
+
+ /* If there aren't any dots, it could be a user-level alias. */
+ if (!dots && (cp = res_hostalias(statp, name, tmp, sizeof tmp))!= NULL)
+ return (res_nquery(statp, cp, class, type, answer, anslen));
+
+ /*
+ * If there are enough dots in the name, let's just give it a
+ * try 'as is'. The threshold can be set with the "ndots" option.
+ * Also, query 'as is', if there is a trailing dot in the name.
+ */
+ saved_herrno = -1;
+ if (dots >= statp->ndots || trailing_dot) {
+ ret = res_nquerydomain(statp, name, NULL, class, type,
+ answer, anslen);
+ if (ret > 0 || trailing_dot)
+ return (ret);
+ saved_herrno = statp->res_h_errno;
+ tried_as_is++;
+ }
+
+ /*
+ * We do at least one level of search if
+ * - there is no dot and RES_DEFNAME is set, or
+ * - there is at least one dot, there is no trailing dot,
+ * and RES_DNSRCH is set.
+ */
+ if ((!dots && (statp->options & RES_DEFNAMES) != 0U) ||
+ (dots && !trailing_dot && (statp->options & RES_DNSRCH) != 0U)) {
+ int done = 0;
+
+ for (domain = (const char * const *)statp->dnsrch;
+ *domain && !done;
+ domain++) {
+ searched = 1;
+
+ if (domain[0][0] == '\0' ||
+ (domain[0][0] == '.' && domain[0][1] == '\0'))
+ root_on_list++;
+
+ ret = res_nquerydomain(statp, name, *domain,
+ class, type,
+ answer, anslen);
+ if (ret > 0)
+ return (ret);
+
+ /*
+ * If no server present, give up.
+ * If name isn't found in this domain,
+ * keep trying higher domains in the search list
+ * (if that's enabled).
+ * On a NO_DATA error, keep trying, otherwise
+ * a wildcard entry of another type could keep us
+ * from finding this entry higher in the domain.
+ * If we get some other error (negative answer or
+ * server failure), then stop searching up,
+ * but try the input name below in case it's
+ * fully-qualified.
+ */
+ if (errno == ECONNREFUSED) {
+ RES_SET_H_ERRNO(statp, TRY_AGAIN);
+ return (-1);
+ }
+
+ switch (statp->res_h_errno) {
+ case NO_DATA:
+ got_nodata++;
+ /* FALLTHROUGH */
+ case HOST_NOT_FOUND:
+ /* keep trying */
+ break;
+ case TRY_AGAIN:
+ if (hp->rcode == SERVFAIL) {
+ /* try next search element, if any */
+ got_servfail++;
+ break;
+ }
+ /* FALLTHROUGH */
+ default:
+ /* anything else implies that we're done */
+ done++;
+ }
+
+ /* if we got here for some reason other than DNSRCH,
+ * we only wanted one iteration of the loop, so stop.
+ */
+ if ((statp->options & RES_DNSRCH) == 0U)
+ done++;
+ }
+ }
+
+ /*
+ * If the query has not already been tried as is then try it
+ * unless RES_NOTLDQUERY is set and there were no dots.
+ */
+ if ((dots || !searched || (statp->options & RES_NOTLDQUERY) == 0U) &&
+ !(tried_as_is || root_on_list)) {
+ ret = res_nquerydomain(statp, name, NULL, class, type,
+ answer, anslen);
+ if (ret > 0)
+ return (ret);
+ }
+
+ /* if we got here, we didn't satisfy the search.
+ * if we did an initial full query, return that query's H_ERRNO
+ * (note that we wouldn't be here if that query had succeeded).
+ * else if we ever got a nodata, send that back as the reason.
+ * else send back meaningless H_ERRNO, that being the one from
+ * the last DNSRCH we did.
+ */
+ if (saved_herrno != -1)
+ RES_SET_H_ERRNO(statp, saved_herrno);
+ else if (got_nodata)
+ RES_SET_H_ERRNO(statp, NO_DATA);
+ else if (got_servfail)
+ RES_SET_H_ERRNO(statp, TRY_AGAIN);
+ return (-1);
+}
+
+/*%
+ * Perform a call on res_query on the concatenation of name and domain,
+ * removing a trailing dot from name if domain is NULL.
+ */
+int
+res_nquerydomain(res_state statp,
+ const char *name,
+ const char *domain,
+ int class, int type, /*%< class and type of query */
+ u_char *answer, /*%< buffer to put answer */
+ int anslen) /*%< size of answer */
+{
+ char nbuf[MAXDNAME];
+ const char *longname = nbuf;
+ int n, d;
+
+#ifdef DEBUG
+ if (statp->options & RES_DEBUG)
+ printf(";; res_nquerydomain(%s, %s, %d, %d)\n",
+ name, domain?domain:"<Nil>", class, type);
+#endif
+ if (domain == NULL) {
+ /*
+ * Check for trailing '.';
+ * copy without '.' if present.
+ */
+ n = strlen(name);
+ if (n >= MAXDNAME) {
+ RES_SET_H_ERRNO(statp, NO_RECOVERY);
+ return (-1);
+ }
+ n--;
+ if (n >= 0 && name[n] == '.') {
+ strncpy(nbuf, name, n);
+ nbuf[n] = '\0';
+ } else
+ longname = name;
+ } else {
+ n = strlen(name);
+ d = strlen(domain);
+ if (n + d + 1 >= MAXDNAME) {
+ RES_SET_H_ERRNO(statp, NO_RECOVERY);
+ return (-1);
+ }
+ sprintf(nbuf, "%s.%s", name, domain);
+ }
+ return (res_nquery(statp, longname, class, type, answer, anslen));
+}
+
+const char *
+res_hostalias(const res_state statp, const char *name, char *dst, size_t siz) {
+ char *file, *cp1, *cp2;
+ char buf[BUFSIZ];
+ FILE *fp;
+
+ if (statp->options & RES_NOALIASES)
+ return (NULL);
+ file = getenv("HOSTALIASES");
+ if (file == NULL || (fp = fopen(file, "r")) == NULL)
+ return (NULL);
+ setbuf(fp, NULL);
+ buf[sizeof(buf) - 1] = '\0';
+ while (fgets(buf, sizeof(buf), fp)) {
+ for (cp1 = buf; *cp1 && !isspace((unsigned char)*cp1); ++cp1)
+ ;
+ if (!*cp1)
+ break;
+ *cp1 = '\0';
+ if (ns_samename(buf, name) == 1) {
+ while (isspace((unsigned char)*++cp1))
+ ;
+ if (!*cp1)
+ break;
+ for (cp2 = cp1 + 1; *cp2 &&
+ !isspace((unsigned char)*cp2); ++cp2)
+ ;
+ *cp2 = '\0';
+ strncpy(dst, cp1, siz - 1);
+ dst[siz - 1] = '\0';
+ fclose(fp);
+ return (dst);
+ }
+ }
+ fclose(fp);
+ return (NULL);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/res_send.c b/usr/src/lib/libresolv2_joy/common/resolv/res_send.c
new file mode 100644
index 0000000000..289db4e77d
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/res_send.c
@@ -0,0 +1,1120 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+/*
+ * Portions Copyright (C) 2004-2009 Internet Systems Consortium, Inc. ("ISC")
+ * Portions Copyright (C) 1996-2003 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Copyright (c) 1985, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Portions Copyright (c) 1993 by Digital Equipment Corporation.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies, and that
+ * the name of Digital Equipment Corporation not be used in advertising or
+ * publicity pertaining to distribution of the document or software without
+ * specific, written prior permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT
+ * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char sccsid[] = "@(#)res_send.c 8.1 (Berkeley) 6/4/93";
+static const char rcsid[] = "$Id: res_send.c,v 1.22 2009/01/22 23:49:23 tbox Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+/*! \file
+ * \brief
+ * Send query to name server and wait for reply.
+ */
+
+#include "port_before.h"
+#include "fd_setsize.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <sys/uio.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <arpa/inet.h>
+
+#include <errno.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <isc/eventlib.h>
+
+#include "port_after.h"
+
+#ifdef USE_POLL
+#ifdef HAVE_STROPTS_H
+#include <stropts.h>
+#endif
+#include <poll.h>
+#endif /* USE_POLL */
+
+/* Options. Leave them on. */
+#define DEBUG
+#include "res_debug.h"
+#include "res_private.h"
+
+#define EXT(res) ((res)->_u._ext)
+
+#ifndef USE_POLL
+static const int highestFD = FD_SETSIZE - 1;
+#else
+static int highestFD = 0;
+#endif
+
+/* Forward. */
+
+static int get_salen __P((const struct sockaddr *));
+static struct sockaddr * get_nsaddr __P((res_state, size_t));
+static int send_vc(res_state, const u_char *, int,
+ u_char *, int, int *, int);
+static int send_dg(res_state, const u_char *, int,
+ u_char *, int, int *, int, int,
+ int *, int *);
+static void Aerror(const res_state, FILE *, const char *, int,
+ const struct sockaddr *, int);
+static void Perror(const res_state, FILE *, const char *, int);
+static int sock_eq(struct sockaddr *, struct sockaddr *);
+#if defined(NEED_PSELECT) && !defined(USE_POLL)
+static int pselect(int, void *, void *, void *,
+ struct timespec *,
+ const sigset_t *);
+#endif
+void res_pquery(const res_state, const u_char *, int, FILE *);
+
+#ifndef ORIGINAL_ISC_CODE
+#pragma weak __res_nameinquery = res_nameinquery
+#pragma weak __res_queriesmatch = res_queriesmatch
+#pragma weak res_nisourserver = res_ourserver_p
+#endif /* ORIGINAL_ISC_CODE */
+
+static const int niflags = NI_NUMERICHOST | NI_NUMERICSERV;
+
+/* Public. */
+
+/*%
+ * looks up "ina" in _res.ns_addr_list[]
+ *
+ * returns:
+ *\li 0 : not found
+ *\li >0 : found
+ *
+ * author:
+ *\li paul vixie, 29may94
+ */
+int
+res_ourserver_p(const res_state statp, const struct sockaddr *sa) {
+ const struct sockaddr_in *inp, *srv;
+ const struct sockaddr_in6 *in6p, *srv6;
+ int ns;
+
+ switch (sa->sa_family) {
+ case AF_INET:
+ inp = (const struct sockaddr_in *)sa;
+ for (ns = 0; ns < statp->nscount; ns++) {
+ srv = (struct sockaddr_in *)get_nsaddr(statp, ns);
+ if (srv->sin_family == inp->sin_family &&
+ srv->sin_port == inp->sin_port &&
+ (srv->sin_addr.s_addr == INADDR_ANY ||
+ srv->sin_addr.s_addr == inp->sin_addr.s_addr))
+ return (1);
+ }
+ break;
+ case AF_INET6:
+ if (EXT(statp).ext == NULL)
+ break;
+ in6p = (const struct sockaddr_in6 *)sa;
+ for (ns = 0; ns < statp->nscount; ns++) {
+ srv6 = (struct sockaddr_in6 *)get_nsaddr(statp, ns);
+ if (srv6->sin6_family == in6p->sin6_family &&
+ srv6->sin6_port == in6p->sin6_port &&
+#ifdef HAVE_SIN6_SCOPE_ID
+ (srv6->sin6_scope_id == 0 ||
+ srv6->sin6_scope_id == in6p->sin6_scope_id) &&
+#endif
+ (IN6_IS_ADDR_UNSPECIFIED(&srv6->sin6_addr) ||
+ IN6_ARE_ADDR_EQUAL(&srv6->sin6_addr, &in6p->sin6_addr)))
+ return (1);
+ }
+ break;
+ default:
+ break;
+ }
+ return (0);
+}
+
+/*%
+ * look for (name,type,class) in the query section of packet (buf,eom)
+ *
+ * requires:
+ *\li buf + HFIXEDSZ <= eom
+ *
+ * returns:
+ *\li -1 : format error
+ *\li 0 : not found
+ *\li >0 : found
+ *
+ * author:
+ *\li paul vixie, 29may94
+ */
+int
+res_nameinquery(const char *name, int type, int class,
+ const u_char *buf, const u_char *eom)
+{
+ const u_char *cp = buf + HFIXEDSZ;
+ int qdcount = ntohs(((const HEADER*)buf)->qdcount);
+
+ while (qdcount-- > 0) {
+ char tname[MAXDNAME+1];
+ int n, ttype, tclass;
+
+ n = dn_expand(buf, eom, cp, tname, sizeof tname);
+ if (n < 0)
+ return (-1);
+ cp += n;
+ if (cp + 2 * INT16SZ > eom)
+ return (-1);
+ ttype = ns_get16(cp); cp += INT16SZ;
+ tclass = ns_get16(cp); cp += INT16SZ;
+ if (ttype == type && tclass == class &&
+ ns_samename(tname, name) == 1)
+ return (1);
+ }
+ return (0);
+}
+
+/*%
+ * is there a 1:1 mapping of (name,type,class)
+ * in (buf1,eom1) and (buf2,eom2)?
+ *
+ * returns:
+ *\li -1 : format error
+ *\li 0 : not a 1:1 mapping
+ *\li >0 : is a 1:1 mapping
+ *
+ * author:
+ *\li paul vixie, 29may94
+ */
+int
+res_queriesmatch(const u_char *buf1, const u_char *eom1,
+ const u_char *buf2, const u_char *eom2)
+{
+ const u_char *cp = buf1 + HFIXEDSZ;
+ int qdcount = ntohs(((const HEADER*)buf1)->qdcount);
+
+ if (buf1 + HFIXEDSZ > eom1 || buf2 + HFIXEDSZ > eom2)
+ return (-1);
+
+ /*
+ * Only header section present in replies to
+ * dynamic update packets.
+ */
+ if ((((const HEADER *)buf1)->opcode == ns_o_update) &&
+ (((const HEADER *)buf2)->opcode == ns_o_update))
+ return (1);
+
+ if (qdcount != ntohs(((const HEADER*)buf2)->qdcount))
+ return (0);
+ while (qdcount-- > 0) {
+ char tname[MAXDNAME+1];
+ int n, ttype, tclass;
+
+ n = dn_expand(buf1, eom1, cp, tname, sizeof tname);
+ if (n < 0)
+ return (-1);
+ cp += n;
+ if (cp + 2 * INT16SZ > eom1)
+ return (-1);
+ ttype = ns_get16(cp); cp += INT16SZ;
+ tclass = ns_get16(cp); cp += INT16SZ;
+ if (!res_nameinquery(tname, ttype, tclass, buf2, eom2))
+ return (0);
+ }
+ return (1);
+}
+
+int
+res_nsend(res_state statp,
+ const u_char *buf, int buflen, u_char *ans, int anssiz)
+{
+ int gotsomewhere, terrno, tries, v_circuit, resplen, ns, n;
+ char abuf[NI_MAXHOST];
+
+#ifdef USE_POLL
+ highestFD = sysconf(_SC_OPEN_MAX) - 1;
+#endif
+
+ /* No name servers or res_init() failure */
+ if (statp->nscount == 0 || EXT(statp).ext == NULL) {
+ errno = ESRCH;
+ return (-1);
+ }
+ if (anssiz < HFIXEDSZ) {
+ errno = EINVAL;
+ return (-1);
+ }
+ DprintQ((statp->options & RES_DEBUG) || (statp->pfcode & RES_PRF_QUERY),
+ (stdout, ";; res_send()\n"), buf, buflen);
+ v_circuit = (statp->options & RES_USEVC) || buflen > PACKETSZ;
+ gotsomewhere = 0;
+ terrno = ETIMEDOUT;
+
+ /*
+ * If the ns_addr_list in the resolver context has changed, then
+ * invalidate our cached copy and the associated timing data.
+ */
+ if (EXT(statp).nscount != 0) {
+ int needclose = 0;
+ struct sockaddr_storage peer;
+ ISC_SOCKLEN_T peerlen;
+
+ if (EXT(statp).nscount != statp->nscount)
+ needclose++;
+ else
+ for (ns = 0; ns < statp->nscount; ns++) {
+ if (statp->nsaddr_list[ns].sin_family &&
+ !sock_eq((struct sockaddr *)&statp->nsaddr_list[ns],
+ (struct sockaddr *)&EXT(statp).ext->nsaddrs[ns])) {
+ needclose++;
+ break;
+ }
+
+ if (EXT(statp).nssocks[ns] == -1)
+ continue;
+ peerlen = sizeof(peer);
+ if (getpeername(EXT(statp).nssocks[ns],
+ (struct sockaddr *)&peer, &peerlen) < 0) {
+ needclose++;
+ break;
+ }
+ if (!sock_eq((struct sockaddr *)&peer,
+ get_nsaddr(statp, ns))) {
+ needclose++;
+ break;
+ }
+ }
+ if (needclose) {
+ res_nclose(statp);
+ EXT(statp).nscount = 0;
+ }
+ }
+
+ /*
+ * Maybe initialize our private copy of the ns_addr_list.
+ */
+ if (EXT(statp).nscount == 0) {
+ for (ns = 0; ns < statp->nscount; ns++) {
+ EXT(statp).nstimes[ns] = RES_MAXTIME;
+ EXT(statp).nssocks[ns] = -1;
+ if (!statp->nsaddr_list[ns].sin_family)
+ continue;
+ EXT(statp).ext->nsaddrs[ns].sin =
+ statp->nsaddr_list[ns];
+ }
+ EXT(statp).nscount = statp->nscount;
+ }
+
+ /*
+ * Some resolvers want to even out the load on their nameservers.
+ * Note that RES_BLAST overrides RES_ROTATE.
+ */
+ if ((statp->options & RES_ROTATE) != 0U &&
+ (statp->options & RES_BLAST) == 0U) {
+ union res_sockaddr_union inu;
+ struct sockaddr_in ina;
+ int lastns = statp->nscount - 1;
+ int fd;
+ u_int16_t nstime;
+
+ if (EXT(statp).ext != NULL)
+ inu = EXT(statp).ext->nsaddrs[0];
+ ina = statp->nsaddr_list[0];
+ fd = EXT(statp).nssocks[0];
+ nstime = EXT(statp).nstimes[0];
+ for (ns = 0; ns < lastns; ns++) {
+ if (EXT(statp).ext != NULL)
+ EXT(statp).ext->nsaddrs[ns] =
+ EXT(statp).ext->nsaddrs[ns + 1];
+ statp->nsaddr_list[ns] = statp->nsaddr_list[ns + 1];
+ EXT(statp).nssocks[ns] = EXT(statp).nssocks[ns + 1];
+ EXT(statp).nstimes[ns] = EXT(statp).nstimes[ns + 1];
+ }
+ if (EXT(statp).ext != NULL)
+ EXT(statp).ext->nsaddrs[lastns] = inu;
+ statp->nsaddr_list[lastns] = ina;
+ EXT(statp).nssocks[lastns] = fd;
+ EXT(statp).nstimes[lastns] = nstime;
+ }
+
+ /*
+ * Send request, RETRY times, or until successful.
+ */
+ for (tries = 0; tries < statp->retry; tries++) {
+ for (ns = 0; ns < statp->nscount; ns++) {
+ struct sockaddr *nsap;
+ int nsaplen;
+ nsap = get_nsaddr(statp, ns);
+ nsaplen = get_salen(nsap);
+ statp->_flags &= ~RES_F_LASTMASK;
+ statp->_flags |= (ns << RES_F_LASTSHIFT);
+ same_ns:
+ if (statp->qhook) {
+ int done = 0, loops = 0;
+
+ do {
+ res_sendhookact act;
+
+ act = (*statp->qhook)(&nsap, &buf, &buflen,
+ ans, anssiz, &resplen);
+ switch (act) {
+ case res_goahead:
+ done = 1;
+ break;
+ case res_nextns:
+ res_nclose(statp);
+ goto next_ns;
+ case res_done:
+ return (resplen);
+ case res_modified:
+ /* give the hook another try */
+ if (++loops < 42) /*doug adams*/
+ break;
+ /*FALLTHROUGH*/
+ case res_error:
+ /*FALLTHROUGH*/
+ default:
+ goto fail;
+ }
+ } while (!done);
+ }
+
+ Dprint(((statp->options & RES_DEBUG) &&
+ getnameinfo(nsap, nsaplen, abuf, sizeof(abuf),
+ NULL, 0, niflags) == 0),
+ (stdout, ";; Querying server (# %d) address = %s\n",
+ ns + 1, abuf));
+
+
+ if (v_circuit) {
+ /* Use VC; at most one attempt per server. */
+ tries = statp->retry;
+ n = send_vc(statp, buf, buflen, ans, anssiz, &terrno,
+ ns);
+ if (n < 0)
+ goto fail;
+ if (n == 0)
+ goto next_ns;
+ resplen = n;
+ } else {
+ /* Use datagrams. */
+ n = send_dg(statp, buf, buflen, ans, anssiz, &terrno,
+ ns, tries, &v_circuit, &gotsomewhere);
+ if (n < 0)
+ goto fail;
+ if (n == 0)
+ goto next_ns;
+ if (v_circuit)
+ goto same_ns;
+ resplen = n;
+ }
+
+ Dprint((statp->options & RES_DEBUG) ||
+ ((statp->pfcode & RES_PRF_REPLY) &&
+ (statp->pfcode & RES_PRF_HEAD1)),
+ (stdout, ";; got answer:\n"));
+
+ DprintQ((statp->options & RES_DEBUG) ||
+ (statp->pfcode & RES_PRF_REPLY),
+ (stdout, "%s", ""),
+ ans, (resplen > anssiz) ? anssiz : resplen);
+
+ /*
+ * If we have temporarily opened a virtual circuit,
+ * or if we haven't been asked to keep a socket open,
+ * close the socket.
+ */
+ if ((v_circuit && (statp->options & RES_USEVC) == 0U) ||
+ (statp->options & RES_STAYOPEN) == 0U) {
+ res_nclose(statp);
+ }
+ if (statp->rhook) {
+ int done = 0, loops = 0;
+
+ do {
+ res_sendhookact act;
+
+ act = (*statp->rhook)(nsap, buf, buflen,
+ ans, anssiz, &resplen);
+ switch (act) {
+ case res_goahead:
+ case res_done:
+ done = 1;
+ break;
+ case res_nextns:
+ res_nclose(statp);
+ goto next_ns;
+ case res_modified:
+ /* give the hook another try */
+ if (++loops < 42) /*doug adams*/
+ break;
+ /*FALLTHROUGH*/
+ case res_error:
+ /*FALLTHROUGH*/
+ default:
+ goto fail;
+ }
+ } while (!done);
+
+ }
+ return (resplen);
+ next_ns: ;
+ } /*foreach ns*/
+ } /*foreach retry*/
+ res_nclose(statp);
+ if (!v_circuit) {
+ if (!gotsomewhere)
+ errno = ECONNREFUSED; /*%< no nameservers found */
+ else
+ errno = ETIMEDOUT; /*%< no answer obtained */
+ } else
+ errno = terrno;
+ return (-1);
+ fail:
+ res_nclose(statp);
+ return (-1);
+}
+
+/* Private */
+
+static int
+get_salen(sa)
+ const struct sockaddr *sa;
+{
+
+#ifdef HAVE_SA_LEN
+ /* There are people do not set sa_len. Be forgiving to them. */
+ if (sa->sa_len)
+ return (sa->sa_len);
+#endif
+
+ if (sa->sa_family == AF_INET)
+ return (sizeof(struct sockaddr_in));
+ else if (sa->sa_family == AF_INET6)
+ return (sizeof(struct sockaddr_in6));
+ else
+ return (0); /*%< unknown, die on connect */
+}
+
+/*%
+ * pick appropriate nsaddr_list for use. see res_init() for initialization.
+ */
+static struct sockaddr *
+get_nsaddr(statp, n)
+ res_state statp;
+ size_t n;
+{
+
+ if (!statp->nsaddr_list[n].sin_family && EXT(statp).ext) {
+ /*
+ * - EXT(statp).ext->nsaddrs[n] holds an address that is larger
+ * than struct sockaddr, and
+ * - user code did not update statp->nsaddr_list[n].
+ */
+ return (struct sockaddr *)(void *)&EXT(statp).ext->nsaddrs[n];
+ } else {
+ /*
+ * - user code updated statp->nsaddr_list[n], or
+ * - statp->nsaddr_list[n] has the same content as
+ * EXT(statp).ext->nsaddrs[n].
+ */
+ return (struct sockaddr *)(void *)&statp->nsaddr_list[n];
+ }
+}
+
+static int
+send_vc(res_state statp,
+ const u_char *buf, int buflen, u_char *ans, int anssiz,
+ int *terrno, int ns)
+{
+ const HEADER *hp = (const HEADER *) buf;
+ HEADER *anhp = (HEADER *) ans;
+ struct sockaddr *nsap;
+ int nsaplen;
+ int truncating, connreset, resplen, n;
+ struct iovec iov[2];
+ u_short len;
+ u_char *cp;
+ void *tmp;
+#ifdef SO_NOSIGPIPE
+ int on = 1;
+#endif
+
+ nsap = get_nsaddr(statp, ns);
+ nsaplen = get_salen(nsap);
+
+ connreset = 0;
+ same_ns:
+ truncating = 0;
+
+ /* Are we still talking to whom we want to talk to? */
+ if (statp->_vcsock >= 0 && (statp->_flags & RES_F_VC) != 0) {
+ struct sockaddr_storage peer;
+ ISC_SOCKLEN_T size = sizeof peer;
+
+ if (getpeername(statp->_vcsock,
+ (struct sockaddr *)&peer, &size) < 0 ||
+ !sock_eq((struct sockaddr *)&peer, nsap)) {
+ res_nclose(statp);
+ statp->_flags &= ~RES_F_VC;
+ }
+ }
+
+ if (statp->_vcsock < 0 || (statp->_flags & RES_F_VC) == 0) {
+ if (statp->_vcsock >= 0)
+ res_nclose(statp);
+
+ statp->_vcsock = socket(nsap->sa_family, SOCK_STREAM, 0);
+ if (statp->_vcsock > highestFD) {
+ res_nclose(statp);
+ errno = ENOTSOCK;
+ }
+ if (statp->_vcsock < 0) {
+ switch (errno) {
+ case EPROTONOSUPPORT:
+#ifdef EPFNOSUPPORT
+ case EPFNOSUPPORT:
+#endif
+ case EAFNOSUPPORT:
+ Perror(statp, stderr, "socket(vc)", errno);
+ return (0);
+ default:
+ *terrno = errno;
+ Perror(statp, stderr, "socket(vc)", errno);
+ return (-1);
+ }
+ }
+#ifdef SO_NOSIGPIPE
+ /*
+ * Disable generation of SIGPIPE when writing to a closed
+ * socket. Write should return -1 and set errno to EPIPE
+ * instead.
+ *
+ * Push on even if setsockopt(SO_NOSIGPIPE) fails.
+ */
+ (void)setsockopt(statp->_vcsock, SOL_SOCKET, SO_NOSIGPIPE, &on,
+ sizeof(on));
+#endif
+ errno = 0;
+ if (connect(statp->_vcsock, nsap, nsaplen) < 0) {
+ *terrno = errno;
+ Aerror(statp, stderr, "connect/vc", errno, nsap,
+ nsaplen);
+ res_nclose(statp);
+ return (0);
+ }
+ statp->_flags |= RES_F_VC;
+ }
+
+ /*
+ * Send length & message
+ */
+ ns_put16((u_short)buflen, (u_char*)&len);
+ iov[0] = evConsIovec(&len, INT16SZ);
+ DE_CONST(buf, tmp);
+ iov[1] = evConsIovec(tmp, buflen);
+ if (writev(statp->_vcsock, iov, 2) != (INT16SZ + buflen)) {
+ *terrno = errno;
+ Perror(statp, stderr, "write failed", errno);
+ res_nclose(statp);
+ return (0);
+ }
+ /*
+ * Receive length & response
+ */
+ read_len:
+ cp = ans;
+ len = INT16SZ;
+ while ((n = read(statp->_vcsock, (char *)cp, (int)len)) > 0) {
+ cp += n;
+ if ((len -= n) == 0)
+ break;
+ }
+ if (n <= 0) {
+ *terrno = errno;
+ Perror(statp, stderr, "read failed", errno);
+ res_nclose(statp);
+ /*
+ * A long running process might get its TCP
+ * connection reset if the remote server was
+ * restarted. Requery the server instead of
+ * trying a new one. When there is only one
+ * server, this means that a query might work
+ * instead of failing. We only allow one reset
+ * per query to prevent looping.
+ */
+ if (*terrno == ECONNRESET && !connreset) {
+ connreset = 1;
+ res_nclose(statp);
+ goto same_ns;
+ }
+ res_nclose(statp);
+ return (0);
+ }
+ resplen = ns_get16(ans);
+ if (resplen > anssiz) {
+ Dprint(statp->options & RES_DEBUG,
+ (stdout, ";; response truncated\n")
+ );
+ truncating = 1;
+ len = anssiz;
+ } else
+ len = resplen;
+ if (len < HFIXEDSZ) {
+ /*
+ * Undersized message.
+ */
+ Dprint(statp->options & RES_DEBUG,
+ (stdout, ";; undersized: %d\n", len));
+ *terrno = EMSGSIZE;
+ res_nclose(statp);
+ return (0);
+ }
+ cp = ans;
+ while (len != 0 && (n = read(statp->_vcsock, (char *)cp, (int)len)) > 0){
+ cp += n;
+ len -= n;
+ }
+ if (n <= 0) {
+ *terrno = errno;
+ Perror(statp, stderr, "read(vc)", errno);
+ res_nclose(statp);
+ return (0);
+ }
+ if (truncating) {
+ /*
+ * Flush rest of answer so connection stays in synch.
+ */
+ anhp->tc = 1;
+ len = resplen - anssiz;
+ while (len != 0) {
+ char junk[PACKETSZ];
+
+ n = read(statp->_vcsock, junk,
+ (len > sizeof junk) ? sizeof junk : len);
+ if (n > 0)
+ len -= n;
+ else
+ break;
+ }
+ }
+ /*
+ * If the calling applicating has bailed out of
+ * a previous call and failed to arrange to have
+ * the circuit closed or the server has got
+ * itself confused, then drop the packet and
+ * wait for the correct one.
+ */
+ if (hp->id != anhp->id) {
+ DprintQ((statp->options & RES_DEBUG) ||
+ (statp->pfcode & RES_PRF_REPLY),
+ (stdout, ";; old answer (unexpected):\n"),
+ ans, (resplen > anssiz) ? anssiz: resplen);
+ goto read_len;
+ }
+
+ /*
+ * All is well, or the error is fatal. Signal that the
+ * next nameserver ought not be tried.
+ */
+ return (resplen);
+}
+
+static int
+send_dg(res_state statp, const u_char *buf, int buflen, u_char *ans,
+ int anssiz, int *terrno, int ns, int tries, int *v_circuit,
+ int *gotsomewhere)
+{
+ const HEADER *hp = (const HEADER *) buf;
+ HEADER *anhp = (HEADER *) ans;
+ const struct sockaddr *nsap;
+ int nsaplen;
+ struct timespec now, timeout, finish;
+ struct sockaddr_storage from;
+ ISC_SOCKLEN_T fromlen;
+ int resplen, seconds, n, s;
+#ifdef USE_POLL
+ int polltimeout;
+ struct pollfd pollfd;
+#else
+ fd_set dsmask;
+#endif
+
+ nsap = get_nsaddr(statp, ns);
+ nsaplen = get_salen(nsap);
+ if (EXT(statp).nssocks[ns] == -1) {
+ EXT(statp).nssocks[ns] = socket(nsap->sa_family, SOCK_DGRAM, 0);
+ if (EXT(statp).nssocks[ns] > highestFD) {
+ res_nclose(statp);
+ errno = ENOTSOCK;
+ }
+ if (EXT(statp).nssocks[ns] < 0) {
+ switch (errno) {
+ case EPROTONOSUPPORT:
+#ifdef EPFNOSUPPORT
+ case EPFNOSUPPORT:
+#endif
+ case EAFNOSUPPORT:
+ Perror(statp, stderr, "socket(dg)", errno);
+ return (0);
+ default:
+ *terrno = errno;
+ Perror(statp, stderr, "socket(dg)", errno);
+ return (-1);
+ }
+ }
+#ifndef CANNOT_CONNECT_DGRAM
+ /*
+ * On a 4.3BSD+ machine (client and server,
+ * actually), sending to a nameserver datagram
+ * port with no nameserver will cause an
+ * ICMP port unreachable message to be returned.
+ * If our datagram socket is "connected" to the
+ * server, we get an ECONNREFUSED error on the next
+ * socket operation, and select returns if the
+ * error message is received. We can thus detect
+ * the absence of a nameserver without timing out.
+ */
+ if (connect(EXT(statp).nssocks[ns], nsap, nsaplen) < 0) {
+ Aerror(statp, stderr, "connect(dg)", errno, nsap,
+ nsaplen);
+ res_nclose(statp);
+ return (0);
+ }
+#endif /* !CANNOT_CONNECT_DGRAM */
+ Dprint(statp->options & RES_DEBUG,
+ (stdout, ";; new DG socket\n"))
+ }
+ s = EXT(statp).nssocks[ns];
+#ifndef CANNOT_CONNECT_DGRAM
+ if (send(s, (const char*)buf, buflen, 0) != buflen) {
+ Perror(statp, stderr, "send", errno);
+ res_nclose(statp);
+ return (0);
+ }
+#else /* !CANNOT_CONNECT_DGRAM */
+ if (sendto(s, (const char*)buf, buflen, 0, nsap, nsaplen) != buflen)
+ {
+ Aerror(statp, stderr, "sendto", errno, nsap, nsaplen);
+ res_nclose(statp);
+ return (0);
+ }
+#endif /* !CANNOT_CONNECT_DGRAM */
+
+ /*
+ * Wait for reply.
+ */
+ seconds = (statp->retrans << tries);
+ if (ns > 0)
+ seconds /= statp->nscount;
+ if (seconds <= 0)
+ seconds = 1;
+ now = evNowTime();
+ timeout = evConsTime(seconds, 0);
+ finish = evAddTime(now, timeout);
+ goto nonow;
+ wait:
+ now = evNowTime();
+ nonow:
+#ifndef USE_POLL
+ FD_ZERO(&dsmask);
+ FD_SET(s, &dsmask);
+ if (evCmpTime(finish, now) > 0)
+ timeout = evSubTime(finish, now);
+ else
+ timeout = evConsTime(0, 0);
+ n = pselect(s + 1, &dsmask, NULL, NULL, &timeout, NULL);
+#else
+ timeout = evSubTime(finish, now);
+ if (timeout.tv_sec < 0)
+ timeout = evConsTime(0, 0);
+ polltimeout = 1000*timeout.tv_sec +
+ timeout.tv_nsec/1000000;
+ pollfd.fd = s;
+ pollfd.events = POLLRDNORM;
+ n = poll(&pollfd, 1, polltimeout);
+#endif /* USE_POLL */
+
+ if (n == 0) {
+ Dprint(statp->options & RES_DEBUG, (stdout, ";; timeout\n"));
+ *gotsomewhere = 1;
+ return (0);
+ }
+ if (n < 0) {
+ if (errno == EINTR)
+ goto wait;
+#ifndef USE_POLL
+ Perror(statp, stderr, "select", errno);
+#else
+ Perror(statp, stderr, "poll", errno);
+#endif /* USE_POLL */
+ res_nclose(statp);
+ return (0);
+ }
+ errno = 0;
+ fromlen = sizeof(from);
+ resplen = recvfrom(s, (char*)ans, anssiz,0,
+ (struct sockaddr *)&from, &fromlen);
+ if (resplen <= 0) {
+ Perror(statp, stderr, "recvfrom", errno);
+ res_nclose(statp);
+ return (0);
+ }
+ *gotsomewhere = 1;
+ if (resplen < HFIXEDSZ) {
+ /*
+ * Undersized message.
+ */
+ Dprint(statp->options & RES_DEBUG,
+ (stdout, ";; undersized: %d\n",
+ resplen));
+ *terrno = EMSGSIZE;
+ res_nclose(statp);
+ return (0);
+ }
+ if (hp->id != anhp->id) {
+ /*
+ * response from old query, ignore it.
+ * XXX - potential security hazard could
+ * be detected here.
+ */
+ DprintQ((statp->options & RES_DEBUG) ||
+ (statp->pfcode & RES_PRF_REPLY),
+ (stdout, ";; old answer:\n"),
+ ans, (resplen > anssiz) ? anssiz : resplen);
+ goto wait;
+ }
+ if (!(statp->options & RES_INSECURE1) &&
+ !res_ourserver_p(statp, (struct sockaddr *)&from)) {
+ /*
+ * response from wrong server? ignore it.
+ * XXX - potential security hazard could
+ * be detected here.
+ */
+ DprintQ((statp->options & RES_DEBUG) ||
+ (statp->pfcode & RES_PRF_REPLY),
+ (stdout, ";; not our server:\n"),
+ ans, (resplen > anssiz) ? anssiz : resplen);
+ goto wait;
+ }
+#ifdef RES_USE_EDNS0
+ if (anhp->rcode == FORMERR && (statp->options & RES_USE_EDNS0) != 0U) {
+ /*
+ * Do not retry if the server do not understand EDNS0.
+ * The case has to be captured here, as FORMERR packet do not
+ * carry query section, hence res_queriesmatch() returns 0.
+ */
+ DprintQ(statp->options & RES_DEBUG,
+ (stdout, "server rejected query with EDNS0:\n"),
+ ans, (resplen > anssiz) ? anssiz : resplen);
+ /* record the error */
+ statp->_flags |= RES_F_EDNS0ERR;
+ res_nclose(statp);
+ return (0);
+ }
+#endif
+ if (!(statp->options & RES_INSECURE2) &&
+ !res_queriesmatch(buf, buf + buflen,
+ ans, ans + anssiz)) {
+ /*
+ * response contains wrong query? ignore it.
+ * XXX - potential security hazard could
+ * be detected here.
+ */
+ DprintQ((statp->options & RES_DEBUG) ||
+ (statp->pfcode & RES_PRF_REPLY),
+ (stdout, ";; wrong query name:\n"),
+ ans, (resplen > anssiz) ? anssiz : resplen);
+ goto wait;
+ }
+ if (anhp->rcode == SERVFAIL ||
+ anhp->rcode == NOTIMP ||
+ anhp->rcode == REFUSED) {
+ DprintQ(statp->options & RES_DEBUG,
+ (stdout, "server rejected query:\n"),
+ ans, (resplen > anssiz) ? anssiz : resplen);
+ res_nclose(statp);
+ /* don't retry if called from dig */
+ if (!statp->pfcode)
+ return (0);
+ }
+ if (!(statp->options & RES_IGNTC) && anhp->tc) {
+ /*
+ * To get the rest of answer,
+ * use TCP with same server.
+ */
+ Dprint(statp->options & RES_DEBUG,
+ (stdout, ";; truncated answer\n"));
+ *v_circuit = 1;
+ res_nclose(statp);
+ return (1);
+ }
+ /*
+ * All is well, or the error is fatal. Signal that the
+ * next nameserver ought not be tried.
+ */
+ return (resplen);
+}
+
+static void
+Aerror(const res_state statp, FILE *file, const char *string, int error,
+ const struct sockaddr *address, int alen)
+{
+ int save = errno;
+ char hbuf[NI_MAXHOST];
+ char sbuf[NI_MAXSERV];
+
+ alen = alen;
+
+ if ((statp->options & RES_DEBUG) != 0U) {
+ if (getnameinfo(address, alen, hbuf, sizeof(hbuf),
+ sbuf, sizeof(sbuf), niflags)) {
+ strncpy(hbuf, "?", sizeof(hbuf) - 1);
+ hbuf[sizeof(hbuf) - 1] = '\0';
+ strncpy(sbuf, "?", sizeof(sbuf) - 1);
+ sbuf[sizeof(sbuf) - 1] = '\0';
+ }
+ fprintf(file, "res_send: %s ([%s].%s): %s\n",
+ string, hbuf, sbuf, strerror(error));
+ }
+ errno = save;
+}
+
+static void
+Perror(const res_state statp, FILE *file, const char *string, int error) {
+ int save = errno;
+
+ if ((statp->options & RES_DEBUG) != 0U)
+ fprintf(file, "res_send: %s: %s\n",
+ string, strerror(error));
+ errno = save;
+}
+
+static int
+sock_eq(struct sockaddr *a, struct sockaddr *b) {
+ struct sockaddr_in *a4, *b4;
+ struct sockaddr_in6 *a6, *b6;
+
+ if (a->sa_family != b->sa_family)
+ return 0;
+ switch (a->sa_family) {
+ case AF_INET:
+ a4 = (struct sockaddr_in *)a;
+ b4 = (struct sockaddr_in *)b;
+ return a4->sin_port == b4->sin_port &&
+ a4->sin_addr.s_addr == b4->sin_addr.s_addr;
+ case AF_INET6:
+ a6 = (struct sockaddr_in6 *)a;
+ b6 = (struct sockaddr_in6 *)b;
+ return a6->sin6_port == b6->sin6_port &&
+#ifdef HAVE_SIN6_SCOPE_ID
+ a6->sin6_scope_id == b6->sin6_scope_id &&
+#endif
+ IN6_ARE_ADDR_EQUAL(&a6->sin6_addr, &b6->sin6_addr);
+ default:
+ return 0;
+ }
+}
+
+#if defined(NEED_PSELECT) && !defined(USE_POLL)
+/* XXX needs to move to the porting library. */
+static int
+pselect(int nfds, void *rfds, void *wfds, void *efds,
+ struct timespec *tsp, const sigset_t *sigmask)
+{
+ struct timeval tv, *tvp;
+ sigset_t sigs;
+ int n;
+
+ if (tsp) {
+ tvp = &tv;
+ tv = evTimeVal(*tsp);
+ } else
+ tvp = NULL;
+ if (sigmask)
+ sigprocmask(SIG_SETMASK, sigmask, &sigs);
+ n = select(nfds, rfds, wfds, efds, tvp);
+ if (sigmask)
+ sigprocmask(SIG_SETMASK, &sigs, NULL);
+ if (tsp)
+ *tsp = evTimeSpec(tv);
+ return (n);
+}
+#endif
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/res_sendsigned.c b/usr/src/lib/libresolv2_joy/common/resolv/res_sendsigned.c
new file mode 100644
index 0000000000..5ebc1a70eb
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/res_sendsigned.c
@@ -0,0 +1,170 @@
+#include "port_before.h"
+#include "fd_setsize.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <arpa/inet.h>
+
+#include <isc/dst.h>
+
+#include <errno.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "port_after.h"
+
+#define DEBUG
+#include "res_debug.h"
+
+
+/*% res_nsendsigned */
+int
+res_nsendsigned(res_state statp, const u_char *msg, int msglen,
+ ns_tsig_key *key, u_char *answer, int anslen)
+{
+ res_state nstatp;
+ DST_KEY *dstkey;
+ int usingTCP = 0;
+ u_char *newmsg;
+ int newmsglen, bufsize, siglen;
+ u_char sig[64];
+ HEADER *hp;
+ time_t tsig_time;
+ int ret;
+ int len;
+
+ dst_init();
+
+ nstatp = (res_state) malloc(sizeof(*statp));
+ if (nstatp == NULL) {
+ errno = ENOMEM;
+ return (-1);
+ }
+ memcpy(nstatp, statp, sizeof(*statp));
+
+ bufsize = msglen + 1024;
+ newmsg = (u_char *) malloc(bufsize);
+ if (newmsg == NULL) {
+ free(nstatp);
+ errno = ENOMEM;
+ return (-1);
+ }
+ memcpy(newmsg, msg, msglen);
+ newmsglen = msglen;
+
+ if (ns_samename(key->alg, NS_TSIG_ALG_HMAC_MD5) != 1)
+ dstkey = NULL;
+ else
+ dstkey = dst_buffer_to_key(key->name, KEY_HMAC_MD5,
+ NS_KEY_TYPE_AUTH_ONLY,
+ NS_KEY_PROT_ANY,
+ key->data, key->len);
+ if (dstkey == NULL) {
+ errno = EINVAL;
+ free(nstatp);
+ free(newmsg);
+ return (-1);
+ }
+
+ nstatp->nscount = 1;
+ siglen = sizeof(sig);
+ ret = ns_sign(newmsg, &newmsglen, bufsize, NOERROR, dstkey, NULL, 0,
+ sig, &siglen, 0);
+ if (ret < 0) {
+ free (nstatp);
+ free (newmsg);
+ dst_free_key(dstkey);
+ if (ret == NS_TSIG_ERROR_NO_SPACE)
+ errno = EMSGSIZE;
+ else if (ret == -1)
+ errno = EINVAL;
+ return (ret);
+ }
+
+ if (newmsglen > PACKETSZ || nstatp->options & RES_USEVC)
+ usingTCP = 1;
+ if (usingTCP == 0)
+ nstatp->options |= RES_IGNTC;
+ else
+ nstatp->options |= RES_USEVC;
+ /*
+ * Stop res_send printing the answer.
+ */
+ nstatp->options &= ~RES_DEBUG;
+ nstatp->pfcode &= ~RES_PRF_REPLY;
+
+retry:
+
+ len = res_nsend(nstatp, newmsg, newmsglen, answer, anslen);
+ if (len < 0) {
+ free (nstatp);
+ free (newmsg);
+ dst_free_key(dstkey);
+ return (len);
+ }
+
+ ret = ns_verify(answer, &len, dstkey, sig, siglen,
+ NULL, NULL, &tsig_time, nstatp->options & RES_KEEPTSIG);
+ if (ret != 0) {
+ Dprint((statp->options & RES_DEBUG) ||
+ ((statp->pfcode & RES_PRF_REPLY) &&
+ (statp->pfcode & RES_PRF_HEAD1)),
+ (stdout, ";; got answer:\n"));
+
+ DprintQ((statp->options & RES_DEBUG) ||
+ (statp->pfcode & RES_PRF_REPLY),
+ (stdout, "%s", ""),
+ answer, (anslen > len) ? len : anslen);
+
+ if (ret > 0) {
+ Dprint(statp->pfcode & RES_PRF_REPLY,
+ (stdout, ";; server rejected TSIG (%s)\n",
+ p_rcode(ret)));
+ } else {
+ Dprint(statp->pfcode & RES_PRF_REPLY,
+ (stdout, ";; TSIG invalid (%s)\n",
+ p_rcode(-ret)));
+ }
+
+ free (nstatp);
+ free (newmsg);
+ dst_free_key(dstkey);
+ if (ret == -1)
+ errno = EINVAL;
+ else
+ errno = ENOTTY;
+ return (-1);
+ }
+
+ hp = (HEADER *) answer;
+ if (hp->tc && !usingTCP && (statp->options & RES_IGNTC) == 0U) {
+ nstatp->options &= ~RES_IGNTC;
+ usingTCP = 1;
+ goto retry;
+ }
+ Dprint((statp->options & RES_DEBUG) ||
+ ((statp->pfcode & RES_PRF_REPLY) &&
+ (statp->pfcode & RES_PRF_HEAD1)),
+ (stdout, ";; got answer:\n"));
+
+ DprintQ((statp->options & RES_DEBUG) ||
+ (statp->pfcode & RES_PRF_REPLY),
+ (stdout, "%s", ""),
+ answer, (anslen > len) ? len : anslen);
+
+ Dprint(statp->pfcode & RES_PRF_REPLY, (stdout, ";; TSIG ok\n"));
+
+ free (nstatp);
+ free (newmsg);
+ dst_free_key(dstkey);
+ return (len);
+}
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/common/resolv/res_update.c b/usr/src/lib/libresolv2_joy/common/resolv/res_update.c
new file mode 100644
index 0000000000..df24aee3bd
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/resolv/res_update.c
@@ -0,0 +1,213 @@
+#if !defined(lint) && !defined(SABER)
+static const char rcsid[] = "$Id: res_update.c,v 1.13 2005/04/27 04:56:43 sra Exp $";
+#endif /* not lint */
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*! \file
+ * \brief
+ * Based on the Dynamic DNS reference implementation by Viraj Bais
+ * &lt;viraj_bais@ccm.fm.intel.com>
+ */
+
+#include "port_before.h"
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <arpa/nameser.h>
+
+#include <errno.h>
+#include <limits.h>
+#include <netdb.h>
+#include <res_update.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <isc/list.h>
+#include <resolv_joy.h>
+
+#include "port_after.h"
+#include "res_private.h"
+
+/*%
+ * Separate a linked list of records into groups so that all records
+ * in a group will belong to a single zone on the nameserver.
+ * Create a dynamic update packet for each zone and send it to the
+ * nameservers for that zone, and await answer.
+ * Abort if error occurs in updating any zone.
+ * Return the number of zones updated on success, < 0 on error.
+ *
+ * On error, caller must deal with the unsynchronized zones
+ * eg. an A record might have been successfully added to the forward
+ * zone but the corresponding PTR record would be missing if error
+ * was encountered while updating the reverse zone.
+ */
+
+struct zonegrp {
+ char z_origin[MAXDNAME];
+ ns_class z_class;
+ union res_sockaddr_union z_nsaddrs[MAXNS];
+ int z_nscount;
+ int z_flags;
+ LIST(ns_updrec) z_rrlist;
+ LINK(struct zonegrp) z_link;
+};
+
+#define ZG_F_ZONESECTADDED 0x0001
+
+/* Forward. */
+
+static void res_dprintf(const char *, ...) ISC_FORMAT_PRINTF(1, 2);
+
+/* Macros. */
+
+#define DPRINTF(x) do {\
+ int save_errno = errno; \
+ if ((statp->options & RES_DEBUG) != 0U) res_dprintf x; \
+ errno = save_errno; \
+ } while (0)
+
+/* Public. */
+
+int
+res_nupdate(res_state statp, ns_updrec *rrecp_in, ns_tsig_key *key) {
+ ns_updrec *rrecp;
+ u_char answer[PACKETSZ];
+ u_char *packet;
+ struct zonegrp *zptr, tgrp;
+ LIST(struct zonegrp) zgrps;
+ int nzones = 0, nscount = 0, n;
+ union res_sockaddr_union nsaddrs[MAXNS];
+
+ packet = malloc(NS_MAXMSG);
+ if (packet == NULL) {
+ DPRINTF(("malloc failed"));
+ return (0);
+ }
+ /* Thread all of the updates onto a list of groups. */
+ INIT_LIST(zgrps);
+ memset(&tgrp, 0, sizeof (tgrp));
+ for (rrecp = rrecp_in; rrecp;
+ rrecp = LINKED(rrecp, r_link) ? NEXT(rrecp, r_link) : NULL) {
+ int nscnt;
+ /* Find the origin for it if there is one. */
+ tgrp.z_class = rrecp->r_class;
+ nscnt = res_findzonecut2(statp, rrecp->r_dname, tgrp.z_class,
+ RES_EXHAUSTIVE, tgrp.z_origin,
+ sizeof tgrp.z_origin,
+ tgrp.z_nsaddrs, MAXNS);
+ if (nscnt <= 0) {
+ DPRINTF(("res_findzonecut failed (%d)", nscnt));
+ goto done;
+ }
+ tgrp.z_nscount = nscnt;
+ /* Find the group for it if there is one. */
+ for (zptr = HEAD(zgrps); zptr != NULL; zptr = NEXT(zptr, z_link))
+ if (ns_samename(tgrp.z_origin, zptr->z_origin) == 1 &&
+ tgrp.z_class == zptr->z_class)
+ break;
+ /* Make a group for it if there isn't one. */
+ if (zptr == NULL) {
+ zptr = malloc(sizeof *zptr);
+ if (zptr == NULL) {
+ DPRINTF(("malloc failed"));
+ goto done;
+ }
+ *zptr = tgrp;
+ zptr->z_flags = 0;
+ INIT_LINK(zptr, z_link);
+ INIT_LIST(zptr->z_rrlist);
+ APPEND(zgrps, zptr, z_link);
+ }
+ /* Thread this rrecp onto the right group. */
+ APPEND(zptr->z_rrlist, rrecp, r_glink);
+ }
+
+ for (zptr = HEAD(zgrps); zptr != NULL; zptr = NEXT(zptr, z_link)) {
+ /* Construct zone section and prepend it. */
+ rrecp = res_mkupdrec(ns_s_zn, zptr->z_origin,
+ zptr->z_class, ns_t_soa, 0);
+ if (rrecp == NULL) {
+ DPRINTF(("res_mkupdrec failed"));
+ goto done;
+ }
+ PREPEND(zptr->z_rrlist, rrecp, r_glink);
+ zptr->z_flags |= ZG_F_ZONESECTADDED;
+
+ /* Marshall the update message. */
+ n = res_nmkupdate(statp, HEAD(zptr->z_rrlist),
+ packet, NS_MAXMSG);
+ DPRINTF(("res_mkupdate -> %d", n));
+ if (n < 0)
+ goto done;
+
+ /* Temporarily replace the resolver's nameserver set. */
+ nscount = res_getservers(statp, nsaddrs, MAXNS);
+ res_setservers(statp, zptr->z_nsaddrs, zptr->z_nscount);
+
+ /* Send the update and remember the result. */
+ if (key != NULL)
+ n = res_nsendsigned(statp, packet, n, key,
+ answer, sizeof answer);
+ else
+ n = res_nsend(statp, packet, n, answer, sizeof answer);
+ if (n < 0) {
+ DPRINTF(("res_nsend: send error, n=%d (%s)\n",
+ n, strerror(errno)));
+ goto done;
+ }
+ if (((HEADER *)answer)->rcode == NOERROR)
+ nzones++;
+
+ /* Restore resolver's nameserver set. */
+ res_setservers(statp, nsaddrs, nscount);
+ nscount = 0;
+ }
+ done:
+ while (!EMPTY(zgrps)) {
+ zptr = HEAD(zgrps);
+ if ((zptr->z_flags & ZG_F_ZONESECTADDED) != 0)
+ res_freeupdrec(HEAD(zptr->z_rrlist));
+ UNLINK(zgrps, zptr, z_link);
+ free(zptr);
+ }
+ if (nscount != 0)
+ res_setservers(statp, nsaddrs, nscount);
+
+ free(packet);
+ return (nzones);
+}
+
+/* Private. */
+
+static void
+res_dprintf(const char *fmt, ...) {
+ va_list ap;
+
+ va_start(ap, fmt);
+ fputs(";; res_nupdate: ", stderr);
+ vfprintf(stderr, fmt, ap);
+ fputc('\n', stderr);
+ va_end(ap);
+}
diff --git a/usr/src/lib/libresolv2_joy/common/sunw/sunw_mtctxres.c b/usr/src/lib/libresolv2_joy/common/sunw/sunw_mtctxres.c
new file mode 100644
index 0000000000..cc2a485ede
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/sunw/sunw_mtctxres.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <port_before.h>
+#include <thread.h>
+#include <errno.h>
+#include <netdb.h>
+#include <malloc.h>
+#include <string.h>
+#include <resolv_mt.h>
+#include <irs.h>
+#include <port_after.h>
+
+#pragma redefine_extname __h_errno __joy_h_errno
+
+/*
+ * much of the original version of sunw_mtxtxres.c was incorporated into
+ * ISC libbind as resolv/mtctxres.c. The following bits have not yet made
+ * it into ISC libbind.
+ */
+
+/*
+ * There used to be a private, MT-safe resolver interface that used TSD
+ * to store per-thread _res, h_errno, etc. We continue to provide the
+ * access functions __res_get_res() and __res_get_h_errno() so that binaries
+ * that used the private interface will continue to work.
+ */
+
+#ifdef _res
+#undef _res
+#endif
+
+extern struct __res_state *__res_state(void);
+
+struct __res_state *
+__res_get_res(void) {
+ return (__res_state());
+}
+
+
+#ifdef h_errno
+#undef h_errno
+#endif
+
+extern int *__h_errno(void);
+
+int *
+__res_get_h_errno(void) {
+ return (__h_errno());
+}
+
+
+#ifdef SUNW_HOSTS_FALLBACK
+
+/*
+ * When the name service switch calls libresolv, it doesn't want fallback
+ * to /etc/hosts, so we provide a method to turn it off.
+ */
+
+void
+__joy_res_set_no_hosts_fallback(void) {
+ ___mtctxres()->no_hosts_fallback_private = 1;
+}
+
+void
+__joy_res_unset_no_hosts_fallback(void) {
+ ___mtctxres()->no_hosts_fallback_private = 0;
+}
+
+int
+__res_no_hosts_fallback(void) {
+ return (___mtctxres()->no_hosts_fallback_private);
+}
+
+#endif /* SUNW_HOSTS_FALLBACK */
+
+#ifdef SUNW_OVERRIDE_RETRY
+
+/*
+ * The NS switch wants to be able to override the number of retries.
+ */
+
+int
+__joy_res_override_retry(int retry) {
+ ___mtctxres()->retry_private = retry;
+ /*
+ * This function doesn't really need a return value; saving the
+ * old retry setting, and restoring it, is handled by __res_retry()
+ * and __res_retry_reset() below. However, the nss_dns library
+ * must have a private version of this function to be used when
+ * running with an old libresolv. That private nss_dns function
+ * needs a return value, and a function pointer is used to select
+ * the right function at runtime. Thus, __res_override_retry
+ * must have a function prototype consistent with the private
+ * nss_dns function, i.e., one that returns an int.
+ *
+ * Given that we do have a return value, that value must be zero.
+ * That's because retry_private == 0 is used to indicate that
+ * no override retry value is in effect, and the way we expect
+ * nss_dns to call us is:
+ *
+ * int oldretry = __res_override_retry(N);
+ * <whatever>
+ * (void)__res_override_retry(old_retry);
+ */
+ return (0);
+}
+
+int
+__res_retry(int retry) {
+ mtctxres_t *mt = ___mtctxres();
+
+ mt->retry_save = retry;
+ return ((mt->retry_private != 0) ? mt->retry_private : retry);
+}
+
+int
+__res_retry_reset(void) {
+ mtctxres_t *mt = ___mtctxres();
+
+ return (mt->retry_save);
+}
+
+#endif /* SUNW_OVERRIDE_RETRY */
diff --git a/usr/src/lib/libresolv2_joy/common/sunw/sunw_updrec.c b/usr/src/lib/libresolv2_joy/common/sunw/sunw_updrec.c
new file mode 100644
index 0000000000..3b0ffc49df
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/sunw/sunw_updrec.c
@@ -0,0 +1,249 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * As of BIND 8.2.2, ISC (a) removed res_mkupdate(), res_update(), and
+ * res_mkupdrec() from what they consider the supported interface. The
+ * functions still exist, but their calling interface has changed, since
+ * the ns_updrec structure has changed.
+ *
+ * It seems probable that res_mkupdate() etc. will return, though possibly
+ * with other changes, in some future BIND release. In order to avoid
+ * going to PSARC twice (once to remove the functions, and then again to
+ * add them back), we retain the old interface as a wrapper around the
+ * new one.
+ */
+
+#include <port_before.h>
+
+#include <malloc.h>
+#include <strings.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+
+/* get the Solaris ns_updrec before any renaming happens */
+#include <arpa/nameser.h>
+
+/* get the __ISC_ns_updrec */
+#include <res_update.h>
+
+#include <port_after.h>
+
+/* un-rename ns_updrec and res_* functions so we can wrap them */
+#undef ns_updrec
+#undef res_mkupdate
+#undef res_update
+#undef res_mkupdrec
+#undef res_freeupdrec
+#undef res_nmkupdate
+#undef res_nupdate
+
+void res_freeupdrec(ns_updrec *);
+
+static int
+old2new(ns_updrec *old, __ISC_ns_updrec *new) {
+
+ if (old->r_dname != 0) {
+ if ((new->r_dname = strdup(old->r_dname)) == 0)
+ return (-1);
+ } else {
+ new->r_dname = 0;
+ }
+
+ new->r_glink.prev =
+ new->r_glink.next =
+ new->r_link.prev =
+ new->r_link.next = 0;
+
+ new->r_section = old->r_section;
+ new->r_class = old->r_class;
+ new->r_type = old->r_type;
+ new->r_ttl = old->r_ttl;
+ new->r_data = old->r_data;
+ new->r_size = old->r_size;
+ new->r_opcode = old->r_opcode;
+ new->r_dp = old->r_dp;
+ new->r_deldp = old->r_deldp;
+ new->r_zone = old->r_zone;
+
+ return (0);
+}
+
+
+static int
+new2old(__ISC_ns_updrec *new, ns_updrec *old) {
+ /* XXX r_prev and r_next unchanged */
+ if (new->r_dname != 0) {
+ if ((old->r_dname = strdup(new->r_dname)) == 0)
+ return (-1);
+ } else {
+ old->r_dname = 0;
+ }
+ old->r_section = new->r_section;
+ old->r_class = new->r_class;
+ old->r_type = new->r_type;
+ old->r_ttl = new->r_ttl;
+ old->r_data = new->r_data;
+ old->r_size = new->r_size;
+ old->r_opcode = new->r_opcode;
+ old->r_grpnext = 0; /* XXX */
+ old->r_dp = new->r_dp;
+ old->r_deldp = new->r_deldp;
+ old->r_zone = new->r_zone;
+
+ return (0);
+}
+
+
+static void
+delete_list(__ISC_ns_updrec *list) {
+
+ __ISC_ns_updrec *next;
+
+ for (; list != 0; list = next) {
+ next = list->r_link.next;
+ __ISC_res_freeupdrec(list);
+ }
+}
+
+
+static __ISC_ns_updrec *
+copy_list(ns_updrec *old, int do_glink) {
+
+ __ISC_ns_updrec *list = 0, *r, *p;
+
+ if (old == 0)
+ return (0);
+
+ for (p = 0; old != 0; old = old->r_next, p = r) {
+ if ((r = calloc(1, sizeof (*r))) == 0 ||
+ old2new(old, r) != 0) {
+ free(r);
+ delete_list(list);
+ return (0);
+ }
+ r->r_link.prev = p;
+ r->r_link.next = 0;
+ /* res_update and res_nupdate want r_glink set up like this */
+ if (do_glink) {
+ r->r_glink.prev = p;
+ r->r_glink.next = 0;
+ } else {
+ r->r_glink.prev = (void *)-1;
+ r->r_glink.next = (void *)-1;
+ }
+ if (p != 0) {
+ p->r_link.next = r;
+ if (do_glink) {
+ p->r_glink.next = r;
+ }
+ } else {
+ list = r;
+ }
+ }
+ return (list);
+}
+
+
+int
+res_mkupdate(ns_updrec *rrecp_in, uchar_t *buf, int length) {
+
+ __ISC_ns_updrec *r;
+ int ret;
+
+ if ((r = copy_list(rrecp_in, 1)) == 0)
+ return (-1);
+
+ ret = __ISC_res_mkupdate(r, buf, length);
+
+ delete_list(r);
+
+ return (ret);
+}
+
+int
+res_nmkupdate(res_state statp, ns_updrec *rrecp_in, uchar_t *buf, int length) {
+
+ __ISC_ns_updrec *r;
+ int ret;
+
+ if ((r = copy_list(rrecp_in, 1)) == 0)
+ return (-1);
+
+ ret = __ISC_res_nmkupdate(statp, r, buf, length);
+
+ delete_list(r);
+
+ return (ret);
+}
+
+
+int
+res_update(ns_updrec *rrecp_in) {
+
+ __ISC_ns_updrec *r;
+ int ret;
+
+ if ((r = copy_list(rrecp_in, 0)) == 0)
+ return (-1);
+
+ ret = __ISC_res_update(r);
+
+ delete_list(r);
+
+ return (ret);
+}
+
+int
+res_nupdate(res_state statp, ns_updrec *rrecp_in, ns_tsig_key *key) {
+
+ __ISC_ns_updrec *r;
+ int ret;
+
+ if ((r = copy_list(rrecp_in, 0)) == 0)
+ return (-1);
+
+ ret = __ISC_res_nupdate(statp, r, key);
+
+ delete_list(r);
+
+ return (ret);
+}
+
+
+
+ns_updrec *
+res_mkupdrec(int section, const char *dname, uint_t class, uint_t type,
+ uint_t ttl) {
+
+ __ISC_ns_updrec *n;
+ ns_updrec *o;
+
+ n = __ISC_res_mkupdrec(section, dname, class, type, ttl);
+ if (n == 0)
+ return (0);
+
+ if ((o = calloc(1, sizeof (*o))) != 0) {
+ if (new2old(n, o) != 0) {
+ res_freeupdrec(o);
+ o = 0;
+ }
+ }
+
+ __ISC_res_freeupdrec(n);
+
+ return (o);
+}
+
+
+void
+res_freeupdrec(ns_updrec *rrecp) {
+ if (rrecp == 0)
+ return;
+ /* Note: freeing r_dp is the caller's responsibility. */
+ if (rrecp->r_dname != NULL)
+ free(rrecp->r_dname);
+ free(rrecp);
+}
diff --git a/usr/src/lib/libresolv2_joy/common/sunw/sunw_wrappers.c b/usr/src/lib/libresolv2_joy/common/sunw/sunw_wrappers.c
new file mode 100644
index 0000000000..55bbe07024
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/common/sunw/sunw_wrappers.c
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <port_before.h>
+#include <resolv_joy.h>
+#include <arpa/inet.h>
+#include <port_after.h>
+
+#undef p_option
+/* extern const char * isc_p_option(); */
+const char *p_option(uint_t option) {
+ return (isc_p_option((ulong_t)option));
+}
+#pragma weak __p_option = p_option
+
+#undef p_secstodate
+/* extern char * isc_p_secstodate (); */
+char *p_secstodate(uint_t secs) {
+ return (isc_p_secstodate((ulong_t)secs));
+}
+#pragma weak __p_secstodate = p_secstodate
diff --git a/usr/src/lib/libresolv2_joy/i386/Makefile b/usr/src/lib/libresolv2_joy/i386/Makefile
new file mode 100644
index 0000000000..a333224278
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/i386/Makefile
@@ -0,0 +1,30 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/libresolv2_joy/include/Makefile b/usr/src/lib/libresolv2_joy/include/Makefile
new file mode 100644
index 0000000000..8bff3c3188
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/Makefile
@@ -0,0 +1,60 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../../../Makefile.master
+
+HDRS= os_version.h port_ipv6.h
+TMPHDRS= new_os_version.h new_port_ipv6.h
+
+all := TARGET= all
+clean := TARGET= clean
+clobber := TARGET= clobber
+install := TARGET= install
+
+.KEEP_STATE:
+
+all lint: $(HDRS)
+
+install: all
+
+clean:
+ $(RM) $(HDRS) $(TMPHDRS)
+
+clobber: clean
+
+# os_version.h and port_ipv6.h should be rebuilt when you change OS
+# revision. Since that's not easily expressed as a dependency, we
+# rebuild them every time.
+
+os_version.h: make_os_version FRC
+ ./make_os_version
+
+port_ipv6.h: probe_ipv6 FRC
+ CC="$(CC)" ./probe_ipv6
+
+FRC:
diff --git a/usr/src/lib/libresolv2_joy/include/arpa/port_inet.h b/usr/src/lib/libresolv2_joy/include/arpa/port_inet.h
new file mode 100644
index 0000000000..5eb1787f56
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/arpa/port_inet.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ *
+ * All rights reserved.
+ */
+
+#ifndef _ARPA_PORT_INET_H
+#define _ARPA_PORT_INET_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * these are libresolv2 functions that were made local in previous versions
+ * we rename them res_* because they conflict with libnsl or libsocket
+ */
+
+#define inet_lnaof res_inet_lnaof /* libsocket */
+ulong_t inet_lnaof(struct in_addr in);
+
+#define inet_makeaddr res_inet_makeaddr /* libsocket */
+struct in_addr inet_makeaddr(ulong_t net, ulong_t host);
+
+#define inet_netof res_inet_netof /* libnsl */
+ulong_t inet_netof(struct in_addr in);
+
+#define inet_network res_inet_network /* libsocket */
+ulong_t inet_network(register const char *cp);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+
+#endif /* _ARPA_PORT_INET_H */
diff --git a/usr/src/lib/libresolv2_joy/include/arpa/port_nameser.h b/usr/src/lib/libresolv2_joy/include/arpa/port_nameser.h
new file mode 100644
index 0000000000..b40ea0d163
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/arpa/port_nameser.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _ARPA_PORT_NAMESER_H
+#define _ARPA_PORT_NAMESER_H
+
+/*
+ * ISC changed the ns_updrec structure. However, it's a public interface
+ * in Solaris, so we rename it here and wrap in sunw_updrec.c
+ */
+#define ns_updrec __ISC_ns_updrec
+
+
+/*
+ * Due to the above, the following functions need to be renamed and
+ * wrapped in sunw_updrec.c.
+ *
+ * For BIND 8.2.2, ISC removed the dynamic update functions, and the
+ * definition of the ns_updrec structure, from the public include files
+ * (<resolv.h>, <arpa/nameser.h>. However, res_update(), res_mkupdate(),
+ * and res_mkupdrec() are in the public libresolv interface in Solaris,
+ * so we can't easily remove them. Thus, ISC's new versions of res_mkupdate()
+ * etc. can't be exposed under their original names.
+ *
+ * res_nmkupdate() and res_nupdate are new. We could either change them
+ * to accept the <arpa/nameser.h> ns_updrec, or leave them unchanged and
+ * undocumented. Since ISC may change ns_updrec again, we pick the latter
+ * solution for now.
+ */
+#define res_mkupdate __ISC_res_mkupdate
+#define res_update __ISC_res_update
+#define res_mkupdrec __ISC_res_mkupdrec
+#define res_freeupdrec __ISC_res_freeupdrec
+#define res_nmkupdate __ISC_res_nmkupdate
+#define res_nupdate __ISC_res_nupdate
+
+
+#endif /* _ARPA_PORT_NAMESER_H */
diff --git a/usr/src/lib/libresolv2_joy/include/conf/sunoptions.h b/usr/src/lib/libresolv2_joy/include/conf/sunoptions.h
new file mode 100644
index 0000000000..b75ff9d878
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/conf/sunoptions.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SUNOPTIONS_H
+#define _SUNOPTIONS_H
+
+#define USELOOPBACK /* Resolver library defaults to 127.0.0.1 */
+
+/* Additions for Solaris 2 */
+
+#define SUNW_INITCHKIF /* Check if any non-loopback interface is up */
+#define SUNW_CONFCHECK /* Abort quickly if no /etc/resolv.conf or */
+ /* local named */
+#define SUNW_HOSTS_FALLBACK /* Configurable /etc/hosts fallback */
+#define SUNW_HNOK_UNDERSCORE /* Allow underscore in hostnames (libresolv) */
+#define SUNW_MT_RESOLVER /* MT hot extensions (libresolv) */
+#define SUNW_SETHERRNO /* ISC does not set h_errno in gethostbyname */
+#define SUNW_OVERRIDE_RETRY /* Allow NS switch to override res->retry */
+#define SUNW_LIBMD5 /* Use md5(3EXT) instead of internal implementation */
+
+/* If compiling an MT warm libresolv, we also need reentrancy */
+#if defined(SUNW_MT_RESOLVER) && !defined(_REENTRANT)
+#define _REENTRANT
+#endif
+
+/* End additions for Solaris 2 */
+
+#endif /* _SUNOPTIONS_H */
diff --git a/usr/src/lib/libresolv2_joy/include/config.h b/usr/src/lib/libresolv2_joy/include/config.h
new file mode 100644
index 0000000000..35fb115a0f
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/config.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+/* config.h. Generated from config.h.in by configure. */
+/* #undef _SOCKADDR_LEN */
+#define HAVE_FCNTL_H 1
+/* #undef HAVE_PATHS_H */
+#define HAVE_INTTYPES_H 1
+#define HAVE_STROPTS_H 1
+/* #undef HAVE_SYS_TIMERS_H */
+#define HAVE_SYS_SELECT_H 1
+#define HAVE_MEMORY_H 1
+/* #undef SYS_CDEFS_H */
+#define _POSIX_PTHREAD_SEMANTICS 1
+#define POSIX_GETPWUID_R 1
+#define POSIX_GETPWNAM_R 1
+#define POSIX_GETGRGID_R 1
+#define POSIX_GETGRNAM_R 1
+#define HAVE_MEMMOVE 1
+#define HAVE_MEMCHR 1
+/* #undef SPRINTF_CHAR */
+/* #undef VSPRINTF_CHAR */
+#define USE_SYSERROR_LIST 1
+/* #undef NEED_STRTOUL */
+/* #undef NEED_SUN4PROTOS */
+/* #undef REENABLE_SEND */
+
+#define NEED_SETGROUPENT 1
+#define NEED_GETGROUPLIST 1
+
+/* define if prototype for getgrnam_r() is required */
+/* #undef NEED_GETGRNAM_R */
+/* #undef NEED_GETGRGID_R */
+/* #undef NEED_GETGRENT_R */
+#define NEED_SETGRENT_R 1
+#define NEED_ENDGRENT_R 1
+
+#define NEED_INNETGR_R 1
+/* #undef NEED_SETNETGRENT_R */
+#define NEED_ENDNETGRENT_R 1
+
+/* #undef NEED_GETPWNAM_R */
+/* #undef NEED_GETPWUID_R */
+#define NEED_SETPWENT_R 1
+#define NEED_SETPASSENT_R 1
+#define NEED_SETPWENT_R 1
+/* #undef NEED_GETPWENT_R */
+#define NEED_ENDPWENT_R 1
+
+#define NEED_SETPASSENT 1
+
+/* #undef HAS_PW_CLASS */
+
+/* #undef ssize_t */
+/* #undef uintptr_t */
+
+/* Shut up warnings about sputaux in stdio.h on BSD/OS pre-4.1 */
+/* #undef SHUTUP_SPUTAUX */
+#ifdef SHUTUP_SPUTAUX
+struct __sFILE;
+extern __inline int __sputaux(int _c, struct __sFILE *_p);
+#endif
+#define BROKEN_IN6ADDR_INIT_MACROS 1
+#define HAVE_STRLCAT 1
+/* Shut up warnings about missing braces */
+/* #undef SHUTUP_MUTEX_INITIALIZER */
+#ifdef SHUTUP_MUTEX_INITIALIZER
+#define LIBBIND_MUTEX_INITIALIZER { PTHREAD_MUTEX_INITIALIZER }
+#else
+#define LIBBIND_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
+#endif
+
diff --git a/usr/src/lib/libresolv2_joy/include/err.h b/usr/src/lib/libresolv2_joy/include/err.h
new file mode 100644
index 0000000000..45992ea336
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/err.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2001 by Sun Microsystems, Inc.
+ * All rights reserved.
+ */
+
+/*-
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)err.h 8.1 (Berkeley) 6/2/93
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifndef _ERR_H_
+#define _ERR_H_
+
+#include <sys/cdefs.h>
+#include <stdarg.h>
+
+__BEGIN_DECLS
+__dead void err __P((int, const char *, ...)) __attribute__((__volatile));
+__dead void verr __P((int, const char *, va_list))
+ __attribute__((__volatile));
+__dead void errx __P((int, const char *, ...)) __attribute__((__volatile));
+__dead void verrx __P((int, const char *, va_list))
+ __attribute__((__volatile));
+void warn __P((const char *, ...));
+void vwarn __P((const char *, va_list));
+void warnx __P((const char *, ...));
+void vwarnx __P((const char *, va_list));
+__END_DECLS
+
+#endif /* !_ERR_H_ */
diff --git a/usr/src/lib/libresolv2_joy/include/fd_setsize.h b/usr/src/lib/libresolv2_joy/include/fd_setsize.h
new file mode 100644
index 0000000000..0e21049742
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/fd_setsize.h
@@ -0,0 +1,10 @@
+#ifndef _FD_SETSIZE_H
+#define _FD_SETSIZE_H
+
+/*%
+ * If you need a bigger FD_SETSIZE, this is NOT the place to set it.
+ * This file is a fallback for BIND ports which don't specify their own.
+ */
+
+#endif /* _FD_SETSIZE_H */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/include/hesiod.h b/usr/src/lib/libresolv2_joy/include/hesiod.h
new file mode 100644
index 0000000000..d64c0c5e80
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/hesiod.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*! \file
+ * \brief
+ * This file is primarily maintained by <tytso@mit.edu> and <ghudson@mit.edu>.
+ */
+
+/*
+ * $Id: hesiod.h,v 1.4 2005/04/27 04:56:14 sra Exp $
+ */
+
+#ifndef _HESIOD_H_INCLUDED
+#define _HESIOD_H_INCLUDED
+
+int hesiod_init __P((void **));
+void hesiod_end __P((void *));
+char * hesiod_to_bind __P((void *, const char *, const char *));
+char ** hesiod_resolve __P((void *, const char *, const char *));
+void hesiod_free_list __P((void *, char **));
+struct __res_state * __hesiod_res_get __P((void *));
+void __hesiod_res_set __P((void *, struct __res_state *,
+ void (*)(void *)));
+
+#endif /*_HESIOD_H_INCLUDED*/
diff --git a/usr/src/lib/libresolv2_joy/include/irp.h b/usr/src/lib/libresolv2_joy/include/irp.h
new file mode 100644
index 0000000000..1290bd068f
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/irp.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: irp.h,v 1.4 2005/04/27 04:56:15 sra Exp $
+ */
+
+#ifndef _IRP_H_INCLUDED
+#define _IRP_H_INCLUDED
+
+/*! \file */
+
+#define IRPD_TIMEOUT 30 /*%< seconds */
+#define IRPD_MAXSESS 50 /*%< number of simultaneous sessions. */
+#define IRPD_PORT 6660 /*%< 10 times the number of the beast. */
+#define IRPD_PATH "/var/run/irpd" /*%< af_unix socket path */
+
+/* If sets the environment variable IRPDSERVER to an IP address
+ (e.g. "192.5.5.1"), then that's the host the client expects irpd to be
+ running on. */
+#define IRPD_HOST_ENV "IRPDSERVER"
+
+/* Protocol response codes. */
+#define IRPD_WELCOME_CODE 200
+#define IRPD_NOT_WELCOME_CODE 500
+
+#define IRPD_GETHOST_ERROR 510
+#define IRPD_GETHOST_NONE 210
+#define IRPD_GETHOST_OK 211
+#define IRPD_GETHOST_SETOK 212
+
+#define IRPD_GETNET_ERROR 520
+#define IRPD_GETNET_NONE 220
+#define IRPD_GETNET_OK 221
+#define IRPD_GETNET_SETOK 222
+
+#define IRPD_GETUSER_ERROR 530
+#define IRPD_GETUSER_NONE 230
+#define IRPD_GETUSER_OK 231
+#define IRPD_GETUSER_SETOK 232
+
+#define IRPD_GETGROUP_ERROR 540
+#define IRPD_GETGROUP_NONE 240
+#define IRPD_GETGROUP_OK 241
+#define IRPD_GETGROUP_SETOK 242
+
+#define IRPD_GETSERVICE_ERROR 550
+#define IRPD_GETSERVICE_NONE 250
+#define IRPD_GETSERVICE_OK 251
+#define IRPD_GETSERVICE_SETOK 252
+
+#define IRPD_GETPROTO_ERROR 560
+#define IRPD_GETPROTO_NONE 260
+#define IRPD_GETPROTO_OK 261
+#define IRPD_GETPROTO_SETOK 262
+
+#define IRPD_GETNETGR_ERROR 570
+#define IRPD_GETNETGR_NONE 270
+#define IRPD_GETNETGR_OK 271
+#define IRPD_GETNETGR_NOMORE 272
+#define IRPD_GETNETGR_MATCHES 273
+#define IRPD_GETNETGR_NOMATCH 274
+#define IRPD_GETNETGR_SETOK 275
+#define IRPD_GETNETGR_SETERR 276
+
+#define irs_irp_read_body __irs_irp_read_body
+#define irs_irp_read_response __irs_irp_read_response
+#define irs_irp_disconnect __irs_irp_disconnect
+#define irs_irp_connect __irs_irp_connect
+#define irs_irp_connection_setup __irs_irp_connection_setup
+#define irs_irp_send_command __irs_irp_send_command
+
+struct irp_p;
+
+char *irs_irp_read_body(struct irp_p *, size_t *);
+int irs_irp_read_response(struct irp_p *, char *, size_t);
+void irs_irp_disconnect(struct irp_p *);
+int irs_irp_connect(struct irp_p *);
+int irs_irp_is_connected(struct irp_p *);
+int irs_irp_connection_setup(struct irp_p *, int *);
+#ifdef __GNUC__
+int irs_irp_send_command(struct irp_p *, const char *, ...)
+ __attribute__((__format__(__printf__, 2, 3)));
+#else
+int irs_irp_send_command(struct irp_p *, const char *, ...);
+#endif
+int irs_irp_get_full_response(struct irp_p *, int *, char *, size_t,
+ char **, size_t *);
+int irs_irp_read_line(struct irp_p *, char *, int);
+
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/include/irs.h b/usr/src/lib/libresolv2_joy/include/irs.h
new file mode 100644
index 0000000000..386e3cb3f6
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/irs.h
@@ -0,0 +1,348 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: irs.h,v 1.5 2005/04/27 04:56:15 sra Exp $
+ */
+
+#ifndef _IRS_H_INCLUDED
+#define _IRS_H_INCLUDED
+
+/*! \file */
+
+#include <sys/types.h>
+
+#include <arpa/nameser.h>
+
+#include <grp.h>
+#include <netdb.h>
+#include <resolv_joy.h>
+#include <pwd.h>
+
+/*%
+ * This is the group map class.
+ */
+struct irs_gr {
+ void * private;
+ void (*close) __P((struct irs_gr *));
+ struct group * (*next) __P((struct irs_gr *));
+ struct group * (*byname) __P((struct irs_gr *, const char *));
+ struct group * (*bygid) __P((struct irs_gr *, gid_t));
+ int (*list) __P((struct irs_gr *, const char *,
+ gid_t, gid_t *, int *));
+ void (*rewind) __P((struct irs_gr *));
+ void (*minimize) __P((struct irs_gr *));
+ struct __res_state * (*res_get) __P((struct irs_gr *));
+ void (*res_set) __P((struct irs_gr *, res_state,
+ void (*)(void *)));
+};
+
+/*%
+ * This is the password map class.
+ */
+struct irs_pw {
+ void * private;
+ void (*close) __P((struct irs_pw *));
+ struct passwd * (*next) __P((struct irs_pw *));
+ struct passwd * (*byname) __P((struct irs_pw *, const char *));
+ struct passwd * (*byuid) __P((struct irs_pw *, uid_t));
+ void (*rewind) __P((struct irs_pw *));
+ void (*minimize) __P((struct irs_pw *));
+ struct __res_state * (*res_get) __P((struct irs_pw *));
+ void (*res_set) __P((struct irs_pw *, res_state,
+ void (*)(void *)));
+};
+
+/*%
+ * This is the service map class.
+ */
+struct irs_sv {
+ void * private;
+ void (*close) __P((struct irs_sv *));
+ struct servent *(*byname) __P((struct irs_sv *,
+ const char *, const char *));
+ struct servent *(*byport) __P((struct irs_sv *, int, const char *));
+ struct servent *(*next) __P((struct irs_sv *));
+ void (*rewind) __P((struct irs_sv *));
+ void (*minimize) __P((struct irs_sv *));
+ struct __res_state * (*res_get) __P((struct irs_sv *));
+ void (*res_set) __P((struct irs_sv *, res_state,
+ void (*)(void *)));
+};
+
+/*%
+ * This is the protocols map class.
+ */
+struct irs_pr {
+ void * private;
+ void (*close) __P((struct irs_pr *));
+ struct protoent *(*byname) __P((struct irs_pr *, const char *));
+ struct protoent *(*bynumber) __P((struct irs_pr *, int));
+ struct protoent *(*next) __P((struct irs_pr *));
+ void (*rewind) __P((struct irs_pr *));
+ void (*minimize) __P((struct irs_pr *));
+ struct __res_state * (*res_get) __P((struct irs_pr *));
+ void (*res_set) __P((struct irs_pr *, res_state,
+ void (*)(void *)));
+};
+
+/*%
+ * This is the hosts map class.
+ */
+struct irs_ho {
+ void * private;
+ void (*close) __P((struct irs_ho *));
+ struct hostent *(*byname) __P((struct irs_ho *, const char *));
+ struct hostent *(*byname2) __P((struct irs_ho *, const char *, int));
+ struct hostent *(*byaddr) __P((struct irs_ho *,
+ const void *, int, int));
+ struct hostent *(*next) __P((struct irs_ho *));
+ void (*rewind) __P((struct irs_ho *));
+ void (*minimize) __P((struct irs_ho *));
+ struct __res_state * (*res_get) __P((struct irs_ho *));
+ void (*res_set) __P((struct irs_ho *, res_state,
+ void (*)(void *)));
+ struct addrinfo *(*addrinfo) __P((struct irs_ho *, const char *,
+ const struct addrinfo *));
+};
+
+/*%
+ * This is the networks map class.
+ */
+struct irs_nw {
+ void * private;
+ void (*close) __P((struct irs_nw *));
+ struct nwent * (*byname) __P((struct irs_nw *, const char *, int));
+ struct nwent * (*byaddr) __P((struct irs_nw *, void *, int, int));
+ struct nwent * (*next) __P((struct irs_nw *));
+ void (*rewind) __P((struct irs_nw *));
+ void (*minimize) __P((struct irs_nw *));
+ struct __res_state * (*res_get) __P((struct irs_nw *));
+ void (*res_set) __P((struct irs_nw *, res_state,
+ void (*)(void *)));
+};
+
+/*%
+ * This is the netgroups map class.
+ */
+struct irs_ng {
+ void * private;
+ void (*close) __P((struct irs_ng *));
+ int (*next) __P((struct irs_ng *, const char **,
+ const char **, const char **));
+ int (*test) __P((struct irs_ng *, const char *,
+ const char *, const char *,
+ const char *));
+ void (*rewind) __P((struct irs_ng *, const char *));
+ void (*minimize) __P((struct irs_ng *));
+};
+
+/*%
+ * This is the generic map class, which copies the front of all others.
+ */
+struct irs_map {
+ void * private;
+ void (*close) __P((void *));
+};
+
+/*%
+ * This is the accessor class. It contains pointers to all of the
+ * initializers for the map classes for a particular accessor.
+ */
+struct irs_acc {
+ void * private;
+ void (*close) __P((struct irs_acc *));
+ struct irs_gr * (*gr_map) __P((struct irs_acc *));
+ struct irs_pw * (*pw_map) __P((struct irs_acc *));
+ struct irs_sv * (*sv_map) __P((struct irs_acc *));
+ struct irs_pr * (*pr_map) __P((struct irs_acc *));
+ struct irs_ho * (*ho_map) __P((struct irs_acc *));
+ struct irs_nw * (*nw_map) __P((struct irs_acc *));
+ struct irs_ng * (*ng_map) __P((struct irs_acc *));
+ struct __res_state * (*res_get) __P((struct irs_acc *));
+ void (*res_set) __P((struct irs_acc *, res_state,
+ void (*)(void *)));
+};
+
+/*%
+ * This is because the official definition of "struct netent" has no
+ * concept of CIDR even though it allows variant address families (on
+ * output but not input). The compatibility stubs convert the structs
+ * below into "struct netent"'s.
+ */
+struct nwent {
+ char *n_name; /*%< official name of net */
+ char **n_aliases; /*%< alias list */
+ int n_addrtype; /*%< net address type */
+ void *n_addr; /*%< network address */
+ int n_length; /*%< address length, in bits */
+};
+
+/*%
+ * Hide external function names from POSIX.
+ */
+#define irs_gen_acc __irs_gen_acc
+#define irs_lcl_acc __irs_lcl_acc
+#define irs_dns_acc __irs_dns_acc
+#define irs_nis_acc __irs_nis_acc
+#define irs_irp_acc __irs_irp_acc
+#define irs_destroy __irs_destroy
+#define irs_dns_gr __irs_dns_gr
+#define irs_dns_ho __irs_dns_ho
+#define irs_dns_nw __irs_dns_nw
+#define irs_dns_pr __irs_dns_pr
+#define irs_dns_pw __irs_dns_pw
+#define irs_dns_sv __irs_dns_sv
+#define irs_gen_gr __irs_gen_gr
+#define irs_gen_ho __irs_gen_ho
+#define irs_gen_ng __irs_gen_ng
+#define irs_gen_nw __irs_gen_nw
+#define irs_gen_pr __irs_gen_pr
+#define irs_gen_pw __irs_gen_pw
+#define irs_gen_sv __irs_gen_sv
+#define irs_irp_get_full_response __irs_irp_get_full_response
+#define irs_irp_gr __irs_irp_gr
+#define irs_irp_ho __irs_irp_ho
+#define irs_irp_is_connected __irs_irp_is_connected
+#define irs_irp_ng __irs_irp_ng
+#define irs_irp_nw __irs_irp_nw
+#define irs_irp_pr __irs_irp_pr
+#define irs_irp_pw __irs_irp_pw
+#define irs_irp_read_line __irs_irp_read_line
+#define irs_irp_sv __irs_irp_sv
+#define irs_lcl_gr __irs_lcl_gr
+#define irs_lcl_ho __irs_lcl_ho
+#define irs_lcl_ng __irs_lcl_ng
+#define irs_lcl_nw __irs_lcl_nw
+#define irs_lcl_pr __irs_lcl_pr
+#define irs_lcl_pw __irs_lcl_pw
+#define irs_lcl_sv __irs_lcl_sv
+#define irs_nis_gr __irs_nis_gr
+#define irs_nis_ho __irs_nis_ho
+#define irs_nis_ng __irs_nis_ng
+#define irs_nis_nw __irs_nis_nw
+#define irs_nis_pr __irs_nis_pr
+#define irs_nis_pw __irs_nis_pw
+#define irs_nis_sv __irs_nis_sv
+#define net_data_create __net_data_create
+#define net_data_destroy __net_data_destroy
+#define net_data_minimize __net_data_minimize
+
+/*%
+ * Externs.
+ */
+extern struct irs_acc * irs_gen_acc __P((const char *, const char *));
+extern struct irs_acc * irs_lcl_acc __P((const char *));
+extern struct irs_acc * irs_dns_acc __P((const char *));
+extern struct irs_acc * irs_nis_acc __P((const char *));
+extern struct irs_acc * irs_irp_acc __P((const char *));
+
+extern void irs_destroy __P((void));
+
+/*%
+ * These forward declarations are for the semi-private functions in
+ * the get*.c files. Each of these funcs implements the real get*
+ * functionality and the standard versions are just wrappers that
+ * call these. Apart from the wrappers, only irpd is expected to
+ * call these directly, hence these decls are put here and not in
+ * the /usr/include replacements.
+ */
+
+struct net_data; /*%< forward */
+/*
+ * net_data_create gets a singleton net_data object. net_data_init
+ * creates as many net_data objects as times it is called. Clients using
+ * the default interface will use net_data_create by default. Servers will
+ * probably want net_data_init (one call per client)
+ */
+struct net_data *net_data_create __P((const char *));
+struct net_data *net_data_init __P((const char *));
+void net_data_destroy __P((void *));
+
+extern struct group *getgrent_p __P((struct net_data *));
+extern struct group *getgrnam_p __P((const char *, struct net_data *));
+extern struct group *getgrgid_p __P((gid_t, struct net_data *));
+extern int setgroupent_p __P((int, struct net_data *));
+extern void endgrent_p __P((struct net_data *));
+extern int getgrouplist_p __P((const char *, gid_t, gid_t *, int *,
+ struct net_data *));
+
+#ifdef SETGRENT_VOID
+extern void setgrent_p __P((struct net_data *));
+#else
+extern int setgrent_p __P((struct net_data *));
+#endif
+
+extern struct hostent *gethostbyname_p __P((const char *,
+ struct net_data *));
+extern struct hostent *gethostbyname2_p __P((const char *, int,
+ struct net_data *));
+extern struct hostent *gethostbyaddr_p __P((const char *, int, int,
+ struct net_data *));
+extern struct hostent *gethostent_p __P((struct net_data *));
+extern void sethostent_p __P((int, struct net_data *));
+extern void endhostent_p __P((struct net_data *));
+extern struct hostent *getipnodebyname_p __P((const char *, int, int, int *,
+ struct net_data *));
+extern struct hostent *getipnodebyaddr_p __P((const void *, size_t,
+ int, int *, struct net_data *));
+
+extern struct netent *getnetent_p __P((struct net_data *));
+extern struct netent *getnetbyname_p __P((const char *, struct net_data *));
+extern struct netent *getnetbyaddr_p __P((unsigned long, int,
+ struct net_data *));
+extern void setnetent_p __P((int, struct net_data *));
+extern void endnetent_p __P((struct net_data *));
+
+extern void setnetgrent_p __P((const char *, struct net_data *));
+extern void endnetgrent_p __P((struct net_data *));
+extern int innetgr_p __P((const char *, const char *, const char *,
+ const char *, struct net_data *));
+extern int getnetgrent_p __P((const char **, const char **,
+ const char **, struct net_data *));
+
+extern struct protoent *getprotoent_p __P((struct net_data *));
+extern struct protoent *getprotobyname_p __P((const char *,
+ struct net_data *));
+extern struct protoent *getprotobynumber_p __P((int, struct net_data *));
+extern void setprotoent_p __P((int, struct net_data *));
+extern void endprotoent_p __P((struct net_data *));
+
+
+extern struct passwd *getpwent_p __P((struct net_data *));
+extern struct passwd *getpwnam_p __P((const char *, struct net_data *));
+extern struct passwd *getpwuid_p __P((uid_t, struct net_data *));
+extern int setpassent_p __P((int, struct net_data *));
+extern void endpwent_p __P((struct net_data *));
+
+#ifdef SETPWENT_VOID
+extern void setpwent_p __P((struct net_data *));
+#else
+extern int setpwent_p __P((struct net_data *));
+#endif
+
+extern struct servent *getservent_p __P((struct net_data *));
+extern struct servent *getservbyname_p __P((const char *, const char *,
+ struct net_data *));
+extern struct servent *getservbyport_p __P((int, const char *,
+ struct net_data *));
+extern void setservent_p __P((int, struct net_data *));
+extern void endservent_p __P((struct net_data *));
+
+#endif /*_IRS_H_INCLUDED*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/include/isc/assertions.h b/usr/src/lib/libresolv2_joy/include/isc/assertions.h
new file mode 100644
index 0000000000..68925e73b3
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/isc/assertions.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 1997-2001 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: assertions.h,v 1.5 2008/11/14 02:36:51 marka Exp $
+ */
+
+#ifndef ASSERTIONS_H
+#define ASSERTIONS_H 1
+
+typedef enum {
+ assert_require, assert_ensure, assert_insist, assert_invariant
+} assertion_type;
+
+typedef void (*assertion_failure_callback)(const char *, int, assertion_type,
+ const char *, int);
+
+/* coverity[+kill] */
+extern assertion_failure_callback __assertion_failed;
+void set_assertion_failure_callback(assertion_failure_callback f);
+const char *assertion_type_to_text(assertion_type type);
+
+#if defined(CHECK_ALL) || defined(__COVERITY__)
+#define CHECK_REQUIRE 1
+#define CHECK_ENSURE 1
+#define CHECK_INSIST 1
+#define CHECK_INVARIANT 1
+#endif
+
+#if defined(CHECK_NONE) && !defined(__COVERITY__)
+#define CHECK_REQUIRE 0
+#define CHECK_ENSURE 0
+#define CHECK_INSIST 0
+#define CHECK_INVARIANT 0
+#endif
+
+#ifndef CHECK_REQUIRE
+#define CHECK_REQUIRE 1
+#endif
+
+#ifndef CHECK_ENSURE
+#define CHECK_ENSURE 1
+#endif
+
+#ifndef CHECK_INSIST
+#define CHECK_INSIST 1
+#endif
+
+#ifndef CHECK_INVARIANT
+#define CHECK_INVARIANT 1
+#endif
+
+#if CHECK_REQUIRE != 0
+#define REQUIRE(cond) \
+ ((void) ((cond) || \
+ ((__assertion_failed)(__FILE__, __LINE__, assert_require, \
+ #cond, 0), 0)))
+#define REQUIRE_ERR(cond) \
+ ((void) ((cond) || \
+ ((__assertion_failed)(__FILE__, __LINE__, assert_require, \
+ #cond, 1), 0)))
+#else
+#define REQUIRE(cond) ((void) (cond))
+#define REQUIRE_ERR(cond) ((void) (cond))
+#endif /* CHECK_REQUIRE */
+
+#if CHECK_ENSURE != 0
+#define ENSURE(cond) \
+ ((void) ((cond) || \
+ ((__assertion_failed)(__FILE__, __LINE__, assert_ensure, \
+ #cond, 0), 0)))
+#define ENSURE_ERR(cond) \
+ ((void) ((cond) || \
+ ((__assertion_failed)(__FILE__, __LINE__, assert_ensure, \
+ #cond, 1), 0)))
+#else
+#define ENSURE(cond) ((void) (cond))
+#define ENSURE_ERR(cond) ((void) (cond))
+#endif /* CHECK_ENSURE */
+
+#if CHECK_INSIST != 0
+#define INSIST(cond) \
+ ((void) ((cond) || \
+ ((__assertion_failed)(__FILE__, __LINE__, assert_insist, \
+ #cond, 0), 0)))
+#define INSIST_ERR(cond) \
+ ((void) ((cond) || \
+ ((__assertion_failed)(__FILE__, __LINE__, assert_insist, \
+ #cond, 1), 0)))
+#else
+#define INSIST(cond) ((void) (cond))
+#define INSIST_ERR(cond) ((void) (cond))
+#endif /* CHECK_INSIST */
+
+#if CHECK_INVARIANT != 0
+#define INVARIANT(cond) \
+ ((void) ((cond) || \
+ ((__assertion_failed)(__FILE__, __LINE__, assert_invariant, \
+ #cond, 0), 0)))
+#define INVARIANT_ERR(cond) \
+ ((void) ((cond) || \
+ ((__assertion_failed)(__FILE__, __LINE__, assert_invariant, \
+ #cond, 1), 0)))
+#else
+#define INVARIANT(cond) ((void) (cond))
+#define INVARIANT_ERR(cond) ((void) (cond))
+#endif /* CHECK_INVARIANT */
+#endif /* ASSERTIONS_H */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/include/isc/ctl.h b/usr/src/lib/libresolv2_joy/include/isc/ctl.h
new file mode 100644
index 0000000000..e2ba20201d
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/isc/ctl.h
@@ -0,0 +1,112 @@
+#ifndef ISC_CTL_H
+#define ISC_CTL_H
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1998,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: ctl.h,v 1.5 2005/04/27 04:56:17 sra Exp $
+ */
+
+/*! \file */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <isc/eventlib.h>
+
+/* Macros. */
+
+#define CTL_MORE 0x0001 /*%< More will be / should be sent. */
+#define CTL_EXIT 0x0002 /*%< Close connection after this. */
+#define CTL_DATA 0x0004 /*%< Go into / this is DATA mode. */
+/* Types. */
+
+struct ctl_cctx;
+struct ctl_sctx;
+struct ctl_sess;
+struct ctl_verb;
+
+enum ctl_severity { ctl_debug, ctl_warning, ctl_error };
+
+typedef void (*ctl_logfunc)(enum ctl_severity, const char *, ...);
+
+typedef void (*ctl_verbfunc)(struct ctl_sctx *, struct ctl_sess *,
+ const struct ctl_verb *, const char *,
+ u_int, const void *, void *);
+
+typedef void (*ctl_srvrdone)(struct ctl_sctx *, struct ctl_sess *, void *);
+
+typedef void (*ctl_clntdone)(struct ctl_cctx *, void *, const char *, u_int);
+
+struct ctl_verb {
+ const char * name;
+ ctl_verbfunc func;
+ const char * help;
+};
+
+/* General symbols. */
+
+#define ctl_logger __ctl_logger
+
+#ifdef __GNUC__
+void ctl_logger(enum ctl_severity, const char *, ...)
+ __attribute__((__format__(__printf__, 2, 3)));
+#else
+void ctl_logger(enum ctl_severity, const char *, ...);
+#endif
+
+/* Client symbols. */
+
+#define ctl_client __ctl_client
+#define ctl_endclient __ctl_endclient
+#define ctl_command __ctl_command
+
+struct ctl_cctx * ctl_client(evContext, const struct sockaddr *, size_t,
+ const struct sockaddr *, size_t,
+ ctl_clntdone, void *,
+ u_int, ctl_logfunc);
+void ctl_endclient(struct ctl_cctx *);
+int ctl_command(struct ctl_cctx *, const char *, size_t,
+ ctl_clntdone, void *);
+
+/* Server symbols. */
+
+#define ctl_server __ctl_server
+#define ctl_endserver __ctl_endserver
+#define ctl_response __ctl_response
+#define ctl_sendhelp __ctl_sendhelp
+#define ctl_getcsctx __ctl_getcsctx
+#define ctl_setcsctx __ctl_setcsctx
+
+struct ctl_sctx * ctl_server(evContext, const struct sockaddr *, size_t,
+ const struct ctl_verb *,
+ u_int, u_int,
+ u_int, int, int,
+ ctl_logfunc, void *);
+void ctl_endserver(struct ctl_sctx *);
+void ctl_response(struct ctl_sess *, u_int,
+ const char *, u_int, const void *,
+ ctl_srvrdone, void *,
+ const char *, size_t);
+void ctl_sendhelp(struct ctl_sess *, u_int);
+void * ctl_getcsctx(struct ctl_sess *);
+void * ctl_setcsctx(struct ctl_sess *, void *);
+
+#endif /*ISC_CTL_H*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/include/isc/dst.h b/usr/src/lib/libresolv2_joy/include/isc/dst.h
new file mode 100644
index 0000000000..90a9e67468
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/isc/dst.h
@@ -0,0 +1,168 @@
+#ifndef DST_H
+#define DST_H
+
+#ifndef HAS_DST_KEY
+typedef struct dst_key {
+ char *dk_key_name; /*%< name of the key */
+ int dk_key_size; /*%< this is the size of the key in bits */
+ int dk_proto; /*%< what protocols this key can be used for */
+ int dk_alg; /*%< algorithm number from key record */
+ u_int32_t dk_flags; /*%< and the flags of the public key */
+ u_int16_t dk_id; /*%< identifier of the key */
+} DST_KEY;
+#endif /* HAS_DST_KEY */
+/*
+ * do not taint namespace
+ */
+#define dst_bsafe_init __dst_bsafe_init
+#define dst_buffer_to_key __dst_buffer_to_key
+#define dst_check_algorithm __dst_check_algorithm
+#define dst_compare_keys __dst_compare_keys
+#define dst_cylink_init __dst_cylink_init
+#define dst_dnskey_to_key __dst_dnskey_to_key
+#define dst_eay_dss_init __dst_eay_dss_init
+#define dst_free_key __dst_free_key
+#define dst_generate_key __dst_generate_key
+#define dst_hmac_md5_init __dst_hmac_md5_init
+#define dst_init __dst_init
+#define dst_key_to_buffer __dst_key_to_buffer
+#define dst_key_to_dnskey __dst_key_to_dnskey
+#define dst_read_key __dst_read_key
+#define dst_rsaref_init __dst_rsaref_init
+#define dst_s_build_filename __dst_s_build_filename
+#define dst_s_calculate_bits __dst_s_calculate_bits
+#define dst_s_conv_bignum_b64_to_u8 __dst_s_conv_bignum_b64_to_u8
+#define dst_s_conv_bignum_u8_to_b64 __dst_s_conv_bignum_u8_to_b64
+#define dst_s_dns_key_id __dst_s_dns_key_id
+#define dst_s_dump __dst_s_dump
+#define dst_s_filename_length __dst_s_filename_length
+#define dst_s_fopen __dst_s_fopen
+#define dst_s_get_int16 __dst_s_get_int16
+#define dst_s_get_int32 __dst_s_get_int32
+#define dst_s_id_calc __dst_s_id_calc
+#define dst_s_put_int16 __dst_s_put_int16
+#define dst_s_put_int32 __dst_s_put_int32
+#define dst_s_quick_random __dst_s_quick_random
+#define dst_s_quick_random_set __dst_s_quick_random_set
+#define dst_s_random __dst_s_random
+#define dst_s_semi_random __dst_s_semi_random
+#define dst_s_verify_str __dst_s_verify_str
+#define dst_sig_size __dst_sig_size
+#define dst_sign_data __dst_sign_data
+#define dst_verify_data __dst_verify_data
+#define dst_write_key __dst_write_key
+
+/*
+ * DST Crypto API defintions
+ */
+void dst_init(void);
+int dst_check_algorithm(const int);
+
+
+int dst_sign_data(const int, /*!< specifies INIT/UPDATE/FINAL/ALL */
+ DST_KEY *, /*!< the key to use */
+ void **, /*!< pointer to state structure */
+ const u_char *, /*!< data to be signed */
+ const int, /*!< length of input data */
+ u_char *, /*!< buffer to write signature to */
+ const int); /*!< size of output buffer */
+int dst_verify_data(const int, /*!< specifies INIT/UPDATE/FINAL/ALL */
+ DST_KEY *, /*!< the key to use */
+ void **, /*!< pointer to state structure */
+ const u_char *, /*!< data to be verified */
+ const int, /*!< length of input data */
+ const u_char *, /*!< buffer containing signature */
+ const int); /*!< length of signature */
+DST_KEY *dst_read_key(const char *, /*!< name of key */
+ const u_int16_t, /*!< key tag identifier */
+ const int, /*!< key algorithm */
+ const int); /*!< Private/PublicKey wanted */
+int dst_write_key(const DST_KEY *, /*!< key to write out */
+ const int); /*!< Public/Private */
+DST_KEY *dst_dnskey_to_key(const char *, /*!< KEY record name */
+ const u_char *, /*!< KEY RDATA */
+ const int); /*!< size of input buffer */
+int dst_key_to_dnskey(const DST_KEY *, /*!< key to translate */
+ u_char *, /*!< output buffer */
+ const int); /*!< size of out_storage */
+DST_KEY *dst_buffer_to_key(const char *, /*!< name of the key */
+ const int, /*!< algorithm */
+ const int, /*!< dns flags */
+ const int, /*!< dns protocol */
+ const u_char *, /*!< key in dns wire fmt */
+ const int); /*!< size of key */
+int dst_key_to_buffer(DST_KEY *, u_char *, int);
+
+DST_KEY *dst_generate_key(const char *, /*!< name of new key */
+ const int, /*!< key algorithm to generate */
+ const int, /*!< size of new key */
+ const int, /*!< alg dependent parameter */
+ const int, /*!< key DNS flags */
+ const int); /*!< key DNS protocol */
+DST_KEY *dst_free_key(DST_KEY *);
+int dst_compare_keys(const DST_KEY *, const DST_KEY *);
+
+int dst_sig_size(DST_KEY *);
+
+
+/* support for dns key tags/ids */
+u_int16_t dst_s_dns_key_id(const u_char *, const int);
+u_int16_t dst_s_id_calc(const u_char *, const int);
+
+/* Used by callers as well as by the library. */
+#define RAW_KEY_SIZE 8192 /*%< large enough to store any key */
+/* DST_API control flags */
+/* These are used used in functions dst_sign_data and dst_verify_data */
+#define SIG_MODE_INIT 1 /*%< initialize digest */
+#define SIG_MODE_UPDATE 2 /*%< add data to digest */
+#define SIG_MODE_FINAL 4 /*%< generate/verify signature */
+#define SIG_MODE_ALL (SIG_MODE_INIT|SIG_MODE_UPDATE|SIG_MODE_FINAL)
+
+/* Flags for dst_read_private_key() */
+#define DST_FORCE_READ 0x1000000
+#define DST_CAN_SIGN 0x010F
+#define DST_NO_AUTHEN 0x8000
+#define DST_EXTEND_FLAG 0x1000
+#define DST_STANDARD 0
+#define DST_PRIVATE 0x2000000
+#define DST_PUBLIC 0x4000000
+#define DST_RAND_SEMI 1
+#define DST_RAND_STD 2
+#define DST_RAND_KEY 3
+#define DST_RAND_DSS 4
+
+
+/* DST algorithm codes */
+#define KEY_RSA 1
+#define KEY_DH 2
+#define KEY_DSA 3
+#define KEY_PRIVATE 254
+#define KEY_EXPAND 255
+#define KEY_HMAC_MD5 157
+#define KEY_HMAC_SHA1 158
+#define UNKNOWN_KEYALG 0
+#define DST_MAX_ALGS KEY_HMAC_SHA1
+
+/* DST constants to locations in KEY record changes in new KEY record */
+#define DST_FLAGS_SIZE 2
+#define DST_KEY_PROT 2
+#define DST_KEY_ALG 3
+#define DST_EXT_FLAG 4
+#define DST_KEY_START 4
+
+#ifndef SIGN_F_NOKEY
+#define SIGN_F_NOKEY 0xC000
+#endif
+
+/* error codes from dst routines */
+#define SIGN_INIT_FAILURE (-23)
+#define SIGN_UPDATE_FAILURE (-24)
+#define SIGN_FINAL_FAILURE (-25)
+#define VERIFY_INIT_FAILURE (-26)
+#define VERIFY_UPDATE_FAILURE (-27)
+#define VERIFY_FINAL_FAILURE (-28)
+#define MISSING_KEY_OR_SIGNATURE (-30)
+#define UNSUPPORTED_KEYALG (-31)
+
+#endif /* DST_H */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/include/isc/eventlib.h b/usr/src/lib/libresolv2_joy/include/isc/eventlib.h
new file mode 100644
index 0000000000..a4cfdf9092
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/isc/eventlib.h
@@ -0,0 +1,206 @@
+/*
+ * Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 1995-1999, 2001, 2003 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* eventlib.h - exported interfaces for eventlib
+ * vix 09sep95 [initial]
+ *
+ * $Id: eventlib.h,v 1.7 2008/11/14 02:36:51 marka Exp $
+ */
+
+#ifndef _EVENTLIB_H
+#define _EVENTLIB_H
+
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/time.h>
+#include <stdio.h>
+
+#include <isc/platform.h>
+
+#ifndef __P
+# define __EVENTLIB_P_DEFINED
+# ifdef __STDC__
+# define __P(x) x
+# else
+# define __P(x) ()
+# endif
+#endif
+
+/* In the absence of branded types... */
+typedef struct { void *opaque; } evConnID;
+typedef struct { void *opaque; } evFileID;
+typedef struct { void *opaque; } evStreamID;
+typedef struct { void *opaque; } evTimerID;
+typedef struct { void *opaque; } evWaitID;
+typedef struct { void *opaque; } evContext;
+typedef struct { void *opaque; } evEvent;
+
+#define evInitID(id) ((id)->opaque = NULL)
+#define evTestID(id) ((id).opaque != NULL)
+
+typedef void (*evConnFunc)__P((evContext, void *, int, const void *, int,
+ const void *, int));
+typedef void (*evFileFunc)__P((evContext, void *, int, int));
+typedef void (*evStreamFunc)__P((evContext, void *, int, int));
+typedef void (*evTimerFunc)__P((evContext, void *,
+ struct timespec, struct timespec));
+typedef void (*evWaitFunc)__P((evContext, void *, const void *));
+
+typedef struct { unsigned char mask[256/8]; } evByteMask;
+#define EV_BYTEMASK_BYTE(b) ((b) / 8)
+#define EV_BYTEMASK_MASK(b) (1 << ((b) % 8))
+#define EV_BYTEMASK_SET(bm, b) \
+ ((bm).mask[EV_BYTEMASK_BYTE(b)] |= EV_BYTEMASK_MASK(b))
+#define EV_BYTEMASK_CLR(bm, b) \
+ ((bm).mask[EV_BYTEMASK_BYTE(b)] &= ~EV_BYTEMASK_MASK(b))
+#define EV_BYTEMASK_TST(bm, b) \
+ ((bm).mask[EV_BYTEMASK_BYTE(b)] & EV_BYTEMASK_MASK(b))
+
+#define EV_POLL 1
+#define EV_WAIT 2
+#define EV_NULL 4
+
+#define EV_READ 1
+#define EV_WRITE 2
+#define EV_EXCEPT 4
+
+#define EV_WASNONBLOCKING 8 /* Internal library use. */
+
+/* eventlib.c */
+#define evCreate __evCreate
+#define evSetDebug __evSetDebug
+#define evDestroy __evDestroy
+#define evGetNext __evGetNext
+#define evDispatch __evDispatch
+#define evDrop __evDrop
+#define evMainLoop __evMainLoop
+#define evHighestFD __evHighestFD
+#define evGetOption __evGetOption
+#define evSetOption __evSetOption
+
+int evCreate __P((evContext *));
+void evSetDebug __P((evContext, int, FILE *));
+int evDestroy __P((evContext));
+int evGetNext __P((evContext, evEvent *, int));
+int evDispatch __P((evContext, evEvent));
+void evDrop __P((evContext, evEvent));
+int evMainLoop __P((evContext));
+int evHighestFD __P((evContext));
+int evGetOption __P((evContext *, const char *, int *));
+int evSetOption __P((evContext *, const char *, int));
+
+/* ev_connects.c */
+#define evListen __evListen
+#define evConnect __evConnect
+#define evCancelConn __evCancelConn
+#define evHold __evHold
+#define evUnhold __evUnhold
+#define evTryAccept __evTryAccept
+
+int evListen __P((evContext, int, int, evConnFunc, void *, evConnID *));
+int evConnect __P((evContext, int, const void *, int,
+ evConnFunc, void *, evConnID *));
+int evCancelConn __P((evContext, evConnID));
+int evHold __P((evContext, evConnID));
+int evUnhold __P((evContext, evConnID));
+int evTryAccept __P((evContext, evConnID, int *));
+
+/* ev_files.c */
+#define evSelectFD __evSelectFD
+#define evDeselectFD __evDeselectFD
+
+int evSelectFD __P((evContext, int, int, evFileFunc, void *, evFileID *));
+int evDeselectFD __P((evContext, evFileID));
+
+/* ev_streams.c */
+#define evConsIovec __evConsIovec
+#define evWrite __evWrite
+#define evRead __evRead
+#define evTimeRW __evTimeRW
+#define evUntimeRW __evUntimeRW
+#define evCancelRW __evCancelRW
+
+struct iovec evConsIovec __P((void *, size_t));
+int evWrite __P((evContext, int, const struct iovec *, int,
+ evStreamFunc func, void *, evStreamID *));
+int evRead __P((evContext, int, const struct iovec *, int,
+ evStreamFunc func, void *, evStreamID *));
+int evTimeRW __P((evContext, evStreamID, evTimerID timer));
+int evUntimeRW __P((evContext, evStreamID));
+int evCancelRW __P((evContext, evStreamID));
+
+/* ev_timers.c */
+#define evConsTime __evConsTime
+#define evAddTime __evAddTime
+#define evSubTime __evSubTime
+#define evCmpTime __evCmpTime
+#define evTimeSpec __evTimeSpec
+#define evTimeVal __evTimeVal
+
+#define evNowTime __evNowTime
+#define evUTCTime __evUTCTime
+#define evLastEventTime __evLastEventTime
+#define evSetTimer __evSetTimer
+#define evClearTimer __evClearTimer
+#define evConfigTimer __evConfigTimer
+#define evResetTimer __evResetTimer
+#define evSetIdleTimer __evSetIdleTimer
+#define evClearIdleTimer __evClearIdleTimer
+#define evResetIdleTimer __evResetIdleTimer
+#define evTouchIdleTimer __evTouchIdleTimer
+
+struct timespec evConsTime __P((time_t sec, long nsec));
+struct timespec evAddTime __P((struct timespec, struct timespec));
+struct timespec evSubTime __P((struct timespec, struct timespec));
+struct timespec evNowTime __P((void));
+struct timespec evUTCTime __P((void));
+struct timespec evLastEventTime __P((evContext));
+struct timespec evTimeSpec __P((struct timeval));
+struct timeval evTimeVal __P((struct timespec));
+int evCmpTime __P((struct timespec, struct timespec));
+int evSetTimer __P((evContext, evTimerFunc, void *, struct timespec,
+ struct timespec, evTimerID *));
+int evClearTimer __P((evContext, evTimerID));
+int evConfigTimer __P((evContext, evTimerID, const char *param,
+ int value));
+int evResetTimer __P((evContext, evTimerID, evTimerFunc, void *,
+ struct timespec, struct timespec));
+int evSetIdleTimer __P((evContext, evTimerFunc, void *, struct timespec,
+ evTimerID *));
+int evClearIdleTimer __P((evContext, evTimerID));
+int evResetIdleTimer __P((evContext, evTimerID, evTimerFunc, void *,
+ struct timespec));
+int evTouchIdleTimer __P((evContext, evTimerID));
+
+/* ev_waits.c */
+#define evWaitFor __evWaitFor
+#define evDo __evDo
+#define evUnwait __evUnwait
+#define evDefer __evDefer
+
+int evWaitFor __P((evContext, const void *, evWaitFunc, void *, evWaitID *));
+int evDo __P((evContext, const void *));
+int evUnwait __P((evContext, evWaitID));
+int evDefer __P((evContext, evWaitFunc, void *));
+
+#ifdef __EVENTLIB_P_DEFINED
+# undef __P
+#endif
+
+#endif /*_EVENTLIB_H*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/include/isc/heap.h b/usr/src/lib/libresolv2_joy/include/isc/heap.h
new file mode 100644
index 0000000000..384d507cf5
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/isc/heap.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1997,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+typedef int (*heap_higher_priority_func)(void *, void *);
+typedef void (*heap_index_func)(void *, int);
+typedef void (*heap_for_each_func)(void *, void *);
+
+typedef struct heap_context {
+ int array_size;
+ int array_size_increment;
+ int heap_size;
+ void **heap;
+ heap_higher_priority_func higher_priority;
+ heap_index_func index;
+} *heap_context;
+
+#define heap_new __heap_new
+#define heap_free __heap_free
+#define heap_insert __heap_insert
+#define heap_delete __heap_delete
+#define heap_increased __heap_increased
+#define heap_decreased __heap_decreased
+#define heap_element __heap_element
+#define heap_for_each __heap_for_each
+
+heap_context heap_new(heap_higher_priority_func, heap_index_func, int);
+int heap_free(heap_context);
+int heap_insert(heap_context, void *);
+int heap_delete(heap_context, int);
+int heap_increased(heap_context, int);
+int heap_decreased(heap_context, int);
+void * heap_element(heap_context, int);
+int heap_for_each(heap_context, heap_for_each_func, void *);
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/include/isc/irpmarshall.h b/usr/src/lib/libresolv2_joy/include/isc/irpmarshall.h
new file mode 100644
index 0000000000..244b3e3460
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/isc/irpmarshall.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: irpmarshall.h,v 1.4 2005/04/27 04:56:17 sra Exp $
+ */
+
+#ifndef _IRPMARSHALL_H_INCLUDED
+#define _IRPMARSHALL_H_INCLUDED
+
+/* Hide function names */
+#define irp_marshall_gr __irp_marshall_gr
+#define irp_marshall_ho __irp_marshall_ho
+#define irp_marshall_ne __irp_marshall_ne
+#define irp_marshall_ng __irp_marshall_ng
+#define irp_marshall_nw __irp_marshall_nw
+#define irp_marshall_pr __irp_marshall_pr
+#define irp_marshall_pw __irp_marshall_pw
+#define irp_marshall_sv __irp_marshall_sv
+#define irp_unmarshall_gr __irp_unmarshall_gr
+#define irp_unmarshall_ho __irp_unmarshall_ho
+#define irp_unmarshall_ne __irp_unmarshall_ne
+#define irp_unmarshall_ng __irp_unmarshall_ng
+#define irp_unmarshall_nw __irp_unmarshall_nw
+#define irp_unmarshall_pr __irp_unmarshall_pr
+#define irp_unmarshall_pw __irp_unmarshall_pw
+#define irp_unmarshall_sv __irp_unmarshall_sv
+
+#define MAXPADDRSIZE (sizeof "255.255.255.255" + 1)
+#define ADDR_T_STR(x) (x == AF_INET ? "AF_INET" :\
+ (x == AF_INET6 ? "AF_INET6" : "UNKNOWN"))
+
+/* See comment below on usage */
+int irp_marshall_pw(const struct passwd *, char **, size_t *);
+int irp_unmarshall_pw(struct passwd *, char *);
+int irp_marshall_gr(const struct group *, char **, size_t *);
+int irp_unmarshall_gr(struct group *, char *);
+int irp_marshall_sv(const struct servent *, char **, size_t *);
+int irp_unmarshall_sv(struct servent *, char *);
+int irp_marshall_pr(struct protoent *, char **, size_t *);
+int irp_unmarshall_pr(struct protoent *, char *);
+int irp_marshall_ho(struct hostent *, char **, size_t *);
+int irp_unmarshall_ho(struct hostent *, char *);
+int irp_marshall_ng(const char *, const char *, const char *,
+ char **, size_t *);
+int irp_unmarshall_ng(const char **, const char **, const char **, char *);
+int irp_marshall_nw(struct nwent *, char **, size_t *);
+int irp_unmarshall_nw(struct nwent *, char *);
+int irp_marshall_ne(struct netent *, char **, size_t *);
+int irp_unmarshall_ne(struct netent *, char *);
+
+/*! \file
+ * \brief
+ * Functions to marshall and unmarshall various system data structures. We
+ * use a printable ascii format that is as close to various system config
+ * files as reasonable (e.g. /etc/passwd format).
+ *
+ * We are not forgiving with unmarhsalling misformatted buffers. In
+ * particular whitespace in fields is not ignored. So a formatted password
+ * entry "brister :1364:100:...." will yield a username of "brister "
+ *
+ * We potentially do a lot of mallocs to fill fields that are of type
+ * (char **) like a hostent h_addr field. Building (for example) the
+ * h_addr field and its associated addresses all in one buffer is
+ * certainly possible, but not done here.
+ *
+ * The following description is true for all the marshalling functions:
+ *
+ * int irp_marshall_XX(struct yyyy *XX, char **buffer, size_t *len);
+ *
+ * The argument XX (of type struct passwd for example) is marshalled in the
+ * buffer pointed at by *BUFFER, which is of length *LEN. Returns 0
+ * on success and -1 on failure. Failure will occur if *LEN is
+ * smaller than needed.
+ *
+ * If BUFFER is NULL, then *LEN is set to the size of the buffer
+ * needed to marshall the data and no marshalling is actually done.
+ *
+ * If *BUFFER is NULL, then a buffer large enough will be allocated
+ * with memget() and the size allocated will be stored in *LEN. An extra 2
+ * bytes will be allocated for the client to append CRLF if wanted. The
+ * value of *LEN will include these two bytes.
+ *
+ * All the marshalling functions produce a buffer with the fields
+ * separated by colons (except for the hostent marshalling, which uses '@'
+ * to separate fields). Fields that have multiple subfields (like the
+ * gr_mem field in struct group) have their subparts separated by
+ * commas.
+ *
+ * int irp_unmarshall_XX(struct YYYYY *XX, char *buffer);
+ *
+ * The unmashalling functions break apart the buffer and store the
+ * values in the struct pointed to by XX. All pointer values inside
+ * XX are allocated with malloc. All arrays of pointers have a NULL
+ * as the last element.
+ */
+
+#endif
diff --git a/usr/src/lib/libresolv2_joy/include/isc/list.h b/usr/src/lib/libresolv2_joy/include/isc/list.h
new file mode 100644
index 0000000000..5fe9031141
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/isc/list.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1997,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef LIST_H
+#define LIST_H 1
+#include <isc/assertions.h>
+
+#define LIST(type) struct { type *head, *tail; }
+#define INIT_LIST(list) \
+ do { (list).head = NULL; (list).tail = NULL; } while (0)
+
+#define LINK(type) struct { type *prev, *next; }
+#define INIT_LINK_TYPE(elt, link, type) \
+ do { \
+ (elt)->link.prev = (type *)(-1); \
+ (elt)->link.next = (type *)(-1); \
+ } while (0)
+#define INIT_LINK(elt, link) \
+ INIT_LINK_TYPE(elt, link, void)
+#define LINKED(elt, link) ((void *)((elt)->link.prev) != (void *)(-1) && \
+ (void *)((elt)->link.next) != (void *)(-1))
+
+#define HEAD(list) ((list).head)
+#define TAIL(list) ((list).tail)
+#define EMPTY(list) ((list).head == NULL)
+
+#define PREPEND(list, elt, link) \
+ do { \
+ INSIST(!LINKED(elt, link));\
+ if ((list).head != NULL) \
+ (list).head->link.prev = (elt); \
+ else \
+ (list).tail = (elt); \
+ (elt)->link.prev = NULL; \
+ (elt)->link.next = (list).head; \
+ (list).head = (elt); \
+ } while (0)
+
+#define APPEND(list, elt, link) \
+ do { \
+ INSIST(!LINKED(elt, link));\
+ if ((list).tail != NULL) \
+ (list).tail->link.next = (elt); \
+ else \
+ (list).head = (elt); \
+ (elt)->link.prev = (list).tail; \
+ (elt)->link.next = NULL; \
+ (list).tail = (elt); \
+ } while (0)
+
+#define UNLINK_TYPE(list, elt, link, type) \
+ do { \
+ INSIST(LINKED(elt, link));\
+ if ((elt)->link.next != NULL) \
+ (elt)->link.next->link.prev = (elt)->link.prev; \
+ else { \
+ INSIST((list).tail == (elt)); \
+ (list).tail = (elt)->link.prev; \
+ } \
+ if ((elt)->link.prev != NULL) \
+ (elt)->link.prev->link.next = (elt)->link.next; \
+ else { \
+ INSIST((list).head == (elt)); \
+ (list).head = (elt)->link.next; \
+ } \
+ INIT_LINK_TYPE(elt, link, type); \
+ } while (0)
+#define UNLINK(list, elt, link) \
+ UNLINK_TYPE(list, elt, link, void)
+
+#define PREV(elt, link) ((elt)->link.prev)
+#define NEXT(elt, link) ((elt)->link.next)
+
+#define INSERT_BEFORE(list, before, elt, link) \
+ do { \
+ INSIST(!LINKED(elt, link));\
+ if ((before)->link.prev == NULL) \
+ PREPEND(list, elt, link); \
+ else { \
+ (elt)->link.prev = (before)->link.prev; \
+ (before)->link.prev = (elt); \
+ (elt)->link.prev->link.next = (elt); \
+ (elt)->link.next = (before); \
+ } \
+ } while (0)
+
+#define INSERT_AFTER(list, after, elt, link) \
+ do { \
+ INSIST(!LINKED(elt, link));\
+ if ((after)->link.next == NULL) \
+ APPEND(list, elt, link); \
+ else { \
+ (elt)->link.next = (after)->link.next; \
+ (after)->link.next = (elt); \
+ (elt)->link.next->link.prev = (elt); \
+ (elt)->link.prev = (after); \
+ } \
+ } while (0)
+
+#define ENQUEUE(list, elt, link) APPEND(list, elt, link)
+#define DEQUEUE(list, elt, link) UNLINK(list, elt, link)
+
+#endif /* LIST_H */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/include/isc/logging.h b/usr/src/lib/libresolv2_joy/include/isc/logging.h
new file mode 100644
index 0000000000..c539443ff8
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/isc/logging.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996-1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef LOGGING_H
+#define LOGGING_H
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <unistd.h>
+
+#define log_critical (-5)
+#define log_error (-4)
+#define log_warning (-3)
+#define log_notice (-2)
+#define log_info (-1)
+#define log_debug(level) (level)
+
+typedef enum { log_syslog, log_file, log_null } log_channel_type;
+
+#define LOG_MAX_VERSIONS 99
+
+#define LOG_CLOSE_STREAM 0x0001
+#define LOG_TIMESTAMP 0x0002
+#define LOG_TRUNCATE 0x0004
+#define LOG_USE_CONTEXT_LEVEL 0x0008
+#define LOG_PRINT_LEVEL 0x0010
+#define LOG_REQUIRE_DEBUG 0x0020
+#define LOG_CHANNEL_BROKEN 0x0040
+#define LOG_PRINT_CATEGORY 0x0080
+#define LOG_CHANNEL_OFF 0x0100
+
+typedef struct log_context *log_context;
+typedef struct log_channel *log_channel;
+
+#define LOG_OPTION_DEBUG 0x01
+#define LOG_OPTION_LEVEL 0x02
+
+#define log_open_stream __log_open_stream
+#define log_close_stream __log_close_stream
+#define log_get_stream __log_get_stream
+#define log_get_filename __log_get_filename
+#define log_check_channel __log_check_channel
+#define log_check __log_check
+#define log_vwrite __log_vwrite
+#define log_write __log_write
+#define log_new_context __log_new_context
+#define log_free_context __log_free_context
+#define log_add_channel __log_add_channel
+#define log_remove_channel __log_remove_channel
+#define log_option __log_option
+#define log_category_is_active __log_category_is_active
+#define log_new_syslog_channel __log_new_syslog_channel
+#define log_new_file_channel __log_new_file_channel
+#define log_set_file_owner __log_set_file_owner
+#define log_new_null_channel __log_new_null_channel
+#define log_inc_references __log_inc_references
+#define log_dec_references __log_dec_references
+#define log_get_channel_type __log_get_channel_type
+#define log_free_channel __log_free_channel
+#define log_close_debug_channels __log_close_debug_channels
+
+FILE * log_open_stream(log_channel);
+int log_close_stream(log_channel);
+FILE * log_get_stream(log_channel);
+char * log_get_filename(log_channel);
+int log_check_channel(log_context, int, log_channel);
+int log_check(log_context, int, int);
+#ifdef __GNUC__
+void log_vwrite(log_context, int, int, const char *,
+ va_list args)
+ __attribute__((__format__(__printf__, 4, 0)));
+void log_write(log_context, int, int, const char *, ...)
+ __attribute__((__format__(__printf__, 4, 5)));
+#else
+void log_vwrite(log_context, int, int, const char *,
+ va_list args);
+void log_write(log_context, int, int, const char *, ...);
+#endif
+int log_new_context(int, char **, log_context *);
+void log_free_context(log_context);
+int log_add_channel(log_context, int, log_channel);
+int log_remove_channel(log_context, int, log_channel);
+int log_option(log_context, int, int);
+int log_category_is_active(log_context, int);
+log_channel log_new_syslog_channel(unsigned int, int, int);
+log_channel log_new_file_channel(unsigned int, int, const char *,
+ FILE *, unsigned int,
+ unsigned long);
+int log_set_file_owner(log_channel, uid_t, gid_t);
+log_channel log_new_null_channel(void);
+int log_inc_references(log_channel);
+int log_dec_references(log_channel);
+log_channel_type log_get_channel_type(log_channel);
+int log_free_channel(log_channel);
+void log_close_debug_channels(log_context);
+
+#endif /* !LOGGING_H */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/include/isc/memcluster.h b/usr/src/lib/libresolv2_joy/include/isc/memcluster.h
new file mode 100644
index 0000000000..0923deb5e7
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/isc/memcluster.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1997,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef MEMCLUSTER_H
+#define MEMCLUSTER_H
+
+#include <stdio.h>
+
+#define meminit __meminit
+#ifdef MEMCLUSTER_DEBUG
+#define memget(s) __memget_debug(s, __FILE__, __LINE__)
+#define memput(p, s) __memput_debug(p, s, __FILE__, __LINE__)
+#else /*MEMCLUSTER_DEBUG*/
+#ifdef MEMCLUSTER_RECORD
+#define memget(s) __memget_record(s, __FILE__, __LINE__)
+#define memput(p, s) __memput_record(p, s, __FILE__, __LINE__)
+#else /*MEMCLUSTER_RECORD*/
+#define memget __memget
+#define memput __memput
+#endif /*MEMCLUSTER_RECORD*/
+#endif /*MEMCLUSTER_DEBUG*/
+#define memstats __memstats
+#define memactive __memactive
+
+int meminit(size_t, size_t);
+void * __memget(size_t);
+void __memput(void *, size_t);
+void * __memget_debug(size_t, const char *, int);
+void __memput_debug(void *, size_t, const char *, int);
+void * __memget_record(size_t, const char *, int);
+void __memput_record(void *, size_t, const char *, int);
+void memstats(FILE *);
+int memactive(void);
+
+#endif /* MEMCLUSTER_H */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/include/isc/misc.h b/usr/src/lib/libresolv2_joy/include/isc/misc.h
new file mode 100644
index 0000000000..b54f4ee6ed
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/isc/misc.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 1995-2001, 2003 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: misc.h,v 1.7 2008/11/14 02:36:51 marka Exp $
+ */
+
+#ifndef _ISC_MISC_H
+#define _ISC_MISC_H
+
+/*! \file */
+
+#include <stdio.h>
+#include <sys/types.h>
+
+#define bitncmp __bitncmp
+/*#define isc_movefile __isc_movefile */
+
+extern int bitncmp(const void *, const void *, int);
+extern int isc_movefile(const char *, const char *);
+
+extern int isc_gethexstring(unsigned char *, size_t, int, FILE *,
+ int *);
+extern void isc_puthexstring(FILE *, const unsigned char *, size_t,
+ size_t, size_t, const char *);
+extern void isc_tohex(const unsigned char *, size_t, char *);
+
+#endif /*_ISC_MISC_H*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/include/isc/platform.h b/usr/src/lib/libresolv2_joy/include/isc/platform.h
new file mode 100644
index 0000000000..2fc59b61a8
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/isc/platform.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+/*
+ * Copyright (C) 2008 Internet Systems Consortium, Inc. ("ISC")
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* $Id: platform.h.in,v 1.3 2008/01/23 02:15:56 tbox Exp $ */
+
+/*! \file */
+
+#ifndef ISC_PLATFORM_H
+#define ISC_PLATFORM_H
+
+/*
+ * Define if the OS does not define struct timespec.
+ */
+#undef ISC_PLATFORM_NEEDTIMESPEC
+#ifdef ISC_PLATFORM_NEEDTIMESPEC
+#include <time.h> /* For time_t */
+struct timespec {
+ time_t tv_sec; /* seconds */
+ long tv_nsec; /* nanoseconds */
+};
+#endif
+
+#endif
diff --git a/usr/src/lib/libresolv2_joy/include/isc/tree.h b/usr/src/lib/libresolv2_joy/include/isc/tree.h
new file mode 100644
index 0000000000..96feaca68d
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/isc/tree.h
@@ -0,0 +1,59 @@
+/* tree.h - declare structures used by tree library
+ *
+ * vix 22jan93 [revisited; uses RCS, ANSI, POSIX; has bug fixes]
+ * vix 27jun86 [broken out of tree.c]
+ *
+ * $Id: tree.h,v 1.3 2005/04/27 04:56:18 sra Exp $
+ */
+
+
+#ifndef _TREE_H_INCLUDED
+#define _TREE_H_INCLUDED
+
+
+#ifndef __P
+# if defined(__STDC__) || defined(__GNUC__)
+# define __P(x) x
+# else
+# define __P(x) ()
+# endif
+#endif
+
+/*%
+ * tree_t is our package-specific anonymous pointer.
+ */
+#if defined(__STDC__) || defined(__GNUC__)
+typedef void *tree_t;
+#else
+typedef char *tree_t;
+#endif
+
+/*%
+ * Do not taint namespace
+ */
+#define tree_add __tree_add
+#define tree_delete __tree_delete
+#define tree_init __tree_init
+#define tree_mung __tree_mung
+#define tree_srch __tree_srch
+#define tree_trav __tree_trav
+
+
+typedef struct tree_s {
+ tree_t data;
+ struct tree_s *left, *right;
+ short bal;
+ }
+ tree;
+
+
+void tree_init __P((tree **));
+tree_t tree_srch __P((tree **, int (*)(), tree_t));
+tree_t tree_add __P((tree **, int (*)(), tree_t, void (*)()));
+int tree_delete __P((tree **, int (*)(), tree_t, void (*)()));
+int tree_trav __P((tree **, int (*)()));
+void tree_mung __P((tree **, void (*)()));
+
+
+#endif /* _TREE_H_INCLUDED */
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/include/make_os_version b/usr/src/lib/libresolv2_joy/include/make_os_version
new file mode 100755
index 0000000000..3654490fee
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/make_os_version
@@ -0,0 +1,34 @@
+#!/bin/sh
+
+# Copyright (c) 1999 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+UNAME_R=`/usr/bin/uname -r`
+
+OS_MAJOR=`echo $UNAME_R | /usr/bin/sed -e 's/^\([^.]*\).*/\1/'`
+OS_MINOR=`echo $UNAME_R | /usr/bin/sed -e 's/^[^.]*\.\([^.]*\).*/\1/'`
+OS_VERSION=`echo $UNAME_R | tr '.' '_'`
+
+cat <<EOF > new_os_version.h
+#ifndef OS_VERSION_H
+#define OS_VERSION_H
+
+#define SUNOS_$OS_VERSION
+#define OS_MAJOR $OS_MAJOR
+#define OS_MINOR $OS_MINOR
+
+#endif
+EOF
+
+if [ -f os_version.h ]; then
+ if /usr/bin/cmp -s new_os_version.h os_version.h; then
+ /usr/bin/rm -f new_os_version.h
+ else
+ /usr/bin/rm -f os_version.h
+ /usr/bin/mv new_os_version.h os_version.h
+ fi
+else
+ /usr/bin/mv new_os_version.h os_version.h
+fi
diff --git a/usr/src/lib/libresolv2_joy/include/make_os_version.sh b/usr/src/lib/libresolv2_joy/include/make_os_version.sh
new file mode 100644
index 0000000000..3654490fee
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/make_os_version.sh
@@ -0,0 +1,34 @@
+#!/bin/sh
+
+# Copyright (c) 1999 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+UNAME_R=`/usr/bin/uname -r`
+
+OS_MAJOR=`echo $UNAME_R | /usr/bin/sed -e 's/^\([^.]*\).*/\1/'`
+OS_MINOR=`echo $UNAME_R | /usr/bin/sed -e 's/^[^.]*\.\([^.]*\).*/\1/'`
+OS_VERSION=`echo $UNAME_R | tr '.' '_'`
+
+cat <<EOF > new_os_version.h
+#ifndef OS_VERSION_H
+#define OS_VERSION_H
+
+#define SUNOS_$OS_VERSION
+#define OS_MAJOR $OS_MAJOR
+#define OS_MINOR $OS_MINOR
+
+#endif
+EOF
+
+if [ -f os_version.h ]; then
+ if /usr/bin/cmp -s new_os_version.h os_version.h; then
+ /usr/bin/rm -f new_os_version.h
+ else
+ /usr/bin/rm -f os_version.h
+ /usr/bin/mv new_os_version.h os_version.h
+ fi
+else
+ /usr/bin/mv new_os_version.h os_version.h
+fi
diff --git a/usr/src/lib/libresolv2_joy/include/port_after.h b/usr/src/lib/libresolv2_joy/include/port_after.h
new file mode 100644
index 0000000000..c3abf4b334
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/port_after.h
@@ -0,0 +1,539 @@
+/*
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
+ * Copyright (C) 2004-2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 2001-2003 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* $Id: port_after.h.in,v 1.60 2008/02/28 05:34:17 marka Exp $ */
+
+#ifndef port_after_h
+#define port_after_h
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#if (!defined(BSD)) || (BSD < 199306)
+#include <sys/bitypes.h>
+#endif
+#ifdef HAVE_INTTYPES_H
+#include <inttypes.h>
+#endif
+#ifdef HAVE_SYS_SELECT_H
+#include <sys/select.h>
+#endif /* HAVE_SYS_SELECT_H */
+
+#ifdef REENABLE_SEND
+#undef send
+#endif
+
+#undef NEED_PSELECT
+#undef HAVE_SA_LEN
+#undef HAVE_MINIMUM_IFREQ
+#define NEED_DAEMON 1
+#undef NEED_STRSEP
+#undef NEED_STRERROR
+#ifdef NEED_STRERROR
+const char *isc_strerror(int);
+#define strerror isc_strerror
+#endif
+/* HAS_INET6_STRUCTS and HAVE_SIN6_SCOPE_ID are defined by port_ipv6.h
+ * #define HAS_INET6_STRUCTS 1
+ * #define HAVE_SIN6_SCOPE_ID 1
+ */
+#include <port_ipv6.h>
+
+#undef NEED_IN6ADDR_ANY
+#undef HAS_IN_ADDR6
+#define HAVE_SOCKADDR_STORAGE 1
+#undef NEED_GETTIMEOFDAY
+#define HAVE_STRNDUP
+#undef USE_FIONBIO_IOCTL
+#undef INNETGR_ARGS
+
+#undef USE_IFNAMELINKID
+#define PORT_NONBLOCK O_NONBLOCK
+
+#ifndef _POSIX_PATH_MAX
+#define _POSIX_PATH_MAX 255
+#endif
+#ifndef PATH_MAX
+#define PATH_MAX _POSIX_PATH_MAX
+#endif
+
+/*
+ * We need to know the IPv6 address family number even on IPv4-only systems.
+ * Note that this is NOT a protocol constant, and that if the system has its
+ * own AF_INET6, different from ours below, all of BIND's libraries and
+ * executables will need to be recompiled after the system <sys/socket.h>
+ * has had this type added. The type number below is correct on most BSD-
+ * derived systems for which AF_INET6 is defined.
+ */
+#ifndef AF_INET6
+#define AF_INET6 24
+#endif
+
+#ifndef PF_INET6
+#define PF_INET6 AF_INET6
+#endif
+
+#ifdef HAS_IN_ADDR6
+/* Map to pre-RFC structure. */
+#define in6_addr in_addr6
+#endif
+
+#ifndef HAS_INET6_STRUCTS
+/* Replace with structure from later rev of O/S if known. */
+struct in6_addr {
+ u_int8_t s6_addr[16];
+};
+
+#define IN6ADDR_ANY_INIT \
+ {{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}
+
+#define IN6ADDR_LOOPBACK_INIT \
+ {{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}
+
+/* Replace with structure from later rev of O/S if known. */
+struct sockaddr_in6 {
+#ifdef HAVE_SA_LEN
+ u_int8_t sin6_len; /* length of this struct */
+ u_int8_t sin6_family; /* AF_INET6 */
+#else
+ u_int16_t sin6_family; /* AF_INET6 */
+#endif
+ u_int16_t sin6_port; /* transport layer port # */
+ u_int32_t sin6_flowinfo; /* IPv6 flow information */
+ struct in6_addr sin6_addr; /* IPv6 address */
+ u_int32_t sin6_scope_id; /* set of interfaces for a scope */
+};
+#endif /* HAS_INET6_STRUCTS */
+
+#ifdef BROKEN_IN6ADDR_INIT_MACROS
+#undef IN6ADDR_ANY_INIT
+#undef IN6ADDR_LOOPBACK_INIT
+#endif
+
+#ifdef _AIX
+#ifndef IN6ADDR_ANY_INIT
+#define IN6ADDR_ANY_INIT {{{ 0, 0, 0, 0 }}}
+#endif
+#ifndef IN6ADDR_LOOPBACK_INIT
+#if BYTE_ORDER == BIG_ENDIAN
+#define IN6ADDR_LOOPBACK_INIT {{{ 0, 0, 0, 1 }}}
+#else
+#define IN6ADDR_LOOPBACK_INIT {{{0, 0, 0, 0x01000000}}}
+#endif
+#endif
+#endif
+
+#ifndef IN6ADDR_ANY_INIT
+#ifdef s6_addr
+#define IN6ADDR_ANY_INIT \
+ {{{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}}
+#else
+#define IN6ADDR_ANY_INIT \
+ {{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}
+#endif
+
+#endif
+#ifndef IN6ADDR_LOOPBACK_INIT
+#ifdef s6_addr
+#define IN6ADDR_LOOPBACK_INIT \
+ {{{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}}
+#else
+#define IN6ADDR_LOOPBACK_INIT \
+ {{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}
+#endif
+#endif
+
+#ifndef HAVE_SOCKADDR_STORAGE
+#define __SS_MAXSIZE 128
+#define __SS_ALLIGSIZE (sizeof (long))
+
+struct sockaddr_storage {
+#ifdef HAVE_SA_LEN
+ u_int8_t ss_len; /* address length */
+ u_int8_t ss_family; /* address family */
+ char __ss_pad1[__SS_ALLIGSIZE - 2 * sizeof(u_int8_t)];
+ long __ss_align;
+ char __ss_pad2[__SS_MAXSIZE - 2 * __SS_ALLIGSIZE];
+#else
+ u_int16_t ss_family; /* address family */
+ char __ss_pad1[__SS_ALLIGSIZE - sizeof(u_int16_t)];
+ long __ss_align;
+ char __ss_pad2[__SS_MAXSIZE - 2 * __SS_ALLIGSIZE];
+#endif
+};
+#endif
+
+
+#if !defined(HAS_INET6_STRUCTS) || defined(NEED_IN6ADDR_ANY)
+#define in6addr_any isc_in6addr_any
+extern const struct in6_addr in6addr_any;
+#endif
+
+/*
+ * IN6_ARE_ADDR_EQUAL, IN6_IS_ADDR_UNSPECIFIED, IN6_IS_ADDR_V4COMPAT and
+ * IN6_IS_ADDR_V4MAPPED are broken in glibc 2.1.
+ */
+#ifdef __GLIBC__
+#if __GLIBC__ < 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ < 2)
+#undef IN6_ARE_ADDR_EQUAL
+#undef IN6_IS_ADDR_UNSPECIFIED
+#undef IN6_IS_ADDR_V4COMPAT
+#undef IN6_IS_ADDR_V4MAPPED
+#endif
+#endif
+
+#ifndef IN6_ARE_ADDR_EQUAL
+#define IN6_ARE_ADDR_EQUAL(a,b) \
+ (memcmp(&(a)->s6_addr[0], &(b)->s6_addr[0], sizeof(struct in6_addr)) == 0)
+#endif
+
+#ifndef IN6_IS_ADDR_UNSPECIFIED
+#define IN6_IS_ADDR_UNSPECIFIED(a) \
+ IN6_ARE_ADDR_EQUAL(a, &in6addr_any)
+#endif
+
+#ifndef IN6_IS_ADDR_LOOPBACK
+extern const struct in6_addr isc_in6addr_loopback;
+#define IN6_IS_ADDR_LOOPBACK(a) \
+ IN6_ARE_ADDR_EQUAL(a, &isc_in6addr_loopback)
+#endif
+
+#ifndef IN6_IS_ADDR_V4MAPPED
+#define IN6_IS_ADDR_V4MAPPED(a) \
+ ((a)->s6_addr[0] == 0x00 && (a)->s6_addr[1] == 0x00 && \
+ (a)->s6_addr[2] == 0x00 && (a)->s6_addr[3] == 0x00 && \
+ (a)->s6_addr[4] == 0x00 && (a)->s6_addr[5] == 0x00 && \
+ (a)->s6_addr[6] == 0x00 && (a)->s6_addr[9] == 0x00 && \
+ (a)->s6_addr[8] == 0x00 && (a)->s6_addr[9] == 0x00 && \
+ (a)->s6_addr[10] == 0xff && (a)->s6_addr[11] == 0xff)
+#endif
+
+#ifndef IN6_IS_ADDR_SITELOCAL
+#define IN6_IS_ADDR_SITELOCAL(a) \
+ (((a)->s6_addr[0] == 0xfe) && (((a)->s6_addr[1] & 0xc0) == 0xc0))
+#endif
+
+#ifndef IN6_IS_ADDR_LINKLOCAL
+#define IN6_IS_ADDR_LINKLOCAL(a) \
+ (((a)->s6_addr[0] == 0xfe) && (((a)->s6_addr[1] & 0xc0) == 0x80))
+#endif
+
+#ifndef IN6_IS_ADDR_MULTICAST
+#define IN6_IS_ADDR_MULTICAST(a) ((a)->s6_addr[0] == 0xff)
+#endif
+
+#ifndef __IPV6_ADDR_MC_SCOPE
+#define __IPV6_ADDR_MC_SCOPE(a) ((a)->s6_addr[1] & 0x0f)
+#endif
+
+#ifndef __IPV6_ADDR_SCOPE_SITELOCAL
+#define __IPV6_ADDR_SCOPE_SITELOCAL 0x05
+#endif
+#ifndef __IPV6_ADDR_SCOPE_ORGLOCAL
+#define __IPV6_ADDR_SCOPE_ORGLOCAL 0x08
+#endif
+
+#ifndef IN6_IS_ADDR_MC_SITELOCAL
+#define IN6_IS_ADDR_MC_SITELOCAL(a) \
+ (IN6_IS_ADDR_MULTICAST(a) && \
+ (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_SITELOCAL))
+#endif
+
+#ifndef IN6_IS_ADDR_MC_ORGLOCAL
+#define IN6_IS_ADDR_MC_ORGLOCAL(a) \
+ (IN6_IS_ADDR_MULTICAST(a) && \
+ (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_ORGLOCAL))
+#endif
+
+#ifndef INADDR_NONE
+#define INADDR_NONE 0xffffffff
+#endif
+
+#ifndef MAXHOSTNAMELEN
+#define MAXHOSTNAMELEN 256
+#endif
+
+#ifndef INET6_ADDRSTRLEN
+/* sizeof("aaaa:bbbb:cccc:dddd:eeee:ffff:123.123.123.123") */
+#define INET6_ADDRSTRLEN 46
+#endif
+
+#ifndef MIN
+#define MIN(x,y) (((x) <= (y)) ? (x) : (y))
+#endif
+
+#ifndef MAX
+#define MAX(x,y) (((x) >= (y)) ? (x) : (y))
+#endif
+
+#ifdef NEED_DAEMON
+int daemon(int nochdir, int noclose);
+#endif
+
+#ifdef NEED_STRSEP
+char * strsep(char **stringp, const char *delim);
+#endif
+
+#ifndef ALIGN
+#define ALIGN(p) (((uintptr_t)(p) + (sizeof(long) - 1)) & ~(sizeof(long) - 1))
+#endif
+
+#ifdef NEED_SETGROUPENT
+int setgroupent(int stayopen);
+#endif
+
+#ifdef NEED_GETGROUPLIST
+int getgrouplist(GETGROUPLIST_ARGS);
+#endif
+
+#ifdef POSIX_GETGRNAM_R
+int
+__posix_getgrnam_r(const char *, struct group *, char *, int, struct group **);
+#endif
+
+#ifdef NEED_GETGRNAM_R
+int
+getgrnam_r(const char *, struct group *, char *, size_t, struct group **);
+#endif
+
+#ifdef POSIX_GETGRGID_R
+int
+__posix_getgrgid_r(gid_t, struct group *, char *, int, struct group **) ;
+#endif
+
+#ifdef NEED_GETGRGID_R
+int
+getgrgid_r(gid_t, struct group *, char *, size_t, struct group **);
+#endif
+
+#ifdef NEED_GETGRENT_R
+GROUP_R_RETURN getgrent_r(struct group *gptr, GROUP_R_ARGS);
+#endif
+
+#ifdef NEED_SETGRENT_R
+GROUP_R_SET_RETURN setgrent_r(GROUP_R_ENT_ARGS);
+#endif
+
+#ifdef NEED_ENDGRENT_R
+GROUP_R_END_RETURN endgrent_r(GROUP_R_ENT_ARGS);
+#endif
+
+#if defined(NEED_INNETGR_R) && defined(NGR_R_RETURN)
+NGR_R_RETURN
+innetgr_r(const char *, const char *, const char *, const char *);
+#endif
+
+#ifdef NEED_SETNETGRENT_R
+#ifdef NGR_R_SET_ARGS
+NGR_R_SET_RETURN setnetgrent_r(NGR_R_SET_CONST char *netgroup, NGR_R_SET_ARGS);
+#else
+NGR_R_SET_RETURN setnetgrent_r(NGR_R_SET_CONST char *netgroup);
+#endif
+#endif
+
+#ifdef NEED_ENDNETGRENT_R
+#ifdef NGR_R_END_ARGS
+NGR_R_END_RETURN endnetgrent_r(NGR_R_END_ARGS);
+#else
+NGR_R_END_RETURN endnetgrent_r(void);
+#endif
+#endif
+
+#ifdef POSIX_GETPWNAM_R
+int
+__posix_getpwnam_r(const char *login, struct passwd *pwptr,
+ char *buf, size_t buflen, struct passwd **result);
+#endif
+
+#ifdef NEED_GETPWNAM_R
+int
+getpwnam_r(const char *login, struct passwd *pwptr,
+ char *buf, size_t buflen, struct passwd **result);
+#endif
+
+#ifdef POSIX_GETPWUID_R
+int
+__posix_getpwuid_r(uid_t uid, struct passwd *pwptr,
+ char *buf, int buflen, struct passwd **result);
+#endif
+
+#ifdef NEED_GETPWUID_R
+int
+getpwuid_r(uid_t uid, struct passwd *pwptr,
+ char *buf, size_t buflen, struct passwd **result);
+#endif
+
+#ifdef NEED_SETPWENT_R
+#ifdef PASS_R_ENT_ARGS
+PASS_R_SET_RETURN setpwent_r(PASS_R_ENT_ARGS);
+#else
+PASS_R_SET_RETURN setpwent_r(void);
+#endif
+
+#endif
+
+#ifdef NEED_SETPASSENT_R
+#ifdef PASS_R_ENT_ARGS
+PASS_R_SET_RETURN setpassent_r(int stayopen, PASS_R_ENT_ARGS);
+#else
+PASS_R_SET_RETURN setpassent_r(int stayopen);
+#endif
+#endif
+
+#ifdef NEED_GETPWENT_R
+PASS_R_RETURN getpwent_r(struct passwd *pwptr, PASS_R_ARGS);
+#endif
+
+#ifdef NEED_ENDPWENT_R
+void endpwent_r(void);
+#endif
+
+#ifdef NEED_SETPASSENT
+int setpassent(int stayopen);
+#endif
+
+#define gettimeofday isc__gettimeofday
+#ifdef NEED_GETTIMEOFDAY
+int isc__gettimeofday(struct timeval *tvp, struct _TIMEZONE *tzp);
+#else
+int isc__gettimeofday(struct timeval *tp, struct timezone *tzp);
+#endif
+
+int getnetgrent(NGR_R_CONST char **machinep, NGR_R_CONST char **userp,
+ NGR_R_CONST char **domainp);
+
+#ifdef NGR_R_ARGS
+int getnetgrent_r(NGR_R_CONST char **machinep, NGR_R_CONST char **userp,
+ NGR_R_CONST char **domainp, NGR_R_ARGS);
+#endif
+
+/* setnetgrent and endnetgrent are defined in sunw_port_after.h
+#ifdef SETNETGRENT_ARGS
+void setnetgrent(SETNETGRENT_ARGS);
+#else
+void setnetgrent(const char *netgroup);
+#endif
+
+void endnetgrent(void);
+*/
+
+#ifdef INNETGR_ARGS
+int innetgr(INNETGR_ARGS);
+#else
+int innetgr(const char *netgroup, const char *machine,
+ const char *user, const char *domain);
+#endif
+
+#ifdef NGR_R_SET_ARGS
+NGR_R_SET_RETURN
+setnetgrent_r(NGR_R_SET_CONST char *netgroup, NGR_R_SET_ARGS);
+#else
+NGR_R_SET_RETURN
+setnetgrent_r(NGR_R_SET_CONST char *netgroup);
+#endif
+
+#ifdef NEED_STRTOUL
+unsigned long strtoul(const char *, char **, int);
+#endif
+
+#ifdef NEED_SUN4PROTOS
+#include <stdarg.h>
+#ifndef __SIZE_TYPE__
+#define __SIZE_TYPE__ int
+#endif
+struct sockaddr;
+struct iovec;
+struct timeval;
+struct timezone;
+int fprintf(FILE *, const char *, ...);
+int getsockname(int, struct sockaddr *, int *);
+int getpeername(int, struct sockaddr *, int *);
+int socket(int, int, int);
+int connect(int, const struct sockaddr *, int);
+int writev(int, struct iovec *, int);
+int readv(int, struct iovec *, int);
+int send(int, const char *, int, int);
+void bzero(char *, int);
+int recvfrom(int, char *, int, int, struct sockaddr *, int *);
+int syslog(int, const char *, ... );
+int printf(const char *, ...);
+__SIZE_TYPE__ fread(void *, __SIZE_TYPE__, __SIZE_TYPE__, FILE *);
+__SIZE_TYPE__ fwrite(const void *, __SIZE_TYPE__, __SIZE_TYPE__, FILE *);
+int fclose(FILE *);
+int ungetc(int, FILE *);
+int scanf(const char *, ...);
+int sscanf(const char *, const char *, ... );
+int tolower(int);
+int toupper(int);
+int strcasecmp(const char *, const char *);
+int strncasecmp(const char *, const char *, int);
+int select(int, fd_set *, fd_set *, fd_set *, struct timeval *);
+#ifdef gettimeofday
+#undef gettimeofday
+int gettimeofday(struct timeval *, struct timezone *);
+#define gettimeofday isc__gettimeofday
+#else
+int gettimeofday(struct timeval *, struct timezone *);
+#endif
+long strtol(const char*, char **, int);
+int fseek(FILE *, long, int);
+int setsockopt(int, int, int, const char *, int);
+int bind(int, const struct sockaddr *, int);
+void bcopy(char *, char *, int);
+int fputc(char, FILE *);
+int listen(int, int);
+int accept(int, struct sockaddr *, int *);
+int getsockopt(int, int, int, char *, int *);
+int vfprintf(FILE *, const char *, va_list);
+int fflush(FILE *);
+int fgetc(FILE *);
+int fputs(const char *, FILE *);
+int fchown(int, int, int);
+void setbuf(FILE *, char *);
+int gethostname(char *, int);
+int rename(const char *, const char *);
+time_t time(time_t *);
+int fscanf(FILE *, const char *, ...);
+int sscanf(const char *, const char *, ...);
+int ioctl(int, int, caddr_t);
+void perror(const char *);
+
+#if !defined(__USE_FIXED_PROTOTYPES__) && !defined(__cplusplus) && !defined(__STRICT_ANSI__)
+/*
+ * 'gcc -ansi' changes the prototype for vsprintf().
+ * Use this prototype when 'gcc -ansi' is not in effect.
+ */
+char *vsprintf(char *, const char *, va_list);
+#endif
+#endif
+
+/* Solaris-specific changes */
+#include "sunw_port_after.h"
+
+#endif /* port_after_h */
diff --git a/usr/src/lib/libresolv2_joy/include/port_before.h b/usr/src/lib/libresolv2_joy/include/port_before.h
new file mode 100644
index 0000000000..2801139223
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/port_before.h
@@ -0,0 +1,201 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+/*
+ * Copyright (C) 2005-2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 2001 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* $Id: port_before.h.in,v 1.31 2008/02/28 05:36:10 marka Exp $ */
+
+#ifndef port_before_h
+#define port_before_h
+/* Solaris-specific changes */
+#include "sunw_port_before.h"
+#include <config.h>
+
+#ifdef NEED_SUN4PROTOS
+#define _PARAMS(x) x
+#endif
+
+struct group; /* silence warning */
+struct passwd; /* silence warning */
+struct timeval; /* silence warning */
+struct timezone; /* silence warning */
+
+#ifdef HAVE_SYS_TIMERS_H
+#include <sys/timers.h>
+#endif
+#include <limits.h>
+
+#ifdef ISC_PLATFORM_NEEDTIMESPEC
+#include <time.h> /* For time_t */
+struct timespec {
+ time_t tv_sec; /* seconds */
+ long tv_nsec; /* nanoseconds */
+};
+#endif
+#ifndef HAVE_MEMMOVE
+#define memmove(a,b,c) bcopy(b,a,c)
+#endif
+
+#undef WANT_IRS_GR
+#undef WANT_IRS_NIS
+#undef WANT_IRS_PW
+
+#define BSD_COMP 1
+#define USE_POLL 1
+#define HAVE_MD5 1
+#define SOLARIS2 1
+
+/* DO_PTHREADS is conditionally defined in sunw_port_before.h
+ * #define DO_PTHREADS 1 */
+#define GETGROUPLIST_ARGS const char *name, gid_t basegid, gid_t *groups, int *ngroups
+#define GETNETBYADDR_ADDR_T long
+#define SETPWENT_VOID 1
+#define SETGRENT_VOID 1
+
+#define NET_R_ARGS char *buf, int buflen
+#define NET_R_BAD NULL
+#define NET_R_COPY buf, buflen
+#define NET_R_COPY_ARGS NET_R_ARGS
+#define NET_R_END_RESULT(x) /*empty*/
+#define NET_R_END_RETURN void
+#undef NET_R_ENT_ARGS /*empty*/
+#define NET_R_OK nptr
+#define NET_R_RETURN struct netent *
+#undef NET_R_SET_RESULT /*empty*/
+#undef NET_R_SETANSWER
+#define NET_R_SET_RETURN void
+#undef NETENT_DATA
+
+#define GROUP_R_RETURN struct group *
+#define GROUP_R_SET_RETURN void
+#undef GROUP_R_SET_RESULT /*empty*/
+#define GROUP_R_END_RETURN void
+#define GROUP_R_END_RESULT(x) /*empty*/
+#define GROUP_R_ARGS char *buf, int buflen
+#define GROUP_R_ENT_ARGS void
+#define GROUP_R_OK gptr
+#define GROUP_R_BAD NULL
+
+#define HOST_R_ARGS char *buf, int buflen, int *h_errnop
+#define HOST_R_BAD NULL
+#define HOST_R_COPY buf, buflen
+#define HOST_R_COPY_ARGS char *buf, int buflen
+#define HOST_R_END_RESULT(x) /*empty*/
+#define HOST_R_END_RETURN void
+#undef HOST_R_ENT_ARGS /*empty*/
+#define HOST_R_ERRNO *h_errnop = h_errno
+#define HOST_R_OK hptr
+#define HOST_R_RETURN struct hostent *
+#undef HOST_R_SETANSWER
+#undef HOST_R_SET_RESULT
+#define HOST_R_SET_RETURN void
+#undef HOSTENT_DATA
+
+#define NGR_R_ARGS char *buf, int buflen
+#define NGR_R_BAD (0)
+#define NGR_R_COPY buf, buflen
+#define NGR_R_COPY_ARGS NGR_R_ARGS
+#define NGR_R_CONST
+#define NGR_R_END_RESULT(x) /*empty*/
+#define NGR_R_END_RETURN void
+#undef NGR_R_END_ARGS /*empty*/
+#define NGR_R_OK 1
+#define NGR_R_RETURN int
+#define NGR_R_SET_CONST const
+#undef NGR_R_SET_RESULT /*empty*/
+#define NGR_R_SET_RETURN void
+#undef NGR_R_SET_ARGS
+
+
+#if !defined(NGR_R_SET_ARGS) && defined(NGR_R_END_ARGS)
+#define NGR_R_SET_ARGS NGR_R_END_ARGS
+#endif
+
+#define PROTO_R_ARGS char *buf, int buflen
+#define PROTO_R_BAD NULL
+#define PROTO_R_COPY buf, buflen
+#define PROTO_R_COPY_ARGS PROTO_R_ARGS
+#define PROTO_R_END_RESULT(x) /*empty*/
+#define PROTO_R_END_RETURN void
+#undef PROTO_R_ENT_ARGS /*empty*/
+#undef PROTO_R_ENT_UNUSED
+#define PROTO_R_OK pptr
+#undef PROTO_R_SETANSWER
+#define PROTO_R_RETURN struct protoent *
+#undef PROTO_R_SET_RESULT
+#define PROTO_R_SET_RETURN void
+#undef PROTOENT_DATA
+
+#define PASS_R_ARGS char *buf, int buflen
+#define PASS_R_BAD NULL
+#define PASS_R_COPY buf, buflen
+#define PASS_R_COPY_ARGS PASS_R_ARGS
+#define PASS_R_END_RESULT(x) /*empty*/
+#define PASS_R_END_RETURN void
+#undef PASS_R_ENT_ARGS
+#define PASS_R_OK pwptr
+#define PASS_R_RETURN struct passwd *
+#undef PASS_R_SET_RESULT /*empty*/
+#define PASS_R_SET_RETURN void
+
+#define SERV_R_ARGS char *buf, int buflen
+#define SERV_R_BAD NULL
+#define SERV_R_COPY buf, buflen
+#define SERV_R_COPY_ARGS SERV_R_ARGS
+#define SERV_R_END_RESULT(x) /*empty*/
+#define SERV_R_END_RETURN void
+#undef SERV_R_ENT_ARGS /*empty*/
+#undef SERV_R_ENT_UNUSED /*empty*/
+#define SERV_R_OK sptr
+#undef SERV_R_SETANSWER
+#define SERV_R_RETURN struct servent *
+#undef SERV_R_SET_RESULT
+#define SERV_R_SET_RETURN void
+
+
+
+#define DE_CONST(konst, var) \
+ do { \
+ union { const void *k; void *v; } _u; \
+ _u.k = konst; \
+ var = _u.v; \
+ } while (0)
+
+#define UNUSED(x) (x) = (x)
+
+#undef NEED_SOLARIS_BITTYPES
+#define ISC_SOCKLEN_T int
+
+#ifdef __GNUC__
+#define ISC_FORMAT_PRINTF(fmt, args) \
+ __attribute__((__format__(__printf__, fmt, args)))
+#else
+#define ISC_FORMAT_PRINTF(fmt, args)
+#endif
+
+/* Pull in host order macros when _XOPEN_SOURCE_EXTENDED is defined. */
+#if defined(__hpux) && defined(_XOPEN_SOURCE_EXTENDED)
+#include <sys/byteorder.h>
+#endif
+
+#endif
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/include/port_netdb.h b/usr/src/lib/libresolv2_joy/include/port_netdb.h
new file mode 100644
index 0000000000..a308cc7efa
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/port_netdb.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+#ifndef _PORT_NETDB_H
+#define _PORT_NETDB_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* AI_NUMERICSERV is not a valid flag for getaddrinfo */
+#define AI_MASK 0x0038 /* mask of valid flags */
+
+/* EAI_OVERFLOW was removed from ISC */
+#define EAI_BADHINTS 12
+
+/*
+ * these are libresolv2 functions that were renamed in previous versions to
+ * res_* because they conflict with libnsl or libsocket
+ */
+
+#define endhostent joy_res_endhostent /* libnsl */
+void endhostent __P((void));
+#define endnetent res_endnetent /* libsocket */
+void endnetent __P((void));
+#define freeaddrinfo res_freeaddrinfo /* libsocket */
+void freeaddrinfo __P((struct addrinfo *));
+#define freehostent res_freehostent /* libsocket and libnsl */
+void freehostent __P((struct hostent *));
+#define getaddrinfo res_getaddrinfo /* libsocket */
+int getaddrinfo __P((const char *, const char *,
+ const struct addrinfo *, struct addrinfo **));
+#define gethostbyaddr joy_res_gethostbyaddr /* libnsl */
+struct hostent *gethostbyaddr __P((const char *, int, int));
+#define gethostbyname joy_res_gethostbyname /* libnsl */
+struct hostent *gethostbyname __P((const char *));
+#define gethostbyname2 joy_res_gethostbyname2 /* lib/nsswitch/dns */
+struct hostent *gethostbyname2 __P((const char *, int));
+#define gethostent res_gethostent /* libnsl */
+struct hostent *gethostent __P((void));
+#define getipnodebyaddr res_getipnodebyaddr /* libnsl and libsocket */
+struct hostent *getipnodebyaddr __P((const void *, size_t, int, int *));
+#define getipnodebyname res_getipnodebyname /* libnsl and libsocket */
+struct hostent *getipnodebyname __P((const char *, int, int, int *));
+
+#define getnetbyaddr res_getnetbyaddr /* libsocket */
+struct netent *getnetbyaddr __P((unsigned long, int));
+#define getnetbyname res_getnetbyname /* libsocket */
+struct netent *getnetbyname __P((const char *));
+#define getnetent res_getnetent /* libsocket */
+struct netent *getnetent __P((void));
+#define sethostent joy_res_sethostent /* libnsl */
+void sethostent __P((int));
+#define setnetent res_setnetent /* libsocket */
+void setnetent __P((int));
+
+/*
+ * these are other irs functions now included in libresolv.so.2. We rename the
+ * ones that overlap with libsocket or libnsl
+ */
+
+/* endprotoent is in libsocket.so.1 */
+#define endprotoent res_endprotoent
+void endprotoent __P((void));
+
+/* endservent is in libsocket.so.1 */
+#define endservent res_endservent
+void endservent __P((void));
+
+/* note: the next two symbols are variables, not functions */
+
+/* gai_errlist is in libsocket.so.1 */
+#define gai_errlist res_gai_errlist
+
+/* gai_nerr is in libsocket.so.1 */
+#define gai_nerr res_gai_nerr
+
+/* gai_strerror is in libsocket.so.1 */
+#define gai_strerror res_gai_strerror
+const char *gai_strerror __P((int ecode));
+
+/* gethostbyaddr_r is in libnsl.so.1 */
+#define gethostbyaddr_r res_gethostbyaddr_r
+struct hostent *gethostbyaddr_r __P((const char *addr, int len, int type,
+ struct hostent *hptr, char *buf,
+ int buflen, int *h_errnop));
+
+/* gethostbyname_r is in libnsl.so.1 */
+#define gethostbyname_r res_gethostbyname_r
+struct hostent *gethostbyname_r __P((const char *name, struct hostent *hptr,
+ char *buf, int buflen, int *h_errnop));
+
+/* gethostent_r is in libnsl.so.1 */
+#define gethostent_r res_gethostent_r
+struct hostent *gethostent_r __P((struct hostent *hptr, char *buf, int buflen,
+ int *h_errnop));
+
+/* getnameinfo is in libsocket.so.1 */
+#define getnameinfo res_getnameinfo
+int getnameinfo __P((const struct sockaddr *, size_t, char *,
+ size_t, char *, size_t, int));
+
+/* getnetbyaddr_r is in libsocket.so.1 */
+#define getnetbyaddr_r res_getnetbyaddr_r
+struct netent *getnetbyaddr_r __P((long, int, struct netent *, char *, int));
+
+/* getnetbyname_r is in libsocket.so.1 */
+#define getnetbyname_r res_getnetbyname_r
+struct netent *getnetbyname_r __P((const char *, struct netent *, char *, int));
+
+/* getnetent_r is in libsocket.so.1 */
+#define getnetent_r res_getnetent_r
+struct netent *getnetent_r __P((struct netent *, char *, int));
+
+/* getprotobyname is in libsocket.so.1 */
+#define getprotobyname res_getprotobyname
+struct protoent *getprotobyname __P((const char *));
+
+/* getprotobyname_r is in libsocket.so.1 */
+#define getprotobyname_r res_getprotobyname_r
+struct protoent *getprotobyname_r __P((const char *, struct protoent *,
+ char *, int));
+
+/* getprotobynumber is in libsocket.so.1 */
+#define getprotobynumber res_getprotobynumber
+struct protoent *getprotobynumber __P((int));
+
+/* getprotobynumber_r is in libsocket.so.1 */
+#define getprotobynumber_r res_getprotobynumber_r
+struct protoent *getprotobynumber_r __P((int,
+ struct protoent *, char *, int));
+
+/* getprotoent is in libsocket.so.1 */
+#define getprotoent res_getprotoent
+struct protoent *getprotoent __P((void));
+
+/* getprotoent_r is in libsocket.so.1 */
+#define getprotoent_r res_getprotoent_r
+struct protoent *getprotoent_r __P((struct protoent *, char *, int));
+
+/* getservbyname is in libsocket.so.1 and libnsl.so.1 */
+#define getservbyname res_getservbyname
+struct servent *getservbyname __P((const char *, const char *));
+
+/* getservbyname_r is in libsocket.so.1 and libnsl.so.1 */
+#define getservbyname_r res_getservbyname_r
+struct servent *getservbyname_r __P((const char *name, const char *,
+ struct servent *, char *, int));
+
+/* getservbyport is in libsocket.so.1 and libnsl.so.1 */
+#define getservbyport res_getservbyport
+struct servent *getservbyport __P((int, const char *));
+
+/* getservbyport_r is in libsocket.so.1 and libnsl.so.1 */
+#define getservbyport_r res_getservbyport_r
+struct servent *getservbyport_r __P((int port, const char *,
+ struct servent *, char *, int));
+
+/* getservent is in libsocket.so.1 */
+#define getservent res_getservent
+struct servent *getservent __P((void));
+
+/* getservent_r is in libsocket.so.1 */
+#define getservent_r res_getservent_r
+struct servent *getservent_r __P((struct servent *, char *, int));
+
+/* innetgr is in libsocket.so.1 */
+#define innetgr res_innetgr
+int innetgr __P((const char *, const char *, const char *, const char *));
+
+/* setprotoent is in libsocket.so.1 */
+#define setprotoent res_setprotoent
+void setprotoent __P((int));
+
+/* setservent is in libsocket.so.1 */
+#define setservent res_setservent
+void setservent __P((int));
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _PORT_NETDB_H */
diff --git a/usr/src/lib/libresolv2_joy/include/port_resolv.h b/usr/src/lib/libresolv2_joy/include/port_resolv.h
new file mode 100644
index 0000000000..cd1a97d40c
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/port_resolv.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _PORT_RESOLV_H
+#define _PORT_RESOLV_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* RES_NSID has the same value as RES_NO_NIBBLE, which has been deleted */
+#define RES_NSID 0x00040000 /* request name server ID */
+
+/* RES_DEFAULT has a new value in libbind-6.0 */
+#undef RES_DEFAULT
+#define RES_DEFAULT (RES_RECURSE | RES_DEFNAMES | \
+ RES_DNSRCH | RES_NO_NIBBLE2)
+
+#ifndef __ultrix__
+u_int16_t _getshort __P((const uchar_t *));
+u_int32_t _getlong __P((const uchar_t *));
+#endif
+
+/* rename functions so they can be wrapped (see sunw/sunw_wrappers.c */
+#define p_option isc_p_option
+const char *p_option(ulong_t option);
+#define p_secstodate isc_p_secstodate
+char *p_secstodate(ulong_t secs);
+
+/* prevent namespace pollution */
+#define res_protocolnumber __res_protocolnumber
+#define res_servicenumber __res_servicenumber
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _PORT_RESOLV_H */
diff --git a/usr/src/lib/libresolv2_joy/include/probe_ipv6 b/usr/src/lib/libresolv2_joy/include/probe_ipv6
new file mode 100755
index 0000000000..371ac96c55
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/probe_ipv6
@@ -0,0 +1,73 @@
+#!/bin/sh
+
+# Copyright 2003 by Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+set -e
+PATH=/bin:/usr/bin:$PATH; export PATH
+trap "rm -f tmp$$[abc].[oc]" 0
+target=port_ipv6
+new=new_${target}.h
+old=${target}.h
+
+cat > tmp$$a.c <<EOF
+#include <sys/types.h>
+#include <netinet/in.h>
+struct sockaddr_in6 xx;
+EOF
+
+cat > tmp$$b.c <<EOF
+#include <sys/types.h>
+#include <netinet/in.h>
+struct in6_addr xx;
+EOF
+
+cat > tmp$$c.c <<EOF
+#include <sys/types.h>
+#include <netinet/in.h>
+struct sockaddr_in6 xx;
+main() { xx.sin6_scope_id = 0; }
+EOF
+
+cat > ${new} <<EOF
+
+/* This file is automatically generated. Do Not Edit. */
+
+#ifndef ${target}_h
+#define ${target}_h
+
+EOF
+
+if ${CC} -c tmp$$a.c > /dev/null 2>&1
+then
+ echo "#define HAS_INET6_STRUCTS" >> ${new}
+ if ${CC} -c tmp$$b.c > /dev/null 2>&1
+ then
+ :
+ else
+ echo "#define in6_addr in_addr6" >> ${new}
+ fi
+ if ${CC} -c tmp$$c.c > /dev/null 2>&1
+ then
+ echo "#define HAVE_SIN6_SCOPE_ID" >> ${new}
+ else
+ echo "#undef HAVE_SIN6_SCOPE_ID" >> ${new}
+ fi
+else
+ echo "#undef HAS_INET6_STRUCTS" >> ${new}
+fi
+echo >> ${new}
+echo "#endif" >> ${new}
+if [ -f ${old} ]; then
+ if cmp -s ${new} ${old} ; then
+ rm -f ${new}
+ else
+ rm -f ${old}
+ mv ${new} ${old}
+ fi
+else
+ mv ${new} ${old}
+fi
+exit 0
diff --git a/usr/src/lib/libresolv2_joy/include/probe_ipv6.sh b/usr/src/lib/libresolv2_joy/include/probe_ipv6.sh
new file mode 100644
index 0000000000..371ac96c55
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/probe_ipv6.sh
@@ -0,0 +1,73 @@
+#!/bin/sh
+
+# Copyright 2003 by Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+set -e
+PATH=/bin:/usr/bin:$PATH; export PATH
+trap "rm -f tmp$$[abc].[oc]" 0
+target=port_ipv6
+new=new_${target}.h
+old=${target}.h
+
+cat > tmp$$a.c <<EOF
+#include <sys/types.h>
+#include <netinet/in.h>
+struct sockaddr_in6 xx;
+EOF
+
+cat > tmp$$b.c <<EOF
+#include <sys/types.h>
+#include <netinet/in.h>
+struct in6_addr xx;
+EOF
+
+cat > tmp$$c.c <<EOF
+#include <sys/types.h>
+#include <netinet/in.h>
+struct sockaddr_in6 xx;
+main() { xx.sin6_scope_id = 0; }
+EOF
+
+cat > ${new} <<EOF
+
+/* This file is automatically generated. Do Not Edit. */
+
+#ifndef ${target}_h
+#define ${target}_h
+
+EOF
+
+if ${CC} -c tmp$$a.c > /dev/null 2>&1
+then
+ echo "#define HAS_INET6_STRUCTS" >> ${new}
+ if ${CC} -c tmp$$b.c > /dev/null 2>&1
+ then
+ :
+ else
+ echo "#define in6_addr in_addr6" >> ${new}
+ fi
+ if ${CC} -c tmp$$c.c > /dev/null 2>&1
+ then
+ echo "#define HAVE_SIN6_SCOPE_ID" >> ${new}
+ else
+ echo "#undef HAVE_SIN6_SCOPE_ID" >> ${new}
+ fi
+else
+ echo "#undef HAS_INET6_STRUCTS" >> ${new}
+fi
+echo >> ${new}
+echo "#endif" >> ${new}
+if [ -f ${old} ]; then
+ if cmp -s ${new} ${old} ; then
+ rm -f ${new}
+ else
+ rm -f ${old}
+ mv ${new} ${old}
+ fi
+else
+ mv ${new} ${old}
+fi
+exit 0
diff --git a/usr/src/lib/libresolv2_joy/include/res_update.h b/usr/src/lib/libresolv2_joy/include/res_update.h
new file mode 100644
index 0000000000..0c6967db56
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/res_update.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1999 by Internet Software Consortium, Inc.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * $Id: res_update.h,v 1.3 2005/04/27 04:56:15 sra Exp $
+ */
+
+#ifndef __RES_UPDATE_H
+#define __RES_UPDATE_H
+
+/*! \file */
+
+#include <sys/types.h>
+#include <arpa/nameser.h>
+#include <isc/list.h>
+#include <resolv_joy.h>
+
+#ifndef ORIGINAL_ISC_CODE
+/* definition of u_int32_t needed on Solaris */
+#include <sys/bitypes.h>
+/* need to rename ns_updrec before we define it here */
+#include "arpa/port_nameser.h"
+#endif /* ORIGINAL_ISC_CODE */
+
+
+/*%
+ * This RR-like structure is particular to UPDATE.
+ */
+struct ns_updrec {
+ LINK(struct ns_updrec) r_link, r_glink;
+ ns_sect r_section; /*%< ZONE/PREREQUISITE/UPDATE */
+ char * r_dname; /*%< owner of the RR */
+ ns_class r_class; /*%< class number */
+ ns_type r_type; /*%< type number */
+ u_int32_t r_ttl; /*%< time to live */
+ u_char * r_data; /*%< rdata fields as text string */
+ u_int r_size; /*%< size of r_data field */
+ int r_opcode; /*%< type of operation */
+ /* following fields for private use by the resolver/server routines */
+ struct databuf *r_dp; /*%< databuf to process */
+ struct databuf *r_deldp; /*%< databuf's deleted/overwritten */
+ u_int r_zone; /*%< zone number on server */
+};
+typedef struct ns_updrec ns_updrec;
+typedef LIST(ns_updrec) ns_updque;
+
+#ifdef ORIGINAL_ISC_CODE
+#define res_mkupdate __res_mkupdate
+#define res_update __res_update
+#define res_mkupdrec __res_mkupdrec
+#define res_freeupdrec __res_freeupdrec
+#define res_nmkupdate __res_nmkupdate
+#define res_nupdate __res_nupdate
+#else
+/* these are renamed in "port_nameser.h" */
+#endif /* ORIGINAL_ISC_CODE */
+
+
+int res_mkupdate __P((ns_updrec *, u_char *, int));
+int res_update __P((ns_updrec *));
+ns_updrec * res_mkupdrec __P((int, const char *, u_int, u_int, u_long));
+void res_freeupdrec __P((ns_updrec *));
+int res_nmkupdate __P((res_state, ns_updrec *, u_char *, int));
+int res_nupdate __P((res_state, ns_updrec *, ns_tsig_key *));
+
+#endif /*__RES_UPDATE_H*/
+
+/*! \file */
diff --git a/usr/src/lib/libresolv2_joy/include/resolv_mt.h b/usr/src/lib/libresolv2_joy/include/resolv_mt.h
new file mode 100644
index 0000000000..500d4d764c
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/resolv_mt.h
@@ -0,0 +1,47 @@
+#ifndef _RESOLV_MT_H
+#define _RESOLV_MT_H
+
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/nameser.h>
+#include <resolv_joy.h>
+
+/* Access functions for the libresolv private interface */
+
+int __res_enable_mt(void);
+int __res_disable_mt(void);
+
+/* Per-thread context */
+
+typedef struct {
+int no_hosts_fallback_private;
+int retry_save;
+int retry_private;
+char inet_nsap_ntoa_tmpbuf[255*3];
+char sym_ntos_unname[20];
+char sym_ntop_unname[20];
+char p_option_nbuf[40];
+char p_time_nbuf[40];
+char precsize_ntoa_retbuf[sizeof "90000000.00"];
+char loc_ntoa_tmpbuf[sizeof
+"1000 60 60.000 N 1000 60 60.000 W -12345678.00m 90000000.00m 90000000.00m 90000000.00m"];
+char p_secstodate_output[15];
+} mtctxres_t;
+
+/* Thread-specific data (TSD) */
+
+mtctxres_t *___mtctxres(void);
+#define mtctxres (___mtctxres())
+
+/* Various static data that should be TSD */
+
+#define sym_ntos_unname (mtctxres->sym_ntos_unname)
+#define sym_ntop_unname (mtctxres->sym_ntop_unname)
+#define inet_nsap_ntoa_tmpbuf (mtctxres->inet_nsap_ntoa_tmpbuf)
+#define p_option_nbuf (mtctxres->p_option_nbuf)
+#define p_time_nbuf (mtctxres->p_time_nbuf)
+#define precsize_ntoa_retbuf (mtctxres->precsize_ntoa_retbuf)
+#define loc_ntoa_tmpbuf (mtctxres->loc_ntoa_tmpbuf)
+#define p_secstodate_output (mtctxres->p_secstodate_output)
+
+#endif /* _RESOLV_MT_H */
diff --git a/usr/src/lib/libresolv2_joy/include/sunw_port_after.h b/usr/src/lib/libresolv2_joy/include/sunw_port_after.h
new file mode 100644
index 0000000000..bff64a74c1
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/sunw_port_after.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SUNW_PORT_AFTER_H
+#define _SUNW_PORT_AFTER_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * rename setnetgrent and endnetgrent which were formerly in a separate irs
+ * shared library. These functions should come from libc.so
+ */
+#define setnetgrent res_setnetgrent
+#ifdef SETNETGRENT_ARGS
+void setnetgrent(SETNETGRENT_ARGS);
+#else
+void setnetgrent(const char *netgroup);
+#endif
+
+#define endnetgrent res_endnetgrent
+void endnetgrent(void);
+
+
+/*
+ * include ports for the public header files. ISC's versions are quite different
+ * from those currently in OpenSolaris.
+ */
+
+#ifdef _RESOLV_JOY_H
+#include <port_resolv.h>
+#endif /* _RESOLV_JOY_H */
+
+#ifdef _NETDB_H
+#include <port_netdb.h>
+#endif /* _NETDB_H */
+
+#ifdef _ARPA_INET_H
+#include <arpa/port_inet.h>
+#endif /* _ARPA_INET_H */
+
+#ifdef _ARPA_NAMESER_H
+#include <arpa/port_nameser.h>
+#endif /* _ARPA_NAMESER_H */
+
+
+#ifdef _ARPA_NAMESER_COMPAT_H
+/* no changes */
+#endif /* _ARPA_NAMESER_COMPAT_H */
+
+/* version-specific defines */
+#include <os_version.h>
+
+/*
+ * Prior to 2.6, Solaris needs a prototype for gethostname().
+ */
+#if (OS_MAJOR == 5 && OS_MINOR < 6)
+extern int gethostname(char *, size_t);
+#endif
+/*
+ * gethostid() was not available until 2.5
+ * setsockopt(SO_REUSEADDR) fails on unix domain sockets before 2.5
+ * use ioctl(FIONBIO) rather than fcntl() calls to set/clear non-blocking i/o.
+ */
+#if (OS_MAJOR == 5 && OS_MINOR < 5)
+#define GET_HOST_ID_MISSING
+#define NO_UNIX_REUSEADDR
+#define USE_FIONBIO_IOCTL
+#endif
+
+#if (OS_MAJOR == 5 && OS_MINOR < 11)
+#define NEED_STRSEP
+extern char *strsep(char **, const char *);
+#endif
+
+
+/*
+ * Solaris 2.5 and later have getrlimit(), setrlimit() and getrusage().
+ */
+#if (OS_MAJOR > 5 || (OS_MAJOR == 5 && OS_MINOR >= 5))
+#include <sys/resource.h>
+#define HAVE_GETRUSAGE
+#define RLIMIT_TYPE rlim_t
+#define RLIMIT_FILE_INFINITY
+#endif
+
+/* the default syslog facility of named/lwresd. */
+#ifndef ISC_FACILITY
+#define ISC_FACILITY LOG_DAEMON
+#endif
+
+
+/*
+ * Solaris 8 has if_nametoindex().
+ */
+#if (OS_MAJOR > 5 || (OS_MAJOR == 5 && OS_MINOR >= 8))
+#define USE_IFNAMELINKID
+#endif
+
+#undef ALIGN
+#if (OS_MAJOR == 5 && OS_MINOR > 8)
+#define ALIGN(x) (((uintptr_t)(x) + (sizeof (char *) - 1UL)) & \
+ ~(sizeof (char *) - 1UL))
+#else
+#define ALIGN(x) (((unsigned long)(x) + (sizeof (char *) - 1UL)) & \
+ ~(sizeof (char *) - 1UL))
+#endif
+
+#if (OS_MAJOR == 5 && OS_MINOR < 5)
+#ifndef USE_FIONBIO_IOCTL
+#define USE_FIONBIO_IOCTL 1
+#endif
+#endif
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SUNW_PORT_AFTER_H */
diff --git a/usr/src/lib/libresolv2_joy/include/sunw_port_before.h b/usr/src/lib/libresolv2_joy/include/sunw_port_before.h
new file mode 100644
index 0000000000..776e311fcc
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/sunw_port_before.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SUNW_PORT_BEFORE_H
+#define _SUNW_PORT_BEFORE_H
+
+#ifdef SUNW_OPTIONS
+#include <conf/sunoptions.h>
+#endif
+
+/* version-specific defines */
+#include <os_version.h>
+#if (OS_MAJOR == 5 && OS_MINOR < 6)
+#ifndef SOLARIS_BITTYPES
+#define NEED_SOLARIS_BITTYPES 1
+#endif
+#endif
+
+#if (OS_MAJOR == 5 && OS_MINOR < 5)
+#undef HAS_PTHREADS
+#else
+#define HAS_PTHREADS
+#endif
+
+#if defined(HAS_PTHREADS) && defined(_REENTRANT)
+#define DO_PTHREADS
+#endif
+
+/*
+ * need these if we are using public versions of nameser.h, resolv.h, and
+ * inet.h
+ */
+#include <sys/param.h>
+#if (!defined(BSD)) || (BSD < 199306)
+#include <sys/bitypes.h>
+#else
+#include <sys/types.h>
+#endif
+#include <sys/cdefs.h>
+
+#endif /* _SUNW_PORT_BEFORE_H */
diff --git a/usr/src/lib/libresolv2_joy/include/sys/bitypes.h b/usr/src/lib/libresolv2_joy/include/sys/bitypes.h
new file mode 100644
index 0000000000..54fb42bad7
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/sys/bitypes.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2004, 2007, 2008 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 1998, 1999, 2001 Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* $Id: bitypes.h,v 1.7 2008/11/14 02:54:35 tbox Exp $ */
+
+#ifndef __BIT_TYPES_DEFINED__
+#define __BIT_TYPES_DEFINED__
+
+ /*
+ * Basic integral types. Omit the typedef if
+ * not possible for a machine/compiler combination.
+ */
+
+#ifdef NEED_SOLARIS_BITTYPES
+ typedef /*signed*/ char int8_t;
+ typedef short int16_t;
+ typedef int int32_t;
+#endif
+ typedef unsigned char u_int8_t;
+ typedef unsigned short u_int16_t;
+ typedef unsigned int u_int32_t;
+
+#endif /* __BIT_TYPES_DEFINED__ */
diff --git a/usr/src/lib/libresolv2_joy/include/sys/cdefs.h b/usr/src/lib/libresolv2_joy/include/sys/cdefs.h
new file mode 100644
index 0000000000..67aac00cc7
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/include/sys/cdefs.h
@@ -0,0 +1,144 @@
+/*
+ * ++Copyright++ 1991, 1993
+ * -
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * -
+ * Portions Copyright (c) 1993 by Digital Equipment Corporation.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies, and that
+ * the name of Digital Equipment Corporation not be used in advertising or
+ * publicity pertaining to distribution of the document or software without
+ * specific, written prior permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT
+ * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ * -
+ * --Copyright--
+ */
+
+/*
+ * @(#)cdefs.h 8.1 (Berkeley) 6/2/93
+ * $Id: cdefs.h,v 1.2 2004/07/19 05:54:07 marka Exp $
+ */
+
+#ifndef _CDEFS_H_
+#define _CDEFS_H_
+
+#if defined(__cplusplus)
+#define __BEGIN_DECLS extern "C" {
+#define __END_DECLS };
+#else
+#define __BEGIN_DECLS
+#define __END_DECLS
+#endif
+
+/*
+ * The __CONCAT macro is used to concatenate parts of symbol names, e.g.
+ * with "#define OLD(foo) __CONCAT(old,foo)", OLD(foo) produces oldfoo.
+ * The __CONCAT macro is a bit tricky -- make sure you don't put spaces
+ * in between its arguments. __CONCAT can also concatenate double-quoted
+ * strings produced by the __STRING macro, but this only works with ANSI C.
+ */
+#if defined(__STDC__) || defined(__cplusplus)
+#define __P(protos) protos /* full-blown ANSI C */
+#define __CONCAT(x,y) x ## y
+#define __STRING(x) #x
+
+#define __const const /* define reserved names to standard */
+#define __signed signed
+#define __volatile volatile
+#if defined(__cplusplus)
+#define __inline inline /* convert to C++ keyword */
+#else
+#ifndef __GNUC__
+#define __inline /* delete GCC keyword */
+#endif /* !__GNUC__ */
+#endif /* !__cplusplus */
+
+#else /* !(__STDC__ || __cplusplus) */
+#define __P(protos) () /* traditional C preprocessor */
+#define __CONCAT(x,y) x/**/y
+#define __STRING(x) "x"
+
+#ifndef __GNUC__
+#define __const /* delete pseudo-ANSI C keywords */
+#define __inline
+#define __signed
+#define __volatile
+/*
+ * In non-ANSI C environments, new programs will want ANSI-only C keywords
+ * deleted from the program and old programs will want them left alone.
+ * When using a compiler other than gcc, programs using the ANSI C keywords
+ * const, inline etc. as normal identifiers should define -DNO_ANSI_KEYWORDS.
+ * When using "gcc -traditional", we assume that this is the intent; if
+ * __GNUC__ is defined but __STDC__ is not, we leave the new keywords alone.
+ */
+#ifndef NO_ANSI_KEYWORDS
+#define const /* delete ANSI C keywords */
+#define inline
+#define signed
+#define volatile
+#endif
+#endif /* !__GNUC__ */
+#endif /* !(__STDC__ || __cplusplus) */
+
+/*
+ * GCC1 and some versions of GCC2 declare dead (non-returning) and
+ * pure (no side effects) functions using "volatile" and "const";
+ * unfortunately, these then cause warnings under "-ansi -pedantic".
+ * GCC2 uses a new, peculiar __attribute__((attrs)) style. All of
+ * these work for GNU C++ (modulo a slight glitch in the C++ grammar
+ * in the distribution version of 2.5.5).
+ */
+#if !defined(__GNUC__) || __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 5)
+#define __attribute__(x) /* delete __attribute__ if non-gcc or gcc1 */
+#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
+#define __dead __volatile
+#define __pure __const
+#endif
+#endif
+
+/* Delete pseudo-keywords wherever they are not available or needed. */
+#ifndef __dead
+#define __dead
+#define __pure
+#endif
+
+#endif /* !_CDEFS_H_ */
diff --git a/usr/src/lib/libresolv2_joy/sparc/Makefile b/usr/src/lib/libresolv2_joy/sparc/Makefile
new file mode 100644
index 0000000000..a333224278
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/sparc/Makefile
@@ -0,0 +1,30 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/libresolv2_joy/sparcv9/Makefile b/usr/src/lib/libresolv2_joy/sparcv9/Makefile
new file mode 100644
index 0000000000..ceed393e0d
--- /dev/null
+++ b/usr/src/lib/libresolv2_joy/sparcv9/Makefile
@@ -0,0 +1,38 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+
+include ../Makefile.com
+include ../../Makefile.lib.64
+
+# With the adition of BIND 8.3.3, the symbol table for 64 bit went over
+# the limit for Kpic, so we've added -KPIC here, for just the 64 bit
+# library. This avoids compiling the 32-bit library with PIC unnecessarily.
+
+sparcv9_C_PICFLAGS = -K PIC
+sparcv9_CC_PICFLAGS = -KPIC
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64)
diff --git a/usr/src/lib/libscf/inc/libscf.h b/usr/src/lib/libscf/inc/libscf.h
index c00a59dc5d..1940308f92 100644
--- a/usr/src/lib/libscf/inc/libscf.h
+++ b/usr/src/lib/libscf/inc/libscf.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#ifndef _LIBSCF_H
@@ -832,8 +833,13 @@ int smf_notify_del_params(const char *, const char *, int32_t);
/*
* SMF exit status definitions
+ *
+ * The SMF_EXIT_NODAEMON exit status should be used when a method does not
+ * need to run any persistent process. This indicates success, abandons the
+ * contract, and allows dependencies to be met.
*/
#define SMF_EXIT_OK 0
+#define SMF_EXIT_NODAEMON 94
#define SMF_EXIT_ERR_FATAL 95
#define SMF_EXIT_ERR_CONFIG 96
#define SMF_EXIT_MON_DEGRADE 97
diff --git a/usr/src/lib/libshare/nfs/libshare_nfs.c b/usr/src/lib/libshare/nfs/libshare_nfs.c
index a7fbae4014..a02237dd71 100644
--- a/usr/src/lib/libshare/nfs/libshare_nfs.c
+++ b/usr/src/lib/libshare/nfs/libshare_nfs.c
@@ -21,6 +21,7 @@
/*
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
*/
@@ -2827,9 +2828,16 @@ nfs_init()
ret = initprotofromsmf();
if (ret != SA_OK) {
- (void) printf(dgettext(TEXT_DOMAIN,
- "NFS plugin problem with SMF repository: %s\n"),
- sa_errorstr(ret));
+ /*
+ * This is a workaround. See the comment in
+ * cmd/fs.d/nfs/lib/smfcfg.c for an explanation.
+ */
+ if (getzoneid() == GLOBAL_ZONEID ||
+ ret != SCF_ERROR_NOT_FOUND) {
+ (void) printf(dgettext(TEXT_DOMAIN,
+ "NFS plugin problem with SMF repository: %s\n"),
+ sa_errorstr(ret));
+ }
ret = SA_OK;
}
add_defaults();
diff --git a/usr/src/lib/libshell/Makefile b/usr/src/lib/libshell/Makefile
index b584728d5f..1cce2316f0 100644
--- a/usr/src/lib/libshell/Makefile
+++ b/usr/src/lib/libshell/Makefile
@@ -64,6 +64,5 @@ $(SUBDIRS): FRC
FRC:
include Makefile.demo
-include Makefile.doc
include ../Makefile.targ
diff --git a/usr/src/lib/libshell/Makefile.doc b/usr/src/lib/libshell/Makefile.doc
deleted file mode 100644
index 5f507d28d9..0000000000
--- a/usr/src/lib/libshell/Makefile.doc
+++ /dev/null
@@ -1,77 +0,0 @@
-#
-# CDDL HEADER START
-#
-# The contents of this file are subject to the terms of the
-# Common Development and Distribution License (the "License").
-# You may not use this file except in compliance with the License.
-#
-# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
-# or http://www.opensolaris.org/os/licensing.
-# See the License for the specific language governing permissions
-# and limitations under the License.
-#
-# When distributing Covered Code, include this CDDL HEADER in each
-# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
-# If applicable, add the following below this CDDL HEADER, with the
-# fields enclosed by brackets "[]" replaced with your own identifying
-# information: Portions Copyright [yyyy] [name of copyright owner]
-#
-# CDDL HEADER END
-#
-
-#
-# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
-# Use is subject to license terms.
-#
-
-ROOTDOCDIRBASE= $(ROOT)/usr/share/doc/ksh
-
-DOCFILES= \
- RELEASE \
- README \
- TYPES \
- DESIGN \
- COMPATIBILITY \
- OBSOLETE \
- shell_styleguide.docbook \
- images/tag_bourne.png \
- images/tag_i18n.png \
- images/tag_ksh88.png \
- images/tag_ksh93.png \
- images/tag_ksh.png \
- images/tag_l10n.png \
- images/tag_perf.png \
- images/callouts/1.png \
- images/callouts/2.png \
- images/callouts/3.png \
- images/callouts/4.png \
- images/callouts/5.png \
- images/callouts/6.png \
- images/callouts/7.png \
- images/callouts/8.png \
- images/callouts/9.png \
- images/callouts/10.png
-
-# Documentation rules
-$(ROOTDOCDIRBASE)/%: common/%
- $(INS.file)
-
-$(ROOTDOCDIRBASE)/%: misc/%
- $(INS.file)
-
-ROOTDOCDIRS= \
- $(ROOTDOCDIRBASE) .WAIT \
- $(ROOTDOCDIRBASE)/images .WAIT \
- $(ROOTDOCDIRBASE)/images/callouts
-
-# Generic documentation rules
-DOCFILESRCDIR= common
-ROOTDOCFILES= $(DOCFILES:%=$(ROOTDOCDIRBASE)/%)
-$(ROOTDOCDIRS) := OWNER = root
-$(ROOTDOCDIRS) := GROUP = bin
-$(ROOTDOCDIRS) := DIRMODE = 755
-
-$(ROOTDOCDIRS):
- $(INS.dir)
-
-install: $(ROOTDOCDIRS) .WAIT $(ROOTDOCFILES)
diff --git a/usr/src/lib/libshell/common/COMPATIBILITY b/usr/src/lib/libshell/common/COMPATIBILITY
deleted file mode 100644
index d4d645a99a..0000000000
--- a/usr/src/lib/libshell/common/COMPATIBILITY
+++ /dev/null
@@ -1,134 +0,0 @@
-
- KSH-93 VS. KSH-88
-
-
-The following is a list of known incompatibilities between ksh-93 and ksh-88.
-I have not include cases that are clearly bugs in ksh-88. I also have
-omitted features that are completely upward compatible.
-
-1. Functions, defined with name() with ksh-93 are compatible with
- the POSIX standard, not with ksh-88. No local variables are
- permitted, and there is no separate scope. Functions defined
- with the function name syntax, maintain compatibility.
- This also affects function traces.
-
-2. ! is now a reserved word. As a result, any command by that
- name will no longer work with ksh-93.
-
-3. The -x attribute of alias and typeset -f is no longer
- effective and the ENV file is only read for interactive
- shells. You need to use FPATH to make function definitions
- visible to scripts.
-
-4. A built-in command named command has been added which is
- always found before the PATH search. Any script which uses
- this name as the name of a command (or function) will not
- be compatible.
-
-5. The output format for some built-ins has changed. In particular
- the output format for set, typeset and alias now have single
- quotes around values that have special characters. The output
- for trap without arguments has a format that can be used as input.
-
-6. With ksh-88, a dollar sign ($') followed by a single quote was
- interpreted literally. Now it is an ANSI-C string. You
- must quote the dollar sign to get the previous behavior.
- Also, a $ in front of a " indicates that the string needs
- to be translated for locales other than C or POSIX. The $
- is ignored in the C and POSIX locale.
-
-7. With ksh-88, tilde expansion did not take place inside ${...}.
- with ksh-93, ${foo-~} will cause tilde expansion if foo is
- not set. You need to escape the ~ for the previous behavior.
-
-8. Some changes in the tokenizing rules where made that might
- cause some scripts with previously ambiguous use of quoting
- to produce syntax errors.
-
-9. Programs that rely on specific exit values for the shell,
- (rather than 0 or non-zero) may not be compatible. The
- exit status for many shell failures has been changed.
-
-10. Built-ins in ksh-88 were always executed before looking for
- the command in the PATH variable. This is no longer true.
- Thus, with ksh-93, if you have the current directory first
- in your PATH, and you have a program named test in your
- directory, it will be executed when you type test; the
- built-in version will be run at the point /bin is found
- in your PATH.
-
-11. Some undocumented combinations of argument passing to ksh
- builtins no longer works since ksh-93 is getopts conforming
- with respect to its built-ins. For example, typeset -8i
- previously would work as a synonym for typeset -i8.
-
-12. Command substitution and arithmetic expansion are now performed
- on PS1, PS3, and ENV when they are expanded. Thus, ` and $(
- as part of the value of these variables must be preceded by a \
- to preserve their previous behavior.
-
-13. The ERRNO variable has been dropped.
-
-14. If the file name following a redirection symbol contain pattern
- characters they will only be expanded for interactive shells.
-
-15. The arguments to a dot script will be restored when it completes.
-
-16. The list of tracked aliases is not displayed with alias unless
- the -t option is specified.
-
-17. The POSIX standard requires that test "$arg" have exit status
- of 0, if and only if $arg is null. However, since this breaks
- programs that use test -t, ksh93 treats an explicit test -t
- as if the user had entered test -t 1.
-
-18. The ^T directive of emacs mode has been changed to work the
- way it does in gnu-emacs.
-
-19. ksh-88 allowed unbalanced parenthes within ${name op val} whereas
- ksh-93 does not. Thus, ${foo-(} needs to be written as ${foo-\(}
- which works with both versions.
-
-20. kill -l in ksh-93 lists only the signal names, not their numerical
- values.
-
-21. Local variables defined by typeset are statically scoped in
- ksh93. In ksh88 they were dynamically scoped although this
- behavior was never documented.
-
-22. The value of the variable given to getopts is set to ? when
- the end-of-options is reached to conform to the POSIX standard.
-
-23. Since the POSIX standard requires that octal constants be
- recongnized, doing arithmetic on typeset -Z variables can
- yield different results that with ksh88. Most of these
- differences were eliminated in ksh93o.
-
-24. Starting after ksh93l, If you run ksh name, where name does
- not contain a /, the current directory will be searched
- before doing a path search on name as required by the POSIX
- shell standard.
-
-25. In ksh93, cd - will output the directory that it changes
- to on standard output as required by X/Open. With ksh88,
- this only happened for interactive shells.
-
-26. As an undocumented feature of ksh-88, a leading 0 to an
- assignment of an integer variable caused that variable
- to be treated as unsigned. This behavior was removed
- starting in ksh93p.
-
-27. The getopts builtin in ksh93 requires that optstring contain
- a leading + to allow options to begin with a +.
-
-28. In emacs/gmacs mode, control-v will not display the version when
- the stty lnext character is set to control-v or is unset.
- The sequence escape control-v will display the shell version.
-
-29. In ksh88, DEBUG traps were executed. after each command. In ksh93
- DEBUG traps are exeucted before each command.
-
-30. In ksh88, a redirection to a file name given by an empty string was
- ignored. In ksh93, this is an error.
-I am interested in expanding this list so please let me know if you
-uncover any others.
diff --git a/usr/src/lib/libshell/common/DESIGN b/usr/src/lib/libshell/common/DESIGN
deleted file mode 100644
index c11c0aff1e..0000000000
--- a/usr/src/lib/libshell/common/DESIGN
+++ /dev/null
@@ -1,170 +0,0 @@
-Here is an overview of the source code organization for ksh93.
-
-Directory layout:
-
- The directory include contains header files for ksh93.
- The files nval.h and shell.h are intended to be public
- headers and can be used to add runtime builtin command.
- The remainder are private.
-
- The directory data contains readonly data files for ksh93.
- The man pages for built-ins are in builtins.c rather
- than included as statics with the implementations in the
- bltins directory because some systems don't make static const
- data readonly and we want these to be shared by all running
- shells.
-
- The directory edit contains the code for command line
- editing and history.
-
- The fun directory contains some shell function such as
- pushd, popd, and dirs.
-
- The directory features contains files that are used to generate
- header files in the FEATURE directory. Most of these files
- are in a format that is processed by iffe.
-
- The directory bltins contains code for most of the built-in
- commands. Additional built-in commands are part of libcmd.
-
- The directory sh contains most of the code for ksh93.
-
- The directory tests contains a number of regression tests.
- In most cases, when a bug gets fixed, a test is added to
- one of these files. The regression tests can be run by
- going to this directory and running
- SHELL=shell_path shell_path shtests
- where shell_path is an absolute pathname for the shell to
- be tested.
-
- The top level directory contains the nmake Makefile, a README,
- and several documentation files. The RELEASE file contains
- the list of bug fixes and new features since the original
- ksh93 release. The file COMPATIBILITY is a list of all
- known incompatibilities with ksh88.
-
- The bash_pre_rc.sh is a startup script used when emulating
- bash if the shell is compiled with SHOPT_BASH and the shell
- is invoked as bash. The bash emulation is not complete.
-
-Include directory:
- 1. argnod.h contains the type definitions for command
- nodes, io nodes, argument nodes, and for positional
- parameters.a It defines the prototypes for
- all the positional parameters functions.
- 2. builtins.h contains prototypes for builtins as well
- as symbolic constants that refer to the name-pairs
- that are associated with some of the built-ins.
- It also contains prototypes for many of the strings.
- 3. defs.h is the catch all for all definitions that
- don't fit elsewhere and it includes several other
- headers. It defines a strucuture that contains ksh
- global data, sh, and a structure that contains per
- function data, sh.st.
- 4. edit.h contains definitions that are common to both
- vi and emacs edit modes.
- 5. env.h contains interfaces for creating and modifying
- environment variables.
- 6. fault.h contains prototypes for signal related
- functions and trap and fault handling.
- 7. fcin.h contains macro and function definitions for
- reading from a file or string.
- 8. history.h contains macros and functions definitions
- related to history file processing.
- 9. jobs.h contains the definitions relating to job
- processing and control.
- 10. lexstates.h contains the states associated with
- lexical processing.
- 11. name.h contains the internal definitions related
- to name-value pair processing.
- 12. national.h contains a few I18N definitions, mostly
- obsolete.
- 13. nval.h is the public interface to the name-value
- pair library that is documented with nval.3.
- 14. path.h contains the interface for pathname processing
- and pathname searching.
- 15. shell.h is the public interface for shell functions
- that are documented int shell.3.
- 16. shlex.h contains the lexical token definitions and
- interfaces for lexical analysis.
- 17. shnodes.h contains the definition of the structures
- for each of the parse nodes and flags for the attributes.
- 18. shtable.h contains some interfaces and functions for
- table lookup.
- 19. streval.h contains the interface to the arithmetic
- functions.
- 20. terminal.h is a header file that includes the appropriate
- terminal include.
- 21. test.h contains the definitions for the test and [[...]]
- commands.
- 22. timeout.h contains the define constant for the maximum
- shell timeout.
- 23. ulimit.h includes the appropriate resource header.
- 24. variables.h contains symbolic constants for the built-in
- shell variables.
-
-sh directory:
- 1. args.c contains functions for parsing shell options
- and for processing positional parameters.
- 2. arith.c contains callback functions for the streval.c
- library and the interface to shell arithmetic.
- 3. array.c contains the code for indexed and associative
- arrays.
- 4. bash.h contains code used when compiling with SHOPT_BASH
- to add bash specific features such as shopt.
- 5. defs.c contains the data definitions for global symbols.
- 6. deparse.c contains code to generate shell script from
- a parse tree.
- 7. env.c contains code to add and delete environment variables
- to an environment list.
- 8. expand.c contains code for file name expansion and
- file name generation.
- 9. fault.c contains code for signal processing, trap
- handling and termination.
- 10. fcin.c contains code for reading and writing a character
- at a time from a file or string.
- 11. init.c contains initialization code and callbacks
- for get and set functions for built-in variables.
- 12. io.o contains code for redirections and managing file
- descriptors and file streams.
- 13. jobs.c contains the code for job management.
- 14. lex.c contains the code for the lexical analyzer.
- 15. macro.c contains code for the $ macro expansions, including
- here-documents.
- 16. main.c contains the calls to initialization, profile
- processing and the main evaluation loop as well as
- mail processing.
- 17. name.c contains the name-value pair routines that are
- built on the hash library in libast.
- 18. nvdisc.c contains code related to name-value pair disciplines.
- 19. nvtree.c contains code for compound variables and for
- walking the namespace.
- 20. nvtype.c contains most of the code related to types that
- are created with typeset -T.
- 21. parse.c contains the code for the shell parser.
- 22. path.c contains the code for pathname lookup and
- some path functions. It also contains the code
- that executes commands and scripts.
- 23. pmain.c is just a calls sh_main() so that all of the
- rest of the shell can be in a shared library.
- 24. shcomp.c contains the main program to the shell
- compiler. This program parses a script and creates
- a file that the shell can read containing the parse tree.
- 25. streval.c is an C arithmetic evaluator.
- 26. string.c contains some string related functions.
- 27. subshell.c contains the code to save and restore
- environments so that subshells can run without creating
- a new process.
- 28. suid_exec.c contains the program from running execute
- only and/or setuid/setgid scripts.
- 29. tdump.c contains the code to dump a parse tree into
- a file.
- 30. timers.c contains code for multiple event timeouts.
- 31. trestore contians the code for restoring the parse
- tree from the file created by tdump.
- 32. userinit.c contains a dummy userinit() function.
- This is now obsolete with the new version of sh_main().
- 33. waitevent.c contains the sh_waitnotify function so
- that builtins can handle processing events when the
- shell is waiting for input or for process completion.
- 34. xec.c is the main shell executuion loop.
diff --git a/usr/src/lib/libshell/common/OBSOLETE b/usr/src/lib/libshell/common/OBSOLETE
deleted file mode 100644
index c29cb8bb63..0000000000
--- a/usr/src/lib/libshell/common/OBSOLETE
+++ /dev/null
@@ -1,152 +0,0 @@
-.sp 3
-.tl ''Ksh Features That Are Obsolete in Ksh93''
-.sp 2
-.AL 1
-.LI
-Using a pair of grave accents \^\fB\(ga\fR ... \fB\(ga\fR\^
-for command substition. Use \fB$(\fR ... \fB)\fR instead.
-.LI
-.B FCEDIT
-is an obsolete name for
-the default editor name for the
-.B hist
-command.
-.B FCEDIT
-is not used when
-.B HISTEDIT
-is set. Use
-.B HISTEDIT
-instead.
-.LI
-The newtest (\fB[[\fR ... \fB]]\fR) operator
-\fB\-a\fP \fIfile\fP
-is obsolete. Use
-\fB\-e\fP instead.
-.LI
-The newtest (\fB[[\fR ... \fB]]\fR) operator
-.BR = ,
-as used in
-\fIstring\fP \fB=\fP \fIpattern\fP
-is obsolete. Use
-\fB==\fP instead.
-.LI
-The following obsolete arithmetic comparisons are also permitted:
-.in +5
-.VL 20
-.LI "\fIexp1\fP \fB\-eq\fP \fIexp2\fP"
-True, if
-.I exp1
-is equal to
-.IR exp2 .
-.LI "\fIexp1\fP \fB\-ne\fP \fIexp2\fP"
-True, if
-.I exp1
-is not equal to
-.IR exp2 .
-.LI "\fIexp1\fP \fB\-lt\fP \fIexp2\fP"
-True, if
-.I exp1
-is less than
-.IR exp2 .
-.LI "\fIexp1\fP \fB\-gt\fP \fIexp2\fP"
-True, if
-.I exp1
-is greater than
-.IR exp2 .
-.LI "\fIexp1\fP \fB\-le\fP \fIexp2\fP"
-True, if
-.I exp1
-is less than or equal to
-.IR exp2 .
-.LI "\fIexp1\fP \fB\-ge\fP \fIexp2\fP"
-True, if
-.I exp1
-is greater than or equal to
-.IR exp2 .
-.LE \" End .VL
-.in -5
-.LI
-Using test -t or [ -t ] without specifying the file unit number.
-.LI
-The
-.B \-k
-option to the \fBset\fR builtin is obsolete. It causes
-.I all\^
-variable assignment arguments are placed in the environment,
-even if they occur after the command name.
-The following
-first prints
-.B "a=b c"
-and then
-.BR c :
-There is no alternative.
-.LI
-The obsolete
-.B \-xf
-option of the
-.B typeset
-command allows a function to be exported
-to scripts that are executed without a separate
-invocation of the shell.
-Functions that need to be defined across separate
-invocations of the shell should
-be placed in a directory and the
-.B FPATH
-variable should contains the name of this directory.
-They may also
-be specified in the
-.B ENV
-file with the
-.B \-xf
-option of
-.BR typeset .
-.LI
-The shell environment variable
-.B FCEDIT
-is obsolete. Use
-.B HISTEDIT
-instead.
-.LI
-In the
-.B \-s
-option
-(to \fBfc\fR or \fBhist\fR command???)
-(
-and in obsolete versions, the editor name
-.B \-
-)
-is used to skip the editing phase and
-to re-execute the command.
-.LI
-The
-.B \-t
-option to \fBalias\fR builtin is is obsolete. It
-is used to set and list tracked aliases.
-There is no replacement.
-.LI
-The shell command line option
-.B \-t
-is obsolete. This option cause the shell to exit after reading
-and executing one command. The is no replacement (although ending
-\&"command" with the exit builtin should have the same effect).
-.LI
-As an obsolete feature of the "set" builtin,
-if the first
-.I arg\^
-is
-.B \-
-then the
-.B \-x
-and
-.B \-v
-options are turned off and the next
-.I arg
-is treated as the first argument.
-Using
-.B \+
-rather than
-.B \-
-causes these options to be turned off.
-These options can also be used upon invocation of the shell.
-.LE
-
diff --git a/usr/src/lib/libshell/common/README b/usr/src/lib/libshell/common/README
deleted file mode 100644
index 1feeec8a90..0000000000
--- a/usr/src/lib/libshell/common/README
+++ /dev/null
@@ -1,232 +0,0 @@
-This directory, and its subdirectories contain the source code
-for ksh-93; the language described in the second addition of
-the book, "The KornShell Command and Programming Language," by
-Morris Bolsky and David Korn which is published by Prentice Hall.
-ksh-93 has been compiled and run on several machines with several
-operating systems. The end of this file contains a partial list of
-operating systems and machines that ksh-93 has been known to run on.
-
-The layout of files for ksh-93 has changed somewhat since ksh-88,
-the last major release. Most of the source code for ksh remains in
-the sh directory. However, the shell editing and history routines
-are in the edit sub-directory. The code for shell built-ins is
-in the bltins directory. The data directory contains read-only
-data tables and messages that are used by the shell. The include
-files remain in the include directory and the shlib directory
-is gone. The features directory replaces the older install
-directory. The method for generating systems specific feature
-information has changed substantially.
-
-The Makefile file contains several compilation options that can be set
-before compiling ksh. Options are of the form SHOPT_option and become
-#define inside the code. These options are set to their recommended
-value and some of these may disappear as options in future releases.
-A value of 0, or no value represents off, 1 represents on.
-Note that == is needed, not =, because these are nmake state variables
-and changing their value will cause all modules that could be affected
-by this change to be recompiled.
-The options have the following defaults and meanings:
- ACCT off Shell accounting.
- ACCTFILE off Enable per user accounting info.
- AUDIT off For auditing specific users
- AUDITFILE "/etc/ksh_audit"
- APPEND on Allows var+=val string and array append.
- BASH off Bash compatibility mode. It is not fully implemented
- and is experimental.
- BRACEPAT on C-shell type abc{d,e}f style file generation
- CMDLIB_BLTIN off Makes all commands in libcmd.a builtins. The
- SH_CMDLIB_DIR nmake state variable can be used to
- specify a directory.
- CMDLIB_DIR off Sets CMDLIB_BLTIN=1 and provides a default value
- of "/opt/ast/bin" for SH_CMDLIB_DIR.
- COMPOUND_ARRAY
- on Allows all components of compound variables except the
- first to be any string by enclosing in [...]. It also
- allows components other than the last to be arrays.
- This is experimental and only partially complete.
- CRNL off <cr><nl> treated as <nl> in shell grammar.
- DYNAMIC on Dynamic loading of builtins. (Requires dlopen() interface.)
- ECHOPRINT off Make echo equivalent to print.
- ESH on Compile with emacs command line editing. The original
- emacs line editor code was provided by Mike Veach at IH.
- FILESCAN on Experimental option that allows fast reading of files
- using while < file;do ...; done and allowing fields in
- each line to be accessed as positional parameters.
- FS_3D off For use with 3-D file system. Enabled automatically for
- sytems with dynamic linking.
- KIA off Allow generation of shell cross reference database with -I.
- MULTIBYTE on Multibyte character handling. Requires mblen() and
- mbctowc().
- NAMESPACE on Allows namespaces. This is experimental, incomplete
- and undocumented.
- OLDTERMIO off Use either termios or termio at runtime.
- OO on Experimental object oriented extension. This option
- should disappear soon.
- OPTIMIZE on Optimize loop invariants for with for and while loops.
- P_SUID off If set, all real uids, greater than or equal to this
- value will require the -p flag to run suid/sgid scripts.
- PFSH off Compile with support for profile shell.
- RAWONLY off Turn on if the vi line mode doesn't work right unless
- you do a set -o viraw.
- SEVENBIT off Strip the eigth bit from characters.
- SPAWN off Use spawn as combined fork/exec. May improve speed on
- some systems.
- STATS on Add .sh.stats compound variable.
- SUID_EXEC on Execute /etc/suid_exec for setuid, setgid script.
- TIMEOUT off Set this to the number of seconds for timing out and
- exiting the shell when you don't enter a command. If
- non-zero, TMOUT can not be set larger than this value.
- TYPEDEF on Enable typeset type definitions.
- VSH on Compile with vi command line editing. The original vi
- line editor code was provided by Pat Sullivan at CB.
-
-The following compile options are set automatically by the feature testing:
- DEVFD Set when /dev/fd is a directory that names open files.
- SHELLMAGIC
- Set on systems that recognize script beginning with #! specially.
- VPIX Set on systems the have /usr/bin/vpix program for running MS-DOS.
-
-
-In most instances, you will generate ksh from a higher level directory
-which also generates libcmd and libast libraries on which ksh depends.
-However, it is possible to generate ksh, with by running make -f ksh.mk
-in this directory. The ksh.mk file was generated from the nmake Makefile.
-If you do not have make or nmake, but do have a Version 7 UNIX compatible
-shell, then you can run the script mamexec < Mamfile to build ksh.
-If you have nmake, version 2.3 or later, you can use it without the -f ksh.mk.
-In either case, ksh relies on libraries libast and libcmd which must be
-built first. The binary for ksh becomes the file named ./ksh which can
-be copied to where ever you install it.
-
-If you use old make or the Mamfile, and you system has dynamic shared
-libraries, then you should define the variables mam_cc_static and
-mam_cc_dynanamic as the compiler options that request static linking
-and dynamic linking respectively. This will decrease the number of
-shared libraries that ksh need and cut startup time substantially.
-
-The makefile should also generate shcomp, a program that will precompile
-a script. ksh93 is able to recognize files in this format and process
-them as scripts. You can use shcomp to send out scripts when you
-don't want to give away the original script source.
-
-It is advisable that you put the line PWD=$HOME;export PWD into the
-/etc/profile file to reduce initialization time for ksh.
-
-To be able to run setuid/setgid shell scripts, or scripts without read
-permission, the SUID_EXEC compile option must be on, and ksh must be installed
-in the /bin directory, the /usr/bin directory, the /usr/lbin directory,
-or the /usr/local/bin directory and the name must end in sh. The program
-suid_exec must be installed in the /etc directory, must be owned by root,
-and must be a suid program. If you must install ksh in some other directory
-and want to be able to run setuid/setgid and execute only scripts, then
-you will have to change the source code file sh/suid_exec.c explicitly.
-If you do not have ksh in one of these secure locations, /bin/sh will
-be invoked with the -p options and will fail when you execute a setuid/setgid
-and/or execute only script. Note, that ksh does not read the .profile
-or $ENV file when it the real and effective user/group id's are not
-equal.
-
-The tests sub-directory contains a number of regression tests for ksh.
-To run all these tests with the shell you just built, go to the tests
-directory and run the command
- SHELL=$dir/ksh $dir/ksh shtests
-where dir is the directory of the ksh you want to test.
-
-The file PROMO.mm is an advertisement that extolls the virtues of ksh.
-The file sh.1 contains the troff (man) description of this Shell.
-The file nval.3 contains the troff (man) description of the name-value
-pair library that is needed for writing built-ins that need to
-access shell variables.
-
-The file sh.memo contains a draft troff (mm) memo describing ksh. The
-file RELEASE88 contains the changes made for ksh88. The file RELEASE93
-contains the changes made in this release since ksh-88. The file
-RELEASE contains bug fixes made in this release since ksh-88. The file
-COMPATIBILITY contains a list of incompatibilities with ksh-88. The
-file bltins.mm is a draft troff (mm) memo describing how to write
-built-in commands that can be loaded at run time.
-
-Most of the work for internationalization has been done with ksh93.
-The file ksh.msg is a generated file that contains error messages
-that need to be translated. In addition, the function translate()
-in sh/init.c has to be completed to interface with the dictionary
-lookup. The translate function takes two argument, the string
-that is to be translated and a type which is
- 0 when a library string needs translation.
- 1 when one of the error messages in ksh.msg needs translation.
- 2 when a string in a script needs translation. You use a $ in front
- of a double quoted string in a script to indicate that it
- needs translation. The -D option for ksh builds the dictionary.
-The translate routine needs to return the translated message.
-For dictionaries that need to use a numeric key, it should be
-possible to use the strhash() function to generate numbers to
-go along with each of the messages and to use this number both
-when generating the dictionary and when converting strings.
-If you encounter error messages of type 1 that are not be translated via
-this translate() function send mail to the address below.
-
-Please report any problems or suggestions to:
-
-dgk@research.att.com
-
-
-ksh93 has been compiled and alpha tested on the following. An asterisk
-signifies that ksh has been installed as /bin/sh on this machine.
-
-* Sun OS 4.1.[123] on sparc.
- Sun OS 4.1.1 on sun.
- Solaris 2.[1-9] on sparc.
- Solaris 2.[4-8] on X86.
- HP/UX 8 on HP-9000/730.
- HP/UX 9 on HP-9000/730.
- HP/UX 10 on HP-9000/857.
- HP/UX 11 on pa-risc.
- System V Release 3 on Counterpoint C19
- System V Release 4 on AT&T Intel 486.
- System V Release 4 on NCR 4850 Intel 486.
- IRIX Release 4.0.? System V on SGI-MIPS.
- IRIX Release 5.1 System V on SGI-MIPS.
- IRIX Release 6.[1-5] System V on SGI-MIPS.
- System V Release 3.2 on 3B2.
- UTS 5.2.6 on Amdahl 3090,5990,580.
- System V Release 3.2 on i386.
- SMP_DC.OSx olivetti dcosx MIServer-S 2/128.
- SMP_DC.OSx Pyramid dcosx MIServer-S 2/160 r3000.
- 4.3BSD on Vax 8650.
- AIX release 2 on RS6000.
- AIX 3.2 on RS6000.
- Linux 1.X on Intel
- Linux 2.X on Intel
- Linux 2.X on Alpha
- Linux 2.X on Alpha
- Linux 2.X on OS/390
- Linux 2.X on sparc
- Linux 2.4 on intel itanium 64
- Linux Slackware on sparc64
-* Linux ARM on i-PAQ
- OSF1 on DEC alpha.
- OSF4 on DEC alpha.
- UMIPS 4.52 on mips.
- BSD-i [2-4] on X86.
- OpenBSD on X86
- NetBSD on X86
- FreeBSD on X86
- NeXT on Intel X86.
- NeXT on HP.
-* Windows NT using UWIN on X86
-* Windows NT using UWIN on alpha
- Windows NT using Cygwin on X86
- Windows NT with NutCracker libraries.
- Windows NT with Portage libraries.
- Windows 3.1 using custom C library.
- OpenEdition on MVS
- Darwin OS X on PPC
- MVS on OS 390
- SCO Openserver 3.2 on X86
- Unixware 7 on X86
-
-Good luck!!
-
-David Korn
-dgk@research.att.com
-
diff --git a/usr/src/lib/libshell/common/RELEASE b/usr/src/lib/libshell/common/RELEASE
deleted file mode 100644
index b8fc92511a..0000000000
--- a/usr/src/lib/libshell/common/RELEASE
+++ /dev/null
@@ -1,2204 +0,0 @@
-10-03-05 --- Release ksh93t+ ---
-10-03-05 A varibale unset memory leak has been fixed and tests/leaks.sh
- has been added to verify the fix.
-10-03-04 Documentation, comment, and disgnostic spelling typos corrected.
-10-02-14 Fix sh_getenv() initialization to cooperate with the 3d fs.
-10-02-12 A bug in which the get discipline function was not invoked for
- associative array subscripts for unset array elements has been fixed.
-10-02-12 A bug which could occur if the last line of a script was an eval
- that executed multiple commands has been fixed.
-10-02-02 A buffer overflow in read and another in binary type base64
- encoding were fixed.
-10-01-20 A bug in the evaluation of arithmetic expression in which the
- subscript was evaluated twice for $((foo[x++]++)) has been fixed.
-10-01-19 A workaround for a double-free of a trap in both a subshell and its
- parent has been added.
-10-01-18 A bug in type handling of typeset -H has been fixed.
-10-01-15 The "adding empty subscript" warning now only emitted with -x set.
-10-01-01 A bug in the parser in which '$((case i in i):;esac);:))' was not
- parsed correctly was fixed.
-10-01-01 A bug in the parser in which '$(( 2 , 3.6 ))' dumped core for locales
- with radix char , and thousands separator . has been fixed.
-09-12-28 A bug in the handling of SIGCLD on systems that generated SIGCLD
- while blocked waiting for process to complete has been fixed.
-09-12-24 ast setlocale() reworked to differentiate env var changes from user
- override.
-09-12-18 A bug with the SHOPT_BGX option set which disabled traps for signals
- < SIGCHLD when a trap for a signal > SIGCHLD was set has been fixed.
-09-12-18 A bug where [[ -v var ]] was incorrect for some variables (including
- LC_* vars) has been fixed.
-09-12-15 A bug that produced a syntax error when a multibyte character
- straddled a buffer boundary has been fixed.
-09-12-11 A bug where the subscript of an unset variable was not evaluated has
- been fixed.
-09-12-09 A bug where shcomp dumped core on certain syntax errors has been fixed.
-09-12-07 A bug where a parent shell environment var reset in a subshell removed
- the value in subsequent children of the parent shell has been fixed.
-09-12-04 A bug in which in some cases a trap in a function executed in
- a subshell could trigger twice has been fixed.
-09-12-03 A bug in which SHLVL exported with some attributes could cause
- the shell to abort at startup has been fixed.
-09-12-02 A bug with pipefail in which the shell could hang waiting for the
- writer to complete before the last reader command has been fixed.
-09-11-30 A bug in which a trap could be inherited by the first element of
- a pipeline when the command had more than 63 arguments that did
- not contain any macro expansions has been fixed.
-09-11-19 When read from a terminal was called from with a while or foo loop,
- and an edit mode was on, a backspace or erase no longer will
- overwrite the prompt.
-09-11-17 Change .paths parse to handle BUILTIN_LIB=foo BUILTIN_LIB=foo-1.2.
-09-11-17 Inside a function, typeset foo.bar will bind foo to global variable
- foo if local variable foo does not exist, instead of creating a
- local variable.
-09-11-17 "read -n1" from the terminal has been fixed to read exactly one character.
-09-11-11 Job control now works for subshell commands, (...).
-09-11-11 If set -e is on for an interactive shell errors in special builtins
- now cause the shell to exit.
-09-11-11 A bug in which an interrupt handler processed during the read builtin
- when IFS did not contain a new line has been fixed.
-09-11-09 A bug in which a variable that has been unset in a subshell and then
- exported from that subshell does not show up in the environment
- has been fixed.
-09-11-02 ``,2'' is now a valid numeric constant for locales with
- decimal_point=','.
-09-11-02 A bug where "return" in .profile did not restore the shell state
- has been fixed.
-09-10-31 A bug that corrupted saved exit status when pids wrapped around has
- been fixed.
-09-10-26 A bug in { LANG LC_ALL LC_category } ordering has been fixed in -last.
-09-10-16 A bug where notification to libast that the environment has changed
- has been fixed.
-09-10-12 A bug in which a function loaded in a subshell could leave side
- effects in the parent shell has been fixed.
-09-10-12 A bug in converting a printf %d operand to a number when the operand
- contains multiple subscripts for the same variable has been fixed.
-09-10-09 A bug in the handling of the escape character \ in directory prefixes
- in command completion has been fixed.
-09-10-09 $PATH processing has been changed to delay dir stat() and .paths
- lookup until the directory is needed in the path search.
-09-09-28 Call the ast setlocale() intercept on unset too.
-09-09-24 A bug in which LANG=foo; LC_ALL=foo; unset LC_ALL; did not revert
- LC_CTYPE etc. to the LANG value has been fixed.
-09-09-17 A bug in which unsetting SVLVL could cause a script invoked by
- name without #! to core dump has been fixed.
-09-09-16 A bug in which a pipeline in a here-document could hang when the
- pipefail option was on has been fixed.
-09-09-09 A bug in the processing of line joining in here documents which
- occurred when a buffer began with <escape><new-line> has been fixed.
-09-09-09 A leading ; with commands in a brace group or parenthesis group
- no longer causes an error. It now is used for the "showme" option.
-09-09-09 A bug in which a subshell containing a background process could
- block until the background process completed has been fixed.
-09-09-04 A bug in handing ${var[sub]}, where var is a nameref has been fixed.
-09-09-03 A bug which caused an index array to have the wrong number of elements
- when it was converted from a compound variable by adding an another
- element has been fixed.
-09-09-03 Specifying export for a compound variable now generates an error.
-09-09-02 $"..." localizations strings are no longer recognized inside `...`.
-09-09-01 A bug in the for loop optimizer in the handling of type static
- variables has been fixed.
-09-09-01 An error message is not displayed when * and @ are used as subscripts.
-09-09-01 Several bugs in the processing for types that included an associative
- array of another type has been fixed.
-09-09-01 A bug in the tracing of [[ a < b ]] and [[ a > b ]] has been fixed.
-09-08-26 The .sh.file variable was not being set for a script that was run
- by name and didn't start with #! and this has been fixed.
-09-08-25 A bug in which a function called to deeply from command substitution
- did not display an error message has been fixed.
-09-08-24 When processing profiles, ksh93 now violates the POSIX standard and
- treats &> as a redirection operator similar to bash.
-09-08-23 A bug in the handling of the trap on SIGPIPE that could lead to a
- memory fault has been fixed.
-09-08-21 A bug in the handling of the comma operator in arithmetic expressions
- that could cause a core dump on some systems has been fixed.
-09-08-20 A bug in which a compound variable containing an array of a type
- that doesn't have any elements now expands correctly.
-09-08-19 A bug which disabled function tracing inside a function after
- a call to another function has been fixed.
-09-08-19 A bug in which initializing a compound variable instance to another
- compound variable by name has been fixed.
-09-08-18 A bug in which compound variable instances could be lost after
- an instance that invoked a type method discipline has been fixed.
-09-08-18 A bug in which a discipline function for a type applied to an
- array instance when invoked in a function ignored the subscript
- has been fixed.
-09-08-18 A scoping error with variables in arithmetic expression with
- type variables when reference with a name reference has been fixed.
-09-08-10 Several memory leaks were fixed primarily related to subshells.
-09-08-06 A bug in which setting the trap on CHLD to ignore could cause
- a script to hang has been fixed.
-09-07-08 A bug in the processing of name reference assignments when it
- contained pattern expansions with quoting has been fixed.
-09-06-22 The default width for typeset -X has been changed so that there
- should be no loss of precision when converting to a string.
-09-06-19 A bug in the printing of array elements for binary variables with
- printf %B has been fixed.
-09-06-19 A bug which caused a core dump with trap DEBUG set with an array
- assignment with no elements has been fixed.
-09-06-19 A bug with read with typeset -b -Z<num> has been fixed.
-09-06-19 Two bugs related to read -b for array variables has been fixed.
-09-06-19 A bug with typeset for compound variables containing arrays of
- compound variables has been fixed.
-09-06-18 A bug in appending a compound variable to a an indexed array of
- compound variables has been fixed.
-09-06-18 A bug which occurs when appending a compound variable to an indexed
- array element has been fixed.
-09-06-18 Setting VISUAL to a value other than one ending in vi or emacs will
- no longer unset the edit mode.
-09-06-17 A bug in typeset -m when moving a local compound variable to a
- global compound variable via a name reference has been fixed.
-09-06-17 A bug in appending to nodes of an array of compound variables when
- addressing them via nameref has been fixed.
-09-06-17 A bug in typeset -p var, when var is an array of compound variables
- in which the output only contained on array element has been fixed.
-09-06-17 The prefix expansion ${!y.@} now works when y is a name
- reference to an element of an array.
-09-06-16 Traps on signals that are ignored when the shell is invoked
- no longer display. Previously they were ignored as required but
- would be listed with trap -p.
-09-06-12 A bug in vi edit mode in which hitting the up arrow key at the
- end of a line longer than 40 characters which caused a core dump
- has been fixed.
-09-06-11 A bug in which "eval non-builtin &" would create two processes,
- one for the & and another for non-builtin has been fixed.
-09-06-08 When var is an identifier and is unset, ${var} no longer tries to
- run command substitution on the command var.
-09-06-08 Process substitution arguments of the form <(command) can now be
- used following the < redirection operator to redirect from command.
-09-05-13 A bug in which redirections of the form 2>&1 1>&5 inside command
- substitution could cause the command substitution to hang has been
- fixed.
-09-05-12 To conform with POSIX, the -u option only checks for unset variables
- and subscript elements rather than checking for all parameters.
-09-05-12 A bug which could cause a core dump when a variable whose name
- begins with a . was referenced as part of a name reference inside
- a function has been fixed.
-09-05-01 A bug that caused a core dump when SIGWINCH was received and
- both vi and emacs mode were off has been fixed.
-09-04-22 Default alias compound='typeset -C' added.
-09-04-15 A bug that caused ${...;} to hang for large files has been fixed.
-09-04-08 A change was made in the -n option which printed out an incorrect
- warning with <>.
-09-04-07 The emacs edit command M-_ and M_. and the vi command _ was fixed
- to handle the case there there is no history file.
-09-04-05 A bug in handling new-lines with read -n has been fixed.
-09-04-05 The ENV variable defaults the the file named by $HOME/.kshrc rather
- then to the string $HOME/.kshrc.
-09-03-31 A bug in which a nested command substitution with redirections could
- leave a file descriptor open has been fixed.
-09-03-24 ksh now only uses the value of the _ variable on startup if it can
- verify that it was set by the invoking process rather than being
- inherited by some other ancestor.
-09-03-24 When ksh is invoked without -p and ruid!=euid, and the shell is
- compiled without SHOPT_P_UID or ruid<SHOPT_P_UID, the shell now
- enables the -p option. The previous version instead set the
- euid to the ruid as it does for set +p.
-09-03-24 When SHOPT_P_UID is defined at compile time and the shell is started
- without -p and ruid!=euid and ruid>=SHOPT_P_UID then euid is set
- to ruid. A bug that did the wrong test (ruid<SHOPT_P_UID) was fixed.
-09-03-17 The sleep(1) builtin now accept and ISO 8601 PnYnMnDTnHnMnS
- duration or date(1) compatible date/time operand.
-09-03-10 If a variable that was left or right justified or zero-filled was
- changed with a typeset statement that was left or right justified
- or zero-filled, then the original justification no longer affects
- the result.
-09-03-10 A bug in the handling of traps when the last command in a script
- is a subshell grouping command has been fixed.
-09-03-03 A bug in which an expansion of the form ${!prefix@} could generate
- an exception after the return from a function has been fixed.
-09-02-02 A bug in restricted mode in which the value of ENV could be
- changed from within a function has been fixed.
-09-02-02 A bug in which an erroneous message indicating that a process
- terminated with a coredump has been fixed.
-09-02-02 The exit status when exit was called without an argument from
- a signal handler was incorrect and has been fixed.
-09-02-02 A bug in which a function autoloaded in a subshell could cause
- a core dump when the subshell completed has been fixed.
-09-02-02 A bug in which 2>&1 inside a command substitution wasn't working
- correctly has been fixed.
-09-02-02 A bug in the call stack of arithmetic function with 2 args
- returning int has been fixed.
-09-01-30 A bug in which 'eval print \$0' inside a function was giving the
- wrong value for $0 has been fixed.
-09-01-28 A bug in which a command substitution could return an exit status
- of 127 when the pipefail option is enabled has been fixed.
-09-01-26 ksh93 now generates an error message if you attempt to create
- a global name reference to a local variable.
-09-01-26 The [[ -v var ]] operator was modified to test for array elements.
-09-01-23 The redirection operator <>; was added. It is similar to <>
- except that if the command it is applied to succeeds, the file
- is truncated to the offset at the command completion.
-09-01-23 The default file descriptor for <> was changed to 1.
-09-01-20 A bug in which the exit status specified in an exit trap was
- not used when a process terminated with a signal has been fixed.
-09-01-19 A bug in which a signal whose default action is to terminate
- a process could be ignored when the process is running a sub-shell
- has been fixed.
-09-01-19 A bug in which sending SIGWINCH to a process that reads from a pipe
- could cause a memory fault has been fixed.
-09-01-16 The -R unary operator was added to [[...]] and test to check whether
- a variable is a name reference.
-09-01-16 The -v unary operator was added to [[...]] and test to check whether
- a variable is set.
-09-01-14 The unset built-in was modified to return 0 exit status when
- unsetting a variable that was unset to conform with the POSIX
- standard.
-09-01-14 The unset built-in was modified to continue to unset variables
- after encountering a variable that it could not unset to
- conform to the POSIX standard.
-09-01-14 The parameter expansion ${x+value} no longer expands the value of
- the variable x when determining whether x is set or not.
-09-01-13 A bug in which background jobs and pipelines that were not waited
- for could, in rare instances, cause the shell to go into an infinite
- loop or fail has been fixed.
-09-01-06 A bug in indexed arrays of compound variables in which referencing
- non-existent sub-variable in an arithmetic expression could cause
- the sub-variable to be created has been fixed.
-09-01-05 A bug in which the \ character did not escape extended regular
- expression pattern characters has been fixed.
-08-12-24 A bug in which killing the last element of a pipe did not cause
- a write to the pipe to generate a SIGPIPE has been fixed.
-08-12-19 A bug which could cause command substitution to hang when the
- last element of a pipeline in a command substitution was a built-in
- and the output was more that PIPE_BUFF.
-08-12-18 A bug which occurs when a here documented marker embedded in a
- command substitution occurs on a buffer boundary has been fixed.
-08-12-17 A bug in the output of typeset -p for variables that had attributes
- but did not have a value has been fixed.
-08-12-16 A bug in which a name reference to a name reference variable that
- references an array element has been fixed.
-08-12-16 A bug in which a variable given both the -A and -C attribute along
- with an initial assignment didn't work correctly has been fixed.
-08-12-10 The [[ -t fd ]] test was fixed to handle fd>9.
-08-12-10 A bug where function stack misalignment could cause a bus error
- has been fixed.
-08-12-09 Command completion was changed to use \ to quote special characters
- instead of quoting the argument in single quotes.
-08-12-07 A bug in typeset -m which occurred when the target node was an
- associative array element has been fixed.
-08-12-07 A timing bug on some systems (for example darwin), that could
- cause the last process of a pipeline entered interactively to fail
- with an "Exec format error" has been fixed.
-08-12-04 SHOPT_BGX enables background job extensions. Noted by "J" in
- the version string when enabled. (1) JOBMAX=n limits the number
- of concurrent & jobs to n; the n+1 & job will block until a
- running background job completes. (2) SIGCHLD traps are queued
- so that each completing background job gets its own trap; $! is
- set to the job pid and $? is set to the job exit status at the
- beginning of the trap. (3) sleep -s added to sleep until the time
- expires or until a signal is delivered.
-08-12-04 The sign of floating point zero is preserved across arithmetic
- function calls.
-08-12-04 A bug that caused print(1) to produce garbled stdout/stderr
- output has been fixed.
-08-12-04 A bug in which printf "%d\n" "'<euro>'" did not output the
- numerical value of the EURO symbol, 8354, has been fixed.
-08-11-24 /dev/fd* and /dev/std* redirections are first attempted with
- open() to preserve seek semantics; failing that the corresponding
- file descriptors are dup()'d.
-08-11-20 A bug which could cause a core dump if a function compiled with
- shcomp was found has been fixed.
-08-11-20 A bug in which jobs were not cleared from the jobs table for
- interactive shells when the pipefail option is on has been fixed.
-08-11-11 A bug in which a field that was unset in a type definition and later
- set for an instance could appear twice when displaying the variable
- has been fixed.
-08-11-11 A bug in which running a simple command & inside a function would
- not return the correct process id has been fixed.
-08=11-10 A bug in which the exit status of a command could be lost if the pid
- was that of the most recent command substitution that had completed
- has been fixed.
-08-11-10 The maximum depth for subshells has been increased from 256 to 65536.
-08-11-06 A bug which could cause a core dump when the _ reference variable was
- used as an embedded type with a compound assignment has been fixed.
-
-08-10-31 --- Release ksh93t ---
-08-10-31 Variable scoping/initialization bugs that could dump core were fixed.
-08-10-24 The lexer now accepts all RE characters for patterns prefixed
- with a ksh ~(...) option expression.
-08-10-24 For ${var/pat/sub} \0 in sub expands to the text matched by pat.
-08-10-18 A bug in array scoping that could dump core has been fixed.
-08-10-10 read -n and -N fixed to count characters in multibyte locales.
-08-10-10 A bug that mishandled _.array[] type references has been fixed.
-08-10-09 ${.sh.version} now contains a concatenation of the following (after
- 'Version') denoting compile time features:
- A SHOPT_AUDIT
- B SHOPT_BASH
- L SHOPT_ACCT
- M SHOPT_MULTIBYTE
-08-10-09 A bug that caused subshell command substitution with redirection
- to hang has been fixed.
-08-10-08 Output errors, other than to stderr, now result in a diagnostic.
-08-10-08 ksh93 now supports types that contain arrays of other types as
- members. Earlier versions core dumped in this case.
-08-10-05 A bug which caused the shell to emit a syntax error for an arithmetic
- statement of the form (( var.name[sub] = value)) has been fixed.
-08-10-01 A bug that caused subshell command substitution to hang has
- been fixed.
-08-09-29 When the -p export option of typeset is used with other options,
- only those variables matching the specified options are displayed.
-08-09-29 When the shell reads the environment and finds variables that are
- not valid shell assignments, it now passes these on to subsequent
- commands rather than deleting them.
-08-09-29 A bug in the display of compound variables containing an indexed
- array of compound variables has been fixed.
-08-09-29 A bug in the display of compound variables containing an associative
- array with a subscript containing a . in the name has been fixed.
-08-09-26 A core dump in the subshell environment restore has been fixed.
-08-09-24 $(...) has been fixed to properly set the exit status in $?.
-08-09-23 $(<...) with IFS=$'\n\n' has been fixed to retain all but the last
- of multiple trailing newlines.
-08-09-23 The -p option to typeset when used with other attributes, restricts
- the output to variables with the specified attributes.
-08-09-22 A bug that sometimes lost the exit status of a job has been fixed.
-08-09-21 A bug that retained trailing command substitution newlines in
- cases where the command caused the shell to fork has been fixed.
-08-09-19 type, whence -v, and command -v were fixed to comply with POSIX
- by writing 'not found' diagnostics to the standard error.
-08-09-18 test and [...] were fixed to comply with POSIX in the case
- of test '(' binop ')' where binop is a valid binary test operator.
-08-09-16 If a method discipline named create is specified when defining a
- type, this function will be called when an instance is created.
-08-09-15 The variable _ is now set as a reference to the compound variable
- when defining a compound variable or a type.
-08-09-10 The shell now prints an error message when the type name specified
- for an indexed array subscript is not an enumeration type.
-08-09-10 A bug in which a subshell that spawned a background process could
- loose output that was produced after the foreground completed
- has been fixed.
-08-09-10 A timing bug on some systems that could cause coprocesses started by a
- subshell to not clean up and prevent other coprocesses has been fixed.
-08-09-09 The typeset -m option is now able to rename array elements from
- the same array.
-08-09-09 The exit status of 2 from the DEBUG trap causes the next command
- to be skipped. An exit value of 255 from a DEBUG trap called from
- a function causes the function to return.
-08-09-08 A bug in which a coprocess created in a subshell that did not
- complete when the subshell terminated could prevent a coprocess
- from being created in the parent shell has been fixed.
-08-09-05 An assignment of the form name1=name2 where name1 and name2
- are both compound variables causes name1 to get a copy of name2.
- name1+=name2 causes name2 sub-variables to be appended to name1.
-08-09-05 A bug in which unsetting a compound variable did not unset all
- the sub-variables has been fixed.
-08-09-01 A bug in the subshell cleanup code that could cause SIGSEGV has
- been fixed.
-06-08-26 The SHLVL variable which is an environment variable used by bash
- and zsh that gets incremented when the shell starts.
-08-08-25 For an indexed array, a negative subscript now refers to offsets
- from the end so that -1 refers to the last element.
-08-08-24 An alignment error for shorts on 64 bit architectures has been fixed.
-08-08-22 If oldvar is a compound variable, typeset -C newvar=oldvar creates
- newvar as a copy of oldvar.
-08-08-19 The ALRM signal no longer cause the sleep builtin to terminate.
-08-08-13 When used in an arithmetic expression, the .sh.version variable
- now produces a number that will be increasing for each release.
-08-08-11 A bug in which type instantiation with a compound assignment in
- a dot script in which the type is defined has been fixed.
-08-08-07 The -m option has been added to typeset to move or rename a
- variable. Not documented yet.
-08-08-06 A bug in read when used in a loop when a prompt was specified
- when reading from a terminal has been fixed.
-08-08-01 A bug with the pipefail option in which a nested pipeline could
- cause an asynchronous command to block has been fixed.
-08-08-01 A for loop optimizer bug that treats .sh.lineno as an invariant
- has been fixed.
-08-07-30 A bug in which expanding compound variable that had a get discipline
- from with a here document could cause a syntax error has been fixed.
-08-07-18 A bug in which a nameref caused a local variable to be created
- rather than binding to an existing variable in the global scope
- has been fixed.
-08-07-17 A bug which occurred when a nameref was created from within a
- function that was part of a pipeline has been fixed.
-08-07-14 The compile option SHOPT_STATS was added. With this option the
- compound variable .sh.stats keeps usage statistics that could help
- with performance tuning.
-08-07-10 The output of set now always uses a single line for each variable.
- For array variables, the complete set of values is now displayed.
-08-07-09 The typeset -C option can be used with arrays to indicate that
- each element should default to a compound variable.
-08-07-08 The %B format now outputs compound variables and arrays. The
- alternate flag # can be used to cause output into a single line.
-08-07-03 When the window change signal, WINCH, is received, the current
- edit line is redrawn in place.
-08-07-01 A bug in the handling of shared variables when inside an embedded
- type has been fixed.
-08-06-29 A bug in multiline edit mode which occurred when the prompt length
- was three characters or less has been fixed.
-08-06-23 A bug in which the SIGCLD was not be triggered when background
- jobs completed has been fixed.
-08-06-23 KSH_VERSION added as a name reference to .sh.version.
-08-06-20 type now outputs 'special builtin' for special builtins.
-08-06-19 A couple of bugs in multi-dimensional arrays have been fixed.
-08-06-19 A bug in which a syntax error in a dot script could generated
- a syntax error in the next subsequent command has been fixed.
-08-06-17 Reduced the maximum function call depth to 2048 to avoid exceptions
- on some architectures.
-08-06-16 A bug in which printf "%B" could generate an exception when the
- specified variable was not set has been fixed.
-08-06-16 When typeset -p is followed by variable names, it now displays
- the attributes names and values for the specific names.
-08-06-14 A bug that could effect the drawing of the screen from multiline
- emacs or gmacs mode when walking up the history file has been fixed.
-08-06-13 A bug in which a compound variable defined in a subshell could
- have side effects into the parent shell has been fixed.
-08-06-13 A number of bugs related to using .sh.level to set the stack from
- for DEBUG traps have been fixed.
-08-06-13 The .sh.lineno variable has been added. When .sh.level is changed
- inside a DEBUG trap, the .sh.lineno contains the calling line number
- for the specified stack frame.
-08-06-13 The .sh.level variable has been documented and now works.
-08-06-11 The -C option has been added to read for reading compound command
- definitions from a file.
-08-06-11 The . command is now permitted inside a compound command definition.
- The dot script can contain declaration commands and dot commands.
-08-06-09 Add -C option to typeset so that typeset -C foo, is equivalent
- to foo=().
-08-06-09 Added -n warning message for typeset option orderings that are valid
- with ksh88 but not valid with ksh93, for example Lx5.
-08-06-09 A bug in which the return value for an assignment command containing
- a command substitution with that failed was zero when the assignment
- contained redirections has been fixed.
-08-06-09 A bug in the quoting of $ inside a ERE pattern ~(E)(pattern)
- has been fixed.
-08-06-06 A bug when processing `` command substitution with the character
- sequence \$' has been fixed.
-08-06-02 When defining a type, the typeset -r attribute causes this field
- to be required to be specified for each instance of the type and
- does not allow a default value.
-08-06-02 Several bugs in which compound variables were modified by
- subshells have been fixed.
-08-05-22 The ceil function has been added to the math functions.
-08-05-21 A bug in which a name reference defined in a function and passed
- as an argument to another function could cause an incorrect binding.
-08-05-21 A bug in freeing compound variables that are local to functions
- has been fixed.
-08-05-19 The array expansions ${array[sub1..sub2]} and ${!array[sub1..sub2]}
- to expand to the value (or subscripts) for array between sub1 and
- sub2 inclusive. For associative arrays, the range is based on
- location in the POSIX locale. The .. must be explicit and cannot
- result from an expansion.
-08-05-15 The trap on SIGCLD is no longer triggered by the completion of
- the foreground job as with ksh88.
-08-05-14 A bug in the implementation of the editing feature added on
- 07-09-19 in emacs mode has been fixed.
-08-05-12 A bug in processing the test built-in with parenthesis has been
- fixed.
-08-05-12 The unset built-in now returns non-zero when deleting an array
- subscript that is not set.
-08-05-08 Changing the value of HISTFILE or HISTSIZE will cause the old
- history file to be close and reopened with the new name or size.
-08-05-08 When FPATH is changed functions that were found via a path search
- will be searched for again.
-08-05-08 A parser bug in which reserved words and labels were recognized
- inside compound indexed array assignment after a new-line has
- been fixed.
-08-05-07 A bug in getopts when handling numerical option arguments has
- been fixed.
-08-05-07 The typeset -S option was added for variables outside type
- definitions to provide a storage class similar to C static
- inside a function defined with function name. When outside
- type definitions and outside a function, the -S option cause
- the specified variable so be unset before the assignment and
- before the remaining attributes are supplied.
-08-05-07 A bug that affected the cursor movement in multiline mode when
- a character was deleted from near the beginning of the any
- line other than the first.
-08-05-01 In multiline edit mode, the refresh operation will now clear
- the remaining portion of the last line.
-08-05-01 A bug in computing prompt widths for the edit modes for prompts
- with multibyte characters has been fixed.
-08-05-01 A bug in the multiline edit mode which could cause the current
- line to be displayed incorrectly when moving backwards from third
- or higher line to the previous line has been fixed.
-08-05-01 A bug in which options set in functions declared with the function
- name syntax were carried across into functions invoked by these
- functions has been fixed.
-08-04-30 A bug which could cause a coprocess to hang when the read end
- is a builtin command has been fixed.
-08-04-30 The emacs and vi editors have been modified to handle window
- change commands as soon as they happen rather than waiting for
- the next command.
-08-04-28 A bug in which ${!x} did not expand to x when x was unset has been
- fixed.
-08-04-27 A bug in which the assignment x=(typeset -a foo=([0]=abc)) created
- x.foo as an associative array has been fixed.
-08-04-25 A bug in which $# did not report correctly when there were more
- than 32K positional parameters has been fixed.
-08-04-04 Choose the name _ as the sub-variable that holds type or instance
- specific data used by discipline functions.
-08-03-27 A bug in which the terminal group was not given back to the parent
- shell when the last part of a pipeline was handled by the parent shell
- and the other parts of the pipeline complete has been fixed.
- The symptom was that the pipeline became uninterruptable.
-08-03-25 A bug in restricted mode introduced in ksh93s that caused scripts
- that did not use #! to executed in restricted mode has been fixed.
-08-03-25 A bug in which the pipefail option did not work for a pipeline
- within a pipeline has been fixed.
-08-03-24 A bug in which OPTIND was not set correctly in subshells has
- been fixed.
-08-03-24 A bug which could cause a memory exception when a compound variable
- containing an indexed array with only element 0 defined was expanded.
-08-03-20 A bug in which ${!var[sub].*} was treated as an error has been fixed.
-08-03-20 Associative array assignments of the form ([name]=value ...)
- now allow ; as well as space tab and new line to separate elements.
-08-03-18 A buffering problem in which standard error was sometimes
- not flushed before sleep has been fixed.
-08-03-17 A bug in which a signal sent to $$ while in a subshell would be
- sent to the subshell rather than the parent has been fixed.
-08-03-17 --default option added to set(1) to handle set +o POSIX semantics.
- set --state added as a long name alias for set +o.
-08-03-14 A bug in which using monitor mode from within a script could
- cause the terminal group to change has been fixed.
-08-03-10 The new ${...} command substitution will treat the trailing }
- as a reserved word even if it is not at the beginning of a command,
- for example, ${ date }.
-08-03-10 If the name of the ENV begins with /./ or ././ then the
- /etc/ksh.kshrc file will not be executed on systems that support
- this interactive initialization file.
-08-03-07 A bug in which ksh -i did not run the ENV file has been fixed.
-08-03-07 A bug in which ulimit did not always produce the same output as
- ulimit -fS has been fixed.
-08-03-04 A bug in multiline mode in emacs and vi mode which could cause the
- cursor to be on the wrong line when interrupt was hit has been fixed.
-08-03-03 The change made in ksh93s+ on 07-06-18 in which braces became
- optional for ${a[i]} inside [[...]] was restored in the case
- where the argument can be a pattern.
-08-03-03 A bug in which creating a name reference to an associative array
- instance would fail when the subscript contained characters [ or
- ] has been fixed.
-08-02-29 The redirection operator >; has been added which for non-special
- files, generates the output in a temporary file and writes the
- specified file only of the command has completed successfully.
-08-02-15 A bug in ${var/pattern/string} for patterns of the form ?(*) and +(*)
- has bee fixed.
-08-02-07 A bug in which test \( ! -e \) produced an error has been fixed.
-08-02-14 The typeset -a option can now optionally be followed by the name
- of an enumeration type which allows subscripts to be enumerations.
-08-02-14 The enum builtin which creates enumeration types has been added.
-08-02-12 The backoff logic when there are no more processes has been fixed.
-08-02-07 The -X option has been added to typeset. The -X option creates
- a double precision number that gets displayed using the C99 %a
- format. It can be used along with -l for long double.
-08-01-31 The -T option to typeset has been added for creating typed
- variables. Also the -h and -S options have been added to
- typeset that are only applicable when defining a type.
-08-01-31 The prefix expansion operator @ has been added. ${@name}
- expands to the type of name or yields the attributes.
-07-11-15 A bug in the macro expander for multibyte characters in which
- part of the character contains a file pattern byte has been fixed.
-07-10-03 A bug in which : was not allowed as part of an alias name has been
- fixed.
-07-09-26 A bug in which appending a compound variable to a compound variable
- or to an index array didn't work has been fixed.
-07-09-19 In both emacs and vi edit mode, the escape sequence \E[A (usually
- cursor up, when the cursor is at the end of the line will fetch
- the most recent line starting with the current line.
-07-09-18 The value of ${!var} was correct when var was a reference to an
- array instance.
-07-09-18 The value of ${!var[sub]} was not expanding to var[sub] and this
- was fixed. It also fixed ${name} where name is a name reference
- to var[sub].
-07-09-18 It is now legal to create a name reference without an initialization.
- It will be bound to a variable on the first assignment.
-07-08-30 A discipline function can be invoked as ${x.foo} and is equivalent
- to ${ x.foo;} and can be invoked as x.foo inside ((...)).
-07-07-09 A bug in which typeset -a did not list indexed arrays has been
- fixed.
-07-07-03 The command substitution ${ command;} has been added. It behaves
- like $(command) except that command is executed in the current
- shell environment. The ${ must be followed by a blank or an
- operator.
-
-08-04-17 --- Release ksh93s+ ---
-08-04-17 A bug in which umask was not being restored correctly after a
- subshell has been fixed.
-08-04-15 A bug in which sending a STOP signal to a job control shell started
- from within a shell function caused cause the invoking shell to
- terminate has been fixed.
-08-04-11 A bug which caused $(exec > /dev/null) to go into an infinite loop
- has been fixed.
-08-03-27 A bug in which typeset -LZ was being treated as -RZ has been fixed.
-08-03-06 A bug with ksh -P on systems that support the the profile shell,
- in which it would exit after running a non-builtin has been fixed.
-08-01-31 A bug in which command substitution inside ((...)) could cause
- syntax errors or lead to core dumps has been fixed.
-08-01-17 A bug in which discipline functions could be deleted when invoked
- from a subshell has been fixed.
-08-01-17 A bug in which a command substitution consisting only of
- assignments was treated as a noop has been fixed.
-08-01-17 A bug in which discipline functions invoked from withing a
- compound assignment could fail has been fixed.
-08-01-16 Incomplete arithmetic assignments, for example ((x += )), now
- generate an error message.
-08-01-16 A bug in which a set discipline defined for a variable before
- an array assignment could cause a core dump has been fixed.
-08-01-03 A bug in on some systems in which exit status 0 is incorrectly
- returned by a process that catches the SIGCONT signal is stopped
- and then continued.
-07-12-13 A race condition in which a program that has been stopped and then
- continued could loose the exit status has been fixed.
-07-12-12 Code to check for file system out of space write errors for all
- writes has been added.
-07-12-11 A bug in the macro expander for multibyte characters in which
- part of the character contains a file pattern byte has been fixed.
-07-12-06 A bug in the emacs edit mode when multiline was set that output
- a backspace before the newline to the screen has been fixed.
-07-12-04 A bug in which using <n>TAB after a variable name listing expansion
- in the edit modes would cause the $ to disappear has been fixed.
-07-11-28 A bug in which setting IFS to readonly could cause a subsequent
- command substitution to fail has been fixed.
-07-11-27 A work around for a gcc 4.* C99 "feature" that could cause a job
- control shell to go into an infinite loop by adding the volatile
- attribute to some auto vars in functions that call setjmp().
-07-11-27 A bug in which the shell could read ahead on a pipe causing the
- standard input to be incorrectly positioned has been fixed.
-07-11-27 A bug in which compound variable UTF-8 multibyte values were not
- expanded or traced properly has been fixed.
-07-11-21 A bug where an unbalanced '[' in a command argument was not treated
- properly has been fixed.
-07-11-15 A bug in which compatibility mode (no long option names) getopts(1)
- incorrectly set the value of OPTARG for flag options has been fixed.
-07-11-15 A bug in which "hash -- name" treated "--" as an invalid name operand
- has been fixed.
-07-11-15 typeset now handles "-t -- [-r] [--]" for s5r4 hash(1) compatibility.
-07-11-15 A bug in which the umask builtin mis-handled symbolic mode operands
- has been fixed.
-07-11-15 Bugs in which shell arithmetic and the printf builtin mis-handled the
- signs of { -NaN -Inf -0.0 } have been fixed.
-07-11-15 The full { SIGRTMIN SIGRTMIN+1 ... SIGRTMAX-1 SIGRTMAX } range
- of signals, determined at runtime, are now supported.
-07-11-15 A bug in which creating an index array with only subscript 0 created
- only a simple variable has been fixed.
-07-11-14 A bug in which appending to an indexed array using the form
- name+=([sub]=value) could cause the array to become an associative
- array has been fixed.
-07-11-14 A bug in which typeset without arguments could coredump if a
- variable is declared as in indexed array and has no elements has
- been fixed.
-07-11-14 A bug in which creating a local SECONDS variable with typeset in
- a function could corrupt memory has been fixed.
-07-11-14 A bug which could cause a core dump when a script invoked by name
- from a function used compound variables has been fixed.
-07-11-05 A bug in which printf %d "'AB" did not diagnose unconverted characters
- has been fixed.
-07-11-05 printf %g "'A" support added for all floating point formats.
-07-11-01 A bug in which typeset -f fun did not display the function definition
- when invoked in a subshell has been fixed.
-07-10-29 The sleep builtin was fixed so that all floating point constants
- are valid operands.
-07-10-10 A bug in which the locale was not being restored after
- LANG=value command has been fixed.
-07-09-20 A bug in which a nameref to a compound variable that was local
- to the calling function would not expand correctly when displaying
- is value has been fixed.
-07-09-19 A bug which cause cause a core dump if .sh.edchar returned
- 80 characters or more from a keyboard trap has been fixed.
-07-09-14 A bug in which could cause a core dump when more than 8 file
- descriptors were in use has been fixed.
-07-09-10 A bug in which creating a name reference to an instance of
- an array when the array name is itself a reference has been fixed.
-07-09-10 The file completion code has been modified so that after an = in
- any word, each : will be considered a path delimiter.
-07-09-06 A bug in which subprocess cleanup could corrupt the malloc() heap
- has been fixed.
-07-08-26 A bug in which a name reference to an associative array instance
- could cause the subscript to be evaluated as an arithmetic expression
- has been fixed.
-07-08-22 A bug in which the value of an array instance was of a compound
- variable was not expanded correctly has been fixed.
-07-08-14 A bug which could cause a core dump when a compound assignment was
- made to a compound variable element with a typeset -a attribute
- has been fixed.
-07-08-08 A bug in which a trap ignored in a subshell caused it to be
- ignored by the parent has been fixed.
-07-08-07 A bug in which the set command would generated erroneous output
- for a variable with the -RZ attribute if the variable name had been
- passed to a function has been fixed.
-07-08-02 A bug in which read x[1] could core dump has been fixed.
-07-08-02 A second bug in which after read x[sub] into an associative array
- of an element that hasn't been assigned could lead to a core dump
- has been fixed.
-07-07-31 A bug in which a pipeline that completed correctly could have
- an exit status of 127 when pipefail was enabled has been fixed.
-07-07-09 The SHOPT_AUDIT compile option has been added for keyboard logging.
-07-06-25 In vi insert mode, ksh no longer emits a backspace character
- before the carriage return when the newline is entered.
-07-06-25 A bug in which pipefail would cause a command to return 0
- when the pipeline was the last command and the failure happened
- on a component other than the last has been fixed.
-07-06-25 A bug in the expansion of ${var/pattern/rep} when pattern or rep
- contained a left parenthesis in single quotes has been fixed.
-07-06-18 The braces for a subscripted variable with ${var[sub]} are now
- optional when inside [[...]], ((...)) or as a subscript.
-07-05-28 A bug in brace expansion in which single and double quotes did
- not treat the comma as a literal character has been fixed.
-07-05-24 The -p option of whence now disables -v.
-07-05-23 Several bug fixes in compound variables and arrays of arrays
- have been made.
-07-05-15 A bug in which the %B format of printf was affected by the
- locale has been fixed.
-07-05-14 A bug in which \ was not removed in the replacement pattern with
- ${var/pattern/rep} when it was not followed by \ or a digit has
- been fixed.
-07-05-10 A bug in which ksh -R file core dumped if no script was specified
- has been fixed. it not displays an error message.
-07-05-07 Added additional Solaris signals to signal table.
-07-04-30 A bug in which a pipeline with command substitution inside a
- function could cause a pipeline that invokes this function to
- hang when the pipefail option is on has been fixed.
-07-04-30 Added -q to whence.
-07-04-18 A small memory leak with each redirection of a non-builtin has
- been fixed.
-07-03-08 A bug in which set +o output command line options has been fixed.
-07-03-08 A bug in which an error in read (for example, an invalid variable
- name), could leave the terminal in raw mode has been fixed.
-07-03-06 A bug in which read could core dump when specified with an array
- variable with a subscript that is an arithmetic expression has
- been fixed.
-07-03-06 Several serious bugs with the restricted shell were reported and
- fixed.
-07-03-02 If a job is stopped, and subsequently restarted with a CONT
- signal and exits normally, ksh93 was incorrectly exiting with
- the exit status of the stop signal number.
-07-02-26 M-^L added to emacs mode to clear the screen.
-07-02-26 A bug in which setting a variable readonly in a subshell would
- cause an unset error when the subshell completed has been fixed.
-07-02-19 The format with printf uses the new = flag to center the output.
-07-02-19 A bug in which ksh93 did not allow multibyte characters in
- identifier names has been fixed.
-07-02-19 A bug introduced in ksh93 that causes global compound variable
- definitions inside functions to exit with "no parent" has been fixed.
-07-02-19 A bug in which using compound commands in process redirection
- arguments would give syntax errors <(...) and >(...) has been fixed.
-07-01-29 A bug which caused the shell to core dump which can occur when a
- built-in exits without closing files that it opens has been fixed.
-07-01-26 A bug in which ~(E) in patterns containing \ that are not inside ()
- has been fixed.
-
-06-12-29 --- Release ksh93s ---
-06-12-29 A bug in which the value of IFS could be changed after a command
- substitution has been fixed.
-06-12-22 /dev/(tcp|udp|sctp)/HOST/SEVRICE now handles IPv6 addresses on
- systems that provide getaddrinfo(3).
-06-12-19 A -v option was added to read. With this option the value of
- the first variable name argument will become the default value
- when read from a terminal device.
-06-11-20 A bug in which "${foo[@]:1}}" expands a null argument (instead of
- no argument), when foo[0] is not empty has been fixed.
-06-11-16 The discipline functions have been modified to allow each subscript
- to act independently. Currently the discipline function will not
- be called when called from a discipline function of the same variable.
-06-11-14 A bug which could cause a core dump if a file descriptor for
- an internal file was closed from with a subshell has been fixed.
-06-10-30 The redirections <# pattern, and <## pattern have been added.
- Both seek forward to the beginning of the next line that contains
- the pattern. The <## form copies the skipped portion to standard
- output.
-06-10-26 On systems that support stream control transport, the virtual file
- name /dev/sctp/host/port can now be used to establish connections.
-06-10-26 The printf modifier # when used with d produces units in thousands
- with a single letter suffix added. The modifier # when used with
- the i specification provides units of 1024 with a two letter suffix.
-06-10-24 The value of $! is now set to the process id of a job put
- into the background with the bg command as required by POSIX.
-06-10-23 A bug in which the value of $! was affected by a background
- job started from a subshell has been fixed.
-06-10-23 A bug in ${var:offset:len} in multibyte locales has been fixed.
-06-10-15 The remaining math functions from C99 were added for any system
- that supports them.
-06-10-13 The klockwork.com software detected a few coding errors that
- have been fixed.
-06-10-12 A bug when skipping over `...` with ${x:=`...`} when x is set
- has been fixed.
-06-10-11 A bug in process floating constants produced by the %a format
- of printf has been fixed.
-06-10-06 A bug in which IFS was not being restored correctly in some
- cases after a subshell has been fixed.
-06-10-06 A bug in which pipefail was not detecting some failures in
- pipelines with 3 or more states has been fixed.
-06-10-03 A bug in the processing of >(...) with builtins which could
- cause the builtin to hang has been fixed.
-06-10-03 A bug in the for loop optimizer which causes >(...) process
- substitution to be ignored has been fixed.
-06-09-17 The -a option was added to typeset for indexed arrays. This
- is only needed when using the ([subscript]=value ...) form.
-06-09-06 The showme option was added. Each simple command not beginning
- with a redirection and not occurring with in the while, until, if,
- select condition can be preceded by a semi-colon which will
- be ignored when showme is off. When showme is on, any command
- preceded by a colon will be traced but not executed.
-06-08-16 As a new feature, a leading ~(N) on a pattern has no effect
- except when used for file expansion. In this case if not
- matches are found, the pattern is replaced by nothing rather
- than itself.
-06-08-11 A bug in the expansion of ${.sh.match[i]:${#.shmatch[i]}} has
- been fixed.
-06-08-10 The read builtin options -n and -N have been modified to treat
- the size as characters rather than bytes unless storing into a
- binary (typeset -B) variable.
-06-07-27 When the here document operator << is followed directly by a #
- rather than a -, the first line of the here-document determines
- how much whitespace is removed for each line.
-06-07-26 A bug in the C-shell history (enabled with set -H) in which the
- history event !$ was not processed has been fixed.
-06-07-21 A bug on some systems in which assigning PATH on a command line
- would not take effect has been fixed.
-06-07-20 Add ksh93 and rksh93 as allowable names for ksh binaries.
-06-07-20 Removed the SHOPT_OO compilation option which was only partially
- implemented.
-06-07-20 The ability to use egrep, grep, and fgrep expressions within
- shell patterns has been documented.
-06-07-17 A bug with arithmetic command expressions for locales in which
- the comma is a thousands separator has been fixed.
-06-07-13 The default HISTSIZE was increased from 128 to 512.
-06-07-13 A multibyte problem with locales that use shift codes has been fixed.
-06-06-23 A number of bug fixes for command, file, and variable completion
- have been mode.
-06-06-20 Floating point division by zero now yields the constant Inf or -Inf
- and floating functions with invalid arguments yield NaN.
-06-06-20 The floating point constants Inf and NaN can be used in arithmetic
- expressions.
-06-06-20 The functions isinf(), isnan(), tanhl() have been added for
- arithmetic expressions.
-06-06-13 Internal change to use ordering for variables instead of hashing
- to speed up prefix matching.
-06-06-13 A window between fork/exec in which a signal could get lost
- and cause a program to hang has been eliminated
-06-06-13 A bug in edit completion with quoted strings has been fixed.
-06-06-07 The restricted options can now be enabled by set as well as on
- the command line. Once set, it can not be disabled.
-06-06-04 Modified built-in binding so that for systems for which /bin
- and /usr/bin are the same, a builtin bound to /bin will get
- selected when either /bin or /usr/bin is scanned.
-06-06-04 Added literal-next character processing for emacs/gmacs mode.
- This change is not compatible with earlier versions of ksh93
- and ksh88 when the stty lnext is control-v. The sequence
- escape-control-v will display the shell version.
-06-05-31 Modified emacs and vi mode so that entering a TAB after a partial
- TAB completion, generates a listing of possible completions.
- After the second TAB, a number followed by a TAB will perform
- the completion with the corresponding item.
-06-05-19 Modified arithmetic so that conversions to strings default to
- the maximum number of precision digits.
-06-05-16 Bug fixes for multibyte locales.
-06-05-10 The =~ operator was added to [[...]] and [[ string ~= ERE ]]
- is equivalent to [[ string == ~(E)ERE ]].
-06-04-25 A bug in the vi edit mode which could cause the shell to core dump
- when switching from emacs mode.
-06-04-17 A bug in which using LANG or LC_ in assignment lists with builtins
- did not restore the localed correctly has been fixed.
-06-04-04 A bug in which discipline functions could not be added to variables
- whose names started with .sh has been fixed.
-06-03-28 The -s option to typeset was added to modify -i to indicate short
- integers.
-06-03-28 A bug in which variables assignment lists before functions
- defined with function name were not passed on the functions
- invoked by this function has been fixed.
-06-03-28 A bug in which name references defined within a function defined
- with function name could not be used with compound variables has
- been fixed.
-06-03-27 A bug in which read <&p (print >&p) would cause the coprocess input
- (output) pipe to close before reading from (after writing to)
- it has been fixed.
-06-02-28 A bug in which stopping a job created with the hist builtin command
- would create a job that could not be restarted has been fixed.
-
-06-01-24 --- Release ksh93r ---
-06-01-24 A bug in which running commands with standard output closed would
- not work as expected has been fixed.
-06-01-23 A bug in which print -u<n> could fail when file descriptor <n> was
- open for writing has been fixed.
-06-01-19 The ?: arithmetic operator fixed to work when the operation after
- the colon was an assignment.
-05-12-24 A bug which could lead to a core dump when elements of a compound
- variable were array elements, i.e. foo=(bar=(1 2)), has been fixed.
-05-12-13 An arithmetic bug in which x+=y+=z was not working has been fixed.
-05-12-13 An arithmetic bug in which x||y was returning x when x was non-zero
- rather than 1 has been fixed.
-05-12-07 The aliases for integer and float have been changed to use attributes
- -li and -lE to handle long long and long double types.
-05-12-07 The histexpand (-H) option has been added which allows C-shell
- style history expansions using the history character !.
-05-12-07 The multiline option was added which changes that way the edit
- modes handle lines longer than the window width. Instead of
- horizontal scrolling, multiple lines on the screen are used.
-05-12-05 The whence builtin now returns an absolute pathname when the
- command is found in the current directory.
-05-11-29 A bug which caused ksh -c '[[ ! ((' to core dump rather than
- report a syntax error has been fixed.
-05-11-29 A bug when reading fixed length records into typeset -b variables
- which caused a zero byte to terminate the value has been fixed.
-05-11-22 The ability to seek to an offset within a file has been added
- with the new I/O redirection operators, <# and >#. Currently,
- these redirection operators must be followed by ((expr))
- but in a future release, it should be able to used to seek forward
- to the specified shell pattern. In addition $(n<#) expands to the
- current byte offset for file descriptor n.
-05-11-22 The .sh.match array variable is now set after each [[ ... ]]
- pattern match. Previously it was only set for substring matches.
-05-10-17 A bug in which the library path variable could be prefixed
- with a directory when a .path file was not encountered in
- the directory of the executable has been fixed.
-05-09-15 A for/while loop optimizer bug in which $OPTIND was not
- correctly expanded has been fixed.
-05-09-05 A bug in which a history command that invoked a history
- command could go into an infinite loop has been fixed.
-05-08-31 In the case that IFS contains to adjacent new-lines so that
- new-line is not treated as a space delimiter, only a single
- new-line is deleted at the end of a command substitution.
-05-08-19 When a tilde substitution expands to the / directory and is
- followed by a /, it is replaced by the empty string.
-05-08-16 A bug in which n<&m did not synchronize m has been fixed.
-05-08-16 A bug in which process substitution ( <() and >() ) was not
- working within for and while loops has been fixed.
-05-07-24 A bug in which the pattern ~(E)(foo|bar) was treated as a syntax
- error has been fixed.
-05-07-24 A bug in completion with <n>=, where n was the one of the
- previous selection choices has been fixed.
-05-07-21 A bug with multibyte input when no edit mode was specified which
- caused the input line to shift left/right has been fixed.
-05-06-24 A race condition which could cause the exit status to get lost
- on some fast systems has been fixed.
-05-06-21 A bug in which nested patterns of the form {m,n}(pat) would cause
- syntax errors has been fixed.
-05-06-21 A bug in the macro expander has been fixed which could cause a
- syntax error for an expansion of the form ${x-$(...)} when
- x is set and the command substitution contained certain strings.
-05-06-08 On systems for which echo does not do System V style \ expansions,
- the -e option was added to enable these expansion.
-05-06-08 A bug in which ${var op pattern} to not work when inside an
- arithmetic expression has been fixed.
-05-05-23 An extension to shell patterns that allows matching of nested
- groups while skipping over quoted strings has been added.
-05-05-18 A bug in which the line number for errors was not correct for
- functions loaded from FPATH has been fixed.
-05-04-18 A bug in which the exit status $? is not set when a trap triggered
- by the [[...]] command is executed has been fixed.
-05-04-08 Redirection operators can be directly preceded with {varname}
- with no intervening space, where varname is a variable name which
- allows the shell to select a file descriptor > 10 and store it
- into varname.
-05-04-08 SHOPT_CMDLIB_BLTIN=1 now includes <cmdlist.h> generated table.
-05-04-07 [[ -o ?option ]] is true if "option" is a supported option.
-05-04-05 A bug in handling file completion with spaces in the names
- has been fixed.
-05-03-25 The SIGWINCH signal is caught by default to keeps the LINES and
- COLUMNS variables in sync with the actual window size.
-05-03-25 Building ksh with SHOPT_REMOTE=1 causes ksh to set --rc if stdin is
- a socket (presumably part of a remote shell invocation.)
-05-03-25 Building ksh with SHOPT_SYSRC=1 causes interactive ksh to source
- /etc/ksh.kshrc (if it exists) before sourcing the $ENV file.
-05-03-25 {first..last[..incr][%fmt]} sequences added to brace expansions
- when braceexpand is enabled.
-05-03-03 A bug where a SIGCHLD interrupt could cause a fifo open to fail has
- been fixed.
-05-02-25 A bug in which a builtin command run in the background could
- keep a file descriptor open which could cause a foreground
- process to hang has been fixed.
-05-02-24 A bug where builtin library commands (e.g., date and TZ) failed to
- detect environment variable changes has been fixed.
-05-02-22 The read builtin and word splitting are now consistent with respect
- to IFS -- both treat IFS as field delimiters.
-05-02-22 The read builtin no longer strips off trailing delimiters that
- are not space characters when there are fewer variables than fields.
-05-02-17 A builtin bug on systems where dlsym(libcmd) returns link-time
- bindings has been fixed.
-05-02-12 A bug in which the lib_init() function for .paths BUILTIN_LIB
- libraries was not called has been fixed.
-05-02-06 A bug on some systems in which moving the write end of a co-process
- to a numbered file descriptor could cause it to close has been fixed.
-05-02-06 A bug in the vi-edit mode in which the character under the cursor
- was not deleted in some cases with the d% directive has been fixed.
-05-02-06 A bug where external builtin stdout/stderr redirection corrupted
- stdout has been fixed.
-05-02-04 A bug where times formatting assumed CLK_TCK==60 has been fixed.
-
-05-01-11 --- Release ksh93q ---
-05-01-11 A bug in the integral divide by zero check has been fixed.
-05-01-11 The -l option has been added to read /etc/profile and
- $HOME/.profile, if they exist, before the first command.
-05-01-11 An argument parsing bug that caused `kill -s x -- n' to fail has
- been fixed.
-05-01-11 The .paths file, introduced in ksh93m, which can appear in
- any directory in PATH, now allows a line of the form 'BUILTIN_LIB=.'
- When a command is searched for this directory, and the full path
- matches the path of the built-in version of the command (listed
- by the 'builtin' command) then the built-in version of the command
- is used. When ksh is built with SHOPT_CMDLIB_DIR=1 then all libcmd
- functions become builtins with the '/opt/ast/bin/' directory prefix.
-05-01-10 A bug in which a nameref to a compound name caused a core dump has
- been fixed.
-05-01-09 A bug in which some SIGCHLD interrupts (from child processes exiting)
- caused a fatal print/echo error diagnostic has been fixed.
-04-12-24 A bug in which some SIGCHLD interrupts (from child processes exiting)
- corrupted the internal process/job list, sometimes causing the shell
- to hang, has been fixed.
-04-12-01 A bug in which typeset -Fn truncated less than n digits for large
- numbers has been fixed.
-04-11-25 A bug in which standard error could be closed after a redirection
- to /dev/stderr has been fixed.
-04-11-17 A bug in which an expansion of the form ${array[@]:3} could expand
- to ${array[0]} when ${array[3]} was not set has been fixed.
-04-10-22 The -E or -orc command line option reads ${ENV-$HOME/.kshrc} file.
-04-10-22 `-o foo' equivalent to `+o nofoo', `-o nobar' equivalent to `+o bar'.
- `--foo' equivalent to `-o foo', `--nofoo' equivalent to `+o foo'
-04-10-05 The .paths file, introduced in ksh93m, which can appear in
- any directory in PATH, now allows a line of the form
- 'BUILTIN_LIB=libname'. When a command is searched for this directory,
- the shared library named by libname will first be searched for a
- built-in version of the command.
-04-09-03 <<< here documents now handle quotes in the word token correctly.
-04-08-08 The maximum size for read -n and and read -N was increased from
- 4095 to 32M.
-04-08-04 printf %q was modified so that if an no operand was supplied, no
- no output would be generated rather than a quoted empty string.
-04-08-01 The -n and -N options of the read builtin has been modified
- when reading variables with the binary attribute so that the
- data is stored directly rather than through assignment.
-04-08-01 The shcomp command has been modified to process alias commands
- under some conditions.
-04-07-31 The .sh.match variable added in ksh93l, now works like other
- indexed arrays.
-04-07-08 A loop optimizer bug which occurs when typeset is used in
- a for or while loop inside a function has been fixed.
-04-06-24 The number of subexpressions in a pattern was increased to 64
- from the current number of 20.
-04-06-17 The -t option to read was modified to allow seconds to be
- specified as any arithmetic expression rather than just
- an integral number of seconds, for example even -t 'sin(.5)'
- is now valid.
-04-06-16 Two small memory leak problems were fixed.
-04-06-15 A bug in ${var/pattern/"string"} which occurred when string
- contained pattern matching characters has been fixed.
-04-05-08 printf $'%d\n' produced an erroneous error message and has
- been fixed.
-04-05-24 A bug in which an associative array without any elements could
- cause a core dump when a script with an associative array with
- the same name was declared in a script invoked by name has
- been fixed.
-04-05-11 A bug in which an exec statement could close the script that
- is being processed in a script that is run by name causing
- a failure has been fixed.
-04-04-28 If the first character of assignment to an integer variable was 0,
- the variable had been treated as unsigned. This behavior was
- undocumented and has been removed.
-04-04-05 A bug in which the positioning of standard input could be incorrect
- after reading from standard input from a subshell has been fixed.
-04-03-30 A bug in the for loop optimizer which in rare cases could cause
- memory corruption has been fixed.
-04-03-29 The preset alias source='command .' has been added.
-04-03-29 A bug introduced in ksh93p on some systems in which invoked by
- name with #! on the first line would not get the signals handling
- initialized correctly has been fixed.
-04-03-29 A bug introduced in ksh93p in which a HUP signal received by
- a shell that is a session group leader was not passed down to
- its children has been fixed.
-
-04-02-28 --- Release ksh93p ---
-04-02-28 The ability to apply an append discipline to any variable has
- been added.
-04-02-14 A bug in which the exportall option (set -a) would cause incorrect
- results for arrays has been fixed.
-04-02-02 A bug in which an exported array would pass more than
- the first element to a script invoked by name has been fixed.
-04-02-02 A bug on some systems in which name=value pairs preceding a script
- invoked by name was not getting passed to the script has been fixed.
-04-01-20 A bug in which an unset discipline function could cause a core
- dump on some systems has been fixed.
-04-01-12 A bug in which a continue or break called outside a loop from
- inside a function defined with name() syntax could affect
- the invoking function has been fixed.
-04-01-08 If a command name begins with ~, only filename completion will be
- attempted rather than pathname completion using the builtin editors.
-04-01-08 A bug in the vi edit mode in which the wrong repeat count on
- multiple word replacements with the . directive has been fixed.
-04-01-06 Backspace characters are now handled correctly in prompt strings.
-04-01-06 The getopts builtin has been modified to accept numerical
- arguments of size long long on systems that support this.
-04-01-06 A bug in which unsetting all elements of an associative array
- would cause it to be treated as an indexed array has been fixed.
-03-12-15 A bug in which a quoted string ending with an unescaped $ would
- delete the ending $ in certain cases has been fixed.
-03-12-05 A bug in which the shell could hang when set -x tracing a command
- when an invalid multibyte character is encountered has been fixed.
-03-12-05 On some systems, if the KEYBD trap is set, then commands that use
- the meta key were not processed until return was hit. This
- has been fixed.
-03-12-05 A problem which occurred when the login shell was not a group
- leader that could cause it to fail has been fixed.
-03-12-05 A problem in which a shell could core dump after receiving a signal
- that should cause it to terminate while it was in the process
- of acquiring more space has been fixed.
-03-12-05 If ENV is not specified, the shell will default to $HOME/.kshrc
- for interactive shells.
-03-11-21 A bug introduced in ksh93o in which the DEBUG trap could get
- disabled after it triggered has been fixed.
-03-11-04 A bug in which using arithmetic prefix operators ++ or -- on a
- non-lvalue could cause a core dump has been fixed.
-03-11-04 A bug in which leading zeros were stripped from variable
- expansions within arithmetic computation to avoid being treated
- as octal constants when they should not have, has been fixed.
-03-10-08 A bug introduced in ksh93o in which a large here document inside
- a function definition could get corrupted has been fixed.
-03-09-22 A bug in which the .get discipline function was not being
- called when a string variable was implicitly referenced from
- within a numerical expression has been fixed.
-03-09-22 A bug in which a script without a leading #! could get executed
- by /bin/sh rather than the current shell on some systems has
- been fixed.
-03-09-12 To improve conformance with ksh88, leading zeros will be ignored
- when getting the numerical value of a string variable so that
- they will not be treated as octal constants.
-03-09-03 The builtin kill command now processes obsolete invocations
- such as kill -1 -pid.
-03-09-02 The restriction on modifying FPATH in a restricted shell (sh -r)
- has been documented.
-03-09-02 The restricted shell (sh -r) has been modified to disallow
- executing command -p.
-03-08-07 A bug in which the KEYBD trap was not being invoked when
- characters with the 8th bit set has been fixed.
-03-08-02 A parser bug introduced in ksh93o which caused the character
- after () in a Posix function definition to be skipped
- when reading from standard input has been fixed.
-03-08-01 A bug in which "${foo#pattern}(x)" treated (x) as if it were
- part of the pattern has been fixed.
-03-08-01 The command -x option has been modified so that any trailing
- arguments that do expand to a single word will be included
- on each invocation, so that commands like command -x mv * dir
- work as expected.
-
-03-07-20 --- Release ksh93o+ ---
-03-07-20 A bug in which could cause memory corruption when a posix
- function invoked another one has been fixed.
-03-07-15 A bug in which a file descriptor>2 could be closed before
- executing a script has been fixed.
-03-07-15 A parsing error for <() and >() process substitutions inside
- command substitution has been fixed.
-03-07-15 A parsing error for patterns of the form {...}(...) when
- used inside ${...} has been fixed.
-03-07-15 An error in which expanding an indexed array inside a compound
- variable could cause a core dump has been fixed.
-03-07-15 A bug in which on rare occasions a job completion interrupt
- could cause to core dump has been fixed.
-03-06-26 A bug in which process substitution embedded within command
- substitution would generate a syntax error has been fixed.
-03-96-23 A bug in which ${@:offset:len} could core dump when there
- were no arguments has been fixed.
-03-96-23 A bug in which ${X[@]:offset:len} could core dump when X
- was unset has been fixed.
-03-06-22 The -x option was added to the command builtin. If this
- option is on, and the number of arguments would exceed ARG_MAX,
- the command will be invoked multiple times with a subset of
- the arguments. For example, with alias grep='command -x grep,
- any number of arguments can be specified.
-03-06-14 A bug in which could cause a core dump on some systems with
- vi and emacs editors with the MULTIBYTE option has been fixed.
-03-06-06 A bug in which the shell could core dump when a script was
- run from its directory, and the script name a symlink to a file
- beginning with .., has been fixed.
-03-06-05 A bug in which the shell could core dump when a child process
- that it is unaware of terminates while it is calling malloc()
- has been fixed.
-03-06-02 An option named globstar (set -G) has been added. When enabled,
- during pathname expansion, any component that consists only of ** is
- matches all files and any number of directory levels.
-03-05-30 A bug in which the PATH search could give incorrect results when
- run from directory foo and PATH contained .:foo:xxx has been fixed.
-03-05-29 Some changes were made to the code that displays the prompt in edit
- mode to better handle escape sequences in the prompt.
-03-05-27 I added = to the list of characters that mark the beginning of
- a word for edit completion so that filenames in assignments
- can be completed.
-03-05-20 A bug in which read -N could hang on some systems when reading
- from a terminal or a pipe has been fixed.
-03-05-19 A bug in which the output of uname from a command substitution
- would go to the standard output of the invoking command when
- uname was invoked with a non-standard option has been fixed.
-03-05-19 A job control bug which would cause the shell to exit because
- it hadn't take back the terminal has been fixed. The bug
- could occur when running a function that contained a pipeline
- whose last element was a function.
-03-05-19 A job control timing bug introduced in ksh93o on some systems
- which could cause a pipeline to hang if the first component
- completed quickly has been fixed.
-03-05-13 The read builtin has been modified so that the builtin editors
- will not overwrite output from a previous incomplete line.
-03-05-13 A bug in which the name of an identifier could have the string
- .sh. prefixed to it after expanding a variable whose name begins
- with .sh. has been fixed.
-03-05-13 A bug in the expansion of $var for compound variables in which
- some elements would not be output when the name was a prefix
- of another name in the compound variable has been fixed.
-03-05-08 The last item in the ksh93o release on 03-01-02 has been
- altered slightly to preserve the leading 0's when the
- preceding character is a digit. Thus, with typeset -LZ3 x=10,
- $(( 1$x)) will be 1010 whereas $(( $x) will be 10.
-03-04-25 A bug in which if x is a name reference, then nameref y=x.foo
- did not follow x has been fixed.
-
-03-03-18 --- Release ksh93o ---
-03-03-18 A -N unary operator was added to test and [[...]] which returns
- true if the file exists and the file has been modified since it
- was last read.
-03-03-18 The TIMEFORMAT variable was added to control the format for
- the time compound command. The formatting description is
- described in the man page.
-03-03-06 A -N n option was added to read which causes exactly n bytes
- to be read unlike -n n which causes at most n bytes to be read.
-03-03-03 Three new shell variables were added. The variable .sh.file
- stores the full pathname of the file that the current command
- was found in. The variable .sh.fun names the current function
- that is running. The variable .sh.subshell contains the depth
- of the current subshell or command substitution.
-03-03-03 When the DEBUG trap is executed, the current command line after
- expansions is placed in the variable .sh.command. The trap
- is also now triggered before each iteration of a for, select,
- and case command and before each assignment and redirection.
-03-02-28 Function definitions are no longer stored in the history file so
- that set -o nolog no longer has any meaning.
-03-02-28 All function definitions can be displayed with typeset -f not
- just those stored in the history file. In addition, typeset +f
- displays the function name followed by a comment containing the
- line number and the path name for the file that defined this function.
-03-02-28 A bug in which the value of $LINENO was not correct when executing
- command contained inside mult-line command substitutions has been
- fixed.
-03-02-19 Since some existing ksh88 scripts use the undocumented and
- unintended ability to insert a : in front of the % and # parameter
- expansion operators, ksh93 was modified to accept :% as equivalent
- to % and :# as equivalent to # with ${name op word}.
-03-02-14 A bug which could cause a core dump when reading from standard
- error when standard error was a pty has been fixed.
-03-02-14 The shell arithmetic was modified to use long double on systems
- that provide this data type.
-03-02-09 A bug in which a function located in the first directory in FPATH
- would not be found when the last component of PATH was . and the
- current directory was one of the directories in PATH has been fixed.
-03-02-07 The trap and kill builtin commands now accept a leading SIG prefix
- on the signal names as documented.
-03-02-05 A bug in the expansion of ${var/$pattern}, when pattern contained
- \[ has been fixed.
-03-02-05 A bug in which .sh.match[n], n>0, was not being set for substring
- matches with % and %% has been fixed.
-03-01-15 A bug in which getopts did not work for numerical arguments specified
- as n#var in the getopts string has been fixed.
-03-01-09 A bug in which using ${.sh.match} multiple times could lead to
- a memory exception has been fixed.
-03-01-06 A bug in the expansion of ${var/pattern/$string} in the case that
- $string contains \digit has been fixed.
-03-01-02 A -P option was added for systems such as Solaris 8 that support
- profile shell.
-03-01-02 For backward compatibility with ksh88, arithmetic expansion
- with ((...)) and let has been modified so that if x is a zero-filled
- variable, $x will not be treated as an octal constant.
-
-02-12-05 --- Release ksh93n+ ---
-02-11-30 A bug that can show up in evaluating arithmetic statements that
- are in an autoloaded function when the function is autoload from
- another function has been fixed.
-02-11-30 An optimization bug in which an expansion of the form ${!name.@},
- which occurred inside a for or a while loop, when name is a name
- reference, has been fixed.
-02-11-18 A bug in which modifying array variables in a subshell could leave
- side effects in the parent shell environment has been fixed.
-02-11-18 A memory leak when unsetting an associative array has been fixed.
-02-11-14 The code to display compound objects was rewritten to make
- it easier for runtime extensions to reuse this code.
-02-11-14 A change was made to allow runtime builtins to be notified when
- a signal is received so that cleanup can be performed.
-02-10-31 User applications can now trap the ALRM signal. Previously,
- the ALRM signal was used internally and could not be used
- by applications.
-02-10-31 A bug in which signals received while reading from a coprocess
- for which traps were set was not handled correctly has been fixed.
-02-10-31 A bug in which a file opened with exec inside a subshell could
- be closed before the subshell completed has been fixed.
-02-10-21 A bug in which setting PATH or FPATH inside a function might not
- take effect has been fixed.
-02-10-21 A bug which could cause a core dump when a local SECONDS variable
- is defined in a function has been fixed.
-02-10-15 A bug in which the associate array name operator ${!array[@]}
- could return the same name multiple times has been fixed.
-02-10-15 A bug in which the zero'th element of an associative array was
- not getting set when an assignment was made without a subscript
- specified has been fixed.
-
-02-09-30 --- Release ksh93n ---
-02-09-30 The maximum indexed array size was increased to 16Megs.
-02-09-30 A bug which could cause a core dump when changing attributes
- of associative array has been fixed.
-02-09-30 A bug in which exporting an array variable would not export the
- 0-th element has been fixed.
-02-09-30 A bug in which an array assignment of the form a=($a ...) would unset
- 'a' before the right hand side was evaluated has been fixed.
-02-09-27 A bug in which the error message for ${var?message} when var was
- null or unset did not contain the variable name var has been fixed.
-02-09-27 A bug in which closing file descriptors 0 through 2 could
- cause a subsequent here document to fail has been fixed.
-02-09-14 A bug in whence which occurs when the specified name contained
- a / has been fixed.
-02-09-14 A bug in the parser for strings of the form name$((expr))=value
- has been fixed.
-02-09-14 A for loop optimization bug in which the number of elements in
- an array was treated as an invariant has been fixed.
-02-09-09 A bug in which redirection or closing of a file descriptor between
- 3 and 9 could cause a subsequent here document to fail has been
- fixed.
-02-09-09 A bug in which a background job was not removed from the job list
- when a subshell completed has been fixed, for example (prog&).
-02-09-03 A bug in which an assignment of the form name=(integer x=3)
- could be interpretted as an array assignment rather than a
- compound variable assignment has been fixed.
-02-08-19 A command completion bug which occurred on file systems that
- are case insensitive has been fixed.
-02-08-19 A bug which could lead to an exception on some systems (for
- example FREEBSD) which occurred when setting PATH has been fixed.
-02-08-11 A bug in arithmetic rounding in which a value input as a decimal
- string would output as a rounded version of the string has
- been fixed.
-02-08-11 A bug in which the last character could be deleted from shell
- traces and from whence when called from a multibyte locale
- has been fixed.
-02-08-01 A bug which could cause a core dump to occur when a shell script
- is executed while a coprocess is running that has closed the
- output pipe has been fixed.
-02-08-01 A bug in which command completion in multibyte mode could
- corrupt memory for long command lines has been fixed.
-
-02-06-17 --- Release ksh93n- ---
-02-06-17 A bug in which user defined macros could cause a core dump in
- with MULTIBYTE mode has been fixed.
-02-06-17 A bug in which printf format specifiers of the form %2$s were causing
- a core dump has been fixed.
-02-06-17 A bug in which setting stty to noecho mode did not prevent the
- echoing of characters by ksh when emacs or viraw mode
- was enabled has been fixed.
-02-06-17 A bug in which background job completion could cause the sleep
- builtin to terminate prematurely has been fixed.
-02-06-17 A bug in which the shell could core dump if getopts was called
- when the OPTIND variable contained a negative value has been fixed.
-02-06-10 The edit mode prompt has been modified to handle escape sequences.
-02-06-10 A bug which occurred for interactive shells in which the builtin
- cat command was used in command substitution on a file whose
- size was larger than PIPE_BUF has been fixed.
-02-06-10 A bug in which the trap on ERR was not being processed when
- set inside a function has been fixed.
-02-06-07 A bug in which function definitions could cause the history count
- to be decremented by one (and even become negative) has been fixed.
-02-06-05 A bug in read in which share mode could be enabled has been fixed.
-02-05-28 A bug which could occur when the last command of a script was
- a case statement and the action selected ended in ;& instead of ;;
- has been fixed.
-02-05-23 A bug with unary + introduced in ksh93k has been fixed.
-02-05-07 A bug in substitutions of the form ${var/pattern/string} in which
- a backslash was inserted in the replacement string when it contained
- a special pattern character has been fixed.
-02-05-01 A bug in the emacs edit mode which occurred in versions compiled
- for multibyte character sets which occurred when a repeated search
- was requested after a long line had been returned for the previous
- search has been fixed.
-02-04-02 vi and emacs edit modes were modified so that tab completion is
- disabled when invoked from the read built-in.
-
-02-03-26 --- Release ksh93m+ ---
-02-03-26 A bug in which \ was not handled correctly when used in file
- expansion has been fixed.
-02-02-18 A bug in which lines beginning with a # were deleted from here
- documents when the here-document delimiter was followed by
- a comment has been fixed.
-02-12-06 An optimization bug in which ${!x[@]) was treated as invariant in
- a for loop has been fixed.
-02-02-06 A bug in which the ERR trap is not cleared for a script invoked
- by name from within a function has been fixed.
-02-01-08 A bug in which a shell script executed from within a subshell
- could cause this script to have an invalid pointer leading
- to a memory fault has been fixed.
-02-01-07 Added here documents of the form <<< word (as per zsh) which
- is equivalent to << delim\nword\ndelim.
-02-01-07 A bug in which the first word of a compound assignment,
- x=(word ...), was treated as a reserved word has been fixed.
-02-01-07 A bug in the handling of \ when noglob was enabled and a
- substitution of the form ${word op pattern} occurred in the
- same word has been fixed.
-02-01-07 A compilation option, CMDLIB_BLTIN in the file OPTION, has
- been added. When this options is set, all commands implemented
- in libcmd become shell builtin commands by default.
-02-01-07 A bug in which builtin foo, where foo is already a builtin
- would result in the builtin foo getting removed has been fixed.
-02-01-07 A bug which the shell executed a command found in the current
- directory when PATH have no valid directories has been fixed.
-01-11-28 The value of $? was not being set when called with exit.
-01-11-28 If the last command was of the form (...) and a trap on EXIT or
- ERR was set, and the command inside () modified the trap, then
- the original trap wasn't executed.
-01-11-26 The value for 0 is now preceded by the base number when
- the base was not 10.
-01-11-26 The default has compilation mode has been changes so that
- viraw mode will always be on.
-
-01-10-31 --- Release ksh93m ---
-01-10-31 A for loop optimizer bug for subshells contained withing for
- loops has been fixed.
-01-10-16 typeset without arguments no longer outputs variable names
- that do not have any attributes that are set.
-01-10-16 A bug introduced in ksh93l in which assignments specified with
- the exec built-in were not being expanded properly has been
- fixed.
-01-10-11 An optimization bug in which ${!x) was treated as invariant in
- a for loop has been fixed.
-01-10-11 Unsigned integer variables in bases other than 10 are printed now
- expand in that base with the base prefix.
-01-10-10 A number of typos in the self generating man pages for shell
- built-ins have been fixed.
-01-10-04 The self generated man pages for hist and fc were not working
- correctly and have been fixed.
-01-10-03 Yet another optimizer bug in which shell patterns were
- treated as invariants has been fixed.
-01-09-27 Two bugs relating to multibyte history searches and to find
- have been fixed.
-01-09-27 A bug introduced in ksh93k in which the PATH searching was
- not restored after running a command with an assignment list
- has been fixed.
-01-09-26 A bug in which a zero filled field was treated as octal when
- converted to integer has been fixed.
-01-09-26 Yet another bug in the optimization of for loops related to
- recursive functions with break or continue statements has been fixed.
-01-09-25 The exponentiation operator ** was added to the shell arithmetic
- evaluation. It has higher precedence than * and is left
- associative.
-01-09-25 The code was modified to use the ast multibyte macros
- and functions for handing multibyte locales.
-01-09-25 The expansion ${parameter:offset:length} now handles negative
- offsets which cause offsets to be measured from the end.
-01-09-25 Some spelling errors in the documentation were corrected.
-01-09-24 The /dev/tcp/host/port and /dev/udp/host/port now allow
- the ports to be specified by service name.
-01-09-24 The change staring with ksh93g in which the the appropriate
- library path variable is prepended with a corresponding library
- directory has been modified. With the new method, only the
- library path defined in the file named .paths in the directory
- where the executable is found will be modified. See the
- man page for more details.
-01-09-23 The .fpath file (see ksh93h) is no longer looked for in each
- directory on the path to locate function directories. The
- file named .paths is used instead.
-01-09-23 A bug in which IFS was not being restored after being changed in
- a subshell has been fixed.
-01-09-16 With the vi and emacs edit modes, after a list of command
- or functions is generated with = or M-= respectively,
- any element from the list can be pasted on the command line
- by preceding the = or M-= with a numeric parameter specifying
- the position on the list.
-01-09-16 A bug in ksh93l caused command completion not to find aliases
- and functions. Command listing from the edit mode was presented
- in reverse order. This has been fixed.
-01-09-13 Another bug in the optimization of for loops related to subshells
- when traps were set has been fixed.
-01-09-07 A change in ksh93l caused brace expansion to stop working
- and this has been fixed.
-01-09-04 A bug introduced in ksh93k in which an arithmetic statement
- within a function that used name references did not follow the
- reference has been fixed.
-01-09-04 A bug introduced in ksh93l in which export -p did not prefix
- each export with the word export has been fixed.
-01-08-29 A bug in multibyte input which occurred when a partial multibyte
- character was received has been fixed.
-01-08-29 A bug introduced in ksh93l which could cause a core dump
- when an assignment list containing PATH is specified inside
- command substitution has been fixed.
-01-08-09 Another bug in the optimization of for loops in ksh93l caused
- errors in recursive functions using local variables that
- contained for loops has been fixed.
-01-07-27 A bug in which IFS would be unset after a command substitution
- inside a here document has been fixed.
-01-07-26 To conform to the POSIX standard, if you invoked ksh name,
- and name does not contain a /, it will first try to run
- one in the current directory whether it is executable or not
- before doing a path search for an executable script. Earlier
- versions first checked for an executable script using the
- PATH variable.
-01-07-23 A bug in which unset -f invoked in a subshell could unset a
- function defined in the parent has been fixed.
-01-07-16 A bug in the optimization of for loops in ksh93l caused
- name references to be treated as invariants has been fixed.
-01-07-09 A bug in which a discipline function applied to a local variable
- could cause a shell exception has been fixed. Discipline
- functions can only be specified for global variables.
-
-01-06-18 --- Release ksh93l ---
-01-06-18 A bug in assigning integers larger than can be represented as
- long integers to floating point variables has been fixed.
-01-06-18 A bug in the handling of unsigned integers (typeset -ui) has
- been fixed.
-01-06-04 The evaluation of the PS1 prompt no longer effects the value
- of the $? variable.
-01-06-01 A small memory leak from subshells has been fixed.
-01-05-22 A bug in which attributes for variables that did not have
- values would be lost after a subshell has been fixed.
-01-05-22 The %R format has been added to convert a shell pattern into
- an extended regular expression.
-01-05-22 The escape sequences \e, \cX, \C[.collating-element.], and
- \x{hex} have been added to ASCII-C strings and to printf format
- strings.
-01-05-20 Patterns of the form {n}(pattern) and {m,n}(pattern) are now
- recognized. The first form matches exactly n of pattern whereas,
- the second form matches from m to n instances of pattern.
-01-05-20 The shell allows *-(pattern), +-(pattern), ?-(pattern),
- {m,n}-(pattern}, and @-(pattern) to cause the minimal
- match of pattern to be selected whenever possible rather
- than the maximal (greedy) match.
-01-05-20 The character class [:word:] has been added to patterns.
- The word class is the union of [:alnum:] and the character _.
-01-05-20 Inside (...) pattern groups, the \ character is now treated
- specially even when in an enclosing character class. The
- sequences, \w, \d, \s are equivalent to the character classes
- word, digit, and space respectively. The sequences \W, \D,
- and \S are their complement sets.
-01-05-20 The shell now recognizes pattern groups of the form
- ~(options:pattern) where options or :pattern can be omitted.
- Options use the letters + and - to enable and disable options
- respectively. The option letters g (greedy), i (ignore case)
- are used to cause maximal matching and to cause case
- insensitive matching respectively. If :pattern is also
- specified, these options are only in effect while this
- pattern is being processed. Otherwise, these options remain
- in effect until the end of the pattern group that they are contained
- in or until another ~(...) is encountered. These pattern groups
- are not counted with respect to group numbering.
-01-05-14 When edit completion, expansion, or listing occurs in the
- middle of a quoted string, the leading quote is ignored when
- performing the completion, expansion, or listing.
-01-05-14 A small memory leak from subshells has been fixed.
-01-05-10 A bug in which open files were not restored after a subshell
- that had used exec to replace a file has been fixed.
-01-05-10 Redirection to a null file name now generates an error message.
-01-05-09 The shell now rejects some invalid parameter substitutions that
- were previously processed in undefined ways.
-01-05-09 A bug in which the output of select was not flushed before the
- read when input did not come from the terminal has been fixed.
-01-05-08 A bug in which job ids would not be freed for interactive shells
- when subshells ran built-ins in the background has been fixed.
-01-05-08 The FPATH variable now requires an explicit . to cause the
- current directory to be treated as a function directory.
-01-05-08 A bug in read -n when echo mode was disabled has been fixed.
-01-05-07 A bug in which function definitions could be listed as part
- of the history has been fixed.
-01-04-30 This release uses a new and often much faster pattern matcher than
- earlier releases.
-01-04-30 An optimizer now eliminates invariant parameter expansions from
- for while and until loops.
-01-04-30 The variable .sh.match is set after each pattern match (# % or /)
- in a variable substitution. The variable .sh.match is an
- indexed array with element 0 being the complete match.
- The array is only valid until the next subsequent pattern
- match or until the value of the variable changes which ever
- comes first.
-01-04-30 A self generating man page has been added to shcomp. Also,
- shcomp now stops compiling when it finds an exit or exec
- command and copies the remainder so that it can be used
- for standard input.
-01-04-30 The shcomp command was modified so that it can work in an
- EBCIDIC environment and that binary scripts are portable
- across environments.
-01-04-30 A bug in the handling of a trailing : in PATH has been fixed.
-01-04-30 A bug in which the builtin version of a command would get invoked
- even though the full pathname for the command was specified
- has been fixed.
-01-04-30 A bug in which read would loose the last character when
- reading the last line of a file that did not contain a new-line
- character has been fixed.
-01-04-23 A bug on some systems in which in vi mode the end of file
- character and end of line character could be swapped has
- been fixed.
-01-04-23 A bug on some systems in which invoking a shell script that
- did not have execute permission could set the exit value to
- 127 rather than 126 has been fixed.
-01-04-20 A bug in which read -n from a pipe would block if fewer than
- n characters was received has been fixed.
-01-04-09 A bug in which invalid patterns, for example, ) by itself,
- was not treated as a string has been fixed so that if i=')',
- then [[ $i == $i ]] is true.
-01-04-09 The shell arithmetic now interprets C character constants.
-01-04-09 A bug in which a non-zero return from a function defined
- with the function reserved word did not trigger the ERR
- trap or exit with set -e has been fixed.
-01-04-02 A bug on some systems, in which characters above 127 were
- not displayed correctly in vi or emacs edit mode has been fixed.
-01-04-02 A bug on some systems, introduced in the 'k' point release, in
- which the erase character in viraw mode was moving the cursor
- to the left without erasing the character has been fixed.
-01-04-02 On some systems the wcwith() function was returning a wrong
- value for characters and caused characters to be displayed
- incorrectly from the shell edit modes. A work around for
- this problem has been added.
-01-03-26 A bug in which valid scripts could produce syntax errors
- when run with locales that considered characters such as "'"
- to be space characters has been fixed.
-01-03-20 A bug in which an syntax error in an arithmetic expression
- entered interactively could cause the shell to go into
- an infinite loop outputting the error message has been fixed.
-01-03-10 ksh93 accepts -l as a synonym for -L in test on systems for
- which /bin/test -l tests for symbolic links.
-01-03-10 A bug in parsing scripts in which { and } are used in place of
- in and esac in case statements embedded in compound commands
- has been fixed. Use of { and } for in and esac is obsolete.
-01-03-06 A bug in which an argument of the form foo=bar was not
- being passed correctly to a traced function whose name
- was foo has been fixed.
-01-03-02 Using $(trap -p name) did not print the name of the current
- trap setting for trap name.
-01-02-26 Exported floating point variables gave incorrect results
- when passing them to ksh88. This has been fixed.
-01-02-25 A race condition in which a coprocess which completed too quickly
- would not allow subsequent coprocesses to start has been fixed.
-01-02-25 The 'g' format specifier is now handled by printf. It had
- inadvertently been omitted.
-01-02-20 The + was not being displayed during an execution trace
- with the += assignment operator.
-01-02-19 The error message which occurs when the interpreter name
- defined on the #! line does not exist is more informative.
-01-02-19 A bug in which $0 would not be set correctly when a
- script with #! was invoked by full pathname from the
- directory of the script has been fixed.
-01-02-19 A shell script did not always pick up tty mode changes
- made by external commands such as stty which could
- effect the behavior of read.
-01-02-19 The -u, -g, and -k unary tests did not give the correct
- results when used with negation and this has been fixed.
-
-01-02-05 --- Release ksh93k+ ---
-01-02-05 The sequence \<newline> inside $'...' was not incrementing
- the line count and this has been fixed.
-01-02-05 Modified expansion of "${@-}" so that if no arguments are set
- it results in null string rather than nothing.
-01-02-02 memory leak problem with local variables in functions fixed.
-01-01-25 allow arithmetic expressions with float%int and treat them
- as ((int)float)%int rather than as an error.
-01-01-19 read -n1 was not working and has been fixed.
-01-01-17 ksh now handles the case in which a here document in command
- substitution $() is terminated by the trailing ). Previously,
- a new-line was needed at the end of the delimiter word.
-01-01-02 A bug in which a KEYBD trap would cause a multi-line token
- to be processed incorrectly has been fixed.
-00-12-10 Arithmetic integer constants can now have L and U suffices.
-00-12-10 A bug in the processing of arithmetic expressions with compound
- variables when the -n option is on has been fixed.
-00-12-08 A bug in M-f and M-b from emacs mode has been fixed. This
- bug only occurs when ksh93 is compiled without MULTIBYTE enabled.
-00-11-29 A bug in which jobs -p would yield 0 for background
- jobs run in a script has been fixed.
-00-11-21 A bug in integer arrays in which the number of elements is
- incorrect when the ++ operator is applied to a non-existing
- element has been fixed. For example, integer x; ((x[3]++)).
-00-11-20 A timing bug in which the shell could reset the terminal
- group to the wrong value in the case that the a new process
- changes the terminal group during startup has been fixed.
-
-00-10-27 --- Release ksh93k ---
-00-10-27 Using tab for completion now works only when applied
- after a non-blank character at the end of the current line.
- In other case a tab is inserted.
-00-10-27 A bug in the emacs edit mode for ^X^E has been fixed.
- The ^X^E sequence is supposed to invoke the full editor
- on the current command.
-00-10-18 A bug in which expansions of the form ${var//pattern/string}
- did not work correctly when pattern was '/' or "/" has
- been fixed.
-00-10-18 The output format for indexed arrays in compound variables
- has been modified so that it can be used as input.
-00-10-18 Assignments with name references (typeset -n) will now
- implicitly unreference an existing name reference.
-00-10-17 A bug the += append operator when a single array element
- is appended to a variable that is not an array has been fixed.
-00-10-16 A bug in which the SIGCONT signal was being sent to
- each process will kill -0 or kill -n 0 has been fixed.
-00-10-12 The arithmetic evaluation portion has been rewritten to
- perform a number of optimizations.
-00-10-10 A bug in which name prefix matching ${!name.*} was not
- checking name to see if it was a name reference has been fixed.
-00-09-26 A bug in the multibyte version in which the width of for
- non-printing characters was not correct has been fixed.
-00-09-12 Made changes to get multibyte editing work on UWIN for windows
-00-09-12 A bug in which multibyte characters would be displayed incorrectly
- has been fixed.
-00-08-08 Removed build dependency on iswprint() and iswalph().
-00-07-20 In some cases the read builtin would read more than a single
- line from a pipe on standard input and therefore leave the seek
- position in the wrong location.
-00-07-05 If the directory / is on the path, a / will not be inserted
- between the directory and the file name during path searching
- to avoid searching // for systems that treat this specially.
-00-06-26 A bug in which on rare occasions wait could return before all
- jobs have completed has been fixed.
-00-06-21 A bug in which backspace did not work correctly during the
- R replace directive in vi-mode has been fixed.
-00-06-12 Added variable name completion/expansion/listing to the set of
- completions. Variable name completions begin with $ or "$ followed
- by a letter.
-00-05-09 --- Release ksh93j ---
-00-05-09 Modified command substitution to avoid using /tmp files when
- run on read-only file systems.
-00-04-17 Modified printf to handle '%..Xc' and '%..Xs' options where X
- is not an alpha character. Previous versions core dumped with this.
-00-04-10 Changes to multibyte editing code were made to use standard
- ISO C functions rather than methods devised before the standard.
-00-04-09 Add %H options to printf to output strings with <"'&\t> properly
- converted for use in HTML and XML documents.
-00-04-07 Modified getopts builtin to handle \f...\f in usage string
- by invoking specified function.
-00-04-04 Added self generating man pages for bg, fc, fg, disown, jobs,
- hist, let, ., and ulimit.
-00-03-30 The append operator += has been added and can be used
- for all assignments, strings, arrays, and compound variables.
-00-03-30 Code was modified in several places to support automatic
- generation of C locale dictionaries.
-00-03-28 A bug in which the set and trap commands invoked with --name
- type arguments would terminate the invoking script has
- been fixed.
-00-03-27 A bug in which the library path variable was not updated
- correctly on some systems as described in the 'g' point
- release has been fixed.
-00-03-07 printf now returns a non-zero exit status when one of
- its arguments cannot be converted to the given type.
-00-03-05 The return value and error message for a command that
- was found on the path but was not executable was set
- incorrectly.
-00-03-05 A prototype for ioctl() was removed from the vi edit mode.
-
-00-01-28 --- Release ksh93i ---
-00-01-28 Most of the built-in commands and ksh itself are now
- self documenting. Running command --man will produce
- screen output. Running command --html produces the
- man page in html format.
-00-01-28 The getopts builtin can process command description
- strings to produce man pages.
-00-01-28 A bug in which a script could terminate when getopts
- encountered an error when invoked inside a function
- has been fixed.
-00-01-28 When a symbolic link was specified as the name of
- the script to invoke by name, the value of $0 was
- set to the real file name rather than the link name
- in some cases and this has been fixed.
-00-01-28 A bug in which the precision given as an argument
- to printf was not working has been fixed.
-
-99-03-31 --- Release ksh93h ---
-99-03-31 The PATH search algorithm has been modified to look
- for a file named .fpath in each bin directory and if
- found, to search for functions in this directory if
- it cannot find the command in that directory.
-99-03-31 When performing pathname expansion, the shell checks
- to see whether each directory it reads is case sensitive
- or not, and performs the matching accordingly.
-99-03-31 The %T format for printing formatted date/time.
-99-03-31 The emacs and vi modes now handle arrow keys when
- they use standard ANSI escape sequences.
-99-03-31 The TAB key can be used for completion in emacs and viraw mode.
-99-03-31 A bug in setting .sh.editchar during the KEYBD trap
- for the MULTIBYTE option was fixed in release ksh93h.
-99-03-31 A bug in shcomp for compilation of unary operators with [[...]]
- has been fixed.
-99-03-31 A bug in which the value of $? was changed when executing
- a keyboard trap has been fixed.
-99-03-31 The handling of SIGCHLD has been changed so that the
- trap is not triggered while executing trap commands
- to avoid recursive trap calls.
-99-03-31 A bug in which a local variable in a function declared readonly
- would generated an error when the function went out of
- scope has been fixed.
-99-03-31 A bug in which \<new_line> entered from the keyboard
- with the KEYBD trap enabled has been fixed.
-99-03-31 The error message for a misplaced ((, for example print ((3),
- was often garbled and has been fixed.
-99-03-31 A bug in the KEYBD trap in which escape sequences of the form
- <ESC>[#~ were not being handled as a unit has been fixed.
-99-03-31 A bug in which ksh would consider expressions like [[ (a) ]]
- as syntax errors has been fixed.
-99-03-31 A function defined as foo() without a function body
- was not reported as a syntax error.
-99-03-31 A bug in which ksh could run out of file descriptors when
- a stream was repeatedly opened with exec and read from
- has been fixed.
-
-98-04-30 --- Release ksh93g ---
-98-04-30 The pipefail option has been added. With pipefail
- enabled, a pipeline will not complete until all
- commands are complete, and the return value will
- be that of the last command to fail, or zero if
- all complete successfully.
-98-04-30 The name-value pair library uses the cdt library rather
- than the hash library. This change should be transparent
- to applications.
-98-04-30 On the U/WIN version for Window 95 and Windows NT,
- when a directory beginning with a letter followed by
- a colon is given to cd, it is assumed to be an absolute
- directory
-98-04-30 When an executable is found on a given path,
- the appropriate library path variable is prepended
- with a corresponding library directory.
-98-04-30 A bug in which a name reference could be created to
- itself and later cause the shell to get into an infinite
- loop has been fixed.
-98-04-30 A bug in shcomp relating to compound variables was fixed.
-98-04-30 A bug introduced in ksh93e in which leading 0's in -Z
- fields caused the value to be treated as octal for arithmetic
- evaluation has been fixed.
-98-04-30 A bug when a name reference with a shorter name than
- the variable it references was the subject of a compound
- assignment has been fixed.
-98-04-30 A bug which in which assignment to array variables in
- a subshell could effect the parent shell has been
- fixed.
-98-04-30 read name?prompt was putting a 0 byte at the end of the
- prompt on standard error.
-98-04-30 A bug in [[ string1 > string2 ]] when ksh was run with -x
- has been fixed.
-98-04-30 A bug in which the escape character was not processed
- correctly inside {...} when brace expansion is enabled
- has been fixed, for example {\$foo}.
-98-04-30 A bug in line continuation in here-documents has been
- fixed.
-98-04-30 The default base when not specified with typeset -i is
- 10 in accordance with the documentation. Previously,
- the value was determined by the first assignment.
-98-04-30 A parsing bug in which a # preceded alphanumeric
- characters inside a command substitution caused
- a syntax error to be reported has been fixed.
-98-04-30 A bug in which a decimal constant represented as 10#ddd
- where ddd was more than five digits generated a syntax
- error has been fixed.
-98-04-30 A bug in here document expansion in which ${...} expansions
- were split across buffer boundaries has been fixed.
-98-04-30 The sh_fun() function now takes third argument which
- is an argument list for the invoked discipline function
- or built-in.
-98-04-30 A callback function can be installed which will give
- notification of file duplications and file closes.
-98-04-30 When ksh is compiled on systems that do not use fork()
- current option settings where not propagated to sub-shells.
-
-97-06-30 --- Release ksh93f ---
-97-06-30 Hostnames in addition to host addresses can be given in
- /dev/tcp/host/port virtual file names.
-97-06-30 File name completion and expansion now quotes special
- characters in file names from both emacs and vi edit modes.
-97-06-30 An empty for list behave like a for list with null expansions.
- It produces a warning message with sh -n.
-97-06-30 The code has been modified to work with EBCDIC as well as ASCII.
-97-06-30 A bug which would cause the secondary prompt to be
- displayed when a user entered a literal carriage
- return has been fixed.
-97-06-30 A bug which caused ksh read -s name to core dump was
- fixed.
-97-06-30 A bug with the expansion of \} and \] inside double
- quoted strings that also contained variable expansions
- has been fixed
-97-06-30 Changes in the ksh93e point release caused autoload
- functions invoked from within command substitution
- to fail. This has been fixed.
-97-06-30 A bug in the processing of here-documents that could
- prevent variable substitution to occur after $(...) command
- substitution for long here documents has been fixed.
-97-06-30 A bug caused by a race condition that could cause SIGTERM
- to be ignored by a child process has been fixed.
-97-06-30 A bug which prevented the startup of a coprocess immediately
- after killing a running coprocess has been fixed.
-97-06-30 ulimit foobar, where foobar is not an arithmetic
- expression, now gives an error message as it did with ksh88
- instead of setting the file size limit to 0.
-97-06-30 A bug which could cause an interactive shell to terminate when
- the last process of a pipeline was a POSIX function was fixed.
-97-06-30 A bug which could cause command substitution of a shell script
- to core dump has been fixed.
-97-06-30 A security hole was fixed in suid_exec.
-97-06-30 Arithmetic functions such as pow() that take more than
- one argument, did not work if arguments other than the
- first contained parenthesized sub-expression.
-97-06-30 The error message from a script containing an incomplete
- arithmetic expression has been corrected.
-97-06-30 A bug which caused a core dump on some machines when
- the value of a name reference contained a positional
- parameter and the name reference was not defined inside
- a function has been fixed.
-97-06-30 Arithmetic expressions now correctly handle hexadecimal
- constants.
-97-06-30 A bug in which integer variables could be expanded
- with a leading 10# when declared with typeset -i
- multiple times has been corrected.
-97-06-30 A bug in which IFS wasn't correctly restored when
- set within command substitution has been fixed.
-97-06-30 The _ character is now considered as part of a word
- with the M-f and M-b emacs directives as it was in ksh88.
-97-06-30 A bug in brace pattern expansions that caused expressions
- such as {foo\,bar,bam} to expand incorrectly have been fixed.
-
-
-96-07-31 --- Release ksh93e ---
-96-07-31 The math functions, atan2, hypot, fmod, and pow were added.
-96-07-31 When a shared library is loaded, if the function lib_init()
- is defined in the library, it is invoked the first time that
- the library is loaded with builtin -f library.
-96-07-31 The k-shell information abstraction database option, KIA,
- has been revamped.
-96-07-31 Empty command substitutions of the form $() now work.
- whence -v foo now gives the correct result after calling
- builtin -d foo.
-96-07-31 A bug in right to left arithmetic assignment for which
- the arithmetic expression (( y = x = 1.5 )) did not
- yield 1 for y when x was declared typeset -i was fixed.
-96-07-31 printf has been fixed to handle format containing \0
- and/or \0145 correctly. In addition, characters following
- %b in the format string are no longer displayed when
- the operand contains \c.
-96-07-31 A bug in printf that could cause the %E format to
- produce unnormalized results has been fixed.
-96-07-31 A bug which causes some arithmetic expressions to be
- incorrectly evaluated as integer expressions rather
- that floating point has been fixed.
-96-07-31 Functions defined inside a subshell no longer remain
- defined when the subshell completes.
-96-07-31 The error message from sh -c ';echo foo' has been
- corrected.
-96-07-31 The format for umask -S has been changed to agree
- with the specification in the POSIX standard.
-96-07-31 A bug that caused side effects in subscript evaluation
- when tracing was enabled for subscripts using ++ or --
- has been fixed.
-96-07-31 To conform to the Posix standard getopts has been changed
- so that the option char is set to ? when it returns with
- a non-zero exit status.
-96-07-31 The handling of \} inside ${name...} has been fixed so
- that the \ quotes the }.
-96-07-31 A bug that caused the read builtin to resume execution
- after processing a trap has been fixed.
-96-07-31 [[ -s file ]] has been fixed so that if file is open
- by ksh, it is flushed first.
-96-07-31 In some cases attributes and sizes for non exported
- variables weren't being reset before running a script.
-96-07-31 The value of TMOUT was affected by changes make to
- it in a subshell.
-96-07-31 The jobs command did not reflect changes make by
- sending the CONT signal to a command.
-96-07-31 The error message for ksh -o unknown was incorrect.
-96-07-31 Functions invoked as name=value name, did not use
- values from the calling scope when evaluating value.
-96-07-31 A bug in which the shell would reexecute previously
- executed code when a shell script or coprocess was
- run in the background has been fixed.
-96-07-31 A bug in which an empty here-document would leave
- a file descriptor open has been fixed.
-96-07-31 A bug in which $(set -A array ...) would leave a
- side effect has been fixed.
-96-07-31 A discipline function for a global variable defined
- within a function defined with the function keyword,
- incorrectly created a local variable of the same name
- and applied the discipline to it.
-
-95-08-28 --- Release ksh93d ---
-95-08-28 The \ character was not handled correctly in replacement
- patterns with ${x/pattern/replace}.
-95-08-28 A bug with read in which the line did not end with
- a new-line has been fixed.
-95-08-28 A bug in file name generation which sometimes
- appended a . for filenames that ended in / has
- been fixed.
-95-08-28 If a process is waited for after a status has
- been returned by a previous wait, wait now
- returns 127.
-95-08-28 A bug with hist (fc) -e which prevented a command
- to re-executed after it had been edited has been fixed.
-95-08-28 A bug which prevented quoting from removing the meaning
- of unary test operators has been fixed.
-95-08-28 A bug with typeahead and KEYBOARD traps with the
- MULTIBYTE option set has been fixed.
-95-08-28 Builtin functions can take a third argument which is
- a void*.
-95-08-28 The nv_scan() function can restrict the scope of a walk
- to the top scope.
-
-95-04-31 --- Release ksh93c ---
-95-04-31 The expansion of "$@" was incorrect when $1 was the null
- string.
-95-04-31 A bug which could incorrectly report a syntax error in
- a backquoted expression when a $ was preceded by \\
- has been fixed.
-95-04-31 A bug which prevented the shell from exiting after
- reporting an error when failing to open a script
- has been fixed.
-95-04-31 A bug that could lead to memory corruption when a
- large here document that required parameter or command
- substitution was expanded has been fixed.
-95-04-31 A bug that could cause a core dump on some systems
- after ksh detected an error when reading a function
- has been fixed.
-95-04-31 A bug which could cause a coprocess to hang when
- reading from a process that has terminated has been fixed.
-95-04-31 A bug which caused a script to terminate when set -e
- was on and the first command of and && or || list
- failed has been fixed.
-95-04-31 A bug with here documents inside $(...) when the delimiter
- word is an identifier has been fixed.
-95-04-31 A bug which caused $0 to display the wrong value when
- a script was invoked as an argument to the . command
- and the eval command has been fixed.
-95-04-31 A bug that could cause the built-in sleep to hang
- has been fixed.
-95-04-31 A bug introduces in 12/28/93b which caused the backslash
- to be removed when it was followed by digit inside double
- quotes in some instances has been fixed.
-95-04-31 A bug which could cause a core dump if ksh was invoked with
- standard input closed has been fixed.
-95-04-31 A bug which could cause a core dump if typeset -A was
- specified for an existing variable has been fixed.
-95-04-31 Variables that were unset but had attributes such as readonly
- and export were not listed with readonly, export and typeset.
-95-04-31 Several problems with signals have been fixed.
-95-04-31 A bug which prevented ulimit -t from working has been fixed.
- Also, a bug in which failed ulimits could cause a core dump
- has also been fixed.
-95-04-31 A bug in expansion of the form ${name/#pattern/string} and
- ${name/%pattern/string} has been fixed.
-95-04-31 A bug which caused read -r on a line that contained only
- blanks to get a non-null value has been fixed.
-95-04-31 A bug introduced in the 'a' point release in which
- ${x='\\'} expanded to \ when x was unset has been fixed.
-95-04-31 A bug which prevented a trap on EXIT from being executed
- when the last command in a script was a function invocation
- has been fixed.
-95-04-31 A bug which caused an interactive shell ignore input when
- standard error was redirected to a file with exec,
- and then restored with exec 2>&1 has been fixed.
-95-04-31 An interactive shell turns on monitor mode even when
- standard error has been redirected to a file.
-95-04-31 A bug which could cause standard input to be incorrectly
- positioned for the last command of a script has been fixed.
-95-04-31 A bug in the edit modes which allowed walking back in
- the history file for more than HISTSIZE commands has
- been fixed.
-95-04-31 A bug which could cause a core dump if variable TMPDIR was
- changed between two command substitutions has been fixed.
-95-04-31. A bug which prevented a trap on EXIT from being cleared
- has been fixed.
-95-04-31 A bug fixed for the v directive in vi MULTIBYTE has been
- fixed.
-95-04-31 Code to for IFS handling of multibyte characters has
- been added.
-95-04-31 The displaying of multibyte strings in export, readonly,
- typeset, and execution traces has been fixed.
-95-04-31 Variables inside functions are now statically scoped.
- The previous behavior was never documented.
-95-04-31 Variables inside functions are now statically scoped.
- The previous behavior was never documented.
-95-04-31 A few changes have been made to the name-value library
- that affect built-ins that use disciplines. The
- changes allow disciplines to be shared by variables
- and should make it possible to add new disciplines
- without recompilation.
-95-04-31 The name-value library interface has undergone significant
- change for this revision. See the new nval.3 man page.
-
-94-12-31 --- Release ksh93b ---
-94-12-31 Variables inside functions are now statically scoped.
- The previous behavior was never documented.
-94-12-31 If IFS contains two consecutive identical characters belonging
- to the [:space:] class, then this character is treated as
- a non-space delimiter so that each instance will delimit
- a field. For example, IFS=$'\t\t' will cause two consecutive
- tabs to delimit a null field.
-94-12-31 The getopts command has a -a name option that specifies a
- name that will be used for usage messages.
-94-12-31 A bug which caused unset RANDOM to dump core has been
- fixed.
-94-12-31 A bug which prevented return for terminating a profile
- or ENV file has been fixed.
-94-12-31 A bug which prevented standard input from being
- directed to /dev/null for background jobs when
- monitor mode was turned off has been fixed.
-94-12-31 Statements of the form typeset -options var[expr]=value
- did not perform substitutions on expr as expected.
-94-12-31 A bug which prevented the shell from sending a HUP
- signal to some background jobs that were not disowned
- has been fixed.
-94-12-31 A bug which allowed a script to trap signals that are
- ignored at the time that the shell was invoked by exec
- has been fixed.
-94-12-31 A bug which could cause a core dump when a discipline
- function was unset within a discipline was fixed.
-94-12-31 The typeset builtin now accepts a first argument of
- + or - for compatibility with ksh88.
-94-12-31 For compatibility with ksh88, the results of expansions
- of command arguments will treat the extended character
- match characters ()|& as ordinary characters.
-94-12-31 A bug which caused read to fail on a file that was
- open for read/write with <> when the first operation
- was print or printf has been fixed.
-94-12-31 When a job is suspended, it is put on the top of
- the job list as required by the POSIX standard.
-94-12-31 The value of OPTARG when an option that required
- an argument but didn't have one was incorrect in the
- case the the option string began with a :.
-94-12-31 A bug which caused the terminal to get into a bad
- state with some KEYBD traps in vi-mode has been fixed.
-94-12-31 A bug which caused an invalid trap to cause a script
- to terminate, rather than just return an error, has
- been fixed.
-94-12-31 Backreferencing sub-expressions in patterns and replacement
- strings now works.
-94-12-31 A bug in chmod which caused the -R option to fail has
- been fixed.
-94-12-31 More signal names have been added for Solaris
-
-94-06-30 --- Release ksh93a ---
-94-06-30 An expansion bug which causes portions of a word after
- a $((...)) expansion that contains a nested $var expansion
- to be lost has been fixed.
-94-06-30 A bug that caused a core dump when a script that did not
- have PWD set and did a cd inside command substitution
- has been fixed.
-94-06-30 A bug which caused a core dump on some machines when
- the LANG variable was assigned to has been fixed.
-94-06-30 A bug which incorrectly handled set disciplines that
- performed arithmetic evaluation when the discipline
- was called from the arithmetic evaluator has been fixed.
-94-06-30 A bug caused by an EXIT trap inside a function that
- was executed in a subshell was fixed.
-94-06-30 If foo is a function, and not a program, then command foo
- now reports that foo isn't found rather than invoking foo.
-94-06-30 The previous version incorrectly listed -A as an
- invocation option. The -A option is only for set.
-94-06-30 A bug was fixed which caused ksh to loop when execution trace
- was enabled and the PS4 prompt required command substitution.
-94-06-30 A bug which could cause the job control switch character
- to be disabled when a script that enabled monitor mode
- terminated was fixed.
-94-06-30 A bug in the macro expansion global replacement operator //,
- when the pattern began with a [ or +( has been fixed.
-94-06-30 A bug which prevented ~ expansion from occurring when
- it was terminated with a colon inside an assignment
- has been fixed.
-94-06-30 A bug in the dot command which prevented autoload functions
- from working has been fixed.
-94-06-30 A bug which caused a variable to be unset if the
- its value were expanded inside a set discipline has
- been fixed.
-94-06-30 Whence -a now longer reports that a defined function
- is undefined.
-94-06-30 A bug on some systems in which $0 would be incorrect
- in scripts invoked by name has been fixed.
-94-06-30 Here documents with an empty body now work.
-94-06-30 A bug which disabled argument passing and resetting
- of options for a script invoked by name inside a
- function has been fixed.
-94-06-30 A bug in which an EXIT trap set the caller of a function
- would be executed if a command called inside a function
- was not found has been fixed.
-94-06-30 A bug which allowed a script to trap signals that are
- ignored at the time that the shell was invoked has
- been fixed.
-94-06-30 A bug which caused 2<&1- when applied to a shell built-in
- to leave standard input closed has been fixed.
-94-06-30 A bug which caused the shell to incorrectly parse
- $() command substitutions with nested case statements
- has been fixed.
-
diff --git a/usr/src/lib/libshell/common/TYPES b/usr/src/lib/libshell/common/TYPES
deleted file mode 100644
index 6eb6f41b5e..0000000000
--- a/usr/src/lib/libshell/common/TYPES
+++ /dev/null
@@ -1,182 +0,0 @@
-
-The ability for users to define types has been added to ksh93t.
-Here is a quick summary of how types are defined and used in ksh93t.
-This is still a work in progress so some changes and additions
-are likely.
-
-A type can be defined either by a shared library or by using the new
-typeset -T option to the shell. The method for defining types via
-a shared library is not described here. However, the source file
-bltins/enum.c is an example of a builtin that creates enumeration types.
-
-By convention, typenames begin with a capitol letter and end in _t.
-To define a type, use
- typeset -T Type_t=(
- definition
- )
-where definition contains assignment commands, declaration commands,
-and function definitions. A declaration command (for example typeset,
-readonly, and export), is a built-in that differs from other builtins in
-that tilde substitution is performed on arguments after an =, assignments
-do not have to precede the command name, and field splitting and pathname
-expansion is not performed on the arguments.
-For example,
- typeset -T Pt_t=(
- float -h 'length in inches' x=1
- float -h 'width in inches' y=0
- integer -S count=0
- len()
- {
- print -r $((sqrt(_.x*_.x + _.y*_.y)))
- }
- set()
- {
- (( _.count++))
- }
- )
-
-defines a type Pt_t that has three variables x, y, and count defined as well
-as the discipline functions len and set. The variable x has an initial value
-of 1 and the variable y has an initial value of 0. The new -h option argument,
-is used for documentations purposes as described later and is ignored outside
-of a type definition.
-
-
-The variable count has the new -S attribute which means that it is shared
-between all instances of the type. The -S option to typeset is ignored
-outside of a type definition. Note the variable named _ that is used inside
-the function definition for len and set. It will be a reference to the
-instance of Pt_t that invoked the function. The functions len and set
-could also have been defined with function len and function set, but
-since there are no local variables, the len() and set() form are more
-efficient since they don't need to set up a context for local variables
-and for saving and restoring traps.
-
-If the discipline function named create is defined it will be
-invoked when creating each instance for that type. A function named
-create cannot be defined by any instance.
-
-When a type is defined, a declaration built-in command by this name
-is added to ksh. As with other shell builtins, you can get the man page
-for this newly added command by invoking Pt_t --man. The information from
-the -h options will be embedded in this man page. Any functions that
-use getopts to process arguments will be cross referenced on the generated
-man page.
-
-Since Pt_t is now a declaration command it can be used in the definition
-of other types, for example
- typeset -T Rect_t=( Pt_t ur ll)
-
-Because a type definition is a command, it can be loaded on first reference
-by putting the definition into a file that is found on FPATH.
-Thus, if this definition is in a file named Pt_t on FPATH, then
-a program can create instances of Pt_t without first including
-the definition.
-
-A type definition is readonly and cannot be unset. Unsetting non-shared
-elements of a type restores them to their default value. Unsetting a
-shared element has no effect.
-
-The Pt_t command is used to create an instance of Pt_t.
- Pt_t p1
-creates an instance named p1 with the initial value for p1.x set to 1
-and the initial value of p1.y set to 0.
- Pt_t p2=(x=3 y=4)
-creates an instance with the specified initial values. The len function
-gives the distance of the point to the origin. Thus, p1.len will output
-1 and p2.len will output 5.
-
-ksh93t also introduces a more efficient command substitution mechanism.
-Instead of $(command), the new command substitution ${ command;}
-can be used. Unlike (and ) which are always special, the { and } are
-reserved words and require the space after { and a newline or ; before }.
-Unlike $(), the ${ ;} command substitution executes the command in
-the current shell context saving the need to save and restore
-changes, therefore also allowing side effects.
-
-When trying to expand an element of a type, if the element does not exist,
-ksh will look for a discipline function with that name and treat this as if
-it were the ${ ;} command substitution. Thus, ${p1.len} is equivalent to
-${ p1.len;} and within an arithmetic expression, p1.len will be expanded
-via the new command substitution method.
-
-The type of any variable can be obtained from the new prefix
-operator @. Thus, ${@p1} will output Pt_t.
-
-By default, each instance inherits all the discipline functions defined
-by the type definition other than create. However, each instance can define
-a function by the same name that will override this definition.
-However, only discipline functions with the same name as those defined
-by the type or the standard get, set, append, and unset disciplines
-can be defined by each instance.
-
-Each instance of the type Pt_t behaves like a compound variable except
-that only the variables defined by the type can be referenced or set.
-Thus, p2.x=9 is valid, but p2.z=9 is not. Unless a set discipline function
-does otherwise, the value of $p1 will be expanded to the form of a compound
-variable that can be used for reinput into ksh.
-
-If the variables var1 and var2 are of the same type, then the assignment
- var2=var1
-will create a copy of the variable var1 into var2. This is equivalent to
- eval var2="$var1"
-but is faster since the variable does not need to get expanded or reparsed.
-
-The type Pt_t can be referenced as if it were a variable using the name
-.sh.type.Pt_t. To change the default point location for subsequent
-instances of Pt_t, you can do
- .sh.type.Pt_t=(x=5 y=12)
-so that
- Pt_t p3
- p3.len
-would be 13.
-
-Types can be defined for simple variables as well as for compound
-objects such as Pt_t. In this case, the variable named . inside
-the definition refers to the real value for the variable. For example,
-the type definition
- typeset -T Time_t=(
- integer .=0
- _='%H:%M:%S'
- get()
- {
- .sh.value=$(printf "%(${_._})T" "#$((_))" )
- }
- set()
- {
- .sh.value=$(printf "%(%#)T" "${.sh.value}")
-
- }
- )
-
-The sub-variable name _ is reserved for data used by discipline functions
-and will not be included with data written with the %B option to printf.
-In this case it is used to specify a date format.
-
-In this case
- Time_t t1 t2=now
-will define t1 as the time at the beginning of the epoch and t2
-as the current time. Unlike the previous case, $t2 will output
-the current time in the date format specified by the value t2._.
-However, the value of ${t2.} will expand the instance to a form
-that can be used as input to the shell.
-
-Finally, types can be derived from an existing type. If the first
-element in a type definition is named _, then the new type
-consists of all the elements and discipline functions from the
-type of _ extended by elements and discipline functions defined
-by new type definition. For example,
-
- typeset -T Pq_t=(
- Pt_t _
- float z=0.
- len()
- {
- print -r $((sqrt(_.x*_.x + _.y*_.y + _.z*_.z)))
- }
- )
-
-defines a new type Pq_t which is based on Pq_t and contains an additional
-field z and a different len discipline function. It is also possible
-to create a new type Pt_t based on the original Pt_t. In this case
-the original Pt_t is no longer accessible.
diff --git a/usr/src/lib/libshell/misc/images/callouts/1.png b/usr/src/lib/libshell/misc/images/callouts/1.png
deleted file mode 100644
index 608fad3596..0000000000
--- a/usr/src/lib/libshell/misc/images/callouts/1.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/callouts/10.png b/usr/src/lib/libshell/misc/images/callouts/10.png
deleted file mode 100644
index 39e55197cf..0000000000
--- a/usr/src/lib/libshell/misc/images/callouts/10.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/callouts/2.png b/usr/src/lib/libshell/misc/images/callouts/2.png
deleted file mode 100644
index 5444738841..0000000000
--- a/usr/src/lib/libshell/misc/images/callouts/2.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/callouts/3.png b/usr/src/lib/libshell/misc/images/callouts/3.png
deleted file mode 100644
index 64b87c7151..0000000000
--- a/usr/src/lib/libshell/misc/images/callouts/3.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/callouts/4.png b/usr/src/lib/libshell/misc/images/callouts/4.png
deleted file mode 100644
index c308193ac4..0000000000
--- a/usr/src/lib/libshell/misc/images/callouts/4.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/callouts/5.png b/usr/src/lib/libshell/misc/images/callouts/5.png
deleted file mode 100644
index 24799f0a43..0000000000
--- a/usr/src/lib/libshell/misc/images/callouts/5.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/callouts/6.png b/usr/src/lib/libshell/misc/images/callouts/6.png
deleted file mode 100644
index 8919a670cd..0000000000
--- a/usr/src/lib/libshell/misc/images/callouts/6.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/callouts/7.png b/usr/src/lib/libshell/misc/images/callouts/7.png
deleted file mode 100644
index e30e8a70cb..0000000000
--- a/usr/src/lib/libshell/misc/images/callouts/7.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/callouts/8.png b/usr/src/lib/libshell/misc/images/callouts/8.png
deleted file mode 100644
index 3e35c8827c..0000000000
--- a/usr/src/lib/libshell/misc/images/callouts/8.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/callouts/9.png b/usr/src/lib/libshell/misc/images/callouts/9.png
deleted file mode 100644
index ed2f14b4eb..0000000000
--- a/usr/src/lib/libshell/misc/images/callouts/9.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/tag_bourne.png b/usr/src/lib/libshell/misc/images/tag_bourne.png
deleted file mode 100644
index f1f78e3a25..0000000000
--- a/usr/src/lib/libshell/misc/images/tag_bourne.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/tag_i18n.png b/usr/src/lib/libshell/misc/images/tag_i18n.png
deleted file mode 100644
index 559929df2a..0000000000
--- a/usr/src/lib/libshell/misc/images/tag_i18n.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/tag_ksh.png b/usr/src/lib/libshell/misc/images/tag_ksh.png
deleted file mode 100644
index b33d5a7aa1..0000000000
--- a/usr/src/lib/libshell/misc/images/tag_ksh.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/tag_ksh88.png b/usr/src/lib/libshell/misc/images/tag_ksh88.png
deleted file mode 100644
index d36dc0f5f5..0000000000
--- a/usr/src/lib/libshell/misc/images/tag_ksh88.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/tag_ksh93.png b/usr/src/lib/libshell/misc/images/tag_ksh93.png
deleted file mode 100644
index 357ee3c50a..0000000000
--- a/usr/src/lib/libshell/misc/images/tag_ksh93.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/tag_l10n.png b/usr/src/lib/libshell/misc/images/tag_l10n.png
deleted file mode 100644
index be89f7a163..0000000000
--- a/usr/src/lib/libshell/misc/images/tag_l10n.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/images/tag_perf.png b/usr/src/lib/libshell/misc/images/tag_perf.png
deleted file mode 100644
index fcb2960852..0000000000
--- a/usr/src/lib/libshell/misc/images/tag_perf.png
+++ /dev/null
Binary files differ
diff --git a/usr/src/lib/libshell/misc/shell_styleguide.docbook b/usr/src/lib/libshell/misc/shell_styleguide.docbook
deleted file mode 100644
index 0376912d1f..0000000000
--- a/usr/src/lib/libshell/misc/shell_styleguide.docbook
+++ /dev/null
@@ -1,1464 +0,0 @@
-<?xml version="1.0"?>
-<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V5.0//EN" "http://www.oasis-open.org/docbook/xml/5.0b5/dtd/docbook.dtd" [
- <!ENTITY tag_bourneonly '<inlinemediaobject><imageobject><imagedata fileref="images/tag_bourne.png"></imagedata></imageobject><textobject><phrase>[Bourne]</phrase></textobject></inlinemediaobject> '>
- <!ENTITY tag_kshonly '<inlinemediaobject><imageobject><imagedata fileref="images/tag_ksh.png"></imagedata></imageobject><textobject><phrase>[ksh]</phrase></textobject></inlinemediaobject> '>
- <!ENTITY tag_ksh88only '<inlinemediaobject><imageobject><imagedata fileref="images/tag_ksh88.png"></imagedata></imageobject><textobject><phrase>[ksh88]</phrase></textobject></inlinemediaobject> '>
- <!ENTITY tag_ksh93only '<inlinemediaobject><imageobject><imagedata fileref="images/tag_ksh93.png"></imagedata></imageobject><textobject><phrase>[ksh93]</phrase></textobject></inlinemediaobject> '>
- <!ENTITY tag_performance '<inlinemediaobject><imageobject><imagedata fileref="images/tag_perf.png"></imagedata></imageobject><textobject><phrase>[perf]</phrase></textobject></inlinemediaobject> '>
- <!ENTITY tag_i18n '<inlinemediaobject><imageobject><imagedata fileref="images/tag_i18n.png"></imagedata></imageobject><textobject><phrase>[i18n]</phrase></textobject></inlinemediaobject> '>
- <!ENTITY tag_l10n '<inlinemediaobject><imageobject><imagedata fileref="images/tag_l10n.png"></imagedata></imageobject><textobject><phrase>[l10n]</phrase></textobject></inlinemediaobject> '>
-]>
-<!--
-
- CDDL HEADER START
-
- The contents of this file are subject to the terms of the
- Common Development and Distribution License (the "License").
- You may not use this file except in compliance with the License.
-
- You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- or http://www.opensolaris.org/os/licensing.
- See the License for the specific language governing permissions
- and limitations under the License.
-
- When distributing Covered Code, include this CDDL HEADER in each
- file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- If applicable, add the following below this CDDL HEADER, with the
- fields enclosed by brackets "[]" replaced with your own identifying
- information: Portions Copyright [yyyy] [name of copyright owner]
-
- CDDL HEADER END
-
--->
-
-<!--
-
- Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- Use is subject to license terms.
-
--->
-
-<!-- tag images were created like this:
-$ (text="perf" ;
- pbmtext -nomargins -lspace 0 -builtin fixed "${text}" |
- pbmtopgm 1 1 |
- pgmtoppm 1.0,1.0,1.0-0,0,0 /dev/stdin |
- ppmtogif |
- giftopnm |
- pnmtopng >"tag_${text}.png")
--->
-
-<!-- compile with:
-xsltproc &minus;&minus;stringparam generate.section.toc.level 0 \
- &minus;&minus;stringparam toc.max.depth 3 \
- &minus;&minus;stringparam toc.section.depth 12 \
- &minus;&minus;xinclude -o opensolaris_shell_styleguide.html /usr/share/sgml/docbook/docbook-xsl-stylesheets-1.69.1/html/docbook.xsl opensolaris_shell_styleguide.docbook
--->
-
-<article
- xmlns:xlink="http://www.w3.org/1999/xlink"
- xmlns="http://docbook.org/ns/docbook"
- xml:lang="en">
- <!-- xmlns:xi="http://www.w3.org/2001/XInclude" -->
-
- <info>
- <title><emphasis>[DRAFT]</emphasis> Bourne/Korn Shell Coding Conventions</title>
-
- <!-- subtitle abuse -->
- <subtitle>
- This page is currently work-in-progress until it is approved by the OS/Net community. Please send any comments to
- <email>shell-discuss@opensolaris.org</email>.
- </subtitle>
-
-
- <authorgroup>
-<!--
- <author><personname>David G. Korn</personname><email>dgk@research.att.com</email></author>
- <author><personname>Roland Mainz</personname><email>roland.mainz@nrubsig.org</email></author>
- <author><personname>Mike Shapiro</personname><email>mike.shapiro@sun.com</email></author>
--->
- <author><orgname>OpenSolaris.org</orgname></author>
- </authorgroup>
- </info>
-
-<section xml:id="intro">
- <title>Intro</title>
- <para>This document describes the shell coding style used for all the SMF script changes integrated into (Open)Solaris.</para>
- <para>All new SMF shell code should conform to this coding standard, which is intended to match our existing C coding standard.</para>
- <para>When in doubt, think "what would be the C-Style equivalent ?" and "What does the POSIX (shell) standard say ?"</para>
-</section><!-- end of intro -->
-
-
-<section xml:id="rules">
- <title>Rules</title>
-
-
-
- <section xml:id="general">
- <title>General</title>
-
- <section xml:id="basic_format">
- <title>Basic Format</title>
- <para>Similar to <literal>cstyle</literal>, the basic format is that all
- lines are indented by TABs or eight spaces, and continuation lines (which
- in the shell end with "\") are indented by an equivalent number of TABs
- and then an additional four spaces, e.g.
-<programlisting>
-cp foo bar
-cp some_realllllllllllllllly_realllllllllllllly_long_path \
- to_another_really_long_path
-</programlisting>
- </para>
- <para>The encoding used for the shell scripts is either <literal>ASCII</literal>
- or <literal>UTF-8</literal>, alternative encodings are only allowed when the
- application requires this.</para>
- </section>
-
-
- <section xml:id="commenting">
- <title>Commenting</title>
- <para>Shell comments are preceded by the '<literal>#</literal>' character. Place
- single-line comments in the right-hand margin. Use an extra '<literal>#</literal>'
- above and below the comment in the case of multi-line comments:
-<programlisting>
-cp foo bar # Copy foo to bar
-
-#
-# Modify the permissions on bar. We need to set them to root/sys
-# in order to match the package prototype.
-#
-chown root bar
-chgrp sys bar
-</programlisting>
- </para>
- </section>
-
-
- <section xml:id="interpreter_magic">
- <title>Interpreter magic</title>
- <para>The proper interpreter magic for your shell script should be one of these:
-<programlisting>
-#!/bin/sh Standard Bourne shell script
-#!/bin/ksh -p Standard Korn shell 88 script. You should always write ksh
- scripts with -p so that ${ENV} (if set by the user) is not
- sourced into your script by the shell.
-#!/bin/ksh93 Standard Korn shell 93 script (-p is not needed since ${ENV} is
- only used for interactive shell sessions).
-</programlisting>
- </para>
- </section>
-
-
- <section xml:id="harden_your_script_against_unexpected_input">
- <title>Harden the script against unexpected (user) input</title>
- <para>Harden your script against unexpected (user) input, including
- command line options, filenames with blanks (or other special
- characters) in the name, or file input</para>
- </section>
-
-
- <section xml:id="use_builtin_commands">
- <title>&tag_kshonly;&tag_performance;Use builtin commands if the shell provides them</title>
- <para>
- Use builtin commands if the shell provides them. For example ksh93s+
- (ksh93, version 's+') delivered with Solaris (as defined by PSARC 2006/550)
- supports the following builtins:
- <simplelist type="inline">
- <member>basename</member>
- <member>cat</member>
- <member>chgrp</member>
- <member>chmod</member>
- <member>chown</member>
- <member>cmp</member>
- <member>comm</member>
- <member>cp</member>
- <member>cut</member>
- <member>date</member>
- <member>dirname</member>
- <member>expr</member>
- <member>fds</member>
- <member>fmt</member>
- <member>fold</member>
- <member>getconf</member>
- <member>head</member>
- <member>id</member>
- <member>join</member>
- <member>ln</member>
- <member>logname</member>
- <member>mkdir</member>
- <member>mkfifo</member>
- <member>mv</member>
- <member>paste</member>
- <member>pathchk</member>
- <member>rev</member>
- <member>rm</member>
- <member>rmdir</member>
- <member>stty</member>
- <member>tail</member>
- <member>tee</member>
- <member>tty</member>
- <member>uname</member>
- <member>uniq</member>
- <member>wc</member>
- <member>sync</member>
- </simplelist>
- Those builtins can be enabled via <literal>$ builtin name_of_builtin #</literal> in shell
- scripts (note that ksh93 builtins implement exact POSIX behaviour - some
- commands in Solaris <filename>/usr/bin/</filename> directory implement pre-POSIX behaviour.
- Add <literal>/usr/xpg6/bin/:/usr/xpg4/bin</literal> before
- <filename>/usr/bin/</filename> in <envar>${PATH}</envar> to test whether your script works with
- the XPG6/POSIX versions)
- </para>
- </section>
-
-
- <section xml:id="use_blocks_not_subshells">
- <title>&tag_performance;Use blocks and not subshells if possible</title>
- <para>Use blocks and not subshells if possible, e.g. use
- <literal>$ { print "foo" ; print "bar" ; }</literal> instead of
- <literal>$ (print "foo" ; print "bar") #</literal> - blocks are
- faster since they do not require to save the subshell context (ksh93) or
- trigger a shell child process (Bourne shell, bash, ksh88 etc.)
- </para>
- </section>
-
-
- <section xml:id="use_long_options_for_set_builtin">
- <title>&tag_kshonly; use long options for "<literal>set</literal>"</title>
- <para>use long options for "<literal>set</literal>", for example instead of <literal>$ set -x #</literal>
- use <literal>$ set -o xtrace #</literal> to make the code more readable.</para>
- </section>
-
-
- <section xml:id="use_posix_command_substitutions_syntax">
- <title>&tag_kshonly; Use <literal>$(...)</literal> instead of <literal>`...`</literal> command substitutions</title>
- <para>Use <literal>$(...)</literal> instead of <literal>`...`</literal> - <literal>`...`</literal>
- is an obsolete construct in ksh+POSIX sh scripts and <literal>$(...)</literal>.is a cleaner design,
- requires no escaping rules, allows easy nesting etc.</para>
-
- <note><title>&tag_ksh93only; <literal>${ ...;}</literal>-style command substitutions</title>
- <para>ksh93 has support for an alternative version of command substitutions with the
- syntax <literal>${ ...;}</literal> which do not run in a subshell.
- </para></note>
- </section>
-
-
- <section xml:id="put_command_substitution_result_in_quotes">
- <title>&tag_kshonly; Always put the result of a <literal>$(...)</literal> or
- <literal>$( ...;)</literal> command substitution in quotes</title>
- <para>Always put the result of <literal>$( ... )</literal> or <literal>$( ...;)</literal> in
- quotes (e.g. <literal>foo="$( ... )"</literal> or <literal>foo="$( ...;)"</literal>) unless
- there is a very good reason for not doing it</para>
- </section>
-
-
- <section xml:id="always_set_path">
- <title>Scripts should always set their <envar>PATH</envar></title>
- <para>Scripts should always set their <envar>PATH</envar> to make sure they do not use
- alternative commands by accident (unless the value of <envar>PATH</envar> is well-known
- and guaranteed to be set by the caller)</para>
- </section>
-
-
- <section xml:id="make_sure_commands_are_available">
- <title>Make sure that commands from other packages/applications are really installed on the machine</title>
- <para>Scripts should make sure that commands in optional packages are really
- there, e.g. add a "precheck" block in scipts to avoid later failure when
- doing the main job</para>
- </section>
-
-
- <section xml:id="check_usage_of_boolean_variables">
- <title>Check how boolean values are used/implemented in your application</title>
- <para>Check how boolean values are used in your application.</para>
- <para>For example:
-<programlisting>
-mybool=0
-# do something
-if [ $mybool -eq 1 ] ; then do_something_1 ; fi
-</programlisting>
-could be rewritten like this:
-<programlisting>
-mybool=false # (valid values are "true" or "false", pointing
-# to the builtin equivalents of /bin/true or /bin/false)
-# do something
-if ${mybool} ; then do_something_1 ; fi
-</programlisting>
-or
-<programlisting>
-integer mybool=0 # values are 0 or 1
-# do something
-if (( mybool==1 )) ; then do_something_1 ; fi
-</programlisting>
- </para>
- </section>
-
- <section xml:id="shell_uses_characters_not_bytes">
- <title>&tag_i18n;The shell always operates on <emphasis>characters</emphasis> not bytes</title>
- <para>Shell scripts operate on characters and <emphasis>not</emphasis> bytes.
- Some locales use multiple bytes (called "multibyte locales") to represent one character</para>
-
- <note><para>ksh93 has support for binary variables which explicitly
- operate on bytes, not characters. This is the <emphasis>only</emphasis> allowed
- exception.</para></note>
- </section>
-
-
- <section xml:id="multibyte_locale_input">
- <title>&tag_i18n;Multibyte locales and input</title>
- <para>Think about whether your application has to handle file names or
- variables in multibyte locales and make sure all commands used in your
- script can handle such characters (e.g. lots of commands in Solaris's
- <filename>/usr/bin/</filename> are <emphasis>not</emphasis> able to handle such values - either use ksh93
- builtin constructs (which are guaranteed to be multibyte-aware) or
- commands from <filename>/usr/xpg4/bin/</filename> and/or <filename>/usr/xpg6/bin</filename>)
- </para>
- </section>
-
-
- <section xml:id="use_external_filters_only_for_large_datasets">
- <title>&tag_performance;Only use external filters like <literal>grep</literal>/<literal>sed</literal>/<literal>awk</literal>/etc.
- if you want to process lots of data with them</title>
- <para>Only use external filters like <literal>grep</literal>/<literal>sed</literal>/<literal>awk</literal>/etc.
- if a significant amount of data is processed by the filter or if
- benchmarking shows that the use of builtin commands is significantly slower
- (otherwise the time and resources needed to start the filter are
- far greater then the amount of data being processed,
- creating a performance problem).</para>
- <para>For example:
-<programlisting>
-if [ "$(echo "$x" | egrep '.*foo.*')" != "" ] ; then
- do_something ;
-done
-</programlisting>
-can be re-written using ksh93 builtin constructs, saving several
-<literal>|fork()|+|exec()|</literal>'s:
-<programlisting>
-if [[ "${x}" == ~(E).*foo.* ]] ; then
- do_something ;
-done
-</programlisting>
- </para>
- </section>
-
-
- <section xml:id="use_dashdash_if_first_arg_is_variable">
- <title>If the first operand of a command is a variable, use <literal>--</literal></title>
- <para>If the first operand of a command is a variable, use <literal>--</literal>
- for any command that accepts this as end of argument to
- avoid problems if the variable expands to a value starting with <literal>-</literal>.
- </para>
- <note><para>
- At least
- <simplelist type="inline">
- <member>print</member>
- <member>/usr/bin/fgrep</member><member>/usr/xpg4/bin/fgrep</member>
- <member>/usr/bin/grep</member> <member>/usr/xpg4/bin/grep</member>
- <member>/usr/bin/egrep</member><member>/usr/xpg4/bin/egrep</member>
- </simplelist>
- support <literal>--</literal> as "end of arguments"-terminator.
- </para></note>
- </section>
-
- <section xml:id="use_export">
- <title>&tag_kshonly;&tag_performance;Use <literal>$ export FOOBAR=val #</literal> instead of
- <literal>$ FOOBAR=val ; export FOOBAR #</literal></title>
- <para>Use <literal>$ export FOOBAR=val # instead of $ FOOBAR=val ; export FOOBAR #</literal> -
- this is much faster.</para>
- </section>
-
-
- <section xml:id="use_subshell_around_set_dashdash_usage">
- <title>Use a subshell (e.g. <literal>$ ( mycmd ) #</literal>) around places which use
- <literal>set -- $(mycmd)</literal> and/or <literal>shift</literal></title>
- <para>Use a subshell (e.g. <literal>$ ( mycmd ) #</literal>) around places which use
- <literal>set -- $(mycmd)</literal> and/or <literal>shift</literal> unless the variable
- affected is either a local one or if it's guaranteed that this variable will no longer be used
- (be careful for loadable functions, e.g. ksh/ksh93's <literal>autoload</literal> !!!!)
- </para>
- </section>
-
-
- <section xml:id="be_careful_with_tabs_in_script_code">
- <title>Be careful with using TABS in script code, they are not portable
- between editors or platforms</title>
- <para>Be careful with using TABS in script code, they are not portable
- between editors or platforms.</para>
- <para>If you use ksh93 use <literal>$'\t'</literal> to include TABs in sources, not the TAB character itself.</para>
- </section>
-
-
- <section xml:id="centralise_error_exit">
- <title>If you have multiple points where your application exits with an error
- message create a central function for this purpose</title>
- <para>If you have multiple points where your application exits with an error
- message create a central function for this, e.g.
-<programlisting>
-if [ -z "$tmpdir" ] ; then
- print -u2 "mktemp failed to produce output; aborting."
- exit 1
-fi
-if [ ! -d $tmpdir ] ; then
- print -u2 "mktemp failed to create a directory; aborting."
- exit 1
-fi
-</programlisting>
-should be replaced with
-<programlisting>
-function fatal_error
-{
- print -u2 "${progname}: $*"
- exit 1
-}
-# do something (and save ARGV[0] to variable "progname")
-if [ -z "$tmpdir" ] ; then
- fatal_error "mktemp failed to produce output; aborting."
-fi
-if [ ! -d "$tmpdir" ] ; then
- fatal_error "mktemp failed to create a directory; aborting."
-fi
-</programlisting>
- </para>
- </section>
-
-
- <section xml:id="use_set_o_nounset">
- <title>&tag_kshonly; Think about using <literal>$ set -o nounset #</literal> by default</title>
- <para>Think about using <literal>$ set -o nounset #</literal> by default (or at least during the
- script's development phase) to catch errors where variables are used
- when they are not set (yet), e.g.
-<screen>
-$ <userinput>(set -o nounset ; print ${foonotset})</userinput>
-<computeroutput>/bin/ksh93: foonotset: parameter not set</computeroutput>
-</screen>
- </para>
- </section>
-
-
- <section xml:id="avoid_eval_builtin">
- <title>Avoid using <literal>eval</literal> unless absolutely necessary</title>
- <para>Avoid using <literal>eval</literal> unless absolutely necessary. Subtle things
- can happen when a string is passed back through the shell
- parser. You can use name references to avoid uses such as
- <literal>eval $name="$value"</literal>.
- </para>
- </section>
-
-
- <section xml:id="use_concatenation_operator">
- <title>&tag_ksh93only;Use the string/array concatenation operator <literal>+=</literal></title>
- <para>Use <literal>+=</literal> instead of manually adding strings/array elements, e.g.
-<programlisting>
-foo=""
-foo="${foo}a"
-foo="${foo}b"
-foo="${foo}c"
-</programlisting>
-should be replaced with
-<programlisting>
-foo=""
-foo+="a"
-foo+="b"
-foo+="c"
-</programlisting>
- </para>
- </section>
-
- <section xml:id="use_source_not_dot">
- <title>&tag_ksh93only;Use <literal>source</literal> instead of '<literal>.</literal> '(dot)
- to include other shell script fragments</title>
- <para>Use <literal>source</literal> instead of '<literal>.</literal>'
- (dot) to include other shell script fragments - the new form is much
- more readable than the tiny dot and a failure can be caught within the script.</para>
- </section>
-
-
- <section xml:id="use_builtin_localisation_support">
- <title>&tag_ksh93only;&tag_performance;&tag_l10n;Use <literal>$"..."</literal> instead of
- <literal>gettext ... "..."</literal> for strings that need to be localized for different locales</title>
- <para>Use $"..." instead of <literal>gettext ... "..."</literal> for strings that need to be
- localized for different locales. <literal>gettext</literal> will require a
- <literal>fork()+exec()</literal> and
- reads the whole catalog each time it's called, creating a huge overhead for localisation
- (and the <literal>$"..."</literal> is easier to use, e.g. you only have to put a
- <literal>$</literal> in front of the catalog and the string will be localised).
- </para>
- </section>
-
-
- <section xml:id="use_set_o_noglob">
- <title>&tag_kshonly;&tag_performance;Use <literal>set -o noglob</literal> if you do not need to expand files</title>
- <para>If you don't expect to expand files, you can do set <literal>-f</literal>
- (<literal>set -o noglob</literal>) as well. This way the need to use <literal>""</literal> is
- greatly reduced.</para>
- </section>
-
-
- <section xml:id="use_empty_ifs_to_handle_spaces">
- <title>&tag_ksh93only;Use <literal>IFS=</literal> to avoid problems with spaces in filenames</title>
- <para>Unless you want to do word splitting, put <literal>IFS=</literal>
- at the beginning of a command. This way spaces in
- file names won't be a problem. You can do
- <literal>IFS='delims' read -r</literal> line
- to override <envar>IFS</envar> just for the <literal>read</literal> command. However,
- you can't do this for the <literal>set</literal> builtin.</para>
- </section>
-
-
- <section xml:id="set_locale_when_comparing_against_localised_output">
- <title>Set the message locale if you process output of tools which may be localised</title>
- <para>Set the message locale (<envar>LC_MESSAGES</envar>) if you process output of tools which may be localised</para>
- <example><title>Set <envar>LC_MESSAGES</envar> when testing for specific outout of the <filename>/usr/bin/file</filename> utility:</title>
-<programlisting>
-# set french as default message locale
-export LC_MESSAGES=fr_FR.UTF-8
-
-...
-
-# test whether the file "/tmp" has the filetype "directory" or not
-# we set LC_MESSAGES to "C" to ensure the returned message is in english
-if [[ "$(LC_MESSAGES=C file /tmp)" = *directory ]] ; then
- print "is a directory"
-fi
-</programlisting>
- <note><para>The environment variable <envar>LC_ALL</envar> always
- overrides any other <envar>LC_*</envar> environment variables
- (and <envar>LANG</envar>, too),
- including <envar>LC_MESSAGES</envar>.
- if there is the chance that <envar>LC_ALL</envar> may be set
- replace <envar>LC_MESSAGES</envar> with <envar>LC_ALL</envar>
- in the example above.</para></note>
- </example>
- </section>
-
- <section xml:id="cleanup_after_yourself">
- <title>Cleanup after yourself.</title>
- <para>Cleanup after yourself. For example ksh/ksh93 have an <literal>EXIT</literal> trap which
- is very useful for this.
- </para>
- <note><para>
- Note that the <literal>EXIT</literal> trap is executed for a subshell and each subshell
- level can run it's own <literal>EXIT</literal> trap, for example
-<screen>
-$ <userinput>(trap "print bam" EXIT ; (trap "print snap" EXIT ; print "foo"))</userinput>
-<computeroutput>foo
-snap
-bam</computeroutput>
-</screen>
- </para></note>
- </section>
-
- <section xml:id="use_proper_exit_code">
- <title>Use a proper <literal>exit</literal> code</title>
- <para>Explicitly set the exit code of a script, otherwise the exit code
- from the last command executed will be used which may trigger problems
- if the value is unexpected.</para>
- </section>
-
-
- <section xml:id="shell_lint">
- <title>&tag_ksh93only;Use <literal>shcomp -n scriptname.sh /dev/null</literal> to check for common errors</title>
- <para>Use <literal>shcomp -n scriptname.sh /dev/null</literal> to
- check for common problems (such as insecure, depreciated or ambiguous constructs) in shell scripts.</para>
- </section>
- </section><!-- end of general -->
-
-
-
-
-
- <section xml:id="functions">
- <title>Functions</title>
-
- <section xml:id="use_functions">
- <title>Use functions to break up your code</title>
- <para>Use functions to break up your code into smaller, logical blocks.</para>
- </section>
-
- <section xml:id="do_not_reserved_keywords_for_function_names">
- <title>Do not use function names which are reserved keywords in C/C++/JAVA or the POSIX shell standard</title>
- <para>Do not use function names which are reserved keywords (or function names) in C/C++/JAVA or the POSIX shell standard
- (to avoid confusion and/or future changes/updates to the shell language).
- </para>
- </section>
-
- <section xml:id="use_ksh_style_function_syntax">
- <title>&tag_kshonly;&tag_performance;Use ksh-style <literal>function</literal></title>
- <para>It is <emphasis>highly</emphasis> recommended to use ksh style functions
- (<literal>function foo { ... }</literal>) instead
- of Bourne-style functions (<literal>foo() { ... }</literal>) if possible
- (and local variables instead of spamming the global namespace).</para>
-
- <warning><para>
- The difference between old-style Bourne functions and ksh functions is one of the major differences
- between ksh88 and ksh93 - ksh88 allowed variables to be local for Bourne-style functions while ksh93
- conforms to the POSIX standard and will use a function-local scope for variables declared in
- Bourne-style functions.</para>
- <para>Example (note that "<literal>integer</literal>" is an alias for "<literal>typeset -li</literal>"):
-<programlisting>
-# new style function with local variable
-$ ksh93 -c 'integer x=2 ; function foo { integer x=5 ; } ; print "x=$x"
-; foo ; print "x=$x" ;'
-x=2
-x=2
-# old style function with an attempt to create a local variable
-$ ksh93 -c 'integer x=2 ; foo() { integer x=5 ; } ; print "x=$x" ; foo ;
-print "x=$x" ;'
-x=2
-x=5
-</programlisting>
-
- <uri xlink:href="http://www.opensolaris.org/os/project/ksh93-integration/docs/ksh93r/general/compatibility/">usr/src/lib/libshell/common/COMPATIBILITY</uri>
- says about this issue:
-<blockquote><para>
-Functions, defined with name() with ksh-93 are compatible with
-the POSIX standard, not with ksh-88. No local variables are
-permitted, and there is no separate scope. Functions defined
-with the function name syntax, maintain compatibility.
-This also affects function traces.
-</para></blockquote>
-(this issue also affects <filename>/usr/xpg4/bin/sh</filename> in Solaris 10 because it is based on ksh88. This is a bug.).
- </para></warning>
-
- </section>
-
-
- <section xml:id="use_proper_return_code">
- <title>Use a proper <literal>return</literal> code</title>
- <para>Explicitly set the return code of a function - otherwise the exit code
- from the last command executed will be used which may trigger problems
- if the value is unexpected.</para>
- <para>The only allowed exception is if a function uses the shell's <literal>errexit</literal> mode to leave
- a function, subshell or the script if a command returns a non-zero exit code.
- </para>
- </section>
-
- <section xml:id="use_fpath_to_load_common_code">
- <title>&tag_kshonly;Use <envar>FPATH</envar> to load common functions, not <literal>source</literal></title>
- <para>
- Use the ksh <envar>FPATH</envar> (function path) feature to load functions which are shared between scripts
- and not <literal>source</literal> - this allows to load such a function on demand and not all at once.</para>
- </section>
-
- </section><!-- end of functions -->
-
-
-
-
- <section xml:id="if_for_while">
- <title><literal>if</literal>, <literal>for</literal> and <literal>while</literal></title>
-
- <section xml:id="if_for_while_format">
- <title>Format</title>
- <para>To match <literal>cstyle</literal>, the shell token equivalent to the <literal>C</literal>
- "<literal>{</literal>" should appear on the same line, separated by a
- "<literal>;</literal>", as in:
-<programlisting>
-if [ "$x" = "hello" ] ; then
- echo $x
-fi
-
-if [[ "$x" = "hello" ]] ; then
- print $x
-fi
-
-for i in 1 2 3; do
- echo $i
-done
-
-for ((i=0 ; i &lt; 3 ; i++)); do
- print $i
-done
-
-while [ $# -gt 0 ]; do
- echo $1
- shift
-done
-
-while (( $# &gt; 0 )); do
- print $1
- shift
-done
-</programlisting>
- </para>
- </section>
-
-
- <section xml:id="test_builtin">
- <title><literal>test</literal> Builtin</title>
- <para>DO NOT use the test builtin. Sorry, executive decision.</para>
- <para>In our Bourne shell, the <literal>test</literal> built-in is the same as the "["
- builtin (if you don't believe me, try "type test" or refer to <filename>usr/src/cmd/sh/msg.c</filename>).</para>
- <para>
- So please do not write:
-<programlisting>
-if test $# -gt 0 ; then
-</programlisting>
-instead use:
-<programlisting>
-if [ $# -gt 0 ] ; then
-</programlisting>
- </para>
- </section>
-
-
- <section xml:id="use_ksh_test_syntax">
- <title>&tag_kshonly;&tag_performance;Use "<literal>[[ expr ]]</literal>" instead of "<literal>[ expr ]</literal>"</title>
- <para>Use "<literal>[[ expr ]]</literal>" instead of "<literal>[ expr ]</literal>" if possible
- since it avoids going through the whole pattern expansion/etc. machinery and
- adds additional operators not available in the Bourne shell, such as short-circuit
- <literal>&amp;&amp;</literal> and <literal>||</literal>.
- </para>
- </section>
-
-
- <section xml:id="use_posix_arithmetic_expressions">
- <title>&tag_kshonly; Use "<literal>(( ... ))</literal>" for arithmetic expressions</title>
- <para>Use "<literal>(( ... ))</literal>" instead of "<literal>[ expr ]</literal>"
- or "<literal>[[ expr ]]</literal>" expressions.
- </para>
- <para>
- Example: Replace
-<programlisting>
-i=5
-# do something
-if [ $i -gt 5 ] ; then
-</programlisting>
-with
-<programlisting>
-i=5
-# do something
-if (( i &gt; 5 )) ; then
-</programlisting>
- </para>
- </section>
-
-
- <section xml:id="compare_exit_code_using_math">
- <title>&tag_kshonly;&tag_performance;Compare exit code using arithmetic expressions expressions</title>
- <para>Use POSIX arithmetic expressions to test for exit/return codes of commands and functions.
- For example turn
-<programlisting>
-if [ $? -gt 0 ] ; then
-</programlisting>
-into
-<programlisting>
-if (( $? &gt; 0 )) ; then
-</programlisting>
- </para>
- </section>
-
-
- <section xml:id="use_builtin_commands_in_loops">
- <title>&tag_bourneonly; Use builtin commands in conditions for <literal>while</literal> endless loops</title>
- <para>Make sure that your shell has a "<literal>true</literal>" builtin (like ksh93) when
- executing endless loops like <literal>$ while true ; do do_something ; done #</literal> -
- otherwise each loop cycle runs a <literal>|fork()|+|exec()|</literal>-cycle to run
- <filename>/bin/true</filename>
- </para>
- </section>
-
-
- <section xml:id="single_line_if_statements">
- <title>Single-line if-statements</title>
- <para>It is permissible to use <literal>&amp;&amp;</literal> and <literal>||</literal> to construct
- shorthand for an "<literal>if</literal>" statement in the case where the if statement has a
- single consequent line:
-<programlisting>
-[ $# -eq 0 ] &amp;&amp; exit 0
-</programlisting>
-instead of the longer:
-<programlisting>
-if [ $# -eq 0 ]; then
- exit 0
-fi
-</programlisting>
- </para>
- </section>
-
-
- <section xml:id="exit_status_and_if_for_while">
- <title>Exit Status and <literal>if</literal>/<literal>while</literal> statements</title>
- <para>Recall that "<literal>if</literal>" and "<literal>while</literal>"
- operate on the exit status of the statement
- to be executed. In the shell, zero (0) means true and non-zero means false.
- The exit status of the last command which was executed is available in the $?
- variable. When using "<literal>if</literal>" and "<literal>while</literal>",
- it is typically not necessary to use
- <literal>$?</literal> explicitly, as in:
-<programlisting>
-grep foo /etc/passwd &gt;/dev/null 2>&amp;1
-if [ $? -eq 0 ]; then
- echo "found"
-fi
-</programlisting>
-Instead, you can more concisely write:
-<programlisting>
-if grep foo /etc/passwd &gt;/dev/null 2>&amp;1; then
- echo "found"
-fi
-</programlisting>
-Or, when appropriate:
-<programlisting>
-grep foo /etc/passwd &gt;/dev/null 2>&amp;1 &amp;&amp; echo "found"
-</programlisting>
- </para>
- </section>
-
- </section><!-- end of if/for/while -->
-
-
-
-
-
-
- <section xml:id="variables">
- <title>Variable types, naming and usage</title>
-
- <section xml:id="names_should_be_lowercase">
- <title>Names of local, non-environment, non-constant variables should be lowercase</title>
- <para>Names of variables local to the current script which are not exported to the environment
- should be lowercase while variable names which are exported to the
- environment should be uppercase.</para>
- <para>The only exception are global constants (=global readonly variables,
- e.g. <literal>$ float -r M_PI=3.14159265358979323846 #</literal> (taken from &lt;math.h&gt;))
- which may be allowed to use uppercase names, too.
- </para>
-
- <warning><para>
- Uppercase variable names should be avoided because there is a good chance
- of naming collisions with either special variable names used by the shell
- (e.g. <literal>PWD</literal>, <literal>SECONDS</literal> etc.).
- </para></warning>
- </section>
-
- <section xml:id="do_not_reserved_keywords_for_variable_names">
- <title>Do not use variable names which are reserved keywords/variable names in C/C++/JAVA or the POSIX shell standard</title>
- <para>Do not use variable names which are reserved keywords in C/C++/JAVA or the POSIX shell standard
- (to avoid confusion and/or future changes/updates to the shell language).
- </para>
- <note>
- <para>The Korn Shell and the POSIX shell standard have many more
- reserved variable names than the original Bourne shell. All
- these reserved variable names are spelled uppercase.
- </para>
- </note>
- </section>
-
- <section xml:id="use_brackets_around_long_names">
- <title>Always use <literal>'{'</literal>+<literal>'}'</literal> when using variable
- names longer than one character</title>
- <para>Always use <literal>'{'</literal>+<literal>'}'</literal> when using
- variable names longer than one character unless a simple variable name is
- followed by a blank, <literal>/</literal>, <literal>;</literal>, or <literal>$</literal>
- character (to avoid problems with array,
- compound variables or accidental misinterpretation by users/shell)
-<programlisting>
-print "$foo=info"
-</programlisting>
-should be rewritten to
-<programlisting>
-print "${foo}=info"
-</programlisting>
- </para>
- </section>
-
-
- <section xml:id="quote_variables_containing_filenames_or_userinput">
- <title><emphasis>Always</emphasis> put variables into quotes when handling filenames or user input</title>
- <para><emphasis>Always</emphasis> put variables into quotes when handling filenames or user input, even if
- the values are hardcoded or the values appear to be fixed. Otherwise at
- least two things may go wrong:
- <itemizedlist>
- <listitem><para>a malicious user may be able to exploit a script's inner working to
- infect his/her own code</para></listitem>
- <listitem><para>a script may (fatally) misbehave for unexpected input (e.g. file names
- with blanks and/or special symbols which are interpreted by the shell)</para></listitem>
- </itemizedlist>
- </para>
-
- <note><para>
- As alternative a script may set <literal>IFS='' ; set -o noglob</literal> to turn off the
- interpretation of any field seperators and the pattern globbing.
- </para></note>
- </section>
-
-
-
- <section xml:id="use_typed_variables">
- <title>&tag_kshonly;&tag_performance;Use typed variables if possible.</title>
- <para>For example the following is very
- inefficient since it transforms the integer values to strings and back
- several times:
-<programlisting>
-a=0
-b=1
-c=2
-# more code
-if [ $a -lt 5 -o $b -gt c ] ; then do_something ; fi
-</programlisting>
-This could be rewritten using ksh constructs:
-<programlisting>
-integer a=0
-integer b=1
-integer c=2
-# more code
-if (( a &lt; 5 || b &gt; c )) ; then do_something ; fi
-</programlisting>
- </para>
- </section>
-
-
- <section xml:id="store_lists_in_arrays">
- <title>&tag_ksh93only; Store lists in arrays or associative arrays</title>
- <para>Store lists in arrays or associative arrays - this is usually easier
- to manage.</para>
- <para>
- For example:
-<programlisting>
-x="
-/etc/foo
-/etc/bar
-/etc/baz
-"
-echo $x
-</programlisting>
-can be replaced with
-<programlisting>
-typeset -a mylist
-mylist[0]="/etc/foo"
-mylist[1]="/etc/bar"
-mylist[2]="/etc/baz"
-print "${mylist[@]}"
-</programlisting>
-or (ksh93-style append entries to a normal (non-associative) array)
-<programlisting>
-typeset -a mylist
-mylist+=( "/etc/foo" )
-mylist+=( "/etc/bar" )
-mylist+=( "/etc/baz" )
-print "${mylist[@]}"
-</programlisting>
- </para>
- <note>
- <title>Difference between expanding arrays with mylist[@] and mylist[*] subscript operators</title>
- <para>
- Arrays may be expanded using two similar subscript operators, @ and *. These subscripts
- differ only when the variable expansion appears within double quotes. If the variable expansion
- is between double-quotes, "${mylist[*]}" expands to a single string with the value of each array
- member separated by the first character of the <envar>IFS</envar> variable, and "${mylist[@]}"
- expands each element of name to a separate string.
- </para>
- <example><title>Difference between [@] and [*] when expanding arrays</title>
-<programlisting>
-typeset -a mylist
-mylist+=( "/etc/foo" )
-mylist+=( "/etc/bar" )
-mylist+=( "/etc/baz" )
-IFS=","
-printf "mylist[*]={ 0=|%s| 1=|%s| 2=|%s| 3=|%s| }\n" "${mylist[*]}"
-printf "mylist[@]={ 0=|%s| 1=|%s| 2=|%s| 3=|%s| }\n" "${mylist[@]}"
-</programlisting>
-<para>will print:</para>
-<screen>
-<computeroutput>mylist[*]={ 0=|/etc/foo,/etc/bar,/etc/baz| 1=|| 2=|| 3=|| }
-mylist[@]={ 0=|/etc/foo| 1=|/etc/bar| 2=|/etc/baz| 3=|| }
-</computeroutput>
-</screen>
- </example>
- </note>
- </section>
-
-
- <section xml:id="use_compound_variables_or_lists_for_grouping">
- <title>&tag_ksh93only; Use compound variables or associative arrays to group similar variables together</title>
- <para>Use compound variables or associative arrays to group similar variables together.</para>
- <para>
- For example:
-<programlisting>
-box_width=56
-box_height=10
-box_depth=19
-echo "${box_width} ${box_height} ${box_depth}"
-</programlisting>
-could be rewritten to ("associative array"-style)
-<programlisting>
-typeset -A -E box=( [width]=56 [height]=10 [depth]=19 )
-print -- "${box[width]} ${box[height]} ${box[depth]}"
-</programlisting>
-or ("compound variable"-style
-<programlisting>
-box=(
- float width=56
- float height=10
- float depth=19
- )
-print -- "${box.width} ${box.height} ${box.depth}"
-</programlisting>
- </para>
- </section>
- </section><!-- end of variables -->
-
-
-
-
-
-
-
- <section xml:id="io">
- <title>I/O</title>
-
- <section xml:id="avoid_echo">
- <title>Avoid using the "<literal>echo</literal>" command for output</title>
- <para>The behaviour of "<literal>echo</literal>" is not portable
- (e.g. System V, BSD, UCB and ksh93/bash shell builtin versions all
- slightly differ in functionality) and should be avoided if possible.
- POSIX defines the "<literal>printf</literal>" command as replacement
- which provides more flexible and portable behaviour.</para>
-
- <note>
- <title>&tag_kshonly;Use "<literal>print</literal>" and not "<literal>echo</literal>" in Korn Shell scripts</title>
- <para>Korn shell scripts should prefer the "<literal>print</literal>"
- builtin which was introduced as replacement for "<literal>echo</literal>".</para>
- <caution>
- <para>Use <literal>$ print -- ${varname}" #</literal> when there is the slightest chance that the
- variable "<literal>varname</literal>" may contain symbols like "-". Or better use "<literal>printf</literal>"
- instead, for example
-<programlisting>
-integer fx
-# do something
-print $fx
-</programlisting>
-may fail if "f" contains a negative value. A better way may be to use
-<programlisting>
-integer fx
-# do something
-printf "%d\n" fx
-</programlisting>
- </para>
- </caution>
- </note>
- </section>
-
- <section xml:id="use_redirect_not_exec_to_open_files">
- <title>&tag_ksh93only;Use <literal>redirect</literal> and not <literal>exec</literal> to open files</title>
- <para>Use <literal>redirect</literal> and not <literal>exec</literal> to open files - <literal>exec</literal>
- will terminate the current function or script if an error occurs while <literal>redirect</literal>
- just returns a non-zero exit code which can be caught.</para>
-<para>Example:
-<programlisting>
-if redirect 5&lt;/etc/profile ; then
- print "file open ok"
- head &lt;&amp;5
-else
- print "could not open file"
-fi
-</programlisting>
- </para>
- </section>
-
- <section xml:id="group_identical_redirections_together">
- <title>&tag_performance;Avoid redirections per command when the output goes into the same file,
- e.g. <literal>$ echo "foo" &gt;xxx ; echo "bar" &gt;&gt;xxx ; echo "baz" &gt;&gt;xxx #</literal></title>
- <para>Each of the redirections above trigger an
- <literal>|open()|,|write()|,|close()|</literal>-sequence. It is much
- more efficient (and faster) to group the rediction into a block,
- e.g. <literal>{ echo "foo" ; echo "bar" ; echo "baz" } &gt;xxx #</literal></para>
- </section>
-
-
- <section xml:id="avoid_using_temporary_files">
- <title>&tag_performance;Avoid the creation of temporary files and store the values in variables instead</title>
- <para>Avoid the creation of temporary files and store the values in variables instead if possible</para>
- <para>
- Example:
-<programlisting>
-ls -1 &gt;xxx
-for i in $(cat xxx) ; do
- do_something ;
-done
-</programlisting>
-can be replaced with
-<programlisting>
-x="$(ls -1)"
-for i in ${x} ; do
- do_something ;
-done
-</programlisting>
- </para>
- <note><para>ksh93 supports binary variables (e.g. <literal>typeset -b varname</literal>) which can hold any value.</para></note>
- </section>
-
-
- <section xml:id="create_subdirs_for_multiple_temporary_files">
- <title>If you create more than one temporary file create an unique subdir</title>
- <para>If you create more than one temporary file create an unique subdir for
- these files and make sure the dir is writable. Make sure you cleanup
- after yourself (unless you are debugging).
- </para>
- </section>
-
-
- <section xml:id="use_dynamic_file_descriptors">
- <title>&tag_ksh93only;Use {n}&lt;file instead of fixed file descriptor numbers</title>
- <para>When opening a file use {n}&lt;file, where <envar>n</envar> is an
- integer variable rather than specifying a fixed descriptor number.</para>
- <para>This is highly recommended in functions to avoid that fixed file
- descriptor numbers interfere with the calling script.</para>
-<example><title>Open a network connection and store the file descriptor number in a variable</title>
-<programlisting>
-function cat_http
-{
- integer netfd
-
-...
-
- # open TCP channel
- redirect {netfd}&lt;&gt;"/dev/tcp/${host}/${port}"
-
- # send HTTP request
- request="GET /${path} HTTP/1.1\n"
- request+="Host: ${host}\n"
- request+="User-Agent: demo code/ksh93 (2007-08-30; $(uname -s -r -p))\n"
- request+="Connection: close\n"
- print "${request}\n" &gt;&amp;${netfd}
-
- # collect response and send it to stdout
- cat &lt;&amp;${netfd}
-
- # close connection
- exec {netfd}&lt;&amp;-
-
-...
-
-}
-</programlisting>
-</example>
- </section>
-
-
- <section xml:id="use_inline_here_documents">
- <title>&tag_ksh93only;&tag_performance;Use inline here documents
- instead of <literal>echo "$x" | command</literal></title>
- <para>Use inline here documents, for example
-<programlisting>
-command &lt;&lt;&lt; $x
-</programlisting>
- rather than
-<programlisting>
-print -r -- "$x" | command
-</programlisting>
- </para>
- </section>
-
-
- <section xml:id="use_read_r">
- <title>&tag_ksh93only;Use the <literal>-r</literal> option of <literal>read</literal> to read a line</title>
- <para>Use the <literal>-r</literal> option of <literal>read</literal> to read a line.
- You never know when a line will end in <literal>\</literal> and without a
- <literal>-r</literal> multiple
- lines can be read.</para>
- </section>
-
-
- <section xml:id="print_compound_variables_using_print_C">
- <title>&tag_ksh93only;Print compound variables using <literal>print -C varname</literal> or <literal>print -v varname</literal></title>
- <para>Print compound variables using <literal>print -C varname</literal> or
- <literal>print -v varname</literal> to make sure that non-printable characters
- are correctly encoded.</para>
-<example><title>Print compound variable with non-printable characters</title>
-<programlisting>
-compound x=(
- a=5
- b="hello"
- c=(
- d=9
- e="$(printf "1\v3")" <co xml:id="co.vertical_tab1" />
- )
-)
-print -v x
-</programlisting>
-<para>will print:</para>
-<screen>
-<computeroutput>(
- a=5
- b=hello
- c=(
- d=9
- e=$'1\0133' <co xml:id="co.vertical_tab2" />
- )
-)</computeroutput>
-</screen>
-<calloutlist>
- <callout arearefs="co.vertical_tab1 co.vertical_tab2">
- <para>vertical tab, <literal>\v</literal>, octal=<literal>\013</literal>.</para>
- </callout>
-</calloutlist>
-</example>
- </section>
-
- <section xml:id="command_name_before_redirections">
- <title>Put the command name and arguments before redirections</title>
- <para>Put the command name and arguments before redirections.
- You can legally do <literal>$ &gt; file date</literal> instead of <literal>date &gt; file</literal>
- but don't do it.</para>
- </section>
-
- <section xml:id="enable_gmacs_editor_mode_for_user_prompts">
- <title>&tag_ksh93only;Enable the <literal>gmacs</literal> editor
- mode when reading user input using the <literal>read</literal> builtin</title>
- <para>Enable the <literal>gmacs</literal>editor mode before reading user
- input using the <literal>read</literal> builtin to enable the use of
- cursor+backspace+delete keys in the edit line</para>
-<example><title>Prompt user for a string with gmacs editor mode enabled</title>
-<programlisting>
-set -o gmacs <co xml:id="co.enable_gmacs" />
-typeset inputstring="default value"
-...
-read -v<co xml:id="co.read_v" /> inputstring<co xml:id="co.readvar" />?"Please enter a string: "<co xml:id="co.prompt" />
-...
-printf "The user entered the following string: '%s'\n" "${inputstring}"
-
-...
-</programlisting>
-<calloutlist>
- <callout arearefs="co.enable_gmacs">
- <para>Enable gmacs editor mode.</para>
- </callout>
- <callout arearefs="co.read_v">
- <para>The value of the variable is displayed and used as a default value.</para>
- </callout>
- <callout arearefs="co.readvar">
- <para>Variable used to store the result.</para>
- </callout>
- <callout arearefs="co.prompt">
- <para>Prompt string which is displayed in stderr.</para>
- </callout>
-</calloutlist>
-</example>
- </section>
- </section><!-- end of I/O -->
-
-
-
-
-
-
- <section xml:id="math">
- <title>Math</title>
-
- <section xml:id="use_builtin_arithmetic_expressions">
- <title>&tag_kshonly;&tag_performance;Use builtin arithmetic expressions instead of external applications</title>
- <para>Use builtin (POSIX shell) arithmetic expressions instead of
- <filename>expr</filename>,
- <filename>bc</filename>,
- <filename>dc</filename>,
- <filename>awk</filename>,
- <filename>nawk</filename> or
- <filename>perl</filename>.
- </para>
- <note>
- <para>ksh93 supports C99-like floating-point arithmetic including special values
- such as
- <simplelist type="inline">
- <member>+Inf</member>
- <member>-Inf</member>
- <member>+NaN</member>
- <member>-NaN</member>
- </simplelist>.
- </para>
- </note>
- </section>
-
-
- <section xml:id="use_floating_point_arithmetic_expressions">
- <title>&tag_ksh93only; Use floating-point arithmetic expressions if
- calculations may trigger a division by zero or other exceptions</title>
- <para>Use floating-point arithmetic expressions if calculations may
- trigger a division by zero or other exceptions - floating point arithmetic expressions in
- ksh93 support special values such as <literal>+Inf</literal>/<literal>-Inf</literal> and
- <literal>+NaN</literal>/<literal>-NaN</literal> which can greatly simplify testing for
- error conditions, e.g. instead of a <literal>trap</literal> or explicit
- <literal>if ... then... else</literal> checks for every sub-expression
- you can check the results for such special values.
- </para>
- <para>Example:
-<screen>
-$ <userinput>ksh93 -c 'integer i=0 j=5 ; print -- "x=$((j/i)) "'</userinput>
-<computeroutput>ksh93: line 1: j/i: divide by zero</computeroutput>
-$ <userinput>ksh93 -c 'float i=0 j=-5 ; print -- "x=$((j/i)) "'</userinput>
-<computeroutput>x=-Inf</computeroutput>
-</screen>
- </para>
- </section>
-
-
- <section xml:id="use_printf_a_for_passing_float_values">
- <title>&tag_ksh93only; Use <literal>printf "%a"</literal> when passing floating-point values</title>
- <para>Use <literal>printf "%a"</literal> when passing floating-point values between scripts or
- as output of a function to avoid rounding errors when converting between
- bases.</para>
- <para>
- Example:
-<programlisting>
-function xxx
-{
- float val
-
- (( val=sin(5.) ))
- printf "%a\n" val
-}
-float out
-(( out=$(xxx) ))
-xxx
-print -- $out
-</programlisting>
-This will print:
-<programlisting>
--0.9589242747
--0x1.eaf81f5e09933226af13e5563bc6p-01
-</programlisting>
- </para>
- </section>
-
-
- <section xml:id="put_constants_into_readonly_variables">
- <title>&tag_kshonly;&tag_performance;Put constant values into readonly variables</title>
- <para>Put constant values into readonly variables</para>
- <para>For example:
-<programlisting>
-float -r M_PI=3.14159265358979323846
-</programlisting>
-or
-<programlisting>
-float M_PI=3.14159265358979323846
-readonly M_PI
-</programlisting>
- </para>
- </section>
-
-
- <section xml:id="avoid_unnecessary_string_number_conversions">
- <title>&tag_kshonly;&tag_performance;Avoid string to number
- (and/or number to string) conversions in arithmetic expressions
- expressions</title>
- <para>Avoid string to number and/or number to string conversions in
- arithmetic expressions expressions to avoid performance degradation
- and rounding errors.</para>
- <example><title>(( x=$x*2 )) vs. (( x=x*2 ))</title>
-<programlisting>
-float x
-...
-(( x=$x*2 ))
-</programlisting>
-<para>
-will convert the variable "x" (stored in the machine's native
-<literal>|long double|</literal> datatype) to a string value in base10 format,
-apply pattern expansion (globbing), then insert this string into the
-arithmetic expressions and parse the value which converts it into the internal |long double| datatype format again.
-This is both slow and generates rounding errors when converting the floating-point value between
-the internal base2 and the base10 representation of the string.
-</para>
-<para>
-The correct usage would be:
-</para>
-<programlisting>
-float x
-...
-(( x=x*2 ))
-</programlisting>
-<para>
-e.g. omit the '$' because it's (at least) redundant within arithmetic expressions.
-</para>
- </example>
-
-
- <example><title>x=$(( y+5.5 )) vs. (( x=y+5.5 ))</title>
-<programlisting>
-float x
-float y=7.1
-...
-x=$(( y+5.5 ))
-</programlisting>
-<para>
-will calculate the value of <literal>y+5.5</literal>, convert it to a
-base-10 string value amd assign the value to the floating-point variable
-<literal>x</literal> again which will convert the string value back to the
-internal |long double| datatype format again.
-</para>
-<para>
-The correct usage would be:
-</para>
-<programlisting>
-float x
-float y=7.1
-...
-(( x=y+5.5 ))
-</programlisting>
-<para>
-i.e. this will save the string conversions and avoid any base2--&gt;base10--&gt;base2-conversions.
-</para>
- </example>
- </section>
-
-
- <section xml:id="set_lc_numeric_when_using_floating_point">
- <title>&tag_ksh93only;Set <envar>LC_NUMERIC</envar> when using floating-point constants</title>
- <para>Set <envar>LC_NUMERIC</envar> when using floating-point constants to avoid problems with radix-point
- representations which differ from the representation used in the script, for example the <literal>de_DE.*</literal> locale
- use ',' instead of '.' as default radix point symbol.</para>
- <para>For example:
-<programlisting>
-# Make sure all math stuff runs in the "C" locale to avoid problems with alternative
-# radix point representations (e.g. ',' instead of '.' in de_DE.*-locales). This
-# needs to be set _before_ any floating-point constants are defined in this script)
-if [[ "${LC_ALL}" != "" ]] ; then
- export \
- LC_MONETARY="${LC_ALL}" \
- LC_MESSAGES="${LC_ALL}" \
- LC_COLLATE="${LC_ALL}" \
- LC_CTYPE="${LC_ALL}"
- unset LC_ALL
-fi
-export LC_NUMERIC=C
-...
-float -r M_PI=3.14159265358979323846
-</programlisting>
- </para>
-
- <note><para>The environment variable <envar>LC_ALL</envar> always overrides all other <envar>LC_*</envar> variables,
- including <envar>LC_NUMERIC</envar>. The script should always protect itself against custom <envar>LC_NUMERIC</envar> and
- <envar>LC_ALL</envar> values as shown in the example above.
- </para></note>
- </section>
-
-
-
- </section><!-- end of math -->
-
-
-
-
-
-
- <section xml:id="misc">
- <title>Misc</title>
-
- <section xml:id="debug_use_lineno_in_ps4">
- <title>Put <literal>[${LINENO}]</literal> in your <envar>PS4</envar></title>
- <para>Put <literal>[${LINENO}]</literal> in your <envar>PS4</envar> prompt so that you will get line
- numbers with you run with <literal>-x</literal>. If you are looking at performance
- issues put <literal>$SECONDS</literal> in the <envar>PS4</envar> prompt as well.</para>
- </section>
-
- </section><!-- end of misc -->
-
-
-
-
-</section><!-- end of RULES -->
-
-
-
-
-</article>
diff --git a/usr/src/lib/libsmartsshd/Makefile b/usr/src/lib/libsmartsshd/Makefile
new file mode 100644
index 0000000000..50d9545ef1
--- /dev/null
+++ b/usr/src/lib/libsmartsshd/Makefile
@@ -0,0 +1,48 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2011 Joyent, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+include ../Makefile.lib
+
+SUBDIRS= $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+install := TARGET = install
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber install lint: $(SUBDIRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+_msg:
+
+FRC:
+
+include ../Makefile.targ
diff --git a/usr/src/lib/libsmartsshd/Makefile.com b/usr/src/lib/libsmartsshd/Makefile.com
new file mode 100644
index 0000000000..914aab055b
--- /dev/null
+++ b/usr/src/lib/libsmartsshd/Makefile.com
@@ -0,0 +1,47 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2011 Joyent, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+LIBRARY= libsmartsshd.a
+VERS= .1
+OBJECTS= sshd-plugin.o
+
+include ../../Makefile.lib
+include ../../Makefile.rootfs
+
+SRCDIR = ../common
+SRCS = $(OBJECTS:%.o=$(SRCDIR)/%.c)
+
+CPPFLAGS += -I$(SRCDIR) -D_REENTRANT -D_FILE_OFFSET_BITS=64
+LIBS = $(DYNLIB) $(LINTLIB)
+LDLIBS += -lc -ldoor
+
+$(LINTLIB) := SRCS= $(SRCDIR)/$(LINTSRC)
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+include ../../Makefile.targ
diff --git a/usr/src/cmd/ssh/libssh/i386/Makefile b/usr/src/lib/libsmartsshd/amd64/Makefile
index fde6250470..31be0ef7e6 100644
--- a/usr/src/cmd/ssh/libssh/i386/Makefile
+++ b/usr/src/lib/libsmartsshd/amd64/Makefile
@@ -19,12 +19,14 @@
#
# CDDL HEADER END
#
-# Copyright 2003 Sun Microsystems, Inc. All rights reserved.
+#
+# Copyright 2011 Joyent, Inc. All rights reserved.
# Use is subject to license terms.
#
# ident "%Z%%M% %I% %E% SMI"
#
include ../Makefile.com
+include ../../Makefile.lib.64
-install: all
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/libsmartsshd/common/llib-lsmartsshd b/usr/src/lib/libsmartsshd/common/llib-lsmartsshd
new file mode 100644
index 0000000000..ace7017d41
--- /dev/null
+++ b/usr/src/lib/libsmartsshd/common/llib-lsmartsshd
@@ -0,0 +1,32 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*LINTLIBRARY*/
+/*PROTOLIB1*/
+/*
+ *
+ * Copyright 2011 Joyent, Inc. All rights reserved.
+ * Use is subject to license terms.
+ *
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
diff --git a/usr/src/cmd/ssh/sshd/mapfile-intf b/usr/src/lib/libsmartsshd/common/mapfile-vers
index 6981212730..ce5fc7a9c0 100644
--- a/usr/src/cmd/ssh/sshd/mapfile-intf
+++ b/usr/src/lib/libsmartsshd/common/mapfile-vers
@@ -18,13 +18,8 @@
#
# CDDL HEADER END
#
-
-#
-# Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, Joyent Inc. All rights reserved.
#
-# sshd defines its own log(), as do various other ssh utilities, thus their
-# symbols are reduced to locals via MAPFILE.NGB in Makefile-ssh.common. sshd
-# must export some symbols too, thus this mapfile augments the former.
#
# MAPFILE HEADER START
@@ -42,8 +37,9 @@
$mapfile_version 2
-SYMBOL_SCOPE {
- global:
- allow_severity; # required by libwrap
- deny_severity; # required by libwrap
+SYMBOL_VERSION SUNWprivate_1.1 {
+ global:
+ sshd_user_rsa_key_allowed;
+ local:
+ *;
};
diff --git a/usr/src/lib/libsmartsshd/common/sshd-plugin.c b/usr/src/lib/libsmartsshd/common/sshd-plugin.c
new file mode 100644
index 0000000000..4f0f0bc1ad
--- /dev/null
+++ b/usr/src/lib/libsmartsshd/common/sshd-plugin.c
@@ -0,0 +1,123 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Joyent, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <alloca.h>
+#include <door.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <pwd.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <openssl/rsa.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LOG_OOM(SZ) fprintf(stderr, "Unable to alloca %d bytes\n", SZ)
+
+static const char *DOOR = "/var/tmp/._joyent_sshd_key_is_authorized";
+static const char *REQ_FMT_STR = "%s %d %s"; /* name uid fp */
+static const int RETURN_SZ = 2;
+
+static const int MAX_ATTEMPTS = 2;
+static const int SLEEP_PERIOD = 1;
+
+static int
+sshd_allowed_in_capi(struct passwd *pw, const char *fp)
+{
+ int allowed = 0;
+ int fd = -1;
+ int blen = 0;
+ int attempts = 0;
+ char *buf = NULL;
+ door_arg_t door_args = {0};
+
+ if (pw == NULL || fp == NULL)
+ return (0);
+
+ blen = snprintf(NULL, 0, REQ_FMT_STR, pw->pw_name, pw->pw_uid, fp) + 1;
+
+ buf = (char *)alloca(blen);
+ if (buf == NULL) {
+ LOG_OOM(blen);
+ return (0);
+ }
+
+ (void) snprintf(buf, blen, REQ_FMT_STR, pw->pw_name, pw->pw_uid, fp);
+ door_args.data_ptr = buf;
+ door_args.data_size = blen;
+
+ door_args.rsize = RETURN_SZ;
+ door_args.rbuf = alloca(RETURN_SZ);
+ if (door_args.rbuf == NULL) {
+ LOG_OOM(RETURN_SZ);
+ return (0);
+ }
+ memset(door_args.rbuf, 0, RETURN_SZ);
+
+ do {
+ fd = open(DOOR, O_RDWR);
+ if (fd < 0)
+ perror("smartplugin: open (of door FD) failed");
+
+ if (door_call(fd, &door_args) < 0) {
+ perror("smartplugin: door_call failed");
+ } else {
+ allowed = atoi(door_args.rbuf);
+ munmap(door_args.rbuf, door_args.rsize);
+ return (allowed);
+ }
+ if (++attempts < MAX_ATTEMPTS)
+ sleep(SLEEP_PERIOD);
+ } while (attempts < MAX_ATTEMPTS);
+
+ return (0);
+}
+
+int
+sshd_user_rsa_key_allowed(struct passwd *pw, RSA *key, const char *fp)
+{
+ return sshd_allowed_in_capi(pw, fp);
+}
+
+int
+sshd_user_dsa_key_allowed(struct passwd *pw, DSA *key, const char *fp)
+{
+ return sshd_allowed_in_capi(pw, fp);
+}
+
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/usr/src/cmd/ssh/libssh/sparc/Makefile b/usr/src/lib/libsmartsshd/i386/Makefile
index fde6250470..2bfe8001d9 100644
--- a/usr/src/cmd/ssh/libssh/sparc/Makefile
+++ b/usr/src/lib/libsmartsshd/i386/Makefile
@@ -19,7 +19,8 @@
#
# CDDL HEADER END
#
-# Copyright 2003 Sun Microsystems, Inc. All rights reserved.
+#
+# Copyright 2011 Joyent, Inc. All rights reserved.
# Use is subject to license terms.
#
# ident "%Z%%M% %I% %E% SMI"
@@ -27,4 +28,4 @@
include ../Makefile.com
-install: all
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/libumem/amd64/umem_genasm.c b/usr/src/lib/libumem/amd64/umem_genasm.c
index 00cc18ab67..ba68cb2d37 100644
--- a/usr/src/lib/libumem/amd64/umem_genasm.c
+++ b/usr/src/lib/libumem/amd64/umem_genasm.c
@@ -69,8 +69,6 @@
#include <umem_impl.h>
#include "umem_base.h"
-#include <stdio.h>
-
const int umem_genasm_supported = 1;
static uintptr_t umem_genasm_mptr = (uintptr_t)&_malloc;
static size_t umem_genasm_msize = 576;
diff --git a/usr/src/lib/libuutil/common/libuutil.h b/usr/src/lib/libuutil/common/libuutil.h
index 6675424466..ec1bf907c1 100644
--- a/usr/src/lib/libuutil/common/libuutil.h
+++ b/usr/src/lib/libuutil/common/libuutil.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
*/
#ifndef _LIBUUTIL_H
@@ -90,6 +91,8 @@ extern void uu_vdie(const char *, va_list) __NORETURN;
/*PRINTFLIKE2*/
extern void uu_xdie(int, const char *, ...) __NORETURN;
extern void uu_vxdie(int, const char *, va_list) __NORETURN;
+/*PRINTFLIKE1*/
+extern void uu_panic(const char *, ...) __NORETURN;
/*
* Exit status functions (not to be used directly)
diff --git a/usr/src/lib/libuutil/common/libuutil_impl.h b/usr/src/lib/libuutil/common/libuutil_impl.h
index 9466e59745..6b73e0f72d 100644
--- a/usr/src/lib/libuutil/common/libuutil_impl.h
+++ b/usr/src/lib/libuutil/common/libuutil_impl.h
@@ -24,11 +24,13 @@
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
+ */
+
#ifndef _LIBUUTIL_IMPL_H
#define _LIBUUTIL_IMPL_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <libuutil.h>
#include <pthread.h>
@@ -42,10 +44,6 @@ extern "C" {
void uu_set_error(uint_t);
#pragma rarely_called(uu_set_error)
-/*PRINTFLIKE1*/
-void uu_panic(const char *format, ...);
-#pragma rarely_called(uu_panic)
-
struct uu_dprintf {
char *uud_name;
uu_dprintf_severity_t uud_severity;
diff --git a/usr/src/lib/libuutil/common/mapfile-vers b/usr/src/lib/libuutil/common/mapfile-vers
index 75295894b6..c9d740bc8d 100644
--- a/usr/src/lib/libuutil/common/mapfile-vers
+++ b/usr/src/lib/libuutil/common/mapfile-vers
@@ -20,6 +20,7 @@
#
#
# Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2015, Joyent, Inc. All rights reserved.
#
#
@@ -101,6 +102,7 @@ SYMBOL_VERSION SUNWprivate_1.1 {
uu_memdup;
uu_msprintf;
uu_open_tmp;
+ uu_panic;
uu_setpname;
uu_strbw;
uu_strcaseeq;
diff --git a/usr/src/lib/libvnd/Makefile b/usr/src/lib/libvnd/Makefile
new file mode 100644
index 0000000000..1352adb8e4
--- /dev/null
+++ b/usr/src/lib/libvnd/Makefile
@@ -0,0 +1,50 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.lib
+
+HDRS = libvnd.h
+HDRDIR = common
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+install := TARGET = install
+lint := TARGET = lint
+
+TYPECHECK_LIB = libvnd.so.1
+TYPELIST = \
+ vnd_ioc_attach_t \
+ vnd_ioc_link_t \
+ vnd_ioc_unlink_t \
+ vnd_ioc_buf_t \
+ vnd_ioc_info_t
+
+.KEEP_STATE:
+
+all clean clobber install lint: $(SUBDIRS)
+
+install_h: $(ROOTHDRS)
+
+check: $(CHECKHDRS) $(TYPECHECK)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../Makefile.targ
diff --git a/usr/src/lib/libvnd/Makefile.com b/usr/src/lib/libvnd/Makefile.com
new file mode 100644
index 0000000000..3c6896de11
--- /dev/null
+++ b/usr/src/lib/libvnd/Makefile.com
@@ -0,0 +1,39 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../../Makefile.lib
+
+LIBRARY = libvnd.a
+VERS = .1
+OBJECTS = libvnd.o
+
+include ../../Makefile.lib
+
+LIBS = $(DYNLIB) $(LINTLIB)
+LDLIBS += -lc
+CPPFLAGS += -I../common
+
+SRCDIR = ../common
+
+C99MODE= -xc99=%all
+C99LMODE= -Xc99=%all
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+include ../../Makefile.targ
diff --git a/usr/src/lib/libvnd/amd64/Makefile b/usr/src/lib/libvnd/amd64/Makefile
new file mode 100644
index 0000000000..15d904c616
--- /dev/null
+++ b/usr/src/lib/libvnd/amd64/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+include ../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/libvnd/common/libvnd.c b/usr/src/lib/libvnd/common/libvnd.c
new file mode 100644
index 0000000000..8972f6cf5a
--- /dev/null
+++ b/usr/src/lib/libvnd/common/libvnd.c
@@ -0,0 +1,550 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#include <limits.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <strings.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stropts.h>
+#include <stdio.h>
+#include <zone.h>
+#include <assert.h>
+#include <sys/sysmacros.h>
+
+#include <sys/vnd.h>
+#include <libvnd.h>
+
+struct vnd_handle {
+ int vh_fd;
+ uint32_t vh_errno;
+ int vh_syserr;
+};
+
+static const char *vnd_strerror_tbl[] = {
+ "no error", /* VND_E_SUCCESS */
+ "not enough memory available", /* VND_E_NOMEM */
+ "no such datalink", /* VND_E_NODATALINK */
+ "datalink not of type DL_ETHER", /* VND_E_NOTETHER */
+ "unknown dlpi failure", /* VND_E_DLPIINVAL */
+ "DL_ATTACH_REQ failed", /* VND_E_ATTACHFAIL */
+ "DL_BIND_REQ failed", /* VND_E_PROMISCFAIL */
+ "DL_PROMISCON_REQ failed", /* VND_E_PROMISCFAIL */
+ "DLD_CAPAB_DIRECT enable failed", /* VND_E_DIRECTFAIL */
+ "bad datalink capability", /* VND_E_CAPACKINVAL */
+ "bad datalink subcapability", /* VND_E_SUBCAPINVAL */
+ "bad dld version", /* VND_E_DLDBADVERS */
+ "failed to create kstats", /* VND_E_KSTATCREATE */
+ "no such vnd link", /* VND_E_NODEV */
+ "netstack doesn't exist", /* VND_E_NONETSTACK */
+ "device already associated", /* VND_E_ASSOCIATED */
+ "device already attached", /* VND_E_ATTACHED */
+ "device already linked", /* VND_E_LINKED */
+ "invalid name", /* VND_E_BADNAME */
+ "permission denied", /* VND_E_PERM */
+ "no such zone", /* VND_E_NOZONE */
+ "failed to initialize vnd stream module", /* VND_E_STRINIT */
+ "device not attached", /* VND_E_NOTATTACHED */
+ "device not linked", /* VND_E_NOTLINKED */
+ "another device has the same link name", /* VND_E_LINKEXISTS */
+ "failed to create minor node", /* VND_E_MINORNODE */
+ "requested buffer size is too large", /* VND_E_BUFTOOBIG */
+ "requested buffer size is too small", /* VND_E_TOOSMALL */
+ "unable to obtain exclusive access to dlpi link, link busy",
+ /* VND_E_DLEXCL */
+ "DLD direct capability not supported over data link",
+ /* VND_E_DIRECTNOTSUP */
+ "invalid property size", /* VND_E_BADPROPSIZE */
+ "invalid property", /* VND_E_BADPROP */
+ "property is read only", /* VND_E_PROPRDONLY */
+ "unexpected system error", /* VND_E_SYS */
+ "capabilities invalid, pass-through module detected",
+ /* VND_E_CAPABPASS */
+ "unknown error" /* VND_E_UNKNOWN */
+};
+
+vnd_errno_t
+vnd_errno(vnd_handle_t *vhp)
+{
+ return (vhp->vh_errno);
+}
+
+const char *
+vnd_strerror(vnd_errno_t err)
+{
+ if (err >= VND_E_UNKNOWN)
+ err = VND_E_UNKNOWN;
+ return (vnd_strerror_tbl[err]);
+}
+
+int
+vnd_syserrno(vnd_handle_t *vhp)
+{
+ return (vhp->vh_syserr);
+}
+
+const char *
+vnd_strsyserror(int err)
+{
+ return (strerror(err));
+}
+
+static int
+vnd_ioc_return(vnd_handle_t *vhp, uint32_t err)
+{
+ if (err != VND_E_SUCCESS) {
+ vhp->vh_errno = err;
+ vhp->vh_syserr = 0;
+ } else {
+ if (errno == EFAULT)
+ abort();
+ vhp->vh_errno = VND_E_SYS;
+ vhp->vh_syserr = errno;
+ }
+ return (-1);
+}
+
+void
+vnd_close(vnd_handle_t *vhp)
+{
+ int ret;
+
+ if (vhp->vh_fd >= 0) {
+ ret = close(vhp->vh_fd);
+ assert(ret == 0);
+ }
+ free(vhp);
+}
+
+static int
+vnd_link(vnd_handle_t *vhp, const char *name)
+{
+ vnd_ioc_link_t vil;
+
+ if (strlen(name) >= VND_NAMELEN) {
+ errno = ENAMETOOLONG;
+ return (-1);
+ }
+
+ (void) strlcpy(vil.vil_name, name, sizeof (vil.vil_name));
+ vil.vil_errno = VND_E_SUCCESS;
+ if (ioctl(vhp->vh_fd, VND_IOC_LINK, &vil) != 0)
+ return (vnd_ioc_return(vhp, vil.vil_errno));
+
+ return (0);
+}
+
+static vnd_handle_t *
+vnd_open_ctl(vnd_errno_t *vnderr, int *syserr)
+{
+ int fd;
+ vnd_handle_t *vhp;
+
+ vhp = malloc(sizeof (vnd_handle_t));
+ if (vhp == NULL) {
+ if (vnderr != NULL)
+ *vnderr = VND_E_NOMEM;
+ if (syserr != NULL)
+ *syserr = 0;
+ return (NULL);
+ }
+ bzero(vhp, sizeof (vnd_handle_t));
+
+ fd = open("/dev/vnd/ctl", O_RDWR);
+ if (fd < 0) {
+ if (vnderr != NULL)
+ *vnderr = VND_E_SYS;
+ if (syserr != NULL)
+ *syserr = errno;
+ free(vhp);
+ return (NULL);
+ }
+
+ vhp->vh_fd = fd;
+ return (vhp);
+}
+
+vnd_handle_t *
+vnd_create(const char *zonename, const char *datalink, const char *linkname,
+ vnd_errno_t *vnderr, int *syserr)
+{
+ int ret;
+ vnd_handle_t *vhp;
+ vnd_ioc_attach_t via;
+ zoneid_t zid;
+
+ if (strlen(datalink) >= VND_NAMELEN) {
+ if (vnderr != NULL)
+ *vnderr = VND_E_BADNAME;
+ if (syserr != NULL)
+ *syserr = 0;
+ return (NULL);
+ }
+
+ vhp = vnd_open_ctl(vnderr, syserr);
+ if (vhp == NULL)
+ return (NULL); /* errno set for us */
+
+ if (zonename != NULL) {
+ zid = getzoneidbyname(zonename);
+ if (zid == -1) {
+ vnd_close(vhp);
+ if (vnderr != NULL)
+ *vnderr = VND_E_NOZONE;
+ if (syserr != NULL)
+ *syserr = 0;
+ return (NULL);
+ }
+ via.via_zoneid = zid;
+ } else {
+ via.via_zoneid = -1;
+ }
+
+ (void) strlcpy(via.via_name, datalink, sizeof (via.via_name));
+ via.via_errno = VND_E_SUCCESS;
+ if (ioctl(vhp->vh_fd, VND_IOC_ATTACH, &via) != 0) {
+ if (via.via_errno != VND_E_SUCCESS) {
+ if (vnderr != NULL)
+ *vnderr = via.via_errno;
+ if (syserr != NULL)
+ *syserr = 0;
+ } else {
+ if (vnderr != NULL)
+ *vnderr = VND_E_SYS;
+ if (syserr != NULL)
+ *syserr = errno;
+ }
+ vnd_close(vhp);
+ return (NULL);
+ }
+
+ ret = vnd_link(vhp, linkname);
+ if (ret != 0) {
+ if (vnderr != NULL)
+ *vnderr = vhp->vh_errno;
+ if (syserr != NULL)
+ *syserr = vhp->vh_syserr;
+ vnd_close(vhp);
+ return (NULL);
+ }
+
+ if (vnderr != NULL)
+ *vnderr = VND_E_SUCCESS;
+ if (syserr != NULL)
+ *syserr = 0;
+
+ return (vhp);
+}
+
+vnd_handle_t *
+vnd_open(const char *zone, const char *link, vnd_errno_t *vnderr, int *syserr)
+{
+ int fd, ret;
+ char path[MAXPATHLEN];
+ vnd_handle_t *vhp;
+
+ if (zone != NULL)
+ ret = snprintf(path, sizeof (path), "/dev/vnd/zone/%s/%s",
+ zone, link);
+ else
+ ret = snprintf(path, sizeof (path), "/dev/vnd/%s", link);
+
+ if (ret >= sizeof (path)) {
+ if (vnderr != NULL)
+ *vnderr = VND_E_BADNAME;
+ if (syserr != NULL)
+ *syserr = 0;
+ return (NULL);
+ }
+
+ fd = open(path, O_RDWR);
+ if (fd < 0) {
+ if (vnderr != NULL)
+ *vnderr = VND_E_SYS;
+ if (syserr != NULL)
+ *syserr = errno;
+ return (NULL);
+ }
+
+ vhp = malloc(sizeof (vnd_handle_t));
+ if (vhp == NULL) {
+ if (vnderr != NULL)
+ *vnderr = VND_E_NOMEM;
+ if (syserr != NULL)
+ *syserr = 0;
+ ret = close(fd);
+ assert(ret == 0);
+ return (NULL);
+ }
+
+ bzero(vhp, sizeof (vnd_handle_t));
+ vhp->vh_fd = fd;
+
+ return (vhp);
+}
+
+int
+vnd_unlink(vnd_handle_t *vhp)
+{
+ vnd_ioc_unlink_t viu;
+ viu.viu_errno = VND_E_SUCCESS;
+
+ if (ioctl(vhp->vh_fd, VND_IOC_UNLINK, &viu) != 0)
+ return (vnd_ioc_return(vhp, viu.viu_errno));
+
+ return (0);
+}
+
+int
+vnd_pollfd(vnd_handle_t *vhp)
+{
+ return (vhp->vh_fd);
+}
+
+int
+vnd_walk(vnd_walk_cb_t func, void *arg, vnd_errno_t *vnderr, int *syserr)
+{
+ vnd_handle_t *vhp;
+ vnd_ioc_list_t vl;
+ vnd_ioc_info_t *viip;
+ int i, ret;
+
+ vl.vl_nents = 0;
+ vl.vl_ents = NULL;
+
+ vhp = vnd_open_ctl(vnderr, syserr);
+ if (vhp == NULL)
+ return (-1); /* errno is set for us */
+
+ /* VND_IOC_LIST only returns generic errnos */
+ if (ioctl(vhp->vh_fd, VND_IOC_LIST, &vl) != 0) {
+ if (vnderr != NULL)
+ *vnderr = VND_E_SYS;
+ if (syserr != NULL)
+ *syserr = errno;
+ (void) vnd_ioc_return(vhp, VND_E_SUCCESS);
+ vnd_close(vhp);
+
+ return (-1);
+ }
+
+ if (vl.vl_actents == 0) {
+ vnd_close(vhp);
+ return (0);
+ }
+
+ viip = malloc(sizeof (vnd_ioc_info_t) * vl.vl_actents);
+ if (viip == NULL) {
+ if (vnderr != NULL)
+ *vnderr = VND_E_NOMEM;
+ if (syserr != NULL)
+ *syserr = 0;
+ vnd_close(vhp);
+ return (-1);
+ }
+
+ vl.vl_nents = vl.vl_actents;
+ vl.vl_ents = viip;
+
+ if (ioctl(vhp->vh_fd, VND_IOC_LIST, &vl) != 0) {
+ if (vnderr != NULL)
+ *vnderr = VND_E_SYS;
+ if (syserr != NULL)
+ *syserr = errno;
+ (void) vnd_ioc_return(vhp, VND_E_SUCCESS);
+ free(viip);
+ vnd_close(vhp);
+ return (-1);
+ }
+
+ ret = 0;
+ for (i = 0; i < MIN(vl.vl_nents, vl.vl_actents); i++) {
+ if (func((vnd_info_t *)(viip + i), arg) != 0) {
+ ret = 1;
+ break;
+ }
+ }
+
+ free(viip);
+ vnd_close(vhp);
+
+ return (ret);
+}
+
+static int
+vnd_prop_readonly(vnd_handle_t *vhp)
+{
+ vhp->vh_syserr = 0;
+ vhp->vh_errno = VND_E_PROPRDONLY;
+ return (-1);
+}
+
+/*ARGSUSED*/
+static int
+vnd_prop_getbuf(vnd_handle_t *vhp, int cmd, void *buf, size_t len)
+{
+ vnd_ioc_buf_t vib;
+ vnd_prop_buf_t *vpbp = (vnd_prop_buf_t *)buf;
+ vib.vib_errno = 0;
+
+ if (ioctl(vhp->vh_fd, cmd, &vib) != 0)
+ return (vnd_ioc_return(vhp, vib.vib_errno));
+
+ vpbp->vpb_size = vib.vib_size;
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+vnd_prop_setbuf(vnd_handle_t *vhp, int cmd, void *buf, size_t len)
+{
+ vnd_ioc_buf_t vib;
+ vnd_prop_buf_t *vpbp = (vnd_prop_buf_t *)buf;
+
+ vib.vib_errno = 0;
+ vib.vib_size = vpbp->vpb_size;
+ if (ioctl(vhp->vh_fd, cmd, &vib) != 0)
+ return (vnd_ioc_return(vhp, vib.vib_errno));
+
+ return (0);
+}
+
+typedef int (*vpt_prop_f)(vnd_handle_t *, int, void *, size_t);
+typedef struct vnd_prop_tab {
+ vnd_prop_t vpt_prop;
+ size_t vpt_size;
+ int vpt_ioctl_get;
+ int vpt_ioctl_set;
+ vpt_prop_f vpt_get;
+ vpt_prop_f vpt_set;
+} vnd_prop_tab_t;
+
+static vnd_prop_tab_t vnd_props[] = {
+ { VND_PROP_RXBUF, sizeof (vnd_prop_buf_t), VND_IOC_GETRXBUF,
+ VND_IOC_SETRXBUF, vnd_prop_getbuf, vnd_prop_setbuf},
+ { VND_PROP_TXBUF, sizeof (vnd_prop_buf_t), VND_IOC_GETTXBUF,
+ VND_IOC_SETTXBUF, vnd_prop_getbuf, vnd_prop_setbuf },
+ { VND_PROP_MAXBUF, sizeof (vnd_prop_buf_t), VND_IOC_GETMAXBUF,
+ -1, vnd_prop_getbuf, NULL },
+ { VND_PROP_MINTU, sizeof (vnd_prop_buf_t), VND_IOC_GETMINTU,
+ -1, vnd_prop_getbuf, NULL },
+ { VND_PROP_MAXTU, sizeof (vnd_prop_buf_t), VND_IOC_GETMAXTU,
+ -1, vnd_prop_getbuf, NULL },
+ { VND_PROP_MAX }
+};
+
+static int
+vnd_prop(vnd_handle_t *vhp, vnd_prop_t prop, void *buf, size_t len,
+ boolean_t get)
+{
+ vnd_prop_tab_t *vpt;
+
+ for (vpt = vnd_props; vpt->vpt_prop != VND_PROP_MAX; vpt++) {
+ if (vpt->vpt_prop != prop)
+ continue;
+
+ if (len != vpt->vpt_size) {
+ vhp->vh_errno = VND_E_BADPROPSIZE;
+ vhp->vh_syserr = 0;
+ return (-1);
+ }
+
+ if (get == B_TRUE) {
+ return (vpt->vpt_get(vhp, vpt->vpt_ioctl_get, buf,
+ len));
+ } else {
+ if (vpt->vpt_set == NULL)
+ return (vnd_prop_readonly(vhp));
+ return (vpt->vpt_set(vhp, vpt->vpt_ioctl_set, buf,
+ len));
+ }
+ }
+
+ vhp->vh_errno = VND_E_BADPROP;
+ vhp->vh_syserr = 0;
+ return (-1);
+}
+
+int
+vnd_prop_get(vnd_handle_t *vhp, vnd_prop_t prop, void *buf, size_t len)
+{
+ return (vnd_prop(vhp, prop, buf, len, B_TRUE));
+}
+
+int
+vnd_prop_set(vnd_handle_t *vhp, vnd_prop_t prop, void *buf, size_t len)
+{
+ return (vnd_prop(vhp, prop, buf, len, B_FALSE));
+}
+
+int
+vnd_prop_writeable(vnd_prop_t prop, boolean_t *write)
+{
+ vnd_prop_tab_t *vpt;
+
+ for (vpt = vnd_props; vpt->vpt_prop != VND_PROP_MAX; vpt++) {
+ if (vpt->vpt_prop != prop)
+ continue;
+
+ *write = (vpt->vpt_set != NULL);
+ return (0);
+ }
+
+ return (-1);
+}
+
+int
+vnd_prop_iter(vnd_handle_t *vhp, vnd_prop_iter_f func, void *arg)
+{
+ int i;
+
+ for (i = 0; i < VND_PROP_MAX; i++) {
+ if (func(vhp, i, arg) != 0)
+ return (1);
+ }
+
+ return (0);
+}
+
+int
+vnd_frameio_read(vnd_handle_t *vhp, frameio_t *fiop)
+{
+ int ret;
+
+ ret = ioctl(vhp->vh_fd, VND_IOC_FRAMEIO_READ, fiop);
+ if (ret == -1) {
+ vhp->vh_errno = VND_E_SYS;
+ vhp->vh_syserr = errno;
+ }
+
+ return (ret);
+}
+
+int
+vnd_frameio_write(vnd_handle_t *vhp, frameio_t *fiop)
+{
+ int ret;
+
+ ret = ioctl(vhp->vh_fd, VND_IOC_FRAMEIO_WRITE, fiop);
+ if (ret == -1) {
+ vhp->vh_errno = VND_E_SYS;
+ vhp->vh_syserr = errno;
+ }
+
+ return (ret);
+}
diff --git a/usr/src/lib/libvnd/common/libvnd.h b/usr/src/lib/libvnd/common/libvnd.h
new file mode 100644
index 0000000000..ea92f113b6
--- /dev/null
+++ b/usr/src/lib/libvnd/common/libvnd.h
@@ -0,0 +1,84 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _LIBVND_H
+#define _LIBVND_H
+
+/*
+ * libvnd interfaces
+ */
+
+#include <stdint.h>
+#include <sys/vnd_errno.h>
+#include <sys/frameio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LIBVND_NAMELEN 32
+
+typedef struct vnd_handle vnd_handle_t;
+
+extern vnd_handle_t *vnd_create(const char *, const char *, const char *,
+ vnd_errno_t *, int *);
+extern vnd_handle_t *vnd_open(const char *, const char *, vnd_errno_t *, int *);
+extern int vnd_unlink(vnd_handle_t *);
+extern void vnd_close(vnd_handle_t *);
+extern vnd_errno_t vnd_errno(vnd_handle_t *);
+extern int vnd_syserrno(vnd_handle_t *);
+extern const char *vnd_strerror(vnd_errno_t);
+extern const char *vnd_strsyserror(int);
+
+extern int vnd_pollfd(vnd_handle_t *);
+
+typedef struct vnd_info {
+ uint32_t vi_version;
+ zoneid_t vi_zone;
+ char vi_name[LIBVND_NAMELEN];
+ char vi_datalink[LIBVND_NAMELEN];
+} vnd_info_t;
+
+typedef int (*vnd_walk_cb_t)(vnd_info_t *, void *);
+extern int vnd_walk(vnd_walk_cb_t, void *, vnd_errno_t *, int *);
+
+typedef enum vnd_prop {
+ VND_PROP_RXBUF = 0,
+ VND_PROP_TXBUF,
+ VND_PROP_MAXBUF,
+ VND_PROP_MINTU,
+ VND_PROP_MAXTU,
+ VND_PROP_MAX
+} vnd_prop_t;
+
+typedef struct vnd_prop_buf {
+ uint64_t vpb_size;
+} vnd_prop_buf_t;
+
+extern int vnd_prop_get(vnd_handle_t *, vnd_prop_t, void *, size_t);
+extern int vnd_prop_set(vnd_handle_t *, vnd_prop_t, void *, size_t);
+extern int vnd_prop_writeable(vnd_prop_t, boolean_t *);
+
+typedef int (*vnd_prop_iter_f)(vnd_handle_t *, vnd_prop_t, void *);
+extern int vnd_prop_iter(vnd_handle_t *, vnd_prop_iter_f, void *);
+
+extern int vnd_frameio_read(vnd_handle_t *, frameio_t *);
+extern int vnd_frameio_write(vnd_handle_t *, frameio_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBVND_H */
diff --git a/usr/src/lib/libvnd/common/llib-lvnd b/usr/src/lib/libvnd/common/llib-lvnd
new file mode 100644
index 0000000000..80a4229e32
--- /dev/null
+++ b/usr/src/lib/libvnd/common/llib-lvnd
@@ -0,0 +1,19 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/* LINTLIBRARY */
+/* PROTOLIB1 */
+
+#include <libvnd.h>
diff --git a/usr/src/lib/libvnd/common/mapfile-vers b/usr/src/lib/libvnd/common/mapfile-vers
new file mode 100644
index 0000000000..0eb862ab60
--- /dev/null
+++ b/usr/src/lib/libvnd/common/mapfile-vers
@@ -0,0 +1,55 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+#
+# TODO When this makes it into illumos we should make it a public interface
+#
+SYMBOL_VERSION ILLUMOSprivate {
+ global:
+ vnd_create;
+ vnd_close;
+ vnd_errno;
+ vnd_frameio_read;
+ vnd_frameio_write;
+ vnd_open;
+ vnd_pollfd;
+ vnd_prop_get;
+ vnd_prop_iter;
+ vnd_prop_set;
+ vnd_prop_writeable;
+ vnd_strerror;
+ vnd_strsyserror;
+ vnd_syserrno;
+ vnd_unlink;
+ vnd_walk;
+ local:
+ *;
+};
diff --git a/usr/src/lib/libvnd/i386/Makefile b/usr/src/lib/libvnd/i386/Makefile
new file mode 100644
index 0000000000..41e699e8f8
--- /dev/null
+++ b/usr/src/lib/libvnd/i386/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/libwanboot/Makefile.com b/usr/src/lib/libwanboot/Makefile.com
index 322bfb51f7..a98a30cfd4 100644
--- a/usr/src/lib/libwanboot/Makefile.com
+++ b/usr/src/lib/libwanboot/Makefile.com
@@ -60,7 +60,7 @@ include ../../Makefile.lib
LIBS += $(LINTLIB)
LDLIBS += -lnvpair -lresolv -lnsl -lsocket -ldevinfo -ldhcputil \
- -linetutil -lc -lcrypto -lssl
+ -linetutil -lc -lsunw_crypto -lsunw_ssl
CPPFLAGS = -I$(SRC)/common/net/wanboot/crypt $(CPPFLAGS.master)
CERRWARN += -_gcc=-Wno-switch
CERRWARN += -_gcc=-Wno-parentheses
diff --git a/usr/src/lib/libzdoor/Makefile b/usr/src/lib/libzdoor/Makefile
new file mode 100644
index 0000000000..50d9545ef1
--- /dev/null
+++ b/usr/src/lib/libzdoor/Makefile
@@ -0,0 +1,48 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2011 Joyent, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+include ../Makefile.lib
+
+SUBDIRS= $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+install := TARGET = install
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber install lint: $(SUBDIRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+_msg:
+
+FRC:
+
+include ../Makefile.targ
diff --git a/usr/src/lib/libzdoor/Makefile.com b/usr/src/lib/libzdoor/Makefile.com
new file mode 100644
index 0000000000..09bde0f1b4
--- /dev/null
+++ b/usr/src/lib/libzdoor/Makefile.com
@@ -0,0 +1,50 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2011 Joyent, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+LIBRARY= libzdoor.a
+VERS= .1
+OBJECTS= zdoor.o \
+ zdoor-int.o \
+ ztree.o \
+ zerror.o
+
+include ../../Makefile.lib
+include ../../Makefile.rootfs
+
+SRCDIR = ../common
+SRCS = $(OBJECTS:%.o=$(SRCDIR)/%.c)
+
+CPPFLAGS += -I$(SRCDIR) -D_REENTRANT -D_FILE_OFFSET_BITS=64
+LIBS = $(DYNLIB) $(LINTLIB)
+LDLIBS += -lc -lzonecfg -lcontract
+
+$(LINTLIB) := SRCS= $(SRCDIR)/$(LINTSRC)
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+include ../../Makefile.targ
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/i386/Makefile b/usr/src/lib/libzdoor/amd64/Makefile
index fde6250470..31be0ef7e6 100644
--- a/usr/src/cmd/ssh/libopenbsd-compat/i386/Makefile
+++ b/usr/src/lib/libzdoor/amd64/Makefile
@@ -19,12 +19,14 @@
#
# CDDL HEADER END
#
-# Copyright 2003 Sun Microsystems, Inc. All rights reserved.
+#
+# Copyright 2011 Joyent, Inc. All rights reserved.
# Use is subject to license terms.
#
# ident "%Z%%M% %I% %E% SMI"
#
include ../Makefile.com
+include ../../Makefile.lib.64
-install: all
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/libzdoor/common/llib-lzdoor b/usr/src/lib/libzdoor/common/llib-lzdoor
new file mode 100644
index 0000000000..a21a904b11
--- /dev/null
+++ b/usr/src/lib/libzdoor/common/llib-lzdoor
@@ -0,0 +1,34 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*LINTLIBRARY*/
+/*PROTOLIB1*/
+/*
+ *
+ * Copyright 2011 Joyent, Inc. All rights reserved.
+ * Use is subject to license terms.
+ *
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <zdoor.h>
diff --git a/usr/src/lib/libzdoor/common/mapfile-vers b/usr/src/lib/libzdoor/common/mapfile-vers
new file mode 100644
index 0000000000..38eba57ee2
--- /dev/null
+++ b/usr/src/lib/libzdoor/common/mapfile-vers
@@ -0,0 +1,48 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2011, Joyent Inc. All rights reserved.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_VERSION SUNWprivate_1.1 {
+ global:
+ zdoor_handle_init;
+ zdoor_handle_destroy;
+ zdoor_open;
+ zdoor_close;
+ local:
+ *;
+};
diff --git a/usr/src/lib/libzdoor/common/zdoor-int.c b/usr/src/lib/libzdoor/common/zdoor-int.c
new file mode 100644
index 0000000000..f77c1453d4
--- /dev/null
+++ b/usr/src/lib/libzdoor/common/zdoor-int.c
@@ -0,0 +1,324 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2012 Joyent, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/fork.h>
+#include <libcontract.h>
+#include <libzonecfg.h>
+#include <sys/contract/process.h>
+#include <sys/ctfs.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "zdoor-int.h"
+#include "zerror.h"
+
+#define ZDOOR_FMT_STR "/var/tmp/.%s"
+
+
+static int
+init_template(void)
+{
+ int fd = 0;
+ int err = 0;
+
+ fd = open64(CTFS_ROOT "/process/template", O_RDWR);
+ if (fd == -1)
+ return (-1);
+
+ err |= ct_tmpl_set_critical(fd, 0);
+ err |= ct_tmpl_set_informative(fd, 0);
+ err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
+ err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
+ if (err || ct_tmpl_activate(fd)) {
+ (void) close(fd);
+ return (-1);
+ }
+
+ return (fd);
+}
+
+static int
+contract_latest(ctid_t *id)
+{
+ int cfd = 0;
+ int r = 0;
+ ct_stathdl_t st = {0};
+ ctid_t result = {0};
+
+ if ((cfd = open64(CTFS_ROOT "/process/latest", O_RDONLY)) == -1)
+ return (errno);
+ if ((r = ct_status_read(cfd, CTD_COMMON, &st)) != 0) {
+ (void) close(cfd);
+ return (r);
+ }
+
+ result = ct_status_get_id(st);
+ ct_status_free(st);
+ (void) close(cfd);
+
+ *id = result;
+ return (0);
+}
+
+static int
+close_on_exec(int fd)
+{
+ int flags = fcntl(fd, F_GETFD, 0);
+ if ((flags != -1) && (fcntl(fd, F_SETFD, flags | FD_CLOEXEC) != -1))
+ return (0);
+ return (-1);
+}
+
+static int
+contract_open(ctid_t ctid, const char *type, const char *file, int oflag)
+{
+ char path[PATH_MAX];
+ int n = 0;
+ int fd = 0;
+
+ if (type == NULL)
+ type = "all";
+
+ n = snprintf(path, PATH_MAX, CTFS_ROOT "/%s/%ld/%s", type, ctid, file);
+ if (n >= sizeof (path)) {
+ errno = ENAMETOOLONG;
+ return (-1);
+ }
+
+ fd = open64(path, oflag);
+ if (fd != -1) {
+ if (close_on_exec(fd) == -1) {
+ int err = errno;
+ (void) close(fd);
+ errno = err;
+ return (-1);
+ }
+ }
+ return (fd);
+}
+
+static int
+contract_abandon_id(ctid_t ctid)
+{
+ int fd = 0;
+ int err = 0;
+
+ fd = contract_open(ctid, "all", "ctl", O_WRONLY);
+ if (fd == -1)
+ return (errno);
+
+ err = ct_ctl_abandon(fd);
+ (void) close(fd);
+
+ return (err);
+}
+
+/*
+ * zdoor_fattach(zone,service,door,detach_only) is heavily borrowed from
+ * zonestatd. Basically this forks, zone_enter's the targeted zone,
+ * fattaches to /var/tmp/.<service> with the door you've opened.
+ * detach_only gets passed in on door_stop to fdetach in the targeted zone.
+ * Note that this code really does require all the contract calls, which are
+ * all the static functions preceding this (have a look at zone_enter; without
+ * that code zone_enter will kick back EINVAL).
+ */
+int
+zdoor_fattach(zoneid_t zoneid, const char *service, int door, int detach_only)
+{
+ int fd = 0;
+ int len = 0;
+ int pid = 0;
+ int stat = 0;
+ int tmpl_fd = 0;
+ char path[MAXPATHLEN] = {0};
+ ctid_t ct = -1;
+
+ if (zoneid < 0) {
+ zdoor_debug("zdoor_fattach: zoneid < 0");
+ return (ZDOOR_ARGS_ERROR);
+ }
+
+ if (service == NULL) {
+ zdoor_debug("zdoor_fattach: NULL service");
+ return (ZDOOR_ARGS_ERROR);
+ }
+
+ if ((tmpl_fd = init_template()) < 0) {
+ zdoor_warn("zdoor_fattach: init contract for %d:%s failed",
+ zoneid, service);
+ return (ZDOOR_ERROR);
+ }
+
+ len = snprintf(NULL, 0, ZDOOR_FMT_STR, service) + 1;
+ if (len > MAXPATHLEN)
+ return (ZDOOR_ARGS_ERROR);
+ (void) snprintf(path, len, ZDOOR_FMT_STR, service);
+
+ zdoor_info("zdoor_fattach: ensuring %s", path);
+
+ pid = fork();
+ if (pid < 0) {
+ (void) ct_tmpl_clear(tmpl_fd);
+ zdoor_error("zdoor_fattach: unable to fork for zone_enter: %s",
+ strerror(errno));
+ return (ZDOOR_OK);
+ }
+
+ if (pid == 0) {
+ zdoor_debug("zdoor_fattach(CHILD): starting");
+ (void) ct_tmpl_clear(tmpl_fd);
+ (void) close(tmpl_fd);
+ if (zone_enter(zoneid) != 0) {
+ zdoor_debug("zdoor_fattach(CHILD): zone_enter fail %s",
+ strerror(errno));
+ if (errno == EINVAL) {
+ _exit(0);
+ }
+ _exit(1);
+ }
+ (void) fdetach(path);
+ (void) unlink(path);
+ if (detach_only) {
+ zdoor_debug("zdoor_fattach(CHILD): detach only, done");
+ _exit(0);
+ }
+ fd = open(path, O_CREAT|O_RDWR, 0644);
+ if (fd < 0) {
+ zdoor_debug("zdoor_fattach(CHILD): open failed: %s",
+ strerror(errno));
+ _exit(2);
+ }
+ if (fattach(door, path) != 0) {
+ zdoor_debug("zdoor_fattach(CHILD): fattach failed: %s",
+ strerror(errno));
+ _exit(3);
+ }
+ _exit(0);
+ }
+ if (contract_latest(&ct) == -1)
+ ct = -1;
+ (void) ct_tmpl_clear(tmpl_fd);
+ (void) close(tmpl_fd);
+ (void) contract_abandon_id(ct);
+
+ zdoor_debug("zdoor_fattach: waiting for child...");
+ while (waitpid(pid, &stat, 0) != pid)
+ ;
+ if (WIFEXITED(stat) && WEXITSTATUS(stat) == 0) {
+ zdoor_debug(" child exited with success");
+ zdoor_debug("zdoor_fattach: returning ZDOOR_OK");
+ return (ZDOOR_OK);
+ }
+
+ zdoor_debug(" child exited with %d", WEXITSTATUS(stat));
+ zdoor_debug("zdoor_fattach: returning ZDOOR_ERROR");
+ return (ZDOOR_ERROR);
+}
+
+/*
+ * zdoor_zone_is_running(zone) returns 1 if the specified zone is running, or 0
+ * if it is any other state. It additionally eats any other errors it
+ * encounters and returns 0 upon encountering them.
+ */
+boolean_t
+zdoor_zone_is_running(zoneid_t zoneid)
+{
+ zone_state_t state;
+ char zone[ZONENAME_MAX];
+ if (zoneid < 0)
+ return (B_FALSE);
+
+ if (getzonenamebyid(zoneid, zone, ZONENAME_MAX) < 0)
+ return (B_FALSE);
+
+ if (!zone_get_state((char *)zone, &state) == Z_OK)
+ return (B_FALSE);
+
+ return (state == ZONE_STATE_RUNNING);
+}
+
+/*
+ * zdoor_cookie_create simply allocates and initializes
+ * memory. Returns NULL on any error.
+ */
+zdoor_cookie_t *
+zdoor_cookie_create(const char *zonename, const char *service,
+const void *biscuit)
+{
+ zdoor_cookie_t *cookie = NULL;
+
+ if (zonename == NULL || service == NULL)
+ return (NULL);
+
+ cookie = (zdoor_cookie_t *)calloc(1, sizeof (zdoor_cookie_t));
+ if (cookie == NULL) {
+ OUT_OF_MEMORY();
+ return (NULL);
+ }
+ cookie->zdc_biscuit = (void *)biscuit;
+ cookie->zdc_zonename = strdup((char *)zonename);
+ if (cookie->zdc_zonename == NULL) {
+ zdoor_cookie_free(cookie);
+ OUT_OF_MEMORY();
+ return (NULL);
+ }
+ cookie->zdc_service = strdup((char *)service);
+ if (cookie->zdc_service == NULL) {
+ zdoor_cookie_free(cookie);
+ OUT_OF_MEMORY();
+ return (NULL);
+ }
+
+ return (cookie);
+}
+
+/*
+ * zdoor_cookie_free(cookie) cleans up any memory associated with the
+ * specified cookie.
+ */
+void
+zdoor_cookie_free(zdoor_cookie_t *cookie)
+{
+ if (cookie == NULL)
+ return;
+
+ if (cookie->zdc_zonename != NULL) {
+ free(cookie->zdc_zonename);
+ cookie->zdc_zonename = NULL;
+ }
+
+ if (cookie->zdc_service != NULL) {
+ free(cookie->zdc_service);
+ cookie->zdc_service = NULL;
+ }
+
+ free(cookie);
+}
diff --git a/usr/src/lib/libzdoor/common/zdoor-int.h b/usr/src/lib/libzdoor/common/zdoor-int.h
new file mode 100644
index 0000000000..782452c426
--- /dev/null
+++ b/usr/src/lib/libzdoor/common/zdoor-int.h
@@ -0,0 +1,64 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Joyent, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _ZDOOR_INT_H
+#define _ZDOOR_INT_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <pthread.h>
+#include <zdoor.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum zdoor_action_t {
+ ZDOOR_ACTION_NOOP,
+ ZDOOR_ACTION_STOP,
+ ZDOOR_ACTION_START
+} zdoor_action_t;
+
+struct zdoor_handle {
+ pthread_mutex_t zdh_lock;
+ void *zdh_zonecfg_handle;
+ void *zdh_ztree;
+};
+
+zdoor_cookie_t *zdoor_cookie_create(const char *zonename, const char *service,
+ const void *biscuit);
+
+void zdoor_cookie_free(zdoor_cookie_t *cookie);
+
+boolean_t zdoor_zone_is_running(zoneid_t zoneid);
+
+int zdoor_fattach(zoneid_t zoneid, const char *service, int door,
+ int detach_only);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZDOOR_INT_H */
diff --git a/usr/src/lib/libzdoor/common/zdoor.c b/usr/src/lib/libzdoor/common/zdoor.c
new file mode 100644
index 0000000000..f2996b4e2d
--- /dev/null
+++ b/usr/src/lib/libzdoor/common/zdoor.c
@@ -0,0 +1,437 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Joyent, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <alloca.h>
+#include <door.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libzonecfg.h>
+#include <pthread.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <syslog.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stropts.h>
+#include <unistd.h>
+#include <zdoor.h>
+#include <zone.h>
+
+#include "zdoor-int.h"
+#include "zerror.h"
+#include "ztree.h"
+
+extern void *
+zonecfg_notify_bind(int(*func)(const char *zonename, zoneid_t zid,
+ const char *newstate, const char *oldstate,
+ hrtime_t when, void *p), void *p);
+
+extern void
+zonecfg_notify_unbind(void *handle);
+
+/*
+ * _callback(cookie, door_args...) is our private function that we tell
+ * the Solaris door API about. This function does some sanity checking on
+ * arguments and issues a callback to the owner of this door. That API
+ * will return us memory that needs to be sent back to the client on the
+ * other end of the door, but since the door_return API never gives you
+ * back control of the function, this does a simple alloca/memcpy and
+ * frees up the memory pointed to by the parent. While this really doesn't
+ * let a client do much other than pass a simple struct of primitives (or
+ * more likely more common a char *), that's the way the door API works,
+ * and so this isn't really imposing any restriction that didn't already
+ * need to be dealt with by someone. This is why the zdoor_result structure
+ * takes a char *, rather than a void * for the data pointer.
+ */
+static void
+_callback(void *cookie, char *argp, size_t arg_size, door_desc_t *dp,
+ uint_t n_desc)
+{
+ zdoor_result_t *result = NULL;
+ void *door_response = NULL;
+ int size = 0;
+ dtree_entry_t *entry = (dtree_entry_t *)cookie;
+
+ if (entry == NULL) {
+ zdoor_warn("_callback: NULL cookie? door_returning");
+ (void) door_return(NULL, 0, NULL, 0);
+ }
+
+ (void) pthread_mutex_lock(&entry->dte_parent->zte_parent->zdh_lock);
+ zdoor_debug("_callback: calling back with %p", entry->dte_cookie);
+ result = entry->dte_callback(entry->dte_cookie, argp, arg_size);
+ zdoor_debug("_callback: app callback returned %p", result);
+ (void) pthread_mutex_unlock(&entry->dte_parent->zte_parent->zdh_lock);
+
+ if (result == NULL) {
+ zdoor_debug("_callback: door_returning NULL");
+ (void) door_return(NULL, 0, NULL, 0);
+ }
+
+ if (result->zdr_data != NULL && result->zdr_size > 0) {
+ door_response = alloca(result->zdr_size);
+ if (door_response != NULL) {
+ size = result->zdr_size;
+ (void) memcpy(door_response,
+ (void *) result->zdr_data, size);
+ }
+ }
+
+ if (result->zdr_data != NULL)
+ free(result->zdr_data);
+ free(result);
+
+ zdoor_debug("_callback: door_returning %p, %d", door_response, size);
+ (void) door_return(door_response, size, NULL, 0);
+}
+
+static void
+zdoor_stop(dtree_entry_t *entry)
+{
+ zoneid_t zid = -1;
+
+ if (entry == NULL) {
+ zdoor_debug("zdoor_stop: NULL arguments");
+ return;
+ }
+
+ zdoor_debug("zdoor_stop: entry=%p, zone=%s, service=%s",
+ entry, entry->dte_parent->zte_zonename, entry->dte_service);
+
+ zid = getzoneidbyname(entry->dte_parent->zte_zonename);
+ (void) zdoor_fattach(zid, entry->dte_service, entry->dte_door, 1);
+ (void) door_revoke(entry->dte_door);
+ entry->dte_door = -1;
+
+ zdoor_debug("zdoor_stop returning");
+}
+
+/*
+ * zdoor_create is called both by the main API
+ * call zdoor_open, as well as by the zone_monitor code upon a zone restart
+ * (assuming it already has a door in it). This code assumes that the
+ * permissions were correct (e.g., the target door is not a GZ, that this
+ * program is being run out of the GZ), but does not assume that the target
+ * door file has not changed out from under us, so that is explicitly rechecked.
+ *
+ * This also assumes the parent has already locked handle.
+ */
+static int
+zdoor_create(dtree_entry_t *entry)
+{
+ int status = ZDOOR_OK;
+ zoneid_t zid = -1;
+ zdoor_handle_t handle = NULL;
+
+ if (entry == NULL) {
+ zdoor_debug("zdoor_create: NULL arguments");
+ return (ZDOOR_ARGS_ERROR);
+ }
+
+ zdoor_debug("zdoor_create: entry=%p, zone=%s, service=%s",
+ entry, entry->dte_parent->zte_zonename, entry->dte_service);
+
+ handle = entry->dte_parent->zte_parent;
+
+ zid = getzoneidbyname(entry->dte_parent->zte_zonename);
+ if (zid < 0) {
+ zdoor_info("zdoor_create: %s is a non-existient zone",
+ entry->dte_parent->zte_zonename);
+ return (ZDOOR_ERROR);
+ }
+ if (!zdoor_zone_is_running(zid)) {
+ zdoor_debug("zdoor_create: %s is not running",
+ entry->dte_parent->zte_zonename);
+ return (ZDOOR_ZONE_NOT_RUNNING);
+ }
+
+ entry->dte_door = door_create(_callback, entry, 0);
+ zdoor_info("zdoor_create: door_create returned %d", entry->dte_door);
+ if (entry->dte_door < 0) {
+ zdoor_stop(entry);
+ return (ZDOOR_ERROR);
+ }
+
+ status = zdoor_fattach(zid, entry->dte_service, entry->dte_door, 0);
+
+ zdoor_debug("zdoor_create: returning %d", status);
+ return (status);
+}
+
+
+/*
+ * door_visitor(entry) is a callback from the ztree code that checks whether
+ * or not we should be taking some action on a given door. Note that the
+ * callpath to this API is:
+ * SYSTEM ->
+ * zone_monitor ->
+ * ztree_walk ->
+ * door_visitor
+ *
+ * Which is important to note that this API assumes that all things needing
+ * locking are locked by a parent caller (which is the zone_monitor).
+ */
+static void
+zdoor_visitor(dtree_entry_t *entry)
+{
+ if (entry == NULL) {
+ zdoor_info("zdoor_visitor: entered with NULL entry");
+ return;
+ }
+
+ zdoor_debug("zdoor_visitor: entered for entry=%p, service=%s",
+ entry, entry->dte_service);
+
+ if (entry->dte_parent->zte_action == ZDOOR_ACTION_STOP) {
+ zdoor_debug(" stopping zdoor");
+ zdoor_stop(entry);
+ } else if (entry->dte_parent->zte_action == ZDOOR_ACTION_START) {
+ zdoor_debug(" starting zdoor");
+ if (zdoor_create(entry) != ZDOOR_OK) {
+ zdoor_error("door_visitor: Unable to restart zdoor\n");
+ }
+ }
+}
+
+/*
+ * zone_monitor(zonename, zid, newstate, oldstate, when, cookie) is our
+ * registered callback with libzonecfg to notify us of any changes to a
+ * given zone. This activates a walk on all doors for a zone iff the state
+ * is changing from running or into running.
+ */
+static int
+zone_monitor(const char *zonename, zoneid_t zid, const char *newstate,
+ const char *oldstate, hrtime_t when, void *p)
+{
+ zdoor_handle_t handle = (zdoor_handle_t)p;
+ ztree_entry_t *entry = NULL;
+
+ if (handle == NULL) {
+ zdoor_warn("zone_monitor: entered with NULL handle?");
+ return (-1);
+ }
+
+ zdoor_info("zone_monitor: zone=%s, zid=%d, newst=%s, oldst=%s, p=%p",
+ zonename, zid, newstate, oldstate, p);
+
+ (void) pthread_mutex_lock(&(handle->zdh_lock));
+ entry = ztree_zone_find(handle, zonename);
+ if (entry != NULL) {
+ zdoor_debug(" found entry in ztree");
+ entry->zte_action = ZDOOR_ACTION_NOOP;
+ if (strcmp("running", newstate) == 0) {
+ if (strcmp("ready", oldstate) == 0)
+ entry->zte_action = ZDOOR_ACTION_START;
+ } else if (strcmp("shutting_down", newstate) == 0) {
+ if (strcmp("running", oldstate) == 0)
+ entry->zte_action = ZDOOR_ACTION_STOP;
+ }
+ zdoor_debug(" set state to: %d", entry->zte_action);
+ if (entry->zte_action != ZDOOR_ACTION_NOOP)
+ ztree_walk_doors(handle, zonename);
+ }
+ (void) pthread_mutex_unlock(&(handle->zdh_lock));
+
+ zdoor_info("zone_monitor: returning");
+ return (0);
+}
+
+zdoor_handle_t
+zdoor_handle_init()
+{
+ zdoor_handle_t handle = NULL;
+
+ zdoor_debug("zdoor_handle_init entered");
+
+ handle = (zdoor_handle_t)calloc(1, sizeof (struct zdoor_handle));
+ if (handle == NULL) {
+ OUT_OF_MEMORY();
+ return (NULL);
+ }
+
+ (void) pthread_mutex_init(&(handle->zdh_lock), NULL);
+ handle->zdh_zonecfg_handle = zonecfg_notify_bind(zone_monitor, handle);
+ if (handle->zdh_zonecfg_handle == NULL) {
+ zdoor_error("zonecfg_notify_bind failure: %s", strerror(errno));
+ return (NULL);
+ }
+
+ zdoor_debug("zdoor_handle_init returning %p", handle);
+ return (handle);
+}
+
+void
+zdoor_handle_destroy(zdoor_handle_t handle)
+{
+ if (handle == NULL) {
+ zdoor_debug("zdoor_handle_destroy: NULL arguments");
+ return;
+ }
+
+ zdoor_debug("zdoor_handle_destroy: handle=%p", handle);
+
+ (void) pthread_mutex_lock(&(handle->zdh_lock));
+ zonecfg_notify_unbind(handle->zdh_zonecfg_handle);
+ (void) pthread_mutex_unlock(&(handle->zdh_lock));
+ (void) pthread_mutex_destroy(&(handle->zdh_lock));
+ free(handle);
+}
+
+/*
+ * zdoor_open(zone, service, biscuit, callback) is the main public facing API in
+ * libzdoor. It will open a door with the name .[service] under
+ * [zonepath]/root/var/tmp, where [zonepath] is resolved on the fly. Note this
+ * API can only be invoked from the global zone, and will not allow you to open
+ * a zdoor in the global zone.
+ */
+int
+zdoor_open(zdoor_handle_t handle, const char *zonename, const char *service,
+ void *biscuit, zdoor_callback callback)
+{
+ zdoor_cookie_t *zdoor_cookie = NULL;
+ int rc = -1;
+ int status = ZDOOR_OK;
+ zoneid_t zid = -1;
+ dtree_entry_t *entry = NULL;
+
+ if (handle == NULL || zonename == NULL ||
+ service == NULL || callback == NULL) {
+ zdoor_debug("zdoor_open: NULL arguments");
+ return (ZDOOR_ARGS_ERROR);
+ }
+ zdoor_debug("zdoor_open: entered: handle=%p, zone=%s, service=%s",
+ handle, zonename, service);
+
+ if (getzoneid() != GLOBAL_ZONEID) {
+ zdoor_warn("zdoor_open: not invoked from global zone");
+ return (ZDOOR_NOT_GLOBAL_ZONE);
+ }
+
+
+ zid = getzoneidbyname(zonename);
+ if (zid < 0) {
+ zdoor_info("zdoor_open: %s is a non-existent zone", zonename);
+ return (ZDOOR_ARGS_ERROR);
+ }
+
+ if (zid == GLOBAL_ZONEID) {
+ zdoor_warn("zdoor_open: zdoors not allowed in global zone");
+ return (ZDOOR_ZONE_FORBIDDEN);
+ }
+
+ if (!zdoor_zone_is_running(zid)) {
+ zdoor_info("zdoor_open: %s is not running", zonename);
+ return (ZDOOR_ZONE_NOT_RUNNING);
+ }
+
+ zdoor_cookie = zdoor_cookie_create(zonename, service, biscuit);
+ if (zdoor_cookie == NULL) {
+ OUT_OF_MEMORY();
+ return (ZDOOR_OUT_OF_MEMORY);
+ }
+
+ (void) pthread_mutex_lock(&(handle->zdh_lock));
+ rc = ztree_zone_add(handle, zonename, zdoor_visitor);
+ if (rc != ZTREE_SUCCESS && rc != ZTREE_ALREADY_EXISTS) {
+ zdoor_debug("zdoor_open: unable to add zone to ztree: %d", rc);
+ status = ZDOOR_ERROR;
+ goto out;
+ }
+ rc = ztree_door_add(handle, zonename, service, callback,
+ zdoor_cookie);
+ if (rc != ZTREE_SUCCESS) {
+ zdoor_debug("zdoor_open: unable to add door to ztree: %d", rc);
+ if (rc == ZTREE_ALREADY_EXISTS) {
+ zdoor_warn("service %s already has a zdoor", service);
+ }
+ status = ZDOOR_ERROR;
+ goto out;
+ }
+
+ entry = ztree_door_find(handle, zonename, service);
+ if (entry == NULL) {
+ zdoor_debug("zdoor_open: unable to find door in ztree?");
+ status = ZDOOR_ERROR;
+ goto out;
+ }
+ if (zdoor_create(entry) != ZDOOR_OK) {
+ zdoor_info("zdoor_open: zdoor_create failed.");
+ status = ZDOOR_ERROR;
+ goto out;
+ }
+out:
+ if (status != ZDOOR_OK) {
+ zdoor_debug("zdoor_open: status not ok, stopping and cleaning");
+ zdoor_stop(entry);
+ ztree_door_remove(handle, entry);
+ zdoor_cookie_free(zdoor_cookie);
+ }
+ (void) pthread_mutex_unlock(&(handle->zdh_lock));
+ zdoor_debug("zdoor_open: returning %d", status);
+ return (status);
+}
+
+/*
+ * zdoor_close(zone, service) unregisters a previously created zdoor, and
+ * returns the biscuit provided at creation time, so the caller can free it.
+ * Returns NULL on any error.
+ */
+void *
+zdoor_close(zdoor_handle_t handle, const char *zonename, const char *service)
+{
+ dtree_entry_t *entry = NULL;
+ zdoor_cookie_t *cookie = NULL;
+ void *biscuit = NULL;
+
+ if (handle == NULL || zonename == NULL || service == NULL) {
+ zdoor_debug("zdoor_close: NULL arguments");
+ return (NULL);
+ }
+
+ zdoor_debug("zdoor_close: entered handle=%p, zone=%s, service=%s",
+ handle, zonename, service);
+
+ (void) pthread_mutex_lock(&(handle->zdh_lock));
+
+ entry = ztree_door_find(handle, zonename, service);
+ if (entry != NULL) {
+ zdoor_debug("zdoor_close: found door in ztree, stopping");
+ zdoor_stop(entry);
+ cookie = ztree_door_remove(handle, entry);
+ if (cookie != NULL) {
+ biscuit = cookie->zdc_biscuit;
+ zdoor_cookie_free(cookie);
+ }
+ } else {
+ zdoor_debug("zdoor_close: didn't find door in ztree");
+ }
+
+ (void) pthread_mutex_unlock(&(handle->zdh_lock));
+
+ zdoor_debug("zdoor_close: returning %p", biscuit);
+ return (biscuit);
+}
diff --git a/usr/src/lib/libzdoor/common/zerror.c b/usr/src/lib/libzdoor/common/zerror.c
new file mode 100644
index 0000000000..5ccb449e1b
--- /dev/null
+++ b/usr/src/lib/libzdoor/common/zerror.c
@@ -0,0 +1,117 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Joyent, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <pthread.h>
+#include <sys/types.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "zerror.h"
+
+static const char *PREFIX = "%s ZDOOR:%s:T(%d): ";
+
+static const char *DEBUG_ENV_VAR = "ZDOOR_TRACE";
+
+static boolean_t
+is_debug_enabled()
+{
+ boolean_t enabled = B_FALSE;
+ const char *_envp = getenv(DEBUG_ENV_VAR);
+ if (_envp != NULL && atoi(_envp) >= 2)
+ enabled = B_TRUE;
+
+ return (enabled);
+}
+
+static boolean_t
+is_info_enabled()
+{
+ boolean_t enabled = B_FALSE;
+ const char *_envp = getenv(DEBUG_ENV_VAR);
+ if (_envp != NULL && atoi(_envp) >= 1)
+ enabled = B_TRUE;
+
+ return (enabled);
+}
+
+void
+zdoor_debug(const char *fmt, ...)
+{
+ va_list alist;
+
+ if (!is_debug_enabled())
+ return;
+
+ va_start(alist, fmt);
+
+ (void) fprintf(stderr, PREFIX, __TIME__, "DEBUG", pthread_self());
+ (void) vfprintf(stderr, fmt, alist);
+ (void) fprintf(stderr, "\n");
+ va_end(alist);
+}
+
+void
+zdoor_info(const char *fmt, ...)
+{
+ va_list alist;
+
+ if (!is_info_enabled())
+ return;
+
+ va_start(alist, fmt);
+
+ (void) fprintf(stderr, PREFIX, __TIME__, "INFO", pthread_self());
+ (void) vfprintf(stderr, fmt, alist);
+ (void) fprintf(stderr, "\n");
+ va_end(alist);
+}
+
+void
+zdoor_warn(const char *fmt, ...)
+{
+ va_list alist;
+
+ va_start(alist, fmt);
+
+ (void) fprintf(stderr, PREFIX, __TIME__, "WARN", pthread_self());
+ (void) vfprintf(stderr, fmt, alist);
+ (void) fprintf(stderr, "\n");
+ va_end(alist);
+}
+
+void
+zdoor_error(const char *fmt, ...)
+{
+ va_list alist;
+
+ va_start(alist, fmt);
+
+ (void) fprintf(stderr, PREFIX, __TIME__, "ERROR", pthread_self());
+ (void) vfprintf(stderr, fmt, alist);
+ (void) fprintf(stderr, "\n");
+ va_end(alist);
+}
diff --git a/usr/src/lib/libzdoor/common/zerror.h b/usr/src/lib/libzdoor/common/zerror.h
new file mode 100644
index 0000000000..afc848fcea
--- /dev/null
+++ b/usr/src/lib/libzdoor/common/zerror.h
@@ -0,0 +1,48 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Joyent, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _ZERROR_H
+#define _ZERROR_H
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern void zdoor_debug(const char *fmt, ...);
+extern void zdoor_info(const char *fmt, ...);
+extern void zdoor_warn(const char *fmt, ...);
+extern void zdoor_error(const char *fmt, ...);
+
+#define OUT_OF_MEMORY() \
+ zdoor_error("Out of Memory at %s:%d", __FILE__, __LINE__)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZERROR_H */
diff --git a/usr/src/lib/libzdoor/common/ztree.c b/usr/src/lib/libzdoor/common/ztree.c
new file mode 100644
index 0000000000..25c67f62fb
--- /dev/null
+++ b/usr/src/lib/libzdoor/common/ztree.c
@@ -0,0 +1,344 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Joyent, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <search.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "zerror.h"
+#include "ztree.h"
+
+
+/*
+ * ztree is just a helpful wrapper over a tsearch binary tree that deals with
+ * all of the libzdoor types.
+ *
+ * So what this ztree actually is is a tree of trees. The outer tree is a tree
+ * of zones, and each node holds a tree of doors.
+ */
+
+/*
+ * _ztree_compare(p1, p2) is the tsearch callback for comparing the "outer"
+ * tree (e.g., the one of zones).
+ */
+static int
+_ztree_compare(const void *p1, const void *p2)
+{
+ ztree_entry_t *z1 = (ztree_entry_t *)p1;
+ ztree_entry_t *z2 = (ztree_entry_t *)p2;
+
+ if (z1 == NULL && z2 == NULL)
+ return (0);
+ if (z1 == NULL && z2 != NULL)
+ return (-1);
+ if (z1 != NULL && z2 == NULL)
+ return (1);
+
+ return (strcmp(z1->zte_zonename, z2->zte_zonename));
+}
+
+/*
+ * _dtree_compare(p1, p2) is the tsearch callback for comparing the "inner"
+ * tree (e.g., the one of doors).
+ */
+static int
+_dtree_compare(const void *p1, const void *p2)
+{
+ dtree_entry_t *d1 = (dtree_entry_t *)p1;
+ dtree_entry_t *d2 = (dtree_entry_t *)p2;
+
+ if (d1 == NULL && d2 == NULL)
+ return (0);
+ if (d1 == NULL && d2 != NULL)
+ return (-1);
+ if (d1 != NULL && d2 == NULL)
+ return (1);
+
+ return (strcmp(d1->dte_service, d2->dte_service));
+}
+
+static void
+ztree_entry_free(ztree_entry_t *entry)
+{
+ if (entry == NULL)
+ return;
+
+ if (entry->zte_zonename != NULL)
+ free(entry->zte_zonename);
+
+ free(entry);
+}
+
+static void
+dtree_entry_free(dtree_entry_t *entry)
+{
+ if (entry == NULL)
+ return;
+
+ if (entry->dte_service)
+ free(entry->dte_service);
+
+ free(entry);
+}
+
+
+/*
+ * ztree_zone_add inserts a new zone into the tree iff
+ * there is not already an entry for that zone. This method returns one of
+ * four possible return codes, ZTREE_SUCCESS on :), ZTREE_ARGUMENT_ERROR if
+ * zone is NULL, ZTREE_ERROR if there is internal failure (e.g., OOM), and
+ * ZTREE_ALREADY_EXISTS if the zone is already in the tree.
+ */
+int
+ztree_zone_add(struct zdoor_handle *handle, const char *zonename,
+ztree_door_visitor visitor)
+{
+ ztree_entry_t *entry = NULL;
+ void *ret = NULL;
+ int status = ZTREE_SUCCESS;
+
+ if (handle == NULL || zonename == NULL)
+ return (ZTREE_ARGUMENT_ERROR);
+
+ entry = (ztree_entry_t *)calloc(1, sizeof (ztree_entry_t));
+ if (entry == NULL) {
+ OUT_OF_MEMORY();
+ return (ZTREE_ERROR);
+ }
+ entry->zte_zonename = strdup(zonename);
+ if (entry->zte_zonename == NULL) {
+ ztree_entry_free(entry);
+ OUT_OF_MEMORY();
+ return (ZTREE_ERROR);
+ }
+ entry->zte_action = ZDOOR_ACTION_NOOP;
+ entry->zte_parent = handle;
+ entry->zte_visitor = visitor;
+
+ ret = tsearch(entry, &(handle->zdh_ztree), _ztree_compare);
+ if (ret == NULL) {
+ ztree_entry_free(entry);
+ status = ZTREE_ERROR;
+ OUT_OF_MEMORY();
+ } else if ((*(ztree_entry_t **)ret) != entry) {
+ ztree_entry_free(entry);
+ status = ZTREE_ALREADY_EXISTS;
+ }
+
+ return (status);
+}
+
+
+/*
+ * ztree_zone_find returns the entry in the "outer" tree representing
+ * this zone, if it exists, NULL otherwise.
+ */
+ztree_entry_t *
+ztree_zone_find(struct zdoor_handle *handle, const char *zonename)
+{
+ ztree_entry_t key = {0};
+ void *ret = NULL;
+
+ if (handle == NULL || zonename == NULL)
+ return (NULL);
+
+ key.zte_zonename = (char *)zonename;
+ ret = tfind(&key, &(handle->zdh_ztree), _ztree_compare);
+
+ return (ret != NULL ? *(ztree_entry_t **)ret : NULL);
+}
+
+
+/*
+ * ztree_zone_remove removes an entry from the "outer" zone iff the
+ * zone exists. The cookie set by the creator is returned.
+ */
+void
+ztree_zone_remove(struct zdoor_handle *handle, ztree_entry_t *entry)
+{
+ if (handle == NULL || entry == NULL)
+ return;
+
+ tdelete(entry, &(handle->zdh_ztree), _ztree_compare);
+ ztree_entry_free(entry);
+}
+
+
+/*
+ * ztree_door_add inserts a new door into the inner tree iff
+ * there is not already an entry for that door. This method returns one of
+ * four possible return codes, ZTREE_SUCCESS on :), ZTREE_ARGUMENT_ERROR if
+ * zone is NULL, ZTREE_ERROR if there is internal failure (e.g., OOM), and
+ * ZTREE_ALREADY_EXISTS if the door is already in the tree.
+ */
+int
+ztree_door_add(struct zdoor_handle *handle, const char *zonename,
+const char *service, zdoor_callback callback, zdoor_cookie_t *cookie)
+{
+ dtree_entry_t *entry = NULL;
+ ztree_entry_t *znode = NULL;
+ void *ret = NULL;
+ int status = ZTREE_SUCCESS;
+
+ if (handle == NULL || zonename == NULL || service == NULL)
+ return (ZTREE_ARGUMENT_ERROR);
+
+ znode = ztree_zone_find(handle, zonename);
+ if (znode == NULL)
+ return (ZTREE_NOT_FOUND);
+
+ entry = (dtree_entry_t *)calloc(1, sizeof (dtree_entry_t));
+ if (entry == NULL) {
+ OUT_OF_MEMORY();
+ return (ZTREE_ERROR);
+ }
+ entry->dte_parent = znode;
+ entry->dte_callback = callback;
+ entry->dte_cookie = cookie;
+ entry->dte_service = strdup(service);
+ if (entry->dte_service == NULL) {
+ free(entry);
+ OUT_OF_MEMORY();
+ return (ZTREE_ERROR);
+ }
+
+ ret = tsearch(entry, &(znode->zte_door_tree), _dtree_compare);
+ if (ret == NULL) {
+ dtree_entry_free(entry);
+ OUT_OF_MEMORY();
+ status = ZTREE_ERROR;
+ } else if ((*(dtree_entry_t **)ret) != entry) {
+ dtree_entry_free(entry);
+ status = ZTREE_ALREADY_EXISTS;
+ } else {
+ znode->zte_num_doors++;
+ }
+
+ return (status);
+}
+
+
+/*
+ * ztree_door_find returns the entry in the "inner" tree
+ * representing this zone, if it exists, NULL otherwise.
+ */
+dtree_entry_t *
+ztree_door_find(struct zdoor_handle *handle, const char *zonename,
+const char *service)
+{
+ dtree_entry_t key = {0};
+ ztree_entry_t *znode = NULL;
+ void *ret = NULL;
+
+ if (handle == NULL || zonename == NULL || service == NULL)
+ return (NULL);
+
+ znode = ztree_zone_find(handle, zonename);
+ if (znode == NULL)
+ return (NULL);
+
+ key.dte_service = (char *)service;
+ ret = tfind(&key, &(znode->zte_door_tree), _dtree_compare);
+
+ return (ret != NULL ? *(dtree_entry_t **)ret : NULL);
+}
+
+
+/*
+ * ztree_door_remove(zone, door) removes a node from the inner tree iff
+ * both the door and zone exist. Note this frees the node as well. The
+ * cookie set by the creator is returned.
+ */
+zdoor_cookie_t *
+ztree_door_remove(struct zdoor_handle *handle, dtree_entry_t *entry)
+{
+ zdoor_cookie_t *cookie = NULL;
+ ztree_entry_t *znode = NULL;
+
+ if (handle == NULL || entry == NULL)
+ return (NULL);
+
+ znode = entry->dte_parent;
+ cookie = entry->dte_cookie;
+
+ tdelete(entry, &(znode->zte_door_tree), _dtree_compare);
+ dtree_entry_free(entry);
+
+ znode->zte_num_doors--;
+ if (znode->zte_num_doors == 0) {
+ zdoor_debug("ztree: zone %s has no doors left, removing",
+ znode->zte_zonename);
+ ztree_zone_remove(handle, znode);
+ }
+
+ return (cookie);
+}
+
+
+/*
+ * _ztree_door_visitor(nodep, which, depth) is the private function we use
+ * to wrap up tsearch's goofy API. We're really just using this to ensure
+ * zdoor doesn't get called > 1 times for a given entity in the ztree.
+ */
+static void
+_ztree_door_visitor(const void *nodep, const VISIT which, const int depth)
+{
+ dtree_entry_t *entry = *(dtree_entry_t **)nodep;
+
+ if (entry == NULL)
+ return;
+
+ switch (which) {
+ case preorder:
+ case endorder:
+ break;
+ case postorder:
+ case leaf:
+ if (entry->dte_parent->zte_visitor != NULL)
+ entry->dte_parent->zte_visitor(entry);
+ break;
+ }
+}
+
+
+/*
+ * ztree_walk_doors(zone) will proceed to visit every node in the "inner" tree
+ * for this zone, and callback the visitor that was registered on tree creation.
+ */
+void
+ztree_walk_doors(struct zdoor_handle *handle, const char *zonename)
+{
+ ztree_entry_t *znode = NULL;
+
+ if (handle == NULL || zonename == NULL)
+ return;
+
+ znode = ztree_zone_find(handle, zonename);
+ if (znode == NULL)
+ return;
+
+ twalk(znode->zte_door_tree, _ztree_door_visitor);
+}
diff --git a/usr/src/lib/libzdoor/common/ztree.h b/usr/src/lib/libzdoor/common/ztree.h
new file mode 100644
index 0000000000..b46dace287
--- /dev/null
+++ b/usr/src/lib/libzdoor/common/ztree.h
@@ -0,0 +1,88 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2011 Joyent, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _ZTREE_H
+#define _ZTREE_H
+
+#include <zdoor.h>
+#include <zone.h>
+#include "zdoor-int.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct dtree_entry;
+
+typedef void (*ztree_door_visitor)(struct dtree_entry *entry);
+
+typedef struct ztree_entry {
+ char *zte_zonename;
+ zdoor_action_t zte_action;
+ int zte_num_doors;
+ void *zte_door_tree;
+ ztree_door_visitor zte_visitor;
+ struct zdoor_handle *zte_parent;
+} ztree_entry_t;
+
+typedef struct dtree_entry {
+ char *dte_service;
+ int dte_door;
+ zdoor_callback dte_callback;
+ zdoor_cookie_t *dte_cookie;
+ ztree_entry_t *dte_parent;
+} dtree_entry_t;
+
+#define ZTREE_SUCCESS 0
+#define ZTREE_ERROR -1
+#define ZTREE_ARGUMENT_ERROR -2
+#define ZTREE_ALREADY_EXISTS -3
+#define ZTREE_NOT_FOUND -4
+
+extern int ztree_zone_add(struct zdoor_handle *handle,
+ const char *zonename, ztree_door_visitor visitor);
+
+extern ztree_entry_t *ztree_zone_find(struct zdoor_handle *handle,
+ const char *zonename);
+
+extern void ztree_zone_remove(struct zdoor_handle *handle,
+ ztree_entry_t *entry);
+
+extern int ztree_door_add(struct zdoor_handle *handle, const char *zonename,
+ const char *service, zdoor_callback callback, zdoor_cookie_t *cookie);
+
+extern dtree_entry_t *ztree_door_find(struct zdoor_handle *handle,
+ const char *zonename, const char *service);
+
+extern zdoor_cookie_t *ztree_door_remove(struct zdoor_handle *handle,
+ dtree_entry_t *entry);
+
+extern void ztree_walk_doors(struct zdoor_handle *handle, const char *zonename);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZTREE_H */
diff --git a/usr/src/cmd/ssh/libopenbsd-compat/sparc/Makefile b/usr/src/lib/libzdoor/i386/Makefile
index fde6250470..2bfe8001d9 100644
--- a/usr/src/cmd/ssh/libopenbsd-compat/sparc/Makefile
+++ b/usr/src/lib/libzdoor/i386/Makefile
@@ -19,7 +19,8 @@
#
# CDDL HEADER END
#
-# Copyright 2003 Sun Microsystems, Inc. All rights reserved.
+#
+# Copyright 2011 Joyent, Inc. All rights reserved.
# Use is subject to license terms.
#
# ident "%Z%%M% %I% %E% SMI"
@@ -27,4 +28,4 @@
include ../Makefile.com
-install: all
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/libzfs/common/libzfs.h b/usr/src/lib/libzfs/common/libzfs.h
index 82acd00609..29edae5109 100644
--- a/usr/src/lib/libzfs/common/libzfs.h
+++ b/usr/src/lib/libzfs/common/libzfs.h
@@ -66,6 +66,7 @@ typedef enum zfs_error {
EZFS_PROPTYPE, /* property does not apply to dataset type */
EZFS_PROPNONINHERIT, /* property is not inheritable */
EZFS_PROPSPACE, /* bad quota or reservation */
+ EZFS_PROPCACHED, /* prop unavail since cachedprops flag set */
EZFS_BADTYPE, /* dataset is not of appropriate type */
EZFS_BUSY, /* pool or dataset is busy */
EZFS_EXISTS, /* pool or dataset already exists */
@@ -186,6 +187,7 @@ extern libzfs_handle_t *zpool_get_handle(zpool_handle_t *);
extern libzfs_handle_t *zfs_get_handle(zfs_handle_t *);
extern void libzfs_print_on_error(libzfs_handle_t *, boolean_t);
+extern void libzfs_set_cachedprops(libzfs_handle_t *, boolean_t);
extern void zfs_save_arguments(int argc, char **, char *, int);
extern int zpool_log_history(libzfs_handle_t *, const char *);
diff --git a/usr/src/lib/libzfs/common/libzfs_dataset.c b/usr/src/lib/libzfs/common/libzfs_dataset.c
index aa4a0e9115..9c3c65d64b 100644
--- a/usr/src/lib/libzfs/common/libzfs_dataset.c
+++ b/usr/src/lib/libzfs/common/libzfs_dataset.c
@@ -300,6 +300,10 @@ get_stats_ioctl(zfs_handle_t *zhp, zfs_cmd_t *zc)
{
libzfs_handle_t *hdl = zhp->zfs_hdl;
+ if (hdl->libzfs_cachedprops &&
+ libzfs_cmd_set_cachedprops(hdl, zc) != 0)
+ return (-1);
+
(void) strlcpy(zc->zc_name, zhp->zfs_name, sizeof (zc->zc_name));
while (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, zc) != 0) {
@@ -2002,6 +2006,11 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src,
case ZFS_PROP_NORMALIZE:
case ZFS_PROP_UTF8ONLY:
case ZFS_PROP_CASE:
+ if (zhp->zfs_hdl->libzfs_cachedprops) {
+ return (zfs_error(zhp->zfs_hdl, EZFS_PROPCACHED,
+ "property unavailable since not cached"));
+ }
+
if (!zfs_prop_valid_for_type(prop, zhp->zfs_head_type) ||
zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0)
return (-1);
@@ -2241,6 +2250,7 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
const char *str;
const char *strval;
boolean_t received = zfs_is_recvd_props_mode(zhp);
+ boolean_t printerr;
/*
* Check to see if this property applies to our object
@@ -2251,6 +2261,16 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
if (received && zfs_prop_readonly(prop))
return (-1);
+ if (zhp->zfs_hdl->libzfs_cachedprops &&
+ zfs_prop_cacheable(prop)) {
+ printerr = zhp->zfs_hdl->libzfs_printerr;
+ libzfs_print_on_error(zhp->zfs_hdl, B_FALSE);
+ (void) zfs_error(zhp->zfs_hdl, EZFS_PROPCACHED,
+ "property unavailable since not cached");
+ libzfs_print_on_error(zhp->zfs_hdl, printerr);
+ return (-1);
+ }
+
if (src)
*src = ZPROP_SRC_NONE;
diff --git a/usr/src/lib/libzfs/common/libzfs_impl.h b/usr/src/lib/libzfs/common/libzfs_impl.h
index a3fcaab075..4d34e68492 100644
--- a/usr/src/lib/libzfs/common/libzfs_impl.h
+++ b/usr/src/lib/libzfs/common/libzfs_impl.h
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 Pawel Jakub Dawidek. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
*/
@@ -80,6 +81,7 @@ struct libzfs_handle {
libzfs_fru_t **libzfs_fru_hash;
libzfs_fru_t *libzfs_fru_list;
char libzfs_chassis_id[256];
+ boolean_t libzfs_cachedprops;
};
#define ZFSSHARE_MISS 0x01 /* Didn't find entry in cache */
@@ -151,6 +153,7 @@ int get_dependents(libzfs_handle_t *, boolean_t, const char *, char ***,
size_t *);
zfs_handle_t *make_dataset_handle_zc(libzfs_handle_t *, zfs_cmd_t *);
zfs_handle_t *make_dataset_simple_handle_zc(zfs_handle_t *, zfs_cmd_t *);
+int libzfs_cmd_set_cachedprops(libzfs_handle_t *, zfs_cmd_t *);
int zprop_parse_value(libzfs_handle_t *, nvpair_t *, int, zfs_type_t,
nvlist_t *, char **, uint64_t *, const char *);
diff --git a/usr/src/lib/libzfs/common/libzfs_iter.c b/usr/src/lib/libzfs/common/libzfs_iter.c
index fb6a475d34..155161d661 100644
--- a/usr/src/lib/libzfs/common/libzfs_iter.c
+++ b/usr/src/lib/libzfs/common/libzfs_iter.c
@@ -24,6 +24,7 @@
* Copyright (c) 2013, 2015 by Delphix. All rights reserved.
* Copyright (c) 2012 Pawel Jakub Dawidek. All rights reserved.
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#include <stdio.h>
@@ -111,6 +112,10 @@ zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data)
if (zhp->zfs_type != ZFS_TYPE_FILESYSTEM)
return (0);
+ if (zhp->zfs_hdl->libzfs_cachedprops &&
+ libzfs_cmd_set_cachedprops(zhp->zfs_hdl, &zc) != 0)
+ return (-1);
+
if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0)
return (-1);
@@ -121,9 +126,8 @@ zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data)
* that the pool has since been removed.
*/
if ((nzhp = make_dataset_handle_zc(zhp->zfs_hdl,
- &zc)) == NULL) {
+ &zc)) == NULL)
continue;
- }
if ((ret = func(nzhp, data)) != 0) {
zcmd_free_nvlists(&zc);
@@ -149,10 +153,17 @@ zfs_iter_snapshots(zfs_handle_t *zhp, boolean_t simple, zfs_iter_f func,
zhp->zfs_type == ZFS_TYPE_BOOKMARK)
return (0);
+
+ if (zhp->zfs_hdl->libzfs_cachedprops &&
+ libzfs_cmd_set_cachedprops(zhp->zfs_hdl, &zc) != 0)
+ return (-1);
+
zc.zc_simple = simple;
+
if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0)
return (-1);
+
while ((ret = zfs_do_list_ioctl(zhp, ZFS_IOC_SNAPSHOT_LIST_NEXT,
&zc)) == 0) {
diff --git a/usr/src/lib/libzfs/common/libzfs_pool.c b/usr/src/lib/libzfs/common/libzfs_pool.c
index cfe6db8b4d..9e99903e8d 100644
--- a/usr/src/lib/libzfs/common/libzfs_pool.c
+++ b/usr/src/lib/libzfs/common/libzfs_pool.c
@@ -364,6 +364,7 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
default:
(void) snprintf(buf, len, "%llu", intval);
}
+
break;
case PROP_TYPE_INDEX:
diff --git a/usr/src/lib/libzfs/common/libzfs_util.c b/usr/src/lib/libzfs/common/libzfs_util.c
index 7dd587a537..848663e1f8 100644
--- a/usr/src/lib/libzfs/common/libzfs_util.c
+++ b/usr/src/lib/libzfs/common/libzfs_util.c
@@ -84,6 +84,9 @@ libzfs_error_description(libzfs_handle_t *hdl)
return (dgettext(TEXT_DOMAIN, "property cannot be inherited"));
case EZFS_PROPSPACE:
return (dgettext(TEXT_DOMAIN, "invalid quota or reservation"));
+ case EZFS_PROPCACHED:
+ return (dgettext(TEXT_DOMAIN, "property unavailable since "
+ "cachedprops flag set"));
case EZFS_BADTYPE:
return (dgettext(TEXT_DOMAIN, "operation not applicable to "
"datasets of this type"));
@@ -612,6 +615,42 @@ libzfs_print_on_error(libzfs_handle_t *hdl, boolean_t printerr)
hdl->libzfs_printerr = printerr;
}
+/*
+ * Set the value of the cachedprops flag. If the cachedprops flag is set,
+ * operations which get ZFS properties will only retrieve a property if the
+ * property is cached somewhere in memory.
+ *
+ * Consumers of libzfs should take care when setting this flag, as they will
+ * prevent themselves from listing the full set of ZFS properties.
+ *
+ * ZFS properties which always require disk I/O are ZPL properties (utf8only,
+ * normalization, etc.) and the volsize and volblocksize properties for volumes.
+ */
+void
+libzfs_set_cachedprops(libzfs_handle_t *hdl, boolean_t cachedprops)
+{
+ hdl->libzfs_cachedprops = cachedprops;
+}
+
+/*
+ * Adds a src nvlist to a zfs_cmd_t which specifies that only cached (i.e., will
+ * not require a disk access) properties should be retrieved.
+ */
+int
+libzfs_cmd_set_cachedprops(libzfs_handle_t *hdl, zfs_cmd_t *zc)
+{
+ nvlist_t *nvl;
+ int ret;
+
+ if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0 ||
+ nvlist_add_boolean_value(nvl, "cachedpropsonly", B_TRUE) != 0)
+ return (no_memory(hdl));
+
+ ret = zcmd_write_src_nvlist(hdl, zc, nvl);
+ nvlist_free(nvl);
+ return (ret);
+}
+
libzfs_handle_t *
libzfs_init(void)
{
@@ -647,6 +686,8 @@ libzfs_init(void)
zpool_feature_init();
libzfs_mnttab_init(hdl);
+ hdl->libzfs_cachedprops = B_FALSE;
+
return (hdl);
}
diff --git a/usr/src/lib/libzfs/common/mapfile-vers b/usr/src/lib/libzfs/common/mapfile-vers
index bc616be0c2..ffc32be5d3 100644
--- a/usr/src/lib/libzfs/common/mapfile-vers
+++ b/usr/src/lib/libzfs/common/mapfile-vers
@@ -22,6 +22,7 @@
#
# Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+# Copyright (c) 2012, Joyent, Inc. All rights reserved.
# Copyright 2016 Nexenta Systems, Inc.
#
@@ -63,6 +64,7 @@ SYMBOL_VERSION SUNWprivate_1.1 {
libzfs_init;
libzfs_mnttab_cache;
libzfs_print_on_error;
+ libzfs_set_cachedprops;
spa_feature_table;
zfs_allocatable_devs;
zfs_asprintf;
@@ -109,6 +111,7 @@ SYMBOL_VERSION SUNWprivate_1.1 {
zfs_path_to_zhandle;
zfs_promote;
zfs_prop_align_right;
+ zfs_prop_cacheable;
zfs_prop_column_name;
zfs_prop_default_numeric;
zfs_prop_default_string;
diff --git a/usr/src/lib/libzonecfg/Makefile.com b/usr/src/lib/libzonecfg/Makefile.com
index 0bee2fc908..6bec7141cb 100644
--- a/usr/src/lib/libzonecfg/Makefile.com
+++ b/usr/src/lib/libzonecfg/Makefile.com
@@ -20,11 +20,14 @@
#
#
# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2015 Joyent, Inc.
#
LIBRARY= libzonecfg.a
VERS= .1
-OBJECTS= libzonecfg.o getzoneent.o scratchops.o
+LIB_OBJS= libzonecfg.o getzoneent.o scratchops.o
+XML_OBJS= os_dtd.o
+OBJECTS= $(LIB_OBJS) $(XML_OBJS)
include ../../Makefile.lib
@@ -35,15 +38,27 @@ LDLIBS += -lc -lsocket -lnsl -luuid -lnvpair -lsysevent -lsec -lbrand \
$(DYNLIB) := LDLIBS += -lxml2
SRCDIR = ../common
+
+XMLDIR = $(SRC)/lib/xml
+SRCS = \
+ $(LIB_OBJS:%.o=$(SRCDIR)/%.c) \
+ $(XML_OBJS:%.o=$(XMLDIR)/%.c) \
+
CPPFLAGS += -I$(ADJUNCT_PROTO)/usr/include/libxml2 -I$(SRCDIR) -D_REENTRANT
CERRWARN += -_gcc=-Wno-uninitialized
CERRWARN += -_gcc=-Wno-parentheses
$(LINTLIB) := SRCS= $(SRCDIR)/$(LINTSRC)
+CPPFLAGS += -I$(XMLDIR)
+
.KEEP_STATE:
all: $(LIBS)
lint: lintcheck
+pics/%.o: $(XMLDIR)/%.c
+ $(COMPILE.c) -o $@ $<
+ $(POST_PROCESS_O)
+
include ../../Makefile.targ
diff --git a/usr/src/lib/libzonecfg/common/getzoneent.c b/usr/src/lib/libzonecfg/common/getzoneent.c
index 8155f7272a..c9f1c12bcf 100644
--- a/usr/src/lib/libzonecfg/common/getzoneent.c
+++ b/usr/src/lib/libzonecfg/common/getzoneent.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
@@ -127,6 +128,8 @@ getzoneent_private(FILE *cookie)
/* skip comment lines */
continue;
}
+
+ /* zonename */
p = gettok(&cp);
if (*p == '\0' || strlen(p) >= ZONENAME_MAX) {
/*
@@ -136,6 +139,7 @@ getzoneent_private(FILE *cookie)
}
(void) strlcpy(ze->zone_name, p, ZONENAME_MAX);
+ /* state */
p = gettok(&cp);
if (*p == '\0') {
/* state field should not be empty */
@@ -152,6 +156,7 @@ getzoneent_private(FILE *cookie)
continue;
}
+ /* zonepath */
p = gettok(&cp);
if (strlen(p) >= MAXPATHLEN) {
/* very long paths are not allowed */
@@ -159,10 +164,35 @@ getzoneent_private(FILE *cookie)
}
(void) strlcpy(ze->zone_path, p, MAXPATHLEN);
+ /* uuid */
p = gettok(&cp);
if (uuid_parse(p, ze->zone_uuid) == -1)
uuid_clear(ze->zone_uuid);
+ /* brand [optional] */
+ p = gettok(&cp);
+ if (strlen(p) >= MAXNAMELEN) {
+ /* very long names are not allowed */
+ continue;
+ }
+ (void) strlcpy(ze->zone_brand, p, MAXNAMELEN);
+
+ /* IP type [optional] */
+ p = gettok(&cp);
+ if (strlen(p) >= MAXNAMELEN) {
+ /* very long names are not allowed */
+ continue;
+ }
+ ze->zone_iptype = ZS_SHARED;
+ if (*p == 'e') {
+ ze->zone_iptype = ZS_EXCLUSIVE;
+ }
+
+ /* debug ID [optional] */
+ p = gettok(&cp);
+ if (*p != '\0')
+ ze->zone_did = atoi(p);
+
break;
}
@@ -170,10 +200,32 @@ getzoneent_private(FILE *cookie)
}
static boolean_t
-get_index_path(char *path)
+path_common(char *path, size_t path_size, const char *stem)
{
- return (snprintf(path, MAXPATHLEN, "%s%s", zonecfg_root,
- ZONE_INDEX_FILE) < MAXPATHLEN);
+ const char *native_root = zone_get_nroot();
+
+ if (native_root == NULL || zonecfg_in_alt_root()) {
+ /*
+ * Do not prepend the native system root (e.g. "/native") if an
+ * alternative configuration root has been selected.
+ */
+ native_root = "";
+ }
+
+ return (snprintf(path, path_size, "%s%s%s", native_root, zonecfg_root,
+ stem) < path_size);
+}
+
+static boolean_t
+get_index_path(char *path, size_t path_size)
+{
+ return (path_common(path, path_size, ZONE_INDEX_FILE));
+}
+
+static boolean_t
+get_temp_path(char *path, size_t path_size)
+{
+ return (path_common(path, path_size, _PATH_TMPFILE));
}
FILE *
@@ -181,7 +233,7 @@ setzoneent(void)
{
char path[MAXPATHLEN];
- if (!get_index_path(path)) {
+ if (!get_index_path(path, sizeof (path))) {
errno = EINVAL;
return (NULL);
}
@@ -202,8 +254,7 @@ lock_index_file(void)
struct flock lock;
char path[MAXPATHLEN];
- if (snprintf(path, sizeof (path), "%s%s", zonecfg_root,
- ZONE_INDEX_LOCK_DIR) >= sizeof (path))
+ if (!path_common(path, sizeof (path), ZONE_INDEX_LOCK_DIR))
return (-1);
if ((mkdir(path, S_IRWXU) == -1) && errno != EEXIST)
return (-1);
@@ -269,14 +320,17 @@ int
putzoneent(struct zoneent *ze, zoneent_op_t operation)
{
FILE *index_file, *tmp_file;
- char *tmp_file_name, buf[MAX_INDEX_LEN];
+ char buf[MAX_INDEX_LEN];
int tmp_file_desc, lock_fd, err;
boolean_t exist, need_quotes;
- char *cp;
+ char *cp, *tmpp;
+ char tmp_path[MAXPATHLEN];
char path[MAXPATHLEN];
char uuidstr[UUID_PRINTABLE_STRING_LENGTH];
- size_t tlen, namelen;
- const char *zone_name, *zone_state, *zone_path, *zone_uuid;
+ size_t namelen;
+ const char *zone_name, *zone_state, *zone_path, *zone_uuid,
+ *zone_brand = "", *zone_iptype;
+ zoneid_t zone_did;
assert(ze != NULL);
@@ -299,20 +353,14 @@ putzoneent(struct zoneent *ze, zoneent_op_t operation)
if ((lock_fd = lock_index_file()) == -1)
return (Z_LOCKING_FILE);
- /* using sizeof gives us room for the terminating NUL byte as well */
- tlen = sizeof (_PATH_TMPFILE) + strlen(zonecfg_root);
- tmp_file_name = malloc(tlen);
- if (tmp_file_name == NULL) {
+ if (!get_temp_path(tmp_path, sizeof (tmp_path))) {
(void) unlock_index_file(lock_fd);
return (Z_NOMEM);
}
- (void) snprintf(tmp_file_name, tlen, "%s%s", zonecfg_root,
- _PATH_TMPFILE);
- tmp_file_desc = mkstemp(tmp_file_name);
+ tmp_file_desc = mkstemp(tmp_path);
if (tmp_file_desc == -1) {
- (void) unlink(tmp_file_name);
- free(tmp_file_name);
+ (void) unlink(tmp_path);
(void) unlock_index_file(lock_fd);
return (Z_TEMP_FILE);
}
@@ -323,7 +371,7 @@ putzoneent(struct zoneent *ze, zoneent_op_t operation)
err = Z_MISC_FS;
goto error;
}
- if (!get_index_path(path)) {
+ if (!get_index_path(path, sizeof (path))) {
err = Z_MISC_FS;
goto error;
}
@@ -335,6 +383,9 @@ putzoneent(struct zoneent *ze, zoneent_op_t operation)
exist = B_FALSE;
zone_name = ze->zone_name;
namelen = strlen(zone_name);
+ zone_brand = ze->zone_brand;
+ zone_iptype = (ze->zone_iptype == ZS_SHARED ? "sh" : "ex");
+ zone_did = ze->zone_did;
for (;;) {
if (fgets(buf, sizeof (buf), index_file) == NULL) {
if (operation == PZE_ADD && !exist) {
@@ -389,6 +440,11 @@ putzoneent(struct zoneent *ze, zoneent_op_t operation)
}
zone_path = gettok(&cp);
zone_uuid = gettok(&cp);
+ zone_brand = gettok(&cp);
+ zone_iptype = gettok(&cp);
+ tmpp = gettok(&cp);
+ if (*tmpp != '\0')
+ zone_did = atoi(tmpp);
switch (operation) {
case PZE_ADD:
@@ -403,14 +459,6 @@ putzoneent(struct zoneent *ze, zoneent_op_t operation)
*/
if (ze->zone_state >= 0) {
zone_state = zone_state_str(ze->zone_state);
-
- /*
- * If the caller is uninstalling this zone,
- * then wipe out the uuid. The zone's contents
- * are no longer known.
- */
- if (ze->zone_state < ZONE_STATE_INSTALLED)
- zone_uuid = "";
}
/* If a new name is supplied, use it. */
@@ -419,6 +467,27 @@ putzoneent(struct zoneent *ze, zoneent_op_t operation)
if (ze->zone_path[0] != '\0')
zone_path = ze->zone_path;
+
+ /* If new UUID provided, replace it */
+ if (!uuid_is_null(ze->zone_uuid)) {
+ uuid_unparse(ze->zone_uuid, uuidstr);
+ zone_uuid = uuidstr;
+ }
+
+ /* If a brand is supplied, use it. */
+ if (ze->zone_brand[0] != '\0') {
+ zone_brand = ze->zone_brand;
+
+ /*
+ * Since the brand, iptype and did are optional,
+ * we we only reset the iptype and did if the
+ * brand is provided.
+ */
+ zone_iptype = (ze->zone_iptype == ZS_SHARED ?
+ "sh" : "ex");
+ zone_did = ze->zone_did;
+ }
+
break;
case PZE_REMOVE:
@@ -450,9 +519,17 @@ putzoneent(struct zoneent *ze, zoneent_op_t operation)
* method for escaping them.
*/
need_quotes = (strchr(zone_path, ':') != NULL);
- (void) fprintf(tmp_file, "%s:%s:%s%s%s:%s\n", zone_name,
- zone_state, need_quotes ? "\"" : "", zone_path,
- need_quotes ? "\"" : "", zone_uuid);
+
+ if (*zone_brand != '\0') {
+ (void) fprintf(tmp_file, "%s:%s:%s%s%s:%s:%s:%s:%d\n",
+ zone_name, zone_state, need_quotes ? "\"" : "",
+ zone_path, need_quotes ? "\"" : "", zone_uuid,
+ zone_brand, zone_iptype, zone_did);
+ } else {
+ (void) fprintf(tmp_file, "%s:%s:%s%s%s:%s\n", zone_name,
+ zone_state, need_quotes ? "\"" : "", zone_path,
+ need_quotes ? "\"" : "", zone_uuid);
+ }
exist = B_TRUE;
}
@@ -464,11 +541,10 @@ putzoneent(struct zoneent *ze, zoneent_op_t operation)
goto error;
}
tmp_file = NULL;
- if (rename(tmp_file_name, path) == -1) {
+ if (rename(tmp_path, path) == -1) {
err = errno == EACCES ? Z_ACCES : Z_MISC_FS;
goto error;
}
- free(tmp_file_name);
if (unlock_index_file(lock_fd) != Z_OK)
return (Z_UNLOCKING_FILE);
return (Z_OK);
@@ -478,8 +554,7 @@ error:
(void) fclose(index_file);
if (tmp_file != NULL)
(void) fclose(tmp_file);
- (void) unlink(tmp_file_name);
- free(tmp_file_name);
+ (void) unlink(tmp_path);
(void) unlock_index_file(lock_fd);
return (err);
}
diff --git a/usr/src/lib/libzonecfg/common/libzonecfg.c b/usr/src/lib/libzonecfg/common/libzonecfg.c
index 91502b6a2f..2bd4855138 100644
--- a/usr/src/lib/libzonecfg/common/libzonecfg.c
+++ b/usr/src/lib/libzonecfg/common/libzonecfg.c
@@ -22,6 +22,7 @@
/*
* Copyright 2014 Gary Mills
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 Joyent Inc.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*/
@@ -59,6 +60,8 @@
#include <secdb.h>
#include <user_attr.h>
#include <prof_attr.h>
+#include <sys/debug.h>
+#include <os_dtd.h>
#include <arpa/inet.h>
#include <netdb.h>
@@ -79,6 +82,8 @@
#define ZONE_EVENT_PING_SUBCLASS "ping"
#define ZONE_EVENT_PING_PUBLISHER "solaris"
+#define DEBUGID_FILE "/etc/zones/did.txt"
+
/* Hard-code the DTD element/attribute/entity names just once, here. */
#define DTD_ELEM_ATTR (const xmlChar *) "attr"
#define DTD_ELEM_COMMENT (const xmlChar *) "comment"
@@ -86,6 +91,7 @@
#define DTD_ELEM_FS (const xmlChar *) "filesystem"
#define DTD_ELEM_FSOPTION (const xmlChar *) "fsoption"
#define DTD_ELEM_NET (const xmlChar *) "network"
+#define DTD_ELEM_NETATTR (const xmlChar *) "net-attr"
#define DTD_ELEM_RCTL (const xmlChar *) "rctl"
#define DTD_ELEM_RCTLVALUE (const xmlChar *) "rctl-value"
#define DTD_ELEM_ZONE (const xmlChar *) "zone"
@@ -105,10 +111,12 @@
#define DTD_ATTR_IPTYPE (const xmlChar *) "ip-type"
#define DTD_ATTR_DEFROUTER (const xmlChar *) "defrouter"
#define DTD_ATTR_DIR (const xmlChar *) "directory"
+#define DTD_ATTR_GNIC (const xmlChar *) "global-nic"
#define DTD_ATTR_LIMIT (const xmlChar *) "limit"
#define DTD_ATTR_LIMITPRIV (const xmlChar *) "limitpriv"
#define DTD_ATTR_BOOTARGS (const xmlChar *) "bootargs"
#define DTD_ATTR_SCHED (const xmlChar *) "scheduling-class"
+#define DTD_ATTR_MAC (const xmlChar *) "mac-addr"
#define DTD_ATTR_MATCH (const xmlChar *) "match"
#define DTD_ATTR_NAME (const xmlChar *) "name"
#define DTD_ATTR_PHYSICAL (const xmlChar *) "physical"
@@ -118,6 +126,7 @@
#define DTD_ATTR_SPECIAL (const xmlChar *) "special"
#define DTD_ATTR_TYPE (const xmlChar *) "type"
#define DTD_ATTR_VALUE (const xmlChar *) "value"
+#define DTD_ATTR_VLANID (const xmlChar *) "vlan-id"
#define DTD_ATTR_ZONEPATH (const xmlChar *) "zonepath"
#define DTD_ATTR_NCPU_MIN (const xmlChar *) "ncpu_min"
#define DTD_ATTR_NCPU_MAX (const xmlChar *) "ncpu_max"
@@ -130,6 +139,7 @@
#define DTD_ATTR_MODE (const xmlChar *) "mode"
#define DTD_ATTR_ACL (const xmlChar *) "acl"
#define DTD_ATTR_BRAND (const xmlChar *) "brand"
+#define DTD_ATTR_DID (const xmlChar *) "debugid"
#define DTD_ATTR_HOSTID (const xmlChar *) "hostid"
#define DTD_ATTR_USER (const xmlChar *) "user"
#define DTD_ATTR_AUTHS (const xmlChar *) "auths"
@@ -176,9 +186,12 @@ static struct alias {
{ALIAS_MAXSEMIDS, "zone.max-sem-ids", "privileged", "deny", 0},
{ALIAS_MAXLOCKEDMEM, "zone.max-locked-memory", "privileged", "deny", 0},
{ALIAS_MAXSWAP, "zone.max-swap", "privileged", "deny", 0},
+ {ALIAS_MAXPHYSMEM, "zone.max-physical-memory", "privileged", "deny",
+ 1048576},
{ALIAS_SHARES, "zone.cpu-shares", "privileged", "none", 0},
{ALIAS_CPUCAP, "zone.cpu-cap", "privileged", "deny", 0},
{ALIAS_MAXPROCS, "zone.max-processes", "privileged", "deny", 100},
+ {ALIAS_ZFSPRI, "zone.zfs-io-priority", "privileged", "none", 0},
{NULL, NULL, NULL, NULL, 0}
};
@@ -226,6 +239,8 @@ static int zone_lock_cnt = 0;
static char zoneadm_lock_held[] = LOCK_ENV_VAR"=1";
static char zoneadm_lock_not_held[] = LOCK_ENV_VAR"=0";
+static void zonecfg_notify_delete(const char *);
+
char *zonecfg_root = "";
/*
@@ -269,17 +284,35 @@ zonecfg_in_alt_root(void)
*/
static boolean_t
-config_file_path(const char *zonename, char *answer)
+file_path_common(const char *zonename, const char *subdir, const char *stem,
+ char *answer, size_t answer_size)
{
- return (snprintf(answer, MAXPATHLEN, "%s%s/%s.xml", zonecfg_root,
- ZONE_CONFIG_ROOT, zonename) < MAXPATHLEN);
+ const char *native_root = zone_get_nroot();
+
+ if (native_root == NULL || zonecfg_in_alt_root()) {
+ /*
+ * Do not prepend the native system root (e.g. "/native") if an
+ * alternative configuration root has been selected.
+ */
+ native_root = "";
+ }
+
+ return (snprintf(answer, answer_size, "%s%s%s/%s.%s", native_root,
+ zonecfg_root, subdir, zonename, stem) < answer_size);
}
static boolean_t
-snap_file_path(const char *zonename, char *answer)
+config_file_path(const char *zonename, char *answer, size_t answer_size)
{
- return (snprintf(answer, MAXPATHLEN, "%s%s/%s.snapshot.xml",
- zonecfg_root, ZONE_SNAPSHOT_ROOT, zonename) < MAXPATHLEN);
+ return (file_path_common(zonename, ZONE_CONFIG_ROOT, "xml", answer,
+ answer_size));
+}
+
+static boolean_t
+snap_file_path(const char *zonename, char *answer, size_t answer_size)
+{
+ return (file_path_common(zonename, ZONE_SNAPSHOT_ROOT, "snapshot.xml",
+ answer, answer_size));
}
/*ARGSUSED*/
@@ -350,7 +383,7 @@ zonecfg_destroy(const char *zonename, boolean_t force)
int err, state_err;
zone_state_t state;
- if (!config_file_path(zonename, path))
+ if (!config_file_path(zonename, path, sizeof (path)))
return (Z_MISC_FS);
state_err = zone_get_state((char *)zonename, &state);
@@ -397,8 +430,10 @@ zonecfg_destroy(const char *zonename, boolean_t force)
* Treat failure to find the XML file silently, since, well, it's
* gone, and with the index file cleaned up, we're done.
*/
- if (err == Z_OK || err == Z_NO_ZONE)
+ if (err == Z_OK || err == Z_NO_ZONE) {
+ zonecfg_notify_delete(zonename);
return (Z_OK);
+ }
return (err);
}
@@ -407,7 +442,7 @@ zonecfg_destroy_snapshot(const char *zonename)
{
char path[MAXPATHLEN];
- if (!snap_file_path(zonename, path))
+ if (!snap_file_path(zonename, path, sizeof (path)))
return (Z_MISC_FS);
return (zonecfg_destroy_impl(path));
}
@@ -574,9 +609,8 @@ static int
zonecfg_get_handle_impl(const char *zonename, const char *filename,
zone_dochandle_t handle)
{
- xmlValidCtxtPtr cvp;
struct stat statbuf;
- int valid;
+ boolean_t valid;
if (zonename == NULL)
return (Z_NO_ZONE);
@@ -587,14 +621,13 @@ zonecfg_get_handle_impl(const char *zonename, const char *filename,
return (Z_INVALID_DOCUMENT);
return (Z_NO_ZONE);
}
- if ((cvp = xmlNewValidCtxt()) == NULL)
+
+ if (os_dtd_validate(handle->zone_dh_doc, B_FALSE, &valid) != 0) {
return (Z_NOMEM);
- cvp->error = zonecfg_error_func;
- cvp->warning = zonecfg_error_func;
- valid = xmlValidateDocument(cvp, handle->zone_dh_doc);
- xmlFreeValidCtxt(cvp);
- if (valid == 0)
+ }
+ if (!valid) {
return (Z_INVALID_DOCUMENT);
+ }
/* delete any comments such as inherited Sun copyright / ident str */
stripcomments(handle);
@@ -606,7 +639,7 @@ zonecfg_get_handle(const char *zonename, zone_dochandle_t handle)
{
char path[MAXPATHLEN];
- if (!config_file_path(zonename, path))
+ if (!config_file_path(zonename, path, sizeof (path)))
return (Z_MISC_FS);
handle->zone_dh_newzone = B_FALSE;
@@ -650,7 +683,7 @@ zonecfg_get_snapshot_handle(const char *zonename, zone_dochandle_t handle)
{
char path[MAXPATHLEN];
- if (!snap_file_path(zonename, path))
+ if (!snap_file_path(zonename, path, sizeof (path)))
return (Z_MISC_FS);
handle->zone_dh_newzone = B_FALSE;
return (zonecfg_get_handle_impl(zonename, path, handle));
@@ -663,7 +696,7 @@ zonecfg_get_template_handle(const char *template, const char *zonename,
char path[MAXPATHLEN];
int err;
- if (!config_file_path(template, path))
+ if (!config_file_path(template, path, sizeof (path)))
return (Z_MISC_FS);
if ((err = zonecfg_get_handle_impl(template, path, handle)) != Z_OK)
@@ -697,21 +730,19 @@ int
zonecfg_attach_manifest(int fd, zone_dochandle_t local_handle,
zone_dochandle_t rem_handle)
{
- xmlValidCtxtPtr cvp;
- int valid;
+ boolean_t valid;
/* load the manifest into the handle for the remote system */
if ((rem_handle->zone_dh_doc = xmlReadFd(fd, NULL, NULL, 0)) == NULL) {
return (Z_INVALID_DOCUMENT);
}
- if ((cvp = xmlNewValidCtxt()) == NULL)
+
+ if (os_dtd_validate(rem_handle->zone_dh_doc, B_FALSE, &valid) != 0) {
return (Z_NOMEM);
- cvp->error = zonecfg_error_func;
- cvp->warning = zonecfg_error_func;
- valid = xmlValidateDocument(cvp, rem_handle->zone_dh_doc);
- xmlFreeValidCtxt(cvp);
- if (valid == 0)
+ }
+ if (!valid) {
return (Z_INVALID_DOCUMENT);
+ }
/* delete any comments such as inherited Sun copyright / ident str */
stripcomments(rem_handle);
@@ -732,14 +763,12 @@ zonecfg_attach_manifest(int fd, zone_dochandle_t local_handle,
* We need to re-run xmlValidateDocument on local_handle to properly
* update the in-core representation of the configuration.
*/
- if ((cvp = xmlNewValidCtxt()) == NULL)
+ if (os_dtd_validate(local_handle->zone_dh_doc, B_FALSE, &valid) != 0) {
return (Z_NOMEM);
- cvp->error = zonecfg_error_func;
- cvp->warning = zonecfg_error_func;
- valid = xmlValidateDocument(cvp, local_handle->zone_dh_doc);
- xmlFreeValidCtxt(cvp);
- if (valid == 0)
+ }
+ if (!valid) {
return (Z_INVALID_DOCUMENT);
+ }
strip_sw_inv(local_handle);
@@ -1101,7 +1130,7 @@ zonecfg_set_sched(zone_dochandle_t handle, char *sched)
* In general, the operation of this function should succeed or fail as
* a unit.
*/
-int
+static int
zonecfg_refresh_index_file(zone_dochandle_t handle)
{
char name[ZONENAME_MAX], zonepath[MAXPATHLEN];
@@ -1123,6 +1152,15 @@ zonecfg_refresh_index_file(zone_dochandle_t handle)
(void) strlcpy(ze.zone_path, zonepath + strlen(zonecfg_root),
sizeof (ze.zone_path));
+ if ((err = zonecfg_get_brand(handle, ze.zone_brand,
+ sizeof (ze.zone_brand))) != 0)
+ return (err);
+
+ if ((err = zonecfg_get_iptype(handle, &ze.zone_iptype)) != Z_OK)
+ return (err);
+
+ ze.zone_did = zonecfg_get_did(handle);
+
if (is_renaming(handle)) {
opcode = PZE_MODIFY;
(void) strlcpy(ze.zone_name, handle->zone_dh_delete_name,
@@ -1193,9 +1231,9 @@ zonecfg_save_impl(zone_dochandle_t handle, char *filename)
{
char tmpfile[MAXPATHLEN];
char bakdir[MAXPATHLEN], bakbase[MAXPATHLEN], bakfile[MAXPATHLEN];
- int tmpfd, err, valid;
- xmlValidCtxt cvp = { NULL };
+ int tmpfd, err;
boolean_t backup;
+ boolean_t valid;
(void) strlcpy(tmpfile, filename, sizeof (tmpfile));
(void) dirname(tmpfile);
@@ -1208,16 +1246,13 @@ zonecfg_save_impl(zone_dochandle_t handle, char *filename)
}
(void) close(tmpfd);
- cvp.error = zonecfg_error_func;
- cvp.warning = zonecfg_error_func;
-
/*
* We do a final validation of the document. Since the library has
* malfunctioned if it fails to validate, we follow-up with an
* assert() that the doc is valid.
*/
- valid = xmlValidateDocument(&cvp, handle->zone_dh_doc);
- assert(valid != 0);
+ VERIFY0(os_dtd_validate(handle->zone_dh_doc, B_FALSE, &valid));
+ VERIFY(valid == B_TRUE);
if (xmlSaveFormatFile(tmpfile, handle->zone_dh_doc, 1) <= 0)
goto err;
@@ -1272,7 +1307,6 @@ zonecfg_save_impl(zone_dochandle_t handle, char *filename)
/*
* Try to restore from our backup.
*/
- (void) unlink(filename);
(void) rename(bakfile, filename);
} else {
/*
@@ -1315,7 +1349,7 @@ zonecfg_save(zone_dochandle_t handle)
if ((err = zonecfg_get_name(handle, zname, sizeof (zname))) != Z_OK)
return (err);
- if (!config_file_path(zname, path))
+ if (!config_file_path(zname, path, sizeof (path)))
return (Z_MISC_FS);
addcomment(handle, "\n DO NOT EDIT THIS "
@@ -1336,8 +1370,10 @@ zonecfg_save(zone_dochandle_t handle)
handle->zone_dh_newzone = B_FALSE;
if (is_renaming(handle)) {
- if (config_file_path(handle->zone_dh_delete_name, delpath))
+ if (config_file_path(handle->zone_dh_delete_name, delpath,
+ sizeof (delpath))) {
(void) unlink(delpath);
+ }
handle->zone_dh_delete_name[0] = '\0';
}
@@ -1347,23 +1383,18 @@ zonecfg_save(zone_dochandle_t handle)
int
zonecfg_verify_save(zone_dochandle_t handle, char *filename)
{
- int valid;
-
- xmlValidCtxt cvp = { NULL };
+ boolean_t valid;
if (zonecfg_check_handle(handle) != Z_OK)
return (Z_BAD_HANDLE);
- cvp.error = zonecfg_error_func;
- cvp.warning = zonecfg_error_func;
-
/*
* We do a final validation of the document. Since the library has
* malfunctioned if it fails to validate, we follow-up with an
* assert() that the doc is valid.
*/
- valid = xmlValidateDocument(&cvp, handle->zone_dh_doc);
- assert(valid != 0);
+ VERIFY0(os_dtd_validate(handle->zone_dh_doc, B_FALSE, &valid));
+ VERIFY(valid == B_TRUE);
if (xmlSaveFormatFile(filename, handle->zone_dh_doc, 1) <= 0)
return (Z_SAVING_FILE);
@@ -1377,9 +1408,8 @@ zonecfg_detach_save(zone_dochandle_t handle, uint_t flags)
char zname[ZONENAME_MAX];
char path[MAXPATHLEN];
char migpath[MAXPATHLEN];
- xmlValidCtxt cvp = { NULL };
int err = Z_SAVING_FILE;
- int valid;
+ boolean_t valid;
if (zonecfg_check_handle(handle) != Z_OK)
return (Z_BAD_HANDLE);
@@ -1406,16 +1436,13 @@ zonecfg_detach_save(zone_dochandle_t handle, uint_t flags)
addcomment(handle, "\n DO NOT EDIT THIS FILE. "
"Use zonecfg(1M) and zoneadm(1M) attach.\n");
- cvp.error = zonecfg_error_func;
- cvp.warning = zonecfg_error_func;
-
/*
* We do a final validation of the document. Since the library has
* malfunctioned if it fails to validate, we follow-up with an
* assert() that the doc is valid.
*/
- valid = xmlValidateDocument(&cvp, handle->zone_dh_doc);
- assert(valid != 0);
+ VERIFY0(os_dtd_validate(handle->zone_dh_doc, B_FALSE, &valid));
+ VERIFY(valid == B_TRUE);
if (xmlSaveFormatFile(migpath, handle->zone_dh_doc, 1) <= 0)
return (Z_SAVING_FILE);
@@ -1476,6 +1503,56 @@ zonecfg_rm_detached(zone_dochandle_t handle, boolean_t forced)
}
}
+static void
+zonecfg_notify_conf_change(const char *zname, char *os, char *ns)
+{
+ evchan_t *ze_chan;
+ struct timeval now;
+ uint64_t t;
+ nvlist_t *nvl = NULL;
+
+ if (sysevent_evc_bind(ZONE_EVENT_CHANNEL, &ze_chan, 0) != 0)
+ return;
+
+ /* Current time since Jan 1 1970 but consumers expect NS */
+ gettimeofday(&now, NULL);
+ t = (now.tv_sec * NANOSEC) + (now.tv_usec * 1000);
+
+ if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0 &&
+ nvlist_add_string(nvl, ZONE_CB_NAME, zname) == 0 &&
+ nvlist_add_string(nvl, ZONE_CB_NEWSTATE, ns) == 0 &&
+ nvlist_add_string(nvl, ZONE_CB_OLDSTATE, os) == 0 &&
+ nvlist_add_int32(nvl, ZONE_CB_ZONEID, -1) == 0 &&
+ nvlist_add_uint64(nvl, ZONE_CB_TIMESTAMP, t) == 0) {
+ (void) sysevent_evc_publish(ze_chan, ZONE_EVENT_STATUS_CLASS,
+ ZONE_EVENT_STATUS_SUBCLASS, "sun.com", "zonecfg", nvl,
+ EVCH_SLEEP);
+ }
+
+ nvlist_free(nvl);
+ (void) sysevent_evc_unbind(ze_chan);
+}
+
+void
+zonecfg_notify_create(zone_dochandle_t handle)
+{
+ char zname[ZONENAME_MAX];
+
+ if (zonecfg_check_handle(handle) != Z_OK)
+ return;
+
+ if (zonecfg_get_name(handle, zname, sizeof (zname)) != Z_OK)
+ return;
+
+ zonecfg_notify_conf_change(zname, "", ZONE_STATE_STR_CONFIGURED);
+}
+
+static void
+zonecfg_notify_delete(const char *zname)
+{
+ zonecfg_notify_conf_change(zname, ZONE_STATE_STR_CONFIGURED, "");
+}
+
/*
* Special case: if access(2) fails with ENOENT, then try again using
* ZONE_CONFIG_ROOT instead of config_file_path(zonename). This is how we
@@ -1488,7 +1565,7 @@ zonecfg_access(const char *zonename, int amode)
{
char path[MAXPATHLEN];
- if (!config_file_path(zonename, path))
+ if (!config_file_path(zonename, path, sizeof (path)))
return (Z_INVAL);
if (access(path, amode) == 0)
return (Z_OK);
@@ -1553,7 +1630,7 @@ zonecfg_create_snapshot(const char *zonename)
goto out;
}
- if (!snap_file_path(zonename, path)) {
+ if (!snap_file_path(zonename, path, sizeof (path))) {
error = Z_MISC_FS;
goto out;
}
@@ -2066,6 +2143,32 @@ zonecfg_ifname_exists(sa_family_t af, char *ifname)
}
/*
+ * Turn an addr that looks like f:2:0:44:5:6C into 0f:02:00:44:05:6c
+ * We're expecting a dst of at least MAXMACADDRLEN size here.
+ */
+static void
+normalize_mac_addr(char *dst, const char *src, int len)
+{
+ char *p, *e, *sep = "";
+ long n;
+ char buf[MAXMACADDRLEN], tmp[4];
+
+ *dst = '\0';
+ (void) strlcpy(buf, src, sizeof (buf));
+ p = strtok(buf, ":");
+ while (p != NULL) {
+ n = strtol(p, &e, 16);
+ if (*e != NULL || n > 0xff)
+ return;
+ (void) snprintf(tmp, sizeof (tmp), "%s%02x", sep, n);
+ (void) strlcat(dst, tmp, len);
+
+ sep = ":";
+ p = strtok(NULL, ":");
+ }
+}
+
+/*
* Determines whether there is a net resource with the physical interface, IP
* address, and default router specified by 'tabptr' in the zone configuration
* to which 'handle' refers. 'tabptr' must have an interface, an address, a
@@ -2084,13 +2187,18 @@ zonecfg_ifname_exists(sa_family_t af, char *ifname)
int
zonecfg_lookup_nwif(zone_dochandle_t handle, struct zone_nwiftab *tabptr)
{
- xmlNodePtr cur;
+ xmlNodePtr cur, val;
xmlNodePtr firstmatch;
int err;
char address[INET6_ADDRSTRLEN];
char physical[LIFNAMSIZ];
+ char mac[MAXMACADDRLEN];
+ char norm_mac[MAXMACADDRLEN];
+ char gnic[LIFNAMSIZ];
size_t addrspec; /* nonzero if tabptr has IP addr */
size_t physspec; /* nonzero if tabptr has interface */
+ size_t macspec; /* nonzero if tabptr has mac addr */
+ size_t gnicspec; /* nonzero if tabptr has gnic */
size_t defrouterspec; /* nonzero if tabptr has def. router */
size_t allowed_addrspec;
zone_iptype_t iptype;
@@ -2102,17 +2210,20 @@ zonecfg_lookup_nwif(zone_dochandle_t handle, struct zone_nwiftab *tabptr)
* Determine the fields that will be searched. There must be at least
* one.
*
- * zone_nwif_address, zone_nwif_physical, and zone_nwif_defrouter are
+ * zone_nwif_address, zone_nwif_physical, zone_nwif_defrouter,
+ * zone_nwif_mac, zone_nwif_vlan_id and zone_nwif_gnic are
* arrays, so no NULL checks are necessary.
*/
addrspec = strlen(tabptr->zone_nwif_address);
physspec = strlen(tabptr->zone_nwif_physical);
+ macspec = strlen(tabptr->zone_nwif_mac);
+ gnicspec = strlen(tabptr->zone_nwif_gnic);
defrouterspec = strlen(tabptr->zone_nwif_defrouter);
allowed_addrspec = strlen(tabptr->zone_nwif_allowed_address);
if (addrspec != 0 && allowed_addrspec != 0)
return (Z_INVAL); /* can't specify both */
if (addrspec == 0 && physspec == 0 && defrouterspec == 0 &&
- allowed_addrspec == 0)
+ allowed_addrspec == 0 && macspec == 0 && gnicspec == 0)
return (Z_INSUFFICIENT_SPEC);
if ((err = operation_prep(handle)) != Z_OK)
@@ -2139,6 +2250,19 @@ zonecfg_lookup_nwif(zone_dochandle_t handle, struct zone_nwiftab *tabptr)
physical, sizeof (physical)) != Z_OK ||
strcmp(tabptr->zone_nwif_physical, physical) != 0))
continue;
+ if (iptype == ZS_EXCLUSIVE && macspec != 0) {
+ if (fetchprop(cur, DTD_ATTR_MAC, mac, sizeof (mac)) !=
+ Z_OK)
+ continue;
+ normalize_mac_addr(norm_mac, mac, sizeof (norm_mac));
+ if (strcmp(tabptr->zone_nwif_mac, norm_mac) != 0)
+ continue;
+ }
+ if (iptype == ZS_EXCLUSIVE && gnicspec != 0 &&
+ (fetchprop(cur, DTD_ATTR_GNIC, gnic,
+ sizeof (gnic)) != Z_OK ||
+ strcmp(tabptr->zone_nwif_gnic, gnic) != 0))
+ continue;
if (iptype == ZS_SHARED && addrspec != 0 &&
(fetchprop(cur, DTD_ATTR_ADDRESS, address,
sizeof (address)) != Z_OK ||
@@ -2181,6 +2305,21 @@ zonecfg_lookup_nwif(zone_dochandle_t handle, struct zone_nwiftab *tabptr)
return (err);
if (iptype == ZS_EXCLUSIVE &&
+ (err = fetchprop(cur, DTD_ATTR_MAC, tabptr->zone_nwif_mac,
+ sizeof (tabptr->zone_nwif_mac))) != Z_OK)
+ return (err);
+
+ if (iptype == ZS_EXCLUSIVE &&
+ (err = fetchprop(cur, DTD_ATTR_VLANID, tabptr->zone_nwif_vlan_id,
+ sizeof (tabptr->zone_nwif_vlan_id))) != Z_OK)
+ return (err);
+
+ if (iptype == ZS_EXCLUSIVE &&
+ (err = fetchprop(cur, DTD_ATTR_GNIC, tabptr->zone_nwif_gnic,
+ sizeof (tabptr->zone_nwif_gnic))) != Z_OK)
+ return (err);
+
+ if (iptype == ZS_EXCLUSIVE &&
(err = fetchprop(cur, DTD_ATTR_ALLOWED_ADDRESS,
tabptr->zone_nwif_allowed_address,
sizeof (tabptr->zone_nwif_allowed_address))) != Z_OK)
@@ -2191,13 +2330,40 @@ zonecfg_lookup_nwif(zone_dochandle_t handle, struct zone_nwiftab *tabptr)
sizeof (tabptr->zone_nwif_defrouter))) != Z_OK)
return (err);
+ tabptr->zone_nwif_attrp = NULL;
+ for (val = cur->xmlChildrenNode; val != NULL; val = val->next) {
+ struct zone_res_attrtab *valptr;
+
+ valptr = (struct zone_res_attrtab *)malloc(
+ sizeof (struct zone_res_attrtab));
+ if (valptr == NULL)
+ return (Z_NOMEM);
+
+ valptr->zone_res_attr_name[0] =
+ valptr->zone_res_attr_value[0] = '\0';
+ if (zonecfg_add_res_attr(&(tabptr->zone_nwif_attrp), valptr)
+ != Z_OK) {
+ free(valptr);
+ break;
+ }
+
+ if ((fetchprop(val, DTD_ATTR_NAME, valptr->zone_res_attr_name,
+ sizeof (valptr->zone_res_attr_name)) != Z_OK))
+ break;
+ if ((fetchprop(val, DTD_ATTR_VALUE,
+ valptr->zone_res_attr_value,
+ sizeof (valptr->zone_res_attr_value)) != Z_OK))
+ break;
+ }
+
return (Z_OK);
}
static int
zonecfg_add_nwif_core(zone_dochandle_t handle, struct zone_nwiftab *tabptr)
{
- xmlNodePtr newnode, cur = handle->zone_dh_cur;
+ xmlNodePtr newnode, cur = handle->zone_dh_cur, valnode;
+ struct zone_res_attrtab *valptr;
int err;
newnode = xmlNewTextChild(cur, NULL, DTD_ELEM_NET, NULL);
@@ -2213,13 +2379,40 @@ zonecfg_add_nwif_core(zone_dochandle_t handle, struct zone_nwiftab *tabptr)
tabptr->zone_nwif_physical)) != Z_OK)
return (err);
/*
- * Do not add this property when it is not set, for backwards
- * compatibility and because it is optional.
+ * Do not add these properties when they are not set, for backwards
+ * compatibility and because they are optional.
*/
if ((strlen(tabptr->zone_nwif_defrouter) > 0) &&
((err = newprop(newnode, DTD_ATTR_DEFROUTER,
tabptr->zone_nwif_defrouter)) != Z_OK))
return (err);
+ if (strlen(tabptr->zone_nwif_mac) > 0 &&
+ (err = newprop(newnode, DTD_ATTR_MAC,
+ tabptr->zone_nwif_mac)) != Z_OK)
+ return (err);
+ if (strlen(tabptr->zone_nwif_vlan_id) > 0 &&
+ (err = newprop(newnode, DTD_ATTR_VLANID,
+ tabptr->zone_nwif_vlan_id)) != Z_OK)
+ return (err);
+ if (strlen(tabptr->zone_nwif_gnic) > 0 &&
+ (err = newprop(newnode, DTD_ATTR_GNIC,
+ tabptr->zone_nwif_gnic)) != Z_OK)
+ return (err);
+
+ for (valptr = tabptr->zone_nwif_attrp; valptr != NULL;
+ valptr = valptr->zone_res_attr_next) {
+ valnode = xmlNewTextChild(newnode, NULL, DTD_ELEM_NETATTR,
+ NULL);
+ err = newprop(valnode, DTD_ATTR_NAME,
+ valptr->zone_res_attr_name);
+ if (err != Z_OK)
+ return (err);
+ err = newprop(valnode, DTD_ATTR_VALUE,
+ valptr->zone_res_attr_value);
+ if (err != Z_OK)
+ return (err);
+ }
+
return (Z_OK);
}
@@ -2244,7 +2437,8 @@ static int
zonecfg_delete_nwif_core(zone_dochandle_t handle, struct zone_nwiftab *tabptr)
{
xmlNodePtr cur = handle->zone_dh_cur;
- boolean_t addr_match, phys_match, allowed_addr_match;
+ boolean_t addr_match, phys_match, allowed_addr_match, mac_match,
+ gnic_match;
for (cur = cur->xmlChildrenNode; cur != NULL; cur = cur->next) {
if (xmlStrcmp(cur->name, DTD_ELEM_NET))
@@ -2256,8 +2450,13 @@ zonecfg_delete_nwif_core(zone_dochandle_t handle, struct zone_nwiftab *tabptr)
tabptr->zone_nwif_allowed_address);
phys_match = match_prop(cur, DTD_ATTR_PHYSICAL,
tabptr->zone_nwif_physical);
+ mac_match = match_prop(cur, DTD_ATTR_MAC,
+ tabptr->zone_nwif_mac);
+ gnic_match = match_prop(cur, DTD_ATTR_GNIC,
+ tabptr->zone_nwif_gnic);
- if (addr_match && allowed_addr_match && phys_match) {
+ if (((addr_match && allowed_addr_match) || mac_match ||
+ gnic_match) && phys_match) {
xmlUnlinkNode(cur);
xmlFreeNode(cur);
return (Z_OK);
@@ -2306,6 +2505,58 @@ zonecfg_modify_nwif(
return (Z_OK);
}
+void
+zonecfg_free_res_attr_list(struct zone_res_attrtab *valtab)
+{
+ if (valtab == NULL)
+ return;
+ zonecfg_free_res_attr_list(valtab->zone_res_attr_next);
+ free(valtab);
+}
+
+int
+zonecfg_add_res_attr(struct zone_res_attrtab **headptr,
+ struct zone_res_attrtab *valtabptr)
+{
+ struct zone_res_attrtab *last, *old, *new;
+
+ last = *headptr;
+ for (old = last; old != NULL; old = old->zone_res_attr_next)
+ last = old; /* walk to the end of the list */
+ new = valtabptr; /* alloc'd by caller */
+ new->zone_res_attr_next = NULL;
+ if (last == NULL)
+ *headptr = new;
+ else
+ last->zone_res_attr_next = new;
+ return (Z_OK);
+}
+
+int
+zonecfg_remove_res_attr(struct zone_res_attrtab **headptr,
+ struct zone_res_attrtab *valtabptr)
+{
+ struct zone_res_attrtab *last, *this, *next;
+
+ last = *headptr;
+ for (this = last; this != NULL; this = this->zone_res_attr_next) {
+ if (strcmp(this->zone_res_attr_name,
+ valtabptr->zone_res_attr_name) == 0 &&
+ strcmp(this->zone_res_attr_value,
+ valtabptr->zone_res_attr_value) == 0) {
+ next = this->zone_res_attr_next;
+ if (this == *headptr)
+ *headptr = next;
+ else
+ last->zone_res_attr_next = next;
+ free(this);
+ return (Z_OK);
+ } else
+ last = this;
+ }
+ return (Z_NO_PROPERTY_ID);
+}
+
/*
* Must be a comma-separated list of alpha-numeric file system names.
*/
@@ -2455,7 +2706,7 @@ zonecfg_set_hostid(zone_dochandle_t handle, const char *hostidp)
int
zonecfg_lookup_dev(zone_dochandle_t handle, struct zone_devtab *tabptr)
{
- xmlNodePtr cur, firstmatch;
+ xmlNodePtr cur, val, firstmatch;
int err;
char match[MAXPATHLEN];
@@ -2500,13 +2751,40 @@ zonecfg_lookup_dev(zone_dochandle_t handle, struct zone_devtab *tabptr)
sizeof (tabptr->zone_dev_match))) != Z_OK)
return (err);
+ tabptr->zone_dev_attrp = NULL;
+ for (val = cur->xmlChildrenNode; val != NULL; val = val->next) {
+ struct zone_res_attrtab *valptr;
+
+ valptr = (struct zone_res_attrtab *)malloc(
+ sizeof (struct zone_res_attrtab));
+ if (valptr == NULL)
+ return (Z_NOMEM);
+
+ valptr->zone_res_attr_name[0] =
+ valptr->zone_res_attr_value[0] = '\0';
+ if (zonecfg_add_res_attr(&(tabptr->zone_dev_attrp), valptr)
+ != Z_OK) {
+ free(valptr);
+ break;
+ }
+
+ if ((fetchprop(val, DTD_ATTR_NAME, valptr->zone_res_attr_name,
+ sizeof (valptr->zone_res_attr_name)) != Z_OK))
+ break;
+ if ((fetchprop(val, DTD_ATTR_VALUE,
+ valptr->zone_res_attr_value,
+ sizeof (valptr->zone_res_attr_value)) != Z_OK))
+ break;
+ }
+
return (Z_OK);
}
static int
zonecfg_add_dev_core(zone_dochandle_t handle, struct zone_devtab *tabptr)
{
- xmlNodePtr newnode, cur = handle->zone_dh_cur;
+ xmlNodePtr newnode, cur = handle->zone_dh_cur, valnode;
+ struct zone_res_attrtab *valptr;
int err;
newnode = xmlNewTextChild(cur, NULL, DTD_ELEM_DEVICE, NULL);
@@ -2515,6 +2793,21 @@ zonecfg_add_dev_core(zone_dochandle_t handle, struct zone_devtab *tabptr)
tabptr->zone_dev_match)) != Z_OK)
return (err);
+ for (valptr = tabptr->zone_dev_attrp; valptr != NULL;
+ valptr = valptr->zone_res_attr_next) {
+ valnode = xmlNewTextChild(newnode, NULL, DTD_ELEM_NETATTR,
+ NULL);
+ err = newprop(valnode, DTD_ATTR_NAME,
+ valptr->zone_res_attr_name);
+ if (err != Z_OK)
+ return (err);
+ err = newprop(valnode, DTD_ATTR_VALUE,
+ valptr->zone_res_attr_value);
+ if (err != Z_OK)
+ return (err);
+ }
+
+
return (Z_OK);
}
@@ -4574,7 +4867,7 @@ get_pool_sched_class(char *poolname, char *class, int clsize)
pool_conf_t *poolconf;
pool_t *pool;
pool_elem_t *pe;
- pool_value_t *pv = pool_value_alloc();
+ pool_value_t *pv;
const char *sched_str;
if (pool_get_status(&status) != PO_SUCCESS || status != POOL_ENABLED)
@@ -4595,15 +4888,23 @@ get_pool_sched_class(char *poolname, char *class, int clsize)
return (Z_NO_POOL);
}
+ if ((pv = pool_value_alloc()) == NULL) {
+ (void) pool_conf_close(poolconf);
+ pool_conf_free(poolconf);
+ return (Z_NO_POOL);
+ }
+
pe = pool_to_elem(poolconf, pool);
if (pool_get_property(poolconf, pe, "pool.scheduler", pv) !=
POC_STRING) {
(void) pool_conf_close(poolconf);
+ pool_value_free(pv);
pool_conf_free(poolconf);
return (Z_NO_ENTRY);
}
(void) pool_value_get_string(pv, &sched_str);
(void) pool_conf_close(poolconf);
+ pool_value_free(pv);
pool_conf_free(poolconf);
if (strlcpy(class, sched_str, clsize) >= clsize)
return (Z_TOO_BIG);
@@ -4712,7 +5013,8 @@ zonecfg_setnwifent(zone_dochandle_t handle)
int
zonecfg_getnwifent(zone_dochandle_t handle, struct zone_nwiftab *tabptr)
{
- xmlNodePtr cur;
+ xmlNodePtr cur, val;
+ struct zone_res_attrtab *valptr;
int err;
if (handle == NULL)
@@ -4748,6 +5050,24 @@ zonecfg_getnwifent(zone_dochandle_t handle, struct zone_nwiftab *tabptr)
return (err);
}
+ if ((err = fetchprop(cur, DTD_ATTR_MAC, tabptr->zone_nwif_mac,
+ sizeof (tabptr->zone_nwif_mac))) != Z_OK) {
+ handle->zone_dh_cur = handle->zone_dh_top;
+ return (err);
+ }
+
+ if ((err = fetchprop(cur, DTD_ATTR_VLANID, tabptr->zone_nwif_vlan_id,
+ sizeof (tabptr->zone_nwif_vlan_id))) != Z_OK) {
+ handle->zone_dh_cur = handle->zone_dh_top;
+ return (err);
+ }
+
+ if ((err = fetchprop(cur, DTD_ATTR_GNIC, tabptr->zone_nwif_gnic,
+ sizeof (tabptr->zone_nwif_gnic))) != Z_OK) {
+ handle->zone_dh_cur = handle->zone_dh_top;
+ return (err);
+ }
+
if ((err = fetchprop(cur, DTD_ATTR_DEFROUTER,
tabptr->zone_nwif_defrouter,
sizeof (tabptr->zone_nwif_defrouter))) != Z_OK) {
@@ -4755,6 +5075,29 @@ zonecfg_getnwifent(zone_dochandle_t handle, struct zone_nwiftab *tabptr)
return (err);
}
+ tabptr->zone_nwif_attrp = NULL;
+ for (val = cur->xmlChildrenNode; val != NULL; val = val->next) {
+ valptr = (struct zone_res_attrtab *)malloc(
+ sizeof (struct zone_res_attrtab));
+ if (valptr == NULL)
+ return (Z_NOMEM);
+
+ valptr->zone_res_attr_name[0] =
+ valptr->zone_res_attr_value[0] = '\0';
+ if (zonecfg_add_res_attr(&(tabptr->zone_nwif_attrp), valptr)
+ != Z_OK) {
+ free(valptr);
+ break;
+ }
+
+ if (fetchprop(val, DTD_ATTR_NAME, valptr->zone_res_attr_name,
+ sizeof (valptr->zone_res_attr_name)) != Z_OK)
+ break;
+ if (fetchprop(val, DTD_ATTR_VALUE, valptr->zone_res_attr_value,
+ sizeof (valptr->zone_res_attr_value)) != Z_OK)
+ break;
+ }
+
handle->zone_dh_cur = cur->next;
return (Z_OK);
}
@@ -4774,7 +5117,7 @@ zonecfg_setdevent(zone_dochandle_t handle)
int
zonecfg_getdevent(zone_dochandle_t handle, struct zone_devtab *tabptr)
{
- xmlNodePtr cur;
+ xmlNodePtr cur, val;
int err;
if (handle == NULL)
@@ -4797,6 +5140,31 @@ zonecfg_getdevent(zone_dochandle_t handle, struct zone_devtab *tabptr)
return (err);
}
+ tabptr->zone_dev_attrp = NULL;
+ for (val = cur->xmlChildrenNode; val != NULL; val = val->next) {
+ struct zone_res_attrtab *valptr;
+
+ valptr = (struct zone_res_attrtab *)malloc(
+ sizeof (struct zone_res_attrtab));
+ if (valptr == NULL)
+ return (Z_NOMEM);
+
+ valptr->zone_res_attr_name[0] =
+ valptr->zone_res_attr_value[0] = '\0';
+ if (zonecfg_add_res_attr(&(tabptr->zone_dev_attrp), valptr)
+ != Z_OK) {
+ free(valptr);
+ break;
+ }
+
+ if ((fetchprop(val, DTD_ATTR_NAME, valptr->zone_res_attr_name,
+ sizeof (valptr->zone_res_attr_name)) != Z_OK))
+ break;
+ if ((fetchprop(val, DTD_ATTR_VALUE, valptr->zone_res_attr_value,
+ sizeof (valptr->zone_res_attr_value)) != Z_OK))
+ break;
+ }
+
handle->zone_dh_cur = cur->next;
return (Z_OK);
}
@@ -5525,6 +5893,164 @@ zone_get_brand(char *zone_name, char *brandname, size_t rp_sz)
}
/*
+ * Atomically get a new zone_did value. The currently allocated value
+ * is stored in /etc/zones/did.txt. Lock the file, read the current value,
+ * increment, save the new value and unlock the file. Return the new value
+ * or -1 if there was an error. The ID namespace is large enough that we
+ * don't worry about recycling an ID when a zone is deleted.
+ */
+static zoneid_t
+new_zone_did()
+{
+ int fd;
+ int len;
+ int val;
+ struct flock lck;
+ char buf[80];
+
+ if ((fd = open(DEBUGID_FILE, O_RDWR | O_CREAT,
+ S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) < 0) {
+ perror("new_zone_did open failed");
+ return (-1);
+ }
+
+ /* Initialize the lock. */
+ lck.l_whence = SEEK_SET;
+ lck.l_start = 0;
+ lck.l_len = 0;
+
+ /* Wait until we acquire an exclusive lock on the file. */
+ lck.l_type = F_WRLCK;
+ if (fcntl(fd, F_SETLKW, &lck) == -1) {
+ perror("new_zone_did lock failed");
+ (void) close(fd);
+ return (-1);
+ }
+
+ /* Get currently allocated value */
+ len = read(fd, buf, sizeof (buf));
+ if (len == -1) {
+ perror("new_zone_did read failed");
+ val = -1;
+ } else {
+ if (lseek(fd, 0L, SEEK_SET) == -1) {
+ perror("new_zone_did seek failed");
+ val = -1;
+ } else {
+ if (len == 0) {
+ /* Just created the file, initialize at 1 */
+ val = 1;
+ } else {
+ val = atoi(buf);
+ val++;
+ }
+
+ (void) snprintf(buf, sizeof (buf), "%d\n", val);
+ len = strlen(buf);
+
+ /* Save newly allocated value */
+ if (write(fd, buf, len) == -1) {
+ perror("new_zone_did write failed");
+ val = -1;
+ }
+ }
+ }
+
+ /* Release the file lock. */
+ lck.l_type = F_UNLCK;
+ if (fcntl(fd, F_SETLK, &lck) == -1) {
+ perror("new_zone_did unlock failed");
+ val = -1;
+ }
+
+ if (close(fd) != 0)
+ perror("new_zone_did close failed");
+
+ return (val);
+}
+
+/*
+ * Called by zoneadmd to get the zone's debug ID.
+ * If the zone doesn't already have an ID, a new one is generated and
+ * persistently saved onto the zone. Normally either zoneadm or zonecfg
+ * will assign a new ID for the zone, so zoneadmd should never have to
+ * generate one, but we also handle that here just to be paranoid.
+ */
+zoneid_t
+zone_get_did(char *zone_name)
+{
+ int res;
+ zoneid_t new_did;
+ zone_dochandle_t handle;
+ char did_str[80];
+
+ if ((handle = zonecfg_init_handle()) == NULL)
+ return (getpid());
+
+ if (zonecfg_get_handle((char *)zone_name, handle) != Z_OK)
+ return (getpid());
+
+ res = getrootattr(handle, DTD_ATTR_DID, did_str, sizeof (did_str));
+
+ /* If the zone already has an assigned debug ID, return it. */
+ if (res == Z_OK && did_str[0] != '\0') {
+ zonecfg_fini_handle(handle);
+ return (atoi(did_str));
+ }
+
+ /*
+ * The zone doesn't have an assigned debug ID yet, generate one and
+ * save it as part of the zone definition.
+ */
+ if ((new_did = new_zone_did()) == -1) {
+ /*
+ * We should really never hit this block of code.
+ * Generating a new ID failed for some reason. Use the current
+ * pid as a temporary ID so that the zone can continue to boot
+ * but we don't persistently save this temporary ID on the zone.
+ */
+ zonecfg_fini_handle(handle);
+ return (getpid());
+ }
+
+ /* Now persistently save this new ID onto the zone. */
+ (void) snprintf(did_str, sizeof (did_str), "%d", new_did);
+ (void) setrootattr(handle, DTD_ATTR_DID, did_str);
+ (void) zonecfg_save(handle);
+
+ zonecfg_fini_handle(handle);
+ return (new_did);
+}
+
+zoneid_t
+zonecfg_get_did(zone_dochandle_t handle)
+{
+ char did_str[80];
+ int err;
+ zoneid_t did;
+
+ err = getrootattr(handle, DTD_ATTR_DID, did_str, sizeof (did_str));
+ if (err == Z_OK && did_str[0] != '\0')
+ did = atoi(did_str);
+ else
+ did = -1;
+
+ return (did);
+}
+
+void
+zonecfg_set_did(zone_dochandle_t handle)
+{
+ zoneid_t new_did;
+ char did_str[80];
+
+ if ((new_did = new_zone_did()) == -1)
+ return;
+ (void) snprintf(did_str, sizeof (did_str), "%d", new_did);
+ (void) setrootattr(handle, DTD_ATTR_DID, did_str);
+}
+
+/*
* Return the appropriate root for the active /dev.
* For normal zone, the path is $ZONEPATH/root;
* for scratch zone, the dev path is $ZONEPATH/lu.
@@ -5648,16 +6174,27 @@ int
zone_set_state(char *zone, zone_state_t state)
{
struct zoneent ze;
+ int res;
+ zone_state_t oldst = (zone_state_t)-1;
if (state != ZONE_STATE_CONFIGURED && state != ZONE_STATE_INSTALLED &&
state != ZONE_STATE_INCOMPLETE)
return (Z_INVAL);
+ (void) zone_get_state(zone, &oldst);
+
bzero(&ze, sizeof (ze));
(void) strlcpy(ze.zone_name, zone, sizeof (ze.zone_name));
ze.zone_state = state;
(void) strlcpy(ze.zone_path, "", sizeof (ze.zone_path));
- return (putzoneent(&ze, PZE_MODIFY));
+ res = putzoneent(&ze, PZE_MODIFY);
+
+ if (res == Z_OK) {
+ zonecfg_notify_conf_change(zone, zone_state_str(oldst),
+ zone_state_str(state));
+ }
+
+ return (res);
}
/*
@@ -5807,6 +6344,30 @@ zonecfg_get_uuid(const char *zonename, uuid_t uuid)
}
/*
+ * Changes a zone's UUID to the given value. Returns an error if the UUID is
+ * malformed or if the zone cannot be located.
+ */
+int
+zonecfg_set_uuid(const char *zonename, const char *zonepath,
+ const char *uuid)
+{
+ int err;
+ struct zoneent ze;
+
+ bzero(&ze, sizeof (ze));
+ ze.zone_state = -1; /* Preserve existing state in index */
+ (void) strlcpy(ze.zone_name, zonename, sizeof (ze.zone_name));
+ (void) strlcpy(ze.zone_path, zonepath, sizeof (ze.zone_path));
+ if (uuid_parse((char *)uuid, ze.zone_uuid) == -1)
+ return (Z_INVALID_PROPERTY);
+
+ if ((err = putzoneent(&ze, PZE_MODIFY)) != Z_OK)
+ return (err);
+
+ return (Z_OK);
+}
+
+/*
* File-system convenience functions.
*/
boolean_t
@@ -6839,131 +7400,49 @@ zonecfg_getpsetent(zone_dochandle_t handle, struct zone_psettab *tabptr)
return (err);
}
-static int
-add_mcap(zone_dochandle_t handle, struct zone_mcaptab *tabptr)
-{
- xmlNodePtr newnode, cur = handle->zone_dh_cur;
- int err;
-
- newnode = xmlNewTextChild(cur, NULL, DTD_ELEM_MCAP, NULL);
- if ((err = newprop(newnode, DTD_ATTR_PHYSCAP, tabptr->zone_physmem_cap))
- != Z_OK)
- return (err);
-
- return (Z_OK);
-}
-
-int
-zonecfg_delete_mcap(zone_dochandle_t handle)
-{
- int err;
- xmlNodePtr cur = handle->zone_dh_cur;
-
- if ((err = operation_prep(handle)) != Z_OK)
- return (err);
-
- for (cur = cur->xmlChildrenNode; cur != NULL; cur = cur->next) {
- if (xmlStrcmp(cur->name, DTD_ELEM_MCAP) != 0)
- continue;
-
- xmlUnlinkNode(cur);
- xmlFreeNode(cur);
- return (Z_OK);
- }
- return (Z_NO_RESOURCE_ID);
-}
-
-int
-zonecfg_modify_mcap(zone_dochandle_t handle, struct zone_mcaptab *tabptr)
-{
- int err;
-
- if (tabptr == NULL)
- return (Z_INVAL);
-
- err = zonecfg_delete_mcap(handle);
- /* it is ok if there is no mcap entry */
- if (err != Z_OK && err != Z_NO_RESOURCE_ID)
- return (err);
-
- if ((err = add_mcap(handle, tabptr)) != Z_OK)
- return (err);
-
- return (Z_OK);
-}
-
+/*
+ * Cleanup obsolete constructs in the configuration.
+ * Return true of the config has been updated and must be commited.
+ */
int
-zonecfg_lookup_mcap(zone_dochandle_t handle, struct zone_mcaptab *tabptr)
+zonecfg_fix_obsolete(zone_dochandle_t handle)
{
+ int res = 0;
+ int add_physmem_rctl = 0;
xmlNodePtr cur;
- int err;
-
- if (tabptr == NULL)
- return (Z_INVAL);
+ char zone_physmem_cap[MAXNAMELEN];
- if ((err = operation_prep(handle)) != Z_OK)
- return (err);
+ if (operation_prep(handle) != Z_OK)
+ return (res);
+ /*
+ * If an obsolete mcap entry exists, convert it to the rctl.
+ */
cur = handle->zone_dh_cur;
for (cur = cur->xmlChildrenNode; cur != NULL; cur = cur->next) {
if (xmlStrcmp(cur->name, DTD_ELEM_MCAP) != 0)
continue;
- if ((err = fetchprop(cur, DTD_ATTR_PHYSCAP,
- tabptr->zone_physmem_cap,
- sizeof (tabptr->zone_physmem_cap))) != Z_OK) {
- handle->zone_dh_cur = handle->zone_dh_top;
- return (err);
+
+ if (fetchprop(cur, DTD_ATTR_PHYSCAP,
+ zone_physmem_cap, sizeof (zone_physmem_cap)) == Z_OK) {
+ res = 1;
+ add_physmem_rctl = 1;
}
- return (Z_OK);
+ xmlUnlinkNode(cur);
+ xmlFreeNode(cur);
+ break;
}
- return (Z_NO_ENTRY);
-}
-
-static int
-getmcapent_core(zone_dochandle_t handle, struct zone_mcaptab *tabptr)
-{
- xmlNodePtr cur;
- int err;
-
- if (handle == NULL)
- return (Z_INVAL);
-
- if ((cur = handle->zone_dh_cur) == NULL)
- return (Z_NO_ENTRY);
-
- for (; cur != NULL; cur = cur->next)
- if (xmlStrcmp(cur->name, DTD_ELEM_MCAP) == 0)
- break;
- if (cur == NULL) {
- handle->zone_dh_cur = handle->zone_dh_top;
- return (Z_NO_ENTRY);
- }
+ if (add_physmem_rctl) {
+ uint64_t cap;
+ char *endp;
- if ((err = fetchprop(cur, DTD_ATTR_PHYSCAP, tabptr->zone_physmem_cap,
- sizeof (tabptr->zone_physmem_cap))) != Z_OK) {
- handle->zone_dh_cur = handle->zone_dh_top;
- return (err);
+ cap = strtoull(zone_physmem_cap, &endp, 10);
+ (void) zonecfg_set_aliased_rctl(handle, ALIAS_MAXPHYSMEM, cap);
}
- handle->zone_dh_cur = cur->next;
- return (Z_OK);
-}
-
-int
-zonecfg_getmcapent(zone_dochandle_t handle, struct zone_mcaptab *tabptr)
-{
- int err;
-
- if ((err = zonecfg_setent(handle)) != Z_OK)
- return (err);
-
- err = getmcapent_core(handle, tabptr);
-
- (void) zonecfg_endent(handle);
-
- return (err);
+ return (res);
}
/*
@@ -7631,7 +8110,7 @@ zonecfg_update_userauths(zone_dochandle_t handle, char *zonename)
(void) fclose(uaf);
return (Z_MISC_FS);
}
- if (!config_file_path(zonename, config_file)) {
+ if (!config_file_path(zonename, config_file, sizeof (config_file))) {
(void) fclose(uaf);
return (Z_MISC_FS);
}
diff --git a/usr/src/lib/libzonecfg/common/mapfile-vers b/usr/src/lib/libzonecfg/common/mapfile-vers
index b908a28174..f9a17e4962 100644
--- a/usr/src/lib/libzonecfg/common/mapfile-vers
+++ b/usr/src/lib/libzonecfg/common/mapfile-vers
@@ -20,6 +20,7 @@
#
#
# Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2015, Joyent Inc.
#
#
@@ -53,6 +54,7 @@ SYMBOL_VERSION SUNWprivate_1.1 {
zonecfg_add_fs_option;
zonecfg_add_admin;
zonecfg_add_nwif;
+ zonecfg_add_res_attr;
zonecfg_add_pkg;
zonecfg_add_pset;
zonecfg_add_rctl;
@@ -79,7 +81,6 @@ SYMBOL_VERSION SUNWprivate_1.1 {
zonecfg_delete_dev;
zonecfg_delete_ds;
zonecfg_delete_filesystem;
- zonecfg_delete_mcap;
zonecfg_delete_nwif;
zonecfg_delete_pset;
zonecfg_delete_rctl;
@@ -104,7 +105,9 @@ SYMBOL_VERSION SUNWprivate_1.1 {
zonecfg_find_mounts;
zonecfg_find_scratch;
zonecfg_fini_handle;
+ zonecfg_fix_obsolete;
zonecfg_free_fs_option_list;
+ zonecfg_free_res_attr_list;
zonecfg_free_rctl_value_list;
zonecfg_get_aliased_rctl;
zonecfg_get_attach_handle;
@@ -118,6 +121,7 @@ SYMBOL_VERSION SUNWprivate_1.1 {
zonecfg_get_bootargs;
zonecfg_get_brand;
zonecfg_get_dflt_sched_class;
+ zonecfg_get_did;
zonecfg_getdevent;
zonecfg_getdevperment;
zonecfg_getdsent;
@@ -127,7 +131,6 @@ SYMBOL_VERSION SUNWprivate_1.1 {
zonecfg_get_hostid;
zonecfg_get_iptype;
zonecfg_get_limitpriv;
- zonecfg_getmcapent;
zonecfg_get_name;
zonecfg_get_name_by_uuid;
zonecfg_getnwifent;
@@ -160,7 +163,6 @@ SYMBOL_VERSION SUNWprivate_1.1 {
zonecfg_lookup_dev;
zonecfg_lookup_ds;
zonecfg_lookup_filesystem;
- zonecfg_lookup_mcap;
zonecfg_lookup_nwif;
zonecfg_lookup_pset;
zonecfg_lookup_rctl;
@@ -169,11 +171,11 @@ SYMBOL_VERSION SUNWprivate_1.1 {
zonecfg_modify_dev;
zonecfg_modify_ds;
zonecfg_modify_filesystem;
- zonecfg_modify_mcap;
zonecfg_modify_nwif;
zonecfg_modify_pset;
zonecfg_modify_rctl;
zonecfg_notify_bind;
+ zonecfg_notify_create;
zonecfg_notify_critical_abort;
zonecfg_notify_critical_enter;
zonecfg_notify_critical_exit;
@@ -183,6 +185,7 @@ SYMBOL_VERSION SUNWprivate_1.1 {
zonecfg_ping_zoneadmd;
zonecfg_release_lock_file;
zonecfg_remove_fs_option;
+ zonecfg_remove_res_attr;
zonecfg_remove_rctl_value;
zonecfg_remove_userauths;
zonecfg_reverse_scratch;
@@ -196,6 +199,7 @@ SYMBOL_VERSION SUNWprivate_1.1 {
zonecfg_set_autoboot;
zonecfg_set_bootargs;
zonecfg_set_brand;
+ zonecfg_set_did;
zonecfg_setdevent;
zonecfg_setdevperment;
zonecfg_setdsent;
@@ -211,6 +215,7 @@ SYMBOL_VERSION SUNWprivate_1.1 {
zonecfg_set_root;
zonecfg_set_sched;
zonecfg_set_swinv;
+ zonecfg_set_uuid;
zonecfg_set_zonepath;
zonecfg_strerror;
zonecfg_str_to_bytes;
@@ -229,6 +234,7 @@ SYMBOL_VERSION SUNWprivate_1.1 {
zonecfg_verify_save;
zonecfg_warn_poold;
zone_get_brand;
+ zone_get_did;
zone_get_devroot;
zone_get_id;
zone_get_rootpath;
diff --git a/usr/src/lib/libzonecfg/dtd/zonecfg.dtd.1 b/usr/src/lib/libzonecfg/dtd/zonecfg.dtd.1
index d94bb09c5f..3c7198efef 100644
--- a/usr/src/lib/libzonecfg/dtd/zonecfg.dtd.1
+++ b/usr/src/lib/libzonecfg/dtd/zonecfg.dtd.1
@@ -21,6 +21,7 @@
CDDL HEADER END
Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ Copyright (c) 2011, Joyent Inc. All rights reserved.
-->
@@ -46,14 +47,21 @@
<!ATTLIST inherited-pkg-dir directory CDATA #REQUIRED>
-<!ELEMENT network EMPTY>
+<!ELEMENT net-attr EMPTY>
+<!ATTLIST net-attr name CDATA #REQUIRED
+ value CDATA #REQUIRED>
+
+<!ELEMENT network (net-attr)*>
<!ATTLIST network address CDATA ""
allowed-address CDATA ""
defrouter CDATA ""
- physical CDATA #REQUIRED>
+ global-nic CDATA ""
+ mac-addr CDATA ""
+ physical CDATA #REQUIRED
+ vlan-id CDATA "">
-<!ELEMENT device EMPTY>
+<!ELEMENT device (net-attr)*>
<!ATTLIST device match CDATA #REQUIRED>
@@ -156,6 +164,7 @@
limitpriv CDATA ""
bootargs CDATA ""
brand CDATA ""
+ debugid CDATA ""
scheduling-class CDATA ""
fs-allowed CDATA ""
version NMTOKEN #FIXED '1'>
diff --git a/usr/src/lib/libzpool/common/sys/zfs_context.h b/usr/src/lib/libzpool/common/sys/zfs_context.h
index 94d59874d3..9adba03fab 100644
--- a/usr/src/lib/libzpool/common/sys/zfs_context.h
+++ b/usr/src/lib/libzpool/common/sys/zfs_context.h
@@ -22,7 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_ZFS_CONTEXT_H
@@ -318,6 +318,7 @@ extern void kstat_runq_back_to_waitq(kstat_io_t *);
#define KM_SLEEP UMEM_NOFAIL
#define KM_PUSHPAGE KM_SLEEP
#define KM_NOSLEEP UMEM_DEFAULT
+#define KM_NORMALPRI 0
#define KMC_NODEBUG UMC_NODEBUG
#define KMC_NOTOUCH 0 /* not needed for userland caches */
#define kmem_alloc(_s, _f) umem_alloc(_s, _f)
diff --git a/usr/src/lib/mergeq/mergeq.c b/usr/src/lib/mergeq/mergeq.c
new file mode 100644
index 0000000000..fd9a9c32ea
--- /dev/null
+++ b/usr/src/lib/mergeq/mergeq.c
@@ -0,0 +1,606 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * Merge queue
+ *
+ * A multi-threaded merging queue.
+ *
+ * The general constraint of the merge queue is that if a set of items are
+ * inserted into the queue in the same order, then no matter how many threads
+ * are on the scene, we will always process the items in the same order. The
+ * secondary constraint is that to support environments that must be
+ * single-threaded, we explicitly *must not* create a thread in the case where
+ * the number of requested threads is just one.
+ *
+ * To that end, we've designed our queue as a circular buffer. We will grow that
+ * buffer to contain enough space for all the input items, after which we'll
+ * then treat it as a circular buffer.
+ *
+ * Items will be issued to a processing function two at a time, until there is
+ * only one item remaining in the queue, at which point we will be doing doing
+ * any merging work.
+ *
+ * A given queue has three different entries that we care about tracking:
+ *
+ * o mq_nproc - What is the slot of the next item to process for something
+ * looking for work.
+ *
+ * o mq_next - What is the slot of the next item that should be inserted into
+ * the queue.
+ *
+ * o mq_ncommit - What is the slot of the next item that should be committed.
+ *
+ * When a thread comes and looks for work, we pop entries off of the queue based
+ * on the index provided by mq_nproc. At the same time, it also gets the slot
+ * that it should place the result in, which is mq_next. However, because we
+ * have multiple threads that are operating on the system, we want to make sure
+ * that we push things onto the queue in order. We do that by allocating a slot
+ * to each task and when it completes, it waits for its slot to be ready based
+ * on it being the value of mq_ncommit.
+ *
+ * In addition, we keep track of the number of items in the queue as well as the
+ * number of active workers. There's also a generation count that is used to
+ * figure out when the various values might lap one another.
+ *
+ * The following images show what happens when we have a queue with six items
+ * and whose capacity has been shrunk to six, to better fit in the screen.
+ *
+ *
+ * 1) This is the initial configuration of the queue right before any processing
+ * is done in the context of mergeq_merge(). Every box has an initial item for
+ * merging in it (represented by an 'x'). Here, the mq_nproc, mq_next, and
+ * mq_ncommit will all point at the initial entry. However, the mq_next has
+ * already lapped around the array and thus has a generation count of one.
+ *
+ * The '+' characters indicate which bucket the corresponding value of mq_nproc,
+ * mq_ncommit, and mq_nproc.
+ *
+ * +---++---++---++---++---++---+
+ * | X || X || X || X || X || X |
+ * +---++---++---++---++---++---+
+ * mq_next (g1) +
+ * mq_ncommit (g0) +
+ * mq_nproc (g0) +
+ *
+ * 2) This shows the state right as the first thread begins to process an entry.
+ * Note in this example we will have two threads processing this queue. Note,
+ * mq_ncommit has not advanced. This is because the first thread has started
+ * processing entries, but it has not finished, and thus we can't commit it.
+ * We've incremented mq_next by one because it has gone ahead and assigned a
+ * single entry. We've incremented mq_nproc by two, because we have removed two
+ * entries and thus will have another set available.
+ *
+ * +---++---++---++---++---++---+ t1 - slot 0
+ * | || || X || X || X || X | t2 - idle
+ * +---++---++---++---++---++---+
+ * mq_next (g1) +
+ * mq_ncommit (g0) +
+ * mq_nproc (g0) +
+ *
+ *
+ * 3) This shows the state right after the second thread begins to process an
+ * entry, note that the first thread has not finished. The changes are very
+ * similar to the previous state, we've advanced, mq_nproc and mq_next, but not
+ * mq_ncommit.
+ *
+ * +---++---++---++---++---++---+ t1 - slot 0
+ * | || || || || X || X | t2 - slot 1
+ * +---++---++---++---++---++---+
+ * mq_next (g1) +
+ * mq_ncommit (g0) +
+ * mq_nproc (g0) +
+ *
+ * 4) This shows the state after thread one has finished processing an item, but
+ * before it does anything else. Note that even if thread two finishes early, it
+ * cannot commit its item until thread one finishes. Here 'Y' refers to the
+ * result of merging the first two 'X's.
+ *
+ * +---++---++---++---++---++---+ t1 - idle
+ * | Y || || || || X || X | t2 - slot 1
+ * +---++---++---++---++---++---+
+ * mq_next (g1) +
+ * mq_ncommit (g0) +
+ * mq_nproc (g0) +
+ *
+ * 5) This shows the state after thread one has begun to process the next round
+ * and after thread two has committed, but before it begins processing the next
+ * item. Note that mq_nproc has wrapped around and we've bumped its generation
+ * counter.
+ *
+ * +---++---++---++---++---++---+ t1 - slot 2
+ * | Y || Y || || || || | t2 - idle
+ * +---++---++---++---++---++---+
+ * mq_next (g1) +
+ * mq_ncommit (g0) +
+ * mq_nproc (g0) +
+ *
+ * 6) Here, thread two, will take the next two Y values and thread 1 will commit
+ * its 'Y'. Thread one now must wait until thread two finishes such that it can
+ * do additional work.
+ *
+ * +---++---++---++---++---++---+ t1 - waiting
+ * | || || Y || || || | t2 - slot 3
+ * +---++---++---++---++---++---+
+ * mq_next (g1) +
+ * mq_ncommit (g0) +
+ * mq_nproc (g0) +
+ *
+ * 7) Here, thread two has committed and thread one is about to go process the
+ * final entry. The character 'Z' represents the results of merging two 'Y's.
+ *
+ * +---++---++---++---++---++---+ t1 - idle
+ * | || || Y || Z || || | t2 - idle
+ * +---++---++---++---++---++---+
+ * mq_next (g1) +
+ * mq_ncommit (g0) +
+ * mq_nproc (g0) +
+ *
+ * 8) Here, thread one is processing the final item. Thread two is waiting in
+ * mergeq_pop() for enough items to be available. In this case, it will never
+ * happen; however, once all threads have finished it will break out.
+ *
+ * +---++---++---++---++---++---+ t1 - slot 4
+ * | || || || || || | t2 - idle
+ * +---++---++---++---++---++---+
+ * mq_next (g1) +
+ * mq_ncommit (g0) +
+ * mq_nproc (g0) +
+ *
+ * 9) This is the final state of the queue, it has a single '*' item which is
+ * the final merge result. At this point, both thread one and thread two would
+ * stop processing and we'll return the result to the user.
+ *
+ * +---++---++---++---++---++---+ t1 - slot 4
+ * | || || || || * || | t2 - idle
+ * +---++---++---++---++---++---+
+ * mq_next (g1) +
+ * mq_ncommit (g0) +
+ * mq_nproc (g0) +
+ *
+ *
+ * Note, that if at any point in time the processing function fails, then all
+ * the merges will quiesce and that error will be propagated back to the user.
+ */
+
+#include <strings.h>
+#include <sys/debug.h>
+#include <thread.h>
+#include <synch.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdlib.h>
+
+#include "mergeq.h"
+
+struct mergeq {
+ mutex_t mq_lock; /* Protects items below */
+ cond_t mq_cond; /* Condition variable */
+ void **mq_items; /* Array of items to process */
+ size_t mq_nitems; /* Number of items in the queue */
+ size_t mq_cap; /* Capacity of the items */
+ size_t mq_next; /* Place to put next entry */
+ size_t mq_gnext; /* Generation for next */
+ size_t mq_nproc; /* Index of next thing to process */
+ size_t mq_gnproc; /* Generation for next proc */
+ size_t mq_ncommit; /* Index of the next thing to commit */
+ size_t mq_gncommit; /* Commit generation */
+ uint_t mq_nactthrs; /* Number of active threads */
+ uint_t mq_ndthreads; /* Desired number of threads */
+ thread_t *mq_thrs; /* Actual threads */
+ mergeq_proc_f *mq_func; /* Processing function */
+ void *mq_arg; /* Argument for processing */
+ boolean_t mq_working; /* Are we working on processing */
+ boolean_t mq_iserror; /* Have we encountered an error? */
+ int mq_error;
+};
+
+#define MERGEQ_DEFAULT_CAP 64
+
+static int
+mergeq_error(int err)
+{
+ errno = err;
+ return (MERGEQ_ERROR);
+}
+
+void
+mergeq_fini(mergeq_t *mqp)
+{
+ if (mqp == NULL)
+ return;
+
+ VERIFY(mqp->mq_working != B_TRUE);
+
+ if (mqp->mq_items != NULL)
+ mergeq_free(mqp->mq_items, sizeof (void *) * mqp->mq_cap);
+ if (mqp->mq_ndthreads > 0) {
+ mergeq_free(mqp->mq_thrs, sizeof (thread_t) *
+ mqp->mq_ndthreads);
+ }
+ VERIFY0(cond_destroy(&mqp->mq_cond));
+ VERIFY0(mutex_destroy(&mqp->mq_lock));
+ mergeq_free(mqp, sizeof (mergeq_t));
+}
+
+int
+mergeq_init(mergeq_t **outp, uint_t nthrs)
+{
+ int ret;
+ mergeq_t *mqp;
+
+ mqp = mergeq_alloc(sizeof (mergeq_t));
+ if (mqp == NULL)
+ return (mergeq_error(ENOMEM));
+
+ bzero(mqp, sizeof (mergeq_t));
+ mqp->mq_items = mergeq_alloc(sizeof (void *) * MERGEQ_DEFAULT_CAP);
+ if (mqp->mq_items == NULL) {
+ mergeq_free(mqp, sizeof (mergeq_t));
+ return (mergeq_error(ENOMEM));
+ }
+ bzero(mqp->mq_items, sizeof (void *) * MERGEQ_DEFAULT_CAP);
+
+ mqp->mq_ndthreads = nthrs - 1;
+ if (mqp->mq_ndthreads > 0) {
+ mqp->mq_thrs = mergeq_alloc(sizeof (thread_t) *
+ mqp->mq_ndthreads);
+ if (mqp->mq_thrs == NULL) {
+ mergeq_free(mqp->mq_items, sizeof (void *) *
+ MERGEQ_DEFAULT_CAP);
+ mergeq_free(mqp, sizeof (mergeq_t));
+ return (mergeq_error(ENOMEM));
+ }
+ }
+
+ if ((ret = mutex_init(&mqp->mq_lock, USYNC_THREAD | LOCK_ERRORCHECK,
+ NULL)) != 0) {
+ if (mqp->mq_ndthreads > 0) {
+ mergeq_free(mqp->mq_thrs,
+ sizeof (thread_t) * mqp->mq_ndthreads);
+ }
+ mergeq_free(mqp->mq_items, sizeof (void *) *
+ MERGEQ_DEFAULT_CAP);
+ mergeq_free(mqp, sizeof (mergeq_t));
+ return (mergeq_error(ret));
+ }
+
+ if ((ret = cond_init(&mqp->mq_cond, USYNC_THREAD, NULL)) != 0) {
+ VERIFY0(mutex_destroy(&mqp->mq_lock));
+ if (mqp->mq_ndthreads > 0) {
+ mergeq_free(mqp->mq_thrs,
+ sizeof (thread_t) * mqp->mq_ndthreads);
+ }
+ mergeq_free(mqp->mq_items, sizeof (void *) *
+ MERGEQ_DEFAULT_CAP);
+ mergeq_free(mqp, sizeof (mergeq_t));
+ return (mergeq_error(ret));
+ }
+
+ mqp->mq_cap = MERGEQ_DEFAULT_CAP;
+ *outp = mqp;
+ return (0);
+}
+
+static void
+mergeq_reset(mergeq_t *mqp)
+{
+ VERIFY(MUTEX_HELD(&mqp->mq_lock));
+ VERIFY(mqp->mq_working == B_FALSE);
+ if (mqp->mq_cap != 0)
+ bzero(mqp->mq_items, sizeof (void *) * mqp->mq_cap);
+ mqp->mq_nitems = 0;
+ mqp->mq_next = 0;
+ mqp->mq_gnext = 0;
+ mqp->mq_nproc = 0;
+ mqp->mq_gnproc = 0;
+ mqp->mq_ncommit = 0;
+ mqp->mq_gncommit = 0;
+ mqp->mq_func = NULL;
+ mqp->mq_arg = NULL;
+ mqp->mq_iserror = B_FALSE;
+ mqp->mq_error = 0;
+}
+
+static int
+mergeq_grow(mergeq_t *mqp)
+{
+ size_t ncap;
+ void **items;
+
+ VERIFY(MUTEX_HELD(&mqp->mq_lock));
+ VERIFY(mqp->mq_working == B_FALSE);
+
+ if (SIZE_MAX - mqp->mq_cap < MERGEQ_DEFAULT_CAP)
+ return (ENOSPC);
+
+ ncap = mqp->mq_cap + MERGEQ_DEFAULT_CAP;
+ items = mergeq_alloc(ncap * sizeof (void *));
+ if (items == NULL)
+ return (ENOMEM);
+
+ bzero(items, ncap * sizeof (void *));
+ bcopy(mqp->mq_items, items, mqp->mq_cap * sizeof (void *));
+ mergeq_free(mqp->mq_items, sizeof (mqp->mq_cap) * sizeof (void *));
+ mqp->mq_items = items;
+ mqp->mq_cap = ncap;
+ return (0);
+}
+
+int
+mergeq_add(mergeq_t *mqp, void *item)
+{
+ VERIFY0(mutex_lock(&mqp->mq_lock));
+ if (mqp->mq_working == B_TRUE) {
+ VERIFY0(mutex_unlock(&mqp->mq_lock));
+ return (mergeq_error(ENXIO));
+ }
+
+ if (mqp->mq_next == mqp->mq_cap) {
+ int ret;
+
+ if ((ret = mergeq_grow(mqp)) != 0) {
+ VERIFY0(mutex_unlock(&mqp->mq_lock));
+ return (mergeq_error(ret));
+ }
+ }
+ mqp->mq_items[mqp->mq_next] = item;
+ mqp->mq_next++;
+ mqp->mq_nitems++;
+
+ VERIFY0(mutex_unlock(&mqp->mq_lock));
+ return (0);
+}
+
+static size_t
+mergeq_slot(mergeq_t *mqp)
+{
+ size_t s;
+
+ VERIFY(MUTEX_HELD(&mqp->mq_lock));
+ VERIFY(mqp->mq_next < mqp->mq_cap);
+
+ /*
+ * This probably should be a cv / wait thing.
+ */
+ VERIFY(mqp->mq_nproc != (mqp->mq_next + 1) % mqp->mq_cap);
+
+ s = mqp->mq_next;
+ mqp->mq_next++;
+ if (mqp->mq_next == mqp->mq_cap) {
+ mqp->mq_next %= mqp->mq_cap;
+ mqp->mq_gnext++;
+ }
+
+ return (s);
+}
+
+/*
+ * Internal function to push items onto the queue which is now a circular
+ * buffer. This should only be used once we begin working on the queue.
+ */
+static void
+mergeq_push(mergeq_t *mqp, size_t slot, void *item)
+{
+ VERIFY(MUTEX_HELD(&mqp->mq_lock));
+ VERIFY(slot < mqp->mq_cap);
+
+ /*
+ * We need to verify that we don't push over something that exists.
+ * Based on the design, this should never happen. However, in the face
+ * of bugs, anything is possible.
+ */
+ while (mqp->mq_ncommit != slot && mqp->mq_iserror == B_FALSE)
+ (void) cond_wait(&mqp->mq_cond, &mqp->mq_lock);
+
+ if (mqp->mq_iserror == B_TRUE)
+ return;
+
+ mqp->mq_items[slot] = item;
+ mqp->mq_nitems++;
+ mqp->mq_ncommit++;
+ if (mqp->mq_ncommit == mqp->mq_cap) {
+ mqp->mq_ncommit %= mqp->mq_cap;
+ mqp->mq_gncommit++;
+ }
+ cond_broadcast(&mqp->mq_cond);
+}
+
+static void *
+mergeq_pop_one(mergeq_t *mqp)
+{
+ void *out;
+
+ /*
+ * We can't move mq_nproc beyond mq_next if they're on the same
+ * generation.
+ */
+ VERIFY(mqp->mq_gnext != mqp->mq_gnproc ||
+ mqp->mq_nproc != mqp->mq_next);
+
+ out = mqp->mq_items[mqp->mq_nproc];
+
+ mqp->mq_items[mqp->mq_nproc] = NULL;
+ mqp->mq_nproc++;
+ if (mqp->mq_nproc == mqp->mq_cap) {
+ mqp->mq_nproc %= mqp->mq_cap;
+ mqp->mq_gnproc++;
+ }
+ mqp->mq_nitems--;
+
+ return (out);
+}
+
+/*
+ * Pop a set of two entries from the queue. We may not have anything to process
+ * at the moment, eg. be waiting for someone to add something. In which case,
+ * we'll be sitting and waiting.
+ */
+static boolean_t
+mergeq_pop(mergeq_t *mqp, void **first, void **second)
+{
+ VERIFY(MUTEX_HELD(&mqp->mq_lock));
+ VERIFY(mqp->mq_nproc < mqp->mq_cap);
+
+ while (mqp->mq_nitems < 2 && mqp->mq_nactthrs > 0 &&
+ mqp->mq_iserror == B_FALSE)
+ (void) cond_wait(&mqp->mq_cond, &mqp->mq_lock);
+
+ if (mqp->mq_iserror == B_TRUE)
+ return (B_FALSE);
+
+ if (mqp->mq_nitems < 2 && mqp->mq_nactthrs == 0) {
+ VERIFY(mqp->mq_iserror == B_TRUE || mqp->mq_nitems == 1);
+ return (B_FALSE);
+ }
+ VERIFY(mqp->mq_nitems >= 2);
+
+ *first = mergeq_pop_one(mqp);
+ *second = mergeq_pop_one(mqp);
+
+ return (B_TRUE);
+}
+
+static void *
+mergeq_thr_merge(void *arg)
+{
+ mergeq_t *mqp = arg;
+
+ VERIFY0(mutex_lock(&mqp->mq_lock));
+
+ /*
+ * Check to make sure creation worked and if not, fail fast.
+ */
+ if (mqp->mq_iserror == B_TRUE) {
+ VERIFY0(mutex_unlock(&mqp->mq_lock));
+ return (NULL);
+ }
+
+ for (;;) {
+ void *first, *second, *out;
+ int ret;
+ size_t slot;
+
+ if (mqp->mq_nitems == 1 && mqp->mq_nactthrs == 0) {
+ VERIFY0(mutex_unlock(&mqp->mq_lock));
+ return (NULL);
+ }
+
+ if (mergeq_pop(mqp, &first, &second) == B_FALSE) {
+ VERIFY0(mutex_unlock(&mqp->mq_lock));
+ return (NULL);
+ }
+ slot = mergeq_slot(mqp);
+
+ mqp->mq_nactthrs++;
+
+ VERIFY0(mutex_unlock(&mqp->mq_lock));
+ ret = mqp->mq_func(first, second, &out, mqp->mq_arg);
+ VERIFY0(mutex_lock(&mqp->mq_lock));
+
+ if (ret != 0) {
+ if (mqp->mq_iserror == B_FALSE) {
+ mqp->mq_iserror = B_TRUE;
+ mqp->mq_error = ret;
+ cond_broadcast(&mqp->mq_cond);
+ }
+ mqp->mq_nactthrs--;
+ VERIFY0(mutex_unlock(&mqp->mq_lock));
+ return (NULL);
+ }
+ mergeq_push(mqp, slot, out);
+ mqp->mq_nactthrs--;
+ }
+}
+
+int
+mergeq_merge(mergeq_t *mqp, mergeq_proc_f *func, void *arg, void **outp,
+ int *errp)
+{
+ int ret, i;
+ boolean_t seterr = B_FALSE;
+
+ if (mqp == NULL || func == NULL || outp == NULL) {
+ return (mergeq_error(EINVAL));
+ }
+
+ VERIFY0(mutex_lock(&mqp->mq_lock));
+ if (mqp->mq_working == B_TRUE) {
+ VERIFY0(mutex_unlock(&mqp->mq_lock));
+ return (mergeq_error(EBUSY));
+ }
+
+ if (mqp->mq_nitems == 0) {
+ *outp = NULL;
+ mergeq_reset(mqp);
+ VERIFY0(mutex_unlock(&mqp->mq_lock));
+ return (0);
+ }
+
+ /*
+ * Now that we've finished adding items to the queue, turn it into a
+ * circular buffer.
+ */
+ mqp->mq_func = func;
+ mqp->mq_arg = arg;
+ mqp->mq_nproc = 0;
+ mqp->mq_working = B_TRUE;
+ if (mqp->mq_next == mqp->mq_cap) {
+ mqp->mq_next %= mqp->mq_cap;
+ mqp->mq_gnext++;
+ }
+ mqp->mq_ncommit = mqp->mq_next;
+
+ ret = 0;
+ for (i = 0; i < mqp->mq_ndthreads; i++) {
+ ret = thr_create(NULL, 0, mergeq_thr_merge, mqp, 0,
+ &mqp->mq_thrs[i]);
+ if (ret != 0) {
+ mqp->mq_iserror = B_TRUE;
+ break;
+ }
+ }
+
+ VERIFY0(mutex_unlock(&mqp->mq_lock));
+ if (ret == 0)
+ (void) mergeq_thr_merge(mqp);
+
+ for (i = 0; i < mqp->mq_ndthreads; i++) {
+ VERIFY0(thr_join(mqp->mq_thrs[i], NULL, NULL));
+ }
+
+ VERIFY0(mutex_lock(&mqp->mq_lock));
+
+ VERIFY(mqp->mq_nactthrs == 0);
+ mqp->mq_working = B_FALSE;
+ if (ret == 0 && mqp->mq_iserror == B_FALSE) {
+ VERIFY(mqp->mq_nitems == 1);
+ *outp = mergeq_pop_one(mqp);
+ } else if (ret == 0 && mqp->mq_iserror == B_TRUE) {
+ ret = MERGEQ_UERROR;
+ if (errp != NULL)
+ *errp = mqp->mq_error;
+ } else {
+ seterr = B_TRUE;
+ }
+
+ mergeq_reset(mqp);
+ VERIFY0(mutex_unlock(&mqp->mq_lock));
+
+ if (seterr == B_TRUE)
+ return (mergeq_error(ret));
+
+ return (ret);
+}
diff --git a/usr/src/lib/mergeq/mergeq.h b/usr/src/lib/mergeq/mergeq.h
new file mode 100644
index 0000000000..4c1a21d696
--- /dev/null
+++ b/usr/src/lib/mergeq/mergeq.h
@@ -0,0 +1,52 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _MERGEQ_H
+#define _MERGEQ_H
+
+/*
+ * mergeq library routines
+ */
+
+#include <sys/types.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct mergeq mergeq_t;
+typedef int (mergeq_proc_f)(void *, void *, void **, void *);
+
+extern int mergeq_init(mergeq_t **, uint_t);
+extern void mergeq_fini(mergeq_t *);
+
+extern int mergeq_add(mergeq_t *, void *);
+
+#define MERGEQ_ERROR -1
+#define MERGEQ_UERROR -2
+extern int mergeq_merge(mergeq_t *, mergeq_proc_f *, void *, void **, int *);
+
+/*
+ * Routines consumers need to implement
+ */
+extern void *mergeq_alloc(size_t);
+extern void mergeq_free(void *, size_t);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _MERGEQ_H */
diff --git a/usr/src/lib/mergeq/workq.c b/usr/src/lib/mergeq/workq.c
new file mode 100644
index 0000000000..b9f1f2aa1c
--- /dev/null
+++ b/usr/src/lib/mergeq/workq.c
@@ -0,0 +1,311 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * Work queue
+ *
+ * A multi-threaded work queue.
+ *
+ * The general design of this is to add a fixed number of items to the queue and
+ * then drain them with the specified number of threads.
+ */
+
+#include <strings.h>
+#include <sys/debug.h>
+#include <thread.h>
+#include <synch.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdlib.h>
+
+#include "workq.h"
+
+struct workq {
+ mutex_t wq_lock; /* Protects below items */
+ cond_t wq_cond; /* Condition variable */
+ void **wq_items; /* Array of items to process */
+ size_t wq_nitems; /* Number of items in queue */
+ size_t wq_cap; /* Queue capacity */
+ size_t wq_next; /* Next item to process */
+ uint_t wq_ndthreads; /* Desired number of threads */
+ thread_t *wq_thrs; /* Actual threads */
+ workq_proc_f *wq_func; /* Processing function */
+ void *wq_arg; /* Argument for processing */
+ boolean_t wq_working; /* Are we actively using it? */
+ boolean_t wq_iserror; /* Have we encountered an error? */
+ int wq_error; /* Error value, if any */
+};
+
+#define WORKQ_DEFAULT_CAP 64
+
+static int
+workq_error(int err)
+{
+ VERIFY(err != 0);
+ errno = err;
+ return (WORKQ_ERROR);
+}
+
+void
+workq_fini(workq_t *wqp)
+{
+ if (wqp == NULL)
+ return;
+
+ VERIFY(wqp->wq_working != B_TRUE);
+ VERIFY0(mutex_destroy(&wqp->wq_lock));
+ VERIFY0(cond_destroy(&wqp->wq_cond));
+ if (wqp->wq_cap > 0)
+ workq_free(wqp->wq_items, sizeof (void *) * wqp->wq_cap);
+ if (wqp->wq_ndthreads > 0)
+ workq_free(wqp->wq_thrs, sizeof (thread_t) * wqp->wq_ndthreads);
+ workq_free(wqp, sizeof (workq_t));
+}
+
+int
+workq_init(workq_t **outp, uint_t nthrs)
+{
+ int ret;
+ workq_t *wqp;
+
+ wqp = workq_alloc(sizeof (workq_t));
+ if (wqp == NULL)
+ return (workq_error(ENOMEM));
+
+ bzero(wqp, sizeof (workq_t));
+ wqp->wq_items = workq_alloc(sizeof (void *) * WORKQ_DEFAULT_CAP);
+ if (wqp->wq_items == NULL) {
+ workq_free(wqp, sizeof (workq_t));
+ return (workq_error(ENOMEM));
+ }
+ bzero(wqp->wq_items, sizeof (void *) * WORKQ_DEFAULT_CAP);
+
+ wqp->wq_ndthreads = nthrs - 1;
+ if (wqp->wq_ndthreads > 0) {
+ wqp->wq_thrs = workq_alloc(sizeof (thread_t) *
+ wqp->wq_ndthreads);
+ if (wqp->wq_thrs == NULL) {
+ workq_free(wqp->wq_items, sizeof (void *) *
+ WORKQ_DEFAULT_CAP);
+ workq_free(wqp, sizeof (workq_t));
+ return (workq_error(ENOMEM));
+ }
+ }
+
+ if ((ret = mutex_init(&wqp->wq_lock, USYNC_THREAD | LOCK_ERRORCHECK,
+ NULL)) != 0) {
+ if (wqp->wq_ndthreads > 0) {
+ workq_free(wqp->wq_thrs,
+ sizeof (thread_t) * wqp->wq_ndthreads);
+ }
+ workq_free(wqp->wq_items, sizeof (void *) * WORKQ_DEFAULT_CAP);
+ workq_free(wqp, sizeof (workq_t));
+ return (workq_error(ret));
+ }
+
+ if ((ret = cond_init(&wqp->wq_cond, USYNC_THREAD, NULL)) != 0) {
+ VERIFY0(mutex_destroy(&wqp->wq_lock));
+ if (wqp->wq_ndthreads > 0) {
+ workq_free(wqp->wq_thrs,
+ sizeof (thread_t) * wqp->wq_ndthreads);
+ }
+ workq_free(wqp->wq_items, sizeof (void *) * WORKQ_DEFAULT_CAP);
+ workq_free(wqp, sizeof (workq_t));
+ return (workq_error(ret));
+ }
+
+ wqp->wq_cap = WORKQ_DEFAULT_CAP;
+ *outp = wqp;
+ return (0);
+}
+
+static void
+workq_reset(workq_t *wqp)
+{
+ VERIFY(MUTEX_HELD(&wqp->wq_lock));
+ VERIFY(wqp->wq_working == B_FALSE);
+ if (wqp->wq_cap > 0)
+ bzero(wqp->wq_items, sizeof (void *) * wqp->wq_cap);
+ wqp->wq_nitems = 0;
+ wqp->wq_next = 0;
+ wqp->wq_func = NULL;
+ wqp->wq_arg = NULL;
+ wqp->wq_iserror = B_FALSE;
+ wqp->wq_error = 0;
+}
+
+static int
+workq_grow(workq_t *wqp)
+{
+ size_t ncap;
+ void **items;
+
+ VERIFY(MUTEX_HELD(&wqp->wq_lock));
+ VERIFY(wqp->wq_working == B_FALSE);
+
+ if (SIZE_MAX - wqp->wq_cap < WORKQ_DEFAULT_CAP)
+ return (ENOSPC);
+
+ ncap = wqp->wq_cap + WORKQ_DEFAULT_CAP;
+ items = workq_alloc(ncap * sizeof (void *));
+ if (items == NULL)
+ return (ENOMEM);
+
+ bzero(items, ncap * sizeof (void *));
+ bcopy(wqp->wq_items, items, wqp->wq_cap * sizeof (void *));
+ workq_free(wqp->wq_items, sizeof (void *) * wqp->wq_cap);
+ wqp->wq_items = items;
+ wqp->wq_cap = ncap;
+ return (0);
+}
+
+int
+workq_add(workq_t *wqp, void *item)
+{
+ VERIFY0(mutex_lock(&wqp->wq_lock));
+ if (wqp->wq_working == B_TRUE) {
+ VERIFY0(mutex_unlock(&wqp->wq_lock));
+ return (workq_error(ENXIO));
+ }
+
+ if (wqp->wq_nitems == wqp->wq_cap) {
+ int ret;
+
+ if ((ret = workq_grow(wqp)) != 0) {
+ VERIFY0(mutex_unlock(&wqp->wq_lock));
+ return (workq_error(ret));
+ }
+ }
+
+ wqp->wq_items[wqp->wq_nitems] = item;
+ wqp->wq_nitems++;
+
+ VERIFY0(mutex_unlock(&wqp->wq_lock));
+
+ return (0);
+}
+
+static void *
+workq_pop(workq_t *wqp)
+{
+ void *out;
+
+ VERIFY(MUTEX_HELD(&wqp->wq_lock));
+ VERIFY(wqp->wq_next < wqp->wq_nitems);
+
+ out = wqp->wq_items[wqp->wq_next];
+ wqp->wq_items[wqp->wq_next] = NULL;
+ wqp->wq_next++;
+
+ return (out);
+}
+
+static void *
+workq_thr_work(void *arg)
+{
+ workq_t *wqp = arg;
+
+ VERIFY0(mutex_lock(&wqp->wq_lock));
+ VERIFY(wqp->wq_working == B_TRUE);
+
+ for (;;) {
+ int ret;
+ void *item;
+
+ if (wqp->wq_iserror == B_TRUE ||
+ wqp->wq_next == wqp->wq_nitems) {
+ VERIFY0(mutex_unlock(&wqp->wq_lock));
+ return (NULL);
+ }
+
+ item = workq_pop(wqp);
+
+ VERIFY0(mutex_unlock(&wqp->wq_lock));
+ ret = wqp->wq_func(item, wqp->wq_arg);
+ VERIFY0(mutex_lock(&wqp->wq_lock));
+
+ if (ret != 0) {
+ if (wqp->wq_iserror == B_FALSE) {
+ wqp->wq_iserror = B_TRUE;
+ wqp->wq_error = ret;
+ }
+ VERIFY0(mutex_unlock(&wqp->wq_lock));
+ return (NULL);
+ }
+ }
+}
+
+int
+workq_work(workq_t *wqp, workq_proc_f *func, void *arg, int *errp)
+{
+ int i, ret;
+ boolean_t seterr = B_FALSE;
+
+ if (wqp == NULL || func == NULL)
+ return (workq_error(EINVAL));
+
+ VERIFY0(mutex_lock(&wqp->wq_lock));
+ if (wqp->wq_working == B_TRUE) {
+ VERIFY0(mutex_unlock(&wqp->wq_lock));
+ return (workq_error(EBUSY));
+ }
+
+ if (wqp->wq_nitems == 0) {
+ workq_reset(wqp);
+ VERIFY0(mutex_unlock(&wqp->wq_lock));
+ return (0);
+ }
+
+ wqp->wq_func = func;
+ wqp->wq_arg = arg;
+ wqp->wq_next = 0;
+ wqp->wq_working = B_TRUE;
+
+ ret = 0;
+ for (i = 0; i < wqp->wq_ndthreads; i++) {
+ ret = thr_create(NULL, 0, workq_thr_work, wqp, 0,
+ &wqp->wq_thrs[i]);
+ if (ret != 0) {
+ wqp->wq_iserror = B_TRUE;
+ }
+ }
+
+ VERIFY0(mutex_unlock(&wqp->wq_lock));
+ if (ret == 0)
+ (void) workq_thr_work(wqp);
+
+ for (i = 0; i < wqp->wq_ndthreads; i++) {
+ VERIFY0(thr_join(wqp->wq_thrs[i], NULL, NULL));
+ }
+
+ VERIFY0(mutex_lock(&wqp->wq_lock));
+ wqp->wq_working = B_FALSE;
+ if (ret == 0 && wqp->wq_iserror == B_TRUE) {
+ ret = WORKQ_UERROR;
+ if (errp != NULL)
+ *errp = wqp->wq_error;
+ } else if (ret != 0) {
+ VERIFY(wqp->wq_iserror == B_FALSE);
+ seterr = B_TRUE;
+ }
+
+ workq_reset(wqp);
+ VERIFY0(mutex_unlock(&wqp->wq_lock));
+
+ if (seterr == B_TRUE)
+ return (workq_error(ret));
+
+ return (ret);
+}
diff --git a/usr/src/lib/mergeq/workq.h b/usr/src/lib/mergeq/workq.h
new file mode 100644
index 0000000000..20cfec4a95
--- /dev/null
+++ b/usr/src/lib/mergeq/workq.h
@@ -0,0 +1,52 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _WORKQ_H
+#define _WORKQ_H
+
+/*
+ * workq library routines
+ */
+
+#include <sys/types.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct workq workq_t;
+typedef int (workq_proc_f)(void *, void *);
+
+extern int workq_init(workq_t **, uint_t);
+extern void workq_fini(workq_t *);
+
+extern int workq_add(workq_t *, void *);
+
+#define WORKQ_ERROR (-1)
+#define WORKQ_UERROR (-2)
+extern int workq_work(workq_t *, workq_proc_f *, void *, int *);
+
+/*
+ * Routines consumers need to implement
+ */
+extern void *workq_alloc(size_t);
+extern void workq_free(void *, size_t);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _WORKQ_H */
diff --git a/usr/src/lib/nsswitch/dns/Makefile.com b/usr/src/lib/nsswitch/dns/Makefile.com
index 60716d843d..0366633c0c 100644
--- a/usr/src/lib/nsswitch/dns/Makefile.com
+++ b/usr/src/lib/nsswitch/dns/Makefile.com
@@ -44,5 +44,5 @@ CPPFLAGS += -DNSS_DNS_LIBRESOLV=\"libresolv.so.2\"
LINTFLAGS += -erroff=E_GLOBAL_COULD_BE_STATIC2
-LDLIBS += -lnsl -lsocket
+LDLIBS += -lnsl -lresolv_joy -lsocket
DYNLIB1 = nss_dns.so$(VERS)
diff --git a/usr/src/lib/nsswitch/dns/common/dns_common.h b/usr/src/lib/nsswitch/dns/common/dns_common.h
index 717f56d70f..83d486cf57 100644
--- a/usr/src/lib/nsswitch/dns/common/dns_common.h
+++ b/usr/src/lib/nsswitch/dns/common/dns_common.h
@@ -31,8 +31,6 @@
#ifndef _DNS_COMMON_H
#define _DNS_COMMON_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <stdio.h>
#include <ctype.h>
#include <sys/types.h>
@@ -43,7 +41,7 @@
#include <thread.h>
#include <arpa/inet.h>
#include <arpa/nameser.h>
-#include <resolv.h>
+#include <resolv_joy.h>
#include <syslog.h>
#include <nsswitch.h>
#include <nss_common.h>
diff --git a/usr/src/lib/nsswitch/dns/common/dns_mt.c b/usr/src/lib/nsswitch/dns/common/dns_mt.c
index 128b1bde75..4af3f671c0 100644
--- a/usr/src/lib/nsswitch/dns/common/dns_mt.c
+++ b/usr/src/lib/nsswitch/dns/common/dns_mt.c
@@ -23,8 +23,9 @@
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ */
/*
* dns_mt.c
@@ -49,52 +50,41 @@
static void _nss_dns_init(void);
extern struct hostent *res_gethostbyname(const char *);
-#pragma weak res_gethostbyname
-#define RES_SET_NO_HOSTS_FALLBACK "__res_set_no_hosts_fallback"
-extern void __res_set_no_hosts_fallback(void);
-#pragma weak __res_set_no_hosts_fallback
+#define RES_SET_NO_HOSTS_FALLBACK "__joy_res_set_no_hosts_fallback"
+extern void __joy_res_set_no_hosts_fallback(void);
-#define RES_UNSET_NO_HOSTS_FALLBACK "__res_unset_no_hosts_fallback"
-extern void __res_unset_no_hosts_fallback(void);
-#pragma weak __res_unset_no_hosts_fallback
+#define RES_UNSET_NO_HOSTS_FALLBACK "__joy_res_unset_no_hosts_fallback"
+extern void __joy_res_unset_no_hosts_fallback(void);
#define RES_GET_RES "__res_get_res"
extern struct __res_state *__res_get_res(void);
-#pragma weak __res_get_res
#define RES_ENABLE_MT "__res_enable_mt"
extern int __res_enable_mt(void);
-#pragma weak __res_enable_mt
#define RES_DISABLE_MT "__res_disable_mt"
extern int __res_disable_mt(void);
-#pragma weak __res_disable_mt
#define RES_GET_H_ERRNO "__res_get_h_errno"
extern int *__res_get_h_errno();
-#pragma weak __res_get_h_errno
-#define __H_ERRNO "__h_errno"
-extern int *__h_errno(void);
-#pragma weak __h_errno
+#define __H_ERRNO "__joy_h_errno"
+extern int *__joy_h_errno(void);
-#define RES_OVERRIDE_RETRY "__res_override_retry"
-extern int __res_override_retry(int);
-#pragma weak __res_override_retry
+#define RES_OVERRIDE_RETRY "__joy_res_override_retry"
+extern int __joy_res_override_retry(int);
static void __fallback_set_no_hosts(void);
-static int *__fallback_h_errno(void);
-static int __fallback_override_retry(int);
static int __is_mt_safe(void);
void (*set_no_hosts_fallback)(void) = __fallback_set_no_hosts;
void (*unset_no_hosts_fallback)(void) = __fallback_set_no_hosts;
struct __res_state *(*set_res_retry)() = 0;
-int (*enable_mt)() = 0;
-int (*disable_mt)() = 0;
-int *(*get_h_errno)(void) = 0;
-int (*override_retry)(int) = 0;
+int (*enable_mt)() = __is_mt_safe;
+int (*disable_mt)() = __is_mt_safe;
+int *(*get_h_errno)(void) = __joy_h_errno;
+int (*override_retry)(int) = __joy_res_override_retry;
/* Usually set from the Makefile */
#ifndef NSS_DNS_LIBRESOLV
@@ -106,91 +96,12 @@ extern int h_errno;
mutex_t one_lane = DEFAULTMUTEX;
+/* Because we link against libresolv_joy.so.2, this is relatively easy. */
void
_nss_dns_init(void)
{
- void *reslib, (*f_void_ptr)();
-
- /* If no libresolv library, then load one */
- if (res_gethostbyname == 0) {
- if ((reslib =
- dlopen(NSS_DNS_LIBRESOLV, RTLD_LAZY|RTLD_GLOBAL)) != 0) {
- /* Turn off /etc/hosts fall back in libresolv */
- if ((f_void_ptr = (void (*)(void))dlsym(reslib,
- RES_SET_NO_HOSTS_FALLBACK)) != 0) {
- set_no_hosts_fallback = f_void_ptr;
- }
- if ((f_void_ptr = (void (*)(void))dlsym(reslib,
- RES_SET_NO_HOSTS_FALLBACK)) != 0) {
- unset_no_hosts_fallback = f_void_ptr;
- }
- /* Set number of resolver retries */
- if ((override_retry = (int (*)(int))dlsym(reslib,
- RES_OVERRIDE_RETRY)) == 0) {
- set_res_retry =
- (struct __res_state *(*)(void))dlsym(reslib,
- RES_GET_RES);
- override_retry = __fallback_override_retry;
- }
- /*
- * Select h_errno retrieval function. A BIND 8.2.2
- * libresolv.so.2 will have __h_errno, a BIND 8.1.2
- * one will have __res_get_h_errno, and other
- * versions may have nothing at all.
- *
- * Also try to bind to the relevant MT enable/disable
- * functions which are also dependent on the version
- * of the BIND libresolv.so.2 being used.
- */
- if ((get_h_errno = (int *(*)(void))dlsym(reslib,
- __H_ERRNO)) != 0) {
- /* BIND 8.2.2 libresolv.so.2 is MT safe. */
- enable_mt = __is_mt_safe;
- disable_mt = __is_mt_safe;
- } else {
- if ((get_h_errno =
- (int *(*)(void))dlsym(reslib,
- RES_GET_H_ERRNO)) == 0) {
- get_h_errno = __fallback_h_errno;
- }
- /*
- * Pre-BIND 8.2.2 was not MT safe. Try to
- * bind the MT enable/disable functions.
- */
- if ((enable_mt = (int (*)(void))dlsym(reslib,
- RES_ENABLE_MT)) != 0 &&
- (disable_mt = (int (*)(void))dlsym(reslib,
- RES_DISABLE_MT)) == 0) {
- enable_mt = 0;
- }
- }
- }
- } else {
- /* Libresolv already loaded */
- if ((f_void_ptr = __res_set_no_hosts_fallback) != 0) {
- set_no_hosts_fallback = f_void_ptr;
- }
- if ((f_void_ptr = __res_unset_no_hosts_fallback) != 0) {
- unset_no_hosts_fallback = f_void_ptr;
- }
- if ((override_retry = __res_override_retry) == 0) {
- set_res_retry = __res_get_res;
- override_retry = __fallback_override_retry;
- }
- if ((get_h_errno = __h_errno) == 0 &&
- (get_h_errno = __res_get_h_errno) == 0) {
- get_h_errno = __fallback_h_errno;
- }
- if (get_h_errno == __h_errno) {
- enable_mt = __is_mt_safe;
- disable_mt = __is_mt_safe;
- } else {
- if ((enable_mt = __res_enable_mt) != 0 &&
- (disable_mt = __res_disable_mt) == 0) {
- enable_mt = 0;
- }
- }
- }
+ enable_mt = __is_mt_safe;
+ disable_mt = __is_mt_safe;
}
@@ -228,36 +139,6 @@ __is_mt_safe(void) {
}
-/*
- * Return pointer to the global h_errno variable
- */
-static int *
-__fallback_h_errno(void) {
- return (&h_errno);
-}
-
-
-/*
- * This function is called when the resolver library doesn't provide its
- * own function to establish an override retry. If we can get a pointer
- * to the per-thread _res (i.e., set_res_retry != 0), we set the retries
- * directly, and return the previous number of retries. Otherwise, there's
- * nothing to do.
- */
-static int
-__fallback_override_retry(int retry) {
- struct __res_state *res;
- int old_retry = 0;
-
- if (set_res_retry != 0) {
- res = set_res_retry();
- old_retry = res->retry;
- res->retry = retry;
- }
- return (old_retry);
-}
-
-
static void
__fallback_set_no_hosts(void) {
}
diff --git a/usr/src/lib/nsswitch/dns/common/gethostent.c b/usr/src/lib/nsswitch/dns/common/gethostent.c
index 648ea8ba01..d321dd24c6 100644
--- a/usr/src/lib/nsswitch/dns/common/gethostent.c
+++ b/usr/src/lib/nsswitch/dns/common/gethostent.c
@@ -24,8 +24,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* gethostent.c
*
@@ -53,12 +51,6 @@ static struct hostent *_gethostbyaddr(int *h_errnop, const char *addr,
int len, int type);
struct hostent *_nss_dns_gethostbyname2(int *h_errnop, const char *name);
-#pragma weak res_gethostbyname
-#pragma weak res_gethostbyname2
-#pragma weak res_gethostbyaddr
-#pragma weak res_sethostent
-#pragma weak res_endhostent
-
nss_backend_t *_nss_dns_constr(dns_backend_op_t ops[], int n_ops);
nss_status_t __nss_dns_getbyaddr(dns_backend_ptr_t, void *);
diff --git a/usr/src/lib/nsswitch/dns/common/gethostent6.c b/usr/src/lib/nsswitch/dns/common/gethostent6.c
index ee85832073..f3efc4eae6 100644
--- a/usr/src/lib/nsswitch/dns/common/gethostent6.c
+++ b/usr/src/lib/nsswitch/dns/common/gethostent6.c
@@ -24,8 +24,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* This is the DNS backend for IPv6 addresses.
* getbyname() is a local routine, but getbyaddr() actually shares the
@@ -50,8 +48,6 @@
*/
-#pragma weak res_endhostent
-
extern struct hostent *_gethostbyname(int *, const char *);
extern struct hostent *_nss_dns_gethostbyname2(int *, const char *);
diff --git a/usr/src/lib/pkcs11/pkcs11_tpm/Makefile.com b/usr/src/lib/pkcs11/pkcs11_tpm/Makefile.com
index 020051c977..7345ddc892 100644
--- a/usr/src/lib/pkcs11/pkcs11_tpm/Makefile.com
+++ b/usr/src/lib/pkcs11/pkcs11_tpm/Makefile.com
@@ -73,7 +73,7 @@ TSSLIB=-L$(TSPILIBDIR)
TSSLIB64=-L$(TSPILIBDIR)/$(MACH64)
TSSINC=-I$(TSPIINCDIR)
-LDLIBS += $(TSSLIB) -L$(ADJUNCT_PROTO)/lib -lc -luuid -lmd -ltspi -lcrypto
+LDLIBS += $(TSSLIB) -L$(ADJUNCT_PROTO)/lib -lc -luuid -lmd -ltspi -lsunw_crypto
CPPFLAGS += -xCC -D_POSIX_PTHREAD_SEMANTICS $(TSSINC)
CPPFLAGS64 += $(CPPFLAGS)
C99MODE= $(C99_ENABLE)
diff --git a/usr/src/lib/pysolaris/Makefile.com b/usr/src/lib/pysolaris/Makefile.com
index 28d68704f9..a6194a9a2a 100644
--- a/usr/src/lib/pysolaris/Makefile.com
+++ b/usr/src/lib/pysolaris/Makefile.com
@@ -44,6 +44,7 @@ C99LMODE= -Xc99=%all
LIBS = $(DYNLIB)
LDLIBS += -lc -lsec -lidmap -lpython2.6
CFLAGS += $(CCVERBOSE)
+CPPFLAGS += -I$(ADJUNCT_PROTO)/usr/include/python2.6
CERRWARN += -_gcc=-Wno-unused-variable
CPPFLAGS += -I$(ADJUNCT_PROTO)/usr/include/python2.6
diff --git a/usr/src/lib/varpd/Makefile b/usr/src/lib/varpd/Makefile
new file mode 100644
index 0000000000..daa849572e
--- /dev/null
+++ b/usr/src/lib/varpd/Makefile
@@ -0,0 +1,33 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+SUBDIRS = libvarpd .WAIT direct files svp
+
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+check := TARGET = check
+install := TARGET = install
+install_h := TARGET = install_h
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber install install_h check lint: $(SUBDIRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/lib/varpd/Makefile.plugin b/usr/src/lib/varpd/Makefile.plugin
new file mode 100644
index 0000000000..48f188500c
--- /dev/null
+++ b/usr/src/lib/varpd/Makefile.plugin
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+ROOTLIBDIR = $(ROOT)/usr/lib/varpd
+ROOTLIBDIR64 = $(ROOT)/usr/lib/varpd/$(MACH64)
+
+MAPFILES += ../../libvarpd/common/mapfile-plugin
diff --git a/usr/src/lib/varpd/direct/Makefile b/usr/src/lib/varpd/direct/Makefile
new file mode 100644
index 0000000000..275f07bf8b
--- /dev/null
+++ b/usr/src/lib/varpd/direct/Makefile
@@ -0,0 +1,40 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+include ../../Makefile.lib
+
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+install := TARGET = install
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber install lint: $(SUBDIRS)
+
+install_h:
+
+check:
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../../Makefile.targ
diff --git a/usr/src/lib/varpd/direct/Makefile.com b/usr/src/lib/varpd/direct/Makefile.com
new file mode 100644
index 0000000000..4072c9a0ab
--- /dev/null
+++ b/usr/src/lib/varpd/direct/Makefile.com
@@ -0,0 +1,38 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+LIBRARY = libvarpd_direct.a
+VERS = .1
+OBJECTS = libvarpd_direct.o
+
+include ../../../Makefile.lib
+include ../../Makefile.plugin
+
+LIBS = $(DYNLIB)
+LDLIBS += -lc -lumem -lnvpair -lnsl
+CPPFLAGS += -I../common
+
+C99MODE= -xc99=%all
+C99LMODE= -Xc99=%all
+
+SRCDIR = ../common
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+include ../../../Makefile.targ
diff --git a/usr/src/lib/varpd/direct/amd64/Makefile b/usr/src/lib/varpd/direct/amd64/Makefile
new file mode 100644
index 0000000000..d552642882
--- /dev/null
+++ b/usr/src/lib/varpd/direct/amd64/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+include ../Makefile.com
+include ../../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/varpd/direct/common/libvarpd_direct.c b/usr/src/lib/varpd/direct/common/libvarpd_direct.c
new file mode 100644
index 0000000000..018cdf641c
--- /dev/null
+++ b/usr/src/lib/varpd/direct/common/libvarpd_direct.c
@@ -0,0 +1,411 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+/*
+ * Point to point plug-in for varpd.
+ *
+ * This plugin implements a simple point to point plugin for a packet. It
+ * represents the traditional tunnel, just in overlay form. As such, the only
+ * properties it needs are those to determine where to send everything. At this
+ * time, we don't allow a mulicast address; however, there's no reason that the
+ * direct plugin shouldn't in theory support multicast, though when implementing
+ * it the best path will become clear.
+ *
+ * In general this module has been designed to make it easy to support a
+ * destination of either IP or IP and port; however, we restrict it to the
+ * latter as we don't currently have an implementation that would allow us to
+ * test that.
+ */
+
+#include <libvarpd_provider.h>
+#include <umem.h>
+#include <errno.h>
+#include <thread.h>
+#include <synch.h>
+#include <strings.h>
+#include <assert.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <libnvpair.h>
+
+typedef struct varpd_direct {
+ overlay_plugin_dest_t vad_dest; /* RO */
+ mutex_t vad_lock; /* Protects the rest */
+ boolean_t vad_hip;
+ boolean_t vad_hport;
+ struct in6_addr vad_ip;
+ uint16_t vad_port;
+} varpd_direct_t;
+
+static const char *varpd_direct_props[] = {
+ "direct/dest_ip",
+ "direct/dest_port"
+};
+
+static boolean_t
+varpd_direct_valid_dest(overlay_plugin_dest_t dest)
+{
+ if (dest & ~(OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT))
+ return (B_FALSE);
+
+ if (!(dest & (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT)))
+ return (B_FALSE);
+
+ return (B_TRUE);
+}
+
+/* ARGSUSED */
+static int
+varpd_direct_create(varpd_provider_handle_t *hdl, void **outp,
+ overlay_plugin_dest_t dest)
+{
+ int ret;
+ varpd_direct_t *vdp;
+
+ if (varpd_direct_valid_dest(dest) == B_FALSE)
+ return (ENOTSUP);
+
+ vdp = umem_alloc(sizeof (varpd_direct_t), UMEM_DEFAULT);
+ if (vdp == NULL)
+ return (ENOMEM);
+
+ if ((ret = mutex_init(&vdp->vad_lock, USYNC_THREAD | LOCK_ERRORCHECK,
+ NULL)) != 0) {
+ umem_free(vdp, sizeof (varpd_direct_t));
+ return (ret);
+ }
+
+ vdp->vad_dest = dest;
+ vdp->vad_hip = B_FALSE;
+ vdp->vad_hport = B_FALSE;
+ *outp = vdp;
+ return (0);
+}
+
+static int
+varpd_direct_start(void *arg)
+{
+ varpd_direct_t *vdp = arg;
+
+ mutex_enter(&vdp->vad_lock);
+ if (vdp->vad_hip == B_FALSE ||((vdp->vad_dest & OVERLAY_PLUGIN_D_IP) &&
+ vdp->vad_hport == B_FALSE)) {
+ mutex_exit(&vdp->vad_lock);
+ return (EAGAIN);
+ }
+ mutex_exit(&vdp->vad_lock);
+
+ return (0);
+}
+
+/* ARGSUSED */
+static void
+varpd_direct_stop(void *arg)
+{
+}
+
+static void
+varpd_direct_destroy(void *arg)
+{
+ varpd_direct_t *vdp = arg;
+
+ if (mutex_destroy(&vdp->vad_lock) != 0)
+ abort();
+ umem_free(vdp, sizeof (varpd_direct_t));
+}
+
+static int
+varpd_direct_default(void *arg, overlay_target_point_t *otp)
+{
+ varpd_direct_t *vdp = arg;
+
+ mutex_enter(&vdp->vad_lock);
+ bcopy(&vdp->vad_ip, &otp->otp_ip, sizeof (struct in6_addr));
+ otp->otp_port = vdp->vad_port;
+ mutex_exit(&vdp->vad_lock);
+
+ return (VARPD_LOOKUP_OK);
+}
+
+static int
+varpd_direct_nprops(void *arg, uint_t *nprops)
+{
+ const varpd_direct_t *vdp = arg;
+
+ *nprops = 0;
+ if (vdp->vad_dest & OVERLAY_PLUGIN_D_ETHERNET)
+ *nprops += 1;
+
+ if (vdp->vad_dest & OVERLAY_PLUGIN_D_IP)
+ *nprops += 1;
+
+ if (vdp->vad_dest & OVERLAY_PLUGIN_D_PORT)
+ *nprops += 1;
+
+ assert(*nprops == 1 || *nprops == 2);
+
+ return (0);
+}
+
+static int
+varpd_direct_propinfo(void *arg, uint_t propid, varpd_prop_handle_t *vph)
+{
+ varpd_direct_t *vdp = arg;
+
+ /*
+ * Because we only support IP + port combos right now, prop 0 should
+ * always be the IP. We don't support a port without an IP.
+ */
+ assert(vdp->vad_dest & OVERLAY_PLUGIN_D_IP);
+ if (propid == 0) {
+ libvarpd_prop_set_name(vph, varpd_direct_props[0]);
+ libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
+ libvarpd_prop_set_type(vph, OVERLAY_PROP_T_IP);
+ libvarpd_prop_set_nodefault(vph);
+ return (0);
+ }
+
+ if (propid == 1 && vdp->vad_dest & OVERLAY_PLUGIN_D_PORT) {
+ libvarpd_prop_set_name(vph, varpd_direct_props[1]);
+ libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
+ libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
+ libvarpd_prop_set_nodefault(vph);
+ libvarpd_prop_set_range_uint32(vph, 1, UINT16_MAX);
+ return (0);
+ }
+
+ return (EINVAL);
+}
+
+static int
+varpd_direct_getprop(void *arg, const char *pname, void *buf, uint32_t *sizep)
+{
+ varpd_direct_t *vdp = arg;
+
+ /* direct/dest_ip */
+ if (strcmp(pname, varpd_direct_props[0]) == 0) {
+ if (*sizep < sizeof (struct in6_addr))
+ return (EOVERFLOW);
+ mutex_enter(&vdp->vad_lock);
+ if (vdp->vad_hip == B_FALSE) {
+ *sizep = 0;
+ } else {
+ bcopy(&vdp->vad_ip, buf, sizeof (struct in6_addr));
+ *sizep = sizeof (struct in6_addr);
+ }
+ mutex_exit(&vdp->vad_lock);
+ return (0);
+ }
+
+ /* direct/dest_port */
+ if (strcmp(pname, varpd_direct_props[1]) == 0) {
+ uint64_t val;
+
+ if (*sizep < sizeof (uint64_t))
+ return (EOVERFLOW);
+ mutex_enter(&vdp->vad_lock);
+ if (vdp->vad_hport == B_FALSE) {
+ *sizep = 0;
+ } else {
+ val = vdp->vad_port;
+ bcopy(&val, buf, sizeof (uint64_t));
+ *sizep = sizeof (uint64_t);
+ }
+ mutex_exit(&vdp->vad_lock);
+ return (0);
+ }
+
+ return (EINVAL);
+}
+
+static int
+varpd_direct_setprop(void *arg, const char *pname, const void *buf,
+ const uint32_t size)
+{
+ varpd_direct_t *vdp = arg;
+
+ /* direct/dest_ip */
+ if (strcmp(pname, varpd_direct_props[0]) == 0) {
+ const struct in6_addr *ipv6 = buf;
+
+ if (size < sizeof (struct in6_addr))
+ return (EOVERFLOW);
+
+ if (IN6_IS_ADDR_V4COMPAT(ipv6))
+ return (EINVAL);
+
+ if (IN6_IS_ADDR_6TO4(ipv6))
+ return (EINVAL);
+
+ mutex_enter(&vdp->vad_lock);
+ bcopy(buf, &vdp->vad_ip, sizeof (struct in6_addr));
+ vdp->vad_hip = B_TRUE;
+ mutex_exit(&vdp->vad_lock);
+ return (0);
+ }
+
+ /* direct/dest_port */
+ if (strcmp(pname, varpd_direct_props[1]) == 0) {
+ const uint64_t *valp = buf;
+ if (size < sizeof (uint64_t))
+ return (EOVERFLOW);
+
+ if (*valp == 0 || *valp > UINT16_MAX)
+ return (EINVAL);
+
+ mutex_enter(&vdp->vad_lock);
+ vdp->vad_port = (uint16_t)*valp;
+ vdp->vad_hport = B_TRUE;
+ mutex_exit(&vdp->vad_lock);
+ return (0);
+ }
+
+ return (EINVAL);
+}
+
+static int
+varpd_direct_save(void *arg, nvlist_t *nvp)
+{
+ int ret;
+ varpd_direct_t *vdp = arg;
+
+ mutex_enter(&vdp->vad_lock);
+ if (vdp->vad_hport == B_TRUE) {
+ if ((ret = nvlist_add_uint16(nvp, varpd_direct_props[1],
+ vdp->vad_port)) != 0) {
+ mutex_exit(&vdp->vad_lock);
+ return (ret);
+ }
+ }
+
+ if (vdp->vad_hip == B_TRUE) {
+ char buf[INET6_ADDRSTRLEN];
+
+ if (inet_ntop(AF_INET6, &vdp->vad_ip, buf, sizeof (buf)) ==
+ NULL)
+ abort();
+ if ((ret = nvlist_add_string(nvp, varpd_direct_props[0],
+ buf)) != 0) {
+ mutex_exit(&vdp->vad_lock);
+ return (ret);
+ }
+ }
+ mutex_exit(&vdp->vad_lock);
+
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+varpd_direct_restore(nvlist_t *nvp, varpd_provider_handle_t *hdl,
+ overlay_plugin_dest_t dest, void **outp)
+{
+ int ret;
+ char *ipstr;
+ varpd_direct_t *vdp;
+
+ if (varpd_direct_valid_dest(dest) == B_FALSE)
+ return (ENOTSUP);
+
+ vdp = umem_alloc(sizeof (varpd_direct_t), UMEM_DEFAULT);
+ if (vdp == NULL)
+ return (ENOMEM);
+
+ if ((ret = mutex_init(&vdp->vad_lock, USYNC_THREAD | LOCK_ERRORCHECK,
+ NULL)) != 0) {
+ umem_free(vdp, sizeof (varpd_direct_t));
+ return (ret);
+ }
+
+ if ((ret = nvlist_lookup_uint16(nvp, varpd_direct_props[1],
+ &vdp->vad_port)) != 0) {
+ if (ret != ENOENT) {
+ if (mutex_destroy(&vdp->vad_lock) != 0)
+ abort();
+ umem_free(vdp, sizeof (varpd_direct_t));
+ return (ret);
+ }
+ vdp->vad_hport = B_FALSE;
+ } else {
+ vdp->vad_hport = B_TRUE;
+ }
+
+ if ((ret = nvlist_lookup_string(nvp, varpd_direct_props[0],
+ &ipstr)) != 0) {
+ if (ret != ENOENT) {
+ if (mutex_destroy(&vdp->vad_lock) != 0)
+ abort();
+ umem_free(vdp, sizeof (varpd_direct_t));
+ return (ret);
+ }
+ vdp->vad_hip = B_FALSE;
+ } else {
+ ret = inet_pton(AF_INET6, ipstr, &vdp->vad_ip);
+ /*
+ * inet_pton is only defined to return -1 with errno set to
+ * EAFNOSUPPORT, which really, shouldn't happen.
+ */
+ if (ret == -1) {
+ assert(errno == EAFNOSUPPORT);
+ abort();
+ }
+ if (ret == 0) {
+ if (mutex_destroy(&vdp->vad_lock) != 0)
+ abort();
+ umem_free(vdp, sizeof (varpd_direct_t));
+ return (EINVAL);
+ }
+ }
+
+ *outp = vdp;
+ return (0);
+}
+
+static const varpd_plugin_ops_t varpd_direct_ops = {
+ 0,
+ varpd_direct_create,
+ varpd_direct_start,
+ varpd_direct_stop,
+ varpd_direct_destroy,
+ varpd_direct_default,
+ NULL,
+ varpd_direct_nprops,
+ varpd_direct_propinfo,
+ varpd_direct_getprop,
+ varpd_direct_setprop,
+ varpd_direct_save,
+ varpd_direct_restore
+};
+
+#pragma init(varpd_direct_init)
+static void
+varpd_direct_init(void)
+{
+ int err;
+ varpd_plugin_register_t *vpr;
+
+ vpr = libvarpd_plugin_alloc(VARPD_CURRENT_VERSION, &err);
+ if (vpr == NULL)
+ return;
+
+ vpr->vpr_mode = OVERLAY_TARGET_POINT;
+ vpr->vpr_name = "direct";
+ vpr->vpr_ops = &varpd_direct_ops;
+ (void) libvarpd_plugin_register(vpr);
+ libvarpd_plugin_free(vpr);
+}
diff --git a/usr/src/lib/varpd/direct/common/llib-lvarpd_direct b/usr/src/lib/varpd/direct/common/llib-lvarpd_direct
new file mode 100644
index 0000000000..03c34f4fcb
--- /dev/null
+++ b/usr/src/lib/varpd/direct/common/llib-lvarpd_direct
@@ -0,0 +1,18 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/* LINTLIBRARY */
+/* PROTOLIB1 */
+
diff --git a/usr/src/lib/varpd/direct/common/mapfile-vers b/usr/src/lib/varpd/direct/common/mapfile-vers
new file mode 100644
index 0000000000..6b7c5a5067
--- /dev/null
+++ b/usr/src/lib/varpd/direct/common/mapfile-vers
@@ -0,0 +1,35 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_VERSION SUNWprivate {
+ local:
+ *;
+};
diff --git a/usr/src/lib/varpd/direct/i386/Makefile b/usr/src/lib/varpd/direct/i386/Makefile
new file mode 100644
index 0000000000..f2b4f63da5
--- /dev/null
+++ b/usr/src/lib/varpd/direct/i386/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/varpd/direct/sparc/Makefile b/usr/src/lib/varpd/direct/sparc/Makefile
new file mode 100644
index 0000000000..f2b4f63da5
--- /dev/null
+++ b/usr/src/lib/varpd/direct/sparc/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/varpd/direct/sparcv9/Makefile b/usr/src/lib/varpd/direct/sparcv9/Makefile
new file mode 100644
index 0000000000..d552642882
--- /dev/null
+++ b/usr/src/lib/varpd/direct/sparcv9/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+include ../Makefile.com
+include ../../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/varpd/files/Makefile b/usr/src/lib/varpd/files/Makefile
new file mode 100644
index 0000000000..275f07bf8b
--- /dev/null
+++ b/usr/src/lib/varpd/files/Makefile
@@ -0,0 +1,40 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+include ../../Makefile.lib
+
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+install := TARGET = install
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber install lint: $(SUBDIRS)
+
+install_h:
+
+check:
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../../Makefile.targ
diff --git a/usr/src/lib/varpd/files/Makefile.com b/usr/src/lib/varpd/files/Makefile.com
new file mode 100644
index 0000000000..1f6a7c03b1
--- /dev/null
+++ b/usr/src/lib/varpd/files/Makefile.com
@@ -0,0 +1,42 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+LIBRARY = libvarpd_files.a
+VERS = .1
+OBJECTS = libvarpd_files.o \
+ libvarpd_files_json.o
+
+include ../../../Makefile.lib
+include ../../Makefile.plugin
+
+LIBS = $(DYNLIB)
+LDLIBS += -lc -lumem -lnvpair -lsocket -lnsl -lcmdutils
+CPPFLAGS += -I../common
+
+LINTFLAGS += -erroff=E_BAD_PTR_CAST_ALIGN
+LINTFLAGS64 += -erroff=E_BAD_PTR_CAST_ALIGN
+
+C99MODE= -xc99=%all
+C99LMODE= -Xc99=%all
+
+SRCDIR = ../common
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+include ../../../Makefile.targ
diff --git a/usr/src/lib/varpd/files/amd64/Makefile b/usr/src/lib/varpd/files/amd64/Makefile
new file mode 100644
index 0000000000..d552642882
--- /dev/null
+++ b/usr/src/lib/varpd/files/amd64/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+include ../Makefile.com
+include ../../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/varpd/files/common/libvarpd_files.c b/usr/src/lib/varpd/files/common/libvarpd_files.c
new file mode 100644
index 0000000000..812919a07d
--- /dev/null
+++ b/usr/src/lib/varpd/files/common/libvarpd_files.c
@@ -0,0 +1,605 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015, Joyent, Inc.
+ */
+
+/*
+ * Files based plug-in for varpd
+ *
+ * This is a dynamic varpd plug-in that has a static backing store. It's really
+ * nothing more than a glorified version of /etc/ethers, though it facilitiates
+ * a bit more. The files module allows for the full set of mappings to be fixed
+ * at creation time. In addition, it also provides support for proxying ARP,
+ * NDP, and DHCP.
+ *
+ * At this time, the plugin requires that the destination type involve both an
+ * IP address and a port; however, there's no reason that this cannot be made
+ * more flexible as we have additional encapsulation algorithms that support it.
+ * The plug-in only has a single property, which is the location of the JSON
+ * file. The JSON file itself looks something like:
+ *
+ * {
+ * "aa:bb:cc:dd:ee:ff": {
+ * "arp": "10.23.69.1",
+ * "ndp": "2600:3c00::f03c:91ff:fe96:a264",
+ * "ip": "192.168.1.1",
+ * "port": 8080
+ * },
+ * ...
+ * }
+ */
+
+#include <libvarpd_provider.h>
+#include <umem.h>
+#include <errno.h>
+#include <thread.h>
+#include <synch.h>
+#include <strings.h>
+#include <assert.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <libnvpair.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/ethernet.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#include <libvarpd_files_json.h>
+
+typedef struct varpd_files {
+ overlay_plugin_dest_t vaf_dest; /* RO */
+ varpd_provider_handle_t *vaf_hdl; /* RO */
+ char *vaf_path; /* WO */
+ nvlist_t *vaf_nvl; /* WO */
+ uint64_t vaf_nmisses; /* Atomic */
+ uint64_t vaf_narp; /* Atomic */
+} varpd_files_t;
+
+static const char *varpd_files_props[] = {
+ "files/config"
+};
+
+static boolean_t
+varpd_files_valid_dest(overlay_plugin_dest_t dest)
+{
+ if (dest & ~(OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT))
+ return (B_FALSE);
+
+ if (!(dest & (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT)))
+ return (B_FALSE);
+
+ return (B_TRUE);
+}
+
+static int
+varpd_files_create(varpd_provider_handle_t *hdl, void **outp,
+ overlay_plugin_dest_t dest)
+{
+ varpd_files_t *vaf;
+
+ if (varpd_files_valid_dest(dest) == B_FALSE)
+ return (ENOTSUP);
+
+ vaf = umem_alloc(sizeof (varpd_files_t), UMEM_DEFAULT);
+ if (vaf == NULL)
+ return (ENOMEM);
+
+ bzero(vaf, sizeof (varpd_files_t));
+ vaf->vaf_dest = dest;
+ vaf->vaf_path = NULL;
+ vaf->vaf_nvl = NULL;
+ vaf->vaf_hdl = hdl;
+ *outp = vaf;
+ return (0);
+}
+
+static int
+varpd_files_normalize_nvlist(varpd_files_t *vaf, nvlist_t *nvl)
+{
+ int ret;
+ nvlist_t *out;
+ nvpair_t *pair;
+
+ if ((ret = nvlist_alloc(&out, NV_UNIQUE_NAME, 0)) != 0)
+ return (ret);
+
+ for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL;
+ pair = nvlist_next_nvpair(nvl, pair)) {
+ char *name, fname[ETHERADDRSTRL];
+ nvlist_t *data;
+ struct ether_addr ether, *e;
+ e = &ether;
+
+ if (nvpair_type(pair) != DATA_TYPE_NVLIST) {
+ nvlist_free(out);
+ return (EINVAL);
+ }
+
+ name = nvpair_name(pair);
+ if ((ret = nvpair_value_nvlist(pair, &data)) != 0) {
+ nvlist_free(out);
+ return (EINVAL);
+ }
+
+ if (ether_aton_r(name, e) == NULL) {
+ nvlist_free(out);
+ return (EINVAL);
+ }
+
+ if (ether_ntoa_r(e, fname) == NULL) {
+ nvlist_free(out);
+ return (ENOMEM);
+ }
+
+ if ((ret = nvlist_add_nvlist(out, fname, data)) != 0) {
+ nvlist_free(out);
+ return (EINVAL);
+ }
+ }
+
+ vaf->vaf_nvl = out;
+ return (0);
+}
+
+static int
+varpd_files_start(void *arg)
+{
+ int fd, ret;
+ void *maddr;
+ struct stat st;
+ nvlist_t *nvl;
+ varpd_files_t *vaf = arg;
+
+ if (vaf->vaf_path == NULL)
+ return (EAGAIN);
+
+ if ((fd = open(vaf->vaf_path, O_RDONLY)) < 0)
+ return (errno);
+
+ if (fstat(fd, &st) != 0) {
+ ret = errno;
+ if (close(fd) != 0)
+ abort();
+ return (ret);
+ }
+
+ maddr = mmap(NULL, st.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE,
+ fd, 0);
+ if (maddr == NULL) {
+ ret = errno;
+ if (close(fd) != 0)
+ abort();
+ return (ret);
+ }
+
+ ret = nvlist_parse_json(maddr, st.st_size, &nvl,
+ NVJSON_FORCE_INTEGER, NULL);
+ if (ret == 0) {
+ ret = varpd_files_normalize_nvlist(vaf, nvl);
+ nvlist_free(nvl);
+ }
+ if (munmap(maddr, st.st_size) != 0)
+ abort();
+ if (close(fd) != 0)
+ abort();
+
+ return (ret);
+}
+
+static void
+varpd_files_stop(void *arg)
+{
+ varpd_files_t *vaf = arg;
+
+ nvlist_free(vaf->vaf_nvl);
+ vaf->vaf_nvl = NULL;
+}
+
+static void
+varpd_files_destroy(void *arg)
+{
+ varpd_files_t *vaf = arg;
+
+ assert(vaf->vaf_nvl == NULL);
+ if (vaf->vaf_path != NULL) {
+ umem_free(vaf->vaf_path, strlen(vaf->vaf_path) + 1);
+ vaf->vaf_path = NULL;
+ }
+ umem_free(vaf, sizeof (varpd_files_t));
+}
+
+static void
+varpd_files_lookup(void *arg, varpd_query_handle_t *qh,
+ const overlay_targ_lookup_t *otl, overlay_target_point_t *otp)
+{
+ char macstr[ETHERADDRSTRL], *ipstr;
+ nvlist_t *nvl;
+ varpd_files_t *vaf = arg;
+ int32_t port;
+ static const uint8_t bcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+
+ /* We don't support a default */
+ if (otl == NULL) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ if (otl->otl_sap == ETHERTYPE_ARP) {
+ libvarpd_plugin_proxy_arp(vaf->vaf_hdl, qh, otl);
+ return;
+ }
+
+ if (otl->otl_sap == ETHERTYPE_IPV6 &&
+ otl->otl_dstaddr[0] == 0x33 &&
+ otl->otl_dstaddr[1] == 0x33) {
+ libvarpd_plugin_proxy_ndp(vaf->vaf_hdl, qh, otl);
+ return;
+ }
+
+ if (otl->otl_sap == ETHERTYPE_IP &&
+ bcmp(otl->otl_dstaddr, bcast, ETHERADDRL) == 0) {
+ char *mac;
+ struct ether_addr a, *addr;
+
+ addr = &a;
+ if (ether_ntoa_r((struct ether_addr *)otl->otl_srcaddr,
+ macstr) == NULL) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ if (nvlist_lookup_nvlist(vaf->vaf_nvl, macstr, &nvl) != 0) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ if (nvlist_lookup_string(nvl, "dhcp-proxy", &mac) != 0) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ if (ether_aton_r(mac, addr) == NULL) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ libvarpd_plugin_proxy_dhcp(vaf->vaf_hdl, qh, otl);
+ return;
+ }
+
+ if (ether_ntoa_r((struct ether_addr *)otl->otl_dstaddr,
+ macstr) == NULL) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ if (nvlist_lookup_nvlist(vaf->vaf_nvl, macstr, &nvl) != 0) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ if (nvlist_lookup_int32(nvl, "port", &port) != 0) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ if (port <= 0 || port > UINT16_MAX) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+ otp->otp_port = port;
+
+ if (nvlist_lookup_string(nvl, "ip", &ipstr) != 0) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ /*
+ * Try to parse it as a v6 address and then if it's not, try to
+ * transform it into a v4 address which we'll then wrap it into a v4
+ * mapped address.
+ */
+ if (inet_pton(AF_INET6, ipstr, &otp->otp_ip) != 1) {
+ uint32_t v4;
+ if (inet_pton(AF_INET, ipstr, &v4) != 1) {
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_DROP);
+ return;
+ }
+ IN6_IPADDR_TO_V4MAPPED(v4, &otp->otp_ip);
+ }
+
+ libvarpd_plugin_query_reply(qh, VARPD_LOOKUP_OK);
+}
+
+/* ARGSUSED */
+static int
+varpd_files_nprops(void *arg, uint_t *nprops)
+{
+ *nprops = 1;
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+varpd_files_propinfo(void *arg, uint_t propid, varpd_prop_handle_t *vph)
+{
+ if (propid != 0)
+ return (EINVAL);
+
+ libvarpd_prop_set_name(vph, varpd_files_props[0]);
+ libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
+ libvarpd_prop_set_type(vph, OVERLAY_PROP_T_STRING);
+ libvarpd_prop_set_nodefault(vph);
+ return (0);
+}
+
+static int
+varpd_files_getprop(void *arg, const char *pname, void *buf, uint32_t *sizep)
+{
+ varpd_files_t *vaf = arg;
+
+ if (strcmp(pname, varpd_files_props[0]) != 0)
+ return (EINVAL);
+
+ if (vaf->vaf_path != NULL) {
+ size_t len = strlen(vaf->vaf_path) + 1;
+ if (*sizep < len)
+ return (EOVERFLOW);
+ *sizep = len;
+ (void) strlcpy(buf, vaf->vaf_path, *sizep);
+
+ } else {
+ *sizep = 0;
+ }
+
+ return (0);
+}
+
+static int
+varpd_files_setprop(void *arg, const char *pname, const void *buf,
+ const uint32_t size)
+{
+ varpd_files_t *vaf = arg;
+
+ if (strcmp(pname, varpd_files_props[0]) != 0)
+ return (EINVAL);
+
+ if (vaf->vaf_path != NULL)
+ umem_free(vaf->vaf_path, strlen(vaf->vaf_path) + 1);
+
+ vaf->vaf_path = umem_alloc(size, UMEM_DEFAULT);
+ if (vaf->vaf_path == NULL)
+ return (ENOMEM);
+ (void) strlcpy(vaf->vaf_path, buf, size);
+ return (0);
+}
+
+static int
+varpd_files_save(void *arg, nvlist_t *nvp)
+{
+ int ret;
+ varpd_files_t *vaf = arg;
+
+ if (vaf->vaf_path == NULL)
+ return (0);
+
+ if ((ret = nvlist_add_string(nvp, varpd_files_props[0],
+ vaf->vaf_path)) != 0)
+ return (ret);
+
+ if ((ret = nvlist_add_uint64(nvp, "files/vaf_nmisses",
+ vaf->vaf_nmisses)) != 0)
+ return (ret);
+
+ if ((ret = nvlist_add_uint64(nvp, "files/vaf_narp",
+ vaf->vaf_narp)) != 0)
+ return (ret);
+ return (0);
+}
+
+static int
+varpd_files_restore(nvlist_t *nvp, varpd_provider_handle_t *hdl,
+ overlay_plugin_dest_t dest, void **outp)
+{
+ varpd_files_t *vaf;
+ char *str;
+ int ret;
+ uint64_t nmisses, narp;
+
+ if (varpd_files_valid_dest(dest) == B_FALSE)
+ return (EINVAL);
+
+ ret = nvlist_lookup_string(nvp, varpd_files_props[0], &str);
+ if (ret != 0 && ret != ENOENT)
+ return (ret);
+ else if (ret == ENOENT)
+ str = NULL;
+
+ if (nvlist_lookup_uint64(nvp, "files/vaf_nmisses", &nmisses) != 0)
+ return (EINVAL);
+ if (nvlist_lookup_uint64(nvp, "files/vaf_narp", &narp) != 0)
+ return (EINVAL);
+
+ vaf = umem_alloc(sizeof (varpd_files_t), UMEM_DEFAULT);
+ if (vaf == NULL)
+ return (ENOMEM);
+
+ bzero(vaf, sizeof (varpd_files_t));
+ vaf->vaf_dest = dest;
+ if (str != NULL) {
+ size_t len = strlen(str) + 1;
+ vaf->vaf_path = umem_alloc(len, UMEM_DEFAULT);
+ if (vaf->vaf_path == NULL) {
+ umem_free(vaf, sizeof (varpd_files_t));
+ return (ENOMEM);
+ }
+ (void) strlcpy(vaf->vaf_path, str, len);
+ }
+
+ vaf->vaf_hdl = hdl;
+ *outp = vaf;
+ return (0);
+}
+
+static void
+varpd_files_proxy_arp(void *arg, varpd_arp_handle_t *vah, int kind,
+ const struct sockaddr *sock, uint8_t *out)
+{
+ varpd_files_t *vaf = arg;
+ const struct sockaddr_in *ip;
+ const struct sockaddr_in6 *ip6;
+ nvpair_t *pair;
+
+ if (kind != VARPD_QTYPE_ETHERNET) {
+ libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ if (sock->sa_family != AF_INET && sock->sa_family != AF_INET6) {
+ libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ ip = (const struct sockaddr_in *)sock;
+ ip6 = (const struct sockaddr_in6 *)sock;
+ for (pair = nvlist_next_nvpair(vaf->vaf_nvl, NULL); pair != NULL;
+ pair = nvlist_next_nvpair(vaf->vaf_nvl, pair)) {
+ char *mac, *ipstr;
+ nvlist_t *data;
+ struct in_addr ia;
+ struct in6_addr ia6;
+ struct ether_addr ether, *e;
+ e = &ether;
+
+ if (nvpair_type(pair) != DATA_TYPE_NVLIST)
+ continue;
+
+ mac = nvpair_name(pair);
+ if (nvpair_value_nvlist(pair, &data) != 0)
+ continue;
+
+
+ if (sock->sa_family == AF_INET) {
+ if (nvlist_lookup_string(data, "arp", &ipstr) != 0)
+ continue;
+
+ if (inet_pton(AF_INET, ipstr, &ia) != 1)
+ continue;
+
+ if (bcmp(&ia, &ip->sin_addr,
+ sizeof (struct in_addr)) != 0)
+ continue;
+ } else {
+ if (nvlist_lookup_string(data, "ndp", &ipstr) != 0)
+ continue;
+
+ if (inet_pton(AF_INET6, ipstr, &ia6) != 1)
+ continue;
+
+ if (bcmp(&ia6, &ip6->sin6_addr,
+ sizeof (struct in6_addr)) != 0)
+ continue;
+ }
+
+ if (ether_aton_r(mac, e) == NULL) {
+ libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ bcopy(e, out, ETHERADDRL);
+ libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_OK);
+ return;
+ }
+
+ libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
+}
+
+static void
+varpd_files_proxy_dhcp(void *arg, varpd_dhcp_handle_t *vdh, int type,
+ const overlay_targ_lookup_t *otl, uint8_t *out)
+{
+ varpd_files_t *vaf = arg;
+ nvlist_t *nvl;
+ char macstr[ETHERADDRSTRL], *mac;
+ struct ether_addr a, *addr;
+
+ addr = &a;
+ if (type != VARPD_QTYPE_ETHERNET) {
+ libvarpd_plugin_dhcp_reply(vdh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ if (ether_ntoa_r((struct ether_addr *)otl->otl_srcaddr,
+ macstr) == NULL) {
+ libvarpd_plugin_dhcp_reply(vdh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ if (nvlist_lookup_nvlist(vaf->vaf_nvl, macstr, &nvl) != 0) {
+ libvarpd_plugin_dhcp_reply(vdh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ if (nvlist_lookup_string(nvl, "dhcp-proxy", &mac) != 0) {
+ libvarpd_plugin_dhcp_reply(vdh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ if (ether_aton_r(mac, addr) == NULL) {
+ libvarpd_plugin_dhcp_reply(vdh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ bcopy(addr, out, ETHERADDRL);
+ libvarpd_plugin_dhcp_reply(vdh, VARPD_LOOKUP_OK);
+}
+
+static const varpd_plugin_ops_t varpd_files_ops = {
+ 0,
+ varpd_files_create,
+ varpd_files_start,
+ varpd_files_stop,
+ varpd_files_destroy,
+ NULL,
+ varpd_files_lookup,
+ varpd_files_nprops,
+ varpd_files_propinfo,
+ varpd_files_getprop,
+ varpd_files_setprop,
+ varpd_files_save,
+ varpd_files_restore,
+ varpd_files_proxy_arp,
+ varpd_files_proxy_dhcp
+};
+
+#pragma init(varpd_files_init)
+static void
+varpd_files_init(void)
+{
+ int err;
+ varpd_plugin_register_t *vpr;
+
+ vpr = libvarpd_plugin_alloc(VARPD_CURRENT_VERSION, &err);
+ if (vpr == NULL)
+ return;
+
+ vpr->vpr_mode = OVERLAY_TARGET_DYNAMIC;
+ vpr->vpr_name = "files";
+ vpr->vpr_ops = &varpd_files_ops;
+ (void) libvarpd_plugin_register(vpr);
+ libvarpd_plugin_free(vpr);
+}
diff --git a/usr/src/lib/varpd/files/common/libvarpd_files_json.c b/usr/src/lib/varpd/files/common/libvarpd_files_json.c
new file mode 100644
index 0000000000..240c84bd77
--- /dev/null
+++ b/usr/src/lib/varpd/files/common/libvarpd_files_json.c
@@ -0,0 +1,936 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <strings.h>
+#include <errno.h>
+#include <libnvpair.h>
+#include <sys/ccompile.h>
+
+#include "libvarpd_files_json.h"
+
+typedef enum json_type {
+ JSON_TYPE_NOTHING = 0,
+ JSON_TYPE_STRING = 1,
+ JSON_TYPE_INTEGER,
+ JSON_TYPE_DOUBLE,
+ JSON_TYPE_BOOLEAN,
+ JSON_TYPE_NULL,
+ JSON_TYPE_OBJECT,
+ JSON_TYPE_ARRAY
+} json_type_t;
+
+typedef enum parse_state {
+ PARSE_ERROR = -1,
+ PARSE_DONE = 0,
+ PARSE_REST,
+ PARSE_OBJECT,
+ PARSE_KEY_STRING,
+ PARSE_COLON,
+ PARSE_STRING,
+ PARSE_OBJECT_COMMA,
+ PARSE_ARRAY,
+ PARSE_BAREWORD,
+ PARSE_NUMBER,
+ PARSE_ARRAY_VALUE,
+ PARSE_ARRAY_COMMA
+} parse_state_t;
+
+#define JSON_MARKER ".__json_"
+#define JSON_MARKER_ARRAY JSON_MARKER "array"
+
+typedef struct parse_frame {
+ parse_state_t pf_ps;
+ nvlist_t *pf_nvl;
+
+ char *pf_key;
+ void *pf_value;
+ json_type_t pf_value_type;
+ int pf_array_index;
+
+ struct parse_frame *pf_next;
+} parse_frame_t;
+
+typedef struct state {
+ const char *s_in;
+ unsigned long s_pos;
+ unsigned long s_len;
+
+ parse_frame_t *s_top;
+
+ nvlist_parse_json_flags_t s_flags;
+
+ /*
+ * This string buffer is used for temporary storage by the
+ * "collect_*()" family of functions.
+ */
+ custr_t *s_collect;
+
+ int s_errno;
+ custr_t *s_errstr;
+} state_t;
+
+typedef void (*parse_handler_t)(state_t *);
+
+static void
+movestate(state_t *s, parse_state_t ps)
+{
+ if (s->s_flags & NVJSON_DEBUG) {
+ (void) fprintf(stderr, "nvjson: move state %d -> %d\n",
+ s->s_top->pf_ps, ps);
+ }
+ s->s_top->pf_ps = ps;
+}
+
+static void
+posterror(state_t *s, int erno, const char *error)
+{
+ /*
+ * If the caller wants error messages printed to stderr, do that
+ * first.
+ */
+ if (s->s_flags & NVJSON_ERRORS_TO_STDERR) {
+ (void) fprintf(stderr, "nvjson error (pos %ld, errno %d): %s\n",
+ s->s_pos, erno, error);
+ }
+
+ /*
+ * Try and store the error message for the caller. This may fail if
+ * the error was related to memory pressure, and that condition still
+ * exists.
+ */
+ s->s_errno = erno;
+ if (s->s_errstr != NULL) {
+ (void) custr_append(s->s_errstr, error);
+ }
+
+ movestate(s, PARSE_ERROR);
+}
+
+static int
+pushstate(state_t *s, parse_state_t ps, parse_state_t retps)
+{
+ parse_frame_t *n;
+
+ if (s->s_flags & NVJSON_DEBUG) {
+ (void) fprintf(stderr, "nvjson: push state %d -> %d (ret %d)\n",
+ s->s_top->pf_ps, ps, retps);
+ }
+
+ if ((n = calloc(1, sizeof (*n))) == NULL) {
+ posterror(s, errno, "pushstate calloc failure");
+ return (-1);
+ }
+
+ /*
+ * Store the state we'll return to when popping this
+ * frame:
+ */
+ s->s_top->pf_ps = retps;
+
+ /*
+ * Store the initial state for the new frame, and
+ * put it on top of the stack:
+ */
+ n->pf_ps = ps;
+ n->pf_value_type = JSON_TYPE_NOTHING;
+
+ n->pf_next = s->s_top;
+ s->s_top = n;
+
+ return (0);
+}
+
+static char
+popchar(state_t *s)
+{
+ if (s->s_pos > s->s_len) {
+ return (0);
+ }
+ return (s->s_in[s->s_pos++]);
+}
+
+static char
+peekchar(state_t *s)
+{
+ if (s->s_pos > s->s_len) {
+ return (0);
+ }
+ return (s->s_in[s->s_pos]);
+}
+
+static void
+discard_whitespace(state_t *s)
+{
+ while (isspace(peekchar(s))) {
+ (void) popchar(s);
+ }
+}
+
+static char *escape_pairs[] = {
+ "\"\"", "\\\\", "//", "b\b", "f\f", "n\n", "r\r", "t\t", NULL
+};
+
+static char
+collect_string_escape(state_t *s)
+{
+ int i;
+ char c = popchar(s);
+
+ if (c == '\0') {
+ posterror(s, EPROTO, "EOF mid-escape sequence");
+ return (-1);
+ }
+
+ /*
+ * Handle four-digit Unicode escapes up to and including \u007f.
+ * Strings that cannot be represented as 7-bit clean ASCII are not
+ * currently supported.
+ */
+ if (c == 'u') {
+ int res;
+ int ndigs = 0;
+ char digs[5];
+
+ /*
+ * Deal with 4-digit unicode escape.
+ */
+ while (ndigs < 4) {
+ if ((digs[ndigs++] = popchar(s)) == '\0') {
+ posterror(s, EPROTO, "EOF mid-escape "
+ "sequence");
+ return (-1);
+ }
+ }
+ digs[4] = '\0';
+ if ((res = atoi(digs)) > 127) {
+ posterror(s, EPROTO, "unicode escape above 0x7f");
+ return (-1);
+ }
+
+ if (custr_appendc(s->s_collect, res) != 0) {
+ posterror(s, errno, "custr_appendc failure");
+ return (-1);
+ }
+ return (0);
+ }
+
+ /*
+ * See if this is a C-style escape character we recognise.
+ */
+ for (i = 0; escape_pairs[i] != NULL; i++) {
+ char *ep = escape_pairs[i];
+ if (ep[0] == c) {
+ if (custr_appendc(s->s_collect, ep[1]) != 0) {
+ posterror(s, errno, "custr_appendc failure");
+ return (-1);
+ }
+ return (0);
+ }
+ }
+
+ posterror(s, EPROTO, "unrecognised escape sequence");
+ return (-1);
+}
+
+static int
+collect_string(state_t *s)
+{
+ custr_reset(s->s_collect);
+
+ for (;;) {
+ char c;
+
+ switch (c = popchar(s)) {
+ case '"':
+ /*
+ * Legal End of String.
+ */
+ return (0);
+
+ case '\0':
+ posterror(s, EPROTO, "EOF mid-string");
+ return (-1);
+
+ case '\\':
+ /*
+ * Escape Characters and Sequences.
+ */
+ if (collect_string_escape(s) != 0) {
+ return (-1);
+ }
+ break;
+
+ default:
+ if (custr_appendc(s->s_collect, c) != 0) {
+ posterror(s, errno, "custr_appendc failure");
+ return (-1);
+ }
+ break;
+ }
+ }
+}
+
+static int
+collect_bareword(state_t *s)
+{
+ custr_reset(s->s_collect);
+
+ for (;;) {
+ if (!islower(peekchar(s))) {
+ return (0);
+ }
+
+ if (custr_appendc(s->s_collect, popchar(s)) != 0) {
+ posterror(s, errno, "custr_appendc failure");
+ return (-1);
+ }
+ }
+}
+
+static void
+hdlr_bareword(state_t *s)
+{
+ const char *str;
+
+ if (collect_bareword(s) != 0) {
+ return;
+ }
+
+ str = custr_cstr(s->s_collect);
+ if (strcmp(str, "true") == 0) {
+ s->s_top->pf_value_type = JSON_TYPE_BOOLEAN;
+ s->s_top->pf_value = (void *)B_TRUE;
+ } else if (strcmp(str, "false") == 0) {
+ s->s_top->pf_value_type = JSON_TYPE_BOOLEAN;
+ s->s_top->pf_value = (void *)B_FALSE;
+ } else if (strcmp(str, "null") == 0) {
+ s->s_top->pf_value_type = JSON_TYPE_NULL;
+ } else {
+ posterror(s, EPROTO, "expected 'true', 'false' or 'null'");
+ return;
+ }
+
+ movestate(s, PARSE_DONE);
+}
+
+/* ARGSUSED */
+static int
+collect_number(state_t *s, boolean_t *isint, int32_t *result,
+ double *fresult __GNU_UNUSED)
+{
+ boolean_t neg = B_FALSE;
+ int t;
+
+ custr_reset(s->s_collect);
+
+ if (peekchar(s) == '-') {
+ neg = B_TRUE;
+ (void) popchar(s);
+ }
+ /*
+ * Read the 'int' portion:
+ */
+ if (!isdigit(peekchar(s))) {
+ posterror(s, EPROTO, "malformed number: expected digit (0-9)");
+ return (-1);
+ }
+ for (;;) {
+ if (!isdigit(peekchar(s))) {
+ break;
+ }
+ if (custr_appendc(s->s_collect, popchar(s)) != 0) {
+ posterror(s, errno, "custr_append failure");
+ return (-1);
+ }
+ }
+ if (peekchar(s) == '.' || peekchar(s) == 'e' || peekchar(s) == 'E') {
+ posterror(s, ENOTSUP, "do not yet support FRACs or EXPs");
+ return (-1);
+ }
+
+ t = atoi(custr_cstr(s->s_collect));
+
+ *isint = B_TRUE;
+ *result = (neg == B_TRUE) ? (-t) : t;
+ return (0);
+}
+
+static void
+hdlr_number(state_t *s)
+{
+ boolean_t isint;
+ int32_t result;
+ double fresult;
+
+ if (collect_number(s, &isint, &result, &fresult) != 0) {
+ return;
+ }
+
+ if (isint == B_TRUE) {
+ s->s_top->pf_value = (void *)(uintptr_t)result;
+ s->s_top->pf_value_type = JSON_TYPE_INTEGER;
+ } else {
+ s->s_top->pf_value = malloc(sizeof (fresult));
+ bcopy(&fresult, s->s_top->pf_value, sizeof (fresult));
+ s->s_top->pf_value_type = JSON_TYPE_DOUBLE;
+ }
+
+ movestate(s, PARSE_DONE);
+}
+
+static void
+hdlr_rest(state_t *s)
+{
+ char c;
+ discard_whitespace(s);
+ c = popchar(s);
+ switch (c) {
+ case '{':
+ movestate(s, PARSE_OBJECT);
+ return;
+
+ case '[':
+ movestate(s, PARSE_ARRAY);
+ return;
+
+ default:
+ posterror(s, EPROTO, "EOF before object or array");
+ return;
+ }
+}
+
+static int
+add_empty_child(state_t *s)
+{
+ /*
+ * Here, we create an empty nvlist to represent this object
+ * or array:
+ */
+ nvlist_t *empty;
+ if (nvlist_alloc(&empty, NV_UNIQUE_NAME, 0) != 0) {
+ posterror(s, errno, "nvlist_alloc failure");
+ return (-1);
+ }
+ if (s->s_top->pf_next != NULL) {
+ /*
+ * If we're a child of the frame above, we store ourselves in
+ * that frame's nvlist:
+ */
+ nvlist_t *nvl = s->s_top->pf_next->pf_nvl;
+ char *key = s->s_top->pf_next->pf_key;
+
+ if (nvlist_add_nvlist(nvl, key, empty) != 0) {
+ posterror(s, errno, "nvlist_add_nvlist failure");
+ nvlist_free(empty);
+ return (-1);
+ }
+ nvlist_free(empty);
+ if (nvlist_lookup_nvlist(nvl, key, &empty) != 0) {
+ posterror(s, errno, "nvlist_lookup_nvlist failure");
+ return (-1);
+ }
+ }
+ s->s_top->pf_nvl = empty;
+ return (0);
+}
+
+static int
+decorate_array(state_t *s)
+{
+ int idx = s->s_top->pf_array_index;
+ /*
+ * When we are done creating an array, we store a 'length'
+ * property on it, as well as an internal-use marker value.
+ */
+ if (nvlist_add_boolean(s->s_top->pf_nvl, JSON_MARKER_ARRAY) != 0 ||
+ nvlist_add_uint32(s->s_top->pf_nvl, "length", idx) != 0) {
+ posterror(s, errno, "nvlist_add failure");
+ return (-1);
+ }
+
+ return (0);
+}
+
+static void
+hdlr_array(state_t *s)
+{
+ s->s_top->pf_value_type = JSON_TYPE_ARRAY;
+
+ if (add_empty_child(s) != 0) {
+ return;
+ }
+
+ discard_whitespace(s);
+
+ switch (peekchar(s)) {
+ case ']':
+ (void) popchar(s);
+
+ if (decorate_array(s) != 0) {
+ return;
+ }
+
+ movestate(s, PARSE_DONE);
+ return;
+
+ default:
+ movestate(s, PARSE_ARRAY_VALUE);
+ return;
+ }
+}
+
+static void
+hdlr_array_comma(state_t *s)
+{
+ discard_whitespace(s);
+
+ switch (popchar(s)) {
+ case ']':
+ if (decorate_array(s) != 0) {
+ return;
+ }
+
+ movestate(s, PARSE_DONE);
+ return;
+ case ',':
+ movestate(s, PARSE_ARRAY_VALUE);
+ return;
+ default:
+ posterror(s, EPROTO, "expected ',' or ']'");
+ return;
+ }
+}
+
+static void
+hdlr_array_value(state_t *s)
+{
+ char c;
+
+ /*
+ * Generate keyname from the next array index:
+ */
+ if (s->s_top->pf_key != NULL) {
+ (void) fprintf(stderr, "pf_key not null! was %s\n",
+ s->s_top->pf_key);
+ abort();
+ }
+
+ if (asprintf(&s->s_top->pf_key, "%d", s->s_top->pf_array_index++) < 0) {
+ posterror(s, errno, "asprintf failure");
+ return;
+ }
+
+ discard_whitespace(s);
+
+ /*
+ * Select which type handler we need for the next value:
+ */
+ switch (c = peekchar(s)) {
+ case '"':
+ (void) popchar(s);
+ (void) pushstate(s, PARSE_STRING, PARSE_ARRAY_COMMA);
+ return;
+
+ case '{':
+ (void) popchar(s);
+ (void) pushstate(s, PARSE_OBJECT, PARSE_ARRAY_COMMA);
+ return;
+
+ case '[':
+ (void) popchar(s);
+ (void) pushstate(s, PARSE_ARRAY, PARSE_ARRAY_COMMA);
+ return;
+
+ default:
+ if (islower(c)) {
+ (void) pushstate(s, PARSE_BAREWORD,
+ PARSE_ARRAY_COMMA);
+ return;
+ } else if (c == '-' || isdigit(c)) {
+ (void) pushstate(s, PARSE_NUMBER, PARSE_ARRAY_COMMA);
+ return;
+ } else {
+ posterror(s, EPROTO, "unexpected character at start "
+ "of value");
+ return;
+ }
+ }
+}
+
+static void
+hdlr_object(state_t *s)
+{
+ s->s_top->pf_value_type = JSON_TYPE_OBJECT;
+
+ if (add_empty_child(s) != 0) {
+ return;
+ }
+
+ discard_whitespace(s);
+
+ switch (popchar(s)) {
+ case '}':
+ movestate(s, PARSE_DONE);
+ return;
+
+ case '"':
+ movestate(s, PARSE_KEY_STRING);
+ return;
+
+ default:
+ posterror(s, EPROTO, "expected key or '}'");
+ return;
+ }
+}
+
+static void
+hdlr_key_string(state_t *s)
+{
+ if (collect_string(s) != 0) {
+ return;
+ }
+
+ /*
+ * Record the key name of the next value.
+ */
+ if ((s->s_top->pf_key = strdup(custr_cstr(s->s_collect))) == NULL) {
+ posterror(s, errno, "strdup failure");
+ return;
+ }
+
+ movestate(s, PARSE_COLON);
+}
+
+static void
+hdlr_colon(state_t *s)
+{
+ char c;
+ discard_whitespace(s);
+
+ if ((c = popchar(s)) != ':') {
+ posterror(s, EPROTO, "expected ':'");
+ return;
+ }
+
+ discard_whitespace(s);
+
+ /*
+ * Select which type handler we need for the value after the colon:
+ */
+ switch (c = peekchar(s)) {
+ case '"':
+ (void) popchar(s);
+ (void) pushstate(s, PARSE_STRING, PARSE_OBJECT_COMMA);
+ return;
+
+ case '{':
+ (void) popchar(s);
+ (void) pushstate(s, PARSE_OBJECT, PARSE_OBJECT_COMMA);
+ return;
+
+ case '[':
+ (void) popchar(s);
+ (void) pushstate(s, PARSE_ARRAY, PARSE_OBJECT_COMMA);
+ return;
+
+ default:
+ if (islower(c)) {
+ (void) pushstate(s, PARSE_BAREWORD, PARSE_OBJECT_COMMA);
+ return;
+ } else if (c == '-' || isdigit(c)) {
+ (void) pushstate(s, PARSE_NUMBER, PARSE_OBJECT_COMMA);
+ return;
+ } else {
+ (void) posterror(s, EPROTO, "unexpected character at "
+ "start of value");
+ return;
+ }
+ }
+}
+
+static void
+hdlr_object_comma(state_t *s)
+{
+ discard_whitespace(s);
+
+ switch (popchar(s)) {
+ case '}':
+ movestate(s, PARSE_DONE);
+ return;
+
+ case ',':
+ discard_whitespace(s);
+ if (popchar(s) != '"') {
+ posterror(s, EPROTO, "expected '\"'");
+ return;
+ }
+ movestate(s, PARSE_KEY_STRING);
+ return;
+
+ default:
+ posterror(s, EPROTO, "expected ',' or '}'");
+ return;
+ }
+}
+
+static void
+hdlr_string(state_t *s)
+{
+ if (collect_string(s) != 0) {
+ return;
+ }
+
+ s->s_top->pf_value_type = JSON_TYPE_STRING;
+ if ((s->s_top->pf_value = strdup(custr_cstr(s->s_collect))) == NULL) {
+ posterror(s, errno, "strdup failure");
+ return;
+ }
+
+ movestate(s, PARSE_DONE);
+}
+
+static int
+store_value(state_t *s)
+{
+ nvlist_t *targ = s->s_top->pf_next->pf_nvl;
+ char *key = s->s_top->pf_next->pf_key;
+ json_type_t type = s->s_top->pf_value_type;
+ int ret = 0;
+
+ switch (type) {
+ case JSON_TYPE_STRING:
+ if (nvlist_add_string(targ, key, s->s_top->pf_value) != 0) {
+ posterror(s, errno, "nvlist_add_string failure");
+ ret = -1;
+ }
+ free(s->s_top->pf_value);
+ break;
+
+ case JSON_TYPE_BOOLEAN:
+ if (nvlist_add_boolean_value(targ, key,
+ (boolean_t)s->s_top->pf_value) != 0) {
+ posterror(s, errno, "nvlist_add_boolean_value "
+ "failure");
+ ret = -1;
+ }
+ break;
+
+ case JSON_TYPE_NULL:
+ if (nvlist_add_boolean(targ, key) != 0) {
+ posterror(s, errno, "nvlist_add_boolean failure");
+ ret = -1;
+ }
+ break;
+
+ case JSON_TYPE_INTEGER:
+ if (nvlist_add_int32(targ, key,
+ (int32_t)(uintptr_t)s->s_top->pf_value) != 0) {
+ posterror(s, errno, "nvlist_add_int32 failure");
+ ret = -1;
+ }
+ break;
+
+ case JSON_TYPE_ARRAY:
+ case JSON_TYPE_OBJECT:
+ /*
+ * Objects and arrays are already 'stored' in their target
+ * nvlist on creation. See: hdlr_object, hdlr_array.
+ */
+ break;
+
+ default:
+ (void) fprintf(stderr, "ERROR: could not store unknown "
+ "type %d\n", type);
+ abort();
+ }
+
+ s->s_top->pf_value = NULL;
+ free(s->s_top->pf_next->pf_key);
+ s->s_top->pf_next->pf_key = NULL;
+ return (ret);
+}
+
+static parse_frame_t *
+parse_frame_free(parse_frame_t *pf, boolean_t free_nvl)
+{
+ parse_frame_t *next = pf->pf_next;
+ if (pf->pf_key != NULL) {
+ free(pf->pf_key);
+ }
+ if (pf->pf_value != NULL) {
+ abort();
+ }
+ if (free_nvl && pf->pf_nvl != NULL) {
+ nvlist_free(pf->pf_nvl);
+ }
+ free(pf);
+ return (next);
+}
+
+static parse_handler_t hdlrs[] = {
+ NULL, /* PARSE_DONE */
+ hdlr_rest, /* PARSE_REST */
+ hdlr_object, /* PARSE_OBJECT */
+ hdlr_key_string, /* PARSE_KEY_STRING */
+ hdlr_colon, /* PARSE_COLON */
+ hdlr_string, /* PARSE_STRING */
+ hdlr_object_comma, /* PARSE_OBJECT_COMMA */
+ hdlr_array, /* PARSE_ARRAY */
+ hdlr_bareword, /* PARSE_BAREWORD */
+ hdlr_number, /* PARSE_NUMBER */
+ hdlr_array_value, /* PARSE_ARRAY_VALUE */
+ hdlr_array_comma /* PARSE_ARRAY_COMMA */
+};
+#define NUM_PARSE_HANDLERS (int)(sizeof (hdlrs) / sizeof (hdlrs[0]))
+
+int
+nvlist_parse_json(const char *buf, size_t buflen, nvlist_t **nvlp,
+ nvlist_parse_json_flags_t flag, nvlist_parse_json_error_t *errout)
+{
+ state_t s;
+
+ /*
+ * Check for valid flags:
+ */
+ if ((flag & NVJSON_FORCE_INTEGER) && (flag & NVJSON_FORCE_DOUBLE)) {
+ errno = EINVAL;
+ return (-1);
+ }
+ if ((flag & ~NVJSON_ALL) != 0) {
+ errno = EINVAL;
+ return (-1);
+ }
+
+ /*
+ * Initialise parsing state structure:
+ */
+ bzero(&s, sizeof (s));
+ s.s_in = buf;
+ s.s_pos = 0;
+ s.s_len = buflen;
+ s.s_flags = flag;
+
+ /*
+ * Allocate the collect buffer string.
+ */
+ if (custr_alloc(&s.s_collect) != 0) {
+ s.s_errno = errno;
+ if (errout != NULL) {
+ (void) snprintf(errout->nje_message,
+ sizeof (errout->nje_message),
+ "custr alloc failure: %s",
+ strerror(errno));
+ }
+ goto out;
+ }
+
+ /*
+ * If the caller has requested error information, allocate the error
+ * string now.
+ */
+ if (errout != NULL) {
+ if (custr_alloc_buf(&s.s_errstr, errout->nje_message,
+ sizeof (errout->nje_message)) != 0) {
+ s.s_errno = errno;
+ (void) snprintf(errout->nje_message,
+ sizeof (errout->nje_message),
+ "custr alloc failure: %s",
+ strerror(errno));
+ goto out;
+ }
+ custr_reset(s.s_errstr);
+ }
+
+ /*
+ * Allocate top-most stack frame:
+ */
+ if ((s.s_top = calloc(1, sizeof (*s.s_top))) == NULL) {
+ s.s_errno = errno;
+ goto out;
+ }
+
+ s.s_top->pf_ps = PARSE_REST;
+ for (;;) {
+ if (s.s_top->pf_ps < 0) {
+ /*
+ * The parser reported an error.
+ */
+ goto out;
+ }
+
+ if (s.s_top->pf_ps == PARSE_DONE) {
+ if (s.s_top->pf_next == NULL) {
+ /*
+ * Last frame, so we're really
+ * done.
+ */
+ *nvlp = s.s_top->pf_nvl;
+ goto out;
+ } else {
+ /*
+ * Otherwise, pop a frame and continue in
+ * previous state. Copy out the value we
+ * created in the old frame:
+ */
+ if (store_value(&s) != 0) {
+ goto out;
+ }
+
+ /*
+ * Free old frame:
+ */
+ s.s_top = parse_frame_free(s.s_top, B_FALSE);
+ }
+ }
+
+ /*
+ * Dispatch to parser handler routine for this state:
+ */
+ if (s.s_top->pf_ps >= NUM_PARSE_HANDLERS ||
+ hdlrs[s.s_top->pf_ps] == NULL) {
+ (void) fprintf(stderr, "no handler for state %d\n",
+ s.s_top->pf_ps);
+ abort();
+ }
+ hdlrs[s.s_top->pf_ps](&s);
+ }
+
+out:
+ if (errout != NULL) {
+ /*
+ * Copy out error number and parse position. The custr_t for
+ * the error message was backed by the buffer in the error
+ * object, so no copying is required.
+ */
+ errout->nje_errno = s.s_errno;
+ errout->nje_pos = s.s_pos;
+ }
+
+ /*
+ * Free resources:
+ */
+ while (s.s_top != NULL) {
+ s.s_top = parse_frame_free(s.s_top, s.s_errno == 0 ? B_FALSE :
+ B_TRUE);
+ }
+ custr_free(s.s_collect);
+ custr_free(s.s_errstr);
+
+ errno = s.s_errno;
+ return (s.s_errno == 0 ? 0 : -1);
+}
diff --git a/usr/src/lib/varpd/files/common/libvarpd_files_json.h b/usr/src/lib/varpd/files/common/libvarpd_files_json.h
new file mode 100644
index 0000000000..27506e22d6
--- /dev/null
+++ b/usr/src/lib/varpd/files/common/libvarpd_files_json.h
@@ -0,0 +1,52 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _LIBVARPD_FILES_JSON_H
+#define _LIBVARPD_FILES_JSON_H
+
+#include <libnvpair.h>
+#include <libcmdutils.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum nvlist_parse_json_flags {
+ NVJSON_FORCE_INTEGER = 0x01,
+ NVJSON_FORCE_DOUBLE = 0x02,
+ NVJSON_ERRORS_TO_STDERR = 0x04,
+ NVJSON_DEBUG = 0x08
+} nvlist_parse_json_flags_t;
+
+typedef struct nvlist_parse_json_error {
+ int nje_errno;
+ long nje_pos;
+ char nje_message[512];
+} nvlist_parse_json_error_t;
+
+#define NVJSON_ALL \
+ (NVJSON_FORCE_INTEGER | \
+ NVJSON_FORCE_DOUBLE | \
+ NVJSON_ERRORS_TO_STDERR | \
+ NVJSON_DEBUG)
+
+extern int nvlist_parse_json(const char *, size_t, nvlist_t **,
+ nvlist_parse_json_flags_t, nvlist_parse_json_error_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBVARPD_FILES_JSON_H */
diff --git a/usr/src/lib/varpd/files/common/llib-lvarpd_files b/usr/src/lib/varpd/files/common/llib-lvarpd_files
new file mode 100644
index 0000000000..03c34f4fcb
--- /dev/null
+++ b/usr/src/lib/varpd/files/common/llib-lvarpd_files
@@ -0,0 +1,18 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/* LINTLIBRARY */
+/* PROTOLIB1 */
+
diff --git a/usr/src/lib/varpd/files/common/mapfile-vers b/usr/src/lib/varpd/files/common/mapfile-vers
new file mode 100644
index 0000000000..6b7c5a5067
--- /dev/null
+++ b/usr/src/lib/varpd/files/common/mapfile-vers
@@ -0,0 +1,35 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_VERSION SUNWprivate {
+ local:
+ *;
+};
diff --git a/usr/src/lib/varpd/files/i386/Makefile b/usr/src/lib/varpd/files/i386/Makefile
new file mode 100644
index 0000000000..f2b4f63da5
--- /dev/null
+++ b/usr/src/lib/varpd/files/i386/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/varpd/files/sparc/Makefile b/usr/src/lib/varpd/files/sparc/Makefile
new file mode 100644
index 0000000000..f2b4f63da5
--- /dev/null
+++ b/usr/src/lib/varpd/files/sparc/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/varpd/files/sparcv9/Makefile b/usr/src/lib/varpd/files/sparcv9/Makefile
new file mode 100644
index 0000000000..d552642882
--- /dev/null
+++ b/usr/src/lib/varpd/files/sparcv9/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+include ../Makefile.com
+include ../../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/varpd/libvarpd/Makefile b/usr/src/lib/varpd/libvarpd/Makefile
new file mode 100644
index 0000000000..2a4f8f070c
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/Makefile
@@ -0,0 +1,55 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+include ../../Makefile.lib
+
+HDRS = libvarpd.h libvarpd_client.h libvarpd_provider.h
+HDRDIR = common
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+TYPECHECK_LIB = libvarpd.so.1
+TYPELIST = \
+ varpd_client_instance_arg_t \
+ varpd_client_nprops_arg_t \
+ varpd_client_propinfo_arg_t \
+ varpd_client_eresp_t \
+ varpd_persist_header_t \
+ overlay_targ_cache_entry_t \
+ overlay_targ_cache_t \
+ overlay_targ_cache_iter_t
+
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+install := TARGET = install
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber lint: $(SUBDIRS)
+
+install: $(SUBDIRS) $(VARPD_MAPFILES) install_h
+
+install_h: $(ROOTHDRS)
+
+check: $(CHECKHDRS) $(TYPECHECK)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../../Makefile.targ
diff --git a/usr/src/lib/varpd/libvarpd/Makefile.com b/usr/src/lib/varpd/libvarpd/Makefile.com
new file mode 100644
index 0000000000..1521f83f8f
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/Makefile.com
@@ -0,0 +1,53 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+LIBRARY = libvarpd.a
+VERS = .1
+OBJECTS = libvarpd.o \
+ libvarpd_arp.o \
+ libvarpd_client.o \
+ libvarpd_door.o \
+ libvarpd_overlay.o \
+ libvarpd_panic.o \
+ libvarpd_persist.o \
+ libvarpd_prop.o \
+ libvarpd_plugin.o \
+ libvarpd_util.o
+
+include ../../../Makefile.lib
+
+LIBS = $(DYNLIB) $(LINTLIB)
+LDLIBS += -lc -lavl -lumem -lidspace -lnvpair -lmd5 -lrename \
+ -lbunyan
+CPPFLAGS += -I../common
+
+CERRWARN += -erroff=E_STRUCT_DERIVED_FROM_FLEX_MBR
+LINTFLAGS += -erroff=E_STRUCT_DERIVED_FROM_FLEX_MBR \
+ -erroff=E_BAD_PTR_CAST_ALIGN
+LINTFLAGS64 += -erroff=E_STRUCT_DERIVED_FROM_FLEX_MBR \
+ -erroff=E_BAD_PTR_CAST_ALIGN
+
+C99MODE= -xc99=%all
+C99LMODE= -Xc99=%all
+
+SRCDIR = ../common
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+include ../../../Makefile.targ
diff --git a/usr/src/lib/varpd/libvarpd/amd64/Makefile b/usr/src/lib/varpd/libvarpd/amd64/Makefile
new file mode 100644
index 0000000000..d552642882
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/amd64/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+include ../Makefile.com
+include ../../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd.c b/usr/src/lib/varpd/libvarpd/common/libvarpd.c
new file mode 100644
index 0000000000..e4460089cc
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd.c
@@ -0,0 +1,360 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * varpd library
+ */
+
+#include <stdlib.h>
+#include <errno.h>
+#include <umem.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/avl.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <strings.h>
+
+#include <libvarpd_impl.h>
+
+static int
+libvarpd_instance_comparator(const void *lp, const void *rp)
+{
+ const varpd_instance_t *lpp, *rpp;
+ lpp = lp;
+ rpp = rp;
+
+ if (lpp->vri_id > rpp->vri_id)
+ return (1);
+ if (lpp->vri_id < rpp->vri_id)
+ return (-1);
+ return (0);
+}
+
+static int
+libvarpd_instance_lcomparator(const void *lp, const void *rp)
+{
+ const varpd_instance_t *lpp, *rpp;
+ lpp = lp;
+ rpp = rp;
+
+ if (lpp->vri_linkid > rpp->vri_linkid)
+ return (1);
+ if (lpp->vri_linkid < rpp->vri_linkid)
+ return (-1);
+ return (0);
+}
+
+int
+libvarpd_create(varpd_handle_t **vphp)
+{
+ int ret;
+ varpd_impl_t *vip;
+ char buf[32];
+
+ if (vphp == NULL)
+ return (EINVAL);
+
+ *vphp = NULL;
+ vip = umem_alloc(sizeof (varpd_impl_t), UMEM_DEFAULT);
+ if (vip == NULL)
+ return (errno);
+
+ bzero(vip, sizeof (varpd_impl_t));
+ (void) snprintf(buf, sizeof (buf), "varpd_%p", vip);
+ vip->vdi_idspace = id_space_create(buf, LIBVARPD_ID_MIN,
+ LIBVARPD_ID_MAX);
+ if (vip->vdi_idspace == NULL) {
+ int ret = errno;
+ umem_free(vip, sizeof (varpd_impl_t));
+ return (ret);
+ }
+
+ vip->vdi_qcache = umem_cache_create("query", sizeof (varpd_query_t), 0,
+ NULL, NULL, NULL, NULL, NULL, 0);
+ if (vip->vdi_qcache == NULL) {
+ int ret = errno;
+ id_space_destroy(vip->vdi_idspace);
+ umem_free(vip, sizeof (varpd_impl_t));
+ return (ret);
+ }
+
+ if ((ret = libvarpd_overlay_init(vip)) != 0) {
+ umem_cache_destroy(vip->vdi_qcache);
+ id_space_destroy(vip->vdi_idspace);
+ umem_free(vip, sizeof (varpd_impl_t));
+ return (ret);
+ }
+
+ if ((ret = bunyan_init("varpd", &vip->vdi_bunyan)) != 0) {
+ libvarpd_overlay_fini(vip);
+ umem_cache_destroy(vip->vdi_qcache);
+ id_space_destroy(vip->vdi_idspace);
+ umem_free(vip, sizeof (varpd_impl_t));
+ return (ret);
+ }
+
+ libvarpd_persist_init(vip);
+
+ avl_create(&vip->vdi_plugins, libvarpd_plugin_comparator,
+ sizeof (varpd_plugin_t), offsetof(varpd_plugin_t, vpp_node));
+
+ avl_create(&vip->vdi_instances, libvarpd_instance_comparator,
+ sizeof (varpd_instance_t), offsetof(varpd_instance_t, vri_inode));
+ avl_create(&vip->vdi_linstances, libvarpd_instance_lcomparator,
+ sizeof (varpd_instance_t), offsetof(varpd_instance_t, vri_lnode));
+
+ if (mutex_init(&vip->vdi_lock, USYNC_THREAD | LOCK_ERRORCHECK,
+ NULL) != 0)
+ libvarpd_panic("failed to create mutex: %d", errno);
+
+ vip->vdi_doorfd = -1;
+ *vphp = (varpd_handle_t *)vip;
+ return (0);
+}
+
+void
+libvarpd_destroy(varpd_handle_t *vhp)
+{
+ varpd_impl_t *vip = (varpd_impl_t *)vhp;
+
+ libvarpd_overlay_lookup_quiesce(vhp);
+ if (mutex_destroy(&vip->vdi_lock) != 0)
+ libvarpd_panic("failed to destroy mutex: %d", errno);
+ libvarpd_persist_fini(vip);
+ libvarpd_overlay_fini(vip);
+ umem_cache_destroy(vip->vdi_qcache);
+ id_space_destroy(vip->vdi_idspace);
+ umem_free(vip, sizeof (varpd_impl_t));
+}
+
+int
+libvarpd_instance_create(varpd_handle_t *vhp, datalink_id_t linkid,
+ const char *pname, varpd_instance_handle_t **outp)
+{
+ int ret;
+ varpd_impl_t *vip = (varpd_impl_t *)vhp;
+ varpd_plugin_t *plugin;
+ varpd_instance_t *inst, lookup;
+ overlay_plugin_dest_t dest;
+ uint64_t vid;
+
+ /*
+ * We should really have our own errnos.
+ */
+ plugin = libvarpd_plugin_lookup(vip, pname);
+ if (plugin == NULL)
+ return (ENOENT);
+
+ if ((ret = libvarpd_overlay_info(vip, linkid, &dest, NULL, &vid)) != 0)
+ return (ret);
+
+ inst = umem_alloc(sizeof (varpd_instance_t), UMEM_DEFAULT);
+ if (inst == NULL)
+ return (ENOMEM);
+
+ inst->vri_id = id_alloc(vip->vdi_idspace);
+ if (inst->vri_id == -1)
+ libvarpd_panic("failed to allocate id from vdi_idspace: %d",
+ errno);
+ inst->vri_linkid = linkid;
+ inst->vri_vnetid = vid;
+ inst->vri_mode = plugin->vpp_mode;
+ inst->vri_dest = dest;
+ inst->vri_plugin = plugin;
+ inst->vri_impl = vip;
+ inst->vri_flags = 0;
+ if ((ret = plugin->vpp_ops->vpo_create((varpd_provider_handle_t *)inst,
+ &inst->vri_private, dest)) != 0) {
+ id_free(vip->vdi_idspace, inst->vri_id);
+ umem_free(inst, sizeof (varpd_instance_t));
+ return (ret);
+ }
+
+ if (mutex_init(&inst->vri_lock, USYNC_THREAD | LOCK_ERRORCHECK,
+ NULL) != 0)
+ libvarpd_panic("failed to create mutex: %d", errno);
+
+ mutex_enter(&vip->vdi_lock);
+ lookup.vri_id = inst->vri_id;
+ if (avl_find(&vip->vdi_instances, &lookup, NULL) != NULL)
+ libvarpd_panic("found duplicate instance with id %d",
+ lookup.vri_id);
+ avl_add(&vip->vdi_instances, inst);
+ lookup.vri_linkid = inst->vri_linkid;
+ if (avl_find(&vip->vdi_linstances, &lookup, NULL) != NULL)
+ libvarpd_panic("found duplicate linstance with id %d",
+ lookup.vri_linkid);
+ avl_add(&vip->vdi_linstances, inst);
+ mutex_exit(&vip->vdi_lock);
+ *outp = (varpd_instance_handle_t *)inst;
+ return (0);
+}
+
+uint64_t
+libvarpd_instance_id(varpd_instance_handle_t *ihp)
+{
+ varpd_instance_t *inst = (varpd_instance_t *)ihp;
+ return (inst->vri_id);
+}
+
+uint64_t
+libvarpd_plugin_vnetid(varpd_provider_handle_t *vhp)
+{
+ varpd_instance_t *inst = (varpd_instance_t *)vhp;
+ return (inst->vri_vnetid);
+}
+
+varpd_instance_handle_t *
+libvarpd_instance_lookup(varpd_handle_t *vhp, uint64_t id)
+{
+ varpd_impl_t *vip = (varpd_impl_t *)vhp;
+ varpd_instance_t lookup, *retp;
+
+ lookup.vri_id = id;
+ mutex_enter(&vip->vdi_lock);
+ retp = avl_find(&vip->vdi_instances, &lookup, NULL);
+ mutex_exit(&vip->vdi_lock);
+ return ((varpd_instance_handle_t *)retp);
+}
+
+/*
+ * If this function becomes external to varpd, we need to change it to return a
+ * varpd_instance_handle_t.
+ */
+varpd_instance_t *
+libvarpd_instance_lookup_by_dlid(varpd_impl_t *vip, datalink_id_t linkid)
+{
+ varpd_instance_t lookup, *retp;
+
+ lookup.vri_linkid = linkid;
+ mutex_enter(&vip->vdi_lock);
+ retp = avl_find(&vip->vdi_linstances, &lookup, NULL);
+ mutex_exit(&vip->vdi_lock);
+ return (retp);
+}
+
+/*
+ * When an instance is being destroyed, that means we should deactivate it, as
+ * well as clean it up. That means here, the proper order is calling the plug-in
+ * stop and then the destroy function.
+ */
+void
+libvarpd_instance_destroy(varpd_instance_handle_t *ihp)
+{
+ varpd_instance_t *inst = (varpd_instance_t *)ihp;
+ varpd_impl_t *vip = inst->vri_impl;
+
+ /*
+ * First things first, remove it from global visibility.
+ */
+ mutex_enter(&vip->vdi_lock);
+ avl_remove(&vip->vdi_instances, inst);
+ avl_remove(&vip->vdi_linstances, inst);
+ mutex_exit(&vip->vdi_lock);
+
+ mutex_enter(&inst->vri_lock);
+
+ /*
+ * We need to clean up this instance, that means remove it from
+ * persistence and stopping it. Then finally we'll have to clean it up
+ * entirely.
+ */
+ if (inst->vri_flags & VARPD_INSTANCE_F_ACTIVATED) {
+ inst->vri_flags &= ~VARPD_INSTANCE_F_ACTIVATED;
+ libvarpd_torch_instance(vip, inst);
+ inst->vri_plugin->vpp_ops->vpo_stop(inst->vri_private);
+ inst->vri_plugin->vpp_ops->vpo_destroy(inst->vri_private);
+ inst->vri_private = NULL;
+ }
+ mutex_exit(&inst->vri_lock);
+
+ /* Do the full clean up of the instance */
+ if (mutex_destroy(&inst->vri_lock) != 0)
+ libvarpd_panic("failed to destroy instance vri_lock");
+ id_free(vip->vdi_idspace, inst->vri_id);
+ umem_free(inst, sizeof (varpd_instance_t));
+}
+
+int
+libvarpd_instance_activate(varpd_instance_handle_t *ihp)
+{
+ int ret;
+ varpd_instance_t *inst = (varpd_instance_t *)ihp;
+
+ mutex_enter(&inst->vri_lock);
+
+ if (inst->vri_flags & VARPD_INSTANCE_F_ACTIVATED) {
+ ret = EEXIST;
+ goto out;
+ }
+
+ if ((ret = inst->vri_plugin->vpp_ops->vpo_start(inst->vri_private)) !=
+ 0)
+ goto out;
+
+ if ((ret = libvarpd_persist_instance(inst->vri_impl, inst)) != 0)
+ goto out;
+
+ /*
+ * If this fails, we don't need to call stop, as the caller should end
+ * up calling destroy on the instance, which takes care of calling stop
+ * and destroy.
+ */
+ if ((ret = libvarpd_overlay_associate(inst)) != 0)
+ goto out;
+
+ inst->vri_flags |= VARPD_INSTANCE_F_ACTIVATED;
+
+out:
+ mutex_exit(&inst->vri_lock);
+ return (ret);
+}
+
+const bunyan_logger_t *
+libvarpd_plugin_bunyan(varpd_provider_handle_t *vhp)
+{
+ varpd_instance_t *inst = (varpd_instance_t *)vhp;
+ return (inst->vri_impl->vdi_bunyan);
+}
+
+static void
+libvarpd_prefork(void)
+{
+ libvarpd_plugin_prefork();
+}
+
+static void
+libvarpd_postfork(void)
+{
+ libvarpd_plugin_postfork();
+}
+
+#pragma init(libvarpd_init)
+static void
+libvarpd_init(void)
+{
+ libvarpd_plugin_init();
+ if (pthread_atfork(libvarpd_prefork, libvarpd_postfork,
+ libvarpd_postfork) != 0)
+ libvarpd_panic("failed to create varpd atfork: %d", errno);
+}
+
+#pragma fini(libvarpd_fini)
+static void
+libvarpd_fini(void)
+{
+ libvarpd_plugin_fini();
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd.h b/usr/src/lib/varpd/libvarpd/common/libvarpd.h
new file mode 100644
index 0000000000..aaf78e2ca6
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd.h
@@ -0,0 +1,77 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _LIBVARPD_H
+#define _LIBVARPD_H
+
+/*
+ * varpd interfaces
+ */
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <sys/mac.h>
+#include <libvarpd_client.h>
+#include <stdio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct __varpd_handle varpd_handle_t;
+typedef struct __varpd_prop_handle varpd_prop_handle_t;
+typedef struct __varpd_instance_handle varpd_instance_handle_t;
+
+extern int libvarpd_create(varpd_handle_t **);
+extern void libvarpd_destroy(varpd_handle_t *);
+
+extern int libvarpd_persist_enable(varpd_handle_t *, const char *);
+extern int libvarpd_persist_restore(varpd_handle_t *);
+extern int libvarpd_persist_disable(varpd_handle_t *);
+
+extern int libvarpd_instance_create(varpd_handle_t *, datalink_id_t,
+ const char *, varpd_instance_handle_t **);
+extern uint64_t libvarpd_instance_id(varpd_instance_handle_t *);
+extern varpd_instance_handle_t *libvarpd_instance_lookup(varpd_handle_t *,
+ uint64_t);
+extern void libvarpd_instance_destroy(varpd_instance_handle_t *);
+extern int libvarpd_instance_activate(varpd_instance_handle_t *);
+
+extern int libvarpd_plugin_load(varpd_handle_t *, const char *);
+typedef int (*libvarpd_plugin_walk_f)(varpd_handle_t *, const char *, void *);
+extern int libvarpd_plugin_walk(varpd_handle_t *, libvarpd_plugin_walk_f,
+ void *);
+
+extern int libvarpd_prop_handle_alloc(varpd_handle_t *,
+ varpd_instance_handle_t *, varpd_prop_handle_t **);
+extern void libvarpd_prop_handle_free(varpd_prop_handle_t *);
+extern int libvarpd_prop_nprops(varpd_instance_handle_t *, uint_t *);
+extern int libvarpd_prop_info_fill(varpd_prop_handle_t *, uint_t);
+extern int libvarpd_prop_info(varpd_prop_handle_t *, const char **, uint_t *,
+ uint_t *, const void **, uint32_t *, const mac_propval_range_t **);
+extern int libvarpd_prop_get(varpd_prop_handle_t *, void *, uint32_t *);
+extern int libvarpd_prop_set(varpd_prop_handle_t *, const void *, uint32_t);
+
+extern int libvarpd_door_server_create(varpd_handle_t *, const char *);
+extern void libvarpd_door_server_destroy(varpd_handle_t *);
+
+extern void libvarpd_overlay_lookup_run(varpd_handle_t *);
+extern void libvarpd_overlay_lookup_quiesce(varpd_handle_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBVARPD_H */
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_arp.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_arp.c
new file mode 100644
index 0000000000..df69207fe0
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_arp.c
@@ -0,0 +1,650 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * Common routines for implmeenting proxy arp
+ */
+
+#include <sys/types.h>
+#include <net/if.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/icmp6.h>
+#include <netinet/udp.h>
+#include <netinet/dhcp.h>
+#include <libvarpd_impl.h>
+#include <sys/vlan.h>
+#include <strings.h>
+#include <assert.h>
+
+#define IPV6_VERSION 6
+
+typedef struct varpd_arp_query {
+ int vaq_type;
+ char vaq_buf[ETHERMAX + VLAN_TAGSZ];
+ size_t vaq_bsize;
+ uint8_t vaq_lookup[ETHERADDRL];
+ struct sockaddr_storage vaq_sock;
+ varpd_instance_t *vaq_inst;
+ struct ether_arp *vaq_ea;
+ varpd_query_handle_t *vaq_query;
+ const overlay_targ_lookup_t *vaq_otl;
+ ip6_t *vaq_ip6;
+ nd_neighbor_solicit_t *vaq_ns;
+} varpd_arp_query_t;
+
+typedef struct varpd_dhcp_query {
+ char vdq_buf[ETHERMAX + VLAN_TAGSZ];
+ size_t vdq_bsize;
+ uint8_t vdq_lookup[ETHERADDRL];
+ const overlay_targ_lookup_t *vdq_otl;
+ varpd_instance_t *vdq_inst;
+ varpd_query_handle_t *vdq_query;
+ struct ether_header *vdq_ether;
+} varpd_dhcp_query_t;
+
+static const uint8_t libvarpd_arp_bcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff };
+
+void
+libvarpd_plugin_proxy_arp(varpd_provider_handle_t *hdl,
+ varpd_query_handle_t *vqh, const overlay_targ_lookup_t *otl)
+{
+ varpd_arp_query_t *vaq;
+ varpd_instance_t *inst = (varpd_instance_t *)hdl;
+ struct ether_arp *ea;
+ struct sockaddr_in *ip;
+
+ vaq = umem_alloc(sizeof (varpd_arp_query_t), UMEM_DEFAULT);
+ if (vaq == NULL) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ return;
+ }
+ vaq->vaq_bsize = sizeof (vaq->vaq_buf);
+
+ if (otl->otl_sap != ETHERTYPE_ARP) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ /*
+ * An ARP packet should not be very large because it's definited to only
+ * be allowed to have a single entry at a given time. But our data must
+ * be at least as large as an ether_arp and our header must be at least
+ * as large as a standard ethernet header.
+ */
+ if (otl->otl_hdrsize + otl->otl_pktsize > vaq->vaq_bsize ||
+ otl->otl_pktsize < sizeof (struct ether_arp) ||
+ otl->otl_hdrsize < sizeof (struct ether_header)) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ if (libvarpd_overlay_packet(inst->vri_impl, otl, vaq->vaq_buf,
+ &vaq->vaq_bsize) != 0) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ if (otl->otl_hdrsize + otl->otl_pktsize < vaq->vaq_bsize) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ ea = (void *)((uintptr_t)vaq->vaq_buf + (uintptr_t)otl->otl_hdrsize);
+
+ /*
+ * Make sure it matches something that we know about.
+ */
+ if (ntohs(ea->ea_hdr.ar_hrd) != ARPHRD_ETHER ||
+ ntohs(ea->ea_hdr.ar_pro) != ETHERTYPE_IP ||
+ ea->ea_hdr.ar_hln != ETHERADDRL ||
+ ea->ea_hdr.ar_pln != sizeof (ea->arp_spa) ||
+ ntohs(ea->ea_hdr.ar_op) != ARPOP_REQUEST) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ /*
+ * Now that we've verified that our data is sane, see if we're doing a
+ * gratuitous arp and if so, drop it. Otherwise, we may end up
+ * triggering duplicate address detection.
+ */
+ if (bcmp(ea->arp_spa, ea->arp_tpa, sizeof (ea->arp_spa)) == 0) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ bzero(&vaq->vaq_sock, sizeof (struct sockaddr_storage));
+ ip = (struct sockaddr_in *)&vaq->vaq_sock;
+ ip->sin_family = AF_INET;
+ bcopy(ea->arp_tpa, &ip->sin_addr, sizeof (ea->arp_tpa));
+
+ vaq->vaq_type = AF_INET;
+ vaq->vaq_inst = inst;
+ vaq->vaq_ea = ea;
+ vaq->vaq_query = vqh;
+ vaq->vaq_otl = otl;
+
+ if (inst->vri_plugin->vpp_ops->vpo_arp == NULL)
+ libvarpd_panic("%s plugin asked to do arp, but has no method",
+ inst->vri_plugin->vpp_name);
+
+ inst->vri_plugin->vpp_ops->vpo_arp(inst->vri_private,
+ (varpd_arp_handle_t *)vaq, VARPD_QTYPE_ETHERNET,
+ (struct sockaddr *)ip, vaq->vaq_lookup);
+}
+
+static void
+libvarpd_proxy_arp_fini(varpd_arp_query_t *vaq)
+{
+ struct ether_header *ether;
+ struct sockaddr_in *ip;
+
+ ip = (struct sockaddr_in *)&vaq->vaq_sock;
+ /*
+ * Modify our packet in place for a reply. We need to swap around the
+ * sender and target addresses.
+ */
+ vaq->vaq_ea->ea_hdr.ar_op = htons(ARPOP_REPLY);
+ bcopy(vaq->vaq_ea->arp_sha, vaq->vaq_ea->arp_tha, ETHERADDRL);
+ bcopy(vaq->vaq_lookup, vaq->vaq_ea->arp_sha, ETHERADDRL);
+ bcopy(vaq->vaq_ea->arp_spa, &ip->sin_addr,
+ sizeof (vaq->vaq_ea->arp_spa));
+ bcopy(vaq->vaq_ea->arp_tpa, vaq->vaq_ea->arp_spa,
+ sizeof (vaq->vaq_ea->arp_spa));
+ bcopy(&ip->sin_addr, vaq->vaq_ea->arp_tpa,
+ sizeof (vaq->vaq_ea->arp_spa));
+
+ /*
+ * Finally go ahead and fix up the mac header and reply to the sender
+ * explicitly.
+ */
+ ether = (struct ether_header *)vaq->vaq_buf;
+ bcopy(&ether->ether_shost, &ether->ether_dhost, ETHERADDRL);
+ bcopy(vaq->vaq_lookup, &ether->ether_shost, ETHERADDRL);
+
+ (void) libvarpd_overlay_inject(vaq->vaq_inst->vri_impl, vaq->vaq_otl,
+ vaq->vaq_buf, vaq->vaq_bsize);
+
+ libvarpd_plugin_query_reply(vaq->vaq_query, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+}
+
+static uint16_t
+libvarpd_icmpv6_checksum(const ip6_t *v6hdr, const uint16_t *buf, uint16_t mlen)
+{
+ int i;
+ uint16_t *v;
+ uint32_t sum = 0;
+
+ assert(mlen % 2 == 0);
+ v = (uint16_t *)&v6hdr->ip6_src;
+ for (i = 0; i < sizeof (struct in6_addr); i += 2, v++)
+ sum += *v;
+ v = (uint16_t *)&v6hdr->ip6_dst;
+ for (i = 0; i < sizeof (struct in6_addr); i += 2, v++)
+ sum += *v;
+ sum += htons(mlen);
+#ifdef _BIG_ENDIAN
+ sum += IPPROTO_ICMPV6;
+#else
+ sum += IPPROTO_ICMPV6 << 8;
+#endif /* _BIG_ENDIAN */
+
+ for (i = 0; i < mlen; i += 2, buf++)
+ sum += *buf;
+
+ while ((sum >> 16) != 0)
+ sum = (sum & 0xffff) + (sum >> 16);
+
+ return (sum & 0xffff);
+}
+
+/*
+ * Proxying NDP is much more involved than proxying ARP. For starters, NDP
+ * neighbor solicitations are implemented in terms of IPv6 ICMP as opposed to
+ * its own Ethertype. Therefore, we're going to have to grab a packet if it's a
+ * multicast packet and then determine if we actually want to do anything with
+ * it.
+ */
+void
+libvarpd_plugin_proxy_ndp(varpd_provider_handle_t *hdl,
+ varpd_query_handle_t *vqh, const overlay_targ_lookup_t *otl)
+{
+ size_t bsize, plen;
+ varpd_arp_query_t *vaq;
+ ip6_t *v6hdr;
+ nd_neighbor_solicit_t *ns;
+ nd_opt_hdr_t *opt;
+ struct sockaddr_in6 *s6;
+
+ varpd_instance_t *inst = (varpd_instance_t *)hdl;
+ uint8_t *eth = NULL;
+
+ vaq = umem_alloc(sizeof (varpd_arp_query_t), UMEM_DEFAULT);
+ if (vaq == NULL) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ return;
+ }
+ vaq->vaq_bsize = sizeof (vaq->vaq_buf);
+
+ if (otl->otl_dstaddr[0] != 0x33 ||
+ otl->otl_dstaddr[1] != 0x33) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ /*
+ * If we have more than a standard frame size for the ICMP neighbor
+ * solicitation, drop it. Similarly if there isn't enough data present
+ * for us, drop it.
+ */
+ if (otl->otl_hdrsize + otl->otl_pktsize > vaq->vaq_bsize) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ if (otl->otl_pktsize < sizeof (ip6_t) +
+ sizeof (nd_neighbor_solicit_t)) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ if (libvarpd_overlay_packet(inst->vri_impl, otl, vaq->vaq_buf,
+ &vaq->vaq_bsize) != 0) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ bsize = vaq->vaq_bsize;
+ bsize -= otl->otl_hdrsize;
+ assert(bsize > sizeof (ip6_t));
+
+ v6hdr = (ip6_t *)(vaq->vaq_buf + otl->otl_hdrsize);
+ if (((v6hdr->ip6_vfc & 0xf0) >> 4) != IPV6_VERSION) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ if (v6hdr->ip6_nxt != IPPROTO_ICMPV6) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ /*
+ * In addition to getting these requests on the multicast address for
+ * node solicitation, we may also end up getting them on a generic
+ * multicast address due to timeouts or other choices by various OSes.
+ * We should fairly liberal and accept both, even though the standard
+ * wants them to a solicitation address.
+ */
+ if (!IN6_IS_ADDR_MC_SOLICITEDNODE(&v6hdr->ip6_dst) &&
+ !IN6_IS_ADDR_MC_LINKLOCAL(&v6hdr->ip6_dst)) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ bsize -= sizeof (ip6_t);
+ plen = ntohs(v6hdr->ip6_plen);
+ if (bsize < plen) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ /*
+ * Now we know that this is an ICMPv6 request targetting the right
+ * IPv6 multicast prefix. Let's go through and verify that ICMPv6
+ * indicates that we have the real thing and ensure that per RFC 4861
+ * the target address is not a multicast address. Further, because this
+ * is a multicast on Ethernet, we must have a source link-layer address.
+ *
+ * We should probably enforce that we have a valid ICMP checksum at some
+ * point.
+ */
+ ns = (nd_neighbor_solicit_t *)(vaq->vaq_buf + otl->otl_hdrsize +
+ sizeof (ip6_t));
+ if (ns->nd_ns_type != ND_NEIGHBOR_SOLICIT && ns->nd_ns_code != 0) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ if (IN6_IS_ADDR_MULTICAST(&ns->nd_ns_target) ||
+ IN6_IS_ADDR_V4MAPPED(&ns->nd_ns_target) ||
+ IN6_IS_ADDR_LOOPBACK(&ns->nd_ns_target)) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ plen -= sizeof (nd_neighbor_solicit_t);
+ opt = (nd_opt_hdr_t *)(ns+1);
+ while (plen >= sizeof (struct nd_opt_hdr)) {
+ /* If we have an option with no lenght, that's clear bogus */
+ if (opt->nd_opt_len == 0) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ if (opt->nd_opt_type == ND_OPT_SOURCE_LINKADDR) {
+ eth = (uint8_t *)((uintptr_t)opt +
+ sizeof (nd_opt_hdr_t));
+ }
+ plen -= opt->nd_opt_len * 8;
+ opt = (nd_opt_hdr_t *)((uintptr_t)opt +
+ opt->nd_opt_len * 8);
+ }
+
+ if (eth == NULL) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ }
+
+ bzero(&vaq->vaq_sock, sizeof (struct sockaddr_storage));
+ s6 = (struct sockaddr_in6 *)&vaq->vaq_sock;
+ s6->sin6_family = AF_INET6;
+ bcopy(&ns->nd_ns_target, &s6->sin6_addr, sizeof (s6->sin6_addr));
+
+ if (inst->vri_plugin->vpp_ops->vpo_arp == NULL)
+ libvarpd_panic("%s plugin asked to do arp, but has no method",
+ inst->vri_plugin->vpp_name);
+
+ vaq->vaq_type = AF_INET6;
+ vaq->vaq_inst = inst;
+ vaq->vaq_ea = NULL;
+ vaq->vaq_query = vqh;
+ vaq->vaq_otl = otl;
+ vaq->vaq_ns = ns;
+ vaq->vaq_ip6 = v6hdr;
+ inst->vri_plugin->vpp_ops->vpo_arp(inst->vri_private,
+ (varpd_arp_handle_t *)vaq, VARPD_QTYPE_ETHERNET,
+ (struct sockaddr *)s6, vaq->vaq_lookup);
+}
+
+static void
+libvarpd_proxy_ndp_fini(varpd_arp_query_t *vaq)
+{
+ char resp[ETHERMAX + VLAN_TAGSZ];
+ struct ether_header *ether;
+ nd_neighbor_advert_t *na;
+ nd_opt_hdr_t *opt;
+ ip6_t *v6hdr;
+ size_t roff = 0;
+
+ /*
+ * Now we need to assemble an RA as a response. Unlike with arp, we opt
+ * to use a new packet just to make things a bit simpler saner here.
+ */
+ v6hdr = vaq->vaq_ip6;
+ bcopy(vaq->vaq_buf, resp, vaq->vaq_otl->otl_hdrsize);
+ ether = (struct ether_header *)resp;
+ bcopy(&ether->ether_shost, &ether->ether_dhost, ETHERADDRL);
+ bcopy(vaq->vaq_lookup, &ether->ether_shost, ETHERADDRL);
+ roff += vaq->vaq_otl->otl_hdrsize;
+ bcopy(v6hdr, resp + roff, sizeof (ip6_t));
+ v6hdr = (ip6_t *)(resp + roff);
+ bcopy(&v6hdr->ip6_src, &v6hdr->ip6_dst, sizeof (struct in6_addr));
+ bcopy(&vaq->vaq_ns->nd_ns_target, &v6hdr->ip6_src,
+ sizeof (struct in6_addr));
+ roff += sizeof (ip6_t);
+ na = (nd_neighbor_advert_t *)(resp + roff);
+ na->nd_na_type = ND_NEIGHBOR_ADVERT;
+ na->nd_na_code = 0;
+ /*
+ * RFC 4443 defines that we should set the checksum to zero before we
+ * calculate the checksumat we should set the checksum to zero before we
+ * calculate it.
+ */
+ na->nd_na_cksum = 0;
+ /*
+ * Nota bene, the header <netinet/icmp6.h> has already transformed this
+ * into the appropriate host order. Don't use htonl.
+ */
+ na->nd_na_flags_reserved = ND_NA_FLAG_SOLICITED | ND_NA_FLAG_OVERRIDE;
+ bcopy(&vaq->vaq_ns->nd_ns_target, &na->nd_na_target,
+ sizeof (struct in6_addr));
+ roff += sizeof (nd_neighbor_advert_t);
+
+ opt = (nd_opt_hdr_t *)(resp + roff);
+ opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
+ opt->nd_opt_len = 1;
+ roff += sizeof (nd_opt_hdr_t);
+ bcopy(vaq->vaq_lookup, resp + roff, ETHERADDRL);
+ roff += ETHERADDRL;
+
+ /*
+ * Now that we've filled in the packet, go back and compute the checksum
+ * and fill in the IPv6 payload size.
+ */
+ v6hdr->ip6_plen = htons(roff - sizeof (ip6_t) -
+ vaq->vaq_otl->otl_hdrsize);
+ na->nd_na_cksum = ~libvarpd_icmpv6_checksum(v6hdr, (uint16_t *)na,
+ ntohs(v6hdr->ip6_plen)) & 0xffff;
+
+ (void) libvarpd_overlay_inject(vaq->vaq_inst->vri_impl, vaq->vaq_otl,
+ resp, roff);
+
+ libvarpd_plugin_query_reply(vaq->vaq_query, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+}
+
+void
+libvarpd_plugin_arp_reply(varpd_arp_handle_t *vah, int action)
+{
+ varpd_arp_query_t *vaq = (varpd_arp_query_t *)vah;
+
+ if (vaq == NULL)
+ libvarpd_panic("unknown plugin passed invalid "
+ "varpd_arp_handle_t");
+
+ if (action == VARPD_LOOKUP_DROP) {
+ libvarpd_plugin_query_reply(vaq->vaq_query, VARPD_LOOKUP_DROP);
+ umem_free(vaq, sizeof (varpd_arp_query_t));
+ return;
+ } else if (action != VARPD_LOOKUP_OK)
+ libvarpd_panic("%s plugin returned invalid action %d",
+ vaq->vaq_inst->vri_plugin->vpp_name, action);
+
+ switch (vaq->vaq_type) {
+ case AF_INET:
+ libvarpd_proxy_arp_fini(vaq);
+ break;
+ case AF_INET6:
+ libvarpd_proxy_ndp_fini(vaq);
+ break;
+ default:
+ libvarpd_panic("encountered unknown vaq_type: %d",
+ vaq->vaq_type);
+ }
+}
+
+void
+libvarpd_plugin_proxy_dhcp(varpd_provider_handle_t *hdl,
+ varpd_query_handle_t *vqh, const overlay_targ_lookup_t *otl)
+{
+ varpd_dhcp_query_t *vdq;
+ struct ether_header *ether;
+ struct ip *ip;
+ struct udphdr *udp;
+ varpd_instance_t *inst = (varpd_instance_t *)hdl;
+
+ vdq = umem_alloc(sizeof (varpd_dhcp_query_t), UMEM_DEFAULT);
+ if (vdq == NULL) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ return;
+ }
+ vdq->vdq_bsize = sizeof (vdq->vdq_buf);
+
+ if (otl->otl_sap != ETHERTYPE_IP) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vdq, sizeof (varpd_dhcp_query_t));
+ return;
+ }
+
+ if (bcmp(otl->otl_dstaddr, libvarpd_arp_bcast, ETHERADDRL) != 0) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vdq, sizeof (varpd_dhcp_query_t));
+ return;
+ }
+
+ if (otl->otl_hdrsize + otl->otl_pktsize > vdq->vdq_bsize ||
+ otl->otl_pktsize < sizeof (struct ip) + sizeof (struct udphdr) +
+ sizeof (struct dhcp) ||
+ otl->otl_hdrsize < sizeof (struct ether_header)) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vdq, sizeof (varpd_dhcp_query_t));
+ return;
+ }
+
+ if (libvarpd_overlay_packet(inst->vri_impl, otl, vdq->vdq_buf,
+ &vdq->vdq_bsize) != 0) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vdq, sizeof (varpd_dhcp_query_t));
+ return;
+ }
+
+ if (vdq->vdq_bsize != otl->otl_hdrsize + otl->otl_pktsize) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vdq, sizeof (varpd_dhcp_query_t));
+ return;
+ }
+
+ ether = (struct ether_header *)vdq->vdq_buf;
+ ip = (struct ip *)(vdq->vdq_buf + otl->otl_hdrsize);
+
+ if (ip->ip_v != IPVERSION && ip->ip_p != IPPROTO_UDP) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vdq, sizeof (varpd_dhcp_query_t));
+ return;
+ }
+
+ if (otl->otl_hdrsize + ip->ip_hl * 4 + sizeof (struct udphdr) >
+ vdq->vdq_bsize) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vdq, sizeof (varpd_dhcp_query_t));
+ return;
+ }
+
+ udp = (struct udphdr *)(vdq->vdq_buf + otl->otl_hdrsize +
+ ip->ip_hl * 4);
+
+ if (ntohs(udp->uh_sport) != IPPORT_BOOTPC ||
+ ntohs(udp->uh_dport) != IPPORT_BOOTPS) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ umem_free(vdq, sizeof (varpd_dhcp_query_t));
+ return;
+ }
+
+ vdq->vdq_ether = ether;
+ vdq->vdq_inst = inst;
+ vdq->vdq_query = vqh;
+ vdq->vdq_otl = otl;
+
+ if (inst->vri_plugin->vpp_ops->vpo_dhcp == NULL)
+ libvarpd_panic("%s plugin asked to do dhcp, but has no method",
+ inst->vri_plugin->vpp_name);
+
+ inst->vri_plugin->vpp_ops->vpo_dhcp(inst->vri_private,
+ (varpd_dhcp_handle_t *)vdq, VARPD_QTYPE_ETHERNET, otl,
+ vdq->vdq_lookup);
+}
+
+void
+libvarpd_plugin_dhcp_reply(varpd_dhcp_handle_t *vdh, int action)
+{
+ varpd_dhcp_query_t *vdq = (varpd_dhcp_query_t *)vdh;
+
+ if (vdq == NULL)
+ libvarpd_panic("unknown plugin passed invalid "
+ "varpd_dhcp_handle_t");
+
+ if (action == VARPD_LOOKUP_DROP) {
+ libvarpd_plugin_query_reply(vdq->vdq_query, VARPD_LOOKUP_DROP);
+ umem_free(vdq, sizeof (varpd_dhcp_query_t));
+ return;
+ } else if (action != VARPD_LOOKUP_OK)
+ libvarpd_panic("%s plugin returned invalid action %d",
+ vdq->vdq_inst->vri_plugin->vpp_name, action);
+
+ bcopy(vdq->vdq_lookup, &vdq->vdq_ether->ether_dhost, ETHERADDRL);
+ (void) libvarpd_overlay_resend(vdq->vdq_inst->vri_impl, vdq->vdq_otl,
+ vdq->vdq_buf, vdq->vdq_bsize);
+
+ libvarpd_plugin_query_reply(vdq->vdq_query, VARPD_LOOKUP_DROP);
+ umem_free(vdq, sizeof (varpd_dhcp_query_t));
+}
+
+/*
+ * Inject a gratuitious ARP packet to the specified mac address.
+ */
+void
+libvarpd_inject_arp(varpd_provider_handle_t *vph, const uint16_t vlan,
+ const uint8_t *srcmac, const struct in_addr *srcip, const uint8_t *dstmac)
+{
+ char buf[500];
+ size_t bsize = 0;
+ struct ether_arp *ea;
+ varpd_instance_t *inst = (varpd_instance_t *)vph;
+
+ if (vlan != 0) {
+ struct ether_vlan_header *eh;
+ eh = (struct ether_vlan_header *)(buf + bsize);
+ bsize += sizeof (struct ether_vlan_header);
+ bcopy(dstmac, &eh->ether_dhost, ETHERADDRL);
+ bcopy(srcmac, &eh->ether_shost, ETHERADDRL);
+ eh->ether_tpid = htons(ETHERTYPE_VLAN);
+ eh->ether_tci = htons(VLAN_TCI(0, ETHER_CFI, vlan));
+ eh->ether_type = htons(ETHERTYPE_ARP);
+ } else {
+ struct ether_header *eh;
+ eh = (struct ether_header *)(buf + bsize);
+ bsize += sizeof (struct ether_header);
+ bcopy(dstmac, &eh->ether_dhost, ETHERADDRL);
+ bcopy(srcmac, &eh->ether_shost, ETHERADDRL);
+ eh->ether_type = htons(ETHERTYPE_ARP);
+ }
+
+ ea = (struct ether_arp *)(buf + bsize);
+ bsize += sizeof (struct ether_arp);
+ ea->ea_hdr.ar_hrd = htons(ARPHRD_ETHER);
+ ea->ea_hdr.ar_pro = htons(ETHERTYPE_IP);
+ ea->ea_hdr.ar_hln = ETHERADDRL;
+ ea->ea_hdr.ar_pln = sizeof (struct in_addr);
+ ea->ea_hdr.ar_op = htons(ARPOP_REQUEST);
+ bcopy(srcmac, ea->arp_sha, ETHERADDRL);
+ bcopy(srcip, ea->arp_spa, sizeof (struct in_addr));
+ bcopy(libvarpd_arp_bcast, ea->arp_tha, ETHERADDRL);
+ bcopy(srcip, ea->arp_tpa, sizeof (struct in_addr));
+
+ (void) libvarpd_overlay_instance_inject(inst, buf, bsize);
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_client.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_client.c
new file mode 100644
index 0000000000..b0fa907386
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_client.c
@@ -0,0 +1,626 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * varpd client interfaces
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <umem.h>
+#include <unistd.h>
+#include <string.h>
+#include <strings.h>
+#include <door.h>
+
+#include <libvarpd_impl.h>
+
+typedef struct varpd_client {
+ int vcl_doorfd;
+} varpd_client_t;
+
+typedef struct varpd_client_prop_info {
+ varpd_client_t *vcprop_client;
+ uint64_t vcprop_instance;
+ uint_t vcprop_propid;
+ uint_t vcprop_type;
+ uint_t vcprop_prot;
+ uint32_t vcprop_defsize;
+ uint32_t vcprop_psize;
+ char vcprop_name[LIBVARPD_PROP_NAMELEN];
+ uint8_t vcprop_default[LIBVARPD_PROP_SIZEMAX];
+ uint8_t vcprop_poss[LIBVARPD_PROP_SIZEMAX];
+} varpd_client_prop_info_t;
+
+static int
+libvarpd_c_door_call(varpd_client_t *client, varpd_client_arg_t *argp,
+ size_t altsize)
+{
+ int ret;
+ door_arg_t darg;
+
+ darg.data_ptr = (char *)argp;
+ darg.desc_ptr = NULL;
+ darg.desc_num = 0;
+ darg.rbuf = (char *)argp;
+ if (altsize != 0) {
+ darg.data_size = altsize;
+ darg.rsize = altsize;
+ } else {
+ darg.data_size = sizeof (varpd_client_arg_t);
+ darg.rsize = sizeof (varpd_client_arg_t);
+ }
+
+ do {
+ ret = door_call(client->vcl_doorfd, &darg);
+ } while (ret != 0 && errno == EINTR);
+ if (ret != 0) {
+ switch (errno) {
+ case E2BIG:
+ case EFAULT:
+ case EINVAL:
+ case ENOTSUP:
+ case EOVERFLOW:
+ case ENFILE:
+ libvarpd_panic("unhandalable errno from door_call: %d",
+ errno);
+ }
+ ret = errno;
+ }
+
+ return (ret);
+}
+
+int
+libvarpd_c_create(varpd_client_handle_t **chpp, const char *doorname)
+{
+ varpd_client_t *client;
+
+ client = umem_alloc(sizeof (varpd_client_t), UMEM_DEFAULT);
+ if (client == NULL)
+ return (ENOMEM);
+
+ client->vcl_doorfd = open(doorname, O_RDWR);
+ if (client->vcl_doorfd < 0) {
+ int ret = errno;
+ umem_free(client, sizeof (varpd_client_t));
+ return (ret);
+ }
+
+ *chpp = (varpd_client_handle_t *)client;
+ return (0);
+}
+
+void
+libvarpd_c_destroy(varpd_client_handle_t *chp)
+{
+ varpd_client_t *client = (varpd_client_t *)chp;
+ if (close(client->vcl_doorfd) != 0)
+ libvarpd_panic("failed to close door fd %d: %d",
+ client->vcl_doorfd, errno);
+
+ umem_free(chp, sizeof (varpd_client_handle_t *));
+}
+
+int
+libvarpd_c_instance_create(varpd_client_handle_t *chp, datalink_id_t linkid,
+ const char *search, uint64_t *cidp)
+{
+ int ret;
+ varpd_client_t *client = (varpd_client_t *)chp;
+ varpd_client_arg_t carg;
+ varpd_client_create_arg_t *cap = &carg.vca_un.vca_create;
+
+ if (strlen(search) >= LIBVARPD_PROP_NAMELEN)
+ return (EINVAL);
+ carg.vca_command = VARPD_CLIENT_CREATE;
+ carg.vca_errno = 0;
+ cap->vcca_linkid = linkid;
+ (void) strlcpy(cap->vcca_plugin, search, LIBVARPD_PROP_NAMELEN);
+
+ ret = libvarpd_c_door_call(client, &carg, 0);
+ if (ret != 0)
+ return (ret);
+
+ if (carg.vca_errno != 0)
+ return (carg.vca_errno);
+
+ *cidp = cap->vcca_id;
+
+ return (0);
+}
+
+int
+libvarpd_c_instance_activate(varpd_client_handle_t *chp, uint64_t cid)
+{
+ int ret;
+ varpd_client_t *client = (varpd_client_t *)chp;
+ varpd_client_arg_t carg;
+ varpd_client_instance_arg_t *vciap = &carg.vca_un.vca_instance;
+
+ carg.vca_command = VARPD_CLIENT_ACTIVATE;
+ carg.vca_errno = 0;
+ vciap->vcia_id = cid;
+
+ ret = libvarpd_c_door_call(client, &carg, 0);
+ if (ret != 0)
+ return (ret);
+
+ if (carg.vca_errno != 0)
+ return (carg.vca_errno);
+
+ return (0);
+}
+
+int
+libvarpd_c_instance_destroy(varpd_client_handle_t *chp, uint64_t cid)
+{
+ int ret;
+ varpd_client_t *client = (varpd_client_t *)chp;
+ varpd_client_arg_t carg;
+ varpd_client_instance_arg_t *vciap = &carg.vca_un.vca_instance;
+
+ carg.vca_command = VARPD_CLIENT_DESTROY;
+ carg.vca_errno = 0;
+ vciap->vcia_id = cid;
+
+ ret = libvarpd_c_door_call(client, &carg, 0);
+ if (ret != 0)
+ return (ret);
+
+ if (carg.vca_errno != 0)
+ return (carg.vca_errno);
+
+ return (0);
+}
+
+int
+libvarpd_c_prop_nprops(varpd_client_handle_t *chp, uint64_t cid, uint_t *nprops)
+{
+ int ret;
+ varpd_client_t *client = (varpd_client_t *)chp;
+ varpd_client_arg_t carg;
+ varpd_client_nprops_arg_t *vcnap = &carg.vca_un.vca_nprops;
+
+ carg.vca_command = VARPD_CLIENT_NPROPS;
+ carg.vca_errno = 0;
+ vcnap->vcna_id = cid;
+ vcnap->vcna_nprops = 0;
+
+ ret = libvarpd_c_door_call(client, &carg, 0);
+ if (ret != 0)
+ return (ret);
+
+ if (carg.vca_errno != 0)
+ return (carg.vca_errno);
+ *nprops = vcnap->vcna_nprops;
+ return (0);
+}
+
+int
+libvarpd_c_prop_handle_alloc(varpd_client_handle_t *chp, uint64_t cid,
+ varpd_client_prop_handle_t **phdlp)
+{
+ varpd_client_prop_info_t *infop;
+
+ infop = umem_alloc(sizeof (varpd_client_prop_info_t), UMEM_DEFAULT);
+ if (infop == NULL)
+ return (ENOMEM);
+
+ bzero(infop, sizeof (varpd_client_prop_info_t));
+ infop->vcprop_client = (varpd_client_t *)chp;
+ infop->vcprop_instance = cid;
+ infop->vcprop_propid = UINT_MAX;
+ *phdlp = (varpd_client_prop_handle_t *)infop;
+ return (0);
+}
+
+void
+libvarpd_c_prop_handle_free(varpd_client_prop_handle_t *phdl)
+{
+ umem_free(phdl, sizeof (varpd_client_prop_info_t));
+ phdl = NULL;
+}
+
+static void
+libvarpd_c_prop_info_from_door(varpd_client_prop_info_t *infop,
+ const varpd_client_propinfo_arg_t *vcfap)
+{
+ infop->vcprop_propid = vcfap->vcfa_propid;
+ infop->vcprop_type = vcfap->vcfa_type;
+ infop->vcprop_prot = vcfap->vcfa_prot;
+ infop->vcprop_defsize = vcfap->vcfa_defsize;
+ infop->vcprop_psize = vcfap->vcfa_psize;
+ bcopy(vcfap->vcfa_name, infop->vcprop_name, LIBVARPD_PROP_NAMELEN);
+ bcopy(vcfap->vcfa_default, infop->vcprop_default,
+ LIBVARPD_PROP_SIZEMAX);
+ bcopy(vcfap->vcfa_poss, infop->vcprop_poss, LIBVARPD_PROP_SIZEMAX);
+}
+
+int
+libvarpd_c_prop_info_fill_by_name(varpd_client_prop_handle_t *phdl,
+ const char *name)
+{
+ int ret;
+ varpd_client_arg_t carg;
+ varpd_client_propinfo_arg_t *vcfap = &carg.vca_un.vca_info;
+ varpd_client_prop_info_t *infop = (varpd_client_prop_info_t *)phdl;
+
+ if (strlen(name) >= LIBVARPD_PROP_NAMELEN)
+ return (EINVAL);
+ bzero(&carg, sizeof (varpd_client_arg_t));
+ carg.vca_command = VARPD_CLIENT_PROPINFO;
+ carg.vca_errno = 0;
+ vcfap->vcfa_id = infop->vcprop_instance;
+ vcfap->vcfa_propid = UINT_MAX;
+ (void) strlcpy(vcfap->vcfa_name, name, LIBVARPD_PROP_NAMELEN);
+
+ ret = libvarpd_c_door_call(infop->vcprop_client, &carg, 0);
+ if (ret != 0)
+ return (ret);
+
+ if (carg.vca_errno != 0)
+ return (carg.vca_errno);
+
+ libvarpd_c_prop_info_from_door(infop, vcfap);
+ return (0);
+}
+
+int
+libvarpd_c_prop_info_fill(varpd_client_prop_handle_t *phdl, uint_t propid)
+{
+ int ret;
+ varpd_client_arg_t carg;
+ varpd_client_propinfo_arg_t *vcfap = &carg.vca_un.vca_info;
+ varpd_client_prop_info_t *infop = (varpd_client_prop_info_t *)phdl;
+
+ bzero(&carg, sizeof (varpd_client_arg_t));
+ carg.vca_command = VARPD_CLIENT_PROPINFO;
+ carg.vca_errno = 0;
+ vcfap->vcfa_id = infop->vcprop_instance;
+ vcfap->vcfa_propid = propid;
+
+ ret = libvarpd_c_door_call(infop->vcprop_client, &carg, 0);
+ if (ret != 0)
+ return (ret);
+
+ if (carg.vca_errno != 0)
+ return (carg.vca_errno);
+
+ libvarpd_c_prop_info_from_door(infop, vcfap);
+ return (0);
+}
+
+int
+libvarpd_c_prop_info(varpd_client_prop_handle_t *phdl, const char **namep,
+ uint_t *typep, uint_t *protp, const void **defp, uint32_t *defsizep,
+ const mac_propval_range_t **possp)
+{
+ varpd_client_prop_info_t *infop = (varpd_client_prop_info_t *)phdl;
+ if (infop->vcprop_propid == UINT_MAX)
+ return (EINVAL);
+
+ if (namep != NULL)
+ *namep = infop->vcprop_name;
+ if (typep != NULL)
+ *typep = infop->vcprop_type;
+ if (protp != NULL)
+ *protp = infop->vcprop_prot;
+ if (defp != NULL)
+ *defp = infop->vcprop_default;
+ if (defsizep != NULL)
+ *defsizep = infop->vcprop_defsize;
+ if (possp != NULL)
+ *possp = (const mac_propval_range_t *)infop->vcprop_poss;
+ return (0);
+}
+
+int
+libvarpd_c_prop_get(varpd_client_prop_handle_t *phdl, void *buf, uint32_t *len)
+{
+ int ret;
+ varpd_client_arg_t carg;
+ varpd_client_prop_arg_t *vcpap = &carg.vca_un.vca_prop;
+ varpd_client_prop_info_t *infop = (varpd_client_prop_info_t *)phdl;
+
+ if (len == NULL || buf == NULL || infop->vcprop_propid == UINT_MAX)
+ return (EINVAL);
+ if (*len < LIBVARPD_PROP_SIZEMAX)
+ return (EOVERFLOW);
+
+ bzero(&carg, sizeof (varpd_client_arg_t));
+ carg.vca_command = VARPD_CLIENT_GETPROP;
+ carg.vca_errno = 0;
+ vcpap->vcpa_id = infop->vcprop_instance;
+ vcpap->vcpa_propid = infop->vcprop_propid;
+
+ ret = libvarpd_c_door_call(infop->vcprop_client, &carg, 0);
+ if (ret != 0)
+ return (ret);
+
+ if (carg.vca_errno != 0)
+ return (carg.vca_errno);
+
+ /*
+ * If the buffer size is too large then something odd has certainly
+ * happened here, it means that varpd has gone rogue. In such a case we
+ * return a rather odd errror, though we don't believe that this should
+ * generally happen.
+ */
+ if (vcpap->vcpa_bufsize > LIBVARPD_PROP_SIZEMAX)
+ return (E2BIG);
+
+ bcopy(vcpap->vcpa_buf, buf, vcpap->vcpa_bufsize);
+ *len = vcpap->vcpa_bufsize;
+ return (0);
+}
+
+int
+libvarpd_c_prop_set(varpd_client_prop_handle_t *phdl, const void *buf,
+ uint32_t len)
+{
+ int ret;
+ varpd_client_arg_t carg;
+ varpd_client_prop_arg_t *vcpap = &carg.vca_un.vca_prop;
+ varpd_client_prop_info_t *infop = (varpd_client_prop_info_t *)phdl;
+
+ if (len == NULL || buf == NULL || infop->vcprop_propid == UINT_MAX)
+ return (EINVAL);
+ if (len > LIBVARPD_PROP_SIZEMAX)
+ return (EOVERFLOW);
+
+ carg.vca_command = VARPD_CLIENT_SETPROP;
+ carg.vca_errno = 0;
+ vcpap->vcpa_id = infop->vcprop_instance;
+ vcpap->vcpa_propid = infop->vcprop_propid;
+ vcpap->vcpa_bufsize = len;
+ bcopy(buf, vcpap->vcpa_buf, len);
+
+ ret = libvarpd_c_door_call(infop->vcprop_client, &carg, 0);
+ if (ret != 0)
+ return (ret);
+
+ if (carg.vca_errno != 0)
+ return (carg.vca_errno);
+
+ return (0);
+}
+
+int
+libvarpd_c_instance_lookup(varpd_client_handle_t *chp, datalink_id_t linkid,
+ uint64_t *instp)
+{
+ int ret;
+ varpd_client_arg_t carg;
+ varpd_client_lookup_arg_t *vclap = &carg.vca_un.vca_lookup;
+ varpd_client_t *client = (varpd_client_t *)chp;
+
+ carg.vca_command = VARPD_CLIENT_LOOKUP;
+ carg.vca_errno = 0;
+ vclap->vcla_linkid = linkid;
+ ret = libvarpd_c_door_call(client, &carg, 0);
+ if (ret != 0)
+ return (ret);
+
+ if (carg.vca_errno != 0)
+ return (carg.vca_errno);
+ if (instp != NULL)
+ *instp = vclap->vcla_id;
+
+ return (0);
+}
+
+int
+libvarpd_c_instance_target_mode(varpd_client_handle_t *chp, uint64_t cid,
+ uint_t *dtype, uint_t *mtype)
+{
+ int ret;
+ varpd_client_arg_t carg;
+ varpd_client_target_mode_arg_t *vctmap = &carg.vca_un.vca_mode;
+ varpd_client_t *client = (varpd_client_t *)chp;
+
+ carg.vca_command = VARPD_CLIENT_TARGET_MODE;
+ carg.vca_errno = 0;
+ vctmap->vtma_id = cid;
+ ret = libvarpd_c_door_call(client, &carg, 0);
+ if (ret != 0)
+ return (ret);
+
+ if (carg.vca_errno != 0)
+ return (carg.vca_errno);
+ if (ret == 0) {
+ if (mtype != NULL)
+ *mtype = vctmap->vtma_mode;
+ if (dtype != NULL)
+ *dtype = vctmap->vtma_dest;
+ }
+
+ return (ret);
+}
+
+int
+libvarpd_c_instance_cache_flush(varpd_client_handle_t *chp, uint64_t cid)
+{
+ int ret;
+ varpd_client_arg_t carg;
+ varpd_client_target_cache_arg_t *vctcap = &carg.vca_un.vca_cache;
+ varpd_client_t *client = (varpd_client_t *)chp;
+
+ carg.vca_command = VARPD_CLIENT_CACHE_FLUSH;
+ carg.vca_errno = 0;
+
+ vctcap->vtca_id = cid;
+ ret = libvarpd_c_door_call(client, &carg, 0);
+ if (ret != 0)
+ return (ret);
+
+ if (carg.vca_errno != 0)
+ return (carg.vca_errno);
+
+ return (0);
+}
+
+int
+libvarpd_c_instance_cache_delete(varpd_client_handle_t *chp, uint64_t cid,
+ const struct ether_addr *key)
+{
+ int ret;
+ varpd_client_arg_t carg;
+ varpd_client_target_cache_arg_t *vctcap = &carg.vca_un.vca_cache;
+ varpd_client_t *client = (varpd_client_t *)chp;
+
+ if (key == NULL)
+ return (EINVAL);
+
+ carg.vca_command = VARPD_CLIENT_CACHE_DELETE;
+ carg.vca_errno = 0;
+ vctcap->vtca_id = cid;
+ bcopy(key, vctcap->vtca_key, ETHERADDRL);
+
+ ret = libvarpd_c_door_call(client, &carg, 0);
+ if (ret != 0)
+ return (ret);
+
+ if (carg.vca_errno != 0)
+ return (carg.vca_errno);
+
+ return (0);
+}
+
+int
+libvarpd_c_instance_cache_get(varpd_client_handle_t *chp, uint64_t cid,
+ const struct ether_addr *key, varpd_client_cache_entry_t *entry)
+{
+ int ret;
+ varpd_client_arg_t carg;
+ varpd_client_target_cache_arg_t *vctcap = &carg.vca_un.vca_cache;
+ varpd_client_t *client = (varpd_client_t *)chp;
+
+ if (key == NULL || entry == NULL)
+ return (EINVAL);
+
+ carg.vca_command = VARPD_CLIENT_CACHE_GET;
+ carg.vca_errno = 0;
+ vctcap->vtca_id = cid;
+ bcopy(key, vctcap->vtca_key, ETHERADDRL);
+ bzero(&vctcap->vtca_entry, sizeof (varpd_client_cache_entry_t));
+
+ ret = libvarpd_c_door_call(client, &carg, 0);
+ if (ret != 0)
+ return (ret);
+
+ if (carg.vca_errno != 0)
+ return (carg.vca_errno);
+
+ bcopy(&vctcap->vtca_entry, entry, sizeof (varpd_client_cache_entry_t));
+ return (0);
+}
+
+int
+libvarpd_c_instance_cache_set(varpd_client_handle_t *chp, uint64_t cid,
+ const struct ether_addr *key, const varpd_client_cache_entry_t *entry)
+{
+ int ret;
+ varpd_client_arg_t carg;
+ varpd_client_target_cache_arg_t *vctcap = &carg.vca_un.vca_cache;
+ varpd_client_t *client = (varpd_client_t *)chp;
+
+ if (key == NULL || entry == NULL)
+ return (EINVAL);
+
+ carg.vca_command = VARPD_CLIENT_CACHE_SET;
+ carg.vca_errno = 0;
+ vctcap->vtca_id = cid;
+ bcopy(key, vctcap->vtca_key, ETHERADDRL);
+ bcopy(entry, &vctcap->vtca_entry, sizeof (varpd_client_cache_entry_t));
+
+ ret = libvarpd_c_door_call(client, &carg, 0);
+ if (ret != 0)
+ return (ret);
+
+ if (carg.vca_errno != 0)
+ return (carg.vca_errno);
+
+ return (0);
+}
+
+int
+libvarpd_c_instance_cache_walk(varpd_client_handle_t *chp, uint64_t cid,
+ varpd_client_cache_f func, void *arg)
+{
+ int ret = 0;
+ size_t bufsize = sizeof (varpd_client_arg_t) +
+ 100 * sizeof (varpd_client_cache_entry_t);
+ varpd_client_t *client = (varpd_client_t *)chp;
+ varpd_client_arg_t *cargp;
+ varpd_client_target_walk_arg_t *vctwap;
+
+ /*
+ * Because the number of entries involved in a walk may be large, we
+ * dynamically allocate a number of queries to make at a single time.
+ * This also means that the average door request doesn't inflate by the
+ * number of entries we want. For now, let's always grab 100 entries in
+ * a request.
+ */
+ cargp = umem_zalloc(bufsize, UMEM_DEFAULT);
+ if (cargp == NULL)
+ return (errno);
+ vctwap = &cargp->vca_un.vca_walk;
+ for (;;) {
+ int i;
+
+ cargp->vca_command = VARPD_CLIENT_CACHE_WALK;
+ cargp->vca_errno = 0;
+ vctwap->vtcw_id = cid;
+ vctwap->vtcw_count = 100;
+
+ ret = libvarpd_c_door_call(client, cargp, bufsize);
+ if (ret != 0)
+ break;
+
+ if (cargp->vca_errno != 0) {
+ ret = cargp->vca_errno;
+ break;
+ }
+
+ if (vctwap->vtcw_count == 0) {
+ ret = 0;
+ break;
+ }
+
+ for (i = 0; i < vctwap->vtcw_count; i++) {
+ varpd_client_cache_entry_t ent;
+
+ ent.vcp_flags = vctwap->vtcw_ents[i].otce_flags;
+ bcopy(vctwap->vtcw_ents[i].otce_dest.otp_mac,
+ &ent.vcp_mac, ETHERADDRL);
+ ent.vcp_ip = vctwap->vtcw_ents[i].otce_dest.otp_ip;
+ ent.vcp_port = vctwap->vtcw_ents[i].otce_dest.otp_port;
+ ret = func(chp, cid,
+ (struct ether_addr *)vctwap->vtcw_ents[i].otce_mac,
+ &ent, arg);
+ if (ret != 0) {
+ ret = 0;
+ goto done;
+ }
+ }
+ }
+
+done:
+ umem_free(cargp, bufsize);
+ return (ret);
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_client.h b/usr/src/lib/varpd/libvarpd/common/libvarpd_client.h
new file mode 100644
index 0000000000..459711b385
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_client.h
@@ -0,0 +1,92 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _LIBVARPD_CLIENT_H
+#define _LIBVARPD_CLIENT_H
+
+/*
+ * varpd interfaces
+ */
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <sys/mac.h>
+#include <sys/overlay_target.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct __varpd_client_handle varpd_client_handle_t;
+typedef struct __varpd_client_prop_handle varpd_client_prop_handle_t;
+
+typedef struct varpd_client_cache_entry {
+ struct ether_addr vcp_mac;
+ uint16_t vcp_flags;
+ struct in6_addr vcp_ip;
+ uint16_t vcp_port;
+} varpd_client_cache_entry_t;
+
+/*
+ * We just use the values from the kernel for now.
+ */
+#define LIBVARPD_PROP_SIZEMAX OVERLAY_PROP_SIZEMAX
+#define LIBVARPD_PROP_NAMELEN OVERLAY_PROP_NAMELEN
+
+extern int libvarpd_c_create(varpd_client_handle_t **, const char *);
+extern void libvarpd_c_destroy(varpd_client_handle_t *);
+extern int libvarpd_c_instance_create(varpd_client_handle_t *, datalink_id_t,
+ const char *, uint64_t *);
+extern int libvarpd_c_instance_activate(varpd_client_handle_t *, uint64_t);
+extern int libvarpd_c_instance_destroy(varpd_client_handle_t *, uint64_t);
+
+extern int libvarpd_c_prop_nprops(varpd_client_handle_t *, uint64_t, uint_t *);
+extern int libvarpd_c_prop_handle_alloc(varpd_client_handle_t *, uint64_t,
+ varpd_client_prop_handle_t **);
+extern void libvarpd_c_prop_handle_free(varpd_client_prop_handle_t *);
+extern int libvarpd_c_prop_info_fill(varpd_client_prop_handle_t *, uint_t);
+extern int libvarpd_c_prop_info_fill_by_name(varpd_client_prop_handle_t *,
+ const char *);
+extern int libvarpd_c_prop_info(varpd_client_prop_handle_t *, const char **,
+ uint_t *, uint_t *, const void **, uint32_t *,
+ const mac_propval_range_t **);
+extern int libvarpd_c_prop_get(varpd_client_prop_handle_t *, void *,
+ uint32_t *);
+extern int libvarpd_c_prop_set(varpd_client_prop_handle_t *, const void *,
+ uint32_t);
+
+extern int libvarpd_c_instance_lookup(varpd_client_handle_t *, datalink_id_t,
+ uint64_t *);
+extern int libvarpd_c_instance_target_mode(varpd_client_handle_t *, uint64_t,
+ uint_t *, uint_t *);
+extern int libvarpd_c_instance_cache_flush(varpd_client_handle_t *, uint64_t);
+extern int libvarpd_c_instance_cache_delete(varpd_client_handle_t *, uint64_t,
+ const struct ether_addr *);
+extern int libvarpd_c_instance_cache_get(varpd_client_handle_t *, uint64_t,
+ const struct ether_addr *, varpd_client_cache_entry_t *);
+extern int libvarpd_c_instance_cache_set(varpd_client_handle_t *, uint64_t,
+ const struct ether_addr *, const varpd_client_cache_entry_t *);
+
+typedef int (*varpd_client_cache_f)(varpd_client_handle_t *, uint64_t,
+ const struct ether_addr *, const varpd_client_cache_entry_t *, void *);
+extern int libvarpd_c_instance_cache_walk(varpd_client_handle_t *, uint64_t,
+ varpd_client_cache_f, void *);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBVARPD_CLIENT_H */
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_door.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_door.c
new file mode 100644
index 0000000000..f684e031a8
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_door.c
@@ -0,0 +1,469 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * varpd door server logic
+ */
+
+#include <door.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stropts.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <priv.h>
+#include <libvarpd_impl.h>
+
+typedef int (libvarpd_door_f)(varpd_impl_t *, varpd_client_arg_t *, ucred_t *);
+
+static boolean_t
+libvarpd_door_privileged(ucred_t *credp)
+{
+ const priv_set_t *ps;
+
+ ps = ucred_getprivset(credp, PRIV_EFFECTIVE);
+ if (ps == NULL)
+ return (B_FALSE);
+
+ return (priv_ismember(ps, PRIV_SYS_NET_CONFIG));
+}
+
+/* ARGSUSED */
+static int
+libvarpd_door_f_create(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+ ucred_t *credp)
+{
+ int ret;
+ varpd_instance_handle_t *ihdl;
+ varpd_client_create_arg_t *vccap = &vcap->vca_un.vca_create;
+
+ vccap->vcca_plugin[LIBVARPD_PROP_NAMELEN-1] = '\0';
+ ret = libvarpd_instance_create((varpd_handle_t *)vip,
+ vccap->vcca_linkid, vccap->vcca_plugin, &ihdl);
+ if (ret == 0)
+ vccap->vcca_id = libvarpd_instance_id(ihdl);
+
+ return (ret);
+}
+
+/* ARGSUSED */
+static int
+libvarpd_door_f_activate(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+ ucred_t *credp)
+{
+ varpd_instance_handle_t *ihp;
+ varpd_client_instance_arg_t *vciap = &vcap->vca_un.vca_instance;
+
+ ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vciap->vcia_id);
+ if (ihp == NULL)
+ return (ENOENT);
+ return (libvarpd_instance_activate(ihp));
+}
+
+/* ARGSUSED */
+static int
+libvarpd_door_f_destroy(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+ ucred_t *credp)
+{
+ varpd_instance_handle_t *ihp;
+ varpd_client_instance_arg_t *vciap = &vcap->vca_un.vca_instance;
+
+ ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vciap->vcia_id);
+ if (ihp == NULL)
+ return (ENOENT);
+ libvarpd_instance_destroy(ihp);
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+libvarpd_door_f_nprops(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+ ucred_t *credp)
+{
+ varpd_instance_handle_t *ihp;
+ varpd_client_nprops_arg_t *vcnap = &vcap->vca_un.vca_nprops;
+
+ ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vcnap->vcna_id);
+ if (ihp == NULL)
+ return (ENOENT);
+
+ return (libvarpd_prop_nprops(ihp, &vcnap->vcna_nprops));
+}
+
+/* ARGSUSED */
+static int
+libvarpd_door_f_propinfo(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+ ucred_t *credp)
+{
+ int ret;
+ varpd_instance_handle_t *ihp;
+ varpd_prop_handle_t *phdl;
+ varpd_client_propinfo_arg_t *vcfap = &vcap->vca_un.vca_info;
+
+ ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vcfap->vcfa_id);
+ if (ihp == NULL)
+ return (ENOENT);
+ ret = libvarpd_prop_handle_alloc((varpd_handle_t *)vip, ihp, &phdl);
+ if (ret != 0)
+ return (ret);
+
+ if (vcfap->vcfa_propid != UINT_MAX) {
+ ret = libvarpd_prop_info_fill(phdl, vcfap->vcfa_propid);
+ if (ret != 0) {
+ libvarpd_prop_handle_free(phdl);
+ return (ret);
+ }
+ } else {
+ uint_t i, nprop;
+ const char *name;
+
+ vcfap->vcfa_name[LIBVARPD_PROP_NAMELEN-1] = '\0';
+ ret = libvarpd_prop_nprops(ihp, &nprop);
+ if (ret != 0) {
+ libvarpd_prop_handle_free(phdl);
+ return (ret);
+ }
+ for (i = 0; i < nprop; i++) {
+ ret = libvarpd_prop_info_fill(phdl, i);
+ if (ret != 0) {
+ libvarpd_prop_handle_free(phdl);
+ return (ret);
+ }
+ ret = libvarpd_prop_info(phdl, &name, NULL, NULL, NULL,
+ NULL, NULL);
+ if (ret != 0) {
+ libvarpd_prop_handle_free(phdl);
+ return (ret);
+ }
+ if (strcmp(vcfap->vcfa_name, name) == 0)
+ break;
+ }
+
+ if (i == nprop) {
+ libvarpd_prop_handle_free(phdl);
+ return (ENOENT);
+ }
+ vcfap->vcfa_propid = i;
+ }
+ libvarpd_prop_door_convert(phdl, vcfap);
+ libvarpd_prop_handle_free(phdl);
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+libvarpd_door_f_getprop(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+ ucred_t *credp)
+{
+ int ret;
+ uint32_t size;
+ varpd_instance_handle_t *ihp;
+ varpd_prop_handle_t *phdl;
+ varpd_client_prop_arg_t *vcpap = &vcap->vca_un.vca_prop;
+
+ ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vcpap->vcpa_id);
+ if (ihp == NULL)
+ return (ENOENT);
+ ret = libvarpd_prop_handle_alloc((varpd_handle_t *)vip, ihp, &phdl);
+ if (ret != 0)
+ return (ret);
+
+ ret = libvarpd_prop_info_fill(phdl, vcpap->vcpa_propid);
+ if (ret != 0) {
+ libvarpd_prop_handle_free(phdl);
+ return (ret);
+ }
+
+ ret = libvarpd_prop_get(phdl, vcpap->vcpa_buf, &size);
+ if (ret == 0)
+ vcpap->vcpa_bufsize = size;
+ libvarpd_prop_handle_free(phdl);
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+libvarpd_door_f_setprop(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+ ucred_t *credp)
+{
+ int ret;
+ varpd_instance_handle_t *ihp;
+ varpd_prop_handle_t *phdl;
+ varpd_client_prop_arg_t *vcpap = &vcap->vca_un.vca_prop;
+
+ ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vcpap->vcpa_id);
+ if (ihp == NULL)
+ return (ENOENT);
+ ret = libvarpd_prop_handle_alloc((varpd_handle_t *)vip, ihp, &phdl);
+ if (ret != 0)
+ return (ret);
+
+ ret = libvarpd_prop_info_fill(phdl, vcpap->vcpa_propid);
+ if (ret != 0) {
+ libvarpd_prop_handle_free(phdl);
+ return (ret);
+ }
+
+ ret = libvarpd_prop_set(phdl, vcpap->vcpa_buf, vcpap->vcpa_bufsize);
+ libvarpd_prop_handle_free(phdl);
+ return (ret);
+}
+
+/* ARGSUSED */
+static int
+libvarpd_door_f_lookup(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+ ucred_t *credp)
+{
+ varpd_instance_t *inst;
+ varpd_client_lookup_arg_t *vclap = &vcap->vca_un.vca_lookup;
+
+ inst = libvarpd_instance_lookup_by_dlid(vip, vclap->vcla_linkid);
+ if (inst == NULL)
+ return (ENOENT);
+
+ vclap->vcla_id = inst->vri_id;
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+libvarpd_door_f_target(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+ ucred_t *credp)
+{
+ varpd_instance_handle_t *ihp;
+ varpd_instance_t *inst;
+ varpd_client_target_mode_arg_t *vtmap = &vcap->vca_un.vca_mode;
+
+ ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vtmap->vtma_id);
+ if (ihp == NULL)
+ return (ENOENT);
+ inst = (varpd_instance_t *)ihp;
+ vtmap->vtma_dest = inst->vri_dest;
+ vtmap->vtma_mode = inst->vri_mode;
+ return (0);
+}
+
+static int
+libvarpd_door_f_flush(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+ ucred_t *credp)
+{
+ varpd_instance_handle_t *ihp;
+ varpd_client_target_cache_arg_t *vtcap = &vcap->vca_un.vca_cache;
+
+ if (libvarpd_door_privileged(credp) == B_FALSE)
+ return (EPERM);
+
+ ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vtcap->vtca_id);
+ if (ihp == NULL)
+ return (ENOENT);
+ return (libvarpd_overlay_cache_flush((varpd_instance_t *)ihp));
+}
+
+static int
+libvarpd_door_f_delete(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+ ucred_t *credp)
+{
+ varpd_instance_handle_t *ihp;
+ varpd_client_target_cache_arg_t *vtcap = &vcap->vca_un.vca_cache;
+
+ if (libvarpd_door_privileged(credp) == B_FALSE)
+ return (EPERM);
+
+ ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vtcap->vtca_id);
+ if (ihp == NULL)
+ return (ENOENT);
+ return (libvarpd_overlay_cache_delete((varpd_instance_t *)ihp,
+ vtcap->vtca_key));
+}
+
+/* ARGSUSED */
+static int
+libvarpd_door_f_get(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+ ucred_t *credp)
+{
+ varpd_instance_handle_t *ihp;
+ varpd_client_target_cache_arg_t *vtcap = &vcap->vca_un.vca_cache;
+
+ ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vtcap->vtca_id);
+ if (ihp == NULL)
+ return (ENOENT);
+ return (libvarpd_overlay_cache_get((varpd_instance_t *)ihp,
+ vtcap->vtca_key, &vtcap->vtca_entry));
+}
+
+static int
+libvarpd_door_f_set(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+ ucred_t *credp)
+{
+ varpd_instance_handle_t *ihp;
+ varpd_client_target_cache_arg_t *vtcap = &vcap->vca_un.vca_cache;
+
+ if (libvarpd_door_privileged(credp) == B_FALSE)
+ return (EPERM);
+
+ ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vtcap->vtca_id);
+ if (ihp == NULL)
+ return (ENOENT);
+
+ return (libvarpd_overlay_cache_set((varpd_instance_t *)ihp,
+ vtcap->vtca_key, &vtcap->vtca_entry));
+}
+
+/* ARGSUSED */
+static int
+libvarpd_door_f_walk(varpd_impl_t *vip, varpd_client_arg_t *vcap,
+ ucred_t *credp)
+{
+ varpd_instance_handle_t *ihp;
+ varpd_client_target_walk_arg_t *vctwp = &vcap->vca_un.vca_walk;
+
+ ihp = libvarpd_instance_lookup((varpd_handle_t *)vip, vctwp->vtcw_id);
+ if (ihp == NULL)
+ return (ENOENT);
+
+ return (libvarpd_overlay_cache_walk_fill((varpd_instance_t *)ihp,
+ &vctwp->vtcw_marker, &vctwp->vtcw_count, vctwp->vtcw_ents));
+}
+
+static libvarpd_door_f *libvarpd_door_table[] = {
+ libvarpd_door_f_create,
+ libvarpd_door_f_activate,
+ libvarpd_door_f_destroy,
+ libvarpd_door_f_nprops,
+ libvarpd_door_f_propinfo,
+ libvarpd_door_f_getprop,
+ libvarpd_door_f_setprop,
+ libvarpd_door_f_lookup,
+ libvarpd_door_f_target,
+ libvarpd_door_f_flush,
+ libvarpd_door_f_delete,
+ libvarpd_door_f_get,
+ libvarpd_door_f_set,
+ libvarpd_door_f_walk
+};
+
+/* ARGSUSED */
+static void
+libvarpd_door_server(void *cookie, char *argp, size_t argsz, door_desc_t *dp,
+ uint_t ndesc)
+{
+ int ret;
+ varpd_client_eresp_t err;
+ ucred_t *credp = NULL;
+ varpd_impl_t *vip = cookie;
+ varpd_client_arg_t *vcap = (varpd_client_arg_t *)argp;
+
+ err.vce_command = VARPD_CLIENT_INVALID;
+ if (argsz < sizeof (varpd_client_arg_t)) {
+ err.vce_errno = EINVAL;
+ goto errout;
+ }
+
+ if ((ret = door_ucred(&credp)) != 0) {
+ err.vce_errno = ret;
+ goto errout;
+ }
+
+ if (vcap->vca_command == VARPD_CLIENT_INVALID ||
+ vcap->vca_command >= VARPD_CLIENT_MAX) {
+ err.vce_errno = EINVAL;
+ goto errout;
+ }
+
+ vcap->vca_errno = 0;
+ ret = libvarpd_door_table[vcap->vca_command - 1](vip, vcap, credp);
+ if (ret != 0)
+ vcap->vca_errno = ret;
+
+ ucred_free(credp);
+ (void) door_return(argp, argsz, NULL, 0);
+ return;
+
+errout:
+ ucred_free(credp);
+ (void) door_return((char *)&err, sizeof (err), NULL, 0);
+}
+
+int
+libvarpd_door_server_create(varpd_handle_t *vhp, const char *path)
+{
+ int fd, ret;
+ varpd_impl_t *vip = (varpd_impl_t *)vhp;
+
+ mutex_enter(&vip->vdi_lock);
+ if (vip->vdi_doorfd >= 0) {
+ mutex_exit(&vip->vdi_lock);
+ return (EEXIST);
+ }
+
+ vip->vdi_doorfd = door_create(libvarpd_door_server, vip,
+ DOOR_REFUSE_DESC | DOOR_NO_CANCEL);
+ if (vip->vdi_doorfd == -1) {
+ mutex_exit(&vip->vdi_lock);
+ return (errno);
+ }
+
+ if ((fd = open(path, O_CREAT | O_RDWR, 0666)) == -1) {
+ ret = errno;
+ if (door_revoke(vip->vdi_doorfd) != 0)
+ libvarpd_panic("failed to revoke door: %d",
+ errno);
+ mutex_exit(&vip->vdi_lock);
+ return (errno);
+ }
+
+ if (fchown(fd, UID_NETADM, GID_NETADM) != 0) {
+ ret = errno;
+ if (door_revoke(vip->vdi_doorfd) != 0)
+ libvarpd_panic("failed to revoke door: %d",
+ errno);
+ mutex_exit(&vip->vdi_lock);
+ return (ret);
+ }
+
+ if (close(fd) != 0)
+ libvarpd_panic("failed to close door fd %d: %d",
+ fd, errno);
+ (void) fdetach(path);
+ if (fattach(vip->vdi_doorfd, path) != 0) {
+ ret = errno;
+ if (door_revoke(vip->vdi_doorfd) != 0)
+ libvarpd_panic("failed to revoke door: %d",
+ errno);
+ mutex_exit(&vip->vdi_lock);
+ return (ret);
+ }
+
+ mutex_exit(&vip->vdi_lock);
+ return (0);
+}
+
+void
+libvarpd_door_server_destroy(varpd_handle_t *vhp)
+{
+ varpd_impl_t *vip = (varpd_impl_t *)vhp;
+
+ mutex_enter(&vip->vdi_lock);
+ if (vip->vdi_doorfd != 0) {
+ if (door_revoke(vip->vdi_doorfd) != 0)
+ libvarpd_panic("failed to revoke door: %d",
+ errno);
+ vip->vdi_doorfd = -1;
+ }
+ mutex_exit(&vip->vdi_lock);
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_impl.h b/usr/src/lib/varpd/libvarpd/common/libvarpd_impl.h
new file mode 100644
index 0000000000..7ecd3a952f
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_impl.h
@@ -0,0 +1,247 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _LIBVARPD_IMPL_H
+#define _LIBVARPD_IMPL_H
+
+/*
+ * varpd internal interfaces
+ */
+
+#include <libvarpd.h>
+#include <libvarpd_provider.h>
+#include <sys/avl.h>
+#include <thread.h>
+#include <synch.h>
+#include <limits.h>
+#include <libidspace.h>
+#include <umem.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LIBVARPD_ID_MIN 1
+#define LIBVARPD_ID_MAX INT32_MAX
+
+typedef struct varpd_plugin {
+ avl_node_t vpp_node;
+ const char *vpp_name;
+ overlay_target_mode_t vpp_mode;
+ const varpd_plugin_ops_t *vpp_ops;
+ mutex_t vpp_lock;
+ uint_t vpp_active;
+} varpd_plugin_t;
+
+typedef struct varpd_impl {
+ mutex_t vdi_lock;
+ rwlock_t vdi_pfdlock;
+ avl_tree_t vdi_plugins; /* vdi_lock */
+ avl_tree_t vdi_instances; /* vdi_lock */
+ avl_tree_t vdi_linstances; /* vdi_lock */
+ id_space_t *vdi_idspace; /* RO */
+ umem_cache_t *vdi_qcache; /* RO */
+ bunyan_logger_t *vdi_bunyan; /* RO */
+ int vdi_overlayfd; /* RO */
+ int vdi_doorfd; /* vdi_lock */
+ int vdi_persistfd; /* vdi_plock */
+ cond_t vdi_lthr_cv; /* vdi_lock */
+ boolean_t vdi_lthr_quiesce; /* vdi_lock */
+ uint_t vdi_lthr_count; /* vdi_lock */
+} varpd_impl_t;
+
+typedef enum varpd_instance_flags {
+ VARPD_INSTANCE_F_ACTIVATED = 0x01
+} varpd_instance_flags_t;
+
+typedef struct varpd_instance {
+ avl_node_t vri_inode;
+ avl_node_t vri_lnode;
+ uint64_t vri_id; /* RO */
+ uint64_t vri_vnetid; /* RO */
+ datalink_id_t vri_linkid; /* RO */
+ overlay_target_mode_t vri_mode; /* RO */
+ overlay_plugin_dest_t vri_dest; /* RO */
+ varpd_impl_t *vri_impl; /* RO */
+ varpd_plugin_t *vri_plugin; /* RO */
+ void *vri_private; /* RO */
+ mutex_t vri_lock;
+ varpd_instance_flags_t vri_flags; /* vri_lock */
+} varpd_instance_t;
+
+typedef struct varpd_query {
+ overlay_targ_lookup_t vq_lookup;
+ overlay_targ_resp_t vq_response;
+ varpd_instance_t *vq_instance;
+} varpd_query_t;
+
+typedef struct varpd_client_create_arg {
+ datalink_id_t vcca_linkid;
+ uint64_t vcca_id;
+ char vcca_plugin[LIBVARPD_PROP_NAMELEN];
+} varpd_client_create_arg_t;
+
+typedef struct varpd_client_instance_arg {
+ uint64_t vcia_id;
+} varpd_client_instance_arg_t;
+
+typedef struct varpd_client_nprops_arg {
+ uint64_t vcna_id;
+ uint_t vcna_nprops;
+} varpd_client_nprops_arg_t;
+
+typedef struct varpd_client_propinfo_arg {
+ uint64_t vcfa_id;
+ uint_t vcfa_propid;
+ uint_t vcfa_type;
+ uint_t vcfa_prot;
+ uint32_t vcfa_defsize;
+ uint32_t vcfa_psize;
+ char vcfa_name[LIBVARPD_PROP_NAMELEN];
+ uint8_t vcfa_default[LIBVARPD_PROP_SIZEMAX];
+ uint8_t vcfa_poss[LIBVARPD_PROP_SIZEMAX];
+} varpd_client_propinfo_arg_t;
+
+typedef struct varpd_client_prop_arg {
+ uint64_t vcpa_id;
+ uint_t vcpa_propid;
+ uint8_t vcpa_buf[LIBVARPD_PROP_SIZEMAX];
+ size_t vcpa_bufsize;
+} varpd_client_prop_arg_t;
+
+typedef struct varpd_client_lookup_arg {
+ datalink_id_t vcla_linkid;
+ uint32_t vcla_pad;
+ uint64_t vcla_id;
+} varpd_client_lookup_arg_t;
+
+typedef struct varpd_client_target_mode_arg {
+ uint64_t vtma_id;
+ uint32_t vtma_dest;
+ uint32_t vtma_mode;
+} varpd_client_target_mode_arg_t;
+
+typedef struct varpd_client_target_cache_arg {
+ uint64_t vtca_id;
+ uint8_t vtca_key[ETHERADDRL];
+ uint8_t vtca_pad[2];
+ varpd_client_cache_entry_t vtca_entry;
+} varpd_client_target_cache_arg_t;
+
+typedef struct varpd_client_target_walk_arg {
+ uint64_t vtcw_id;
+ uint64_t vtcw_marker;
+ uint64_t vtcw_count;
+ overlay_targ_cache_entry_t vtcw_ents[];
+} varpd_client_target_walk_arg_t;
+
+typedef enum varpd_client_command {
+ VARPD_CLIENT_INVALID = 0x0,
+ VARPD_CLIENT_CREATE,
+ VARPD_CLIENT_ACTIVATE,
+ VARPD_CLIENT_DESTROY,
+ VARPD_CLIENT_NPROPS,
+ VARPD_CLIENT_PROPINFO,
+ VARPD_CLIENT_GETPROP,
+ VARPD_CLIENT_SETPROP,
+ VARPD_CLIENT_LOOKUP,
+ VARPD_CLIENT_TARGET_MODE,
+ VARPD_CLIENT_CACHE_FLUSH,
+ VARPD_CLIENT_CACHE_DELETE,
+ VARPD_CLIENT_CACHE_GET,
+ VARPD_CLIENT_CACHE_SET,
+ VARPD_CLIENT_CACHE_WALK,
+ VARPD_CLIENT_MAX
+} varpd_client_command_t;
+
+typedef struct varpd_client_arg {
+ uint_t vca_command;
+ uint_t vca_errno;
+ union {
+ varpd_client_create_arg_t vca_create;
+ varpd_client_instance_arg_t vca_instance;
+ varpd_client_nprops_arg_t vca_nprops;
+ varpd_client_propinfo_arg_t vca_info;
+ varpd_client_prop_arg_t vca_prop;
+ varpd_client_lookup_arg_t vca_lookup;
+ varpd_client_target_mode_arg_t vca_mode;
+ varpd_client_target_cache_arg_t vca_cache;
+ varpd_client_target_walk_arg_t vca_walk;
+ } vca_un;
+} varpd_client_arg_t;
+
+typedef struct varpd_client_eresp {
+ uint_t vce_command;
+ uint_t vce_errno;
+} varpd_client_eresp_t;
+
+extern void libvarpd_plugin_init(void);
+extern void libvarpd_plugin_prefork(void);
+extern void libvarpd_plugin_postfork(void);
+extern void libvarpd_plugin_fini(void);
+extern int libvarpd_plugin_comparator(const void *, const void *);
+extern varpd_plugin_t *libvarpd_plugin_lookup(varpd_impl_t *, const char *);
+
+extern varpd_instance_t *libvarpd_instance_lookup_by_dlid(varpd_impl_t *,
+ datalink_id_t);
+
+extern void libvarpd_prop_door_convert(const varpd_prop_handle_t *,
+ varpd_client_propinfo_arg_t *);
+
+extern const char *libvarpd_isaext(void);
+typedef int (*libvarpd_dirwalk_f)(varpd_impl_t *, const char *, void *);
+extern int libvarpd_dirwalk(varpd_impl_t *, const char *, const char *,
+ libvarpd_dirwalk_f, void *);
+
+extern int libvarpd_overlay_init(varpd_impl_t *);
+extern void libvarpd_overlay_fini(varpd_impl_t *);
+extern int libvarpd_overlay_info(varpd_impl_t *, datalink_id_t,
+ overlay_plugin_dest_t *, uint64_t *, uint64_t *);
+extern int libvarpd_overlay_associate(varpd_instance_t *);
+extern int libvarpd_overlay_disassociate(varpd_instance_t *);
+extern int libvarpd_overlay_degrade(varpd_instance_t *, const char *);
+extern int libvarpd_overlay_degrade_datalink(varpd_impl_t *, datalink_id_t,
+ const char *);
+extern int libvarpd_overlay_restore(varpd_instance_t *);
+extern int libvarpd_overlay_packet(varpd_impl_t *,
+ const overlay_targ_lookup_t *, void *, size_t *);
+extern int libvarpd_overlay_inject(varpd_impl_t *,
+ const overlay_targ_lookup_t *, void *, size_t);
+extern int libvarpd_overlay_instance_inject(varpd_instance_t *, void *, size_t);
+extern int libvarpd_overlay_resend(varpd_impl_t *,
+ const overlay_targ_lookup_t *, void *, size_t);
+typedef int (*libvarpd_overlay_iter_f)(varpd_impl_t *, datalink_id_t, void *);
+extern int libvarpd_overlay_iter(varpd_impl_t *, libvarpd_overlay_iter_f,
+ void *);
+extern int libvarpd_overlay_cache_flush(varpd_instance_t *);
+extern int libvarpd_overlay_cache_delete(varpd_instance_t *, const uint8_t *);
+extern int libvarpd_overlay_cache_delete(varpd_instance_t *, const uint8_t *);
+extern int libvarpd_overlay_cache_get(varpd_instance_t *, const uint8_t *,
+ varpd_client_cache_entry_t *);
+extern int libvarpd_overlay_cache_set(varpd_instance_t *, const uint8_t *,
+ const varpd_client_cache_entry_t *);
+extern int libvarpd_overlay_cache_walk_fill(varpd_instance_t *, uint64_t *,
+ uint64_t *, overlay_targ_cache_entry_t *);
+
+extern void libvarpd_persist_init(varpd_impl_t *);
+extern void libvarpd_persist_fini(varpd_impl_t *);
+extern int libvarpd_persist_instance(varpd_impl_t *, varpd_instance_t *);
+extern void libvarpd_torch_instance(varpd_impl_t *, varpd_instance_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBVARPD_IMPL_H */
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_overlay.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_overlay.c
new file mode 100644
index 0000000000..b7506c56cf
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_overlay.c
@@ -0,0 +1,584 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * Interactions with /dev/overlay
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <assert.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stropts.h>
+#include <strings.h>
+#include <umem.h>
+
+#include <libvarpd_impl.h>
+#include <sys/overlay_target.h>
+
+#define OVERLAY_PATH "/dev/overlay"
+
+int
+libvarpd_overlay_init(varpd_impl_t *vip)
+{
+ vip->vdi_overlayfd = open(OVERLAY_PATH, O_RDWR | O_EXCL);
+ if (vip->vdi_overlayfd == -1)
+ return (errno);
+ return (0);
+}
+
+void
+libvarpd_overlay_fini(varpd_impl_t *vip)
+{
+ assert(vip->vdi_overlayfd > 0);
+ if (close(vip->vdi_overlayfd) != 0)
+ libvarpd_panic("failed to close /dev/overlay fd %d: %d",
+ vip->vdi_overlayfd, errno);
+}
+
+int
+libvarpd_overlay_info(varpd_impl_t *vip, datalink_id_t linkid,
+ overlay_plugin_dest_t *destp, uint64_t *flags, uint64_t *vnetid)
+{
+ overlay_targ_info_t oti;
+
+ oti.oti_linkid = linkid;
+ if (ioctl(vip->vdi_overlayfd, OVERLAY_TARG_INFO, &oti) != 0)
+ return (errno);
+
+ if (destp != NULL)
+ *destp = oti.oti_needs;
+ if (flags != NULL)
+ *flags = oti.oti_flags;
+ if (vnetid != NULL)
+ *vnetid = oti.oti_vnetid;
+ return (0);
+}
+
+int
+libvarpd_overlay_associate(varpd_instance_t *inst)
+{
+ overlay_targ_associate_t ota;
+ varpd_impl_t *vip = inst->vri_impl;
+
+ bzero(&ota, sizeof (overlay_targ_associate_t));
+ ota.ota_linkid = inst->vri_linkid;
+ ota.ota_mode = inst->vri_mode;
+ ota.ota_id = inst->vri_id;
+ ota.ota_provides = inst->vri_dest;
+
+ if (ota.ota_mode == OVERLAY_TARGET_POINT) {
+ int ret;
+ ret = inst->vri_plugin->vpp_ops->vpo_default(inst->vri_private,
+ &ota.ota_point);
+ if (ret != VARPD_LOOKUP_OK)
+ return (ret);
+ }
+
+ if (ioctl(vip->vdi_overlayfd, OVERLAY_TARG_ASSOCIATE, &ota) != 0)
+ return (errno);
+
+ return (0);
+}
+
+int
+libvarpd_overlay_disassociate(varpd_instance_t *inst)
+{
+ overlay_targ_id_t otid;
+ varpd_impl_t *vip = inst->vri_impl;
+
+ otid.otid_linkid = inst->vri_linkid;
+ if (ioctl(vip->vdi_overlayfd, OVERLAY_TARG_DISASSOCIATE, &otid) != 0)
+ return (errno);
+ return (0);
+}
+
+int
+libvarpd_overlay_degrade_datalink(varpd_impl_t *vip, datalink_id_t linkid,
+ const char *msg)
+{
+ overlay_targ_degrade_t otd;
+
+ otd.otd_linkid = linkid;
+ (void) strlcpy(otd.otd_buf, msg, OVERLAY_STATUS_BUFLEN);
+ if (ioctl(vip->vdi_overlayfd, OVERLAY_TARG_DEGRADE, &otd) != 0)
+ return (errno);
+ return (0);
+
+}
+
+int
+libvarpd_overlay_degrade(varpd_instance_t *inst, const char *msg)
+{
+ return (libvarpd_overlay_degrade_datalink(inst->vri_impl,
+ inst->vri_linkid, msg));
+}
+
+int
+libvarpd_overlay_restore(varpd_instance_t *inst)
+{
+ overlay_targ_id_t otid;
+ varpd_impl_t *vip = inst->vri_impl;
+
+ otid.otid_linkid = inst->vri_linkid;
+ if (ioctl(vip->vdi_overlayfd, OVERLAY_TARG_RESTORE, &otid) != 0)
+ return (errno);
+ return (0);
+}
+
+int
+libvarpd_overlay_packet(varpd_impl_t *vip, const overlay_targ_lookup_t *otl,
+ void *buf, size_t *buflen)
+{
+ int ret;
+ overlay_targ_pkt_t otp;
+
+ otp.otp_linkid = UINT64_MAX;
+ otp.otp_reqid = otl->otl_reqid;
+ otp.otp_size = *buflen;
+ otp.otp_buf = buf;
+
+ do {
+ ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_PKT, &otp);
+ } while (ret != 0 && errno == EINTR);
+ if (ret != 0 && errno == EFAULT)
+ libvarpd_panic("OVERLAY_TARG_PKT ioctl efault");
+ else if (ret != 0)
+ ret = errno;
+
+ if (ret == 0)
+ *buflen = otp.otp_size;
+
+ return (ret);
+}
+
+static int
+libvarpd_overlay_inject_common(varpd_impl_t *vip, varpd_instance_t *inst,
+ const overlay_targ_lookup_t *otl, void *buf, size_t buflen, int cmd)
+{
+ int ret;
+ overlay_targ_pkt_t otp;
+
+ if (otl == NULL) {
+ otp.otp_linkid = inst->vri_linkid;
+ otp.otp_reqid = 0;
+ } else {
+ otp.otp_linkid = UINT64_MAX;
+ otp.otp_reqid = otl->otl_reqid;
+ }
+ otp.otp_size = buflen;
+ otp.otp_buf = buf;
+
+ do {
+ ret = ioctl(vip->vdi_overlayfd, cmd, &otp);
+ } while (ret != 0 && errno == EINTR);
+ if (ret != 0 && errno == EFAULT)
+ libvarpd_panic("overlay_inject_common ioctl EFAULT");
+ else if (ret != 0)
+ ret = errno;
+
+ return (ret);
+}
+
+int
+libvarpd_overlay_inject(varpd_impl_t *vip, const overlay_targ_lookup_t *otl,
+ void *buf, size_t buflen)
+{
+ return (libvarpd_overlay_inject_common(vip, NULL, otl, buf, buflen,
+ OVERLAY_TARG_INJECT));
+}
+
+int
+libvarpd_overlay_instance_inject(varpd_instance_t *inst, void *buf,
+ size_t buflen)
+{
+ return (libvarpd_overlay_inject_common(inst->vri_impl, inst, NULL, buf,
+ buflen, OVERLAY_TARG_INJECT));
+}
+
+int
+libvarpd_overlay_resend(varpd_impl_t *vip, const overlay_targ_lookup_t *otl,
+ void *buf, size_t buflen)
+{
+ return (libvarpd_overlay_inject_common(vip, NULL, otl, buf, buflen,
+ OVERLAY_TARG_RESEND));
+}
+
+static void
+libvarpd_overlay_lookup_reply(varpd_impl_t *vip,
+ const overlay_targ_lookup_t *otl, overlay_targ_resp_t *otr, int cmd)
+{
+ int ret;
+
+ otr->otr_reqid = otl->otl_reqid;
+ do {
+ ret = ioctl(vip->vdi_overlayfd, cmd, otr);
+ } while (ret != 0 && errno == EINTR);
+
+ /*
+ * The only errors that should cause us to end up here are due to
+ * programmer errors. Aruably the EINAVL case indicates that something
+ * is a bit off; however, at this time we don't opt to kill varpd.
+ */
+ if (ret != 0 && errno != EINVAL)
+ libvarpd_panic("receieved bad errno from lookup_reply "
+ "(cmd %d): %d\n", cmd, errno);
+}
+
+static void
+libvarpd_overlay_lookup_handle(varpd_impl_t *vip)
+{
+ int ret;
+ varpd_query_t *vqp;
+ overlay_targ_lookup_t *otl;
+ overlay_targ_resp_t *otr;
+ varpd_instance_t *inst;
+
+ vqp = umem_cache_alloc(vip->vdi_qcache, UMEM_DEFAULT);
+ otl = &vqp->vq_lookup;
+ otr = &vqp->vq_response;
+ /*
+ * abort doesn't really help here that much, maybe we can instead try
+ * and for a reap or something?
+ */
+ if (vqp == NULL)
+ libvarpd_panic("failed to allocate memory for lookup "
+ "handle..., we should not panic()");
+ ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_LOOKUP, otl);
+ if (ret != 0 && errno != ETIME && errno != EINTR)
+ libvarpd_panic("received bad errno from OVERLAY_TARG_LOOKUP: "
+ "%d", errno);
+
+ if (ret != 0) {
+ umem_cache_free(vip->vdi_qcache, vqp);
+ return;
+ }
+
+ inst = (varpd_instance_t *)libvarpd_instance_lookup(
+ (varpd_handle_t *)vip, otl->otl_varpdid);
+ if (inst == NULL) {
+ libvarpd_overlay_lookup_reply(vip, otl, otr,
+ OVERLAY_TARG_DROP);
+ umem_cache_free(vip->vdi_qcache, vqp);
+ return;
+ }
+ vqp->vq_instance = inst;
+
+ inst->vri_plugin->vpp_ops->vpo_lookup(inst->vri_private,
+ (varpd_query_handle_t *)vqp, otl, &otr->otr_answer);
+}
+
+void
+libvarpd_overlay_lookup_run(varpd_handle_t *vhp)
+{
+ varpd_impl_t *vip = (varpd_impl_t *)vhp;
+
+ mutex_enter(&vip->vdi_lock);
+ if (vip->vdi_lthr_quiesce == B_TRUE) {
+ mutex_exit(&vip->vdi_lock);
+ return;
+ }
+ vip->vdi_lthr_count++;
+
+ for (;;) {
+ mutex_exit(&vip->vdi_lock);
+ libvarpd_overlay_lookup_handle(vip);
+ mutex_enter(&vip->vdi_lock);
+ if (vip->vdi_lthr_quiesce == B_TRUE)
+ break;
+ }
+ assert(vip->vdi_lthr_count > 0);
+ vip->vdi_lthr_count--;
+ (void) cond_signal(&vip->vdi_lthr_cv);
+ mutex_exit(&vip->vdi_lock);
+}
+
+void
+libvarpd_overlay_lookup_quiesce(varpd_handle_t *vhp)
+{
+ varpd_impl_t *vip = (varpd_impl_t *)vhp;
+
+ mutex_enter(&vip->vdi_lock);
+ if (vip->vdi_lthr_count == 0) {
+ mutex_exit(&vip->vdi_lock);
+ return;
+ }
+ vip->vdi_lthr_quiesce = B_TRUE;
+ while (vip->vdi_lthr_count > 0)
+ (void) cond_wait(&vip->vdi_lthr_cv, &vip->vdi_lock);
+ vip->vdi_lthr_quiesce = B_FALSE;
+ mutex_exit(&vip->vdi_lock);
+}
+
+int
+libvarpd_overlay_iter(varpd_impl_t *vip, libvarpd_overlay_iter_f func,
+ void *arg)
+{
+ uint32_t curents = 0, i;
+ size_t size;
+ overlay_targ_list_t *otl;
+
+ for (;;) {
+ size = sizeof (overlay_targ_list_t) +
+ sizeof (uint32_t) * curents;
+ otl = umem_alloc(size, UMEM_DEFAULT);
+ if (otl == NULL)
+ return (ENOMEM);
+
+ otl->otl_nents = curents;
+ if (ioctl(vip->vdi_overlayfd, OVERLAY_TARG_LIST, otl) != 0) {
+ if (errno == EFAULT)
+ libvarpd_panic("OVERLAY_TARG_LIST ioctl "
+ "efault");
+ umem_free(otl, size);
+ if (errno == EINTR)
+ continue;
+ else
+ return (errno);
+ }
+
+ if (otl->otl_nents == curents)
+ break;
+
+ curents = otl->otl_nents;
+ umem_free(otl, size);
+ }
+
+ for (i = 0; i < otl->otl_nents; i++) {
+ if (func(vip, otl->otl_ents[i], arg) != 0)
+ break;
+ }
+ umem_free(otl, size);
+ return (0);
+}
+
+int
+libvarpd_overlay_cache_flush(varpd_instance_t *inst)
+{
+ int ret;
+ overlay_targ_cache_t cache;
+ varpd_impl_t *vip = inst->vri_impl;
+
+ bzero(&cache, sizeof (overlay_targ_cache_t));
+ cache.otc_linkid = inst->vri_linkid;
+
+ ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_CACHE_FLUSH, &cache);
+ if (ret != 0 && errno == EFAULT)
+ libvarpd_panic("OVERLAY_TARG_CACHE_FLUSH ioctl efault");
+ else if (ret != 0)
+ ret = errno;
+
+ return (ret);
+}
+
+int
+libvarpd_overlay_cache_delete(varpd_instance_t *inst, const uint8_t *key)
+{
+ int ret;
+ overlay_targ_cache_t cache;
+ varpd_impl_t *vip = inst->vri_impl;
+
+ bzero(&cache, sizeof (overlay_targ_cache_t));
+ cache.otc_linkid = inst->vri_linkid;
+ bcopy(key, cache.otc_entry.otce_mac, ETHERADDRL);
+
+ ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_CACHE_REMOVE, &cache);
+ if (ret != 0 && errno == EFAULT)
+ libvarpd_panic("OVERLAY_TARG_CACHE_REMOVE ioctl efault");
+ else if (ret != 0)
+ ret = errno;
+
+ return (ret);
+
+}
+
+int
+libvarpd_overlay_cache_get(varpd_instance_t *inst, const uint8_t *key,
+ varpd_client_cache_entry_t *entry)
+{
+ int ret;
+ overlay_targ_cache_t cache;
+ varpd_impl_t *vip = inst->vri_impl;
+
+ bzero(&cache, sizeof (overlay_targ_cache_t));
+ cache.otc_linkid = inst->vri_linkid;
+ bcopy(key, cache.otc_entry.otce_mac, ETHERADDRL);
+
+ ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_CACHE_GET, &cache);
+ if (ret != 0 && errno == EFAULT)
+ libvarpd_panic("OVERLAY_TARG_CACHE_GET ioctl efault");
+ else if (ret != 0)
+ return (errno);
+
+ bcopy(cache.otc_entry.otce_dest.otp_mac, &entry->vcp_mac, ETHERADDRL);
+ entry->vcp_flags = cache.otc_entry.otce_flags;
+ entry->vcp_ip = cache.otc_entry.otce_dest.otp_ip;
+ entry->vcp_port = cache.otc_entry.otce_dest.otp_port;
+
+ return (0);
+}
+
+int
+libvarpd_overlay_cache_set(varpd_instance_t *inst, const uint8_t *key,
+ const varpd_client_cache_entry_t *entry)
+{
+ int ret;
+ overlay_targ_cache_t cache;
+ varpd_impl_t *vip = inst->vri_impl;
+
+ bzero(&cache, sizeof (overlay_targ_cache_t));
+ cache.otc_linkid = inst->vri_linkid;
+ bcopy(key, cache.otc_entry.otce_mac, ETHERADDRL);
+ bcopy(&entry->vcp_mac, cache.otc_entry.otce_dest.otp_mac, ETHERADDRL);
+ cache.otc_entry.otce_flags = entry->vcp_flags;
+ cache.otc_entry.otce_dest.otp_ip = entry->vcp_ip;
+ cache.otc_entry.otce_dest.otp_port = entry->vcp_port;
+
+ ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_CACHE_SET, &cache);
+ if (ret != 0 && errno == EFAULT)
+ libvarpd_panic("OVERLAY_TARG_CACHE_SET ioctl efault");
+ else if (ret != 0)
+ return (errno);
+
+ return (0);
+}
+
+int
+libvarpd_overlay_cache_walk_fill(varpd_instance_t *inst, uint64_t *markerp,
+ uint64_t *countp, overlay_targ_cache_entry_t *ents)
+{
+ int ret;
+ size_t asize;
+ overlay_targ_cache_iter_t *iter;
+ varpd_impl_t *vip = inst->vri_impl;
+
+ if (*countp > 200)
+ return (E2BIG);
+
+ asize = sizeof (overlay_targ_cache_iter_t) +
+ *countp * sizeof (overlay_targ_cache_entry_t);
+ iter = umem_alloc(asize, UMEM_DEFAULT);
+ if (iter == NULL)
+ return (ENOMEM);
+
+ iter->otci_linkid = inst->vri_linkid;
+ iter->otci_marker = *markerp;
+ iter->otci_count = *countp;
+ ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_CACHE_ITER, iter);
+ if (ret != 0 && errno == EFAULT)
+ libvarpd_panic("OVERLAY_TARG_CACHE_ITER ioctl efault");
+ else if (ret != 0) {
+ ret = errno;
+ goto out;
+ }
+
+ *markerp = iter->otci_marker;
+ *countp = iter->otci_count;
+ bcopy(iter->otci_ents, ents,
+ *countp * sizeof (overlay_targ_cache_entry_t));
+out:
+ umem_free(iter, asize);
+ return (ret);
+}
+
+void
+libvarpd_plugin_query_reply(varpd_query_handle_t *vqh, int action)
+{
+ varpd_query_t *vqp = (varpd_query_t *)vqh;
+
+ if (vqp == NULL)
+ libvarpd_panic("unkonwn plugin passed invalid "
+ "varpd_query_handle_t");
+
+ if (action == VARPD_LOOKUP_DROP)
+ libvarpd_overlay_lookup_reply(vqp->vq_instance->vri_impl,
+ &vqp->vq_lookup, &vqp->vq_response, OVERLAY_TARG_DROP);
+ else if (action == VARPD_LOOKUP_OK)
+ libvarpd_overlay_lookup_reply(vqp->vq_instance->vri_impl,
+ &vqp->vq_lookup, &vqp->vq_response, OVERLAY_TARG_RESPOND);
+ else
+ libvarpd_panic("plugin %s passed in an invalid action: %d",
+ vqp->vq_instance->vri_plugin->vpp_name, action);
+}
+
+void
+libvarpd_inject_varp(varpd_provider_handle_t *vph, const uint8_t *mac,
+ const overlay_target_point_t *otp)
+{
+ int ret;
+ overlay_targ_cache_t otc;
+ varpd_instance_t *inst = (varpd_instance_t *)vph;
+ varpd_impl_t *vip = inst->vri_impl;
+
+ if (otp == NULL) {
+ (void) libvarpd_overlay_cache_delete(inst, mac);
+ return;
+ }
+
+ otc.otc_linkid = inst->vri_linkid;
+ otc.otc_entry.otce_flags = 0;
+ bcopy(mac, otc.otc_entry.otce_mac, ETHERADDRL);
+ bcopy(otp, &otc.otc_entry.otce_dest, sizeof (overlay_target_point_t));
+
+ ret = ioctl(vip->vdi_overlayfd, OVERLAY_TARG_CACHE_SET, &otc);
+ if (ret != 0) {
+ switch (errno) {
+ case EBADF:
+ case EFAULT:
+ case ENOTSUP:
+ libvarpd_panic("received bad errno from "
+ "OVERLAY_TARG_CACHE_SET: %d", errno);
+ default:
+ break;
+ }
+ }
+}
+
+void
+libvarpd_fma_degrade(varpd_provider_handle_t *vph, const char *msg)
+{
+ int ret;
+ varpd_instance_t *inst = (varpd_instance_t *)vph;
+
+ ret = libvarpd_overlay_degrade(inst, msg);
+ switch (ret) {
+ case ENOENT:
+ case EFAULT:
+ libvarpd_panic("received bad errno from degrade ioctl: %d",
+ errno);
+ default:
+ break;
+ }
+}
+
+void
+libvarpd_fma_restore(varpd_provider_handle_t *vph)
+{
+ int ret;
+ varpd_instance_t *inst = (varpd_instance_t *)vph;
+
+ ret = libvarpd_overlay_restore(inst);
+ switch (ret) {
+ case ENOENT:
+ case EFAULT:
+ libvarpd_panic("received bad errno from restore ioctl: %d",
+ errno);
+ default:
+ break;
+ }
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_panic.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_panic.c
new file mode 100644
index 0000000000..6728d79d6e
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_panic.c
@@ -0,0 +1,53 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015, Joyent, Inc.
+ */
+
+/*
+ * No, 'tis not so deep as a well, nor so wide as a church door; but 'tis
+ * enough,'twill serve. Ask for me tomorrow, and you shall find me a grave man.
+ *
+ * This file maintains various routines for handling when we die.
+ */
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <thread.h>
+#include <stdlib.h>
+
+/*
+ * Normally these would be static, but if they're static, that throws off lint
+ * because it thinks we never use them, which is kind of the point, because we
+ * only read them in the core...
+ */
+int varpd_panic_errno;
+char varpd_panic_buf[1024];
+thread_t varpd_panic_thread;
+
+void
+libvarpd_panic(const char *fmt, ...)
+{
+ va_list ap;
+
+ /* Always save errno first! */
+ varpd_panic_errno = errno;
+ varpd_panic_thread = thr_self();
+
+ if (fmt != NULL) {
+ va_start(ap, fmt);
+ (void) vsnprintf(varpd_panic_buf, sizeof (varpd_panic_buf), fmt,
+ ap);
+ }
+ abort();
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_persist.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_persist.c
new file mode 100644
index 0000000000..27cc802a9c
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_persist.c
@@ -0,0 +1,586 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * varpd persistence backend
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <strings.h>
+#include <librename.h>
+#include <md5.h>
+#include <sys/sysmacros.h>
+#include <dirent.h>
+#include <sys/mman.h>
+#include <umem.h>
+#include <sys/debug.h>
+
+#include <libvarpd_impl.h>
+
+static uint8_t varpd_persist_magic[4] = {
+ 'v',
+ 'a',
+ 'r',
+ 'p',
+};
+
+#define VARPD_PERSIST_MAXWRITE 4096
+#define VARPD_PERSIST_VERSION_ONE 1
+#define VARPD_PERSIST_SUFFIX ".varpd"
+
+typedef struct varpd_persist_header {
+ uint8_t vph_magic[4];
+ uint32_t vph_version;
+ uint8_t vph_md5[16];
+} varpd_persist_header_t;
+
+void
+libvarpd_persist_init(varpd_impl_t *vip)
+{
+ vip->vdi_persistfd = -1;
+ if (rwlock_init(&vip->vdi_pfdlock, USYNC_THREAD, NULL) != 0)
+ libvarpd_panic("failed to create rw vdi_pfdlock");
+}
+
+void
+libvarpd_persist_fini(varpd_impl_t *vip)
+{
+ /*
+ * Clean up for someone that left something behind.
+ */
+ if (vip->vdi_persistfd != -1) {
+ if (close(vip->vdi_persistfd) != 0)
+ libvarpd_panic("failed to close persist fd %d: %d",
+ vip->vdi_persistfd, errno);
+ vip->vdi_persistfd = -1;
+ }
+ if (rwlock_destroy(&vip->vdi_pfdlock) != 0)
+ libvarpd_panic("failed to destroy rw vdi_pfdlock");
+}
+
+int
+libvarpd_persist_enable(varpd_handle_t *vhp, const char *rootdir)
+{
+ int fd;
+ struct stat st;
+ varpd_impl_t *vip = (varpd_impl_t *)vhp;
+
+ fd = open(rootdir, O_RDONLY);
+ if (fd < 0)
+ return (errno);
+
+ if (fstat(fd, &st) != 0) {
+ int ret = errno;
+ if (close(fd) != 0)
+ libvarpd_panic("failed to close rootdir fd (%s) %d: %d",
+ rootdir, fd, errno);
+ return (ret);
+ }
+
+ if (!S_ISDIR(st.st_mode)) {
+ if (close(fd) != 0)
+ libvarpd_panic("failed to close rootdir fd (%s) %d: %d",
+ rootdir, fd, errno);
+ return (EINVAL);
+ }
+
+
+ VERIFY0(rw_wrlock(&vip->vdi_pfdlock));
+ if (vip->vdi_persistfd != -1) {
+ VERIFY0(rw_unlock(&vip->vdi_pfdlock));
+ if (close(fd) != 0)
+ libvarpd_panic("failed to close rootdir fd (%s) %d: %d",
+ rootdir, fd, errno);
+ return (EEXIST);
+ }
+ vip->vdi_persistfd = fd;
+ VERIFY0(rw_unlock(&vip->vdi_pfdlock));
+
+ return (0);
+}
+
+static int
+libvarpd_persist_write(int fd, const void *buf, size_t buflen)
+{
+ ssize_t ret;
+ off_t off = 0;
+
+ while (buflen > 0) {
+ ret = write(fd, (void *)((uintptr_t)buf + off),
+ MIN(buflen, VARPD_PERSIST_MAXWRITE));
+ if (ret == -1 && errno == EINTR)
+ continue;
+ if (ret == -1)
+ return (errno);
+
+ off += ret;
+ buflen -= ret;
+ }
+
+ return (0);
+}
+
+static int
+libvarpd_persist_nvlist(int dirfd, uint64_t id, nvlist_t *nvl)
+{
+ int err, fd;
+ size_t size;
+ varpd_persist_header_t hdr;
+ librename_atomic_t *lrap;
+ char *buf = NULL, *name;
+
+ if ((err = nvlist_pack(nvl, &buf, &size, NV_ENCODE_XDR, 0)) != 0)
+ return (err);
+
+ if (asprintf(&name, "%llu%s", (unsigned long long)id, ".varpd") == -1) {
+ err = errno;
+ free(buf);
+ return (err);
+ }
+
+ if ((err = librename_atomic_fdinit(dirfd, name, NULL, 0600, 0,
+ &lrap)) != 0) {
+ free(name);
+ free(buf);
+ return (err);
+ }
+
+ fd = librename_atomic_fd(lrap);
+
+ bzero(&hdr, sizeof (varpd_persist_header_t));
+ bcopy(varpd_persist_magic, hdr.vph_magic, sizeof (varpd_persist_magic));
+ hdr.vph_version = VARPD_PERSIST_VERSION_ONE;
+ md5_calc(hdr.vph_md5, buf, size);
+
+ if ((err = libvarpd_persist_write(fd, &hdr,
+ sizeof (varpd_persist_header_t))) != 0) {
+ librename_atomic_fini(lrap);
+ free(name);
+ free(buf);
+ return (err);
+ }
+
+ if ((err = libvarpd_persist_write(fd, buf, size)) != 0) {
+ librename_atomic_fini(lrap);
+ free(name);
+ free(buf);
+ return (err);
+ }
+
+ do {
+ err = librename_atomic_commit(lrap);
+ } while (err == EINTR);
+
+ librename_atomic_fini(lrap);
+ free(name);
+ free(buf);
+ return (err);
+}
+
+int
+libvarpd_persist_instance(varpd_impl_t *vip, varpd_instance_t *inst)
+{
+ int err = 0;
+ nvlist_t *nvl = NULL, *cvl = NULL;
+
+ VERIFY0(rw_rdlock(&vip->vdi_pfdlock));
+ /* Check if persistence exists */
+ if (vip->vdi_persistfd == -1)
+ goto out;
+
+ if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0)) != 0)
+ goto out;
+
+ if ((err = nvlist_alloc(&cvl, NV_UNIQUE_NAME, 0)) != 0)
+ goto out;
+
+ if ((err = nvlist_add_uint64(nvl, "vri_id", inst->vri_id)) != 0)
+ goto out;
+
+ if ((err = nvlist_add_uint32(nvl, "vri_linkid", inst->vri_linkid)) != 0)
+ goto out;
+
+ if ((err = nvlist_add_uint32(nvl, "vri_dest",
+ (uint32_t)inst->vri_dest)) != 0)
+ goto out;
+ if ((err = nvlist_add_uint32(nvl, "vri_mode",
+ (uint32_t)inst->vri_mode)) != 0)
+ goto out;
+
+ if ((err = nvlist_add_string(nvl, "vri_plugin",
+ inst->vri_plugin->vpp_name)) != 0)
+ goto out;
+
+ err = inst->vri_plugin->vpp_ops->vpo_save(inst->vri_private, cvl);
+ if (err != 0)
+ goto out;
+
+ if ((err = nvlist_add_nvlist(nvl, "vri_private", cvl)) != 0)
+ goto out;
+
+ err = libvarpd_persist_nvlist(vip->vdi_persistfd, inst->vri_id, nvl);
+out:
+ nvlist_free(nvl);
+ nvlist_free(cvl);
+ VERIFY0(rw_unlock(&vip->vdi_pfdlock));
+ return (err);
+}
+
+void
+libvarpd_torch_instance(varpd_impl_t *vip, varpd_instance_t *inst)
+{
+ char buf[32];
+ int ret;
+
+ VERIFY0(rw_rdlock(&vip->vdi_pfdlock));
+ if (vip->vdi_persistfd == -1) {
+ VERIFY0(rw_unlock(&vip->vdi_pfdlock));
+ return;
+ }
+
+ if (snprintf(buf, sizeof (buf), "%lld.varpd", inst->vri_id) >= 32)
+ libvarpd_panic("somehow exceeded static value for "
+ "libvarpd_torch_instance buffer");
+
+ do {
+ ret = unlinkat(vip->vdi_persistfd, buf, 0);
+ } while (ret == -1 && errno == EINTR);
+ if (ret != 0) {
+ switch (errno) {
+ case ENOENT:
+ break;
+ default:
+ libvarpd_panic("failed to unlinkat %d`%s: %s",
+ vip->vdi_persistfd, buf, strerror(errno));
+ }
+ }
+
+ VERIFY0(rw_unlock(&vip->vdi_pfdlock));
+}
+
+static int
+libvarpd_persist_restore_instance(varpd_impl_t *vip, nvlist_t *nvl)
+{
+ int err;
+ nvlist_t *pvl;
+ uint64_t id, flags, vid;
+ uint32_t linkid, dest, mode;
+ char *pluginstr;
+ varpd_plugin_t *plugin;
+ overlay_plugin_dest_t adest;
+ varpd_instance_t *inst, lookup;
+
+ if (nvlist_lookup_uint64(nvl, "vri_id", &id) != 0)
+ return (EINVAL);
+
+ if (nvlist_lookup_uint32(nvl, "vri_linkid", &linkid) != 0)
+ return (EINVAL);
+
+ if (nvlist_lookup_uint32(nvl, "vri_dest", &dest) != 0)
+ return (EINVAL);
+
+ if (nvlist_lookup_uint32(nvl, "vri_mode", &mode) != 0)
+ return (EINVAL);
+
+ if (nvlist_lookup_string(nvl, "vri_plugin", &pluginstr) != 0)
+ return (EINVAL);
+
+ if (nvlist_lookup_nvlist(nvl, "vri_private", &pvl) != 0)
+ return (EINVAL);
+
+ plugin = libvarpd_plugin_lookup(vip, pluginstr);
+ if (plugin == NULL)
+ return (EINVAL);
+
+ if (plugin->vpp_mode != mode)
+ return (EINVAL);
+
+ if (libvarpd_overlay_info(vip, linkid, &adest, &flags, &vid) != 0)
+ return (EINVAL);
+
+ if (dest != adest)
+ return (EINVAL);
+
+ inst = umem_alloc(sizeof (varpd_instance_t), UMEM_DEFAULT);
+ if (inst == NULL)
+ libvarpd_panic("failed to allocate instance for restore");
+
+ inst->vri_id = id_alloc_specific(vip->vdi_idspace, id);
+ if (inst->vri_id != id) {
+ umem_free(inst, sizeof (varpd_instance_t));
+ return (EINVAL);
+ }
+
+ inst->vri_linkid = linkid;
+ inst->vri_vnetid = vid;
+ inst->vri_mode = plugin->vpp_mode;
+ inst->vri_dest = dest;
+ inst->vri_plugin = plugin;
+ inst->vri_impl = vip;
+ inst->vri_flags = 0;
+ if (plugin->vpp_ops->vpo_restore(pvl, (varpd_provider_handle_t *)inst,
+ dest, &inst->vri_private) != 0) {
+ id_free(vip->vdi_idspace, id);
+ umem_free(inst, sizeof (varpd_instance_t));
+ return (EINVAL);
+ }
+
+ if (mutex_init(&inst->vri_lock, USYNC_THREAD | LOCK_ERRORCHECK,
+ NULL) != 0)
+ libvarpd_panic("failed to create vri_lock mutex");
+
+ mutex_enter(&vip->vdi_lock);
+ lookup.vri_id = inst->vri_id;
+ if (avl_find(&vip->vdi_instances, &lookup, NULL) != NULL)
+ libvarpd_panic("found duplicate instance with id %d",
+ lookup.vri_id);
+ avl_add(&vip->vdi_instances, inst);
+ lookup.vri_linkid = inst->vri_linkid;
+ if (avl_find(&vip->vdi_linstances, &lookup, NULL) != NULL)
+ libvarpd_panic("found duplicate linstance with id %d",
+ lookup.vri_linkid);
+ avl_add(&vip->vdi_linstances, inst);
+ mutex_exit(&vip->vdi_lock);
+
+ if (plugin->vpp_ops->vpo_start(inst->vri_private) != 0) {
+ libvarpd_instance_destroy((varpd_instance_handle_t *)inst);
+ return (EINVAL);
+ }
+
+ if (flags & OVERLAY_TARG_INFO_F_ACTIVE)
+ (void) libvarpd_overlay_disassociate(inst);
+
+ if (libvarpd_overlay_associate(inst) != 0) {
+ libvarpd_instance_destroy((varpd_instance_handle_t *)inst);
+ return (EINVAL);
+ }
+
+ if (flags & OVERLAY_TARG_INFO_F_DEGRADED) {
+ if ((err = libvarpd_overlay_restore(inst)) != 0) {
+ libvarpd_panic("failed to restore instance %p: %d\n",
+ inst, err);
+ }
+ }
+
+ mutex_enter(&inst->vri_lock);
+ inst->vri_flags |= VARPD_INSTANCE_F_ACTIVATED;
+ mutex_exit(&inst->vri_lock);
+
+ return (0);
+}
+
+static int
+libvarpd_persist_restore_one(varpd_impl_t *vip, int fd)
+{
+ int err;
+ size_t fsize;
+ struct stat st;
+ void *buf, *datap;
+ varpd_persist_header_t *hdr;
+ uint8_t md5[16];
+ nvlist_t *nvl;
+
+ if (fstat(fd, &st) != 0)
+ return (errno);
+
+ if (st.st_size <= sizeof (varpd_persist_header_t))
+ return (EINVAL);
+ fsize = st.st_size - sizeof (varpd_persist_header_t);
+
+ buf = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (buf == MAP_FAILED)
+ return (errno);
+
+ hdr = buf;
+ if (bcmp(varpd_persist_magic, hdr->vph_magic,
+ sizeof (varpd_persist_magic)) != 0) {
+ if (munmap(buf, st.st_size) != 0)
+ libvarpd_panic("failed to munmap %p: %d", buf, errno);
+ return (EINVAL);
+ }
+
+ if (hdr->vph_version != VARPD_PERSIST_VERSION_ONE) {
+ if (munmap(buf, st.st_size) != 0)
+ libvarpd_panic("failed to munmap %p: %d", buf, errno);
+ return (EINVAL);
+ }
+
+ datap = (void *)((uintptr_t)buf + sizeof (varpd_persist_header_t));
+ md5_calc(md5, datap, fsize);
+ if (bcmp(md5, hdr->vph_md5, sizeof (uint8_t) * 16) != 0) {
+ if (munmap(buf, st.st_size) != 0)
+ libvarpd_panic("failed to munmap %p: %d", buf, errno);
+ return (EINVAL);
+ }
+
+ err = nvlist_unpack(datap, fsize, &nvl, 0);
+ if (munmap(buf, st.st_size) != 0)
+ libvarpd_panic("failed to munmap %p: %d", buf, errno);
+
+ if (err != 0)
+ return (EINVAL);
+
+ err = libvarpd_persist_restore_instance(vip, nvl);
+ nvlist_free(nvl);
+ return (err);
+}
+
+/* ARGSUSED */
+static int
+libvarpd_check_degrade_cb(varpd_impl_t *vip, datalink_id_t linkid, void *arg)
+{
+ varpd_instance_t *inst;
+
+ mutex_enter(&vip->vdi_lock);
+ for (inst = avl_first(&vip->vdi_instances); inst != NULL;
+ inst = AVL_NEXT(&vip->vdi_instances, inst)) {
+ if (inst->vri_linkid == linkid) {
+ mutex_exit(&vip->vdi_lock);
+ return (0);
+ }
+ }
+
+ mutex_exit(&vip->vdi_lock);
+
+ (void) libvarpd_overlay_degrade_datalink(vip, linkid,
+ "no varpd instance exists");
+ return (0);
+}
+
+static void
+libvarpd_check_degrade(varpd_impl_t *vip)
+{
+ (void) libvarpd_overlay_iter(vip, libvarpd_check_degrade_cb, NULL);
+}
+
+int
+libvarpd_persist_restore(varpd_handle_t *vhp)
+{
+ int dirfd;
+ int ret = 0;
+ DIR *dirp = NULL;
+ struct dirent *dp;
+ varpd_impl_t *vip = (varpd_impl_t *)vhp;
+
+ VERIFY0(rw_rdlock(&vip->vdi_pfdlock));
+ if ((dirfd = dup(vip->vdi_persistfd)) < 0) {
+ ret = errno;
+ goto out;
+ }
+
+ if ((dirp = fdopendir(dirfd)) == NULL) {
+ ret = errno;
+ if (close(dirfd) != 0)
+ libvarpd_panic("failed to close dirfd %d: %d",
+ dirfd, errno);
+ goto out;
+ }
+
+ for (;;) {
+ int fd;
+ uint64_t id;
+ char *eptr;
+ struct stat st;
+
+ errno = 0;
+ dp = readdir(dirp);
+ if (dp == NULL) {
+ ret = errno;
+ break;
+ }
+
+ if (strcmp(dp->d_name, ".") == 0 ||
+ strcmp(dp->d_name, "..") == 0)
+ continue;
+
+ /*
+ * Leave files that we don't recognize alone. A valid file has
+ * the format `%llu.varpd`.
+ */
+ errno = 0;
+ id = strtoull(dp->d_name, &eptr, 10);
+ if ((id == 0 && errno == EINVAL) ||
+ (id == ULLONG_MAX && errno == ERANGE))
+ continue;
+
+ if (strcmp(eptr, VARPD_PERSIST_SUFFIX) != 0)
+ continue;
+
+ fd = openat(vip->vdi_persistfd, dp->d_name, O_RDONLY);
+ if (fd < 0) {
+ ret = errno;
+ break;
+ }
+
+ if (fstat(fd, &st) != 0) {
+ ret = errno;
+ break;
+ }
+
+ if (!S_ISREG(st.st_mode)) {
+ if (close(fd) != 0)
+ libvarpd_panic("failed to close fd (%s) %d: "
+ "%d\n", dp->d_name, fd, errno);
+ continue;
+ }
+
+ ret = libvarpd_persist_restore_one(vip, fd);
+ if (close(fd) != 0)
+ libvarpd_panic("failed to close fd (%s) %d: "
+ "%d\n", dp->d_name, fd, errno);
+ /*
+ * This is an invalid file. We'll unlink it to save us this
+ * trouble in the future.
+ */
+ if (ret != 0) {
+ if (unlinkat(vip->vdi_persistfd, dp->d_name, 0) != 0) {
+ ret = errno;
+ break;
+ }
+ }
+ }
+
+ libvarpd_check_degrade(vip);
+
+out:
+ if (dirp != NULL)
+ (void) closedir(dirp);
+ VERIFY0(rw_unlock(&vip->vdi_pfdlock));
+ return (ret);
+}
+
+int
+libvarpd_persist_disable(varpd_handle_t *vhp)
+{
+ varpd_impl_t *vip = (varpd_impl_t *)vhp;
+
+ VERIFY0(rw_wrlock(&vip->vdi_pfdlock));
+ if (vip->vdi_persistfd == -1) {
+ mutex_exit(&vip->vdi_lock);
+ VERIFY0(rw_unlock(&vip->vdi_pfdlock));
+ return (ENOENT);
+ }
+ if (close(vip->vdi_persistfd) != 0)
+ libvarpd_panic("failed to close persist fd %d: %d",
+ vip->vdi_persistfd, errno);
+ vip->vdi_persistfd = -1;
+ VERIFY0(rw_unlock(&vip->vdi_pfdlock));
+ return (0);
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_plugin.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_plugin.c
new file mode 100644
index 0000000000..ac73286fdd
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_plugin.c
@@ -0,0 +1,269 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * varpd plugin management
+ */
+
+#include <libvarpd_impl.h>
+#include <errno.h>
+#include <umem.h>
+#include <assert.h>
+#include <strings.h>
+#include <dlfcn.h>
+#include <link.h>
+#include <stdio.h>
+#include <bunyan.h>
+
+static varpd_impl_t *varpd_load_handle;
+static const char *varpd_load_path;
+static mutex_t varpd_load_lock;
+static cond_t varpd_load_cv;
+
+int
+libvarpd_plugin_comparator(const void *lp, const void *rp)
+{
+ int ret;
+ const varpd_plugin_t *lpp, *rpp;
+
+ lpp = lp;
+ rpp = rp;
+
+ ret = strcmp(lpp->vpp_name, rpp->vpp_name);
+ if (ret > 0)
+ return (1);
+ if (ret < 0)
+ return (-1);
+ return (0);
+}
+
+varpd_plugin_register_t *
+libvarpd_plugin_alloc(uint_t version, int *errp)
+{
+ int err;
+ varpd_plugin_register_t *vprp;
+
+ if (errp == NULL)
+ errp = &err;
+
+ if (version != VARPD_VERSION_ONE) {
+ (void) bunyan_warn(varpd_load_handle->vdi_bunyan,
+ "unsupported registration version",
+ BUNYAN_T_STRING, "module_path", varpd_load_path,
+ BUNYAN_T_INT32, "module_version", version,
+ BUNYAN_T_END);
+ *errp = EINVAL;
+ return (NULL);
+ }
+
+ vprp = umem_alloc(sizeof (varpd_plugin_register_t), UMEM_DEFAULT);
+ if (vprp == NULL) {
+ (void) bunyan_warn(varpd_load_handle->vdi_bunyan,
+ "failed to allocate registration handle",
+ BUNYAN_T_STRING, "module_path", varpd_load_path,
+ BUNYAN_T_END);
+ *errp = ENOMEM;
+ return (NULL);
+ }
+
+ vprp->vpr_version = VARPD_VERSION_ONE;
+
+ return (vprp);
+}
+
+void
+libvarpd_plugin_free(varpd_plugin_register_t *vprp)
+{
+ umem_free(vprp, sizeof (varpd_plugin_register_t));
+}
+
+int
+libvarpd_plugin_register(varpd_plugin_register_t *vprp)
+{
+ varpd_plugin_t *vpp;
+ varpd_plugin_t lookup;
+
+ vpp = umem_alloc(sizeof (varpd_plugin_t), UMEM_DEFAULT);
+ if (vpp == NULL) {
+ (void) bunyan_warn(varpd_load_handle->vdi_bunyan,
+ "failed to allocate memory for the varpd_plugin_t",
+ BUNYAN_T_STRING, "module_path", varpd_load_path,
+ BUNYAN_T_END);
+ return (ENOMEM);
+ }
+
+ /* Watch out for an evil plugin */
+ if (vprp->vpr_version != VARPD_VERSION_ONE) {
+ (void) bunyan_warn(varpd_load_handle->vdi_bunyan,
+ "unsupported registration version",
+ BUNYAN_T_STRING, "module_path", varpd_load_path,
+ BUNYAN_T_INT32, "module_version", vprp->vpr_version,
+ BUNYAN_T_END);
+ return (EINVAL);
+ }
+
+ mutex_enter(&varpd_load_lock);
+ if (varpd_load_handle == NULL)
+ libvarpd_panic("varpd_load_handle was unexpectedly null");
+
+ mutex_enter(&varpd_load_handle->vdi_lock);
+ lookup.vpp_name = vprp->vpr_name;
+ if (avl_find(&varpd_load_handle->vdi_plugins, &lookup, NULL) != NULL) {
+ (void) bunyan_warn(varpd_load_handle->vdi_bunyan,
+ "module already exists with requested name",
+ BUNYAN_T_STRING, "module_path", varpd_load_path,
+ BUNYAN_T_STRING, "name", vprp->vpr_name,
+ BUNYAN_T_END);
+ mutex_exit(&varpd_load_handle->vdi_lock);
+ mutex_exit(&varpd_load_lock);
+ umem_free(vpp, sizeof (varpd_plugin_t));
+ return (EEXIST);
+ }
+ vpp->vpp_name = strdup(vprp->vpr_name);
+ if (vpp->vpp_name == NULL) {
+ (void) bunyan_warn(varpd_load_handle->vdi_bunyan,
+ "failed to allocate memory to duplicate name",
+ BUNYAN_T_STRING, "module_path", varpd_load_path,
+ BUNYAN_T_END);
+ mutex_exit(&varpd_load_handle->vdi_lock);
+ mutex_exit(&varpd_load_lock);
+ umem_free(vpp, sizeof (varpd_plugin_t));
+ return (ENOMEM);
+ }
+
+ vpp->vpp_mode = vprp->vpr_mode;
+ vpp->vpp_ops = vprp->vpr_ops;
+ if (mutex_init(&vpp->vpp_lock, USYNC_THREAD | LOCK_ERRORCHECK,
+ NULL) != 0)
+ libvarpd_panic("failed to create plugin's vpp_lock");
+ vpp->vpp_active = 0;
+ avl_add(&varpd_load_handle->vdi_plugins, vpp);
+ mutex_exit(&varpd_load_handle->vdi_lock);
+ mutex_exit(&varpd_load_lock);
+
+ return (0);
+}
+
+varpd_plugin_t *
+libvarpd_plugin_lookup(varpd_impl_t *vip, const char *name)
+{
+ varpd_plugin_t lookup, *ret;
+
+ lookup.vpp_name = name;
+ mutex_enter(&vip->vdi_lock);
+ ret = avl_find(&vip->vdi_plugins, &lookup, NULL);
+ mutex_exit(&vip->vdi_lock);
+
+ return (ret);
+}
+
+/* ARGSUSED */
+static int
+libvarpd_plugin_load_cb(varpd_impl_t *vip, const char *path, void *unused)
+{
+ void *dlp;
+
+ varpd_load_path = path;
+ dlp = dlopen(path, RTLD_LOCAL | RTLD_NOW);
+ if (dlp == NULL) {
+ (void) bunyan_error(vip->vdi_bunyan, "dlopen failed",
+ BUNYAN_T_STRING, "module path", path,
+ BUNYAN_T_END);
+ }
+ path = NULL;
+
+ return (0);
+}
+
+int
+libvarpd_plugin_load(varpd_handle_t *vph, const char *path)
+{
+ int ret = 0;
+ varpd_impl_t *vip = (varpd_impl_t *)vph;
+
+ if (vip == NULL || path == NULL)
+ return (EINVAL);
+ mutex_enter(&varpd_load_lock);
+ while (varpd_load_handle != NULL)
+ (void) cond_wait(&varpd_load_cv, &varpd_load_lock);
+ varpd_load_handle = vip;
+ mutex_exit(&varpd_load_lock);
+
+ ret = libvarpd_dirwalk(vip, path, ".so", libvarpd_plugin_load_cb, NULL);
+
+ mutex_enter(&varpd_load_lock);
+ varpd_load_handle = NULL;
+ (void) cond_signal(&varpd_load_cv);
+ mutex_exit(&varpd_load_lock);
+
+ return (ret);
+}
+
+int
+libvarpd_plugin_walk(varpd_handle_t *vph, libvarpd_plugin_walk_f func,
+ void *arg)
+{
+ varpd_impl_t *vip = (varpd_impl_t *)vph;
+ varpd_plugin_t *vpp;
+
+ mutex_enter(&vip->vdi_lock);
+ for (vpp = avl_first(&vip->vdi_plugins); vpp != NULL;
+ vpp = AVL_NEXT(&vip->vdi_plugins, vpp)) {
+ if (func(vph, vpp->vpp_name, arg) != 0) {
+ mutex_exit(&vip->vdi_lock);
+ return (1);
+ }
+ }
+ mutex_exit(&vip->vdi_lock);
+ return (0);
+}
+
+void
+libvarpd_plugin_init(void)
+{
+ if (mutex_init(&varpd_load_lock, USYNC_THREAD | LOCK_RECURSIVE |
+ LOCK_ERRORCHECK, NULL) != 0)
+ libvarpd_panic("failed to create varpd_load_lock");
+
+ if (cond_init(&varpd_load_cv, USYNC_THREAD, NULL) != 0)
+ libvarpd_panic("failed to create varpd_load_cv");
+
+ varpd_load_handle = NULL;
+}
+
+void
+libvarpd_plugin_fini(void)
+{
+ assert(varpd_load_handle == NULL);
+ if (mutex_destroy(&varpd_load_lock) != 0)
+ libvarpd_panic("failed to destroy varpd_load_lock");
+ if (cond_destroy(&varpd_load_cv) != 0)
+ libvarpd_panic("failed to destroy varpd_load_cv");
+}
+
+void
+libvarpd_plugin_prefork(void)
+{
+ mutex_enter(&varpd_load_lock);
+ while (varpd_load_handle != NULL)
+ (void) cond_wait(&varpd_load_cv, &varpd_load_lock);
+}
+
+void
+libvarpd_plugin_postfork(void)
+{
+ (void) cond_signal(&varpd_load_cv);
+ mutex_exit(&varpd_load_lock);
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_prop.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_prop.c
new file mode 100644
index 0000000000..abe65a8c5d
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_prop.c
@@ -0,0 +1,299 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * varpd property management
+ */
+
+#include <libvarpd_impl.h>
+#include <errno.h>
+#include <strings.h>
+#include <sys/mac.h>
+#include <umem.h>
+
+typedef struct varpd_prop_info {
+ varpd_impl_t *vprop_vip;
+ varpd_instance_t *vprop_instance;
+ uint_t vprop_type;
+ uint_t vprop_prot;
+ uint32_t vprop_defsize;
+ uint32_t vprop_psize;
+ char vprop_name[LIBVARPD_PROP_NAMELEN];
+ uint8_t vprop_default[LIBVARPD_PROP_SIZEMAX];
+ uint8_t vprop_poss[LIBVARPD_PROP_SIZEMAX];
+} varpd_prop_info_t;
+
+/* Internal Properties */
+static int varpd_nintprops = 1;
+static const char *varpd_intprops[] = {
+ "search"
+};
+
+static int
+libvarpd_prop_get_search(varpd_prop_info_t *infop, void *buf, uint32_t *sizep)
+{
+ varpd_plugin_t *vpp = infop->vprop_instance->vri_plugin;
+ size_t nlen;
+
+ nlen = strlen(vpp->vpp_name) + 1;
+ if (nlen > *sizep)
+ return (EOVERFLOW);
+ *sizep = nlen;
+ (void) strlcpy(buf, vpp->vpp_name, *sizep);
+ return (0);
+}
+
+void
+libvarpd_prop_set_name(varpd_prop_handle_t *phdl, const char *name)
+{
+ varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+ (void) strlcpy(infop->vprop_name, name, OVERLAY_PROP_NAMELEN);
+}
+
+void
+libvarpd_prop_set_prot(varpd_prop_handle_t *phdl, overlay_prop_prot_t perm)
+{
+ varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+ infop->vprop_prot = perm;
+}
+
+void
+libvarpd_prop_set_type(varpd_prop_handle_t *phdl, overlay_prop_type_t type)
+{
+ varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+ infop->vprop_type = type;
+}
+
+int
+libvarpd_prop_set_default(varpd_prop_handle_t *phdl, void *buf, ssize_t len)
+{
+ varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+
+ if (len > LIBVARPD_PROP_SIZEMAX)
+ return (E2BIG);
+
+ if (len < 0)
+ return (EOVERFLOW);
+
+ bcopy(buf, infop->vprop_default, len);
+ infop->vprop_defsize = len;
+ return (0);
+}
+
+void
+libvarpd_prop_set_nodefault(varpd_prop_handle_t *phdl)
+{
+ varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+
+ infop->vprop_default[0] = '\0';
+ infop->vprop_defsize = 0;
+}
+
+void
+libvarpd_prop_set_range_uint32(varpd_prop_handle_t *phdl, uint32_t min,
+ uint32_t max)
+{
+ varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+ mac_propval_range_t *rangep = (mac_propval_range_t *)infop->vprop_poss;
+
+ if (rangep->mpr_count != 0 && rangep->mpr_type != MAC_PROPVAL_UINT32)
+ return;
+
+ if (infop->vprop_psize + sizeof (mac_propval_uint32_range_t) >
+ sizeof (infop->vprop_poss))
+ return;
+
+ infop->vprop_psize += sizeof (mac_propval_uint32_range_t);
+ rangep->mpr_count++;
+ rangep->mpr_type = MAC_PROPVAL_UINT32;
+ rangep->u.mpr_uint32[rangep->mpr_count-1].mpur_min = min;
+ rangep->u.mpr_uint32[rangep->mpr_count-1].mpur_max = max;
+}
+
+void
+libvarpd_prop_set_range_str(varpd_prop_handle_t *phdl, const char *str)
+{
+ varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+ size_t len = strlen(str) + 1; /* Account for a null terminator */
+ mac_propval_range_t *rangep = (mac_propval_range_t *)infop->vprop_poss;
+ mac_propval_str_range_t *pstr = &rangep->u.mpr_str;
+
+ if (rangep->mpr_count != 0 && rangep->mpr_type != MAC_PROPVAL_STR)
+ return;
+
+ if (infop->vprop_psize + len > sizeof (infop->vprop_poss))
+ return;
+
+ rangep->mpr_count++;
+ rangep->mpr_type = MAC_PROPVAL_STR;
+ (void) strlcpy((char *)&pstr->mpur_data[pstr->mpur_nextbyte], str,
+ sizeof (infop->vprop_poss) - infop->vprop_psize);
+ pstr->mpur_nextbyte += len;
+ infop->vprop_psize += len;
+}
+
+int
+libvarpd_prop_handle_alloc(varpd_handle_t *vph, varpd_instance_handle_t *inst,
+ varpd_prop_handle_t **phdlp)
+{
+ varpd_prop_info_t *infop;
+
+ infop = umem_alloc(sizeof (varpd_prop_info_t), UMEM_DEFAULT);
+ if (infop == NULL)
+ return (ENOMEM);
+
+ bzero(infop, sizeof (varpd_prop_info_t));
+ infop->vprop_vip = (varpd_impl_t *)vph;
+ infop->vprop_instance = (varpd_instance_t *)inst;
+
+ *phdlp = (varpd_prop_handle_t *)infop;
+ return (0);
+}
+
+void
+libvarpd_prop_handle_free(varpd_prop_handle_t *phdl)
+{
+ umem_free(phdl, sizeof (varpd_prop_info_t));
+}
+
+int
+libvarpd_prop_nprops(varpd_instance_handle_t *ihdl, uint_t *np)
+{
+ int ret;
+ varpd_instance_t *instp = (varpd_instance_t *)ihdl;
+
+ ret = instp->vri_plugin->vpp_ops->vpo_nprops(instp->vri_private, np);
+ if (ret != 0)
+ return (ret);
+ *np += varpd_nintprops;
+ return (0);
+}
+
+static int
+libvarpd_prop_info_fill_int_cb(varpd_handle_t *handle, const char *name,
+ void *arg)
+{
+ varpd_prop_handle_t *vph = arg;
+ libvarpd_prop_set_range_str(vph, name);
+ return (0);
+}
+
+static int
+libvarpd_prop_info_fill_int(varpd_prop_handle_t *vph, uint_t propid)
+{
+ varpd_prop_info_t *infop = (varpd_prop_info_t *)vph;
+ if (propid >= varpd_nintprops)
+ abort();
+ libvarpd_prop_set_name(vph, varpd_intprops[0]);
+ libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_READ);
+ libvarpd_prop_set_type(vph, OVERLAY_PROP_T_STRING);
+ libvarpd_prop_set_nodefault(vph);
+ libvarpd_plugin_walk((varpd_handle_t *)infop->vprop_instance->vri_impl,
+ libvarpd_prop_info_fill_int_cb, vph);
+ return (0);
+}
+
+int
+libvarpd_prop_info_fill(varpd_prop_handle_t *phdl, uint_t propid)
+{
+ varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+ varpd_instance_t *instp = infop->vprop_instance;
+ mac_propval_range_t *rangep = (mac_propval_range_t *)infop->vprop_poss;
+
+ infop->vprop_psize = sizeof (mac_propval_range_t);
+
+ bzero(rangep, sizeof (mac_propval_range_t));
+ if (propid < varpd_nintprops) {
+ return (libvarpd_prop_info_fill_int(phdl, propid));
+ } else {
+ varpd_plugin_t *vpp = instp->vri_plugin;
+ return (vpp->vpp_ops->vpo_propinfo(instp->vri_private,
+ propid - varpd_nintprops, phdl));
+ }
+}
+
+int
+libvarpd_prop_info(varpd_prop_handle_t *phdl, const char **namep,
+ uint_t *typep, uint_t *protp, const void **defp, uint32_t *sizep,
+ const mac_propval_range_t **possp)
+{
+ varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+ if (namep != NULL)
+ *namep = infop->vprop_name;
+ if (typep != NULL)
+ *typep = infop->vprop_type;
+ if (protp != NULL)
+ *protp = infop->vprop_prot;
+ if (defp != NULL)
+ *defp = infop->vprop_default;
+ if (sizep != NULL)
+ *sizep = infop->vprop_psize;
+ if (possp != NULL)
+ *possp = (mac_propval_range_t *)infop->vprop_poss;
+ return (0);
+}
+
+int
+libvarpd_prop_get(varpd_prop_handle_t *phdl, void *buf, uint32_t *sizep)
+{
+ varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+ varpd_instance_t *instp = infop->vprop_instance;
+
+ if (infop->vprop_name[0] == '\0')
+ return (EINVAL);
+
+ if (strcmp(varpd_intprops[0], infop->vprop_name) == 0) {
+ /* search property */
+ return (libvarpd_prop_get_search(infop, buf, sizep));
+ }
+
+ return (instp->vri_plugin->vpp_ops->vpo_getprop(instp->vri_private,
+ infop->vprop_name, buf, sizep));
+}
+
+int
+libvarpd_prop_set(varpd_prop_handle_t *phdl, const void *buf, uint32_t size)
+{
+ int i;
+ varpd_prop_info_t *infop = (varpd_prop_info_t *)phdl;
+ varpd_instance_t *instp = infop->vprop_instance;
+
+ if (infop->vprop_name[0] == '\0')
+ return (EINVAL);
+
+ for (i = 0; i < varpd_nintprops; i++) {
+ if (strcmp(infop->vprop_name, varpd_intprops[i]) == 0) {
+ return (EPERM);
+ }
+ }
+
+ return (instp->vri_plugin->vpp_ops->vpo_setprop(instp->vri_private,
+ infop->vprop_name, buf, size));
+}
+
+void
+libvarpd_prop_door_convert(const varpd_prop_handle_t *phdl,
+ varpd_client_propinfo_arg_t *vcfap)
+{
+ const varpd_prop_info_t *infop = (const varpd_prop_info_t *)phdl;
+
+ vcfap->vcfa_type = infop->vprop_type;
+ vcfap->vcfa_prot = infop->vprop_prot;
+ vcfap->vcfa_defsize = infop->vprop_defsize;
+ vcfap->vcfa_psize = infop->vprop_psize;
+ bcopy(infop->vprop_name, vcfap->vcfa_name, LIBVARPD_PROP_NAMELEN);
+ bcopy(infop->vprop_default, vcfap->vcfa_default, LIBVARPD_PROP_SIZEMAX);
+ bcopy(infop->vprop_poss, vcfap->vcfa_poss, LIBVARPD_PROP_SIZEMAX);
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_provider.h b/usr/src/lib/varpd/libvarpd/common/libvarpd_provider.h
new file mode 100644
index 0000000000..64fa99d308
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_provider.h
@@ -0,0 +1,419 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _LIBVARPD_PROVIDER_H
+#define _LIBVARPD_PROVIDER_H
+
+/*
+ * varpd provider interface for lookup modules
+ *
+ * This header file defines all the structures and functions that a given lookup
+ * module needs to implement and perform its purpose. At this time, all of these
+ * interfaces are considered private to illumos and therefore are subject to
+ * change. At some point we will move to more broadly stabilize these interfaces
+ * and commit to them. Until such time, expect breakage for out of gate
+ * consumers.
+ *
+ * A plugin is a dynamic shared object that is placed inside of varpd's default
+ * module.
+ *
+ * The shared object must define an initializer, such as with #pragma init. This
+ * function will be run with the module is dlopened by libvarpd. In that init
+ * function, the function must allocate a varpd_plugin_register by calling
+ * libvarpd_plugin_alloc() and specifying VARPD_CURRENT_VERSION. If that
+ * succeeds, then it should proceed to fill out the registration and then call,
+ * libvarpd_plugin_register() with it. Regardless of whether it succeeds or
+ * fails, it should call libvarpd_plugin_free(). In the case of failure, there
+ * is not much that the module should do, other than log some message to the
+ * standard bunyan logger that exists.
+ *
+ * Once libvarpd_plugin_register() returns, the module should assume that any
+ * of the operations it defined in the operation vector may be called and
+ * therefore it is recommended that any other required initialization should be
+ * performed at that time.
+ *
+ * At this time, once a plugin is loaded, it will not be unloaded. Therefore,
+ * there is no corresponding requirement to unregister, though that may come in
+ * a future version.
+ *
+ * -----------------------------
+ * Plugin Types and Destinations
+ * -----------------------------
+ *
+ * There are two different kinds of plugins in this world, there are point to
+ * point plugins and there are dynamic plugins. The key difference is in how
+ * packets are routed through the system. In a point to point plugin, a single
+ * destination is used when the instance is started. In dynamic plugins,
+ * destinations are looked up as they are required and an instance of a plugin
+ * is required to provide that.
+ *
+ * These point to point plugins define a type of OVERLAY_TARGET_POINT and the
+ * dynamic plugins instead define a type of OVERLAY_TARGET_DYNAMIC.
+ *
+ * Encapsulation plugins have multiple types of destinations. They may require
+ * an Ethernet address (OVERLAY_PLUGIN_D_ETHERNET), IP address
+ * (OVERLAY_PLUGIN_D_IP), and a port (OVERLAY_PLUGIN_D_PORT). For example,
+ * consider vxlan, it requires an IP and a port; while a hypothetical nvgre,
+ * would only require an IP.
+ *
+ * A plugin is allowed to describe which of these fields that it supports and
+ * given which encapsulation plugin it is paired with, it can support a varying
+ * degree of properties. For example, consider the example of the direct plugin.
+ * It has a notion of a destination port and a destination IP. If it is paired
+ * with a plugin that only requires an IP, then it wouldn't need to show a
+ * property that's related to a destination port.
+ *
+ * ------------------
+ * Plugin Definitions
+ * ------------------
+ *
+ * A plugin is required to fill in both an operations vector and a series of
+ * additional metadata that it passed in to libvarpd_plugin_register(). The
+ * following lists all of the routines and their purposes. The full signatures
+ * are available in the body of the header file.
+ *
+ * varpd_plugin_create_f
+ *
+ * Create a new instance of a plugin. Each instance refers to a different
+ * overlay device and thus a different overlay identifier. Each instance
+ * has its own property space and is unique. This function gives the chance
+ * for the plugin to create and provide any private data that it will
+ * require.
+ *
+ * In addition, the plugin is given the type of destination that is
+ * required and it is its job to determine whether or not it supports it.
+ *
+ * varpd_plugin_destory_f
+ *
+ * This is the opposite of varpd_plugin_create_f. It is called to allow the
+ * plugin to reclaim any resources with the private argument that it passed
+ * out as part of the destroy function.
+ *
+ * varpd_plugin_start_f
+ *
+ * This routine is called to indicate that an instance should be started.
+ * This is a plugin's chance to verify that it has all of its required
+ * properties set and to take care of any action that needs to be handled
+ * to begin the plugin. After this point it will be legal to have the
+ * varpd_plugin_default_f, varpd_plugin_lookup_f, varpd_plugin_arp_f and
+ * varpd_plugin_dhcp_f endpoints called.
+ *
+ * varpd_plugin_stop_f
+ *
+ * This routine is called to indicate that an instance is stopping, it is
+ * the opposite of varpd_plugin_start_f. This is a chance to clean up
+ * resources that are a side effect of having started the instance.
+ *
+ * varpd_plugin_default_f
+ *
+ * This routine is defined by plugins of type OVERLAY_TARGET_POINT. It is
+ * used to answer the question of where should all traffic for this
+ * instance be destined. Plugins of type OVERLAY_TARGET_DYNAMIC should
+ * leave this entry set to NULL.
+ *
+ * On success, the default routine should return VARPD_LOOKUP_OK. On
+ * failure, it should return the macro VARPD_LOOKUP_DROP.
+ *
+ * varpd_plugin_lookup_f
+ *
+ * This routine must be defined by plugins of type OVERLAY_TARGET_DYNAMIC.
+ * It is used to lookup the destination for a given request. Each request
+ * comes in with its own MAC address this allows a plugin to direct it to
+ * any remote location.
+ *
+ * This is designed as an asynchronous API. Once a lookup is completed it
+ * should call libvarpd_plugin_query_reply() and pass as the second
+ * argument either VARPD_LOOKUP_OK to indicate that it went alright or it
+ * should reply VARPD_LOOKUP_DROP to indicate that the packet should be
+ * dropped.
+ *
+ * In addition, there are several utility routines that can take care of
+ * various kinds of traffic automatically. For example, if an ARP, NDP, or
+ * DHCP packet comes in, there are utilities such as
+ * libvarpd_plugin_proxy_arp(), libvarpd_plugin_proxy_ndp() and
+ * libvarpd_plugin_proxy_dhcp(), which allows the system to do the heavy
+ * lifting of validating the packet once it finds that it matches certain
+ * properties.
+ *
+ * varpd_plugin_arp_f
+ *
+ * This is an optional entry for plugins of type OVERLAY_TARGET_DYNAMIC.
+ * This is called after a plugin calls libvarpd_plugin_proxy_arp() and is
+ * used to ask the plugin to perform an ARP or NDP query. The type of query
+ * is passed in in the third argument, the only valid value for which will
+ * be VARPD_QTYPE_ETHERNET, to indicate we're doing an Ethernet lookup.
+ *
+ * The layer three IP address that is being looked up will be included in
+ * the struct sockaddr. The sockaddr(3SOCKET)'s sa_family will be set to
+ * indicate the type, eg. AF_INET or AF_INET6 and that will indicate the
+ * kind of sockaddr that will be used. For more information see
+ * sockaddr(3SOCKET). The implementation ensures that enough space for the
+ * link layer address will exist.
+ *
+ * This is an asynchronous lookup. Once the answer has been written, a
+ * plugin should call libvarpd_plugin_arp_reply and if it was successful,
+ * VARPD_LOOKUP_OK should be passed in and if it failed, VARPD_LOOKUP_DROP
+ * should be passed in instead.
+ *
+ * varpd_plugin_dhcp_f
+ *
+ * This is an optional entry for plugins of type OVERLAY_TARGET_DYNAMIC.
+ * This is called after a plugin calls the libvarpd_plugin_proxy_dhcp() and
+ * is used to ask the plugin to determine where is the DHCP server that
+ * this packet should actually be sent to. What is happening here is that
+ * rather than broadcast the initial DHCP request, we instead unicast it to
+ * a specified DHCP server that this operation vector indicates.
+ *
+ * The plugin is given a type, the same as the ARP plugin which indicates
+ * the kind of link layer address, the only valid type is
+ * VARPD_QTYPE_ETHERNET, other types should be rejected. Then, like the arp
+ * entry point, the dhcp entry point should determine the link layer
+ * address of the DHCP server and write that out in the appropriate memory
+ * and call libvarpd_plugin_dhcp_reply() when done. Similar to the arp
+ * entry point, it should use VARPD_LOOKUP_OK to indicate that it was
+ * filled in and VARPD_LOOKUP_DROP to indicate that it was not.
+ *
+ * varpd_plugin_nprops_f
+ *
+ * This is used by a plugin to indicate the number of properties that
+ * should exist for this instance. Recall from the section that Plugin
+ * types and Destinations, that the number of entries here may vary. As
+ * such, the plugin should return the number that is appropriate for the
+ * instance.
+ *
+ * This number will be used to obtain information about a property via the
+ * propinfo functions. However, the getprop and setprop interfaces will
+ * always use names to indicate the property it is getting and setting.
+ * This difference is structured this way to deal with property discovery
+ * and to make the getprop and setprop interfaces slightly easier for other
+ * parts of the broader varpd/dladm infrastructure.
+ *
+ * varpd_plugin_propinfo_f
+ *
+ * This interface is used to get information about a property, the property
+ * that information is being requested for is being passed in via the
+ * second argument. Here, callers should set properties such as the name,
+ * the protection, whether or not the property is required, set any default
+ * value, if it exist, and if relevant, set the valid range of values.
+ *
+ * varpd_plugin_getprop_f
+ *
+ * This is used to get the value of a property, if it is set. The passed in
+ * length indicates the length of the buffer that is used for updating
+ * properties. If it is not of sufficient size, the function should return
+ * an error and not update the buffer. Otherwise, it should update the size
+ * pointer with the valid size.
+ *
+ * varpd_plugin_setprop_f
+ *
+ * This is used to set the value of a property. An endpoint should validate
+ * that the property is valid before updating it. In addition, it should
+ * update its state as appropriate.
+ *
+ * varpd_plugin_save_f
+ *
+ * This is used to serialize the state of a given instance of a plugin such
+ * that if varpd crashes, it can be recovered. The plugin should write all
+ * state into the nvlist that it is passed in, it may use any keys and
+ * values that it wants. The only consumer of that nvlist will be the
+ * plugin itself when the restore endpoint is called.
+ *
+ * varpd_plugin_restore_f
+ *
+ * This is called by the server to restore an instance that used to exist,
+ * but was lost due to a crash. This is a combination of calling create and
+ * setting properties. The plugin should restore any private state that it
+ * can find recorded from the nvlist. The only items in the nvlist will be
+ * those that were written out during a previous call to
+ * varpd_plugin_save_f.
+ *
+ *
+ * Once all of these interfaces are implemented, the plugin should define the
+ * following members in the varpd_plugin_register_t.
+ *
+ * vpr_version
+ *
+ * This indicates the version of the plugin. Plugins should set this to the
+ * macro VARPD_CURRENT_VERSION.
+ *
+ * vpr_mode
+ *
+ * This indicates the mode of the plugin. The plugin's mode should be one
+ * of OVERLAY_TARGET_POINT and OVERLAY_TARGET_DYNAMIC. For more discussion
+ * of these types and the differences, see the section on Plugin Types and
+ * Destinations.
+ *
+ * vpr_name
+ *
+ * This is the name of the plugin. This is how users will refer to it in
+ * the context of running dladm(1M) commands. Note, this name must be
+ * unique across the different plugins, as it will cause others with the
+ * same name not to be registered.
+ *
+ * vpr_ops
+ *
+ * This is the operations vector as described above. Importantly, the
+ * member vpo_callbacks must be set to zero, this is being used for future
+ * expansion of the structure.
+ *
+ *
+ * --------------------------------------------------
+ * Downcalls, Upcalls, and Synchronization Guarantees
+ * --------------------------------------------------
+ *
+ * Every instance of a plugin is independent. Calls into a plugin may be made
+ * for different instances in parallel. Any necessary locking is left to the
+ * plugin module. Within an instance, various calls may come in parallel.
+ *
+ * The primary guarantees are that none of the varpd_plugin_save_f,
+ * varpd_plugin_lookup_f, varpd_default_f, varpd_plugin_arp_f, and
+ * varpd_plugin_dhcp_f will be called until after a call to varpd_plugin_start_f
+ * has been called. Similarly, they will not be called after a call to
+ * vardp_plugin_stop_f.
+ *
+ * The functions documented in this header may be called back into from any
+ * context, including from the operation vectors.
+ */
+
+#include <bunyan.h>
+#include <libvarpd.h>
+#include <libnvpair.h>
+#include <sys/socket.h>
+#include <sys/overlay_target.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define VARPD_VERSION_ONE 1
+#define VARPD_CURRENT_VERSION VARPD_VERSION_ONE
+
+typedef struct __varpd_provier_handle varpd_provider_handle_t;
+typedef struct __varpd_query_handle varpd_query_handle_t;
+typedef struct __varpd_arp_handle varpd_arp_handle_t;
+typedef struct __varpd_dhcp_handle varpd_dhcp_handle_t;
+
+typedef int (*varpd_plugin_create_f)(varpd_provider_handle_t *, void **,
+ overlay_plugin_dest_t);
+typedef int (*varpd_plugin_start_f)(void *);
+typedef void (*varpd_plugin_stop_f)(void *);
+typedef void (*varpd_plugin_destroy_f)(void *);
+
+#define VARPD_LOOKUP_OK (0)
+#define VARPD_LOOKUP_DROP (-1)
+typedef int (*varpd_plugin_default_f)(void *, overlay_target_point_t *);
+typedef void (*varpd_plugin_lookup_f)(void *, varpd_query_handle_t *,
+ const overlay_targ_lookup_t *, overlay_target_point_t *);
+
+#define VARPD_QTYPE_ETHERNET 0x0
+typedef void (*varpd_plugin_arp_f)(void *, varpd_arp_handle_t *, int,
+ const struct sockaddr *, uint8_t *);
+typedef void (*varpd_plugin_dhcp_f)(void *, varpd_dhcp_handle_t *, int,
+ const overlay_targ_lookup_t *, uint8_t *);
+
+typedef int (*varpd_plugin_nprops_f)(void *, uint_t *);
+typedef int (*varpd_plugin_propinfo_f)(void *, const uint_t,
+ varpd_prop_handle_t *);
+typedef int (*varpd_plugin_getprop_f)(void *, const char *, void *, uint32_t *);
+typedef int (*varpd_plugin_setprop_f)(void *, const char *, const void *,
+ const uint32_t);
+
+typedef int (*varpd_plugin_save_f)(void *, nvlist_t *);
+typedef int (*varpd_plugin_restore_f)(nvlist_t *, varpd_provider_handle_t *,
+ overlay_plugin_dest_t, void **);
+
+typedef struct varpd_plugin_ops {
+ uint_t vpo_callbacks;
+ varpd_plugin_create_f vpo_create;
+ varpd_plugin_start_f vpo_start;
+ varpd_plugin_stop_f vpo_stop;
+ varpd_plugin_destroy_f vpo_destroy;
+ varpd_plugin_default_f vpo_default;
+ varpd_plugin_lookup_f vpo_lookup;
+ varpd_plugin_nprops_f vpo_nprops;
+ varpd_plugin_propinfo_f vpo_propinfo;
+ varpd_plugin_getprop_f vpo_getprop;
+ varpd_plugin_setprop_f vpo_setprop;
+ varpd_plugin_save_f vpo_save;
+ varpd_plugin_restore_f vpo_restore;
+ varpd_plugin_arp_f vpo_arp;
+ varpd_plugin_dhcp_f vpo_dhcp;
+} varpd_plugin_ops_t;
+
+typedef struct varpd_plugin_register {
+ uint_t vpr_version;
+ uint_t vpr_mode;
+ const char *vpr_name;
+ const varpd_plugin_ops_t *vpr_ops;
+} varpd_plugin_register_t;
+
+extern varpd_plugin_register_t *libvarpd_plugin_alloc(uint_t, int *);
+extern void libvarpd_plugin_free(varpd_plugin_register_t *);
+extern int libvarpd_plugin_register(varpd_plugin_register_t *);
+
+/*
+ * Blowing up and logging
+ */
+extern void libvarpd_panic(const char *, ...) __NORETURN;
+extern const bunyan_logger_t *libvarpd_plugin_bunyan(varpd_provider_handle_t *);
+
+/*
+ * Misc. Information APIs
+ */
+extern uint64_t libvarpd_plugin_vnetid(varpd_provider_handle_t *);
+
+/*
+ * Lookup Replying query and proxying
+ */
+extern void libvarpd_plugin_query_reply(varpd_query_handle_t *, int);
+
+extern void libvarpd_plugin_proxy_arp(varpd_provider_handle_t *,
+ varpd_query_handle_t *, const overlay_targ_lookup_t *);
+extern void libvarpd_plugin_proxy_ndp(varpd_provider_handle_t *,
+ varpd_query_handle_t *, const overlay_targ_lookup_t *);
+extern void libvarpd_plugin_arp_reply(varpd_arp_handle_t *, int);
+
+extern void libvarpd_plugin_proxy_dhcp(varpd_provider_handle_t *,
+ varpd_query_handle_t *, const overlay_targ_lookup_t *);
+extern void libvarpd_plugin_dhcp_reply(varpd_dhcp_handle_t *, int);
+
+
+/*
+ * Property information callbacks
+ */
+extern void libvarpd_prop_set_name(varpd_prop_handle_t *, const char *);
+extern void libvarpd_prop_set_prot(varpd_prop_handle_t *, overlay_prop_prot_t);
+extern void libvarpd_prop_set_type(varpd_prop_handle_t *, overlay_prop_type_t);
+extern int libvarpd_prop_set_default(varpd_prop_handle_t *, void *, ssize_t);
+extern void libvarpd_prop_set_nodefault(varpd_prop_handle_t *);
+extern void libvarpd_prop_set_range_uint32(varpd_prop_handle_t *, uint32_t,
+ uint32_t);
+extern void libvarpd_prop_set_range_str(varpd_prop_handle_t *, const char *);
+
+/*
+ * Various injecting and invalidation routines
+ */
+extern void libvarpd_inject_varp(varpd_provider_handle_t *, const uint8_t *,
+ const overlay_target_point_t *);
+extern void libvarpd_inject_arp(varpd_provider_handle_t *, const uint16_t,
+ const uint8_t *, const struct in_addr *, const uint8_t *);
+extern void libvarpd_fma_degrade(varpd_provider_handle_t *, const char *);
+extern void libvarpd_fma_restore(varpd_provider_handle_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBVARPD_PROVIDER_H */
diff --git a/usr/src/lib/varpd/libvarpd/common/libvarpd_util.c b/usr/src/lib/varpd/libvarpd/common/libvarpd_util.c
new file mode 100644
index 0000000000..e21fed2126
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/libvarpd_util.c
@@ -0,0 +1,97 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <libvarpd_impl.h>
+#include <assert.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+const char *
+libvarpd_isaext(void)
+{
+#if defined(__sparc)
+#if defined(__sparcv9)
+ return ("64");
+#else /* __sparcv9 */
+ return ("");
+#endif /* __sparvc9 */
+#elif defined(__amd64)
+ return ("64");
+#elif defined(__i386)
+ return ("");
+#else
+#error "unkonwn ISA"
+#endif
+}
+
+int
+libvarpd_dirwalk(varpd_impl_t *vip, const char *path, const char *suffix,
+ libvarpd_dirwalk_f func, void *arg)
+{
+ int ret;
+ size_t slen;
+ char *dirpath, *filepath;
+ DIR *dirp;
+ struct dirent *dp;
+ assert(vip != NULL && path != NULL);
+
+ if (asprintf(&dirpath, "%s/%s", path, libvarpd_isaext()) == -1)
+ return (errno);
+
+ if ((dirp = opendir(dirpath)) == NULL) {
+ ret = errno;
+ return (ret);
+ }
+
+ slen = strlen(suffix);
+ for (;;) {
+ size_t len;
+
+ errno = 0;
+ dp = readdir(dirp);
+ if (dp == NULL) {
+ ret = errno;
+ break;
+ }
+
+ len = strlen(dp->d_name);
+ if (len <= slen)
+ continue;
+
+ if (strcmp(suffix, dp->d_name + (len - slen)) != 0)
+ continue;
+
+ if (asprintf(&filepath, "%s/%s", dirpath, dp->d_name) == -1) {
+ ret = errno;
+ break;
+ }
+
+ if (func(vip, filepath, arg) != 0) {
+ free(filepath);
+ ret = 0;
+ break;
+ }
+
+ free(filepath);
+ }
+
+ (void) closedir(dirp);
+ free(dirpath);
+ return (ret);
+}
diff --git a/usr/src/lib/varpd/libvarpd/common/llib-lvarpd b/usr/src/lib/varpd/libvarpd/common/llib-lvarpd
new file mode 100644
index 0000000000..85150d3463
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/llib-lvarpd
@@ -0,0 +1,19 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/* LINTLIBRARY */
+/* PROTOLIB1 */
+
+#include <libvarpd.h>
diff --git a/usr/src/lib/varpd/libvarpd/common/mapfile-plugin b/usr/src/lib/varpd/libvarpd/common/mapfile-plugin
new file mode 100644
index 0000000000..8cef7f669f
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/mapfile-plugin
@@ -0,0 +1,57 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_SCOPE {
+ global:
+ libvarpd_fma_degrade { FLAGS = EXTERN };
+ libvarpd_inject_arp { FLAGS = EXTERN };
+ libvarpd_inject_ndp { FLAGS = EXTERN };
+ libvarpd_inject_varp { FLAGS = EXTERN };
+ libvarpd_fma_restore { FLAGS = EXTERN };
+ libvarpd_panic { FLAGS = EXTERN };
+ libvarpd_plugin_alloc { FLAGS = EXTERN };
+ libvarpd_plugin_arp_reply { FLAGS = EXTERN };
+ libvarpd_plugin_dhcp_reply { FLAGS = EXTERN };
+ libvarpd_plugin_free { FLAGS = EXTERN };
+ libvarpd_plugin_proxy_arp { FLAGS = EXTERN };
+ libvarpd_plugin_proxy_dhcp { FLAGS = EXTERN };
+ libvarpd_plugin_proxy_ndp { FLAGS = EXTERN };
+ libvarpd_plugin_query_reply { FLAGS = EXTERN };
+ libvarpd_plugin_register { FLAGS = EXTERN };
+ libvarpd_plugin_vnetid { FLAGS = EXTERN };
+ libvarpd_prop_set_name { FLAGS = EXTERN };
+ libvarpd_prop_set_prot { FLAGS = EXTERN };
+ libvarpd_prop_set_type { FLAGS = EXTERN };
+ libvarpd_prop_set_default { FLAGS = EXTERN };
+ libvarpd_prop_set_nodefault { FLAGS = EXTERN };
+ libvarpd_prop_set_range_uint32 { FLAGS = EXTERN };
+ libvarpd_prop_set_rangestr { FLAGS = EXTERN };
+};
diff --git a/usr/src/lib/varpd/libvarpd/common/mapfile-vers b/usr/src/lib/varpd/libvarpd/common/mapfile-vers
new file mode 100644
index 0000000000..7aa930cb54
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/common/mapfile-vers
@@ -0,0 +1,113 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_VERSION SUNWprivate {
+ global:
+ libvarpd_c_create;
+ libvarpd_c_destroy;
+ libvarpd_c_instance_activate;
+ libvarpd_c_instance_create;
+ libvarpd_c_instance_destroy;
+ libvarpd_c_prop_nprops;
+ libvarpd_c_prop_handle_alloc;
+ libvarpd_c_prop_handle_free;
+ libvarpd_c_prop_info_fill;
+ libvarpd_c_prop_info_fill_by_name;
+ libvarpd_c_prop_info;
+ libvarpd_c_prop_get;
+ libvarpd_c_prop_set;
+
+ libvarpd_c_instance_lookup;
+ libvarpd_c_instance_target_mode;
+ libvarpd_c_instance_cache_flush;
+ libvarpd_c_instance_cache_delete;
+ libvarpd_c_instance_cache_get;
+ libvarpd_c_instance_cache_set;
+ libvarpd_c_instance_cache_walk;
+
+ libvarpd_create;
+ libvarpd_destroy;
+
+ libvarpd_door_server_create;
+ libvarpd_door_server_destroy;
+
+ libvarpd_fma_degrade;
+ libvarpd_fma_restore;
+
+ libvarpd_inject_varp;
+ libvarpd_inject_arp;
+
+ libvarpd_instance_activate;
+ libvarpd_instance_create;
+ libvarpd_instance_destroy;
+ libvarpd_instance_lookup;
+ libvarpd_instance_id;
+
+ libvarpd_panic;
+
+ libvarpd_persist_disable;
+ libvarpd_persist_enable;
+ libvarpd_persist_restore;
+
+ libvarpd_plugin_alloc;
+ libvarpd_plugin_load;
+ libvarpd_plugin_free;
+ libvarpd_plugin_arp_reply;
+ libvarpd_plugin_dhcp_reply;
+ libvarpd_plugin_query_reply;
+ libvarpd_plugin_proxy_arp;
+ libvarpd_plugin_proxy_dhcp;
+ libvarpd_plugin_proxy_ndp;
+ libvarpd_plugin_register;
+ libvarpd_plugin_walk;
+ libvarpd_plugin_vnetid;
+
+ libvarpd_prop_set_default;
+ libvarpd_prop_set_nodefault;
+ libvarpd_prop_set_name;
+ libvarpd_prop_set_prot;
+ libvarpd_prop_set_range_uint32;
+ libvarpd_prop_set_range_str;
+ libvarpd_prop_set_type;
+
+ libvarpd_prop_handle_alloc;
+ libvarpd_prop_handle_free;
+ libvarpd_prop_nprops;
+ libvarpd_prop_info_fill;
+ libvarpd_prop_info;
+ libvarpd_prop_get;
+ libvarpd_prop_set;
+
+ libvarpd_overlay_lookup_quiesce;
+ libvarpd_overlay_lookup_run;
+ local:
+ *;
+};
diff --git a/usr/src/lib/varpd/libvarpd/i386/Makefile b/usr/src/lib/varpd/libvarpd/i386/Makefile
new file mode 100644
index 0000000000..f2b4f63da5
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/i386/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/varpd/libvarpd/sparc/Makefile b/usr/src/lib/varpd/libvarpd/sparc/Makefile
new file mode 100644
index 0000000000..f2b4f63da5
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/sparc/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/varpd/libvarpd/sparcv9/Makefile b/usr/src/lib/varpd/libvarpd/sparcv9/Makefile
new file mode 100644
index 0000000000..d552642882
--- /dev/null
+++ b/usr/src/lib/varpd/libvarpd/sparcv9/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+include ../Makefile.com
+include ../../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/varpd/svp/Makefile b/usr/src/lib/varpd/svp/Makefile
new file mode 100644
index 0000000000..275f07bf8b
--- /dev/null
+++ b/usr/src/lib/varpd/svp/Makefile
@@ -0,0 +1,40 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+include ../../Makefile.lib
+
+SUBDIRS = $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
+
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+install := TARGET = install
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber install lint: $(SUBDIRS)
+
+install_h:
+
+check:
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
+
+include ../../Makefile.targ
diff --git a/usr/src/lib/varpd/svp/Makefile.com b/usr/src/lib/varpd/svp/Makefile.com
new file mode 100644
index 0000000000..15b6540e74
--- /dev/null
+++ b/usr/src/lib/varpd/svp/Makefile.com
@@ -0,0 +1,54 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+LIBRARY = libvarpd_svp.a
+VERS = .1
+OBJECTS = libvarpd_svp.o \
+ libvarpd_svp_conn.o \
+ libvarpd_svp_crc.o \
+ libvarpd_svp_host.o \
+ libvarpd_svp_loop.o \
+ libvarpd_svp_remote.o \
+ libvarpd_svp_shootdown.o \
+ libvarpd_svp_timer.o
+
+include ../../../Makefile.lib
+include ../../Makefile.plugin
+
+LIBS = $(DYNLIB)
+
+#
+# Yes, this isn't a command, but libcmdutils does have the list(9F)
+# functions and better to use that then compile list.o yet again
+# ourselves... probably.
+#
+LDLIBS += -lc -lumem -lnvpair -lsocket -lnsl -lavl \
+ -lcmdutils -lidspace -lbunyan
+CPPFLAGS += -I../common
+
+LINTFLAGS += -erroff=E_BAD_PTR_CAST_ALIGN
+LINTFLAGS64 += -erroff=E_BAD_PTR_CAST_ALIGN
+SRCDIR = ../common
+
+C99MODE= -xc99=%all
+C99LMODE= -Xc99=%all
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+lint: lintcheck
+
+include ../../../Makefile.targ
diff --git a/usr/src/lib/varpd/svp/amd64/Makefile b/usr/src/lib/varpd/svp/amd64/Makefile
new file mode 100644
index 0000000000..d552642882
--- /dev/null
+++ b/usr/src/lib/varpd/svp/amd64/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+include ../Makefile.com
+include ../../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp.c b/usr/src/lib/varpd/svp/common/libvarpd_svp.c
new file mode 100644
index 0000000000..58828065a1
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/libvarpd_svp.c
@@ -0,0 +1,1138 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015, Joyent, Inc.
+ */
+
+/*
+ * This plugin implements the SDC VXLAN Protocol (SVP).
+ *
+ * This plugin is designed to work with a broader distributed system that
+ * mainains a database of mappings and provides a means of looking up data and
+ * provides a stream of updates. While it is named after VXLAN, there isn't
+ * anything specific to VXLAN baked into the protocol at this time, other than
+ * that it requires both an IP address and a port; however, if there's a good
+ * reason to support others here, we can modify that.
+ *
+ * -----------
+ * Terminology
+ * -----------
+ *
+ * Throughout this module we refer to a few different kinds of addresses:
+ *
+ * VL3
+ *
+ * A VL3 address, or virtual layer 3, refers to the layer three addreses
+ * that are used by entities on an overlay network. As far as we're
+ * concerned that means that this is the IP address of an interface on an
+ * overlay network.
+ *
+ * VL2
+ *
+ * A VL2 address, or a virtual layer 2, referes to the link-layer addresses
+ * that are used by entities on an overlay network. As far as we're
+ * concerned that means that this is the MAC addresses of an interface on
+ * an overlay network.
+ *
+ * UL3
+ *
+ * A UL3, or underlay layer 3, refers to the layer three (IP) address on
+ * the underlay network.
+ *
+ * The svp plugin provides lookups from VL3->VL2, eg. the equivalent of an ARP
+ * or NDP query, and then also provides VL2->UL3 lookups.
+ *
+ * -------------------
+ * Protocol Operations
+ * -------------------
+ *
+ * The svp protocol is defined in lib/varpd/svp/common/libvarpd_svp_prot.h. It
+ * defines the basic TCP protocol that we use to communicate to hosts. At this
+ * time, it is not quite 100% implemented in both this plug-in and our primary
+ * server, sdc-portolan (see https://github.com/joyent/sdc-portolan).
+ *
+ * At this time, we don't quite support everything that we need to. Including
+ * the SVP_R_BULK_REQ and SVP_R_SHOOTDOWN.
+ *
+ * ---------------------------------
+ * General Design and Considerations
+ * ---------------------------------
+ *
+ * Every instance of the svp plugin requires the hostname and port of a server
+ * to contact. Though, we have co-opted the port 1296 (the year of the oldest
+ * extant portolan) as our default port.
+ *
+ * Each of the different instance of the plugins has a corresponding remote
+ * backend. The remote backend represents the tuple of the [ host, port ].
+ * Different instances that share the same host and port tuple will use the same
+ * backend.
+ *
+ * The backend is actually in charge of performing lookups, resolving and
+ * updating the set of remote hosts based on the DNS resolution we've been
+ * provided, and taking care of things like shootdowns.
+ *
+ * The whole plugin itself maintains an event loop and a number of threads to
+ * service that event loop. On top of that event loop, we have a simple timer
+ * backend that ticks at one second intervals and performs various callbacks,
+ * such as idle query timers, DNS resolution, connection backoff, etc. Each of
+ * the remote hosts that we obtain is wrapped up in an svp_conn_t, which manages
+ * the connection state, reconnecting, etc.
+ *
+ * All in all, the general way that this all looks like is:
+ *
+ * +----------------------------+
+ * | Plugin Instance |
+ * | svp_t |
+ * | |
+ * | varpd_provider_handle_t * -+-> varpd handle
+ * | uint64_t ----+-> varpd ID
+ * | char * ----+-> remote host
+ * | uint16_t ----+-> remote port
+ * | svp_remote_t * ---+------+-> remote backend
+ * +---------------------+------+
+ * |
+ * v
+ * +----------------------+ +----------------+
+ * | Remote backend |------------------>| Remove Backend |---> ...
+ * | svp_remote_t | | svp_remote_t |
+ * | | +----------------+
+ * | svp_remote_state_t --+-> state flags
+ * | svp_degrade_state_t -+-> degraded reason
+ * | struct addrinfo * --+-> resolved hosts
+ * | uint_t ---+-> active hosts
+ * | uint_t ---+-> DNS generation
+ * | uint_t ---+-> Reference count
+ * | uint_t ---+-> active conns
+ * | uint_t ---+-> degraded conns
+ * | list_t ---+---+-> connection list
+ * +------------------+---+
+ * |
+ * +------------------------------+-----------------+
+ * | | |
+ * v v v
+ * +-------------------+ +----------------
+ * | SVP Connection | | SVP connection | ...
+ * | svp_conn_t | | svp_conn_t |
+ * | | +----------------+
+ * | svp_event_t ----+-> event loop handle
+ * | svp_timer_t ----+-> backoff timer
+ * | svp_timer_t ----+-> query timer
+ * | int ----+-> socket fd
+ * | uint_t ----+-> generation
+ * | uint_t ----+-> current backoff
+ * | svp_conn_flags_t -+-> connection flags
+ * | svp_conn_state_t -+-> connection state
+ * | svp_conn_error_t -+-> connection error
+ * | int ---+-> last errrno
+ * | hrtime_t ---+-> activity timestamp
+ * | svp_conn_out_t ---+-> outgoing data state
+ * | svp_conn_in_t ---+-> incoming data state
+ * | list_t ---+--+-> active queries
+ * +----------------+--+
+ * |
+ * +----------------------------------+-----------------+
+ * | | |
+ * v v v
+ * +--------------------+ +-------------+
+ * | SVP Query | | SVP Query | ...
+ * | svp_query_t | | svp_query_t |
+ * | | +-------------+
+ * | svp_query_f ---+-> callback function
+ * | void * ---+-> callback arg
+ * | svp_query_state_t -+-> state flags
+ * | svp_req_t ---+-> svp prot. header
+ * | svp_query_data_t --+-> read data
+ * | svp_query_data_t --+-> write data
+ * | svp_status_t ---+-> request status
+ * +--------------------+
+ *
+ * The svp_t is the instance that we assoicate with varpd. The instance itself
+ * maintains properties and then when it's started associates with an
+ * svp_remote_t, which is the remote backend. The remote backend itself,
+ * maintains the DNS state and spins up and downs connections based on the
+ * results from DNS. By default, we query DNS every 30 seconds. For more on the
+ * connection life cycle, see the next section.
+ *
+ * By default, each connection maintains its own back off timer and list of
+ * queries it's servicing. Only one request is generally outstanding at a time
+ * and requests are round robined across the various connections.
+ *
+ * The query itself represents the svp request that's going on and keep track of
+ * its state and is a place for data that's read and written to as part of the
+ * request.
+ *
+ * Connections maintain a query timer such that if we have not received data on
+ * a socket for a certain amount of time, we kill that socket and begin a
+ * reconnection cycle with backoff.
+ *
+ * ------------------------
+ * Connection State Machine
+ * ------------------------
+ *
+ * We have a connection pool that's built upon DNS records. DNS describes the
+ * membership of the set of remote peers that make up our pool and we maintain
+ * one connection to each of them. In addition, we maintain an exponential
+ * backoff for each peer and will attempt to reconect immediately before backing
+ * off. The following are the valid states that a connection can be in:
+ *
+ * SVP_CS_ERROR An OS error has occurred on this connection,
+ * such as failure to create a socket or associate
+ * the socket with an event port. We also
+ * transition all connections to this state before
+ * we destroy them.
+ *
+ * SVP_CS_INITIAL This is the initial state of a connection, all
+ * that should exist is an unbound socket.
+ *
+ * SVP_CS_CONNECTING A call to connect has been made and we are
+ * polling for it to complete.
+ *
+ * SVP_CS_BACKOFF A connect attempt has failed and we are
+ * currently backing off, waiting to try again.
+ *
+ * SVP_CS_ACTIVE We have successfully connected to the remote
+ * system.
+ *
+ * SVP_CS_WINDDOWN This connection is going to valhalla. In other
+ * words, a previously active connection is no
+ * longer valid in DNS, so we should curb our use
+ * of it, and reap it as soon as we have other
+ * active connections.
+ *
+ * The following diagram attempts to describe our state transition scheme, and
+ * when we transition from one state to the next.
+ *
+ * |
+ * * New remote IP from DNS resolution,
+ * | not currently active in the system.
+ * |
+ * v Socket Error,
+ * +----------------+ still in DNS
+ * +----------------<---| SVP_CS_INITIAL |<----------------------*-----+
+ * | +----------------+ |
+ * | System | |
+ * | Connection . . . . . success * Successful |
+ * | failed . | connect() |
+ * | +----*---------+ | +-----------*--+ |
+ * | | | | | | |
+ * | V ^ v ^ V ^
+ * | +----------------+ +-------------------+ +---------------+
+ * +<-| SVP_CS_BACKOFF | | SVP_CS_CONNECTING | | SVP_CS_ACTIVE |
+ * | +----------------+ +-------------------+ +---------------+
+ * | V ^ V V V
+ * | Backoff wait * | | | * Removed
+ * v interval +--------------+ +-----------------<-----+ | from DNS
+ * | finished | |
+ * | V |
+ * | | V
+ * | | +-----------------+
+ * +----------------+----------<-----+-------<----| SVP_CS_WINDDOWN |
+ * | +-----------------+
+ * * . . . Fatal system, not
+ * | socket error or
+ * V quiesced after
+ * +--------------+ removal from DNS
+ * | SVP_CS_ERROR |
+ * +--------------+
+ * |
+ * * . . . Removed from DNS
+ * v
+ * +------------+
+ * | Connection |
+ * | Destroyed |
+ * +------------+
+ *
+ * --------------------------
+ * Connection Event Injection
+ * --------------------------
+ *
+ * For each connection that exists in the system, we have a timer in place that
+ * is in charge of performing timeout activity. It fires once every thirty
+ * seconds or so for a given connection and checks to ensure that we have had
+ * activity for the most recent query on the connection. If not, it terminates
+ * the connection. This is important as if we have sent all our data and are
+ * waiting for the remote end to reply, without enabling something like TCP
+ * keep-alive, we will not be notified that anything that has happened to the
+ * remote connection, for example a panic. In addition, this also protects
+ * against a server that is up, but a portolan that is not making forward
+ * progress.
+ *
+ * When a timeout occurs, we first try to disassociate any active events, which
+ * by definition must exist. Once that's done, we inject a port source user
+ * event. Now, there is a small gotcha. Let's assume for a moment that we have a
+ * pathological portolan. That means that it knows to inject activity right at
+ * the time out window. That means, that the event may be disassociated before
+ * we could get to it. If that's the case, we must _not_ inject the user event
+ * and instead, we'll let the pending event take care of it. We know that the
+ * pending event hasn't hit the main part of the loop yet, otherwise, it would
+ * have released the lock protecting our state and associated the event.
+ *
+ * ------------
+ * Notes on DNS
+ * ------------
+ *
+ * Unfortunately, doing host name resolution in a way that allows us to leverage
+ * the system's resolvers and the system's caching, require us to make blocking
+ * calls in libc via getaddrinfo(3SOCKET). If we can't reach a given server,
+ * that will tie up a thread for quite some time. To work around that fact,
+ * we're going to create a fixed number of threads and we'll use them to service
+ * our DNS requests. While this isn't ideal, until we have a sane means of
+ * integrating a DNS resolution into an event loop with say portfs, it's not
+ * going to be a fun day no matter what we do.
+ *
+ * ------
+ * Timers
+ * ------
+ *
+ * We maintain a single timer based on CLOCK_REALTIME. It's designed to fire
+ * every second. While we'd rather use CLOCK_HIGHRES just to alleviate ourselves
+ * from timer drift; however, as zones may not actually have CLOCK_HIGHRES
+ * access, we don't want them to end up in there. The timer itself is just a
+ * simple avl tree sorted by expiration time, which is stored as a tick in the
+ * future, a tick is just one second.
+ *
+ * ----------
+ * Shootdowns
+ * ----------
+ *
+ * As part of the protocol, we need to be able to handle shootdowns that inform
+ * us some of the information in the system is out of date. This information
+ * needs to be processed promptly; however, the information is hopefully going
+ * to be relatively infrequent relative to the normal flow of information.
+ *
+ * The shoot down information needs to be done on a per-backend basis. The
+ * general design is that we'll have a single query for this which can fire on a
+ * 5-10s period, we randmoize the latter part to give us a bit more load
+ * spreading. If we complete because there's no work to do, then we wait the
+ * normal period. If we complete, but there's still work to do, we'll go again
+ * after a second.
+ *
+ * A shootdown has a few different parts. We first receive a list of items to
+ * shootdown. After performing all of those, we need to acknowledge them. When
+ * that's been done successfully, we can move onto the next part. From a
+ * protocol perspective, we make a SVP_R_LOG_REQ, we get a reply, and then after
+ * processing them, send an SVP_R_LOG_RM. Only once that's been acked do we
+ * continue.
+ *
+ * However, one of the challenges that we have is that these invalidations are
+ * just that, an invalidation. For a virtual layer two request, that's fine,
+ * because the kernel supports that. However, for virtual layer three
+ * invalidations, we have a bit more work to do. These protocols, ARP and NDP,
+ * don't really support a notion of just an invalidation, instead you have to
+ * inject the new data in a gratuitous fashion.
+ *
+ * To that end, what we instead do is when we receive a VL3 invalidation, we
+ * turn that info a VL3 request. We hold the general request as outstanding
+ * until we receive all of the callbacks for the VL3 invalidations, at which
+ * point we go through and do the log removal request.
+ */
+
+#include <umem.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <libnvpair.h>
+#include <strings.h>
+#include <string.h>
+#include <assert.h>
+#include <unistd.h>
+
+#include <libvarpd_provider.h>
+#include "libvarpd_svp.h"
+
+bunyan_logger_t *svp_bunyan;
+static int svp_defport = 1296;
+static int svp_defuport = 1339;
+static umem_cache_t *svp_lookup_cache;
+
+typedef enum svp_lookup_type {
+ SVP_L_UNKNOWN = 0x0,
+ SVP_L_VL2 = 0x1,
+ SVP_L_VL3 = 0x2
+} svp_lookup_type_t;
+
+typedef struct svp_lookup {
+ int svl_type;
+ union {
+ struct svl_lookup_vl2 {
+ varpd_query_handle_t *svl_handle;
+ overlay_target_point_t *svl_point;
+ } svl_vl2;
+ struct svl_lookup_vl3 {
+ varpd_arp_handle_t *svl_vah;
+ uint8_t *svl_out;
+ } svl_vl3;
+ } svl_u;
+ svp_query_t svl_query;
+} svp_lookup_t;
+
+static const char *varpd_svp_props[] = {
+ "svp/host",
+ "svp/port",
+ "svp/underlay_ip",
+ "svp/underlay_port"
+};
+
+static const uint8_t svp_bcast[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+
+int
+svp_comparator(const void *l, const void *r)
+{
+ const svp_t *ls = l;
+ const svp_t *rs = r;
+
+ if (ls->svp_vid > rs->svp_vid)
+ return (1);
+ if (ls->svp_vid < rs->svp_vid)
+ return (-1);
+ return (0);
+}
+
+static void
+svp_vl2_lookup_cb(svp_t *svp, svp_status_t status, const struct in6_addr *uip,
+ const uint16_t uport, void *arg)
+{
+ svp_lookup_t *svl = arg;
+ overlay_target_point_t *otp;
+
+ assert(svp != NULL);
+ assert(arg != NULL);
+
+ if (status != SVP_S_OK) {
+ libvarpd_plugin_query_reply(svl->svl_u.svl_vl2.svl_handle,
+ VARPD_LOOKUP_DROP);
+ umem_cache_free(svp_lookup_cache, svl);
+ return;
+ }
+
+ otp = svl->svl_u.svl_vl2.svl_point;
+ bcopy(uip, &otp->otp_ip, sizeof (struct in6_addr));
+ otp->otp_port = uport;
+ libvarpd_plugin_query_reply(svl->svl_u.svl_vl2.svl_handle,
+ VARPD_LOOKUP_OK);
+ umem_cache_free(svp_lookup_cache, svl);
+}
+
+static void
+svp_vl3_lookup_cb(svp_t *svp, svp_status_t status, const uint8_t *vl2mac,
+ const struct in6_addr *uip, const uint16_t uport, void *arg)
+{
+ overlay_target_point_t point;
+ svp_lookup_t *svl = arg;
+
+ assert(svp != NULL);
+ assert(svl != NULL);
+
+ if (status != SVP_S_OK) {
+ libvarpd_plugin_arp_reply(svl->svl_u.svl_vl3.svl_vah,
+ VARPD_LOOKUP_DROP);
+ umem_cache_free(svp_lookup_cache, svl);
+ return;
+ }
+
+ /* Inject the L2 mapping before the L3 */
+ bcopy(uip, &point.otp_ip, sizeof (struct in6_addr));
+ point.otp_port = uport;
+ libvarpd_inject_varp(svp->svp_hdl, vl2mac, &point);
+
+ bcopy(vl2mac, svl->svl_u.svl_vl3.svl_out, ETHERADDRL);
+ libvarpd_plugin_arp_reply(svl->svl_u.svl_vl3.svl_vah,
+ VARPD_LOOKUP_OK);
+ umem_cache_free(svp_lookup_cache, svl);
+}
+
+static void
+svp_vl2_invalidate_cb(svp_t *svp, const uint8_t *vl2mac)
+{
+ libvarpd_inject_varp(svp->svp_hdl, vl2mac, NULL);
+}
+
+static void
+svp_vl3_inject_cb(svp_t *svp, const uint16_t vlan, const struct in6_addr *vl3ip,
+ const uint8_t *vl2mac, const uint8_t *targmac)
+{
+ struct in_addr v4;
+
+ /*
+ * At the moment we don't support any IPv6 related log entries, this
+ * will change soon as we develop a bit more of the IPv6 related
+ * infrastructure so we can properly test the injection.
+ */
+ if (IN6_IS_ADDR_V4MAPPED(vl3ip) == 0) {
+ return;
+ } else {
+ IN6_V4MAPPED_TO_INADDR(vl3ip, &v4);
+ if (targmac == NULL)
+ targmac = svp_bcast;
+ libvarpd_inject_arp(svp->svp_hdl, vlan, vl2mac, &v4, targmac);
+ }
+}
+
+/* ARGSUSED */
+static void
+svp_shootdown_cb(svp_t *svp, const uint8_t *vl2mac, const struct in6_addr *uip,
+ const uint16_t uport)
+{
+ /*
+ * We should probably do a conditional invlaidation here.
+ */
+ libvarpd_inject_varp(svp->svp_hdl, vl2mac, NULL);
+}
+
+static svp_cb_t svp_defops = {
+ svp_vl2_lookup_cb,
+ svp_vl3_lookup_cb,
+ svp_vl2_invalidate_cb,
+ svp_vl3_inject_cb,
+ svp_shootdown_cb
+};
+
+static boolean_t
+varpd_svp_valid_dest(overlay_plugin_dest_t dest)
+{
+ if (dest != (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT))
+ return (B_FALSE);
+
+ return (B_TRUE);
+}
+
+static int
+varpd_svp_create(varpd_provider_handle_t *hdl, void **outp,
+ overlay_plugin_dest_t dest)
+{
+ int ret;
+ svp_t *svp;
+
+ if (varpd_svp_valid_dest(dest) == B_FALSE)
+ return (ENOTSUP);
+
+ svp = umem_zalloc(sizeof (svp_t), UMEM_DEFAULT);
+ if (svp == NULL)
+ return (ENOMEM);
+
+ if ((ret = mutex_init(&svp->svp_lock, USYNC_THREAD | LOCK_ERRORCHECK,
+ NULL)) != 0) {
+ umem_free(svp, sizeof (svp_t));
+ return (ret);
+ }
+
+ svp->svp_port = svp_defport;
+ svp->svp_uport = svp_defuport;
+ svp->svp_cb = svp_defops;
+ svp->svp_hdl = hdl;
+ svp->svp_vid = libvarpd_plugin_vnetid(svp->svp_hdl);
+ *outp = svp;
+ return (0);
+}
+
+static int
+varpd_svp_start(void *arg)
+{
+ int ret;
+ svp_remote_t *srp;
+ svp_t *svp = arg;
+
+ mutex_enter(&svp->svp_lock);
+ if (svp->svp_host == NULL || svp->svp_port == 0 ||
+ svp->svp_huip == B_FALSE || svp->svp_uport == 0) {
+ mutex_exit(&svp->svp_lock);
+ return (EAGAIN);
+ }
+ mutex_exit(&svp->svp_lock);
+
+ if ((ret = svp_remote_find(svp->svp_host, svp->svp_port, &svp->svp_uip,
+ &srp)) != 0)
+ return (ret);
+
+ if ((ret = svp_remote_attach(srp, svp)) != 0) {
+ svp_remote_release(srp);
+ return (ret);
+ }
+
+ return (0);
+}
+
+static void
+varpd_svp_stop(void *arg)
+{
+ svp_t *svp = arg;
+
+ svp_remote_detach(svp);
+}
+
+static void
+varpd_svp_destroy(void *arg)
+{
+ svp_t *svp = arg;
+
+ if (svp->svp_host != NULL)
+ umem_free(svp->svp_host, strlen(svp->svp_host) + 1);
+
+ if (mutex_destroy(&svp->svp_lock) != 0)
+ libvarpd_panic("failed to destroy svp_t`svp_lock");
+
+ umem_free(svp, sizeof (svp_t));
+}
+
+static void
+varpd_svp_lookup(void *arg, varpd_query_handle_t *vqh,
+ const overlay_targ_lookup_t *otl, overlay_target_point_t *otp)
+{
+ svp_lookup_t *slp;
+ svp_t *svp = arg;
+
+ /*
+ * Check if this is something that we need to proxy, eg. arp or ndp.
+ */
+ if (otl->otl_sap == ETHERTYPE_ARP) {
+ libvarpd_plugin_proxy_arp(svp->svp_hdl, vqh, otl);
+ return;
+ }
+
+ if (otl->otl_dstaddr[0] == 0x33 &&
+ otl->otl_dstaddr[1] == 0x33) {
+ if (otl->otl_sap == ETHERTYPE_IPV6) {
+ libvarpd_plugin_proxy_ndp(svp->svp_hdl, vqh, otl);
+ } else {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ }
+ return;
+ }
+
+ /*
+ * Watch out for various multicast and broadcast addresses. We've
+ * already taken care of the IPv6 range above. Now we just need to
+ * handle broadcast and if the multicast bit is set, lowest bit of the
+ * first octet of the MAC, then we drop it now.
+ */
+ if (bcmp(otl->otl_dstaddr, svp_bcast, ETHERADDRL) == 0 ||
+ (otl->otl_dstaddr[0] & 0x01) == 0x01) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ /*
+ * If we have a failure to allocate memory for this, that's not good.
+ * However, telling the kernel to just drop this packet is much better
+ * than the alternative at this moment. At least we'll try again and we
+ * may have something more available to us in a little bit.
+ */
+ slp = umem_cache_alloc(svp_lookup_cache, UMEM_DEFAULT);
+ if (slp == NULL) {
+ libvarpd_plugin_query_reply(vqh, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ slp->svl_type = SVP_L_VL2;
+ slp->svl_u.svl_vl2.svl_handle = vqh;
+ slp->svl_u.svl_vl2.svl_point = otp;
+
+ svp_remote_vl2_lookup(svp, &slp->svl_query, otl->otl_dstaddr, slp);
+}
+
+/* ARGSUSED */
+static int
+varpd_svp_nprops(void *arg, uint_t *nprops)
+{
+ *nprops = sizeof (varpd_svp_props) / sizeof (char *);
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+varpd_svp_propinfo(void *arg, uint_t propid, varpd_prop_handle_t *vph)
+{
+ switch (propid) {
+ case 0:
+ /* svp/host */
+ libvarpd_prop_set_name(vph, varpd_svp_props[0]);
+ libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
+ libvarpd_prop_set_type(vph, OVERLAY_PROP_T_STRING);
+ libvarpd_prop_set_nodefault(vph);
+ break;
+ case 1:
+ /* svp/port */
+ libvarpd_prop_set_name(vph, varpd_svp_props[1]);
+ libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
+ libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
+ (void) libvarpd_prop_set_default(vph, &svp_defport,
+ sizeof (svp_defport));
+ libvarpd_prop_set_range_uint32(vph, 1, UINT16_MAX);
+ break;
+ case 2:
+ /* svp/underlay_ip */
+ libvarpd_prop_set_name(vph, varpd_svp_props[2]);
+ libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
+ libvarpd_prop_set_type(vph, OVERLAY_PROP_T_IP);
+ libvarpd_prop_set_nodefault(vph);
+ break;
+ case 3:
+ /* svp/underlay_port */
+ libvarpd_prop_set_name(vph, varpd_svp_props[3]);
+ libvarpd_prop_set_prot(vph, OVERLAY_PROP_PERM_RRW);
+ libvarpd_prop_set_type(vph, OVERLAY_PROP_T_UINT);
+ (void) libvarpd_prop_set_default(vph, &svp_defuport,
+ sizeof (svp_defuport));
+ libvarpd_prop_set_range_uint32(vph, 1, UINT16_MAX);
+ break;
+ default:
+ return (EINVAL);
+ }
+ return (0);
+}
+
+static int
+varpd_svp_getprop(void *arg, const char *pname, void *buf, uint32_t *sizep)
+{
+ svp_t *svp = arg;
+
+ /* svp/host */
+ if (strcmp(pname, varpd_svp_props[0]) == 0) {
+ size_t len;
+
+ mutex_enter(&svp->svp_lock);
+ if (svp->svp_host == NULL) {
+ *sizep = 0;
+ } else {
+ len = strlen(svp->svp_host) + 1;
+ if (*sizep < len) {
+ mutex_exit(&svp->svp_lock);
+ return (EOVERFLOW);
+ }
+ *sizep = len;
+ (void) strlcpy(buf, svp->svp_host, *sizep);
+ }
+ mutex_exit(&svp->svp_lock);
+ return (0);
+ }
+
+ /* svp/port */
+ if (strcmp(pname, varpd_svp_props[1]) == 0) {
+ uint64_t val;
+
+ if (*sizep < sizeof (uint64_t))
+ return (EOVERFLOW);
+
+ mutex_enter(&svp->svp_lock);
+ if (svp->svp_port == 0) {
+ *sizep = 0;
+ } else {
+ val = svp->svp_port;
+ bcopy(&val, buf, sizeof (uint64_t));
+ *sizep = sizeof (uint64_t);
+ }
+
+ mutex_exit(&svp->svp_lock);
+ return (0);
+ }
+
+ /* svp/underlay_ip */
+ if (strcmp(pname, varpd_svp_props[2]) == 0) {
+ if (*sizep > sizeof (struct in6_addr))
+ return (EOVERFLOW);
+ mutex_enter(&svp->svp_lock);
+ if (svp->svp_huip == B_FALSE) {
+ *sizep = 0;
+ } else {
+ bcopy(&svp->svp_uip, buf, sizeof (struct in6_addr));
+ *sizep = sizeof (struct in6_addr);
+ }
+ return (0);
+ }
+
+ /* svp/underlay_port */
+ if (strcmp(pname, varpd_svp_props[3]) == 0) {
+ uint64_t val;
+
+ if (*sizep < sizeof (uint64_t))
+ return (EOVERFLOW);
+
+ mutex_enter(&svp->svp_lock);
+ if (svp->svp_uport == 0) {
+ *sizep = 0;
+ } else {
+ val = svp->svp_uport;
+ bcopy(&val, buf, sizeof (uint64_t));
+ *sizep = sizeof (uint64_t);
+ }
+
+ mutex_exit(&svp->svp_lock);
+ return (0);
+ }
+
+ return (EINVAL);
+}
+
+static int
+varpd_svp_setprop(void *arg, const char *pname, const void *buf,
+ const uint32_t size)
+{
+ svp_t *svp = arg;
+
+ /* svp/host */
+ if (strcmp(pname, varpd_svp_props[0]) == 0) {
+ char *dup;
+ dup = umem_alloc(size, UMEM_DEFAULT);
+ (void) strlcpy(dup, buf, size);
+ if (dup == NULL)
+ return (ENOMEM);
+ mutex_enter(&svp->svp_lock);
+ if (svp->svp_host != NULL)
+ umem_free(svp->svp_host, strlen(svp->svp_host) + 1);
+ svp->svp_host = dup;
+ mutex_exit(&svp->svp_lock);
+ return (0);
+ }
+
+ /* svp/port */
+ if (strcmp(pname, varpd_svp_props[1]) == 0) {
+ const uint64_t *valp = buf;
+ if (size < sizeof (uint64_t))
+ return (EOVERFLOW);
+
+ if (*valp == 0 || *valp > UINT16_MAX)
+ return (EINVAL);
+
+ mutex_enter(&svp->svp_lock);
+ svp->svp_port = (uint16_t)*valp;
+ mutex_exit(&svp->svp_lock);
+ return (0);
+ }
+
+ /* svp/underlay_ip */
+ if (strcmp(pname, varpd_svp_props[2]) == 0) {
+ const struct in6_addr *ipv6 = buf;
+
+ if (size < sizeof (struct in6_addr))
+ return (EOVERFLOW);
+
+ if (IN6_IS_ADDR_V4COMPAT(ipv6))
+ return (EINVAL);
+
+ if (IN6_IS_ADDR_MULTICAST(ipv6))
+ return (EINVAL);
+
+ if (IN6_IS_ADDR_6TO4(ipv6))
+ return (EINVAL);
+
+ if (IN6_IS_ADDR_V4MAPPED(ipv6)) {
+ ipaddr_t v4;
+ IN6_V4MAPPED_TO_IPADDR(ipv6, v4);
+ if (IN_MULTICAST(v4))
+ return (EINVAL);
+ }
+
+ mutex_enter(&svp->svp_lock);
+ bcopy(buf, &svp->svp_uip, sizeof (struct in6_addr));
+ svp->svp_huip = B_TRUE;
+ mutex_exit(&svp->svp_lock);
+ return (0);
+ }
+
+ /* svp/underlay_port */
+ if (strcmp(pname, varpd_svp_props[3]) == 0) {
+ const uint64_t *valp = buf;
+ if (size < sizeof (uint64_t))
+ return (EOVERFLOW);
+
+ if (*valp == 0 || *valp > UINT16_MAX)
+ return (EINVAL);
+
+ mutex_enter(&svp->svp_lock);
+ svp->svp_uport = (uint16_t)*valp;
+ mutex_exit(&svp->svp_lock);
+
+ return (0);
+ }
+
+ return (EINVAL);
+}
+
+static int
+varpd_svp_save(void *arg, nvlist_t *nvp)
+{
+ int ret;
+ svp_t *svp = arg;
+
+ mutex_enter(&svp->svp_lock);
+ if (svp->svp_host != NULL) {
+ if ((ret = nvlist_add_string(nvp, varpd_svp_props[0],
+ svp->svp_host)) != 0) {
+ mutex_exit(&svp->svp_lock);
+ return (ret);
+ }
+ }
+
+ if (svp->svp_port != 0) {
+ if ((ret = nvlist_add_uint16(nvp, varpd_svp_props[1],
+ svp->svp_port)) != 0) {
+ mutex_exit(&svp->svp_lock);
+ return (ret);
+ }
+ }
+
+ if (svp->svp_huip == B_TRUE) {
+ char buf[INET6_ADDRSTRLEN];
+
+ if (inet_ntop(AF_INET6, &svp->svp_uip, buf, sizeof (buf)) ==
+ NULL)
+ libvarpd_panic("unexpected inet_ntop failure: %d",
+ errno);
+
+ if ((ret = nvlist_add_string(nvp, varpd_svp_props[2],
+ buf)) != 0) {
+ mutex_exit(&svp->svp_lock);
+ return (ret);
+ }
+ }
+
+ if (svp->svp_uport != 0) {
+ if ((ret = nvlist_add_uint16(nvp, varpd_svp_props[3],
+ svp->svp_uport)) != 0) {
+ mutex_exit(&svp->svp_lock);
+ return (ret);
+ }
+ }
+
+ mutex_exit(&svp->svp_lock);
+ return (0);
+}
+
+static int
+varpd_svp_restore(nvlist_t *nvp, varpd_provider_handle_t *hdl,
+ overlay_plugin_dest_t dest, void **outp)
+{
+ int ret;
+ svp_t *svp;
+ char *ipstr, *hstr;
+
+ if (varpd_svp_valid_dest(dest) == B_FALSE)
+ return (ENOTSUP);
+
+ if ((ret = varpd_svp_create(hdl, (void **)&svp, dest)) != 0)
+ return (ret);
+
+ if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[0],
+ &hstr)) != 0) {
+ if (ret != ENOENT) {
+ varpd_svp_destroy(svp);
+ return (ret);
+ }
+ svp->svp_host = NULL;
+ } else {
+ size_t blen = strlen(hstr) + 1;
+ svp->svp_host = umem_alloc(blen, UMEM_DEFAULT);
+ (void) strlcpy(svp->svp_host, hstr, blen);
+ }
+
+ if ((ret = nvlist_lookup_uint16(nvp, varpd_svp_props[1],
+ &svp->svp_port)) != 0) {
+ if (ret != ENOENT) {
+ varpd_svp_destroy(svp);
+ return (ret);
+ }
+ svp->svp_port = 0;
+ }
+
+ if ((ret = nvlist_lookup_string(nvp, varpd_svp_props[2],
+ &ipstr)) != 0) {
+ if (ret != ENOENT) {
+ varpd_svp_destroy(svp);
+ return (ret);
+ }
+ svp->svp_huip = B_FALSE;
+ } else {
+ ret = inet_pton(AF_INET6, ipstr, &svp->svp_uip);
+ if (ret == -1) {
+ assert(errno == EAFNOSUPPORT);
+ libvarpd_panic("unexpected inet_pton failure: %d",
+ errno);
+ }
+
+ if (ret == 0) {
+ varpd_svp_destroy(svp);
+ return (EINVAL);
+ }
+ svp->svp_huip = B_TRUE;
+ }
+
+ if ((ret = nvlist_lookup_uint16(nvp, varpd_svp_props[3],
+ &svp->svp_uport)) != 0) {
+ if (ret != ENOENT) {
+ varpd_svp_destroy(svp);
+ return (ret);
+ }
+ svp->svp_uport = 0;
+ }
+
+ svp->svp_hdl = hdl;
+ *outp = svp;
+ return (0);
+}
+
+static void
+varpd_svp_arp(void *arg, varpd_arp_handle_t *vah, int type,
+ const struct sockaddr *sock, uint8_t *out)
+{
+ svp_t *svp = arg;
+ svp_lookup_t *svl;
+
+ if (type != VARPD_QTYPE_ETHERNET) {
+ libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ svl = umem_cache_alloc(svp_lookup_cache, UMEM_DEFAULT);
+ if (svl == NULL) {
+ libvarpd_plugin_arp_reply(vah, VARPD_LOOKUP_DROP);
+ return;
+ }
+
+ svl->svl_type = SVP_L_VL3;
+ svl->svl_u.svl_vl3.svl_vah = vah;
+ svl->svl_u.svl_vl3.svl_out = out;
+ svp_remote_vl3_lookup(svp, &svl->svl_query, sock, svl);
+}
+
+static const varpd_plugin_ops_t varpd_svp_ops = {
+ 0,
+ varpd_svp_create,
+ varpd_svp_start,
+ varpd_svp_stop,
+ varpd_svp_destroy,
+ NULL,
+ varpd_svp_lookup,
+ varpd_svp_nprops,
+ varpd_svp_propinfo,
+ varpd_svp_getprop,
+ varpd_svp_setprop,
+ varpd_svp_save,
+ varpd_svp_restore,
+ varpd_svp_arp,
+ NULL
+};
+
+static int
+svp_bunyan_init(void)
+{
+ int ret;
+
+ if ((ret = bunyan_init("svp", &svp_bunyan)) != 0)
+ return (ret);
+ ret = bunyan_stream_add(svp_bunyan, "stderr", BUNYAN_L_INFO,
+ bunyan_stream_fd, (void *)STDERR_FILENO);
+ if (ret != 0)
+ bunyan_fini(svp_bunyan);
+ return (ret);
+}
+
+static void
+svp_bunyan_fini(void)
+{
+ if (svp_bunyan != NULL)
+ bunyan_fini(svp_bunyan);
+}
+
+#pragma init(varpd_svp_init)
+static void
+varpd_svp_init(void)
+{
+ int err;
+ varpd_plugin_register_t *vpr;
+
+ if (svp_bunyan_init() != 0)
+ return;
+
+ if ((err = svp_host_init()) != 0) {
+ (void) bunyan_error(svp_bunyan, "failed to init host subsystem",
+ BUNYAN_T_INT32, "error", err,
+ BUNYAN_T_END);
+ svp_bunyan_fini();
+ return;
+ }
+
+ svp_lookup_cache = umem_cache_create("svp_lookup",
+ sizeof (svp_lookup_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
+ if (svp_lookup_cache == NULL) {
+ (void) bunyan_error(svp_bunyan,
+ "failed to create svp_lookup cache",
+ BUNYAN_T_INT32, "error", errno,
+ BUNYAN_T_END);
+ svp_bunyan_fini();
+ return;
+ }
+
+ if ((err = svp_event_init()) != 0) {
+ (void) bunyan_error(svp_bunyan,
+ "failed to init event subsystem",
+ BUNYAN_T_INT32, "error", err,
+ BUNYAN_T_END);
+ svp_bunyan_fini();
+ umem_cache_destroy(svp_lookup_cache);
+ return;
+ }
+
+ if ((err = svp_timer_init()) != 0) {
+ (void) bunyan_error(svp_bunyan,
+ "failed to init timer subsystem",
+ BUNYAN_T_INT32, "error", err,
+ BUNYAN_T_END);
+ svp_event_fini();
+ umem_cache_destroy(svp_lookup_cache);
+ svp_bunyan_fini();
+ return;
+ }
+
+ if ((err = svp_remote_init()) != 0) {
+ (void) bunyan_error(svp_bunyan,
+ "failed to init remote subsystem",
+ BUNYAN_T_INT32, "error", err,
+ BUNYAN_T_END);
+ svp_event_fini();
+ umem_cache_destroy(svp_lookup_cache);
+ svp_bunyan_fini();
+ return;
+ }
+
+ vpr = libvarpd_plugin_alloc(VARPD_CURRENT_VERSION, &err);
+ if (vpr == NULL) {
+ (void) bunyan_error(svp_bunyan,
+ "failed to alloc varpd plugin",
+ BUNYAN_T_INT32, "error", err,
+ BUNYAN_T_END);
+ svp_remote_fini();
+ svp_event_fini();
+ umem_cache_destroy(svp_lookup_cache);
+ svp_bunyan_fini();
+ return;
+ }
+
+ vpr->vpr_mode = OVERLAY_TARGET_DYNAMIC;
+ vpr->vpr_name = "svp";
+ vpr->vpr_ops = &varpd_svp_ops;
+
+ if ((err = libvarpd_plugin_register(vpr)) != 0) {
+ (void) bunyan_error(svp_bunyan,
+ "failed to register varpd plugin",
+ BUNYAN_T_INT32, "error", err,
+ BUNYAN_T_END);
+ svp_remote_fini();
+ svp_event_fini();
+ umem_cache_destroy(svp_lookup_cache);
+ svp_bunyan_fini();
+
+ }
+ libvarpd_plugin_free(vpr);
+}
diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp.h b/usr/src/lib/varpd/svp/common/libvarpd_svp.h
new file mode 100644
index 0000000000..8192b842ce
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/libvarpd_svp.h
@@ -0,0 +1,357 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _LIBVARPD_SVP_H
+#define _LIBVARPD_SVP_H
+
+/*
+ * Implementation details of the SVP plugin and the SVP protocol.
+ */
+
+#include <netinet/in.h>
+#include <sys/ethernet.h>
+#include <thread.h>
+#include <synch.h>
+#include <libvarpd_provider.h>
+#include <sys/avl.h>
+#include <port.h>
+#include <sys/list.h>
+#include <bunyan.h>
+
+#include <libvarpd_svp_prot.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct svp svp_t;
+typedef struct svp_remote svp_remote_t;
+typedef struct svp_conn svp_conn_t;
+typedef struct svp_query svp_query_t;
+
+typedef void (*svp_event_f)(port_event_t *, void *);
+
+typedef struct svp_event {
+ svp_event_f se_func;
+ void *se_arg;
+ int se_events;
+} svp_event_t;
+
+typedef void (*svp_timer_f)(void *);
+
+typedef struct svp_timer {
+ svp_timer_f st_func; /* Timer callback function */
+ void *st_arg; /* Timer callback arg */
+ boolean_t st_oneshot; /* Is timer a one shot? */
+ uint32_t st_value; /* periodic or one-shot time */
+ /* Fields below here are private to the svp_timer implementaiton */
+ uint64_t st_expire; /* Next expiration */
+ boolean_t st_delivering; /* Are we currently delivering this */
+ avl_node_t st_link;
+} svp_timer_t;
+
+/*
+ * Note, both the svp_log_ack_t and svp_lrm_req_t are not part of this structure
+ * as they are rather variable sized data and we don't want to constrain their
+ * size. Instead, the rdata and wdata members must be set appropriately.
+ */
+typedef union svp_query_data {
+ svp_vl2_req_t sqd_vl2r;
+ svp_vl2_ack_t sqd_vl2a;
+ svp_vl3_req_t sdq_vl3r;
+ svp_vl3_ack_t sdq_vl3a;
+ svp_log_req_t sdq_logr;
+ svp_lrm_ack_t sdq_lrma;
+} svp_query_data_t;
+
+typedef void (*svp_query_f)(svp_query_t *, void *);
+
+typedef enum svp_query_state {
+ SVP_QUERY_INIT = 0x00,
+ SVP_QUERY_WRITING = 0x01,
+ SVP_QUERY_READING = 0x02,
+ SVP_QUERY_FINISHED = 0x03
+} svp_query_state_t;
+
+/*
+ * The query structure is usable for all forms of svp queries that end up
+ * getting passed across. Right now it's optimized for the fixed size data
+ * requests as opposed to requests whose responses will always be streaming in
+ * nature. Though, the streaming requests are the less common ones we have. We
+ * may need to make additional changes for those.
+ */
+struct svp_query {
+ list_node_t sq_lnode; /* List entry */
+ svp_query_f sq_func; /* Callback function */
+ svp_query_state_t sq_state; /* Query state */
+ void *sq_arg; /* Callback function arg */
+ svp_t *sq_svp; /* Pointer back to svp_t */
+ svp_req_t sq_header; /* Header for the query */
+ svp_query_data_t sq_rdun; /* Union for read data */
+ svp_query_data_t sq_wdun; /* Union for write data */
+ svp_status_t sq_status; /* Query response status */
+ size_t sq_size; /* Query response size */
+ void *sq_rdata; /* Read data pointer */
+ size_t sq_rsize; /* Read data size */
+ void *sq_wdata; /* Write data pointer */
+ size_t sq_wsize; /* Write data size */
+ hrtime_t sq_acttime; /* Last I/O activity time */
+};
+
+typedef enum svp_conn_state {
+ SVP_CS_ERROR = 0x00,
+ SVP_CS_INITIAL = 0x01,
+ SVP_CS_CONNECTING = 0x02,
+ SVP_CS_BACKOFF = 0x03,
+ SVP_CS_ACTIVE = 0x04,
+ SVP_CS_WINDDOWN = 0x05
+} svp_conn_state_t;
+
+typedef enum svp_conn_error {
+ SVP_CE_NONE = 0x00,
+ SVP_CE_ASSOCIATE = 0x01,
+ SVP_CE_NOPOLLOUT = 0x02,
+ SVP_CE_SOCKET = 0x03
+} svp_conn_error_t;
+
+typedef enum svp_conn_flags {
+ SVP_CF_ADDED = 0x01,
+ SVP_CF_DEGRADED = 0x02,
+ SVP_CF_REAP = 0x04,
+ SVP_CF_TEARDOWN = 0x08,
+ SVP_CF_UFLAG = 0x0c,
+ SVP_CF_USER = 0x10
+} svp_conn_flags_t;
+
+typedef struct svp_conn_out {
+ svp_query_t *sco_query;
+ size_t sco_offset;
+} svp_conn_out_t;
+
+typedef struct svp_conn_in {
+ svp_query_t *sci_query;
+ svp_req_t sci_req;
+ size_t sci_offset;
+} svp_conn_in_t;
+
+struct svp_conn {
+ svp_remote_t *sc_remote; /* RO */
+ struct in6_addr sc_addr; /* RO */
+ list_node_t sc_rlist; /* svp_remote_t`sr_lock */
+ mutex_t sc_lock;
+ svp_event_t sc_event;
+ svp_timer_t sc_btimer;
+ svp_timer_t sc_qtimer;
+ int sc_socket;
+ uint_t sc_gen;
+ uint_t sc_nbackoff;
+ svp_conn_flags_t sc_flags;
+ svp_conn_state_t sc_cstate;
+ svp_conn_error_t sc_error;
+ int sc_errno;
+ list_t sc_queries;
+ svp_conn_out_t sc_output;
+ svp_conn_in_t sc_input;
+};
+
+typedef enum svp_remote_state {
+ SVP_RS_LOOKUP_SCHEDULED = 0x01, /* On the DNS Queue */
+ SVP_RS_LOOKUP_INPROGRESS = 0x02, /* Doing a DNS lookup */
+ SVP_RS_LOOKUP_VALID = 0x04 /* addrinfo valid */
+} svp_remote_state_t;
+
+/*
+ * These series of bit-based flags should be ordered such that the most severe
+ * is first. We only can set one message that user land can see, so if more than
+ * one is set we want to make sure that one is there.
+ */
+typedef enum svp_degrade_state {
+ SVP_RD_DNS_FAIL = 0x01, /* DNS Resolution Failure */
+ SVP_RD_REMOTE_FAIL = 0x02, /* cannot reach any remote peers */
+ SVP_RD_ALL = 0x03 /* Only suitable for restore */
+} svp_degrade_state_t;
+
+typedef enum svp_shootdown_flags {
+ SVP_SD_RUNNING = 0x01,
+ SVP_SD_QUIESCE = 0x02,
+ SVP_SD_DORM = 0x04
+} svp_shootdown_flags_t;
+
+/*
+ * There is a single svp_sdlog_t per svp_remote_t. It maintains its own lock and
+ * condition variables. See the big theory statement for more information on how
+ * it's used.
+ */
+typedef struct svp_sdlog {
+ mutex_t sdl_lock;
+ cond_t sdl_cond;
+ uint_t sdl_ref;
+ svp_timer_t sdl_timer;
+ svp_shootdown_flags_t sdl_flags;
+ svp_query_t sdl_query;
+ void *sdl_logack;
+ void *sdl_logrm;
+ void *sdl_remote;
+} svp_sdlog_t;
+
+struct svp_remote {
+ char *sr_hostname; /* RO */
+ uint16_t sr_rport; /* RO */
+ struct in6_addr sr_uip; /* RO */
+ avl_node_t sr_gnode; /* svp_remote_lock */
+ svp_remote_t *sr_nexthost; /* svp_host_lock */
+ mutex_t sr_lock;
+ cond_t sr_cond;
+ svp_remote_state_t sr_state;
+ svp_degrade_state_t sr_degrade;
+ struct addrinfo *sr_addrinfo;
+ avl_tree_t sr_tree;
+ uint_t sr_count; /* active count */
+ uint_t sr_gen;
+ uint_t sr_tconns; /* total conns + dconns */
+ uint_t sr_ndconns; /* number of degraded conns */
+ list_t sr_conns; /* all conns */
+ svp_sdlog_t sr_shoot;
+};
+
+/*
+ * We have a bunch of different things that we get back from the API at the
+ * plug-in layer. These include:
+ *
+ * o OOB Shootdowns
+ * o VL3->VL2 Lookups
+ * o VL2->UL3 Lookups
+ * o VL2 Log invalidations
+ * o VL3 Log injections
+ */
+typedef void (*svp_vl2_lookup_f)(svp_t *, svp_status_t, const struct in6_addr *,
+ const uint16_t, void *);
+typedef void (*svp_vl3_lookup_f)(svp_t *, svp_status_t, const uint8_t *,
+ const struct in6_addr *, const uint16_t, void *);
+typedef void (*svp_vl2_invalidation_f)(svp_t *, const uint8_t *);
+typedef void (*svp_vl3_inject_f)(svp_t *, const uint16_t,
+ const struct in6_addr *, const uint8_t *, const uint8_t *);
+typedef void (*svp_shootdown_f)(svp_t *, const uint8_t *,
+ const struct in6_addr *, const uint16_t uport);
+
+typedef struct svp_cb {
+ svp_vl2_lookup_f scb_vl2_lookup;
+ svp_vl3_lookup_f scb_vl3_lookup;
+ svp_vl2_invalidation_f scb_vl2_invalidate;
+ svp_vl3_inject_f scb_vl3_inject;
+ svp_shootdown_f scb_shootdown;
+} svp_cb_t;
+
+/*
+ * Core implementation structure.
+ */
+struct svp {
+ overlay_plugin_dest_t svp_dest; /* RO */
+ varpd_provider_handle_t *svp_hdl; /* RO */
+ svp_cb_t svp_cb; /* RO */
+ uint64_t svp_vid; /* RO */
+ avl_node_t svp_rlink; /* Owned by svp_remote */
+ svp_remote_t *svp_remote; /* RO iff started */
+ mutex_t svp_lock;
+ char *svp_host; /* svp_lock */
+ uint16_t svp_port; /* svp_lock */
+ uint16_t svp_uport; /* svp_lock */
+ boolean_t svp_huip; /* svp_lock */
+ struct in6_addr svp_uip; /* svp_lock */
+};
+
+extern bunyan_logger_t *svp_bunyan;
+
+extern int svp_remote_find(char *, uint16_t, struct in6_addr *,
+ svp_remote_t **);
+extern int svp_remote_attach(svp_remote_t *, svp_t *);
+extern void svp_remote_detach(svp_t *);
+extern void svp_remote_release(svp_remote_t *);
+extern void svp_remote_vl3_lookup(svp_t *, svp_query_t *,
+ const struct sockaddr *, void *);
+extern void svp_remote_vl2_lookup(svp_t *, svp_query_t *, const uint8_t *,
+ void *);
+
+/*
+ * Init functions
+ */
+extern int svp_remote_init(void);
+extern void svp_remote_fini(void);
+extern int svp_event_init(void);
+extern int svp_event_timer_init(svp_event_t *);
+extern void svp_event_fini(void);
+extern int svp_host_init(void);
+extern int svp_timer_init(void);
+
+/*
+ * Timers
+ */
+extern int svp_tickrate;
+extern void svp_timer_add(svp_timer_t *);
+extern void svp_timer_remove(svp_timer_t *);
+
+/*
+ * Event loop management
+ */
+extern int svp_event_associate(svp_event_t *, int);
+extern int svp_event_dissociate(svp_event_t *, int);
+extern int svp_event_inject(svp_event_t *);
+
+/*
+ * Connection manager
+ */
+extern int svp_conn_create(svp_remote_t *, const struct in6_addr *);
+extern void svp_conn_destroy(svp_conn_t *);
+extern void svp_conn_fallout(svp_conn_t *);
+extern void svp_conn_queue(svp_conn_t *, svp_query_t *);
+
+/*
+ * FMA related
+ */
+extern void svp_remote_degrade(svp_remote_t *, svp_degrade_state_t);
+extern void svp_remote_restore(svp_remote_t *, svp_degrade_state_t);
+
+/*
+ * Misc.
+ */
+extern int svp_comparator(const void *, const void *);
+extern void svp_remote_reassign(svp_remote_t *, svp_conn_t *);
+extern void svp_remote_resolved(svp_remote_t *, struct addrinfo *);
+extern void svp_host_queue(svp_remote_t *);
+extern void svp_query_release(svp_query_t *);
+extern void svp_query_crc32(svp_req_t *, void *, size_t);
+
+/*
+ * Shootdown related
+ */
+extern void svp_remote_shootdown_vl3(svp_remote_t *, svp_log_vl3_t *,
+ svp_sdlog_t *);
+extern void svp_remote_shootdown_vl2(svp_remote_t *, svp_log_vl2_t *);
+extern void svp_remote_log_request(svp_remote_t *, svp_query_t *, void *,
+ size_t);
+extern void svp_remote_lrm_request(svp_remote_t *, svp_query_t *, void *,
+ size_t);
+extern void svp_shootdown_logr_cb(svp_remote_t *, svp_status_t, void *, size_t);
+extern void svp_shootdown_lrm_cb(svp_remote_t *, svp_status_t);
+extern void svp_shootdown_vl3_cb(svp_status_t, svp_log_vl3_t *, svp_sdlog_t *);
+extern int svp_shootdown_init(svp_remote_t *);
+extern void svp_shootdown_fini(svp_remote_t *);
+extern void svp_shootdown_start(svp_remote_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBVARPD_SVP_H */
diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_conn.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_conn.c
new file mode 100644
index 0000000000..5d19f8a388
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_conn.c
@@ -0,0 +1,1031 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * Logic to manage an individual connection to a remote host.
+ *
+ * For more information, see the big theory statement in
+ * lib/varpd/svp/common/libvarpd_svp.c.
+ */
+
+#include <assert.h>
+#include <umem.h>
+#include <errno.h>
+#include <strings.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <sys/uio.h>
+#include <sys/debug.h>
+
+#include <libvarpd_svp.h>
+
+int svp_conn_query_timeout = 30;
+static int svp_conn_backoff_tbl[] = { 1, 2, 4, 8, 16, 32 };
+static int svp_conn_nbackoff = sizeof (svp_conn_backoff_tbl) / sizeof (int);
+
+typedef enum svp_conn_act {
+ SVP_RA_NONE = 0x00,
+ SVP_RA_DEGRADE = 0x01,
+ SVP_RA_RESTORE = 0x02,
+ SVP_RA_ERROR = 0x03,
+ SVP_RA_CLEANUP = 0x04
+} svp_conn_act_t;
+
+static void
+svp_conn_inject(svp_conn_t *scp)
+{
+ int ret;
+ assert(MUTEX_HELD(&scp->sc_lock));
+
+ if (scp->sc_flags & SVP_CF_USER)
+ return;
+ scp->sc_flags |= SVP_CF_USER;
+ if ((ret = svp_event_inject(&scp->sc_event)) != 0)
+ libvarpd_panic("failed to inject event: %d\n", ret);
+}
+
+static void
+svp_conn_degrade(svp_conn_t *scp)
+{
+ svp_remote_t *srp = scp->sc_remote;
+
+ assert(MUTEX_HELD(&srp->sr_lock));
+ assert(MUTEX_HELD(&scp->sc_lock));
+
+ if (scp->sc_flags & SVP_CF_DEGRADED)
+ return;
+
+ scp->sc_flags |= SVP_CF_DEGRADED;
+ srp->sr_ndconns++;
+ if (srp->sr_ndconns == srp->sr_tconns)
+ svp_remote_degrade(srp, SVP_RD_REMOTE_FAIL);
+}
+
+static void
+svp_conn_restore(svp_conn_t *scp)
+{
+ svp_remote_t *srp = scp->sc_remote;
+
+ assert(MUTEX_HELD(&srp->sr_lock));
+ assert(MUTEX_HELD(&scp->sc_lock));
+
+ if (!(scp->sc_flags & SVP_CF_DEGRADED))
+ return;
+
+ scp->sc_flags &= ~SVP_CF_DEGRADED;
+ if (srp->sr_ndconns == srp->sr_tconns)
+ svp_remote_restore(srp, SVP_RD_REMOTE_FAIL);
+ srp->sr_ndconns--;
+}
+
+static void
+svp_conn_add(svp_conn_t *scp)
+{
+ svp_remote_t *srp = scp->sc_remote;
+
+ assert(MUTEX_HELD(&srp->sr_lock));
+ assert(MUTEX_HELD(&scp->sc_lock));
+
+ if (scp->sc_flags & SVP_CF_ADDED)
+ return;
+
+ list_insert_tail(&srp->sr_conns, scp);
+ scp->sc_flags |= SVP_CF_ADDED;
+ srp->sr_tconns++;
+}
+
+static void
+svp_conn_remove(svp_conn_t *scp)
+{
+ svp_remote_t *srp = scp->sc_remote;
+
+ assert(MUTEX_HELD(&srp->sr_lock));
+ assert(MUTEX_HELD(&scp->sc_lock));
+
+ if (!(scp->sc_flags & SVP_CF_ADDED))
+ return;
+
+ scp->sc_flags &= ~SVP_CF_ADDED;
+ if (scp->sc_flags & SVP_CF_DEGRADED)
+ srp->sr_ndconns--;
+ srp->sr_tconns--;
+ if (srp->sr_tconns == srp->sr_ndconns)
+ svp_remote_degrade(srp, SVP_RD_REMOTE_FAIL);
+}
+
+static svp_query_t *
+svp_conn_query_find(svp_conn_t *scp, uint32_t id)
+{
+ svp_query_t *sqp;
+
+ assert(MUTEX_HELD(&scp->sc_lock));
+
+ for (sqp = list_head(&scp->sc_queries); sqp != NULL;
+ sqp = list_next(&scp->sc_queries, sqp)) {
+ if (sqp->sq_header.svp_id == id)
+ break;
+ }
+
+ return (sqp);
+}
+
+static svp_conn_act_t
+svp_conn_backoff(svp_conn_t *scp)
+{
+ assert(MUTEX_HELD(&scp->sc_lock));
+
+ if (close(scp->sc_socket) != 0)
+ libvarpd_panic("failed to close socket %d: %d\n",
+ scp->sc_socket, errno);
+ scp->sc_socket = -1;
+
+ scp->sc_cstate = SVP_CS_BACKOFF;
+ scp->sc_nbackoff++;
+ if (scp->sc_nbackoff >= svp_conn_nbackoff) {
+ scp->sc_btimer.st_value =
+ svp_conn_backoff_tbl[svp_conn_nbackoff - 1];
+ } else {
+ scp->sc_btimer.st_value =
+ svp_conn_backoff_tbl[scp->sc_nbackoff - 1];
+ }
+ svp_timer_add(&scp->sc_btimer);
+
+ if (scp->sc_nbackoff > svp_conn_nbackoff)
+ return (SVP_RA_DEGRADE);
+ return (SVP_RA_NONE);
+}
+
+static svp_conn_act_t
+svp_conn_connect(svp_conn_t *scp)
+{
+ int ret;
+ struct sockaddr_in6 in6;
+
+ assert(MUTEX_HELD(&scp->sc_lock));
+ assert(scp->sc_cstate == SVP_CS_BACKOFF ||
+ scp->sc_cstate == SVP_CS_INITIAL);
+ assert(scp->sc_socket == -1);
+ if (scp->sc_cstate == SVP_CS_INITIAL)
+ scp->sc_nbackoff = 0;
+
+ scp->sc_socket = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
+ if (scp->sc_socket == -1) {
+ scp->sc_error = SVP_CE_SOCKET;
+ scp->sc_errno = errno;
+ scp->sc_cstate = SVP_CS_ERROR;
+ return (SVP_RA_DEGRADE);
+ }
+
+ bzero(&in6, sizeof (struct sockaddr_in6));
+ in6.sin6_family = AF_INET6;
+ in6.sin6_port = htons(scp->sc_remote->sr_rport);
+ bcopy(&scp->sc_addr, &in6.sin6_addr, sizeof (struct in6_addr));
+ ret = connect(scp->sc_socket, (struct sockaddr *)&in6,
+ sizeof (struct sockaddr_in6));
+ if (ret != 0) {
+ boolean_t async = B_FALSE;
+
+ switch (errno) {
+ case EACCES:
+ case EADDRINUSE:
+ case EAFNOSUPPORT:
+ case EALREADY:
+ case EBADF:
+ case EISCONN:
+ case ELOOP:
+ case ENOENT:
+ case ENOSR:
+ case EWOULDBLOCK:
+ libvarpd_panic("unanticipated connect errno %d", errno);
+ break;
+ case EINPROGRESS:
+ case EINTR:
+ async = B_TRUE;
+ default:
+ break;
+ }
+
+ /*
+ * So, we will be connecting to this in the future, advance our
+ * state and make sure that we poll for the next round.
+ */
+ if (async == B_TRUE) {
+ scp->sc_cstate = SVP_CS_CONNECTING;
+ scp->sc_event.se_events = POLLOUT | POLLHUP;
+ ret = svp_event_associate(&scp->sc_event,
+ scp->sc_socket);
+ if (ret == 0)
+ return (SVP_RA_NONE);
+ scp->sc_error = SVP_CE_ASSOCIATE;
+ scp->sc_errno = ret;
+ scp->sc_cstate = SVP_CS_ERROR;
+ return (SVP_RA_DEGRADE);
+ } else {
+ /*
+ * This call failed, which means that we obtained one of
+ * the following:
+ *
+ * EADDRNOTAVAIL
+ * ECONNREFUSED
+ * EIO
+ * ENETUNREACH
+ * EHOSTUNREACH
+ * ENXIO
+ * ETIMEDOUT
+ *
+ * Therefore we need to set ourselves into backoff and
+ * wait for that to clear up.
+ */
+ return (svp_conn_backoff(scp));
+ }
+ }
+
+ /*
+ * We've connected. Successfully move ourselves to the bound
+ * state and start polling.
+ */
+ scp->sc_cstate = SVP_CS_ACTIVE;
+ scp->sc_event.se_events = POLLIN | POLLRDNORM | POLLHUP;
+ ret = svp_event_associate(&scp->sc_event, scp->sc_socket);
+ if (ret == 0)
+ return (SVP_RA_RESTORE);
+ scp->sc_error = SVP_CE_ASSOCIATE;
+ scp->sc_cstate = SVP_CS_ERROR;
+
+ return (SVP_RA_DEGRADE);
+}
+
+/*
+ * This should be the first call we get after a connect. If we have successfully
+ * connected, we should see a writeable event. We may also see an error or a
+ * hang up. In either of these cases, we transition to error mode. If there is
+ * also a readable event, we ignore it at the moment and just let a
+ * reassociation pick it up so we can simplify the set of state transitions that
+ * we have.
+ */
+static svp_conn_act_t
+svp_conn_poll_connect(port_event_t *pe, svp_conn_t *scp)
+{
+ int ret, err;
+ socklen_t sl = sizeof (err);
+ if (!(pe->portev_events & POLLOUT)) {
+ scp->sc_errno = 0;
+ scp->sc_error = SVP_CE_NOPOLLOUT;
+ scp->sc_cstate = SVP_CS_ERROR;
+ return (SVP_RA_DEGRADE);
+ }
+
+ ret = getsockopt(scp->sc_socket, SOL_SOCKET, SO_ERROR, &err, &sl);
+ if (ret != 0)
+ libvarpd_panic("unanticipated getsockopt error");
+ if (err != 0) {
+ return (svp_conn_backoff(scp));
+ }
+
+ scp->sc_cstate = SVP_CS_ACTIVE;
+ scp->sc_event.se_events = POLLIN | POLLRDNORM | POLLHUP;
+ ret = svp_event_associate(&scp->sc_event, scp->sc_socket);
+ if (ret == 0)
+ return (SVP_RA_RESTORE);
+ scp->sc_error = SVP_CE_ASSOCIATE;
+ scp->sc_errno = ret;
+ scp->sc_cstate = SVP_CS_ERROR;
+ return (SVP_RA_DEGRADE);
+}
+
+static svp_conn_act_t
+svp_conn_pollout(svp_conn_t *scp)
+{
+ svp_query_t *sqp;
+ svp_req_t *req;
+ size_t off;
+ struct iovec iov[2];
+ int nvecs = 0;
+ ssize_t ret;
+
+ assert(MUTEX_HELD(&scp->sc_lock));
+
+ /*
+ * We need to find a query and start writing it out.
+ */
+ if (scp->sc_output.sco_query == NULL) {
+ for (sqp = list_head(&scp->sc_queries); sqp != NULL;
+ sqp = list_next(&scp->sc_queries, sqp)) {
+ if (sqp->sq_state != SVP_QUERY_INIT)
+ continue;
+ break;
+ }
+
+ if (sqp == NULL) {
+ scp->sc_event.se_events &= ~POLLOUT;
+ return (SVP_RA_NONE);
+ }
+
+ scp->sc_output.sco_query = sqp;
+ scp->sc_output.sco_offset = 0;
+ sqp->sq_state = SVP_QUERY_WRITING;
+ svp_query_crc32(&sqp->sq_header, sqp->sq_rdata, sqp->sq_rsize);
+ }
+
+ sqp = scp->sc_output.sco_query;
+ req = &sqp->sq_header;
+ off = scp->sc_output.sco_offset;
+ if (off < sizeof (svp_req_t)) {
+ iov[nvecs].iov_base = (void *)((uintptr_t)req + off);
+ iov[nvecs].iov_len = sizeof (svp_req_t) - off;
+ nvecs++;
+ off = 0;
+ } else {
+ off -= sizeof (svp_req_t);
+ }
+
+ iov[nvecs].iov_base = (void *)((uintptr_t)sqp->sq_rdata + off);
+ iov[nvecs].iov_len = sqp->sq_rsize - off;
+ nvecs++;
+
+ do {
+ ret = writev(scp->sc_socket, iov, nvecs);
+ } while (ret == -1 && errno == EAGAIN);
+ if (ret == -1) {
+ switch (errno) {
+ case EAGAIN:
+ scp->sc_event.se_events |= POLLOUT;
+ return (SVP_RA_NONE);
+ case EIO:
+ case ENXIO:
+ case ECONNRESET:
+ return (SVP_RA_ERROR);
+ default:
+ libvarpd_panic("unexpected errno: %d", errno);
+ }
+ }
+
+ sqp->sq_acttime = gethrtime();
+ scp->sc_output.sco_offset += ret;
+ if (ret >= sizeof (svp_req_t) + sqp->sq_rsize) {
+ sqp->sq_state = SVP_QUERY_READING;
+ scp->sc_output.sco_query = NULL;
+ scp->sc_output.sco_offset = 0;
+ scp->sc_event.se_events |= POLLOUT;
+ }
+ return (SVP_RA_NONE);
+}
+
+static boolean_t
+svp_conn_pollin_validate(svp_conn_t *scp)
+{
+ svp_query_t *sqp;
+ uint32_t nsize;
+ uint16_t nvers, nop;
+ svp_req_t *resp = &scp->sc_input.sci_req;
+
+ assert(MUTEX_HELD(&scp->sc_lock));
+
+ nvers = ntohs(resp->svp_ver);
+ nop = ntohs(resp->svp_op);
+ nsize = ntohl(resp->svp_size);
+
+ if (nvers != SVP_CURRENT_VERSION) {
+ (void) bunyan_warn(svp_bunyan, "unsupported version",
+ BUNYAN_T_IP, "remote_ip", &scp->sc_addr,
+ BUNYAN_T_INT32, "remote_port", scp->sc_remote->sr_rport,
+ BUNYAN_T_INT32, "version", nvers,
+ BUNYAN_T_INT32, "operation", nop,
+ BUNYAN_T_INT32, "response_id", resp->svp_id,
+ BUNYAN_T_END);
+ return (B_FALSE);
+ }
+
+ if (nop != SVP_R_VL2_ACK && nop != SVP_R_VL3_ACK &&
+ nop != SVP_R_LOG_ACK && nop != SVP_R_LOG_RM_ACK) {
+ (void) bunyan_warn(svp_bunyan, "unsupported operation",
+ BUNYAN_T_IP, "remote_ip", &scp->sc_addr,
+ BUNYAN_T_INT32, "remote_port", scp->sc_remote->sr_rport,
+ BUNYAN_T_INT32, "version", nvers,
+ BUNYAN_T_INT32, "operation", nop,
+ BUNYAN_T_INT32, "response_id", resp->svp_id,
+ BUNYAN_T_END);
+ return (B_FALSE);
+ }
+
+ sqp = svp_conn_query_find(scp, resp->svp_id);
+ if (sqp == NULL) {
+ (void) bunyan_warn(svp_bunyan, "unknown response id",
+ BUNYAN_T_IP, "remote_ip", &scp->sc_addr,
+ BUNYAN_T_INT32, "remote_port", scp->sc_remote->sr_rport,
+ BUNYAN_T_INT32, "version", nvers,
+ BUNYAN_T_INT32, "operation", nop,
+ BUNYAN_T_INT32, "response_id", resp->svp_id,
+ BUNYAN_T_END);
+ return (B_FALSE);
+ }
+
+ if (sqp->sq_state != SVP_QUERY_READING) {
+ (void) bunyan_warn(svp_bunyan,
+ "got response for unexpecting query",
+ BUNYAN_T_IP, "remote_ip", &scp->sc_addr,
+ BUNYAN_T_INT32, "remote_port", scp->sc_remote->sr_rport,
+ BUNYAN_T_INT32, "version", nvers,
+ BUNYAN_T_INT32, "operation", nop,
+ BUNYAN_T_INT32, "response_id", resp->svp_id,
+ BUNYAN_T_INT32, "query_state", sqp->sq_state,
+ BUNYAN_T_END);
+ return (B_FALSE);
+ }
+
+ if ((nop == SVP_R_VL2_ACK && nsize != sizeof (svp_vl2_ack_t)) ||
+ (nop == SVP_R_VL3_ACK && nsize != sizeof (svp_vl3_ack_t)) ||
+ (nop == SVP_R_LOG_RM_ACK && nsize != sizeof (svp_lrm_ack_t))) {
+ (void) bunyan_warn(svp_bunyan, "response size too large",
+ BUNYAN_T_IP, "remote_ip", &scp->sc_addr,
+ BUNYAN_T_INT32, "remote_port", scp->sc_remote->sr_rport,
+ BUNYAN_T_INT32, "version", nvers,
+ BUNYAN_T_INT32, "operation", nop,
+ BUNYAN_T_INT32, "response_id", resp->svp_id,
+ BUNYAN_T_INT32, "response_size", nsize,
+ BUNYAN_T_INT32, "expected_size", nop == SVP_R_VL2_ACK ?
+ sizeof (svp_vl2_ack_t) : sizeof (svp_vl3_ack_t),
+ BUNYAN_T_INT32, "query_state", sqp->sq_state,
+ BUNYAN_T_END);
+ return (B_FALSE);
+ }
+
+ /*
+ * The valid size is anything <= to what the user requested, but at
+ * least svp_log_ack_t bytes large.
+ */
+ if (nop == SVP_R_LOG_ACK) {
+ const char *msg = NULL;
+ if (nsize < sizeof (svp_log_ack_t))
+ msg = "response size too small";
+ else if (nsize > ((svp_log_req_t *)sqp->sq_rdata)->svlr_count)
+ msg = "response size too large";
+ if (msg != NULL) {
+ (void) bunyan_warn(svp_bunyan, msg,
+ BUNYAN_T_IP, "remote_ip", &scp->sc_addr,
+ BUNYAN_T_INT32, "remote_port",
+ scp->sc_remote->sr_rport,
+ BUNYAN_T_INT32, "version", nvers,
+ BUNYAN_T_INT32, "operation", nop,
+ BUNYAN_T_INT32, "response_id", resp->svp_id,
+ BUNYAN_T_INT32, "response_size", nsize,
+ BUNYAN_T_INT32, "expected_size",
+ ((svp_log_req_t *)sqp->sq_rdata)->svlr_count,
+ BUNYAN_T_INT32, "query_state", sqp->sq_state,
+ BUNYAN_T_END);
+ return (B_FALSE);
+ }
+ }
+
+ sqp->sq_size = nsize;
+ scp->sc_input.sci_query = sqp;
+ if (nop == SVP_R_VL2_ACK || nop == SVP_R_VL3_ACK ||
+ nop == SVP_R_LOG_RM_ACK) {
+ sqp->sq_wdata = &sqp->sq_wdun;
+ sqp->sq_wsize = sizeof (svp_query_data_t);
+ } else {
+ VERIFY(nop == SVP_R_LOG_ACK);
+ assert(sqp->sq_wdata != NULL);
+ assert(sqp->sq_wsize != 0);
+ }
+
+ return (B_TRUE);
+}
+
+static svp_conn_act_t
+svp_conn_pollin(svp_conn_t *scp)
+{
+ size_t off, total;
+ ssize_t ret;
+ svp_query_t *sqp;
+ uint32_t crc;
+ uint16_t nop;
+
+ assert(MUTEX_HELD(&scp->sc_lock));
+
+ /*
+ * No query implies that we're reading in the header and that the offset
+ * is associted with it.
+ */
+ off = scp->sc_input.sci_offset;
+ sqp = scp->sc_input.sci_query;
+ if (scp->sc_input.sci_query == NULL) {
+ svp_req_t *resp = &scp->sc_input.sci_req;
+
+ assert(off < sizeof (svp_req_t));
+
+ do {
+ ret = read(scp->sc_socket,
+ (void *)((uintptr_t)resp + off),
+ sizeof (svp_req_t) - off);
+ } while (ret == -1 && errno == EINTR);
+ if (ret == -1) {
+ switch (errno) {
+ case EAGAIN:
+ scp->sc_event.se_events |= POLLIN | POLLRDNORM;
+ return (SVP_RA_NONE);
+ case EIO:
+ case ECONNRESET:
+ return (SVP_RA_ERROR);
+ break;
+ default:
+ libvarpd_panic("unexpeted read errno: %d",
+ errno);
+ }
+ } else if (ret == 0) {
+ /* Try to reconnect to the remote host */
+ return (SVP_RA_ERROR);
+ }
+
+ /* Didn't get all the data we need */
+ if (off + ret < sizeof (svp_req_t)) {
+ scp->sc_input.sci_offset += ret;
+ scp->sc_event.se_events |= POLLIN | POLLRDNORM;
+ return (SVP_RA_NONE);
+ }
+
+ if (svp_conn_pollin_validate(scp) != B_TRUE)
+ return (SVP_RA_ERROR);
+ }
+
+ sqp = scp->sc_input.sci_query;
+ assert(sqp != NULL);
+ sqp->sq_acttime = gethrtime();
+ total = ntohl(scp->sc_input.sci_req.svp_size);
+ do {
+ ret = read(scp->sc_socket,
+ (void *)((uintptr_t)sqp->sq_wdata + off),
+ total - off);
+ } while (ret == -1 && errno == EINTR);
+
+ if (ret == -1) {
+ switch (errno) {
+ case EAGAIN:
+ scp->sc_event.se_events |= POLLIN | POLLRDNORM;
+ return (SVP_RA_NONE);
+ case EIO:
+ case ECONNRESET:
+ return (SVP_RA_ERROR);
+ break;
+ default:
+ libvarpd_panic("unexpeted read errno: %d", errno);
+ }
+ } else if (ret == 0) {
+ /* Try to reconnect to the remote host */
+ return (SVP_RA_ERROR);
+ }
+
+ if (ret + off < total) {
+ scp->sc_input.sci_offset += ret;
+ return (SVP_RA_NONE);
+ }
+
+ nop = ntohs(scp->sc_input.sci_req.svp_op);
+ crc = scp->sc_input.sci_req.svp_crc32;
+ svp_query_crc32(&scp->sc_input.sci_req, sqp->sq_wdata, total);
+ if (crc != scp->sc_input.sci_req.svp_crc32) {
+ (void) bunyan_info(svp_bunyan, "crc32 mismatch",
+ BUNYAN_T_IP, "remote ip", &scp->sc_addr,
+ BUNYAN_T_INT32, "remote port", scp->sc_remote->sr_rport,
+ BUNYAN_T_INT32, "version",
+ ntohs(scp->sc_input.sci_req.svp_ver),
+ BUNYAN_T_INT32, "operation", nop,
+ BUNYAN_T_INT32, "response id",
+ ntohl(scp->sc_input.sci_req.svp_id),
+ BUNYAN_T_INT32, "query state", sqp->sq_state,
+ BUNYAN_T_UINT32, "msg_crc", ntohl(crc),
+ BUNYAN_T_UINT32, "calc_crc",
+ ntohl(scp->sc_input.sci_req.svp_crc32),
+ BUNYAN_T_END);
+ return (SVP_RA_ERROR);
+ }
+ scp->sc_input.sci_query = NULL;
+ scp->sc_input.sci_offset = 0;
+
+ if (nop == SVP_R_VL2_ACK) {
+ svp_vl2_ack_t *sl2a = sqp->sq_wdata;
+ sqp->sq_status = ntohl(sl2a->sl2a_status);
+ } else if (nop == SVP_R_VL3_ACK) {
+ svp_vl3_ack_t *sl3a = sqp->sq_wdata;
+ sqp->sq_status = ntohl(sl3a->sl3a_status);
+ } else if (nop == SVP_R_LOG_ACK) {
+ svp_log_ack_t *svla = sqp->sq_wdata;
+ sqp->sq_status = ntohl(svla->svla_status);
+ } else if (nop == SVP_R_LOG_RM_ACK) {
+ svp_lrm_ack_t *svra = sqp->sq_wdata;
+ sqp->sq_status = ntohl(svra->svra_status);
+ } else {
+ libvarpd_panic("unhandled nop: %d", nop);
+ }
+
+ list_remove(&scp->sc_queries, sqp);
+ mutex_exit(&scp->sc_lock);
+
+ /*
+ * We have to release all of our resources associated with this entry
+ * before we call the callback. After we call it, the memory will be
+ * lost to time.
+ */
+ svp_query_release(sqp);
+ sqp->sq_func(sqp, sqp->sq_arg);
+ mutex_enter(&scp->sc_lock);
+ scp->sc_event.se_events |= POLLIN | POLLRDNORM;
+
+ return (SVP_RA_NONE);
+}
+
+static svp_conn_act_t
+svp_conn_reset(svp_conn_t *scp)
+{
+ svp_remote_t *srp = scp->sc_remote;
+
+ assert(MUTEX_HELD(&srp->sr_lock));
+ assert(MUTEX_HELD(&scp->sc_lock));
+
+ assert(svp_event_dissociate(&scp->sc_event, scp->sc_socket) ==
+ ENOENT);
+ if (close(scp->sc_socket) != 0)
+ libvarpd_panic("failed to close socket %d: %d", scp->sc_socket,
+ errno);
+ scp->sc_flags &= ~SVP_CF_TEARDOWN;
+ scp->sc_socket = -1;
+ scp->sc_cstate = SVP_CS_INITIAL;
+ scp->sc_input.sci_query = NULL;
+ scp->sc_output.sco_query = NULL;
+
+ svp_remote_reassign(srp, scp);
+
+ return (svp_conn_connect(scp));
+}
+
+/*
+ * This is our general state transition function. We're called here when we want
+ * to advance part of our state machine as well as to re-arm ourselves. We can
+ * also end up here from the standard event loop as a result of having a user
+ * event posted.
+ */
+static void
+svp_conn_handler(port_event_t *pe, void *arg)
+{
+ svp_conn_t *scp = arg;
+ svp_remote_t *srp = scp->sc_remote;
+ svp_conn_act_t ret = SVP_RA_NONE;
+ svp_conn_state_t oldstate;
+
+ mutex_enter(&scp->sc_lock);
+
+ /*
+ * Check if one of our event interrupts is set. An event interrupt, such
+ * as having to be reaped or be torndown is notified by a
+ * PORT_SOURCE_USER event that tries to take care of this. However,
+ * because of the fact that the event loop can be ongoing despite this,
+ * we may get here before the PORT_SOURCE_USER has casued us to get
+ * here. In such a case, if the PORT_SOURCE_USER event is tagged, then
+ * we're going to opt to do nothing here and wait for it to come and
+ * tear us down. That will also indicate to us that we have nothing to
+ * worry about as far as general timing and the like goes.
+ */
+ if ((scp->sc_flags & SVP_CF_UFLAG) != 0 &&
+ (scp->sc_flags & SVP_CF_USER) != 0 &&
+ pe != NULL &&
+ pe->portev_source != PORT_SOURCE_USER) {
+ mutex_exit(&scp->sc_lock);
+ return;
+ }
+
+ if (pe != NULL && pe->portev_source == PORT_SOURCE_USER) {
+ scp->sc_flags &= ~SVP_CF_USER;
+ if ((scp->sc_flags & SVP_CF_UFLAG) == 0) {
+ mutex_exit(&scp->sc_lock);
+ return;
+ }
+ }
+
+ /* Check if this needs to be freed */
+ if (scp->sc_flags & SVP_CF_REAP) {
+ mutex_exit(&scp->sc_lock);
+ svp_conn_destroy(scp);
+ return;
+ }
+
+ /* Check if this needs to be reset */
+ if (scp->sc_flags & SVP_CF_TEARDOWN) {
+ /* Make sure any other users of this are disassociated */
+ ret = SVP_RA_ERROR;
+ goto out;
+ }
+
+ switch (scp->sc_cstate) {
+ case SVP_CS_INITIAL:
+ case SVP_CS_BACKOFF:
+ assert(pe == NULL);
+ ret = svp_conn_connect(scp);
+ break;
+ case SVP_CS_CONNECTING:
+ assert(pe != NULL);
+ ret = svp_conn_poll_connect(pe, scp);
+ break;
+ case SVP_CS_ACTIVE:
+ case SVP_CS_WINDDOWN:
+ assert(pe != NULL);
+ oldstate = scp->sc_cstate;
+ if (pe->portev_events & POLLOUT)
+ ret = svp_conn_pollout(scp);
+ if (ret == SVP_RA_NONE && (pe->portev_events & POLLIN))
+ ret = svp_conn_pollin(scp);
+
+ if (oldstate == SVP_CS_WINDDOWN &&
+ (list_is_empty(&scp->sc_queries) || ret != SVP_RA_NONE)) {
+ ret = SVP_RA_CLEANUP;
+ }
+
+ if (ret == SVP_RA_NONE) {
+ int err;
+ if ((err = svp_event_associate(&scp->sc_event,
+ scp->sc_socket)) != 0) {
+ scp->sc_error = SVP_CE_ASSOCIATE;
+ scp->sc_errno = err;
+ scp->sc_cstate = SVP_CS_ERROR;
+ ret = SVP_RA_DEGRADE;
+ }
+ }
+ break;
+ default:
+ libvarpd_panic("svp_conn_handler encountered unexpected "
+ "state: %d", scp->sc_cstate);
+ }
+out:
+ mutex_exit(&scp->sc_lock);
+
+ if (ret == SVP_RA_NONE)
+ return;
+
+ mutex_enter(&srp->sr_lock);
+ mutex_enter(&scp->sc_lock);
+ if (ret == SVP_RA_ERROR)
+ ret = svp_conn_reset(scp);
+
+ if (ret == SVP_RA_DEGRADE)
+ svp_conn_degrade(scp);
+ if (ret == SVP_RA_RESTORE)
+ svp_conn_restore(scp);
+
+ if (ret == SVP_RA_CLEANUP) {
+ svp_conn_remove(scp);
+ scp->sc_flags |= SVP_CF_REAP;
+ svp_conn_inject(scp);
+ }
+ mutex_exit(&scp->sc_lock);
+ mutex_exit(&srp->sr_lock);
+}
+
+static void
+svp_conn_backtimer(void *arg)
+{
+ svp_conn_t *scp = arg;
+
+ svp_conn_handler(NULL, scp);
+}
+
+/*
+ * This fires every svp_conn_query_timeout seconds. Its purpos is to determine
+ * if we haven't heard back on a request with in svp_conn_query_timeout seconds.
+ * If any of the svp_conn_query_t's that have been started (indicated by
+ * svp_query_t`sq_acttime != -1), and more than svp_conn_query_timeout seconds
+ * have passed, we basically tear this connection down and reassign outstanding
+ * queries.
+ */
+static void
+svp_conn_querytimer(void *arg)
+{
+ int ret;
+ svp_query_t *sqp;
+ svp_conn_t *scp = arg;
+ hrtime_t now = gethrtime();
+
+ mutex_enter(&scp->sc_lock);
+
+ /*
+ * If we're not in the active state, then we don't care about this as
+ * we're already either going to die or we have no connections to worry
+ * about.
+ */
+ if (scp->sc_cstate != SVP_CS_ACTIVE) {
+ mutex_exit(&scp->sc_lock);
+ return;
+ }
+
+ for (sqp = list_head(&scp->sc_queries); sqp != NULL;
+ sqp = list_next(&scp->sc_queries, sqp)) {
+ if (sqp->sq_acttime == -1)
+ continue;
+ if ((now - sqp->sq_acttime) / NANOSEC > svp_conn_query_timeout)
+ break;
+ }
+
+ /* Nothing timed out, we're good here */
+ if (sqp == NULL) {
+ mutex_exit(&scp->sc_lock);
+ return;
+ }
+
+ (void) bunyan_warn(svp_bunyan, "query timed out on connection",
+ BUNYAN_T_IP, "remote_ip", &scp->sc_addr,
+ BUNYAN_T_INT32, "remote_port", scp->sc_remote->sr_rport,
+ BUNYAN_T_INT32, "operation", ntohs(sqp->sq_header.svp_op),
+ BUNYAN_T_END);
+
+ /*
+ * Begin the tear down process for this connect. If we lose the
+ * disassociate, then we don't inject an event. See the big theory
+ * statement in libvarpd_svp.c for more information.
+ */
+ scp->sc_flags |= SVP_CF_TEARDOWN;
+
+ ret = svp_event_dissociate(&scp->sc_event, scp->sc_socket);
+ if (ret == 0)
+ svp_conn_inject(scp);
+ else
+ VERIFY(ret == ENOENT);
+
+ mutex_exit(&scp->sc_lock);
+}
+
+/*
+ * This connection has fallen out of DNS, figure out what we need to do with it.
+ */
+void
+svp_conn_fallout(svp_conn_t *scp)
+{
+ svp_remote_t *srp = scp->sc_remote;
+
+ assert(MUTEX_HELD(&srp->sr_lock));
+
+ mutex_enter(&scp->sc_lock);
+ switch (scp->sc_cstate) {
+ case SVP_CS_ERROR:
+ /*
+ * Connection is already inactive, so it's safe to tear down.
+ * Fire it off through the state machine to tear down via the
+ * backoff timer.
+ */
+ svp_conn_remove(scp);
+ scp->sc_flags |= SVP_CF_REAP;
+ svp_conn_inject(scp);
+ break;
+ case SVP_CS_INITIAL:
+ case SVP_CS_BACKOFF:
+ case SVP_CS_CONNECTING:
+ /*
+ * Here, we have something actively going on, so we'll let it be
+ * clean up the next time we hit the event loop by the event
+ * loop itself. As it has no connections, there isn't much to
+ * really do, though we'll take this chance to go ahead and
+ * remove it from the remote.
+ */
+ svp_conn_remove(scp);
+ scp->sc_flags |= SVP_CF_REAP;
+ svp_conn_inject(scp);
+ break;
+ case SVP_CS_ACTIVE:
+ case SVP_CS_WINDDOWN:
+ /*
+ * If there are no outstanding queries, then we should simply
+ * clean this up now,t he same way we would with the others.
+ * Othewrise, as we know the event loop is ongoing, we'll make
+ * sure that these entries get cleaned up once they're done.
+ */
+ scp->sc_cstate = SVP_CS_WINDDOWN;
+ if (list_is_empty(&scp->sc_queries)) {
+ svp_conn_remove(scp);
+ scp->sc_flags |= SVP_CF_REAP;
+ svp_conn_inject(scp);
+ }
+ break;
+ default:
+ libvarpd_panic("svp_conn_fallout encountered"
+ "unkonwn state");
+ }
+ mutex_exit(&scp->sc_lock);
+ mutex_exit(&srp->sr_lock);
+}
+
+int
+svp_conn_create(svp_remote_t *srp, const struct in6_addr *addr)
+{
+ int ret;
+ svp_conn_t *scp;
+
+ assert(MUTEX_HELD(&srp->sr_lock));
+ scp = umem_zalloc(sizeof (svp_conn_t), UMEM_DEFAULT);
+ if (scp == NULL)
+ return (ENOMEM);
+
+ if ((ret = mutex_init(&scp->sc_lock, USYNC_THREAD | LOCK_ERRORCHECK,
+ NULL)) != 0) {
+ umem_free(scp, sizeof (svp_conn_t));
+ return (ret);
+ }
+
+ scp->sc_remote = srp;
+ scp->sc_event.se_func = svp_conn_handler;
+ scp->sc_event.se_arg = scp;
+ scp->sc_btimer.st_func = svp_conn_backtimer;
+ scp->sc_btimer.st_arg = scp;
+ scp->sc_btimer.st_oneshot = B_TRUE;
+ scp->sc_btimer.st_value = 1;
+
+ scp->sc_qtimer.st_func = svp_conn_querytimer;
+ scp->sc_qtimer.st_arg = scp;
+ scp->sc_qtimer.st_oneshot = B_FALSE;
+ scp->sc_qtimer.st_value = svp_conn_query_timeout;
+
+ scp->sc_socket = -1;
+
+ list_create(&scp->sc_queries, sizeof (svp_query_t),
+ offsetof(svp_query_t, sq_lnode));
+ scp->sc_gen = srp->sr_gen;
+ bcopy(addr, &scp->sc_addr, sizeof (struct in6_addr));
+ scp->sc_cstate = SVP_CS_INITIAL;
+ mutex_enter(&scp->sc_lock);
+ svp_conn_add(scp);
+ mutex_exit(&scp->sc_lock);
+
+ /* Now that we're locked and loaded, add our timers */
+ svp_timer_add(&scp->sc_qtimer);
+ svp_timer_add(&scp->sc_btimer);
+
+ return (0);
+}
+
+/*
+ * At the time of calling, the entry has been removed from all lists. In
+ * addition, the entries state should be SVP_CS_ERROR, therefore, we know that
+ * the fd should not be associated with the event loop. We'll double check that
+ * just in case. We should also have already been removed from the remote's
+ * list.
+ */
+void
+svp_conn_destroy(svp_conn_t *scp)
+{
+ int ret;
+
+ mutex_enter(&scp->sc_lock);
+ if (scp->sc_cstate != SVP_CS_ERROR)
+ libvarpd_panic("asked to tear down an active connection");
+ if (scp->sc_flags & SVP_CF_ADDED)
+ libvarpd_panic("asked to remove a connection still in "
+ "the remote list\n");
+ if (!list_is_empty(&scp->sc_queries))
+ libvarpd_panic("asked to remove a connection with non-empty "
+ "query list");
+
+ if ((ret = svp_event_dissociate(&scp->sc_event, scp->sc_socket)) !=
+ ENOENT) {
+ libvarpd_panic("dissociate failed or was actually "
+ "associated: %d", ret);
+ }
+ mutex_exit(&scp->sc_lock);
+
+ /* Verify our timers are killed */
+ svp_timer_remove(&scp->sc_btimer);
+ svp_timer_remove(&scp->sc_qtimer);
+
+ if (scp->sc_socket != -1 && close(scp->sc_socket) != 0)
+ libvarpd_panic("failed to close svp_conn_t`scp_socket fd "
+ "%d: %d", scp->sc_socket, errno);
+
+ list_destroy(&scp->sc_queries);
+ umem_free(scp, sizeof (svp_conn_t));
+}
+
+void
+svp_conn_queue(svp_conn_t *scp, svp_query_t *sqp)
+{
+ assert(MUTEX_HELD(&scp->sc_lock));
+ assert(scp->sc_cstate == SVP_CS_ACTIVE);
+
+ sqp->sq_acttime = -1;
+ list_insert_tail(&scp->sc_queries, sqp);
+ if (!(scp->sc_event.se_events & POLLOUT)) {
+ scp->sc_event.se_events |= POLLOUT;
+ /*
+ * If this becomes frequent, we should instead give up on this
+ * set of connections instead of aborting.
+ */
+ if (svp_event_associate(&scp->sc_event, scp->sc_socket) != 0)
+ libvarpd_panic("svp_event_associate failed somehow");
+ }
+}
diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_crc.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_crc.c
new file mode 100644
index 0000000000..ade47ff998
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_crc.c
@@ -0,0 +1,53 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015, Joyent, Inc.
+ */
+
+/*
+ * Perform standard crc32 functions.
+ *
+ * For more information, see the big theory statement in
+ * lib/varpd/svp/common/libvarpd_svp.c.
+ *
+ * Really, this should just be a libcrc.
+ */
+
+#include <sys/crc32.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <inttypes.h>
+#include <libvarpd_svp.h>
+
+static uint32_t svp_crc32_tab[] = { CRC32_TABLE };
+
+static uint32_t
+svp_crc32(uint32_t old, const uint8_t *buf, size_t len)
+{
+ uint32_t out;
+
+ CRC32(out, buf, len, old, svp_crc32_tab);
+ return (out);
+}
+
+void
+svp_query_crc32(svp_req_t *shp, void *buf, size_t data)
+{
+ uint32_t crc = -1U;
+
+ shp->svp_crc32 = 0;
+ crc = svp_crc32(crc, (uint8_t *)shp, sizeof (svp_req_t));
+ crc = svp_crc32(crc, buf, data);
+ crc = ~crc;
+ shp->svp_crc32 = htonl(crc);
+}
diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_host.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_host.c
new file mode 100644
index 0000000000..e91cc30e9d
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_host.c
@@ -0,0 +1,171 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * DNS Host-name related functions.
+ *
+ * For more information, see the big theory statement in
+ * lib/varpd/svp/common/libvarpd_svp.c.
+ */
+
+#include <sys/socket.h>
+#include <netdb.h>
+#include <thread.h>
+#include <synch.h>
+#include <assert.h>
+#include <errno.h>
+
+#include <libvarpd_svp.h>
+
+int svp_host_nthreads = 8;
+
+static mutex_t svp_host_lock = ERRORCHECKMUTEX;
+static cond_t svp_host_cv = DEFAULTCV;
+static svp_remote_t *svp_host_head;
+
+/* ARGSUSED */
+static void *
+svp_host_loop(void *unused)
+{
+ for (;;) {
+ int err;
+ svp_remote_t *srp;
+ struct addrinfo *addrs;
+
+ mutex_enter(&svp_host_lock);
+ while (svp_host_head == NULL)
+ (void) cond_wait(&svp_host_cv, &svp_host_lock);
+ srp = svp_host_head;
+ svp_host_head = srp->sr_nexthost;
+ if (svp_host_head != NULL)
+ (void) cond_signal(&svp_host_cv);
+ mutex_exit(&svp_host_lock);
+
+ mutex_enter(&srp->sr_lock);
+ assert(srp->sr_state & SVP_RS_LOOKUP_SCHEDULED);
+ srp->sr_state &= ~SVP_RS_LOOKUP_SCHEDULED;
+ if (srp->sr_state & SVP_RS_LOOKUP_INPROGRESS) {
+ mutex_exit(&srp->sr_lock);
+ continue;
+ }
+ srp->sr_state |= SVP_RS_LOOKUP_INPROGRESS;
+ mutex_exit(&srp->sr_lock);
+
+ for (;;) {
+ err = getaddrinfo(srp->sr_hostname, NULL, NULL, &addrs);
+ if (err == 0)
+ break;
+ if (err != 0) {
+ switch (err) {
+ case EAI_ADDRFAMILY:
+ case EAI_BADFLAGS:
+ case EAI_FAMILY:
+ case EAI_SERVICE:
+ case EAI_SOCKTYPE:
+ case EAI_OVERFLOW:
+ default:
+ libvarpd_panic("unexpected getaddrinfo "
+ "failure: %d", err);
+ break;
+ case EAI_AGAIN:
+ case EAI_MEMORY:
+ case EAI_SYSTEM:
+ continue;
+ case EAI_FAIL:
+ case EAI_NODATA:
+ case EAI_NONAME:
+ /*
+ * At this point in time we have
+ * something which isn't very good. This
+ * may have been a typo or something may
+ * have been destroyed. We should go
+ * ahead and degrade this overall
+ * instance, because we're not going to
+ * make much forward progress... It'd be
+ * great if we could actually issue more
+ * of an EREPORT to describe what
+ * happened, some day.
+ */
+ mutex_enter(&srp->sr_lock);
+ svp_remote_degrade(srp,
+ SVP_RD_DNS_FAIL);
+ mutex_exit(&srp->sr_lock);
+ break;
+ }
+ }
+ break;
+ }
+
+ if (err == 0) {
+ /*
+ * We've successfully resolved something, mark this
+ * degredation over for now.
+ */
+ mutex_enter(&srp->sr_lock);
+ svp_remote_restore(srp, SVP_RD_DNS_FAIL);
+ mutex_exit(&srp->sr_lock);
+ svp_remote_resolved(srp, addrs);
+ }
+
+ mutex_enter(&srp->sr_lock);
+ srp->sr_state &= ~SVP_RS_LOOKUP_INPROGRESS;
+ (void) cond_broadcast(&srp->sr_cond);
+ mutex_exit(&srp->sr_lock);
+ }
+
+ /* LINTED: E_STMT_NOT_REACHED */
+ return (NULL);
+}
+
+void
+svp_host_queue(svp_remote_t *srp)
+{
+ svp_remote_t *s;
+ mutex_enter(&svp_host_lock);
+ mutex_enter(&srp->sr_lock);
+ if (srp->sr_state & SVP_RS_LOOKUP_SCHEDULED) {
+ mutex_exit(&srp->sr_lock);
+ mutex_exit(&svp_host_lock);
+ return;
+ }
+ srp->sr_state |= SVP_RS_LOOKUP_SCHEDULED;
+ s = svp_host_head;
+ while (s != NULL && s->sr_nexthost != NULL)
+ s = s->sr_nexthost;
+ if (s == NULL) {
+ assert(s == svp_host_head);
+ svp_host_head = srp;
+ } else {
+ s->sr_nexthost = srp;
+ }
+ srp->sr_nexthost = NULL;
+ (void) cond_signal(&svp_host_cv);
+ mutex_exit(&srp->sr_lock);
+ mutex_exit(&svp_host_lock);
+}
+
+int
+svp_host_init(void)
+{
+ int i;
+
+ for (i = 0; i < svp_host_nthreads; i++) {
+ if (thr_create(NULL, 0, svp_host_loop, NULL,
+ THR_DETACHED | THR_DAEMON, NULL) != 0)
+ return (errno);
+ }
+
+ return (0);
+}
diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_loop.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_loop.c
new file mode 100644
index 0000000000..18a79b9dff
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_loop.c
@@ -0,0 +1,210 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * Event loop mechanism for our backend.
+ *
+ * For more information, see the big theory statement in
+ * lib/varpd/svp/common/libvarpd_svp.c.
+ */
+
+#include <unistd.h>
+#include <thread.h>
+#include <port.h>
+#include <signal.h>
+#include <time.h>
+#include <errno.h>
+#include <umem.h>
+
+#include <libvarpd_svp.h>
+
+typedef struct svp_event_loop {
+ int sel_port; /* RO */
+ int sel_nthread; /* RO */
+ thread_t *sel_threads; /* RO */
+ boolean_t sel_stop; /* svp_elock */
+ timer_t sel_hosttimer;
+} svp_event_loop_t;
+
+static svp_event_loop_t svp_event;
+static mutex_t svp_elock = ERRORCHECKMUTEX;
+
+/* ARGSUSED */
+static void *
+svp_event_thr(void *arg)
+{
+ for (;;) {
+ int ret;
+ port_event_t pe;
+ svp_event_t *sep;
+
+ mutex_enter(&svp_elock);
+ if (svp_event.sel_stop == B_TRUE) {
+ mutex_exit(&svp_elock);
+ break;
+ }
+ mutex_exit(&svp_elock);
+
+ ret = port_get(svp_event.sel_port, &pe, NULL);
+ if (ret != 0) {
+ switch (errno) {
+ case EFAULT:
+ case EBADF:
+ case EINVAL:
+ libvarpd_panic("unexpected port_get errno: %d",
+ errno);
+ default:
+ break;
+ }
+ }
+
+ if (pe.portev_user == NULL)
+ libvarpd_panic("received event (%p) without "
+ "protev_user set", &pe);
+ sep = (svp_event_t *)pe.portev_user;
+ sep->se_func(&pe, sep->se_arg);
+ }
+
+ return (NULL);
+}
+
+int
+svp_event_associate(svp_event_t *sep, int fd)
+{
+ int ret;
+
+ ret = port_associate(svp_event.sel_port, PORT_SOURCE_FD, fd,
+ sep->se_events, sep);
+ if (ret != 0) {
+ switch (errno) {
+ case EBADF:
+ case EBADFD:
+ case EINVAL:
+ case EAGAIN:
+ libvarpd_panic("unexpected port_associate error: %d",
+ errno);
+ default:
+ ret = errno;
+ break;
+ }
+ }
+
+ return (ret);
+}
+
+/* ARGSUSED */
+int
+svp_event_dissociate(svp_event_t *sep, int fd)
+{
+ int ret;
+
+ ret = port_dissociate(svp_event.sel_port, PORT_SOURCE_FD, fd);
+ if (ret != 0) {
+ if (errno != ENOENT)
+ libvarpd_panic("unexpected port_dissociate error: %d",
+ errno);
+ ret = errno;
+ }
+ return (ret);
+}
+
+int
+svp_event_inject(svp_event_t *user)
+{
+ return (port_send(svp_event.sel_port, 0, user));
+}
+
+int
+svp_event_timer_init(svp_event_t *sep)
+{
+ port_notify_t pn;
+ struct sigevent evp;
+ struct itimerspec ts;
+
+ pn.portnfy_port = svp_event.sel_port;
+ pn.portnfy_user = sep;
+ evp.sigev_notify = SIGEV_PORT;
+ evp.sigev_value.sival_ptr = &pn;
+
+ if (timer_create(CLOCK_REALTIME, &evp, &svp_event.sel_hosttimer) != 0)
+ return (errno);
+
+ ts.it_value.tv_sec = svp_tickrate;
+ ts.it_value.tv_nsec = 0;
+ ts.it_interval.tv_sec = svp_tickrate;
+ ts.it_interval.tv_nsec = 0;
+
+ if (timer_settime(svp_event.sel_hosttimer, TIMER_RELTIME, &ts,
+ NULL) != 0) {
+ int ret = errno;
+ (void) timer_delete(svp_event.sel_hosttimer);
+ return (ret);
+ }
+
+ return (0);
+}
+
+int
+svp_event_init(void)
+{
+ long i, ncpus;
+
+ svp_event.sel_port = port_create();
+ if (svp_event.sel_port == -1)
+ return (errno);
+
+ ncpus = sysconf(_SC_NPROCESSORS_ONLN) * 2 + 1;
+ if (ncpus <= 0)
+ libvarpd_panic("sysconf for nprocs failed... %d/%d",
+ ncpus, errno);
+
+ svp_event.sel_threads = umem_alloc(sizeof (thread_t) * ncpus,
+ UMEM_DEFAULT);
+ if (svp_event.sel_threads == NULL) {
+ int ret = errno;
+ (void) timer_delete(svp_event.sel_hosttimer);
+ (void) close(svp_event.sel_port);
+ svp_event.sel_port = -1;
+ return (ret);
+ }
+
+ for (i = 0; i < ncpus; i++) {
+ int ret;
+ thread_t *thr = &svp_event.sel_threads[i];
+
+ ret = thr_create(NULL, 0, svp_event_thr, NULL,
+ THR_DETACHED | THR_DAEMON, thr);
+ if (ret != 0) {
+ ret = errno;
+ (void) timer_delete(svp_event.sel_hosttimer);
+ (void) close(svp_event.sel_port);
+ svp_event.sel_port = -1;
+ return (errno);
+ }
+ }
+
+ return (0);
+}
+
+void
+svp_event_fini(void)
+{
+ mutex_enter(&svp_elock);
+ svp_event.sel_stop = B_TRUE;
+ mutex_exit(&svp_elock);
+
+ (void) timer_delete(svp_event.sel_hosttimer);
+ (void) close(svp_event.sel_port);
+}
diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_prot.h b/usr/src/lib/varpd/svp/common/libvarpd_svp_prot.h
new file mode 100644
index 0000000000..16dbdbec05
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_prot.h
@@ -0,0 +1,236 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _LIBVARPD_SVP_PROT_H
+#define _LIBVARPD_SVP_PROT_H
+
+/*
+ * SVP protocol Definitions
+ */
+
+#include <sys/types.h>
+#include <inttypes.h>
+#include <sys/ethernet.h>
+#include <netinet/in.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * SDC VXLAN Protocol Definitions
+ */
+
+#define SVP_VERSION_ONE 1
+#define SVP_CURRENT_VERSION SVP_VERSION_ONE
+
+typedef struct svp_req {
+ uint16_t svp_ver;
+ uint16_t svp_op;
+ uint32_t svp_size;
+ uint32_t svp_id;
+ uint32_t svp_crc32;
+} svp_req_t;
+
+typedef enum svp_op {
+ SVP_R_UNKNOWN = 0x00,
+ SVP_R_PING = 0x01,
+ SVP_R_PONG = 0x02,
+ SVP_R_VL2_REQ = 0x03,
+ SVP_R_VL2_ACK = 0x04,
+ SVP_R_VL3_REQ = 0x05,
+ SVP_R_VL3_ACK = 0x06,
+ SVP_R_BULK_REQ = 0x07,
+ SVP_R_BULK_ACK = 0x08,
+ SVP_R_LOG_REQ = 0x09,
+ SVP_R_LOG_ACK = 0x0A,
+ SVP_R_LOG_RM = 0x0B,
+ SVP_R_LOG_RM_ACK = 0x0C,
+ SVP_R_SHOOTDOWN = 0x0D
+} svp_op_t;
+
+typedef enum svp_status {
+ SVP_S_OK = 0x00, /* Everything OK */
+ SVP_S_FATAL = 0x01, /* Fatal error, close connection */
+ SVP_S_NOTFOUND = 0x02, /* Entry not found */
+ SVP_S_BADL3TYPE = 0x03, /* Unknown svp_vl3_type_t */
+ SVP_S_BADBULK = 0x04 /* Unknown svp_bulk_type_t */
+} svp_status_t;
+
+/*
+ * A client issues the SVP_R_VL2_REQ whenever it needs to perform a VLS->UL3
+ * lookup. Requests have the following structure:
+ */
+typedef struct svp_vl2_req {
+ uint8_t sl2r_mac[ETHERADDRL];
+ uint8_t sl2r_pad[2];
+ uint32_t sl2r_vnetid;
+} svp_vl2_req_t;
+
+/*
+ * This is the message a server uses to reply to the SVP_R_VL2_REQ. If the
+ * destination on the underlay is an IPv4 address, it should be encoded as an
+ * IPv4-mapped IPv6 address.
+ */
+typedef struct svp_vl2_ack {
+ uint16_t sl2a_status;
+ uint16_t sl2a_port;
+ uint8_t sl2a_addr[16];
+} svp_vl2_ack_t;
+
+
+/*
+ * A client issues the SVP_R_VL3_REQ request whenever it needs to perform a
+ * VL3->VL2 lookup. Note, that this also implicitly performs a VL2->UL3 lookup
+ * as well. The sl3r_type member is used to indicate the kind of lookup type
+ * that we're performing, eg. is it a L3 or L2.
+ */
+typedef enum svp_vl3_type {
+ SVP_VL3_IP = 0x01,
+ SVP_VL3_IPV6 = 0x02
+} svp_vl3_type_t;
+
+typedef struct svp_vl3_req {
+ uint8_t sl3r_ip[16];
+ uint32_t sl3r_type;
+ uint32_t sl3r_vnetid;
+} svp_vl3_req_t;
+
+/*
+ * This response, corresponding to the SVP_R_VL3_ACK, includes an answer to both
+ * the VL3->VL2 and the VL2->UL3 requests.
+ */
+typedef struct svp_vl3_ack {
+ uint32_t sl3a_status;
+ uint8_t sl3a_mac[ETHERADDRL];
+ uint16_t sl3a_uport;
+ uint8_t sl3a_uip[16];
+} svp_vl3_ack_t;
+
+/*
+ * SVP_R_BULK_REQ requests a bulk dump of data. Currently we have two kinds of
+ * data tables that we need to dump: VL3->VL2 mappings and VL2->UL3 mappings.
+ * The kind that we want is indicated using the svbr_type member.
+ */
+typedef enum svp_bulk_type {
+ SVP_BULK_VL2 = 0x01,
+ SVP_BULK_VL3 = 0x02
+} svp_bulk_type_t;
+
+typedef struct svp_bulk_req {
+ uint32_t svbr_type;
+} svp_bulk_req_t;
+
+/*
+ * When replying to a bulk request (SVP_R_BULK_ACK), data is streamed back
+ * across. The format of the data is currently undefined and as we work on the
+ * system, we'll get a better understanding of what this should look like. A
+ * client may need to stream such a request to disk, or the format will need to
+ * be in a streamable format that allows the client to construct data.
+ */
+typedef struct svp_bulk_ack {
+ uint32_t svba_status;
+ uint32_t svba_type;
+ uint8_t svba_data[];
+} svp_bulk_ack_t;
+
+/*
+ * SVP_R_LOG_REQ requests a log entries from the specified log from the server.
+ * The total number of bytes that the user is ready to receive is in svlr_count.
+ * However, the server should not block for data if none is available and thus
+ * may return less than svlr_count bytes back. We identify the IP address of the
+ * underlay to use here explicitly.
+ */
+typedef struct svp_log_req {
+ uint32_t svlr_count;
+ uint8_t svlr_ip[16];
+} svp_log_req_t;
+
+/*
+ * The server replies to a log request by sending a series of log entries.
+ * These log entries may be a mixture of both vl2 and vl3 records. The reply is
+ * a stream of bytes after the status message whose length is determined baseed
+ * on the header itself. Each entry begins with a uint32_t that describes its
+ * type and then is followed by the remaining data payload. The next entry
+ * follows immediately which again begins with the uint32_t word that describes
+ * what it should be.
+ */
+typedef enum svp_log_type {
+ SVP_LOG_VL2 = 0x01,
+ SVP_LOG_VL3 = 0x02
+} svp_log_type_t;
+
+typedef struct svp_log_vl2 {
+ uint32_t svl2_type; /* Should be SVP_LOG_VL2 */
+ uint8_t svl2_id[16]; /* 16-byte UUID */
+ uint8_t svl2_mac[ETHERADDRL];
+ uint8_t svl2_pad[2];
+ uint32_t svl2_vnetid;
+} svp_log_vl2_t;
+
+typedef struct svp_log_vl3 {
+ uint32_t svl3_type; /* Should be SVP_LOG_VL3 */
+ uint8_t svl3_id[16]; /* 16-byte UUID */
+ uint8_t svl3_ip[16];
+ uint8_t svl3_pad[2];
+ uint16_t svl3_vlan;
+ uint32_t svl3_vnetid;
+} svp_log_vl3_t;
+
+typedef struct svp_log_ack {
+ uint32_t svla_status;
+ uint8_t svla_data[];
+} svp_log_ack_t;
+
+/*
+ * SVP_R_LOG_RM is used after the client successfully processes a series of the
+ * log stream. It replies to tell the server that it can remove those IDs from
+ * processing. The IDs used are the same IDs that were in the individual
+ * SVP_R_LOG_ACK entries.
+ */
+typedef struct svp_lrm_req {
+ uint32_t svrr_count;
+ uint8_t svrr_ids[];
+} svp_lrm_req_t;
+
+/*
+ * SVP_R_LOG_RM_ACK is used to indicate that a log entry has been successfully
+ * deleted and at this point it makes sense to go and ask for another
+ * SVP_R_LOG_REQ.
+ */
+typedef struct svp_lrm_ack {
+ uint32_t svra_status;
+} svp_lrm_ack_t;
+
+/*
+ * A shootdown (SVP_R_SHOOTDOWN) is used by a CN to reply to another CN that it
+ * sent an invalid entry that could not be processed. This should be a
+ * relatively infrequent occurrence. Unlike the rest of the messages, there is
+ * no reply to it. It's a single request to try and help get us out there. When
+ * a node receives this, it will issue a conditional revocation ioctl, that
+ * removes the entry if and only if, it matches the IP. That way if we've
+ * already gotten an updated entry for this, we don't remove it again.
+ */
+typedef struct svp_shootdown {
+ uint8_t svsd_mac[ETHERADDRL];
+ uint8_t svsd_pad[2];
+ uint32_t svsd_vnetid;
+} svp_shootdown_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBVARPD_SVP_PROT_H */
diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_remote.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_remote.c
new file mode 100644
index 0000000000..8d482e4a12
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_remote.c
@@ -0,0 +1,821 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * Remote backend management
+ *
+ * For more information, see the big theory statement in
+ * lib/varpd/svp/common/libvarpd_svp.c.
+ */
+
+#include <umem.h>
+#include <strings.h>
+#include <string.h>
+#include <stddef.h>
+#include <thread.h>
+#include <synch.h>
+#include <assert.h>
+#include <sys/socket.h>
+#include <netdb.h>
+#include <errno.h>
+#include <libidspace.h>
+
+#include <libvarpd_provider.h>
+#include <libvarpd_svp.h>
+
+typedef struct svp_shoot_vl3 {
+ svp_query_t ssv_query;
+ struct sockaddr_in6 ssv_sock;
+ svp_log_vl3_t *ssv_vl3;
+ svp_sdlog_t *ssv_log;
+} svp_shoot_vl3_t;
+
+static mutex_t svp_remote_lock = ERRORCHECKMUTEX;
+static avl_tree_t svp_remote_tree;
+static svp_timer_t svp_dns_timer;
+static id_space_t *svp_idspace;
+static int svp_dns_timer_rate = 30; /* seconds */
+
+static void
+svp_remote_mkfmamsg(svp_remote_t *srp, svp_degrade_state_t state, char *buf,
+ size_t buflen)
+{
+ switch (state) {
+ case SVP_RD_DNS_FAIL:
+ (void) snprintf(buf, buflen, "failed to resolve or find "
+ "entries for hostname %s", srp->sr_hostname);
+ break;
+ case SVP_RD_REMOTE_FAIL:
+ (void) snprintf(buf, buflen, "cannot reach any remote peers");
+ break;
+ default:
+ (void) snprintf(buf, buflen, "unkonwn error state: %d", state);
+ }
+}
+
+static int
+svp_remote_comparator(const void *l, const void *r)
+{
+ int ret;
+ const svp_remote_t *lr = l, *rr = r;
+
+ ret = strcmp(lr->sr_hostname, rr->sr_hostname);
+ if (ret > 0)
+ return (1);
+ else if (ret < 0)
+ return (-1);
+
+ if (lr->sr_rport > rr->sr_rport)
+ return (1);
+ else if (lr->sr_rport < rr->sr_rport)
+ return (-1);
+
+ return (memcmp(&lr->sr_uip, &rr->sr_uip, sizeof (struct in6_addr)));
+}
+
+void
+svp_query_release(svp_query_t *sqp)
+{
+ id_free(svp_idspace, sqp->sq_header.svp_id);
+}
+
+static void
+svp_remote_destroy(svp_remote_t *srp)
+{
+ size_t len;
+
+ /*
+ * Clean up any unrelated DNS information. At this point we know that
+ * we're not in the remote tree. That means, that svp_remote_dns_timer
+ * cannot queue us. However, if any of our DNS related state flags are
+ * set, we have to hang out.
+ */
+ mutex_enter(&srp->sr_lock);
+ while (srp->sr_state &
+ (SVP_RS_LOOKUP_SCHEDULED | SVP_RS_LOOKUP_INPROGRESS)) {
+ (void) cond_wait(&srp->sr_cond, &srp->sr_lock);
+ }
+ mutex_exit(&srp->sr_lock);
+ svp_shootdown_fini(srp);
+
+ if (cond_destroy(&srp->sr_cond) != 0)
+ libvarpd_panic("failed to destroy cond sr_cond");
+
+ if (mutex_destroy(&srp->sr_lock) != 0)
+ libvarpd_panic("failed to destroy mutex sr_lock");
+
+ if (srp->sr_addrinfo != NULL)
+ freeaddrinfo(srp->sr_addrinfo);
+ len = strlen(srp->sr_hostname) + 1;
+ umem_free(srp->sr_hostname, len);
+ umem_free(srp, sizeof (svp_remote_t));
+}
+
+static int
+svp_remote_create(const char *host, uint16_t port, struct in6_addr *uip,
+ svp_remote_t **outp)
+{
+ size_t hlen;
+ svp_remote_t *remote;
+
+ assert(MUTEX_HELD(&svp_remote_lock));
+
+ remote = umem_zalloc(sizeof (svp_remote_t), UMEM_DEFAULT);
+ if (remote == NULL) {
+ mutex_exit(&svp_remote_lock);
+ return (ENOMEM);
+ }
+
+ if (svp_shootdown_init(remote) != 0) {
+ umem_free(remote, sizeof (svp_remote_t));
+ mutex_exit(&svp_remote_lock);
+ return (ENOMEM);
+ }
+
+ hlen = strlen(host) + 1;
+ remote->sr_hostname = umem_alloc(hlen, UMEM_DEFAULT);
+ if (remote->sr_hostname == NULL) {
+ svp_shootdown_fini(remote);
+ umem_free(remote, sizeof (svp_remote_t));
+ mutex_exit(&svp_remote_lock);
+ return (ENOMEM);
+ }
+ remote->sr_rport = port;
+ if (mutex_init(&remote->sr_lock,
+ USYNC_THREAD | LOCK_ERRORCHECK, NULL) != 0)
+ libvarpd_panic("failed to create mutex sr_lock");
+ if (cond_init(&remote->sr_cond, USYNC_PROCESS, NULL) != 0)
+ libvarpd_panic("failed to create cond sr_cond");
+ list_create(&remote->sr_conns, sizeof (svp_conn_t),
+ offsetof(svp_conn_t, sc_rlist));
+ avl_create(&remote->sr_tree, svp_comparator, sizeof (svp_t),
+ offsetof(svp_t, svp_rlink));
+ (void) strlcpy(remote->sr_hostname, host, hlen);
+ remote->sr_count = 1;
+ remote->sr_uip = *uip;
+
+ svp_shootdown_start(remote);
+
+ *outp = remote;
+ return (0);
+}
+
+int
+svp_remote_find(char *host, uint16_t port, struct in6_addr *uip,
+ svp_remote_t **outp)
+{
+ int ret;
+ svp_remote_t lookup, *remote;
+
+ lookup.sr_hostname = host;
+ lookup.sr_rport = port;
+ lookup.sr_uip = *uip;
+ mutex_enter(&svp_remote_lock);
+ remote = avl_find(&svp_remote_tree, &lookup, NULL);
+ if (remote != NULL) {
+ assert(remote->sr_count > 0);
+ remote->sr_count++;
+ *outp = remote;
+ mutex_exit(&svp_remote_lock);
+ return (0);
+ }
+
+ if ((ret = svp_remote_create(host, port, uip, outp)) != 0) {
+ mutex_exit(&svp_remote_lock);
+ return (ret);
+ }
+
+ avl_add(&svp_remote_tree, *outp);
+ mutex_exit(&svp_remote_lock);
+
+ /* Make sure DNS is up to date */
+ svp_host_queue(*outp);
+
+ return (0);
+}
+
+void
+svp_remote_release(svp_remote_t *srp)
+{
+ mutex_enter(&svp_remote_lock);
+ mutex_enter(&srp->sr_lock);
+ srp->sr_count--;
+ if (srp->sr_count != 0) {
+ mutex_exit(&srp->sr_lock);
+ mutex_exit(&svp_remote_lock);
+ return;
+ }
+ mutex_exit(&srp->sr_lock);
+
+ avl_remove(&svp_remote_tree, srp);
+ mutex_exit(&svp_remote_lock);
+ svp_remote_destroy(srp);
+}
+
+int
+svp_remote_attach(svp_remote_t *srp, svp_t *svp)
+{
+ svp_t check;
+ avl_index_t where;
+
+ mutex_enter(&srp->sr_lock);
+ if (svp->svp_remote != NULL)
+ libvarpd_panic("failed to create mutex sr_lock");
+
+ /*
+ * We require everything except shootdowns
+ */
+ if (svp->svp_cb.scb_vl2_lookup == NULL)
+ libvarpd_panic("missing callback scb_vl2_lookup");
+ if (svp->svp_cb.scb_vl3_lookup == NULL)
+ libvarpd_panic("missing callback scb_vl3_lookup");
+ if (svp->svp_cb.scb_vl2_invalidate == NULL)
+ libvarpd_panic("missing callback scb_vl2_invalidate");
+ if (svp->svp_cb.scb_vl3_inject == NULL)
+ libvarpd_panic("missing callback scb_vl3_inject");
+
+ check.svp_vid = svp->svp_vid;
+ if (avl_find(&srp->sr_tree, &check, &where) != NULL)
+ libvarpd_panic("found duplicate entry with vid %ld",
+ svp->svp_vid);
+ avl_insert(&srp->sr_tree, svp, where);
+ svp->svp_remote = srp;
+ mutex_exit(&srp->sr_lock);
+
+ return (0);
+}
+
+void
+svp_remote_detach(svp_t *svp)
+{
+ svp_t *lookup;
+ svp_remote_t *srp = svp->svp_remote;
+
+ if (srp == NULL)
+ libvarpd_panic("trying to detach remote when none exists");
+
+ mutex_enter(&srp->sr_lock);
+ lookup = avl_find(&srp->sr_tree, svp, NULL);
+ if (lookup == NULL || lookup != svp)
+ libvarpd_panic("inconsitent remote avl tree...");
+ avl_remove(&srp->sr_tree, svp);
+ svp->svp_remote = NULL;
+ mutex_exit(&srp->sr_lock);
+ svp_remote_release(srp);
+}
+
+/*
+ * Walk the list of connections and find the first one that's available, the
+ * move it to the back of the list so it's less likely to be used again.
+ */
+static boolean_t
+svp_remote_conn_queue(svp_remote_t *srp, svp_query_t *sqp)
+{
+ svp_conn_t *scp;
+
+ assert(MUTEX_HELD(&srp->sr_lock));
+ for (scp = list_head(&srp->sr_conns); scp != NULL;
+ scp = list_next(&srp->sr_conns, scp)) {
+ mutex_enter(&scp->sc_lock);
+ if (scp->sc_cstate != SVP_CS_ACTIVE) {
+ mutex_exit(&scp->sc_lock);
+ continue;
+ }
+ svp_conn_queue(scp, sqp);
+ mutex_exit(&scp->sc_lock);
+ list_remove(&srp->sr_conns, scp);
+ list_insert_tail(&srp->sr_conns, scp);
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
+static void
+svp_remote_vl2_lookup_cb(svp_query_t *sqp, void *arg)
+{
+ svp_t *svp = sqp->sq_svp;
+ svp_vl2_ack_t *vl2a = (svp_vl2_ack_t *)sqp->sq_wdata;
+
+ if (sqp->sq_status == SVP_S_OK)
+ svp->svp_cb.scb_vl2_lookup(svp, sqp->sq_status,
+ (struct in6_addr *)vl2a->sl2a_addr, ntohs(vl2a->sl2a_port),
+ arg);
+ else
+ svp->svp_cb.scb_vl2_lookup(svp, sqp->sq_status, NULL, 0, arg);
+}
+
+void
+svp_remote_vl2_lookup(svp_t *svp, svp_query_t *sqp, const uint8_t *mac,
+ void *arg)
+{
+ svp_remote_t *srp;
+ svp_vl2_req_t *vl2r = &sqp->sq_rdun.sqd_vl2r;
+
+ srp = svp->svp_remote;
+ sqp->sq_func = svp_remote_vl2_lookup_cb;
+ sqp->sq_arg = arg;
+ sqp->sq_svp = svp;
+ sqp->sq_state = SVP_QUERY_INIT;
+ sqp->sq_header.svp_ver = htons(SVP_CURRENT_VERSION);
+ sqp->sq_header.svp_op = htons(SVP_R_VL2_REQ);
+ sqp->sq_header.svp_size = htonl(sizeof (svp_vl2_req_t));
+ sqp->sq_header.svp_id = id_alloc(svp_idspace);
+ if (sqp->sq_header.svp_id == (id_t)-1)
+ libvarpd_panic("failed to allcoate from svp_idspace: %d",
+ errno);
+ sqp->sq_header.svp_crc32 = htonl(0);
+ sqp->sq_rdata = vl2r;
+ sqp->sq_rsize = sizeof (svp_vl2_req_t);
+ sqp->sq_wdata = NULL;
+ sqp->sq_wsize = 0;
+
+ bcopy(mac, vl2r->sl2r_mac, ETHERADDRL);
+ vl2r->sl2r_vnetid = ntohl(svp->svp_vid);
+
+ mutex_enter(&srp->sr_lock);
+ if (svp_remote_conn_queue(srp, sqp) == B_FALSE)
+ svp->svp_cb.scb_vl2_lookup(svp, SVP_S_FATAL, NULL, NULL, arg);
+ mutex_exit(&srp->sr_lock);
+}
+
+static void
+svp_remote_vl3_lookup_cb(svp_query_t *sqp, void *arg)
+{
+ svp_t *svp = sqp->sq_svp;
+ svp_vl3_ack_t *vl3a = (svp_vl3_ack_t *)sqp->sq_wdata;
+
+ if (sqp->sq_status == SVP_S_OK)
+ svp->svp_cb.scb_vl3_lookup(svp, sqp->sq_status, vl3a->sl3a_mac,
+ (struct in6_addr *)vl3a->sl3a_uip, ntohs(vl3a->sl3a_uport),
+ arg);
+ else
+ svp->svp_cb.scb_vl3_lookup(svp, sqp->sq_status, NULL, NULL, 0,
+ arg);
+}
+
+static void
+svp_remote_vl3_common(svp_remote_t *srp, svp_query_t *sqp,
+ const struct sockaddr *addr, svp_query_f func, void *arg, uint32_t vid)
+{
+ svp_vl3_req_t *vl3r = &sqp->sq_rdun.sdq_vl3r;
+
+ if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
+ libvarpd_panic("unexpected sa_family for the vl3 lookup");
+
+ sqp->sq_func = func;
+ sqp->sq_arg = arg;
+ sqp->sq_state = SVP_QUERY_INIT;
+ sqp->sq_header.svp_ver = htons(SVP_CURRENT_VERSION);
+ sqp->sq_header.svp_op = htons(SVP_R_VL3_REQ);
+ sqp->sq_header.svp_size = htonl(sizeof (svp_vl3_req_t));
+ sqp->sq_header.svp_id = id_alloc(svp_idspace);
+ if (sqp->sq_header.svp_id == (id_t)-1)
+ libvarpd_panic("failed to allcoate from svp_idspace: %d",
+ errno);
+ sqp->sq_header.svp_crc32 = htonl(0);
+ sqp->sq_rdata = vl3r;
+ sqp->sq_rsize = sizeof (svp_vl3_req_t);
+ sqp->sq_wdata = NULL;
+ sqp->sq_wsize = 0;
+
+ if (addr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *s6 = (struct sockaddr_in6 *)addr;
+ vl3r->sl3r_type = htonl(SVP_VL3_IPV6);
+ bcopy(&s6->sin6_addr, vl3r->sl3r_ip,
+ sizeof (struct in6_addr));
+ } else {
+ struct sockaddr_in *s4 = (struct sockaddr_in *)addr;
+ struct in6_addr v6;
+
+ vl3r->sl3r_type = htonl(SVP_VL3_IP);
+ IN6_INADDR_TO_V4MAPPED(&s4->sin_addr, &v6);
+ bcopy(&v6, vl3r->sl3r_ip, sizeof (struct in6_addr));
+ }
+ vl3r->sl3r_vnetid = htonl(vid);
+
+ mutex_enter(&srp->sr_lock);
+ if (svp_remote_conn_queue(srp, sqp) == B_FALSE) {
+ sqp->sq_status = SVP_S_FATAL;
+ sqp->sq_func(sqp, arg);
+ }
+ mutex_exit(&srp->sr_lock);
+}
+
+/*
+ * This is a request to do a VL3 look-up that originated internally as opposed
+ * to coming from varpd. As such we need a slightly different query callback
+ * function upon completion and don't go through the normal path with the svp_t.
+ */
+void
+svp_remote_vl3_logreq(svp_remote_t *srp, svp_query_t *sqp, uint32_t vid,
+ const struct sockaddr *addr, svp_query_f func, void *arg)
+{
+ svp_remote_vl3_common(srp, sqp, addr, func, arg, vid);
+}
+
+void
+svp_remote_vl3_lookup(svp_t *svp, svp_query_t *sqp,
+ const struct sockaddr *addr, void *arg)
+{
+ svp_remote_t *srp = svp->svp_remote;
+
+ sqp->sq_svp = svp;
+ svp_remote_vl3_common(srp, sqp, addr, svp_remote_vl3_lookup_cb,
+ arg, svp->svp_vid);
+}
+
+static void
+svp_remote_log_request_cb(svp_query_t *sqp, void *arg)
+{
+ svp_remote_t *srp = sqp->sq_arg;
+
+ assert(sqp->sq_wdata != NULL);
+ if (sqp->sq_status == SVP_S_OK)
+ svp_shootdown_logr_cb(srp, sqp->sq_status, sqp->sq_wdata,
+ sqp->sq_size);
+ else
+ svp_shootdown_logr_cb(srp, sqp->sq_status, NULL, 0);
+}
+
+void
+svp_remote_log_request(svp_remote_t *srp, svp_query_t *sqp, void *buf,
+ size_t buflen)
+{
+ svp_log_req_t *logr = &sqp->sq_rdun.sdq_logr;
+ boolean_t queued;
+
+ sqp->sq_func = svp_remote_log_request_cb;
+ sqp->sq_state = SVP_QUERY_INIT;
+ sqp->sq_arg = srp;
+ sqp->sq_header.svp_ver = htons(SVP_CURRENT_VERSION);
+ sqp->sq_header.svp_op = htons(SVP_R_LOG_REQ);
+ sqp->sq_header.svp_size = htonl(sizeof (svp_log_req_t));
+ sqp->sq_header.svp_id = id_alloc(svp_idspace);
+ if (sqp->sq_header.svp_id == (id_t)-1)
+ libvarpd_panic("failed to allcoate from svp_idspace: %d",
+ errno);
+ sqp->sq_header.svp_crc32 = htonl(0);
+ sqp->sq_rdata = logr;
+ sqp->sq_rsize = sizeof (svp_log_req_t);
+ sqp->sq_wdata = buf;
+ sqp->sq_wsize = buflen;
+
+ logr->svlr_count = htonl(buflen);
+ bcopy(&srp->sr_uip, logr->svlr_ip, sizeof (struct in6_addr));
+
+ /*
+ * If this fails, there isn't much that we can't do. Give the callback
+ * with a fatal status.
+ */
+ mutex_enter(&srp->sr_lock);
+ queued = svp_remote_conn_queue(srp, sqp);
+ mutex_exit(&srp->sr_lock);
+
+ if (queued == B_FALSE)
+ svp_shootdown_logr_cb(srp, SVP_S_FATAL, NULL, 0);
+}
+
+static void
+svp_remote_lrm_request_cb(svp_query_t *sqp, void *arg)
+{
+ svp_remote_t *srp = arg;
+
+ svp_shootdown_lrm_cb(srp, sqp->sq_status);
+}
+
+void
+svp_remote_lrm_request(svp_remote_t *srp, svp_query_t *sqp, void *buf,
+ size_t buflen)
+{
+ boolean_t queued;
+ svp_lrm_req_t *svrr = buf;
+
+ sqp->sq_func = svp_remote_lrm_request_cb;
+ sqp->sq_state = SVP_QUERY_INIT;
+ sqp->sq_arg = srp;
+ sqp->sq_header.svp_ver = htons(SVP_CURRENT_VERSION);
+ sqp->sq_header.svp_op = htons(SVP_R_LOG_RM);
+ sqp->sq_header.svp_size = htonl(buflen);
+ sqp->sq_header.svp_id = id_alloc(svp_idspace);
+ if (sqp->sq_header.svp_id == (id_t)-1)
+ libvarpd_panic("failed to allcoate from svp_idspace: %d",
+ errno);
+ sqp->sq_header.svp_crc32 = htonl(0);
+ sqp->sq_rdata = buf;
+ sqp->sq_rsize = buflen;
+ sqp->sq_wdata = NULL;
+ sqp->sq_wsize = 0;
+
+ /*
+ * We need to fix up the count to be in proper network order.
+ */
+ svrr->svrr_count = htonl(svrr->svrr_count);
+
+ /*
+ * If this fails, there isn't much that we can't do. Give the callback
+ * with a fatal status.
+ */
+ mutex_enter(&srp->sr_lock);
+ queued = svp_remote_conn_queue(srp, sqp);
+ mutex_exit(&srp->sr_lock);
+
+ if (queued == B_FALSE)
+ svp_shootdown_logr_cb(srp, SVP_S_FATAL, NULL, 0);
+}
+
+/* ARGSUSED */
+void
+svp_remote_dns_timer(void *unused)
+{
+ svp_remote_t *s;
+ mutex_enter(&svp_remote_lock);
+ for (s = avl_first(&svp_remote_tree); s != NULL;
+ s = AVL_NEXT(&svp_remote_tree, s)) {
+ svp_host_queue(s);
+ }
+ mutex_exit(&svp_remote_lock);
+}
+
+void
+svp_remote_resolved(svp_remote_t *srp, struct addrinfo *newaddrs)
+{
+ struct addrinfo *a;
+ svp_conn_t *scp;
+ int ngen;
+
+ mutex_enter(&srp->sr_lock);
+ srp->sr_gen++;
+ ngen = srp->sr_gen;
+ mutex_exit(&srp->sr_lock);
+
+ for (a = newaddrs; a != NULL; a = a->ai_next) {
+ struct in6_addr in6;
+ struct in6_addr *addrp;
+
+ if (a->ai_family != AF_INET && a->ai_family != AF_INET6)
+ continue;
+
+ if (a->ai_family == AF_INET) {
+ struct sockaddr_in *v4;
+ v4 = (struct sockaddr_in *)a->ai_addr;
+ addrp = &in6;
+ IN6_INADDR_TO_V4MAPPED(&v4->sin_addr, addrp);
+ } else {
+ struct sockaddr_in6 *v6;
+ v6 = (struct sockaddr_in6 *)a->ai_addr;
+ addrp = &v6->sin6_addr;
+ }
+
+ mutex_enter(&srp->sr_lock);
+ for (scp = list_head(&srp->sr_conns); scp != NULL;
+ scp = list_next(&srp->sr_conns, scp)) {
+ mutex_enter(&scp->sc_lock);
+ if (bcmp(addrp, &scp->sc_addr,
+ sizeof (struct in6_addr)) == 0) {
+ scp->sc_gen = ngen;
+ mutex_exit(&scp->sc_lock);
+ break;
+ }
+ mutex_exit(&scp->sc_lock);
+ }
+
+ /*
+ * We need to be careful in the assumptions that we make here,
+ * as there's a good chance that svp_conn_create will
+ * drop the svp_remote_t`sr_lock to kick off its effective event
+ * loop.
+ */
+ if (scp == NULL)
+ (void) svp_conn_create(srp, addrp);
+ mutex_exit(&srp->sr_lock);
+ }
+
+ /*
+ * Now it's time to clean things up. We do not actively clean up the
+ * current connections that we have, instead allowing them to stay
+ * around assuming that they're still useful. Instead, we go through and
+ * purge the degraded list for anything that's from an older generation.
+ */
+ mutex_enter(&srp->sr_lock);
+ for (scp = list_head(&srp->sr_conns); scp != NULL;
+ scp = list_next(&srp->sr_conns, scp)) {
+ boolean_t fall = B_FALSE;
+ mutex_enter(&scp->sc_lock);
+ if (scp->sc_gen < srp->sr_gen)
+ fall = B_TRUE;
+ mutex_exit(&scp->sc_lock);
+ if (fall == B_TRUE)
+ svp_conn_fallout(scp);
+ }
+ mutex_exit(&srp->sr_lock);
+}
+
+/*
+ * This connection is in the process of being reset, we need to reassign all of
+ * its queries to other places or mark them as fatal. Note that the first
+ * connection was the one in flight when this failed. We always mark it as
+ * failed to avoid trying to reset its state.
+ */
+void
+svp_remote_reassign(svp_remote_t *srp, svp_conn_t *scp)
+{
+ boolean_t first = B_TRUE;
+ assert(MUTEX_HELD(&srp->sr_lock));
+ assert(MUTEX_HELD(&srp->sr_lock));
+ svp_query_t *sqp;
+
+ /*
+ * As we try to reassigning all of its queries, remove it from the list.
+ */
+ list_remove(&srp->sr_conns, scp);
+
+ while ((sqp = list_remove_head(&scp->sc_queries)) != NULL) {
+
+ if (first == B_TRUE) {
+ sqp->sq_status = SVP_S_FATAL;
+ sqp->sq_func(sqp, sqp->sq_arg);
+ continue;
+ }
+
+ sqp->sq_acttime = -1;
+
+ /*
+ * We may want to maintain a queue of these for some time rather
+ * than just failing them all.
+ */
+ if (svp_remote_conn_queue(srp, sqp) == B_FALSE) {
+ sqp->sq_status = SVP_S_FATAL;
+ sqp->sq_func(sqp, sqp->sq_arg);
+ }
+ }
+
+ /*
+ * Now that we're done, go ahead and re-insert.
+ */
+ list_insert_tail(&srp->sr_conns, scp);
+}
+
+void
+svp_remote_degrade(svp_remote_t *srp, svp_degrade_state_t flag)
+{
+ int sf, nf;
+ char buf[256];
+
+ assert(MUTEX_HELD(&srp->sr_lock));
+
+ if (flag == SVP_RD_ALL || flag == 0)
+ libvarpd_panic("invalid flag passed to degrade");
+
+ if ((flag & srp->sr_degrade) != 0) {
+ return;
+ }
+
+ sf = ffs(srp->sr_degrade);
+ nf = ffs(flag);
+ srp->sr_degrade |= flag;
+ if (sf == 0 || sf > nf) {
+ svp_t *svp;
+ svp_remote_mkfmamsg(srp, flag, buf, sizeof (buf));
+
+ for (svp = avl_first(&srp->sr_tree); svp != NULL;
+ svp = AVL_NEXT(&srp->sr_tree, svp)) {
+ libvarpd_fma_degrade(svp->svp_hdl, buf);
+ }
+ }
+}
+
+void
+svp_remote_restore(svp_remote_t *srp, svp_degrade_state_t flag)
+{
+ int sf, nf;
+
+ assert(MUTEX_HELD(&srp->sr_lock));
+ sf = ffs(srp->sr_degrade);
+ if ((srp->sr_degrade & flag) != flag)
+ return;
+ srp->sr_degrade &= ~flag;
+ nf = ffs(srp->sr_degrade);
+
+ /*
+ * If we're now empty, restore the device. If we still are degraded, but
+ * we now have a higher base than we used to, change the message.
+ */
+ if (srp->sr_degrade == 0) {
+ svp_t *svp;
+ for (svp = avl_first(&srp->sr_tree); svp != NULL;
+ svp = AVL_NEXT(&srp->sr_tree, svp)) {
+ libvarpd_fma_restore(svp->svp_hdl);
+ }
+ } else if (nf != sf) {
+ svp_t *svp;
+ char buf[256];
+
+ svp_remote_mkfmamsg(srp, 1U << (nf - 1), buf, sizeof (buf));
+ for (svp = avl_first(&srp->sr_tree); svp != NULL;
+ svp = AVL_NEXT(&srp->sr_tree, svp)) {
+ libvarpd_fma_degrade(svp->svp_hdl, buf);
+ }
+ }
+}
+
+void
+svp_remote_shootdown_vl3_cb(svp_query_t *sqp, void *arg)
+{
+ svp_shoot_vl3_t *squery = arg;
+ svp_log_vl3_t *svl3 = squery->ssv_vl3;
+ svp_sdlog_t *sdl = squery->ssv_log;
+
+ if (sqp->sq_status == SVP_S_OK) {
+ svp_t *svp, lookup;
+
+ svp_remote_t *srp = sdl->sdl_remote;
+ svp_vl3_ack_t *vl3a = (svp_vl3_ack_t *)sqp->sq_wdata;
+
+ lookup.svp_vid = ntohl(svl3->svl3_vnetid);
+ mutex_enter(&srp->sr_lock);
+ if ((svp = avl_find(&srp->sr_tree, &lookup, NULL)) != NULL) {
+ svp->svp_cb.scb_vl3_inject(svp, ntohs(svl3->svl3_vlan),
+ (struct in6_addr *)svl3->svl3_ip, vl3a->sl3a_mac,
+ NULL);
+ }
+ mutex_exit(&srp->sr_lock);
+
+ }
+
+ svp_shootdown_vl3_cb(sqp->sq_status, svl3, sdl);
+
+ umem_free(squery, sizeof (svp_shoot_vl3_t));
+}
+
+void
+svp_remote_shootdown_vl3(svp_remote_t *srp, svp_log_vl3_t *svl3,
+ svp_sdlog_t *sdl)
+{
+ svp_shoot_vl3_t *squery;
+
+ squery = umem_zalloc(sizeof (svp_shoot_vl3_t), UMEM_DEFAULT);
+ if (squery == NULL) {
+ svp_shootdown_vl3_cb(SVP_S_FATAL, svl3, sdl);
+ return;
+ }
+
+ squery->ssv_vl3 = svl3;
+ squery->ssv_log = sdl;
+ squery->ssv_sock.sin6_family = AF_INET6;
+ bcopy(svl3->svl3_ip, &squery->ssv_sock.sin6_addr,
+ sizeof (svl3->svl3_ip));
+ svp_remote_vl3_logreq(srp, &squery->ssv_query, ntohl(svl3->svl3_vnetid),
+ (struct sockaddr *)&squery->ssv_sock, svp_remote_shootdown_vl3_cb,
+ squery);
+}
+
+void
+svp_remote_shootdown_vl2(svp_remote_t *srp, svp_log_vl2_t *svl2)
+{
+ svp_t *svp, lookup;
+
+ lookup.svp_vid = ntohl(svl2->svl2_vnetid);
+ mutex_enter(&srp->sr_lock);
+ if ((svp = avl_find(&srp->sr_tree, &lookup, NULL)) != NULL) {
+ svp->svp_cb.scb_vl2_invalidate(svp, svl2->svl2_mac);
+ }
+ mutex_exit(&srp->sr_lock);
+}
+
+int
+svp_remote_init(void)
+{
+ svp_idspace = id_space_create("svp_req_ids", 1, INT32_MAX);
+ if (svp_idspace == NULL)
+ return (errno);
+ avl_create(&svp_remote_tree, svp_remote_comparator,
+ sizeof (svp_remote_t), offsetof(svp_remote_t, sr_gnode));
+ svp_dns_timer.st_func = svp_remote_dns_timer;
+ svp_dns_timer.st_arg = NULL;
+ svp_dns_timer.st_oneshot = B_FALSE;
+ svp_dns_timer.st_value = svp_dns_timer_rate;
+ svp_timer_add(&svp_dns_timer);
+ return (0);
+}
+
+void
+svp_remote_fini(void)
+{
+ svp_timer_remove(&svp_dns_timer);
+ avl_destroy(&svp_remote_tree);
+ if (svp_idspace == NULL)
+ id_space_destroy(svp_idspace);
+}
diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_shootdown.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_shootdown.c
new file mode 100644
index 0000000000..76afb2519f
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_shootdown.c
@@ -0,0 +1,474 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * Shootdown processing logic.
+ *
+ * For more information, see the big theory statement in
+ * lib/varpd/svp/common/libvarpd_svp.c.
+ */
+
+#include <umem.h>
+#include <sys/uuid.h>
+#include <assert.h>
+#include <strings.h>
+#include <errno.h>
+#include <sys/debug.h>
+
+#include <libvarpd_provider.h>
+#include <libvarpd_svp.h>
+
+/*
+ * When we've determined that there's nothing left for us to do, then we go
+ * ahead and wait svp_shootdown_base seconds + up to an additional
+ * svp_shootdown_base seconds before asking again. However, if there is actually
+ * some work going on, just use the svp_shootdown_cont time.
+ */
+static int svp_shootdown_base = 5;
+static int svp_shootdown_cont = 1;
+
+/*
+ * These are sizes for our logack and logrm buffers. The sizing of the shootdown
+ * buffere would give us approximately 18 or so VL3 entries and 32 VL2 entries
+ * or some combination thereof. While it's a bit of overkill, we just use the
+ * same sized buffer for the list of uuids that we pass to remove log entries
+ * that we've acted upon.
+ */
+static int svp_shootdown_buf = 1024;
+
+static void
+svp_shootdown_schedule(svp_sdlog_t *sdl, boolean_t cont)
+{
+ assert(MUTEX_HELD(&sdl->sdl_lock));
+
+ if (cont == B_TRUE) {
+ sdl->sdl_timer.st_value = svp_shootdown_cont;
+ } else {
+ sdl->sdl_timer.st_value = svp_shootdown_base +
+ arc4random_uniform(svp_shootdown_base + 1);
+ }
+ svp_timer_add(&sdl->sdl_timer);
+}
+
+void
+svp_shootdown_lrm_cb(svp_remote_t *srp, svp_status_t status)
+{
+ svp_sdlog_t *sdl = &srp->sr_shoot;
+
+ mutex_enter(&sdl->sdl_lock);
+ sdl->sdl_flags &= ~SVP_SD_RUNNING;
+ svp_shootdown_schedule(sdl, B_TRUE);
+ mutex_exit(&sdl->sdl_lock);
+
+ if (status != SVP_S_OK) {
+ (void) bunyan_warn(svp_bunyan, "SVP_R_LOG_RM failed",
+ BUNYAN_T_STRING, "remote_host", srp->sr_hostname,
+ BUNYAN_T_INT32, "remote_port", srp->sr_rport,
+ BUNYAN_T_INT32, "status", status,
+ BUNYAN_T_END);
+ }
+}
+
+static void
+svp_shootdown_ref(svp_sdlog_t *sdl)
+{
+ mutex_enter(&sdl->sdl_lock);
+ sdl->sdl_ref++;
+ mutex_exit(&sdl->sdl_lock);
+}
+
+static void
+svp_shootdown_rele(svp_sdlog_t *sdl)
+{
+ svp_lrm_req_t *svrr = sdl->sdl_logrm;
+ boolean_t next;
+
+ mutex_enter(&sdl->sdl_lock);
+ VERIFY(sdl->sdl_ref > 0);
+ sdl->sdl_ref--;
+ if (sdl->sdl_ref > 0) {
+ mutex_exit(&sdl->sdl_lock);
+ return;
+ }
+
+ /*
+ * At this point we know that we hold the last reference, therefore it's
+ * safe for us to go ahead and clean up and move on and attempt to
+ * deliver the reply. We always deliver the reply by going through the
+ * timer. This can be rather important as the final reference may be
+ * coming through a failed query and it's not always safe for us to
+ * callback into the remote routines from this context.
+ *
+ * We should only do this if we have a non-zero number of entries to
+ * take down.
+ */
+ sdl->sdl_flags &= ~SVP_SD_RUNNING;
+ if (svrr->svrr_count > 0) {
+ sdl->sdl_flags |= SVP_SD_DORM;
+ next = B_TRUE;
+ } else {
+ next = B_FALSE;
+ }
+ svp_shootdown_schedule(sdl, next);
+ mutex_exit(&sdl->sdl_lock);
+}
+
+/*
+ * This is a callback used to indicate that the VL3 lookup has completed and an
+ * entry, if any, has been injected. If the command succeeded, eg. we got that
+ * the status was OK or that it was not found, then we will add it to he list to
+ * shoot down. Otherwise, there's nothing else for us to really do here.
+ */
+void
+svp_shootdown_vl3_cb(svp_status_t status, svp_log_vl3_t *vl3, svp_sdlog_t *sdl)
+{
+ svp_lrm_req_t *svrr = sdl->sdl_logrm;
+
+ mutex_enter(&sdl->sdl_lock);
+ if (status == SVP_S_OK || status == SVP_S_NOTFOUND) {
+ bcopy(vl3->svl3_id, &svrr->svrr_ids[svrr->svrr_count * 16],
+ UUID_LEN);
+ svrr->svrr_count++;
+ }
+ mutex_exit(&sdl->sdl_lock);
+
+ svp_shootdown_rele(sdl);
+}
+
+static int
+svp_shootdown_logr_shoot(void *data, svp_log_type_t type, void *arg)
+{
+ svp_sdlog_t *sdl = arg;
+ svp_remote_t *srp = sdl->sdl_remote;
+ svp_lrm_req_t *svrr = sdl->sdl_logrm;
+
+ if (type != SVP_LOG_VL2 && type != SVP_LOG_VL3)
+ libvarpd_panic("encountered unknown type: %d\n", type);
+
+ if (type == SVP_LOG_VL2) {
+ svp_log_vl2_t *svl2 = data;
+ svp_remote_shootdown_vl2(srp, svl2);
+ mutex_enter(&sdl->sdl_lock);
+ bcopy(svl2->svl2_id, &svrr->svrr_ids[svrr->svrr_count * 16],
+ UUID_LEN);
+ svrr->svrr_count++;
+ mutex_exit(&sdl->sdl_lock);
+ } else {
+ svp_log_vl3_t *svl3 = data;
+
+ /* Take a hold for the duration of this request */
+ svp_shootdown_ref(sdl);
+ svp_remote_shootdown_vl3(srp, svl3, sdl);
+ }
+
+ return (0);
+}
+
+static int
+svp_shootdown_logr_count(void *data, svp_log_type_t type, void *arg)
+{
+ uint_t *u = arg;
+ *u = *u + 1;
+ return (0);
+}
+
+
+static int
+svp_shootdown_logr_iter(svp_remote_t *srp, void *buf, size_t len,
+ int (*cb)(void *, svp_log_type_t, void *), void *arg)
+{
+ int ret;
+ off_t cboff = 0;
+ uint32_t *typep, type;
+ svp_log_vl2_t *svl2;
+ svp_log_vl3_t *svl3;
+
+ /* Adjust for initial status word */
+ assert(len >= sizeof (uint32_t));
+ len -= sizeof (uint32_t);
+ cboff += sizeof (uint32_t);
+
+ while (len > 0) {
+ size_t opsz;
+
+ if (len < sizeof (uint32_t)) {
+ (void) bunyan_warn(svp_bunyan,
+ "failed to get initial shootdown tag",
+ BUNYAN_T_STRING, "remote_host", srp->sr_hostname,
+ BUNYAN_T_INT32, "remote_port", srp->sr_rport,
+ BUNYAN_T_INT32, "response_size", cboff + len,
+ BUNYAN_T_INT32, "response_offset", cboff,
+ BUNYAN_T_END);
+ return (-1);
+ }
+
+ typep = buf + cboff;
+ type = ntohl(*typep);
+ if (type == SVP_LOG_VL2) {
+ opsz = sizeof (svp_log_vl2_t);
+ if (len < opsz) {
+ (void) bunyan_warn(svp_bunyan,
+ "not enough data for svp_log_vl2_t",
+ BUNYAN_T_STRING, "remote_host",
+ srp->sr_hostname,
+ BUNYAN_T_INT32, "remote_port",
+ srp->sr_rport,
+ BUNYAN_T_INT32, "response_size",
+ cboff + len,
+ BUNYAN_T_INT32, "response_offset", cboff,
+ BUNYAN_T_END);
+ return (-1);
+ }
+ svl2 = (void *)typep;
+ if ((ret = cb(svl2, type, arg)) != 0)
+ return (ret);
+ } else if (type == SVP_LOG_VL3) {
+
+ opsz = sizeof (svp_log_vl3_t);
+ if (len < opsz) {
+ (void) bunyan_warn(svp_bunyan,
+ "not enough data for svp_log_vl3_t",
+ BUNYAN_T_STRING, "remote_host",
+ srp->sr_hostname,
+ BUNYAN_T_INT32, "remote_port",
+ srp->sr_rport,
+ BUNYAN_T_INT32, "response_size",
+ cboff + len,
+ BUNYAN_T_INT32, "response_offset", cboff,
+ BUNYAN_T_END);
+ return (-1);
+ }
+ svl3 = (void *)typep;
+ if ((ret = cb(svl3, type, arg)) != 0)
+ return (ret);
+ } else {
+ (void) bunyan_warn(svp_bunyan,
+ "unknown log structure type",
+ BUNYAN_T_STRING, "remote_host",
+ srp->sr_hostname,
+ BUNYAN_T_INT32, "remote_port", srp->sr_rport,
+ BUNYAN_T_INT32, "response_size", cboff + len,
+ BUNYAN_T_INT32, "response_offset", cboff,
+ BUNYAN_T_INT32, "structure_type", type,
+ BUNYAN_T_END);
+ return (-1);
+ }
+ len -= opsz;
+ cboff += opsz;
+ }
+
+ return (0);
+}
+
+void
+svp_shootdown_logr_cb(svp_remote_t *srp, svp_status_t status, void *cbdata,
+ size_t cbsize)
+{
+ uint_t count;
+ svp_sdlog_t *sdl = &srp->sr_shoot;
+
+ if (status != SVP_S_OK) {
+ (void) bunyan_warn(svp_bunyan,
+ "log request not OK",
+ BUNYAN_T_STRING, "remote_host", srp->sr_hostname,
+ BUNYAN_T_INT32, "remote_port", srp->sr_rport,
+ BUNYAN_T_INT32, "response_size", cbsize,
+ BUNYAN_T_INT32, "status", status,
+ BUNYAN_T_END);
+ mutex_enter(&sdl->sdl_lock);
+ sdl->sdl_flags &= ~SVP_SD_RUNNING;
+ svp_shootdown_schedule(sdl, B_FALSE);
+ mutex_exit(&sdl->sdl_lock);
+ return;
+ }
+
+ /*
+ * First go ahead and count the number of entries. This effectively
+ * allows us to validate that all the data is valid, if this fails, then
+ * we fail the request.
+ */
+ count = 0;
+ if ((svp_shootdown_logr_iter(srp, cbdata, cbsize,
+ svp_shootdown_logr_count, &count)) != 0) {
+ mutex_enter(&sdl->sdl_lock);
+ sdl->sdl_flags &= ~SVP_SD_RUNNING;
+ svp_shootdown_schedule(sdl, B_FALSE);
+ mutex_exit(&sdl->sdl_lock);
+ return;
+ }
+
+ /*
+ * If we have no entries, then we're also done.
+ */
+ if (count == 0) {
+ mutex_enter(&sdl->sdl_lock);
+ sdl->sdl_flags &= ~SVP_SD_RUNNING;
+ svp_shootdown_schedule(sdl, B_FALSE);
+ mutex_exit(&sdl->sdl_lock);
+ return;
+ }
+
+ /*
+ * We have work to do. Because we may have asynchronous VL3 tasks, we're
+ * going to first grab a reference before we do the iteration. Then, for
+ * each asynchronous VL3 request we make, that'll also grab a hold. Once
+ * we're done with the iteration, we'll drop our hold. If that's the
+ * last one, it'll move on accordingly.
+ */
+ svp_shootdown_ref(sdl);
+ bzero(sdl->sdl_logrm, svp_shootdown_buf);
+
+ /*
+ * If this fails, we're going to determine what to do next based on the
+ * number of entries that were entered into the log removal. At this
+ * point success or failure don't really look different, all it changes
+ * is how many entries we have to remove.
+ */
+ (void) svp_shootdown_logr_iter(srp, cbdata, cbsize,
+ svp_shootdown_logr_shoot, sdl);
+
+ /*
+ * Now that we're done with our work, release the hold. If we don't have
+ * any vl3 tasks outstanding, this'll trigger the next phase of the log
+ * removals.
+ */
+ svp_shootdown_rele(sdl);
+}
+
+static void
+svp_shootdown_timer(void *arg)
+{
+ svp_sdlog_t *sdl = arg;
+ svp_remote_t *srp = sdl->sdl_remote;
+ boolean_t init = B_TRUE;
+
+ mutex_enter(&sdl->sdl_lock);
+
+ /*
+ * If we've been asked to quiesce, we're done.
+ */
+ if ((sdl->sdl_flags & SVP_SD_QUIESCE) != 0) {
+ mutex_exit(&sdl->sdl_lock);
+ return;
+ }
+
+ /*
+ * We shouldn't be able to have ourselves currently be running and reach
+ * here. If that's the case, we should immediately panic.
+ */
+ if ((sdl->sdl_flags & SVP_SD_RUNNING) != 0) {
+ libvarpd_panic("remote %p shootdown timer fired while still "
+ "running", srp);
+ }
+
+ if ((sdl->sdl_flags & SVP_SD_DORM) != 0) {
+ sdl->sdl_flags &= ~SVP_SD_DORM;
+ init = B_FALSE;
+ }
+
+ sdl->sdl_flags |= SVP_SD_RUNNING;
+ mutex_exit(&sdl->sdl_lock);
+
+ if (init == B_FALSE) {
+ svp_lrm_req_t *svrr = sdl->sdl_logrm;
+
+ bzero(&sdl->sdl_query, sizeof (svp_query_t));
+ svp_remote_lrm_request(sdl->sdl_remote, &sdl->sdl_query, svrr,
+ sizeof (*svrr) + 16 * svrr->svrr_count);
+ } else {
+ bzero(&sdl->sdl_query, sizeof (svp_query_t));
+ svp_remote_log_request(srp, &sdl->sdl_query, sdl->sdl_logack,
+ svp_shootdown_buf);
+ }
+}
+
+void
+svp_shootdown_fini(svp_remote_t *srp)
+{
+ svp_sdlog_t *sdl = &srp->sr_shoot;
+
+ mutex_enter(&sdl->sdl_lock);
+ sdl->sdl_flags |= SVP_SD_QUIESCE;
+ mutex_exit(&sdl->sdl_lock);
+
+ svp_timer_remove(&sdl->sdl_timer);
+
+ mutex_enter(&sdl->sdl_lock);
+
+ /*
+ * Normally svp_timer_remove would be enough. However, the query could
+ * have been put out again outside of the svp_timer interface. Therefore
+ * we still need to check for SVP_SD_RUNNING.
+ */
+ while (sdl->sdl_flags & SVP_SD_RUNNING)
+ (void) cond_wait(&sdl->sdl_cond, &sdl->sdl_lock);
+ mutex_exit(&sdl->sdl_lock);
+
+ umem_free(sdl->sdl_logack, svp_shootdown_buf);
+ umem_free(sdl->sdl_logrm, svp_shootdown_buf);
+ sdl->sdl_logack = NULL;
+ sdl->sdl_logrm = NULL;
+ (void) cond_destroy(&sdl->sdl_cond);
+ (void) mutex_destroy(&sdl->sdl_lock);
+}
+
+void
+svp_shootdown_start(svp_remote_t *srp)
+{
+ svp_sdlog_t *sdl = &srp->sr_shoot;
+
+ mutex_enter(&sdl->sdl_lock);
+ svp_shootdown_schedule(sdl, B_FALSE);
+ mutex_exit(&sdl->sdl_lock);
+}
+
+int
+svp_shootdown_init(svp_remote_t *srp)
+{
+ int ret;
+ svp_sdlog_t *sdl = &srp->sr_shoot;
+ if ((ret = mutex_init(&sdl->sdl_lock, USYNC_THREAD | LOCK_ERRORCHECK,
+ NULL)) != 0)
+ return (ret);
+
+ if ((ret = cond_init(&sdl->sdl_cond, USYNC_THREAD, NULL)) != 0) {
+ (void) mutex_destroy(&sdl->sdl_lock);
+ return (ret);
+ }
+
+ if ((sdl->sdl_logack = umem_alloc(svp_shootdown_buf, UMEM_DEFAULT)) ==
+ NULL) {
+ ret = errno;
+ (void) cond_destroy(&sdl->sdl_cond);
+ (void) mutex_destroy(&sdl->sdl_lock);
+ return (ret);
+ }
+
+ if ((sdl->sdl_logrm = umem_alloc(svp_shootdown_buf, UMEM_DEFAULT)) ==
+ NULL) {
+ ret = errno;
+ umem_free(sdl->sdl_logack, svp_shootdown_buf);
+ (void) cond_destroy(&sdl->sdl_cond);
+ (void) mutex_destroy(&sdl->sdl_lock);
+ return (ret);
+ }
+
+ sdl->sdl_remote = srp;
+ sdl->sdl_timer.st_oneshot = B_TRUE;
+ sdl->sdl_timer.st_func = svp_shootdown_timer;
+ sdl->sdl_timer.st_arg = sdl;
+
+ return (0);
+}
diff --git a/usr/src/lib/varpd/svp/common/libvarpd_svp_timer.c b/usr/src/lib/varpd/svp/common/libvarpd_svp_timer.c
new file mode 100644
index 0000000000..10b02748f3
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/libvarpd_svp_timer.c
@@ -0,0 +1,150 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015, Joyent, Inc.
+ */
+
+#include <stddef.h>
+#include <libvarpd_svp.h>
+
+/*
+ * svp timer backend
+ *
+ * This implements all of the logic of maintaining a timer for the svp backend.
+ * We have a timer that fires at a one second tick. We maintain all of our
+ * events in avl tree, sorted by the tick that they need to be processed at.
+ *
+ * For more information, see the big theory statement in
+ * lib/varpd/svp/common/libvarpd_svp.c.
+ */
+
+int svp_tickrate = 1;
+static svp_event_t svp_timer_event;
+static mutex_t svp_timer_lock = ERRORCHECKMUTEX;
+static cond_t svp_timer_cv = DEFAULTCV;
+static avl_tree_t svp_timer_tree;
+static uint64_t svp_timer_nticks;
+
+static int
+svp_timer_comparator(const void *l, const void *r)
+{
+ const svp_timer_t *lt, *rt;
+
+ lt = l;
+ rt = r;
+
+ if (lt->st_expire > rt->st_expire)
+ return (1);
+ else if (lt->st_expire < rt->st_expire)
+ return (-1);
+
+ /*
+ * Multiple timers can have the same delivery time, so sort within that
+ * by the address of the timer itself.
+ */
+ if ((uintptr_t)lt > (uintptr_t)rt)
+ return (1);
+ else if ((uintptr_t)lt < (uintptr_t)rt)
+ return (-1);
+
+ return (0);
+}
+
+/* ARGSUSED */
+static void
+svp_timer_tick(port_event_t *pe, void *arg)
+{
+ mutex_enter(&svp_timer_lock);
+ svp_timer_nticks++;
+
+ for (;;) {
+ svp_timer_t *t;
+
+ t = avl_first(&svp_timer_tree);
+ if (t == NULL || t->st_expire > svp_timer_nticks)
+ break;
+
+ avl_remove(&svp_timer_tree, t);
+
+ /*
+ * We drop this while performing an operation so that way state
+ * can advance in the face of a long-running callback.
+ */
+ t->st_delivering = B_TRUE;
+ mutex_exit(&svp_timer_lock);
+ t->st_func(t->st_arg);
+ mutex_enter(&svp_timer_lock);
+ t->st_delivering = B_FALSE;
+ (void) cond_broadcast(&svp_timer_cv);
+ if (t->st_oneshot == B_FALSE) {
+ t->st_expire += t->st_value;
+ avl_add(&svp_timer_tree, t);
+ }
+ }
+ mutex_exit(&svp_timer_lock);
+}
+
+void
+svp_timer_add(svp_timer_t *stp)
+{
+ if (stp->st_value == 0)
+ libvarpd_panic("tried to add svp timer with zero value");
+
+ mutex_enter(&svp_timer_lock);
+ stp->st_delivering = B_FALSE;
+ stp->st_expire = svp_timer_nticks + stp->st_value;
+ avl_add(&svp_timer_tree, stp);
+ mutex_exit(&svp_timer_lock);
+}
+
+void
+svp_timer_remove(svp_timer_t *stp)
+{
+ mutex_enter(&svp_timer_lock);
+
+ /*
+ * If the event in question is not currently being delivered, then we
+ * can stop it before it next fires. If it is currently being delivered,
+ * we need to wait for that to finish. Because we hold the timer lock,
+ * we know that it cannot be rearmed. Therefore, we make sure the one
+ * shot is set to zero, and wait until it's no longer set to delivering.
+ */
+ if (stp->st_delivering == B_FALSE) {
+ avl_remove(&svp_timer_tree, stp);
+ mutex_exit(&svp_timer_lock);
+ return;
+ }
+
+ stp->st_oneshot = B_TRUE;
+ while (stp->st_delivering == B_TRUE)
+ (void) cond_wait(&svp_timer_cv, &svp_timer_lock);
+
+ mutex_exit(&svp_timer_lock);
+}
+
+int
+svp_timer_init(void)
+{
+ int ret;
+
+ svp_timer_event.se_func = svp_timer_tick;
+ svp_timer_event.se_arg = NULL;
+
+ avl_create(&svp_timer_tree, svp_timer_comparator, sizeof (svp_timer_t),
+ offsetof(svp_timer_t, st_link));
+
+ if ((ret = svp_event_timer_init(&svp_timer_event)) != 0) {
+ avl_destroy(&svp_timer_tree);
+ }
+
+ return (ret);
+}
diff --git a/usr/src/lib/varpd/svp/common/llib-lvarpd_svp b/usr/src/lib/varpd/svp/common/llib-lvarpd_svp
new file mode 100644
index 0000000000..03c34f4fcb
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/llib-lvarpd_svp
@@ -0,0 +1,18 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/* LINTLIBRARY */
+/* PROTOLIB1 */
+
diff --git a/usr/src/lib/varpd/svp/common/mapfile-vers b/usr/src/lib/varpd/svp/common/mapfile-vers
new file mode 100644
index 0000000000..6b7c5a5067
--- /dev/null
+++ b/usr/src/lib/varpd/svp/common/mapfile-vers
@@ -0,0 +1,35 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_VERSION SUNWprivate {
+ local:
+ *;
+};
diff --git a/usr/src/lib/varpd/svp/i386/Makefile b/usr/src/lib/varpd/svp/i386/Makefile
new file mode 100644
index 0000000000..f2b4f63da5
--- /dev/null
+++ b/usr/src/lib/varpd/svp/i386/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/varpd/svp/sparc/Makefile b/usr/src/lib/varpd/svp/sparc/Makefile
new file mode 100644
index 0000000000..f2b4f63da5
--- /dev/null
+++ b/usr/src/lib/varpd/svp/sparc/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+include ../Makefile.com
+
+install: all $(ROOTLIBS) $(ROOTLINKS) $(ROOTLINT)
diff --git a/usr/src/lib/varpd/svp/sparcv9/Makefile b/usr/src/lib/varpd/svp/sparcv9/Makefile
new file mode 100644
index 0000000000..d552642882
--- /dev/null
+++ b/usr/src/lib/varpd/svp/sparcv9/Makefile
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+include ../Makefile.com
+include ../../../Makefile.lib.64
+
+install: all $(ROOTLIBS64) $(ROOTLINKS64) $(ROOTLINT64)
diff --git a/usr/src/lib/xml/os_dtd.c b/usr/src/lib/xml/os_dtd.c
new file mode 100644
index 0000000000..579e99ba3c
--- /dev/null
+++ b/usr/src/lib/xml/os_dtd.c
@@ -0,0 +1,238 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * Utility functions for working with XML documents that are validated against
+ * Document Type Definitions (DTD) shipped in the operating system.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <strings.h>
+#include <errno.h>
+#include <zone.h>
+
+#include <libxml/parser.h>
+#include <libxml/xmlmemory.h>
+
+#include "os_dtd.h"
+
+struct os_dtd_path {
+ os_dtd_id_t odp_id;
+ const char *odp_name;
+ const char *odp_public_ident;
+ const char *odp_path;
+};
+
+static os_dtd_path_t os_dtd_paths[] = {
+ /*
+ * DTDs for Zones infrastructure:
+ */
+ { OS_DTD_ZONES_BRAND, "brand",
+ "-//Sun Microsystems Inc//DTD Brands//EN",
+ "/usr/share/lib/xml/dtd/brand.dtd.1" },
+ { OS_DTD_ZONES_ZONE, "zone",
+ "-//Sun Microsystems Inc//DTD Zones//EN",
+ "/usr/share/lib/xml/dtd/zonecfg.dtd.1" },
+ { OS_DTD_ZONES_PLATFORM, "platform",
+ "-//Sun Microsystems Inc//Zones Platform//EN",
+ "/usr/share/lib/xml/dtd/zone_platform.dtd.1" },
+
+ /*
+ * DTDs for smf(5):
+ */
+ { OS_DTD_SMF_SERVICE_BUNDLE, "service_bundle",
+ NULL,
+ "/usr/share/lib/xml/dtd/service_bundle.dtd.1" },
+
+ { OS_DTD_UNKNOWN, NULL, NULL, NULL }
+};
+
+/*
+ * Check this document to see if it references the public identifier of a
+ * well-known DTD that we ship with the operating system. If there is no DTD,
+ * or the public identifier is unknown to us, return OS_DTD_UNKNOWN.
+ */
+os_dtd_id_t
+os_dtd_identify(xmlDocPtr doc)
+{
+ xmlDtdPtr dp;
+ int i;
+
+ if ((dp = xmlGetIntSubset(doc)) == NULL) {
+ /*
+ * This document does not have an internal subset pointing
+ * to a DTD.
+ */
+ errno = EIO;
+ return (OS_DTD_UNKNOWN);
+ }
+
+ /*
+ * The use of a symbolic name via the public identifier is preferred.
+ * Check to see if the document refers to a public identifier for any
+ * well-known DTD:
+ */
+ for (i = 0; os_dtd_paths[i].odp_id != OS_DTD_UNKNOWN; i++) {
+ os_dtd_path_t *odp = &os_dtd_paths[i];
+ const xmlChar *pubid = (const xmlChar *)odp->odp_public_ident;
+
+ if (dp->ExternalID == NULL || odp->odp_public_ident == NULL) {
+ continue;
+ }
+
+ if (xmlStrEqual(pubid, dp->ExternalID)) {
+ return (odp->odp_id);
+ }
+ }
+
+ /*
+ * If a public identifier is not present, or does not match any known
+ * DTD, fall back to inspecting the system identifier.
+ */
+ for (i = 0; os_dtd_paths[i].odp_id != OS_DTD_UNKNOWN; i++) {
+ os_dtd_path_t *odp = &os_dtd_paths[i];
+ char uri[sizeof ("file://") + MAXPATHLEN];
+ const xmlChar *path = (const xmlChar *)odp->odp_path;
+
+ if (dp->SystemID == NULL || odp->odp_path == NULL) {
+ continue;
+ }
+
+ /*
+ * The system identifier may be a regular path.
+ */
+ if (xmlStrEqual(path, dp->SystemID)) {
+ return (odp->odp_id);
+ }
+
+ /*
+ * The system identifier may also be a "file://"
+ * URI referring to a path:
+ */
+ (void) snprintf(uri, sizeof (uri), "file://%s", odp->odp_path);
+ if (xmlStrEqual((const xmlChar *)uri, dp->SystemID)) {
+ return (odp->odp_id);
+ }
+ }
+
+ errno = ENOENT;
+ return (OS_DTD_UNKNOWN);
+}
+
+static os_dtd_path_t *
+os_dtd_lookup(os_dtd_id_t id)
+{
+ int i;
+
+ for (i = 0; os_dtd_paths[i].odp_id != OS_DTD_UNKNOWN; i++) {
+ os_dtd_path_t *odp = &os_dtd_paths[i];
+
+ if (odp->odp_id == id) {
+ return (odp);
+ }
+ }
+
+ errno = ENOENT;
+ return (NULL);
+}
+
+/*
+ * If this document references a DTD, remove that reference (the "internal
+ * subset"). Install a new internal subset reference to the well-known
+ * operating system DTD passed by the caller. The URI in this reference will
+ * respect the current native system prefix (e.g. "/native") if there is one,
+ * such as when running in a branded zone.
+ */
+int
+os_dtd_attach(xmlDocPtr doc, os_dtd_id_t id)
+{
+ xmlDtdPtr dp;
+ os_dtd_path_t *odp;
+ const char *zroot = zone_get_nroot();
+ char uri[sizeof ("file://") + MAXPATHLEN];
+
+ if ((odp = os_dtd_lookup(id)) == NULL) {
+ return (-1);
+ }
+
+ if ((dp = xmlGetIntSubset(doc)) != NULL) {
+ /*
+ * This document already has an internal subset. Remove it
+ * before attaching the new one.
+ */
+ xmlUnlinkNode((xmlNodePtr)dp);
+ xmlFreeNode((xmlNodePtr)dp);
+ }
+
+ /*
+ * The "system identifier" of this internal subset must refer to the
+ * path in the filesystem where the DTD file (the external subset) is
+ * stored. If we are running in a branded zone, that file may be at a
+ * different path (e.g. under "/native").
+ */
+ (void) snprintf(uri, sizeof (uri), "file://%s%s", zroot != NULL ?
+ zroot : "", odp->odp_path);
+
+ if (xmlCreateIntSubset(doc, (const xmlChar *)odp->odp_name,
+ (const xmlChar *)odp->odp_public_ident,
+ (const xmlChar *)uri) == NULL) {
+ errno = EIO;
+ return (-1);
+ }
+
+ return (0);
+}
+
+/*ARGSUSED*/
+static void
+os_dtd_print_nothing(void *ctx, const char *msg, ...)
+{
+}
+
+int
+os_dtd_validate(xmlDocPtr doc, boolean_t emit_messages, boolean_t *valid)
+{
+ int ret;
+ xmlValidCtxtPtr cvp;
+ os_dtd_id_t dtdid;
+
+ if ((dtdid = os_dtd_identify(doc)) != OS_DTD_UNKNOWN) {
+ /*
+ * This document refers to a well-known DTD shipped with
+ * the operating system. Ensure that it points to the
+ * correct local path for validation in the current context.
+ */
+ if (os_dtd_attach(doc, dtdid) != 0) {
+ return (-1);
+ }
+ }
+
+ if ((cvp = xmlNewValidCtxt()) == NULL) {
+ return (-1);
+ }
+
+ if (!emit_messages) {
+ cvp->error = os_dtd_print_nothing;
+ cvp->warning = os_dtd_print_nothing;
+ }
+
+ ret = xmlValidateDocument(cvp, doc);
+ xmlFreeValidCtxt(cvp);
+
+ *valid = (ret == 1 ? B_TRUE : B_FALSE);
+ return (0);
+}
diff --git a/usr/src/lib/xml/os_dtd.h b/usr/src/lib/xml/os_dtd.h
new file mode 100644
index 0000000000..c6fe24a293
--- /dev/null
+++ b/usr/src/lib/xml/os_dtd.h
@@ -0,0 +1,49 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _OS_DTD_H
+#define _OS_DTD_H
+
+/*
+ * Utility functions for working with XML documents that are validated against
+ * Document Type Definitions (DTD) shipped in the operating system.
+ */
+
+#include <libxml/parser.h>
+#include <libxml/xmlmemory.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum os_dtd_id {
+ OS_DTD_UNKNOWN = 0,
+ OS_DTD_ZONES_BRAND,
+ OS_DTD_ZONES_ZONE,
+ OS_DTD_ZONES_PLATFORM,
+ OS_DTD_SMF_SERVICE_BUNDLE
+} os_dtd_id_t;
+
+typedef struct os_dtd_path os_dtd_path_t;
+
+extern os_dtd_id_t os_dtd_identify(xmlDocPtr);
+extern int os_dtd_attach(xmlDocPtr, os_dtd_id_t);
+extern int os_dtd_validate(xmlDocPtr, boolean_t, boolean_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _OS_DTD_H */
diff --git a/usr/src/man/Makefile b/usr/src/man/Makefile
index 74d27fb221..f4fc11d776 100644
--- a/usr/src/man/Makefile
+++ b/usr/src/man/Makefile
@@ -84,6 +84,7 @@ SUBDIRS= man1 \
man3tsol \
man3uuid \
man3volmgt \
+ man3vnd \
man3xcurses \
man3xnet \
man4 \
diff --git a/usr/src/man/man1/Makefile b/usr/src/man/man1/Makefile
index e2ec2251aa..552ba5450e 100644
--- a/usr/src/man/man1/Makefile
+++ b/usr/src/man/man1/Makefile
@@ -73,6 +73,7 @@ MANFILES= acctcom.1 \
clear.1 \
cmp.1 \
col.1 \
+ column.1 \
comm.1 \
command.1 \
compress.1 \
@@ -85,6 +86,8 @@ MANFILES= acctcom.1 \
csh.1 \
csplit.1 \
ctags.1 \
+ ctfdiff.1 \
+ ctfdump.1 \
ctrun.1 \
ctstat.1 \
ctwatch.1 \
@@ -218,6 +221,7 @@ MANFILES= acctcom.1 \
m4.1 \
mac.1 \
mach.1 \
+ machid.1 \
madv.so.1.1 \
mail.1 \
mailcompat.1 \
@@ -351,13 +355,6 @@ MANFILES= acctcom.1 \
spell.1 \
split.1 \
srchtxt.1 \
- ssh.sunssh.1 \
- ssh-add.sunssh.1 \
- ssh-agent.sunssh.1 \
- ssh-http-proxy-connect.sunssh.1 \
- ssh-keygen.sunssh.1 \
- ssh-keyscan.sunssh.1 \
- ssh-socks5-proxy-connect.sunssh.1 \
strchg.1 \
strings.1 \
strip.1 \
@@ -475,6 +472,9 @@ MANLINKS= batch.1 \
helpuid.1 \
helpyorn.1 \
hist.1 \
+ i286.1 \
+ i386.1 \
+ i486.1 \
if.1 \
intro.1 \
jsh.1 \
@@ -484,8 +484,10 @@ MANLINKS= batch.1 \
notify.1 \
onintr.1 \
page.1 \
+ pauxv.1 \
pcat.1 \
pcred.1 \
+ penv.1 \
pfcsh.1 \
pfiles.1 \
pfksh.1 \
@@ -521,16 +523,11 @@ MANLINKS= batch.1 \
sh.1 \
snca.1 \
source.1 \
+ sparc.1 \
spellin.1 \
- ssh.1 \
- ssh-add.1 \
- ssh-agent.1 \
- ssh-http-proxy-connect.1 \
- ssh-keygen.1 \
- ssh-keyscan.1 \
- ssh-socks5-proxy-connect.1 \
stop.1 \
strconf.1 \
+ sun.1 \
switch.1 \
ulimit.1 \
unalias.1 \
@@ -660,6 +657,12 @@ unlimit.1 := LINKSRC = limit.1
dumpkeys.1 := LINKSRC = loadkeys.1
+i286.1 := LINKSRC = machid.1
+i386.1 := LINKSRC = machid.1
+i486.1 := LINKSRC = machid.1
+sparc.1 := LINKSRC = machid.1
+sun.1 := LINKSRC = machid.1
+
rmail.1 := LINKSRC = mail.1
page.1 := LINKSRC = more.1
@@ -669,6 +672,9 @@ snca.1 := LINKSRC = nca.1
pcat.1 := LINKSRC = pack.1
unpack.1 := LINKSRC = pack.1
+pauxv.1 := LINKSRC = pargs.1
+penv.1 := LINKSRC = pargs.1
+
pfcsh.1 := LINKSRC = pfexec.1
pfksh.1 := LINKSRC = pfexec.1
pfsh.1 := LINKSRC = pfexec.1
@@ -716,13 +722,6 @@ spellin.1 := LINKSRC = spell.1
scp.1 := LINKSRC = scp.sunssh.1
sftp.1 := LINKSRC = sftp.sunssh.1
-ssh.1 := LINKSRC = ssh.sunssh.1
-ssh-add.1 := LINKSRC = ssh-add.sunssh.1
-ssh-agent.1 := LINKSRC = ssh-agent.sunssh.1
-ssh-http-proxy-connect.1 := LINKSRC = ssh-http-proxy-connect.sunssh.1
-ssh-keygen.1 := LINKSRC = ssh-keygen.sunssh.1
-ssh-keyscan.1 := LINKSRC = ssh-keyscan.sunssh.1
-ssh-socks5-proxy-connect.1 := LINKSRC = ssh-socks5-proxy-connect.sunssh.1
strconf.1 := LINKSRC = strchg.1
diff --git a/usr/src/man/man1/column.1 b/usr/src/man/man1/column.1
new file mode 100644
index 0000000000..a8c23310ba
--- /dev/null
+++ b/usr/src/man/man1/column.1
@@ -0,0 +1,129 @@
+.\" Copyright (c) 1989, 1990, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)column.1 8.1 (Berkeley) 6/6/93
+.\" $FreeBSD$
+.\"
+.\" Portions Copyright (c) 2013 Joyent, Inc. All rights reserved.
+.\"
+.TH COLUMN 1 "Jan 10, 2013"
+.SH NAME
+column \- columnate lists
+.SH SYNOPSIS
+.LP
+.nf
+\fBcolumn\fR [\fB-tx\fR] [\fB-c\fR \fIcolumns\fR] [\fB-s\fR \fIsep\fR] [\fIfile\fR ... ]
+.fi
+
+.SH DESCRIPTION
+.sp
+.LP
+The \fBcolumn\fR
+utility formats its input into multiple columns.
+Rows are filled before columns.
+Input is taken from
+\fIfile\fR
+operands, or, by default, from the standard input.
+Empty lines are ignored.
+.SH OPTIONS
+.sp
+.LP
+The options are as follows:
+.sp
+.ne 2
+.na
+\fB\fB-c\fR \fIcolumns\fR\fR
+.ad
+.RS 17n
+Output is formatted for a display \fIcolumns\fR
+wide.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fB-s\fR \fIsep\fR\fR
+.ad
+.RS 17n
+Specify a set of characters to be used to delimit columns for the
+\fB-t\fR option.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fB-t\fR\fR
+.ad
+.RS 17n
+Determine the number of columns the input contains and create a table.
+Columns are delimited with whitespace, by default, or with the characters
+supplied using the \fBs\fR
+option.
+Useful for pretty-printing displays.
+.RE
+
+.sp
+.ne 2
+.na
+\fB-x\fR
+.ad
+.RS 17n
+Fill columns before filling rows.
+.RE
+
+.SH ENVIRONMENT
+The COLUMNS , LANG , LC_ALL
+and
+LC_CTYPE
+environment variables affect the execution of
+\fBcolumn\fR
+as described in
+\fBenviron\fR(5).
+
+.SH EXIT STATUS
+The \fBcolumn\fR utility exits 0 on success and >0 if an error occurs.
+
+.SH EXAMPLES
+.sp
+.in +2
+.nf
+(printf \&"PERM LINKS OWNER GROUP SIZE MONTH DAY \&"\ \&;\ \&\e
+printf \&"HH:MM/YEAR NAME\en\&"\ \&;\ \&\e
+ls -l \&| sed 1d) \&| column -t
+.fi
+.in -2
+.sp
+
+
+.SH SEE ALSO
+\fBls\fR(1), \fBpaste\fR(1), \fBsort\fR(1)
+
+.SH HISTORY
+The \fBcolumn\fR command appeared in 4.3BSD-Reno.
+
+.SH BUGS
+Input lines are limited to LINE_MAX bytes in length.
diff --git a/usr/src/man/man1/crontab.1 b/usr/src/man/man1/crontab.1
index c93255f406..1008b63a21 100644
--- a/usr/src/man/man1/crontab.1
+++ b/usr/src/man/man1/crontab.1
@@ -1,6 +1,7 @@
'\" te
.\" Copyright 1989 AT&T
.\" Copyright (c) 2009, Sun Microsystems, Inc. All Rights Reserved
+.\" Copyright (c) 2011, Joyent, Inc. All Rights Reserved
.\" Portions Copyright (c) 1992, X/Open Company Limited All Rights Reserved
.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at http://www.opengroup.org/bookstore/.
.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text
@@ -10,7 +11,7 @@
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH CRONTAB 1 "Apr 6, 2009"
+.TH CRONTAB 1 "Sep 23, 2013"
.SH NAME
crontab \- user crontab file
.SH SYNOPSIS
@@ -26,7 +27,7 @@ crontab \- user crontab file
.LP
.nf
-\fB/usr/bin/crontab\fR \fB-l\fR [\fIusername\fR]
+\fB/usr/bin/crontab\fR \fB-l\fR [\fB-g\fR] [\fIusername\fR]
.fi
.LP
@@ -46,7 +47,7 @@ crontab \- user crontab file
.LP
.nf
-\fB/usr/xpg4/bin/crontab\fR \fB-l\fR [\fIusername\fR]
+\fB/usr/xpg4/bin/crontab\fR \fB-l\fR [\fB-g\fR] [\fIusername\fR]
.fi
.LP
@@ -66,7 +67,7 @@ crontab \- user crontab file
.LP
.nf
-\fB/usr/xpg6/bin/crontab\fR \fB-l\fR [\fIusername\fR]
+\fB/usr/xpg6/bin/crontab\fR \fB-l\fR [\fB-g\fR] [\fIusername\fR]
.fi
.LP
@@ -86,6 +87,17 @@ users' crontabs.
.LP
If \fBcrontab\fR is invoked with \fIfilename\fR, this overwrites an existing
\fBcrontab\fR entry for the user that invokes it.
+.sp
+.LP
+Cron supports a merged crontab with entries coming from either the user's
+\fB/var/spool/cron/crontabs\fR file or from the user's
+\fB/etc/cron.d/crontabs\fR file. The entries in the user's
+\fB/var/spool/cron/crontabs\fR file are editable whereas those in
+\fB/etc/cron.d/crontabs\fR are system-defined entries which may not
+be customized by the user. The dual set of crontab entries is only
+of interest to system-defined users such as \fBroot\fR. Except where
+otherwise explicitly indicated, all variants of the \fBcrontab\fR command
+act only on the editable crontab files found in \fB/var/spool/cron/crontabs\fR.
.SS "\fBcrontab\fR Access Control"
.sp
.LP
@@ -343,6 +355,9 @@ file using the \fB-r\fR option.
If \fIusername\fR is specified, the specified user's \fBcrontab\fR file is
edited, rather than the current user's \fBcrontab\fR file. This can only be
done by root or by a user with the \fBsolaris.jobs.admin\fR authorization.
+.sp
+Only the entries in the user's \fB/var/spool/cron/crontabs\fR file are
+editable.
.RE
.sp
@@ -354,6 +369,22 @@ done by root or by a user with the \fBsolaris.jobs.admin\fR authorization.
Lists the \fBcrontab\fR file for the invoking user. Only root or a user with
the \fBsolaris.jobs.admin\fR authorization can specify a username following the
\fB-l\fR option to list the \fBcrontab\fR file of the specified user.
+.sp
+Entries from the user's \fB/var/spool/cron/crontabs\fR file are listed, unless
+the \fB-g\fR option is given, in which case only entries from the user's
+\fB/etc/cron.d/crontabs\fR file are listed.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fB-g\fR\fR
+.ad
+.RS 6n
+In conjunction with the \fB-l\fR option, lists the global \fBcrontab\fR file
+for the invoking or specified user (if authorized) instead of the editable
+\fBcrontab\fR file. This option is not valid unless the \fB-l\fR option is
+also given.
.RE
.sp
@@ -583,6 +614,15 @@ list of denied users
.sp
.ne 2
.na
+\fB\fB/etc/cron.d/crontabs\fR\fR
+.ad
+.RS 28n
+system spool area for \fBcrontab\fR
+.RE
+
+.sp
+.ne 2
+.na
\fB\fB/var/cron/log\fR\fR
.ad
.RS 28n
diff --git a/usr/src/man/man1/ctfdiff.1 b/usr/src/man/man1/ctfdiff.1
new file mode 100644
index 0000000000..1934c64c52
--- /dev/null
+++ b/usr/src/man/man1/ctfdiff.1
@@ -0,0 +1,337 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright (c) 2015, Joyent, Inc.
+.\"
+.Dd Oct 4, 2014
+.Dt CTFDIFF 1
+.Os
+.Sh NAME
+.Nm ctfdiff
+.Nd compare two CTF containers
+.Sh SYNOPSIS
+.Nm ctfdiff
+.Op Fl afIloqt
+.Op Fl F Ar function
+.Op Fl O Ar object
+.Op Fl p Ar parent1
+.Op Fl p Ar parent2
+.Op Fl T Ar type
+.Ar file1 file2
+.Sh DESCRIPTION
+The
+.Nm
+utility identifies differences between the contents of the
+.Sy CTF
+containers found in
+.Em file1
+and
+.Em file2 .
+.Lp
+.Nm
+can find differences between two
+.Sy CTF
+container's
+.Sy labels ,
+.Sy functions ,
+.Sy objects ,
+and
+.Sy types .
+When no options are specified,
+.Nm
+will only consider
+.Sy functions ,
+.Sy objects,
+and
+.Sy types .
+.Lp
+Two
+.Sy labels
+are considered the same, if they have the same name. Two
+.Sy objects
+are considered the same if they have the same name and the type of the
+object is the same. Two
+.Sy functions
+are considered the same if they have the same, the same return type, the
+same number of arguments, and the types of their arguments are the same.
+.Lp
+Two
+.Sy types
+are considered the same if they have the same, they represent the same
+kind of thing, and the contents of the type are the same. This varies
+for each specific kind, for example, two structs are the same if they
+have the same members whose types, offsets, and names are all the same.
+For more information on the specifics for what we look at for each kind
+of type, and the kinds themselves, see the information we use to encode
+them in
+.Xr ctf 4 . If the option
+.Fl I
+is specified, then the names of basic integer types are ignored. For an
+example of where this makes sense, see
+.Sy Example 4 .
+.Lp
+If the
+.Sy CTF
+container found inside of either
+.Em file1
+or
+.Em file2
+has been uniquified (see
+.Xr ctf 4
+for more on uniquification), then the parent
+.Sy CTF
+container is also required for the diff to complete.
+.Sh OPTIONS
+The following options are supported:
+.Bl -hang -width Ds
+.It Fl a
+.Bd -filled -compact
+Diff
+.Sy labels ,
+.Sy types ,
+.Sy objects ,
+and
+.Sy functions .
+.Ed
+.It Fl f
+.Bd -filled -compact
+Diff
+.Sy function
+type argument information.
+.Ed
+.It Fl F Ar function
+.Bd -filled -compact
+When diffing
+.Sy functions ,
+only consider the function
+.Em function .
+This option requires that the option
+.Fl -f
+be specified and can be repeated multiple times.
+.Ed
+.It Fl I
+.Bd -filled -compact
+Ignore the names of integral types. This option is useful when comparing
+types between two
+.Sy CTF
+containers that have different programming models. In this case, when
+comparing integers, the name of the type is not considered. This means
+that the ILP32 type long which is a 32-bit wide signed integer is the
+same as the LP64 type int which is a 32-bit wide signed integer, even
+though they have different names.
+.Ed
+.It Fl l
+.Bd -filled -compact
+Diff the
+.Sy labels
+contained inside the
+.Sy CTF
+containers.
+.Ed
+.It Fl o
+.Bd -filled -compact
+Diff type information for
+.Sy objects .
+.Ed
+.It Fl O Ar object
+.Bd -filled -compact
+When diffing type information for
+.Sy objects ,
+only compare if the object is name
+.Em object . This option requires
+.Fl o
+to be specified and can be repeated multiple times.
+.Ed
+.It Fl p Ar parent1
+.Bd -filled -compact
+Specifies the path of file that is the parent of the
+.Sy CTF
+container inside of
+.Em file1
+is
+.Em parent1 .
+This option is required if
+.Em file1
+has been uniquified. For more information on uniquification, see
+.Xr ctf 4 .
+.Ed
+.It Fl P Ar parent2
+.Bd -filled -compact
+Specifies the path of file that is the parent of the
+.Sy CTF
+container inside of
+.Em file2 is
+.Em parent2 .
+This option is required if
+.Em file1
+has been uniquified. For more information on uniquification, see
+.Xr ctf 4 .
+.Ed
+.It Fl q
+.Bd -filled -compact
+Enables quiet mode. Standard output from the diff will not be emitted.
+However, diagnostics messages will still be emitted to standard error.
+.Ed
+.It Fl t
+.Bd -filled -compact
+Diff the
+.Sy type
+information sections in the
+.Sy CTF
+containers.
+.Ed
+.It Fl T Ar type
+.Bd -filled -compact
+When diffing the
+.Sy types
+section, only consider it if the type is name
+.Em type .
+Types specified here do not impact the diffing of
+.Sy objects
+or
+.Sy functions .
+Even with
+.Fl -T
+specified, other types will be diffed as necessary for the evaluation of
+the named types; however, the results of those intermediate differences
+will not impact the results of
+.Nm ,
+only named types are considered when evaluating the exit status of
+.Nm .
+.Ed
+.El
+.Sh EXIT STATUS
+.Bl -inset
+.It Sy 0
+.Bd -filled -offset indent -compact
+Execution completed successfully, no differences were detected
+between
+.Em file1
+and
+.Em file2 .
+.Ed
+.It Sy 1
+.Bd -filled -offset indent -compact
+Execution completed successfully, but differences were detected
+between
+.Em file1
+and
+.Em file2 .
+.Ed
+.It Sy 2
+.D1 Invalid command line options were specified.
+.It Sy 3
+.D1 A fatal error occured.
+.El
+.Sh EXAMPLES
+.Sy Example 1
+Diffing Two
+.Sy CTF
+Containers
+.Lp
+The following example compares two
+.Sy CTF
+containers using the default set
+of comparisons:
+.Sy objects ,
+.Sy functions ,
+and
+.Sy types .
+.Bd -literal -offset 6n
+$ ctfdiff /usr/lib/libc.so.1 /usr/lib/libdtrace.so.1
+ctf container /usr/lib/libc.so.1 type 37 is different
+ctf container /usr/lib/libc.so.1 type 38 is different
+ctf container /usr/lib/libc.so.1 type 39 is different
+ctf container /usr/lib/libc.so.1 type 40 is different
+ctf container /usr/lib/libc.so.1 type 41 is different
+ctf container /usr/lib/libc.so.1 type 42 is different
+ctf container /usr/lib/libc.so.1 type 43 is different
+ctf container /usr/lib/libc.so.1 type 47 is different
+ctf container /usr/lib/libc.so.1 type 48 is different
+ctf container /usr/lib/libc.so.1 type 49 is different
+\&...
+.Ed
+.Sy Example 2
+Diffing Types Between Two
+.Sy CTF
+Containers with Parents
+.Lp
+The following example compares two
+.Sy CTF
+containers
+.Sy /ws/rm/zlan/proto/kernel/drv/amd64/vnd
+and
+.Sy /ws/rm/zlan/proto/kernel/drv/amd64/overlay
+that have been uniquified against the same container
+.Sy /ws/rm/zlan/proto/kernel/amd64/genunix .
+.Bd -literal -offset 6n
+$ ctfdiff -t -p /ws/rm/zlan/proto/kernel/amd64/genunix \\
+ -P /ws/rm/zlan/proto/kernel/amd64/genunix \\
+ /ws/rm/zlan/proto/kernel/drv/amd64/vnd \\
+ /ws/rm/zlan/proto/kernel/drv/amd64/overlay
+ctf container /ws/rm/zlan/proto/kernel/drv/amd64/vnd type 32769 is different
+ctf container /ws/rm/zlan/proto/kernel/drv/amd64/vnd type 32770 is different
+ctf container /ws/rm/zlan/proto/kernel/drv/amd64/vnd type 32771 is different
+ctf container /ws/rm/zlan/proto/kernel/drv/amd64/vnd type 32772 is different
+ctf container /ws/rm/zlan/proto/kernel/drv/amd64/vnd type 32774 is different
+ctf container /ws/rm/zlan/proto/kernel/drv/amd64/vnd type 32775 is different
+ctf container /ws/rm/zlan/proto/kernel/drv/amd64/vnd type 32776 is different
+ctf container /ws/rm/zlan/proto/kernel/drv/amd64/vnd type 32777 is different
+ctf container /ws/rm/zlan/proto/kernel/drv/amd64/vnd type 32778 is different
+ctf container /ws/rm/zlan/proto/kernel/drv/amd64/vnd type 32779 is different
+\&...
+.Ed
+.Lp
+.Sy Example 3
+Diffing a Specific Function in Two
+.Sy CTF
+Containers
+.Lp
+This example shows us looking for differences in the function
+.Sy libzfs_core_init
+in two different version of the library
+.Sy libzfs_core.so.1 .
+.Bd -literal -offset 6n
+$ ctfdiff -f -F libzfs_core_init /usr/lib/libzfs_core.so.1 \\
+ /ws/rm/ctf/proto/usr/lib/libzfs_core.so.1
+$ echo $?
+.Ed
+.Lp
+.Sy Example 4
+Diffing Types to Find Differences Between Different Data Models.
+.Lp
+This example looks for differences between structures used in an ioctl
+that the kernel wants to be bitness neutral by comparing a 32-bit and
+64-bit library that consumes it. In this example, we'll use the library
+.Sy libvnd.so.1
+and the types
+.Sy vnd_ioc_attach_t ,
+.Sy vnd_ioc_link_t ,
+.Sy vnd_ioc_unlink_t ,
+.Sy vnd_ioc_buf_t ,
+and
+.Sy vnd_ioc_info_t .
+.Bd -literal -offset 6n
+$ ctfdiff -t -I -T vnd_ioc_attach_t -T vnd_ioc_link_t \\
+ -T vnd_ioc_unlink_t -T vnd_ioc_buf_t -T vnd_ioc_info_t \\
+ i386/libvnd.so.1 amd64/libvnd.so.1
+$ echo $?
+0
+.Ed
+.Sh INTERFACE STABILITY
+The command syntax is
+.Sy Committed .
+The output format is
+.Sy Uncommitted .
+.Sh SEE ALSO
+.Xr ctfdump 1 ,
+.Xr diff 1 ,
+.Xr ctf 4
diff --git a/usr/src/man/man1/ctfdump.1 b/usr/src/man/man1/ctfdump.1
new file mode 100644
index 0000000000..a8c2d978ae
--- /dev/null
+++ b/usr/src/man/man1/ctfdump.1
@@ -0,0 +1,393 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright (c) 2015, Joyent, Inc.
+.\"
+.Dd Oct 4, 2014
+.Dt CTFDUMP 1
+.Os
+.Sh NAME
+.Nm ctfdump
+.Nd dump parts of ctf data from files
+.Sh SYNOPSIS
+.Nm ctfdump
+.Op Fl dfhlsSt
+.Op Fl p Ar parent
+.Op Fl u Ar outfile
+.Ar file
+.Sh DESCRIPTION
+The
+.Nm
+utility dumps and decodes the
+.Sy CTF
+data contained inside of
+.Sy ELF
+objects and raw
+.Sy CTF
+files.
+.Lp
+.Nm
+can dump information about the
+.Sy CTF header ,
+the
+.Sy labels
+encoded in the
+.Sy CTF
+container,
+the types of
+.Sy data objects ,
+the internal
+.Sy string
+table,
+the types of the return function and the arguments for
+.Sy functions ,
+and of course, it displays information about the
+.Sy types
+defined in the
+.Sy CTF
+container.
+.Lp
+.Nm
+can also be used to dump out the raw
+.Sy CTF
+data and send it to another file. When writing out data, it always
+ensures that the
+.Sy CTF
+data is decompressed. In this form, the
+.Sy CTF
+data can be inspected using
+.Nm
+and other tools such as
+.Xr mdb 1 .
+.Lp
+When no options are specified,
+.Nm
+displays all information. However, when the
+.Fl u
+option is used, then no information is displayed by default, unless
+requested through the the appropriate option.
+.Sh OPTIONS
+The following options are supported:
+.Bl -hang -width Ds
+.It Fl d
+.Bd -filled -compact
+Dump the types of symbols that correspond to objects.
+.Ed
+.It Fl f
+.Bd -filled -compact
+Dump the types of the return values and arguments of the functions.
+.Ed
+.It Fl h
+.Bd -filled -compact
+Dump the
+.Sy CTF
+header
+.Ed
+.It Fl l
+.Bd -filled -compact
+Dump all
+.Sy CTF
+labels associated with the file.
+.Ed
+.It Fl p Ar parent
+.Bd -filled -compact
+Use the type information in
+.Em parent
+to supplement output. This is useful when a
+.Nm CTF
+container has been
+.Sy uniquified
+against
+.Em parent .
+This allows
+.Nm
+to use the names of types when used with
+.Fl t .
+.Ed
+.It Fl s
+.Bd -filled -compact
+Dump the internal
+.Sy CTF
+string table
+.Ed
+.It Fl S
+.Bd -filled -compact
+Displays statistics about the
+.Sy CTF
+container.
+.Ed
+.It Fl t
+.Bd -filled -compact
+Dump the type information contained in the
+.Sy CTF
+conatiner.
+.Ed
+.It Fl u Ar outfile
+.Bd -filled -compact
+Copies the uncompressed
+.Sy CTF
+data to the file specified by
+.Em outfile .
+This can be used to make it easier to inspect the raw
+.Sy CTF
+data.
+.Ed
+.El
+.Sh OUTPUT
+When the
+.Nm
+utility is executed with its default options, it prints out a textual
+representation of the
+.Sy CTF
+information. Note, the output format of
+.Nm
+is subject to change at any time and should not be relied upon as a
+stable format to be used for parsing.
+.Ss CTF Header
+This section describes the values in the
+.Sy CTF
+header. Each line in the section describes the value of one of the
+members of the header. For more information on the meaning and
+interpretation of these members, see
+.Xr ctf 4 .
+.Ss Label Table
+This section describes information about the labels present in the
+.Sy CTF
+information. Each entry in this section, if present, starts with a
+number and is followed by a string. An example entry in the label
+section might look like:
+.Bd -literal
+\&...
+ 2270 joyent_20151001T070028Z
+\&...
+.Ed
+.Pp
+The number,
+.Em 2270 ,
+represents the last type that the label applies to. The string,
+.Em joyent_20151001T070028Z ,
+is the name of the label. In this case, if there were no other labels,
+it would indicate that the label applied to all types up to, and
+including, the type number 2270. For more information
+on how labels are used with
+.Sy CTF
+information, see the section
+.Em The Label Section
+in
+.Xr ctf 4 .
+.Ss Data Objects
+This section describes the type information relating to data objects
+from the symbol table. An entry for a data object consists of four
+columns. The first column is just a monotonic ID. The second number is
+the type id of the object. The third column is the name of the symbol
+and the fourth column is the corresponding index from the symbol table.
+.Pp
+Take for example, the following couple of entries:
+.Bd -literal
+\&...
+ [0] 13 hz (48)
+ [1] 78 _nd (49)
+ [2] 1656 __pfmt_label (56)
+ [3] 926 _aio_hash (68)
+ [4] 13 _lio_free (70)
+ [5] 1321 u8_number_of_bytes (73)
+\&...
+.Ed
+.Pp
+Let's take the first entry in the list above. The symbol is named
+.Sy hz .
+It is the first data object, as indicated by the number zero in
+brackets. It has a type id of 13 and in this case, it has a symbol table
+index of 48.
+.Ss Functions
+This section describes the type information for functions. For each
+function present in the symbol table with type information, the
+function's entry into the function section, the function's name, the
+function's symbol table index, the function's return type, and the types
+of the function's arguments are printed. If a function is a variadic
+function, then the variadic argument is printed as the string
+.Sy '...' .
+.Pp
+Take for example, the following couple of entries:
+.Bd -literal
+\&...
+ [687] pfprint_stack (3110) returns: 11 args: (385, 115, 29, 1704, 223, 116, 2)
+ [688] pfprint_stddev (3111) returns: 11 args: (385, 115, 29, 1704, 223, 116, 2)
+ [689] pfprint_quantize (3112) returns: 11 args: (385, 115, 29, 1704, 223, 116, 2)
+ [690] pfprint_lquantize (3113) returns: 11 args: (385, 115, 29, 1704, 223, 116, 2)
+ [691] pfprint_llquantize (3114) returns: 11 args: (385, 115, 29, 1704, 223, 116, 2)
+\&...
+.Ed
+.Pp
+The first column is the function's entry number in the function type
+information section. It is enclosed in brackets. The next column is the
+function's name and it is followed in parenthesis by the its index in the
+symbol table. The following portions of this entry describe the return
+type and then all of the arguments, in positional order.
+.Ss Types
+The types section gives information about each type in the
+.Sy CTF
+container. Each entry begins with its type identifier. The type
+identifier may either be in square brackets or in angle brackets. If the
+type identifier is enclosed in angle brackets, then that represents that
+it is a root type or top-level type. If it is square brackets, then it
+is not. For more information on root types, see
+.Xr ctf 4 .
+.Pp
+Next, the type will have its name and kind. If it is an array, it will
+be followed with a subscript that describes the number of entries in the
+array. If it is a pointer, it will followed by the
+.Sy *
+symbol to indicate that it is a pointer. If the type has the
+.Sy const ,
+.Sy volatile ,
+.Sy typedef ,
+or
+.Sy restrict
+keyword applied to it, that will precede the name. All of these
+reference types, including pointer, will then be followed with an
+example of the type that they refer to.
+.Pp
+Types which are an integral or floating point value will be followed by
+information about their encoding and the number of bits represented in
+the type.
+.Pp
+Arrays will be followed by two different entries, the contents and
+index. The contents member contains the type id of the array's contents
+and the index member describes a type which can represent the array's
+index.
+.Pp
+Structures and unions will be preceded with the corresponding C keyword,
+.Sy struct
+or
+.Sy union .
+After that, the size in bytes of the structure will be indicated. ON
+each subsequent line, a single member will be listed. That line will
+contain the member's name, it's type identifier, and the offset into the
+structure that it can be found in, in bits.
+.Pp
+The following show examples of type information for all of these
+different types:
+.Bd -literal
+\&...
+ [5] char [12] contents: 1, index: 2
+ [6] short encoding=SIGNED offset=0 bits=16
+ <7> struct exit_status (4 bytes)
+ e_termination type=6 off=0
+ e_exit type=6 off=16
+
+ <8> typedef time_t refers to 2
+ <9> struct utmp (36 bytes)
+ ut_user type=3 off=0
+ ut_id type=4 off=64
+ ut_line type=5 off=96
+ ut_pid type=6 off=192
+ ut_type type=6 off=208
+ ut_exit type=7 off=224
+ ut_time type=8 off=256
+
+ <10> struct utmp * refers to 9
+ [11] const struct utmp refers to 9
+ [12] const struct utmp * refers to 11
+ <13> int encoding=SIGNED offset=0 bits=32
+ <14> typedef int32_t refers to 13
+\&...
+.Ed
+.Ss String Table
+This section describes all of the strings that are present in the
+.Sy CTF
+container. Each line represents an entry in the string table. First the
+byte offset into the string table is shown in brackets and then the
+string's value is displayed. Note the following examples:
+.Bd -literal
+ [0] \0
+ [1] joyent_20151001T070028Z
+ [25] char
+ [30] long
+ [35] short
+.Ed
+.Ss Statistics
+This section contains miscellaneous statistics about the
+.Sy CTF
+data present. Each line contains a single statistic. A brief explanation
+of the statistic is placed first, followed by an equals sign, and then
+finally the value.
+.Sh EXIT STATUS
+.Bl -inset
+.It Sy 0
+.Dl Execution completed successfully.
+.It Sy 1
+.Dl A fatal error occured.
+.It Sy 2
+.Dl Invalid command line options were specified.
+.El
+.Sh EXAMPLES
+.Sy Example 1
+Displaying the Type Section of a Single File
+.Lp
+The following example dumps the type section of the file
+.Sy /usr/lib/libc.so.1 .
+.Bd -literal -offset 6n
+$ ctfdump -t /usr/lib/libc.so.1
+- Types ----------------------------------------------------
+
+ <1> int encoding=SIGNED offset=0 bits=32
+ <2> long encoding=SIGNED offset=0 bits=32
+ <3> typedef pid_t refers to 2
+ <4> unsigned int encoding=0x0 offset=0 bits=32
+ <5> typedef uid_t refers to 4
+ <6> typedef gid_t refers to 5
+ <7> typedef uintptr_t refers to 4
+\&...
+.Ed
+.Lp
+.Sy Example 2
+Dumping the CTF data to Another File
+.Lp
+The following example dumps the entire CTF data from the file
+.Sy /usr/lib/libc.so.1
+and places it into the file
+.Sy ctf.out .
+This then shows how you can use the
+.Xr mdb 1
+to inspect its contents.
+.Bd -literal -offset 6n
+$ ctfdump -u ctf.out /usr/lib/libc.so.1
+$ mdb ./ctf.out
+> ::typedef -r /usr/lib/libctf.so.1
+> 0::print ctf_header_t
+{
+ cth_preamble = {
+ ctp_magic = 0xcff1
+ ctp_version = 0x2
+ ctp_flags = 0
+ }
+ cth_parlabel = 0
+ cth_parname = 0
+ cth_lbloff = 0
+ cth_objtoff = 0x8
+ cth_funcoff = 0x5e0
+ cth_typeoff = 0x7178
+ cth_stroff = 0x12964
+ cth_strlen = 0x7c9c
+}
+.Ed
+.Sh INTERFACE STABILITY
+The command syntax is
+.Sy Committed .
+The output format is
+.Sy Uncommitted .
+.Sh SEE ALSO
+.Xr ctfdiff 1 ,
+.Xr dump 1 ,
+.Xr elfdump 1 ,
+.Xr mdb 1 ,
+.Xr ctf 4
diff --git a/usr/src/man/man1/ld.1 b/usr/src/man/man1/ld.1
index fdddea7ece..71cbe0f185 100644
--- a/usr/src/man/man1/ld.1
+++ b/usr/src/man/man1/ld.1
@@ -24,7 +24,7 @@ ld \- link-editor for object files
[\fB-z\fR combreloc | nocombreloc ] [\fB-z\fR defs | nodefs]
[\fB-z\fR direct | nodirect] [\fB-z\fR endfiltee]
[\fB-z\fR fatal-warnings | nofatal-warnings ] [\fB-z\fR finiarray=\fIfunction\fR]
-[\fB-z\fR globalaudit] [\fB-z\fR groupperm | nogroupperm]
+[\fB-z\fR globalaudit] [\fB-z\fR groupperm | nogroupperm]
[\fB-z\fR guidance[=\fIid1\fR,\fIid2\fR...] [\fB-z\fR help ]
[\fB-z\fR ignore | record] [\fB-z\fR initarray=\fIfunction\fR] [\fB-z\fR initfirst]
[\fB-z\fR interpose] [\fB-z\fR lazyload | nolazyload]
diff --git a/usr/src/man/man1/ld.so.1.1 b/usr/src/man/man1/ld.so.1.1
index 4b14ca4f1a..19afbbf3d6 100644
--- a/usr/src/man/man1/ld.so.1.1
+++ b/usr/src/man/man1/ld.so.1.1
@@ -1,9 +1,10 @@
'\"
.\" Copyright (c) 2009, Sun Microsystems, Inc. All Rights Reserved
+.\" Copyright (c) 2014, Joyent, Inc. All Rights Reserved
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
.\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with
.\" the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH LD.SO.1 1 "Oct 5, 2012"
+.TH LD.SO.1 1 "May 8, 2014"
.SH NAME
ld.so.1 \- runtime linker for dynamic objects
.SH SYNOPSIS
@@ -574,6 +575,24 @@ aid debugging. See also the \fBRTLD_DI_SETSIGNAL\fR request to
.RE
.sp
+.ne 2
+.na
+.BR LD_TOXIC_PATH,
+.BR LD_TOXIC_PATH_32,
+.BR LD_TOXIC_PATH_64,
+.ad
+.sp .6
+.RS 4n
+The toxic path refers to a set of paths where by, if
+.B ld.so.1
+were to load a dependency on that path, rather than loading it, it
+should kill the process. This is useful when having built libraries that
+while matching the native architecture of the system, are not suitable
+to be used, for example, libraries that that correspond to an alternate
+release of an operating system.
+.RE
+
+.sp
.LP
Notice that environment variable names beginning with the
characters '\fBLD_\fR' are reserved for possible future enhancements to \fBld\fR(1) and
diff --git a/usr/src/man/man1/machid.1 b/usr/src/man/man1/machid.1
new file mode 100644
index 0000000000..cb95fa36b6
--- /dev/null
+++ b/usr/src/man/man1/machid.1
@@ -0,0 +1,25 @@
+'\" te
+.\" Copyright 1989 AT&T
+.\" Copyright (c) 1999, Sun Microsystems, Inc.
+.\" All Rights Reserved
+.\" Copyright 2015, Joyent, Inc.
+.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
+.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
+.TH MACHID 1 "Feb 27, 2015"
+.SH NAME
+machid, sun, i286, i386, i486, sparc \- get processor type truth value
+
+.SH DESCRIPTION
+.sp
+.LP
+These commands are obsolete and may be removed in a future version of the
+software.
+.sp
+.ne 2
+.na
+.SH NOTES
+.sp
+.LP
+The \fBmachid\fR family of commands is obsolete. Use \fBuname\fR \fB-p\fR and
+\fBuname\fR \fB-m\fR instead.
diff --git a/usr/src/man/man1/nawk.1 b/usr/src/man/man1/nawk.1
index 425db5d299..2c27e1baa0 100644
--- a/usr/src/man/man1/nawk.1
+++ b/usr/src/man/man1/nawk.1
@@ -15,6 +15,12 @@ nawk \- pattern scanning and processing language
.SH SYNOPSIS
.LP
.nf
+\fB/usr/bin/awk\fR [\fB-F\fR \fIERE\fR] [\fB-v\fR \fIassignment\fR] \fI\&'program'\fR | \fB-f\fR \fIprogfile\fR...
+ [\fIargument\fR]...
+.fi
+
+.LP
+.nf
\fB/usr/bin/nawk\fR [\fB-F\fR \fIERE\fR] [\fB-v\fR \fIassignment\fR] \fI\&'program'\fR | \fB-f\fR \fIprogfile\fR...
[\fIargument\fR]...
.fi
@@ -28,7 +34,8 @@ nawk \- pattern scanning and processing language
.SH DESCRIPTION
.sp
.LP
-The \fB/usr/bin/nawk\fR and \fB/usr/xpg4/bin/awk\fR utilities execute
+The \fB/usr/bin/awk\fR, \fB/usr/bin/nawk\fR and \fB/usr/xpg4/bin/awk\fR
+utilities execute
\fIprogram\fRs written in the \fBnawk\fR programming language, which is
specialized for textual data manipulation. A \fBnawk\fR \fIprogram\fR is a
sequence of patterns and corresponding actions. The string specifying
diff --git a/usr/src/man/man1/pargs.1 b/usr/src/man/man1/pargs.1
index 3f8e2bae8c..58b7bfc7f9 100644
--- a/usr/src/man/man1/pargs.1
+++ b/usr/src/man/man1/pargs.1
@@ -1,20 +1,22 @@
'\" te
.\" Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved
+.\" Copyright 2015 Joyent, Inc.
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH PARGS 1 "Jun 19, 2006"
+.TH PARGS 1 "Oct 5, 2015"
.SH NAME
-pargs \- print process arguments, environment variables, or auxiliary
+pargs, penv, pauxv \- print process arguments, environment variables, or auxiliary
vector
.SH SYNOPSIS
.LP
.nf
\fBpargs\fR [\fB-aceFlx\fR] [\fIpid\fR | \fIcore\fR]...
+\fBpauxv\fR [\fB-cF\fR] [\fIpid\fR | \fIcore\fR]...
+\fBpenv\fR [\fB-cF\fR] [\fIpid\fR | \fIcore\fR]...
.fi
.SH DESCRIPTION
-.sp
.LP
The \fBpargs\fR utility examines a target process or process core file and
prints arguments, environment variables and values, or the process auxiliary
@@ -32,10 +34,18 @@ the target process and the \fBpargs\fR process do not share a common character
encoding, \fBpargs\fR attempts to employ the \fBiconv\fR(3C) facility to
generate a printable version of the extracted strings. In the event that such
a conversion is impossible, strings are displayed as 7-bit \fBASCII\fR.
-.SH OPTIONS
.sp
.LP
-The following options are supported:
+The \fBpauxv\fR command is equivalent to running \fBpargs\fR with the
+\fB-x\fR option.
+.sp
+.LP
+The \fBpenv\fR command is equivalent to running \fBpargs\fR with the
+\fB-e\fR option.
+.SH OPTIONS
+.LP
+The following options are supported by \fBpargs\fR. Only the \fB-c\fR
+and \fB-F\fR options are supported by \fBpauxv\fR and \fBpenv\fR:
.sp
.ne 2
.na
@@ -98,7 +108,6 @@ Prints process auxiliary vector.
.RE
.SH OPERANDS
-.sp
.LP
The following operands are supported:
.sp
@@ -120,7 +129,6 @@ Process core file.
.RE
.SH USAGE
-.sp
.LP
Caution should be exercised when using the \fB-F\fR flag. Imposing two
controlling processes on one victim process can lead to chaos. Safety is
@@ -128,7 +136,6 @@ assured only if the primary controlling process, typically a debugger, has
stopped the victim process and the primary controlling process is doing nothing
at the moment of application of the \fBproc\fR tool in question.
.SH EXIT STATUS
-.sp
.LP
The following exit values are returned:
.sp
@@ -151,7 +158,6 @@ option).
.RE
.SH FILES
-.sp
.ne 2
.na
\fB\fB/proc/pid/*\fR\fR
@@ -161,7 +167,6 @@ Process information and control files.
.RE
.SH ATTRIBUTES
-.sp
.LP
See \fBattributes\fR(5) for descriptions of the following attributes:
.sp
@@ -177,7 +182,6 @@ Interface Stability Evolving
.TE
.SH SEE ALSO
-.sp
.LP
\fBproc\fR(1), \fBiconv\fR(3C), \fBproc\fR(4), \fBascii\fR(5),
\fBattributes\fR(5), \fBenviron\fR(5), \fBformats\fR(5)
diff --git a/usr/src/man/man1/proc.1 b/usr/src/man/man1/proc.1
index e005535810..3565512f0e 100644
--- a/usr/src/man/man1/proc.1
+++ b/usr/src/man/man1/proc.1
@@ -538,12 +538,12 @@ Interface Stability See below.
The human readable output is Uncommitted. The options are Committed.
.SH SEE ALSO
.LP
-\fBgcore\fR(1), \fBldd\fR(1), \fBpargs\fR(1), \fBpgrep\fR(1), \fBpkill\fR(1),
-\fBplimit\fR(1), \fBpmap\fR(1), \fBpreap\fR(1), \fBps\fR(1), \fBptree\fR(1),
-\fBppgsz\fR(1), \fBpwd\fR(1), \fBrlogin\fR(1), \fBtime\fR(1), \fBtruss\fR(1),
-\fBwait\fR(1), \fBfcntl\fR(2), \fBfstat\fR(2), \fBsetuid\fR(2),
-\fBdlopen\fR(3C), \fBsignal.h\fR(3HEAD), \fBcore\fR(4), \fBproc\fR(4),
-\fBprocess\fR(4), \fBattributes\fR(5), \fBzones\fR(5)
+\fBgcore\fR(1), \fBldd\fR(1), \fBpargs\fR(1), \fBpauxv\fR(1), \fBpenv\fR(1),
+\fBpgrep\fR(1), \fBpkill\fR(1), \fBplimit\fR(1), \fBpmap\fR(1), \fBpreap\fR(1),
+\fBps\fR(1), \fBptree\fR(1), \fBppgsz\fR(1), \fBpwd\fR(1), \fBrlogin\fR(1),
+\fBtime\fR(1), \fBtruss\fR(1), \fBwait\fR(1), \fBfcntl\fR(2), \fBfstat\fR(2),
+\fBsetuid\fR(2), \fBdlopen\fR(3C), \fBsignal.h\fR(3HEAD), \fBcore\fR(4),
+\fBproc\fR(4), \fBprocess\fR(4), \fBattributes\fR(5), \fBzones\fR(5)
.SH WARNINGS
.LP
The following \fBproc\fR tools stop their target processes while inspecting
diff --git a/usr/src/man/man1/ps.1 b/usr/src/man/man1/ps.1
index 7ff574f12b..2b0d435c44 100644
--- a/usr/src/man/man1/ps.1
+++ b/usr/src/man/man1/ps.1
@@ -38,6 +38,9 @@ displayed is controlled by the options.
Some options accept lists as arguments. Items in a list can be either separated
by commas or else enclosed in quotes and separated by commas or spaces. Values
for \fIproclist\fR and \fIgrplist\fR must be numeric.
+.sp
+.LP
+The \fBps\fR command also accepts BSD-style options. See \fBps\fR(1b).
.SH OPTIONS
.sp
.LP
@@ -1311,7 +1314,8 @@ Standard See \fBstandards\fR(5).
.sp
.LP
\fBkill\fR(1), \fBlgrpinfo\fR(1), \fBnice\fR(1), \fBpagesize\fR(1),
-\fBpmap\fR(1), \fBpriocntl\fR(1), \fBwho\fR(1), \fBgetty\fR(1M), \fBproc\fR(4),
+\fBpmap\fR(1), \fBpriocntl\fR(1), \fBps\fR(1b), \fBwho\fR(1), \fBgetty\fR(1M),
+\fBproc\fR(4),
\fBttysrch\fR(4), \fBattributes\fR(5), \fBenviron\fR(5),
\fBresource_controls\fR(5), \fBstandards\fR(5), \fBzones\fR(5)
.SH NOTES
diff --git a/usr/src/man/man1/sed.1 b/usr/src/man/man1/sed.1
index 126402343c..cd805bd841 100644
--- a/usr/src/man/man1/sed.1
+++ b/usr/src/man/man1/sed.1
@@ -1,8 +1,6 @@
.\" Copyright (c) 1992, 1993
.\" The Regents of the University of California. All rights reserved.
.\"
-.\" Copyright 2011 Nexenta Systems, Inc. All rights reserved.
-.\"
.\" This code is derived from software contributed to Berkeley by
.\" the Institute of Electrical and Electronics Engineers, Inc.
.\"
@@ -44,7 +42,8 @@
[\fB\-Ealnr\fP]
[\fB\-e\fP \fIcommand\fP]
[\fB\-f\fP \fIcommand_file\fP]
-[\fB\-I\fP[\fIextension\fP] | \fB\-i\fP[\fIextension\fP]]
+[\fB\-I\fP \fIextension\fP]
+[\fB\-i\fP \fIextension\fP]
[\fIfile ...\fP]
.SH DESCRIPTION
The
@@ -97,11 +96,16 @@ Append the editing commands found in the file
to the list of commands.
The editing commands should each be listed on a separate line.
.TP
-\fB\-I\fP[\fIextension\fP]
-Edit files in-place, saving backups if \fIextension\fP was specified.
-It is not recommended to omit saving backups when in-place editing files,
-as you risk corruption or partial content in situations where disk
-space is exhausted, etc.
+\fB\-I\fP \fIextension\fP
+Edit files in-place, saving backups with the specified
+\fIextension\fP.
+If a zero-length
+\fIextension\fP
+is given, no backup will be saved.
+It is not recommended to give a zero-length
+\fIextension\fP
+when in-place editing files, as you risk corruption or partial content
+in situations where disk space is exhausted, etc.
Note that in-place editing with
\fB\-I\fP
@@ -119,7 +123,7 @@ where using
\fB\-i\fP
is desired.
.TP
-\fB\-i\fP[\fIextension\fP]
+\fB\-i\fP \fIextension\fP
Edit files in-place similarly to
\fB\-I\fP,
but treat each file independently from other files.
diff --git a/usr/src/man/man1/zlogin.1 b/usr/src/man/man1/zlogin.1
index 7c99eb6cb1..476faab86f 100644
--- a/usr/src/man/man1/zlogin.1
+++ b/usr/src/man/man1/zlogin.1
@@ -13,43 +13,43 @@
.\" Portions Copyright [yyyy] [name of copyright owner]
.\" Copyright 2013 DEY Storage Systems, Inc.
.\" Copyright (c) 2014 Gary Mills
+.\" Copyright (c) 2015, Joyent, Inc. All Rights Reserved
.\" Copyright 2015 Nexenta Systems, Inc. All rights reserved.
-.TH ZLOGIN 1 "Mar 17, 2015"
+.TH ZLOGIN 1 "Mar 30, 2015"
.SH NAME
zlogin \- enter a zone
.SH SYNOPSIS
.LP
.nf
-\fBzlogin\fR [\fB-dCEQ\fR] [\fB-e\fR \fIc\fR] [\fB-l\fR \fIusername\fR] \fIzonename\fR
+\fBzlogin\fR [\fB-dCEINQ\fR] [\fB-e\fR \fIc\fR] [\fB-l\fR \fIusername\fR] \fIzonename\fR
.fi
.LP
.nf
-\fBzlogin\fR [\fB-nEQS\fR] [\fB-e\fR \fIc\fR] [\fB-l\fR \fIusername\fR] \fIzonename\fR \fIutility\fR
+\fBzlogin\fR [\fB-inEQS\fR] [\fB-e\fR \fIc\fR] [\fB-l\fR \fIusername\fR] \fIzonename\fR \fIutility\fR
[\fIargument\fR]...
.fi
.SH DESCRIPTION
-.sp
.LP
The \fBzlogin\fR utility is used by the administrator to enter an operating
system zone. Only a superuser operating in the global system zone can use this
utility.
.sp
.LP
-\fBzlogin\fR operates in one of three modes:
+\fBzlogin\fR operates in one of four modes:
.sp
.ne 2
.na
\fBInteractive Mode\fR
.ad
.RS 24n
-If no utility argument is given and the stdin file descriptor for the
-\fBzlogin\fR process is a tty device, \fBzlogin\fR operates in \fBinteractive
-mode\fR. In this mode, \fBzlogin\fR creates a new pseudo terminal for use
-within the login session. Programs requiring a tty device, for example,
-\fBvi\fR(1), work properly in this mode. In this mode, \fBzlogin\fR invokes
-\fBlogin\fR(1) to provide a suitable login session.
+If no utility argument is given or if the \fB-i\fR option is specified, and the
+stdin file descriptor for the \fBzlogin\fR process is a tty device, \fBzlogin\fR
+operates in \fBinteractive mode\fR. In this mode, \fBzlogin\fR creates a new
+pseudo terminal for use within the login session. Programs requiring a tty
+device, for example, \fBvi\fR(1), work properly in this mode. In this mode,
+\fBzlogin\fR invokes \fBlogin\fR(1) to provide a suitable login session.
.RE
.sp
@@ -58,11 +58,12 @@ within the login session. Programs requiring a tty device, for example,
\fBNon-Interactive Mode\fR
.ad
.RS 24n
-If a utility is specified, \fBzlogin\fR operates in \fBnon-interactive mode\fR.
-This mode can be useful for script authors since stdin, stdout, and stderr are
-preserved and the exit status of \fIutility\fR is returned upon termination. In
-this mode, \fBzlogin\fR invokes \fBsu\fR(1M) in order to set up the user's
-environment and to provide a login environment.
+If a utility is specified and the \fB-i\fR option is not specified, \fBzlogin\fR
+operates in \fBnon-interactive mode\fR. This mode can be useful for script
+authors since stdin, stdout, and stderr are preserved and the exit status of
+\fIutility\fR is returned upon termination. In this mode, \fBzlogin\fR invokes
+\fBsu\fR(1M) in order to set up the user's environment and to provide a login
+environment.
.sp
The specified command is passed as a string and interpreted by a shell running
in the non-global zone. See \fBrsh\fR(1).
@@ -80,8 +81,17 @@ available once the zone is in the installed state. Connections to the console
are persistent across reboot of the zone.
.RE
-.SH OPTIONS
.sp
+.ne 2
+.na
+\fBStandalone-processs Interactive Mode\fR
+.ad
+.RS 24n
+If the \fB-I\fR option is specified the user is connected to the zone's stdin,
+stdout and stderr \fBzfd(7D)\fR devices.
+.RE
+
+.SH OPTIONS
.LP
The following options are supported:
.sp
@@ -127,6 +137,25 @@ login by using the escape sequence character.
.sp
.ne 2
.na
+\fB\fB-i\fR\fR
+.ad
+.RS 15n
+Forces interactive mode when a utility argument is specified.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fB-I\fR\fR
+.ad
+.RS 15n
+Connects to the zone's \fBzfd(7D)\fR devices.
+.RE
+
+.sp
+.sp
+.ne 2
+.na
\fB\fB-l\fR \fIusername\fR\fR
.ad
.RS 15n
@@ -149,6 +178,17 @@ and the shell which invokes \fBzlogin\fR both read from standard input.
.sp
.ne 2
.na
+\fB-N\fR
+.ad
+.RS 15n
+Nohup. This may only be used with the -I option to avoid sending EOF to the zfd
+device when zlogin's stdin receives EOF. It can also be toggled by sending
+\fBSIGUSR1\fR to an attached zlogin process.
+.RE
+
+.sp
+.ne 2
+.na
\fB-Q\fR
.ad
.RS 15n
@@ -172,7 +212,6 @@ other forms of login have become impossible.
.RE
.SS "Escape Sequences"
-.sp
.LP
Lines that you type that start with the tilde character (\fB~\fR) are "escape
sequences". The escape character can be changed using the \fB-e\fR option.
@@ -187,12 +226,10 @@ host breaks the connection with no warning to the zone's end.
.RE
.SH SECURITY
-.sp
.LP
Once a process has been placed in a zone other than the global zone, the
process cannot change zone again, nor can any of its children.
.SH OPERANDS
-.sp
.LP
The following operands are supported:
.sp
@@ -223,7 +260,6 @@ Arguments passed to the utility.
.RE
.SH EXIT STATUS
-.sp
.LP
In interactive and non-interactive modes, the \fBzlogin\fR utility exits when
the command or shell in the non-global zone exits. In non-interactive mode, the
@@ -268,7 +304,6 @@ mode.
.RE
.SH ATTRIBUTES
-.sp
.LP
See \fBattributes\fR(5) for descriptions of the following attributes:
.sp
@@ -284,12 +319,10 @@ Interface Stability Evolving
.TE
.SH SEE ALSO
-.sp
.LP
\fBlogin\fR(1), \fBrsh\fR(1), \fBvi\fR(1), \fBsu\fR(1M), \fBzoneadm\fR(1M),
\fBzonecfg\fR(1M), \fBattributes\fR(5), \fBzones\fR(5)
.SH NOTES
-.sp
.LP
\fBzlogin\fR fails if its open files or any portion of its address space
corresponds to an NFS file. This includes the executable itself or the shared
diff --git a/usr/src/man/man1m/Makefile b/usr/src/man/man1m/Makefile
index b00898cce4..8ba56e1fc1 100644
--- a/usr/src/man/man1m/Makefile
+++ b/usr/src/man/man1m/Makefile
@@ -11,12 +11,12 @@
#
# Copyright 2011, Richard Lowe
-# Copyright (c) 2012, Joyent, Inc. All rights reserved.
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
# Copyright 2015 Nexenta Systems, Inc. All rights reserved.
# Copyright 2016 Toomas Soome <tsoome@me.com>
#
-include $(SRC)//Makefile.master
+include $(SRC)//Makefile.master
MANSECT= 1m
@@ -483,8 +483,6 @@ _MANFILES= 6to4relay.1m \
soconfig.1m \
sppptun.1m \
spray.1m \
- ssh-keysign.sunssh.1m \
- sshd.sunssh.1m \
statd.1m \
stmfadm.1m \
stmsboot.1m \
@@ -546,7 +544,10 @@ _MANFILES= 6to4relay.1m \
uucleanup.1m \
uusched.1m \
uuxqt.1m \
+ vfsstat.1m \
vmstat.1m \
+ vndadm.1m \
+ vndstat.1m \
volcopy.1m \
volcopy_ufs.1m \
vscanadm.1m \
@@ -647,8 +648,6 @@ MANLINKS= acctcon1.1m \
sa2.1m \
sadc.1m \
sftp-server.1m \
- ssh-keysign.1m \
- sshd.1m \
shutacct.1m \
sprayd.1m \
startup.1m \
@@ -724,8 +723,6 @@ hal-set-property.1m := LINKSRC = hal-get-property.1m
poweroff.1m := LINKSRC = halt.1m
sftp-server.1m := LINKSRC = sftp-server.sunssh.1m
-ssh-keysign.1m := LINKSRC = ssh-keysign.sunssh.1m
-sshd.1m := LINKSRC = sshd.sunssh.1m
comsat.1m := LINKSRC = in.comsat.1m
fingerd.1m := LINKSRC = in.fingerd.1m
diff --git a/usr/src/man/man1m/dladm.1m b/usr/src/man/man1m/dladm.1m
index 4d46f45cf3..12224b1ecb 100644
--- a/usr/src/man/man1m/dladm.1m
+++ b/usr/src/man/man1m/dladm.1m
@@ -9,14 +9,14 @@
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
.\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH DLADM 1M "Dec 03, 2014"
+.TH DLADM 1M "Apr 09, 2015"
.SH NAME
dladm \- administer data links
.SH SYNOPSIS
.LP
.nf
\fBdladm show-link\fR [\fB-P\fR] [\fB-s\fR [\fB-i\fR \fIinterval\fR]] [[\fB-p\fR] \fB-o\fR \fIfield\fR[,...]] [\fIlink\fR]
-\fBdladm rename-link\fR [\fB-R\fR \fIroot-dir\fR] \fIlink\fR \fInew-link\fR
+\fBdladm rename-link\fR [\fB-R\fR \fIroot-dir\fR] [\fB-z\fR \fIzonename\fR] \fIlink\fR \fInew-link\fR
.fi
.LP
@@ -99,9 +99,11 @@ dladm \- administer data links
.LP
.nf
-\fBdladm set-linkprop\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] \fB-p\fR \fIprop\fR=\fIvalue\fR[,...] \fIlink\fR
-\fBdladm reset-linkprop\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] [\fB-p\fR \fIprop\fR[,...]] \fIlink\fR
-\fBdladm show-linkprop\fR [\fB-P\fR] [[\fB-c\fR] \fB-o\fR \fIfield\fR[,...]] [\fB-p\fR \fIprop\fR[,...]] [\fIlink\fR]
+\fBdladm set-linkprop\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] [\fB-z\fR \fIzonename\fR] \fB-p\fR \fIprop\fR=\fIvalue\fR[,...]
+ \fIlink\fR
+\fBdladm reset-linkprop\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] [\fB-z\fR \fIzonename\fR] [\fB-p\fR \fIprop\fR[,...]] \fIlink\fR
+\fBdladm show-linkprop\fR [\fB-P\fR] [\fB-z\fR \fIzonename\fR] [[\fB-c\fR] \fB-o\fR \fIfield\fR[,...]]
+ [\fB-p\fR \fIprop\fR[,...]] [\fIlink\fR]
.fi
.LP
@@ -116,9 +118,9 @@ dladm \- administer data links
\fBdladm create-vnic\fR [\fB-t\fR] \fB-l\fR \fIlink\fR [\fB-R\fR \fIroot-dir\fR] [\fB-m\fR \fIvalue\fR | auto |
{factory \fB-n\fR \fIslot-identifier\fR]} | {random [\fB-r\fR \fIprefix\fR]}]
[\fB-v\fR \fIvlan-id\fR] [\fB-p\fR \fIprop\fR=\fIvalue\fR[,...]] \fIvnic-link\fR
-\fBdladm delete-vnic\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] \fIvnic-link\fR
+\fBdladm delete-vnic\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] [\fB-z\fR \fIzonename\fR] \fIvnic-link\fR
\fBdladm show-vnic\fR [\fB-pP\fR] [\fB-s\fR [\fB-i\fR \fIinterval\fR]] [\fB-o\fR \fIfield\fR[,...]]
- [\fB-l\fR \fIlink\fR] [\fIvnic-link\fR]
+ [\fB-l\fR \fIlink\fR] [\fB-z\fR \fIzonename\fR] [\fIvnic-link\fR]
.fi
.LP
@@ -140,6 +142,14 @@ dladm \- administer data links
.LP
.nf
+\fBdladm create-overlay\fR [\fB-t\fR] \fB-e\fR \fIencap\fR \fB-s\fR \fIsearch\fR \fB-v\fR \fIvnetid\fR [\fB-p\fR \fIprop\fR=\fIvalue\fR[,...]] \fIoverlay\fR
+\fBdladm delete-overlay\fR \fIoverlay\fR
+\fBdladm modify-overlay\fR \fB-d\fR \fImac\fR | \fB-f\fR | \fB-s\fR \fImac=ip:port\fR \fIoverlay\fR
+\fBdladm show-overlay\fR [ \fB-f\fR | \fB-t\fR ] [[\fB-p\fR] \fB-o\fR \fIfield\fR[,...]] [\fIoverlay\fR]
+.fi
+
+.LP
+.nf
\fBdladm show-usage\fR [\fB-a\fR] \fB-f\fR \fIfilename\fR [\fB-p\fR \fIplotfile\fR \fB-F\fR \fIformat\fR] [\fB-s\fR \fItime\fR]
[\fB-e\fR \fItime\fR] [\fIlink\fR]
.fi
@@ -227,9 +237,9 @@ A WiFi datalink.
.ad
.sp .6
.RS 4n
-A virtual network interface created on a link or an \fBetherstub\fR. It is a
-pseudo device that can be treated as if it were an network interface card on a
-machine.
+A virtual network interface created on a link, an \fBetherstub\fR, or \fBan
+overlay\fR. It is a pseudo device that can be treated as if it were an network
+interface card on a machine.
.RE
.sp
@@ -297,6 +307,20 @@ use any alphanumeric characters, as well as underscore (\fB_\fR), period
characters.
.RE
+.sp
+.ne 2
+.na
+.B overlay
+.ad
+.sp .6
+.RS 4n
+An overlay instance, identified by an administratively-chosen name. An overlay
+can be used to create or join an existing software defined network.
+VNICs created on an overlay will appear to be connected by a local virtual
+switch and will also be connected to interfaces on matching overlays provided by
+other hosts. For more information on overlay devices, see \fBoverlay\fR(5).
+.RE
+
.SS "Options"
.LP
Each \fBdladm\fR subcommand has its own set of options. However, many of the
@@ -569,8 +593,7 @@ will be displayed only once.
.sp
.ne 2
.na
-\fB\fBdladm rename-link\fR [\fB-R\fR \fIroot-dir\fR] \fIlink\fR
-\fInew-link\fR\fR
+\fB\fBdladm rename-link\fR [\fB-R\fR \fIroot-dir\fR] [\fB-z\fR \fIzonename\fR] \fIlink\fR \fInew-link\fR\fR
.ad
.sp .6
.RS 4n
@@ -588,6 +611,16 @@ examples of how this subcommand is used.
See "Options," above.
.RE
+.sp
+.ne 2
+.na
+\fB\fB-z\fR \fIzonename\fR
+.ad
+.sp .6
+.RS 4n
+A link assigned to a zone can only be renamed while the zone is in the ready state.
+.RE
+
.RE
.sp
@@ -3193,8 +3226,7 @@ Extended output is displayed for \fBPTYPE\fR values of \fBcurrent\fR,
.sp
.ne 2
.na
-\fB\fBdladm set-linkprop\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] \fB-p\fR
-\fIprop\fR=\fIvalue\fR[,...] \fIlink\fR\fR
+\fB\fBdladm set-linkprop\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] [\fB-z\fR \fIzonename\fR] \fB-p\fR \fIprop\fR=\fIvalue\fR[,...] \fIlink\fR\fR
.ad
.sp .6
.RS 4n
@@ -3226,6 +3258,16 @@ See "Options," above.
.sp
.ne 2
.na
+\fB\fB-z\fR \fIzonenme\fR
+.ad
+.sp .6
+.RS 4n
+Operate on a link that has been delegated to the specified zone.
+.RE
+
+.sp
+.ne 2
+.na
\fB\fB-p\fR \fIprop\fR=\fIvalue\fR[,...], \fB--prop\fR
\fIprop\fR=\fIvalue\fR[,...]\fR
.ad
@@ -3245,8 +3287,7 @@ same value.
.sp
.ne 2
.na
-\fB\fBdladm reset-linkprop\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] [\fB-p\fR
-\fIprop\fR,...] \fIlink\fR\fR
+\fB\fBdladm reset-linkprop\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] [\fB-z\fR \fIzonename\fR] [\fB-p\fR \fIprop\fR,...] \fIlink\fR\fR
.ad
.sp .6
.RS 4n
@@ -3278,6 +3319,16 @@ See "Options," above.
.sp
.ne 2
.na
+\fB\fB-z\fR \fIzonenme\fR
+.ad
+.sp .6
+.RS 4n
+Operate on a link that has been delegated to the specified zone.
+.RE
+
+.sp
+.ne 2
+.na
\fB\fB-p\fR \fIprop, ...\fR, \fB--prop\fR=\fIprop, ...\fR\fR
.ad
.sp .6
@@ -3292,8 +3343,7 @@ the same value.
.sp
.ne 2
.na
-\fB\fBdladm show-linkprop\fR [\fB-P\fR] [[\fB-c\fR] \fB-o\fR
-\fIfield\fR[,...]][\fB-p\fR \fIprop\fR[,...]] [\fIlink\fR]\fR
+\fB\fBdladm show-linkprop\fR [\fB-P\fR] [\fB-z\fR \fIzonename\fR] [[\fB-c\fR] \fB-o\fR \fIfield\fR[,...]][\fB-p\fR \fIprop\fR[,...]] [\fIlink\fR]\fR
.ad
.sp .6
.RS 4n
@@ -3411,6 +3461,16 @@ Display persistent link property information
.sp
.ne 2
.na
+\fB\fB-z\fR \fIzonenme\fR
+.ad
+.sp .6
+.RS 4n
+Operate on a link that has been delegated to the specified zone.
+.RE
+
+.sp
+.ne 2
+.na
\fB\fB-p\fR \fIprop, ...\fR, \fB--prop\fR=\fIprop, ...\fR\fR
.ad
.sp .6
@@ -3728,8 +3788,7 @@ A comma-separated list of properties to set to the specified values.
.sp
.ne 2
.na
-\fB\fBdladm delete-vnic\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR]
-\fIvnic-link\fR\fR
+\fB\fBdladm delete-vnic\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] [\fB-z\fR \fIzonename\fR] \fIvnic-link\fR\fR
.ad
.sp .6
.RS 4n
@@ -3755,13 +3814,22 @@ next reboot.
See "Options," above.
.RE
+.sp
+.ne 2
+.na
+\fB\fB-z\fR \fIzonenme\fR
+.ad
+.sp .6
+.RS 4n
+Operate on a link that has been delegated to the specified zone.
+.RE
+
.RE
.sp
.ne 2
.na
-\fB\fBdladm show-vnic\fR [\fB-pP\fR] [\fB-s\fR [\fB-i\fR \fIinterval\fR]]
-[\fB-o\fR \fIfield\fR[,...]] [\fB-l\fR \fIlink\fR] [\fIvnic-link\fR]\fR
+\fB\fBdladm show-vnic\fR [\fB-pP\fR] [\fB-s\fR [\fB-i\fR \fIinterval\fR]] [\fB-o\fR \fIfield\fR[,...]] [\fB-l\fR \fIlink\fR] [\fB-z\fR \fIzonename\fR] [\fIvnic-link\fR]\fR
.ad
.sp .6
.RS 4n
@@ -3904,6 +3972,16 @@ will be displayed only once.
Display information for all VNICs on the named link.
.RE
+.sp
+.ne 2
+.na
+\fB\fB-z\fR \fIzonenme\fR
+.ad
+.sp .6
+.RS 4n
+Operate on a link that has been delegated to the specified zone.
+.RE
+
.RE
.sp
@@ -4321,6 +4399,348 @@ The tunnel destination address.
.sp
.ne 2
.na
+\fBdladm create-overlay\fR \fB-e\fR \fIencap\fR \fB-s\fR \fIsearch\fR
+\fB-v\fR \fIvnetid\fR [\fB-p\fR \fIprop\fR=\fIvalue\fR[,...]] \fIoverlay\fR
+.ad
+.sp .6
+.RS 4n
+Create an overlay device named \fIoverlay\fR.
+.sp
+Overlay devices are similar to etherstubs. VNICs can be created on top
+of them. However, unlike an etherstub which is local to the system, an
+overlay device can be configured to communicate to remote hosts,
+providing a means for network virtualization. The way in which it does
+this is described by the encapsulation module and the search plugin. For
+more information on these, see \fBoverlay\fR(5).
+.sp
+An overlay device has a series of required and optional properties. These
+properties vary based upon the search and encapsulation modules and are fully
+specified in \fBoverlay\fR(5). Not every property needs to be specified - some
+have default values which will be used if nothing specific is specified. For
+example, the default port for VXLAN comes from its IANA standard. If a
+required property is missing, the command will fail and inform you of the
+missing properties.
+.sp
+.ne 2
+.na
+\fB\fB-t\fR, \fB--temporary\fR\fR
+.ad
+.sp .6
+.RS 4n
+Specifies that the overlay is temporary. Temporary overlays last until
+the next reboot.
+.RE
+
+.sp
+.ne 2
+.na
+\fB-e\fR \fIencap\fR, \fB--encap\fR=\fIencap\fR
+.ad
+.sp .6
+.RS 4n
+Use \fIencap\fR as the encapsulation plugin for the overlay device
+\fIoverlay\fR. The encapsulation plugin determines how packets are transformed
+before being put on the wire.
+.RE
+
+.sp
+.ne 2
+.na
+\fB-s\fR \fIsearch\fR, \fB--search\fR=\fIsearch\fR
+.ad
+.sp .6
+.RS 4n
+Use \fIsearch\fR as the search plugin for \fIoverlay\fR. The search plugin
+determines how non-local targets are found and where packets are directed to.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fB-p\fR \fIprop\fR=\fIvalue\fR,..., \fB--prop\fR
+\fIprop\fR=\fIvalue\fR,...\fR
+.ad
+.sp .6
+.RS 4n
+A comma-separated list of properties to set to the specified values.
+.RE
+
+.sp
+.ne 2
+.na
+\fB-v\fR \fIvnetid\fR, \fB--vnetid\fR=\fIvnetid\fR
+.ad
+.sp .6
+.RS 4n
+Sets the virtual networking identfier to \fIvnetid\fR. A virtual network
+identifier determines is similar to a VLAN identifier, in that it identifies a
+unique virtual network. All overlay devices on the system share the same space
+for the virtual network idenfifiter. However, the valid range of identifiers is
+determined by the encapsulation plugin specified by \fB-e\fR.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.na
+\fBdladm delete-overlay\fR \fIoverlay\fR
+.ad
+.sp .6
+.RS 4n
+Delete the specified overlay. This will fail if there are VNICs on top of the
+device.
+.RE
+
+.sp
+.ne 2
+.na
+\fBdladm modify-overlay\fR \fB-d\fR \fImac\fR | \fB-f\fR | \fB-s\fR \fImac=ip:port\fR \fIoverlay\fR
+.ad
+.sp .6
+.RS 4n
+Modifies the target tables for the specified overlay.
+.sp
+The different options allow for different ways of modifying the target table.
+One of \fB-d\fR, \fB-f\fR, and \fB-s\fR is required. This is not applicable for
+all kinds of overlay devices. For more information, see \fBoverlay\fR(5).
+.sp
+.ne 2
+.na
+\fB-d\fR \fImac\fR, \fB--delete-entry\fR=\fImac\fR
+.ad
+.sp .6
+.RS 4n
+Deletes the entry for \fImac\fR from the target table for \fIoverlay\fR. Note,
+if a lookup is pending or outstanding, this does not cancel it or stop it from
+updating the value.
+.RE
+
+.sp
+.ne 2
+.na
+\fB-f\fR, \fB--flush-table\fR
+.ad
+.sp .6
+.RS 4n
+Flushes all values in the target table for \fIoverlay\fR.
+.RE
+
+.sp
+.ne 2
+.na
+\fB-s\fR \fImac\fR=\fIvalue\fR, \fB--set-entry\fR=\fImac\fR=\fIvalue\fR
+.ad
+.sp .6
+.RS 4n
+Sets the value of \fIoverlay\fR's target table entry for \fImac\fR to the
+specified value. The specified value varies upon the encapsulation plugin. The
+value may be a combination of a MAC address, IP adress, and port. Generall, this
+looks like [\fImac\fR,][\fIIP\fR:][\fIport\fR]. If a component is the last one,
+then there is no need for a separator. eg. if just the MAC address or IP is
+needed, it would look like \fImac\fR and \fIIP\fR respectively.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.na
+\fBdladm show-overlay\fR [ \fB-f\fR | \fB-t\fR ] [[\fB-p\fR] \fB-o\fR \fIfield\fR[,...]] [\fIoverlay\fR]
+.ad
+.sp .6
+.RS 4n
+Shows overlay configuration (the default), internal target tables (\fB-t\fR), or
+the FMA state (\fB-f\fR), either for all overlays or the specified overlay.
+.sp
+By default (with neither \fB-f\fR or \fB-t\fR specified), the following fields
+will be displayed:
+.sp
+.ne 2
+.na
+\fB\fBLINK\fR\fR
+.ad
+.sp .6
+.RS 4n
+The name of the overlay.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBPROPERTY\fR\fR
+.ad
+.sp .6
+.RS 4n
+The name of the property.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBPERM\fR\fR
+.ad
+.sp .6
+.RS 4n
+The read/write permissions of the property. The value shown is one of \fBr-\fR
+or \fBrw\fR.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBVALUE\fR\fR
+.ad
+.sp .6
+.RS 4n
+The current property value. If the value is not set, it is shown as \fB--\fR.
+If it is unknown, the value is shown as \fB?\fR.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBDEFAULT\fR\fR
+.ad
+.sp .6
+.RS 4n
+The default value of the property. If the property has no default value,
+\fB--\fR is shown.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBPOSSIBLE\fR\fR
+.ad
+.sp .6
+.RS 4n
+A comma-separated list of the values the property can have. If the values span
+a numeric range, \fImin\fR - \fImax\fR might be shown as shorthand. If the
+possible values are unknown or unbounded, \fB--\fR is shown.
+.RE
+
+.sp
+When the \fB-f\fR option is displayed, the following fields will be displayed:
+.sp
+.ne 2
+.na
+\fB\fBLINK\fR\fR
+.ad
+.sp .6
+.RS 4n
+The name of the overlay.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBSTATUS\fR\fR
+.ad
+.sp .6
+.RS 4n
+Either \fBONLINE\fR or \fBDEGRADED\fR.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBDETAILS\fR\fR
+.ad
+.sp .6
+.RS 4n
+When the \fBoverlay\fR's status is \fBONLINE\fR, then this has the value
+\fB--\fR. Otherwise, when it is \fBDEGRADED\fR, this field provides a more
+detailed explanation as to why it's degraded.
+.RE
+
+.sp
+When the \fB-t\fR option is displayed, the following fields will be displayed:
+.sp
+.ne 2
+.na
+\fB\fBLINK\fR\fR
+.ad
+.sp .6
+.RS 4n
+The name of the overlay.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBTARGET\fR\fR
+.ad
+.sp .6
+.RS 4n
+The target MAC address of a table entry.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBDESTINATION\fR\fR
+.ad
+.sp .6
+.RS 4n
+The address that an encapsulated packet will be sent to when a packet has the
+address specified by \fBTARGET\fR.
+.RE
+
+The \fBshow-overlay\fR command supports the following options:
+
+.sp
+.ne 2
+.na
+\fB-f\fR, \fB--fma\fR
+.ad
+.sp .6
+.RS 4n
+Displays information about an overlay device's FMA state. For more
+information on the target table, see \fBoverlay\fR(5).
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fB-o\fR \fIfield\fR[,...], \fB--output\fR=\fIfield\fR\fR
+.ad
+.sp .6
+.RS 4n
+A case-insensitive, comma-separated list of output fields to display. The field
+name must be one of the fields listed above, or the special value \fBall\fR, to
+display all fields. The fields applicable to the \fB-o\fR option are limited to
+those listed under each output mode. For example, if using \fB-L\fR, only the
+fields listed under \fB-L\fR, above, can be used with \fB-o\fR.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fB-p\fR, \fB--parseable\fR\fR
+.ad
+.sp .6
+.RS 4n
+Display using a stable machine-parseable format. The \fB-o\fR option is
+required with \fB-p\fR. See "Parseable Output Format", below.
+.RE
+
+.sp
+.ne 2
+.na
+\fB-t\fR, \fB--target\fR
+.ad
+.sp .6
+.RS 4n
+Displays information about an overlay device's target table. For more
+information on the target table, see \fBoverlay\fR(5).
+.RE
+
+.RE
+
+.sp
+.ne 2
+.na
\fB\fBdladm show-usage\fR [\fB-a\fR] \fB-f\fR \fIfilename\fR [\fB-p\fR
\fIplotfile\fR \fB-F\fR \fIformat\fR] [\fB-s\fR \fItime\fR] [\fB-e\fR
\fItime\fR] [\fIlink\fR]\fR
@@ -5481,7 +5901,7 @@ Interface Stability Committed
.LP
\fBacctadm\fR(1M), \fBautopush\fR(1M), \fBifconfig\fR(1M), \fBipsecconf\fR(1M),
\fBndd\fR(1M), \fBpsrset\fR(1M), \fBwpad\fR(1M), \fBzonecfg\fR(1M),
-\fBattributes\fR(5), \fBieee802.3\fR(5), \fBdlpi\fR(7P)
+\fBattributes\fR(5), \fBieee802.3\fR(5), \fBoverlay\fR(5), \fBdlpi\fR(7P)
.SH NOTES
.LP
The preferred method of referring to an aggregation in the aggregation
diff --git a/usr/src/man/man1m/flowadm.1m b/usr/src/man/man1m/flowadm.1m
index b12ce6af96..c9d5b846d5 100644
--- a/usr/src/man/man1m/flowadm.1m
+++ b/usr/src/man/man1m/flowadm.1m
@@ -1,5 +1,6 @@
'\" te
.\" Copyright (c) 2009, Sun Microsystems, Inc. All Rights Reserved
+.\" Copyright (c) 2011, Joyent, Inc. All Rights Reserved
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
@@ -11,14 +12,14 @@ services, containers, and virtual machines
.LP
.nf
\fBflowadm show-flow\fR [\fB-pP\fR] [\fB-S\fR] [\fB-s\fR [\fB-i\fR \fIinterval\fR]] [\fB-l\fR \fIlink\fR]
- [\fB-o\fR \fIfield\fR[,...]] [\fIflow\fR]
+ [\fB-o\fR \fIfield\fR[,...]] [\fB-z\fR \fIzonename\fR] [\fIflow\fR]
.fi
.LP
.nf
-\fBflowadm add-flow\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] \fB-l\fR \fIlink\fR \fB-a\fR \fIattr\fR=\fIvalue\fR[,...]
- \fB-p\fR \fIprop\fR=\fIvalue\fR[,...] \fIflow\fR
-\fBflowadm remove-flow\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] {\fB-l\fR \fIlink\fR | \fIflow\fR}
+\fBflowadm add-flow\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] [\fB-z\fR \fIzonename\fR] \fB-l\fR \fIlink\fR
+ \fB-a\fR \fIattr\fR=\fIvalue\fR[,...] \fB-p\fR \fIprop\fR=\fIvalue\fR[,...] \fIflow\fR
+\fBflowadm remove-flow\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] [\fB-z\fR \fIzonename\fR] {\fB-l\fR \fIlink\fR | \fIflow\fR}
.fi
.LP
@@ -77,8 +78,7 @@ The following subcommands are supported:
.sp
.ne 2
.na
-\fB\fBflowadm show-flow\fR [\fB-pP\fR] [\fB-s\fR [\fB-i\fR \fIinterval\fR]]
-[\fB-o\fR \fIfield\fR[,...]] [\fB-l\fR \fIlink\fR] [\fIflow\fR]\fR
+\fB\fBflowadm show-flow\fR [\fB-pP\fR] [\fB-s\fR [\fB-i\fR \fIinterval\fR]] [\fB-o\fR \fIfield\fR[,...]] [\fB-l\fR \fIlink\fR] [\fB-z\fR \fIzonename\fR] [\fIflow\fR]\fR
.ad
.sp .6
.RS 4n
@@ -223,14 +223,22 @@ Display information for all flows on the named link or information for the
named flow.
.RE
+.sp
+.ne 2
+.na
+\fB\fB-z\fR \fIzonename\fR
+.ad
+.sp .6
+.RS 4n
+Operate on a link that has been delegated to the specified zone.
+.RE
+
.RE
.sp
.ne 2
.na
-\fB\fBflowadm add-flow\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] \fB-l\fR
-\fIlink\fR \fB-a\fR \fIattr\fR=\fIvalue\fR[,...] \fB-p\fR
-\fIprop\fR=\fIvalue\fR[,...] \fIflow\fR\fR
+\fB\fBflowadm add-flow\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] [\fB-z\fR \fIzonename\fR] \fB-l\fR \fIlink\fR \fB-a\fR \fIattr\fR=\fIvalue\fR[,...] \fB-p\fR \fIprop\fR=\fIvalue\fR[,...] \fIflow\fR\fR
.ad
.sp .6
.RS 4n
@@ -268,6 +276,16 @@ persistent creation.
.sp
.ne 2
.na
+\fB\fB-z\fR \fIzonename\fR
+.ad
+.sp .6
+.RS 4n
+Operate on a link that has been delegated to the specified zone.
+.RE
+
+.sp
+.ne 2
+.na
\fB\fB-l\fR \fIlink\fR, \fB--link\fR=\fIlink\fR\fR
.ad
.sp .6
@@ -300,8 +318,7 @@ A comma-separated list of properties to be set to the specified values.
.sp
.ne 2
.na
-\fB\fBflowadm remove-flow\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] \fB-l\fR
-{\fIlink\fR | \fIflow\fR}\fR
+\fB\fBflowadm remove-flow\fR [\fB-t\fR] [\fB-R\fR \fIroot-dir\fR] [\fB-z\fR \fIzonename\fR] \fB-l\fR {\fIlink\fR | \fIflow\fR}\fR
.ad
.sp .6
.RS 4n
@@ -331,6 +348,16 @@ persistent removal.
.sp
.ne 2
.na
+\fB\fB-z\fR \fIzonename\fR
+.ad
+.sp .6
+.RS 4n
+Operate on a link that has been delegated to the specified zone.
+.RE
+
+.sp
+.ne 2
+.na
\fB\fB-l\fR \fIlink\fR | \fIflow\fR, \fB--link\fR=\fIlink\fR | \fIflow\fR\fR
.ad
.sp .6
diff --git a/usr/src/man/man1m/mount_tmpfs.1m b/usr/src/man/man1m/mount_tmpfs.1m
index a4f38d3b1f..7ea64130bd 100644
--- a/usr/src/man/man1m/mount_tmpfs.1m
+++ b/usr/src/man/man1m/mount_tmpfs.1m
@@ -1,9 +1,10 @@
'\" te
.\" Copyright (c) 2003, Sun Microsystems, Inc. All Rights Reserved
+.\" Copyright 2015 Joyent, Inc.
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH MOUNT_TMPFS 1M "Nov 24, 2003"
+.TH MOUNT_TMPFS 1M "Mar 18, 2015"
.SH NAME
mount_tmpfs \- mount tmpfs file systems
.SH SYNOPSIS
@@ -13,7 +14,6 @@ mount_tmpfs \- mount tmpfs file systems
.fi
.SH DESCRIPTION
-.sp
.LP
\fBtmpfs\fR is a memory based file system which uses kernel resources relating
to the \fBVM\fR system and page cache as a file system.
@@ -26,13 +26,13 @@ remain hidden until the file system is once again unmounted. The attributes
(mode, owner, and group) of the root of the \fBtmpfs\fR filesystem are
inherited from the underlying \fImount_point\fR, provided that those attributes
are determinable. If not, the root's attributes are set to their default
-values.
+values. The mode may also be overriden by the \fBmode\fR mount option, which
+takes precedence if set.
.sp
.LP
The \fIspecial\fR argument is usually specified as \fBswap\fR but is in fact
disregarded and assumed to be the virtual memory resources within the system.
.SH OPTIONS
-.sp
.ne 2
.na
\fB\fB-o\fR \fIspecific_options\fR\fR
@@ -45,13 +45,40 @@ available:
.sp
.ne 2
.na
+\fB\fBremount\fR\fR
+.ad
+.sp .6
+.RS 19n
+Remounts a file system with a new size. A size not explicitly
+set with \fBremount\fR reverts to no limit.
+.RE
+
+.sp
+.ne 2
+.na
+\fBmode=\fIoctalmode\fR\fR
+.ad
+.RS 19n
+The \fImode\fR argument controls the permissions of the \fBtmpfs\fR mount
+point. The argument must be an octal number, of the form passed to
+\fBchmod\fR(1). Only the access mode, setuid, setgid, and sticky bits (a mask
+of \fB07777\fR) may be set. If this option is not provided then the default
+mode behaviour, as described above, applies.
+.RE
+
+.sp
+.sp
+.ne 2
+.na
\fBsize=\fIsz\fR\fR
.ad
.RS 19n
The \fIsz\fR argument controls the size of this particular \fBtmpfs\fR file
system. If the argument is has a `k' suffix, the number will be interpreted as
a number of kilobytes. An `m' suffix will be interpreted as a number of
-megabytes. No suffix is interpreted as bytes. In all cases, the actual size of
+megabytes. A `g' suffix will be interpreted as a number of gigabytes. A `%'
+suffix will be interpreted as a percentage of the swap space available to the
+zone. No suffix is interpreted as bytes. In all cases, the actual size of
the file system is the number of bytes specified, rounded up to the physical
pagesize of the system.
.RE
@@ -82,7 +109,6 @@ producing the error\f(CWdevice busy\fR.
.RE
.SH FILES
-.sp
.ne 2
.na
\fB\fB/etc/mnttab\fR\fR
@@ -92,13 +118,11 @@ Table of mounted file systems
.RE
.SH SEE ALSO
-.sp
.LP
\fBmount\fR(1M), \fBmkdir\fR(2), \fBmount\fR(2), \fBopen\fR(2),
\fBumount\fR(2), \fBmnttab\fR(4), \fBattributes\fR(5), \fBfsattr\fR(5),
\fBtmpfs\fR(7FS)
.SH NOTES
-.sp
.LP
If the directory on which a file system is to be mounted is a symbolic link,
the file system is mounted on the directory to which the symbolic link refers,
diff --git a/usr/src/man/man1m/prstat.1m b/usr/src/man/man1m/prstat.1m
index a5f02621cf..08e2ce9907 100644
--- a/usr/src/man/man1m/prstat.1m
+++ b/usr/src/man/man1m/prstat.1m
@@ -1,6 +1,7 @@
'\" te
.\" Copyright (c) 2013 Gary Mills
.\" Copyright (c) 2006, 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2013, Joyent, Inc. All Rights Reserved.
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
.\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with
.\" the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
@@ -10,10 +11,10 @@ prstat \- report active process statistics
.SH SYNOPSIS
.LP
.nf
-\fBprstat\fR [\fB-acHJLmRrtTvWZ\fR] [\fB-d\fR u | d] [\fB-C\fR \fIpsrsetlist\fR] [\fB-h\fR \fIlgrplist\fR]
+\fBprstat\fR [\fB-acHJLmRrtTvVWZ\fR] [\fB-d\fR u | d] [\fB-C\fR \fIpsrsetlist\fR] [\fB-h\fR \fIlgrplist\fR]
[\fB-j\fR \fIprojlist\fR] [\fB-k\fR \fItasklist\fR] [\fB-n\fR \fIntop\fR[,\fInbottom\fR]]
[\fB-p\fR \fIpidlist\fR] [\fB-P\fR \fIcpulist\fR] [\fB-s\fR \fIkey\fR | \fB-S\fR \fIkey\fR ]
- [\fB-u\fR \fIeuidlist\fR] [\fB-U\fR \fIuidlist\fR] [\fB-z\fR \fIzoneidlist\fR]
+ [\fB-u\fR \fIeuidlist\fR] [\fB-U\fR \fIuidlist\fR] [\fB-z\fR \fIzoneidlist\fR] [\fB-Z\fR]
[\fIinterval\fR [\fIcount\fR]]
.fi
@@ -366,6 +367,18 @@ with the \fB-\fR sign.
.sp
.ne 2
.na
+\fB\fB-V\fR\fR
+.ad
+.sp .6
+.RS 4n
+Report accurate aggregated SWAP and RSS values when used with the \fB-J\fR,
+\fB-t\fR, \fB-T\fR or \fB-Z\fR options. This uses an accurate, but more expensive,
+calculation to determine the aggregated values for the specified grouping.
+.RE
+
+.sp
+.ne 2
+.na
\fB\fB-W\fR\fR
.ad
.sp .6
@@ -447,9 +460,11 @@ devices, in kilobytes (\fBK\fR), megabytes (\fBM\fR), or gigabytes (\fBG\fR).
.RS 4n
The resident set size of the process (\fBRSS\fR), in kilobytes (\fBK\fR),
megabytes (\fBM\fR), or gigabytes (\fBG\fR). The RSS value is an estimate
-provided by \fBproc\fR(4) that might underestimate the actual resident set
-size. Users who want to get more accurate usage information for capacity
-planning should use the \fB-x\fR option to \fBpmap\fR(1) instead.
+provided by \fBproc\fR(4) that might underestimate the actual
+per-process resident set size and usually overestimates the aggregated
+resident set size. Users who want to get more accurate usage information for
+capacity planning should use either the \fB-V\fR option or, for per-process
+results, the \fB-x\fR option to \fBpmap\fR(1) instead.
.RE
.sp
diff --git a/usr/src/man/man1m/reboot.1m b/usr/src/man/man1m/reboot.1m
index 1ff92b6f33..8eadedf18d 100644
--- a/usr/src/man/man1m/reboot.1m
+++ b/usr/src/man/man1m/reboot.1m
@@ -144,8 +144,7 @@ This option is currently available only on x86 systems. The \fB-p\fR and
.ad
.sp .6
.RS 4n
-Quick. Reboot quickly and ungracefully, without shutting down running processes
-first.
+Quick. Reboot quickly without halting running zones first.
.RE
.SH OPERANDS
diff --git a/usr/src/man/man1m/snoop.1m b/usr/src/man/man1m/snoop.1m
index ca969e22b4..4184c66e10 100644
--- a/usr/src/man/man1m/snoop.1m
+++ b/usr/src/man/man1m/snoop.1m
@@ -1,9 +1,10 @@
'\" te
.\" Copyright (C) 2009, Sun Microsystems, Inc. All Rights Reserved
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH SNOOP 1M "Feb 18, 2009"
+.TH SNOOP 1M "Feb 24, 2014"
.SH NAME
snoop \- capture and inspect network packets
.SH SYNOPSIS
@@ -12,7 +13,7 @@ snoop \- capture and inspect network packets
\fBsnoop\fR [\fB-aqrCDINPSvV\fR] [\fB-t\fR [r | a | d]] [\fB-c\fR \fImaxcount\fR]
[\fB-d\fR \fIdevice\fR] [\fB-i\fR \fIfilename\fR] [\fB-n\fR \fIfilename\fR] [\fB-o\fR \fIfilename\fR]
[\fB-p\fR \fIfirst\fR [, \fIlast\fR]] [\fB-s\fR \fIsnaplen\fR] [\fB-x\fR \fIoffset\fR [, \fIlength\fR]]
- [\fIexpression\fR]
+ [\fB-z\fR \fIzonename\fR] [\fIexpression\fR]
.fi
.SH DESCRIPTION
@@ -298,6 +299,22 @@ the whole packet, use an \fIoffset\fR of 0. If a \fIlength\fR value is not
provided, the rest of the packet is displayed.
.RE
+.sp
+.ne 2
+.na
+.BI -z zonename
+.ad
+.sp .6
+.RS 4n
+Open an earlier datalink specified via
+.B -d
+or
+.B -I
+in the specified zone \fIzonename\fR.
+This option is only meaningful in the global zone and
+allows the global zone to inspect datalinks of non-global zones.
+.RE
+
.SH OPERANDS
.sp
.ne 2
diff --git a/usr/src/man/man1m/svc.startd.1m b/usr/src/man/man1m/svc.startd.1m
index 7c80c35e23..103c6b5fec 100644
--- a/usr/src/man/man1m/svc.startd.1m
+++ b/usr/src/man/man1m/svc.startd.1m
@@ -4,7 +4,7 @@
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH SVC.STARTD 1M "Mar 18, 2011"
+.TH SVC.STARTD 1M "Aug 23, 2012"
.SH NAME
svc.startd \- Service Management Facility master restarter
.SH SYNOPSIS
@@ -372,10 +372,13 @@ properties listed below in the \fBstartd\fR property group.
.RS 4n
The \fBcritical_failure_count\fR and \fBcritical_failure_period\fR properties
together specify the maximum number of service failures allowed in a given
-time interval before \fBsvc.startd\fR transitions the service to maintenance.
+number of seconds before \fBsvc.startd\fR transitions the service to
+maintenance.
If the number of failures exceeds \fBcritical_failure_count\fR in any period of
\fBcritical_failure_period\fR seconds, \fBsvc.startd\fR will transition the
-service to maintenance.
+service to maintenance. The \fBcritical_failure_count\fR value is limited
+to the range 1-10 and defaults to 10. The \fBcritical_failure_period\fR
+defaults to 600 seconds.
.RE
.sp
diff --git a/usr/src/man/man1m/tunefs.1m b/usr/src/man/man1m/tunefs.1m
index 7f522f43fa..0b849f2dd7 100644
--- a/usr/src/man/man1m/tunefs.1m
+++ b/usr/src/man/man1m/tunefs.1m
@@ -3,7 +3,7 @@
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH TUNEFS 1M "Dec 5, 2003"
+.TH TUNEFS 1M "Sep 19, 2013"
.SH NAME
tunefs \- tune an existing UFS file system
.SH SYNOPSIS
@@ -120,3 +120,9 @@ encountering files greater than or equal to 2 Gbyte ( 2^31 bytes).
.sp
.LP
\fBmkfs_ufs\fR(1M), \fBnewfs\fR(1M), \fBattributes\fR(5), \fBlargefile\fR(5)
+
+.\" Take this out and a Unix Demon will dog your steps from now until
+.\" the time_t's wrap around.
+.SH BUGS
+.sp
+You can tune a file system, but you can't tune a fish.
diff --git a/usr/src/man/man1m/vfsstat.1m b/usr/src/man/man1m/vfsstat.1m
new file mode 100644
index 0000000000..aef8431a09
--- /dev/null
+++ b/usr/src/man/man1m/vfsstat.1m
@@ -0,0 +1,213 @@
+'\" te
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\" Copyright 2014 Joyent, Inc. All rights reserved.
+.\"
+.TH "VFSSTAT" "1m" "May 1, 2014" "" ""
+.
+.SH "NAME"
+\fBvfsstat\fR \-\- Report VFS read and write activity
+.
+.SH "SYNOPSIS"
+.
+.nf
+vfsstat [\-hIMrzZ] [interval [count]]
+.
+.fi
+.
+.SH "DESCRIPTION"
+The vfsstat utility reports a summary of VFS read and write
+activity per zone\. It first prints all activity since boot, then
+reports activity over a specified interval\.
+.
+.P
+When run from a non\-global zone (NGZ), only activity from that NGZ
+can be observed\. When run from a the global zone (GZ), activity
+from the GZ and all other NGZs can be observed\.
+.
+.P
+This tool is convenient for examining I/O performance as
+experienced by a particular zone or application\. Other tools
+which examine solely disk I/O do not report reads and writes which
+may use the filesystem\'s cache\. Since all read and write system
+calls pass through the VFS layer, even those which are satisfied
+by the filesystem cache, this tool is a useful starting point when
+looking at a potential I/O performance problem\. The vfsstat
+command reports the most accurate reading of I/O performance as
+experienced by an application or zone\.
+.
+.P
+One additional feature is that ZFS zvol performance is also reported
+by this tool, even though zvol I/O does not go through the VFS
+layer\. This is done so that this single tool can be used to monitor
+I/O performance and because its not unreasonable to think of zvols
+as being included along with other ZFS filesystems\.
+.
+.P
+The calculations and output fields emulate those from iostat(1m)
+as closely as possible\. When only one zone is actively performing
+disk I/O, the results from iostat(1m) in the global zone and
+vfsstat in the local zone should be almost identical\. Note that
+many VFS read operations may be handled by the filesystem cache,
+so vfsstat and iostat(1m) will be similar only when most
+operations require a disk access\.
+.
+.P
+As with iostat(1m), a result of 100% for VFS read and write
+utilization does not mean that the VFS layer is fully saturated\.
+Instead, that measurement just shows that at least one operation
+was pending over the last interval of time examined\. Since the
+VFS layer can process more than one operation concurrently, this
+measurement will frequently be 100% but the VFS layer can still
+accept additional requests\.
+.
+.SH "OUTPUT"
+The vfsstat utility reports the following information:
+.
+.IP "" 4
+.
+.nf
+r/s
+.RS
+reads per second
+.RE
+
+w/s
+.RS
+writes per second
+.RE
+
+kr/s
+.RS
+kilobytes read per second
+.RE
+
+kw/s
+.RS
+kilobytes written per second
+.RE
+
+ractv
+.RS
+average number of read operations actively being serviced by the VFS layer
+.RE
+
+wactv
+.RS
+average number of write operations actively being serviced by the VFS layer
+.RE
+
+read_t
+.RS
+average VFS read latency, in microseconds
+.RE
+
+writ_t
+.RS
+average VFS write latency, in microseconds
+.RE
+
+%r
+.RS
+percent of time there is a VFS read operation pending
+.RE
+
+%w
+.RS
+percent of time there is a VFS write operation pending
+.RE
+
+d/s
+.RS
+VFS operations per second delayed by the ZFS I/O throttle
+.RE
+
+del_t
+.RS
+average ZFS I/O throttle delay, in microseconds
+.RE
+.
+.fi
+.
+.IP "" 0
+.
+.SH "OPTIONS"
+The following options are supported:
+.
+.P
+\-h
+.RS
+Show help message and exit
+.RE
+.
+.P
+\-I
+.RS
+Print results per interval, rather than per second (where applicable)
+.RE
+.
+.P
+\-M
+.RS
+Print results in MB/s instead of KB/s
+.RE
+.
+.P
+\-r
+.RS
+Show results in a comma\-separated format
+.RE
+.
+.P
+\-z
+.RS
+Hide zones with no VFS activity
+.RE
+.
+.P
+\-Z
+.RS
+Print results for all zones, not just the current zone
+.RE
+.
+.SH "OPERANDS"
+interval
+.
+.P
+Specifies the length in seconds to pause between each interval
+report\. If not specified, vfsstat will print a summary since boot
+and exit\.
+.
+.P
+count
+.
+.P
+Specifies the number of intervals to report\. Defaults to
+unlimited if not specified\.
+.
+.SH "SEE ALSO"
+.
+.nf
+iostat(1m), ziostat(1m), mpstat(1m)
+.
+.fi
+.
+.SH "NOTES"
+This command does not examine readdir or any other VFS operations,
+only read and write operations\.
+.
+.P
+This command does not look at network I/O, only I/O operations to
+or from a file\.
+.
+.P
+The output format from vfsstat may change over time; use the
+comma\-separated output for a stable output format\.
diff --git a/usr/src/man/man1m/vndadm.1m b/usr/src/man/man1m/vndadm.1m
new file mode 100644
index 0000000000..253518a88a
--- /dev/null
+++ b/usr/src/man/man1m/vndadm.1m
@@ -0,0 +1,651 @@
+'\" te
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\"
+.TH VNDADM 1M "Mar 06, 2014"
+.SH NAME
+vndadm \- administer vnd devices
+
+.SH SYNOPSIS
+
+.nf
+vndadm create [-z zonename] [-l datalink] device
+vndadm destroy [-z zonename] device...
+vndadm list [-p] [-d delim] [-o field,...] [-z zonename] [device]...
+vndadm get [-p] [-d delim] [-z zonename] device [prop]...
+vndadm set [-z zonename] device prop=val...
+.fi
+
+.SH DESCRIPTION
+.sp
+.LP
+The vndadm command is used to administer vnd devices. A vnd device is
+similar to an IP network interface, except that the vnd device operates
+at layer two. A vnd device is created over a data link (see dladm(1M))
+and its address is that of the underlying data link. For ethernet based
+devices, that address would be the MAC address of the data link. vnd
+devices are character devices which may be used to send and receive
+layer two packets. When reading or writing to a vnd device, the full
+frame must be present. This is useful for working with virtual machines,
+or other environments where you need to manipulate the entire layer two
+frame.
+
+.sp
+.LP
+Every command takes a device as an argument. To specify a vnd device,
+you just use the name of the device. Devices are scoped to zones. If no
+zone is specified, the current zone is assumed. A device name can be any
+series of alphanumeric ascii characters which typically match the name
+of the underlying data link. A given vnd device name must be unique in a
+given zone, but the same name can be used across zones.
+.sp
+.SH OPTIONS
+.sp
+.LP
+All vndadm subcommands have the following common option:
+.sp
+.ne 2
+.na
+-z zonename
+.ad
+.sp .6
+.RS 4n
+Operate in the context of the specified zone. When creating a vnd
+device, the named device is created in the specified zone. All other
+operations scope the device lookup to the specified zone. If the user is
+not in the global zone, the use of -z will not work.
+
+.sp
+.LP
+When -z is used and multiple devices are specified, then
+the use of -z applies to all of the devices.
+.RE
+
+.SH SUBCOMMANDS
+.sp
+.ne 2
+.na
+vndadm create [-z zonename] [-l datalink] device
+.ad
+.sp
+.RS 4n
+Creates a vnd device with the specified name device. If -l datalink is
+not specified, it is assumed that the data link and the device share the
+same name. The created device will exist for as long as the zone exists
+or until a call to vndadm destroy. vnd devices do not persist across
+system reboots. Note, if an IP interface or another libdlpi(3LIB)
+consumer is already using the data link, then vnd will fail.
+
+.sp
+The maximum length of the name of device is 31 characters. The allowed
+set of characters is alphanumberic characters, ':', \'-', and \'_'. The
+names 'zone' and 'ctl' are reserved and may not be used.
+
+.sp
+.ne 2
+.na
+-l datalink
+.ad
+.sp .6
+.RS 4n
+Specifies the name of the data link to create the device over. This
+allows the vnd device name to be different from the data link's name.
+.RE
+.sp
+.ne 2
+.na
+-z zonename
+.ad
+.sp .6
+.RS 4n
+See OPTIONS above.
+.RE
+
+.RE
+
+.sp
+.ne 2
+.na
+vndadm destroy [-z zonename] device...
+.ad
+.sp
+.RS 4n
+Destroys the specified device. The destruction is analogous to
+unlink(2). If the device is still open and used by applications, the
+device will continue to exist, but it will no longer be accessible by
+the name device.
+.sp
+.ne 2
+.na
+-z zonename
+.ad
+.sp .6
+.RS 4n
+See OPTIONS above.
+.RE
+.RE
+
+.sp
+.ne 2
+.na
+vndadm list [-p] [-d delim] [-o field,...] [-z zonename] [device]...
+.ad
+.sp
+.RS 4n
+Lists active vnd devices. By default, vnadm list lists all devices in
+every zone that the caller is allowed to see; the current zone if in the
+non-global zone, and all zones in the global zone. If device is
+specified one or more times, then output will be limited to the
+specified devices.
+.sp
+.ne 2
+.na
+-o field[,...]
+.ad
+.sp .6
+.RS 4n
+A case-insensitive, comma-separated list of output fields. When -o is
+not used, all of the fields listed below are shown. The field name must
+be one of the following fields:
+
+.sp
+.ne 2
+.na
+NAME
+.ad
+.sp .6
+.RS 4n
+The name of the vnd device.
+.RE
+
+.sp
+.ne 2
+.na
+DATALINK
+.ad
+.sp .6
+.RS 4n
+The name of the data link the vnd device was created over.
+.RE
+
+.sp
+.ne 2
+.na
+ZONENAME
+.ad
+.sp .6
+.RS 4n
+The name of the zone that the vnd device exists in.
+.RE
+.RE
+
+.sp
+.ne 2
+.na
+-p
+.ad
+.sp .6
+.RS 4n
+Display the output in a stable machine parseable format. The -o option
+is required with the -p option. See "Parseable Output Format" below.
+.RE
+
+.sp
+.ne 2
+.na
+-d delim
+.ad
+.sp .6
+.RS 4n
+Change the delimiter used in conjunction with generating parseable
+output. This option may only be specified when -p is also specified.
+.RE
+
+.sp
+.ne 2
+.na
+-z zonename
+.ad
+.sp .6
+.RS 4n
+See OPTIONS above.
+.RE
+
+.RE
+
+
+.sp
+.ne 2
+.na
+vndadm get [-p] [-d delim] [-z zonename] device [prop]...
+.ad
+.sp
+.RS 4n
+Displays the properties for the specified device. By default, all
+properties of a given device are displayed. If prop is specified one or
+more times, then only the specified properties will be displayed for
+device. For a list of properties, see the section "Properties" below.
+The property output consists of the following four columns:
+.sp
+.ne 2
+.na
+LINK
+.ad
+.sp .6
+.RS 4n
+The name of the device
+.RE
+
+.sp
+.ne 2
+.na
+PROPERTY
+.ad
+.sp .6
+.RS 4n
+The name of the property. Note that some properties that are private to
+the implementation may be displayed. Those properties begin with a
+leading underscore.
+.RE
+
+.sp
+.ne 2
+.na
+PERM
+.ad
+.sp .6
+.RS 4n
+Describes whether the property is read-only or
+if it is read-write. This field does not
+indicate if the current user has permission, but
+lists permissions for a privileged user.
+.RE
+
+.sp
+.ne 2
+.na
+VALUE
+.ad
+.sp .6
+.RS 4n
+The value of the property.
+.RE
+
+.sp
+.ne 2
+.na
+-p
+.ad
+.sp .6
+.RS 4n
+Display the output in a stable machine parseable format. See "Parseable
+Output Format" below.
+.RE
+
+.sp
+.ne 2
+.na
+-d delim
+.ad
+.sp .6
+.RS 4n
+Change the delimiter used in conjunction with generating parseable
+output. This option may only be specified when -p is also specified.
+.RE
+
+.sp
+.ne 2
+.na
+-z zonename
+.ad
+.sp .6
+.RS 4n
+See OPTIONS above.
+.RE
+.RE
+
+.sp
+.ne 2
+.na
+vndadm set [-z zonename] device prop=val...
+.ad
+.sp
+.RS 4n
+Sets properties on the named device. Setting a property takes effect for
+all operations on the device, after the program returns. Multiple
+properties can be set at once; however, properties are applied one at a
+time to the device. Property names and values must be separated with an
+equals sign. Additional property and value pairs should be separated by
+white space. For a list of properties, see the section "Properties"
+below.
+
+.sp
+.ne 2
+.na
+-z zonename
+.ad
+.sp .6
+.RS 4n
+See OPTIONS above.
+.RE
+.RE
+
+.SS Parseable Output Format
+.sp
+.LP
+The default output for parseable data is to be separated with a single
+ascii space character. The delimiter may be changed with the -d
+option. When parseable output is requested, no numbers that represent
+sizes will be displayed in human readable form, they will be fully
+expanded. eg. the number 42K will instead be 43008.
+
+.SS Properties
+.sp
+.LP
+The following are supported and stable properties. Note that any
+properties that starts with a leading underscore are not a stable
+property and may be removed at any time.
+
+.sp
+.ne 2
+.na
+rxbuf
+.ad
+.sp .6
+.RS 4n
+A read/write property that controls the size of the receive buffer for
+the device. All received data enters the receive buffer until a consumer
+consumes it. If adding a received frame would exceed the size of the
+receive buffer, then that frame will be dropped. The maximum size of the
+buffer is limited by the 'maxsize' property. The minimum size of the
+buffer is the value of the 'maxtu' property. The property's value may be
+anything between that maximum and minimum. When setting this property,
+standard size suffixes such as 'K' and 'M' may be used.
+.RE
+
+.sp
+.ne 2
+.na
+txbuf
+.ad
+.sp .6
+.RS 4n
+A read/write property that controls the size of the transmit buffer. All
+in-flight transmitted data must be able to fit into the transmit buffer
+to account for potential flow control events. If there is not enough
+space in the transmit buffer, transmit related I/O operations will
+either block or fail based on whether the file has been put into
+non-blocking mode by setting O_NONBLOCK or O_NDELAY with fcntl(2). The
+maximum size of the buffer is limited by the 'maxsize' property. The
+minimum size of the buffer is the value of the 'maxtu' property. The
+property's value may be anything between that maximum and minimum. When
+setting this property, standard size suffixes such as 'K' and 'M' may be
+used.
+
+.RE
+
+.sp
+.ne 2
+.na
+maxsize
+.ad
+.sp .6
+.RS 4n
+A read-only property that describes the maximum size of buffers in the
+system. Properties such as rxbuf and txbuf cannot be set beyond this.
+.RE
+
+.sp
+.ne 2
+.na
+mintu
+.ad
+.sp .6
+.RS 4n
+A read-only property that describes the minimum size of a frame
+transmitted to the underlying data link. Note that the minimum listed
+here may be less than the size of a valid layer two frame and therefore
+may be dropped. A frame smaller than this value will be rejected by vnd.
+.RE
+
+.sp
+.ne 2
+.na
+maxtu
+.ad
+.sp .6
+.RS 4n
+A read-only property that describes the maximum size of a frame
+transmitted to the underlying data link. A frame larger than this value
+will be rejected by vnd.
+.RE
+
+.SH EXAMPLES
+.LP
+Example 1 Creating a vnd device
+.sp
+.LP
+To create a vnd device over the VNIC named net0, enter the following
+command:
+
+.sp
+.in +2
+.nf
+# vndadm create net0
+.fi
+.in -2
+.sp
+
+.LP
+Example 2 Creating a vnd device in another zone
+.sp
+.LP
+
+To create a vnd device over the VNIC named net1 in the zone
+1b7155a4-aef9-e7f0-d33c-9705e4b8b525, enter the following command:
+
+.sp
+.in +2
+.nf
+# vndadm create -z 1b7155a4-aef9-e7f0-d33c-9705e4b8b525 net1
+.fi
+.in -2
+.sp
+
+.LP
+Example 3 Destroying a vnd device
+.sp
+.LP
+
+To destroy the vnd device named net0, enter the following command:
+
+.sp
+.in +2
+.nf
+# vndadm destroy net0
+.fi
+.in -2
+.sp
+
+.LP
+Example 4 Destroying a vnd device in another zone
+.sp
+.LP
+
+To destroy the vnd device named net1 in the zone
+1b7155a4-aef9-e7f0-d33c-9705e4b8b525, enter the following command:
+
+.sp
+.in +2
+.nf
+# vndadm destroy -z 1b7155a4-aef9-e7f0-d33c-9705e4b8b525 net1
+.fi
+.in -2
+.sp
+
+.LP
+Example 5 List all vnd devices
+.sp
+.LP
+
+To list all devices, run the following command:
+
+.sp
+.in +2
+.nf
+# vndadm list
+NAME DATALINK ZONENAME
+net0 net0 global
+net0 net0 1b7155a4-aef9-e7f0-d33c-9705e4b8b525
+.fi
+.in -2
+.sp
+
+.LP
+Example 6 Listing devices in a specific zone
+.sp
+.LP
+
+To list devices in a specific zone, run the following command:
+
+.sp
+.in +2
+.nf
+# vndadm list -z 1b7155a4-aef9-e7f0-d33c-9705e4b8b525
+
+NAME DATALINK ZONENAME
+net0 net0 1b7155a4-aef9-e7f0-d33c-9705e4b8b525
+.fi
+.in -2
+.sp
+
+.LP
+Example 7 List all devices in a parseable format
+.sp
+.LP
+
+To list all devices in a parseable format with the delimiter of ':', run
+the following command:
+
+.sp
+.in +2
+.nf
+# vndadm list -p -d: -o name,datalink,zone
+net0:net0:global
+net0:net0:1b7155a4-aef9-e7f0-d33c-9705e4b8b525
+.fi
+.in -2
+.sp
+
+.LP
+Example 8 Retrieving all properties for a device
+.sp
+.LP
+
+To retrieve all of the properties for the vnd device foo0, run the
+following command:
+
+.sp
+.in +2
+.nf
+# vndadm get foo0
+LINK PROPERTY PERM VALUE
+foo0 rxbuf rw 65536
+foo0 txbuf rw 65536
+foo0 maxsize r- 4194304
+foo0 mintu r- 0
+foo0 maxtu r- 1518
+foo0 _nflush rw 10
+foo0 _burstsz rw 10
+.fi
+.in -2
+.sp
+
+.LP
+Example 9 Retrieving specific properties for a device
+.sp
+.LP
+
+To retrieve just the rxbuf and txbuf properties for the vnd device foo0,
+run the following command:
+
+.sp
+.in +2
+.nf
+# vndadm get foo0 rxbuf txbuf
+LINK PROPERTY PERM VALUE
+foo0 rxbuf rw 65536
+foo0 txbuf rw 65536
+.fi
+.in -2
+.sp
+
+.LP
+Example 10 Retrieving properties for a device in a parseable format
+.sp
+.LP
+
+To retrieve all properties for the vnd device foo0 in a parseable
+format, run the following command:
+
+.sp
+.in +2
+.nf
+# vndadm get -p foo0
+foo0 rxbuf rw 65536
+foo0 txbuf rw 65536
+foo0 maxsize r- 4194304
+foo0 mintu r- 0
+foo0 maxtu r- 1518
+foo0 _nflush rw 10
+foo0 _burstsz rw 10
+.fi
+.in -2
+.sp
+
+.LP
+Example 11 Setting a property on a device
+.sp
+.LP
+
+To set the receive buffer size to one megabyte on the device foo0, run
+the following command:
+
+.sp
+.in +2
+.nf
+# vndadm set foo0 rxbuf=1M
+.fi
+.in -2
+.sp
+
+.LP
+Example 12 Setting multiple properties on a device
+.sp
+.LP
+
+To set the transmit buffer to 300 Kb and the receive buffer to 1 Mb, run
+the following command:
+
+.sp
+.in +2
+.nf
+# vndadm set foo0 rxbuf=300K txbuf=1M
+.fi
+.in -2
+.sp
+
+.SH SEE ALSO
+
+dladm(1M), ipadm(1M), fcntl(2), fcntl.h(3HEAD), libvnd(3LIB),
+vndstat(1M), vnd(7D)
diff --git a/usr/src/man/man1m/vndstat.1m b/usr/src/man/man1m/vndstat.1m
new file mode 100644
index 0000000000..a7f843e228
--- /dev/null
+++ b/usr/src/man/man1m/vndstat.1m
@@ -0,0 +1,163 @@
+'\" te
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\"
+.TH VNDSTAT 1M "Mar 06, 2014"
+.SH NAME
+vndstat \- report vnd activity
+
+.SH SYNOPSIS
+
+vndstat [interval [count]]
+
+.SH DESCRIPTION
+.sp
+.LP
+The vndstat command reports a summary of per-device vnd
+activity. Once per interval it prints a table of statistics per
+device. In the global zone, vndstat reports on all devices in the
+system. From the non-global zone, it only reports on devices that are
+present in that zone. vndstat reports on all vnd devices
+that exist, including anonymous devices which are not linked into the
+file system.
+.sp
+.LP
+The vndstat command's output includes the following information:
+.sp
+.ne 2
+.na
+.B name
+.ad
+.RS 14n
+The name of the device, if bound. If a given vnd device is not
+bound into the file system, hence considered anonymous, then there will
+be no name for the device.
+.RE
+
+.sp
+.ne 2
+.na
+.B rx B/s
+.ad
+.RS 14n
+The number of bytes received by the device during interval.
+.RE
+
+.sp
+.ne 2
+.na
+.B tx B/s
+.ad
+.RS 14n
+The number of bytes transmitted by the device during interval.
+.RE
+
+.sp
+.ne 2
+.na
+.B drops
+.ad
+.RS 14n
+The number of packets and messages which have been dropped. This
+includes all drops due to insufficient buffer space, IP hooks, and
+unknown or malformed DLPI messages.
+.RE
+
+.sp
+.ne 2
+.na
+.B txfc
+.ad
+.RS 14n
+The number of flow control events that have occurred. A flow control
+event occurs when the layers below vnd request that all transmits
+be paused until a future call resumes the flow. This statistic is
+incremented when the flow is resumed. It is not incremented when it is
+first paused.
+.RE
+
+.sp
+.ne 2
+.na
+.B zone
+.ad
+.RS 14n
+The name of the zone the device is located in.
+.RE
+
+.SH OPTIONS
+
+.sp
+.ne 2
+.na
+interval
+.ad
+.RS 13n
+Report once each interval seconds. interval may not be
+fractional.
+.RE
+
+.sp
+.ne 2
+.na
+count
+.ad
+.RS 13n
+Only print count reports, then exit.
+.RE
+.sp
+.LP
+When no arguments are given to vndstat, it will always print at an
+interval of one second. Reports will continue until vndstat
+is terminated.
+
+.SH EXAMPLES
+.LP
+Example 1 Print five seconds of data
+
+.sp
+.in +2
+.nf
+example% vndstat 1 5
+ name | rx B/s | tx B/s | drops txfc | zone
+ net0 | 1.45MB/s | 14.1KB/s | 0 0 | 1b7155a4-aef9-e7f0-d33c-9705e4b8b525
+ net0 | 3.50MB/s | 19.5KB/s | 0 0 | 1b7155a4-aef9-e7f0-d33c-9705e4b8b525
+ net0 | 2.83MB/s | 30.8KB/s | 0 0 | 1b7155a4-aef9-e7f0-d33c-9705e4b8b525
+ net0 | 3.08MB/s | 30.6KB/s | 0 0 | 1b7155a4-aef9-e7f0-d33c-9705e4b8b525
+ net0 | 3.21MB/s | 30.6KB/s | 0 0 | 1b7155a4-aef9-e7f0-d33c-9705e4b8b525
+.fi
+.in -2
+.sp
+
+.SH ATTRIBUTES
+.sp
+.LP
+See attributes(5) for descriptions of the following attributes:
+.sp
+
+.sp
+.TS
+box;
+c | c
+l | l .
+ATTRIBUTE TYPE ATTRIBUTE VALUE
+_
+Interface Stability See below.
+.TE
+
+.sp
+.LP
+Invocation is evolving. Human readable output is unstable.
+.SH SEE ALSO
+
+dlstat(1M), nicstat(1M), vndadm(1M), vnd(7M)
diff --git a/usr/src/man/man1m/zfs.1m b/usr/src/man/man1m/zfs.1m
index 7c28cd8c79..ddf7eb908b 100644
--- a/usr/src/man/man1m/zfs.1m
+++ b/usr/src/man/man1m/zfs.1m
@@ -23,7 +23,7 @@
.\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
.\" Copyright (c) 2011, 2015 by Delphix. All rights reserved.
.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
-.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\" Copyright (c) 2015, Joyent, Inc. All rights reserved.
.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
.\" Copyright (c) 2014 Integros [integros.com]
.\" Copyright 2016 Nexenta Systems, Inc.
diff --git a/usr/src/man/man1m/zoneadm.1m b/usr/src/man/man1m/zoneadm.1m
index 1ddfc1356a..1056bb92d2 100644
--- a/usr/src/man/man1m/zoneadm.1m
+++ b/usr/src/man/man1m/zoneadm.1m
@@ -1,6 +1,7 @@
'\" te
.\" Copyright 2015 Nexenta Systems, Inc. All rights reserved.
.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2011 Joyent, Inc. All Rights Reserved.
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
@@ -128,12 +129,14 @@ Use the following command to attach a zone:
.sp
.ne 2
.na
-\fB\fBboot\fR [\fB--\fR \fIboot_options\fR]\fR
+\fB\fBboot\fR [\fB-X\fR] [\fB--\fR \fIboot_options\fR]\fR
.ad
.sp .6
.RS 4n
Boot (or activate) the specified zones.
.sp
+The \fI-X\fR option enables debug for the zone's brand while booting.
+.sp
The following \fIboot_options\fR are supported:
.sp
.ne 2
@@ -248,12 +251,25 @@ The source zone must be halted before this subcommand can be used.
.sp
.ne 2
.na
-\fB\fBhalt\fR\fR
+\fB\fBhalt [\fB-X\fR]\fR\fR
.ad
.sp .6
.RS 4n
Halt the specified zones. \fBhalt\fR bypasses running the shutdown scripts
inside the zone. It also removes run time resources of the zone.
+.sp
+The \fI-X\fR option enables debug for the zone's brand while halting.
+.sp
+Use:
+.sp
+.in +2
+.nf
+zlogin \fIzone\fR shutdown
+.fi
+.in -2
+.sp
+
+to cleanly shutdown the zone by running the shutdown scripts.
.RE
.sp
@@ -403,24 +419,28 @@ and normal restrictions for \fIzonepath\fR apply.
.sp
.ne 2
.na
-\fB\fBready\fR\fR
+\fB\fBready [\fB-X\fR]\fR\fR
.ad
.sp .6
.RS 4n
Prepares a zone for running applications but does not start any user processes
in the zone.
+.sp
+The \fI-X\fR option enables debug for the zone's brand while readying.
.RE
.sp
.ne 2
.na
-\fB\fBreboot\fR\ [\fB--\fR \fIboot_options\fR]]\fR
+\fB\fBreboot\fR\ [\fB-X\fR] [\fB--\fR \fIboot_options\fR]]\fR
.ad
.sp .6
.RS 4n
Restart the zones. This is equivalent to a \fBhalt\fR \fBboot\fR sequence. This
subcommand fails if the specified zones are not active. See \fIboot\fR subcommand
for the boot options.
+.sp
+The \fI-X\fR option enables debug for the zone's brand while rebooting.
.RE
.sp
diff --git a/usr/src/man/man1m/zonecfg.1m b/usr/src/man/man1m/zonecfg.1m
index f7a491ceee..bfdbcd26c9 100644
--- a/usr/src/man/man1m/zonecfg.1m
+++ b/usr/src/man/man1m/zonecfg.1m
@@ -1,26 +1,26 @@
'\" te
.\" Copyright (c) 2004, 2009 Sun Microsystems, Inc. All Rights Reserved.
-.\" Copyright 2013 Joyent, Inc. All Rights Reserved.
+.\" Copyright 2015 Joyent, Inc.
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
.\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH ZONECFG 1M "Feb 28, 2014"
+.TH ZONECFG 1M "Nov 4, 2015"
.SH NAME
zonecfg \- set up zone configuration
.SH SYNOPSIS
.LP
.nf
-\fBzonecfg\fR \fB-z\fR \fIzonename\fR
+\fBzonecfg\fR {\fB-z\fR \fIzonename\fR | \fB-u\fR \fIuuid\fR}
.fi
.LP
.nf
-\fBzonecfg\fR \fB-z\fR \fIzonename\fR \fIsubcommand\fR
+\fBzonecfg\fR {\fB-z\fR \fIzonename\fR | \fB-u\fR \fIuuid\fR} \fIsubcommand\fR
.fi
.LP
.nf
-\fBzonecfg\fR \fB-z\fR \fIzonename\fR \fB-f\fR \fIcommand_file\fR
+\fBzonecfg\fR {\fB-z\fR \fIzonename\fR | \fB-u\fR \fIuuid\fR} \fB-f\fR \fIcommand_file\fR
.fi
.LP
@@ -29,7 +29,6 @@ zonecfg \- set up zone configuration
.fi
.SH DESCRIPTION
-.sp
.LP
The \fBzonecfg\fR utility creates and modifies the configuration of a zone.
Zone configuration consists of a number of resources and properties.
@@ -43,7 +42,8 @@ The following synopsis of the \fBzonecfg\fR command is for interactive usage:
.sp
.in +2
.nf
-zonecfg \fB-z\fR \fIzonename subcommand\fR
+{\fB-z\fR \fIzonename\fR | \fB-u\fR \fIuuid\fR}
+zonecfg {\fB-z\fR \fIzonename | \fB-u\fR \fIuuid} subcommand\fR
.fi
.in -2
.sp
@@ -72,7 +72,6 @@ the \fBzonecfg\fR properties and resources. See the brand-specific man page for
more details on each brand. For an overview of brands, see the \fBbrands\fR(5)
man page.
.SS "Resources"
-.sp
.LP
The following resource types are supported:
.sp
@@ -166,7 +165,6 @@ Resource control.
.RE
.SS "Properties"
-.sp
.LP
Each resource type has one or more properties. There are also some global
properties, that is, properties of the configuration as a whole, rather than of
@@ -337,6 +335,16 @@ The following properties are supported:
.sp
.ne 2
.na
+\fB(global)\fR
+.ad
+.sp .6
+.RS 4n
+\fBzfs-io-priority\fR
+.RE
+
+.sp
+.ne 2
+.na
\fB\fBfs\fR\fR
.ad
.sp .6
@@ -351,7 +359,7 @@ The following properties are supported:
.ad
.sp .6
.RS 4n
-\fBaddress\fR, \fBphysical\fR, \fBdefrouter\fR
+\fBaddress\fR, \fBallowed-address\fR, \fBdefrouter\fR, \fBglobal-nic\fR, \fBmac-addr\fR, \fBphysical\fR, \fBproperty\fR, \fBvlan-id\fR
.RE
.sp
@@ -614,7 +622,17 @@ Values needed to determine how, where, and so forth to mount file systems. See
.sp
.ne 2
.na
-\fB\fBnet\fR: address, physical, defrouter\fR
+\fB\fBinherit-pkg-dir\fR: dir\fR
+.ad
+.sp .6
+.RS 4n
+The directory path.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBnet\fR: address, allowed-address, defrouter, global-nic, mac-addr, physical, property, vlan-id\fR
.ad
.sp .6
.RS 4n
@@ -653,6 +671,10 @@ zone. However, if the interface is not used by the global zone, it should be
configured \fBdown\fR in the global zone, and the default router for the
interface should be specified here.
.sp
+The global-nic is used for exclusive stack zones which will use a VNIC on-demand. When the zone boots, a VNIC named using the physical property will be created on the global NIC. If provided, the mac-addr and vlan-id will be set on this VNIC.
+.sp
+The \fBproperty\fR setting is a resource which can be used to set arbitrary name/value pairs on the network. These name/value pairs are made available to the zone's brand, which can use them as needed to set up the network interface.
+.sp
For an exclusive-IP zone, the physical property must be set and the address and
default router properties cannot be set.
.RE
@@ -784,7 +806,7 @@ property is not specified, the scheduling class is established as follows:
.ie t \(bu
.el o
If the \fBcpu-shares\fR property or equivalent rctl is set, the scheduling
-class FSS is used.
+class \fBFSS\fR is used.
.RE
.RS +4
.TP
@@ -800,10 +822,15 @@ used.
.el o
Under any other conditions, the system default scheduling class is used.
.RE
+.sp
+If the \fBFX\fR scheduling class is specified, then the optional
+\fBfixed-hi-pri\fR attribute can be set to \fBtrue\fR. This causes all of the
+processes in the zone to run at the highest \fBFX\fR priority. By default
+processes under \fBFX\fR run at the lowest priority. See \fBpriocntl\fR(2)
+for details on each scheduling class.
.RE
-
.sp
.ne 2
.na
@@ -884,6 +911,16 @@ is not supported.
.RE
.sp
+.ne 2
+.na
+\fBglobal: \fBzfs-io-priority\fR\fR
+.ad
+.sp .6
+.RS 4n
+Specifies a priority for this zone's ZFS I/O. The priority is used by the ZFS I/O scheduler as in input to determine how to schedule I/O across zones. By default all zones have a priority of 1. The value can be increased for zones whose I/O is more critical. This property is the preferred way to set the \fBzone.zfs-io-priority\fR rctl.
+.RE
+
+.sp
.LP
The following table summarizes resources, property-names, and types:
.sp
@@ -906,13 +943,22 @@ resource property-name type
(global) max-shm-ids simple
(global) max-shm-memory simple
(global) scheduling-class simple
+(global) zfs-io-priority simple
fs dir simple
special simple
raw simple
type simple
options list of simple
net address simple
+ allowed-address simple
+ defrouter simple
+ global-nic simple
+ mac-addr simple
physical simple
+ property list of complex
+ name simple
+ value simple
+ vlan-id simple
device match simple
rctl name simple
value list of complex
@@ -944,7 +990,6 @@ contain alphanumerics plus the hyphen (\fB-\fR), underscore (\fB_\fR), and dot
use by the system. Finally, the "autoboot" global property must have a value of
"true" or "false".
.SS "Using Kernel Statistics to Monitor CPU Caps"
-.sp
.LP
Using the kernel statistics (\fBkstat\fR(3KSTAT)) module \fBcaps\fR, the system
maintains information for all capped projects and zones. You can access this
@@ -1097,7 +1142,6 @@ Name of the zone for which statistics are displayed.
.LP
See \fBEXAMPLES\fR for sample output from a \fBkstat\fR command.
.SH OPTIONS
-.sp
.LP
The following options are supported:
.sp
@@ -1125,8 +1169,17 @@ name \fBglobal\fR and all names beginning with \fBSUNW\fR are reserved and
cannot be used.
.RE
-.SH SUBCOMMANDS
.sp
+.ne 2
+.na
+\fB\fB-u\fR \fIuuid\fR\fR
+.ad
+.sp .6
+.RS 4n
+Specify the uuid of a zone instead of the Zone name.
+.RE
+
+.SH SUBCOMMANDS
.LP
You can use the \fBadd\fR and \fBselect\fR subcommands to select a specific
resource, at which point the scope changes to that resource. The \fBend\fR and
@@ -1215,8 +1268,7 @@ correct to be committed, this operation automatically does a verify.
.sp
.ne 2
.na
-\fB\fBcreate [\fR\fB-F\fR\fB] [\fR \fB-a\fR \fIpath\fR |\fB-b\fR \fB|\fR
-\fB-t\fR \fItemplate\fR\fB]\fR\fR
+\fB\fBcreate [\fR\fB-F\fR\fB] [\fR \fB-a\fR \fIpath\fR |\fB-b\fR \fB|\fR \fB-t\fR \fItemplate\fR\fB] [\fR\fB-X\fR\fB]\fR\fR
.ad
.sp .6
.RS 4n
@@ -1238,6 +1290,8 @@ configured, it should be installed using the "\fBzoneadm attach\fR" command
.sp
Use the \fB-b\fR option to create a blank configuration. Without arguments,
\fBcreate\fR applies the Sun default settings.
+.sp
+Use the \fB-X\fR option to facilitate creating a zone whose XML definition already exists on the host. The zone will be atomically added to the zone index file.
.RE
.sp
@@ -1314,18 +1368,21 @@ which is currently being added or modified.
.sp
.ne 2
.na
-\fB\fBremove\fR \fIresource-type\fR\fB{\fR\fIproperty-name\fR\fB=\fR\fIproperty
--value\fR\fB}\fR(global scope)\fR
+\fB\fBremove\fR [\fR\fB-F\fR\fB] \fIresource-type\fR\fB [\fR\fIproperty-name\fR\fB=\fR\fIproperty-value\fR\fB]* \fR(global scope)\fR
+.br
+\fB\fBremove\fR \fR\fIproperty-name\fR\fB \fR\fIproperty-value\fR\fB \fR(resource scope)\fR
.ad
.sp .6
.RS 4n
In the global scope, removes the specified resource. The \fB[]\fR syntax means
-0 or more of whatever is inside the square braces. If you want only to remove a
+0 or more property name-value pairs. If you want to only remove a
single instance of the resource, you must specify enough property name-value
pairs for the resource to be uniquely identified. If no property name-value
pairs are specified, all instances will be removed. If there is more than one
-pair is specified, a confirmation is required, unless you use the \fB-F\fR
-option.
+pair specified, a confirmation is required, unless you use the \fB-F\fR
+option. Likewise, the \fB-F\fR option can be used to remove a resource that
+does not exist (that is, no error will occur). In the resource scope, remove
+the specified name-value pair.
.RE
.sp
@@ -1803,7 +1860,6 @@ for a specific project, the second for the same project within zone 1.
.sp
.SH EXIT STATUS
-.sp
.LP
The following exit values are returned:
.sp
@@ -1837,7 +1893,6 @@ Invalid usage.
.RE
.SH ATTRIBUTES
-.sp
.LP
See \fBattributes\fR(5) for descriptions of the following attributes:
.sp
@@ -1853,12 +1908,11 @@ Interface Stability Volatile
.TE
.SH SEE ALSO
-.sp
.LP
\fBppriv\fR(1), \fBprctl\fR(1), \fBzlogin\fR(1), \fBkstat\fR(1M),
\fBmount\fR(1M), \fBpooladm\fR(1M), \fBpoolcfg\fR(1M), \fBpoold\fR(1M),
\fBrcapd\fR(1M), \fBrctladm\fR(1M), \fBsvcadm\fR(1M), \fBsysidtool\fR(1M),
-\fBzfs\fR(1M), \fBzoneadm\fR(1M), \fBpriv_str_to_set\fR(3C),
+\fBzfs\fR(1M), \fBzoneadm\fR(1M), \fBpriocntl\fR(2), \fBpriv_str_to_set\fR(3C),
\fBkstat\fR(3KSTAT), \fBvfstab\fR(4), \fBattributes\fR(5), \fBbrands\fR(5),
\fBfnmatch\fR(5), \fBlx\fR(5), \fBprivileges\fR(5), \fBresource_controls\fR(5),
\fBzones\fR(5)
@@ -1867,6 +1921,5 @@ Interface Stability Volatile
\fISystem Administration Guide: Solaris Containers-Resource Management, and
Solaris Zones\fR
.SH NOTES
-.sp
.LP
All character data used by \fBzonecfg\fR must be in US-ASCII encoding.
diff --git a/usr/src/man/man1m/zpool.1m b/usr/src/man/man1m/zpool.1m
index eadb5482be..db714ef595 100644
--- a/usr/src/man/man1m/zpool.1m
+++ b/usr/src/man/man1m/zpool.1m
@@ -20,6 +20,7 @@
.\"
.\"
.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2013, Joyent, Inc. All Rights Reserved.
.\" Copyright (c) 2013 by Delphix. All rights reserved.
.\" Copyright 2016 Nexenta Systems, Inc.
.\"
diff --git a/usr/src/man/man3c/Makefile b/usr/src/man/man3c/Makefile
index 61591de7e7..4b8e8336a0 100644
--- a/usr/src/man/man3c/Makefile
+++ b/usr/src/man/man3c/Makefile
@@ -210,6 +210,9 @@ MANFILES= __fbufsize.3c \
imaxdiv.3c \
index.3c \
initgroups.3c \
+ inotify_init.3c \
+ inotify_add_watch.3c \
+ inotify_rm_watch.3c \
insque.3c \
is_system_labeled.3c \
isaexec.3c \
diff --git a/usr/src/man/man3c/inotify_add_watch.3c b/usr/src/man/man3c/inotify_add_watch.3c
new file mode 100644
index 0000000000..4f79e03c82
--- /dev/null
+++ b/usr/src/man/man3c/inotify_add_watch.3c
@@ -0,0 +1,120 @@
+'\" te
+.\" Copyright (c) 2014, Joyent, Inc. All Rights Reserved.
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.TH INOTIFY_ADD_WATCH 3C "Sep 17, 2014"
+.SH NAME
+inotify_add_watch \- add a watch to an inotify instance
+.SH SYNOPSIS
+
+.LP
+.nf
+#include <sys/inotify.h>
+
+\fBint\fR \fBinotify_add_watch\fR(\fBint\fR \fIfd\fR, \fBconst char *\fR\fIpathname\fR, \fBuint32_t\fR \fImask\fR);
+.fi
+
+.SH DESCRIPTION
+.sp
+.LP
+The \fBinotify_add_watch()\fR function adds a watch for the file or
+directory specified by \fIpathname\fR to the inotify instance
+specified by \fIfd\fR for the events specified by \fImask\fR. See
+\fBinotify\fR(5) for details on the meaning of \fImask\fR, how
+it affects the interpretation of \fIpathname\fR, and how
+events for the watched file or directory are subsequently
+retrieved via \fBread\fR(2).
+
+.SH RETURN VALUES
+.sp
+.LP
+Upon succesful completion, \fBinotify_add_watch()\fR returns the
+watch descriptor associated with the new watch.
+If an error occurs, -1 is returned and errno is set to indicate
+the error.
+
+.SH ERRORS
+.sp
+.LP
+\fBinotify_add_watch()\fR will fail if:
+.sp
+.ne 2
+.na
+\fB\fBEACCES\fR\fR
+.ad
+.RS 10n
+\fIpathname\fR could not be opened for reading.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBEBADF\fR\fR
+.ad
+.RS 10n
+The \fIfd\fR argument is not a valid open file descriptor.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBEFAULT\fR\fR
+.ad
+.RS 10n
+The memory associated with \fIpathname\fR was not mapped.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBEINVAL\fR\fR
+.ad
+.RS 10n
+The \fIfd\fR argument does not correspond to an
+\fBinotify\fR(5) instance as initialized with
+\fBinotify_init\fR(3C) or \fBinotify_init1\fR(3C).
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBENOSPC\fR\fR
+.ad
+.RS 10n
+The number of watches on the specified instance would exceed the
+maximum number of watches per \fBinotify\fR(5) instance.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBENOTDIR\fR\fR
+.ad
+.RS 10n
+\fIpathname\fR does not correspond to a directory and
+\fBIN_ONLYDIR\fR was specified in \fImask\fR.
+.RE
+
+.sp
+.SH NOTES
+.sp
+.LP
+
+While the \fBinotify\fR(5) facility is implemented for purposes of
+offering compatibility for Linux-borne applications, native
+applications may opt to use it instead of (or in addition to) the
+\fBPORT_SOURCE_FILE\fR capability of event ports. See
+\fBinotify\fR(5) for details and restrictions.
+
+.SH SEE ALSO
+.sp
+.LP
+\fBinotify_init\fR(3C), \fBinotify_init1\fR(3C),
+\fBport_create\fR(3C), \fBport_associate\fR(3C), \fBport_get\fR(3C),
+\fBinotify\fR(5)
diff --git a/usr/src/man/man3c/inotify_init.3c b/usr/src/man/man3c/inotify_init.3c
new file mode 100644
index 0000000000..551a2ca798
--- /dev/null
+++ b/usr/src/man/man3c/inotify_init.3c
@@ -0,0 +1,107 @@
+'\" te
+.\" Copyright (c) 2014, Joyent, Inc. All Rights Reserved.
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.TH INOTIFY_INIT 3C "Sep 17, 2014"
+.SH NAME
+inotify_init, inotify_init1 \- initialize an inotify instance
+.SH SYNOPSIS
+
+.LP
+.nf
+#include <sys/inotify.h>
+
+\fBint\fR \fBinotify_init\fR(\fBvoid\fR);
+.fi
+
+.LP
+.nf
+\fBint\fR \fBinotify_init1\fR(\fBint\fR \fIflags\fR);
+.fi
+
+.SH DESCRIPTION
+.sp
+.LP
+The \fBinotify_init()\fR and \fBinotify_init1()\fR functions both create an
+\fBinotify\fR(5) instance that can be operated upon via
+\fBinotify_add_watch\fR(3C), \fBinotify_rm_watch\fR(3C) and \fBread\fR(2).
+\fBinotify\fR instances are
+represented as file descriptors, and should be closed via \fBclose\fR(2).
+
+The only difference between the two functions is their signature;
+\fBinotify_init()\fR takes no arguments,
+while \fBinotify_init1()\fR takes a \fIflags\fR argument that can have
+any of the following values:
+
+.sp
+.ne 2
+.na
+\fBIN_CLOEXEC\fR
+.ad
+.RS 12n
+Instance should be closed upon an
+\fBexec\fR(2); see \fBopen\fR(2)'s description of \fBO_CLOEXEC\fR.
+.RE
+
+.sp
+.ne 2
+.na
+\fBIN_NONBLOCK\fR
+.ad
+.RS 12n
+Instance will be set to be non-blocking. A \fBread\fR(2) on an
+\fBinotify\fR instance that has been initialized with
+\fBIN_NONBLOCK\fR will return \fBEAGAIN\fR if there are
+no events enqueued in lieu of blocking.
+.RE
+
+.SH RETURN VALUES
+.sp
+.LP
+Upon succesful completion, 0 is returned. Otherwise, -1 is returned and errno
+is set to indicate the error.
+.SH ERRORS
+.sp
+.LP
+The \fBinotify_init()\fR and \fBinotify_init1()\fR functions will fail if:
+.sp
+.ne 2
+.na
+\fB\fBEINVAL\fR\fR
+.ad
+.RS 10n
+The \fIflags\fR are invalid (\fBinotify_init1()\fR).
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBEMFILE\fR\fR
+.ad
+.RS 10n
+There are currently {\fBOPEN_MAX\fR} file descriptors open in the calling
+process, or the maximum number of \fBinotify\fR instances for the user
+would be exceeded.
+.RE
+
+.sp
+.SH NOTES
+.sp
+.LP
+
+While the \fBinotify\fR(5) facility is implemented for purposes of
+offering compatibility for Linux-borne applications, native
+applications may opt to use it instead of (or in addition to) the
+\fBPORT_SOURCE_FILE\fR capability of event ports. See
+\fBinotify\fR(5) for details and restrictions.
+
+.SH SEE ALSO
+.sp
+.LP
+\fBinotiy_add_watch\fR(3C), \fBinotify_rm_watch\fR(3C), \fBinotify\fR(5)
diff --git a/usr/src/man/man3c/inotify_rm_watch.3c b/usr/src/man/man3c/inotify_rm_watch.3c
new file mode 100644
index 0000000000..de568f8e24
--- /dev/null
+++ b/usr/src/man/man3c/inotify_rm_watch.3c
@@ -0,0 +1,81 @@
+'\" te
+.\" Copyright (c) 2014, Joyent, Inc. All Rights Reserved.
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.TH INOTIFY_RM_WATCH 3C "Sep 17, 2014"
+.SH NAME
+inotify_rm_watch \- remove a watch from an inotify instance
+.SH SYNOPSIS
+
+.LP
+.nf
+#include <sys/inotify.h>
+
+\fBint\fR \fBinotify_rm_watch\fR(\fBint\fR \fIfd\fR, \fBint\fR \fIwd\fR);
+.fi
+
+.SH DESCRIPTION
+.sp
+.LP
+The \fBinotify_rm_watch()\fR function removes the watch specified
+by \fIwd\fR from the inotify instance associated with \fIfd\fR.
+Removing a watch will induce an \fBIN_IGNORED\fR event; see
+\fBinotify\fR(5) for details.
+
+.SH RETURN VALUES
+.sp
+.LP
+Upon succesful completion, \fBinotify_add_watch()\fR returns the
+watch descriptor associated with the new watch.
+If an error occurs, -1 is returned and errno is set to indicate
+the error.
+
+.SH ERRORS
+.sp
+.LP
+\fBinotify_rm_watch()\fR will fail if:
+.sp
+.ne 2
+.na
+\fB\fBEBADF\fR\fR
+.ad
+.RS 10n
+The \fIfd\fR argument is not a valid open file descriptor.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBEINVAL\fR\fR
+.ad
+.RS 10n
+The \fIfd\fR argument does not correspond to an
+\fBinotify\fR(5) instance as initialized with
+\fBinotify_init\fR(3C) or \fBinotify_init1\fR(3C), or
+\fIwd\fR is not a valid watch for the specified inotify
+instance.
+.RE
+
+.sp
+.SH NOTES
+.sp
+.LP
+
+While the \fBinotify\fR(5) facility is implemented for purposes of
+offering compatibility for Linux-borne applications, native
+applications may opt to use it instead of (or in addition to) the
+\fBPORT_SOURCE_FILE\fR capability of event ports. See
+\fBinotify\fR(5) for details and restrictions.
+
+.SH SEE ALSO
+.sp
+.LP
+\fBinotify_init\fR(3C), \fBinotify_init1\fR(3C),
+\fBport_create\fR(3C), \fBport_associate\fR(3C), \fBport_get\fR(3C),
+\fBinotify\fR(5)
diff --git a/usr/src/man/man3c/psignal.3c b/usr/src/man/man3c/psignal.3c
index 655dedb022..a977fb6df4 100644
--- a/usr/src/man/man3c/psignal.3c
+++ b/usr/src/man/man3c/psignal.3c
@@ -1,16 +1,17 @@
'\" te
.\" Copyright 1989 AT&T. Copyright (c) 2005, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
.\" Copyright 2015 Circonus, Inc. All rights reserved.
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH PSIGNAL 3C "Mar 31, 2005"
+.TH PSIGNAL 3C "Aug 14, 2014"
.SH NAME
psignal, psiginfo \- system signal messages
.SH SYNOPSIS
.LP
.nf
-#include <siginfo.h>
+#include <signal.h>
\fBvoid\fR \fBpsignal\fR(\fBint\fR \fIsig\fR, \fBconst char *\fR\fIs\fR);
.fi
@@ -28,7 +29,9 @@ that may have been passed as the first argument to a signal handler. The
\fIpinfo\fR argument is a pointer to a \fBsiginfo\fR structure that may have
been passed as the second argument to an enhanced signal handler. See
\fBsigaction\fR(2). The argument string \fIs\fR is printed first, followed by a
-colon and a blank, followed by the message and a \fBNEWLINE\fR character.
+colon and a blank, followed by the message and a \fBNEWLINE\fR character. If
+\fBs\fR is the value \fBNULL\fR or an empty string, then nothing is printed for
+the user's string and the colon and blank are omitted.
.SH USAGE
.LP
Messages printed from these functions are in the native language specified by
diff --git a/usr/src/man/man3c/putenv.3c b/usr/src/man/man3c/putenv.3c
index 05e7f01594..d5b59a42fe 100644
--- a/usr/src/man/man3c/putenv.3c
+++ b/usr/src/man/man3c/putenv.3c
@@ -1,5 +1,6 @@
'\" te
.\" Copyright 1989 AT&T. Copyright (c) 2004, Sun Microsystems, Inc. All Rights Reserved. Portions Copyright (c) 1992, X/Open Company Limited. All Rights Reserved.
+.\" Copyright 2016 Joyent, Inc.
.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at
.\" http://www.opengroup.org/bookstore/.
.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html.
@@ -19,7 +20,6 @@ putenv \- change or add value to environment
.fi
.SH DESCRIPTION
-.sp
.LP
The \fBputenv()\fR function makes the value of the environment variable
\fIname\fR equal to \fIvalue\fR by altering an existing variable or creating a
@@ -29,7 +29,11 @@ the environment, so altering the string will change the environment.
.LP
The \fIstring\fR argument points to a string of the form
\fIname\fR\fB=\fR\fIvalue\fR. The space used by \fIstring\fR is no longer used
-once a new string-defining \fIname\fR is passed to \fBputenv()\fR.
+once a new string-defining \fIname\fR is passed to \fBputenv()\fR. If
+there is no equals sign (\fB=\fR) present as a separator, then
+\fIstring\fR is treated as the \fIname\fR of an environment variable to
+remove from the environment, as though \fBunsetenv\fR(3C) had been
+called.
.sp
.LP
The \fBputenv()\fR function uses \fBmalloc\fR(3C) to enlarge the environment.
@@ -38,12 +42,10 @@ The \fBputenv()\fR function uses \fBmalloc\fR(3C) to enlarge the environment.
After \fBputenv()\fR is called, environment variables are not in alphabetical
order.
.SH RETURN VALUES
-.sp
.LP
Upon successful completion, \fBputenv()\fR returns 0. Otherwise, it returns a
non-zero value and sets \fBerrno\fR to indicate the error.
.SH ERRORS
-.sp
.LP
The \fBputenv()\fR function may fail if:
.sp
@@ -56,7 +58,6 @@ Insufficient memory was available.
.RE
.SH USAGE
-.sp
.LP
The \fBputenv()\fR function can be safely called from multithreaded programs.
Caution must be exercised when using this function and \fBgetenv\fR(3C) in
@@ -66,7 +67,6 @@ list from being accessed simultaneously by two different threads. It does not,
however, prevent two threads from successively accessing the environment list
using \fBputenv()\fR or \fBgetenv()\fR.
.SH ATTRIBUTES
-.sp
.LP
See \fBattributes\fR(5) for descriptions of the following attributes:
.sp
@@ -84,12 +84,10 @@ MT-Level Safe
.TE
.SH SEE ALSO
-.sp
.LP
\fBexec\fR(2), \fBgetenv\fR(3C), \fBmalloc\fR(3C), \fBattributes\fR(5),
\fBenviron\fR(5), \fBstandards\fR(5)
.SH WARNINGS
-.sp
.LP
The \fIstring\fR argument should not be an automatic variable. It should be
declared static if it is declared within a function because it cannot be
diff --git a/usr/src/man/man3c/signalfd.3c b/usr/src/man/man3c/signalfd.3c
index 43699a50a5..aa1bb90b1d 100644
--- a/usr/src/man/man3c/signalfd.3c
+++ b/usr/src/man/man3c/signalfd.3c
@@ -8,9 +8,9 @@
.\" source. A copy of the CDDL is also available via the Internet at
.\" http://www.illumos.org/license/CDDL.
.\"
-.\" Copyright 2015, Joyent, Inc.
+.\" Copyright 2016, Joyent, Inc.
.\"
-.Dd "Jun 15, 2015"
+.Dd "May 05, 2016"
.Dt SIGNALFD 3C
.Os
.Sh NAME
@@ -150,6 +150,20 @@ typedef struct signalfd_siginfo {
uint8_t ssi_pad[48]; /* pad size to 128 bytes */
} signalfd_siginfo_t;
.Ed
+.Sh NOTES
+File descriptor duplication during fork presents a challenge to the
+.Sy signalfd
+facility since signal data and events are dependent on the process from which
+they are queried. Its use with caching event systems such as
+.Xr epoll 5 ,
+.Sy /dev/poll ,
+or
+.Xr port_create 3C
+can result in undefined behavior if signalfd and polling descriptors are used
+together after being shared across a fork. Such restrictions do not apply if
+the child only calls
+.Xr close 2
+on the descriptors.
.Sh RETURN VALUES
Upon succesful completion, a file descriptor associated with the instance
is returned. Otherwise, -1 is returned and errno is set to indicate the error.
@@ -187,6 +201,8 @@ Unable to allocate state for the file descriptor.
.Sh SEE ALSO
.Xr poll 2 ,
.Xr sigwait 2 ,
+.Xr port_create 3C ,
.Xr sigsetops 3C ,
.Xr sigwaitinfo 3C ,
-.Xr signal.h 3HEAD
+.Xr signal.h 3HEAD ,
+.Xr epoll 5
diff --git a/usr/src/man/man3c/timer_create.3c b/usr/src/man/man3c/timer_create.3c
index 2f93e954e3..f57bf1d25f 100644
--- a/usr/src/man/man3c/timer_create.3c
+++ b/usr/src/man/man3c/timer_create.3c
@@ -9,7 +9,7 @@
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH TIMER_CREATE 3C "Sep 15, 2015"
+.TH TIMER_CREATE 3C "Mar 24, 2016"
.SH NAME
timer_create \- create a timer
.SH SYNOPSIS
@@ -125,17 +125,6 @@ system.
The specified clock \fBID\fR, \fIclock_id\fR, is not defined.
.RE
-.sp
-.ne 2
-.na
-\fB\fBEPERM\fR\fR
-.ad
-.RS 10n
-The specified clock \fBID\fR, \fIclock_id\fR, is \fBCLOCK_HIGHRES\fR and the
-{\fBPRIV_PROC_CLOCK_HIGHRES\fR} is not asserted in the effective set of the
-calling process.
-.RE
-
.SH ATTRIBUTES
.LP
See \fBattributes\fR(5) for descriptions of the following attributes:
@@ -159,5 +148,4 @@ Standard See \fBstandards\fR(5).
.LP
\fBexec\fR(2), \fBfork\fR(2), \fBtime\fR(2), \fBclock_settime\fR(3C),
\fBsignal\fR(3C), \fBsignal.h\fR(3HEAD), \fBtimer_delete\fR(3C),
-\fBtimer_settime\fR(3C), \fBattributes\fR(5), \fBprivileges\fR(5),
-\fBstandards\fR(5)
+\fBtimer_settime\fR(3C), \fBattributes\fR(5), \fBstandards\fR(5)
diff --git a/usr/src/man/man3c/timer_settime.3c b/usr/src/man/man3c/timer_settime.3c
index 543f042aba..8ae32ccc91 100644
--- a/usr/src/man/man3c/timer_settime.3c
+++ b/usr/src/man/man3c/timer_settime.3c
@@ -1,5 +1,6 @@
'\" te
.\" Copyright (c) 2008, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2016, Joyent, Inc.
.\" Copyright 1989 AT&T
.\" Portions Copyright (c) 1992, X/Open Company Limited. All Rights Reserved.
.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at
@@ -9,7 +10,7 @@
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH TIMER_SETTIME 3C "Feb 5, 2008"
+.TH TIMER_SETTIME 3C "Mar 24, 2016"
.SH NAME
timer_settime, timer_gettime, timer_getoverrun \- per-process timers
.SH SYNOPSIS
@@ -103,6 +104,12 @@ set to \fBDELAYTIMER_MAX\fR. The value returned by \fBtimer_getoverrun()\fR
applies to the most recent expiration signal delivery or acceptance for the
timer. If no expiration signal has been delivered for the timer, the meaning of
the overrun count returned is undefined.
+.sp
+.LP
+If the specified timer is of type \fBCLOCK_HIGHRES\fR and the time value is
+smaller than a system defined threshold, then {\fBPRIV_PROC_CLOCK_HIGHRES\fR}
+must be asserted in the effective set of the calling process or the time values
+will be adjusted up to the threshold value.
.SH RETURN VALUES
.sp
.LP
@@ -171,4 +178,5 @@ Standard See \fBstandards\fR(5).
.sp
.LP
\fBtime.h\fR(3HEAD), \fBclock_settime\fR(3C), \fBtimer_create\fR(3C),
-\fBtimer_delete\fR(3C), \fBattributes\fR(5), \fBstandards\fR(5)
+\fBtimer_delete\fR(3C), \fBattributes\fR(5), \fBprivileges\fR(5),
+\fBstandards\fR(5)
diff --git a/usr/src/man/man3dlpi/Makefile b/usr/src/man/man3dlpi/Makefile
index cdd24216bd..4c5448f0be 100644
--- a/usr/src/man/man3dlpi/Makefile
+++ b/usr/src/man/man3dlpi/Makefile
@@ -41,10 +41,12 @@ MANFILES= dlpi_arptype.3dlpi \
dlpi_walk.3dlpi
MANLINKS= dlpi_disabmulti.3dlpi \
+ dlpi_open_zone.3dlpi \
dlpi_promiscoff.3dlpi
dlpi_disabmulti.3dlpi := LINKSRC = dlpi_enabmulti.3dlpi
+dlpi_open_zone.3dlpi := LINKSRC = man3dlpi/dlpi_open.3dlpi
dlpi_promiscoff.3dlpi := LINKSRC = dlpi_promiscon.3dlpi
.KEEP_STATE:
diff --git a/usr/src/man/man3dlpi/dlpi_open.3dlpi b/usr/src/man/man3dlpi/dlpi_open.3dlpi
index 8129a75404..489f66066a 100644
--- a/usr/src/man/man3dlpi/dlpi_open.3dlpi
+++ b/usr/src/man/man3dlpi/dlpi_open.3dlpi
@@ -1,9 +1,10 @@
'\" te
.\" Copyright (c) 2008, Sun Microsystems, Inc. All Rights Reserved
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH DLPI_OPEN 3DLPI "Nov 17, 2008"
+.TH DLPI_OPEN 3DLPI "Feb 24, 2014"
.SH NAME
dlpi_open \- open DLPI link
.SH SYNOPSIS
@@ -14,6 +15,9 @@ dlpi_open \- open DLPI link
\fBint\fR \fBdlpi_open\fR(\fBconst char *\fR\fIlinkname\fR, \fBdlpi_handle_t *\fR\fIdhp\fR,
\fBuint_t\fR \fIflags\fR);
+
+\fBint\fR \fBdlpi_open_zone\fR(\fBconst char *\fR\fIlinkname\fR, \fBconst char *\fR
+ \fIzonename\fR, \fBdlpi_handle_t *\fR\fIdhp\fR, \fBuint_t\fR \fIflags\fR);
.fi
.SH DESCRIPTION
@@ -114,6 +118,18 @@ value ensures that \fBDLPI_ETIMEDOUT\fR is returned from a \fBlibdlpi\fR
operation only in the event that the \fBDLPI\fR link becomes unresponsive. The
timeout value can be changed with \fBdlpi_set_timeout\fR(3DLPI), although this
should seldom be necessary.
+
+.sp
+.LP
+The \fBdlpi_open_zone()\fR function behaves as \fBdlpi_open()\fR, except that it
+looks for the link specified by \fBlinkname\fR in the specified zone
+\fBzonename\fR as opposed to the current zone. This function is only meaningful
+from the global zone. Instead of scanning \fB/dev/net\fR, \fBdlpi_open_zone()\fR
+scans \fB/dev/net/zone/<\fIzonename\fR> for the data link and
+\fB/dev/ipnet/zone/<\fIzonename\fR> when DLPI_DEVIPNET is present in
+\fBflags\fR. If a NULL or empty string is passed into \fBdlpi_open_zone()\fR, it
+will behave as though \fBdlpi_open\fR has been called.
+
.SH RETURN VALUES
.sp
.LP
@@ -124,7 +140,7 @@ section is returned.
.SH ERRORS
.sp
.LP
-The \fBdlpi_open()\fR function will fail if:
+The \fBdlpi_open()\fR and \fBdlpi_open_zone()\fR function will fail if:
.sp
.ne 2
.na
@@ -195,6 +211,17 @@ DLPI operation failed
See \fBattributes\fR(5) for description of the following attributes:
.sp
+.LP
+The \fBdlpi_open_zone()\fR function will fail if:
+.sp
+.ne 2
+.na
+\fB\fBDLPI_EZONENAMEINVAL\fR\fR
+.ad
+.RS 25n
+Invalid \fIzonename\fR argument
+.RE
+
.sp
.TS
box;
diff --git a/usr/src/man/man3lib/Makefile b/usr/src/man/man3lib/Makefile
index 220ed2ae3e..a470f408fa 100644
--- a/usr/src/man/man3lib/Makefile
+++ b/usr/src/man/man3lib/Makefile
@@ -105,6 +105,7 @@ MANFILES= libMPAPI.3lib \
libumem.3lib \
libuuid.3lib \
libvolmgt.3lib \
+ libvnd.3lib \
libw.3lib \
libxnet.3lib \
liby.3lib
diff --git a/usr/src/man/man3lib/libvnd.3lib b/usr/src/man/man3lib/libvnd.3lib
new file mode 100644
index 0000000000..ead69ff82e
--- /dev/null
+++ b/usr/src/man/man3lib/libvnd.3lib
@@ -0,0 +1,690 @@
+'\" te
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\"
+.TH LIBVND 3LIB "Mar 06, 2014"
+.SH NAME
+libvnd \- vnd library
+
+.SH SYNOPSIS
+.LP
+.nf
+cc [ flag... ] file... -lvnd [ library... ]
+#include <libvnd.h>
+.fi
+
+.SH DESCRIPTION
+.LP
+The libvnd library provides a stable and programmatic interface to
+vnd(7D) devices. vnd devices provide the means for creating a layer two
+interface over a data link, similar to the use of libdlpi(3LIB) and
+IP(7P). In dlpi parlance, a vnd device obtains data from all service
+attachment points (SAP). For ethernet devices, this means that a vnd
+device sends and receives traffic for all ethertypes. It is intended to
+be used for services such as virtual machines which emulate layer two
+devices.
+
+.LP
+Handles to vnd(7D) devices are obtained through the use of vnd_create
+and vnd_open. With a handle, I/O can be performed and properties on the
+device can be set and retrieved. I/O on devices should be performed
+through the vnd_frameio_read and vnd_frameio_write functions. A file
+descriptor suitable for use with event ports and polling may be obtained
+through vnd_pollfd. Handles are relinquished through calls to vnd_close;
+however, devices will persist until vnd_unlink has been called.
+
+.LP
+The rest of this manual documents the interfaces, properties, errors,
+and threading model for libvnd. The in-depth description of individual
+interfaces, their arguments, and examples, are in manual pages for each
+provided interface.
+
+
+.SH INTERFACES
+.sp
+.LP
+
+The shared object libvnd.so.1 provides the public interfaces defined
+below. See Intro(3) for additional information on shared object
+interfaces. Individual functions are documented in their own manual
+pages.
+
+.sp
+.TS
+l l
+l l .
+vnd_create vnd_errno
+vnd_open vnd_syserrno
+vnd_unlink vnd_strerror
+vnd_close vnd_strsyserror
+vnd_pollfd vnd_walk
+vnd_prop_get vnd_prop_set
+vnd_prop_iter vnd_prop_writeable
+vnd_frameio_read vnd_frameio_write
+.TE
+
+.SH PROPERTIES
+
+.LP
+The following table summarizes properties of a vnd device. The
+properties can be retrieved and set with the functions
+vnd_prop_get(3VND) and vnd_prop_set(3VND). Following the table, the
+structures and properties are described in greater detail.
+
+.nf
+ +-------------------+---------------------+-------+
+ | PROPERTY | STRUCTURE | PERM |
+ +-------------------+---------------------+-------+
+ | VND_PROP_RXBUF | vnd_prop_buf_t | R/W |
+ +-------------------+---------------------+-------+
+ | VND_PROP_TXBUF | vnd_prop_buf_t | R/W |
+ +-------------------+---------------------+-------+
+ | VND_PROP_MAXBUF | vnd_prop_buf_t | R/- |
+ +-------------------+---------------------+-------+
+ | VND_PROP_MINTU | vnd_prop_buf_t | R/- |
+ +-------------------+---------------------+-------+
+ | VND_PROP_MAXTU | vnd_prop_buf_t | R/- |
+ +-------------------+---------------------+-------+
+.fi
+
+.SS Structures
+
+.LP
+The vnd_prop_buf_t structure has the following members:
+
+.in +2
+.nf
+uint64_t vpb_size;
+.fi
+.in -2
+
+.LP
+The vpb_size member refers to a size in bytes. When getting a property,
+it represents the size of that property, when setting a property, it is
+the size to set the property to.
+
+
+.SS Property Descriptions
+.sp
+.ne 2
+.na
+rxbuf
+.ad
+.sp .6
+.RS 4n
+A read/write property that controls the size of the receive buffer for
+the device. All received data enters the receive buffer until a consumer
+consumes it. If adding a received frame would exceed the size of the
+receive buffer, then that frame will be dropped. The maximum size of the
+buffer is limited by the 'maxsize' property.
+.RE
+
+.sp
+.ne 2
+.na
+txbuf
+.ad
+.sp .6
+.RS 4n
+A read/write property that controls the size of the transmit buffer. All
+in-flight transmitted data must be able to fix into the transmit buffer
+to deal with potential flow control events. If there is not enough space
+in the transmit buffer, transmit related I/O operations will either
+block or fail based on whether or not O_NONBLOCK or O_NDELAY were set
+with fcntl(2).
+.RE
+
+.sp
+.ne 2
+.na
+maxsize
+.ad
+.sp .6
+.RS 4n
+A read only property that describes the maximum size of buffers in the
+system. Properties such as rxbuf and txbuf cannot be set beyond this.
+.RE
+
+.sp
+.ne 2
+.na
+mintu
+.ad
+.sp .6
+.RS 4n
+A read only property that describes the minimum size of a frame
+transmitted to the underlying data link. Note that the minimum listed
+here may be less than the size of a valid layer two frame and therefore
+may be dropped. A frame smaller than this value will be rejected by vnd.
+.RE
+
+.sp
+.ne 2
+.na
+maxtu
+.ad
+.sp .6
+.RS 4n
+A read only property that describes the maximum size of
+a frame transmitted to the underlying data link. A frame
+larger than this value will be rejected by vnd.
+.RE
+
+
+.SH ERRORS
+.sp
+.LP
+Most interfaces provided by libvnd provide a means to retrieve a
+vnd_errno_t that describes an error that has occurred. The manuals for
+individual interfaces describe whether or not this additional error
+information is available and how to retrieve it. The following is a
+complete list of the error numbers and their names as defined in
+<sys/vnd_errno.h>. Any entries not listed here are private to the
+implementation and may change at any time.
+
+.sp
+.ne 2
+.na
+0 VND_E_SUCCESS
+.ad
+.RS 23n
+no error
+.sp
+This indicates that the operation completed successfully.
+.RE
+
+.sp
+.ne 2
+.na
+1 VND_E_NOMEM
+.ad
+.RS 23n
+not enough memory available
+.sp
+Insufficient memory was available. This is the equivalent of the
+standard system errno ENOMEM.
+.RE
+
+.sp
+.ne 2
+.na
+2 VND_E_NODATALINK
+.ad
+.RS 23n
+no such datalink
+.sp
+The data link requested to be used as part of vnd_create does not exist
+in the requested zone.
+.RE
+
+.sp
+.ne 2
+.na
+3 VND_E_NOTETHER
+.ad
+.RS 23n
+datalink not of type DL_ETHER
+.sp
+The data link used as part of a call to vnd_create is not an Ethernet
+device. vnd_create only works with Ethernet devices at this time.
+.RE
+
+.sp
+.ne 2
+.na
+4 VND_E_DLPIINVAL
+.ad
+.RS 23n
+unknown dlpi failure
+.sp
+An unexpected DLPI message was received during vnd device
+initialization.
+.RE
+
+.sp
+.ne 2
+.na
+5 VND_E_ATTACHFAIL
+.ad
+.RS 23n
+DL_ATTACH_REQ failed
+.sp
+During vnd device initialization, the dlpi call to attach to the
+requested data link failed.
+.RE
+
+.sp
+.ne 2
+.na
+6 VND_E_BINDFAIL
+.ad
+.RS 23n
+DL_BIND_REQ failed
+.sp
+
+During vnd device initialization, the dlpi call to bind to a service
+attachment point on the data link failed.
+.RE
+
+.sp
+.ne 2
+.na
+7 VND_E_PROMISCFAIL
+.ad
+.RS 23n
+DL_PROMISCON_REQ failed
+.sp
+
+During vnd device initialization, the dlpi call to enable promiscuous
+mode on the underlying device failed.
+.RE
+
+.sp
+.ne 2
+.na
+8 VND_E_DIRECTFAIL
+.ad
+.RS 23n
+DLD_CAPAB_DIRECT enable failed
+.sp
+During vnd device initialization, the dlpi call to enable the DLD fast
+path failed.
+.RE
+
+.sp
+.ne 2
+.na
+9 VND_E_CAPACKINVAL
+.ad
+.RS 23n
+bad datalink capability
+.sp
+During vnd device initialization, the kernel responded with an invalid
+capability acknowledgement.
+.RE
+
+.sp
+.ne 2
+.na
+10 VND_E_SUBCAPINVAL
+.ad
+.RS 23n
+bad datalink subcapability
+.sp
+During vnd device initialization, the kernel responded with an invalid
+sub-capability.
+.RE
+
+.sp
+.ne 2
+.na
+11 VND_E_DLDBADVERS
+.ad
+.RS 23n
+bad dld version
+.sp
+The vnd(7D) module does not support the version of the dld capability
+that the kernel sent. As such, the data path could not be brought up and
+the device could not be fully initialized.
+.RE
+
+.sp
+.ne 2
+.na
+12 VND_E_KSTATCREATE
+.ad
+.RS 23n
+failed to create kstats
+.sp
+During vnd device initialization, the necessary kstats could not be
+created.
+.RE
+
+.sp
+.ne 2
+.na
+13 VND_E_NODEV
+.ad
+.RS 23n
+no such vnd link
+.sp
+During device initialization, the requested character device did not
+exist.
+.RE
+
+.sp
+.ne 2
+.na
+14 VND_E_NONETSTACK
+.ad
+.RS 23n
+netstack doesn't exist
+.sp
+During device initialization, the networking stack for the device did
+not exist.
+.RE
+
+.sp
+.ne 2
+.na
+15 VND_E_ASSOCIATED
+.ad
+.RS 23n
+device already associated
+.sp
+During vnd device initialization, the vnd STREAMS device was already
+associated with another vnd device.
+.RE
+
+.sp
+.ne 2
+.na
+16 VND_E_ATTACHED
+.ad
+.RS 23n
+device already attached
+.sp
+The given vnd device has already been created over a data link and
+cannot be created over another one.
+.RE
+
+.sp
+.ne 2
+.na
+17 VND_E_LINKED
+.ad
+.RS 23n
+device already linked
+.sp
+The given vnd device has already been given a name and bound into the
+file system name space.
+.RE
+
+.sp
+.ne 2
+.na
+18 VND_E_BADNAME
+.ad
+.RS 23n
+invalid name
+.sp
+The requested name is not a valid name. Valid names are alphanumeric
+ascii names, along with the following ascii characters: ':', '\-', and
+\'_'. Names must be less than LIBVND_NAMELEN bytes including the null
+terminator.
+.RE
+
+.sp
+.ne 2
+.na
+19 VND_E_PERM
+.ad
+.RS 23n
+permission denied
+.sp
+A request was made from a non-global zone to manipulate a vnd device
+that belongs to a different zone.
+.RE
+
+.sp
+.ne 2
+.na
+20 VND_E_NOZONE
+.ad
+.RS 23n
+no such zone
+.sp
+A request was made which targeted a zone that did not exist.
+.RE
+
+.sp
+.ne 2
+.na
+21 VND_E_STRINIT
+.ad
+.RS 23n
+failed to initialize vnd stream module
+.sp
+During vnd device initialization, the vnd STREAMS module could not be
+pushed onto the data link's stream head.
+.RE
+
+.sp
+.ne 2
+.na
+22 VND_E_NOTATTACHED
+.ad
+.RS 23n
+device not attached
+.sp
+A request was made that requires a vnd device be attached to a data
+link, such as a call to change a property. The device was not attached
+to a data link.
+.RE
+
+.sp
+.ne 2
+.na
+23 VND_E_NOTLINKED
+.ad
+.RS 23n
+device not linked
+.sp
+A request was made to a vnd device that requires the vnd device to be
+named and present in /dev. The given device was not linked into /dev at
+the time of the call.
+.RE
+
+.sp
+.ne 2
+.na
+24 VND_E_LINKEXISTS
+.ad
+.RS 23n
+another device has the same link name
+.sp
+When trying to link a given vnd device into a zones /dev name space,
+another device already exists with the same name.
+.RE
+
+.sp
+.ne 2
+.na
+25 VND_E_MINORNODE
+.ad
+.RS 23n
+failed to create minor node
+.sp
+While trying to link a vnd device into the /devices and /dev name space,
+the call to ddi_create_minor_node() failed.
+.RE
+
+.sp
+.ne 2
+.na
+26 VND_E_BUFTOOBIG
+.ad
+.RS 23n
+requested buffer size is too large
+.sp
+The requested buffer size exceeds the maximum valid value for the given
+property.
+.RE
+
+.sp
+.ne 2
+.na
+27 VND_E_BUFTOOSMALL
+.ad
+.RS 23n
+requested buffer size is too small
+.sp
+The requested buffer size is less than the minimum buffer size. This
+generally occurs when making the buffer size less than the maximum
+transmission unit.
+.RE
+
+.sp
+.ne 2
+.na
+28 VND_E_DLEXCL
+.ad
+.RS 23n
+unable to obtain exclusive access to dlpi link, link busy
+.sp
+When a vnd device is created, it expects exclusive active access to the
+device. If any other active dlpi consumers, such as IP, are already
+using the device, then the vnd device will not be created. Passive
+consumers, such as snoop, can still use a device that has been
+exclusively opened.
+.RE
+
+.sp
+.ne 2
+.na
+28 VND_E_DIRECTNOTSUP
+.ad
+.RS 23n
+DLD direct capability not supported over data link
+.sp
+The data link that the vnd device was created over does not supported
+the DLD Direct capability. As such, the data path could not be
+initialized.
+.RE
+
+.sp
+.ne 2
+.na
+30 VND_E_BADPROPSIZE
+.ad
+.RS 23n
+invalid property size
+.sp
+The size of the data passed into vnd_prop_get or vnd_prop_set is
+incorrect and does not match the expected data size.
+.RE
+
+.sp
+.ne 2
+.na
+31 VND_E_BADPROP
+.ad
+.RS 23n
+invalid property
+.sp
+An unknown property identifier was specified. For a list of valid
+properties, see the section above entitled "PROPERTIES".
+.RE
+
+.sp
+.ne 2
+.na
+32 VND_E_PROPRDONLY
+.ad
+.RS 23n
+property is read only
+.sp
+An operation tried to update the value of a read only property. For a
+list of which properties are read only and which are readable and
+writeable, see the section above entitled "PROPERTIES".
+.RE
+
+.sp
+.ne 2
+.na
+33 VND_E_SYS
+.ad
+.RS 23n
+unexpected system error
+.sp
+This indicates that there is no vnd specific error available and that
+the system errno is valid. The system errno can be obtained and printed
+through vnd_syserrno and vnd_strsyserror. The possible values and their
+meanings are documented in Intro(2).
+.RE
+
+.sp
+.ne 2
+.na
+34 VND_E_CAPABPASS
+.ad
+.RS 23n
+capabilities invalid, pass-through module detected
+.sp
+While negotiating capabilities, a pass-through module was detected and
+the capability had to be discarded. Because of this, the data path could
+not be initialized.
+.RE
+
+
+.SH THREADING
+
+.LP
+The libvnd library is not truly MT-safe. MT-safety is provided on
+the granularity of a given vnd_handle_t. Operations on a single
+vnd_handle_t are unsafe; however, operations on different handles are
+MT-safe. If a single vnd_handle_t is used by multiple threads, it
+is the caller's responsibility to provide locking to ensure that
+multiple threads aren't simultaneously calling into libvnd on a
+single handle.
+
+
+.SH FILES
+.sp
+.ne 2
+.na
+/usr/lib/libvnd.so.1
+.ad
+.RS 27n
+shared object
+.RE
+
+.sp
+.ne 2
+.na
+/usr/lib/64/libvnd.so.1
+.ad
+.RS 27n
+64-bit shared object
+.RE
+
+.SH ATTRIBUTES
+
+.sp
+.LP
+See attributes(5) for descriptions of the following attributes:
+
+.sp
+.TS
+box;
+c | c
+l | l .
+ATTRIBUTE TYPE ATTRIBUTE VALUE
+_
+Stability Committed
+_
+MT-Level See "THREADING"
+.TE
+
+.SH SEE ALSO
+
+.sp
+.LP
+attributes(5), Intro(2), fcntl(2), Intro(3), fcntl.h(3HEAD), libdlpi(3LIB), port_create(3C), vnd(7D)
+.sp
+.LP
+vnd_close(3VND), vnd_create(3VND), vnd_errno(3VND),
+vnd_frameio_read(3VND), vnd_frameio_write(3VND), vnd_open(3VND)
+vnd_pollfd(3VND), vnd_prop_get(3VND), vnd_prop_iter(3VND),
+vnd_prop_set(3VND),
+vnd_prop_writeable(3VND), vnd_walk(3VND)
diff --git a/usr/src/man/man3vnd/Makefile b/usr/src/man/man3vnd/Makefile
new file mode 100644
index 0000000000..64abf9dcd6
--- /dev/null
+++ b/usr/src/man/man3vnd/Makefile
@@ -0,0 +1,70 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet
+# at http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014, Joyent, Inc. All rights reserved.
+#
+
+include $(SRC)/Makefile.master
+
+MANSECT= 3vnd
+
+MANFILES= vnd_create.3vnd \
+ vnd_errno.3vnd \
+ vnd_frameio_read.3vnd \
+ vnd_pollfd.3vnd \
+ vnd_prop_get.3vnd \
+ vnd_prop_iter.3vnd \
+ vnd_prop_writeable.3vnd \
+ vnd_walk.3vnd
+
+MANLINKS= frameio_t.3vnd \
+ framevec_t.3vnd \
+ vnd_close.3vnd \
+ vnd_frameio_write.3vnd \
+ vnd_open.3vnd \
+ vnd_prop_set.3vnd \
+ vnd_prop_iter_f.3vnd \
+ vnd_strerror.3vnd \
+ vnd_strsyserror.3vnd \
+ vnd_syserrno.3vnd \
+ vnd_unlink.3vnd \
+ vnd_walk_cb_f.3vnd
+
+# vnd_create.3vnd
+vnd_open.3vnd := LINKSRC = vnd_create.3vnd
+vnd_unlink.3vnd := LINKSRC = vnd_create.3vnd
+vnd_close.3vnd := LINKSRC = vnd_create.3vnd
+
+# vnd_errno.3vnd
+vnd_strerror.3vnd := LINKSRC = vnd_errno.3vnd
+vnd_syserrno.3vnd := LINKSRC = vnd_errno.3vnd
+vnd_strsyserror.3vnd := LINKSRC = vnd_errno.3vnd
+
+# vnd_frameio_read.3vnd
+vnd_frameio_write.3vnd := LINKSRC = vnd_frameio_read.3vnd
+framevec_t.3vnd := LINKSRC = vnd_frameio_read.3vnd
+frameio_t.3vnd := LINKSRC = vnd_frameio_read.3vnd
+
+# vnd_prop_get.3vnd
+vnd_prop_set.3vnd := LINKSRC = vnd_prop_get.3vnd
+
+# vnd_prop_iter.3vnd
+vnd_prop_iter_f.3vnd := LINKSRC = vnd_prop_iter.3vnd
+
+# vnd_walk.3vnd
+vnd_walk_cb_f.3vnd := LINKSRC = vnd_walk.3vnd
+
+.KEEP_STATE:
+
+include $(SRC)/man/Makefile.man
+
+install: $(ROOTMANFILES) $(ROOTMANLINKS)
diff --git a/usr/src/man/man3vnd/vnd_create.3vnd b/usr/src/man/man3vnd/vnd_create.3vnd
new file mode 100644
index 0000000000..d29237a60c
--- /dev/null
+++ b/usr/src/man/man3vnd/vnd_create.3vnd
@@ -0,0 +1,280 @@
+'\" te
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\"
+.TH VND_CREATE 3VND "Feb 21, 2014"
+
+.SH NAME
+
+vnd_create, vnd_open, vnd_unlink, vnd_close \- create, open, and destroy
+vnd devices
+
+.SH SYNOPSIS
+
+.LP
+.nf
+cc [ flag... ] file... -lvnd [ library... ]
+#include <libvnd.h>
+
+vnd_handle_t *vnd_create(const char *zonename, const char *datalink,
+ const char *linkname, vnd_errno_t *vnderr, int *syserr);
+
+vnd_handle_t *vnd_open(const char *zonename, const char *linkname,
+ vnd_errno_t *vnderr, int *syserr);
+
+int vnd_unlink(vnd_handle_t *vhp);
+
+void vnd_close(vnd_handle_t *vhp);
+.fi
+
+
+.SH DESCRIPTION
+.LP
+These functions create vnd devices, obtain handles to extant vnd
+devices, and close handles to vnd devices, for use with the rest of
+libvnd(3LIB).
+
+.LP
+The vnd_create function creates a new vnd device in the zone specified
+by zonename. The zone name argument may be null, in which case the
+caller's current zone is used instead. The vnd device and data link it
+is created over must both be in the same zone. The datalink argument
+indicates the name of the DLPI data link to create the vnd device over.
+The linkname argument indicates the name of the new vnd device. The
+linkname argument must be less than VND_NAMELEN characters long,
+excluding the null terminator. It should be an alphanumeric string. The
+only non-alphanumeric characters allowed are ':', '-', and \'_'.
+Neither the datalink argument nor linkname argument may be NULL. A
+handle to the created device is returned to the caller. Once the
+vnd_create function returns, the device can be subsequently opened with
+a call to vnd_open. The named device persists until a call to vnd_unlink
+or the containing zone is halted. Creating a vnd device requires
+PRIV_SYS_NET_CONFIG as well as PRIV_RAWACCESS. The arguments vnderr and
+syserr are used to obtain errors in the cases where the call to
+vnd_create fails. Both arguments may be NULL pointers, in which case the
+more detailed error information is discarded.
+
+.LP
+The vnd_open function opens an existing vnd device and returns a
+unique handle to that device. The vnd device to open is specified by
+both zonename and linkname. The zonename argument specifies what zone
+to look for the vnd device in. The linkname specifies the name of the
+link. The zonename argument may be NULL. If it is, the current zone is
+used. Similar to vnd_create, the integer values pointed to by the
+arguments vnderr and syserr will be filled in with additional error
+information in the cases where a call to vnd_open fails. Both
+arguments may be NULL to indicate that the error information is not
+requested, though this is not recommended.
+
+.LP
+The vnd_unlink function unlinks the vnd device specified by the vnd
+handle vhp. This unlink is similar to the use of unlink in a file
+system. After a call to unlink, the vnd device will no longer be
+accessible by callers to vnd_open and the name will be available for
+use in vnd_create. However, the device will continue to exist until
+all handles to the device have been closed.
+
+.LP
+The vnd_close function relinquishes the vnd device referenced by the
+handle vhp. After a call to vnd_close, the handle is invalidated and
+must not be used by the consumer again. The act of calling vnd_close
+on a handle does not remove the device. The device is persisted as
+long as vnd_unlink has not been called on the device or the containing
+zone has not been destroyed.
+
+.SH RETURN VALUES
+
+.LP
+Upon successful completion, the functions vnd_create and vnd_open
+return a pointer to a vnd_handle_t. This handle is used for all
+subsequent library operations. If either function fails, then a NULL
+pointer is returned and more detailed error information is filled into
+the integers pointed to by vnderr and syserr. The vnderr and syserr
+correspond to the values that would normally be returned by a call to
+vnd_errno(3VND) and vnd_syserrno(3VND). For the full list of possible
+errors see libvnd(3LIB).
+
+.LP
+The vnd_unlink function returns zero on success and -1 on failure. On
+failure, the vnd and system errnos are updated and available through
+the vnd_errno(3VND) and vnd_syserrno(3VND) functions.
+
+.LP
+The vnd_close function does not return any values nor does it set
+vnderr or syserr. The handle passed to vnd_close can no longer be
+used.
+
+.SH EXAMPLES
+.LP
+Example 1 Creating a device
+.sp
+.LP
+
+The following sample C program shows how to create a vnd device over
+an existing datalink named "net0" that other applications can open
+and use as "vnd0".
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+ vnd_handle_t *vhp;
+ vnd_errno_t vnderr;
+ int syserr;
+
+ /* Errors are considered fatal */
+ vhp = vnd_create(NULL, "net0", "vnd0", &vnderr, &syserr);
+
+ if (vhp == NULL) {
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to create device: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to create device: %s",
+ vnd_strerror(vnderr));
+ return (1);
+ }
+
+ (void) printf("successfully created vnd0\n");
+ vnd_close(vhp);
+ return (0);
+}
+.fi
+.in -2
+
+.LP
+Example 2 Opening an existing device in another zone
+.sp
+.LP
+
+The following sample C program opens the device named "vnd1" in the zone
+named "turin" for further use.
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+ vnd_handle_t *vhp;
+ vnd_errno_t vnderr;
+ int syserr, ret;
+
+ vhp = vnd_open("turin", "vnd1", &vnderr, &syserr);
+ if (vhp != NULL) {
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strerror(vnderr));
+ return (1);
+ }
+
+ /*
+ * Use the device vnd1 with the handle vhp with any of
+ * the other interfaces documented in libvnd(3LIB) here.
+ *
+ * After an arbitrary amount of code, the program will
+ * set the variable ret with the exit code for the
+ * program and should execute the following code before
+ * returning.
+ */
+ vnd_close(vhp);
+ return (ret);
+}
+.fi
+.in -2
+
+
+.LP
+Example 3 Removing a device
+.sp
+.LP
+
+The following sample C program removes a vnd device named vnd0. This
+program makes it so no additional programs can access the device.
+However, if anyone is actively using it, it will still exist,
+similar to calling unlink(2).
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+ vnd_handle_t *vhp;
+ vnd_errno_t vnderr;
+ int syserr, ret;
+
+ vhp = vnd_open(NULL, "vnd0", &vnderr, &syserr);
+ if (vhp != NULL) {
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strerror(vnderr));
+ return (1);
+ }
+
+ if (vnd_unlink(vhp) != 0) {
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to unlink device: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to unlink device: %s",
+ vnd_strerror(vnderr));
+ ret = 1;
+ } else {
+ (void) printf("successfully unlinked vnd0!\n");
+ ret = 0;
+ }
+
+ vnd_close(vhp);
+ return (ret);
+}
+.fi
+.in -2
+
+.SH ATTRIBUTES
+.sp
+.LP
+See attributes(5) for descriptions of the following attributes:
+
+.sp
+.TS
+box;
+c | c
+l | l .
+ATTRIBUTE TYPE ATTRIBUTE VALUE
+_
+Stability Committed
+_
+MT-Level See "THREADING" in libvnd(3LIB)
+.TE
+
+.SH SEE ALSO
+
+libvnd(3LIB), vnd_errno(3VND), vnd_syserrno(3VND), attributes(5), privileges(5)
diff --git a/usr/src/man/man3vnd/vnd_errno.3vnd b/usr/src/man/man3vnd/vnd_errno.3vnd
new file mode 100644
index 0000000000..ddd6126dd1
--- /dev/null
+++ b/usr/src/man/man3vnd/vnd_errno.3vnd
@@ -0,0 +1,170 @@
+'\" te
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\"
+.TH VND_ERRNO 3VND "Feb 21, 2014"
+
+.SH NAME
+
+vnd_errno, vnd_syserrno, vnd_strerror, vnd_strsyserror \- obtain and
+translate vnd errors
+
+
+.SH SYNOPSIS
+
+.LP
+.nf
+cc [ flag... ] file... -lvnd [ library... ]
+#include <libvnd.h>
+
+uint32_t vnd_errno(vnd_handle_t *vhp);
+
+const char *vnd_strerror(vnd_errno_t err);
+
+int vnd_syserrno(vnd_handle_t *vhp);
+
+const char *vnd_strsyserror(int syserr);
+.fi
+
+.SH DESCRIPTION
+
+.LP
+The libvnd(3LIB) library supports a complementary array of errors that
+give more specific error information than the traditional set of
+system errors available via errno(3C). When an error occurs, consumers
+should call the vnd_errno function first and check its value. If the
+value of the vnd_errno_t is VND_E_SYS, then the system errno should be
+checked. If the vnd_errno_t is not VND_E_SYS, then the contents of the
+system errno returned from vnd_syserrno are undefined. Both the vnd
+and system errors are only valid for a given handle after a libvnd
+library function returned an error and before another libvnd library
+function is called on the same handle. The act of making an additional
+function call with the same vnd_handle_t invalidates any prior vnd or
+system error numbers. For the full list of valid vnd errors see
+libvnd(3LIB). For the full list of valid system errors, see Intro(2).
+
+.LP
+The vnd_errno and vnd_syserrno functions retrieve the most recent vnd
+and syserr error number respectively from a vnd handle vhp.
+
+.LP
+The vnd_strerror function translates a vnd_errno_t err to a
+corresponding string and returns a pointer to that constant string.
+
+.LP
+The vnd_syserrno function is analogous to the vnd_strerror function,
+except that it translates a system error back to a string.
+
+
+.SH RETURN VALUES
+
+.LP
+The vnd_errno function returns a vnd_errno_t which contains the vnd
+error information.
+
+.LP
+The vnd_syserror function returns an integer which contains the system
+error information. These values are the same as those returned by
+errno(3C).
+
+.LP
+The vnd_strerror function returns a pointer to a constant string. If
+the error passed in is unknown, the string "unknown error" is
+returned.
+
+.LP
+The vnd_strsyserror function returns a pointer to the translated
+constant string. If an unknown error number is passed, it returns the
+string "Unknown error". If an error occurs, it returns a NULL pointer.
+
+.SH EXAMPLES
+
+.LP
+Example 1 Obtaining errors from a vnd_handle_t
+
+.sp
+.LP
+The following sample C function, which can be incorporated into a larger
+program, shows how to obtain the vnd and system errors from a
+vnd_handle_t after a vnd interface on a handle failed.
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+
+static void
+print_errnos(vnd_handle_t *vhp)
+{
+ vnd_errno_t vnderr;
+ int syserr;
+
+ vnderr = vnd_errno(vhp);
+ syserr = vnd_syserrno(vhp);
+
+ (void) printf("vnd err: %d, sys err: %d\n",
+ vnderr, syserr);
+}
+.fi
+.in -2
+
+.LP
+Example 2 A perror-like function
+
+.sp
+.LP
+The following sample C function which can be incorporated into a
+larger program shows how to write a perror-like function to print
+out error messages for a vnd device.
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+
+static void
+sample_perror(const char *msg, vnd_error_t vnderr, int syserr)
+{
+ (void) fprintf(stderr, "%s: %s", msg,
+ vnderr != VND_E_SYS ? vnd_strerror(vnderr) :
+ vnd_strsyserror(syserr));
+}
+.fi
+.in -2
+
+.SH ATTRIBUTES
+.sp
+.LP
+See attributes(5) for descriptions of the following attributes:
+
+.sp
+.TS
+box;
+c | c
+l | l .
+ATTRIBUTE TYPE ATTRIBUTE VALUE
+_
+Stability Committed
+_
+MT-Level See below
+.TE
+
+.LP
+The MT-Level of the functions vnd_strerror and vnd_strsyserror is
+MT-Safe. See "THREADING" in libvnd(3LIB) for a discussion of the
+MT-Level of vnd_errno and vnd_syserrno.
+
+
+.SH SEE ALSO
+
+Intro(2), errno(3C), libvnd(3LIB), attributes(5)
diff --git a/usr/src/man/man3vnd/vnd_frameio_read.3vnd b/usr/src/man/man3vnd/vnd_frameio_read.3vnd
new file mode 100644
index 0000000000..5fc65c96a3
--- /dev/null
+++ b/usr/src/man/man3vnd/vnd_frameio_read.3vnd
@@ -0,0 +1,705 @@
+'\" te
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\"
+.TH VND_FRAMEIO_READ 3VND "Mar 06, 2014"
+
+.SH NAME
+
+vnd_frameio_read, vnd_frameio_write \- perform framed I/O to a vnd device
+
+.SH SYNOPSIS
+
+.LP
+.nf
+cc [ flag... ] file... -lvnd [ library... ]
+#include <libvnd.h>
+
+int vnd_frameio_read(vnd_handle_t *vhp, frameio_t *fiop);
+
+int vnd_frameio_write(vnd_handle_t *vhp, frameio_t *fiop);
+.fi
+
+.SH DESCRIPTION
+.LP
+Framed I/O is a general means to manipulate data that is inherently
+framed, meaning that there is a maximum frame size, but the data may
+often be less than that size. As an example, an Ethernet device's MTU
+describes the maximum frame size, but the size of an individual frame
+is often much less. You can read a single frame at a time, or you can
+read multiple frames in a single call.
+
+In addition, framed I/O allows the consumer to break individual frames
+into a series of vectors. This is analogous to the use of an iovec(9S)
+with readv(2) and writev(2).
+
+vnd_frameio_read performs a framed I/O read of the device represented by
+the handle vhp, with the framed I/O data described by fiop.
+vnd_frameio_write works in the same manner, except performing a write
+instead of a read.
+
+.LP
+The basic vector component of the frameio_t is the framevec_t. Each
+framevec_t represents a single vector entry. An array of these is
+present in the frameio_t. The framevec_t structure has the following
+members:
+
+.in +2
+.nf
+void *fv_buf /* data buffer */
+size_t fv_buflen; /* total size of buffer */
+size_t fv_actlen; /* amount of buffer consumed */
+.fi
+.in -2
+
+.LP
+The fv_buf member points to a buffer which contains the data for this
+individual vector. When reading, data is consumed from fv_buf. When
+writing, data is written into fv_buf.
+
+The fv_buflen should indicate the total amount of data that is in the
+buffer. When reading, it indicates the size of the buffer. It must be
+set prior to calling vnd_frameio_read(). When writing, it indicates the
+amount of data that is valid in the buffer.
+
+The fv_actlen is a read-only member. It is set on successful return of
+the functions vnd_frameio_read and vnd_frameio_write. When reading, it
+is updated with the amount of data that was read into fv_buf. When
+writing, it is instead updated with the amount of data from fv_buf that
+was actually consumed. Generally when writing data, a framevec_t will
+either be entirely consumed or it will not be consumed at all.
+
+
+.LP
+A series of framevec_t's is encapsulated in a frameio_t. The frameio_t
+structure has the following members:
+
+.in +2
+.nf
+uint_t fio_version; /* current version */
+uint_t fio_nvpf; /* number of vectors in one frame */
+uint_t fio_nvecs; /* The total number of vectors */
+framevec_t fio_vecs[]; /* vectors */
+.fi
+.in -2
+
+.LP
+The fio_version member represents the current version of the frameio_t.
+The fio_version should be set to the macro FRAMEIO_CURRENT_VERSION,
+which is currently 1.
+
+The members fio_nvpf and fio_nvecs describe the number of frames that
+exist. fio_nvecs describes the total number of vectors that are present
+in fio_vecs. The upper bound on this is described by FRAMEIO_NVECS_MAX
+which is currently 32. fio_nvpf describe the number of vectors that
+should be used to make up each frame. By setting fio_vecs to be an even
+multiple of fio_nvpf, multiple frames can be read or written in a single
+call.
+
+After a call to vnd_frameio_read or vnd_frameio_write fio_nvecs is
+updated with total number of vectors read or written to. This value can
+be divided by fio_nvpf to determine the total number of frames that were
+written or read.
+
+.LP
+Each frame can be broken down into a series of multiple vectors. As an
+example, someone might want to break Ethernet frames into mac headers
+and payloads. The value of fio_nvpf would be set to two, to indicate
+that a single frame consists of two different vector components. The
+member fio_nvecs describes the total number of frames. As such, the
+value of fio_vecs divided by fio_nvpf describes the total number of
+frames that can be consumed in one call. As a result of this, fio_nvpf
+must evenly divide fio_vecs. If fio_nvpf is set to two and
+fio_nvecs is set to ten, then a total of five frames can be processed
+at once, each frame being broken down into two different vector
+components.
+
+A given frame will never overflow the number of vectors described by
+fio_nvpf. Consider the case where each vector component has a buffer
+sized to 1518 bytes, fio_nvpf is set to one, and fio_nvecs is set to
+three. If a call to vnd_frameio_read is made and four 500 byte Ethernet
+frames come in, then each frame will be mapped to a single vector. The
+500 bytes will be copied into fio_nvecs[i]->fio_buf and
+fio_nvecs[i]->fio_actlen will be set to 500. To contrast this, if
+readv(2) had been called, the first three frames would all be in the
+first iov and the fourth frame's first eight bytes would be in the first
+iov and the remaining in the second.
+
+.LP
+The user must properly initialize fio_nvecs framevec_t's worth of the
+fio_vecs array. When multiple vectors comprise a frame, fv_buflen data
+is consumed before moving onto the next vector. Consider the case
+where the user wants to break a vector into three different
+components, an 18 byte vector for an Ethernet VLAN header, a 20 byte
+vector for an IPv4 header, and a third 1500 byte vector for the
+remaining payload. If a frame was received that only had 30 bytes,
+then the first 18 bytes would fill up the first vector, the remaining
+12 bytes would fill up the IPv4 header. If instead a 524 byte frame
+came in, then the first 18 bytes would be placed in the first vector,
+the next 24 bytes would be placed in the next vector, and the remaining
+500 bytes in the third.
+
+.LP
+The functions vnd_frameio_read and vnd_frameio_write operate in both
+blocking and non-blocking mode. If either O_NONBLOCK or O_NDELAY have
+been set on the file descriptor, then the I/O will behave in
+non-blocking mode. When in non-blocking mode, if no data is available
+when vnd_frameio_read is called, EAGAIN is returned. When
+vnd_frameio_write is called in non-blocking mode, if sufficient buffer
+space to hold all of the output frames is not available, then
+vnd_frameio_write will return EAGAIN. To know when the given vnd device
+has sufficient space, the device fires POLLIN/POLLRDNORM when data is
+available for read and POLLOUT/POLLRDOUT when space in the buffer has
+opened up for write. These events can be watched for through
+port_associate(3C) and similar routines with a file descriptor returned
+from vnd_polfd(3VND).
+
+.LP
+When non-blocking mode is disabled, calls to vnd_frameio_read will
+block until some amount of data is available. Calls to
+vnd_frameio_write will block until sufficient buffer space is
+available.
+
+.LP
+Similar to read(2) and write(2), vnd_frameio_read and
+vnd_frameio_write make no guarantees about the ordering of data when
+multiple threads simultaneously call the interface. While the data
+itself will be atomic, the ordering of multiple simultaneous calls is
+not defined.
+
+.SH RETURN VALUES
+
+.LP
+The vnd_frameio_read function returns zero on success. The member
+fio_nvecs of fiop is updated with the total number of vectors that had
+data read into them. Each updated framevec_t will have the buffer
+pointed to by fv_buf filled in with data, and fv_actlen will be
+updated with the amount of valid data in fv_buf.
+
+.LP
+The vnd_frameio_write function returns zero on success. The member
+fio_nvecs of fiop is updated with the total number of vectors that
+were written out to the underlying datalink. The fv_actlen of each
+vector is updated to indicate the amount of data that was written from
+that buffer.
+
+.LP
+On failure, both vnd_frameio_read and vnd_frameio_write return -1. The
+vnd and system error numbers are updated and available via
+vnd_errno(3VND) and vnd_syserrno(3VND). See ERRORS below for a list of
+errors and their meaning.
+
+
+.SH ERRORS
+.LP
+The functions vnd_frameio_read and vnd_frameio_write always set the
+vnd error to VND_E_SYS. The following system errors will be
+encountered:
+
+.sp
+.ne 2
+.na
+EAGAIN
+.ad
+.RS 10n
+Insufficient system memory was available for the operation.
+.sp
+Non-blocking mode was enabled and during the call to vnd_frameio_read,
+no data was available. Non-blocking mode was enabled and during the call
+to vnd_frameio_write, insufficient buffer space was available.
+.RE
+
+.sp
+.ne 2
+.na
+ENXIO
+.ad
+.RS 10n
+The vnd device referred to by vhp is not currently attached to an
+underlying data link and cannot send data.
+.RE
+
+.sp
+.ne 2
+.na
+EFAULT
+.ad
+.RS 10n
+The fiop argument points to an illegal address or the fv_buf members of
+the framevec_t's associated with the fiop member fio_vecs point to
+illegal addresses.
+.RE
+
+.sp
+.ne 2
+.na
+EINVAL
+.ad
+.RS 10n
+The fio_version member of fiop was unknown, the number of vectors
+specified by fio_nvecs is zero or greater than FRAMEIO_NVECS_MAX,
+fio_nvpf equals zero, fio_nvecs is not evenly divisible by fio_nvpf, or
+a buffer in fio_vecs[] has set fv_buf or fv_buflen to zero.
+.RE
+
+
+.sp
+.ne 2
+.na
+EINTR
+.ad
+.RS 10n
+A signal was caught during vnd_frameio_read or vnd_frameio_write, and no
+data was transferred.
+.RE
+
+
+.sp
+.ne 2
+.na
+EOVERFLOW
+.ad
+.RS 10n
+During vnd_frameio_read, the size of a frame specified by fiop->fio_nvpf
+and fiop->fio_vecs[].fv_buflen cannot contain a frame.
+.sp
+In a ILP32 environment, more data than UINT_MAX would be set in
+fv_actlen.
+.RE
+
+
+.sp
+.ne 2
+.na
+ERANGE
+.ad
+.RS 10n
+During vnd_frameio_write, the size of a frame is less than the device's
+minimum transmission unit or it is larger than the size of the maximum
+transmission unit.
+.RE
+
+
+.SH EXAMPLES
+
+.LP
+Example 1 Read a single frame with a single vector
+
+.sp
+.LP
+The following sample C program opens an existing vnd device named
+"vnd0" in the current zone and performs a blocking read of a single
+frame from it.
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+ vnd_handle_t *vhp;
+ vnd_errno_t vnderr;
+ int syserr, i;
+ frameio_t *fiop;
+
+ fiop = malloc(sizeof (frameio_t) + sizeof (framevec_t));
+ if (fiop == NULL) {
+ perror("malloc frameio_t");
+ return (1);
+ }
+ fiop->fio_version = FRAMEIO_CURRENT_VERSION;
+ fiop->fio_nvpf = 1;
+ fiop->fio_nvecs = 1;
+ fiop->fio_vecs[0].fv_buf = malloc(1518);
+ fiop->fio_vecs[0].fv_buflen = 1518;
+ if (fiop->fio_vecs[0].fv_buf == NULL) {
+ perror("malloc framevec_t.fv_buf");
+ free(fiop);
+ return (1);
+ }
+
+ vhp = vnd_open(NULL, "vnd1", &vnderr, &syserr);
+ if (vhp != NULL) {
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strerror(vnderr));
+ free(fiop->fio_vecs[0].fv_buf);
+ free(fiop);
+ return (1);
+ }
+
+ if (frameio_read(vhp, fiop) != 0) {
+ vnd_errno_t vnderr = vnd_errno(vhp);
+ int syserr = vnd_syserrno(vhp);
+
+ /* Most consumers should retry on EINTR */
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to read: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to read: %s",
+ vnd_strerror(vnderr));
+ vnd_close(vhp);
+ free(fiop->fio_vecs[0].fv_buf);
+ free(fiop);
+ return (1);
+ }
+
+
+ /* Consume the data however it's desired */
+ (void) printf("received %d bytes\n", fiop->fio_vecs[0].fv_actlen);
+ for (i = 0; i < fiop->fio_vecs[0].fv_actlen)
+ (void) printf("%x ", fiop->fio_vecs[0].fv_buf[i]);
+
+ vnd_close(vhp);
+ free(fiop->fio_vecs[0].fv_buf);
+ free(viop);
+ return (0);
+}
+.fi
+.in -2
+
+.LP
+Example 2 Write a single frame with a single vector
+.sp
+.LP
+The following sample C program opens an existing vnd device named
+"vnd0" in the current zone and performs a blocking write of a single
+frame to it.
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+#include <stdio.h>
+#include <string.h>
+
+int
+main(void)
+{
+ vnd_handle_t *vhp;
+ vnd_errno_t vnderr;
+ int syserr;
+ frameio_t *fiop;
+
+ fiop = malloc(sizeof (frameio_t) + sizeof (framevec_t));
+ if (fiop == NULL) {
+ perror("malloc frameio_t");
+ return (1);
+ }
+ fiop->fio_version = FRAMEIO_CURRENT_VERSION;
+ fiop->fio_nvpf = 1;
+ fiop->fio_nvecs = 1;
+ fiop->fio_vecs[0].fv_buf = malloc(1518);
+ if (fiop->fio_vecs[0].fv_buf == NULL) {
+ perror("malloc framevec_t.fv_buf");
+ free(fiop);
+ return (1);
+ }
+
+ /*
+ * Fill in your data however you desire. This is an entirely
+ * invalid frame and while the frameio write may succeed, the
+ * networking stack will almost certainly drop it.
+ */
+ (void) memset(fiop->fio_vecs[0].fv_buf, 'r', 1518);
+ fiop->fio_vecs[0].fv_buflen = 1518;
+
+ vhp = vnd_open(NULL, "vnd0", &vnderr, &syserr);
+ if (vhp != NULL) {
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strerror(vnderr));
+ free(fiop->fio_vecs[0].fv_buf);
+ free(fiop);
+ return (1);
+ }
+
+ if (frameio_write(vhp, fiop) != 0) {
+ /* Most consumers should retry on EINTR */
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to write: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to write: %s",
+ vnd_strerror(vnderr));
+ vnd_close(vhp);
+ free(fiop->fio_vecs[0].fv_buf);
+ free(fiop);
+ return (1);
+ }
+
+
+ (void) printf("wrote %d bytes\n", fiop->fio_vecs[0].fv_actlen);
+
+ vnd_close(vhp);
+ free(fiop->fio_vecs[0].fv_buf);
+ free(viop);
+ return (0);
+}
+.fi
+.in -2
+
+.LP
+Example 3 Read frames comprised of multiple vectors
+.sp
+.LP
+The following sample C program is similar to example 1, except instead
+of reading a single frame consisting of a single vector it reads
+multiple frames consisting of two vectors. The first vector has room for
+an 18 byte VLAN enabled Ethernet header and the second vector has room
+for a 1500 byte payload.
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+ vnd_handle_t *vhp;
+ vnd_errno_t vnderr;
+ int syserr, i, nframes;
+ frameio_t *fiop;
+
+ /* Allocate enough framevec_t's for 5 frames */
+ fiop = malloc(sizeof (frameio_t) + sizeof (framevec_t) * 10);
+ if (fiop == NULL) {
+ perror("malloc frameio_t");
+ return (1);
+ }
+ fiop->fio_version = FRAMEIO_CURRENT_VERSION;
+ fiop->fio_nvpf = 2;
+ fiop->fio_nvecs = 10;
+ for (i = 0; i < 10; i += 2) {
+ fiop->fio_vecs[i].fv_buf = malloc(18);
+ fiop->fio_vecs[i].fv_buflen = 18;
+ if (fiop->fio_vecs[i].fv_buf == NULL) {
+ perror("malloc framevec_t.fv_buf");
+ /* Perform appropriate memory cleanup */
+ return (1);
+ }
+ fiop->fio_vecs[i+1].fv_buf = malloc(1500);
+ fiop->fio_vecs[i+1].fv_buflen = 1500;
+ if (fiop->fio_vecs[i+1].fv_buf == NULL) {
+ perror("malloc framevec_t.fv_buf");
+ /* Perform appropriate memory cleanup */
+ return (1);
+ }
+ }
+
+ vhp = vnd_open(NULL, "vnd1", &vnderr, &syserr);
+ if (vhp != NULL) {
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strerror(vnderr));
+ /* Perform appropriate memory cleanup */
+ return (1);
+ }
+
+ if (frameio_read(vhp, fiop) != 0) {
+ /* Most consumers should retry on EINTR */
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to read: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to read: %s",
+ vnd_strerror(vnderr));
+ vnd_close(vhp);
+ /* Perform appropriate memory cleanup */
+ return (1);
+ }
+
+ /* Consume the data however it's desired */
+ nframes = fiop->fio_nvecs / fiop->fio_nvpf;
+ (void) printf("consumed %d frames!\n", nframes);
+ for (i = 0; i < nframes; i++) {
+ (void) printf("received %d bytes of Ethernet Header\n",
+ fiop->fio_vecs[i].fv_actlen);
+ (void) printf("received %d bytes of payload\n",
+ fiop->fio_vecs[i+1].fv_actlen);
+ }
+
+ vnd_close(vhp);
+ /* Do proper memory cleanup */
+ return (0);
+}
+.fi
+.in -2
+
+.LP
+Example 4 Perform non-blocking reads of multiple frames with a
+single vector
+.sp
+.LP
+In this sample C program, opens an existing vnd device named "vnd0" in
+the current zone, ensures that it is in non-blocking mode, and uses
+event ports to do device reads.
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+#include <stdio.h>
+#include <port.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/tpyes.h>
+#include <fcntl.h>
+
+int
+main(void)
+{
+ vnd_handle_t *vhp;
+ vnd_errno_t vnderr;
+ int syserr, i, nframes, port, vfd;
+ frameio_t *fiop;
+
+ port = port_create();
+ if (port < 0) {
+ perror("port_create");
+ return (1);
+ }
+ /* Allocate enough framevec_t's for 10 frames */
+ fiop = malloc(sizeof (frameio_t) + sizeof (framevec_t) * 10);
+ if (fiop == NULL) {
+ perror("malloc frameio_t");
+ (void) close(port);
+ return (1);
+ }
+ fiop->fio_version = FRAMEIO_CURRENT_VERSION;
+ fiop->fio_nvpf = 1;
+ fiop->fio_nvecs = 10;
+ for (i = 0; i < 10; i++) {
+ fiop->fio_vecs[i].fv_buf = malloc(1518);
+ fiop->fio_vecs[i].fv_buflen = 1518;
+ if (fiop->fio_vecs[i].fv_buf == NULL) {
+ perror("malloc framevec_t.fv_buf");
+ /* Perform appropriate memory cleanup */
+ (void) close(port);
+ return (1);
+ }
+ }
+
+ vhp = vnd_open(NULL, "vnd1", &vnderr, &syserr);
+ if (vhp != NULL) {
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strerror(vnderr));
+ /* Perform appropriate memory cleanup */
+ (void) close(port);
+ return (1);
+ }
+ vfd = vnd_pollfd(vhp);
+ if (fcntl(fd, F_SETFL, O_NONBLOCK) != 0) {
+ (void) fprintf(stderr, "failed to enable non-blocking mode: %s",
+ strerrror(errno));
+ }
+
+ for (;;) {
+ port_event_t pe;
+
+ if (port_associate(port, PORT_SOURCE_FD, vfd, POLLIN,
+ vhp) != 0) {
+ perror("port_associate");
+ vnd_close(vhp);
+ /* Perform appropriate memory cleanup */
+ (void) close(port);
+ return (1);
+ }
+
+ if (port_get(port, &pe, NULL) != 0) {
+ if (errno == EINTR)
+ continue;
+ perror("port_associate");
+ vnd_close(vhp);
+ /* Perform appropriate memory cleanup */
+ (void) close(port);
+ return (1);
+ }
+
+ /*
+ * Most real applications will need to compare the file
+ * descriptor and switch on it. In this case, assume
+ * that the fd in question that is readable is 'vfd'.
+ */
+ if (frameio_read(pe.portev_user, fiop) != 0) {
+ vnd_errno_t vnderr = vnd_errno(vhp);
+ int syserr = vnd_syserrno(vhp);
+
+ if (vnderr == VND_E_SYS && (syserr == EINTR ||
+ syserr == EAGAIN))
+ continue;
+ (void) fprintf(stderr, "failed to get read: %s",
+ vnd_strsyserror(vnderr));
+ vnd_close(vhp);
+ /* Perform appropriate memory cleanup */
+ (void) close(port);
+ return (1);
+ }
+
+ /* Consume the data however it's desired */
+ nframes = fiop->fio_nvecs / fiop->fio_nvpf;
+ for (i = 0; i < nframes; i++) {
+ (void) printf("frame %d is %d bytes large\n", i,
+ fiop->fio_vecs[i].fv_actlen);
+ }
+
+ }
+
+ vnd_close(vhp);
+ /* Do proper memory cleanup */
+ return (0);
+}
+.fi
+.in -2
+
+.SH ATTRIBUTES
+.LP
+See attributes(5) for descriptions of the following attributes:
+
+.sp
+.TS
+box;
+c | c
+l | l .
+ATTRIBUTE TYPE ATTRIBUTE VALUE
+_
+Stability Committed
+_
+MT-Level See "THREADING" in libvnd(3LIB)
+.TE
+
+
+.SH SEE ALSO
+
+Intro(2), getmsg(2), read(2), readv(2), write(2), writev(2),
+libvnd(3VND), vnd_errno(3VND), vnd_pollfd(3VND), vnd_syserrno(3VND),
+iovec(9S)
diff --git a/usr/src/man/man3vnd/vnd_pollfd.3vnd b/usr/src/man/man3vnd/vnd_pollfd.3vnd
new file mode 100644
index 0000000000..500d3bac99
--- /dev/null
+++ b/usr/src/man/man3vnd/vnd_pollfd.3vnd
@@ -0,0 +1,155 @@
+'\" te
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\"
+.TH VND_POLLFD 3VND "Feb 21, 2014"
+
+.SH NAME
+
+vnd_pollfd \- get file descriptor for polling
+
+.SH SYNOPSIS
+
+.LP
+.nf
+cc [ flag... ] file... -lvnd [ library... ]
+#include <libvnd.h>
+
+int vnd_pollfd(vnd_handle_t *vhp);
+.fi
+
+.SH DESCRIPTION
+.LP
+The vnd_pollfd() function returns an integer id which corresponds to
+the file descriptor that represents the underlying device that is
+associated with the vnd handle vhp. This file descriptor is suitable
+for use with port_associate(3C) and similar polling techniques such as
+poll(2). Use of the file descriptor outside of these uses may cause
+undocumented behavior from the rest of the library.
+
+.LP
+The file descriptor in question is still managed by libvnd. The caller
+must not call close(2) on it. Once vnd_close(3VND) has been called,
+any further use of the file descriptor is undefined behavior.
+
+
+.SH RETURN VALUES
+.LP
+The function returns the integer id of the file descriptor that
+corresponds to the underlying vnd device.
+
+.SH EXAMPLES
+
+.LP
+Example 1 Use event ports for vnd notifications
+.sp
+.LP
+The following sample C program shows how to use the vnd_pollfd
+function with event ports to be notified whenever there is data
+available to be read. This program assumes that a vnd device named
+"vnd0" exists in the current zone. For an example of creating the
+device, see Example 1 in vnd_create(3VND).
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+#include <port.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+int
+main(void)
+{
+ vnd_handle_t *vhp;
+ vnd_errno_t vnderr;
+ int port, syserr, vfd, ret;
+
+ port = port_create();
+ if (port < 0) {
+ perror("port_create");
+ return (1);
+ }
+
+ vhp = vnd_open(NULL, "vnd0", &vnderr, &syserr);
+ if (vhp == NULL) {
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strerror(vnderr));
+ (void) close(port);
+ return (1);
+ }
+
+ vfd = vnd_pollfd(vhp);
+ if (fcntl(vfd, F_SETFL, O_NONBLOCK) != 0) {
+ perror("fcntl");
+ vnd_close(vhp);
+ (void) close(port);
+ return (1);
+ }
+
+ if (port_associate(port, PORT_SOURCE_FD, vfd, POLLIN, NULL) != 0) {
+ perror("port_associate");
+ vnd_close(vhp);
+ (void) close(port);
+ return (1);
+ }
+
+ for (;;) {
+ port_event_t pe;
+
+
+ if (port_get(port, &pe, NULL) != 0) {
+ if (errno == EINTR)
+ continue;
+ perror("port_get");
+ vnd_close(vhp);
+ (void) close(port);
+ return (1);
+ }
+
+ /*
+ * Read the data with vnd_frameio_read(3VND) and
+ * optionally break out of the loop or continue to the
+ * next iteration and reassociate vfd with the event
+ * port.
+ */
+ }
+}
+.fi
+.in -2
+
+.SH ATTRIBUTES
+.sp
+.LP
+See attributes(5) for descriptions of the following attributes:
+
+.sp
+.TS
+box;
+c | c
+l | l .
+ATTRIBUTE TYPE ATTRIBUTE VALUE
+_
+Stability Committed
+_
+MT-Level See "THREADING" in libvnd(3LIB)
+.TE
+
+.SH SEE ALSO
+
+close(2), poll(2), port_create(3C), libvnd(3LIB), vnd_close(3VND)
diff --git a/usr/src/man/man3vnd/vnd_prop_get.3vnd b/usr/src/man/man3vnd/vnd_prop_get.3vnd
new file mode 100644
index 0000000000..e47698c85e
--- /dev/null
+++ b/usr/src/man/man3vnd/vnd_prop_get.3vnd
@@ -0,0 +1,242 @@
+'\" te
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\"
+.TH VND_PROP_GET 3VND "Feb 21, 2014"
+
+.SH NAME
+
+vnd_prop_get, vnd_prop_set \- get and set vnd properties
+
+.SH SYNOPSIS
+
+.LP
+.nf
+cc [ flag... ] file... -lvnd [ library... ]
+#include <libvnd.h>
+
+int vnd_prop_get(vnd_handle_t *vhp, vnd_prop_t prop, void *buf, size_t len);
+
+int vnd_prop_set(vnd_handle_t *vhp, vnd_prop_t prop, void *buf, size_t len);
+.fi
+
+.SH DESCRIPTION
+.LP
+The vnd_prop_get and vnd_prop_set functions are used to retrieve
+and set property values on the vnd_handle_t referred to by vhp. The
+property to get or set is specified by the argument prop. The
+argument buf and the size of buf, in len, should be a pointer to the
+appropriate structure for the property as defined in libvnd(3LIB).
+
+.LP
+All of the supported properties are listed and described in the
+libvnd(3LIB) manual page.
+
+
+.SH RETURN VALUES
+.LP
+On success, the vnd_prop_get and vnd_prop_set functions return zero.
+On failure, they return -1 and additional error information is
+available through vnd_errno(3VND) and vnd_syserrno(3VND).
+
+.LP
+When vnd_prop_get returns successfully, the contents of buf are
+filled in with the value of the corresponding property. The contents
+of buf should not change across a call to vnd_prop_set.
+
+.SH EXAMPLES
+
+.LP
+Example 1 Getting the value of the rxbuf property
+.LP
+The following sample C program retrieves the value of the
+rxbuf property and prints it to standard out.
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+ vnd_handle_t *vhp;
+ vnd_errno_t vnderr;
+ int syserr;
+ vnd_prop_buf_t vpb;
+
+ vhp = vnd_open(NULL, "vnd1", &vnderr, &syserr);
+ if (vhp != NULL) {
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strerror(vnderr));
+ return (1);
+ }
+
+ if (vnd_prop_get(vhp, VND_PROP_RXBUF, &vpn, sizeof (vpn)) != 0) {
+ vnderr = vnd_errno(vhp);
+ syserr = vnd_syserrno(vhp);
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to get VND_PROP_RXBUF: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to get VND_PROP_RXBUF: %s",
+ vnd_strerror(vnderr));
+ return (1);
+ }
+
+ (void) printf("recieve buffer size is %d bytes\n", vpb.vpb_size);
+
+ vnd_close(vnd);
+ return (0);
+}
+.fi
+.in -2
+
+.LP
+EXAMPLE 2 Setting a property
+.LP
+This sample C program sets the property VND_PROP_RXBUF to the value of
+4200 bytes.
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+ vnd_handle_t *vhp;
+ vnd_errno_t vnderr;
+ int syserr;
+ vnd_prop_buf_t vpb;
+
+ vhp = vnd_open(NULL, "vnd1", &vnderr, &syserr);
+ if (vhp != NULL) {
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strerror(vnderr));
+ return (1);
+ }
+
+ vpb.vpb_size = 4200;
+ if (vnd_prop_set(vhp, VND_PROP_RXBUF, &vpb, sizeof (vpb)) != 0) {
+ vnderr = vnd_errno(vhp);
+ syserr = vnd_syserrno(vhp);
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to set VND_PROP_RXBUF: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to set VND_PROP_RXBUF: %s",
+ vnd_strerror(vnderr));
+ return (1);
+ }
+
+ (void) printf("successfully set VND_PROP_RXBUF to 4200\n");
+
+ vnd_close(vnd);
+ return (0);
+}
+.fi
+.in -2
+
+.LP
+Example 3 Setting a property to the value of another.
+.LP
+In this sample C program, we set the VND_PROP_TXBUF to the maximum
+allowable size as determined by the read-only property VND_PROP_MAXBUF.
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+ vnd_handle_t *vhp;
+ vnd_errno_t vnderr;
+ int syserr;
+ vnd_prop_buf_t vpb;
+
+ vhp = vnd_open(NULL, "vnd1", &vnderr, &syserr);
+ if (vhp != NULL) {
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strerror(vnderr));
+ return (1);
+ }
+
+ if (vnd_prop_get(vhp, VND_PROP_MAXBUF, &vpb, sizeof (vpb)) != 0) {
+ vnderr = vnd_errno(vhp);
+ syserr = vnd_syserrno(vhp);
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to get VND_PROP_MAXBUF: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to get VND_PROP_MAXBUF: %s",
+ vnd_strerror(vnderr));
+ return (1);
+ }
+
+ if (vnd_prop_set(vhp, VND_PROP_TXBUF, &vpb, sizeof (vpb)) != 0) {
+ vnderr = vnd_errno(vhp);
+ syserr = vnd_syserrno(vhp);
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to set VND_PROP_TXBUF: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to set VND_PROP_TXBUF: %s",
+ vnd_strerror(vnderr));
+ return (1);
+ }
+
+ (void) printf("successfully set VND_PROP_TXBUF to %d\n", vpb.vpb_size);
+
+ vnd_close(vnd);
+ return (0);
+}
+.fi
+
+.SH ATTRIBUTES
+.sp
+.LP
+See attributes(5) for descriptions of the following attributes:
+
+.sp
+.TS
+box;
+c | c
+l | l .
+ATTRIBUTE TYPE ATTRIBUTE VALUE
+_
+Stability Committed
+_
+MT-Level See "THREADING" in libvnd(3LIB)
+.TE
+
+.SH SEE ALSO
+libvnd(3VND), vnd_errno(3VND, vnd_syserrno(3VND)
diff --git a/usr/src/man/man3vnd/vnd_prop_iter.3vnd b/usr/src/man/man3vnd/vnd_prop_iter.3vnd
new file mode 100644
index 0000000000..29221734c5
--- /dev/null
+++ b/usr/src/man/man3vnd/vnd_prop_iter.3vnd
@@ -0,0 +1,148 @@
+'\" te
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\"
+.TH VND_PROP_ITER 3VND "Feb 21, 2014"
+
+.SH NAME
+
+vnd_prop_iter \- iterate vnd properties
+
+.SH SYNOPSIS
+
+.LP
+.nf
+cc [ flag... ] file... -lvnd [ library... ]
+#include <libvnd.h>
+
+typedef int (vnd_prop_iter_f)(vnd_handle_t *vhp, vnd_prop_t prop,
+ void *cbarg);
+
+int vnd_prop_iter(vnd_handle_t *vhp, vnd_prop_iter_f cb,
+ void *arg);
+.fi
+
+.SH DESCRIPTION
+.LP
+The vnd_prop_iter function iterates over all the available properties
+for the vnd handle vhp and calls the user supplied callback function
+cb. The argument arg is passed directly to the callback function.
+
+.LP
+The function specified by cb receives three arguments. The first, vhp,
+is the same vnd library handle that was passed to vnd_prop_iter. During
+the callback, the consumer should not call vnd_close(3VND). Doing so
+will lead to undefined and undocumented behavior. The second argument,
+prop, is the current property. While vnd_prop_iter guarantees that all
+properties will be recieved, it does not guarantee the order of them.
+The final argument, cbarg, is the same argument that the caller passed
+in during arg.
+
+.LP
+The return value of the callback function cb indicates whether or not
+property iteration should continue. To continue iteration, the
+function cb should return zero. Otherwise, to stop property iteration
+it should return non-zero.
+
+.SH RETURN VALUES
+
+.LP
+On success, the function vnd_prop_iter returns zero. If the callback
+function returned non-zero to terminate iteration, vnd_prop_iter will
+instead return one. In the case of library failure, vnd_prop_iter will
+return -1. In such cases, the vnd and system errors will be updated
+and available via vnd_errno(3VND) and vnd_syserrno(3VND).
+
+.SH EXAMPLES
+
+.LP
+Example 1 Print writeable properties
+
+.LP
+The following sample C program walks over every vnd property and
+prints out whether the property is read-only or read-write for the
+vnd device "vnd1" in the current zone.
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+static int
+print_prop(vnd_handle_t *vhp, vnd_prop_t prop, void *unused)
+{
+ boolean_t canwrite;
+
+ if (vnd_prop_writeable(vhp, &canwrite) != 0)
+ abort();
+
+ (void) printf("prop %d is %s", prop, canwrite == B_TRUE ? "rw" : "r-");
+ return (0);
+}
+
+int
+main(void)
+{
+ vnd_handle_t *vhp;
+ vnd_errno_t vnderr;
+ int syserr;
+
+ vhp = vnd_open(NULL, "vnd1", &vnderr, &syserr);
+ if (vhp != NULL) {
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strerror(vnderr));
+ return (1);
+ }
+
+ if (vnd_prop_iter(vhp, print_prop, NULL) != 0) {
+ vnderr = vnd_errno(vhp);
+ syserr = vnd_syserrno(vhp);
+ if (vnderr == VND_E_SYS)
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strsyserror(syserr));
+ else
+ (void) fprintf(stderr, "failed to open device: %s",
+ vnd_strerror(vnderr));
+ return (1);
+ }
+
+ vnd_close(vnd);
+ return (0);
+}
+.fi
+.in -2
+
+.SH ATTRIBUTES
+.sp
+.LP
+See attributes(5) for descriptions of the following attributes:
+
+.sp
+.TS
+box;
+c | c
+l | l .
+ATTRIBUTE TYPE ATTRIBUTE VALUE
+_
+Stability Committed
+_
+MT-Level See "THREADING" in libvnd(3LIB)
+.TE
+
+libvnd(3LIB), vnd_close(3VND), vnd_errno(3VND), vnd_syserrno(3VND)
diff --git a/usr/src/man/man3vnd/vnd_prop_writeable.3vnd b/usr/src/man/man3vnd/vnd_prop_writeable.3vnd
new file mode 100644
index 0000000000..c23414718b
--- /dev/null
+++ b/usr/src/man/man3vnd/vnd_prop_writeable.3vnd
@@ -0,0 +1,101 @@
+'\" te
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\"
+.TH VND_PROP_WRITEABLE 3VND "Feb 21, 2014"
+
+.SH NAME
+
+vnd_prop_writeable \- determine if a vnd property can be updated
+
+.SH SYNOPSIS
+
+.LP
+.nf
+cc [ flag... ] file... -lvnd [ library... ]
+#include <libvnd.h>
+
+int vnd_prop_writeable(vnd_prop_t prop, boolean_t *wp);
+.fi
+
+
+.SH DESCRIPTION
+.LP
+The vnd_prop_writeable function is used as a programmatic means to
+determine whether a given vnd property is writeable or not. The
+property to check is specified in prop and should be from the list
+described in libvnd(3VND). The argument wp is a pointer to a boolean_t
+which will be updated upon the successful completion of the function.
+The argument wp must be a valid pointer. If a property is writeable
+than the value pointed to by wp is set to B_TRUE. If the property is
+read-only, then the value is set to B_FALSE.
+
+
+.SH RETURN VALUES
+.LP
+On success, vnd_prop_writeable returns zero and the value pointed to
+by wp is updated with whether the property is writeable. If the
+property prop does not exist, then vnd_prop_writeable will return -1.
+
+.SH EXAMPLES
+.LP
+Example 1 Check whether the property VND_PROP_TXBUF is writable
+.LP
+The following sample C program checks whether the vnd property
+VND_PROP_TXBUF is writeable or not.
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int
+main(void)
+{
+ boolean_t canwrite;
+
+ if (vnd_prop_writeable(VND_PROP_TXBUF, &prop) != 0)
+ abort();
+
+ if (canwrite == B_TRUE)
+ (void) printf("VND_PROP_TXBUF is writeable\n");
+ else
+ (void) printf("VND_PROP_TXBUF is read only\n");
+
+ return (0);
+}
+.fi
+.in -2
+
+.SH ATTRIBUTES
+.sp
+.LP
+See attributes(5) for descriptions of the following attributes:
+
+.sp
+.TS
+box;
+c | c
+l | l .
+ATTRIBUTE TYPE ATTRIBUTE VALUE
+_
+Stability Committed
+_
+MT-Level MT-Safe
+.TE
+
+.SH SEE ALSO
+
+vndadm(1M), libvnd(3VND)
diff --git a/usr/src/man/man3vnd/vnd_walk.3vnd b/usr/src/man/man3vnd/vnd_walk.3vnd
new file mode 100644
index 0000000000..bed53130d3
--- /dev/null
+++ b/usr/src/man/man3vnd/vnd_walk.3vnd
@@ -0,0 +1,155 @@
+'\" te
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\"
+.TH VND_CREATE 3VND "Feb 21, 2014"
+
+.SH NAME
+
+vnd_walk \- walk all vnd devices
+
+
+.SH SYNOPSIS
+
+.LP
+.nf
+cc [ flag... ] file... -lvnd [ library... ]
+#include <libvnd.h>
+
+typedef int (*vnd_walk_cb_f)(vnd_info_t *viip, void *cbarg);
+
+int vnd_walk(vnd_walk_cb_t cb, void *arg, vnd_errno_t *vnderr, int *syserr);
+.fi
+
+
+.SH DESCRIPTION
+.LP
+The vnd_walk() function fires the callback function cb once for every
+vnd device that is visible in the current zone. If the caller is in
+the global zone, then all vnd devices in all zones will be walked. If
+the caller is in a non-global zone, then only the devices in that zone
+will be visible.
+
+.LP
+The function cb will be called with two arguments. The first argument,
+viip, is a pointer to a structure that contains information about the
+link. The second argument to the function cb, cbarg, is the same
+argument that is passed to the function vnd_walk as arg. To continue
+the function cb should return zero. If the function cb returns
+non-zero the walk will terminate.
+
+.LP
+As the vnd_walk function does not have a handle, errors are returned
+in vnderr and syserr. Both vnderr and syserr are allowed to be NULL
+pointers. If either one is a NULL pointer, then error information for
+that class of error will not be returned. It is not recommended that
+consumers supply NULL pointers.
+
+.LP
+The vnd_info_t structure contains the following members:
+
+.in +2
+.nf
+uint32_t vi_version
+zoneid_t vi_zone
+char vi_name[LIBVND_NAMELEN];
+char vi_datalink[LIBVND_NAMELEN];
+.fi
+.in -2
+
+.LP
+The member vi_version is guaranteed to be the first member of the
+structure. This number indicates the current revision of the structure
+and is set to the integer value 1. More properties may be added in
+future releases. Those properties will be tied to a greater version
+number so software knows whether or not it is legal to access them.
+
+.LP
+The vi_zone field indicates the zone id that the vnd device exists in.
+The vi_name field is the name of the vnd device. If the vnd_device is
+not linked, the name field is set to "<unknown>". The vi_datalink
+field is filled in with the name of the data link the vnd device is on
+top of.
+
+
+.SH RETURN VALUES
+
+.LP
+The vnd_walk function will return zero on success. If the consumer
+supplied callback function returned non-zero, then the vnd_walk
+function will return 1. If an error occurred, -1 is returned, and if
+vnderr and syserr are non-null, they are filled in with their
+respective error values. See vnd_errno(3VND) for more information on
+these errors.
+
+.SH EXAMPLES
+
+.LP
+Example 1 Walk all devices and print information about them
+
+.LP
+The following sample C program walks every vnd device and prints out
+information about them.
+
+.sp
+.in +2
+.nf
+#include <libvnd.h>
+#include <stdio.h>
+
+static int
+print_entry(vnd_info_t *viip, void *unused)
+{
+ (void) printf("device %s over data link %s in zone %d\n",
+ viip->vi_name, viip->vi_datalink, viip->vi_zone);
+ return (0);
+}
+
+int
+main(void)
+{
+ vnd_errno_t vnderr;
+ int syserr;
+
+ if (vnd_walk(print_entry, NULL, &vnderr, &syserr) != 0) {
+ (void) fprintf(stderr, "failed to walk vnd devices: %s\n",
+ vnderr != VND_E_SYS ? vnd_strerror(vnderr) :
+ vnd_strsyserror(syserr));
+ return (1);
+ }
+
+ return (0);
+}
+.fi
+.in -2
+
+.SH ATTRIBUTES
+.sp
+.LP
+See attributes(5) for descriptions of the following attributes:
+
+.sp
+.TS
+box;
+c | c
+l | l .
+ATTRIBUTE TYPE ATTRIBUTE VALUE
+_
+Stability Committed
+_
+MT-Level MT-Safe
+.TE
+
+.SH SEE ALSO
+
+libvnd(3VND), vnd_errno(3VND), attributes(5), zones(5)
diff --git a/usr/src/man/man3xnet/Makefile b/usr/src/man/man3xnet/Makefile
index 81cd96cba3..cc231f60c4 100644
--- a/usr/src/man/man3xnet/Makefile
+++ b/usr/src/man/man3xnet/Makefile
@@ -62,6 +62,7 @@ MANLINKS= getaddrinfo.3xnet \
getservbyname.3xnet \
getservbyport.3xnet \
getservent.3xnet \
+ htonll.3xnet \
htons.3xnet \
if_freenameindex.3xnet \
if_indextoname.3xnet \
@@ -73,6 +74,7 @@ MANLINKS= getaddrinfo.3xnet \
inet_ntoa.3xnet \
inet_pton.3xnet \
ntohl.3xnet \
+ ntohll.3xnet \
ntohs.3xnet \
sethostent.3xnet \
setnetent.3xnet \
@@ -97,6 +99,7 @@ getservbyname.3xnet := LINKSRC = endservent.3xnet
getservbyport.3xnet := LINKSRC = endservent.3xnet
getservent.3xnet := LINKSRC = endservent.3xnet
+htonll.3xnet := LINKSRC = htonl.3xnet
htons.3xnet := LINKSRC = htonl.3xnet
if_freenameindex.3xnet := LINKSRC = if_nametoindex.3xnet
@@ -112,6 +115,7 @@ inet_ntoa.3xnet := LINKSRC = inet_addr.3xnet
inet_pton.3xnet := LINKSRC = inet_ntop.3xnet
ntohl.3xnet := LINKSRC = htonl.3xnet
+ntohll.3xnet := LINKSRC = htonl.3xnet
ntohs.3xnet := LINKSRC = htonl.3xnet
sethostent.3xnet := LINKSRC = endhostent.3xnet
diff --git a/usr/src/man/man3xnet/htonl.3xnet b/usr/src/man/man3xnet/htonl.3xnet
index aa8470a28e..dda4586eb7 100644
--- a/usr/src/man/man3xnet/htonl.3xnet
+++ b/usr/src/man/man3xnet/htonl.3xnet
@@ -7,7 +7,8 @@
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH HTONL 3XNET "Jun 10, 2002"
+.\" Portions Copyright (c) 2012 Joyent, Inc. All rights reserved.
+.TH HTONL 3XNET "Jan 03, 2013"
.SH NAME
htonl, htons, ntohl, ntohs \- convert values between host and network byte
order
@@ -22,6 +23,11 @@ order
.LP
.nf
+\fBuint64_t\fR \fBhtonll\fR(\fBuint64_t\fR \fIhostlonglong\fR);
+.fi
+
+.LP
+.nf
\fBuint16_t\fR \fBhtons\fR(\fBuint16_t\fR \fIhostshort\fR);
.fi
@@ -32,18 +38,23 @@ order
.LP
.nf
+\fBuint64_t\fR \fBntohll\fR(\fBuint64_t\fR \fInetlonglong\fR);
+.fi
+
+.LP
+.nf
\fBuint16_t\fR \fBntohs\fR(\fBuint16_t\fR \fI netshort\fR);
.fi
.SH DESCRIPTION
.sp
.LP
-These functions convert 16-bit and 32-bit quantities between network byte order
-and host byte order.
+These functions convert 16-bit, 32-bit, and 64-bit quantities between network
+byte order and host byte order.
.sp
.LP
-The \fBuint32_t\fR and \fBuint16_t\fR types are made available by inclusion
-of \fB<inttypes.h>\fR\&.
+The \fBuint32_t\fR, \fBuint16_t\fR, and \fBuint64_t\fR types are made available
+by inclusion of \fB<inttypes.h>\fR\&.
.SH USAGE
.sp
.LP
@@ -56,12 +67,12 @@ value of their argument.
.SH RETURN VALUES
.sp
.LP
-The \fBhtonl()\fR and \fBhtons()\fR functions return the argument value
-converted from host to network byte order.
+The \fBhtonl()\fR, \fBhtonll()\fR, and \fBhtons()\fR functions return the
+argument value converted from host to network byte order.
.sp
.LP
-The \fBntohl()\fR and \fBntohs()\fR functions return the argument value
-converted from network to host byte order.
+The \fBntohl()\fR, \fBntohll()\fR, and \fBntohs()\fR functions return the
+argument value converted from network to host byte order.
.SH ERRORS
.sp
.LP
diff --git a/usr/src/man/man4/Makefile b/usr/src/man/man4/Makefile
index a4a64fbd8f..806d634969 100644
--- a/usr/src/man/man4/Makefile
+++ b/usr/src/man/man4/Makefile
@@ -138,6 +138,7 @@ _MANFILES= Intro.4 \
nsmbrc.4 \
nss.4 \
nsswitch.conf.4 \
+ overlay_files.4 \
packingrules.4 \
pam.conf.4 \
passwd.4 \
@@ -187,8 +188,6 @@ _MANFILES= Intro.4 \
sndr.4 \
sock2path.d.4 \
space.4 \
- ssh_config.sunssh.4 \
- sshd_config.sunssh.4 \
sulog.4 \
syslog.conf.4 \
system.4 \
@@ -243,8 +242,6 @@ _MANLINKS= addresses.4 \
rhosts.4 \
sendmail.cf.4 \
snapshot_cache.4 \
- ssh_config.4 \
- sshd_config.4 \
submit.cf.4 \
volume-defaults.4 \
wtmp.4 \
@@ -296,9 +293,6 @@ pcie.4 := LINKSRC = pci.4
sendmail.cf.4 := LINKSRC = sendmail.4
submit.cf.4 := LINKSRC = sendmail.4
-ssh_config.4 := LINKSRC = ssh_config.sunssh.4
-sshd_config.4 := LINKSRC = sshd_config.sunssh.4
-
isa.4 := LINKSRC = sysbus.4
dumpdates.4 := LINKSRC = ufsdump.4
diff --git a/usr/src/man/man4/overlay_files.4 b/usr/src/man/man4/overlay_files.4
new file mode 100644
index 0000000000..a0f66c65fd
--- /dev/null
+++ b/usr/src/man/man4/overlay_files.4
@@ -0,0 +1,169 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright 2015, Joyent, Inc.
+.\"
+.Dd Apr 13, 2015
+.Dt OVERLAY_FILES 4
+.Os
+.Sh NAME
+.Nm overlay_files
+.Nd Overlay files plugin file format
+.Sh DESCRIPTION
+The
+.Sy files
+plugin provides a means for a dynamic overlay where the destinations are
+determined based on a static description contained in a
+.Sy JSON
+file. This manual describes the format of the file used by the
+.Sy files/config
+property. To create and manage overlays
+with the
+.Sy files
+plugin, use
+.Xr dladm 1M .
+For more information on overlays, see
+.Xr overlay 5 .
+.Pp
+Using the
+.Sy files
+module, a static and simple overlay network can be created. This network
+does not support the use of
+.Em broadcast
+or
+.Em multicast
+traffic. Both ARP and NDP traffic are proxied by the plugin itself. In
+addition, the plugin allows for DHCP. Instead of providing a traditional
+DHCP proxy, when an initial DHCP broadcast goes out to a broadcast
+address, it will get rewritten to target a specific MAC address. The
+.Sy files
+plugin is useful as proof of concept and for simple static networks
+where addresses do not need to be reconfigured. If more advanced
+topologies or more streamlined updates are required, consider a different
+plugin.
+.Pp
+The file format is encoded as a series of
+.Sy JSON
+objects. Each object has a key, which is a MAC address on the
+.Sy overlay
+network. It has multiple values, some required, some optional, which
+describe various properties. The valid properties are:
+.Bl -hang -width Ds
+.It Sy ip
+.Bd -filled -compact
+The
+.Sy ip
+key indicates the IP address on the
+.Sy underlay
+network that houses the MAC address in question. Packets directed for
+the MAC address will be encapsulated and set to this address. This field
+is required.
+.Pp
+The value is a
+.Em JSON String .
+Both IPv4 and IPv6 addresses are supported and should be written out in their
+traditional forms. Follow the guidelines for writing addresses in
+.Xr inet_aton 3SOCKET .
+.Ed
+.It Sy port
+.Bd -filled -compact
+The
+.Sy port
+key indicates the port on the
+.Sy underlay
+network that houses the MAC address in question. This property is required if
+the encapsulation module requires a port for its destination. The value is
+a
+.Em JSON Number .
+.Ed
+.It Sy arp
+.Bd -filled -compact
+The
+.Sy arp
+key stores the IPv4 address that corresponds to this MAC address on the
+.Sy overlay
+network. This will be used to respond to ARP queries that would traditionally
+have been received by the OS kernel. If this address is not present, no IPv4
+packets directed to this IP address will be received by the network interface
+that has this MAC address, regardless of what is configured on top of it.
+.Pp
+The value is a
+.Em JSON String
+and should be written out following the guidelines for IPv4 addresses in
+.Xr inet_aton 3SOCKET .
+.Ed
+.It Sy ndp
+.Bd -filled -compact
+The
+.Sy ndp
+key stores the IPv6 address that corresponds to this MAC address on the
+.Sy overlay
+network. This will be used to respond to NDP queries that would traditionally
+have been received by the OS kernel. If this address is not present, no IPv6
+packets directed to this IP address will be received by the network interface
+that has this MAC address, regardless of what is configured on top of it.
+.Pp
+The value is a
+.Em JSON String
+and should be written out following the guidelines for IPv6 addresses in
+.Xr inet_aton 3SOCKET .
+.Ed
+.It Sy dhcp-proxy
+.Bd -filled -compact
+The
+.Sy dhcp-proxy
+key stores a MAC address that DHCP messages directed to a broadcast address get
+rewritten to be sent to. This can be viewed as a form of proxy DHCP, but is
+different in mechanism from a traditional proxy. The value is a
+.Em JSON String
+and should be written as a traditional MAC address string as described by
+.Xr ether_aton 3SOCKET .
+.Ed
+.El
+.Sh EXAMPLES
+.Sy Example 1
+Sample configuration file
+.Pp
+This configuration file provides information for three different MAC
+addresses. Each MAC address has an entry which describes what its IPv4
+and IPv6 address is, as well as the IP address and port of the host on
+the underlay network. Finally, one host has a DHCP proxy entry to
+demonstrate how one might configure DHCP.
+.Bd -literal -offset indent
+{
+ "de:ad:be:ef:00:00": {
+ "arp": "10.55.55.2",
+ "ip": "10.88.88.69",
+ "ndp": "fe80::3",
+ "port": 4789
+ },
+ "de:ad:be:ef:00:01": {
+ "arp": "10.55.55.3",
+ "dhcp-proxy": "de:ad:be:ef:00:00",
+ "ip": "10.88.88.70",
+ "ndp": "fe80::4",
+ "port": 4789
+ },
+ "de:ad:be:ef:00:02": {
+ "arp": "10.55.55.4",
+ "ip": "10.88.88.71",
+ "ndp": "fe80::5",
+ "port": 4789
+ }
+}
+.Ed
+.Sh STABILITY
+This file format is
+.Sy committed ;
+however, keys that are not listed here are reserved for future use.
+.Sh SEE ALSO
+.Xr dladm 1M ,
+.Xr overlay 5
diff --git a/usr/src/man/man4/proc.4 b/usr/src/man/man4/proc.4
index 20f2089f5e..c0a044164a 100644
--- a/usr/src/man/man4/proc.4
+++ b/usr/src/man/man4/proc.4
@@ -1,15 +1,14 @@
'\" te
.\" Copyright 1989 AT&T
.\" Copyright (c) 2006, Sun Microsystems, Inc. All Rights Reserved.
-.\" Copyright (c) 2013, Joyent, Inc. All rights reserved.
+.\" Copyright 2015, Joyent, Inc.
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH PROC 4 "Mar 31, 2013"
+.TH PROC 4 "May 19, 2014"
.SH NAME
proc \- /proc, the process file system
.SH DESCRIPTION
-.sp
.LP
\fB/proc\fR is a file system that provides access to the state of each process
and light-weight process (lwp) in the system. The name of each entry in the
@@ -174,7 +173,6 @@ To help deal with system data structures that are read from 32-bit processes, a
explicit 32-bit fixed-width data structures (like \fBcstruct stat32\fR) visible
to the 64-bit program. See \fBtypes32.h\fR(3HEAD).
.SH DIRECTORY STRUCTURE
-.sp
.LP
At the top level, the directory \fB/proc\fR contains entries each of which
names an existing process in the system. These entries are themselves
@@ -201,7 +199,6 @@ structures may grow by the addition of elements at the end in future releases
of the system and it is not legitimate for a program to assume that they will
not.
.SH STRUCTURE OF \fB/proc/\fR\fIpid\fR
-.sp
.LP
A given directory \fB/proc/\fR\fIpid\fR contains the following entries. A
process can use the invisible alias \fB/proc/self\fR if it wishes to open one
@@ -209,13 +206,11 @@ of its own \fB/proc\fR files (invisible in the sense that the name ``self''
does not appear in a directory listing of \fB/proc\fR obtained from
\fBls\fR(1), \fBgetdents\fR(2), or \fBreaddir\fR(3C)).
.SS "contracts"
-.sp
.LP
A directory containing references to the contracts held by the process. Each
entry is a symlink to the contract's directory under \fB/system/contract\fR.
See \fBcontract\fR(4).
.SS "as"
-.sp
.LP
Contains the address-space image of the process; it can be opened for both
reading and writing. \fBlseek\fR(2) is used to position the file at the virtual
@@ -223,7 +218,6 @@ address of interest and then the address space can be examined or changed
through \fBread\fR(2) or \fBwrite\fR(2) (or by using \fBpread\fR(2) or
\fBpwrite\fR(2) for the combined operation).
.SS "ctl"
-.sp
.LP
A write-only file to which structured messages are written directing the system
to change some aspect of the process's state or control its behavior in some
@@ -235,7 +229,6 @@ message is immediately reflected in the state of the process visible through
appropriate status and information files. The types of control messages are
described in detail later. See \fBCONTROL MESSAGES\fR.
.SS "status"
-.sp
.LP
Contains state information about the process and the representative lwp. The
file contains a \fBpstatus\fR structure which contains an embedded
@@ -672,6 +665,18 @@ the process. \fBpr_what\fR is unused in this case.
.RE
.sp
+.ne 2
+.na
+\fB\fBPR_BRAND\fR\fR
+.ad
+.RS 17n
+indicates that the lwp stopped for a brand-specific reason. Interpretation
+of the value of \fBpr_what\fR depends on which zone brand is in use. It is
+not generally expected that an lwp stopped in this state will be restarted
+by native \fBproc\fR(4) consumers.
+.RE
+
+.sp
.LP
\fBpr_cursig\fR names the current signal, that is, the next signal to be
delivered to the lwp, if any. \fBpr_info\fR, when the lwp is in a
@@ -864,7 +869,6 @@ registers.
.LP
If the lwp is not stopped, all register values are undefined.
.SS "psinfo"
-.sp
.LP
Contains miscellaneous information about the process and the representative lwp
needed by the \fBps\fR(1) command. \fBpsinfo\fR remains accessible after a
@@ -930,6 +934,15 @@ migrate to checking \fBPR_ISSYS\fR in the \fBpstatus\fR structure's
0x8000). \fBpr_pctcpu\fR is the summation over all lwps in the process.
.sp
.LP
+The \fBpr_fname\fR and \fBpr_psargs\fR are writable by the owner of the
+process. To write to them, the \fBpsinfo\fR file should be open for writing
+and the desired value for the field should be written at the file offset
+that corresponds to the member of structure. No other entry may be written
+to; if a write is attempted to an offset that does not represent one of
+these two memers, or if the size of the write is not exactly the size of
+the member being written, no bytes will be written and zero will be returned.
+.sp
+.LP
\fBpr_lwp\fR contains the \fBps\fR(1) information for the representative lwp.
If the process is a \fIzombie\fR, \fBpr_nlwp\fR, \fBpr_nzomb\fR, and
\fBpr_lwp.pr_lwpid\fR are zero and the other fields of \fBpr_lwp\fR are
@@ -983,7 +996,6 @@ maximum value is 1/N, where N is the number of \fBCPU\fRs.
\fBpr_contract\fR is the id of the process contract of which the process is a
member. See \fBcontract\fR(4) and \fBprocess\fR(4).
.SS "cred"
-.sp
.LP
Contains a description of the credentials associated with the process:
.sp
@@ -1010,7 +1022,6 @@ length; the \fBcred\fR file contains all of the supplementary groups.
\fBpr_ngroups\fR indicates the number of supplementary groups. (See also the
\fBPCSCRED\fR and \fBPCSCREDX\fR control operations.)
.SS "priv"
-.sp
.LP
Contains a description of the privileges associated with the process:
.sp
@@ -1044,7 +1055,6 @@ which is followed by additional information about the process state
The full size of the structure can be computed using
\fBPRIV_PRPRIV_SIZE\fR(\fBprpriv_t *\fR).
.SS "sigact"
-.sp
.LP
Contains an array of \fBsigaction structures\fR describing the current
dispositions of all signals associated with the traced process (see
@@ -1052,14 +1062,18 @@ dispositions of all signals associated with the traced process (see
that the action for signal number \fIn\fR appears in position \fIn\fR-1 of the
array.
.SS "auxv"
-.sp
.LP
Contains the initial values of the process's aux vector in an array of
\fBauxv_t\fR structures (see \fB<sys/auxv.h>\fR). The values are those that
were passed by the operating system as startup information to the dynamic
linker.
+.SS "argv"
+.LP
+Contains the concatenation of each of the argument strings, including their
+\fBNUL\fR terminators, in the argument vector (\fBargv\fR) for the process. If
+the process has modified either its argument vector, or the contents of any of
+the strings referenced by that vector, those changes will be visible here.
.SS "ldt"
-.sp
.LP
This file exists only on x86-based machines. It is non-empty only if the
process has established a local descriptor table (\fBLDT\fR). If non-empty, the
@@ -1067,7 +1081,6 @@ file contains the array of currently active \fBLDT\fR entries in an array of
elements of type \fBstruct ssd\fR, defined in \fB<sys/sysi86.h>\fR, one element
for each active \fBLDT\fR entry.
.SS "map, xmap"
-.sp
.LP
Contain information about the virtual address map of the process. The map file
contains an array of \fBprmap\fR structures while the xmap file contains an
@@ -1235,7 +1248,6 @@ translation for the mapping. \fBpr_hatpagesize\fR may be different than
\fBpr_pagesize.\fR The possible values are hardware architecture specific, and
may change over a mapping's lifetime.
.SS "rmap"
-.sp
.LP
Contains information about the reserved address ranges of the process. The file
contains an array of \fBprmap\fR structures, as defined above for the \fBmap\fR
@@ -1246,21 +1258,18 @@ not use any part of it for the new mapping. Examples of such reservations
include the address ranges reserved for the process stack and the individual
thread stacks of a multi-threaded process.
.SS "cwd"
-.sp
.LP
A symbolic link to the process's current working directory. See \fBchdir\fR(2).
A \fBreadlink\fR(2) of \fB/proc/\fIpid\fR/cwd\fR yields a null string. However,
it can be opened, listed, and searched as a directory, and can be the target of
\fBchdir\fR(2).
.SS "root"
-.sp
.LP
A symbolic link to the process's root directory.
\fB/proc/\fR\fIpid\fR\fB/root\fR can differ from the system root directory if
the process or one of its ancestors executed \fBchroot\fR(2) as super user. It
has the same semantics as \fB/proc/\fR\fIpid\fR\fB/cwd\fR.
.SS "fd"
-.sp
.LP
A directory containing references to the open files of the process. Each entry
is a decimal number corresponding to an open file descriptor in the process.
@@ -1274,7 +1283,6 @@ directory, it can be accessed with the same semantics as
\fB/proc/\fIpid\fR/cwd\fR. An attempt to open any other type of entry fails
with \fBEACCES\fR.
.SS "object"
-.sp
.LP
A directory containing read-only files with names corresponding to the
\fBpr_mapname\fR entries in the \fBmap\fR and \fBpagedata\fR files. Opening
@@ -1287,7 +1295,6 @@ The \fBobject\fR directory makes it possible for a controlling process to gain
access to the object file and any shared libraries (and consequently the symbol
tables) without having to know the actual path names of the executable files.
.SS "path"
-.sp
.LP
A directory containing symbolic links to files opened by the process. The
directory includes one entry for \fBcwd\fR and \fBroot\fR. The directory also
@@ -1299,7 +1306,6 @@ namespace (such as \fBFIFO\fRs and sockets), but can also happen for regular
files. For the file descriptor entries, the path may be different from the one
used by the process to open the file.
.SS "pagedata"
-.sp
.LP
Opening the page data file enables tracking of address space references and
modifications on a per-page basis.
@@ -1381,13 +1387,11 @@ to a system-imposed limit per traced process. A read of one does not affect the
data being collected by the system for the others. An open of the page data
file will fail with \fBENOMEM\fR if the system-imposed limit would be exceeded.
.SS "watch"
-.sp
.LP
Contains an array of \fBprwatch\fR structures, one for each watched area
established by the \fBPCWATCH\fR control operation. See \fBPCWATCH\fR for
details.
.SS "usage"
-.sp
.LP
Contains process usage information described by a \fBprusage\fR structure which
contains at least the following fields:
@@ -1434,7 +1438,6 @@ previously an estimate, if microstate accounting were not enabled, the current
information is now never an estimate represents time the process has spent in
various states.
.SS "lstatus"
-.sp
.LP
Contains a \fBprheader\fR structure followed by an array of \fBlwpstatus\fR
structures, one for each active lwp in the process (see also
@@ -1459,13 +1462,11 @@ file header to index through the array. These comments apply to all \fB/proc\fR
files that include a \fBprheader\fR structure (\fBlpsinfo\fR and \fBlusage\fR,
below).
.SS "lpsinfo"
-.sp
.LP
Contains a \fBprheader\fR structure followed by an array of \fBlwpsinfo\fR
structures, one for eachactive and zombie lwp in the process. See also
\fB/proc/\fR\fIpid\fR\fB/lwp/\fR\fIlwpid\fR/\fBlwpsinfo\fR, below.
.SS "lusage"
-.sp
.LP
Contains a \fBprheader\fR structure followed by an array of \fBprusage\fR
structures, one for each active lwp in the process, plus an additional element
@@ -1476,43 +1477,36 @@ summation over all these structures is the definition of the process usage
information obtained from the \fBusage\fR file. (See also
\fB/proc/\fR\fIpid\fR\fB/lwp/\fR\fIlwpid\fR/\fBlwpusage\fR, below.)
.SS "lwp"
-.sp
.LP
A directory containing entries each of which names an active or zombie lwp
within the process. These entries are themselves directories containing
additional files as described below. Only the \fBlwpsinfo\fR file exists in the
directory of a zombie lwp.
.SH STRUCTURE OF \fB/proc/\fR\fIpid\fR\fB/lwp/\fR\fIlwpid\fR
-.sp
.LP
A given directory \fB/proc/\fR\fIpid\fR\fB/lwp/\fR\fIlwpid\fR contains the
following entries:
.SS "lwpctl"
-.sp
.LP
Write-only control file. The messages written to this file affect the specific
lwp rather than the representative lwp, as is the case for the process's
\fBctl\fR file.
.SS "lwpstatus"
-.sp
.LP
lwp-specific state information. This file contains the \fBlwpstatus\fR
structure for the specific lwp as described above for the representative lwp in
the process's \fBstatus\fR file.
.SS "lwpsinfo"
-.sp
.LP
lwp-specific \fBps\fR(1) information. This file contains the \fBlwpsinfo\fR
structure for the specific lwp as described above for the representative lwp in
the process's \fBpsinfo\fR file. The \fBlwpsinfo\fR file remains accessible
after an lwp becomes a zombie.
.SS "lwpusage"
-.sp
.LP
This file contains the \fBprusage\fR structure for the specific lwp as
described above for the process's \fBusage\fR file.
.SS "gwindows"
-.sp
.LP
This file exists only on SPARC based machines. If it is non-empty, it contains
a \fBgwindows_t\fR structure, defined in \fB<sys/regset.h>\fR, with the values
@@ -1523,7 +1517,6 @@ pointer is improperly aligned. If the lwp is not stopped or if there are no
register windows that could not be stored on the stack, the file is empty (the
usual case).
.SS "xregs"
-.sp
.LP
Extra state registers. The extra state register set is architecture dependent;
this file is empty if the system does not support extra state registers. If the
@@ -1532,7 +1525,6 @@ file is non-empty, it contains an architecture dependent structure of type
extra state registers. If the lwp is not stopped, all register values are
undefined. See also the \fBPCSXREG\fR control operation, below.
.SS "asrs"
-.sp
.LP
This file exists only for 64-bit SPARC V9 processes. It contains an
\fBasrset_t\fR structure, defined in <\fBsys/regset.h\fR>, containing the
@@ -1540,7 +1532,6 @@ values of the lwp's platform-dependent ancillary state registers. If the lwp is
not stopped, all register values are undefined. See also the \fBPCSASRS\fR
control operation, below.
.SS "spymaster"
-.sp
.LP
For an agent lwp (see \fBPCAGENT\fR), this file contains a \fBpsinfo_t\fR
structure that corresponds to the process that created the agent lwp at the
@@ -1549,7 +1540,6 @@ the \fBpsinfo\fR file, with one modification: the \fBpr_time\fR field does not
correspond to the CPU time for the process, but rather to the creation time of
the agent lwp.
.SS "templates"
-.sp
.LP
A directory which contains references to the active templates for the lwp,
named by the contract type. Changes made to an active template descriptor do
@@ -1557,7 +1547,6 @@ not affect the original template which was activated, though they do affect the
active template. It is not possible to activate an active template descriptor.
See \fBcontract\fR(4).
.SH CONTROL MESSAGES
-.sp
.LP
Process state changes are effected through messages written to a process's
\fBctl\fR file or to an individual lwp's \fBlwpctl\fR file. All control
@@ -1577,7 +1566,6 @@ Descriptions of the allowable control messages follow. In all cases, writing a
message to a control file for a process or lwp that has terminated elicits the
error \fBENOENT\fR.
.SS "PCSTOP PCDSTOP PCWSTOP PCTWSTOP"
-.sp
.LP
When applied to the process control file, \fBPCSTOP\fR directs all lwps to stop
and waits for them to stop, \fBPCDSTOP\fR directs all lwps to stop without
@@ -1626,7 +1614,6 @@ level, has no user-level address space visible through \fB/proc\fR, and cannot
be stopped. Applying one of these operations to a system process or any of its
lwps elicits the error \fBEBUSY\fR.
.SS "PCRUN"
-.sp
.LP
Make an lwp runnable again after a stop. This operation takes a \fBlong\fR
operand containing zero or more of the following flags:
@@ -1707,7 +1694,6 @@ event of interest, the representative lwp is marked \fBPR_REQUESTED\fR. If, as
a consequence, all lwps are in the \fBPR_REQUESTED\fR or \fBPR_SUSPENDED\fR
stop state, all lwps showing \fBPR_REQUESTED\fR are made runnable.
.SS "PCSTRACE"
-.sp
.LP
Define a set of signals to be traced in the process. The receipt of one of
these signals by an lwp causes the lwp to stop. The set of signals is defined
@@ -1720,11 +1706,9 @@ sent to the lwp, the signal is not received and does not cause a stop until it
is removed from the held signal set, either by the lwp itself or by setting the
held signal set with \fBPCSHOLD\fR.
.SS "PCCSIG"
-.sp
.LP
The current signal, if any, is cleared from the specific or representative lwp.
.SS "PCSSIG"
-.sp
.LP
The current signal and its associated signal information for the specific or
representative lwp are set according to the contents of the operand
@@ -1736,7 +1720,6 @@ and an additional \fBPR_SIGNALLED\fR stop does not intervene even if the signal
is traced. Setting the current signal to \fBSIGKILL\fR terminates the process
immediately.
.SS "PCKILL"
-.sp
.LP
If applied to the process control file, a signal is sent to the process with
semantics identical to those of \fBkill\fR(2). If applied to an lwp control
@@ -1744,7 +1727,6 @@ file, a directed signal is sent to the specific lwp. The signal is named in a
\fBlong\fR operand contained in the message. Sending \fBSIGKILL\fR terminates
the process immediately.
.SS "PCUNKILL"
-.sp
.LP
A signal is deleted, that is, it is removed from the set of pending signals. If
applied to the process control file, the signal is deleted from the process's
@@ -1753,14 +1735,12 @@ the lwp's pending signals. The current signal (if any) is unaffected. The
signal is named in a \fBlong\fR operand in the control message. It is an error
(\fBEINVAL\fR) to attempt to delete \fBSIGKILL\fR.
.SS "PCSHOLD"
-.sp
.LP
Set the set of held signals for the specific or representative lwp (signals
whose delivery will be blocked if sent to the lwp). The set of signals is
specified with a \fBsigset_t\fR operand. \fBSIGKILL\fR and \fBSIGSTOP\fR cannot
be held; if specified, they are silently ignored.
.SS "PCSFAULT"
-.sp
.LP
Define a set of hardware faults to be traced in the process. On incurring one
of these faults, an lwp stops. The set is defined via the operand
@@ -1885,12 +1865,10 @@ no signal is posted. The \fBpr_info\fR field in the \fBlwpstatus\fR structure
identifies the signal to be sent and contains machine-specific information
about the fault.
.SS "PCCFAULT"
-.sp
.LP
The current fault, if any, is cleared; the associated signal will not be sent
to the specific or representative lwp.
.SS "PCSENTRY PCSEXIT"
-.sp
.LP
These control operations instruct the process's lwps to stop on entry to or
exit from specified system calls. The set of system calls to be traced is
@@ -1911,7 +1889,6 @@ instructed to go directly to system call exit by specifying the \fBPRSABORT\fR
flag in a \fBPCRUN\fR control message. Unless exit from the system call is
being traced, the lwp returns to user level showing \fBEINTR\fR.
.SS "PCWATCH"
-.sp
.LP
Set or clear a watched area in the controlled process from a \fBprwatch\fR
structure operand:
@@ -2060,7 +2037,6 @@ process's inherit-on-fork mode, \fBPR_FORK\fR, is set (see \fBPCSET\fR, below).
All watched areas are cancelled when the traced process performs a successful
\fBexec\fR(2).
.SS "PCSET PCUNSET"
-.sp
.LP
\fBPCSET\fR sets one or more modes of operation for the traced process.
\fBPCUNSET\fR unsets these modes. The modes to be set or unset are specified by
@@ -2176,7 +2152,6 @@ or to apply these operations to a system process. The current modes are
reported in the \fBpr_flags\fR field of \fB/proc/\fR\fIpid\fR\fB/status\fR and
\fB/proc/\fR\fIpid\fR\fB/lwp/\fR\fIlwp\fR\fB/lwpstatus\fR.
.SS "PCSREG"
-.sp
.LP
Set the general registers for the specific or representative lwp according to
the operand \fBprgregset_t\fR structure.
@@ -2195,7 +2170,6 @@ overflow-bit.
\fBPCSREG\fR fails with \fBEBUSY\fR if the lwp is not stopped on an event of
interest.
.SS "PCSVADDR"
-.sp
.LP
Set the address at which execution will resume for the specific or
representative lwp from the operand \fBlong\fR. On SPARC based systems, both
@@ -2203,7 +2177,6 @@ representative lwp from the operand \fBlong\fR. On SPARC based systems, both
address. On x86-based systems, only %eip is set. \fBPCSVADDR\fR fails with
\fBEBUSY\fR if the lwp is not stopped on an event of interest.
.SS "PCSFPREG"
-.sp
.LP
Set the floating-point registers for the specific or representative lwp
according to the operand \fBprfpregset_t\fR structure. An error (\fBEINVAL\fR)
@@ -2212,7 +2185,6 @@ floating-point hardware and the system does not emulate floating-point machine
instructions). \fBPCSFPREG\fR fails with \fBEBUSY\fR if the lwp is not stopped
on an event of interest.
.SS "PCSXREG"
-.sp
.LP
Set the extra state registers for the specific or representative lwp according
to the architecture-dependent operand \fBprxregset_t\fR structure. An error
@@ -2220,7 +2192,6 @@ to the architecture-dependent operand \fBprxregset_t\fR structure. An error
registers. \fBPCSXREG\fR fails with \fBEBUSY\fR if the lwp is not stopped on an
event of interest.
.SS "PCSASRS"
-.sp
.LP
Set the ancillary state registers for the specific or representative lwp
according to the SPARC V9 platform-dependent operand \fBasrset_t\fR structure.
@@ -2230,7 +2201,6 @@ state registers are privileged registers that cannot be modified. Only those
that can be modified are set; all others are silently ignored. \fBPCSASRS\fR
fails with \fBEBUSY\fR if the lwp is not stopped on an event of interest.
.SS "PCAGENT"
-.sp
.LP
Create an agent lwp in the controlled process with register values from the
operand \fBprgregset_t\fR structure (see \fBPCSREG\fR, above). The agent lwp is
@@ -2291,7 +2261,6 @@ agent lwp.
Symbolic constants for system call trap numbers like \fBSYS_lwp_exit\fR and
\fBSYS_lwp_create\fR can be found in the header file <\fBsys/syscall.h\fR>.
.SS "PCREAD PCWRITE"
-.sp
.LP
Read or write the target process's address space via a \fBpriovec\fR structure
operand:
@@ -2318,7 +2287,6 @@ space, or when stepping over a breakpointed instruction. Unlike \fBpread\fR(2)
and \fBpwrite\fR(2), no provision is made for partial reads or writes; if the
operation cannot be performed completely, it fails with \fBEIO\fR.
.SS "PCNICE"
-.sp
.LP
The traced process's \fBnice\fR(2) value is incremented by the amount in the
operand \fBlong\fR. Only a process with the {\fBPRIV_PROC_PRIOCNTL\fR}
@@ -2326,7 +2294,6 @@ privilege asserted in its effective set can better a process's priority in this
way, but any user may lower the priority. This operation is not meaningful for
all scheduling classes.
.SS "PCSCRED"
-.sp
.LP
Set the target process credentials to the values contained in the
\fBprcred_t\fR structure operand (see \fB/proc/\fR\fIpid\fR\fB/cred\fR). The
@@ -2336,13 +2303,11 @@ set. The target process's supplementary groups are not changed; the
ignored. Only the privileged processes can perform this operation; for all
others it fails with \fBEPERM\fR.
.SS "PCSCREDX"
-.sp
.LP
Operates like \fBPCSCRED\fR but also sets the supplementary groups; the length
of the data written with this control operation should be "sizeof
(\fBprcred_t\fR) + sizeof (\fBgid_t)\fR * (#groups - 1)".
.SS "PCSPRIV"
-.sp
.LP
Set the target process privilege to the values contained in the \fBprpriv_t\fR
operand (see \fB/proc/pid/priv\fR). The effective, permitted, inheritable, and
@@ -2360,7 +2325,6 @@ of the sets in the target process.
If any of the above restrictions are not met, \fBEPERM\fR is returned. If the
structure written is improperly formatted, \fBEINVAL\fR is returned.
.SH PROGRAMMING NOTES
-.sp
.LP
For security reasons, except for the \fBpsinfo\fR, \fBusage\fR, \fBlpsinfo\fR,
\fBlusage\fR, \fBlwpsinfo\fR, and \fBlwpusage\fR files, which are
@@ -2419,7 +2383,6 @@ descriptor has become invalid. \fBPOLLNVAL\fR is returned immediately if
to a system process (see \fBPCSTOP\fR). The requested events may be empty to
wait simply for termination.
.SH FILES
-.sp
.ne 2
.na
\fB\fB/proc\fR\fR
@@ -2582,6 +2545,16 @@ process aux vector
.sp
.ne 2
.na
+\fB\fB/proc/\fIpid\fR/argv\fR\fR
+.ad
+.sp .6
+.RS 4n
+process argument vector
+.RE
+
+.sp
+.ne 2
+.na
\fB\fB/proc/\fIpid\fR/ldt\fR\fR
.ad
.sp .6
@@ -2820,7 +2793,6 @@ For an agent LWP, the controlling process
.RE
.SH SEE ALSO
-.sp
.LP
\fBls\fR(1), \fBps\fR(1), \fBchroot\fR(1M), \fBalarm\fR(2), \fBbrk\fR(2),
\fBchdir\fR(2), \fBchroot\fR(2), \fBclose\fR(2), \fBcreat\fR(2), \fBdup\fR(2),
@@ -2836,7 +2808,6 @@ For an agent LWP, the controlling process
\fBwait\fR(3C), \fBcontract\fR(4), \fBcore\fR(4), \fBprocess\fR(4),
\fBlfcompile\fR(5), \fBprivileges\fR(5)
.SH DIAGNOSTICS
-.sp
.LP
Errors that can occur in addition to the errors normally associated with file
system access:
@@ -2991,14 +2962,12 @@ restrictions. See \fBprivileges\fR(5).
.RE
.SH NOTES
-.sp
.LP
Descriptions of structures in this document include only interesting structure
elements, not filler and padding fields, and may show elements out of order for
descriptive clarity. The actual structure definitions are contained in
\fB<procfs.h>\fR\&.
.SH BUGS
-.sp
.LP
Because the old \fBioctl\fR(2)-based version of \fB/proc\fR is currently
supported for binary compatibility with old applications, the top-level
diff --git a/usr/src/man/man4/process.4 b/usr/src/man/man4/process.4
index 50f80efca2..6f44792138 100644
--- a/usr/src/man/man4/process.4
+++ b/usr/src/man/man4/process.4
@@ -1,9 +1,10 @@
'\" te
.\" Copyright (c) 2008, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright 2015, Joyent, Inc.
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH PROCESS 4 "Mar 25, 2008"
+.TH PROCESS 4 "Oct 7, 2015"
.SH NAME
process \- process contract type
.SH SYNOPSIS
@@ -205,6 +206,19 @@ exits.
.sp
.ne 2
.na
+\fB\fBCT_PR_KEEP_EXEC\fR\fR
+.ad
+.sp .6
+.RS 4n
+If set, the process contract template remains active across \fBexec\fR(2).
+This can be used to setup a contract for children of an application which
+is not contract-aware. If this is not set then the system clears the active
+template when the process execs.
+.RE
+
+.sp
+.ne 2
+.na
\fB\fBCT_PR_NOORPHAN\fR\fR
.ad
.sp .6
diff --git a/usr/src/man/man5/Makefile b/usr/src/man/man5/Makefile
index 71ed86715f..896752d883 100644
--- a/usr/src/man/man5/Makefile
+++ b/usr/src/man/man5/Makefile
@@ -14,7 +14,7 @@
# Copyright (c) 2012 by Delphix. All rights reserved.
# Copyright 2014 Nexenta Systems, Inc.
# Copyright 2014 Garrett D'Amore <garrett@damore.org>
-# Copyright (c) 2015, Joyent, Inc. All rights reserved.
+# Copyright 2016, Joyent, Inc.
#
include $(SRC)/Makefile.master
@@ -59,6 +59,7 @@ MANFILES= Intro.5 \
iconv_unicode.5 \
ieee802.3.5 \
ieee802.11.5 \
+ inotify.5 \
ipfilter.5 \
isalist.5 \
kerberos.5 \
@@ -69,6 +70,7 @@ MANFILES= Intro.5 \
lfcompile.5 \
lfcompile64.5 \
locale.5 \
+ lx.5 \
man.5 \
mandoc_char.5 \
mandoc_roff.5 \
@@ -79,6 +81,7 @@ MANFILES= Intro.5 \
ms.5 \
mutex.5 \
nfssec.5 \
+ overlay.5 \
pam_allow.5 \
pam_authtok_check.5 \
pam_authtok_get.5 \
diff --git a/usr/src/man/man5/brands.5 b/usr/src/man/man5/brands.5
index 9d7ce16d42..cc79ac173a 100644
--- a/usr/src/man/man5/brands.5
+++ b/usr/src/man/man5/brands.5
@@ -83,5 +83,4 @@ Interface Stability Evolving
\fBin.rlogind\fR(1M), \fBsshd\fR(1M), \fBzoneadm\fR(1M), \fBzonecfg\fR(1M),
\fBkill\fR(2), \fBpriocntl\fR(2), \fBgetzoneid\fR(3C), \fBucred_get\fR(3C),
\fBgetzoneid\fR(3C), \fBproc\fR(4), \fBattributes\fR(5), \fBlx\fR(5),
-\fBnative\fR(5), \fBprivileges\fR(5), \fBzones\fR(5), \fBlx_systrace\fR(7D),
-\fBcrgetzoneid\fR(9F)
+\fBprivileges\fR(5), \fBzones\fR(5), \fBcrgetzoneid\fR(9F)
diff --git a/usr/src/man/man5/inotify.5 b/usr/src/man/man5/inotify.5
new file mode 100644
index 0000000000..9b0016101d
--- /dev/null
+++ b/usr/src/man/man5/inotify.5
@@ -0,0 +1,305 @@
+'\" te
+.\" Copyright (c) 2014, Joyent, Inc. All Rights Reserved.
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.TH INOTIFY 5 "Sep 17, 2014"
+.SH NAME
+inotify \- Linux-compatible file event notification facility
+.SH SYNOPSIS
+
+.LP
+.nf
+#include <sys/inotify.h>
+.fi
+
+.SH DESCRIPTION
+.sp
+.LP
+
+\fBinotify\fR is a facility for receiving file system events on specified
+files or directories. When monitoring a directory, \fBinotify\fR can be
+used to retrieve events not only on the directory, but also on any files
+that the directory contains. \fBinotify\fR originated with Linux, and
+this facility is designed to be binary-compatible with the Linux facility,
+including the following interfaces:
+
+.RS +4
+.TP
+.ie t \(bu
+.el o
+\fBinotify_init\fR(3C) creates an \fBinotify\fR instance, returning a file
+descriptor associated with the in-kernel event queue.
+.RE
+.RS +4
+.TP
+.ie t \(bu
+.el o
+\fBinotify_init1\fR(3C) also creates an \fBinotify\fR instance, but allows
+for a flags argument that controls some attributes of the returned file
+descriptor.
+.RE
+.RS +4
+.TP
+.ie t \(bu
+.el o
+\fBinotify_add_watch\fR(3C) allows a watch of a particular file or directory
+to be added to a watch list associated with the specified \fBinotify\fR
+instance. \fBinotify_add_watch\fR(3C) returns a watch descriptor that will
+be reflected in the \fIwd\fR member of the \fIinotify_event\fR structure
+returned via a \fBread\fR(2) of the instance.
+.RE
+.RS +4
+.TP
+.ie t \(bu
+.el o
+\fBinotify_rm_watch\fR(3C) removes the watch that corresponds to the specified
+watch descriptor.
+.RE
+
+When all file descriptors referring to a particular \fBinotify\fR instance
+are closed, the instance and all watches associated with that instance are
+freed.
+
+To consume events on an \fBinotify\fR instance, an application should
+issue a \fBread\fR(2) to the instance. If no events are available
+(and the \fBinotify\fR instance has not been explicitly made non-blocking
+via \fBinotify_init1\fR(3C)) the \fBread\fR(2) will block until a
+watched event occurs. If and when events are available, \fBread\fR(2) will
+return an array of the following structures:
+
+.sp
+.in +2
+.nf
+struct inotify_event {
+ int wd; /* watch descriptor */
+ uint32_t mask; /* mask of event */
+ uint32_t cookie; /* cookie for associating renames */
+ uint32_t len; /* size of name field */
+ char name[]; /* optional name */
+};
+.fi
+.in -2
+
+\fIwd\fR contains the watch descriptor that corresponds to the event,
+as returned by \fBinotify_add_watch\fR(3C).
+
+\fImask\fR is a bitwise \fBOR\fR of event masks (see below) that
+describes the event.
+
+\fIcookie\fR is an opaque value that can be used to associate different
+events into a single logical event. In particular, it allows consumers to
+associate \fBIN_MOVED_FROM\fR events with subsequent \fBIN_MOVED_TO\fR
+events.
+
+\fIlen\fR denotes the length of the \fIname\fR field, including any padding
+required for trailing null bytes and alignment. The size of the entire
+event is therefore the size of the \fIinotify_event\fR structure plus the
+value of \fIlen\fR.
+
+\fIname\fR contains the name of the file associated with the event, if any.
+This field is only present when the watched entity is a directory and
+the event corresponds to a file that was contained by the watched directory
+(though see \fBNOTES\fR and \fBWARNINGS\fR for details and limitations).
+When present, \fIname\fR is null terminated, and may contain additional
+zero bytes
+to pad for alignment. (The length of this field -- including any bytes
+for alignment -- is denoted by the \fIlen\fR field.)
+
+.SS "Events"
+
+The events that can be generated on a watched entity are as follows:
+
+.sp
+.in +2
+.TS
+c c
+l l .
+\fIEvent\fR \fIDescription\fR
+\fBIN_ACCESS\fR File/directory was accessed
+\fBIN_ATTRIB\fR File/directory attributes were changed
+\fBIN_CLOSE_WRITE\fR File/directory opened for writing was closed
+\fBIN_CLOSE_NOWRITE\fR File/directory not opened for writing was closed
+\fBIN_CREATE\fR File/directory created in watched directory
+\fBIN_DELETE\fR File/directory deleted from watched directory
+\fBIN_DELETE_SELF\fR Watched file/directory was deleted
+\fBIN_MODIFY\fR File/directory was modified
+\fBIN_MODIFY_SELF\fR Watched file/directory was modified
+\fBIN_MOVED_FROM\fR File was renamed from entity in watched directory
+\fBIN_MOVED_TO\fR File was renamed to entity in watched directory
+\fBIN_OPEN\fR File/directory was opened
+.TE
+.in -2
+
+Of these, all events except \fBIN_MOVE_SELF\fR and \fBIN_DELETE_SELF\fR
+can refer to either the watched entity or (if the watched entity
+is a directory) a file or directory contained by the watched directory.
+(See \fBNOTES\fR and \fBWARNINGS\fR, below for details on this
+mechanism and its limitations.)
+If the event corresponds to a contained entity,
+\fIname\fR will be set to the name of the affected
+entity.
+
+In addition to speciyfing events of interest, watched events may
+be modified by potentially setting any of the following when adding a
+watch via \fBinotify_add_watch\fR(3C):
+
+.sp
+.ne 2
+.na
+\fBIN_DONT_FOLLOW\fR
+.ad
+.RS 12n
+Don't follow the specified pathname if it is a symbolic link.
+.RE
+
+.sp
+.ne 2
+.na
+\fBIN_EXCL_UNLINK\fR
+.ad
+.RS 12n
+If watching a directory and a contained entity becomes unlinked, cease
+generating events for that entity. (By default, contained entities will
+continue to generate events on their former parent directory.)
+.RE
+
+.sp
+.ne 2
+.na
+\fBIN_MASK_ADD\fR
+.ad
+.RS 12n
+If the specified pathname is already being watched, the specified events
+will be added to the watched events instead of the default behavior of
+replacing them. (If one
+may forgive the editorializing, this particular interface gewgaw
+seems entirely superfluous, and a canonical example of
+feasibility trumping wisdom.)
+.RE
+
+.sp
+.ne 2
+.na
+\fBIN_ONESHOT\fR
+.ad
+.RS 12n
+Once an event has been generated for the watched entity, remove the
+watch from the watch list as if \fBinotify_rm_watch\fR(3C) had been called
+on it (thereby inducing an \fBIN_IGNORED\fR event).
+.RE
+
+.sp
+.ne 2
+.na
+\fBIN_ONLYDIR\fR
+.ad
+.RS 12n
+Only watch the specified pathname if it is a directory.
+.RE
+
+In addition to the specified events, the following bits may be specified
+in the \fImask\fR field as returned from \fBread\fR(2):
+
+.sp
+.ne 2
+.na
+\fBIN_IGNORED\fR
+.ad
+.RS 12n
+A watch was removed explicitly (i.e, via \fBinotify_rm_watch\fR(3C)) or
+implicitly (e.g., because \fBIN_ONESHOT\fR was set or because the watched
+entity was deleted).
+.RE
+
+.sp
+.ne 2
+.na
+\fBIN_ISDIR\fR
+.ad
+.RS 12n
+The entity inducing the event is a directory.
+.RE
+
+.sp
+.ne 2
+.na
+\fBIN_Q_OVERFLOW\fR
+.ad
+.RS 12n
+The event queue exceeded the maximum event queue length per instance.
+(By default, this is 16384, but it can be tuned by setting
+\fBinotify_maxevents\fR via \fB/etc/system\fR.)
+.RE
+
+.sp
+.ne 2
+.na
+\fBIN_UNMOUNT\fR
+.ad
+.RS 12n
+The filesystem containing the watched entity was unmounted.
+.RE
+
+.sp
+.SH NOTES
+.sp
+.LP
+
+\fBinotify\fR instances can be monitored via \fBpoll\fR(2),
+\fBport_get\fR(3C), \fBepoll\fR(5), etc.
+
+The event queue associated with an \fBinotify\fR instance is serialized
+and ordered: events will be placed on the tail of the queue in the order
+that they occur.
+
+If at the time an event occurs the tail of the event queue is identical
+to the newly received event, the newly received event will be dropped,
+effectively coalescing the two events.
+
+When watching a directory and receieving events on contained elements
+(i.e., a contained file or subdirectory), note that the information
+received in the \fIname\fR field may be stale: the file may have been
+renamed between the event and its processing. If a file has been unlinked
+(and if \fBIN_EXCL_UNLINK\fR has not been set),
+the \fIname\fR will reflect the last name that resolved to the file.
+If a new file is created in the same directory with the old name, events
+on the new file and the old (unlinked) file will become undistinguishable.
+
+The number of bytes that are available to be read on an \fBinotify\fR
+instance can be determined via a \fBFIONREAD\fR \fBioctl\fR(2).
+
+.sp
+.SH WARNINGS
+.sp
+.LP
+
+While a best effort has been made to mimic the Linux semantics, there
+remains a fundamental difference with respect to hard links: on Linux,
+if a file has multiple hard links to it, a notification on a watched
+directory or file will be received if and only if that event was received
+via the watched path. For events that are induced by open files
+(such as \fBIN_MODIFY\fR), these semantics seem peculiar: the watched
+file is in fact changing, but because it is not changing via the watched
+path, no notification is received. By contrast, the implementation here
+will always yield an event in this case -- even if the event was induced
+by an \fBopen\fR(2) via an unwatched path. If an event occurs within a
+watched directory on a file for which there exist multiple hard links within
+the same (watched) directory, the event's \fIname\fR will correspond to one
+of the links to the file. If multiple hard links exist to the
+same file in the same watched directory and one of the links is removed,
+notifications may not necessarily continue to be received for the file,
+despite the (remaining) link in the watched directory; users of
+\fBinotify\fR should exercise extreme caution when watching directories
+that contain files with multiple hard links in the same directory.
+
+.SH SEE ALSO
+.sp
+.LP
+\fBinotify_init\fR(3C), \fBinotify_init1\fR(3C), \fBinotify_add_watch\fR(3C),
+\fBinotify_rm_watch\fR(3C), \fBport_get\fR(3C), \fBepoll\fR(5)
diff --git a/usr/src/man/man5/lx.5 b/usr/src/man/man5/lx.5
new file mode 100644
index 0000000000..0060f9ecfa
--- /dev/null
+++ b/usr/src/man/man5/lx.5
@@ -0,0 +1,108 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright 2016, Joyent, Inc.
+.\"
+.Dd February 5, 2106
+.Dt LX 5
+.Os
+.Sh NAME
+.Nm lx
+.Nd zone brand for running a GNU/Linux user-level environment
+.Sh DESCRIPTION
+The
+.Em lx
+brand
+uses the
+.Xr brands 5
+framework to provide an environment for running binary applications built
+for GNU/Linux. User-level code, including an entire Linux distribution, can
+run inside the zone. Both 32-bit and 64-bit applications are supported. The
+majority of Linux system calls are provided, along with emulation for a
+variety of Linux file systems, such as
+.Em proc ,
+.Em cgroup
+and
+.Em sysfs .
+.Pp
+The
+.Em /proc
+file system within the zone is a subset of a full Linux
+.Em /proc .
+Most kernel-level tuning applied to
+.Em /proc
+is unavailable or ignored. Some tuning can be performed, but only to reduce
+the overall limits that have been specified on the zone's configuration.
+That is, within the zone there is no way to increase the resource limits set
+on the zone itself.
+.Pp
+The zone must be installed using a clone of a
+.Xr zfs 1m
+dataset which contains an image of the software to be run in the zone.
+.Pp
+Example:
+.Dl zoneadm -z myzone install -x nodataset -t debian7
+.Pp
+Applications provided by the base SunOS operating system are also available
+within the zone under the
+.Em /native
+mount point. This allows the use of various native tools such as
+.Xr dtrace 1m ,
+.Xr mdb 1 ,
+or the
+.Xr proc 1
+tools on GNU/Linux applications. However, not every native tool will work
+properly within an
+.Em lx
+zone.
+.Sh CONFIGURATION
+The
+.Em kernel-version
+attribute can be included in the zone's
+.Xr zonecfg 1m
+settings as a way to specify the Linux version that the zone is emulating. For
+example, the value could be
+.Em 3.13.0 .
+.Sh LIMITATIONS
+The brand only supports the exclusive IP stack zone configuration.
+.Pp
+Most modern GNU/Linux application software runs on
+.Em lx ,
+but because there are some system calls or file systems which are not currently
+implemented, it's possible that an application won't run. This does not
+preclude the application running in the future as the
+.Em lx
+brand adds new capabilities.
+.Pp
+Because there is only the single SunOS kernel running on the system, there
+is no support for any Linux kernel-level modules. That is, there is no support
+for add-on drivers or any other modules that are part of the Linux kernel
+itself. If that is required, a full virtual machine should be used instead of
+an
+.Em lx
+branded zone.
+.Pp
+Any core files produced within the zone are in the native SunOS format.
+.Pp
+As with any zone, the normal security mechanisms and privileges apply. Thus,
+certain operations (for example, changing the system time), will not be allowed
+unless the zone has been configured with the appropriate additional privileges.
+.Sh SEE ALSO
+.Xr mdb 1 ,
+.Xr proc 1 ,
+.Xr dtrace 1m ,
+.Xr zfs 1m ,
+.Xr zoneadm 1m ,
+.Xr zonecfg 1m ,
+.Xr brands 5 ,
+.Xr privileges 5 ,
+.Xr resource_controls 5 ,
+.Xr zones 5
diff --git a/usr/src/man/man5/overlay.5 b/usr/src/man/man5/overlay.5
new file mode 100644
index 0000000000..c415cad4b6
--- /dev/null
+++ b/usr/src/man/man5/overlay.5
@@ -0,0 +1,499 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright 2015 Joyent, Inc.
+.\"
+.Dd Apr 09, 2015
+.Dt OVERLAY 5
+.Os
+.Sh NAME
+.Nm overlay
+.Nd Overlay Devices
+.Sh DESCRIPTION
+Overlay devices are a GLDv3 device that allows users to create overlay
+networks that can be used to form the basis of network virtualization
+and software defined networking. Overlay networks allow a single
+physical network, often called an
+.Sy underlay
+network, to provide the means for creating multiple logical, isolated,
+and discrete layer two and layer three networks on top of it.
+.Pp
+Overlay devices are administered through
+.Xr dladm 1M .
+Overlay devices themselves cannot be plumbed up with
+.Sy IP ,
+.Sy vnd ,
+or any other protocol. Instead, like an
+.Sy etherstub ,
+they allow for VNICs to be created on top of them. Like an
+.Sy etherstub ,
+an overlay device acts as a local switch; however, when it encounters a
+non-local destination address, it instead looks up where it should send
+the packet, encapsulates it, and sends it out another interface in the
+system.
+.Pp
+A single overlay device encapsulates the logic to answer two different,
+but related, questions:
+.Pp
+.Bl -enum -offset indent -compact
+.It
+How should a packet be transformed and put on the wire?
+.It
+Where should a transformed packet be sent?
+.El
+.Pp
+Each of these questions is answered by a plugin. The first question is
+answered by what's called an
+.Em encapsulation plugin .
+The second question is answered by what's called a
+.Em search plugin .
+Packets are encapsulated and decapsulated using the encapsulation plugin
+by the kernel. The search plugins are all user land plugins that are
+consumed by the varpd service whose FMRI is
+.Em svc:/network/varpd:default .
+This separation allows for the kernel to be responsible for the data
+path, while having the search plugins in userland allows the system to
+provide a much more expressive interface.
+.Ss Overlay Types
+Overlay devices come in
+two different flavors, one where all packets are always sent to a single
+address, the other, where the destination of a packet varies based on
+the target MAC address of the packet. This information is maintained in
+a
+.Em target table ,
+which is independent and unique to each overlay device. We call the
+plugins that send traffic to a single location, for example a single
+unicast or multicast IP address, a
+.Sy point to point
+overlay and the overlay devices that can send traffic to different
+locations based on the MAC address of that packet a
+.Sy dynamic
+overlay. The plugin type is determined based on the type of the
+.Sy search plugin .
+These are all fully listed in the section
+.Sx Plugins and their Properties .
+.Ss Overlay Destination
+Both encapsulation and search plugins define the kinds of destinations
+that they know how to support. An encapsulation plugin always has a
+single destination type that's determined based on how the encapsulation
+is defined. A search plugin, on the other hand, can support multiple
+combinations of destinations. A search plugin must support the
+destination type of the encapsulation device. The destination may
+require any of the following three pieces of information, depending on
+the encapsulation plugin:
+.Bl -hang -width Ds
+.It Sy MAC Address
+.Bd -filled -compact
+An Ethernet MAC address is required to determine the destination.
+.Ed
+.It Sy IP Address
+.Bd -filled -compact
+An IP address is required. Both IPv4 and IPv6 addresses are supported.
+.Ed
+.It Sy Port
+.Bd -filled -compact
+An IP protocol level (TCP, UDP, SCTP, etc.) port is required.
+.Ed
+.El
+.Pp
+The list of destination types that are supported by both the search and
+encapsulation plugins is listed in the section
+.Sx Plugins and their Properties .
+.Ss varpd
+The varpd service, mentioned above, is responsible for providing the
+virtual ARP daemon. Its responsibility is conceptually similar to ARP.
+It runs all instances of search plugins in the system and is responsible
+for answering the kernel's ARP-like questions for where packets should
+be sent.
+.Pp
+The varpd service, svc:/network/varpd:default, must be enabled for
+overlay devices to function. If it is disabled while there are active
+devices, then most overlay devices will not function correctly and
+likely will end up dropping traffic.
+.Sh PLUGINS AND PROPERTIES
+Properties fall into three categories in the system:
+.Bl -enum -offset indent -compact
+.It
+Generic properties all overlay devices have
+.It
+Properties specific to the encapsulation plugin
+.It
+Properties specific to the search plugin
+.El
+.Pp
+Each property in the system has the following attributes, which mirror
+the traditional
+.Xr dladm 1M
+link properties:
+.Bl -hang -width Ds
+.It Sy Name
+.Bd -filled -compact
+The name of a property is namespaced by its module and always structured
+and referred to as as module/property. This allows for both an
+encapsulation and search plugin to have a property with the same name.
+Properties that are valid for all overlay devices and not specific to a
+module do not generally use a module prefix.
+.Pp
+For example, the property
+.Sy vxlan/listen_ip
+is associated with the
+.Sy vxlan
+encapsulation module.
+.Ed
+.It Sy Type
+.Bd -filled -compact
+Each property in the system has a type.
+.Xr dladm 1M
+takes care of converting between the internal representation and a
+value, but the type influences the acceptable input range. The types
+are:
+.Bl -hang -width Ds
+.It Sy INT
+A signed integer that is up to eight bytes long
+.Pq Sy int64_t .
+.It Sy UINT
+An unsigned integer that is up to eight bytes long
+.Pq Sy uint64_t .
+.It Sy IP
+Either an IPv4 or IPv6 address in traditional string form. For example,
+192.168.128.23 or 2001:470:8af4::1:1. IPv4 addresses may also be encoded
+as IPv4-mapped IPv6 addresses.
+.It Sy STRING
+A string of ASCII or UTF-8 encoded characters terminated with a
+.Sy NUL
+byte. The maximum string length, including the terminator, is currently
+256 bytes.
+.El
+.Ed
+.It Sy Permissions
+.Bd -filled -compact
+Each property has permissions associated with it, which indicate whether
+the system considers them read-only properties or read-write properties.
+A read-only property can never be updated once the device is created.
+This generally includes things like the overlay's encapsulation module.
+.Ed
+.It Sy Required
+.Bd -filled -compact
+This property indicates whether the property is required for the given
+plugin. If it is not specified during a call to
+.Sy dladm create-overlay ,
+then the overlay cannot be successfully created. Properties which have a
+.Sy default
+will use that value if one is not specified rather than cause the
+overlay creation to fail.
+.Ed
+.It Sy Current Value
+.Bd -filled -compact
+The current value of a property, if the property has a value set.
+Required properties always have a value set.
+.Ed
+.It Sy Default Value
+.Bd -filled -compact
+The default value is an optional part of a given property. If a property
+does define a default value, then it will be used when an overlay is
+created and no other value is given.
+.Ed
+.It Sy Value ranges
+.Bd -filled -compact
+Value ranges are an optional part of a given property. They indicate a
+range or set of values that are valid and may be set for a property. A
+property may not declare such a range as it may be impractical or
+unknown. For example, most properties based on IP addresses will not
+declare a range.
+.Ed
+.El
+.Pp
+The following sections describe both the modules and the properties that
+exist for each module, noting their name, type, permissions, whether or
+not they are required, and if there is a default value. In addition, the
+effects of each property will be described.
+.Ss Encapsulation Plugins
+.Bl -hang -width Ds
+.It Sy vxlan
+The
+.Sy vxlan
+module is a UDP based encapsulation method. It takes a frame that would
+be put on the wire, wraps it up in a VXLAN header and places it in a UDP
+packet that gets sent out on the underlying network. For more details
+about the specific format of the VXLAN header, see
+.Xr vxlan 7P .
+.Pp
+The
+.Sy vxlan
+module requires both an
+.Sy IP address
+and
+.Sy port
+to address it. It has a 24-bit virtual network ID space, allowing for
+virtual network identifiers that range from
+.Sy 0
+-
+.Sy 16777215 .
+.Pp
+The
+.Sy vxlan
+module has the following properties:
+.Bl -hang -width Ds
+.It Sy vxlan/listen_ip
+.Bd -filled -compact
+Type:
+.Sy IP |
+Permissions:
+.Sy Read/Write |
+.Sy Required
+.Ed
+.Bd -filled
+The
+.Sy vxlan/listen_ip
+property determines the IP address that the system will accept VXLAN
+encapsulated packets on for this overlay.
+.Ed
+.It Sy vxlan/listen_port
+.Bd -filled -compact
+Type:
+.Sy UINT |
+Permissions:
+.Sy Read/Write |
+.Sy Required
+.Ed
+.Bd -filled -compact
+Default Value:
+.Sy 4789 |
+Range:
+.Sy 0 - 65535
+.Ed
+.Bd -filled
+The
+.Sy vxlan/listen_port
+property determines the UDP port that the system will listen on for
+VXLAN traffic for this overlay. The default value is
+.Sy 4789 ,
+the IANA assigned port for VXLAN.
+.Ed
+.El
+.Pp
+The
+.Sy vxlan/listen_ip
+and
+.Sy vxlan/listen_port
+properties determine how the system will accept VXLAN encapsulated
+packets for this interface. It does not determine the interface that
+packets will be sent out over. Multiple overlays that all use VXLAN can
+share the same IP and port combination, as the virtual network
+identifier can be used to tell the different overlays apart.
+.El
+.Ss Search Plugins
+Because search plugins may support multiple destinations, they may have
+more properties listed than necessarily show up for a given overlay.
+For example, the
+.Sy direct
+plugin supports destinations that are identified by both an IP address
+and a port, or just an IP address. In cases where the device is created
+over an overlay that only uses an IP address for its destination, then
+it will not have the
+.Sy direct/dest_port
+property.
+.Bl -hang -width Ds
+.It Sy direct
+The
+.Sy direct
+plugin is a point to point module that can be used to create an overlay
+that forwards all non-local traffic to a single destination. It supports
+destinations that are a combination of an
+.Sy IP Address
+and a
+.Sy port .
+.Pp
+The
+.Sy direct
+plugin has the following properties:
+.Bl -hang -width Ds
+.It Sy direct/dest_ip
+.Bd -filled -compact
+Type:
+.Sy IP |
+Permissions:
+.Sy Read/Write |
+.Sy Required
+.Ed
+.Bd -filled
+The
+.Sy direct/dest_ip
+property indicates the IP address that all traffic will be sent out.
+Traffic will be sent out the corresponding interface based on
+traditional IP routing rules and the configuration of the networking
+stack of the global zone.
+.Ed
+.It Sy direct/dest_port
+.Bd -filled -compact
+Type:
+.Sy UINT |
+Permissions:
+.Sy Read/Write |
+.Sy Required
+.Ed
+.Bd -filled -compact
+Default Value:
+.Sy - |
+Range:
+.Sy 0 - 65535
+.Ed
+.Bd -filled
+The
+.Sy direct/dest_port
+property indicates the TCP or UDP port that all traffic will be directed
+to.
+.Ed
+.El
+.It Sy files
+The
+.Sy files
+plugin implements a
+.Sy dynamic
+plugin that specifies where traffic should be sent based on a file. It
+is a glorified version of /etc/ethers. The
+.Sy dynamic
+plugin does not support broadcast or multicast traffic, but it has
+support for proxy ARP, NDP, and DHCPv4. For the full details of the file
+format, see
+.Xr overlay_files 4 .
+.Pp
+The
+.Sy files
+plugin has the following property:
+.Bl -hang -width Ds
+.It Sy files/config
+.Bd -filled -compact
+Type:
+.Sy String |
+Permissions:
+.Sy Read/Write |
+.Sy Required
+.Ed
+.Bd -filled
+The
+.Sy files/config
+property specifies an absolute path to a file to read. The file is a
+JSON file that is formatted according to
+.Xr overlay_files 4 .
+.Ed
+.El
+.El
+.Ss General Properties
+Each overaly has the following properties which are used to give
+additional information about the system. None of these properties may be
+specified as part of a
+.Sy dladm create-overlay ,
+instead they come from other arguments or from internal parts of the
+system.
+.Bl -hang -width Ds
+.It Sy encap
+.Bd -filled -compact
+.Sy String |
+Permissions:
+.Sy Read Only
+.Ed
+.Bd -filled
+The
+.Sy encap
+property contains the name of the encapsulation module that's in use.
+.Ed
+.It Sy mtu
+.Bd -filled -compact
+.Sy UINT |
+Permissions:
+.Sy Read/Write
+.Ed
+.Bd -filled -compact
+Default Value:
+.Sy 1400 |
+Range:
+.Sy 576 - 9000
+.Ed
+.Bd -filled
+The
+.Sy mtu
+property describes the maximum transmission unit of the overlay. The
+default value is
+.Sy 1400
+bytes, which ensures that in a traditional deployment with an MTU of
+1500 bytes, the overhead that is added from encapsulation is all
+accounted for. It is the administrator's responsibility to ensure that
+the device's MTU and the encapsulation overhead does not exceed that of
+the interfaces that the encapsulated traffic will be sent out of.
+.Pp
+To modify the
+.Sy mtu
+property, use
+.Sy dladm set-linkprop .
+.Ed
+.It Sy search
+.Bd -filled -compact
+.Sy String |
+Permissions:
+.Sy Read Only
+.Ed
+.Bd -filled
+The
+.Sy search
+property contains the name of the search plugin that's in use.
+.Ed
+.It Sy varpd/id
+.Bd -filled -compact
+.Sy String |
+Permissions:
+.Sy Read Only
+.Ed
+.Bd -filled
+The
+.Sy varpd/id
+property indicates the identifier which the
+.Sy varpd
+service uses for this overlay.
+.Ed
+.It Sy vnetid
+.Bd -filled -compact
+.Sy UINT |
+Permissions:
+.Sy Read/Write
+.Ed
+.Bd -filled
+The
+.Sy vnetid
+property has the virtual network identifier that belongs to this overlay.
+The valid range for the virtual network identifier depends on the
+encapsulation engine.
+.Ed
+.El
+.Sh FMA INTEGRATION
+Overlay devices are wired into FMA, the illumos fault management
+architecture, and generates error reports depending on the
+.Sy search
+plugin in use. Due to limitations in FMA today, when a single overlay
+enters a degraded state, meaning that it cannot properly perform look
+ups or another error occurred, then it degrades the overall
+.Sy overlay
+psuedo-device driver.
+.Pp
+For more fine-grained information about which overlay is actually in a
+.Em degraded
+state, one should run
+.Sy dladm show-overlay -f .
+In addition, for each overlay in a degraded state a more useful
+diagnostic message is provided which describes the reason that caused
+this overlay to enter into a degraded state.
+.Pp
+The overlay driver is self-healing. If the problem corrects itself on
+its own, it will clear the fault on the corresponding device.
+.Sh SEE ALSO
+.Xr dladm 1M ,
+.Xr overlay_files 4 ,
+.Xr vxlan 7P
diff --git a/usr/src/man/man5/privileges.5 b/usr/src/man/man5/privileges.5
index 6f7ce67ac3..2ca9ca6fd2 100644
--- a/usr/src/man/man5/privileges.5
+++ b/usr/src/man/man5/privileges.5
@@ -1,10 +1,10 @@
'\" te
.\" Copyright (c) 2009, Sun Microsystems, Inc. All Rights Reserved.
-.\" Copyright 2015, Joyent, Inc. All Rights Reserved.
+.\" Copyright 2016, Joyent, Inc. All Rights Reserved.
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.
.\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with
.\" the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH PRIVILEGES 5 "Oct 30, 2015"
+.TH PRIVILEGES 5 "Mar 24, 2016"
.SH NAME
privileges \- process privilege model
.SH DESCRIPTION
@@ -307,6 +307,16 @@ Allow a process to perform privileged mappings through a graphics device.
.sp
.ne 2
.na
+\fB\fBPRIV_HYPRLOFS_CONTROL\fR\fR
+.ad
+.sp .6
+.RS 4n
+Allow a process to perform hyprlofs name space management.
+.RE
+
+.sp
+.ne 2
+.na
\fB\fBPRIV_IPC_DAC_READ\fR\fR
.ad
.sp .6
@@ -475,7 +485,7 @@ Allow a process to change its root directory.
.ad
.sp .6
.RS 4n
-Allow a process to use high resolution timers.
+Allow a process to use high resolution timers with very small time values.
.RE
.sp
@@ -686,6 +696,16 @@ Allow a process to configure a system's datalink interfaces.
.sp
.ne 2
.na
+\fB\fBPRIV_SYS_FS_IMPORT\fR\fR
+.ad
+.sp .6
+.RS 4n
+Allow a process to import a potentially untrusted file system (e.g. ZFS recv).
+.RE
+
+.sp
+.ne 2
+.na
\fB\fBPRIV_SYS_IP_CONFIG\fR\fR
.ad
.sp .6
diff --git a/usr/src/man/man5/resource_controls.5 b/usr/src/man/man5/resource_controls.5
index 6546af320d..f815cc6907 100644
--- a/usr/src/man/man5/resource_controls.5
+++ b/usr/src/man/man5/resource_controls.5
@@ -1,15 +1,17 @@
'\" te
.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
+.\" Copyright (c) 2012, Joyent, Inc. All Rights Reserved.
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
.TH RESOURCE_CONTROLS 5 "Jul 19, 2013"
.SH NAME
-resource_controls \- resource controls available through project database
+resource_controls \- resource controls available through projects and zones
.SH DESCRIPTION
.LP
-The resource controls facility is configured through the project database. See
-\fBproject\fR(4). You can set and modify resource controls through the
+For projects the resource controls facility is configured through the project
+database. See \fBproject\fR(4). For zones, resource controls are configured
+through \fBzonecfg\fR(1M). You can set and modify resource controls through the
following utilities:
.RS +4
.TP
@@ -35,6 +37,12 @@ following utilities:
.el o
\fBrctladm\fR(1M)
.RE
+.RS +4
+.TP
+.ie t \(bu
+.el o
+\fBzonecfg\fR(1M)
+.RE
.sp
.LP
In a program, you use \fBsetrctl\fR(2) to set resource control values.
@@ -282,6 +290,19 @@ Maximum allowable number of event ports, expressed as an integer.
.sp
.ne 2
.na
+\fB\fBproject.max-processes\fR\fR
+.ad
+.sp .6
+.RS 4n
+Maximum number of processes that can be active in a project. This rctl is
+similar to \fBproject.max-lwps\fR, except that zombie processes are included.
+This rctl prevents process-slot exhaustion which can occur due to an excessive
+number of zombies. Expressed as an integer.
+.RE
+
+.sp
+.ne 2
+.na
\fB\fBproject.max-sem-ids\fR\fR
.ad
.sp .6
@@ -370,6 +391,33 @@ The following zone-wide resource controls are available:
.sp
.ne 2
.na
+\fB\fBzone.cpu-baseline\fR\fR
+.ad
+.sp .6
+.RS 4n
+Sets a baseline amount of CPU time that a zone can use before it is considered
+to be bursting. The unit used is the percentage of a single CPU that is being
+used by all user threads in a zone. The value should be less than the
+\fBzone.cpu-cap\fR rctl value and is expressed as an integer.
+This resource control does not support the \fBsyslog\fR action.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBzone.cpu-burst-time\fR\fR
+.ad
+.sp .6
+.RS 4n
+Sets the number of seconds that a zone can exceed the \fBzone.cpu-baseline\fR
+rctl value before being cpu-capped down to the \fBzone.cpu-baseline\fR.
+A value of 0 means that \fBzone.cpu-baseline\fR can be exceeded indefinitely.
+This resource control does not support the \fBsyslog\fR action.
+.RE
+
+.sp
+.ne 2
+.na
\fB\fBzone.cpu-cap\fR\fR
.ad
.sp .6
@@ -388,7 +436,7 @@ not support the \fBsyslog\fR action.
.ad
.sp .6
.RS 4n
-Sets a limit on the number of fair share scheduler (FSS) CPU shares for a zone.
+Sets a value on the number of fair share scheduler (FSS) CPU shares for a zone.
CPU shares are first allocated to the zone, and then further subdivided among
projects within the zone as specified in the \fBproject.cpu-shares\fR entries.
Expressed as an integer. This resource control does not support the
@@ -408,14 +456,25 @@ Total amount of physical locked memory available to a zone.
.sp
.ne 2
.na
+\fB\fBzone.max-lofi\fR\fR
+.ad
+.sp .6
+.RS 4n
+Sets a limit on the number of \fBLOFI\fR(7D) devices that can be created in a
+zone. Expressed as an integer. This resource control does not support the
+\fBsyslog\fR action.
+.RE
+
+.sp
+.ne 2
+.na
\fB\fBzone.max-lwps\fR\fR
.ad
.sp .6
.RS 4n
-Enhances resource isolation by preventing too many LWPs in one zone from
-affecting other zones. A zone's total LWPs can be further subdivided among
-projects within the zone within the zone by using \fBproject.max-lwps\fR
-entries. Expressed as an integer.
+Sets a limit on how many LWPs can be active in a zone. A zone's total LWPs
+can be further subdivided among projects within the zone within the zone by
+using \fBproject.max-lwps\fR entries. Expressed as an integer.
.RE
.sp
@@ -432,6 +491,33 @@ integer.
.sp
.ne 2
.na
+\fB\fBzone.max-physical-memory\fR\fR
+.ad
+.sp .6
+.RS 4n
+Sets a limit on the amount of physical memory (RSS) that can be used by a zone
+before resident pages start being forcibly paged out. The unit used is bytes.
+Expressed as an integer. This resource control does not support the
+\fBsyslog\fR action.
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBzone.max-processes\fR\fR
+.ad
+.sp .6
+.RS 4n
+Maximum number of processes that can be active in a zone. This rctl is
+similar to \fBzone.max-lwps\fR, except that zombie processes are included.
+This rctl prevents process-slot exhaustion which can occur due to an excessive
+number of zombies. This rctl can be further subdivided among projects within
+the zone using \fBproject.max-processes\fR. Expressed as an integer.
+.RE
+
+.sp
+.ne 2
+.na
\fB\fBzone.max-sem-ids\fR\fR
.ad
.sp .6
@@ -473,6 +559,18 @@ mappings and \fBtmpfs\fR mounts for this zone.
.RE
.sp
+.ne 2
+.na
+\fB\fBzone.zfs-io-priority\fR\fR
+.ad
+.sp .6
+.RS 4n
+Sets a value for the \fBzfs\fR(1M) I/O priority for a zone. This is used as
+one of the inputs to determine if a zone's I/O should be throttled. Expressed
+as an integer. This resource control does not support the \fBsyslog\fR action.
+.RE
+
+.sp
.LP
See \fBzones\fR(5).
.SS "Units Used in Resource Controls"
diff --git a/usr/src/man/man7d/Makefile b/usr/src/man/man7d/Makefile
index 85422556e7..e3404ff3a5 100644
--- a/usr/src/man/man7d/Makefile
+++ b/usr/src/man/man7d/Makefile
@@ -12,6 +12,7 @@
#
# Copyright 2011, Richard Lowe
# Copyright 2015 Nexenta Systems, Inc. All rights reserved.
+# Copyright 2016 Joyent, Inc.
# Copyright 2016 Garrett D'Amore <garrett@damore.org>
#
@@ -59,6 +60,7 @@ _MANFILES= aac.7d \
hme.7d \
hubd.7d \
hxge.7d \
+ i40e.7d \
ib.7d \
ibcm.7d \
ibd.7d \
@@ -142,11 +144,13 @@ _MANFILES= aac.7d \
virtualkm.7d \
vni.7d \
vr.7d \
+ vnd.7d \
wscons.7d \
xge.7d \
yge.7d \
zcons.7d \
- zero.7d
+ zero.7d \
+ zfd.7d
sparc_MANFILES= audiocs.7d \
bbc_beep.7d \
diff --git a/usr/src/man/man7d/cpuid.7d b/usr/src/man/man7d/cpuid.7d
index 1ede29ccc5..b14cc64742 100644
--- a/usr/src/man/man7d/cpuid.7d
+++ b/usr/src/man/man7d/cpuid.7d
@@ -1,9 +1,10 @@
'\" te
.\" Copyright (c) 2004, Sun Microsystems, Inc. All Rights Reserved
+.\" Copyright 2015, Joyent, Inc.
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
-.TH CPUID 7D "Sep 16, 2004"
+.TH CPUID 7D "Jun 04, 2015"
.SH NAME
cpuid \- CPU identification driver
.SH SYNOPSIS
@@ -14,29 +15,24 @@ cpuid \- CPU identification driver
.SH DESCRIPTION
.SS "SPARC and x86 system"
-.sp
.LP
This device provides implementation-private information via ioctls about
various aspects of the implementation to Solaris libraries and utilities.
.SS "x86 systems only"
-.sp
.LP
This device also provides a file-like view of the namespace and return values
of the x86 cpuid instruction. The cpuid instruction takes a single 32-bit
integer function code, and returns four 32-bit integer values corresponding to
the input value that describe various aspects of the capabilities and
configuration of the processor.
-.sp
.LP
The API for the character device consists of using the seek offset to set the
function code value, and using a \fBread\fR(2) or \fBpread\fR(2) of 16 bytes to
fetch the four 32-bit return values of the instruction in the order %\fBeax\fR,
%\fBebx\fR, %\fBecx\fR and %\fBedx\fR.
-.sp
.LP
No data can be written to the device. Like the \fBcpuid\fR instruction, no
special privileges are required to use the device.
-.sp
.LP
The device is useful to enable low-level configuration information to be
extracted from the CPU without having to write any assembler code to invoke the
@@ -44,13 +40,15 @@ extracted from the CPU without having to write any assembler code to invoke the
correct any erroneous data returned by the instruction (prompted by occassional
errors in the information exported by various processor implementations over
the years).
-.sp
.LP
See the processor manufacturers documentation for further information about the
syntax and semantics of the wide variety of information available from this
instruction.
+.LP
+Some systems can be configured to limit the cpuid opcodes which are accessible.
+While illumos handles this condition, other software may malfunction when such
+limits are enabled. Those settings are typically manipulated in the BIOS.
.SH EXAMPLE
-.sp
.LP
This example allows you to determine if the current x86 processor supports
"long mode," which is a necessary (but not sufficient) condition for running
@@ -113,7 +111,6 @@ fail:
.in -2
.SH ERRORS
-.sp
.ne 2
.na
\fBENXIO\fR
@@ -134,7 +131,6 @@ with a size that is not multiple of 16 bytes.
.RE
.SH FILES
-.sp
.ne 2
.na
\fB\fB/dev/cpu/self/cpuid\fR\fR
@@ -144,7 +140,6 @@ Provides access to CPU identification data.
.RE
.SH ATTRIBUTES
-.sp
.LP
See \fBattributes\fR(5) for descriptions of the following attributes:
.sp
@@ -160,7 +155,6 @@ Interface Stability Evolving
.TE
.SH SEE ALSO
-.sp
.LP
\fBpsrinfo\fR(1M), \fBprtconf\fR(1M), \fBpread\fR(2), \fBread\fR(2),
\fBattributes\fR(5)
diff --git a/usr/src/man/man7d/i40e.7d b/usr/src/man/man7d/i40e.7d
new file mode 100644
index 0000000000..082f8b0eeb
--- /dev/null
+++ b/usr/src/man/man7d/i40e.7d
@@ -0,0 +1,282 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright 2016 Joyent, Inc.
+.\"
+.Dd March 25, 2016
+.Dt I40E 7D
+.Os
+.Sh NAME
+.Nm i40e
+.Nd Intel XL710 10/40 Gigabit Ethernet Device Driver
+.Sh SYNOPSIS
+.Pa /dev/i40e*
+.Sh DESCRIPTION
+The
+.Nm
+driver is a GLDv3, multi-threaded, clonable, loadable device driver that
+supports the Data Link Provider Interface,
+.Xr dlpi 7P .
+The
+.Nm
+driver supports the Intel XL710 Ethernet Controller family of networking
+interface cards which come in both 10 GbE and 40 GbE variants.
+.Pp
+In addition to basic device initialization and the sending and receiving
+of frames, it supports the following features:
+.Bl -dash -offset indent
+.It
+Jumbo frames up to 9710 bytes.
+.It
+Promiscuous access via
+.Xr snoop 1M and
+.Xr dlpi 7P
+.It
+IPv4 Checksum Offload
+.It
+TCP, UDP, and SCTP checksum offload
+.El
+.Pp
+At this time, the
+.Nm
+driver does not enable the use of energy efficient Ethernet (EEE) or
+support the use of flow control through hardware pause frames.
+.Sh APPLICATION PROGRAMMING INTERFACE
+For each device supported by the
+.Nm
+installed in the system, a character-special file will be created. This
+file supports the Data Link Provider Interface (DLPI) which is documented
+in
+.Xr dlpi 7P .
+For most consumers, the use of
+.Xr libdlpi 3LIB ,
+is recommended.
+.Pp
+Each instance is assigned a unique ascending integer identifier. A
+device which has multiple ports may appear to the system as separate
+instances. The system does not provide a guarnatee on how these will be
+presented. Using this instance identifier, one can determine the exact
+character-special file to open. For example, the first instance
+enumerated in the system, with id 0, would be named
+.Sy i40e0 .
+It exists in the file system at
+.Pa /dev/i40e0 .
+.Sh CONFIGURATION
+The
+.Nm i40e
+driver always performs auto-negotiation and depending on the model may
+negotiate to 40 Gbps, 10 Gbps, or 1 Gbps. At this time, there are no
+models that support 1000BASE-T and thus auto-negotiation is required.
+.Pp
+The
+.Nm
+driver is managed by the
+.Xr dladm 1M
+utility and it is the preferred interface for setting all properties. While
+.Xr driver.conf based configuration is possible,
+.Xr dladm 1M
+is recommended. The
+.Nm
+driver may be joined into an aggregation based on the link aggregation
+control protocol (LACP) through
+.Xr dladm 1M .
+.Sh PROPERTIES
+The device supports the following properties which may be tuned through
+its driver.conf file,
+.Pa /kernel/drv/i40e.conf .
+Most of these properties cannot be changed after the device has been
+started. The device is started in response to a DLPI consumer opening
+the device and binding to it. This happens when an IP interfaces is
+plumbed or another
+.Xr dlpi 7P
+consumer such as
+.Xr snoop 1M
+or an LLDP daemon is started.
+.Pp
+Some properties may be tuned at runtime with the
+.Xr dladm 1M
+utility. Properties that can be will have the name of the dladm property
+called out explicitly.
+.Pp
+These properties are not considered stable at this time. They may change
+and should not be relied on. They are considered
+.Sy Volatile .
+It is not expected that administrators of the system will have to tune
+these values.
+.Bl -hang -width Ds
+.It Sy default_mtu
+.Bd -filled -compact
+Minimum:
+.Sy 1500 |
+Maximum:
+.Sy 9710 |
+Runtime Property:
+.Sy mtu
+.Ed
+.Bd -filled
+The
+.Sy default_mtu
+property determines the starting MTU of the various device instances.
+Note that the device's MTU also determines the upper bound of the MTU of
+all VNICs created over the device. The default MTU is
+.Sy 1500 .
+.Ed
+.It Sy mr_enable
+.Bd -filled -compact
+Minimum:
+.Sy 0 |
+Maximum:
+.Sy 1
+.Ed
+.Bd -filled
+The
+.Sy mr_enable
+proeprty determines whether or not support for multiple rings is enabled
+for the device. The default is always to enable them. It is not
+recommended to to disable them.
+.Ed
+.It Sy rx_ring_size
+.Bd -filled -compact
+Minimum:
+.Sy 64 |
+Maximum:
+.Sy 4096
+.Ed
+.Bd -filled
+The
+.Sy rx_ring_size
+property determines the number of descriptors that will be used in each
+receive ring on the card. Administrators should not normally need to
+tune this value. Hardware requires that the ring size be a multiple of
+32. The system will round up the set value to the nearest multiple of
+32.
+.Ed
+.It Sy tx_ring_size
+.Bd -filled -compact
+Minimum:
+.Sy 64 |
+Maximum:
+.Sy 4096
+.Ed
+.Bd -filled
+The
+.Sy tx_ring_size
+property determines the number of descriptors that will be used in each
+transmit ring on the card. Administrators should not normally need to
+tune this value. Hardware requires that the ring size be a multiple of
+32. The system will round up the set value to the nearest multiple of
+32.
+.Ed
+.It Sy tx_resched_threshold
+.Bd -filled -compact
+Minimum:
+.Sy 8 |
+Maximum:
+.Sy Variable
+.Ed
+.Bd -filled
+The
+.Sy tx_resched_threshold
+property determines the number of descriptors that must be available for
+a frame to be transmitted. The maximum is variable. It is dependent on
+the value of the
+.Sy tx_ring_size
+property. At least eight descriptors must be available for the device to
+function correctly.
+.Ed
+.It Sy rx_limit_per_intr
+.Bd -filled -compact
+Minimum:
+.Sy 16 |
+Maximum:
+.Sy 4096
+.Ed
+.Bd -filled
+The
+.Sy rx_limit_per_intr
+property determines the maximum number of packets that will be processed
+on a given ring during a single interrupt. This is done to try and
+guarantee some amount of liveness in the system. It is not expected
+that administrators will have to tune this value.
+.Ed
+.It Sy tx_hcksum_enable
+.Bd -filled -compact
+Minimum:
+.Sy 0 |
+Maximum:
+.Sy 1
+.Ed
+.Bd -filled
+The
+.Sy tx_hcksum_enable
+property controls whether or not the device enables support for hardware
+checksuming of outgoing packets. The default is to always enable support
+for this. Turning it off will increase latency and decrease throughput
+when transmitting packets, but should be done if a hardware bug is
+suspected.
+.Ed
+.It Sy rx_hcksum_enable
+.Bd -filled -compact
+Minimum:
+.Sy 0 |
+Maximum:
+.Sy 1
+.Ed
+.Bd -filled
+The
+.Sy rx_hcksum_enable
+property controls whether or not the device enables support for hardware
+checksuming of incoming packets. The default is to always enable support
+for this. Turning it off will increase latency and decrease throughput
+when receiving packets, but should be done if a hardware bug is
+suspected.
+.Ed
+.It Sy rx_dma_threshold
+.Bd -filled -compact
+Minimum:
+.Sy 0 |
+Maximum:
+.Sy INT32_MAX |
+Runtime Property:
+.Sy _rx_dma_treshold
+.Ed
+.Bd -filled
+The
+.Sy rx_dma_treshold
+indicates the size in bytes of a received frame, including all of its
+headers, at which the driver should not copy the frame but instead bind
+DMA memory. By setting this property to its minimum, all frames will be
+processed with DMA binding. By setting this property to its maximum, all
+frames will be processed by copying the frame.
+.Ed
+.El
+.Sh ARCHITECTURE
+The
+.Nm
+driver is only supported on
+.Sy x86
+systems at this time.
+.Sh FILES
+.Bl -tag -width Pa
+.It Pa /dev/i40e*
+Per-instance character device.
+.It Pa /kernel/drv/i40e
+32-bit device driver (x86).
+.It Pa /kernel/drv/amd64/i40e
+64-bit device driver (x86).
+.It Pa /kernel/drv/i40e.conf
+Driver configuration file.
+.El
+.Sh SEE ALSO
+.Xr dladm 1M ,
+.Xr snoop 1M ,
+.Xr driver.conf 4 ,
+.Xr dlpi 7P
diff --git a/usr/src/man/man7d/vnd.7d b/usr/src/man/man7d/vnd.7d
new file mode 100644
index 0000000000..d311c4dc08
--- /dev/null
+++ b/usr/src/man/man7d/vnd.7d
@@ -0,0 +1,118 @@
+'\" te
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
+.\"
+.TH VND 7D "Feb 11, 2014"
+.SH NAME
+vnd \- virtual layer two network driver
+
+.SH SYNOPSIS
+.nf
+.LP
+/dev/vnd/ctl
+.LP
+/dev/vnd/*
+.fi
+
+.SH DESCRIPTION
+.sp
+.LP
+The vnd driver provides support for a layer two datapath in an
+analogous way that IP(7P) provides a support for an IP-based layer
+three datapath. Both devices operate exclusively on datalinks. A
+datalink that has been plumbed up with IP via ifconfig(1M) or
+ipadm(1M) cannot be used with vnd or vice-versa.
+.sp
+.LP
+The vnd driver supports and takes advantage of the the following
+illumos features:
+.RS
+.sp
+.LP
+Supports dld/dls feature negotation of GLDv3 features, such
+as direct calls, flow control, checksum offloading, and more.
+.sp
+.LP
+All IP and IPv6 based traffic is sent through ipfilter(5),
+allowing packet filtering.
+.sp
+.LP
+Better control over vectored reads and writes in a frame-centric manner
+through framed I/O. See libvnd(3LIB) for more information on these
+interfaces.
+.RE
+.sp
+.LP
+The vnd driver exposes two different kinds of device nodes. The first is
+a self-cloning control node which can be used to create vnd devices on
+top of datalinks. Those devices can optionally be bound into the file
+system namespace under /dev/vnd. Control operations on the control node
+or named devices are private to the implementation. Instead,
+libvnd(3LIB) provides a stable interfaces for using, creating, and
+manipulating vnd devices.
+.sp
+.SH FILES
+.sp
+.ne 2
+.na
+/dev/vnd/ctl
+.ad
+.RS 16n
+vnd self-cloning control node
+.RE
+
+.sp
+.ne 2
+.na
+/dev/vnd/%link
+.ad
+.RS 16n
+Character device that corresponds to the vnd device of the given
+name (%link). A given device will appear for each actively linked device
+in the current zone.
+.RE
+
+.sp
+.ne 2
+.na
+/dev/vnd/zone/%zone/%link
+.ad
+.RS 16n
+These are character devices that correspond to the vnd device of
+the given name (%link). They are organized based on the zone that they
+appear in. Thus if a zone named foo has a vnd device named
+bar, then the global zone will have the file
+/dev/vnd/zone/foo/bar. Note, these only occur in the global zone.
+.RE
+
+.SH ATTRIBUTES
+.sp
+.LP
+See attributes(5) for descriptions of the following attributes:
+.sp
+
+.sp
+.TS
+box;
+c | c
+l | l .
+ATTRIBUTE TYPE ATTRIBUTE VALUE
+_
+Interface Stability Evolving
+.TE
+
+.SH SEE ALSO
+.sp
+.LP
+dladm(1M), ipflter(5), libvnd(3LIB), vndadm(1M),
+vndstat(1)
diff --git a/usr/src/man/man7d/zfd.7d b/usr/src/man/man7d/zfd.7d
new file mode 100644
index 0000000000..71b3084fdc
--- /dev/null
+++ b/usr/src/man/man7d/zfd.7d
@@ -0,0 +1,81 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright 2015, Joyent, Inc. All rights reserved.
+.\"
+.Dd "Oct 16, 2015"
+.Dt ZFD 7D
+.Os
+.Sh NAME
+.Nm zfd
+.Nd Zone file descriptor driver
+.Sh DESCRIPTION
+The
+.Nm zfd
+character driver exports devices into the zone which can be used by a
+a standalone process within the zone as
+.Vt stdin ,
+.Vt stdout ,
+and
+.Vt stderr .
+The
+.Nm zfd
+driver behaves in a similar manner as the
+.Nm zcons(7D)
+device.
+Inside a zone, the slave side devices appear as
+.Nm /dev/zfd/[0-4] .
+.sp
+The zone's zfd device configuration is driven by
+.Nm zoneadmd
+and a zone attribute
+.Nm zlog-mode
+which is somewhat of a misnomer since its purpose has evolved. The attribute
+can have a variety of values, but the lowest two positions in the value string
+are used to control how many zfd devices are created inside the zone and if the
+primary stream is a tty.
+.sp
+.Dl --
+.Dl -n
+.Dl t-
+.Dl tn
+.sp
+With the
+.Nm t
+flag set,
+.Vt stdin ,
+.Vt stdout ,
+and
+.Vt stderr ,
+are multiplexed onto a single full-duplex stream which is configured as a tty.
+That is,
+.Nm ptem ,
+.Nm ldterm
+and
+.Nm ttycompat
+are autopushed onto the stream when the slave side is opened. There is only a
+single zfd device (0) needed for the primary stream.
+.sp
+When the
+.Nm n
+flag is set, it is assumed that output logging will be done within the zone
+itself. In this configuration 1 or 2 additional zfd devices, depending on tty
+mode
+.Nm ( t
+flag), are created within the zone. An application can then configure the
+zfd streams driver into a multiplexer. Output from the stdout/stderr zfd(s)
+will be teed into the correspond logging zfd(s) within the zone.
+.sp
+.Sh SEE ALSO
+.Xr zlogin 1 ,
+.Xr zoneadmd 1M ,
+.Xr zonecfg 1M ,
+.Xr zcons 7D
diff --git a/usr/src/man/man7fs/Makefile b/usr/src/man/man7fs/Makefile
index 187bacff78..57aa608a65 100644
--- a/usr/src/man/man7fs/Makefile
+++ b/usr/src/man/man7fs/Makefile
@@ -26,7 +26,9 @@ MANFILES= bootfs.7fs \
devfs.7fs \
fd.7fs \
hsfs.7fs \
+ hyprlofs.7fs \
lofs.7fs \
+ lxproc.7fs \
objfs.7fs \
pcfs.7fs \
sharefs.7fs \
diff --git a/usr/src/man/man7fs/hyprlofs.7fs b/usr/src/man/man7fs/hyprlofs.7fs
new file mode 100644
index 0000000000..8655791193
--- /dev/null
+++ b/usr/src/man/man7fs/hyprlofs.7fs
@@ -0,0 +1,62 @@
+'\" te
+.\" Copyright (c) 2012, Joyent, Inc.
+.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
+.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
+.TH HYPRLOFS 7FS "March 7, 2012"
+.SH NAME
+hyprlofs \- fast name space virtual file system
+.SH SYNOPSIS
+.LP
+.nf
+#include <sys/fs/hyprlofs.h>
+
+\fB\fR\fBmount\fR (\fB\fR\fIspecial\fR, \fB\fR\fIdirectory\fR, \fB\fR\fIMS_DATA\fR, \fB\fR\fI"hyprlofs"\fR, \fB\fR\fINULL\fR, \fB\fR\fI0\fR);
+.fi
+
+.SH DESCRIPTION
+.sp
+.LP
+\fBhyprlofs\fR is a hybrid file system combining features from
+\fBtmpfs(7FS)\fR and \fBlofs(7FS)\fR.
+As with \fBlofs\fR, the \fBhyprlofs\fR file system allows new, virtual file
+systems to be created which provide access to existing files using alternate
+pathnames. In addition, the files themselves may have alternate names and
+paths within the mount.
+Unlike \fBlofs\fR, files cannot be created and backing files cannot be removed.
+The name space is completely managed through ioctls on the mount.
+Entries in the name space are not mounts and thus, they will not appear in the
+mnttab. The file system is designed to provide a very fast name space to the
+backing files. The name space can be modified very quickly through the ioctl
+interface.
+.sp
+.LP
+\fBhyprlofs\fR file systems can be mounted with the command:
+.sp
+.in +2
+.nf
+\fBmount \fR\fB-F\fR\fB hyprlofs swap \fR\fIdirectory\fR
+.fi
+.in -2
+
+.sp
+.LP
+The name space used by \fBhyprlofs\fR exists only in-memory so it will consume
+a small amount of the system's virtual memory. The files themselves are backed
+by the original file as with \fBlofs\fR.
+
+.SH SEE ALSO
+.sp
+.LP
+\fBdf\fR(1M), \fBmount\fR(1M), \fBswap\fR(1M),
+\fBmount\fR(2), \fBumount\fR(2)
+.sp
+.LP
+\fISystem Administration Guide: Basic Administration\fR
+.SH DIAGNOSTICS
+.sp
+.LP
+\fBdf\fR(1M) output is of limited accuracy since
+the space available to \fBhyprlofs\fR is dependent on the swap
+space demands of the entire system and the files in the name space are not
+included.
diff --git a/usr/src/man/man7fs/lxproc.7fs b/usr/src/man/man7fs/lxproc.7fs
new file mode 100644
index 0000000000..7ef10ce343
--- /dev/null
+++ b/usr/src/man/man7fs/lxproc.7fs
@@ -0,0 +1,115 @@
+'\" te
+.\" Copyright (c) 2012, Joyent, Inc.
+.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
+.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
+.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
+.TH LXPROC 7FS "April 25, 2012"
+.SH NAME
+lxproc \- a loosely Linux-compatible /proc
+.SH SYNOPSIS
+.LP
+.nf
+\fB\fR\fBmount\fR (\fB\fR\fI"lxproc"\fR, \fB\fR\fIdirectory\fR, \fB\fR\fIMS_DATA\fR, \fB\fR\fI"lxproc"\fR, \fB\fR\fINULL\fR, \fB\fR\fI0\fR);
+.fi
+
+.SH DESCRIPTION
+.sp
+.LP
+\fBlxproc\fR is an implementation of the \fB/proc\fR filesystem that
+loosely matches the Linux semantics of providing human-readable text files
+that correspond to elements of the system.
+As with both \fBproc\fR(4) and Linux \fB/proc\fR, \fBlxproc\fR makes available
+a directory for every process, with each directory containing a number
+of files; like Linux \fB/proc\fR but unlike \fBproc\fR(4), \fBlxproc\fR also
+makes available a number of files related to system-wide information.
+To ascertain the meaning and structure of the files provided via
+\fBlxproc\fR, users should consult the Linux documentation.
+.sp
+.LP
+The \fBlxproc\fR compatibility layer is
+provided only as a best-effort for simple Linux \fB/proc\fR readers; it
+is not intended to exactly mimic Linux semantics and nor does it attempt to
+somehow fool a consumer into believing that it is operating within a Linux
+environment. As such, \fBlxproc\fR should only be used by Linux-specific
+programs that are willing to trade precision in understanding the
+system in return for Linux compatibility. To programmatically understand
+the system precisely and in terms of its native constructs,
+one should not use \fBlxproc\fR, but rather \fBproc\fR(4) or
+\fBkstat\fR(3KSTAT).
+To understand
+a process or group of processes from either a shell script or the command line,
+one should not use \fBlxproc\fR, but rather \fBproc\fR(4)-based tools like
+\fBprstat\fR(1M),
+\fBpfiles\fR(1),
+\fBpargs\fR(1),
+\fBpmap\fR(1),
+\fBptree\fR(1),
+\fBplimit\fR(1),
+\fBpflags\fR(1),
+\fBpcred\fR(1),
+\fBpstack\fR(1),
+\fBpldd\fR(1),
+\fBpsig\fR(1),
+or
+\fBpwdx\fR(1).
+To understand system-wide constructs from either a shell script or the
+command line, one should not use \fBlxproc\fR, but rather
+\fBkstat\fR(3KSTAT)-based tools like
+\fBkstat\fR(1M),
+\fBmpstat\fR(1M),
+\fBiostat\fR(1M),
+\fBnetstat\fR(1M) or
+\fBpsrinfo\fR(1M).
+.sp
+.LP
+Like \fB/proc\fR, \fBlxproc\fR can be mounted on any mount point, but the
+preferred mount point is \fB/system/lxproc\fR; if a zone brand elects to
+mount it by default, this will (or should) generally be the mount point.
+.sp
+.LP
+\fBlxproc\fR can be mounted with the command:
+.sp
+.in +2
+.nf
+\fBmount \fR\fB-F\fR\fB lxproc lxproc \fR\fIdirectory\fR
+.fi
+.in -2
+
+.SH SEE ALSO
+.sp
+.LP
+\fBdf\fR(1M),
+\fBiostat\fR(1M),
+\fBkstat\fR(1M),
+\fBmpstat\fR(1M),
+\fBmount\fR(1M),
+\fBnetstat\fR(1M),
+\fBpargs\fR(1),
+\fBpcred\fR(1),
+\fBpfiles\fR(1),
+\fBpflags\fR(1),
+\fBpldd\fR(1),
+\fBplimit\fR(1),
+\fBpmap\fR(1),
+\fBprstat\fR(1M),
+\fBpsig\fR(1),
+\fBpsrinfo\fR(1M),
+\fBpstack\fR(1),
+\fBptree\fR(1),
+\fBpwdx\fR(1),
+\fBmount\fR(2), \fBumount\fR(2), \fBkstat\fR(3KSTAT), \fBproc\fR(4),
+\fBkstat\fR(9S)
+
+.SH NOTES
+.sp
+.LP
+When choosing between offering
+Linux compatibility and telling the truth, \fBlxproc\fR emphatically picks
+the truth. A particular glaring example of this is the Linux notion of
+"tasks" (that is, threads), which -- due to historical misadventures on
+Linux -- allocate their identifiers from the process identifier space.
+(That is, each thread has in effect a pid.) Some Linux \fB/proc\fR readers
+have come to depend on this attribute, and become confused when threads
+appear with proper identifiers, so \fBlxproc\fR simply opts for the pre-2.6
+behavior, and does not present the tasks directory at all.
+
diff --git a/usr/src/man/man7m/Makefile b/usr/src/man/man7m/Makefile
index bb44184ba7..36eb293fc3 100644
--- a/usr/src/man/man7m/Makefile
+++ b/usr/src/man/man7m/Makefile
@@ -12,6 +12,7 @@
#
# Copyright 2011, Richard Lowe
# Copyright 2013 Nexenta Systems, Inc. All rights reserved.
+# Copyright 2015 Joyent, Inc.
#
include $(SRC)/Makefile.master
@@ -20,6 +21,7 @@ MANSECT= 7m
_MANFILES = bufmod.7m \
connld.7m \
+ datafilt.7m \
ldterm.7m \
pckt.7m \
pfmod.7m \
diff --git a/usr/src/man/man7m/datafilt.7m b/usr/src/man/man7m/datafilt.7m
new file mode 100644
index 0000000000..f74ac0b103
--- /dev/null
+++ b/usr/src/man/man7m/datafilt.7m
@@ -0,0 +1,46 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright 2014 Ryan Zezeski
+.\" Copyright 2015 Joyent, Inc.
+.\"
+.Dd Apr 21, 2015
+.Dt DATAFILT 7M
+.Os
+.Sh NAME
+.Nm datafilt
+.Nd socket filter module for deferred TCP connections
+.Sh DESCRIPTION
+The
+.Nm datafilt
+socket filter provides deferment of
+.Xr accept 3SOCKET
+for TCP connections. The accept call will not return until at least
+one byte has been buffered by the kernel. Deferment assures the
+application that the first call to
+.Xr read 2 or
+.Xr recv 3SOCKET
+will not block. It reduces unnecessary switching between user and
+kernel.
+.Sh EXAMPLES
+.Ss Example 1
+Enable deferment on the listening socket.
+.Bd -literal
+ setsockopt(lsock, SOL_FILTER, FIL_ATTACH, "datafilt", 8);
+.Ed
+.Ss Example 2
+Disable deferment on the listening socket.
+.Bd -literal
+ char filt[] = "datafilt";
+ setsockopt(lsock, SOL_FILTER, FIL_DETACH, filt, strlen(filt) + 1);
+.Ed
+.Sh SEE ALSO
+.Xr setsockopt 3SOCKET
diff --git a/usr/src/man/man7p/Makefile b/usr/src/man/man7p/Makefile
index 13cb58770d..f73a157f47 100644
--- a/usr/src/man/man7p/Makefile
+++ b/usr/src/man/man7p/Makefile
@@ -39,7 +39,8 @@ MANFILES= arp.7p \
sip.7p \
slp.7p \
tcp.7p \
- udp.7p
+ udp.7p \
+ vxlan.7p
MANLINKS= AH.7p \
ARP.7p \
@@ -51,6 +52,7 @@ MANLINKS= AH.7p \
SCTP.7p \
TCP.7p \
UDP.7p \
+ VXLAN.7p \
if.7p
ARP.7p := LINKSRC = arp.7p
@@ -75,6 +77,8 @@ TCP.7p := LINKSRC = tcp.7p
UDP.7p := LINKSRC = udp.7p
+VXLAN.7p := LINKSRC = vxlan.7p
+
.KEEP_STATE:
include $(SRC)/man/Makefile.man
diff --git a/usr/src/man/man7p/vxlan.7p b/usr/src/man/man7p/vxlan.7p
new file mode 100644
index 0000000000..a32637b484
--- /dev/null
+++ b/usr/src/man/man7p/vxlan.7p
@@ -0,0 +1,124 @@
+.\"
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.\"
+.\"
+.\" Copyright 2015 Joyent, Inc.
+.\"
+.Dd Apr 10, 2015
+.Dt VXLAN 7P
+.Os
+.Sh NAME
+.Nm VXLAN ,
+.Nm vxlan
+.Nd Virtual eXtensible Local Area Network
+.Sh SYNOPSIS
+.In sys/vxlan.h
+.Sh DESCRIPTION
+.Nm
+(RFC 7348) is a network encapsulation protocol that is used by
+.Xr overlay 5
+devices. A payload, commonly an Ethernet frame, is placed inside of a
+UDP packet and prepended with an 8-byte
+.Nm
+header.
+.Pp
+The
+.Nm
+header contains two 32-bit words. The first word is an 8-bit flags field
+followed by 24 reserved bits. The second word is a 24-bit virtual network
+identifier followed by 8 reserved bits. The virtual network identifier
+identifies a unique
+.Nm
+and
+is similar in concept to an IEEE 802.1Q VLAN identifier.
+.Pp
+The system provides access to
+.Nm
+through dladm overlays. See
+.Xr dladm 1M
+and
+.Xr overlay 5
+for more information.
+.Pp
+The
+.In sys/vxlan.h
+header provides information for working with the
+.Nm
+protocol. The contents of this header are
+.Sy uncommitted .
+The header defines a structure that may be used to encode and decode a VXLAN
+header. It defines a packed structure type
+.Sy vxlan_hdr_t
+which represents the
+.Nm
+frame header and has the following members:
+.Bd -literal
+ uint32_t vxlan_flags; /* flags in upper 8 bits */
+ uint32_t vxlan_id; /* VXLAN ID in upper 24 bits */
+.Ed
+.Sh EXAMPLES
+.Sy Example 1
+Decoding a
+.Nm
+header
+.Pp
+The following example shows how to validate a
+.Nm header. For more information on this process, see RFC 7348.
+.Bd -literal -offset indent
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <inttypes.h>
+#include <sys/vxlan.h>
+
+\&...
+
+/*
+ * Validate the following bytes as a VXLAN header. If valid, return
+ * 0 and store the VXLAN identifier in *vidp. Otherwise, return an
+ * error.
+ */
+int
+validate_vxlan(void *buf, int len, uint32_t *vidp)
+{
+ vxlan_hdr_t *hdr;
+
+ if (len < sizeof (vxlan_hdr_t))
+ return (EINAVL);
+
+ hdr = buf;
+ if ((ntohl(hdr->vxlan_flags) & VXLAN_MAGIC) == 0)
+ return (EINAVL);
+
+ *vidp = ntohl(vxlan->vxlan_id) >> VXLAN_ID_SHIFT;
+
+ return (0);
+}
+.Ed
+.Sh STABILITY
+The contents of
+.In sys/vxlan.h
+are
+.Sy Uncommitted .
+.Sh SEE ALSO
+.Xr dladm 1M ,
+.Xr overlay 5
+.Rs
+.%A Mahalingam, M.
+.%A Dutt, D.
+.%A Duda, K.
+.%A Agarwal, P.
+.%A Kreeger L.
+.%A Sridhar, T.
+.%A Bursell, M.
+.%A C. Wright
+.%T RFC 7348, Virtual eXtensible Local Area Network (VXLAN): A Framework
+.%T for Overlaying Virtualized Layer 2 Networks over Layer 3 Networks
+.%D August 2014
+.Re
diff --git a/usr/src/man/man9e/chpoll.9e b/usr/src/man/man9e/chpoll.9e
index 468ef7b53f..7b4b3edf0b 100644
--- a/usr/src/man/man9e/chpoll.9e
+++ b/usr/src/man/man9e/chpoll.9e
@@ -214,6 +214,38 @@ hold any mutex across the call to \fBpollwakeup\fR(9F) that is acquired in its
the time of the call to \fBchpoll()\fR.
.RE
+.RS +4
+.TP
+4.
+In the \fBclose\fR(9E) entry point, the driver should call \fBpollwakeup()\fR
+on the \fBpollhead\fR structure that corresponds to the closing software
+state, specifying \fBPOLLERR\fR for the events. Further, upon return from
+\fBpollwakeup()\fR, the driver's \fBclose\fR(9E) entry point should call
+the \fBpollhead_clean\fR(9F) function, specifying the \fBpollhead\fR that
+corresponds to the structure that will be deallocated:
+
+.sp
+.in +2
+.nf
+static int
+mydriver_close(dev_t dev, int flag, int otyp, cred_t *cp)
+{
+ minor_t minor = getminor(dev);
+ mydriver_state_t *state;
+
+ state = ddi_get_soft_state(mydriver_softstate, minor);
+
+ pollwakeup(&state->mydriver_pollhd, POLLERR);
+ pollhead_clean(&state->mydriver_pollhd);
+ ...
+.fi
+.in -2
+
+This step is necessary to inform other kernel subsystems that the memory
+associated with the \fBpollhead\fR is about to be deallocated by the
+\fBclose\fR(9E) entry point.
+
+.RE
.SH RETURN VALUES
.LP
\fBchpoll()\fR should return \fB0\fR for success, or the appropriate error
diff --git a/usr/src/man/man9f/Makefile b/usr/src/man/man9f/Makefile
index d02ddd76c5..066f5373f6 100644
--- a/usr/src/man/man9f/Makefile
+++ b/usr/src/man/man9f/Makefile
@@ -394,6 +394,7 @@ MANFILES= ASSERT.9f \
pm_power_has_changed.9f \
pm_raise_power.9f \
pm_trans_check.9f \
+ pollhead_clean.9f \
pollwakeup.9f \
priv_getbyname.9f \
priv_policy.9f \
diff --git a/usr/src/man/man9f/kmem_alloc.9f b/usr/src/man/man9f/kmem_alloc.9f
index 9c4f8ccb0c..201544b57c 100644
--- a/usr/src/man/man9f/kmem_alloc.9f
+++ b/usr/src/man/man9f/kmem_alloc.9f
@@ -129,5 +129,8 @@ uninitialized kernel memory should be handled carefully. For example, never
.SH NOTES
.sp
.LP
-\fBkmem_alloc(0\fR, \fIflag\fR\fB)\fR always returns \fINULL\fR.
-\fBkmem_free(NULL, 0)\fR is legal.
+\fBkmem_alloc(0\fR, \fIflag\fR\fB)\fR always returns \fINULL\fR, but
+if \fBKM_SLEEP\fR is set, this behavior is considered to be deprecated;
+the system may be configured to explicitly panic in this case in lieu
+of returning \fINULL\fR.
+\fBkmem_free(NULL, 0)\fR is legal, however.
diff --git a/usr/src/man/man9f/pollhead_clean.9f b/usr/src/man/man9f/pollhead_clean.9f
new file mode 100644
index 0000000000..a163a65a51
--- /dev/null
+++ b/usr/src/man/man9f/pollhead_clean.9f
@@ -0,0 +1,64 @@
+'\" te
+.\" Copyright (c) 2015, Joyent, Inc. All Rights Reserved.
+.\" This file and its contents are supplied under the terms of the
+.\" Common Development and Distribution License ("CDDL"), version 1.0.
+.\" You may only use this file in accordance with the terms of version
+.\" 1.0 of the CDDL.
+.\"
+.\" A full copy of the text of the CDDL should have accompanied this
+.\" source. A copy of the CDDL is also available via the Internet at
+.\" http://www.illumos.org/license/CDDL.
+.TH POLLHEAD_CLEAN 9F "Jun 12, 1998"
+.\"
+.\" A little inside joke with the above date: that's the date that the
+.\" devpoll work integrated (under bug 1265897). The original work included
+.\" pollhead_clean() -- but didn't bother to document it! With the date,
+.\" we are therefore giving this man page the date it should have had in an
+.\" attempt to right an historical wrong -- albeit nearly two decades after
+.\" the fact.
+.\"
+.SH NAME
+pollhead_clean \- inform the kernel that a pollhead is being deallocated
+.SH SYNOPSIS
+.LP
+.nf
+#include <sys/poll.h>
+
+
+
+\fBvoid\fR \fBpollhead_clean\fR(\fBstruct pollhead *\fR\fIphp\fR);
+.fi
+
+.SH INTERFACE LEVEL
+.sp
+.LP
+Architecture independent level 1 (DDI/DKI).
+.SH PARAMETERS
+.sp
+.ne 2
+.na
+\fB\fIphp\fR\fR
+.ad
+.RS 9n
+Pointer to a \fBpollhead\fR structure.
+.RE
+
+.SH DESCRIPTION
+.sp
+.LP
+The \fBpollhead_clean()\fR function informs the kernel that a driver's
+\fBpollhead\fR structure is about to be deallocated, usually as part of
+the driver's \fBclose\fR(9E) entry point before the software state that
+contains the \fBpollhead\fR is deallocated via \fBddi_soft_state_free\fR(9F).
+See \fBchpoll\fR(9E), \fBpollwakeup\fR(9E) and \fBpoll\fR(2) for more detail.
+.SH CONTEXT
+.sp
+.LP
+The \fBpollhead_clean()\fR function is generally called from the context
+of a \fBclose\fR(9E) entry point, but may be called from user or kernel
+context.
+.SH SEE ALSO
+.sp
+.LP
+\fBpoll\fR(2), \fBchpoll\fR(9E), \fBpollwakeup\fR(9E)
+
diff --git a/usr/src/pkg/manifests/SUNWcs.mf b/usr/src/pkg/manifests/SUNWcs.mf
index 65f45ca501..8824052e7b 100644
--- a/usr/src/pkg/manifests/SUNWcs.mf
+++ b/usr/src/pkg/manifests/SUNWcs.mf
@@ -237,9 +237,6 @@ $(i386_ONLY)dir path=usr/sbin/$(ARCH32)
dir path=usr/sbin/$(ARCH64)
dir path=usr/share
dir path=usr/share/doc group=other
-dir path=usr/share/doc/ksh
-dir path=usr/share/doc/ksh/images
-dir path=usr/share/doc/ksh/images/callouts
dir path=usr/share/lib
dir path=usr/share/lib/mailx
dir path=usr/share/lib/pub
@@ -1411,30 +1408,6 @@ file path=usr/sbin/volcopy mode=0555
file path=usr/sbin/wall group=tty mode=2555
file path=usr/sbin/zdump mode=0555
file path=usr/sbin/zic mode=0555
-file path=usr/share/doc/ksh/COMPATIBILITY
-file path=usr/share/doc/ksh/DESIGN
-file path=usr/share/doc/ksh/OBSOLETE
-file path=usr/share/doc/ksh/README
-file path=usr/share/doc/ksh/RELEASE
-file path=usr/share/doc/ksh/TYPES
-file path=usr/share/doc/ksh/images/callouts/1.png
-file path=usr/share/doc/ksh/images/callouts/10.png
-file path=usr/share/doc/ksh/images/callouts/2.png
-file path=usr/share/doc/ksh/images/callouts/3.png
-file path=usr/share/doc/ksh/images/callouts/4.png
-file path=usr/share/doc/ksh/images/callouts/5.png
-file path=usr/share/doc/ksh/images/callouts/6.png
-file path=usr/share/doc/ksh/images/callouts/7.png
-file path=usr/share/doc/ksh/images/callouts/8.png
-file path=usr/share/doc/ksh/images/callouts/9.png
-file path=usr/share/doc/ksh/images/tag_bourne.png
-file path=usr/share/doc/ksh/images/tag_i18n.png
-file path=usr/share/doc/ksh/images/tag_ksh.png
-file path=usr/share/doc/ksh/images/tag_ksh88.png
-file path=usr/share/doc/ksh/images/tag_ksh93.png
-file path=usr/share/doc/ksh/images/tag_l10n.png
-file path=usr/share/doc/ksh/images/tag_perf.png
-file path=usr/share/doc/ksh/shell_styleguide.docbook
file path=usr/share/lib/mailx/mailx.help
file path=usr/share/lib/mailx/mailx.help.~
file path=usr/share/lib/tabset/3101
diff --git a/usr/src/pkg/manifests/SUNWlx.mf b/usr/src/pkg/manifests/SUNWlx.mf
index 0d5d285fe5..b7da7af793 100644
--- a/usr/src/pkg/manifests/SUNWlx.mf
+++ b/usr/src/pkg/manifests/SUNWlx.mf
@@ -20,11 +20,12 @@
#
#
-# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
#
-# Was renamed to system/zones/brand/lx, both now obsolete.
-
-set name=pkg.fmri value=pkg:/SUNWlx@0.5.11,5.11-0.143
-set name=pkg.obsolete value=true
+set name=pkg.fmri value=pkg:/SUNWlx@0.5.11,5.11-0.133
+set name=pkg.renamed value=true
set name=variant.arch value=i386
+set name=variant.opensolaris.zone value=global value=nonglobal
+depend fmri=pkg:/system/zones/brand/lx@0.5.11,5.11-0.133 type=require
diff --git a/usr/src/pkg/manifests/developer-debug-mdb.mf b/usr/src/pkg/manifests/developer-debug-mdb.mf
index 254ecc8557..f1929da69b 100644
--- a/usr/src/pkg/manifests/developer-debug-mdb.mf
+++ b/usr/src/pkg/manifests/developer-debug-mdb.mf
@@ -98,6 +98,7 @@ $(i386_ONLY)file path=kernel/kmdb/$(ARCH64)/cpu_ms.AuthenticAMD.15 group=sys \
file path=kernel/kmdb/$(ARCH64)/crypto group=sys mode=0555
file path=kernel/kmdb/$(ARCH64)/genunix group=sys mode=0555
file path=kernel/kmdb/$(ARCH64)/hook group=sys mode=0555
+$(i386_ONLY)file path=kernel/kmdb/$(ARCH64)/i40e group=sys mode=0555
$(sparc_ONLY)file path=kernel/kmdb/$(ARCH64)/intr group=sys mode=0555
file path=kernel/kmdb/$(ARCH64)/ip group=sys mode=0555
file path=kernel/kmdb/$(ARCH64)/ipc group=sys mode=0555
@@ -136,6 +137,7 @@ $(i386_ONLY)file path=kernel/kmdb/cpu_ms.AuthenticAMD.15 group=sys mode=0555
$(i386_ONLY)file path=kernel/kmdb/crypto group=sys mode=0555
$(i386_ONLY)file path=kernel/kmdb/genunix group=sys mode=0555
$(i386_ONLY)file path=kernel/kmdb/hook group=sys mode=0555
+$(i386_ONLY)file path=kernel/kmdb/i40e group=sys mode=0555
$(i386_ONLY)file path=kernel/kmdb/ip group=sys mode=0555
$(i386_ONLY)file path=kernel/kmdb/ipc group=sys mode=0555
$(i386_ONLY)file path=kernel/kmdb/ipp group=sys mode=0555
@@ -216,6 +218,7 @@ $(i386_ONLY)file path=usr/lib/mdb/kvm/$(ARCH64)/cpu_ms.AuthenticAMD.15.so \
file path=usr/lib/mdb/kvm/$(ARCH64)/crypto.so group=sys mode=0555
file path=usr/lib/mdb/kvm/$(ARCH64)/genunix.so group=sys mode=0555
file path=usr/lib/mdb/kvm/$(ARCH64)/hook.so group=sys mode=0555
+$(i386_ONLY)file path=usr/lib/mdb/kvm/$(ARCH64)/i40e.so group=sys mode=0555
$(sparc_ONLY)file path=usr/lib/mdb/kvm/$(ARCH64)/intr.so group=sys mode=0555
file path=usr/lib/mdb/kvm/$(ARCH64)/ip.so group=sys mode=0555
file path=usr/lib/mdb/kvm/$(ARCH64)/ipc.so group=sys mode=0555
@@ -256,6 +259,7 @@ $(i386_ONLY)file path=usr/lib/mdb/kvm/cpu_ms.AuthenticAMD.15.so group=sys \
$(i386_ONLY)file path=usr/lib/mdb/kvm/crypto.so group=sys mode=0555
$(i386_ONLY)file path=usr/lib/mdb/kvm/genunix.so group=sys mode=0555
$(i386_ONLY)file path=usr/lib/mdb/kvm/hook.so group=sys mode=0555
+$(i386_ONLY)file path=usr/lib/mdb/kvm/i40e.so group=sys mode=0555
$(i386_ONLY)file path=usr/lib/mdb/kvm/ip.so group=sys mode=0555
$(i386_ONLY)file path=usr/lib/mdb/kvm/ipc.so group=sys mode=0555
$(i386_ONLY)file path=usr/lib/mdb/kvm/ipp.so group=sys mode=0555
diff --git a/usr/src/pkg/manifests/driver-crypto-nfp.mf b/usr/src/pkg/manifests/driver-crypto-nfp.mf
new file mode 100644
index 0000000000..fcdf538c1d
--- /dev/null
+++ b/usr/src/pkg/manifests/driver-crypto-nfp.mf
@@ -0,0 +1,33 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2014 <contributor>
+#
+
+set name=pkg.fmri value=pkg:/driver/crypto/nfp@$(PKGVERS)
+set name=pkg.description value="nfast Crypto Accelerator Driver"
+set name=pkg.summary value="nfast Crypto Accelerator"
+set name=info.classification \
+ value=org.opensolaris.category.2008:System/Hardware
+set name=variant.arch value=i386
+dir path=kernel group=sys
+dir path=kernel/drv group=sys
+dir path=kernel/drv/$(ARCH64) group=sys
+driver name=nfp \
+ alias=pci1011,1065.100.100 \
+ alias=pci8086,b555.100.100 \
+ alias=pciex1011,1065.100.100 \
+ alias=pciex8086,b555.100.100
+file path=kernel/drv/$(ARCH64)/nfp group=sys
+$(i386_ONLY)file path=kernel/drv/nfp group=sys
+license usr/src/uts/common/io/nfp/THIRDPARTYLICENSE \
+ license=usr/src/uts/common/io/nfp/THIRDPARTYLICENSE
diff --git a/usr/src/pkg/manifests/driver-network-i40e.mf b/usr/src/pkg/manifests/driver-network-i40e.mf
new file mode 100644
index 0000000000..897414c3f8
--- /dev/null
+++ b/usr/src/pkg/manifests/driver-network-i40e.mf
@@ -0,0 +1,43 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2016 Joyent, Inc.
+#
+
+<include global_zone_only_component>
+set name=pkg.fmri value=pkg:/driver/network/i40e$(PKGVERS)
+set name=pkg.description value="Intel XL710 10/40 Gb Ethernet Driver"
+set name=pkg.summary value="Intel XL710 10/40 Gb Ethernet Driver"
+set name=info.classification \
+ value=org.opensolaris.category.2008:Drivers/Networking
+set name=variant.arch value=i386
+dir path=kernel group=sys
+dir path=kernel/drv group=sys
+dir path=kernel/drv/$(ARCH64) group=sys
+dir path=usr/share/man
+dir path=usr/share/man/man7d
+driver name=i40e clone_perms="i40e 0666 root sys" perms="* 0666 root sys" \
+ alias=pciex8086,1572 \
+ alias=pciex8086,1580 \
+ alias=pciex8086,1581 \
+ alias=pciex8086,1583 \
+ alias=pciex8086,1584 \
+ alias=pciex8086,1585 \
+ alias=pciex8086,1586 \
+ alias=pciex8086,1589
+file path=kernel/drv/$(ARCH64)/i40e group=sys
+file path=kernel/drv/i40e group=sys
+file path=kernel/drv/i40e.conf group=sys
+file path=usr/share/man/man7d/i40e.7d
+license lic_CDDL license=lic_CDDL
+license usr/src/uts/common/io/i40e/core/THIRDPARTYLICENSE \
+ license=usr/src/uts/common/io/i40e/core/THIRDPARTYLICENSE
diff --git a/usr/src/pkg/manifests/system-dtrace-tests.mf b/usr/src/pkg/manifests/system-dtrace-tests.mf
index 8afe5cccb6..3946445d99 100644
--- a/usr/src/pkg/manifests/system-dtrace-tests.mf
+++ b/usr/src/pkg/manifests/system-dtrace-tests.mf
@@ -1579,6 +1579,8 @@ file path=opt/SUNWdtrt/tst/common/scalars/tst.16kglobal.d mode=0444
file path=opt/SUNWdtrt/tst/common/scalars/tst.16klocal.d mode=0444
file path=opt/SUNWdtrt/tst/common/scalars/tst.basicvar.d mode=0444
file path=opt/SUNWdtrt/tst/common/scalars/tst.basicvar.d.out mode=0444
+file path=opt/SUNWdtrt/tst/common/scalars/err.bigglobal.d mode=0444
+file path=opt/SUNWdtrt/tst/common/scalars/err.biglocal.d mode=0444
file path=opt/SUNWdtrt/tst/common/scalars/tst.localvar.d mode=0444
file path=opt/SUNWdtrt/tst/common/scalars/tst.misc.d mode=0444
file path=opt/SUNWdtrt/tst/common/scalars/tst.self.d mode=0444
diff --git a/usr/src/pkg/manifests/system-zones-brand-joyent.mf b/usr/src/pkg/manifests/system-zones-brand-joyent.mf
new file mode 100644
index 0000000000..33452cdcd7
--- /dev/null
+++ b/usr/src/pkg/manifests/system-zones-brand-joyent.mf
@@ -0,0 +1,54 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2010 Joyent, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+set name=pkg.fmri value=pkg:/system/zones/brand/joyent@$(PKGVERS)
+set name=pkg.description value="Support for the 'joyent' Brand"
+set name=pkg.summary value="Joyent Containers: joyent brand support"
+set name=info.classification \
+ value=org.opensolaris.category.2008:System/Virtualization
+set name=variant.arch value=$(ARCH)
+dir path=etc group=sys
+dir path=etc/zones group=sys
+dir path=lib variant.opensolaris.zone=global
+dir path=lib/svc variant.opensolaris.zone=global
+dir path=lib/svc/manifest group=sys variant.opensolaris.zone=global
+dir path=lib/svc/manifest/system group=sys variant.opensolaris.zone=global
+dir path=lib/svc/method variant.opensolaris.zone=global
+dir path=usr group=sys
+dir path=usr/lib
+dir path=usr/lib/brand
+dir path=usr/lib/brand/joyent group=sys
+file path=etc/zones/Joyent.xml mode=0444
+file path=lib/svc/manifest/system/joyinit.xml group=sys mode=0444 \
+ variant.opensolaris.zone=global
+file path=lib/svc/method/svc-joyinit mode=0555 variant.opensolaris.zone=global
+file path=usr/lib/brand/joyent/config.xml mode=0444
+file path=usr/lib/brand/joyent/jinstall mode=0755
+file path=usr/lib/brand/joyent/juninstall mode=0755
+file path=usr/lib/brand/joyent/pinstall mode=0755
+file path=usr/lib/brand/joyent/platform.xml mode=0444
+file path=usr/lib/brand/joyent/prestate mode=0755
+license lic_CDDL license=lic_CDDL
diff --git a/usr/src/pkg/manifests/system-zones-brand-lx.mf b/usr/src/pkg/manifests/system-zones-brand-lx.mf
index ca3a8cc541..58a016fa9c 100644
--- a/usr/src/pkg/manifests/system-zones-brand-lx.mf
+++ b/usr/src/pkg/manifests/system-zones-brand-lx.mf
@@ -20,9 +20,104 @@
#
#
-# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+# Copyright 2015 Joyent, Inc.
#
-set name=pkg.fmri value=pkg:/system/zones/brand/lx@0.5.11,5.11-0.143
-set name=pkg.obsolete value=true
+#
+# This package will install successfully into any zone, global or
+# non-global. The files, directories, links, and hardlinks, however,
+# will only be installed into the global zone.
+#
+<include global_zone_only_component>
+set name=pkg.fmri value=pkg:/system/zones/brand/lx@$(PKGVERS)
+set name=pkg.description value="Support for the 'lx' Brand"
+set name=pkg.summary value="lx Brand"
+set name=info.classification \
+ value="org.opensolaris.category.2008:Applications/System Utilities"
set name=variant.arch value=i386
+dir path=etc group=sys
+dir path=etc/zones group=sys
+dir path=usr group=sys
+dir path=usr/kernel group=sys
+dir path=usr/kernel/brand group=sys
+dir path=usr/kernel/brand/$(ARCH64) group=sys
+dir path=usr/kernel/drv group=sys
+dir path=usr/kernel/drv/$(ARCH64) group=sys
+dir path=usr/kernel/dtrace group=sys
+dir path=usr/kernel/dtrace/$(ARCH64) group=sys
+dir path=usr/kernel/fs group=sys
+dir path=usr/kernel/fs/$(ARCH64) group=sys
+dir path=usr/kernel/strmod group=sys
+dir path=usr/kernel/strmod/$(ARCH64) group=sys
+dir path=usr/lib
+dir path=usr/lib/brand
+dir path=usr/lib/brand/lx
+dir path=usr/lib/brand/lx/$(ARCH64)
+dir path=usr/lib/brand/lx/distros
+dir path=usr/lib/devfsadm group=sys
+dir path=usr/lib/devfsadm/linkmod group=sys
+driver name=lx_audio
+driver name=lx_ptm perms="lx_ptmajor 0666 root sys"
+driver name=lx_systrace perms="* 0644 root sys"
+file path=etc/zones/SUNWlx.xml mode=0444
+file path=etc/zones/SUNWlx26.xml mode=0444
+file path=usr/kernel/brand/$(ARCH64)/lx_brand group=sys mode=0755
+file path=usr/kernel/brand/lx_brand group=sys mode=0755
+file path=usr/kernel/drv/$(ARCH64)/lx_audio group=sys
+file path=usr/kernel/drv/$(ARCH64)/lx_ptm group=sys
+file path=usr/kernel/drv/$(ARCH64)/lx_systrace group=sys
+file path=usr/kernel/drv/lx_audio group=sys
+file path=usr/kernel/drv/lx_audio.conf group=sys
+file path=usr/kernel/drv/lx_ptm group=sys
+file path=usr/kernel/drv/lx_ptm.conf group=sys
+file path=usr/kernel/drv/lx_systrace group=sys
+file path=usr/kernel/drv/lx_systrace.conf group=sys
+file path=usr/kernel/fs/$(ARCH64)/lx_afs group=sys mode=0755
+file path=usr/kernel/fs/$(ARCH64)/lx_proc group=sys mode=0755
+file path=usr/kernel/fs/lx_afs group=sys mode=0755
+file path=usr/kernel/fs/lx_proc group=sys mode=0755
+file path=usr/kernel/strmod/$(ARCH64)/ldlinux group=sys mode=0755
+file path=usr/kernel/strmod/ldlinux group=sys mode=0755
+file path=usr/lib/brand/lx/$(ARCH64)/lx_librtld_db.so.1
+file path=usr/lib/brand/lx/config.xml mode=0444
+file path=usr/lib/brand/lx/distros/centos35.distro mode=0444
+file path=usr/lib/brand/lx/distros/centos36.distro mode=0444
+file path=usr/lib/brand/lx/distros/centos37.distro mode=0444
+file path=usr/lib/brand/lx/distros/centos38.distro mode=0444
+file path=usr/lib/brand/lx/distros/rhel35.distro mode=0444
+file path=usr/lib/brand/lx/distros/rhel36.distro mode=0444
+file path=usr/lib/brand/lx/distros/rhel37.distro mode=0444
+file path=usr/lib/brand/lx/distros/rhel38.distro mode=0444
+file path=usr/lib/brand/lx/distros/rhel_centos_common mode=0444
+file path=usr/lib/brand/lx/etc_default_nfs group=sys mode=0444
+file path=usr/lib/brand/lx/lx_distro_install mode=0755
+file path=usr/lib/brand/lx/lxinit mode=0755
+file path=usr/lib/brand/lx/lx_init_zone mode=0755
+file path=usr/lib/brand/lx/lx_init_zone_debian mode=0755
+file path=usr/lib/brand/lx/lx_init_zone_redhat mode=0755
+file path=usr/lib/brand/lx/lx_install mode=0755
+file path=usr/lib/brand/lx/lx_librtld_db.so.1
+file path=usr/lib/brand/lx/lx_native mode=0755
+file path=usr/lib/brand/lx/lx_support mode=0755
+file path=usr/lib/brand/lx/platform.xml mode=0444
+file path=usr/lib/devfsadm/linkmod/SUNW_lx_link_$(ARCH).so group=sys
+file path=usr/lib/lx_brand.so.1
+hardlink path=usr/kernel/dtrace/$(ARCH64)/lx_systrace \
+ target=../../../kernel/drv/$(ARCH64)/lx_systrace
+hardlink path=usr/kernel/dtrace/lx_systrace \
+ target=../../kernel/drv/lx_systrace
+legacy pkg=SUNWlxr arch=$(ARCH) category=system \
+ desc="Support for the 'lx' Brand" \
+ hotline="Please contact your local service provider" \
+ name="lx Brand (Root)" vendor="Sun Microsystems, Inc." \
+ version=11.11,REV=2009.11.11
+legacy pkg=SUNWlxu arch=$(ARCH) category=system \
+ desc="Support for the 'lx' Brand" \
+ hotline="Please contact your local service provider" \
+ name="lx Brand (Usr)" vendor="Sun Microsystems, Inc." \
+ version=11.11,REV=2009.11.11
+license cr_Sun license=cr_Sun
+license lic_CDDL license=lic_CDDL
+link path=usr/lib/brand/lx/64 target=$(ARCH64)
diff --git a/usr/src/stand/lib/fs/hsfs/hsfsops.c b/usr/src/stand/lib/fs/hsfs/hsfsops.c
index 0119d3fe99..21f8c37081 100644
--- a/usr/src/stand/lib/fs/hsfs/hsfsops.c
+++ b/usr/src/stand/lib/fs/hsfs/hsfsops.c
@@ -1026,6 +1026,7 @@ boot_hsfs_getdents(int fd, struct dirent *dep, unsigned size)
* alignment.
*/
n = strlen(hdp->hs_ufs_dir.d_name);
+
n = roundup((sizeof (struct dirent) + ((n > SLOP) ? n : 0)),
sizeof (off_t));
diff --git a/usr/src/test/libc-tests/runfiles/default.run b/usr/src/test/libc-tests/runfiles/default.run
index 6cc36ee305..801558459a 100644
--- a/usr/src/test/libc-tests/runfiles/default.run
+++ b/usr/src/test/libc-tests/runfiles/default.run
@@ -60,6 +60,8 @@ outputdir = /var/tmp/test_results
[/opt/libc-tests/tests/call_once.32]
[/opt/libc-tests/tests/call_once.64]
[/opt/libc-tests/tests/catopen]
+[/opt/libc-tests/tests/env-OS-4089.32]
+[/opt/libc-tests/tests/env-OS-4089.64]
[/opt/libc-tests/tests/endian.32]
[/opt/libc-tests/tests/endian.64]
[/opt/libc-tests/tests/quick_exit]
diff --git a/usr/src/test/libc-tests/tests/Makefile b/usr/src/test/libc-tests/tests/Makefile
index f8e539234c..f6fed9445e 100644
--- a/usr/src/test/libc-tests/tests/Makefile
+++ b/usr/src/test/libc-tests/tests/Makefile
@@ -29,6 +29,7 @@ SUBDIRS = \
wctype
PROGS = \
+ env-OS-4089 \
aligned_alloc \
c11_threads \
c11_tss \
diff --git a/usr/src/test/libc-tests/tests/env-OS-4089.c b/usr/src/test/libc-tests/tests/env-OS-4089.c
new file mode 100644
index 0000000000..0f52201c79
--- /dev/null
+++ b/usr/src/test/libc-tests/tests/env-OS-4089.c
@@ -0,0 +1,73 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+/*
+ * Regression test for OS-4089 where doing a putenv() call without an '=' sign
+ * may lead to a segmentation fault when doing a getenv() depending on the
+ * circumstances of the environment's layout. Verify putenv() mimics
+ * unsetenv().
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/debug.h>
+
+int
+main(void)
+{
+ if (putenv("FOO=bar") != 0) {
+ fprintf(stderr, "failed to put FOO into the environment: %s\n",
+ strerror(errno));
+ return (1);
+ }
+
+ if (getenv("FOO") == NULL) {
+ fprintf(stderr, "failed to retrieve FOO from the "
+ "environment!\n");
+ return (1);
+ }
+
+ VERIFY0(unsetenv("FOO"));
+
+ if (getenv("FOO") != NULL) {
+ fprintf(stderr, "Somehow retrieved FOO from the "
+ "environment after unsetenv()!\n");
+ return (1);
+ }
+
+ if (putenv("FOO=bar") != 0) {
+ fprintf(stderr, "failed to put FOO into the environment: %s\n",
+ strerror(errno));
+ return (1);
+ }
+
+ if (getenv("FOO") == NULL) {
+ fprintf(stderr, "failed to retrieve FOO from the "
+ "environment!\n");
+ return (1);
+ }
+
+ VERIFY0(putenv("FOO"));
+
+ if (getenv("FOO") != NULL) {
+ fprintf(stderr, "Somehow retrieved FOO from the "
+ "environment after putenv()!\n");
+ return (1);
+ }
+
+ return (0);
+}
diff --git a/usr/src/test/libc-tests/tests/random/Makefile b/usr/src/test/libc-tests/tests/random/Makefile
index ed480dacb9..c1a1a18e1f 100644
--- a/usr/src/test/libc-tests/tests/random/Makefile
+++ b/usr/src/test/libc-tests/tests/random/Makefile
@@ -17,7 +17,6 @@ include $(SRC)/Makefile.master
ROOTOPTPKG = $(ROOT)/opt/libc-tests
TESTDIR = $(ROOTOPTPKG)/tests/random
-ROOTBINDIR = $(ROOTOPTPKG)/bin
PROGS = arc4random \
arc4random_prefork \
@@ -74,17 +73,12 @@ arc4random_preforksig: arc4random_forksig.c
$(POST_PROCESS)
chacha: chacha_tv.c
- $(COMPILE.c) -DKEYSTREAM_ONLY -I$(SRC)/common/crypto/chacha -o chacha.o -c $(SRC)/common/crypto/chacha/chacha.c
+ $(COMPILE.c) -DKEYSTREAM_ONLY -I$(SRC)/common/crypto/chacha \
+ -o chacha.o -c $(SRC)/common/crypto/chacha/chacha.c
$(COMPILE.c) -I$(SRC)/common/crypto/chacha -o chacha_tv.o -c chacha_tv.c
$(LINK.c) -o $@ chacha_tv.o chacha.o $(LDLIBS)
$(POST_PROCESS)
-$(ROOTBINDIR):
- $(INS.dir)
-
-$(ROOTBINDIR)/%: %
- $(INS.file)
-
$(TESTDIR):
$(INS.dir)
diff --git a/usr/src/test/test-runner/cmd/Makefile b/usr/src/test/test-runner/cmd/Makefile
index 33e7a61275..948aea9ed8 100644
--- a/usr/src/test/test-runner/cmd/Makefile
+++ b/usr/src/test/test-runner/cmd/Makefile
@@ -34,4 +34,6 @@ $(ROOTBIN):
$(INS.dir)
$(ROOTBIN)/%: %.py
- $(INS.rename)
+ $(RM) $@
+ $(SED.py) $< > $@
+ $(CHMOD) 0555 $@
diff --git a/usr/src/test/test-runner/cmd/run.py b/usr/src/test/test-runner/cmd/run.py
index 44bb9a3f5a..0609a89aff 100644
--- a/usr/src/test/test-runner/cmd/run.py
+++ b/usr/src/test/test-runner/cmd/run.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python2.6
+#!ON_PYTHON_26
#
# This file and its contents are supplied under the terms of the
diff --git a/usr/src/test/util-tests/runfiles/default.run b/usr/src/test/util-tests/runfiles/default.run
index 3f7498e23b..dcdbf43f95 100644
--- a/usr/src/test/util-tests/runfiles/default.run
+++ b/usr/src/test/util-tests/runfiles/default.run
@@ -24,9 +24,14 @@ outputdir = /var/tmp/test_results
[/opt/util-tests/tests/printf_test]
[/opt/util-tests/tests/allowed-ips]
+[/opt/util-tests/tests/vnic-mtu]
+[/opt/util-tests/tests/bunyan]
[/opt/util-tests/tests/xargs_test]
+[/opt/util-tests/tests/mqt]
+[/opt/util-tests/tests/wqt]
+
[/opt/util-tests/tests/libnvpair_json]
tests = ['json_00_blank', 'json_01_boolean', 'json_02_numbers',
'json_03_empty_arrays', 'json_04_number_arrays', 'json_05_strings',
diff --git a/usr/src/test/util-tests/tests/Makefile b/usr/src/test/util-tests/tests/Makefile
index 4709c7adcd..739b79893f 100644
--- a/usr/src/test/util-tests/tests/Makefile
+++ b/usr/src/test/util-tests/tests/Makefile
@@ -14,7 +14,6 @@
# Copyright 2014 Garrett D'Amore <garrett@damore.org>
#
-SUBDIRS = dladm printf xargs
-SUBDIRS = dladm libnvpair_json printf xargs
+SUBDIRS = dladm libnvpair_json printf xargs mergeq workq
include $(SRC)/test/Makefile.com
diff --git a/usr/src/test/util-tests/tests/bunyan/Makefile b/usr/src/test/util-tests/tests/bunyan/Makefile
new file mode 100644
index 0000000000..5f58bf1614
--- /dev/null
+++ b/usr/src/test/util-tests/tests/bunyan/Makefile
@@ -0,0 +1,68 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+include $(SRC)/Makefile.master
+
+ROOTOPTPKG = $(ROOT)/opt/util-tests
+TESTDIR = $(ROOTOPTPKG)/tests/bunyan
+TESTBIN = $(ROOTOPTPKG)/bin
+
+PROG = btest
+
+SCRIPTS = \
+ bunyan
+
+include $(SRC)/cmd/Makefile.cmd
+include $(SRC)/test/Makefile.com
+
+OBJS = $(PROG:%=%.o)
+SRCS = $(OBJS:%.o=%.c)
+
+CMDS = $(PROG:%=$(TESTBIN)/%) $(SCRIPTS:%=$(TESTDIR)/%)
+$(CMDS) := FILEMODE = 0555
+
+DIRS = $(TESTDIR) $(TESTBIN)
+
+LDLIBS += -lbunyan
+
+all: $(PROG)
+
+$(PROG): $(OBJS)
+ $(LINK.c) $(OBJS) -o $@ $(LDLIBS)
+ $(POST_PROCESS)
+
+install: all $(CMDS)
+
+lint:
+
+clobber: clean
+ -$(RM) $(PROG)
+
+clean:
+ -$(RM) $(OBJS)
+
+$(CMDS): $(DIRS)
+
+$(DIRS):
+ $(INS.dir)
+
+$(TESTDIR)/%: %.ksh
+ $(INS.rename)
+
+$(TESTDIR)/%: %
+ $(INS.file)
+
+$(TESTBIN)/%: %
+ $(INS.file)
diff --git a/usr/src/test/util-tests/tests/bunyan/btest.c b/usr/src/test/util-tests/tests/bunyan/btest.c
new file mode 100644
index 0000000000..5239e91f1e
--- /dev/null
+++ b/usr/src/test/util-tests/tests/bunyan/btest.c
@@ -0,0 +1,312 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc.
+ */
+
+#include <stdio.h>
+#include <assert.h>
+#include <bunyan.h>
+#include <netinet/in.h>
+#include <strings.h>
+
+static void
+create_handles(void)
+{
+ bunyan_logger_t *a, *b, *c;
+
+ assert(bunyan_init("foo", &a) == 0);
+ assert(bunyan_init("foo", &b) == 0);
+ assert(bunyan_init("foo", &c) == 0);
+ bunyan_fini(a);
+ bunyan_fini(b);
+ bunyan_fini(c);
+}
+
+static void
+create_stream(void)
+{
+ bunyan_logger_t *a;
+
+ assert(bunyan_init("foo", &a) == 0);
+ assert(bunyan_stream_add(a, "bar", BUNYAN_L_TRACE,
+ bunyan_stream_fd, (void *)1) == 0);
+ assert(bunyan_stream_add(a, "baz", BUNYAN_L_TRACE,
+ bunyan_stream_fd, (void *)1) == 0);
+ assert(bunyan_stream_add(a, "bar", BUNYAN_L_TRACE,
+ bunyan_stream_fd, (void *)1) == EEXIST);
+ assert(bunyan_stream_add(a, "baz", BUNYAN_L_TRACE,
+ bunyan_stream_fd, (void *)1) == EEXIST);
+ assert(bunyan_stream_remove(a, "baz") == 0);
+ assert(bunyan_stream_remove(a, "baz") == ENOENT);
+ assert(bunyan_stream_remove(a, "foobaz") == ENOENT);
+ assert(bunyan_stream_remove(a, "blah") == ENOENT);
+ assert(bunyan_stream_add(a, "bar", BUNYAN_L_TRACE,
+ bunyan_stream_fd, (void *)1) == EEXIST);
+ assert(bunyan_stream_add(a, "baz", BUNYAN_L_TRACE,
+ bunyan_stream_fd, (void *)1) == 0);
+ assert(bunyan_stream_add(a, "debug", BUNYAN_L_DEBUG,
+ bunyan_stream_fd, (void *)1) == 0);
+ assert(bunyan_stream_add(a, "info", BUNYAN_L_INFO,
+ bunyan_stream_fd, (void *)1) == 0);
+ assert(bunyan_stream_add(a, "warn", BUNYAN_L_WARN,
+ bunyan_stream_fd, (void *)1) == 0);
+ assert(bunyan_stream_add(a, "error", BUNYAN_L_ERROR,
+ bunyan_stream_fd, (void *)1) == 0);
+ assert(bunyan_stream_add(a, "fatal", BUNYAN_L_FATAL,
+ bunyan_stream_fd, (void *)1) == 0);
+
+ bunyan_fini(a);
+}
+
+static void
+create_key(void)
+{
+ bunyan_logger_t *a;
+ struct in_addr v4;
+ struct in6_addr v6;
+
+ assert(bunyan_init("foo", &a) == 0);
+ assert(bunyan_key_remove(a, "blah") == ENOENT);
+ assert(bunyan_key_add(a, BUNYAN_T_END) == 0);
+
+ assert(bunyan_key_add(a, BUNYAN_T_STRING, "s", "foo",
+ BUNYAN_T_POINTER, "p", (void *)a, BUNYAN_T_IP, "v4", &v4,
+ BUNYAN_T_IP6, "v6", &v6, BUNYAN_T_BOOLEAN, "b", B_TRUE,
+ BUNYAN_T_INT32, "i32", 69, BUNYAN_T_INT64, "i64", (uint64_t)6969,
+ BUNYAN_T_UINT32, "u32", 23, BUNYAN_T_UINT64, "u64", (uint64_t)2323,
+ BUNYAN_T_DOUBLE, "d", 3.14,
+ BUNYAN_T_INT64STR, "i64s", (uint64_t)12345,
+ BUNYAN_T_UINT64STR, "u64s", (uint64_t)54321, BUNYAN_T_END) == 0);
+
+ assert(bunyan_key_remove(a, "s") == 0);
+ assert(bunyan_key_remove(a, "s") == ENOENT);
+ assert(bunyan_key_remove(a, "p") == 0);
+ assert(bunyan_key_remove(a, "p") == ENOENT);
+ assert(bunyan_key_remove(a, "v4") == 0);
+ assert(bunyan_key_remove(a, "v4") == ENOENT);
+ assert(bunyan_key_remove(a, "v6") == 0);
+ assert(bunyan_key_remove(a, "v6") == ENOENT);
+ assert(bunyan_key_remove(a, "b") == 0);
+ assert(bunyan_key_remove(a, "b") == ENOENT);
+ assert(bunyan_key_remove(a, "i32") == 0);
+ assert(bunyan_key_remove(a, "i32") == ENOENT);
+ assert(bunyan_key_remove(a, "i64") == 0);
+ assert(bunyan_key_remove(a, "i64") == ENOENT);
+ assert(bunyan_key_remove(a, "u32") == 0);
+ assert(bunyan_key_remove(a, "u32") == ENOENT);
+ assert(bunyan_key_remove(a, "u64") == 0);
+ assert(bunyan_key_remove(a, "u64") == ENOENT);
+ assert(bunyan_key_remove(a, "d") == 0);
+ assert(bunyan_key_remove(a, "d") == ENOENT);
+ assert(bunyan_key_remove(a, "i64s") == 0);
+ assert(bunyan_key_remove(a, "i64s") == ENOENT);
+ assert(bunyan_key_remove(a, "u64s") == 0);
+ assert(bunyan_key_remove(a, "u64s") == ENOENT);
+
+ bunyan_fini(a);
+}
+
+static void
+bad_level(void)
+{
+ bunyan_logger_t *a;
+
+ assert(bunyan_init("bad level", &a) == 0);
+ assert(bunyan_stream_add(a, "bar", BUNYAN_L_TRACE - 1,
+ bunyan_stream_fd, (void *)1) == EINVAL);
+ assert(bunyan_stream_add(a, "bar", BUNYAN_L_TRACE + 1,
+ bunyan_stream_fd, (void *)1) == EINVAL);
+ assert(bunyan_stream_add(a, "bar", BUNYAN_L_DEBUG - 1,
+ bunyan_stream_fd, (void *)1) == EINVAL);
+ assert(bunyan_stream_add(a, "bar", BUNYAN_L_INFO + 1,
+ bunyan_stream_fd, (void *)1) == EINVAL);
+ assert(bunyan_stream_add(a, "bar", BUNYAN_L_WARN - 1,
+ bunyan_stream_fd, (void *)1) == EINVAL);
+ assert(bunyan_stream_add(a, "bar", BUNYAN_L_ERROR + 1,
+ bunyan_stream_fd, (void *)1) == EINVAL);
+ assert(bunyan_stream_add(a, "bar", BUNYAN_L_FATAL - 1,
+ bunyan_stream_fd, (void *)1) == EINVAL);
+ assert(bunyan_stream_add(a, "bar", -5,
+ bunyan_stream_fd, (void *)1) == EINVAL);
+
+ bunyan_fini(a);
+}
+
+static void
+basic_log(void)
+{
+ bunyan_logger_t *a;
+
+ assert(bunyan_init("basic", &a) == 0);
+ assert(bunyan_stream_add(a, "foo", BUNYAN_L_TRACE,
+ bunyan_stream_fd, (void *)1) == 0);
+ assert(bunyan_trace(a, "trace", BUNYAN_T_END) == 0);
+ assert(bunyan_debug(a, "debug", BUNYAN_T_END) == 0);
+ assert(bunyan_info(a, "info", BUNYAN_T_END) == 0);
+ assert(bunyan_warn(a, "warn", BUNYAN_T_END) == 0);
+ assert(bunyan_error(a, "error", BUNYAN_T_END) == 0);
+ assert(bunyan_fatal(a, "fatal", BUNYAN_T_END) == 0);
+
+ bunyan_fini(a);
+}
+
+static void
+crazy_log(void)
+{
+ bunyan_logger_t *a;
+ struct in_addr v4;
+ struct in6_addr v6;
+
+ bzero(&v4, sizeof (struct in_addr));
+ bzero(&v6, sizeof (struct in6_addr));
+
+ assert(bunyan_init("basic", &a) == 0);
+ assert(bunyan_stream_add(a, "foo", BUNYAN_L_TRACE,
+ bunyan_stream_fd, (void *)1) == 0);
+ assert(bunyan_trace(a, "trace", BUNYAN_T_STRING, "s", "foo",
+ BUNYAN_T_POINTER, "p", (void *)a, BUNYAN_T_IP, "v4", &v4,
+ BUNYAN_T_IP6, "v6", &v6, BUNYAN_T_BOOLEAN, "b", B_TRUE,
+ BUNYAN_T_INT32, "i32", 69, BUNYAN_T_INT64, "i64", (uint64_t)6969,
+ BUNYAN_T_UINT32, "u32", 23, BUNYAN_T_UINT64, "u64", (uint64_t)2323,
+ BUNYAN_T_DOUBLE, "d", 3.14,
+ BUNYAN_T_INT64STR, "i64s", (uint64_t)12345,
+ BUNYAN_T_UINT64STR, "u64s", (uint64_t)54321, BUNYAN_T_END) == 0);
+ assert(bunyan_debug(a, "debug", BUNYAN_T_STRING, "s", "foo",
+ BUNYAN_T_POINTER, "p", (void *)a, BUNYAN_T_IP, "v4", &v4,
+ BUNYAN_T_IP6, "v6", &v6, BUNYAN_T_BOOLEAN, "b", B_TRUE,
+ BUNYAN_T_INT32, "i32", 69, BUNYAN_T_INT64, "i64", (uint64_t)6969,
+ BUNYAN_T_UINT32, "u32", 23, BUNYAN_T_UINT64, "u64", (uint64_t)2323,
+ BUNYAN_T_DOUBLE, "d", 3.14,
+ BUNYAN_T_INT64STR, "i64s", (uint64_t)12345,
+ BUNYAN_T_UINT64STR, "u64s", (uint64_t)54321, BUNYAN_T_END) == 0);
+ assert(bunyan_info(a, "info", BUNYAN_T_STRING, "s", "foo",
+ BUNYAN_T_POINTER, "p", (void *)a, BUNYAN_T_IP, "v4", &v4,
+ BUNYAN_T_IP6, "v6", &v6, BUNYAN_T_BOOLEAN, "b", B_TRUE,
+ BUNYAN_T_INT32, "i32", 69, BUNYAN_T_INT64, "i64", (uint64_t)6969,
+ BUNYAN_T_UINT32, "u32", 23, BUNYAN_T_UINT64, "u64", (uint64_t)2323,
+ BUNYAN_T_DOUBLE, "d", 3.14,
+ BUNYAN_T_INT64STR, "i64s", (uint64_t)12345,
+ BUNYAN_T_UINT64STR, "u64s", (uint64_t)54321, BUNYAN_T_END) == 0);
+ assert(bunyan_warn(a, "warn", BUNYAN_T_STRING, "s", "foo",
+ BUNYAN_T_POINTER, "p", (void *)a, BUNYAN_T_IP, "v4", &v4,
+ BUNYAN_T_IP6, "v6", &v6, BUNYAN_T_BOOLEAN, "b", B_TRUE,
+ BUNYAN_T_INT32, "i32", 69, BUNYAN_T_INT64, "i64", (uint64_t)6969,
+ BUNYAN_T_UINT32, "u32", 23, BUNYAN_T_UINT64, "u64", (uint64_t)2323,
+ BUNYAN_T_DOUBLE, "d", 3.14,
+ BUNYAN_T_INT64STR, "i64s", (uint64_t)12345,
+ BUNYAN_T_UINT64STR, "u64s", (uint64_t)54321, BUNYAN_T_END) == 0);
+ assert(bunyan_error(a, "error", BUNYAN_T_STRING, "s", "foo",
+ BUNYAN_T_POINTER, "p", (void *)a, BUNYAN_T_IP, "v4", &v4,
+ BUNYAN_T_IP6, "v6", &v6, BUNYAN_T_BOOLEAN, "b", B_TRUE,
+ BUNYAN_T_INT32, "i32", 69, BUNYAN_T_INT64, "i64", (uint64_t)6969,
+ BUNYAN_T_UINT32, "u32", 23, BUNYAN_T_UINT64, "u64", (uint64_t)2323,
+ BUNYAN_T_DOUBLE, "d", 3.14,
+ BUNYAN_T_INT64STR, "i64s", (uint64_t)12345,
+ BUNYAN_T_UINT64STR, "u64s", (uint64_t)54321, BUNYAN_T_END) == 0);
+ assert(bunyan_fatal(a, "fatal", BUNYAN_T_STRING, "s", "foo",
+ BUNYAN_T_POINTER, "p", (void *)a, BUNYAN_T_IP, "v4", &v4,
+ BUNYAN_T_IP6, "v6", &v6, BUNYAN_T_BOOLEAN, "b", B_TRUE,
+ BUNYAN_T_INT32, "i32", 69, BUNYAN_T_INT64, "i64", (uint64_t)6969,
+ BUNYAN_T_UINT32, "u32", 23, BUNYAN_T_UINT64, "u64", (uint64_t)2323,
+ BUNYAN_T_DOUBLE, "d", 3.14,
+ BUNYAN_T_INT64STR, "i64s", (uint64_t)12345,
+ BUNYAN_T_UINT64STR, "u64s", (uint64_t)54321, BUNYAN_T_END) == 0);
+
+ bunyan_fini(a);
+}
+
+static void
+child_log(void)
+{
+ bunyan_logger_t *a, *child;
+ struct in_addr v4;
+ struct in6_addr v6;
+
+ bzero(&v4, sizeof (struct in_addr));
+ bzero(&v6, sizeof (struct in6_addr));
+
+ assert(bunyan_init("child", &a) == 0);
+ assert(bunyan_stream_add(a, "foo", BUNYAN_L_TRACE,
+ bunyan_stream_fd, (void *)1) == 0);
+ assert(bunyan_child(a, &child, BUNYAN_T_STRING, "s", "foo",
+ BUNYAN_T_POINTER, "p", (void *)a, BUNYAN_T_IP, "v4", &v4,
+ BUNYAN_T_IP6, "v6", &v6, BUNYAN_T_BOOLEAN, "b", B_TRUE,
+ BUNYAN_T_INT32, "i32", 69, BUNYAN_T_INT64, "i64", (uint64_t)6969,
+ BUNYAN_T_UINT32, "u32", 23, BUNYAN_T_UINT64, "u64", (uint64_t)2323,
+ BUNYAN_T_DOUBLE, "d", 3.14,
+ BUNYAN_T_INT64STR, "i64s", (uint64_t)12345,
+ BUNYAN_T_UINT64STR, "u64s", (uint64_t)54321, BUNYAN_T_END) == 0);
+
+ bunyan_fini(a);
+ assert(bunyan_trace(child, "trace", BUNYAN_T_END) == 0);
+ assert(bunyan_debug(child, "debug", BUNYAN_T_END) == 0);
+ assert(bunyan_info(child, "info", BUNYAN_T_END) == 0);
+ assert(bunyan_warn(child, "warn", BUNYAN_T_END) == 0);
+ assert(bunyan_error(child, "error", BUNYAN_T_END) == 0);
+ assert(bunyan_fatal(child, "fatal", BUNYAN_T_END) == 0);
+
+ bunyan_fini(child);
+}
+
+static void
+crazy_child(void)
+{
+ bunyan_logger_t *a, *child;
+ struct in_addr v4;
+ struct in6_addr v6;
+
+ bzero(&v4, sizeof (struct in_addr));
+ bzero(&v6, sizeof (struct in6_addr));
+
+ assert(bunyan_init("crazy child", &a) == 0);
+ assert(bunyan_stream_add(a, "foo", BUNYAN_L_TRACE,
+ bunyan_stream_fd, (void *)1) == 0);
+ assert(bunyan_key_add(a, BUNYAN_T_STRING, "s", "foo",
+ BUNYAN_T_POINTER, "p", (void *)a, BUNYAN_T_IP, "v4", &v4,
+ BUNYAN_T_IP6, "v6", &v6, BUNYAN_T_BOOLEAN, "b", B_TRUE,
+ BUNYAN_T_INT32, "i32", 69, BUNYAN_T_INT64, "i64", (uint64_t)6969,
+ BUNYAN_T_UINT32, "u32", 23, BUNYAN_T_UINT64, "u64", (uint64_t)2323,
+ BUNYAN_T_DOUBLE, "d", 3.14,
+ BUNYAN_T_INT64STR, "i64s", (uint64_t)12345,
+ BUNYAN_T_UINT64STR, "u64s", (uint64_t)54321, BUNYAN_T_END) == 0);
+ assert(bunyan_child(a, &child, BUNYAN_T_END) == 0);
+ bunyan_fini(a);
+
+ assert(bunyan_stream_remove(child, "foo") == 0);
+ assert(bunyan_stream_add(child, "bar", BUNYAN_L_TRACE,
+ bunyan_stream_fd, (void *)1) == 0);
+ assert(bunyan_key_remove(child, "u64s") == 0);
+ assert(bunyan_trace(child, "trace", BUNYAN_T_END) == 0);
+ assert(bunyan_debug(child, "debug", BUNYAN_T_END) == 0);
+ assert(bunyan_info(child, "info", BUNYAN_T_END) == 0);
+ assert(bunyan_warn(child, "warn", BUNYAN_T_END) == 0);
+ assert(bunyan_error(child, "error", BUNYAN_T_END) == 0);
+ assert(bunyan_fatal(child, "fatal", BUNYAN_T_END) == 0);
+
+ bunyan_fini(child);
+}
+
+int
+main(void)
+{
+ create_handles();
+ create_stream();
+ create_key();
+ bad_level();
+ basic_log();
+ crazy_log();
+ child_log();
+ crazy_child();
+
+ return (0);
+}
diff --git a/usr/src/test/util-tests/tests/bunyan/bunyan.ksh b/usr/src/test/util-tests/tests/bunyan/bunyan.ksh
new file mode 100644
index 0000000000..4ec0f4bd62
--- /dev/null
+++ b/usr/src/test/util-tests/tests/bunyan/bunyan.ksh
@@ -0,0 +1,26 @@
+#! /usr/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014, Joyent, Inc.
+#
+
+#
+# Simple wrapper for the classic test.out
+#
+
+set -o errexit
+
+btest_root=$(dirname $0)/../..
+btest_bin=$btest_root/bin/btest
+
+LD_PRELOAD=libumem.so UMEM_DEBUG=default $btest_bin >/dev/null
diff --git a/usr/src/test/util-tests/tests/dladm/Makefile b/usr/src/test/util-tests/tests/dladm/Makefile
index df3997656c..a71332b016 100644
--- a/usr/src/test/util-tests/tests/dladm/Makefile
+++ b/usr/src/test/util-tests/tests/dladm/Makefile
@@ -17,7 +17,7 @@ include $(SRC)/cmd/Makefile.cmd
include $(SRC)/test/Makefile.com
ROOTOPTPKG = $(ROOT)/opt/util-tests/tests
-PROG = allowed-ips
+PROG = allowed-ips vnic-mtu
ROOTPROG = $(PROG:%=$(ROOTOPTPKG)/%)
diff --git a/usr/src/test/util-tests/tests/dladm/vnic-mtu.ksh b/usr/src/test/util-tests/tests/dladm/vnic-mtu.ksh
new file mode 100644
index 0000000000..49daa74f3b
--- /dev/null
+++ b/usr/src/test/util-tests/tests/dladm/vnic-mtu.ksh
@@ -0,0 +1,116 @@
+#!/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+#
+# The purpose of this is to test the MTU property on VNICs, using both
+# temporary and persistent properties. To do this, we create an
+# Etherstub and then create various VNICs on top of it.
+#
+
+vm_arg0="$(basename $0)"
+vm_stub="teststub$$"
+vm_vnic="testvnic$$"
+
+VM_MTU_MIN=576
+VM_MTU_MAX=9000
+
+fatal()
+{
+ typeset msg="$*"
+ [[ -z "$msg" ]] && msg="failed"
+ echo "TEST_FAIL: $vm_arg0: $msg" >&2
+
+ # Try to clean up just in case
+ dladm delete-vnic $vm_vnic 2>/dev/null
+ dladm delete-etherstub $vm_stub 2>/dev/null
+ exit 1
+}
+
+#
+# Validate that the MTU of the datalink dev has the MTU that we expect
+#
+validate_mtu()
+{
+ typeset dev=$1
+ typeset mtu=$2
+ typeset val
+
+ [[ -z "$dev" ]] && fatal "missing required device"
+ [[ -z "$mtu" ]] && fatal "missing required mtu"
+ val=$(dladm show-linkprop -c -p mtu -o value $dev)
+ [[ $? -eq 0 ]] || fatal "failed to get MTU for $dev"
+ (( $val == $mtu )) || fatal \
+ "mtu mismatch on $dev: expected $mtu, got $val"
+}
+
+delete_stub()
+{
+ dladm delete-etherstub $vm_stub || fatal \
+ "failed to delete stub $vm_stub"
+}
+
+create_stub()
+{
+ dladm create-etherstub $vm_stub || fatal \
+ "failed to create stub"
+ validate_mtu $vm_stub $VM_MTU_MAX
+}
+
+delete_vnic()
+{
+ dladm delete-vnic $vm_vnic || fatal "failed to delete vnic $vm_vnic"
+}
+
+test_vnic_pass()
+{
+ typeset mtu=$1
+ typeset flags=$2
+
+ [[ -z "$mtu" ]] && fatal "missing required mtu"
+ dladm create-vnic $flags -l $vm_stub -p mtu=$mtu $vm_vnic || fatal \
+ "failed tocreate vnic: $vm_vnic"
+ validate_mtu "$vm_vnic" "$mtu"
+ delete_vnic
+}
+
+test_vnic_fail()
+{
+ typeset mtu=$1
+ typeset flags=$2
+
+ [[ -z "$mtu" ]] && fatal "missing required mtu"
+ dladm create-vnic $flags -l $vm_stub -p mtu=$mtu \
+ $vm_vnic 2>/dev/null && fatal \
+ "created vnic with mtu $mtu, but failure expected"
+}
+
+test_pass()
+{
+ typeset flags=$1
+
+ create_stub
+ test_vnic_pass 1500 $flags
+ test_vnic_pass 1400 $flags
+ test_vnic_pass $VM_MTU_MIN $flags
+ test_vnic_pass $VM_MTU_MAX $flags
+ test_vnic_fail $((($VM_MTU_MIN - 1))) $flags
+ test_vnic_fail $((($VM_MTU_MAX + 1))) $flags
+ delete_stub
+}
+
+test_pass "-t"
+test_pass
+echo "TEST PASS: $vm_arg0"
diff --git a/usr/src/test/util-tests/tests/libnvpair_json/Makefile b/usr/src/test/util-tests/tests/libnvpair_json/Makefile
index d1d3585b02..3b48e0b630 100644
--- a/usr/src/test/util-tests/tests/libnvpair_json/Makefile
+++ b/usr/src/test/util-tests/tests/libnvpair_json/Makefile
@@ -10,14 +10,14 @@
#
#
-# Copyright (c) 2014 Joyent, Inc.
+# Copyright 2015 Joyent, Inc.
#
include $(SRC)/Makefile.master
ROOTOPTPKG = $(ROOT)/opt/util-tests
TESTDIR = $(ROOTOPTPKG)/tests/libnvpair_json
-ROOTBINDIR = $(ROOTOPTPKG)/bin
+TESTBIN = $(ROOTOPTPKG)/bin
PROG = print_json
@@ -38,9 +38,11 @@ include $(SRC)/test/Makefile.com
OBJS = $(PROG:%=%.o)
SRCS = $(OBJS:%.o=%.c)
-CMDS = $(PROG:%=$(ROOTBINDIR)/%) $(SCRIPTS:%=$(TESTDIR)/%)
+CMDS = $(PROG:%=$(TESTBIN)/%) $(SCRIPTS:%=$(TESTDIR)/%)
$(CMDS) := FILEMODE = 0555
+DIRS = $(TESTDIR) $(TESTBIN)
+
LDLIBS += -lnvpair
LINTFLAGS += -erroff=E_FUNC_ARG_UNUSED
@@ -61,16 +63,13 @@ clobber: clean
clean:
-$(RM) $(OBJS)
-$(CMDS): $(TESTDIR) $(PROG)
+$(CMDS): $(DIRS)
-$(ROOTBINDIR):
- $(INS.dir)
-
-$(ROOTBINDIR)/%: %
- $(INS.file)
-
-$(TESTDIR):
+$(DIRS):
$(INS.dir)
$(TESTDIR)/%: %.ksh
$(INS.rename)
+
+$(TESTBIN)/%: %
+ $(INS.file)
diff --git a/usr/src/test/util-tests/tests/libnvpair_json/json_08_large_data.ksh b/usr/src/test/util-tests/tests/libnvpair_json/json_08_large_data.ksh
new file mode 100644
index 0000000000..ca19e10d06
--- /dev/null
+++ b/usr/src/test/util-tests/tests/libnvpair_json/json_08_large_data.ksh
@@ -0,0 +1,37 @@
+#!/bin/ksh
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015, Joyent, Inc.
+#
+
+DIR=$(dirname $(whence $0))
+. ${DIR}/json_common
+
+BASELINE="$(cat <<EOF
+{\
+"o":[\
+$(for i in {1..50}; do
+ printf '"%s",' 'some_very_big_string_that_takes_up_space'
+done)\
+"end"\
+]\
+}
+EOF)"
+
+OUTPUT="$(${DIR}/../../bin/print_json <<-EOF
+add_string_array "o" $(for i in {1..50}; do
+ printf '"%s" ' 'some_very_big_string_that_takes_up_space'
+done)"end";
+EOF)"
+
+complete
diff --git a/usr/src/test/util-tests/tests/mergeq/Makefile b/usr/src/test/util-tests/tests/mergeq/Makefile
new file mode 100644
index 0000000000..ef6c6abb99
--- /dev/null
+++ b/usr/src/test/util-tests/tests/mergeq/Makefile
@@ -0,0 +1,64 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+include $(SRC)/Makefile.master
+
+ROOTOPTPKG = $(ROOT)/opt/util-tests
+TESTDIR = $(ROOTOPTPKG)/tests/mergeq
+
+PROG = mqt
+OBJS = mqt.o mergeq.o
+
+include $(SRC)/cmd/Makefile.cmd
+include $(SRC)/cmd/Makefile.ctf
+include $(SRC)/test/Makefile.com
+
+CMDS = $(PROG:%=$(TESTDIR)/%)
+$(CMDS) := FILEMODE = 0555
+
+CPPFLAGS += -I$(SRC)/lib/mergeq -D_REENTRANT
+LDLIBS += -lumem
+
+all: $(PROG)
+
+install: all $(CMDS)
+
+lint: lint_SRCS
+
+clobber: clean
+ -$(RM) $(PROG)
+
+clean:
+ -$(RM) $(OBJS)
+
+%.o: %.c
+ $(COMPILE.c) -o $@ -c $<
+ $(POST_PROCESS_O)
+
+%.o: $(SRC)/lib/mergeq/%.c
+ $(COMPILE.c) -o $@ -c $<
+ $(POST_PROCESS_O)
+
+$(PROG): $(OBJS)
+ $(LINK.c) $(OBJS) -o $@ $(LDLIBS)
+ $(POST_PROCESS)
+
+$(CMDS): $(TESTDIR) $(PROG)
+
+$(TESTDIR):
+ $(INS.dir)
+
+$(TESTDIR)/%: %
+ $(INS.file)
diff --git a/usr/src/test/util-tests/tests/mergeq/mqt.c b/usr/src/test/util-tests/tests/mergeq/mqt.c
new file mode 100644
index 0000000000..e61e9173d2
--- /dev/null
+++ b/usr/src/test/util-tests/tests/mergeq/mqt.c
@@ -0,0 +1,217 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * mergeq testing routines
+ */
+
+#include <mergeq.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+
+const char *
+_umem_debug_init()
+{
+ return ("default,verbose");
+}
+
+const char *
+_umem_logging_init(void)
+{
+ return ("fail,contents");
+}
+
+void *
+mergeq_alloc(size_t size)
+{
+ return (malloc(size));
+}
+
+/*ARGSUSED*/
+void
+mergeq_free(void *buf, size_t size)
+{
+ free(buf);
+}
+
+static int
+mqt_int(void *first, void *second, void **outp, void *arg)
+{
+ uintptr_t a, b, c;
+ a = (uintptr_t)first;
+ b = (uintptr_t)second;
+ c = a + b;
+ *outp = (void *)c;
+
+ return (0);
+}
+
+static int
+mqt_append(void *first, void *second, void **outp, void *arg)
+{
+ char *out;
+
+ /* Yes, this leaks, don't worry about it for the test */
+ if (asprintf(&out, "%s%s", first, second) != -1) {
+ *outp = out;
+ return (0);
+ }
+ return (-1);
+}
+
+static int
+mqt_fatal(void *first, void *second, void **outp, void *arg)
+{
+ return (-1);
+}
+
+/*
+ * Test structures and cases. We really want mq_args to be a flexible array
+ * member, but then we cant initialize it. Thus we set a fixed size number of
+ * entries.
+ */
+typedef struct mq_test {
+ const char *mq_desc; /* test description/name */
+ mergeq_proc_f *mq_proc; /* processing function */
+ int mq_rval; /* mergeq_merge return value */
+ int mq_uerr; /* user error, if any */
+ boolean_t mq_strcmp; /* use strcmp rather than == */
+ void *mq_result; /* expected result */
+ void **mq_args; /* argument array */
+} mq_test_t;
+
+static void *mqt_empty_args[] = { NULL };
+static void *mqt_single_args[] = { (void *)42, NULL };
+static void *mqt_double_args[] = { (void *)42, (void *)27, NULL };
+static void *mqt_wrap_args[] = {
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, NULL
+};
+static void *mqt_grow_args[] = {
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, NULL
+};
+static void *mqt_order_args[] = { "l", "e", "g", "e", "n", "d", " ", "o", "f",
+ " ", "z", "e", "l", "d", "a", NULL };
+
+
+static mq_test_t mq_tests[] = {
+ { "empty", mqt_int, 0, 0, B_FALSE, NULL, mqt_empty_args },
+ { "single", mqt_int, 0, 0, B_FALSE, (void *)42, mqt_single_args },
+ { "double", mqt_int, 0, 0, B_FALSE, (void *)69, mqt_double_args },
+ { "wrap", mqt_int, 0, 0, B_FALSE, (void *)64, mqt_wrap_args },
+ { "grow", mqt_int, 0, 0, B_FALSE, (void *)92, mqt_grow_args },
+ { "fatal", mqt_fatal, MERGEQ_UERROR, -1, B_FALSE, NULL,
+ mqt_double_args },
+ { "order", mqt_append, 0, 0, B_TRUE, "alegend of zeld", mqt_order_args }
+};
+
+#define NMQ_TESTS (sizeof (mq_tests) / sizeof (mq_test_t))
+
+static void
+mq_test_run(mergeq_t *mqp, mq_test_t *mqt)
+{
+ int ret, err;
+ void **itemp = mqt->mq_args;
+ void *out;
+
+ while (*itemp != NULL) {
+ if ((ret = mergeq_add(mqp, *itemp)) != 0) {
+ (void) fprintf(stderr,
+ "test %s: failed to add item: %s\n",
+ mqt->mq_desc, strerror(errno));
+ exit(1);
+ }
+ itemp++;
+ }
+
+ ret = mergeq_merge(mqp, mqt->mq_proc, NULL, &out, &err);
+ if (ret != mqt->mq_rval) {
+ (void) fprintf(stderr, "test %s: got incorrect rval. "
+ "Expected %d, got %d\n", mqt->mq_desc, mqt->mq_rval, ret);
+ exit(1);
+ }
+
+ if (ret == MERGEQ_UERROR && err != mqt->mq_uerr) {
+ (void) fprintf(stderr, "test %s: got incorrect user error. "
+ "Expected %d, got %d\n", mqt->mq_desc, mqt->mq_uerr, err);
+ exit(1);
+ }
+
+ if (ret == 0) {
+ if (mqt->mq_strcmp == B_TRUE &&
+ strcmp(out, mqt->mq_result) != 0) {
+ (void) fprintf(stderr, "test %s: got unexpected "
+ "result: %s, expected %s\n", mqt->mq_desc, out,
+ mqt->mq_result);
+ exit(1);
+ } else if (mqt->mq_strcmp == B_FALSE && out != mqt->mq_result) {
+ (void) fprintf(stderr, "test %s: got unexpected "
+ "result: %p, expected %p\n", mqt->mq_desc, out,
+ mqt->mq_result);
+ exit(1);
+ }
+ }
+}
+
+int
+main(void)
+{
+ int ret, i, t;
+ mergeq_t *mqp;
+ int nthreads[] = { 1, 2, 4, 8, 16, 32, 64, 128, 256, -1 };
+
+ for (t = 0; nthreads[t] != -1; t++) {
+ printf("Beginning tests with %d threads\n", nthreads[t]);
+ if ((ret = mergeq_init(&mqp, nthreads[t])) != 0) {
+ fprintf(stderr, "failed to init mergeq: %s\n",
+ strerror(errno));
+ return (1);
+ }
+
+ for (i = 0; i < NMQ_TESTS; i++) {
+ mq_test_run(mqp, &mq_tests[i]);
+ }
+
+ mergeq_fini(mqp);
+ }
+
+ return (0);
+}
diff --git a/usr/src/test/util-tests/tests/workq/Makefile b/usr/src/test/util-tests/tests/workq/Makefile
new file mode 100644
index 0000000000..ab5455fd86
--- /dev/null
+++ b/usr/src/test/util-tests/tests/workq/Makefile
@@ -0,0 +1,64 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+include $(SRC)/Makefile.master
+
+ROOTOPTPKG = $(ROOT)/opt/util-tests
+TESTDIR = $(ROOTOPTPKG)/tests/mergeq
+
+PROG = wqt
+OBJS = wqt.o workq.o
+
+include $(SRC)/cmd/Makefile.cmd
+include $(SRC)/cmd/Makefile.ctf
+include $(SRC)/test/Makefile.com
+
+CMDS = $(PROG:%=$(TESTDIR)/%)
+$(CMDS) := FILEMODE = 0555
+
+CPPFLAGS += -I$(SRC)/lib/mergeq -D_REENTRANT
+LDLIBS += -lumem
+
+all: $(PROG)
+
+install: all $(CMDS)
+
+lint: lint_SRCS
+
+clobber: clean
+ -$(RM) $(PROG)
+
+clean:
+ -$(RM) $(OBJS)
+
+%.o: %.c
+ $(COMPILE.c) -o $@ -c $<
+ $(POST_PROCESS_O)
+
+%.o: $(SRC)/lib/mergeq/%.c
+ $(COMPILE.c) -o $@ -c $<
+ $(POST_PROCESS_O)
+
+$(PROG): $(OBJS)
+ $(LINK.c) $(OBJS) -o $@ $(LDLIBS)
+ $(POST_PROCESS)
+
+$(CMDS): $(TESTDIR) $(PROG)
+
+$(TESTDIR):
+ $(INS.dir)
+
+$(TESTDIR)/%: %
+ $(INS.file)
diff --git a/usr/src/test/util-tests/tests/workq/wqt.c b/usr/src/test/util-tests/tests/workq/wqt.c
new file mode 100644
index 0000000000..bda0b4a9e5
--- /dev/null
+++ b/usr/src/test/util-tests/tests/workq/wqt.c
@@ -0,0 +1,196 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * workq testing routines
+ *
+ * What we want to guarantee is that every function is executed exactly once. To
+ * that end we have the callback function basically increment a global in the
+ * test around a mutex.
+ */
+
+#include <workq.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <thread.h>
+#include <synch.h>
+
+mutex_t wqt_lock = ERRORCHECKMUTEX;
+uintptr_t wqt_count;
+
+const char *
+_umem_debug_init()
+{
+ return ("default,verbose");
+}
+
+const char *
+_umem_logging_init(void)
+{
+ return ("fail,contents");
+}
+
+void *
+workq_alloc(size_t size)
+{
+ return (malloc(size));
+}
+
+/*ARGSUSED*/
+void
+workq_free(void *buf, size_t size)
+{
+ free(buf);
+}
+
+/*ARGSUSED*/
+int
+wqt_fatal(void *item, void *arg)
+{
+ return (-1);
+}
+
+int
+wqt_add(void *item, void *arg)
+{
+ uintptr_t a = (uintptr_t)item;
+
+ mutex_enter(&wqt_lock);
+ wqt_count += a;
+ mutex_exit(&wqt_lock);
+
+ return (0);
+}
+
+typedef struct wq_test {
+ const char *wq_desc; /* test description/name */
+ workq_proc_f *wq_proc; /* processing function */
+ int wq_rval; /* workq_work return value */
+ int wq_uerr; /* user error, if any */
+ uintptr_t wq_sum; /* expected sum */
+ void **wq_args; /* argument array */
+} wq_test_t;
+
+static void *wqt_empty_args[] = { NULL };
+static void *wqt_single_args[] = { (void *)42, NULL };
+static void *wqt_double_args[] = { (void *)42, (void *)27, NULL };
+static void *wqt_wrap_args[] = {
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, NULL
+};
+static void *wqt_grow_args[] = {
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, (void *)1, (void *)1, (void *)1, (void *)1,
+ (void *)1, (void *)1, NULL
+};
+
+static wq_test_t wq_tests[] = {
+ { "empty", wqt_add, 0, 0, NULL, wqt_empty_args },
+ { "single", wqt_add, 0, 0, 42, wqt_single_args },
+ { "double", wqt_add, 0, 0, 69, wqt_double_args },
+ { "wrap", wqt_add, 0, 0, 64, wqt_wrap_args },
+ { "grow", wqt_add, 0, 0, 92, wqt_grow_args },
+ { "fatal", wqt_fatal, WORKQ_UERROR, -1, -1, wqt_double_args }
+};
+
+#define NWQ_TESTS (sizeof (wq_tests) / sizeof (wq_test_t))
+
+static void
+wq_test_run(workq_t *wqp, wq_test_t *wqt)
+{
+ int ret, err;
+ void **itemp = wqt->wq_args;
+
+ while (*itemp != NULL) {
+ if ((ret = workq_add(wqp, *itemp)) != 0) {
+ (void) fprintf(stderr, "test %s: failed to add item: "
+ "%s\n", wqt->wq_desc, strerror(errno));
+ exit(1);
+ }
+ itemp++;
+ }
+
+ wqt_count = 0;
+ ret = workq_work(wqp, wqt->wq_proc, NULL, &err);
+ if (ret != wqt->wq_rval) {
+ (void) fprintf(stderr, "test %s: got incorrect rval. "
+ "Expected %d, got %d (%d)\n", wqt->wq_desc, wqt->wq_rval,
+ ret, errno);
+ exit(1);
+ }
+
+ if (ret == WORKQ_UERROR && err != wqt->wq_uerr) {
+ (void) fprintf(stderr, "test %s: got incorrect user error. "
+ "Expected %d, got %d\n", wqt->wq_desc, wqt->wq_uerr, err);
+ exit(1);
+ }
+
+ if (ret == 0 && wqt_count != wqt->wq_sum) {
+ (void) fprintf(stderr, "test %s: got unexpected "
+ "result: %d, expected %d\n", wqt->wq_desc, wqt_count,
+ wqt->wq_sum);
+ exit(1);
+ }
+}
+
+int
+main(void)
+{
+ int ret, i, t;
+ workq_t *wqp;
+ int nthreads[] = { 1, 2, 4, 8, 16, 32, 64, 128, 256, -1 };
+
+ for (t = 0; nthreads[t] != -1; t++) {
+ printf("Beginning tests with %d threads\n", nthreads[t]);
+ if ((ret = workq_init(&wqp, nthreads[t])) != 0) {
+ fprintf(stderr, "failed to init workq: %s\n",
+ strerror(errno));
+ return (1);
+ }
+
+ for (i = 0; i < NWQ_TESTS; i++) {
+ wq_test_run(wqp, &wq_tests[i]);
+ }
+
+ workq_fini(wqp);
+ }
+
+
+ return (0);
+}
diff --git a/usr/src/tools/Makefile b/usr/src/tools/Makefile
index bd541b758c..659e1da8af 100644
--- a/usr/src/tools/Makefile
+++ b/usr/src/tools/Makefile
@@ -56,6 +56,7 @@ COMMON_SUBDIRS= \
#
UNSHIPPED_SUBDIRS = \
elfsign \
+ man \
mandoc
sparc_SUBDIRS= \
diff --git a/usr/src/tools/README.tools b/usr/src/tools/README.tools
index 6cb51f028a..323d6c679c 100644
--- a/usr/src/tools/README.tools
+++ b/usr/src/tools/README.tools
@@ -192,6 +192,9 @@ findunref
sort > ~/unref-sparc.out
$ comm -12 ~/unref-i386.out ~/unref-sparc.out > ~/unref.out
+git-active
+ helper used by webrev to generate file lists for Git workspaces.
+
hdrchk
checks headers for compliance with OS/Net standards (form, includes,
C++ guards).
diff --git a/usr/src/tools/ctf/Makefile b/usr/src/tools/ctf/Makefile
index dee410e27c..b6679b2678 100644
--- a/usr/src/tools/ctf/Makefile
+++ b/usr/src/tools/ctf/Makefile
@@ -26,7 +26,7 @@
include ../Makefile.tools
-SUBDIRS = cvt dump stabs ctfstrip
+SUBDIRS = cvt stabs ctfstrip libctf ctfdiff ctfmerge ctfdump ctfconvert
.PARALLEL: $(SUBDIRS)
@@ -38,6 +38,11 @@ lint := TARGET= lint
.KEEP_STATE:
+ctfmerge: libctf
+ctfdiff: libctf
+ctfdump: libctf
+ctfconvert: libctf
+
all clean clobber install lint: dwarf .WAIT $(SUBDIRS)
dwarf $(SUBDIRS): FRC
diff --git a/usr/src/tools/ctf/Makefile.ctf b/usr/src/tools/ctf/Makefile.ctf
index 7c5b041746..0cf32bf1bc 100644
--- a/usr/src/tools/ctf/Makefile.ctf
+++ b/usr/src/tools/ctf/Makefile.ctf
@@ -41,6 +41,7 @@ HDRDIRS= \
-I$(SRC) \
-I/usr/include \
-I$(SRC)/uts/common \
+ -I$(SRC)/common/ctf \
-I$(NATIVE_ADJUNCT)/include
CPPFLAGS += $(HDRDIRS)
diff --git a/usr/src/tools/ctf/common/ctf_headers.h b/usr/src/tools/ctf/common/ctf_headers.h
index b00b8fd9a6..a63690be77 100644
--- a/usr/src/tools/ctf/common/ctf_headers.h
+++ b/usr/src/tools/ctf/common/ctf_headers.h
@@ -27,8 +27,6 @@
#ifndef _CTF_HEADERS_H
#define _CTF_HEADERS_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Because the ON tools are executed on the system where they are built,
* the tools need to include the headers installed on the build system,
@@ -37,6 +35,7 @@
* as part of Solaris. These include the following:
*
* $(SRC)/lib/libctf/common/libctf.h
+ * $(SRC)/lib/libctf/common/libctf_impl.h
* $(SRC)/uts/common/sys/ctf_api.h
* $(SRC)/uts/common/sys/ctf.h
*
@@ -67,6 +66,8 @@
#include <uts/common/sys/ctf.h>
#include <uts/common/sys/ctf_api.h>
+#include <common/ctf/ctf_impl.h>
#include <lib/libctf/common/libctf.h>
+#include <lib/libctf/common/libctf_impl.h>
#endif /* _CTF_HEADERS_H */
diff --git a/usr/src/tools/ctf/common/utils.h b/usr/src/tools/ctf/common/utils.h
index 9b07361a53..4ae2dd0917 100644
--- a/usr/src/tools/ctf/common/utils.h
+++ b/usr/src/tools/ctf/common/utils.h
@@ -27,9 +27,8 @@
#ifndef _UTILS_H
#define _UTILS_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <stdarg.h>
+#include <ctf_headers.h>
#ifdef __cplusplus
extern "C" {
diff --git a/usr/src/tools/ctf/dump/Makefile b/usr/src/tools/ctf/ctfconvert/Makefile
index 16d8280cdd..07fadc5f8f 100644
--- a/usr/src/tools/ctf/dump/Makefile
+++ b/usr/src/tools/ctf/ctfconvert/Makefile
@@ -23,7 +23,6 @@
# Copyright (c) 2001 by Sun Microsystems, Inc.
# All rights reserved.
#
-#ident "%Z%%M% %I% %E% SMI"
include ../../Makefile.tools
diff --git a/usr/src/tools/ctf/ctfconvert/Makefile.com b/usr/src/tools/ctf/ctfconvert/Makefile.com
new file mode 100644
index 0000000000..d1cea479fb
--- /dev/null
+++ b/usr/src/tools/ctf/ctfconvert/Makefile.com
@@ -0,0 +1,44 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+PROG = ctfconvert-altexec
+SRCS = ctfconvert.c
+
+include ../../Makefile.ctf
+
+CFLAGS += $(CCVERBOSE)
+LDLIBS += -lctf -lelf
+
+LDFLAGS = \
+ -L$(ROOTONBLDLIBMACH) \
+ '-R$$ORIGIN/../../lib/$(MACH)' \
+
+CPPFLAGS += -include ../../common/ctf_headers.h
+
+OBJS = $(SRCS:%.c=%.o)
+
+all: $(PROG)
+
+$(PROG): $(OBJS)
+ $(LINK.c) $(OBJS) -o $@ $(LDLIBS)
+ $(POST_PROCESS)
+
+%.o: $(SRC)/cmd/ctfconvert/%.c
+ $(COMPILE.c) $<
+
+$(ROOTONBLDMACHPROG): $(PROG)
+
+install: $(ROOTONBLDMACHPROG)
+
+clean:
+ $(RM) $(OBJS) $(LINTFILES)
+
+include $(SRC)/tools/Makefile.targ
diff --git a/usr/src/tools/ctf/dump/i386/Makefile b/usr/src/tools/ctf/ctfconvert/i386/Makefile
index 94e5883ae2..cd5462bee3 100644
--- a/usr/src/tools/ctf/dump/i386/Makefile
+++ b/usr/src/tools/ctf/ctfconvert/i386/Makefile
@@ -23,6 +23,5 @@
# Copyright (c) 2001 by Sun Microsystems, Inc.
# All rights reserved.
#
-#ident "%Z%%M% %I% %E% SMI"
include ../Makefile.com
diff --git a/usr/src/tools/ctf/dump/sparc/Makefile b/usr/src/tools/ctf/ctfconvert/sparc/Makefile
index 94e5883ae2..cd5462bee3 100644
--- a/usr/src/tools/ctf/dump/sparc/Makefile
+++ b/usr/src/tools/ctf/ctfconvert/sparc/Makefile
@@ -23,6 +23,5 @@
# Copyright (c) 2001 by Sun Microsystems, Inc.
# All rights reserved.
#
-#ident "%Z%%M% %I% %E% SMI"
include ../Makefile.com
diff --git a/usr/src/tools/ctf/ctfdiff/Makefile b/usr/src/tools/ctf/ctfdiff/Makefile
new file mode 100644
index 0000000000..07fadc5f8f
--- /dev/null
+++ b/usr/src/tools/ctf/ctfdiff/Makefile
@@ -0,0 +1,44 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+
+include ../../Makefile.tools
+
+SUBDIRS = $(MACH)
+
+all := TARGET= all
+install := TARGET= install
+clean := TARGET= clean
+clobber := TARGET= clobber
+lint := TARGET= lint
+
+.KEEP_STATE:
+
+install all clean clobber lint: $(SUBDIRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/tools/ctf/ctfdiff/Makefile.com b/usr/src/tools/ctf/ctfdiff/Makefile.com
new file mode 100644
index 0000000000..3c5e19fb6e
--- /dev/null
+++ b/usr/src/tools/ctf/ctfdiff/Makefile.com
@@ -0,0 +1,44 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+PROG = ctfdiff
+SRCS = ctfdiff.c
+
+include ../../Makefile.ctf
+
+CFLAGS += $(CCVERBOSE)
+LDLIBS += -lctf
+
+LDFLAGS = \
+ -L$(ROOTONBLDLIBMACH) \
+ '-R$$ORIGIN/../../lib/$(MACH)' \
+
+CPPFLAGS += -include ../../common/ctf_headers.h
+
+OBJS = $(SRCS:%.c=%.o)
+
+all: $(PROG)
+
+$(PROG): $(OBJS)
+ $(LINK.c) $(OBJS) -o $@ $(LDLIBS)
+ $(POST_PROCESS)
+
+%.o: $(SRC)/cmd/ctfdiff/%.c
+ $(COMPILE.c) $<
+
+$(ROOTONBLDMACHPROG): $(PROG)
+
+install: $(ROOTONBLDMACHPROG)
+
+clean:
+ $(RM) $(OBJS) $(LINTFILES)
+
+include $(SRC)/tools/Makefile.targ
diff --git a/usr/src/tools/ctf/ctfdiff/i386/Makefile b/usr/src/tools/ctf/ctfdiff/i386/Makefile
new file mode 100644
index 0000000000..92c756cb42
--- /dev/null
+++ b/usr/src/tools/ctf/ctfdiff/i386/Makefile
@@ -0,0 +1,16 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2015, Joyent, Inc.
+#
+
+include ../Makefile.com
diff --git a/usr/src/tools/ctf/ctfdiff/sparc/Makefile b/usr/src/tools/ctf/ctfdiff/sparc/Makefile
new file mode 100644
index 0000000000..92c756cb42
--- /dev/null
+++ b/usr/src/tools/ctf/ctfdiff/sparc/Makefile
@@ -0,0 +1,16 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2015, Joyent, Inc.
+#
+
+include ../Makefile.com
diff --git a/usr/src/tools/ctf/ctfdump/Makefile b/usr/src/tools/ctf/ctfdump/Makefile
new file mode 100644
index 0000000000..20d304216a
--- /dev/null
+++ b/usr/src/tools/ctf/ctfdump/Makefile
@@ -0,0 +1,33 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2015, Joyent, Inc.
+#
+
+include ../../Makefile.tools
+
+SUBDIRS = $(MACH)
+
+all := TARGET= all
+install := TARGET= install
+clean := TARGET= clean
+clobber := TARGET= clobber
+lint := TARGET= lint
+
+.KEEP_STATE:
+
+install all clean clobber lint: $(SUBDIRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/tools/ctf/ctfdump/Makefile.com b/usr/src/tools/ctf/ctfdump/Makefile.com
new file mode 100644
index 0000000000..3e1414ea59
--- /dev/null
+++ b/usr/src/tools/ctf/ctfdump/Makefile.com
@@ -0,0 +1,44 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+PROG = ctfdump
+SRCS = ctfdump.c
+
+include ../../Makefile.ctf
+
+CFLAGS += $(CCVERBOSE)
+LDLIBS += -lctf
+
+LDFLAGS = \
+ -L$(ROOTONBLDLIBMACH) \
+ '-R$$ORIGIN/../../lib/$(MACH)' \
+
+CPPFLAGS += -include ../../common/ctf_headers.h
+
+OBJS = $(SRCS:%.c=%.o)
+
+all: $(PROG)
+
+$(PROG): $(OBJS)
+ $(LINK.c) $(OBJS) -o $@ $(LDLIBS)
+ $(POST_PROCESS)
+
+%.o: $(SRC)/cmd/ctfdump/%.c
+ $(COMPILE.c) $<
+
+$(ROOTONBLDMACHPROG): $(PROG)
+
+install: $(ROOTONBLDMACHPROG)
+
+clean:
+ $(RM) $(OBJS) $(LINTFILES)
+
+include $(SRC)/tools/Makefile.targ
diff --git a/usr/src/tools/ctf/ctfdump/i386/Makefile b/usr/src/tools/ctf/ctfdump/i386/Makefile
new file mode 100644
index 0000000000..92c756cb42
--- /dev/null
+++ b/usr/src/tools/ctf/ctfdump/i386/Makefile
@@ -0,0 +1,16 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2015, Joyent, Inc.
+#
+
+include ../Makefile.com
diff --git a/usr/src/tools/ctf/ctfdump/sparc/Makefile b/usr/src/tools/ctf/ctfdump/sparc/Makefile
new file mode 100644
index 0000000000..92c756cb42
--- /dev/null
+++ b/usr/src/tools/ctf/ctfdump/sparc/Makefile
@@ -0,0 +1,16 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2015, Joyent, Inc.
+#
+
+include ../Makefile.com
diff --git a/usr/src/tools/ctf/ctfmerge/Makefile b/usr/src/tools/ctf/ctfmerge/Makefile
new file mode 100644
index 0000000000..07fadc5f8f
--- /dev/null
+++ b/usr/src/tools/ctf/ctfmerge/Makefile
@@ -0,0 +1,44 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+
+include ../../Makefile.tools
+
+SUBDIRS = $(MACH)
+
+all := TARGET= all
+install := TARGET= install
+clean := TARGET= clean
+clobber := TARGET= clobber
+lint := TARGET= lint
+
+.KEEP_STATE:
+
+install all clean clobber lint: $(SUBDIRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/tools/ctf/ctfmerge/Makefile.com b/usr/src/tools/ctf/ctfmerge/Makefile.com
new file mode 100644
index 0000000000..3f58feed9c
--- /dev/null
+++ b/usr/src/tools/ctf/ctfmerge/Makefile.com
@@ -0,0 +1,46 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+PROG = ctfmerge-altexec
+SRCS = ctfmerge.c
+
+include ../../Makefile.ctf
+
+CFLAGS += $(CCVERBOSE)
+LDLIBS += -lctf -lelf
+
+LDFLAGS += \
+ -L$(ROOTONBLDLIBMACH) \
+ '-R$$ORIGIN/../../lib/$(MACH)' \
+
+CPPFLAGS += -include ../../common/ctf_headers.h
+CERRWARN += -_gcc=-Wno-unused-variable
+CERRWARN += -_gcc=-Wno-uninitialized
+
+OBJS = $(SRCS:%.c=%.o)
+
+all: $(PROG)
+
+$(PROG): $(OBJS)
+ $(LINK.c) $(OBJS) -o $@ $(LDLIBS)
+ $(POST_PROCESS)
+
+%.o: $(SRC)/cmd/ctfmerge/%.c
+ $(COMPILE.c) $<
+
+$(ROOTONBLDMACHPROG): $(PROG)
+
+install: $(ROOTONBLDMACHPROG)
+
+clean:
+ $(RM) $(OBJS) $(LINTFILES)
+
+include $(SRC)/tools/Makefile.targ
diff --git a/usr/src/tools/ctf/ctfmerge/i386/Makefile b/usr/src/tools/ctf/ctfmerge/i386/Makefile
new file mode 100644
index 0000000000..cd5462bee3
--- /dev/null
+++ b/usr/src/tools/ctf/ctfmerge/i386/Makefile
@@ -0,0 +1,27 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+
+include ../Makefile.com
diff --git a/usr/src/tools/ctf/ctfmerge/sparc/Makefile b/usr/src/tools/ctf/ctfmerge/sparc/Makefile
new file mode 100644
index 0000000000..cd5462bee3
--- /dev/null
+++ b/usr/src/tools/ctf/ctfmerge/sparc/Makefile
@@ -0,0 +1,27 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2001 by Sun Microsystems, Inc.
+# All rights reserved.
+#
+
+include ../Makefile.com
diff --git a/usr/src/tools/ctf/cvt/Makefile.com b/usr/src/tools/ctf/cvt/Makefile.com
index 5385b3769f..0eab68b9bd 100644
--- a/usr/src/tools/ctf/cvt/Makefile.com
+++ b/usr/src/tools/ctf/cvt/Makefile.com
@@ -32,6 +32,7 @@ PROG=ctfconvert ctfmerge
GENSRCS= \
alist.c \
+ altexec.c \
barrier.c \
ctf.c \
fifo.c \
@@ -71,7 +72,7 @@ DWARFLDFLAGS = \
-L$(ROOTONBLDLIBMACH) \
'-R$$ORIGIN/../../lib/$(MACH)' \
-ldwarf
-DWARFCPPFLAGS = -I../../dwarf/common
+DWARFCPPFLAGS = -I$(SRC)/lib/libdwarf/common
LDFLAGS += -L$(NATIVE_ADJUNCT)/lib
LDLIBS += -lz -lelf
diff --git a/usr/src/tools/ctf/cvt/altexec.c b/usr/src/tools/ctf/cvt/altexec.c
new file mode 100644
index 0000000000..c986c0731a
--- /dev/null
+++ b/usr/src/tools/ctf/cvt/altexec.c
@@ -0,0 +1,45 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2015, Joyent, Inc.
+ */
+
+/*
+ * Alternate execution engine for CTF tools
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "ctftools.h"
+
+void
+ctf_altexec(const char *env, int argc, char **argv)
+{
+ const char *alt;
+ char *altexec;
+
+ alt = getenv(env);
+ if (alt == NULL || *alt == '\0')
+ return;
+
+ altexec = strdup(alt);
+ if (altexec == NULL)
+ terminate("failed to allocate memory for altexec\n");
+
+ if (unsetenv(env) != 0)
+ aborterr("failed to remove %s from environment", env);
+
+ (void) execv(altexec, argv);
+ terminate("failed to altexec %s", altexec);
+}
diff --git a/usr/src/tools/ctf/cvt/ctfconvert.c b/usr/src/tools/ctf/cvt/ctfconvert.c
index 36496c5e9d..af005e1934 100644
--- a/usr/src/tools/ctf/cvt/ctfconvert.c
+++ b/usr/src/tools/ctf/cvt/ctfconvert.c
@@ -146,6 +146,7 @@ main(int argc, char **argv)
{
tdata_t *filetd, *mstrtd;
char *label = NULL;
+ char *altexec;
int verbose = 0;
int ignore_non_c = 0;
int c;
@@ -156,12 +157,21 @@ main(int argc, char **argv)
progname = basename(argv[0]);
+ ctf_altexec("CTFCONVERT_ALTEXEC", argc, argv);
+
if (getenv("CTFCONVERT_DEBUG_LEVEL"))
debug_level = atoi(getenv("CTFCONVERT_DEBUG_LEVEL"));
if (getenv("CTFCONVERT_DEBUG_PARSE"))
debug_parse = atoi(getenv("CTFCONVERT_DEBUG_PARSE"));
- while ((c = getopt(argc, argv, ":l:L:o:ivs")) != EOF) {
+ if ((altexec = getenv("CTFCONVERT_ALTEXEC")) != NULL) {
+ (void) unsetenv("CTFCONVERT_ALTEXEC");
+ (void) execv(altexec, argv);
+ (void) fprintf(stderr, "ctfconvert altexec failed to "
+ "run %s: %s\n", altexec, strerror(errno));
+ }
+
+ while ((c = getopt(argc, argv, ":l:L:o:givs")) != EOF) {
switch (c) {
case 'l':
label = optarg;
diff --git a/usr/src/tools/ctf/cvt/ctfmerge.c b/usr/src/tools/ctf/cvt/ctfmerge.c
index 5706cceb17..b8053a7a23 100644
--- a/usr/src/tools/ctf/cvt/ctfmerge.c
+++ b/usr/src/tools/ctf/cvt/ctfmerge.c
@@ -745,6 +745,8 @@ main(int argc, char **argv)
progname = basename(argv[0]);
+ ctf_altexec("CTFMERGE_ALTEXEC", argc, argv);
+
if (getenv("CTFMERGE_DEBUG_LEVEL"))
debug_level = atoi(getenv("CTFMERGE_DEBUG_LEVEL"));
diff --git a/usr/src/tools/ctf/cvt/ctftools.h b/usr/src/tools/ctf/cvt/ctftools.h
index 708dfd82ef..755af5176d 100644
--- a/usr/src/tools/ctf/cvt/ctftools.h
+++ b/usr/src/tools/ctf/cvt/ctftools.h
@@ -43,6 +43,17 @@
extern "C" {
#endif
+/*
+ * XXX: This is hack to deal with GCC 4.x removing __builtin_stdarg_start
+ *
+ * We need to build on systems that don't have the fixed va_impl.h on the
+ * system, to achieve that, we stub it out here and in all similar places to
+ * give us a leg up.
+ */
+#if __GNUC__ >= 4
+#define __builtin_stdarg_start(list, name) __builtin_va_start(list, name)
+#endif
+
#include "list.h"
#include "hash.h"
@@ -441,6 +452,9 @@ void warning(char *, ...);
void vadebug(int, char *, va_list);
void debug(int, char *, ...);
+/* altexec.c */
+void ctf_altexec(const char *, int argc, char **);
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/tools/ctf/dump/dump.c b/usr/src/tools/ctf/dump/dump.c
deleted file mode 100644
index 5579bae596..0000000000
--- a/usr/src/tools/ctf/dump/dump.c
+++ /dev/null
@@ -1,1028 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-#pragma ident "%Z%%M% %I% %E% SMI"
-
-#include <sys/types.h>
-#include <sys/sysmacros.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-
-#include <strings.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <fcntl.h>
-#include <gelf.h>
-#include <zlib.h>
-
-#include "ctf_headers.h"
-#include "utils.h"
-#include "symbol.h"
-
-#define WARN(x) { warn(x); return (E_ERROR); }
-
-/*
- * Flags that indicate what data is to be displayed. An explicit `all' value is
- * provided to allow the code to distinguish between a request for everything
- * (currently requested by invoking ctfdump without flags) and individual
- * requests for all of the types of data (an invocation with all flags). In the
- * former case, we want to be able to implicitly adjust the definition of `all'
- * based on the CTF version of the file being dumped. For example, if a v2 file
- * is being dumped, `all' includes F_LABEL - a request to dump the label
- * section. If a v1 file is being dumped, `all' does not include F_LABEL,
- * because v1 CTF doesn't support labels. We need to be able to distinguish
- * between `ctfdump foo', which has an implicit request for labels if `foo'
- * supports them, and `ctfdump -l foo', which has an explicity request. In the
- * latter case, we exit with an error if `foo' is a v1 CTF file.
- */
-static enum {
- F_DATA = 0x01, /* show data object section */
- F_FUNC = 0x02, /* show function section */
- F_HDR = 0x04, /* show header */
- F_STR = 0x08, /* show string table */
- F_TYPES = 0x10, /* show type section */
- F_STATS = 0x20, /* show statistics */
- F_LABEL = 0x40, /* show label section */
- F_ALL = 0x80, /* explicit request for `all' */
- F_ALLMSK = 0xff /* show all sections and statistics */
-} flags = 0;
-
-static struct {
- ulong_t s_ndata; /* total number of data objects */
- ulong_t s_nfunc; /* total number of functions */
- ulong_t s_nargs; /* total number of function arguments */
- ulong_t s_argmax; /* longest argument list */
- ulong_t s_ntypes; /* total number of types */
- ulong_t s_types[16]; /* number of types by kind */
- ulong_t s_nsmem; /* total number of struct members */
- ulong_t s_nsbytes; /* total size of all structs */
- ulong_t s_smmax; /* largest struct in terms of members */
- ulong_t s_sbmax; /* largest struct in terms of bytes */
- ulong_t s_numem; /* total number of union members */
- ulong_t s_nubytes; /* total size of all unions */
- ulong_t s_ummax; /* largest union in terms of members */
- ulong_t s_ubmax; /* largest union in terms of bytes */
- ulong_t s_nemem; /* total number of enum members */
- ulong_t s_emmax; /* largest enum in terms of members */
- ulong_t s_nstr; /* total number of strings */
- size_t s_strlen; /* total length of all strings */
- size_t s_strmax; /* longest string length */
-} stats;
-
-typedef struct ctf_data {
- caddr_t cd_ctfdata; /* Pointer to the CTF data */
- size_t cd_ctflen; /* Length of CTF data */
-
- /*
- * cd_symdata will be non-NULL if the CTF data is being retrieved from
- * an ELF file with a symbol table. cd_strdata and cd_nsyms should be
- * used only if cd_symdata is non-NULL.
- */
- Elf_Data *cd_symdata; /* Symbol table */
- Elf_Data *cd_strdata; /* Symbol table strings */
- int cd_nsyms; /* Number of symbol table entries */
-} ctf_data_t;
-
-static const char *
-ref_to_str(uint_t name, const ctf_header_t *hp, const ctf_data_t *cd)
-{
- size_t offset = CTF_NAME_OFFSET(name);
- const char *s = cd->cd_ctfdata + hp->cth_stroff + offset;
-
- if (CTF_NAME_STID(name) != CTF_STRTAB_0)
- return ("<< ??? - name in external strtab >>");
-
- if (offset >= hp->cth_strlen)
- return ("<< ??? - name exceeds strlab len >>");
-
- if (hp->cth_stroff + offset >= cd->cd_ctflen)
- return ("<< ??? - file truncated >>");
-
- if (s[0] == '\0')
- return ("(anon)");
-
- return (s);
-}
-
-static const char *
-int_encoding_to_str(uint_t encoding)
-{
- static char buf[32];
-
- if (encoding == 0 || (encoding & ~(CTF_INT_SIGNED | CTF_INT_CHAR |
- CTF_INT_BOOL | CTF_INT_VARARGS)) != 0)
- (void) snprintf(buf, sizeof (buf), " 0x%x", encoding);
- else {
- buf[0] = '\0';
- if (encoding & CTF_INT_SIGNED)
- (void) strcat(buf, " SIGNED");
- if (encoding & CTF_INT_CHAR)
- (void) strcat(buf, " CHAR");
- if (encoding & CTF_INT_BOOL)
- (void) strcat(buf, " BOOL");
- if (encoding & CTF_INT_VARARGS)
- (void) strcat(buf, " VARARGS");
- }
-
- return (buf + 1);
-}
-
-static const char *
-fp_encoding_to_str(uint_t encoding)
-{
- static const char *const encs[] = {
- NULL, "SINGLE", "DOUBLE", "COMPLEX", "DCOMPLEX", "LDCOMPLEX",
- "LDOUBLE", "INTERVAL", "DINTERVAL", "LDINTERVAL", "IMAGINARY",
- "DIMAGINARY", "LDIMAGINARY"
- };
-
- static char buf[16];
-
- if (encoding < 1 || encoding >= (sizeof (encs) / sizeof (char *))) {
- (void) snprintf(buf, sizeof (buf), "%u", encoding);
- return (buf);
- }
-
- return (encs[encoding]);
-}
-
-static void
-print_line(const char *s)
-{
- static const char line[] = "----------------------------------------"
- "----------------------------------------";
- (void) printf("\n%s%.*s\n\n", s, (int)(78 - strlen(s)), line);
-}
-
-static int
-print_header(const ctf_header_t *hp, const ctf_data_t *cd)
-{
- print_line("- CTF Header ");
-
- (void) printf(" cth_magic = 0x%04x\n", hp->cth_magic);
- (void) printf(" cth_version = %u\n", hp->cth_version);
- (void) printf(" cth_flags = 0x%02x\n", hp->cth_flags);
- (void) printf(" cth_parlabel = %s\n",
- ref_to_str(hp->cth_parlabel, hp, cd));
- (void) printf(" cth_parname = %s\n",
- ref_to_str(hp->cth_parname, hp, cd));
- (void) printf(" cth_lbloff = %u\n", hp->cth_lbloff);
- (void) printf(" cth_objtoff = %u\n", hp->cth_objtoff);
- (void) printf(" cth_funcoff = %u\n", hp->cth_funcoff);
- (void) printf(" cth_typeoff = %u\n", hp->cth_typeoff);
- (void) printf(" cth_stroff = %u\n", hp->cth_stroff);
- (void) printf(" cth_strlen = %u\n", hp->cth_strlen);
-
- return (E_SUCCESS);
-}
-
-static int
-print_labeltable(const ctf_header_t *hp, const ctf_data_t *cd)
-{
- /* LINTED - pointer alignment */
- const ctf_lblent_t *ctl = (ctf_lblent_t *)(cd->cd_ctfdata +
- hp->cth_lbloff);
- ulong_t i, n = (hp->cth_objtoff - hp->cth_lbloff) / sizeof (*ctl);
-
- print_line("- Label Table ");
-
- if (hp->cth_lbloff & 3)
- WARN("cth_lbloff is not aligned properly\n");
- if (hp->cth_lbloff >= cd->cd_ctflen)
- WARN("file is truncated or cth_lbloff is corrupt\n");
- if (hp->cth_objtoff >= cd->cd_ctflen)
- WARN("file is truncated or cth_objtoff is corrupt\n");
- if (hp->cth_lbloff > hp->cth_objtoff)
- WARN("file is corrupt -- cth_lbloff > cth_objtoff\n");
-
- for (i = 0; i < n; i++, ctl++) {
- (void) printf(" %5u %s\n", ctl->ctl_typeidx,
- ref_to_str(ctl->ctl_label, hp, cd));
- }
-
- return (E_SUCCESS);
-}
-
-/*
- * Given the current symbol index (-1 to start at the beginning of the symbol
- * table) and the type of symbol to match, this function returns the index of
- * the next matching symbol (if any), and places the name of that symbol in
- * *namep. If no symbol is found, -1 is returned.
- */
-static int
-next_sym(const ctf_data_t *cd, const int symidx, const uchar_t matchtype,
- char **namep)
-{
- int i;
-
- for (i = symidx + 1; i < cd->cd_nsyms; i++) {
- GElf_Sym sym;
- char *name;
- int type;
-
- if (gelf_getsym(cd->cd_symdata, i, &sym) == 0)
- return (-1);
-
- name = (char *)cd->cd_strdata->d_buf + sym.st_name;
- type = GELF_ST_TYPE(sym.st_info);
-
- /*
- * Skip various types of symbol table entries.
- */
- if (type != matchtype || ignore_symbol(&sym, name))
- continue;
-
- /* Found one */
- *namep = name;
- return (i);
- }
-
- return (-1);
-}
-
-static int
-read_data(const ctf_header_t *hp, const ctf_data_t *cd)
-{
- /* LINTED - pointer alignment */
- const ushort_t *idp = (ushort_t *)(cd->cd_ctfdata + hp->cth_objtoff);
- ulong_t n = (hp->cth_funcoff - hp->cth_objtoff) / sizeof (ushort_t);
-
- if (flags != F_STATS)
- print_line("- Data Objects ");
-
- if (hp->cth_objtoff & 1)
- WARN("cth_objtoff is not aligned properly\n");
- if (hp->cth_objtoff >= cd->cd_ctflen)
- WARN("file is truncated or cth_objtoff is corrupt\n");
- if (hp->cth_funcoff >= cd->cd_ctflen)
- WARN("file is truncated or cth_funcoff is corrupt\n");
- if (hp->cth_objtoff > hp->cth_funcoff)
- WARN("file is corrupt -- cth_objtoff > cth_funcoff\n");
-
- if (flags != F_STATS) {
- int symidx, len, i;
- char *name = NULL;
-
- for (symidx = -1, i = 0; i < n; i++) {
- int nextsym;
-
- if (cd->cd_symdata == NULL || (nextsym = next_sym(cd,
- symidx, STT_OBJECT, &name)) < 0)
- name = NULL;
- else
- symidx = nextsym;
-
- len = printf(" [%u] %u", i, *idp++);
- if (name != NULL)
- (void) printf("%*s%s (%u)", (15 - len), "",
- name, symidx);
- (void) putchar('\n');
- }
- }
-
- stats.s_ndata = n;
- return (E_SUCCESS);
-}
-
-static int
-read_funcs(const ctf_header_t *hp, const ctf_data_t *cd)
-{
- /* LINTED - pointer alignment */
- const ushort_t *fp = (ushort_t *)(cd->cd_ctfdata + hp->cth_funcoff);
-
- /* LINTED - pointer alignment */
- const ushort_t *end = (ushort_t *)(cd->cd_ctfdata + hp->cth_typeoff);
-
- ulong_t id;
- int symidx;
-
- if (flags != F_STATS)
- print_line("- Functions ");
-
- if (hp->cth_funcoff & 1)
- WARN("cth_funcoff is not aligned properly\n");
- if (hp->cth_funcoff >= cd->cd_ctflen)
- WARN("file is truncated or cth_funcoff is corrupt\n");
- if (hp->cth_typeoff >= cd->cd_ctflen)
- WARN("file is truncated or cth_typeoff is corrupt\n");
- if (hp->cth_funcoff > hp->cth_typeoff)
- WARN("file is corrupt -- cth_funcoff > cth_typeoff\n");
-
- for (symidx = -1, id = 0; fp < end; id++) {
- ushort_t info = *fp++;
- ushort_t kind = CTF_INFO_KIND(info);
- ushort_t n = CTF_INFO_VLEN(info);
- ushort_t i;
- int nextsym;
- char *name;
-
- if (cd->cd_symdata == NULL || (nextsym = next_sym(cd, symidx,
- STT_FUNC, &name)) < 0)
- name = NULL;
- else
- symidx = nextsym;
-
- if (kind == CTF_K_UNKNOWN && n == 0)
- continue; /* skip padding */
-
- if (kind != CTF_K_FUNCTION) {
- (void) printf(" [%lu] unexpected kind -- %u\n",
- id, kind);
- return (E_ERROR);
- }
-
- if (fp + n > end) {
- (void) printf(" [%lu] vlen %u extends past section "
- "boundary\n", id, n);
- return (E_ERROR);
- }
-
- if (flags != F_STATS) {
- (void) printf(" [%lu] FUNC ", id);
- if (name != NULL)
- (void) printf("(%s) ", name);
- (void) printf("returns: %u args: (", *fp++);
-
- if (n != 0) {
- (void) printf("%u", *fp++);
- for (i = 1; i < n; i++)
- (void) printf(", %u", *fp++);
- }
-
- (void) printf(")\n");
- } else
- fp += n + 1; /* skip to next function definition */
-
- stats.s_nfunc++;
- stats.s_nargs += n;
- stats.s_argmax = MAX(stats.s_argmax, n);
- }
-
- return (E_SUCCESS);
-}
-
-static int
-read_types(const ctf_header_t *hp, const ctf_data_t *cd)
-{
- /* LINTED - pointer alignment */
- const ctf_type_t *tp = (ctf_type_t *)(cd->cd_ctfdata + hp->cth_typeoff);
-
- /* LINTED - pointer alignment */
- const ctf_type_t *end = (ctf_type_t *)(cd->cd_ctfdata + hp->cth_stroff);
-
- ulong_t id;
-
- if (flags != F_STATS)
- print_line("- Types ");
-
- if (hp->cth_typeoff & 3)
- WARN("cth_typeoff is not aligned properly\n");
- if (hp->cth_typeoff >= cd->cd_ctflen)
- WARN("file is truncated or cth_typeoff is corrupt\n");
- if (hp->cth_stroff >= cd->cd_ctflen)
- WARN("file is truncated or cth_stroff is corrupt\n");
- if (hp->cth_typeoff > hp->cth_stroff)
- WARN("file is corrupt -- cth_typeoff > cth_stroff\n");
-
- id = 1;
- if (hp->cth_parlabel || hp->cth_parname)
- id += 1 << CTF_PARENT_SHIFT;
-
- for (/* */; tp < end; id++) {
- ulong_t i, n = CTF_INFO_VLEN(tp->ctt_info);
- size_t size, increment, vlen = 0;
- int kind = CTF_INFO_KIND(tp->ctt_info);
-
- union {
- const void *ptr;
- const ctf_array_t *ap;
- const ctf_member_t *mp;
- const ctf_lmember_t *lmp;
- const ctf_enum_t *ep;
- const ushort_t *argp;
- } u;
-
- if (flags != F_STATS) {
- (void) printf(" %c%lu%c ",
- "[<"[CTF_INFO_ISROOT(tp->ctt_info)], id,
- "]>"[CTF_INFO_ISROOT(tp->ctt_info)]);
- }
-
- if (tp->ctt_size == CTF_LSIZE_SENT) {
- increment = sizeof (ctf_type_t);
- size = (size_t)CTF_TYPE_LSIZE(tp);
- } else {
- increment = sizeof (ctf_stype_t);
- size = tp->ctt_size;
- }
- u.ptr = (caddr_t)tp + increment;
-
- switch (kind) {
- case CTF_K_INTEGER:
- if (flags != F_STATS) {
- uint_t encoding = *((const uint_t *)u.ptr);
-
- (void) printf("INTEGER %s encoding=%s offset=%u"
- " bits=%u", ref_to_str(tp->ctt_name, hp,
- cd), int_encoding_to_str(
- CTF_INT_ENCODING(encoding)),
- CTF_INT_OFFSET(encoding),
- CTF_INT_BITS(encoding));
- }
- vlen = sizeof (uint_t);
- break;
-
- case CTF_K_FLOAT:
- if (flags != F_STATS) {
- uint_t encoding = *((const uint_t *)u.ptr);
-
- (void) printf("FLOAT %s encoding=%s offset=%u "
- "bits=%u", ref_to_str(tp->ctt_name, hp,
- cd), fp_encoding_to_str(
- CTF_FP_ENCODING(encoding)),
- CTF_FP_OFFSET(encoding),
- CTF_FP_BITS(encoding));
- }
- vlen = sizeof (uint_t);
- break;
-
- case CTF_K_POINTER:
- if (flags != F_STATS) {
- (void) printf("POINTER %s refers to %u",
- ref_to_str(tp->ctt_name, hp, cd),
- tp->ctt_type);
- }
- break;
-
- case CTF_K_ARRAY:
- if (flags != F_STATS) {
- (void) printf("ARRAY %s content: %u index: %u "
- "nelems: %u\n", ref_to_str(tp->ctt_name,
- hp, cd), u.ap->cta_contents,
- u.ap->cta_index, u.ap->cta_nelems);
- }
- vlen = sizeof (ctf_array_t);
- break;
-
- case CTF_K_FUNCTION:
- if (flags != F_STATS) {
- (void) printf("FUNCTION %s returns: %u args: (",
- ref_to_str(tp->ctt_name, hp, cd),
- tp->ctt_type);
-
- if (n != 0) {
- (void) printf("%u", *u.argp++);
- for (i = 1; i < n; i++, u.argp++)
- (void) printf(", %u", *u.argp);
- }
-
- (void) printf(")");
- }
-
- vlen = sizeof (ushort_t) * (n + (n & 1));
- break;
-
- case CTF_K_STRUCT:
- case CTF_K_UNION:
- if (kind == CTF_K_STRUCT) {
- stats.s_nsmem += n;
- stats.s_smmax = MAX(stats.s_smmax, n);
- stats.s_nsbytes += size;
- stats.s_sbmax = MAX(stats.s_sbmax, size);
-
- if (flags != F_STATS)
- (void) printf("STRUCT");
- } else {
- stats.s_numem += n;
- stats.s_ummax = MAX(stats.s_ummax, n);
- stats.s_nubytes += size;
- stats.s_ubmax = MAX(stats.s_ubmax, size);
-
- if (flags != F_STATS)
- (void) printf("UNION");
- }
-
- if (flags != F_STATS) {
- (void) printf(" %s (%d bytes)\n",
- ref_to_str(tp->ctt_name, hp, cd), size);
-
- if (size >= CTF_LSTRUCT_THRESH) {
- for (i = 0; i < n; i++, u.lmp++) {
- (void) printf(
- "\t%s type=%u off=%llu\n",
- ref_to_str(u.lmp->ctlm_name,
- hp, cd), u.lmp->ctlm_type,
- CTF_LMEM_OFFSET(u.lmp));
- }
- } else {
- for (i = 0; i < n; i++, u.mp++) {
- (void) printf(
- "\t%s type=%u off=%u\n",
- ref_to_str(u.mp->ctm_name,
- hp, cd), u.mp->ctm_type,
- u.mp->ctm_offset);
- }
- }
- }
-
- vlen = n * (size >= CTF_LSTRUCT_THRESH ?
- sizeof (ctf_lmember_t) : sizeof (ctf_member_t));
- break;
-
- case CTF_K_ENUM:
- if (flags != F_STATS) {
- (void) printf("ENUM %s\n",
- ref_to_str(tp->ctt_name, hp, cd));
-
- for (i = 0; i < n; i++, u.ep++) {
- (void) printf("\t%s = %d\n",
- ref_to_str(u.ep->cte_name, hp, cd),
- u.ep->cte_value);
- }
- }
-
- stats.s_nemem += n;
- stats.s_emmax = MAX(stats.s_emmax, n);
-
- vlen = sizeof (ctf_enum_t) * n;
- break;
-
- case CTF_K_FORWARD:
- if (flags != F_STATS) {
- (void) printf("FORWARD %s",
- ref_to_str(tp->ctt_name, hp, cd));
- }
- break;
-
- case CTF_K_TYPEDEF:
- if (flags != F_STATS) {
- (void) printf("TYPEDEF %s refers to %u",
- ref_to_str(tp->ctt_name, hp, cd),
- tp->ctt_type);
- }
- break;
-
- case CTF_K_VOLATILE:
- if (flags != F_STATS) {
- (void) printf("VOLATILE %s refers to %u",
- ref_to_str(tp->ctt_name, hp, cd),
- tp->ctt_type);
- }
- break;
-
- case CTF_K_CONST:
- if (flags != F_STATS) {
- (void) printf("CONST %s refers to %u",
- ref_to_str(tp->ctt_name, hp, cd),
- tp->ctt_type);
- }
- break;
-
- case CTF_K_RESTRICT:
- if (flags != F_STATS) {
- (void) printf("RESTRICT %s refers to %u",
- ref_to_str(tp->ctt_name, hp, cd),
- tp->ctt_type);
- }
- break;
-
- case CTF_K_UNKNOWN:
- break; /* hole in type id space */
-
- default:
- (void) printf("unexpected kind %u\n", kind);
- return (E_ERROR);
- }
-
- if (flags != F_STATS)
- (void) printf("\n");
-
- stats.s_ntypes++;
- stats.s_types[kind]++;
-
- tp = (ctf_type_t *)((uintptr_t)tp + increment + vlen);
- }
-
- return (E_SUCCESS);
-}
-
-static int
-read_strtab(const ctf_header_t *hp, const ctf_data_t *cd)
-{
- size_t n, off, len = hp->cth_strlen;
- const char *s = cd->cd_ctfdata + hp->cth_stroff;
-
- if (flags != F_STATS)
- print_line("- String Table ");
-
- if (hp->cth_stroff >= cd->cd_ctflen)
- WARN("file is truncated or cth_stroff is corrupt\n");
- if (hp->cth_stroff + hp->cth_strlen > cd->cd_ctflen)
- WARN("file is truncated or cth_strlen is corrupt\n");
-
- for (off = 0; len != 0; off += n) {
- if (flags != F_STATS) {
- (void) printf(" [%lu] %s\n", (ulong_t)off,
- s[0] == '\0' ? "\\0" : s);
- }
- n = strlen(s) + 1;
- len -= n;
- s += n;
-
- stats.s_nstr++;
- stats.s_strlen += n;
- stats.s_strmax = MAX(stats.s_strmax, n);
- }
-
- return (E_SUCCESS);
-}
-
-static void
-long_stat(const char *name, ulong_t value)
-{
- (void) printf(" %-36s= %lu\n", name, value);
-}
-
-static void
-fp_stat(const char *name, float value)
-{
- (void) printf(" %-36s= %.2f\n", name, value);
-}
-
-static int
-print_stats(void)
-{
- print_line("- CTF Statistics ");
-
- long_stat("total number of data objects", stats.s_ndata);
- (void) printf("\n");
-
- long_stat("total number of functions", stats.s_nfunc);
- long_stat("total number of function arguments", stats.s_nargs);
- long_stat("maximum argument list length", stats.s_argmax);
-
- if (stats.s_nfunc != 0) {
- fp_stat("average argument list length",
- (float)stats.s_nargs / (float)stats.s_nfunc);
- }
-
- (void) printf("\n");
-
- long_stat("total number of types", stats.s_ntypes);
- long_stat("total number of integers", stats.s_types[CTF_K_INTEGER]);
- long_stat("total number of floats", stats.s_types[CTF_K_FLOAT]);
- long_stat("total number of pointers", stats.s_types[CTF_K_POINTER]);
- long_stat("total number of arrays", stats.s_types[CTF_K_ARRAY]);
- long_stat("total number of func types", stats.s_types[CTF_K_FUNCTION]);
- long_stat("total number of structs", stats.s_types[CTF_K_STRUCT]);
- long_stat("total number of unions", stats.s_types[CTF_K_UNION]);
- long_stat("total number of enums", stats.s_types[CTF_K_ENUM]);
- long_stat("total number of forward tags", stats.s_types[CTF_K_FORWARD]);
- long_stat("total number of typedefs", stats.s_types[CTF_K_TYPEDEF]);
- long_stat("total number of volatile types",
- stats.s_types[CTF_K_VOLATILE]);
- long_stat("total number of const types", stats.s_types[CTF_K_CONST]);
- long_stat("total number of restrict types",
- stats.s_types[CTF_K_RESTRICT]);
- long_stat("total number of unknowns (holes)",
- stats.s_types[CTF_K_UNKNOWN]);
-
- (void) printf("\n");
-
- long_stat("total number of struct members", stats.s_nsmem);
- long_stat("maximum number of struct members", stats.s_smmax);
- long_stat("total size of all structs", stats.s_nsbytes);
- long_stat("maximum size of a struct", stats.s_sbmax);
-
- if (stats.s_types[CTF_K_STRUCT] != 0) {
- fp_stat("average number of struct members",
- (float)stats.s_nsmem / (float)stats.s_types[CTF_K_STRUCT]);
- fp_stat("average size of a struct", (float)stats.s_nsbytes /
- (float)stats.s_types[CTF_K_STRUCT]);
- }
-
- (void) printf("\n");
-
- long_stat("total number of union members", stats.s_numem);
- long_stat("maximum number of union members", stats.s_ummax);
- long_stat("total size of all unions", stats.s_nubytes);
- long_stat("maximum size of a union", stats.s_ubmax);
-
- if (stats.s_types[CTF_K_UNION] != 0) {
- fp_stat("average number of union members",
- (float)stats.s_numem / (float)stats.s_types[CTF_K_UNION]);
- fp_stat("average size of a union", (float)stats.s_nubytes /
- (float)stats.s_types[CTF_K_UNION]);
- }
-
- (void) printf("\n");
-
- long_stat("total number of enum members", stats.s_nemem);
- long_stat("maximum number of enum members", stats.s_emmax);
-
- if (stats.s_types[CTF_K_ENUM] != 0) {
- fp_stat("average number of enum members",
- (float)stats.s_nemem / (float)stats.s_types[CTF_K_ENUM]);
- }
-
- (void) printf("\n");
-
- long_stat("total number of unique strings", stats.s_nstr);
- long_stat("bytes of string data", stats.s_strlen);
- long_stat("maximum string length", stats.s_strmax);
-
- if (stats.s_nstr != 0) {
- fp_stat("average string length",
- (float)stats.s_strlen / (float)stats.s_nstr);
- }
-
- (void) printf("\n");
- return (E_SUCCESS);
-}
-
-static int
-print_usage(FILE *fp, int verbose)
-{
- (void) fprintf(fp, "Usage: %s [-dfhlsSt] [-u file] file\n", getpname());
-
- if (verbose) {
- (void) fprintf(fp,
- "\t-d dump data object section\n"
- "\t-f dump function section\n"
- "\t-h dump file header\n"
- "\t-l dump label table\n"
- "\t-s dump string table\n"
- "\t-S dump statistics\n"
- "\t-t dump type section\n"
- "\t-u save uncompressed CTF to a file\n");
- }
-
- return (E_USAGE);
-}
-
-static Elf_Scn *
-findelfscn(Elf *elf, GElf_Ehdr *ehdr, char *secname)
-{
- GElf_Shdr shdr;
- Elf_Scn *scn;
- char *name;
-
- for (scn = NULL; (scn = elf_nextscn(elf, scn)) != NULL; ) {
- if (gelf_getshdr(scn, &shdr) != NULL && (name =
- elf_strptr(elf, ehdr->e_shstrndx, shdr.sh_name)) != NULL &&
- strcmp(name, secname) == 0)
- return (scn);
- }
-
- return (NULL);
-}
-
-int
-main(int argc, char *argv[])
-{
- const char *filename = NULL;
- const char *ufile = NULL;
- int error = 0;
- int c, fd, ufd;
-
- ctf_data_t cd;
- const ctf_preamble_t *pp;
- ctf_header_t *hp;
- Elf *elf;
- GElf_Ehdr ehdr;
-
- (void) elf_version(EV_CURRENT);
-
- for (opterr = 0; optind < argc; optind++) {
- while ((c = getopt(argc, argv, "dfhlsStu:")) != (int)EOF) {
- switch (c) {
- case 'd':
- flags |= F_DATA;
- break;
- case 'f':
- flags |= F_FUNC;
- break;
- case 'h':
- flags |= F_HDR;
- break;
- case 'l':
- flags |= F_LABEL;
- break;
- case 's':
- flags |= F_STR;
- break;
- case 'S':
- flags |= F_STATS;
- break;
- case 't':
- flags |= F_TYPES;
- break;
- case 'u':
- ufile = optarg;
- break;
- default:
- if (optopt == '?')
- return (print_usage(stdout, 1));
- warn("illegal option -- %c\n", optopt);
- return (print_usage(stderr, 0));
- }
- }
-
- if (optind < argc) {
- if (filename != NULL)
- return (print_usage(stderr, 0));
- filename = argv[optind];
- }
- }
-
- if (filename == NULL)
- return (print_usage(stderr, 0));
-
- if (flags == 0 && ufile == NULL)
- flags = F_ALLMSK;
-
- if ((fd = open(filename, O_RDONLY)) == -1)
- die("failed to open %s", filename);
-
- if ((elf = elf_begin(fd, ELF_C_READ, NULL)) != NULL &&
- gelf_getehdr(elf, &ehdr) != NULL) {
-
- Elf_Data *dp;
- Elf_Scn *ctfscn = findelfscn(elf, &ehdr, ".SUNW_ctf");
- Elf_Scn *symscn;
- GElf_Shdr ctfshdr;
-
- if (ctfscn == NULL || (dp = elf_getdata(ctfscn, NULL)) == NULL)
- die("%s does not contain .SUNW_ctf data\n", filename);
-
- cd.cd_ctfdata = dp->d_buf;
- cd.cd_ctflen = dp->d_size;
-
- /*
- * If the sh_link field of the CTF section header is non-zero
- * it indicates which section contains the symbol table that
- * should be used. We default to the .symtab section if sh_link
- * is zero or if there's an error reading the section header.
- */
- if (gelf_getshdr(ctfscn, &ctfshdr) != NULL &&
- ctfshdr.sh_link != 0) {
- symscn = elf_getscn(elf, ctfshdr.sh_link);
- } else {
- symscn = findelfscn(elf, &ehdr, ".symtab");
- }
-
- /* If we found a symbol table, find the corresponding strings */
- if (symscn != NULL) {
- GElf_Shdr shdr;
- Elf_Scn *symstrscn;
-
- if (gelf_getshdr(symscn, &shdr) != NULL) {
- symstrscn = elf_getscn(elf, shdr.sh_link);
-
- cd.cd_nsyms = shdr.sh_size / shdr.sh_entsize;
- cd.cd_symdata = elf_getdata(symscn, NULL);
- cd.cd_strdata = elf_getdata(symstrscn, NULL);
- }
- }
- } else {
- struct stat st;
-
- if (fstat(fd, &st) == -1)
- die("failed to fstat %s", filename);
-
- cd.cd_ctflen = st.st_size;
- cd.cd_ctfdata = mmap(NULL, cd.cd_ctflen, PROT_READ,
- MAP_PRIVATE, fd, 0);
- if (cd.cd_ctfdata == MAP_FAILED)
- die("failed to mmap %s", filename);
- }
-
- /*
- * Get a pointer to the CTF data buffer and interpret the first portion
- * as a ctf_header_t. Validate the magic number and size.
- */
-
- if (cd.cd_ctflen < sizeof (ctf_preamble_t))
- die("%s does not contain a CTF preamble\n", filename);
-
- /* LINTED - pointer alignment */
- pp = (const ctf_preamble_t *)cd.cd_ctfdata;
-
- if (pp->ctp_magic != CTF_MAGIC)
- die("%s does not appear to contain CTF data\n", filename);
-
- if (pp->ctp_version == CTF_VERSION) {
- /* LINTED - pointer alignment */
- hp = (ctf_header_t *)cd.cd_ctfdata;
- cd.cd_ctfdata = (caddr_t)cd.cd_ctfdata + sizeof (ctf_header_t);
-
- if (cd.cd_ctflen < sizeof (ctf_header_t)) {
- die("%s does not contain a v%d CTF header\n", filename,
- CTF_VERSION);
- }
-
- } else {
- die("%s contains unsupported CTF version %d\n", filename,
- pp->ctp_version);
- }
-
- /*
- * If the data buffer is compressed, then malloc a buffer large enough
- * to hold the decompressed data, and use zlib to decompress it.
- */
- if (hp->cth_flags & CTF_F_COMPRESS) {
- z_stream zstr;
- void *buf;
- int rc;
-
- if ((buf = malloc(hp->cth_stroff + hp->cth_strlen)) == NULL)
- die("failed to allocate decompression buffer");
-
- bzero(&zstr, sizeof (z_stream));
- zstr.next_in = (void *)cd.cd_ctfdata;
- zstr.avail_in = cd.cd_ctflen;
- zstr.next_out = buf;
- zstr.avail_out = hp->cth_stroff + hp->cth_strlen;
-
- if ((rc = inflateInit(&zstr)) != Z_OK)
- die("failed to initialize zlib: %s\n", zError(rc));
-
- if ((rc = inflate(&zstr, Z_FINISH)) != Z_STREAM_END)
- die("failed to decompress CTF data: %s\n", zError(rc));
-
- if ((rc = inflateEnd(&zstr)) != Z_OK)
- die("failed to finish decompression: %s\n", zError(rc));
-
- if (zstr.total_out != hp->cth_stroff + hp->cth_strlen)
- die("CTF data is corrupt -- short decompression\n");
-
- cd.cd_ctfdata = buf;
- cd.cd_ctflen = hp->cth_stroff + hp->cth_strlen;
- }
-
- if (flags & F_HDR)
- error |= print_header(hp, &cd);
- if (flags & (F_LABEL))
- error |= print_labeltable(hp, &cd);
- if (flags & (F_DATA | F_STATS))
- error |= read_data(hp, &cd);
- if (flags & (F_FUNC | F_STATS))
- error |= read_funcs(hp, &cd);
- if (flags & (F_TYPES | F_STATS))
- error |= read_types(hp, &cd);
- if (flags & (F_STR | F_STATS))
- error |= read_strtab(hp, &cd);
- if (flags & F_STATS)
- error |= print_stats();
-
- /*
- * If the -u option is specified, write the uncompressed CTF data to a
- * raw CTF file. CTF data can already be extracted compressed by
- * applying elfdump -w -N .SUNW_ctf to an ELF file, so we don't bother.
- */
- if (ufile != NULL) {
- ctf_header_t h;
-
- bcopy(hp, &h, sizeof (h));
- h.cth_flags &= ~CTF_F_COMPRESS;
-
- if ((ufd = open(ufile, O_WRONLY|O_CREAT|O_TRUNC, 0666)) < 0 ||
- write(ufd, &h, sizeof (h)) != sizeof (h) ||
- write(ufd, cd.cd_ctfdata, cd.cd_ctflen) != cd.cd_ctflen) {
- warn("failed to write CTF data to '%s'", ufile);
- error |= E_ERROR;
- }
-
- (void) close(ufd);
- }
-
- if (elf != NULL)
- (void) elf_end(elf);
-
- (void) close(fd);
- return (error);
-}
diff --git a/usr/src/tools/ctf/dwarf/Makefile.com b/usr/src/tools/ctf/dwarf/Makefile.com
index 51e788ff04..d90e85b55a 100644
--- a/usr/src/tools/ctf/dwarf/Makefile.com
+++ b/usr/src/tools/ctf/dwarf/Makefile.com
@@ -67,11 +67,10 @@ OBJECTS=dwarf_abbrev.o \
include $(SRC)/lib/Makefile.lib
-SRCS= $(PICS:%.o=../common/%.c)
-FILEMODE = 0755
-
-SRCDIR = ../common/
+FILEMODE = 0755
+SRCDIR = $(SRC)/lib/libdwarf/common/
+SRCS = $(PICS:%.o=$(SRCDIR)/%.c)
CPPFLAGS += -I$(SRCDIR) -DELF_TARGET_ALL=1
CERRWARN += -_gcc=-Wno-unused
diff --git a/usr/src/tools/ctf/libctf/Makefile b/usr/src/tools/ctf/libctf/Makefile
new file mode 100644
index 0000000000..5e9236c43d
--- /dev/null
+++ b/usr/src/tools/ctf/libctf/Makefile
@@ -0,0 +1,46 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+include ../../Makefile.tools
+
+HDRS = libctf.h
+HDRDIR = common
+
+SUBDIRS = $(MACH)
+
+all := TARGET = all
+clean := TARGET = clean
+clobber := TARGET = clobber
+install := TARGET = install
+lint := TARGET = lint
+
+.KEEP_STATE:
+
+all clean clobber install lint: $(SUBDIRS)
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
diff --git a/usr/src/tools/ctf/libctf/Makefile.com b/usr/src/tools/ctf/libctf/Makefile.com
new file mode 100644
index 0000000000..0444b975d5
--- /dev/null
+++ b/usr/src/tools/ctf/libctf/Makefile.com
@@ -0,0 +1,55 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2015, Joyent, Inc.
+#
+
+include $(SRC)/lib/libctf/Makefile.shared.com
+include ../../Makefile.ctf
+
+#
+# For some reason LDFLAGS doesn't seem to be taking effect at the
+# moment. Therefore add what we need to LDLIBS for now.
+#
+LDLIBS += \
+ -L$(ROOTONBLDLIBMACH) \
+ '-R$$ORIGIN/../../lib/$(MACH)' \
+
+CPPFLAGS += -I$(SRC)/lib/libctf/common/ \
+ -I$(SRC)/lib/libdwarf/common/ \
+ -I$(SRC)/lib/mergeq \
+ -include ../../common/ctf_headers.h \
+ -DCTF_OLD_VERSIONS \
+ -DCTF_TOOLS_BUILD
+LDLIBS += -lc -lelf -ldwarf -lavl
+
+.KEEP_STATE:
+
+all: $(LIBS)
+
+install: all $(ROOTONBLDLIBMACH)/libctf.so.1 $(ROOTONBLDLIBMACH)/libctf.so
+
+$(ROOTONBLDLIBMACH)/%: %
+ $(INS.file)
+
+$(ROOTONBLDLIBMACH)/$(LIBLINKS): $(ROOTONBLDLIBMACH)/$(LIBLINKS)$(VERS)
+ $(INS.liblink)
+
+#
+# Just like with libdwarf, we can't actually add ctf to ourselves,
+# because we're part of the tools for creating CTF.
+#
+$(DYNLIB) := CTFMERGE_POST= :
+CTFCONVERT_O= :
+
+include $(SRC)/lib/Makefile.targ
+include $(SRC)/lib/libctf/Makefile.shared.targ
diff --git a/usr/src/tools/ctf/libctf/i386/Makefile b/usr/src/tools/ctf/libctf/i386/Makefile
new file mode 100644
index 0000000000..bb2f077f86
--- /dev/null
+++ b/usr/src/tools/ctf/libctf/i386/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2015, Joyent, Inc.
+#
+
+include ../Makefile.com
+
+all: $(LIBS)
diff --git a/usr/src/tools/ctf/libctf/sparc/Makefile b/usr/src/tools/ctf/libctf/sparc/Makefile
new file mode 100644
index 0000000000..bb2f077f86
--- /dev/null
+++ b/usr/src/tools/ctf/libctf/sparc/Makefile
@@ -0,0 +1,18 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2015, Joyent, Inc.
+#
+
+include ../Makefile.com
+
+all: $(LIBS)
diff --git a/usr/src/tools/env/illumos.sh b/usr/src/tools/env/illumos.sh
index 862a680d11..9fbb794c92 100644
--- a/usr/src/tools/env/illumos.sh
+++ b/usr/src/tools/env/illumos.sh
@@ -228,3 +228,9 @@ export ENABLE_SMB_PRINTING=
#export PERL_VERSION=5.16.1
#export PERL_ARCH=i86pc-solaris-thread-multi-64int
#export PERL_PKGVERS=-5161
+
+#
+# These checks ensure that if we accidentally run a program linked against the
+# proto area, that we then fail the build.
+#
+export LD_TOXIC_PATH="$ROOT/lib:$ROOT/usr/lib"
diff --git a/usr/src/tools/findunref/exception_list.git b/usr/src/tools/findunref/exception_list.git
new file mode 100644
index 0000000000..d14dbd22f0
--- /dev/null
+++ b/usr/src/tools/findunref/exception_list.git
@@ -0,0 +1,39 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2009 Cyril Plisko. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Git-specific exception list
+#
+# See README.exception_lists for details
+#
+
+#
+# Without nested repositories, this list could be empty, because ON
+# checks for unref relative to usr, and the git files are all in the
+# root of the repository.
+#
+
+*/.git
diff --git a/usr/src/tools/findunref/exception_list.unknown b/usr/src/tools/findunref/exception_list.unknown
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/usr/src/tools/findunref/exception_list.unknown
diff --git a/usr/src/tools/man/Makefile b/usr/src/tools/man/Makefile
new file mode 100644
index 0000000000..5f0e685323
--- /dev/null
+++ b/usr/src/tools/man/Makefile
@@ -0,0 +1,37 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015, Joyent, Inc.
+#
+
+include $(SRC)/cmd/man/Makefile.com
+include ../Makefile.tools
+
+include ../Makefile.targ
+
+all: $(PROG)
+
+$(PROG): $(OBJS)
+ $(LINK.c) $(OBJS) -o $@ $(LDLIBS)
+ $(POST_PROCESS)
+
+%.o: $(SRC)/cmd/man/%.c
+ $(COMPILE.c) $<
+
+$(ROOTONBLDMACHPROG): $(PROG)
+
+install: $(ROOTONBLDMACHPROG)
+
+clean:
+ $(RM) $(OBJS) $(LINTFILES)
+
+include $(SRC)/tools/Makefile.targ
diff --git a/usr/src/tools/onbld/Checks/Cddl.py b/usr/src/tools/onbld/Checks/Cddl.py
index 1f5f99f953..0f4d995e89 100644
--- a/usr/src/tools/onbld/Checks/Cddl.py
+++ b/usr/src/tools/onbld/Checks/Cddl.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python
+#!ON_PYTHON
CDDL = '''
CDDL HEADER START
diff --git a/usr/src/tools/onbld/Checks/CmtBlk.py b/usr/src/tools/onbld/Checks/CmtBlk.py
index 2f3d29fa79..ddf5caec48 100644
--- a/usr/src/tools/onbld/Checks/CmtBlk.py
+++ b/usr/src/tools/onbld/Checks/CmtBlk.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python
+#!ON_PYTHON
#
# CDDL HEADER START
diff --git a/usr/src/tools/onbld/Checks/Comments.py b/usr/src/tools/onbld/Checks/Comments.py
index daf6aa47d9..5d6feb265a 100644
--- a/usr/src/tools/onbld/Checks/Comments.py
+++ b/usr/src/tools/onbld/Checks/Comments.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python
+#!ON_PYTHON
#
# CDDL HEADER START
#
@@ -38,7 +38,7 @@ import re, sys
from onbld.Checks.DbLookups import BugDB
-bugre = re.compile(r'^(\d{2,7}) (.*)$')
+bugre = re.compile(r'^(\d{2,7}|[A-Z]{1,7}-\d{1,7}) (.*)$')
def isBug(comment):
diff --git a/usr/src/tools/onbld/Checks/Copyright.py b/usr/src/tools/onbld/Checks/Copyright.py
index 81a80058aa..8071b7f435 100644
--- a/usr/src/tools/onbld/Checks/Copyright.py
+++ b/usr/src/tools/onbld/Checks/Copyright.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python
+#!ON_PYTHON
#
# CDDL HEADER START
#
diff --git a/usr/src/tools/onbld/Checks/DbLookups.py b/usr/src/tools/onbld/Checks/DbLookups.py
index 11fd4185be..2843673035 100644
--- a/usr/src/tools/onbld/Checks/DbLookups.py
+++ b/usr/src/tools/onbld/Checks/DbLookups.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python
+#!ON_PYTHON
#
# CDDL HEADER START
#
@@ -37,6 +37,7 @@ import htmllib
import re
import urllib
import urllib2
+import json
try: # Python >= 2.5
from xml.etree import ElementTree
@@ -62,9 +63,9 @@ class BugDB(object):
print r["6505625"]["synopsis"]
"""
- VALID_DBS = ["illumos"]
+ VALID_DBS = ["illumos", "smartos"]
- def __init__(self, priority = ["illumos"]):
+ def __init__(self, priority = VALID_DBS):
"""Create a BugDB object.
Keyword argument:
@@ -75,6 +76,24 @@ class BugDB(object):
raise BugDBException, database
self.__priority = priority
+ def __smartosbug(self, cr):
+ url = "http://smartos.org/bugview/json/%s" % cr
+ req = urllib2.Request(url)
+
+ try:
+ data = urllib2.urlopen(req)
+ except urllib2.HTTPError, e:
+ if e.code == 404 or e.code == 403 or e.code == 400:
+ raise NonExistentBug(cr)
+ else:
+ raise
+
+ bug = json.load(data)
+
+ return {'cr_number': bug['id'],
+ 'synopsis': bug['summary']
+ }
+
def __illbug(self, cr):
url = "http://illumos.org/issues/%s.xml" % cr
@@ -117,6 +136,12 @@ class BugDB(object):
results[str(cr)] = self.__illbug(cr)
except NonExistentBug:
continue
+ elif database == "smartos":
+ for cr in crs:
+ try:
+ results[str(cr)] = self.__smartosbug(cr)
+ except NonExistentBug:
+ continue
# the CR has already been found by one bug database
# so don't bother looking it up in the others
diff --git a/usr/src/tools/onbld/Checks/HdrChk.py b/usr/src/tools/onbld/Checks/HdrChk.py
index c2697dcaf2..8f7b946d12 100644
--- a/usr/src/tools/onbld/Checks/HdrChk.py
+++ b/usr/src/tools/onbld/Checks/HdrChk.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python
+#!ON_PYTHON
#
# CDDL HEADER START
#
diff --git a/usr/src/tools/onbld/Checks/Keywords.py b/usr/src/tools/onbld/Checks/Keywords.py
index 5c374c3abb..ac3555be32 100644
--- a/usr/src/tools/onbld/Checks/Keywords.py
+++ b/usr/src/tools/onbld/Checks/Keywords.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python
+#!ON_PYTHON
#
# CDDL HEADER START
#
@@ -24,8 +24,6 @@
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
-#
#
# Mercurial (lack of) keyword checks
diff --git a/usr/src/tools/onbld/Checks/Mapfile.py b/usr/src/tools/onbld/Checks/Mapfile.py
index 2a8cb74aed..d4fa70d141 100644
--- a/usr/src/tools/onbld/Checks/Mapfile.py
+++ b/usr/src/tools/onbld/Checks/Mapfile.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python
+#!ON_PYTHON
#
# CDDL HEADER START
#
diff --git a/usr/src/tools/onbld/Checks/__init__.py b/usr/src/tools/onbld/Checks/__init__.py
index 7051b0c565..2fde833098 100644
--- a/usr/src/tools/onbld/Checks/__init__.py
+++ b/usr/src/tools/onbld/Checks/__init__.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python
+#!ON_PYTHON
#
# CDDL HEADER START
#
diff --git a/usr/src/tools/onbld/Scm/__init__.py b/usr/src/tools/onbld/Scm/__init__.py
index f45ecbc95f..8934eb3942 100644
--- a/usr/src/tools/onbld/Scm/__init__.py
+++ b/usr/src/tools/onbld/Scm/__init__.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python
+#!ON_PYTHON
#
# CDDL HEADER START
#
@@ -24,5 +24,3 @@
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
-#
diff --git a/usr/src/tools/onbld/hgext/__init__.py b/usr/src/tools/onbld/hgext/__init__.py
index f45ecbc95f..8934eb3942 100644
--- a/usr/src/tools/onbld/hgext/__init__.py
+++ b/usr/src/tools/onbld/hgext/__init__.py
@@ -1,4 +1,4 @@
-#! /usr/bin/python
+#!ON_PYTHON
#
# CDDL HEADER START
#
@@ -24,5 +24,3 @@
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
-#
diff --git a/usr/src/tools/scripts/bldenv.sh b/usr/src/tools/scripts/bldenv.sh
index f841a3cfc5..71987a44b5 100644
--- a/usr/src/tools/scripts/bldenv.sh
+++ b/usr/src/tools/scripts/bldenv.sh
@@ -291,9 +291,6 @@ if "${flags.t}" ; then
export CTFSTABS="${TOOLS_PROTO}/opt/onbld/bin/${MACH}/ctfstabs"
export GENOFFSETS="${TOOLS_PROTO}/opt/onbld/bin/genoffsets"
- export CTFCONVERT="${TOOLS_PROTO}/opt/onbld/bin/${MACH}/ctfconvert"
- export CTFMERGE="${TOOLS_PROTO}/opt/onbld/bin/${MACH}/ctfmerge"
-
PATH="${TOOLS_PROTO}/opt/onbld/bin/${MACH}:${PATH}"
PATH="${TOOLS_PROTO}/opt/onbld/bin:${PATH}"
export PATH
diff --git a/usr/src/tools/scripts/cddlchk.py b/usr/src/tools/scripts/cddlchk.py
index 816d2f33a7..ad6a6e6f56 100644
--- a/usr/src/tools/scripts/cddlchk.py
+++ b/usr/src/tools/scripts/cddlchk.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python2.6
+#!ON_PYTHON_26
#
# CDDL HEADER START
#
diff --git a/usr/src/tools/scripts/copyrightchk.py b/usr/src/tools/scripts/copyrightchk.py
index 210ef1b46c..f778947bfc 100644
--- a/usr/src/tools/scripts/copyrightchk.py
+++ b/usr/src/tools/scripts/copyrightchk.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python2.6
+#!ON_PYTHON_26
#
# CDDL HEADER START
#
diff --git a/usr/src/tools/scripts/git-pbchk.py b/usr/src/tools/scripts/git-pbchk.py
index 92303f967f..ddaa65a622 100644
--- a/usr/src/tools/scripts/git-pbchk.py
+++ b/usr/src/tools/scripts/git-pbchk.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python2.6
+#!ON_PYTHON_26
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2
diff --git a/usr/src/tools/scripts/hdrchk.py b/usr/src/tools/scripts/hdrchk.py
index 84acbc8616..aa62f7b090 100644
--- a/usr/src/tools/scripts/hdrchk.py
+++ b/usr/src/tools/scripts/hdrchk.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python2.6
+#!ON_PYTHON_26
#
# CDDL HEADER START
#
diff --git a/usr/src/tools/scripts/hg-active.py b/usr/src/tools/scripts/hg-active.py
index 88394e98e5..23f6c7cd98 100644
--- a/usr/src/tools/scripts/hg-active.py
+++ b/usr/src/tools/scripts/hg-active.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python2.6
+#!ON_PYTHON_26
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2
diff --git a/usr/src/tools/scripts/mapfilechk.py b/usr/src/tools/scripts/mapfilechk.py
index 9cf2000c7a..ed90d8b426 100644
--- a/usr/src/tools/scripts/mapfilechk.py
+++ b/usr/src/tools/scripts/mapfilechk.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python2.6
+#!ON_PYTHON_26
#
# CDDL HEADER START
#
diff --git a/usr/src/tools/scripts/nightly.sh b/usr/src/tools/scripts/nightly.sh
index a77d6f223e..842c63a295 100644
--- a/usr/src/tools/scripts/nightly.sh
+++ b/usr/src/tools/scripts/nightly.sh
@@ -25,6 +25,7 @@
# Copyright 2008, 2010, Richard Lowe
# Copyright 2011 Nexenta Systems, Inc. All rights reserved.
# Copyright 2012 Joshua M. Clulow <josh@sysmgr.org>
+# Copyright 2013 (c) Joyent, Inc. All rights reserved.
#
# Based on the nightly script from the integration folks,
# Mostly modified and owned by mike_s.
@@ -517,11 +518,6 @@ function use_tools {
GENOFFSETS=${TOOLSROOT}/opt/onbld/bin/genoffsets
export GENOFFSETS
- CTFCONVERT=${TOOLSROOT}/opt/onbld/bin/${MACH}/ctfconvert
- export CTFCONVERT
- CTFMERGE=${TOOLSROOT}/opt/onbld/bin/${MACH}/ctfmerge
- export CTFMERGE
-
if [ "$VERIFY_ELFSIGN" = "y" ]; then
ELFSIGN=${TOOLSROOT}/opt/onbld/bin/elfsigncmp
else
@@ -541,8 +537,6 @@ function use_tools {
echo "\n==== New environment settings. ====\n" >> $LOGFILE
echo "STABS=${STABS}" >> $LOGFILE
echo "CTFSTABS=${CTFSTABS}" >> $LOGFILE
- echo "CTFCONVERT=${CTFCONVERT}" >> $LOGFILE
- echo "CTFMERGE=${CTFMERGE}" >> $LOGFILE
echo "ELFSIGN=${ELFSIGN}" >> $LOGFILE
echo "PATH=${PATH}" >> $LOGFILE
echo "ONBLD_TOOLS=${ONBLD_TOOLS}" >> $LOGFILE
diff --git a/usr/src/tools/scripts/validate_pkg.py b/usr/src/tools/scripts/validate_pkg.py
index 6678b211e8..155df9ba25 100644
--- a/usr/src/tools/scripts/validate_pkg.py
+++ b/usr/src/tools/scripts/validate_pkg.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python2.6
+#!ON_PYTHON_26
#
# CDDL HEADER START
#
diff --git a/usr/src/tools/scripts/webrev.sh b/usr/src/tools/scripts/webrev.sh
index 5fbed75864..74b02fc923 100644
--- a/usr/src/tools/scripts/webrev.sh
+++ b/usr/src/tools/scripts/webrev.sh
@@ -26,6 +26,7 @@
# Copyright 2012 Marcel Telka <marcel@telka.sk>
# Copyright 2014 Bart Coddens <bart.coddens@gmail.com>
# Copyright 2016 Nexenta Systems, Inc.
+# Copyright 2016 Joyent, Inc.
#
#
@@ -294,20 +295,20 @@ function remote_mkdirs
"cannot create temporary file for batch file"
return 1
fi
- OLDIFS=$IFS
- IFS=/
+ OLDIFS=$IFS
+ IFS=/
typeset dir
- for dir in ${dirs_mk}; do
+ for dir in ${dirs_mk}; do
#
# Use the '-' prefix to ignore mkdir errors in order
# to avoid an error in case the directory already
# exists. We check the directory with chdir to be sure
# there is one.
#
- print -- "-mkdir ${dir}" >> ${batch_file_mkdir}
- print "chdir ${dir}" >> ${batch_file_mkdir}
- done
- IFS=$OLDIFS
+ print -- "-mkdir ${dir}" >> ${batch_file_mkdir}
+ print "chdir ${dir}" >> ${batch_file_mkdir}
+ done
+ IFS=$OLDIFS
typeset -r sftp_err_msg=$( $MKTEMP /tmp/webrev_scp_err.XXXXXX )
if [[ -z ${sftp_err_msg} ]]; then
print "\nERROR: remote_mkdirs:" \
@@ -891,7 +892,7 @@ sdiff_to_html()
print "$HTML<head>$STDHEAD"
print "<title>$WNAME Sdiff $TPATH/$TNAME</title>"
print "</head><body id=\"SUNWwebrev\">"
- print "<a class=\"print\" href=\"javascript:print()\">Print this page</a>"
+ print "<a class=\"print\" href=\"javascript:print()\">Print this page</a>"
print "<pre>$COMMENT</pre>\n"
print "<table><tr valign=\"top\">"
print "<td><pre>"
@@ -969,10 +970,10 @@ function framed_sdiff
<frame src="${RTOP}ancnav.html" scrolling="no" marginwidth="0"
marginheight="0" name="nav"></frame>
<noframes>
- <body id="SUNWwebrev">
+ <body id="SUNWwebrev">
Alas 'frames' webrev requires that your browser supports frames
and has the feature enabled.
- </body>
+ </body>
</noframes>
</frameset>
</html>
@@ -1110,20 +1111,20 @@ function insert_anchors
#
function relative_dir
{
- typeset cur="${1##$2?(/)}"
-
- #
- # If the first path was specified absolutely, and it does
- # not start with the second path, it's an error.
- #
- if [[ "$cur" = "/${1#/}" ]]; then
- # Should never happen.
- print -u2 "\nWARNING: relative_dir: \"$1\" not relative "
- print -u2 "to \"$2\". Check input paths. Framed webrev "
- print -u2 "will not be relocatable!"
- print $2
- return
- fi
+ typeset cur="${1##$2?(/)}"
+
+ #
+ # If the first path was specified absolutely, and it does
+ # not start with the second path, it's an error.
+ #
+ if [[ "$cur" = "/${1#/}" ]]; then
+ # Should never happen.
+ print -u2 "\nWARNING: relative_dir: \"$1\" not relative "
+ print -u2 "to \"$2\". Check input paths. Framed webrev "
+ print -u2 "will not be relocatable!"
+ print $2
+ return
+ fi
#
# This is kind of ugly. The sed script will do the following:
@@ -1157,22 +1158,23 @@ function frame_nav_js
{
cat << \EOF
var myInt;
-var scrolling=0;
+var scrolling = 0;
var sfactor = 3;
-var scount=10;
+var scount = 10;
-function scrollByPix() {
- if (scount<=0) {
- sfactor*=1.2;
- scount=10;
+function scrollByPix()
+{
+ if (scount <= 0) {
+ sfactor *= 1.2;
+ scount = 10;
}
- parent.lhs.scrollBy(0,sfactor);
- parent.rhs.scrollBy(0,sfactor);
+ parent.lhs.scrollBy(0, sfactor);
+ parent.rhs.scrollBy(0, sfactor);
scount--;
}
-function scrollToAnc(num) {
-
+function scrollToAnc(num)
+{
// Update the value of the anchor in the form which we use as
// storage for this value. setAncValue() will take care of
// correcting for overflow and underflow of the value and return
@@ -1189,8 +1191,8 @@ function scrollToAnc(num) {
parent.lhs.location.replace(parent.lhs.location.pathname + "#" + num);
parent.rhs.location.replace(parent.rhs.location.pathname + "#" + num);
- parent.lhs.scrollBy(0,-30);
- parent.rhs.scrollBy(0,-30);
+ parent.lhs.scrollBy(0, -30);
+ parent.rhs.scrollBy(0, -30);
}
function getAncValue()
@@ -1226,50 +1228,54 @@ function setAncValue(val)
return (val);
}
-function stopScroll() {
- if (scrolling==1) {
+function stopScroll()
+{
+ if (scrolling == 1) {
clearInterval(myInt);
- scrolling=0;
+ scrolling = 0;
}
}
-function startScroll() {
+function startScroll()
+{
stopScroll();
- scrolling=1;
- myInt=setInterval("scrollByPix()",10);
+ scrolling = 1;
+ myInt = setInterval("scrollByPix()", 10);
}
-function handlePress(b) {
-
+function handlePress(b)
+{
switch (b) {
- case 1 :
+ case 1:
scrollToAnc(-1);
break;
- case 2 :
+ case 2:
scrollToAnc(getAncValue() - 1);
break;
- case 3 :
- sfactor=-3;
+ case 3:
+ sfactor = -3;
startScroll();
break;
- case 4 :
- sfactor=3;
+ case 4:
+ sfactor = 3;
startScroll();
break;
- case 5 :
+ case 5:
scrollToAnc(getAncValue() + 1);
break;
- case 6 :
+ case 6:
scrollToAnc(999999);
break;
}
}
-function handleRelease(b) {
+function handleRelease(b)
+{
stopScroll();
}
-function keypress(ev) {
+function keypress(ev)
+{
var keynum;
var keychar;
@@ -1288,10 +1294,15 @@ function keypress(ev) {
handlePress(5);
return (0);
}
+
return (1);
}
-function ValidateDiffNum(){
+function ValidateDiffNum()
+{
+ var val;
+ var i;
+
val = parent.nav.document.diff.display.value;
if (val == "EOF") {
scrollToAnc(999999);
@@ -1303,15 +1314,15 @@ function ValidateDiffNum(){
return;
}
- i=parseInt(val);
- if (isNaN(i)) {
- parent.nav.document.diff.display.value = getAncValue();
- } else {
- scrollToAnc(i);
- }
- return false;
-}
+ i = parseInt(val);
+ if (isNaN(i)) {
+ parent.nav.document.diff.display.value = getAncValue();
+ } else {
+ scrollToAnc(i);
+ }
+ return (false);
+}
EOF
}
@@ -1582,8 +1593,8 @@ function difflines
/^[0-9]*,[0-9]*c$/ {
n=split(substr($1,1,length($1)-1), counts, ",");
if (n != 2) {
- error=2
- exit;
+ error=2
+ exit;
}
#
# 3,5c means lines 3 , 4 and 5 are changed, a total of 3 lines.
@@ -2001,25 +2012,25 @@ function build_old_new_git
if [[ "$PDIR" == "." ]]; then
file="$PF"
else
- file="$PDIR/$PF"
+ file="$PDIR/$PF"
fi
if [[ -n $parent_webrev && -e $PWS/$PDIR/$PF ]]; then
cp $PWS/$PDIR/$PF $olddir/$PDIR/$PF
else
- $GIT ls-tree $GIT_PARENT $file | read o_mode type o_object junk
- $GIT cat-file $type $o_object > $olddir/$file 2>/dev/null
-
- if (( $? != 0 )); then
- rm -f $olddir/$file
- elif [[ -n $o_mode ]]; then
- # Strip the first 3 digits, to get a regular octal mode
- o_mode=${o_mode/???/}
- chmod $o_mode $olddir/$file
- else
- # should never happen
- print -u2 "ERROR: set mode of $olddir/$file"
- fi
+ $GIT ls-tree $GIT_PARENT $file | read o_mode type o_object junk
+ $GIT cat-file $type $o_object > $olddir/$file 2>/dev/null
+
+ if (( $? != 0 )); then
+ rm -f $olddir/$file
+ elif [[ -n $o_mode ]]; then
+ # Strip the first 3 digits, to get a regular octal mode
+ o_mode=${o_mode/???/}
+ chmod $o_mode $olddir/$file
+ else
+ # should never happen
+ print -u2 "ERROR: set mode of $olddir/$file"
+ fi
fi
#
@@ -2033,8 +2044,8 @@ function build_old_new_git
rm -rf $newdir/$file
if [[ -e $CWS/$DIR/$F ]]; then
- cp $CWS/$DIR/$F $newdir/$DIR/$F
- chmod $(get_file_mode $CWS/$DIR/$F) $newdir/$DIR/$F
+ cp $CWS/$DIR/$F $newdir/$DIR/$F
+ chmod $(get_file_mode $CWS/$DIR/$F) $newdir/$DIR/$F
fi
cd $OWD
}
@@ -2329,7 +2340,7 @@ if [[ $SCM_MODE == "mercurial" ]]; then
exit 1
fi
else
- testparent=${CODEMGR_WS}
+ testparent=${CODEMGR_WS}
fi
[[ -z $codemgr_ws && -n $testparent ]] && \
codemgr_ws=$(hg root -R $testparent 2>/dev/null)
@@ -2343,7 +2354,7 @@ elif [[ $SCM_MODE == "git" ]]; then
#
[[ -z $codemgr_ws && -n $CODEMGR_WS ]] && \
codemgr_ws=$($GIT --git-dir=$CODEMGR_WS/.git rev-parse --git-dir \
- 2>/dev/null)
+ 2>/dev/null)
[[ -z $codemgr_ws ]] && \
codemgr_ws=$($GIT rev-parse --git-dir 2>/dev/null)
@@ -2607,13 +2618,16 @@ elif [[ $SCM_MODE == "git" ]]; then
this_branch=$($GIT branch | nawk '$1 == "*" { print $2 }')
par_branch="origin/master"
- # If we're not on a branch there's nothing we can do
- if [[ $this_branch != "(no branch)" ]]; then
- $GIT for-each-ref \
- --format='%(refname:short) %(upstream:short)' refs/heads/ | \
- while read local remote; do \
- [[ "$local" == "$this_branch" ]] && par_branch="$remote"; \
- done
+ # If we're not on a branch there's nothing we can do
+ if [[ $this_branch != "(no branch)" ]]; then
+ $GIT for-each-ref \
+ --format='%(refname:short) %(upstream:short)' \
+ refs/heads/ | \
+ while read local remote; do
+ if [[ "$local" == "$this_branch" ]]; then
+ par_branch="$remote"
+ fi
+ done
fi
if [[ -z $codemgr_parent ]]; then
@@ -2697,26 +2711,28 @@ elif [[ $SCM_MODE == "subversion" ]]; then
flist_from_subversion $CWS $OLDPWD
fi
else
- if [[ $SCM_MODE == "unknown" ]]; then
- print -u2 " Unknown type of SCM in use"
- else
- print -u2 " Unsupported SCM in use: $SCM_MODE"
- fi
+ if [[ $SCM_MODE == "unknown" ]]; then
+ print -u2 " Unknown type of SCM in use"
+ else
+ print -u2 " Unsupported SCM in use: $SCM_MODE"
+ fi
- env_from_flist
+ env_from_flist
- if [[ -z $CODEMGR_WS ]]; then
- print -u2 "SCM not detected/supported and CODEMGR_WS not specified"
- exit 1
- fi
+ if [[ -z $CODEMGR_WS ]]; then
+ print -u2 "SCM not detected/supported and " \
+ "CODEMGR_WS not specified"
+ exit 1
+ fi
- if [[ -z $CODEMGR_PARENT ]]; then
- print -u2 "SCM not detected/supported and CODEMGR_PARENT not specified"
- exit 1
- fi
+ if [[ -z $CODEMGR_PARENT ]]; then
+ print -u2 "SCM not detected/supported and " \
+ "CODEMGR_PARENT not specified"
+ exit 1
+ fi
- CWS=$CODEMGR_WS
- PWS=$CODEMGR_PARENT
+ CWS=$CODEMGR_WS
+ PWS=$CODEMGR_PARENT
fi
#
@@ -2871,7 +2887,12 @@ done
#
# Output directory.
#
-WDIR=${WDIR:-$CWS/webrev}
+if [[ $SCM_MODE == "git" ]]; then
+ ws_top_dir=$(dirname $CWS)
+ WDIR=${WDIR:-$ws_top_dir/webrev}
+else
+ WDIR=${WDIR:-$CWS/webrev}
+fi
#
# Name of the webrev, derived from the workspace name or output directory;
@@ -3068,7 +3089,7 @@ do
F=${P##*/}
- else
+ else
DIR=${P%/*}
if [[ "$DIR" == "$P" ]]; then
DIR="." # File at root of workspace
@@ -3104,8 +3125,22 @@ do
#
OWD=$PWD
cd $WDIR/raw_files
- ofile=old/$PDIR/$PF
- nfile=new/$DIR/$F
+
+ #
+ # The "git apply" command does not tolerate the spurious
+ # "./" that we otherwise insert; be careful not to include
+ # it in the paths that we pass to diff(1).
+ #
+ if [[ $PDIR == "." ]]; then
+ ofile=old/$PF
+ else
+ ofile=old/$PDIR/$PF
+ fi
+ if [[ $DIR == "." ]]; then
+ nfile=new/$F
+ else
+ nfile=new/$DIR/$F
+ fi
mv_but_nodiff=
cmp $ofile $nfile > /dev/null 2>&1
@@ -3120,7 +3155,7 @@ do
# - renames must be handled specially: we emit a 'remove'
# diff and an 'add' diff
# - new files and deleted files must be handled specially
- # - GNU patch doesn't interpret the output of illumos diff
+ # - GNU patch doesn't interpret the output of illumos diff
# properly when it comes to adds and deletes. We need to
# do some "cleansing" transformations:
# [to add a file] @@ -1,0 +X,Y @@ --> @@ -0,0 +X,Y @@
@@ -3566,6 +3601,7 @@ do
fi
if [[ $SCM_MODE == "mercurial" ||
+ $SCM_MODE == "git" ||
$SCM_MODE == "unknown" ]]; then
# Include warnings for important file mode situations:
# 1) New executable files
diff --git a/usr/src/tools/scripts/wsdiff.py b/usr/src/tools/scripts/wsdiff.py
index 27458f43fc..4ae79e889f 100644
--- a/usr/src/tools/scripts/wsdiff.py
+++ b/usr/src/tools/scripts/wsdiff.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python2.6
+#!ON_PYTHON_26
#
# CDDL HEADER START
#
diff --git a/usr/src/cmd/ssh/ssh-http-proxy-connect/Makefile b/usr/src/ucbhead/Makefile
index bfe63cb29a..ef9fc7f2fe 100644
--- a/usr/src/cmd/ssh/ssh-http-proxy-connect/Makefile
+++ b/usr/src/ucbhead/Makefile
@@ -19,47 +19,50 @@
#
# CDDL HEADER END
#
-# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+#
+# Copyright 1989-2003 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
+# ucbhead/Makefile
+#
+# include global definitions
+include ../Makefile.master
-PROG= ssh-http-proxy-connect
-
-DIRS= $(ROOTLIBSSH) $(ROOTLIBSUNSSH)
-
-OBJS= ssh-http-proxy-connect.o
-
-SRCS= $(OBJS:.o=.c)
+LN= ln
-POFILE_DIR= ..
+HDRS= curses.h dbm.h setjmp.h sgtty.h signal.h stdio.h strings.h unistd.h
-.KEEP_STATE:
+SYSHDRS= \
+dir.h fcntl.h file.h ioctl.h \
+param.h resource.h rusage.h signal.h \
+sysmacros.h ttychars.h types.h vfs.h \
+wait.h
-.PARALLEL: $(OBJS)
+ROOTHDRS= $(HDRS:%=$(ROOT)/usr/ucbinclude/%) \
+ $(SYSHDRS:%=$(ROOT)/usr/ucbinclude/sys/%)
-all: $(PROG)
+DIRS= sys
+ROOTDIRS= $(ROOT)/usr/ucbinclude $(DIRS:%=$(ROOT)/usr/ucbinclude/%)
-include ../../Makefile.cmd
-include ../Makefile.ssh-common
+FCNTLH= $(ROOT)/usr/ucbinclude/fcntl.h
+SYSFCNTLH= $(ROOT)/usr/ucbinclude/sys/fcntl.h
-LDLIBS += $(SSH_COMMON_LDLIBS) -lsocket
+INS.FCNTLH= $(RM) $@; $(SYMLINK) sys/fcntl.h $@
-install: all $(DIRS) $(ROOTLIBSSHPROG) $(ROOTLIBSSH)
+# install rules
+$(ROOT)/usr/ucbinclude/sys/%: sys/%
+ $(INS.file)
-$(ROOTLIBSSHPROG)/%: %
+$(ROOT)/usr/ucbinclude/%: %
$(INS.file)
-$(DIRS):
- $(INS.dir)
+.KEEP_STATE:
-$(PROG): $(OBJS) ../libssh/$(MACH)/libssh.a ../libopenbsd-compat/$(MACH)/libopenbsd-compat.a
- $(LINK.c) $(OBJS) -o $@ $(LDLIBS)
- $(POST_PROCESS)
+install_h: $(ROOTDIRS) $(ROOTHDRS) $(FCNTLH)
-clean:
- $(RM) -f $(OBJS) $(PROG)
+$(FCNTLH): $(SYSFCNTLH)
+ $(INS.FCNTLH)
-lint: lint_SRCS
+$(ROOTDIRS):
+ $(INS.dir)
-include ../../Makefile.targ
-include ../Makefile.msg.targ
diff --git a/usr/src/ucbhead/sys/file.h b/usr/src/ucbhead/sys/file.h
index 10863380dd..152141fcd7 100644
--- a/usr/src/ucbhead/sys/file.h
+++ b/usr/src/ucbhead/sys/file.h
@@ -77,6 +77,10 @@ typedef struct file
#include <sys/fcntl.h>
#endif
+#if !defined(__XOPEN_OR_POSIX) || defined(__EXTENSIONS__)
+int flock(int, int);
+#endif
+
/* flags - see also fcntl.h */
#ifndef FOPEN
diff --git a/usr/src/ucblib/libucb/port/sys/flock.c b/usr/src/ucblib/libucb/port/sys/flock.c
index d29a07eac0..364d4f1f5f 100644
--- a/usr/src/ucblib/libucb/port/sys/flock.c
+++ b/usr/src/ucblib/libucb/port/sys/flock.c
@@ -34,9 +34,9 @@
/*LINTLIBRARY*/
+#include <sys/fcntl.h>
#include <sys/types.h>
#include <sys/file.h>
-#include <sys/fcntl.h>
#include <errno.h>
int
diff --git a/usr/src/uts/Makefile.mapfile b/usr/src/uts/Makefile.mapfile
new file mode 100644
index 0000000000..45ab0ae22b
--- /dev/null
+++ b/usr/src/uts/Makefile.mapfile
@@ -0,0 +1,43 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2016 Joyent, Inc.
+#
+
+#
+# This Makefile should be included in individual kernel modules to opt
+# into a -z defs world. Note, this should be included after a MAPFILES
+# entry is defined. See uts/common/README.mapfiles for more information.
+#
+
+#
+# Mapfile base
+#
+MAPBASE = $(UTSBASE)/common/mapfiles
+DTRACE_MAPFILE = $(OBJS_DIR)/dtrace.mapfile
+
+#
+# Always append -z defs to the LD FLAGS and append all mapfiles.
+#
+LDFLAGS += -z defs $(MAPFILES:%=-M $(MAPBASE)/%.mapfile) -M $(DTRACE_MAPFILE)
+
+#
+# Definitions and rulesto assemble the DTrace probe mapfile. There's no
+# good way to automatically do this, hence we have a slightly gross
+# series of automated tools. This does mean that we have a bit more work
+# to do, but also means that probes can be added arbitrarily without
+# having to manually edit mapfiles.
+#
+DTRACE_AWK_FILE = $(MAPBASE)/dtrace.mapfile.awk
+
+$(OBJS_DIR)/dtrace.mapfile: $(OBJECTS) $(DTRACE_AWK_FILE)
+ $(NM) -u $(OBJECTS) | $(AWK) -f $(DTRACE_AWK_FILE) > $@
diff --git a/usr/src/uts/Makefile.targ b/usr/src/uts/Makefile.targ
index 034817dc31..72ca278100 100644
--- a/usr/src/uts/Makefile.targ
+++ b/usr/src/uts/Makefile.targ
@@ -20,6 +20,7 @@
#
#
# Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2015, Joyent, Inc.
#
# This Makefiles contains the common targets and definitions for
# all kernels. It is to be included in the Makefiles for specific
@@ -50,7 +51,7 @@ $(OBJECTS): $(INLINES)
# ELFSIGN_MOD is defined in the individual KCF plug-in modules Makefiles,
# and will sign the ELF objects using elfsign(1).
#
-$(BINARY): $(OBJECTS)
+$(BINARY): $(OBJECTS) $(DTRACE_MAPFILE)
$(LD) -r $(LDFLAGS) -o $@ $(OBJECTS)
$(CTFMERGE_UNIQUIFY_AGAINST_GENUNIX)
$(POST_PROCESS)
@@ -205,6 +206,9 @@ $(ROOT_FONT_DIR)/%: $(OBJS_DIR)/% $(ROOT_MOD_DIR) $(ROOT_FONT_DIR) FRC
$(ROOT_MAC_DIR)/%: $(OBJS_DIR)/% $(ROOT_MOD_DIR) $(ROOT_MAC_DIR) FRC
$(INS.file)
+$(ROOT_OVERLAY_DIR)/%: $(OBJS_DIR)/% $(ROOT_MOD_DIR) $(ROOT_OVERLAY_DIR) FRC
+ $(INS.file)
+
$(USR_DRV_DIR)/%: $(OBJS_DIR)/% $(USR_DRV_DIR) FRC
$(INS.file)
diff --git a/usr/src/uts/Makefile.uts b/usr/src/uts/Makefile.uts
index 6a418b34e7..273949fb74 100644
--- a/usr/src/uts/Makefile.uts
+++ b/usr/src/uts/Makefile.uts
@@ -24,6 +24,7 @@
# Copyright (c) 2011 Bayard G. Bell. All rights reserved.
# Copyright (c) 2011 by Delphix. All rights reserved.
# Copyright (c) 2013 Andrew Stormont. All rights reserved.
+# Copyright 2015 Joyent, Inc.
#
#
@@ -213,6 +214,7 @@ AS_CPPFLAGS = $(ALWAYS_DEFS) $(ALL_DEFS) $(CONFIG_DEFS) $(AS_DEFS) \
# Override the default, the kernel is squeaky clean
CERRWARN = -errtags=yes -errwarn=%all
+$(__GNUC4)CERRWARN += -_gcc=-Wno-address
CERRWARN += -_gcc=-Wno-missing-braces
CERRWARN += -_gcc=-Wno-sign-compare
@@ -412,6 +414,7 @@ ROOT_FONT_DIR_32 = $(ROOT_MOD_DIR)/fonts
ROOT_DACF_DIR_32 = $(ROOT_MOD_DIR)/dacf
ROOT_CRYPTO_DIR_32 = $(ROOT_MOD_DIR)/crypto
ROOT_MAC_DIR_32 = $(ROOT_MOD_DIR)/mac
+ROOT_OVERLAY_DIR_32 = $(ROOT_MOD_DIR)/overlay
ROOT_KICONV_DIR_32 = $(ROOT_MOD_DIR)/kiconv
ROOT_KERN_DIR_64 = $(ROOT_MOD_DIR)/$(SUBDIR64)
@@ -439,6 +442,7 @@ ROOT_FONT_DIR_64 = $(ROOT_MOD_DIR)/fonts/$(SUBDIR64)
ROOT_DACF_DIR_64 = $(ROOT_MOD_DIR)/dacf/$(SUBDIR64)
ROOT_CRYPTO_DIR_64 = $(ROOT_MOD_DIR)/crypto/$(SUBDIR64)
ROOT_MAC_DIR_64 = $(ROOT_MOD_DIR)/mac/$(SUBDIR64)
+ROOT_OVERLAY_DIR_64 = $(ROOT_MOD_DIR)/overlay/$(SUBDIR64)
ROOT_KICONV_DIR_64 = $(ROOT_MOD_DIR)/kiconv/$(SUBDIR64)
ROOT_KERN_DIR = $(ROOT_KERN_DIR_$(CLASS))
@@ -466,6 +470,7 @@ ROOT_FONT_DIR = $(ROOT_FONT_DIR_$(CLASS))
ROOT_DACF_DIR = $(ROOT_DACF_DIR_$(CLASS))
ROOT_CRYPTO_DIR = $(ROOT_CRYPTO_DIR_$(CLASS))
ROOT_MAC_DIR = $(ROOT_MAC_DIR_$(CLASS))
+ROOT_OVERLAY_DIR = $(ROOT_OVERLAY_DIR_$(CLASS))
ROOT_KICONV_DIR = $(ROOT_KICONV_DIR_$(CLASS))
ROOT_MOD_DIRS_32 = $(ROOT_BRAND_DIR_32) $(ROOT_DRV_DIR_32)
@@ -483,6 +488,7 @@ ROOT_MOD_DIRS_32 += $(ROOT_EMLXS_FW_DIR_32)
ROOT_MOD_DIRS_32 += $(ROOT_CPU_DIR_32) $(ROOT_FONT_DIR_32)
ROOT_MOD_DIRS_32 += $(ROOT_TOD_DIR_32) $(ROOT_DACF_DIR_32)
ROOT_MOD_DIRS_32 += $(ROOT_CRYPTO_DIR_32) $(ROOT_MAC_DIR_32)
+ROOT_MOD_DIRS_32 += $(ROOT_OVERLAY_DIR_32)
ROOT_MOD_DIRS_32 += $(ROOT_KICONV_DIR_32)
USR_MOD_DIR = $(ROOT)/usr/kernel
@@ -575,7 +581,7 @@ PARALLEL_KMODS = $(DRV_KMODS) $(EXEC_KMODS) $(FS_KMODS) $(SCHED_KMODS) \
$(MMU_KMODS) $(DACF_KMODS) $(EXPORT_KMODS) $(IPP_KMODS) \
$(CRYPTO_KMODS) $(PCBE_KMODS) \
$(DRV_KMODS_$(CLASS)) $(MISC_KMODS_$(CLASS)) $(MAC_KMODS) \
- $(BRAND_KMODS) $(KICONV_KMODS) \
+ $(BRAND_KMODS) $(KICONV_KMODS) $(OVERLAY_KMODS) \
$(SOCKET_KMODS)
KMODS = $(GENUNIX_KMODS) $(PARALLEL_KMODS)
@@ -629,3 +635,11 @@ PRIVS_DEF = $(SRC)/uts/common/os/priv_defs
#
USBDEVS_AWK = $(SRC)/uts/common/io/usb/usbdevs2h.awk
USBDEVS_DATA = $(SRC)/uts/common/io/usb/usbdevs
+
+
+#
+# If we're using the newer CTF tools, then we need to make sure that we
+# are building with the private -X option to ctfconvert which allows us
+# to fixup the struct cpu to account for machcpu.
+#
+$(ENABLE_NEW_CTF)$(_GNUC)CTFCVTFLAGS += -X
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index e46c461c54..16d89ee737 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -21,10 +21,10 @@
#
# Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
-# Copyright (c) 2012 Joyent, Inc. All rights reserved.
# Copyright (c) 2011, 2014 by Delphix. All rights reserved.
# Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
# Copyright 2015 Nexenta Systems, Inc. All rights reserved.
+# Copyright 2016 Joyent, Inc.
# Copyright 2016 Garrett D'Amore <garrett@damore.org>
#
@@ -282,6 +282,7 @@ GENUNIX_OBJS += \
rctl.o \
rctlsys.o \
readlink.o \
+ refhash.o \
refstr.o \
rename.o \
resolvepath.o \
@@ -303,6 +304,7 @@ GENUNIX_OBJS += \
seg_map.o \
seg_vn.o \
seg_spt.o \
+ seg_umap.o \
semaphore.o \
sendfile.o \
session.o \
@@ -428,6 +430,8 @@ PROFILE_OBJS += profile.o
SYSTRACE_OBJS += systrace.o
+LX_SYSTRACE_OBJS += lx_systrace.o
+
LOCKSTAT_OBJS += lockstat.o
FASTTRAP_OBJS += fasttrap.o fasttrap_isa.o
@@ -492,6 +496,10 @@ PTSL_OBJS += tty_pts.o
PTM_OBJS += ptm.o
+LX_PTM_OBJS += lx_ptm.o
+
+LX_NETLINK_OBJS += lx_netlink.o
+
MII_OBJS += mii.o mii_cicada.o mii_natsemi.o mii_intel.o mii_qualsemi.o \
mii_marvell.o mii_realtek.o mii_other.o
@@ -549,6 +557,7 @@ IP_SCTP_OBJS = sctp.o sctp_opt_data.o sctp_output.o \
sctp_addr.o tn_ipopt.o tnet.o ip_netinfo.o \
sctp_misc.o
IP_ILB_OBJS = ilb.o ilb_nat.o ilb_conn.o ilb_alg_hash.o ilb_alg_rr.o
+IP_COMM_OBJS = inet_hash.o
IP_OBJS += igmp.o ipmp.o ip.o ip6.o ip6_asp.o ip6_if.o ip6_ire.o \
ip6_rts.o ip_if.o ip_ire.o ip_listutils.o ip_mroute.o \
@@ -564,7 +573,8 @@ IP_OBJS += igmp.o ipmp.o ip.o ip6.o ip6_asp.o ip6_if.o ip6_ire.o \
$(IP_TCP_OBJS) \
$(IP_UDP_OBJS) \
$(IP_SCTP_OBJS) \
- $(IP_ILB_OBJS)
+ $(IP_ILB_OBJS) \
+ $(IP_COMM_OBJS)
IP6_OBJS += ip6ddi.o
@@ -582,6 +592,8 @@ IPSECESP_OBJS += ipsecespddi.o ipsecesp.o
IPSECAH_OBJS += ipsecahddi.o ipsecah.o sadb.o
+DATAFILT_OBJS += datafilt.o
+
SPPP_OBJS += sppp.o sppp_dlpi.o sppp_mod.o s_common.o
SPPPTUN_OBJS += sppptun.o sppptun_mod.o
@@ -679,6 +691,15 @@ NET80211_OBJS += net80211.o net80211_proto.o net80211_input.o \
VNIC_OBJS += vnic_ctl.o vnic_dev.o
+OVERLAY_OBJS += overlay.o overlay_fm.o overlay_mux.o overlay_plugin.o \
+ overlay_prop.o overlay_target.o
+
+OVERLAY_VXLAN_OBJS += overlay_vxlan.o
+
+VND_OBJS += vnd.o frameio.o
+
+GSQUEUE_OBJS += gsqueue.o
+
SIMNET_OBJS += simnet.o
IB_OBJS += ibnex.o ibnex_ioctl.o ibnex_hca.o
@@ -997,6 +1018,8 @@ SIGNALFD_OBJS += signalfd.o
I8042_OBJS += i8042.o
+INOTIFY_OBJS += inotify.o
+
KB8042_OBJS += \
at_keyprocess.o \
kb8042.o \
@@ -1071,6 +1094,8 @@ QLGE_OBJS += qlge.o qlge_dbg.o qlge_flash.o qlge_fm.o qlge_gld.o qlge_mpi.o
ZCONS_OBJS += zcons.o
+ZFD_OBJS += zfd.o
+
NV_SATA_OBJS += nv_sata.o
SI3124_OBJS += si3124.o
@@ -1124,8 +1149,7 @@ DEVFS_OBJS += devfs_subr.o devfs_vfsops.o devfs_vnops.o
DEV_OBJS += sdev_subr.o sdev_vfsops.o sdev_vnops.o \
sdev_ptsops.o sdev_zvolops.o sdev_comm.o \
sdev_profile.o sdev_ncache.o sdev_netops.o \
- sdev_ipnetops.o \
- sdev_vtops.o
+ sdev_ipnetops.o sdev_vtops.o sdev_plugin.o
CTFS_OBJS += ctfs_all.o ctfs_cdir.o ctfs_ctl.o ctfs_event.o \
ctfs_latest.o ctfs_root.o ctfs_sym.o ctfs_tdir.o ctfs_tmpl.o
@@ -1142,8 +1166,13 @@ PIPE_OBJS += pipe.o
HSFS_OBJS += hsfs_node.o hsfs_subr.o hsfs_vfsops.o hsfs_vnops.o \
hsfs_susp.o hsfs_rrip.o hsfs_susp_subr.o
+HYPRLOFS_OBJS += hyprlofs_dir.o hyprlofs_subr.o \
+ hyprlofs_vnops.o hyprlofs_vfsops.o
+
LOFS_OBJS += lofs_subr.o lofs_vfsops.o lofs_vnops.o
+LXPROC_OBJS += lxpr_subr.o lxpr_vfsops.o lxpr_vnops.o
+
NAMEFS_OBJS += namevfs.o namevno.o
NFS_OBJS += nfs_client.o nfs_common.o nfs_dump.o \
@@ -1295,8 +1324,8 @@ SMBSRV_OBJS += $(SMBSRV_SHARED_OBJS) \
PCFS_OBJS += pc_alloc.o pc_dir.o pc_node.o pc_subr.o \
pc_vfsops.o pc_vnops.o
-PROC_OBJS += prcontrol.o prioctl.o prsubr.o prusrio.o \
- prvfsops.o prvnops.o
+PROC_OBJS += prargv.o prcontrol.o prioctl.o prsubr.o \
+ prusrio.o prvfsops.o prvnops.o
MNTFS_OBJS += mntvfsops.o mntvnops.o
@@ -1438,6 +1467,7 @@ ZFS_COMMON_OBJS += \
zfs_fuid.o \
zfs_sa.o \
zfs_znode.o \
+ zfs_zone.o \
zil.o \
zio.o \
zio_checksum.o \
@@ -1884,7 +1914,7 @@ ZYD_OBJS += zyd.o zyd_usb.o zyd_hw.o zyd_fw.o
MXFE_OBJS += mxfe.o
-MPTSAS_OBJS += mptsas.o mptsas_hash.o mptsas_impl.o mptsas_init.o \
+MPTSAS_OBJS += mptsas.o mptsas_impl.o mptsas_init.o \
mptsas_raid.o mptsas_smhba.o
SFE_OBJS += sfe.o sfe_util.o
@@ -2005,6 +2035,15 @@ IXGBE_OBJS = ixgbe_82598.o ixgbe_82599.o ixgbe_api.o \
ixgbe_tx.o ixgbe_x540.o ixgbe_mbx.o
#
+# Intel 40GbE PCIe NIC driver module
+#
+
+# illumos-written ones.
+I40E_OBJS = i40e_main.o i40e_osdep.o i40e_intr.o i40e_transceiver.o \
+ i40e_stats.o i40e_gld.o
+# Intel-written ones.
+I40E_INTC_OBJS = i40e_adminq.o i40e_common.o i40e_hmc.o i40e_lan_hmc.o \
+ i40e_nvm.o
# Solarflare 1/10/40GbE NIC driver module
#
# NB: The illumos specific sources are listed first, with the
@@ -2103,6 +2142,11 @@ MEGA_SAS_OBJS = megaraid_sas.o
MR_SAS_OBJS = ld_pd_map.o mr_sas.o mr_sas_tbolt.o mr_sas_list.o
#
+# DR_SAS module
+#
+DR_SAS_OBJS = dr_sas.o
+
+#
# CPQARY3 module
#
CPQARY3_OBJS = cpqary3.o cpqary3_noe.o cpqary3_talk2ctlr.o \
@@ -2141,6 +2185,20 @@ NULLDRIVER_OBJS = nulldriver.o
TPM_OBJS = tpm.o tpm_hcall.o
#
+# USB Fast ethernet drivers
+#
+USBGEM_OBJS = usbgem.o
+AXF_OBJS = axf_usbgem.o
+UDMF_OBJS = udmf_usbgem.o
+URF_OBJS = urf_usbgem.o
+UPF_OBJS = upf_usbgem.o
+
+#
+# NFP objects
+#
+NFP_OBJS = hostif.o osif.o drvlist.o i21555.o i21285.o i21555d.o
+
+#
# BNXE objects
#
BNXE_OBJS += bnxe_cfg.o \
diff --git a/usr/src/uts/common/Makefile.rules b/usr/src/uts/common/Makefile.rules
index fdbe9717f3..a80ec6293f 100644
--- a/usr/src/uts/common/Makefile.rules
+++ b/usr/src/uts/common/Makefile.rules
@@ -23,6 +23,7 @@
# Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright 2016 Garrett D'Amore <garrett@damore.org>
# Copyright 2015 Nexenta Systems, Inc. All rights reserved.
+# Copyright 2016 Joyent, Inc.
# Copyright 2013 Saso Kiselkov. All rights reserved.
#
@@ -96,6 +97,10 @@ $(OBJS_DIR)/%.o: $(COMMONBASE)/avl/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(COMMONBASE)/inet/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(COMMONBASE)/ucode/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -244,10 +249,18 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/fs/hsfs/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/fs/hyprlofs/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/fs/lofs/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/fs/lxproc/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/fs/mntfs/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -759,6 +772,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/io/drm/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/dr_sas/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/efe/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -987,6 +1004,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/io/net80211/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/nfp/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/nge/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -1006,6 +1027,14 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/io/nxge/npi/%.c
$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/nxge/%.s
$(COMPILE.s) -o $@ $<
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/overlay/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/overlay/plugins/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/pci-ide/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -1142,6 +1171,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/io/sdcard/targets/sdcard/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/gsqueue/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/sfe/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -1154,6 +1187,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/io/softmac/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/vnd/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/uath/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -1278,6 +1315,30 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/io/usb/usba10/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/usbgem/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/axf/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/udf/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/udmf/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/upf/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/urf/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/vuidmice/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -1358,6 +1419,14 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/io/ixgbe/core/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/i40e/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/i40e/core/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/ntxn/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -1462,9 +1531,14 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/io/vioblk/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(COMMONBASE)/idspace/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/io/vioif/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+
#
# krtld must refer to its own bzero/bcopy until the kernel is fully linked
#
@@ -1533,6 +1607,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/common/pcmcia/pcs/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/refhash/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/rpc/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -1656,6 +1734,9 @@ $(LINTS_DIR)/%.ln: $(COMMONBASE)/acl/%.c
$(LINTS_DIR)/%.ln: $(COMMONBASE)/avl/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(COMMONBASE)/inet/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(COMMONBASE)/ucode/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
@@ -1791,9 +1872,15 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/fs/fifofs/%.c
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/fs/hsfs/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/fs/hyprlofs/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/fs/lofs/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/fs/lxproc/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/fs/mntfs/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
@@ -2139,6 +2226,9 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/dmfe/%.c
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/drm/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/dr_sas/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/efe/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
@@ -2310,6 +2400,9 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/mwl/mwl_fw/%.c
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/net80211/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/nfp/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/nge/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
@@ -2325,6 +2418,12 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/nxge/%.s
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/nxge/npi/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/overlay/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/overlay/plugins/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/pci-ide/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
@@ -2532,6 +2631,21 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/usb/usba/%.c
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/usb/usba10/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/usbgem/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/axf/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/udmf/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/upf/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/urf/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/vuidmice/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
@@ -2592,6 +2706,9 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/ixgbe/%.c
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/ixgbe/core/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/i40e/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/ntxn/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
@@ -2682,6 +2799,9 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/pcmcia/nexus/%.c
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/pcmcia/pcs/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/refhash/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/rpc/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
@@ -2775,3 +2895,6 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/common/io/skd/%.c
$(LINTS_DIR)/%.ln: $(COMMONBASE)/fsreparse/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+
+$(LINTS_DIR)/%.ln: $(COMMONBASE)/idspace/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/common/brand/lx/autofs/lx_autofs.c b/usr/src/uts/common/brand/lx/autofs/lx_autofs.c
new file mode 100644
index 0000000000..c55fc6d95f
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/autofs/lx_autofs.c
@@ -0,0 +1,3152 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
+ */
+
+/*
+ * See the big theory statement in ../sys/lx_autofs.h
+ */
+
+#include <fs/fs_subr.h>
+#include <sys/stat.h>
+#include <sys/atomic.h>
+#include <sys/cmn_err.h>
+#include <sys/dirent.h>
+#include <sys/fs/fifonode.h>
+#include <sys/modctl.h>
+#include <sys/mount.h>
+#include <sys/policy.h>
+#include <sys/sunddi.h>
+#include <sys/conf.h>
+#include <sys/sdt.h>
+
+#include <sys/sysmacros.h>
+#include <sys/vfs.h>
+#include <sys/vfs_opreg.h>
+
+#include <sys/dnlc.h>
+#include <nfs/rnode.h>
+#include <nfs/rnode4.h>
+#include <sys/lx_autofs_impl.h>
+#include <sys/lx_types.h>
+
+/*
+ * External functions
+ */
+extern uintptr_t space_fetch(char *key);
+extern int space_store(char *key, uintptr_t ptr);
+extern int umount2_engine(vfs_t *, int, cred_t *, int);
+
+/*
+ * Globals
+ */
+static vfsops_t *lx_autofs_vfsops;
+static vnodeops_t *lx_autofs_vn_ops = NULL;
+static int lx_autofs_fstype;
+static major_t lx_autofs_major;
+static minor_t lx_autofs_minor = 0;
+static dev_info_t *lx_autofs_dip = NULL;
+
+#define LX_AUTOFS_DEV_VERSION_MAJOR 1
+#define LX_AUTOFS_DEV_VERSION_MINOR 0
+
+/* The Linux autofs superblock magic number */
+#define LX_AUTOFS_SB_MAGIC 0x0187
+
+/* Linux autofs mount types */
+#define LX_AUTOFS_TYPE_INDIRECT 1
+#define LX_AUTOFS_TYPE_DIRECT 2
+#define LX_AUTOFS_TYPE_OFFSET 4
+
+/* Structure passed for autofs dev ioctls */
+typedef struct lx_autofs_dv_ioctl {
+ uint32_t lad_ver_major;
+ uint32_t lad_ver_minor;
+ uint32_t lad_size;
+ uint32_t lad_ioctlfd;
+ uint32_t lad_arg1;
+ uint32_t lad_arg2;
+ char lad_path[0];
+} lx_autofs_dv_ioctl_t;
+
+/*
+ * Support functions
+ */
+static void
+lx_autofs_strfree(char *str)
+{
+ kmem_free(str, strlen(str) + 1);
+}
+
+static char *
+lx_autofs_strdup(char *str)
+{
+ int n = strlen(str);
+ char *ptr = kmem_alloc(n + 1, KM_SLEEP);
+ bcopy(str, ptr, n + 1);
+ return (ptr);
+}
+
+static int
+lx_autofs_str_to_int(char *str, int *val)
+{
+ long res;
+
+ if (str == NULL)
+ return (-1);
+
+ if ((ddi_strtol(str, NULL, 10, &res) != 0) ||
+ (res < INT_MIN) || (res > INT_MAX))
+ return (-1);
+
+ *val = res;
+ return (0);
+}
+
+static void
+ls_autofs_stack_init(list_t *lp)
+{
+ list_create(lp,
+ sizeof (stack_elem_t), offsetof(stack_elem_t, se_list));
+}
+
+static void
+lx_autofs_stack_fini(list_t *lp)
+{
+ ASSERT(list_head(lp) == NULL);
+ list_destroy(lp);
+}
+
+static void
+lx_autofs_stack_push(list_t *lp, caddr_t ptr1, caddr_t ptr2, caddr_t ptr3)
+{
+ stack_elem_t *se;
+
+ se = kmem_alloc(sizeof (*se), KM_SLEEP);
+ se->se_ptr1 = ptr1;
+ se->se_ptr2 = ptr2;
+ se->se_ptr3 = ptr3;
+ list_insert_head(lp, se);
+}
+
+static int
+lx_autofs_stack_pop(list_t *lp, caddr_t *ptr1, caddr_t *ptr2, caddr_t *ptr3)
+{
+ stack_elem_t *se;
+
+ if ((se = list_head(lp)) == NULL)
+ return (-1);
+ list_remove(lp, se);
+ if (ptr1 != NULL)
+ *ptr1 = se->se_ptr1;
+ if (ptr2 != NULL)
+ *ptr2 = se->se_ptr2;
+ if (ptr3 != NULL)
+ *ptr3 = se->se_ptr3;
+ kmem_free(se, sizeof (*se));
+ return (0);
+}
+
+static vnode_t *
+lx_autofs_fifo_peer_vp(vnode_t *vp)
+{
+ fifonode_t *fnp = VTOF(vp);
+ fifonode_t *fn_dest = fnp->fn_dest;
+ return (FTOV(fn_dest));
+}
+
+static vnode_t *
+lx_autofs_vn_alloc(vfs_t *vfsp, vnode_t *uvp)
+{
+ lx_autofs_vfs_t *data = (lx_autofs_vfs_t *)vfsp->vfs_data;
+ vnode_t *vp, *vp_old;
+
+ /* Allocate a new vnode structure in case we need it. */
+ vp = vn_alloc(KM_SLEEP);
+ vn_setops(vp, lx_autofs_vn_ops);
+ VN_SET_VFS_TYPE_DEV(vp, vfsp, uvp->v_type, uvp->v_rdev);
+ vp->v_data = uvp;
+ ASSERT(vp->v_count == 1);
+
+ /*
+ * Take a hold on the vfs structure. This is how unmount will
+ * determine if there are any active vnodes in the file system.
+ */
+ VFS_HOLD(vfsp);
+
+ /*
+ * Check if we already have a vnode allocated for this underlying
+ * vnode_t.
+ */
+ mutex_enter(&data->lav_lock);
+ if (mod_hash_find(data->lav_vn_hash,
+ (mod_hash_key_t)uvp, (mod_hash_val_t *)&vp_old) != 0) {
+
+ /*
+ * Didn't find an existing node.
+ * Add this node to the hash and return.
+ */
+ VERIFY(mod_hash_insert(data->lav_vn_hash,
+ (mod_hash_key_t)uvp,
+ (mod_hash_val_t)vp) == 0);
+ mutex_exit(&data->lav_lock);
+ return (vp);
+ }
+
+ /* Get a hold on the existing vnode and free up the one we allocated. */
+ VN_HOLD(vp_old);
+ mutex_exit(&data->lav_lock);
+
+ /* Free up the new vnode we allocated. */
+ VN_RELE(uvp);
+ VFS_RELE(vfsp);
+ vn_invalid(vp);
+ vn_free(vp);
+
+ return (vp_old);
+}
+
+static void
+lx_autofs_vn_free(vnode_t *vp)
+{
+ vfs_t *vfsp = vp->v_vfsp;
+ lx_autofs_vfs_t *data = (lx_autofs_vfs_t *)vfsp->vfs_data;
+ vnode_t *uvp = vp->v_data;
+ vnode_t *vp_tmp;
+
+ ASSERT(MUTEX_HELD((&data->lav_lock)));
+ ASSERT(MUTEX_HELD((&vp->v_lock)));
+
+ ASSERT(vp->v_count == 0);
+
+ /* We're about to free this vnode so take it out of the hash. */
+ (void) mod_hash_remove(data->lav_vn_hash,
+ (mod_hash_key_t)uvp, (mod_hash_val_t)&vp_tmp);
+
+ /*
+ * No one else can lookup this vnode any more so there's no need
+ * to hold locks.
+ */
+ mutex_exit(&data->lav_lock);
+ mutex_exit(&vp->v_lock);
+
+ /* Release the underlying vnode. */
+ VN_RELE(uvp);
+ VFS_RELE(vfsp);
+ vn_invalid(vp);
+ vn_free(vp);
+}
+
+static lx_autofs_automnt_req_t *
+lx_autofs_la_alloc(lx_autofs_vfs_t *data, boolean_t *is_dup, boolean_t expire,
+ char *nm)
+{
+ lx_autofs_automnt_req_t *laar, *laar_dup;
+
+ /* Pre-allocate a new automounter request before grabbing locks. */
+ laar = kmem_zalloc(sizeof (*laar), KM_SLEEP);
+ mutex_init(&laar->laar_lock, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&laar->laar_cv, NULL, CV_DEFAULT, NULL);
+ laar->laar_ref = 1;
+
+ if (data->lav_min_proto == 5) {
+ laar->laar_pkt.lap_protover = LX_AUTOFS_PROTO_VERS5;
+
+ if (data->lav_mnttype == LXAMT_INDIR) {
+ if (expire) {
+ laar->laar_pkt.lap_type =
+ LX_AUTOFS_PTYPE_EXPIRE_INDIR;
+ } else {
+ laar->laar_pkt.lap_type =
+ LX_AUTOFS_PTYPE_MISSING_INDIR;
+ }
+ } else {
+ if (expire) {
+ laar->laar_pkt.lap_type =
+ LX_AUTOFS_PTYPE_EXPIRE_DIRECT;
+ } else {
+ laar->laar_pkt.lap_type =
+ LX_AUTOFS_PTYPE_MISSING_DIRECT;
+ }
+ }
+ laar->laar_pkt_size = sizeof (lx_autofs_v5_pkt_t);
+
+ laar->laar_pkt.lap_v5.lap_dev = data->lav_dev;
+ laar->laar_pkt.lap_v5.lap_ino = data->lav_ino;
+ /*
+ * Note that we're currently not filling in the other v5 pkt
+ * fields (pid, uid, etc.) since they don't appear to be used
+ * by the automounter. We can fill those in later if it proves
+ * necessary.
+ */
+
+ /*
+ * For indirect mounts the token expected by the automounter is
+ * the name of the directory entry to look up (not the entire
+ * path that is being accessed.) For direct mounts the Linux
+ * kernel passes a dummy name, so this is just as good.
+ */
+ laar->laar_pkt.lap_v5.lap_name_len = strlen(nm);
+ if (laar->laar_pkt.lap_v5.lap_name_len >
+ (sizeof (laar->laar_pkt.lap_v5.lap_name) - 1)) {
+ zcmn_err(getzoneid(), CE_NOTE,
+ "invalid autofs automnt req: \"%s\"", nm);
+ kmem_free(laar, sizeof (*laar));
+ return (NULL);
+ }
+ (void) strlcpy(laar->laar_pkt.lap_v5.lap_name, nm,
+ sizeof (laar->laar_pkt.lap_v5.lap_name));
+
+ } else if (expire) {
+ zcmn_err(getzoneid(), CE_WARN,
+ "unsupported expire protocol request: \"%s\"", nm);
+ kmem_free(laar, sizeof (*laar));
+ return (NULL);
+
+ } else {
+ ASSERT(expire == B_FALSE);
+
+ /* Older protocol pkt (really v2) */
+ laar->laar_pkt.lap_protover = LX_AUTOFS_PROTO_VERS2;
+ laar->laar_pkt.lap_type = LX_AUTOFS_PTYPE_MISSING;
+ laar->laar_pkt_size = sizeof (lx_autofs_v2_pkt_t);
+
+ /*
+ * The token expected by the linux automount is the name of
+ * the directory entry to look up. (And not the entire
+ * path that is being accessed.)
+ */
+ laar->laar_pkt.lap_v2.lap_name_len = strlen(nm);
+ if (laar->laar_pkt.lap_v2.lap_name_len >
+ (sizeof (laar->laar_pkt.lap_v2.lap_name) - 1)) {
+ zcmn_err(getzoneid(), CE_NOTE,
+ "invalid autofs lookup: \"%s\"", nm);
+ kmem_free(laar, sizeof (*laar));
+ return (NULL);
+ }
+ (void) strlcpy(laar->laar_pkt.lap_v2.lap_name, nm,
+ sizeof (laar->laar_pkt.lap_v2.lap_name));
+ }
+
+ /* Assign a unique id for this request. */
+ laar->laar_pkt.lap_id = id_alloc(data->lav_ids);
+
+ /* Check for an outstanding request for this path. */
+ mutex_enter(&data->lav_lock);
+ if (mod_hash_find(data->lav_path_hash,
+ (mod_hash_key_t)nm, (mod_hash_val_t *)&laar_dup) == 0) {
+ /*
+ * There's already an outstanding request for this
+ * path so we don't need a new one.
+ */
+ id_free(data->lav_ids, laar->laar_pkt.lap_id);
+ kmem_free(laar, sizeof (*laar));
+ laar = laar_dup;
+
+ /* Bump the ref count on the old request. */
+ atomic_add_int(&laar->laar_ref, 1);
+
+ *is_dup = 1;
+ } else {
+ /* Add it to the hashes. */
+ VERIFY(mod_hash_insert(data->lav_id_hash,
+ (mod_hash_key_t)(uintptr_t)laar->laar_pkt.lap_id,
+ (mod_hash_val_t)laar) == 0);
+ VERIFY(mod_hash_insert(data->lav_path_hash,
+ (mod_hash_key_t)lx_autofs_strdup(nm),
+ (mod_hash_val_t)laar) == 0);
+
+ *is_dup = 0;
+ }
+ mutex_exit(&data->lav_lock);
+
+ return (laar);
+}
+
+static lx_autofs_automnt_req_t *
+lx_autofs_la_find(lx_autofs_vfs_t *data, int id)
+{
+ lx_autofs_automnt_req_t *laar;
+
+ /* Check for an outstanding request for this id. */
+ mutex_enter(&data->lav_lock);
+ if (mod_hash_find(data->lav_id_hash, (mod_hash_key_t)(uintptr_t)id,
+ (mod_hash_val_t *)&laar) != 0) {
+ mutex_exit(&data->lav_lock);
+ return (NULL);
+ }
+ atomic_add_int(&laar->laar_ref, 1);
+ mutex_exit(&data->lav_lock);
+ return (laar);
+}
+
+static void
+lx_autofs_la_complete(lx_autofs_vfs_t *data, lx_autofs_automnt_req_t *laar)
+{
+ lx_autofs_automnt_req_t *laar_tmp;
+
+ /* Remove this request from the hashes so no one can look it up. */
+ mutex_enter(&data->lav_lock);
+ (void) mod_hash_remove(data->lav_id_hash,
+ (mod_hash_key_t)(uintptr_t)laar->laar_pkt.lap_id,
+ (mod_hash_val_t)&laar_tmp);
+ if (data->lav_min_proto == 5) {
+ (void) mod_hash_remove(data->lav_path_hash,
+ (mod_hash_key_t)laar->laar_pkt.lap_v5.lap_name,
+ (mod_hash_val_t)&laar_tmp);
+ } else {
+ (void) mod_hash_remove(data->lav_path_hash,
+ (mod_hash_key_t)laar->laar_pkt.lap_v2.lap_name,
+ (mod_hash_val_t)&laar_tmp);
+ }
+ mutex_exit(&data->lav_lock);
+
+ /* Mark this requst as complete and wakeup anyone waiting on it. */
+ mutex_enter(&laar->laar_lock);
+ laar->laar_complete = 1;
+ cv_broadcast(&laar->laar_cv);
+ mutex_exit(&laar->laar_lock);
+}
+
+static void
+lx_autofs_la_release(lx_autofs_vfs_t *data, lx_autofs_automnt_req_t *laar)
+{
+ ASSERT(!MUTEX_HELD(&laar->laar_lock));
+ if (atomic_add_int_nv(&laar->laar_ref, -1) > 0)
+ return;
+ ASSERT(laar->laar_ref == 0);
+ id_free(data->lav_ids, laar->laar_pkt.lap_id);
+ kmem_free(laar, sizeof (*laar));
+}
+
+static void
+lx_autofs_la_abort(lx_autofs_vfs_t *data, lx_autofs_automnt_req_t *laar)
+{
+ lx_autofs_automnt_req_t *laar_tmp;
+
+ /*
+ * This is a little tricky. We're aborting the wait for this
+ * request. So if anyone else is waiting for this request we
+ * can't free it, but if no one else is waiting for the request
+ * we should free it.
+ */
+ mutex_enter(&data->lav_lock);
+ if (atomic_add_int_nv(&laar->laar_ref, -1) > 0) {
+ mutex_exit(&data->lav_lock);
+ return;
+ }
+ ASSERT(laar->laar_ref == 0);
+
+ /* Remove this request from the hashes so no one can look it up. */
+ (void) mod_hash_remove(data->lav_id_hash,
+ (mod_hash_key_t)(uintptr_t)laar->laar_pkt.lap_id,
+ (mod_hash_val_t)&laar_tmp);
+ if (data->lav_min_proto == 5) {
+ (void) mod_hash_remove(data->lav_path_hash,
+ (mod_hash_key_t)laar->laar_pkt.lap_v5.lap_name,
+ (mod_hash_val_t)&laar_tmp);
+ } else {
+ (void) mod_hash_remove(data->lav_path_hash,
+ (mod_hash_key_t)laar->laar_pkt.lap_v2.lap_name,
+ (mod_hash_val_t)&laar_tmp);
+ }
+ mutex_exit(&data->lav_lock);
+
+ /* It's ok to free this now because the ref count was zero. */
+ id_free(data->lav_ids, laar->laar_pkt.lap_id);
+ kmem_free(laar, sizeof (*laar));
+}
+
+static int
+lx_autofs_fifo_lookup(pid_t pgrp, int fd, file_t **fpp_wr, file_t **fpp_rd)
+{
+ proc_t *prp;
+ uf_info_t *fip;
+ uf_entry_t *ufp_wr, *ufp_rd = NULL;
+ file_t *fp_wr, *fp_rd = NULL;
+ vnode_t *vp_wr, *vp_rd;
+ int i;
+
+ /*
+ * sprlock() is zone aware, so assuming this mount call was
+ * initiated by a process in a zone, if it tries to specify
+ * a pgrp outside of it's zone this call will fail.
+ *
+ * Also, we want to grab hold of the main automounter process
+ * and its going to be the group leader for pgrp, so its
+ * pid will be equal to pgrp.
+ */
+ prp = sprlock(pgrp);
+ if (prp == NULL)
+ return (-1);
+ mutex_exit(&prp->p_lock);
+
+ /* Now we want to access the processes open file descriptors. */
+ fip = P_FINFO(prp);
+ mutex_enter(&fip->fi_lock);
+
+ /* Sanity check fifo write fd. */
+ if (fd >= fip->fi_nfiles) {
+ mutex_exit(&fip->fi_lock);
+ mutex_enter(&prp->p_lock);
+ sprunlock(prp);
+ return (-1);
+ }
+
+ /* Get a pointer to the write fifo. */
+ UF_ENTER(ufp_wr, fip, fd);
+ if (((fp_wr = ufp_wr->uf_file) == NULL) ||
+ ((vp_wr = fp_wr->f_vnode) == NULL) || (vp_wr->v_type != VFIFO)) {
+ /* Invalid fifo fd. */
+ UF_EXIT(ufp_wr);
+ mutex_exit(&fip->fi_lock);
+ mutex_enter(&prp->p_lock);
+ sprunlock(prp);
+ return (-1);
+ }
+
+ /*
+ * Now we need to find the read end of the fifo (for reasons
+ * explained below.) We assume that the read end of the fifo
+ * is in the same process as the write end.
+ */
+ vp_rd = lx_autofs_fifo_peer_vp(fp_wr->f_vnode);
+ for (i = 0; i < fip->fi_nfiles; i++) {
+ if (i == fd)
+ continue;
+ UF_ENTER(ufp_rd, fip, i);
+ if (((fp_rd = ufp_rd->uf_file) != NULL) &&
+ (fp_rd->f_vnode == vp_rd))
+ break;
+ UF_EXIT(ufp_rd);
+ }
+ if (i == fip->fi_nfiles) {
+ /* Didn't find it. */
+ UF_EXIT(ufp_wr);
+ mutex_exit(&fip->fi_lock);
+ mutex_enter(&prp->p_lock);
+ sprunlock(prp);
+ return (-1);
+ }
+
+ /*
+ * We need to drop fi_lock before we can try to acquire f_tlock
+ * the good news is that the file pointers are protected because
+ * we're still holding uf_lock.
+ */
+ mutex_exit(&fip->fi_lock);
+
+ /*
+ * Here we bump the open counts on the fifos. The reason
+ * that we do this is because when we go to write to the
+ * fifo we want to ensure that they are actually open (and
+ * not in the process of being closed) without having to
+ * stop the automounter. (If the write end of the fifo
+ * were closed and we tried to write to it we would panic.
+ * If the read end of the fifo was closed and we tried to
+ * write to the other end, the process that invoked the
+ * lookup operation would get an unexpected SIGPIPE.)
+ */
+ mutex_enter(&fp_wr->f_tlock);
+ fp_wr->f_count++;
+ ASSERT(fp_wr->f_count >= 2);
+ mutex_exit(&fp_wr->f_tlock);
+
+ mutex_enter(&fp_rd->f_tlock);
+ fp_rd->f_count++;
+ ASSERT(fp_rd->f_count >= 2);
+ mutex_exit(&fp_rd->f_tlock);
+
+ /* Release all our locks. */
+ UF_EXIT(ufp_wr);
+ UF_EXIT(ufp_rd);
+ mutex_enter(&prp->p_lock);
+ sprunlock(prp);
+
+ /* Return the file pointers. */
+ *fpp_rd = fp_rd;
+ *fpp_wr = fp_wr;
+ return (0);
+}
+
+static uint_t
+/*ARGSUSED*/
+lx_autofs_fifo_close_cb(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
+{
+ int *id = (int *)arg;
+ /* Return the key and terminate the walk. */
+ *id = (uintptr_t)key;
+ return (MH_WALK_TERMINATE);
+}
+
+static void
+lx_autofs_fifo_close(lx_autofs_vfs_t *data)
+{
+ /*
+ * Close the fifo to prevent any future requests from
+ * getting sent to the automounter.
+ */
+ mutex_enter(&data->lav_lock);
+ if (data->lav_fifo_wr != NULL) {
+ (void) closef(data->lav_fifo_wr);
+ data->lav_fifo_wr = NULL;
+ }
+ if (data->lav_fifo_rd != NULL) {
+ (void) closef(data->lav_fifo_rd);
+ data->lav_fifo_rd = NULL;
+ }
+ mutex_exit(&data->lav_lock);
+
+ /*
+ * Wakeup any threads currently waiting for the automounter
+ * note that it's possible for multiple threads to have entered
+ * this function and to be doing the work below simultaneously.
+ */
+ for (;;) {
+ lx_autofs_automnt_req_t *laar;
+ int id;
+
+ /* Lookup the first entry in the hash. */
+ id = -1;
+ mod_hash_walk(data->lav_id_hash,
+ lx_autofs_fifo_close_cb, &id);
+ if (id == -1) {
+ /* No more id's in the hash. */
+ break;
+ }
+ if ((laar = lx_autofs_la_find(data, id)) == NULL) {
+ /* Someone else beat us to it. */
+ continue;
+ }
+
+ /* Mark the request as complete and release it. */
+ lx_autofs_la_complete(data, laar);
+ lx_autofs_la_release(data, laar);
+ }
+}
+
+static int
+lx_autofs_fifo_verify_rd(lx_autofs_vfs_t *data)
+{
+ proc_t *prp;
+ uf_info_t *fip;
+ uf_entry_t *ufp_rd = NULL;
+ file_t *fp_rd = NULL;
+ vnode_t *vp_rd;
+ int i;
+
+ ASSERT(MUTEX_HELD((&data->lav_lock)));
+
+ /* Check if we've already been shut down. */
+ if (data->lav_fifo_wr == NULL) {
+ ASSERT(data->lav_fifo_rd == NULL);
+ return (-1);
+ }
+ vp_rd = lx_autofs_fifo_peer_vp(data->lav_fifo_wr->f_vnode);
+
+ /*
+ * sprlock() is zone aware, so assuming this mount call was
+ * initiated by a process in a zone, if it tries to specify
+ * a pgrp outside of it's zone this call will fail.
+ *
+ * Also, we want to grab hold of the main automounter process
+ * and its going to be the group leader for pgrp, so its
+ * pid will be equal to pgrp.
+ */
+ prp = sprlock(data->lav_pgrp);
+ if (prp == NULL)
+ return (-1);
+ mutex_exit(&prp->p_lock);
+
+ /* Now we want to access the processes open file descriptors. */
+ fip = P_FINFO(prp);
+ mutex_enter(&fip->fi_lock);
+
+ /*
+ * Now we need to find the read end of the fifo (for reasons
+ * explained below.) We assume that the read end of the fifo
+ * is in the same process as the write end.
+ */
+ for (i = 0; i < fip->fi_nfiles; i++) {
+ UF_ENTER(ufp_rd, fip, i);
+ if (((fp_rd = ufp_rd->uf_file) != NULL) &&
+ (fp_rd->f_vnode == vp_rd))
+ break;
+ UF_EXIT(ufp_rd);
+ }
+ if (i == fip->fi_nfiles) {
+ /* Didn't find it. */
+ mutex_exit(&fip->fi_lock);
+ mutex_enter(&prp->p_lock);
+ sprunlock(prp);
+ return (-1);
+ }
+
+ /*
+ * Seems the automounter still has the read end of the fifo
+ * open, we're done here. Release all our locks and exit.
+ */
+ mutex_exit(&fip->fi_lock);
+ UF_EXIT(ufp_rd);
+ mutex_enter(&prp->p_lock);
+ sprunlock(prp);
+
+ return (0);
+}
+
+static int
+lx_autofs_fifo_write(lx_autofs_vfs_t *data, lx_autofs_automnt_req_t *laarp)
+{
+ struct uio uio;
+ struct iovec iov;
+ file_t *fp_wr, *fp_rd;
+ int error;
+
+ /*
+ * The catch here is we need to make sure _we_ don't close
+ * the the fifo while writing to it. (Another thread could come
+ * along and realize the automounter process is gone and close
+ * the fifo. To do this we bump the open count before we
+ * write to the fifo.
+ */
+ mutex_enter(&data->lav_lock);
+ if (data->lav_fifo_wr == NULL) {
+ ASSERT(data->lav_fifo_rd == NULL);
+ mutex_exit(&data->lav_lock);
+ return (ENOENT);
+ }
+ fp_wr = data->lav_fifo_wr;
+ fp_rd = data->lav_fifo_rd;
+
+ /* Bump the open count on the write fifo. */
+ mutex_enter(&fp_wr->f_tlock);
+ fp_wr->f_count++;
+ mutex_exit(&fp_wr->f_tlock);
+
+ /* Bump the open count on the read fifo. */
+ mutex_enter(&fp_rd->f_tlock);
+ fp_rd->f_count++;
+ mutex_exit(&fp_rd->f_tlock);
+
+ mutex_exit(&data->lav_lock);
+
+ iov.iov_base = (caddr_t)&laarp->laar_pkt;
+ iov.iov_len = laarp->laar_pkt_size;
+ uio.uio_iov = &iov;
+ uio.uio_iovcnt = 1;
+ uio.uio_loffset = 0;
+ uio.uio_segflg = (short)UIO_SYSSPACE;
+ uio.uio_resid = laarp->laar_pkt_size;
+ uio.uio_llimit = 0;
+ uio.uio_fmode = FWRITE | FNDELAY | FNONBLOCK;
+
+ error = VOP_WRITE(fp_wr->f_vnode, &uio, 0, kcred, NULL);
+ (void) closef(fp_wr);
+ (void) closef(fp_rd);
+
+ /*
+ * After every write we verify that the automounter still has
+ * these files open.
+ */
+ mutex_enter(&data->lav_lock);
+ if (lx_autofs_fifo_verify_rd(data) != 0) {
+ /*
+ * Something happened to the automounter.
+ * Close down the communication pipe we setup.
+ */
+ mutex_exit(&data->lav_lock);
+ lx_autofs_fifo_close(data);
+ if (error != 0)
+ return (error);
+ return (ENOENT);
+ }
+ mutex_exit(&data->lav_lock);
+
+ return (error);
+}
+
+static int
+lx_autofs_bs_readdir(vnode_t *dvp, list_t *dir_stack, list_t *file_stack)
+{
+ struct iovec iov;
+ struct uio uio;
+ dirent64_t *dp, *dbuf;
+ vnode_t *vp;
+ size_t dlen, dbuflen;
+ int eof, error, ndirents = 64;
+ char *nm;
+
+ dlen = ndirents * (sizeof (*dbuf));
+ dbuf = kmem_alloc(dlen, KM_SLEEP);
+
+ uio.uio_iov = &iov;
+ uio.uio_iovcnt = 1;
+ uio.uio_segflg = UIO_SYSSPACE;
+ uio.uio_fmode = 0;
+ uio.uio_extflg = UIO_COPY_CACHED;
+ uio.uio_loffset = 0;
+ uio.uio_llimit = MAXOFFSET_T;
+
+ eof = 0;
+ error = 0;
+ while (!error && !eof) {
+ uio.uio_resid = dlen;
+ iov.iov_base = (char *)dbuf;
+ iov.iov_len = dlen;
+
+ (void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL);
+ if (VOP_READDIR(dvp, &uio, kcred, &eof, NULL, 0) != 0) {
+ VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL);
+ kmem_free(dbuf, dlen);
+ return (-1);
+ }
+ VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL);
+
+ if ((dbuflen = dlen - uio.uio_resid) == 0) {
+ /* We're done. */
+ break;
+ }
+
+ for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
+ dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
+
+ nm = dp->d_name;
+
+ if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
+ continue;
+
+ if (VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, kcred,
+ NULL, NULL, NULL) != 0) {
+ kmem_free(dbuf, dlen);
+ return (-1);
+ }
+ if (vp->v_type == VDIR) {
+ if (dir_stack != NULL) {
+ lx_autofs_stack_push(dir_stack,
+ (caddr_t)dvp,
+ (caddr_t)vp, lx_autofs_strdup(nm));
+ } else {
+ VN_RELE(vp);
+ }
+ } else {
+ if (file_stack != NULL) {
+ lx_autofs_stack_push(file_stack,
+ (caddr_t)dvp,
+ (caddr_t)vp, lx_autofs_strdup(nm));
+ } else {
+ VN_RELE(vp);
+ }
+ }
+ }
+ }
+ kmem_free(dbuf, dlen);
+ return (0);
+}
+
+static void
+lx_autofs_bs_destroy(vnode_t *dvp, char *path)
+{
+ list_t search_stack;
+ list_t dir_stack;
+ list_t file_stack;
+ vnode_t *pdvp, *vp;
+ char *dpath, *fpath;
+ int ret;
+
+ if (VOP_LOOKUP(dvp, path, &vp, NULL, 0, NULL, kcred,
+ NULL, NULL, NULL) != 0) {
+ /* A directory entry with this name doesn't actually exist. */
+ return;
+ }
+
+ if ((vp->v_type & VDIR) == 0) {
+ /* Easy, the directory entry is a file so delete it. */
+ VN_RELE(vp);
+ (void) VOP_REMOVE(dvp, path, kcred, NULL, 0);
+ return;
+ }
+
+ /*
+ * The directory entry is a subdirectory, now we have a bit more
+ * work to do. (We'll have to recurse into the sub directory.)
+ * It would have been much easier to do this recursively but kernel
+ * stacks are notoriously small.
+ */
+ ls_autofs_stack_init(&search_stack);
+ ls_autofs_stack_init(&dir_stack);
+ ls_autofs_stack_init(&file_stack);
+
+ /* Save our newfound subdirectory into a list. */
+ lx_autofs_stack_push(&search_stack, (caddr_t)dvp, (caddr_t)vp,
+ lx_autofs_strdup(path));
+
+ /* Do a recursive depth first search into the subdirectories. */
+ while (lx_autofs_stack_pop(&search_stack,
+ (caddr_t *)&pdvp, (caddr_t *)&dvp, &dpath) == 0) {
+
+ /* Get a list of the subdirectories in this directory. */
+ if (lx_autofs_bs_readdir(dvp, &search_stack, NULL) != 0)
+ goto exit;
+
+ /* Save the current directory a separate stack. */
+ lx_autofs_stack_push(&dir_stack, (caddr_t)pdvp, (caddr_t)dvp,
+ dpath);
+ }
+
+ /*
+ * Now dir_stack contains a list of directories, the deepest paths
+ * are at the top of the list. So let's go through and process them.
+ */
+ while (lx_autofs_stack_pop(&dir_stack,
+ (caddr_t *)&pdvp, (caddr_t *)&dvp, &dpath) == 0) {
+
+ /* Get a list of the files in this directory. */
+ if (lx_autofs_bs_readdir(dvp, NULL, &file_stack) != 0) {
+ VN_RELE(dvp);
+ lx_autofs_strfree(dpath);
+ goto exit;
+ }
+
+ /* Delete all the files in this directory. */
+ while (lx_autofs_stack_pop(&file_stack,
+ NULL, (caddr_t *)&vp, &fpath) == 0) {
+ VN_RELE(vp)
+ ret = VOP_REMOVE(dvp, fpath, kcred, NULL, 0);
+ lx_autofs_strfree(fpath);
+ if (ret != 0) {
+ lx_autofs_strfree(dpath);
+ goto exit;
+ }
+ }
+
+ /* Delete this directory. */
+ VN_RELE(dvp);
+ ret = VOP_RMDIR(pdvp, dpath, pdvp, kcred, NULL, 0);
+ lx_autofs_strfree(dpath);
+ if (ret != 0)
+ goto exit;
+ }
+
+exit:
+ while (
+ (lx_autofs_stack_pop(&search_stack, NULL, (caddr_t *)&vp,
+ &path) == 0) ||
+ (lx_autofs_stack_pop(&dir_stack, NULL, (caddr_t *)&vp,
+ &path) == 0) ||
+ (lx_autofs_stack_pop(&file_stack, NULL, (caddr_t *)&vp,
+ &path) == 0)) {
+ VN_RELE(vp);
+ lx_autofs_strfree(path);
+ }
+ lx_autofs_stack_fini(&search_stack);
+ lx_autofs_stack_fini(&dir_stack);
+ lx_autofs_stack_fini(&file_stack);
+}
+
+static vnode_t *
+lx_autofs_bs_create(vnode_t *dvp, char *bs_name)
+{
+ vnode_t *vp;
+ vattr_t vattr;
+
+ /*
+ * After looking at the mkdir syscall path it seems we don't need
+ * to initialize all of the vattr_t structure.
+ */
+ bzero(&vattr, sizeof (vattr));
+ vattr.va_type = VDIR;
+ vattr.va_mode = 0755; /* u+rwx,og=rx */
+ vattr.va_mask = AT_TYPE|AT_MODE;
+
+ if (VOP_MKDIR(dvp, bs_name, &vattr, &vp, kcred, NULL, 0, NULL) != 0)
+ return (NULL);
+ return (vp);
+}
+
+static int
+lx_autofs_automounter_call(vnode_t *dvp, char *nm)
+{
+ lx_autofs_automnt_req_t *laar;
+ lx_autofs_vfs_t *data;
+ int error;
+ boolean_t is_dup;
+
+ /* Get a pointer to the vfs mount data. */
+ data = (lx_autofs_vfs_t *)dvp->v_vfsp->vfs_data;
+
+ /* The automounter only supports queries in the root directory. */
+ if (dvp != data->lav_root)
+ return (ENOENT);
+
+ /*
+ * Check if the current process is in the automounters process
+ * group. (If it is, the current process is either the autmounter
+ * itself or one of it's forked child processes.) If so, don't
+ * redirect this call back into the automounter because we'll
+ * hang.
+ */
+ mutex_enter(&pidlock);
+ if (data->lav_pgrp == curproc->p_pgrp) {
+ mutex_exit(&pidlock);
+ return (ENOENT);
+ }
+ mutex_exit(&pidlock);
+
+ /* Verify that the automount process pipe still exists. */
+ mutex_enter(&data->lav_lock);
+ if (data->lav_fifo_wr == NULL) {
+ ASSERT(data->lav_fifo_rd == NULL);
+ mutex_exit(&data->lav_lock);
+ return (ENOENT);
+ }
+ mutex_exit(&data->lav_lock);
+
+ /* Allocate an automounter request structure. */
+ if ((laar = lx_autofs_la_alloc(data, &is_dup, B_FALSE,
+ nm)) == NULL)
+ return (ENOENT);
+
+ /*
+ * If we were the first one to allocate this request then we
+ * need to send it to the automounter.
+ */
+ if ((!is_dup) &&
+ ((error = lx_autofs_fifo_write(data, laar)) != 0)) {
+ /*
+ * Unable to send the request to the automounter.
+ * Unblock any other threads waiting on the request
+ * and release the request.
+ */
+ lx_autofs_la_complete(data, laar);
+ lx_autofs_la_release(data, laar);
+ return (error);
+ }
+
+ /* Wait for someone to signal us that this request has completed. */
+ mutex_enter(&laar->laar_lock);
+ while (!laar->laar_complete) {
+ if (cv_wait_sig(&laar->laar_cv, &laar->laar_lock) == 0) {
+ /* We got a signal, abort this call. */
+ mutex_exit(&laar->laar_lock);
+ lx_autofs_la_abort(data, laar);
+ return (EINTR);
+ }
+ }
+ mutex_exit(&laar->laar_lock);
+
+ if (laar->laar_result == LXACR_READY) {
+ /*
+ * Mount succeeded, keep track for future expire calls.
+ *
+ * See vfs lav_vn_hash. Is this something we could use for
+ * iterating mounts under this autofs? Used by
+ * lx_autofs_vn_alloc
+ */
+ lx_autofs_mntent_t *mp;
+
+ mp = kmem_zalloc(sizeof (lx_autofs_mntent_t), KM_SLEEP);
+ mp->lxafme_len = strlen(nm) + 1;
+ mp->lxafme_path = kmem_zalloc(mp->lxafme_len, KM_SLEEP);
+ mp->lxafme_ts = TICK_TO_SEC(ddi_get_lbolt64());
+ (void) strlcpy(mp->lxafme_path, nm, mp->lxafme_len);
+
+ mutex_enter(&data->lav_lock);
+ list_insert_tail(&data->lav_mnt_list, mp);
+ mutex_exit(&data->lav_lock);
+ }
+
+ lx_autofs_la_release(data, laar);
+
+ return (0);
+}
+
+/*
+ * Same preliminary checks as in lx_autofs_unmount.
+ */
+static boolean_t
+lx_autofs_may_unmount(vfs_t *vfsp, struct cred *cr)
+{
+ lx_autofs_vfs_t *data;
+
+ if (secpolicy_fs_unmount(cr, vfsp) != 0)
+ return (B_FALSE);
+
+ /*
+ * We should never have a reference count of less than 2: one for the
+ * caller, one for the root vnode.
+ */
+ ASSERT(vfsp->vfs_count >= 2);
+
+ /* If there are any outstanding vnodes, we can't unmount. */
+ if (vfsp->vfs_count > 2)
+ return (B_FALSE);
+
+ data = (lx_autofs_vfs_t *)vfsp->vfs_data;
+ ASSERT(data->lav_root->v_vfsp == vfsp);
+
+ /* Check for any remaining holds on the root vnode. */
+ if (data->lav_root->v_count > 1)
+ return (B_FALSE);
+
+ return (B_TRUE);
+}
+
+static vfs_t *
+lx_autofs_get_mountvfs(char *fs_mntpt, int *cnt)
+{
+ struct vfs *vfsp;
+ struct vfs *vfslist;
+ vfs_t *fnd_vfs = NULL;
+ int fsmplen;
+ int acnt = 0;
+
+ fsmplen = strlen(fs_mntpt);
+
+ vfs_list_read_lock();
+
+ vfsp = vfslist = curzone->zone_vfslist;
+ if (vfslist == NULL) {
+ vfs_list_unlock();
+ *cnt = 0;
+ return (NULL);
+ }
+
+ do {
+ /* Skip mounts we shouldn't show. */
+ if (!(vfsp->vfs_flag & VFS_NOMNTTAB)) {
+ char *mntpt;
+
+ mntpt = (char *)refstr_value(vfsp->vfs_mntpt);
+ if (strncmp(fs_mntpt, mntpt, fsmplen) == 0 &&
+ (mntpt[fsmplen] == '\0' || mntpt[fsmplen] == '/')) {
+ /*
+ * We'll return the first one we find but don't
+ * return a mount that is actually autofs (i.e.
+ * autofs direct or offset mount).
+ */
+ if (vfsp->vfs_op == lx_autofs_vfsops) {
+ acnt++;
+ } else if (fnd_vfs == NULL) {
+ fnd_vfs = vfsp;
+ VFS_HOLD(fnd_vfs)
+ }
+ }
+ }
+ vfsp = vfsp->vfs_zone_next;
+ } while (vfsp != vfslist);
+
+ vfs_list_unlock();
+
+ *cnt = acnt;
+ return (fnd_vfs);
+}
+
+/*
+ * Unmount all autofs offset mounts below the given path.
+ */
+static boolean_t
+lx_autofs_umount_offset(char *fs_mntpt, struct cred *cr)
+{
+ struct vfs *vfsp;
+ struct vfs *vfslist;
+ boolean_t busy = B_FALSE;
+ int fsmplen = strlen(fs_mntpt);
+
+restart:
+ vfs_list_read_lock();
+
+ vfsp = vfslist = curzone->zone_vfslist;
+ if (vfslist == NULL) {
+ vfs_list_unlock();
+ return (B_FALSE);
+ }
+
+ do {
+ char *mntpt;
+ lx_autofs_vfs_t *data;
+
+ /* Skip mounts we should ignore. */
+ if ((vfsp->vfs_flag & VFS_NOMNTTAB)) {
+ vfsp = vfsp->vfs_zone_next;
+ continue;
+ }
+
+ mntpt = (char *)refstr_value(vfsp->vfs_mntpt);
+ if (strncmp(fs_mntpt, mntpt, fsmplen) != 0 ||
+ (mntpt[fsmplen] != '\0' && mntpt[fsmplen] != '/')) {
+ vfsp = vfsp->vfs_zone_next;
+ continue;
+ }
+
+ if (vfsp->vfs_op != lx_autofs_vfsops) {
+ /*
+ * Something got mounted over the autofs mountpoint
+ * after we checked that this inidrect hierarchy was
+ * not busy.
+ */
+ busy = B_TRUE;
+ break;
+ }
+
+ data = (lx_autofs_vfs_t *)vfsp->vfs_data;
+ if (data->lav_mnttype != LXAMT_OFFSET) {
+ /*
+ * Something mounted a non-offset autofs fs under this
+ * indirect mnt!
+ */
+ busy = B_TRUE;
+ break;
+ }
+
+ /*
+ * Attempt to umount - set busy if fails.
+ *
+ * umount2_engine will call VFS_RELE, so we need to take an
+ * extra hold to match the behavior during the normal umount
+ * path.
+ *
+ * We also need to drop the list lock to prevent deadlock
+ * during umount.
+ */
+ VFS_HOLD(vfsp);
+ vfs_list_unlock();
+ if (umount2_engine(vfsp, 0, cr, 0) != 0) {
+ busy = B_TRUE;
+ goto errexit;
+ }
+
+ /* Retake list lock and look for more. */
+ goto restart;
+ } while (vfsp != vfslist);
+
+ vfs_list_unlock();
+
+errexit:
+ return (busy);
+}
+
+
+/*
+ * Note that lx_autofs_automounter_call() only supports queries in the root
+ * directory, so all mntent names are relative to that.
+ */
+static int
+lx_autofs_expire(vfs_t *vfsp, struct cred *cr)
+{
+ lx_autofs_vfs_t *data;
+ lx_autofs_mntent_t *mp;
+ lx_autofs_automnt_req_t *laar;
+ boolean_t is_dup;
+ vfs_t *fnd_vfs;
+ int autofs_cnt;
+ boolean_t busy = B_FALSE;
+ char exp_path[MAXPATHLEN];
+
+ data = (lx_autofs_vfs_t *)vfsp->vfs_data;
+
+ /*
+ * We process only the first element (i.e. do not do multi). This
+ * works fine for the automounter.
+ */
+ mutex_enter(&data->lav_lock);
+ mp = (lx_autofs_mntent_t *)list_remove_head(&data->lav_mnt_list);
+ mutex_exit(&data->lav_lock);
+ if (mp == NULL) {
+ if (data->lav_mnttype == LXAMT_OFFSET) {
+ /*
+ * During restart the automounter will openmount each
+ * offset mount for management. It won't closemount the
+ * offset mount until we expire it, even though nothing
+ * is mounted over that offset. We handle this as a
+ * special expiration case.
+ */
+ int cnt;
+
+ mutex_enter(&data->lav_lock);
+ cnt = data->lav_openmnt_cnt;
+ mutex_exit(&data->lav_lock);
+
+ if (cnt == 1 && vn_ismntpt(data->lav_root) == 0) {
+ char *mntpt = (char *)
+ refstr_value(vfsp->vfs_mntpt);
+ char *nm = ZONE_PATH_TRANSLATE(mntpt, curzone);
+
+ mp = kmem_zalloc(sizeof (lx_autofs_mntent_t),
+ KM_SLEEP);
+ mp->lxafme_len = strlen(nm) + 1;
+ mp->lxafme_path = kmem_zalloc(mp->lxafme_len,
+ KM_SLEEP);
+ mp->lxafme_ts = TICK_TO_SEC(ddi_get_lbolt64());
+ (void) strlcpy(mp->lxafme_path, nm,
+ mp->lxafme_len);
+
+ goto exp_offset;
+ }
+ }
+
+ return (EAGAIN);
+ }
+
+ /*
+ * We only return an expired mount if it is inactive for the full
+ * timeout. This reduces overly aggressive umount/mount activity.
+ */
+ if (data->lav_timeout > 0) {
+ uint64_t now = TICK_TO_SEC(ddi_get_lbolt64());
+
+ if ((now - mp->lxafme_ts) < data->lav_timeout) {
+ /* put it back at the end of the line */
+ mutex_enter(&data->lav_lock);
+ list_insert_tail(&data->lav_mnt_list, mp);
+ mutex_exit(&data->lav_lock);
+ return (EAGAIN);
+ }
+ }
+
+ if (data->lav_mnttype == LXAMT_INDIR) {
+ (void) snprintf(exp_path, sizeof (exp_path), "%s/%s",
+ (char *)refstr_value(vfsp->vfs_mntpt), mp->lxafme_path);
+ } else {
+ (void) strlcpy(exp_path, (char *)refstr_value(vfsp->vfs_mntpt),
+ sizeof (exp_path));
+ }
+
+ fnd_vfs = lx_autofs_get_mountvfs(exp_path, &autofs_cnt);
+ if (fnd_vfs != NULL) {
+ boolean_t skip = B_FALSE;
+ vfssw_t *vfssw;
+
+ /*
+ * If it's an NFS file system (typical) then we check in
+ * advance to see if it can be unmounted, otherwise, proceed.
+ * The fs-specific umount attempted by the automounter will
+ * either succeed or fail. Both are valid outcomes but checking
+ * now for nfs will save a bunch of work by the automounter
+ * if the fs is busy.
+ *
+ * Unfortunately, for NFS the vfs_fstype is the same for all
+ * versions of NFS, so we need to check the vfs_op member to
+ * determine which version of NFS we're dealing with.
+ */
+ if (!skip && (vfssw = vfs_getvfssw("nfs4")) != NULL) {
+ if (vfs_matchops(fnd_vfs, &vfssw->vsw_vfsops)) {
+ (void) dnlc_purge_vfsp(fnd_vfs, 0);
+ if (check_rtable4(fnd_vfs))
+ busy = B_TRUE;
+ skip = B_TRUE;
+ }
+ vfs_unrefvfssw(vfssw);
+ }
+
+ if (!skip && (vfssw = vfs_getvfssw("nfs3")) != NULL) {
+ if (vfs_matchops(fnd_vfs, &vfssw->vsw_vfsops)) {
+ (void) dnlc_purge_vfsp(fnd_vfs, 0);
+ if (check_rtable(fnd_vfs))
+ busy = B_TRUE;
+ }
+ vfs_unrefvfssw(vfssw);
+ }
+
+ VFS_RELE(fnd_vfs);
+
+ } else if (autofs_cnt > 0) {
+ /*
+ * The automounter is asking us to expire and we pulled this
+ * name from our vfs mountpoint list, but if
+ * lx_autofs_get_mountvfs returns null then that means we
+ * didn't find a non-autofs mount under this name. Thus, the
+ * name could be a subdirectory under an autofs toplevel
+ * indirect mount with one or more offset mounts below.
+ * autofs_cnt will indicate how many autofs mounts exist below
+ * this subdirectory name.
+ *
+ * The automounter will take care of unmounting any fs mounted
+ * over one of these offset mounts (i.e. offset is like a
+ * direct mount which the automounter will manage) but the
+ * automounter will not unmount the actual autofs offset mount
+ * itself, so we have to do that before we can expire the
+ * top-level subrectory name.
+ */
+ busy = lx_autofs_umount_offset(exp_path, cr);
+ }
+
+ if (busy) {
+ /*
+ * Can't unmount this one right now, put it at the end of the
+ * list and return. The caller will return EAGAIN for the
+ * expire ioctl and the automounter will check again later.
+ */
+ mp->lxafme_ts = TICK_TO_SEC(ddi_get_lbolt64());
+ mutex_enter(&data->lav_lock);
+ list_insert_tail(&data->lav_mnt_list, mp);
+ mutex_exit(&data->lav_lock);
+ return (EAGAIN);
+ }
+
+ /*
+ * See lx_autofs_automounter_call. We want to send a msg up the pipe
+ * to the automounter in a similar way.
+ */
+
+exp_offset:
+ /* Verify that the automount process pipe still exists. */
+ mutex_enter(&data->lav_lock);
+ if (data->lav_fifo_wr == NULL) {
+ ASSERT(data->lav_fifo_rd == NULL);
+ mutex_exit(&data->lav_lock);
+ goto err_free;
+ }
+ mutex_exit(&data->lav_lock);
+
+ /* Allocate an automounter expire structure. */
+ if ((laar = lx_autofs_la_alloc(data, &is_dup, B_TRUE,
+ mp->lxafme_path)) == NULL)
+ goto err_free;
+
+ /*
+ * If we were the first one to allocate this request then we
+ * need to send it to the automounter.
+ */
+ if (!is_dup && lx_autofs_fifo_write(data, laar) != 0) {
+ /*
+ * Unable to send the request to the automounter.
+ * Unblock any other threads waiting on the request
+ * and release the request.
+ */
+ lx_autofs_la_complete(data, laar);
+ lx_autofs_la_release(data, laar);
+ goto err_free;
+ }
+
+ /* Wait for someone to signal us that this request has completed. */
+ mutex_enter(&laar->laar_lock);
+ while (!laar->laar_complete) {
+ if (cv_wait_sig(&laar->laar_cv, &laar->laar_lock) == 0) {
+ /* We got a signal, abort this request. */
+ mutex_exit(&laar->laar_lock);
+ lx_autofs_la_abort(data, laar);
+ goto err_free;
+ }
+ }
+ mutex_exit(&laar->laar_lock);
+
+ /*
+ * If it failed or if the file system is still mounted after we get the
+ * response from our expire msg, then that means the automounter tried
+ * to unmount it but failed because the file system is busy, so we put
+ * this entry back on our list to try to expire it again later.
+ */
+ fnd_vfs = NULL;
+ if (laar->laar_result == LXACR_FAIL ||
+ (fnd_vfs = lx_autofs_get_mountvfs(exp_path, &autofs_cnt)) != NULL ||
+ autofs_cnt > 0) {
+ if (fnd_vfs != NULL)
+ VFS_RELE(fnd_vfs);
+ mp->lxafme_ts = TICK_TO_SEC(ddi_get_lbolt64());
+ mutex_enter(&data->lav_lock);
+ list_insert_tail(&data->lav_mnt_list, mp);
+ mutex_exit(&data->lav_lock);
+ } else {
+ kmem_free(mp->lxafme_path, mp->lxafme_len);
+ kmem_free(mp, sizeof (lx_autofs_mntent_t));
+ }
+
+ lx_autofs_la_release(data, laar);
+ return (0);
+
+err_free:
+ kmem_free(mp->lxafme_path, mp->lxafme_len);
+ kmem_free(mp, sizeof (lx_autofs_mntent_t));
+ return (EAGAIN);
+}
+
+static int
+lx_autofs_ack(int reqid, vfs_t *vfsp, enum lx_autofs_callres result)
+{
+ lx_autofs_vfs_t *data;
+ lx_autofs_automnt_req_t *laar;
+
+ data = (lx_autofs_vfs_t *)vfsp->vfs_data;
+ if ((laar = lx_autofs_la_find(data, reqid)) == NULL)
+ return (ENXIO);
+
+ /* Mark the request as complete and release it. */
+ laar->laar_result = result;
+ lx_autofs_la_complete(data, laar);
+ lx_autofs_la_release(data, laar);
+ return (0);
+}
+
+static int
+lx_autofs_automounter_ioctl(vnode_t *vp, int cmd, intptr_t arg, cred_t *cr)
+{
+ lx_autofs_vfs_t *data = (lx_autofs_vfs_t *)vp->v_vfsp->vfs_data;
+ int id = arg;
+ int v;
+ int err;
+
+ /*
+ * Be strict.
+ * We only accept ioctls from the automounter process group.
+ */
+ mutex_enter(&pidlock);
+ if (data->lav_pgrp != curproc->p_pgrp) {
+ mutex_exit(&pidlock);
+ return (ENOENT);
+ }
+ mutex_exit(&pidlock);
+
+ switch (cmd) {
+ case LX_AUTOFS_IOC_READY:
+ if ((err = lx_autofs_ack(id, vp->v_vfsp, LXACR_READY)) != 0)
+ return (err);
+ return (0);
+
+ case LX_AUTOFS_IOC_FAIL:
+ if ((err = lx_autofs_ack(id, vp->v_vfsp, LXACR_FAIL)) != 0)
+ return (err);
+ return (0);
+
+ case LX_AUTOFS_IOC_CATATONIC:
+ /* The automounter is shutting down. */
+ lx_autofs_fifo_close(data);
+ return (0);
+
+ case LX_AUTOFS_IOC_PROTOVER:
+ v = LX_AUTOFS_PROTO_VERS5;
+ if (copyout(&v, (caddr_t)arg, sizeof (int)) != 0)
+ return (EFAULT);
+ return (0);
+
+ case LX_AUTOFS_IOC_PROTOSUBVER:
+ v = LX_AUTOFS_PROTO_SUBVERSION;
+ if (copyout(&v, (caddr_t)arg, sizeof (int)) != 0)
+ return (EFAULT);
+ return (0);
+
+ case LX_AUTOFS_IOC_ASKUMOUNT:
+ /*
+ * This is asking if autofs can be unmounted, not asking to
+ * actually unmount it. We return 1 if it is busy or 0 if it
+ * can be unmounted.
+ */
+ v = 1;
+ if (lx_autofs_may_unmount(vp->v_vfsp, cr))
+ v = 0;
+
+ if (copyout(&v, (caddr_t)arg, sizeof (int)) != 0)
+ return (EFAULT);
+ return (0);
+
+ case LX_AUTOFS_IOC_SETTIMEOUT:
+ if (copyin((caddr_t)arg, &data->lav_timeout, sizeof (ulong_t))
+ != 0)
+ return (EFAULT);
+ return (0);
+
+ case LX_AUTOFS_IOC_EXPIRE:
+ return (ENOTSUP);
+
+ case LX_AUTOFS_IOC_EXPIRE_MULTI:
+ lx_autofs_expire(vp->v_vfsp, cr);
+ return (EAGAIN);
+
+ default:
+ ASSERT(0);
+ return (ENOTSUP);
+ }
+}
+
+static int
+lx_autofs_parse_mntopt(vfs_t *vfsp, lx_autofs_vfs_t *data)
+{
+ char *fd_str, *pgrp_str, *minproto_str, *maxproto_str;
+ int fd, pgrp, minproto, maxproto;
+ file_t *fp_wr, *fp_rd;
+
+ /* Require these options to be present. */
+ if ((vfs_optionisset(vfsp, LX_MNTOPT_FD, &fd_str) != 1) ||
+ (vfs_optionisset(vfsp, LX_MNTOPT_PGRP, &pgrp_str) != 1) ||
+ (vfs_optionisset(vfsp, LX_MNTOPT_MINPROTO, &minproto_str) != 1) ||
+ (vfs_optionisset(vfsp, LX_MNTOPT_MAXPROTO, &maxproto_str) != 1))
+ return (EINVAL);
+
+ /* Get the values for each parameter. */
+ if ((lx_autofs_str_to_int(fd_str, &fd) != 0) ||
+ (lx_autofs_str_to_int(pgrp_str, &pgrp) != 0) ||
+ (lx_autofs_str_to_int(minproto_str, &minproto) != 0) ||
+ (lx_autofs_str_to_int(maxproto_str, &maxproto) != 0))
+ return (EINVAL);
+
+ /*
+ * We primarily support v2 & v5 of the linux kernel automounter
+ * protocol. The userland daemon typically needs v5. We'll reject
+ * unsupported ioctls later if we get one.
+ */
+ if ((minproto > 5) || (maxproto < 2))
+ return (EINVAL);
+
+ /*
+ * Now we need to lookup the fifos we'll be using
+ * to talk to the userland automounter process.
+ */
+ if (lx_autofs_fifo_lookup(pgrp, fd, &fp_wr, &fp_rd) != 0) {
+ /*
+ * The automounter doesn't always have the same id as the pgrp.
+ * This happens when it is started via one of the various
+ * service managers. In this case the fifo lookup will fail
+ * so we retry with our own pid.
+ */
+ int pid = (int)curproc->p_pid;
+
+ if (lx_autofs_fifo_lookup(pid, fd, &fp_wr, &fp_rd) != 0)
+ return (EINVAL);
+ }
+
+ if (vfs_optionisset(vfsp, LX_MNTOPT_INDIRECT, NULL)) {
+ data->lav_mnttype = LXAMT_INDIR;
+ }
+ if (vfs_optionisset(vfsp, LX_MNTOPT_DIRECT, NULL)) {
+ if (data->lav_mnttype != LXAMT_NONE)
+ return (EINVAL);
+ data->lav_mnttype = LXAMT_DIRECT;
+ }
+ if (vfs_optionisset(vfsp, LX_MNTOPT_OFFSET, NULL)) {
+ if (data->lav_mnttype != LXAMT_NONE)
+ return (EINVAL);
+ data->lav_mnttype = LXAMT_OFFSET;
+ }
+ /* The automounter does test mounts with none of the options */
+ if (data->lav_mnttype == LXAMT_NONE)
+ data->lav_mnttype = LXAMT_DIRECT;
+
+ /* Save the mount options and fifo pointers. */
+ data->lav_fd = fd;
+ data->lav_min_proto = minproto;
+ data->lav_pgrp = pgrp;
+ data->lav_fifo_rd = fp_rd;
+ data->lav_fifo_wr = fp_wr;
+ return (0);
+}
+
+static uint64_t
+s2l_dev(dev_t dev)
+{
+ major_t maj = getmajor(dev);
+ minor_t min = getminor(dev);
+
+ return (LX_MAKEDEVICE(maj, min));
+}
+
+/*
+ * VFS entry points
+ */
+static int
+lx_autofs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
+{
+ lx_autofs_vfs_t *data;
+ dev_t dev;
+ char name[40];
+ int error;
+ vattr_t va;
+
+ if (secpolicy_fs_mount(cr, mvp, vfsp) != 0)
+ return (EPERM);
+
+ if (mvp->v_type != VDIR)
+ return (ENOTDIR);
+
+ if ((uap->flags & MS_OVERLAY) == 0 &&
+ (mvp->v_count > 1 || (mvp->v_flag & VROOT)))
+ return (EBUSY);
+
+ /* We don't support mounts in the global zone. */
+ if (getzoneid() == GLOBAL_ZONEID)
+ return (EPERM);
+
+ /*
+ * Offset mounts will occur below the top-level mountpoint so we
+ * need to allow for autofs mounts even though mvp is an autofs.
+ */
+
+ /* Allocate a vfs struct. */
+ data = kmem_zalloc(sizeof (lx_autofs_vfs_t), KM_SLEEP);
+
+ /* Parse mount options. */
+ if ((error = lx_autofs_parse_mntopt(vfsp, data)) != 0) {
+ kmem_free(data, sizeof (lx_autofs_vfs_t));
+ return (error);
+ }
+
+ /* Initialize the backing store. */
+ lx_autofs_bs_destroy(mvp, LX_AUTOFS_BS_DIR);
+ data->lav_bs_vp = lx_autofs_bs_create(mvp, LX_AUTOFS_BS_DIR);
+ if (data->lav_bs_vp == NULL) {
+ kmem_free(data, sizeof (lx_autofs_vfs_t));
+ return (EBUSY);
+ }
+ data->lav_bs_name = LX_AUTOFS_BS_DIR;
+
+ /* Get the backing store inode for use in v5 protocol msgs */
+ va.va_mask = AT_STAT;
+ if ((error = VOP_GETATTR(data->lav_bs_vp, &va, 0, cr, NULL)) != 0) {
+ kmem_free(data, sizeof (lx_autofs_vfs_t));
+ return (error);
+ }
+ data->lav_ino = va.va_nodeid;
+
+ /* We have to hold the underlying vnode we're mounted on. */
+ data->lav_mvp = mvp;
+ VN_HOLD(mvp);
+
+ /* Initialize vfs fields */
+ vfsp->vfs_bsize = DEV_BSIZE;
+ vfsp->vfs_fstype = lx_autofs_fstype;
+ vfsp->vfs_data = data;
+
+ /* Invent a dev_t (sigh) */
+ do {
+ dev = makedevice(lx_autofs_major,
+ atomic_add_32_nv(&lx_autofs_minor, 1) & L_MAXMIN32);
+ } while (vfs_devismounted(dev));
+ vfsp->vfs_dev = dev;
+ vfs_make_fsid(&vfsp->vfs_fsid, dev, lx_autofs_fstype);
+
+ data->lav_dev = s2l_dev(vfsp->vfs_dev);
+
+ /* Create an id space arena for automounter requests. */
+ (void) snprintf(name, sizeof (name), "lx_autofs_id_%d",
+ getminor(vfsp->vfs_dev));
+ data->lav_ids = id_space_create(name, 1, INT_MAX);
+
+ /* Create hashes to keep track of automounter requests. */
+ mutex_init(&data->lav_lock, NULL, MUTEX_DEFAULT, NULL);
+ (void) snprintf(name, sizeof (name), "lx_autofs_path_hash_%d",
+ getminor(vfsp->vfs_dev));
+ data->lav_path_hash = mod_hash_create_strhash(name,
+ LX_AUTOFS_VFS_PATH_HASH_SIZE, mod_hash_null_valdtor);
+ (void) snprintf(name, sizeof (name), "lx_autofs_id_hash_%d",
+ getminor(vfsp->vfs_dev));
+ data->lav_id_hash = mod_hash_create_idhash(name,
+ LX_AUTOFS_VFS_ID_HASH_SIZE, mod_hash_null_valdtor);
+
+ /* Create a hash to keep track of vnodes. */
+ (void) snprintf(name, sizeof (name), "lx_autofs_vn_hash_%d",
+ getminor(vfsp->vfs_dev));
+ data->lav_vn_hash = mod_hash_create_ptrhash(name,
+ LX_AUTOFS_VFS_VN_HASH_SIZE, mod_hash_null_valdtor,
+ sizeof (vnode_t));
+
+ list_create(&data->lav_mnt_list, sizeof (lx_autofs_mntent_t),
+ offsetof(lx_autofs_mntent_t, lxafme_lst));
+
+ /* Create root vnode */
+ data->lav_root = lx_autofs_vn_alloc(vfsp, data->lav_bs_vp);
+
+ data->lav_root->v_flag |= VROOT | VNOCACHE | VNOMAP | VNOSWAP;
+
+ /*
+ * For a direct mountpoint we need to allow a filesystem to be
+ * mounted overtop of this autofs mount. Otherwise, disallow that.
+ */
+ if (data->lav_mnttype == LXAMT_INDIR)
+ data->lav_root->v_flag |= VNOMOUNT;
+
+ return (0);
+}
+
+static int
+lx_autofs_unmount(vfs_t *vfsp, int flag, struct cred *cr)
+{
+ lx_autofs_vfs_t *data;
+
+ if (secpolicy_fs_unmount(cr, vfsp) != 0)
+ return (EPERM);
+
+ /* We do not currently support forced unmounts. */
+ if (flag & MS_FORCE)
+ return (ENOTSUP);
+
+ /*
+ * We should never have a reference count of less than 2: one for the
+ * caller, one for the root vnode.
+ */
+ ASSERT(vfsp->vfs_count >= 2);
+
+ /* If there are any outstanding vnodes, we can't unmount. */
+ if (vfsp->vfs_count > 2)
+ return (EBUSY);
+
+ /* Check for any remaining holds on the root vnode. */
+ data = (lx_autofs_vfs_t *)vfsp->vfs_data;
+ ASSERT(data->lav_root->v_vfsp == vfsp);
+ if (data->lav_root->v_count > 1)
+ return (EBUSY);
+
+ /* Close the fifo to the automount process. */
+ if (data->lav_fifo_wr != NULL)
+ (void) closef(data->lav_fifo_wr);
+ if (data->lav_fifo_rd != NULL)
+ (void) closef(data->lav_fifo_rd);
+
+ /*
+ * We have to release our hold on our root vnode before we can
+ * delete the backing store. (Since the root vnode is linked
+ * to the backing store.)
+ */
+ VN_RELE(data->lav_root);
+
+ /* Cleanup the backing store. */
+ lx_autofs_bs_destroy(data->lav_mvp, data->lav_bs_name);
+ VN_RELE(data->lav_mvp);
+
+ /*
+ * Delete all listed mounts.
+ */
+ for (;;) {
+ lx_autofs_mntent_t *mp;
+
+ mp = list_remove_head(&data->lav_mnt_list);
+ if (mp == NULL)
+ break;
+ kmem_free(mp->lxafme_path, mp->lxafme_len);
+ kmem_free(mp, sizeof (lx_autofs_mntent_t));
+ }
+
+ /* Cleanup out remaining data structures. */
+ mod_hash_destroy_strhash(data->lav_path_hash);
+ mod_hash_destroy_idhash(data->lav_id_hash);
+ mod_hash_destroy_ptrhash(data->lav_vn_hash);
+ id_space_destroy(data->lav_ids);
+ list_destroy(&data->lav_mnt_list);
+ kmem_free(data, sizeof (lx_autofs_vfs_t));
+
+ return (0);
+}
+
+static int
+lx_autofs_root(vfs_t *vfsp, vnode_t **vpp)
+{
+ lx_autofs_vfs_t *data = (lx_autofs_vfs_t *)vfsp->vfs_data;
+
+ *vpp = data->lav_root;
+ VN_HOLD(*vpp);
+
+ return (0);
+}
+
+static int
+lx_autofs_statvfs(vfs_t *vfsp, statvfs64_t *sp)
+{
+ lx_autofs_vfs_t *data = (lx_autofs_vfs_t *)vfsp->vfs_data;
+ vnode_t *urvp = data->lav_root->v_data;
+ dev32_t d32;
+ int error;
+
+ if ((error = VFS_STATVFS(urvp->v_vfsp, sp)) != 0)
+ return (error);
+
+ /* Update some of values before returning. */
+ (void) cmpldev(&d32, vfsp->vfs_dev);
+ sp->f_fsid = d32;
+ (void) strlcpy(sp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name,
+ sizeof (sp->f_basetype));
+ sp->f_flag = vf_to_stf(vfsp->vfs_flag);
+ bzero(sp->f_fstr, sizeof (sp->f_fstr));
+ return (0);
+}
+
+static const fs_operation_def_t lx_autofs_vfstops[] = {
+ { VFSNAME_MOUNT, { .vfs_mount = lx_autofs_mount } },
+ { VFSNAME_UNMOUNT, { .vfs_unmount = lx_autofs_unmount } },
+ { VFSNAME_ROOT, { .vfs_root = lx_autofs_root } },
+ { VFSNAME_STATVFS, { .vfs_statvfs = lx_autofs_statvfs } },
+ { NULL, NULL }
+};
+
+/*
+ * VOP entry points - simple passthrough
+ *
+ * For most VOP entry points we can simply pass the request on to
+ * the underlying filesystem we're mounted on.
+ */
+static int
+lx_autofs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
+ caller_context_t *ctp)
+{
+ vnode_t *uvp = vp->v_data;
+ return (VOP_CLOSE(uvp, flag, count, offset, cr, ctp));
+}
+
+static int
+lx_autofs_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp,
+ caller_context_t *ctp, int flags)
+{
+ vnode_t *uvp = vp->v_data;
+ return (VOP_READDIR(uvp, uiop, cr, eofp, ctp, flags));
+}
+
+static int
+lx_autofs_access(vnode_t *vp, int mode, int flags, cred_t *cr,
+ caller_context_t *ctp)
+{
+ vnode_t *uvp = vp->v_data;
+ return (VOP_ACCESS(uvp, mode, flags, cr, ctp));
+}
+
+static int
+lx_autofs_rwlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
+{
+ vnode_t *uvp = vp->v_data;
+ return (VOP_RWLOCK(uvp, write_lock, ctp));
+}
+
+static void
+lx_autofs_rwunlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
+{
+ vnode_t *uvp = vp->v_data;
+ VOP_RWUNLOCK(uvp, write_lock, ctp);
+}
+
+/*
+ * Check if attempting to access a 'direct' mount and if so, call the
+ * automounter to perform the mount. Once the mount occurs, the new filesystem
+ * will be mounted overtop of this autofs mountpoint and we will no longer
+ * come through this path.
+ */
+static vnode_t *
+lx_autofs_do_direct(vnode_t *vp)
+{
+ vfs_t *vfsp = vp->v_vfsp;
+ lx_autofs_vfs_t *data = (lx_autofs_vfs_t *)vfsp->vfs_data;
+ vnode_t *nvp;
+ boolean_t skip_am_call = B_FALSE;
+
+ if (data->lav_mnttype == LXAMT_INDIR)
+ return (NULL);
+
+ /*
+ * Check if the current process is in the automounter's process group.
+ * If it is, the current process is either the automounter itself or
+ * one of it's children. If so, don't call back into the automounter.
+ */
+ mutex_enter(&pidlock);
+ if (data->lav_pgrp == curproc->p_pgrp) {
+ skip_am_call = B_TRUE;
+ }
+ mutex_exit(&pidlock);
+
+ /*
+ * It is possible there is already a new fs mounted on top of our vnode.
+ * This can happen if the caller first did a lookup of a file name
+ * using our vnode as the directory vp. The lookup would trigger the
+ * autofs mount on top of ourself, but if the caller then uses our
+ * vnode to do a getattr on the directory, it will use the autofs
+ * vnode and not the newly mounted vnode. We need to skip re-calling
+ * the automounter for this case.
+ */
+ if (!skip_am_call && vn_mountedvfs(vp) == NULL) {
+ char tbuf[MAXPATHLEN];
+ char *nm;
+
+ (void) strlcpy(tbuf, (char *)refstr_value(vfsp->vfs_mntpt),
+ sizeof (tbuf));
+ nm = tbuf + strlen(tbuf);
+ while (*nm != '/' && nm != tbuf)
+ nm--;
+ if (*nm == '/')
+ nm++;
+ (void) lx_autofs_automounter_call(vp, nm);
+ }
+
+ /*
+ * We need to take an extra hold on our vp (which is the autofs
+ * root vp) to account for the rele done in traverse. traverse will
+ * take a hold on the new vp so the caller is responsible for calling
+ * VN_RELE on the returned vp.
+ */
+ VN_HOLD(vp);
+ nvp = vp;
+ if (traverse(&nvp) != 0) {
+ VN_RELE(nvp);
+ return (NULL);
+ }
+
+ /* Confirm that we have a non-autofs fs mounted now */
+ if (nvp->v_op == lx_autofs_vn_ops) {
+ VN_RELE(nvp);
+ return (NULL);
+ }
+
+ return (nvp);
+}
+
+/*ARGSUSED*/
+static int
+lx_autofs_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
+ caller_context_t *ctp, int flags)
+{
+ vnode_t *udvp = dvp->v_data;
+ vnode_t *nvp;
+
+ /* handle direct mount here */
+ if ((nvp = lx_autofs_do_direct(dvp)) != NULL) {
+ int error;
+
+ error = VOP_RMDIR(nvp, nm, cdir, cr, ctp, flags);
+ VN_RELE(nvp);
+ return (error);
+ }
+
+ /*
+ * cdir is the calling processes current directory.
+ * If cdir is lx_autofs vnode then get its real underlying
+ * vnode ptr. (It seems like the only thing cdir is
+ * ever used for is to make sure the user doesn't delete
+ * their current directory.)
+ */
+ if (vn_matchops(cdir, lx_autofs_vn_ops)) {
+ vnode_t *ucdir = cdir->v_data;
+ return (VOP_RMDIR(udvp, nm, ucdir, cr, ctp, flags));
+ }
+
+ return (VOP_RMDIR(udvp, nm, cdir, cr, ctp, flags));
+}
+
+/*
+ * VOP entry points - special passthrough
+ *
+ * For some VOP entry points we will first pass the request on to
+ * the underlying filesystem we're mounted on. If there's an error
+ * then we immediately return the error, but if the request succeeds
+ * we have to do some extra work before returning.
+ */
+static int
+lx_autofs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ctp)
+{
+ vnode_t *ovp = *vpp;
+ vnode_t *uvp = ovp->v_data;
+ int error;
+
+ /* direct mounts were handled by the lookup to get *vpp */
+
+ if ((error = VOP_OPEN(&uvp, flag, cr, ctp)) != 0)
+ return (error);
+
+ /* Check for clone opens. */
+ if (uvp == ovp->v_data)
+ return (0);
+
+ /* Deal with clone opens by returning a new vnode. */
+ *vpp = lx_autofs_vn_alloc(ovp->v_vfsp, uvp);
+ VN_RELE(ovp);
+ return (0);
+}
+
+static int
+lx_autofs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
+ caller_context_t *ctp)
+{
+ vnode_t *uvp = vp->v_data;
+ vnode_t *dvp;
+ int error;
+ lx_autofs_vfs_t *data = (lx_autofs_vfs_t *)vp->v_vfsp->vfs_data;
+
+ if ((dvp = lx_autofs_do_direct(vp)) != NULL) {
+ uvp = dvp;
+ }
+
+ error = VOP_GETATTR(uvp, vap, flags, cr, ctp);
+
+ if (dvp != NULL) {
+ /* we operated on the direct mounted fs */
+ VN_RELE(dvp);
+ if (error == 0) {
+ /*
+ * During automounter restart recovery the automounter
+ * will fstat the fd provided in the setpipe ioctl. It
+ * uses the resulting inode & dev to correlate future
+ * autofs fifo requests to the correct entry. Thus, we
+ * have to update the attributes with our own id's.
+ */
+ vap->va_fsid = data->lav_dev;
+ vap->va_nodeid = data->lav_ino;
+ }
+ } else if (error == 0) {
+ /* Update the attributes with our filesystem id. */
+ vap->va_fsid = data->lav_dev;
+ }
+
+ return (error);
+}
+
+static int
+lx_autofs_mkdir(vnode_t *dvp, char *nm, struct vattr *vap, vnode_t **vpp,
+ cred_t *cr, caller_context_t *ctp, int flags, vsecattr_t *vsecp)
+{
+ vnode_t *udvp = dvp->v_data;
+ vnode_t *nvp;
+ int error;
+
+ if ((nvp = lx_autofs_do_direct(dvp)) != NULL) {
+ udvp = nvp;
+ }
+
+ error = VOP_MKDIR(udvp, nm, vap, vpp, cr, ctp, flags, vsecp);
+
+ if (nvp != NULL) {
+ /* we operated on the direct mounted fs */
+ VN_RELE(nvp);
+ } else if (error == 0) {
+ vnode_t *uvp = NULL;
+
+ /* Update the attributes with our filesystem id. */
+ vap->va_fsid = dvp->v_vfsp->vfs_dev;
+
+ /* Allocate our new vnode. */
+ uvp = *vpp;
+ *vpp = lx_autofs_vn_alloc(dvp->v_vfsp, uvp);
+ }
+
+ return (error);
+}
+
+/*
+ * VOP entry points - custom
+ */
+/*ARGSUSED*/
+static void
+lx_autofs_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ctp)
+{
+ lx_autofs_vfs_t *data = (lx_autofs_vfs_t *)vp->v_vfsp->vfs_data;
+
+ /*
+ * We need to hold the vfs lock because if we're going to free
+ * this vnode we have to prevent anyone from looking it up
+ * in the vnode hash.
+ */
+ mutex_enter(&data->lav_lock);
+ mutex_enter(&vp->v_lock);
+
+ if (vp->v_count < 1) {
+ panic("lx_autofs_inactive: bad v_count");
+ /*NOTREACHED*/
+ }
+
+ /* Drop the temporary hold by vn_rele now. */
+ if (--vp->v_count > 0) {
+ mutex_exit(&vp->v_lock);
+ mutex_exit(&data->lav_lock);
+ return;
+ }
+
+ /*
+ * No one should have been blocked on this lock because we're
+ * about to free this vnode.
+ */
+ lx_autofs_vn_free(vp);
+}
+
+static int
+lx_autofs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
+ int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ctp,
+ int *direntflags, pathname_t *realpnp)
+{
+ vnode_t *udvp = dvp->v_data;
+ vnode_t *uvp = NULL;
+ lx_autofs_vfs_t *data;
+ int error = ENOENT;
+
+ data = (lx_autofs_vfs_t *)dvp->v_vfsp->vfs_data;
+
+ /*
+ * For an indirect mount first try to lookup if this path component
+ * already exists.
+ */
+ if (data->lav_mnttype == LXAMT_INDIR) {
+ if ((error = VOP_LOOKUP(udvp, nm, &uvp, pnp, flags, rdir, cr,
+ ctp, direntflags, realpnp)) == 0) {
+ *vpp = lx_autofs_vn_alloc(dvp->v_vfsp, uvp);
+ return (0);
+ }
+ }
+
+ /* Only query the automounter if the path does not exist. */
+ if (error != ENOENT)
+ return (error);
+
+ if (data->lav_catatonic)
+ return (ENOENT);
+
+ /* Save the uid/gid for the requestor ioctl. */
+ data->lav_uid = crgetuid(cr);
+ data->lav_gid = crgetgid(cr);
+
+ /* Refer the lookup to the automounter. */
+ if ((error = lx_autofs_automounter_call(dvp, nm)) != 0)
+ return (error);
+
+ if (data->lav_mnttype == LXAMT_INDIR) {
+ /*
+ * Indirect mount. The automounter call should have mounted
+ * something on nm. Retry the lookup operation.
+ */
+ if ((error = VOP_LOOKUP(udvp, nm, &uvp, pnp, flags, rdir, cr,
+ ctp, direntflags, realpnp)) == 0) {
+ *vpp = lx_autofs_vn_alloc(dvp->v_vfsp, uvp);
+ return (0);
+ }
+ } else {
+ /*
+ * Direct or offset mount. The automounter call should have
+ * covered our 'dvp' with a new filesystem. Traverse into the
+ * new mount and retry the lookup.
+ *
+ * We need to take an extra hold on our vp (which is the autofs
+ * root vp) to acount for the rele done in traverse. Our caller
+ * will also do a rele on the original dvp and that would leave
+ * us one ref short on our autofs root vnode.
+ */
+ VN_HOLD(dvp);
+ if ((error = traverse(&dvp)) != 0) {
+ VN_RELE(dvp);
+ return (error);
+ }
+
+ error = VOP_LOOKUP(dvp, nm, vpp, pnp, flags, rdir, cr, ctp,
+ direntflags, realpnp);
+
+ /* release the traverse hold */
+ VN_RELE(dvp);
+ }
+ return (error);
+}
+
+static int
+lx_autofs_ioctl(vnode_t *vp, int cmd, intptr_t arg, int mode, cred_t *cr,
+ int *rvalp, caller_context_t *ctp)
+{
+ vnode_t *uvp = vp->v_data;
+
+ /* Intercept our ioctls. */
+ switch ((uint_t)cmd) {
+ case LX_AUTOFS_IOC_READY:
+ case LX_AUTOFS_IOC_FAIL:
+ case LX_AUTOFS_IOC_CATATONIC:
+ case LX_AUTOFS_IOC_PROTOVER:
+ case LX_AUTOFS_IOC_SETTIMEOUT:
+ case LX_AUTOFS_IOC_EXPIRE:
+ case LX_AUTOFS_IOC_EXPIRE_MULTI:
+ case LX_AUTOFS_IOC_PROTOSUBVER:
+ case LX_AUTOFS_IOC_ASKUMOUNT:
+ return (lx_autofs_automounter_ioctl(vp, cmd, arg, cr));
+ }
+
+ /* Pass any remaining ioctl on. */
+ return (VOP_IOCTL(uvp, cmd, arg, mode, cr, rvalp, ctp));
+}
+
+/*
+ * VOP entry points definitions
+ */
+static const fs_operation_def_t lx_autofs_tops_root[] = {
+ { VOPNAME_OPEN, { .vop_open = lx_autofs_open } },
+ { VOPNAME_CLOSE, { .vop_close = lx_autofs_close } },
+ { VOPNAME_IOCTL, { .vop_ioctl = lx_autofs_ioctl } },
+ { VOPNAME_RWLOCK, { .vop_rwlock = lx_autofs_rwlock } },
+ { VOPNAME_RWUNLOCK, { .vop_rwunlock = lx_autofs_rwunlock } },
+ { VOPNAME_GETATTR, { .vop_getattr = lx_autofs_getattr } },
+ { VOPNAME_ACCESS, { .vop_access = lx_autofs_access } },
+ { VOPNAME_READDIR, { .vop_readdir = lx_autofs_readdir } },
+ { VOPNAME_LOOKUP, { .vop_lookup = lx_autofs_lookup } },
+ { VOPNAME_INACTIVE, { .vop_inactive = lx_autofs_inactive } },
+ { VOPNAME_MKDIR, { .vop_mkdir = lx_autofs_mkdir } },
+ { VOPNAME_RMDIR, { .vop_rmdir = lx_autofs_rmdir } },
+ { NULL }
+};
+
+/*
+ * DEV-specific entry points
+ */
+
+/*ARGSUSED*/
+static int
+lx_autofs_dev_open(dev_t *devp, int flags, int otyp, cred_t *credp)
+{
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+lx_autofs_dev_close(dev_t dev, int flags, int otyp, cred_t *credp)
+{
+ return (0);
+}
+
+static int
+lx_autofs_dev_validate_cmd(intptr_t arg, lx_autofs_dv_ioctl_t *dcmd)
+{
+ if (copyin((caddr_t)arg, dcmd, sizeof (lx_autofs_dv_ioctl_t)) != 0)
+ return (EFAULT);
+
+ if (dcmd->lad_ver_major != LX_AUTOFS_DEV_VERSION_MAJOR ||
+ dcmd->lad_ver_minor > LX_AUTOFS_DEV_VERSION_MINOR)
+ return (EINVAL);
+
+ DTRACE_PROBE1(lx__dev__cmd, void *, dcmd);
+
+ /* Fill in the version for return */
+ dcmd->lad_ver_major = LX_AUTOFS_DEV_VERSION_MAJOR;
+ dcmd->lad_ver_minor = LX_AUTOFS_DEV_VERSION_MINOR;
+ return (0);
+}
+
+static vfs_t *
+lx_autofs_dev_getvfs_bypath(char *fs_mntpt)
+{
+ struct vfs *vfsp;
+ struct vfs *vfslist;
+ vfs_t *fnd_vfs = NULL;
+ zone_t *zone = curzone;
+
+ vfs_list_read_lock();
+
+ vfsp = vfslist = curzone->zone_vfslist;
+ if (vfslist == NULL) {
+ vfs_list_unlock();
+ return (NULL);
+ }
+
+ do {
+ if (vfsp->vfs_op == lx_autofs_vfsops) {
+ char *mntpt = (char *)refstr_value(vfsp->vfs_mntpt);
+
+ if (strcmp(fs_mntpt, ZONE_PATH_TRANSLATE(mntpt, zone))
+ == 0) {
+ fnd_vfs = vfsp;
+ VFS_HOLD(fnd_vfs)
+ break;
+ }
+ }
+ vfsp = vfsp->vfs_zone_next;
+ } while (vfsp != vfslist);
+
+ vfs_list_unlock();
+
+ return (fnd_vfs);
+}
+
+static int
+lx_autofs_dev_fd_preamble(intptr_t arg, lx_autofs_dv_ioctl_t *dc, vfs_t **vfspp)
+{
+ int err;
+ lx_autofs_vfs_t *data;
+ file_t *fp;
+ vfs_t *vfsp;
+
+ if ((err = lx_autofs_dev_validate_cmd(arg, dc)) != 0)
+ return (err);
+
+ if ((fp = getf(dc->lad_ioctlfd)) == NULL)
+ return (EBADF);
+
+ vfsp = fp->f_vnode->v_vfsp;
+ if (vfsp->vfs_op != lx_autofs_vfsops) {
+ releasef(dc->lad_ioctlfd);
+ return (EBADF);
+ }
+
+ data = (lx_autofs_vfs_t *)vfsp->vfs_data;
+ if (data->lav_root->v_count <= 1) {
+ releasef(dc->lad_ioctlfd);
+ return (EBADF);
+ }
+
+ VFS_HOLD(vfsp);
+ *vfspp = vfsp;
+
+ releasef(dc->lad_ioctlfd);
+ return (0);
+}
+
+static int
+lx_autofs_dev_vers(intptr_t arg)
+{
+ int err;
+ lx_autofs_dv_ioctl_t dcmd;
+
+ if ((err = lx_autofs_dev_validate_cmd(arg, &dcmd)) != 0)
+ return (err);
+
+ if (copyout(&dcmd, (caddr_t)arg, sizeof (dcmd)) != 0)
+ return (EFAULT);
+
+ return (0);
+}
+
+static int
+lx_autofs_dev_protver(intptr_t arg)
+{
+ int err;
+ lx_autofs_dv_ioctl_t dcmd;
+
+ if ((err = lx_autofs_dev_validate_cmd(arg, &dcmd)) != 0)
+ return (err);
+
+ dcmd.lad_arg1 = LX_AUTOFS_PROTO_VERS5;
+
+ if (copyout(&dcmd, (caddr_t)arg, sizeof (dcmd)) != 0)
+ return (EFAULT);
+
+ return (0);
+}
+
+static int
+lx_autofs_dev_protosubver(intptr_t arg)
+{
+ int err;
+ lx_autofs_dv_ioctl_t dcmd;
+
+ if ((err = lx_autofs_dev_validate_cmd(arg, &dcmd)) != 0)
+ return (err);
+
+ dcmd.lad_arg1 = LX_AUTOFS_PROTO_SUBVERSION;
+
+ if (copyout(&dcmd, (caddr_t)arg, sizeof (dcmd)) != 0)
+ return (EFAULT);
+
+ return (0);
+}
+
+static int
+lx_autofs_dev_get_path_cmd(intptr_t arg, lx_autofs_dv_ioctl_t **dcp)
+{
+ int err;
+ lx_autofs_dv_ioctl_t dcmd, *dc;
+
+ if ((err = lx_autofs_dev_validate_cmd(arg, &dcmd)) != 0)
+ return (err);
+
+ if (dcmd.lad_size <= sizeof (dcmd) ||
+ dcmd.lad_size > (sizeof (dcmd) + MAXPATHLEN))
+ return (EINVAL);
+
+ dc = kmem_alloc(dcmd.lad_size, KM_SLEEP);
+
+ /* re-copyin the full struct with the path */
+ if (copyin((caddr_t)arg, dc, dcmd.lad_size) != 0) {
+ kmem_free(dc, dcmd.lad_size);
+ return (EFAULT);
+ }
+ dc->lad_size = dcmd.lad_size;
+
+ if (dc->lad_path[0] != '/' ||
+ dc->lad_path[dcmd.lad_size - sizeof (dcmd) - 1] != '\0') {
+ kmem_free(dc, dcmd.lad_size);
+ return (EINVAL);
+ }
+
+ *dcp = dc;
+ return (0);
+}
+
+static int
+lx_autofs_dev_openmount(intptr_t arg)
+{
+ int err;
+ int fd;
+ lx_autofs_dv_ioctl_t *dc;
+ vfs_t *vfsp;
+ lx_autofs_vfs_t *data;
+
+ if ((err = lx_autofs_dev_get_path_cmd(arg, &dc)) != 0)
+ return (err);
+
+ if ((vfsp = lx_autofs_dev_getvfs_bypath(dc->lad_path)) == NULL) {
+ kmem_free(dc, dc->lad_size);
+ return (EINVAL);
+ }
+
+ /* lad_arg1 is the dev number of the mnt but we don't check that */
+
+ /*
+ * Do an "open" on the root vnode. To fully simulate "open" we also add
+ * a hold on the root vnode itself since lx_autofs_open will only open
+ * (and hold) the underlying vnode.
+ */
+ data = (lx_autofs_vfs_t *)vfsp->vfs_data;
+ VN_HOLD(data->lav_root);
+ if ((err = fassign(&data->lav_root, FWRITE|FREAD, &fd)) != 0) {
+ VN_RELE(data->lav_root);
+ VFS_RELE(vfsp);
+ kmem_free(dc, dc->lad_size);
+ return (err);
+ }
+
+ mutex_enter(&data->lav_lock);
+ data->lav_openmnt_cnt++;
+ mutex_exit(&data->lav_lock);
+
+ dc->lad_ioctlfd = fd;
+
+ if (copyout(dc, (caddr_t)arg, sizeof (lx_autofs_dv_ioctl_t)) != 0) {
+ mutex_enter(&data->lav_lock);
+ data->lav_openmnt_cnt--;
+ mutex_exit(&data->lav_lock);
+ (void) closeandsetf(fd, NULL);
+ VFS_RELE(vfsp);
+ kmem_free(dc, dc->lad_size);
+ return (EFAULT);
+ }
+ VFS_RELE(vfsp);
+
+ kmem_free(dc, dc->lad_size);
+ return (0);
+}
+
+static int
+lx_autofs_dev_closemount(intptr_t arg)
+{
+ int err;
+ lx_autofs_dv_ioctl_t dcmd;
+ vfs_t *vfsp;
+ lx_autofs_vfs_t *data;
+
+ if ((err = lx_autofs_dev_fd_preamble(arg, &dcmd, &vfsp)) != 0)
+ return (err);
+
+ data = (lx_autofs_vfs_t *)vfsp->vfs_data;
+
+ /* "close" the vnode */
+ if ((err = closeandsetf(dcmd.lad_ioctlfd, NULL)) != 0) {
+ VFS_RELE(vfsp);
+ return (err);
+ }
+
+ mutex_enter(&data->lav_lock);
+ ASSERT(data->lav_openmnt_cnt > 0);
+ data->lav_openmnt_cnt--;
+ mutex_exit(&data->lav_lock);
+
+ VFS_RELE(vfsp);
+ return (0);
+}
+
+static int
+lx_autofs_dev_ready(intptr_t arg)
+{
+ int err;
+ lx_autofs_dv_ioctl_t dcmd;
+ vfs_t *vfsp;
+
+ if ((err = lx_autofs_dev_fd_preamble(arg, &dcmd, &vfsp)) != 0)
+ return (err);
+
+ if ((err = lx_autofs_ack(dcmd.lad_arg1, vfsp, LXACR_READY)) != 0) {
+ VFS_RELE(vfsp);
+ return (err);
+ }
+
+ VFS_RELE(vfsp);
+ return (0);
+}
+
+static int
+lx_autofs_dev_fail(intptr_t arg)
+{
+ int err;
+ lx_autofs_dv_ioctl_t dcmd;
+ vfs_t *vfsp;
+
+ if ((err = lx_autofs_dev_fd_preamble(arg, &dcmd, &vfsp)) != 0)
+ return (err);
+
+ if ((err = lx_autofs_ack(dcmd.lad_arg1, vfsp, LXACR_FAIL)) != 0) {
+ VFS_RELE(vfsp);
+ return (err);
+ }
+
+ VFS_RELE(vfsp);
+ return (0);
+}
+
+/*
+ * Update the fifo pipe information we use to talk to the automounter. The
+ * ioctl is used when the automounter restarts. This logic is similar to the
+ * handling done in lx_autofs_parse_mntopt() when the filesytem is first
+ * mounted.
+ */
+static int
+lx_autofs_dev_setpipefd(intptr_t arg)
+{
+ int err;
+ lx_autofs_dv_ioctl_t dcmd;
+ vfs_t *vfsp;
+ lx_autofs_vfs_t *data;
+ int fd, pgrp;
+ file_t *fp_wr, *fp_rd;
+
+ if ((err = lx_autofs_dev_fd_preamble(arg, &dcmd, &vfsp)) != 0)
+ return (err);
+
+ mutex_enter(&pidlock);
+ pgrp = curproc->p_pgrp;
+ mutex_exit(&pidlock);
+ fd = dcmd.lad_arg1;
+
+ /* Lookup the new fifos. See comment in lx_autofs_parse_mntopt. */
+ if (lx_autofs_fifo_lookup(pgrp, fd, &fp_wr, &fp_rd) != 0) {
+ int pid = (int)curproc->p_pid;
+
+ if (lx_autofs_fifo_lookup(pid, fd, &fp_wr, &fp_rd) != 0) {
+ VFS_RELE(vfsp);
+ return (EINVAL);
+ }
+ }
+
+ data = (lx_autofs_vfs_t *)vfsp->vfs_data;
+
+ /* Close the old fifos. */
+ if (data->lav_fifo_wr != NULL)
+ (void) closef(data->lav_fifo_wr);
+ if (data->lav_fifo_rd != NULL)
+ (void) closef(data->lav_fifo_rd);
+
+ data->lav_fd = fd;
+ data->lav_pgrp = pgrp;
+ data->lav_fifo_rd = fp_rd;
+ data->lav_fifo_wr = fp_wr;
+ /*
+ * Not explicitly in the ioctl spec. but necessary for correct recovery
+ */
+ data->lav_catatonic = B_FALSE;
+
+ VFS_RELE(vfsp);
+
+ return (0);
+}
+
+static int
+lx_autofs_dev_catatonic(intptr_t arg)
+{
+ int err;
+ lx_autofs_dv_ioctl_t dcmd;
+ vfs_t *vfsp;
+ lx_autofs_vfs_t *data;
+
+ if ((err = lx_autofs_dev_fd_preamble(arg, &dcmd, &vfsp)) != 0)
+ return (err);
+
+ data = (lx_autofs_vfs_t *)vfsp->vfs_data;
+ data->lav_catatonic = B_TRUE;
+ VFS_RELE(vfsp);
+
+ return (0);
+}
+
+static int
+lx_autofs_dev_expire(intptr_t arg)
+{
+ int err;
+ lx_autofs_dv_ioctl_t dcmd;
+ vfs_t *vfsp;
+
+ if ((err = lx_autofs_dev_fd_preamble(arg, &dcmd, &vfsp)) != 0)
+ return (err);
+
+ /* If it succeeds in expiring then we don't want to return EAGAIN */
+ if ((err = lx_autofs_expire(vfsp, kcred)) == 0) {
+ VFS_RELE(vfsp);
+ return (0);
+ }
+
+ VFS_RELE(vfsp);
+ return (EAGAIN);
+}
+
+static int
+lx_autofs_dev_timeout(intptr_t arg)
+{
+ int err;
+ lx_autofs_dv_ioctl_t dcmd;
+ vfs_t *vfsp;
+ lx_autofs_vfs_t *data;
+
+ if ((err = lx_autofs_dev_fd_preamble(arg, &dcmd, &vfsp)) != 0)
+ return (err);
+
+ data = (lx_autofs_vfs_t *)vfsp->vfs_data;
+ data->lav_timeout = dcmd.lad_arg1;
+ VFS_RELE(vfsp);
+
+ return (0);
+}
+
+static int
+lx_autofs_dev_requestor(intptr_t arg)
+{
+ int err;
+ lx_autofs_dv_ioctl_t *dc;
+ vfs_t *vfsp;
+ vfs_t *fnd_vfs = NULL;
+ struct vfs *vfslist;
+ zone_t *zone = curzone;
+ lx_autofs_vfs_t *data;
+ uid_t uid;
+ gid_t gid;
+
+ if ((err = lx_autofs_dev_get_path_cmd(arg, &dc)) != 0)
+ return (err);
+
+ vfs_list_read_lock();
+ vfsp = vfslist = curzone->zone_vfslist;
+ if (vfslist == NULL) {
+ vfs_list_unlock();
+ kmem_free(dc, dc->lad_size);
+ return (EINVAL);
+ }
+
+ do {
+ /* Skip mounts we shouldn't show. */
+ if (!(vfsp->vfs_flag & VFS_NOMNTTAB)) {
+ char *mntpt = (char *)refstr_value(vfsp->vfs_mntpt);
+
+ if (strcmp(dc->lad_path,
+ ZONE_PATH_TRANSLATE(mntpt, zone)) == 0) {
+
+ if (vfsp->vfs_op != lx_autofs_vfsops) {
+ /*
+ * Found an indirect mount (probably
+ * NFS) so we need to get the vfs it's
+ * mounted onto.
+ */
+ vnode_t *vn = vfsp->vfs_vnodecovered;
+ vfsp = vn->v_vfsp;
+
+ if (vfsp->vfs_op != lx_autofs_vfsops) {
+ /*
+ * autofs doesn't manage this
+ * path.
+ */
+ break;
+ }
+ }
+
+ fnd_vfs = vfsp;
+ VFS_HOLD(fnd_vfs)
+ break;
+ }
+ }
+ vfsp = vfsp->vfs_zone_next;
+ } while (vfsp != vfslist);
+ vfs_list_unlock();
+
+ if (fnd_vfs == NULL) {
+ kmem_free(dc, dc->lad_size);
+ return (EINVAL);
+ }
+
+ data = (lx_autofs_vfs_t *)fnd_vfs->vfs_data;
+ uid = data->lav_uid;
+ gid = data->lav_gid;
+ VFS_RELE(fnd_vfs);
+
+ dc->lad_arg1 = uid;
+ dc->lad_arg2 = gid;
+
+ if (copyout(dc, (caddr_t)arg, sizeof (lx_autofs_dv_ioctl_t)) != 0) {
+ kmem_free(dc, dc->lad_size);
+ return (EFAULT);
+ }
+
+ kmem_free(dc, dc->lad_size);
+ return (0);
+}
+
+static int
+lx_autofs_dev_ismntpt(intptr_t arg)
+{
+ int err = 0;
+ lx_autofs_dv_ioctl_t *dc;
+ struct vfs *vfslist;
+ vfs_t *vfsp;
+ vfs_t *fnd_vfs = NULL;
+ zone_t *zone = curzone;
+
+ if ((err = lx_autofs_dev_get_path_cmd(arg, &dc)) != 0)
+ return (err);
+
+ /*
+ * The automounter will always pass a path. It can also either pass an
+ * ioctlfd or, if it's -1, arg1 can be an LX_AUTOFS_TYPE_* value. We
+ * currently don't need those for our algorithm.
+ */
+
+ vfs_list_read_lock();
+ vfsp = vfslist = curzone->zone_vfslist;
+ if (vfslist == NULL) {
+ vfs_list_unlock();
+ kmem_free(dc, dc->lad_size);
+ return (0); /* return 0 if not a mount point */
+ }
+
+ do {
+ if (!(vfsp->vfs_flag & VFS_NOMNTTAB)) {
+ char *mntpt = (char *)refstr_value(vfsp->vfs_mntpt);
+
+ if (strcmp(dc->lad_path,
+ ZONE_PATH_TRANSLATE(mntpt, zone)) == 0) {
+
+ /*
+ * To handle direct mounts (on top of an autofs
+ * mount), we must prefer non-autofs vfs for
+ * this request.
+ */
+ if (fnd_vfs != NULL)
+ VFS_RELE(fnd_vfs);
+
+ fnd_vfs = vfsp;
+ VFS_HOLD(fnd_vfs)
+
+ if (fnd_vfs->vfs_op != lx_autofs_vfsops)
+ break;
+ }
+ }
+ vfsp = vfsp->vfs_zone_next;
+ } while (vfsp != vfslist);
+ vfs_list_unlock();
+
+ if (fnd_vfs == NULL) {
+ kmem_free(dc, dc->lad_size);
+ return (0); /* return 0 if not a mount point */
+ }
+
+ /*
+ * arg1 is device number, arg2 is superblock magic number
+ * The superblock value only matters if autofs or not.
+ */
+ dc->lad_arg1 = fnd_vfs->vfs_dev;
+ if (fnd_vfs->vfs_op == lx_autofs_vfsops) {
+ dc->lad_arg2 = LX_AUTOFS_SB_MAGIC;
+ } else {
+ dc->lad_arg2 = ~LX_AUTOFS_SB_MAGIC;
+ }
+
+ VFS_RELE(fnd_vfs);
+
+ if (copyout(dc, (caddr_t)arg, sizeof (lx_autofs_dv_ioctl_t)) != 0) {
+ kmem_free(dc, dc->lad_size);
+ return (EFAULT);
+ }
+
+ kmem_free(dc, dc->lad_size);
+
+ /*
+ * We have to return 1 if it is a mount point. The lx ioctl autofs
+ * translator will convert a negative value back to a positive,
+ * non-error return value.
+ */
+ return (-1);
+}
+
+static int
+lx_autofs_dev_askumount(intptr_t arg)
+{
+ int err;
+ int v;
+ lx_autofs_dv_ioctl_t dcmd;
+ vfs_t *vfsp;
+
+ if ((err = lx_autofs_dev_fd_preamble(arg, &dcmd, &vfsp)) != 0)
+ return (err);
+
+ if (lx_autofs_may_unmount(vfsp, kcred)) {
+ v = 0;
+ } else {
+ v = 1;
+ }
+ VFS_RELE(vfsp);
+
+ dcmd.lad_arg1 = v;
+ if (copyout(&dcmd, (caddr_t)arg, sizeof (dcmd)) != 0)
+ return (EFAULT);
+
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+lx_autofs_dev_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
+ int *rvalp)
+{
+ switch (cmd) {
+ case LX_AUTOFS_DEV_IOC_VERSION_CMD:
+ return (lx_autofs_dev_vers(arg));
+
+ case LX_AUTOFS_DEV_IOC_PROTOVER_CMD:
+ return (lx_autofs_dev_protver(arg));
+
+ case LX_AUTOFS_DEV_IOC_PROTOSUBVER_CMD:
+ return (lx_autofs_dev_protosubver(arg));
+
+ case LX_AUTOFS_DEV_IOC_OPENMOUNT_CMD:
+ return (lx_autofs_dev_openmount(arg));
+
+ case LX_AUTOFS_DEV_IOC_CLOSEMOUNT_CMD:
+ return (lx_autofs_dev_closemount(arg));
+
+ case LX_AUTOFS_DEV_IOC_READY_CMD:
+ return (lx_autofs_dev_ready(arg));
+
+ case LX_AUTOFS_DEV_IOC_FAIL_CMD:
+ return (lx_autofs_dev_fail(arg));
+
+ case LX_AUTOFS_DEV_IOC_SETPIPEFD_CMD:
+ return (lx_autofs_dev_setpipefd(arg));
+
+ case LX_AUTOFS_DEV_IOC_CATATONIC_CMD:
+ return (lx_autofs_dev_catatonic(arg));
+
+ case LX_AUTOFS_DEV_IOC_TIMEOUT_CMD:
+ return (lx_autofs_dev_timeout(arg));
+
+ case LX_AUTOFS_DEV_IOC_REQUESTER_CMD:
+ return (lx_autofs_dev_requestor(arg));
+
+ case LX_AUTOFS_DEV_IOC_EXPIRE_CMD:
+ return (lx_autofs_dev_expire(arg));
+
+ case LX_AUTOFS_DEV_IOC_ASKUMOUNT_CMD:
+ return (lx_autofs_dev_askumount(arg));
+
+ case LX_AUTOFS_DEV_IOC_ISMOUNTPOINT_CMD:
+ return (lx_autofs_dev_ismntpt(arg));
+ }
+
+ return (EINVAL);
+}
+
+/*
+ * lx_autofs_init() gets invoked via the mod_install() call in
+ * this module's _init() routine. Therefore, the code that cleans
+ * up the structures we allocate below is actually found in
+ * our _fini() routine.
+ */
+/* ARGSUSED */
+static int
+lx_autofs_init(int fstype, char *name)
+{
+ int error;
+
+ lx_autofs_major = ddi_name_to_major(LX_AUTOFS_NAME);
+
+ lx_autofs_fstype = fstype;
+ if ((error = vfs_setfsops(fstype, lx_autofs_vfstops,
+ &lx_autofs_vfsops)) != 0) {
+ cmn_err(CE_WARN, "lx_autofs_init: bad vfs ops template");
+ return (error);
+ }
+
+ if ((error = vn_make_ops(name, lx_autofs_tops_root,
+ &lx_autofs_vn_ops)) != 0) {
+ VERIFY(vfs_freevfsops_by_type(fstype) == 0);
+ lx_autofs_vn_ops = NULL;
+ return (error);
+ }
+
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+lx_autofs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ int instance = ddi_get_instance(dip);
+
+ if (cmd != DDI_ATTACH)
+ return (DDI_FAILURE);
+
+ ASSERT(instance == 0);
+ if (instance != 0)
+ return (DDI_FAILURE);
+
+ /* create our minor node */
+ if (ddi_create_minor_node(dip, LX_AUTOFS_MINORNAME, S_IFCHR, 0,
+ DDI_PSEUDO, 0) != DDI_SUCCESS)
+ return (DDI_FAILURE);
+
+ lx_autofs_dip = dip;
+
+ return (DDI_SUCCESS);
+}
+
+/*ARGSUSED*/
+static int
+lx_autofs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ if (cmd != DDI_DETACH)
+ return (DDI_FAILURE);
+
+ lx_autofs_dip = NULL;
+
+ return (DDI_SUCCESS);
+}
+
+/*ARGSUSED*/
+static int
+lx_autofs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
+ void **resultp)
+{
+ switch (infocmd) {
+ case DDI_INFO_DEVT2DEVINFO:
+ *resultp = lx_autofs_dip;
+ return (DDI_SUCCESS);
+
+ case DDI_INFO_DEVT2INSTANCE:
+ *resultp = (void *)0;
+ return (DDI_SUCCESS);
+ }
+ return (DDI_FAILURE);
+}
+
+/*
+ * Driver flags
+ */
+static struct cb_ops lx_autofs_cb_ops = {
+ lx_autofs_dev_open, /* open */
+ lx_autofs_dev_close, /* close */
+ nodev, /* strategy */
+ nodev, /* print */
+ nodev, /* dump */
+ nodev, /* read */
+ nodev, /* write */
+ lx_autofs_dev_ioctl, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ nodev, /* segmap */
+ nochpoll, /* poll */
+ ddi_prop_op, /* vb_prop_op */
+ NULL, /* streamtab */
+ D_NEW | D_MP /* Driver compatibility flag */
+};
+
+/*
+ * Module linkage
+ */
+static mntopt_t lx_autofs_mntopt[] = {
+ { LX_MNTOPT_FD, NULL, 0, MO_HASVALUE },
+ { LX_MNTOPT_PGRP, NULL, 0, MO_HASVALUE },
+ { LX_MNTOPT_MINPROTO, NULL, 0, MO_HASVALUE },
+ { LX_MNTOPT_MAXPROTO, NULL, 0, MO_HASVALUE },
+ { LX_MNTOPT_INDIRECT, NULL, 0, 0 },
+ { LX_MNTOPT_DIRECT, NULL, 0, 0 },
+ { LX_MNTOPT_OFFSET, NULL, 0, 0 }
+};
+
+static mntopts_t lx_autofs_mntopts = {
+ sizeof (lx_autofs_mntopt) / sizeof (mntopt_t),
+ lx_autofs_mntopt
+};
+
+static vfsdef_t vfw = {
+ VFSDEF_VERSION,
+ LX_AUTOFS_NAME,
+ lx_autofs_init,
+ VSW_HASPROTO | VSW_VOLATILEDEV | VSW_ZMOUNT,
+ &lx_autofs_mntopts
+};
+
+static struct dev_ops lx_autofs_dev_ops = {
+ DEVO_REV, /* version */
+ 0, /* refcnt */
+ lx_autofs_info, /* info */
+ nulldev, /* identify */
+ nulldev, /* probe */
+ lx_autofs_attach, /* attach */
+ lx_autofs_detach, /* detach */
+ nodev, /* reset */
+ &lx_autofs_cb_ops, /* driver operations */
+ NULL, /* no bus operations */
+ NULL, /* power */
+ ddi_quiesce_not_needed /* quiesce */
+};
+
+extern struct mod_ops mod_fsops;
+
+static struct modlfs modlfs = {
+ &mod_fsops, "lx autofs filesystem", &vfw
+};
+
+static struct modldrv modldrv = {
+ &mod_driverops, "lx autofs driver", &lx_autofs_dev_ops
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1,
+ (void *)&modlfs,
+ (void *)&modldrv,
+ NULL
+};
+
+int
+_init(void)
+{
+ int error;
+
+ if ((error = mod_install(&modlinkage)) != 0) {
+ return (error);
+ }
+
+ return (0);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ int error;
+
+ if ((error = mod_remove(&modlinkage)) != 0)
+ return (error);
+
+ if (lx_autofs_vn_ops != NULL) {
+ vn_freevnodeops(lx_autofs_vn_ops);
+ lx_autofs_vn_ops = NULL;
+ }
+
+ /*
+ * In our init routine, if we get an error after calling
+ * vfs_setfsops() we cleanup by calling vfs_freevfsops_by_type().
+ * But we don't need to call vfs_freevfsops_by_type() here
+ * because the fs framework did this for us as part of the
+ * mod_remove() call above.
+ */
+ return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/autofs/lxautofs.conf b/usr/src/uts/common/brand/lx/autofs/lxautofs.conf
new file mode 100644
index 0000000000..36e0119e33
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/autofs/lxautofs.conf
@@ -0,0 +1,14 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# Copyright 2016 Joyent, Inc.
+#
+
+name="lxautofs" parent="pseudo" instance=0;
diff --git a/usr/src/uts/common/brand/lx/cgroups/cgrps.h b/usr/src/uts/common/brand/lx/cgroups/cgrps.h
new file mode 100644
index 0000000000..df938adcea
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/cgroups/cgrps.h
@@ -0,0 +1,223 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#ifndef _LXCGRPS_H
+#define _LXCGRPS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * cgrps.h: declarations, data structures and macros for lx_cgroup
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/policy.h>
+#include <sys/dirent.h>
+#include <sys/errno.h>
+#include <sys/kmem.h>
+#include <sys/pathname.h>
+#include <sys/systm.h>
+#include <sys/var.h>
+#include <sys/sysmacros.h>
+#include <sys/cred.h>
+#include <sys/priv.h>
+#include <sys/vnode.h>
+#include <sys/vfs.h>
+#include <sys/statvfs.h>
+#include <sys/cmn_err.h>
+#include <sys/zone.h>
+#include <sys/uio.h>
+#include <sys/utsname.h>
+#include <sys/atomic.h>
+#include <vm/anon.h>
+
+/*
+ * cgrpmgr ioctl interface.
+ */
+#define CGRPFS_IOC ('C' << 16 | 'G' << 8)
+#define CGRPFS_GETEVNT (CGRPFS_IOC | 1)
+
+typedef struct cgrpmgr_info {
+ pid_t cgmi_pid;
+ char *cgmi_rel_agent_path;
+ char *cgmi_cgroup_path;
+} cgrpmgr_info_t;
+
+#if defined(_KERNEL)
+
+#include <sys/lx_brand.h>
+
+typedef struct cgrpmgr_info32 {
+ pid_t cgmi_pid;
+ caddr32_t cgmi_rel_agent_path;
+ caddr32_t cgmi_cgroup_path;
+} cgrpmgr_info32_t;
+
+#define CG_PSNSIZE 256 /* max size of pseudo file name entries */
+#define CG_PSDSIZE 16 /* pretend that a dir entry takes 16 bytes */
+
+/*
+ * The order of these entries must be in sync with the cg_ssde_dir array.
+ */
+typedef enum cgrp_ssid {
+ CG_SSID_GENERIC = 1,
+ CG_SSID_NUM /* last ssid for range checking */
+} cgrp_ssid_t;
+
+typedef enum cgrp_nodetype {
+ CG_CGROUP_DIR = 1, /* cgroup directory entry */
+ CG_NOTIFY, /* notify_on_release file */
+ CG_PROCS, /* cgroup.procs file */
+ CG_REL_AGENT, /* release_agent file */
+ CG_TASKS, /* tasks file */
+} cgrp_nodetype_t;
+
+typedef struct cgrp_subsys_dirent {
+ cgrp_nodetype_t cgrp_ssd_type;
+ char *cgrp_ssd_name;
+} cgrp_subsys_dirent_t;
+
+#define N_DIRENTS(m) (cgrp_num_pseudo_ents((m)->cg_ssid) + 2)
+
+/*
+ * A modern systemd-based Linux system typically has 50-60 cgroups so
+ * we size the hash for 2x that number.
+ */
+#define CGRP_HASH_SZ 128
+#define CGRP_AGENT_LEN (MAXPATHLEN + 1)
+
+/*
+ * cgroups per-mount data structure.
+ *
+ * All but the event related fields are protected by cg_contents.
+ * The evnt_list and counter is protected by cg_events.
+ */
+typedef struct cgrp_mnt {
+ struct vfs *cg_vfsp; /* filesystem's vfs struct */
+ struct cgrp_node *cg_rootnode; /* root cgrp_node */
+ char *cg_mntpath; /* name of cgroup mount point */
+ cgrp_ssid_t cg_ssid; /* subsystem type */
+ dev_t cg_dev; /* unique dev # of mounted `device' */
+ uint_t cg_gen; /* node ID source for files */
+ uint_t cg_grp_gen; /* ID source for cgroups */
+ kmutex_t cg_contents; /* global lock for most fs activity */
+ char cg_agent[CGRP_AGENT_LEN]; /* release_agent path */
+ /* ptr to zone data for containing zone */
+ lx_zone_data_t *cg_lxzdata;
+ struct cgrp_node **cg_grp_hash; /* hash list of cgroups in the fs */
+} cgrp_mnt_t;
+
+/*
+ * cgrp_node is the file system dependent node for cgroups.
+ *
+ * The node is used to represent both directories (a cgroup) and pseudo files
+ * within the directory.
+ *
+ * Members are tagged in the comment to note which type of node they apply to:
+ * A - all
+ * D - dir (i.e. a cgroup)
+ * F - pseudo file
+ */
+
+typedef struct cgrp_node {
+ struct cgrp_node *cgn_back; /* A lnked lst of cgrp_nodes */
+ struct cgrp_node *cgn_forw; /* A lnked lst of cgrp_nodes */
+ struct cgrp_dirent *cgn_dir; /* D dirent list */
+ struct cgrp_node *cgn_parent; /* A dir containing this node */
+ struct cgrp_node *cgn_next; /* D link in per-mount cgroup */
+ /* hash table */
+ uint_t cgn_dirents; /* D number of dirents */
+ cgrp_nodetype_t cgn_type; /* A type for this node */
+ uint_t cgn_notify; /* D notify_on_release value */
+ uint_t cgn_task_cnt; /* D number of threads in grp */
+ struct vnode *cgn_vnode; /* A vnode for this cgrp_node */
+ uint_t cgn_id; /* D ID number for the cgroup */
+ struct vattr cgn_attr; /* A attributes */
+} cgrp_node_t;
+
+/*
+ * File system independent to cgroups conversion macros
+ */
+#define VFSTOCGM(vfsp) ((cgrp_mnt_t *)(vfsp)->vfs_data)
+#define VTOCGM(vp) ((cgrp_mnt_t *)(vp)->v_vfsp->vfs_data)
+#define VTOCGN(vp) ((struct cgrp_node *)(vp)->v_data)
+#define CGNTOV(cn) ((cn)->cgn_vnode)
+#define cgnode_hold(cn) VN_HOLD(CGNTOV(cn))
+#define cgnode_rele(cn) VN_RELE(CGNTOV(cn))
+
+/*
+ * Attributes
+ */
+#define cgn_mask cgn_attr.va_mask
+#define cgn_mode cgn_attr.va_mode
+#define cgn_uid cgn_attr.va_uid
+#define cgn_gid cgn_attr.va_gid
+#define cgn_fsid cgn_attr.va_fsid
+#define cgn_nodeid cgn_attr.va_nodeid
+#define cgn_nlink cgn_attr.va_nlink
+#define cgn_size cgn_attr.va_size
+#define cgn_atime cgn_attr.va_atime
+#define cgn_mtime cgn_attr.va_mtime
+#define cgn_ctime cgn_attr.va_ctime
+#define cgn_rdev cgn_attr.va_rdev
+#define cgn_blksize cgn_attr.va_blksize
+#define cgn_nblocks cgn_attr.va_nblocks
+#define cgn_seq cgn_attr.va_seq
+
+/*
+ * cgroup directories are made up of a linked list of cg_dirent structures
+ * hanging off directory cgrp_nodes. File names are not fixed length,
+ * but are null terminated.
+ */
+typedef struct cgrp_dirent {
+ struct cgrp_node *cgd_cgrp_node; /* cg node for this file */
+ struct cgrp_dirent *cgd_next; /* next directory entry */
+ struct cgrp_dirent *cgd_prev; /* prev directory entry */
+ uint_t cgd_offset; /* "offset" of dir entry */
+ uint_t cgd_hash; /* a hash of cgd_name */
+ struct cgrp_dirent *cgd_link; /* linked via hash table */
+ struct cgrp_node *cgd_parent; /* parent, dir we are in */
+ char *cgd_name; /* null terminated */
+} cgrp_dirent_t;
+
+enum de_op { DE_CREATE, DE_MKDIR, DE_RENAME }; /* direnter ops */
+enum dr_op { DR_REMOVE, DR_RMDIR, DR_RENAME }; /* dirremove ops */
+
+extern struct vnodeops *cgrp_vnodeops;
+
+int cgrp_dirdelete(cgrp_node_t *, cgrp_node_t *, char *, enum dr_op, cred_t *);
+int cgrp_direnter(cgrp_mnt_t *, cgrp_node_t *, char *, enum de_op,
+ cgrp_node_t *, struct vattr *, cgrp_node_t **, cred_t *,
+ caller_context_t *);
+void cgrp_dirinit(cgrp_node_t *, cgrp_node_t *, cred_t *);
+int cgrp_dirlookup(cgrp_node_t *, char *, cgrp_node_t **, cred_t *);
+void cgrp_dirtrunc(cgrp_node_t *);
+void cgrp_node_init(cgrp_mnt_t *, cgrp_node_t *, vattr_t *, cred_t *);
+int cgrp_taccess(void *, int, cred_t *);
+ino_t cgrp_inode(cgrp_nodetype_t, unsigned int);
+int cgrp_num_pseudo_ents(cgrp_ssid_t);
+cgrp_node_t *cgrp_cg_hash_lookup(cgrp_mnt_t *, uint_t);
+void cgrp_rel_agent_event(cgrp_mnt_t *, cgrp_node_t *);
+
+#endif /* KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LXCGRPS_H */
diff --git a/usr/src/uts/common/brand/lx/cgroups/cgrps_node.c b/usr/src/uts/common/brand/lx/cgroups/cgrps_node.c
new file mode 100644
index 0000000000..8950be1966
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/cgroups/cgrps_node.c
@@ -0,0 +1,1019 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/vfs.h>
+#include <sys/vnode.h>
+#include <sys/errno.h>
+#include <sys/cmn_err.h>
+#include <sys/cred.h>
+#include <sys/stat.h>
+#include <sys/mode.h>
+#include <sys/policy.h>
+#include <sys/sdt.h>
+
+#include "cgrps.h"
+
+static int cgrp_dirmakecgnode(cgrp_node_t *, cgrp_mnt_t *, struct vattr *,
+ enum de_op, cgrp_node_t **, struct cred *);
+static int cgrp_diraddentry(cgrp_node_t *, cgrp_node_t *, char *, enum de_op);
+
+static cgrp_subsys_dirent_t cgrp_generic_dir[] = {
+ { CG_PROCS, "cgroup.procs" },
+ { CG_NOTIFY, "notify_on_release" },
+ { CG_TASKS, "tasks" }
+};
+
+typedef struct cgrp_ssde {
+ cgrp_subsys_dirent_t *cg_ssde_files;
+ int cg_ssde_nfiles;
+} cgrp_ssde_t;
+
+#define CGDIRLISTSZ(l) (sizeof (l) / sizeof ((l)[0]))
+
+/*
+ * Note, these entries must be in the same order as the cgrp_ssid_t entries.
+ */
+static cgrp_ssde_t cg_ssde_dir[] = {
+ /* subsystems start at 1 */
+ {NULL, 0},
+
+ /* CG_SSID_GENERIC */
+ {cgrp_generic_dir, CGDIRLISTSZ(cgrp_generic_dir)},
+};
+
+
+#define CG_HASH_SIZE 8192 /* must be power of 2 */
+#define CG_MUTEX_SIZE 64
+
+static cgrp_dirent_t *cg_hashtable[CG_HASH_SIZE];
+static kmutex_t cg_hashmutex[CG_MUTEX_SIZE];
+
+#define CG_HASH_INDEX(a) ((a) & (CG_HASH_SIZE-1))
+#define CG_MUTEX_INDEX(a) ((a) & (CG_MUTEX_SIZE-1))
+
+#define CG_HASH(cp, name, hash) \
+ { \
+ char Xc, *Xcp; \
+ hash = (uint_t)(uintptr_t)(cp) >> 8; \
+ for (Xcp = (name); (Xc = *Xcp) != 0; Xcp++) \
+ hash = (hash << 4) + hash + (uint_t)Xc; \
+ }
+
+#define MODESHIFT 3
+
+typedef enum cgrp_nodehold {
+ NOHOLD,
+ HOLD
+} cgrp_nodehold_t;
+
+void
+cgrp_hash_init(void)
+{
+ int i;
+
+ for (i = 0; i < CG_MUTEX_SIZE; i++)
+ mutex_init(&cg_hashmutex[i], NULL, MUTEX_DEFAULT, NULL);
+}
+
+static void
+cgrp_hash_in(cgrp_dirent_t *c)
+{
+ uint_t hash;
+ cgrp_dirent_t **prevpp;
+ kmutex_t *cg_hmtx;
+
+ CG_HASH(c->cgd_parent, c->cgd_name, hash);
+ c->cgd_hash = hash;
+ prevpp = &cg_hashtable[CG_HASH_INDEX(hash)];
+ cg_hmtx = &cg_hashmutex[CG_MUTEX_INDEX(hash)];
+ mutex_enter(cg_hmtx);
+ c->cgd_link = *prevpp;
+ *prevpp = c;
+ mutex_exit(cg_hmtx);
+}
+
+static void
+cgrp_hash_out(cgrp_dirent_t *c)
+{
+ uint_t hash;
+ cgrp_dirent_t **prevpp;
+ kmutex_t *cg_hmtx;
+
+ hash = c->cgd_hash;
+ prevpp = &cg_hashtable[CG_HASH_INDEX(hash)];
+ cg_hmtx = &cg_hashmutex[CG_MUTEX_INDEX(hash)];
+ mutex_enter(cg_hmtx);
+ while (*prevpp != c)
+ prevpp = &(*prevpp)->cgd_link;
+ *prevpp = c->cgd_link;
+ mutex_exit(cg_hmtx);
+}
+
+static cgrp_dirent_t *
+cgrp_hash_lookup(char *name, cgrp_node_t *parent, cgrp_nodehold_t hold,
+ cgrp_node_t **found)
+{
+ cgrp_dirent_t *l;
+ uint_t hash;
+ kmutex_t *cg_hmtx;
+ cgrp_node_t *cnp;
+
+ CG_HASH(parent, name, hash);
+ cg_hmtx = &cg_hashmutex[CG_MUTEX_INDEX(hash)];
+ mutex_enter(cg_hmtx);
+ l = cg_hashtable[CG_HASH_INDEX(hash)];
+ while (l) {
+ if ((l->cgd_hash == hash) &&
+ (l->cgd_parent == parent) &&
+ (strcmp(l->cgd_name, name) == 0)) {
+ /*
+ * We need to make sure that the cgrp_node that
+ * we put a hold on is the same one that we pass back.
+ * Hence, temporary variable cnp is necessary.
+ */
+ cnp = l->cgd_cgrp_node;
+ if (hold == HOLD) {
+ ASSERT(cnp);
+ cgnode_hold(cnp);
+ }
+ if (found)
+ *found = cnp;
+ mutex_exit(cg_hmtx);
+ return (l);
+ } else {
+ l = l->cgd_link;
+ }
+ }
+ mutex_exit(cg_hmtx);
+ return (NULL);
+}
+
+/*
+ * The following functions maintain the per-mount cgroup hash table.
+ */
+static void
+cgrp_cg_hash_insert(cgrp_mnt_t *cgm, cgrp_node_t *cn)
+{
+ uint_t cgid;
+ int hsh;
+
+ ASSERT(MUTEX_HELD(&cgm->cg_contents));
+
+ cgid = cn->cgn_id;
+ hsh = cgid % CGRP_HASH_SZ;
+
+ cn->cgn_next = cgm->cg_grp_hash[hsh];
+ cgm->cg_grp_hash[hsh] = cn;
+}
+
+static void
+cgrp_cg_hash_remove(cgrp_mnt_t *cgm, cgrp_node_t *cn)
+{
+ uint_t cgid;
+ int hsh;
+ cgrp_node_t *np = NULL, *curp, *prevp = NULL;
+
+ ASSERT(MUTEX_HELD(&cgm->cg_contents));
+
+ cgid = cn->cgn_id;
+ hsh = cgid % CGRP_HASH_SZ;
+
+ for (curp = cgm->cg_grp_hash[hsh]; curp != NULL;
+ curp = curp->cgn_next) {
+ if (curp->cgn_id == cgid) {
+ if (prevp == NULL) {
+ cgm->cg_grp_hash[hsh] = curp->cgn_next;
+ } else {
+ prevp->cgn_next = curp->cgn_next;
+ }
+ np = curp;
+ np->cgn_next = NULL;
+ break;
+ }
+
+ prevp = curp;
+ }
+
+ ASSERT(np != NULL);
+ ASSERT(np->cgn_task_cnt == 0);
+}
+
+/*
+ * Count up the number of threads already running in the zone and initialize the
+ * first cgroup's task counter.
+ *
+ * We have to look at all of the processes to find applicable ones.
+ */
+static void
+cgrp_cg_hash_init(cgrp_mnt_t *cgm, cgrp_node_t *cn)
+{
+ int i;
+ int cnt = 0;
+ zoneid_t zoneid = curproc->p_zone->zone_id;
+ pid_t schedpid = curproc->p_zone->zone_zsched->p_pid;
+
+ ASSERT(MUTEX_HELD(&cgm->cg_contents));
+
+ /* Scan all of the process entries */
+ mutex_enter(&pidlock);
+ for (i = 1; i < v.v_proc; i++) {
+ proc_t *p;
+
+ /*
+ * Skip indices for which there is no pid_entry, PIDs for
+ * which there is no corresponding process, system processes,
+ * a PID of 0, the pid for our zsched process, anything the
+ * security policy doesn't allow us to look at, its not an
+ * lx-branded process and processes that are not in the zone.
+ */
+ if ((p = pid_entry(i)) == NULL ||
+ p->p_stat == SIDL ||
+ (p->p_flag & SSYS) != 0 ||
+ p->p_pid == 0 ||
+ p->p_pid == schedpid ||
+ secpolicy_basic_procinfo(CRED(), p, curproc) != 0 ||
+ p->p_zone->zone_id != zoneid) {
+ continue;
+ }
+
+ mutex_enter(&p->p_lock);
+ if (p->p_brand != &lx_brand) {
+ mutex_exit(&p->p_lock);
+ continue;
+ }
+ cnt += p->p_lwpcnt;
+ mutex_exit(&p->p_lock);
+ }
+
+ /*
+ * There should be at least the init process with 1 thread in the zone
+ */
+ ASSERT(cnt > 0);
+ cn->cgn_task_cnt = cnt;
+
+ DTRACE_PROBE2(cgrp__grp__init, void *, cn, int, cnt);
+
+ mutex_exit(&pidlock);
+}
+
+cgrp_node_t *
+cgrp_cg_hash_lookup(cgrp_mnt_t *cgm, uint_t cgid)
+{
+ int hsh = cgid % CGRP_HASH_SZ;
+ cgrp_node_t *curp;
+
+ ASSERT(MUTEX_HELD(&cgm->cg_contents));
+
+ for (curp = cgm->cg_grp_hash[hsh]; curp != NULL;
+ curp = curp->cgn_next) {
+ if (curp->cgn_id == cgid) {
+ return (curp);
+ }
+ }
+
+ return (NULL);
+}
+
+/*
+ * Calculate an inode number
+ *
+ * This takes various bits of info and munges them to give the inode number for
+ * a cgrp pseudo file node.
+ */
+ino_t
+cgrp_inode(cgrp_nodetype_t type, unsigned int cgrpid)
+{
+ /*
+ * cgroup inode format:
+ * 00000000AABBBBBB
+ *
+ * AA - node type (from subsystem list)
+ * BBBBBB - id of the cgroup
+ */
+
+ return ((ino_t)(type << 24) | (cgrpid & 0xffffff));
+}
+
+/*
+ * Return the number of pseudo file entries in a cgroup directory for the
+ * given subsystem.
+ */
+int
+cgrp_num_pseudo_ents(cgrp_ssid_t ssid)
+{
+ cgrp_ssde_t *ssdp = &cg_ssde_dir[ssid];
+
+ return (ssdp->cg_ssde_nfiles);
+}
+
+int
+cgrp_taccess(void *vcp, int mode, cred_t *cred)
+{
+ cgrp_node_t *cn = vcp;
+ int shift = 0;
+ /*
+ * Check access based on owner, group and public perms in cgrp_node.
+ */
+ if (crgetuid(cred) != cn->cgn_uid) {
+ shift += MODESHIFT;
+ if (groupmember(cn->cgn_gid, cred) == 0)
+ shift += MODESHIFT;
+ }
+
+ return (secpolicy_vnode_access2(cred, CGNTOV(cn), cn->cgn_uid,
+ cn->cgn_mode << shift, mode));
+}
+
+/*
+ * Search directory 'parent' for entry 'name'.
+ *
+ * 0 is returned on success and *foundcp points
+ * to the found cgrp_node with its vnode held.
+ */
+int
+cgrp_dirlookup(cgrp_node_t *parent, char *name, cgrp_node_t **foundcp,
+ cred_t *cred)
+{
+ cgrp_mnt_t *cgm = VTOCGM(parent->cgn_vnode);
+ int error;
+
+ ASSERT(MUTEX_HELD(&cgm->cg_contents));
+ *foundcp = NULL;
+ if (parent->cgn_type != CG_CGROUP_DIR)
+ return (ENOTDIR);
+
+ if ((error = cgrp_taccess(parent, VEXEC, cred)))
+ return (error);
+
+ if (*name == '\0') {
+ cgnode_hold(parent);
+ *foundcp = parent;
+ return (0);
+ }
+
+ /*
+ * Search the directory for the matching name
+ * We need the lock protecting the cgn_dir list
+ * so that it doesn't change out from underneath us.
+ * cgrp_hash_lookup() will pass back the cgrp_node
+ * with a hold on it.
+ */
+
+ if (cgrp_hash_lookup(name, parent, HOLD, foundcp) != NULL) {
+ ASSERT(*foundcp);
+ return (0);
+ }
+
+ return (ENOENT);
+}
+
+/*
+ * Enter a directory entry for 'name' and 'cp' into directory 'dir'
+ *
+ * Returns 0 on success.
+ */
+int
+cgrp_direnter(
+ cgrp_mnt_t *cgm,
+ cgrp_node_t *dir, /* target directory to make entry in */
+ char *name, /* name of entry */
+ enum de_op op, /* entry operation */
+ cgrp_node_t *cn, /* existing cgrp_node, if rename */
+ struct vattr *va,
+ cgrp_node_t **cnp, /* return cgrp_node, if create/mkdir */
+ cred_t *cred,
+ caller_context_t *ctp)
+{
+ cgrp_dirent_t *cdp;
+ cgrp_node_t *found = NULL;
+ int error = 0;
+ char *s;
+
+ ASSERT(MUTEX_HELD(&cgm->cg_contents));
+ ASSERT(dir->cgn_type == CG_CGROUP_DIR);
+
+ /*
+ * Don't allow '/' characters in pathname component,
+ */
+ for (s = name; *s; s++)
+ if (*s == '/')
+ return (EACCES);
+
+ if (name[0] == '\0')
+ panic("cgrp_direnter: NULL name");
+
+ /*
+ * For rename lock the source entry and check the link count
+ * to see if it has been removed while it was unlocked.
+ * Remember that we can only rename within the same directory.
+ */
+ if (op == DE_RENAME) {
+ if (cn->cgn_nlink == 0) {
+ return (ENOENT);
+ }
+
+ if (cn->cgn_nlink == MAXLINK) {
+ return (EMLINK);
+ }
+ cn->cgn_nlink++;
+ gethrestime(&cn->cgn_ctime);
+ }
+
+ /*
+ * This might be a "dangling detached directory".
+ * it could have been removed, but a reference
+ * to it kept in u_cwd. don't bother searching
+ * it, and with any luck the user will get tired
+ * of dealing with us and cd to some absolute
+ * pathway. *sigh*, thus in ufs, too.
+ */
+ if (dir->cgn_nlink == 0) {
+ error = ENOENT;
+ goto out;
+ }
+
+ /*
+ * Search for the entry. In all cases it is an error if it exists.
+ */
+ cdp = cgrp_hash_lookup(name, dir, HOLD, &found);
+
+ if (cdp) {
+ ASSERT(found != NULL);
+ error = EEXIST;
+ mutex_exit(&cgm->cg_contents);
+ cgnode_rele(found);
+ mutex_enter(&cgm->cg_contents);
+ } else {
+
+ /*
+ * The entry does not exist. Check write permission in
+ * directory to see if entry can be created.
+ */
+ if ((error = cgrp_taccess(dir, VWRITE, cred)) != 0)
+ goto out;
+ if (op == DE_CREATE || op == DE_MKDIR) {
+ /*
+ * Make new cgrp_node and directory entry as required.
+ */
+ error = cgrp_dirmakecgnode(dir, cgm, va, op, &cn, cred);
+ if (error)
+ goto out;
+
+ if (op == DE_MKDIR) {
+ /*
+ * inherit notify_on_release value from parent
+ */
+ cn->cgn_notify = dir->cgn_notify;
+ }
+ }
+
+ error = cgrp_diraddentry(dir, cn, name, op);
+ if (error != 0) {
+ if (op == DE_CREATE || op == DE_MKDIR) {
+ /*
+ * Unmake the inode we just made.
+ */
+ if ((cn->cgn_type) == CG_CGROUP_DIR) {
+ ASSERT(cdp == NULL);
+ /*
+ * cleanup allocs made by cgrp_dirinit
+ */
+ cgrp_dirtrunc(cn);
+ }
+ cn->cgn_nlink = 0;
+ gethrestime(&cn->cgn_ctime);
+ mutex_exit(&cgm->cg_contents);
+ cgnode_rele(cn);
+ mutex_enter(&cgm->cg_contents);
+ cn = NULL;
+ }
+ } else if (cnp) {
+ *cnp = cn;
+ } else if (op == DE_CREATE || op == DE_MKDIR) {
+ mutex_exit(&cgm->cg_contents);
+ cgnode_rele(cn);
+ mutex_enter(&cgm->cg_contents);
+ }
+ }
+
+out:
+ if (error && op == DE_RENAME) {
+ /* Undo bumped link count. */
+ cn->cgn_nlink--;
+ gethrestime(&cn->cgn_ctime);
+ }
+ return (error);
+}
+
+/*
+ * Delete entry cn of name "nm" from parent dir. This is used to both remove
+ * a cgroup directory and to remove the pseudo file nodes within the cgroup
+ * directory (by recursively calling itself). It frees the dir entry space
+ * and decrements link count on cgrp_node(s).
+ *
+ * Return 0 on success.
+ */
+int
+cgrp_dirdelete(cgrp_node_t *dir, cgrp_node_t *cn, char *nm, enum dr_op op,
+ cred_t *cred)
+{
+ cgrp_mnt_t *cgm = VTOCGM(cn->cgn_vnode);
+ cgrp_dirent_t *cndp;
+ int error;
+ size_t namelen;
+ cgrp_node_t *cnnp;
+ timestruc_t now;
+
+ ASSERT(MUTEX_HELD(&cgm->cg_contents));
+
+ if (nm[0] == '\0')
+ panic("cgrp_dirdelete: empty name for 0x%p", (void *)cn);
+
+ /*
+ * return error when removing . and ..
+ */
+ if (nm[0] == '.') {
+ if (nm[1] == '\0')
+ return (EINVAL);
+ if (nm[1] == '.' && nm[2] == '\0')
+ return (EEXIST); /* thus in ufs */
+ }
+
+ if ((error = cgrp_taccess(dir, VEXEC|VWRITE, cred)) != 0)
+ return (error);
+
+ if (dir->cgn_dir == NULL)
+ return (ENOENT);
+
+ if (op == DR_RMDIR) {
+ /*
+ * This is the top-level removal of a cgroup dir. Start by
+ * removing the fixed pseudo file entries from the dir. We do
+ * this by recursively calling back into this function with
+ * a different op code. The caller of this function has
+ * already verified that it is safe to remove this directory.
+ */
+ cgrp_dirent_t *cdp;
+
+ ASSERT(cn->cgn_type == CG_CGROUP_DIR);
+
+ cdp = cn->cgn_dir;
+ while (cdp) {
+ cgrp_node_t *pseudo_node;
+ cgrp_dirent_t *nextp;
+
+ if (strcmp(cdp->cgd_name, ".") == 0 ||
+ strcmp(cdp->cgd_name, "..") == 0) {
+ cdp = cdp->cgd_next;
+ continue;
+ }
+
+ pseudo_node = cdp->cgd_cgrp_node;
+ nextp = cdp->cgd_next;
+
+ cgnode_hold(pseudo_node);
+ error = cgrp_dirdelete(cn, pseudo_node,
+ cdp->cgd_name, DR_REMOVE, cred);
+ mutex_exit(&cgm->cg_contents);
+ cgnode_rele(pseudo_node);
+ mutex_enter(&cgm->cg_contents);
+
+ cdp = nextp;
+ }
+
+ cgrp_cg_hash_remove(cgm, cn);
+ }
+
+ cndp = cgrp_hash_lookup(nm, dir, NOHOLD, &cnnp);
+ VERIFY(cndp != NULL);
+ VERIFY(cn == cnnp);
+
+ cgrp_hash_out(cndp);
+
+ /* Take cndp out of the directory list. */
+ ASSERT(cndp->cgd_next != cndp);
+ ASSERT(cndp->cgd_prev != cndp);
+ if (cndp->cgd_prev) {
+ cndp->cgd_prev->cgd_next = cndp->cgd_next;
+ }
+ if (cndp->cgd_next) {
+ cndp->cgd_next->cgd_prev = cndp->cgd_prev;
+ }
+
+ /*
+ * If the roving slot pointer happens to match cndp,
+ * point it at the previous dirent.
+ */
+ if (dir->cgn_dir->cgd_prev == cndp) {
+ dir->cgn_dir->cgd_prev = cndp->cgd_prev;
+ }
+ ASSERT(cndp->cgd_next != cndp);
+ ASSERT(cndp->cgd_prev != cndp);
+
+ /* cndp points to the correct directory entry */
+ namelen = strlen(cndp->cgd_name) + 1;
+
+ kmem_free(cndp, sizeof (cgrp_dirent_t) + namelen);
+ dir->cgn_size -= (sizeof (cgrp_dirent_t) + namelen);
+ dir->cgn_dirents--;
+
+ gethrestime(&now);
+ dir->cgn_mtime = now;
+ dir->cgn_ctime = now;
+ cn->cgn_ctime = now;
+
+ ASSERT(cn->cgn_nlink > 0);
+ cn->cgn_nlink--;
+ if (op == DR_RMDIR && cn->cgn_type == CG_CGROUP_DIR) {
+ cgrp_dirtrunc(cn);
+ ASSERT(cn->cgn_nlink == 0);
+ }
+ return (0);
+}
+
+/*
+ * Initialize a cgrp_node and add it to file list under mount point.
+ */
+void
+cgrp_node_init(cgrp_mnt_t *cgm, cgrp_node_t *cn, vattr_t *vap, cred_t *cred)
+{
+ struct vnode *vp;
+ timestruc_t now;
+
+ ASSERT(MUTEX_HELD(&cgm->cg_contents));
+ ASSERT(vap != NULL);
+
+ cn->cgn_mode = MAKEIMODE(vap->va_type, vap->va_mode);
+ cn->cgn_mask = 0;
+ cn->cgn_attr.va_type = vap->va_type;
+ cn->cgn_nlink = 1;
+ cn->cgn_size = 0;
+
+ if (cred == NULL) {
+ cn->cgn_uid = vap->va_uid;
+ cn->cgn_gid = vap->va_gid;
+ } else {
+ cn->cgn_uid = crgetuid(cred);
+ cn->cgn_gid = crgetgid(cred);
+ }
+
+ cn->cgn_fsid = cgm->cg_dev;
+ cn->cgn_rdev = vap->va_rdev;
+ cn->cgn_blksize = PAGESIZE;
+ cn->cgn_nblocks = 0;
+ gethrestime(&now);
+ cn->cgn_atime = now;
+ cn->cgn_mtime = now;
+ cn->cgn_ctime = now;
+ cn->cgn_seq = 0;
+ cn->cgn_dir = NULL;
+
+ cn->cgn_vnode = vn_alloc(KM_SLEEP);
+ vp = CGNTOV(cn);
+ vn_setops(vp, cgrp_vnodeops);
+ vp->v_vfsp = cgm->cg_vfsp;
+ vp->v_type = vap->va_type;
+ vp->v_rdev = vap->va_rdev;
+ vp->v_data = (caddr_t)cn;
+
+ cn->cgn_nodeid = cgm->cg_gen++;
+
+ /*
+ * Add new cgrp_node to end of linked list of cgrp_nodes for this
+ * cgroup fs. Root directory is handled specially in cgrp_mount.
+ */
+ if (cgm->cg_rootnode != (cgrp_node_t *)NULL) {
+ cn->cgn_forw = NULL;
+ cn->cgn_back = cgm->cg_rootnode->cgn_back;
+ cn->cgn_back->cgn_forw = cgm->cg_rootnode->cgn_back = cn;
+ }
+ vn_exists(vp);
+}
+
+void
+cgrp_addnode(cgrp_mnt_t *cgm, cgrp_node_t *dir, char *name,
+ cgrp_nodetype_t type, struct vattr *nattr, cred_t *cr)
+{
+ cgrp_node_t *ncn;
+
+ ASSERT(MUTEX_HELD(&cgm->cg_contents));
+
+ cgrp_direnter(cgm, dir, name, DE_CREATE, (cgrp_node_t *)NULL, nattr,
+ &ncn, cr, NULL);
+
+ /*
+ * Fix the inode and assign the pseudo file type to be correct.
+ */
+ ncn->cgn_nodeid = cgrp_inode(type, dir->cgn_nodeid);
+ ncn->cgn_type = type;
+
+ /*
+ * Since we're creating these entries here and not via the
+ * normal VOP_CREATE code path, we need to do the rele to drop
+ * our hold. This will leave the vnode v_count at 0 when we
+ * come out of cgrp_inactive but we won't reclaim the vnode
+ * there since the cgn_nlink value will still be 1.
+ */
+ mutex_exit(&cgm->cg_contents);
+ cgnode_rele(ncn);
+ mutex_enter(&cgm->cg_contents);
+}
+
+/*
+ * cgrp_dirinit is used internally to initialize a directory (dir)
+ * with '.' and '..' entries without checking permissions and locking
+ * It also creates the entries for the pseudo file nodes that reside in the
+ * directory.
+ */
+void
+cgrp_dirinit(cgrp_node_t *parent, cgrp_node_t *dir, cred_t *cr)
+{
+ cgrp_dirent_t *dot, *dotdot;
+ timestruc_t now;
+ cgrp_mnt_t *cgm = VTOCGM(dir->cgn_vnode);
+ cgrp_ssde_t *ssdp;
+ cgrp_subsys_dirent_t *pseudo_files;
+ struct vattr nattr;
+ int i;
+
+ ASSERT(MUTEX_HELD(&cgm->cg_contents));
+ ASSERT(dir->cgn_type == CG_CGROUP_DIR);
+
+ ASSERT(cgm->cg_ssid > 0 && cgm->cg_ssid < CG_SSID_NUM);
+ ssdp = &cg_ssde_dir[cgm->cg_ssid];
+
+ /*
+ * If this is the top-level cgroup created by the mount then we need to
+ * count up the number of procs and tasks already running in the zone.
+ */
+
+ /*
+ * Set the cgroup ID for this cgrp_node by using a counter on each
+ * mount.
+ */
+ dir->cgn_id = cgm->cg_grp_gen++;
+ cgrp_cg_hash_insert(cgm, dir);
+ /* Initialise the first cgroup if this is top-level group */
+ if (parent == dir)
+ cgrp_cg_hash_init(cgm, dir);
+
+ /*
+ * Initialize the entries
+ */
+ dot = kmem_zalloc(sizeof (cgrp_dirent_t) + 2, KM_SLEEP);
+ dot->cgd_cgrp_node = dir;
+ dot->cgd_offset = 0;
+ dot->cgd_name = (char *)dot + sizeof (cgrp_dirent_t);
+ dot->cgd_name[0] = '.';
+ dot->cgd_parent = dir;
+ cgrp_hash_in(dot);
+
+ dotdot = kmem_zalloc(sizeof (cgrp_dirent_t) + 3, KM_SLEEP);
+ dotdot->cgd_cgrp_node = parent;
+ dotdot->cgd_offset = 1;
+ dotdot->cgd_name = (char *)dotdot + sizeof (cgrp_dirent_t);
+ dotdot->cgd_name[0] = '.';
+ dotdot->cgd_name[1] = '.';
+ dotdot->cgd_parent = dir;
+ cgrp_hash_in(dotdot);
+
+ /*
+ * Initialize directory entry list.
+ */
+ dot->cgd_next = dotdot;
+ dot->cgd_prev = dotdot; /* dot's cgd_prev holds roving slot pointer */
+ dotdot->cgd_next = NULL;
+ dotdot->cgd_prev = dot;
+
+ gethrestime(&now);
+ dir->cgn_mtime = now;
+ dir->cgn_ctime = now;
+
+ parent->cgn_nlink++;
+ parent->cgn_ctime = now;
+
+ dir->cgn_dir = dot;
+ dir->cgn_size = 2 * sizeof (cgrp_dirent_t) + 5; /* dot and dotdot */
+ dir->cgn_dirents = 2;
+ dir->cgn_nlink = 2;
+
+ bzero(&nattr, sizeof (struct vattr));
+ nattr.va_mode = (mode_t)(0644);
+ nattr.va_type = VREG;
+ nattr.va_rdev = 0;
+
+ /*
+ * If this is the top-level dir in the file system then it always
+ * has a release_agent pseudo file. Only the top-level dir has this
+ * file.
+ */
+ if (parent == dir) {
+ cgrp_addnode(cgm, dir, "release_agent", CG_REL_AGENT, &nattr,
+ cr);
+ }
+
+ pseudo_files = ssdp->cg_ssde_files;
+ for (i = 0; i < ssdp->cg_ssde_nfiles; i++) {
+ cgrp_addnode(cgm, dir, pseudo_files[i].cgrp_ssd_name,
+ pseudo_files[i].cgrp_ssd_type, &nattr, cr);
+ }
+}
+
+/*
+ * cgrp_dirtrunc is called to remove all directory entries under this directory.
+ */
+void
+cgrp_dirtrunc(cgrp_node_t *dir)
+{
+ cgrp_dirent_t *cgdp;
+ timestruc_t now;
+ cgrp_mnt_t *cgm = VTOCGM(dir->cgn_vnode);
+
+ ASSERT(MUTEX_HELD(&cgm->cg_contents));
+ ASSERT(dir->cgn_type == CG_CGROUP_DIR);
+
+ for (cgdp = dir->cgn_dir; cgdp; cgdp = dir->cgn_dir) {
+ size_t namelen;
+ cgrp_node_t *cn;
+
+ ASSERT(cgdp->cgd_next != cgdp);
+ ASSERT(cgdp->cgd_prev != cgdp);
+ ASSERT(cgdp->cgd_cgrp_node);
+
+ dir->cgn_dir = cgdp->cgd_next;
+ namelen = strlen(cgdp->cgd_name) + 1;
+
+ /*
+ * Adjust the link counts to account for this directory entry
+ * removal. We do hold/rele operations to free up these nodes.
+ */
+ cn = cgdp->cgd_cgrp_node;
+ ASSERT(cn->cgn_nlink > 0);
+ cn->cgn_nlink--;
+
+ cgrp_hash_out(cgdp);
+ kmem_free(cgdp, sizeof (cgrp_dirent_t) + namelen);
+ dir->cgn_size -= (sizeof (cgrp_dirent_t) + namelen);
+ dir->cgn_dirents--;
+ }
+
+ gethrestime(&now);
+ dir->cgn_mtime = now;
+ dir->cgn_ctime = now;
+
+ ASSERT(dir->cgn_dir == NULL);
+ ASSERT(dir->cgn_size == 0);
+ ASSERT(dir->cgn_dirents == 0);
+}
+
+static int
+cgrp_diraddentry(cgrp_node_t *dir, cgrp_node_t *cn, char *name, enum de_op op)
+{
+ cgrp_dirent_t *cdp, *cpdp;
+ size_t namelen, alloc_size;
+ timestruc_t now;
+
+ /*
+ * Make sure the parent directory wasn't removed from
+ * underneath the caller.
+ */
+ if (dir->cgn_dir == NULL)
+ return (ENOENT);
+
+ /* Check that everything is on the same filesystem. */
+ if (cn->cgn_vnode->v_vfsp != dir->cgn_vnode->v_vfsp)
+ return (EXDEV);
+
+ /* Allocate and initialize directory entry */
+ namelen = strlen(name) + 1;
+ alloc_size = namelen + sizeof (cgrp_dirent_t);
+ cdp = kmem_zalloc(alloc_size, KM_NOSLEEP | KM_NORMALPRI);
+ if (cdp == NULL)
+ return (ENOSPC);
+
+ cn->cgn_parent = dir;
+
+ dir->cgn_size += alloc_size;
+ dir->cgn_dirents++;
+ cdp->cgd_cgrp_node = cn;
+ cdp->cgd_parent = dir;
+
+ /* The directory entry and its name were allocated sequentially. */
+ cdp->cgd_name = (char *)cdp + sizeof (cgrp_dirent_t);
+ (void) strcpy(cdp->cgd_name, name);
+
+ cgrp_hash_in(cdp);
+
+ /*
+ * Some utilities expect the size of a directory to remain
+ * somewhat static. For example, a routine which removes
+ * subdirectories between calls to readdir(); the size of the
+ * directory changes from underneath it and so the real
+ * directory offset in bytes is invalid. To circumvent
+ * this problem, we initialize a directory entry with an
+ * phony offset, and use this offset to determine end of
+ * file in cgrp_readdir.
+ */
+ cpdp = dir->cgn_dir->cgd_prev;
+ /*
+ * Install at first empty "slot" in directory list.
+ */
+ while (cpdp->cgd_next != NULL && (cpdp->cgd_next->cgd_offset -
+ cpdp->cgd_offset) <= 1) {
+ ASSERT(cpdp->cgd_next != cpdp);
+ ASSERT(cpdp->cgd_prev != cpdp);
+ ASSERT(cpdp->cgd_next->cgd_offset > cpdp->cgd_offset);
+ cpdp = cpdp->cgd_next;
+ }
+ cdp->cgd_offset = cpdp->cgd_offset + 1;
+
+ /*
+ * If we're at the end of the dirent list and the offset (which
+ * is necessarily the largest offset in this directory) is more
+ * than twice the number of dirents, that means the directory is
+ * 50% holes. At this point we reset the slot pointer back to
+ * the beginning of the directory so we start using the holes.
+ * The idea is that if there are N dirents, there must also be
+ * N holes, so we can satisfy the next N creates by walking at
+ * most 2N entries; thus the average cost of a create is constant.
+ * Note that we use the first dirent's cgd_prev as the roving
+ * slot pointer; it's ugly, but it saves a word in every dirent.
+ */
+ if (cpdp->cgd_next == NULL && cpdp->cgd_offset > 2 * dir->cgn_dirents)
+ dir->cgn_dir->cgd_prev = dir->cgn_dir->cgd_next;
+ else
+ dir->cgn_dir->cgd_prev = cdp;
+
+ ASSERT(cpdp->cgd_next != cpdp);
+ ASSERT(cpdp->cgd_prev != cpdp);
+
+ cdp->cgd_next = cpdp->cgd_next;
+ if (cdp->cgd_next) {
+ cdp->cgd_next->cgd_prev = cdp;
+ }
+ cdp->cgd_prev = cpdp;
+ cpdp->cgd_next = cdp;
+
+ ASSERT(cdp->cgd_next != cdp);
+ ASSERT(cdp->cgd_prev != cdp);
+ ASSERT(cpdp->cgd_next != cpdp);
+ ASSERT(cpdp->cgd_prev != cpdp);
+
+ gethrestime(&now);
+ dir->cgn_mtime = now;
+ dir->cgn_ctime = now;
+
+ return (0);
+}
+
+static int
+cgrp_dirmakecgnode(cgrp_node_t *dir, cgrp_mnt_t *cgm, struct vattr *va,
+ enum de_op op, cgrp_node_t **newnode, struct cred *cred)
+{
+ cgrp_node_t *cn;
+
+ ASSERT(MUTEX_HELD(&cgm->cg_contents));
+ ASSERT(va != NULL);
+
+ if (((va->va_mask & AT_ATIME) && TIMESPEC_OVERFLOW(&va->va_atime)) ||
+ ((va->va_mask & AT_MTIME) && TIMESPEC_OVERFLOW(&va->va_mtime)))
+ return (EOVERFLOW);
+
+ cn = kmem_zalloc(sizeof (cgrp_node_t), KM_SLEEP);
+ cgrp_node_init(cgm, cn, va, cred);
+
+ cn->cgn_vnode->v_rdev = cn->cgn_rdev = NODEV;
+ cn->cgn_vnode->v_type = va->va_type;
+ cn->cgn_uid = crgetuid(cred);
+ cn->cgn_gid = crgetgid(cred);
+
+ if (va->va_mask & AT_ATIME)
+ cn->cgn_atime = va->va_atime;
+ if (va->va_mask & AT_MTIME)
+ cn->cgn_mtime = va->va_mtime;
+
+ if (op == DE_MKDIR) {
+ cn->cgn_type = CG_CGROUP_DIR;
+ cgrp_dirinit(dir, cn, cred);
+ }
+
+ *newnode = cn;
+ return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/cgroups/cgrps_vfsops.c b/usr/src/uts/common/brand/lx/cgroups/cgrps_vfsops.c
new file mode 100644
index 0000000000..a9bd783569
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/cgroups/cgrps_vfsops.c
@@ -0,0 +1,1052 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+/*
+ * The cgroup file system implements a subset of the Linux cgroup functionality
+ * for use by lx-branded zones. On Linux, cgroups are a generic process grouping
+ * mechanism which is used to apply various behaviors to the processes within
+ * the group, although it's primary purpose is for resource management.
+ *
+ * In Linux, the cgroup file system provides two pieces of functionality:
+ * 1) A per-mount set of cgroups arranged in a tree, such that every task in
+ * the system is in one, and only one, of the cgroups in the tree.
+ * 2) A set of subsystems; each subsystem has subsystem-specific state and
+ * behavior and is associated with a cgroup mount. This provides a way to
+ * apply arbitrary functionality (but generally resource management related)
+ * to the processes associated with the nodes in the tree at that mount
+ * point.
+ *
+ * For example, it is common to see cgroup trees (each is its own mount with a
+ * different subsystem controller) for blkio, cpuset, memory, systemd (has no
+ * controller), etc. Within each tree there is a top-level directory with at
+ * least a cgroup.procs, notify_on_release, release_agent, and tasks file.
+ * The cgroup.procs file lists the processes within that group and the tasks
+ * file lists the threads in the group. There could be subdirectories, which
+ * define new cgroups, that then contain a subset of the processes. Each
+ * subdirectory also has, at a minimum, a cgroup.procs, notify_on_release, and
+ * tasks file.
+ *
+ * Since we're using lx to run user-level code within zones, the majority (all?)
+ * of the cgroup resource management functionality simply doesn't apply to us.
+ * The primary need for cgroups is to support the init program 'systemd' as the
+ * consumer. systemd only requires the process grouping hierarchy of cgroups,
+ * although it can also use the resource management features if they are
+ * available. Given this, our cgroup file system only implements the process
+ * hierarchy and does not report that any resource management controllers are
+ * available for separate mounts.
+ *
+ * In addition to the hierarchy, the other important component of cgroups that
+ * is used by systemd is the 'release_agent'. This provides a mechanism to
+ * run a command when a cgroup becomes empty (the last task in the group
+ * leaves, either by exit or move, and there are no more sub-cgroups). The
+ * 'release_agent' file only exists in the top-level cgroup of the mounted
+ * file system and holds the path to a command to run. The 'notify_on_release'
+ * file exists in each cgroup dir. If that file contains a '1' then the agent
+ * is run when that group becomes empty. The agent is passed a path string of
+ * the cgroup, relative to the file system mount point (e.g. a mount on
+ * /sys/fs/cgroups/systemd with a sub-cgroup of /sys/fs/cgroups/systemd/foo/bar
+ * gets the arg /foo/bar).
+ *
+ * Cgroup membership is implemented via hooks into the lx brand code. When
+ * the cgroup file system loads it installs callbacks for:
+ * lx_cgrp_initlwp
+ * lx_cgrp_freelwp
+ * and when it unloads it clears those hooks. The lx brand code calls those
+ * hooks when a lwp starts and when it exits. Internally we use a
+ * simple reference counter (cgn_task_cnt) on the cgroup node to track how many
+ * threads are in the group, so we can tell when a group becomes empty.
+ * To make this quick, a hash table (cg_grp_hash) is maintained on the
+ * cgrp_mnt_t struct to allow quick lookups by cgroup ID. The hash table is
+ * sized so that there should typically only be 0 or 1 cgroups per bucket.
+ * We also keep a reference to the file system in the zone-specific brand data
+ * (lxzd_cgroup) so that the lx brand code can pass in the correct vfs_t
+ * when it runs the hook.
+ *
+ * Once a cgroup is about to become empty, the final process exiting the cgroup
+ * will launch a new user-level process which execs the release agent. The new
+ * process is created as a child of zsched (indicated by the -1 pid argument
+ * to newproc) and is not associated with the exiting process in any way.
+ *
+ * This file system is similar to tmpfs in that directories only exist in
+ * memory. Each subdirectory represents a different cgroup. Within the cgroup
+ * there are pseudo files (see cg_ssde_dir) with well-defined names which
+ * control the configuration and behavior of the cgroup (see cgrp_nodetype_t).
+ * The primary files within every cgroup are named 'cgroup.procs',
+ * 'notify_on_release', and 'tasks' (as well as 'release_agent' in the
+ * top-level cgroup). The cgroup.procs and tasks files are used to control and
+ * list which processes/threads belong to the cgroup. In the general case there
+ * could be additional files in the cgroup, which defined additional behavior
+ * (i.e. subsystem specific pseudo files), although none exist at this time.
+ *
+ * Each cgroup node has a unique ID (cgn_nodeid) within the mount. This ID is
+ * used to correlate with the threads to determine cgroup membership. When
+ * assigning a PID to a cgroup (via write) the code updates the br_cgroupid
+ * member in the brand-specific lx_lwp_data structure to control which cgroup
+ * the thread belongs to. Note that because the br_cgroupid lives in
+ * lx_lwp_data, native processes will not appear in the cgroup hierarchy.
+ *
+ * An overview of the behavior for the various vnode operations is:
+ * - no hardlinks or symlinks
+ * - no file create (the subsystem-specific files are a fixed list of
+ * pseudo-files accessible within the directory)
+ * - no file remove
+ * - no file rename, but a directory (i.e. a cgroup) can be renamed within the
+ * containing directory, but not into a different directory
+ * - can mkdir and rmdir to create/destroy cgroups
+ * - cannot rmdir while it contains tasks or a subdir (i.e. a sub-cgroup)
+ * - open, read/write, close on the subsytem-specific pseudo files is
+ * allowed, as this is the interface to configure and report on the cgroup.
+ * The pseudo file's mode controls write access and cannot be changed.
+ *
+ * The locking in this file system is simple since the file system is not
+ * subjected to heavy I/O activity and all data is in-memory. There is a single
+ * global mutex for each mount (cg_contents). This mutex is held for the life
+ * of most vnode operations. The most active path is probably the LWP start and
+ * exit hooks which increment/decrement the reference counter on the cgroup
+ * node. The lock is important for this case since we don't want concurrent
+ * activity (such as moving the process into another cgroup) while we're trying
+ * to lookup the cgroup from the mount's hash table. We must be careful to
+ * avoid a deadlock while reading or writing since that code can take pidlock
+ * and p_lock, but the cgrp_lwp_fork_helper can also be called while one of
+ * those is held. To prevent deadlock we always take cg_contents after pidlock
+ * and p_lock.
+ *
+ * EXTENDING THE FILE SYSTEM
+ *
+ * When adding support for a new subsystem, be sure to also update the
+ * lxpr_read_cgroups function in lx_procfs so that the subsystem is reported
+ * by proc.
+ *
+ * Although we don't currently support any subsystem controllers, the design
+ * allows for the file system to be extended to add controller emulation
+ * if needed. New controller IDs (i.e. different subsystems) for a mount can
+ * be defined in the cgrp_ssid_t enum (e.g. CG_SSID_CPUSET or CG_SSID_MEMORY)
+ * and new node types for additional pseudo files in the tree can be defined in
+ * the cgrp_nodetype_t enum (e.g. CG_CPUSET_CPUS or CG_MEMORY_USAGE_IN_BYTES).
+ * The cg_ssde_dir array would need a new entry for the new subsystem to
+ * control which nodes are visible in a directory for the new subsystem.
+ *
+ * New emulation would then need to be written to manage the behavior on the
+ * new pseudo file(s) associated with new cgrp_nodetype_t types.
+ *
+ * Within lx procfs the lxpr_read_pid_cgroup() function would need to be
+ * updated so that it reported the various subsystems used by the different
+ * mounts.
+ *
+ * In addition, in order to support more than one cgroup mount we would need a
+ * list of cgroup IDs associated with every thread, instead of just one ID
+ * (br_cgroupid). The thread data would need to become a struct which held
+ * both an ID and an indication as to which mounted cgroup file system instance
+ * the ID was associated with. We would also need a list of cgroup mounts per
+ * zone, instead the current single zone reference.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+#include <sys/kmem.h>
+#include <sys/time.h>
+#include <sys/pathname.h>
+#include <sys/vfs.h>
+#include <sys/vfs_opreg.h>
+#include <sys/vnode.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+#include <sys/stat.h>
+#include <sys/errno.h>
+#include <sys/cmn_err.h>
+#include <sys/cred.h>
+#include <sys/statvfs.h>
+#include <sys/mount.h>
+#include <sys/systm.h>
+#include <sys/mntent.h>
+#include <sys/policy.h>
+#include <sys/sdt.h>
+#include <sys/ddi.h>
+#include <sys/vmparam.h>
+#include <sys/corectl.h>
+#include <sys/contract_impl.h>
+#include <sys/pool.h>
+#include <sys/stack.h>
+#include <sys/rt.h>
+#include <sys/fx.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+
+#include "cgrps.h"
+
+/* Module level parameters */
+static int cgrp_fstype;
+static dev_t cgrp_dev;
+
+#define MAX_AGENT_EVENTS 32 /* max num queued events */
+
+#define UMNT_DELAY_TIME drv_usectohz(50000) /* 500th of a second */
+#define UMNT_RETRY_MAX 100 /* 100 times - 2 secs */
+
+/*
+ * cgrp_mountcount is used to prevent module unloads while there is still
+ * state from a former mount hanging around. The filesystem module must not be
+ * allowed to go away before the last VFS_FREEVFS() call has been made. Since
+ * this is just an atomic counter, there's no need for locking.
+ */
+static uint32_t cgrp_mountcount;
+
+/*
+ * cgrp_minfree is the minimum amount of swap space that cgroups leaves for
+ * the rest of the zone. In other words, if the amount of free swap space
+ * in the zone drops below cgrp_minfree, cgroup anon allocations will fail.
+ * This number is only likely to become factor when DRAM and swap have both
+ * been capped low to allow for maximum tenancy.
+ */
+size_t cgrp_minfree = 0;
+
+/*
+ * CGMINFREE -- the value from which cgrp_minfree is derived -- should be
+ * configured to a value that is roughly the smallest practical value for
+ * memory + swap minus the largest reasonable size for cgroups in such
+ * a configuration. As of this writing, the smallest practical memory + swap
+ * configuration is 128MB, and it seems reasonable to allow cgroups to consume
+ * no more than half of this, yielding a CGMINFREE of 64MB.
+ */
+#define CGMINFREE 64 * 1024 * 1024 /* 64 Megabytes */
+
+extern pgcnt_t swapfs_minfree;
+
+/*
+ * cgroup vfs operations.
+ */
+static int cgrp_init(int, char *);
+static int cgrp_mount(struct vfs *, struct vnode *,
+ struct mounta *, struct cred *);
+static int cgrp_unmount(struct vfs *, int, struct cred *);
+static int cgrp_root(struct vfs *, struct vnode **);
+static int cgrp_statvfs(struct vfs *, struct statvfs64 *);
+static void cgrp_freevfs(vfs_t *vfsp);
+
+/* Forward declarations for hooks */
+static void cgrp_lwp_fork_helper(vfs_t *, uint_t, id_t, pid_t);
+static void cgrp_lwp_exit_helper(vfs_t *, uint_t, id_t, pid_t);
+
+/*
+ * Loadable module wrapper
+ */
+#include <sys/modctl.h>
+
+static vfsdef_t vfw = {
+ VFSDEF_VERSION,
+ "lx_cgroup",
+ cgrp_init,
+ VSW_ZMOUNT,
+ NULL
+};
+
+/*
+ * Module linkage information
+ */
+static struct modlfs modlfs = {
+ &mod_fsops, "lx brand cgroups", &vfw
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, &modlfs, NULL
+};
+
+int
+_init()
+{
+ return (mod_install(&modlinkage));
+}
+
+int
+_fini()
+{
+ int error;
+
+ if (cgrp_mountcount)
+ return (EBUSY);
+
+ if ((error = mod_remove(&modlinkage)) != 0)
+ return (error);
+
+ /* Disable hooks used by the lx brand module. */
+ lx_cgrp_initlwp = NULL;
+ lx_cgrp_freelwp = NULL;
+
+ /*
+ * Tear down the operations vectors
+ */
+ (void) vfs_freevfsops_by_type(cgrp_fstype);
+ vn_freevnodeops(cgrp_vnodeops);
+ return (0);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+/*
+ * Initialize global locks, etc. Called when loading cgroup module.
+ */
+static int
+cgrp_init(int fstype, char *name)
+{
+ static const fs_operation_def_t cgrp_vfsops_template[] = {
+ VFSNAME_MOUNT, { .vfs_mount = cgrp_mount },
+ VFSNAME_UNMOUNT, { .vfs_unmount = cgrp_unmount },
+ VFSNAME_ROOT, { .vfs_root = cgrp_root },
+ VFSNAME_STATVFS, { .vfs_statvfs = cgrp_statvfs },
+ VFSNAME_FREEVFS, { .vfs_freevfs = cgrp_freevfs },
+ NULL, NULL
+ };
+ extern const struct fs_operation_def cgrp_vnodeops_template[];
+ int error;
+ extern void cgrp_hash_init();
+ major_t dev;
+
+ cgrp_hash_init();
+ cgrp_fstype = fstype;
+ ASSERT(cgrp_fstype != 0);
+
+ error = vfs_setfsops(fstype, cgrp_vfsops_template, NULL);
+ if (error != 0) {
+ cmn_err(CE_WARN, "cgrp_init: bad vfs ops template");
+ return (error);
+ }
+
+ error = vn_make_ops(name, cgrp_vnodeops_template, &cgrp_vnodeops);
+ if (error != 0) {
+ (void) vfs_freevfsops_by_type(fstype);
+ cmn_err(CE_WARN, "cgrp_init: bad vnode ops template");
+ return (error);
+ }
+
+ /*
+ * cgrp_minfree doesn't need to be some function of configured
+ * swap space since it really is an absolute limit of swap space
+ * which still allows other processes to execute.
+ */
+ if (cgrp_minfree == 0) {
+ /* Set if not patched */
+ cgrp_minfree = btopr(CGMINFREE);
+ }
+
+ if ((dev = getudev()) == (major_t)-1) {
+ cmn_err(CE_WARN, "cgrp_init: Can't get unique device number.");
+ dev = 0;
+ }
+
+ /*
+ * Make the pseudo device
+ */
+ cgrp_dev = makedevice(dev, 0);
+
+ /* Install the hooks used by the lx brand module. */
+ lx_cgrp_initlwp = cgrp_lwp_fork_helper;
+ lx_cgrp_freelwp = cgrp_lwp_exit_helper;
+
+ return (0);
+}
+
+static int
+cgrp_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
+{
+ cgrp_mnt_t *cgm = NULL;
+ struct cgrp_node *cp;
+ struct pathname dpn;
+ int error;
+ struct vattr rattr;
+ cgrp_ssid_t ssid = CG_SSID_GENERIC;
+ lx_zone_data_t *lxzdata;
+
+ if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
+ return (error);
+
+ if (mvp->v_type != VDIR)
+ return (ENOTDIR);
+
+ /*
+ * Since we depend on per-thread lx brand data, only allow mounting
+ * within lx zones.
+ */
+ if (curproc->p_zone->zone_brand != &lx_brand)
+ return (EINVAL);
+
+ /*
+ * Ensure we don't allow overlaying mounts
+ */
+ mutex_enter(&mvp->v_lock);
+ if ((uap->flags & MS_OVERLAY) == 0 &&
+ (mvp->v_count > 1 || (mvp->v_flag & VROOT))) {
+ mutex_exit(&mvp->v_lock);
+ return (EBUSY);
+ }
+ mutex_exit(&mvp->v_lock);
+
+ /*
+ * Having the resource be anything but "swap" doesn't make sense.
+ */
+ vfs_setresource(vfsp, "swap", 0);
+
+ /* cgroups don't support read-only mounts */
+ if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) {
+ error = EINVAL;
+ goto out;
+ }
+
+ /*
+ * Here is where we could support subsystem-specific controller
+ * mounting. For example, if mounting a cgroup fs with the 'cpuset'
+ * option to specify that particular controller.
+ *
+ * char *argstr;
+ * if (vfs_optionisset(vfsp, "cpuset", &argstr)) {
+ * if (ssid != CG_SSID_GENERIC) {
+ * error = EINVAL;
+ * goto out;
+ * }
+ * ssid = CG_SSID_CPUSET;
+ * }
+ */
+
+ error = pn_get(uap->dir,
+ (uap->flags & MS_SYSSPACE) ? UIO_SYSSPACE : UIO_USERSPACE, &dpn);
+ if (error != 0)
+ goto out;
+
+ /*
+ * We currently only support one mount per zone.
+ */
+ lxzdata = ztolxzd(curproc->p_zone);
+ mutex_enter(&lxzdata->lxzd_lock);
+ if (lxzdata->lxzd_cgroup != NULL) {
+ mutex_exit(&lxzdata->lxzd_lock);
+ return (EINVAL);
+ }
+
+ cgm = kmem_zalloc(sizeof (*cgm), KM_SLEEP);
+
+ /* Set but don't bother entering the mutex (not on mount list yet) */
+ mutex_init(&cgm->cg_contents, NULL, MUTEX_DEFAULT, NULL);
+
+ cgm->cg_vfsp = lxzdata->lxzd_cgroup = vfsp;
+ mutex_exit(&lxzdata->lxzd_lock);
+
+ cgm->cg_lxzdata = lxzdata;
+ cgm->cg_ssid = ssid;
+
+ vfsp->vfs_data = (caddr_t)cgm;
+ vfsp->vfs_fstype = cgrp_fstype;
+ vfsp->vfs_dev = cgrp_dev;
+ vfsp->vfs_bsize = PAGESIZE;
+ vfsp->vfs_flag |= VFS_NOTRUNC;
+ vfs_make_fsid(&vfsp->vfs_fsid, cgrp_dev, cgrp_fstype);
+ cgm->cg_mntpath = kmem_zalloc(dpn.pn_pathlen + 1, KM_SLEEP);
+ (void) strcpy(cgm->cg_mntpath, dpn.pn_path);
+
+ cgm->cg_grp_hash = kmem_zalloc(sizeof (cgrp_node_t *) * CGRP_HASH_SZ,
+ KM_SLEEP);
+
+ /* allocate and initialize root cgrp_node structure */
+ bzero(&rattr, sizeof (struct vattr));
+ rattr.va_mode = (mode_t)(S_IFDIR | 0755);
+ rattr.va_type = VDIR;
+ rattr.va_rdev = 0;
+ cp = kmem_zalloc(sizeof (struct cgrp_node), KM_SLEEP);
+
+ mutex_enter(&cgm->cg_contents);
+ cgrp_node_init(cgm, cp, &rattr, cr);
+
+ CGNTOV(cp)->v_flag |= VROOT;
+
+ /*
+ * initialize linked list of cgrp_nodes so that the back pointer of
+ * the root cgrp_node always points to the last one on the list
+ * and the forward pointer of the last node is null
+ */
+ cp->cgn_back = cp;
+ cp->cgn_forw = NULL;
+ cp->cgn_nlink = 0;
+ cgm->cg_rootnode = cp;
+
+ cp->cgn_type = CG_CGROUP_DIR;
+ cp->cgn_nodeid = cgrp_inode(ssid, cgm->cg_gen);
+ cgrp_dirinit(cp, cp, cr);
+
+ mutex_exit(&cgm->cg_contents);
+
+ pn_free(&dpn);
+ error = 0;
+ atomic_inc_32(&cgrp_mountcount);
+
+out:
+ if (error == 0)
+ vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS);
+
+ return (error);
+}
+
+static int
+cgrp_unmount(struct vfs *vfsp, int flag, struct cred *cr)
+{
+ cgrp_mnt_t *cgm = (cgrp_mnt_t *)VFSTOCGM(vfsp);
+ cgrp_node_t *cgnp, *cancel;
+ struct vnode *vp;
+ int error;
+ uint_t cnt;
+ int retry_cnt = 0;
+
+ if ((error = secpolicy_fs_unmount(cr, vfsp)) != 0)
+ return (error);
+
+retry:
+ mutex_enter(&cgm->cg_contents);
+
+ /*
+ * In the normal unmount case, if there were no open files, only the
+ * root node would have a reference count. However, the user-level
+ * agent manager should have the root vnode open and be waiting in
+ * ioctl. We need to wake the manager and it may take some retries
+ * before it closes its file descriptor.
+ *
+ * With cg_contents held, nothing can be added or removed.
+ * There may be some dirty pages. To prevent fsflush from
+ * disrupting the unmount, put a hold on each node while scanning.
+ * If we find a previously referenced node, undo the holds we have
+ * placed and fail EBUSY.
+ */
+ cgnp = cgm->cg_rootnode;
+
+ ASSERT(cgm->cg_lxzdata->lxzd_cgroup != NULL);
+
+ vp = CGNTOV(cgnp);
+ mutex_enter(&vp->v_lock);
+
+ if (flag & MS_FORCE) {
+ mutex_exit(&vp->v_lock);
+ mutex_exit(&cgm->cg_contents);
+ return (EINVAL);
+ }
+
+
+ cnt = vp->v_count;
+ if (cnt > 1) {
+ mutex_exit(&vp->v_lock);
+ mutex_exit(&cgm->cg_contents);
+ /* Likely because the user-level manager hasn't exited yet */
+ if (retry_cnt++ < UMNT_RETRY_MAX) {
+ delay(UMNT_DELAY_TIME);
+ goto retry;
+ }
+ return (EBUSY);
+ }
+
+ mutex_exit(&vp->v_lock);
+
+ /*
+ * Check for open files. An open file causes everything to unwind.
+ */
+ for (cgnp = cgnp->cgn_forw; cgnp; cgnp = cgnp->cgn_forw) {
+ vp = CGNTOV(cgnp);
+ mutex_enter(&vp->v_lock);
+ cnt = vp->v_count;
+ if (cnt > 0) {
+ /* An open file; unwind the holds we've been adding. */
+ mutex_exit(&vp->v_lock);
+ cancel = cgm->cg_rootnode->cgn_forw;
+ while (cancel != cgnp) {
+ vp = CGNTOV(cancel);
+ ASSERT(vp->v_count > 0);
+ VN_RELE(vp);
+ cancel = cancel->cgn_forw;
+ }
+ mutex_exit(&cgm->cg_contents);
+ return (EBUSY);
+ } else {
+ /* directly add a VN_HOLD since we have the lock */
+ vp->v_count++;
+ mutex_exit(&vp->v_lock);
+ }
+ }
+
+ mutex_enter(&cgm->cg_lxzdata->lxzd_lock);
+ cgm->cg_lxzdata->lxzd_cgroup = NULL;
+ mutex_exit(&cgm->cg_lxzdata->lxzd_lock);
+ kmem_free(cgm->cg_grp_hash, sizeof (cgrp_node_t *) * CGRP_HASH_SZ);
+
+ /*
+ * We can drop the mutex now because
+ * no one can find this mount anymore
+ */
+ vfsp->vfs_flag |= VFS_UNMOUNTED;
+ mutex_exit(&cgm->cg_contents);
+
+ return (0);
+}
+
+/*
+ * Implementation of VFS_FREEVFS(). This is called by the vfs framework after
+ * umount and the last VFS_RELE, to trigger the release of any resources still
+ * associated with the given vfs_t. This is normally called immediately after
+ * cgrp_umount.
+ */
+void
+cgrp_freevfs(vfs_t *vfsp)
+{
+ cgrp_mnt_t *cgm = (cgrp_mnt_t *)VFSTOCGM(vfsp);
+ cgrp_node_t *cn;
+ struct vnode *vp;
+
+ /*
+ * Free all kmemalloc'd and anonalloc'd memory associated with
+ * this filesystem. To do this, we go through the file list twice,
+ * once to remove all the directory entries, and then to remove
+ * all the pseudo files.
+ */
+
+ /*
+ * Now that we are tearing ourselves down we need to remove the
+ * UNMOUNTED flag. If we don't, we'll later hit a VN_RELE when we remove
+ * files from the system causing us to have a negative value. Doing this
+ * seems a bit better than trying to set a flag on the tmount that says
+ * we're tearing down.
+ */
+ vfsp->vfs_flag &= ~VFS_UNMOUNTED;
+
+ /*
+ * Remove all directory entries
+ */
+ for (cn = cgm->cg_rootnode; cn; cn = cn->cgn_forw) {
+ mutex_enter(&cgm->cg_contents);
+ if (cn->cgn_type == CG_CGROUP_DIR)
+ cgrp_dirtrunc(cn);
+ mutex_exit(&cgm->cg_contents);
+ }
+
+ ASSERT(cgm->cg_rootnode);
+
+ /*
+ * All links are gone, v_count is keeping nodes in place.
+ * VN_RELE should make the node disappear, unless somebody
+ * is holding pages against it. Nap and retry until it disappears.
+ *
+ * We re-acquire the lock to prevent others who have a HOLD on
+ * a cgrp_node via its pages or anon slots from blowing it away
+ * (in cgrp_inactive) while we're trying to get to it here. Once
+ * we have a HOLD on it we know it'll stick around.
+ *
+ */
+ mutex_enter(&cgm->cg_contents);
+
+ /* Remove all the files (except the rootnode) backwards. */
+ while ((cn = cgm->cg_rootnode->cgn_back) != cgm->cg_rootnode) {
+ mutex_exit(&cgm->cg_contents);
+ /*
+ * All nodes will be released here. Note we handled the link
+ * count above.
+ */
+ vp = CGNTOV(cn);
+ VN_RELE(vp);
+ mutex_enter(&cgm->cg_contents);
+ /*
+ * It's still there after the RELE. Someone else like pageout
+ * has a hold on it so wait a bit and then try again - we know
+ * they'll give it up soon.
+ */
+ if (cn == cgm->cg_rootnode->cgn_back) {
+ VN_HOLD(vp);
+ mutex_exit(&cgm->cg_contents);
+ delay(hz / 4);
+ mutex_enter(&cgm->cg_contents);
+ }
+ }
+ mutex_exit(&cgm->cg_contents);
+
+ VN_RELE(CGNTOV(cgm->cg_rootnode));
+
+ ASSERT(cgm->cg_mntpath);
+
+ kmem_free(cgm->cg_mntpath, strlen(cgm->cg_mntpath) + 1);
+
+ mutex_destroy(&cgm->cg_contents);
+ kmem_free(cgm, sizeof (cgrp_mnt_t));
+
+ /* Allow _fini() to succeed now */
+ atomic_dec_32(&cgrp_mountcount);
+}
+
+/*
+ * return root cgnode for given vnode
+ */
+static int
+cgrp_root(struct vfs *vfsp, struct vnode **vpp)
+{
+ cgrp_mnt_t *cgm = (cgrp_mnt_t *)VFSTOCGM(vfsp);
+ cgrp_node_t *cp = cgm->cg_rootnode;
+ struct vnode *vp;
+
+ ASSERT(cp);
+
+ vp = CGNTOV(cp);
+ VN_HOLD(vp);
+ *vpp = vp;
+ return (0);
+}
+
+static int
+cgrp_statvfs(struct vfs *vfsp, struct statvfs64 *sbp)
+{
+ cgrp_mnt_t *cgm = (cgrp_mnt_t *)VFSTOCGM(vfsp);
+ ulong_t blocks;
+ dev32_t d32;
+ zoneid_t eff_zid;
+ struct zone *zp;
+
+ zp = cgm->cg_vfsp->vfs_zone;
+
+ if (zp == NULL)
+ eff_zid = GLOBAL_ZONEUNIQID;
+ else
+ eff_zid = zp->zone_id;
+
+ sbp->f_bsize = PAGESIZE;
+ sbp->f_frsize = PAGESIZE;
+
+ /*
+ * Find the amount of available physical and memory swap
+ */
+ mutex_enter(&anoninfo_lock);
+ ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);
+ blocks = (ulong_t)CURRENT_TOTAL_AVAILABLE_SWAP;
+ mutex_exit(&anoninfo_lock);
+
+ if (blocks > cgrp_minfree)
+ sbp->f_bfree = blocks - cgrp_minfree;
+ else
+ sbp->f_bfree = 0;
+
+ sbp->f_bavail = sbp->f_bfree;
+
+ /*
+ * Total number of blocks is just what's available
+ */
+ sbp->f_blocks = (fsblkcnt64_t)(sbp->f_bfree);
+
+ if (eff_zid != GLOBAL_ZONEUNIQID &&
+ zp->zone_max_swap_ctl != UINT64_MAX) {
+ /*
+ * If the fs is used by a zone with a swap cap,
+ * then report the capped size.
+ */
+ rctl_qty_t cap, used;
+ pgcnt_t pgcap, pgused;
+
+ mutex_enter(&zp->zone_mem_lock);
+ cap = zp->zone_max_swap_ctl;
+ used = zp->zone_max_swap;
+ mutex_exit(&zp->zone_mem_lock);
+
+ pgcap = btop(cap);
+ pgused = btop(used);
+
+ sbp->f_bfree = MIN(pgcap - pgused, sbp->f_bfree);
+ sbp->f_bavail = sbp->f_bfree;
+ sbp->f_blocks = MIN(pgcap, sbp->f_blocks);
+ }
+
+ /*
+ * The maximum number of files available is approximately the number
+ * of cgrp_nodes we can allocate from the remaining kernel memory
+ * available to cgroups. This is fairly inaccurate since it doesn't
+ * take into account the names stored in the directory entries.
+ */
+ sbp->f_ffree = sbp->f_files = ptob(availrmem) /
+ (sizeof (cgrp_node_t) + sizeof (cgrp_dirent_t));
+ sbp->f_favail = (fsfilcnt64_t)(sbp->f_ffree);
+ (void) cmpldev(&d32, vfsp->vfs_dev);
+ sbp->f_fsid = d32;
+ (void) strcpy(sbp->f_basetype, vfssw[cgrp_fstype].vsw_name);
+ (void) strncpy(sbp->f_fstr, cgm->cg_mntpath, sizeof (sbp->f_fstr));
+ /* ensure null termination */
+ sbp->f_fstr[sizeof (sbp->f_fstr) - 1] = '\0';
+ sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
+ sbp->f_namemax = MAXNAMELEN - 1;
+ return (0);
+}
+
+static int
+cgrp_get_dirname(cgrp_node_t *cn, char *buf, int blen)
+{
+ cgrp_node_t *parent;
+ cgrp_dirent_t *dp;
+
+ buf[0] = '\0';
+
+ parent = cn->cgn_parent;
+ if (parent == NULL || parent == cn) {
+ (void) strlcpy(buf, ".", blen);
+ return (0);
+ }
+
+ /*
+ * Search the parent dir list to find this cn's name.
+ */
+ for (dp = parent->cgn_dir; dp != NULL; dp = dp->cgd_next) {
+ if (dp->cgd_cgrp_node->cgn_id == cn->cgn_id) {
+ (void) strlcpy(buf, dp->cgd_name, blen);
+ return (0);
+ }
+ }
+
+ return (-1);
+}
+
+typedef struct cgrp_rra_arg {
+ char *crraa_agent_path;
+ char *crraa_event_path;
+} cgrp_rra_arg_t;
+
+static void
+cgrp_run_rel_agent(void *a)
+{
+ cgrp_rra_arg_t *rarg = a;
+ proc_t *p = ttoproc(curthread);
+ zone_t *z = p->p_zone;
+ struct core_globals *cg;
+ int res;
+
+ ASSERT(!INGLOBALZONE(curproc));
+
+ /* The following block is derived from start_init_common */
+ ASSERT_STACK_ALIGNED();
+
+ p->p_cstime = p->p_stime = p->p_cutime = p->p_utime = 0;
+ p->p_usrstack = (caddr_t)USRSTACK32;
+ p->p_model = DATAMODEL_ILP32;
+ p->p_stkprot = PROT_ZFOD & ~PROT_EXEC;
+ p->p_datprot = PROT_ZFOD & ~PROT_EXEC;
+ p->p_stk_ctl = INT32_MAX;
+
+ p->p_as = as_alloc();
+ p->p_as->a_proc = p;
+ p->p_as->a_userlimit = (caddr_t)USERLIMIT32;
+ (void) hat_setup(p->p_as->a_hat, HAT_INIT);
+
+ VERIFY((cg = zone_getspecific(core_zone_key, z)) != NULL);
+
+ corectl_path_hold(cg->core_default_path);
+ corectl_content_hold(cg->core_default_content);
+
+ curproc->p_corefile = cg->core_default_path;
+ curproc->p_content = cg->core_default_content;
+
+ init_mstate(curthread, LMS_SYSTEM);
+ res = exec_init(rarg->crraa_agent_path, rarg->crraa_event_path);
+
+ /* End of code derived from start_init_common */
+
+ kmem_free(rarg->crraa_event_path, MAXPATHLEN);
+ kmem_free(rarg->crraa_agent_path, CGRP_AGENT_LEN);
+ kmem_free(rarg, sizeof (cgrp_rra_arg_t));
+
+ /* The following is derived from zone_start_init - see comments there */
+ if (res != 0 || zone_status_get(global_zone) >= ZONE_IS_SHUTTING_DOWN) {
+ if (proc_exit(CLD_EXITED, res) != 0) {
+ mutex_enter(&p->p_lock);
+ ASSERT(p->p_flag & SEXITLWPS);
+ lwp_exit();
+ }
+ } else {
+ id_t cid = curthread->t_cid;
+
+ mutex_enter(&class_lock);
+ ASSERT(cid < loaded_classes);
+ if (strcmp(sclass[cid].cl_name, "FX") == 0 &&
+ z->zone_fixed_hipri) {
+ pcparms_t pcparms;
+
+ pcparms.pc_cid = cid;
+ ((fxkparms_t *)pcparms.pc_clparms)->fx_upri = FXMAXUPRI;
+ ((fxkparms_t *)pcparms.pc_clparms)->fx_uprilim =
+ FXMAXUPRI;
+ ((fxkparms_t *)pcparms.pc_clparms)->fx_cflags =
+ FX_DOUPRILIM | FX_DOUPRI;
+
+ mutex_enter(&pidlock);
+ mutex_enter(&curproc->p_lock);
+ (void) parmsset(&pcparms, curthread);
+ mutex_exit(&curproc->p_lock);
+ mutex_exit(&pidlock);
+ } else if (strcmp(sclass[cid].cl_name, "RT") == 0) {
+ curthread->t_pri = RTGPPRIO0;
+ }
+ mutex_exit(&class_lock);
+
+ /* cause the process to return to userland. */
+ lwp_rtt();
+ }
+}
+
+/*
+ * Launch the user-level release_agent manager. The event data is the
+ * pathname (relative to the mount point of the file system) of the newly empty
+ * cgroup.
+ *
+ * The cg_contents mutex is held on entry and dropped before returning.
+ */
+void
+cgrp_rel_agent_event(cgrp_mnt_t *cgm, cgrp_node_t *cn)
+{
+ cgrp_node_t *parent;
+ char nm[MAXNAMELEN];
+ char *argstr, *oldstr, *tmp;
+ id_t cid;
+ int agent_err;
+ proc_t *p = ttoproc(curthread);
+ zone_t *z = p->p_zone;
+ lx_lwp_data_t *plwpd = ttolxlwp(curthread);
+ cgrp_rra_arg_t *rarg;
+
+ ASSERT(MUTEX_HELD(&cgm->cg_contents));
+
+ /* Nothing to do if the agent is not set */
+ if (cgm->cg_agent[0] == '\0') {
+ mutex_exit(&cgm->cg_contents);
+ return;
+ }
+
+ parent = cn->cgn_parent;
+ /* Cannot remove the top-level cgroup (only via unmount) */
+ if (parent == cn) {
+ mutex_exit(&cgm->cg_contents);
+ return;
+ }
+
+ argstr = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ oldstr = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ *argstr = '\0';
+
+ /*
+ * Iterate up the directory tree to construct the agent argument string.
+ */
+ do {
+ cgrp_get_dirname(cn, nm, sizeof (nm));
+ DTRACE_PROBE1(cgrp__dir__name, char *, nm);
+ if (*argstr == '\0') {
+ (void) snprintf(argstr, MAXPATHLEN, "/%s", nm);
+ } else {
+ tmp = oldstr;
+ oldstr = argstr;
+ argstr = tmp;
+ (void) snprintf(argstr, MAXPATHLEN, "/%s%s", nm,
+ oldstr);
+ }
+
+ if (cn->cgn_parent == NULL)
+ break;
+ cn = cn->cgn_parent;
+ parent = cn->cgn_parent;
+
+ /*
+ * The arg path is relative to the mountpoint so we stop when
+ * we get to the top level.
+ */
+ if (parent == NULL || parent == cn)
+ break;
+ } while (parent != cn);
+
+ kmem_free(oldstr, MAXPATHLEN);
+
+ rarg = kmem_alloc(sizeof (cgrp_rra_arg_t), KM_SLEEP);
+ rarg->crraa_agent_path = kmem_alloc(sizeof (cgm->cg_agent), KM_SLEEP);
+ (void) strlcpy(rarg->crraa_agent_path, cgm->cg_agent,
+ sizeof (cgm->cg_agent));
+ rarg->crraa_event_path = argstr;
+
+ DTRACE_PROBE2(cgrp__agent__event, cgrp_rra_arg_t *, rarg,
+ int, plwpd->br_cgroupid);
+
+ /* The release agent process cannot belong to our cgroup */
+ plwpd->br_cgroupid = 0;
+
+ /*
+ * The cg_contents mutex cannot be held while taking the pool lock
+ * or calling newproc.
+ */
+ mutex_exit(&cgm->cg_contents);
+
+ if (z->zone_defaultcid > 0) {
+ cid = z->zone_defaultcid;
+ } else {
+ pool_lock();
+ cid = pool_get_class(z->zone_pool);
+ pool_unlock();
+ }
+ if (cid == -1)
+ cid = defaultcid;
+
+ if ((agent_err = newproc(cgrp_run_rel_agent, (void *)rarg, cid,
+ minclsyspri - 1, NULL, -1)) != 0) {
+ /* There's nothing we can do if creating the proc fails. */
+ kmem_free(rarg->crraa_event_path, MAXPATHLEN);
+ kmem_free(rarg->crraa_agent_path, sizeof (cgm->cg_agent));
+ kmem_free(rarg, sizeof (cgrp_rra_arg_t));
+ }
+}
+
+/*ARGSUSED*/
+static void
+cgrp_lwp_fork_helper(vfs_t *vfsp, uint_t cg_id, id_t tid, pid_t tpid)
+{
+ cgrp_mnt_t *cgm = (cgrp_mnt_t *)VFSTOCGM(vfsp);
+ cgrp_node_t *cn;
+
+ mutex_enter(&cgm->cg_contents);
+ cn = cgrp_cg_hash_lookup(cgm, cg_id);
+ ASSERT(cn != NULL);
+ cn->cgn_task_cnt++;
+ mutex_exit(&cgm->cg_contents);
+
+ DTRACE_PROBE1(cgrp__lwp__fork, void *, cn);
+}
+
+/*ARGSUSED*/
+static void
+cgrp_lwp_exit_helper(vfs_t *vfsp, uint_t cg_id, id_t tid, pid_t tpid)
+{
+ cgrp_mnt_t *cgm = (cgrp_mnt_t *)VFSTOCGM(vfsp);
+ cgrp_node_t *cn;
+
+ mutex_enter(&cgm->cg_contents);
+ cn = cgrp_cg_hash_lookup(cgm, cg_id);
+ ASSERT(cn != NULL);
+ if (cn->cgn_task_cnt == 0) {
+ /* top-level cgroup cnt can be 0 during reboot */
+ mutex_exit(&cgm->cg_contents);
+ return;
+ }
+ cn->cgn_task_cnt--;
+ DTRACE_PROBE1(cgrp__lwp__exit, void *, cn);
+
+ if (cn->cgn_task_cnt == 0 && cn->cgn_dirents == N_DIRENTS(cgm) &&
+ cn->cgn_notify == 1) {
+ cgrp_rel_agent_event(cgm, cn);
+ ASSERT(MUTEX_NOT_HELD(&cgm->cg_contents));
+ } else {
+ mutex_exit(&cgm->cg_contents);
+ }
+}
diff --git a/usr/src/uts/common/brand/lx/cgroups/cgrps_vnops.c b/usr/src/uts/common/brand/lx/cgroups/cgrps_vnops.c
new file mode 100644
index 0000000000..bd571c8c18
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/cgroups/cgrps_vnops.c
@@ -0,0 +1,1608 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/sysmacros.h>
+#include <sys/user.h>
+#include <sys/time.h>
+#include <sys/vfs.h>
+#include <sys/vfs_opreg.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/fcntl.h>
+#include <sys/flock.h>
+#include <sys/kmem.h>
+#include <sys/uio.h>
+#include <sys/errno.h>
+#include <sys/stat.h>
+#include <sys/cred.h>
+#include <sys/dirent.h>
+#include <sys/pathname.h>
+#include <vm/seg_vn.h>
+#include <sys/cmn_err.h>
+#include <sys/buf.h>
+#include <sys/vm.h>
+#include <sys/prsystm.h>
+#include <sys/policy.h>
+#include <fs/fs_subr.h>
+#include <sys/sdt.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+
+#include "cgrps.h"
+
+typedef enum cgrp_wr_type {
+ CG_WR_PROCS = 1,
+ CG_WR_TASKS
+} cgrp_wr_type_t;
+
+/* ARGSUSED1 */
+static int
+cgrp_open(struct vnode **vpp, int flag, struct cred *cred, caller_context_t *ct)
+{
+ /*
+ * swapon to a cgrp file is not supported so access is denied on open
+ * if VISSWAP is set.
+ */
+ if ((*vpp)->v_flag & VISSWAP)
+ return (EINVAL);
+
+ return (0);
+}
+
+/* ARGSUSED1 */
+static int
+cgrp_close(struct vnode *vp, int flag, int count, offset_t offset,
+ struct cred *cred, caller_context_t *ct)
+{
+ cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
+ cleanshares(vp, ttoproc(curthread)->p_pid);
+ return (0);
+}
+
+/*
+ * Lookup proc or task based on pid and typ.
+ */
+static proc_t *
+cgrp_p_for_wr(pid_t pid, cgrp_wr_type_t typ)
+{
+ int i;
+ zoneid_t zoneid = curproc->p_zone->zone_id;
+ pid_t schedpid = curproc->p_zone->zone_zsched->p_pid;
+
+ ASSERT(MUTEX_HELD(&pidlock));
+
+ /* getting a proc from a pid is easy */
+ if (typ == CG_WR_PROCS)
+ return (prfind(pid));
+
+ ASSERT(typ == CG_WR_TASKS);
+
+ /*
+ * We have to scan all of the process entries to find the proc
+ * containing this task.
+ */
+ mutex_exit(&pidlock);
+ for (i = 1; i < v.v_proc; i++) {
+ proc_t *p;
+ kthread_t *t;
+
+ mutex_enter(&pidlock);
+ /*
+ * Skip indices for which there is no pid_entry, PIDs for
+ * which there is no corresponding process, system processes,
+ * a PID of 0, the pid for our zsched process, anything the
+ * security policy doesn't allow us to look at, its not an
+ * lx-branded process and processes that are not in the zone.
+ */
+ if ((p = pid_entry(i)) == NULL ||
+ p->p_stat == SIDL ||
+ (p->p_flag & SSYS) != 0 ||
+ p->p_pid == 0 ||
+ p->p_pid == schedpid ||
+ secpolicy_basic_procinfo(CRED(), p, curproc) != 0 ||
+ p->p_brand != &lx_brand ||
+ p->p_zone->zone_id != zoneid) {
+ mutex_exit(&pidlock);
+ continue;
+ }
+
+ mutex_enter(&p->p_lock);
+ if ((t = p->p_tlist) == NULL) {
+ /* no threads, skip it */
+ mutex_exit(&p->p_lock);
+ mutex_exit(&pidlock);
+ continue;
+ }
+
+ /*
+ * Check all threads in this proc.
+ */
+ do {
+ lx_lwp_data_t *plwpd = ttolxlwp(t);
+ if (plwpd != NULL && plwpd->br_pid == pid) {
+ mutex_exit(&p->p_lock);
+ return (p);
+ }
+
+ t = t->t_forw;
+ } while (t != p->p_tlist);
+
+ mutex_exit(&p->p_lock);
+ mutex_exit(&pidlock);
+ }
+
+ mutex_enter(&pidlock);
+ return (NULL);
+}
+
+/*
+ * Move a thread from one cgroup to another. If the old cgroup is empty
+ * we queue up an agent event. We return true in that case since we've
+ * dropped the locks and the caller needs to reacquire them.
+ */
+static boolean_t
+cgrp_thr_move(cgrp_mnt_t *cgm, lx_lwp_data_t *plwpd, cgrp_node_t *ncn,
+ uint_t cg_id, proc_t *p)
+{
+ cgrp_node_t *ocn;
+
+ ASSERT(MUTEX_HELD(&cgm->cg_contents));
+ ASSERT(MUTEX_HELD(&p->p_lock));
+
+ ocn = cgrp_cg_hash_lookup(cgm, plwpd->br_cgroupid);
+ VERIFY(ocn != NULL);
+
+ ASSERT(ocn->cgn_task_cnt > 0);
+ atomic_dec_32(&ocn->cgn_task_cnt);
+ atomic_inc_32(&ncn->cgn_task_cnt);
+ plwpd->br_cgroupid = cg_id;
+
+ if (ocn->cgn_task_cnt == 0 && ocn->cgn_dirents == N_DIRENTS(cgm) &&
+ ocn->cgn_notify == 1) {
+ /*
+ * We want to drop p_lock before queuing the event since
+ * that might sleep. Dropping p_lock might cause the caller to
+ * have to restart the move process from the beginning.
+ */
+ mutex_exit(&p->p_lock);
+ cgrp_rel_agent_event(cgm, ocn);
+ ASSERT(MUTEX_NOT_HELD(&cgm->cg_contents));
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
+/*
+ * Assign either all of the threads, or a single thread, for the specified pid
+ * to the new cgroup. Controlled by the typ argument.
+ */
+static int
+cgrp_proc_set_id(cgrp_mnt_t *cgm, uint_t cg_id, pid_t pid, cgrp_wr_type_t typ)
+{
+ proc_t *p;
+ kthread_t *t;
+ int error;
+ cgrp_node_t *ncn;
+
+ if (pid == 1)
+ pid = curproc->p_zone->zone_proc_initpid;
+
+ /*
+ * Move one or all threads to this cgroup.
+ */
+ if (typ == CG_WR_TASKS) {
+ error = ESRCH;
+ } else {
+ error = 0;
+ }
+
+restart:
+ mutex_enter(&pidlock);
+
+ p = cgrp_p_for_wr(pid, typ);
+ if (p == NULL) {
+ mutex_exit(&pidlock);
+ return (ESRCH);
+ }
+
+ /*
+ * Fail writes for pids for which there is no corresponding process,
+ * system processes, a pid of 0, the pid for our zsched process,
+ * anything the security policy doesn't allow us to look at, and
+ * processes that are not in the zone.
+ */
+ if (p->p_stat == SIDL ||
+ (p->p_flag & SSYS) != 0 ||
+ p->p_pid == 0 ||
+ p->p_pid == curproc->p_zone->zone_zsched->p_pid ||
+ secpolicy_basic_procinfo(CRED(), p, curproc) != 0 ||
+ p->p_zone->zone_id != curproc->p_zone->zone_id) {
+ mutex_exit(&pidlock);
+ return (ESRCH);
+ }
+
+ /*
+ * Ignore writes for PID which is not an lx-branded process or with
+ * no threads.
+ */
+
+ mutex_enter(&p->p_lock);
+ mutex_exit(&pidlock);
+ if (p->p_brand != &lx_brand || (t = p->p_tlist) == NULL ||
+ p->p_flag & SEXITING) {
+ mutex_exit(&p->p_lock);
+ return (0);
+ }
+
+ mutex_enter(&cgm->cg_contents);
+
+ ncn = cgrp_cg_hash_lookup(cgm, cg_id);
+ VERIFY(ncn != NULL);
+
+ do {
+ lx_lwp_data_t *plwpd = ttolxlwp(t);
+ if (plwpd != NULL && plwpd->br_cgroupid != cg_id) {
+ if (typ == CG_WR_PROCS) {
+ if (cgrp_thr_move(cgm, plwpd, ncn, cg_id, p)) {
+ /*
+ * We dropped all of the locks so we
+ * need to start over.
+ */
+ goto restart;
+ }
+
+ } else if (plwpd->br_pid == pid) {
+ /* type is CG_WR_TASKS and we found the task */
+ error = 0;
+ if (cgrp_thr_move(cgm, plwpd, ncn, cg_id, p)) {
+ goto done;
+ } else {
+ break;
+ }
+ }
+ }
+ t = t->t_forw;
+ } while (t != p->p_tlist);
+
+ mutex_exit(&cgm->cg_contents);
+ mutex_exit(&p->p_lock);
+done:
+
+ return (error);
+}
+
+/*
+ * User-level is writing a pid string. We need to get that string and convert
+ * it to a pid. The user-level code has to completely write an entire pid
+ * string at once. The user-level code could write multiple strings (delimited
+ * by newline) although that is frowned upon. However, we must handle this
+ * case too. Thus we consume the input one byte at a time until we get a whole
+ * pid string. We can't consume more than a byte at a time since otherwise we
+ * might be left with a partial pid string.
+ */
+static int
+cgrp_get_pid_str(struct uio *uio, pid_t *pid)
+{
+ char buf[16]; /* big enough for a pid string */
+ int i;
+ int error;
+ char *p = &buf[0];
+ char *ep;
+ long pidnum;
+
+ bzero(buf, sizeof (buf));
+ for (i = 0; uio->uio_resid > 0 && i < sizeof (buf); i++, p++) {
+ error = uiomove(p, 1, UIO_WRITE, uio);
+ if (error != 0)
+ return (error);
+ if (buf[i] == '\n') {
+ buf[i] = '\0';
+ break;
+ }
+ }
+
+ if (buf[0] == '\0' || i >= sizeof (buf)) /* no input or too long */
+ return (EINVAL);
+
+ error = ddi_strtol(buf, &ep, 10, &pidnum);
+ if (error != 0 || *ep != '\0' || pidnum > maxpid || pidnum < 0)
+ return (EINVAL);
+
+ *pid = (pid_t)pidnum;
+ return (0);
+}
+
+static int
+cgrp_wr_notify(cgrp_node_t *cn, struct uio *uio)
+{
+ int error;
+ uint_t value;
+
+ /*
+ * This is cheesy but since we only take a 0 or 1 value we can
+ * let the pid_str function do the uio string conversion.
+ */
+ error = cgrp_get_pid_str(uio, (pid_t *)&value);
+ if (error != 0)
+ return (error);
+
+ if (value != 0 && value != 1)
+ return (EINVAL);
+
+ /*
+ * The flag is on the containing dir. We don't bother taking the
+ * cg_contents lock since this is a simple assignment.
+ */
+ cn->cgn_parent->cgn_notify = value;
+ return (0);
+}
+
+static int
+cgrp_wr_rel_agent(cgrp_mnt_t *cgm, struct uio *uio)
+{
+ int error;
+ int len;
+ char *wrp;
+
+ len = uio->uio_offset + uio->uio_resid;
+ if (len > MAXPATHLEN)
+ return (EFBIG);
+
+ mutex_enter(&cgm->cg_contents);
+
+ wrp = &cgm->cg_agent[uio->uio_offset];
+ error = uiomove(wrp, uio->uio_resid, UIO_WRITE, uio);
+ cgm->cg_agent[len] = '\0';
+ if (len > 1 && cgm->cg_agent[len - 1] == '\n')
+ cgm->cg_agent[len - 1] = '\0';
+
+ mutex_exit(&cgm->cg_contents);
+ return (error);
+}
+
+static int
+cgrp_wr_proc_or_task(cgrp_mnt_t *cgm, cgrp_node_t *cn, struct uio *uio,
+ cgrp_wr_type_t typ)
+{
+ /* the cgroup ID is on the containing dir */
+ uint_t cg_id = cn->cgn_parent->cgn_id;
+ int error;
+ pid_t pidnum;
+
+ while (uio->uio_resid > 0) {
+ error = cgrp_get_pid_str(uio, &pidnum);
+ if (error != 0)
+ return (error);
+
+ error = cgrp_proc_set_id(cgm, cg_id, pidnum, typ);
+ if (error != 0)
+ return (error);
+ }
+
+ return (0);
+}
+
+static int
+cgrp_wr(cgrp_mnt_t *cgm, cgrp_node_t *cn, struct uio *uio, struct cred *cr,
+ caller_context_t *ct)
+{
+ struct vnode *vp;
+ int error = 0;
+ rlim64_t limit = uio->uio_llimit;
+
+ vp = CGNTOV(cn);
+ ASSERT(vp->v_type == VREG);
+
+ if (uio->uio_loffset < 0)
+ return (EINVAL);
+
+ if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
+ limit = MAXOFFSET_T;
+
+ if (uio->uio_loffset >= MAXOFF_T)
+ return (EFBIG);
+
+ if (uio->uio_resid == 0)
+ return (0);
+
+ if (limit > MAXOFF_T)
+ limit = MAXOFF_T;
+
+ switch (cn->cgn_type) {
+ case CG_NOTIFY:
+ error = cgrp_wr_notify(cn, uio);
+ break;
+ case CG_PROCS:
+ error = cgrp_wr_proc_or_task(cgm, cn, uio, CG_WR_PROCS);
+ break;
+ case CG_REL_AGENT:
+ error = cgrp_wr_rel_agent(cgm, uio);
+ break;
+ case CG_TASKS:
+ error = cgrp_wr_proc_or_task(cgm, cn, uio, CG_WR_TASKS);
+ break;
+ default:
+ VERIFY(0);
+ }
+
+ return (error);
+}
+
+/*
+ * pidlock is held on entry but dropped on exit. Because we might have to drop
+ * locks and loop if the process is already P_PR_LOCKed, it is possible that
+ * the process might be gone when we return from this function.
+ */
+static proc_t *
+cgrp_p_lock(proc_t *p)
+{
+ kmutex_t *mp;
+ pid_t pid;
+
+ ASSERT(MUTEX_HELD(&pidlock));
+
+ /* first try the fast path */
+ mutex_enter(&p->p_lock);
+ if (p->p_flag & SEXITING) {
+ mutex_exit(&p->p_lock);
+ mutex_exit(&pidlock);
+ return (NULL);
+ }
+
+ if (!(p->p_proc_flag & P_PR_LOCK)) {
+ p->p_proc_flag |= P_PR_LOCK;
+ mutex_exit(&p->p_lock);
+ mutex_exit(&pidlock);
+ THREAD_KPRI_REQUEST();
+ return (p);
+ }
+ mutex_exit(&p->p_lock);
+
+ pid = p->p_pid;
+ for (;;) {
+ /*
+ * p_lock is persistent, but p itself is not -- it could
+ * vanish during cv_wait(). Load p->p_lock now so we can
+ * drop it after cv_wait() without referencing p.
+ */
+ mp = &p->p_lock;
+ mutex_enter(mp);
+ mutex_exit(&pidlock);
+
+ if (p->p_flag & SEXITING) {
+ mutex_exit(mp);
+ return (NULL);
+ }
+
+ if (!(p->p_proc_flag & P_PR_LOCK))
+ break;
+
+ cv_wait(&pr_pid_cv[p->p_slot], mp);
+ mutex_exit(mp);
+
+ mutex_enter(&pidlock);
+ p = prfind(pid);
+ if (p == NULL || p->p_stat == SIDL) {
+ mutex_exit(&pidlock);
+ return (NULL);
+ }
+ }
+
+ p->p_proc_flag |= P_PR_LOCK;
+ mutex_exit(mp);
+ ASSERT(!MUTEX_HELD(&pidlock));
+ THREAD_KPRI_REQUEST();
+ return (p);
+}
+
+static void
+cgrp_p_unlock(proc_t *p)
+{
+ ASSERT(p->p_proc_flag & P_PR_LOCK);
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ ASSERT(!MUTEX_HELD(&pidlock));
+
+ p->p_proc_flag &= ~P_PR_LOCK;
+ cv_signal(&pr_pid_cv[p->p_slot]);
+ mutex_exit(&p->p_lock);
+ THREAD_KPRI_RELEASE();
+}
+
+/*
+ * Read value from the notify_on_release pseudo file on the parent node
+ * (which is the actual cgroup node). We don't bother taking the cg_contents
+ * lock since it's a single instruction so an empty group action/read will
+ * only see one value or the other.
+ */
+/* ARGSUSED */
+static int
+cgrp_rd_notify(cgrp_mnt_t *cgm, cgrp_node_t *cn, struct uio *uio)
+{
+ int len;
+ int error = 0;
+ char buf[16];
+ char *rdp;
+ /* the flag is on the containing dir */
+ uint_t value = cn->cgn_parent->cgn_notify;
+
+ len = snprintf(buf, sizeof (buf), "%u\n", value);
+ if (uio->uio_offset > len)
+ return (0);
+
+ len -= uio->uio_offset;
+ rdp = &buf[uio->uio_offset];
+ len = (uio->uio_resid < len) ? uio->uio_resid : len;
+
+ error = uiomove(rdp, len, UIO_READ, uio);
+ return (error);
+}
+
+/*
+ * Read value from the release_agent pseudo file.
+ */
+static int
+cgrp_rd_rel_agent(cgrp_mnt_t *cgm, struct uio *uio)
+{
+ int len;
+ int error = 0;
+ char *rdp;
+
+ mutex_enter(&cgm->cg_contents);
+
+ if (cgm->cg_agent[0] == '\0') {
+ mutex_exit(&cgm->cg_contents);
+ return (0);
+ }
+
+ len = strlen(cgm->cg_agent);
+ if (uio->uio_offset > len) {
+ mutex_exit(&cgm->cg_contents);
+ return (0);
+ }
+
+ len -= uio->uio_offset;
+ rdp = &cgm->cg_agent[uio->uio_offset];
+ len = (uio->uio_resid < len) ? uio->uio_resid : len;
+
+ error = uiomove(rdp, len, UIO_READ, uio);
+
+ mutex_exit(&cgm->cg_contents);
+
+ return (error);
+}
+
+/*
+ * Read pids from the cgroup.procs pseudo file. We have to look at all of the
+ * processes to find applicable ones, then report pids for any process which
+ * has all of its threads in the same cgroup.
+ */
+static int
+cgrp_rd_procs(cgrp_mnt_t *cgm, cgrp_node_t *cn, struct uio *uio)
+{
+ int i;
+ ssize_t offset = 0;
+ ssize_t uresid;
+ zoneid_t zoneid = curproc->p_zone->zone_id;
+ int error = 0;
+ pid_t initpid = curproc->p_zone->zone_proc_initpid;
+ pid_t schedpid = curproc->p_zone->zone_zsched->p_pid;
+ /* the cgroup ID is on the containing dir */
+ uint_t cg_id = cn->cgn_parent->cgn_id;
+
+ /* Scan all of the process entries */
+ for (i = 1; i < v.v_proc && (uresid = uio->uio_resid) > 0; i++) {
+ proc_t *p;
+ int len;
+ pid_t pid;
+ char buf[16];
+ char *rdp;
+ kthread_t *t;
+ boolean_t in_cg;
+
+ mutex_enter(&pidlock);
+ /*
+ * Skip indices for which there is no pid_entry, PIDs for
+ * which there is no corresponding process, system processes,
+ * a PID of 0, the pid for our zsched process, anything the
+ * security policy doesn't allow us to look at, its not an
+ * lx-branded process and processes that are not in the zone.
+ */
+ if ((p = pid_entry(i)) == NULL ||
+ p->p_stat == SIDL ||
+ (p->p_flag & SSYS) != 0 ||
+ p->p_pid == 0 ||
+ p->p_pid == schedpid ||
+ secpolicy_basic_procinfo(CRED(), p, curproc) != 0 ||
+ p->p_brand != &lx_brand ||
+ p->p_zone->zone_id != zoneid) {
+ mutex_exit(&pidlock);
+ continue;
+ }
+
+ mutex_enter(&p->p_lock);
+ if ((t = p->p_tlist) == NULL) {
+ /* no threads, skip it */
+ mutex_exit(&p->p_lock);
+ mutex_exit(&pidlock);
+ continue;
+ }
+
+ /*
+ * Check if all threads are in this cgroup.
+ */
+ in_cg = B_TRUE;
+ mutex_enter(&cgm->cg_contents);
+ do {
+ lx_lwp_data_t *plwpd = ttolxlwp(t);
+ if (plwpd == NULL || plwpd->br_cgroupid != cg_id) {
+ in_cg = B_FALSE;
+ break;
+ }
+
+ t = t->t_forw;
+ } while (t != p->p_tlist);
+ mutex_exit(&cgm->cg_contents);
+
+ mutex_exit(&p->p_lock);
+ if (!in_cg) {
+ /*
+ * This proc, or at least one of its threads, is not
+ * in this cgroup.
+ */
+ mutex_exit(&pidlock);
+ continue;
+ }
+
+ /*
+ * Convert pid to the Linux default of 1 if we're the zone's
+ * init process, otherwise use the value from the proc struct
+ */
+ if (p->p_pid == initpid) {
+ pid = 1;
+ } else {
+ pid = p->p_pid;
+ }
+
+ mutex_exit(&pidlock);
+
+ /*
+ * Generate pid line and write all or part of it if we're
+ * in the right spot within the pseudo file.
+ */
+ len = snprintf(buf, sizeof (buf), "%u\n", pid);
+ if ((offset + len) > uio->uio_offset) {
+ int diff = (int)(uio->uio_offset - offset);
+
+ ASSERT(diff < len);
+ offset += diff;
+ rdp = &buf[diff];
+ len -= diff;
+ if (len > uresid)
+ len = uresid;
+
+ error = uiomove(rdp, len, UIO_READ, uio);
+ if (error != 0)
+ return (error);
+ }
+ offset += len;
+ }
+
+ return (0);
+}
+
+/*
+ * We are given a locked process we know is valid, report on any of its thresds
+ * that are in the cgroup.
+ */
+static int
+cgrp_rd_proc_tasks(uint_t cg_id, proc_t *p, pid_t initpid, ssize_t *offset,
+ struct uio *uio)
+{
+ int error = 0;
+ uint_t tid;
+ char buf[16];
+ char *rdp;
+ kthread_t *t;
+
+ ASSERT(p->p_proc_flag & P_PR_LOCK);
+
+ /*
+ * Report all threads in this cgroup.
+ */
+ t = p->p_tlist;
+ do {
+ lx_lwp_data_t *plwpd = ttolxlwp(t);
+ if (plwpd == NULL) {
+ t = t->t_forw;
+ continue;
+ }
+
+ if (plwpd->br_cgroupid == cg_id) {
+ int len;
+
+ /*
+ * Convert taskid to the Linux default of 1 if
+ * we're the zone's init process.
+ */
+ tid = plwpd->br_pid;
+ if (tid == initpid)
+ tid = 1;
+
+ len = snprintf(buf, sizeof (buf), "%u\n", tid);
+ if ((*offset + len) > uio->uio_offset) {
+ int diff;
+
+ diff = (int)(uio->uio_offset - *offset);
+ ASSERT(diff < len);
+ *offset = *offset + diff;
+ rdp = &buf[diff];
+ len -= diff;
+ if (len > uio->uio_resid)
+ len = uio->uio_resid;
+
+ error = uiomove(rdp, len, UIO_READ, uio);
+ if (error != 0)
+ return (error);
+ }
+ *offset = *offset + len;
+ }
+
+ t = t->t_forw;
+ } while (t != p->p_tlist && uio->uio_resid > 0);
+
+ return (0);
+}
+
+/*
+ * Read pids from the tasks pseudo file. We have to look at all of the
+ * processes to find applicable ones, then report pids for any thread in the
+ * cgroup. We return the emulated lx thread pid here, not the internal thread
+ * ID. Because we're possibly doing IO for each taskid we lock the process
+ * so that the threads don't change while we're working on it (although threads
+ * can change if we fill up the read buffer and come back later for a
+ * subsequent read).
+ */
+int
+cgrp_rd_tasks(cgrp_mnt_t *cgm, cgrp_node_t *cn, struct uio *uio)
+{
+ int i;
+ ssize_t offset = 0;
+ ssize_t uresid;
+ zoneid_t zoneid = curproc->p_zone->zone_id;
+ int error = 0;
+ pid_t initpid = curproc->p_zone->zone_proc_initpid;
+ pid_t schedpid = curproc->p_zone->zone_zsched->p_pid;
+ /* the cgroup ID is on the containing dir */
+ uint_t cg_id = cn->cgn_parent->cgn_id;
+
+ /* Scan all of the process entries */
+ for (i = 1; i < v.v_proc && (uresid = uio->uio_resid) > 0; i++) {
+ proc_t *p;
+
+ mutex_enter(&pidlock);
+ /*
+ * Skip indices for which there is no pid_entry, PIDs for
+ * which there is no corresponding process, system processes,
+ * a PID of 0, the pid for our zsched process, anything the
+ * security policy doesn't allow us to look at, its not an
+ * lx-branded process and processes that are not in the zone.
+ */
+ if ((p = pid_entry(i)) == NULL ||
+ p->p_stat == SIDL ||
+ (p->p_flag & SSYS) != 0 ||
+ p->p_pid == 0 ||
+ p->p_pid == schedpid ||
+ secpolicy_basic_procinfo(CRED(), p, curproc) != 0 ||
+ p->p_brand != &lx_brand ||
+ p->p_zone->zone_id != zoneid) {
+ mutex_exit(&pidlock);
+ continue;
+ }
+
+ if (p->p_tlist == NULL) {
+ /* no threads, skip it */
+ mutex_exit(&pidlock);
+ continue;
+ }
+
+ p = cgrp_p_lock(p);
+ ASSERT(!MUTEX_HELD(&pidlock));
+ if (p == NULL)
+ continue;
+
+ mutex_enter(&cgm->cg_contents);
+ error = cgrp_rd_proc_tasks(cg_id, p, initpid, &offset, uio);
+ mutex_exit(&cgm->cg_contents);
+
+ mutex_enter(&p->p_lock);
+ cgrp_p_unlock(p);
+
+ if (error != 0)
+ return (error);
+ }
+
+ return (0);
+}
+
+static int
+cgrp_rd(cgrp_mnt_t *cgm, cgrp_node_t *cn, struct uio *uio, caller_context_t *ct)
+{
+ int error = 0;
+
+ if (uio->uio_loffset >= MAXOFF_T)
+ return (0);
+ if (uio->uio_loffset < 0)
+ return (EINVAL);
+ if (uio->uio_resid == 0)
+ return (0);
+
+ switch (cn->cgn_type) {
+ case CG_NOTIFY:
+ error = cgrp_rd_notify(cgm, cn, uio);
+ break;
+ case CG_PROCS:
+ error = cgrp_rd_procs(cgm, cn, uio);
+ break;
+ case CG_REL_AGENT:
+ error = cgrp_rd_rel_agent(cgm, uio);
+ break;
+ case CG_TASKS:
+ error = cgrp_rd_tasks(cgm, cn, uio);
+ break;
+ default:
+ VERIFY(0);
+ }
+
+ return (error);
+}
+
+/* ARGSUSED2 */
+static int
+cgrp_read(struct vnode *vp, struct uio *uiop, int ioflag, cred_t *cred,
+ struct caller_context *ct)
+{
+ cgrp_node_t *cn = VTOCGN(vp);
+ cgrp_mnt_t *cgm = VTOCGM(vp);
+ int error;
+
+ /*
+ * We don't support reading non-regular files
+ */
+ if (vp->v_type == VDIR)
+ return (EISDIR);
+ if (vp->v_type != VREG)
+ return (EINVAL);
+ error = cgrp_rd(cgm, cn, uiop, ct);
+
+ return (error);
+}
+
+static int
+cgrp_write(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cred,
+ struct caller_context *ct)
+{
+ cgrp_node_t *cn = VTOCGN(vp);
+ cgrp_mnt_t *cgm = VTOCGM(vp);
+ int error;
+
+ /*
+ * We don't support writing to non-regular files
+ */
+ if (vp->v_type != VREG)
+ return (EINVAL);
+
+ if (ioflag & FAPPEND) {
+ /* In append mode start at end of file. */
+ uiop->uio_loffset = cn->cgn_size;
+ }
+
+ error = cgrp_wr(cgm, cn, uiop, cred, ct);
+
+ return (error);
+}
+
+/* ARGSUSED2 */
+static int
+cgrp_getattr(struct vnode *vp, struct vattr *vap, int flags, struct cred *cred,
+ caller_context_t *ct)
+{
+ cgrp_node_t *cn = VTOCGN(vp);
+ cgrp_mnt_t *cgm;
+ struct vattr va;
+ int attrs = 1;
+
+ cgm = VTOCGM(cn->cgn_vnode);
+ mutex_enter(&cgm->cg_contents);
+ if (attrs == 0) {
+ cn->cgn_uid = va.va_uid;
+ cn->cgn_gid = va.va_gid;
+ }
+ vap->va_type = vp->v_type;
+ vap->va_mode = cn->cgn_mode & MODEMASK;
+ vap->va_uid = cn->cgn_uid;
+ vap->va_gid = cn->cgn_gid;
+ vap->va_fsid = cn->cgn_fsid;
+ vap->va_nodeid = (ino64_t)cn->cgn_nodeid;
+ vap->va_nlink = cn->cgn_nlink;
+ vap->va_size = (u_offset_t)cn->cgn_size;
+ vap->va_atime = cn->cgn_atime;
+ vap->va_mtime = cn->cgn_mtime;
+ vap->va_ctime = cn->cgn_ctime;
+ vap->va_blksize = PAGESIZE;
+ vap->va_rdev = cn->cgn_rdev;
+ vap->va_seq = cn->cgn_seq;
+
+ vap->va_nblocks = (fsblkcnt64_t)btodb(ptob(btopr(vap->va_size)));
+ mutex_exit(&cgm->cg_contents);
+ return (0);
+}
+
+/*ARGSUSED4*/
+static int
+cgrp_setattr(struct vnode *vp, struct vattr *vap, int flags, struct cred *cred,
+ caller_context_t *ct)
+{
+ cgrp_node_t *cn = VTOCGN(vp);
+ cgrp_mnt_t *cgm;
+ int error = 0;
+ struct vattr *get;
+ long mask;
+
+ /*
+ * Cannot set these attributes
+ */
+ if ((vap->va_mask & AT_NOSET) || (vap->va_mask & AT_XVATTR) ||
+ (vap->va_mode & (S_ISUID | S_ISGID)) || (vap->va_mask & AT_SIZE))
+ return (EINVAL);
+
+ cgm = VTOCGM(cn->cgn_vnode);
+ mutex_enter(&cgm->cg_contents);
+
+ get = &cn->cgn_attr;
+ /*
+ * Change file access modes. Must be owner or have sufficient
+ * privileges.
+ */
+ error = secpolicy_vnode_setattr(cred, vp, vap, get, flags, cgrp_taccess,
+ cn);
+
+ if (error)
+ goto out;
+
+ mask = vap->va_mask;
+
+ if (mask & AT_MODE) {
+ get->va_mode &= S_IFMT;
+ get->va_mode |= vap->va_mode & ~S_IFMT;
+ }
+
+ if (mask & AT_UID)
+ get->va_uid = vap->va_uid;
+ if (mask & AT_GID)
+ get->va_gid = vap->va_gid;
+ if (mask & AT_ATIME)
+ get->va_atime = vap->va_atime;
+ if (mask & AT_MTIME)
+ get->va_mtime = vap->va_mtime;
+
+ if (mask & (AT_UID | AT_GID | AT_MODE | AT_MTIME))
+ gethrestime(&cn->cgn_ctime);
+
+out:
+ mutex_exit(&cgm->cg_contents);
+ return (error);
+}
+
+/* ARGSUSED2 */
+static int
+cgrp_access(struct vnode *vp, int mode, int flags, struct cred *cred,
+ caller_context_t *ct)
+{
+ cgrp_node_t *cn = VTOCGN(vp);
+ cgrp_mnt_t *cgm;
+ int error;
+
+ cgm = VTOCGM(cn->cgn_vnode);
+ mutex_enter(&cgm->cg_contents);
+ error = cgrp_taccess(cn, mode, cred);
+ mutex_exit(&cgm->cg_contents);
+ return (error);
+}
+
+/* ARGSUSED3 */
+static int
+cgrp_lookup(struct vnode *dvp, char *nm, struct vnode **vpp,
+ struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred,
+ caller_context_t *ct, int *direntflags, pathname_t *realpnp)
+{
+ cgrp_node_t *cn = VTOCGN(dvp);
+ cgrp_mnt_t *cgm;
+ cgrp_node_t *ncn = NULL;
+ int error;
+
+ /* disallow extended attrs */
+ if (flags & LOOKUP_XATTR)
+ return (EINVAL);
+
+ /*
+ * Null component name is a synonym for directory being searched.
+ */
+ if (*nm == '\0') {
+ VN_HOLD(dvp);
+ *vpp = dvp;
+ return (0);
+ }
+ ASSERT(cn);
+
+ cgm = VTOCGM(cn->cgn_vnode);
+ mutex_enter(&cgm->cg_contents);
+ error = cgrp_dirlookup(cn, nm, &ncn, cred);
+ mutex_exit(&cgm->cg_contents);
+
+ if (error == 0) {
+ ASSERT(ncn);
+ *vpp = CGNTOV(ncn);
+ }
+
+ return (error);
+}
+
+/*ARGSUSED7*/
+static int
+cgrp_create(struct vnode *dvp, char *nm, struct vattr *vap,
+ enum vcexcl exclusive, int mode, struct vnode **vpp, struct cred *cred,
+ int flag, caller_context_t *ct, vsecattr_t *vsecp)
+{
+ cgrp_node_t *parent = VTOCGN(dvp);
+ cgrp_node_t *cn = NULL;
+ cgrp_mnt_t *cgm;
+ int error;
+
+ if (*nm == '\0')
+ return (EPERM);
+
+ cgm = VTOCGM(parent->cgn_vnode);
+ mutex_enter(&cgm->cg_contents);
+ error = cgrp_dirlookup(parent, nm, &cn, cred);
+ if (error == 0) { /* name found */
+ ASSERT(cn);
+
+ mutex_exit(&cgm->cg_contents);
+ /*
+ * Creating an existing file, allow it except for the following
+ * errors.
+ */
+ if (exclusive == EXCL) {
+ error = EEXIST;
+ } else if ((CGNTOV(cn)->v_type == VDIR) && (mode & VWRITE)) {
+ error = EISDIR;
+ } else {
+ error = cgrp_taccess(cn, mode, cred);
+ }
+ if (error != 0) {
+ cgnode_rele(cn);
+ return (error);
+ }
+ *vpp = CGNTOV(cn);
+ return (0);
+ }
+ mutex_exit(&cgm->cg_contents);
+
+ /*
+ * cgroups doesn't allow creation of additional, non-subsystem specific
+ * files in a dir
+ */
+ return (EPERM);
+}
+
+/* ARGSUSED3 */
+static int
+cgrp_remove(struct vnode *dvp, char *nm, struct cred *cred,
+ caller_context_t *ct, int flags)
+{
+ cgrp_node_t *parent = VTOCGN(dvp);
+ int error;
+ cgrp_node_t *cn = NULL;
+ cgrp_mnt_t *cgm;
+
+ /*
+ * Removal of subsystem-specific files is not allowed but we need
+ * to return the correct error if they try to remove a non-existent
+ * file.
+ */
+
+ cgm = VTOCGM(parent->cgn_vnode);
+ mutex_enter(&cgm->cg_contents);
+ error = cgrp_dirlookup(parent, nm, &cn, cred);
+ mutex_exit(&cgm->cg_contents);
+ if (error)
+ return (error);
+
+ ASSERT(cn);
+ cgnode_rele(cn);
+ return (EPERM);
+}
+
+/* ARGSUSED4 */
+static int
+cgrp_link(struct vnode *dvp, struct vnode *srcvp, char *cnm, struct cred *cred,
+ caller_context_t *ct, int flags)
+{
+ /* cgroups doesn't support hard links */
+ return (EPERM);
+}
+
+/*
+ * Rename of subsystem-specific files is not allowed but we can rename
+ * directories (i.e. sub-groups). We cannot mv subdirs from one group to
+ * another so the src and dest vnode must be the same.
+ */
+/* ARGSUSED5 */
+static int
+cgrp_rename(
+ struct vnode *odvp, /* source parent vnode */
+ char *onm, /* source name */
+ struct vnode *ndvp, /* destination parent vnode */
+ char *nnm, /* destination name */
+ struct cred *cred,
+ caller_context_t *ct,
+ int flags)
+{
+ cgrp_node_t *fromparent;
+ cgrp_node_t *toparent;
+ cgrp_node_t *fromcn = NULL; /* source cgrp_node */
+ cgrp_mnt_t *cgm = VTOCGM(odvp);
+ int error, err;
+
+ fromparent = VTOCGN(odvp);
+ toparent = VTOCGN(ndvp);
+
+ if (fromparent != toparent)
+ return (EIO);
+
+ /* discourage additional use of toparent */
+ toparent = NULL;
+
+ mutex_enter(&cgm->cg_contents);
+
+ /*
+ * Look up cgrp_node of file we're supposed to rename.
+ */
+ error = cgrp_dirlookup(fromparent, onm, &fromcn, cred);
+ if (error) {
+ mutex_exit(&cgm->cg_contents);
+ return (error);
+ }
+
+ if (fromcn->cgn_type != CG_CGROUP_DIR) {
+ error = EPERM;
+ goto done;
+ }
+
+ /*
+ * Make sure we can delete the old (source) entry. This
+ * requires write permission on the containing directory.
+ */
+ if (((error = cgrp_taccess(fromparent, VWRITE, cred)) != 0))
+ goto done;
+
+ /*
+ * Check for renaming to or from '.' or '..' or that
+ * fromcn == fromparent
+ */
+ if ((onm[0] == '.' &&
+ (onm[1] == '\0' || (onm[1] == '.' && onm[2] == '\0'))) ||
+ (nnm[0] == '.' &&
+ (nnm[1] == '\0' || (nnm[1] == '.' && nnm[2] == '\0'))) ||
+ (fromparent == fromcn)) {
+ error = EINVAL;
+ goto done;
+ }
+
+ /*
+ * Link source to new target
+ */
+ error = cgrp_direnter(cgm, fromparent, nnm, DE_RENAME,
+ fromcn, (struct vattr *)NULL,
+ (cgrp_node_t **)NULL, cred, ct);
+
+ if (error)
+ goto done;
+
+ /*
+ * Unlink from source.
+ */
+ error = err = cgrp_dirdelete(fromparent, fromcn, onm, DR_RENAME, cred);
+
+ /*
+ * The following handles the case where our source cgrp_node was
+ * removed before we got to it.
+ */
+ if (error == ENOENT)
+ error = 0;
+
+ if (err == 0) {
+ vnevent_rename_src(CGNTOV(fromcn), odvp, onm, ct);
+ vnevent_rename_dest_dir(ndvp, CGNTOV(fromcn), nnm, ct);
+ }
+
+done:
+ mutex_exit(&cgm->cg_contents);
+ cgnode_rele(fromcn);
+
+ return (error);
+}
+
+/* ARGSUSED5 */
+static int
+cgrp_mkdir(struct vnode *dvp, char *nm, struct vattr *va, struct vnode **vpp,
+ struct cred *cred, caller_context_t *ct, int flags, vsecattr_t *vsecp)
+{
+ cgrp_node_t *parent = VTOCGN(dvp);
+ cgrp_node_t *self = NULL;
+ cgrp_mnt_t *cgm = VTOCGM(dvp);
+ int error;
+
+ /*
+ * Might be dangling directory. Catch it here, because a ENOENT
+ * return from cgrp_dirlookup() is an "ok return".
+ */
+ if (parent->cgn_nlink == 0)
+ return (ENOENT);
+
+ mutex_enter(&cgm->cg_contents);
+ error = cgrp_dirlookup(parent, nm, &self, cred);
+ if (error == 0) {
+ ASSERT(self != NULL);
+ mutex_exit(&cgm->cg_contents);
+ cgnode_rele(self);
+ return (EEXIST);
+ }
+ if (error != ENOENT) {
+ mutex_exit(&cgm->cg_contents);
+ return (error);
+ }
+
+ error = cgrp_direnter(cgm, parent, nm, DE_MKDIR, (cgrp_node_t *)NULL,
+ va, &self, cred, ct);
+ if (error) {
+ mutex_exit(&cgm->cg_contents);
+ if (self != NULL)
+ cgnode_rele(self);
+ return (error);
+ }
+ mutex_exit(&cgm->cg_contents);
+ *vpp = CGNTOV(self);
+ return (0);
+}
+
+/* ARGSUSED4 */
+static int
+cgrp_rmdir(struct vnode *dvp, char *nm, struct vnode *cdir, struct cred *cred,
+ caller_context_t *ct, int flags)
+{
+ cgrp_node_t *parent = VTOCGN(dvp);
+ cgrp_mnt_t *cgm;
+ cgrp_node_t *self = NULL;
+ struct vnode *vp;
+ int error = 0;
+
+ /*
+ * Return error when removing . and ..
+ */
+ if (strcmp(nm, ".") == 0)
+ return (EINVAL);
+ if (strcmp(nm, "..") == 0)
+ return (EEXIST); /* Should be ENOTEMPTY */
+
+ cgm = VTOCGM(parent->cgn_vnode);
+ mutex_enter(&cgm->cg_contents);
+
+ error = cgrp_dirlookup(parent, nm, &self, cred);
+ if (error) {
+ mutex_exit(&cgm->cg_contents);
+ return (error);
+ }
+
+ vp = CGNTOV(self);
+ if (vp == dvp || vp == cdir) {
+ error = EINVAL;
+ goto done;
+ }
+ if (self->cgn_type != CG_CGROUP_DIR) {
+ error = ENOTDIR;
+ goto done;
+ }
+
+ cgm = (cgrp_mnt_t *)VFSTOCGM(self->cgn_vnode->v_vfsp);
+
+ /*
+ * Check for the existence of any sub-cgroup directories or tasks in
+ * the cgroup.
+ */
+ if (self->cgn_task_cnt > 0 || self->cgn_dirents > N_DIRENTS(cgm)) {
+ error = EEXIST;
+ /*
+ * Update atime because checking cn_dirents is logically
+ * equivalent to reading the directory
+ */
+ gethrestime(&self->cgn_atime);
+ goto done;
+ }
+
+ if (vn_vfswlock(vp)) {
+ error = EBUSY;
+ goto done;
+ }
+ if (vn_mountedvfs(vp) != NULL) {
+ error = EBUSY;
+ } else {
+ error = cgrp_dirdelete(parent, self, nm, DR_RMDIR, cred);
+ }
+
+ vn_vfsunlock(vp);
+
+ if (parent->cgn_task_cnt == 0 &&
+ parent->cgn_dirents == N_DIRENTS(cgm) && parent->cgn_notify == 1) {
+ cgrp_rel_agent_event(cgm, parent);
+ ASSERT(MUTEX_NOT_HELD(&cgm->cg_contents));
+ goto dropped;
+ }
+
+done:
+ mutex_exit(&cgm->cg_contents);
+dropped:
+ vnevent_rmdir(CGNTOV(self), dvp, nm, ct);
+ cgnode_rele(self);
+
+ return (error);
+}
+
+/* ARGSUSED2 */
+static int
+cgrp_readdir(struct vnode *vp, struct uio *uiop, struct cred *cred, int *eofp,
+ caller_context_t *ct, int flags)
+{
+ cgrp_node_t *cn = VTOCGN(vp);
+ cgrp_mnt_t *cgm;
+ cgrp_dirent_t *cdp;
+ int error = 0;
+ size_t namelen;
+ struct dirent64 *dp;
+ ulong_t offset;
+ ulong_t total_bytes_wanted;
+ long outcount = 0;
+ long bufsize;
+ int reclen;
+ caddr_t outbuf;
+
+ if (uiop->uio_loffset >= MAXOFF_T) {
+ if (eofp)
+ *eofp = 1;
+ return (0);
+ }
+
+ if (uiop->uio_iovcnt != 1)
+ return (EINVAL);
+
+ if (vp->v_type != VDIR)
+ return (ENOTDIR);
+
+ cgm = VTOCGM(cn->cgn_vnode);
+ mutex_enter(&cgm->cg_contents);
+
+ if (cn->cgn_dir == NULL) {
+ VERIFY(cn->cgn_nlink == 0);
+ mutex_exit(&cgm->cg_contents);
+ return (0);
+ }
+
+ /*
+ * Get space for multiple directory entries
+ */
+ total_bytes_wanted = uiop->uio_iov->iov_len;
+ bufsize = total_bytes_wanted + sizeof (struct dirent64);
+ outbuf = kmem_alloc(bufsize, KM_SLEEP);
+
+ dp = (struct dirent64 *)outbuf;
+
+ offset = 0;
+ cdp = cn->cgn_dir;
+ while (cdp) {
+ namelen = strlen(cdp->cgd_name); /* no +1 needed */
+ offset = cdp->cgd_offset;
+ if (offset >= uiop->uio_offset) {
+ reclen = (int)DIRENT64_RECLEN(namelen);
+ if (outcount + reclen > total_bytes_wanted) {
+ if (!outcount) {
+ /* Buffer too small for any entries. */
+ error = EINVAL;
+ }
+ break;
+ }
+ ASSERT(cdp->cgd_cgrp_node != NULL);
+
+ /* use strncpy(9f) to zero out uninitialized bytes */
+
+ (void) strncpy(dp->d_name, cdp->cgd_name,
+ DIRENT64_NAMELEN(reclen));
+ dp->d_reclen = (ushort_t)reclen;
+ dp->d_ino = (ino64_t)cdp->cgd_cgrp_node->cgn_nodeid;
+ dp->d_off = (offset_t)cdp->cgd_offset + 1;
+ dp = (struct dirent64 *)((uintptr_t)dp + dp->d_reclen);
+ outcount += reclen;
+ ASSERT(outcount <= bufsize);
+ }
+ cdp = cdp->cgd_next;
+ }
+
+ if (!error)
+ error = uiomove(outbuf, outcount, UIO_READ, uiop);
+
+ if (!error) {
+ /*
+ * If we reached the end of the list our offset should now be
+ * just past the end.
+ */
+ if (!cdp) {
+ offset += 1;
+ if (eofp)
+ *eofp = 1;
+ } else if (eofp)
+ *eofp = 0;
+ uiop->uio_offset = offset;
+ }
+ gethrestime(&cn->cgn_atime);
+
+ mutex_exit(&cgm->cg_contents);
+
+ kmem_free(outbuf, bufsize);
+ return (error);
+}
+
+/* ARGSUSED5 */
+static int
+cgrp_symlink(struct vnode *dvp, char *lnm, struct vattr *cva, char *cnm,
+ struct cred *cred, caller_context_t *ct, int flags)
+{
+ /* cgroups doesn't support symlinks */
+ return (EPERM);
+}
+
+/* ARGSUSED */
+static void
+cgrp_inactive(struct vnode *vp, struct cred *cred, caller_context_t *ct)
+{
+ cgrp_node_t *cn = VTOCGN(vp);
+ cgrp_mnt_t *cgm = VFSTOCGM(vp->v_vfsp);
+
+ mutex_enter(&cgm->cg_contents);
+ mutex_enter(&vp->v_lock);
+ ASSERT(vp->v_count >= 1);
+
+ /*
+ * If we don't have the last hold or the link count is non-zero,
+ * there's little to do -- just drop our hold.
+ */
+ if (vp->v_count > 1 || cn->cgn_nlink != 0) {
+ vp->v_count--;
+ mutex_exit(&vp->v_lock);
+ mutex_exit(&cgm->cg_contents);
+ return;
+ }
+
+ if (cn->cgn_forw == NULL)
+ cgm->cg_rootnode->cgn_back = cn->cgn_back;
+ else
+ cn->cgn_forw->cgn_back = cn->cgn_back;
+ cn->cgn_back->cgn_forw = cn->cgn_forw;
+
+ mutex_exit(&vp->v_lock);
+ mutex_exit(&cgm->cg_contents);
+
+ /* Here's our chance to send invalid event */
+ vn_invalid(CGNTOV(cn));
+
+ vn_free(CGNTOV(cn));
+ kmem_free(cn, sizeof (cgrp_node_t));
+}
+
+/* ARGSUSED */
+static int
+cgrp_seek(struct vnode *vp, offset_t ooff, offset_t *noffp,
+ caller_context_t *ct)
+{
+ return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
+}
+
+/* ARGSUSED */
+static int
+cgrp_rwlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
+{
+ return (write_lock);
+}
+
+/* ARGSUSED */
+static void
+cgrp_rwunlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
+{
+}
+
+static int
+cgrp_pathconf(struct vnode *vp, int cmd, ulong_t *valp, cred_t *cr,
+ caller_context_t *ct)
+{
+ int error;
+
+ switch (cmd) {
+ case _PC_XATTR_EXISTS:
+ if (vp->v_vfsp->vfs_flag & VFS_XATTR) {
+ *valp = 0; /* assume no attributes */
+ error = 0; /* okay to ask */
+ } else {
+ error = EINVAL;
+ }
+ break;
+ case _PC_SATTR_ENABLED:
+ case _PC_SATTR_EXISTS:
+ *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) &&
+ (vp->v_type == VREG || vp->v_type == VDIR);
+ error = 0;
+ break;
+ case _PC_TIMESTAMP_RESOLUTION:
+ /* nanosecond timestamp resolution */
+ *valp = 1L;
+ error = 0;
+ break;
+ default:
+ error = fs_pathconf(vp, cmd, valp, cr, ct);
+ }
+ return (error);
+}
+
+
+struct vnodeops *cgrp_vnodeops;
+
+const fs_operation_def_t cgrp_vnodeops_template[] = {
+ VOPNAME_OPEN, { .vop_open = cgrp_open },
+ VOPNAME_CLOSE, { .vop_close = cgrp_close },
+ VOPNAME_READ, { .vop_read = cgrp_read },
+ VOPNAME_WRITE, { .vop_write = cgrp_write },
+ VOPNAME_GETATTR, { .vop_getattr = cgrp_getattr },
+ VOPNAME_SETATTR, { .vop_setattr = cgrp_setattr },
+ VOPNAME_ACCESS, { .vop_access = cgrp_access },
+ VOPNAME_LOOKUP, { .vop_lookup = cgrp_lookup },
+ VOPNAME_CREATE, { .vop_create = cgrp_create },
+ VOPNAME_REMOVE, { .vop_remove = cgrp_remove },
+ VOPNAME_LINK, { .vop_link = cgrp_link },
+ VOPNAME_RENAME, { .vop_rename = cgrp_rename },
+ VOPNAME_MKDIR, { .vop_mkdir = cgrp_mkdir },
+ VOPNAME_RMDIR, { .vop_rmdir = cgrp_rmdir },
+ VOPNAME_READDIR, { .vop_readdir = cgrp_readdir },
+ VOPNAME_SYMLINK, { .vop_symlink = cgrp_symlink },
+ VOPNAME_INACTIVE, { .vop_inactive = cgrp_inactive },
+ VOPNAME_RWLOCK, { .vop_rwlock = cgrp_rwlock },
+ VOPNAME_RWUNLOCK, { .vop_rwunlock = cgrp_rwunlock },
+ VOPNAME_SEEK, { .vop_seek = cgrp_seek },
+ VOPNAME_PATHCONF, { .vop_pathconf = cgrp_pathconf },
+ VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support },
+ NULL, NULL
+};
diff --git a/usr/src/uts/common/brand/lx/devfs/lxd.h b/usr/src/uts/common/brand/lx/devfs/lxd.h
new file mode 100644
index 0000000000..cd256c27c5
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/devfs/lxd.h
@@ -0,0 +1,232 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#ifndef _LXD_H
+#define _LXD_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * lxd.h: declarations, data structures and macros for lxd (lxd devfs).
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/policy.h>
+#include <sys/dirent.h>
+#include <sys/errno.h>
+#include <sys/kmem.h>
+#include <sys/pathname.h>
+#include <sys/systm.h>
+#include <sys/var.h>
+#include <sys/sysmacros.h>
+#include <sys/cred.h>
+#include <sys/priv.h>
+#include <sys/vnode.h>
+#include <sys/vfs.h>
+#include <sys/statvfs.h>
+#include <sys/cmn_err.h>
+#include <sys/zone.h>
+#include <sys/uio.h>
+#include <sys/utsname.h>
+#include <sys/atomic.h>
+#include <vm/anon.h>
+#include <sys/lx_types.h>
+
+#if defined(_KERNEL)
+
+#include <sys/lx_brand.h>
+
+/*
+ * It's unlikely that we need to create more than 50-60 subdirs/symlinks
+ * as front files so we size the file system hash for 2x that number.
+ * The back devfs typically has ~80 nodes so this is also a comfortable size
+ * for the back hash table.
+ */
+#define LXD_HASH_SZ 128
+
+#define LXD_BACK_HASH(v) ((((intptr_t)(v)) >> 10) & ((LXD_HASH_SZ) - 1))
+
+#define LXD_NM_HASH(ldn, name, hash) \
+ { \
+ char Xc, *Xcp; \
+ hash = (uint_t)(uintptr_t)(ldn) >> 8; \
+ for (Xcp = (name); (Xc = *Xcp) != 0; Xcp++) \
+ hash = (hash << 4) + hash + (uint_t)Xc; \
+ hash &= (LXD_HASH_SZ - 1); \
+ }
+
+
+enum lxd_node_type { LXDNT_NONE, LXDNT_BACK, LXDNT_FRONT };
+
+/*
+ * lxd per-mount data structure.
+ *
+ * All fields are protected by lxd_contents.
+ * File renames on a specific file system are protected lxdm_renamelck.
+ */
+typedef struct lxd_mnt {
+ struct vfs *lxdm_vfsp; /* filesystem's vfs struct */
+ struct lxd_node *lxdm_rootnode; /* root lxd_node */
+ char *lxdm_mntpath; /* name of lxd mount point */
+ dev_t lxdm_dev; /* unique dev # of mounted `device' */
+ kmutex_t lxdm_contents; /* per-mount lock */
+ kmutex_t lxdm_renamelck; /* rename lock for this mount */
+ uint_t lxdm_gen; /* node ID source for files */
+
+ /* protects buckets in both "dir ent" and "back" hash tables */
+ kmutex_t lxdm_hash_mutex[LXD_HASH_SZ];
+
+ /* per-mount data for "back" vnodes in the fs */
+ uint_t lxdm_back_refcnt; /* # outstanding "back" vnodes */
+ struct lxd_node *lxdm_back_htable[LXD_HASH_SZ];
+
+ /*
+ * Per-mount directory data for "front" nodes in the fs.
+ * Each front node has a directory entry but directory entries can live
+ * on either front or back nodes.
+ */
+ uint_t lxdm_dent_refcnt; /* # outstanding dir ents */
+ struct lxd_dirent *lxdm_dent_htable[LXD_HASH_SZ];
+} lxd_mnt_t;
+
+/*
+ * lxd_node is the file system dependent node for lxd.
+ *
+ * The node is used to represent both front and back files. For front files
+ * the node can represent either a directory or symlink.
+ */
+typedef struct lxd_node {
+ enum lxd_node_type lxdn_type;
+
+ /* Data for "front" nodes */
+ struct lxd_node *lxdn_prev; /* lnked lst of lxd nodes */
+ struct lxd_node *lxdn_next; /* lnked lst of lxd nodes */
+ struct lxd_node *lxdn_parent; /* dir containing this node */
+ krwlock_t lxdn_rwlock; /* serialize mods/dir updates */
+ kmutex_t lxdn_tlock; /* time, flag, and nlink lock */
+
+ /* these could be in a union ala tmpfs but not really necessary */
+ uint_t lxdn_dirents; /* number of dirents */
+ struct lxd_dirent *lxdn_dir; /* dirent list */
+ char *lxdn_symlink; /* pointer to symlink */
+ struct vattr lxdn_attr; /* attributes */
+
+ /* Hash table link */
+ struct lxd_node *lxdn_hnxt; /* link in per-mount entry */
+ /* hash table */
+ vnode_t *lxdn_vnode; /* vnode for this lxd_node */
+
+ vnode_t *lxdn_real_vp; /* back file - real vnode */
+} lxd_node_t;
+
+/*
+ * Attributes
+ */
+#define lxdn_mask lxdn_attr.va_mask
+#define lxdn_mode lxdn_attr.va_mode
+#define lxdn_uid lxdn_attr.va_uid
+#define lxdn_gid lxdn_attr.va_gid
+#define lxdn_fsid lxdn_attr.va_fsid
+#define lxdn_nodeid lxdn_attr.va_nodeid
+#define lxdn_nlink lxdn_attr.va_nlink
+#define lxdn_size lxdn_attr.va_size
+#define lxdn_atime lxdn_attr.va_atime
+#define lxdn_mtime lxdn_attr.va_mtime
+#define lxdn_ctime lxdn_attr.va_ctime
+#define lxdn_rdev lxdn_attr.va_rdev
+#define lxdn_blksize lxdn_attr.va_blksize
+#define lxdn_nblocks lxdn_attr.va_nblocks
+#define lxdn_seq lxdn_attr.va_seq
+
+/*
+ * lx devfs conversion macros
+ */
+#define VFSTOLXDM(vfsp) ((lxd_mnt_t *)(vfsp)->vfs_data)
+#define VTOLXDM(vp) ((lxd_mnt_t *)(vp)->v_vfsp->vfs_data)
+#define VTOLDN(vp) ((lxd_node_t *)(vp)->v_data)
+#define LDNTOV(ln) ((ln)->lxdn_vnode)
+#define ldnode_hold(ln) VN_HOLD(LDNTOV(ln))
+#define ldnode_rele(ln) VN_RELE(LDNTOV(ln))
+
+#define REALVP(vp) (VTOLDN(vp)->lxdn_real_vp)
+
+/*
+ * front directories are made up of a linked list of lxd_dirent structures
+ * hanging off directory lxdn_nodes. File names are not fixed length, but are
+ * null terminated.
+ */
+typedef struct lxd_dirent {
+ lxd_node_t *lddir_node; /* lxd node for this file */
+ struct lxd_dirent *lddir_next; /* next directory entry */
+ struct lxd_dirent *lddir_prev; /* prev directory entry */
+ uint_t lddir_offset; /* "offset" of dir entry */
+ uint_t lddir_hash; /* a hash of lddir_name */
+ struct lxd_dirent *lddir_link; /* linked via hash table */
+ lxd_node_t *lddir_parent; /* parent, dir we are in */
+ char *lddir_name; /* null terminated */
+} lxd_dirent_t;
+
+enum de_op { DE_CREATE, DE_MKDIR, DE_RENAME }; /* direnter ops */
+enum dr_op { DR_REMOVE, DR_RMDIR, DR_RENAME }; /* dirremove ops */
+
+typedef struct lxd_minor_translator {
+ char *lxd_mt_path; /* illumos minor node path */
+ minor_t lxd_mt_minor; /* illumos minor node number */
+ int lxd_mt_lx_major; /* linux major node number */
+ int lxd_mt_lx_minor; /* linux minor node number */
+} lxd_minor_translator_t;
+
+enum lxd_xl_tp { DTT_INVALID, DTT_LIST, DTT_CUSTOM };
+
+#define xl_list lxd_xl_minor.lxd_xl_list
+#define xl_custom lxd_xl_minor.lxd_xl_custom
+
+typedef struct lxd_devt_translator {
+ char *lxd_xl_driver; /* driver name */
+ major_t lxd_xl_major; /* driver number */
+
+ enum lxd_xl_tp lxd_xl_type; /* dictates how we intrep. xl_minor */
+ union {
+ uintptr_t lxd_xl_foo; /* required to compile */
+ lxd_minor_translator_t *lxd_xl_list;
+ void (*lxd_xl_custom)(dev_t, dev_t *);
+ } lxd_xl_minor;
+} lxd_devt_translator_t;
+
+extern struct vnodeops *lxd_vnodeops;
+extern lxd_devt_translator_t lxd_devt_translators[];
+
+vnode_t *lxd_make_back_node(vnode_t *, lxd_mnt_t *);
+void lxd_free_back_node(lxd_node_t *);
+int lxd_dirdelete(lxd_node_t *, lxd_node_t *, char *, enum dr_op, cred_t *);
+int lxd_direnter(lxd_mnt_t *, lxd_node_t *, char *, enum de_op, lxd_node_t *,
+ lxd_node_t *, struct vattr *, lxd_node_t **, cred_t *,
+ caller_context_t *);
+void lxd_dirinit(lxd_node_t *, lxd_node_t *, cred_t *);
+int lxd_dirlookup(lxd_node_t *, char *, lxd_node_t **, cred_t *);
+void lxd_dirtrunc(lxd_node_t *);
+void lxd_node_init(lxd_mnt_t *, lxd_node_t *, vnode_t *, vattr_t *, cred_t *);
+int lxd_naccess(void *, int, cred_t *);
+
+#endif /* KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LXD_H */
diff --git a/usr/src/uts/common/brand/lx/devfs/lxd_node.c b/usr/src/uts/common/brand/lx/devfs/lxd_node.c
new file mode 100644
index 0000000000..9e67f988bc
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/devfs/lxd_node.c
@@ -0,0 +1,1004 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/vfs.h>
+#include <sys/vnode.h>
+#include <sys/errno.h>
+#include <sys/cmn_err.h>
+#include <sys/cred.h>
+#include <sys/stat.h>
+#include <sys/mode.h>
+#include <sys/policy.h>
+#include <sys/sdt.h>
+
+#include "lxd.h"
+
+#define LXD_HASH_SIZE 8192 /* must be power of 2 */
+#define LXD_MUTEX_SIZE 64
+
+
+#define MODESHIFT 3
+
+typedef enum lxd_nodehold {
+ NOHOLD,
+ HOLD
+} lxd_nodehold_t;
+
+/*
+ * The following functions maintain the per-mount "front" files.
+ */
+static void
+lxd_save_dirent(lxd_dirent_t *de)
+{
+ lxd_mnt_t *lxdm = VTOLXDM(LDNTOV(de->lddir_parent));
+ uint_t hash;
+ kmutex_t *hmtx;
+
+ LXD_NM_HASH(de->lddir_parent, de->lddir_name, hash);
+ de->lddir_hash = hash;
+
+ hmtx = &lxdm->lxdm_hash_mutex[hash];
+
+ mutex_enter(hmtx);
+ ASSERT(de->lddir_link == NULL);
+ de->lddir_link = lxdm->lxdm_dent_htable[hash];
+ lxdm->lxdm_dent_htable[hash] = de;
+ mutex_exit(hmtx);
+
+ atomic_inc_32(&lxdm->lxdm_dent_refcnt);
+}
+
+static void
+lxd_rm_dirent(lxd_dirent_t *de)
+{
+ lxd_mnt_t *lxdm = VTOLXDM(LDNTOV(de->lddir_parent));
+ uint_t hash;
+ lxd_dirent_t **prevpp;
+ kmutex_t *hmtx;
+
+ hash = de->lddir_hash;
+ hmtx = &lxdm->lxdm_hash_mutex[hash];
+
+ mutex_enter(hmtx);
+ prevpp = &lxdm->lxdm_dent_htable[hash];
+ while (*prevpp != de)
+ prevpp = &(*prevpp)->lddir_link;
+ *prevpp = de->lddir_link;
+ de->lddir_link = NULL;
+ mutex_exit(hmtx);
+
+ ASSERT(lxdm->lxdm_dent_refcnt > 0);
+ atomic_dec_32(&lxdm->lxdm_dent_refcnt);
+}
+
+static lxd_dirent_t *
+lxd_find_dirent(char *name, lxd_node_t *parent, lxd_nodehold_t do_hold,
+ lxd_node_t **found)
+{
+ lxd_mnt_t *lxdm = VTOLXDM(LDNTOV(parent));
+ lxd_dirent_t *de;
+ uint_t hash;
+ kmutex_t *hmtx;
+
+ LXD_NM_HASH(parent, name, hash);
+ hmtx = &lxdm->lxdm_hash_mutex[hash];
+
+ mutex_enter(hmtx);
+ de = lxdm->lxdm_dent_htable[hash];
+ while (de) {
+ if (de->lddir_hash == hash && de->lddir_parent == parent &&
+ strcmp(de->lddir_name, name) == 0) {
+ lxd_node_t *ldn = de->lddir_node;
+
+ if (do_hold == HOLD) {
+ ASSERT(ldn != NULL);
+ ldnode_hold(ldn);
+ }
+ if (found != NULL)
+ *found = ldn;
+ mutex_exit(hmtx);
+ return (de);
+ }
+
+ de = de->lddir_link;
+ }
+ mutex_exit(hmtx);
+ return (NULL);
+}
+
+int
+lxd_naccess(void *vcp, int mode, cred_t *cr)
+{
+ lxd_node_t *ldn = vcp;
+ int shift = 0;
+ /*
+ * Check access based on owner, group and public perms in lxd_node.
+ */
+ if (crgetuid(cr) != ldn->lxdn_uid) {
+ shift += MODESHIFT;
+ if (groupmember(ldn->lxdn_gid, cr) == 0)
+ shift += MODESHIFT;
+ }
+
+ if (ldn->lxdn_type == LXDNT_FRONT)
+ return (secpolicy_vnode_access2(cr, LDNTOV(ldn),
+ ldn->lxdn_uid, ldn->lxdn_mode << shift, mode));
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ return (VOP_ACCESS(ldn->lxdn_real_vp, mode, 0, cr, NULL));
+}
+
+static lxd_node_t *
+lxd_find_back(struct vnode *vp, uint_t hash, lxd_mnt_t *lxdm)
+{
+ lxd_node_t *l;
+
+ ASSERT(MUTEX_HELD(&lxdm->lxdm_hash_mutex[hash]));
+
+ for (l = lxdm->lxdm_back_htable[hash]; l != NULL; l = l->lxdn_hnxt) {
+ if (l->lxdn_real_vp == vp) {
+ ASSERT(l->lxdn_type == LXDNT_BACK);
+
+ VN_HOLD(LDNTOV(l));
+ return (l);
+ }
+ }
+ return (NULL);
+}
+
+static void
+lxd_save_back(lxd_node_t *l, uint_t hash, lxd_mnt_t *lxdm)
+{
+ ASSERT(l->lxdn_type == LXDNT_BACK);
+ ASSERT(l->lxdn_real_vp != NULL);
+ ASSERT(MUTEX_HELD(&lxdm->lxdm_hash_mutex[hash]));
+
+ atomic_inc_32(&lxdm->lxdm_back_refcnt);
+
+ l->lxdn_hnxt = lxdm->lxdm_back_htable[hash];
+ lxdm->lxdm_back_htable[hash] = l;
+}
+
+
+struct vnode *
+lxd_make_back_node(struct vnode *vp, lxd_mnt_t *lxdm)
+{
+ uint_t hash;
+ kmutex_t *hmtx;
+ lxd_node_t *l;
+
+ hash = LXD_BACK_HASH(vp); /* Note: hashing with realvp */
+ hmtx = &lxdm->lxdm_hash_mutex[hash];
+ mutex_enter(hmtx);
+
+ l = lxd_find_back(vp, hash, lxdm);
+ if (l == NULL) {
+ vnode_t *nvp;
+
+ l = kmem_zalloc(sizeof (lxd_node_t), KM_SLEEP);
+ nvp = vn_alloc(KM_SLEEP);
+
+ rw_init(&l->lxdn_rwlock, NULL, RW_DEFAULT, NULL);
+ mutex_init(&l->lxdn_tlock, NULL, MUTEX_DEFAULT, NULL);
+
+ l->lxdn_vnode = nvp;
+ l->lxdn_type = LXDNT_BACK;
+ l->lxdn_real_vp = vp;
+
+ VN_SET_VFS_TYPE_DEV(nvp, lxdm->lxdm_vfsp, vp->v_type,
+ vp->v_rdev);
+ nvp->v_flag |= (vp->v_flag & (VNOMOUNT|VNOMAP|VDIROPEN));
+ vn_setops(nvp, lxd_vnodeops);
+ nvp->v_data = (caddr_t)l;
+
+ lxd_save_back(l, hash, lxdm);
+ vn_exists(vp);
+ } else {
+ VN_RELE(vp);
+ }
+
+ mutex_exit(hmtx);
+ return (LDNTOV(l));
+}
+
+void
+lxd_free_back_node(lxd_node_t *lp)
+{
+ uint_t hash;
+ kmutex_t *hmtx;
+ lxd_node_t *l;
+ lxd_node_t *lprev = NULL;
+ vnode_t *vp = LDNTOV(lp);
+ vnode_t *realvp = REALVP(vp);
+ lxd_mnt_t *lxdm = VTOLXDM(vp);
+
+ /* in lxd_make_back_node we call lxd_find_back with the realvp */
+ hash = LXD_BACK_HASH(realvp);
+ hmtx = &lxdm->lxdm_hash_mutex[hash];
+ mutex_enter(hmtx);
+
+ mutex_enter(&vp->v_lock);
+ if (vp->v_count > 1) {
+ vp->v_count--; /* release our hold from vn_rele */
+ mutex_exit(&vp->v_lock);
+ mutex_exit(hmtx);
+ return;
+ }
+ mutex_exit(&vp->v_lock);
+
+ for (l = lxdm->lxdm_back_htable[hash]; l != NULL;
+ lprev = l, l = l->lxdn_hnxt) {
+
+ if (l != lp)
+ continue;
+
+ ASSERT(l->lxdn_type == LXDNT_BACK);
+ ASSERT(lxdm->lxdm_back_refcnt > 0);
+
+ atomic_dec_32(&lxdm->lxdm_back_refcnt);
+ vn_invalid(vp);
+
+ if (lprev == NULL) {
+ lxdm->lxdm_back_htable[hash] = l->lxdn_hnxt;
+ } else {
+ lprev->lxdn_hnxt = l->lxdn_hnxt;
+ }
+
+ mutex_exit(hmtx);
+ rw_destroy(&l->lxdn_rwlock);
+ mutex_destroy(&l->lxdn_tlock);
+ kmem_free(l, sizeof (lxd_node_t));
+ vn_free(vp);
+ VN_RELE(realvp);
+ return;
+ }
+
+ panic("lxd_free_back_node");
+ /*NOTREACHED*/
+}
+/*
+ * Search directory 'parent' for entry 'name'.
+ *
+ * 0 is returned on success and *foundcp points
+ * to the found lxd_node with its vnode held.
+ */
+int
+lxd_dirlookup(lxd_node_t *parent, char *name, lxd_node_t **foundnp, cred_t *cr)
+{
+ int error;
+
+ *foundnp = NULL;
+ if (parent->lxdn_vnode->v_type != VDIR)
+ return (ENOTDIR);
+
+ if ((error = lxd_naccess(parent, VEXEC, cr)))
+ return (error);
+
+ if (*name == '\0') {
+ ldnode_hold(parent);
+ *foundnp = parent;
+ return (0);
+ }
+
+ /*
+ * Search the directory for the matching name
+ * We need the lock protecting the lxdn_dir list
+ * so that it doesn't change out from underneath us.
+ * lxd_find_dirent() will pass back the lxd_node
+ * with a hold on it.
+ */
+
+ if (lxd_find_dirent(name, parent, HOLD, foundnp) != NULL) {
+ ASSERT(*foundnp);
+ return (0);
+ }
+
+ return (ENOENT);
+}
+
+/*
+ * Check if the source directory is in the path of the target directory.
+ * The target directory is locked by the caller.
+ */
+static int
+lxd_dircheckpath(lxd_node_t *fromnode, lxd_node_t *toparent, cred_t *cr)
+{
+ int error = 0;
+ lxd_node_t *dir, *dotdot;
+
+ ASSERT(RW_WRITE_HELD(&toparent->lxdn_rwlock));
+ ASSERT(toparent->lxdn_vnode->v_type == VDIR);
+
+ dotdot = toparent->lxdn_parent;
+ if (dotdot == NULL)
+ return (ENOENT);
+ ldnode_hold(dotdot);
+
+ if (dotdot == toparent) {
+ /* root of fs. search trivially satisfied. */
+ ldnode_rele(dotdot);
+ return (0);
+ }
+
+ for (;;) {
+ /*
+ * Return error for cases like "mv c c/d",
+ * "mv c c/d/e" and so on.
+ */
+ if (dotdot == fromnode) {
+ ldnode_rele(dotdot);
+ error = EINVAL;
+ break;
+ }
+
+ dir = dotdot;
+ dotdot = dir->lxdn_parent;
+ if (dotdot == NULL) {
+ ldnode_rele(dir);
+ error = ENOENT;
+ break;
+ }
+ ldnode_hold(dotdot);
+
+ /*
+ * We're okay if we traverse the directory tree up to
+ * the root directory and don't run into the
+ * parent directory.
+ */
+ if (dir == dotdot) {
+ ldnode_rele(dir);
+ ldnode_rele(dotdot);
+ break;
+ }
+ ldnode_rele(dir);
+ }
+
+ return (error);
+}
+
+static int
+lxd_dir_make_node(lxd_node_t *dir, lxd_mnt_t *lxdm, struct vattr *va,
+ enum de_op op, lxd_node_t **newnode, struct cred *cred)
+{
+ lxd_node_t *ldn;
+
+ ASSERT(va != NULL);
+
+ if (((va->va_mask & AT_ATIME) && TIMESPEC_OVERFLOW(&va->va_atime)) ||
+ ((va->va_mask & AT_MTIME) && TIMESPEC_OVERFLOW(&va->va_mtime)))
+ return (EOVERFLOW);
+
+ ldn = kmem_zalloc(sizeof (lxd_node_t), KM_SLEEP);
+
+ ldn->lxdn_type = LXDNT_FRONT;
+ lxd_node_init(lxdm, ldn, NULL, va, cred);
+
+ ldn->lxdn_vnode->v_rdev = ldn->lxdn_rdev = NODEV;
+ ldn->lxdn_vnode->v_type = va->va_type;
+ ldn->lxdn_uid = crgetuid(cred);
+ ldn->lxdn_gid = crgetgid(cred);
+ ldn->lxdn_nodeid = lxdm->lxdm_gen++;
+
+ if (va->va_mask & AT_ATIME)
+ ldn->lxdn_atime = va->va_atime;
+ if (va->va_mask & AT_MTIME)
+ ldn->lxdn_mtime = va->va_mtime;
+
+ if (op == DE_MKDIR) {
+ lxd_dirinit(dir, ldn, cred);
+ }
+
+ *newnode = ldn;
+ return (0);
+}
+
+static int
+lxd_diraddentry(lxd_node_t *dir, lxd_node_t *ldn, char *name, enum de_op op)
+{
+ lxd_dirent_t *dp, *pdp;
+ size_t namelen, alloc_size;
+ timestruc_t now;
+
+ /*
+ * Make sure the parent directory wasn't removed from
+ * underneath the caller.
+ */
+ if (dir->lxdn_dir == NULL)
+ return (ENOENT);
+
+ /* Check that everything is on the same filesystem. */
+ if (ldn->lxdn_vnode->v_vfsp != dir->lxdn_vnode->v_vfsp)
+ return (EXDEV);
+
+ /* Allocate and initialize directory entry */
+ namelen = strlen(name) + 1;
+ alloc_size = namelen + sizeof (lxd_dirent_t);
+ dp = kmem_zalloc(alloc_size, KM_NOSLEEP | KM_NORMALPRI);
+ if (dp == NULL)
+ return (ENOSPC);
+
+ ldn->lxdn_parent = dir;
+
+ dir->lxdn_size += alloc_size;
+ dir->lxdn_dirents++;
+ dp->lddir_node = ldn;
+ dp->lddir_parent = dir;
+
+ /* The directory entry and its name were allocated sequentially. */
+ dp->lddir_name = (char *)dp + sizeof (lxd_dirent_t);
+ (void) strcpy(dp->lddir_name, name);
+
+ lxd_save_dirent(dp);
+
+ /*
+ * Some utilities expect the size of a directory to remain
+ * somewhat static. For example, a routine which removes
+ * subdirectories between calls to readdir(); the size of the
+ * directory changes from underneath it and so the real
+ * directory offset in bytes is invalid. To circumvent
+ * this problem, we initialize a directory entry with an
+ * phony offset, and use this offset to determine end of
+ * file in lxd_readdir.
+ */
+ pdp = dir->lxdn_dir->lddir_prev;
+ /*
+ * Install at first empty "slot" in directory list.
+ */
+ while (pdp->lddir_next != NULL &&
+ (pdp->lddir_next->lddir_offset - pdp->lddir_offset) <= 1) {
+ ASSERT(pdp->lddir_next != pdp);
+ ASSERT(pdp->lddir_prev != pdp);
+ ASSERT(pdp->lddir_next->lddir_offset > pdp->lddir_offset);
+ pdp = pdp->lddir_next;
+ }
+ dp->lddir_offset = pdp->lddir_offset + 1;
+
+ /*
+ * If we're at the end of the dirent list and the offset (which
+ * is necessarily the largest offset in this directory) is more
+ * than twice the number of dirents, that means the directory is
+ * 50% holes. At this point we reset the slot pointer back to
+ * the beginning of the directory so we start using the holes.
+ * The idea is that if there are N dirents, there must also be
+ * N holes, so we can satisfy the next N creates by walking at
+ * most 2N entries; thus the average cost of a create is constant.
+ * Note that we use the first dirent's lddir_prev as the roving
+ * slot pointer; it's ugly, but it saves a word in every dirent.
+ */
+ if (pdp->lddir_next == NULL &&
+ pdp->lddir_offset > 2 * dir->lxdn_dirents)
+ dir->lxdn_dir->lddir_prev = dir->lxdn_dir->lddir_next;
+ else
+ dir->lxdn_dir->lddir_prev = dp;
+
+ ASSERT(pdp->lddir_next != pdp);
+ ASSERT(pdp->lddir_prev != pdp);
+
+ dp->lddir_next = pdp->lddir_next;
+ if (dp->lddir_next) {
+ dp->lddir_next->lddir_prev = dp;
+ }
+ dp->lddir_prev = pdp;
+ pdp->lddir_next = dp;
+
+ ASSERT(dp->lddir_next != dp);
+ ASSERT(dp->lddir_prev != dp);
+ ASSERT(pdp->lddir_next != pdp);
+ ASSERT(pdp->lddir_prev != pdp);
+
+ gethrestime(&now);
+ dir->lxdn_mtime = now;
+ dir->lxdn_ctime = now;
+
+ return (0);
+}
+
+/*
+ * Enter a directory entry for 'name' into directory 'dir'
+ *
+ * Returns 0 on success.
+ */
+int
+lxd_direnter(
+ lxd_mnt_t *lxdm,
+ lxd_node_t *dir, /* target directory to make entry in */
+ char *name, /* name of entry */
+ enum de_op op, /* entry operation */
+ lxd_node_t *fromparent, /* original directory if rename */
+ lxd_node_t *ldn, /* existing lxd_node, if rename */
+ struct vattr *va,
+ lxd_node_t **rnp, /* return lxd_node, if create/mkdir */
+ cred_t *cr,
+ caller_context_t *ctp)
+{
+ lxd_dirent_t *dirp;
+ lxd_node_t *found = NULL;
+ int error = 0;
+ char *s;
+
+ /* lxdn_rwlock is held to serialize direnter and dirdeletes */
+ ASSERT(RW_WRITE_HELD(&dir->lxdn_rwlock));
+ ASSERT(dir->lxdn_vnode->v_type == VDIR);
+
+ /*
+ * Don't allow '/' characters in pathname component,
+ */
+ for (s = name; *s; s++)
+ if (*s == '/')
+ return (EACCES);
+
+ if (name[0] == '\0')
+ panic("lxd_direnter: NULL name");
+
+ /*
+ * For rename lock the source entry and check the link count
+ * to see if it has been removed while it was unlocked.
+ */
+ if (op == DE_RENAME) {
+ mutex_enter(&ldn->lxdn_tlock);
+ if (ldn->lxdn_nlink == 0) {
+ mutex_exit(&ldn->lxdn_tlock);
+ return (ENOENT);
+ }
+
+ if (ldn->lxdn_nlink == MAXLINK) {
+ mutex_exit(&ldn->lxdn_tlock);
+ return (EMLINK);
+ }
+ ldn->lxdn_nlink++;
+ gethrestime(&ldn->lxdn_ctime);
+ mutex_exit(&ldn->lxdn_tlock);
+ }
+
+ /*
+ * This might be a "dangling detached directory" (it could have been
+ * removed, but a reference to it kept in u_cwd). Don't bother
+ * searching it, and with any luck the user will get tired of dealing
+ * with us and cd to some absolute pathway (thus in ufs, too).
+ */
+ if (dir->lxdn_nlink == 0) {
+ error = ENOENT;
+ goto out;
+ }
+
+ /*
+ * If this is a rename of a directory and the parent is different
+ * (".." must be changed), then the source directory must not be in the
+ * directory hierarchy above the target, as this would orphan
+ * everything below the source directory.
+ */
+ if (op == DE_RENAME) {
+ if (ldn == dir) {
+ error = EINVAL;
+ goto out;
+ }
+ if ((ldn->lxdn_vnode->v_type) == VDIR) {
+ if ((fromparent != dir) &&
+ (error = lxd_dircheckpath(ldn, dir, cr)) != 0) {
+ goto out;
+ }
+ }
+ }
+
+ /* Search for an existing entry. */
+ dirp = lxd_find_dirent(name, dir, HOLD, &found);
+ if (dirp != NULL) {
+ ASSERT(found != NULL);
+ switch (op) {
+ case DE_CREATE:
+ case DE_MKDIR:
+ if (rnp != NULL) {
+ *rnp = found;
+ error = EEXIST;
+ } else {
+ ldnode_rele(found);
+ }
+ break;
+
+ case DE_RENAME:
+ /*
+ * Note that we only hit this path when we're renaming
+ * a symlink from one directory to another and there is
+ * a pre-existing symlink as the target. lxd_rename
+ * will unlink the src from the original directory but
+ * here we need to unlink the dest that we collided
+ * with, then create the new directory entry as we do
+ * below when there is no pre-existing symlink.
+ */
+ if ((error = lxd_naccess(dir, VWRITE, cr)) != 0)
+ goto out;
+
+ ASSERT(found->lxdn_vnode->v_type == VLNK);
+ /* dir rw lock is already held and asserted above */
+ rw_enter(&found->lxdn_rwlock, RW_WRITER);
+ error = lxd_dirdelete(dir, found, name, DR_RENAME, cr);
+ rw_exit(&found->lxdn_rwlock);
+ ldnode_rele(found);
+ if (error != 0)
+ goto out;
+
+ error = lxd_diraddentry(dir, ldn, name, op);
+ if (error == 0 && rnp != NULL)
+ *rnp = ldn;
+ break;
+ }
+ } else {
+
+ /*
+ * The directory entry does not exist, but the node might if
+ * this is a rename. Check write permission in directory to
+ * see if entry can be created.
+ */
+ if ((error = lxd_naccess(dir, VWRITE, cr)) != 0)
+ goto out;
+ if (op == DE_CREATE || op == DE_MKDIR) {
+ /*
+ * Make new lxd_node and directory entry as required.
+ */
+ error = lxd_dir_make_node(dir, lxdm, va, op, &ldn, cr);
+ if (error)
+ goto out;
+ }
+
+ error = lxd_diraddentry(dir, ldn, name, op);
+ if (error != 0) {
+ if (op == DE_CREATE || op == DE_MKDIR) {
+ /*
+ * Unmake the inode we just made.
+ */
+ rw_enter(&ldn->lxdn_rwlock, RW_WRITER);
+ if ((ldn->lxdn_vnode->v_type) == VDIR) {
+ ASSERT(dirp == NULL);
+ /*
+ * cleanup allocs made by lxd_dirinit
+ */
+ lxd_dirtrunc(ldn);
+ }
+ mutex_enter(&ldn->lxdn_tlock);
+ ldn->lxdn_nlink = 0;
+ gethrestime(&ldn->lxdn_ctime);
+ mutex_exit(&ldn->lxdn_tlock);
+ rw_exit(&ldn->lxdn_rwlock);
+ ldnode_rele(ldn);
+ ldn = NULL;
+ }
+ } else if (rnp != NULL) {
+ *rnp = ldn;
+ } else if (op == DE_CREATE || op == DE_MKDIR) {
+ ldnode_rele(ldn);
+ }
+ }
+
+out:
+ if (error && op == DE_RENAME) {
+ /* Undo bumped link count. */
+ mutex_enter(&ldn->lxdn_tlock);
+ ldn->lxdn_nlink--;
+ gethrestime(&ldn->lxdn_ctime);
+ mutex_exit(&ldn->lxdn_tlock);
+ }
+ return (error);
+}
+
+/*
+ * Delete entry ldn of name "nm" from parent dir. This is used to both remove
+ * a directory and to remove file nodes within the directory (by recursively
+ * calling itself). It frees the dir entry space and decrements link count on
+ * lxd_node(s).
+ *
+ * Return 0 on success.
+ */
+int
+lxd_dirdelete(lxd_node_t *dir, lxd_node_t *ldn, char *nm, enum dr_op op,
+ cred_t *cred)
+{
+ lxd_dirent_t *dirp;
+ int error;
+ size_t namelen;
+ lxd_node_t *fndnp;
+ timestruc_t now;
+
+ ASSERT(RW_WRITE_HELD(&dir->lxdn_rwlock));
+ ASSERT(RW_WRITE_HELD(&ldn->lxdn_rwlock));
+ ASSERT(dir->lxdn_vnode->v_type == VDIR);
+
+ if (nm[0] == '\0')
+ panic("lxd_dirdelete: empty name for 0x%p", (void *)ldn);
+
+ /*
+ * return error when removing . and ..
+ */
+ if (nm[0] == '.') {
+ if (nm[1] == '\0')
+ return (EINVAL);
+ if (nm[1] == '.' && nm[2] == '\0')
+ return (EEXIST); /* thus in ufs */
+ }
+
+ if ((error = lxd_naccess(dir, VEXEC|VWRITE, cred)) != 0)
+ return (error);
+
+ if (dir->lxdn_dir == NULL)
+ return (ENOENT);
+
+ if (op == DR_RMDIR) {
+ /*
+ * This is the top-level removal of a directory. Start by
+ * removing any file entries from the dir. We do this by
+ * recursively calling back into this function with a different
+ * op code. The caller of this function has already verified
+ * that it is safe to remove this directory.
+ */
+ lxd_dirent_t *dirp;
+
+ ASSERT(ldn->lxdn_vnode->v_type == VDIR);
+
+ dirp = ldn->lxdn_dir;
+ while (dirp) {
+ lxd_node_t *dn;
+ lxd_dirent_t *nextp;
+
+ if (strcmp(dirp->lddir_name, ".") == 0 ||
+ strcmp(dirp->lddir_name, "..") == 0) {
+ dirp = dirp->lddir_next;
+ continue;
+ }
+
+ dn = dirp->lddir_node;
+ nextp = dirp->lddir_next;
+
+ ldnode_hold(dn);
+ error = lxd_dirdelete(ldn, dn, dirp->lddir_name,
+ DR_REMOVE, cred);
+ ldnode_rele(dn);
+
+ dirp = nextp;
+ }
+ }
+
+ dirp = lxd_find_dirent(nm, dir, NOHOLD, &fndnp);
+ VERIFY(dirp != NULL);
+ VERIFY(ldn == fndnp);
+
+ lxd_rm_dirent(dirp);
+
+ /* Take dirp out of the directory list. */
+ ASSERT(dirp->lddir_next != dirp);
+ ASSERT(dirp->lddir_prev != dirp);
+ if (dirp->lddir_prev) {
+ dirp->lddir_prev->lddir_next = dirp->lddir_next;
+ }
+ if (dirp->lddir_next) {
+ dirp->lddir_next->lddir_prev = dirp->lddir_prev;
+ }
+
+ /*
+ * If the roving slot pointer happens to match dirp,
+ * point it at the previous dirent.
+ */
+ if (dir->lxdn_dir->lddir_prev == dirp) {
+ dir->lxdn_dir->lddir_prev = dirp->lddir_prev;
+ }
+ ASSERT(dirp->lddir_next != dirp);
+ ASSERT(dirp->lddir_prev != dirp);
+
+ /* dirp points to the correct directory entry */
+ namelen = strlen(dirp->lddir_name) + 1;
+
+ kmem_free(dirp, sizeof (lxd_dirent_t) + namelen);
+ dir->lxdn_size -= (sizeof (lxd_dirent_t) + namelen);
+ dir->lxdn_dirents--;
+
+ gethrestime(&now);
+ dir->lxdn_mtime = now;
+ dir->lxdn_ctime = now;
+ ldn->lxdn_ctime = now;
+
+ ASSERT(ldn->lxdn_nlink > 0);
+ mutex_enter(&ldn->lxdn_tlock);
+ ldn->lxdn_nlink--;
+ mutex_exit(&ldn->lxdn_tlock);
+ if (op == DR_RMDIR && ldn->lxdn_vnode->v_type == VDIR) {
+ lxd_dirtrunc(ldn);
+ ASSERT(ldn->lxdn_nlink == 0);
+ }
+ return (0);
+}
+
+/*
+ * Initialize a lxd_node and add it to file list under mount point.
+ */
+void
+lxd_node_init(lxd_mnt_t *lxdm, lxd_node_t *ldn, vnode_t *realvp, vattr_t *vap,
+ cred_t *cred)
+{
+ struct vnode *vp;
+ timestruc_t now;
+
+ ASSERT(vap != NULL);
+
+ rw_init(&ldn->lxdn_rwlock, NULL, RW_DEFAULT, NULL);
+ mutex_init(&ldn->lxdn_tlock, NULL, MUTEX_DEFAULT, NULL);
+ ldn->lxdn_mode = MAKEIMODE(vap->va_type, vap->va_mode);
+ ldn->lxdn_mask = 0;
+ ldn->lxdn_attr.va_type = vap->va_type;
+ ldn->lxdn_nlink = 1;
+ ldn->lxdn_size = 0;
+
+ if (cred == NULL) {
+ ldn->lxdn_uid = vap->va_uid;
+ ldn->lxdn_gid = vap->va_gid;
+ } else {
+ ldn->lxdn_uid = crgetuid(cred);
+ ldn->lxdn_gid = crgetgid(cred);
+ }
+
+ ldn->lxdn_fsid = lxdm->lxdm_dev;
+ ldn->lxdn_rdev = vap->va_rdev;
+ ldn->lxdn_blksize = PAGESIZE;
+ ldn->lxdn_nblocks = 0;
+ gethrestime(&now);
+ ldn->lxdn_atime = now;
+ ldn->lxdn_mtime = now;
+ ldn->lxdn_ctime = now;
+ ldn->lxdn_seq = 0;
+ ldn->lxdn_dir = NULL;
+
+ ldn->lxdn_real_vp = realvp;
+
+ ldn->lxdn_vnode = vn_alloc(KM_SLEEP);
+ vp = LDNTOV(ldn);
+ vn_setops(vp, lxd_vnodeops);
+ vp->v_vfsp = lxdm->lxdm_vfsp;
+ vp->v_type = vap->va_type;
+ vp->v_rdev = vap->va_rdev;
+ vp->v_data = (caddr_t)ldn;
+
+ mutex_enter(&lxdm->lxdm_contents);
+ ldn->lxdn_nodeid = lxdm->lxdm_gen++;
+
+ /*
+ * Add new lxd_node to end of linked list of lxd_nodes for this
+ * lxdevfs. Root directory is handled specially in lxd_mount.
+ */
+ if (lxdm->lxdm_rootnode != (lxd_node_t *)NULL) {
+ ldn->lxdn_next = NULL;
+ ldn->lxdn_prev = lxdm->lxdm_rootnode->lxdn_prev;
+ ldn->lxdn_prev->lxdn_next = lxdm->lxdm_rootnode->lxdn_prev =
+ ldn;
+ }
+ mutex_exit(&lxdm->lxdm_contents);
+ vn_exists(vp);
+}
+
+/*
+ * lxd_dirinit is used internally to initialize a directory (dir)
+ * with '.' and '..' entries without checking permissions and locking
+ * It also creates the entries for the pseudo file nodes that reside in the
+ * directory.
+ */
+void
+lxd_dirinit(lxd_node_t *parent, lxd_node_t *dir, cred_t *cr)
+{
+ lxd_dirent_t *dot, *dotdot;
+ timestruc_t now;
+ lxd_mnt_t *lxdm = VTOLXDM(dir->lxdn_vnode);
+ struct vattr nattr;
+
+ ASSERT(RW_WRITE_HELD(&parent->lxdn_rwlock));
+ ASSERT(dir->lxdn_vnode->v_type == VDIR);
+
+ dir->lxdn_nodeid = lxdm->lxdm_gen++;
+
+ /*
+ * Initialize the entries
+ */
+ dot = kmem_zalloc(sizeof (lxd_dirent_t) + 2, KM_SLEEP);
+ dot->lddir_node = dir;
+ dot->lddir_offset = 0;
+ dot->lddir_name = (char *)dot + sizeof (lxd_dirent_t);
+ dot->lddir_name[0] = '.';
+ dot->lddir_parent = dir;
+ lxd_save_dirent(dot);
+
+ dotdot = kmem_zalloc(sizeof (lxd_dirent_t) + 3, KM_SLEEP);
+ dotdot->lddir_node = parent;
+ dotdot->lddir_offset = 1;
+ dotdot->lddir_name = (char *)dotdot + sizeof (lxd_dirent_t);
+ dotdot->lddir_name[0] = '.';
+ dotdot->lddir_name[1] = '.';
+ dotdot->lddir_parent = dir;
+ lxd_save_dirent(dotdot);
+
+ /*
+ * Initialize directory entry list.
+ */
+ dot->lddir_next = dotdot;
+ dot->lddir_prev = dotdot; /* dot's lddir_prev holds roving slot ptr */
+ dotdot->lddir_next = NULL;
+ dotdot->lddir_prev = dot;
+
+ gethrestime(&now);
+ dir->lxdn_mtime = now;
+ dir->lxdn_ctime = now;
+
+ parent->lxdn_nlink++;
+ parent->lxdn_ctime = now;
+
+ dir->lxdn_dir = dot;
+ dir->lxdn_size = 2 * sizeof (lxd_dirent_t) + 5; /* dot and dotdot */
+ dir->lxdn_dirents = 2;
+ dir->lxdn_nlink = 2;
+ dir->lxdn_parent = parent;
+
+ bzero(&nattr, sizeof (struct vattr));
+ nattr.va_mode = (mode_t)(0644);
+ nattr.va_type = VREG;
+ nattr.va_rdev = 0;
+}
+
+/*
+ * lxd_dirtrunc is called to remove all directory entries under this directory.
+ */
+void
+lxd_dirtrunc(lxd_node_t *dir)
+{
+ lxd_dirent_t *ldp;
+ timestruc_t now;
+
+ ASSERT(RW_WRITE_HELD(&dir->lxdn_rwlock));
+ ASSERT(dir->lxdn_vnode->v_type == VDIR);
+
+ for (ldp = dir->lxdn_dir; ldp; ldp = dir->lxdn_dir) {
+ size_t namelen;
+ lxd_node_t *ldn;
+
+ ASSERT(ldp->lddir_next != ldp);
+ ASSERT(ldp->lddir_prev != ldp);
+ ASSERT(ldp->lddir_node);
+
+ dir->lxdn_dir = ldp->lddir_next;
+ namelen = strlen(ldp->lddir_name) + 1;
+
+ /*
+ * Adjust the link counts to account for this directory entry
+ * removal. We do hold/rele operations to free up these nodes.
+ */
+ ldn = ldp->lddir_node;
+
+ ASSERT(ldn->lxdn_nlink > 0);
+ mutex_enter(&ldn->lxdn_tlock);
+ ldn->lxdn_nlink--;
+ mutex_exit(&ldn->lxdn_tlock);
+
+ lxd_rm_dirent(ldp);
+ kmem_free(ldp, sizeof (lxd_dirent_t) + namelen);
+ dir->lxdn_size -= (sizeof (lxd_dirent_t) + namelen);
+ dir->lxdn_dirents--;
+ }
+
+ gethrestime(&now);
+ dir->lxdn_mtime = now;
+ dir->lxdn_ctime = now;
+
+ ASSERT(dir->lxdn_dir == NULL);
+ ASSERT(dir->lxdn_size == 0);
+ ASSERT(dir->lxdn_dirents == 0);
+}
diff --git a/usr/src/uts/common/brand/lx/devfs/lxd_vfsops.c b/usr/src/uts/common/brand/lx/devfs/lxd_vfsops.c
new file mode 100644
index 0000000000..b474c329ad
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/devfs/lxd_vfsops.c
@@ -0,0 +1,830 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+/*
+ * The lx devfs (lxd) file system is used within lx branded zones to provide
+ * the Linux view of /dev.
+ *
+ * In the past, the Linux /dev was simply a lofs mount pointing at /native/dev.
+ * lxd now provides the Linux /dev.
+ *
+ * The lxd file system is a hybrid of lofs and tmpfs. It supports a "back" file
+ * system which is the special device and corresponds to the special device in
+ * a lofs mount. As with lofs, all files in the special device are accessible
+ * through the lxd mount. Because the zone's devfs is not directly modifiable
+ * within the zone (also mknod(2) is not generally allowed within a zone) it is
+ * impossible to create files in devfs. For lx, in some cases it's useful to be
+ * able to make new symlinks or new directories under /dev. lxd implements
+ * these operations by creating "files" in memory in the same way as tmpfs
+ * does. Within lxd these are referred to as "front" files. For operations such
+ * as lookup or readdir, lxd provides a merged view of both the front and back
+ * files. lxd does not support regular front files or simple I/O (read/write)
+ * to front files, since there is no need for that. For back files, all
+ * operations are simply passed through to the real vnode, as is done with
+ * lofs. Front files are not allowed to mask back files.
+ *
+ * The Linux /dev is now a lxd mount with the special file (i.e. the back
+ * file system) as /native/dev.
+ *
+ * In addition, lx has a need for some illumos/Linux translation for the
+ * various *stat(2) system calls when used on a device. This translation can
+ * be centralized within lxd's getattr vnode entry point.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+#include <sys/kmem.h>
+#include <sys/time.h>
+#include <sys/pathname.h>
+#include <sys/vfs.h>
+#include <sys/vfs_opreg.h>
+#include <sys/vnode.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+#include <sys/stat.h>
+#include <sys/errno.h>
+#include <sys/cmn_err.h>
+#include <sys/cred.h>
+#include <sys/statvfs.h>
+#include <sys/mount.h>
+#include <sys/systm.h>
+#include <sys/mntent.h>
+#include <sys/policy.h>
+#include <sys/sdt.h>
+#include <sys/ddi.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_ptm.h>
+#include <sys/lx_impl.h>
+
+#include "lxd.h"
+
+/* Module level parameters */
+static int lxd_fstype;
+static dev_t lxd_dev;
+
+/*
+ * lxd_mountcount is used to prevent module unloads while there is still
+ * state from a former mount hanging around. The filesystem module must not be
+ * allowed to go away before the last VFS_FREEVFS() call has been made. Since
+ * this is just an atomic counter, there's no need for locking.
+ */
+static uint32_t lxd_mountcount;
+
+/*
+ * lxd_minfree is the minimum amount of swap space that lx devfs leaves for
+ * the rest of the zone.
+ */
+size_t lxd_minfree = 0;
+
+/*
+ * LXDMINFREE -- the value from which lxd_minfree is derived -- should be
+ * configured to a value that is roughly the smallest practical value for
+ * memory + swap minus the largest reasonable size for lxd in such
+ * a configuration. As of this writing, the smallest practical memory + swap
+ * configuration is 128MB, and it seems reasonable to allow lxd to consume
+ * no more than ~10% of this, yielding a LXDMINFREE of 12MB.
+ */
+#define LXDMINFREE 12 * 1024 * 1024 /* 12 Megabytes */
+
+extern pgcnt_t swapfs_minfree;
+
+extern int lxd_symlink(vnode_t *, char *, struct vattr *, char *, cred_t *,
+ caller_context_t *, int);
+extern int stat64(char *, struct stat64 *);
+
+/*
+ * lxd vfs operations.
+ */
+static int lxd_init(int, char *);
+static int lxd_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *);
+static int lxd_unmount(vfs_t *, int, cred_t *);
+static int lxd_root(vfs_t *, vnode_t **);
+static int lxd_statvfs(vfs_t *, statvfs64_t *);
+static void lxd_freevfs(vfs_t *vfsp);
+
+/*
+ * Loadable module wrapper
+ */
+#include <sys/modctl.h>
+
+static vfsdef_t vfw = {
+ VFSDEF_VERSION,
+ "lx_devfs",
+ lxd_init,
+ VSW_ZMOUNT,
+ NULL
+};
+
+/*
+ * Module linkage information
+ */
+static struct modlfs modlfs = {
+ &mod_fsops, "lx brand devfs", &vfw
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, &modlfs, NULL
+};
+
+/*
+ * Definitions and translators for devt's.
+ */
+static void lxd_pts_devt_translator(dev_t, dev_t *);
+static void lxd_ptm_devt_translator(dev_t, dev_t *);
+
+static kmutex_t lxd_xlate_lock;
+static boolean_t lxd_xlate_initialized = B_FALSE;
+
+static lxd_minor_translator_t lxd_mtranslator_mm[] = {
+ { "/dev/null", 0, 1, 3 },
+ { "/dev/zero", 0, 1, 5 },
+ { NULL, 0, 0, 0 }
+};
+static lxd_minor_translator_t lxd_mtranslator_random[] = {
+ { "/dev/random", 0, 1, 8 },
+ { "/dev/urandom", 0, 1, 9 },
+ { NULL, 0, 0, 0 }
+};
+static lxd_minor_translator_t lxd_mtranslator_sy[] = {
+ { "/dev/tty", 0, LX_TTY_MAJOR, 0 },
+ { NULL, 0, 0, 0 }
+};
+static lxd_minor_translator_t lxd_mtranslator_zcons[] = {
+ { "/dev/console", 0, LX_TTY_MAJOR, 1 },
+ { NULL, 0, 0, 0 }
+};
+lxd_devt_translator_t lxd_devt_translators[] = {
+ { "mm", 0, DTT_LIST, (uintptr_t)&lxd_mtranslator_mm },
+ { "random", 0, DTT_LIST, (uintptr_t)&lxd_mtranslator_random },
+ { "sy", 0, DTT_LIST, (uintptr_t)&lxd_mtranslator_sy },
+ { "zcons", 0, DTT_LIST, (uintptr_t)&lxd_mtranslator_zcons },
+ { LX_PTM_DRV, 0, DTT_CUSTOM, (uintptr_t)lxd_ptm_devt_translator },
+ { "pts", 0, DTT_CUSTOM, (uintptr_t)lxd_pts_devt_translator },
+ { NULL, 0, DTT_INVALID, NULL }
+};
+
+int
+_init()
+{
+ return (mod_install(&modlinkage));
+}
+
+int
+_fini()
+{
+ int error;
+
+ if (lxd_mountcount > 0)
+ return (EBUSY);
+
+ if ((error = mod_remove(&modlinkage)) != 0)
+ return (error);
+
+ /*
+ * Tear down the operations vectors
+ */
+ (void) vfs_freevfsops_by_type(lxd_fstype);
+ vn_freevnodeops(lxd_vnodeops);
+ mutex_destroy(&lxd_xlate_lock);
+ return (0);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+/*
+ * Initialize global locks, etc. Called when loading lxd module.
+ */
+static int
+lxd_init(int fstype, char *name)
+{
+ static const fs_operation_def_t lxd_vfsops_template[] = {
+ VFSNAME_MOUNT, { .vfs_mount = lxd_mount },
+ VFSNAME_UNMOUNT, { .vfs_unmount = lxd_unmount },
+ VFSNAME_ROOT, { .vfs_root = lxd_root },
+ VFSNAME_STATVFS, { .vfs_statvfs = lxd_statvfs },
+ VFSNAME_FREEVFS, { .vfs_freevfs = lxd_freevfs },
+ NULL, NULL
+ };
+ extern const struct fs_operation_def lxd_vnodeops_template[];
+ int error;
+ major_t dev;
+
+ lxd_fstype = fstype;
+ ASSERT(lxd_fstype != 0);
+
+ error = vfs_setfsops(fstype, lxd_vfsops_template, NULL);
+ if (error != 0) {
+ cmn_err(CE_WARN, "lxd_init: bad vfs ops template");
+ return (error);
+ }
+
+ error = vn_make_ops(name, lxd_vnodeops_template, &lxd_vnodeops);
+ if (error != 0) {
+ (void) vfs_freevfsops_by_type(fstype);
+ cmn_err(CE_WARN, "lxd_init: bad vnode ops template");
+ return (error);
+ }
+
+ /*
+ * lxd_minfree doesn't need to be some function of configured
+ * swap space since it really is an absolute limit of swap space
+ * which still allows other processes to execute.
+ */
+ if (lxd_minfree == 0) {
+ /* Set if not patched */
+ lxd_minfree = btopr(LXDMINFREE);
+ }
+
+ if ((dev = getudev()) == (major_t)-1) {
+ cmn_err(CE_WARN, "lxd_init: Can't get unique device number.");
+ dev = 0;
+ }
+
+ /*
+ * Make the pseudo device
+ */
+ lxd_dev = makedevice(dev, 0);
+
+ mutex_init(&lxd_xlate_lock, NULL, MUTEX_DEFAULT, NULL);
+
+ return (0);
+}
+
+/*
+ * Initialize device translator mapping table.
+ *
+ * Note that we cannot do this in lxd_init since that can lead to a recursive
+ * rw_enter while we're doing lookupnameat (via sdev_lookup/prof_make_maps/
+ * devi_attach_node/modload). Thus we do it in the mount path and keep track
+ * so that we only initialize the table once.
+ */
+static void
+lxd_xlate_init()
+{
+ int i;
+
+ mutex_enter(&lxd_xlate_lock);
+ if (lxd_xlate_initialized) {
+ mutex_exit(&lxd_xlate_lock);
+ return;
+ }
+
+ for (i = 0; lxd_devt_translators[i].lxd_xl_driver != NULL; i++) {
+ lxd_minor_translator_t *mt;
+ int j;
+
+ lxd_devt_translators[i].lxd_xl_major =
+ mod_name_to_major(lxd_devt_translators[i].lxd_xl_driver);
+
+ /* if this translator doesn't use a list mapping we're done. */
+ if (lxd_devt_translators[i].lxd_xl_type != DTT_LIST)
+ continue;
+
+ /* for each device listed, lookup the minor node number */
+ mt = lxd_devt_translators[i].xl_list;
+ for (j = 0; mt[j].lxd_mt_path != NULL; j++) {
+ vnode_t *vp;
+ struct vattr va;
+ char *tpath;
+ char tnm[MAXPATHLEN];
+
+ /*
+ * The attach might be triggered in either the global
+ * zone or in a non-global zone, so we may need to
+ * adjust the path if we're in a NGZ.
+ */
+ if (curproc->p_zone->zone_id == GLOBAL_ZONEUNIQID) {
+ tpath = mt[j].lxd_mt_path;
+ } else {
+ (void) snprintf(tnm, sizeof (tnm), "/native%s",
+ mt[j].lxd_mt_path);
+ tpath = tnm;
+ }
+
+ if (lookupnameat(tpath, UIO_SYSSPACE, FOLLOW, NULL,
+ &vp, NULL) != 0) {
+ mt[j].lxd_mt_minor = -1;
+ continue;
+ }
+
+ va.va_mask = AT_RDEV;
+ if (VOP_GETATTR(vp, &va, 0, kcred, NULL) != 0) {
+ va.va_rdev = NODEV;
+ } else {
+ ASSERT(getmajor(va.va_rdev) ==
+ lxd_devt_translators[i].lxd_xl_major);
+ ASSERT(mt[j].lxd_mt_lx_minor < LX_MAXMIN);
+ }
+
+ mt[j].lxd_mt_minor = getminor(va.va_rdev);
+
+ VN_RELE(vp);
+ }
+ }
+
+ lxd_xlate_initialized = B_TRUE;
+ mutex_exit(&lxd_xlate_lock);
+}
+
+static int
+lxd_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
+{
+ lxd_mnt_t *lxdm = NULL;
+ struct lxd_node *ldn;
+ struct pathname dpn;
+ int error;
+ int i;
+ int nodev;
+ struct vattr rattr;
+ vnode_t *realrootvp;
+ vnode_t *tvp;
+ lx_zone_data_t *lxzdata;
+ lx_virt_disk_t *vd;
+ vattr_t vattr;
+
+ nodev = vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL);
+
+ if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
+ return (error);
+
+ if (mvp->v_type != VDIR)
+ return (ENOTDIR);
+
+ lxd_xlate_init();
+
+ /*
+ * This is the same behavior as with lofs.
+ * Loopback devices which get "nodevices" added can be done without
+ * "nodevices" set because we cannot import devices into a zone
+ * with loopback. Note that we have all zone privileges when
+ * this happens; if not, we'd have gotten "nosuid".
+ */
+ if (!nodev && vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL))
+ vfs_setmntopt(vfsp, MNTOPT_DEVICES, NULL, VFS_NODISPLAY);
+
+ /*
+ * Only allow mounting within lx zones.
+ */
+ if (curproc->p_zone->zone_brand != &lx_brand)
+ return (EINVAL);
+
+ /*
+ * Ensure we don't allow overlaying mounts
+ */
+ mutex_enter(&mvp->v_lock);
+ if ((uap->flags & MS_OVERLAY) == 0 &&
+ (mvp->v_count > 1 || (mvp->v_flag & VROOT))) {
+ mutex_exit(&mvp->v_lock);
+ return (EBUSY);
+ }
+ mutex_exit(&mvp->v_lock);
+
+ /* lxd doesn't support read-only mounts */
+ if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) {
+ error = EINVAL;
+ goto out;
+ }
+
+ error = pn_get(uap->dir,
+ (uap->flags & MS_SYSSPACE) ? UIO_SYSSPACE : UIO_USERSPACE, &dpn);
+ if (error != 0)
+ goto out;
+
+ /*
+ * Find real root
+ */
+ if ((error = lookupname(uap->spec, (uap->flags & MS_SYSSPACE) ?
+ UIO_SYSSPACE : UIO_USERSPACE, FOLLOW, NULLVPP, &realrootvp))) {
+ pn_free(&dpn);
+ return (error);
+ }
+
+ if ((error = VOP_ACCESS(realrootvp, 0, 0, cr, NULL)) != 0) {
+ pn_free(&dpn);
+ VN_RELE(realrootvp);
+ return (error);
+ }
+
+ /* If realroot is not a devfs, error out */
+ if (strcmp(realrootvp->v_op->vnop_name, "dev") != 0) {
+ pn_free(&dpn);
+ VN_RELE(realrootvp);
+ return (EINVAL);
+ }
+
+ lxdm = kmem_zalloc(sizeof (*lxdm), KM_SLEEP);
+
+ /* init but don't bother entering the mutex (not on mount list yet) */
+ mutex_init(&lxdm->lxdm_contents, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&lxdm->lxdm_renamelck, NULL, MUTEX_DEFAULT, NULL);
+
+ /* Initialize the hash table mutexes */
+ for (i = 0; i < LXD_HASH_SZ; i++) {
+ mutex_init(&lxdm->lxdm_hash_mutex[i], NULL, MUTEX_DEFAULT,
+ NULL);
+ }
+
+ lxdm->lxdm_vfsp = vfsp;
+ lxdm->lxdm_gen = 1; /* start inode counter at 1 */
+
+ vfsp->vfs_data = (caddr_t)lxdm;
+ vfsp->vfs_fstype = lxd_fstype;
+ vfsp->vfs_dev = lxd_dev;
+ vfsp->vfs_bsize = PAGESIZE;
+ vfsp->vfs_flag |= VFS_NOTRUNC;
+ vfs_make_fsid(&vfsp->vfs_fsid, lxd_dev, lxd_fstype);
+ lxdm->lxdm_mntpath = kmem_zalloc(dpn.pn_pathlen + 1, KM_SLEEP);
+ (void) strcpy(lxdm->lxdm_mntpath, dpn.pn_path);
+
+ /* allocate and initialize root lxd_node structure */
+ bzero(&rattr, sizeof (struct vattr));
+ rattr.va_mode = (mode_t)(S_IFDIR | 0755);
+ rattr.va_type = VDIR;
+ rattr.va_rdev = 0;
+
+ tvp = lxd_make_back_node(realrootvp, lxdm);
+ ldn = VTOLDN(tvp);
+
+ rw_enter(&ldn->lxdn_rwlock, RW_WRITER);
+ LDNTOV(ldn)->v_flag |= VROOT;
+
+ /*
+ * initialize linked list of lxd_nodes so that the back pointer of
+ * the root lxd_node always points to the last one on the list
+ * and the forward pointer of the last node is null
+ */
+ ldn->lxdn_prev = ldn;
+ ldn->lxdn_next = NULL;
+ ldn->lxdn_nlink = 0;
+ lxdm->lxdm_rootnode = ldn;
+
+ ldn->lxdn_nodeid = lxdm->lxdm_gen++;
+ lxd_dirinit(ldn, ldn, cr);
+
+ rw_exit(&ldn->lxdn_rwlock);
+
+ pn_free(&dpn);
+ error = 0;
+ atomic_inc_32(&lxd_mountcount);
+
+ lxzdata = ztolxzd(curproc->p_zone);
+ ASSERT(lxzdata->lxzd_vdisks != NULL);
+
+ vattr.va_mask = AT_TYPE | AT_MODE;
+ vattr.va_type = VLNK;
+ vattr.va_mode = 0777;
+
+ vd = list_head(lxzdata->lxzd_vdisks);
+ while (vd != NULL) {
+ /* only create links for actual zvols */
+ if (vd->lxvd_type == LXVD_ZVOL) {
+ char lnknm[MAXPATHLEN];
+
+ (void) snprintf(lnknm, sizeof (lnknm),
+ "./zvol/dsk/%s", vd->lxvd_real_name);
+ (void) lxd_symlink(LDNTOV(ldn), vd->lxvd_name, &vattr,
+ lnknm, cr, NULL, 0);
+ }
+
+ vd = list_next(lxzdata->lxzd_vdisks, vd);
+ }
+
+out:
+ if (error == 0)
+ vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS);
+
+ return (error);
+}
+
+static int
+lxd_unmount(struct vfs *vfsp, int flag, struct cred *cr)
+{
+ lxd_mnt_t *lxdm = (lxd_mnt_t *)VFSTOLXDM(vfsp);
+ lxd_node_t *ldn, *cancel;
+ struct vnode *vp;
+ int error;
+ uint_t cnt;
+
+ if ((error = secpolicy_fs_unmount(cr, vfsp)) != 0)
+ return (error);
+
+ mutex_enter(&lxdm->lxdm_contents);
+
+ /*
+ * In the normal unmount case only the root node would have a reference
+ * count.
+ *
+ * With lxdm_contents held, nothing can be added or removed.
+ * If we find a previously referenced node, undo the holds we have
+ * placed and fail EBUSY.
+ */
+ ldn = lxdm->lxdm_rootnode;
+
+ vp = LDNTOV(ldn);
+ mutex_enter(&vp->v_lock);
+
+ if (flag & MS_FORCE) {
+ mutex_exit(&vp->v_lock);
+ mutex_exit(&lxdm->lxdm_contents);
+ return (EINVAL);
+ }
+
+ cnt = vp->v_count;
+ if (cnt > 1) {
+ mutex_exit(&vp->v_lock);
+ mutex_exit(&lxdm->lxdm_contents);
+ return (EBUSY);
+ }
+
+ mutex_exit(&vp->v_lock);
+
+ /*
+ * Check for open files. An open file causes everything to unwind.
+ */
+ for (ldn = ldn->lxdn_next; ldn; ldn = ldn->lxdn_next) {
+ vp = LDNTOV(ldn);
+ mutex_enter(&vp->v_lock);
+ cnt = vp->v_count;
+ if (cnt > 0) {
+ /* An open file; unwind the holds we've been adding. */
+ mutex_exit(&vp->v_lock);
+ cancel = lxdm->lxdm_rootnode->lxdn_next;
+ while (cancel != ldn) {
+ vp = LDNTOV(cancel);
+ ASSERT(vp->v_count > 0);
+ VN_RELE(vp);
+ cancel = cancel->lxdn_next;
+ }
+ mutex_exit(&lxdm->lxdm_contents);
+ return (EBUSY);
+ } else {
+ /*
+ * It may seem incorrect for us to have a vnode with
+ * a count of 0, but this is modeled on tmpfs and works
+ * the same way. See lxd_front_inactive. There we allow
+ * the v_count to go to 0 but rely on the link count to
+ * keep the vnode alive. Since we now want to cleanup
+ * these vnodes we manually add a VN_HOLD so that the
+ * VN_RELEs that occur in the lxd_freevfs() cleanup
+ * will take us down the lxd_inactive code path. We
+ * can directly add a VN_HOLD since we have the lock.
+ */
+ vp->v_count++;
+ mutex_exit(&vp->v_lock);
+ }
+ }
+
+ /*
+ * We can drop the mutex now because
+ * no one can find this mount anymore
+ */
+ vfsp->vfs_flag |= VFS_UNMOUNTED;
+ mutex_exit(&lxdm->lxdm_contents);
+
+ return (0);
+}
+
+/*
+ * Implementation of VFS_FREEVFS(). This is called by the vfs framework after
+ * umount and the last VFS_RELE, to trigger the release of any resources still
+ * associated with the given vfs_t. This is normally called immediately after
+ * lxd_unmount.
+ */
+void
+lxd_freevfs(vfs_t *vfsp)
+{
+ lxd_mnt_t *lxdm = (lxd_mnt_t *)VFSTOLXDM(vfsp);
+ lxd_node_t *ldn;
+ struct vnode *vp;
+
+ /*
+ * Free all kmemalloc'd and anonalloc'd memory associated with
+ * this filesystem. To do this, we go through the file list twice,
+ * once to remove all the directory entries, and then to remove
+ * all the pseudo files.
+ */
+
+ /*
+ * Now that we are tearing ourselves down we need to remove the
+ * UNMOUNTED flag. If we don't, we'll later hit a VN_RELE when we remove
+ * files from the system causing us to have a negative value. Doing this
+ * seems a bit better than trying to set a flag on the lxd_mnt_t that
+ * says we're tearing down.
+ */
+ vfsp->vfs_flag &= ~VFS_UNMOUNTED;
+
+ /*
+ * Remove all directory entries (this doesn't remove top-level dirs).
+ */
+ for (ldn = lxdm->lxdm_rootnode; ldn; ldn = ldn->lxdn_next) {
+ rw_enter(&ldn->lxdn_rwlock, RW_WRITER);
+ if (ldn->lxdn_vnode->v_type == VDIR)
+ lxd_dirtrunc(ldn);
+ rw_exit(&ldn->lxdn_rwlock);
+ }
+
+ ASSERT(lxdm->lxdm_rootnode != NULL);
+
+ /*
+ * All links are gone, v_count is keeping nodes in place.
+ * VN_RELE should make the node disappear, unless somebody
+ * is holding pages against it. Nap and retry until it disappears.
+ *
+ * We re-acquire the lock to prevent others who have a HOLD on a
+ * lxd_node from blowing it away (in lxd_inactive) while we're trying
+ * to get to it here. Once we have a HOLD on it we know it'll stick
+ * around.
+ */
+ mutex_enter(&lxdm->lxdm_contents);
+
+ /*
+ * Remove all the files (except the rootnode) backwards.
+ */
+ while ((ldn = lxdm->lxdm_rootnode->lxdn_prev) != lxdm->lxdm_rootnode) {
+ mutex_exit(&lxdm->lxdm_contents);
+ /*
+ * All nodes will be released here. Note we handled the link
+ * count above.
+ */
+ vp = LDNTOV(ldn);
+ ASSERT(vp->v_type == VLNK || vp->v_type == VDIR ||
+ vp->v_type == VSOCK);
+ VN_RELE(vp);
+ mutex_enter(&lxdm->lxdm_contents);
+ /*
+ * It's still there after the RELE. Someone else like pageout
+ * has a hold on it so wait a bit and then try again - we know
+ * they'll give it up soon.
+ */
+ if (ldn == lxdm->lxdm_rootnode->lxdn_prev) {
+ VN_HOLD(vp);
+ mutex_exit(&lxdm->lxdm_contents);
+ delay(hz / 4);
+ mutex_enter(&lxdm->lxdm_contents);
+ }
+ }
+ mutex_exit(&lxdm->lxdm_contents);
+
+ ASSERT(lxdm->lxdm_back_refcnt == 1);
+ ASSERT(lxdm->lxdm_dent_refcnt == 0);
+
+ VN_RELE(LDNTOV(lxdm->lxdm_rootnode));
+
+ ASSERT(lxdm->lxdm_mntpath != NULL);
+ kmem_free(lxdm->lxdm_mntpath, strlen(lxdm->lxdm_mntpath) + 1);
+
+ mutex_destroy(&lxdm->lxdm_contents);
+ mutex_destroy(&lxdm->lxdm_renamelck);
+ kmem_free(lxdm, sizeof (lxd_mnt_t));
+
+ /* Allow _fini() to succeed now */
+ atomic_dec_32(&lxd_mountcount);
+}
+
+/*
+ * return root lxdnode for given vnode
+ */
+static int
+lxd_root(struct vfs *vfsp, struct vnode **vpp)
+{
+ lxd_mnt_t *lxdm = (lxd_mnt_t *)VFSTOLXDM(vfsp);
+ lxd_node_t *ldn = lxdm->lxdm_rootnode;
+ struct vnode *vp;
+
+ ASSERT(ldn != NULL);
+
+ vp = LDNTOV(ldn);
+ VN_HOLD(vp);
+ *vpp = vp;
+ return (0);
+}
+
+static int
+lxd_statvfs(struct vfs *vfsp, statvfs64_t *sbp)
+{
+ lxd_mnt_t *lxdm = (lxd_mnt_t *)VFSTOLXDM(vfsp);
+ ulong_t blocks;
+ dev32_t d32;
+ zoneid_t eff_zid;
+ struct zone *zp;
+
+ zp = lxdm->lxdm_vfsp->vfs_zone;
+
+ if (zp == NULL)
+ eff_zid = GLOBAL_ZONEUNIQID;
+ else
+ eff_zid = zp->zone_id;
+
+ sbp->f_bsize = PAGESIZE;
+ sbp->f_frsize = PAGESIZE;
+
+ /*
+ * Find the amount of available physical and memory swap
+ */
+ mutex_enter(&anoninfo_lock);
+ ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);
+ blocks = (ulong_t)CURRENT_TOTAL_AVAILABLE_SWAP;
+ mutex_exit(&anoninfo_lock);
+
+ if (blocks > lxd_minfree)
+ sbp->f_bfree = blocks - lxd_minfree;
+ else
+ sbp->f_bfree = 0;
+
+ sbp->f_bavail = sbp->f_bfree;
+
+ /*
+ * Total number of blocks is just what's available
+ */
+ sbp->f_blocks = (fsblkcnt64_t)(sbp->f_bfree);
+
+ if (eff_zid != GLOBAL_ZONEUNIQID &&
+ zp->zone_max_swap_ctl != UINT64_MAX) {
+ /*
+ * If the fs is used by a zone with a swap cap,
+ * then report the capped size.
+ */
+ rctl_qty_t cap, used;
+ pgcnt_t pgcap, pgused;
+
+ mutex_enter(&zp->zone_mem_lock);
+ cap = zp->zone_max_swap_ctl;
+ used = zp->zone_max_swap;
+ mutex_exit(&zp->zone_mem_lock);
+
+ pgcap = btop(cap);
+ pgused = btop(used);
+
+ sbp->f_bfree = MIN(pgcap - pgused, sbp->f_bfree);
+ sbp->f_bavail = sbp->f_bfree;
+ sbp->f_blocks = MIN(pgcap, sbp->f_blocks);
+ }
+
+ /*
+ * The maximum number of files available is approximately the number
+ * of lxd_nodes we can allocate from the remaining kernel memory
+ * available to lxdevfs in this zone. This is fairly inaccurate since
+ * it doesn't take into account the names stored in the directory
+ * entries.
+ */
+ sbp->f_ffree = sbp->f_files = ptob(availrmem) /
+ (sizeof (lxd_node_t) + sizeof (lxd_dirent_t));
+ sbp->f_favail = (fsfilcnt64_t)(sbp->f_ffree);
+ (void) cmpldev(&d32, vfsp->vfs_dev);
+ sbp->f_fsid = d32;
+ (void) strcpy(sbp->f_basetype, vfssw[lxd_fstype].vsw_name);
+ (void) strncpy(sbp->f_fstr, lxdm->lxdm_mntpath, sizeof (sbp->f_fstr));
+ /* ensure null termination */
+ sbp->f_fstr[sizeof (sbp->f_fstr) - 1] = '\0';
+ sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
+ sbp->f_namemax = MAXNAMELEN - 1;
+ return (0);
+}
+
+static void
+lxd_pts_devt_translator(dev_t dev, dev_t *jdev)
+{
+ minor_t min = getminor(dev);
+ int lx_maj, lx_min;
+
+ /*
+ * Linux uses a range of major numbers for pts devices to address the
+ * relatively small minor number space (20 bits).
+ */
+
+ lx_maj = LX_PTS_MAJOR_MIN + (min / LX_MAXMIN);
+ lx_min = min % LX_MAXMIN;
+ if (lx_maj > LX_PTS_MAJOR_MAX) {
+ /*
+ * The major is outside the acceptable range but there's little
+ * we can presently do about it short of overhauling the
+ * translation logic.
+ */
+ lx_unsupported("pts major out of translation range");
+ }
+
+ *jdev = LX_MAKEDEVICE(lx_maj, lx_min);
+}
+
+static void
+lxd_ptm_devt_translator(dev_t dev, dev_t *jdev)
+{
+ *jdev = LX_MAKEDEVICE(LX_PTM_MAJOR, LX_PTM_MINOR);
+}
diff --git a/usr/src/uts/common/brand/lx/devfs/lxd_vnops.c b/usr/src/uts/common/brand/lx/devfs/lxd_vnops.c
new file mode 100644
index 0000000000..bee93f6aad
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/devfs/lxd_vnops.c
@@ -0,0 +1,1506 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/vnode.h>
+#include <sys/vfs.h>
+#include <sys/vfs_opreg.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+#include <sys/cred.h>
+#include <sys/pathname.h>
+#include <sys/debug.h>
+#include <sys/sdt.h>
+#include <fs/fs_subr.h>
+#include <vm/as.h>
+#include <vm/seg.h>
+#include <sys/lx_brand.h>
+#include <sys/brand.h>
+
+#include "lxd.h"
+
+static int
+lxd_open(vnode_t **vpp, int flag, struct cred *cr, caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(*vpp);
+ vnode_t *vp = *vpp;
+ vnode_t *rvp;
+ vnode_t *oldvp;
+ int error;
+
+ if (ldn->lxdn_type == LXDNT_FRONT)
+ return (0);
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ oldvp = vp;
+ vp = rvp = REALVP(vp);
+ /*
+ * Need to hold new reference to vp since VOP_OPEN() may
+ * decide to release it.
+ */
+ VN_HOLD(vp);
+ error = VOP_OPEN(&rvp, flag, cr, ct);
+
+ if (!error && rvp != vp) {
+ /*
+ * the FS which we called should have released the
+ * new reference on vp
+ */
+ *vpp = lxd_make_back_node(rvp, VFSTOLXDM(oldvp->v_vfsp));
+
+ if (IS_DEVVP(*vpp)) {
+ vnode_t *svp;
+
+ svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
+ VN_RELE(*vpp);
+ if (svp == NULL)
+ error = ENOSYS;
+ else
+ *vpp = svp;
+ }
+ VN_RELE(oldvp);
+ } else {
+ ASSERT(rvp->v_count > 1);
+ VN_RELE(rvp);
+ }
+
+ return (error);
+}
+
+static int
+lxd_close(vnode_t *vp, int flag, int count, offset_t offset, struct cred *cr,
+ caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+
+ if (ldn->lxdn_type == LXDNT_FRONT)
+ return (0);
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ vp = REALVP(vp);
+ return (VOP_CLOSE(vp, flag, count, offset, cr, ct));
+}
+
+static int
+lxd_read(vnode_t *vp, struct uio *uiop, int ioflag, struct cred *cr,
+ caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+
+ if (ldn->lxdn_type == LXDNT_FRONT)
+ return (ENOTSUP);
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ vp = REALVP(vp);
+ return (VOP_READ(vp, uiop, ioflag, cr, ct));
+}
+
+static int
+lxd_write(vnode_t *vp, struct uio *uiop, int ioflag, struct cred *cr,
+ caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+
+ if (ldn->lxdn_type == LXDNT_FRONT)
+ return (ENOTSUP);
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ vp = REALVP(vp);
+ return (VOP_WRITE(vp, uiop, ioflag, cr, ct));
+}
+
+static int
+lxd_ioctl(vnode_t *vp, int cmd, intptr_t arg, int flag, struct cred *cr,
+ int *rvalp, caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+
+ if (ldn->lxdn_type == LXDNT_FRONT)
+ return (ENOTSUP);
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ vp = REALVP(vp);
+ return (VOP_IOCTL(vp, cmd, arg, flag, cr, rvalp, ct));
+}
+
+static int
+lxd_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr, caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+
+ if (ldn->lxdn_type == LXDNT_FRONT)
+ return (ENOTSUP);
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ vp = REALVP(vp);
+ return (VOP_SETFL(vp, oflags, nflags, cr, ct));
+}
+
+/*
+ * Translate SunOS devt to Linux devt.
+ */
+static void
+lxd_s2l_devt(dev_t dev, dev_t *rdev)
+{
+ lxd_minor_translator_t *mt;
+ int i, j;
+ major_t maj = getmajor(dev);
+ minor_t min = getminor(dev);
+
+ /* look for a devt translator for this major number */
+ for (i = 0; lxd_devt_translators[i].lxd_xl_driver != NULL; i++) {
+ if (lxd_devt_translators[i].lxd_xl_major == maj)
+ break;
+ }
+
+ if (lxd_devt_translators[i].lxd_xl_driver != NULL) {
+ /* try to translate the illumos devt to a linux devt */
+ switch (lxd_devt_translators[i].lxd_xl_type) {
+ case DTT_INVALID:
+ ASSERT(0);
+ break;
+
+ case DTT_LIST:
+ mt = lxd_devt_translators[i].xl_list;
+ for (j = 0; mt[j].lxd_mt_path != NULL; j++) {
+ if (mt[j].lxd_mt_minor == min) {
+ ASSERT(mt[j].lxd_mt_minor < LX_MAXMIN);
+
+ /* found a translation */
+ *rdev = LX_MAKEDEVICE(
+ mt[j].lxd_mt_lx_major,
+ mt[j].lxd_mt_lx_minor);
+ return;
+ }
+ }
+ break;
+
+ case DTT_CUSTOM:
+ lxd_devt_translators[i].xl_custom(dev, rdev);
+ return;
+ }
+ }
+
+ /* we don't have a translator for this device */
+ *rdev = LX_MAKEDEVICE(maj, min);
+}
+
+static int
+lxd_getattr(vnode_t *vp, struct vattr *vap, int flags, struct cred *cr,
+ caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+ int error;
+ vnode_t *rvp;
+
+ if (ldn->lxdn_type == LXDNT_FRONT) {
+ mutex_enter(&ldn->lxdn_tlock);
+
+ vap->va_type = vp->v_type;
+ vap->va_mode = ldn->lxdn_mode & MODEMASK;
+ vap->va_uid = ldn->lxdn_uid;
+ vap->va_gid = ldn->lxdn_gid;
+ vap->va_fsid = ldn->lxdn_fsid;
+ vap->va_nodeid = (ino64_t)ldn->lxdn_nodeid;
+ vap->va_nlink = ldn->lxdn_nlink;
+ vap->va_size = (u_offset_t)ldn->lxdn_size;
+ vap->va_atime = ldn->lxdn_atime;
+ vap->va_mtime = ldn->lxdn_mtime;
+ vap->va_ctime = ldn->lxdn_ctime;
+ vap->va_blksize = PAGESIZE;
+ vap->va_rdev = 0; /* no devs in front */
+ vap->va_seq = ldn->lxdn_seq;
+
+ vap->va_nblocks = (fsblkcnt64_t)btodb(ptob(btopr(
+ vap->va_size)));
+ mutex_exit(&ldn->lxdn_tlock);
+ return (0);
+ }
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ rvp = REALVP(vp);
+ if ((error = VOP_GETATTR(rvp, vap, flags, cr, ct)))
+ return (error);
+
+ /* Skip devt translation for native programs */
+ if (curproc->p_brand != &lx_brand) {
+ return (0);
+ } else {
+ /*
+ * We also skip translation when called from the user-land
+ * emulation code.
+ */
+ lx_lwp_data_t *lwpd = ttolxlwp(curthread);
+
+ if (lwpd == NULL || lwpd->br_stack_mode != LX_STACK_MODE_BRAND)
+ return (0);
+ }
+
+ if (rvp->v_type == VCHR) {
+ dev_t ldev;
+
+ lxd_s2l_devt(vap->va_rdev, &ldev);
+ DTRACE_PROBE3(lxd__devxl, void *, rvp, void *, vap, int, ldev);
+ vap->va_rdev = ldev;
+ }
+
+ return (0);
+}
+
+static int
+lxd_setattr(vnode_t *vp, struct vattr *vap, int flags, struct cred *cr,
+ caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+
+ if (ldn->lxdn_type == LXDNT_FRONT) {
+ int error = 0;
+ struct vattr *set;
+ long mask = vap->va_mask;
+
+ /* Cannot set these attributes */
+ if ((mask & AT_NOSET) || (mask & AT_XVATTR) ||
+ (mask & AT_MODE && vap->va_mode & (S_ISUID | S_ISGID)) ||
+ (mask & AT_SIZE))
+ return (EINVAL);
+
+ mutex_enter(&ldn->lxdn_tlock);
+
+ set = &ldn->lxdn_attr;
+ /*
+ * Change file access modes. Must be owner or have sufficient
+ * privileges.
+ */
+ error = secpolicy_vnode_setattr(cr, vp, vap, set, flags,
+ lxd_naccess, ldn);
+ if (error) {
+ mutex_exit(&ldn->lxdn_tlock);
+ return (error);
+ }
+
+ if (mask & AT_MODE) {
+ set->va_mode &= S_IFMT;
+ set->va_mode |= vap->va_mode & ~S_IFMT;
+ }
+
+ if (mask & AT_UID)
+ set->va_uid = vap->va_uid;
+ if (mask & AT_GID)
+ set->va_gid = vap->va_gid;
+ if (mask & AT_ATIME)
+ set->va_atime = vap->va_atime;
+ if (mask & AT_MTIME)
+ set->va_mtime = vap->va_mtime;
+
+ if (mask & (AT_UID | AT_GID | AT_MODE | AT_MTIME))
+ gethrestime(&ldn->lxdn_ctime);
+
+ mutex_exit(&ldn->lxdn_tlock);
+ return (error);
+ }
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ vp = REALVP(vp);
+ return (VOP_SETATTR(vp, vap, flags, cr, ct));
+}
+
+static int
+lxd_access(vnode_t *vp, int mode, int flags, struct cred *cr,
+ caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+
+ if (ldn->lxdn_type == LXDNT_FRONT) {
+ int error;
+
+ mutex_enter(&ldn->lxdn_tlock);
+ error = lxd_naccess(ldn, mode, cr);
+ mutex_exit(&ldn->lxdn_tlock);
+ return (error);
+ }
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ if (mode & VWRITE) {
+ if (vp->v_type == VREG && vn_is_readonly(vp))
+ return (EROFS);
+ }
+ vp = REALVP(vp);
+ return (VOP_ACCESS(vp, mode, flags, cr, ct));
+}
+
+static int
+lxd_fsync(vnode_t *vp, int syncflag, struct cred *cr, caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+
+ if (ldn->lxdn_type == LXDNT_FRONT)
+ return (0);
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ vp = REALVP(vp);
+ return (VOP_FSYNC(vp, syncflag, cr, ct));
+}
+
+static void
+lxd_front_inactive(struct vnode *vp, struct cred *cred, caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+ lxd_mnt_t *lxdm = VTOLXDM(vp);
+
+ ASSERT(ldn->lxdn_type == LXDNT_FRONT);
+ rw_enter(&ldn->lxdn_rwlock, RW_WRITER);
+
+ mutex_enter(&ldn->lxdn_tlock);
+ mutex_enter(&vp->v_lock);
+ ASSERT(vp->v_count >= 1);
+
+ /*
+ * If we don't have the last hold or the link count is non-zero,
+ * there's little to do -- just drop our hold.
+ */
+ if (vp->v_count > 1 || ldn->lxdn_nlink != 0) {
+ vp->v_count--;
+
+ mutex_exit(&vp->v_lock);
+ mutex_exit(&ldn->lxdn_tlock);
+ rw_exit(&ldn->lxdn_rwlock);
+ return;
+ }
+
+ /*
+ * We have the last hold *and* the link count is zero, so this node is
+ * dead from the filesystem's viewpoint.
+ */
+ if (ldn->lxdn_size != 0) {
+ if (ldn->lxdn_vnode->v_type == VLNK)
+ kmem_free(ldn->lxdn_symlink, ldn->lxdn_size + 1);
+ }
+
+ mutex_exit(&vp->v_lock);
+ mutex_exit(&ldn->lxdn_tlock);
+
+ vn_invalid(LDNTOV(ldn));
+
+ mutex_enter(&lxdm->lxdm_contents);
+ if (ldn->lxdn_next == NULL)
+ lxdm->lxdm_rootnode->lxdn_prev = ldn->lxdn_prev;
+ else
+ ldn->lxdn_next->lxdn_prev = ldn->lxdn_prev;
+ ldn->lxdn_prev->lxdn_next = ldn->lxdn_next;
+
+ mutex_exit(&lxdm->lxdm_contents);
+ rw_exit(&ldn->lxdn_rwlock);
+ rw_destroy(&ldn->lxdn_rwlock);
+ mutex_destroy(&ldn->lxdn_tlock);
+
+ vn_free(LDNTOV(ldn));
+ kmem_free(ldn, sizeof (lxd_node_t));
+}
+
+/*ARGSUSED*/
+static void
+lxd_inactive(vnode_t *vp, struct cred *cr, caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+
+ if (ldn->lxdn_type == LXDNT_FRONT) {
+ lxd_front_inactive(vp, cr, ct);
+ return;
+ }
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ lxd_free_back_node(ldn);
+}
+
+/* ARGSUSED */
+static int
+lxd_fid(vnode_t *vp, struct fid *fidp, caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+
+ if (ldn->lxdn_type == LXDNT_FRONT)
+ return (ENOTSUP);
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ vp = REALVP(vp);
+ return (VOP_FID(vp, fidp, ct));
+}
+
+/*
+ * For a front node lookup in the dirent hash table and return a shadow vnode
+ * (lxd_node_t type) of type LXDNT_FRONT.
+ *
+ * For a back node, lookup nm name and return a shadow vnode (lxd_node_t type)
+ * of the real vnode found.
+ */
+static int
+lxd_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
+ int flags, vnode_t *rdir, struct cred *cr, caller_context_t *ct,
+ int *direntflags, pathname_t *realpnp)
+{
+ vnode_t *vp = NULL;
+ int error;
+ vnode_t *realdvp;
+ lxd_mnt_t *lxdm = VTOLXDM(dvp);
+ int doingdotdot = 0;
+ lxd_node_t *ldn = VTOLDN(dvp);
+ lxd_node_t *nldn = NULL;
+
+ /*
+ * First check for front file which could be instantiated on either a
+ * front or back node (e.g. the top-level moint point directory node is
+ * a back node which can have front files created in it).
+ */
+
+ /* disallow extended attrs */
+ if (flags & LOOKUP_XATTR)
+ return (EINVAL);
+
+ /* Null component name is a synonym for dir being searched. */
+ if (*nm == '\0') {
+ VN_HOLD(dvp);
+ *vpp = dvp;
+ return (0);
+ }
+
+ rw_enter(&ldn->lxdn_rwlock, RW_READER);
+ error = lxd_dirlookup(ldn, nm, &nldn, cr);
+ rw_exit(&ldn->lxdn_rwlock);
+
+ if (error == 0) {
+ /* found */
+ ASSERT(nldn != NULL);
+ *vpp = LDNTOV(nldn);
+ return (0);
+ }
+
+ /* At this point, if dir node is a front node, error */
+ if (ldn->lxdn_type == LXDNT_FRONT) {
+ return (ENOENT);
+ }
+
+ realdvp = REALVP(dvp);
+
+ if (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0') {
+ doingdotdot++;
+ /*
+ * Handle ".." out of mounted filesystem
+ */
+ while ((realdvp->v_flag & VROOT) && realdvp != rootdir) {
+ realdvp = realdvp->v_vfsp->vfs_vnodecovered;
+ ASSERT(realdvp != NULL);
+ }
+ }
+
+ *vpp = NULL; /* default(error) case */
+
+ /*
+ * Do the normal lookup
+ */
+ if ((error = VOP_LOOKUP(realdvp, nm, &vp, pnp, flags, rdir, cr,
+ ct, direntflags, realpnp)) != 0) {
+ vp = NULL;
+ goto out;
+ }
+
+ /*
+ * We do this check here to avoid returning a stale file handle to the
+ * caller.
+ */
+ if (nm[0] == '.' && nm[1] == '\0') {
+ ASSERT(vp == realdvp);
+ VN_HOLD(dvp);
+ VN_RELE(vp);
+ *vpp = dvp;
+ return (0);
+ }
+
+ if (doingdotdot) {
+ *vpp = lxd_make_back_node(vp, lxdm);
+ return (0);
+ }
+
+ /*
+ * If this vnode is mounted on, then we
+ * traverse to the vnode which is the root of
+ * the mounted file system.
+ */
+ if ((error = traverse(&vp)) != 0)
+ goto out;
+
+ /*
+ * Make a lxd node for the real vnode.
+ */
+ *vpp = lxd_make_back_node(vp, lxdm);
+ if (vp->v_type != VDIR) {
+ if (IS_DEVVP(*vpp)) {
+ vnode_t *svp;
+
+ svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
+ VN_RELE(*vpp);
+ if (svp == NULL) {
+ VN_RELE(vp);
+ error = ENOSYS;
+ } else {
+ *vpp = svp;
+ }
+ }
+ return (error);
+ }
+
+out:
+ if (error != 0 && vp != NULL)
+ VN_RELE(vp);
+
+ return (error);
+}
+
+/*ARGSUSED*/
+static int
+lxd_create(vnode_t *dvp, char *nm, struct vattr *va, enum vcexcl exclusive,
+ int mode, vnode_t **vpp, struct cred *cr, int flag, caller_context_t *ct,
+ vsecattr_t *vsecp)
+{
+ int error;
+ lxd_node_t *parent = VTOLDN(dvp);
+ lxd_node_t *lnp = NULL;
+
+ rw_enter(&parent->lxdn_rwlock, RW_READER);
+ error = lxd_dirlookup(parent, nm, &lnp, cr);
+ rw_exit(&parent->lxdn_rwlock);
+ /*
+ * If this vnode already exists in lx devfs, we should pass the create
+ * operation through to the underlying resource it represents. For
+ * existing back nodes, the VOP_CREATE is done directly against the
+ * returned lxd node with an empty name (to avoid a redunant lookup).
+ * For existing front nodes, an appropriate error must be chosen since
+ * they cannot represent regular files
+ */
+ if (error == 0) {
+ if (lnp->lxdn_type == LXDNT_BACK) {
+ error = VOP_CREATE(lnp->lxdn_real_vp, "\0", va,
+ exclusive, mode, vpp, cr, flag, ct, vsecp);
+ } else {
+ if (exclusive == EXCL) {
+ error = EEXIST;
+ } else if (LDNTOV(lnp)->v_type == VDIR &&
+ (mode & S_IWRITE)) {
+ error = EISDIR;
+ } else {
+ error = ENOTSUP;
+ }
+ }
+ if (error != 0) {
+ ldnode_rele(lnp);
+ }
+ return (error);
+ }
+
+ /*
+ * We cannot create files in the back devfs but we want to allow for
+ * O_CREAT on existing files. Pass this through and let the back file
+ * system allow or deny it.
+ */
+ if (parent->lxdn_type == LXDNT_BACK) {
+ vnode_t *vp = NULL;
+
+ if (*nm == '\0') {
+ ASSERT(vpp && dvp == *vpp);
+ vp = REALVP(*vpp);
+ }
+ if ((error = VOP_CREATE(REALVP(dvp), nm, va, exclusive, mode,
+ &vp, cr, flag, ct, vsecp)) == 0) {
+ *vpp = lxd_make_back_node(vp, VFSTOLXDM(dvp->v_vfsp));
+ if (IS_DEVVP(*vpp)) {
+ vnode_t *svp;
+
+ svp = specvp(*vpp, (*vpp)->v_rdev,
+ (*vpp)->v_type, cr);
+ VN_RELE(*vpp);
+ if (svp == NULL) {
+ return (ENOSYS);
+ }
+ *vpp = svp;
+ }
+ return (0);
+ }
+ /*
+ * If we were unable to perform the VOP_CREATE for any reason
+ * other than sdev being read-only, we should bail.
+ */
+ if (error != ENOTSUP && error != EROFS) {
+ return (error);
+ }
+ }
+
+ /*
+ * While we don't allow create data-containing files under LX devfs, we
+ * must allow VSOCK front nodes to be created so that paths such as
+ * /dev/log can be used as AF_UNIX sockets.
+ */
+ if (va->va_type == VSOCK) {
+ lxd_mnt_t *lxdm = VTOLXDM(parent->lxdn_vnode);
+
+ lnp = NULL;
+ rw_enter(&parent->lxdn_rwlock, RW_WRITER);
+ error = lxd_direnter(lxdm, parent, nm, DE_CREATE, NULL, NULL,
+ va, &lnp, cr, ct);
+ rw_exit(&parent->lxdn_rwlock);
+
+ if (error == 0) {
+ *vpp = LDNTOV(lnp);
+ } else if (lnp != NULL) {
+ /*
+ * It's possible that a racing process created an entry
+ * at this name since we last performed the lookup.
+ */
+ ldnode_rele(lnp);
+ }
+ } else {
+ error = ENOTSUP;
+ }
+
+ return (error);
+}
+
+static int
+lxd_remove(vnode_t *dvp, char *nm, struct cred *cr, caller_context_t *ct,
+ int flags)
+{
+ lxd_node_t *parent = VTOLDN(dvp);
+ lxd_node_t *ldn = NULL;
+ int error;
+
+ /* can only remove existing front nodes */
+ error = lxd_dirlookup(parent, nm, &ldn, cr);
+ if (error) {
+ return (error);
+ }
+
+ ASSERT(ldn != NULL);
+ ASSERT(ldn->lxdn_type == LXDNT_FRONT);
+ rw_enter(&parent->lxdn_rwlock, RW_WRITER);
+ rw_enter(&ldn->lxdn_rwlock, RW_WRITER);
+
+ error = lxd_dirdelete(parent, ldn, nm, DR_REMOVE, cr);
+
+ rw_exit(&ldn->lxdn_rwlock);
+ rw_exit(&parent->lxdn_rwlock);
+
+ ldnode_rele(ldn);
+
+ return (error);
+}
+
+static int
+lxd_link(vnode_t *tdvp, vnode_t *vp, char *tnm, struct cred *cr,
+ caller_context_t *ct, int flags)
+{
+ return (ENOTSUP);
+}
+
+static int
+lxd_rename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, struct cred *cr,
+ caller_context_t *ct, int flags)
+{
+ lxd_node_t *oldparent = VTOLDN(odvp);
+ lxd_node_t *newparent;
+ lxd_mnt_t *lxdm = VTOLXDM(oldparent->lxdn_vnode);
+ lxd_node_t *fromnode = NULL;
+ int error;
+ int samedir = 0;
+
+ if (!vn_matchops(ndvp, lxd_vnodeops)) {
+ /* cannot rename out of this file system */
+ return (EACCES);
+ }
+
+ mutex_enter(&lxdm->lxdm_renamelck);
+
+ newparent = VTOLDN(ndvp);
+
+ /*
+ * We can only rename front nodes.
+ */
+ error = lxd_dirlookup(oldparent, onm, &fromnode, cr);
+ if (error != 0) {
+ /* not found in front */
+ mutex_exit(&lxdm->lxdm_renamelck);
+ return (error);
+ }
+
+ /*
+ * Make sure we can delete the old (source) entry. This
+ * requires write permission on the containing directory. If
+ * that directory is "sticky" it requires further checks.
+ */
+ if ((error = lxd_naccess(oldparent, VWRITE, cr)) != 0)
+ goto done;
+
+ /*
+ * Check for renaming to or from '.' or '..' or that
+ * fromnode == oldparent
+ */
+ if ((onm[0] == '.' &&
+ (onm[1] == '\0' || (onm[1] == '.' && onm[2] == '\0'))) ||
+ (nnm[0] == '.' &&
+ (nnm[1] == '\0' || (nnm[1] == '.' && nnm[2] == '\0'))) ||
+ (oldparent == fromnode)) {
+ error = EINVAL;
+ goto done;
+ }
+
+ samedir = (oldparent == newparent);
+
+ /*
+ * Make sure we can search and rename into the destination directory.
+ */
+ if (!samedir) {
+ if ((error = lxd_naccess(newparent, VEXEC|VWRITE, cr)) != 0)
+ goto done;
+ }
+
+ /*
+ * Link source to new target
+ */
+ rw_enter(&newparent->lxdn_rwlock, RW_WRITER);
+ error = lxd_direnter(lxdm, newparent, nnm, DE_RENAME,
+ oldparent, fromnode, (struct vattr *)NULL, (lxd_node_t **)NULL,
+ cr, ct);
+ rw_exit(&newparent->lxdn_rwlock);
+
+ if (error)
+ goto done;
+
+ /*
+ * Unlink from source.
+ */
+ rw_enter(&oldparent->lxdn_rwlock, RW_WRITER);
+ rw_enter(&fromnode->lxdn_rwlock, RW_WRITER);
+
+ error = lxd_dirdelete(oldparent, fromnode, onm, DR_RENAME, cr);
+
+ /*
+ * The following handles the case where our source node was
+ * removed before we got to it.
+ */
+ if (error == ENOENT)
+ error = 0;
+
+ rw_exit(&fromnode->lxdn_rwlock);
+ rw_exit(&oldparent->lxdn_rwlock);
+
+done:
+ ldnode_rele(fromnode);
+ mutex_exit(&lxdm->lxdm_renamelck);
+ return (error);
+}
+
+static int
+lxd_mkdir(vnode_t *dvp, char *nm, struct vattr *va, vnode_t **vpp,
+ struct cred *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
+{
+ int error;
+ vnode_t *tvp;
+ lxd_node_t *ndir = NULL;
+ lxd_node_t *parent = VTOLDN(dvp);
+ lxd_mnt_t *lxdm = VTOLXDM(parent->lxdn_vnode);
+
+ /* check for existence in both front and back */
+ if (lxd_lookup(dvp, nm, &tvp, NULL, 0, NULL, cr, ct, NULL, NULL) == 0) {
+ /* The entry already exists */
+ VN_RELE(tvp);
+ return (EEXIST);
+ }
+
+ /* make front directory */
+ rw_enter(&parent->lxdn_rwlock, RW_WRITER);
+ error = lxd_direnter(lxdm, parent, nm, DE_MKDIR, NULL, NULL,
+ va, &ndir, cr, ct);
+ rw_exit(&parent->lxdn_rwlock);
+
+ if (error != 0) {
+ if (ndir != NULL)
+ ldnode_rele(ndir);
+ } else {
+ *vpp = LDNTOV(ndir);
+ }
+
+ return (error);
+}
+
+static int
+lxd_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+
+ if (ldn->lxdn_type == LXDNT_FRONT) {
+ *vpp = vp;
+ return (0);
+ }
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ while (vn_matchops(vp, lxd_vnodeops))
+ vp = REALVP(vp);
+
+ if (VOP_REALVP(vp, vpp, ct) != 0)
+ *vpp = vp;
+ return (0);
+}
+
+static int
+lxd_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, struct cred *cr,
+ caller_context_t *ct, int flags)
+{
+ int error;
+ lxd_node_t *ldn;
+ struct vnode *vp;
+ lxd_node_t *parent = VTOLDN(dvp);
+
+ /*
+ * Return error if trying to remove . or ..
+ */
+ if (strcmp(nm, ".") == 0)
+ return (EINVAL);
+ if (strcmp(nm, "..") == 0)
+ return (EEXIST);
+
+ error = lxd_dirlookup(VTOLDN(dvp), nm, &ldn, cr);
+ if (error != 0) {
+ /* not found in front */
+ return (error);
+ }
+
+ rw_enter(&parent->lxdn_rwlock, RW_WRITER);
+ rw_enter(&ldn->lxdn_rwlock, RW_WRITER);
+
+ vp = LDNTOV(ldn);
+ if (vp == dvp || vp == cdir) {
+ error = EINVAL;
+ goto err;
+ }
+
+ if (ldn->lxdn_vnode->v_type != VDIR) {
+ error = ENOTDIR;
+ goto err;
+ }
+
+ mutex_enter(&ldn->lxdn_tlock);
+ if (ldn->lxdn_nlink > 2) {
+ mutex_exit(&ldn->lxdn_tlock);
+ error = EEXIST;
+ goto err;
+ }
+ mutex_exit(&ldn->lxdn_tlock);
+
+ /* Check for an empty directory */
+ if (ldn->lxdn_dirents > 2) {
+ error = EEXIST;
+ gethrestime(&ldn->lxdn_atime);
+ goto err;
+ }
+
+ if (vn_vfswlock(vp)) {
+ error = EBUSY;
+ goto err;
+ }
+ if (vn_mountedvfs(vp) != NULL) {
+ error = EBUSY;
+ vn_vfsunlock(vp);
+ goto err;
+ }
+
+ error = lxd_dirdelete(parent, ldn, nm, DR_RMDIR, cr);
+ vn_vfsunlock(vp);
+
+err:
+ rw_exit(&ldn->lxdn_rwlock);
+ rw_exit(&parent->lxdn_rwlock);
+ ldnode_rele(ldn);
+
+ return (error);
+}
+
+/* Not static so it can be used during mount. */
+int
+lxd_symlink(vnode_t *dvp, char *nm, struct vattr *tva, char *tnm,
+ struct cred *cr, caller_context_t *ct, int flags)
+{
+ lxd_node_t *parent = VTOLDN(dvp);
+ lxd_mnt_t *lxdm = VTOLXDM(parent->lxdn_vnode);
+ lxd_node_t *self = NULL;
+ vnode_t *tvp;
+ char *cp = NULL;
+ int error;
+ size_t len;
+
+ /* this will check for existence in both front and back */
+ if (lxd_lookup(dvp, nm, &tvp, NULL, 0, NULL, cr, ct, NULL, NULL) == 0) {
+ /* The entry already exists */
+ VN_RELE(tvp);
+ return (EEXIST);
+ }
+
+ /* make symlink in the front */
+ rw_enter(&parent->lxdn_rwlock, RW_WRITER);
+ error = lxd_direnter(lxdm, parent, nm, DE_CREATE, NULL, NULL,
+ tva, &self, cr, ct);
+ rw_exit(&parent->lxdn_rwlock);
+
+ if (error) {
+ if (self != NULL)
+ ldnode_rele(self);
+ return (error);
+ }
+
+ len = strlen(tnm) + 1;
+ cp = kmem_alloc(len, KM_NOSLEEP | KM_NORMALPRI);
+ if (cp == NULL) {
+ ldnode_rele(self);
+ return (ENOSPC);
+ }
+ (void) strcpy(cp, tnm);
+
+ self->lxdn_symlink = cp;
+ self->lxdn_size = len - 1;
+ ldnode_rele(self);
+
+ return (error);
+}
+
+static int
+lxd_readlink(vnode_t *vp, struct uio *uiop, struct cred *cr,
+ caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+
+ if (ldn->lxdn_type == LXDNT_FRONT) {
+ int error;
+
+ if (vp->v_type != VLNK)
+ return (EINVAL);
+
+ rw_enter(&ldn->lxdn_rwlock, RW_READER);
+ error = uiomove(ldn->lxdn_symlink, ldn->lxdn_size, UIO_READ,
+ uiop);
+ gethrestime(&ldn->lxdn_atime);
+ rw_exit(&ldn->lxdn_rwlock);
+ return (error);
+ }
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ vp = REALVP(vp);
+ return (VOP_READLINK(vp, uiop, cr, ct));
+}
+
+static int
+lx_merge_front(vnode_t *vp, struct uio *uiop, off_t req_off, int *eofp)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+ struct dirent *sd;
+ lxd_dirent_t *ldp;
+ enum lxd_node_type type = ldn->lxdn_type;
+ ssize_t uresid;
+ off_t front_off;
+ int error = 0;
+ int sdlen;
+
+ /* skip the front entries if the back read was incomplete */
+ if (*eofp == 0)
+ return (0);
+
+ /*
+ * If this was a back node then reading that node has completed and we
+ * may have a partially full uio struct. eof should be set to true.
+ * Leave it set since we're likely to hit eof for the front nodes (if
+ * any).
+ */
+
+ front_off = uiop->uio_offset + 1;
+ sdlen = sizeof (struct dirent) + MAXPATHLEN;
+ /* zalloc to ensure we don't have anything in the d_name buffer */
+ sd = (struct dirent *)kmem_zalloc(sdlen, KM_SLEEP);
+ ldp = ldn->lxdn_dir;
+ while (ldp != NULL && (uresid = uiop->uio_resid) > 0) {
+ int namelen;
+ int reclen;
+
+ /*
+ * Skip dot and dotdot for back nodes since we have them
+ * already.
+ */
+ if (type == LXDNT_BACK &&
+ (strcmp(ldp->lddir_name, ".") == 0 ||
+ strcmp(ldp->lddir_name, "..") == 0)) {
+ ldp = ldp->lddir_next;
+ continue;
+ }
+
+ /*
+ * Might have previously had a partial readdir of the front
+ * nodes, and now we're back for more, or we may just be
+ * be doing a follow-up readdir after we've previously
+ * returned all front and back nodes.
+ */
+ if (front_off > req_off) {
+ namelen = strlen(ldp->lddir_name); /* no +1 needed */
+ reclen = (int)DIRENT64_RECLEN(namelen);
+
+ /*
+ * If the size of the data to transfer is greater
+ * than that requested, then we can't do it this
+ * transfer.
+ */
+ if (reclen > uresid) {
+ *eofp = 0;
+ /* Buffer too small for any entries. */
+ if (front_off == 0)
+ error = EINVAL;
+ break;
+ }
+
+ (void) strncpy(sd->d_name, ldp->lddir_name,
+ DIRENT64_NAMELEN(reclen));
+ sd->d_reclen = (ushort_t)reclen;
+ sd->d_ino = (ino_t)ldp->lddir_node->lxdn_nodeid;
+ sd->d_off = front_off;
+
+ /* uiomove will adjust iov_base properly */
+ if ((error = uiomove((caddr_t)sd, reclen, UIO_READ,
+ uiop)) != 0) {
+ *eofp = 0;
+ break;
+ }
+ }
+
+ /*
+ * uiomove() above updates both uio_resid and uio_offset by the
+ * same amount but we want uio_offset to change in increments
+ * of 1, which is different from the number of bytes being
+ * returned to the caller, so we set uio_offset explicitly,
+ * ignoring what uiomove() did.
+ */
+ uiop->uio_offset = front_off;
+ front_off++;
+
+ ldp = ldp->lddir_next;
+ }
+
+ kmem_free(sd, sdlen);
+ return (error);
+}
+
+static int
+lxd_readdir(vnode_t *vp, struct uio *uiop, struct cred *cr, int *eofp,
+ caller_context_t *ct, int flags)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+ vnode_t *rvp;
+ int res;
+ off_t req_off;
+
+ if (uiop->uio_iovcnt != 1)
+ return (EINVAL);
+
+ if (vp->v_type != VDIR)
+ return (ENOTDIR);
+
+ req_off = uiop->uio_offset;
+
+ /* First read the back node (if it is one) */
+ if (ldn->lxdn_type == LXDNT_BACK) {
+ rvp = REALVP(vp);
+ res = VOP_READDIR(rvp, uiop, cr, eofp, ct, flags);
+ if (res != 0)
+ return (res);
+ } else {
+ /* setup for merge_front */
+ ASSERT(ldn->lxdn_type == LXDNT_FRONT);
+ /* caller should have already called lxd_rwlock */
+ ASSERT(RW_READ_HELD(&ldn->lxdn_rwlock));
+
+ *eofp = 1;
+ /*
+ * The merge code starts the offset calculation from uio_offset,
+ * which is normally already set to the high value by the back
+ * code, but in this case we need to count up from 0.
+ */
+ uiop->uio_offset = 0;
+ }
+
+ /*
+ * Our back nodes can also have front entries hanging on them so we
+ * need to merge those in. Or, we may simply have a front node (i.e. a
+ * front subdir).
+ */
+ res = lx_merge_front(vp, uiop, req_off, eofp);
+ return (res);
+}
+
+static int
+lxd_rwlock(vnode_t *vp, int write_lock, caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+
+ if (ldn->lxdn_type == LXDNT_FRONT) {
+ if (write_lock) {
+ rw_enter(&ldn->lxdn_rwlock, RW_WRITER);
+ } else {
+ rw_enter(&ldn->lxdn_rwlock, RW_READER);
+ }
+ return (write_lock);
+ }
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ vp = REALVP(vp);
+ return (VOP_RWLOCK(vp, write_lock, ct));
+}
+
+static void
+lxd_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+
+ if (ldn->lxdn_type == LXDNT_FRONT) {
+ rw_exit(&ldn->lxdn_rwlock);
+ return;
+ }
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ vp = REALVP(vp);
+ VOP_RWUNLOCK(vp, write_lock, ct);
+}
+
+static int
+lxd_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+
+ if (ldn->lxdn_type == LXDNT_FRONT) {
+ return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
+ }
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ vp = REALVP(vp);
+ return (VOP_SEEK(vp, ooff, noffp, ct));
+}
+
+static int
+lxd_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
+{
+ while (vn_matchops(vp1, lxd_vnodeops) &&
+ VTOLDN(vp1)->lxdn_type == LXDNT_BACK) {
+ vp1 = REALVP(vp1);
+ }
+ while (vn_matchops(vp2, lxd_vnodeops) &&
+ VTOLDN(vp2)->lxdn_type == LXDNT_BACK) {
+ vp2 = REALVP(vp2);
+ }
+
+ if (vn_matchops(vp1, lxd_vnodeops) || vn_matchops(vp2, lxd_vnodeops))
+ return (vp1 == vp2);
+
+ return (VOP_CMP(vp1, vp2, ct));
+}
+
+static int
+lxd_frlock(vnode_t *vp, int cmd, struct flock64 *bfp, int flag, offset_t offset,
+ struct flk_callback *flk_cbp, cred_t *cr, caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+
+ if (ldn->lxdn_type == LXDNT_FRONT) {
+ return (EINVAL);
+ }
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ vp = REALVP(vp);
+ return (VOP_FRLOCK(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
+}
+
+static int
+lxd_space(vnode_t *vp, int cmd, struct flock64 *bfp, int flag, offset_t offset,
+ struct cred *cr, caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+
+ if (ldn->lxdn_type == LXDNT_FRONT) {
+ return (EINVAL);
+ }
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ vp = REALVP(vp);
+ return (VOP_SPACE(vp, cmd, bfp, flag, offset, cr, ct));
+}
+
+static int
+lxd_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *prot,
+ struct page *parr[], size_t psz, struct seg *seg, caddr_t addr,
+ enum seg_rw rw, struct cred *cr, caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+
+ if (ldn->lxdn_type == LXDNT_FRONT) {
+ return (EINVAL);
+ }
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ vp = REALVP(vp);
+ return (VOP_GETPAGE(vp, off, len, prot, parr, psz, seg, addr, rw, cr,
+ ct));
+}
+
+static int
+lxd_putpage(vnode_t *vp, offset_t off, size_t len, int flags, struct cred *cr,
+ caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+
+ if (ldn->lxdn_type == LXDNT_FRONT) {
+ return (EINVAL);
+ }
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ vp = REALVP(vp);
+ return (VOP_PUTPAGE(vp, off, len, flags, cr, ct));
+}
+
+static int
+lxd_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp, size_t len,
+ uchar_t prot, uchar_t maxprot, uint_t flags, struct cred *cr,
+ caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+
+ if (ldn->lxdn_type == LXDNT_FRONT) {
+ return (EINVAL);
+ }
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ vp = REALVP(vp);
+ return (VOP_MAP(vp, off, as, addrp, len, prot, maxprot, flags, cr, ct));
+}
+
+static int
+lxd_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, size_t len,
+ uchar_t prot, uchar_t maxprot, uint_t flags, struct cred *cr,
+ caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+
+ if (ldn->lxdn_type == LXDNT_FRONT) {
+ return (EINVAL);
+ }
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ vp = REALVP(vp);
+ return (VOP_ADDMAP(vp, off, as, addr, len, prot, maxprot, flags, cr,
+ ct));
+}
+
+static int
+lxd_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, size_t len,
+ uint_t prot, uint_t maxprot, uint_t flags, struct cred *cr,
+ caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+
+ if (ldn->lxdn_type == LXDNT_FRONT) {
+ return (EINVAL);
+ }
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ vp = REALVP(vp);
+ return (VOP_DELMAP(vp, off, as, addr, len, prot, maxprot, flags, cr,
+ ct));
+}
+
+static int
+lxd_poll(vnode_t *vp, short events, int anyyet, short *reventsp,
+ struct pollhead **phpp, caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+
+ if (ldn->lxdn_type == LXDNT_FRONT) {
+ return (EINVAL);
+ }
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ vp = REALVP(vp);
+ return (VOP_POLL(vp, events, anyyet, reventsp, phpp, ct));
+}
+
+static int
+lxd_dump(vnode_t *vp, caddr_t addr, offset_t bn, offset_t count,
+ caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+
+ if (ldn->lxdn_type == LXDNT_FRONT) {
+ return (EINVAL);
+ }
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ vp = REALVP(vp);
+ return (VOP_DUMP(vp, addr, bn, count, ct));
+}
+
+static int
+lxd_pathconf(vnode_t *vp, int cmd, ulong_t *valp, struct cred *cr,
+ caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+
+ if (ldn->lxdn_type == LXDNT_FRONT) {
+ return (EINVAL);
+ }
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ vp = REALVP(vp);
+ return (VOP_PATHCONF(vp, cmd, valp, cr, ct));
+}
+
+static int
+lxd_pageio(vnode_t *vp, struct page *pp, u_offset_t io_off, size_t io_len,
+ int flags, cred_t *cr, caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+
+ if (ldn->lxdn_type == LXDNT_FRONT) {
+ return (EINVAL);
+ }
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ vp = REALVP(vp);
+ return (VOP_PAGEIO(vp, pp, io_off, io_len, flags, cr, ct));
+}
+
+static void
+lxd_dispose(vnode_t *vp, page_t *pp, int fl, int dn, cred_t *cr,
+ caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+
+ if (ldn->lxdn_type == LXDNT_FRONT) {
+ return;
+ }
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ vp = REALVP(vp);
+ if (vp != NULL && !VN_ISKAS(vp))
+ VOP_DISPOSE(vp, pp, fl, dn, cr, ct);
+}
+
+static int
+lxd_setsecattr(vnode_t *vp, vsecattr_t *secattr, int flags, struct cred *cr,
+ caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+ if (ldn->lxdn_type == LXDNT_FRONT) {
+ return (ENOSYS);
+ }
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ if (vn_is_readonly(vp))
+ return (EROFS);
+
+ vp = REALVP(vp);
+ return (VOP_SETSECATTR(vp, secattr, flags, cr, ct));
+}
+
+static int
+lxd_getsecattr(vnode_t *vp, vsecattr_t *secattr, int flags, struct cred *cr,
+ caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+
+ if (ldn->lxdn_type == LXDNT_FRONT) {
+ return (ENOSYS);
+ }
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ vp = REALVP(vp);
+ return (VOP_GETSECATTR(vp, secattr, flags, cr, ct));
+}
+
+static int
+lxd_shrlock(vnode_t *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr,
+ caller_context_t *ct)
+{
+ lxd_node_t *ldn = VTOLDN(vp);
+
+ if (ldn->lxdn_type == LXDNT_FRONT) {
+ return (EINVAL);
+ }
+
+ ASSERT(ldn->lxdn_type == LXDNT_BACK);
+ vp = REALVP(vp);
+ return (VOP_SHRLOCK(vp, cmd, shr, flag, cr, ct));
+}
+
+/*
+ * Loopback vnode operations vector.
+ */
+
+struct vnodeops *lxd_vnodeops;
+
+const fs_operation_def_t lxd_vnodeops_template[] = {
+ VOPNAME_OPEN, { .vop_open = lxd_open },
+ VOPNAME_CLOSE, { .vop_close = lxd_close },
+ VOPNAME_READ, { .vop_read = lxd_read },
+ VOPNAME_WRITE, { .vop_write = lxd_write },
+ VOPNAME_IOCTL, { .vop_ioctl = lxd_ioctl },
+ VOPNAME_SETFL, { .vop_setfl = lxd_setfl },
+ VOPNAME_GETATTR, { .vop_getattr = lxd_getattr },
+ VOPNAME_SETATTR, { .vop_setattr = lxd_setattr },
+ VOPNAME_ACCESS, { .vop_access = lxd_access },
+ VOPNAME_LOOKUP, { .vop_lookup = lxd_lookup },
+ VOPNAME_CREATE, { .vop_create = lxd_create },
+ VOPNAME_REMOVE, { .vop_remove = lxd_remove },
+ VOPNAME_LINK, { .vop_link = lxd_link },
+ VOPNAME_RENAME, { .vop_rename = lxd_rename },
+ VOPNAME_MKDIR, { .vop_mkdir = lxd_mkdir },
+ VOPNAME_RMDIR, { .vop_rmdir = lxd_rmdir },
+ VOPNAME_READDIR, { .vop_readdir = lxd_readdir },
+ VOPNAME_SYMLINK, { .vop_symlink = lxd_symlink },
+ VOPNAME_READLINK, { .vop_readlink = lxd_readlink },
+ VOPNAME_FSYNC, { .vop_fsync = lxd_fsync },
+ VOPNAME_INACTIVE, { .vop_inactive = lxd_inactive },
+ VOPNAME_FID, { .vop_fid = lxd_fid },
+ VOPNAME_RWLOCK, { .vop_rwlock = lxd_rwlock },
+ VOPNAME_RWUNLOCK, { .vop_rwunlock = lxd_rwunlock },
+ VOPNAME_SEEK, { .vop_seek = lxd_seek },
+ VOPNAME_CMP, { .vop_cmp = lxd_cmp },
+ VOPNAME_FRLOCK, { .vop_frlock = lxd_frlock },
+ VOPNAME_SPACE, { .vop_space = lxd_space },
+ VOPNAME_REALVP, { .vop_realvp = lxd_realvp },
+ VOPNAME_GETPAGE, { .vop_getpage = lxd_getpage },
+ VOPNAME_PUTPAGE, { .vop_putpage = lxd_putpage },
+ VOPNAME_MAP, { .vop_map = lxd_map },
+ VOPNAME_ADDMAP, { .vop_addmap = lxd_addmap },
+ VOPNAME_DELMAP, { .vop_delmap = lxd_delmap },
+ VOPNAME_POLL, { .vop_poll = lxd_poll },
+ VOPNAME_DUMP, { .vop_dump = lxd_dump },
+ VOPNAME_DUMPCTL, { .error = fs_error },
+ VOPNAME_PATHCONF, { .vop_pathconf = lxd_pathconf },
+ VOPNAME_PAGEIO, { .vop_pageio = lxd_pageio },
+ VOPNAME_DISPOSE, { .vop_dispose = lxd_dispose },
+ VOPNAME_SETSECATTR, { .vop_setsecattr = lxd_setsecattr },
+ VOPNAME_GETSECATTR, { .vop_getsecattr = lxd_getsecattr },
+ VOPNAME_SHRLOCK, { .vop_shrlock = lxd_shrlock },
+ NULL, NULL
+};
diff --git a/usr/src/uts/common/brand/lx/dtrace/lx_systrace.c b/usr/src/uts/common/brand/lx/dtrace/lx_systrace.c
new file mode 100644
index 0000000000..510626d220
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/dtrace/lx_systrace.c
@@ -0,0 +1,497 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
+ */
+
+
+#include <sys/modctl.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/stat.h>
+#include <sys/conf.h>
+#include <sys/frame.h>
+#include <sys/dtrace.h>
+#include <sys/dtrace_impl.h>
+
+#include <sys/lx_brand.h>
+#include <sys/lx_impl.h>
+
+/*
+ * We store the syscall number in the low 16 bits (which limits us to 64k
+ * syscalls). The next bit indicates entry/return probe and the next bit
+ * indicates 64bit/32bit syscall.
+ */
+#define SCALL_MASK 0xffff
+#define ENTRY_FLAG 0x10000
+#define SYSC_64_BIT 0x100000
+
+#define LX_SYSTRACE_IS64BIT(x) ((int)(x) & SYSC_64_BIT)
+#define LX_SYSTRACE_ISENTRY(x) ((int)(x) & ENTRY_FLAG)
+#define LX_SYSTRACE_SYSNUM(x) ((int)(x) & SCALL_MASK)
+
+#define LX_SYSTRACE32_ENTRY(id) (ENTRY_FLAG | (id))
+#define LX_SYSTRACE32_RETURN(id) (id)
+
+#define LX_SYSTRACE64_ENTRY(id) (SYSC_64_BIT | ENTRY_FLAG | (id))
+#define LX_SYSTRACE64_RETURN(id) (SYSC_64_BIT | id)
+
+#define LX_SYSTRACE_ENTRY_AFRAMES 2
+#define LX_SYSTRACE_RETURN_AFRAMES 4
+
+typedef struct lx_systrace_sysent {
+ const char *lss_name;
+ dtrace_id_t lss_entry;
+ dtrace_id_t lss_return;
+} lx_systrace_sysent_t;
+
+static dev_info_t *lx_systrace_devi;
+static dtrace_provider_id_t lx_systrace_id;
+static kmutex_t lx_systrace_lock;
+static uint_t lx_systrace_nenabled;
+
+static int lx_systrace_nsysent32;
+static lx_systrace_sysent_t *lx_systrace_sysent32;
+
+#if defined(_LP64)
+static int lx_systrace_nsysent64;
+static lx_systrace_sysent_t *lx_systrace_sysent64;
+#endif
+
+/*ARGSUSED*/
+static void
+lx_systrace_entry(ulong_t sysnum, ulong_t arg0, ulong_t arg1, ulong_t arg2,
+ ulong_t arg3, ulong_t arg4, ulong_t arg5)
+{
+ dtrace_id_t id;
+
+#if defined(_LP64)
+ if ((ttoproc(curthread))->p_model == DATAMODEL_NATIVE) {
+ if (sysnum >= lx_systrace_nsysent64)
+ return;
+ id = lx_systrace_sysent64[sysnum].lss_entry;
+ } else
+#endif
+ {
+ if (sysnum >= lx_systrace_nsysent32)
+ return;
+ id = lx_systrace_sysent32[sysnum].lss_entry;
+ }
+
+ if (id == DTRACE_IDNONE)
+ return;
+ dtrace_probe(id, arg0, arg1, arg2, arg3, arg4);
+}
+
+/*ARGSUSED*/
+static void
+lx_systrace_return(ulong_t sysnum, ulong_t arg0, ulong_t arg1, ulong_t arg2,
+ ulong_t arg3, ulong_t arg4, ulong_t arg5)
+{
+ dtrace_id_t id;
+
+#if defined(_LP64)
+ if ((ttoproc(curthread))->p_model == DATAMODEL_NATIVE) {
+ if (sysnum >= lx_systrace_nsysent64)
+ return;
+ id = lx_systrace_sysent64[sysnum].lss_return;
+ } else
+#endif
+ {
+ if (sysnum >= lx_systrace_nsysent32)
+ return;
+ id = lx_systrace_sysent32[sysnum].lss_return;
+ }
+
+ if (id == DTRACE_IDNONE)
+ return;
+ dtrace_probe(id, arg0, arg1, arg2, arg3, arg4);
+}
+
+/*ARGSUSED*/
+static void
+lx_systrace_provide(void *arg, const dtrace_probedesc_t *desc)
+{
+ int i;
+
+ if (desc != NULL)
+ return;
+
+ for (i = 0; i < lx_systrace_nsysent32; i++) {
+ if (dtrace_probe_lookup(lx_systrace_id, "sys32",
+ lx_systrace_sysent32[i].lss_name, "entry") != 0)
+ continue;
+
+ (void) dtrace_probe_create(lx_systrace_id, "sys32",
+ lx_systrace_sysent32[i].lss_name, "entry",
+ LX_SYSTRACE_ENTRY_AFRAMES,
+ (void *)((uintptr_t)LX_SYSTRACE32_ENTRY(i)));
+
+ (void) dtrace_probe_create(lx_systrace_id, "sys32",
+ lx_systrace_sysent32[i].lss_name, "return",
+ LX_SYSTRACE_RETURN_AFRAMES,
+ (void *)((uintptr_t)LX_SYSTRACE32_RETURN(i)));
+
+ lx_systrace_sysent32[i].lss_entry = DTRACE_IDNONE;
+ lx_systrace_sysent32[i].lss_return = DTRACE_IDNONE;
+ }
+
+#if defined(_LP64)
+ for (i = 0; i < lx_systrace_nsysent64; i++) {
+ if (dtrace_probe_lookup(lx_systrace_id, "sys64",
+ lx_systrace_sysent64[i].lss_name, "entry") != 0)
+ continue;
+
+ (void) dtrace_probe_create(lx_systrace_id, "sys64",
+ lx_systrace_sysent64[i].lss_name, "entry",
+ LX_SYSTRACE_ENTRY_AFRAMES,
+ (void *)((uintptr_t)LX_SYSTRACE64_ENTRY(i)));
+
+ (void) dtrace_probe_create(lx_systrace_id, "sys64",
+ lx_systrace_sysent64[i].lss_name, "return",
+ LX_SYSTRACE_RETURN_AFRAMES,
+ (void *)((uintptr_t)LX_SYSTRACE64_RETURN(i)));
+
+ lx_systrace_sysent64[i].lss_entry = DTRACE_IDNONE;
+ lx_systrace_sysent64[i].lss_return = DTRACE_IDNONE;
+ }
+#endif
+}
+
+/*ARGSUSED*/
+static int
+lx_systrace_enable(void *arg, dtrace_id_t id, void *parg)
+{
+ int sysnum = LX_SYSTRACE_SYSNUM((uintptr_t)parg);
+
+ mutex_enter(&lx_systrace_lock);
+ if (lx_systrace_nenabled++ == 0)
+ lx_brand_systrace_enable();
+ mutex_exit(&lx_systrace_lock);
+
+ if (LX_SYSTRACE_IS64BIT((uintptr_t)parg)) {
+#if defined(_LP64)
+ ASSERT(sysnum < lx_systrace_nsysent64);
+
+ if (LX_SYSTRACE_ISENTRY((uintptr_t)parg)) {
+ lx_systrace_sysent64[sysnum].lss_entry = id;
+ } else {
+ lx_systrace_sysent64[sysnum].lss_return = id;
+ }
+#endif
+ } else {
+ ASSERT(sysnum < lx_systrace_nsysent32);
+
+ if (LX_SYSTRACE_ISENTRY((uintptr_t)parg)) {
+ lx_systrace_sysent32[sysnum].lss_entry = id;
+ } else {
+ lx_systrace_sysent32[sysnum].lss_return = id;
+ }
+ }
+ return (0);
+}
+
+/*ARGSUSED*/
+static void
+lx_systrace_disable(void *arg, dtrace_id_t id, void *parg)
+{
+ int sysnum = LX_SYSTRACE_SYSNUM((uintptr_t)parg);
+
+ if (LX_SYSTRACE_IS64BIT((uintptr_t)parg)) {
+#if defined(_LP64)
+ ASSERT(sysnum < lx_systrace_nsysent64);
+
+ if (LX_SYSTRACE_ISENTRY((uintptr_t)parg)) {
+ lx_systrace_sysent64[sysnum].lss_entry = DTRACE_IDNONE;
+ } else {
+ lx_systrace_sysent64[sysnum].lss_return = DTRACE_IDNONE;
+ }
+#endif
+ } else {
+ ASSERT(sysnum < lx_systrace_nsysent32);
+
+ if (LX_SYSTRACE_ISENTRY((uintptr_t)parg)) {
+ lx_systrace_sysent32[sysnum].lss_entry = DTRACE_IDNONE;
+ } else {
+ lx_systrace_sysent32[sysnum].lss_return = DTRACE_IDNONE;
+ }
+ }
+
+ mutex_enter(&lx_systrace_lock);
+ if (--lx_systrace_nenabled == 0)
+ lx_brand_systrace_disable();
+ mutex_exit(&lx_systrace_lock);
+}
+
+/*ARGSUSED*/
+static void
+lx_systrace_destroy(void *arg, dtrace_id_t id, void *parg)
+{
+}
+
+/*ARGSUSED*/
+static uint64_t
+lx_systrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
+ int aframes)
+{
+ struct frame *fp = (struct frame *)dtrace_getfp();
+ uintptr_t *stack;
+ uint64_t val = 0;
+ int i;
+
+ if (argno >= 6)
+ return (0);
+
+ /*
+ * Walk the four frames down the stack to the entry or return callback.
+ * Our callback calls dtrace_probe() which calls dtrace_dif_variable()
+ * which invokes this function to get the extended arguments. We get
+ * the frame pointer in via call to dtrace_getfp() above which makes for
+ * four frames.
+ */
+ for (i = 0; i < 4; i++) {
+ fp = (struct frame *)fp->fr_savfp;
+ }
+
+ stack = (uintptr_t *)&fp[1];
+
+ /*
+ * Skip the first argument to the callback -- the system call number.
+ */
+ argno++;
+
+#ifdef __amd64
+ /*
+ * On amd64, the first 6 arguments are passed in registers while
+ * subsequent arguments are on the stack.
+ */
+ argno -= 6;
+#endif
+
+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
+ val = stack[argno];
+ DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
+
+ return (val);
+}
+
+
+static const dtrace_pattr_t lx_systrace_attr = {
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
+};
+
+static dtrace_pops_t lx_systrace_pops = {
+ lx_systrace_provide,
+ NULL,
+ lx_systrace_enable,
+ lx_systrace_disable,
+ NULL,
+ NULL,
+ NULL,
+ lx_systrace_getarg,
+ NULL,
+ lx_systrace_destroy
+};
+
+static int
+lx_systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
+{
+ int i;
+
+ switch (cmd) {
+ case DDI_ATTACH:
+ break;
+ case DDI_RESUME:
+ return (DDI_SUCCESS);
+ default:
+ return (DDI_FAILURE);
+ }
+
+ if (ddi_create_minor_node(devi, "lx_systrace", S_IFCHR,
+ 0, DDI_PSEUDO, NULL) == DDI_FAILURE ||
+ dtrace_register("lx-syscall", &lx_systrace_attr,
+ DTRACE_PRIV_USER, 0, &lx_systrace_pops, NULL,
+ &lx_systrace_id) != 0) {
+ ddi_remove_minor_node(devi, NULL);
+ return (DDI_FAILURE);
+ }
+
+ ddi_report_dev(devi);
+ lx_systrace_devi = devi;
+
+ /*
+ * Initialize the 32-bit table.
+ */
+ VERIFY(lx_nsysent32 > 0);
+ lx_systrace_nsysent32 = lx_nsysent32;
+ lx_systrace_sysent32 = kmem_zalloc(lx_systrace_nsysent32 *
+ sizeof (lx_systrace_sysent_t), KM_SLEEP);
+
+ for (i = 0; i < lx_systrace_nsysent32; i++) {
+ lx_systrace_sysent32[i].lss_name = lx_sysent32[i].sy_name;
+ lx_systrace_sysent32[i].lss_entry = DTRACE_IDNONE;
+ lx_systrace_sysent32[i].lss_return = DTRACE_IDNONE;
+ }
+
+#if defined(_LP64)
+ /*
+ * Initialize the 64-bit table.
+ */
+ VERIFY(lx_nsysent64 > 0);
+ lx_systrace_nsysent64 = lx_nsysent64;
+ lx_systrace_sysent64 = kmem_zalloc(lx_systrace_nsysent64 *
+ sizeof (lx_systrace_sysent_t), KM_SLEEP);
+
+ for (i = 0; i < lx_systrace_nsysent64; i++) {
+ lx_systrace_sysent64[i].lss_name = lx_sysent64[i].sy_name;
+ lx_systrace_sysent64[i].lss_entry = DTRACE_IDNONE;
+ lx_systrace_sysent64[i].lss_return = DTRACE_IDNONE;
+ }
+#endif
+
+ /*
+ * Install probe triggers.
+ */
+ lx_systrace_entry_ptr = lx_systrace_entry;
+ lx_systrace_return_ptr = lx_systrace_return;
+
+ return (DDI_SUCCESS);
+}
+
+/*ARGSUSED*/
+static int
+lx_systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
+{
+ switch (cmd) {
+ case DDI_DETACH:
+ break;
+ case DDI_SUSPEND:
+ return (DDI_SUCCESS);
+ default:
+ return (DDI_FAILURE);
+ }
+
+ if (dtrace_unregister(lx_systrace_id) != 0)
+ return (DDI_FAILURE);
+
+ /*
+ * Free tables.
+ */
+ kmem_free(lx_systrace_sysent32, lx_systrace_nsysent32 *
+ sizeof (lx_systrace_sysent_t));
+ lx_systrace_sysent32 = NULL;
+ lx_systrace_nsysent32 = 0;
+
+#if defined(_LP64)
+ kmem_free(lx_systrace_sysent64, lx_systrace_nsysent64 *
+ sizeof (lx_systrace_sysent_t));
+ lx_systrace_sysent64 = NULL;
+ lx_systrace_nsysent64 = 0;
+#endif
+
+ /*
+ * Reset probe triggers.
+ */
+ lx_systrace_entry_ptr = NULL;
+ lx_systrace_return_ptr = NULL;
+
+ return (DDI_SUCCESS);
+}
+
+/*ARGSUSED*/
+static int
+lx_systrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
+{
+ return (0);
+}
+
+static struct cb_ops lx_systrace_cb_ops = {
+ lx_systrace_open, /* open */
+ nodev, /* close */
+ nulldev, /* strategy */
+ nulldev, /* print */
+ nodev, /* dump */
+ nodev, /* read */
+ nodev, /* write */
+ nodev, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ nodev, /* segmap */
+ nochpoll, /* poll */
+ ddi_prop_op, /* cb_prop_op */
+ 0, /* streamtab */
+ D_NEW | D_MP /* Driver compatibility flag */
+};
+
+static struct dev_ops lx_systrace_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* refcnt */
+ ddi_getinfo_1to1, /* get_dev_info */
+ nulldev, /* identify */
+ nulldev, /* probe */
+ lx_systrace_attach, /* attach */
+ lx_systrace_detach, /* detach */
+ nodev, /* reset */
+ &lx_systrace_cb_ops, /* driver operations */
+ NULL, /* bus operations */
+ nodev, /* dev power */
+ ddi_quiesce_not_needed, /* quiesce */
+};
+
+/*
+ * Module linkage information for the kernel.
+ */
+static struct modldrv modldrv = {
+ &mod_driverops, /* module type (this is a pseudo driver) */
+ "Linux Brand System Call Tracing", /* name of module */
+ &lx_systrace_ops /* driver ops */
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1,
+ (void *)&modldrv,
+ NULL
+};
+
+int
+_init(void)
+{
+ return (mod_install(&modlinkage));
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ return (mod_remove(&modlinkage));
+}
diff --git a/usr/src/uts/common/brand/lx/dtrace/lx_systrace.conf b/usr/src/uts/common/brand/lx/dtrace/lx_systrace.conf
new file mode 100644
index 0000000000..e4499c8a5b
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/dtrace/lx_systrace.conf
@@ -0,0 +1,27 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+
+name="lx_systrace" parent="pseudo" instance=0;
diff --git a/usr/src/uts/common/brand/lx/io/lx_netlink.c b/usr/src/uts/common/brand/lx/io/lx_netlink.c
new file mode 100644
index 0000000000..6fec9ef4cb
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/io/lx_netlink.c
@@ -0,0 +1,1684 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * Compatibility for the Linux netlink(7) kernel/user transport, as well as
+ * for in-kernel netlink(7) providers like rtnetlink(7). See RFC 3549 for
+ * details of the protocol, and the Linux man pages for details of the Linux
+ * implementation that we're mimicking.
+ */
+
+#include <sys/strsubr.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+#include <sys/strsun.h>
+#include <sys/tihdr.h>
+#include <sys/sockio.h>
+#include <sys/brand.h>
+#include <sys/debug.h>
+#include <sys/ucred.h>
+#include <inet/ip.h>
+#include <inet/ip6.h>
+#include <inet/ip_impl.h>
+#include <inet/ip_ire.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_socket.h>
+#include <sys/ethernet.h>
+#include <sys/dlpi.h>
+#include <sys/policy.h>
+
+/*
+ * Flags in netlink header
+ */
+#define LX_NETLINK_NLM_F_REQUEST 1
+#define LX_NETLINK_NLM_F_MULTI 2
+#define LX_NETLINK_NLM_F_ACK 4
+#define LX_NETLINK_NLM_F_ECHO 8
+#define LX_NETLINK_NLM_F_DUMP_INTR 16
+#define LX_NETLINK_NLM_F_ROOT 0x100
+#define LX_NETLINK_NLM_F_MATCH 0x200
+#define LX_NETLINK_NLM_F_ATOMIC 0x400
+
+/*
+ * Generic message type constants
+ */
+#define LX_NETLINK_NLMSG_NONE 0
+#define LX_NETLINK_NLMSG_NOOP 1
+#define LX_NETLINK_NLMSG_ERROR 2
+#define LX_NETLINK_NLMSG_DONE 3
+#define LX_NETLINK_NLMSG_OVERRUN 4
+
+/*
+ * Protocol constants.
+ */
+#define LX_NETLINK_ROUTE 0
+#define LX_NETLINK_UNUSED 1
+#define LX_NETLINK_USERSOCK 2
+#define LX_NETLINK_FIREWALL 3
+#define LX_NETLINK_SOCK_DIAG 4
+#define LX_NETLINK_NFLOG 5
+#define LX_NETLINK_XFRM 6
+#define LX_NETLINK_SELINUX 7
+#define LX_NETLINK_ISCSI 8
+#define LX_NETLINK_AUDIT 9
+#define LX_NETLINK_FIB_LOOKUP 10
+#define LX_NETLINK_CONNECTOR 11
+#define LX_NETLINK_NETFILTER 12
+#define LX_NETLINK_IP6_FW 13
+#define LX_NETLINK_DNRTMSG 14
+#define LX_NETLINK_KOBJECT_UEVENT 15
+#define LX_NETLINK_GENERIC 16
+#define LX_NETLINK_SCSITRANSPORT 18
+#define LX_NETLINK_ECRYPTFS 19
+#define LX_NETLINK_RDMA 20
+#define LX_NETLINK_CRYPTO 21
+
+/*
+ * rtnetlink(7) attribute-related constants
+ */
+#define LX_NETLINK_NLA_ALIGNTO 4
+
+#define LX_NETLINK_RTM_NEWLINK 16
+#define LX_NETLINK_RTM_DELLINK 17
+#define LX_NETLINK_RTM_GETLINK 18
+#define LX_NETLINK_RTM_SETLINK 19
+#define LX_NETLINK_RTM_NEWADDR 20
+#define LX_NETLINK_RTM_DELADDR 21
+#define LX_NETLINK_RTM_GETADDR 22
+#define LX_NETLINK_RTM_NEWROUTE 24
+#define LX_NETLINK_RTM_DELROUTE 25
+#define LX_NETLINK_RTM_GETROUTE 26
+#define LX_NETLINK_RTM_NEWNEIGH 28
+#define LX_NETLINK_RTM_DELNEIGH 29
+#define LX_NETLINK_RTM_GETNEIGH 30
+#define LX_NETLINK_RTM_NEWRULE 32
+#define LX_NETLINK_RTM_DELRULE 33
+#define LX_NETLINK_RTM_GETRULE 34
+#define LX_NETLINK_RTM_NEWQDISC 36
+#define LX_NETLINK_RTM_DELQDISC 37
+#define LX_NETLINK_RTM_GETQDISC 38
+#define LX_NETLINK_RTM_NEWTCLASS 40
+#define LX_NETLINK_RTM_DELTCLASS 41
+#define LX_NETLINK_RTM_GETTCLASS 42
+#define LX_NETLINK_RTM_NEWTFILTER 44
+#define LX_NETLINK_RTM_DELTFILTER 45
+#define LX_NETLINK_RTM_GETTFILTER 46
+#define LX_NETLINK_RTM_NEWACTION 48
+#define LX_NETLINK_RTM_DELACTION 49
+#define LX_NETLINK_RTM_GETACTION 50
+#define LX_NETLINK_RTM_NEWPREFIX 52
+#define LX_NETLINK_RTM_GETMULTICAST 58
+#define LX_NETLINK_RTM_GETANYCAST 62
+#define LX_NETLINK_RTM_NEWNEIGHTBL 64
+#define LX_NETLINK_RTM_GETNEIGHTBL 66
+#define LX_NETLINK_RTM_SETNEIGHTBL 67
+#define LX_NETLINK_RTM_NEWNDUSEROPT 68
+#define LX_NETLINK_RTM_NEWADDRLABEL 72
+#define LX_NETLINK_RTM_DELADDRLABEL 73
+#define LX_NETLINK_RTM_GETADDRLABEL 74
+#define LX_NETLINK_RTM_GETDCB 78
+#define LX_NETLINK_RTM_SETDCB 79
+#define LX_NETLINK_RTM_NEWNETCONF 80
+#define LX_NETLINK_RTM_GETNETCONF 82
+#define LX_NETLINK_RTM_NEWMDB 84
+#define LX_NETLINK_RTM_DELMDB 85
+#define LX_NETLINK_RTM_GETMDB 86
+#define LX_NETLINK_RTM_MAX 87
+
+/*
+ * rtnetlink(7) attribute constants
+ */
+#define LX_NETLINK_RTA_UNSPEC 0
+#define LX_NETLINK_RTA_DST 1
+#define LX_NETLINK_RTA_SRC 2
+#define LX_NETLINK_RTA_IIF 3
+#define LX_NETLINK_RTA_OIF 4
+#define LX_NETLINK_RTA_GATEWAY 5
+#define LX_NETLINK_RTA_PRIORITY 6
+#define LX_NETLINK_RTA_PREFSRC 7
+#define LX_NETLINK_RTA_METRICS 8
+#define LX_NETLINK_RTA_MULTIPATH 9
+#define LX_NETLINK_RTA_PROTOINFO 10
+#define LX_NETLINK_RTA_FLOW 11
+#define LX_NETLINK_RTA_CACHEINFO 12
+#define LX_NETLINK_RTA_SESSION 13
+#define LX_NETLINK_RTA_MP_ALGO 14
+#define LX_NETLINK_RTA_TABLE 15
+#define LX_NETLINK_RTA_MARK 16
+#define LX_NETLINK_RTA_MFC_STATS 17
+#define LX_NETLINK_MAX_RTA LX_NETLINK_RTA_MFC_STATS
+
+/*
+ * rtnetlink(7) NEWLINK/DELLINK/GETLINK constants
+ */
+#define LX_NETLINK_IFLA_UNSPEC 0
+#define LX_NETLINK_IFLA_ADDRESS 1
+#define LX_NETLINK_IFLA_BROADCAST 2
+#define LX_NETLINK_IFLA_IFNAME 3
+#define LX_NETLINK_IFLA_MTU 4
+#define LX_NETLINK_IFLA_LINK 5
+#define LX_NETLINK_IFLA_QDISC 6
+#define LX_NETLINK_IFLA_STATS 7
+#define LX_NETLINK_IFLA_COST 8
+#define LX_NETLINK_IFLA_PRIORITY 9
+#define LX_NETLINK_IFLA_MASTER 10
+#define LX_NETLINK_IFLA_WIRELESS 11
+#define LX_NETLINK_IFLA_PROTINFO 12
+#define LX_NETLINK_IFLA_TXQLEN 13
+#define LX_NETLINK_IFLA_MAP 14
+#define LX_NETLINK_IFLA_WEIGHT 15
+#define LX_NETLINK_IFLA_OPERSTATE 16
+#define LX_NETLINK_IFLA_LINKMODE 17
+#define LX_NETLINK_IFLA_LINKINFO 18
+#define LX_NETLINK_IFLA_NET_NS_PID 19
+#define LX_NETLINK_IFLA_IFALIAS 20
+#define LX_NETLINK_IFLA_NUM_VF 21
+#define LX_NETLINK_IFLA_VFINFO_LIST 22
+#define LX_NETLINK_IFLA_STATS64 23
+#define LX_NETLINK_IFLA_VF_PORTS 24
+#define LX_NETLINK_IFLA_PORT_SELF 25
+#define LX_NETLINK_IFLA_AF_SPEC 26
+#define LX_NETLINK_IFLA_GROUP 27
+#define LX_NETLINK_IFLA_NET_NS_FD 28
+#define LX_NETLINK_IFLA_EXT_MASK 29
+#define LX_NETLINK_IFLA_PROMISCUITY 30
+#define LX_NETLINK_IFLA_NUM_TX_QUEUES 31
+#define LX_NETLINK_IFLA_NUM_RX_QUEUES 32
+#define LX_NETLINK_IFLA_CARRIER 33
+#define LX_NETLINK_IFLA_PHYS_PORT_ID 34
+#define LX_NETLINK_IFLA_CARRIER_CHANGES 35
+#define LX_NETLINK_IFLA_MAX 36
+
+/*
+ * rtnetlink(7) NEWADDR/DELADDR/GETADDR constants
+ */
+#define LX_NETLINK_IFA_UNSPEC 0
+#define LX_NETLINK_IFA_ADDRESS 1
+#define LX_NETLINK_IFA_LOCAL 2
+#define LX_NETLINK_IFA_LABEL 3
+#define LX_NETLINK_IFA_BROADCAST 4
+#define LX_NETLINK_IFA_ANYCAST 5
+#define LX_NETLINK_IFA_CACHEINFO 6
+#define LX_NETLINK_IFA_MULTICAST 7
+#define LX_NETLINK_IFA_FLAGS 8
+#define LX_NETLINK_IFA_MAX 9
+
+#define LX_NETLINK_IFA_F_SECONDARY 0x01
+#define LX_NETLINK_IFA_F_TEMPORARY LX_NETLINK_IFA_F_SECONDARY
+#define LX_NETLINK_IFA_F_NODAD 0x02
+#define LX_NETLINK_IFA_F_OPTIMISTIC 0x04
+#define LX_NETLINK_IFA_F_DADFAILED 0x08
+#define LX_NETLINK_IFA_F_HOMEADDRESS 0x10
+#define LX_NETLINK_IFA_F_DEPRECATED 0x20
+#define LX_NETLINK_IFA_F_TENTATIVE 0x40
+#define LX_NETLINK_IFA_F_PERMANENT 0x80
+#define LX_NETLINK_IFA_F_MANAGETEMPADDR 0x100
+#define LX_NETLINK_IFA_F_NOPREFIXROUTE 0x200
+
+/*
+ * Linux interface flags.
+ */
+#define LX_IFF_UP (1<<0)
+#define LX_IFF_BROADCAST (1<<1)
+#define LX_IFF_DEBUG (1<<2)
+#define LX_IFF_LOOPBACK (1<<3)
+#define LX_IFF_POINTOPOINT (1<<4)
+#define LX_IFF_NOTRAILERS (1<<5)
+#define LX_IFF_RUNNING (1<<6)
+#define LX_IFF_NOARP (1<<7)
+#define LX_IFF_PROMISC (1<<8)
+#define LX_IFF_ALLMULTI (1<<9)
+#define LX_IFF_MASTER (1<<10)
+#define LX_IFF_SLAVE (1<<11)
+#define LX_IFF_MULTICAST (1<<12)
+#define LX_IFF_PORTSEL (1<<13)
+#define LX_IFF_AUTOMEDIA (1<<14)
+#define LX_IFF_DYNAMIC (1<<15)
+#define LX_IFF_LOWER_UP (1<<16)
+#define LX_IFF_DORMANT (1<<17)
+#define LX_IFF_ECHO (1<<18)
+
+/* rtm_table */
+#define LX_ROUTE_TABLE_MAIN 254
+
+/* rtm_type */
+#define LX_RTN_UNSPEC 0
+#define LX_RTN_UNICAST 1
+#define LX_RTN_LOCAL 2
+#define LX_RTN_BROADCAST 3
+#define LX_RTN_ANYCAST 4
+#define LX_RTN_MULTICAST 5
+#define LX_RTN_BLACKHOLE 6
+#define LX_RTN_UNREACHABLE 7
+#define LX_RTN_PROHIBIT 8
+#define LX_RTN_THROW 9
+#define LX_RTN_NAT 10
+#define LX_RTN_XRESOLVE 11
+
+/* rtm_protocol */
+#define LX_RTPROT_UNSPEC 0
+#define LX_RTPROT_REDIRECT 1 /* From ICMP redir */
+#define LX_RTPROT_KERNEL 2 /* From kernel */
+#define LX_RTPROT_BOOT 3 /* From boot */
+#define LX_RTPROT_STATIC 4 /* From administrator */
+#define LX_RTPROT_NULL 0xff /* Uninitialized */
+
+/* rtm_scope */
+#define LX_RTSCOPE_UNIVERSE 0
+#define LX_RTSCOPE_SITE 200
+#define LX_RTSCOPE_LINK 253
+#define LX_RTSCOPE_HOST 254
+#define LX_RTSCOPE_NOWHERE 255
+
+
+/*
+ * Netlink sockopts
+ */
+#define SOL_LX_NETLINK 270
+
+#define LX_NETLINK_SO_ADD_MEMBERSHIP 1
+#define LX_NETLINK_SO_DROP_MEMBERSHIP 2
+#define LX_NETLINK_SO_PKTINFO 3
+#define LX_NETLINK_SO_BROADCAST_ERROR 4
+#define LX_NETLINK_SO_NO_ENOBUFS 5
+#define LX_NETLINK_SO_RX_RING 6
+#define LX_NETLINK_SO_TX_RING 7
+
+/* Internal socket flags */
+#define LXNLF_RECVUCRED 0x1
+
+/* nlmsg structure macros */
+#define LXNLMSG_ALIGNTO 4
+#define LXNLMSG_ALIGN(len) \
+ (((len) + LXNLMSG_ALIGNTO - 1) & ~(LXNLMSG_ALIGNTO - 1))
+#define LXNLMSG_HDRLEN \
+ ((int)LXNLMSG_ALIGN(sizeof (lx_netlink_hdr_t)))
+#define LXNLMSG_LENGTH(len) ((len) + NLMSG_HDRLEN)
+#define LXNLMSG_SPACE(len) NLMSG_ALIGN(NLMSG_LENGTH(len))
+#define LXNLMSG_DATA(nlh) ((void*)(((char *)nlh) + NLMSG_LENGTH(0)))
+#define LXNLMSG_PAYLOAD(nlh, len) \
+ ((nlh)->nlmsg_len - NLMSG_SPACE((len)))
+
+#define LXATTR_PAYLOAD(lxa) \
+ ((void*)((caddr_t)(lxa) + sizeof (lx_netlink_attr_t)))
+#define LXATTR_HDRLEN LXNLMSG_ALIGN(sizeof (lx_netlink_attr_t))
+#define LXATTR_LEN(len) (LXATTR_HDRLEN + LXNLMSG_ALIGN(len))
+
+typedef struct lx_netlink_hdr {
+ uint32_t lxnh_len; /* length of message */
+ uint16_t lxnh_type; /* type of message */
+ uint16_t lxnh_flags; /* flags */
+ uint32_t lxnh_seq; /* sequence number */
+ uint32_t lxnh_pid; /* sending pid */
+} lx_netlink_hdr_t;
+
+typedef struct lx_netlink_err {
+ lx_netlink_hdr_t lxne_hdr; /* header */
+ int32_t lxne_errno; /* errno */
+ lx_netlink_hdr_t lxne_failed; /* header of err */
+} lx_netlink_err_t;
+
+typedef struct lx_netlink_attr {
+ uint16_t lxna_len; /* length of attribute */
+ uint16_t lxna_type; /* type of attribute */
+} lx_netlink_attr_t;
+
+typedef struct lx_netlink_ifinfomsg {
+ uint8_t lxnl_ifi_family; /* family: AF_UNSPEC */
+ uint8_t lxnl_ifi__pad;
+ uint16_t lxnl_ifi_type; /* device type */
+ uint32_t lxnl_ifi_index; /* interface index */
+ uint32_t lxnl_ifi_flags; /* device flags */
+ uint32_t lxnl_ifi_change; /* unused; must be -1 */
+} lx_netlink_ifinfomsg_t;
+
+typedef struct lx_netlink_ifaddrmsg {
+ uint8_t lxnl_ifa_family; /* address type */
+ uint8_t lxnl_ifa_prefixlen; /* prefix length of address */
+ uint8_t lxnl_ifa_flags; /* address flags */
+ uint8_t lxnl_ifa_scope; /* address scope */
+ uint8_t lxnl_ifa_index; /* interface index */
+} lx_netlink_ifaddrmsg_t;
+
+typedef struct lx_netlink_rtmsg {
+ uint8_t rtm_family; /* route AF */
+ uint8_t rtm_dst_len; /* destination addr length */
+ uint8_t rtm_src_len; /* source addr length */
+ uint8_t rtm_tos; /* TOS filter */
+ uint8_t rtm_table; /* routing table ID */
+ uint8_t rtm_protocol; /* routing protocol */
+ uint8_t rtm_scope;
+ uint8_t rtm_type;
+ uint32_t rtm_flags;
+} lx_netlink_rtmsg_t;
+
+typedef struct lx_netlink_sockaddr {
+ sa_family_t lxnl_family; /* AF_LX_NETLINK */
+ uint16_t lxnl_pad; /* padding */
+ uint32_t lxnl_port; /* port id */
+ uint32_t lxnl_groups; /* multicast groups mask */
+} lx_netlink_sockaddr_t;
+
+typedef struct lx_netlink_sock {
+ struct lx_netlink_sock *lxns_next; /* list of lx_netlink sockets */
+ sock_upcalls_t *lxns_upcalls; /* pointer to socket upcalls */
+ sock_upper_handle_t lxns_uphandle; /* socket upcall handle */
+ ldi_handle_t lxns_iphandle; /* handle to /dev/ip */
+ ldi_handle_t lxns_ip6handle; /* handle to /dev/ip6 */
+ ldi_handle_t lxns_current; /* current ip handle */
+ int lxns_proto; /* protocol */
+ uint32_t lxns_port; /* port identifier */
+ uint32_t lxns_groups; /* group subscriptions */
+ uint32_t lxns_bufsize; /* buffer size */
+ uint32_t lxns_flags; /* socket flags */
+} lx_netlink_sock_t;
+
+typedef struct lx_netlink_reply {
+ lx_netlink_hdr_t lxnr_hdr; /* header that we're reply to */
+ lx_netlink_sock_t *lxnr_sock; /* socket */
+ uint32_t lxnr_seq; /* sequence number */
+ uint16_t lxnr_type; /* type of reply */
+ mblk_t *lxnr_mp; /* current mblk */
+ mblk_t *lxnr_err; /* error mblk */
+ mblk_t *lxnr_mp1; /* T_UNITDATA_IND mblk */
+ int lxnr_errno; /* errno, if any */
+} lx_netlink_reply_t;
+
+static lx_netlink_sock_t *lx_netlink_head; /* head of lx_netlink sockets */
+static kmutex_t lx_netlink_lock; /* lock to protect state */
+static ldi_ident_t lx_netlink_ldi; /* LDI handle */
+static int lx_netlink_bufsize = 4096; /* default buffer size */
+static int lx_netlink_flowctrld; /* # of times flow controlled */
+
+/*ARGSUSED*/
+static void
+lx_netlink_activate(sock_lower_handle_t handle,
+ sock_upper_handle_t sock_handle, sock_upcalls_t *sock_upcalls,
+ int flags, cred_t *cr)
+{
+ lx_netlink_sock_t *lxsock = (lx_netlink_sock_t *)handle;
+ struct sock_proto_props sopp;
+
+ sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT |
+ SOCKOPT_RCVLOWAT | SOCKOPT_MAXADDRLEN | SOCKOPT_MAXPSZ |
+ SOCKOPT_MAXBLK | SOCKOPT_MINPSZ;
+ sopp.sopp_wroff = 0;
+ sopp.sopp_rxhiwat = SOCKET_RECVHIWATER;
+ sopp.sopp_rxlowat = SOCKET_RECVLOWATER;
+ sopp.sopp_maxaddrlen = sizeof (struct sockaddr_dl);
+ sopp.sopp_maxpsz = INFPSZ;
+ sopp.sopp_maxblk = INFPSZ;
+ sopp.sopp_minpsz = 0;
+
+ lxsock->lxns_upcalls = sock_upcalls;
+ lxsock->lxns_uphandle = sock_handle;
+
+ sock_upcalls->su_set_proto_props(sock_handle, &sopp);
+}
+
+/*ARGSUSED*/
+static int
+lx_netlink_setsockopt(sock_lower_handle_t handle, int level,
+ int option_name, const void *optval, socklen_t optlen, struct cred *cr)
+{
+ lx_netlink_sock_t *lxsock = (lx_netlink_sock_t *)handle;
+
+ if (level == SOL_SOCKET && option_name == SO_RECVUCRED) {
+ int *ival;
+ if (optlen != sizeof (int)) {
+ return (EINVAL);
+ }
+ ival = (int *)optval;
+ if (*ival == 0) {
+ lxsock->lxns_flags &= ~LXNLF_RECVUCRED;
+ } else {
+ lxsock->lxns_flags |= LXNLF_RECVUCRED;
+ }
+ return (0);
+ } else if (level == SOL_SOCKET) {
+ /* Punt on the other SOL_SOCKET options */
+ return (0);
+ } else if (level != SOL_LX_NETLINK) {
+ return (EOPNOTSUPP);
+ }
+
+ switch (option_name) {
+ case LX_NETLINK_SO_ADD_MEMBERSHIP:
+ case LX_NETLINK_SO_DROP_MEMBERSHIP:
+ case LX_NETLINK_SO_PKTINFO:
+ case LX_NETLINK_SO_BROADCAST_ERROR:
+ case LX_NETLINK_SO_NO_ENOBUFS:
+ case LX_NETLINK_SO_RX_RING:
+ case LX_NETLINK_SO_TX_RING:
+ /* Blatant lie */
+ return (0);
+ default:
+ return (EINVAL);
+ }
+}
+
+/*ARGSUSED*/
+static int
+lx_netlink_bind(sock_lower_handle_t handle, struct sockaddr *name,
+ socklen_t namelen, struct cred *cr)
+{
+ lx_netlink_sock_t *lxsock = (lx_netlink_sock_t *)handle;
+ lx_netlink_sockaddr_t *lxsa = (lx_netlink_sockaddr_t *)name;
+
+ if (namelen != sizeof (lx_netlink_sockaddr_t) ||
+ lxsa->lxnl_family != AF_LX_NETLINK) {
+ return (EINVAL);
+ }
+
+
+ if (lxsa->lxnl_groups != 0) {
+ /*
+ * On linux, CAP_NET_ADMIN is needed to bind to netlink groups.
+ * This roughly maps to PRIV_SYS_IP_CONFIG.
+ */
+ if (secpolicy_ip_config(cr, B_FALSE) != 0) {
+ return (EACCES);
+ }
+
+ /* Lie about group subscription for now */
+ lxsock->lxns_groups = lxsa->lxnl_groups;
+ }
+
+ /*
+ * Linux netlink uses nl_port to identify distinct netlink sockets.
+ * Binding to an address of nl_port=0 triggers the kernel to
+ * automatically assign a free nl_port identifier. Originally,
+ * consumers of lx_netlink were required to bind with that automatic
+ * address. We now support non-zero values for nl_port although strict
+ * checking to identify conflicts is not performed. Use of the
+ * id_space facility could be a convenient solution, if a need arose.
+ */
+ if (lxsa->lxnl_port == 0) {
+ /*
+ * Because we are not doing conflict detection, there is no
+ * need to expend effort selecting a unique port for automatic
+ * addressing during bind.
+ */
+ lxsock->lxns_port = curproc->p_pid;
+ } else {
+ lxsock->lxns_port = lxsa->lxnl_port;
+ }
+
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+lx_netlink_getsockname(sock_lower_handle_t handle, struct sockaddr *sa,
+ socklen_t *len, struct cred *cr)
+{
+ lx_netlink_sock_t *lxsock = (lx_netlink_sock_t *)handle;
+ lx_netlink_sockaddr_t *lxsa = (lx_netlink_sockaddr_t *)sa;
+
+ if (*len < sizeof (lx_netlink_sockaddr_t))
+ return (EINVAL);
+
+ lxsa->lxnl_family = AF_LX_NETLINK;
+ lxsa->lxnl_pad = 0;
+ lxsa->lxnl_port = lxsock->lxns_port;
+ lxsa->lxnl_groups = lxsock->lxns_groups;
+
+ *len = sizeof (lx_netlink_sockaddr_t);
+
+ return (0);
+}
+
+static mblk_t *
+lx_netlink_alloc_mp1(lx_netlink_sock_t *lxsock)
+{
+ mblk_t *mp;
+ size_t size;
+ struct T_unitdata_ind *tunit;
+ lx_netlink_sockaddr_t *lxsa;
+ boolean_t send_ucred;
+
+ /*
+ * Certain netlink clients (such as systemd) will set SO_RECVUCRED
+ * (via the Linux SCM_CREDENTIALS) on the expectation that all replies
+ * will contain credentials passed via cmsg. They require this to
+ * authenticate those messages as having originated in the kernel by
+ * checking uc_pid == 0.
+ */
+ VERIFY(lxsock != NULL);
+ send_ucred = ((lxsock->lxns_flags & LXNLF_RECVUCRED) != 0);
+
+ /*
+ * Message structure:
+ * +----------------------------+
+ * | struct T_unit_data_ind |
+ * +----------------------------+
+ * | lx_netlink_sockaddr_t |
+ * +----------------------------+ -+
+ * | struct cmsghdr (SCM_UCRED) | |
+ * +----------------------------+ +-(optional)
+ * | struct ucred_s (cmsg data) | |
+ * +----------------------------+ -+
+ */
+ size = sizeof (*tunit) + sizeof (*lxsa);
+ if (send_ucred) {
+ size += sizeof (struct cmsghdr) +
+ ROUNDUP_cmsglen(sizeof (struct ucred_s));
+ }
+ mp = allocb(size, 0);
+ if (mp == NULL) {
+ return (NULL);
+ }
+
+ tunit = (struct T_unitdata_ind *)mp->b_rptr;
+ lxsa = (lx_netlink_sockaddr_t *)((caddr_t)tunit + sizeof (*tunit));
+ mp->b_wptr += size;
+
+ mp->b_datap->db_type = M_PROTO;
+ tunit->PRIM_type = T_UNITDATA_IND;
+ tunit->SRC_length = sizeof (*lxsa);
+ tunit->SRC_offset = (caddr_t)lxsa - (caddr_t)mp->b_rptr;
+
+ lxsa->lxnl_family = AF_LX_NETLINK;
+ lxsa->lxnl_port = 0;
+ lxsa->lxnl_groups = 0;
+ lxsa->lxnl_pad = 0;
+
+ if (send_ucred) {
+ struct cmsghdr *cmsg;
+ struct ucred_s *ucred;
+
+ cmsg = (struct cmsghdr *)((caddr_t)lxsa + sizeof (*lxsa));
+ ucred = (struct ucred_s *)CMSG_CONTENT(cmsg);
+ cmsg->cmsg_len = sizeof (*cmsg) + sizeof (*ucred);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_UCRED;
+ bzero(ucred, sizeof (*ucred));
+ ucred->uc_size = sizeof (*ucred);
+ ucred->uc_zoneid = getzoneid();
+
+ tunit->OPT_length = sizeof (*cmsg) +
+ ROUNDUP_cmsglen(sizeof (*ucred));
+ tunit->OPT_offset = (caddr_t)cmsg - (caddr_t)mp->b_rptr;
+ } else {
+ tunit->OPT_length = 0;
+ tunit->OPT_offset = 0;
+ }
+
+ return (mp);
+}
+
+static lx_netlink_reply_t *
+lx_netlink_reply(lx_netlink_sock_t *lxsock,
+ lx_netlink_hdr_t *hdr, uint16_t type)
+{
+ lx_netlink_reply_t *reply;
+ mblk_t *err, *mp1;
+
+ /*
+ * We always allocate an error block to assure that even if subsequent
+ * allocations fail, we can return an error.
+ */
+ if ((err = allocb(sizeof (lx_netlink_err_t), 0)) == NULL)
+ return (NULL);
+
+ if ((mp1 = lx_netlink_alloc_mp1(lxsock)) == NULL) {
+ freeb(err);
+ return (NULL);
+ }
+
+ reply = kmem_zalloc(sizeof (lx_netlink_reply_t), KM_SLEEP);
+ reply->lxnr_err = err;
+ reply->lxnr_sock = lxsock;
+ reply->lxnr_hdr = *hdr;
+ reply->lxnr_type = type;
+ reply->lxnr_mp1 = mp1;
+
+ return (reply);
+}
+
+static void
+lx_netlink_reply_add(lx_netlink_reply_t *reply, void *payload, uint32_t size)
+{
+ lx_netlink_hdr_t *hdr;
+ lx_netlink_sock_t *lxsock = reply->lxnr_sock;
+ uint32_t aligned;
+ mblk_t *mp = reply->lxnr_mp;
+
+ if (reply->lxnr_errno)
+ return;
+
+ aligned = LXNLMSG_ALIGN(size);
+ hdr = (lx_netlink_hdr_t *)mp->b_rptr;
+
+ if (hdr->lxnh_len + aligned > lxsock->lxns_bufsize) {
+ reply->lxnr_errno = E2BIG;
+ return;
+ }
+
+ bcopy(payload, mp->b_wptr, size);
+ hdr->lxnh_len += aligned;
+ mp->b_wptr += aligned;
+}
+
+static void
+lx_netlink_reply_msg(lx_netlink_reply_t *reply, void *payload, uint32_t size)
+{
+ lx_netlink_hdr_t *hdr;
+ lx_netlink_sock_t *lxsock = reply->lxnr_sock;
+ mblk_t *mp;
+
+ if (reply->lxnr_errno)
+ return;
+
+ VERIFY(reply->lxnr_mp == NULL);
+
+ if ((reply->lxnr_mp = mp = allocb(lxsock->lxns_bufsize, 0)) == NULL) {
+ reply->lxnr_errno = ENOMEM;
+ return;
+ }
+
+ bzero(mp->b_rptr, lxsock->lxns_bufsize);
+ hdr = (lx_netlink_hdr_t *)mp->b_rptr;
+ hdr->lxnh_flags = LX_NETLINK_NLM_F_MULTI;
+ hdr->lxnh_len = LXNLMSG_ALIGN(sizeof (lx_netlink_hdr_t));
+ hdr->lxnh_seq = reply->lxnr_hdr.lxnh_seq;
+ hdr->lxnh_pid = lxsock->lxns_port;
+
+ mp->b_wptr += LXNLMSG_ALIGN(sizeof (lx_netlink_hdr_t));
+
+ if (payload == NULL) {
+ /*
+ * A NULL payload denotes a "done" message.
+ */
+ hdr->lxnh_type = LX_NETLINK_NLMSG_DONE;
+ } else {
+ hdr->lxnh_type = reply->lxnr_type;
+ lx_netlink_reply_add(reply, payload, size);
+ }
+}
+
+static void
+lx_netlink_reply_attr(lx_netlink_reply_t *reply, uint16_t type,
+ void *payload, uint32_t size)
+{
+ lx_netlink_attr_t attr;
+
+ attr.lxna_len = size + sizeof (lx_netlink_attr_t);
+ attr.lxna_type = type;
+
+ lx_netlink_reply_add(reply, &attr, sizeof (attr));
+ lx_netlink_reply_add(reply, payload, size);
+}
+
+static void
+lx_netlink_reply_attr_string(lx_netlink_reply_t *reply,
+ uint16_t type, const char *str)
+{
+ lx_netlink_reply_attr(reply, type, (void *)str, strlen(str) + 1);
+}
+
+static void
+lx_netlink_reply_attr_int32(lx_netlink_reply_t *reply,
+ uint16_t type, int32_t val)
+{
+ int32_t v = val;
+
+ lx_netlink_reply_attr(reply, type, &v, sizeof (int32_t));
+}
+
+static int
+lx_netlink_reply_ioctl(lx_netlink_reply_t *reply, int cmd, void *arg)
+{
+ int rval;
+
+ if (reply->lxnr_errno != 0)
+ return (reply->lxnr_errno);
+
+ if ((rval = ldi_ioctl(reply->lxnr_sock->lxns_current,
+ cmd, (intptr_t)arg, FKIOCTL, kcred, NULL)) != 0) {
+ reply->lxnr_errno = rval;
+ }
+
+ return (rval);
+}
+
+static void
+lx_netlink_reply_sendup(lx_netlink_reply_t *reply, mblk_t *mp, mblk_t *mp1)
+{
+ lx_netlink_sock_t *lxsock = reply->lxnr_sock;
+ int error;
+
+ /*
+ * To prevent the stream head from coalescing messages and to indicate
+ * their origin, we send them as T_UNITDATA_IND messages, not as raw
+ * M_DATA.
+ */
+ mp1->b_cont = mp;
+
+ lxsock->lxns_upcalls->su_recv(lxsock->lxns_uphandle, mp1,
+ msgdsize(mp1), 0, &error, NULL);
+
+ if (error != 0)
+ lx_netlink_flowctrld++;
+}
+
+static void
+lx_netlink_reply_send(lx_netlink_reply_t *reply)
+{
+ mblk_t *mp1;
+
+ if (reply->lxnr_errno)
+ return;
+
+ if ((mp1 = lx_netlink_alloc_mp1(reply->lxnr_sock)) == NULL) {
+ reply->lxnr_errno = ENOMEM;
+ return;
+ }
+
+ lx_netlink_reply_sendup(reply, reply->lxnr_mp, mp1);
+ reply->lxnr_mp = NULL;
+}
+
+static void
+lx_netlink_reply_done(lx_netlink_reply_t *reply)
+{
+ lx_netlink_sock_t *lxsock = reply->lxnr_sock;
+ mblk_t *mp;
+
+ /*
+ * Denote that we're done via a message with a NULL payload.
+ */
+ lx_netlink_reply_msg(reply, NULL, 0);
+
+ if (reply->lxnr_errno) {
+ /*
+ * If anything failed, we'll send up an error message.
+ */
+ lx_netlink_hdr_t *hdr;
+ lx_netlink_err_t *err;
+
+ if (reply->lxnr_mp != NULL) {
+ freeb(reply->lxnr_mp);
+ reply->lxnr_mp = NULL;
+ }
+
+ mp = reply->lxnr_err;
+ VERIFY(mp != NULL);
+ reply->lxnr_err = NULL;
+ err = (lx_netlink_err_t *)mp->b_rptr;
+ hdr = &err->lxne_hdr;
+ mp->b_wptr += sizeof (lx_netlink_err_t);
+
+ err->lxne_failed = reply->lxnr_hdr;
+ err->lxne_errno = reply->lxnr_errno;
+ hdr->lxnh_type = LX_NETLINK_NLMSG_ERROR;
+ hdr->lxnh_seq = reply->lxnr_hdr.lxnh_seq;
+ hdr->lxnh_len = sizeof (lx_netlink_err_t);
+ hdr->lxnh_seq = reply->lxnr_hdr.lxnh_seq;
+ hdr->lxnh_pid = lxsock->lxns_port;
+ } else {
+ mp = reply->lxnr_mp;
+ VERIFY(mp != NULL);
+ reply->lxnr_mp = NULL;
+ }
+
+ lx_netlink_reply_sendup(reply, mp, reply->lxnr_mp1);
+
+ if (reply->lxnr_mp != NULL)
+ freeb(reply->lxnr_mp);
+
+ if (reply->lxnr_err != NULL)
+ freeb(reply->lxnr_err);
+
+ kmem_free(reply, sizeof (lx_netlink_reply_t));
+}
+
+static int
+lx_netlink_reply_error(lx_netlink_sock_t *lxsock,
+ lx_netlink_hdr_t *hdr, int errno)
+{
+ /*
+ * The type of the message doesn't matter, as we're going to explicitly
+ * set lxnr_errno and therefore send only an error message.
+ */
+ lx_netlink_reply_t *reply = lx_netlink_reply(lxsock, hdr, 0);
+
+ if (reply == NULL)
+ return (ENOMEM);
+
+ reply->lxnr_errno = errno;
+ lx_netlink_reply_done(reply);
+
+ return (0);
+}
+
+static int
+lx_netlink_parse_msg_attrs(mblk_t *mp, void **msgp, unsigned int msg_size,
+ lx_netlink_attr_t **attrp, unsigned int *attr_max)
+{
+ lx_netlink_hdr_t *hdr = (lx_netlink_hdr_t *)mp->b_rptr;
+ lx_netlink_attr_t *lxa;
+ unsigned char *buf = mp->b_rptr + LXNLMSG_HDRLEN;
+ unsigned int i;
+ uint32_t buf_left = MBLKL(mp) - LXNLMSG_HDRLEN;
+ uint32_t msg_left = hdr->lxnh_len;
+
+ msg_size = LXNLMSG_ALIGN(msg_size);
+ if (msg_size > buf_left || msg_size > msg_left) {
+ return (-1);
+ }
+
+ *msgp = (void *)buf;
+ buf += msg_size;
+ buf_left -= msg_size;
+ msg_left -= msg_size;
+
+ /* Do not bother with attr parsing if not requested */
+ if (attrp == NULL || *attr_max == 0) {
+ return (0);
+ }
+
+ for (i = 0; i < *attr_max; i++) {
+ if (buf_left < LXATTR_HDRLEN || msg_left < LXATTR_HDRLEN) {
+ break;
+ }
+
+ lxa = (lx_netlink_attr_t *)buf;
+ if (lxa->lxna_len > buf_left || lxa->lxna_len > msg_left) {
+ return (-1);
+ }
+
+ attrp[i] = lxa;
+ buf += lxa->lxna_len;
+ buf_left -= lxa->lxna_len;
+ msg_left -= lxa->lxna_len;
+ }
+ *attr_max = i;
+
+ return (0);
+}
+
+/*
+ * Takes an IPv4 address (in network byte order) and returns the address scope.
+ */
+static uint8_t
+lx_ipv4_rtscope(in_addr_t nbo_addr) {
+ in_addr_t addr = ntohl(nbo_addr);
+ if ((addr >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
+ return (LX_RTSCOPE_HOST);
+ } else if ((addr & IN_AUTOCONF_MASK) == IN_AUTOCONF_NET) {
+ return (LX_RTSCOPE_LINK);
+ } else if ((addr & IN_PRIVATE8_MASK) == IN_PRIVATE8_NET ||
+ (addr & IN_PRIVATE12_MASK) == IN_PRIVATE12_NET ||
+ (addr & IN_PRIVATE16_MASK) == IN_PRIVATE16_NET) {
+ return (LX_RTSCOPE_SITE);
+ } else {
+ return (LX_RTSCOPE_UNIVERSE);
+ }
+}
+
+/*
+ * Takes an IPv6 address and returns the address scope.
+ */
+static uint8_t
+lx_ipv6_rtscope(const in6_addr_t *addr) {
+ if (IN6_ARE_ADDR_EQUAL(addr, &ipv6_loopback)) {
+ return (LX_RTSCOPE_HOST);
+ } else if (IN6_IS_ADDR_LINKLOCAL(addr)) {
+ return (LX_RTSCOPE_LINK);
+ } else if (IN6_IS_ADDR_SITELOCAL(addr)) {
+ return (LX_RTSCOPE_SITE);
+ } else {
+ return (LX_RTSCOPE_UNIVERSE);
+ }
+}
+
+static void
+lx_netlink_getlink_lifreq(lx_netlink_reply_t *reply, struct lifreq *lifr)
+{
+ lx_netlink_ifinfomsg_t ifi;
+ int i;
+ char if_name[IFNAMSIZ];
+ struct sockaddr_dl *sdl;
+ struct sockaddr hwaddr;
+ int hwaddr_size;
+ boolean_t is_loopback;
+
+ struct {
+ int native;
+ int lx;
+ } flags[] = {
+ { IFF_UP, LX_IFF_UP },
+ { IFF_BROADCAST, LX_IFF_BROADCAST },
+ { IFF_DEBUG, LX_IFF_DEBUG },
+ { IFF_LOOPBACK, LX_IFF_LOOPBACK },
+ { IFF_POINTOPOINT, LX_IFF_POINTOPOINT },
+ { IFF_NOTRAILERS, LX_IFF_NOTRAILERS },
+ { IFF_RUNNING, LX_IFF_RUNNING },
+ { IFF_NOARP, LX_IFF_NOARP },
+ { IFF_PROMISC, LX_IFF_PROMISC },
+ { IFF_ALLMULTI, LX_IFF_ALLMULTI },
+ { IFF_MULTICAST, LX_IFF_MULTICAST },
+ { 0 }
+ };
+
+ /*
+ * illumos interfaces that contain a ':' are non-zero logical
+ * interfaces. We should only emit the name of the zeroth logical
+ * interface, since RTM_GETLINK only expects to see the name of
+ * devices. The addresses of all logical devices will be
+ * returned via an RTM_GETADDR.
+ */
+ if (strchr(lifr->lifr_name, ':') != NULL)
+ return;
+
+ /*
+ * Most of the lx_netlink module is architected to emit information in
+ * an illumos-native manner. Socket syscalls such as getsockname will
+ * not translate fields to values Linux programs would expect since
+ * that conversion is performed by the generic socket emulation.
+ *
+ * This is _not_ true of the actual protocol output from lx_netlink.
+ * Since translating it at the socket layer would be onerous, all
+ * output (including constants and names) is pre-translated to values
+ * valid for Linux.
+ */
+
+ bzero(&ifi, sizeof (ifi));
+ ifi.lxnl_ifi_family = AF_UNSPEC;
+ ifi.lxnl_ifi_change = (uint32_t)-1;
+
+ /* Convert the name to be Linux-friendly */
+ (void) strlcpy(if_name, lifr->lifr_name, IFNAMSIZ);
+ lx_ifname_convert(if_name, LX_IF_FROMNATIVE);
+ is_loopback = (strncmp(if_name, "lo", 2) == 0);
+
+ if (lx_netlink_reply_ioctl(reply, SIOCGLIFINDEX, lifr) != 0)
+ return;
+
+ ifi.lxnl_ifi_index = lifr->lifr_index;
+
+ if (lx_netlink_reply_ioctl(reply, SIOCGLIFFLAGS, lifr) != 0)
+ return;
+
+ for (i = 0; flags[i].native; i++) {
+ if (lifr->lifr_flags & flags[i].native)
+ ifi.lxnl_ifi_flags |= flags[i].lx;
+ }
+
+ /*
+ * Query the datalink address.
+ * The interface type will be included in the outgoing infomsg while
+ * the address itself will be output separately.
+ */
+ sdl = (struct sockaddr_dl *)&lifr->lifr_addr;
+ bzero(sdl, sizeof (*sdl));
+ if (!is_loopback) {
+ lx_netlink_reply_ioctl(reply, SIOCGLIFHWADDR, lifr);
+ } else {
+ /* Simulate an empty hwaddr for loopback */
+ sdl->sdl_type = DL_LOOP;
+ sdl->sdl_alen = ETHERADDRL;
+ }
+ lx_stol_hwaddr(sdl, &hwaddr, &hwaddr_size);
+
+ ifi.lxnl_ifi_type = hwaddr.sa_family;
+ lx_netlink_reply_msg(reply, &ifi, sizeof (lx_netlink_ifinfomsg_t));
+
+ lx_netlink_reply_attr_string(reply, LX_NETLINK_IFLA_IFNAME, if_name);
+
+ if (lx_netlink_reply_ioctl(reply, SIOCGLIFMTU, lifr) != 0)
+ return;
+
+ lx_netlink_reply_attr_int32(reply, LX_NETLINK_IFLA_MTU, lifr->lifr_mtu);
+
+ if (hwaddr_size != 0) {
+ lx_netlink_reply_attr(reply, LX_NETLINK_IFLA_ADDRESS,
+ hwaddr.sa_data, hwaddr_size);
+ }
+
+ /* Emulate a txqlen of 1. (0 for loopbacks) */
+ lx_netlink_reply_attr_int32(reply, LX_NETLINK_IFLA_TXQLEN,
+ (is_loopback) ? 0 : 1);
+
+ lx_netlink_reply_send(reply);
+}
+
+static void
+lx_netlink_reply_eachfamily(lx_netlink_reply_t *reply,
+ void (*func)(lx_netlink_reply_t *, struct lifreq *), boolean_t distinct)
+{
+ lx_netlink_sock_t *sock = reply->lxnr_sock;
+ int nlifr, i;
+
+ struct {
+ int family;
+ ldi_handle_t handle;
+ struct lifconf lifc;
+ struct lifnum lifn;
+ } families[] = {
+ { AF_INET, sock->lxns_iphandle },
+ { AF_INET6, sock->lxns_ip6handle },
+ { AF_UNSPEC }
+ }, *family, *check;
+
+ for (family = families; family->family != AF_UNSPEC; family++) {
+ struct lifconf *lifc = &family->lifc;
+ struct lifnum *lifn = &family->lifn;
+
+ lifn->lifn_family = family->family;
+ sock->lxns_current = family->handle;
+
+ if (lx_netlink_reply_ioctl(reply, SIOCGLIFNUM, lifn) != 0)
+ break;
+
+ lifc->lifc_family = lifn->lifn_family;
+ lifc->lifc_flags = 0;
+ lifc->lifc_len = lifn->lifn_count * sizeof (struct lifreq);
+ if (lifn->lifn_count == 0) {
+ lifc->lifc_buf = NULL;
+ continue;
+ }
+ lifc->lifc_buf = kmem_alloc(lifc->lifc_len, KM_SLEEP);
+
+ if (lx_netlink_reply_ioctl(reply, SIOCGLIFCONF, lifc) != 0)
+ break;
+
+ nlifr = lifc->lifc_len / sizeof (lifc->lifc_req[0]);
+
+ for (i = 0; i < nlifr; i++) {
+ if (!distinct) {
+ func(reply, &lifc->lifc_req[i]);
+ continue;
+ }
+
+ /*
+ * If we have been asked to provide each interface
+ * exactly once, we need to (annoyingly) check this
+ * name against others that we've already processed for
+ * other families. Yes, this is quadratic time -- but
+ * the number of interfaces per family is expected to
+ * be very small.
+ */
+ for (check = families; check != family; check++) {
+ struct lifconf *clifc = &check->lifc;
+ int cnlifr = clifc->lifc_len /
+ sizeof (clifc->lifc_req[0]), j;
+ char *nm = lifc->lifc_req[i].lifr_name, *cnm;
+
+ for (j = 0; j < cnlifr; j++) {
+ cnm = clifc->lifc_req[j].lifr_name;
+
+ if (strcmp(nm, cnm) == 0)
+ break;
+ }
+
+ if (j != cnlifr)
+ break;
+ }
+
+ if (check != family)
+ continue;
+
+ func(reply, &lifc->lifc_req[i]);
+ }
+ }
+
+ for (family = families; family->family != AF_UNSPEC; family++) {
+ struct lifconf *lifc = &family->lifc;
+
+ if (lifc->lifc_buf != NULL)
+ kmem_free(lifc->lifc_buf, lifc->lifc_len);
+ }
+}
+
+/*ARGSUSED*/
+static int
+lx_netlink_getlink(lx_netlink_sock_t *lxsock, lx_netlink_hdr_t *hdr, mblk_t *mp)
+{
+ lx_netlink_reply_t *reply;
+
+ reply = lx_netlink_reply(lxsock, hdr, LX_NETLINK_RTM_NEWLINK);
+
+ if (reply == NULL)
+ return (ENOMEM);
+
+ lx_netlink_reply_eachfamily(reply, lx_netlink_getlink_lifreq, B_TRUE);
+ lx_netlink_reply_done(reply);
+
+ return (0);
+}
+
+static void
+lx_netlink_getaddr_lifreq(lx_netlink_reply_t *reply, struct lifreq *lifr)
+{
+ lx_netlink_ifaddrmsg_t ifa;
+
+ bzero(&ifa, sizeof (ifa));
+
+ if (lx_netlink_reply_ioctl(reply, SIOCGLIFINDEX, lifr) != 0)
+ return;
+
+ ifa.lxnl_ifa_index = lifr->lifr_index;
+
+ if (lx_netlink_reply_ioctl(reply, SIOCGLIFFLAGS, lifr) != 0)
+ return;
+
+ /*
+ * Don't report on-link subnets
+ */
+ if ((lifr->lifr_flags & IFF_NOLOCAL) != 0)
+ return;
+
+ if (lx_netlink_reply_ioctl(reply, SIOCGLIFSUBNET, lifr) != 0)
+ return;
+
+ ifa.lxnl_ifa_prefixlen = lifr->lifr_addrlen;
+
+ if (lx_netlink_reply_ioctl(reply, SIOCGLIFADDR, lifr) != 0)
+ return;
+
+ if (lifr->lifr_addr.ss_family == AF_INET) {
+ struct sockaddr_in *sin;
+
+ ifa.lxnl_ifa_family = LX_AF_INET;
+
+ sin = (struct sockaddr_in *)&lifr->lifr_addr;
+ ifa.lxnl_ifa_scope = lx_ipv4_rtscope(
+ sin->sin_addr.s_addr);
+
+ lx_netlink_reply_msg(reply, &ifa,
+ sizeof (lx_netlink_ifaddrmsg_t));
+
+ lx_netlink_reply_attr_int32(reply,
+ LX_NETLINK_IFA_ADDRESS, sin->sin_addr.s_addr);
+ } else {
+ struct sockaddr_in6 *sin;
+
+ ifa.lxnl_ifa_family = LX_AF_INET6;
+
+ sin = (struct sockaddr_in6 *)&lifr->lifr_addr;
+ ifa.lxnl_ifa_scope = lx_ipv6_rtscope(&sin->sin6_addr);
+
+ lx_netlink_reply_msg(reply, &ifa,
+ sizeof (lx_netlink_ifaddrmsg_t));
+
+ lx_netlink_reply_attr(reply, LX_NETLINK_IFA_ADDRESS,
+ &sin->sin6_addr, sizeof (sin->sin6_addr));
+ }
+
+ lx_netlink_reply_send(reply);
+}
+
+/*ARGSUSED*/
+static int
+lx_netlink_getaddr(lx_netlink_sock_t *lxsock, lx_netlink_hdr_t *hdr, mblk_t *mp)
+{
+ lx_netlink_reply_t *reply;
+
+ reply = lx_netlink_reply(lxsock, hdr, LX_NETLINK_RTM_NEWADDR);
+
+ if (reply == NULL)
+ return (ENOMEM);
+
+ lx_netlink_reply_eachfamily(reply, lx_netlink_getaddr_lifreq, B_FALSE);
+ lx_netlink_reply_done(reply);
+
+ return (0);
+}
+
+struct lx_getroute_ctx {
+ lx_netlink_reply_t *lgrtctx_reply;
+ lx_netlink_rtmsg_t *lgrtctx_rtmsg;
+ lx_netlink_attr_t *lgrtctx_attrs[LX_NETLINK_MAX_RTA];
+ unsigned int lgrtctx_max_attr;
+ lx_netlink_attr_t *lgrtctx_rtadst;
+};
+
+static void
+lx_netlink_getroute_ipv4(ire_t *ire, struct lx_getroute_ctx *ctx)
+{
+ lx_netlink_reply_t *reply = ctx->lgrtctx_reply;
+ lx_netlink_rtmsg_t *rtmsg = ctx->lgrtctx_rtmsg;
+ lx_netlink_attr_t *rtadst = ctx->lgrtctx_rtadst;
+ lx_netlink_rtmsg_t res;
+ ill_t *ill = NULL;
+
+ /* Certain IREs are too specific for netlink */
+ if ((ire->ire_type & (IRE_BROADCAST | IRE_MULTICAST | IRE_NOROUTE |
+ IRE_LOOPBACK | IRE_LOCAL)) != 0 || ire->ire_testhidden != 0) {
+ return;
+ }
+ /*
+ * When listing routes, CLONE entries are undesired.
+ * They are required for 'ip route get' on a local address.
+ */
+ if (rtmsg->rtm_dst_len == 0 && (ire->ire_type & IRE_IF_CLONE) != 0) {
+ return;
+ }
+
+ bzero(&res, sizeof (res));
+ res.rtm_family = LX_AF_INET;
+ res.rtm_table = LX_ROUTE_TABLE_MAIN;
+ res.rtm_type = LX_RTN_UNICAST;
+ res.rtm_dst_len = ire->ire_masklen;
+
+ if (ire->ire_type & (IRE_IF_NORESOLVER|IRE_IF_RESOLVER)) {
+ /* Interface-local networks considered kernel-created */
+ res.rtm_protocol = LX_RTPROT_KERNEL;
+ res.rtm_scope = LX_RTSCOPE_LINK;
+ } else if (ire->ire_flags & RTF_STATIC) {
+ res.rtm_protocol = LX_RTPROT_STATIC;
+ }
+
+ if (rtmsg->rtm_dst_len == 0x20 && rtadst != NULL) {
+ /*
+ * SpecifY single-destination route.
+ * RTA_DST details will be added later
+ */
+ res.rtm_dst_len = rtmsg->rtm_dst_len;
+ }
+
+
+ lx_netlink_reply_msg(reply, &res, sizeof (res));
+
+ if (rtmsg->rtm_dst_len == 0x20 && rtadst != NULL) {
+ /* Add RTA_DST details for single-destination route. */
+ lx_netlink_reply_attr(reply, LX_NETLINK_RTA_DST,
+ LXATTR_PAYLOAD(rtadst), sizeof (ipaddr_t));
+ } else if (ire->ire_masklen != 0) {
+ lx_netlink_reply_attr(reply, LX_NETLINK_RTA_DST,
+ &ire->ire_addr, sizeof (ire->ire_addr));
+ }
+
+ if (ire->ire_ill != NULL) {
+ ill = ire->ire_ill;
+ } else if (ire->ire_dep_parent != NULL) {
+ ill = ire->ire_dep_parent->ire_ill;
+ }
+
+ if (ill != NULL) {
+ uint32_t ifindex, addr_src;
+
+ ifindex = ill->ill_phyint->phyint_ifindex;
+ lx_netlink_reply_attr(reply, LX_NETLINK_RTA_OIF,
+ &ifindex, sizeof (ifindex));
+
+ addr_src = ill->ill_ipif->ipif_lcl_addr;
+ lx_netlink_reply_attr(reply, LX_NETLINK_RTA_PREFSRC,
+ &addr_src, sizeof (addr_src));
+ }
+
+ if (ire->ire_flags & RTF_GATEWAY) {
+ lx_netlink_reply_attr(reply, LX_NETLINK_RTA_GATEWAY,
+ &ire->ire_gateway_addr, sizeof (ire->ire_gateway_addr));
+ }
+
+ lx_netlink_reply_send(reply);
+}
+
+/*ARGSUSED*/
+static int
+lx_netlink_getroute(lx_netlink_sock_t *lxsock, lx_netlink_hdr_t *hdr,
+ mblk_t *mp)
+{
+ struct lx_getroute_ctx ctx;
+ lx_netlink_reply_t *reply;
+ lx_netlink_rtmsg_t rtmsg, *rtmsgp;
+ int rtmsg_size = sizeof (rtmsg);
+ netstack_t *ns;
+ int i;
+
+ bzero(&ctx, sizeof (ctx));
+ ctx.lgrtctx_max_attr = LX_NETLINK_MAX_RTA;
+
+ if (lx_netlink_parse_msg_attrs(mp, (void **)&rtmsgp,
+ rtmsg_size, ctx.lgrtctx_attrs, &ctx.lgrtctx_max_attr) != 0) {
+ return (EPROTO);
+ }
+
+ /*
+ * Older version of libnetlink send a truncated rtmsg struct for
+ * certain RTM_GETROUTE queries. We must detect this condition and
+ * truncate our input to prevent later confusion.
+ */
+ if (curproc->p_zone->zone_brand == &lx_brand &&
+ lx_kern_release_cmp(curproc->p_zone, "2.6.32") <= 0 &&
+ rtmsgp->rtm_dst_len == 0) {
+ rtmsg_size = sizeof (rtmsg.rtm_family);
+ }
+ bzero(&rtmsg, sizeof (rtmsg));
+ bcopy(rtmsgp, &rtmsg, rtmsg_size);
+ ctx.lgrtctx_rtmsg = &rtmsg;
+
+ /* If RTA_DST was passed, it effects later decisions */
+ for (i = 0; i < ctx.lgrtctx_max_attr; i++) {
+ lx_netlink_attr_t *attr = ctx.lgrtctx_attrs[i];
+
+ if (attr->lxna_type == LX_NETLINK_RTA_DST &&
+ attr->lxna_len == LXATTR_LEN(sizeof (ipaddr_t))) {
+ ctx.lgrtctx_rtadst = attr;
+ break;
+ }
+ }
+
+ reply = lx_netlink_reply(lxsock, hdr, LX_NETLINK_RTM_NEWROUTE);
+ if (reply == NULL) {
+ return (ENOMEM);
+ }
+ ctx.lgrtctx_reply = reply;
+
+ /* Do not report anything outside the main table */
+ if (rtmsg.rtm_table != LX_ROUTE_TABLE_MAIN &&
+ rtmsg.rtm_table != 0) {
+ lx_netlink_reply_done(reply);
+ return (0);
+ }
+
+ ns = netstack_get_current();
+ if (ns == NULL) {
+ lx_netlink_reply_done(reply);
+ return (0);
+ }
+ if (rtmsg.rtm_family == LX_AF_INET || rtmsg.rtm_family == 0) {
+ if (rtmsg.rtm_dst_len == 0x20 && ctx.lgrtctx_rtadst != NULL) {
+ /* resolve route for host */
+ ipaddr_t *dst = LXATTR_PAYLOAD(ctx.lgrtctx_rtadst);
+ ire_t *ire_dst;
+
+ ire_dst = ire_route_recursive_dstonly_v4(*dst, 0, 0,
+ ns->netstack_ip);
+ lx_netlink_getroute_ipv4(ire_dst, &ctx);
+ ire_refrele(ire_dst);
+ } else {
+ /* get route listing */
+ ire_walk_v4(&lx_netlink_getroute_ipv4, &ctx, ALL_ZONES,
+ ns->netstack_ip);
+ }
+ }
+ if (rtmsg.rtm_family == LX_AF_INET6) {
+ /* punt on ipv6 for now */
+ netstack_rele(ns);
+ lx_netlink_reply_done(reply);
+ return (EPROTO);
+ }
+ netstack_rele(ns);
+
+ lx_netlink_reply_done(reply);
+ return (0);
+}
+
+
+/*ARGSUSED*/
+static int
+lx_netlink_audit(lx_netlink_sock_t *lxsock, lx_netlink_hdr_t *hdr, mblk_t *mp)
+{
+ /*
+ * For all auditing messages, we return ECONNREFUSED, which seems to
+ * keep user-level auditing happy. (Or at least, non-suicidal.)
+ */
+ return (ECONNREFUSED);
+}
+
+/*ARGSUSED*/
+static int
+lx_netlink_kobject_uevent(lx_netlink_sock_t *lxsock,
+ lx_netlink_hdr_t *hdr, mblk_t *mp)
+{
+ /*
+ * For udev, we just silently accept all writes and never actually
+ * reply with anything -- which appears to be sufficient for things
+ * to work.
+ */
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+lx_netlink_send(sock_lower_handle_t handle, mblk_t *mp,
+ struct nmsghdr *msg, cred_t *cr)
+{
+ lx_netlink_sock_t *lxsock = (lx_netlink_sock_t *)handle;
+ lx_netlink_hdr_t *hdr = (lx_netlink_hdr_t *)mp->b_rptr;
+ int i, rval;
+
+ static struct {
+ int proto;
+ uint16_t type;
+ int (*func)(lx_netlink_sock_t *, lx_netlink_hdr_t *, mblk_t *);
+ } handlers[] = {
+ { LX_NETLINK_ROUTE,
+ LX_NETLINK_RTM_GETLINK, lx_netlink_getlink },
+ { LX_NETLINK_ROUTE,
+ LX_NETLINK_RTM_GETADDR, lx_netlink_getaddr },
+ { LX_NETLINK_ROUTE,
+ LX_NETLINK_RTM_GETROUTE, lx_netlink_getroute },
+ { LX_NETLINK_AUDIT,
+ LX_NETLINK_NLMSG_NONE, lx_netlink_audit },
+ { LX_NETLINK_KOBJECT_UEVENT,
+ LX_NETLINK_NLMSG_NONE, lx_netlink_kobject_uevent },
+ { LX_NETLINK_NLMSG_NOOP, LX_NETLINK_NLMSG_NONE, NULL }
+ };
+
+ if (DB_TYPE(mp) != M_DATA || MBLKL(mp) < sizeof (lx_netlink_hdr_t)) {
+ freemsg(mp);
+ return (EPROTO);
+ }
+
+ for (i = 0; handlers[i].func != NULL; i++) {
+ if (lxsock->lxns_proto != handlers[i].proto)
+ continue;
+
+ if (handlers[i].type != LX_NETLINK_NLMSG_NONE &&
+ hdr->lxnh_type != handlers[i].type)
+ continue;
+
+ rval = handlers[i].func(lxsock, hdr, mp);
+ freemsg(mp);
+
+ return (rval);
+ }
+
+ /*
+ * An unrecognized message. We will bounce up an EOPNOTSUPP reply.
+ */
+ rval = lx_netlink_reply_error(lxsock, hdr, EOPNOTSUPP);
+ freemsg(mp);
+
+ return (rval);
+}
+
+/*ARGSUSED*/
+static int
+lx_netlink_close(sock_lower_handle_t handle, int flags, cred_t *cr)
+{
+ lx_netlink_sock_t *lxsock = (lx_netlink_sock_t *)handle, *sock, **prev;
+
+ mutex_enter(&lx_netlink_lock);
+
+ prev = &lx_netlink_head;
+
+ for (sock = *prev; sock != lxsock; sock = sock->lxns_next)
+ prev = &sock->lxns_next;
+
+ *prev = sock->lxns_next;
+
+ mutex_exit(&lx_netlink_lock);
+
+ (void) ldi_close(lxsock->lxns_iphandle, FREAD, kcred);
+ (void) ldi_close(lxsock->lxns_ip6handle, FREAD, kcred);
+ kmem_free(lxsock, sizeof (lx_netlink_sock_t));
+
+ return (0);
+}
+
+static sock_downcalls_t sock_lx_netlink_downcalls = {
+ lx_netlink_activate, /* sd_activate */
+ sock_accept_notsupp, /* sd_accept */
+ lx_netlink_bind, /* sd_bind */
+ sock_listen_notsupp, /* sd_listen */
+ sock_connect_notsupp, /* sd_connect */
+ sock_getpeername_notsupp, /* sd_getpeername */
+ lx_netlink_getsockname, /* sd_getsockname */
+ sock_getsockopt_notsupp, /* sd_getsockopt */
+ lx_netlink_setsockopt, /* sd_setsockopt */
+ lx_netlink_send, /* sd_send */
+ NULL, /* sd_send_uio */
+ NULL, /* sd_recv_uio */
+ NULL, /* sd_poll */
+ sock_shutdown_notsupp, /* sd_shutdown */
+ sock_clr_flowctrl_notsupp, /* sd_setflowctrl */
+ sock_ioctl_notsupp, /* sd_ioctl */
+ lx_netlink_close /* sd_close */
+};
+
+/*ARGSUSED*/
+static sock_lower_handle_t
+lx_netlink_create(int family, int type, int proto,
+ sock_downcalls_t **sock_downcalls, uint_t *smodep, int *errorp,
+ int flags, cred_t *credp)
+{
+ lx_netlink_sock_t *lxsock;
+ ldi_handle_t handle, handle6;
+ cred_t *kcred = zone_kcred();
+ int err;
+
+ if (family != AF_LX_NETLINK ||
+ (type != SOCK_DGRAM && type != SOCK_RAW)) {
+ *errorp = EPROTONOSUPPORT;
+ return (NULL);
+ }
+
+ switch (proto) {
+ case LX_NETLINK_ROUTE:
+ case LX_NETLINK_AUDIT:
+ case LX_NETLINK_KOBJECT_UEVENT:
+ break;
+
+ default:
+ *errorp = EPROTONOSUPPORT;
+ return (NULL);
+ }
+
+ if ((err = ldi_open_by_name(DEV_IP, FREAD, kcred,
+ &handle, lx_netlink_ldi)) != 0) {
+ *errorp = err;
+ return (NULL);
+ }
+
+ if ((err = ldi_open_by_name(DEV_IP6, FREAD, kcred,
+ &handle6, lx_netlink_ldi)) != 0) {
+ (void) ldi_close(handle, FREAD, kcred);
+ *errorp = err;
+ return (NULL);
+ }
+
+ *sock_downcalls = &sock_lx_netlink_downcalls;
+ *smodep = SM_ATOMIC;
+
+ lxsock = kmem_zalloc(sizeof (lx_netlink_sock_t), KM_SLEEP);
+ lxsock->lxns_iphandle = handle;
+ lxsock->lxns_ip6handle = handle6;
+ lxsock->lxns_bufsize = lx_netlink_bufsize;
+ lxsock->lxns_proto = proto;
+
+ mutex_enter(&lx_netlink_lock);
+
+ lxsock->lxns_next = lx_netlink_head;
+ lx_netlink_head = lxsock;
+
+ mutex_exit(&lx_netlink_lock);
+
+ return ((sock_lower_handle_t)lxsock);
+}
+
+static void
+lx_netlink_init(void)
+{
+ major_t major = mod_name_to_major("ip");
+ int err;
+
+ VERIFY(major != DDI_MAJOR_T_NONE);
+
+ err = ldi_ident_from_major(major, &lx_netlink_ldi);
+ VERIFY(err == 0);
+}
+
+static void
+lx_netlink_fini(void)
+{
+ ldi_ident_release(lx_netlink_ldi);
+}
+
+static smod_reg_t sinfo = {
+ SOCKMOD_VERSION,
+ "lx_netlink",
+ SOCK_UC_VERSION,
+ SOCK_DC_VERSION,
+ lx_netlink_create,
+ NULL
+};
+
+/* modldrv structure */
+static struct modlsockmod sockmod = {
+ &mod_sockmodops, "AF_LX_NETLINK socket module", &sinfo
+};
+
+/* modlinkage structure */
+static struct modlinkage ml = {
+ MODREV_1,
+ &sockmod,
+ NULL
+};
+
+int
+_init(void)
+{
+ int err;
+
+ lx_netlink_init();
+
+ if ((err = mod_install(&ml)) != 0)
+ lx_netlink_fini();
+
+ return (err);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&ml, modinfop));
+}
+
+int
+_fini(void)
+{
+ int err = 0;
+
+ mutex_enter(&lx_netlink_lock);
+
+ if (lx_netlink_head != NULL)
+ err = EBUSY;
+
+ mutex_exit(&lx_netlink_lock);
+
+ if (err == 0 && (err = mod_remove(&ml)) == 0)
+ lx_netlink_fini();
+
+ return (err);
+}
diff --git a/usr/src/uts/common/brand/lx/io/lx_ptm.c b/usr/src/uts/common/brand/lx/io/lx_ptm.c
new file mode 100644
index 0000000000..23e0c6f459
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/io/lx_ptm.c
@@ -0,0 +1,1188 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright 2016 Joyent, Inc. All rights reserved.
+ */
+
+
+/*
+ * This driver attempts to emulate some of the the behaviors of
+ * Linux terminal devices (/dev/ptmx and /dev/pts/[0-9][0-9]*) on Solaris
+ *
+ * It does this by layering over the /dev/ptmx device and intercepting
+ * opens to it.
+ *
+ * This driver makes the following assumptions about the way the ptm/pts
+ * drivers on Solaris work:
+ *
+ * - all opens of the /dev/ptmx device node return a unique dev_t.
+ *
+ * - the dev_t minor node value for each open ptm instance corrospondes
+ * to it's associated slave terminal device number. ie. the path to
+ * the slave terminal device associated with an open ptm instance
+ * who's dev_t minor node vaue is 5, is /dev/pts/5.
+ *
+ * - the ptm driver always allocates the lowest numbered slave terminal
+ * device possible.
+ */
+
+#include <sys/conf.h>
+#include <sys/ddi.h>
+#include <sys/devops.h>
+#include <sys/file.h>
+#include <sys/filio.h>
+#include <sys/kstr.h>
+#include <sys/lx_ptm.h>
+#include <sys/modctl.h>
+#include <sys/pathname.h>
+#include <sys/ptms.h>
+#include <sys/ptyvar.h>
+#include <sys/stat.h>
+#include <sys/stropts.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <sys/sdt.h>
+
+#define LP_PTM_PATH "/dev/ptmx"
+#define LP_PTS_PATH "/dev/pts/"
+#define LP_PTS_DRV_NAME "pts"
+#define LP_PTS_USEC_DELAY (5 * 1000) /* 5 ms */
+#define LP_PTS_USEC_DELAY_MAX (5 * MILLISEC) /* 5 ms */
+
+/*
+ * this driver is layered on top of the ptm driver. we'd like to
+ * make this drivers minor name space a mirror of the ptm drivers
+ * namespace, but we can't actually do this. the reason is that the
+ * ptm driver is opened via the clone driver. there for no minor nodes
+ * of the ptm driver are actually accessible via the filesystem.
+ * since we're not a streams device we can't be opened by the clone
+ * driver. there for we need to have at least minor node accessible
+ * via the filesystem so that consumers can open it. we use the device
+ * node with a minor number of 0 for this purpose. what this means is
+ * that minor node 0 can't be used to map ptm minor node 0. since this
+ * minor node is now reserved we need to shift our ptm minor node
+ * mappings by one. ie. a ptm minor node with a value of 0 will
+ * corrospond to our minor node with a value of 1. these mappings are
+ * managed with the following macros.
+ */
+#define DEVT_TO_INDEX(x) LX_PTM_DEV_TO_PTS(x)
+#define INDEX_TO_MINOR(x) ((x) + 1)
+
+/*
+ * grow our layered handle array by the same size increment that the ptm
+ * driver uses to grow the pty device space - PTY_MAXDELTA
+ */
+#define LP_PTY_INC 128
+
+/*
+ * lx_ptm_ops contains state information about outstanding operations on the
+ * underlying master terminal device. Currently we only track information
+ * for read operations.
+ *
+ * Note that this data has not been rolled directly into the lx_ptm_handle
+ * structure because we can't put mutex's of condition variables into
+ * lx_ptm_handle structure. The reason is that the array of lx_ptm_handle
+ * structures linked to from the global lx_ptm state can be resized
+ * dynamically, and when it's resized, the new array is at a different
+ * memory location and the old array memory is discarded. Mutexs and cvs
+ * are accessed based off their address, so if this array was re-sized while
+ * there were outstanding operations on any mutexs or cvs in the array
+ * then the system would tip over. In the future the lx_ptm_handle structure
+ * array should probably be replaced with either an array of pointers to
+ * lx_ptm_handle structures or some other kind of data structure containing
+ * pointers to lx_ptm_handle structures. Then the lx_ptm_ops structure
+ * could be folded directly into the lx_ptm_handle structures. (This will
+ * also require the definition of a new locking mechanism to protect the
+ * contents of lx_ptm_handle structures.)
+ */
+typedef struct lx_ptm_ops {
+ int lpo_rops;
+ kcondvar_t lpo_rops_cv;
+ kmutex_t lpo_rops_lock;
+} lx_ptm_ops_t;
+
+/*
+ * Every open of the master terminal device in a zone results in a new
+ * lx_ptm_handle handle allocation. These handles are stored in an array
+ * hanging off the lx_ptm_state structure.
+ */
+typedef struct lx_ptm_handle {
+ /* Device handle to the underlying real /dev/ptmx master terminal. */
+ ldi_handle_t lph_handle;
+
+ /* Flag to indicate if TIOCPKT mode has been enabled. */
+ int lph_pktio;
+
+ /* Number of times the slave device has been opened/closed. */
+ int lph_eofed;
+
+ /* Callback handler in the ptm driver to check if slave is open. */
+ ptmptsopencb_t lph_ppocb;
+
+ /* Pointer to state for operations on underlying device. */
+ lx_ptm_ops_t *lph_lpo;
+} lx_ptm_handle_t;
+
+/*
+ * Global state for the lx_ptm driver.
+ */
+typedef struct lx_ptm_state {
+ /* lx_ptm device devinfo pointer */
+ dev_info_t *lps_dip;
+
+ /* LDI ident used to open underlying real /dev/ptmx master terminals. */
+ ldi_ident_t lps_li;
+
+ /* pts drivers major number */
+ major_t lps_pts_major;
+
+ /* rw lock used to manage access and growth of lps_lh_array */
+ krwlock_t lps_lh_rwlock;
+
+ /* number of elements in lps_lh_array */
+ uint_t lps_lh_count;
+
+ /* Array of handles to underlying real /dev/ptmx master terminals. */
+ lx_ptm_handle_t *lps_lh_array;
+} lx_ptm_state_t;
+
+/* Pointer to the lx_ptm global state structure. */
+static lx_ptm_state_t lps;
+
+/*
+ * List of modules to be autopushed onto slave terminal devices when they
+ * are opened in an lx branded zone.
+ */
+static char *lx_pts_mods[] = {
+ "ptem",
+ "ldterm",
+ "ttcompat",
+ NULL
+};
+
+static void
+lx_ptm_lh_grow(uint_t index)
+{
+ uint_t new_lh_count, old_lh_count;
+ lx_ptm_handle_t *new_lh_array, *old_lh_array;
+
+ /*
+ * allocate a new array. we drop the rw lock on the array so that
+ * readers can still access devices in case our memory allocation
+ * blocks.
+ */
+ new_lh_count = MAX(lps.lps_lh_count + LP_PTY_INC, index + 1);
+ new_lh_array =
+ kmem_zalloc(sizeof (lx_ptm_handle_t) * new_lh_count, KM_SLEEP);
+
+ /*
+ * double check that we still actually need to increase the size
+ * of the array
+ */
+ rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+ if (index < lps.lps_lh_count) {
+ /* someone beat us to it so there's nothing more to do */
+ rw_exit(&lps.lps_lh_rwlock);
+ kmem_free(new_lh_array,
+ sizeof (lx_ptm_handle_t) * new_lh_count);
+ return;
+ }
+
+ /* copy the existing data into the new array */
+ ASSERT((lps.lps_lh_count != 0) || (lps.lps_lh_array == NULL));
+ ASSERT((lps.lps_lh_count == 0) || (lps.lps_lh_array != NULL));
+ if (lps.lps_lh_count != 0) {
+ bcopy(lps.lps_lh_array, new_lh_array,
+ sizeof (lx_ptm_handle_t) * lps.lps_lh_count);
+ }
+
+ /* save info on the old array */
+ old_lh_array = lps.lps_lh_array;
+ old_lh_count = lps.lps_lh_count;
+
+ /* install the new array */
+ lps.lps_lh_array = new_lh_array;
+ lps.lps_lh_count = new_lh_count;
+
+ rw_exit(&lps.lps_lh_rwlock);
+
+ /* free the old array */
+ if (old_lh_array != NULL) {
+ kmem_free(old_lh_array,
+ sizeof (lx_ptm_handle_t) * old_lh_count);
+ }
+}
+
+static void
+lx_ptm_lh_insert(uint_t index, ldi_handle_t lh)
+{
+ lx_ptm_ops_t *lpo;
+
+ ASSERT(lh != NULL);
+
+ /* Allocate and initialize the ops structure */
+ lpo = kmem_zalloc(sizeof (lx_ptm_ops_t), KM_SLEEP);
+ mutex_init(&lpo->lpo_rops_lock, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&lpo->lpo_rops_cv, NULL, CV_DEFAULT, NULL);
+
+ rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+
+ /* check if we need to grow the size of the layered handle array */
+ if (index >= lps.lps_lh_count) {
+ rw_exit(&lps.lps_lh_rwlock);
+ lx_ptm_lh_grow(index);
+ rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+ }
+
+ ASSERT(index < lps.lps_lh_count);
+ ASSERT(lps.lps_lh_array[index].lph_handle == NULL);
+ ASSERT(lps.lps_lh_array[index].lph_pktio == 0);
+ ASSERT(lps.lps_lh_array[index].lph_eofed == 0);
+ ASSERT(lps.lps_lh_array[index].lph_lpo == NULL);
+
+ /* insert the new handle and return */
+ lps.lps_lh_array[index].lph_handle = lh;
+ lps.lps_lh_array[index].lph_pktio = 0;
+ lps.lps_lh_array[index].lph_eofed = 0;
+ lps.lps_lh_array[index].lph_lpo = lpo;
+
+ rw_exit(&lps.lps_lh_rwlock);
+}
+
+static ldi_handle_t
+lx_ptm_lh_remove(uint_t index)
+{
+ ldi_handle_t lh;
+
+ rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+
+ ASSERT(index < lps.lps_lh_count);
+ ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
+ ASSERT(lps.lps_lh_array[index].lph_lpo->lpo_rops == 0);
+ ASSERT(!MUTEX_HELD(&lps.lps_lh_array[index].lph_lpo->lpo_rops_lock));
+
+ /* free the write handle */
+ kmem_free(lps.lps_lh_array[index].lph_lpo, sizeof (lx_ptm_ops_t));
+ lps.lps_lh_array[index].lph_lpo = NULL;
+
+ /* remove the handle and return it */
+ lh = lps.lps_lh_array[index].lph_handle;
+ lps.lps_lh_array[index].lph_handle = NULL;
+ lps.lps_lh_array[index].lph_pktio = 0;
+ lps.lps_lh_array[index].lph_eofed = 0;
+ rw_exit(&lps.lps_lh_rwlock);
+ return (lh);
+}
+
+static void
+lx_ptm_lh_get_ppocb(uint_t index, ptmptsopencb_t *ppocb)
+{
+ rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+
+ ASSERT(index < lps.lps_lh_count);
+ ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
+
+ *ppocb = lps.lps_lh_array[index].lph_ppocb;
+ rw_exit(&lps.lps_lh_rwlock);
+}
+
+static void
+lx_ptm_lh_set_ppocb(uint_t index, ptmptsopencb_t *ppocb)
+{
+ rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+
+ ASSERT(index < lps.lps_lh_count);
+ ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
+
+ lps.lps_lh_array[index].lph_ppocb = *ppocb;
+ rw_exit(&lps.lps_lh_rwlock);
+}
+
+static ldi_handle_t
+lx_ptm_lh_lookup(uint_t index)
+{
+ ldi_handle_t lh;
+
+ rw_enter(&lps.lps_lh_rwlock, RW_READER);
+
+ ASSERT(index < lps.lps_lh_count);
+ ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
+
+ /* return the handle */
+ lh = lps.lps_lh_array[index].lph_handle;
+ rw_exit(&lps.lps_lh_rwlock);
+ return (lh);
+}
+
+static lx_ptm_ops_t *
+lx_ptm_lpo_lookup(uint_t index)
+{
+ lx_ptm_ops_t *lpo;
+
+ rw_enter(&lps.lps_lh_rwlock, RW_READER);
+
+ ASSERT(index < lps.lps_lh_count);
+ ASSERT(lps.lps_lh_array[index].lph_lpo != NULL);
+
+ /* return the handle */
+ lpo = lps.lps_lh_array[index].lph_lpo;
+ rw_exit(&lps.lps_lh_rwlock);
+ return (lpo);
+}
+
+static int
+lx_ptm_lh_pktio_get(uint_t index)
+{
+ int pktio;
+
+ rw_enter(&lps.lps_lh_rwlock, RW_READER);
+
+ ASSERT(index < lps.lps_lh_count);
+ ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
+
+ /* return the pktio state */
+ pktio = lps.lps_lh_array[index].lph_pktio;
+ rw_exit(&lps.lps_lh_rwlock);
+ return (pktio);
+}
+
+static void
+lx_ptm_lh_pktio_set(uint_t index, int pktio)
+{
+ rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+
+ ASSERT(index < lps.lps_lh_count);
+ ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
+
+ /* set the pktio state */
+ lps.lps_lh_array[index].lph_pktio = pktio;
+ rw_exit(&lps.lps_lh_rwlock);
+}
+
+static int
+lx_ptm_lh_eofed_get(uint_t index)
+{
+ int eofed;
+
+ rw_enter(&lps.lps_lh_rwlock, RW_READER);
+
+ ASSERT(index < lps.lps_lh_count);
+ ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
+
+ /* return the eofed state */
+ eofed = lps.lps_lh_array[index].lph_eofed;
+ rw_exit(&lps.lps_lh_rwlock);
+ return (eofed);
+}
+
+static void
+lx_ptm_lh_eofed_set(uint_t index)
+{
+ rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+
+ ASSERT(index < lps.lps_lh_count);
+ ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
+
+ /* set the eofed state */
+ lps.lps_lh_array[index].lph_eofed++;
+ rw_exit(&lps.lps_lh_rwlock);
+}
+
+static int
+lx_ptm_read_start(dev_t dev)
+{
+ lx_ptm_ops_t *lpo = lx_ptm_lpo_lookup(DEVT_TO_INDEX(dev));
+
+ mutex_enter(&lpo->lpo_rops_lock);
+ ASSERT(lpo->lpo_rops >= 0);
+
+ /* Wait for other read operations to finish */
+ while (lpo->lpo_rops != 0) {
+ if (cv_wait_sig(&lpo->lpo_rops_cv, &lpo->lpo_rops_lock) == 0) {
+ mutex_exit(&lpo->lpo_rops_lock);
+ return (-1);
+ }
+ }
+
+ /* Start a read operation */
+ VERIFY(++lpo->lpo_rops == 1);
+ mutex_exit(&lpo->lpo_rops_lock);
+ return (0);
+}
+
+static void
+lx_ptm_read_end(dev_t dev)
+{
+ lx_ptm_ops_t *lpo = lx_ptm_lpo_lookup(DEVT_TO_INDEX(dev));
+
+ mutex_enter(&lpo->lpo_rops_lock);
+ ASSERT(lpo->lpo_rops >= 0);
+
+ /* End a read operation */
+ VERIFY(--lpo->lpo_rops == 0);
+ cv_signal(&lpo->lpo_rops_cv);
+
+ mutex_exit(&lpo->lpo_rops_lock);
+}
+
+static int
+lx_ptm_pts_isopen(dev_t dev)
+{
+ ptmptsopencb_t ppocb;
+
+ lx_ptm_lh_get_ppocb(DEVT_TO_INDEX(dev), &ppocb);
+ return (ppocb.ppocb_func(ppocb.ppocb_arg));
+}
+
+static void
+lx_ptm_eof_read(ldi_handle_t lh)
+{
+ struct uio uio;
+ iovec_t iov;
+ char junk[1];
+
+ /*
+ * We can remove any EOF message from the head of the stream by
+ * doing a zero byte read from the stream.
+ */
+ iov.iov_len = 0;
+ iov.iov_base = junk;
+ uio.uio_iovcnt = 1;
+ uio.uio_iov = &iov;
+ uio.uio_resid = iov.iov_len;
+ uio.uio_offset = 0;
+ uio.uio_segflg = UIO_SYSSPACE;
+ uio.uio_fmode = 0;
+ uio.uio_extflg = 0;
+ uio.uio_llimit = MAXOFFSET_T;
+ (void) ldi_read(lh, &uio, kcred);
+}
+
+static int
+lx_ptm_eof_drop_1(dev_t dev, int *rvalp)
+{
+ ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+ int err, msg_size, msg_count;
+
+ *rvalp = 0;
+
+ /*
+ * Check if there is an EOF message (represented by a zero length
+ * data message) at the head of the stream. Note that the
+ * I_NREAD ioctl is a streams framework ioctl so it will succeed
+ * even if there have been previous write errors on this stream.
+ */
+ if ((err = ldi_ioctl(lh, I_NREAD, (intptr_t)&msg_size,
+ FKIOCTL, kcred, &msg_count)) != 0)
+ return (err);
+
+ if ((msg_count == 0) || (msg_size != 0)) {
+ /* No EOF message found */
+ return (0);
+ }
+
+ /* Record the fact that the slave device has been closed. */
+ lx_ptm_lh_eofed_set(DEVT_TO_INDEX(dev));
+
+ /* drop the EOF */
+ lx_ptm_eof_read(lh);
+ *rvalp = 1;
+ return (0);
+}
+
+static int
+lx_ptm_eof_drop(dev_t dev, int *rvalp)
+{
+ int rval, err;
+
+ if (rvalp != NULL)
+ *rvalp = 0;
+ for (;;) {
+ if ((err = lx_ptm_eof_drop_1(dev, &rval)) != 0)
+ return (err);
+ if (rval == 0)
+ return (0);
+ if (rvalp != NULL)
+ *rvalp = 1;
+ }
+}
+
+static int
+lx_ptm_data_check(dev_t dev, int ignore_eof, int *rvalp)
+{
+ ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+ int err;
+
+ *rvalp = 0;
+ if (ignore_eof) {
+ int size, rval;
+
+ if ((err = ldi_ioctl(lh, FIONREAD, (intptr_t)&size,
+ FKIOCTL, kcred, &rval)) != 0)
+ return (err);
+ if (size != 0)
+ *rvalp = 1;
+ } else {
+ int msg_size, msg_count;
+
+ if ((err = ldi_ioctl(lh, I_NREAD, (intptr_t)&msg_size,
+ FKIOCTL, kcred, &msg_count)) != 0)
+ return (err);
+ if (msg_count != 0)
+ *rvalp = 1;
+ }
+ return (0);
+}
+
+static int
+lx_ptm_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ int err;
+
+ if (cmd != DDI_ATTACH)
+ return (DDI_FAILURE);
+
+ if (ddi_create_minor_node(dip, LX_PTM_MINOR_NODE, S_IFCHR,
+ ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS)
+ return (DDI_FAILURE);
+
+ err = ldi_ident_from_dip(dip, &lps.lps_li);
+ if (err != 0) {
+ ddi_remove_minor_node(dip, ddi_get_name(dip));
+ return (DDI_FAILURE);
+ }
+
+ lps.lps_dip = dip;
+ lps.lps_pts_major = ddi_name_to_major(LP_PTS_DRV_NAME);
+
+ rw_init(&lps.lps_lh_rwlock, NULL, RW_DRIVER, NULL);
+ lps.lps_lh_count = 0;
+ lps.lps_lh_array = NULL;
+
+ return (DDI_SUCCESS);
+}
+
+/*ARGSUSED*/
+static int
+lx_ptm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ if (cmd != DDI_DETACH)
+ return (DDI_FAILURE);
+
+ ldi_ident_release(lps.lps_li);
+ lps.lps_dip = NULL;
+
+ ASSERT((lps.lps_lh_count != 0) || (lps.lps_lh_array == NULL));
+ ASSERT((lps.lps_lh_count == 0) || (lps.lps_lh_array != NULL));
+ if (lps.lps_lh_array != NULL) {
+ kmem_free(lps.lps_lh_array,
+ sizeof (lx_ptm_handle_t) * lps.lps_lh_count);
+ lps.lps_lh_array = NULL;
+ lps.lps_lh_count = 0;
+ }
+
+ return (DDI_SUCCESS);
+}
+
+/*ARGSUSED*/
+static int
+lx_ptm_open(dev_t *devp, int flag, int otyp, cred_t *credp)
+{
+ struct strioctl iocb;
+ ptmptsopencb_t ppocb = { NULL, NULL };
+ ldi_handle_t lh;
+ major_t maj, our_major = getmajor(*devp);
+ minor_t min, lastmin;
+ uint_t index, anchor = 1;
+ dev_t ptm_dev;
+ int err, rval = 0;
+
+ /*
+ * Don't support the FNDELAY flag and FNONBLOCK until we either
+ * find a Linux app that opens /dev/ptmx with the O_NDELAY
+ * or O_NONBLOCK flags explicitly, or until we create test cases
+ * to determine how reads of master terminal devices opened with
+ * these flags behave in different situations on Linux. Supporting
+ * these flags will involve enhancing our read implementation
+ * and changing the way it deals with EOF notifications.
+ */
+ if (flag & (FNDELAY | FNONBLOCK))
+ return (ENOTSUP);
+
+ /*
+ * we're layered on top of the ptm driver so open that driver
+ * first. (note that we're opening /dev/ptmx in the global
+ * zone, not ourselves in the lx zone.)
+ */
+ err = ldi_open_by_name(LP_PTM_PATH, flag, credp, &lh, lps.lps_li);
+ if (err != 0)
+ return (err);
+
+ /* get the devt returned by the ptmx open */
+ err = ldi_get_dev(lh, &ptm_dev);
+ if (err != 0) {
+ (void) ldi_close(lh, flag, credp);
+ return (err);
+ }
+
+ /*
+ * we're a cloning driver so here's where we'll change the devt that we
+ * return. the ptmx is also a cloning driver so we'll just use
+ * it's minor number as our minor number (it already manages it's
+ * minor name space so no reason to duplicate the effort.)
+ */
+ index = getminor(ptm_dev);
+ *devp = makedevice(our_major, INDEX_TO_MINOR(index));
+
+ /* Get a callback function to query if the pts device is open. */
+ iocb.ic_cmd = PTMPTSOPENCB;
+ iocb.ic_timout = 0;
+ iocb.ic_len = sizeof (ppocb);
+ iocb.ic_dp = (char *)&ppocb;
+
+ err = ldi_ioctl(lh, I_STR, (intptr_t)&iocb, FKIOCTL, kcred, &rval);
+ if ((err != 0) || (rval != 0)) {
+ (void) ldi_close(lh, flag, credp);
+ return (EIO); /* XXX return something else here? */
+ }
+ ASSERT(ppocb.ppocb_func != NULL);
+
+ /*
+ * now setup autopush for the terminal slave device. this is
+ * necessary so that when a Linux program opens the device we
+ * can push required strmod modules onto the stream. in Solaris
+ * this is normally done by the application that actually
+ * allocates the terminal.
+ */
+ maj = lps.lps_pts_major;
+ min = index;
+ lastmin = 0;
+ err = kstr_autopush(SET_AUTOPUSH, &maj, &min, &lastmin,
+ &anchor, lx_pts_mods);
+ if (err != 0 && err != EEXIST) {
+ (void) ldi_close(lh, flag, credp);
+ return (EIO); /* XXX return something else here? */
+ }
+
+ /* save off this layered handle for future accesses */
+ lx_ptm_lh_insert(index, lh);
+ lx_ptm_lh_set_ppocb(index, &ppocb);
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+lx_ptm_close(dev_t dev, int flag, int otyp, cred_t *credp)
+{
+ ldi_handle_t lh;
+ major_t maj;
+ minor_t min, lastmin;
+ uint_t index;
+ int err;
+ int i;
+
+ index = DEVT_TO_INDEX(dev);
+
+ /*
+ * we must cleanup all the state associated with this major/minor
+ * terminal pair before actually closing the ptm master device.
+ * this is required because once the close of the ptm device is
+ * complete major/minor terminal pair is immediatly available for
+ * re-use in any zone.
+ */
+
+ /* free up our saved reference for this layered handle */
+ lh = lx_ptm_lh_remove(index);
+
+ /* unconfigure autopush for the associated terminal slave device */
+ maj = lps.lps_pts_major;
+ min = index;
+ lastmin = 0;
+ for (i = 0; i < 5; i++) {
+ /*
+ * we loop here because we don't want to release this ptm
+ * node if autopush can't be disabled on the associated
+ * slave device because then bad things could happen if
+ * another brand were to get this terminal allocated
+ * to them. If we keep failing we eventually drive on so that
+ * things don't hang.
+ */
+ err = kstr_autopush(CLR_AUTOPUSH, &maj, &min, &lastmin,
+ 0, NULL);
+ if (err == 0)
+ break;
+
+ cmn_err(CE_WARN, "lx zoneid %d: error %d on kstr_autopush",
+ getzoneid(), err);
+
+ /* wait one second and try again */
+ delay(drv_usectohz(1000000));
+ }
+
+ err = ldi_close(lh, flag, credp);
+
+ /*
+ * note that we don't have to bother with changing the permissions
+ * on the associated slave device here. the reason is that no one
+ * can actually open the device untill it's associated master
+ * device is re-opened, which will result in the permissions on
+ * it being reset.
+ */
+ return (err);
+}
+
+static int
+lx_ptm_read_loop(dev_t dev, struct uio *uiop, cred_t *credp, int *loop)
+{
+ ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+ int err, rval;
+ struct uio uio = *uiop;
+
+ *loop = 0;
+
+ /*
+ * Here's another way that Linux master terminals behave differently
+ * from Solaris master terminals. If you do a read on a Linux
+ * master terminal (that was opened witout NDELAY and NONBLOCK)
+ * who's corrosponding slave terminal is currently closed and
+ * has been opened and closed at least once, Linux return -1 and
+ * set errno to EIO where as Solaris blocks.
+ */
+ if (lx_ptm_lh_eofed_get(DEVT_TO_INDEX(dev))) {
+ /* Slave has been opened and closed at least once. */
+ if (lx_ptm_pts_isopen(dev) == 0) {
+ /*
+ * Slave is closed. Make sure that data is avaliable
+ * before attempting a read.
+ */
+ if ((err = lx_ptm_data_check(dev, 0, &rval)) != 0)
+ return (err);
+
+ /* If there is no data available then return. */
+ if (rval == 0)
+ return (EIO);
+ }
+ }
+
+ /* Actually do the read operation. */
+ if ((err = ldi_read(lh, uiop, credp)) != 0)
+ return (err);
+
+ /* If read returned actual data then return. */
+ if (uio.uio_resid != uiop->uio_resid)
+ return (0);
+
+ /*
+ * This was a zero byte read (ie, an EOF). This indicates
+ * that the slave terinal device has been closed. Record
+ * the fact that the slave device has been closed and retry
+ * the read operation.
+ */
+ lx_ptm_lh_eofed_set(DEVT_TO_INDEX(dev));
+ *loop = 1;
+ return (0);
+}
+
+static int
+lx_ptm_read(dev_t dev, struct uio *uiop, cred_t *credp)
+{
+ int pktio = lx_ptm_lh_pktio_get(DEVT_TO_INDEX(dev));
+ int err, loop;
+ struct uio uio;
+ struct iovec iovp;
+
+ ASSERT(uiop->uio_iovcnt > 0);
+
+ /*
+ * If packet mode has been enabled (via TIOCPKT) we need to pad
+ * all read requests with a leading byte that indicates any
+ * relevant control status information.
+ */
+ if (pktio != 0) {
+ /*
+ * We'd like to write the control information into
+ * the current buffer but we can't yet. We don't
+ * want to modify userspace memory here only to have
+ * the read operation fail later. So instead
+ * what we'll do here is read one character from the
+ * beginning of the memory pointed to by the uio
+ * structure. This will advance the output pointer
+ * by one. Then when the read completes successfully
+ * we can update the byte that we passed over. Before
+ * we do the read make a copy of the current uiop and
+ * iovec structs so we can write to them later.
+ */
+ uio = *uiop;
+ iovp = *uiop->uio_iov;
+ uio.uio_iov = &iovp;
+
+ if (uwritec(uiop) == -1)
+ return (EFAULT);
+ }
+
+ do {
+ /*
+ * Before we actually attempt a read operation we need
+ * to make sure there's some buffer space to actually
+ * read in some data. We do this because if we're in
+ * pktio mode and the caller only requested one byte,
+ * then we've already used up that one byte and we
+ * don't want to pass this read request. Doing a 0
+ * byte read (unless there is a problem with the stream
+ * head) always returns succcess. Normally when a streams
+ * read returns 0 bytes we interpret that as an EOF on
+ * the stream (ie, the slave side has been opened and
+ * closed) and we ignore it and re-try the read operation.
+ * So if we pass on a 0 byte read here lx_ptm_read_loop()
+ * will tell us to loop around and we'll end up in an
+ * infinite loop.
+ */
+ if (uiop->uio_resid == 0)
+ break;
+
+ /*
+ * Serialize all reads. We need to do this so that we can
+ * properly emulate the behavior of master terminals on Linux.
+ * In reality this serializaion should not pose any kind of
+ * performance problem since it would be very strange to have
+ * multiple threads trying to read from the same master
+ * terminal device concurrently.
+ */
+ if (lx_ptm_read_start(dev) != 0)
+ return (EINTR);
+
+ err = lx_ptm_read_loop(dev, uiop, credp, &loop);
+ lx_ptm_read_end(dev);
+ if (err != 0)
+ return (err);
+ } while (loop != 0);
+
+ if (pktio != 0) {
+ uint8_t pktio_data = TIOCPKT_DATA;
+
+ /*
+ * Note that the control status information we
+ * pass back is faked up in the sense that we
+ * don't actually report any events, we always
+ * report a status of 0.
+ */
+ if (uiomove(&pktio_data, 1, UIO_READ, &uio) != 0)
+ return (EFAULT);
+ }
+
+ return (0);
+}
+
+static int
+lx_ptm_write(dev_t dev, struct uio *uiop, cred_t *credp)
+{
+ ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+ int err;
+
+ err = ldi_write(lh, uiop, credp);
+
+ return (err);
+}
+
+static int
+lx_ptm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
+ int *rvalp)
+{
+ ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+ int err;
+
+ /*
+ * here we need to make sure that we never allow the
+ * I_SETSIG and I_ESETSIG ioctls to pass through. we
+ * do this because we can't support them.
+ *
+ * the native Solaris ptm device supports these ioctls because
+ * they are streams framework ioctls and all streams devices
+ * support them by default. these ioctls cause the current
+ * process to be registered with a stream and receive signals
+ * when certain stream events occur.
+ *
+ * a problem arises with cleanup of these registrations
+ * for layered drivers.
+ *
+ * normally the streams framework is notified whenever a
+ * process closes any reference to a stream and it goes ahead
+ * and cleans up these registrations. but actual device drivers
+ * are not notified when a process performs a close operation
+ * unless the process is closing the last opened reference to
+ * the device on the entire system.
+ *
+ * so while we could pass these ioctls on and allow processes
+ * to register for signal delivery, we would never receive
+ * any notification when those processes exit (or close a
+ * stream) and we wouldn't be able to unregister them.
+ *
+ * luckily these operations are streams specific and Linux
+ * doesn't support streams devices. so it doesn't actually
+ * seem like we need to support these ioctls. if it turns
+ * out that we do need to support them for some reason in
+ * the future, the current driver model will have to be
+ * enhanced to better support streams device layering.
+ */
+ if ((cmd == I_SETSIG) || (cmd == I_ESETSIG))
+ return (EINVAL);
+
+ /*
+ * here we fake up support for TIOCPKT. Linux applications expect
+ * /etc/ptmx to support this ioctl, but on Solaris it doesn't.
+ * (it is supported on older bsd style ptys.) so we'll fake
+ * up support for it here.
+ *
+ * the reason that this ioctl is emulated here instead of in
+ * userland is that this ioctl affects the results returned
+ * from read() operations. if this ioctl was emulated in
+ * userland the brand library would need to intercept all
+ * read operations and check to see if pktio was enabled
+ * for the fd being read from. since this ioctl only needs
+ * to be supported on the ptmx device it makes more sense
+ * to support it here where we can easily update the results
+ * returned for read() operations performed on ourselves.
+ */
+ if (cmd == TIOCPKT) {
+ int pktio;
+
+ if (ddi_copyin((void *)arg, &pktio, sizeof (pktio),
+ mode) != DDI_SUCCESS)
+ return (EFAULT);
+
+ if (pktio == 0)
+ lx_ptm_lh_pktio_set(DEVT_TO_INDEX(dev), 0);
+ else
+ lx_ptm_lh_pktio_set(DEVT_TO_INDEX(dev), 1);
+
+ return (0);
+ }
+
+ err = ldi_ioctl(lh, cmd, arg, mode, credp, rvalp);
+
+ /*
+ * On recent versions of Linux some apps issue the following ioctls to
+ * the master side of the ptm before opening the slave side. Because
+ * our streams modules (specifically ptem) aren't autopushed until the
+ * slave side has been opened, these ioctls will fail. To alleviate the
+ * issue we simply pretend that these ioctls have succeeded.
+ *
+ * We could push our own "lx_ptem" module onto the master side of the
+ * stream in lx_ptm_open if we need better emulation, but that would
+ * require an "lx_ptem" module which duplicates most of ptem. ptem
+ * doesn't work properly when pushed on the master side.
+ */
+ if (err == EINVAL && (cmd == TIOCSWINSZ || cmd == TCSETS) &&
+ lx_ptm_pts_isopen(dev) == 0) {
+ /* slave side not open, assume we need to succeed */
+ DTRACE_PROBE1(lx_ptm_ioctl__override, int, cmd);
+ return (0);
+ }
+
+ return (err);
+}
+
+static int
+lx_ptm_poll_loop(dev_t dev, short events, int anyyet, short *reventsp,
+ struct pollhead **phpp, int *loop)
+{
+ ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+ short reventsp2;
+ int err, rval;
+
+ *loop = 0;
+
+ /*
+ * If the slave device has been opened and closed at least
+ * once and the slave device is currently closed, then poll
+ * always needs to returns immediatly.
+ */
+ if ((lx_ptm_lh_eofed_get(DEVT_TO_INDEX(dev)) != 0) &&
+ (lx_ptm_pts_isopen(dev) == 0)) {
+ /* In this case always return POLLHUP */
+ *reventsp = POLLHUP;
+
+ /*
+ * Check if there really is data on the stream.
+ * If so set the correct return flags.
+ */
+ if ((err = lx_ptm_data_check(dev, 1, &rval)) != 0) {
+ /* Something went wrong. */
+ return (err);
+ }
+ if (rval != 0)
+ *reventsp |= (events & (POLLIN | POLLRDNORM));
+
+ /*
+ * Is the user checking for writability? Note that for ptm
+ * devices Linux seems to ignore the POLLWRBAND write flag.
+ */
+ if ((events & POLLWRNORM) == 0)
+ return (0);
+
+ /*
+ * To check if the stream is writable we have to actually
+ * call poll, but make sure to set anyyet to 1 to prevent
+ * the streams framework from setting up callbacks.
+ */
+ if ((err = ldi_poll(lh, POLLWRNORM, 1, &reventsp2, NULL)) != 0)
+ return (err);
+
+ *reventsp |= (reventsp2 & POLLWRNORM);
+ } else {
+ int lockstate;
+
+ /* The slave device is open, do the poll */
+ if ((err = ldi_poll(lh, events, anyyet, reventsp, phpp)) != 0)
+ return (err);
+
+ /*
+ * Drop any leading EOFs on the stream.
+ *
+ * Note that we have to use pollunlock() here to avoid
+ * recursive mutex enters in the poll framework. The
+ * reason is that if there is an EOF message on the stream
+ * then the act of reading from the queue to remove the
+ * message can cause the ptm drivers event service
+ * routine to be invoked, and if there is no open
+ * slave device then the ptm driver may generate
+ * error messages and put them on the stream. This
+ * in turn will generate a poll event and the poll
+ * framework will try to invoke any poll callbacks
+ * associated with the stream. In the process of
+ * doing that the poll framework will try to aquire
+ * locks that we are already holding. So we need to
+ * drop those locks here before we do our read.
+ */
+ if (pollunlock(&lockstate) != 0) {
+ *reventsp = POLLNVAL;
+ return (0);
+ }
+ err = lx_ptm_eof_drop(dev, &rval);
+ pollrelock(lockstate);
+ if (err)
+ return (err);
+
+ /* If no EOF was dropped then return */
+ if (rval == 0)
+ return (0);
+
+ /*
+ * An EOF was removed from the stream. Retry the entire
+ * poll operation from the top because polls on the ptm
+ * device should behave differently now.
+ */
+ *loop = 1;
+ }
+ return (0);
+}
+
+static int
+lx_ptm_poll(dev_t dev, short events, int anyyet, short *reventsp,
+ struct pollhead **phpp)
+{
+ int loop, err;
+
+ do {
+ /* Serialize ourself wrt read operations. */
+ if (lx_ptm_read_start(dev) != 0)
+ return (EINTR);
+
+ err = lx_ptm_poll_loop(dev,
+ events, anyyet, reventsp, phpp, &loop);
+ lx_ptm_read_end(dev);
+ if (err != 0)
+ return (err);
+ } while (loop != 0);
+ return (0);
+}
+
+static struct cb_ops lx_ptm_cb_ops = {
+ lx_ptm_open, /* open */
+ lx_ptm_close, /* close */
+ nodev, /* strategy */
+ nodev, /* print */
+ nodev, /* dump */
+ lx_ptm_read, /* read */
+ lx_ptm_write, /* write */
+ lx_ptm_ioctl, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ nodev, /* segmap */
+ lx_ptm_poll, /* chpoll */
+ ddi_prop_op, /* prop_op */
+ NULL, /* cb_str */
+ D_NEW | D_MP,
+ CB_REV,
+ NULL,
+ NULL
+};
+
+static struct dev_ops lx_ptm_ops = {
+ DEVO_REV,
+ 0,
+ ddi_getinfo_1to1,
+ nulldev,
+ nulldev,
+ lx_ptm_attach,
+ lx_ptm_detach,
+ nodev,
+ &lx_ptm_cb_ops,
+ NULL,
+ NULL,
+ ddi_quiesce_not_needed, /* quiesce */
+};
+
+static struct modldrv modldrv = {
+ &mod_driverops, /* type of module */
+ "Linux master terminal driver", /* description of module */
+ &lx_ptm_ops /* driver ops */
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1,
+ &modldrv,
+ NULL
+};
+
+int
+_init(void)
+{
+ return (mod_install(&modlinkage));
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ return (mod_remove(&modlinkage));
+}
diff --git a/usr/src/uts/common/brand/lx/io/lx_ptm.conf b/usr/src/uts/common/brand/lx/io/lx_ptm.conf
new file mode 100644
index 0000000000..481b4e3c74
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/io/lx_ptm.conf
@@ -0,0 +1,27 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+
+name="lx_ptm" parent="pseudo" instance=0;
diff --git a/usr/src/uts/common/brand/lx/os/lx_brand.c b/usr/src/uts/common/brand/lx/os/lx_brand.c
new file mode 100644
index 0000000000..6cff045a80
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/os/lx_brand.c
@@ -0,0 +1,2586 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright 2016, Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * The LX Brand: emulation of a Linux operating environment within a zone.
+ *
+ * OVERVIEW
+ *
+ * The LX brand enables a full Linux userland -- including a C library,
+ * init(1) framework, and some set of applications -- to run unmodified
+ * within an illumos zone. Unlike illumos, where applications are expected
+ * to link against and consume functions exported from libraries, the
+ * supported Linux binary compatibility boundary is the system call
+ * interface. By accurately emulating the behaviour of Linux system calls,
+ * Linux software can be executed in this environment as if it were running
+ * on a native Linux system.
+ *
+ * EMULATING LINUX SYSTEM CALLS
+ *
+ * Linux system calls are made in 32-bit processes via the "int 0x80"
+ * instruction; in 64-bit processes the "syscall" instruction is used, as it
+ * is with native illumos processes. In both cases, arguments to system
+ * calls are generally passed in registers and the usermode stack is not
+ * interpreted or modified by the Linux kernel.
+ *
+ * When the emulated Linux process makes a system call, it traps into the
+ * illumos kernel. The in-kernel brand module contains various emulation
+ * routines, and can fully service some emulated system calls; e.g. read(2)
+ * and write(2). Other system calls require assistance from the illumos
+ * libc, bouncing back out to the brand library ("lx_brand.so.1") for
+ * emulation.
+ *
+ * The brand mechanism allows for the provision of an alternative trap
+ * handler for the various system call mechanisms. Traditionally this was
+ * used to immediately revector execution to the usermode emulation library,
+ * which was responsible for handling all system calls. In the interests of
+ * more accurate emulation and increased performance, much of the regular
+ * illumos system call path is now invoked. Only the argument processing and
+ * handler dispatch are replaced by the brand, via the per-LWP
+ * "lwp_brand_syscall" interposition function pointer.
+ *
+ * THE NATIVE AND BRAND STACKS
+ *
+ * Some runtime environments (e.g. the Go language) allocate very small
+ * thread stacks, preferring to grow or split the stack as necessary. The
+ * Linux kernel generally does not use the usermode stack when servicing
+ * system calls, so this is not a problem. In order for our emulation to
+ * have the same zero stack impact, we must execute usermode emulation
+ * routines on an _alternate_ stack. This is similar, in principle, to the
+ * use of sigaltstack(3C) to run signal handlers off the main thread stack.
+ *
+ * To this end, the brand library allocates and installs an alternate stack
+ * (called the "native" stack) for each LWP. The in-kernel brand code uses
+ * this stack for usermode emulation calls and interposed signal delivery,
+ * while the emulated Linux process sees only the data on the main thread
+ * stack, known as the "brand" stack. The stack mode is tracked in the
+ * per-LWP brand-private data, using the LX_STACK_MODE_* enum.
+ *
+ * The stack mode doubles as a system call "mode bit". When in the
+ * LX_STACK_MODE_BRAND mode, system calls are processed as emulated Linux
+ * system calls. In other modes, system calls are assumed to be native
+ * illumos system calls as made during brand library initialisation and
+ * usermode emulation.
+ *
+ * USERMODE EMULATION
+ *
+ * When a Linux system call cannot be emulated within the kernel, we preserve
+ * the register state of the Linux process and revector the LWP to the brand
+ * library usermode emulation handler: the "lx_emulate()" function in
+ * "lx_brand.so.1". This revectoring is modelled on the delivery of signals,
+ * and is performed in "lx_emulate_user()".
+ *
+ * First, the emulated process state is written out to the usermode stack of
+ * the process as a "ucontext_t" object. Arguments to the emulation routine
+ * are passed on the stack or in registers, depending on the ABI. When the
+ * usermode emulation is complete, the result is passed back to the kernel
+ * (via the "B_EMULATION_DONE" brandsys subcommand) with the saved context
+ * for restoration.
+ *
+ * SIGNAL DELIVERY, SETCONTEXT AND GETCONTEXT
+ *
+ * When servicing emulated system calls in the usermode brand library, or
+ * during signal delivery, various state is preserved by the kernel so that
+ * the running LWP may be revectored to a handling routine. The context
+ * allows the kernel to restart the program at the point of interruption,
+ * either at the return of the signal handler, via setcontext(3C); or after
+ * the usermode emulation request has been serviced, via B_EMULATION_DONE.
+ *
+ * In illumos native processes, the saved context (a "ucontext_t" object)
+ * includes the state of registers and the current signal mask at the point
+ * of interruption. The context also includes a link to the most recently
+ * saved context, forming a chain to be unwound as requests complete. The LX
+ * brand requires additional book-keeping to describe the machine state: in
+ * particular, the current stack mode and the occupied extent of the native
+ * stack.
+ *
+ * The brand code is able to interpose on the context save and restore
+ * operations in the kernel -- see "lx_savecontext()" and
+ * "lx_restorecontext()" -- to enable getcontext(3C) and setcontext(3C) to
+ * function correctly in the face of a dual stack LWP. The brand also
+ * interposes on the signal delivery mechanism -- see "lx_sendsig()" and
+ * "lx_sendsig_stack()" -- to allow all signals to be delivered to the brand
+ * library interposer on the native stack, regardless of the interrupted
+ * execution mode. Linux sigaltstack(2) emulation is performed entirely by
+ * the usermode brand library during signal handler interposition.
+ */
+
+#include <sys/types.h>
+#include <sys/kmem.h>
+#include <sys/errno.h>
+#include <sys/thread.h>
+#include <sys/systm.h>
+#include <sys/syscall.h>
+#include <sys/proc.h>
+#include <sys/modctl.h>
+#include <sys/cmn_err.h>
+#include <sys/model.h>
+#include <sys/exec.h>
+#include <sys/lx_impl.h>
+#include <sys/machbrand.h>
+#include <sys/lx_syscalls.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_futex.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_types.h>
+#include <sys/param.h>
+#include <sys/termios.h>
+#include <sys/sunddi.h>
+#include <sys/ddi.h>
+#include <sys/vnode.h>
+#include <sys/pathname.h>
+#include <sys/auxv.h>
+#include <sys/priv.h>
+#include <sys/regset.h>
+#include <sys/privregs.h>
+#include <sys/archsystm.h>
+#include <sys/zone.h>
+#include <sys/brand.h>
+#include <sys/sdt.h>
+#include <sys/x86_archext.h>
+#include <sys/controlregs.h>
+#include <sys/core.h>
+#include <sys/stack.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <lx_signum.h>
+#include <util/sscanf.h>
+#include <sys/lx_brand.h>
+#include <sys/zfs_ioctl.h>
+
+int lx_debug = 0;
+
+void lx_init_brand_data(zone_t *, kmutex_t *);
+void lx_free_brand_data(zone_t *);
+void lx_setbrand(proc_t *);
+int lx_getattr(zone_t *, int, void *, size_t *);
+int lx_setattr(zone_t *, int, void *, size_t);
+int lx_brandsys(int, int64_t *, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t, uintptr_t);
+void lx_set_kern_version(zone_t *, char *);
+void lx_copy_procdata(proc_t *, proc_t *);
+
+extern int getsetcontext(int, void *);
+extern int waitsys(idtype_t, id_t, siginfo_t *, int);
+#if defined(_SYSCALL32_IMPL)
+extern int getsetcontext32(int, void *);
+extern int waitsys32(idtype_t, id_t, siginfo_t *, int);
+#endif
+
+extern int zvol_name2minor(const char *, minor_t *);
+extern int zvol_create_minor(const char *);
+
+extern void lx_proc_exit(proc_t *);
+extern int lx_sched_affinity(int, uintptr_t, int, uintptr_t, int64_t *);
+
+extern void lx_ioctl_init();
+extern void lx_ioctl_fini();
+extern void lx_socket_init();
+extern void lx_socket_fini();
+
+lx_systrace_f *lx_systrace_entry_ptr;
+lx_systrace_f *lx_systrace_return_ptr;
+
+static int lx_systrace_enabled;
+
+/*
+ * cgroup file system maintenance functions which are set when cgroups loads.
+ */
+void (*lx_cgrp_initlwp)(vfs_t *, uint_t, id_t, pid_t);
+void (*lx_cgrp_freelwp)(vfs_t *, uint_t, id_t, pid_t);
+
+/*
+ * While this is effectively mmu.hole_start - PAGESIZE, we don't particularly
+ * want an MMU dependency here (and should there be a microprocessor without
+ * a hole, we don't want to start allocating from the top of the VA range).
+ */
+#define LX_MAXSTACK64 0x7ffffff00000
+
+uint64_t lx_maxstack64 = LX_MAXSTACK64;
+
+static int lx_elfexec(struct vnode *vp, struct execa *uap, struct uarg *args,
+ struct intpdata *idata, int level, long *execsz, int setid,
+ caddr_t exec_file, struct cred *cred, int *brand_action);
+
+static boolean_t lx_native_exec(uint8_t, const char **);
+static uint32_t lx_map32limit(proc_t *);
+
+static void lx_savecontext(ucontext_t *);
+static void lx_restorecontext(ucontext_t *);
+static caddr_t lx_sendsig_stack(int);
+static void lx_sendsig(int);
+#if defined(_SYSCALL32_IMPL)
+static void lx_savecontext32(ucontext32_t *);
+#endif
+static int lx_setid_clear(vattr_t *, cred_t *);
+#if defined(_LP64)
+static int lx_pagefault(proc_t *, klwp_t *, caddr_t, enum fault_type,
+ enum seg_rw);
+#endif
+
+typedef struct lx_zfs_ds {
+ list_node_t ds_link;
+ char ds_name[MAXPATHLEN];
+ uint64_t ds_cookie;
+} lx_zfs_ds_t;
+
+/* lx brand */
+struct brand_ops lx_brops = {
+ lx_init_brand_data, /* b_init_brand_data */
+ lx_free_brand_data, /* b_free_brand_data */
+ lx_brandsys, /* b_brandsys */
+ lx_setbrand, /* b_setbrand */
+ lx_getattr, /* b_getattr */
+ lx_setattr, /* b_setattr */
+ lx_copy_procdata, /* b_copy_procdata */
+ lx_proc_exit, /* b_proc_exit */
+ lx_exec, /* b_exec */
+ lx_setrval, /* b_lwp_setrval */
+ lx_lwpdata_alloc, /* b_lwpdata_alloc */
+ lx_lwpdata_free, /* b_lwpdata_free */
+ lx_initlwp, /* b_initlwp */
+ lx_initlwp_post, /* b_initlwp_post */
+ lx_forklwp, /* b_forklwp */
+ lx_freelwp, /* b_freelwp */
+ lx_exitlwp, /* b_lwpexit */
+ lx_elfexec, /* b_elfexec */
+ NULL, /* b_sigset_native_to_brand */
+ NULL, /* b_sigset_brand_to_native */
+ lx_sigfd_translate, /* b_sigfd_translate */
+ NSIG, /* b_nsig */
+ lx_exit_with_sig, /* b_exit_with_sig */
+ lx_wait_filter, /* b_wait_filter */
+ lx_native_exec, /* b_native_exec */
+ lx_map32limit, /* b_map32limit */
+ lx_stop_notify, /* b_stop_notify */
+ lx_waitid_helper, /* b_waitid_helper */
+ lx_sigcld_repost, /* b_sigcld_repost */
+ lx_ptrace_issig_stop, /* b_issig_stop */
+ lx_ptrace_sig_ignorable, /* b_sig_ignorable */
+ lx_savecontext, /* b_savecontext */
+#if defined(_SYSCALL32_IMPL)
+ lx_savecontext32, /* b_savecontext32 */
+#endif
+ lx_restorecontext, /* b_restorecontext */
+ lx_sendsig_stack, /* b_sendsig_stack */
+ lx_sendsig, /* b_sendsig */
+ lx_setid_clear, /* b_setid_clear */
+#if defined(_LP64)
+ lx_pagefault, /* b_pagefault */
+#else
+ NULL,
+#endif
+ B_FALSE /* b_intp_parse_arg */
+};
+
+struct brand_mach_ops lx_mops = {
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ lx_fixsegreg,
+ lx_fsbase
+};
+
+struct brand lx_brand = {
+ BRAND_VER_1,
+ "lx",
+ &lx_brops,
+ &lx_mops,
+ sizeof (struct lx_proc_data)
+};
+
+static struct modlbrand modlbrand = {
+ &mod_brandops, "lx brand", &lx_brand
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, (void *)&modlbrand, NULL
+};
+
+void
+lx_proc_exit(proc_t *p)
+{
+ lx_proc_data_t *lxpd;
+ proc_t *cp;
+
+ mutex_enter(&p->p_lock);
+ VERIFY(lxpd = ptolxproc(p));
+ VERIFY(lxpd->l_ptrace == 0);
+ if ((lxpd->l_flags & LX_PROC_CHILD_DEATHSIG) == 0) {
+ mutex_exit(&p->p_lock);
+ return;
+ }
+ mutex_exit(&p->p_lock);
+
+ /* Check for children which desire notification of parental death. */
+ mutex_enter(&pidlock);
+ for (cp = p->p_child; cp != NULL; cp = cp->p_sibling) {
+ mutex_enter(&cp->p_lock);
+ if ((lxpd = ptolxproc(cp)) == NULL) {
+ mutex_exit(&cp->p_lock);
+ continue;
+ }
+ if (lxpd->l_parent_deathsig != 0) {
+ sigtoproc(cp, NULL, lxpd->l_parent_deathsig);
+ }
+ mutex_exit(&cp->p_lock);
+ }
+ mutex_exit(&pidlock);
+}
+
+void
+lx_setbrand(proc_t *p)
+{
+ /* Send SIGCHLD to parent by default when child exits */
+ ptolxproc(p)->l_signal = stol_signo[SIGCHLD];
+
+ lx_read_argv_bounds(p);
+}
+
+/* ARGSUSED */
+int
+lx_setattr(zone_t *zone, int attr, void *ubuf, size_t ubufsz)
+{
+ lx_zone_data_t *lxzd = (lx_zone_data_t *)zone->zone_brand_data;
+
+ switch (attr) {
+ case LX_ATTR_KERN_RELEASE: {
+ char buf[LX_KERN_RELEASE_MAX];
+ bzero(buf, LX_KERN_RELEASE_MAX);
+ if (ubufsz >= LX_KERN_RELEASE_MAX) {
+ return (ERANGE);
+ }
+ if (copyin(ubuf, buf, ubufsz) != 0) {
+ return (EFAULT);
+ }
+ mutex_enter(&lxzd->lxzd_lock);
+ (void) strlcpy(lxzd->lxzd_kernel_release, buf,
+ LX_KERN_RELEASE_MAX);
+ mutex_exit(&lxzd->lxzd_lock);
+ return (0);
+ }
+ case LX_ATTR_KERN_VERSION: {
+ char buf[LX_KERN_VERSION_MAX];
+ bzero(buf, LX_KERN_VERSION_MAX);
+ if (ubufsz >= LX_KERN_VERSION_MAX) {
+ return (ERANGE);
+ }
+ if (copyin(ubuf, buf, ubufsz) != 0) {
+ return (EFAULT);
+ }
+ mutex_enter(&lxzd->lxzd_lock);
+ (void) strlcpy(lxzd->lxzd_kernel_version, buf,
+ LX_KERN_VERSION_MAX);
+ mutex_exit(&lxzd->lxzd_lock);
+ return (0);
+ }
+ default:
+ return (EINVAL);
+ }
+}
+
+/* ARGSUSED */
+int
+lx_getattr(zone_t *zone, int attr, void *ubuf, size_t *ubufsz)
+{
+ lx_zone_data_t *lxzd = (lx_zone_data_t *)zone->zone_brand_data;
+ int len;
+
+ switch (attr) {
+ case LX_ATTR_KERN_RELEASE: {
+ char buf[LX_KERN_RELEASE_MAX];
+
+ mutex_enter(&lxzd->lxzd_lock);
+ len = strnlen(lxzd->lxzd_kernel_release, LX_KERN_RELEASE_MAX);
+ len++;
+ if (*ubufsz < len) {
+ mutex_exit(&lxzd->lxzd_lock);
+ return (ERANGE);
+ }
+ bzero(buf, sizeof (buf));
+ (void) strncpy(buf, lxzd->lxzd_kernel_release, sizeof (buf));
+ mutex_exit(&lxzd->lxzd_lock);
+ if (copyout(buf, ubuf, len) != 0) {
+ return (EFAULT);
+ }
+ *ubufsz = len;
+ return (0);
+ }
+ case LX_ATTR_KERN_VERSION: {
+ char buf[LX_KERN_VERSION_MAX];
+
+ mutex_enter(&lxzd->lxzd_lock);
+ len = strnlen(lxzd->lxzd_kernel_version, LX_KERN_VERSION_MAX);
+ len++;
+ if (*ubufsz < len) {
+ mutex_exit(&lxzd->lxzd_lock);
+ return (ERANGE);
+ }
+ bzero(buf, sizeof (buf));
+ (void) strncpy(buf, lxzd->lxzd_kernel_version, sizeof (buf));
+ mutex_exit(&lxzd->lxzd_lock);
+ if (copyout(buf, ubuf, len) != 0) {
+ return (EFAULT);
+ }
+ *ubufsz = len;
+ return (0);
+ }
+ default:
+ return (EINVAL);
+ }
+}
+
+uint32_t
+lx_map32limit(proc_t *p)
+{
+ /*
+ * To be bug-for-bug compatible with Linux, we have MAP_32BIT only
+ * allow mappings in the first 31 bits. This was a nuance in the
+ * original Linux implementation circa 2002, and applications have
+ * come to depend on its behavior.
+ *
+ * This is only relevant for 64-bit processes.
+ */
+ if (p->p_model == DATAMODEL_LP64)
+ return (1 << 31);
+
+ return ((uint32_t)USERLIMIT32);
+}
+
+void
+lx_brand_systrace_enable(void)
+{
+ VERIFY(!lx_systrace_enabled);
+
+ lx_systrace_enabled = 1;
+}
+
+void
+lx_brand_systrace_disable(void)
+{
+ VERIFY(lx_systrace_enabled);
+
+ lx_systrace_enabled = 0;
+}
+
+void
+lx_lwp_set_native_stack_current(lx_lwp_data_t *lwpd, uintptr_t new_sp)
+{
+ VERIFY(lwpd->br_ntv_stack != 0);
+
+ /*
+ * The "brand-lx-set-ntv-stack-current" probe has arguments:
+ * arg0: stack pointer before change
+ * arg1: stack pointer after change
+ * arg2: current stack base
+ */
+ DTRACE_PROBE3(brand__lx__set__ntv__stack__current,
+ uintptr_t, lwpd->br_ntv_stack_current,
+ uintptr_t, new_sp,
+ uintptr_t, lwpd->br_ntv_stack);
+
+ lwpd->br_ntv_stack_current = new_sp;
+}
+
+#if defined(_LP64)
+static int
+lx_pagefault(proc_t *p, klwp_t *lwp, caddr_t addr, enum fault_type type,
+ enum seg_rw rw)
+{
+ int syscall_num;
+
+ /*
+ * We only want to handle a very specific set of circumstances.
+ * Namely: this is a 64-bit LX-branded process attempting to execute an
+ * address in a page for which it does not have a valid mapping. If
+ * this is not the case, we bail out as fast as possible.
+ */
+ VERIFY(PROC_IS_BRANDED(p));
+ if (type != F_INVAL || rw != S_EXEC || lwp_getdatamodel(lwp) !=
+ DATAMODEL_NATIVE) {
+ return (-1);
+ }
+
+ if (!lx_vsyscall_iscall(lwp, (uintptr_t)addr, &syscall_num)) {
+ return (-1);
+ }
+
+ /*
+ * This is a valid vsyscall address. We service the system call and
+ * return 0 to signal that the pagefault has been handled completely.
+ */
+ lx_vsyscall_enter(p, lwp, syscall_num);
+ return (0);
+}
+#endif
+
+/*
+ * This hook runs prior to sendsig() processing and allows us to nominate
+ * an alternative stack pointer for delivery of the signal handling frame.
+ * Critically, this routine should _not_ modify any LWP state as the
+ * savecontext() does not run until after this hook.
+ */
+static caddr_t
+lx_sendsig_stack(int sig)
+{
+ klwp_t *lwp = ttolwp(curthread);
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+
+ /*
+ * We want to take signal delivery on the native stack, but only if
+ * one has been allocated and installed for this LWP.
+ */
+ if (lwpd->br_stack_mode == LX_STACK_MODE_BRAND) {
+ /*
+ * The program is not running on the native stack. Return
+ * the native stack pointer from our brand-private data so
+ * that we may switch to it for signal handling.
+ */
+ return ((caddr_t)lwpd->br_ntv_stack_current);
+ } else {
+ struct regs *rp = lwptoregs(lwp);
+
+ /*
+ * Either the program is already running on the native stack,
+ * or one has not yet been allocated for this LWP. Use the
+ * current stack pointer value.
+ */
+ return ((caddr_t)rp->r_sp);
+ }
+}
+
+/*
+ * This hook runs after sendsig() processing and allows us to update the
+ * per-LWP mode flags for system calls and stacks. The pre-signal
+ * context has already been saved and delivered to the user at this point.
+ */
+static void
+lx_sendsig(int sig)
+{
+ klwp_t *lwp = ttolwp(curthread);
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+ struct regs *rp = lwptoregs(lwp);
+
+ switch (lwpd->br_stack_mode) {
+ case LX_STACK_MODE_BRAND:
+ case LX_STACK_MODE_NATIVE:
+ /*
+ * In lx_sendsig_stack(), we nominated a stack pointer from the
+ * native stack. Update the stack mode, and the current in-use
+ * extent of the native stack, accordingly:
+ */
+ lwpd->br_stack_mode = LX_STACK_MODE_NATIVE;
+ lx_lwp_set_native_stack_current(lwpd, rp->r_sp);
+
+ /*
+ * Fix up segment registers, etc.
+ */
+ lx_switch_to_native(lwp);
+ break;
+
+ default:
+ /*
+ * Otherwise, the brand library has not yet installed the
+ * alternate stack for this LWP. Signals will be handled on
+ * the regular stack thread.
+ */
+ return;
+ }
+}
+
+/*
+ * This hook runs prior to the context restoration, allowing us to take action
+ * or modify the context before it is loaded.
+ */
+static void
+lx_restorecontext(ucontext_t *ucp)
+{
+ klwp_t *lwp = ttolwp(curthread);
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+ uintptr_t flags = (uintptr_t)ucp->uc_brand_data[0];
+ caddr_t sp = ucp->uc_brand_data[1];
+
+ /*
+ * We have a saved native stack pointer value that we must restore
+ * into the per-LWP data.
+ */
+ if (flags & LX_UC_RESTORE_NATIVE_SP) {
+ lx_lwp_set_native_stack_current(lwpd, (uintptr_t)sp);
+ }
+
+ /*
+ * We do not wish to restore the value of uc_link in this context,
+ * so replace it with the value currently in the LWP.
+ */
+ if (flags & LX_UC_IGNORE_LINK) {
+ ucp->uc_link = (ucontext_t *)lwp->lwp_oldcontext;
+ }
+
+ /*
+ * Restore the stack mode:
+ */
+ if (flags & LX_UC_STACK_NATIVE) {
+ lwpd->br_stack_mode = LX_STACK_MODE_NATIVE;
+ } else if (flags & LX_UC_STACK_BRAND) {
+ lwpd->br_stack_mode = LX_STACK_MODE_BRAND;
+ }
+
+#if defined(__amd64)
+ /*
+ * Override the fs/gsbase in the context with the value provided
+ * through the Linux arch_prctl(2) system call.
+ */
+ if (flags & LX_UC_STACK_BRAND) {
+ if (lwpd->br_lx_fsbase != 0) {
+ ucp->uc_mcontext.gregs[REG_FSBASE] = lwpd->br_lx_fsbase;
+ }
+ if (lwpd->br_lx_gsbase != 0) {
+ ucp->uc_mcontext.gregs[REG_GSBASE] = lwpd->br_lx_gsbase;
+ }
+ }
+#endif
+}
+
+static void
+lx_savecontext(ucontext_t *ucp)
+{
+ klwp_t *lwp = ttolwp(curthread);
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+ uintptr_t flags = 0;
+
+ /*
+ * The ucontext_t affords us three private pointer-sized members in
+ * "uc_brand_data". We pack a variety of flags into the first element,
+ * and an optional stack pointer in the second element. The flags
+ * determine which stack pointer (native or brand), if any, is stored
+ * in the second element. The third element may contain the system
+ * call number; this is analogous to the "orig_[er]ax" member of a
+ * Linux "user_regs_struct".
+ */
+
+ if (lwpd->br_stack_mode != LX_STACK_MODE_INIT &&
+ lwpd->br_stack_mode != LX_STACK_MODE_PREINIT) {
+ /*
+ * Record the value of the native stack pointer to restore
+ * when returning to this branded context:
+ */
+ flags |= LX_UC_RESTORE_NATIVE_SP;
+ ucp->uc_brand_data[1] = (void *)lwpd->br_ntv_stack_current;
+ }
+
+ /*
+ * Save the stack mode:
+ */
+ if (lwpd->br_stack_mode == LX_STACK_MODE_NATIVE) {
+ flags |= LX_UC_STACK_NATIVE;
+ } else if (lwpd->br_stack_mode == LX_STACK_MODE_BRAND) {
+ flags |= LX_UC_STACK_BRAND;
+ }
+
+ /*
+ * If we might need to restart this system call, save that information
+ * in the context:
+ */
+ if (lwpd->br_stack_mode == LX_STACK_MODE_BRAND) {
+ ucp->uc_brand_data[2] =
+ (void *)(uintptr_t)lwpd->br_syscall_num;
+ if (lwpd->br_syscall_restart) {
+ flags |= LX_UC_RESTART_SYSCALL;
+ }
+ } else {
+ ucp->uc_brand_data[2] = NULL;
+ }
+
+ ucp->uc_brand_data[0] = (void *)flags;
+}
+
+#if defined(_SYSCALL32_IMPL)
+static void
+lx_savecontext32(ucontext32_t *ucp)
+{
+ klwp_t *lwp = ttolwp(curthread);
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+ unsigned int flags = 0;
+
+ /*
+ * The ucontext_t affords us three private pointer-sized members in
+ * "uc_brand_data". We pack a variety of flags into the first element,
+ * and an optional stack pointer in the second element. The flags
+ * determine which stack pointer (native or brand), if any, is stored
+ * in the second element. The third element may contain the system
+ * call number; this is analogous to the "orig_[er]ax" member of a
+ * Linux "user_regs_struct".
+ */
+
+ if (lwpd->br_stack_mode != LX_STACK_MODE_INIT &&
+ lwpd->br_stack_mode != LX_STACK_MODE_PREINIT) {
+ /*
+ * Record the value of the native stack pointer to restore
+ * when returning to this branded context:
+ */
+ flags |= LX_UC_RESTORE_NATIVE_SP;
+ ucp->uc_brand_data[1] = (caddr32_t)lwpd->br_ntv_stack_current;
+ }
+
+ /*
+ * Save the stack mode:
+ */
+ if (lwpd->br_stack_mode == LX_STACK_MODE_NATIVE) {
+ flags |= LX_UC_STACK_NATIVE;
+ } else if (lwpd->br_stack_mode == LX_STACK_MODE_BRAND) {
+ flags |= LX_UC_STACK_BRAND;
+ }
+
+ /*
+ * If we might need to restart this system call, save that information
+ * in the context:
+ */
+ if (lwpd->br_stack_mode == LX_STACK_MODE_BRAND) {
+ ucp->uc_brand_data[2] = (caddr32_t)lwpd->br_syscall_num;
+ if (lwpd->br_syscall_restart) {
+ flags |= LX_UC_RESTART_SYSCALL;
+ }
+ } else {
+ ucp->uc_brand_data[2] = NULL;
+ }
+
+ ucp->uc_brand_data[0] = flags;
+}
+#endif
+
+static int
+lx_zfs_ioctl(ldi_handle_t lh, int cmd, zfs_cmd_t *zc, size_t *dst_alloc_size)
+{
+ uint64_t cookie;
+ size_t dstsize;
+ int rc, unused;
+
+ cookie = zc->zc_cookie;
+
+ dstsize = (dst_alloc_size == NULL ? 0 : 8192);
+
+again:
+ if (dst_alloc_size != NULL) {
+ zc->zc_nvlist_dst = (uint64_t)(intptr_t)kmem_alloc(dstsize,
+ KM_SLEEP);
+ zc->zc_nvlist_dst_size = dstsize;
+ }
+
+ rc = ldi_ioctl(lh, cmd, (intptr_t)zc, FKIOCTL, kcred, &unused);
+ if (rc == ENOMEM && dst_alloc_size != NULL) {
+ /*
+ * Our nvlist_dst buffer was too small, retry with a bigger
+ * buffer. ZFS will tell us the exact needed size.
+ */
+ size_t newsize = zc->zc_nvlist_dst_size;
+ ASSERT(newsize > dstsize);
+
+ kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, dstsize);
+ dstsize = newsize;
+ zc->zc_cookie = cookie;
+
+ goto again;
+ }
+
+ if (dst_alloc_size != NULL) {
+ *dst_alloc_size = dstsize;
+ }
+
+ return (rc);
+}
+
+static int
+lx_zone_zfs_open(ldi_handle_t *lh, dev_t *zfs_dev)
+{
+ ldi_ident_t li;
+
+ if (ldi_ident_from_mod(&modlinkage, &li) != 0) {
+ return (-1);
+ }
+ if (ldi_open_by_name("/dev/zfs", FREAD|FWRITE, kcred, lh, li) != 0) {
+ ldi_ident_release(li);
+ return (-1);
+ }
+ ldi_ident_release(li);
+ if (ldi_get_dev(*lh, zfs_dev) != 0) {
+ ldi_close(*lh, FREAD|FWRITE, kcred);
+ return (-1);
+ }
+ return (0);
+}
+
+/*
+ * We only get the relevant properties for zvols. This is because we're
+ * essentially iterating all of the ZFS datasets/zvols on the entire system
+ * when we boot the zone and there is a significant performance penalty if we
+ * have to retrieve all of the properties for everything. Especially since we
+ * don't care about any of them except the zvols actually in our delegated
+ * datasets.
+ *
+ * Note that the two properties we care about, volsize & volblocksize, are
+ * mandatory for zvols and should always be present. Also, note that the
+ * blocksize property value cannot change after the zvol has been created.
+ */
+static void
+lx_zvol_props(ldi_handle_t lh, zfs_cmd_t *zc, uint64_t *vsz, uint64_t *bsz)
+{
+ int rc;
+ size_t size;
+ nvlist_t *nv = NULL, *nv2;
+
+ rc = lx_zfs_ioctl(lh, ZFS_IOC_OBJSET_STATS, zc, &size);
+ if (rc != 0)
+ return;
+
+ rc = nvlist_unpack((char *)(uintptr_t)zc->zc_nvlist_dst,
+ zc->zc_nvlist_dst_size, &nv, 0);
+ ASSERT(rc == 0);
+
+ kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
+ zc->zc_nvlist_dst = NULL;
+ zc->zc_nvlist_dst_size = 0;
+
+ if ((rc = nvlist_lookup_nvlist(nv, "volsize", &nv2)) == 0) {
+ uint64_t val;
+
+ rc = nvlist_lookup_uint64(nv2, ZPROP_VALUE, &val);
+ if (rc == 0) {
+ *vsz = val;
+ }
+ }
+
+ if ((rc = nvlist_lookup_nvlist(nv, "volblocksize", &nv2)) == 0) {
+ uint64_t val;
+
+ rc = nvlist_lookup_uint64(nv2, ZPROP_VALUE, &val);
+ if (rc == 0) {
+ *bsz = val;
+ }
+ }
+
+ nvlist_free(nv);
+}
+
+/*
+ * Unlike ZFS proper, which does dynamic zvols, we currently only generate the
+ * zone's "disk" list once at zone boot time and use that consistently in all
+ * of the various subsystems (devfs, sysfs, procfs). This allows us to avoid
+ * re-iterating the datasets every time one of those subsystems accesses a
+ * "disk" and allows us to keep the view consistent across all subsystems, but
+ * it does mean a reboot is required to see new "disks". This is somewhat
+ * mitigated by its similarity to actual disk drives on a real system.
+ */
+static void
+lx_zone_get_zvols(zone_t *zone, ldi_handle_t lh, minor_t *emul_minor)
+{
+ lx_zone_data_t *lxzd;
+ list_t *zvol_lst, ds_lst;
+ int rc;
+ unsigned int devnum = 0;
+ size_t size;
+ zfs_cmd_t *zc;
+ nvpair_t *elem = NULL;
+ nvlist_t *pnv = NULL;
+
+ lxzd = ztolxzd(zone);
+ ASSERT(lxzd != NULL);
+ zvol_lst = lxzd->lxzd_vdisks;
+
+ zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
+ if (lx_zfs_ioctl(lh, ZFS_IOC_POOL_CONFIGS, zc, &size) != 0) {
+ goto out;
+ }
+ ASSERT(zc->zc_cookie > 0);
+
+ rc = nvlist_unpack((char *)(uintptr_t)zc->zc_nvlist_dst,
+ zc->zc_nvlist_dst_size, &pnv, 0);
+ kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
+ if (rc != 0)
+ goto out;
+
+ /*
+ * We use a dataset list to process all of the datasets in the pool
+ * without doing recursion so that we don't risk blowing the kernel
+ * stack.
+ */
+ list_create(&ds_lst, sizeof (lx_zfs_ds_t),
+ offsetof(lx_zfs_ds_t, ds_link));
+
+ while ((elem = nvlist_next_nvpair(pnv, elem)) != NULL) {
+ lx_zfs_ds_t *ds;
+
+ ds = kmem_zalloc(sizeof (lx_zfs_ds_t), KM_SLEEP);
+ (void) strcpy(ds->ds_name, nvpair_name(elem));
+ list_insert_head(&ds_lst, ds);
+
+ while (ds != NULL) {
+ int w; /* dummy variable */
+
+ bzero(zc, sizeof (zfs_cmd_t));
+ zc->zc_cookie = ds->ds_cookie;
+ (void) strcpy(zc->zc_name, ds->ds_name);
+
+ rc = lx_zfs_ioctl(lh, ZFS_IOC_DATASET_LIST_NEXT,
+ zc, NULL);
+ /* Update the cookie before doing anything else. */
+ ds->ds_cookie = zc->zc_cookie;
+
+ if (rc != 0) {
+ list_remove(&ds_lst, ds);
+ kmem_free(ds, sizeof (lx_zfs_ds_t));
+ ds = list_tail(&ds_lst);
+ continue;
+ }
+
+ /* Reserved internal names, skip over these. */
+ if (strchr(zc->zc_name, '$') != NULL ||
+ strchr(zc->zc_name, '%') != NULL)
+ continue;
+
+ if (!zone_dataset_visible_inzone(zone, zc->zc_name, &w))
+ continue;
+
+ if (zc->zc_objset_stats.dds_type == DMU_OST_ZVOL) {
+ lx_virt_disk_t *vd;
+ minor_t m = 0;
+ char *znm = zc->zc_name;
+
+ /* Create a virtual disk entry for the zvol */
+ vd = kmem_zalloc(sizeof (lx_virt_disk_t),
+ KM_SLEEP);
+ vd->lxvd_type = LXVD_ZVOL;
+ (void) snprintf(vd->lxvd_name,
+ sizeof (vd->lxvd_name),
+ "zvol%u", devnum++);
+ (void) strlcpy(vd->lxvd_real_name,
+ zc->zc_name,
+ sizeof (vd->lxvd_real_name));
+
+ /* Record emulated and real dev_t values */
+ vd->lxvd_emul_dev = makedevice(LX_MAJOR_DISK,
+ (*emul_minor)++);
+ if (zvol_name2minor(znm, &m) != 0) {
+ (void) zvol_create_minor(znm);
+ zvol_name2minor(znm, &m);
+ }
+ if (m != 0) {
+ vd->lxvd_real_dev = makedevice(
+ getmajor(lxzd->lxzd_zfs_dev), m);
+ }
+
+ /* Query volume size properties */
+ lx_zvol_props(lh, zc, &vd->lxvd_volsize,
+ &vd->lxvd_blksize);
+
+ list_insert_tail(zvol_lst, vd);
+ } else {
+ lx_zfs_ds_t *nds;
+
+ /* Create a new ds_t for the child. */
+ nds = kmem_zalloc(sizeof (lx_zfs_ds_t),
+ KM_SLEEP);
+ (void) strcpy(nds->ds_name, zc->zc_name);
+ list_insert_after(&ds_lst, ds, nds);
+
+ /* Depth-first, so do the one just created. */
+ ds = nds;
+ }
+ }
+
+ ASSERT(list_is_empty(&ds_lst));
+ }
+
+ list_destroy(&ds_lst);
+
+out:
+ nvlist_free(pnv);
+ kmem_free(zc, sizeof (zfs_cmd_t));
+}
+
+static void
+lx_zone_get_zfsds(zone_t *zone, minor_t *emul_minor)
+{
+ lx_zone_data_t *lxzd = ztolxzd(zone);
+ vfs_t *vfsp = zone->zone_rootvp->v_vfsp;
+
+ /*
+ * Only the root will be mounted at zone init time.
+ * Finding means of discovering other datasets mounted in the zone
+ * would be a good enhancement later.
+ */
+ if (getmajor(vfsp->vfs_dev) == getmajor(lxzd->lxzd_zfs_dev)) {
+ lx_virt_disk_t *vd;
+
+ vd = kmem_zalloc(sizeof (lx_virt_disk_t), KM_SLEEP);
+ vd->lxvd_type = LXVD_ZFS_DS;
+ vd->lxvd_real_dev = vfsp->vfs_dev;
+ vd->lxvd_emul_dev = makedevice(LX_MAJOR_DISK, (*emul_minor)++);
+ snprintf(vd->lxvd_name, sizeof (vd->lxvd_name),
+ "zfsds%u", 0);
+ (void) strlcpy(vd->lxvd_real_name,
+ refstr_value(vfsp->vfs_resource),
+ sizeof (vd->lxvd_real_name));
+
+ list_insert_tail(lxzd->lxzd_vdisks, vd);
+ }
+}
+
+/* Cleanup virtual disk list */
+static void
+lx_zone_cleanup_vdisks(lx_zone_data_t *lxzd)
+{
+ lx_virt_disk_t *vd;
+
+ ASSERT(lxzd->lxzd_vdisks != NULL);
+ vd = (list_remove_head(lxzd->lxzd_vdisks));
+ while (vd != NULL) {
+ kmem_free(vd, sizeof (lx_virt_disk_t));
+ vd = list_remove_head(lxzd->lxzd_vdisks);
+ }
+
+ list_destroy(lxzd->lxzd_vdisks);
+ kmem_free(lxzd->lxzd_vdisks, sizeof (list_t));
+ lxzd->lxzd_vdisks = NULL;
+}
+
+void
+lx_init_brand_data(zone_t *zone, kmutex_t *zsl)
+{
+ lx_zone_data_t *data;
+ ldi_handle_t lh;
+
+ ASSERT(MUTEX_HELD(zsl));
+ ASSERT(zone->zone_brand == &lx_brand);
+ ASSERT(zone->zone_brand_data == NULL);
+
+ data = (lx_zone_data_t *)kmem_zalloc(sizeof (lx_zone_data_t), KM_SLEEP);
+ mutex_init(&data->lxzd_lock, NULL, MUTEX_DEFAULT, NULL);
+
+ /* No need to hold mutex now since zone_brand_data is not set yet. */
+
+ /*
+ * Set the default lxzd_kernel_version to 2.4.
+ * This can be changed by a call to setattr() during zone boot.
+ */
+ (void) strlcpy(data->lxzd_kernel_release, "2.4.21",
+ LX_KERN_RELEASE_MAX);
+ (void) strlcpy(data->lxzd_kernel_version, "BrandZ virtual linux",
+ LX_KERN_VERSION_MAX);
+
+ zone->zone_brand_data = data;
+
+ /*
+ * In Linux, if the init(1) process terminates the system panics.
+ * The zone must reboot to simulate this behaviour.
+ */
+ zone->zone_reboot_on_init_exit = B_TRUE;
+
+ /*
+ * We cannot hold the zone_status_lock while performing zfs operations
+ * so we drop the lock, get the zfs devs as the last step in this
+ * function, then reaquire the lock. Don't add any code after this
+ * which requires that the zone_status_lock was continuously held.
+ */
+ mutex_exit(zsl);
+
+ data->lxzd_vdisks = kmem_alloc(sizeof (list_t), KM_SLEEP);
+ list_create(data->lxzd_vdisks, sizeof (lx_virt_disk_t),
+ offsetof(lx_virt_disk_t, lxvd_link));
+
+ if (lx_zone_zfs_open(&lh, &data->lxzd_zfs_dev) == 0) {
+ minor_t emul_minor = 1;
+
+ lx_zone_get_zfsds(zone, &emul_minor);
+ lx_zone_get_zvols(zone, lh, &emul_minor);
+ ldi_close(lh, FREAD|FWRITE, kcred);
+ } else {
+ /* Avoid matching any devices */
+ data->lxzd_zfs_dev = makedevice(-1, 0);
+ }
+ mutex_enter(zsl);
+}
+
+void
+lx_free_brand_data(zone_t *zone)
+{
+ lx_zone_data_t *data = ztolxzd(zone);
+ ASSERT(data != NULL);
+ mutex_enter(&data->lxzd_lock);
+ if (data->lxzd_ioctl_sock != NULL) {
+ /*
+ * Since zone_kcred has been cleaned up already, close the
+ * socket using the global kcred.
+ */
+ ksocket_close(data->lxzd_ioctl_sock, kcred);
+ data->lxzd_ioctl_sock = NULL;
+ }
+ ASSERT(data->lxzd_cgroup == NULL);
+
+ lx_zone_cleanup_vdisks(data);
+
+ mutex_exit(&data->lxzd_lock);
+ zone->zone_brand_data = NULL;
+ mutex_destroy(&data->lxzd_lock);
+ kmem_free(data, sizeof (*data));
+}
+
+void
+lx_unsupported(char *dmsg)
+{
+ lx_proc_data_t *pd = ttolxproc(curthread);
+
+ DTRACE_PROBE1(brand__lx__unsupported, char *, dmsg);
+
+ if (pd != NULL && (pd->l_flags & LX_PROC_STRICT_MODE) != 0) {
+ /*
+ * If this process was run with strict mode enabled
+ * (via LX_STRICT in the environment), we mark this
+ * LWP as having triggered an unsupported behaviour.
+ * This flag will be checked at an appropriate point
+ * by lx_check_strict_failure().
+ */
+ lx_lwp_data_t *lwpd = ttolxlwp(curthread);
+
+ lwpd->br_strict_failure = B_TRUE;
+ }
+}
+
+void
+lx_check_strict_failure(lx_lwp_data_t *lwpd)
+{
+ proc_t *p;
+
+ if (!lwpd->br_strict_failure) {
+ return;
+ }
+
+ lwpd->br_strict_failure = B_FALSE;
+
+ /*
+ * If this process is operating in strict mode (via LX_STRICT in
+ * the environment), and has triggered a call to
+ * lx_unsupported(), we drop SIGSYS on it as we return.
+ */
+ p = curproc;
+ mutex_enter(&p->p_lock);
+ sigtoproc(p, curthread, SIGSYS);
+ mutex_exit(&p->p_lock);
+}
+
+void
+lx_trace_sysenter(int syscall_num, uintptr_t *args)
+{
+ if (lx_systrace_enabled) {
+ VERIFY(lx_systrace_entry_ptr != NULL);
+
+ (*lx_systrace_entry_ptr)(syscall_num, args[0], args[1],
+ args[2], args[3], args[4], args[5]);
+ }
+}
+
+void
+lx_trace_sysreturn(int syscall_num, long ret)
+{
+ if (lx_systrace_enabled) {
+ VERIFY(lx_systrace_return_ptr != NULL);
+
+ (*lx_systrace_return_ptr)(syscall_num, ret, ret, 0, 0, 0, 0);
+ }
+}
+
+/*
+ * Get the addresses of the user-space system call handler and attach it to
+ * the proc structure. Returning 0 indicates success; the value returned
+ * by the system call is the value stored in rval. Returning a non-zero
+ * value indicates a failure; the value returned is used to set errno, -1
+ * is returned from the syscall and the contents of rval are ignored. To
+ * set errno and have the syscall return a value other than -1 we can
+ * manually set errno and rval and return 0.
+ */
+int
+lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
+ uintptr_t arg3, uintptr_t arg4, uintptr_t arg5)
+{
+ kthread_t *t = curthread;
+ klwp_t *lwp = ttolwp(t);
+ proc_t *p = ttoproc(t);
+ lx_proc_data_t *pd;
+ struct termios *termios;
+ uint_t termios_len;
+ int error;
+ int code;
+ int sig;
+ lx_brand_registration_t reg;
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+
+ /*
+ * There is one operation that is suppored for non-branded
+ * process. B_EXEC_BRAND. This is the equilivant of an
+ * exec call, but the new process that is created will be
+ * a branded process.
+ */
+ if (cmd == B_EXEC_BRAND) {
+ VERIFY(p->p_zone != NULL);
+ VERIFY(p->p_zone->zone_brand == &lx_brand);
+ return (exec_common(
+ (char *)arg1, (const char **)arg2, (const char **)arg3,
+ EBA_BRAND));
+ }
+
+ /* For all other operations this must be a branded process. */
+ if (p->p_brand == NULL)
+ return (ENOSYS);
+
+ VERIFY(p->p_brand == &lx_brand);
+ VERIFY(p->p_brand_data != NULL);
+
+ switch (cmd) {
+ case B_REGISTER:
+ if (lwpd->br_stack_mode != LX_STACK_MODE_PREINIT) {
+ lx_print("stack mode was not PREINIT during "
+ "REGISTER\n");
+ return (EINVAL);
+ }
+
+ if (p->p_model == DATAMODEL_NATIVE) {
+ if (copyin((void *)arg1, &reg, sizeof (reg)) != 0) {
+ lx_print("Failed to copyin brand registration "
+ "at 0x%p\n", (void *)arg1);
+ return (EFAULT);
+ }
+ }
+#ifdef _LP64
+ else {
+ /* 32-bit userland on 64-bit kernel */
+ lx_brand_registration32_t reg32;
+
+ if (copyin((void *)arg1, &reg32, sizeof (reg32)) != 0) {
+ lx_print("Failed to copyin brand registration "
+ "at 0x%p\n", (void *)arg1);
+ return (EFAULT);
+ }
+
+ reg.lxbr_version = (uint_t)reg32.lxbr_version;
+ reg.lxbr_handler =
+ (void *)(uintptr_t)reg32.lxbr_handler;
+ reg.lxbr_flags = reg32.lxbr_flags;
+ }
+#endif
+
+ if (reg.lxbr_version != LX_VERSION_1) {
+ lx_print("Invalid brand library version (%u)\n",
+ reg.lxbr_version);
+ return (EINVAL);
+ }
+
+ if ((reg.lxbr_flags & ~LX_PROC_ALL) != 0) {
+ lx_print("Invalid brand flags (%u)\n",
+ reg.lxbr_flags);
+ return (EINVAL);
+ }
+
+ lx_print("Assigning brand 0x%p and handler 0x%p to proc 0x%p\n",
+ (void *)&lx_brand, (void *)reg.lxbr_handler, (void *)p);
+ pd = p->p_brand_data;
+ pd->l_handler = (uintptr_t)reg.lxbr_handler;
+ pd->l_flags = reg.lxbr_flags & LX_PROC_ALL;
+
+ return (0);
+
+ case B_TTYMODES:
+ /* This is necessary for emulating TCGETS ioctls. */
+ if (ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, ddi_root_node(),
+ DDI_PROP_NOTPROM, "ttymodes", (uchar_t **)&termios,
+ &termios_len) != DDI_SUCCESS)
+ return (EIO);
+
+ ASSERT(termios_len == sizeof (*termios));
+
+ if (copyout(&termios, (void *)arg1, sizeof (termios)) != 0) {
+ ddi_prop_free(termios);
+ return (EFAULT);
+ }
+
+ ddi_prop_free(termios);
+ return (0);
+
+ case B_ELFDATA: {
+ mutex_enter(&p->p_lock);
+ pd = curproc->p_brand_data;
+ if (get_udatamodel() == DATAMODEL_NATIVE) {
+ lx_elf_data_t led;
+
+ bcopy(&pd->l_elf_data, &led, sizeof (led));
+ mutex_exit(&p->p_lock);
+
+ if (copyout(&led, (void *)arg1,
+ sizeof (lx_elf_data_t)) != 0) {
+ return (EFAULT);
+ }
+ }
+#if defined(_LP64)
+ else {
+ /* 32-bit userland on 64-bit kernel */
+ lx_elf_data32_t led32;
+
+ led32.ed_phdr = (int)pd->l_elf_data.ed_phdr;
+ led32.ed_phent = (int)pd->l_elf_data.ed_phent;
+ led32.ed_phnum = (int)pd->l_elf_data.ed_phnum;
+ led32.ed_entry = (int)pd->l_elf_data.ed_entry;
+ led32.ed_base = (int)pd->l_elf_data.ed_base;
+ led32.ed_ldentry = (int)pd->l_elf_data.ed_ldentry;
+ mutex_exit(&p->p_lock);
+
+ if (copyout(&led32, (void *)arg1,
+ sizeof (led32)) != 0) {
+ return (EFAULT);
+ }
+ }
+#endif
+ return (0);
+ }
+
+ case B_EXEC_NATIVE:
+ return (exec_common((char *)arg1, (const char **)arg2,
+ (const char **)arg3, EBA_NATIVE));
+
+ /*
+ * The B_TRUSS_POINT subcommand is used so that we can make a no-op
+ * syscall for debugging purposes (dtracing) from within the user-level
+ * emulation.
+ */
+ case B_TRUSS_POINT:
+ return (0);
+
+ case B_LPID_TO_SPAIR: {
+ /*
+ * Given a Linux pid as arg1, return the Solaris pid in arg2 and
+ * the Solaris LWP in arg3. We also translate pid 1 (which is
+ * hardcoded in many applications) to the zone's init process.
+ */
+ pid_t s_pid;
+ id_t s_tid;
+
+ if ((pid_t)arg1 == 1) {
+ s_pid = p->p_zone->zone_proc_initpid;
+ /* handle the dead/missing init(1M) case */
+ if (s_pid == -1)
+ s_pid = 1;
+ s_tid = 1;
+ } else if (lx_lpid_to_spair((pid_t)arg1, &s_pid, &s_tid) < 0) {
+ return (ESRCH);
+ }
+
+ if (copyout(&s_pid, (void *)arg2, sizeof (s_pid)) != 0 ||
+ copyout(&s_tid, (void *)arg3, sizeof (s_tid)) != 0) {
+ return (EFAULT);
+ }
+
+ return (0);
+ }
+
+ case B_SIGEV_THREAD_ID: {
+ /*
+ * Emulate Linux's timer_create(2) SIGEV_THREAD_ID
+ * notification method. This mechanism is only meant
+ * for userland threading libraries such as glibc and
+ * is documented as such. Therefore, assume this is
+ * only ever invoked for the purpose of alerting a
+ * Linux threading library. Assume that the tid is a
+ * member of the caller's process and the signal
+ * number is valid. See lx_sigev_thread_id() for the
+ * userland side of this emulation.
+ *
+ * The return code from this function is not checked
+ * by the caller since it executes in an asynchronous
+ * context and there is nothing much to be done. If
+ * this function does fail then it will manifest as
+ * Linux threads waiting for a signal they will never
+ * receive.
+ *
+ * arg1 -- Linux tid
+ * arg2 -- Linux signal number
+ * arg3 -- sigval pointer
+ */
+
+ int native_sig = lx_ltos_signo((int)arg2, 0);
+ pid_t native_pid;
+ int native_tid;
+ sigqueue_t *sqp;
+
+ if (native_sig == 0)
+ return (EINVAL);
+
+ lx_lpid_to_spair((pid_t)arg1, &native_pid, &native_tid);
+ sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
+ mutex_enter(&curproc->p_lock);
+
+ if ((t = idtot(curproc, native_tid)) == NULL) {
+ mutex_exit(&curproc->p_lock);
+ kmem_free(sqp, sizeof (sigqueue_t));
+ return (ESRCH);
+ }
+
+ sqp->sq_info.si_signo = native_sig;
+ sqp->sq_info.si_code = SI_TIMER;
+ sqp->sq_info.si_pid = curproc->p_pid;
+ sqp->sq_info.si_zoneid = getzoneid();
+ sqp->sq_info.si_uid = crgetruid(CRED());
+ sqp->sq_info.si_value.sival_ptr = (void *)arg3;
+ sigaddqa(curproc, t, sqp);
+
+ mutex_exit(&curproc->p_lock);
+
+ return (0);
+ }
+
+ case B_SET_AFFINITY_MASK:
+ case B_GET_AFFINITY_MASK:
+ /*
+ * Retrieve or store the CPU affinity mask for the
+ * requested linux pid.
+ *
+ * arg1 is a linux PID (0 means curthread).
+ * arg2 is the size of the given mask.
+ * arg3 is the address of the affinity mask.
+ */
+ return (lx_sched_affinity(cmd, arg1, arg2, arg3, rval));
+
+ case B_PTRACE_STOP_FOR_OPT:
+ return (lx_ptrace_stop_for_option((int)arg1, arg2 == 0 ?
+ B_FALSE : B_TRUE, (ulong_t)arg3, arg4));
+
+ case B_PTRACE_CLONE_BEGIN:
+ return (lx_ptrace_set_clone_inherit((int)arg1, arg2 == 0 ?
+ B_FALSE : B_TRUE));
+
+ case B_HELPER_WAITID: {
+ idtype_t idtype = (idtype_t)arg1;
+ id_t id = (id_t)arg2;
+ siginfo_t *infop = (siginfo_t *)arg3;
+ int options = (int)arg4;
+
+ lwpd = ttolxlwp(curthread);
+
+ /*
+ * Our brand-specific waitid helper only understands a subset of
+ * the possible idtypes. Ensure we keep to that subset here:
+ */
+ if (idtype != P_ALL && idtype != P_PID && idtype != P_PGID) {
+ return (EINVAL);
+ }
+
+ /*
+ * Enable the return of emulated ptrace(2) stop conditions
+ * through lx_waitid_helper, and stash the Linux-specific
+ * extra waitid() flags.
+ */
+ lwpd->br_waitid_emulate = B_TRUE;
+ lwpd->br_waitid_flags = (int)arg5;
+
+#if defined(_SYSCALL32_IMPL)
+ if (get_udatamodel() != DATAMODEL_NATIVE) {
+ return (waitsys32(idtype, id, infop, options));
+ } else
+#endif
+ {
+ return (waitsys(idtype, id, infop, options));
+ }
+
+ lwpd->br_waitid_emulate = B_FALSE;
+ lwpd->br_waitid_flags = 0;
+
+ return (0);
+ }
+
+ case B_UNSUPPORTED: {
+ char dmsg[256];
+
+ if (copyin((void *)arg1, &dmsg, sizeof (dmsg)) != 0) {
+ lx_print("Failed to copyin unsupported msg "
+ "at 0x%p\n", (void *)arg1);
+ return (EFAULT);
+ }
+ dmsg[255] = '\0';
+ lx_unsupported(dmsg);
+
+ lx_check_strict_failure(lwpd);
+
+ return (0);
+ }
+
+ case B_STORE_ARGS: {
+ /*
+ * B_STORE_ARGS subcommand
+ * arg1 = address of struct to be copied in
+ * arg2 = size of the struct being copied in
+ * arg3-arg6 ignored
+ * rval = the amount of data copied.
+ */
+ void *buf;
+
+ /* only have upper limit because arg2 is unsigned */
+ if (arg2 > LX_BR_ARGS_SIZE_MAX) {
+ return (EINVAL);
+ }
+
+ buf = kmem_alloc(arg2, KM_SLEEP);
+ if (copyin((void *)arg1, buf, arg2) != 0) {
+ lx_print("Failed to copyin scall arg at 0x%p\n",
+ (void *) arg1);
+ kmem_free(buf, arg2);
+ /*
+ * Purposely not setting br_scall_args to NULL
+ * to preserve data for debugging.
+ */
+ return (EFAULT);
+ }
+
+ if (lwpd->br_scall_args != NULL) {
+ ASSERT(lwpd->br_args_size > 0);
+ kmem_free(lwpd->br_scall_args,
+ lwpd->br_args_size);
+ }
+
+ lwpd->br_scall_args = buf;
+ lwpd->br_args_size = arg2;
+ *rval = arg2;
+ return (0);
+ }
+
+ case B_HELPER_CLONE:
+ return (lx_helper_clone(rval, arg1, (void *)arg2, (void *)arg3,
+ (void *)arg4));
+
+ case B_HELPER_SETGROUPS:
+ return (lx_helper_setgroups(arg1, (gid_t *)arg2));
+
+ case B_HELPER_SIGQUEUE:
+ return (lx_helper_rt_sigqueueinfo(arg1, arg2,
+ (siginfo_t *)arg3));
+
+ case B_HELPER_TGSIGQUEUE:
+ return (lx_helper_rt_tgsigqueueinfo(arg1, arg2, arg3,
+ (siginfo_t *)arg4));
+
+ case B_SET_THUNK_PID:
+ lwpd->br_lx_thunk_pid = arg1;
+ return (0);
+
+ case B_GETPID:
+ /*
+ * The usermode clone(2) code needs to be able to call
+ * lx_getpid() from native code:
+ */
+ *rval = lx_getpid();
+ return (0);
+
+ case B_SET_NATIVE_STACK:
+ /*
+ * B_SET_NATIVE_STACK subcommand
+ * arg1 = the base of the stack to use for emulation
+ */
+ if (lwpd->br_stack_mode != LX_STACK_MODE_PREINIT) {
+ lx_print("B_SET_NATIVE_STACK when stack was already "
+ "set to %p\n", (void *)arg1);
+ return (EEXIST);
+ }
+
+ /*
+ * We move from the PREINIT state, where we have no brand
+ * emulation stack, to the INIT state. Here, we are still
+ * running on what will become the BRAND stack, but are running
+ * emulation (i.e. native) code. Once the initialisation
+ * process for this thread has finished, we will jump to
+ * brand-specific code, while moving to the BRAND mode.
+ *
+ * When a new LWP is created, lx_initlwp() will clear the
+ * stack data. If that LWP is actually being duplicated
+ * into a child process by fork(2), lx_forklwp() will copy
+ * it so that the cloned thread will keep using the same
+ * alternate stack.
+ */
+ lwpd->br_ntv_stack = arg1;
+ lwpd->br_stack_mode = LX_STACK_MODE_INIT;
+ lx_lwp_set_native_stack_current(lwpd, arg1);
+
+ return (0);
+
+ case B_GET_CURRENT_CONTEXT:
+ /*
+ * B_GET_CURRENT_CONTEXT subcommand:
+ * arg1 = address for pointer to current ucontext_t
+ */
+
+#if defined(_SYSCALL32_IMPL)
+ if (get_udatamodel() != DATAMODEL_NATIVE) {
+ caddr32_t addr = (caddr32_t)lwp->lwp_oldcontext;
+
+ error = copyout(&addr, (void *)arg1, sizeof (addr));
+ } else
+#endif
+ {
+ error = copyout(&lwp->lwp_oldcontext, (void *)arg1,
+ sizeof (lwp->lwp_oldcontext));
+ }
+
+ return (error != 0 ? EFAULT : 0);
+
+ case B_JUMP_TO_LINUX:
+ /*
+ * B_JUMP_TO_LINUX subcommand:
+ * arg1 = ucontext_t pointer for jump state
+ */
+
+ if (arg1 == NULL)
+ return (EINVAL);
+
+ switch (lwpd->br_stack_mode) {
+ case LX_STACK_MODE_NATIVE: {
+ struct regs *rp = lwptoregs(lwp);
+
+ /*
+ * We are on the NATIVE stack, so we must preserve
+ * the extent of that stack. The pointer will be
+ * reset by a future setcontext().
+ */
+ lx_lwp_set_native_stack_current(lwpd,
+ (uintptr_t)rp->r_sp);
+ break;
+ }
+
+ case LX_STACK_MODE_INIT:
+ /*
+ * The LWP is transitioning to Linux code for the first
+ * time.
+ */
+ break;
+
+ case LX_STACK_MODE_PREINIT:
+ /*
+ * This LWP has not installed an alternate stack for
+ * usermode emulation handling.
+ */
+ return (ENOENT);
+
+ case LX_STACK_MODE_BRAND:
+ /*
+ * The LWP should not be on the BRAND stack.
+ */
+ exit(CLD_KILLED, SIGSYS);
+ return (0);
+ }
+
+ /*
+ * Transfer control to Linux:
+ */
+ return (lx_runexe(lwp, (void *)arg1));
+
+ case B_EMULATION_DONE:
+ /*
+ * B_EMULATION_DONE subcommand:
+ * arg1 = ucontext_t * to restore
+ * arg2 = system call number
+ * arg3 = return code
+ * arg4 = if operation failed, the errno value
+ */
+
+ /*
+ * The first part of this operation is a setcontext() to
+ * restore the register state to the copy we preserved
+ * before vectoring to the usermode emulation routine.
+ * If that fails, we return (hopefully) to the emulation
+ * routine and it will handle the error.
+ */
+#if (_SYSCALL32_IMPL)
+ if (get_udatamodel() != DATAMODEL_NATIVE) {
+ error = getsetcontext32(SETCONTEXT, (void *)arg1);
+ } else
+#endif
+ {
+ error = getsetcontext(SETCONTEXT, (void *)arg1);
+ }
+
+ if (error != 0) {
+ return (error);
+ }
+
+ /*
+ * The saved Linux context has been restored. We handle the
+ * return value or errno with code common to the in-kernel
+ * system call emulation.
+ */
+ if ((error = (int)arg4) != 0) {
+ /*
+ * lx_syscall_return() looks at the errno in the LWP,
+ * so set it here:
+ */
+ set_errno(error);
+ }
+ lx_syscall_return(ttolwp(curthread), (int)arg2, (long)arg3);
+
+ return (0);
+
+ case B_EXIT_AS_SIG:
+ code = CLD_KILLED;
+ sig = (int)arg1;
+ proc_is_exiting(p);
+ if (exitlwps(1) != 0) {
+ mutex_enter(&p->p_lock);
+ lwp_exit();
+ }
+ ttolwp(curthread)->lwp_cursig = sig;
+ if (sig == SIGSEGV) {
+ if (core(sig, 0) == 0)
+ code = CLD_DUMPED;
+ }
+ exit(code, sig);
+ /* NOTREACHED */
+ break;
+
+ case B_OVERRIDE_KERN_VER: {
+ void *urel = (void *)arg1;
+ void *uver = (void *)arg2;
+ size_t len;
+
+ pd = ptolxproc(p);
+ if (urel != NULL) {
+ if (copyinstr(urel, pd->l_uname_release,
+ LX_KERN_RELEASE_MAX, &len) != 0) {
+ return (EFAULT);
+ }
+ pd->l_uname_release[LX_KERN_RELEASE_MAX - 1] = '\0';
+ }
+ if (uver != NULL) {
+ if (copyinstr(uver, pd->l_uname_version,
+ LX_KERN_VERSION_MAX, &len) != 0) {
+ return (EFAULT);
+ }
+ pd->l_uname_version[LX_KERN_VERSION_MAX - 1] = '\0';
+ }
+
+ return (0);
+ }
+
+ case B_GET_PERSONALITY: {
+ unsigned int result;
+
+ mutex_enter(&p->p_lock);
+ pd = ptolxproc(p);
+ result = pd->l_personality;
+ mutex_exit(&p->p_lock);
+ return (result);
+ }
+
+ }
+
+ return (EINVAL);
+}
+
+/*
+ * Compare linux kernel version to the one set for the zone.
+ * Returns greater than 0 if zone version is higher, less than 0 if the zone
+ * version is lower, and 0 if the versions are equal.
+ */
+int
+lx_kern_release_cmp(zone_t *zone, const char *vers)
+{
+ int zvers[3] = {0, 0, 0};
+ int cvers[3] = {0, 0, 0};
+ int i;
+ lx_zone_data_t *lxzd = (lx_zone_data_t *)zone->zone_brand_data;
+
+ VERIFY(zone->zone_brand == &lx_brand);
+
+ mutex_enter(&lxzd->lxzd_lock);
+ (void) sscanf(lxzd->lxzd_kernel_release, "%d.%d.%d", &zvers[0],
+ &zvers[1], &zvers[2]);
+ mutex_exit(&lxzd->lxzd_lock);
+ (void) sscanf(vers, "%d.%d.%d", &cvers[0], &cvers[1], &cvers[2]);
+
+ for (i = 0; i < 3; i++) {
+ if (zvers[i] > cvers[i]) {
+ return (1);
+ } else if (zvers[i] < cvers[i]) {
+ return (-1);
+ }
+ }
+ return (0);
+}
+
+/*
+ * Linux unconditionally removes the setuid and setgid bits when changing
+ * file ownership. This brand hook overrides the illumos native behaviour,
+ * which is based on the PRIV_FILE_SETID privilege.
+ */
+static int
+lx_setid_clear(vattr_t *vap, cred_t *cr)
+{
+ if (S_ISDIR(vap->va_mode)) {
+ return (0);
+ }
+
+ if (vap->va_mode & S_ISUID) {
+ vap->va_mask |= AT_MODE;
+ vap->va_mode &= ~S_ISUID;
+ }
+ if ((vap->va_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
+ vap->va_mask |= AT_MODE;
+ vap->va_mode &= ~S_ISGID;
+ }
+
+ return (0);
+}
+
+/*
+ * Copy the per-process brand data from a parent proc to a child.
+ */
+void
+lx_copy_procdata(proc_t *cp, proc_t *pp)
+{
+ lx_proc_data_t *cpd, *ppd;
+
+ /*
+ * Since b_copy_procdata is called during getproc(), while the child
+ * process is still being initialized, acquiring cp->p_lock should not
+ * be required.
+ */
+ VERIFY(cp->p_brand == &lx_brand);
+ VERIFY(cpd = cp->p_brand_data);
+
+ mutex_enter(&pp->p_lock);
+ VERIFY(pp->p_brand == &lx_brand);
+ VERIFY(ppd = pp->p_brand_data);
+
+ bcopy(ppd, cpd, sizeof (lx_proc_data_t));
+ mutex_exit(&pp->p_lock);
+
+ /*
+ * The l_ptrace count is normally manipulated only while under holding
+ * p_lock. Since this is a freshly created process, it's safe to zero
+ * out. If it is to be inherited, the attach will occur later.
+ */
+ cpd->l_ptrace = 0;
+
+ cpd->l_fake_limits[LX_RLFAKE_LOCKS].rlim_cur = LX_RLIM64_INFINITY;
+ cpd->l_fake_limits[LX_RLFAKE_LOCKS].rlim_max = LX_RLIM64_INFINITY;
+
+ cpd->l_fake_limits[LX_RLFAKE_NICE].rlim_cur = 20;
+ cpd->l_fake_limits[LX_RLFAKE_NICE].rlim_max = 20;
+
+ cpd->l_fake_limits[LX_RLFAKE_RTPRIO].rlim_cur = LX_RLIM64_INFINITY;
+ cpd->l_fake_limits[LX_RLFAKE_RTPRIO].rlim_max = LX_RLIM64_INFINITY;
+
+ cpd->l_fake_limits[LX_RLFAKE_RTTIME].rlim_cur = LX_RLIM64_INFINITY;
+ cpd->l_fake_limits[LX_RLFAKE_RTTIME].rlim_max = LX_RLIM64_INFINITY;
+}
+
+#if defined(_LP64)
+static void
+Ehdr32to64(Elf32_Ehdr *src, Ehdr *dst)
+{
+ bcopy(src->e_ident, dst->e_ident, sizeof (src->e_ident));
+ dst->e_type = src->e_type;
+ dst->e_machine = src->e_machine;
+ dst->e_version = src->e_version;
+ dst->e_entry = src->e_entry;
+ dst->e_phoff = src->e_phoff;
+ dst->e_shoff = src->e_shoff;
+ dst->e_flags = src->e_flags;
+ dst->e_ehsize = src->e_ehsize;
+ dst->e_phentsize = src->e_phentsize;
+ dst->e_phnum = src->e_phnum;
+ dst->e_shentsize = src->e_shentsize;
+ dst->e_shnum = src->e_shnum;
+ dst->e_shstrndx = src->e_shstrndx;
+}
+#endif /* _LP64 */
+
+static void
+restoreexecenv(struct execenv *ep, stack_t *sp)
+{
+ klwp_t *lwp = ttolwp(curthread);
+
+ setexecenv(ep);
+ lwp->lwp_sigaltstack.ss_sp = sp->ss_sp;
+ lwp->lwp_sigaltstack.ss_size = sp->ss_size;
+ lwp->lwp_sigaltstack.ss_flags = sp->ss_flags;
+}
+
+extern int elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int,
+ long *, int, caddr_t, cred_t *, int *);
+
+extern int elf32exec(struct vnode *, execa_t *, uarg_t *, intpdata_t *, int,
+ long *, int, caddr_t, cred_t *, int *);
+
+static uintptr_t
+lx_map_vdso(struct uarg *args, struct cred *cred)
+{
+ int err;
+ char *fpath = LX_VDSO_PATH;
+ vnode_t *vp;
+ vattr_t attr;
+ caddr_t addr;
+
+#if defined(_LP64)
+ if (args->to_model != DATAMODEL_NATIVE) {
+ fpath = LX_VDSO_PATH32;
+ }
+#endif
+
+ /*
+ * The comm page should have been mapped in already.
+ */
+ if (args->commpage == NULL) {
+ return (NULL);
+ }
+
+ /*
+ * Ensure the VDSO library is present and appropriately sized.
+ * This lookup is started at the zone root to avoid complications for
+ * processes which have chrooted. For the specified lookup root to be
+ * used, the leading slash must be dropped from the path.
+ */
+ ASSERT(fpath[0] == '/');
+ fpath++;
+ if (lookupnameat(fpath, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp,
+ curzone->zone_rootvp) != 0) {
+ return (NULL);
+ }
+
+ /*
+ * The VDSO requires data exposed via the comm page in order to
+ * function properly. The VDSO is always mapped in at a fixed known
+ * offset from the comm page, providing an easy means to locate it.
+ */
+ addr = (caddr_t)(args->commpage - LX_VDSO_SIZE);
+ attr.va_mask = AT_SIZE;
+ if (VOP_GETATTR(vp, &attr, 0, cred, NULL) != 0 ||
+ attr.va_size > LX_VDSO_SIZE) {
+ VN_RELE(vp);
+ return (NULL);
+ }
+
+ err = execmap(vp, addr, attr.va_size, 0, 0,
+ PROT_USER|PROT_READ|PROT_EXEC, 1, 0);
+ VN_RELE(vp);
+ if (err != 0) {
+ return (NULL);
+ }
+ return ((uintptr_t)addr);
+}
+
+/*
+ * Exec routine called by elfexec() to load either 32-bit or 64-bit Linux
+ * binaries.
+ */
+static int
+lx_elfexec(struct vnode *vp, struct execa *uap, struct uarg *args,
+ struct intpdata *idata, int level, long *execsz, int setid,
+ caddr_t exec_file, struct cred *cred, int *brand_action)
+{
+ int error, i;
+ vnode_t *nvp;
+ Ehdr ehdr;
+ Addr uphdr_vaddr;
+ intptr_t voffset;
+ char *interp = NULL;
+ uintptr_t ldaddr = NULL;
+ proc_t *p = ttoproc(curthread);
+ klwp_t *lwp = ttolwp(curthread);
+ lx_proc_data_t *lxpd = ptolxproc(p);
+ struct execenv env, origenv;
+ stack_t orig_sigaltstack;
+ struct user *up = PTOU(ttoproc(curthread));
+ lx_elf_data_t edp;
+ char *lib_path = LX_LIB_PATH;
+ boolean_t execstk = B_TRUE;
+ unsigned int personality;
+
+ ASSERT(p->p_brand == &lx_brand);
+ ASSERT(lxpd != NULL);
+
+ /*
+ * Start with a separate struct for ELF data instead of inheriting
+ * values from the currently running binary. This ensures that fields
+ * such as ed_base are cleared if the new binary does not utilize an
+ * interpreter.
+ */
+ bzero(&edp, sizeof (edp));
+
+#if defined(_LP64)
+ if (args->to_model != DATAMODEL_NATIVE) {
+ lib_path = LX_LIB_PATH32;
+ }
+#endif
+
+ /*
+ * Set the brandname and library name for the new process so that
+ * elfexec() puts them onto the stack.
+ */
+ args->brandname = LX_BRANDNAME;
+ args->emulator = lib_path;
+
+#if defined(_LP64)
+ /*
+ * To conform with the way Linux lays out the address space, we clamp
+ * the stack to be the top of the lower region of the x86-64 canonical
+ * form address space -- which has the side-effect of laying out the
+ * entire address space in that lower region. Note that this only
+ * matters on 64-bit processes (this value will always be greater than
+ * the size of a 32-bit address space) and doesn't actually affect
+ * USERLIMIT: if a Linux-branded processes wishes to map something
+ * into the top half of the address space, it can do so -- but with
+ * the user stack starting at the top of the bottom region, those high
+ * virtual addresses won't be used unless explicitly directed.
+ */
+ args->maxstack = lx_maxstack64;
+#endif
+
+ /*
+ * Search the binary for a PT_GNU_STACK header. The PF_X bit contained
+ * within is used to dictate protection defaults for the stack, among
+ * other things.
+ */
+ if (args->to_model == DATAMODEL_NATIVE) {
+ Ehdr ehdr;
+ Phdr *phdrp;
+ caddr_t phdrbase = NULL;
+ ssize_t phdrsize = 0;
+ int nphdrs, hsize;
+
+ if ((error = elfreadhdr(vp, cred, &ehdr, &nphdrs, &phdrbase,
+ &phdrsize)) != 0) {
+ return (error);
+ }
+
+ hsize = ehdr.e_phentsize;
+ phdrp = (Phdr *)phdrbase;
+ for (i = nphdrs; i > 0; i--) {
+ switch (phdrp->p_type) {
+ case PT_GNU_STACK:
+ if ((phdrp->p_flags & PF_X) == 0) {
+ execstk = B_FALSE;
+ }
+ break;
+ }
+ phdrp = (Phdr *)((caddr_t)phdrp + hsize);
+ }
+ kmem_free(phdrbase, phdrsize);
+ }
+#if defined(_LP64)
+ else {
+ Elf32_Ehdr ehdr;
+ Elf32_Phdr *phdrp;
+ caddr_t phdrbase = NULL;
+ ssize_t phdrsize = 0;
+ int nphdrs, hsize;
+
+ if ((error = elf32readhdr(vp, cred, &ehdr, &nphdrs, &phdrbase,
+ &phdrsize)) != 0) {
+ return (error);
+ }
+
+ hsize = ehdr.e_phentsize;
+ phdrp = (Elf32_Phdr *)phdrbase;
+ for (i = nphdrs; i > 0; i--) {
+ switch (phdrp->p_type) {
+ case PT_GNU_STACK:
+ if ((phdrp->p_flags & PF_X) == 0) {
+ execstk = B_FALSE;
+ }
+ break;
+ }
+ phdrp = (Elf32_Phdr *)((caddr_t)phdrp + hsize);
+ }
+ kmem_free(phdrbase, phdrsize);
+ }
+#endif
+
+ /*
+ * Revert the base personality while maintaining any existing flags.
+ */
+ personality = LX_PER_LINUX | (lxpd->l_personality & ~LX_PER_MASK);
+
+ /*
+ * Linux defaults to an executable stack unless the aformentioned
+ * PT_GNU_STACK entry in the elf header dictates otherwise. Enabling
+ * the READ_IMPLIES_EXEC personality flag is also implied in this case.
+ */
+ if (execstk) {
+ args->stk_prot |= PROT_EXEC;
+ args->stk_prot_override = B_TRUE;
+ personality |= LX_PER_READ_IMPLIES_EXEC;
+ }
+
+ /*
+ * We will first exec the brand library, then map in the linux
+ * executable and the linux linker.
+ */
+ if ((error = lookupname(lib_path, UIO_SYSSPACE, FOLLOW, NULLVPP,
+ &nvp))) {
+ uprintf("%s: not found.", lib_path);
+ return (error);
+ }
+
+ /*
+ * We will eventually set the p_exec member to be the vnode for the new
+ * executable when we call setexecenv(). However, if we get an error
+ * before that call we need to restore the execenv to its original
+ * values so that when we return to the caller fop_close() works
+ * properly while cleaning up from the failed exec(). Restoring the
+ * original value will also properly decrement the 2nd VN_RELE that we
+ * took on the brand library.
+ */
+ origenv.ex_bssbase = p->p_bssbase;
+ origenv.ex_brkbase = p->p_brkbase;
+ origenv.ex_brksize = p->p_brksize;
+ origenv.ex_vp = p->p_exec;
+ orig_sigaltstack.ss_sp = lwp->lwp_sigaltstack.ss_sp;
+ orig_sigaltstack.ss_size = lwp->lwp_sigaltstack.ss_size;
+ orig_sigaltstack.ss_flags = lwp->lwp_sigaltstack.ss_flags;
+
+ if (args->to_model == DATAMODEL_NATIVE) {
+ error = elfexec(nvp, uap, args, idata, INTP_MAXDEPTH + 1,
+ execsz, setid, exec_file, cred, brand_action);
+ }
+#if defined(_LP64)
+ else {
+ error = elf32exec(nvp, uap, args, idata, INTP_MAXDEPTH + 1,
+ execsz, setid, exec_file, cred, brand_action);
+ }
+#endif
+ VN_RELE(nvp);
+ if (error != 0) {
+ restoreexecenv(&origenv, &orig_sigaltstack);
+ return (error);
+ }
+
+ /*
+ * exec-ed in the brand library above.
+ * The u_auxv vectors are now setup by elfexec to point to the
+ * brand emulation library and its linker.
+ */
+
+ /*
+ * After execing the brand library (which should have implicitly mapped
+ * in the comm page), map the VDSO into the approprate place in the AS.
+ */
+ lxpd->l_vdso = lx_map_vdso(args, cred);
+
+ bzero(&env, sizeof (env));
+
+ /*
+ * map in the the Linux executable
+ */
+ if (args->to_model == DATAMODEL_NATIVE) {
+ error = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr,
+ &voffset, exec_file, &interp, &env.ex_bssbase,
+ &env.ex_brkbase, &env.ex_brksize, NULL, NULL);
+ }
+#if defined(_LP64)
+ else {
+ Elf32_Ehdr ehdr32;
+ Elf32_Addr uphdr_vaddr32;
+
+ error = mapexec32_brand(vp, args, &ehdr32, &uphdr_vaddr32,
+ &voffset, exec_file, &interp, &env.ex_bssbase,
+ &env.ex_brkbase, &env.ex_brksize, NULL, NULL);
+
+ Ehdr32to64(&ehdr32, &ehdr);
+
+ if (uphdr_vaddr32 == (Elf32_Addr)-1)
+ uphdr_vaddr = (Addr)-1;
+ else
+ uphdr_vaddr = uphdr_vaddr32;
+ }
+#endif
+ if (error != 0) {
+ restoreexecenv(&origenv, &orig_sigaltstack);
+
+ if (interp != NULL)
+ kmem_free(interp, MAXPATHLEN);
+
+ return (error);
+ }
+
+ /*
+ * Save off the important properties of the lx executable. The brand
+ * library will ask us for this data later, when it is ready to set
+ * things up for the lx executable.
+ */
+ edp.ed_phdr = (uphdr_vaddr == -1) ? voffset + ehdr.e_phoff :
+ voffset + uphdr_vaddr;
+ edp.ed_entry = voffset + ehdr.e_entry;
+ edp.ed_phent = ehdr.e_phentsize;
+ edp.ed_phnum = ehdr.e_phnum;
+
+ if (interp != NULL) {
+ if (ehdr.e_type == ET_DYN) {
+ /*
+ * This is a shared object executable, so we need to
+ * pick a reasonable place to put the heap. Just don't
+ * use the first page.
+ */
+ env.ex_brkbase = (caddr_t)PAGESIZE;
+ env.ex_bssbase = (caddr_t)PAGESIZE;
+ }
+
+ /*
+ * If the program needs an interpreter (most do), map it in and
+ * store relevant information about it in the aux vector, where
+ * the brand library can find it.
+ */
+ if ((error = lookupname(interp, UIO_SYSSPACE, FOLLOW,
+ NULLVPP, &nvp))) {
+ uprintf("%s: not found.", interp);
+ restoreexecenv(&origenv, &orig_sigaltstack);
+ kmem_free(interp, MAXPATHLEN);
+ return (error);
+ }
+
+ kmem_free(interp, MAXPATHLEN);
+ interp = NULL;
+
+ /*
+ * map in the Linux linker
+ */
+ if (args->to_model == DATAMODEL_NATIVE) {
+ error = mapexec_brand(nvp, args, &ehdr,
+ &uphdr_vaddr, &voffset, exec_file, NULL, NULL,
+ NULL, NULL, NULL, &ldaddr);
+ }
+#if defined(_LP64)
+ else {
+ Elf32_Ehdr ehdr32;
+ Elf32_Addr uphdr_vaddr32;
+
+ error = mapexec32_brand(nvp, args, &ehdr32,
+ &uphdr_vaddr32, &voffset, exec_file, NULL, NULL,
+ NULL, NULL, NULL, &ldaddr);
+
+ Ehdr32to64(&ehdr32, &ehdr);
+
+ if (uphdr_vaddr32 == (Elf32_Addr)-1)
+ uphdr_vaddr = (Addr)-1;
+ else
+ uphdr_vaddr = uphdr_vaddr32;
+ }
+#endif
+
+ VN_RELE(nvp);
+ if (error != 0) {
+ restoreexecenv(&origenv, &orig_sigaltstack);
+ return (error);
+ }
+
+ /*
+ * Now that we know the base address of the brand's linker,
+ * we also save this for later use by the brand library.
+ */
+ edp.ed_base = voffset;
+ edp.ed_ldentry = voffset + ehdr.e_entry;
+ } else {
+ /*
+ * This program has no interpreter. The lx brand library will
+ * jump to the address in the AT_SUN_BRAND_LDENTRY aux vector,
+ * so in this case, put the entry point of the main executable
+ * there.
+ */
+ if (ehdr.e_type == ET_EXEC) {
+ /*
+ * An executable with no interpreter, this must be a
+ * statically linked executable, which means we loaded
+ * it at the address specified in the elf header, in
+ * which case the e_entry field of the elf header is an
+ * absolute address.
+ */
+ edp.ed_ldentry = ehdr.e_entry;
+ edp.ed_entry = ehdr.e_entry;
+ } else {
+ /*
+ * A shared object with no interpreter, we use the
+ * calculated address from above.
+ */
+ edp.ed_ldentry = edp.ed_entry;
+
+ /*
+ * In all situations except an ET_DYN elf object with no
+ * interpreter, we want to leave the brk and base
+ * values set by mapexec_brand alone. Normally when
+ * running ET_DYN objects on Solaris (most likely
+ * /lib/ld.so.1) the kernel sets brk and base to 0 since
+ * it doesn't know where to put the heap, and later the
+ * linker will call brk() to initialize the heap in:
+ * usr/src/cmd/sgs/rtld/common/setup.c:setup()
+ * after it has determined where to put it. (This
+ * decision is made after the linker loads and inspects
+ * elf properties of the target executable being run.)
+ *
+ * So for ET_DYN Linux executables, we also don't know
+ * where the heap should go, so we'll set the brk and
+ * base to 0. But in this case the Solaris linker will
+ * not initialize the heap, so when the Linux linker
+ * starts running there is no heap allocated. This
+ * seems to be ok on Linux 2.4 based systems because the
+ * Linux linker/libc fall back to using mmap() to
+ * allocate memory. But on 2.6 systems, running
+ * applications by specifying them as command line
+ * arguments to the linker results in segfaults for an
+ * as yet undetermined reason (which seems to indicatej
+ * that a more permanent fix for heap initalization in
+ * these cases may be necessary).
+ */
+ if (ehdr.e_type == ET_DYN) {
+ env.ex_bssbase = (caddr_t)0;
+ env.ex_brkbase = (caddr_t)0;
+ env.ex_brksize = 0;
+ }
+ }
+ }
+
+ env.ex_vp = vp;
+ setexecenv(&env);
+
+ /*
+ * We try to keep /proc's view of the aux vector consistent with
+ * what's on the process stack.
+ */
+ if (args->to_model == DATAMODEL_NATIVE) {
+ auxv_t phdr_auxv[4] = {
+ { AT_SUN_BRAND_LX_PHDR, 0 },
+ { AT_SUN_BRAND_LX_INTERP, 0 },
+ { AT_SUN_BRAND_LX_CLKTCK, 0 },
+ { AT_SUN_BRAND_LX_SYSINFO_EHDR, 0 }
+ };
+ phdr_auxv[0].a_un.a_val = edp.ed_phdr;
+ phdr_auxv[1].a_un.a_val = ldaddr;
+ phdr_auxv[2].a_un.a_val = hz;
+ phdr_auxv[3].a_un.a_val = lxpd->l_vdso;
+
+ if (copyout(&phdr_auxv, args->auxp_brand,
+ sizeof (phdr_auxv)) == -1)
+ return (EFAULT);
+ }
+#if defined(_LP64)
+ else {
+ auxv32_t phdr_auxv32[4] = {
+ { AT_SUN_BRAND_LX_PHDR, 0 },
+ { AT_SUN_BRAND_LX_INTERP, 0 },
+ { AT_SUN_BRAND_LX_CLKTCK, 0 },
+ { AT_SUN_BRAND_LX_SYSINFO_EHDR, 0 }
+ };
+ phdr_auxv32[0].a_un.a_val = edp.ed_phdr;
+ phdr_auxv32[1].a_un.a_val = ldaddr;
+ phdr_auxv32[2].a_un.a_val = hz;
+ phdr_auxv32[3].a_un.a_val = lxpd->l_vdso;
+
+ if (copyout(&phdr_auxv32, args->auxp_brand,
+ sizeof (phdr_auxv32)) == -1)
+ return (EFAULT);
+ }
+#endif
+
+ /*
+ * /proc uses the AT_ENTRY aux vector entry to deduce
+ * the location of the executable in the address space. The user
+ * structure contains a copy of the aux vector that needs to have those
+ * entries patched with the values of the real lx executable (they
+ * currently contain the values from the lx brand library that was
+ * elfexec'd, above).
+ *
+ * For live processes, AT_BASE is used to locate the linker segment,
+ * which /proc and friends will later use to find Solaris symbols
+ * (such as rtld_db_preinit). However, for core files, /proc uses
+ * AT_ENTRY to find the right segment to label as the executable.
+ * So we set AT_ENTRY to be the entry point of the linux executable,
+ * but leave AT_BASE to be the address of the Solaris linker.
+ */
+ for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
+ switch (up->u_auxv[i].a_type) {
+ case AT_ENTRY:
+ up->u_auxv[i].a_un.a_val = edp.ed_entry;
+ break;
+
+ case AT_SUN_BRAND_LX_PHDR:
+ up->u_auxv[i].a_un.a_val = edp.ed_phdr;
+ break;
+
+ case AT_SUN_BRAND_LX_INTERP:
+ up->u_auxv[i].a_un.a_val = ldaddr;
+ break;
+
+ case AT_SUN_BRAND_LX_CLKTCK:
+ up->u_auxv[i].a_un.a_val = hz;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ /*
+ * Record the brand ELF data and new personality now that the exec has
+ * proceeded successfully.
+ */
+ bcopy(&edp, &lxpd->l_elf_data, sizeof (edp));
+ lxpd->l_personality = personality;
+
+ return (0);
+}
+
+boolean_t
+lx_native_exec(uint8_t osabi, const char **interp)
+{
+ if (osabi != ELFOSABI_SOLARIS)
+ return (B_FALSE);
+
+ /*
+ * If the process root matches the zone root, prepend /native to the
+ * interpreter path for native executables. Absolute precision from
+ * VN_CMP is not necessary since any change of process root is likely
+ * to make native binaries inaccessible via /native.
+ *
+ * Processes which chroot directly into /native will be able to
+ * function as expected with no need for the prefix.
+ */
+ if (VN_CMP(curproc->p_user.u_rdir, curproc->p_zone->zone_rootvp)) {
+ *interp = "/native";
+ }
+
+ return (B_TRUE);
+}
+
+static void
+lx_syscall_init(void)
+{
+ int i;
+
+ /*
+ * Count up the 32-bit Linux system calls. Note that lx_sysent32
+ * has (LX_NSYSCALLS + 1) entries.
+ */
+ for (i = 0; i <= LX_NSYSCALLS && lx_sysent32[i].sy_name != NULL; i++)
+ continue;
+ lx_nsysent32 = i;
+
+#if defined(_LP64)
+ /*
+ * Count up the 64-bit Linux system calls. Note that lx_sysent64
+ * has (LX_NSYSCALLS + 1) entries.
+ */
+ for (i = 0; i <= LX_NSYSCALLS && lx_sysent64[i].sy_name != NULL; i++)
+ continue;
+ lx_nsysent64 = i;
+#endif
+}
+
+int
+_init(void)
+{
+ int err = 0;
+
+ lx_syscall_init();
+ lx_pid_init();
+ lx_ioctl_init();
+ lx_futex_init();
+ lx_ptrace_init();
+ lx_socket_init();
+
+ err = mod_install(&modlinkage);
+ if (err != 0) {
+ cmn_err(CE_WARN, "Couldn't install lx brand module");
+
+ /*
+ * This looks drastic, but it should never happen. These
+ * two data structures should be completely free-able until
+ * they are used by Linux processes. Since the brand
+ * wasn't loaded there should be no Linux processes, and
+ * thus no way for these data structures to be modified.
+ */
+ lx_pid_fini();
+ lx_ioctl_fini();
+ if (lx_futex_fini())
+ panic("lx brand module cannot be loaded or unloaded.");
+ }
+ return (err);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ int err;
+ int futex_done = 0;
+
+ /*
+ * If there are any zones using this brand, we can't allow it to be
+ * unloaded.
+ */
+ if (brand_zone_count(&lx_brand))
+ return (EBUSY);
+
+ lx_ptrace_fini();
+ lx_pid_fini();
+ lx_ioctl_fini();
+ lx_socket_fini();
+
+ if ((err = lx_futex_fini()) != 0) {
+ goto done;
+ }
+ futex_done = 1;
+
+ err = mod_remove(&modlinkage);
+
+done:
+ if (err) {
+ /*
+ * If we can't unload the module, then we have to get it
+ * back into a sane state.
+ */
+ lx_ptrace_init();
+ lx_pid_init();
+ lx_ioctl_init();
+ lx_socket_init();
+
+ if (futex_done) {
+ lx_futex_init();
+ }
+ }
+
+ return (err);
+}
diff --git a/usr/src/uts/common/brand/lx/os/lx_misc.c b/usr/src/uts/common/brand/lx/os/lx_misc.c
new file mode 100644
index 0000000000..7ede833ca4
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/os/lx_misc.c
@@ -0,0 +1,1103 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright 2016, Joyent, Inc.
+ */
+
+#include <sys/errno.h>
+#include <sys/systm.h>
+#include <sys/archsystm.h>
+#include <sys/privregs.h>
+#include <sys/exec.h>
+#include <sys/lwp.h>
+#include <sys/sem.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_siginfo.h>
+#include <sys/lx_futex.h>
+#include <lx_errno.h>
+#include <sys/cmn_err.h>
+#include <sys/siginfo.h>
+#include <sys/contract/process_impl.h>
+#include <sys/x86_archext.h>
+#include <sys/sdt.h>
+#include <lx_signum.h>
+#include <lx_syscall.h>
+#include <sys/proc.h>
+#include <sys/procfs.h>
+#include <net/if.h>
+#include <inet/ip6.h>
+#include <sys/sunddi.h>
+#include <sys/dlpi.h>
+#include <sys/sysmacros.h>
+
+/* Linux specific functions and definitions */
+static void lx_save(klwp_t *);
+static void lx_restore(klwp_t *);
+
+/*
+ * Set the return code for the forked child, always zero
+ */
+/*ARGSUSED*/
+void
+lx_setrval(klwp_t *lwp, int v1, int v2)
+{
+ lwptoregs(lwp)->r_r0 = 0;
+}
+
+/*
+ * Reset process state on exec(2)
+ */
+void
+lx_exec()
+{
+ klwp_t *lwp = ttolwp(curthread);
+ struct lx_lwp_data *lwpd = lwptolxlwp(lwp);
+ proc_t *p = ttoproc(curthread);
+ lx_proc_data_t *pd = ptolxproc(p);
+ struct regs *rp = lwptoregs(lwp);
+
+ /* b_exec is called without p_lock held */
+ VERIFY(MUTEX_NOT_HELD(&p->p_lock));
+
+ /*
+ * Any l_handler handlers set as a result of B_REGISTER are now
+ * invalid; clear them.
+ */
+ pd->l_handler = NULL;
+
+ /*
+ * If this was a multi-threaded Linux process and this lwp wasn't the
+ * main lwp, then we need to make its Illumos and Linux PIDs match.
+ */
+ if (curthread->t_tid != 1) {
+ lx_pid_reassign(curthread);
+ }
+
+ /*
+ * Inform ptrace(2) that we are processing an execve(2) call so that if
+ * we are traced we can post either the PTRACE_EVENT_EXEC event or the
+ * legacy SIGTRAP.
+ */
+ (void) lx_ptrace_stop_for_option(LX_PTRACE_O_TRACEEXEC, B_FALSE, 0, 0);
+
+ /* clear the fs/gsbase values until the app. can reinitialize them */
+ lwpd->br_lx_fsbase = NULL;
+ lwpd->br_ntv_fsbase = NULL;
+ lwpd->br_lx_gsbase = NULL;
+ lwpd->br_ntv_gsbase = NULL;
+
+ /*
+ * Clear the native stack flags. This will be reinitialised by
+ * lx_init() in the new process image.
+ */
+ lwpd->br_stack_mode = LX_STACK_MODE_PREINIT;
+ lwpd->br_ntv_stack = 0;
+ lwpd->br_ntv_stack_current = 0;
+
+ installctx(lwptot(lwp), lwp, lx_save, lx_restore, NULL, NULL, lx_save,
+ NULL);
+
+ /*
+ * clear out the tls array
+ */
+ bzero(lwpd->br_tls, sizeof (lwpd->br_tls));
+
+ /*
+ * reset the tls entries in the gdt
+ */
+ kpreempt_disable();
+ lx_restore(lwp);
+ kpreempt_enable();
+
+ /* Grab the updated argv bounds */
+ mutex_enter(&p->p_lock);
+ lx_read_argv_bounds(p);
+ mutex_exit(&p->p_lock);
+
+ /*
+ * The exec syscall doesn't return (so we don't call lx_syscall_return)
+ * but for our ptrace emulation we need to do this so that a tracer
+ * does not get out of sync. We know that by the time this lx_exec
+ * function is called that the exec has succeeded.
+ */
+ rp->r_r0 = 0;
+ lx_ptrace_stop(LX_PR_SYSEXIT);
+}
+
+static void
+lx_cleanlwp(klwp_t *lwp, proc_t *p)
+{
+ struct lx_lwp_data *lwpd = lwptolxlwp(lwp);
+ void *rb_list = NULL;
+
+ VERIFY(lwpd != NULL);
+
+ mutex_enter(&p->p_lock);
+ if ((lwpd->br_ptrace_flags & LX_PTF_EXITING) == 0) {
+ lx_ptrace_exit(p, lwp);
+ }
+
+ /*
+ * While we have p_lock, safely grab any robust_list references and
+ * clear the lwp field.
+ */
+ sprlock_proc(p);
+ rb_list = lwpd->br_robust_list;
+ lwpd->br_robust_list = NULL;
+ sprunlock(p);
+
+ if (rb_list != NULL) {
+ lx_futex_robust_exit((uintptr_t)rb_list, lwpd->br_pid);
+ }
+}
+
+void
+lx_exitlwp(klwp_t *lwp)
+{
+ struct lx_lwp_data *lwpd = lwptolxlwp(lwp);
+ proc_t *p = lwptoproc(lwp);
+ kthread_t *t;
+ sigqueue_t *sqp = NULL;
+ pid_t ppid;
+ id_t ptid;
+
+ VERIFY(MUTEX_NOT_HELD(&p->p_lock));
+
+ if (lwpd == NULL) {
+ /* second time thru' */
+ return;
+ }
+
+ lx_cleanlwp(lwp, p);
+
+ if (lwpd->br_clear_ctidp != NULL) {
+ (void) suword32(lwpd->br_clear_ctidp, 0);
+ (void) lx_futex((uintptr_t)lwpd->br_clear_ctidp, FUTEX_WAKE, 1,
+ NULL, NULL, 0);
+ lwpd->br_clear_ctidp = NULL;
+ }
+
+ if (lwpd->br_signal != 0) {
+ /*
+ * The first thread in a process doesn't cause a signal to
+ * be sent when it exits. It was created by a fork(), not
+ * a clone(), so the parent should get signalled when the
+ * process exits.
+ */
+ if (lwpd->br_ptid == -1)
+ goto free;
+
+ sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
+ /*
+ * If br_ppid is 0, it means this is a CLONE_PARENT thread,
+ * so the signal goes to the parent process - not to a
+ * specific thread in this process.
+ */
+ p = lwptoproc(lwp);
+ if (lwpd->br_ppid == 0) {
+ mutex_enter(&p->p_lock);
+ ppid = p->p_ppid;
+ t = NULL;
+ } else {
+ /*
+ * If we have been reparented to init or if our
+ * parent thread is gone, then nobody gets
+ * signaled.
+ */
+ if ((lx_lwp_ppid(lwp, &ppid, &ptid) == 1) ||
+ (ptid == -1))
+ goto free;
+
+ mutex_enter(&pidlock);
+ if ((p = prfind(ppid)) == NULL || p->p_stat == SIDL) {
+ mutex_exit(&pidlock);
+ goto free;
+ }
+ mutex_enter(&p->p_lock);
+ mutex_exit(&pidlock);
+
+ if ((t = idtot(p, ptid)) == NULL) {
+ mutex_exit(&p->p_lock);
+ goto free;
+ }
+ }
+
+ sqp->sq_info.si_signo = lwpd->br_signal;
+ sqp->sq_info.si_code = lwpd->br_exitwhy;
+ sqp->sq_info.si_status = lwpd->br_exitwhat;
+ sqp->sq_info.si_pid = lwpd->br_pid;
+ sqp->sq_info.si_uid = crgetruid(CRED());
+ sigaddqa(p, t, sqp);
+ mutex_exit(&p->p_lock);
+ sqp = NULL;
+ }
+
+free:
+ if (lwpd->br_scall_args != NULL) {
+ ASSERT(lwpd->br_args_size > 0);
+ kmem_free(lwpd->br_scall_args, lwpd->br_args_size);
+ }
+ if (sqp)
+ kmem_free(sqp, sizeof (sigqueue_t));
+}
+
+void
+lx_freelwp(klwp_t *lwp)
+{
+ struct lx_lwp_data *lwpd = lwptolxlwp(lwp);
+ proc_t *p = lwptoproc(lwp);
+ lx_zone_data_t *lxzdata;
+ vfs_t *cgrp;
+
+ VERIFY(MUTEX_NOT_HELD(&p->p_lock));
+
+ if (lwpd == NULL) {
+ /*
+ * There is one case where an LX branded process will possess
+ * LWPs which lack their own brand data. During the course of
+ * executing native binary, the process will be preemptively
+ * branded to allow hooks such as b_native_exec to function.
+ * If that process possesses multiple LWPS, they will _not_ be
+ * branded since they will exit if the exec succeeds. It's
+ * during this LWP exit that lx_freelwp would be called on an
+ * unbranded LWP. When that is the case, it is acceptable to
+ * bypass the hook.
+ */
+ return;
+ }
+
+ /* cgroup integration */
+ lxzdata = ztolxzd(p->p_zone);
+ mutex_enter(&lxzdata->lxzd_lock);
+ cgrp = lxzdata->lxzd_cgroup;
+ if (cgrp != NULL) {
+ VFS_HOLD(cgrp);
+ mutex_exit(&lxzdata->lxzd_lock);
+ ASSERT(lx_cgrp_freelwp != NULL);
+ (*lx_cgrp_freelwp)(cgrp, lwpd->br_cgroupid, lwptot(lwp)->t_tid,
+ lwpd->br_pid);
+ VFS_RELE(cgrp);
+ } else {
+ mutex_exit(&lxzdata->lxzd_lock);
+ }
+
+ /*
+ * It is possible for the lx_freelwp hook to be called without a prior
+ * call to lx_exitlwp being made. This happens as part of lwp
+ * de-branding when a native binary is executed from a branded process.
+ *
+ * To cover all cases, lx_cleanlwp is called from lx_exitlwp as well
+ * here in lx_freelwp. When the second call is redundant, the
+ * resources will already be freed and no work will be needed.
+ */
+ lx_cleanlwp(lwp, p);
+
+ /*
+ * Remove our system call interposer.
+ */
+ lwp->lwp_brand_syscall = NULL;
+ lwp->lwp_brand_syscall_fast = NULL;
+
+ (void) removectx(lwptot(lwp), lwp, lx_save, lx_restore, NULL, NULL,
+ lx_save, NULL);
+ if (lwpd->br_pid != 0) {
+ lx_pid_rele(lwptoproc(lwp)->p_pid, lwptot(lwp)->t_tid);
+ }
+
+ /*
+ * Ensure that lx_ptrace_exit() has been called to detach
+ * ptrace(2) tracers and tracees.
+ */
+ VERIFY(lwpd->br_ptrace_tracer == NULL);
+ VERIFY(lwpd->br_ptrace_accord == NULL);
+
+ lwp->lwp_brand = NULL;
+ kmem_free(lwpd, sizeof (struct lx_lwp_data));
+}
+
+void *
+lx_lwpdata_alloc(proc_t *p)
+{
+ lx_lwp_data_t *lwpd;
+ struct lx_pid *lpidp;
+ pid_t newpid = 0;
+ struct pid *pidp = NULL;
+
+ VERIFY(MUTEX_NOT_HELD(&p->p_lock));
+
+ /*
+ * LWPs beyond the first will require a pid to be allocated to emulate
+ * Linux's goofy thread model. While this allocation may be
+ * unnecessary when a single-lwp process undergoes branding, it cannot
+ * be performed during b_initlwp due to p_lock being held.
+ */
+ if (p->p_lwpcnt > 0) {
+ if ((newpid = pid_allocate(p, 0, 0)) < 0) {
+ return (NULL);
+ }
+ pidp = pid_find(newpid);
+ }
+
+ lwpd = kmem_zalloc(sizeof (struct lx_lwp_data), KM_SLEEP);
+ lpidp = kmem_zalloc(sizeof (struct lx_pid), KM_SLEEP);
+
+ lpidp->l_pid = newpid;
+ lpidp->l_pidp = pidp;
+ lwpd->br_lpid = lpidp;
+ return (lwpd);
+}
+
+/*
+ * Free lwp brand data if an error occurred during lwp_create.
+ * Otherwise, lx_freelwp will be used to free the resources after they're
+ * associated with the lwp via lx_initlwp.
+ */
+void
+lx_lwpdata_free(void *lwpbd)
+{
+ lx_lwp_data_t *lwpd = (lx_lwp_data_t *)lwpbd;
+ VERIFY(lwpd != NULL);
+ VERIFY(lwpd->br_lpid != NULL);
+
+ if (lwpd->br_lpid->l_pidp != NULL) {
+ (void) pid_rele(lwpd->br_lpid->l_pidp);
+ }
+ kmem_free(lwpd->br_lpid, sizeof (*lwpd->br_lpid));
+ kmem_free(lwpd, sizeof (*lwpd));
+}
+
+void
+lx_initlwp(klwp_t *lwp, void *lwpbd)
+{
+ lx_lwp_data_t *lwpd = (lx_lwp_data_t *)lwpbd;
+ lx_lwp_data_t *plwpd = ttolxlwp(curthread);
+ kthread_t *tp = lwptot(lwp);
+ proc_t *p = lwptoproc(lwp);
+ lx_zone_data_t *lxzdata;
+ vfs_t *cgrp;
+
+ VERIFY(MUTEX_HELD(&p->p_lock));
+ VERIFY(lwp->lwp_brand == NULL);
+
+ lwpd->br_exitwhy = CLD_EXITED;
+ lwpd->br_lwp = lwp;
+ lwpd->br_clear_ctidp = NULL;
+ lwpd->br_set_ctidp = NULL;
+ lwpd->br_signal = 0;
+ lwpd->br_stack_mode = LX_STACK_MODE_PREINIT;
+ /*
+ * lwpd->br_affinitymask was zeroed by kmem_zalloc()
+ * as was lwpd->br_scall_args and lwpd->br_args_size.
+ */
+
+ /*
+ * The first thread in a process has ppid set to the parent
+ * process's pid, and ptid set to -1. Subsequent threads in the
+ * process have their ppid set to the pid of the thread that
+ * created them, and their ptid to that thread's tid.
+ */
+ if (tp->t_next == tp) {
+ lwpd->br_ppid = tp->t_procp->p_ppid;
+ lwpd->br_ptid = -1;
+ } else if (plwpd != NULL) {
+ bcopy(plwpd->br_tls, lwpd->br_tls, sizeof (lwpd->br_tls));
+ lwpd->br_ppid = plwpd->br_pid;
+ lwpd->br_ptid = curthread->t_tid;
+ /* The child inherits the fs/gsbase values from the parent */
+ lwpd->br_lx_fsbase = plwpd->br_lx_fsbase;
+ lwpd->br_ntv_fsbase = plwpd->br_ntv_fsbase;
+ lwpd->br_lx_gsbase = plwpd->br_lx_gsbase;
+ lwpd->br_ntv_gsbase = plwpd->br_ntv_gsbase;
+ } else {
+ /*
+ * Oddball case: the parent thread isn't a Linux process.
+ */
+ lwpd->br_ppid = 0;
+ lwpd->br_ptid = -1;
+ }
+ lwp->lwp_brand = lwpd;
+
+ /*
+ * When during lx_lwpdata_alloc, we must decide whether or not to
+ * allocate a new pid to associate with the lwp. Since p_lock is not
+ * held at that point, the only time we can guarantee a new pid isn't
+ * needed is when p_lwpcnt == 0. This is because other lwps won't be
+ * present to race with us with regards to pid allocation.
+ *
+ * This means that in all other cases (where p_lwpcnt > 0), we expect
+ * that lx_lwpdata_alloc will allocate a pid for us to use here, even
+ * if it is uneeded. If this process is undergoing an exec, for
+ * example, the single existing lwp will not need a new pid when it is
+ * rebranded. In that case, lx_pid_assign will free the uneeded pid.
+ */
+ VERIFY(lwpd->br_lpid->l_pidp != NULL || p->p_lwpcnt == 0);
+
+ lx_pid_assign(tp, lwpd->br_lpid);
+ lwpd->br_tgid = lwpd->br_pid;
+ /*
+ * Having performed the lx pid assignement, the lpid reference is no
+ * longer needed. The underlying data will be freed during lx_freelwp.
+ */
+ lwpd->br_lpid = NULL;
+
+ installctx(lwptot(lwp), lwp, lx_save, lx_restore, NULL, NULL,
+ lx_save, NULL);
+
+ /*
+ * Install branded system call hooks for this LWP:
+ */
+ lwp->lwp_brand_syscall = lx_syscall_enter;
+ lwp->lwp_brand_syscall_fast = lx_syscall_fast_enter;
+
+ /*
+ * The new LWP inherits the parent LWP cgroup ID.
+ */
+ if (plwpd != NULL) {
+ lwpd->br_cgroupid = plwpd->br_cgroupid;
+ }
+ lxzdata = ztolxzd(p->p_zone);
+ mutex_enter(&lxzdata->lxzd_lock);
+ cgrp = lxzdata->lxzd_cgroup;
+ if (cgrp != NULL) {
+ VFS_HOLD(cgrp);
+ mutex_exit(&lxzdata->lxzd_lock);
+ ASSERT(lx_cgrp_initlwp != NULL);
+ (*lx_cgrp_initlwp)(cgrp, lwpd->br_cgroupid, lwptot(lwp)->t_tid,
+ lwpd->br_pid);
+ VFS_RELE(cgrp);
+ } else {
+ mutex_exit(&lxzdata->lxzd_lock);
+ }
+}
+
+void
+lx_initlwp_post(klwp_t *lwp)
+{
+ lx_lwp_data_t *plwpd = ttolxlwp(curthread);
+ /*
+ * If the parent LWP has a ptrace(2) tracer, the new LWP may
+ * need to inherit that same tracer.
+ */
+ if (plwpd != NULL) {
+ lx_ptrace_inherit_tracer(plwpd, lwptolxlwp(lwp));
+ }
+}
+
+/*
+ * There is no need to have any locking for either the source or
+ * destination struct lx_lwp_data structs. This is always run in the
+ * thread context of the source thread, and the destination thread is
+ * always newly created and not referred to from anywhere else.
+ */
+void
+lx_forklwp(klwp_t *srclwp, klwp_t *dstlwp)
+{
+ struct lx_lwp_data *src = srclwp->lwp_brand;
+ struct lx_lwp_data *dst = dstlwp->lwp_brand;
+
+ dst->br_ppid = src->br_pid;
+ dst->br_ptid = lwptot(srclwp)->t_tid;
+ bcopy(src->br_tls, dst->br_tls, sizeof (dst->br_tls));
+
+ switch (src->br_stack_mode) {
+ case LX_STACK_MODE_BRAND:
+ case LX_STACK_MODE_NATIVE:
+ /*
+ * The parent LWP has an alternate stack installed.
+ * The child LWP should have the same stack base and extent.
+ */
+ dst->br_stack_mode = src->br_stack_mode;
+ dst->br_ntv_stack = src->br_ntv_stack;
+ dst->br_ntv_stack_current = src->br_ntv_stack_current;
+ break;
+
+ default:
+ /*
+ * Otherwise, clear the stack data for this LWP.
+ */
+ dst->br_stack_mode = LX_STACK_MODE_PREINIT;
+ dst->br_ntv_stack = 0;
+ dst->br_ntv_stack_current = 0;
+ }
+
+ /*
+ * copy only these flags
+ */
+ dst->br_lwp_flags = src->br_lwp_flags & BR_CPU_BOUND;
+ dst->br_scall_args = NULL;
+}
+
+/*
+ * When switching a Linux process off the CPU, clear its GDT entries.
+ */
+/* ARGSUSED */
+static void
+lx_save(klwp_t *t)
+{
+ int i;
+
+#if defined(__amd64)
+ reset_sregs();
+#endif
+ for (i = 0; i < LX_TLSNUM; i++)
+ gdt_update_usegd(GDT_TLSMIN + i, &null_udesc);
+}
+
+/*
+ * When switching a Linux process on the CPU, set its GDT entries.
+ *
+ * For 64-bit code we don't have to worry about explicitly setting the
+ * %fsbase via wrmsr(MSR_AMD_FSBASE) here. Instead, that should happen
+ * automatically in update_sregs if we are executing in user-land. If this
+ * is the case then pcb_rupdate should be set.
+ */
+static void
+lx_restore(klwp_t *t)
+{
+ struct lx_lwp_data *lwpd = lwptolxlwp(t);
+ user_desc_t *tls;
+ int i;
+
+ ASSERT(lwpd);
+
+ tls = lwpd->br_tls;
+ for (i = 0; i < LX_TLSNUM; i++)
+ gdt_update_usegd(GDT_TLSMIN + i, &tls[i]);
+}
+
+void
+lx_set_gdt(int entry, user_desc_t *descrp)
+{
+
+ gdt_update_usegd(entry, descrp);
+}
+
+void
+lx_clear_gdt(int entry)
+{
+ gdt_update_usegd(entry, &null_udesc);
+}
+
+longlong_t
+lx_nosys()
+{
+ return (set_errno(ENOSYS));
+}
+
+/*
+ * Brand-specific routine to check if given non-Solaris standard segment
+ * register values should be modified to other values.
+ */
+/*ARGSUSED*/
+greg_t
+lx_fixsegreg(greg_t sr, model_t datamodel)
+{
+ uint16_t idx = SELTOIDX(sr);
+
+ ASSERT(sr == (sr & 0xffff));
+
+ /*
+ * If the segment selector is a valid TLS selector, just return it.
+ */
+ if (!SELISLDT(sr) && idx >= GDT_TLSMIN && idx <= GDT_TLSMAX)
+ return (sr | SEL_UPL);
+
+ /*
+ * Force the SR into the LDT in ring 3 for 32-bit processes.
+ *
+ * 64-bit processes get the null GDT selector since they are not
+ * allowed to have a private LDT.
+ */
+#if defined(__amd64)
+ return (datamodel == DATAMODEL_ILP32 ? (sr | SEL_TI_LDT | SEL_UPL) : 0);
+#elif defined(__i386)
+ datamodel = datamodel; /* datamodel currently unused for 32-bit */
+ return (sr | SEL_TI_LDT | SEL_UPL);
+#endif /* __amd64 */
+}
+
+/*
+ * Brand-specific function to convert the fsbase as pulled from the register
+ * into a native fsbase suitable for locating the ulwp_t from the kernel.
+ */
+uintptr_t
+lx_fsbase(klwp_t *lwp, uintptr_t fsbase)
+{
+ lx_lwp_data_t *lwpd = lwp->lwp_brand;
+
+ if (lwpd->br_stack_mode != LX_STACK_MODE_BRAND ||
+ lwpd->br_ntv_fsbase == NULL) {
+ return (fsbase);
+ }
+
+ return (lwpd->br_ntv_fsbase);
+}
+
+/*
+ * These two functions simulate winfo and post_sigcld for the lx brand. The
+ * difference is delivering a designated signal as opposed to always SIGCLD.
+ */
+static void
+lx_winfo(proc_t *pp, k_siginfo_t *ip, struct lx_proc_data *dat)
+{
+ ASSERT(MUTEX_HELD(&pidlock));
+ bzero(ip, sizeof (k_siginfo_t));
+ ip->si_signo = ltos_signo[dat->l_signal];
+ ip->si_code = pp->p_wcode;
+ ip->si_pid = pp->p_pid;
+ ip->si_ctid = PRCTID(pp);
+ ip->si_zoneid = pp->p_zone->zone_id;
+ ip->si_status = pp->p_wdata;
+ ip->si_stime = pp->p_stime;
+ ip->si_utime = pp->p_utime;
+}
+
+static void
+lx_post_exit_sig(proc_t *cp, sigqueue_t *sqp, struct lx_proc_data *dat)
+{
+ proc_t *pp = cp->p_parent;
+
+ ASSERT(MUTEX_HELD(&pidlock));
+ mutex_enter(&pp->p_lock);
+ /*
+ * Since Linux doesn't queue SIGCHLD, or any other non RT
+ * signals, we just blindly deliver whatever signal we can.
+ */
+ ASSERT(sqp != NULL);
+ lx_winfo(cp, &sqp->sq_info, dat);
+ sigaddqa(pp, NULL, sqp);
+ sqp = NULL;
+ mutex_exit(&pp->p_lock);
+}
+
+
+/*
+ * Brand specific code for exiting and sending a signal to the parent, as
+ * opposed to sigcld().
+ */
+void
+lx_exit_with_sig(proc_t *cp, sigqueue_t *sqp)
+{
+ proc_t *pp = cp->p_parent;
+ lx_proc_data_t *lx_brand_data = ptolxproc(cp);
+ ASSERT(MUTEX_HELD(&pidlock));
+
+ switch (cp->p_wcode) {
+ case CLD_EXITED:
+ case CLD_DUMPED:
+ case CLD_KILLED:
+ ASSERT(cp->p_stat == SZOMB);
+ /*
+ * The broadcast on p_srwchan_cv is a kludge to
+ * wakeup a possible thread in uadmin(A_SHUTDOWN).
+ */
+ cv_broadcast(&cp->p_srwchan_cv);
+
+ /*
+ * Add to newstate list of the parent
+ */
+ add_ns(pp, cp);
+
+ cv_broadcast(&pp->p_cv);
+ if ((pp->p_flag & SNOWAIT) ||
+ PTOU(pp)->u_signal[SIGCLD - 1] == SIG_IGN) {
+ if (!(cp->p_pidflag & CLDWAITPID))
+ freeproc(cp);
+ } else if (!(cp->p_pidflag & CLDNOSIGCHLD) &&
+ lx_brand_data->l_signal != 0) {
+ lx_post_exit_sig(cp, sqp, lx_brand_data);
+ sqp = NULL;
+ }
+ break;
+
+ case CLD_STOPPED:
+ case CLD_CONTINUED:
+ case CLD_TRAPPED:
+ panic("Should not be called in this case");
+ }
+
+ if (sqp)
+ siginfofree(sqp);
+}
+
+/*
+ * Filters based on arguments that have been passed in by a separate syscall
+ * using the B_STORE_ARGS mechanism. if the __WALL flag is set, no filter is
+ * applied, otherwise we look at the difference between a clone and non-clone
+ * process.
+ * The definition of a clone process in Linux is a thread that does not deliver
+ * SIGCHLD to its parent. The option __WCLONE indicates to wait only on clone
+ * processes. Without that option, a process should only wait on normal
+ * children. The following table shows the cases.
+ *
+ * default __WCLONE
+ * no SIGCHLD - X
+ * SIGCHLD X -
+ *
+ * This is an XOR of __WCLONE being set, and SIGCHLD being the signal sent on
+ * process exit.
+ *
+ * More information on wait in lx brands can be found at
+ * usr/src/lib/brand/lx/lx_brand/common/wait.c.
+ */
+boolean_t
+lx_wait_filter(proc_t *pp, proc_t *cp)
+{
+ lx_lwp_data_t *lwpd = ttolxlwp(curthread);
+ int flags = lwpd->br_waitid_flags;
+ boolean_t ret;
+
+ if (!lwpd->br_waitid_emulate) {
+ return (B_TRUE);
+ }
+
+ mutex_enter(&cp->p_lock);
+ if (flags & LX_WALL) {
+ ret = B_TRUE;
+ } else {
+ lx_proc_data_t *pd = ptolxproc(cp);
+ boolean_t is_sigchld = B_TRUE;
+ boolean_t match_wclone = B_FALSE;
+
+ /*
+ * When calling clone, an alternate signal can be chosen to
+ * deliver to the parent when the child exits.
+ */
+ if (pd != NULL && pd->l_signal != stol_signo[SIGCHLD]) {
+ is_sigchld = B_FALSE;
+ }
+ if ((flags & LX_WCLONE) != 0) {
+ match_wclone = B_TRUE;
+ }
+
+ ret = (match_wclone ^ is_sigchld) ? B_TRUE : B_FALSE;
+ }
+ mutex_exit(&cp->p_lock);
+
+ return (ret);
+}
+
+void
+lx_ifname_convert(char *ifname, lx_if_action_t act)
+{
+ if (act == LX_IF_TONATIVE) {
+ if (strncmp(ifname, "lo", IFNAMSIZ) == 0)
+ (void) strlcpy(ifname, "lo0", IFNAMSIZ);
+ } else {
+ if (strncmp(ifname, "lo0", IFNAMSIZ) == 0)
+ (void) strlcpy(ifname, "lo", IFNAMSIZ);
+ }
+}
+
+void
+lx_ifflags_convert(uint64_t *flags, lx_if_action_t act)
+{
+ uint64_t buf;
+
+ buf = *flags & (IFF_UP | IFF_BROADCAST | IFF_DEBUG |
+ IFF_LOOPBACK | IFF_POINTOPOINT | IFF_NOTRAILERS |
+ IFF_RUNNING | IFF_NOARP | IFF_PROMISC | IFF_ALLMULTI);
+
+ /* Linux has different shift for multicast flag */
+ if (act == LX_IF_TONATIVE) {
+ if (*flags & 0x1000)
+ buf |= IFF_MULTICAST;
+ } else {
+ if (*flags & IFF_MULTICAST)
+ buf |= 0x1000;
+ }
+ *flags = buf;
+}
+
+/*
+ * Convert an IPv6 address into the numbers used by /proc/net/if_inet6
+ */
+unsigned int
+lx_ipv6_scope_convert(const in6_addr_t *addr)
+{
+ if (IN6_IS_ADDR_V4COMPAT(addr)) {
+ return (LX_IPV6_ADDR_COMPATv4);
+ } else if (IN6_ARE_ADDR_EQUAL(addr, &ipv6_loopback)) {
+ return (LX_IPV6_ADDR_LOOPBACK);
+ } else if (IN6_IS_ADDR_LINKLOCAL(addr)) {
+ return (LX_IPV6_ADDR_LINKLOCAL);
+ } else if (IN6_IS_ADDR_SITELOCAL(addr)) {
+ return (LX_IPV6_ADDR_SITELOCAL);
+ } else {
+ return (0x0000U);
+ }
+}
+
+
+void
+lx_stol_hwaddr(const struct sockaddr_dl *src, struct sockaddr *dst, int *size)
+{
+ int copy_size = MIN(src->sdl_alen, sizeof (dst->sa_data));
+
+ switch (src->sdl_type) {
+ case DL_ETHER:
+ dst->sa_family = LX_ARPHRD_ETHER;
+ break;
+ case DL_LOOP:
+ dst->sa_family = LX_ARPHRD_LOOPBACK;
+ break;
+ default:
+ dst->sa_family = LX_ARPHRD_VOID;
+ }
+
+ bcopy(LLADDR(src), dst->sa_data, copy_size);
+ *size = copy_size;
+}
+
+/*
+ * Brand hook to convert native kernel siginfo signal number, errno, code, pid
+ * and si_status to Linux values. Similar to the stol_ksiginfo function but
+ * this one converts in-place, converts the pid, and does not copyout.
+ */
+void
+lx_sigfd_translate(k_siginfo_t *infop)
+{
+ infop->si_signo = lx_stol_signo(infop->si_signo, LX_SIGKILL);
+
+ infop->si_status = lx_stol_status(infop->si_status, LX_SIGKILL);
+
+ infop->si_code = lx_stol_sigcode(infop->si_code);
+
+ infop->si_errno = lx_errno(infop->si_errno, EINVAL);
+
+ if (infop->si_pid == curproc->p_zone->zone_proc_initpid) {
+ infop->si_pid = 1;
+ } else if (infop->si_pid == curproc->p_zone->zone_zsched->p_pid) {
+ infop->si_pid = 0;
+ }
+}
+
+int
+stol_ksiginfo_copyout(k_siginfo_t *sip, void *ulxsip)
+{
+ lx_siginfo_t lsi;
+
+ bzero(&lsi, sizeof (lsi));
+ lsi.lsi_signo = lx_stol_signo(sip->si_signo, SIGCLD);
+ lsi.lsi_code = lx_stol_sigcode(sip->si_code);
+ lsi.lsi_errno = lx_errno(sip->si_errno, EINVAL);
+
+ switch (lsi.lsi_signo) {
+ case LX_SIGPOLL:
+ lsi.lsi_band = sip->si_band;
+ lsi.lsi_fd = sip->si_fd;
+ break;
+
+ case LX_SIGCHLD:
+ lsi.lsi_pid = sip->si_pid;
+ if (sip->si_code <= 0 || sip->si_code == CLD_EXITED) {
+ lsi.lsi_status = sip->si_status;
+ } else {
+ lsi.lsi_status = lx_stol_status(sip->si_status,
+ SIGKILL);
+ }
+ lsi.lsi_utime = sip->si_utime;
+ lsi.lsi_stime = sip->si_stime;
+ break;
+
+ case LX_SIGILL:
+ case LX_SIGBUS:
+ case LX_SIGFPE:
+ case LX_SIGSEGV:
+ lsi.lsi_addr = sip->si_addr;
+ break;
+
+ default:
+ lsi.lsi_pid = sip->si_pid;
+ lsi.lsi_uid = LX_UID32_TO_UID16(sip->si_uid);
+ }
+
+ if (copyout(&lsi, ulxsip, sizeof (lsi)) != 0) {
+ return (set_errno(EFAULT));
+ }
+
+ return (0);
+}
+
+#if defined(_SYSCALL32_IMPL)
+int
+stol_ksiginfo32_copyout(k_siginfo_t *sip, void *ulxsip)
+{
+ lx_siginfo32_t lsi;
+
+ bzero(&lsi, sizeof (lsi));
+ lsi.lsi_signo = lx_stol_signo(sip->si_signo, SIGCLD);
+ lsi.lsi_code = lx_stol_sigcode(sip->si_code);
+ lsi.lsi_errno = lx_errno(sip->si_errno, EINVAL);
+
+ switch (lsi.lsi_signo) {
+ case LX_SIGPOLL:
+ lsi.lsi_band = sip->si_band;
+ lsi.lsi_fd = sip->si_fd;
+ break;
+
+ case LX_SIGCHLD:
+ lsi.lsi_pid = sip->si_pid;
+ if (sip->si_code <= 0 || sip->si_code == CLD_EXITED) {
+ lsi.lsi_status = sip->si_status;
+ } else {
+ lsi.lsi_status = lx_stol_status(sip->si_status,
+ SIGKILL);
+ }
+ lsi.lsi_utime = sip->si_utime;
+ lsi.lsi_stime = sip->si_stime;
+ break;
+
+ case LX_SIGILL:
+ case LX_SIGBUS:
+ case LX_SIGFPE:
+ case LX_SIGSEGV:
+ lsi.lsi_addr = (caddr32_t)(uintptr_t)sip->si_addr;
+ break;
+
+ default:
+ lsi.lsi_pid = sip->si_pid;
+ lsi.lsi_uid = LX_UID32_TO_UID16(sip->si_uid);
+ }
+
+ if (copyout(&lsi, ulxsip, sizeof (lsi)) != 0) {
+ return (set_errno(EFAULT));
+ }
+
+ return (0);
+}
+#endif
+
+/*
+ * Linux uses the original bounds of the argv array when determining the
+ * contents of /proc/<pid/cmdline. We mimic those bounds using argv[0] and
+ * envp[0] as the beginning and end, respectively.
+ */
+void
+lx_read_argv_bounds(proc_t *p)
+{
+ user_t *up = PTOU(p);
+ lx_proc_data_t *pd = ptolxproc(p);
+ uintptr_t addr_arg = up->u_argv;
+ uintptr_t addr_env = up->u_envp;
+ uintptr_t arg_start = 0, env_start = 0, env_end = 0;
+ int i = 0;
+
+ VERIFY(pd != NULL);
+ VERIFY(MUTEX_HELD(&p->p_lock));
+
+ /*
+ * Use AT_SUN_PLATFORM in the aux vector to find the end of the envp
+ * strings.
+ */
+ for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
+ if (up->u_auxv[i].a_type == AT_SUN_PLATFORM) {
+ env_end = (uintptr_t)up->u_auxv[i].a_un.a_val;
+ }
+ }
+
+ mutex_exit(&p->p_lock);
+#if defined(_LP64)
+ if (p->p_model != DATAMODEL_NATIVE) {
+ uint32_t buf32;
+ if (copyin((void *)addr_arg, &buf32, sizeof (buf32)) == 0) {
+ arg_start = (uintptr_t)buf32;
+ }
+ if (copyin((void *)addr_env, &buf32, sizeof (buf32)) == 0) {
+ env_start = (uintptr_t)buf32;
+ }
+ } else
+#endif /* defined(_LP64) */
+ {
+ uintptr_t buf;
+ if (copyin((void *)addr_arg, &buf, sizeof (buf)) == 0) {
+ arg_start = buf;
+ }
+ if (copyin((void *)addr_env, &buf, sizeof (buf)) == 0) {
+ env_start = buf;
+ }
+ }
+ mutex_enter(&p->p_lock);
+ pd->l_args_start = arg_start;
+ pd->l_envs_start = env_start;
+ pd->l_envs_end = env_end;
+}
+
+/* Given an LX LWP, determine where user register state is stored. */
+lx_regs_location_t
+lx_regs_location(lx_lwp_data_t *lwpd, void **ucp, boolean_t for_write)
+{
+ switch (lwpd->br_stack_mode) {
+ case LX_STACK_MODE_BRAND:
+ /*
+ * The LWP was stopped with the brand stack and register state
+ * loaded, e.g. during a syscall emulated within the kernel.
+ */
+ return (LX_REG_LOC_LWP);
+
+ case LX_STACK_MODE_PREINIT:
+ if (for_write) {
+ /* setting registers not allowed in this state */
+ break;
+ }
+ if (lwpd->br_ptrace_whatstop == LX_PR_SIGNALLED ||
+ lwpd->br_ptrace_whatstop == LX_PR_SYSEXIT) {
+ /* The LWP was stopped by tracing on exec. */
+ return (LX_REG_LOC_LWP);
+ }
+ break;
+
+ case LX_STACK_MODE_NATIVE:
+ if (for_write) {
+ /* setting registers not allowed in this state */
+ break;
+ }
+ if (lwpd->br_ptrace_whystop == PR_BRAND &&
+ lwpd->br_ptrace_whatstop == LX_PR_EVENT) {
+ /* Called while ptrace-event-stopped by lx_exec. */
+ return (LX_REG_LOC_LWP);
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (lwpd->br_ptrace_stopucp != NULL) {
+ /*
+ * The LWP was stopped in the usermode emulation library
+ * but a ucontext_t for the preserved brand stack and
+ * register state was provided. Return the register state
+ * from that ucontext_t.
+ */
+ VERIFY(ucp != NULL);
+ *ucp = (void *)lwpd->br_ptrace_stopucp;
+ return (LX_REG_LOC_UCP);
+ }
+
+ return (LX_REG_LOC_UNAVAIL);
+}
diff --git a/usr/src/uts/common/brand/lx/os/lx_pid.c b/usr/src/uts/common/brand/lx/os/lx_pid.c
new file mode 100644
index 0000000000..40179bbdaf
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/os/lx_pid.c
@@ -0,0 +1,395 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2015, Joyent, Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+#include <sys/bitmap.h>
+#include <sys/var.h>
+#include <sys/thread.h>
+#include <sys/proc.h>
+#include <sys/brand.h>
+#include <sys/zone.h>
+#include <sys/lx_brand.h>
+
+#define LINUX_PROC_FACTOR 8 /* factor down the hash table by this */
+static int hash_len = 4; /* desired average hash chain length */
+static int hash_size; /* no of buckets in the hash table */
+
+static struct lx_pid **stol_pid_hash;
+static struct lx_pid **ltos_pid_hash;
+
+#define LTOS_HASH(pid) ((pid) & (hash_size - 1))
+#define STOL_HASH(pid, tid) (((pid) + (tid)) & (hash_size - 1))
+
+static kmutex_t hash_lock;
+
+static void
+lx_pid_insert_hash(struct lx_pid *lpidp)
+{
+ int shash = STOL_HASH(lpidp->s_pid, lpidp->s_tid);
+ int lhash = LTOS_HASH(lpidp->l_pid);
+
+ ASSERT(MUTEX_HELD(&hash_lock));
+
+ lpidp->stol_next = stol_pid_hash[shash];
+ stol_pid_hash[shash] = lpidp;
+
+ lpidp->ltos_next = ltos_pid_hash[lhash];
+ ltos_pid_hash[lhash] = lpidp;
+}
+
+static struct lx_pid *
+lx_pid_remove_hash(pid_t pid, id_t tid)
+{
+ struct lx_pid **hpp;
+ struct lx_pid *lpidp = NULL;
+
+ ASSERT(MUTEX_HELD(&hash_lock));
+
+ hpp = &stol_pid_hash[STOL_HASH(pid, tid)];
+ while (*hpp) {
+ if ((*hpp)->s_pid == pid && (*hpp)->s_tid == tid) {
+ lpidp = *hpp;
+ *hpp = (*hpp)->stol_next;
+ break;
+ }
+ hpp = &(*hpp)->stol_next;
+ }
+
+ /*
+ * when called during error recovery the pid may already
+ * be released
+ */
+ if (lpidp == NULL)
+ return (NULL);
+
+ hpp = &ltos_pid_hash[LTOS_HASH(lpidp->l_pid)];
+ while (*hpp) {
+ if (*hpp == lpidp) {
+ *hpp = lpidp->ltos_next;
+ break;
+ }
+ hpp = &(*hpp)->ltos_next;
+ }
+
+ return (lpidp);
+}
+
+/*
+ * given a solaris pid/tid pair, create a linux pid
+ */
+void
+lx_pid_assign(kthread_t *t, struct lx_pid *lpidp)
+{
+ proc_t *p = ttoproc(t);
+ lx_lwp_data_t *lwpd = ttolxlwp(t);
+ pid_t s_pid = p->p_pid;
+ id_t s_tid = t->t_tid;
+
+ /*
+ * When lx_initlwp is called from lx_setbrand, p_lwpcnt will already be
+ * equal to 1. Since lx_initlwp is being called against an lwp that
+ * already exists, an additional pid allocation is not necessary.
+ *
+ * We check for this by testing br_ppid == 0.
+ */
+ if (p->p_lwpcnt > 0 && lwpd->br_ppid != 0) {
+ /*
+ * Assign allocated pid to any thread other than the first.
+ * The l_pid and l_pidp fields should be populated.
+ */
+ VERIFY(lpidp->l_pidp != NULL);
+ VERIFY(lpidp->l_pid != 0);
+ } else {
+ /*
+ * There are cases where a pid is speculatively allocated but
+ * is not needed. We are obligated to free it here.
+ */
+ if (lpidp->l_pidp != NULL) {
+ (void) pid_rele(lpidp->l_pidp);
+ }
+ lpidp->l_pidp = NULL;
+ lpidp->l_pid = s_pid;
+ }
+
+ lpidp->s_pid = s_pid;
+ lpidp->s_tid = s_tid;
+ lpidp->l_start = t->t_start;
+
+ /*
+ * now put the pid into the linux-solaris and solaris-linux
+ * conversion hash tables
+ */
+ mutex_enter(&hash_lock);
+ lx_pid_insert_hash(lpidp);
+ mutex_exit(&hash_lock);
+
+ lwpd->br_pid = lpidp->l_pid;
+}
+
+/*
+ * If we are exec()ing the process, this thread's tid is about to be reset
+ * to 1. Make sure the Linux PID bookkeeping reflects that change.
+ */
+void
+lx_pid_reassign(kthread_t *t)
+{
+ proc_t *p = ttoproc(t);
+ struct pid *old_pidp;
+ struct lx_pid *lpidp;
+
+ ASSERT(p->p_lwpcnt == 1);
+
+ mutex_enter(&hash_lock);
+
+ /*
+ * Clean up all the traces of this thread's 'fake' Linux PID.
+ */
+ lpidp = lx_pid_remove_hash(p->p_pid, t->t_tid);
+ ASSERT(lpidp != NULL);
+ old_pidp = lpidp->l_pidp;
+ lpidp->l_pidp = NULL;
+
+ /*
+ * Now register this thread as (pid, 1).
+ */
+ lpidp->l_pid = p->p_pid;
+ lpidp->s_pid = p->p_pid;
+ lpidp->s_tid = 1;
+ lx_pid_insert_hash(lpidp);
+
+ mutex_exit(&hash_lock);
+
+ if (old_pidp)
+ (void) pid_rele(old_pidp);
+}
+
+/*
+ * release a solaris pid/tid pair
+ */
+void
+lx_pid_rele(pid_t pid, id_t tid)
+{
+ struct lx_pid *lpidp;
+
+ mutex_enter(&hash_lock);
+ lpidp = lx_pid_remove_hash(pid, tid);
+ mutex_exit(&hash_lock);
+
+ if (lpidp) {
+ if (lpidp->l_pidp)
+ (void) pid_rele(lpidp->l_pidp);
+
+ kmem_free(lpidp, sizeof (*lpidp));
+ }
+}
+
+/*
+ * given a linux pid, return the solaris pid/tid pair
+ */
+int
+lx_lpid_to_spair(pid_t l_pid, pid_t *s_pid, id_t *s_tid)
+{
+ struct lx_pid *hp;
+
+ if (l_pid == 1) {
+ pid_t initpid;
+
+ /*
+ * We are trying to look up the Linux init process for the
+ * current zone, which we pretend has pid 1.
+ */
+ if ((initpid = curzone->zone_proc_initpid) == -1) {
+ /*
+ * We could not find the init process for this zone.
+ */
+ return (-1);
+ }
+
+ if (s_pid != NULL)
+ *s_pid = initpid;
+ if (s_tid != NULL)
+ *s_tid = 1;
+
+ return (0);
+ }
+
+ mutex_enter(&hash_lock);
+ for (hp = ltos_pid_hash[LTOS_HASH(l_pid)]; hp; hp = hp->ltos_next) {
+ if (l_pid == hp->l_pid) {
+ if (s_pid)
+ *s_pid = hp->s_pid;
+ if (s_tid)
+ *s_tid = hp->s_tid;
+ break;
+ }
+ }
+ mutex_exit(&hash_lock);
+ if (hp != NULL)
+ return (0);
+
+ /*
+ * We didn't find this pid in our translation table.
+ * But this still could be the pid of a native process
+ * running in the current zone so check for that here.
+ *
+ * Note that prfind() only searches for processes in the current zone.
+ */
+ mutex_enter(&pidlock);
+ if (prfind(l_pid) != NULL) {
+ mutex_exit(&pidlock);
+ if (s_pid)
+ *s_pid = l_pid;
+ if (s_tid)
+ *s_tid = 0;
+ return (0);
+ }
+ mutex_exit(&pidlock);
+
+ return (-1);
+}
+
+/*
+ * Given an lwp, return the Linux pid of its parent. If the caller
+ * wants them, we return the Solaris (pid, tid) as well.
+ */
+pid_t
+lx_lwp_ppid(klwp_t *lwp, pid_t *ppidp, id_t *ptidp)
+{
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+ proc_t *p = lwptoproc(lwp);
+ struct lx_pid *hp;
+ pid_t zoneinit = curproc->p_zone->zone_proc_initpid;
+ pid_t lppid, ppid;
+
+ /*
+ * Be sure not to return a parent pid that should be invisible
+ * within this zone.
+ */
+ ppid = ((p->p_flag & SZONETOP)
+ ? curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
+
+ /*
+ * If the parent process's pid is the zone's init process, force it
+ * to the Linux init pid value of 1.
+ */
+ if (ppid == zoneinit)
+ ppid = 1;
+
+ /*
+ * There are two cases in which the Linux definition of a 'parent'
+ * matches that of Solaris:
+ *
+ * - if our tgid is the same as our PID, then we are either the
+ * first thread in the process or a CLONE_THREAD thread.
+ *
+ * - if the brand lwp value for ppid is 0, then we are either the
+ * child of a differently-branded process or a CLONE_PARENT thread.
+ */
+ if (p->p_pid == lwpd->br_tgid || lwpd->br_ppid == 0) {
+ if (ppidp != NULL)
+ *ppidp = ppid;
+ if (ptidp != NULL)
+ *ptidp = -1;
+ return (ppid);
+ }
+
+ /*
+ * Set the default Linux parent pid to be the pid of the zone's init
+ * process; this will get converted back to the Linux default of 1
+ * later.
+ */
+ lppid = zoneinit;
+
+ /*
+ * If the process's parent isn't init, try and look up the Linux "pid"
+ * corresponding to the process's parent.
+ */
+ if (ppid != 1) {
+ /*
+ * In all other cases, we are looking for the parent of this
+ * specific thread, which in Linux refers to the thread that
+ * clone()d it. We stashed that thread's PID away when this
+ * thread was created.
+ */
+ mutex_enter(&hash_lock);
+ for (hp = ltos_pid_hash[LTOS_HASH(lwpd->br_ppid)]; hp;
+ hp = hp->ltos_next) {
+ if (lwpd->br_ppid == hp->l_pid) {
+ /*
+ * We found the PID we were looking for, but
+ * since we cached its value in this LWP's brand
+ * structure, it has exited and been reused by
+ * another process.
+ */
+ if (hp->l_start > lwptot(lwp)->t_start)
+ break;
+
+ lppid = lwpd->br_ppid;
+ if (ppidp != NULL)
+ *ppidp = hp->s_pid;
+ if (ptidp != NULL)
+ *ptidp = hp->s_tid;
+
+ break;
+ }
+ }
+ mutex_exit(&hash_lock);
+ }
+
+ if (lppid == zoneinit) {
+ lppid = 1;
+
+ if (ppidp != NULL)
+ *ppidp = lppid;
+ if (ptidp != NULL)
+ *ptidp = -1;
+ }
+
+ return (lppid);
+}
+
+void
+lx_pid_init(void)
+{
+ hash_size = 1 << highbit(v.v_proc / (hash_len * LINUX_PROC_FACTOR));
+
+ stol_pid_hash = kmem_zalloc(sizeof (struct lx_pid *) * hash_size,
+ KM_SLEEP);
+ ltos_pid_hash = kmem_zalloc(sizeof (struct lx_pid *) * hash_size,
+ KM_SLEEP);
+
+ mutex_init(&hash_lock, NULL, MUTEX_DEFAULT, NULL);
+}
+
+void
+lx_pid_fini(void)
+{
+ kmem_free(stol_pid_hash, sizeof (struct lx_pid *) * hash_size);
+ kmem_free(ltos_pid_hash, sizeof (struct lx_pid *) * hash_size);
+}
diff --git a/usr/src/uts/common/brand/lx/os/lx_ptrace.c b/usr/src/uts/common/brand/lx/os/lx_ptrace.c
new file mode 100644
index 0000000000..0f521df61b
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/os/lx_ptrace.c
@@ -0,0 +1,2564 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+/*
+ * Emulation of the Linux ptrace(2) interface.
+ *
+ * OVERVIEW
+ *
+ * The Linux process model is somewhat different from the illumos native
+ * model. One critical difference is that each Linux thread has a unique
+ * identifier in the pid namespace. The lx brand assigns a pid to each LWP
+ * within the emulated process, giving the pid of the process itself to the
+ * first LWP.
+ *
+ * The Linux ptrace(2) interface allows for any LWP in a branded process to
+ * exert control over any other LWP within the same zone. Control is exerted
+ * by the use of the ptrace(2) system call itself, which accepts a number of
+ * request codes. Feedback on traced events is primarily received by the
+ * tracer through SIGCLD and the emulated waitpid(2) and waitid(2) system
+ * calls. Many of the possible ptrace(2) requests will only succeed if the
+ * target LWP is in a "ptrace-stop" condition.
+ *
+ * HISTORY
+ *
+ * The brand support for ptrace(2) was originally built on top of the rich
+ * support for debugging and tracing provided through the illumos /proc
+ * interfaces, mounted at /native/proc within the zone. The native legacy
+ * ptrace(3C) functionality was used as a starting point, but was generally
+ * insufficient for complete and precise emulation. The extant legacy
+ * interface, and indeed our native SIGCLD and waitid(2) facilities, are
+ * focused on _process_ level concerns -- the Linux interface has been
+ * extended to be aware of LWPs as well.
+ *
+ * In order to allow us to focus on providing more complete and accurate
+ * emulation without extensive and undesirable changes to the native
+ * facilities, this second generation ptrace(2) emulation is mostly separate
+ * from any other tracing or debugging framework in the system.
+ *
+ * ATTACHING TRACERS TO TRACEES
+ *
+ * There are several ways that a child LWP may becomed traced by a tracer.
+ * To determine which attach method caused a tracee to become attached, one
+ * may inspect the "br_ptrace_attach" member of the LWP-specific brand data
+ * with the debugger.
+ *
+ * The first attach methods to consider are the attaching ptrace(2) requests:
+ *
+ * PTRACE_TRACEME
+ *
+ * If an LWP makes a PTRACE_TRACEME call, it will be attached as a tracee
+ * to its parent LWP (br_ppid). Using PTRACE_TRACEME does _not_ cause the
+ * tracee to be held in a stop condition. It is common practice for
+ * consumers to raise(SIGSTOP) immediately afterward.
+ *
+ * PTRACE_ATTACH
+ *
+ * An LWP may attempt to trace any other LWP in this, or another, process.
+ * We currently allow any attach where the process containing the tracer
+ * LWP has permission to write to /proc for the process containing the
+ * intended tracer. This action also sends a SIGSTOP to the newly attached
+ * tracee.
+ *
+ * The second class of attach methods are the clone(2)/fork(2) inheritance
+ * options that may be set on a tracee with PTRACE_SETOPTIONS:
+ *
+ * PTRACE_O_TRACEFORK, PTRACE_O_TRACEVFORK and PTRACE_O_TRACECLONE
+ *
+ * If these options have been set on a tracee, then a fork(2), vfork(2) or
+ * clone(2) respectively will cause the newly created LWP to be traced by
+ * the same tracer. The same set of ptrace(2) options will also be set on
+ * the new child.
+ *
+ * The third class of attach method is the PTRACE_CLONE flag to clone(2).
+ * This flag induces the same inheritance as PTRACE_O_TRACECLONE, but is
+ * passed by the tracee as an argument to clone(2).
+ *
+ * DETACHING TRACEES
+ *
+ * Tracees can be detached by the tracer with the PTRACE_DETACH request.
+ * This request is only valid when the tracee is in a ptrace(2) stop
+ * condition, and is itself a restarting action.
+ *
+ * If the tracer exits without detaching all of its tracees, then all of the
+ * tracees are automatically detached and restarted. If a tracee was in
+ * "signal-delivery-stop" at the time the tracer exited, the signal will be
+ * released to the child unless it is a SIGSTOP. We drop this instance of
+ * SIGSTOP in order to prevent the child from becoming stopped by job
+ * control.
+ *
+ * ACCORD ALLOCATION AND MANAGEMENT
+ *
+ * The "lx_ptrace_accord_t" object tracks the agreement between a tracer LWP
+ * and zero or more tracee LWPs. It is explicitly illegal for a tracee to
+ * trace its tracer, and we block this in PTRACE_ATTACH/PTRACE_TRACEME.
+ *
+ * An LWP starts out without an accord. If a child of that LWP calls
+ * ptrace(2) with the PTRACE_TRACEME subcommand, or if the LWP itself uses
+ * PTRACE_ATTACH, an accord will be allocated and stored on that LWP. The
+ * accord structure is not released from that LWP until it arrives in
+ * lx_exitlwp(), as called by lwp_exit(). A new accord will not be
+ * allocated, even if one does not exist, once an LWP arrives in lx_exitlwp()
+ * and sets the LX_PTF_EXITING flag. An LWP will have at most one accord
+ * structure throughout its entire lifecycle; once it has one, it has the
+ * same one until death.
+ *
+ * The accord is reference counted (lxpa_refcnt), starting at a count of one
+ * at creation to represent the link from the tracer LWP to its accord. The
+ * accord is not freed until the reference count falls to zero.
+ *
+ * To make mutual exclusion between a detaching tracer and various notifying
+ * tracees simpler, the tracer will hold "pidlock" while it clears the
+ * accord members that point back to the tracer LWP and CV.
+ *
+ * SIGNALS AND JOB CONTROL
+ *
+ * Various actions, either directly ptrace(2) related or commonly associated
+ * with tracing, cause process- or thread-directed SIGSTOP signals to be sent
+ * to tracees. These signals, and indeed any signal other than SIGKILL, can
+ * be suppressed by the tracer when using a restarting request (including
+ * PTRACE_DETACH) on a child. The signal may also be substituted for a
+ * different signal.
+ *
+ * If a SIGSTOP (or other stopping signal) is not suppressed by the tracer,
+ * it will induce the regular illumos native job control stop of the entire
+ * traced process. This is at least passingly similar to the Linux "group
+ * stop" ptrace(2) condition.
+ *
+ * SYSTEM CALL TRACING
+ *
+ * The ptrace(2) interface enables the tracer to hold the tracee on entry and
+ * exit from system calls. When a stopped tracee is restarted through the
+ * PTRACE_SYSCALL request, the LX_PTF_SYSCALL flag is set until the next
+ * system call boundary. Whether this is a "syscall-entry-stop" or
+ * "syscall-exit-stop", the tracee is held and the tracer is notified via
+ * SIGCLD/waitpid(2) in the usual way. The flag LX_PTF_SYSCALL flag is
+ * cleared after each stop; for ongoing system call tracing the tracee must
+ * be continuously restarted with PTRACE_SYSCALL.
+ *
+ * EVENT STOPS
+ *
+ * Various events (particularly FORK, VFORK, CLONE, EXEC and EXIT) are
+ * enabled by the tracer through PTRACE_SETOPTIONS. Once enabled, the tracee
+ * will be stopped at the nominated points of interest and the tracer
+ * notified. The tracer may request additional information about the event,
+ * such as the pid of new LWPs and processes, via PTRACE_GETEVENTMSG.
+ *
+ * LOCK ORDERING RULES
+ *
+ * It is not safe, in general, to hold p_lock for two different processes at
+ * the same time. This constraint is the primary reason for the existence
+ * (and complexity) of the ptrace(2) accord mechanism.
+ *
+ * In order to facilitate looking up accords by the "pid" of a tracer LWP,
+ * p_lock for the tracer process may be held while entering the accord mutex
+ * (lxpa_lock). This mutex protects the accord flags and reference count.
+ * The reference count is manipulated through lx_ptrace_accord_hold() and
+ * lx_ptrace_accord_rele().
+ *
+ * DO NOT interact with the accord mutex (lxpa_lock) directly. The
+ * lx_ptrace_accord_enter() and lx_ptrace_accord_exit() functions do various
+ * book-keeping and lock ordering enforcement and MUST be used.
+ *
+ * It is NOT legal to take ANY p_lock while holding the accord mutex
+ * (lxpa_lock). If the lxpa_tracees_lock is to be held concurrently with
+ * lxpa_lock, lxpa_lock MUST be taken first and dropped before taking p_lock
+ * of any processes from the tracee list.
+ *
+ * It is NOT legal to take a tracee p_lock and then attempt to enter the
+ * accord mutex (or tracee list mutex) of its tracer. When running as the
+ * tracee LWP, the tracee's hold will prevent the accord from being freed.
+ * Use of the LX_PTF_STOPPING or LX_PTF_CLONING flag in the LWP-specific brand
+ * data prevents an exiting tracer from altering the tracee until the tracee
+ * has come to an orderly stop, without requiring the tracee to hold its own
+ * p_lock the entire time it is stopping.
+ *
+ * It is not safe, in general, to enter "pidlock" while holding the p_lock of
+ * any process. It is similarly illegal to hold any accord locks (lxpa_lock
+ * or lxpa_sublock) while attempting to enter "pidlock". As "pidlock" is a
+ * global mutex, it should be held for the shortest possible time.
+ */
+
+#include <sys/types.h>
+#include <sys/kmem.h>
+#include <sys/ksynch.h>
+#include <sys/sysmacros.h>
+#include <sys/procfs.h>
+#include <sys/cmn_err.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/wait.h>
+#include <sys/prsystm.h>
+#include <sys/note.h>
+
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_impl.h>
+#include <sys/lx_misc.h>
+#include <lx_syscall.h>
+#include <lx_signum.h>
+
+
+typedef enum lx_ptrace_cont_flags_t {
+ LX_PTC_NONE = 0x00,
+ LX_PTC_SYSCALL = 0x01,
+ LX_PTC_SINGLESTEP = 0x02
+} lx_ptrace_cont_flags_t;
+
+
+extern int lx_user_regs_copyin(lx_lwp_data_t *, void *);
+extern int lx_user_regs_copyout(lx_lwp_data_t *, void *);
+extern int lx_ptrace_peekuser(lx_lwp_data_t *, uintptr_t, void *);
+extern int lx_ptrace_pokeuser(lx_lwp_data_t *, uintptr_t, void *);
+extern int lx_user_fpregs_copyin(lx_lwp_data_t *, void *);
+extern int lx_user_fpregs_copyout(lx_lwp_data_t *, void *);
+extern int lx_user_fpxregs_copyin(lx_lwp_data_t *, void *);
+extern int lx_user_fpxregs_copyout(lx_lwp_data_t *, void *);
+
+/*
+ * Macros for checking the state of an LWP via "br_ptrace_flags":
+ */
+#define LX_PTRACE_BUSY \
+ (LX_PTF_EXITING | LX_PTF_STOPPING | LX_PTF_CLONING)
+
+#define VISIBLE(a) (((a)->br_ptrace_flags & LX_PTF_EXITING) == 0)
+#define TRACEE_BUSY(a) (((a)->br_ptrace_flags & LX_PTRACE_BUSY) != 0)
+
+#define ACCORD_HELD(a) MUTEX_HELD(&(a)->lxpa_lock)
+
+static kcondvar_t lx_ptrace_busy_cv;
+static kmem_cache_t *lx_ptrace_accord_cache;
+
+/*
+ * Enter the accord mutex.
+ */
+static void
+lx_ptrace_accord_enter(lx_ptrace_accord_t *accord)
+{
+ VERIFY(MUTEX_NOT_HELD(&accord->lxpa_tracees_lock));
+
+ mutex_enter(&accord->lxpa_lock);
+}
+
+/*
+ * Exit the accord mutex. If the reference count has dropped to zero,
+ * free the accord.
+ */
+static void
+lx_ptrace_accord_exit(lx_ptrace_accord_t *accord)
+{
+ VERIFY(ACCORD_HELD(accord));
+
+ if (accord->lxpa_refcnt > 0) {
+ mutex_exit(&accord->lxpa_lock);
+ return;
+ }
+
+ /*
+ * When the reference count drops to zero we must free the accord.
+ */
+ VERIFY(accord->lxpa_tracer == NULL);
+ VERIFY(MUTEX_NOT_HELD(&accord->lxpa_tracees_lock));
+ VERIFY(list_is_empty(&accord->lxpa_tracees));
+ VERIFY(accord->lxpa_flags & LX_ACC_TOMBSTONE);
+
+ mutex_destroy(&accord->lxpa_lock);
+ mutex_destroy(&accord->lxpa_tracees_lock);
+
+ kmem_cache_free(lx_ptrace_accord_cache, accord);
+}
+
+/*
+ * Drop our reference to this accord. If this drops the reference count
+ * to zero, the next lx_ptrace_accord_exit() will free the accord.
+ */
+static void
+lx_ptrace_accord_rele(lx_ptrace_accord_t *accord)
+{
+ VERIFY(ACCORD_HELD(accord));
+
+ VERIFY(accord->lxpa_refcnt > 0);
+ accord->lxpa_refcnt--;
+}
+
+/*
+ * Place an additional hold on an accord.
+ */
+static void
+lx_ptrace_accord_hold(lx_ptrace_accord_t *accord)
+{
+ VERIFY(ACCORD_HELD(accord));
+
+ accord->lxpa_refcnt++;
+}
+
+/*
+ * Fetch the accord for this LWP. If one has not yet been created, and the
+ * process is not exiting, allocate it now. Must be called with p_lock held
+ * for the process containing the target LWP.
+ *
+ * If successful, we return holding the accord lock (lxpa_lock).
+ */
+static int
+lx_ptrace_accord_get_locked(klwp_t *lwp, lx_ptrace_accord_t **accordp,
+ boolean_t allocate_one)
+{
+ lx_ptrace_accord_t *lxpa;
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+ proc_t *p = lwptoproc(lwp);
+
+ VERIFY(MUTEX_HELD(&p->p_lock));
+
+ /*
+ * If this LWP does not have an accord, we wish to allocate
+ * and install one.
+ */
+ if ((lxpa = lwpd->br_ptrace_accord) == NULL) {
+ if (!allocate_one || !VISIBLE(lwpd)) {
+ /*
+ * Either we do not wish to allocate an accord, or this
+ * LWP has already begun exiting from a ptrace
+ * perspective.
+ */
+ *accordp = NULL;
+ return (ESRCH);
+ }
+
+ lxpa = kmem_cache_alloc(lx_ptrace_accord_cache, KM_SLEEP);
+ bzero(lxpa, sizeof (*lxpa));
+
+ /*
+ * The initial reference count is 1 because we are referencing
+ * it in from the soon-to-be tracer LWP.
+ */
+ lxpa->lxpa_refcnt = 1;
+ mutex_init(&lxpa->lxpa_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&lxpa->lxpa_tracees_lock, NULL, MUTEX_DEFAULT, NULL);
+ list_create(&lxpa->lxpa_tracees, sizeof (lx_lwp_data_t),
+ offsetof(lx_lwp_data_t, br_ptrace_linkage));
+ lxpa->lxpa_cvp = &p->p_cv;
+
+ lxpa->lxpa_tracer = lwpd;
+ lwpd->br_ptrace_accord = lxpa;
+ }
+
+ /*
+ * Lock the accord before returning it to the caller.
+ */
+ lx_ptrace_accord_enter(lxpa);
+
+ /*
+ * There should be at least one active reference to this accord,
+ * otherwise it should have been freed.
+ */
+ VERIFY(lxpa->lxpa_refcnt > 0);
+
+ *accordp = lxpa;
+ return (0);
+}
+
+/*
+ * Accords belong to the tracer LWP. Get the accord for this tracer or return
+ * an error if it was not possible. To prevent deadlocks, the caller MUST NOT
+ * hold p_lock on its own or any other process.
+ *
+ * If successful, we return holding the accord lock (lxpa_lock).
+ */
+static int
+lx_ptrace_accord_get_by_pid(pid_t lxpid, lx_ptrace_accord_t **accordp)
+{
+ int ret = ESRCH;
+ pid_t apid;
+ id_t atid;
+ proc_t *aproc;
+ kthread_t *athr;
+ klwp_t *alwp;
+ lx_lwp_data_t *alwpd;
+
+ VERIFY(MUTEX_NOT_HELD(&curproc->p_lock));
+
+ /*
+ * Locate the process containing the tracer LWP based on its Linux pid
+ * and lock it.
+ */
+ if (lx_lpid_to_spair(lxpid, &apid, &atid) != 0 ||
+ (aproc = sprlock(apid)) == NULL) {
+ return (ESRCH);
+ }
+
+ /*
+ * Locate the tracer LWP itself and ensure that it is visible to
+ * ptrace(2).
+ */
+ if ((athr = idtot(aproc, atid)) == NULL ||
+ (alwp = ttolwp(athr)) == NULL ||
+ (alwpd = lwptolxlwp(alwp)) == NULL ||
+ !VISIBLE(alwpd)) {
+ sprunlock(aproc);
+ return (ESRCH);
+ }
+
+ /*
+ * We should not fetch our own accord this way.
+ */
+ if (athr == curthread) {
+ sprunlock(aproc);
+ return (EPERM);
+ }
+
+ /*
+ * Fetch (or allocate) the accord owned by this tracer LWP:
+ */
+ ret = lx_ptrace_accord_get_locked(alwp, accordp, B_TRUE);
+
+ /*
+ * Unlock the process and return.
+ */
+ sprunlock(aproc);
+ return (ret);
+}
+
+/*
+ * Get (or allocate) the ptrace(2) accord for the current LWP, acting as a
+ * tracer. The caller MUST NOT currently hold p_lock on the process containing
+ * this LWP.
+ *
+ * If successful, we return holding the accord lock (lxpa_lock).
+ */
+static int
+lx_ptrace_accord_get(lx_ptrace_accord_t **accordp, boolean_t allocate_one)
+{
+ klwp_t *lwp = ttolwp(curthread);
+ proc_t *p = lwptoproc(lwp);
+ int ret;
+
+ VERIFY(MUTEX_NOT_HELD(&p->p_lock));
+
+ /*
+ * Lock the tracer (this LWP).
+ */
+ mutex_enter(&p->p_lock);
+
+ /*
+ * Fetch (or allocate) the accord for this LWP:
+ */
+ ret = lx_ptrace_accord_get_locked(lwp, accordp, allocate_one);
+
+ mutex_exit(&p->p_lock);
+
+ return (ret);
+}
+
+/*
+ * Restart an LWP if it is in "ptrace-stop". This function may induce sleep,
+ * so the caller MUST NOT hold any mutexes other than p_lock for the process
+ * containing the LWP.
+ */
+static void
+lx_ptrace_restart_lwp(klwp_t *lwp)
+{
+ kthread_t *rt = lwptot(lwp);
+ proc_t *rproc = lwptoproc(lwp);
+ lx_lwp_data_t *rlwpd = lwptolxlwp(lwp);
+
+ VERIFY(rt != curthread);
+ VERIFY(MUTEX_HELD(&rproc->p_lock));
+
+ /*
+ * Exclude potential meddling from procfs.
+ */
+ prbarrier(rproc);
+
+ /*
+ * Check that the LWP is still in "ptrace-stop" and, if so, restart it.
+ */
+ thread_lock(rt);
+ if (BSTOPPED(rt) && rt->t_whystop == PR_BRAND) {
+ rt->t_schedflag |= TS_BSTART;
+ setrun_locked(rt);
+
+ /*
+ * Clear stop reason.
+ */
+ rlwpd->br_ptrace_whystop = 0;
+ rlwpd->br_ptrace_whatstop = 0;
+ rlwpd->br_ptrace_flags &= ~(LX_PTF_CLDPEND | LX_PTF_WAITPEND);
+ }
+ thread_unlock(rt);
+}
+
+static void
+lx_ptrace_winfo(lx_lwp_data_t *remote, k_siginfo_t *ip, boolean_t waitflag,
+ pid_t *event_ppid, pid_t *event_pid)
+{
+ int signo;
+
+ /*
+ * Populate our k_siginfo_t with data about this "ptrace-stop"
+ * condition:
+ */
+ bzero(ip, sizeof (*ip));
+ ip->si_signo = SIGCLD;
+ ip->si_pid = remote->br_pid;
+ ip->si_code = CLD_TRAPPED;
+
+ switch (remote->br_ptrace_whatstop) {
+ case LX_PR_SYSENTRY:
+ case LX_PR_SYSEXIT:
+ ip->si_status = SIGTRAP;
+ if (remote->br_ptrace_options & LX_PTRACE_O_TRACESYSGOOD) {
+ ip->si_status |= 0x80;
+ }
+ break;
+
+ case LX_PR_SIGNALLED:
+ signo = remote->br_ptrace_stopsig;
+ if (signo < 1 || signo >= LX_NSIG) {
+ /*
+ * If this signal number is not valid, pretend it
+ * was a SIGTRAP.
+ */
+ ip->si_status = SIGTRAP;
+ } else {
+ ip->si_status = ltos_signo[signo];
+ }
+ break;
+
+ case LX_PR_EVENT:
+ ip->si_status = SIGTRAP | remote->br_ptrace_event;
+ /*
+ * Record the Linux pid of both this LWP and the create
+ * event we are dispatching. We will use this information
+ * to unblock any subsequent ptrace(2) events that depend
+ * on this one.
+ */
+ if (event_ppid != NULL)
+ *event_ppid = remote->br_pid;
+ if (event_pid != NULL)
+ *event_pid = (pid_t)remote->br_ptrace_eventmsg;
+ break;
+
+ default:
+ cmn_err(CE_PANIC, "unxpected stop subreason: %d",
+ remote->br_ptrace_whatstop);
+ }
+
+ /*
+ * If WNOWAIT was specified, do not mark the event as posted
+ * so that it may be re-fetched on another call to waitid().
+ */
+ if (waitflag)
+ remote->br_ptrace_flags &= ~(LX_PTF_CLDPEND | LX_PTF_WAITPEND);
+}
+
+/*
+ * Receive notification from stop() of a PR_BRAND stop.
+ */
+void
+lx_stop_notify(proc_t *p, klwp_t *lwp, ushort_t why, ushort_t what)
+{
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+ lx_ptrace_accord_t *accord;
+ klwp_t *plwp = NULL;
+ proc_t *pp = NULL;
+ lx_lwp_data_t *parent;
+ boolean_t cldpend = B_TRUE;
+ boolean_t cldpost = B_FALSE;
+ sigqueue_t *sqp = NULL;
+
+ /*
+ * We currently only care about LX-specific stop reasons.
+ */
+ if (why != PR_BRAND)
+ return;
+
+ switch (what) {
+ case LX_PR_SYSENTRY:
+ case LX_PR_SYSEXIT:
+ case LX_PR_SIGNALLED:
+ case LX_PR_EVENT:
+ break;
+ default:
+ cmn_err(CE_PANIC, "unexpected subreason for PR_BRAND"
+ " stop: %d", (int)what);
+ }
+
+ /*
+ * We should be holding the lock on our containing process. The
+ * STOPPING flag should have been set by lx_ptrace_stop() for all
+ * PR_BRAND stops.
+ */
+ VERIFY(MUTEX_HELD(&p->p_lock));
+ VERIFY(lwpd->br_ptrace_flags & LX_PTF_STOPPING);
+ VERIFY((accord = lwpd->br_ptrace_tracer) != NULL);
+
+ /*
+ * We must drop our process lock to take "pidlock". The
+ * LX_PTF_STOPPING flag protects us from an exiting tracer.
+ */
+ mutex_exit(&p->p_lock);
+
+ /*
+ * Allocate before we enter any mutexes.
+ */
+ sqp = kmem_zalloc(sizeof (*sqp), KM_SLEEP);
+
+ /*
+ * We take pidlock now, which excludes all callers of waitid() and
+ * prevents a detaching tracer from clearing critical accord members.
+ */
+ mutex_enter(&pidlock);
+ mutex_enter(&p->p_lock);
+
+ /*
+ * Get the ptrace(2) "parent" process, to which we may send
+ * a SIGCLD signal later.
+ */
+ if ((parent = accord->lxpa_tracer) != NULL &&
+ (plwp = parent->br_lwp) != NULL) {
+ pp = lwptoproc(plwp);
+ }
+
+ /*
+ * Our tracer should not have been modified in our absence; the
+ * LX_PTF_STOPPING flag prevents it.
+ */
+ VERIFY(lwpd->br_ptrace_tracer == accord);
+
+ /*
+ * Stash data for this stop condition in the LWP data while we hold
+ * both pidlock and our p_lock.
+ */
+ lwpd->br_ptrace_whystop = why;
+ lwpd->br_ptrace_whatstop = what;
+ lwpd->br_ptrace_flags |= LX_PTF_WAITPEND;
+
+ /*
+ * If this event does not depend on an event from the parent LWP,
+ * populate the siginfo_t for the event pending on this tracee LWP.
+ */
+ if (!(lwpd->br_ptrace_flags & LX_PTF_PARENT_WAIT) && pp != NULL) {
+ cldpost = B_TRUE;
+ lx_ptrace_winfo(lwpd, &sqp->sq_info, B_FALSE, NULL, NULL);
+ }
+
+ /*
+ * Drop our p_lock so that we may lock the tracer.
+ */
+ mutex_exit(&p->p_lock);
+ if (cldpost && pp != NULL) {
+ /*
+ * Post the SIGCLD to the tracer.
+ */
+ mutex_enter(&pp->p_lock);
+ if (!sigismember(&pp->p_sig, SIGCLD)) {
+ sigaddqa(pp, plwp->lwp_thread, sqp);
+ cldpend = B_FALSE;
+ sqp = NULL;
+ }
+ mutex_exit(&pp->p_lock);
+ }
+
+ /*
+ * We re-take our process lock now. The lock will be held until
+ * the thread is actually marked stopped, so we will not race with
+ * lx_ptrace_lock_if_stopped() or lx_waitid_helper().
+ */
+ mutex_enter(&p->p_lock);
+
+ /*
+ * We clear the STOPPING flag; stop() continues to hold our p_lock
+ * until our thread stop state is visible.
+ */
+ lwpd->br_ptrace_flags &= ~LX_PTF_STOPPING;
+ lwpd->br_ptrace_flags |= LX_PTF_STOPPED;
+ if (cldpend) {
+ /*
+ * We sent the SIGCLD for this new wait condition already.
+ */
+ lwpd->br_ptrace_flags |= LX_PTF_CLDPEND;
+ }
+
+ /*
+ * If lx_ptrace_exit_tracer() is trying to detach our tracer, it will
+ * be sleeping on this CV until LX_PTF_STOPPING is clear. Wake it
+ * now.
+ */
+ cv_broadcast(&lx_ptrace_busy_cv);
+
+ /*
+ * While still holding pidlock, we attempt to wake our tracer from a
+ * potential waitid() slumber.
+ */
+ if (accord->lxpa_cvp != NULL) {
+ cv_broadcast(accord->lxpa_cvp);
+ }
+
+ /*
+ * We release pidlock and return as we were called: with our p_lock
+ * held.
+ */
+ mutex_exit(&pidlock);
+
+ if (sqp != NULL) {
+ kmem_free(sqp, sizeof (*sqp));
+ }
+}
+
+/*
+ * For any restarting action (e.g. PTRACE_CONT, PTRACE_SYSCALL or
+ * PTRACE_DETACH) to be allowed, the tracee LWP must be in "ptrace-stop". This
+ * check must ONLY be run on tracees of the current LWP. If the check is
+ * successful, we return with the tracee p_lock held.
+ */
+static int
+lx_ptrace_lock_if_stopped(lx_ptrace_accord_t *accord, lx_lwp_data_t *remote)
+{
+ klwp_t *rlwp = remote->br_lwp;
+ proc_t *rproc = lwptoproc(rlwp);
+ kthread_t *rt = lwptot(rlwp);
+
+ /*
+ * We must never check that we, ourselves, are stopped. We must also
+ * have the accord tracee list locked while we lock our tracees.
+ */
+ VERIFY(curthread != rt);
+ VERIFY(MUTEX_HELD(&accord->lxpa_tracees_lock));
+ VERIFY(accord->lxpa_tracer == ttolxlwp(curthread));
+
+ /*
+ * Lock the process containing the tracee LWP.
+ */
+ mutex_enter(&rproc->p_lock);
+ if (!VISIBLE(remote)) {
+ /*
+ * The tracee LWP is currently detaching itself as it exits.
+ * It is no longer visible to ptrace(2).
+ */
+ mutex_exit(&rproc->p_lock);
+ return (ESRCH);
+ }
+
+ /*
+ * We must only check whether tracees of the current LWP are stopped.
+ * We check this condition after confirming visibility as an exiting
+ * tracee may no longer be completely consistent.
+ */
+ VERIFY(remote->br_ptrace_tracer == accord);
+
+ if (!(remote->br_ptrace_flags & LX_PTF_STOPPED)) {
+ /*
+ * The tracee is not in "ptrace-stop", so we release the
+ * process.
+ */
+ mutex_exit(&rproc->p_lock);
+ return (ESRCH);
+ }
+
+ /*
+ * The tracee is stopped. We return holding its process lock so that
+ * the caller may manipulate it.
+ */
+ return (0);
+}
+
+static int
+lx_ptrace_setoptions(lx_lwp_data_t *remote, uintptr_t options)
+{
+ /*
+ * Check for valid options.
+ */
+ if ((options & ~LX_PTRACE_O_ALL) != 0) {
+ return (EINVAL);
+ }
+
+ /*
+ * Set ptrace options on the target LWP.
+ */
+ remote->br_ptrace_options = (lx_ptrace_options_t)options;
+
+ return (0);
+}
+
+static int
+lx_ptrace_geteventmsg(lx_lwp_data_t *remote, void *umsgp)
+{
+ int error;
+
+#if defined(_SYSCALL32_IMPL)
+ if (get_udatamodel() != DATAMODEL_NATIVE) {
+ uint32_t tmp = remote->br_ptrace_eventmsg;
+
+ error = copyout(&tmp, umsgp, sizeof (uint32_t));
+ } else
+#endif
+ {
+ error = copyout(&remote->br_ptrace_eventmsg, umsgp,
+ sizeof (ulong_t));
+ }
+
+ return (error);
+}
+
+static int
+lx_ptrace_getsiginfo(lx_lwp_data_t *remote, void *usiginfo)
+{
+ klwp_t *lwp = remote->br_lwp;
+ int lx_sig;
+
+ lx_sig = lx_stol_signo(lwp->lwp_cursig, 0);
+ if (lx_sig < 1 || lwp->lwp_curinfo == NULL) {
+ return (EINVAL);
+ }
+
+#if defined(_SYSCALL32_IMPL)
+ if (get_udatamodel() != DATAMODEL_NATIVE) {
+ if (stol_ksiginfo32_copyout(&lwp->lwp_curinfo->sq_info,
+ usiginfo) != 0) {
+ return (EFAULT);
+ }
+ } else
+#endif
+ {
+ if (stol_ksiginfo_copyout(&lwp->lwp_curinfo->sq_info,
+ usiginfo) != 0) {
+ return (EFAULT);
+ }
+ }
+
+ return (0);
+}
+
+
+/*
+ * Implements the PTRACE_CONT subcommand of the Linux ptrace(2) interface.
+ */
+static int
+lx_ptrace_cont(lx_lwp_data_t *remote, lx_ptrace_cont_flags_t flags, int signo)
+{
+ klwp_t *lwp = remote->br_lwp;
+
+ if (flags & LX_PTC_SINGLESTEP) {
+ /*
+ * We do not currently support single-stepping.
+ */
+ lx_unsupported("PTRACE_SINGLESTEP not currently implemented");
+ return (EINVAL);
+ }
+
+ /*
+ * The tracer may choose to suppress the delivery of a signal, or
+ * select an alternative signal for delivery. If this is an
+ * appropriate ptrace(2) "signal-delivery-stop", br_ptrace_stopsig
+ * will be used as the new signal number.
+ *
+ * As with so many other aspects of the Linux ptrace(2) interface, this
+ * may fail silently if the state machine is not aligned correctly.
+ */
+ remote->br_ptrace_stopsig = signo;
+ remote->br_ptrace_donesig = 0;
+
+ /*
+ * Handle the syscall-stop flag if this is a PTRACE_SYSCALL restart:
+ */
+ if (flags & LX_PTC_SYSCALL) {
+ remote->br_ptrace_flags |= LX_PTF_SYSCALL;
+ } else {
+ remote->br_ptrace_flags &= ~LX_PTF_SYSCALL;
+ }
+
+ lx_ptrace_restart_lwp(lwp);
+
+ return (0);
+}
+
+/*
+ * Implements the PTRACE_DETACH subcommand of the Linux ptrace(2) interface.
+ *
+ * The LWP identified by the Linux pid "lx_pid" will, if it as a tracee of the
+ * current LWP, be detached and set runnable. If the specified LWP is not
+ * currently in the "ptrace-stop" state, the routine will return ESRCH as if
+ * the LWP did not exist at all.
+ *
+ * The caller must not hold p_lock on any process.
+ */
+static int
+lx_ptrace_detach(lx_ptrace_accord_t *accord, lx_lwp_data_t *remote, int signo,
+ boolean_t *release_hold)
+{
+ klwp_t *rlwp = remote->br_lwp;
+
+ /*
+ * The tracee LWP was in "ptrace-stop" and we now hold its p_lock.
+ * Detach the LWP from the accord and set it running.
+ */
+ VERIFY(!TRACEE_BUSY(remote));
+ VERIFY(MUTEX_HELD(&accord->lxpa_tracees_lock));
+ remote->br_ptrace_flags &= ~(LX_PTF_SYSCALL | LX_PTF_INHERIT);
+ VERIFY(list_link_active(&remote->br_ptrace_linkage));
+ list_remove(&accord->lxpa_tracees, remote);
+
+ remote->br_ptrace_attach = LX_PTA_NONE;
+ remote->br_ptrace_tracer = NULL;
+ remote->br_ptrace_flags = 0;
+ *release_hold = B_TRUE;
+
+ /*
+ * Decrement traced-lwp count for the process.
+ */
+ ASSERT(MUTEX_HELD(&rlwp->lwp_procp->p_lock));
+ VERIFY(ptolxproc(rlwp->lwp_procp)->l_ptrace-- >= 1);
+
+ /*
+ * The tracer may, as described in lx_ptrace_cont(), choose to suppress
+ * or modify the delivered signal.
+ */
+ remote->br_ptrace_stopsig = signo;
+ remote->br_ptrace_donesig = 0;
+
+ lx_ptrace_restart_lwp(rlwp);
+
+ return (0);
+}
+
+/*
+ * This routine implements the PTRACE_ATTACH operation of the Linux ptrace(2)
+ * interface.
+ *
+ * This LWP is requesting to be attached as a tracer to another LWP -- the
+ * tracee. If a ptrace accord to track the list of tracees has not yet been
+ * allocated, one will be allocated and attached to this LWP now.
+ *
+ * The "br_ptrace_tracer" on the tracee LWP is set to this accord, and the
+ * tracee LWP is then added to the "lxpa_tracees" list in the accord. We drop
+ * locks between these two phases; the only consumer of trace events from this
+ * accord is this LWP, which obviously cannot be running waitpid(2) at the same
+ * time as this call to ptrace(2).
+ */
+static int
+lx_ptrace_attach(pid_t lx_pid)
+{
+ int error = ESRCH;
+ /*
+ * Our (Tracer) LWP:
+ */
+ lx_ptrace_accord_t *accord;
+ lx_lwp_data_t *lwpd = ttolxlwp(curthread);
+ /*
+ * Remote (Tracee) LWP:
+ */
+ pid_t rpid;
+ id_t rtid;
+ proc_t *rproc;
+ kthread_t *rthr;
+ klwp_t *rlwp;
+ lx_lwp_data_t *rlwpd;
+
+ if (lwpd->br_pid == lx_pid) {
+ /*
+ * We cannot trace ourselves.
+ */
+ return (EPERM);
+ }
+
+ /*
+ * Ensure that we have an accord and obtain a lock on it. This
+ * routine should not fail because the LWP cannot make ptrace(2) system
+ * calls after it has begun exiting.
+ */
+ VERIFY0(lwpd->br_ptrace_flags & LX_PTF_EXITING);
+ VERIFY(lx_ptrace_accord_get(&accord, B_TRUE) == 0);
+
+ /*
+ * Place speculative hold in case the attach is successful.
+ */
+ lx_ptrace_accord_hold(accord);
+ lx_ptrace_accord_exit(accord);
+
+ /*
+ * Locate the process containing the tracee LWP based on its Linux pid
+ * and lock it.
+ */
+ if (lx_lpid_to_spair(lx_pid, &rpid, &rtid) != 0 ||
+ (rproc = sprlock(rpid)) == NULL) {
+ /*
+ * We could not find the target process.
+ */
+ goto errout;
+ }
+
+ /*
+ * Locate the tracee LWP.
+ */
+ if ((rthr = idtot(rproc, rtid)) == NULL ||
+ (rlwp = ttolwp(rthr)) == NULL ||
+ (rlwpd = lwptolxlwp(rlwp)) == NULL ||
+ !VISIBLE(rlwpd)) {
+ /*
+ * The LWP could not be found, was not branded, or is not
+ * visible to ptrace(2) at this time.
+ */
+ goto unlock_errout;
+ }
+
+ /*
+ * We now hold the lock on the tracee. Attempt to install ourselves
+ * as the tracer.
+ */
+ if (curproc != rproc && priv_proc_cred_perm(curproc->p_cred, rproc,
+ NULL, VWRITE) != 0) {
+ /*
+ * This process does not have permission to trace the remote
+ * process.
+ */
+ error = EPERM;
+ } else if (rlwpd->br_ptrace_tracer != NULL) {
+ /*
+ * This LWP is already being traced.
+ */
+ VERIFY(list_link_active(&rlwpd->br_ptrace_linkage));
+ VERIFY(rlwpd->br_ptrace_attach != LX_PTA_NONE);
+ error = EPERM;
+ } else {
+ lx_proc_data_t *rprocd = ptolxproc(rproc);
+
+ /*
+ * Bond the tracee to the accord.
+ */
+ VERIFY0(rlwpd->br_ptrace_flags & LX_PTF_EXITING);
+ VERIFY(rlwpd->br_ptrace_attach == LX_PTA_NONE);
+ rlwpd->br_ptrace_attach = LX_PTA_ATTACH;
+ rlwpd->br_ptrace_tracer = accord;
+
+ /*
+ * We had no tracer, and are thus not in the tracees list.
+ * It is safe to take the tracee list lock while we insert
+ * ourselves.
+ */
+ mutex_enter(&accord->lxpa_tracees_lock);
+ VERIFY(!list_link_active(&rlwpd->br_ptrace_linkage));
+ list_insert_tail(&accord->lxpa_tracees, rlwpd);
+ /*
+ * Bump traced-lwp count for the remote process.
+ */
+ rprocd->l_ptrace++;
+ mutex_exit(&accord->lxpa_tracees_lock);
+
+ /*
+ * Send a thread-directed SIGSTOP.
+ */
+ sigtoproc(rproc, rthr, SIGSTOP);
+
+
+ error = 0;
+ }
+
+unlock_errout:
+ /*
+ * Unlock the process containing the tracee LWP and the accord.
+ */
+ sprunlock(rproc);
+
+errout:
+ if (error != 0) {
+ /*
+ * The attach was not successful. Remove our speculative
+ * hold.
+ */
+ lx_ptrace_accord_enter(accord);
+ lx_ptrace_accord_rele(accord);
+ lx_ptrace_accord_exit(accord);
+ }
+
+ return (error);
+}
+
+int
+lx_ptrace_set_clone_inherit(int option, boolean_t inherit_flag)
+{
+ klwp_t *lwp = ttolwp(curthread);
+ proc_t *p = lwptoproc(lwp);
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+
+ mutex_enter(&p->p_lock);
+
+ switch (option) {
+ case LX_PTRACE_O_TRACEFORK:
+ case LX_PTRACE_O_TRACEVFORK:
+ case LX_PTRACE_O_TRACECLONE:
+ lwpd->br_ptrace_clone_option = option;
+ break;
+
+ default:
+ return (EINVAL);
+ }
+
+ if (inherit_flag) {
+ lwpd->br_ptrace_flags |= LX_PTF_INHERIT;
+ } else {
+ lwpd->br_ptrace_flags &= ~LX_PTF_INHERIT;
+ }
+
+ mutex_exit(&p->p_lock);
+ return (0);
+}
+
+/*
+ * If the parent LWP is being traced, we want to attach ourselves to the
+ * same accord.
+ */
+void
+lx_ptrace_inherit_tracer(lx_lwp_data_t *src, lx_lwp_data_t *dst)
+{
+ proc_t *srcp = lwptoproc(src->br_lwp);
+ proc_t *dstp = lwptoproc(dst->br_lwp);
+ lx_ptrace_accord_t *accord;
+ boolean_t is_fork = B_FALSE;
+
+ VERIFY(MUTEX_HELD(&dstp->p_lock));
+ if (srcp != dstp) {
+ /*
+ * In the case of being called via forklwp, some lock shuffling
+ * is required. The destination p_lock must be dropped to
+ * avoid deadlocks when locking the source and manipulating
+ * ptrace accord resources.
+ */
+ is_fork = B_TRUE;
+ sprlock_proc(dstp);
+ mutex_exit(&dstp->p_lock);
+ mutex_enter(&srcp->p_lock);
+ }
+
+ if ((accord = src->br_ptrace_tracer) == NULL) {
+ /*
+ * The source LWP does not have a tracer to inherit.
+ */
+ goto out;
+ }
+
+ /*
+ * There are two conditions to check when determining if the new
+ * child should inherit the same tracer (and tracing options) as its
+ * parent. Either condition is sufficient to trigger inheritance.
+ */
+ dst->br_ptrace_attach = LX_PTA_NONE;
+ if ((src->br_ptrace_options & src->br_ptrace_clone_option) != 0) {
+ /*
+ * Condition 1:
+ * The clone(2), fork(2) and vfork(2) emulated system calls
+ * populate "br_ptrace_clone_option" with the specific
+ * ptrace(2) SETOPTIONS option that applies to this
+ * operation. If the relevant option has been enabled by the
+ * tracer then we inherit.
+ */
+ dst->br_ptrace_attach |= LX_PTA_INHERIT_OPTIONS;
+
+ } else if ((src->br_ptrace_flags & LX_PTF_INHERIT) != 0) {
+ /*
+ * Condition 2:
+ * If the caller opted in to inheritance with the
+ * PTRACE_CLONE flag to clone(2), the LX_PTF_INHERIT flag
+ * will be set and we inherit.
+ */
+ dst->br_ptrace_attach |= LX_PTA_INHERIT_CLONE;
+ }
+
+ /*
+ * These values only apply for the duration of a single clone(2), et
+ * al, system call.
+ */
+ src->br_ptrace_flags &= ~LX_PTF_INHERIT;
+ src->br_ptrace_clone_option = 0;
+
+ if (dst->br_ptrace_attach == LX_PTA_NONE) {
+ /*
+ * No condition triggered inheritance.
+ */
+ goto out;
+ }
+
+ /*
+ * Set the LX_PTF_CLONING flag to prevent us from being detached
+ * while our p_lock is dropped.
+ */
+ src->br_ptrace_flags |= LX_PTF_CLONING;
+ mutex_exit(&srcp->p_lock);
+
+ /*
+ * Hold the accord for the new LWP.
+ */
+ lx_ptrace_accord_enter(accord);
+ lx_ptrace_accord_hold(accord);
+ lx_ptrace_accord_exit(accord);
+
+ /*
+ * Install the tracer and copy the current PTRACE_SETOPTIONS options.
+ */
+ dst->br_ptrace_tracer = accord;
+ dst->br_ptrace_options = src->br_ptrace_options;
+
+ /*
+ * This flag prevents waitid() from seeing events for the new child
+ * until the parent is able to post the relevant ptrace event to
+ * the tracer.
+ */
+ dst->br_ptrace_flags |= LX_PTF_PARENT_WAIT;
+
+ mutex_enter(&accord->lxpa_tracees_lock);
+ VERIFY(list_link_active(&src->br_ptrace_linkage));
+ VERIFY(!list_link_active(&dst->br_ptrace_linkage));
+ list_insert_tail(&accord->lxpa_tracees, dst);
+ mutex_exit(&accord->lxpa_tracees_lock);
+
+ /*
+ * Relock our process and clear our busy flag.
+ */
+ mutex_enter(&srcp->p_lock);
+ src->br_ptrace_flags &= ~LX_PTF_CLONING;
+
+ /*
+ * Bump traced-lwp count for the process.
+ */
+ ptolxproc(dstp)->l_ptrace++;
+
+ /*
+ * If lx_ptrace_exit_tracer() is trying to detach our tracer, it will
+ * be sleeping on this CV until LX_PTF_CLONING is clear. Wake it
+ * now.
+ */
+ cv_broadcast(&lx_ptrace_busy_cv);
+
+out:
+ if (is_fork) {
+ mutex_exit(&srcp->p_lock);
+ mutex_enter(&dstp->p_lock);
+ sprunprlock(dstp);
+ }
+}
+
+static int
+lx_ptrace_traceme(void)
+{
+ int error;
+ boolean_t did_attach = B_FALSE;
+ /*
+ * Our (Tracee) LWP:
+ */
+ klwp_t *lwp = ttolwp(curthread);
+ proc_t *p = lwptoproc(lwp);
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+ /*
+ * Remote (Tracer) LWP:
+ */
+ lx_ptrace_accord_t *accord;
+
+ /*
+ * We are intending to be the tracee. Fetch (or allocate) the accord
+ * for our parent LWP.
+ */
+ if ((error = lx_ptrace_accord_get_by_pid(lx_lwp_ppid(lwp, NULL,
+ NULL), &accord)) != 0) {
+ /*
+ * Could not determine the Linux pid of the parent LWP, or
+ * could not get the accord for that LWP.
+ */
+ return (error);
+ }
+
+ /*
+ * We now hold the accord lock.
+ */
+ if (accord->lxpa_flags & LX_ACC_TOMBSTONE) {
+ /*
+ * The accord is marked for death; give up now.
+ */
+ lx_ptrace_accord_exit(accord);
+ return (ESRCH);
+ }
+
+ /*
+ * Bump the reference count so that the accord is not freed. We need
+ * to drop the accord lock before we take our own p_lock.
+ */
+ lx_ptrace_accord_hold(accord);
+ lx_ptrace_accord_exit(accord);
+
+ /*
+ * We now lock _our_ process and determine if we can install our parent
+ * as our tracer.
+ */
+ mutex_enter(&p->p_lock);
+ if (lwpd->br_ptrace_tracer != NULL) {
+ /*
+ * This LWP is already being traced.
+ */
+ VERIFY(lwpd->br_ptrace_attach != LX_PTA_NONE);
+ error = EPERM;
+ } else {
+ /*
+ * Bond ourselves to the accord. We already bumped the accord
+ * reference count.
+ */
+ VERIFY(lwpd->br_ptrace_attach == LX_PTA_NONE);
+ lwpd->br_ptrace_attach = LX_PTA_TRACEME;
+ lwpd->br_ptrace_tracer = accord;
+ did_attach = B_TRUE;
+ error = 0;
+
+ /*
+ * Speculatively bump l_ptrace now before dropping p_lock.
+ * It will be reverted if the tracee attachment fails.
+ */
+ ptolxproc(p)->l_ptrace++;
+ }
+ mutex_exit(&p->p_lock);
+
+ /*
+ * Lock the accord tracee list and add this LWP. Once we are in the
+ * tracee list, it is the responsibility of the tracer to detach us.
+ */
+ if (error == 0) {
+ lx_ptrace_accord_enter(accord);
+ mutex_enter(&accord->lxpa_tracees_lock);
+
+ if (!(accord->lxpa_flags & LX_ACC_TOMBSTONE)) {
+ /*
+ * Put ourselves in the tracee list for this accord.
+ */
+ VERIFY(!list_link_active(&lwpd->br_ptrace_linkage));
+ list_insert_tail(&accord->lxpa_tracees, lwpd);
+ mutex_exit(&accord->lxpa_tracees_lock);
+ lx_ptrace_accord_exit(accord);
+
+ return (0);
+ }
+ mutex_exit(&accord->lxpa_tracees_lock);
+
+ /*
+ * The accord has been marked for death. We must
+ * untrace ourselves.
+ */
+ error = ESRCH;
+ lx_ptrace_accord_exit(accord);
+
+ /*
+ * Undo speculative increment of ptracer count.
+ */
+ mutex_enter(&p->p_lock);
+ ptolxproc(p)->l_ptrace--;
+ mutex_exit(&p->p_lock);
+ }
+
+ /*
+ * Our optimism was unjustified: We were unable to attach. We need to
+ * lock the process containing this LWP again in order to remove the
+ * tracer.
+ */
+ VERIFY(error != 0);
+ mutex_enter(&p->p_lock);
+ if (did_attach) {
+ /*
+ * Verify that things were as we left them:
+ */
+ VERIFY(!list_link_active(&lwpd->br_ptrace_linkage));
+ VERIFY(lwpd->br_ptrace_tracer == accord);
+
+ lwpd->br_ptrace_attach = LX_PTA_NONE;
+ lwpd->br_ptrace_tracer = NULL;
+ }
+ mutex_exit(&p->p_lock);
+
+ /*
+ * Remove our speculative hold on the accord, possibly causing it to be
+ * freed in the process.
+ */
+ lx_ptrace_accord_enter(accord);
+ lx_ptrace_accord_rele(accord);
+ lx_ptrace_accord_exit(accord);
+
+ return (error);
+}
+
+static boolean_t
+lx_ptrace_stop_common(proc_t *p, lx_lwp_data_t *lwpd, ushort_t what)
+{
+ boolean_t reset_nostop = B_FALSE;
+
+ VERIFY(MUTEX_HELD(&p->p_lock));
+
+ /*
+ * Mark this LWP as stopping and call stop() to enter "ptrace-stop".
+ */
+ VERIFY0(lwpd->br_ptrace_flags & LX_PTF_STOPPING);
+ lwpd->br_ptrace_flags |= LX_PTF_STOPPING;
+
+ if (lwpd->br_lwp->lwp_nostop == 1 &&
+ lwpd->br_ptrace_event == LX_PTRACE_EVENT_EXEC) {
+ /* We need to clear this to get the signal delivered. */
+ lwpd->br_lwp->lwp_nostop = 0;
+ reset_nostop = B_TRUE;
+ }
+
+ stop(PR_BRAND, what);
+
+ if (reset_nostop) {
+ VERIFY(lwpd->br_lwp->lwp_nostop == 0);
+ lwpd->br_lwp->lwp_nostop = 1;
+ }
+
+ /*
+ * We are back from "ptrace-stop" with our process lock held.
+ */
+ lwpd->br_ptrace_flags &= ~(LX_PTF_STOPPING | LX_PTF_STOPPED |
+ LX_PTF_CLDPEND);
+ lwpd->br_ptrace_stopucp = NULL;
+ cv_broadcast(&lx_ptrace_busy_cv);
+ mutex_exit(&p->p_lock);
+
+ return (B_TRUE);
+}
+
+int
+lx_ptrace_stop_for_option(int option, boolean_t child, ulong_t msg,
+ uintptr_t ucp)
+{
+ kthread_t *t = curthread;
+ klwp_t *lwp = ttolwp(t);
+ proc_t *p = lwptoproc(lwp);
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+
+ mutex_enter(&p->p_lock);
+ if (lwpd->br_ptrace_tracer == NULL) {
+ mutex_exit(&p->p_lock);
+ return (ESRCH);
+ }
+
+ if (!child) {
+ /*
+ * Only the first event posted by a new process is to be held
+ * until the matching parent event is dispatched, and only if
+ * it is a "child" event. This is not a child event, so we
+ * clear the wait flag.
+ */
+ lwpd->br_ptrace_flags &= ~LX_PTF_PARENT_WAIT;
+
+ } else if (option == LX_PTRACE_O_TRACEVFORK) {
+ /*
+ * For a child, we have to handle vfork as a special case. In
+ * lx_ptrace_inherit_tracer() we set LX_PTF_PARENT_WAIT to
+ * force events to be delayed until the parent posts its event.
+ * This flag is cleared in lx_waitid_helper() to enforce a
+ * "happens after" relationship. However, this obviously cannot
+ * work for the vfork case. Thus, we clear our flag now so that
+ * we can deliver the signal in lx_stop_notify(), if necessary.
+ */
+ lwpd->br_ptrace_flags &= ~LX_PTF_PARENT_WAIT;
+ }
+
+ if (!(lwpd->br_ptrace_options & option)) {
+ if (option == LX_PTRACE_O_TRACEEXEC) {
+ /*
+ * Without PTRACE_O_TRACEEXEC, the Linux kernel will
+ * send SIGTRAP to the process.
+ */
+ sigtoproc(p, t, SIGTRAP);
+ mutex_exit(&p->p_lock);
+ return (0);
+ }
+
+ /*
+ * The flag for this trace event is not enabled, so we will not
+ * stop.
+ */
+ mutex_exit(&p->p_lock);
+ return (ESRCH);
+ }
+
+ if (child) {
+ switch (option) {
+ case LX_PTRACE_O_TRACECLONE:
+ case LX_PTRACE_O_TRACEFORK:
+ case LX_PTRACE_O_TRACEVFORK:
+ /*
+ * Send the child LWP a directed SIGSTOP.
+ */
+ sigtoproc(p, t, SIGSTOP);
+ mutex_exit(&p->p_lock);
+ return (0);
+ default:
+ goto nostop;
+ }
+ }
+
+ lwpd->br_ptrace_eventmsg = msg;
+
+ switch (option) {
+ case LX_PTRACE_O_TRACECLONE:
+ lwpd->br_ptrace_event = LX_PTRACE_EVENT_CLONE;
+ break;
+ case LX_PTRACE_O_TRACEEXEC:
+ lwpd->br_ptrace_event = LX_PTRACE_EVENT_EXEC;
+ lwpd->br_ptrace_eventmsg = 0;
+ break;
+ case LX_PTRACE_O_TRACEEXIT:
+ lwpd->br_ptrace_event = LX_PTRACE_EVENT_EXIT;
+ break;
+ case LX_PTRACE_O_TRACEFORK:
+ lwpd->br_ptrace_event = LX_PTRACE_EVENT_FORK;
+ break;
+ case LX_PTRACE_O_TRACEVFORK:
+ lwpd->br_ptrace_event = LX_PTRACE_EVENT_VFORK;
+ break;
+ case LX_PTRACE_O_TRACEVFORKDONE:
+ lwpd->br_ptrace_event = LX_PTRACE_EVENT_VFORK_DONE;
+ lwpd->br_ptrace_eventmsg = 0;
+ break;
+ default:
+ goto nostop;
+ }
+
+ /*
+ * Userland may have passed in a ucontext_t pointer for
+ * PTRACE_GETREGS/PTRACE_SETREGS usage while stopped.
+ */
+ lwpd->br_ptrace_stopucp = ucp;
+
+ /*
+ * p_lock for the process containing the tracee will be dropped by
+ * lx_ptrace_stop_common().
+ */
+ return (lx_ptrace_stop_common(p, lwpd, LX_PR_EVENT) ? 0 : ESRCH);
+
+nostop:
+ lwpd->br_ptrace_event = 0;
+ lwpd->br_ptrace_eventmsg = 0;
+ mutex_exit(&p->p_lock);
+ return (ESRCH);
+}
+
+boolean_t
+lx_ptrace_stop(ushort_t what)
+{
+ klwp_t *lwp = ttolwp(curthread);
+ proc_t *p = lwptoproc(lwp);
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+
+ VERIFY(what == LX_PR_SYSENTRY || what == LX_PR_SYSEXIT ||
+ what == LX_PR_SIGNALLED);
+
+ /*
+ * If we do not have an accord, bail out early.
+ */
+ if (lwpd->br_ptrace_tracer == NULL)
+ return (B_FALSE);
+
+ /*
+ * Lock this process and re-check the condition.
+ */
+ mutex_enter(&p->p_lock);
+ if (lwpd->br_ptrace_tracer == NULL) {
+ VERIFY0(lwpd->br_ptrace_flags & LX_PTF_SYSCALL);
+ mutex_exit(&p->p_lock);
+ return (B_FALSE);
+ }
+
+ if (what == LX_PR_SYSENTRY || what == LX_PR_SYSEXIT) {
+ /*
+ * This is a syscall-entry-stop or syscall-exit-stop point.
+ */
+ if (!(lwpd->br_ptrace_flags & LX_PTF_SYSCALL)) {
+ /*
+ * A system call stop has not been requested.
+ */
+ mutex_exit(&p->p_lock);
+ return (B_FALSE);
+ }
+
+ /*
+ * The PTRACE_SYSCALL restart command applies only to the next
+ * system call entry or exit. The tracer must restart us with
+ * PTRACE_SYSCALL while we are in ptrace-stop for us to fire
+ * again at the next system call boundary.
+ */
+ lwpd->br_ptrace_flags &= ~LX_PTF_SYSCALL;
+ }
+
+ /*
+ * p_lock for the process containing the tracee will be dropped by
+ * lx_ptrace_stop_common().
+ */
+ return (lx_ptrace_stop_common(p, lwpd, what));
+}
+
+int
+lx_ptrace_issig_stop(proc_t *p, klwp_t *lwp)
+{
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+ int lx_sig;
+
+ VERIFY(MUTEX_HELD(&p->p_lock));
+
+ /*
+ * In very rare circumstances, a process which is almost completely
+ * through proc_exit() may incur issig checks in the current thread via
+ * clean-up actions. The process will still be branded, but the thread
+ * will have already been stripped of any LX-specific data on its way
+ * to the grave. Bail early if the brand data is missing.
+ */
+ if (lwpd == NULL) {
+ return (0);
+ }
+
+ /*
+ * If we do not have an accord, bail out now. Additionally, if there
+ * is no valid signal then we have no reason to stop.
+ */
+ if (lwpd->br_ptrace_tracer == NULL || lwp->lwp_cursig == SIGKILL ||
+ (lwp->lwp_cursig == 0 || lwp->lwp_cursig > NSIG) ||
+ (lx_sig = stol_signo[lwp->lwp_cursig]) < 1) {
+ if (lwp->lwp_cursig == 0) {
+ /*
+ * If this lwp has no current signal, it means that any
+ * signal ignorance enabled by br_ptrace_donesig has
+ * already taken place (the signal was consumed).
+ * By clearing donesig, we declare desire to ignore no
+ * signals for accurate ptracing.
+ */
+ lwpd->br_ptrace_donesig = 0;
+ }
+ return (0);
+ }
+
+ /*
+ * We stash the signal on the LWP where our waitid_helper will find it
+ * and enter the ptrace "signal-delivery-stop" condition.
+ */
+ lwpd->br_ptrace_stopsig = lx_sig;
+ lwpd->br_ptrace_donesig = 0;
+ (void) lx_ptrace_stop_common(p, lwpd, LX_PR_SIGNALLED);
+ mutex_enter(&p->p_lock);
+
+ /*
+ * When we return, the signal may have been altered or suppressed.
+ */
+ if (lwpd->br_ptrace_stopsig != lx_sig) {
+ int native_sig;
+ lx_sig = lwpd->br_ptrace_stopsig;
+
+ if (lx_sig >= LX_NSIG) {
+ lx_sig = 0;
+ }
+
+ /*
+ * Translate signal from Linux signal number back to
+ * an illumos native signal.
+ */
+ if (lx_sig >= LX_NSIG || lx_sig < 0 || (native_sig =
+ ltos_signo[lx_sig]) < 1) {
+ /*
+ * The signal is not deliverable.
+ */
+ lwp->lwp_cursig = 0;
+ lwp->lwp_extsig = 0;
+ if (lwp->lwp_curinfo) {
+ siginfofree(lwp->lwp_curinfo);
+ lwp->lwp_curinfo = NULL;
+ }
+ } else {
+ /*
+ * Alter the currently dispatching signal.
+ */
+ if (native_sig == SIGKILL) {
+ /*
+ * We mark ourselves the victim and request
+ * a restart of signal processing.
+ */
+ p->p_flag |= SKILLED;
+ p->p_flag &= ~SEXTKILLED;
+ return (-1);
+ }
+ lwp->lwp_cursig = native_sig;
+ lwp->lwp_extsig = 0;
+ if (lwp->lwp_curinfo != NULL) {
+ lwp->lwp_curinfo->sq_info.si_signo = native_sig;
+ }
+ }
+ }
+
+ lwpd->br_ptrace_donesig = lwp->lwp_cursig;
+ lwpd->br_ptrace_stopsig = 0;
+ return (0);
+}
+
+boolean_t
+lx_ptrace_sig_ignorable(proc_t *p, klwp_t *lwp, int sig)
+{
+ lx_proc_data_t *lxpd = ptolxproc(p);
+
+ /*
+ * Ignored signals and ptrace:
+ *
+ * When a process is being ptraced by another, special care is needed
+ * while handling signals. Since the tracer is interested in all
+ * signals sent to the tracee, an effort must be made to initially
+ * bypass signal ignorance logic. This allows the signal to be placed
+ * in the tracee's sigqueue to be inspected and potentially altered by
+ * the tracer.
+ *
+ * A critical detail in this procedure is how a signal is handled after
+ * tracer has completed processing for the event. If the signal would
+ * have been ignored, were it not for the initial ptrace override, then
+ * lx_ptrace_sig_ignorable must report B_TRUE when the tracee is
+ * restarted and resumes signal processing. This is done by recording
+ * the most recent tracee signal consumed by ptrace.
+ */
+
+ if (lxpd->l_ptrace != 0 && lx_stol_signo(sig, 0) != 0) {
+ /*
+ * This process is being ptraced. Bypass signal ignorance for
+ * anything that maps to a valid Linux signal...
+ */
+ if (lwp != NULL && lwptolxlwp(lwp)->br_ptrace_donesig == sig) {
+ /*
+ * ...Unless it is a signal which has already been
+ * processed by the tracer.
+ */
+ return (B_TRUE);
+ }
+ return (B_FALSE);
+ }
+ return (B_TRUE);
+}
+
+static void
+lx_ptrace_exit_tracer(proc_t *p, lx_lwp_data_t *lwpd,
+ lx_ptrace_accord_t *accord)
+{
+ VERIFY(MUTEX_NOT_HELD(&p->p_lock));
+
+ lx_ptrace_accord_enter(accord);
+ /*
+ * Mark this accord for death. This means no new tracees can be
+ * attached to this accord.
+ */
+ VERIFY0(accord->lxpa_flags & LX_ACC_TOMBSTONE);
+ accord->lxpa_flags |= LX_ACC_TOMBSTONE;
+ lx_ptrace_accord_exit(accord);
+
+ /*
+ * Walk the list of tracees, detaching them and setting them runnable
+ * if they are stopped.
+ */
+ for (;;) {
+ klwp_t *rlwp;
+ proc_t *rproc;
+ lx_lwp_data_t *remote;
+ kmutex_t *rmp;
+
+ mutex_enter(&accord->lxpa_tracees_lock);
+ if (list_is_empty(&accord->lxpa_tracees)) {
+ mutex_exit(&accord->lxpa_tracees_lock);
+ break;
+ }
+
+ /*
+ * Fetch the first tracee LWP in the list and lock the process
+ * which contains it.
+ */
+ remote = list_head(&accord->lxpa_tracees);
+ rlwp = remote->br_lwp;
+ rproc = lwptoproc(rlwp);
+ /*
+ * The p_lock mutex persists beyond the life of the process
+ * itself. We save the address, here, to prevent the need to
+ * dereference the proc_t after awaking from sleep.
+ */
+ rmp = &rproc->p_lock;
+ mutex_enter(rmp);
+
+ if (TRACEE_BUSY(remote)) {
+ /*
+ * This LWP is currently detaching itself on exit, or
+ * mid-way through stop(). We must wait for this
+ * action to be completed. While we wait on the CV, we
+ * must drop the accord tracee list lock.
+ */
+ mutex_exit(&accord->lxpa_tracees_lock);
+ cv_wait(&lx_ptrace_busy_cv, rmp);
+
+ /*
+ * While we were waiting, some state may have changed.
+ * Restart the walk to be sure we don't miss anything.
+ */
+ mutex_exit(rmp);
+ continue;
+ }
+
+ /*
+ * We now hold p_lock on the process. Remove the tracee from
+ * the list.
+ */
+ VERIFY(list_link_active(&remote->br_ptrace_linkage));
+ list_remove(&accord->lxpa_tracees, remote);
+
+ /*
+ * Unlink the accord and clear our trace flags.
+ */
+ remote->br_ptrace_attach = LX_PTA_NONE;
+ remote->br_ptrace_tracer = NULL;
+ remote->br_ptrace_flags = 0;
+
+ /*
+ * Let go of the list lock before we restart the LWP. We must
+ * not hold any locks other than the process p_lock when
+ * we call lx_ptrace_restart_lwp() as it will thread_lock
+ * the tracee.
+ */
+ mutex_exit(&accord->lxpa_tracees_lock);
+
+ /*
+ * Decrement traced-lwp count for the remote process.
+ */
+ VERIFY(ptolxproc(rproc)->l_ptrace-- >= 1);
+
+ /*
+ * Ensure that the LWP is not stopped on our account.
+ */
+ lx_ptrace_restart_lwp(rlwp);
+
+ /*
+ * Unlock the former tracee.
+ */
+ mutex_exit(rmp);
+
+ /*
+ * Drop the hold this tracee had on the accord.
+ */
+ lx_ptrace_accord_enter(accord);
+ lx_ptrace_accord_rele(accord);
+ lx_ptrace_accord_exit(accord);
+ }
+
+ mutex_enter(&p->p_lock);
+ lwpd->br_ptrace_accord = NULL;
+ mutex_exit(&p->p_lock);
+
+ /*
+ * Clean up and release our hold on the accord If we completely
+ * detached all tracee LWPs, this will free the accord. Otherwise, it
+ * will be freed when they complete their cleanup.
+ *
+ * We hold "pidlock" while clearing these members for easy exclusion of
+ * waitid(), etc.
+ */
+ mutex_enter(&pidlock);
+ lx_ptrace_accord_enter(accord);
+ accord->lxpa_cvp = NULL;
+ accord->lxpa_tracer = NULL;
+ mutex_exit(&pidlock);
+ lx_ptrace_accord_rele(accord);
+ lx_ptrace_accord_exit(accord);
+}
+
+static void
+lx_ptrace_exit_tracee(proc_t *p, lx_lwp_data_t *lwpd,
+ lx_ptrace_accord_t *accord)
+{
+ VERIFY(MUTEX_NOT_HELD(&p->p_lock));
+
+ /*
+ * We are the tracee LWP. Lock the accord tracee list and then our
+ * containing process.
+ */
+ mutex_enter(&accord->lxpa_tracees_lock);
+ mutex_enter(&p->p_lock);
+
+ /*
+ * Remove our reference to the accord. We will release our hold
+ * later.
+ */
+ VERIFY(lwpd->br_ptrace_tracer == accord);
+ lwpd->br_ptrace_attach = LX_PTA_NONE;
+ lwpd->br_ptrace_tracer = NULL;
+
+ /*
+ * Remove this LWP from the accord tracee list:
+ */
+ VERIFY(list_link_active(&lwpd->br_ptrace_linkage));
+ list_remove(&accord->lxpa_tracees, lwpd);
+
+ /*
+ * Wake up any tracers waiting for us to detach from the accord.
+ */
+ cv_broadcast(&lx_ptrace_busy_cv);
+
+ /*
+ * Decrement traced-lwp count for the process.
+ */
+ VERIFY(ptolxproc(p)->l_ptrace-- >= 1);
+
+ mutex_exit(&p->p_lock);
+ mutex_exit(&accord->lxpa_tracees_lock);
+
+ /*
+ * Grab "pidlock" and wake the tracer if it is blocked in waitid().
+ */
+ mutex_enter(&pidlock);
+ if (accord->lxpa_cvp != NULL) {
+ cv_broadcast(accord->lxpa_cvp);
+ }
+ mutex_exit(&pidlock);
+
+ /*
+ * Release our hold on the accord.
+ */
+ lx_ptrace_accord_enter(accord);
+ lx_ptrace_accord_rele(accord);
+ lx_ptrace_accord_exit(accord);
+}
+
+/*
+ * This routine is called from lx_exitlwp() when an LWP is ready to exit. If
+ * this LWP is being traced, it will be detached from the tracer's accord. The
+ * routine will also detach any LWPs being traced by this LWP.
+ */
+void
+lx_ptrace_exit(proc_t *p, klwp_t *lwp)
+{
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+ lx_ptrace_accord_t *accord;
+
+ VERIFY(MUTEX_HELD(&p->p_lock));
+
+ /*
+ * Mark our LWP as exiting from a ptrace perspective. This will
+ * prevent a new accord from being allocated if one does not exist
+ * already, and will make us invisible to PTRACE_ATTACH/PTRACE_TRACEME.
+ */
+ VERIFY0(lwpd->br_ptrace_flags & LX_PTF_EXITING);
+ lwpd->br_ptrace_flags |= LX_PTF_EXITING;
+
+ if ((accord = lwpd->br_ptrace_tracer) != NULL) {
+ /*
+ * We are traced by another LWP and must detach ourselves.
+ */
+ mutex_exit(&p->p_lock);
+ lx_ptrace_exit_tracee(p, lwpd, accord);
+ mutex_enter(&p->p_lock);
+ }
+
+ if ((accord = lwpd->br_ptrace_accord) != NULL) {
+ /*
+ * We have been tracing other LWPs, and must detach from
+ * them and clean up our accord.
+ */
+ mutex_exit(&p->p_lock);
+ lx_ptrace_exit_tracer(p, lwpd, accord);
+ mutex_enter(&p->p_lock);
+ }
+}
+
+/*
+ * Called when a SIGCLD signal is dispatched so that we may enqueue another.
+ * Return 0 if we enqueued a signal, or -1 if not.
+ */
+int
+lx_sigcld_repost(proc_t *pp, sigqueue_t *sqp)
+{
+ klwp_t *lwp = ttolwp(curthread);
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+ lx_ptrace_accord_t *accord;
+ lx_lwp_data_t *remote;
+ klwp_t *rlwp;
+ proc_t *rproc;
+ boolean_t found = B_FALSE;
+
+ VERIFY(MUTEX_HELD(&pidlock));
+ VERIFY(MUTEX_NOT_HELD(&pp->p_lock));
+ VERIFY(lwptoproc(lwp) == pp);
+
+ mutex_enter(&pp->p_lock);
+ if ((accord = lwpd->br_ptrace_accord) == NULL) {
+ /*
+ * This LWP is not a tracer LWP, so there will be no
+ * SIGCLD.
+ */
+ mutex_exit(&pp->p_lock);
+ return (-1);
+ }
+ mutex_exit(&pp->p_lock);
+
+ mutex_enter(&accord->lxpa_tracees_lock);
+ for (remote = list_head(&accord->lxpa_tracees); remote != NULL;
+ remote = list_next(&accord->lxpa_tracees, remote)) {
+ rlwp = remote->br_lwp;
+ rproc = lwptoproc(rlwp);
+
+ /*
+ * Check if this LWP is in "ptrace-stop". If in the correct
+ * stop condition, lock the process containing the tracee LWP.
+ */
+ if (lx_ptrace_lock_if_stopped(accord, remote) != 0) {
+ continue;
+ }
+
+ if (remote->br_ptrace_flags & LX_PTF_PARENT_WAIT) {
+ /*
+ * This event depends on waitid() clearing out the
+ * event of another LWP. Skip it for now.
+ */
+ mutex_exit(&rproc->p_lock);
+ continue;
+ }
+
+ if (!(remote->br_ptrace_flags & LX_PTF_CLDPEND)) {
+ /*
+ * No SIGCLD is required for this LWP.
+ */
+ mutex_exit(&rproc->p_lock);
+ continue;
+ }
+
+ if (!(remote->br_ptrace_flags & LX_PTF_WAITPEND) ||
+ remote->br_ptrace_whystop == 0 ||
+ remote->br_ptrace_whatstop == 0) {
+ /*
+ * No (new) stop reason to post for this LWP.
+ */
+ mutex_exit(&rproc->p_lock);
+ continue;
+ }
+
+ /*
+ * We found a process of interest. Leave the process
+ * containing the tracee LWP locked and break out of the loop.
+ */
+ found = B_TRUE;
+ break;
+ }
+ mutex_exit(&accord->lxpa_tracees_lock);
+
+ if (!found) {
+ return (-1);
+ }
+
+ /*
+ * Generate siginfo for this tracee LWP.
+ */
+ lx_ptrace_winfo(remote, &sqp->sq_info, B_FALSE, NULL, NULL);
+ remote->br_ptrace_flags &= ~LX_PTF_CLDPEND;
+ mutex_exit(&rproc->p_lock);
+
+ mutex_enter(&pp->p_lock);
+ if (sigismember(&pp->p_sig, SIGCLD)) {
+ mutex_exit(&pp->p_lock);
+
+ mutex_enter(&rproc->p_lock);
+ remote->br_ptrace_flags |= LX_PTF_CLDPEND;
+ mutex_exit(&rproc->p_lock);
+
+ return (-1);
+ }
+ sigaddqa(pp, curthread, sqp);
+ mutex_exit(&pp->p_lock);
+
+ return (0);
+}
+
+/*
+ * Consume the next available ptrace(2) event queued against the accord for
+ * this LWP. The event will be emitted as if through waitid(), and converted
+ * by lx_waitpid() and friends before the return to usermode.
+ */
+int
+lx_waitid_helper(idtype_t idtype, id_t id, k_siginfo_t *ip, int options,
+ boolean_t *brand_wants_wait, int *rval)
+{
+ lx_ptrace_accord_t *accord;
+ klwp_t *lwp = ttolwp(curthread);
+ proc_t *p = lwptoproc(lwp);
+ lx_lwp_data_t *local = lwptolxlwp(lwp);
+ lx_lwp_data_t *remote;
+ boolean_t found = B_FALSE;
+ klwp_t *rlwp = NULL;
+ proc_t *rproc = NULL;
+ pid_t event_pid = 0, event_ppid = 0;
+ boolean_t waitflag = !(options & WNOWAIT);
+ boolean_t target_found = B_FALSE;
+
+ VERIFY(MUTEX_HELD(&pidlock));
+ VERIFY(MUTEX_NOT_HELD(&p->p_lock));
+
+ /*
+ * By default, we do not expect waitid() to block on our account.
+ */
+ *brand_wants_wait = B_FALSE;
+
+ if (!local->br_waitid_emulate) {
+ /*
+ * This waitid() call is not expecting emulated results.
+ */
+ return (-1);
+ }
+
+ switch (idtype) {
+ case P_ALL:
+ case P_PID:
+ case P_PGID:
+ break;
+ default:
+ /*
+ * This idtype has no power here.
+ */
+ return (-1);
+ }
+
+ if (lx_ptrace_accord_get(&accord, B_FALSE) != 0) {
+ /*
+ * This LWP does not have an accord; it cannot be tracing.
+ */
+ return (-1);
+ }
+
+ /*
+ * We do not need an additional hold on the accord as it belongs to
+ * the running, tracer, LWP.
+ */
+ lx_ptrace_accord_exit(accord);
+
+ mutex_enter(&accord->lxpa_tracees_lock);
+ if (list_is_empty(&accord->lxpa_tracees)) {
+ /*
+ * Though it has an accord, there are currently no tracees in
+ * the list for this LWP.
+ */
+ mutex_exit(&accord->lxpa_tracees_lock);
+ return (-1);
+ }
+
+ /*
+ * Walk the list of tracees and determine if any of them have events to
+ * report.
+ */
+ for (remote = list_head(&accord->lxpa_tracees); remote != NULL;
+ remote = list_next(&accord->lxpa_tracees, remote)) {
+ rlwp = remote->br_lwp;
+ rproc = lwptoproc(rlwp);
+
+ /*
+ * We check to see if this LWP matches an id we are waiting for.
+ */
+ switch (idtype) {
+ case P_ALL:
+ break;
+ case P_PID:
+ if (remote->br_pid != id)
+ continue;
+ break;
+ case P_PGID:
+ if (rproc->p_pgrp != id)
+ continue;
+ break;
+ default:
+ cmn_err(CE_PANIC, "unexpected idtype: %d", idtype);
+ }
+
+ /* This tracee matches provided idtype and id */
+ target_found = B_TRUE;
+
+ /*
+ * Check if this LWP is in "ptrace-stop". If in the correct
+ * stop condition, lock the process containing the tracee LWP.
+ */
+ if (lx_ptrace_lock_if_stopped(accord, remote) != 0) {
+ continue;
+ }
+
+ if (remote->br_ptrace_flags & LX_PTF_PARENT_WAIT) {
+ /*
+ * This event depends on waitid() clearing out the
+ * event of another LWP. Skip it for now.
+ */
+ mutex_exit(&rproc->p_lock);
+ continue;
+ }
+
+ if (!(remote->br_ptrace_flags & LX_PTF_WAITPEND) ||
+ remote->br_ptrace_whystop == 0 ||
+ remote->br_ptrace_whatstop == 0) {
+ /*
+ * No (new) stop reason to post for this LWP.
+ */
+ mutex_exit(&rproc->p_lock);
+ continue;
+ }
+
+ /*
+ * We found a process of interest. Leave the process
+ * containing the tracee LWP locked and break out of the loop.
+ */
+ found = B_TRUE;
+ break;
+ }
+ mutex_exit(&accord->lxpa_tracees_lock);
+
+ if (!found) {
+ /*
+ * There were no events of interest, but we have tracees.
+ * If any of the tracees matched the spcified criteria, signal
+ * to waitid() that it should block if the provided flags allow
+ * for it.
+ */
+ if (target_found) {
+ *brand_wants_wait = B_TRUE;
+ }
+
+ return (-1);
+ }
+
+ /*
+ * Populate the signal information.
+ */
+ lx_ptrace_winfo(remote, ip, waitflag, &event_ppid, &event_pid);
+
+ /*
+ * Unlock the tracee.
+ */
+ mutex_exit(&rproc->p_lock);
+
+ if (event_pid != 0 && event_ppid != 0) {
+ /*
+ * We need to do another pass around the tracee list and
+ * unblock any events that have a "happens after" relationship
+ * with this event.
+ */
+ mutex_enter(&accord->lxpa_tracees_lock);
+ for (remote = list_head(&accord->lxpa_tracees); remote != NULL;
+ remote = list_next(&accord->lxpa_tracees, remote)) {
+ rlwp = remote->br_lwp;
+ rproc = lwptoproc(rlwp);
+
+ mutex_enter(&rproc->p_lock);
+
+ if (remote->br_pid != event_pid ||
+ remote->br_ppid != event_ppid) {
+ mutex_exit(&rproc->p_lock);
+ continue;
+ }
+
+ remote->br_ptrace_flags &= ~LX_PTF_PARENT_WAIT;
+
+ mutex_exit(&rproc->p_lock);
+ }
+ mutex_exit(&accord->lxpa_tracees_lock);
+ }
+
+ /*
+ * If we are consuming this wait state, we remove the SIGCLD from
+ * the queue and post another.
+ */
+ if (waitflag) {
+ mutex_exit(&pidlock);
+ sigcld_delete(ip);
+ sigcld_repost();
+ mutex_enter(&pidlock);
+ }
+
+ *rval = 0;
+ return (0);
+}
+
+static int
+lx_ptrace_peek(lx_lwp_data_t *lwpd, uintptr_t addr, void *data)
+{
+ proc_t *p = lwptoproc(lwpd->br_lwp);
+ long buf;
+ int error = 0, size = sizeof (buf);
+
+#if defined(_LP64)
+ if (get_udatamodel() != DATAMODEL_NATIVE) {
+ size = sizeof (uint32_t);
+ }
+#endif
+ if ((addr & (size - 1)) != 0) {
+ /* unaligned access */
+ return (EINVAL);
+ }
+
+ mutex_exit(&p->p_lock);
+ error = uread(p, &buf, size, addr);
+ mutex_enter(&p->p_lock);
+
+ if (error != 0) {
+ return (EIO);
+ }
+ if (copyout(&buf, data, size) != 0) {
+ return (EFAULT);
+ }
+
+ return (0);
+}
+
+static int
+lx_ptrace_poke(lx_lwp_data_t *lwpd, uintptr_t addr, uintptr_t data)
+{
+ proc_t *p = lwptoproc(lwpd->br_lwp);
+ int error = 0, size = sizeof (data);
+
+#if defined(_LP64)
+ if (get_udatamodel() != DATAMODEL_NATIVE) {
+ size = sizeof (uint32_t);
+ }
+#endif
+ if ((addr & (size - 1)) != 0) {
+ /* unaligned access */
+ return (EINVAL);
+ }
+
+ mutex_exit(&p->p_lock);
+ error = uwrite(p, &data, size, addr);
+ mutex_enter(&p->p_lock);
+
+ if (error != 0) {
+ return (EIO);
+ }
+ return (0);
+}
+
+static int
+lx_ptrace_kill(lx_lwp_data_t *lwpd)
+{
+ sigtoproc(lwptoproc(lwpd->br_lwp), NULL, SIGKILL);
+
+ return (0);
+}
+
+static int
+lx_ptrace_kernel(int ptrace_op, pid_t lxpid, uintptr_t addr, uintptr_t data)
+{
+ lx_lwp_data_t *local = ttolxlwp(curthread);
+ lx_ptrace_accord_t *accord;
+ lx_lwp_data_t *remote;
+ klwp_t *rlwp;
+ proc_t *rproc;
+ int error;
+ boolean_t found = B_FALSE;
+
+ /*
+ * PTRACE_TRACEME and PTRACE_ATTACH operations induce the tracing of
+ * one LWP by another. The target LWP must not be traced already.
+ */
+ switch (ptrace_op) {
+ case LX_PTRACE_TRACEME:
+ return (lx_ptrace_traceme());
+
+ case LX_PTRACE_ATTACH:
+ return (lx_ptrace_attach(lxpid));
+ }
+
+ /*
+ * Ensure that we have an accord and obtain a lock on it. This routine
+ * should not fail because the LWP cannot make ptrace(2) system calls
+ * after it has begun exiting.
+ */
+ VERIFY0(local->br_ptrace_flags & LX_PTF_EXITING);
+ VERIFY(lx_ptrace_accord_get(&accord, B_TRUE) == 0);
+
+ /*
+ * The accord belongs to this (the tracer) LWP, and we have a hold on
+ * it. We drop the lock so that we can take other locks.
+ */
+ lx_ptrace_accord_exit(accord);
+
+ /*
+ * Does the tracee list contain the pid in question?
+ */
+ mutex_enter(&accord->lxpa_tracees_lock);
+ for (remote = list_head(&accord->lxpa_tracees); remote != NULL;
+ remote = list_next(&accord->lxpa_tracees, remote)) {
+ if (remote->br_pid == lxpid) {
+ found = B_TRUE;
+ break;
+ }
+ }
+ if (!found) {
+ /*
+ * The requested pid does not appear in the tracee list.
+ */
+ mutex_exit(&accord->lxpa_tracees_lock);
+ return (ESRCH);
+ }
+
+ /*
+ * Attempt to lock the target LWP.
+ */
+ if ((error = lx_ptrace_lock_if_stopped(accord, remote)) != 0) {
+ /*
+ * The LWP was not in "ptrace-stop".
+ */
+ mutex_exit(&accord->lxpa_tracees_lock);
+ return (error);
+ }
+
+ /*
+ * The target LWP is in "ptrace-stop". We have the containing process
+ * locked.
+ */
+ rlwp = remote->br_lwp;
+ rproc = lwptoproc(rlwp);
+
+
+ if (ptrace_op == LX_PTRACE_DETACH) {
+ boolean_t release_hold = B_FALSE;
+ error = lx_ptrace_detach(accord, remote, (int)data,
+ &release_hold);
+ /*
+ * Drop the lock on both the tracee process and the tracee list.
+ */
+ mutex_exit(&rproc->p_lock);
+ mutex_exit(&accord->lxpa_tracees_lock);
+
+ if (release_hold) {
+ /*
+ * Release a hold from the accord.
+ */
+ lx_ptrace_accord_enter(accord);
+ lx_ptrace_accord_rele(accord);
+ lx_ptrace_accord_exit(accord);
+ }
+
+ return (error);
+ }
+
+ /*
+ * The tracees lock is not needed for any of the other operations.
+ * Drop it so further actions can avoid deadlock.
+ */
+ mutex_exit(&accord->lxpa_tracees_lock);
+
+ /*
+ * Process the ptrace(2) request:
+ */
+ switch (ptrace_op) {
+ case LX_PTRACE_CONT:
+ error = lx_ptrace_cont(remote, LX_PTC_NONE, (int)data);
+ break;
+
+ case LX_PTRACE_SYSCALL:
+ error = lx_ptrace_cont(remote, LX_PTC_SYSCALL, (int)data);
+ break;
+
+ case LX_PTRACE_SINGLESTEP:
+ error = lx_ptrace_cont(remote, LX_PTC_SINGLESTEP, (int)data);
+ break;
+
+ case LX_PTRACE_SETOPTIONS:
+ error = lx_ptrace_setoptions(remote, data);
+ break;
+
+ case LX_PTRACE_GETEVENTMSG:
+ error = lx_ptrace_geteventmsg(remote, (void *)data);
+ break;
+
+ case LX_PTRACE_GETREGS:
+ error = lx_user_regs_copyout(remote, (void *)data);
+ break;
+
+ case LX_PTRACE_SETREGS:
+ error = lx_user_regs_copyin(remote, (void *)data);
+ break;
+
+ case LX_PTRACE_GETSIGINFO:
+ error = lx_ptrace_getsiginfo(remote, (void *)data);
+ break;
+
+ case LX_PTRACE_PEEKTEXT:
+ case LX_PTRACE_PEEKDATA:
+ error = lx_ptrace_peek(remote, addr, (void *)data);
+ break;
+
+ case LX_PTRACE_POKETEXT:
+ case LX_PTRACE_POKEDATA:
+ error = lx_ptrace_poke(remote, addr, data);
+ break;
+
+ case LX_PTRACE_PEEKUSER:
+ error = lx_ptrace_peekuser(remote, addr, (void *)data);
+ break;
+
+ case LX_PTRACE_POKEUSER:
+ error = lx_ptrace_pokeuser(remote, addr, (void *)data);
+ break;
+
+ case LX_PTRACE_GETFPREGS:
+ error = lx_user_fpregs_copyout(remote, (void *)data);
+ break;
+
+ case LX_PTRACE_SETFPREGS:
+ error = lx_user_fpregs_copyin(remote, (void *)data);
+ break;
+
+ case LX_PTRACE_GETFPXREGS:
+ error = lx_user_fpxregs_copyout(remote, (void *)data);
+ break;
+
+ case LX_PTRACE_SETFPXREGS:
+ error = lx_user_fpxregs_copyin(remote, (void *)data);
+ break;
+
+ case LX_PTRACE_KILL:
+ error = lx_ptrace_kill(remote);
+ break;
+
+ default:
+ error = EINVAL;
+ }
+
+ /*
+ * Drop the lock on both the tracee process and the tracee list.
+ */
+ mutex_exit(&rproc->p_lock);
+
+ return (error);
+}
+
+int
+lx_ptrace(int ptrace_op, pid_t lxpid, uintptr_t addr, uintptr_t data)
+{
+ int error;
+
+ error = lx_ptrace_kernel(ptrace_op, lxpid, addr, data);
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (0);
+}
+
+void
+lx_ptrace_init(void)
+{
+ cv_init(&lx_ptrace_busy_cv, NULL, CV_DEFAULT, NULL);
+
+ lx_ptrace_accord_cache = kmem_cache_create("lx_ptrace_accord",
+ sizeof (lx_ptrace_accord_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
+}
+
+void
+lx_ptrace_fini(void)
+{
+ cv_destroy(&lx_ptrace_busy_cv);
+
+ kmem_cache_destroy(lx_ptrace_accord_cache);
+}
diff --git a/usr/src/uts/common/brand/lx/os/lx_signal.c b/usr/src/uts/common/brand/lx/os/lx_signal.c
new file mode 100644
index 0000000000..53e0cecc14
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/os/lx_signal.c
@@ -0,0 +1,50 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/signal.h>
+#include <sys/sunddi.h>
+#include <lx_signum.h>
+
+void
+lx_ltos_sigset(lx_sigset_t *lsigp, k_sigset_t *ssigp)
+{
+ int lx_sig, sig;
+
+ sigemptyset(ssigp);
+ for (lx_sig = 1; lx_sig <= LX_NSIG; lx_sig++) {
+ if (lx_sigismember(lsigp, lx_sig) &&
+ ((sig = ltos_signo[lx_sig]) > 0))
+ sigaddset(ssigp, sig);
+ }
+
+ /* Emulate sigutok() restrictions */
+ ssigp->__sigbits[0] &= (FILLSET0 & ~CANTMASK0);
+ ssigp->__sigbits[1] &= (FILLSET1 & ~CANTMASK1);
+ ssigp->__sigbits[2] &= (FILLSET2 & ~CANTMASK2);
+}
+
+void
+lx_stol_sigset(k_sigset_t *ssigp, lx_sigset_t *lsigp)
+{
+ int sig, lx_sig;
+
+ bzero(lsigp, sizeof (lx_sigset_t));
+ for (sig = 1; sig < NSIG; sig++) {
+ if (sigismember(ssigp, sig) &&
+ ((lx_sig = stol_signo[sig]) > 0))
+ lx_sigaddset(lsigp, lx_sig);
+ }
+}
diff --git a/usr/src/uts/common/brand/lx/os/lx_syscall.c b/usr/src/uts/common/brand/lx/os/lx_syscall.c
new file mode 100644
index 0000000000..f48b043aa3
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/os/lx_syscall.c
@@ -0,0 +1,1316 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <sys/kmem.h>
+#include <sys/errno.h>
+#include <sys/thread.h>
+#include <sys/systm.h>
+#include <sys/syscall.h>
+#include <sys/proc.h>
+#include <sys/modctl.h>
+#include <sys/cmn_err.h>
+#include <sys/model.h>
+#include <sys/privregs.h>
+#include <sys/brand.h>
+#include <sys/machbrand.h>
+#include <sys/sdt.h>
+#include <sys/lx_syscalls.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_impl.h>
+#include <sys/lx_misc.h>
+#include <lx_errno.h>
+
+
+/*
+ * Flags for sysent entries:
+ */
+#define LX_SYS_NOSYS_REASON 0x07
+#define LX_SYS_EBPARG6 0x08
+
+/*
+ * Flags that denote the specific reason we do not have a particular system
+ * call. These reasons are only valid if the function is NULL.
+ */
+#define NOSYS_USERMODE 0
+#define NOSYS_NULL 1
+#define NOSYS_NONE 2
+#define NOSYS_NO_EQUIV 3
+#define NOSYS_KERNEL 4
+#define NOSYS_UNDOC 5
+#define NOSYS_OBSOLETE 6
+#define NOSYS_MAX NOSYS_OBSOLETE
+
+#if NOSYS_MAX > LX_SYS_NOSYS_REASON
+#error NOSYS reason codes must fit in LX_SYS_NOSYS_REASON
+#endif
+
+/*
+ * Strings describing the reason we do not emulate a particular system call
+ * in the kernel.
+ */
+static char *nosys_reasons[] = {
+ NULL, /* NOSYS_USERMODE means this call is emulated in usermode */
+ "Not done yet",
+ "No such Linux system call",
+ "No equivalent illumos functionality",
+ "Reads/modifies Linux kernel state",
+ "Undocumented and/or rarely used system call",
+ "Unsupported, obsolete system call"
+};
+
+
+#if defined(_LP64)
+/*
+ * System call handler table and entry count for Linux x86_64 (amd64):
+ */
+lx_sysent_t lx_sysent64[LX_NSYSCALLS + 1];
+int lx_nsysent64;
+#endif
+/*
+ * System call handler table and entry count for Linux x86 (i386):
+ */
+lx_sysent_t lx_sysent32[LX_NSYSCALLS + 1];
+int lx_nsysent32;
+
+#if defined(_LP64)
+struct lx_vsyscall
+{
+ uintptr_t lv_addr;
+ uintptr_t lv_scnum;
+} lx_vsyscalls[] = {
+ { LX_VSYS_gettimeofday, LX_SYS_gettimeofday },
+ { LX_VSYS_time, LX_SYS_time },
+ { LX_VSYS_getcpu, LX_SYS_getcpu },
+ { NULL, NULL }
+};
+#endif
+
+#if defined(__amd64)
+static int
+lx_emulate_args(klwp_t *lwp, const lx_sysent_t *s, uintptr_t *args)
+{
+ struct regs *rp = lwptoregs(lwp);
+
+ if (get_udatamodel() == DATAMODEL_NATIVE) {
+ /*
+ * Note: Syscall argument passing is different from function
+ * call argument passing on amd64. For function calls, the
+ * fourth arg is passed via %rcx, but for system calls the 4th
+ * arg is passed via %r10. This is because in amd64, the
+ * syscall instruction puts the lower 32 bits of %rflags in
+ * %r11 and puts the %rip value to %rcx.
+ *
+ * Appendix A of the amd64 ABI (Linux conventions) states that
+ * syscalls are limited to 6 args and no arg is passed on the
+ * stack.
+ */
+ args[0] = rp->r_rdi;
+ args[1] = rp->r_rsi;
+ args[2] = rp->r_rdx;
+ args[3] = rp->r_r10;
+ args[4] = rp->r_r8;
+ args[5] = rp->r_r9;
+ } else {
+ /*
+ * If the system call takes 6 args, then libc has stashed them
+ * in memory at the address contained in %ebx. Except for some
+ * syscalls which store the 6th argument in %ebp.
+ */
+ if (s->sy_narg == 6 && !(s->sy_flags & LX_SYS_EBPARG6)) {
+ uint32_t args32[6];
+
+ if (copyin((void *)rp->r_rbx, &args32,
+ sizeof (args32)) != 0) {
+ /*
+ * Clear the argument vector so that the
+ * trace probe does not expose kernel
+ * memory.
+ */
+ bzero(args, 6 * sizeof (uintptr_t));
+ return (set_errno(EFAULT));
+ }
+
+ args[0] = args32[0];
+ args[1] = args32[1];
+ args[2] = args32[2];
+ args[3] = args32[3];
+ args[4] = args32[4];
+ args[5] = args32[5];
+ } else {
+ args[0] = rp->r_rbx;
+ args[1] = rp->r_rcx;
+ args[2] = rp->r_rdx;
+ args[3] = rp->r_rsi;
+ args[4] = rp->r_rdi;
+ args[5] = rp->r_rbp;
+ }
+ }
+
+ return (0);
+}
+
+#else /* !__amd64 */
+
+static int
+lx_emulate_args(klwp_t *lwp, const lx_sysent_t *s, uintptr_t *args)
+{
+ struct regs *rp = lwptoregs(lwp);
+
+ /*
+ * If the system call takes 6 args, then libc has stashed them
+ * in memory at the address contained in %ebx. Except for some
+ * syscalls which store the 6th argument in %ebp.
+ */
+ if (s->sy_narg == 6 && !(s->sy_flags & LX_SYS_EBPARG6)) {
+ if (copyin((void *)rp->r_ebx, args, 6 * sizeof (uintptr_t)) !=
+ 0) {
+ /*
+ * Clear the argument vector so that the trace probe
+ * does not expose kernel memory.
+ */
+ bzero(args, 6 * sizeof (uintptr_t));
+ return (set_errno(EFAULT));
+ }
+ } else {
+ args[0] = rp->r_ebx;
+ args[1] = rp->r_ecx;
+ args[2] = rp->r_edx;
+ args[3] = rp->r_esi;
+ args[4] = rp->r_edi;
+ args[5] = rp->r_ebp;
+ }
+
+ return (0);
+}
+#endif
+
+int
+lx_syscall_return(klwp_t *lwp, int syscall_num, long ret)
+{
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+ struct regs *rp = lwptoregs(lwp);
+ int error = lwp->lwp_errno;
+
+ if (error != EINTR) {
+ /*
+ * If this system call was not interrupted, clear the system
+ * call restart flag before lx_setcontext() can pass it to
+ * usermode.
+ */
+ lwpd->br_syscall_restart = B_FALSE;
+ }
+
+ if (error != 0) {
+ /*
+ * Convert from illumos to Linux errno:
+ */
+ ret = -lx_errno(error, EINVAL);
+ }
+
+ /*
+ * 32-bit Linux system calls return via %eax; 64-bit calls return via
+ * %rax.
+ */
+ rp->r_r0 = ret;
+
+ /*
+ * Hold for the ptrace(2) "syscall-exit-stop" condition if required by
+ * PTRACE_SYSCALL. Note that the register state may be modified by
+ * tracer.
+ */
+ lx_ptrace_stop(LX_PR_SYSEXIT);
+
+ /*
+ * Fire the DTrace "lx-syscall:::return" probe:
+ */
+ lx_trace_sysreturn(syscall_num, ret);
+
+ /*
+ * Clear errno for next time. We do not clear "br_syscall_restart" or
+ * "br_syscall_num" as they are potentially used by "lx_savecontext()"
+ * in the signal delivery path.
+ */
+ lwp->lwp_errno = 0;
+
+ lx_check_strict_failure(lwpd);
+
+ /*
+ * We want complete control of the registers on return from this
+ * emulated Linux system call:
+ */
+ lwp->lwp_eosys = JUSTRETURN;
+ curthread->t_post_sys = 1;
+ aston(curthread);
+
+ return (0);
+}
+
+static void
+lx_syscall_unsup_msg(lx_sysent_t *s, int syscall_num, int unsup_reason)
+{
+ char buf[100];
+
+ if (s == NULL) {
+ (void) snprintf(buf, sizeof (buf), "NOSYS (%d): out of bounds",
+ syscall_num);
+ } else {
+ VERIFY(unsup_reason < (sizeof (nosys_reasons) /
+ sizeof (*nosys_reasons)));
+
+ if (s->sy_name == NULL) {
+ (void) snprintf(buf, sizeof (buf), "NOSYS (%d): %s",
+ syscall_num, nosys_reasons[unsup_reason]);
+ } else {
+ (void) snprintf(buf, sizeof (buf), "NOSYS (%s): %s",
+ s->sy_name, nosys_reasons[unsup_reason]);
+ }
+ }
+
+ lx_unsupported(buf);
+}
+
+/*
+ * This function is used to override the processing of arguments and
+ * invocation of a handler for emulated system calls, installed on each
+ * branded LWP as "lwp_brand_syscall". If this system call should use the
+ * native path, we return 1. If we handled this system call (and have made
+ * arrangements with respect to post-return usermode register state) we
+ * return 0.
+ */
+int
+lx_syscall_enter(void)
+{
+ klwp_t *lwp = ttolwp(curthread);
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+ struct regs *rp = lwptoregs(lwp);
+ int syscall_num;
+ int error;
+ long ret = 0;
+ lx_sysent_t *s;
+ uintptr_t args[6];
+ unsigned int unsup_reason;
+
+ /*
+ * If we got here, we should have an LWP-specific brand data
+ * structure.
+ */
+ VERIFY(lwpd != NULL);
+
+ if (lwpd->br_stack_mode != LX_STACK_MODE_BRAND) {
+ /*
+ * The lwp is not in in BRAND execution mode, so we return
+ * to the regular native system call path.
+ */
+ DTRACE_PROBE(brand__lx__syscall__hook__skip);
+ return (1);
+ }
+
+ /*
+ * Clear the restartable system call flag. This flag will be set
+ * on in the system call handler if the call is a candidate for
+ * a restart. It will be saved by lx_setcontext() in the event
+ * that we take a signal, and used in the signal handling path
+ * to restart the system call iff SA_RESTART was set for this
+ * signal. Save the system call number so that we can store it
+ * in the saved context if required.
+ */
+ lwpd->br_syscall_restart = B_FALSE;
+ lwpd->br_syscall_num = (int)rp->r_r0;
+
+ /*
+ * Hold for the ptrace(2) "syscall-entry-stop" condition if traced by
+ * PTRACE_SYSCALL. The system call number and arguments may be
+ * modified by the tracer.
+ */
+ lx_ptrace_stop(LX_PR_SYSENTRY);
+
+ /*
+ * Check that the system call number is within the bounds we expect.
+ */
+ syscall_num = lwpd->br_syscall_num;
+ if (syscall_num < 0 || syscall_num > LX_MAX_SYSCALL(lwp)) {
+ lx_syscall_unsup_msg(NULL, syscall_num, 0);
+
+ set_errno(ENOTSUP);
+ lx_syscall_return(lwp, syscall_num, -1);
+ return (0);
+ }
+
+#if defined(_LP64)
+ if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE) {
+ s = &lx_sysent64[syscall_num];
+ } else
+#endif
+ {
+ s = &lx_sysent32[syscall_num];
+ }
+
+ /*
+ * Process the arguments for this system call and fire the DTrace
+ * "lx-syscall:::entry" probe:
+ */
+ error = lx_emulate_args(lwp, s, args);
+ lx_trace_sysenter(syscall_num, args);
+ if (error != 0) {
+ /*
+ * Could not read and process the arguments. Return the error
+ * to the process.
+ */
+ set_errno(error);
+ lx_syscall_return(lwp, syscall_num, -1);
+ return (0);
+ }
+
+ if (s->sy_callc != NULL) {
+ /*
+ * Call the in-kernel handler for this Linux system call:
+ */
+ lwpd->br_eosys = NORMALRETURN;
+ ret = s->sy_callc(args[0], args[1], args[2], args[3], args[4],
+ args[5]);
+ if (lwpd->br_eosys == NORMALRETURN) {
+ lx_syscall_return(lwp, syscall_num, ret);
+ }
+ return (0);
+ }
+
+ /*
+ * There is no in-kernel handler.
+ */
+ switch (unsup_reason = (s->sy_flags & LX_SYS_NOSYS_REASON)) {
+ case NOSYS_USERMODE:
+ /*
+ * Pass to the usermode emulation routine.
+ */
+#if defined(_LP64)
+ if (get_udatamodel() != DATAMODEL_NATIVE) {
+ lx_emulate_user32(lwp, syscall_num, args);
+ } else
+#endif
+ {
+ lx_emulate_user(lwp, syscall_num, args);
+ }
+ return (0);
+
+ default:
+ /*
+ * We are not emulating this system call at all.
+ */
+ lx_syscall_unsup_msg(s, syscall_num, unsup_reason);
+
+ set_errno(ENOTSUP);
+ lx_syscall_return(lwp, syscall_num, -1);
+ return (0);
+ }
+}
+
+#if defined(_LP64)
+/*
+ * Emulate vsyscall support.
+ *
+ * Linux magically maps a single page into the address space of each process,
+ * allowing them to make 'vsyscalls'. Originally designed to counteract the
+ * perceived overhead of regular system calls, vsyscalls were implemented as
+ * code residing in userspace which could be called directly. The userspace
+ * implementations of these vsyscalls which have now been replaced by
+ * instructions which vector into the normal syscall path.
+ *
+ * Implementing vsyscalls on Illumos is complicated by the fact that the
+ * required static address region resides inside the kernel address space.
+ * Rather than mapping a user-accessible page into the KAS, a different
+ * approach is taken. The vsyscall gate is emulated by interposing on
+ * pagefaults in trap(). An attempt to execute a known vsyscall address will
+ * result in emulating the appropriate system call rather than inducing a
+ * SIGSEGV.
+ */
+void
+lx_vsyscall_enter(proc_t *p, klwp_t *lwp, int scnum)
+{
+ struct regs *rp = lwptoregs(lwp);
+ uintptr_t raddr;
+
+ /*
+ * Fetch the return address from the process stack.
+ */
+ VERIFY(MUTEX_NOT_HELD(&p->p_lock));
+ if (copyin((void *)rp->r_rsp, &raddr, sizeof (raddr)) != 0) {
+#if DEBUG
+ printf("lx_vsyscall_call: bad brand stack at vsyscall "
+ "cmd=%s, pid=%d, sp=0x%p\n", PTOU(p)->u_comm,
+ p->p_pid, (void *)rp->r_rsp);
+#endif
+
+ /*
+ * The process jumped to the vsyscall address without a
+ * correctly configured stack. Terminate the process.
+ */
+ exit(CLD_KILLED, SIGSEGV);
+ return;
+ }
+
+ DTRACE_PROBE1(brand__lx__vsyscall, int, scnum);
+
+ /* Simulate vectoring into the syscall */
+ rp->r_rax = scnum;
+ rp->r_rip = raddr;
+ rp->r_rsp += sizeof (uintptr_t);
+
+ lx_syscall_enter();
+}
+
+boolean_t
+lx_vsyscall_iscall(klwp_t *lwp, uintptr_t addr, int *scnum)
+{
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+ int i;
+
+ if (lwpd->br_stack_mode != LX_STACK_MODE_BRAND) {
+ /*
+ * We only handle vsyscalls when running Linux code.
+ */
+ return (B_FALSE);
+ }
+
+ if (addr < LX_VSYSCALL_ADDR ||
+ addr >= (LX_VSYSCALL_ADDR + LX_VSYSCALL_SIZE)) {
+ /*
+ * Ignore faults outside the vsyscall page.
+ */
+ return (B_FALSE);
+ }
+
+ for (i = 0; lx_vsyscalls[i].lv_addr != NULL; i++) {
+ if (addr == lx_vsyscalls[i].lv_addr) {
+ /*
+ * This is a valid vsyscall address.
+ */
+ *scnum = lx_vsyscalls[i].lv_scnum;
+ return (B_TRUE);
+ }
+ }
+
+ lx_unsupported("bad vsyscall access");
+ return (B_FALSE);
+}
+#endif
+
+/*
+ * This function is used to provide a fasttrap-like interface for emulated
+ * syscalls. By skipping housekeeping such as mstate transitions, it should
+ * cut down on overhead for syscalls which would normally be fasttraps in a
+ * native process.
+ */
+int
+lx_syscall_fast_enter(void)
+{
+ klwp_t *lwp = ttolwp(curthread);
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+ struct regs *rp = lwptoregs(lwp);
+ int syscall_num, error;
+ lx_sysent_t *s;
+ uintptr_t args[6];
+ long ret = 0;
+
+ /*
+ * If we got here, we should have an LWP-specific brand data structure.
+ */
+ VERIFY(lwpd != NULL);
+
+ if (lwpd->br_stack_mode != LX_STACK_MODE_BRAND) {
+ /*
+ * The lwp is not in in BRAND execution mode, so we return to
+ * the regular native system call path.
+ */
+ DTRACE_PROBE(brand__lx__syscall__hook__skip);
+ return (1);
+ }
+ if (lwpd->br_ptrace_tracer != NULL) {
+ /*
+ * Given that ptrace is the antithesis of "fast", return to the
+ * regular system call path if we are being traced.
+ */
+ return (1);
+ }
+
+ syscall_num = (int)rp->r_r0;
+#if defined(_LP64)
+ if (get_udatamodel() != DATAMODEL_NATIVE) {
+ switch (syscall_num) {
+ case LX_SYS32_gettimeofday:
+ case LX_SYS32_time:
+ case LX_SYS32_clock_gettime:
+ case LX_SYS32_getcpu:
+ s = &lx_sysent32[syscall_num];
+ break;
+ default:
+ return (1);
+ }
+ } else
+#endif
+ {
+ switch (syscall_num) {
+ case LX_SYS_gettimeofday:
+ case LX_SYS_time:
+ case LX_SYS_clock_gettime:
+ case LX_SYS_getcpu:
+#if defined(_LP64)
+ s = &lx_sysent64[syscall_num];
+#else
+ s = &lx_sysent32[syscall_num];
+#endif
+ break;
+ default:
+ return (1);
+ }
+ }
+
+ /*
+ * The above syscall restrictions should ensure that we do not arrive
+ * at this point without a suitable syscall planned. Since the
+ * lx_emulate_args routine can only fail for 6-arg syscalls, none of
+ * which would be performed as a fasttrap, it is assumed to succeed.
+ */
+ VERIFY(s->sy_callc != NULL);
+ VERIFY(s->sy_narg < 6);
+ (void) lx_emulate_args(lwp, s, args);
+ lx_trace_sysenter(syscall_num, args);
+ ret = s->sy_callc(args[0], args[1], args[2], args[3], args[4],
+ args[5]);
+
+ if ((error = lwp->lwp_errno) != 0) {
+ ret = -lx_errno(error, EINVAL);
+ lwp->lwp_errno = 0;
+ }
+ rp->r_r0 = ret;
+ lx_trace_sysreturn(syscall_num, ret);
+ lwp->lwp_eosys = JUSTRETURN;
+ return (0);
+}
+
+/*
+ * Linux defines system call numbers for 32-bit x86 in the file:
+ * arch/x86/syscalls/syscall_32.tbl
+ */
+lx_sysent_t lx_sysent32[] = {
+ {"nosys", NULL, NOSYS_NONE, 0}, /* 0 */
+ {"exit", NULL, 0, 1}, /* 1 */
+ {"fork", NULL, 0, 0}, /* 2 */
+ {"read", lx_read, 0, 3}, /* 3 */
+ {"write", lx_write, 0, 3}, /* 4 */
+ {"open", lx_open, 0, 3}, /* 5 */
+ {"close", lx_close, 0, 1}, /* 6 */
+ {"waitpid", lx_waitpid, 0, 3}, /* 7 */
+ {"creat", NULL, 0, 2}, /* 8 */
+ {"link", lx_link, 0, 2}, /* 9 */
+ {"unlink", NULL, 0, 1}, /* 10 */
+ {"execve", NULL, 0, 3}, /* 11 */
+ {"chdir", NULL, 0, 1}, /* 12 */
+ {"time", lx_time, 0, 1}, /* 13 */
+ {"mknod", NULL, 0, 3}, /* 14 */
+ {"chmod", lx_chmod, 0, 2}, /* 15 */
+ {"lchown16", lx_lchown16, 0, 3}, /* 16 */
+ {"break", NULL, NOSYS_OBSOLETE, 0}, /* 17 */
+ {"stat", NULL, NOSYS_OBSOLETE, 0}, /* 18 */
+ {"lseek", NULL, 0, 3}, /* 19 */
+ {"getpid", lx_getpid, 0, 0}, /* 20 */
+ {"mount", NULL, 0, 5}, /* 21 */
+ {"umount", NULL, 0, 1}, /* 22 */
+ {"setuid16", NULL, 0, 1}, /* 23 */
+ {"getuid16", NULL, 0, 0}, /* 24 */
+ {"stime", NULL, 0, 1}, /* 25 */
+ {"ptrace", lx_ptrace, 0, 4}, /* 26 */
+ {"alarm", NULL, 0, 1}, /* 27 */
+ {"fstat", NULL, NOSYS_OBSOLETE, 0}, /* 28 */
+ {"pause", NULL, 0, 0}, /* 29 */
+ {"utime", NULL, 0, 2}, /* 30 */
+ {"stty", NULL, NOSYS_OBSOLETE, 0}, /* 31 */
+ {"gtty", NULL, NOSYS_OBSOLETE, 0}, /* 32 */
+ {"access", lx_access, 0, 2}, /* 33 */
+ {"nice", NULL, 0, 1}, /* 34 */
+ {"ftime", NULL, NOSYS_OBSOLETE, 0}, /* 35 */
+ {"sync", NULL, 0, 0}, /* 36 */
+ {"kill", lx_kill, 0, 2}, /* 37 */
+ {"rename", NULL, 0, 2}, /* 38 */
+ {"mkdir", lx_mkdir, 0, 2}, /* 39 */
+ {"rmdir", NULL, 0, 1}, /* 40 */
+ {"dup", NULL, 0, 1}, /* 41 */
+ {"pipe", lx_pipe, 0, 1}, /* 42 */
+ {"times", NULL, 0, 1}, /* 43 */
+ {"prof", NULL, NOSYS_OBSOLETE, 0}, /* 44 */
+ {"brk", lx_brk, 0, 1}, /* 45 */
+ {"setgid16", NULL, 0, 1}, /* 46 */
+ {"getgid16", NULL, 0, 0}, /* 47 */
+ {"signal", NULL, 0, 2}, /* 48 */
+ {"geteuid16", NULL, 0, 0}, /* 49 */
+ {"getegid16", NULL, 0, 0}, /* 50 */
+ {"acct", NULL, NOSYS_NO_EQUIV, 0}, /* 51 */
+ {"umount2", NULL, 0, 2}, /* 52 */
+ {"lock", NULL, NOSYS_OBSOLETE, 0}, /* 53 */
+ {"ioctl", lx_ioctl, 0, 3}, /* 54 */
+ {"fcntl", lx_fcntl, 0, 3}, /* 55 */
+ {"mpx", NULL, NOSYS_OBSOLETE, 0}, /* 56 */
+ {"setpgid", NULL, 0, 2}, /* 57 */
+ {"ulimit", NULL, NOSYS_OBSOLETE, 0}, /* 58 */
+ {"olduname", NULL, NOSYS_OBSOLETE, 0}, /* 59 */
+ {"umask", NULL, 0, 1}, /* 60 */
+ {"chroot", NULL, 0, 1}, /* 61 */
+ {"ustat", NULL, NOSYS_OBSOLETE, 2}, /* 62 */
+ {"dup2", NULL, 0, 2}, /* 63 */
+ {"getppid", lx_getppid, 0, 0}, /* 64 */
+ {"getpgrp", NULL, 0, 0}, /* 65 */
+ {"setsid", NULL, 0, 0}, /* 66 */
+ {"sigaction", NULL, 0, 3}, /* 67 */
+ {"sgetmask", NULL, NOSYS_OBSOLETE, 0}, /* 68 */
+ {"ssetmask", NULL, NOSYS_OBSOLETE, 0}, /* 69 */
+ {"setreuid16", NULL, 0, 2}, /* 70 */
+ {"setregid16", NULL, 0, 2}, /* 71 */
+ {"sigsuspend", NULL, 0, 1}, /* 72 */
+ {"sigpending", NULL, 0, 1}, /* 73 */
+ {"sethostname", NULL, 0, 2}, /* 74 */
+ {"setrlimit", lx_setrlimit, 0, 2}, /* 75 */
+ {"getrlimit", lx_oldgetrlimit, 0, 2}, /* 76 */
+ {"getrusage", NULL, 0, 2}, /* 77 */
+ {"gettimeofday", lx_gettimeofday, 0, 2}, /* 78 */
+ {"settimeofday", NULL, 0, 2}, /* 79 */
+ {"getgroups16", NULL, 0, 2}, /* 80 */
+ {"setgroups16", NULL, 0, 2}, /* 81 */
+ {"select", NULL, NOSYS_OBSOLETE, 0}, /* 82 */
+ {"symlink", NULL, 0, 2}, /* 83 */
+ {"oldlstat", NULL, NOSYS_OBSOLETE, 0}, /* 84 */
+ {"readlink", NULL, 0, 3}, /* 85 */
+ {"uselib", NULL, NOSYS_KERNEL, 0}, /* 86 */
+ {"swapon", NULL, NOSYS_KERNEL, 0}, /* 87 */
+ {"reboot", NULL, 0, 4}, /* 88 */
+ {"readdir", NULL, 0, 3}, /* 89 */
+ {"mmap", NULL, 0, 6}, /* 90 */
+ {"munmap", NULL, 0, 2}, /* 91 */
+ {"truncate", NULL, 0, 2}, /* 92 */
+ {"ftruncate", NULL, 0, 2}, /* 93 */
+ {"fchmod", lx_fchmod, 0, 2}, /* 94 */
+ {"fchown16", lx_fchown16, 0, 3}, /* 95 */
+ {"getpriority", NULL, 0, 2}, /* 96 */
+ {"setpriority", NULL, 0, 3}, /* 97 */
+ {"profil", NULL, NOSYS_NO_EQUIV, 0}, /* 98 */
+ {"statfs", NULL, 0, 2}, /* 99 */
+ {"fstatfs", NULL, 0, 2}, /* 100 */
+ {"ioperm", NULL, NOSYS_NO_EQUIV, 0}, /* 101 */
+ {"socketcall", lx_socketcall, 0, 2}, /* 102 */
+ {"syslog", NULL, 0, 3}, /* 103 */
+ {"setitimer", NULL, 0, 3}, /* 104 */
+ {"getitimer", NULL, 0, 2}, /* 105 */
+ {"stat", lx_stat32, 0, 2}, /* 106 */
+ {"lstat", lx_lstat32, 0, 2}, /* 107 */
+ {"fstat", lx_fstat32, 0, 2}, /* 108 */
+ {"uname", NULL, NOSYS_OBSOLETE, 0}, /* 109 */
+ {"oldiopl", NULL, NOSYS_NO_EQUIV, 0}, /* 110 */
+ {"vhangup", NULL, 0, 0}, /* 111 */
+ {"idle", NULL, NOSYS_NO_EQUIV, 0}, /* 112 */
+ {"vm86old", NULL, NOSYS_OBSOLETE, 0}, /* 113 */
+ {"wait4", lx_wait4, 0, 4}, /* 114 */
+ {"swapoff", NULL, NOSYS_KERNEL, 0}, /* 115 */
+ {"sysinfo", lx_sysinfo32, 0, 1}, /* 116 */
+ {"ipc", NULL, 0, 5}, /* 117 */
+ {"fsync", NULL, 0, 1}, /* 118 */
+ {"sigreturn", NULL, 0, 1}, /* 119 */
+ {"clone", NULL, 0, 5}, /* 120 */
+ {"setdomainname", NULL, 0, 2}, /* 121 */
+ {"uname", lx_uname, 0, 1}, /* 122 */
+ {"modify_ldt", lx_modify_ldt, 0, 3}, /* 123 */
+ {"adjtimex", NULL, 0, 1}, /* 124 */
+ {"mprotect", NULL, 0, 3}, /* 125 */
+ {"sigprocmask", NULL, 0, 3}, /* 126 */
+ {"create_module", NULL, NOSYS_KERNEL, 0}, /* 127 */
+ {"init_module", NULL, NOSYS_KERNEL, 0}, /* 128 */
+ {"delete_module", NULL, NOSYS_KERNEL, 0}, /* 129 */
+ {"get_kernel_syms", NULL, NOSYS_KERNEL, 0}, /* 130 */
+ {"quotactl", NULL, NOSYS_KERNEL, 0}, /* 131 */
+ {"getpgid", NULL, 0, 1}, /* 132 */
+ {"fchdir", NULL, 0, 1}, /* 133 */
+ {"bdflush", NULL, NOSYS_KERNEL, 0}, /* 134 */
+ {"sysfs", NULL, 0, 3}, /* 135 */
+ {"personality", lx_personality, 0, 1}, /* 136 */
+ {"afs_syscall", NULL, NOSYS_KERNEL, 0}, /* 137 */
+ {"setfsuid16", NULL, 0, 1}, /* 138 */
+ {"setfsgid16", NULL, 0, 1}, /* 139 */
+ {"llseek", NULL, 0, 5}, /* 140 */
+ {"getdents", lx_getdents_32, 0, 3}, /* 141 */
+ {"select", lx_select, 0, 5}, /* 142 */
+ {"flock", NULL, 0, 2}, /* 143 */
+ {"msync", NULL, 0, 3}, /* 144 */
+ {"readv", lx_readv, 0, 3}, /* 145 */
+ {"writev", lx_writev, 0, 3}, /* 146 */
+ {"getsid", NULL, 0, 1}, /* 147 */
+ {"fdatasync", NULL, 0, 1}, /* 148 */
+ {"sysctl", NULL, 0, 1}, /* 149 */
+ {"mlock", NULL, 0, 2}, /* 150 */
+ {"munlock", NULL, 0, 2}, /* 151 */
+ {"mlockall", NULL, 0, 1}, /* 152 */
+ {"munlockall", NULL, 0, 0}, /* 153 */
+ {"sched_setparam", NULL, 0, 2}, /* 154 */
+ {"sched_getparam", NULL, 0, 2}, /* 155 */
+ {"sched_setscheduler", NULL, 0, 3}, /* 156 */
+ {"sched_getscheduler", NULL, 0, 1}, /* 157 */
+ {"sched_yield", lx_sched_yield, 0, 0}, /* 158 */
+ {"sched_get_priority_max", NULL, 0, 1}, /* 159 */
+ {"sched_get_priority_min", NULL, 0, 1}, /* 160 */
+ {"sched_rr_get_interval", NULL, 0, 2}, /* 161 */
+ {"nanosleep", lx_nanosleep, 0, 2}, /* 162 */
+ {"mremap", NULL, 0, 5}, /* 163 */
+ {"setresuid16", lx_setresuid16, 0, 3}, /* 164 */
+ {"getresuid16", NULL, 0, 3}, /* 165 */
+ {"vm86", NULL, NOSYS_NO_EQUIV, 0}, /* 166 */
+ {"query_module", NULL, 0, 5}, /* 167 */
+ {"poll", lx_poll, 0, 3}, /* 168 */
+ {"nfsservctl", NULL, NOSYS_KERNEL, 0}, /* 169 */
+ {"setresgid16", lx_setresgid16, 0, 3}, /* 170 */
+ {"getresgid16", NULL, 0, 3}, /* 171 */
+ {"prctl", lx_prctl, 0, 5}, /* 172 */
+ {"rt_sigreturn", NULL, 0, 0}, /* 173 */
+ {"rt_sigaction", NULL, 0, 4}, /* 174 */
+ {"rt_sigprocmask", NULL, 0, 4}, /* 175 */
+ {"rt_sigpending", NULL, 0, 2}, /* 176 */
+ {"rt_sigtimedwait", NULL, 0, 4}, /* 177 */
+ {"rt_sigqueueinfo", NULL, 0, 3}, /* 178 */
+ {"rt_sigsuspend", NULL, 0, 2}, /* 179 */
+ {"pread64", lx_pread32, 0, 5}, /* 180 */
+ {"pwrite64", lx_pwrite32, 0, 5}, /* 181 */
+ {"chown16", lx_chown16, 0, 3}, /* 182 */
+ {"getcwd", lx_getcwd, 0, 2}, /* 183 */
+ {"capget", NULL, 0, 2}, /* 184 */
+ {"capset", NULL, 0, 2}, /* 185 */
+ {"sigaltstack", NULL, 0, 2}, /* 186 */
+ {"sendfile", NULL, 0, 4}, /* 187 */
+ {"getpmsg", NULL, NOSYS_OBSOLETE, 0}, /* 188 */
+ {"putpmsg", NULL, NOSYS_OBSOLETE, 0}, /* 189 */
+ {"vfork", NULL, 0, 0}, /* 190 */
+ {"getrlimit", lx_getrlimit, 0, 2}, /* 191 */
+ {"mmap2", NULL, LX_SYS_EBPARG6, 6}, /* 192 */
+ {"truncate64", NULL, 0, 3}, /* 193 */
+ {"ftruncate64", NULL, 0, 3}, /* 194 */
+ {"stat64", lx_stat64, 0, 2}, /* 195 */
+ {"lstat64", lx_lstat64, 0, 2}, /* 196 */
+ {"fstat64", lx_fstat64, 0, 2}, /* 197 */
+ {"lchown", lx_lchown, 0, 3}, /* 198 */
+ {"getuid", NULL, 0, 0}, /* 199 */
+ {"getgid", NULL, 0, 0}, /* 200 */
+ {"geteuid", NULL, 0, 0}, /* 201 */
+ {"getegid", NULL, 0, 0}, /* 202 */
+ {"setreuid", NULL, 0, 0}, /* 203 */
+ {"setregid", NULL, 0, 0}, /* 204 */
+ {"getgroups", NULL, 0, 2}, /* 205 */
+ {"setgroups", NULL, 0, 2}, /* 206 */
+ {"fchown", lx_fchown, 0, 3}, /* 207 */
+ {"setresuid", lx_setresuid, 0, 3}, /* 208 */
+ {"getresuid", NULL, 0, 3}, /* 209 */
+ {"setresgid", lx_setresgid, 0, 3}, /* 210 */
+ {"getresgid", NULL, 0, 3}, /* 211 */
+ {"chown", lx_chown, 0, 3}, /* 212 */
+ {"setuid", NULL, 0, 1}, /* 213 */
+ {"setgid", NULL, 0, 1}, /* 214 */
+ {"setfsuid", NULL, 0, 1}, /* 215 */
+ {"setfsgid", NULL, 0, 1}, /* 216 */
+ {"pivot_root", NULL, NOSYS_KERNEL, 0}, /* 217 */
+ {"mincore", NULL, 0, 3}, /* 218 */
+ {"madvise", NULL, 0, 3}, /* 219 */
+ {"getdents64", lx_getdents64, 0, 3}, /* 220 */
+ {"fcntl64", lx_fcntl64, 0, 3}, /* 221 */
+ {"tux", NULL, NOSYS_NO_EQUIV, 0}, /* 222 */
+ {"security", NULL, NOSYS_NO_EQUIV, 0}, /* 223 */
+ {"gettid", lx_gettid, 0, 0}, /* 224 */
+ {"readahead", NULL, NOSYS_NO_EQUIV, 0}, /* 225 */
+ {"setxattr", lx_setxattr, 0, 5}, /* 226 */
+ {"lsetxattr", lx_lsetxattr, 0, 5}, /* 227 */
+ {"fsetxattr", lx_fsetxattr, 0, 5}, /* 228 */
+ {"getxattr", lx_getxattr, 0, 4}, /* 229 */
+ {"lgetxattr", lx_lgetxattr, 0, 4}, /* 230 */
+ {"fgetxattr", lx_fgetxattr, 0, 4}, /* 231 */
+ {"listxattr", lx_listxattr, 0, 3}, /* 232 */
+ {"llistxattr", lx_llistxattr, 0, 3}, /* 233 */
+ {"flistxattr", lx_flistxattr, 0, 3}, /* 234 */
+ {"removexattr", lx_removexattr, 0, 2}, /* 235 */
+ {"lremovexattr", lx_lremovexattr, 0, 2}, /* 236 */
+ {"fremovexattr", lx_fremovexattr, 0, 2}, /* 237 */
+ {"tkill", lx_tkill, 0, 2}, /* 238 */
+ {"sendfile64", NULL, 0, 4}, /* 239 */
+ {"futex", lx_futex, LX_SYS_EBPARG6, 6}, /* 240 */
+ {"sched_setaffinity", NULL, 0, 3}, /* 241 */
+ {"sched_getaffinity", NULL, 0, 3}, /* 242 */
+ {"set_thread_area", lx_set_thread_area, 0, 1}, /* 243 */
+ {"get_thread_area", lx_get_thread_area, 0, 1}, /* 244 */
+ {"io_setup", lx_io_setup, 0, 2}, /* 245 */
+ {"io_destroy", NULL, 0, 1}, /* 246 */
+ {"io_getevents", NULL, 0, 5}, /* 247 */
+ {"io_submit", NULL, 0, 3}, /* 248 */
+ {"io_cancel", NULL, 0, 3}, /* 249 */
+ {"fadvise64", NULL, 0, 4}, /* 250 */
+ {"nosys", NULL, 0, 0}, /* 251 */
+ {"group_exit", NULL, 0, 1}, /* 252 */
+ {"lookup_dcookie", NULL, NOSYS_NO_EQUIV, 0}, /* 253 */
+ {"epoll_create", lx_epoll_create, 0, 1}, /* 254 */
+ {"epoll_ctl", lx_epoll_ctl, 0, 4}, /* 255 */
+ {"epoll_wait", lx_epoll_wait, 0, 4}, /* 256 */
+ {"remap_file_pages", NULL, NOSYS_NO_EQUIV, 0}, /* 257 */
+ {"set_tid_address", lx_set_tid_address, 0, 1}, /* 258 */
+ {"timer_create", NULL, 0, 3}, /* 259 */
+ {"timer_settime", NULL, 0, 4}, /* 260 */
+ {"timer_gettime", NULL, 0, 2}, /* 261 */
+ {"timer_getoverrun", NULL, 0, 1}, /* 262 */
+ {"timer_delete", NULL, 0, 1}, /* 263 */
+ {"clock_settime", lx_clock_settime, 0, 2}, /* 264 */
+ {"clock_gettime", lx_clock_gettime, 0, 2}, /* 265 */
+ {"clock_getres", lx_clock_getres, 0, 2}, /* 266 */
+ {"clock_nanosleep", NULL, 0, 4}, /* 267 */
+ {"statfs64", NULL, 0, 2}, /* 268 */
+ {"fstatfs64", NULL, 0, 2}, /* 269 */
+ {"tgkill", lx_tgkill, 0, 3}, /* 270 */
+
+/*
+ * The following system calls only exist in kernel 2.6 and greater:
+ */
+ {"utimes", NULL, 0, 2}, /* 271 */
+ {"fadvise64_64", NULL, 0, 4}, /* 272 */
+ {"vserver", NULL, NOSYS_NULL, 0}, /* 273 */
+ {"mbind", NULL, NOSYS_NULL, 0}, /* 274 */
+ {"get_mempolicy", NULL, NOSYS_NULL, 0}, /* 275 */
+ {"set_mempolicy", NULL, NOSYS_NULL, 0}, /* 276 */
+ {"mq_open", NULL, NOSYS_NULL, 0}, /* 277 */
+ {"mq_unlink", NULL, NOSYS_NULL, 0}, /* 278 */
+ {"mq_timedsend", NULL, NOSYS_NULL, 0}, /* 279 */
+ {"mq_timedreceive", NULL, NOSYS_NULL, 0}, /* 280 */
+ {"mq_notify", NULL, NOSYS_NULL, 0}, /* 281 */
+ {"mq_getsetattr", NULL, NOSYS_NULL, 0}, /* 282 */
+ {"kexec_load", NULL, NOSYS_NULL, 0}, /* 283 */
+ {"waitid", lx_waitid, 0, 4}, /* 284 */
+ {"sys_setaltroot", NULL, NOSYS_NULL, 0}, /* 285 */
+ {"add_key", NULL, NOSYS_NULL, 0}, /* 286 */
+ {"request_key", NULL, NOSYS_NULL, 0}, /* 287 */
+ {"keyctl", NULL, NOSYS_NULL, 0}, /* 288 */
+ {"ioprio_set", lx_ioprio_set, 0, 3}, /* 289 */
+ {"ioprio_get", lx_ioprio_get, 0, 2}, /* 290 */
+ {"inotify_init", NULL, 0, 0}, /* 291 */
+ {"inotify_add_watch", NULL, 0, 3}, /* 292 */
+ {"inotify_rm_watch", NULL, 0, 2}, /* 293 */
+ {"migrate_pages", NULL, NOSYS_NULL, 0}, /* 294 */
+ {"openat", lx_openat, 0, 4}, /* 295 */
+ {"mkdirat", lx_mkdirat, 0, 3}, /* 296 */
+ {"mknodat", NULL, 0, 4}, /* 297 */
+ {"fchownat", lx_fchownat, 0, 5}, /* 298 */
+ {"futimesat", NULL, 0, 3}, /* 299 */
+ {"fstatat64", lx_fstatat64, 0, 4}, /* 300 */
+ {"unlinkat", NULL, 0, 3}, /* 301 */
+ {"renameat", NULL, 0, 4}, /* 302 */
+ {"linkat", lx_linkat, 0, 5}, /* 303 */
+ {"symlinkat", NULL, 0, 3}, /* 304 */
+ {"readlinkat", NULL, 0, 4}, /* 305 */
+ {"fchmodat", lx_fchmodat, 0, 3}, /* 306 */
+ {"faccessat", lx_faccessat, 0, 4}, /* 307 */
+ {"pselect6", lx_pselect, LX_SYS_EBPARG6, 6}, /* 308 */
+ {"ppoll", lx_ppoll, 0, 5}, /* 309 */
+ {"unshare", NULL, NOSYS_NULL, 0}, /* 310 */
+ {"set_robust_list", lx_set_robust_list, 0, 2}, /* 311 */
+ {"get_robust_list", lx_get_robust_list, 0, 3}, /* 312 */
+ {"splice", NULL, NOSYS_NULL, 0}, /* 313 */
+ {"sync_file_range", lx_sync_file_range, 0, 4}, /* 314 */
+ {"tee", NULL, NOSYS_NULL, 0}, /* 315 */
+ {"vmsplice", NULL, NOSYS_NULL, 0}, /* 316 */
+ {"move_pages", NULL, NOSYS_NULL, 0}, /* 317 */
+ {"getcpu", lx_getcpu, 0, 3}, /* 318 */
+ {"epoll_pwait", lx_epoll_pwait, 0, 5}, /* 319 */
+ {"utimensat", NULL, 0, 4}, /* 320 */
+ {"signalfd", NULL, 0, 3}, /* 321 */
+ {"timerfd_create", NULL, 0, 2}, /* 322 */
+ {"eventfd", NULL, 0, 1}, /* 323 */
+ {"fallocate", lx_fallocate32, LX_SYS_EBPARG6, 6}, /* 324 */
+ {"timerfd_settime", NULL, 0, 4}, /* 325 */
+ {"timerfd_gettime", NULL, 0, 2}, /* 326 */
+ {"signalfd4", NULL, 0, 4}, /* 327 */
+ {"eventfd2", NULL, 0, 2}, /* 328 */
+ {"epoll_create1", lx_epoll_create1, 0, 1}, /* 329 */
+ {"dup3", NULL, 0, 3}, /* 330 */
+ {"pipe2", lx_pipe2, 0, 2}, /* 331 */
+ {"inotify_init1", NULL, 0, 1}, /* 332 */
+ {"preadv", lx_preadv32, 0, 5}, /* 333 */
+ {"pwritev", lx_pwritev32, 0, 5}, /* 334 */
+ {"rt_tgsigqueueinfo", NULL, 0, 4}, /* 335 */
+ {"perf_event_open", NULL, NOSYS_NULL, 0}, /* 336 */
+ {"recvmmsg", NULL, NOSYS_NULL, 0}, /* 337 */
+ {"fanotify_init", NULL, NOSYS_NULL, 0}, /* 338 */
+ {"fanotify_mark", NULL, NOSYS_NULL, 0}, /* 339 */
+ {"prlimit64", lx_prlimit64, 0, 4}, /* 340 */
+ {"name_to_handle_at", NULL, NOSYS_NULL, 0}, /* 341 */
+ {"open_by_handle_at", NULL, NOSYS_NULL, 0}, /* 342 */
+ {"clock_adjtime", NULL, NOSYS_NULL, 0}, /* 343 */
+ {"syncfs", lx_syncfs, 0, 1}, /* 344 */
+ {"sendmmsg", NULL, NOSYS_NULL, 0}, /* 345 */
+ {"setns", NULL, NOSYS_NULL, 0}, /* 346 */
+ {"process_vm_readv", NULL, NOSYS_NULL, 0}, /* 347 */
+ {"process_vm_writev", NULL, NOSYS_NULL, 0}, /* 348 */
+ {"kcmp", NULL, NOSYS_NULL, 0}, /* 349 */
+ {"finit_module", NULL, NOSYS_NULL, 0}, /* 350 */
+ {"sched_setattr", NULL, NOSYS_NULL, 0}, /* 351 */
+ {"sched_getattr", NULL, NOSYS_NULL, 0}, /* 352 */
+ {"renameat2", NULL, NOSYS_NULL, 0}, /* 353 */
+ {"seccomp", NULL, NOSYS_NULL, 0}, /* 354 */
+ {"getrandom", lx_getrandom, 0, 3}, /* 355 */
+ {"memfd_create", NULL, NOSYS_NULL, 0}, /* 356 */
+ {"bpf", NULL, NOSYS_NULL, 0}, /* 357 */
+ {"execveat", NULL, NOSYS_NULL, 0}, /* 358 */
+};
+
+#if defined(_LP64)
+/*
+ * Linux defines system call numbers for 64-bit x86 in the file:
+ * arch/x86/syscalls/syscall_64.tbl
+ */
+lx_sysent_t lx_sysent64[] = {
+ {"read", lx_read, 0, 3}, /* 0 */
+ {"write", lx_write, 0, 3}, /* 1 */
+ {"open", lx_open, 0, 3}, /* 2 */
+ {"close", lx_close, 0, 1}, /* 3 */
+ {"stat", lx_stat64, 0, 2}, /* 4 */
+ {"fstat", lx_fstat64, 0, 2}, /* 5 */
+ {"lstat", lx_lstat64, 0, 2}, /* 6 */
+ {"poll", lx_poll, 0, 3}, /* 7 */
+ {"lseek", NULL, 0, 3}, /* 8 */
+ {"mmap", NULL, 0, 6}, /* 9 */
+ {"mprotect", NULL, 0, 3}, /* 10 */
+ {"munmap", NULL, 0, 2}, /* 11 */
+ {"brk", lx_brk, 0, 1}, /* 12 */
+ {"rt_sigaction", NULL, 0, 4}, /* 13 */
+ {"rt_sigprocmask", NULL, 0, 4}, /* 14 */
+ {"rt_sigreturn", NULL, 0, 0}, /* 15 */
+ {"ioctl", lx_ioctl, 0, 3}, /* 16 */
+ {"pread64", lx_pread, 0, 4}, /* 17 */
+ {"pwrite64", lx_pwrite, 0, 4}, /* 18 */
+ {"readv", lx_readv, 0, 3}, /* 19 */
+ {"writev", lx_writev, 0, 3}, /* 20 */
+ {"access", lx_access, 0, 2}, /* 21 */
+ {"pipe", lx_pipe, 0, 1}, /* 22 */
+ {"select", lx_select, 0, 5}, /* 23 */
+ {"sched_yield", lx_sched_yield, 0, 0}, /* 24 */
+ {"mremap", NULL, 0, 5}, /* 25 */
+ {"msync", NULL, 0, 3}, /* 26 */
+ {"mincore", NULL, 0, 3}, /* 27 */
+ {"madvise", NULL, 0, 3}, /* 28 */
+ {"shmget", NULL, 0, 3}, /* 29 */
+ {"shmat", NULL, 0, 4}, /* 30 */
+ {"shmctl", NULL, 0, 3}, /* 31 */
+ {"dup", NULL, 0, 1}, /* 32 */
+ {"dup2", NULL, 0, 2}, /* 33 */
+ {"pause", NULL, 0, 0}, /* 34 */
+ {"nanosleep", lx_nanosleep, 0, 2}, /* 35 */
+ {"getitimer", NULL, 0, 2}, /* 36 */
+ {"alarm", NULL, 0, 1}, /* 37 */
+ {"setitimer", NULL, 0, 3}, /* 38 */
+ {"getpid", lx_getpid, 0, 0}, /* 39 */
+ {"sendfile", NULL, 0, 4}, /* 40 */
+ {"socket", lx_socket, 0, 3}, /* 41 */
+ {"connect", lx_connect, 0, 3}, /* 42 */
+ {"accept", lx_accept, 0, 3}, /* 43 */
+ {"sendto", lx_sendto, 0, 6}, /* 44 */
+ {"recvfrom", lx_recvfrom, 0, 6}, /* 45 */
+ {"sendmsg", lx_sendmsg, 0, 3}, /* 46 */
+ {"recvmsg", lx_recvmsg, 0, 3}, /* 47 */
+ {"shutdown", NULL, 0, 2}, /* 48 */
+ {"bind", lx_bind, 0, 3}, /* 49 */
+ {"listen", NULL, 0, 2}, /* 50 */
+ {"getsockname", lx_getsockname, 0, 3}, /* 51 */
+ {"getpeername", lx_getpeername, 0, 3}, /* 52 */
+ {"socketpair", NULL, 0, 4}, /* 53 */
+ {"setsockopt", lx_setsockopt, 0, 5}, /* 54 */
+ {"getsockopt", lx_getsockopt, 0, 5}, /* 55 */
+ {"clone", NULL, 0, 5}, /* 56 */
+ {"fork", NULL, 0, 0}, /* 57 */
+ {"vfork", NULL, 0, 0}, /* 58 */
+ {"execve", NULL, 0, 3}, /* 59 */
+ {"exit", NULL, 0, 1}, /* 60 */
+ {"wait4", lx_wait4, 0, 4}, /* 61 */
+ {"kill", lx_kill, 0, 2}, /* 62 */
+ {"uname", lx_uname, 0, 1}, /* 63 */
+ {"semget", NULL, 0, 3}, /* 64 */
+ {"semop", NULL, 0, 3}, /* 65 */
+ {"semctl", NULL, 0, 4}, /* 66 */
+ {"shmdt", NULL, 0, 1}, /* 67 */
+ {"msgget", NULL, 0, 2}, /* 68 */
+ {"msgsnd", NULL, 0, 4}, /* 69 */
+ {"msgrcv", NULL, 0, 5}, /* 70 */
+ {"msgctl", NULL, 0, 3}, /* 71 */
+ {"fcntl", lx_fcntl64, 0, 3}, /* 72 */
+ {"flock", NULL, 0, 2}, /* 73 */
+ {"fsync", NULL, 0, 1}, /* 74 */
+ {"fdatasync", NULL, 0, 1}, /* 75 */
+ {"truncate", NULL, 0, 2}, /* 76 */
+ {"ftruncate", NULL, 0, 2}, /* 77 */
+ {"getdents", lx_getdents_64, 0, 3}, /* 78 */
+ {"getcwd", lx_getcwd, 0, 2}, /* 79 */
+ {"chdir", NULL, 0, 1}, /* 80 */
+ {"fchdir", NULL, 0, 1}, /* 81 */
+ {"rename", NULL, 0, 2}, /* 82 */
+ {"mkdir", lx_mkdir, 0, 2}, /* 83 */
+ {"rmdir", NULL, 0, 1}, /* 84 */
+ {"creat", NULL, 0, 2}, /* 85 */
+ {"link", lx_link, 0, 2}, /* 86 */
+ {"unlink", NULL, 0, 1}, /* 87 */
+ {"symlink", NULL, 0, 2}, /* 88 */
+ {"readlink", NULL, 0, 3}, /* 89 */
+ {"chmod", lx_chmod, 0, 2}, /* 90 */
+ {"fchmod", lx_fchmod, 0, 2}, /* 91 */
+ {"chown", lx_chown, 0, 3}, /* 92 */
+ {"fchown", lx_fchown, 0, 3}, /* 93 */
+ {"lchown", lx_lchown, 0, 3}, /* 94 */
+ {"umask", NULL, 0, 1}, /* 95 */
+ {"gettimeofday", lx_gettimeofday, 0, 2}, /* 96 */
+ {"getrlimit", lx_getrlimit, 0, 2}, /* 97 */
+ {"getrusage", NULL, 0, 2}, /* 98 */
+ {"sysinfo", lx_sysinfo64, 0, 1}, /* 99 */
+ {"times", NULL, 0, 1}, /* 100 */
+ {"ptrace", lx_ptrace, 0, 4}, /* 101 */
+ {"getuid", NULL, 0, 0}, /* 102 */
+ {"syslog", NULL, 0, 3}, /* 103 */
+ {"getgid", NULL, 0, 0}, /* 104 */
+ {"setuid", NULL, 0, 1}, /* 105 */
+ {"setgid", NULL, 0, 1}, /* 106 */
+ {"geteuid", NULL, 0, 0}, /* 107 */
+ {"getegid", NULL, 0, 0}, /* 108 */
+ {"setpgid", NULL, 0, 2}, /* 109 */
+ {"getppid", lx_getppid, 0, 0}, /* 110 */
+ {"getpgrp", NULL, 0, 0}, /* 111 */
+ {"setsid", NULL, 0, 0}, /* 112 */
+ {"setreuid", NULL, 0, 0}, /* 113 */
+ {"setregid", NULL, 0, 0}, /* 114 */
+ {"getgroups", NULL, 0, 2}, /* 115 */
+ {"setgroups", NULL, 0, 2}, /* 116 */
+ {"setresuid", lx_setresuid, 0, 3}, /* 117 */
+ {"getresuid", NULL, 0, 3}, /* 118 */
+ {"setresgid", lx_setresgid, 0, 3}, /* 119 */
+ {"getresgid", NULL, 0, 3}, /* 120 */
+ {"getpgid", NULL, 0, 1}, /* 121 */
+ {"setfsuid", NULL, 0, 1}, /* 122 */
+ {"setfsgid", NULL, 0, 1}, /* 123 */
+ {"getsid", NULL, 0, 1}, /* 124 */
+ {"capget", NULL, 0, 2}, /* 125 */
+ {"capset", NULL, 0, 2}, /* 126 */
+ {"rt_sigpending", NULL, 0, 2}, /* 127 */
+ {"rt_sigtimedwait", NULL, 0, 4}, /* 128 */
+ {"rt_sigqueueinfo", NULL, 0, 3}, /* 129 */
+ {"rt_sigsuspend", NULL, 0, 2}, /* 130 */
+ {"sigaltstack", NULL, 0, 2}, /* 131 */
+ {"utime", NULL, 0, 2}, /* 132 */
+ {"mknod", NULL, 0, 3}, /* 133 */
+ {"uselib", NULL, NOSYS_KERNEL, 0}, /* 134 */
+ {"personality", lx_personality, 0, 1}, /* 135 */
+ {"ustat", NULL, NOSYS_OBSOLETE, 2}, /* 136 */
+ {"statfs", NULL, 0, 2}, /* 137 */
+ {"fstatfs", NULL, 0, 2}, /* 138 */
+ {"sysfs", NULL, 0, 3}, /* 139 */
+ {"getpriority", NULL, 0, 2}, /* 140 */
+ {"setpriority", NULL, 0, 3}, /* 141 */
+ {"sched_setparam", NULL, 0, 2}, /* 142 */
+ {"sched_getparam", NULL, 0, 2}, /* 143 */
+ {"sched_setscheduler", NULL, 0, 3}, /* 144 */
+ {"sched_getscheduler", NULL, 0, 1}, /* 145 */
+ {"sched_get_priority_max", NULL, 0, 1}, /* 146 */
+ {"sched_get_priority_min", NULL, 0, 1}, /* 147 */
+ {"sched_rr_get_interval", NULL, 0, 2}, /* 148 */
+ {"mlock", NULL, 0, 2}, /* 149 */
+ {"munlock", NULL, 0, 2}, /* 150 */
+ {"mlockall", NULL, 0, 1}, /* 151 */
+ {"munlockall", NULL, 0, 0}, /* 152 */
+ {"vhangup", NULL, 0, 0}, /* 153 */
+ {"modify_ldt", lx_modify_ldt, 0, 3}, /* 154 */
+ {"pivot_root", NULL, NOSYS_KERNEL, 0}, /* 155 */
+ {"sysctl", NULL, 0, 1}, /* 156 */
+ {"prctl", lx_prctl, 0, 5}, /* 157 */
+ {"arch_prctl", lx_arch_prctl, 0, 2}, /* 158 */
+ {"adjtimex", NULL, 0, 1}, /* 159 */
+ {"setrlimit", lx_setrlimit, 0, 2}, /* 160 */
+ {"chroot", NULL, 0, 1}, /* 161 */
+ {"sync", NULL, 0, 0}, /* 162 */
+ {"acct", NULL, NOSYS_NO_EQUIV, 0}, /* 163 */
+ {"settimeofday", NULL, 0, 2}, /* 164 */
+ {"mount", NULL, 0, 5}, /* 165 */
+ {"umount2", NULL, 0, 2}, /* 166 */
+ {"swapon", NULL, NOSYS_KERNEL, 0}, /* 167 */
+ {"swapoff", NULL, NOSYS_KERNEL, 0}, /* 168 */
+ {"reboot", NULL, 0, 4}, /* 169 */
+ {"sethostname", NULL, 0, 2}, /* 170 */
+ {"setdomainname", NULL, 0, 2}, /* 171 */
+ {"iopl", NULL, NOSYS_NO_EQUIV, 0}, /* 172 */
+ {"ioperm", NULL, NOSYS_NO_EQUIV, 0}, /* 173 */
+ {"create_module", NULL, NOSYS_KERNEL, 0}, /* 174 */
+ {"init_module", NULL, NOSYS_KERNEL, 0}, /* 175 */
+ {"delete_module", NULL, NOSYS_KERNEL, 0}, /* 176 */
+ {"get_kernel_syms", NULL, NOSYS_KERNEL, 0}, /* 177 */
+ {"query_module", NULL, 0, 5}, /* 178 */
+ {"quotactl", NULL, NOSYS_KERNEL, 0}, /* 179 */
+ {"nfsservctl", NULL, NOSYS_KERNEL, 0}, /* 180 */
+ {"getpmsg", NULL, NOSYS_OBSOLETE, 0}, /* 181 */
+ {"putpmsg", NULL, NOSYS_OBSOLETE, 0}, /* 182 */
+ {"afs_syscall", NULL, NOSYS_KERNEL, 0}, /* 183 */
+ {"tux", NULL, NOSYS_NO_EQUIV, 0}, /* 184 */
+ {"security", NULL, NOSYS_NO_EQUIV, 0}, /* 185 */
+ {"gettid", lx_gettid, 0, 0}, /* 186 */
+ {"readahead", NULL, NOSYS_NO_EQUIV, 0}, /* 187 */
+ {"setxattr", lx_setxattr, 0, 5}, /* 188 */
+ {"lsetxattr", lx_lsetxattr, 0, 5}, /* 189 */
+ {"fsetxattr", lx_fsetxattr, 0, 5}, /* 190 */
+ {"getxattr", lx_getxattr, 0, 4}, /* 191 */
+ {"lgetxattr", lx_lgetxattr, 0, 4}, /* 192 */
+ {"fgetxattr", lx_fgetxattr, 0, 4}, /* 193 */
+ {"listxattr", lx_listxattr, 0, 3}, /* 194 */
+ {"llistxattr", lx_llistxattr, 0, 3}, /* 195 */
+ {"flistxattr", lx_flistxattr, 0, 3}, /* 196 */
+ {"removexattr", lx_removexattr, 0, 2}, /* 197 */
+ {"lremovexattr", lx_lremovexattr, 0, 2}, /* 198 */
+ {"fremovexattr", lx_fremovexattr, 0, 2}, /* 199 */
+ {"tkill", lx_tkill, 0, 2}, /* 200 */
+ {"time", lx_time, 0, 1}, /* 201 */
+ {"futex", lx_futex, 0, 6}, /* 202 */
+ {"sched_setaffinity", NULL, 0, 3}, /* 203 */
+ {"sched_getaffinity", NULL, 0, 3}, /* 204 */
+ {"set_thread_area", lx_set_thread_area, 0, 1}, /* 205 */
+ {"io_setup", lx_io_setup, 0, 2}, /* 206 */
+ {"io_destroy", NULL, 0, 1}, /* 207 */
+ {"io_getevents", NULL, 0, 5}, /* 208 */
+ {"io_submit", NULL, 0, 3}, /* 209 */
+ {"io_cancel", NULL, 0, 3}, /* 210 */
+ {"get_thread_area", lx_get_thread_area, 0, 1}, /* 211 */
+ {"lookup_dcookie", NULL, NOSYS_NO_EQUIV, 0}, /* 212 */
+ {"epoll_create", lx_epoll_create, 0, 1}, /* 213 */
+ {"epoll_ctl_old", NULL, NOSYS_NULL, 0}, /* 214 */
+ {"epoll_wait_old", NULL, NOSYS_NULL, 0}, /* 215 */
+ {"remap_file_pages", NULL, NOSYS_NO_EQUIV, 0}, /* 216 */
+ {"getdents64", lx_getdents64, 0, 3}, /* 217 */
+ {"set_tid_address", lx_set_tid_address, 0, 1}, /* 218 */
+ {"restart_syscall", NULL, NOSYS_NULL, 0}, /* 219 */
+ {"semtimedop", NULL, 0, 4}, /* 220 */
+ {"fadvise64", NULL, 0, 4}, /* 221 */
+ {"timer_create", NULL, 0, 3}, /* 222 */
+ {"timer_settime", NULL, 0, 4}, /* 223 */
+ {"timer_gettime", NULL, 0, 2}, /* 224 */
+ {"timer_getoverrun", NULL, 0, 1}, /* 225 */
+ {"timer_delete", NULL, 0, 1}, /* 226 */
+ {"clock_settime", lx_clock_settime, 0, 2}, /* 227 */
+ {"clock_gettime", lx_clock_gettime, 0, 2}, /* 228 */
+ {"clock_getres", lx_clock_getres, 0, 2}, /* 229 */
+ {"clock_nanosleep", NULL, 0, 4}, /* 230 */
+ {"exit_group", NULL, 0, 1}, /* 231 */
+ {"epoll_wait", lx_epoll_wait, 0, 4}, /* 232 */
+ {"epoll_ctl", lx_epoll_ctl, 0, 4}, /* 233 */
+ {"tgkill", lx_tgkill, 0, 3}, /* 234 */
+ {"utimes", NULL, 0, 2}, /* 235 */
+ {"vserver", NULL, NOSYS_NULL, 0}, /* 236 */
+ {"mbind", NULL, NOSYS_NULL, 0}, /* 237 */
+ {"set_mempolicy", NULL, NOSYS_NULL, 0}, /* 238 */
+ {"get_mempolicy", NULL, NOSYS_NULL, 0}, /* 239 */
+ {"mq_open", NULL, NOSYS_NULL, 0}, /* 240 */
+ {"mq_unlink", NULL, NOSYS_NULL, 0}, /* 241 */
+ {"mq_timedsend", NULL, NOSYS_NULL, 0}, /* 242 */
+ {"mq_timedreceive", NULL, NOSYS_NULL, 0}, /* 243 */
+ {"mq_notify", NULL, NOSYS_NULL, 0}, /* 244 */
+ {"mq_getsetattr", NULL, NOSYS_NULL, 0}, /* 245 */
+ {"kexec_load", NULL, NOSYS_NULL, 0}, /* 246 */
+ {"waitid", lx_waitid, 0, 4}, /* 247 */
+ {"add_key", NULL, NOSYS_NULL, 0}, /* 248 */
+ {"request_key", NULL, NOSYS_NULL, 0}, /* 249 */
+ {"keyctl", NULL, NOSYS_NULL, 0}, /* 250 */
+ {"ioprio_set", lx_ioprio_set, 0, 3}, /* 251 */
+ {"ioprio_get", lx_ioprio_get, 0, 2}, /* 252 */
+ {"inotify_init", NULL, 0, 0}, /* 253 */
+ {"inotify_add_watch", NULL, 0, 3}, /* 254 */
+ {"inotify_rm_watch", NULL, 0, 2}, /* 255 */
+ {"migrate_pages", NULL, NOSYS_NULL, 0}, /* 256 */
+ {"openat", lx_openat, 0, 4}, /* 257 */
+ {"mkdirat", lx_mkdirat, 0, 3}, /* 258 */
+ {"mknodat", NULL, 0, 4}, /* 259 */
+ {"fchownat", lx_fchownat, 0, 5}, /* 260 */
+ {"futimesat", NULL, 0, 3}, /* 261 */
+ {"fstatat64", lx_fstatat64, 0, 4}, /* 262 */
+ {"unlinkat", NULL, 0, 3}, /* 263 */
+ {"renameat", NULL, 0, 4}, /* 264 */
+ {"linkat", lx_linkat, 0, 5}, /* 265 */
+ {"symlinkat", NULL, 0, 3}, /* 266 */
+ {"readlinkat", NULL, 0, 4}, /* 267 */
+ {"fchmodat", lx_fchmodat, 0, 3}, /* 268 */
+ {"faccessat", lx_faccessat, 0, 4}, /* 269 */
+ {"pselect6", lx_pselect, 0, 6}, /* 270 */
+ {"ppoll", lx_ppoll, 0, 5}, /* 271 */
+ {"unshare", NULL, NOSYS_NULL, 0}, /* 272 */
+ {"set_robust_list", lx_set_robust_list, 0, 2}, /* 273 */
+ {"get_robust_list", lx_get_robust_list, 0, 3}, /* 274 */
+ {"splice", NULL, NOSYS_NULL, 0}, /* 275 */
+ {"tee", NULL, NOSYS_NULL, 0}, /* 276 */
+ {"sync_file_range", lx_sync_file_range, 0, 4}, /* 277 */
+ {"vmsplice", NULL, NOSYS_NULL, 0}, /* 278 */
+ {"move_pages", NULL, NOSYS_NULL, 0}, /* 279 */
+ {"utimensat", NULL, 0, 4}, /* 280 */
+ {"epoll_pwait", lx_epoll_pwait, 0, 5}, /* 281 */
+ {"signalfd", NULL, 0, 3}, /* 282 */
+ {"timerfd_create", NULL, 0, 2}, /* 283 */
+ {"eventfd", NULL, 0, 1}, /* 284 */
+ {"fallocate", lx_fallocate, 0, 4}, /* 285 */
+ {"timerfd_settime", NULL, 0, 4}, /* 286 */
+ {"timerfd_gettime", NULL, 0, 2}, /* 287 */
+ {"accept4", lx_accept4, 0, 4}, /* 288 */
+ {"signalfd4", NULL, 0, 4}, /* 289 */
+ {"eventfd2", NULL, 0, 2}, /* 290 */
+ {"epoll_create1", lx_epoll_create1, 0, 1}, /* 291 */
+ {"dup3", NULL, 0, 3}, /* 292 */
+ {"pipe2", lx_pipe2, 0, 2}, /* 293 */
+ {"inotify_init1", NULL, 0, 1}, /* 294 */
+ {"preadv", lx_preadv, 0, 4}, /* 295 */
+ {"pwritev", lx_pwritev, 0, 4}, /* 296 */
+ {"rt_tgsigqueueinfo", NULL, 0, 4}, /* 297 */
+ {"perf_event_open", NULL, NOSYS_NULL, 0}, /* 298 */
+ {"recvmmsg", NULL, NOSYS_NULL, 0}, /* 299 */
+ {"fanotify_init", NULL, NOSYS_NULL, 0}, /* 300 */
+ {"fanotify_mark", NULL, NOSYS_NULL, 0}, /* 301 */
+ {"prlimit64", lx_prlimit64, 0, 4}, /* 302 */
+ {"name_to_handle_at", NULL, NOSYS_NULL, 0}, /* 303 */
+ {"open_by_handle_at", NULL, NOSYS_NULL, 0}, /* 304 */
+ {"clock_adjtime", NULL, NOSYS_NULL, 0}, /* 305 */
+ {"syncfs", lx_syncfs, 0, 1}, /* 306 */
+ {"sendmmsg", NULL, NOSYS_NULL, 0}, /* 307 */
+ {"setns", NULL, NOSYS_NULL, 0}, /* 309 */
+ {"getcpu", lx_getcpu, 0, 3}, /* 309 */
+ {"process_vm_readv", NULL, NOSYS_NULL, 0}, /* 310 */
+ {"process_vm_writev", NULL, NOSYS_NULL, 0}, /* 311 */
+ {"kcmp", NULL, NOSYS_NULL, 0}, /* 312 */
+ {"finit_module", NULL, NOSYS_NULL, 0}, /* 313 */
+ {"sched_setattr", NULL, NOSYS_NULL, 0}, /* 314 */
+ {"sched_getattr", NULL, NOSYS_NULL, 0}, /* 315 */
+ {"renameat2", NULL, NOSYS_NULL, 0}, /* 316 */
+ {"seccomp", NULL, NOSYS_NULL, 0}, /* 317 */
+ {"getrandom", lx_getrandom, 0, 3}, /* 318 */
+ {"memfd_create", NULL, NOSYS_NULL, 0}, /* 319 */
+ {"kexec_file_load", NULL, NOSYS_NULL, 0}, /* 320 */
+ {"bpf", NULL, NOSYS_NULL, 0}, /* 321 */
+ {"execveat", NULL, NOSYS_NULL, 0}, /* 322 */
+
+ /* XXX TBD gap then x32 syscalls from 512 - 544 */
+};
+#endif
diff --git a/usr/src/uts/common/brand/lx/procfs/lx_proc.h b/usr/src/uts/common/brand/lx/procfs/lx_proc.h
new file mode 100644
index 0000000000..131a061062
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/procfs/lx_proc.h
@@ -0,0 +1,350 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#ifndef _LX_PROC_H
+#define _LX_PROC_H
+
+#ifdef _LXPROC_NATIVE_H
+#error Attempted to include branded lx_proc.h after native lxproc.h
+#endif
+
+#define _LXPROC_BRANDED_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * lxproc.h: declarations, data structures and macros for lxprocfs
+ */
+
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/policy.h>
+#include <sys/debug.h>
+#include <sys/dirent.h>
+#include <sys/errno.h>
+#include <sys/file.h>
+#include <sys/kmem.h>
+#include <sys/pathname.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/var.h>
+#include <sys/user.h>
+#include <sys/t_lock.h>
+#include <sys/sysmacros.h>
+#include <sys/cred.h>
+#include <sys/priv.h>
+#include <sys/vnode.h>
+#include <sys/vfs.h>
+#include <sys/statvfs.h>
+#include <sys/cmn_err.h>
+#include <sys/zone.h>
+#include <sys/uio.h>
+#include <sys/utsname.h>
+#include <sys/dnlc.h>
+#include <sys/atomic.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/nvpair.h>
+#include <vm/as.h>
+#include <vm/anon.h>
+
+/*
+ * Convert a vnode into an lxpr_mnt_t
+ */
+#define VTOLXPM(vp) ((lxpr_mnt_t *)(vp)->v_vfsp->vfs_data)
+
+/*
+ * convert a vnode into an lxpr_node
+ */
+#define VTOLXP(vp) ((lxpr_node_t *)(vp)->v_data)
+
+/*
+ * convert a lxprnode into a vnode
+ */
+#define LXPTOV(lxpnp) ((lxpnp)->lxpr_vnode)
+
+/*
+ * convert a lxpr_node into zone for fs
+ */
+#define LXPTOZ(lxpnp) \
+ (((lxpr_mnt_t *)(lxpnp)->lxpr_vnode->v_vfsp->vfs_data)->lxprm_zone)
+
+#define LXPNSIZ 256 /* max size of lx /proc file name entries */
+
+/*
+ * Pretend that a directory entry takes 16 bytes
+ */
+#define LXPR_SDSIZE 16
+
+/*
+ * Node/file types for lx /proc files
+ * (directories and files contained therein).
+ */
+typedef enum lxpr_nodetype {
+ LXPR_INVALID, /* nodes start at 1 */
+ LXPR_PROCDIR, /* /proc */
+ LXPR_PIDDIR, /* /proc/<pid> */
+ LXPR_PID_AUXV, /* /proc/<pid>/auxv */
+ LXPR_PID_CGROUP, /* /proc/<pid>/cgroup */
+ LXPR_PID_CMDLINE, /* /proc/<pid>/cmdline */
+ LXPR_PID_COMM, /* /proc/<pid>/comm */
+ LXPR_PID_CPU, /* /proc/<pid>/cpu */
+ LXPR_PID_CURDIR, /* /proc/<pid>/cwd */
+ LXPR_PID_ENV, /* /proc/<pid>/environ */
+ LXPR_PID_EXE, /* /proc/<pid>/exe */
+ LXPR_PID_LIMITS, /* /proc/<pid>/limits */
+ LXPR_PID_LOGINUID, /* /proc/<pid>/loginuid */
+ LXPR_PID_MAPS, /* /proc/<pid>/maps */
+ LXPR_PID_MEM, /* /proc/<pid>/mem */
+ LXPR_PID_MOUNTINFO, /* /proc/<pid>/mountinfo */
+ LXPR_PID_OOM_SCR_ADJ, /* /proc/<pid>/oom_score_adj */
+ LXPR_PID_PERSONALITY, /* /proc/<pid>/personality */
+ LXPR_PID_ROOTDIR, /* /proc/<pid>/root */
+ LXPR_PID_STAT, /* /proc/<pid>/stat */
+ LXPR_PID_STATM, /* /proc/<pid>/statm */
+ LXPR_PID_STATUS, /* /proc/<pid>/status */
+ LXPR_PID_TASKDIR, /* /proc/<pid>/task */
+ LXPR_PID_TASK_IDDIR, /* /proc/<pid>/task/<tid> */
+ LXPR_PID_FDDIR, /* /proc/<pid>/fd */
+ LXPR_PID_FD_FD, /* /proc/<pid>/fd/nn */
+ LXPR_PID_TID_AUXV, /* /proc/<pid>/task/<tid>/auxv */
+ LXPR_PID_TID_CGROUP, /* /proc/<pid>/task/<tid>/cgroup */
+ LXPR_PID_TID_CMDLINE, /* /proc/<pid>/task/<tid>/cmdline */
+ LXPR_PID_TID_COMM, /* /proc/<pid>/task/<tid>/comm */
+ LXPR_PID_TID_CPU, /* /proc/<pid>/task/<tid>/cpu */
+ LXPR_PID_TID_CURDIR, /* /proc/<pid>/task/<tid>/cwd */
+ LXPR_PID_TID_ENV, /* /proc/<pid>/task/<tid>/environ */
+ LXPR_PID_TID_EXE, /* /proc/<pid>/task/<tid>/exe */
+ LXPR_PID_TID_LIMITS, /* /proc/<pid>/task/<tid>/limits */
+ LXPR_PID_TID_LOGINUID, /* /proc/<pid>/task/<tid>/loginuid */
+ LXPR_PID_TID_MAPS, /* /proc/<pid>/task/<tid>/maps */
+ LXPR_PID_TID_MEM, /* /proc/<pid>/task/<tid>/mem */
+ LXPR_PID_TID_MOUNTINFO, /* /proc/<pid>/task/<tid>/mountinfo */
+ LXPR_PID_TID_OOM_SCR_ADJ, /* /proc/<pid>/task/<tid>/oom_score_adj */
+ LXPR_PID_TID_PERSONALITY, /* /proc/<pid>/task/<tid>/personality */
+ LXPR_PID_TID_ROOTDIR, /* /proc/<pid>/task/<tid>/root */
+ LXPR_PID_TID_STAT, /* /proc/<pid>/task/<tid>/stat */
+ LXPR_PID_TID_STATM, /* /proc/<pid>/task/<tid>/statm */
+ LXPR_PID_TID_STATUS, /* /proc/<pid>/task/<tid>/status */
+ LXPR_PID_TID_FDDIR, /* /proc/<pid>/task/<tid>/fd */
+ LXPR_PID_TID_FD_FD, /* /proc/<pid>/task/<tid>/fd/nn */
+ LXPR_CGROUPS, /* /proc/cgroups */
+ LXPR_CMDLINE, /* /proc/cmdline */
+ LXPR_CPUINFO, /* /proc/cpuinfo */
+ LXPR_DEVICES, /* /proc/devices */
+ LXPR_DISKSTATS, /* /proc/diskstats */
+ LXPR_DMA, /* /proc/dma */
+ LXPR_FILESYSTEMS, /* /proc/filesystems */
+ LXPR_INTERRUPTS, /* /proc/interrupts */
+ LXPR_IOPORTS, /* /proc/ioports */
+ LXPR_KCORE, /* /proc/kcore */
+ LXPR_KMSG, /* /proc/kmsg */
+ LXPR_LOADAVG, /* /proc/loadavg */
+ LXPR_MEMINFO, /* /proc/meminfo */
+ LXPR_MODULES, /* /proc/modules */
+ LXPR_MOUNTS, /* /proc/mounts */
+ LXPR_NETDIR, /* /proc/net */
+ LXPR_NET_ARP, /* /proc/net/arp */
+ LXPR_NET_DEV, /* /proc/net/dev */
+ LXPR_NET_DEV_MCAST, /* /proc/net/dev_mcast */
+ LXPR_NET_IF_INET6, /* /proc/net/if_inet6 */
+ LXPR_NET_IGMP, /* /proc/net/igmp */
+ LXPR_NET_IP_MR_CACHE, /* /proc/net/ip_mr_cache */
+ LXPR_NET_IP_MR_VIF, /* /proc/net/ip_mr_vif */
+ LXPR_NET_IPV6_ROUTE, /* /proc/net/ipv6_route */
+ LXPR_NET_MCFILTER, /* /proc/net/mcfilter */
+ LXPR_NET_NETSTAT, /* /proc/net/netstat */
+ LXPR_NET_RAW, /* /proc/net/raw */
+ LXPR_NET_ROUTE, /* /proc/net/route */
+ LXPR_NET_RPC, /* /proc/net/rpc */
+ LXPR_NET_RT_CACHE, /* /proc/net/rt_cache */
+ LXPR_NET_SOCKSTAT, /* /proc/net/sockstat */
+ LXPR_NET_SNMP, /* /proc/net/snmp */
+ LXPR_NET_STAT, /* /proc/net/stat */
+ LXPR_NET_TCP, /* /proc/net/tcp */
+ LXPR_NET_TCP6, /* /proc/net/tcp6 */
+ LXPR_NET_UDP, /* /proc/net/udp */
+ LXPR_NET_UDP6, /* /proc/net/udp6 */
+ LXPR_NET_UNIX, /* /proc/net/unix */
+ LXPR_PARTITIONS, /* /proc/partitions */
+ LXPR_SELF, /* /proc/self */
+ LXPR_STAT, /* /proc/stat */
+ LXPR_SWAPS, /* /proc/swaps */
+ LXPR_SYSDIR, /* /proc/sys/ */
+ LXPR_SYS_FSDIR, /* /proc/sys/fs/ */
+ LXPR_SYS_FS_INOTIFYDIR, /* /proc/sys/fs/inotify */
+ LXPR_SYS_FS_INOTIFY_MAX_QUEUED_EVENTS, /* inotify/max_queued_events */
+ LXPR_SYS_FS_INOTIFY_MAX_USER_INSTANCES, /* inotify/max_user_instances */
+ LXPR_SYS_FS_INOTIFY_MAX_USER_WATCHES, /* inotify/max_user_watches */
+ LXPR_SYS_KERNELDIR, /* /proc/sys/kernel/ */
+ LXPR_SYS_KERNEL_CAPLCAP, /* /proc/sys/kernel/cap_last_cap */
+ LXPR_SYS_KERNEL_COREPATT, /* /proc/sys/kernel/core_pattern */
+ LXPR_SYS_KERNEL_HOSTNAME, /* /proc/sys/kernel/hostname */
+ LXPR_SYS_KERNEL_MSGMNI, /* /proc/sys/kernel/msgmni */
+ LXPR_SYS_KERNEL_NGROUPS_MAX, /* /proc/sys/kernel/ngroups_max */
+ LXPR_SYS_KERNEL_OSREL, /* /proc/sys/kernel/osrelease */
+ LXPR_SYS_KERNEL_PID_MAX, /* /proc/sys/kernel/pid_max */
+ LXPR_SYS_KERNEL_RANDDIR, /* /proc/sys/kernel/random */
+ LXPR_SYS_KERNEL_RAND_BOOTID, /* /proc/sys/kernel/random/boot_id */
+ LXPR_SYS_KERNEL_SEM, /* /proc/sys/kernel/sem */
+ LXPR_SYS_KERNEL_SHMALL, /* /proc/sys/kernel/shmall */
+ LXPR_SYS_KERNEL_SHMMAX, /* /proc/sys/kernel/shmmax */
+ LXPR_SYS_KERNEL_SHMMNI, /* /proc/sys/kernel/shmmni */
+ LXPR_SYS_KERNEL_THREADS_MAX, /* /proc/sys/kernel/threads-max */
+ LXPR_SYS_NETDIR, /* /proc/sys/net */
+ LXPR_SYS_NET_COREDIR, /* /proc/sys/net/core */
+ LXPR_SYS_NET_CORE_SOMAXCON, /* /proc/sys/net/core/somaxconn */
+ LXPR_SYS_NET_IPV4DIR, /* /proc/sys/net/ipv4 */
+ LXPR_SYS_NET_IPV4_IP_LPORT_RANGE, /* .../net/ipv4/ip_local_port_range */
+ LXPR_SYS_NET_IPV4_TCP_FIN_TO, /* /proc/sys/net/ipv4/tcp_fin_timeout */
+ LXPR_SYS_NET_IPV4_TCP_KA_INT, /* .../net/ipv4/tcp_keepalive_intvl */
+ LXPR_SYS_NET_IPV4_TCP_KA_TIM, /* .../net/ipv4/tcp_keepalive_time */
+ LXPR_SYS_NET_IPV4_TCP_SACK, /* /proc/sys/net/ipv4/tcp_sack */
+ LXPR_SYS_NET_IPV4_TCP_WINSCALE, /* .../net/ipv4/tcp_window_scaling */
+ LXPR_SYS_VMDIR, /* /proc/sys/vm */
+ LXPR_SYS_VM_MAX_MAP_CNT, /* /proc/sys/vm/max_map_count */
+ LXPR_SYS_VM_MINFR_KB, /* /proc/sys/vm/min_free_kbytes */
+ LXPR_SYS_VM_NHUGEP, /* /proc/sys/vm/nr_hugepages */
+ LXPR_SYS_VM_OVERCOMMIT_MEM, /* /proc/sys/vm/overcommit_memory */
+ LXPR_SYS_VM_SWAPPINESS, /* /proc/sys/vm/swappiness */
+ LXPR_UPTIME, /* /proc/uptime */
+ LXPR_VERSION, /* /proc/version */
+ LXPR_VMSTAT, /* /proc/vmstat */
+ LXPR_NFILES /* number of lx /proc file types */
+} lxpr_nodetype_t;
+
+
+/*
+ * Number of fds allowed for in the inode number calculation
+ * per process (if a process has more fds then inode numbers
+ * may be duplicated)
+ */
+#define LXPR_FD_PERPROC 2000
+
+/*
+ * Linux sector size for /proc/diskstats
+ */
+#define LXPR_SECTOR_SIZE 512
+
+/*
+ * external dirent characteristics
+ */
+typedef struct {
+ lxpr_nodetype_t d_type;
+ char *d_name;
+} lxpr_dirent_t;
+
+/*
+ * This is the lxprocfs private data object
+ * which is attached to v_data in the vnode structure
+ */
+typedef struct lxpr_node {
+ lxpr_nodetype_t lxpr_type; /* type of this node */
+ vnode_t *lxpr_vnode; /* vnode for the node */
+ vnode_t *lxpr_parent; /* parent directory */
+ vnode_t *lxpr_realvp; /* real vnode, file in dirs */
+ timestruc_t lxpr_time; /* creation etc time for file */
+ mode_t lxpr_mode; /* file mode bits */
+ uid_t lxpr_uid; /* file owner */
+ gid_t lxpr_gid; /* file group owner */
+ pid_t lxpr_pid; /* pid of proc referred to */
+ uint_t lxpr_desc; /* addl. descriptor (fd or tid) */
+ ino_t lxpr_ino; /* node id */
+} lxpr_node_t;
+
+struct zone; /* forward declaration */
+
+/*
+ * This is the lxprocfs private data object
+ * which is attached to vfs_data in the vfs structure
+ */
+typedef struct lxpr_mnt {
+ lxpr_node_t *lxprm_node; /* node at root of proc mount */
+ struct zone *lxprm_zone; /* zone for this mount */
+ ldi_ident_t lxprm_li; /* ident for ldi */
+} lxpr_mnt_t;
+
+extern vnodeops_t *lxpr_vnodeops;
+extern int nproc_highbit; /* highbit(v.v_nproc) */
+
+typedef struct mounta mounta_t;
+
+extern void lxpr_initnodecache();
+extern void lxpr_fininodecache();
+extern void lxpr_initrootnode(lxpr_node_t **, vfs_t *);
+extern ino_t lxpr_inode(lxpr_nodetype_t, pid_t, int);
+extern ino_t lxpr_parentinode(lxpr_node_t *);
+extern boolean_t lxpr_is_writable(lxpr_nodetype_t);
+extern lxpr_node_t *lxpr_getnode(vnode_t *, lxpr_nodetype_t, proc_t *, int);
+extern void lxpr_freenode(lxpr_node_t *);
+extern vnode_t *lxpr_lookup_fdnode(vnode_t *, const char *);
+extern int lxpr_readlink_fdnode(lxpr_node_t *, char *, size_t);
+
+typedef struct lxpr_uiobuf {
+ uio_t *uiop;
+ char *buffer;
+ uint32_t buffsize;
+ char *pos;
+ size_t beg;
+ int error;
+} lxpr_uiobuf_t;
+
+extern lxpr_uiobuf_t *lxpr_uiobuf_new(uio_t *);
+extern void lxpr_uiobuf_free(lxpr_uiobuf_t *);
+extern int lxpr_uiobuf_flush(lxpr_uiobuf_t *);
+extern void lxpr_uiobuf_seek(lxpr_uiobuf_t *, offset_t);
+extern boolean_t lxpr_uiobuf_nonblock(lxpr_uiobuf_t *);
+extern void lxpr_uiobuf_write(lxpr_uiobuf_t *, const char *, size_t);
+extern void lxpr_uiobuf_printf(lxpr_uiobuf_t *, const char *, ...);
+extern void lxpr_uiobuf_seterr(lxpr_uiobuf_t *, int);
+
+extern int lxpr_core_path_l2s(const char *, char *, size_t);
+extern int lxpr_core_path_s2l(const char *, char *, size_t);
+
+typedef enum lxpr_zombok {
+ NO_ZOMB = 0,
+ ZOMB_OK
+} zombok_t;
+
+proc_t *lxpr_lock(pid_t, zombok_t);
+void lxpr_unlock(proc_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef islower
+#define islower(x) (((unsigned)(x) >= 'a') && ((unsigned)(x) <= 'z'))
+#endif
+#ifndef toupper
+#define toupper(x) (islower(x) ? (x) - 'a' + 'A' : (x))
+#endif
+
+#endif /* _LX_PROC_H */
diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c b/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c
new file mode 100644
index 0000000000..c12118a3ea
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c
@@ -0,0 +1,851 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * lxprsubr.c: Various functions for the /lxproc vnodeops.
+ */
+
+#include <sys/varargs.h>
+
+#include <sys/cpuvar.h>
+#include <sys/mman.h>
+#include <sys/vmsystm.h>
+#include <sys/prsystm.h>
+#include <sys/zfs_ioctl.h>
+
+#include "lx_proc.h"
+
+#define LXPRCACHE_NAME "lxbpr_cache"
+
+static int lxpr_node_constructor(void *, void *, int);
+static void lxpr_node_destructor(void *, void *);
+
+static kmem_cache_t *lxpr_node_cache;
+
+int lx_pr_bufsize = 4000;
+
+struct lxpr_zfs_ds {
+ list_node_t ds_link;
+ char ds_name[MAXPATHLEN];
+ uint64_t ds_cookie;
+};
+
+struct lxpr_uiobuf *
+lxpr_uiobuf_new(uio_t *uiop)
+{
+ /* Allocate memory for both lxpr_uiobuf and output buffer */
+ int bufsize = lx_pr_bufsize;
+ struct lxpr_uiobuf *uiobuf =
+ kmem_alloc(sizeof (struct lxpr_uiobuf) + bufsize, KM_SLEEP);
+
+ uiobuf->uiop = uiop;
+ uiobuf->buffer = (char *)&uiobuf[1];
+ uiobuf->buffsize = bufsize;
+ uiobuf->pos = uiobuf->buffer;
+ uiobuf->beg = 0;
+ uiobuf->error = 0;
+
+ return (uiobuf);
+}
+
+void
+lxpr_uiobuf_free(struct lxpr_uiobuf *uiobuf)
+{
+ ASSERT(uiobuf != NULL);
+ ASSERT(uiobuf->pos == uiobuf->buffer);
+
+ kmem_free(uiobuf, sizeof (struct lxpr_uiobuf) + uiobuf->buffsize);
+}
+
+void
+lxpr_uiobuf_seek(struct lxpr_uiobuf *uiobuf, offset_t offset)
+{
+ uiobuf->uiop->uio_offset = (off_t)offset;
+}
+
+boolean_t
+lxpr_uiobuf_nonblock(struct lxpr_uiobuf *uiobuf)
+{
+ if ((uiobuf->uiop->uio_fmode & FNONBLOCK) != 0)
+ return (B_TRUE);
+ return (B_FALSE);
+}
+
+void
+lxpr_uiobuf_seterr(struct lxpr_uiobuf *uiobuf, int err)
+{
+ ASSERT(uiobuf->error == 0);
+
+ uiobuf->error = err;
+}
+
+int
+lxpr_uiobuf_flush(struct lxpr_uiobuf *uiobuf)
+{
+ off_t off = uiobuf->uiop->uio_offset;
+ caddr_t uaddr = uiobuf->buffer;
+ size_t beg = uiobuf->beg;
+ size_t size = (uintptr_t)uiobuf->pos - (uintptr_t)uaddr;
+
+ if (uiobuf->error == 0 && uiobuf->uiop->uio_resid != 0) {
+ ASSERT(off >= beg);
+
+ if (beg + size > off && off >= 0)
+ uiobuf->error =
+ uiomove(uaddr + (off - beg), size - (off - beg),
+ UIO_READ, uiobuf->uiop);
+
+ uiobuf->beg += size;
+ }
+
+ uiobuf->pos = uaddr;
+
+ return (uiobuf->error);
+}
+
+void
+lxpr_uiobuf_write(struct lxpr_uiobuf *uiobuf, const char *buf, size_t size)
+{
+ /* While we can still carry on */
+ while (uiobuf->error == 0 && uiobuf->uiop->uio_resid != 0) {
+ uintptr_t remain = (uintptr_t)uiobuf->buffsize -
+ ((uintptr_t)uiobuf->pos - (uintptr_t)uiobuf->buffer);
+
+ /* Enough space in buffer? */
+ if (remain >= size) {
+ bcopy(buf, uiobuf->pos, size);
+ uiobuf->pos += size;
+ return;
+ }
+
+ /* Not enough space, so copy all we can and try again */
+ bcopy(buf, uiobuf->pos, remain);
+ uiobuf->pos += remain;
+ (void) lxpr_uiobuf_flush(uiobuf);
+ buf += remain;
+ size -= remain;
+ }
+}
+
+#define TYPBUFFSIZE 256
+
+void
+lxpr_uiobuf_printf(struct lxpr_uiobuf *uiobuf, const char *fmt, ...)
+{
+ va_list args;
+ char buff[TYPBUFFSIZE];
+ int len;
+ char *buffer;
+
+ /* Can we still do any output */
+ if (uiobuf->error != 0 || uiobuf->uiop->uio_resid == 0)
+ return;
+
+ va_start(args, fmt);
+
+ /* Try using stack allocated buffer */
+ len = vsnprintf(buff, TYPBUFFSIZE, fmt, args);
+ if (len < TYPBUFFSIZE) {
+ va_end(args);
+ lxpr_uiobuf_write(uiobuf, buff, len);
+ return;
+ }
+
+ /* Not enough space in pre-allocated buffer */
+ buffer = kmem_alloc(len + 1, KM_SLEEP);
+
+ /*
+ * We know we allocated the correct amount of space
+ * so no check on the return value
+ */
+ (void) vsnprintf(buffer, len+1, fmt, args);
+ lxpr_uiobuf_write(uiobuf, buffer, len);
+ va_end(args);
+ kmem_free(buffer, len+1);
+}
+
+/*
+ * lxpr_lock():
+ *
+ * Lookup process from pid and return with p_plock and P_PR_LOCK held.
+ */
+proc_t *
+lxpr_lock(pid_t pid, zombok_t zombie_ok)
+{
+ proc_t *p;
+ kmutex_t *mp;
+ pid_t find_pid;
+ uint_t flags;
+
+ ASSERT(!MUTEX_HELD(&pidlock));
+
+ for (;;) {
+ mutex_enter(&pidlock);
+
+ /*
+ * If the pid is 1, we really want the zone's init process;
+ * if 0 we want zsched.
+ */
+ if (pid == 1) {
+ find_pid = curproc->p_zone->zone_proc_initpid;
+ } else if (pid == 0) {
+ find_pid = curproc->p_zone->zone_zsched->p_pid;
+ } else {
+ find_pid = pid;
+ }
+ p = prfind(find_pid);
+
+ if (p == NULL || p->p_stat == SIDL) {
+ mutex_exit(&pidlock);
+ return (NULL);
+ }
+
+ /*
+ * p_lock is persistent, but p itself is not -- it could
+ * vanish during cv_wait(). Load p->p_lock now so we can
+ * drop it after cv_wait() without referencing p.
+ */
+ mp = &p->p_lock;
+ mutex_enter(mp);
+
+ mutex_exit(&pidlock);
+
+ /*
+ * Filter out exiting or zombie processes, if requested.
+ */
+ if (zombie_ok == NO_ZOMB &&
+ ((p->p_flag & SEXITING) || p->p_stat == SZOMB)) {
+ mutex_exit(mp);
+ return (NULL);
+ }
+
+ flags = p->p_proc_flag & (P_PR_LOCK | P_PR_EXEC);
+ if (flags == 0) {
+ break;
+ } else if (flags == P_PR_EXEC && p == curproc) {
+ /*
+ * Forward progress with (only) the PR_EXEC flag is
+ * safe if a process is accessing resources in its own
+ * piddir. Executing its own /proc/<pid>/exe symlink
+ * is one potential example.
+ *
+ * For all other processes, it is necessary to wait
+ * until the exec is completed.
+ */
+ break;
+ }
+
+ cv_wait(&pr_pid_cv[p->p_slot], mp);
+ mutex_exit(mp);
+ }
+
+ p->p_proc_flag |= P_PR_LOCK;
+ THREAD_KPRI_REQUEST();
+ return (p);
+}
+
+/*
+ * lxpr_unlock()
+ *
+ * Unlock locked process
+ */
+void
+lxpr_unlock(proc_t *p)
+{
+ ASSERT(p->p_proc_flag & P_PR_LOCK);
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ ASSERT(!MUTEX_HELD(&pidlock));
+
+ cv_signal(&pr_pid_cv[p->p_slot]);
+ p->p_proc_flag &= ~P_PR_LOCK;
+ mutex_exit(&p->p_lock);
+ THREAD_KPRI_RELEASE();
+}
+
+void
+lxpr_initnodecache()
+{
+ lxpr_node_cache = kmem_cache_create(LXPRCACHE_NAME,
+ sizeof (lxpr_node_t), 0,
+ lxpr_node_constructor, lxpr_node_destructor, NULL, NULL, NULL, 0);
+}
+
+void
+lxpr_fininodecache()
+{
+ kmem_cache_destroy(lxpr_node_cache);
+}
+
+/* ARGSUSED */
+static int
+lxpr_node_constructor(void *buf, void *un, int kmflags)
+{
+ lxpr_node_t *lxpnp = buf;
+ vnode_t *vp;
+
+ vp = lxpnp->lxpr_vnode = vn_alloc(kmflags);
+ if (vp == NULL)
+ return (-1);
+
+ (void) vn_setops(vp, lxpr_vnodeops);
+ vp->v_data = lxpnp;
+
+ return (0);
+}
+
+/* ARGSUSED */
+static void
+lxpr_node_destructor(void *buf, void *un)
+{
+ lxpr_node_t *lxpnp = buf;
+
+ vn_free(LXPTOV(lxpnp));
+}
+
+/*
+ * Calculate an inode number
+ *
+ * This takes various bits of info and munges them
+ * to give the inode number for an lxproc node
+ */
+ino_t
+lxpr_inode(lxpr_nodetype_t type, pid_t pid, int desc)
+{
+ if (pid == 1) {
+ pid = curproc->p_zone->zone_proc_initpid;
+ } else if (pid == 0) {
+ pid = curproc->p_zone->zone_zsched->p_pid;
+ }
+
+ switch (type) {
+ case LXPR_PIDDIR:
+ return (maxpid + pid + 1);
+ case LXPR_PID_TASK_IDDIR:
+ return (maxpid + (desc * 10));
+ case LXPR_PROCDIR:
+ return (maxpid + 2);
+ case LXPR_PID_FD_FD:
+ return (maxpid + 2 +
+ (pid * (LXPR_FD_PERPROC + LXPR_NFILES)) +
+ LXPR_NFILES + desc);
+ default:
+ return (maxpid + 2 +
+ (pid * (LXPR_FD_PERPROC + LXPR_NFILES)) +
+ type);
+ }
+}
+
+/*
+ * Return inode number of parent (directory)
+ */
+ino_t
+lxpr_parentinode(lxpr_node_t *lxpnp)
+{
+ /*
+ * If the input node is the root then the parent inode
+ * is the mounted on inode so just return our inode number
+ */
+ if (lxpnp->lxpr_type != LXPR_PROCDIR)
+ return (VTOLXP(lxpnp->lxpr_parent)->lxpr_ino);
+ else
+ return (lxpnp->lxpr_ino);
+}
+
+/*
+ * Allocate a new lxproc node
+ *
+ * This also allocates the vnode associated with it
+ */
+lxpr_node_t *
+lxpr_getnode(vnode_t *dp, lxpr_nodetype_t type, proc_t *p, int desc)
+{
+ lxpr_node_t *lxpnp;
+ vnode_t *vp;
+ user_t *up;
+ timestruc_t now;
+
+ /*
+ * Allocate a new node. It is deallocated in vop_innactive
+ */
+ lxpnp = kmem_cache_alloc(lxpr_node_cache, KM_SLEEP);
+
+ /*
+ * Set defaults (may be overridden below)
+ */
+ gethrestime(&now);
+ lxpnp->lxpr_type = type;
+ lxpnp->lxpr_realvp = NULL;
+ lxpnp->lxpr_parent = dp;
+ lxpnp->lxpr_desc = desc;
+ VN_HOLD(dp);
+ if (p != NULL) {
+ if (p->p_pid == curproc->p_zone->zone_proc_initpid) {
+ lxpnp->lxpr_pid = 1;
+ } else if (p->p_pid == curproc->p_zone->zone_zsched->p_pid) {
+ lxpnp->lxpr_pid = 0;
+ } else {
+ lxpnp->lxpr_pid = p->p_pid;
+ }
+
+ lxpnp->lxpr_time = PTOU(p)->u_start;
+ lxpnp->lxpr_uid = crgetruid(p->p_cred);
+ lxpnp->lxpr_gid = crgetrgid(p->p_cred);
+ lxpnp->lxpr_ino = lxpr_inode(type, p->p_pid, desc);
+ } else {
+ /* Pretend files without a proc belong to sched */
+ lxpnp->lxpr_pid = 0;
+ lxpnp->lxpr_time = now;
+ lxpnp->lxpr_uid = lxpnp->lxpr_gid = 0;
+ lxpnp->lxpr_ino = lxpr_inode(type, 0, 0);
+ }
+
+ /* initialize the vnode data */
+ vp = lxpnp->lxpr_vnode;
+ vn_reinit(vp);
+ vp->v_flag = VNOCACHE|VNOMAP|VNOSWAP|VNOMOUNT;
+ vp->v_vfsp = dp->v_vfsp;
+
+ /*
+ * Do node specific stuff
+ */
+ if (lxpr_is_writable(type)) {
+ /* These two have different modes; handled later. */
+ if (type != LXPR_PID_FD_FD && type != LXPR_PID_TID_FD_FD) {
+ vp->v_type = VREG;
+ lxpnp->lxpr_mode = 0644;
+ return (lxpnp);
+ }
+ }
+
+ switch (type) {
+ case LXPR_PROCDIR:
+ vp->v_flag |= VROOT;
+ vp->v_type = VDIR;
+ lxpnp->lxpr_mode = 0555; /* read-search by everyone */
+ break;
+
+ case LXPR_PID_CURDIR:
+ ASSERT(p != NULL);
+
+ /*
+ * Zombie check. p_stat is officially protected by pidlock,
+ * but we can't grab pidlock here because we already hold
+ * p_lock. Luckily if we look at the process exit code
+ * we see that p_stat only transisions from SRUN to SZOMB
+ * while p_lock is held. Aside from this, the only other
+ * p_stat transition that we need to be aware about is
+ * SIDL to SRUN, but that's not a problem since lxpr_lock()
+ * ignores nodes in the SIDL state so we'll never get a node
+ * that isn't already in the SRUN state.
+ */
+ if (p->p_stat == SZOMB) {
+ lxpnp->lxpr_realvp = NULL;
+ } else {
+ up = PTOU(p);
+ lxpnp->lxpr_realvp = up->u_cdir;
+ ASSERT(lxpnp->lxpr_realvp != NULL);
+ VN_HOLD(lxpnp->lxpr_realvp);
+ }
+ vp->v_type = VLNK;
+ lxpnp->lxpr_mode = 0777; /* anyone does anything ! */
+ break;
+
+ case LXPR_PID_ROOTDIR:
+ ASSERT(p != NULL);
+ /* Zombie check. see locking comment above */
+ if (p->p_stat == SZOMB) {
+ lxpnp->lxpr_realvp = NULL;
+ } else {
+ up = PTOU(p);
+ lxpnp->lxpr_realvp =
+ up->u_rdir != NULL ? up->u_rdir : rootdir;
+ ASSERT(lxpnp->lxpr_realvp != NULL);
+ VN_HOLD(lxpnp->lxpr_realvp);
+ }
+ vp->v_type = VLNK;
+ lxpnp->lxpr_mode = 0777; /* anyone does anything ! */
+ break;
+
+ case LXPR_PID_EXE:
+ ASSERT(p != NULL);
+ lxpnp->lxpr_realvp = p->p_exec;
+ if (lxpnp->lxpr_realvp != NULL) {
+ VN_HOLD(lxpnp->lxpr_realvp);
+ }
+ vp->v_type = VLNK;
+ lxpnp->lxpr_mode = 0777;
+ break;
+
+ case LXPR_SELF:
+ vp->v_type = VLNK;
+ lxpnp->lxpr_mode = 0777; /* anyone does anything ! */
+ break;
+
+ case LXPR_PID_TASKDIR:
+ ASSERT(p != NULL);
+ vp->v_type = VDIR;
+ lxpnp->lxpr_mode = 0555; /* read-search by everyone */
+ break;
+
+ case LXPR_PID_TASK_IDDIR:
+ ASSERT(p != NULL);
+ vp->v_type = VDIR;
+ lxpnp->lxpr_mode = 0555; /* read-search by everyone */
+ break;
+
+ case LXPR_PID_FD_FD:
+ case LXPR_PID_TID_FD_FD:
+ ASSERT(p != NULL);
+ /* lxpr_realvp is set after we return */
+ lxpnp->lxpr_mode = 0700; /* read-write-exe owner only */
+ vp->v_type = VLNK;
+ break;
+
+ case LXPR_PID_FDDIR:
+ case LXPR_PID_TID_FDDIR:
+ ASSERT(p != NULL);
+ vp->v_type = VDIR;
+ lxpnp->lxpr_mode = 0500; /* read-search by owner only */
+ break;
+
+ case LXPR_PIDDIR:
+ ASSERT(p != NULL);
+ vp->v_type = VDIR;
+ lxpnp->lxpr_mode = 0511;
+ break;
+
+ case LXPR_NETDIR:
+ case LXPR_SYSDIR:
+ case LXPR_SYS_FSDIR:
+ case LXPR_SYS_FS_INOTIFYDIR:
+ case LXPR_SYS_KERNELDIR:
+ case LXPR_SYS_KERNEL_RANDDIR:
+ case LXPR_SYS_NETDIR:
+ case LXPR_SYS_NET_COREDIR:
+ case LXPR_SYS_NET_IPV4DIR:
+ case LXPR_SYS_VMDIR:
+ vp->v_type = VDIR;
+ lxpnp->lxpr_mode = 0555; /* read-search by all */
+ break;
+
+ case LXPR_PID_ENV:
+ case LXPR_PID_MEM:
+ ASSERT(p != NULL);
+ /*FALLTHRU*/
+ case LXPR_KCORE:
+ vp->v_type = VREG;
+ lxpnp->lxpr_mode = 0400; /* read-only by owner only */
+ break;
+
+ default:
+ vp->v_type = VREG;
+ lxpnp->lxpr_mode = 0444; /* read-only by all */
+ break;
+ }
+
+ return (lxpnp);
+}
+
+
+/*
+ * Free the storage obtained from lxpr_getnode().
+ */
+void
+lxpr_freenode(lxpr_node_t *lxpnp)
+{
+ ASSERT(lxpnp != NULL);
+ ASSERT(LXPTOV(lxpnp) != NULL);
+
+ /*
+ * delete any association with realvp
+ */
+ if (lxpnp->lxpr_realvp != NULL)
+ VN_RELE(lxpnp->lxpr_realvp);
+
+ /*
+ * delete any association with parent vp
+ */
+ if (lxpnp->lxpr_parent != NULL)
+ VN_RELE(lxpnp->lxpr_parent);
+
+ /*
+ * Release the lxprnode.
+ */
+ kmem_cache_free(lxpr_node_cache, lxpnp);
+}
+
+/*
+ * Attempt to locate vnode for /proc/<pid>/fd/<#>.
+ */
+vnode_t *
+lxpr_lookup_fdnode(vnode_t *dvp, const char *name)
+{
+ lxpr_node_t *lxdp = VTOLXP(dvp);
+ lxpr_node_t *lxfp;
+ char *endptr = NULL;
+ long num;
+ int fd;
+ proc_t *p;
+ vnode_t *vp = NULL;
+ file_t *fp;
+ uf_entry_t *ufp;
+ uf_info_t *fip;
+
+ ASSERT(lxdp->lxpr_type == LXPR_PID_FDDIR ||
+ lxdp->lxpr_type == LXPR_PID_TID_FDDIR);
+
+ if (ddi_strtol(name, &endptr, 10, &num) != 0) {
+ return (NULL);
+ } else if (name[0] < '0' || name[0] > '9' || *endptr != '\0') {
+ /*
+ * ddi_strtol allows leading spaces and trailing garbage
+ * We do not tolerate such foolishness.
+ */
+ return (NULL);
+ } else if ((fd = (int)num) < 0) {
+ return (NULL);
+ }
+
+ /* Lock the owner process */
+ p = lxpr_lock(lxdp->lxpr_pid, NO_ZOMB);
+ if ((p == NULL))
+ return (NULL);
+
+ /* Not applicable to processes which are system-owned. */
+ if ((p->p_flag & SSYS) || (p->p_as == &kas)) {
+ lxpr_unlock(p);
+ return (NULL);
+ }
+
+ lxfp = lxpr_getnode(dvp, LXPR_PID_FD_FD, p, fd);
+
+ /*
+ * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from
+ * going away while we dereference into fi_list.
+ */
+ fip = P_FINFO(p);
+ mutex_exit(&p->p_lock);
+ mutex_enter(&fip->fi_lock);
+ if (fd < fip->fi_nfiles) {
+ UF_ENTER(ufp, fip, fd);
+ if ((fp = ufp->uf_file) != NULL) {
+ vp = fp->f_vnode;
+ VN_HOLD(vp);
+ }
+ UF_EXIT(ufp);
+ }
+ mutex_exit(&fip->fi_lock);
+ mutex_enter(&p->p_lock);
+
+ if (vp == NULL) {
+ lxpr_unlock(p);
+ lxpr_freenode(lxfp);
+ return (NULL);
+ } else {
+ /*
+ * Fill in the lxpr_node so future references will be able to
+ * find the underlying vnode. The vnode is held on the realvp.
+ */
+ lxfp->lxpr_realvp = vp;
+
+ /*
+ * For certain entries (sockets, pipes, etc), Linux expects a
+ * bogus-named symlink. If that's the case, report the type as
+ * VNON to bypass link-following elsewhere in the vfs system.
+ *
+ * See lxpr_readlink for more details.
+ */
+ if (lxpr_readlink_fdnode(lxfp, NULL, 0) == 0)
+ LXPTOV(lxfp)->v_type = VNON;
+ }
+
+ lxpr_unlock(p);
+ ASSERT(LXPTOV(lxfp) != NULL);
+ return (LXPTOV(lxfp));
+}
+
+/*
+ * Attempt to create Linux-proc-style fake symlinks contents for supported
+ * /proc/<pid>/fd/<#> entries.
+ */
+int
+lxpr_readlink_fdnode(lxpr_node_t *lxpnp, char *bp, size_t len)
+{
+ const char *format;
+ vnode_t *rvp = lxpnp->lxpr_realvp;
+ vattr_t attr;
+
+ switch (rvp->v_type) {
+ case VSOCK:
+ format = "socket:[%lu]";
+ break;
+ case VFIFO:
+ format = "pipe:[%lu]";
+ break;
+ default:
+ return (-1);
+ }
+
+ /* Fetch the inode of the underlying vnode */
+ if (VOP_GETATTR(rvp, &attr, 0, CRED(), NULL) != 0)
+ return (-1);
+
+ if (bp != NULL)
+ (void) snprintf(bp, len, format, (ino_t)attr.va_nodeid);
+ return (0);
+}
+
+/*
+ * Translate a Linux core_pattern path to a native Illumos one, by replacing
+ * the appropriate % escape sequences.
+ *
+ * Any % escape sequences that are not recognised are double-escaped so that
+ * they will be inserted literally into the path (to mimic Linux).
+ */
+int
+lxpr_core_path_l2s(const char *inp, char *outp, size_t outsz)
+{
+ int i = 0, j = 0;
+ char x;
+
+ while (j < outsz - 1) {
+ x = inp[i++];
+ if (x == '\0')
+ break;
+ if (x != '%') {
+ outp[j++] = x;
+ continue;
+ }
+
+ x = inp[i++];
+ if (x == '\0')
+ break;
+
+ /* Make sure we have enough space in the output buffer. */
+ if (j + 2 >= outsz - 1)
+ return (EINVAL);
+
+ switch (x) {
+ case 'E':
+ if (j + 4 >= outsz - 1)
+ return (EINVAL);
+ outp[j++] = '%';
+ outp[j++] = 'd';
+ outp[j++] = '%';
+ outp[j++] = 'f';
+ break;
+ case 'e':
+ outp[j++] = '%';
+ outp[j++] = 'f';
+ break;
+ case 'p':
+ case 'g':
+ case 'u':
+ case 't':
+ case '%':
+ outp[j++] = '%';
+ outp[j++] = x;
+ break;
+ case 'h':
+ outp[j++] = '%';
+ outp[j++] = 'n';
+ break;
+ default:
+ /* No translation, make it literal. */
+ if (j + 3 >= outsz - 1)
+ return (EINVAL);
+ outp[j++] = '%';
+ outp[j++] = '%';
+ outp[j++] = x;
+ break;
+ }
+ }
+
+ outp[j] = '\0';
+ return (0);
+}
+
+/*
+ * Translate an Illumos core pattern path back to Linux format.
+ */
+int
+lxpr_core_path_s2l(const char *inp, char *outp, size_t outsz)
+{
+ int i = 0, j = 0;
+ char x;
+
+ while (j < outsz - 1) {
+ x = inp[i++];
+ if (x == '\0')
+ break;
+ if (x != '%') {
+ outp[j++] = x;
+ continue;
+ }
+
+ x = inp[i++];
+ if (x == '\0')
+ break;
+
+ /* Make sure we have enough space in the output buffer. */
+ if (j + 2 >= outsz - 1)
+ return (EINVAL);
+
+ switch (x) {
+ case 'd':
+ /* No Linux equivalent unless it's %d%f. */
+ if (inp[i] == '%' && inp[i + 1] == 'f') {
+ i += 2;
+ outp[j++] = '%';
+ outp[j++] = 'E';
+ }
+ break;
+ case 'f':
+ outp[j++] = '%';
+ outp[j++] = 'e';
+ break;
+ case 'p':
+ case 'P':
+ case 'g':
+ case 'u':
+ case 't':
+ case '%':
+ outp[j++] = '%';
+ outp[j++] = (x == 'P' ? 'p' : x);
+ break;
+ case 'n':
+ outp[j++] = '%';
+ outp[j++] = 'h';
+ break;
+ default:
+ /* No translation. */
+ break;
+ }
+ }
+
+ outp[j] = '\0';
+ return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prvfsops.c b/usr/src/uts/common/brand/lx/procfs/lx_prvfsops.c
new file mode 100644
index 0000000000..b4dc5091c2
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/procfs/lx_prvfsops.c
@@ -0,0 +1,377 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
+ */
+
+/*
+ * lxprvfsops.c: vfs operations for /lxprocfs.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/cmn_err.h>
+#include <sys/cred.h>
+#include <sys/debug.h>
+#include <sys/errno.h>
+#include <sys/proc.h>
+#include <sys/stat.h>
+#include <sys/statvfs.h>
+#include <sys/sysmacros.h>
+#include <sys/systm.h>
+#include <sys/var.h>
+#include <sys/vfs.h>
+#include <sys/vfs_opreg.h>
+#include <sys/vnode.h>
+#include <sys/mode.h>
+#include <sys/signal.h>
+#include <sys/user.h>
+#include <sys/mount.h>
+#include <sys/bitmap.h>
+#include <sys/kmem.h>
+#include <sys/policy.h>
+#include <sys/modctl.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+#include <sys/lx_impl.h>
+#include <sys/lx_brand.h>
+
+#include "lx_proc.h"
+
+/* Module level parameters */
+static int lxprocfstype;
+static dev_t lxprocdev;
+static kmutex_t lxpr_mount_lock;
+
+int nproc_highbit; /* highbit(v.v_nproc) */
+
+static int lxpr_mount(vfs_t *, vnode_t *, mounta_t *, cred_t *);
+static int lxpr_unmount(vfs_t *, int, cred_t *);
+static int lxpr_root(vfs_t *, vnode_t **);
+static int lxpr_statvfs(vfs_t *, statvfs64_t *);
+static int lxpr_init(int, char *);
+
+static vfsdef_t vfw = {
+ VFSDEF_VERSION,
+ "lx_proc",
+ lxpr_init,
+ VSW_ZMOUNT,
+ NULL
+};
+
+/*
+ * Module linkage information for the kernel.
+ */
+extern struct mod_ops mod_fsops;
+
+static struct modlfs modlfs = {
+ &mod_fsops, "lx brand procfs", &vfw
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, (void *)&modlfs, NULL
+};
+
+int
+_init(void)
+{
+ return (mod_install(&modlinkage));
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ int retval;
+
+ /*
+ * attempt to unload the module
+ */
+ if ((retval = mod_remove(&modlinkage)) != 0)
+ goto done;
+
+ /*
+ * destroy lxpr_node cache
+ */
+ lxpr_fininodecache();
+
+ /*
+ * clean out the vfsops and vnodeops
+ */
+ (void) vfs_freevfsops_by_type(lxprocfstype);
+ vn_freevnodeops(lxpr_vnodeops);
+
+ mutex_destroy(&lxpr_mount_lock);
+done:
+ return (retval);
+}
+
+static int
+lxpr_init(int fstype, char *name)
+{
+ static const fs_operation_def_t lxpr_vfsops_template[] = {
+ VFSNAME_MOUNT, { .vfs_mount = lxpr_mount },
+ VFSNAME_UNMOUNT, { .vfs_unmount = lxpr_unmount },
+ VFSNAME_ROOT, { .vfs_root = lxpr_root },
+ VFSNAME_STATVFS, { .vfs_statvfs = lxpr_statvfs },
+ NULL, NULL
+ };
+ extern const fs_operation_def_t lxpr_vnodeops_template[];
+ int error;
+ major_t dev;
+
+ nproc_highbit = highbit(v.v_proc);
+ lxprocfstype = fstype;
+ ASSERT(lxprocfstype != 0);
+
+ mutex_init(&lxpr_mount_lock, NULL, MUTEX_DEFAULT, NULL);
+
+ /*
+ * Associate VFS ops vector with this fstype.
+ */
+ error = vfs_setfsops(fstype, lxpr_vfsops_template, NULL);
+ if (error != 0) {
+ cmn_err(CE_WARN, "lxpr_init: bad vfs ops template");
+ return (error);
+ }
+
+ /*
+ * Set up vnode ops vector too.
+ */
+ error = vn_make_ops(name, lxpr_vnodeops_template, &lxpr_vnodeops);
+ if (error != 0) {
+ (void) vfs_freevfsops_by_type(fstype);
+ cmn_err(CE_WARN, "lxpr_init: bad vnode ops template");
+ return (error);
+ }
+
+ /*
+ * Assign a unique "device" number (used by stat(2)).
+ */
+ if ((dev = getudev()) == (major_t)-1) {
+ cmn_err(CE_WARN, "lxpr_init: can't get unique device number");
+ dev = 0;
+ }
+
+ /*
+ * Make the pseudo device
+ */
+ lxprocdev = makedevice(dev, 0);
+
+ /*
+ * Initialise cache for lxpr_nodes
+ */
+ lxpr_initnodecache();
+
+ return (0);
+}
+
+static int
+lxpr_mount(vfs_t *vfsp, vnode_t *mvp, mounta_t *uap, cred_t *cr)
+{
+ lxpr_mnt_t *lxpr_mnt;
+ zone_t *zone = curproc->p_zone;
+ ldi_ident_t li;
+ int err;
+
+ /*
+ * must be root to mount
+ */
+ if (secpolicy_fs_mount(cr, mvp, vfsp) != 0)
+ return (EPERM);
+
+ /*
+ * mount point must be a directory
+ */
+ if (mvp->v_type != VDIR)
+ return (ENOTDIR);
+
+ /*
+ * Mounting lx_proc is not allowed outside an LX zone.
+ */
+ if (zone->zone_brand != &lx_brand) {
+ return (ENOTSUP);
+ }
+
+ /*
+ * Having the resource be anything but "lxproc" doesn't make sense
+ */
+ vfs_setresource(vfsp, "lxproc", 0);
+
+ lxpr_mnt = kmem_alloc(sizeof (*lxpr_mnt), KM_SLEEP);
+
+ if ((err = ldi_ident_from_mod(&modlinkage, &li)) != 0) {
+ kmem_free(lxpr_mnt, sizeof (*lxpr_mnt));
+ return (err);
+ }
+ lxpr_mnt->lxprm_li = li;
+
+ mutex_enter(&lxpr_mount_lock);
+
+ /*
+ * Ensure we don't allow overlaying mounts
+ */
+ mutex_enter(&mvp->v_lock);
+ if ((uap->flags & MS_OVERLAY) == 0 &&
+ (mvp->v_count > 1 || (mvp->v_flag & VROOT))) {
+ mutex_exit(&mvp->v_lock);
+ mutex_exit(&lxpr_mount_lock);
+ kmem_free(lxpr_mnt, sizeof ((*lxpr_mnt)));
+ return (EBUSY);
+ }
+ mutex_exit(&mvp->v_lock);
+
+ /*
+ * Hold a zone reference for access to the lxzd structure.
+ */
+ zone_hold(lxpr_mnt->lxprm_zone = zone);
+
+ /*
+ * Allocate the first vnode and arbitrarily set the parent vnode to the
+ * mounted over directory
+ */
+ lxpr_mnt->lxprm_node = lxpr_getnode(mvp, LXPR_PROCDIR, NULL, 0);
+
+ /* Correctly set the fs for the root node */
+ lxpr_mnt->lxprm_node->lxpr_vnode->v_vfsp = vfsp;
+
+ vfs_make_fsid(&vfsp->vfs_fsid, lxprocdev, lxprocfstype);
+ vfsp->vfs_bsize = DEV_BSIZE;
+ vfsp->vfs_fstype = lxprocfstype;
+ vfsp->vfs_data = (caddr_t)lxpr_mnt;
+ vfsp->vfs_dev = lxprocdev;
+
+ mutex_exit(&lxpr_mount_lock);
+
+ return (0);
+}
+
+static int
+lxpr_unmount(vfs_t *vfsp, int flag, cred_t *cr)
+{
+ lxpr_mnt_t *lxpr_mnt = (lxpr_mnt_t *)vfsp->vfs_data;
+ vnode_t *vp;
+ int count;
+
+ ASSERT(lxpr_mnt != NULL);
+ vp = LXPTOV(lxpr_mnt->lxprm_node);
+
+ mutex_enter(&lxpr_mount_lock);
+
+ /*
+ * must be root to unmount
+ */
+ if (secpolicy_fs_unmount(cr, vfsp) != 0) {
+ mutex_exit(&lxpr_mount_lock);
+ return (EPERM);
+ }
+
+ /*
+ * forced unmount is not supported by this file system
+ */
+ if (flag & MS_FORCE) {
+ mutex_exit(&lxpr_mount_lock);
+ return (ENOTSUP);
+ }
+
+ /*
+ * Ensure that no vnodes are in use on this mount point.
+ */
+ mutex_enter(&vp->v_lock);
+ count = vp->v_count;
+ mutex_exit(&vp->v_lock);
+ if (count > 1) {
+ mutex_exit(&lxpr_mount_lock);
+ return (EBUSY);
+ }
+
+
+ /*
+ * purge the dnlc cache for vnode entries
+ * associated with this file system
+ */
+ count = dnlc_purge_vfsp(vfsp, 0);
+
+ /*
+ * free up the lxprnode
+ */
+ lxpr_freenode(lxpr_mnt->lxprm_node);
+ zone_rele(lxpr_mnt->lxprm_zone);
+
+ ldi_ident_release(lxpr_mnt->lxprm_li);
+
+ kmem_free(lxpr_mnt, sizeof (*lxpr_mnt));
+
+ mutex_exit(&lxpr_mount_lock);
+
+ return (0);
+}
+
+static int
+lxpr_root(vfs_t *vfsp, vnode_t **vpp)
+{
+ lxpr_node_t *lxpnp = ((lxpr_mnt_t *)vfsp->vfs_data)->lxprm_node;
+ vnode_t *vp = LXPTOV(lxpnp);
+
+ VN_HOLD(vp);
+ *vpp = vp;
+ return (0);
+}
+
+static int
+lxpr_statvfs(vfs_t *vfsp, statvfs64_t *sp)
+{
+ int n;
+ dev32_t d32;
+ extern uint_t nproc;
+
+ n = v.v_proc - nproc;
+
+ bzero((caddr_t)sp, sizeof (*sp));
+ sp->f_bsize = DEV_BSIZE;
+ sp->f_frsize = DEV_BSIZE;
+ sp->f_blocks = (fsblkcnt64_t)0;
+ sp->f_bfree = (fsblkcnt64_t)0;
+ sp->f_bavail = (fsblkcnt64_t)0;
+ sp->f_files = (fsfilcnt64_t)v.v_proc + 2;
+ sp->f_ffree = (fsfilcnt64_t)n;
+ sp->f_favail = (fsfilcnt64_t)n;
+ (void) cmpldev(&d32, vfsp->vfs_dev);
+ sp->f_fsid = d32;
+ /* It is guaranteed that vsw_name will fit in f_basetype */
+ (void) strcpy(sp->f_basetype, vfssw[lxprocfstype].vsw_name);
+ sp->f_flag = vf_to_stf(vfsp->vfs_flag);
+ sp->f_namemax = 64; /* quite arbitrary */
+ bzero(sp->f_fstr, sizeof (sp->f_fstr));
+
+ /* We know f_fstr is 32 chars */
+ (void) strcpy(sp->f_fstr, "/proc");
+ (void) strcpy(&sp->f_fstr[6], "/proc");
+
+ return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c
new file mode 100644
index 0000000000..262339c31c
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c
@@ -0,0 +1,7085 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
+ */
+
+/*
+ * lx_proc -- a Linux-compatible /proc for the LX brand
+ *
+ * We have -- confusingly -- two implementations of Linux /proc. One is to
+ * support native (but Linux-borne) programs that wish to view the native
+ * system through the Linux /proc model; the other -- this one -- is to
+ * support Linux binaries via the LX brand. These two implementations differ
+ * greatly in their aspirations (and their willingness to bend the truth
+ * of the system to accommodate those aspirations); they should not be unified.
+ */
+
+#include <sys/cpupart.h>
+#include <sys/cpuvar.h>
+#include <sys/session.h>
+#include <sys/vmparam.h>
+#include <sys/mman.h>
+#include <vm/rm.h>
+#include <vm/seg_vn.h>
+#include <sys/sdt.h>
+#include <lx_signum.h>
+#include <sys/strlog.h>
+#include <sys/stropts.h>
+#include <sys/cmn_err.h>
+#include <sys/lx_brand.h>
+#include <lx_auxv.h>
+#include <sys/x86_archext.h>
+#include <sys/archsystm.h>
+#include <sys/fp.h>
+#include <sys/pool_pset.h>
+#include <sys/pset.h>
+#include <sys/zone.h>
+#include <sys/fcntl.h>
+#include <sys/pghw.h>
+#include <sys/vfs_opreg.h>
+#include <sys/param.h>
+#include <sys/utsname.h>
+#include <sys/rctl.h>
+#include <sys/kstat.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_types.h>
+#include <sys/brand.h>
+#include <sys/cred_impl.h>
+#include <sys/tihdr.h>
+#include <sys/corectl.h>
+#include <inet/ip.h>
+#include <inet/ip_ire.h>
+#include <inet/ip6.h>
+#include <inet/ip_if.h>
+#include <inet/tcp.h>
+#include <inet/tcp_impl.h>
+#include <inet/udp_impl.h>
+#include <inet/ipclassifier.h>
+#include <sys/socketvar.h>
+#include <fs/sockfs/socktpi.h>
+
+/* Dependent on procfs */
+extern kthread_t *prchoose(proc_t *);
+extern int prreadargv(proc_t *, char *, size_t, size_t *);
+extern int prreadenvv(proc_t *, char *, size_t, size_t *);
+extern int prreadbuf(proc_t *, uintptr_t, uint8_t *, size_t, size_t *);
+
+#include "lx_proc.h"
+
+extern pgcnt_t swapfs_minfree;
+extern time_t boot_time;
+
+/*
+ * Pointer to the vnode ops vector for this fs.
+ * This is instantiated in lxprinit() in lxpr_vfsops.c
+ */
+vnodeops_t *lxpr_vnodeops;
+
+static int lxpr_open(vnode_t **, int, cred_t *, caller_context_t *);
+static int lxpr_close(vnode_t *, int, int, offset_t, cred_t *,
+ caller_context_t *);
+static int lxpr_create(struct vnode *, char *, struct vattr *, enum vcexcl,
+ int, struct vnode **, struct cred *, int, caller_context_t *, vsecattr_t *);
+static int lxpr_read(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
+static int lxpr_write(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
+static int lxpr_space(vnode_t *, int, flock64_t *, int, offset_t, cred_t *,
+ caller_context_t *);
+static int lxpr_setattr(vnode_t *, vattr_t *, int, cred_t *,
+ caller_context_t *);
+static int lxpr_getattr(vnode_t *, vattr_t *, int, cred_t *,
+ caller_context_t *);
+static int lxpr_access(vnode_t *, int, int, cred_t *, caller_context_t *);
+static int lxpr_lookup(vnode_t *, char *, vnode_t **,
+ pathname_t *, int, vnode_t *, cred_t *, caller_context_t *, int *,
+ pathname_t *);
+static int lxpr_readdir(vnode_t *, uio_t *, cred_t *, int *,
+ caller_context_t *, int);
+static int lxpr_readlink(vnode_t *, uio_t *, cred_t *, caller_context_t *);
+static int lxpr_cmp(vnode_t *, vnode_t *, caller_context_t *);
+static int lxpr_realvp(vnode_t *, vnode_t **, caller_context_t *);
+static int lxpr_sync(void);
+static void lxpr_inactive(vnode_t *, cred_t *, caller_context_t *);
+
+static vnode_t *lxpr_lookup_procdir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_piddir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_not_a_dir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_fddir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_netdir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_sysdir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_sys_fsdir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_sys_fs_inotifydir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_sys_kerneldir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_sys_kdir_randdir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_sys_netdir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_sys_net_coredir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_sys_net_ipv4dir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_sys_vmdir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_taskdir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_task_tid_dir(vnode_t *, char *);
+
+static int lxpr_readdir_procdir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_piddir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_not_a_dir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_fddir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_netdir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_sysdir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_sys_fsdir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_sys_fs_inotifydir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_sys_kerneldir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_sys_kdir_randdir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_sys_netdir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_sys_net_coredir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_sys_net_ipv4dir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_sys_vmdir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_taskdir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_task_tid_dir(lxpr_node_t *, uio_t *, int *);
+
+static void lxpr_read_invalid(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_empty(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_cgroups(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_cpuinfo(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_devices(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_diskstats(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_isdir(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_fd(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_filesystems(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_kmsg(lxpr_node_t *, lxpr_uiobuf_t *, ldi_handle_t);
+static void lxpr_read_loadavg(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_meminfo(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_mounts(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_partitions(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_stat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_swaps(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_uptime(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_version(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_vmstat(lxpr_node_t *, lxpr_uiobuf_t *);
+
+static void lxpr_read_pid_auxv(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_cgroup(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_cmdline(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_comm(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_env(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_limits(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_loginuid(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_maps(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_mountinfo(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_oom_scr_adj(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_personality(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_stat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_statm(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_status(lxpr_node_t *, lxpr_uiobuf_t *);
+
+static void lxpr_read_pid_tid_stat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_tid_status(lxpr_node_t *, lxpr_uiobuf_t *);
+
+static void lxpr_read_net_arp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_dev(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_dev_mcast(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_if_inet6(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_igmp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_ip_mr_cache(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_ip_mr_vif(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_ipv6_route(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_mcfilter(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_netstat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_raw(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_route(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_rpc(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_rt_cache(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_sockstat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_snmp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_stat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_tcp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_tcp6(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_udp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_udp6(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_unix(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_sys_fs_inotify_max_queued_events(lxpr_node_t *,
+ lxpr_uiobuf_t *);
+static void lxpr_read_sys_fs_inotify_max_user_instances(lxpr_node_t *,
+ lxpr_uiobuf_t *);
+static void lxpr_read_sys_fs_inotify_max_user_watches(lxpr_node_t *,
+ lxpr_uiobuf_t *);
+static void lxpr_read_sys_kernel_caplcap(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_sys_kernel_corepatt(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_sys_kernel_hostname(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_sys_kernel_msgmni(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_sys_kernel_ngroups_max(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_sys_kernel_osrel(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_sys_kernel_pid_max(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_sys_kernel_rand_bootid(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_sys_kernel_sem(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_sys_kernel_shmall(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_sys_kernel_shmmax(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_sys_kernel_shmmni(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_sys_kernel_threads_max(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_sys_net_core_somaxc(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_sys_net_ipv4_ip_lport_range(lxpr_node_t *,
+ lxpr_uiobuf_t *);
+static void lxpr_read_sys_net_ipv4_tcp_fin_to(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_sys_net_ipv4_tcp_ka_int(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_sys_net_ipv4_tcp_ka_tim(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_sys_net_ipv4_tcp_sack(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_sys_net_ipv4_tcp_winscale(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_sys_vm_max_map_cnt(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_sys_vm_minfr_kb(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_sys_vm_nhpages(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_sys_vm_overcommit_mem(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_sys_vm_swappiness(lxpr_node_t *, lxpr_uiobuf_t *);
+
+static int lxpr_write_pid_loginuid(lxpr_node_t *, uio_t *, cred_t *,
+ caller_context_t *);
+static int lxpr_write_sys_net_core_somaxc(lxpr_node_t *, uio_t *, cred_t *,
+ caller_context_t *);
+static int lxpr_write_sys_net_ipv4_ip_lport_range(lxpr_node_t *, uio_t *,
+ cred_t *, caller_context_t *);
+static int lxpr_write_sys_net_ipv4_tcp_fin_to(lxpr_node_t *, uio_t *, cred_t *,
+ caller_context_t *);
+static int lxpr_write_sys_net_ipv4_tcp_ka_int(lxpr_node_t *, uio_t *,
+ cred_t *, caller_context_t *);
+static int lxpr_write_sys_net_ipv4_tcp_ka_tim(lxpr_node_t *, uio_t *,
+ cred_t *, caller_context_t *);
+static int lxpr_write_sys_net_ipv4_tcp_sack(lxpr_node_t *, uio_t *,
+ cred_t *, caller_context_t *);
+static int lxpr_write_sys_net_ipv4_tcp_winscale(lxpr_node_t *, uio_t *,
+ cred_t *, caller_context_t *);
+static int lxpr_write_sys_kernel_corepatt(lxpr_node_t *, uio_t *, cred_t *,
+ caller_context_t *);
+
+/*
+ * Simple conversion
+ */
+#define btok(x) ((x) >> 10) /* bytes to kbytes */
+#define ptok(x) ((x) << (PAGESHIFT - 10)) /* pages to kbytes */
+
+#define ttolxlwp(t) ((struct lx_lwp_data *)ttolwpbrand(t))
+
+extern rctl_hndl_t rc_process_semmsl;
+extern rctl_hndl_t rc_process_semopm;
+extern rctl_hndl_t rc_zone_semmni;
+
+extern rctl_hndl_t rc_zone_msgmni;
+extern rctl_hndl_t rc_zone_shmmax;
+extern rctl_hndl_t rc_zone_shmmni;
+#define FOURGB 4294967295
+
+/*
+ * The maximum length of the concatenation of argument vector strings we
+ * will return to the user via the branded procfs. Likewise for the env vector.
+ */
+int lxpr_maxargvlen = 4096;
+int lxpr_maxenvvlen = 4096;
+
+/*
+ * The lx /proc vnode operations vector
+ */
+const fs_operation_def_t lxpr_vnodeops_template[] = {
+ VOPNAME_OPEN, { .vop_open = lxpr_open },
+ VOPNAME_CLOSE, { .vop_close = lxpr_close },
+ VOPNAME_READ, { .vop_read = lxpr_read },
+ VOPNAME_WRITE, { .vop_read = lxpr_write },
+ VOPNAME_GETATTR, { .vop_getattr = lxpr_getattr },
+ VOPNAME_ACCESS, { .vop_access = lxpr_access },
+ VOPNAME_LOOKUP, { .vop_lookup = lxpr_lookup },
+ VOPNAME_CREATE, { .vop_create = lxpr_create },
+ VOPNAME_READDIR, { .vop_readdir = lxpr_readdir },
+ VOPNAME_READLINK, { .vop_readlink = lxpr_readlink },
+ VOPNAME_SPACE, { .vop_space = lxpr_space },
+ VOPNAME_SETATTR, { .vop_setattr = lxpr_setattr },
+ VOPNAME_FSYNC, { .error = lxpr_sync },
+ VOPNAME_SEEK, { .error = lxpr_sync },
+ VOPNAME_INACTIVE, { .vop_inactive = lxpr_inactive },
+ VOPNAME_CMP, { .vop_cmp = lxpr_cmp },
+ VOPNAME_REALVP, { .vop_realvp = lxpr_realvp },
+ NULL, NULL
+};
+
+
+/*
+ * file contents of an lx /proc directory.
+ */
+static lxpr_dirent_t lx_procdir[] = {
+ { LXPR_CGROUPS, "cgroups" },
+ { LXPR_CMDLINE, "cmdline" },
+ { LXPR_CPUINFO, "cpuinfo" },
+ { LXPR_DEVICES, "devices" },
+ { LXPR_DISKSTATS, "diskstats" },
+ { LXPR_DMA, "dma" },
+ { LXPR_FILESYSTEMS, "filesystems" },
+ { LXPR_INTERRUPTS, "interrupts" },
+ { LXPR_IOPORTS, "ioports" },
+ { LXPR_KCORE, "kcore" },
+ { LXPR_KMSG, "kmsg" },
+ { LXPR_LOADAVG, "loadavg" },
+ { LXPR_MEMINFO, "meminfo" },
+ { LXPR_MODULES, "modules" },
+ { LXPR_MOUNTS, "mounts" },
+ { LXPR_NETDIR, "net" },
+ { LXPR_PARTITIONS, "partitions" },
+ { LXPR_SELF, "self" },
+ { LXPR_STAT, "stat" },
+ { LXPR_SWAPS, "swaps" },
+ { LXPR_SYSDIR, "sys" },
+ { LXPR_UPTIME, "uptime" },
+ { LXPR_VERSION, "version" },
+ { LXPR_VMSTAT, "vmstat" }
+};
+
+#define PROCDIRFILES (sizeof (lx_procdir) / sizeof (lx_procdir[0]))
+
+/*
+ * Contents of an lx /proc/<pid> directory.
+ */
+static lxpr_dirent_t piddir[] = {
+ { LXPR_PID_AUXV, "auxv" },
+ { LXPR_PID_CGROUP, "cgroup" },
+ { LXPR_PID_CMDLINE, "cmdline" },
+ { LXPR_PID_COMM, "comm" },
+ { LXPR_PID_CPU, "cpu" },
+ { LXPR_PID_CURDIR, "cwd" },
+ { LXPR_PID_ENV, "environ" },
+ { LXPR_PID_EXE, "exe" },
+ { LXPR_PID_LIMITS, "limits" },
+ { LXPR_PID_LOGINUID, "loginuid" },
+ { LXPR_PID_MAPS, "maps" },
+ { LXPR_PID_MEM, "mem" },
+ { LXPR_PID_MOUNTINFO, "mountinfo" },
+ { LXPR_PID_OOM_SCR_ADJ, "oom_score_adj" },
+ { LXPR_PID_PERSONALITY, "personality" },
+ { LXPR_PID_ROOTDIR, "root" },
+ { LXPR_PID_STAT, "stat" },
+ { LXPR_PID_STATM, "statm" },
+ { LXPR_PID_STATUS, "status" },
+ { LXPR_PID_TASKDIR, "task" },
+ { LXPR_PID_FDDIR, "fd" }
+};
+
+#define PIDDIRFILES (sizeof (piddir) / sizeof (piddir[0]))
+
+/*
+ * Contents of an lx /proc/<pid>/task/<tid> directory.
+ */
+static lxpr_dirent_t tiddir[] = {
+ { LXPR_PID_TID_AUXV, "auxv" },
+ { LXPR_PID_CGROUP, "cgroup" },
+ { LXPR_PID_CMDLINE, "cmdline" },
+ { LXPR_PID_TID_COMM, "comm" },
+ { LXPR_PID_CPU, "cpu" },
+ { LXPR_PID_CURDIR, "cwd" },
+ { LXPR_PID_ENV, "environ" },
+ { LXPR_PID_EXE, "exe" },
+ { LXPR_PID_LIMITS, "limits" },
+ { LXPR_PID_LOGINUID, "loginuid" },
+ { LXPR_PID_MAPS, "maps" },
+ { LXPR_PID_MEM, "mem" },
+ { LXPR_PID_MOUNTINFO, "mountinfo" },
+ { LXPR_PID_TID_OOM_SCR_ADJ, "oom_score_adj" },
+ { LXPR_PID_PERSONALITY, "personality" },
+ { LXPR_PID_ROOTDIR, "root" },
+ { LXPR_PID_TID_STAT, "stat" },
+ { LXPR_PID_STATM, "statm" },
+ { LXPR_PID_TID_STATUS, "status" },
+ { LXPR_PID_FDDIR, "fd" }
+};
+
+#define TIDDIRFILES (sizeof (tiddir) / sizeof (tiddir[0]))
+
+#define LX_RLIM_INFINITY 0xFFFFFFFFFFFFFFFF
+
+#define RCTL_INFINITE(x) \
+ ((x.rcv_flagaction & RCTL_LOCAL_MAXIMAL) && \
+ (x.rcv_flagaction & RCTL_GLOBAL_INFINITE))
+
+typedef struct lxpr_rlimtab {
+ char *rlim_name; /* limit name */
+ char *rlim_unit; /* limit unit */
+ char *rlim_rctl; /* rctl source */
+} lxpr_rlimtab_t;
+
+static lxpr_rlimtab_t lxpr_rlimtab[] = {
+ { "Max cpu time", "seconds", "process.max-cpu-time" },
+ { "Max file size", "bytes", "process.max-file-size" },
+ { "Max data size", "bytes", "process.max-data-size" },
+ { "Max stack size", "bytes", "process.max-stack-size" },
+ { "Max core file size", "bytes", "process.max-core-size" },
+ { "Max resident set", "bytes", "zone.max-physical-memory" },
+ { "Max processes", "processes", "zone.max-lwps" },
+ { "Max open files", "files", "process.max-file-descriptor" },
+ { "Max locked memory", "bytes", "zone.max-locked-memory" },
+ { "Max address space", "bytes", "process.max-address-space" },
+ { "Max file locks", "locks", NULL },
+ { "Max pending signals", "signals",
+ "process.max-sigqueue-size" },
+ { "Max msgqueue size", "bytes", "process.max-msg-messages" }
+};
+
+#define LX_RLIM_TAB_LEN (sizeof (lxpr_rlimtab) / sizeof (lxpr_rlimtab[0]))
+
+
+/*
+ * contents of lx /proc/net directory
+ */
+static lxpr_dirent_t netdir[] = {
+ { LXPR_NET_ARP, "arp" },
+ { LXPR_NET_DEV, "dev" },
+ { LXPR_NET_DEV_MCAST, "dev_mcast" },
+ { LXPR_NET_IF_INET6, "if_inet6" },
+ { LXPR_NET_IGMP, "igmp" },
+ { LXPR_NET_IP_MR_CACHE, "ip_mr_cache" },
+ { LXPR_NET_IP_MR_VIF, "ip_mr_vif" },
+ { LXPR_NET_IPV6_ROUTE, "ipv6_route" },
+ { LXPR_NET_MCFILTER, "mcfilter" },
+ { LXPR_NET_NETSTAT, "netstat" },
+ { LXPR_NET_RAW, "raw" },
+ { LXPR_NET_ROUTE, "route" },
+ { LXPR_NET_RPC, "rpc" },
+ { LXPR_NET_RT_CACHE, "rt_cache" },
+ { LXPR_NET_SOCKSTAT, "sockstat" },
+ { LXPR_NET_SNMP, "snmp" },
+ { LXPR_NET_STAT, "stat" },
+ { LXPR_NET_TCP, "tcp" },
+ { LXPR_NET_TCP6, "tcp6" },
+ { LXPR_NET_UDP, "udp" },
+ { LXPR_NET_UDP6, "udp6" },
+ { LXPR_NET_UNIX, "unix" }
+};
+
+#define NETDIRFILES (sizeof (netdir) / sizeof (netdir[0]))
+
+/*
+ * contents of /proc/sys directory
+ */
+static lxpr_dirent_t sysdir[] = {
+ { LXPR_SYS_FSDIR, "fs" },
+ { LXPR_SYS_KERNELDIR, "kernel" },
+ { LXPR_SYS_NETDIR, "net" },
+ { LXPR_SYS_VMDIR, "vm" },
+};
+
+#define SYSDIRFILES (sizeof (sysdir) / sizeof (sysdir[0]))
+
+/*
+ * contents of /proc/sys/fs directory
+ */
+static lxpr_dirent_t sys_fsdir[] = {
+ { LXPR_SYS_FS_INOTIFYDIR, "inotify" },
+};
+
+#define SYS_FSDIRFILES (sizeof (sys_fsdir) / sizeof (sys_fsdir[0]))
+
+/*
+ * contents of /proc/sys/fs/inotify directory
+ */
+static lxpr_dirent_t sys_fs_inotifydir[] = {
+ { LXPR_SYS_FS_INOTIFY_MAX_QUEUED_EVENTS, "max_queued_events" },
+ { LXPR_SYS_FS_INOTIFY_MAX_USER_INSTANCES, "max_user_instances" },
+ { LXPR_SYS_FS_INOTIFY_MAX_USER_WATCHES, "max_user_watches" },
+};
+
+#define SYS_FS_INOTIFYDIRFILES \
+ (sizeof (sys_fs_inotifydir) / sizeof (sys_fs_inotifydir[0]))
+
+/*
+ * contents of /proc/sys/kernel directory
+ */
+static lxpr_dirent_t sys_kerneldir[] = {
+ { LXPR_SYS_KERNEL_CAPLCAP, "cap_last_cap" },
+ { LXPR_SYS_KERNEL_COREPATT, "core_pattern" },
+ { LXPR_SYS_KERNEL_HOSTNAME, "hostname" },
+ { LXPR_SYS_KERNEL_MSGMNI, "msgmni" },
+ { LXPR_SYS_KERNEL_NGROUPS_MAX, "ngroups_max" },
+ { LXPR_SYS_KERNEL_OSREL, "osrelease" },
+ { LXPR_SYS_KERNEL_PID_MAX, "pid_max" },
+ { LXPR_SYS_KERNEL_RANDDIR, "random" },
+ { LXPR_SYS_KERNEL_SEM, "sem" },
+ { LXPR_SYS_KERNEL_SHMALL, "shmall" },
+ { LXPR_SYS_KERNEL_SHMMAX, "shmmax" },
+ { LXPR_SYS_KERNEL_SHMMNI, "shmmni" },
+ { LXPR_SYS_KERNEL_THREADS_MAX, "threads-max" },
+};
+
+#define SYS_KERNELDIRFILES (sizeof (sys_kerneldir) / sizeof (sys_kerneldir[0]))
+
+/*
+ * contents of /proc/sys/kernel/random directory
+ */
+static lxpr_dirent_t sys_randdir[] = {
+ { LXPR_SYS_KERNEL_RAND_BOOTID, "boot_id" },
+};
+
+#define SYS_RANDDIRFILES (sizeof (sys_randdir) / sizeof (sys_randdir[0]))
+
+/*
+ * contents of /proc/sys/net directory
+ */
+static lxpr_dirent_t sys_netdir[] = {
+ { LXPR_SYS_NET_COREDIR, "core" },
+ { LXPR_SYS_NET_IPV4DIR, "ipv4" },
+};
+
+#define SYS_NETDIRFILES (sizeof (sys_netdir) / sizeof (sys_netdir[0]))
+
+/*
+ * contents of /proc/sys/net/core directory
+ */
+static lxpr_dirent_t sys_net_coredir[] = {
+ { LXPR_SYS_NET_CORE_SOMAXCON, "somaxconn" },
+};
+
+#define SYS_NET_COREDIRFILES \
+ (sizeof (sys_net_coredir) / sizeof (sys_net_coredir[0]))
+
+/*
+ * contents of /proc/sys/net/ipv4 directory
+ * See the Linux ip(7) & tcp(7) man pages for descriptions and the illumos
+ * ip(7p) & tcp(7p) man pages for the native descriptions.
+ */
+static lxpr_dirent_t sys_net_ipv4dir[] = {
+ { LXPR_SYS_NET_IPV4_IP_LPORT_RANGE, "ip_local_port_range" },
+ { LXPR_SYS_NET_IPV4_TCP_FIN_TO, "tcp_fin_timeout" },
+ { LXPR_SYS_NET_IPV4_TCP_KA_INT, "tcp_keepalive_intvl" },
+ { LXPR_SYS_NET_IPV4_TCP_KA_TIM, "tcp_keepalive_time" },
+ { LXPR_SYS_NET_IPV4_TCP_SACK, "tcp_sack" },
+ { LXPR_SYS_NET_IPV4_TCP_WINSCALE, "tcp_window_scaling" },
+};
+
+#define SYS_NET_IPV4DIRFILES \
+ (sizeof (sys_net_ipv4dir) / sizeof (sys_net_ipv4dir[0]))
+
+/*
+ * contents of /proc/sys/vm directory
+ */
+static lxpr_dirent_t sys_vmdir[] = {
+ { LXPR_SYS_VM_MAX_MAP_CNT, "max_map_count" },
+ { LXPR_SYS_VM_MINFR_KB, "min_free_kbytes" },
+ { LXPR_SYS_VM_NHUGEP, "nr_hugepages" },
+ { LXPR_SYS_VM_OVERCOMMIT_MEM, "overcommit_memory" },
+ { LXPR_SYS_VM_SWAPPINESS, "swappiness" },
+};
+
+#define SYS_VMDIRFILES (sizeof (sys_vmdir) / sizeof (sys_vmdir[0]))
+
+/*
+ * Table for standard writable files. Non-standard writable files not in this
+ * table can be handled explicitly as special cases.
+ * This table drives lxpr_is_writable, lxpr_write, and lxpr_create.
+ * Note that the entries LXPR_PID_FD_FD and LXPR_PID_TID_FD_FD exist in the
+ * table both to verify writability and to satisfy opening with O_CREATE.
+ */
+typedef struct wftab {
+ lxpr_nodetype_t wft_type; /* file entry type */
+ int (*wft_wrf)(lxpr_node_t *, struct uio *, cred_t *,
+ caller_context_t *); /* write function */
+} wftab_t;
+
+static wftab_t wr_tab[] = {
+ {LXPR_PID_FD_FD, NULL},
+ {LXPR_PID_LOGINUID, lxpr_write_pid_loginuid},
+ {LXPR_PID_OOM_SCR_ADJ, NULL},
+ {LXPR_PID_TID_FD_FD, NULL},
+ {LXPR_PID_TID_OOM_SCR_ADJ, NULL},
+ {LXPR_SYS_KERNEL_COREPATT, lxpr_write_sys_kernel_corepatt},
+ {LXPR_SYS_KERNEL_SHMALL, NULL},
+ {LXPR_SYS_KERNEL_SHMMAX, NULL},
+ {LXPR_SYS_NET_CORE_SOMAXCON, lxpr_write_sys_net_core_somaxc},
+ {LXPR_SYS_NET_IPV4_IP_LPORT_RANGE,
+ lxpr_write_sys_net_ipv4_ip_lport_range},
+ {LXPR_SYS_NET_IPV4_TCP_FIN_TO, lxpr_write_sys_net_ipv4_tcp_fin_to},
+ {LXPR_SYS_NET_IPV4_TCP_KA_INT, lxpr_write_sys_net_ipv4_tcp_ka_int},
+ {LXPR_SYS_NET_IPV4_TCP_KA_TIM, lxpr_write_sys_net_ipv4_tcp_ka_tim},
+ {LXPR_SYS_NET_IPV4_TCP_SACK, lxpr_write_sys_net_ipv4_tcp_sack},
+ {LXPR_SYS_NET_IPV4_TCP_WINSCALE, lxpr_write_sys_net_ipv4_tcp_winscale},
+ {LXPR_SYS_VM_OVERCOMMIT_MEM, NULL},
+ {LXPR_SYS_VM_SWAPPINESS, NULL},
+ {LXPR_INVALID, NULL}
+};
+
+/*
+ * Centralized test for the standard writable proc files. Other non-standard
+ * writable files might be handled separately.
+ */
+boolean_t
+lxpr_is_writable(lxpr_nodetype_t type)
+{
+ int i;
+
+ for (i = 0; wr_tab[i].wft_type != LXPR_INVALID; i++) {
+ if (wr_tab[i].wft_type == type)
+ return (B_TRUE);
+ }
+ return (B_FALSE);
+}
+
+/*
+ * lxpr_open(): Vnode operation for VOP_OPEN()
+ */
+static int
+lxpr_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
+{
+ vnode_t *vp = *vpp;
+ lxpr_node_t *lxpnp = VTOLXP(vp);
+ lxpr_nodetype_t type = lxpnp->lxpr_type;
+ vnode_t *rvp;
+ int error = 0;
+
+ /* Restrict writes to certain files */
+ if ((flag & FWRITE) && !lxpr_is_writable(type)) {
+ return (EPERM);
+ }
+
+ /*
+ * If we are opening an underlying file only allow regular files,
+ * fifos or sockets; reject the open for anything else.
+ * Just do it if we are opening the current or root directory.
+ */
+ if (lxpnp->lxpr_realvp != NULL) {
+ rvp = lxpnp->lxpr_realvp;
+
+ if (type == LXPR_PID_FD_FD && rvp->v_type != VREG &&
+ rvp->v_type != VFIFO && rvp->v_type != VSOCK) {
+ error = EACCES;
+ } else {
+ if (type == LXPR_PID_FD_FD && rvp->v_type == VFIFO) {
+ /*
+ * This flag lets the fifo open know that
+ * we're using proc/fd to open a fd which we
+ * already have open. Otherwise, the fifo might
+ * reject an open if the other end has closed.
+ */
+ flag |= FKLYR;
+ }
+ /*
+ * Need to hold rvp since VOP_OPEN() may release it.
+ */
+ VN_HOLD(rvp);
+ error = VOP_OPEN(&rvp, flag, cr, ct);
+ if (error) {
+ VN_RELE(rvp);
+ } else {
+ *vpp = rvp;
+ VN_RELE(vp);
+ }
+ }
+ }
+
+ return (error);
+}
+
+
+/*
+ * lxpr_close(): Vnode operation for VOP_CLOSE()
+ */
+/* ARGSUSED */
+static int
+lxpr_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
+ caller_context_t *ct)
+{
+ lxpr_node_t *lxpr = VTOLXP(vp);
+ lxpr_nodetype_t type = lxpr->lxpr_type;
+
+ /*
+ * we should never get here because the close is done on the realvp
+ * for these nodes
+ */
+ ASSERT(type != LXPR_PID_FD_FD &&
+ type != LXPR_PID_CURDIR &&
+ type != LXPR_PID_ROOTDIR &&
+ type != LXPR_PID_EXE);
+
+ return (0);
+}
+
+static void (*lxpr_read_function[LXPR_NFILES])() = {
+ NULL, /* invalid */
+ lxpr_read_isdir, /* /proc */
+ lxpr_read_isdir, /* /proc/<pid> */
+ lxpr_read_pid_auxv, /* /proc/<pid>/auxv */
+ lxpr_read_pid_cgroup, /* /proc/<pid>/cgroup */
+ lxpr_read_pid_cmdline, /* /proc/<pid>/cmdline */
+ lxpr_read_pid_comm, /* /proc/<pid>/comm */
+ lxpr_read_empty, /* /proc/<pid>/cpu */
+ lxpr_read_invalid, /* /proc/<pid>/cwd */
+ lxpr_read_pid_env, /* /proc/<pid>/environ */
+ lxpr_read_invalid, /* /proc/<pid>/exe */
+ lxpr_read_pid_limits, /* /proc/<pid>/limits */
+ lxpr_read_pid_loginuid, /* /proc/<pid>/loginuid */
+ lxpr_read_pid_maps, /* /proc/<pid>/maps */
+ lxpr_read_empty, /* /proc/<pid>/mem */
+ lxpr_read_pid_mountinfo, /* /proc/<pid>/mountinfo */
+ lxpr_read_pid_oom_scr_adj, /* /proc/<pid>/oom_score_adj */
+ lxpr_read_pid_personality, /* /proc/<pid>/personality */
+ lxpr_read_invalid, /* /proc/<pid>/root */
+ lxpr_read_pid_stat, /* /proc/<pid>/stat */
+ lxpr_read_pid_statm, /* /proc/<pid>/statm */
+ lxpr_read_pid_status, /* /proc/<pid>/status */
+ lxpr_read_isdir, /* /proc/<pid>/task */
+ lxpr_read_isdir, /* /proc/<pid>/task/nn */
+ lxpr_read_isdir, /* /proc/<pid>/fd */
+ lxpr_read_fd, /* /proc/<pid>/fd/nn */
+ lxpr_read_pid_auxv, /* /proc/<pid>/task/<tid>/auxv */
+ lxpr_read_pid_cgroup, /* /proc/<pid>/task/<tid>/cgroup */
+ lxpr_read_pid_cmdline, /* /proc/<pid>/task/<tid>/cmdline */
+ lxpr_read_pid_comm, /* /proc/<pid>/task/<tid>/comm */
+ lxpr_read_empty, /* /proc/<pid>/task/<tid>/cpu */
+ lxpr_read_invalid, /* /proc/<pid>/task/<tid>/cwd */
+ lxpr_read_pid_env, /* /proc/<pid>/task/<tid>/environ */
+ lxpr_read_invalid, /* /proc/<pid>/task/<tid>/exe */
+ lxpr_read_pid_limits, /* /proc/<pid>/task/<tid>/limits */
+ lxpr_read_pid_loginuid, /* /proc/<pid>/task/<tid>/loginuid */
+ lxpr_read_pid_maps, /* /proc/<pid>/task/<tid>/maps */
+ lxpr_read_empty, /* /proc/<pid>/task/<tid>/mem */
+ lxpr_read_pid_mountinfo, /* /proc/<pid>/task/<tid>/mountinfo */
+ lxpr_read_pid_oom_scr_adj, /* /proc/<pid>/task/<tid>/oom_scr_adj */
+ lxpr_read_pid_personality, /* /proc/<pid>/task/<tid>/personality */
+ lxpr_read_invalid, /* /proc/<pid>/task/<tid>/root */
+ lxpr_read_pid_tid_stat, /* /proc/<pid>/task/<tid>/stat */
+ lxpr_read_pid_statm, /* /proc/<pid>/task/<tid>/statm */
+ lxpr_read_pid_tid_status, /* /proc/<pid>/task/<tid>/status */
+ lxpr_read_isdir, /* /proc/<pid>/task/<tid>/fd */
+ lxpr_read_fd, /* /proc/<pid>/task/<tid>/fd/nn */
+ lxpr_read_cgroups, /* /proc/cgroups */
+ lxpr_read_empty, /* /proc/cmdline */
+ lxpr_read_cpuinfo, /* /proc/cpuinfo */
+ lxpr_read_devices, /* /proc/devices */
+ lxpr_read_diskstats, /* /proc/diskstats */
+ lxpr_read_empty, /* /proc/dma */
+ lxpr_read_filesystems, /* /proc/filesystems */
+ lxpr_read_empty, /* /proc/interrupts */
+ lxpr_read_empty, /* /proc/ioports */
+ lxpr_read_empty, /* /proc/kcore */
+ lxpr_read_invalid, /* /proc/kmsg -- see lxpr_read() */
+ lxpr_read_loadavg, /* /proc/loadavg */
+ lxpr_read_meminfo, /* /proc/meminfo */
+ lxpr_read_empty, /* /proc/modules */
+ lxpr_read_mounts, /* /proc/mounts */
+ lxpr_read_isdir, /* /proc/net */
+ lxpr_read_net_arp, /* /proc/net/arp */
+ lxpr_read_net_dev, /* /proc/net/dev */
+ lxpr_read_net_dev_mcast, /* /proc/net/dev_mcast */
+ lxpr_read_net_if_inet6, /* /proc/net/if_inet6 */
+ lxpr_read_net_igmp, /* /proc/net/igmp */
+ lxpr_read_net_ip_mr_cache, /* /proc/net/ip_mr_cache */
+ lxpr_read_net_ip_mr_vif, /* /proc/net/ip_mr_vif */
+ lxpr_read_net_ipv6_route, /* /proc/net/ipv6_route */
+ lxpr_read_net_mcfilter, /* /proc/net/mcfilter */
+ lxpr_read_net_netstat, /* /proc/net/netstat */
+ lxpr_read_net_raw, /* /proc/net/raw */
+ lxpr_read_net_route, /* /proc/net/route */
+ lxpr_read_net_rpc, /* /proc/net/rpc */
+ lxpr_read_net_rt_cache, /* /proc/net/rt_cache */
+ lxpr_read_net_sockstat, /* /proc/net/sockstat */
+ lxpr_read_net_snmp, /* /proc/net/snmp */
+ lxpr_read_net_stat, /* /proc/net/stat */
+ lxpr_read_net_tcp, /* /proc/net/tcp */
+ lxpr_read_net_tcp6, /* /proc/net/tcp6 */
+ lxpr_read_net_udp, /* /proc/net/udp */
+ lxpr_read_net_udp6, /* /proc/net/udp6 */
+ lxpr_read_net_unix, /* /proc/net/unix */
+ lxpr_read_partitions, /* /proc/partitions */
+ lxpr_read_invalid, /* /proc/self */
+ lxpr_read_stat, /* /proc/stat */
+ lxpr_read_swaps, /* /proc/swaps */
+ lxpr_read_invalid, /* /proc/sys */
+ lxpr_read_invalid, /* /proc/sys/fs */
+ lxpr_read_invalid, /* /proc/sys/fs/inotify */
+ lxpr_read_sys_fs_inotify_max_queued_events, /* max_queued_events */
+ lxpr_read_sys_fs_inotify_max_user_instances, /* max_user_instances */
+ lxpr_read_sys_fs_inotify_max_user_watches, /* max_user_watches */
+ lxpr_read_invalid, /* /proc/sys/kernel */
+ lxpr_read_sys_kernel_caplcap, /* /proc/sys/kernel/cap_last_cap */
+ lxpr_read_sys_kernel_corepatt, /* /proc/sys/kernel/core_pattern */
+ lxpr_read_sys_kernel_hostname, /* /proc/sys/kernel/hostname */
+ lxpr_read_sys_kernel_msgmni, /* /proc/sys/kernel/msgmni */
+ lxpr_read_sys_kernel_ngroups_max, /* /proc/sys/kernel/ngroups_max */
+ lxpr_read_sys_kernel_osrel, /* /proc/sys/kernel/osrelease */
+ lxpr_read_sys_kernel_pid_max, /* /proc/sys/kernel/pid_max */
+ lxpr_read_invalid, /* /proc/sys/kernel/random */
+ lxpr_read_sys_kernel_rand_bootid, /* /proc/sys/kernel/random/boot_id */
+ lxpr_read_sys_kernel_sem, /* /proc/sys/kernel/sem */
+ lxpr_read_sys_kernel_shmall, /* /proc/sys/kernel/shmall */
+ lxpr_read_sys_kernel_shmmax, /* /proc/sys/kernel/shmmax */
+ lxpr_read_sys_kernel_shmmni, /* /proc/sys/kernel/shmmni */
+ lxpr_read_sys_kernel_threads_max, /* /proc/sys/kernel/threads-max */
+ lxpr_read_invalid, /* /proc/sys/net */
+ lxpr_read_invalid, /* /proc/sys/net/core */
+ lxpr_read_sys_net_core_somaxc, /* /proc/sys/net/core/somaxconn */
+ lxpr_read_invalid, /* /proc/sys/net/ipv4 */
+ lxpr_read_sys_net_ipv4_ip_lport_range, /* ../ipv4/ip_local_port_range */
+ lxpr_read_sys_net_ipv4_tcp_fin_to, /* .../ipv4/tcp_fin_timeout */
+ lxpr_read_sys_net_ipv4_tcp_ka_int, /* .../ipv4/tcp_keepalive_intvl */
+ lxpr_read_sys_net_ipv4_tcp_ka_tim, /* .../ipv4/tcp_keepalive_time */
+ lxpr_read_sys_net_ipv4_tcp_sack, /* .../ipv4/tcp_sack */
+ lxpr_read_sys_net_ipv4_tcp_winscale, /* .../ipv4/tcp_window_scaling */
+ lxpr_read_invalid, /* /proc/sys/vm */
+ lxpr_read_sys_vm_max_map_cnt, /* /proc/sys/vm/max_map_count */
+ lxpr_read_sys_vm_minfr_kb, /* /proc/sys/vm/min_free_kbytes */
+ lxpr_read_sys_vm_nhpages, /* /proc/sys/vm/nr_hugepages */
+ lxpr_read_sys_vm_overcommit_mem, /* /proc/sys/vm/overcommit_memory */
+ lxpr_read_sys_vm_swappiness, /* /proc/sys/vm/swappiness */
+ lxpr_read_uptime, /* /proc/uptime */
+ lxpr_read_version, /* /proc/version */
+ lxpr_read_vmstat, /* /proc/vmstat */
+};
+
+/*
+ * Array of lookup functions, indexed by lx /proc file type.
+ */
+static vnode_t *(*lxpr_lookup_function[LXPR_NFILES])() = {
+ NULL, /* invalid */
+ lxpr_lookup_procdir, /* /proc */
+ lxpr_lookup_piddir, /* /proc/<pid> */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/auxv */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/cgroup */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/cmdline */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/comm */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/cpu */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/cwd */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/environ */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/exe */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/limits */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/loginuid */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/maps */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/mem */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/mountinfo */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/oom_score_adj */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/personality */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/root */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/stat */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/statm */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/status */
+ lxpr_lookup_taskdir, /* /proc/<pid>/task */
+ lxpr_lookup_task_tid_dir, /* /proc/<pid>/task/nn */
+ lxpr_lookup_fddir, /* /proc/<pid>/fd */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/fd/nn */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/auxv */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/cgroup */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/cmdline */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/comm */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/cpu */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/cwd */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/environ */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/exe */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/limits */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/loginuid */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/maps */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/mem */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/mountinfo */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/oom_scr_adj */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/personality */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/root */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/stat */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/statm */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/status */
+ lxpr_lookup_fddir, /* /proc/<pid>/task/<tid>/fd */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/task/<tid>/fd/nn */
+ lxpr_lookup_not_a_dir, /* /proc/cgroups */
+ lxpr_lookup_not_a_dir, /* /proc/cmdline */
+ lxpr_lookup_not_a_dir, /* /proc/cpuinfo */
+ lxpr_lookup_not_a_dir, /* /proc/devices */
+ lxpr_lookup_not_a_dir, /* /proc/diskstats */
+ lxpr_lookup_not_a_dir, /* /proc/dma */
+ lxpr_lookup_not_a_dir, /* /proc/filesystems */
+ lxpr_lookup_not_a_dir, /* /proc/interrupts */
+ lxpr_lookup_not_a_dir, /* /proc/ioports */
+ lxpr_lookup_not_a_dir, /* /proc/kcore */
+ lxpr_lookup_not_a_dir, /* /proc/kmsg */
+ lxpr_lookup_not_a_dir, /* /proc/loadavg */
+ lxpr_lookup_not_a_dir, /* /proc/meminfo */
+ lxpr_lookup_not_a_dir, /* /proc/modules */
+ lxpr_lookup_not_a_dir, /* /proc/mounts */
+ lxpr_lookup_netdir, /* /proc/net */
+ lxpr_lookup_not_a_dir, /* /proc/net/arp */
+ lxpr_lookup_not_a_dir, /* /proc/net/dev */
+ lxpr_lookup_not_a_dir, /* /proc/net/dev_mcast */
+ lxpr_lookup_not_a_dir, /* /proc/net/if_inet6 */
+ lxpr_lookup_not_a_dir, /* /proc/net/igmp */
+ lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_cache */
+ lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_vif */
+ lxpr_lookup_not_a_dir, /* /proc/net/ipv6_route */
+ lxpr_lookup_not_a_dir, /* /proc/net/mcfilter */
+ lxpr_lookup_not_a_dir, /* /proc/net/netstat */
+ lxpr_lookup_not_a_dir, /* /proc/net/raw */
+ lxpr_lookup_not_a_dir, /* /proc/net/route */
+ lxpr_lookup_not_a_dir, /* /proc/net/rpc */
+ lxpr_lookup_not_a_dir, /* /proc/net/rt_cache */
+ lxpr_lookup_not_a_dir, /* /proc/net/sockstat */
+ lxpr_lookup_not_a_dir, /* /proc/net/snmp */
+ lxpr_lookup_not_a_dir, /* /proc/net/stat */
+ lxpr_lookup_not_a_dir, /* /proc/net/tcp */
+ lxpr_lookup_not_a_dir, /* /proc/net/tcp6 */
+ lxpr_lookup_not_a_dir, /* /proc/net/udp */
+ lxpr_lookup_not_a_dir, /* /proc/net/udp6 */
+ lxpr_lookup_not_a_dir, /* /proc/net/unix */
+ lxpr_lookup_not_a_dir, /* /proc/partitions */
+ lxpr_lookup_not_a_dir, /* /proc/self */
+ lxpr_lookup_not_a_dir, /* /proc/stat */
+ lxpr_lookup_not_a_dir, /* /proc/swaps */
+ lxpr_lookup_sysdir, /* /proc/sys */
+ lxpr_lookup_sys_fsdir, /* /proc/sys/fs */
+ lxpr_lookup_sys_fs_inotifydir, /* /proc/sys/fs/inotify */
+ lxpr_lookup_not_a_dir, /* .../inotify/max_queued_events */
+ lxpr_lookup_not_a_dir, /* .../inotify/max_user_instances */
+ lxpr_lookup_not_a_dir, /* .../inotify/max_user_watches */
+ lxpr_lookup_sys_kerneldir, /* /proc/sys/kernel */
+ lxpr_lookup_not_a_dir, /* /proc/sys/kernel/cap_last_cap */
+ lxpr_lookup_not_a_dir, /* /proc/sys/kernel/core_pattern */
+ lxpr_lookup_not_a_dir, /* /proc/sys/kernel/hostname */
+ lxpr_lookup_not_a_dir, /* /proc/sys/kernel/msgmni */
+ lxpr_lookup_not_a_dir, /* /proc/sys/kernel/ngroups_max */
+ lxpr_lookup_not_a_dir, /* /proc/sys/kernel/osrelease */
+ lxpr_lookup_not_a_dir, /* /proc/sys/kernel/pid_max */
+ lxpr_lookup_sys_kdir_randdir, /* /proc/sys/kernel/random */
+ lxpr_lookup_not_a_dir, /* /proc/sys/kernel/random/boot_id */
+ lxpr_lookup_not_a_dir, /* /proc/sys/kernel/sem */
+ lxpr_lookup_not_a_dir, /* /proc/sys/kernel/shmall */
+ lxpr_lookup_not_a_dir, /* /proc/sys/kernel/shmmax */
+ lxpr_lookup_not_a_dir, /* /proc/sys/kernel/shmmni */
+ lxpr_lookup_not_a_dir, /* /proc/sys/kernel/threads-max */
+ lxpr_lookup_sys_netdir, /* /proc/sys/net */
+ lxpr_lookup_sys_net_coredir, /* /proc/sys/net/core */
+ lxpr_lookup_not_a_dir, /* /proc/sys/net/core/somaxconn */
+ lxpr_lookup_sys_net_ipv4dir, /* /proc/sys/net/ipv4 */
+ lxpr_lookup_not_a_dir, /* .../net/ipv4/ip_local_port_range */
+ lxpr_lookup_not_a_dir, /* .../net/ipv4/tcp_fin_timeout */
+ lxpr_lookup_not_a_dir, /* .../net/ipv4/tcp_keepalive_intvl */
+ lxpr_lookup_not_a_dir, /* .../net/ipv4/tcp_keepalive_time */
+ lxpr_lookup_not_a_dir, /* .../net/ipv4/tcp_sack */
+ lxpr_lookup_not_a_dir, /* .../net/ipv4/tcp_window_scaling */
+ lxpr_lookup_sys_vmdir, /* /proc/sys/vm */
+ lxpr_lookup_not_a_dir, /* /proc/sys/vm/max_map_count */
+ lxpr_lookup_not_a_dir, /* /proc/sys/vm/min_free_kbytes */
+ lxpr_lookup_not_a_dir, /* /proc/sys/vm/nr_hugepages */
+ lxpr_lookup_not_a_dir, /* /proc/sys/vm/overcommit_memory */
+ lxpr_lookup_not_a_dir, /* /proc/sys/vm/swappiness */
+ lxpr_lookup_not_a_dir, /* /proc/uptime */
+ lxpr_lookup_not_a_dir, /* /proc/version */
+ lxpr_lookup_not_a_dir, /* /proc/vmstat */
+};
+
+/*
+ * Array of readdir functions, indexed by /proc file type.
+ */
+static int (*lxpr_readdir_function[LXPR_NFILES])() = {
+ NULL, /* invalid */
+ lxpr_readdir_procdir, /* /proc */
+ lxpr_readdir_piddir, /* /proc/<pid> */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/auxv */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/cgroup */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/cmdline */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/comm */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/cpu */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/cwd */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/environ */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/exe */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/limits */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/loginuid */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/maps */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/mem */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/mountinfo */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/oom_score_adj */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/personality */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/root */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/stat */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/statm */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/status */
+ lxpr_readdir_taskdir, /* /proc/<pid>/task */
+ lxpr_readdir_task_tid_dir, /* /proc/<pid>/task/nn */
+ lxpr_readdir_fddir, /* /proc/<pid>/fd */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/fd/nn */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/auxv */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/cgroup */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/cmdline */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/comm */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/cpu */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/cwd */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/environ */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/exe */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/limits */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/loginuid */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/maps */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/mem */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/mountinfo */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid/oom_scr_adj */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid/personality */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/root */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/stat */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/statm */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/status */
+ lxpr_readdir_fddir, /* /proc/<pid>/task/<tid>/fd */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/task/<tid>/fd/nn */
+ lxpr_readdir_not_a_dir, /* /proc/cgroups */
+ lxpr_readdir_not_a_dir, /* /proc/cmdline */
+ lxpr_readdir_not_a_dir, /* /proc/cpuinfo */
+ lxpr_readdir_not_a_dir, /* /proc/devices */
+ lxpr_readdir_not_a_dir, /* /proc/diskstats */
+ lxpr_readdir_not_a_dir, /* /proc/dma */
+ lxpr_readdir_not_a_dir, /* /proc/filesystems */
+ lxpr_readdir_not_a_dir, /* /proc/interrupts */
+ lxpr_readdir_not_a_dir, /* /proc/ioports */
+ lxpr_readdir_not_a_dir, /* /proc/kcore */
+ lxpr_readdir_not_a_dir, /* /proc/kmsg */
+ lxpr_readdir_not_a_dir, /* /proc/loadavg */
+ lxpr_readdir_not_a_dir, /* /proc/meminfo */
+ lxpr_readdir_not_a_dir, /* /proc/modules */
+ lxpr_readdir_not_a_dir, /* /proc/mounts */
+ lxpr_readdir_netdir, /* /proc/net */
+ lxpr_readdir_not_a_dir, /* /proc/net/arp */
+ lxpr_readdir_not_a_dir, /* /proc/net/dev */
+ lxpr_readdir_not_a_dir, /* /proc/net/dev_mcast */
+ lxpr_readdir_not_a_dir, /* /proc/net/if_inet6 */
+ lxpr_readdir_not_a_dir, /* /proc/net/igmp */
+ lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_cache */
+ lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_vif */
+ lxpr_readdir_not_a_dir, /* /proc/net/ipv6_route */
+ lxpr_readdir_not_a_dir, /* /proc/net/mcfilter */
+ lxpr_readdir_not_a_dir, /* /proc/net/netstat */
+ lxpr_readdir_not_a_dir, /* /proc/net/raw */
+ lxpr_readdir_not_a_dir, /* /proc/net/route */
+ lxpr_readdir_not_a_dir, /* /proc/net/rpc */
+ lxpr_readdir_not_a_dir, /* /proc/net/rt_cache */
+ lxpr_readdir_not_a_dir, /* /proc/net/sockstat */
+ lxpr_readdir_not_a_dir, /* /proc/net/snmp */
+ lxpr_readdir_not_a_dir, /* /proc/net/stat */
+ lxpr_readdir_not_a_dir, /* /proc/net/tcp */
+ lxpr_readdir_not_a_dir, /* /proc/net/tcp6 */
+ lxpr_readdir_not_a_dir, /* /proc/net/udp */
+ lxpr_readdir_not_a_dir, /* /proc/net/udp6 */
+ lxpr_readdir_not_a_dir, /* /proc/net/unix */
+ lxpr_readdir_not_a_dir, /* /proc/partitions */
+ lxpr_readdir_not_a_dir, /* /proc/self */
+ lxpr_readdir_not_a_dir, /* /proc/stat */
+ lxpr_readdir_not_a_dir, /* /proc/swaps */
+ lxpr_readdir_sysdir, /* /proc/sys */
+ lxpr_readdir_sys_fsdir, /* /proc/sys/fs */
+ lxpr_readdir_sys_fs_inotifydir, /* /proc/sys/fs/inotify */
+ lxpr_readdir_not_a_dir, /* .../inotify/max_queued_events */
+ lxpr_readdir_not_a_dir, /* .../inotify/max_user_instances */
+ lxpr_readdir_not_a_dir, /* .../inotify/max_user_watches */
+ lxpr_readdir_sys_kerneldir, /* /proc/sys/kernel */
+ lxpr_readdir_not_a_dir, /* /proc/sys/kernel/cap_last_cap */
+ lxpr_readdir_not_a_dir, /* /proc/sys/kernel/core_pattern */
+ lxpr_readdir_not_a_dir, /* /proc/sys/kernel/hostname */
+ lxpr_readdir_not_a_dir, /* /proc/sys/kernel/msgmni */
+ lxpr_readdir_not_a_dir, /* /proc/sys/kernel/ngroups_max */
+ lxpr_readdir_not_a_dir, /* /proc/sys/kernel/osrelease */
+ lxpr_readdir_not_a_dir, /* /proc/sys/kernel/pid_max */
+ lxpr_readdir_sys_kdir_randdir, /* /proc/sys/kernel/random */
+ lxpr_readdir_not_a_dir, /* /proc/sys/kernel/random/boot_id */
+ lxpr_readdir_not_a_dir, /* /proc/sys/kernel/sem */
+ lxpr_readdir_not_a_dir, /* /proc/sys/kernel/shmall */
+ lxpr_readdir_not_a_dir, /* /proc/sys/kernel/shmmax */
+ lxpr_readdir_not_a_dir, /* /proc/sys/kernel/shmmni */
+ lxpr_readdir_not_a_dir, /* /proc/sys/kernel/threads-max */
+ lxpr_readdir_sys_netdir, /* /proc/sys/net */
+ lxpr_readdir_sys_net_coredir, /* /proc/sys/net/core */
+ lxpr_readdir_not_a_dir, /* /proc/sys/net/core/somaxconn */
+ lxpr_readdir_sys_net_ipv4dir, /* /proc/sys/net/ipv4 */
+ lxpr_readdir_not_a_dir, /* .../net/ipv4/ip_local_port_range */
+ lxpr_readdir_not_a_dir, /* .../net/ipv4/tcp_fin_timeout */
+ lxpr_readdir_not_a_dir, /* .../net/ipv4/tcp_keepalive_intvl */
+ lxpr_readdir_not_a_dir, /* .../net/ipv4/tcp_keepalive_time */
+ lxpr_readdir_not_a_dir, /* .../net/ipv4/tcp_sack */
+ lxpr_readdir_not_a_dir, /* .../net/ipv4/tcp_window_scaling */
+ lxpr_readdir_sys_vmdir, /* /proc/sys/vm */
+ lxpr_readdir_not_a_dir, /* /proc/sys/vm/max_map_count */
+ lxpr_readdir_not_a_dir, /* /proc/sys/vm/min_free_kbytes */
+ lxpr_readdir_not_a_dir, /* /proc/sys/vm/nr_hugepages */
+ lxpr_readdir_not_a_dir, /* /proc/sys/vm/overcommit_memory */
+ lxpr_readdir_not_a_dir, /* /proc/sys/vm/swappiness */
+ lxpr_readdir_not_a_dir, /* /proc/uptime */
+ lxpr_readdir_not_a_dir, /* /proc/version */
+ lxpr_readdir_not_a_dir, /* /proc/vmstat */
+};
+
+
+/*
+ * lxpr_read(): Vnode operation for VOP_READ()
+ *
+ * As the format of all the files that can be read in the lx procfs is human
+ * readable and not binary structures there do not have to be different
+ * read variants depending on whether the reading process model is 32 or 64 bits
+ * (at least in general, and certainly the difference is unlikely to be enough
+ * to justify have different routines for 32 and 64 bit reads
+ */
+/* ARGSUSED */
+static int
+lxpr_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
+ caller_context_t *ct)
+{
+ lxpr_node_t *lxpnp = VTOLXP(vp);
+ lxpr_nodetype_t type = lxpnp->lxpr_type;
+ lxpr_uiobuf_t *uiobuf = lxpr_uiobuf_new(uiop);
+ int error;
+
+ ASSERT(type < LXPR_NFILES);
+
+ if (type == LXPR_KMSG) {
+ ldi_ident_t li = VTOLXPM(vp)->lxprm_li;
+ ldi_handle_t ldih;
+ struct strioctl str;
+ int rv;
+
+ /*
+ * Open the zone's console device using the layered driver
+ * interface.
+ */
+ if ((error =
+ ldi_open_by_name("/dev/log", FREAD, cr, &ldih, li)) != 0)
+ return (error);
+
+ /*
+ * Send an ioctl to the underlying console device, letting it
+ * know we're interested in getting console messages.
+ */
+ str.ic_cmd = I_CONSLOG;
+ str.ic_timout = 0;
+ str.ic_len = 0;
+ str.ic_dp = NULL;
+ if ((error = ldi_ioctl(ldih, I_STR,
+ (intptr_t)&str, FKIOCTL, cr, &rv)) != 0)
+ return (error);
+
+ lxpr_read_kmsg(lxpnp, uiobuf, ldih);
+
+ if ((error = ldi_close(ldih, FREAD, cr)) != 0)
+ return (error);
+ } else {
+ lxpr_read_function[type](lxpnp, uiobuf);
+ }
+
+ error = lxpr_uiobuf_flush(uiobuf);
+ lxpr_uiobuf_free(uiobuf);
+
+ return (error);
+}
+
+/*
+ * lxpr_read_invalid(), lxpr_read_isdir(), lxpr_read_empty()
+ *
+ * Various special case reads:
+ * - trying to read a directory
+ * - invalid file (used to mean a file that should be implemented,
+ * but isn't yet)
+ * - empty file
+ * - wait to be able to read a file that will never have anything to read
+ */
+/* ARGSUSED */
+static void
+lxpr_read_isdir(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_uiobuf_seterr(uiobuf, EISDIR);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_invalid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_empty(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_pid_auxv(): read process aux vector
+ */
+static void
+lxpr_read_pid_auxv(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+ lx_proc_data_t *pd;
+ lx_elf_data_t *edp = NULL;
+ int i, cnt;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_AUXV ||
+ lxpnp->lxpr_type == LXPR_PID_TID_AUXV);
+
+ p = lxpr_lock(lxpnp->lxpr_pid, NO_ZOMB);
+
+ if (p == NULL) {
+ return;
+ }
+ if ((pd = ptolxproc(p)) == NULL) {
+ /* Emit a single AT_NULL record for non-branded processes */
+ auxv_t buf;
+
+ bzero(&buf, sizeof (buf));
+ lxpr_unlock(p);
+ lxpr_uiobuf_write(uiobuf, (char *)&buf, sizeof (buf));
+ return;
+ } else {
+ edp = &pd->l_elf_data;
+ }
+
+ if (p->p_model == DATAMODEL_NATIVE) {
+ auxv_t buf[__KERN_NAUXV_IMPL];
+
+ /*
+ * Because a_type is only of size int (not long), the buffer
+ * contents must be zeroed first to ensure cleanliness.
+ */
+ bzero(buf, sizeof (buf));
+ for (i = 0, cnt = 0; i < __KERN_NAUXV_IMPL; i++) {
+ if (lx_auxv_stol(&p->p_user.u_auxv[i],
+ &buf[cnt], edp) == 0) {
+ cnt++;
+ }
+ if (p->p_user.u_auxv[i].a_type == AT_NULL) {
+ break;
+ }
+ }
+ lxpr_unlock(p);
+ lxpr_uiobuf_write(uiobuf, (char *)buf, cnt * sizeof (buf[0]));
+ }
+#if defined(_SYSCALL32_IMPL)
+ else {
+ auxv32_t buf[__KERN_NAUXV_IMPL];
+
+ for (i = 0, cnt = 0; i < __KERN_NAUXV_IMPL; i++) {
+ auxv_t temp;
+
+ if (lx_auxv_stol(&p->p_user.u_auxv[i],
+ &temp, edp) == 0) {
+ buf[cnt].a_type = (int)temp.a_type;
+ buf[cnt].a_un.a_val = (int)temp.a_un.a_val;
+ cnt++;
+ }
+ if (p->p_user.u_auxv[i].a_type == AT_NULL) {
+ break;
+ }
+ }
+ lxpr_unlock(p);
+ lxpr_uiobuf_write(uiobuf, (char *)buf, cnt * sizeof (buf[0]));
+ }
+#endif /* defined(_SYSCALL32_IMPL) */
+}
+
+/*
+ * lxpr_read_pid_cgroup(): read cgroups for process
+ */
+static void
+lxpr_read_pid_cgroup(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_CGROUP ||
+ lxpnp->lxpr_type == LXPR_PID_TID_CGROUP);
+
+ p = lxpr_lock(lxpnp->lxpr_pid, ZOMB_OK);
+ if (p == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ return;
+ }
+ lxpr_unlock(p);
+
+ /* basic stub, 3rd field will need to be populated */
+ lxpr_uiobuf_printf(uiobuf, "1:name=systemd:/\n");
+}
+
+static void
+lxpr_copy_cmdline(proc_t *p, lx_proc_data_t *pd, lxpr_uiobuf_t *uiobuf)
+{
+ uio_t *uiop = uiobuf->uiop;
+ char *buf = uiobuf->buffer;
+ int bsz = uiobuf->buffsize;
+ boolean_t env_overflow = B_FALSE;
+ uintptr_t pos = pd->l_args_start + uiop->uio_offset;
+ uintptr_t estart = pd->l_envs_start;
+ uintptr_t eend = pd->l_envs_end;
+ size_t chunk, copied;
+ int err = 0;
+
+ /* Do not bother with data beyond the end of the envp strings area. */
+ if (pos > eend) {
+ return;
+ }
+ mutex_exit(&p->p_lock);
+
+ /*
+ * If the starting or ending bounds are outside the argv strings area,
+ * check to see if the process has overwritten the terminating NULL.
+ * If not, no data needs to be copied from oustide the argv area.
+ */
+ if (pos >= estart || (pos + uiop->uio_resid) >= estart) {
+ uint8_t term;
+ if (uread(p, &term, sizeof (term), estart - 1) != 0) {
+ err = EFAULT;
+ } else if (term != 0) {
+ env_overflow = B_TRUE;
+ }
+ }
+
+ /* Data between astart and estart-1 can be copied freely. */
+ while (pos < estart && uiop->uio_resid > 0 && err == 0) {
+ chunk = MIN(estart - pos, uiop->uio_resid);
+ chunk = MIN(chunk, bsz);
+
+ if (prreadbuf(p, pos, (uint8_t *)buf, chunk, &copied) != 0 ||
+ copied != chunk) {
+ err = EFAULT;
+ break;
+ }
+ err = uiomove(buf, copied, UIO_READ, uiop);
+ pos += copied;
+ }
+
+ /*
+ * Onward from estart, data is copied as a contiguous string. To
+ * protect env data from potential snooping, only one buffer-sized copy
+ * is allowed to avoid complex seek logic.
+ */
+ if (err == 0 && env_overflow && pos == estart && uiop->uio_resid > 0) {
+ chunk = MIN(eend - pos, uiop->uio_resid);
+ chunk = MIN(chunk, bsz);
+ if (prreadbuf(p, pos, (uint8_t *)buf, chunk, &copied) == 0) {
+ int len = strnlen(buf, copied);
+ if (len > 0) {
+ err = uiomove(buf, len, UIO_READ, uiop);
+ }
+ }
+ }
+
+ uiobuf->error = err;
+ /* reset any uiobuf state */
+ uiobuf->pos = uiobuf->buffer;
+ uiobuf->beg = 0;
+
+ mutex_enter(&p->p_lock);
+}
+
+/*
+ * lxpr_read_pid_cmdline(): read argument vector from process
+ */
+static void
+lxpr_read_pid_cmdline(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+ char *buf;
+ size_t asz = lxpr_maxargvlen, sz;
+ lx_proc_data_t *pd;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_CMDLINE ||
+ lxpnp->lxpr_type == LXPR_PID_TID_CMDLINE);
+
+ buf = kmem_alloc(asz, KM_SLEEP);
+
+ p = lxpr_lock(lxpnp->lxpr_pid, NO_ZOMB);
+ if (p == NULL) {
+ kmem_free(buf, asz);
+ return;
+ }
+
+ if ((pd = ptolxproc(p)) != NULL && pd->l_args_start != 0 &&
+ pd->l_envs_start != 0 && pd->l_envs_end != 0) {
+ /* Use Linux-style argv bounds if possible. */
+ lxpr_copy_cmdline(p, pd, uiobuf);
+ lxpr_unlock(p);
+ } else {
+ int r;
+
+ r = prreadargv(p, buf, asz, &sz);
+ lxpr_unlock(p);
+
+ if (r != 0) {
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ } else {
+ lxpr_uiobuf_write(uiobuf, buf, sz);
+ }
+ }
+ kmem_free(buf, asz);
+}
+
+/*
+ * lxpr_read_pid_comm(): read command from process
+ */
+static void
+lxpr_read_pid_comm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+ char buf[MAXCOMLEN + 1];
+
+ VERIFY(lxpnp->lxpr_type == LXPR_PID_COMM ||
+ lxpnp->lxpr_type == LXPR_PID_TID_COMM);
+
+ /*
+ * Because prctl(PR_SET_NAME) does not set custom names for threads
+ * (vs processes), there is no need for special handling here.
+ */
+ if ((p = lxpr_lock(lxpnp->lxpr_pid, ZOMB_OK)) == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ return;
+ }
+ strlcpy(buf, p->p_user.u_comm, sizeof (buf));
+ lxpr_unlock(p);
+ lxpr_uiobuf_printf(uiobuf, "%s\n", buf);
+}
+
+/*
+ * lxpr_read_pid_env(): read env vector from process
+ */
+static void
+lxpr_read_pid_env(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+ char *buf;
+ size_t asz = lxpr_maxenvvlen, sz;
+ int r;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_ENV);
+
+ buf = kmem_alloc(asz, KM_SLEEP);
+
+ p = lxpr_lock(lxpnp->lxpr_pid, NO_ZOMB);
+ if (p == NULL) {
+ kmem_free(buf, asz);
+ return;
+ }
+
+ r = prreadenvv(p, buf, asz, &sz);
+ lxpr_unlock(p);
+
+ if (r != 0) {
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ } else {
+ lxpr_uiobuf_write(uiobuf, buf, sz);
+ }
+ kmem_free(buf, asz);
+}
+
+/*
+ * lxpr_read_pid_limits(): ulimit file
+ */
+static void
+lxpr_read_pid_limits(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+ rctl_qty_t cur[LX_RLIM_TAB_LEN], max[LX_RLIM_TAB_LEN];
+ int i;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_LIMITS ||
+ lxpnp->lxpr_type == LXPR_PID_TID_LIMITS);
+
+ p = lxpr_lock(lxpnp->lxpr_pid, NO_ZOMB);
+ if (p == NULL) {
+ return;
+ }
+
+ for (i = 0; i < LX_RLIM_TAB_LEN; i++) {
+ char *kname = lxpr_rlimtab[i].rlim_rctl;
+ rctl_val_t nval, *oval = NULL;
+ rctl_hndl_t hndl;
+
+ /* default to unlimited for resources without an analog */
+ cur[i] = RLIM_INFINITY;
+ max[i] = RLIM_INFINITY;
+ if (kname == NULL || (hndl = rctl_hndl_lookup(kname)) == -1) {
+ continue;
+ }
+ while (rctl_local_get(hndl, oval, &nval, p) == 0) {
+ oval = &nval;
+ switch (nval.rcv_privilege) {
+ case RCPRIV_BASIC:
+ if (!RCTL_INFINITE(nval))
+ cur[i] = nval.rcv_value;
+ break;
+ case RCPRIV_PRIVILEGED:
+ if (!RCTL_INFINITE(nval))
+ max[i] = nval.rcv_value;
+ break;
+ }
+ }
+ }
+ lxpr_unlock(p);
+
+ lxpr_uiobuf_printf(uiobuf, "%-25s %-20s %-20s %-10s\n",
+ "Limit", "Soft Limit", "Hard Limit", "Units");
+ for (i = 0; i < LX_RLIM_TAB_LEN; i++) {
+ lxpr_uiobuf_printf(uiobuf, "%-25s", lxpr_rlimtab[i].rlim_name);
+ if (cur[i] == RLIM_INFINITY || cur[i] == LX_RLIM_INFINITY) {
+ lxpr_uiobuf_printf(uiobuf, " %-20s", "unlimited");
+ } else {
+ lxpr_uiobuf_printf(uiobuf, " %-20lu", cur[i]);
+ }
+ if (max[i] == RLIM_INFINITY || max[i] == LX_RLIM_INFINITY) {
+ lxpr_uiobuf_printf(uiobuf, " %-20s", "unlimited");
+ } else {
+ lxpr_uiobuf_printf(uiobuf, " %-20lu", max[i]);
+ }
+ lxpr_uiobuf_printf(uiobuf, " %-10s\n",
+ lxpr_rlimtab[i].rlim_unit);
+ }
+}
+
+/*
+ * lxpr_read_pid_loginuid(): loginuid file
+ */
+static void
+lxpr_read_pid_loginuid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+ lx_proc_data_t *pd;
+ uid_t lu = 0;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_LOGINUID ||
+ lxpnp->lxpr_type == LXPR_PID_TID_LOGINUID);
+
+ p = lxpr_lock(lxpnp->lxpr_pid, NO_ZOMB);
+ if (p == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, ENXIO);
+ return;
+ }
+
+ if ((pd = ptolxproc(p)) != NULL) {
+ lu = pd->l_loginuid;
+ }
+ lxpr_unlock(p);
+
+ lxpr_uiobuf_printf(uiobuf, "%d", lu);
+}
+
+/*
+ * lxpr_read_pid_maps(): memory map file
+ */
+static void
+lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+ lx_proc_data_t *lxpd;
+ struct as *as;
+ struct seg *seg;
+ char *buf;
+ int buflen = MAXPATHLEN;
+ struct print_data {
+ uintptr_t saddr;
+ uintptr_t eaddr;
+ int type;
+ char prot[5];
+ uintptr_t offset;
+ vnode_t *vp;
+ char *name_override;
+ struct print_data *next;
+ } *print_head = NULL;
+ struct print_data **print_tail = &print_head;
+ struct print_data *pbuf;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_MAPS ||
+ lxpnp->lxpr_type == LXPR_PID_TID_MAPS);
+
+ p = lxpr_lock(lxpnp->lxpr_pid, NO_ZOMB);
+ if (p == NULL) {
+ return;
+ }
+
+ as = p->p_as;
+ lxpd = ptolxproc(p);
+
+ if (as == &kas) {
+ lxpr_unlock(p);
+ return;
+ }
+
+ mutex_exit(&p->p_lock);
+
+ /* Iterate over all segments in the address space */
+ AS_LOCK_ENTER(as, RW_READER);
+ for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
+ vnode_t *vp;
+ uint_t protbits;
+
+ pbuf = kmem_alloc(sizeof (*pbuf), KM_SLEEP);
+
+ pbuf->saddr = (uintptr_t)seg->s_base;
+ pbuf->eaddr = pbuf->saddr + seg->s_size;
+ pbuf->type = SEGOP_GETTYPE(seg, seg->s_base);
+
+ /*
+ * Cheat and only use the protection bits of the first page
+ * in the segment
+ */
+ (void) strncpy(pbuf->prot, "----", sizeof (pbuf->prot));
+ (void) SEGOP_GETPROT(seg, seg->s_base, 0, &protbits);
+
+ if (protbits & PROT_READ) pbuf->prot[0] = 'r';
+ if (protbits & PROT_WRITE) pbuf->prot[1] = 'w';
+ if (protbits & PROT_EXEC) pbuf->prot[2] = 'x';
+ if (pbuf->type & MAP_SHARED) pbuf->prot[3] = 's';
+ else if (pbuf->type & MAP_PRIVATE) pbuf->prot[3] = 'p';
+
+ if (seg->s_ops == &segvn_ops &&
+ SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
+ vp != NULL && vp->v_type == VREG) {
+ VN_HOLD(vp);
+ pbuf->vp = vp;
+ } else {
+ pbuf->vp = NULL;
+ }
+
+ pbuf->offset = SEGOP_GETOFFSET(seg, (caddr_t)pbuf->saddr);
+
+ pbuf->name_override = NULL;
+ if (lxpd != NULL) {
+ if (pbuf->saddr == lxpd->l_vdso) {
+ pbuf->name_override = "[vdso]";
+ } else if (pbuf->saddr == p->p_user.u_commpagep) {
+ pbuf->name_override = "[vvar]";
+ }
+ }
+
+ pbuf->next = NULL;
+ *print_tail = pbuf;
+ print_tail = &pbuf->next;
+ }
+ AS_LOCK_EXIT(as);
+ mutex_enter(&p->p_lock);
+ lxpr_unlock(p);
+
+ buf = kmem_alloc(buflen, KM_SLEEP);
+
+ /* print the data we've extracted */
+ pbuf = print_head;
+ while (pbuf != NULL) {
+ struct print_data *pbuf_next;
+ vattr_t vattr;
+
+ int maj = 0;
+ int min = 0;
+ ino_t inode = 0;
+
+ *buf = '\0';
+ if (pbuf->name_override != NULL) {
+ (void) strncpy(buf, pbuf->name_override, buflen);
+ } else if (pbuf->vp != NULL) {
+ vattr.va_mask = AT_FSID | AT_NODEID;
+ if (VOP_GETATTR(pbuf->vp, &vattr, 0, CRED(),
+ NULL) == 0) {
+ maj = getmajor(vattr.va_fsid);
+ min = getminor(vattr.va_fsid);
+ inode = vattr.va_nodeid;
+ }
+ (void) vnodetopath(NULL, pbuf->vp, buf, buflen, CRED());
+ VN_RELE(pbuf->vp);
+ }
+
+ if (p->p_model == DATAMODEL_LP64) {
+ lxpr_uiobuf_printf(uiobuf,
+ "%08llx-%08llx %s %08llx %02x:%02x %llu%s%s\n",
+ pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset,
+ maj, min, inode, *buf != '\0' ? " " : "", buf);
+ } else {
+ lxpr_uiobuf_printf(uiobuf,
+ "%08x-%08x %s %08x %02x:%02x %llu%s%s\n",
+ (uint32_t)pbuf->saddr, (uint32_t)pbuf->eaddr,
+ pbuf->prot, (uint32_t)pbuf->offset, maj, min,
+ inode, *buf != '\0' ? " " : "", buf);
+ }
+
+ pbuf_next = pbuf->next;
+ kmem_free(pbuf, sizeof (*pbuf));
+ pbuf = pbuf_next;
+ }
+
+ kmem_free(buf, buflen);
+}
+
+/*
+ * Make mount entry look more like Linux. Non-zero return to skip it.
+ */
+static int
+lxpr_clean_mntent(char **mntpt, char **fstype, char **resource)
+{
+ if (strcmp(*mntpt, "/var/ld") == 0 ||
+ strcmp(*fstype, "objfs") == 0 ||
+ strcmp(*fstype, "mntfs") == 0 ||
+ strcmp(*fstype, "ctfs") == 0 ||
+ strncmp(*mntpt, "/native/", 8) == 0) {
+ return (1);
+ }
+
+ if (strcmp(*fstype, "tmpfs") == 0) {
+ *resource = "tmpfs";
+ } else if (strcmp(*fstype, "lx_proc") == 0) {
+ *resource = *fstype = "proc";
+ } else if (strcmp(*fstype, "lx_sysfs") == 0) {
+ *resource = *fstype = "sysfs";
+ } else if (strcmp(*fstype, "lx_devfs") == 0) {
+ *resource = *fstype = "devtmpfs";
+ } else if (strcmp(*fstype, "lx_cgroup") == 0) {
+ *resource = *fstype = "cgroup";
+ } else if (strcmp(*fstype, "lxautofs") == 0) {
+ *fstype = "autofs";
+ }
+
+ return (0);
+}
+
+
+typedef struct lxpr_mount_entry {
+ list_node_t lme_link;
+ uint_t lme_id;
+ uint_t lme_parent_id;
+ refstr_t *lme_mntpt;
+ refstr_t *lme_resource;
+ uint_t lme_flag;
+ int lme_fstype;
+ dev_t lme_dev;
+ boolean_t lme_force;
+} lxpr_mount_entry_t;
+
+static int lxpr_zfs_fstype = -1;
+
+#define LXPR_ROOT_MOUNT_ID 15
+
+static list_t *
+lxpr_enumerate_mounts(zone_t *zone)
+{
+ vfs_t *vfsp, *rvfsp, *vfslist;
+ lx_zone_data_t *lxzd = ztolxzd(zone);
+ list_t *result;
+ lxpr_mount_entry_t *lme;
+ lx_virt_disk_t *vd;
+ uint_t root_id, mount_id;
+ char tmppath[MAXPATHLEN];
+
+ result = kmem_alloc(sizeof (list_t), KM_SLEEP);
+ list_create(result, sizeof (lxpr_mount_entry_t),
+ offsetof(lxpr_mount_entry_t, lme_link));
+ /* use an arbitrary start value for the root mount_id */
+ root_id = 15;
+ mount_id = root_id + 1;
+
+ ASSERT(zone != global_zone);
+ ASSERT(lxzd != NULL);
+ ASSERT(lxzd->lxzd_vdisks != NULL);
+
+ vfs_list_read_lock();
+ vfsp = vfslist = zone->zone_vfslist;
+
+ /*
+ * If the zone has a root entry, it will be the first in the list.
+ * Conjure one up if needed.
+ */
+ if (vfslist == NULL || strcmp(refstr_value(vfsp->vfs_mntpt),
+ zone->zone_rootpath) != 0) {
+ rvfsp = zone->zone_rootvp->v_vfsp;
+ } else {
+ rvfsp = vfslist;
+ vfsp = vfslist->vfs_zone_next;
+ }
+
+ lme = kmem_alloc(sizeof (lxpr_mount_entry_t), KM_SLEEP);
+ lme->lme_id = root_id;
+ lme->lme_parent_id = 0;
+ lme->lme_mntpt = refstr_alloc(zone->zone_rootpath);
+ lme->lme_flag = rvfsp->vfs_flag;
+ lme->lme_fstype = rvfsp->vfs_fstype;
+ lme->lme_force = B_TRUE;
+
+ lme->lme_resource = NULL;
+ vd = list_head(lxzd->lxzd_vdisks);
+ while (vd != NULL) {
+ if (vd->lxvd_type == LXVD_ZFS_DS &&
+ vd->lxvd_real_dev == rvfsp->vfs_dev) {
+ (void) snprintf(tmppath, sizeof (tmppath),
+ "%sdev/%s", zone->zone_rootpath, vd->lxvd_name);
+ lme->lme_resource = refstr_alloc(tmppath);
+ lme->lme_dev = vd->lxvd_emul_dev;
+ break;
+ }
+ vd = list_next(lxzd->lxzd_vdisks, vd);
+ }
+ if (lme->lme_resource == NULL) {
+ lme->lme_resource = refstr_alloc(zone->zone_rootpath);
+ lme->lme_dev = rvfsp->vfs_dev;
+ }
+ list_insert_head(result, lme);
+
+ do {
+ if (vfsp == NULL) {
+ break;
+ }
+ /* Skip mounts we shouldn't show */
+ if ((vfsp->vfs_flag & VFS_NOMNTTAB) != 0) {
+ vfsp = vfsp->vfs_zone_next;
+ continue;
+ }
+
+ lme = kmem_alloc(sizeof (lxpr_mount_entry_t), KM_SLEEP);
+ lme->lme_id = mount_id++;
+ lme->lme_parent_id = root_id;
+ lme->lme_mntpt = vfsp->vfs_mntpt;
+ refstr_hold(vfsp->vfs_mntpt);
+ lme->lme_flag = vfsp->vfs_flag;
+ lme->lme_fstype = vfsp->vfs_fstype;
+ lme->lme_force = B_FALSE;
+
+ lme->lme_resource = NULL;
+ vd = list_head(lxzd->lxzd_vdisks);
+ while (vd != NULL) {
+ if (vd->lxvd_type == LXVD_ZFS_DS &&
+ vd->lxvd_real_dev == vfsp->vfs_dev) {
+ char vdev[MAXPATHLEN];
+
+ (void) snprintf(vdev, sizeof (vdev),
+ "%sdev/%s",
+ zone->zone_rootpath, vd->lxvd_name);
+ lme->lme_resource = refstr_alloc(vdev);
+ lme->lme_dev = vd->lxvd_emul_dev;
+ break;
+ }
+ vd = list_next(lxzd->lxzd_vdisks, vd);
+ }
+ if (lme->lme_resource == NULL) {
+ lme->lme_resource = vfsp->vfs_resource;
+ refstr_hold(vfsp->vfs_resource);
+ lme->lme_dev = vfsp->vfs_dev;
+ }
+ list_insert_tail(result, lme);
+ vfsp = vfsp->vfs_zone_next;
+ } while (vfsp != vfslist);
+
+ vfs_list_unlock();
+
+ /* Add a single dummy entry for /native/usr */
+ lme = kmem_alloc(sizeof (lxpr_mount_entry_t), KM_SLEEP);
+ lme->lme_id = mount_id++;
+ lme->lme_parent_id = root_id;
+ lme->lme_flag = VFS_RDONLY;
+ lme->lme_dev = makedevice(0, 1);
+ (void) snprintf(tmppath, sizeof (tmppath),
+ "%snative/usr", zone->zone_rootpath);
+ lme->lme_mntpt = refstr_alloc(tmppath);
+ lme->lme_resource = lme->lme_mntpt;
+ refstr_hold(lme->lme_mntpt);
+ if (lxpr_zfs_fstype == -1) {
+ vfssw_t *zfssw = vfs_getvfssw("zfs");
+ VERIFY(zfssw != NULL);
+ lxpr_zfs_fstype = ((uintptr_t)zfssw - (uintptr_t)vfssw) /
+ sizeof (vfssw[0]);
+ VERIFY(&vfssw[lxpr_zfs_fstype] == zfssw);
+ }
+ lme->lme_fstype = lxpr_zfs_fstype;
+ lme->lme_force = B_TRUE;
+ list_insert_tail(result, lme);
+
+ return (result);
+}
+
+/*
+ * lxpr_read_pid_mountinfo(): information about process mount points.
+ */
+static void
+lxpr_read_pid_mountinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ zone_t *zone = LXPTOZ(lxpnp);
+ list_t *mounts;
+ lxpr_mount_entry_t *lme;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_MOUNTINFO ||
+ lxpnp->lxpr_type == LXPR_PID_TID_MOUNTINFO);
+
+ mounts = lxpr_enumerate_mounts(zone);
+
+ /*
+ * now we can run through what we've extracted without holding
+ * vfs_list_read_lock()
+ */
+ lme = (lxpr_mount_entry_t *)list_remove_head(mounts);
+ while (lme != NULL) {
+ char *resource, *mntpt, *fstype, *rwflag;
+ vnode_t *vp;
+ int error;
+
+ mntpt = (char *)refstr_value(lme->lme_mntpt);
+ resource = (char *)refstr_value(lme->lme_resource);
+
+ if (mntpt == NULL || mntpt[0] == '\0') {
+ goto nextp;
+ }
+ mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
+ error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
+ if (error != 0) {
+ goto nextp;
+ } else if ((vp->v_flag & VROOT) == 0 && !lme->lme_force) {
+ VN_RELE(vp);
+ goto nextp;
+ }
+ VN_RELE(vp);
+
+ if (resource != NULL && resource[0] != '\0') {
+ if (resource[0] == '/') {
+ resource = ZONE_PATH_VISIBLE(resource, zone) ?
+ ZONE_PATH_TRANSLATE(resource, zone) : mntpt;
+ }
+ } else {
+ resource = "none";
+ }
+
+ /* Make things look more like Linux. */
+ fstype = vfssw[lme->lme_fstype].vsw_name;
+ if (lxpr_clean_mntent(&mntpt, &fstype, &resource) != 0 &&
+ !lme->lme_force) {
+ goto nextp;
+ }
+ rwflag = ((lme->lme_flag & VFS_RDONLY) == 0) ? "rw" : "ro";
+
+ /*
+ * XXX parent ID is not tracked correctly here. Currently we
+ * always assume the parent ID is the root ID.
+ */
+ lxpr_uiobuf_printf(uiobuf,
+ "%d %d %d:%d / %s %s - %s %s %s\n",
+ lme->lme_id, lme->lme_parent_id,
+ getmajor(lme->lme_dev), getminor(lme->lme_dev),
+ mntpt, rwflag, fstype, resource, rwflag);
+
+nextp:
+ refstr_rele(lme->lme_mntpt);
+ refstr_rele(lme->lme_resource);
+ kmem_free(lme, sizeof (lxpr_mount_entry_t));
+ lme = (lxpr_mount_entry_t *)list_remove_head(mounts);
+ }
+
+ list_destroy(mounts);
+ kmem_free(mounts, sizeof (list_t));
+}
+
+/*
+ * lxpr_read_pid_oom_scr_adj(): read oom_score_adj for process
+ */
+static void
+lxpr_read_pid_oom_scr_adj(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_OOM_SCR_ADJ ||
+ lxpnp->lxpr_type == LXPR_PID_TID_OOM_SCR_ADJ);
+
+ p = lxpr_lock(lxpnp->lxpr_pid, ZOMB_OK);
+ if (p == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ return;
+ }
+ lxpr_unlock(p);
+
+ /* always 0 */
+ lxpr_uiobuf_printf(uiobuf, "0\n");
+}
+
+/*
+ * lxpr_read_pid_personality(): read personality for process
+ */
+static void
+lxpr_read_pid_personality(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+ lx_proc_data_t *lxpd;
+ unsigned int personality;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_PERSONALITY);
+
+ p = lxpr_lock(lxpnp->lxpr_pid, ZOMB_OK);
+ if (p == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ return;
+ }
+ if ((lxpd = ptolxproc(p)) != NULL) {
+ personality = lxpd->l_personality;
+ } else {
+ /* Report native processes as having the SunOS personality */
+ personality = LX_PER_SUNOS;
+ }
+ lxpr_unlock(p);
+
+ lxpr_uiobuf_printf(uiobuf, "%08x\n", personality);
+}
+
+/*
+ * lxpr_read_pid_statm(): memory status file
+ */
+static void
+lxpr_read_pid_statm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+ struct as *as;
+ size_t vsize, rss;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_STATM ||
+ lxpnp->lxpr_type == LXPR_PID_TID_STATM);
+
+ p = lxpr_lock(lxpnp->lxpr_pid, ZOMB_OK);
+ if (p == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ return;
+ }
+
+ as = p->p_as;
+ mutex_exit(&p->p_lock);
+ if (as != &kas) {
+ AS_LOCK_ENTER(as, RW_READER);
+ vsize = btopr(as->a_resvsize);
+ rss = rm_asrss(as);
+ AS_LOCK_EXIT(as);
+ } else {
+ vsize = 0;
+ rss = 0;
+ }
+ mutex_enter(&p->p_lock);
+ lxpr_unlock(p);
+
+ lxpr_uiobuf_printf(uiobuf,
+ "%lu %lu %lu %lu %lu %lu %lu\n",
+ vsize, rss, 0l, rss, 0l, 0l, 0l);
+}
+
+/*
+ * Look for either the main thread (lookup_id is 0) or the specified thread.
+ * If we're looking for the main thread but the proc does not have one, we
+ * fallback to using prchoose to get any thread available.
+ */
+static kthread_t *
+lxpr_get_thread(proc_t *p, uint_t lookup_id)
+{
+ kthread_t *t;
+ uint_t emul_tid;
+ lx_lwp_data_t *lwpd;
+ pid_t pid = p->p_pid;
+ pid_t init_pid = curproc->p_zone->zone_proc_initpid;
+ boolean_t branded = (p->p_brand == &lx_brand);
+
+ /* get specified thread */
+ if ((t = p->p_tlist) == NULL)
+ return (NULL);
+
+ do {
+ if (lookup_id == 0 && t->t_tid == 1) {
+ thread_lock(t);
+ return (t);
+ }
+
+ lwpd = ttolxlwp(t);
+ if (branded && lwpd != NULL) {
+ if (pid == init_pid && lookup_id == 1) {
+ emul_tid = t->t_tid;
+ } else {
+ emul_tid = lwpd->br_pid;
+ }
+ } else {
+ /*
+ * Make only the first (assumed to be main) thread
+ * visible for non-branded processes.
+ */
+ emul_tid = p->p_pid;
+ }
+ if (emul_tid == lookup_id) {
+ thread_lock(t);
+ return (t);
+ }
+ } while ((t = t->t_forw) != p->p_tlist);
+
+ if (lookup_id == 0)
+ return (prchoose(p));
+ return (NULL);
+}
+
+/*
+ * Lookup the real pid for procs 0 or 1.
+ */
+static pid_t
+get_real_pid(pid_t p)
+{
+ pid_t find_pid;
+
+ if (p == 1) {
+ find_pid = curproc->p_zone->zone_proc_initpid;
+ } else if (p == 0) {
+ find_pid = curproc->p_zone->zone_zsched->p_pid;
+ } else {
+ find_pid = p;
+ }
+
+ return (find_pid);
+}
+
+/*
+ * pid/tid common code to read status file
+ */
+static void
+lxpr_read_status_common(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf,
+ uint_t lookup_id)
+{
+ proc_t *p;
+ kthread_t *t;
+ user_t *up;
+ cred_t *cr;
+ const gid_t *groups;
+ struct as *as;
+ char *status;
+ pid_t pid, ppid;
+ k_sigset_t current, ignore, handle;
+ int i, lx_sig, lwpcnt, ngroups;
+ pid_t real_pid;
+ char buf_comm[MAXCOMLEN + 1];
+ rlim64_t fdlim;
+ size_t vsize = 0, nlocked = 0, rss = 0, stksize = 0;
+ boolean_t printsz = B_FALSE;
+
+ real_pid = get_real_pid(lxpnp->lxpr_pid);
+ p = lxpr_lock(real_pid, ZOMB_OK);
+ if (p == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ return;
+ }
+
+ pid = p->p_pid;
+
+ /*
+ * Convert pid to the Linux default of 1 if we're the zone's init
+ * process or if we're the zone's zsched the pid is 0.
+ */
+ if (pid == curproc->p_zone->zone_proc_initpid) {
+ pid = 1;
+ ppid = 0; /* parent pid for init is 0 */
+ } else if (pid == curproc->p_zone->zone_zsched->p_pid) {
+ pid = 0; /* zsched is pid 0 */
+ ppid = 0; /* parent pid for zsched is itself */
+ } else {
+ /*
+ * Make sure not to reference parent PIDs that reside outside
+ * the zone
+ */
+ ppid = ((p->p_flag & SZONETOP)
+ ? curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
+
+ /*
+ * Convert ppid to the Linux default of 1 if our parent is the
+ * zone's init process
+ */
+ if (ppid == curproc->p_zone->zone_proc_initpid)
+ ppid = 1;
+ }
+
+ t = lxpr_get_thread(p, lookup_id);
+ if (t != NULL) {
+ switch (t->t_state) {
+ case TS_SLEEP:
+ status = "S (sleeping)";
+ break;
+ case TS_RUN:
+ case TS_ONPROC:
+ status = "R (running)";
+ break;
+ case TS_ZOMB:
+ status = "Z (zombie)";
+ break;
+ case TS_STOPPED:
+ status = "T (stopped)";
+ break;
+ default:
+ status = "! (unknown)";
+ break;
+ }
+ thread_unlock(t);
+ } else {
+ if (lookup_id != 0) {
+ /* we can't find this specific thread */
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ lxpr_unlock(p);
+ return;
+ }
+
+ /*
+ * there is a hole in the exit code, where a proc can have
+ * no threads but it is yet to be flagged SZOMB. We will
+ * assume we are about to become a zombie
+ */
+ status = "Z (zombie)";
+ }
+
+ up = PTOU(p);
+ mutex_enter(&p->p_crlock);
+ crhold(cr = p->p_cred);
+ mutex_exit(&p->p_crlock);
+
+ strlcpy(buf_comm, up->u_comm, sizeof (buf_comm));
+ fdlim = p->p_fno_ctl;
+ lwpcnt = p->p_lwpcnt;
+
+ /*
+ * Gather memory information
+ */
+ as = p->p_as;
+ if ((p->p_stat != SZOMB) && !(p->p_flag & (SSYS | SEXITING)) &&
+ (as != &kas)) {
+ mutex_exit(&p->p_lock);
+ AS_LOCK_ENTER(as, RW_READER);
+ vsize = as->a_resvsize;
+ rss = rm_asrss(as);
+ AS_LOCK_EXIT(as);
+ mutex_enter(&p->p_lock);
+
+ nlocked = p->p_locked_mem;
+ stksize = p->p_stksize;
+ printsz = B_TRUE;
+ }
+
+ /*
+ * Gather signal information
+ */
+ sigemptyset(&current);
+ sigemptyset(&ignore);
+ sigemptyset(&handle);
+ for (i = 1; i < NSIG; i++) {
+ lx_sig = stol_signo[i];
+
+ if ((lx_sig > 0) && (lx_sig <= LX_NSIG)) {
+ if (sigismember(&p->p_sig, i))
+ sigaddset(&current, lx_sig);
+
+ if (up->u_signal[i - 1] == SIG_IGN)
+ sigaddset(&ignore, lx_sig);
+ else if (up->u_signal[i - 1] != SIG_DFL)
+ sigaddset(&handle, lx_sig);
+ }
+ }
+ lxpr_unlock(p);
+
+ lxpr_uiobuf_printf(uiobuf,
+ "Name:\t%s\n"
+ "State:\t%s\n"
+ "Tgid:\t%d\n"
+ "Pid:\t%d\n"
+ "PPid:\t%d\n"
+ "TracerPid:\t%d\n"
+ "Uid:\t%u\t%u\t%u\t%u\n"
+ "Gid:\t%u\t%u\t%u\t%u\n"
+ "FDSize:\t%d\n"
+ "Groups:\t",
+ buf_comm,
+ status,
+ pid, /* thread group id - same as pid */
+ (lookup_id == 0) ? pid : lxpnp->lxpr_desc,
+ ppid,
+ 0,
+ crgetruid(cr), crgetuid(cr), crgetsuid(cr), crgetuid(cr),
+ crgetrgid(cr), crgetgid(cr), crgetsgid(cr), crgetgid(cr),
+ fdlim);
+ ngroups = crgetngroups(cr);
+ groups = crgetgroups(cr);
+ for (i = 0; i < ngroups; i++) {
+ lxpr_uiobuf_printf(uiobuf,
+ "%u ",
+ groups[i]);
+ }
+ crfree(cr);
+ if (printsz) {
+ lxpr_uiobuf_printf(uiobuf,
+ "\n"
+ "VmSize:\t%8lu kB\n"
+ "VmLck:\t%8lu kB\n"
+ "VmRSS:\t%8lu kB\n"
+ "VmData:\t%8lu kB\n"
+ "VmStk:\t%8lu kB\n"
+ "VmExe:\t%8lu kB\n"
+ "VmLib:\t%8lu kB",
+ btok(vsize),
+ btok(nlocked),
+ ptok(rss),
+ 0l,
+ btok(stksize),
+ ptok(rss),
+ 0l);
+ }
+ lxpr_uiobuf_printf(uiobuf, "\nThreads:\t%u\n", lwpcnt);
+ lxpr_uiobuf_printf(uiobuf,
+ "SigPnd:\t%08x%08x\n"
+ "SigBlk:\t%08x%08x\n"
+ "SigIgn:\t%08x%08x\n"
+ "SigCgt:\t%08x%08x\n",
+ current.__sigbits[1], current.__sigbits[0],
+ 0, 0, /* signals blocked on per thread basis */
+ ignore.__sigbits[1], ignore.__sigbits[0],
+ handle.__sigbits[1], handle.__sigbits[0]);
+ /* Report only the full bounding set for now */
+ lxpr_uiobuf_printf(uiobuf,
+ "CapInh:\t%016x\n"
+ "CapPrm:\t%016x\n"
+ "CapEff:\t%016x\n"
+ "CapBnd:\t%016llx\n",
+ 0, 0, 0, 0x1fffffffffLL);
+}
+
+/*
+ * lxpr_read_pid_status(): status file
+ */
+static void
+lxpr_read_pid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_STATUS);
+
+ lxpr_read_status_common(lxpnp, uiobuf, 0);
+}
+
+/*
+ * lxpr_read_pid_tid_status(): status file
+ */
+static void
+lxpr_read_pid_tid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_TID_STATUS);
+ lxpr_read_status_common(lxpnp, uiobuf, lxpnp->lxpr_desc);
+}
+
+/*
+ * Same logic as the lx devfs lxd_pts_devt_translator.
+ */
+static dev_t
+lxpr_xlate_pts_dev(dev_t dev)
+{
+ minor_t min = getminor(dev);
+ int lx_maj, lx_min;
+
+ lx_maj = LX_PTS_MAJOR_MIN + (min / LX_MAXMIN);
+ lx_min = min % LX_MAXMIN;
+
+ return (LX_MAKEDEVICE(lx_maj, lx_min));
+}
+
+/*
+ * pid/tid common code to read stat file
+ */
+static void
+lxpr_read_stat_common(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf,
+ uint_t lookup_id)
+{
+ proc_t *p;
+ kthread_t *t;
+ struct as *as;
+ char stat;
+ pid_t pid, ppid, pgpid, spid;
+ gid_t psgid;
+ dev_t psdev;
+ size_t rss, vsize;
+ int nice, pri, lwpcnt;
+ caddr_t wchan, stackbase;
+ processorid_t cpu;
+ pid_t real_pid;
+ clock_t utime, stime, cutime, cstime, ticks, boottime;
+ char buf_comm[MAXCOMLEN + 1];
+ rlim64_t vmem_ctl;
+
+ real_pid = get_real_pid(lxpnp->lxpr_pid);
+ p = lxpr_lock(real_pid, ZOMB_OK);
+ if (p == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ return;
+ }
+
+ /*
+ * Set Linux defaults if we're the zone's init process
+ */
+ pid = p->p_pid;
+ if (pid == curproc->p_zone->zone_proc_initpid) {
+ pid = 1; /* PID for init */
+ ppid = 0; /* parent PID for init is 0 */
+ pgpid = 0; /* process group for init is 0 */
+ psgid = (gid_t)-1; /* credential GID for init is -1 */
+ spid = 0; /* session id for init is 0 */
+ psdev = 0; /* session device for init is 0 */
+ } else if (pid == curproc->p_zone->zone_zsched->p_pid) {
+ pid = 0; /* PID for zsched */
+ ppid = 0; /* parent PID for zsched is 0 */
+ pgpid = 0; /* process group for zsched is 0 */
+ psgid = (gid_t)-1; /* credential GID for zsched is -1 */
+ spid = 0; /* session id for zsched is 0 */
+ psdev = 0; /* session device for zsched is 0 */
+ } else {
+ /*
+ * Make sure not to reference parent PIDs that reside outside
+ * the zone
+ */
+ ppid = ((p->p_flag & SZONETOP) ?
+ curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
+
+ /*
+ * Convert ppid to the Linux default of 1 if our parent is the
+ * zone's init process
+ */
+ if (ppid == curproc->p_zone->zone_proc_initpid)
+ ppid = 1;
+
+ pgpid = p->p_pgrp;
+
+ mutex_enter(&p->p_splock);
+ mutex_enter(&p->p_sessp->s_lock);
+ spid = p->p_sessp->s_sid;
+ psdev = lxpr_xlate_pts_dev(p->p_sessp->s_dev);
+ if (p->p_sessp->s_cred)
+ psgid = crgetgid(p->p_sessp->s_cred);
+ else
+ psgid = crgetgid(p->p_cred);
+
+ mutex_exit(&p->p_sessp->s_lock);
+ mutex_exit(&p->p_splock);
+ }
+
+ if ((p->p_stat == SZOMB) || (p->p_flag & (SSYS | SEXITING)) ||
+ (p->p_as == &kas)) {
+ stackbase = 0;
+ } else {
+ /* from prgetstackbase() */
+ stackbase = p->p_usrstack - p->p_stksize;
+ }
+
+ utime = stime = 0;
+ t = lxpr_get_thread(p, lookup_id);
+ if (t != NULL) {
+ klwp_t *lwp = ttolwp(t);
+ struct mstate *ms = &lwp->lwp_mstate;
+ hrtime_t utm, stm;
+
+ switch (t->t_state) {
+ case TS_SLEEP:
+ stat = 'S';
+ break;
+ case TS_RUN:
+ case TS_ONPROC:
+ stat = 'R';
+ break;
+ case TS_ZOMB:
+ stat = 'Z';
+ break;
+ case TS_STOPPED:
+ stat = 'T';
+ break;
+ default:
+ stat = '!';
+ break;
+ }
+
+ if (CL_DONICE(t, NULL, 0, &nice) != 0)
+ nice = 0;
+
+ pri = t->t_pri;
+ wchan = t->t_wchan;
+ cpu = t->t_cpu->cpu_id;
+
+ utm = ms->ms_acct[LMS_USER];
+ stm = ms->ms_acct[LMS_SYSTEM];
+
+ thread_unlock(t);
+
+ /* convert unscaled high-res time to nanoseconds */
+ scalehrtime(&utm);
+ scalehrtime(&stm);
+
+ /* Linux /proc expects these values in ticks */
+ utime = (clock_t)NSEC_TO_TICK(utm);
+ stime = (clock_t)NSEC_TO_TICK(stm);
+ } else {
+ if (lookup_id != 0) {
+ /* we can't find this specific thread */
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ lxpr_unlock(p);
+ return;
+ }
+
+ /* Only zombies have no threads */
+ stat = 'Z';
+ nice = 0;
+ pri = 0;
+ wchan = 0;
+ cpu = 0;
+ }
+ as = p->p_as;
+ mutex_exit(&p->p_lock);
+ if (as != &kas) {
+ AS_LOCK_ENTER(as, RW_READER);
+ vsize = as->a_resvsize;
+ rss = rm_asrss(as);
+ AS_LOCK_EXIT(as);
+ } else {
+ vsize = 0;
+ rss = 0;
+ }
+ mutex_enter(&p->p_lock);
+
+ if (lookup_id == 0) {
+ /* process */
+ utime = p->p_utime;
+ stime = p->p_stime;
+ } else {
+ /* tid: utime & stime for the thread set in block above */
+ }
+ cutime = p->p_cutime;
+ cstime = p->p_cstime;
+ lwpcnt = p->p_lwpcnt;
+ vmem_ctl = p->p_vmem_ctl;
+ strlcpy(buf_comm, p->p_user.u_comm, sizeof (buf_comm));
+ ticks = p->p_user.u_ticks; /* lbolt at process start */
+ /* adjust ticks to account for zone boot time */
+ boottime = LXPTOZ(lxpnp)->zone_zsched->p_user.u_ticks;
+ ticks -= boottime;
+ lxpr_unlock(p);
+
+ lxpr_uiobuf_printf(uiobuf,
+ "%d " /* 1 */
+ "(%s) %c %d %d %d %d %d " /* 2-8 */
+ "%lu %lu %lu %lu %lu " /* 9-13 */
+ "%lu %lu %ld %ld " /* 14-17 */
+ "%d %d %d " /* 18-20 */
+ "%lu " /* 21 */
+ "%lu " /* 22 */
+ "%lu %ld %llu " /* 23-25 */
+ "%lu %lu %llu " /* 26-28 */
+ "%lu %lu " /* 29-30 */
+ "%lu %lu %lu %lu " /* 31-34 */
+ "%lu " /* 35 */
+ "%lu %lu " /* 36-37 */
+ "%d " /* 38 */
+ "%d" /* 39 */
+ "\n",
+ (lookup_id == 0) ? pid : lxpnp->lxpr_desc, /* 1 */
+ buf_comm, stat, ppid, pgpid, spid, psdev, psgid, /* 2-8 */
+ 0l, 0l, 0l, 0l, 0l, /* flags, minflt, cminflt, majflt, cmajflt */
+ utime, stime, cutime, cstime, /* 14-17 */
+ pri, nice, lwpcnt, /* 18-20 */
+ 0l, /* itrealvalue (time before next SIGALRM) 21 */
+ ticks, /* 22 */
+ vsize, rss, vmem_ctl, /* 23-25 */
+ 0l, 0l, stackbase, /* startcode, endcode, startstack 26-28 */
+ 0l, 0l, /* kstkesp, kstkeip 29-30 */
+ 0l, 0l, 0l, 0l, /* signal, blocked, sigignore, sigcatch 31-34 */
+ wchan, /* 35 */
+ 0l, 0l, /* nswap,cnswap 36-37 */
+ 0, /* exit_signal 38 */
+ cpu /* 39 */);
+}
+
+/*
+ * lxpr_read_pid_stat(): pid stat file
+ */
+static void
+lxpr_read_pid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_STAT);
+
+ lxpr_read_stat_common(lxpnp, uiobuf, 0);
+}
+
+/*
+ * lxpr_read_pid_tid_stat(): pid stat file
+ */
+static void
+lxpr_read_pid_tid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_TID_STAT);
+ lxpr_read_stat_common(lxpnp, uiobuf, lxpnp->lxpr_desc);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_arp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+struct lxpr_ifstat {
+ uint64_t rx_bytes;
+ uint64_t rx_packets;
+ uint64_t rx_errors;
+ uint64_t rx_drop;
+ uint64_t tx_bytes;
+ uint64_t tx_packets;
+ uint64_t tx_errors;
+ uint64_t tx_drop;
+ uint64_t collisions;
+ uint64_t rx_multicast;
+};
+
+static void *
+lxpr_kstat_read(kstat_t *kn, boolean_t byname, size_t *size, int *num)
+{
+ kstat_t *kp;
+ int i, nrec = 0;
+ size_t bufsize;
+ void *buf = NULL;
+
+ if (byname == B_TRUE) {
+ kp = kstat_hold_byname(kn->ks_module, kn->ks_instance,
+ kn->ks_name, getzoneid());
+ } else {
+ kp = kstat_hold_bykid(kn->ks_kid, getzoneid());
+ }
+ if (kp == NULL) {
+ return (NULL);
+ }
+ if (kp->ks_flags & KSTAT_FLAG_INVALID) {
+ kstat_rele(kp);
+ return (NULL);
+ }
+
+ bufsize = kp->ks_data_size + 1;
+ kstat_rele(kp);
+
+ /*
+ * The kstat in question is released so that kmem_alloc(KM_SLEEP) is
+ * performed without it held. After the alloc, the kstat is reacquired
+ * and its size is checked again. If the buffer is no longer large
+ * enough, the alloc and check are repeated up to three times.
+ */
+ for (i = 0; i < 2; i++) {
+ buf = kmem_alloc(bufsize, KM_SLEEP);
+
+ /* Check if bufsize still appropriate */
+ if (byname == B_TRUE) {
+ kp = kstat_hold_byname(kn->ks_module, kn->ks_instance,
+ kn->ks_name, getzoneid());
+ } else {
+ kp = kstat_hold_bykid(kn->ks_kid, getzoneid());
+ }
+ if (kp == NULL || kp->ks_flags & KSTAT_FLAG_INVALID) {
+ if (kp != NULL) {
+ kstat_rele(kp);
+ }
+ kmem_free(buf, bufsize);
+ return (NULL);
+ }
+ KSTAT_ENTER(kp);
+ (void) KSTAT_UPDATE(kp, KSTAT_READ);
+ if (bufsize < kp->ks_data_size) {
+ kmem_free(buf, bufsize);
+ buf = NULL;
+ bufsize = kp->ks_data_size + 1;
+ KSTAT_EXIT(kp);
+ kstat_rele(kp);
+ continue;
+ } else {
+ if (KSTAT_SNAPSHOT(kp, buf, KSTAT_READ) != 0) {
+ kmem_free(buf, bufsize);
+ buf = NULL;
+ }
+ nrec = kp->ks_ndata;
+ KSTAT_EXIT(kp);
+ kstat_rele(kp);
+ break;
+ }
+ }
+
+ if (buf != NULL) {
+ *size = bufsize;
+ *num = nrec;
+ }
+ return (buf);
+}
+
+static int
+lxpr_kstat_ifstat(kstat_t *kn, struct lxpr_ifstat *ifs)
+{
+ kstat_named_t *kp;
+ int i, num;
+ size_t size;
+
+ /*
+ * Search by name instead of by kid since there's a small window to
+ * race against kstats being added/removed.
+ */
+ bzero(ifs, sizeof (*ifs));
+ kp = (kstat_named_t *)lxpr_kstat_read(kn, B_TRUE, &size, &num);
+ if (kp == NULL)
+ return (-1);
+ for (i = 0; i < num; i++) {
+ if (strncmp(kp[i].name, "rbytes64", KSTAT_STRLEN) == 0)
+ ifs->rx_bytes = kp[i].value.ui64;
+ else if (strncmp(kp[i].name, "ipackets64", KSTAT_STRLEN) == 0)
+ ifs->rx_packets = kp[i].value.ui64;
+ else if (strncmp(kp[i].name, "ierrors", KSTAT_STRLEN) == 0)
+ ifs->rx_errors = kp[i].value.ui32;
+ else if (strncmp(kp[i].name, "norcvbuf", KSTAT_STRLEN) == 0)
+ ifs->rx_drop = kp[i].value.ui32;
+ else if (strncmp(kp[i].name, "multircv", KSTAT_STRLEN) == 0)
+ ifs->rx_multicast = kp[i].value.ui32;
+ else if (strncmp(kp[i].name, "obytes64", KSTAT_STRLEN) == 0)
+ ifs->tx_bytes = kp[i].value.ui64;
+ else if (strncmp(kp[i].name, "opackets64", KSTAT_STRLEN) == 0)
+ ifs->tx_packets = kp[i].value.ui64;
+ else if (strncmp(kp[i].name, "oerrors", KSTAT_STRLEN) == 0)
+ ifs->tx_errors = kp[i].value.ui32;
+ else if (strncmp(kp[i].name, "noxmtbuf", KSTAT_STRLEN) == 0)
+ ifs->tx_drop = kp[i].value.ui32;
+ else if (strncmp(kp[i].name, "collisions", KSTAT_STRLEN) == 0)
+ ifs->collisions = kp[i].value.ui32;
+ }
+ kmem_free(kp, size);
+ return (0);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_dev(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ kstat_t *ksr;
+ kstat_t ks0;
+ int i, nidx;
+ size_t sidx;
+ struct lxpr_ifstat ifs;
+
+ lxpr_uiobuf_printf(uiobuf, "Inter-| Receive "
+ " | Transmit\n");
+ lxpr_uiobuf_printf(uiobuf, " face |bytes packets errs drop fifo"
+ " frame compressed multicast|bytes packets errs drop fifo"
+ " colls carrier compressed\n");
+
+ ks0.ks_kid = 0;
+ ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
+ if (ksr == NULL)
+ return;
+
+ for (i = 1; i < nidx; i++) {
+ if (strncmp(ksr[i].ks_module, "link", KSTAT_STRLEN) == 0 ||
+ strncmp(ksr[i].ks_module, "lo", KSTAT_STRLEN) == 0) {
+ if (lxpr_kstat_ifstat(&ksr[i], &ifs) != 0)
+ continue;
+
+ /* Overwriting the name is ok in the local snapshot */
+ lx_ifname_convert(ksr[i].ks_name, LX_IF_FROMNATIVE);
+ lxpr_uiobuf_printf(uiobuf, "%6s: %7llu %7llu %4lu "
+ "%4lu %4u %5u %10u %9lu %8llu %7llu %4lu %4lu %4u "
+ "%5lu %7u %10u\n",
+ ksr[i].ks_name,
+ ifs.rx_bytes, ifs.rx_packets,
+ ifs.rx_errors, ifs.rx_drop,
+ 0, 0, 0, ifs.rx_multicast,
+ ifs.tx_bytes, ifs.tx_packets,
+ ifs.tx_errors, ifs.tx_drop,
+ 0, ifs.collisions, 0, 0);
+ }
+ }
+
+ kmem_free(ksr, sidx);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_dev_mcast(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+static void
+lxpr_inet6_out(const in6_addr_t *addr, char buf[33])
+{
+ const uint8_t *ip = addr->s6_addr;
+ char digits[] = "0123456789abcdef";
+ int i;
+ for (i = 0; i < 16; i++) {
+ buf[2 * i] = digits[ip[i] >> 4];
+ buf[2 * i + 1] = digits[ip[i] & 0xf];
+ }
+ buf[32] = '\0';
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_if_inet6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ netstack_t *ns;
+ ip_stack_t *ipst;
+ ill_t *ill;
+ ipif_t *ipif;
+ ill_walk_context_t ctx;
+ char ifname[LIFNAMSIZ], ip6out[33];
+
+ ns = netstack_get_current();
+ if (ns == NULL)
+ return;
+ ipst = ns->netstack_ip;
+
+ rw_enter(&ipst->ips_ill_g_lock, RW_READER);
+ ill = ILL_START_WALK_V6(&ctx, ipst);
+
+ for (; ill != NULL; ill = ill_next(&ctx, ill)) {
+ for (ipif = ill->ill_ipif; ipif != NULL;
+ ipif = ipif->ipif_next) {
+ uint_t index = ill->ill_phyint->phyint_ifindex;
+ int plen = ip_mask_to_plen_v6(&ipif->ipif_v6net_mask);
+ unsigned int scope = lx_ipv6_scope_convert(
+ &ipif->ipif_v6lcl_addr);
+ /* Always report PERMANENT flag */
+ int flag = 0x80;
+
+ (void) snprintf(ifname, LIFNAMSIZ, "%s", ill->ill_name);
+ lx_ifname_convert(ifname, LX_IF_FROMNATIVE);
+ lxpr_inet6_out(&ipif->ipif_v6lcl_addr, ip6out);
+
+ lxpr_uiobuf_printf(uiobuf, "%32s %02x %02x %02x %02x"
+ " %8s\n", ip6out, index, plen, scope, flag, ifname);
+ }
+ }
+ rw_exit(&ipst->ips_ill_g_lock);
+ netstack_rele(ns);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_igmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_ip_mr_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_ip_mr_vif(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+static void
+lxpr_format_route_ipv6(ire_t *ire, lxpr_uiobuf_t *uiobuf)
+{
+ uint32_t flags;
+ char name[IFNAMSIZ];
+ char ipv6addr[33];
+
+ lxpr_inet6_out(&ire->ire_addr_v6, ipv6addr);
+ lxpr_uiobuf_printf(uiobuf, "%s %02x ", ipv6addr,
+ ip_mask_to_plen_v6(&ire->ire_mask_v6));
+
+ /* punt on this for now */
+ lxpr_uiobuf_printf(uiobuf, "%s %02x ",
+ "00000000000000000000000000000000", 0);
+
+ lxpr_inet6_out(&ire->ire_gateway_addr_v6, ipv6addr);
+ lxpr_uiobuf_printf(uiobuf, "%s", ipv6addr);
+
+ flags = ire->ire_flags &
+ (RTF_UP|RTF_GATEWAY|RTF_HOST|RTF_DYNAMIC|RTF_MODIFIED);
+ /* Linux's RTF_LOCAL equivalent */
+ if (ire->ire_metrics.iulp_local)
+ flags |= 0x80000000;
+
+ if (ire->ire_ill != NULL) {
+ ill_get_name(ire->ire_ill, name, sizeof (name));
+ lx_ifname_convert(name, LX_IF_FROMNATIVE);
+ } else {
+ name[0] = '\0';
+ }
+
+ lxpr_uiobuf_printf(uiobuf, " %08x %08x %08x %08x %8s\n",
+ 0, /* metric */
+ ire->ire_refcnt,
+ 0,
+ flags,
+ name);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_ipv6_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ netstack_t *ns;
+ ip_stack_t *ipst;
+
+ ns = netstack_get_current();
+ if (ns == NULL)
+ return;
+ ipst = ns->netstack_ip;
+
+ /*
+ * LX branded zones are expected to have exclusive IP stack, hence
+ * using ALL_ZONES as the zoneid filter.
+ */
+ ire_walk_v6(&lxpr_format_route_ipv6, uiobuf, ALL_ZONES, ipst);
+
+ netstack_rele(ns);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_mcfilter(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_netstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_raw(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+#define LXPR_SKIP_ROUTE(type) \
+ (((IRE_IF_CLONE | IRE_BROADCAST | IRE_MULTICAST | \
+ IRE_NOROUTE | IRE_LOOPBACK | IRE_LOCAL) & type) != 0)
+
+static void
+lxpr_format_route_ipv4(ire_t *ire, lxpr_uiobuf_t *uiobuf)
+{
+ uint32_t flags;
+ char name[IFNAMSIZ];
+ ill_t *ill;
+ ire_t *nire;
+ ipif_t *ipif;
+ ipaddr_t gateway;
+
+ if (LXPR_SKIP_ROUTE(ire->ire_type) || ire->ire_testhidden != 0)
+ return;
+
+ /* These route flags have direct Linux equivalents */
+ flags = ire->ire_flags &
+ (RTF_UP|RTF_GATEWAY|RTF_HOST|RTF_DYNAMIC|RTF_MODIFIED);
+
+ /*
+ * Search for a suitable IRE for naming purposes.
+ * On Linux, the default route is typically associated with the
+ * interface used to access gateway. The default IRE on Illumos
+ * typically lacks an ill reference but its parent might have one.
+ */
+ nire = ire;
+ do {
+ ill = nire->ire_ill;
+ nire = nire->ire_dep_parent;
+ } while (ill == NULL && nire != NULL);
+ if (ill != NULL) {
+ ill_get_name(ill, name, sizeof (name));
+ lx_ifname_convert(name, LX_IF_FROMNATIVE);
+ } else {
+ name[0] = '*';
+ name[1] = '\0';
+ }
+
+ /*
+ * Linux suppresses the gateway address for directly connected
+ * interface networks. To emulate this behavior, we walk all addresses
+ * of a given route interface. If one matches the gateway, it is
+ * displayed as NULL.
+ */
+ gateway = ire->ire_gateway_addr;
+ if ((ill = ire->ire_ill) != NULL) {
+ for (ipif = ill->ill_ipif; ipif != NULL;
+ ipif = ipif->ipif_next) {
+ if (ipif->ipif_lcl_addr == gateway) {
+ gateway = 0;
+ break;
+ }
+ }
+ }
+
+ lxpr_uiobuf_printf(uiobuf, "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
+ "%d\t%08X\t%d\t%u\t%u\n",
+ name,
+ ire->ire_addr,
+ gateway,
+ flags, 0, 0,
+ 0, /* priority */
+ ire->ire_mask,
+ 0, 0, /* mss, window */
+ ire->ire_metrics.iulp_rtt);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ netstack_t *ns;
+ ip_stack_t *ipst;
+
+ lxpr_uiobuf_printf(uiobuf, "Iface\tDestination\tGateway \tFlags\t"
+ "RefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT\n");
+
+ ns = netstack_get_current();
+ if (ns == NULL)
+ return;
+ ipst = ns->netstack_ip;
+
+ /*
+ * LX branded zones are expected to have exclusive IP stack, hence
+ * using ALL_ZONES as the zoneid filter.
+ */
+ ire_walk_v4(&lxpr_format_route_ipv4, uiobuf, ALL_ZONES, ipst);
+
+ netstack_rele(ns);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_rpc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_rt_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_sockstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+typedef struct lxpr_snmp_table {
+ const char *lst_proto;
+ const char *lst_fields[];
+} lxpr_snmp_table_t;
+
+static lxpr_snmp_table_t lxpr_snmp_ip = { "ip",
+ {
+ "forwarding", "defaultTTL", "inReceives", "inHdrErrors",
+ "inAddrErrors", "forwDatagrams", "inUnknownProtos", "inDiscards",
+ "inDelivers", "outRequests", "outDiscards", "outNoRoutes",
+ "reasmTimeout", "reasmReqds", "reasmOKs", "reasmFails", "fragOKs",
+ "fragFails", "fragCreates",
+ NULL
+ }
+};
+static lxpr_snmp_table_t lxpr_snmp_icmp = { "icmp",
+ {
+ "inMsgs", "inErrors", "inCsumErrors", "inDestUnreachs", "inTimeExcds",
+ "inParmProbs", "inSrcQuenchs", "inRedirects", "inEchos", "inEchoReps",
+ "inTimestamps", "inTimestampReps", "inAddrMasks", "inAddrMaskReps",
+ "outMsgs", "outErrors", "outDestUnreachs", "outTimeExcds",
+ "outParmProbs", "outSrcQuenchs", "outRedirects", "outEchos",
+ "outEchoReps", "outTimestamps", "outTimestampReps", "outAddrMasks",
+ "outAddrMaskReps",
+ NULL
+ }
+};
+static lxpr_snmp_table_t lxpr_snmp_tcp = { "tcp",
+ {
+ "rtoAlgorithm", "rtoMin", "rtoMax", "maxConn", "activeOpens",
+ "passiveOpens", "attemptFails", "estabResets", "currEstab", "inSegs",
+ "outSegs", "retransSegs", "inErrs", "outRsts", "inCsumErrors",
+ NULL
+ }
+};
+static lxpr_snmp_table_t lxpr_snmp_udp = { "udp",
+ {
+ "inDatagrams", "noPorts", "inErrors", "outDatagrams", "rcvbufErrors",
+ "sndbufErrors", "inCsumErrors",
+ NULL
+ }
+};
+
+static lxpr_snmp_table_t *lxpr_net_snmptab[] = {
+ &lxpr_snmp_ip,
+ &lxpr_snmp_icmp,
+ &lxpr_snmp_tcp,
+ &lxpr_snmp_udp,
+ NULL
+};
+
+static void
+lxpr_kstat_print_tab(lxpr_uiobuf_t *uiobuf, lxpr_snmp_table_t *table,
+ kstat_t *kn)
+{
+ kstat_named_t *klist;
+ char upname[KSTAT_STRLEN], upfield[KSTAT_STRLEN];
+ int i, j, num;
+ size_t size;
+
+ klist = (kstat_named_t *)lxpr_kstat_read(kn, B_TRUE, &size, &num);
+ if (klist == NULL)
+ return;
+
+ /* Print the header line, fields capitalized */
+ (void) strncpy(upname, table->lst_proto, KSTAT_STRLEN);
+ upname[0] = toupper(upname[0]);
+ lxpr_uiobuf_printf(uiobuf, "%s:", upname);
+ for (i = 0; table->lst_fields[i] != NULL; i++) {
+ (void) strncpy(upfield, table->lst_fields[i], KSTAT_STRLEN);
+ upfield[0] = toupper(upfield[0]);
+ lxpr_uiobuf_printf(uiobuf, " %s", upfield);
+ }
+ lxpr_uiobuf_printf(uiobuf, "\n%s:", upname);
+
+ /* Then loop back through to print the value line. */
+ for (i = 0; table->lst_fields[i] != NULL; i++) {
+ kstat_named_t *kpoint = NULL;
+ for (j = 0; j < num; j++) {
+ if (strncmp(klist[j].name, table->lst_fields[i],
+ KSTAT_STRLEN) == 0) {
+ kpoint = &klist[j];
+ break;
+ }
+ }
+ if (kpoint == NULL) {
+ /* Output 0 for unknown fields */
+ lxpr_uiobuf_printf(uiobuf, " 0");
+ } else {
+ switch (kpoint->data_type) {
+ case KSTAT_DATA_INT32:
+ lxpr_uiobuf_printf(uiobuf, " %d",
+ kpoint->value.i32);
+ break;
+ case KSTAT_DATA_UINT32:
+ lxpr_uiobuf_printf(uiobuf, " %u",
+ kpoint->value.ui32);
+ break;
+ case KSTAT_DATA_INT64:
+ lxpr_uiobuf_printf(uiobuf, " %ld",
+ kpoint->value.l);
+ break;
+ case KSTAT_DATA_UINT64:
+ lxpr_uiobuf_printf(uiobuf, " %lu",
+ kpoint->value.ul);
+ break;
+ }
+ }
+ }
+ lxpr_uiobuf_printf(uiobuf, "\n");
+ kmem_free(klist, size);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_snmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ kstat_t *ksr;
+ kstat_t ks0;
+ lxpr_snmp_table_t **table = lxpr_net_snmptab;
+ int i, t, nidx;
+ size_t sidx;
+
+ ks0.ks_kid = 0;
+ ksr = (kstat_t *)lxpr_kstat_read(&ks0, B_FALSE, &sidx, &nidx);
+ if (ksr == NULL)
+ return;
+
+ for (t = 0; table[t] != NULL; t++) {
+ for (i = 0; i < nidx; i++) {
+ if (strncmp(ksr[i].ks_class, "mib2", KSTAT_STRLEN) != 0)
+ continue;
+ if (strncmp(ksr[i].ks_name, table[t]->lst_proto,
+ KSTAT_STRLEN) == 0) {
+ lxpr_kstat_print_tab(uiobuf, table[t], &ksr[i]);
+ break;
+ }
+ }
+ }
+ kmem_free(ksr, sidx);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+static int
+lxpr_convert_tcp_state(int st)
+{
+ /*
+ * Derived from the enum located in the Linux kernel sources:
+ * include/net/tcp_states.h
+ */
+ switch (st) {
+ case TCPS_ESTABLISHED:
+ return (1);
+ case TCPS_SYN_SENT:
+ return (2);
+ case TCPS_SYN_RCVD:
+ return (3);
+ case TCPS_FIN_WAIT_1:
+ return (4);
+ case TCPS_FIN_WAIT_2:
+ return (5);
+ case TCPS_TIME_WAIT:
+ return (6);
+ case TCPS_CLOSED:
+ return (7);
+ case TCPS_CLOSE_WAIT:
+ return (8);
+ case TCPS_LAST_ACK:
+ return (9);
+ case TCPS_LISTEN:
+ return (10);
+ case TCPS_CLOSING:
+ return (11);
+ default:
+ /* No translation for TCPS_IDLE, TCPS_BOUND or anything else */
+ return (0);
+ }
+}
+
+static void
+lxpr_format_tcp(lxpr_uiobuf_t *uiobuf, ushort_t ipver)
+{
+ int i, sl = 0;
+ connf_t *connfp;
+ conn_t *connp;
+ netstack_t *ns;
+ ip_stack_t *ipst;
+
+ ASSERT(ipver == IPV4_VERSION || ipver == IPV6_VERSION);
+ if (ipver == IPV4_VERSION) {
+ lxpr_uiobuf_printf(uiobuf, " sl local_address rem_address "
+ "st tx_queue rx_queue tr tm->when retrnsmt uid timeout "
+ "inode\n");
+ } else {
+ lxpr_uiobuf_printf(uiobuf, " sl "
+ "local_address "
+ "remote_address "
+ "st tx_queue rx_queue tr tm->when retrnsmt "
+ "uid timeout inode\n");
+ }
+ /*
+ * Due to differences between the Linux and illumos TCP
+ * implementations, some data will be omitted from the output here.
+ *
+ * Valid fields:
+ * - local_address
+ * - remote_address
+ * - st
+ * - tx_queue
+ * - rx_queue
+ * - uid
+ * - inode
+ *
+ * Omitted/invalid fields
+ * - tr
+ * - tm->when
+ * - retrnsmt
+ * - timeout
+ */
+
+ ns = netstack_get_current();
+ if (ns == NULL)
+ return;
+ ipst = ns->netstack_ip;
+
+ for (i = 0; i < CONN_G_HASH_SIZE; i++) {
+ connfp = &ipst->ips_ipcl_globalhash_fanout[i];
+ connp = NULL;
+ while ((connp =
+ ipcl_get_next_conn(connfp, connp, IPCL_TCPCONN)) != NULL) {
+ tcp_t *tcp;
+ vattr_t attr;
+ sonode_t *so = (sonode_t *)connp->conn_upper_handle;
+ vnode_t *vp = (so != NULL) ? so->so_vnode : NULL;
+ if (connp->conn_ipversion != ipver)
+ continue;
+ tcp = connp->conn_tcp;
+ if (ipver == IPV4_VERSION) {
+ lxpr_uiobuf_printf(uiobuf,
+ "%4d: %08X:%04X %08X:%04X ",
+ ++sl,
+ connp->conn_laddr_v4,
+ ntohs(connp->conn_lport),
+ connp->conn_faddr_v4,
+ ntohs(connp->conn_fport));
+ } else {
+ lxpr_uiobuf_printf(uiobuf, "%4d: "
+ "%08X%08X%08X%08X:%04X "
+ "%08X%08X%08X%08X:%04X ",
+ ++sl,
+ connp->conn_laddr_v6.s6_addr32[0],
+ connp->conn_laddr_v6.s6_addr32[1],
+ connp->conn_laddr_v6.s6_addr32[2],
+ connp->conn_laddr_v6.s6_addr32[3],
+ ntohs(connp->conn_lport),
+ connp->conn_faddr_v6.s6_addr32[0],
+ connp->conn_faddr_v6.s6_addr32[1],
+ connp->conn_faddr_v6.s6_addr32[2],
+ connp->conn_faddr_v6.s6_addr32[3],
+ ntohs(connp->conn_fport));
+ }
+
+ /* fetch the simulated inode for the socket */
+ if (vp == NULL ||
+ VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
+ attr.va_nodeid = 0;
+
+ lxpr_uiobuf_printf(uiobuf,
+ "%02X %08X:%08X %02X:%08X %08X "
+ "%5u %8d %lu %d %p %u %u %u %u %d\n",
+ lxpr_convert_tcp_state(tcp->tcp_state),
+ tcp->tcp_rcv_cnt, tcp->tcp_unsent, /* rx/tx queue */
+ 0, 0, /* tr, when */
+ 0, /* per-connection rexmits aren't tracked today */
+ connp->conn_cred->cr_uid,
+ 0, /* timeout */
+ /* inode + more */
+ (ino_t)attr.va_nodeid, 0, NULL, 0, 0, 0, 0, 0);
+ }
+ }
+ netstack_rele(ns);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_tcp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_format_tcp(uiobuf, IPV4_VERSION);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_tcp6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_format_tcp(uiobuf, IPV6_VERSION);
+}
+
+static void
+lxpr_format_udp(lxpr_uiobuf_t *uiobuf, ushort_t ipver)
+{
+ int i, sl = 0;
+ connf_t *connfp;
+ conn_t *connp;
+ netstack_t *ns;
+ ip_stack_t *ipst;
+
+ ASSERT(ipver == IPV4_VERSION || ipver == IPV6_VERSION);
+ if (ipver == IPV4_VERSION) {
+ lxpr_uiobuf_printf(uiobuf, " sl local_address rem_address"
+ " st tx_queue rx_queue tr tm->when retrnsmt uid"
+ " timeout inode ref pointer drops\n");
+ } else {
+ lxpr_uiobuf_printf(uiobuf, " sl "
+ "local_address "
+ "remote_address "
+ "st tx_queue rx_queue tr tm->when retrnsmt "
+ "uid timeout inode ref pointer drops\n");
+ }
+ /*
+ * Due to differences between the Linux and illumos UDP
+ * implementations, some data will be omitted from the output here.
+ *
+ * Valid fields:
+ * - local_address
+ * - remote_address
+ * - st: limited
+ * - uid
+ *
+ * Omitted/invalid fields
+ * - tx_queue
+ * - rx_queue
+ * - tr
+ * - tm->when
+ * - retrnsmt
+ * - timeout
+ * - inode
+ */
+
+ ns = netstack_get_current();
+ if (ns == NULL)
+ return;
+ ipst = ns->netstack_ip;
+
+ for (i = 0; i < CONN_G_HASH_SIZE; i++) {
+ connfp = &ipst->ips_ipcl_globalhash_fanout[i];
+ connp = NULL;
+ while ((connp =
+ ipcl_get_next_conn(connfp, connp, IPCL_UDPCONN)) != NULL) {
+ udp_t *udp;
+ int state = 0;
+ vattr_t attr;
+ sonode_t *so = (sonode_t *)connp->conn_upper_handle;
+ vnode_t *vp = (so != NULL) ? so->so_vnode : NULL;
+ if (connp->conn_ipversion != ipver)
+ continue;
+ udp = connp->conn_udp;
+ if (ipver == IPV4_VERSION) {
+ lxpr_uiobuf_printf(uiobuf,
+ "%4d: %08X:%04X %08X:%04X ",
+ ++sl,
+ connp->conn_laddr_v4,
+ ntohs(connp->conn_lport),
+ connp->conn_faddr_v4,
+ ntohs(connp->conn_fport));
+ } else {
+ lxpr_uiobuf_printf(uiobuf, "%4d: "
+ "%08X%08X%08X%08X:%04X "
+ "%08X%08X%08X%08X:%04X ",
+ ++sl,
+ connp->conn_laddr_v6.s6_addr32[0],
+ connp->conn_laddr_v6.s6_addr32[1],
+ connp->conn_laddr_v6.s6_addr32[2],
+ connp->conn_laddr_v6.s6_addr32[3],
+ ntohs(connp->conn_lport),
+ connp->conn_faddr_v6.s6_addr32[0],
+ connp->conn_faddr_v6.s6_addr32[1],
+ connp->conn_faddr_v6.s6_addr32[2],
+ connp->conn_faddr_v6.s6_addr32[3],
+ ntohs(connp->conn_fport));
+ }
+
+ switch (udp->udp_state) {
+ case TS_UNBND:
+ case TS_IDLE:
+ state = 7;
+ break;
+ case TS_DATA_XFER:
+ state = 1;
+ break;
+ }
+
+ /* fetch the simulated inode for the socket */
+ if (vp == NULL ||
+ VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
+ attr.va_nodeid = 0;
+
+ lxpr_uiobuf_printf(uiobuf,
+ "%02X %08X:%08X %02X:%08X %08X "
+ "%5u %8d %lu %d %p %d\n",
+ state,
+ 0, 0, /* rx/tx queue */
+ 0, 0, /* tr, when */
+ 0, /* retrans */
+ connp->conn_cred->cr_uid,
+ 0, /* timeout */
+ /* inode, ref, pointer, drops */
+ (ino_t)attr.va_nodeid, 0, NULL, 0);
+ }
+ }
+ netstack_rele(ns);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_udp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_format_udp(uiobuf, IPV4_VERSION);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_udp6(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_format_udp(uiobuf, IPV6_VERSION);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_unix(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ sonode_t *so;
+ zoneid_t zoneid = getzoneid();
+
+ lxpr_uiobuf_printf(uiobuf, "Num RefCount Protocol Flags Type "
+ "St Inode Path\n");
+
+ mutex_enter(&socklist.sl_lock);
+ for (so = socklist.sl_list; so != NULL;
+ so = _SOTOTPI(so)->sti_next_so) {
+ vnode_t *vp = so->so_vnode;
+ vattr_t attr;
+ sotpi_info_t *sti;
+ const char *name = NULL;
+ int status = 0;
+ int type = 0;
+ int flags = 0;
+
+ /* Only process active sonodes in this zone */
+ if (so->so_count == 0 || so->so_zoneid != zoneid)
+ continue;
+
+ /*
+ * Grab the inode, if possible.
+ * This must be done before entering so_lock.
+ */
+ if (vp == NULL ||
+ VOP_GETATTR(vp, &attr, 0, CRED(), NULL) != 0)
+ attr.va_nodeid = 0;
+
+ mutex_enter(&so->so_lock);
+ sti = _SOTOTPI(so);
+
+ if (sti->sti_laddr_sa != NULL &&
+ sti->sti_laddr_len > 0) {
+ name = sti->sti_laddr_sa->sa_data;
+ } else if (sti->sti_faddr_sa != NULL &&
+ sti->sti_faddr_len > 0) {
+ name = sti->sti_faddr_sa->sa_data;
+ }
+
+ /*
+ * Derived from enum values in Linux kernel source:
+ * include/uapi/linux/net.h
+ */
+ if ((so->so_state & SS_ISDISCONNECTING) != 0) {
+ status = 4;
+ } else if ((so->so_state & SS_ISCONNECTING) != 0) {
+ status = 2;
+ } else if ((so->so_state & SS_ISCONNECTED) != 0) {
+ status = 3;
+ } else {
+ status = 1;
+ /* Add ACC flag for stream-type server sockets */
+ if (so->so_type != SOCK_DGRAM &&
+ sti->sti_laddr_sa != NULL)
+ flags |= 0x10000;
+ }
+
+ /* Convert to Linux type */
+ switch (so->so_type) {
+ case SOCK_DGRAM:
+ type = 2;
+ break;
+ case SOCK_SEQPACKET:
+ type = 5;
+ break;
+ default:
+ type = 1;
+ }
+
+ lxpr_uiobuf_printf(uiobuf, "%p: %08X %08X %08X %04X %02X %5llu",
+ so,
+ so->so_count,
+ 0, /* proto, always 0 */
+ flags,
+ type,
+ status,
+ (ino_t)attr.va_nodeid);
+
+ /*
+ * Due to shortcomings in the abstract socket emulation, they
+ * cannot be properly represented here (as @<path>).
+ *
+ * This will be the case until they are better implemented.
+ */
+ if (name != NULL)
+ lxpr_uiobuf_printf(uiobuf, " %s\n", name);
+ else
+ lxpr_uiobuf_printf(uiobuf, "\n");
+ mutex_exit(&so->so_lock);
+ }
+ mutex_exit(&socklist.sl_lock);
+}
+
+/*
+ * lxpr_read_kmsg(): read the contents of the kernel message queue. We
+ * translate this into the reception of console messages for this zone; each
+ * read copies out a single zone console message, or blocks until the next one
+ * is produced, unless we're open non-blocking, in which case we return after
+ * 1ms.
+ */
+
+#define LX_KMSG_PRI "<0>"
+
+static void
+lxpr_read_kmsg(lxpr_node_t *lxpnp, struct lxpr_uiobuf *uiobuf, ldi_handle_t lh)
+{
+ mblk_t *mp;
+ timestruc_t to;
+ timestruc_t *tp = NULL;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_KMSG);
+
+ if (lxpr_uiobuf_nonblock(uiobuf)) {
+ to.tv_sec = 0;
+ to.tv_nsec = 1000000; /* 1msec */
+ tp = &to;
+ }
+
+ if (ldi_getmsg(lh, &mp, tp) == 0) {
+ /*
+ * lx procfs doesn't like successive reads to the same file
+ * descriptor unless we do an explicit rewind each time.
+ */
+ lxpr_uiobuf_seek(uiobuf, 0);
+
+ lxpr_uiobuf_printf(uiobuf, "%s%s", LX_KMSG_PRI,
+ mp->b_cont->b_rptr);
+
+ freemsg(mp);
+ }
+}
+
+/*
+ * lxpr_read_loadavg(): read the contents of the "loadavg" file. We do just
+ * enough for uptime and other simple lxproc readers to work
+ */
+extern int nthread;
+
+static void
+lxpr_read_loadavg(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ ulong_t avenrun1;
+ ulong_t avenrun5;
+ ulong_t avenrun15;
+ ulong_t avenrun1_cs;
+ ulong_t avenrun5_cs;
+ ulong_t avenrun15_cs;
+ int loadavg[3];
+ int *loadbuf;
+ cpupart_t *cp;
+ zone_t *zone = LXPTOZ(lxpnp);
+
+ uint_t nrunnable = 0;
+ rctl_qty_t nlwps;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_LOADAVG);
+
+ mutex_enter(&cpu_lock);
+
+ /*
+ * Need to add up values over all CPU partitions. If pools are active,
+ * only report the values of the zone's partition, which by definition
+ * includes the current CPU.
+ */
+ if (pool_pset_enabled()) {
+ psetid_t psetid = zone_pset_get(curproc->p_zone);
+
+ ASSERT(curproc->p_zone != &zone0);
+ cp = CPU->cpu_part;
+
+ nrunnable = cp->cp_nrunning + cp->cp_nrunnable;
+ (void) cpupart_get_loadavg(psetid, &loadavg[0], 3);
+ loadbuf = &loadavg[0];
+ } else {
+ cp = cp_list_head;
+ do {
+ nrunnable += cp->cp_nrunning + cp->cp_nrunnable;
+ } while ((cp = cp->cp_next) != cp_list_head);
+
+ loadbuf = zone == global_zone ?
+ &avenrun[0] : zone->zone_avenrun;
+ }
+
+ /*
+ * If we're in the non-global zone, we'll report the total number of
+ * LWPs in the zone for the "nproc" parameter of /proc/loadavg,
+ * otherwise will just use nthread (which will include kernel threads,
+ * but should be good enough for lxproc).
+ */
+ nlwps = zone == global_zone ? nthread : zone->zone_nlwps;
+
+ mutex_exit(&cpu_lock);
+
+ avenrun1 = loadbuf[0] >> FSHIFT;
+ avenrun1_cs = ((loadbuf[0] & (FSCALE-1)) * 100) >> FSHIFT;
+ avenrun5 = loadbuf[1] >> FSHIFT;
+ avenrun5_cs = ((loadbuf[1] & (FSCALE-1)) * 100) >> FSHIFT;
+ avenrun15 = loadbuf[2] >> FSHIFT;
+ avenrun15_cs = ((loadbuf[2] & (FSCALE-1)) * 100) >> FSHIFT;
+
+ lxpr_uiobuf_printf(uiobuf,
+ "%ld.%02d %ld.%02d %ld.%02d %d/%d %d\n",
+ avenrun1, avenrun1_cs,
+ avenrun5, avenrun5_cs,
+ avenrun15, avenrun15_cs,
+ nrunnable, nlwps, 0);
+}
+
+/*
+ * lxpr_read_meminfo(): read the contents of the "meminfo" file.
+ */
+static void
+lxpr_read_meminfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ zone_t *zone = LXPTOZ(lxpnp);
+ int global = zone == global_zone;
+ long total_mem, free_mem, total_swap;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_MEMINFO);
+
+ if (global || zone->zone_phys_mem_ctl == UINT64_MAX) {
+ total_mem = physmem * PAGESIZE;
+ free_mem = freemem * PAGESIZE;
+ } else {
+ total_mem = zone->zone_phys_mem_ctl;
+ free_mem = zone->zone_phys_mem_ctl - zone->zone_phys_mem;
+ }
+
+ if (global || zone->zone_max_swap_ctl == UINT64_MAX) {
+ total_swap = k_anoninfo.ani_max * PAGESIZE;
+ } else {
+ mutex_enter(&zone->zone_mem_lock);
+ total_swap = zone->zone_max_swap_ctl;
+ mutex_exit(&zone->zone_mem_lock);
+ }
+
+ /*
+ * SwapFree
+ * On illumos we reserve swap up front, whereas on Linux they just
+ * wing it and kill a random process if they run out of backing store
+ * for virtual memory. Our swap reservation doesn't translate to that
+ * model, so just inform the caller that no swap is being used.
+ */
+ lxpr_uiobuf_printf(uiobuf,
+ "MemTotal: %8lu kB\n"
+ "MemFree: %8lu kB\n"
+ "MemShared: %8u kB\n"
+ "Buffers: %8u kB\n"
+ "Cached: %8u kB\n"
+ "SwapCached:%8u kB\n"
+ "Active: %8u kB\n"
+ "Inactive: %8u kB\n"
+ "HighTotal: %8u kB\n"
+ "HighFree: %8u kB\n"
+ "LowTotal: %8u kB\n"
+ "LowFree: %8u kB\n"
+ "SwapTotal: %8lu kB\n"
+ "SwapFree: %8lu kB\n",
+ btok(total_mem), /* MemTotal */
+ btok(free_mem), /* MemFree */
+ 0, /* MemShared */
+ 0, /* Buffers */
+ 0, /* Cached */
+ 0, /* SwapCached */
+ 0, /* Active */
+ 0, /* Inactive */
+ 0, /* HighTotal */
+ 0, /* HighFree */
+ btok(total_mem), /* LowTotal */
+ btok(free_mem), /* LowFree */
+ btok(total_swap), /* SwapTotal */
+ btok(total_swap)); /* SwapFree */
+}
+
+/*
+ * lxpr_read_mounts():
+ */
+/* ARGSUSED */
+static void
+lxpr_read_mounts(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ zone_t *zone = LXPTOZ(lxpnp);
+ list_t *mounts;
+ lxpr_mount_entry_t *lme;
+
+ mounts = lxpr_enumerate_mounts(zone);
+
+ /*
+ * now we can run through what we've extracted without holding
+ * vfs_list_read_lock()
+ */
+ lme = list_remove_head(mounts);
+ while (lme != NULL) {
+ char *resource, *mntpt, *fstype, *rwflag;
+ vnode_t *vp;
+ int error;
+
+ mntpt = (char *)refstr_value(lme->lme_mntpt);
+ resource = (char *)refstr_value(lme->lme_resource);
+
+ if (mntpt == NULL || mntpt[0] == '\0') {
+ goto nextp;
+ }
+ mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
+ error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
+ if (error != 0) {
+ goto nextp;
+ } else if ((vp->v_flag & VROOT) == 0 && !lme->lme_force) {
+ VN_RELE(vp);
+ goto nextp;
+ }
+ VN_RELE(vp);
+
+ if (resource != NULL && resource[0] != '\0') {
+ if (resource[0] == '/') {
+ resource = ZONE_PATH_VISIBLE(resource, zone) ?
+ ZONE_PATH_TRANSLATE(resource, zone) : mntpt;
+ }
+ } else {
+ resource = "none";
+ }
+
+ /* Make things look more like Linux. */
+ fstype = vfssw[lme->lme_fstype].vsw_name;
+ if (lxpr_clean_mntent(&mntpt, &fstype, &resource) != 0 &&
+ !lme->lme_force) {
+ goto nextp;
+ }
+ rwflag = ((lme->lme_flag & VFS_RDONLY) == 0) ? "rw" : "ro";
+
+ lxpr_uiobuf_printf(uiobuf, "%s %s %s %s 0 0\n",
+ resource, mntpt, fstype, rwflag);
+
+nextp:
+ refstr_rele(lme->lme_mntpt);
+ refstr_rele(lme->lme_resource);
+ kmem_free(lme, sizeof (lxpr_mount_entry_t));
+ lme = list_remove_head(mounts);
+ }
+
+ list_destroy(mounts);
+ kmem_free(mounts, sizeof (list_t));
+}
+
+/*
+ * lxpr_read_partitions():
+ *
+ * Over the years, /proc/partitions has been made considerably smaller -- to
+ * the point that it really is only major number, minor number, number of
+ * blocks (which we report as 0), and partition name.
+ *
+ * We support this because some things want to see it to make sense of
+ * /proc/diskstats, and also because "fdisk -l" and a few other things look
+ * here to find all disks on the system.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_partitions(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lx_zone_data_t *lxzd;
+ lx_virt_disk_t *vd;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PARTITIONS);
+
+ lxpr_uiobuf_printf(uiobuf, "major minor #blocks name\n\n");
+
+ lxzd = ztolxzd(curproc->p_zone);
+ if (lxzd == NULL)
+ return;
+ ASSERT(lxzd->lxzd_vdisks != NULL);
+
+ vd = list_head(lxzd->lxzd_vdisks);
+ while (vd != NULL) {
+ lxpr_uiobuf_printf(uiobuf, "%4d %7d %10d %s\n",
+ getmajor(vd->lxvd_emul_dev), getminor(vd->lxvd_emul_dev),
+ 0, vd->lxvd_name);
+ vd = list_next(lxzd->lxzd_vdisks, vd);
+ }
+}
+
+/*
+ * There aren't many actual devices inside a zone but we want to provide the
+ * major numbers for the pseudo devices that do exist, including our pts/ptm
+ * device, as well as the zvol virtual disk device. We simply hardcode the
+ * emulated major numbers that are used elsewhere in the code and that match
+ * the expected Linux major numbers. See lx devfs where some of the major
+ * numbers have no defined constants.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_devices(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_DEVICES);
+
+ lxpr_uiobuf_printf(uiobuf, "Character devices:\n");
+ lxpr_uiobuf_printf(uiobuf, "%3d /dev/tty\n", LX_TTY_MAJOR);
+ lxpr_uiobuf_printf(uiobuf, "%3d /dev/console\n", LX_TTY_MAJOR);
+ lxpr_uiobuf_printf(uiobuf, "%3d /dev/ptmx\n", LX_TTY_MAJOR);
+ lxpr_uiobuf_printf(uiobuf, "%3d ptm\n", LX_PTM_MAJOR);
+ lxpr_uiobuf_printf(uiobuf, "%3d pts\n", LX_PTS_MAJOR_MIN);
+
+ lxpr_uiobuf_printf(uiobuf, "\nBlock devices:\n");
+ lxpr_uiobuf_printf(uiobuf, "%3d zvol\n", LX_MAJOR_DISK);
+}
+
+/*
+ * lxpr_read_diskstats():
+ *
+ * See the block comment above the per-device output-generating line for the
+ * details of the format.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_diskstats(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ zone_t *zone = LXPTOZ(lxpnp);
+ lx_zone_data_t *lxzd;
+ kstat_t kn;
+ int num;
+ zone_vfs_kstat_t *kip;
+ size_t size;
+ lx_virt_disk_t *vd;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_DISKSTATS);
+
+ lxzd = ztolxzd(zone);
+ if (lxzd == NULL)
+ return;
+ ASSERT(lxzd->lxzd_vdisks != NULL);
+
+ /*
+ * Use the zone_vfs kstat, which is a superset of a kstat_io_t, since
+ * it tracks IO at the zone level.
+ */
+ strlcpy(kn.ks_module, "zone_vfs", sizeof (kn.ks_module));
+ strlcpy(kn.ks_name, zone->zone_name, sizeof (kn.ks_name));
+ kn.ks_instance = getzoneid();
+
+ kip = (zone_vfs_kstat_t *)lxpr_kstat_read(&kn, B_TRUE, &size, &num);
+ if (kip == NULL)
+ return;
+
+ if (size < sizeof (kstat_io_t)) {
+ kmem_free(kip, size);
+ return;
+ }
+
+ /*
+ * Because the zone vfs stats are tracked at the zone level we use
+ * the same kstat for the zone's virtual disk (the zpool) and any
+ * zvols that might also visible within the zone.
+ */
+ vd = list_head(lxzd->lxzd_vdisks);
+ while (vd != NULL) {
+ /*
+ * /proc/diskstats is defined to have one line of output for
+ * each block device, with each line containing the following
+ * 14 fields:
+ *
+ * 1 - major number
+ * 2 - minor mumber
+ * 3 - device name
+ * 4 - reads completed successfully
+ * 5 - reads merged
+ * 6 - sectors read
+ * 7 - time spent reading (ms)
+ * 8 - writes completed
+ * 9 - writes merged
+ * 10 - sectors written
+ * 11 - time spent writing (ms)
+ * 12 - I/Os currently in progress
+ * 13 - time spent doing I/Os (ms)
+ * 14 - weighted time spent doing I/Os (ms)
+ *
+ * One small hiccup: we don't actually keep track of time
+ * spent reading vs. time spent writing -- we keep track of
+ * time waiting vs. time actually performing I/O. While we
+ * could divide the total time by the I/O mix (making the
+ * obviously wrong assumption that I/O operations all take the
+ * same amount of time), this has the undesirable side-effect
+ * of moving backwards. Instead, we report the total time
+ * (read + write) for all three stats (read, write, total).
+ * This is also a lie of sorts, but it should be more
+ * immediately clear to the user that reads and writes are
+ * each being double-counted as the other.
+ *
+ * Since certain consumers interpret the major/minor numbers to
+ * infer device names, some translation is required to avoid
+ * output which results in totally unexpected results.
+ */
+
+ lxpr_uiobuf_printf(uiobuf, "%4d %7d %s ",
+ getmajor(vd->lxvd_emul_dev),
+ getminor(vd->lxvd_emul_dev),
+ vd->lxvd_name);
+
+ if (vd->lxvd_type == LXVD_ZFS_DS) {
+ /*
+ * Use the zone-wide vfs stats for any zfs datasets
+ * represented via virtual devices.
+ */
+#define KV(N) kip->zv_ ## N.value.ui64
+#define NS_PER_MS (uint64_t)(NANOSEC / MILLISEC)
+ lxpr_uiobuf_printf(uiobuf,
+ "%llu %llu %llu %llu "
+ "%llu %llu %llu %llu "
+ "%llu %llu %llu\n",
+ (uint64_t)KV(reads), 0LL,
+ KV(nread) / (uint64_t)LXPR_SECTOR_SIZE,
+ (KV(rtime) + KV(wtime)) / NS_PER_MS,
+ (uint64_t)KV(writes), 0LL,
+ KV(nwritten) / (uint64_t)LXPR_SECTOR_SIZE,
+ (KV(rtime) + KV(wtime)) / NS_PER_MS,
+ (uint64_t)(KV(rcnt) + KV(wcnt)),
+ (KV(rtime) + KV(wtime)) / NS_PER_MS,
+ (KV(rlentime) + KV(wlentime)) / NS_PER_MS);
+#undef KV
+#undef NS_PER_MS
+ } else {
+ /*
+ * Report nearly-zeroed statistics for other devices.
+ *
+ * Since iostat will ignore devices which report no
+ * succesful reads or writes, a single read of one
+ * sector, taking 1ms, is reported.
+ */
+ lxpr_uiobuf_printf(uiobuf,
+ "1 0 1 1 0 0 0 0 0 0 0\n");
+ }
+
+ vd = list_next(lxzd->lxzd_vdisks, vd);
+ }
+
+ kmem_free(kip, size);
+}
+
+/*
+ * lxpr_read_version(): read the contents of the "version" file.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_version(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lx_zone_data_t *lxzd = ztolxzd(LXPTOZ(lxpnp));
+ lx_proc_data_t *lxpd = ptolxproc(curproc);
+ char release[LX_KERN_RELEASE_MAX];
+ char version[LX_KERN_VERSION_MAX];
+
+ mutex_enter(&lxzd->lxzd_lock);
+ (void) strlcpy(release, lxzd->lxzd_kernel_release, sizeof (release));
+ (void) strlcpy(version, lxzd->lxzd_kernel_version, sizeof (version));
+ mutex_exit(&lxzd->lxzd_lock);
+
+ /* Use per-process overrides, if specified */
+ if (lxpd != NULL && lxpd->l_uname_release[0] != '\0') {
+ (void) strlcpy(release, lxpd->l_uname_release,
+ sizeof (release));
+ }
+ if (lxpd != NULL && lxpd->l_uname_version[0] != '\0') {
+ (void) strlcpy(version, lxpd->l_uname_version,
+ sizeof (version));
+ }
+
+ lxpr_uiobuf_printf(uiobuf,
+ "%s version %s (%s version %d.%d.%d) %s\n",
+ LX_UNAME_SYSNAME, release,
+#if defined(__GNUC__)
+ "gcc", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__,
+#else
+ "cc", 1, 0, 0,
+#endif
+ version);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_vmstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ cpu_t *cp, *cpstart;
+ int pools_enabled;
+
+ ulong_t pgpgin_cum = 0;
+ ulong_t pgpgout_cum = 0;
+ ulong_t pgswapout_cum = 0;
+ ulong_t pgswapin_cum = 0;
+
+ mutex_enter(&cpu_lock);
+ pools_enabled = pool_pset_enabled();
+ /* Calculate cumulative stats */
+ cp = cpstart = CPU->cpu_part->cp_cpulist;
+ do {
+ /* Only count CPUs which are present and active. */
+ if ((cp->cpu_flags & CPU_EXISTS) == 0) {
+ continue;
+ }
+
+ pgpgin_cum += CPU_STATS(cp, vm.pgpgin);
+ pgpgout_cum += CPU_STATS(cp, vm.pgpgout);
+ pgswapin_cum += CPU_STATS(cp, vm.pgswapin);
+ pgswapout_cum += CPU_STATS(cp, vm.pgswapout);
+
+ if (pools_enabled)
+ cp = cp->cpu_next_part;
+ else
+ cp = cp->cpu_next;
+ } while (cp != cpstart);
+ mutex_exit(&cpu_lock);
+
+ /*
+ * Needless to say, the metrics presented by vmstat are very specific
+ * to the internals of the Linux kernel. There is little per-zone
+ * information which can be translated in a meaningful way to fit the
+ * expected fields. For the time being, the output is kept sparse.
+ */
+ lxpr_uiobuf_printf(uiobuf,
+ "pgpgin %lu\n"
+ "pgpgout %lu\n"
+ "pswpin %lu\n"
+ "pswpout %lu\n",
+ pgpgin_cum,
+ pgpgout_cum,
+ pgswapin_cum,
+ pgswapout_cum);
+}
+
+/*
+ * lxpr_read_stat(): read the contents of the "stat" file.
+ *
+ */
+/* ARGSUSED */
+static void
+lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ cpu_t *cp, *cpstart;
+ int pools_enabled;
+ ulong_t idle_cum = 0;
+ ulong_t sys_cum = 0;
+ ulong_t user_cum = 0;
+ ulong_t irq_cum = 0;
+ ulong_t cpu_nrunnable_cum = 0;
+ ulong_t w_io_cum = 0;
+
+ ulong_t pgpgin_cum = 0;
+ ulong_t pgpgout_cum = 0;
+ ulong_t pgswapout_cum = 0;
+ ulong_t pgswapin_cum = 0;
+ ulong_t intr_cum = 0;
+ ulong_t pswitch_cum = 0;
+ ulong_t forks_cum = 0;
+ hrtime_t msnsecs[NCMSTATES];
+ /* is the emulated release > 2.4 */
+ boolean_t newer_than24 = lx_kern_release_cmp(LXPTOZ(lxpnp), "2.4") > 0;
+ /* temporary variable since scalehrtime modifies data in place */
+ hrtime_t tmptime;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_STAT);
+
+ mutex_enter(&cpu_lock);
+ pools_enabled = pool_pset_enabled();
+
+ /* Calculate cumulative stats */
+ cp = cpstart = CPU->cpu_part->cp_cpulist;
+ do {
+ int i;
+
+ /*
+ * Don't count CPUs that aren't even in the system
+ * or aren't up yet.
+ */
+ if ((cp->cpu_flags & CPU_EXISTS) == 0) {
+ continue;
+ }
+
+ get_cpu_mstate(cp, msnsecs);
+
+ idle_cum += NSEC_TO_TICK(msnsecs[CMS_IDLE]);
+ sys_cum += NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
+ user_cum += NSEC_TO_TICK(msnsecs[CMS_USER]);
+
+ pgpgin_cum += CPU_STATS(cp, vm.pgpgin);
+ pgpgout_cum += CPU_STATS(cp, vm.pgpgout);
+ pgswapin_cum += CPU_STATS(cp, vm.pgswapin);
+ pgswapout_cum += CPU_STATS(cp, vm.pgswapout);
+
+
+ if (newer_than24) {
+ cpu_nrunnable_cum += cp->cpu_disp->disp_nrunnable;
+ w_io_cum += CPU_STATS(cp, sys.iowait);
+ for (i = 0; i < NCMSTATES; i++) {
+ tmptime = cp->cpu_intracct[i];
+ scalehrtime(&tmptime);
+ irq_cum += NSEC_TO_TICK(tmptime);
+ }
+ }
+
+ for (i = 0; i < PIL_MAX; i++)
+ intr_cum += CPU_STATS(cp, sys.intr[i]);
+
+ pswitch_cum += CPU_STATS(cp, sys.pswitch);
+ forks_cum += CPU_STATS(cp, sys.sysfork);
+ forks_cum += CPU_STATS(cp, sys.sysvfork);
+
+ if (pools_enabled)
+ cp = cp->cpu_next_part;
+ else
+ cp = cp->cpu_next;
+ } while (cp != cpstart);
+
+ if (newer_than24) {
+ lxpr_uiobuf_printf(uiobuf,
+ "cpu %lu %lu %lu %lu %lu %lu %lu\n",
+ user_cum, 0L, sys_cum, idle_cum, 0L, irq_cum, 0L);
+ } else {
+ lxpr_uiobuf_printf(uiobuf,
+ "cpu %lu %lu %lu %lu\n",
+ user_cum, 0L, sys_cum, idle_cum);
+ }
+
+ /* Do per processor stats */
+ do {
+ int i;
+
+ ulong_t idle_ticks;
+ ulong_t sys_ticks;
+ ulong_t user_ticks;
+ ulong_t irq_ticks = 0;
+
+ /*
+ * Don't count CPUs that aren't even in the system
+ * or aren't up yet.
+ */
+ if ((cp->cpu_flags & CPU_EXISTS) == 0) {
+ continue;
+ }
+
+ get_cpu_mstate(cp, msnsecs);
+
+ idle_ticks = NSEC_TO_TICK(msnsecs[CMS_IDLE]);
+ sys_ticks = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
+ user_ticks = NSEC_TO_TICK(msnsecs[CMS_USER]);
+
+ for (i = 0; i < NCMSTATES; i++) {
+ tmptime = cp->cpu_intracct[i];
+ scalehrtime(&tmptime);
+ irq_ticks += NSEC_TO_TICK(tmptime);
+ }
+
+ if (newer_than24) {
+ lxpr_uiobuf_printf(uiobuf,
+ "cpu%d %lu %lu %lu %lu %lu %lu %lu\n",
+ cp->cpu_id, user_ticks, 0L, sys_ticks, idle_ticks,
+ 0L, irq_ticks, 0L);
+ } else {
+ lxpr_uiobuf_printf(uiobuf,
+ "cpu%d %lu %lu %lu %lu\n",
+ cp->cpu_id,
+ user_ticks, 0L, sys_ticks, idle_ticks);
+ }
+
+ if (pools_enabled)
+ cp = cp->cpu_next_part;
+ else
+ cp = cp->cpu_next;
+ } while (cp != cpstart);
+
+ mutex_exit(&cpu_lock);
+
+ if (newer_than24) {
+ lxpr_uiobuf_printf(uiobuf,
+ "page %lu %lu\n"
+ "swap %lu %lu\n"
+ "intr %lu\n"
+ "ctxt %lu\n"
+ "btime %lu\n"
+ "processes %lu\n"
+ "procs_running %lu\n"
+ "procs_blocked %lu\n",
+ pgpgin_cum, pgpgout_cum,
+ pgswapin_cum, pgswapout_cum,
+ intr_cum,
+ pswitch_cum,
+ boot_time,
+ forks_cum,
+ cpu_nrunnable_cum,
+ w_io_cum);
+ } else {
+ lxpr_uiobuf_printf(uiobuf,
+ "page %lu %lu\n"
+ "swap %lu %lu\n"
+ "intr %lu\n"
+ "ctxt %lu\n"
+ "btime %lu\n"
+ "processes %lu\n",
+ pgpgin_cum, pgpgout_cum,
+ pgswapin_cum, pgswapout_cum,
+ intr_cum,
+ pswitch_cum,
+ boot_time,
+ forks_cum);
+ }
+}
+
+/*
+ * lxpr_read_swaps():
+ *
+ * We don't support swap files or partitions, but some programs like to look
+ * here just to check we have some swap on the system, so we lie and show
+ * our entire swap cap as one swap partition. See lxpr_read_meminfo for an
+ * explanation on why we report 0 used swap.
+ *
+ * It is important to use formatting identical to the Linux implementation
+ * so that consumers do not break. See swap_show() in mm/swapfile.c.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_swaps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ zone_t *zone = curzone;
+ uint64_t totswap, usedswap;
+
+ if (zone == global_zone || zone->zone_max_swap_ctl == UINT64_MAX) {
+ totswap = (k_anoninfo.ani_max * PAGESIZE) >> 10;
+ } else {
+ mutex_enter(&zone->zone_mem_lock);
+ /* Uses units of 1 kb (2^10). */
+ totswap = zone->zone_max_swap_ctl >> 10;
+ mutex_exit(&zone->zone_mem_lock);
+ }
+ usedswap = 0;
+
+ lxpr_uiobuf_printf(uiobuf,
+ "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
+ lxpr_uiobuf_printf(uiobuf, "%-40s%s\t%llu\t%llu\t%d\n",
+ "/dev/swap", "partition", totswap, usedswap, -1);
+}
+
+/*
+ * inotify tunables exported via /proc.
+ */
+extern int inotify_maxevents;
+extern int inotify_maxinstances;
+extern int inotify_maxwatches;
+
+static void
+lxpr_read_sys_fs_inotify_max_queued_events(lxpr_node_t *lxpnp,
+ lxpr_uiobuf_t *uiobuf)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_QUEUED_EVENTS);
+ lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxevents);
+}
+
+static void
+lxpr_read_sys_fs_inotify_max_user_instances(lxpr_node_t *lxpnp,
+ lxpr_uiobuf_t *uiobuf)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_USER_INSTANCES);
+ lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxinstances);
+}
+
+static void
+lxpr_read_sys_fs_inotify_max_user_watches(lxpr_node_t *lxpnp,
+ lxpr_uiobuf_t *uiobuf)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFY_MAX_USER_WATCHES);
+ lxpr_uiobuf_printf(uiobuf, "%d\n", inotify_maxwatches);
+}
+
+static void
+lxpr_read_sys_kernel_caplcap(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_CAPLCAP);
+ lxpr_uiobuf_printf(uiobuf, "%d\n", LX_CAP_MAX_VALID);
+}
+
+static void
+lxpr_read_sys_kernel_corepatt(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ zone_t *zone = curproc->p_zone;
+ struct core_globals *cg;
+ refstr_t *rp;
+ corectl_path_t *ccp;
+ char tr[MAXPATHLEN];
+
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_COREPATT);
+
+ cg = zone_getspecific(core_zone_key, zone);
+ ASSERT(cg != NULL);
+
+ /* If core dumps are disabled, return an empty string. */
+ if ((cg->core_options & CC_PROCESS_PATH) == 0) {
+ lxpr_uiobuf_printf(uiobuf, "\n");
+ return;
+ }
+
+ ccp = cg->core_default_path;
+ mutex_enter(&ccp->ccp_mtx);
+ if ((rp = ccp->ccp_path) != NULL)
+ refstr_hold(rp);
+ mutex_exit(&ccp->ccp_mtx);
+
+ if (rp == NULL) {
+ lxpr_uiobuf_printf(uiobuf, "\n");
+ return;
+ }
+
+ bzero(tr, sizeof (tr));
+ if (lxpr_core_path_s2l(refstr_value(rp), tr, sizeof (tr)) != 0) {
+ refstr_rele(rp);
+ lxpr_uiobuf_printf(uiobuf, "\n");
+ return;
+ }
+
+ refstr_rele(rp);
+ lxpr_uiobuf_printf(uiobuf, "%s\n", tr);
+}
+
+static void
+lxpr_read_sys_kernel_hostname(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_HOSTNAME);
+ lxpr_uiobuf_printf(uiobuf, "%s\n", uts_nodename());
+}
+
+static void
+lxpr_read_sys_kernel_msgmni(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ rctl_qty_t val;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_MSGMNI);
+
+ mutex_enter(&curproc->p_lock);
+ val = rctl_enforced_value(rc_zone_msgmni,
+ curproc->p_zone->zone_rctls, curproc);
+ mutex_exit(&curproc->p_lock);
+
+ lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val);
+}
+
+static void
+lxpr_read_sys_kernel_ngroups_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_NGROUPS_MAX);
+ lxpr_uiobuf_printf(uiobuf, "%d\n", ngroups_max);
+}
+
+static void
+lxpr_read_sys_kernel_osrel(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lx_zone_data_t *br_data;
+ char version[LX_KERN_VERSION_MAX];
+
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_OSREL);
+ br_data = ztolxzd(curproc->p_zone);
+ if (curproc->p_zone->zone_brand == &lx_brand) {
+ mutex_enter(&br_data->lxzd_lock);
+ (void) strlcpy(version, br_data->lxzd_kernel_version,
+ sizeof (version));
+ mutex_exit(&br_data->lxzd_lock);
+
+ lxpr_uiobuf_printf(uiobuf, "%s\n", version);
+ } else {
+ lxpr_uiobuf_printf(uiobuf, "\n");
+ }
+}
+
+static void
+lxpr_read_sys_kernel_pid_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_PID_MAX);
+ lxpr_uiobuf_printf(uiobuf, "%d\n", maxpid);
+}
+
+static void
+lxpr_read_sys_kernel_rand_bootid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ /*
+ * This file isn't documented on the Linux proc(5) man page but
+ * according to the blog of the author of systemd/journald (the
+ * consumer), he says:
+ * boot_id: A random ID that is regenerated on each boot. As such it
+ * can be used to identify the local machine's current boot. It's
+ * universally available on any recent Linux kernel. It's a good and
+ * safe choice if you need to identify a specific boot on a specific
+ * booted kernel.
+ *
+ * We'll just generate a random ID if necessary. On Linux the format
+ * appears to resemble a uuid but since it is not documented to be a
+ * uuid, we don't worry about that.
+ */
+ lx_zone_data_t *br_data;
+ char bootid[LX_BOOTID_LEN];
+
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_RAND_BOOTID);
+
+ if (curproc->p_zone->zone_brand != &lx_brand) {
+ lxpr_uiobuf_printf(uiobuf, "0\n");
+ return;
+ }
+
+ br_data = ztolxzd(curproc->p_zone);
+ mutex_enter(&br_data->lxzd_lock);
+ if (br_data->lxzd_bootid[0] == '\0') {
+ extern int getrandom(void *, size_t, int);
+ int i;
+
+ for (i = 0; i < 5; i++) {
+ u_longlong_t n;
+ char s[32];
+
+ (void) random_get_bytes((uint8_t *)&n, sizeof (n));
+ switch (i) {
+ case 0: (void) snprintf(s, sizeof (s), "%08llx", n);
+ s[8] = '\0';
+ break;
+ case 4: (void) snprintf(s, sizeof (s), "%012llx", n);
+ s[12] = '\0';
+ break;
+ default: (void) snprintf(s, sizeof (s), "%04llx", n);
+ s[4] = '\0';
+ break;
+ }
+ if (i > 0)
+ strlcat(br_data->lxzd_bootid, "-",
+ sizeof (br_data->lxzd_bootid));
+ strlcat(br_data->lxzd_bootid, s,
+ sizeof (br_data->lxzd_bootid));
+ }
+ }
+ (void) strlcpy(bootid, br_data->lxzd_bootid, sizeof (bootid));
+ mutex_exit(&br_data->lxzd_lock);
+
+ lxpr_uiobuf_printf(uiobuf, "%s\n", bootid);
+
+}
+
+static void
+lxpr_read_sys_kernel_sem(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *pp = curproc;
+ rctl_qty_t vmsl, vopm, vmni, vmns;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_SEM);
+
+ mutex_enter(&pp->p_lock);
+ vmsl = rctl_enforced_value(rc_process_semmsl, pp->p_rctls, pp);
+ vopm = rctl_enforced_value(rc_process_semopm, pp->p_rctls, pp);
+ vmni = rctl_enforced_value(rc_zone_semmni, pp->p_zone->zone_rctls, pp);
+ mutex_exit(&pp->p_lock);
+ vmns = vmsl * vmni;
+ if (vmns < vmsl || vmns < vmni) {
+ vmns = ULLONG_MAX;
+ }
+ /*
+ * Format: semmsl semmns semopm semmni
+ * - semmsl: Limit semaphores in a sempahore set.
+ * - semmns: Limit semaphores in all semaphore sets
+ * - semopm: Limit operations in a single semop call
+ * - semmni: Limit number of semaphore sets
+ */
+ lxpr_uiobuf_printf(uiobuf, "%llu\t%llu\t%llu\t%llu\n",
+ vmsl, vmns, vopm, vmni);
+}
+
+static void
+lxpr_read_sys_kernel_shmall(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ rctl_qty_t val;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_SHMALL);
+
+ mutex_enter(&curproc->p_lock);
+ val = rctl_enforced_value(rc_zone_shmmax,
+ curproc->p_zone->zone_rctls, curproc);
+ mutex_exit(&curproc->p_lock);
+
+ /* value is in pages */
+ lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)btop(val));
+}
+
+static void
+lxpr_read_sys_kernel_shmmax(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ rctl_qty_t val;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_SHMMAX);
+
+ mutex_enter(&curproc->p_lock);
+ val = rctl_enforced_value(rc_zone_shmmax,
+ curproc->p_zone->zone_rctls, curproc);
+ mutex_exit(&curproc->p_lock);
+
+ if (val > FOURGB)
+ val = FOURGB;
+
+ lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val);
+}
+
+static void
+lxpr_read_sys_kernel_shmmni(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ rctl_qty_t val;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_SHMMNI);
+
+ mutex_enter(&curproc->p_lock);
+ val = rctl_enforced_value(rc_zone_shmmni,
+ curproc->p_zone->zone_rctls, curproc);
+ mutex_exit(&curproc->p_lock);
+
+ if (val > FOURGB)
+ val = FOURGB;
+
+ lxpr_uiobuf_printf(uiobuf, "%u\n", (uint_t)val);
+}
+
+static void
+lxpr_read_sys_kernel_threads_max(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_THREADS_MAX);
+ lxpr_uiobuf_printf(uiobuf, "%d\n", curproc->p_zone->zone_nlwps_ctl);
+}
+
+static void
+lxpr_read_sys_net_core_somaxc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ netstack_t *ns;
+ tcp_stack_t *tcps;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_CORE_SOMAXCON);
+
+ ns = netstack_get_current();
+ if (ns == NULL) {
+ lxpr_uiobuf_printf(uiobuf, "%d\n", SOMAXCONN);
+ return;
+ }
+
+ tcps = ns->netstack_tcp;
+ lxpr_uiobuf_printf(uiobuf, "%d\n", tcps->tcps_conn_req_max_q);
+ netstack_rele(ns);
+}
+
+/*
+ * ip_local_port_range
+ *
+ * The low & high port number range.
+ * integers; default: 32768 61000
+ *
+ * illumos: tcp_smallest_anon_port & tcp_largest_anon_port
+ * Not in tcp(7p) man page.
+ */
+static void
+lxpr_read_sys_net_ipv4_ip_lport_range(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ netstack_t *ns;
+ tcp_stack_t *tcps;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_IPV4_IP_LPORT_RANGE);
+
+ ns = netstack_get_current();
+ if (ns == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, ENXIO);
+ return;
+ }
+
+ tcps = ns->netstack_tcp;
+ lxpr_uiobuf_printf(uiobuf, "%d\t%d\n",
+ tcps->tcps_smallest_anon_port, tcps->tcps_largest_anon_port);
+ netstack_rele(ns);
+}
+
+/*
+ * tcp_fin_timeout
+ *
+ * This specifies how many seconds to wait for a final FIN packet before the
+ * socket is forcibly closed. This is strictly a violation of the TCP
+ * specification, but required to prevent denial-of-service attacks.
+ * integer; default: 60;
+ *
+ * illumos: tcp_fin_wait_2_flush_interval
+ * Not in tcp(7p) man page but see comment in uts/common/inet/tcp/tcp_input.c
+ * in the tcp_input_data() function on the use of tcp_fin_wait_2_flush_interval.
+ * The value is in milliseconds.
+ */
+static void
+lxpr_read_sys_net_ipv4_tcp_fin_to(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ netstack_t *ns;
+ tcp_stack_t *tcps;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_IPV4_TCP_FIN_TO);
+
+ ns = netstack_get_current();
+ if (ns == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, ENXIO);
+ return;
+ }
+
+ tcps = ns->netstack_tcp;
+ lxpr_uiobuf_printf(uiobuf, "%d\n",
+ tcps->tcps_fin_wait_2_flush_interval / 1000);
+ netstack_rele(ns);
+}
+
+/*
+ * tcp_keepalive_intvl
+ *
+ * The number of seconds between TCP keep-alive probes. default: 75
+ * Linux retries tcp_keepalive_probes (9) times before timing out.
+ *
+ * illumos:
+ * We have tcp_ka_rinterval but there is no corresponding tcps_* tunable for
+ * this. The closest is tcps_keepalive_abort_interval which specifies the
+ * time threshold for aborting a TCP connection in milliseconds. Linux retries
+ * 9 times (giving a total of 11.25 minutes) so we emulate this by dividing out
+ * tcps_keepalive_abort_interval by 9.
+ */
+static void
+lxpr_read_sys_net_ipv4_tcp_ka_int(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ netstack_t *ns;
+ tcp_stack_t *tcps;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_IPV4_TCP_KA_INT);
+
+ ns = netstack_get_current();
+ if (ns == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, ENXIO);
+ return;
+ }
+
+ tcps = ns->netstack_tcp;
+ lxpr_uiobuf_printf(uiobuf, "%d\n",
+ (tcps->tcps_keepalive_abort_interval / 1000) / 9);
+ netstack_rele(ns);
+}
+
+/*
+ * tcp_keepalive_time
+ *
+ * The number of seconds a connection needs to be idle before TCP begins
+ * sending out keep-alive probes. The default value is 7200 seconds (2 hours).
+ *
+ * illumos: tcp_keepalive_interval
+ * The interval for sending out the first probe in milliseconds. The default is
+ * two hours.
+ */
+static void
+lxpr_read_sys_net_ipv4_tcp_ka_tim(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ netstack_t *ns;
+ tcp_stack_t *tcps;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_IPV4_TCP_KA_TIM);
+
+ ns = netstack_get_current();
+ if (ns == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, ENXIO);
+ return;
+ }
+
+ tcps = ns->netstack_tcp;
+ lxpr_uiobuf_printf(uiobuf, "%d\n",
+ (tcps->tcps_keepalive_interval / 1000));
+ netstack_rele(ns);
+}
+
+/*
+ * tcp_sack
+ *
+ * Enable RFC 2018 TCP Selective Acknowledgements. Boolean, default: enabled
+ *
+ * illumos: tcp_sack_permitted
+ * tcp_sack_permitted 0 == disabled, 1 == no initiate but accept,
+ * 2 == initiate and accept. default is 2.
+ */
+static void
+lxpr_read_sys_net_ipv4_tcp_sack(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ netstack_t *ns;
+ tcp_stack_t *tcps;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_IPV4_TCP_SACK);
+
+ ns = netstack_get_current();
+ if (ns == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, ENXIO);
+ return;
+ }
+
+ tcps = ns->netstack_tcp;
+ lxpr_uiobuf_printf(uiobuf, "%d\n",
+ (tcps->tcps_sack_permitted == 0 ? 0 : 1));
+ netstack_rele(ns);
+}
+
+/*
+ * tcp_window_scaling
+ *
+ * RFC 1323 TCP window scaling. This feature allows the use of a large window
+ * (> 64K) on a TCP connection. Boolean; default: enabled
+ *
+ * illumos: tcp_wscale_always
+ * tcp_wscale_always is set to 1, the window scale option will always be
+ * set when connecting to a remote system. If tcp_wscale_always is 0, the
+ * window scale option will be set only if the user has requested a send or
+ * receive window larger than 64K. The default value of is 1.
+ */
+static void
+lxpr_read_sys_net_ipv4_tcp_winscale(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ netstack_t *ns;
+ tcp_stack_t *tcps;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_IPV4_TCP_WINSCALE);
+
+ ns = netstack_get_current();
+ if (ns == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, ENXIO);
+ return;
+ }
+
+ tcps = ns->netstack_tcp;
+ lxpr_uiobuf_printf(uiobuf, "%d\n", tcps->tcps_wscale_always);
+ netstack_rele(ns);
+}
+
+static void
+lxpr_read_sys_vm_max_map_cnt(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_MAX_MAP_CNT);
+ /* We don't limit mappings, just say we have a large limit. */
+ lxpr_uiobuf_printf(uiobuf, "%d\n", 16777215);
+}
+
+static void
+lxpr_read_sys_vm_minfr_kb(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_MINFR_KB);
+ lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
+}
+
+static void
+lxpr_read_sys_vm_nhpages(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_NHUGEP);
+ lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
+}
+
+static void
+lxpr_read_sys_vm_overcommit_mem(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_OVERCOMMIT_MEM);
+ lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
+}
+
+static void
+lxpr_read_sys_vm_swappiness(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_VM_SWAPPINESS);
+ lxpr_uiobuf_printf(uiobuf, "%d\n", 0);
+}
+
+/*
+ * lxpr_read_uptime(): read the contents of the "uptime" file.
+ *
+ * format is: "%.2lf, %.2lf",uptime_secs, idle_secs
+ * Use fixed point arithmetic to get 2 decimal places
+ */
+/* ARGSUSED */
+static void
+lxpr_read_uptime(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ cpu_t *cp, *cpstart;
+ int pools_enabled;
+ ulong_t idle_cum = 0;
+ ulong_t cpu_count = 0;
+ ulong_t idle_s;
+ ulong_t idle_cs;
+ ulong_t up_s;
+ ulong_t up_cs;
+ hrtime_t birthtime;
+ hrtime_t centi_sec = 10000000; /* 10^7 */
+
+ ASSERT(lxpnp->lxpr_type == LXPR_UPTIME);
+
+ /* Calculate cumulative stats */
+ mutex_enter(&cpu_lock);
+ pools_enabled = pool_pset_enabled();
+
+ cp = cpstart = CPU->cpu_part->cp_cpulist;
+ do {
+ /*
+ * Don't count CPUs that aren't even in the system
+ * or aren't up yet.
+ */
+ if ((cp->cpu_flags & CPU_EXISTS) == 0) {
+ continue;
+ }
+
+ idle_cum += CPU_STATS(cp, sys.cpu_ticks_idle);
+ idle_cum += CPU_STATS(cp, sys.cpu_ticks_wait);
+ cpu_count += 1;
+
+ if (pools_enabled)
+ cp = cp->cpu_next_part;
+ else
+ cp = cp->cpu_next;
+ } while (cp != cpstart);
+ mutex_exit(&cpu_lock);
+
+ /* Getting the Zone zsched process startup time */
+ birthtime = LXPTOZ(lxpnp)->zone_zsched->p_mstart;
+ up_cs = (gethrtime() - birthtime) / centi_sec;
+ up_s = up_cs / 100;
+ up_cs %= 100;
+
+ ASSERT(cpu_count > 0);
+ idle_cum /= cpu_count;
+ idle_s = idle_cum / hz;
+ idle_cs = idle_cum % hz;
+ idle_cs *= 100;
+ idle_cs /= hz;
+
+ lxpr_uiobuf_printf(uiobuf,
+ "%ld.%02d %ld.%02d\n", up_s, up_cs, idle_s, idle_cs);
+}
+
+static const char *amd_x_edx[] = {
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, "syscall",
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, "mp",
+ "nx", NULL, "mmxext", NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, "lm", "3dnowext", "3dnow"
+};
+
+static const char *amd_x_ecx[] = {
+ "lahf_lm", NULL, "svm", NULL,
+ "altmovcr8"
+};
+
+static const char *tm_x_edx[] = {
+ "recovery", "longrun", NULL, "lrti"
+};
+
+/*
+ * Intel calls no-execute "xd" in its docs, but Linux still reports it as "nx."
+ */
+static const char *intc_x_edx[] = {
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, "syscall",
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ "nx", NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, "lm", NULL, NULL
+};
+
+static const char *intc_edx[] = {
+ "fpu", "vme", "de", "pse",
+ "tsc", "msr", "pae", "mce",
+ "cx8", "apic", NULL, "sep",
+ "mtrr", "pge", "mca", "cmov",
+ "pat", "pse36", "pn", "clflush",
+ NULL, "dts", "acpi", "mmx",
+ "fxsr", "sse", "sse2", "ss",
+ "ht", "tm", "ia64", "pbe"
+};
+
+/*
+ * "sse3" on linux is called "pni" (Prescott New Instructions).
+ */
+static const char *intc_ecx[] = {
+ "pni", NULL, NULL, "monitor",
+ "ds_cpl", NULL, NULL, "est",
+ "tm2", NULL, "cid", NULL,
+ NULL, "cx16", "xtpr"
+};
+
+/*
+ * Report a list of each cgroup subsystem supported by our emulated cgroup fs.
+ * This needs to exist for systemd to run but for now we don't report any
+ * cgroup subsystems as being installed. The commented example below shows
+ * how to print a subsystem entry.
+ */
+static void
+lxpr_read_cgroups(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_uiobuf_printf(uiobuf, "%s\t%s\t%s\t%s\n",
+ "#subsys_name", "hierarchy", "num_cgroups", "enabled");
+
+ /*
+ * lxpr_uiobuf_printf(uiobuf, "%s\t%s\t%s\t%s\n",
+ * "cpu,cpuacct", "2", "1", "1");
+ */
+}
+
+static void
+lxpr_read_cpuinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ int i;
+ uint32_t bits;
+ cpu_t *cp, *cpstart;
+ int pools_enabled;
+ const char **fp;
+ char brandstr[CPU_IDSTRLEN];
+ struct cpuid_regs cpr;
+ int maxeax;
+ int std_ecx, std_edx, ext_ecx, ext_edx;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_CPUINFO);
+
+ mutex_enter(&cpu_lock);
+ pools_enabled = pool_pset_enabled();
+
+ cp = cpstart = CPU->cpu_part->cp_cpulist;
+ do {
+ /*
+ * This returns the maximum eax value for standard cpuid
+ * functions in eax.
+ */
+ cpr.cp_eax = 0;
+ (void) cpuid_insn(cp, &cpr);
+ maxeax = cpr.cp_eax;
+
+ /*
+ * Get standard x86 feature flags.
+ */
+ cpr.cp_eax = 1;
+ (void) cpuid_insn(cp, &cpr);
+ std_ecx = cpr.cp_ecx;
+ std_edx = cpr.cp_edx;
+
+ /*
+ * Now get extended feature flags.
+ */
+ cpr.cp_eax = 0x80000001;
+ (void) cpuid_insn(cp, &cpr);
+ ext_ecx = cpr.cp_ecx;
+ ext_edx = cpr.cp_edx;
+
+ (void) cpuid_getbrandstr(cp, brandstr, CPU_IDSTRLEN);
+
+ lxpr_uiobuf_printf(uiobuf,
+ "processor\t: %d\n"
+ "vendor_id\t: %s\n"
+ "cpu family\t: %d\n"
+ "model\t\t: %d\n"
+ "model name\t: %s\n"
+ "stepping\t: %d\n"
+ "cpu MHz\t\t: %u.%03u\n",
+ cp->cpu_id, cpuid_getvendorstr(cp), cpuid_getfamily(cp),
+ cpuid_getmodel(cp), brandstr, cpuid_getstep(cp),
+ (uint32_t)(cpu_freq_hz / 1000000),
+ ((uint32_t)(cpu_freq_hz / 1000)) % 1000);
+
+ lxpr_uiobuf_printf(uiobuf, "cache size\t: %u KB\n",
+ getl2cacheinfo(cp, NULL, NULL, NULL) / 1024);
+
+ if (is_x86_feature(x86_featureset, X86FSET_HTT)) {
+ /*
+ * 'siblings' is used for HT-style threads
+ */
+ lxpr_uiobuf_printf(uiobuf,
+ "physical id\t: %lu\n"
+ "siblings\t: %u\n",
+ pg_plat_hw_instance_id(cp, PGHW_CHIP),
+ cpuid_get_ncpu_per_chip(cp));
+ }
+
+ /*
+ * Since we're relatively picky about running on older hardware,
+ * we can be somewhat cavalier about the answers to these ones.
+ *
+ * In fact, given the hardware we support, we just say:
+ *
+ * fdiv_bug : no (if we're on a 64-bit kernel)
+ * hlt_bug : no
+ * f00f_bug : no
+ * coma_bug : no
+ * wp : yes (write protect in supervsr mode)
+ */
+ lxpr_uiobuf_printf(uiobuf,
+ "fdiv_bug\t: %s\n"
+ "hlt_bug \t: no\n"
+ "f00f_bug\t: no\n"
+ "coma_bug\t: no\n"
+ "fpu\t\t: %s\n"
+ "fpu_exception\t: %s\n"
+ "cpuid level\t: %d\n"
+ "flags\t\t:",
+#if defined(__i386)
+ fpu_pentium_fdivbug ? "yes" : "no",
+#else
+ "no",
+#endif /* __i386 */
+ fpu_exists ? "yes" : "no", fpu_exists ? "yes" : "no",
+ maxeax);
+
+ for (bits = std_edx, fp = intc_edx, i = 0;
+ i < sizeof (intc_edx) / sizeof (intc_edx[0]); fp++, i++)
+ if ((bits & (1 << i)) != 0 && *fp)
+ lxpr_uiobuf_printf(uiobuf, " %s", *fp);
+
+ /*
+ * name additional features where appropriate
+ */
+ switch (x86_vendor) {
+ case X86_VENDOR_Intel:
+ for (bits = ext_edx, fp = intc_x_edx, i = 0;
+ i < sizeof (intc_x_edx) / sizeof (intc_x_edx[0]);
+ fp++, i++)
+ if ((bits & (1 << i)) != 0 && *fp)
+ lxpr_uiobuf_printf(uiobuf, " %s", *fp);
+ break;
+
+ case X86_VENDOR_AMD:
+ for (bits = ext_edx, fp = amd_x_edx, i = 0;
+ i < sizeof (amd_x_edx) / sizeof (amd_x_edx[0]);
+ fp++, i++)
+ if ((bits & (1 << i)) != 0 && *fp)
+ lxpr_uiobuf_printf(uiobuf, " %s", *fp);
+
+ for (bits = ext_ecx, fp = amd_x_ecx, i = 0;
+ i < sizeof (amd_x_ecx) / sizeof (amd_x_ecx[0]);
+ fp++, i++)
+ if ((bits & (1 << i)) != 0 && *fp)
+ lxpr_uiobuf_printf(uiobuf, " %s", *fp);
+ break;
+
+ case X86_VENDOR_TM:
+ for (bits = ext_edx, fp = tm_x_edx, i = 0;
+ i < sizeof (tm_x_edx) / sizeof (tm_x_edx[0]);
+ fp++, i++)
+ if ((bits & (1 << i)) != 0 && *fp)
+ lxpr_uiobuf_printf(uiobuf, " %s", *fp);
+ break;
+ default:
+ break;
+ }
+
+ for (bits = std_ecx, fp = intc_ecx, i = 0;
+ i < sizeof (intc_ecx) / sizeof (intc_ecx[0]); fp++, i++)
+ if ((bits & (1 << i)) != 0 && *fp)
+ lxpr_uiobuf_printf(uiobuf, " %s", *fp);
+
+ lxpr_uiobuf_printf(uiobuf, "\n\n");
+
+ if (pools_enabled)
+ cp = cp->cpu_next_part;
+ else
+ cp = cp->cpu_next;
+ } while (cp != cpstart);
+
+ mutex_exit(&cpu_lock);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_fd(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_FD_FD);
+ lxpr_uiobuf_seterr(uiobuf, EFAULT);
+}
+
+/*
+ * Report a list of file systems loaded in the kernel. We only report the ones
+ * which we support and which may be checked by various components to see if
+ * they are loaded.
+ */
+static void
+lxpr_read_filesystems(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "autofs");
+ lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "cgroup");
+ lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "nfs");
+ lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "proc");
+ lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "sysfs");
+ lxpr_uiobuf_printf(uiobuf, "%s\t%s\n", "nodev", "tmpfs");
+}
+
+/*
+ * lxpr_getattr(): Vnode operation for VOP_GETATTR()
+ */
+static int
+lxpr_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
+ caller_context_t *ct)
+{
+ register lxpr_node_t *lxpnp = VTOLXP(vp);
+ lxpr_nodetype_t type = lxpnp->lxpr_type;
+ extern uint_t nproc;
+ int error;
+
+ /*
+ * Return attributes of underlying vnode if ATTR_REAL
+ *
+ * but keep fd files with the symlink permissions
+ */
+ if (lxpnp->lxpr_realvp != NULL && (flags & ATTR_REAL)) {
+ vnode_t *rvp = lxpnp->lxpr_realvp;
+
+ /*
+ * withold attribute information to owner or root
+ */
+ if ((error = VOP_ACCESS(rvp, 0, 0, cr, ct)) != 0) {
+ return (error);
+ }
+
+ /*
+ * now its attributes
+ */
+ if ((error = VOP_GETATTR(rvp, vap, flags, cr, ct)) != 0) {
+ return (error);
+ }
+
+ /*
+ * if it's a file in lx /proc/pid/fd/xx then set its
+ * mode and keep it looking like a symlink, fifo or socket
+ */
+ if (type == LXPR_PID_FD_FD) {
+ vap->va_mode = lxpnp->lxpr_mode;
+ vap->va_type = lxpnp->lxpr_realvp->v_type;
+ vap->va_size = 0;
+ vap->va_nlink = 1;
+ }
+ return (0);
+ }
+
+ /* Default attributes, that may be overridden below */
+ bzero(vap, sizeof (*vap));
+ vap->va_atime = vap->va_mtime = vap->va_ctime = lxpnp->lxpr_time;
+ vap->va_nlink = 1;
+ vap->va_type = vp->v_type;
+ vap->va_mode = lxpnp->lxpr_mode;
+ vap->va_fsid = vp->v_vfsp->vfs_dev;
+ vap->va_blksize = DEV_BSIZE;
+ vap->va_uid = lxpnp->lxpr_uid;
+ vap->va_gid = lxpnp->lxpr_gid;
+ vap->va_nodeid = lxpnp->lxpr_ino;
+
+ switch (type) {
+ case LXPR_PROCDIR:
+ vap->va_nlink = nproc + 2 + PROCDIRFILES;
+ vap->va_size = (nproc + 2 + PROCDIRFILES) * LXPR_SDSIZE;
+ break;
+ case LXPR_PIDDIR:
+ vap->va_nlink = PIDDIRFILES;
+ vap->va_size = PIDDIRFILES * LXPR_SDSIZE;
+ break;
+ case LXPR_PID_TASK_IDDIR:
+ vap->va_nlink = TIDDIRFILES;
+ vap->va_size = TIDDIRFILES * LXPR_SDSIZE;
+ break;
+ case LXPR_SELF:
+ vap->va_uid = crgetruid(curproc->p_cred);
+ vap->va_gid = crgetrgid(curproc->p_cred);
+ break;
+ case LXPR_PID_FD_FD:
+ case LXPR_PID_TID_FD_FD:
+ /*
+ * Restore VLNK type for lstat-type activity.
+ * See lxpr_readlink for more details.
+ */
+ if ((flags & FOLLOW) == 0)
+ vap->va_type = VLNK;
+ default:
+ break;
+ }
+
+ vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size);
+ return (0);
+}
+
+/*
+ * lxpr_access(): Vnode operation for VOP_ACCESS()
+ */
+static int
+lxpr_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct)
+{
+ lxpr_node_t *lxpnp = VTOLXP(vp);
+ lxpr_nodetype_t type = lxpnp->lxpr_type;
+ int shift = 0;
+ proc_t *tp;
+
+ /* lx /proc is primarily a read only file system */
+ if ((mode & VWRITE) && !lxpr_is_writable(type)) {
+ return (EROFS);
+ }
+
+ /*
+ * If this is a restricted file, check access permissions.
+ */
+ switch (type) {
+ case LXPR_PIDDIR:
+ return (0);
+ case LXPR_PID_CURDIR:
+ case LXPR_PID_ENV:
+ case LXPR_PID_EXE:
+ case LXPR_PID_LIMITS:
+ case LXPR_PID_MAPS:
+ case LXPR_PID_MEM:
+ case LXPR_PID_ROOTDIR:
+ case LXPR_PID_FDDIR:
+ case LXPR_PID_FD_FD:
+ case LXPR_PID_TID_FDDIR:
+ case LXPR_PID_TID_FD_FD:
+ if ((tp = lxpr_lock(lxpnp->lxpr_pid, ZOMB_OK)) == NULL)
+ return (ENOENT);
+ if (tp != curproc && secpolicy_proc_access(cr) != 0 &&
+ priv_proc_cred_perm(cr, tp, NULL, mode) != 0) {
+ lxpr_unlock(tp);
+ return (EACCES);
+ }
+ lxpr_unlock(tp);
+ default:
+ break;
+ }
+
+ if (lxpnp->lxpr_realvp != NULL) {
+ /*
+ * For these we use the underlying vnode's accessibility.
+ */
+ return (VOP_ACCESS(lxpnp->lxpr_realvp, mode, flags, cr, ct));
+ }
+
+ /* If user is root allow access regardless of permission bits */
+ if (secpolicy_proc_access(cr) == 0)
+ return (0);
+
+ /*
+ * Access check is based on only one of owner, group, public. If not
+ * owner, then check group. If not a member of the group, then check
+ * public access.
+ */
+ if (crgetuid(cr) != lxpnp->lxpr_uid) {
+ shift += 3;
+ if (!groupmember((uid_t)lxpnp->lxpr_gid, cr))
+ shift += 3;
+ }
+
+ mode &= ~(lxpnp->lxpr_mode << shift);
+
+ if (mode == 0)
+ return (0);
+
+ return (EACCES);
+}
+
+/* ARGSUSED */
+static vnode_t *
+lxpr_lookup_not_a_dir(vnode_t *dp, char *comp)
+{
+ return (NULL);
+}
+
+/*
+ * lxpr_lookup(): Vnode operation for VOP_LOOKUP()
+ */
+/* ARGSUSED */
+static int
+lxpr_lookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pathp,
+ int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
+ int *direntflags, pathname_t *realpnp)
+{
+ lxpr_node_t *lxpnp = VTOLXP(dp);
+ lxpr_nodetype_t type = lxpnp->lxpr_type;
+ int error;
+
+ ASSERT(dp->v_type == VDIR);
+ ASSERT(type < LXPR_NFILES);
+
+ /*
+ * we should never get here because the lookup
+ * is done on the realvp for these nodes
+ */
+ ASSERT(type != LXPR_PID_FD_FD &&
+ type != LXPR_PID_CURDIR &&
+ type != LXPR_PID_ROOTDIR);
+
+ /*
+ * restrict lookup permission to owner or root
+ */
+ if ((error = lxpr_access(dp, VEXEC, 0, cr, ct)) != 0) {
+ return (error);
+ }
+
+ /*
+ * Just return the parent vnode if that's where we are trying to go.
+ */
+ if (strcmp(comp, "..") == 0) {
+ VN_HOLD(lxpnp->lxpr_parent);
+ *vpp = lxpnp->lxpr_parent;
+ return (0);
+ }
+
+ /*
+ * Special handling for directory searches. Note: null component name
+ * denotes that the current directory is being searched.
+ */
+ if ((dp->v_type == VDIR) && (*comp == '\0' || strcmp(comp, ".") == 0)) {
+ VN_HOLD(dp);
+ *vpp = dp;
+ return (0);
+ }
+
+ *vpp = (lxpr_lookup_function[type](dp, comp));
+ return ((*vpp == NULL) ? ENOENT : 0);
+}
+
+/*
+ * Do a sequential search on the given directory table
+ */
+static vnode_t *
+lxpr_lookup_common(vnode_t *dp, char *comp, proc_t *p,
+ lxpr_dirent_t *dirtab, int dirtablen)
+{
+ lxpr_node_t *lxpnp;
+ int count;
+
+ for (count = 0; count < dirtablen; count++) {
+ if (strcmp(dirtab[count].d_name, comp) == 0) {
+ lxpnp = lxpr_getnode(dp, dirtab[count].d_type, p, 0);
+ dp = LXPTOV(lxpnp);
+ ASSERT(dp != NULL);
+ return (dp);
+ }
+ }
+ return (NULL);
+}
+
+static vnode_t *
+lxpr_lookup_piddir(vnode_t *dp, char *comp)
+{
+ proc_t *p;
+
+ ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PIDDIR);
+
+ p = lxpr_lock(VTOLXP(dp)->lxpr_pid, ZOMB_OK);
+ if (p == NULL)
+ return (NULL);
+
+ dp = lxpr_lookup_common(dp, comp, p, piddir, PIDDIRFILES);
+
+ lxpr_unlock(p);
+
+ return (dp);
+}
+
+/*
+ * Lookup one of the process's task ID's.
+ */
+static vnode_t *
+lxpr_lookup_taskdir(vnode_t *dp, char *comp)
+{
+ lxpr_node_t *dlxpnp = VTOLXP(dp);
+ lxpr_node_t *lxpnp;
+ proc_t *p;
+ pid_t real_pid;
+ uint_t tid;
+ int c;
+ kthread_t *t;
+
+ ASSERT(dlxpnp->lxpr_type == LXPR_PID_TASKDIR);
+
+ /*
+ * convert the string rendition of the filename to a thread ID
+ */
+ tid = 0;
+ while ((c = *comp++) != '\0') {
+ int otid;
+ if (c < '0' || c > '9')
+ return (NULL);
+
+ otid = tid;
+ tid = 10 * tid + c - '0';
+ /* integer overflow */
+ if (tid / 10 != otid)
+ return (NULL);
+ }
+
+ /*
+ * get the proc to work with and lock it
+ */
+ real_pid = get_real_pid(dlxpnp->lxpr_pid);
+ p = lxpr_lock(real_pid, NO_ZOMB);
+ if ((p == NULL))
+ return (NULL);
+
+ /*
+ * Bail if this is a system process.
+ */
+ if ((p->p_flag & SSYS) || (p->p_as == &kas)) {
+ lxpr_unlock(p);
+ return (NULL);
+ }
+
+ if (p->p_brand == &lx_brand) {
+ t = lxpr_get_thread(p, tid);
+ } else {
+ /*
+ * Only the main thread is visible for non-branded processes.
+ */
+ t = p->p_tlist;
+ if (tid != p->p_pid || t == NULL) {
+ t = NULL;
+ } else {
+ thread_lock(t);
+ }
+ }
+ if (t == NULL) {
+ lxpr_unlock(p);
+ return (NULL);
+ }
+ thread_unlock(t);
+
+ /*
+ * Allocate and fill in a new lx /proc taskid node.
+ * Instead of the last arg being a fd, it is a tid.
+ */
+ lxpnp = lxpr_getnode(dp, LXPR_PID_TASK_IDDIR, p, tid);
+ dp = LXPTOV(lxpnp);
+ ASSERT(dp != NULL);
+ lxpr_unlock(p);
+ return (dp);
+}
+
+/*
+ * Lookup one of the process's task ID's.
+ */
+static vnode_t *
+lxpr_lookup_task_tid_dir(vnode_t *dp, char *comp)
+{
+ lxpr_node_t *dlxpnp = VTOLXP(dp);
+ lxpr_node_t *lxpnp;
+ proc_t *p;
+ pid_t real_pid;
+ kthread_t *t;
+ int i;
+
+ ASSERT(dlxpnp->lxpr_type == LXPR_PID_TASK_IDDIR);
+
+ /*
+ * get the proc to work with and lock it
+ */
+ real_pid = get_real_pid(dlxpnp->lxpr_pid);
+ p = lxpr_lock(real_pid, NO_ZOMB);
+ if ((p == NULL))
+ return (NULL);
+
+ /*
+ * Bail if this is a system process.
+ */
+ if ((p->p_flag & SSYS) || (p->p_as == &kas)) {
+ lxpr_unlock(p);
+ return (NULL);
+ }
+
+ /* need to confirm tid is still there */
+ t = lxpr_get_thread(p, dlxpnp->lxpr_desc);
+ if (t == NULL) {
+ lxpr_unlock(p);
+ return (NULL);
+ }
+ thread_unlock(t);
+
+ /*
+ * allocate and fill in the new lx /proc taskid dir node
+ */
+ for (i = 0; i < TIDDIRFILES; i++) {
+ if (strcmp(tiddir[i].d_name, comp) == 0) {
+ lxpnp = lxpr_getnode(dp, tiddir[i].d_type, p,
+ dlxpnp->lxpr_desc);
+ dp = LXPTOV(lxpnp);
+ ASSERT(dp != NULL);
+ lxpr_unlock(p);
+ return (dp);
+ }
+ }
+
+ lxpr_unlock(p);
+ return (NULL);
+}
+
+/*
+ * Lookup one of the process's open files.
+ */
+static vnode_t *
+lxpr_lookup_fddir(vnode_t *dp, char *comp)
+{
+ lxpr_node_t *dlxpnp = VTOLXP(dp);
+
+ ASSERT(dlxpnp->lxpr_type == LXPR_PID_FDDIR ||
+ dlxpnp->lxpr_type == LXPR_PID_TID_FDDIR);
+
+ return (lxpr_lookup_fdnode(dp, comp));
+}
+
+static vnode_t *
+lxpr_lookup_netdir(vnode_t *dp, char *comp)
+{
+ ASSERT(VTOLXP(dp)->lxpr_type == LXPR_NETDIR);
+
+ dp = lxpr_lookup_common(dp, comp, NULL, netdir, NETDIRFILES);
+
+ return (dp);
+}
+
+static vnode_t *
+lxpr_lookup_procdir(vnode_t *dp, char *comp)
+{
+ ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PROCDIR);
+
+ /*
+ * We know all the names of files & dirs in our file system structure
+ * except those that are pid names. These change as pids are created/
+ * deleted etc., so we just look for a number as the first char to see
+ * if we are we doing pid lookups.
+ *
+ * Don't need to check for "self" as it is implemented as a symlink
+ */
+ if (*comp >= '0' && *comp <= '9') {
+ pid_t pid = 0;
+ lxpr_node_t *lxpnp = NULL;
+ proc_t *p;
+ int c;
+
+ while ((c = *comp++) != '\0')
+ pid = 10 * pid + c - '0';
+
+ /*
+ * Can't continue if the process is still loading or it doesn't
+ * really exist yet (or maybe it just died!)
+ */
+ p = lxpr_lock(pid, ZOMB_OK);
+ if (p == NULL)
+ return (NULL);
+
+ if (secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
+ lxpr_unlock(p);
+ return (NULL);
+ }
+
+ /*
+ * allocate and fill in a new lx /proc node
+ */
+ lxpnp = lxpr_getnode(dp, LXPR_PIDDIR, p, 0);
+
+ lxpr_unlock(p);
+
+ dp = LXPTOV(lxpnp);
+ ASSERT(dp != NULL);
+
+ return (dp);
+ }
+
+ /* Lookup fixed names */
+ return (lxpr_lookup_common(dp, comp, NULL, lx_procdir, PROCDIRFILES));
+}
+
+static vnode_t *
+lxpr_lookup_sysdir(vnode_t *dp, char *comp)
+{
+ ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYSDIR);
+ return (lxpr_lookup_common(dp, comp, NULL, sysdir, SYSDIRFILES));
+}
+
+static vnode_t *
+lxpr_lookup_sys_kerneldir(vnode_t *dp, char *comp)
+{
+ ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_KERNELDIR);
+ return (lxpr_lookup_common(dp, comp, NULL, sys_kerneldir,
+ SYS_KERNELDIRFILES));
+}
+
+static vnode_t *
+lxpr_lookup_sys_kdir_randdir(vnode_t *dp, char *comp)
+{
+ ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_KERNEL_RANDDIR);
+ return (lxpr_lookup_common(dp, comp, NULL, sys_randdir,
+ SYS_RANDDIRFILES));
+}
+
+static vnode_t *
+lxpr_lookup_sys_netdir(vnode_t *dp, char *comp)
+{
+ ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_NETDIR);
+ return (lxpr_lookup_common(dp, comp, NULL, sys_netdir,
+ SYS_NETDIRFILES));
+}
+
+static vnode_t *
+lxpr_lookup_sys_net_coredir(vnode_t *dp, char *comp)
+{
+ ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_NET_COREDIR);
+ return (lxpr_lookup_common(dp, comp, NULL, sys_net_coredir,
+ SYS_NET_COREDIRFILES));
+}
+
+static vnode_t *
+lxpr_lookup_sys_net_ipv4dir(vnode_t *dp, char *comp)
+{
+ ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_NET_IPV4DIR);
+ return (lxpr_lookup_common(dp, comp, NULL, sys_net_ipv4dir,
+ SYS_NET_IPV4DIRFILES));
+}
+
+static vnode_t *
+lxpr_lookup_sys_vmdir(vnode_t *dp, char *comp)
+{
+ ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_VMDIR);
+ return (lxpr_lookup_common(dp, comp, NULL, sys_vmdir,
+ SYS_VMDIRFILES));
+}
+
+static vnode_t *
+lxpr_lookup_sys_fsdir(vnode_t *dp, char *comp)
+{
+ ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_FSDIR);
+ return (lxpr_lookup_common(dp, comp, NULL, sys_fsdir,
+ SYS_FSDIRFILES));
+}
+
+static vnode_t *
+lxpr_lookup_sys_fs_inotifydir(vnode_t *dp, char *comp)
+{
+ ASSERT(VTOLXP(dp)->lxpr_type == LXPR_SYS_FS_INOTIFYDIR);
+ return (lxpr_lookup_common(dp, comp, NULL, sys_fs_inotifydir,
+ SYS_FS_INOTIFYDIRFILES));
+}
+
+/*
+ * lxpr_readdir(): Vnode operation for VOP_READDIR()
+ */
+/* ARGSUSED */
+static int
+lxpr_readdir(vnode_t *dp, uio_t *uiop, cred_t *cr, int *eofp,
+ caller_context_t *ct, int flags)
+{
+ lxpr_node_t *lxpnp = VTOLXP(dp);
+ lxpr_nodetype_t type = lxpnp->lxpr_type;
+ ssize_t uresid;
+ off_t uoffset;
+ int error;
+
+ ASSERT(dp->v_type == VDIR);
+ ASSERT(type < LXPR_NFILES);
+
+ /*
+ * we should never get here because the readdir
+ * is done on the realvp for these nodes
+ */
+ ASSERT(type != LXPR_PID_FD_FD &&
+ type != LXPR_PID_CURDIR &&
+ type != LXPR_PID_ROOTDIR);
+
+ /*
+ * restrict readdir permission to owner or root
+ */
+ if ((error = lxpr_access(dp, VREAD, 0, cr, ct)) != 0)
+ return (error);
+
+ uoffset = uiop->uio_offset;
+ uresid = uiop->uio_resid;
+
+ /* can't do negative reads */
+ if (uoffset < 0 || uresid <= 0)
+ return (EINVAL);
+
+ /* can't read directory entries that don't exist! */
+ if (uoffset % LXPR_SDSIZE)
+ return (ENOENT);
+
+ return (lxpr_readdir_function[lxpnp->lxpr_type](lxpnp, uiop, eofp));
+}
+
+/* ARGSUSED */
+static int
+lxpr_readdir_not_a_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ return (ENOTDIR);
+}
+
+/*
+ * This has the common logic for returning directory entries
+ */
+static int
+lxpr_readdir_common(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp,
+ lxpr_dirent_t *dirtab, int dirtablen)
+{
+ /* bp holds one dirent64 structure */
+ longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
+ dirent64_t *dirent = (dirent64_t *)bp;
+ ssize_t oresid; /* save a copy for testing later */
+ ssize_t uresid;
+
+ oresid = uiop->uio_resid;
+
+ /* clear out the dirent buffer */
+ bzero(bp, sizeof (bp));
+
+ /*
+ * Satisfy user request
+ */
+ while ((uresid = uiop->uio_resid) > 0) {
+ int dirindex;
+ off_t uoffset;
+ int reclen;
+ int error;
+
+ uoffset = uiop->uio_offset;
+ dirindex = (uoffset / LXPR_SDSIZE) - 2;
+
+ if (uoffset == 0) {
+
+ dirent->d_ino = lxpnp->lxpr_ino;
+ dirent->d_name[0] = '.';
+ dirent->d_name[1] = '\0';
+ reclen = DIRENT64_RECLEN(1);
+
+ } else if (uoffset == LXPR_SDSIZE) {
+
+ dirent->d_ino = lxpr_parentinode(lxpnp);
+ dirent->d_name[0] = '.';
+ dirent->d_name[1] = '.';
+ dirent->d_name[2] = '\0';
+ reclen = DIRENT64_RECLEN(2);
+
+ } else if (dirindex >= 0 && dirindex < dirtablen) {
+ int slen = strlen(dirtab[dirindex].d_name);
+
+ dirent->d_ino = lxpr_inode(dirtab[dirindex].d_type,
+ lxpnp->lxpr_pid, 0);
+
+ VERIFY(slen < LXPNSIZ);
+ (void) strcpy(dirent->d_name, dirtab[dirindex].d_name);
+ reclen = DIRENT64_RECLEN(slen);
+
+ } else {
+ /* Run out of table entries */
+ if (eofp) {
+ *eofp = 1;
+ }
+ return (0);
+ }
+
+ dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
+ dirent->d_reclen = (ushort_t)reclen;
+
+ /*
+ * if the size of the data to transfer is greater
+ * that that requested then we can't do it this transfer.
+ */
+ if (reclen > uresid) {
+ /*
+ * Error if no entries have been returned yet.
+ */
+ if (uresid == oresid) {
+ return (EINVAL);
+ }
+ break;
+ }
+
+ /*
+ * uiomove() updates both uiop->uio_resid and uiop->uio_offset
+ * by the same amount. But we want uiop->uio_offset to change
+ * in increments of LXPR_SDSIZE, which is different from the
+ * number of bytes being returned to the user. So we set
+ * uiop->uio_offset separately, ignoring what uiomove() does.
+ */
+ if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
+ uiop)) != 0)
+ return (error);
+
+ uiop->uio_offset = uoffset + LXPR_SDSIZE;
+ }
+
+ /* Have run out of space, but could have just done last table entry */
+ if (eofp) {
+ *eofp =
+ (uiop->uio_offset >= ((dirtablen+2) * LXPR_SDSIZE)) ? 1 : 0;
+ }
+ return (0);
+}
+
+
+static int
+lxpr_readdir_procdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ /* bp holds one dirent64 structure */
+ longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
+ dirent64_t *dirent = (dirent64_t *)bp;
+ ssize_t oresid; /* save a copy for testing later */
+ ssize_t uresid;
+ off_t uoffset;
+ zoneid_t zoneid;
+ pid_t pid;
+ int error;
+ int ceof;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PROCDIR);
+
+ oresid = uiop->uio_resid;
+ zoneid = LXPTOZ(lxpnp)->zone_id;
+
+ /*
+ * We return directory entries in the order: "." and ".." then the
+ * unique lxproc files, then the directories corresponding to the
+ * running processes. We have defined this as the ordering because
+ * it allows us to more easily keep track of where we are betwen calls
+ * to getdents(). If the number of processes changes between calls
+ * then we can't lose track of where we are in the lxproc files.
+ */
+
+ /* Do the fixed entries */
+ error = lxpr_readdir_common(lxpnp, uiop, &ceof, lx_procdir,
+ PROCDIRFILES);
+
+ /* Finished if we got an error or if we couldn't do all the table */
+ if (error != 0 || ceof == 0)
+ return (error);
+
+ /* clear out the dirent buffer */
+ bzero(bp, sizeof (bp));
+
+ /* Do the process entries */
+ while ((uresid = uiop->uio_resid) > 0) {
+ proc_t *p;
+ int len;
+ int reclen;
+ int i;
+
+ uoffset = uiop->uio_offset;
+
+ /*
+ * Stop when entire proc table has been examined.
+ */
+ i = (uoffset / LXPR_SDSIZE) - 2 - PROCDIRFILES;
+ if (i < 0 || i >= v.v_proc) {
+ /* Run out of table entries */
+ if (eofp) {
+ *eofp = 1;
+ }
+ return (0);
+ }
+ mutex_enter(&pidlock);
+
+ /*
+ * Skip indices for which there is no pid_entry, PIDs for
+ * which there is no corresponding process, a PID of 0,
+ * and anything the security policy doesn't allow
+ * us to look at.
+ */
+ if ((p = pid_entry(i)) == NULL || p->p_stat == SIDL ||
+ p->p_pid == 0 ||
+ secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
+ mutex_exit(&pidlock);
+ goto next;
+ }
+ mutex_exit(&pidlock);
+
+ /*
+ * Convert pid to the Linux default of 1 if we're the zone's
+ * init process, or 0 if zsched, otherwise use the value from
+ * the proc structure
+ */
+ if (p->p_pid == curproc->p_zone->zone_proc_initpid) {
+ pid = 1;
+ } else if (p->p_pid == curproc->p_zone->zone_zsched->p_pid) {
+ pid = 0;
+ } else {
+ pid = p->p_pid;
+ }
+
+ /*
+ * If this /proc was mounted in the global zone, view
+ * all procs; otherwise, only view zone member procs.
+ */
+ if (zoneid != GLOBAL_ZONEID && p->p_zone->zone_id != zoneid) {
+ goto next;
+ }
+
+ ASSERT(p->p_stat != 0);
+
+ dirent->d_ino = lxpr_inode(LXPR_PIDDIR, pid, 0);
+ len = snprintf(dirent->d_name, LXPNSIZ, "%d", pid);
+ ASSERT(len < LXPNSIZ);
+ reclen = DIRENT64_RECLEN(len);
+
+ dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
+ dirent->d_reclen = (ushort_t)reclen;
+
+ /*
+ * if the size of the data to transfer is greater
+ * that that requested then we can't do it this transfer.
+ */
+ if (reclen > uresid) {
+ /*
+ * Error if no entries have been returned yet.
+ */
+ if (uresid == oresid)
+ return (EINVAL);
+ break;
+ }
+
+ /*
+ * uiomove() updates both uiop->uio_resid and uiop->uio_offset
+ * by the same amount. But we want uiop->uio_offset to change
+ * in increments of LXPR_SDSIZE, which is different from the
+ * number of bytes being returned to the user. So we set
+ * uiop->uio_offset separately, in the increment of this for
+ * the loop, ignoring what uiomove() does.
+ */
+ if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
+ uiop)) != 0)
+ return (error);
+next:
+ uiop->uio_offset = uoffset + LXPR_SDSIZE;
+ }
+
+ if (eofp != NULL) {
+ *eofp = (uiop->uio_offset >=
+ ((v.v_proc + PROCDIRFILES + 2) * LXPR_SDSIZE)) ? 1 : 0;
+ }
+
+ return (0);
+}
+
+static int
+lxpr_readdir_piddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ proc_t *p;
+ pid_t find_pid;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PIDDIR);
+
+ /* can't read its contents if it died */
+ mutex_enter(&pidlock);
+
+ if (lxpnp->lxpr_pid == 1) {
+ find_pid = curproc->p_zone->zone_proc_initpid;
+ } else if (lxpnp->lxpr_pid == 0) {
+ find_pid = curproc->p_zone->zone_zsched->p_pid;
+ } else {
+ find_pid = lxpnp->lxpr_pid;
+ }
+ p = prfind(find_pid);
+
+ if (p == NULL || p->p_stat == SIDL) {
+ mutex_exit(&pidlock);
+ return (ENOENT);
+ }
+ mutex_exit(&pidlock);
+
+ return (lxpr_readdir_common(lxpnp, uiop, eofp, piddir, PIDDIRFILES));
+}
+
+static int
+lxpr_readdir_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_NETDIR);
+ return (lxpr_readdir_common(lxpnp, uiop, eofp, netdir, NETDIRFILES));
+}
+
+static int
+lxpr_readdir_taskdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ /* bp holds one dirent64 structure */
+ longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
+ dirent64_t *dirent = (dirent64_t *)bp;
+ ssize_t oresid; /* save a copy for testing later */
+ ssize_t uresid;
+ off_t uoffset;
+ int error, ceof, tiddirsize, tasknum;
+ proc_t *p;
+ pid_t real_pid;
+ kthread_t *t;
+ boolean_t branded;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_TASKDIR);
+
+ oresid = uiop->uio_resid;
+
+ real_pid = get_real_pid(lxpnp->lxpr_pid);
+ p = lxpr_lock(real_pid, ZOMB_OK);
+ if (p == NULL) {
+ return (ENOENT);
+ }
+ if (p->p_stat == SIDL) {
+ lxpr_unlock(p);
+ return (ENOENT);
+ }
+
+ /*
+ * Just emit static entries for system processes and zombies.
+ */
+ if ((p->p_stat == SZOMB) || (p->p_flag & (SSYS | SEXITING)) ||
+ (p->p_as == &kas)) {
+ lxpr_unlock(p);
+ return (lxpr_readdir_common(lxpnp, uiop, eofp, 0, 0));
+ }
+
+ /*
+ * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from
+ * going away while we iterate over its threads.
+ */
+ tiddirsize = p->p_lwpcnt;
+ branded = (p->p_brand == &lx_brand);
+ mutex_exit(&p->p_lock);
+
+ /* Do the fixed entries (in this case just "." & "..") */
+ error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0);
+
+ /* Finished if we got an error or if we couldn't do all the table */
+ if (error != 0 || ceof == 0)
+ goto out;
+
+ if ((t = p->p_tlist) == NULL) {
+ if (eofp != NULL)
+ *eofp = 1;
+ goto out;
+ }
+
+ /* clear out the dirent buffer */
+ bzero(bp, sizeof (bp));
+
+ /*
+ * Loop until user's request is satisfied or until all thread's have
+ * been returned.
+ */
+ for (tasknum = 0; (uresid = uiop->uio_resid) > 0; tasknum++) {
+ int i, reclen, len;
+ uint_t emul_tid;
+ lx_lwp_data_t *lwpd;
+
+ uoffset = uiop->uio_offset;
+
+ /*
+ * Stop at the end of the thread list
+ */
+ i = (uoffset / LXPR_SDSIZE) - 2;
+ if (i < 0 || i >= tiddirsize) {
+ if (eofp) {
+ *eofp = 1;
+ }
+ goto out;
+ }
+
+ if (i != tasknum)
+ goto next;
+
+ if (!branded) {
+ /*
+ * Emulating the goofy linux task model is impossible
+ * to do for native processes. We can compromise by
+ * presenting only the main thread to the consumer.
+ */
+ emul_tid = p->p_pid;
+ } else {
+ if ((lwpd = ttolxlwp(t)) == NULL) {
+ goto next;
+ }
+ emul_tid = lwpd->br_pid;
+ /*
+ * Convert pid to Linux default of 1 if we're the
+ * zone's init.
+ */
+ if (emul_tid == curproc->p_zone->zone_proc_initpid)
+ emul_tid = 1;
+ }
+
+ dirent->d_ino = lxpr_inode(LXPR_PID_TASK_IDDIR, lxpnp->lxpr_pid,
+ emul_tid);
+ len = snprintf(dirent->d_name, LXPNSIZ, "%d", emul_tid);
+ ASSERT(len < LXPNSIZ);
+ reclen = DIRENT64_RECLEN(len);
+
+ dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
+ dirent->d_reclen = (ushort_t)reclen;
+
+ if (reclen > uresid) {
+ /*
+ * Error if no entries have been returned yet.
+ */
+ if (uresid == oresid)
+ error = EINVAL;
+ goto out;
+ }
+
+ /*
+ * uiomove() updates both uiop->uio_resid and uiop->uio_offset
+ * by the same amount. But we want uiop->uio_offset to change
+ * in increments of LXPR_SDSIZE, which is different from the
+ * number of bytes being returned to the user. So we set
+ * uiop->uio_offset separately, in the increment of this for
+ * the loop, ignoring what uiomove() does.
+ */
+ if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
+ uiop)) != 0)
+ goto out;
+
+next:
+ uiop->uio_offset = uoffset + LXPR_SDSIZE;
+
+ if ((t = t->t_forw) == p->p_tlist || !branded) {
+ if (eofp != NULL)
+ *eofp = 1;
+ goto out;
+ }
+ }
+
+ if (eofp != NULL)
+ *eofp = 0;
+
+out:
+ mutex_enter(&p->p_lock);
+ lxpr_unlock(p);
+ return (error);
+}
+
+static int
+lxpr_readdir_task_tid_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ proc_t *p;
+ pid_t real_pid;
+ kthread_t *t;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_TASK_IDDIR);
+
+ mutex_enter(&pidlock);
+
+ real_pid = get_real_pid(lxpnp->lxpr_pid);
+ p = prfind(real_pid);
+
+ /* can't read its contents if it died */
+ if (p == NULL || p->p_stat == SIDL) {
+ mutex_exit(&pidlock);
+ return (ENOENT);
+ }
+
+ mutex_exit(&pidlock);
+
+ /* need to confirm tid is still there */
+ t = lxpr_get_thread(p, lxpnp->lxpr_desc);
+ if (t == NULL) {
+ /* we can't find this specific thread */
+ return (NULL);
+ }
+ thread_unlock(t);
+
+ return (lxpr_readdir_common(lxpnp, uiop, eofp, tiddir, TIDDIRFILES));
+}
+
+static int
+lxpr_readdir_fddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ /* bp holds one dirent64 structure */
+ longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
+ dirent64_t *dirent = (dirent64_t *)bp;
+ ssize_t oresid; /* save a copy for testing later */
+ ssize_t uresid;
+ off_t uoffset;
+ int error, ceof, fddirsize;
+ proc_t *p;
+ uf_info_t *fip;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_FDDIR ||
+ lxpnp->lxpr_type == LXPR_PID_TID_FDDIR);
+
+ oresid = uiop->uio_resid;
+
+ /* can't read its contents if it died */
+ p = lxpr_lock(lxpnp->lxpr_pid, ZOMB_OK);
+ if (p == NULL)
+ return (ENOENT);
+
+ /*
+ * For exiting/exited processes or those belonging to the system, only
+ * emit the fixed entries.
+ */
+ if ((p->p_stat == SZOMB) || (p->p_flag & (SSYS | SEXITING)) ||
+ (p->p_as == &kas)) {
+ lxpr_unlock(p);
+ return (lxpr_readdir_common(lxpnp, uiop, eofp, 0, 0));
+ }
+
+ /*
+ * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from
+ * going away while we iterate over its fi_list.
+ */
+ mutex_exit(&p->p_lock);
+
+ /* Get open file info */
+ fip = (&(p)->p_user.u_finfo);
+ mutex_enter(&fip->fi_lock);
+ fddirsize = fip->fi_nfiles;
+
+ /* Do the fixed entries (in this case just "." & "..") */
+ error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0);
+
+ /* Finished if we got an error or if we couldn't do all the table */
+ if (error != 0 || ceof == 0)
+ goto out;
+
+ /* clear out the dirent buffer */
+ bzero(bp, sizeof (bp));
+
+ /*
+ * Loop until user's request is satisfied or until
+ * all file descriptors have been examined.
+ */
+ for (; (uresid = uiop->uio_resid) > 0;
+ uiop->uio_offset = uoffset + LXPR_SDSIZE) {
+ int reclen;
+ int fd;
+ int len;
+
+ uoffset = uiop->uio_offset;
+
+ /*
+ * Stop at the end of the fd list
+ */
+ fd = (uoffset / LXPR_SDSIZE) - 2;
+ if (fd < 0 || fd >= fddirsize) {
+ if (eofp) {
+ *eofp = 1;
+ }
+ goto out;
+ }
+
+ if (fip->fi_list[fd].uf_file == NULL)
+ continue;
+
+ dirent->d_ino = lxpr_inode(LXPR_PID_FD_FD, lxpnp->lxpr_pid, fd);
+ len = snprintf(dirent->d_name, LXPNSIZ, "%d", fd);
+ ASSERT(len < LXPNSIZ);
+ reclen = DIRENT64_RECLEN(len);
+
+ dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
+ dirent->d_reclen = (ushort_t)reclen;
+
+ if (reclen > uresid) {
+ /*
+ * Error if no entries have been returned yet.
+ */
+ if (uresid == oresid)
+ error = EINVAL;
+ goto out;
+ }
+
+ if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
+ uiop)) != 0)
+ goto out;
+ }
+
+ if (eofp != NULL) {
+ *eofp =
+ (uiop->uio_offset >= ((fddirsize+2) * LXPR_SDSIZE)) ? 1 : 0;
+ }
+
+out:
+ mutex_exit(&fip->fi_lock);
+ mutex_enter(&p->p_lock);
+ lxpr_unlock(p);
+ return (error);
+}
+
+static int
+lxpr_readdir_sysdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYSDIR);
+ return (lxpr_readdir_common(lxpnp, uiop, eofp, sysdir, SYSDIRFILES));
+}
+
+static int
+lxpr_readdir_sys_fsdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_FSDIR);
+ return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_fsdir,
+ SYS_FSDIRFILES));
+}
+
+static int
+lxpr_readdir_sys_fs_inotifydir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_FS_INOTIFYDIR);
+ return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_fs_inotifydir,
+ SYS_FS_INOTIFYDIRFILES));
+}
+
+static int
+lxpr_readdir_sys_kerneldir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNELDIR);
+ return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_kerneldir,
+ SYS_KERNELDIRFILES));
+}
+
+static int
+lxpr_readdir_sys_kdir_randdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_RANDDIR);
+ return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_randdir,
+ SYS_RANDDIRFILES));
+}
+
+static int
+lxpr_readdir_sys_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_NETDIR);
+ return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_netdir,
+ SYS_NETDIRFILES));
+}
+
+static int
+lxpr_readdir_sys_net_coredir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_COREDIR);
+ return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_net_coredir,
+ SYS_NET_COREDIRFILES));
+}
+
+static int
+lxpr_readdir_sys_net_ipv4dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_IPV4DIR);
+ return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_net_ipv4dir,
+ SYS_NET_IPV4DIRFILES));
+}
+
+static int
+lxpr_readdir_sys_vmdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_VMDIR);
+ return (lxpr_readdir_common(lxpnp, uiop, eofp, sys_vmdir,
+ SYS_VMDIRFILES));
+}
+
+#define isdigit(c) ((c) >= '0' && (c) <= '9')
+#define isspace(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')
+
+/*
+ * Obtain a numeric value from the null-terminated input string.
+ * We don't have strtok in the kernel, so tokenize this ourselves and
+ * validate the input.
+ */
+static int
+lxpr_tokenize_num(char *str, long *pv, char **ep)
+{
+ char *pstart, *pc, c, *endptr;
+ long v;
+
+ for (pc = str; isspace(*pc); pc++)
+ ;
+
+ for (pstart = pc; isdigit(*pc); pc++)
+ ;
+ if (pc == pstart || (!isspace(*pc) && *pc != '\0'))
+ return (EINVAL);
+ c = *pc;
+ *pc = '\0';
+
+ if (ddi_strtol(pstart, &endptr, 10, &v) != 0) {
+ *pc = c;
+ return (EINVAL);
+ }
+ if (*endptr != '\0') {
+ *pc = c;
+ return (EINVAL);
+ }
+
+ if (pv != NULL)
+ *pv = v;
+ if (ep != NULL)
+ *ep = ++pc;
+
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+lxpr_write_tcp_property(lxpr_node_t *lxpnp, struct uio *uio,
+ struct cred *cr, caller_context_t *ct, char *prop,
+ int (*xlate)(char *, int))
+{
+ int error;
+ int res = 0;
+ size_t olen;
+ char val[16]; /* big enough for a uint numeric string */
+ netstack_t *ns;
+ mod_prop_info_t *ptbl = NULL;
+ mod_prop_info_t *pinfo = NULL;
+
+ if (uio->uio_loffset != 0)
+ return (EINVAL);
+
+ if (uio->uio_resid == 0)
+ return (0);
+
+ olen = uio->uio_resid;
+ if (olen > sizeof (val) - 1)
+ return (EINVAL);
+
+ bzero(val, sizeof (val));
+ error = uiomove(val, olen, UIO_WRITE, uio);
+ if (error != 0)
+ return (error);
+
+ if (val[olen - 1] == '\n')
+ val[olen - 1] = '\0';
+
+ if (val[0] == '\0') /* no input */
+ return (EINVAL);
+
+ ns = netstack_get_current();
+ if (ns == NULL)
+ return (EINVAL);
+
+ if (xlate != NULL && xlate(val, sizeof (val)) != 0) {
+ netstack_rele(ns);
+ return (EINVAL);
+ }
+
+ ptbl = ns->netstack_tcp->tcps_propinfo_tbl;
+ pinfo = mod_prop_lookup(ptbl, prop, MOD_PROTO_TCP);
+ if (pinfo == NULL || pinfo->mpi_setf(ns, cr, pinfo, NULL, val, 0) != 0)
+ res = EINVAL;
+
+ netstack_rele(ns);
+ return (res);
+}
+
+static int
+lxpr_write_sys_net_core_somaxc(lxpr_node_t *lxpnp, struct uio *uio,
+ struct cred *cr, caller_context_t *ct)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_CORE_SOMAXCON);
+ return (lxpr_write_tcp_property(lxpnp, uio, cr, ct,
+ "_conn_req_max_q", NULL));
+}
+
+static int
+lxpr_xlate_sec2ms(char *val, int size)
+{
+ long sec;
+ char *ep;
+
+ if (lxpr_tokenize_num(val, &sec, &ep) != 0)
+ return (EINVAL);
+ if (*ep != '\0')
+ return (EINVAL);
+ if (snprintf(val, size, "%ld", sec * 1000) >= size)
+ return (EINVAL);
+ return (0);
+}
+
+static int
+lxpr_xlate_ka_intvl(char *val, int size)
+{
+ long sec;
+ char *ep;
+
+ if (lxpr_tokenize_num(val, &sec, &ep) != 0)
+ return (EINVAL);
+ if (*ep != '\0')
+ return (EINVAL);
+ if (snprintf(val, size, "%ld", sec * 1000 * 9) >= size)
+ return (EINVAL);
+ return (0);
+}
+
+static int
+lxpr_xlate_sack(char *val, int size)
+{
+ long flag;
+ char *ep;
+
+ if (lxpr_tokenize_num(val, &flag, &ep) != 0)
+ return (EINVAL);
+ if (*ep != '\0')
+ return (EINVAL);
+ if (flag != 0 && flag != 1)
+ return (EINVAL);
+ /* see comment on lxpr_read_sys_net_ipv4_tcp_sack */
+ if (snprintf(val, size, "%d", (flag == 0 ? 0 : 2)) >= size)
+ return (EINVAL);
+ return (0);
+}
+
+/*
+ * We expect two port numbers on a line as input for the range, and we have to
+ * set two properties on the netstack_tcp, so we can't reuse
+ * lxpr_write_tcp_property.
+ */
+static int
+lxpr_write_sys_net_ipv4_ip_lport_range(lxpr_node_t *lxpnp, struct uio *uio,
+ struct cred *cr, caller_context_t *ct)
+{
+ int res;
+ size_t olen;
+ char vals[32]; /* big enough for a line w/ 2 16-bit numeric strings */
+ char *ep;
+ long low, high;
+ netstack_t *ns;
+ tcp_stack_t *tcps;
+ mod_prop_info_t *ptbl = NULL;
+ mod_prop_info_t *pinfo = NULL;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_IPV4_IP_LPORT_RANGE);
+
+ if (uio->uio_loffset != 0)
+ return (EINVAL);
+
+ if (uio->uio_resid == 0)
+ return (0);
+
+ olen = uio->uio_resid;
+ if (olen > sizeof (vals) - 1)
+ return (EINVAL);
+
+ bzero(vals, sizeof (vals));
+ res = uiomove(vals, olen, UIO_WRITE, uio);
+ if (res != 0)
+ return (res);
+
+ if (lxpr_tokenize_num(vals, &low, &ep) != 0)
+ return (EINVAL);
+
+ if (lxpr_tokenize_num(ep, &high, &ep) != 0)
+ return (EINVAL);
+
+ if (*ep != '\0') {
+ /* make sure no other tokens on the line */
+ *ep++ = '\0';
+ for (; isspace(*ep); ep++)
+ ;
+ if (*ep != '\0')
+ return (EINVAL);
+ }
+
+ if (low > high || high > 65535)
+ return (EINVAL);
+
+ ns = netstack_get_current();
+ if (ns == NULL)
+ return (EINVAL);
+
+ tcps = ns->netstack_tcp;
+ if (low < tcps->tcps_smallest_nonpriv_port) {
+ netstack_rele(ns);
+ return (EINVAL);
+ }
+
+ ptbl = ns->netstack_tcp->tcps_propinfo_tbl;
+
+ (void) snprintf(vals, sizeof (vals), "%ld", low);
+ pinfo = mod_prop_lookup(ptbl, "smallest_anon_port", MOD_PROTO_TCP);
+ if (pinfo == NULL || pinfo->mpi_setf(ns, cr, pinfo, NULL, vals, 0) != 0)
+ res = EINVAL;
+
+ (void) snprintf(vals, sizeof (vals), "%ld", high);
+ pinfo = mod_prop_lookup(ptbl, "largest_anon_port", MOD_PROTO_TCP);
+ if (pinfo == NULL || pinfo->mpi_setf(ns, cr, pinfo, NULL, vals, 0) != 0)
+ res = EINVAL;
+
+ netstack_rele(ns);
+ return (res);
+}
+
+static int
+lxpr_write_sys_net_ipv4_tcp_fin_to(lxpr_node_t *lxpnp, struct uio *uio,
+ struct cred *cr, caller_context_t *ct)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_IPV4_TCP_FIN_TO);
+ return (lxpr_write_tcp_property(lxpnp, uio, cr, ct,
+ "_fin_wait_2_flush_interval", lxpr_xlate_sec2ms));
+}
+
+static int
+lxpr_write_sys_net_ipv4_tcp_ka_int(lxpr_node_t *lxpnp, struct uio *uio,
+ struct cred *cr, caller_context_t *ct)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_IPV4_TCP_KA_INT);
+ return (lxpr_write_tcp_property(lxpnp, uio, cr, ct,
+ "_keepalive_abort_interval", lxpr_xlate_ka_intvl));
+}
+
+static int
+lxpr_write_sys_net_ipv4_tcp_ka_tim(lxpr_node_t *lxpnp, struct uio *uio,
+ struct cred *cr, caller_context_t *ct)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_IPV4_TCP_KA_TIM);
+ return (lxpr_write_tcp_property(lxpnp, uio, cr, ct,
+ "_keepalive_interval", lxpr_xlate_sec2ms));
+}
+
+static int
+lxpr_write_sys_net_ipv4_tcp_sack(lxpr_node_t *lxpnp, struct uio *uio,
+ struct cred *cr, caller_context_t *ct)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_IPV4_TCP_SACK);
+ return (lxpr_write_tcp_property(lxpnp, uio, cr, ct, "sack",
+ lxpr_xlate_sack));
+}
+
+static int
+lxpr_write_sys_net_ipv4_tcp_winscale(lxpr_node_t *lxpnp, struct uio *uio,
+ struct cred *cr, caller_context_t *ct)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_NET_IPV4_TCP_WINSCALE);
+ return (lxpr_write_tcp_property(lxpnp, uio, cr, ct, "_wscale_always",
+ NULL));
+}
+
+/* ARGSUSED */
+static int
+lxpr_write_sys_kernel_corepatt(lxpr_node_t *lxpnp, struct uio *uio,
+ struct cred *cr, caller_context_t *ct)
+{
+ zone_t *zone = curproc->p_zone;
+ struct core_globals *cg;
+ refstr_t *rp, *nrp;
+ corectl_path_t *ccp;
+ char val[MAXPATHLEN];
+ char valtr[MAXPATHLEN];
+ size_t olen;
+ int error;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_SYS_KERNEL_COREPATT);
+
+ cg = zone_getspecific(core_zone_key, zone);
+ ASSERT(cg != NULL);
+
+ if (secpolicy_coreadm(cr) != 0)
+ return (EPERM);
+
+ if (uio->uio_loffset != 0)
+ return (EINVAL);
+
+ if (uio->uio_resid == 0)
+ return (0);
+
+ olen = uio->uio_resid;
+ if (olen > sizeof (val) - 1)
+ return (EINVAL);
+
+ bzero(val, sizeof (val));
+ error = uiomove(val, olen, UIO_WRITE, uio);
+ if (error != 0)
+ return (error);
+
+ if (val[olen - 1] == '\n')
+ val[olen - 1] = '\0';
+
+ if (val[0] == '|')
+ return (EINVAL);
+
+ if ((error = lxpr_core_path_l2s(val, valtr, sizeof (valtr))) != 0)
+ return (error);
+
+ nrp = refstr_alloc(valtr);
+
+ ccp = cg->core_default_path;
+ mutex_enter(&ccp->ccp_mtx);
+ rp = ccp->ccp_path;
+ refstr_hold((ccp->ccp_path = nrp));
+ cg->core_options |= CC_PROCESS_PATH;
+ mutex_exit(&ccp->ccp_mtx);
+
+ if (rp != NULL)
+ refstr_rele(rp);
+
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+lxpr_write_pid_loginuid(lxpr_node_t *lxpnp, struct uio *uio, struct cred *cr,
+ caller_context_t *ct)
+{
+ int error;
+ size_t olen;
+ char val[16]; /* big enough for a uint numeric string */
+ char *ep;
+ long u;
+ proc_t *p;
+ lx_proc_data_t *pd;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_LOGINUID);
+
+ if (uio->uio_loffset != 0)
+ return (EINVAL);
+
+ if (uio->uio_resid == 0)
+ return (0);
+
+ olen = uio->uio_resid;
+ if (olen > sizeof (val) - 1)
+ return (EINVAL);
+
+ bzero(val, sizeof (val));
+ error = uiomove(val, olen, UIO_WRITE, uio);
+ if (error != 0)
+ return (error);
+
+ if (lxpr_tokenize_num(val, &u, &ep) != 0)
+ return (EINVAL);
+ if (*ep != '\0')
+ return (EINVAL);
+
+ if ((p = lxpr_lock(lxpnp->lxpr_pid, NO_ZOMB)) == NULL)
+ return (ENXIO);
+
+ if ((pd = ptolxproc(p)) != NULL) {
+ pd->l_loginuid = (uid_t)u;
+ }
+ lxpr_unlock(p);
+
+ return (0);
+}
+
+/*
+ * lxpr_readlink(): Vnode operation for VOP_READLINK()
+ */
+/* ARGSUSED */
+static int
+lxpr_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
+{
+ char bp[MAXPATHLEN + 1];
+ size_t buflen = sizeof (bp);
+ lxpr_node_t *lxpnp = VTOLXP(vp);
+ vnode_t *rvp = lxpnp->lxpr_realvp;
+ pid_t pid;
+ int error = 0;
+
+ /*
+ * Linux does something very "clever" for /proc/<pid>/fd/<num> entries.
+ * Open FDs are represented as symlinks, the link contents
+ * corresponding to the open resource. For plain files or devices,
+ * this isn't absurd since one can dereference the symlink to query
+ * the underlying resource. For sockets or pipes, it becomes ugly in a
+ * hurry. To maintain this human-readable output, those FD symlinks
+ * point to bogus targets such as "socket:[<inodenum>]". This requires
+ * circumventing vfs since the stat/lstat behavior on those FD entries
+ * will be unusual. (A stat must retrieve information about the open
+ * socket or pipe. It cannot fail because the link contents point to
+ * an absent file.)
+ *
+ * To accomplish this, lxpr_getnode returns an vnode typed VNON for FD
+ * entries. This bypasses code paths which would normally
+ * short-circuit on symlinks and allows us to emulate the vfs behavior
+ * expected by /proc consumers.
+ */
+ if (vp->v_type != VLNK && lxpnp->lxpr_type != LXPR_PID_FD_FD)
+ return (EINVAL);
+
+ /* Try to produce a symlink name for anything that has a realvp */
+ if (rvp != NULL) {
+ if ((error = lxpr_access(vp, VREAD, 0, CRED(), ct)) != 0)
+ return (error);
+ if ((error = vnodetopath(NULL, rvp, bp, buflen, CRED())) != 0) {
+ /*
+ * Special handling possible for /proc/<pid>/fd/<num>
+ * Generate <type>:[<inode>] links, if allowed.
+ */
+ if (lxpnp->lxpr_type != LXPR_PID_FD_FD ||
+ lxpr_readlink_fdnode(lxpnp, bp, buflen) != 0) {
+ return (error);
+ }
+ }
+ } else {
+ switch (lxpnp->lxpr_type) {
+ case LXPR_SELF:
+ /*
+ * Convert pid to the Linux default of 1 if we're the
+ * zone's init process or 0 if zsched.
+ */
+ if (curproc->p_pid ==
+ curproc->p_zone->zone_proc_initpid) {
+ pid = 1;
+ } else if (curproc->p_pid ==
+ curproc->p_zone->zone_zsched->p_pid) {
+ pid = 0;
+ } else {
+ pid = curproc->p_pid;
+ }
+
+ /*
+ * Don't need to check result as every possible int
+ * will fit within MAXPATHLEN bytes.
+ */
+ (void) snprintf(bp, buflen, "%d", pid);
+ break;
+ case LXPR_PID_CURDIR:
+ case LXPR_PID_ROOTDIR:
+ case LXPR_PID_EXE:
+ return (EACCES);
+ default:
+ /*
+ * Need to return error so that nothing thinks
+ * that the symlink is empty and hence "."
+ */
+ return (EINVAL);
+ }
+ }
+
+ /* copy the link data to user space */
+ return (uiomove(bp, strlen(bp), UIO_READ, uiop));
+}
+
+
+/*
+ * lxpr_inactive(): Vnode operation for VOP_INACTIVE()
+ * Vnode is no longer referenced, deallocate the file
+ * and all its resources.
+ */
+/* ARGSUSED */
+static void
+lxpr_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
+{
+ lxpr_freenode(VTOLXP(vp));
+}
+
+/*
+ * lxpr_sync(): Vnode operation for VOP_SYNC()
+ */
+static int
+lxpr_sync()
+{
+ /*
+ * Nothing to sync but this function must never fail
+ */
+ return (0);
+}
+
+/*
+ * lxpr_cmp(): Vnode operation for VOP_CMP()
+ */
+static int
+lxpr_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
+{
+ vnode_t *rvp;
+
+ while (vn_matchops(vp1, lxpr_vnodeops) &&
+ (rvp = VTOLXP(vp1)->lxpr_realvp) != NULL) {
+ vp1 = rvp;
+ }
+
+ while (vn_matchops(vp2, lxpr_vnodeops) &&
+ (rvp = VTOLXP(vp2)->lxpr_realvp) != NULL) {
+ vp2 = rvp;
+ }
+
+ if (vn_matchops(vp1, lxpr_vnodeops) || vn_matchops(vp2, lxpr_vnodeops))
+ return (vp1 == vp2);
+ return (VOP_CMP(vp1, vp2, ct));
+}
+
+/*
+ * lxpr_realvp(): Vnode operation for VOP_REALVP()
+ */
+static int
+lxpr_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
+{
+ vnode_t *rvp;
+
+ if ((rvp = VTOLXP(vp)->lxpr_realvp) != NULL) {
+ vp = rvp;
+ if (VOP_REALVP(vp, &rvp, ct) == 0)
+ vp = rvp;
+ }
+
+ *vpp = vp;
+ return (0);
+}
+
+static int
+lxpr_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
+ caller_context_t *ct)
+{
+ lxpr_node_t *lxpnp = VTOLXP(vp);
+ lxpr_nodetype_t type = lxpnp->lxpr_type;
+ int i;
+
+ for (i = 0; wr_tab[i].wft_type != LXPR_INVALID; i++) {
+ if (wr_tab[i].wft_type == type) {
+ if (wr_tab[i].wft_wrf != NULL) {
+ return (wr_tab[i].wft_wrf(lxpnp, uiop, cr, ct));
+ }
+ break;
+ }
+ }
+
+ /* pretend we wrote the whole thing */
+ uiop->uio_offset += uiop->uio_resid;
+ uiop->uio_resid = 0;
+ return (0);
+}
+
+/* Needed for writable files which are first "truncated" */
+static int
+lxpr_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset,
+ cred_t *cred, caller_context_t *ct)
+{
+ int error;
+
+ if (cmd != F_FREESP)
+ return (EINVAL);
+ if ((error = lxpr_access(vp, VWRITE, 0, cred, ct)) != 0)
+ return (error);
+
+ return (0);
+}
+
+/*
+ * Needed for writable files which are first "truncated". We only support
+ * truncation.
+ */
+static int
+lxpr_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
+ caller_context_t *ct)
+{
+ int error;
+
+ if (vap->va_mask != AT_SIZE)
+ return (EINVAL);
+ if ((error = lxpr_access(vp, VWRITE, 0, cr, ct)) != 0)
+ return (error);
+
+ return (0);
+}
+
+/*
+ * We need to allow open with O_CREAT for the writable files.
+ */
+/*ARGSUSED7*/
+static int
+lxpr_create(vnode_t *dvp, char *nm, vattr_t *vap, enum vcexcl exclusive,
+ int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct,
+ vsecattr_t *vsecp)
+{
+ lxpr_node_t *lxpnp = VTOLXP(dvp);
+ lxpr_nodetype_t type = lxpnp->lxpr_type;
+ vnode_t *vp = NULL;
+ int error;
+
+ ASSERT(type < LXPR_NFILES);
+
+ /*
+ * restrict create permission to owner or root
+ */
+ if ((error = lxpr_access(dvp, VEXEC, 0, cr, ct)) != 0) {
+ return (error);
+ }
+
+ if (*nm == '\0')
+ return (EPERM);
+
+ if (dvp->v_type != VDIR)
+ return (EPERM);
+
+ if (exclusive == EXCL)
+ return (EEXIST);
+
+ /*
+ * No writable files in top-level proc dir. We check this to avoid
+ * getting a non-proc node via "..".
+ */
+ if (type != LXPR_PROCDIR &&
+ lxpr_lookup(dvp, nm, &vp, NULL, 0, NULL, cr, ct, NULL, NULL) == 0) {
+ lxpr_nodetype_t ftype = VTOLXP(vp)->lxpr_type;
+ if (!lxpr_is_writable(ftype)) {
+ VN_RELE(vp);
+ vp = NULL;
+ }
+ }
+
+ if (vp != NULL) {
+ ASSERT(vp->v_type != VDIR);
+
+ /* confirm permissions against existing file */
+ if ((error = lxpr_access(vp, mode, 0, cr, ct)) != 0) {
+ VN_RELE(vp);
+ return (error);
+ }
+
+ *vpp = vp;
+ return (0);
+ }
+
+ /*
+ * Linux proc does not allow creation of addition, non-subsystem
+ * specific files inside the hierarchy. ENOENT is tossed when such
+ * actions are attempted.
+ */
+ return (ENOENT);
+}
diff --git a/usr/src/uts/common/brand/lx/sys/lx_autofs.h b/usr/src/uts/common/brand/lx/sys/lx_autofs.h
new file mode 100644
index 0000000000..17b19895f4
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_autofs.h
@@ -0,0 +1,511 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#ifndef _LX_AUTOFS_H
+#define _LX_AUTOFS_H
+
+/*
+ * The lxautofs filesystem and driver exist to emulate the Linux autofs
+ * filesystem and /dev/autofs device (this code emulates both). The
+ * purpose is to provide support for the Linux "automount" automounter.
+ *
+ * The device ioctls map fairly closely to the filesystem ioctls. The device
+ * ioctls have superseded the filesystem ioctls and the automounter will
+ * use the device ioctls if the device exists.
+ *
+ * The device ioctls are used by the automounter to perform recovery
+ * in cases where the automounter is restarted while mounts are present. It
+ * also allows for better management operations when a filesystem is mounted
+ * on top of an autofs mountpoint, as in the case of an NFS direct mount on
+ * top of an autofs mount.
+ *
+ *
+ * +++ Linux automounter background.
+ *
+ * Linux has two automounters: "amd" (not used in any popular, modern distro)
+ * and "automount".
+ *
+ * "automount" is the normal Linux automounter. It utilizes a kernel
+ * filesystem (autofs) and device (/dev/autofs) to provide its functionality.
+ * Basically, it mounts the autofs filesystem at any automounter controlled
+ * mountpoint. This filesystem then intercepts and redirects lookup operations
+ * to the userland automounter process via a pipe. The pipe to the automounter
+ * is established via a mount option when the autofs filesystem is mounted or
+ * via the setpipefd ioctl if the automounter restarts. When the automounter
+ * receives a request via this pipe, it does lookups (or unmounts) to whatever
+ * backing store it's configured to use, does mkdir operations on the autofs
+ * filesystem, mounts remote NFS filesystems on any directories it manages or
+ * just created, and signals the autofs device via an ioctl to let it know
+ * that the lookup (or expire) can continue. Other management operations (such
+ * as querying expiration for unmounting) are performed using the autofs device.
+ *
+ *
+ * +++ Linux autofs documentation.
+ *
+ * Within the Linux src tree, see the file:
+ * Documentation/filesystems/autofs4-mount-control.txt
+ * This documents some of the autofs behavior and the device driver ioctls.
+ *
+ * The following URL (https://lwn.net/Articles/606960/) documents autofs in
+ * general. This patch was targeted for Documentation/filesystems/autofs4.txt,
+ * but seems to have never integrated into the Linux src tree.
+ *
+ *
+ * +++ Linux autofs (and automount daemon) notes
+ *
+ * Since we're mimicking the behavior of the Linux autofs filesystem and
+ * device, we document some of the observed behavior here.
+ *
+ * There are multiple versions of the autofs filesystem kernel API protocol
+ * and modern implementations of the user-land automount daemon would depend
+ * on v5, although the filesystem API has been superseded by the driver ioctl
+ * API, which is roughly similar.
+ *
+ * We'll describe the filesystem ioctls first, since support for those was
+ * implemented first. The device ioctls roughly correspond to the filesystem
+ * ioctls and were implemented last, but the automounter will use those
+ * ioctls, instead of the filesystem ioctls, when the device is present.
+ *
+ * Our original autofs implementation was developed in the mid-2000s around the
+ * v2 protocol, but that is currently obsolete. Our current implementation is
+ * based around the v5 protocol API. There was no autofs device support at that
+ * time.
+ *
+ * The autoumounter supports 3 different, mutually exclusive, mount options for
+ * each mountpoint:
+ * - indirect (this was all you got with the v2 support)
+ * - direct
+ * - offset
+ *
+ * An 'indirect' mountpoint is managed with dynamic mounts below that
+ * mountpoint. For example, if '/home' were an indirect autofs mount, then
+ * accessing a username under /home would traverse the 'lookup' code described
+ * below, cause a local subdirectory to be created, and a mount, usually NFS,
+ * onto that username subdirectory.
+ *
+ * A 'direct' mountpoint is an autofs mountpoint which will trigger the
+ * mounting of another filesystem overtop that mountpoint when accessed.
+ *
+ * An 'offset' mountpoint behaves like a 'direct' mountpoint but it is
+ * created dynamically by the automounter underneath an 'indirect' mountpoint.
+ * For example, if '/net' were an indirect autosfs mountpoint and the host
+ * 'jurassic' exported two NFS filesystems; '/var/crash' and '/var/core', then
+ * accessing '/net/jurassic' would trigger the automounter to create two
+ * subdirectories; '/net/jurassic/var/crash' and '/net/jurassic/var/core'. The
+ * automounter would then mount an autofs offset mount onto each one of these
+ * directories. Accessing either of those directories would then trigger
+ * automounter to perform another mount on top, as is done with a 'direct'
+ * mount.
+ *
+ * General behavior
+ *
+ * A) Autofs allows root owned, non-automounter processes to create
+ * directories in the autofs filesystem. The autofs filesystem treats the
+ * automounter's process group as special, but it doesn't prevent root
+ * processes outside of the automounter's process group from creating new
+ * directories in the autofs filesystem.
+ *
+ * B) Autofs doesn't allow creation of any non-directory entries in the
+ * autofs filesystem. No entity can create files (e.g. /bin/touch or
+ * VOP_CREATE/VOP_SYMLINK/etc.) The only entries that can exist within
+ * the autofs filesystem are directories.
+ *
+ * C) Autofs only intercepts vop lookup operations. Notably, it does _not_
+ * intercept and re-direct vop readdir operations. This means that the
+ * observed behavior of the Linux automounter can be considerably different
+ * from that of the illumos automounter. Specifically, on illumos if an autofs
+ * mountpoint is mounted _without_ the -nobrowse option then if a user does
+ * an ls operation (which translates into a vop readdir operation) then the
+ * automounter will intercept that operation and list all the possible
+ * directories and mountpoints without actually mounting any filesystems.
+ * Essentially, all automounter managed mountpoints on Linux will behave
+ * like "-nobrowse" mountpoints on illumos. Here's an example to illustrate
+ * this. If /ws was mounted on illumos without the -nobrowse option and an
+ * auto_ws yp map was setup as the backing store for this mountpoint, then an
+ * "ls /ws" would list all the keys in the map as valid directories, but an
+ * "ls /ws" on Linux would list an emptry directory.
+ *
+ * D) NFS mounts are performed by the automount process. When the automount
+ * process gets a redirected lookup request, it determines _all_ the
+ * possible remote mountpoints for that request, creates directory paths
+ * via mkdir, and mounts the remote filesystems on the newly created paths.
+ * This is described in the offset mount example above. Once the automounter
+ * completed the mounts it would signal the autofs filesystem (via an ioctl)
+ * that the lookup could continue.
+ *
+ * E.1) Autofs only redirects vop lookup operations for path entries that
+ * don't already exist in the autofs filesystem. So for the example above,
+ * an initial (after the start of the automounter) "ls /net/jurassic" would
+ * result in a request to the automounter. A subsequest "ls /net/jurassic"
+ * would not result in a request to the automounter. Even if
+ * /net/jurassic/var/crash and /net/jurassic/var/core were manually unmounted
+ * after the initial "ls /net/jurassic", a subsequest "ls /net/jurassic"
+ * would not result in a new request to the automounter.
+ *
+ * E.2) Autofs lookup requests that are sent to the automounter only include
+ * the root directory path component. So for example, after starting up
+ * the automounter if a user were to do a "ls /net/jurassic/var/crash", the
+ * initial lookup request actually sent to the automounter would just be for
+ * "jurassic" (the same request as if the user had done "ls /net/jurassic").
+ * After the initial mounting of the two offset mounts onto crash and core the
+ * lookup would continue and a final lookup request would be sent to the
+ * automounter for "crash" (but this would be on a different vfs from the
+ * /net vfs).
+ *
+ * E.3) The two statements above aren't entirely entirely true. The Linux
+ * autofs filesystem will also redirect lookup operations for leaf
+ * directories that don't have a filesystem mounted on them. Using the
+ * example above, if a user did a "ls /net/jurassic", then manually
+ * unmounted /net/jurassic/var/crash, and then did an "ls
+ * /net/jurassic/var/crash", this would result in a request for
+ * "jurassic/var/crash" being sent to the automounter. The strange thing
+ * (a Linux bug perhaps) is that the automounter won't do anything with this
+ * request and the lookup will fail.
+ *
+ * F) The autofs filesystem communication protocol (what ioctls it supports
+ * and what data it passes to the automount process) is versioned. The
+ * userland automount daemon (as of version v5.0.7) expects v5 of the protocol
+ * (by running the AUTOFS_IOC_PROTOSUBVER ioctl), and exits if that is not
+ * supported. For v2-v5 the structure passed through the pipe always begins
+ * with a common header followed by different fields depending on the packet
+ * type. In addition the different versions support additional ioctls.
+ *
+ * v2 - basic lookup request
+ * v3 - adds expiring (umounting)
+ * v4 - adds expire multi
+ * v5 - adds missing indirect, expire indirect, missing direct & expire direct.
+ * Defines a new protocol structure layout.
+ * The v5 'missing indirect' and 'missing direct' ioctls are analogous to
+ * the v2 'missing' ioctl. These ioctls are used to initiate a mount via
+ * a lookup. The 'expire' ioctls are used by the automounter to query if
+ * it is possible to unmount the filesystem. 'direct' and 'indirect'
+ * refer to the mount option type that the automounter performed and
+ * correlate to an automounter direct or indirect map mointpoint.
+ *
+ * G) The automounter periodically issues an 'expire' ioctl to autofs to
+ * obtain the name of a mountpoint which the automounter can unmount.
+ * Unmounting is dicussed in more detail below.
+ *
+ * H) The device ioctls roughly correspond to the filesystem ioctls, but
+ * instead of being tied to an auotfs mountpoint vnode, they can be called any
+ * time. The argument structure uses either a path or an autofs pipe file
+ * descriptor to indicate what is being operated on.
+ *
+ * +++ lxautofs notes
+ *
+ * 1) In general, the lxautofs filesystem tries to mimic the behavior of the
+ * Linux autofs filesystem with the following exceptions:
+ *
+ * 1.1) We don't bother to implement the E.3 functionality listed above
+ * since it doesn't appear to be of any use.
+ *
+ * 1.2) We only fully implement v2 and v5 of the autofs protocol.
+ *
+ * 2) In general, the approach taken for lxautofs is to keep it as simple
+ * as possible and to minimize it's memory usage. To do this all information
+ * about the contents of the lxautofs filesystem are mirrored in the
+ * underlying filesystem that lxautofs is mounted on and most vop operations
+ * are simply passed onto this underlying filesystem. This means we don't
+ * have to implement most of the complex operations that a full filesystem
+ * normally has to implement. It also means that most of our filesystem state
+ * (wrt the contents of the filesystem) doesn't actually have to be stored
+ * in memory, we can simply go to the underlying filesystem to get it when
+ * it's requested. For the purposes of discussion, we'll call the underlying
+ * filesystem the "backing store."
+ *
+ * The backing store is actually a directory called ".lxautofs" which is created
+ * in the directory where the lxautofs filesystem is mounted. When the
+ * lxautofs filesystem is unmounted this backing store directory is deleted.
+ * If this directory exists at mount time (perhaps the system crashed while a
+ * previous lxautofs instance was mounted at the same location) it will be
+ * deleted. There are a few implications of using a backing store worth
+ * mentioning.
+ *
+ * 2.1) lxautofs can't be mounted on a read only filesystem. If this
+ * proves to be a problem we can probably move the location of the
+ * backing store.
+ *
+ * 2.2) If the backing store filesystem runs out of space then the
+ * automounter process won't be able to create more directories and mount
+ * new filesystems. Of course, strange failures usually happen when
+ * filesystems run out of space.
+ *
+ * 3) Why aren't we using gfs? gfs has two different usage models.
+ *
+ * 3.1) I'm my own filesystem but i'm using gfs to help with managing
+ * readdir operations.
+ *
+ * 3.2) I'm a gfs filesystem and gfs is managing all my vnodes
+ *
+ * We're not using the 3.1 interfaces because we don't implement readdir
+ * ourselves. We pass all readdir operations onto the backing store
+ * filesystem and utilize its readdir implementation.
+ *
+ * We're not using the 3.2 interfaces because they are really designed for
+ * in memory filesystems where all of the filesystem state is stored in
+ * memory. They don't lend themselves to filesystems where part of the
+ * state is in memory and part of the state is on disk.
+ *
+ * For more information on gfs take a look at the block comments in the
+ * top of gfs.c
+ *
+ * 4) Unmounting
+ *
+ * The automounter has a timeout associated with each mount. It informs autofs
+ * of this timeout using the LX_AUTOFS_DEV_IOC_TIMEOUT_CMD ioctl after autofs
+ * has been mounted on the mountpoint.
+ *
+ * After the automounter has mounted something associated with the mountpoint
+ * then periodically (<timeout>/4 seconds) the automounter will issue the
+ * LX_AUTOFS_DEV_IOC_EXPIRE_CMD ioctl on the autofs mount. autofs is expected
+ * to respond with an underlying mountpoint entry which is a candidate for
+ * unmounting. The automounter will attempt to unmount the filesystem
+ * (which may fail if it is busy, since this is obviously racy) and then
+ * acknowledge the expire ioctl. The successful acknowledgement is independent
+ * of the success of unmounting the underlying filesystem.
+ *
+ * Unmount handling varies based on which type of mount the autofs was mounted
+ * with (indirect, direct or offset).
+ *
+ * To support 'indirect' mount expiration, the autofs vfs keeps track of the
+ * filesystems mounted immediately under the autofs mountpoint (in
+ * lav_mnt_list) after a lookup has completed successfully. Upon receipt of the
+ * LX_AUTOFS_IOC_DEV_EXPIRE_CMD ioctl, autofs removes the first element from
+ * the list, attempts to check if it is busy and if not, returns that mountpoint
+ * over the fifo (if busy the entry is added to the end of the list). When the
+ * ioctl is acknowledged, if the mountpoint still exists, that means the unmount
+ * failed and the entry is added at the back of the list. If there are no
+ * elements or the first one is busy, EAGAIN is returned for the 'expire' ioctl
+ * and the autoumounter will check again in <timeout>/4 seconds.
+ *
+ * For example, if /home is an autofs indirect mount, then there are typically
+ * many different {username}-specific NFS mounts under that /home autofs mount.
+ * autofs uses the lav_mnt_list to respond to 'expire' ioctls in a round-robin
+ * fashion so that the automounter can unmount user file systems that aren't in
+ * use.
+ *
+ * Expiring 'direct' mounts is similar, but since there is only a single mount,
+ * the lav_mnt_list only will have at most one entry if there is a filesystem
+ * mounted overtop of the autofs mount.
+ *
+ * Expiring 'offset' mounts is more complicated because there are at least
+ * two different autofs VFSs involved (the top-level and one for each offset
+ * mount underneath). The actual offset mount is handled exactly like a 'direct'
+ * mount. The top-level is an indirect mount and is handled in a similar way
+ * as described above for indirect mounts, but special handling is needed for
+ * each offset mount below.
+ *
+ * This can be explained using the same 'jurassic' example described earlier
+ * (/net is an autofs 'indirect' mount and the host 'jurassic' has two exported
+ * file systems; /var/crash and /var/core). If the user accesses
+ * /net/jurassic/var/crash then the automounter would setup the system so that
+ * the following mounts exist:
+ * - /net (the original autofs indirect mount which triggers everything)
+ * - /net/jurassic/var/crash (autofs offset mount)
+ * - /net/jurassic/var/crash (NFS mount on top of the autofs offset mount)
+ * - /net/jurassic/var/core (autofs offset mount)
+ *
+ * For expiration the automounter will issue the LX_AUTOFS_IOC_EXPIRE_MULTI
+ * ioctl on each autofs vfs for which something is mounted, so we would receive
+ * an expire ioctl on /net and another on /net/jusrassic/var/crash. The vfs for
+ * /net will be tracking "jurassic", but we detect it is busy and won't do
+ * anything at first. The vfs for "crash" will work like a direct mount and
+ * acknowledge the expire ioctl to the automounter once that filesystem times
+ * out and is no longer busy. The automounter will then unmount the "crash"
+ * NFS mount.
+ *
+ * Once the "crash" NFS mount has been unmounted by the automounter, we're left
+ * with the two autofs offset mounts under jurassic. The automounter will not
+ * try to unmount either of those, so we have to do that. Once we get another
+ * expire ioctl on /net and check "jurassic", we'll see there are only autofs
+ * mounts under /net/jurassic. We umount those using the lx_autofs_umount_offset
+ * function and respond to the automounter expire ioctl with "jurassic", in the
+ * same way as we would for any other indirect mount.
+ *
+ * 5) Recovery
+ *
+ * If the automounter is restarted for any reason, it needs to cope with
+ * pre-existing autofs mounts, as well as other automount-initiated mounts (e.g.
+ * a direct mount on top of an autofs mountpoint). The automounter uses the
+ * /proc/mounts file to correlate mounts to the managed mountpoints. It then
+ * uses the /dev/autofs device to openmount each of the autofs devices and
+ * reinitialize them using the various dev ioctls (timeout, requester, etc.).
+ *
+ * In general, the autoumounter will closemount the mountpoint once it's done,
+ * but it doesn't in the case of an offset mountpoint with nothing mounted
+ * on top. In this case the automounter expects autofs to expire that mountpoint
+ * before it will closemount (so things can subsequently cleanup). We handle
+ * this special case in the expire code path.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Note that the name of the actual file system is lxautofs, not lx_autofs, but
+ * the code uses lx_autofs to prefix the various names. This is because file
+ * system names are limited to 8 characters.
+ */
+#define LX_AUTOFS_NAME "lxautofs"
+
+#define LX_AUTOFS_MINORNAME "autofs"
+
+/*
+ * Mount options supported.
+ */
+#define LX_MNTOPT_FD "fd"
+#define LX_MNTOPT_PGRP "pgrp"
+#define LX_MNTOPT_MINPROTO "minproto"
+#define LX_MNTOPT_MAXPROTO "maxproto"
+#define LX_MNTOPT_INDIRECT "indirect"
+#define LX_MNTOPT_DIRECT "direct"
+#define LX_MNTOPT_OFFSET "offset"
+
+/*
+ * Version/subversion of the Linux kernel automount protocol we support.
+ *
+ * We fully support v2 and v5. We'll return ENOTSUP for all of the ioctls we
+ * don't yet handle.
+ */
+#define LX_AUTOFS_PROTO_VERS5 5
+#define LX_AUTOFS_PROTO_SUBVERSION 2
+#define LX_AUTOFS_PROTO_VERS2 2
+
+/* packet types */
+typedef enum laph_ptype {
+ LX_AUTOFS_PTYPE_MISSING, /* 0 */
+ LX_AUTOFS_PTYPE_EXPIRE, /* 1 */
+ LX_AUTOFS_PTYPE_EXPIRE_MULTI, /* 2 */
+ LX_AUTOFS_PTYPE_MISSING_INDIR, /* 3 */
+ LX_AUTOFS_PTYPE_EXPIRE_INDIR, /* 4 */
+ LX_AUTOFS_PTYPE_MISSING_DIRECT, /* 5 */
+ LX_AUTOFS_PTYPE_EXPIRE_DIRECT /* 6 */
+} laph_ptype_t;
+
+/*
+ * Common header for all versions of the protocol.
+ */
+typedef struct lx_autofs_pkt_hdr {
+ int laph_protover; /* protocol version number */
+ laph_ptype_t laph_type;
+ int laph_id; /* every pkt must have a unique id */
+} lx_autofs_pkt_hdr_t;
+
+/*
+ * Command structure sent to automount process from lxautofs via a pipe.
+ * This structure is the same for v2-v4 of the automount protocol
+ * (the communication pipe is established at mount time).
+ */
+typedef struct lx_autofs_v2_pkt {
+ lx_autofs_pkt_hdr_t lap_hdr;
+ int lap_name_len; /* don't include newline or NULL */
+ char lap_name[256]; /* path component to lookup */
+} lx_autofs_v2_pkt_t;
+
+/* v4 multi-expire */
+typedef struct lx_autofs_v4_exp_pkt {
+ lx_autofs_pkt_hdr_t lape_hdr;
+ int lape_len;
+ char lape_name[MAXNAMELEN];
+} lx_autofs_v4_exp_pkt_t;
+
+/* v5 */
+typedef struct lx_autofs_v5_pkt {
+ lx_autofs_pkt_hdr_t lap_hdr;
+ uint32_t lap_dev;
+ uint64_t lap_ino;
+ uint32_t lap_uid;
+ uint32_t lap_gid;
+ uint32_t lap_pid;
+ uint32_t lap_tgid;
+ uint32_t lap_name_len;
+ char lap_name[256];
+} lx_autofs_v5_pkt_t;
+
+union lx_autofs_pkt {
+ lx_autofs_v2_pkt_t lap_v2;
+ lx_autofs_v5_pkt_t lap_v5;
+};
+
+#define lap_protover lap_v2.lap_hdr.laph_protover
+#define lap_type lap_v2.lap_hdr.laph_type
+#define lap_id lap_v2.lap_hdr.laph_id
+
+/*
+ * Ioctls fully supported (v2 protocol).
+ */
+#define LX_AUTOFS_IOC_READY 0x00009360 /* arg: int */
+#define LX_AUTOFS_IOC_FAIL 0x00009361 /* arg: int */
+#define LX_AUTOFS_IOC_CATATONIC 0x00009362 /* arg: <none> */
+
+/*
+ * Ioctls supported (v3/v4 protocol).
+ */
+#define LX_AUTOFS_IOC_PROTOVER 0x80049363 /* arg: int */
+#define LX_AUTOFS_IOC_SETTIMEOUT 0xc0089364 /* arg: ulong_t */
+
+/*
+ * Ioctls not supported (v3/v4 protocol).
+ */
+ /* arg: lx_autofs_v3_exp_pkt_t * */
+#define LX_AUTOFS_IOC_EXPIRE 0x81109365
+
+/*
+ * Ioctls supported (v5 protocol).
+ */
+#define LX_AUTOFS_IOC_PROTOSUBVER 0x80049367 /* arg: int */
+#define LX_AUTOFS_IOC_ASKUMOUNT 0x80049370 /* arg: int */
+#define LX_AUTOFS_IOC_EXPIRE_MULTI 0x40049366 /* arg: int */
+#define LX_AUTOFS_IOC_EXPIRE_INDIRECT LX_AUTOFS_IOC_EXPIRE_MULTI
+#define LX_AUTOFS_IOC_EXPIRE_DIRECT LX_AUTOFS_IOC_EXPIRE_MULTI
+
+/*
+ * autofs device ioctls
+ */
+#define LX_AUTOFS_DEV_IOC_VERSION_CMD 0xc0189371
+#define LX_AUTOFS_DEV_IOC_PROTOVER_CMD 0xc0189372
+#define LX_AUTOFS_DEV_IOC_PROTOSUBVER_CMD 0xc0189373
+#define LX_AUTOFS_DEV_IOC_OPENMOUNT_CMD 0xc0189374
+#define LX_AUTOFS_DEV_IOC_CLOSEMOUNT_CMD 0xc0189375
+#define LX_AUTOFS_DEV_IOC_READY_CMD 0xc0189376
+#define LX_AUTOFS_DEV_IOC_FAIL_CMD 0xc0189377
+#define LX_AUTOFS_DEV_IOC_SETPIPEFD_CMD 0xc0189378
+#define LX_AUTOFS_DEV_IOC_CATATONIC_CMD 0xc0189379
+#define LX_AUTOFS_DEV_IOC_TIMEOUT_CMD 0xc018937a
+#define LX_AUTOFS_DEV_IOC_REQUESTER_CMD 0xc018937b
+#define LX_AUTOFS_DEV_IOC_EXPIRE_CMD 0xc018937c
+#define LX_AUTOFS_DEV_IOC_ASKUMOUNT_CMD 0xc018937d
+#define LX_AUTOFS_DEV_IOC_ISMOUNTPOINT_CMD 0xc018937e
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_AUTOFS_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_autofs_impl.h b/usr/src/uts/common/brand/lx/sys/lx_autofs_impl.h
new file mode 100644
index 0000000000..39ea96d1fe
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_autofs_impl.h
@@ -0,0 +1,162 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#ifndef _LX_AUTOFS_IMPL_H
+#define _LX_AUTOFS_IMPL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/file.h>
+#include <sys/id_space.h>
+#include <sys/modhash.h>
+#include <sys/vnode.h>
+
+#include <sys/lx_autofs.h>
+
+/*
+ * Space key.
+ * Used to persist data across lx_autofs filesystem module unloads.
+ */
+#define LX_AUTOFS_SPACE_KEY_UDEV LX_AUTOFS_NAME "_udev"
+
+/*
+ * Name of the backing store directory.
+ */
+#define LX_AUTOFS_BS_DIR "." LX_AUTOFS_NAME
+
+#define LX_AUTOFS_VFS_ID_HASH_SIZE 15
+#define LX_AUTOFS_VFS_PATH_HASH_SIZE 15
+#define LX_AUTOFS_VFS_VN_HASH_SIZE 15
+
+enum lx_autofs_mnttype { LXAMT_NONE, LXAMT_INDIR, LXAMT_DIRECT, LXAMT_OFFSET };
+
+typedef struct lx_autofs_mntent {
+ list_node_t lxafme_lst;
+ uint64_t lxafme_ts; /* time stamp */
+ uint_t lxafme_len;
+ char *lxafme_path;
+} lx_autofs_mntent_t;
+
+/*
+ * VFS data object.
+ */
+typedef struct lx_autofs_vfs {
+ /* Info about the underlying filesystem and backing store. */
+ vnode_t *lav_mvp;
+ char *lav_bs_name;
+ vnode_t *lav_bs_vp;
+
+ /* Info about the automounter process managing this filesystem. */
+ int lav_fd;
+ pid_t lav_pgrp;
+ file_t *lav_fifo_wr;
+ file_t *lav_fifo_rd;
+
+ /* The mount's dev and ino values for v5 protocol msg */
+ uint64_t lav_dev;
+ u_longlong_t lav_ino;
+
+ /* options from the mount */
+ enum lx_autofs_mnttype lav_mnttype;
+ int lav_min_proto;
+
+ /*
+ * ioctl-set timeout value. The automounter will perform an expire
+ * ioctl every timeout/4 seconds. We use this to expire a mount once
+ * it is inactive for the full timeout.
+ */
+ ulong_t lav_timeout;
+
+ /* ioctl-set catatonic value (prevents future mounts). */
+ boolean_t lav_catatonic;
+
+ /* Mount initiator's uid/gid for recovery handling. */
+ uid_t lav_uid;
+ gid_t lav_gid;
+
+ /* Each automount requests needs a unique id. */
+ id_space_t *lav_ids;
+
+ /* All remaining structure members are protected by lav_lock. */
+ kmutex_t lav_lock;
+ /* openmount counter */
+ int lav_openmnt_cnt;
+
+
+ /* Hashes to keep track of outstanding automounter requests. */
+ mod_hash_t *lav_path_hash;
+ mod_hash_t *lav_id_hash;
+
+ /* We need to keep track of all our vnodes. */
+ vnode_t *lav_root;
+ mod_hash_t *lav_vn_hash;
+
+ /* list of current mounts */
+ list_t lav_mnt_list;
+} lx_autofs_vfs_t;
+
+enum lx_autofs_callres { LXACR_NONE, LXACR_READY, LXACR_FAIL };
+
+/*
+ * Structure to keep track of automounter requests sent to user-land.
+ */
+typedef struct lx_autofs_automnt_req {
+ /* Packet that gets sent to the automounter. */
+ union lx_autofs_pkt laar_pkt;
+ int laar_pkt_size;
+
+ /* Reference count. Always updated atomically. */
+ uint_t laar_ref;
+
+ /*
+ * Fields to keep track and sync threads waiting on a lookup.
+ * Fields are protected by lalr_lock.
+ */
+ kmutex_t laar_lock;
+ kcondvar_t laar_cv;
+ int laar_complete;
+
+ enum lx_autofs_callres laar_result;
+} lx_autofs_automnt_req_t;
+
+/*
+ * Generic stack structure.
+ */
+typedef struct stack_elem {
+ list_node_t se_list;
+ caddr_t se_ptr1;
+ caddr_t se_ptr2;
+ caddr_t se_ptr3;
+} stack_elem_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_AUTOFS_IMPL_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_brand.h b/usr/src/uts/common/brand/lx/sys/lx_brand.h
new file mode 100644
index 0000000000..4906e444f1
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_brand.h
@@ -0,0 +1,680 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#ifndef _LX_BRAND_H
+#define _LX_BRAND_H
+
+#ifndef _ASM
+#include <sys/types.h>
+#include <sys/cpuvar.h>
+#include <sys/zone.h>
+#include <sys/ksocket.h>
+#include <sys/vfs.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LX_BRANDNAME "lx"
+
+/*
+ * Brand uname info
+ */
+#define LX_UNAME_SYSNAME "Linux"
+#define LX_UNAME_RELEASE_2_6 "2.6.18"
+#define LX_UNAME_RELEASE_2_4 "2.4.21"
+#define LX_UNAME_VERSION "BrandZ virtual linux"
+#define LX_UNAME_MACHINE32 "i686"
+#define LX_UNAME_MACHINE64 "x86_64"
+
+#define LX_LIB_PATH32 "/native/usr/lib/lx_brand.so.1"
+#define LX_LIB_PATH64 "/native/usr/lib/amd64/lx_brand.so.1"
+
+#define LX_VDSO_PATH32 "/native/usr/lib/brand/lx/lx_vdso.so.1"
+#define LX_VDSO_PATH64 "/native/usr/lib/brand/lx/amd64/lx_vdso.so.1"
+
+#if defined(_LP64)
+#define LX_LIB_PATH LX_LIB_PATH64
+#define LX_UNAME_MACHINE LX_UNAME_MACHINE64
+#define LX_VDSO_PATH LX_VDSO_PATH64
+#else
+#define LX_LIB_PATH LX_LIB_PATH32
+#define LX_UNAME_MACHINE LX_UNAME_MACHINE32
+#define LX_VDSO_PATH LX_VDSO_PATH32
+#endif
+
+/*
+ * This must be large enough for both the 32-bit table and 64-bit table.
+ */
+#define LX_NSYSCALLS 358
+
+/* Highest capability we know about */
+#define LX_CAP_MAX_VALID 36
+
+/*
+ * brand(2) subcommands
+ *
+ * Everything >= 128 is a brand-specific subcommand.
+ * > 192 is reserved for in-kernel emulated system calls.
+ */
+#define B_LPID_TO_SPAIR 128
+#define B_GET_CURRENT_CONTEXT 129
+#define B_EMULATION_DONE 130
+/* formerly B_PTRACE_KERNEL 131 */
+#define B_SET_AFFINITY_MASK 132
+#define B_GET_AFFINITY_MASK 133
+#define B_PTRACE_CLONE_BEGIN 134
+#define B_PTRACE_STOP_FOR_OPT 135
+#define B_UNSUPPORTED 136
+#define B_STORE_ARGS 137
+#define B_GETPID 138
+#define B_JUMP_TO_LINUX 139
+#define B_SET_THUNK_PID 140
+#define B_EXIT_AS_SIG 141
+#define B_HELPER_WAITID 142
+#define B_HELPER_CLONE 143
+#define B_HELPER_SETGROUPS 144
+#define B_HELPER_SIGQUEUE 145
+#define B_HELPER_TGSIGQUEUE 146
+#define B_SET_NATIVE_STACK 147
+#define B_SIGEV_THREAD_ID 148
+#define B_OVERRIDE_KERN_VER 149
+/* formerly B_NOTIFY_VDSO_LOC 150 */
+#define B_GET_PERSONALITY 151
+
+#ifndef _ASM
+/*
+ * Support for Linux PTRACE_SETOPTIONS handling.
+ */
+typedef enum lx_ptrace_options {
+ LX_PTRACE_O_TRACESYSGOOD = 0x0001,
+ LX_PTRACE_O_TRACEFORK = 0x0002,
+ LX_PTRACE_O_TRACEVFORK = 0x0004,
+ LX_PTRACE_O_TRACECLONE = 0x0008,
+ LX_PTRACE_O_TRACEEXEC = 0x0010,
+ LX_PTRACE_O_TRACEVFORKDONE = 0x0020,
+ LX_PTRACE_O_TRACEEXIT = 0x0040,
+ LX_PTRACE_O_TRACESECCOMP = 0x0080
+} lx_ptrace_options_t;
+
+#define LX_PTRACE_O_ALL \
+ (LX_PTRACE_O_TRACESYSGOOD | LX_PTRACE_O_TRACEFORK | \
+ LX_PTRACE_O_TRACEVFORK | LX_PTRACE_O_TRACECLONE | \
+ LX_PTRACE_O_TRACEEXEC | LX_PTRACE_O_TRACEVFORKDONE | \
+ LX_PTRACE_O_TRACEEXIT | LX_PTRACE_O_TRACESECCOMP)
+#endif /* !_ASM */
+
+/* siginfo si_status for traced events */
+#define LX_PTRACE_EVENT_FORK 0x100
+#define LX_PTRACE_EVENT_VFORK 0x200
+#define LX_PTRACE_EVENT_CLONE 0x300
+#define LX_PTRACE_EVENT_EXEC 0x400
+#define LX_PTRACE_EVENT_VFORK_DONE 0x500
+#define LX_PTRACE_EVENT_EXIT 0x600
+#define LX_PTRACE_EVENT_SECCOMP 0x700
+
+/*
+ * Brand-private values for the "pr_what" member of lwpstatus, for use with the
+ * PR_BRAND stop reason. These reasons are validated in lx_stop_notify();
+ * update it if you add new reasons here.
+ */
+#define LX_PR_SYSENTRY 1
+#define LX_PR_SYSEXIT 2
+#define LX_PR_SIGNALLED 3
+#define LX_PR_EVENT 4
+
+
+#define LX_VERSION_1 1
+#define LX_VERSION LX_VERSION_1
+
+#define LX_ATTR_KERN_RELEASE ZONE_ATTR_BRAND_ATTRS
+#define LX_ATTR_KERN_VERSION (ZONE_ATTR_BRAND_ATTRS + 1)
+
+/*
+ * Aux vector containing phdr of Linux executable and ehdr of interpreter
+ * (if any), both of which are used by lx_librtld_db to ascertain r_debug.
+ * We repurpose the 3rd brand-specific aux vector slot for the Linux
+ * AT_SYSINFO_EHDR entry (we modify the a_type in the brand library).
+ */
+#define AT_SUN_BRAND_LX_PHDR AT_SUN_BRAND_AUX1
+#define AT_SUN_BRAND_LX_INTERP AT_SUN_BRAND_AUX2
+#define AT_SUN_BRAND_LX_CLKTCK AT_SUN_BRAND_AUX3
+#define AT_SUN_BRAND_LX_SYSINFO_EHDR AT_SUN_BRAND_AUX4
+
+/* Aux vectors containing real/effective user/group IDs */
+#define AT_LX_UID 11
+#define AT_LX_EUID 12
+#define AT_LX_GID 13
+#define AT_LX_EGID 14
+/* Aux vector containing hz value */
+#define AT_CLKTCK 17
+/* Aux vector containing secure boolean */
+#define AT_SECURE 23
+/* Aux vector containing vDSO addr */
+#define AT_SYSINFO_EHDR 33
+
+/*
+ * Usermode emulation routines are run on an alternate stack allocated by
+ * the brand library. Every LWP in a process will incur this overhead beyond
+ * the regular thread stack:
+ */
+#define LX_NATIVE_STACK_PAGE_COUNT 64
+
+/*
+ * When returning in a new child process created with vfork(2) (or CLONE_VFORK)
+ * we discard some of the native stack to prevent corruption of the parent
+ * emulation state.
+ */
+#define LX_NATIVE_STACK_VFORK_GAP 0x3000
+
+#ifndef _ASM
+
+extern struct brand lx_brand;
+
+typedef struct lx_brand_registration {
+ uint_t lxbr_version; /* version number */
+ void *lxbr_handler; /* base address of handler */
+ uint32_t lxbr_flags; /* LX_PROC_* registration flags */
+} lx_brand_registration_t;
+
+typedef struct lx_brand_registration32 {
+ uint_t lxbr_version; /* version number */
+ uint32_t lxbr_handler; /* base address of handler */
+ uint32_t lxbr_flags; /* LX_PROC_* registration flags */
+} lx_brand_registration32_t;
+
+#endif /* _ASM */
+
+/*
+ * GDT usage
+ */
+#define GDT_TLSMIN (GDT_BRANDMIN)
+#define GDT_TLSMAX (GDT_TLSMIN + 2)
+#define LX_TLSNUM (GDT_TLSMAX - GDT_TLSMIN)
+
+#ifndef _ASM
+
+/*
+ * Stores information needed by the lx linker to launch the main
+ * lx executable.
+ */
+typedef struct lx_elf_data64 {
+ uintptr_t ed_phdr;
+ uintptr_t ed_phent;
+ uintptr_t ed_phnum;
+ uintptr_t ed_entry;
+ uintptr_t ed_base;
+ uintptr_t ed_ldentry;
+} lx_elf_data64_t;
+
+typedef struct lx_elf_data32 {
+ uint32_t ed_phdr;
+ uint32_t ed_phent;
+ uint32_t ed_phnum;
+ uint32_t ed_entry;
+ uint32_t ed_base;
+ uint32_t ed_ldentry;
+} lx_elf_data32_t;
+
+#if defined(_LP64)
+typedef lx_elf_data64_t lx_elf_data_t;
+#else
+typedef lx_elf_data32_t lx_elf_data_t;
+#endif
+
+typedef enum lx_proc_flags {
+ /* flags configurable via brandsys() and members of LX_PROC_ALL */
+ LX_PROC_INSTALL_MODE = 0x01,
+ LX_PROC_STRICT_MODE = 0x02,
+ /* internal flags */
+ LX_PROC_CHILD_DEATHSIG = 0x04,
+ LX_PROC_AIO_USED = 0x08
+} lx_proc_flags_t;
+
+#define LX_PROC_ALL (LX_PROC_INSTALL_MODE | LX_PROC_STRICT_MODE)
+
+/* Maximum length for fields of LX uname */
+#define LX_SYS_UTS_LN 65
+
+/* Max. length of kernel release string */
+#define LX_KERN_RELEASE_MAX LX_SYS_UTS_LN
+#define LX_KERN_VERSION_MAX LX_SYS_UTS_LN
+
+#ifdef _KERNEL
+
+/*
+ * Entry points for cgroup integration.
+ */
+extern void (*lx_cgrp_initlwp)(vfs_t *, uint_t, id_t, pid_t);
+extern void (*lx_cgrp_freelwp)(vfs_t *, uint_t, id_t, pid_t);
+
+#define LX_RLFAKE_LOCKS 0
+#define LX_RLFAKE_NICE 1
+#define LX_RLFAKE_RTPRIO 2
+#define LX_RLFAKE_RTTIME 3
+
+#define LX_RLFAKE_NLIMITS 4
+
+#define LX_RLIM64_INFINITY (~0ULL)
+
+typedef struct {
+ uint64_t rlim_cur;
+ uint64_t rlim_max;
+} lx_rlimit64_t;
+
+typedef struct lx_proc_data {
+ uintptr_t l_handler; /* address of user-space handler */
+ pid_t l_ppid; /* pid of originating parent proc */
+ uid_t l_loginuid; /* /proc/{pid}/loginuid */
+ int64_t l_ptrace; /* count of process lwps observed by ptrace */
+ lx_elf_data_t l_elf_data; /* ELF data for linux executable */
+ /* signal to deliver to parent when this thread group dies */
+ int l_signal;
+ /* native signal to deliver to process when parent dies */
+ int l_parent_deathsig;
+ lx_proc_flags_t l_flags;
+
+ lx_rlimit64_t l_fake_limits[LX_RLFAKE_NLIMITS];
+
+ /* original start/end bounds of arg/env string data */
+ uintptr_t l_args_start;
+ uintptr_t l_envs_start;
+ uintptr_t l_envs_end;
+
+ /* Override zone-wide settings for uname release and version */
+ char l_uname_release[LX_KERN_RELEASE_MAX];
+ char l_uname_version[LX_KERN_VERSION_MAX];
+
+ /* Linux process personality */
+ unsigned int l_personality;
+
+ /* VDSO location */
+ uintptr_t l_vdso;
+} lx_proc_data_t;
+
+#endif /* _KERNEL */
+
+/*
+ * Linux process personality(2) flags stored in l_personality
+ */
+#define LX_PER_UNAME26 0x0020000
+#define LX_PER_ADDR_NO_RANDOMIZE 0x0040000
+#define LX_PER_FDPIC_FUNCPTRS 0x0080000
+#define LX_PER_MMAP_PAGE_ZERO 0x0100000
+#define LX_PER_ADDR_COMPAT_LAYOUT 0x0200000
+#define LX_PER_READ_IMPLIES_EXEC 0x0400000
+#define LX_PER_ADDR_LIMIT_32BIT 0x0800000
+#define LX_PER_SHORT_INODE 0x1000000
+#define LX_PER_WHOLE_SECONDS 0x2000000
+#define LX_PER_STICKY_TIMEOUTS 0x4000000
+#define LX_PER_ADDR_LIMIT_3GB 0x8000000
+
+#define LX_PER_LINUX 0x00
+#define LX_PER_SUNOS (0x06 | LX_PER_STICKY_TIMEOUTS)
+#define LX_PER_MASK 0xff
+
+/*
+ * A data type big enough to bitmap all Linux possible cpus.
+ * The bitmap size is defined as 1024 cpus in the Linux 2.4 and 2.6 man pages
+ * for sched_getaffinity() and sched_getaffinity().
+ */
+#define LX_NCPU (1024)
+#define LX_AFF_ULONGS (LX_NCPU / (8 * sizeof (ulong_t)))
+typedef ulong_t lx_affmask_t[LX_AFF_ULONGS];
+
+/* Length of proc boot_id string */
+#define LX_BOOTID_LEN 37
+
+/*
+ * Flag values for uc_brand_data[0] in the ucontext_t:
+ */
+#define LX_UC_STACK_NATIVE 0x00001
+#define LX_UC_STACK_BRAND 0x00002
+#define LX_UC_RESTORE_NATIVE_SP 0x00010
+#define LX_UC_FRAME_IS_SYSCALL 0x00100
+#define LX_UC_RESTART_SYSCALL 0x01000
+#define LX_UC_IGNORE_LINK 0x10000
+
+#ifdef _KERNEL
+
+typedef struct lx_lwp_data lx_lwp_data_t;
+
+/*
+ * Flag values for "lxpa_flags" on a ptrace(2) accord.
+ */
+typedef enum lx_accord_flags {
+ LX_ACC_TOMBSTONE = 0x01
+} lx_accord_flags_t;
+
+/*
+ * Flags values for "br_ptrace_flags" in the LWP-specific data.
+ */
+typedef enum lx_ptrace_flags {
+ LX_PTF_SYSCALL = 0x01,
+ LX_PTF_EXITING = 0x02,
+ LX_PTF_STOPPING = 0x04,
+ LX_PTF_INHERIT = 0x08,
+ LX_PTF_STOPPED = 0x10,
+ LX_PTF_PARENT_WAIT = 0x20,
+ LX_PTF_CLDPEND = 0x40,
+ LX_PTF_CLONING = 0x80,
+ LX_PTF_WAITPEND = 0x100
+} lx_ptrace_flags_t;
+
+/*
+ * A ptrace(2) accord represents the relationship between a tracer LWP and the
+ * set of LWPs that it is tracing: the tracees. This data structure belongs
+ * primarily to the tracer, but is reference counted so that it may be freed by
+ * whoever references it last.
+ */
+typedef struct lx_ptrace_accord {
+ kmutex_t lxpa_lock;
+ uint_t lxpa_refcnt;
+ lx_accord_flags_t lxpa_flags;
+
+ /*
+ * The tracer must hold "pidlock" while clearing these fields for
+ * exclusion of waitid(), etc.
+ */
+ lx_lwp_data_t *lxpa_tracer;
+ kcondvar_t *lxpa_cvp;
+
+ /*
+ * The "lxpa_tracees_lock" mutex protects the tracee list.
+ */
+ kmutex_t lxpa_tracees_lock;
+ list_t lxpa_tracees;
+} lx_ptrace_accord_t;
+
+/*
+ * These values are stored in the per-LWP data for a tracee when it is attached
+ * to a tracer. They record the method that was used to attach.
+ */
+typedef enum lx_ptrace_attach {
+ LX_PTA_NONE = 0x00, /* not attached */
+ LX_PTA_ATTACH = 0x01, /* due to tracer using PTRACE_ATTACH */
+ LX_PTA_TRACEME = 0x02, /* due to child using PTRACE_TRACEME */
+ LX_PTA_INHERIT_CLONE = 0x04, /* due to PTRACE_CLONE clone(2) flag */
+ LX_PTA_INHERIT_OPTIONS = 0x08 /* due to PTRACE_SETOPTIONS options */
+} lx_ptrace_attach_t;
+
+typedef enum lx_stack_mode {
+ LX_STACK_MODE_PREINIT = 0,
+ LX_STACK_MODE_INIT,
+ LX_STACK_MODE_NATIVE,
+ LX_STACK_MODE_BRAND
+} lx_stack_mode_t;
+
+struct lx_pid {
+ pid_t s_pid; /* the SunOS pid and ... */
+ id_t s_tid; /* ... tid pair */
+ pid_t l_pid; /* the corresponding linux pid */
+ time_t l_start; /* birthday of this pid */
+ struct pid *l_pidp;
+ struct lx_pid *stol_next; /* link in stol hash table */
+ struct lx_pid *ltos_next; /* link in ltos hash table */
+};
+
+/*
+ * lx-specific data in the klwp_t
+ */
+struct lx_lwp_data {
+ uint_t br_lwp_flags; /* misc. flags */
+ klwp_t *br_lwp; /* back pointer to container lwp */
+ int br_signal; /* signal to send to parent when */
+ /* clone()'ed child terminates */
+ int br_exitwhy; /* reason for thread (process) exit */
+ int br_exitwhat; /* exit code / killing signal */
+ lx_affmask_t br_affinitymask; /* bitmask of CPU sched affinities */
+ struct user_desc br_tls[LX_TLSNUM];
+ /* descriptors used by libc for TLS */
+ ulong_t br_lx_fsbase; /* lx fsbase for 64-bit thread ptr */
+ ulong_t br_ntv_fsbase; /* native fsbase 64-bit thread ptr */
+ ulong_t br_lx_gsbase; /* lx user-land gsbase */
+ ulong_t br_ntv_gsbase; /* native user-land gsbase */
+ pid_t br_pid; /* converted pid for this thread */
+ pid_t br_tgid; /* thread group ID for this thread */
+ pid_t br_ppid; /* parent pid for this thread */
+ id_t br_ptid; /* parent tid for this thread */
+ void *br_clear_ctidp; /* clone thread id ptr */
+ void *br_set_ctidp; /* clone thread id ptr */
+ void *br_robust_list; /* robust lock list, if any */
+
+ /*
+ * The following struct is used by some system calls to pass extra
+ * flags into the kernel without impinging on the namespace for
+ * illumos.
+ */
+ void *br_scall_args;
+ int br_args_size; /* size in bytes of br_scall_args */
+
+ boolean_t br_waitid_emulate;
+ int br_waitid_flags;
+
+ lx_ptrace_flags_t br_ptrace_flags; /* ptrace flags for this LWP */
+ lx_ptrace_options_t br_ptrace_options; /* PTRACE_SETOPTIONS options */
+ lx_ptrace_options_t br_ptrace_clone_option; /* current clone(2) type */
+
+ lx_ptrace_attach_t br_ptrace_attach; /* how did we get attached */
+ lx_ptrace_accord_t *br_ptrace_accord; /* accord for this tracer LWP */
+ lx_ptrace_accord_t *br_ptrace_tracer; /* accord tracing this LWP */
+ list_node_t br_ptrace_linkage; /* linkage for lxpa_tracees list */
+
+ ushort_t br_ptrace_whystop; /* stop reason, 0 for no stop */
+ ushort_t br_ptrace_whatstop; /* stop sub-reason */
+
+ int32_t br_ptrace_stopsig; /* stop signal, 0 for no signal */
+ /*
+ * Track the last (native) signal number processed by a ptrace.
+ * This allows the tracee to properly handle ignored signals after
+ * the tracer has been notified and the tracee restarted.
+ */
+ int32_t br_ptrace_donesig;
+ uintptr_t br_ptrace_stopucp; /* usermode ucontext_t pointer */
+
+ uint_t br_ptrace_event;
+ ulong_t br_ptrace_eventmsg;
+
+ int br_syscall_num; /* current system call number */
+ boolean_t br_syscall_restart; /* should restart on EINTR */
+
+ /*
+ * Store the LX_STACK_MODE for this LWP, and the current extent of the
+ * native (emulation) stack. This is similar, in principle, to the
+ * sigaltstack mechanism for signal handling. We also use this mode
+ * flag to determine how to process system calls from this LWP.
+ */
+ lx_stack_mode_t br_stack_mode;
+ uintptr_t br_ntv_stack;
+ uintptr_t br_ntv_stack_current;
+
+ /*
+ * If this pid is set, we return it with getpid(). This allows the
+ * thunking server to interpose on the pid returned to the Linux
+ * syslog software.
+ */
+ pid_t br_lx_thunk_pid;
+
+ /*
+ * If strict mode is enabled (via LX_STRICT in the environment), any
+ * call to lx_unsupported() will set this boolean to B_TRUE. This will
+ * cause us to drop SIGSYS on the LWP as it attempts to return to
+ * usermode.
+ */
+ boolean_t br_strict_failure;
+
+ /*
+ * Some syscalls emulated in-kernel still call back out to the
+ * userspace emulation for certain functions. When that is the case,
+ * the syscall_return logic must be bypassed at the end of the
+ * in-kernel syscall code. The NORMALRETURN and JUSTRETURN constants
+ * are used to choose the behavior.
+ */
+ char br_eosys;
+
+ /*
+ * Hold a pre-allocated lx_pid structure to be used during lx_initlwp.
+ */
+ struct lx_pid *br_lpid;
+
+ /*
+ * ID of the cgroup this thread belongs to.
+ */
+ uint_t br_cgroupid;
+};
+
+/*
+ * Upper limit on br_args_size, low because this value can persist until
+ * overridden with another value, and the size is given from userland.
+ */
+#define LX_BR_ARGS_SIZE_MAX (1024)
+
+/*
+ * brand specific data
+ *
+ * We currently only support a single cgroup mount in an lx zone so we only have
+ * one ptr (lxzd_cgroup) but this could be changed to a list if cgroups is ever
+ * enhanced to support different mounts with different subsystem controllers.
+ */
+typedef struct lx_zone_data {
+ kmutex_t lxzd_lock; /* protects all members */
+ char lxzd_kernel_release[LX_KERN_RELEASE_MAX];
+ char lxzd_kernel_version[LX_KERN_VERSION_MAX];
+ ksocket_t lxzd_ioctl_sock;
+ char lxzd_bootid[LX_BOOTID_LEN]; /* procfs boot_id */
+ vfs_t *lxzd_cgroup; /* cgroup for this zone */
+ list_t *lxzd_vdisks; /* virtual disks (zvols) */
+ dev_t lxzd_zfs_dev; /* major num for zfs */
+} lx_zone_data_t;
+
+#define BR_CPU_BOUND 0x0001
+
+#define ttolxlwp(t) ((struct lx_lwp_data *)ttolwpbrand(t))
+#define lwptolxlwp(l) ((struct lx_lwp_data *)lwptolwpbrand(l))
+#define ttolxproc(t) \
+ (((t)->t_procp->p_brand == &lx_brand) ? \
+ (struct lx_proc_data *)(t)->t_procp->p_brand_data : NULL)
+#define ptolxproc(p) \
+ (((p)->p_brand == &lx_brand) ? \
+ (struct lx_proc_data *)(p)->p_brand_data : NULL)
+#define ztolxzd(z) \
+ (((z)->zone_brand == &lx_brand) ? \
+ (lx_zone_data_t *)(z)->zone_brand_data : NULL)
+
+/* Macro for converting to system call arguments. */
+#define LX_ARGS(scall) ((struct lx_##scall##_args *)\
+ (ttolxlwp(curthread)->br_scall_args))
+
+typedef enum lx_virt_disk_type {
+ LXVD_NONE,
+ LXVD_ZFS_DS,
+ LXVD_ZVOL
+} lx_virt_disk_type_t;
+
+typedef struct lx_virt_disk {
+ list_node_t lxvd_link;
+ char lxvd_name[MAXNAMELEN];
+ lx_virt_disk_type_t lxvd_type;
+ dev_t lxvd_emul_dev;
+ dev_t lxvd_real_dev;
+ uint64_t lxvd_volsize;
+ uint64_t lxvd_blksize;
+ char lxvd_real_name[MAXPATHLEN];
+} lx_virt_disk_t;
+
+/*
+ * Determine the upper bound on the system call number:
+ */
+#if defined(_LP64)
+#define LX_MAX_SYSCALL(lwp) \
+ ((lwp_getdatamodel(lwp) == DATAMODEL_NATIVE) ? \
+ lx_nsysent64 : lx_nsysent32)
+#else
+#define LX_MAX_SYSCALL(lwp) lx_nsysent32
+#endif
+
+extern int lx_kern_release_cmp(zone_t *, const char *);
+
+extern void lx_lwp_set_native_stack_current(lx_lwp_data_t *, uintptr_t);
+extern void lx_divert(klwp_t *, uintptr_t);
+extern int lx_runexe(klwp_t *, void *);
+extern void lx_switch_to_native(klwp_t *);
+
+extern int lx_syscall_enter(void);
+extern int lx_syscall_return(klwp_t *, int, long);
+
+extern int lx_syscall_fast_enter(void);
+
+extern void lx_trace_sysenter(int, uintptr_t *);
+extern void lx_trace_sysreturn(int, long);
+
+extern void lx_emulate_user(klwp_t *, int, uintptr_t *);
+#if defined(_SYSCALL32_IMPL)
+extern void lx_emulate_user32(klwp_t *, int, uintptr_t *);
+#endif
+
+extern int lx_debug;
+#define lx_print if (lx_debug) printf
+
+extern void lx_pid_assign(kthread_t *, struct lx_pid *);
+extern void lx_pid_reassign(kthread_t *);
+extern void lx_pid_rele(pid_t, id_t);
+extern pid_t lx_lpid_to_spair(pid_t, pid_t *, id_t *);
+extern pid_t lx_lwp_ppid(klwp_t *, pid_t *, id_t *);
+extern void lx_pid_init(void);
+extern void lx_pid_fini(void);
+
+/*
+ * In-Kernel Linux System Call Description.
+ */
+typedef struct lx_sysent {
+ char *sy_name;
+ long (*sy_callc)();
+ char sy_flags;
+ char sy_narg;
+} lx_sysent_t;
+
+#if defined(_LP64)
+extern lx_sysent_t lx_sysent64[LX_NSYSCALLS + 1];
+extern int lx_nsysent64;
+#endif
+extern lx_sysent_t lx_sysent32[LX_NSYSCALLS + 1];
+extern int lx_nsysent32;
+
+#endif /* _KERNEL */
+#endif /* _ASM */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_BRAND_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_fcntl.h b/usr/src/uts/common/brand/lx/sys/lx_fcntl.h
new file mode 100644
index 0000000000..f82c6b867d
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_fcntl.h
@@ -0,0 +1,161 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _SYS_LX_FCNTL_H
+#define _SYS_LX_FCNTL_H
+
+#include <sys/vnode.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Lx open/fcntl flags
+ */
+#define LX_O_RDONLY 00
+#define LX_O_WRONLY 01
+#define LX_O_RDWR 02
+#define LX_O_ACCMODE (LX_O_RDONLY | LX_O_WRONLY | LX_O_RDWR)
+#define LX_O_CREAT 0100
+#define LX_O_EXCL 0200
+#define LX_O_NOCTTY 0400
+#define LX_O_TRUNC 01000
+#define LX_O_APPEND 02000
+#define LX_O_NONBLOCK 04000
+#define LX_O_NDELAY LX_O_NONBLOCK
+#define LX_O_SYNC 010000
+#define LX_O_FSYNC LX_O_SYNC
+#define LX_O_ASYNC 020000
+#define LX_O_DIRECT 040000
+#define LX_O_LARGEFILE 0100000
+#define LX_O_DIRECTORY 0200000
+#define LX_O_NOFOLLOW 0400000
+#define LX_O_CLOEXEC 02000000
+#define LX_O_PATH 010000000
+
+#define LX_F_DUPFD 0
+#define LX_F_GETFD 1
+#define LX_F_SETFD 2
+#define LX_F_GETFL 3
+#define LX_F_SETFL 4
+#define LX_F_GETLK 5
+#define LX_F_SETLK 6
+#define LX_F_SETLKW 7
+#define LX_F_SETOWN 8
+#define LX_F_GETOWN 9
+#define LX_F_SETSIG 10
+#define LX_F_GETSIG 11
+
+#define LX_F_GETLK64 12
+#define LX_F_SETLK64 13
+#define LX_F_SETLKW64 14
+
+#define LX_F_SETLEASE 1024
+#define LX_F_GETLEASE 1025
+#define LX_F_NOTIFY 1026
+#define LX_F_CANCELLK 1029
+#define LX_F_DUPFD_CLOEXEC 1030
+#define LX_F_SETPIPE_SZ 1031
+#define LX_F_GETPIPE_SZ 1032
+
+#define LX_F_RDLCK 0
+#define LX_F_WRLCK 1
+#define LX_F_UNLCK 2
+
+/* Test for emulated O_PATH setting in file_t flags */
+#define LX_IS_O_PATH(f) (((f)->f_flag & (FREAD|FWRITE)) == 0)
+
+extern int lx_vp_at(int, char *, vnode_t **, int);
+
+/*
+ * Lx flock codes.
+ */
+#define LX_NAME_MAX 255
+#define LX_LOCK_SH 1 /* shared */
+#define LX_LOCK_EX 2 /* exclusive */
+#define LX_LOCK_NB 4 /* non-blocking */
+#define LX_LOCK_UN 8 /* unlock */
+
+/*
+ * On Linux the constants AT_REMOVEDIR and AT_EACCESS have the same value.
+ * AT_REMOVEDIR is used only by unlinkat and AT_EACCESS is used only by
+ * faccessat.
+ */
+#define LX_AT_FDCWD (-100)
+#define LX_AT_SYMLINK_NOFOLLOW 0x100
+#define LX_AT_REMOVEDIR 0x200
+#define LX_AT_EACCESS 0x200
+#define LX_AT_SYMLINK_FOLLOW 0x400
+#define LX_AT_NO_AUTOMOUNT 0x800
+#define LX_AT_EMPTY_PATH 0x1000
+
+typedef struct lx_flock {
+ short l_type;
+ short l_whence;
+ long l_start;
+ long l_len;
+ int l_pid;
+} lx_flock_t;
+
+typedef struct lx_flock64 {
+ short l_type;
+ short l_whence;
+ long long l_start;
+ long long l_len;
+ int l_pid;
+} lx_flock64_t;
+
+#if defined(_KERNEL)
+
+/*
+ * 64-bit kernel view of 32-bit usermode structs.
+ */
+#pragma pack(4)
+typedef struct lx_flock32 {
+ int16_t l_type;
+ int16_t l_whence;
+ int32_t l_start;
+ int32_t l_len;
+ int32_t l_pid;
+} lx_flock32_t;
+
+typedef struct lx_flock64_32 {
+ int16_t l_type;
+ int16_t l_whence;
+ int64_t l_start;
+ int64_t l_len;
+ int32_t l_pid;
+} lx_flock64_32_t;
+#pragma pack()
+
+#endif /* _KERNEL && _SYSCALL32_IMPL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_FCNTL_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_futex.h b/usr/src/uts/common/brand/lx/sys/lx_futex.h
new file mode 100644
index 0000000000..a400b3bd83
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_futex.h
@@ -0,0 +1,121 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_LX_FUTEX_H
+#define _SYS_LX_FUTEX_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define FUTEX_WAIT 0
+#define FUTEX_WAKE 1
+#define FUTEX_FD 2
+#define FUTEX_REQUEUE 3
+#define FUTEX_CMP_REQUEUE 4
+#define FUTEX_WAKE_OP 5
+#define FUTEX_LOCK_PI 6
+#define FUTEX_UNLOCK_PI 7
+#define FUTEX_TRYLOCK_PI 8
+#define FUTEX_WAIT_BITSET 9
+#define FUTEX_WAKE_BITSET 10
+#define FUTEX_WAIT_REQUEUE_PI 11
+#define FUTEX_CMP_REQUEUE_PI 12
+#define FUTEX_MAX_CMD FUTEX_CMP_REQUEUE_PI
+
+/*
+ * Flags that can be OR'd into a futex operation.
+ */
+#define FUTEX_CMD_MASK 0x007f
+#define FUTEX_PRIVATE_FLAG 0x0080
+#define FUTEX_CLOCK_REALTIME 0x0100
+
+#define FUTEX_BITSET_MATCH_ANY 0xffffffff
+/*
+ * FUTEX_WAKE_OP operations
+ */
+#define FUTEX_OP_SET 0 /* *(int *)UADDR2 = OPARG; */
+#define FUTEX_OP_ADD 1 /* *(int *)UADDR2 += OPARG; */
+#define FUTEX_OP_OR 2 /* *(int *)UADDR2 |= OPARG; */
+#define FUTEX_OP_ANDN 3 /* *(int *)UADDR2 &= ~OPARG; */
+#define FUTEX_OP_XOR 4 /* *(int *)UADDR2 ^= OPARG; */
+
+/*
+ * FUTEX_WAKE_OP comparison operations
+ */
+#define FUTEX_OP_CMP_EQ 0 /* if (oldval == CMPARG) wake */
+#define FUTEX_OP_CMP_NE 1 /* if (oldval != CMPARG) wake */
+#define FUTEX_OP_CMP_LT 2 /* if (oldval < CMPARG) wake */
+#define FUTEX_OP_CMP_LE 3 /* if (oldval <= CMPARG) wake */
+#define FUTEX_OP_CMP_GT 4 /* if (oldval > CMPARG) wake */
+#define FUTEX_OP_CMP_GE 5 /* if (oldval >= CMPARG) wake */
+
+/*
+ * The encoding of the FUTEX_WAKE_OP operation in 32 bits:
+ *
+ * +--+-- - --+-- - --+-- - --+-- - --+
+ * |S |OP |CMP |OPARG |CMPARG |
+ * +--+-- - --+-- - --+-- - --+-- - --+
+ * |31|30 - 28|27 - 24|23 - 12|11 - 0|
+ *
+ * The S bit denotes that the OPARG should be (1 << OPARG) instead of OPARG.
+ * (Yes, this whole thing is entirely absurd -- see the block comment in
+ * lx_futex.c for an explanation of this nonsense.) Macros to extract the
+ * various components from the operation, given the above encoding:
+ */
+#define FUTEX_OP_OP(x) (((x) >> 28) & 7)
+#define FUTEX_OP_CMP(x) (((x) >> 24) & 15)
+#define FUTEX_OP_OPARG(x) (((x) >> 31) ? (1 << (((x) << 8) >> 20)) : \
+ ((((x) << 8) >> 20)))
+#define FUTEX_OP_CMPARG(x) (((x) << 20) >> 20)
+
+#ifdef _KERNEL
+
+#define FUTEX_WAITERS 0x80000000
+#define FUTEX_OWNER_DIED 0x40000000
+#define FUTEX_TID_MASK 0x3fffffff
+
+#define FUTEX_ROBUST_LOCK_PI 1
+#define FUTEX_ROBUST_LIST_LIMIT 2048
+
+extern long lx_futex(uintptr_t addr, int cmd, int val, uintptr_t lx_timeout,
+ uintptr_t addr2, int val2);
+extern void lx_futex_init(void);
+extern int lx_futex_fini(void);
+extern long lx_set_robust_list(void *listp, size_t len);
+extern long lx_get_robust_list(pid_t pid, void **listp, size_t *lenp);
+extern void lx_futex_robust_exit(uintptr_t addr, uint32_t tid);
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_FUTEX_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_impl.h b/usr/src/uts/common/brand/lx/sys/lx_impl.h
new file mode 100644
index 0000000000..03b9d43038
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_impl.h
@@ -0,0 +1,52 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _LX_IMPL_H
+#define _LX_IMPL_H
+
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void (lx_systrace_f)(ulong_t, ulong_t, ulong_t, ulong_t, ulong_t,
+ ulong_t, ulong_t);
+
+
+extern lx_systrace_f *lx_systrace_entry_ptr;
+extern lx_systrace_f *lx_systrace_return_ptr;
+
+extern void lx_brand_systrace_enable(void);
+extern void lx_brand_systrace_disable(void);
+
+extern void lx_unsupported(char *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_IMPL_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_ldt.h b/usr/src/uts/common/brand/lx/sys/lx_ldt.h
new file mode 100644
index 0000000000..825933e86c
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_ldt.h
@@ -0,0 +1,91 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_LINUX_LDT_H
+#define _SYS_LINUX_LDT_H
+
+#include <sys/segments.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct ldt_info {
+ uint_t entry_number;
+ uint_t base_addr;
+ uint_t limit;
+ uint_t seg_32bit:1,
+ contents:2,
+ read_exec_only:1,
+ limit_in_pages:1,
+ seg_not_present:1,
+ useable:1;
+};
+
+#define LDT_INFO_EMPTY(info) \
+ ((info)->base_addr == 0 && (info)->limit == 0 && \
+ (info)->contents == 0 && (info)->read_exec_only == 1 && \
+ (info)->seg_32bit == 0 && (info)->limit_in_pages == 0 && \
+ (info)->seg_not_present == 1 && (info)->useable == 0)
+
+#if defined(__amd64)
+#define SETMODE(desc) (desc)->usd_long = SDP_SHORT;
+#else
+#define SETMODE(desc)
+#endif
+
+#define LDT_INFO_TO_DESC(info, desc) { \
+ USEGD_SETBASE(desc, (info)->base_addr); \
+ USEGD_SETLIMIT(desc, (info)->limit); \
+ (desc)->usd_type = ((info)->contents << 2) | \
+ ((info)->read_exec_only ^ 1) << 1 | 0x10; \
+ (desc)->usd_dpl = SEL_UPL; \
+ (desc)->usd_p = (info)->seg_not_present ^ 1; \
+ (desc)->usd_def32 = (info)->seg_32bit; \
+ (desc)->usd_gran = (info)->limit_in_pages; \
+ (desc)->usd_avl = (info)->useable; \
+ SETMODE(desc); \
+}
+
+#define DESC_TO_LDT_INFO(desc, info) { \
+ bzero((info), sizeof (*(info))); \
+ (info)->base_addr = USEGD_GETBASE(desc); \
+ (info)->limit = USEGD_GETLIMIT(desc); \
+ (info)->seg_not_present = (desc)->usd_p ^ 1; \
+ (info)->contents = ((desc)->usd_type >> 2) & 3; \
+ (info)->read_exec_only = (((desc)->usd_type >> 1) & 1) ^ 1; \
+ (info)->seg_32bit = (desc)->usd_def32; \
+ (info)->limit_in_pages = (desc)->usd_gran; \
+ (info)->useable = (desc)->usd_avl; \
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LINUX_LDT_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_misc.h b/usr/src/uts/common/brand/lx/sys/lx_misc.h
new file mode 100644
index 0000000000..7c1e50362c
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_misc.h
@@ -0,0 +1,117 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS__LX_MISC_H
+#define _SYS__LX_MISC_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <inet/ip.h>
+#include <inet/ip6.h>
+#include <sys/siginfo.h>
+#include <sys/lx_brand.h>
+
+#ifdef _KERNEL
+
+extern void lx_setrval(klwp_t *, int, int);
+extern void lx_exec();
+extern void lx_exitlwp(klwp_t *);
+extern void lx_freelwp(klwp_t *);
+extern void *lx_lwpdata_alloc(proc_t *);
+extern void lx_lwpdata_free(void *);
+extern void lx_initlwp(klwp_t *, void *);
+extern void lx_initlwp_post(klwp_t *);
+extern void lx_forklwp(klwp_t *, klwp_t *);
+
+extern void lx_set_gdt(int, user_desc_t *);
+extern void lx_clear_gdt(int);
+
+extern longlong_t lx_nosys();
+
+extern greg_t lx_fixsegreg(greg_t, model_t);
+extern uintptr_t lx_fsbase(klwp_t *, uintptr_t);
+extern void lx_exit_with_sig(proc_t *, sigqueue_t *);
+extern boolean_t lx_wait_filter(proc_t *, proc_t *);
+extern void lx_sigfd_translate(k_siginfo_t *);
+extern int stol_ksiginfo_copyout(k_siginfo_t *, void *);
+#if defined(_SYSCALL32_IMPL)
+extern int stol_ksiginfo32_copyout(k_siginfo_t *, void *);
+#endif
+extern void lx_read_argv_bounds(proc_t *p);
+
+typedef enum lx_regs_location {
+ LX_REG_LOC_UNAVAIL,
+ LX_REG_LOC_LWP,
+ LX_REG_LOC_UCP
+} lx_regs_location_t;
+
+extern lx_regs_location_t lx_regs_location(lx_lwp_data_t *, void **, boolean_t);
+
+
+typedef enum lx_if_action {
+ LX_IF_FROMNATIVE,
+ LX_IF_TONATIVE
+} lx_if_action_t;
+
+/* Linux ARP protocol hardware identifiers */
+#define LX_ARPHRD_ETHER 1 /* Ethernet */
+#define LX_ARPHRD_LOOPBACK 772 /* Loopback */
+#define LX_ARPHRD_VOID 0xffff /* Unknown */
+
+/* IPv6 address scope values used in /proc/net/if_inet6 */
+#define LX_IPV6_ADDR_LOOPBACK 0x0010U
+#define LX_IPV6_ADDR_LINKLOCAL 0x0020U
+#define LX_IPV6_ADDR_SITELOCAL 0x0040U
+#define LX_IPV6_ADDR_COMPATv4 0x0080U
+
+extern void lx_ifname_convert(char *, lx_if_action_t);
+extern void lx_ifflags_convert(uint64_t *, lx_if_action_t);
+extern unsigned int lx_ipv6_scope_convert(const in6_addr_t *);
+extern void lx_stol_hwaddr(const struct sockaddr_dl *, struct sockaddr *,
+ int *);
+
+extern boolean_t lx_ptrace_stop(ushort_t);
+extern void lx_stop_notify(proc_t *, klwp_t *, ushort_t, ushort_t);
+extern void lx_ptrace_init(void);
+extern void lx_ptrace_fini(void);
+extern int lx_waitid_helper(idtype_t, id_t, k_siginfo_t *, int, boolean_t *,
+ int *);
+extern void lx_ptrace_exit(proc_t *, klwp_t *);
+extern void lx_ptrace_inherit_tracer(lx_lwp_data_t *, lx_lwp_data_t *);
+extern int lx_ptrace_stop_for_option(int, boolean_t, ulong_t, uintptr_t);
+extern int lx_ptrace_set_clone_inherit(int, boolean_t);
+extern int lx_sigcld_repost(proc_t *, sigqueue_t *);
+extern int lx_ptrace_issig_stop(proc_t *, klwp_t *);
+extern boolean_t lx_ptrace_sig_ignorable(proc_t *, klwp_t *, int);
+
+extern int lx_helper_clone(int64_t *, int, void *, void *, void *);
+extern int lx_helper_setgroups(int, gid_t *);
+extern int lx_helper_rt_sigqueueinfo(pid_t, int, siginfo_t *);
+extern int lx_helper_rt_tgsigqueueinfo(pid_t, pid_t, int, siginfo_t *);
+
+extern boolean_t lx_vsyscall_iscall(klwp_t *, uintptr_t, int *);
+extern void lx_vsyscall_enter(proc_t *, klwp_t *, int);
+
+extern void lx_check_strict_failure(lx_lwp_data_t *);
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS__LX_MISC_H */
diff --git a/usr/src/cmd/ssh/include/engine.h b/usr/src/uts/common/brand/lx/sys/lx_ptm.h
index 74ee80db01..74bbc939a3 100644
--- a/usr/src/cmd/ssh/include/engine.h
+++ b/usr/src/uts/common/brand/lx/sys/lx_ptm.h
@@ -19,25 +19,26 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#ifndef _ENGINE_H
-#define _ENGINE_H
+#ifndef _SYS_PTM_LINUX_H
+#define _SYS_PTM_LINUX_H
-#ifdef __cplusplus
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
extern "C" {
#endif
-#include "includes.h"
-#include <openssl/engine.h>
+#define LX_PTM_DRV "lx_ptm"
+#define LX_PTM_MINOR_NODE "lx_ptmajor"
-ENGINE *pkcs11_engine_load(int use_engine);
-void pkcs11_engine_finish(void *engine);
+#define LX_PTM_DEV_TO_PTS(dev) (getminor(dev) - 1)
-#ifdef __cplusplus
+#ifdef __cplusplus
}
#endif
-#endif /* _ENGINE_H */
+#endif /* _SYS_PTM_LINUX_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_sched.h b/usr/src/uts/common/brand/lx/sys/lx_sched.h
new file mode 100644
index 0000000000..b0ae748f3c
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_sched.h
@@ -0,0 +1,60 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LINUX_SCHED_H
+#define _SYS_LINUX_SCHED_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/procset.h>
+#include <sys/priocntl.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Linux scheduler policies.
+ */
+#define LX_SCHED_OTHER 0
+#define LX_SCHED_FIFO 1
+#define LX_SCHED_RR 2
+
+#define LX_PRI_MAX 99
+
+typedef int l_pid_t;
+
+struct lx_sched_param {
+ int lx_sched_prio;
+};
+
+extern int sched_setprocset(procset_t *, l_pid_t);
+extern long do_priocntlsys(int, procset_t *, void *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LINUX_SCHED_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_siginfo.h b/usr/src/uts/common/brand/lx/sys/lx_siginfo.h
new file mode 100644
index 0000000000..9f606b614f
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_siginfo.h
@@ -0,0 +1,190 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _LX_SIGINFO_H
+#define _LX_SIGINFO_H
+
+#include <sys/lx_types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * lx_siginfo_t lsi_code values
+ *
+ * LX_SI_ASYNCNL: Sent by asynch name lookup completion
+ * LX_SI_DETHREAD: Sent by execve() killing subsidiary threads
+ * LX_SI_SIGIO: Sent by queued SIGIO
+ * LX_SI_ASYNCIO: Sent by asynchronous I/O completion
+ * LX_SI_MESGQ: Sent by real time message queue state change
+ * LX_SI_TIMER: Sent by timer expiration
+ * LX_SI_QUEUE: Sent by sigqueue
+ * LX_SI_USER: Sent by kill, sigsend, raise, etc.
+ * LX_SI_KERNEL: Sent by kernel
+ * LX_SI_CODE_NOT_EXIST: Error code. When translating from Linux to
+ * illumos errors, if there is no translation available, this value
+ * should be used. This value should have no meaning as an si_code in
+ * illumos or Linux.
+ *
+ * At present, LX_SI_ASYNCNL, LX_SI_DETHREAD, and LX_SI_SIGIO are unused by
+ * BrandZ.
+ */
+#define LX_SI_CODE_NOT_EXIST (-61)
+#define LX_SI_ASYNCNL (-60)
+#define LX_SI_DETHREAD (-7)
+#define LX_SI_TKILL (-6)
+#define LX_SI_SIGIO (-5)
+#define LX_SI_ASYNCIO (-4)
+#define LX_SI_MESGQ (-3)
+#define LX_SI_TIMER (-2)
+#define LX_SI_QUEUE (-1)
+#define LX_SI_USER (0)
+#define LX_SI_KERNEL (0x80)
+
+#define LX_SI_MAX_SIZE 128
+#define LX_SI_PAD_SIZE_32 ((LX_SI_MAX_SIZE / sizeof (int)) - 3)
+#define LX_SI_PAD_SIZE_64 ((LX_SI_MAX_SIZE / sizeof (int)) - 4)
+
+#if defined(_LP64)
+/*
+ * Because of the odd number (3) of ints before the union, we need to account
+ * for the smaller padding needed on x64 due to the union being offset to an 8
+ * byte boundary.
+ */
+#define LX_SI_PAD_SIZE LX_SI_PAD_SIZE_64
+#else
+#define LX_SI_PAD_SIZE LX_SI_PAD_SIZE_32
+#endif
+
+typedef struct lx_siginfo {
+ int lsi_signo;
+ int lsi_errno;
+ int lsi_code;
+ union {
+ int _pad[LX_SI_PAD_SIZE];
+
+ struct {
+ pid_t _pid;
+ lx_uid16_t _uid;
+ } _kill;
+
+ struct {
+ uint_t _timer1;
+ uint_t _timer2;
+ } _timer;
+
+ struct {
+ pid_t _pid;
+ lx_uid16_t _uid;
+ union sigval _sigval;
+ } _rt;
+
+ struct {
+ pid_t _pid;
+ lx_uid16_t _uid;
+ int _status;
+ clock_t _utime;
+ clock_t _stime;
+ } _sigchld;
+
+ struct {
+ void *_addr;
+ } _sigfault;
+
+ struct {
+ int _band;
+ int _fd;
+ } _sigpoll;
+ } _sifields;
+} lx_siginfo_t;
+
+#if defined(_KERNEL) && defined(_SYSCALL32_IMPL)
+/*
+ * 64-bit kernel view of the 32-bit "lx_siginfo_t" object.
+ */
+#pragma pack(4)
+typedef struct lx_siginfo32 {
+ int lsi_signo;
+ int lsi_errno;
+ int lsi_code;
+ union {
+ int _pad[LX_SI_PAD_SIZE_32];
+
+ struct {
+ pid32_t _pid;
+ lx_uid16_t _uid;
+ } _kill;
+
+ struct {
+ uint_t _timer1;
+ uint_t _timer2;
+ } _timer;
+
+ struct {
+ pid32_t _pid;
+ lx_uid16_t _uid;
+ union sigval32 _sigval;
+ } _rt;
+
+ struct {
+ pid32_t _pid;
+ lx_uid16_t _uid;
+ int _status;
+ clock32_t _utime;
+ clock32_t _stime;
+ } _sigchld;
+
+ struct {
+ caddr32_t _addr;
+ } _sigfault;
+
+ struct {
+ int _band;
+ int _fd;
+ } _sigpoll;
+ } _sifields;
+} lx_siginfo32_t;
+#pragma pack()
+#endif /* defined(_KERNEL) && defined(_SYSCALL32_IMPL) */
+
+#define lsi_pid _sifields._kill._pid
+#define lsi_uid _sifields._kill._uid
+#define lsi_status _sifields._sigchld._status
+#define lsi_utime _sifields._sigchld._utime
+#define lsi_stime _sifields._sigchld._stime
+#define lsi_value _sifields._rt._sigval
+#define lsi_int _sifields._rt._sigval.sivalx_int
+#define lsi_ptr _sifields._rt._sigval.sivalx_ptr
+#define lsi_addr _sifields._sigfault._addr
+#define lsi_band _sifields._sigpoll._band
+#define lsi_fd _sifields._sigpoll._fd
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_SIGINFO_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_signal.h b/usr/src/uts/common/brand/lx/sys/lx_signal.h
new file mode 100644
index 0000000000..552c36238b
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_signal.h
@@ -0,0 +1,32 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _LX_SIGNAL_H
+#define _LX_SIGNAL_H
+
+#include <lx_signum.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern void lx_ltos_sigset(lx_sigset_t *, k_sigset_t *);
+extern void lx_stol_sigset(k_sigset_t *, lx_sigset_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_SIGNAL_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_socket.h b/usr/src/uts/common/brand/lx/sys/lx_socket.h
new file mode 100644
index 0000000000..eb9826eebe
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_socket.h
@@ -0,0 +1,434 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#ifndef _SYS_LX_SOCKET_H
+#define _SYS_LX_SOCKET_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Linux address family definitions
+ * Some of these are not supported
+ */
+#define LX_AF_UNSPEC 0 /* Unspecified */
+#define LX_AF_UNIX 1 /* local file/pipe name */
+#define LX_AF_INET 2 /* IP protocol family */
+#define LX_AF_AX25 3 /* Amateur Radio AX.25 */
+#define LX_AF_IPX 4 /* Novell Internet Protocol */
+#define LX_AF_APPLETALK 5 /* Appletalk */
+#define LX_AF_NETROM 6 /* Amateur radio */
+#define LX_AF_BRIDGE 7 /* Multiprotocol bridge */
+#define LX_AF_ATMPVC 8 /* ATM PVCs */
+#define LX_AF_X25 9 /* X.25 */
+#define LX_AF_INET6 10 /* IPV 6 */
+#define LX_AF_ROSE 11 /* Amateur Radio X.25 */
+#define LX_AF_DECNET 12 /* DECnet */
+#define LX_AF_NETBEUI 13 /* 802.2LLC */
+#define LX_AF_SECURITY 14 /* Security callback */
+#define LX_AF_KEY 15 /* key management */
+#define LX_AF_ROUTE 16 /* Alias to emulate 4.4BSD */
+#define LX_AF_NETLINK LX_AF_ROUTE
+#define LX_AF_PACKET 17 /* Packet family */
+#define LX_AF_ASH 18 /* Ash ? */
+#define LX_AF_ECONET 19 /* Acorn Econet */
+#define LX_AF_ATMSVC 20 /* ATM SVCs */
+#define LX_AF_SNA 22 /* Linux SNA */
+#define LX_AF_IRDA 23 /* IRDA sockets */
+#define LX_AF_PPPOX 24 /* PPPoX sockets */
+#define LX_AF_WANPIPE 25 /* Wanpipe API sockets */
+#define LX_AF_LLC 26
+/* gap in Linux defines for 27 and 28 */
+#define LX_AF_CAN 29
+#define LX_AF_TIPC 30
+#define LX_AF_BLUETOOTH 31 /* Bluetooth sockets */
+#define LX_AF_IUCV 32
+#define LX_AF_RXRPC 33
+
+/* limit of AF mappings */
+#define LX_AF_MAX LX_AF_RXRPC
+
+#define AF_NOTSUPPORTED -1
+#define AF_INVAL -2
+
+/*
+ * Options for use with [gs]etsockopt at the SOL_SOCKET level.
+ */
+#define LX_SOL_SOCKET 1
+
+#define LX_SCM_RIGHTS 1
+#define LX_SCM_CRED 2
+
+#define LX_SO_DEBUG 1
+#define LX_SO_REUSEADDR 2
+#define LX_SO_TYPE 3
+#define LX_SO_ERROR 4
+#define LX_SO_DONTROUTE 5
+#define LX_SO_BROADCAST 6
+#define LX_SO_SNDBUF 7
+#define LX_SO_RCVBUF 8
+#define LX_SO_KEEPALIVE 9
+#define LX_SO_OOBINLINE 10
+#define LX_SO_NO_CHECK 11
+#define LX_SO_PRIORITY 12
+#define LX_SO_LINGER 13
+#define LX_SO_BSDCOMPAT 14
+#define LX_SO_REUSEPORT 15
+/*
+ * For Linux see unix(7) man page SO_PASSCRED description. For Illumos see
+ * socket.h(3HEAD) man page SO_RECVUCRED description.
+ */
+#define LX_SO_PASSCRED 16
+#define LX_SO_PEERCRED 17
+#define LX_SO_RCVLOWAT 18
+#define LX_SO_SNDLOWAT 19
+#define LX_SO_RCVTIMEO 20
+#define LX_SO_SNDTIMEO 21
+/* Security levels - as per NRL IPv6 - don't actually do anything */
+#define LX_SO_SECURITY_AUTHENTICATION 22
+#define LX_SO_SECURITY_ENCRYPTION_TRANSPORT 23
+#define LX_SO_SECURITY_ENCRYPTION_NETWORK 24
+#define LX_SO_BINDTODEVICE 25
+/* Socket filtering */
+#define LX_SO_ATTACH_FILTER 26
+#define LX_SO_DETACH_FILTER 27
+#define LX_SO_PEERNAME 28
+#define LX_SO_TIMESTAMP 29
+#define LX_SCM_TIMESTAMP LX_SO_TIMESTAMP
+#define LX_SO_ACCEPTCONN 30
+
+#define LX_SO_PEERSEC 31
+#define LX_SO_SNDBUFFORCE 32
+#define LX_SO_RCVBUFFORCE 33
+#define LX_SO_PASSSEC 34
+#define LX_SO_TIMESTAMPNS 35
+#define LX_SCM_TIMESTAMPNS LX_SO_TIMESTAMPNS
+#define LX_SO_MARK 36
+#define LX_SO_TIMESTAMPING 37
+#define LX_SCM_TIMESTAMPING LX_SO_TIMESTAMPING
+#define LX_SO_PROTOCOL 38
+#define LX_SO_DOMAIN 39
+#define LX_SO_RXQ_OVFL 40
+#define LX_SO_WIFI_STATUS 41
+#define LX_SCM_WIFI_STATUS LX_SO_WIFI_STATUS
+#define LX_SO_PEEK_OFF 42
+#define LX_SO_NOFCS 43
+#define LX_SO_LOCK_FILTER 44
+#define LX_SO_SELECT_ERR_QUEUE 45
+#define LX_SO_BUSY_POLL 46
+#define LX_SO_MAX_PACING_RATE 47
+#define LX_SO_BPF_EXTENSIONS 48
+
+/*
+ * Options for use with [gs]etsockopt at the RAW level.
+ * IPPROTO_RAW
+ */
+#define LX_ICMP_FILTER 1
+
+/*
+ * Options for use with [gs]etsockopt at the PACKET level.
+ * SOL_PACKET
+ */
+#define LX_SOL_PACKET 263
+
+#define LX_PACKET_ADD_MEMBERSHIP 1
+#define LX_PACKET_DROP_MEMBERSHIP 2
+#define LX_PACKET_RECV_OUTPUT 3
+#define LX_PACKET_RX_RING 5
+#define LX_PACKET_STATISTICS 6
+
+/*
+ * Options for use with [gs]etsockopt at the NETLINK level.
+ * SOL_NETLINK
+ */
+#define LX_SOL_NETLINK 270
+
+/*
+ * Linux socket type definitions
+ */
+#define LX_SOCK_STREAM 1 /* Connection-based byte streams */
+#define LX_SOCK_DGRAM 2 /* Connectionless, datagram */
+#define LX_SOCK_RAW 3 /* Raw protocol interface */
+#define LX_SOCK_RDM 4 /* Reliably-delivered message */
+#define LX_SOCK_SEQPACKET 5 /* Sequenced packet stream */
+#define LX_SOCK_PACKET 10 /* Linux specific */
+#define LX_SOCK_MAX 11
+
+/*
+ * The Linux socket type can be or-ed with other flags (e.g. SOCK_CLOEXEC).
+ */
+#define LX_SOCK_TYPE_MASK 0xf
+
+/*
+ * Linux flags for socket, socketpair and accept4. These are or-ed into the
+ * socket type value. In the Linux net.h header these come from fcntl.h (note
+ * that they are in octal in the Linux header).
+ */
+#define LX_SOCK_CLOEXEC 0x80000
+#define LX_SOCK_NONBLOCK 0x800
+
+#define SOCK_NOTSUPPORTED -1
+#define SOCK_INVAL -2
+
+/*
+ * PF_PACKET protocol definitions.
+ */
+#define LX_ETH_P_802_3 0x0001
+#define LX_ETH_P_ALL 0x0003
+#define LX_ETH_P_802_2 0x0004
+#define LX_ETH_P_IP 0x0800
+#define LX_ETH_P_ARP 0x0806
+#define LX_ETH_P_IPV6 0x86DD
+
+/*
+ * IP Protocol levels. Some of these match the Illumos IPPROTO_* values.
+ */
+#define LX_IPPROTO_IP 0
+#define LX_IPPROTO_ICMP 1
+#define LX_IPPROTO_IGMP 2
+#define LX_IPPROTO_TCP 6
+#define LX_IPPROTO_UDP 17
+#define LX_IPPROTO_IPV6 41
+#define LX_IPPROTO_ICMPV6 58
+#define LX_IPPROTO_RAW 255
+
+/*
+ * Options for use with [gs]etsockopt at the IP level.
+ * IPPROTO_IP
+ */
+#define LX_IP_TOS 1
+#define LX_IP_TTL 2
+#define LX_IP_HDRINCL 3
+#define LX_IP_OPTIONS 4
+#define LX_IP_ROUTER_ALERT 5
+#define LX_IP_RECVOPTS 6
+#define LX_IP_RETOPTS 7
+#define LX_IP_PKTINFO 8
+#define LX_IP_PKTOPTIONS 9
+#define LX_IP_MTU_DISCOVER 10
+#define LX_IP_RECVERR 11
+#define LX_IP_RECVTTL 12
+#define LX_IP_RECVTOS 13
+#define LX_IP_MTU 14
+#define LX_IP_FREEBIND 15
+#define LX_IP_IPSEC_POLICY 16
+#define LX_IP_XFRM_POLICY 17
+#define LX_IP_PASSSEC 18
+#define LX_IP_TRANSPARENT 19
+#define LX_IP_ORIGDSTADDR 20
+#define LX_IP_MINTTL 21
+#define LX_IP_NODEFRAG 22
+/* Linux apparently leaves a gap here */
+#define LX_IP_MULTICAST_IF 32
+#define LX_IP_MULTICAST_TTL 33
+#define LX_IP_MULTICAST_LOOP 34
+#define LX_IP_ADD_MEMBERSHIP 35
+#define LX_IP_DROP_MEMBERSHIP 36
+#define LX_IP_UNBLOCK_SOURC 37
+#define LX_IP_BLOCK_SOURCE 38
+#define LX_IP_ADD_SOURCE_MEMBERSHIP 39
+#define LX_IP_DROP_SOURCE_MEMBERSHIP 40
+#define LX_IP_MSFILTER 41
+#define LX_MCAST_JOIN_GROUP 42
+#define LX_MCAST_BLOCK_SOURCE 43
+#define LX_MCAST_UNBLOCK_SOURCE 44
+#define LX_MCAST_LEAVE_GROUP 45
+#define LX_MCAST_JOIN_SOURCE_GROUP 46
+#define LX_MCAST_LEAVE_SOURCE_GROUP 47
+#define LX_MCAST_MSFILTER 48
+#define LX_IP_MULTICAST_ALL 49
+#define LX_IP_UNICAST_IF 50
+
+/*
+ * LX_IP_MTU_DISCOVER values
+ */
+#define LX_IP_PMTUDISC_DONT 0
+#define LX_IP_PMTUDISC_WANT 1
+#define LX_IP_PMTUDISC_DO 2
+#define LX_IP_PMTUDISC_PROBE 3
+#define LX_IP_PMTUDISC_INTERFACE 4
+#define LX_IP_PMTUDISC_OMIT 5
+
+/*
+ * Options for use with [gs]etsockopt at the IP level.
+ * IPPROTO_IPV6
+ */
+
+#define LX_IPV6_ADDRFORM 1
+#define LX_IPV6_2292PKTINFO 2
+#define LX_IPV6_2292HOPOPTS 3
+#define LX_IPV6_2292DSTOPTS 4
+#define LX_IPV6_2292RTHDR 5
+#define LX_IPV6_2292PKTOPTIONS 6
+#define LX_IPV6_CHECKSUM 7
+#define LX_IPV6_2292HOPLIMIT 8
+#define LX_IPV6_NEXTHOP 9
+#define LX_IPV6_AUTHHDR 10
+#define LX_IPV6_UNICAST_HOPS 16
+#define LX_IPV6_MULTICAST_IF 17
+#define LX_IPV6_MULTICAST_HOPS 18
+#define LX_IPV6_MULTICAST_LOOP 19
+#define LX_IPV6_JOIN_GROUP 20
+#define LX_IPV6_LEAVE_GROUP 21
+#define LX_IPV6_ROUTER_ALERT 22
+#define LX_IPV6_MTU_DISCOVER 23
+#define LX_IPV6_MTU 24
+#define LX_IPV6_RECVERR 25
+#define LX_IPV6_V6ONLY 26
+#define LX_IPV6_JOIN_ANYCAST 27
+#define LX_IPV6_LEAVE_ANYCAST 28
+#define LX_IPV6_IPSEC_POLICY 34
+#define LX_IPV6_XFRM_POLICY 35
+
+#define LX_IPV6_RECVPKTINFO 49
+#define LX_IPV6_PKTINFO 50
+#define LX_IPV6_RECVHOPLIMIT 51
+#define LX_IPV6_HOPLIMIT 52
+#define LX_IPV6_RECVHOPOPTS 53
+#define LX_IPV6_HOPOPTS 54
+#define LX_IPV6_RTHDRDSTOPTS 55
+#define LX_IPV6_RECVRTHDR 56
+#define LX_IPV6_RTHDR 57
+#define LX_IPV6_RECVDSTOPTS 58
+#define LX_IPV6_DSTOPTS 59
+#define LX_IPV6_RECVTCLASS 66
+#define LX_IPV6_TCLASS 67
+
+/*
+ * Options for use with [gs]etsockopt at the IP level.
+ * IPPROTO_ICMPV6
+ */
+
+#define LX_ICMP6_FILTER 1
+
+/*
+ * Options for use with [gs]etsockopt at the TCP level.
+ * IPPROTO_TCP
+ */
+#define LX_TCP_NODELAY 1 /* Don't delay send to coalesce packets */
+#define LX_TCP_MAXSEG 2 /* Set maximum segment size */
+#define LX_TCP_CORK 3 /* Control sending of partial frames */
+#define LX_TCP_KEEPIDLE 4 /* Start keeplives after this period */
+#define LX_TCP_KEEPINTVL 5 /* Interval between keepalives */
+#define LX_TCP_KEEPCNT 6 /* Number of keepalives before death */
+#define LX_TCP_SYNCNT 7 /* Number of SYN retransmits */
+#define LX_TCP_LINGER2 8 /* Life time of orphaned FIN-WAIT-2 state */
+#define LX_TCP_DEFER_ACCEPT 9 /* Wake up listener only when data arrive */
+#define LX_TCP_WINDOW_CLAMP 10 /* Bound advertised window */
+#define LX_TCP_INFO 11 /* Information about this connection. */
+#define LX_TCP_QUICKACK 12 /* Bock/reenable quick ACKs. */
+#define LX_TCP_CONGESTION 13 /* Congestion control algorithm */
+#define LX_TCP_MD5SIG 14 /* TCP MD5 Signature (RFC2385) */
+#define LX_TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts on thin streams */
+#define LX_TCP_THIN_DUPACK 17 /* Fast retrans. after 1 dupack */
+#define LX_TCP_USER_TIMEOUT 18 /* How long for loss retry before timeout */
+#define LX_TCP_REPAIR 19 /* TCP socket under repair */
+#define LX_TCP_REPAIR_QUEUE 20
+#define LX_TCP_QUEUE_SEQ 21
+#define LX_TCP_REPAIR_OPTIONS 22
+#define LX_TCP_FASTOPEN 23 /* Enable FastOpen on listeners */
+#define LX_TCP_TIMESTAMP 24
+#define LX_TCP_NOTSENT_LOWAT 25 /* limit number of unsent bytes */
+
+/*
+ * Options for use with [gs]etsockopt at the IGMP level.
+ * IPPROTO_IGMP
+ */
+#define LX_IGMP_MINLEN 8
+#define LX_IGMP_MAX_HOST_REPORT_DELAY 10
+#define LX_IGMP_HOST_MEMBERSHIP_QUERY 0x11
+#define LX_IGMP_HOST_MEMBERSHIP_REPORT 0x12
+#define LX_IGMP_DVMRP 0x13
+#define LX_IGMP_PIM 0x14
+#define LX_IGMP_TRACE 0x15
+#define LX_IGMP_HOST_NEW_MEMBERSHIP_REPORT 0x16
+#define LX_IGMP_HOST_LEAVE_MESSAGE 0x17
+#define LX_IGMP_MTRACE_RESP 0x1e
+#define LX_IGMP_MTRACE 0x1f
+
+/*
+ * Linux socket flags for use with recv(2)/send(2)/recvmsg(2)/sendmsg(2)
+ */
+#define LX_MSG_OOB 0x1
+#define LX_MSG_PEEK 0x2
+#define LX_MSG_DONTROUTE 0x4
+#define LX_MSG_CTRUNC 0x8
+#define LX_MSG_PROXY 0x10
+#define LX_MSG_TRUNC 0x20
+#define LX_MSG_DONTWAIT 0x40
+#define LX_MSG_EOR 0x80
+#define LX_MSG_WAITALL 0x100
+#define LX_MSG_FIN 0x200
+#define LX_MSG_SYN 0x400
+#define LX_MSG_CONFIRM 0x800
+#define LX_MSG_RST 0x1000
+#define LX_MSG_ERRQUEUE 0x2000
+#define LX_MSG_NOSIGNAL 0x4000
+#define LX_MSG_MORE 0x8000
+#define LX_MSG_WAITFORONE 0x10000
+#define LX_MSG_FASTOPEN 0x20000000
+#define LX_MSG_CMSG_CLOEXEC 0x40000000
+
+typedef struct lx_msghdr {
+ void *msg_name; /* optional address */
+ socklen_t msg_namelen; /* size of address */
+ struct iovec *msg_iov; /* scatter/gather array */
+ size_t msg_iovlen; /* # elements in msg_iov */
+ void *msg_control; /* ancillary data */
+ size_t msg_controllen; /* ancillary data buffer len */
+ int msg_flags; /* flags on received message */
+} lx_msghdr_t;
+
+
+#if defined(_LP64)
+
+typedef struct lx_msghdr32 {
+ caddr32_t msg_name; /* optional address */
+ uint32_t msg_namelen; /* size of address */
+ caddr32_t msg_iov; /* scatter/gather array */
+ int32_t msg_iovlen; /* # elements in msg_iov */
+ caddr32_t msg_control; /* ancillary data */
+ uint32_t msg_controllen; /* ancillary data buffer len */
+ int32_t msg_flags; /* flags on received message */
+} lx_msghdr32_t;
+
+#endif
+
+typedef struct lx_sockaddr_in6 {
+ sa_family_t sin6_family;
+ in_port_t sin6_port;
+ uint32_t sin6_flowinfo;
+ struct in6_addr sin6_addr;
+ uint32_t sin6_scope_id; /* Depends on scope of sin6_addr */
+ /* one 32-bit field shorter than illumos */
+} lx_sockaddr_in6_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_SOCKET_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_syscalls.h b/usr/src/uts/common/brand/lx/sys/lx_syscalls.h
new file mode 100644
index 0000000000..64084b77f1
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_syscalls.h
@@ -0,0 +1,232 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#ifndef _SYS_LINUX_SYSCALLS_H
+#define _SYS_LINUX_SYSCALLS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+
+extern long lx_accept();
+extern long lx_accept4();
+extern long lx_access();
+extern long lx_arch_prctl();
+extern long lx_bind();
+extern long lx_brk();
+extern long lx_chmod();
+extern long lx_chown();
+extern long lx_chown16();
+extern long lx_clock_getres();
+extern long lx_clock_gettime();
+extern long lx_clock_settime();
+extern long lx_close();
+extern long lx_connect();
+extern long lx_epoll_create();
+extern long lx_epoll_create1();
+extern long lx_epoll_ctl();
+extern long lx_epoll_pwait();
+extern long lx_epoll_wait();
+extern long lx_faccessat();
+extern long lx_fallocate();
+extern long lx_fallocate32();
+extern long lx_fchmod();
+extern long lx_fchmodat();
+extern long lx_fchown();
+extern long lx_fchown16();
+extern long lx_fchownat();
+extern long lx_fcntl();
+extern long lx_fcntl64();
+extern long lx_fgetxattr();
+extern long lx_flistxattr();
+extern long lx_fremovexattr();
+extern long lx_fsetxattr();
+extern long lx_fstat32();
+extern long lx_fstat64();
+extern long lx_fstatat64();
+extern long lx_futex();
+extern long lx_get_robust_list();
+extern long lx_get_thread_area();
+extern long lx_getcpu();
+extern long lx_getcwd();
+extern long lx_getdents_32();
+extern long lx_getdents_64();
+extern long lx_getdents64();
+extern long lx_getpeername();
+extern long lx_getsockname();
+extern long lx_getpid();
+extern long lx_getppid();
+extern long lx_getrandom();
+extern long lx_getrlimit();
+extern long lx_getsockopt();
+extern long lx_gettid();
+extern long lx_gettimeofday();
+extern long lx_getxattr();
+extern long lx_io_setup();
+extern long lx_ioctl();
+extern long lx_ioprio_get();
+extern long lx_ioprio_set();
+extern long lx_kill();
+extern long lx_lchown();
+extern long lx_lchown16();
+extern long lx_lgetxattr();
+extern long lx_link();
+extern long lx_linkat();
+extern long lx_llistxattr();
+extern long lx_lremovexattr();
+extern long lx_lsetxattr();
+extern long lx_lstat32();
+extern long lx_lstat64();
+extern long lx_listxattr();
+extern long lx_mkdir();
+extern long lx_mkdirat();
+extern long lx_modify_ldt();
+extern long lx_nanosleep();
+extern long lx_oldgetrlimit();
+extern long lx_open();
+extern long lx_openat();
+extern long lx_personality();
+extern long lx_pipe();
+extern long lx_pipe2();
+extern long lx_poll();
+extern long lx_ppoll();
+extern long lx_pread();
+extern long lx_pread32();
+extern long lx_preadv();
+extern long lx_preadv32();
+extern long lx_prctl();
+extern long lx_prlimit64();
+extern long lx_pselect();
+extern long lx_ptrace();
+extern long lx_pwrite();
+extern long lx_pwrite32();
+extern long lx_pwritev();
+extern long lx_pwritev32();
+extern long lx_read();
+extern long lx_readv();
+extern long lx_recv();
+extern long lx_recvmsg();
+extern long lx_recvfrom();
+extern long lx_sched_getparam();
+extern long lx_sched_getscheduler();
+extern long lx_sched_rr_get_interval();
+extern long lx_sched_setparam();
+extern long lx_sched_setscheduler();
+extern long lx_sched_yield();
+extern long lx_select();
+extern long lx_send();
+extern long lx_sendmsg();
+extern long lx_sendto();
+extern long lx_set_robust_list();
+extern long lx_set_thread_area();
+extern long lx_set_tid_address();
+extern long lx_setresgid();
+extern long lx_setresgid16();
+extern long lx_setresuid();
+extern long lx_setresuid16();
+extern long lx_setrlimit();
+extern long lx_setxattr();
+extern long lx_setsockopt();
+extern long lx_socket();
+extern long lx_socketcall();
+extern long lx_stat32();
+extern long lx_stat64();
+extern long lx_sync_file_range();
+extern long lx_syncfs();
+extern long lx_sysinfo32();
+extern long lx_sysinfo64();
+extern long lx_removexattr();
+extern long lx_tgkill();
+extern long lx_time();
+extern long lx_tkill();
+extern long lx_uname();
+extern long lx_wait4();
+extern long lx_waitid();
+extern long lx_waitpid();
+extern long lx_write();
+extern long lx_writev();
+
+#if defined(_LP64)
+/*
+ * Linux vsyscall addresses:
+ */
+#define LX_VSYS_gettimeofday (uintptr_t)0xffffffffff600000
+#define LX_VSYS_time (uintptr_t)0xffffffffff600400
+#define LX_VSYS_getcpu (uintptr_t)0xffffffffff600800
+
+#define LX_VSYSCALL_ADDR (uintptr_t)0xffffffffff600000
+#define LX_VSYSCALL_SIZE (uintptr_t)0x1000
+#endif
+
+#endif /* _KERNEL */
+
+/*
+ * System call numbers for revectoring:
+ */
+
+#if defined(__amd64)
+#define LX_SYS_close 3
+#define LX_SYS_gettimeofday 96
+#define LX_SYS_time 201
+#define LX_SYS_io_setup 206
+#define LX_SYS_clock_gettime 228
+#define LX_SYS_getcpu 309
+
+#define LX_SYS32_close 6
+#define LX_SYS32_gettimeofday 78
+#define LX_SYS32_time 13
+#define LX_SYS32_clock_gettime 265
+#define LX_SYS32_io_setup 245
+#define LX_SYS32_getcpu 318
+#elif defined(__i386)
+#define LX_SYS_close 6
+#define LX_SYS_gettimeofday 78
+#define LX_SYS_time 13
+#define LX_SYS_clock_gettime 265
+#define LX_SYS_io_setup 245
+#define LX_SYS_getcpu 318
+#else
+#error "Architecture not supported"
+#endif /* defined(__amd64) */
+
+/*
+ * The current code in the VDSO operates under the expectation that it will be
+ * mapped at a fixed offset from the comm page. This simplifies the act of
+ * locating said page without any other reference. The VDSO must fit within
+ * this offset, matching the same value as COMM_PAGE_ALIGN.
+ * See: uts/i86pc/sys/comm_page.h
+ */
+#define LX_VDSO_SIZE 0x4000
+#define LX_VDSO_ADDR_MASK ~(LX_VDSO_SIZE - 1)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LINUX_SYSCALLS_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_types.h b/usr/src/uts/common/brand/lx/sys/lx_types.h
new file mode 100644
index 0000000000..90363c8939
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_types.h
@@ -0,0 +1,144 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_LX_TYPES_H
+#define _SYS_LX_TYPES_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef _KERNEL
+
+#define SHRT_MIN (-32768) /* min value of a "short int" */
+#define SHRT_MAX 32767 /* max value of a "short int" */
+#define USHRT_MAX 65535 /* max of "unsigned short int" */
+#define INT_MIN (-2147483647-1) /* min value of an "int" */
+#define INT_MAX 2147483647 /* max value of an "int" */
+#define UINT_MAX 4294967295U /* max value of an "unsigned int" */
+
+#ifndef LLONG_MAX
+#define LLONG_MAX 9223372036854775807LL
+#endif
+
+#if defined(_LP64)
+#define LONG_MAX 9223372036854775807L
+#define ULONG_MAX 18446744073709551615UL
+#else
+#define LONG_MAX 2147483647L /* max value of a 32-bit "long int" */
+#define ULONG_MAX 4294967295UL /* max value of a 32-bit "ulong int" */
+#endif
+
+#endif /* !_KERNEL */
+
+
+typedef uint64_t lx_dev_t;
+typedef uint16_t lx_dev16_t;
+typedef uint32_t lx_ino_t;
+typedef uint64_t lx_ino64_t;
+typedef uint32_t lx_uid_t;
+typedef uint16_t lx_uid16_t;
+typedef uint32_t lx_gid_t;
+typedef uint16_t lx_gid16_t;
+typedef uint32_t lx_off_t;
+typedef uint64_t lx_off64_t;
+typedef uint32_t lx_blksize_t;
+typedef uint32_t lx_blkcnt_t;
+typedef uint64_t lx_blkcnt64_t;
+typedef uint32_t lx_mode_t;
+typedef uint16_t lx_mode16_t;
+
+/*
+ * Linux mangles major/minor numbers into dev_t differently than SunOS.
+ */
+#ifdef _LP64
+#define LX_MAKEDEVICE(maj, min) \
+ (((min) & 0xff) | (((maj) & 0xfff) << 8) | \
+ ((uint64_t)((min) & ~0xff) << 12) | ((uint64_t)((maj) & ~0xfff) << 32))
+
+#define LX_GETMAJOR(lx_dev) ((((lx_dev) >> 8) & 0xfff) | \
+ ((((uint64_t)(lx_dev)) >> 32) & ~0xfff))
+
+#else
+#define LX_MAKEDEVICE(maj, min) \
+ (((min) & 0xff) | (((maj) & 0xfff) << 8) | (((min) & ~0xff) << 12))
+
+#define LX_GETMAJOR(lx_dev) (((lx_dev) >> 8) & 0xfff)
+#endif
+
+#define LX_GETMINOR(lx_dev) (((lx_dev) & 0xff) | (((lx_dev) >> 12) & ~0xff))
+/* Linux supports 20 bits for the minor, and 12 bits for the major number */
+#define LX_MAXMIN 0xfffff
+#define LX_MAXMAJ 0xfff
+
+/*
+ * Certain Linux tools care deeply about major/minor number mapping.
+ * Map virtual disks (zfs datasets, zvols, etc) into a safe reserved range.
+ */
+#define LX_MAJOR_DISK 203
+
+/* LX ptm driver major/minor number */
+#define LX_PTM_MAJOR 5
+#define LX_PTM_MINOR 2
+
+/* LX pts driver major number range */
+#define LX_PTS_MAJOR_MIN 136
+#define LX_PTS_MAJOR_MAX 143
+
+/* LX tty/cons driver major number */
+#define LX_TTY_MAJOR 5
+
+#define LX_UID16_TO_UID32(uid16) \
+ (((uid16) == (lx_uid16_t)-1) ? ((lx_uid_t)-1) : (lx_uid_t)(uid16))
+
+#define LX_GID16_TO_GID32(gid16) \
+ (((gid16) == (lx_gid16_t)-1) ? ((lx_gid_t)-1) : (lx_gid_t)(gid16))
+
+/* Overflow values default to NFS nobody. */
+
+#define UID16_OVERFLOW ((lx_uid16_t)65534)
+#define GID16_OVERFLOW ((lx_gid16_t)65534)
+
+/*
+ * All IDs with high word non-zero are converted to default overflow values to
+ * avoid inadvertent truncation to zero (root) (!).
+ */
+#define LX_UID32_TO_UID16(uid32) \
+ ((((uid32) & 0xffff0000) == 0) ? ((lx_uid16_t)(uid32)) : \
+ (((uid32) == ((lx_uid_t)-1)) ? ((lx_uid16_t)-1) : UID16_OVERFLOW))
+
+#define LX_GID32_TO_GID16(gid32) \
+ ((((gid32) & 0xffff0000) == 0) ? ((lx_gid16_t)(gid32)) : \
+ (((gid32) == ((lx_gid_t)-1)) ? ((lx_gid16_t)-1) : GID16_OVERFLOW))
+
+#define LX_32TO64(lo, hi) \
+ ((uint64_t)((uint64_t)(lo) | ((uint64_t)(hi) << 32)))
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_TYPES_H */
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_access.c b/usr/src/uts/common/brand/lx/syscall/lx_access.c
new file mode 100644
index 0000000000..24805a5e96
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_access.c
@@ -0,0 +1,224 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T
+ * All Rights Reserved
+ *
+ * Portions of this source code were derived from Berkeley 4.3 BSD
+ * under license from the Regents of the University of California.
+ *
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <sys/param.h>
+#include <sys/isa_defs.h>
+#include <sys/types.h>
+#include <sys/sysmacros.h>
+#include <sys/cred_impl.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/pathname.h>
+#include <sys/vnode.h>
+#include <sys/uio.h>
+#include <sys/cmn_err.h>
+#include <sys/debug.h>
+#include <sys/file.h>
+#include <fs/fs_subr.h>
+#include <c2/audit.h>
+#include <sys/fcntl.h>
+#include <sys/stat.h>
+#include <sys/mode.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_fcntl.h>
+
+/*
+ * Determine accessibility of file.
+ */
+
+#define E_OK 010 /* use effective ids */
+#define R_OK 004
+#define W_OK 002
+#define X_OK 001
+
+/*
+ * Convert Linux LX_AT_* flags to SunOS AT_* flags but skip verifying allowed
+ * flags have been passed. This also allows EACCESS/REMOVEDIR to be translated
+ * correctly since on linux they have the same value.
+ *
+ * Some code can actually pass in other bits in the flag. We may have to simply
+ * ignore these, as indicated by the enforce parameter. See lx_fchmodat for
+ * another example of this type of behavior.
+ */
+static int
+ltos_at_flag(int lflag, int allow, boolean_t enforce)
+{
+ int sflag = 0;
+
+ if ((lflag & LX_AT_EACCESS) && (allow & AT_EACCESS)) {
+ lflag &= ~LX_AT_EACCESS;
+ sflag |= AT_EACCESS;
+ }
+
+ if ((lflag & LX_AT_REMOVEDIR) && (allow & AT_REMOVEDIR)) {
+ lflag &= ~LX_AT_REMOVEDIR;
+ sflag |= AT_REMOVEDIR;
+ }
+
+ if ((lflag & LX_AT_SYMLINK_NOFOLLOW) && (allow & AT_SYMLINK_NOFOLLOW)) {
+ lflag &= ~LX_AT_SYMLINK_NOFOLLOW;
+ sflag |= AT_SYMLINK_NOFOLLOW;
+ }
+
+ /* right now solaris doesn't have a _FOLLOW flag, so use a fake one */
+ if ((lflag & LX_AT_SYMLINK_FOLLOW) && (allow & LX_AT_SYMLINK_FOLLOW)) {
+ lflag &= ~LX_AT_SYMLINK_FOLLOW;
+ sflag |= LX_AT_SYMLINK_FOLLOW;
+ }
+
+ /* If lflag is not zero than some flags did not hit the above code. */
+ if (enforce && lflag)
+ return (-EINVAL);
+
+ return (sflag);
+}
+
+/*
+ * For illumos, access() does this:
+ * If the process has appropriate privileges, an implementation may indicate
+ * success for X_OK even if none of the execute file permission bits are set.
+ *
+ * But for Linux, access() does this:
+ * If the calling process is privileged (i.e., its real UID is zero), then
+ * an X_OK check is successful for a regular file if execute permission is
+ * enabled for any of the file owner, group, or other.
+ *
+ * Linux used to behave more like illumos on older kernels:
+ * In kernel 2.4 (and earlier) there is some strangeness in the handling
+ * of X_OK tests for superuser. If all categories of execute permission
+ * are disabled for a nondirectory file, then the only access() test that
+ * returns -1 is when mode is specified as just X_OK; if R_OK or W_OK is
+ * also specified in mode, then access() returns 0 for such files.
+ *
+ * So we need to handle the case where a privileged process is checking for
+ * X_OK but none of the execute bits are set on the file. We'll keep the old
+ * 2.4 behavior for 2.4 emulation but use the new behavior for any other
+ * kernel rev.
+ */
+static int
+lx_common_access(char *fname, int fmode, vnode_t *startvp)
+{
+ vnode_t *vp;
+ cred_t *tmpcr;
+ int error;
+ int mode;
+ cred_t *cr;
+ int estale_retry = 0;
+
+ if (fmode & ~(E_OK|R_OK|W_OK|X_OK))
+ return (EINVAL);
+
+ mode = ((fmode & (R_OK|W_OK|X_OK)) << 6);
+
+ cr = CRED();
+
+ /* OK to use effective uid/gid, i.e., no need to crdup(CRED())? */
+ if ((fmode & E_OK) != 0 ||
+ (cr->cr_uid == cr->cr_ruid && cr->cr_gid == cr->cr_rgid)) {
+ tmpcr = cr;
+ crhold(tmpcr);
+ } else {
+ tmpcr = crdup(cr);
+ tmpcr->cr_uid = cr->cr_ruid;
+ tmpcr->cr_gid = cr->cr_rgid;
+ tmpcr->cr_ruid = cr->cr_uid;
+ tmpcr->cr_rgid = cr->cr_gid;
+ }
+
+lookup:
+ if ((error = lookupnameatcred(fname, UIO_USERSPACE, FOLLOW, NULLVPP,
+ &vp, startvp, tmpcr)) != 0) {
+ if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
+ goto lookup;
+ crfree(tmpcr);
+ return (error);
+ }
+
+ if (mode != 0) {
+ error = VOP_ACCESS(vp, mode, 0, tmpcr, NULL);
+ if (error != 0) {
+ if ((error == ESTALE) &&
+ fs_need_estale_retry(estale_retry++)) {
+ VN_RELE(vp);
+ goto lookup;
+ }
+
+ } else if ((fmode & X_OK) != 0 && cr->cr_ruid == 0 &&
+ lx_kern_release_cmp(curproc->p_zone, "2.4.0") > 0) {
+ /* check for incorrect execute success */
+ vattr_t va;
+
+ va.va_mask = AT_MODE;
+ if ((error = VOP_GETATTR(vp, &va, 0, cr, NULL)) == 0) {
+ mode_t m = VTTOIF(va.va_type) | va.va_mode;
+
+ if ((m & S_IFMT) == S_IFREG &&
+ !(m & (S_IXUSR | S_IXGRP | S_IXOTH))) {
+ /* no execute bits set in the mode */
+ error = EACCES;
+ }
+ }
+ }
+ }
+
+ crfree(tmpcr);
+ VN_RELE(vp);
+ return (error);
+}
+
+int
+lx_faccessat(int atfd, char *fname, int fmode, int flag)
+{
+ vnode_t *startvp;
+ int error;
+
+ if (atfd == LX_AT_FDCWD)
+ atfd = AT_FDCWD;
+
+ if ((flag = ltos_at_flag(flag, AT_EACCESS, B_FALSE)) < 0)
+ return (set_errno(EINVAL));
+
+ if (fname == NULL)
+ return (set_errno(EFAULT));
+ if ((error = fgetstartvp(atfd, fname, &startvp)) != 0)
+ return (set_errno(error));
+ if (AU_AUDITING() && startvp != NULL)
+ audit_setfsat_path(1);
+
+ /* Do not allow E_OK unless AT_EACCESS flag is set */
+ if ((flag & AT_EACCESS) == 0)
+ fmode &= ~E_OK;
+
+ error = lx_common_access(fname, fmode, startvp);
+ if (startvp != NULL)
+ VN_RELE(startvp);
+ if (error)
+ return (set_errno(error));
+ return (0);
+}
+
+int
+lx_access(char *fname, int fmode)
+{
+ return (lx_faccessat(LX_AT_FDCWD, fname, fmode, 0));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_aio.c b/usr/src/uts/common/brand/lx/syscall/lx_aio.c
new file mode 100644
index 0000000000..12f37ea4c7
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_aio.c
@@ -0,0 +1,45 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <sys/systm.h>
+#include <sys/mutex.h>
+#include <sys/brand.h>
+
+#include <sys/lx_brand.h>
+#include <sys/lx_syscalls.h>
+
+
+long
+lx_io_setup(unsigned int nr_events, void **ctxp)
+{
+ lx_proc_data_t *lxpd = ptolxproc(curproc);
+ uintptr_t uargs[2] = {(uintptr_t)nr_events, (uintptr_t)ctxp};
+
+ mutex_enter(&curproc->p_lock);
+ lxpd->l_flags |= LX_PROC_AIO_USED;
+ mutex_exit(&curproc->p_lock);
+
+ ttolxlwp(curthread)->br_eosys = JUSTRETURN;
+#if defined(_LP64)
+ if (get_udatamodel() != DATAMODEL_NATIVE) {
+ lx_emulate_user32(ttolwp(curthread), LX_SYS32_io_setup, uargs);
+ } else
+#endif
+ {
+ lx_emulate_user(ttolwp(curthread), LX_SYS_io_setup, uargs);
+ }
+ /* NOTREACHED */
+ return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_brk.c b/usr/src/uts/common/brand/lx/syscall/lx_brk.c
new file mode 100644
index 0000000000..19a7577ac0
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_brk.c
@@ -0,0 +1,57 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+
+/*
+ * The brk() system call needs to be in-kernel because Linux expects a call to
+ * brk(0) to return the current breakpoint. In Solaris, the process breakpoint
+ * is setup and managed by libc. Due to the way we link our libraries and the
+ * need for Linux to manage its own breakpoint, this has to remain in the
+ * kernel.
+ */
+extern int brk(caddr_t);
+
+long
+lx_brk(caddr_t nva)
+{
+ proc_t *p = curproc;
+ klwp_t *lwp = ttolwp(curthread);
+
+ if (nva != 0) {
+ (void) brk(nva);
+
+ /*
+ * Despite claims to the contrary in the manpage, when Linux
+ * brk() fails, errno is left unchanged.
+ */
+ lwp->lwp_errno = 0;
+ }
+ return ((long)(p->p_brkbase + p->p_brksize));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_chmod.c b/usr/src/uts/common/brand/lx/syscall/lx_chmod.c
new file mode 100644
index 0000000000..7783b97cb0
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_chmod.c
@@ -0,0 +1,107 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <sys/systm.h>
+#include <sys/fcntl.h>
+#include <sys/thread.h>
+#include <sys/klwp.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_fcntl.h>
+
+long
+lx_vn_chmod(vnode_t *vp, int mode)
+{
+ vattr_t vattr;
+
+ vattr.va_mode = mode & MODEMASK;
+ vattr.va_mask = AT_MODE;
+
+ if (vn_is_readonly(vp)) {
+ return (EROFS);
+ }
+ return (VOP_SETATTR(vp, &vattr, 0, CRED(), NULL));
+}
+
+static long
+lx_fchmodat_wrapper(int fd, char *path, int mode)
+{
+ long error;
+ vnode_t *vp;
+
+ if ((error = lx_vp_at(fd, path, &vp, 0)) != 0) {
+ lx_proc_data_t *pd = ttolxproc(curthread);
+
+ /*
+ * If the process is in "install mode", return success
+ * if the operation failed due to an absent file.
+ */
+ if (error == ENOENT &&
+ (pd->l_flags & LX_PROC_INSTALL_MODE)) {
+ return (0);
+ }
+ return (set_errno(error));
+ }
+
+ error = lx_vn_chmod(vp, mode);
+ VN_RELE(vp);
+
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (0);
+}
+
+long
+lx_fchmodat(int fd, char *path, int mode)
+{
+ return (lx_fchmodat_wrapper(fd, path, mode));
+}
+
+long
+lx_fchmod(int fd, int mode)
+{
+ file_t *fp;
+ vnode_t *vp;
+ long error;
+
+ /*
+ * In order to do proper O_PATH handling, lx_fchmod cannot leverage
+ * lx_fchmodat with a NULL path since the desired behavior differs.
+ */
+ if ((fp = getf(fd)) == NULL) {
+ return (set_errno(EBADF));
+ }
+ if (LX_IS_O_PATH(fp)) {
+ releasef(fd);
+ return (set_errno(EBADF));
+ }
+ vp = fp->f_vnode;
+ VN_HOLD(vp);
+ releasef(fd);
+
+ error = lx_vn_chmod(vp, mode);
+ VN_RELE(vp);
+
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (0);
+}
+
+long
+lx_chmod(char *path, int mode)
+{
+ return (lx_fchmodat_wrapper(LX_AT_FDCWD, path, mode));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_chown.c b/usr/src/uts/common/brand/lx/syscall/lx_chown.c
new file mode 100644
index 0000000000..830fba0a73
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_chown.c
@@ -0,0 +1,180 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <sys/systm.h>
+#include <sys/fcntl.h>
+#include <sys/zone.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_fcntl.h>
+#include <sys/lx_types.h>
+
+long
+lx_vn_chown(vnode_t *vp, uid_t uid, gid_t gid)
+{
+ vattr_t vattr;
+ zone_t *zone = crgetzone(CRED());
+
+ if ((uid != (uid_t)-1 && !VALID_UID(uid, zone)) ||
+ (gid != (gid_t)-1 && !VALID_GID(gid, zone))) {
+ return (EINVAL);
+ }
+ vattr.va_uid = uid;
+ vattr.va_gid = gid;
+ vattr.va_mask = 0;
+ if (vattr.va_uid != -1)
+ vattr.va_mask |= AT_UID;
+ if (vattr.va_gid != -1)
+ vattr.va_mask |= AT_GID;
+
+ if (vn_is_readonly(vp)) {
+ return (EROFS);
+ }
+ return (VOP_SETATTR(vp, &vattr, 0, CRED(), NULL));
+}
+
+long
+lx_fchownat_wrapper(int fd, char *path, uid_t uid, gid_t gid, int native_flag)
+{
+ long error;
+ vnode_t *vp;
+
+ if ((error = lx_vp_at(fd, path, &vp, native_flag)) != 0) {
+ lx_proc_data_t *pd = ttolxproc(curthread);
+
+ /*
+ * If the process is in "install mode", return success
+ * if the operation failed due to an absent file.
+ */
+ if (error == ENOENT &&
+ (pd->l_flags & LX_PROC_INSTALL_MODE)) {
+ return (0);
+ }
+ return (set_errno(error));
+ }
+
+ error = lx_vn_chown(vp, uid, gid);
+ VN_RELE(vp);
+
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (0);
+}
+
+long
+lx_fchown_wrapper(int fd, uid_t uid, gid_t gid)
+{
+ file_t *fp;
+ vnode_t *vp;
+ long error;
+
+ /*
+ * In order to do proper O_PATH handling, lx_fchown cannot leverage
+ * lx_fchownat with a NULL path since the desired behavior differs.
+ */
+ if ((fp = getf(fd)) == NULL) {
+ return (set_errno(EBADF));
+ }
+ if (LX_IS_O_PATH(fp)) {
+ releasef(fd);
+ return (set_errno(EBADF));
+ }
+ vp = fp->f_vnode;
+ VN_HOLD(vp);
+ releasef(fd);
+
+ error = lx_vn_chown(vp, uid, gid);
+ VN_RELE(vp);
+
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (0);
+}
+
+long
+lx_fchownat(int fd, char *path, uid_t uid, gid_t gid, int flag)
+{
+ int native_flag = 0;
+
+ if (flag & LX_AT_EMPTY_PATH) {
+ char c;
+
+ /*
+ * According to fchownat(2), when AT_EMPTY_PATH is set: "if
+ * path is an empty string, operate on the file referred to by
+ * fd". We pass NULL in place of the empty string, which
+ * causes fchownat() to operate on the fd we passed without an
+ * additional lookup.
+ */
+ if (copyin(path, &c, sizeof (c)) != 0) {
+ return (set_errno(EFAULT));
+ }
+ if (c == '\0') {
+ path = NULL;
+ }
+
+ flag &= ~LX_AT_EMPTY_PATH;
+ }
+ if (flag & LX_AT_SYMLINK_NOFOLLOW) {
+ flag &= ~LX_AT_SYMLINK_NOFOLLOW;
+ native_flag |= AT_SYMLINK_NOFOLLOW;
+ }
+ if (flag != 0) {
+ return (set_errno(EINVAL));
+ }
+
+ return (lx_fchownat_wrapper(fd, path, uid, gid, native_flag));
+}
+
+long
+lx_fchown(int fd, uid_t uid, gid_t gid)
+{
+ return (lx_fchown_wrapper(fd, uid, gid));
+}
+
+long
+lx_lchown(char *path, uid_t uid, gid_t gid)
+{
+ return (lx_fchownat_wrapper(AT_FDCWD, path, uid, gid,
+ AT_SYMLINK_NOFOLLOW));
+}
+
+long
+lx_chown(char *path, uid_t uid, gid_t gid)
+{
+ return (lx_fchownat_wrapper(AT_FDCWD, path, uid, gid, 0));
+}
+
+long
+lx_fchown16(int fd, lx_uid16_t uid, lx_gid16_t gid)
+{
+ return (lx_fchown_wrapper(fd, LX_UID16_TO_UID32(uid),
+ LX_GID16_TO_GID32(gid)));
+}
+
+long
+lx_lchown16(char *path, uid_t uid, gid_t gid)
+{
+ return (lx_fchownat_wrapper(AT_FDCWD, path, LX_UID16_TO_UID32(uid),
+ LX_GID16_TO_GID32(gid), AT_SYMLINK_NOFOLLOW));
+}
+
+long
+lx_chown16(char *path, lx_uid16_t uid, lx_gid16_t gid)
+{
+ return (lx_fchownat_wrapper(AT_FDCWD, path, LX_UID16_TO_UID32(uid),
+ LX_GID16_TO_GID32(gid), 0));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_clone.c b/usr/src/uts/common/brand/lx/syscall/lx_clone.c
new file mode 100644
index 0000000000..50cdeaeab9
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_clone.c
@@ -0,0 +1,143 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_ldt.h>
+#include <sys/lx_misc.h>
+#include <lx_signum.h>
+#include <lx_syscall.h>
+#include <sys/x86_archext.h>
+#include <sys/controlregs.h>
+
+/*
+ * Our lwp has already been created at this point, so this routine is
+ * responsible for setting up all the state needed to track this as a
+ * linux cloned thread.
+ */
+/* ARGSUSED */
+int
+lx_helper_clone(int64_t *rval, int flags, void *ptidp, void *tls, void *ctidp)
+{
+ struct lx_lwp_data *lwpd = ttolxlwp(curthread);
+ struct lx_proc_data *lproc = ttolxproc(curthread);
+ struct ldt_info info;
+ struct user_desc descr;
+ int tls_index;
+ int entry = -1;
+ int signo;
+
+ signo = flags & LX_CSIGNAL;
+ if (signo < 0 || signo > LX_NSIG)
+ return (set_errno(EINVAL));
+
+ if (!(flags & LX_CLONE_THREAD)) {
+ lproc->l_signal = signo;
+ } else {
+ if (flags & LX_CLONE_SETTLS) {
+ if (get_udatamodel() == DATAMODEL_ILP32) {
+ if (copyin((caddr_t)tls, &info, sizeof (info)))
+ return (set_errno(EFAULT));
+
+ if (LDT_INFO_EMPTY(&info))
+ return (set_errno(EINVAL));
+
+ entry = info.entry_number;
+ if (entry < GDT_TLSMIN || entry > GDT_TLSMAX)
+ return (set_errno(EINVAL));
+
+ tls_index = entry - GDT_TLSMIN;
+
+ /*
+ * Convert the user-space structure into a real
+ * x86 descriptor and copy it into this LWP's
+ * TLS array. We also load it into the GDT.
+ */
+ LDT_INFO_TO_DESC(&info, &descr);
+ bcopy(&descr, &lwpd->br_tls[tls_index],
+ sizeof (descr));
+ lx_set_gdt(entry, &lwpd->br_tls[tls_index]);
+ } else {
+ /*
+ * Set the Linux %fsbase for this LWP. We will
+ * restore it the next time we return to Linux
+ * via setcontext()/lx_restorecontext().
+ */
+ lwpd->br_lx_fsbase = (uintptr_t)tls;
+ }
+ }
+
+ lwpd->br_clear_ctidp =
+ (flags & LX_CLONE_CHILD_CLEARTID) ? ctidp : NULL;
+
+ if (signo && ! (flags & LX_CLONE_DETACH))
+ lwpd->br_signal = signo;
+ else
+ lwpd->br_signal = 0;
+
+ if (flags & LX_CLONE_THREAD)
+ lwpd->br_tgid = curthread->t_procp->p_pid;
+
+ if (flags & LX_CLONE_PARENT)
+ lwpd->br_ppid = 0;
+
+ if ((flags & LX_CLONE_CHILD_SETTID) && (ctidp != NULL) &&
+ (suword32(ctidp, lwpd->br_pid) != 0)) {
+ if (entry >= 0)
+ lx_clear_gdt(entry);
+ return (set_errno(EFAULT));
+ }
+ if ((flags & LX_CLONE_PARENT_SETTID) && (ptidp != NULL) &&
+ (suword32(ptidp, lwpd->br_pid) != 0)) {
+ if (entry >= 0)
+ lx_clear_gdt(entry);
+ return (set_errno(EFAULT));
+ }
+ }
+
+ *rval = lwpd->br_pid;
+ return (0);
+}
+
+long
+lx_set_tid_address(int *tidp)
+{
+ struct lx_lwp_data *lwpd = ttolxlwp(curthread);
+ long rv;
+
+ lwpd->br_clear_ctidp = tidp;
+
+ if (curproc->p_pid == curproc->p_zone->zone_proc_initpid) {
+ rv = 1;
+ } else {
+ rv = lwpd->br_pid;
+ }
+
+ return (rv);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_close.c b/usr/src/uts/common/brand/lx/syscall/lx_close.c
new file mode 100644
index 0000000000..8df0cbbe2f
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_close.c
@@ -0,0 +1,57 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <sys/systm.h>
+#include <sys/mutex.h>
+#include <sys/brand.h>
+
+#include <sys/lx_brand.h>
+#include <sys/lx_syscalls.h>
+
+
+extern int close(int);
+
+long
+lx_close(int fdes)
+{
+ lx_proc_data_t *lxpd = ptolxproc(curproc);
+ boolean_t aio_used;
+ uintptr_t uargs[1] = {(uintptr_t)fdes};
+
+ mutex_enter(&curproc->p_lock);
+ aio_used = ((lxpd->l_flags & LX_PROC_AIO_USED) != 0);
+ mutex_exit(&curproc->p_lock);
+
+ if (!aio_used) {
+ return (close(fdes));
+ }
+
+ /*
+ * If the process potentially has any AIO contexts open, the userspace
+ * emulation must be used so that libc can properly maintain its state.
+ */
+
+ ttolxlwp(curthread)->br_eosys = JUSTRETURN;
+#if defined(_LP64)
+ if (get_udatamodel() != DATAMODEL_NATIVE) {
+ lx_emulate_user32(ttolwp(curthread), LX_SYS32_close, uargs);
+ } else
+#endif
+ {
+ lx_emulate_user(ttolwp(curthread), LX_SYS_close, uargs);
+ }
+ /* NOTREACHED */
+ return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_cpu.c b/usr/src/uts/common/brand/lx/syscall/lx_cpu.c
new file mode 100644
index 0000000000..ec8b7576d8
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_cpu.c
@@ -0,0 +1,35 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <sys/systm.h>
+#include <sys/thread.h>
+#include <sys/cpuvar.h>
+#include <sys/cmn_err.h>
+#include <sys/lx_impl.h>
+
+/*
+ * We support neither the second argument (NUMA node), nor the third (obsolete
+ * pre-2.6.24 caching functionality which was ultimately broken).
+ */
+long
+lx_getcpu(unsigned int *cpu, uintptr_t p2, uintptr_t p3)
+{
+ unsigned int curcpu = curthread->t_cpu->cpu_id;
+
+ if (copyout(&curcpu, cpu, sizeof (curcpu)) != 0)
+ return (set_errno(EFAULT));
+
+ return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_epoll.c b/usr/src/uts/common/brand/lx/syscall/lx_epoll.c
new file mode 100644
index 0000000000..62a0eccf4b
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_epoll.c
@@ -0,0 +1,272 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/thread.h>
+#include <sys/proc.h>
+#include <sys/zone.h>
+#include <sys/brand.h>
+#include <sys/epoll.h>
+#include <sys/devpoll.h>
+#include <sys/fcntl.h>
+#include <sys/file.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+#include <sys/vnode.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_types.h>
+#include <sys/lx_signal.h>
+
+static major_t devpoll_major = 0;
+
+static boolean_t
+lx_epoll_isvalid(file_t *fp)
+{
+ vnode_t *vp = fp->f_vnode;
+
+ if (vp->v_type == VCHR && getmajor(vp->v_rdev) == devpoll_major)
+ return (B_TRUE);
+ return (B_FALSE);
+}
+
+long
+lx_epoll_create1(int flags)
+{
+ int err, fd, rv;
+ int fmode = FREAD | FWRITE;
+ boolean_t cloexec = B_FALSE;
+ vnode_t *vp = NULL;
+ file_t *fp = NULL;
+
+ if (flags & EPOLL_CLOEXEC) {
+ cloexec = B_TRUE;
+ flags &= ~EPOLL_CLOEXEC;
+ }
+ if (flags != 0) {
+ /* No other flags accepted at this time */
+ return (set_errno(EINVAL));
+ }
+
+ if (falloc((vnode_t *)NULL, fmode, &fp, &fd) != 0) {
+ err = EMFILE;
+ goto error;
+ }
+ if (ldi_vp_from_name("/devices/pseudo/poll@0:poll", &vp) != 0) {
+ err = ENOENT;
+ goto error;
+ }
+ if ((err = VOP_OPEN(&vp, fmode | FKLYR, CRED(), NULL)) != 0) {
+ goto error;
+ }
+ err = VOP_IOCTL(vp, DP_EPOLLCOMPAT, 0, fmode, CRED(), &rv, NULL);
+ if (err != 0) {
+ (void) VOP_CLOSE(vp, fmode, 0, 0, CRED(), NULL);
+ goto error;
+ }
+
+ devpoll_major = getmajor(vp->v_rdev);
+
+ fp->f_vnode = vp;
+ mutex_exit(&fp->f_tlock);
+ setf(fd, fp);
+ if (cloexec) {
+ f_setfd(fd, FD_CLOEXEC);
+ }
+ return (fd);
+
+error:
+ if (fp != NULL) {
+ setf(fd, NULL);
+ unfalloc(fp);
+ }
+ if (vp != NULL) {
+ VN_RELE(vp);
+ }
+ return (set_errno(err));
+}
+
+long
+lx_epoll_create(int size)
+{
+ if (size <= 0) {
+ return (set_errno(EINVAL));
+ }
+
+ return (lx_epoll_create1(0));
+}
+
+
+/* Match values from libc implementation */
+#define EPOLLIGNORED (EPOLLMSG | EPOLLWAKEUP)
+#define EPOLLSWIZZLED \
+ (EPOLLRDHUP | EPOLLONESHOT | EPOLLET | EPOLLWRBAND | EPOLLWRNORM)
+
+long
+lx_epoll_ctl(int fd, int op, int pfd, void *event)
+{
+ epoll_event_t epevent;
+ dvpoll_epollfd_t dpevent[2];
+ file_t *fp;
+ iovec_t aiov;
+ uio_t auio;
+ uint32_t events, ev = 0;
+ int error = 0, i = 0;
+
+ dpevent[i].dpep_pollfd.fd = pfd;
+ switch (op) {
+ case EPOLL_CTL_DEL:
+ dpevent[i].dpep_pollfd.events = POLLREMOVE;
+ break;
+
+ case EPOLL_CTL_MOD:
+ /*
+ * In the modify case, we pass down two events: one to
+ * remove the event and another to add it back.
+ */
+ dpevent[i++].dpep_pollfd.events = POLLREMOVE;
+ dpevent[i].dpep_pollfd.fd = pfd;
+ /* FALLTHROUGH */
+
+ case EPOLL_CTL_ADD:
+ if (copyin(event, &epevent, sizeof (epevent)) != 0)
+ return (set_errno(EFAULT));
+
+ /*
+ * Mask off the events that we ignore, and then swizzle the
+ * events for which our values differ from their epoll(7)
+ * equivalents.
+ */
+ events = epevent.events;
+ ev = events & ~(EPOLLIGNORED | EPOLLSWIZZLED);
+
+ if (events & EPOLLRDHUP)
+ ev |= POLLRDHUP;
+ if (events & EPOLLET)
+ ev |= POLLET;
+ if (events & EPOLLONESHOT)
+ ev |= POLLONESHOT;
+ if (events & EPOLLWRNORM)
+ ev |= POLLWRNORM;
+ if (events & EPOLLWRBAND)
+ ev |= POLLWRBAND;
+
+ dpevent[i].dpep_data = epevent.data.u64;
+ dpevent[i].dpep_pollfd.events = ev;
+ break;
+
+ default:
+ return (set_errno(EINVAL));
+ }
+
+ if ((fp = getf(fd)) == NULL) {
+ return (set_errno(EBADF));
+ } else if (!lx_epoll_isvalid(fp)) {
+ releasef(fd);
+ return (set_errno(EINVAL));
+ }
+
+ aiov.iov_base = (void *)dpevent;
+ aiov.iov_len = sizeof (dvpoll_epollfd_t) * (i + 1);
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_resid = aiov.iov_len;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_loffset = 0;
+ auio.uio_fmode = fp->f_flag;
+
+ error = VOP_WRITE(fp->f_vnode, &auio, 1, fp->f_cred, NULL);
+
+ releasef(fd);
+ if (error)
+ return (set_errno(error));
+ return (0);
+}
+
+long
+lx_epoll_wait(int fd, void *events, int maxevents, int timeout)
+{
+ struct dvpoll arg;
+ file_t *fp;
+ int rv = 0, error, flag;
+
+ if (maxevents <= 0) {
+ return (set_errno(EINVAL));
+ }
+ if ((fp = getf(fd)) == NULL) {
+ return (set_errno(EBADF));
+ } else if (!lx_epoll_isvalid(fp)) {
+ releasef(fd);
+ return (set_errno(EINVAL));
+ }
+
+ arg.dp_nfds = maxevents;
+ arg.dp_timeout = timeout;
+ arg.dp_fds = (pollfd_t *)events;
+ flag = fp->f_flag | DATAMODEL_NATIVE | FKIOCTL;
+ error = VOP_IOCTL(fp->f_vnode, DP_POLL, (uintptr_t)&arg, flag,
+ fp->f_cred, &rv, NULL);
+
+ releasef(fd);
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (rv);
+}
+
+long
+lx_epoll_pwait(int fd, void *events, int maxevents, int timeout, void *sigmask)
+{
+ struct dvpoll arg;
+ file_t *fp;
+ int rv = 0, error, flag;
+ k_sigset_t ksig;
+
+ if (maxevents <= 0) {
+ return (set_errno(EINVAL));
+ }
+ if ((fp = getf(fd)) == NULL) {
+ return (set_errno(EBADF));
+ } else if (!lx_epoll_isvalid(fp)) {
+ releasef(fd);
+ return (set_errno(EINVAL));
+ }
+ if (sigmask != NULL) {
+ lx_sigset_t lsig;
+
+ if (copyin(sigmask, &lsig, sizeof (lsig)) != 0) {
+ releasef(fd);
+ return (set_errno(EFAULT));
+ }
+ lx_ltos_sigset(&lsig, &ksig);
+ arg.dp_setp = (sigset_t *)&ksig;
+ } else {
+ arg.dp_setp = NULL;
+ }
+
+ arg.dp_nfds = maxevents;
+ arg.dp_timeout = timeout;
+ arg.dp_fds = (pollfd_t *)events;
+ flag = fp->f_flag | DATAMODEL_NATIVE | FKIOCTL;
+ error = VOP_IOCTL(fp->f_vnode, DP_PPOLL, (uintptr_t)&arg, flag,
+ fp->f_cred, &rv, NULL);
+
+ releasef(fd);
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (rv);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_fallocate.c b/usr/src/uts/common/brand/lx/syscall/lx_fallocate.c
new file mode 100644
index 0000000000..338e4399fe
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_fallocate.c
@@ -0,0 +1,251 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <sys/systm.h>
+#include <sys/zone.h>
+#include <sys/types.h>
+#include <sys/filio.h>
+#include <sys/fcntl.h>
+#include <sys/stat.h>
+#include <sys/nbmlock.h>
+#include <sys/lx_impl.h>
+#include <sys/lx_brand.h>
+#include <sys/sdt.h>
+
+extern int flock_check(vnode_t *, flock64_t *, offset_t, offset_t);
+
+#define LX_FALLOC_FL_KEEP_SIZE 0x01
+#define LX_FALLOC_FL_PUNCH_HOLE 0x02
+#define LX_FALLOC_FL_NO_HIDE_STALE 0x04
+#define LX_FALLOC_FL_COLLAPSE_RANGE 0x08
+#define LX_FALLOC_FL_ZERO_RANGE 0x10
+
+#define LX_FALLOC_VALID (LX_FALLOC_FL_KEEP_SIZE | LX_FALLOC_FL_PUNCH_HOLE | \
+ LX_FALLOC_FL_NO_HIDE_STALE | LX_FALLOC_FL_COLLAPSE_RANGE | \
+ LX_FALLOC_FL_ZERO_RANGE)
+
+#define LX_FALLOC_UNSUPP (LX_FALLOC_FL_NO_HIDE_STALE | \
+ LX_FALLOC_FL_COLLAPSE_RANGE)
+
+long
+lx_fallocate(int fd, int mode, off_t offset, off_t len)
+{
+ int error = 0;
+ file_t *fp;
+ vnode_t *vp;
+ int64_t tot;
+ struct flock64 bf;
+ vattr_t vattr;
+ u_offset_t f_offset;
+ boolean_t in_crit = B_FALSE;
+
+ /*
+ * Error checking is in a specific order to make LTP happy.
+ */
+
+ tot = offset + len;
+ if (tot > (LLONG_MAX / (int64_t)1024))
+ return (set_errno(EFBIG));
+
+ if (mode & LX_FALLOC_UNSUPP)
+ return (set_errno(EOPNOTSUPP));
+
+ if ((fp = getf(fd)) == NULL)
+ return (set_errno(EBADF));
+
+ if ((fp->f_flag & FWRITE) == 0) {
+ error = EBADF;
+ goto done;
+ }
+
+ vp = fp->f_vnode;
+ if (vp->v_type != VREG) {
+ error = EINVAL;
+ goto done;
+ }
+
+ if (offset < 0 || len <= 0) {
+ error = EINVAL;
+ goto done;
+ }
+
+ if (tot < 0LL) {
+ error = EFBIG;
+ goto done;
+ }
+
+ if ((mode & ~LX_FALLOC_VALID) != 0) {
+ error = EINVAL;
+ goto done;
+ }
+
+ /*
+ * If this is the only flag then we don't actually do any work.
+ */
+ if (mode == LX_FALLOC_FL_KEEP_SIZE)
+ goto done;
+
+ bzero(&bf, sizeof (bf));
+
+ vattr.va_mask = AT_SIZE;
+ if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL)) != 0)
+ goto done;
+
+ if (mode == 0) {
+ /* Nothing to do if not extending the file */
+ if (vattr.va_size >= tot)
+ goto done;
+
+ /* Extend the file. */
+ bf.l_start = (off64_t)tot;
+ bf.l_len = (off64_t)0;
+
+ } else if (mode & LX_FALLOC_FL_PUNCH_HOLE) {
+ /*
+ * Deallocate space in the file.
+ */
+ if ((mode & LX_FALLOC_FL_KEEP_SIZE) == 0) {
+ /* this flag is required with punch hole */
+ error = EINVAL;
+ goto done;
+ }
+
+ if (mode &
+ ~(LX_FALLOC_FL_PUNCH_HOLE | LX_FALLOC_FL_KEEP_SIZE)) {
+ error = EINVAL;
+ goto done;
+ }
+
+ /* Make sure we don't extend since keep_size is set. */
+ if (vattr.va_size < tot) {
+ if (offset > vattr.va_size)
+ goto done;
+ len = (off_t)vattr.va_size - offset;
+ }
+
+ bf.l_start = (off64_t)offset;
+ bf.l_len = (off64_t)len;
+
+ } else if (mode & LX_FALLOC_FL_ZERO_RANGE) {
+ /*
+ * Zero out the space in the file.
+ */
+ if (mode &
+ ~(LX_FALLOC_FL_ZERO_RANGE | LX_FALLOC_FL_KEEP_SIZE)) {
+ error = EINVAL;
+ goto done;
+ }
+
+ /* Make sure we don't extend when keep_size is set. */
+ if (mode & LX_FALLOC_FL_KEEP_SIZE && vattr.va_size < tot) {
+ if (offset > vattr.va_size)
+ goto done;
+ len = vattr.va_size - offset;
+ }
+
+ bf.l_start = (off64_t)offset;
+ bf.l_len = (off64_t)len;
+ } else {
+ /* We should have already handled all flags */
+ VERIFY(0);
+ }
+
+ /*
+ * Check for locks in the range.
+ */
+ f_offset = fp->f_offset;
+ error = flock_check(vp, &bf, f_offset, MAXOFF_T);
+ if (error != 0)
+ goto done;
+
+ /*
+ * Check for conflicting non-blocking mandatory locks.
+ * We need to get the size again under nbl_start_crit.
+ */
+ if (nbl_need_check(vp)) {
+ u_offset_t begin;
+ ssize_t length;
+
+ nbl_start_crit(vp, RW_READER);
+ in_crit = B_TRUE;
+ vattr.va_mask = AT_SIZE;
+ if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL)) != 0)
+ goto done;
+
+ /*
+ * Make sure we don't extend when keep_size is set.
+ */
+ if (mode & LX_FALLOC_FL_KEEP_SIZE && vattr.va_size < tot) {
+ ASSERT(mode & (LX_FALLOC_FL_PUNCH_HOLE |
+ LX_FALLOC_FL_ZERO_RANGE));
+
+ /*
+ * If the size grew we can short-circuit the rest of
+ * the work, otherwise adjust bf for the vop_space
+ * call.
+ */
+ if (offset >= vattr.va_size)
+ goto done;
+ len = vattr.va_size - offset;
+ bf.l_len = (off64_t)len;
+ }
+
+ if (offset > vattr.va_size) {
+ begin = vattr.va_size;
+ length = offset - vattr.va_size;
+ } else {
+ begin = offset;
+ length = vattr.va_size - offset;
+ }
+
+ if (nbl_conflict(vp, NBL_WRITE, begin, length, 0, NULL)) {
+ error = EACCES;
+ goto done;
+ }
+ }
+
+ error = VOP_SPACE(vp, F_FREESP, &bf, 0, f_offset, fp->f_cred, NULL);
+
+done:
+ if (in_crit)
+ nbl_end_crit(vp);
+
+ releasef(fd);
+ if (error != 0)
+ return (set_errno(error));
+
+ return (0);
+}
+
+long
+lx_fallocate32(int fd, int mode, uint32_t offl, uint32_t offh, uint32_t lenl,
+ uint32_t lenh)
+{
+ int64_t offset = 0, len = 0;
+
+ /*
+ * From 32-bit callers, Linux passes the 64-bit offset and len by
+ * concatenating consecutive arguments. We must perform the same
+ * conversion here.
+ */
+ offset = offh;
+ offset = offset << 32;
+ offset |= offl;
+ len = lenh;
+ len = len << 32;
+ len |= lenl;
+
+ return (lx_fallocate(fd, mode, (off_t)offset, (off_t)len));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_fcntl.c b/usr/src/uts/common/brand/lx/syscall/lx_fcntl.c
new file mode 100644
index 0000000000..2699b9bac7
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_fcntl.c
@@ -0,0 +1,644 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <sys/systm.h>
+#include <sys/zone.h>
+#include <sys/types.h>
+#include <sys/filio.h>
+#include <sys/fcntl.h>
+#include <sys/stat.h>
+#include <sys/cmn_err.h>
+#include <sys/pathname.h>
+#include <sys/lx_impl.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_fcntl.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_socket.h>
+#include <sys/fs/fifonode.h>
+#include <sys/strsubr.h>
+#include <sys/stream.h>
+
+extern int fcntl(int, int, intptr_t);
+extern int flock_check(vnode_t *, flock64_t *, offset_t, offset_t);
+
+
+int
+lx_vp_at(int fd, char *upath, vnode_t **vpp, int flag)
+{
+ vnode_t *startvp;
+ int error;
+
+ if (fd == LX_AT_FDCWD) {
+ fd = AT_FDCWD;
+ }
+
+ if ((error = fgetstartvp(fd, upath, &startvp)) != 0) {
+ return (error);
+ }
+
+ if (upath != NULL) {
+ uio_seg_t seg = UIO_USERSPACE;
+
+ error = lookupnameat(upath, seg,
+ (flag == AT_SYMLINK_NOFOLLOW) ? NO_FOLLOW : FOLLOW,
+ NULLVPP, vpp, startvp);
+ if (startvp != NULL) {
+ VN_RELE(startvp);
+ }
+ return (error);
+ } else {
+ /* VN_HOLD was established in fgetstartvp */
+ *vpp = startvp;
+ VERIFY(*vpp);
+ return (0);
+ }
+}
+
+#define LTOS_FLOCK(l, s) \
+{ \
+ s->l_type = ltos_type(l->l_type); \
+ s->l_whence = l->l_whence; \
+ s->l_start = l->l_start; \
+ s->l_len = l->l_len; \
+ s->l_sysid = 0; /* not defined in linux */ \
+ s->l_pid = (pid_t)l->l_pid; \
+}
+
+#define STOL_FLOCK(s, l) \
+{ \
+ l->l_type = stol_type(s->l_type); \
+ l->l_whence = s->l_whence; \
+ l->l_start = s->l_start; \
+ l->l_len = s->l_len; \
+ l->l_pid = (int)s->l_pid; \
+}
+
+static short
+ltos_type(short l_type)
+{
+ switch (l_type) {
+ case LX_F_RDLCK:
+ return (F_RDLCK);
+ case LX_F_WRLCK:
+ return (F_WRLCK);
+ case LX_F_UNLCK:
+ return (F_UNLCK);
+ default:
+ return (-1);
+ }
+}
+
+static short
+stol_type(short l_type)
+{
+ switch (l_type) {
+ case F_RDLCK:
+ return (LX_F_RDLCK);
+ case F_WRLCK:
+ return (LX_F_WRLCK);
+ case F_UNLCK:
+ return (LX_F_UNLCK);
+ default:
+ /* can't ever happen */
+ return (0);
+ }
+}
+
+static void
+ltos_flock(struct lx_flock *l, struct flock64 *s)
+{
+ LTOS_FLOCK(l, s)
+}
+
+static void
+stol_flock(struct flock64 *s, struct lx_flock *l)
+{
+ STOL_FLOCK(s, l)
+}
+
+static void
+ltos_flock64(struct lx_flock64_32 *l, struct flock64 *s)
+{
+ LTOS_FLOCK(l, s)
+}
+
+static void
+stol_flock64(struct flock64 *s, struct lx_flock64_32 *l)
+{
+ STOL_FLOCK(s, l)
+}
+
+static int
+lx_fcntl_getfl(int fd)
+{
+ int retval;
+ int rc;
+
+ retval = fcntl(fd, F_GETFL, 0);
+ if (ttolwp(curthread)->lwp_errno != 0)
+ return (ttolwp(curthread)->lwp_errno);
+
+ if ((retval & O_ACCMODE) == O_RDONLY)
+ rc = LX_O_RDONLY;
+ else if ((retval & O_ACCMODE) == O_WRONLY)
+ rc = LX_O_WRONLY;
+ else
+ rc = LX_O_RDWR;
+ /* O_NDELAY != O_NONBLOCK, so we need to check for both */
+ if (retval & O_NDELAY)
+ rc |= LX_O_NDELAY;
+ if (retval & O_NONBLOCK)
+ rc |= LX_O_NONBLOCK;
+ if (retval & O_APPEND)
+ rc |= LX_O_APPEND;
+ if (retval & O_SYNC)
+ rc |= LX_O_SYNC;
+ if (retval & O_LARGEFILE)
+ rc |= LX_O_LARGEFILE;
+ if (retval & FASYNC)
+ rc |= LX_O_ASYNC;
+
+ return (rc);
+}
+
+static int
+lx_fcntl_setfl(int fd, ulong_t arg)
+{
+ int new_arg;
+
+ new_arg = 0;
+ /* LX_O_NDELAY == LX_O_NONBLOCK, so we only check for one */
+ if (arg & LX_O_NDELAY)
+ new_arg |= O_NONBLOCK;
+ if (arg & LX_O_APPEND)
+ new_arg |= O_APPEND;
+ if (arg & LX_O_SYNC)
+ new_arg |= O_SYNC;
+ if (arg & LX_O_LARGEFILE)
+ new_arg |= O_LARGEFILE;
+ if (arg & LX_O_ASYNC)
+ new_arg |= FASYNC;
+
+ return (fcntl(fd, F_SETFL, new_arg));
+}
+
+/* The default unprivileged limit in Linux is 1MB */
+static int lx_pipe_max_size = 1048576;
+
+static int
+lx_fcntl_pipesz(int fd, int cmd, ulong_t arg)
+{
+ file_t *fp;
+ vnode_t *vp;
+ stdata_t *str;
+ int err = 0, res = 0;
+
+ if ((fp = getf(fd)) == NULL) {
+ return (set_errno(EBADF));
+ }
+ vp = fp->f_vnode;
+ if (vp->v_type != VFIFO || vp->v_op != fifo_vnodeops) {
+ err = EBADF;
+ goto out;
+ }
+ VERIFY((str = vp->v_stream) != NULL);
+
+ if (cmd == LX_F_SETPIPE_SZ) {
+ stdata_t *mate;
+ intptr_t val = arg;
+
+ if (val < PAGESIZE || val > lx_pipe_max_size) {
+ err = EINVAL;
+ goto out;
+ }
+ if (!STRMATED(str)) {
+ err = strqset(RD(str->sd_wrq), QHIWAT, 0, val);
+ goto out;
+ }
+
+ /*
+ * Ensure consistent order so the set operation is always
+ * attempted on the "higher" stream first.
+ */
+ if (str > str->sd_mate) {
+ VERIFY((mate = str->sd_mate) != NULL);
+ } else {
+ mate = str;
+ VERIFY((str = mate->sd_mate) != NULL);
+ }
+
+ /*
+ * While it is unfortunate that an error could occur for the
+ * latter half of the stream pair, there is little to be done
+ * about it aside from reporting the failure.
+ */
+ if ((err = strqset(RD(str->sd_wrq), QHIWAT, 0, val)) != 0) {
+ goto out;
+ }
+ err = strqset(RD(mate->sd_wrq), QHIWAT, 0, val);
+ } else if (cmd == LX_F_GETPIPE_SZ) {
+ size_t val;
+
+ err = strqget(RD(str->sd_wrq), QHIWAT, 0, &val);
+ res = val;
+ } else {
+ /* NOTREACHED */
+ ASSERT(0);
+ }
+
+out:
+ releasef(fd);
+ if (err != 0) {
+ return (set_errno(err));
+ }
+ return (res);
+}
+
+static int
+lx_fcntl_common(int fd, int cmd, ulong_t arg)
+{
+ int rc = 0;
+ pid_t pid;
+ int error;
+ int rv;
+ int32_t flag;
+ file_t *fp;
+
+ /*
+ * We depend on the call to fcntl to set the errno if necessary.
+ */
+ ttolwp(curthread)->lwp_errno = 0;
+
+ switch (cmd) {
+ case LX_F_SETSIG:
+ case LX_F_GETSIG:
+ case LX_F_SETLEASE:
+ case LX_F_GETLEASE:
+ case LX_F_NOTIFY:
+ case LX_F_CANCELLK:
+ {
+ char buf[80];
+
+ (void) snprintf(buf, sizeof (buf),
+ "unsupported fcntl command: %d", cmd);
+ lx_unsupported(buf);
+ }
+ return (set_errno(ENOTSUP));
+
+ case LX_F_DUPFD:
+ rc = fcntl(fd, F_DUPFD, arg);
+ break;
+
+ case LX_F_DUPFD_CLOEXEC:
+ rc = fcntl(fd, F_DUPFD_CLOEXEC, arg);
+ break;
+
+ case LX_F_GETFD:
+ rc = fcntl(fd, F_GETFD, 0);
+ break;
+
+ case LX_F_SETFD:
+ rc = fcntl(fd, F_SETFD, arg);
+ break;
+
+ case LX_F_GETFL:
+ rc = lx_fcntl_getfl(fd);
+ break;
+
+ case LX_F_SETFL:
+ rc = lx_fcntl_setfl(fd, arg);
+ break;
+
+ case LX_F_SETOWN:
+ pid = (pid_t)arg;
+ if (pid == 1) {
+ /* Setown for the init process uses the real pid. */
+ pid = curzone->zone_proc_initpid;
+ }
+
+ if ((fp = getf(fd)) == NULL)
+ return (set_errno(EBADF));
+
+ rv = 0;
+
+ flag = fp->f_flag | get_udatamodel() | FKIOCTL;
+ error = VOP_IOCTL(fp->f_vnode, FIOSETOWN, (intptr_t)&pid,
+ flag, CRED(), &rv, NULL);
+ releasef(fd);
+ if (error != 0) {
+ /*
+ * On illumos F_SETOWN is only defined for sockets, but
+ * some apps hardcode to do this fcntl on other devices
+ * (e.g. /dev/tty) to setup signal handling. If the
+ * app is only setting itself to be the signal
+ * handler, we pretend to succeed.
+ */
+ if (error != EINVAL ||
+ curthread->t_procp->p_pid != pid) {
+ return (set_errno(error));
+ }
+ }
+
+ rc = 0;
+ break;
+
+ case LX_F_GETOWN:
+ if ((fp = getf(fd)) == NULL)
+ return (set_errno(EBADF));
+
+ rv = 0;
+
+ flag = fp->f_flag | get_udatamodel() | FKIOCTL;
+ error = VOP_IOCTL(fp->f_vnode, FIOGETOWN, (intptr_t)&pid,
+ flag, CRED(), &rv, NULL);
+ releasef(fd);
+ if (error != 0)
+ return (set_errno(error));
+
+ if (pid == curzone->zone_proc_initpid) {
+ /* Getown for the init process returns 1. */
+ pid = 1;
+ }
+
+ rc = pid;
+ break;
+
+ case LX_F_SETPIPE_SZ:
+ case LX_F_GETPIPE_SZ:
+ rc = lx_fcntl_pipesz(fd, cmd, arg);
+ break;
+
+ default:
+ return (set_errno(EINVAL));
+ }
+
+ return (rc);
+}
+
+static int
+lx_fcntl_lock_cmd_to_s(int lx_cmd)
+{
+ switch (lx_cmd) {
+ case LX_F_GETLK:
+ return (F_GETLK);
+ case LX_F_SETLK:
+ return (F_SETLK);
+ case LX_F_SETLKW:
+ return (F_SETLKW);
+ case LX_F_GETLK64:
+ return (F_GETLK64);
+ case LX_F_SETLK64:
+ return (F_SETLK64);
+ case LX_F_SETLKW64:
+ return (F_SETLKW64);
+ default:
+ VERIFY(0);
+ /*NOTREACHED*/
+ return (0);
+ }
+}
+
+/*
+ * This is a pain but we can't re-use the fcntl code for locking since it does
+ * its own copyin/copyout for the flock struct. Since we have to convert the
+ * struct we have to do our own copyin/out. Thus we replicate the fcntl code for
+ * these 3 cmds. Luckily it's not much.
+ */
+static int
+lx_fcntl_lock(int fd, int lx_cmd, void *arg)
+{
+ int cmd;
+ int error = 0;
+ file_t *fp;
+ vnode_t *vp;
+ int flag;
+ offset_t maxoffset;
+ u_offset_t offset;
+ model_t datamodel;
+ lx_flock_t lxflk;
+ lx_flock64_32_t lxflk64;
+ struct flock64 bf;
+
+ if ((fp = getf(fd)) == NULL)
+ return (set_errno(EBADF));
+
+ maxoffset = MAXOFF_T;
+ datamodel = DATAMODEL_NATIVE;
+#if defined(_SYSCALL32_IMPL)
+ if ((datamodel = get_udatamodel()) == DATAMODEL_ILP32)
+ maxoffset = MAXOFF32_T;
+#endif
+ vp = fp->f_vnode;
+ flag = fp->f_flag;
+ offset = fp->f_offset;
+
+ cmd = lx_fcntl_lock_cmd_to_s(lx_cmd);
+
+ switch (cmd) {
+ case F_GETLK:
+ case F_SETLK:
+ case F_SETLKW:
+ if (datamodel == DATAMODEL_NATIVE) {
+ if (copyin(arg, &lxflk, sizeof (lx_flock_t)) != 0) {
+ error = EFAULT;
+ break;
+ }
+ }
+#if defined(_SYSCALL32_IMPL)
+ else {
+ lx_flock32_t lxflk32;
+
+ if (copyin(arg, &lxflk32, sizeof (lxflk32)) != 0) {
+ error = EFAULT;
+ break;
+ }
+
+ lxflk.l_type = lxflk32.l_type;
+ lxflk.l_whence = lxflk32.l_whence;
+ lxflk.l_start = (off64_t)lxflk32.l_start;
+ lxflk.l_len = (off64_t)lxflk32.l_len;
+ lxflk.l_pid = lxflk32.l_pid;
+ }
+#endif /* _SYSCALL32_IMPL */
+
+ ltos_flock(&lxflk, &bf);
+
+ if ((error = flock_check(vp, &bf, offset, maxoffset)) != 0)
+ break;
+
+ if ((error = VOP_FRLOCK(vp, cmd, &bf, flag, offset, NULL,
+ fp->f_cred, NULL)) != 0)
+ break;
+
+ if (cmd != F_GETLK)
+ break;
+
+ /*
+ * The command is GETLK, return result.
+ */
+ stol_flock(&bf, &lxflk);
+
+ /*
+ * If no lock is found, only the type field is changed.
+ */
+ if (lxflk.l_type == LX_F_UNLCK) {
+ /* l_type always first entry, always a short */
+ if (copyout(&lxflk.l_type, &((lx_flock_t *)arg)->l_type,
+ sizeof (lxflk.l_type)))
+ error = EFAULT;
+ break;
+ }
+
+ if (bf.l_start > maxoffset || bf.l_len > maxoffset) {
+ error = EOVERFLOW;
+ break;
+ }
+
+ if (datamodel == DATAMODEL_NATIVE) {
+ if (copyout(&lxflk, arg, sizeof (lxflk)) != 0) {
+ error = EFAULT;
+ break;
+ }
+ }
+#if defined(_SYSCALL32_IMPL)
+ else {
+ lx_flock32_t lxflk32;
+
+ if (bf.l_start > MAXOFF32_T || bf.l_len > MAXOFF32_T) {
+ error = EOVERFLOW;
+ break;
+ }
+
+ lxflk32.l_type = lxflk.l_type;
+ lxflk32.l_whence = lxflk.l_whence;
+ lxflk32.l_start = lxflk.l_start;
+ lxflk32.l_len = lxflk.l_len;
+ lxflk32.l_pid = lxflk.l_pid;
+
+ if (copyout(&lxflk32, arg, sizeof (lxflk32)) != 0) {
+ error = EFAULT;
+ break;
+ }
+ }
+#endif /* _SYSCALL32_IMPL */
+ break;
+
+ case F_GETLK64:
+ case F_SETLK64:
+ case F_SETLKW64:
+ /*
+ * Large File support is only used for ILP32 apps.
+ */
+ if (datamodel != DATAMODEL_ILP32) {
+ error = EINVAL;
+ break;
+ }
+
+ if (cmd == F_GETLK64)
+ cmd = F_GETLK;
+ else if (cmd == F_SETLK64)
+ cmd = F_SETLK;
+ else if (cmd == F_SETLKW64)
+ cmd = F_SETLKW;
+
+ if (copyin(arg, &lxflk64, sizeof (lxflk64)) != 0) {
+ error = EFAULT;
+ break;
+ }
+
+ ltos_flock64(&lxflk64, &bf);
+
+ if ((error = flock_check(vp, &bf, offset, MAXOFFSET_T)) != 0)
+ break;
+
+ if ((error = VOP_FRLOCK(vp, cmd, &bf, flag, offset, NULL,
+ fp->f_cred, NULL)) != 0)
+ break;
+
+ if (cmd != F_GETLK)
+ break;
+
+ /*
+ * The command is GETLK, return result.
+ */
+ stol_flock64(&bf, &lxflk64);
+
+ /*
+ * If no lock is found, only the type field is changed.
+ */
+ if (lxflk64.l_type == LX_F_UNLCK) {
+ /* l_type always first entry, always a short */
+ if (copyout(&lxflk64.l_type,
+ &((lx_flock64_t *)arg)->l_type,
+ sizeof (lxflk64.l_type)))
+ error = EFAULT;
+ break;
+ }
+
+ if (bf.l_start > maxoffset || bf.l_len > maxoffset) {
+ error = EOVERFLOW;
+ break;
+ }
+
+ if (copyout(&lxflk64, arg, sizeof (lxflk64)) != 0) {
+ error = EFAULT;
+ break;
+ }
+ break;
+ }
+
+ releasef(fd);
+ if (error)
+ return (set_errno(error));
+
+ return (0);
+}
+
+long
+lx_fcntl(int fd, int cmd, intptr_t arg)
+{
+ switch (cmd) {
+ case LX_F_GETLK64:
+ case LX_F_SETLK64:
+ case LX_F_SETLKW64:
+ /* The 64-bit fcntl commands must go through fcntl64(). */
+ return (set_errno(EINVAL));
+
+ case LX_F_GETLK:
+ case LX_F_SETLK:
+ case LX_F_SETLKW:
+ return (lx_fcntl_lock(fd, cmd, (void *)arg));
+
+ default:
+ return (lx_fcntl_common(fd, cmd, arg));
+ }
+}
+
+long
+lx_fcntl64(int fd, int cmd, intptr_t arg)
+{
+ switch (cmd) {
+ case LX_F_GETLK:
+ case LX_F_SETLK:
+ case LX_F_SETLKW:
+ case LX_F_GETLK64:
+ case LX_F_SETLKW64:
+ case LX_F_SETLK64:
+ return (lx_fcntl_lock(fd, cmd, (void *)arg));
+
+ default:
+ return (lx_fcntl_common(fd, cmd, (ulong_t)arg));
+ }
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_futex.c b/usr/src/uts/common/brand/lx/syscall/lx_futex.c
new file mode 100644
index 0000000000..e7648e1fc3
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_futex.c
@@ -0,0 +1,1104 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/debug.h>
+#include <vm/as.h>
+#include <vm/seg.h>
+#include <vm/seg_vn.h>
+#include <vm/page.h>
+#include <sys/priv.h>
+#include <sys/mman.h>
+#include <sys/timer.h>
+#include <sys/condvar.h>
+#include <sys/inttypes.h>
+#include <sys/cmn_err.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_futex.h>
+#include <sys/lx_impl.h>
+
+/*
+ * Futexes are a Linux-specific implementation of inter-process mutexes.
+ * They are designed to use shared memory for simple, uncontested
+ * operations, and rely on the kernel to resolve any contention issues.
+ *
+ * Most of the information in this section comes from the paper "Futexes
+ * Are Tricky", by Ulrich Drepper. This paper is currently available at:
+ * http://people.redhat.com/~drepper/futex.pdf.
+ *
+ * A futex itself a 4-byte integer, which must be 4-byte aligned. The
+ * value of this integer is expected to be modified using user-level atomic
+ * operations. The futex(4) design itself does not impose any semantic
+ * constraints on the value stored in the futex; it is up to the
+ * application to define its own protocol.
+ *
+ * When the application decides that kernel intervention is required, it
+ * will use the futex(2) system call. There are 5 different operations
+ * that can be performed on a futex, using this system call. Since this
+ * interface has evolved over time, there are several different prototypes
+ * available to the user. Fortunately, there is only a single kernel-level
+ * interface:
+ *
+ * long sys_futex(void *futex1, int cmd, int val1,
+ * struct timespec *timeout, void *futex2, int val2)
+ *
+ * The kernel-level operations that may be performed on a futex are:
+ *
+ * FUTEX_WAIT
+ *
+ * Atomically verify that futex1 contains the value val1. If it
+ * doesn't, return EWOULDBLOCK. If it does contain the expected
+ * value, the thread will sleep until somebody performs a FUTEX_WAKE
+ * on the futex. The caller may also specify a timeout, indicating
+ * the maximum time the thread should sleep. If the timer expires,
+ * the call returns ETIMEDOUT. If the thread is awoken with a signal,
+ * the call returns EINTR. Otherwise, the call returns 0.
+ *
+ * FUTEX_WAKE
+ *
+ * Wake up val1 processes that are waiting on futex1. The call
+ * returns the number of blocked threads that were woken up.
+ *
+ * FUTEX_WAIT_BITSET/FUTEX_WAKE_BITSET
+ *
+ * Similar to FUTEX_WAIT/FUTEX_WAKE, but each takes an additional argument
+ * denoting a bit vector, with wakers will only waking waiters that match
+ * in one or more bits. These semantics are dubious enough, but the
+ * interface has an inconsistency that is glaring even by the
+ * embarrassingly low standards that Linux sets for itself: the timeout
+ * argument to FUTEX_WAIT_BITSET is absolute, not relative as it is for
+ * FUTEX_WAIT. And as if that weren't enough unnecessary complexity,
+ * the caller may specify this absolute timeout to be against either
+ * CLOCK_MONOTONIC or CLOCK_REALTIME -- but only for FUTEX_WAIT_BITSET,
+ * of course!
+ *
+ * FUTEX_WAKE_OP
+ *
+ * The implementation of a conditional variable in terms of futexes
+ * actually uses two futexes: one to assure sequential access and one to
+ * represent the condition variable. This implementation gives rise to a
+ * particular performance problem whereby a thread is awoken on the futex
+ * that represents the condition variable only to have to (potentially)
+ * immediately wait on the futex that protects the condition variable.
+ * (Do not confuse the futex that serves to protect the condition variable
+ * with the pthread_mutex_t associated with pthread_cond_t -- which
+ * represents a third futex.) To (over)solve this problem, FUTEX_WAKE_OP
+ * was invented, which performs an atomic compare-and-exchange on a
+ * second address in a specified fashion (that is, with a specified
+ * operation). Here are the possible operations (OPARG is defined
+ * to be 12 bit value embedded in the operation):
+ *
+ * - FUTEX_OP_SET: Sets the value at the second address to OPARG
+ * - FUTEX_OP_ADD: Adds the value to OPARG
+ * - FUTEX_OP_OR: OR's the value with OPARG
+ * - FUTEX_OP_ANDN: Performs a negated AND of the value with OPARG
+ * - FUTEX_OP_XOR: XOR's the value with OPARG
+ *
+ * After this compare-and-exchange on the second address, a FUTEX_WAKE is
+ * performed on the first address and -- if the compare-and-exchange
+ * matches a specified result based on a specified comparison operation --
+ * a FUTEX_WAKE is performed on the second address. Here are the possible
+ * comparison operations:
+ *
+ * - FUTEX_OP_CMP_EQ: If old value is CMPARG, wake
+ * - FUTEX_OP_CMP_NE: If old value is not equal to CMPARG, wake
+ * - FUTEX_OP_CMP_LT: If old value is less than CMPARG, wake
+ * - FUTEX_OP_CMP_LE: If old value is less than or equal to CMPARG, wake
+ * - FUTEX_OP_CMP_GT: If old value is greater than CMPARG, wake
+ * - FUTEX_OP_CMP_GE: If old value is greater than or equal to CMPARG, wake
+ *
+ * As a practical matter, the only way that this is used (or, some might
+ * argue, is usable) is by the implementation of pthread_cond_signal(),
+ * which uses FUTEX_WAKE_OP to -- in a single system call -- unlock the
+ * futex that protects the condition variable and wake the futex that
+ * represents the condition variable. The second wake-up is conditional
+ * because the futex that protects the condition variable (rather than the
+ * one that represents it) may or may not have waiters. Given that this
+ * is the use case, FUTEX_WAKE_OP is falsely generic: despite allowing for
+ * five different kinds of operations and six different kinds of
+ * comparision operations, in practice only one is used. (Namely, setting
+ * to 0 and waking if the old value is greater than 1 -- which denotes
+ * that waiters are present and the wakeup should be performed.) Moreover,
+ * because FUTEX_WAKE_OP does not (and cannot) optimize anything in the
+ * case that the pthread_mutex_t associated with the pthread_cond_t is
+ * held at the time of a pthread_cond_signal(), this entire mechanism is
+ * essentially for naught in this case. As one can imagine (and can
+ * verify on just about any source base that uses pthread_cond_signal()),
+ * it is overwhelmingly the common case that the lock associated with the
+ * pthread_cond_t is held at the time of pthread_cond_signal(), assuring
+ * that the problem that all of this complexity was designed to solve
+ * isn't, in fact, solved because the signalled thread simply wakes up
+ * only to block again on the held mutex. Cue a slow clap!
+ *
+ * FUTEX_CMP_REQUEUE
+ *
+ * If the value stored in futex1 matches that passed in in val2, wake
+ * up val1 processes that are waiting on futex1. Otherwise, return
+ * EAGAIN.
+ *
+ * If there are more than val1 threads waiting on the futex, remove
+ * the remaining threads from this futex, and requeue them on futex2.
+ * The caller can limit the number of threads being requeued by
+ * encoding an integral numerical value in the position usually used
+ * for the timeout pointer.
+ *
+ * The call returns the number of blocked threads that were woken up
+ * or requeued.
+ *
+ * FUTEX_REQUEUE
+ *
+ * Identical to FUTEX_CMP_REQUEUE except that it does not use val2.
+ * This command has been declared broken and obsolete, but we still
+ * need to support it.
+ *
+ * FUTEX_FD
+ *
+ * Return a file descriptor, which can be used to refer to the futex.
+ * This operation was broken by design, and was blessedly removed in
+ * Linux 2.6.26 ("because it was inherently racy"); it should go without
+ * saying that we don't support this operation.
+ */
+
+/*
+ * This structure is used to track all the threads currently waiting on a
+ * futex. There is one fwaiter_t for each blocked thread. We store all
+ * fwaiter_t's in a hash structure, indexed by the memid_t of the integer
+ * containing the futex's value.
+ *
+ * At the moment, all fwaiter_t's for a single futex are simply dumped into
+ * the hash bucket. If futex contention ever becomes a hot path, we can
+ * chain a single futex's waiters together.
+ */
+typedef struct fwaiter {
+ memid_t fw_memid; /* memid of the user-space futex */
+ kcondvar_t fw_cv; /* cond var */
+ struct fwaiter *fw_next; /* hash queue */
+ struct fwaiter *fw_prev; /* hash queue */
+ uint32_t fw_bits; /* bits waiting on */
+ volatile int fw_woken;
+} fwaiter_t;
+
+/*
+ * The structure of the robust_list, as set with the set_robust_list() system
+ * call. See lx_futex_robust_exit(), below, for details.
+ */
+typedef struct futex_robust_list {
+ uintptr_t frl_head; /* list of robust locks held */
+ uint64_t frl_offset; /* offset of lock word within a lock */
+ uintptr_t frl_pending; /* pending operation */
+} futex_robust_list_t;
+
+#if defined(_SYSCALL32_IMPL)
+
+#pragma pack(4)
+typedef struct futex_robust_list32 {
+ uint32_t frl_head; /* list of robust locks held */
+ uint32_t frl_offset; /* offset of lock word within a lock */
+ uint32_t frl_pending; /* pending operation */
+} futex_robust_list32_t;
+#pragma pack()
+
+#endif
+
+#define MEMID_COPY(s, d) \
+ { (d)->val[0] = (s)->val[0]; (d)->val[1] = (s)->val[1]; }
+#define MEMID_EQUAL(s, d) \
+ ((d)->val[0] == (s)->val[0] && (d)->val[1] == (s)->val[1])
+
+/*
+ * Because collisions on this hash table can be a source of negative
+ * scalability, we make it pretty large: 4,096 entries -- 64K. If this
+ * size is found to be insufficient, the size should be made dynamic.
+ * (Making it dynamic will be delicate because the per-chain locking will
+ * necessitate memory retiring or similar; see the 2008 ACM Queue article
+ * "Real-world concurrency" for details on this technique.)
+ */
+#define HASH_SHIFT_SZ 12
+#define HASH_SIZE (1 << HASH_SHIFT_SZ)
+#define HASH_FUNC(id) \
+ ((((uintptr_t)((id)->val[1]) >> 3) + \
+ ((uintptr_t)((id)->val[1]) >> (3 + HASH_SHIFT_SZ)) + \
+ ((uintptr_t)((id)->val[1]) >> (3 + 2 * HASH_SHIFT_SZ)) + \
+ ((uintptr_t)((id)->val[0]) >> 3) + \
+ ((uintptr_t)((id)->val[0]) >> (3 + HASH_SHIFT_SZ)) + \
+ ((uintptr_t)((id)->val[0]) >> (3 + 2 * HASH_SHIFT_SZ))) & \
+ (HASH_SIZE - 1))
+
+/*
+ * We place the per-chain lock next to the pointer to the chain itself.
+ * When compared to an array of orthogonal locks, this reduces false sharing
+ * (though adjacent entries can still be falsely shared -- just not as many),
+ * while having the additional bonus of increasing locality.
+ */
+typedef struct futex_hash {
+ kmutex_t fh_lock;
+ fwaiter_t *fh_waiters;
+} futex_hash_t;
+
+static futex_hash_t futex_hash[HASH_SIZE];
+
+static void
+futex_hashin(fwaiter_t *fwp)
+{
+ int index;
+
+ index = HASH_FUNC(&fwp->fw_memid);
+ ASSERT(MUTEX_HELD(&futex_hash[index].fh_lock));
+
+ fwp->fw_prev = NULL;
+ fwp->fw_next = futex_hash[index].fh_waiters;
+ if (fwp->fw_next)
+ fwp->fw_next->fw_prev = fwp;
+ futex_hash[index].fh_waiters = fwp;
+}
+
+static void
+futex_hashout(fwaiter_t *fwp)
+{
+ int index;
+
+ index = HASH_FUNC(&fwp->fw_memid);
+ ASSERT(MUTEX_HELD(&futex_hash[index].fh_lock));
+
+ if (fwp->fw_prev)
+ fwp->fw_prev->fw_next = fwp->fw_next;
+ if (fwp->fw_next)
+ fwp->fw_next->fw_prev = fwp->fw_prev;
+ if (futex_hash[index].fh_waiters == fwp)
+ futex_hash[index].fh_waiters = fwp->fw_next;
+
+ fwp->fw_prev = NULL;
+ fwp->fw_next = NULL;
+}
+
+/*
+ * Go to sleep until somebody does a WAKE operation on this futex, we get a
+ * signal, or the timeout expires.
+ */
+static int
+futex_wait(memid_t *memid, caddr_t addr,
+ int val, timespec_t *timeout, uint32_t bits)
+{
+ kthread_t *t = curthread;
+ int err, ret;
+ int32_t curval;
+ fwaiter_t fw;
+ int index;
+
+ /*
+ * The LMS_USER_LOCK micro state becomes valid if we sleep; otherwise
+ * our time will accrue against LMS_SYSTEM. Use of this micro state
+ * is modelled on lwp_mutex_timedlock(), a native analogue of
+ * futex_wait().
+ */
+ (void) new_mstate(t, LMS_USER_LOCK);
+
+ fw.fw_woken = 0;
+ fw.fw_bits = bits;
+
+ MEMID_COPY(memid, &fw.fw_memid);
+ cv_init(&fw.fw_cv, NULL, CV_DEFAULT, NULL);
+
+ index = HASH_FUNC(&fw.fw_memid);
+ mutex_enter(&futex_hash[index].fh_lock);
+
+ if (fuword32(addr, (uint32_t *)&curval)) {
+ err = set_errno(EFAULT);
+ goto out;
+ }
+ if (curval != val) {
+ err = set_errno(EWOULDBLOCK);
+ goto out;
+ }
+
+ futex_hashin(&fw);
+
+ err = 0;
+ while ((fw.fw_woken == 0) && (err == 0)) {
+ ret = cv_waituntil_sig(&fw.fw_cv, &futex_hash[index].fh_lock,
+ timeout, timechanged);
+ if (ret < 0) {
+ err = set_errno(ETIMEDOUT);
+ } else if (ret == 0) {
+ /*
+ * According to signal(7), a futex(2) call with the
+ * FUTEX_WAIT operation is restartable.
+ */
+ ttolxlwp(t)->br_syscall_restart = B_TRUE;
+ err = set_errno(EINTR);
+ }
+ }
+
+ /*
+ * The futex is normally hashed out in wakeup. If we timed out or
+ * got a signal, we need to hash it out here instead.
+ */
+ if (fw.fw_woken == 0)
+ futex_hashout(&fw);
+
+out:
+ mutex_exit(&futex_hash[index].fh_lock);
+
+ return (err);
+}
+
+/*
+ * Wake up to wake_threads threads that are blocked on the futex at memid.
+ */
+static int
+futex_wake(memid_t *memid, int wake_threads, uint32_t mask)
+{
+ fwaiter_t *fwp, *next;
+ int index;
+ int ret = 0;
+
+ index = HASH_FUNC(memid);
+
+ mutex_enter(&futex_hash[index].fh_lock);
+
+ for (fwp = futex_hash[index].fh_waiters;
+ fwp != NULL && ret < wake_threads; fwp = next) {
+ next = fwp->fw_next;
+ if (MEMID_EQUAL(&fwp->fw_memid, memid) &&
+ (fwp->fw_bits & mask)) {
+ futex_hashout(fwp);
+ fwp->fw_woken = 1;
+ cv_signal(&fwp->fw_cv);
+ ret++;
+ }
+ }
+
+ mutex_exit(&futex_hash[index].fh_lock);
+
+ return (ret);
+}
+
+static int
+futex_wake_op_execute(int32_t *addr, int32_t val3)
+{
+ int32_t op = FUTEX_OP_OP(val3);
+ int32_t cmp = FUTEX_OP_CMP(val3);
+ int32_t cmparg = FUTEX_OP_CMPARG(val3);
+ int32_t oparg, oldval, newval;
+ label_t ljb;
+ int rval;
+
+ if ((uintptr_t)addr >= KERNELBASE)
+ return (set_errno(EFAULT));
+
+ if (on_fault(&ljb))
+ return (set_errno(EFAULT));
+
+ oparg = FUTEX_OP_OPARG(val3);
+
+ do {
+ oldval = *addr;
+ newval = oparg;
+
+ switch (op) {
+ case FUTEX_OP_SET:
+ break;
+
+ case FUTEX_OP_ADD:
+ newval += oparg;
+ break;
+
+ case FUTEX_OP_OR:
+ newval |= oparg;
+ break;
+
+ case FUTEX_OP_ANDN:
+ newval &= ~oparg;
+ break;
+
+ case FUTEX_OP_XOR:
+ newval ^= oparg;
+ break;
+
+ default:
+ no_fault();
+ return (set_errno(EINVAL));
+ }
+ } while (atomic_cas_32((uint32_t *)addr, oldval, newval) != oldval);
+
+ no_fault();
+
+ switch (cmp) {
+ case FUTEX_OP_CMP_EQ:
+ rval = (oldval == cmparg);
+ break;
+
+ case FUTEX_OP_CMP_NE:
+ rval = (oldval != cmparg);
+ break;
+
+ case FUTEX_OP_CMP_LT:
+ rval = (oldval < cmparg);
+ break;
+
+ case FUTEX_OP_CMP_LE:
+ rval = (oldval <= cmparg);
+ break;
+
+ case FUTEX_OP_CMP_GT:
+ rval = (oldval > cmparg);
+ break;
+
+ case FUTEX_OP_CMP_GE:
+ rval = (oldval >= cmparg);
+ break;
+
+ default:
+ return (set_errno(EINVAL));
+ }
+
+ return (rval);
+}
+
+static int
+futex_wake_op(memid_t *memid, caddr_t addr2, memid_t *memid2,
+ int wake_threads, int wake_threads2, int val3)
+{
+ kmutex_t *l1, *l2;
+ int ret = 0, ret2 = 0, wake;
+ fwaiter_t *fwp, *next;
+ int index1, index2;
+
+ index1 = HASH_FUNC(memid);
+ index2 = HASH_FUNC(memid2);
+
+ if (index1 == index2) {
+ l1 = &futex_hash[index1].fh_lock;
+ l2 = NULL;
+ } else if (index1 < index2) {
+ l1 = &futex_hash[index1].fh_lock;
+ l2 = &futex_hash[index2].fh_lock;
+ } else {
+ l1 = &futex_hash[index2].fh_lock;
+ l2 = &futex_hash[index1].fh_lock;
+ }
+
+ mutex_enter(l1);
+ if (l2 != NULL)
+ mutex_enter(l2);
+
+ /* LINTED: alignment */
+ if ((wake = futex_wake_op_execute((int32_t *)addr2, val3)) < 0)
+ goto out;
+
+ for (fwp = futex_hash[index1].fh_waiters; fwp != NULL; fwp = next) {
+ next = fwp->fw_next;
+ if (!MEMID_EQUAL(&fwp->fw_memid, memid))
+ continue;
+
+ futex_hashout(fwp);
+ fwp->fw_woken = 1;
+ cv_signal(&fwp->fw_cv);
+ if (++ret >= wake_threads) {
+ break;
+ }
+ }
+
+ if (!wake)
+ goto out;
+
+ for (fwp = futex_hash[index2].fh_waiters; fwp != NULL; fwp = next) {
+ next = fwp->fw_next;
+ if (!MEMID_EQUAL(&fwp->fw_memid, memid2))
+ continue;
+
+ futex_hashout(fwp);
+ fwp->fw_woken = 1;
+ cv_signal(&fwp->fw_cv);
+ if (++ret2 >= wake_threads2) {
+ break;
+ }
+ }
+
+ ret += ret2;
+out:
+ if (l2 != NULL)
+ mutex_exit(l2);
+ mutex_exit(l1);
+
+ return (ret);
+}
+
+/*
+ * Wake up to wake_threads waiting on the futex at memid. If there are
+ * more than that many threads waiting, requeue the remaining threads on
+ * the futex at requeue_memid.
+ */
+static int
+futex_requeue(memid_t *memid, memid_t *requeue_memid, int wake_threads,
+ ulong_t requeue_threads, caddr_t addr, int *cmpval)
+{
+ fwaiter_t *fwp, *next;
+ int index1, index2;
+ int ret = 0;
+ int32_t curval;
+ kmutex_t *l1, *l2;
+
+ /*
+ * To ensure that we don't miss a wakeup if the value of cmpval
+ * changes, we need to grab locks on both the original and new hash
+ * buckets. To avoid deadlock, we always grab the lower-indexed
+ * lock first.
+ */
+ index1 = HASH_FUNC(memid);
+ index2 = HASH_FUNC(requeue_memid);
+
+ if (index1 == index2) {
+ l1 = &futex_hash[index1].fh_lock;
+ l2 = NULL;
+ } else if (index1 < index2) {
+ l1 = &futex_hash[index1].fh_lock;
+ l2 = &futex_hash[index2].fh_lock;
+ } else {
+ l1 = &futex_hash[index2].fh_lock;
+ l2 = &futex_hash[index1].fh_lock;
+ }
+
+ mutex_enter(l1);
+ if (l2 != NULL)
+ mutex_enter(l2);
+
+ if (cmpval != NULL) {
+ if (fuword32(addr, (uint32_t *)&curval)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ if (curval != *cmpval) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ }
+
+ for (fwp = futex_hash[index1].fh_waiters; fwp != NULL; fwp = next) {
+ next = fwp->fw_next;
+ if (!MEMID_EQUAL(&fwp->fw_memid, memid))
+ continue;
+
+ futex_hashout(fwp);
+ if (ret++ < wake_threads) {
+ fwp->fw_woken = 1;
+ cv_signal(&fwp->fw_cv);
+ } else {
+ MEMID_COPY(requeue_memid, &fwp->fw_memid);
+ futex_hashin(fwp);
+
+ if ((ret - wake_threads) >= requeue_threads)
+ break;
+ }
+ }
+
+out:
+ if (l2 != NULL)
+ mutex_exit(l2);
+ mutex_exit(l1);
+
+ if (ret < 0)
+ return (set_errno(-ret));
+ return (ret);
+}
+
+/*
+ * Copy in the relative timeout provided by the application and convert it
+ * to an absolute timeout. Sadly, this is complicated by the different
+ * timeout of semantics of FUTEX_WAIT vs. FUTEX_WAIT_BITSET. (Yes, you read
+ * that correctly; FUTEX_WAIT and FUTEX_WAIT_BITSET have different timeout
+ * semantics; see the block comment at the top of the file for commentary
+ * on this inanity.)
+ */
+static int
+get_timeout(void *lx_timeout, timestruc_t *timeout, int cmd, int clock)
+{
+ timestruc_t now;
+
+ if (get_udatamodel() == DATAMODEL_NATIVE) {
+ if (copyin(lx_timeout, timeout, sizeof (timestruc_t)))
+ return (EFAULT);
+ }
+#ifdef _SYSCALL32_IMPL
+ else {
+ timestruc32_t timeout32;
+ if (copyin(lx_timeout, &timeout32, sizeof (timestruc32_t)))
+ return (EFAULT);
+ timeout->tv_sec = (time_t)timeout32.tv_sec;
+ timeout->tv_nsec = timeout32.tv_nsec;
+ }
+#endif
+ if (itimerspecfix(timeout))
+ return (EINVAL);
+
+ if (cmd == FUTEX_WAIT) {
+ /*
+ * We've been given a relative time; add it to the current
+ * time to derive an absolute time.
+ */
+ gethrestime(&now);
+ timespecadd(timeout, &now);
+ } else {
+ /*
+ * This is a FUTEX_WAIT_BITSET operation, which (1) specifies
+ * the timeout as an absolute rather than a relative timeout
+ * and (2) allows for different clock types to be specified.
+ * If the clock is CLOCK_REALTIME, we actually have nothing
+ * to do -- but if this is CLOCK_MONOTONIC, we need to convert
+ * our absolute time back into a relative time and then add
+ * it to our current hrestime to get an absolute CLOCK_REALTIME
+ * timeout.
+ */
+ if (clock == CLOCK_MONOTONIC) {
+ /*
+ * Get our current time, and subtract it from our
+ * timeout to get the relative value.
+ */
+ hrt2ts(gethrtime(), &now);
+ timespecsub(timeout, &now);
+
+ /*
+ * If our timeout is in the past, set it to be 0.
+ */
+ if (timeout->tv_sec < 0) {
+ timeout->tv_sec = 0;
+ timeout->tv_nsec = 0;
+ }
+
+ /*
+ * Add the relative time back into the current time.
+ */
+ gethrestime(&now);
+ timespecadd(timeout, &now);
+ }
+ }
+
+ return (0);
+}
+
+long
+lx_futex(uintptr_t addr, int op, int val, uintptr_t lx_timeout,
+ uintptr_t addr2, int val3)
+{
+ struct as *as = curproc->p_as;
+ memid_t memid, memid2;
+ timestruc_t timeout;
+ timestruc_t *tptr = NULL;
+ int val2 = NULL;
+ int rval = 0;
+ int cmd = op & FUTEX_CMD_MASK;
+ int private = op & FUTEX_PRIVATE_FLAG;
+ char dmsg[32];
+
+ /* must be aligned on int boundary */
+ if (addr & 0x3)
+ return (set_errno(EINVAL));
+
+ /* Sanity check the futex command */
+ if (cmd < 0 || cmd > FUTEX_MAX_CMD)
+ return (set_errno(EINVAL));
+
+ if (cmd == FUTEX_FD) {
+ /*
+ * FUTEX_FD was sentenced to death for grievous crimes of
+ * semantics against humanity; it has been ripped out of Linux
+ * and will never be supported by us.
+ */
+ (void) snprintf(dmsg, sizeof (dmsg), "futex 0x%x", cmd);
+ lx_unsupported(dmsg);
+ return (set_errno(ENOSYS));
+ }
+
+ switch (cmd) {
+ case FUTEX_LOCK_PI:
+ case FUTEX_UNLOCK_PI:
+ case FUTEX_TRYLOCK_PI:
+ case FUTEX_WAIT_REQUEUE_PI:
+ case FUTEX_CMP_REQUEUE_PI:
+ /*
+ * These are operations that we don't currently support, but
+ * may well need to in the future. For now, callers need to
+ * deal with these being missing -- but if and as that changes,
+ * they may well need to be implemented.
+ */
+ (void) snprintf(dmsg, sizeof (dmsg), "futex 0x%x", cmd);
+ lx_unsupported(dmsg);
+ return (set_errno(ENOSYS));
+ }
+
+ if ((op & FUTEX_CLOCK_REALTIME) && cmd != FUTEX_WAIT_BITSET) {
+ /*
+ * Linux only allows FUTEX_CLOCK_REALTIME to be set on the
+ * FUTEX_WAIT_BITSET and FUTEX_WAIT_REQUEUE_PI commands.
+ */
+ return (set_errno(ENOSYS));
+ }
+
+ /* Copy in the timeout structure from userspace. */
+ if ((cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_BITSET) &&
+ lx_timeout != NULL) {
+ rval = get_timeout((timespec_t *)lx_timeout, &timeout, cmd,
+ op & FUTEX_CLOCK_REALTIME ? CLOCK_REALTIME :
+ CLOCK_MONOTONIC);
+
+ if (rval != 0)
+ return (set_errno(rval));
+ tptr = &timeout;
+ }
+
+ switch (cmd) {
+ case FUTEX_REQUEUE:
+ case FUTEX_CMP_REQUEUE:
+ case FUTEX_WAKE_OP:
+ /*
+ * lx_timeout is nominally a pointer to a userspace address.
+ * For several commands, however, it actually contains
+ * an additional integer parameter. This is horrible, and
+ * the people who did this to us should be sorry.
+ */
+ val2 = (int)lx_timeout;
+ }
+
+ /*
+ * Translate the process-specific, user-space futex virtual
+ * address(es) to a universal memid. If the private bit is set, we
+ * can just use our as plus the virtual address, saving quite a bit
+ * of effort.
+ */
+ if (private) {
+ memid.val[0] = (uintptr_t)as;
+ memid.val[1] = (uintptr_t)addr;
+ } else {
+ rval = as_getmemid(as, (void *)addr, &memid);
+ if (rval != 0)
+ return (set_errno(rval));
+ }
+
+ if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
+ cmd == FUTEX_WAKE_OP) {
+ if (addr2 & 0x3)
+ return (set_errno(EINVAL));
+
+ if (private) {
+ memid2.val[0] = (uintptr_t)as;
+ memid2.val[1] = (uintptr_t)addr2;
+ } else {
+ rval = as_getmemid(as, (void *)addr2, &memid2);
+ if (rval)
+ return (set_errno(rval));
+ }
+ }
+
+ switch (cmd) {
+ case FUTEX_WAIT:
+ rval = futex_wait(&memid, (void *)addr, val,
+ tptr, FUTEX_BITSET_MATCH_ANY);
+ break;
+
+ case FUTEX_WAIT_BITSET:
+ rval = futex_wait(&memid, (void *)addr, val, tptr, val3);
+ break;
+
+ case FUTEX_WAKE:
+ rval = futex_wake(&memid, val, FUTEX_BITSET_MATCH_ANY);
+ break;
+
+ case FUTEX_WAKE_BITSET:
+ rval = futex_wake(&memid, val, val3);
+ break;
+
+ case FUTEX_WAKE_OP:
+ rval = futex_wake_op(&memid, (void *)addr2, &memid2,
+ val, val2, val3);
+ break;
+
+ case FUTEX_CMP_REQUEUE:
+ case FUTEX_REQUEUE:
+ rval = futex_requeue(&memid, &memid2, val,
+ val2, (void *)addr2, &val3);
+
+ break;
+ }
+
+ return (rval);
+}
+
+/*
+ * Does the dirty work of actually dropping a held robust lock in the event
+ * of the untimely death of the owner; see lx_futex_robust_exit(), below.
+ */
+static void
+lx_futex_robust_drop(uintptr_t addr, uint32_t tid)
+{
+ memid_t memid;
+ uint32_t oldval, newval;
+
+ VERIFY(addr + sizeof (uint32_t) < KERNELBASE);
+
+ do {
+ fuword32_noerr((void *)addr, &oldval);
+
+ if ((oldval & FUTEX_TID_MASK) != tid)
+ return;
+
+ newval = (oldval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
+ } while (atomic_cas_32((uint32_t *)addr, oldval, newval) != oldval);
+
+ /*
+ * We have now denoted that this lock's owner is dead; we need to
+ * wake any waiters.
+ */
+ if (as_getmemid(curproc->p_as, (void *)addr, &memid) != 0)
+ return;
+
+ (void) futex_wake(&memid, 1, FUTEX_BITSET_MATCH_ANY);
+}
+
+/*
+ * Called when a thread is exiting. The role of the kernel is very clearly
+ * spelled out in the Linux design document entitled robust-futex-ABI.txt:
+ * we must (carefully!) iterate over the list of held locks pointed to by
+ * the robust list head; for each lock, we'll check to see if the calling
+ * (exiting) thread is the owner, and if so, denote that the lock is dead
+ * and wake any waiters. (The "pending" field of the head points to a lock
+ * that is in transition; it should be dropped if held.) If there are any
+ * errors through here at all (including memory operations), we abort the
+ * entire operation.
+ */
+void
+lx_futex_robust_exit(uintptr_t addr, uint32_t tid)
+{
+ futex_robust_list_t list;
+ uintptr_t entry, next;
+ model_t model = get_udatamodel();
+ int length = 0;
+ label_t ljb;
+
+ if (on_fault(&ljb))
+ return;
+
+ if (addr + sizeof (futex_robust_list_t) >= KERNELBASE)
+ goto out;
+
+ if (model == DATAMODEL_NATIVE) {
+ copyin_noerr((void *)addr, &list, sizeof (list));
+ }
+#if defined(_SYSCALL32_IMPL)
+ else {
+ futex_robust_list32_t list32;
+
+ copyin_noerr((void *)addr, &list32, sizeof (list32));
+ list.frl_head = list32.frl_head;
+ list.frl_offset = list32.frl_offset;
+ list.frl_pending = list32.frl_pending;
+ }
+#endif
+
+ /*
+ * Strip off the PI bit, if any.
+ */
+ entry = list.frl_head & ~FUTEX_ROBUST_LOCK_PI;
+
+ while (entry != addr && length++ < FUTEX_ROBUST_LIST_LIMIT) {
+ if (entry + list.frl_offset + sizeof (uint32_t) >= KERNELBASE)
+ goto out;
+
+ if (model == DATAMODEL_NATIVE) {
+ fulword_noerr((void *)entry, &next);
+ }
+#if defined(_SYSCALL32_IMPL)
+ else {
+ uint32_t next32;
+ fuword32_noerr((void *)entry, &next32);
+ next = next32;
+ }
+#endif
+
+ /*
+ * Drop the robust mutex -- but only if our pending lock didn't
+ * somehow sneak on there.
+ */
+ if (entry != list.frl_pending)
+ lx_futex_robust_drop(entry + list.frl_offset, tid);
+
+ entry = next & ~FUTEX_LOCK_PI;
+ }
+
+ /*
+ * Finally, drop the pending lock if there is one.
+ */
+ if (list.frl_pending != NULL && list.frl_pending +
+ list.frl_offset + sizeof (uint32_t) < KERNELBASE)
+ lx_futex_robust_drop(list.frl_pending + list.frl_offset, tid);
+
+out:
+ no_fault();
+}
+
+long
+lx_set_robust_list(void *listp, size_t len)
+{
+ proc_t *p = curproc;
+ klwp_t *lwp = ttolwp(curthread);
+ struct lx_lwp_data *lwpd = lwptolxlwp(lwp);
+
+ if (get_udatamodel() == DATAMODEL_NATIVE) {
+ if (len != sizeof (futex_robust_list_t))
+ return (set_errno(EINVAL));
+ }
+#if defined(_SYSCALL32_IMPL)
+ else {
+ if (len != sizeof (futex_robust_list32_t))
+ return (set_errno(EINVAL));
+ }
+#endif
+
+ /*
+ * To assure that we are serialized with respect to any racing call
+ * to lx_get_robust_list(), we lock ourselves to set the value. (Note
+ * that sprunlock() drops p_lock.)
+ */
+ mutex_enter(&p->p_lock);
+ sprlock_proc(p);
+ lwpd->br_robust_list = listp;
+ sprunlock(p);
+
+ return (0);
+}
+
+long
+lx_get_robust_list(pid_t pid, void **listp, size_t *lenp)
+{
+ model_t model = get_udatamodel();
+ pid_t rpid;
+ id_t rtid;
+ proc_t *rproc;
+ klwp_t *rlwp;
+ lx_lwp_data_t *rlwpd;
+ kthread_t *rthr;
+ void *list;
+ int err = 0;
+
+ if (pid == 0) {
+ /*
+ * A pid of 0 denotes the current thread; we lock the current
+ * process even though it isn't strictly necessary (we can't
+ * race with set_robust_list() because a thread may only set
+ * its robust list on itself).
+ */
+ rproc = curproc;
+ rlwpd = lwptolxlwp(ttolwp(curthread));
+ mutex_enter(&curproc->p_lock);
+ sprlock_proc(rproc);
+ } else {
+ if (lx_lpid_to_spair(pid, &rpid, &rtid) != 0 ||
+ (rproc = sprlock(rpid)) == NULL) {
+ /*
+ * We couldn't find the specified process.
+ */
+ return (set_errno(ESRCH));
+ }
+
+ if (rproc->p_model != model ||
+ (rthr = idtot(rproc, rtid)) == NULL ||
+ (rlwp = ttolwp(rthr)) == NULL ||
+ (rlwpd = lwptolxlwp(rlwp)) == NULL) {
+ /*
+ * The target process does not match our data model, or
+ * we couldn't find the LWP, or the target process is
+ * not branded.
+ */
+ err = ESRCH;
+ goto out;
+ }
+ }
+
+ if (curproc != rproc &&
+ priv_proc_cred_perm(curproc->p_cred, rproc, NULL, VREAD) != 0) {
+ /*
+ * We don't have the permission to examine the target.
+ */
+ err = EPERM;
+ goto out;
+ }
+
+ list = rlwpd->br_robust_list;
+
+out:
+ sprunlock(rproc);
+
+ if (err != 0)
+ return (set_errno(err));
+
+ if (model == DATAMODEL_NATIVE) {
+ if (sulword(listp, (uintptr_t)list) != 0)
+ return (set_errno(EFAULT));
+
+ if (sulword(lenp, sizeof (futex_robust_list_t)) != 0)
+ return (set_errno(EFAULT));
+ }
+#if defined(_SYSCALL32_IMPL)
+ else {
+ if (suword32(listp, (uint32_t)(uintptr_t)list) != 0)
+ return (set_errno(EFAULT));
+
+ if (suword32(lenp, sizeof (futex_robust_list32_t)) != 0)
+ return (set_errno(EFAULT));
+ }
+#endif
+
+ return (0);
+}
+
+void
+lx_futex_init(void)
+{
+ int i;
+
+ for (i = 0; i < HASH_SIZE; i++)
+ mutex_init(&futex_hash[i].fh_lock, NULL, MUTEX_DEFAULT, NULL);
+}
+
+int
+lx_futex_fini(void)
+{
+ int i, err;
+
+ err = 0;
+ for (i = 0; (err == 0) && (i < HASH_SIZE); i++) {
+ mutex_enter(&futex_hash[i].fh_lock);
+ if (futex_hash[i].fh_waiters != NULL)
+ err = EBUSY;
+ mutex_exit(&futex_hash[i].fh_lock);
+ }
+ return (err);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_getcwd.c b/usr/src/uts/common/brand/lx/syscall/lx_getcwd.c
new file mode 100644
index 0000000000..7fcc594d81
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_getcwd.c
@@ -0,0 +1,50 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/pathname.h>
+
+/*
+ * getcwd() - Linux syscall semantics are slightly different; we need to return
+ * the length of the pathname copied (+ 1 for the terminating NULL byte.)
+ */
+long
+lx_getcwd(char *buf, int size)
+{
+ int len;
+ int error;
+ vnode_t *vp;
+ char path[MAXPATHLEN + 1];
+
+ vp = PTOU(curproc)->u_cdir;
+ VN_HOLD(vp);
+ if ((error = vnodetopath(NULL, vp, path, sizeof (path), CRED())) != 0) {
+ VN_RELE(vp);
+ return (set_errno(error));
+ }
+ VN_RELE(vp);
+
+ len = strlen(path) + 1;
+ if (len > size)
+ return (set_errno(ERANGE));
+
+ if (copyout(path, buf, len) != 0)
+ return (set_errno(EFAULT));
+
+ return (len);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_getdents.c b/usr/src/uts/common/brand/lx/syscall/lx_getdents.c
new file mode 100644
index 0000000000..102d521e02
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_getdents.c
@@ -0,0 +1,350 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <sys/systm.h>
+#include <sys/filio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/inttypes.h>
+#include <sys/vnode.h>
+#include <sys/dirent.h>
+#include <sys/errno.h>
+#include <sys/file.h>
+#include <sys/sysmacros.h>
+#include <sys/kmem.h>
+#include <sys/sunddi.h>
+
+#include <sys/lx_types.h>
+#include <sys/lx_misc.h>
+
+#define LX_NAMEMAX 256
+
+#define LX_GETDENTS_MAX_BUFSZ 65536
+
+/*
+ * Because the Linux dirent has an extra field (d_type), it's possible that
+ * each entry will be 8 bytes larger (and aligned to 8 bytes) due to padding.
+ * To prevent overrun during translation, the illumos-native buffer is sized
+ * pessimistically.
+ */
+#define LTOS_GETDENTS_BUFSZ(bufsz, datasz) \
+ (((bufsz) / (((datasz) + 15) & ~7)) * sizeof (struct dirent))
+
+/*
+ * Record must be long enough to house d_name string, null terminator and
+ * d_type field. It's then padded to nearest 8-byte boundary
+ */
+#define LX_RECLEN(l, t) \
+ ((offsetof(t, d_name) + 2 + (l) + 7) & ~7)
+
+/*
+ * Bytes after d_name string until d_reclen should be zeroed.
+ * Includes zero-terminating d_name
+ */
+#define LX_ZEROLEN(l, t) \
+ (LX_RECLEN(l, t) - \
+ ((offsetof(t, d_name) + (l))))
+
+/* The output format of getdents differs if the caller is 32 or 64 bit. */
+struct lx_dirent_32 {
+ uint32_t d_ino;
+ int32_t d_off;
+ ushort_t d_reclen;
+ char d_name[1];
+ uchar_t d_type;
+};
+
+struct lx_dirent_64 {
+ uint64_t d_ino;
+ int64_t d_off;
+ ushort_t d_reclen;
+ char d_name[1];
+ uchar_t d_type;
+};
+
+static long
+lx_getdents_common(int fd, caddr_t uptr, size_t count,
+ unsigned int lx_size, int (*outcb)(caddr_t, caddr_t, int))
+{
+ vnode_t *vp;
+ file_t *fp;
+ struct uio auio;
+ struct iovec aiov;
+ int error;
+ int sbufsz, lbufsz, bufsz;
+ void *lbuf, *sbuf;
+ size_t outb = 0;
+
+ if (count < lx_size) {
+ return (set_errno(EINVAL));
+ }
+ if ((fp = getf(fd)) == NULL) {
+ return (set_errno(EBADF));
+ }
+ vp = fp->f_vnode;
+ if (vp->v_type != VDIR) {
+ releasef(fd);
+ return (set_errno(ENOTDIR));
+ }
+ if (!(fp->f_flag & FREAD)) {
+ releasef(fd);
+ return (set_errno(EBADF));
+ }
+
+ if (count > LX_GETDENTS_MAX_BUFSZ) {
+ /*
+ * If the target buffer passed to us is huge, keep the
+ * translation buffers moderate in size. Iteration will be
+ * used to fill the request.
+ */
+ lbufsz = LX_GETDENTS_MAX_BUFSZ;
+ sbufsz = LTOS_GETDENTS_BUFSZ(LX_GETDENTS_MAX_BUFSZ, lx_size);
+ } else if (count < (lx_size + MAXPATHLEN)) {
+ /*
+ * If the target buffer is tiny, allocate a Linux-format buffer
+ * big enough to hold at least one max-length row while keeping
+ * the illumos-format buffer pesimistic in size.
+ *
+ * Assuming the buffer is truely tiny, it's likely that the
+ * result will not fit and an EINVAL will be tossed.
+ */
+ lbufsz = (lx_size + MAXPATHLEN);
+ sbufsz = MAX((LTOS_GETDENTS_BUFSZ(count, lx_size)),
+ sizeof (struct dirent));
+ } else {
+ lbufsz = count;
+ sbufsz = LTOS_GETDENTS_BUFSZ(count, lx_size);
+ }
+ bufsz = sbufsz;
+ lbuf = kmem_alloc(lbufsz, KM_SLEEP);
+ sbuf = kmem_alloc(sbufsz, KM_SLEEP);
+
+ aiov.iov_base = sbuf;
+ aiov.iov_len = sbufsz;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_loffset = fp->f_offset;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_resid = sbufsz;
+ auio.uio_fmode = 0;
+ auio.uio_extflg = UIO_COPY_CACHED;
+
+ /*
+ * Since we use a conservative buffer allocation for the differing
+ * struct sizing and Linux places fewer limits on getdents buffers in
+ * general, there's a chance we'll undershoot on the record count.
+ * When this happens, we can simply repeat the READDIR operation until
+ * the available records are exhausted or we've filled the user buffer.
+ */
+ while (1) {
+ int at_eof, res;
+ (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
+ error = VOP_READDIR(vp, &auio, fp->f_cred, &at_eof, NULL, 0);
+ VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
+ if (error != 0 || auio.uio_resid == sbufsz) {
+ break;
+ }
+ res = outcb(sbuf, lbuf, bufsz - auio.uio_resid);
+ VERIFY(res <= lbufsz);
+ if (res == 0) {
+ /* no records to copyout from this batch */
+ break;
+ } else if (res > count) {
+ /*
+ * For very small buffer sizes, it's possible that a
+ * single record is too large due to a long filename.
+ */
+ error = EINVAL;
+ break;
+ }
+
+ VERIFY(outb + res <= count);
+ if (copyout(lbuf, (void *)(uptr + outb), res) != 0) {
+ error = EFAULT;
+ break;
+ }
+ outb += res;
+
+ if (at_eof != 0 || (count - outb) < (lx_size + MAXPATHLEN)) {
+ /*
+ * If there are no records left or the remaining buffer
+ * space is not large enough to hold a max-length
+ * filename, do not continue iteration.
+ */
+ break;
+ }
+
+ /*
+ * We undershot the request buffer.
+ * Reset for another READDIR, taking care not to overshoot.
+ */
+ bufsz = MIN(sbufsz, LTOS_GETDENTS_BUFSZ(count - outb, lx_size));
+ auio.uio_resid = bufsz;
+ aiov.iov_len = bufsz;
+ aiov.iov_base = sbuf;
+ }
+
+ kmem_free(lbuf, lbufsz);
+ kmem_free(sbuf, sbufsz);
+
+ if (error) {
+ releasef(fd);
+ return (set_errno(error));
+ }
+
+ fp->f_offset = auio.uio_loffset;
+ releasef(fd);
+ return (outb);
+}
+
+
+static int
+lx_getdents_format32(caddr_t sbuf, caddr_t lbuf, int len)
+{
+ struct dirent *sd;
+ struct lx_dirent_32 *ld;
+ int namelen;
+ int size = 0;
+
+ while (len > 0) {
+ sd = (struct dirent *)sbuf;
+ ld = (struct lx_dirent_32 *)lbuf;
+ namelen = MIN(strlen(sd->d_name), LX_NAMEMAX - 1);
+
+ ld->d_ino = sd->d_ino;
+ ld->d_off = sd->d_off;
+ (void) strncpy(ld->d_name, sd->d_name, namelen);
+ ld->d_name[namelen] = 0;
+ ld->d_reclen = (ushort_t)LX_RECLEN(namelen,
+ struct lx_dirent_32);
+ /* Zero out any alignment padding and d_type */
+ bzero(ld->d_name + namelen,
+ LX_ZEROLEN(namelen, struct lx_dirent_32));
+
+ len -= sd->d_reclen;
+ size += ld->d_reclen;
+ sbuf += sd->d_reclen;
+ lbuf += ld->d_reclen;
+ }
+ return (size);
+}
+
+static int
+lx_getdents_format64(caddr_t sbuf, caddr_t lbuf, int len)
+{
+ struct dirent *sd;
+ struct lx_dirent_64 *ld;
+ int namelen;
+ int size = 0;
+
+ while (len > 0) {
+ sd = (struct dirent *)sbuf;
+ ld = (struct lx_dirent_64 *)lbuf;
+ namelen = MIN(strlen(sd->d_name), LX_NAMEMAX - 1);
+
+ ld->d_ino = sd->d_ino;
+ ld->d_off = sd->d_off;
+ (void) strncpy(ld->d_name, sd->d_name, namelen);
+ ld->d_name[namelen] = 0;
+ ld->d_reclen = (ushort_t)LX_RECLEN(namelen,
+ struct lx_dirent_64);
+ /* Zero out any alignment padding and d_type */
+ bzero(ld->d_name + namelen,
+ LX_ZEROLEN(namelen, struct lx_dirent_64));
+
+ len -= sd->d_reclen;
+ size += ld->d_reclen;
+ sbuf += sd->d_reclen;
+ lbuf += ld->d_reclen;
+ }
+ return (size);
+}
+
+long
+lx_getdents_32(int fd, caddr_t buf, size_t count)
+{
+ return (lx_getdents_common(fd, buf, count,
+ sizeof (struct lx_dirent_32), lx_getdents_format32));
+}
+
+long
+lx_getdents_64(int fd, caddr_t buf, size_t count)
+{
+ return (lx_getdents_common(fd, buf, count,
+ sizeof (struct lx_dirent_64), lx_getdents_format64));
+}
+
+struct lx_dirent64 {
+ uint64_t d_ino;
+ int64_t d_off;
+ ushort_t d_reclen;
+ uchar_t d_type;
+ char d_name[1];
+};
+
+#define LX_RECLEN64(namelen) \
+ ((offsetof(struct lx_dirent64, d_name) + 1 + (namelen) + 7) & ~7)
+
+#define LX_ZEROLEN64(namelen) \
+ (LX_RECLEN64(namelen) - \
+ ((offsetof(struct lx_dirent64, d_name) + (namelen))))
+
+static int
+lx_getdents64_format(caddr_t sbuf, caddr_t lbuf, int len)
+{
+ struct dirent *sd;
+ struct lx_dirent64 *ld;
+ int namelen;
+ int size = 0;
+
+ while (len > 0) {
+ sd = (struct dirent *)sbuf;
+ ld = (struct lx_dirent64 *)lbuf;
+ namelen = MIN(strlen(sd->d_name), LX_NAMEMAX - 1);
+
+ ld->d_ino = sd->d_ino;
+ ld->d_off = sd->d_off;
+ ld->d_type = 0;
+ (void) strncpy(ld->d_name, sd->d_name, namelen);
+ ld->d_name[namelen] = 0;
+ ld->d_reclen = (ushort_t)LX_RECLEN64(namelen);
+ /* Zero out any alignment padding */
+ bzero(ld->d_name + namelen, LX_ZEROLEN64(namelen));
+
+ len -= sd->d_reclen;
+ size += ld->d_reclen;
+ sbuf += sd->d_reclen;
+ lbuf += ld->d_reclen;
+ }
+ return (size);
+}
+
+
+long
+lx_getdents64(int fd, caddr_t buf, size_t count)
+{
+ return (lx_getdents_common(fd, buf, count,
+ sizeof (struct lx_dirent64), lx_getdents64_format));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_getpid.c b/usr/src/uts/common/brand/lx/syscall/lx_getpid.c
new file mode 100644
index 0000000000..c2506f52c5
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_getpid.c
@@ -0,0 +1,79 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <sys/zone.h>
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/thread.h>
+#include <sys/cpuvar.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+
+/*
+ * return the pid
+ */
+long
+lx_getpid(void)
+{
+ lx_lwp_data_t *lwpd = ttolxlwp(curthread);
+ long rv;
+
+ if (curproc->p_pid == curproc->p_zone->zone_proc_initpid) {
+ rv = 1;
+ } else {
+ VERIFY(lwpd != NULL);
+
+ if (lwpd->br_lx_thunk_pid != 0) {
+ rv = lwpd->br_lx_thunk_pid;
+ } else {
+ rv = lwpd->br_tgid;
+ }
+ }
+
+ return (rv);
+}
+
+/*
+ * return the parent pid
+ */
+long
+lx_getppid(void)
+{
+ return (lx_lwp_ppid(ttolwp(curthread), NULL, NULL));
+}
+
+/*
+ * return the thread id
+ */
+long
+lx_gettid(void)
+{
+ lx_lwp_data_t *lwpd = ttolxlwp(curthread);
+
+ return (lwpd->br_pid);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_getrandom.c b/usr/src/uts/common/brand/lx/syscall/lx_getrandom.c
new file mode 100644
index 0000000000..acc4073483
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_getrandom.c
@@ -0,0 +1,33 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+
+/*
+ * From "uts/common/syscall/getrandom.c":
+ */
+extern int getrandom(void *, size_t, int);
+
+long
+lx_getrandom(void *bufp, size_t buflen, int flags)
+{
+ /*
+ * According to signal(7), calls to getrandom(2) are restartable.
+ */
+ ttolxlwp(curthread)->br_syscall_restart = B_TRUE;
+
+ return (getrandom(bufp, buflen, flags));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_id.c b/usr/src/uts/common/brand/lx/syscall/lx_id.c
new file mode 100644
index 0000000000..baa41f52fa
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_id.c
@@ -0,0 +1,296 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/zone.h>
+#include <sys/cred_impl.h>
+#include <sys/policy.h>
+
+typedef ushort_t l_uid16_t;
+typedef ushort_t l_gid16_t;
+typedef uint_t l_uid_t;
+typedef uint_t l_gid_t;
+
+#define LINUX_UID16_TO_UID32(uid16) \
+ (((uid16) == (l_uid16_t)-1) ? ((l_uid_t)-1) : (l_uid_t)(uid16))
+
+#define LINUX_GID16_TO_GID32(gid16) \
+ (((gid16) == (l_gid16_t)-1) ? ((l_gid_t)-1) : (l_gid_t)(gid16))
+
+#define LX_NGROUPS_MAX 32
+extern int setgroups(int, gid_t *);
+
+/*
+ * This function is based on setreuid in common/syscall/uid.c and exists
+ * because illumos does not have a way to explicitly set the saved uid (suid)
+ * from any other system call.
+ */
+long
+lx_setresuid(l_uid_t ruid, l_uid_t euid, l_uid_t suid)
+{
+ proc_t *p;
+ int error = 0;
+ int do_nocd = 0;
+ int uidchge = 0;
+ uid_t oldruid = ruid;
+ cred_t *cr, *newcr;
+ zoneid_t zoneid = getzoneid();
+
+ if ((ruid != -1 && (ruid > MAXUID)) ||
+ (euid != -1 && (euid > MAXUID)) ||
+ (suid != -1 && (suid > MAXUID))) {
+ error = EINVAL;
+ goto done;
+ }
+
+ /*
+ * Need to pre-allocate the new cred structure before grabbing
+ * the p_crlock mutex.
+ */
+ newcr = cralloc();
+
+ p = ttoproc(curthread);
+
+retry:
+ mutex_enter(&p->p_crlock);
+ cr = p->p_cred;
+
+ if (ruid != -1 &&
+ ruid != cr->cr_ruid && ruid != cr->cr_uid &&
+ ruid != cr->cr_suid && secpolicy_allow_setid(cr, ruid, B_FALSE)) {
+ error = EPERM;
+ } else if (euid != -1 &&
+ euid != cr->cr_ruid && euid != cr->cr_uid &&
+ euid != cr->cr_suid && secpolicy_allow_setid(cr, euid, B_FALSE)) {
+ error = EPERM;
+ } else if (suid != -1 &&
+ suid != cr->cr_ruid && suid != cr->cr_uid &&
+ suid != cr->cr_suid && secpolicy_allow_setid(cr, suid, B_FALSE)) {
+ error = EPERM;
+ } else {
+ if (!uidchge && ruid != -1 && cr->cr_ruid != ruid) {
+ /*
+ * The ruid of the process is going to change. In order
+ * to avoid a race condition involving the
+ * process count associated with the newly given ruid,
+ * we increment the count before assigning the
+ * credential to the process.
+ * To do that, we'll have to take pidlock, so we first
+ * release p_crlock.
+ */
+ mutex_exit(&p->p_crlock);
+ uidchge = 1;
+ mutex_enter(&pidlock);
+ upcount_inc(ruid, zoneid);
+ mutex_exit(&pidlock);
+ /*
+ * As we released p_crlock we can't rely on the cr
+ * we read. So retry the whole thing.
+ */
+ goto retry;
+ }
+ crhold(cr);
+ crcopy_to(cr, newcr);
+ p->p_cred = newcr;
+
+ if (euid != -1)
+ newcr->cr_uid = euid;
+ if (suid != -1)
+ newcr->cr_suid = suid;
+ if (ruid != -1) {
+ oldruid = newcr->cr_ruid;
+ newcr->cr_ruid = ruid;
+ ASSERT(ruid != oldruid ? uidchge : 1);
+ }
+
+ /*
+ * A process that gives up its privilege
+ * must be marked to produce no core dump.
+ */
+ if ((cr->cr_uid != newcr->cr_uid ||
+ cr->cr_ruid != newcr->cr_ruid ||
+ cr->cr_suid != newcr->cr_suid))
+ do_nocd = 1;
+
+ crfree(cr);
+ }
+ mutex_exit(&p->p_crlock);
+
+ /*
+ * We decrement the number of processes associated with the oldruid
+ * to match the increment above, even if the ruid of the process
+ * did not change or an error occurred (oldruid == uid).
+ */
+ if (uidchge) {
+ ASSERT(oldruid != -1 && ruid != -1);
+ mutex_enter(&pidlock);
+ upcount_dec(oldruid, zoneid);
+ mutex_exit(&pidlock);
+ }
+
+ if (error == 0) {
+ if (do_nocd) {
+ mutex_enter(&p->p_lock);
+ p->p_flag |= SNOCD;
+ mutex_exit(&p->p_lock);
+ }
+ crset(p, newcr); /* broadcast to process threads */
+ goto done;
+ }
+ crfree(newcr);
+done:
+ if (error)
+ return (set_errno(error));
+ else
+ return (0);
+}
+
+long
+lx_setresuid16(l_uid16_t ruid16, l_uid16_t euid16, l_uid16_t suid16)
+{
+ long rval;
+
+ rval = lx_setresuid(
+ LINUX_UID16_TO_UID32(ruid16),
+ LINUX_UID16_TO_UID32(euid16),
+ LINUX_UID16_TO_UID32(suid16));
+
+ return (rval);
+}
+
+/*
+ * This function is based on setregid in common/syscall/gid.c
+ */
+long
+lx_setresgid(l_gid_t rgid, l_gid_t egid, l_gid_t sgid)
+{
+ proc_t *p;
+ int error = 0;
+ int do_nocd = 0;
+ cred_t *cr, *newcr;
+
+ if ((rgid != -1 && (rgid > MAXUID)) ||
+ (egid != -1 && (egid > MAXUID)) ||
+ (sgid != -1 && (sgid > MAXUID))) {
+ error = EINVAL;
+ goto done;
+ }
+
+ /*
+ * Need to pre-allocate the new cred structure before grabbing
+ * the p_crlock mutex.
+ */
+ newcr = cralloc();
+
+ p = ttoproc(curthread);
+ mutex_enter(&p->p_crlock);
+ cr = p->p_cred;
+
+ if (rgid != -1 &&
+ rgid != cr->cr_rgid && rgid != cr->cr_gid &&
+ rgid != cr->cr_sgid && secpolicy_allow_setid(cr, -1, B_FALSE)) {
+ error = EPERM;
+ } else if (egid != -1 &&
+ egid != cr->cr_rgid && egid != cr->cr_gid &&
+ egid != cr->cr_sgid && secpolicy_allow_setid(cr, -1, B_FALSE)) {
+ error = EPERM;
+ } else if (sgid != -1 &&
+ sgid != cr->cr_rgid && sgid != cr->cr_gid &&
+ sgid != cr->cr_sgid && secpolicy_allow_setid(cr, -1, B_FALSE)) {
+ error = EPERM;
+ } else {
+ crhold(cr);
+ crcopy_to(cr, newcr);
+ p->p_cred = newcr;
+
+ if (egid != -1)
+ newcr->cr_gid = egid;
+ if (sgid != -1)
+ newcr->cr_sgid = sgid;
+ if (rgid != -1)
+ newcr->cr_rgid = rgid;
+
+ /*
+ * A process that gives up its privilege
+ * must be marked to produce no core dump.
+ */
+ if ((cr->cr_gid != newcr->cr_gid ||
+ cr->cr_rgid != newcr->cr_rgid ||
+ cr->cr_sgid != newcr->cr_sgid))
+ do_nocd = 1;
+
+ crfree(cr);
+ }
+ mutex_exit(&p->p_crlock);
+
+ if (error == 0) {
+ if (do_nocd) {
+ mutex_enter(&p->p_lock);
+ p->p_flag |= SNOCD;
+ mutex_exit(&p->p_lock);
+ }
+ crset(p, newcr); /* broadcast to process threads */
+ goto done;
+ }
+ crfree(newcr);
+done:
+ if (error)
+ return (set_errno(error));
+ else
+ return (0);
+}
+
+long
+lx_setresgid16(l_gid16_t rgid16, l_gid16_t egid16, l_gid16_t sgid16)
+{
+ long rval;
+
+ rval = lx_setresgid(
+ LINUX_GID16_TO_GID32(rgid16),
+ LINUX_GID16_TO_GID32(egid16),
+ LINUX_GID16_TO_GID32(sgid16));
+
+ return (rval);
+}
+
+/*
+ * Linux defines NGROUPS_MAX to be 32, but on illumos it is only 16. We employ
+ * the terrible hack below so that tests may proceed, if only on DEBUG kernels.
+ */
+long
+lx_helper_setgroups(int ngroups, gid_t *grouplist)
+{
+#ifdef DEBUG
+ if (ngroups > ngroups_max && ngroups <= LX_NGROUPS_MAX)
+ ngroups = ngroups_max;
+#endif /* DEBUG */
+
+ return (setgroups(ngroups, grouplist));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_ioctl.c b/usr/src/uts/common/brand/lx/syscall/lx_ioctl.c
new file mode 100644
index 0000000000..2bd5da9961
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_ioctl.c
@@ -0,0 +1,1741 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <sys/errno.h>
+#include <sys/systm.h>
+#include <sys/file.h>
+#include <sys/filio.h>
+#include <sys/vnode.h>
+#include <sys/fcntl.h>
+#include <sys/termio.h>
+#include <sys/termios.h>
+#include <sys/ptyvar.h>
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <sys/sockio.h>
+#include <sys/stropts.h>
+#include <sys/ptms.h>
+#include <sys/cred.h>
+#include <sys/cred_impl.h>
+#include <sys/sysmacros.h>
+#include <sys/lx_misc.h>
+#include <sys/lx_ptm.h>
+#include <sys/sunddi.h>
+#include <sys/thread.h>
+#include <sys/proc.h>
+#include <sys/session.h>
+#include <sys/kmem.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <net/if_arp.h>
+#include <sys/ioccom.h>
+#include <sys/dtrace.h>
+#include <sys/ethernet.h>
+#include <sys/dlpi.h>
+#include <sys/lx_autofs.h>
+#include <sys/netstack.h>
+#include <inet/ip.h>
+#include <inet/ip_if.h>
+#include <sys/dkio.h>
+#include <sys/sdt.h>
+
+/*
+ * Linux ioctl types
+ */
+#define LX_IOC_TYPE_HD 0x03
+#define LX_IOC_TYPE_BLK 0x12
+#define LX_IOC_TYPE_FD 0x54
+#define LX_IOC_TYPE_DTRACE 0x68
+#define LX_IOC_TYPE_SOCK 0x89
+#define LX_IOC_TYPE_AUTOFS 0x93
+
+/*
+ * Supported ioctls
+ */
+#define LX_HDIO_GETGEO 0x0301
+#define LX_BLKGETSIZE 0x1260
+#define LX_BLKSSZGET 0x1268
+#define LX_BLKGETSIZE64 0x80081272
+#define LX_TCGETS 0x5401
+#define LX_TCSETS 0x5402
+#define LX_TCSETSW 0x5403
+#define LX_TCSETSF 0x5404
+#define LX_TCGETA 0x5405
+#define LX_TCSETA 0x5406
+#define LX_TCSETAW 0x5407
+#define LX_TCSETAF 0x5408
+#define LX_TCSBRK 0x5409
+#define LX_TCXONC 0x540a
+#define LX_TCFLSH 0x540b
+#define LX_TIOCEXCL 0x540c
+#define LX_TIOCNXCL 0x540d
+#define LX_TIOCSCTTY 0x540e
+#define LX_TIOCGPGRP 0x540f
+#define LX_TIOCSPGRP 0x5410
+#define LX_TIOCOUTQ 0x5411
+#define LX_TIOCSTI 0x5412
+#define LX_TIOCGWINSZ 0x5413
+#define LX_TIOCSWINSZ 0x5414
+#define LX_TIOCMGET 0x5415
+#define LX_TIOCMBIS 0x5416
+#define LX_TIOCMBIC 0x5417
+#define LX_TIOCMSET 0x5418
+#define LX_TIOCGSOFTCAR 0x5419
+#define LX_TIOCSSOFTCAR 0x541a
+#define LX_FIONREAD 0x541b
+#define LX_TIOCPKT 0x5420
+#define LX_FIONBIO 0x5421
+#define LX_TIOCNOTTY 0x5422
+#define LX_TIOCSETD 0x5423
+#define LX_TIOCGETD 0x5424
+#define LX_TCSBRKP 0x5425
+#define LX_TIOCGSID 0x5429
+#define LX_TIOCGPTN 0x80045430
+#define LX_TIOCSPTLCK 0x40045431
+#define LX_FIONCLEX 0x5450
+#define LX_FIOCLEX 0x5451
+#define LX_FIOASYNC 0x5452
+#define LX_FIOSETOWN 0x8901
+#define LX_SIOCSPGRP 0x8902
+#define LX_FIOGETOWN 0x8903
+#define LX_SIOCGPGRP 0x8904
+#define LX_SIOCATMARK 0x8905
+#define LX_SIOCGSTAMP 0x8906
+#define LX_SIOCADDRT 0x890b
+#define LX_SIOCDELRT 0x890c
+#define LX_SIOCRTMSG 0x890d
+#define LX_SIOCGIFNAME 0x8910
+#define LX_SIOCSIFLINK 0x8911
+#define LX_SIOCGIFCONF 0x8912
+#define LX_SIOCGIFFLAGS 0x8913
+#define LX_SIOCSIFFLAGS 0x8914
+#define LX_SIOCGIFADDR 0x8915
+#define LX_SIOCSIFADDR 0x8916
+#define LX_SIOCGIFDSTADDR 0x8917
+#define LX_SIOCSIFDSTADDR 0x8918
+#define LX_SIOCGIFBRDADDR 0x8919
+#define LX_SIOCSIFBRDADDR 0x891a
+#define LX_SIOCGIFNETMASK 0x891b
+#define LX_SIOCSIFNETMASK 0x891c
+#define LX_SIOCGIFMETRIC 0x891d
+#define LX_SIOCSIFMETRIC 0x891e
+#define LX_SIOCGIFMEM 0x891f
+#define LX_SIOCSIFMEM 0x8920
+#define LX_SIOCGIFMTU 0x8921
+#define LX_SIOCSIFMTU 0x8922
+#define LX_SIOCSIFNAME 0x8923
+#define LX_SIOCSIFHWADDR 0x8924
+#define LX_SIOCGIFENCAP 0x8925
+#define LX_SIOCSIFENCAP 0x8926
+#define LX_SIOCGIFHWADDR 0x8927
+#define LX_SIOCGIFSLAVE 0x8929
+#define LX_SIOCSIFSLAVE 0x8930
+#define LX_SIOCADDMULTI 0x8931
+#define LX_SIOCDELMULTI 0x8932
+#define LX_SIOCGIFINDEX 0x8933
+#define LX_SIOCSIFPFLAGS 0x8934
+#define LX_SIOCGIFPFLAGS 0x8935
+#define LX_SIOCDIFADDR 0x8936
+#define LX_SIOCSIFHWBROADCAST 0x8937
+#define LX_SIOCGIFCOUNT 0x8938
+#define LX_SIOCGIFBR 0x8940
+#define LX_SIOCSIFBR 0x8941
+#define LX_SIOCGIFTXQLEN 0x8942
+#define LX_SIOCSIFTXQLEN 0x8943
+#define LX_SIOCETHTOOL 0x8946
+#define LX_SIOCGMIIPHY 0x8947
+#define LX_SIOCGMIIREG 0x8948
+#define LX_SIOCSMIIREG 0x8949
+#define LX_SIOCWANDEV 0x894a
+#define LX_SIOCOUTQNSD 0x894b
+#define LX_SIOCDARP 0x8953
+#define LX_SIOCGARP 0x8954
+#define LX_SIOCSARP 0x8955
+#define LX_SIOCDRARP 0x8960
+#define LX_SIOCGRARP 0x8961
+#define LX_SIOCSRARP 0x8962
+#define LX_SIOCGIFMAP 0x8970
+#define LX_SIOCSIFMAP 0x8971
+#define LX_SIOCADDDLCI 0x8980
+#define LX_SIOCDELDLCI 0x8981
+#define LX_SIOCGIFVLAN 0x8982
+#define LX_SIOCSIFVLAN 0x8983
+#define LX_SIOCBONDENSLAVE 0x8990
+#define LX_SIOCBONDRELEASE 0x8991
+#define LX_SIOCBONDSETHWADDR 0x8992
+#define LX_SIOCBONDSLAVEINFOQUERY 0x8993
+#define LX_SIOCBONDINFOQUERY 0x8994
+#define LX_SIOCBONDCHANGEACTIVE 0x8995
+#define LX_SIOCBRADDBR 0x89a0
+#define LX_SIOCBRDELBR 0x89a1
+#define LX_SIOCBRADDIF 0x89a2
+#define LX_SIOCBRDELIF 0x89a3
+#define LX_SIOCSHWTSTAMP 0x89b0
+#define LX_SIOCGHWTSTAMP 0x89b1
+#define LX_SIOCDEVPRIVATE 0x89f0
+#define LX_SIOCPROTOPRIVATE 0x89e0
+
+#define FLUSER(fp) fp->f_flag | get_udatamodel()
+#define FLFAKE(fp) fp->f_flag | FKIOCTL
+
+/*
+ * LX_NCC must be different from LX_NCCS since while the termio and termios
+ * structures may look similar they are fundamentally different sizes and
+ * have different members.
+ */
+#define LX_NCC 8
+#define LX_NCCS 19
+
+struct lx_termio {
+ unsigned short c_iflag; /* input mode flags */
+ unsigned short c_oflag; /* output mode flags */
+ unsigned short c_cflag; /* control mode flags */
+ unsigned short c_lflag; /* local mode flags */
+ unsigned char c_line; /* line discipline */
+ unsigned char c_cc[LX_NCC]; /* control characters */
+};
+
+struct lx_termios {
+ uint32_t c_iflag; /* input mode flags */
+ uint32_t c_oflag; /* output mode flags */
+ uint32_t c_cflag; /* control mode flags */
+ uint32_t c_lflag; /* local mode flags */
+ unsigned char c_line; /* line discipline */
+ unsigned char c_cc[LX_NCCS]; /* control characters */
+};
+
+/*
+ * c_cc characters which are valid for lx_termio and lx_termios
+ */
+#define LX_VINTR 0
+#define LX_VQUIT 1
+#define LX_VERASE 2
+#define LX_VKILL 3
+#define LX_VEOF 4
+#define LX_VTIME 5
+#define LX_VMIN 6
+#define LX_VSWTC 7
+
+/*
+ * c_cc characters which are valid for lx_termios
+ */
+#define LX_VSTART 8
+#define LX_VSTOP 9
+#define LX_VSUSP 10
+#define LX_VEOL 11
+#define LX_VREPRINT 12
+#define LX_VDISCARD 13
+#define LX_VWERASE 14
+#define LX_VLNEXT 15
+#define LX_VEOL2 16
+
+/*
+ * Defaults needed for SunOS to Linux format conversion.
+ * See INIT_C_CC in linux-stable/include/asm-generic/termios.h
+ */
+#define LX_DEF_VTIME 0
+#define LX_DEF_VMIN 1
+#define LX_DEF_VEOF '\004'
+#define LX_DEF_VEOL 0
+
+/* VSD key for lx_cc information */
+static uint_t lx_ioctl_vsd = 0;
+
+extern int lx_lpid_to_spair(pid_t l_pid, pid_t *s_pid, id_t *s_tid);
+
+/* Terminal helpers */
+
+static void
+l2s_termios(struct lx_termios *l_tios, struct termios *s_tios)
+{
+ ASSERT((l_tios != NULL) && (s_tios != NULL));
+
+ bzero(s_tios, sizeof (*s_tios));
+
+ s_tios->c_iflag = l_tios->c_iflag;
+ s_tios->c_oflag = l_tios->c_oflag;
+ s_tios->c_cflag = l_tios->c_cflag;
+ s_tios->c_lflag = l_tios->c_lflag;
+
+ if (s_tios->c_lflag & ICANON) {
+ s_tios->c_cc[VEOF] = l_tios->c_cc[LX_VEOF];
+ s_tios->c_cc[VEOL] = l_tios->c_cc[LX_VEOL];
+ } else {
+ s_tios->c_cc[VMIN] = l_tios->c_cc[LX_VMIN];
+ s_tios->c_cc[VTIME] = l_tios->c_cc[LX_VTIME];
+ }
+
+ s_tios->c_cc[VEOL2] = l_tios->c_cc[LX_VEOL2];
+ s_tios->c_cc[VERASE] = l_tios->c_cc[LX_VERASE];
+ s_tios->c_cc[VKILL] = l_tios->c_cc[LX_VKILL];
+ s_tios->c_cc[VREPRINT] = l_tios->c_cc[LX_VREPRINT];
+ s_tios->c_cc[VLNEXT] = l_tios->c_cc[LX_VLNEXT];
+ s_tios->c_cc[VWERASE] = l_tios->c_cc[LX_VWERASE];
+ s_tios->c_cc[VINTR] = l_tios->c_cc[LX_VINTR];
+ s_tios->c_cc[VQUIT] = l_tios->c_cc[LX_VQUIT];
+ s_tios->c_cc[VSWTCH] = l_tios->c_cc[LX_VSWTC];
+ s_tios->c_cc[VSTART] = l_tios->c_cc[LX_VSTART];
+ s_tios->c_cc[VSTOP] = l_tios->c_cc[LX_VSTOP];
+ s_tios->c_cc[VSUSP] = l_tios->c_cc[LX_VSUSP];
+ s_tios->c_cc[VDISCARD] = l_tios->c_cc[LX_VDISCARD];
+}
+
+static void
+l2s_termio(struct lx_termio *l_tio, struct termio *s_tio)
+{
+ ASSERT((l_tio != NULL) && (s_tio != NULL));
+
+ bzero(s_tio, sizeof (*s_tio));
+
+ s_tio->c_iflag = l_tio->c_iflag;
+ s_tio->c_oflag = l_tio->c_oflag;
+ s_tio->c_cflag = l_tio->c_cflag;
+ s_tio->c_lflag = l_tio->c_lflag;
+
+ if (s_tio->c_lflag & ICANON) {
+ s_tio->c_cc[VEOF] = l_tio->c_cc[LX_VEOF];
+ } else {
+ s_tio->c_cc[VMIN] = l_tio->c_cc[LX_VMIN];
+ s_tio->c_cc[VTIME] = l_tio->c_cc[LX_VTIME];
+ }
+
+ s_tio->c_cc[VINTR] = l_tio->c_cc[LX_VINTR];
+ s_tio->c_cc[VQUIT] = l_tio->c_cc[LX_VQUIT];
+ s_tio->c_cc[VERASE] = l_tio->c_cc[LX_VERASE];
+ s_tio->c_cc[VKILL] = l_tio->c_cc[LX_VKILL];
+ s_tio->c_cc[VSWTCH] = l_tio->c_cc[LX_VSWTC];
+}
+
+static void
+termios2lx_cc(struct lx_termios *l_tios, struct lx_cc *lio)
+{
+ ASSERT((l_tios != NULL) && (lio != NULL));
+
+ bzero(lio, sizeof (*lio));
+
+ lio->veof = l_tios->c_cc[LX_VEOF];
+ lio->veol = l_tios->c_cc[LX_VEOL];
+ lio->vmin = l_tios->c_cc[LX_VMIN];
+ lio->vtime = l_tios->c_cc[LX_VTIME];
+}
+
+static void
+termio2lx_cc(struct lx_termio *l_tio, struct lx_cc *lio)
+{
+ ASSERT((l_tio != NULL) && (lio != NULL));
+
+ bzero(lio, sizeof (*lio));
+
+ lio->veof = l_tio->c_cc[LX_VEOF];
+ lio->veol = 0;
+ lio->vmin = l_tio->c_cc[LX_VMIN];
+ lio->vtime = l_tio->c_cc[LX_VTIME];
+}
+
+static void
+s2l_termios(struct termios *s_tios, struct lx_termios *l_tios)
+{
+ ASSERT((s_tios != NULL) && (l_tios != NULL));
+
+ bzero(l_tios, sizeof (*l_tios));
+
+ l_tios->c_iflag = s_tios->c_iflag;
+ l_tios->c_oflag = s_tios->c_oflag;
+ l_tios->c_cflag = s_tios->c_cflag;
+ l_tios->c_lflag = s_tios->c_lflag;
+
+ /*
+ * Since use of the VMIN/VTIME and VEOF/VEOL control characters is
+ * mutually exclusive (determined by ICANON), SunOS aliases them in the
+ * c_cc field in termio/termios. Linux does not perform this aliasing,
+ * so it expects that the default values are present regardless of
+ * ICANON status.
+ *
+ * These defaults can be overridden later by any values stored via the
+ * lx_cc mechanism.
+ */
+ if (s_tios->c_lflag & ICANON) {
+ l_tios->c_cc[LX_VEOF] = s_tios->c_cc[VEOF];
+ l_tios->c_cc[LX_VEOL] = s_tios->c_cc[VEOL];
+ l_tios->c_cc[LX_VTIME] = LX_DEF_VTIME;
+ l_tios->c_cc[LX_VMIN] = LX_DEF_VMIN;
+
+ } else {
+ l_tios->c_cc[LX_VMIN] = s_tios->c_cc[VMIN];
+ l_tios->c_cc[LX_VTIME] = s_tios->c_cc[VTIME];
+ l_tios->c_cc[LX_VEOF] = LX_DEF_VEOF;
+ l_tios->c_cc[LX_VEOL] = LX_DEF_VEOL;
+ }
+
+ l_tios->c_cc[LX_VEOL2] = s_tios->c_cc[VEOL2];
+ l_tios->c_cc[LX_VERASE] = s_tios->c_cc[VERASE];
+ l_tios->c_cc[LX_VKILL] = s_tios->c_cc[VKILL];
+ l_tios->c_cc[LX_VREPRINT] = s_tios->c_cc[VREPRINT];
+ l_tios->c_cc[LX_VLNEXT] = s_tios->c_cc[VLNEXT];
+ l_tios->c_cc[LX_VWERASE] = s_tios->c_cc[VWERASE];
+ l_tios->c_cc[LX_VINTR] = s_tios->c_cc[VINTR];
+ l_tios->c_cc[LX_VQUIT] = s_tios->c_cc[VQUIT];
+ l_tios->c_cc[LX_VSWTC] = s_tios->c_cc[VSWTCH];
+ l_tios->c_cc[LX_VSTART] = s_tios->c_cc[VSTART];
+ l_tios->c_cc[LX_VSTOP] = s_tios->c_cc[VSTOP];
+ l_tios->c_cc[LX_VSUSP] = s_tios->c_cc[VSUSP];
+ l_tios->c_cc[LX_VDISCARD] = s_tios->c_cc[VDISCARD];
+}
+
+static void
+s2l_termio(struct termio *s_tio, struct lx_termio *l_tio)
+{
+ ASSERT((s_tio != NULL) && (l_tio != NULL));
+
+ bzero(l_tio, sizeof (*l_tio));
+
+ l_tio->c_iflag = s_tio->c_iflag;
+ l_tio->c_oflag = s_tio->c_oflag;
+ l_tio->c_cflag = s_tio->c_cflag;
+ l_tio->c_lflag = s_tio->c_lflag;
+
+ if (s_tio->c_lflag & ICANON) {
+ l_tio->c_cc[LX_VEOF] = s_tio->c_cc[VEOF];
+ l_tio->c_cc[LX_VTIME] = LX_DEF_VTIME;
+ l_tio->c_cc[LX_VMIN] = LX_DEF_VMIN;
+ } else {
+ l_tio->c_cc[LX_VMIN] = s_tio->c_cc[VMIN];
+ l_tio->c_cc[LX_VTIME] = s_tio->c_cc[VTIME];
+ l_tio->c_cc[LX_VEOF] = LX_DEF_VEOF;
+ }
+
+ l_tio->c_cc[LX_VINTR] = s_tio->c_cc[VINTR];
+ l_tio->c_cc[LX_VQUIT] = s_tio->c_cc[VQUIT];
+ l_tio->c_cc[LX_VERASE] = s_tio->c_cc[VERASE];
+ l_tio->c_cc[LX_VKILL] = s_tio->c_cc[VKILL];
+ l_tio->c_cc[LX_VSWTC] = s_tio->c_cc[VSWTCH];
+}
+
+static void
+set_lx_cc(vnode_t *vp, struct lx_cc *lio)
+{
+ struct lx_cc *cur;
+ /*
+ * Linux expects that the termio/termios control characters are
+ * preserved more strictly than illumos supports. In order to preserve
+ * the illusion that the characters are maintained, they are stored as
+ * vnode-specific data.
+ */
+ mutex_enter(&vp->v_vsd_lock);
+ cur = (struct lx_cc *)vsd_get(vp, lx_ioctl_vsd);
+ if (cur == NULL) {
+ cur = kmem_alloc(sizeof (struct lx_cc), KM_SLEEP);
+ bcopy(lio, cur, sizeof (struct lx_cc));
+ (void) vsd_set(vp, lx_ioctl_vsd, cur);
+ } else {
+ bcopy(lio, cur, sizeof (struct lx_cc));
+ }
+ mutex_exit(&vp->v_vsd_lock);
+}
+
+static int
+get_lx_cc(vnode_t *vp, struct lx_cc *lio)
+{
+ struct lx_cc *cur;
+ int rv = 1;
+ mutex_enter(&vp->v_vsd_lock);
+ cur = (struct lx_cc *)vsd_get(vp, lx_ioctl_vsd);
+ if (cur != NULL) {
+ bcopy(cur, lio, sizeof (*lio));
+ rv = 0;
+ }
+ mutex_exit(&vp->v_vsd_lock);
+ return (rv);
+}
+
+/* Socket helpers */
+
+typedef struct lx_ifreq32 {
+ char ifr_name[IFNAMSIZ];
+ union {
+ struct sockaddr ifru_addr;
+ };
+} lx_ifreq32_t;
+
+typedef struct lx_ifreq64 {
+ char ifr_name[IFNAMSIZ];
+ union {
+ struct sockaddr ifru_addr;
+ /* pad this out to the Linux size */
+ uint64_t ifmap[3];
+ };
+} lx_ifreq64_t;
+
+typedef struct lx_ifconf32 {
+ int32_t if_len;
+ caddr32_t if_buf;
+} lx_ifconf32_t;
+
+typedef struct lx_ifconf64 {
+ int32_t if_len;
+ caddr_t if_buf;
+} lx_ifconf64_t;
+
+
+/* Generic translators */
+
+static int
+ict_pass(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ int error = 0;
+ int rv;
+
+ error = VOP_IOCTL(fp->f_vnode, cmd, arg, FLUSER(fp), fp->f_cred, &rv,
+ NULL);
+ return ((error != 0) ? set_errno(error) : 0);
+}
+
+static int
+ict_fionbio(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ vnode_t *vp;
+ int32_t iflag, flags;
+ int error;
+
+ if (copyin((caddr_t)arg, &iflag, sizeof (iflag)))
+ return (set_errno(EFAULT));
+
+ mutex_enter(&fp->f_tlock);
+ vp = fp->f_vnode;
+ flags = fp->f_flag;
+ /* Linux sets NONBLOCK instead of FIONBIO */
+ if (iflag)
+ flags |= FNONBLOCK;
+ else
+ flags &= ~FNONBLOCK;
+ /* push the flag down */
+ error = VOP_SETFL(vp, fp->f_flag, flags, fp->f_cred, NULL);
+ fp->f_flag = flags;
+ mutex_exit(&fp->f_tlock);
+ return ((error != 0) ? set_errno(error) : 0);
+}
+
+static int
+ict_fionread(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ vnode_t *vp;
+ struct vattr vattr;
+ int error = 0;
+ int rv;
+ /*
+ * offset is int32_t because that is what FIONREAD is defined in terms
+ * of. We cap at INT_MAX as in other cases for this ioctl.
+ */
+ int32_t offset;
+
+ vp = fp->f_vnode;
+
+ if (vp->v_type == VREG || vp->v_type == VDIR) {
+ vattr.va_mask = AT_SIZE;
+ error = VOP_GETATTR(vp, &vattr, 0, fp->f_cred, NULL);
+ if (error != 0)
+ return (set_errno(error));
+ offset = MIN(vattr.va_size - fp->f_offset, INT_MAX);
+ if (copyout(&offset, (caddr_t)arg, sizeof (offset)))
+ return (set_errno(EFAULT));
+ } else {
+ error = VOP_IOCTL(vp, FIONREAD, arg, FLUSER(fp), fp->f_cred,
+ &rv, NULL);
+ if (error)
+ return (set_errno(error));
+ }
+ return (0);
+}
+
+/*
+ * hard disk-related translators
+ *
+ * Note that the normal disk ioctls only work for VCHR devices. See spec_ioctl
+ * which will return ENOTTY for a VBLK device. However, fdisk, etc. expect to
+ * work with block devices.
+ *
+ * We expect a zvol to be the primary block device we're interacting with and
+ * we use the zone's lxzd_vdisks list to handle zvols specifically.
+ */
+
+typedef struct lx_hd_geom {
+ unsigned char heads;
+ unsigned char sectors;
+ unsigned short cylinders;
+ unsigned long start;
+} lx_hd_geom_t;
+
+static lx_virt_disk_t *
+lx_lookup_zvol(lx_zone_data_t *lxzd, dev_t dev)
+{
+ lx_virt_disk_t *vd;
+
+ vd = list_head(lxzd->lxzd_vdisks);
+ while (vd != NULL) {
+ if (vd->lxvd_type == LXVD_ZVOL && vd->lxvd_real_dev == dev)
+ return (vd);
+ vd = list_next(lxzd->lxzd_vdisks, vd);
+ }
+
+ return (NULL);
+}
+
+/*
+ * See zvol_ioctl() which always fails for DKIOCGGEOM. The geometry for a
+ * zvol (or really any modern disk) is made up, so we do that here as well.
+ */
+static int
+ict_hdgetgeo(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ lx_hd_geom_t lx_geom;
+ lx_zone_data_t *lxzd;
+
+ if (fp->f_vnode->v_type != VCHR && fp->f_vnode->v_type != VBLK)
+ return (set_errno(EINVAL));
+
+ lxzd = ztolxzd(curproc->p_zone);
+ ASSERT(lxzd != NULL);
+ ASSERT(lxzd->lxzd_vdisks != NULL);
+
+ if (getmajor(fp->f_vnode->v_rdev) == getmajor(lxzd->lxzd_zfs_dev)) {
+ lx_virt_disk_t *vd;
+
+ vd = lx_lookup_zvol(lxzd, fp->f_vnode->v_rdev);
+ if (vd == NULL) {
+ /* should only happen if new zvol */
+ bzero(&lx_geom, sizeof (lx_geom));
+ } else {
+ diskaddr_t tot;
+
+ tot = vd->lxvd_volsize / vd->lxvd_blksize;
+
+ /*
+ * Since the 'sectors' value is only one byte we make
+ * up heads/cylinder values to get things to fit.
+ * We roundup the number of heads to ensure we don't
+ * overflow the sectors due to truncation.
+ */
+ lx_geom.heads = lx_geom.cylinders = (tot / 0xff) + 1;
+ lx_geom.sectors = tot / lx_geom.heads;
+ lx_geom.start = 0;
+ }
+ } else {
+ int res, rv;
+ struct dk_geom geom;
+
+ res = VOP_IOCTL(fp->f_vnode, DKIOCGGEOM, (intptr_t)&geom,
+ fp->f_flag | FKIOCTL, fp->f_cred, &rv, NULL);
+ if (res > 0)
+ return (set_errno(res));
+
+ lx_geom.heads = geom.dkg_nhead;
+ lx_geom.sectors = geom.dkg_nsect;
+ lx_geom.cylinders = geom.dkg_ncyl;
+ lx_geom.start = 0;
+ }
+
+ if (copyout(&lx_geom, (caddr_t)arg, sizeof (lx_geom)))
+ return (set_errno(EFAULT));
+ return (0);
+}
+
+/*
+ * Per the Linux sd(4) man page, get the number of sectors. The linux/fs.h
+ * header says its 512 byte blocks.
+ */
+static int
+ict_blkgetsize(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ diskaddr_t tot;
+ lx_zone_data_t *lxzd;
+
+ if (fp->f_vnode->v_type != VCHR && fp->f_vnode->v_type != VBLK)
+ return (set_errno(EINVAL));
+
+ lxzd = ztolxzd(curproc->p_zone);
+ ASSERT(lxzd != NULL);
+ ASSERT(lxzd->lxzd_vdisks != NULL);
+
+ if (getmajor(fp->f_vnode->v_rdev) == getmajor(lxzd->lxzd_zfs_dev)) {
+ lx_virt_disk_t *vd;
+
+ vd = lx_lookup_zvol(lxzd, fp->f_vnode->v_rdev);
+ if (vd == NULL) {
+ /* should only happen if new zvol */
+ tot = 0;
+ } else {
+ tot = vd->lxvd_volsize / 512;
+ }
+ } else {
+ int res, rv;
+ struct dk_minfo minfo;
+
+ res = VOP_IOCTL(fp->f_vnode, DKIOCGMEDIAINFO, (intptr_t)&minfo,
+ fp->f_flag | FKIOCTL, fp->f_cred, &rv, NULL);
+ if (res > 0)
+ return (set_errno(res));
+
+ tot = minfo.dki_capacity;
+ if (minfo.dki_lbsize > 512) {
+ uint_t bsize = minfo.dki_lbsize / 512;
+
+ tot *= bsize;
+ }
+ }
+
+ if (copyout(&tot, (caddr_t)arg, sizeof (long)))
+ return (set_errno(EFAULT));
+ return (0);
+}
+
+/*
+ * Get the sector size (i.e. the logical block size).
+ */
+static int
+ict_blkgetssize(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ uint_t bsize;
+ lx_zone_data_t *lxzd;
+
+ if (fp->f_vnode->v_type != VCHR && fp->f_vnode->v_type != VBLK)
+ return (set_errno(EINVAL));
+
+ lxzd = ztolxzd(curproc->p_zone);
+ ASSERT(lxzd != NULL);
+ ASSERT(lxzd->lxzd_vdisks != NULL);
+
+ if (getmajor(fp->f_vnode->v_rdev) == getmajor(lxzd->lxzd_zfs_dev)) {
+ lx_virt_disk_t *vd;
+
+ vd = lx_lookup_zvol(lxzd, fp->f_vnode->v_rdev);
+ if (vd == NULL) {
+ /* should only happen if new zvol */
+ bsize = 0;
+ } else {
+ bsize = (uint_t)vd->lxvd_blksize;
+ }
+ } else {
+ int res, rv;
+ struct dk_minfo minfo;
+
+ res = VOP_IOCTL(fp->f_vnode, DKIOCGMEDIAINFO, (intptr_t)&minfo,
+ fp->f_flag | FKIOCTL, fp->f_cred, &rv, NULL);
+ if (res > 0)
+ return (set_errno(res));
+
+ bsize = (uint_t)minfo.dki_lbsize;
+ }
+
+ if (copyout(&bsize, (caddr_t)arg, sizeof (bsize)))
+ return (set_errno(EFAULT));
+ return (0);
+}
+
+/*
+ * Get the size. The linux/fs.h header says its in bytes.
+ */
+static int
+ict_blkgetsize64(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ uint64_t tot;
+ lx_zone_data_t *lxzd;
+
+ if (fp->f_vnode->v_type != VCHR && fp->f_vnode->v_type != VBLK)
+ return (set_errno(EINVAL));
+
+ lxzd = ztolxzd(curproc->p_zone);
+ ASSERT(lxzd != NULL);
+ ASSERT(lxzd->lxzd_vdisks != NULL);
+
+ if (getmajor(fp->f_vnode->v_rdev) == getmajor(lxzd->lxzd_zfs_dev)) {
+ lx_virt_disk_t *vd;
+
+ vd = lx_lookup_zvol(lxzd, fp->f_vnode->v_rdev);
+ if (vd == NULL) {
+ /* should only happen if new zvol */
+ tot = 0;
+ } else {
+ tot = vd->lxvd_volsize;
+ }
+ } else {
+ int res, rv;
+ struct dk_minfo minfo;
+
+ res = VOP_IOCTL(fp->f_vnode, DKIOCGMEDIAINFO, (intptr_t)&minfo,
+ fp->f_flag | FKIOCTL, fp->f_cred, &rv, NULL);
+ if (res > 0)
+ return (set_errno(res));
+
+ tot = minfo.dki_capacity * minfo.dki_lbsize;
+ }
+
+ if (copyout(&tot, (caddr_t)arg, sizeof (uint64_t)))
+ return (set_errno(EFAULT));
+ return (0);
+}
+
+/* Terminal-related translators */
+
+static int
+ict_tcsets(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ struct lx_termios l_tios;
+ struct termios s_tios;
+ struct lx_cc lio;
+ int error, rv;
+
+ ASSERT(cmd == TCSETS || cmd == TCSETSW || cmd == TCSETSF);
+
+ if (copyin((struct lx_termios *)arg, &l_tios, sizeof (l_tios)) != 0)
+ return (set_errno(EFAULT));
+ termios2lx_cc(&l_tios, &lio);
+ l2s_termios(&l_tios, &s_tios);
+
+ error = VOP_IOCTL(fp->f_vnode, cmd, (intptr_t)&s_tios,
+ FLFAKE(fp), fp->f_cred, &rv, NULL);
+ if (error)
+ return (set_errno(error));
+ /* preserve lx_cc */
+ set_lx_cc(fp->f_vnode, &lio);
+
+ return (0);
+}
+
+static int
+ict_tcseta(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ struct lx_termio l_tio;
+ struct termio s_tio;
+ struct lx_cc lio;
+ int error, rv;
+
+ ASSERT(cmd == TCSETA || cmd == TCSETAW || cmd == TCSETAF);
+
+ if (copyin((struct lx_termio *)arg, &l_tio, sizeof (l_tio)) != 0)
+ return (set_errno(EFAULT));
+ l2s_termio(&l_tio, &s_tio);
+ termio2lx_cc(&l_tio, &lio);
+
+ error = VOP_IOCTL(fp->f_vnode, cmd, (intptr_t)&s_tio,
+ FLFAKE(fp), fp->f_cred, &rv, NULL);
+ if (error)
+ return (set_errno(error));
+ /* preserve lx_cc */
+ set_lx_cc(fp->f_vnode, &lio);
+
+ return (0);
+}
+
+static int
+ict_tcgets_ptm(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ struct lx_termios l_tios;
+ struct termios s_tios, *s_tiosd;
+ uint_t s_tiosl;
+
+ /* get termios defaults */
+ if (ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, ddi_root_node(),
+ DDI_PROP_NOTPROM, "ttymodes", (uchar_t **)&s_tiosd,
+ &s_tiosl) != DDI_SUCCESS)
+ return (EIO);
+ ASSERT(s_tiosl == sizeof (*s_tiosd));
+ bcopy(s_tiosd, &s_tios, sizeof (s_tios));
+ ddi_prop_free(s_tiosd);
+
+ /* Now munge the data to how Linux wants it. */
+ s2l_termios(&s_tios, &l_tios);
+ if (copyout(&l_tios, (struct lx_termios *)arg, sizeof (l_tios)) != 0)
+ return (set_errno(EFAULT));
+
+ return (0);
+}
+
+static int
+ict_tcgets_native(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ struct lx_termios l_tios;
+ struct termios s_tios;
+ struct lx_cc lio;
+ int error, rv;
+
+ error = VOP_IOCTL(fp->f_vnode, cmd, (intptr_t)&s_tios,
+ FLFAKE(fp), fp->f_cred, &rv, NULL);
+ if (error)
+ return (set_errno(error));
+
+ /* Now munge the data to how Linux wants it. */
+ s2l_termios(&s_tios, &l_tios);
+
+ /* return preserved lx_cc */
+ if (get_lx_cc(fp->f_vnode, &lio) == 0) {
+ l_tios.c_cc[LX_VEOF] = lio.veof;
+ l_tios.c_cc[LX_VEOL] = lio.veol;
+ l_tios.c_cc[LX_VMIN] = lio.vmin;
+ l_tios.c_cc[LX_VTIME] = lio.vtime;
+ }
+
+ if (copyout(&l_tios, (struct lx_termios *)arg, sizeof (l_tios)) != 0)
+ return (set_errno(EFAULT));
+
+ return (0);
+}
+
+static int
+ict_tcgets(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ if (getmajor(fp->f_vnode->v_rdev) == ddi_name_to_major(LX_PTM_DRV))
+ return (ict_tcgets_ptm(fp, cmd, arg, lxcmd));
+ else
+ return (ict_tcgets_native(fp, cmd, arg, lxcmd));
+}
+
+static int
+ict_tcgeta(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ struct lx_termio l_tio;
+ struct termio s_tio;
+ struct lx_cc lio;
+ int error, rv;
+
+ error = VOP_IOCTL(fp->f_vnode, cmd, (intptr_t)&s_tio,
+ FLFAKE(fp), fp->f_cred, &rv, NULL);
+ if (error)
+ return (set_errno(error));
+
+ s2l_termio(&s_tio, &l_tio);
+ /* return preserved lx_cc */
+ if (get_lx_cc(fp->f_vnode, &lio) == 0) {
+ l_tio.c_cc[LX_VEOF] = lio.veof;
+ l_tio.c_cc[LX_VMIN] = lio.vmin;
+ l_tio.c_cc[LX_VTIME] = lio.vtime;
+ }
+
+ if (copyout(&l_tio, (struct lx_termios *)arg, sizeof (l_tio)) != 0)
+ return (set_errno(EFAULT));
+
+ return (0);
+}
+
+static int
+ict_tiocspgrp(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ pid_t lpid, spid, tid;
+ int error, rv;
+
+ /* Converting to the illumos pid is necessary */
+ if (copyin((pid_t *)arg, &lpid, sizeof (lpid)) < 0)
+ return (set_errno(EFAULT));
+ if (lx_lpid_to_spair(lpid, &spid, &tid) < 0)
+ return (set_errno(EPERM));
+
+ error = VOP_IOCTL(fp->f_vnode, cmd, (intptr_t)&spid,
+ fp->f_flag |FKIOCTL, fp->f_cred, &rv, NULL);
+ return ((error != 0) ? set_errno(error) : 0);
+}
+
+static int
+ict_tcsbrkp(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ int rv, error;
+ /* use null duration to emulate TCSBRKP */
+ int dur = 0;
+ error = VOP_IOCTL(fp->f_vnode, TCSBRK, (intptr_t)&dur,
+ FLFAKE(fp), fp->f_cred, &rv, NULL);
+ return ((error != 0) ? set_errno(error) : 0);
+}
+
+static int
+ict_tiocgpgrp(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ pid_t spgrp;
+ int error, rv;
+
+ error = VOP_IOCTL(fp->f_vnode, cmd, (intptr_t)&spgrp, FLFAKE(fp),
+ fp->f_cred, &rv, NULL);
+ if (error == 0) {
+ if (spgrp == curproc->p_zone->zone_proc_initpid) {
+ spgrp = 1;
+ }
+ if (copyout(&spgrp, (caddr_t)arg, sizeof (spgrp))) {
+ return (set_errno(EFAULT));
+ }
+ }
+ return ((error != 0) ? set_errno(error) : 0);
+}
+
+static int
+ict_sptlock(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ struct strioctl istr;
+ int error, rv;
+
+ istr.ic_cmd = UNLKPT;
+ istr.ic_len = 0;
+ istr.ic_timout = 0;
+ istr.ic_dp = NULL;
+ error = VOP_IOCTL(fp->f_vnode, I_STR, (intptr_t)&istr,
+ fp->f_flag |FKIOCTL, fp->f_cred, &rv, NULL);
+ /*
+ * The success/fail return values are different between Linux
+ * and illumos. Linux expects 0 or -1. Illumos can return
+ * positive number on success.
+ */
+ return ((error != 0) ? set_errno(error) : 0);
+}
+
+static int
+ict_gptn(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ struct strioctl istr;
+ cred_t *cr;
+ pt_own_t pto;
+ int error, rv;
+ int ptyno;
+
+ /* This operation is only valid for the lx_ptm device. */
+ if (getmajor(fp->f_vnode->v_rdev) != ddi_name_to_major(LX_PTM_DRV))
+ return (set_errno(ENOTTY));
+
+ cr = CRED();
+ pto.pto_ruid = cr->cr_uid;
+ pto.pto_rgid = cr->cr_gid;
+
+ istr.ic_cmd = OWNERPT;
+ istr.ic_len = sizeof (pto);
+ istr.ic_timout = 0;
+ istr.ic_dp = (char *)&pto;
+ error = VOP_IOCTL(fp->f_vnode, I_STR, (intptr_t)&istr,
+ FLFAKE(fp), fp->f_cred, &rv, NULL);
+
+ if (error)
+ return (set_errno((error == ENOTTY) ? error: EACCES));
+
+ ptyno = getminor(fp->f_vnode->v_rdev) - 1;
+ if (copyout(&ptyno, (caddr_t)arg, sizeof (ptyno)))
+ return (set_errno(EFAULT));
+
+ return (0);
+}
+
+static int
+ict_tiocgwinsz(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ int error, rv;
+
+ error = VOP_IOCTL(fp->f_vnode, cmd, arg, FLUSER(fp), fp->f_cred, &rv,
+ NULL);
+
+ /*
+ * A few Linux libc's (e.g. musl) have chosen to implement isatty()
+ * using the TIOCGWINSZ ioctl. Some apps also do the same thing
+ * directly. On Linux that ioctl will return a size of 0x0 for dumb
+ * terminals but on illumos see the handling for TIOCGWINSZ in ptem's
+ * ptioc(). We fail if the winsize is all zeros. To emulate the Linux
+ * behavior use the native ioctl check that we do for isatty and return
+ * a size of 0x0 if that succeeds.
+ */
+ if (error == EINVAL) {
+ int err;
+ struct termio s_tio;
+
+ err = VOP_IOCTL(fp->f_vnode, TCGETA, (intptr_t)&s_tio,
+ FLFAKE(fp), fp->f_cred, &rv, NULL);
+
+ if (err == 0) {
+ struct winsize w;
+
+ bzero(&w, sizeof (w));
+ if (copyout(&w, (struct winsize *)arg, sizeof (w)) != 0)
+ return (set_errno(EFAULT));
+ return (0);
+ }
+ }
+
+ if (error != 0)
+ return (set_errno(error));
+
+ return (0);
+}
+
+static int
+ict_tiocsctty(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ pid_t ttysid, mysid;
+ int error, rv;
+ proc_t *p = curproc;
+
+ /* getsid */
+ mutex_enter(&p->p_splock);
+ mysid = p->p_sessp->s_sid;
+ mutex_exit(&p->p_splock);
+
+ /*
+ * Report success if we already control the tty.
+ * If no one controls it, TIOCSCTTY will change that later.
+ */
+ error = VOP_IOCTL(fp->f_vnode, TIOCGSID, (intptr_t)&ttysid,
+ FLFAKE(fp), fp->f_cred, &rv, NULL);
+ if (error == 0 && ttysid == mysid)
+ return (0);
+
+ /*
+ * Need to make sure we're a session leader, otherwise the
+ * TIOCSCTTY ioctl will fail.
+ */
+ mutex_enter(&pidlock);
+ if (p->p_sessp->s_sidp != p->p_pidp && !pgmembers(p->p_pid)) {
+ mutex_exit(&pidlock);
+ sess_create();
+ } else {
+ mutex_exit(&pidlock);
+ }
+
+ error = VOP_IOCTL(fp->f_vnode, cmd, 0, FLUSER(fp),
+ fp->f_cred, &rv, NULL);
+ return ((error != 0) ? set_errno(error) : 0);
+}
+
+/* Socket-related translators */
+
+static int
+ict_siocatmark(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ vnode_t *vp = fp->f_vnode;
+ int error, rv;
+ /*
+ * Linux expects a SIOCATMARK of a UDP socket to return ENOTTY, while
+ * Illumos allows it. Linux prior to 2.6.39 returned EINVAL for this.
+ */
+ if (vp->v_type != VSOCK || VTOSO(vp)->so_type != SOCK_STREAM)
+ return (set_errno(ENOTTY));
+
+ error = VOP_IOCTL(fp->f_vnode, cmd, arg, FLUSER(fp), fp->f_cred, &rv,
+ NULL);
+ if (error)
+ return (set_errno(error));
+
+ return (0);
+}
+
+static int
+ict_if_ioctl(vnode_t *vn, int cmd, intptr_t arg, int flags, cred_t *cred)
+{
+ int error, rv;
+ lx_zone_data_t *lxzd = ztolxzd(curproc->p_zone);
+ ksocket_t ks;
+
+ ASSERT(lxzd != NULL);
+
+ /*
+ * For ioctls of this type, we are strict about address family
+ * whereas Linux is lenient. This strictness can be avoided by using
+ * an internal AF_INET ksocket, which we use if the family is anything
+ * but AF_PACKET.
+ */
+ if (vn->v_type == VSOCK && VTOSO(vn)->so_family == AF_PACKET)
+ return (VOP_IOCTL(vn, cmd, arg, flags, cred, &rv, NULL));
+
+ mutex_enter(&lxzd->lxzd_lock);
+ ks = lxzd->lxzd_ioctl_sock;
+ if (ks == NULL) {
+ /*
+ * Linux is not at all picky about address family when it comes
+ * to supporting interface-related ioctls. To mimic this
+ * behavior, we'll attempt those ioctls against a ksocket
+ * configured for that purpose.
+ */
+ (void) ksocket_socket(&lxzd->lxzd_ioctl_sock, AF_INET,
+ SOCK_DGRAM, 0, 0, curproc->p_zone->zone_kcred);
+ ks = lxzd->lxzd_ioctl_sock;
+ }
+ mutex_exit(&lxzd->lxzd_lock);
+
+ if (ks != NULL) {
+ error = ksocket_ioctl(ks, cmd, arg, &rv, cred);
+ } else {
+ error = VOP_IOCTL(vn, cmd, arg, flags, cred, &rv, NULL);
+ }
+
+ return (error);
+}
+
+static int
+ict_sioghwaddr(file_t *fp, struct lifreq *lreq)
+{
+ struct sockaddr_dl *sdl = (struct sockaddr_dl *)&lreq->lifr_addr;
+ struct sockaddr hwaddr;
+ int error, size;
+
+ error = ict_if_ioctl(fp->f_vnode, SIOCGLIFHWADDR, (intptr_t)lreq,
+ FLFAKE(fp), fp->f_cred);
+
+ if (error == EADDRNOTAVAIL &&
+ strncmp(lreq->lifr_name, "lo", 2) == 0) {
+ /* Emulate success on suspected loopbacks */
+ sdl->sdl_type = DL_LOOP;
+ sdl->sdl_alen = ETHERADDRL;
+ bzero(LLADDR(sdl), sdl->sdl_alen);
+ error = 0;
+ }
+
+ if (error == 0) {
+ bzero(&hwaddr, sizeof (hwaddr));
+ lx_stol_hwaddr(sdl, &hwaddr, &size);
+ bcopy(&hwaddr, &lreq->lifr_addr,
+ size + sizeof (sdl->sdl_family));
+ }
+
+ return (error);
+}
+
+static int
+ict_siocgifname(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ struct ifreq req;
+ int len;
+ char name[LIFNAMSIZ];
+ netstack_t *ns;
+ ip_stack_t *ipst;
+ phyint_t *phyi;
+
+ if (fp->f_vnode->v_type != VSOCK) {
+ return (set_errno(EINVAL));
+ }
+
+ len = (curproc->p_model == DATAMODEL_LP64) ? sizeof (lx_ifreq64_t) :
+ sizeof (lx_ifreq32_t);
+ if (copyin((struct ifreq *)arg, &req, len) != 0) {
+ return (set_errno(EFAULT));
+ }
+
+ /*
+ * Since Linux calls this ioctl on all sorts of sockets, perform the
+ * interface name lookup manually.
+ */
+ if ((ns = netstack_get_current()) == NULL) {
+ return (set_errno(EINVAL));
+ }
+ ipst = ns->netstack_ip;
+
+ rw_enter(&ipst->ips_ill_g_lock, RW_READER);
+ phyi = avl_find(&ipst->ips_phyint_g_list->phyint_list_avl_by_index,
+ (void *) &req.ifr_index, NULL);
+ if (phyi != NULL) {
+ strncpy(name, phyi->phyint_name, LIFNAMSIZ);
+ lx_ifname_convert(name, LX_IF_FROMNATIVE);
+ } else {
+ name[0] = '\0';
+ }
+
+ rw_exit(&ipst->ips_ill_g_lock);
+ netstack_rele(ns);
+
+ if (strlen(name) != 0) {
+ /* Truncate for ifreq and copyout */
+ strncpy(req.ifr_name, name, IFNAMSIZ);
+ if (copyout(&req, (struct ifreq *)arg, len) != 0) {
+ return (set_errno(EFAULT));
+ }
+ return (0);
+ }
+
+ return (set_errno(EINVAL));
+}
+
+static int
+ict_siolifreq(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ struct ifreq req;
+ struct lifreq lreq;
+ int error, len;
+
+ /* Convert from Linux ifreq to illumos lifreq */
+ if (curproc->p_model == DATAMODEL_LP64)
+ len = sizeof (lx_ifreq64_t);
+ else
+ len = sizeof (lx_ifreq32_t);
+ if (copyin((struct ifreq *)arg, &req, len) != 0)
+ return (set_errno(EFAULT));
+ bzero(&lreq, sizeof (lreq));
+ strncpy(lreq.lifr_name, req.ifr_name, IFNAMSIZ);
+ bcopy(&req.ifr_ifru, &lreq.lifr_lifru, len - IFNAMSIZ);
+ lx_ifname_convert(lreq.lifr_name, LX_IF_TONATIVE);
+
+ switch (cmd) {
+ case SIOCGIFADDR:
+ case SIOCSIFADDR:
+ case SIOCGIFDSTADDR:
+ case SIOCSIFDSTADDR:
+ case SIOCGIFBRDADDR:
+ case SIOCSIFBRDADDR:
+ case SIOCGIFNETMASK:
+ case SIOCSIFNETMASK:
+ case SIOCGIFMETRIC:
+ case SIOCSIFMETRIC:
+ case SIOCGIFMTU:
+ case SIOCSIFMTU:
+ /*
+ * Convert cmd from SIO*IF* to SIO*LIF*.
+ * This is needed since Linux allows ifreq operations on ipv6
+ * sockets where illumos does not.
+ */
+ cmd = ((cmd & IOC_INOUT) |
+ _IOW('i', ((cmd & 0xff) + 100), struct lifreq));
+ error = ict_if_ioctl(fp->f_vnode, cmd, (intptr_t)&lreq,
+ FLFAKE(fp), fp->f_cred);
+ break;
+ case SIOCGIFINDEX:
+ cmd = SIOCGLIFINDEX;
+ error = ict_if_ioctl(fp->f_vnode, cmd, (intptr_t)&lreq,
+ FLFAKE(fp), fp->f_cred);
+ break;
+ case SIOCGIFFLAGS:
+ cmd = SIOCGLIFFLAGS;
+ error = ict_if_ioctl(fp->f_vnode, cmd, (intptr_t)&lreq,
+ FLFAKE(fp), fp->f_cred);
+ if (error == 0)
+ lx_ifflags_convert(&lreq.lifr_flags, LX_IF_FROMNATIVE);
+ break;
+ case SIOCSIFFLAGS:
+ cmd = SIOCSLIFFLAGS;
+ lx_ifflags_convert(&lreq.lifr_flags, LX_IF_TONATIVE);
+ error = ict_if_ioctl(fp->f_vnode, cmd, (intptr_t)&lreq,
+ FLFAKE(fp), fp->f_cred);
+ break;
+ case SIOCGIFHWADDR:
+ error = ict_sioghwaddr(fp, &lreq);
+ break;
+ case LX_SIOCGIFTXQLEN:
+ /*
+ * Illumos lacks the notion of txqlen. Confirm the provided
+ * interface is valid with SIOCGLIFINDEX and return a fake
+ * txqlen of 1. Loopback devices will report txqlen of 0.
+ */
+ if (strncmp(lreq.lifr_name, "lo", 2) == 0) {
+ lreq.lifr_index = 0;
+ error = 0;
+ break;
+ }
+ cmd = SIOCGLIFINDEX;
+ error = ict_if_ioctl(fp->f_vnode, cmd, (intptr_t)&lreq,
+ FLFAKE(fp), fp->f_cred);
+ if (error == 0) {
+ /* lifr_index aliases to the qlen field */
+ lreq.lifr_index = 1;
+ }
+ break;
+ case LX_SIOCSIFHWADDR:
+ /*
+ * We're not going to support SIOCSIFHWADDR, but we need to be
+ * able to check the result of the copyin first to see if the
+ * command should have returned EFAULT.
+ */
+ default:
+ error = EINVAL;
+ }
+
+ if (error != 0)
+ return (set_errno(error));
+
+ /* Convert back to a Linux ifreq */
+ lx_ifname_convert(lreq.lifr_name, LX_IF_FROMNATIVE);
+ bzero(&req, sizeof (req));
+ strncpy(req.ifr_name, lreq.lifr_name, IFNAMSIZ);
+ bcopy(&lreq.lifr_lifru, &req.ifr_ifru, len - IFNAMSIZ);
+
+ if (copyout(&req, (struct lifreq *)arg, len) != 0)
+ return (set_errno(EFAULT));
+
+ return (0);
+}
+
+static int
+ict_siocgifconf32(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ lx_ifconf32_t conf;
+ lx_ifreq32_t *oreq;
+ struct ifconf sconf;
+ int ifcount, error, i, buf_len;
+
+ if (copyin((lx_ifconf32_t *)arg, &conf, sizeof (conf)) != 0)
+ return (set_errno(EFAULT));
+
+ /* They want to know how many interfaces there are. */
+ if (conf.if_len <= 0 || conf.if_buf == NULL) {
+ error = ict_if_ioctl(fp->f_vnode, SIOCGIFNUM,
+ (intptr_t)&ifcount, FLFAKE(fp), fp->f_cred);
+ if (error != 0)
+ return (set_errno(error));
+
+ conf.if_len = ifcount * sizeof (lx_ifreq32_t);
+
+ if (copyout(&conf, (lx_ifconf32_t *)arg, sizeof (conf)) != 0)
+ return (set_errno(EFAULT));
+ return (0);
+ } else {
+ ifcount = conf.if_len / sizeof (lx_ifreq32_t);
+ }
+
+ /* Get interface configuration list. */
+ sconf.ifc_len = ifcount * sizeof (struct ifreq);
+ sconf.ifc_req = (struct ifreq *)kmem_alloc(sconf.ifc_len, KM_SLEEP);
+
+ error = ict_if_ioctl(fp->f_vnode, cmd, (intptr_t)&sconf, FLFAKE(fp),
+ fp->f_cred);
+ if (error != 0) {
+ kmem_free(sconf.ifc_req, ifcount * sizeof (struct ifreq));
+ return (set_errno(error));
+ }
+
+ /* Convert data to Linux format & rename interfaces */
+ buf_len = ifcount * sizeof (lx_ifreq32_t);
+ oreq = (lx_ifreq32_t *)kmem_alloc(buf_len, KM_SLEEP);
+ for (i = 0; i < sconf.ifc_len / sizeof (struct ifreq); i++) {
+ bcopy(&sconf.ifc_req[i], oreq + i, sizeof (lx_ifreq32_t));
+ lx_ifname_convert(oreq[i].ifr_name, LX_IF_FROMNATIVE);
+ }
+ conf.if_len = i * sizeof (*oreq);
+ kmem_free(sconf.ifc_req, ifcount * sizeof (struct ifreq));
+
+ error = 0;
+ if (copyout(oreq, (caddr_t)(uintptr_t)conf.if_buf, conf.if_len) != 0 ||
+ copyout(&conf, (lx_ifconf32_t *)arg, sizeof (conf)) != 0)
+ error = set_errno(EFAULT);
+
+ kmem_free(oreq, buf_len);
+ return (error);
+}
+
+static int
+ict_siocgifconf64(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ lx_ifconf64_t conf;
+ lx_ifreq64_t *oreq;
+ struct ifconf sconf;
+ int ifcount, error, i, buf_len;
+
+ if (copyin((lx_ifconf64_t *)arg, &conf, sizeof (conf)) != 0)
+ return (set_errno(EFAULT));
+
+ /* They want to know how many interfaces there are. */
+ if (conf.if_len <= 0 || conf.if_buf == NULL) {
+ error = ict_if_ioctl(fp->f_vnode, SIOCGIFNUM,
+ (intptr_t)&ifcount, FLFAKE(fp), fp->f_cred);
+ if (error != 0)
+ return (set_errno(error));
+
+ conf.if_len = ifcount * sizeof (lx_ifreq64_t);
+
+ if (copyout(&conf, (lx_ifconf64_t *)arg, sizeof (conf)) != 0)
+ return (set_errno(EFAULT));
+ return (0);
+ } else {
+ ifcount = conf.if_len / sizeof (lx_ifreq64_t);
+ }
+
+ /* Get interface configuration list. */
+ sconf.ifc_len = ifcount * sizeof (struct ifreq);
+ sconf.ifc_req = (struct ifreq *)kmem_alloc(sconf.ifc_len, KM_SLEEP);
+
+ error = ict_if_ioctl(fp->f_vnode, cmd, (intptr_t)&sconf, FLFAKE(fp),
+ fp->f_cred);
+ if (error != 0) {
+ kmem_free(sconf.ifc_req, ifcount * sizeof (struct ifreq));
+ return (set_errno(error));
+ }
+
+ /* Convert data to Linux format & rename interfaces */
+ buf_len = ifcount * sizeof (lx_ifreq64_t);
+ oreq = (lx_ifreq64_t *)kmem_alloc(buf_len, KM_SLEEP);
+ for (i = 0; i < sconf.ifc_len / sizeof (struct ifreq); i++) {
+ bcopy(&sconf.ifc_req[i], oreq + i, sizeof (lx_ifreq64_t));
+ lx_ifname_convert(oreq[i].ifr_name, LX_IF_FROMNATIVE);
+ }
+ conf.if_len = i * sizeof (*oreq);
+ kmem_free(sconf.ifc_req, ifcount * sizeof (struct ifreq));
+
+ error = 0;
+ if (copyout(oreq, (caddr_t)(uintptr_t)conf.if_buf, conf.if_len) != 0 ||
+ copyout(&conf, (lx_ifconf64_t *)arg, sizeof (conf)) != 0)
+ error = set_errno(EFAULT);
+
+ kmem_free(oreq, buf_len);
+ return (error);
+}
+
+static int
+ict_siocgifconf(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ if (curproc->p_model == DATAMODEL_LP64)
+ return (ict_siocgifconf64(fp, cmd, arg, lxcmd));
+ else
+ return (ict_siocgifconf32(fp, cmd, arg, lxcmd));
+}
+
+/*
+ * Unfortunately some of the autofs ioctls want to return a positive integer
+ * result which does not indicate an error. To minimize disruption in the
+ * rest of the code, we'll treat a positive return as an errno and a negative
+ * return as the non-error return (which we then negate).
+ */
+static int
+ict_autofs(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+ int res = 0;
+ int rv;
+
+ res = VOP_IOCTL(fp->f_vnode, cmd, arg, FLUSER(fp), fp->f_cred, &rv,
+ NULL);
+ if (res > 0)
+ return (set_errno(res));
+ if (res == 0)
+ return (0);
+ return (-res);
+}
+
+/* Structure used to define an ioctl translator. */
+typedef struct lx_ioc_cmd_translator {
+ int lict_lxcmd;
+ int lict_cmd;
+ int (*lict_func)(file_t *fp, int cmd, intptr_t arg, int lxcmd);
+} lx_ioc_cmd_translator_t;
+
+#define LX_IOC_CMD_TRANSLATOR_PASS(ioc_cmd_sym) \
+ { (int)LX_##ioc_cmd_sym, (int)ioc_cmd_sym, ict_pass },
+
+#define LX_IOC_CMD_TRANSLATOR_FILTER(ioc_cmd_sym, ioct_handler) \
+ { (int)LX_##ioc_cmd_sym, (int)ioc_cmd_sym, ioct_handler },
+
+#define LX_IOC_CMD_TRANSLATOR_CUSTOM(ioc_cmd_sym, ioct_handler) \
+ { (int)ioc_cmd_sym, (int)ioc_cmd_sym, ioct_handler },
+
+#define LX_IOC_CMD_TRANSLATOR_PTHRU(ioc_cmd_sym) \
+ { (int)ioc_cmd_sym, (int)ioc_cmd_sym, ict_pass },
+
+#define LX_IOC_CMD_TRANSLATOR_END \
+ {0, 0, NULL}
+
+static lx_ioc_cmd_translator_t lx_ioc_xlate_fd[] = {
+ LX_IOC_CMD_TRANSLATOR_FILTER(FIONBIO, ict_fionbio)
+ LX_IOC_CMD_TRANSLATOR_FILTER(FIONREAD, ict_fionread)
+ LX_IOC_CMD_TRANSLATOR_PASS(FIOASYNC)
+
+ /* streams related */
+ LX_IOC_CMD_TRANSLATOR_PASS(TCXONC)
+ LX_IOC_CMD_TRANSLATOR_PASS(TCFLSH)
+ LX_IOC_CMD_TRANSLATOR_PASS(TIOCEXCL)
+ LX_IOC_CMD_TRANSLATOR_PASS(TIOCNXCL)
+ LX_IOC_CMD_TRANSLATOR_PASS(TIOCSTI)
+ LX_IOC_CMD_TRANSLATOR_PASS(TIOCSWINSZ)
+ LX_IOC_CMD_TRANSLATOR_PASS(TIOCMBIS)
+ LX_IOC_CMD_TRANSLATOR_PASS(TIOCMBIC)
+ LX_IOC_CMD_TRANSLATOR_PASS(TIOCMSET)
+ LX_IOC_CMD_TRANSLATOR_PASS(TIOCSETD)
+ LX_IOC_CMD_TRANSLATOR_PASS(TCSBRK)
+
+ /* terminal related */
+ LX_IOC_CMD_TRANSLATOR_PASS(TIOCGETD)
+ LX_IOC_CMD_TRANSLATOR_PASS(TIOCGSID)
+ LX_IOC_CMD_TRANSLATOR_PASS(TIOCNOTTY)
+ LX_IOC_CMD_TRANSLATOR_PASS(TIOCPKT)
+
+ LX_IOC_CMD_TRANSLATOR_FILTER(TCSETS, ict_tcsets)
+ LX_IOC_CMD_TRANSLATOR_FILTER(TCSETSW, ict_tcsets)
+ LX_IOC_CMD_TRANSLATOR_FILTER(TCSETSF, ict_tcsets)
+ LX_IOC_CMD_TRANSLATOR_FILTER(TCSETA, ict_tcseta)
+ LX_IOC_CMD_TRANSLATOR_FILTER(TCSETAW, ict_tcseta)
+ LX_IOC_CMD_TRANSLATOR_FILTER(TCSETAF, ict_tcseta)
+ LX_IOC_CMD_TRANSLATOR_FILTER(TCGETS, ict_tcgets)
+ LX_IOC_CMD_TRANSLATOR_FILTER(TCGETA, ict_tcgeta)
+ LX_IOC_CMD_TRANSLATOR_FILTER(TIOCGWINSZ, ict_tiocgwinsz)
+ LX_IOC_CMD_TRANSLATOR_CUSTOM(LX_TCSBRKP, ict_tcsbrkp)
+ LX_IOC_CMD_TRANSLATOR_FILTER(TIOCSPGRP, ict_tiocspgrp)
+ LX_IOC_CMD_TRANSLATOR_FILTER(TIOCGPGRP, ict_tiocgpgrp)
+ LX_IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCSPTLCK, ict_sptlock)
+ LX_IOC_CMD_TRANSLATOR_CUSTOM(LX_TIOCGPTN, ict_gptn)
+ LX_IOC_CMD_TRANSLATOR_FILTER(TIOCSCTTY, ict_tiocsctty)
+
+ LX_IOC_CMD_TRANSLATOR_END
+};
+
+static lx_ioc_cmd_translator_t lx_ioc_xlate_socket[] = {
+ LX_IOC_CMD_TRANSLATOR_PASS(FIOGETOWN)
+
+ LX_IOC_CMD_TRANSLATOR_PASS(SIOCSPGRP)
+ LX_IOC_CMD_TRANSLATOR_PASS(SIOCGPGRP)
+ LX_IOC_CMD_TRANSLATOR_PASS(SIOCGSTAMP)
+ LX_IOC_CMD_TRANSLATOR_FILTER(SIOCATMARK, ict_siocatmark)
+ LX_IOC_CMD_TRANSLATOR_FILTER(SIOCGIFFLAGS, ict_siolifreq)
+ LX_IOC_CMD_TRANSLATOR_FILTER(SIOCSIFFLAGS, ict_siolifreq)
+ LX_IOC_CMD_TRANSLATOR_FILTER(SIOCGIFADDR, ict_siolifreq)
+ LX_IOC_CMD_TRANSLATOR_FILTER(SIOCSIFADDR, ict_siolifreq)
+ LX_IOC_CMD_TRANSLATOR_FILTER(SIOCGIFDSTADDR, ict_siolifreq)
+ LX_IOC_CMD_TRANSLATOR_FILTER(SIOCSIFDSTADDR, ict_siolifreq)
+ LX_IOC_CMD_TRANSLATOR_FILTER(SIOCGIFBRDADDR, ict_siolifreq)
+ LX_IOC_CMD_TRANSLATOR_FILTER(SIOCSIFBRDADDR, ict_siolifreq)
+ LX_IOC_CMD_TRANSLATOR_FILTER(SIOCGIFNETMASK, ict_siolifreq)
+ LX_IOC_CMD_TRANSLATOR_FILTER(SIOCSIFNETMASK, ict_siolifreq)
+ LX_IOC_CMD_TRANSLATOR_FILTER(SIOCGIFMETRIC, ict_siolifreq)
+ LX_IOC_CMD_TRANSLATOR_FILTER(SIOCSIFMETRIC, ict_siolifreq)
+ LX_IOC_CMD_TRANSLATOR_FILTER(SIOCGIFMTU, ict_siolifreq)
+ LX_IOC_CMD_TRANSLATOR_FILTER(SIOCSIFMTU, ict_siolifreq)
+ LX_IOC_CMD_TRANSLATOR_FILTER(SIOCGIFHWADDR, ict_siolifreq)
+ LX_IOC_CMD_TRANSLATOR_CUSTOM(LX_SIOCSIFHWADDR, ict_siolifreq)
+ LX_IOC_CMD_TRANSLATOR_FILTER(SIOCGIFINDEX, ict_siolifreq)
+ LX_IOC_CMD_TRANSLATOR_CUSTOM(LX_SIOCGIFTXQLEN, ict_siolifreq)
+ LX_IOC_CMD_TRANSLATOR_FILTER(SIOCGIFCONF, ict_siocgifconf)
+ LX_IOC_CMD_TRANSLATOR_CUSTOM(LX_SIOCGIFNAME, ict_siocgifname)
+
+ LX_IOC_CMD_TRANSLATOR_END
+};
+
+static lx_ioc_cmd_translator_t lx_ioc_xlate_dtrace[] = {
+ LX_IOC_CMD_TRANSLATOR_PTHRU(DTRACEHIOC_ADD)
+ LX_IOC_CMD_TRANSLATOR_PTHRU(DTRACEHIOC_REMOVE)
+ LX_IOC_CMD_TRANSLATOR_PTHRU(DTRACEHIOC_ADDDOF)
+
+ LX_IOC_CMD_TRANSLATOR_END
+};
+
+static lx_ioc_cmd_translator_t lx_ioc_xlate_autofs[] = {
+ LX_IOC_CMD_TRANSLATOR_PTHRU(LX_AUTOFS_IOC_READY)
+ LX_IOC_CMD_TRANSLATOR_PTHRU(LX_AUTOFS_IOC_FAIL)
+ LX_IOC_CMD_TRANSLATOR_PTHRU(LX_AUTOFS_IOC_CATATONIC)
+ LX_IOC_CMD_TRANSLATOR_PTHRU(LX_AUTOFS_IOC_PROTOVER)
+ LX_IOC_CMD_TRANSLATOR_PTHRU(LX_AUTOFS_IOC_SETTIMEOUT)
+ LX_IOC_CMD_TRANSLATOR_PTHRU(LX_AUTOFS_IOC_EXPIRE)
+ LX_IOC_CMD_TRANSLATOR_PTHRU(LX_AUTOFS_IOC_EXPIRE_MULTI)
+ LX_IOC_CMD_TRANSLATOR_PTHRU(LX_AUTOFS_IOC_PROTOSUBVER)
+ LX_IOC_CMD_TRANSLATOR_PTHRU(LX_AUTOFS_IOC_ASKUMOUNT)
+
+ LX_IOC_CMD_TRANSLATOR_PTHRU(LX_AUTOFS_DEV_IOC_VERSION_CMD)
+ LX_IOC_CMD_TRANSLATOR_PTHRU(LX_AUTOFS_DEV_IOC_PROTOVER_CMD)
+ LX_IOC_CMD_TRANSLATOR_PTHRU(LX_AUTOFS_DEV_IOC_PROTOSUBVER_CMD)
+ LX_IOC_CMD_TRANSLATOR_PTHRU(LX_AUTOFS_DEV_IOC_OPENMOUNT_CMD)
+ LX_IOC_CMD_TRANSLATOR_PTHRU(LX_AUTOFS_DEV_IOC_CLOSEMOUNT_CMD)
+ LX_IOC_CMD_TRANSLATOR_PTHRU(LX_AUTOFS_DEV_IOC_READY_CMD)
+ LX_IOC_CMD_TRANSLATOR_PTHRU(LX_AUTOFS_DEV_IOC_FAIL_CMD)
+ LX_IOC_CMD_TRANSLATOR_PTHRU(LX_AUTOFS_DEV_IOC_SETPIPEFD_CMD)
+ LX_IOC_CMD_TRANSLATOR_PTHRU(LX_AUTOFS_DEV_IOC_CATATONIC_CMD)
+ LX_IOC_CMD_TRANSLATOR_PTHRU(LX_AUTOFS_DEV_IOC_TIMEOUT_CMD)
+ LX_IOC_CMD_TRANSLATOR_PTHRU(LX_AUTOFS_DEV_IOC_REQUESTER_CMD)
+ LX_IOC_CMD_TRANSLATOR_PTHRU(LX_AUTOFS_DEV_IOC_EXPIRE_CMD)
+ LX_IOC_CMD_TRANSLATOR_PTHRU(LX_AUTOFS_DEV_IOC_ASKUMOUNT_CMD)
+ LX_IOC_CMD_TRANSLATOR_CUSTOM(LX_AUTOFS_DEV_IOC_ISMOUNTPOINT_CMD,
+ ict_autofs)
+
+ LX_IOC_CMD_TRANSLATOR_END
+};
+
+static lx_ioc_cmd_translator_t lx_ioc_xlate_hd[] = {
+ LX_IOC_CMD_TRANSLATOR_CUSTOM(LX_HDIO_GETGEO, ict_hdgetgeo)
+
+ LX_IOC_CMD_TRANSLATOR_END
+};
+
+static lx_ioc_cmd_translator_t lx_ioc_xlate_blk[] = {
+ LX_IOC_CMD_TRANSLATOR_CUSTOM(LX_BLKGETSIZE, ict_blkgetsize)
+ LX_IOC_CMD_TRANSLATOR_CUSTOM(LX_BLKSSZGET, ict_blkgetssize)
+ LX_IOC_CMD_TRANSLATOR_CUSTOM(LX_BLKGETSIZE64, ict_blkgetsize64)
+
+ LX_IOC_CMD_TRANSLATOR_END
+};
+
+static void
+lx_ioctl_vsd_free(void *data)
+{
+ kmem_free(data, sizeof (struct lx_cc));
+}
+
+void
+lx_ioctl_init()
+{
+ vsd_create(&lx_ioctl_vsd, lx_ioctl_vsd_free);
+}
+
+void
+lx_ioctl_fini()
+{
+ vsd_destroy(&lx_ioctl_vsd);
+}
+
+long
+lx_ioctl(int fdes, int cmd, intptr_t arg)
+{
+ file_t *fp;
+ int res = 0, error = ENOTTY;
+ lx_ioc_cmd_translator_t *ict = NULL;
+
+ if (cmd == LX_FIOCLEX || cmd == LX_FIONCLEX) {
+ res = f_setfd_error(fdes, (cmd == LX_FIOCLEX) ? FD_CLOEXEC : 0);
+ return ((res != 0) ? set_errno(res) : 0);
+ }
+
+ if ((fp = getf(fdes)) == NULL)
+ return (set_errno(EBADF));
+
+ switch ((cmd & 0xff00) >> 8) {
+ case LX_IOC_TYPE_FD:
+ ict = lx_ioc_xlate_fd;
+ break;
+
+ case LX_IOC_TYPE_DTRACE:
+ ict = lx_ioc_xlate_dtrace;
+ break;
+
+ case LX_IOC_TYPE_SOCK:
+ ict = lx_ioc_xlate_socket;
+ error = EOPNOTSUPP;
+ break;
+
+ case LX_IOC_TYPE_AUTOFS:
+ ict = lx_ioc_xlate_autofs;
+ break;
+
+ case LX_IOC_TYPE_BLK:
+ ict = lx_ioc_xlate_blk;
+ break;
+
+ case LX_IOC_TYPE_HD:
+ ict = lx_ioc_xlate_hd;
+ break;
+
+ default:
+ releasef(fdes);
+ return (set_errno(ENOTTY));
+ }
+
+ /*
+ * Today, none of the ioctls supported by the emulation possess
+ * overlapping cmd values. Because of that, no type interrogation of
+ * the fd is done before executing specific ioctl emulation. It's
+ * assumed that the vnode-specific logic called by the emulation
+ * function will reject ioctl commands not supported by the fd.
+ */
+ VERIFY(ict != NULL);
+ while (ict->lict_func != NULL) {
+ if (ict->lict_lxcmd == cmd)
+ break;
+ ict++;
+ }
+ if (ict->lict_func == NULL) {
+ releasef(fdes);
+ return (set_errno(error));
+ }
+
+ res = ict->lict_func(fp, ict->lict_cmd, arg, ict->lict_lxcmd);
+ releasef(fdes);
+ return (res);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_ioprio.c b/usr/src/uts/common/brand/lx/syscall/lx_ioprio.c
new file mode 100644
index 0000000000..13397e199e
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_ioprio.c
@@ -0,0 +1,66 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <sys/errno.h>
+#include <sys/systm.h>
+#include <sys/lx_brand.h>
+
+/* 'which' values. */
+#define LX_IOPRIO_WHO_PROCESS 1
+#define LX_IOPRIO_WHO_PGRP 2
+#define LX_IOPRIO_WHO_USER 3
+
+/*
+ * The possible values for the class. We report best effort (BE) as the class
+ * in use.
+ */
+#define LX_IOPRIO_CLASS_RT 1
+#define LX_IOPRIO_CLASS_BE 2
+#define LX_IOPRIO_CLASS_IDLE 3
+
+/* Macro to determine the class from the input mask */
+#define LX_IOPRIO_PRIO_CLASS(m) ((m) >> 13)
+
+/* ARGSUSED */
+long
+lx_ioprio_get(int which, int who)
+{
+ if (which < LX_IOPRIO_WHO_PROCESS || which > LX_IOPRIO_WHO_USER)
+ return (set_errno(EINVAL));
+
+ return (LX_IOPRIO_CLASS_BE);
+}
+
+/*
+ * We allow setting any valid class, even though it's ignored.
+ * We ignore the 'who' parameter which means that we're not searching for
+ * the specified target in order to return a specific errno in the case that
+ * the target does not exist.
+ */
+/* ARGSUSED */
+long
+lx_ioprio_set(int which, int who, int mask)
+{
+ int class;
+
+ if (which < LX_IOPRIO_WHO_PROCESS || which > LX_IOPRIO_WHO_USER)
+ return (set_errno(EINVAL));
+
+ class = LX_IOPRIO_PRIO_CLASS(mask);
+ if (class < LX_IOPRIO_CLASS_RT || class > LX_IOPRIO_CLASS_IDLE)
+ return (set_errno(EINVAL));
+
+ return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_kill.c b/usr/src/uts/common/brand/lx/syscall/lx_kill.c
new file mode 100644
index 0000000000..eeed914566
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_kill.c
@@ -0,0 +1,402 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/proc.h>
+#include <sys/zone.h>
+#include <sys/thread.h>
+#include <sys/signal.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <lx_signum.h>
+#include <sys/contract/process_impl.h>
+
+extern int kill(pid_t, int);
+
+/*
+ * Check if it is legal to send this signal to the init process. Linux
+ * kill(2) semantics dictate that no _unhandled_ signal may be sent to pid
+ * 1.
+ */
+static int
+init_sig_check(int sig, pid_t pid)
+{
+ proc_t *p;
+ int rv = 0;
+
+ mutex_enter(&pidlock);
+
+ if (((p = prfind(pid)) == NULL) || (p->p_stat == SIDL))
+ rv = ESRCH;
+ else if (sig && (sigismember(&cantmask, sig) ||
+ (PTOU(p)->u_signal[sig-1] == SIG_DFL) ||
+ (PTOU(p)->u_signal[sig-1] == SIG_IGN)))
+ rv = EPERM;
+
+ mutex_exit(&pidlock);
+
+ return (rv);
+}
+
+static long
+lx_thrkill(pid_t tgid, pid_t pid, int lx_sig, boolean_t tgkill)
+{
+ kthread_t *t;
+ proc_t *pp, *cp = curproc;
+ pid_t initpid;
+ sigqueue_t *sqp;
+ int tid = 1; /* default tid */
+ int sig, rv;
+
+ /*
+ * Unlike kill(2), Linux tkill(2) doesn't allow signals to
+ * be sent to process IDs <= 0 as it doesn't overlay any special
+ * semantics on the pid.
+ */
+ if ((pid <= 0) || ((lx_sig < 0) || (lx_sig > LX_NSIG)) ||
+ ((sig = ltos_signo[lx_sig]) < 0))
+ return (set_errno(EINVAL));
+
+ /*
+ * If the Linux pid is 1, translate the pid to the actual init
+ * pid for the zone. Note that Linux dictates that no unhandled
+ * signals may be sent to init, so check for that, too.
+ *
+ * Otherwise, extract the tid and real pid from the Linux pid.
+ */
+ initpid = cp->p_zone->zone_proc_initpid;
+ if (pid == 1)
+ pid = initpid;
+ if ((pid == initpid) && ((rv = init_sig_check(sig, pid)) != 0))
+ return (set_errno(rv));
+ else if (lx_lpid_to_spair(pid, &pid, &tid) < 0)
+ return (set_errno(ESRCH));
+
+ if (tgkill && tgid != pid)
+ return (set_errno(ESRCH));
+
+ sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
+
+ /*
+ * Find the process for the passed pid...
+ */
+ mutex_enter(&pidlock);
+ if (((pp = prfind(pid)) == NULL) || (pp->p_stat == SIDL)) {
+ mutex_exit(&pidlock);
+ rv = set_errno(ESRCH);
+ goto free_and_exit;
+ }
+ mutex_enter(&pp->p_lock);
+ mutex_exit(&pidlock);
+
+ /*
+ * Deny permission to send the signal if either of the following
+ * is true:
+ *
+ * + The signal is SIGCONT and the target pid is not in the same
+ * session as the sender
+ *
+ * + prochasprocperm() shows the user lacks sufficient permission
+ * to send the signal to the target pid
+ */
+ if (((sig == SIGCONT) && (pp->p_sessp != cp->p_sessp)) ||
+ (!prochasprocperm(pp, cp, CRED()))) {
+ mutex_exit(&pp->p_lock);
+ rv = set_errno(EPERM);
+ goto free_and_exit;
+ }
+
+ /* check for the tid */
+ if ((t = idtot(pp, tid)) == NULL) {
+ mutex_exit(&pp->p_lock);
+ rv = set_errno(ESRCH);
+ goto free_and_exit;
+ }
+
+ /* a signal of 0 means just check for the existence of the thread */
+ if (lx_sig == 0) {
+ mutex_exit(&pp->p_lock);
+ rv = 0;
+ goto free_and_exit;
+ }
+
+ sqp->sq_info.si_signo = sig;
+ sqp->sq_info.si_code = SI_LWP;
+ sqp->sq_info.si_pid = cp->p_pid;
+ sqp->sq_info.si_zoneid = getzoneid();
+ sqp->sq_info.si_uid = crgetruid(CRED());
+ sigaddqa(pp, t, sqp);
+
+ mutex_exit(&pp->p_lock);
+
+ return (0);
+
+free_and_exit:
+ kmem_free(sqp, sizeof (sigqueue_t));
+ return (rv);
+}
+
+long
+lx_tgkill(pid_t tgid, pid_t pid, int lx_sig)
+{
+ return (lx_thrkill(tgid, pid, lx_sig, B_TRUE));
+}
+
+long
+lx_tkill(pid_t pid, int lx_sig)
+{
+ return (lx_thrkill(0, pid, lx_sig, B_FALSE));
+}
+
+long
+lx_kill(pid_t lx_pid, int lx_sig)
+{
+ pid_t s_pid, initpid;
+ sigsend_t v;
+ zone_t *zone = curproc->p_zone;
+ struct proc *p;
+ int err, sig, nfound;
+
+ if ((lx_sig < 0) || (lx_sig > LX_NSIG) ||
+ ((sig = ltos_signo[lx_sig]) < 0))
+ return (set_errno(EINVAL));
+
+ /*
+ * Since some linux apps rely on init(1M) having PID 1, we
+ * transparently translate 1 to the real init(1M)'s pid. We then
+ * check to be sure that it is legal for this process to send this
+ * signal to init(1M).
+ */
+ initpid = zone->zone_proc_initpid;
+ if (lx_pid == 1) {
+ s_pid = initpid;
+ } else if (lx_pid == 0 || lx_pid == -1) {
+ s_pid = 0;
+ } else if (lx_pid > 0) {
+ if (lx_lpid_to_spair(lx_pid, &s_pid, NULL) != 0) {
+ /*
+ * If we didn't find this pid that means it doesn't
+ * exist in this zone.
+ */
+ return (set_errno(ESRCH));
+ }
+ } else {
+ ASSERT(lx_pid < 0);
+ if (lx_lpid_to_spair(-lx_pid, &s_pid, NULL) != 0) {
+ /*
+ * If we didn't find this pid it means that the
+ * process group leader doesn't exist in this zone.
+ * In this case assuming that the Linux pid is
+ * the same as the Solaris pid will get us the
+ * correct behavior.
+ */
+ s_pid = -lx_pid;
+ }
+ }
+
+ if ((s_pid == initpid) && ((err = init_sig_check(sig, s_pid)) != 0))
+ return (set_errno(err));
+
+ /*
+ * For individual processes, kill() semantics are the same between
+ * Solaris and Linux.
+ */
+ if (lx_pid >= 0)
+ return (kill(s_pid, sig));
+
+ /*
+ * In Solaris, sending a signal to -pid means "send a signal to
+ * everyone in process group pid." In Linux it means "send a
+ * signal to everyone in the group other than init." Sending a
+ * signal to -1 means "send a signal to every process except init
+ * and myself."
+ */
+
+ bzero(&v, sizeof (v));
+ v.sig = sig;
+ v.checkperm = 1;
+ v.sicode = SI_USER;
+ err = 0;
+
+ mutex_enter(&pidlock);
+
+ p = (lx_pid == -1) ? practive : pgfind(s_pid);
+ nfound = 0;
+ while (err == 0 && p != NULL) {
+ if ((p->p_zone == zone) && (p->p_stat != SIDL) &&
+ (p->p_pid != initpid) && (lx_pid < -1 || p != curproc)) {
+ nfound++;
+ err = sigsendproc(p, &v);
+ }
+
+ p = (lx_pid == -1) ? p->p_next : p->p_pglink;
+ }
+ mutex_exit(&pidlock);
+
+ /*
+ * If we found no processes, we'll return ESRCH -- but unlike our
+ * native kill(2), we do not return EPERM if processes are found but
+ * we did not have permission to send any of them a signal.
+ */
+ if (nfound == 0)
+ err = ESRCH;
+
+ return (err ? set_errno(err) : 0);
+}
+
+/*
+ * This handles the unusual case where the user sends a non-queueable signal
+ * through rt_sigqueueinfo. Signals sent with codes that indicate they are
+ * queuable are sent through the sigqueue syscall via the user level function
+ * lx_rt_sigqueueinfo().
+ */
+int
+lx_helper_rt_sigqueueinfo(pid_t tgid, int sig, siginfo_t *uinfo)
+{
+ proc_t *target_proc;
+ pid_t s_pid;
+ zone_t *zone = curproc->p_zone;
+ sigsend_t send;
+ int err;
+ siginfo_t kinfo;
+
+ if (copyin(uinfo, &kinfo, sizeof (siginfo_t)) != 0)
+ return (set_errno(EFAULT));
+ /* Unlike in lx_kill, this process id must be exact, no negatives. */
+ if (tgid == 0)
+ return (set_errno(ESRCH));
+ if (tgid < 0)
+ return (set_errno(EINVAL));
+ /*
+ * Translate init directly, otherwise use the convenient utility
+ * function to translate. Since we're sending to the whole group, we
+ * only need the solaris pid, and not the lwp id.
+ */
+ if (tgid == 1) {
+ s_pid = zone->zone_proc_initpid;
+ } else {
+ if (lx_lpid_to_spair(tgid, &s_pid, NULL) != 0) {
+ /*
+ * If we didn't find this pid that means it doesn't
+ * exist in this zone.
+ */
+ return (set_errno(ESRCH));
+ }
+ }
+ /*
+ * We shouldn't have queuable signals here, those are sent elsewhere by
+ * the usermode handler for this emulated call.
+ */
+ if (!SI_CANQUEUE(kinfo.si_code)) {
+ return (set_errno(EINVAL));
+ }
+ /* Since our signal shouldn't queue, we just call sigsendproc(). */
+ bzero(&send, sizeof (send));
+ send.sig = sig;
+ send.checkperm = 1;
+ send.sicode = kinfo.si_code;
+ send.value = kinfo.si_value;
+
+ mutex_enter(&pidlock);
+ target_proc = prfind(s_pid);
+ err = 0;
+ if (target_proc != NULL) {
+ err = sigsendproc(target_proc, &send);
+ if (err == 0 && send.perm == 0)
+ err = EPERM;
+ } else {
+ err = ESRCH;
+ }
+ mutex_exit(&pidlock);
+
+ return (err ? set_errno(err) : 0);
+}
+
+/*
+ * Unlike the above function, this handles all system calls to rt_tgsigqueue
+ * regardless of si_code.
+ */
+int
+lx_helper_rt_tgsigqueueinfo(pid_t tgid, pid_t tid, int sig, siginfo_t *uinfo)
+{
+ id_t s_tid;
+ pid_t s_pid;
+ proc_t *target_proc;
+ sigqueue_t *sqp;
+ kthread_t *t;
+ siginfo_t kinfo;
+
+ if (copyin(uinfo, &kinfo, sizeof (siginfo_t)) != 0)
+ return (set_errno(EFAULT));
+ if (lx_lpid_to_spair(tid, &s_pid, &s_tid) != 0)
+ return (set_errno(ESRCH));
+ /*
+ * For group leaders, solaris pid == linux pid, so the solaris leader
+ * pid should be the same as the tgid but since the tgid comes in via
+ * the syscall we need to check for an invalid value.
+ */
+ if (s_pid != tgid)
+ return (set_errno(EINVAL));
+
+ mutex_enter(&pidlock);
+ target_proc = prfind(s_pid);
+ if (target_proc != NULL)
+ mutex_enter(&target_proc->p_lock);
+ mutex_exit(&pidlock);
+
+ if (target_proc == NULL) {
+ return (set_errno(ESRCH));
+ }
+ if (sig < 0 || sig >= NSIG)
+ return (set_errno(EINVAL));
+
+ /*
+ * Some code adapted from lwp_kill, duplicated here because we do some
+ * customization to the sq_info field of sqp.
+ */
+ if ((t = idtot(target_proc, s_tid)) == NULL) {
+ mutex_exit(&target_proc->p_lock);
+ return (set_errno(ESRCH));
+ }
+ /* Just checking for existence of the process, not sending a signal. */
+ if (sig == 0) {
+ mutex_exit(&target_proc->p_lock);
+ return (0);
+ }
+ sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
+ sqp->sq_info.si_signo = sig;
+ sqp->sq_info.si_code = kinfo.si_code;
+ sqp->sq_info.si_pid = target_proc->p_pid;
+ sqp->sq_info.si_ctid = PRCTID(target_proc);
+ sqp->sq_info.si_zoneid = getzoneid();
+ sqp->sq_info.si_uid = crgetruid(CRED());
+ sigaddqa(target_proc, t, sqp);
+ mutex_exit(&target_proc->p_lock);
+ return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_link.c b/usr/src/uts/common/brand/lx/syscall/lx_link.c
new file mode 100644
index 0000000000..23e0768581
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_link.c
@@ -0,0 +1,97 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <sys/fcntl.h>
+#include <sys/errno.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/systm.h>
+#include <sys/lx_fcntl.h>
+
+#define LX_LINK_ALLOWED (LX_AT_SYMLINK_FOLLOW | LX_AT_EMPTY_PATH)
+
+static long
+lx_link_common(int ffd, char *from, int tfd, char *to, int flags)
+{
+ int error;
+ vnode_t *fsvp = NULL, *tsvp = NULL;
+ enum symfollow follow = NO_FOLLOW;
+
+ if ((flags & ~LX_LINK_ALLOWED) != 0) {
+ return (set_errno(EINVAL));
+ }
+ if ((flags & LX_AT_EMPTY_PATH) == 0) {
+ char c;
+
+ /*
+ * Check that both 'from' and 'to' names are non-empty if
+ * AT_EMPTY_PATH is not set.
+ */
+ if (copyin(from, &c, sizeof (c)) != 0) {
+ return (set_errno(EFAULT));
+ } else if (c == '\0') {
+ return (set_errno(ENOENT));
+ }
+ if (copyin(to, &c, sizeof (c)) != 0) {
+ return (set_errno(EFAULT));
+ } else if (c == '\0') {
+ return (set_errno(ENOENT));
+ }
+
+ /*
+ * XXX: When our support for LX capabilities improves, ENOENT
+ * should be thrown when a process lacking CAP_DAC_READ_SEARCH
+ * attempts to use the AT_EMPTY_PATH flag.
+ */
+ }
+ if ((flags & LX_AT_SYMLINK_FOLLOW) != 0) {
+ follow = FOLLOW;
+ }
+
+ if ((error = fgetstartvp(ffd, from, &fsvp)) != 0) {
+ goto out;
+ }
+ if ((error = fgetstartvp(tfd, to, &tsvp)) != 0) {
+ goto out;
+ }
+ error = vn_linkat(fsvp, from, follow, tsvp, to, UIO_USERSPACE);
+
+out:
+ if (fsvp != NULL) {
+ VN_RELE(fsvp);
+ }
+ if (tsvp != NULL) {
+ VN_RELE(tsvp);
+ }
+ if (error) {
+ return (set_errno(error));
+ }
+ return (0);
+}
+
+long
+lx_link(char *from, char *to)
+{
+ return (lx_link_common(AT_FDCWD, from, AT_FDCWD, to, 0));
+}
+
+long
+lx_linkat(int ffd, char *from, int tfd, char *to, int flags)
+{
+ ffd = (ffd == LX_AT_FDCWD) ? AT_FDCWD : ffd;
+ tfd = (tfd == LX_AT_FDCWD) ? AT_FDCWD : tfd;
+
+ return (lx_link_common(ffd, from, tfd, to, flags));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_mkdir.c b/usr/src/uts/common/brand/lx/syscall/lx_mkdir.c
new file mode 100644
index 0000000000..2f29f56d5f
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_mkdir.c
@@ -0,0 +1,38 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <sys/fcntl.h>
+#include <sys/lx_fcntl.h>
+
+/*
+ * From "uts/common/syscall/mkdir.c":
+ */
+extern int mkdirat(int, char *, int);
+
+long
+lx_mkdirat(int fd, char *dname, int dmode)
+{
+ if (fd == LX_AT_FDCWD) {
+ fd = AT_FDCWD;
+ }
+
+ return (mkdirat(fd, dname, dmode));
+}
+
+long
+lx_mkdir(char *dname, int dmode)
+{
+ return (mkdirat(AT_FDCWD, dname, dmode));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_modify_ldt.c b/usr/src/uts/common/brand/lx/syscall/lx_modify_ldt.c
new file mode 100644
index 0000000000..aa6e12a7d8
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_modify_ldt.c
@@ -0,0 +1,121 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/segments.h>
+#include <sys/archsystm.h>
+#include <sys/proc.h>
+#include <sys/sysi86.h>
+#include <sys/cmn_err.h>
+#include <sys/lx_ldt.h>
+
+/*
+ * Read the ldt_info structure in from the Linux app, convert it to an ssd
+ * structure, and then call setdscr() to do all the heavy lifting.
+ */
+static int
+write_ldt(void *data, ulong_t count)
+{
+ user_desc_t usd;
+ struct ssd ssd;
+ struct ldt_info ldt_inf;
+ proc_t *pp = curthread->t_procp;
+ int err;
+
+ if (count != sizeof (ldt_inf))
+ return (set_errno(EINVAL));
+
+ if (copyin(data, &ldt_inf, sizeof (ldt_inf)))
+ return (set_errno(EFAULT));
+
+ if (ldt_inf.entry_number >= MAXNLDT)
+ return (set_errno(EINVAL));
+
+ LDT_INFO_TO_DESC(&ldt_inf, &usd);
+ usd_to_ssd(&usd, &ssd, SEL_LDT(ldt_inf.entry_number));
+
+ /*
+ * Get everyone into a safe state before changing the LDT.
+ */
+ if (!holdlwps(SHOLDFORK1))
+ return (set_errno(EINTR));
+
+ err = setdscr(&ssd);
+
+ /*
+ * Release the hounds!
+ */
+ mutex_enter(&pp->p_lock);
+ continuelwps(pp);
+ mutex_exit(&pp->p_lock);
+
+ return (err ? set_errno(err) : 0);
+}
+
+static int
+read_ldt(void *uptr, ulong_t count)
+{
+ proc_t *pp = curproc;
+ int bytes;
+
+ if (pp->p_ldt == NULL)
+ return (0);
+
+ bytes = (pp->p_ldtlimit + 1) * sizeof (user_desc_t);
+ if (bytes > count)
+ bytes = count;
+
+ if (copyout(pp->p_ldt, uptr, bytes))
+ return (set_errno(EFAULT));
+
+ return (bytes);
+}
+
+long
+lx_modify_ldt(int op, void *data, ulong_t count)
+{
+ int rval;
+
+ switch (op) {
+ case 0:
+ rval = read_ldt(data, count);
+ break;
+
+ case 1:
+ rval = write_ldt(data, count);
+ break;
+
+ default:
+ rval = set_errno(ENOSYS);
+ break;
+ }
+
+ return (rval);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_open.c b/usr/src/uts/common/brand/lx/syscall/lx_open.c
new file mode 100644
index 0000000000..431c2ed1ba
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_open.c
@@ -0,0 +1,260 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/systm.h>
+#include <sys/fcntl.h>
+#include <sys/file.h>
+#include <sys/filio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/inttypes.h>
+#include <sys/mutex.h>
+
+#include <sys/lx_types.h>
+#include <sys/lx_fcntl.h>
+#include <sys/lx_misc.h>
+
+extern int fcntl(int, int, intptr_t);
+extern int openat(int, char *, int, int);
+extern int open(char *, int, int);
+extern int close(int);
+extern int cioctl(file_t *, int, intptr_t, int *);
+extern int lookupnameat(char *, enum uio_seg, int, vnode_t **, vnode_t **,
+ vnode_t *);
+
+
+static int
+ltos_open_flags(int input)
+{
+ int flags;
+
+ if (input & LX_O_PATH) {
+ input &= (LX_O_DIRECTORY | LX_O_NOFOLLOW | LX_O_CLOEXEC);
+ }
+
+ /* This depends on the Linux ACCMODE flags being the same as SunOS. */
+ flags = (input & LX_O_ACCMODE);
+
+ if (input & LX_O_CREAT) {
+ flags |= O_CREAT;
+ }
+
+ if (input & LX_O_EXCL)
+ flags |= O_EXCL;
+ if (input & LX_O_NOCTTY)
+ flags |= O_NOCTTY;
+ if (input & LX_O_TRUNC)
+ flags |= O_TRUNC;
+ if (input & LX_O_APPEND)
+ flags |= O_APPEND;
+ if (input & LX_O_NONBLOCK)
+ flags |= O_NONBLOCK;
+ if (input & LX_O_SYNC)
+ flags |= O_SYNC;
+ if (input & LX_O_LARGEFILE)
+ flags |= O_LARGEFILE;
+ if (input & LX_O_NOFOLLOW)
+ flags |= O_NOFOLLOW;
+ if (input & LX_O_CLOEXEC)
+ flags |= O_CLOEXEC;
+
+ /*
+ * Linux uses the LX_O_DIRECT flag to do raw, synchronous I/O to the
+ * device backing the fd in question. Illumos doesn't have similar
+ * functionality, but we can attempt to simulate it using the flags
+ * (O_RSYNC|O_SYNC) and directio(3C).
+ *
+ * The LX_O_DIRECT flag also requires that the transfer size and
+ * alignment of I/O buffers be a multiple of the logical block size for
+ * the underlying file system, but frankly there isn't an easy way to
+ * support that functionality without doing something like adding an
+ * fcntl(2) flag to denote LX_O_DIRECT mode.
+ *
+ * Since LX_O_DIRECT is merely a performance advisory, we'll just
+ * emulate what we can and trust that the only applications expecting
+ * an error when performing I/O from a misaligned buffer or when
+ * passing a transfer size is not a multiple of the underlying file
+ * system block size will be test suites.
+ */
+ if (input & LX_O_DIRECT)
+ flags |= (O_RSYNC|O_SYNC);
+
+ return (flags);
+}
+
+#define LX_POSTPROCESS_OPTS (LX_O_DIRECT | LX_O_ASYNC | LX_O_PATH)
+
+static int
+lx_open_postprocess(int fd, int fmode)
+{
+ file_t *fp;
+ int rv, error = 0;
+
+ if ((fmode & LX_POSTPROCESS_OPTS) == 0) {
+ /* Skip out early, if possible */
+ return (0);
+ }
+
+ if ((fp = getf(fd)) == NULL) {
+ /*
+ * It is possible that this fd was closed by the time we
+ * arrived here if some one is hammering away with close().
+ */
+ return (EIO);
+ }
+
+ if (fmode & LX_O_DIRECT && error == 0) {
+ (void) VOP_IOCTL(fp->f_vnode, _FIODIRECTIO, DIRECTIO_ON,
+ fp->f_flag, fp->f_cred, &rv, NULL);
+ }
+
+ if (fmode & LX_O_ASYNC && error == 0) {
+ if ((error = VOP_SETFL(fp->f_vnode, fp->f_flag, FASYNC,
+ fp->f_cred, NULL)) == 0) {
+ mutex_enter(&fp->f_tlock);
+ fp->f_flag |= FASYNC;
+ mutex_exit(&fp->f_tlock);
+ }
+ }
+
+ if (fmode & LX_O_PATH && error == 0) {
+ /*
+ * While the O_PATH flag has no direct analog in SunOS, it is
+ * emulated by removing both FREAD and FWRITE from f_flag.
+ * This causes read(2) and write(2) result in EBADF and can be
+ * checked for in other syscalls to tigger the correct behavior
+ * there.
+ */
+ mutex_enter(&fp->f_tlock);
+ fp->f_flag &= ~(FREAD|FWRITE);
+ mutex_exit(&fp->f_tlock);
+ }
+
+ releasef(fd);
+ if (error != 0) {
+ (void) closeandsetf(fd, NULL);
+ }
+ return (error);
+}
+
+long
+lx_openat(int atfd, char *path, int fmode, int cmode)
+{
+ int flags, fd, error;
+ mode_t mode = 0;
+
+ if (atfd == LX_AT_FDCWD)
+ atfd = AT_FDCWD;
+
+ flags = ltos_open_flags(fmode);
+
+ /*
+ * We use the FSEARCH flag to make sure this is a directory. We have to
+ * explicitly add 1 to emulate the FREAD/FWRITE mapping of the OPENMODE
+ * macro since it won't get set via OPENMODE when FSEARCH is used.
+ */
+ if (fmode & LX_O_DIRECTORY) {
+ flags |= FSEARCH;
+ flags++;
+ }
+
+ if (flags & O_CREAT)
+ mode = (mode_t)cmode;
+
+ ttolwp(curthread)->lwp_errno = 0;
+ fd = openat(atfd, path, flags, mode);
+ if (ttolwp(curthread)->lwp_errno != 0) {
+ if ((fmode & LX_O_DIRECTORY) &&
+ ttolwp(curthread)->lwp_errno != ENOTDIR) {
+ /*
+ * We got an error trying to open a file as a directory.
+ * We need to determine if we should return the original
+ * error or ENOTDIR.
+ */
+ vnode_t *startvp;
+ vnode_t *vp;
+ int oerror, error = 0;
+
+ oerror = ttolwp(curthread)->lwp_errno;
+
+ if (atfd == AT_FDCWD) {
+ /* regular open */
+ startvp = NULL;
+ } else {
+ char startchar;
+
+ if (copyin(path, &startchar, sizeof (char)))
+ return (set_errno(oerror));
+
+ /* if startchar is / then startfd is ignored */
+ if (startchar == '/') {
+ startvp = NULL;
+ } else {
+ file_t *startfp;
+
+ if ((startfp = getf(atfd)) == NULL)
+ return (set_errno(oerror));
+ startvp = startfp->f_vnode;
+ VN_HOLD(startvp);
+ releasef(atfd);
+ }
+ }
+
+ if (lookupnameat(path, UIO_USERSPACE,
+ (fmode & LX_O_NOFOLLOW) ? NO_FOLLOW : FOLLOW,
+ NULLVPP, &vp, startvp) != 0) {
+ if (startvp != NULL)
+ VN_RELE(startvp);
+ return (set_errno(oerror));
+ }
+
+ if (startvp != NULL)
+ VN_RELE(startvp);
+
+ if (vp->v_type != VDIR)
+ error = ENOTDIR;
+
+ VN_RELE(vp);
+ if (error != 0)
+ return (set_errno(ENOTDIR));
+
+ set_errno(oerror);
+ }
+ return (ttolwp(curthread)->lwp_errno);
+ }
+
+ if ((error = lx_open_postprocess(fd, fmode)) != 0) {
+ return (set_errno(error));
+ }
+ return (fd);
+}
+
+long
+lx_open(char *path, int fmode, int cmode)
+{
+ return (lx_openat(LX_AT_FDCWD, path, fmode, cmode));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_personality.c b/usr/src/uts/common/brand/lx/syscall/lx_personality.c
new file mode 100644
index 0000000000..e7aa945b50
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_personality.c
@@ -0,0 +1,112 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <sys/systm.h>
+#include <sys/mutex.h>
+#include <sys/brand.h>
+
+#include <sys/lx_brand.h>
+#include <sys/lx_impl.h>
+
+
+/*
+ * These flags are for what Linux calls "bug emulation".
+ * (Descriptions from the personality(2) Linux man page.)
+ *
+ * Flags which are currently actionable in LX:
+ * - READ_IMPLIES_EXEC (since Linux 2.6.8)
+ * With this flag set, PROT_READ implies PROT_EXEC for mmap(2).
+ *
+ * Flags which are current accepted but ignored:
+ * - UNAME26 (since Linux 3.1)
+ * Have uname(2) report a 2.6.40+ version number rather than a 3.x version
+ * number. Added as a stopgap measure to support broken applications that
+ * could not handle the kernel version- numbering switch from 2.6.x to 3.x.
+ *
+ * - ADDR_NO_RANDOMIZE (since Linux 2.6.12)
+ * With this flag set, disable address-space-layout randomization.
+ *
+ * - FDPIC_FUNCPTRS (since Linux 2.6.11)
+ * User-space function pointers to signal handlers point (on certain
+ * architectures) to descriptors.
+ *
+ * - MMAP_PAGE_ZERO (since Linux 2.4.0)
+ * Map page 0 as read-only (to support binaries that depend on this SVr4
+ * behavior).
+ *
+ * - ADDR_COMPAT_LAYOUT (since Linux 2.6.9)
+ * With this flag set, provide legacy virtual address space layout.
+ *
+ * - ADDR_LIMIT_32BIT (since Linux 2.2)
+ * Limit the address space to 32 bits.
+ *
+ * - SHORT_INODE (since Linux 2.4.0)
+ * No effects(?).
+ *
+ * - WHOLE_SECONDS (since Linux 1.2.0)
+ * No effects(?).
+ *
+ * - STICKY_TIMEOUTS (since Linux 1.2.0)
+ * With this flag set, select(2), pselect(2), and ppoll(2) do not modify the
+ * returned timeout argument when interrupted by a signal handler.
+ *
+ * - ADDR_LIMIT_3GB (since Linux 2.4.0)
+ * With this flag set, use 0xc0000000 as the offset at which to search a
+ * virtual memory chunk on mmap(2); otherwise use 0xffffe000.
+ */
+
+#define LX_PER_GET 0xffffffff
+
+long
+lx_personality(unsigned int arg)
+{
+ lx_proc_data_t *lxpd = ptolxproc(curproc);
+ unsigned int result = 0;
+
+ mutex_enter(&curproc->p_lock);
+ result = lxpd->l_personality;
+
+ if (arg == LX_PER_GET) {
+ mutex_exit(&curproc->p_lock);
+ return (result);
+ }
+
+ /*
+ * Prevent changes to the personality if the process is undergoing an
+ * exec. This will allow elfexec and friends to manipulate the
+ * personality without hinderance.
+ */
+ if ((curproc->p_flag & P_PR_EXEC) != 0) {
+ mutex_exit(&curproc->p_lock);
+ return (set_errno(EINVAL));
+ }
+
+ /*
+ * Keep tabs when a non-Linux personality is set. This is silently
+ * allowed to succeed, even though the emulation required is almost
+ * certainly missing.
+ */
+ if ((arg & LX_PER_MASK) != LX_PER_LINUX) {
+ char buf[64];
+
+ (void) snprintf(buf, sizeof (buf), "invalid personality: %02X",
+ arg & LX_PER_MASK);
+ lx_unsupported(buf);
+ }
+
+ lxpd->l_personality = arg;
+ mutex_exit(&curproc->p_lock);
+ return (result);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_pipe.c b/usr/src/uts/common/brand/lx/syscall/lx_pipe.c
new file mode 100644
index 0000000000..519c742abc
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_pipe.c
@@ -0,0 +1,200 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T. All Rights Reserved.
+ *
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2013 OmniTI Computer Consulting, Inc. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <sys/zone.h>
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/thread.h>
+#include <sys/cpuvar.h>
+#include <sys/cred.h>
+#include <sys/user.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/stream.h>
+#include <sys/strsubr.h>
+#include <sys/errno.h>
+#include <sys/debug.h>
+#include <sys/fs/fifonode.h>
+#include <sys/fcntl.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_fcntl.h>
+
+/*
+ * Based on native pipe(2) system call, except that the pipe is half-duplex.
+ */
+static int
+lx_hd_pipe(intptr_t arg, int flags)
+{
+ vnode_t *vp1, *vp2;
+ struct file *fp1, *fp2;
+ int error = 0;
+ int flag1, flag2, iflags;
+ int fd1, fd2;
+
+ /*
+ * Validate allowed flags.
+ */
+ if ((flags & ~(FCLOEXEC|FNONBLOCK)) != 0) {
+ return (set_errno(EINVAL));
+ }
+ /*
+ * Allocate and initialize two vnodes.
+ */
+ makepipe(&vp1, &vp2);
+
+ /*
+ * Allocate and initialize two file table entries and two
+ * file pointers. The first file pointer is open for read and the
+ * second is open for write.
+ */
+ if ((error = falloc(vp1, FREAD, &fp1, &fd1)) != 0) {
+ VN_RELE(vp1);
+ VN_RELE(vp2);
+ return (set_errno(error));
+ }
+
+ if ((error = falloc(vp2, FWRITE, &fp2, &fd2)) != 0)
+ goto out2;
+
+ /*
+ * Create two stream heads and attach to each vnode.
+ */
+ if ((error = fifo_stropen(&vp1, FREAD, fp1->f_cred, 0, 0)) != 0)
+ goto out;
+
+ if ((error = fifo_stropen(&vp2, FWRITE, fp2->f_cred, 0, 0)) != 0) {
+ (void) VOP_CLOSE(vp1, FREAD, 1, (offset_t)0,
+ fp1->f_cred, NULL);
+ goto out;
+ }
+
+ strmate(vp1, vp2);
+
+ VTOF(vp1)->fn_ino = VTOF(vp2)->fn_ino = fifogetid();
+
+ /*
+ * Set the O_NONBLOCK flag if requested.
+ */
+ if (flags & FNONBLOCK) {
+ flag1 = fp1->f_flag;
+ flag2 = fp2->f_flag;
+ iflags = flags & FNONBLOCK;
+
+ if ((error = VOP_SETFL(vp1, flag1, iflags, fp1->f_cred,
+ NULL)) != 0) {
+ goto out_vop_close;
+ }
+ fp1->f_flag |= iflags;
+
+ if ((error = VOP_SETFL(vp2, flag2, iflags, fp2->f_cred,
+ NULL)) != 0) {
+ goto out_vop_close;
+ }
+ fp2->f_flag |= iflags;
+ }
+
+ /*
+ * Return the file descriptors to the user. They now
+ * point to two different vnodes which have different
+ * stream heads.
+ */
+ if (copyout(&fd1, &((int *)arg)[0], sizeof (int)) ||
+ copyout(&fd2, &((int *)arg)[1], sizeof (int))) {
+ error = EFAULT;
+ goto out_vop_close;
+ }
+
+ /*
+ * Now fill in the entries that falloc reserved
+ */
+ mutex_exit(&fp1->f_tlock);
+ mutex_exit(&fp2->f_tlock);
+ setf(fd1, fp1);
+ setf(fd2, fp2);
+
+ /*
+ * Optionally set the FCLOEXEC flag
+ */
+ if ((flags & FCLOEXEC) != 0) {
+ f_setfd(fd1, FD_CLOEXEC);
+ f_setfd(fd2, FD_CLOEXEC);
+ }
+
+ return (0);
+out_vop_close:
+ (void) VOP_CLOSE(vp1, FREAD, 1, (offset_t)0, fp1->f_cred, NULL);
+ (void) VOP_CLOSE(vp2, FWRITE, 1, (offset_t)0, fp2->f_cred, NULL);
+out:
+ setf(fd2, NULL);
+ unfalloc(fp2);
+out2:
+ setf(fd1, NULL);
+ unfalloc(fp1);
+ VN_RELE(vp1);
+ VN_RELE(vp2);
+ return (set_errno(error));
+}
+
+/*
+ * pipe(2) system call.
+ */
+long
+lx_pipe(intptr_t arg)
+{
+ return (lx_hd_pipe(arg, 0));
+}
+
+/*
+ * pipe2(2) system call.
+ */
+long
+lx_pipe2(intptr_t arg, int lxflags)
+{
+ int flags = 0;
+
+ /*
+ * Validate allowed flags.
+ */
+ if ((lxflags & ~(LX_O_NONBLOCK | LX_O_CLOEXEC)) != 0) {
+ return (set_errno(EINVAL));
+ }
+
+ /*
+ * Convert from Linux flags to illumos flags.
+ */
+ if (lxflags & LX_O_NONBLOCK) {
+ flags |= FNONBLOCK;
+ }
+ if (lxflags & LX_O_CLOEXEC) {
+ flags |= FCLOEXEC;
+ }
+
+ return (lx_hd_pipe(arg, flags));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_poll.c b/usr/src/uts/common/brand/lx/syscall/lx_poll.c
new file mode 100644
index 0000000000..1d92a55ddf
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_poll.c
@@ -0,0 +1,762 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/thread.h>
+#include <sys/proc.h>
+#include <sys/zone.h>
+#include <sys/brand.h>
+#include <sys/sunddi.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_types.h>
+#include <sys/poll_impl.h>
+#include <sys/schedctl.h>
+#include <sys/lx_signal.h>
+
+
+/* From uts/common/syscall/poll.c */
+extern int poll_copyin(pollstate_t *, pollfd_t *, nfds_t);
+extern int poll_common(pollstate_t *, pollfd_t *, nfds_t, timespec_t *, int *);
+
+/*
+ * These events are identical between Linux and SunOS
+ */
+#define LX_POLLIN 0x001
+#define LX_POLLPRI 0x002
+#define LX_POLLOUT 0x004
+#define LX_POLLERR 0x008
+#define LX_POLLHUP 0x010
+#define LX_POLLNVAL 0x020
+#define LX_POLLRDNORM 0x040
+#define LX_POLLRDBAND 0x080
+
+#define LX_POLL_COMMON_EVENTS (LX_POLLIN | LX_POLLPRI | LX_POLLOUT | \
+ LX_POLLERR | LX_POLLHUP | LX_POLLNVAL | LX_POLLRDNORM | LX_POLLRDBAND)
+
+/*
+ * These events differ between Linux and SunOS
+ */
+#define LX_POLLWRNORM 0x0100
+#define LX_POLLWRBAND 0x0200
+#define LX_POLLRDHUP 0x2000
+
+
+#define LX_POLL_SUPPORTED_EVENTS \
+ (LX_POLL_COMMON_EVENTS | LX_POLLWRNORM | LX_POLLWRBAND | LX_POLLRDHUP)
+
+
+static int
+lx_poll_copyin(pollstate_t *ps, pollfd_t *fds, nfds_t nfds, short *oldevt)
+{
+ int i, error = 0;
+ pollfd_t *pollfdp;
+
+ if ((error = poll_copyin(ps, fds, nfds)) != 0) {
+ return (error);
+ }
+ pollfdp = ps->ps_pollfd;
+
+ /* Convert the Linux events bitmask into SunOS equivalent. */
+ for (i = 0; i < nfds; i++) {
+ short lx_events = pollfdp[i].events;
+ short events;
+
+ /*
+ * If the caller is polling for an unsupported event, we
+ * have to bail out.
+ */
+ if (lx_events & ~LX_POLL_SUPPORTED_EVENTS) {
+ return (ENOTSUP);
+ }
+
+ events = lx_events & LX_POLL_COMMON_EVENTS;
+ if (lx_events & LX_POLLWRNORM)
+ events |= POLLWRNORM;
+ if (lx_events & LX_POLLWRBAND)
+ events |= POLLWRBAND;
+ if (lx_events & LX_POLLRDHUP)
+ events |= POLLRDHUP;
+ pollfdp[i].events = events;
+ oldevt[i] = lx_events;
+ }
+ return (0);
+}
+
+static int
+lx_poll_copyout(pollfd_t *pollfdp, pollfd_t *fds, nfds_t nfds, short *oldevt)
+{
+ int i;
+
+ /*
+ * Convert SunOS revents bitmask into Linux equivalent and restore
+ * cached events field which was swizzled by lx_poll_copyin.
+ */
+ for (i = 0; i < nfds; i++) {
+ short revents = pollfdp[i].revents;
+ short lx_revents = revents & LX_POLL_COMMON_EVENTS;
+ short orig_events = oldevt[i];
+
+ if (revents & POLLWRBAND)
+ lx_revents |= LX_POLLWRBAND;
+ if (revents & POLLRDHUP)
+ lx_revents |= LX_POLLRDHUP;
+ /*
+ * Because POLLOUT and POLLWRNORM are native defined as the
+ * same value, care must be taken when translating them to
+ * Linux where they differ.
+ */
+ if (revents & POLLOUT) {
+ if ((orig_events & LX_POLLOUT) == 0)
+ lx_revents &= ~LX_POLLOUT;
+ if (orig_events & LX_POLLWRNORM)
+ lx_revents |= LX_POLLWRNORM;
+ }
+
+ pollfdp[i].revents = lx_revents;
+ pollfdp[i].events = orig_events;
+ }
+
+ if (copyout(pollfdp, fds, sizeof (pollfd_t) * nfds) != 0)
+ return (EFAULT);
+
+ return (0);
+}
+
+static long
+lx_poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
+{
+ kthread_t *t = curthread;
+ klwp_t *lwp = ttolwp(t);
+ proc_t *p = ttoproc(t);
+ pollstate_t *ps = NULL;
+ pollfd_t *pollfdp = NULL;
+ short *oldevt = NULL;
+ int error = 0, fdcnt = 0;
+
+ /*
+ * Reset our signal mask, if requested.
+ */
+ if (ksetp != NULL) {
+ mutex_enter(&p->p_lock);
+ schedctl_finish_sigblock(t);
+ lwp->lwp_sigoldmask = t->t_hold;
+ t->t_hold = *ksetp;
+ t->t_flag |= T_TOMASK;
+ /*
+ * Call cv_reltimedwait_sig() just to check for signals.
+ * We will return immediately with either 0 or -1.
+ */
+ if (!cv_reltimedwait_sig(&t->t_delay_cv, &p->p_lock, 0,
+ TR_CLOCK_TICK)) {
+ mutex_exit(&p->p_lock);
+ error = EINTR;
+ goto pollout;
+ }
+ mutex_exit(&p->p_lock);
+ }
+
+ /*
+ * Initialize pollstate and copy in pollfd data if present.
+ */
+ if (nfds != 0) {
+ if (nfds > p->p_fno_ctl) {
+ mutex_enter(&p->p_lock);
+ (void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
+ p->p_rctls, p, RCA_SAFE);
+ mutex_exit(&p->p_lock);
+ error = EINVAL;
+ goto pollout;
+ }
+
+ /*
+ * Need to allocate memory for pollstate before anything
+ * because the mutex and cv are created in this space
+ */
+ ps = pollstate_create();
+ if (ps->ps_pcache == NULL)
+ ps->ps_pcache = pcache_alloc();
+
+ /*
+ * Certain event types which are distinct on Linux are aliased
+ * against each other on illumos. In order properly translate
+ * back into the Linux format, the original events of interest
+ * are stored in 'oldevt' for use during lx_poll_copyout.
+ */
+ oldevt = kmem_alloc(nfds * sizeof (short), KM_SLEEP);
+ if ((error = lx_poll_copyin(ps, fds, nfds, oldevt)) != 0)
+ goto pollout;
+ pollfdp = ps->ps_pollfd;
+ }
+
+ /*
+ * Perform the actual poll.
+ */
+ error = poll_common(ps, fds, nfds, tsp, &fdcnt);
+
+pollout:
+ /*
+ * If we changed the signal mask but we received no signal then restore
+ * the signal mask. Otherwise psig() will deal with the signal mask.
+ */
+ if (ksetp != NULL) {
+ mutex_enter(&p->p_lock);
+ if (lwp->lwp_cursig == 0) {
+ t->t_hold = lwp->lwp_sigoldmask;
+ t->t_flag &= ~T_TOMASK;
+ }
+ mutex_exit(&p->p_lock);
+ }
+
+ /*
+ * Copy out the events and return the fdcnt to the user.
+ */
+ if (nfds != 0 && error == 0) {
+ error = lx_poll_copyout(pollfdp, fds, nfds, oldevt);
+ }
+ if (oldevt != NULL) {
+ kmem_free(oldevt, nfds * sizeof (short));
+ }
+ if (error) {
+ return (set_errno(error));
+ }
+ return (fdcnt);
+}
+
+long
+lx_poll(pollfd_t *fds, nfds_t nfds, int timeout)
+{
+ timespec_t ts, *tsp = NULL;
+
+ if (timeout >= 0) {
+ ts.tv_sec = timeout / MILLISEC;
+ ts.tv_nsec = (timeout % MILLISEC) * MICROSEC;
+ tsp = &ts;
+ }
+
+ return (lx_poll_common(fds, nfds, tsp, NULL));
+}
+
+long
+lx_ppoll(pollfd_t *fds, nfds_t nfds, timespec_t *timeoutp, lx_sigset_t *setp)
+{
+ timespec_t ts, *tsp = NULL;
+ k_sigset_t kset, *ksetp = NULL;
+
+ /*
+ * Copy in timeout and sigmask.
+ */
+ if (timeoutp != NULL) {
+ if (get_udatamodel() == DATAMODEL_NATIVE) {
+ if (copyin(timeoutp, &ts, sizeof (ts)))
+ return (set_errno(EFAULT));
+ } else {
+ timespec32_t ts32;
+
+ if (copyin(timeoutp, &ts32, sizeof (ts32)))
+ return (set_errno(EFAULT));
+ TIMESPEC32_TO_TIMESPEC(&ts, &ts32)
+ }
+
+ if (itimerspecfix(&ts))
+ return (set_errno(EINVAL));
+ tsp = &ts;
+ }
+ if (setp != NULL) {
+ lx_sigset_t lset;
+
+ if (copyin(setp, &lset, sizeof (lset)))
+ return (set_errno(EFAULT));
+ lx_ltos_sigset(&lset, &kset);
+ ksetp = &kset;
+ }
+
+ return (lx_poll_common(fds, nfds, tsp, ksetp));
+}
+
+typedef struct lx_select_buf_s {
+ long *lsb_rfds;
+ long *lsb_wfds;
+ long *lsb_efds;
+ unsigned int lsb_size;
+} lx_select_buf_t;
+
+/*
+ * Size (in bytes) of buffer appropriate for fd_set copyin/copyout.
+ * Linux uses buffers of 'long' to accomplish this.
+ */
+#define LX_FD_SET_BYTES (sizeof (long))
+#define LX_FD_SET_BITS (8 * LX_FD_SET_BYTES)
+#define LX_FD_SET_SIZE(nfds) \
+ ((((nfds) + (LX_FD_SET_BITS - 1)) / LX_FD_SET_BITS) * LX_FD_SET_BYTES)
+
+static int
+lx_select_copyin(pollstate_t *ps, lx_select_buf_t *sbuf, int nfds,
+ long *rfds, long *wfds, long *efds)
+{
+ int n;
+ long *in, *out, *ex;
+ long absent = 0;
+ pollfd_t *pfd;
+ nfds_t old_nfds;
+
+ /*
+ * Just like pollsys and lx_poll, attempt to reuse ps_pollfd if it is
+ * appropriately sized. See poll_copyin for more detail.
+ */
+ old_nfds = ps->ps_nfds;
+ if (nfds != old_nfds) {
+ kmem_free(ps->ps_pollfd, old_nfds * sizeof (pollfd_t));
+ pfd = kmem_alloc(nfds * sizeof (pollfd_t), KM_SLEEP);
+ ps->ps_pollfd = pfd;
+ ps->ps_nfds = nfds;
+ } else {
+ pfd = ps->ps_pollfd;
+ }
+
+ if (rfds != NULL) {
+ if (copyin(rfds, sbuf->lsb_rfds, sbuf->lsb_size) != 0) {
+ return (EFAULT);
+ }
+ }
+ if (wfds != NULL) {
+ if (copyin(wfds, sbuf->lsb_wfds, sbuf->lsb_size) != 0) {
+ return (EFAULT);
+ }
+ }
+ if (efds != NULL) {
+ if (copyin(efds, sbuf->lsb_efds, sbuf->lsb_size) != 0) {
+ return (EFAULT);
+ }
+ }
+
+ /*
+ * For each fd, if any bits are set convert them into the appropriate
+ * pollfd struct. (Derived from libc's select logic)
+ */
+ in = (rfds != NULL) ? sbuf->lsb_rfds : &absent;
+ out = (wfds != NULL) ? sbuf->lsb_wfds : &absent;
+ ex = (efds != NULL) ? sbuf->lsb_efds : &absent;
+ for (n = 0; n < nfds; n += LX_FD_SET_BITS) {
+ unsigned long b, m, j;
+
+ b = (unsigned long)(*in | *out | *ex);
+ m = 1;
+ for (j = 0; j < LX_FD_SET_BITS; j++) {
+ int fd = n + j;
+
+ if (fd >= nfds)
+ return (0);
+ pfd->events = 0;
+ if (b & 1) {
+ pfd->fd = fd;
+ if (*in & m)
+ pfd->events |= POLLRDNORM;
+ if (*out & m)
+ pfd->events |= POLLWRNORM;
+ if (*ex & m)
+ pfd->events |= POLLRDBAND;
+ } else {
+ pfd->fd = -1;
+ }
+ pfd++;
+ b >>= 1;
+ m <<= 1;
+ }
+
+ if (rfds != NULL)
+ in++;
+ if (wfds != NULL)
+ out++;
+ if (efds != NULL)
+ ex++;
+ }
+ return (0);
+}
+
+static int
+lx_select_copyout(pollfd_t *pollfdp, lx_select_buf_t *sbuf, int nfds,
+ long *rfds, long *wfds, long *efds, int *fdcnt)
+{
+ int n;
+ pollfd_t *pfd;
+ long rv = 0;
+
+ /*
+ * If poll did not find any fds of interest, we can just zero out the
+ * fd_set fields for copyout.
+ */
+ if (*fdcnt == 0) {
+ if (rfds != NULL) {
+ bzero(sbuf->lsb_rfds, sbuf->lsb_size);
+ }
+ if (wfds != NULL) {
+ bzero(sbuf->lsb_wfds, sbuf->lsb_size);
+ }
+ if (efds != NULL) {
+ bzero(sbuf->lsb_efds, sbuf->lsb_size);
+ }
+ goto copyout;
+ }
+
+ /*
+ * For each fd, if any bits are set convert them into the appropriate
+ * pollfd struct. (Derived from libc's select logic)
+ */
+ pfd = pollfdp;
+ for (n = 0; n < nfds; n += LX_FD_SET_BITS) {
+ unsigned long m, j;
+ long in = 0, out = 0, ex = 0;
+
+ m = 1;
+ for (j = 0; j < LX_FD_SET_BITS; j++) {
+ if ((n + j) >= nfds)
+ break;
+ if (pfd->revents != 0) {
+ if (pfd->revents & POLLNVAL) {
+ return (EBADF);
+ }
+ if (pfd->revents & POLLRDNORM) {
+ in |= m;
+ rv++;
+ }
+ if (pfd->revents & POLLWRNORM) {
+ out |= m;
+ rv++;
+ }
+ if (pfd->revents & POLLRDBAND) {
+ ex |= m;
+ rv++;
+ }
+ /*
+ * Only set this bit on return if we asked
+ * about input conditions.
+ */
+ if ((pfd->revents & (POLLHUP|POLLERR)) &&
+ (pfd->events & POLLRDNORM)) {
+ if ((in & m) == 0) {
+ /* wasn't already set */
+ rv++;
+ }
+ in |= m;
+ }
+ /*
+ * Only set this bit on return if we asked
+ * about output conditions.
+ */
+ if ((pfd->revents & (POLLHUP|POLLERR)) &&
+ (pfd->events & POLLWRNORM)) {
+ if ((out & m) == 0) {
+ /* wasn't already set */
+ rv++;
+ }
+ out |= m;
+ }
+ /*
+ * Only set this bit on return if we asked
+ * about output conditions.
+ */
+ if ((pfd->revents & (POLLHUP|POLLERR)) &&
+ (pfd->events & POLLRDBAND)) {
+ if ((ex & m) == 0) {
+ /* wasn't already set */
+ rv++;
+ }
+ ex |= m;
+ }
+ }
+ m <<= 1;
+ pfd++;
+ }
+ if (rfds != NULL)
+ sbuf->lsb_rfds[n / LX_FD_SET_BITS] = in;
+ if (wfds != NULL)
+ sbuf->lsb_wfds[n / LX_FD_SET_BITS] = out;
+ if (efds != NULL)
+ sbuf->lsb_efds[n / LX_FD_SET_BITS] = ex;
+ }
+
+copyout:
+ if (rfds != NULL) {
+ if (copyout(sbuf->lsb_rfds, rfds, sbuf->lsb_size) != 0) {
+ return (EFAULT);
+ }
+ }
+ if (wfds != NULL) {
+ if (copyout(sbuf->lsb_wfds, wfds, sbuf->lsb_size) != 0) {
+ return (EFAULT);
+ }
+ }
+ if (efds != NULL) {
+ if (copyout(sbuf->lsb_efds, efds, sbuf->lsb_size) != 0) {
+ return (EFAULT);
+ }
+ }
+ *fdcnt = rv;
+ return (0);
+}
+
+
+static long
+lx_select_common(int nfds, long *rfds, long *wfds, long *efds,
+ timespec_t *tsp, k_sigset_t *ksetp)
+{
+ kthread_t *t = curthread;
+ klwp_t *lwp = ttolwp(t);
+ proc_t *p = ttoproc(t);
+ pollstate_t *ps = NULL;
+ pollfd_t *pollfdp = NULL, *fake_fds = NULL;
+ lx_select_buf_t sbuf = {0};
+ int error = 0, fdcnt = 0;
+
+ if (nfds < 0) {
+ return (set_errno(EINVAL));
+ }
+
+ /*
+ * Reset our signal mask, if requested.
+ */
+ if (ksetp != NULL) {
+ mutex_enter(&p->p_lock);
+ schedctl_finish_sigblock(t);
+ lwp->lwp_sigoldmask = t->t_hold;
+ t->t_hold = *ksetp;
+ t->t_flag |= T_TOMASK;
+ /*
+ * Call cv_reltimedwait_sig() just to check for signals.
+ * We will return immediately with either 0 or -1.
+ */
+ if (!cv_reltimedwait_sig(&t->t_delay_cv, &p->p_lock, 0,
+ TR_CLOCK_TICK)) {
+ mutex_exit(&p->p_lock);
+ error = EINTR;
+ goto out;
+ }
+ mutex_exit(&p->p_lock);
+ }
+
+ /*
+ * Because poll caching uses the userspace pollfd_t pointer to verify
+ * cache reuse validity, a simulated value must be supplied when
+ * emulating Linux select(2). The first non-NULL pointer from
+ * rfds/wfds/efds is used for this purpose.
+ */
+ if (rfds != NULL) {
+ fake_fds = (pollfd_t *)rfds;
+ } else if (wfds != NULL) {
+ fake_fds = (pollfd_t *)wfds;
+ } else if (efds != NULL) {
+ fake_fds = (pollfd_t *)efds;
+ } else {
+ /*
+ * A non-zero nfds was supplied but all three fd_set pointers
+ * were null. Fall back to doing a simple timeout.
+ */
+ nfds = 0;
+ }
+
+ /*
+ * Initialize pollstate and copy in pollfd data if present.
+ */
+ if (nfds != 0) {
+ if (nfds > p->p_fno_ctl) {
+ mutex_enter(&p->p_lock);
+ (void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
+ p->p_rctls, p, RCA_SAFE);
+ mutex_exit(&p->p_lock);
+ error = EINVAL;
+ goto out;
+ }
+
+ /*
+ * Need to allocate memory for pollstate before anything
+ * because the mutex and cv are created in this space
+ */
+ ps = pollstate_create();
+ if (ps->ps_pcache == NULL)
+ ps->ps_pcache = pcache_alloc();
+
+ sbuf.lsb_size = LX_FD_SET_SIZE(nfds);
+ if (rfds != NULL)
+ sbuf.lsb_rfds = kmem_alloc(sbuf.lsb_size, KM_SLEEP);
+ if (wfds != NULL)
+ sbuf.lsb_wfds = kmem_alloc(sbuf.lsb_size, KM_SLEEP);
+ if (efds != NULL)
+ sbuf.lsb_efds = kmem_alloc(sbuf.lsb_size, KM_SLEEP);
+
+ error = lx_select_copyin(ps, &sbuf, nfds, rfds, wfds, efds);
+ if (error != 0) {
+ goto out;
+ }
+
+ pollfdp = ps->ps_pollfd;
+ }
+
+ /*
+ * Perform the actual poll.
+ */
+ error = poll_common(ps, fake_fds, (nfds_t)nfds, tsp, &fdcnt);
+
+out:
+ /*
+ * If we changed the signal mask but we received no signal then restore
+ * the signal mask. Otherwise psig() will deal with the signal mask.
+ */
+ if (ksetp != NULL) {
+ mutex_enter(&p->p_lock);
+ if (lwp->lwp_cursig == 0) {
+ t->t_hold = lwp->lwp_sigoldmask;
+ t->t_flag &= ~T_TOMASK;
+ }
+ mutex_exit(&p->p_lock);
+ }
+
+ /*
+ * Copy out the events and return the fdcnt to the user.
+ */
+ if (error == 0 && nfds != 0) {
+ error = lx_select_copyout(pollfdp, &sbuf, nfds, rfds, wfds,
+ efds, &fdcnt);
+ }
+ if (sbuf.lsb_size != 0) {
+ if (sbuf.lsb_rfds != NULL)
+ kmem_free(sbuf.lsb_rfds, sbuf.lsb_size);
+ if (sbuf.lsb_wfds != NULL)
+ kmem_free(sbuf.lsb_wfds, sbuf.lsb_size);
+ if (sbuf.lsb_efds != NULL)
+ kmem_free(sbuf.lsb_efds, sbuf.lsb_size);
+ }
+ if (error) {
+ return (set_errno(error));
+ }
+ return (fdcnt);
+}
+
+long
+lx_select(int nfds, long *rfds, long *wfds, long *efds,
+ struct timeval *timeoutp)
+{
+ timespec_t ts, *tsp = NULL;
+
+ if (timeoutp != NULL) {
+ if (get_udatamodel() == DATAMODEL_NATIVE) {
+ struct timeval tv;
+
+ if (copyin(timeoutp, &tv, sizeof (tv)))
+ return (set_errno(EFAULT));
+ ts.tv_sec = tv.tv_sec;
+ ts.tv_nsec = tv.tv_usec * (NANOSEC / MICROSEC);
+ } else {
+ struct timeval32 tv32;
+
+ if (copyin(timeoutp, &tv32, sizeof (tv32)))
+ return (set_errno(EFAULT));
+ ts.tv_sec = tv32.tv_sec;
+ ts.tv_nsec = tv32.tv_usec * (NANOSEC / MICROSEC);
+ }
+
+ if (itimerspecfix(&ts))
+ return (set_errno(EINVAL));
+ tsp = &ts;
+ }
+
+ return (lx_select_common(nfds, rfds, wfds, efds, tsp, NULL));
+}
+
+
+typedef struct {
+ uintptr_t lpsa_addr;
+ unsigned long lpsa_len;
+} lx_pselect_sig_arg_t;
+
+#if defined(_LP64)
+typedef struct {
+ caddr32_t lpsa_addr;
+ uint32_t lpsa_len;
+} lx_pselect_sig_arg32_t;
+#endif /* defined(_LP64) */
+
+long
+lx_pselect(int nfds, long *rfds, long *wfds, long *efds,
+ timespec_t *timeoutp, void *setp)
+{
+ timespec_t ts, *tsp = NULL;
+ k_sigset_t kset, *ksetp = NULL;
+
+ /*
+ * Copy in timeout and sigmask.
+ */
+ if (timeoutp != NULL) {
+ if (get_udatamodel() == DATAMODEL_NATIVE) {
+ if (copyin(timeoutp, &ts, sizeof (ts)))
+ return (set_errno(EFAULT));
+ } else {
+ timespec32_t ts32;
+
+ if (copyin(timeoutp, &ts32, sizeof (ts32)))
+ return (set_errno(EFAULT));
+ TIMESPEC32_TO_TIMESPEC(&ts, &ts32)
+ }
+
+ if (itimerspecfix(&ts))
+ return (set_errno(EINVAL));
+ tsp = &ts;
+ }
+ if (setp != NULL) {
+ lx_sigset_t lset, *sigaddr = NULL;
+
+ if (get_udatamodel() == DATAMODEL_NATIVE) {
+ lx_pselect_sig_arg_t lpsa;
+
+ if (copyin(setp, &lpsa, sizeof (lpsa)) != 0)
+ return (set_errno(EFAULT));
+ /*
+ * Linux forces a size to be passed only so it can
+ * check that it's the size of a sigset_t.
+ */
+ if (lpsa.lpsa_len != sizeof (lx_sigset_t))
+ return (set_errno(EINVAL));
+
+ sigaddr = (lx_sigset_t *)lpsa.lpsa_addr;
+ }
+#if defined(_LP64)
+ else {
+ lx_pselect_sig_arg32_t lpsa32;
+
+ if (copyin(setp, &lpsa32, sizeof (lpsa32)) != 0)
+ return (set_errno(EFAULT));
+ /*
+ * Linux forces a size to be passed only so it can
+ * check that it's the size of a sigset_t.
+ */
+ if (lpsa32.lpsa_len != sizeof (lx_sigset_t))
+ return (set_errno(EINVAL));
+
+ sigaddr = (lx_sigset_t *)(uint64_t)lpsa32.lpsa_addr;
+ }
+#endif /* defined(_LP64) */
+
+ /* This is where we check if the sigset is *really* NULL. */
+ if (sigaddr != NULL) {
+ if (copyin(sigaddr, &lset, sizeof (lset)) != 0)
+ return (set_errno(EFAULT));
+
+ lx_ltos_sigset(&lset, &kset);
+ ksetp = &kset;
+ }
+ }
+
+ return (lx_select_common(nfds, rfds, wfds, efds, tsp, ksetp));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_prctl.c b/usr/src/uts/common/brand/lx/syscall/lx_prctl.c
new file mode 100644
index 0000000000..091a6f547b
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_prctl.c
@@ -0,0 +1,210 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/user.h>
+#include <sys/priv.h>
+#include <sys/brand.h>
+#include <sys/cmn_err.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_impl.h>
+#include <lx_signum.h>
+
+#define LX_PR_SET_PDEATHSIG 1
+#define LX_PR_GET_PDEATHSIG 2
+#define LX_PR_GET_DUMPABLE 3
+#define LX_PR_SET_DUMPABLE 4
+#define LX_PR_GET_UNALIGN 5
+#define LX_PR_SET_UNALIGN 6
+#define LX_PR_GET_KEEPCAPS 7
+#define LX_PR_SET_KEEPCAPS 8
+#define LX_PR_GET_FPEMU 9
+#define LX_PR_SET_FPEMU 10
+#define LX_PR_GET_FPEXC 11
+#define LX_PR_SET_FPEXC 12
+#define LX_PR_GET_TIMING 13
+#define LX_PR_SET_TIMING 14
+#define LX_PR_SET_NAME 15
+#define LX_PR_GET_NAME 16
+#define LX_PR_GET_ENDIAN 19
+#define LX_PR_SET_ENDIAN 20
+#define LX_PR_GET_SECCOMP 21
+#define LX_PR_SET_SECCOMP 22
+#define LX_PR_CAPBSET_READ 23
+#define LX_PR_CAPBSET_DROP 24
+#define LX_PR_GET_TSC 25
+#define LX_PR_SET_TSC 26
+#define LX_PR_GET_SECUREBITS 27
+#define LX_PR_SET_SECUREBITS 28
+#define LX_PR_SET_TIMERSLACK 29
+#define LX_PR_GET_TIMERSLACK 30
+#define LX_PR_TASK_PERF_EVENTS_DISABLE 31
+#define LX_PR_TASK_PERF_EVENTS_ENABLE 32
+#define LX_PR_MCE_KILL 33
+#define LX_PR_MCE_KILL_GET 34
+#define LX_PR_SET_MM 35
+#define LX_PR_SET_CHILD_SUBREAPER 36
+#define LX_PR_GET_CHILD_SUBREAPER 37
+#define LX_PR_SET_NO_NEW_PRIVS 38
+#define LX_PR_GET_NO_NEW_PRIVS 39
+#define LX_PR_GET_TID_ADDRESS 40
+#define LX_PR_SET_THP_DISABLE 41
+#define LX_PR_GET_THP_DISABLE 42
+
+#define LX_PR_SET_NAME_NAMELEN 16
+
+long
+lx_prctl(int opt, uintptr_t data)
+{
+ long err;
+ char ebuf[64];
+
+ switch (opt) {
+ case LX_PR_GET_DUMPABLE: {
+ /* Indicate that process is always dumpable */
+ return (1);
+ }
+
+ case LX_PR_SET_DUMPABLE: {
+ if (data != 0 && data != 1) {
+ return (set_errno(EINVAL));
+ }
+ /* Lie about altering process dumpability */
+ return (0);
+ }
+
+ case LX_PR_GET_SECUREBITS: {
+ /* Our bits are always 0 */
+ return (0);
+ }
+
+ case LX_PR_SET_SECUREBITS: {
+ /* Ignore setting any bits from arg2 */
+ return (0);
+ }
+
+ case LX_PR_SET_KEEPCAPS: {
+ /*
+ * The closest illumos analog to SET_KEEPCAPS is the PRIV_AWARE
+ * flag. There are probably some cases where it's not exactly
+ * the same, but this will do for a first try.
+ */
+ if (data == 0) {
+ err = setpflags(PRIV_AWARE_RESET, 1, NULL);
+ } else {
+ err = setpflags(PRIV_AWARE, 1, NULL);
+ }
+
+ if (err != 0) {
+ return (set_errno(err));
+ }
+ return (0);
+ }
+
+ case LX_PR_SET_NAME: {
+ char name[LX_PR_SET_NAME_NAMELEN + 1];
+ proc_t *p = curproc;
+ /*
+ * In Linux, PR_SET_NAME sets the name of the thread, not the
+ * process. Due to the historical quirks of Linux's asinine
+ * thread model, this name is effectively the name of the
+ * process (as visible via ps(1)) if the thread is the first of
+ * its task group. The first thread is therefore special, and
+ * to best mimic Linux semantics (and absent a notion of
+ * per-LWP names), we do nothing (but return success) on LWPs
+ * other than LWP 1.
+ */
+ if (curthread->t_tid != 1) {
+ return (0);
+ }
+ if (copyin((void *)data, name, LX_PR_SET_NAME_NAMELEN) != 0) {
+ return (set_errno(EFAULT));
+ }
+ name[LX_PR_SET_NAME_NAMELEN] = '\0';
+ mutex_enter(&p->p_lock);
+ (void) strncpy(p->p_user.u_comm, name, MAXCOMLEN + 1);
+ (void) strncpy(p->p_user.u_psargs, name, PSARGSZ);
+ mutex_exit(&p->p_lock);
+ return (0);
+ }
+
+ case LX_PR_GET_PDEATHSIG: {
+ int sig;
+ lx_proc_data_t *lxpd;
+
+ mutex_enter(&curproc->p_lock);
+ VERIFY(lxpd = ptolxproc(curproc));
+ sig = lxpd->l_parent_deathsig;
+ mutex_exit(&curproc->p_lock);
+
+ return (sig);
+ }
+
+ case LX_PR_SET_PDEATHSIG: {
+ int sig = lx_ltos_signo((int)data, 0);
+ proc_t *pp = NULL;
+ lx_proc_data_t *lxpd;
+
+ if (sig == 0 && data != 0) {
+ return (set_errno(EINVAL));
+ }
+
+ mutex_enter(&pidlock);
+ /* Set signal on our self */
+ mutex_enter(&curproc->p_lock);
+ VERIFY(lxpd = ptolxproc(curproc));
+ lxpd->l_parent_deathsig = sig;
+ pp = curproc->p_parent;
+ mutex_exit(&curproc->p_lock);
+
+ /* Configure parent to potentially signal children on death */
+ mutex_enter(&pp->p_lock);
+ if (PROC_IS_BRANDED(pp)) {
+ VERIFY(lxpd = ptolxproc(pp));
+ /*
+ * Mark the parent as having children which wish to be
+ * signaled on death of parent.
+ */
+ lxpd->l_flags |= LX_PROC_CHILD_DEATHSIG;
+ } else {
+ /*
+ * If the parent is not a branded process, the needed
+ * hooks to facilitate this mechanism will not fire
+ * when it dies. We lie about success in this case.
+ */
+ }
+ mutex_exit(&pp->p_lock);
+ mutex_exit(&pidlock);
+ return (0);
+ }
+
+ case LX_PR_CAPBSET_DROP: {
+ /*
+ * On recent versions of Linux the login svc drops capabilities
+ * and if that fails the svc dies and is restarted by systemd.
+ * For now we pretend dropping capabilities succeeded.
+ */
+ return (0);
+ }
+
+ default:
+ break;
+ }
+
+ snprintf(ebuf, 64, "prctl option %d", opt);
+ lx_unsupported(ebuf);
+ return (set_errno(EINVAL));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_rlimit.c b/usr/src/uts/common/brand/lx/syscall/lx_rlimit.c
new file mode 100644
index 0000000000..6581ead25b
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_rlimit.c
@@ -0,0 +1,575 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <sys/systm.h>
+#include <sys/thread.h>
+#include <sys/zone.h>
+#include <sys/cpuvar.h>
+#include <sys/cmn_err.h>
+#include <sys/lx_impl.h>
+#include <sys/lx_brand.h>
+
+#define LX_RLIMIT_CPU 0
+#define LX_RLIMIT_FSIZE 1
+#define LX_RLIMIT_DATA 2
+#define LX_RLIMIT_STACK 3
+#define LX_RLIMIT_CORE 4
+#define LX_RLIMIT_RSS 5
+#define LX_RLIMIT_NPROC 6
+#define LX_RLIMIT_NOFILE 7
+#define LX_RLIMIT_MEMLOCK 8
+#define LX_RLIMIT_AS 9
+#define LX_RLIMIT_LOCKS 10 /* NA limit on locks, early 2.4 only */
+#define LX_RLIMIT_SIGPENDING 11
+#define LX_RLIMIT_MSGQUEUE 12
+#define LX_RLIMIT_NICE 13 /* NA ceiling for nice */
+#define LX_RLIMIT_RTPRIO 14 /* NA ceiling on the RT priority */
+#define LX_RLIMIT_RTTIME 15 /* NA cpu limit for RT proc. */
+
+#define LX_RLIMIT_NLIMITS 16
+
+#define RCTL_INFINITE(x) \
+ ((x->rcv_flagaction & RCTL_LOCAL_MAXIMAL) && \
+ (x->rcv_flagaction & RCTL_GLOBAL_INFINITE))
+
+typedef struct {
+ ulong_t rlim_cur;
+ ulong_t rlim_max;
+} lx_rlimit_t;
+
+typedef struct {
+ uint32_t rlim_cur;
+ uint32_t rlim_max;
+} lx_rlimit32_t;
+
+/*
+ * Linux supports many of the same resources that we do, but on Illumos these
+ * are rctls. Instead of using rlimit, we use rctls for all of the limits.
+ * This table is used to translate Linux rlimit keys into the Illumos legacy
+ * rlimit. We then primarily use the rctl/rlimit compatability code to
+ * manage these.
+ */
+static int l_to_r[LX_RLIMIT_NLIMITS] = {
+ RLIMIT_CPU, /* 0 CPU */
+ RLIMIT_FSIZE, /* 1 FSIZE */
+ RLIMIT_DATA, /* 2 DATA */
+ RLIMIT_STACK, /* 3 STACK */
+ RLIMIT_CORE, /* 4 CORE */
+ -1, /* 5 RSS */
+ -1, /* 6 NPROC */
+ RLIMIT_NOFILE, /* 7 NOFILE */
+ -1, /* 8 MEMLOCK */
+ RLIMIT_AS, /* 9 AS */
+ -1, /* 10 LOCKS */
+ -1, /* 11 SIGPENDING */
+ -1, /* 12 MSGQUEUE */
+ -1, /* 13 NICE */
+ -1, /* 14 RTPRIO */
+ -1 /* 15 RTTIME */
+};
+
+/*
+ * Magic value Linux uses to indicate infinity
+ */
+#define LX_RLIM_INFINITY_N ULONG_MAX
+
+static void
+lx_get_rctl(char *nm, struct rlimit64 *rlp64)
+{
+ rctl_hndl_t hndl;
+ rctl_val_t *oval, *nval;
+
+ rlp64->rlim_cur = RLIM_INFINITY;
+ rlp64->rlim_max = RLIM_INFINITY;
+
+ nval = kmem_alloc(sizeof (rctl_val_t), KM_SLEEP);
+ mutex_enter(&curproc->p_lock);
+
+ hndl = rctl_hndl_lookup(nm);
+ oval = NULL;
+ while ((hndl != -1) && rctl_local_get(hndl, oval, nval, curproc) == 0) {
+ oval = nval;
+ switch (nval->rcv_privilege) {
+ case RCPRIV_BASIC:
+ if (!RCTL_INFINITE(nval))
+ rlp64->rlim_cur = nval->rcv_value;
+ break;
+ case RCPRIV_PRIVILEGED:
+ if (!RCTL_INFINITE(nval))
+ rlp64->rlim_max = nval->rcv_value;
+ break;
+ }
+ }
+
+ mutex_exit(&curproc->p_lock);
+ kmem_free(nval, sizeof (rctl_val_t));
+
+ if (rlp64->rlim_cur == RLIM_INFINITY &&
+ rlp64->rlim_max != RLIM_INFINITY)
+ rlp64->rlim_cur = rlp64->rlim_max;
+}
+
+static int
+lx_getrlimit_common(int lx_resource, uint64_t *rlim_curp, uint64_t *rlim_maxp)
+{
+ lx_proc_data_t *pd = ptolxproc(curproc);
+ int resource;
+ int64_t cur = -1;
+ boolean_t cur_inf = B_FALSE;
+ int64_t max = -1;
+ boolean_t max_inf = B_FALSE;
+ struct rlimit64 rlim64;
+
+ if (lx_resource < 0 || lx_resource >= LX_RLIMIT_NLIMITS)
+ return (EINVAL);
+
+ switch (lx_resource) {
+ case LX_RLIMIT_LOCKS:
+ rlim64.rlim_cur = pd->l_fake_limits[LX_RLFAKE_LOCKS].rlim_cur;
+ rlim64.rlim_max = pd->l_fake_limits[LX_RLFAKE_LOCKS].rlim_max;
+ break;
+
+ case LX_RLIMIT_NICE:
+ rlim64.rlim_cur = pd->l_fake_limits[LX_RLFAKE_NICE].rlim_cur;
+ rlim64.rlim_max = pd->l_fake_limits[LX_RLFAKE_NICE].rlim_max;
+ break;
+
+ case LX_RLIMIT_RTPRIO:
+ rlim64.rlim_cur = pd->l_fake_limits[LX_RLFAKE_RTPRIO].rlim_cur;
+ rlim64.rlim_max = pd->l_fake_limits[LX_RLFAKE_RTPRIO].rlim_max;
+ break;
+
+ case LX_RLIMIT_RTTIME:
+ rlim64.rlim_cur = pd->l_fake_limits[LX_RLFAKE_RTTIME].rlim_cur;
+ rlim64.rlim_max = pd->l_fake_limits[LX_RLFAKE_RTTIME].rlim_max;
+ break;
+
+ case LX_RLIMIT_RSS:
+ /* zone.max-physical-memory */
+ rlim64.rlim_cur = rlim64.rlim_max = curzone->zone_phys_mem_ctl;
+ break;
+
+ case LX_RLIMIT_NPROC:
+ /* zone.max-lwps */
+ rlim64.rlim_cur = rlim64.rlim_max = curzone->zone_nlwps_ctl;
+ break;
+
+ case LX_RLIMIT_MEMLOCK:
+ /* zone.max-locked-memory */
+ rlim64.rlim_cur = rlim64.rlim_max =
+ curzone->zone_locked_mem_ctl;
+ break;
+
+ case LX_RLIMIT_SIGPENDING:
+ lx_get_rctl("process.max-sigqueue-size", &rlim64);
+ break;
+
+ case LX_RLIMIT_MSGQUEUE:
+ lx_get_rctl("process.max-msg-messages", &rlim64);
+ break;
+
+ default:
+ resource = l_to_r[lx_resource];
+
+ mutex_enter(&curproc->p_lock);
+ (void) rctl_rlimit_get(rctlproc_legacy[resource], curproc,
+ &rlim64);
+ mutex_exit(&curproc->p_lock);
+ break;
+ }
+
+
+ if (rlim64.rlim_cur == RLIM64_INFINITY) {
+ cur = LX_RLIM_INFINITY_N;
+ } else {
+ cur = rlim64.rlim_cur;
+ }
+ if (rlim64.rlim_max == RLIM64_INFINITY) {
+ max = LX_RLIM_INFINITY_N;
+ } else {
+ max = rlim64.rlim_max;
+ }
+
+ if (lx_resource == LX_RLIMIT_STACK && cur > INT_MAX) {
+ /*
+ * Stunningly, Linux has somehow managed to confuse the concept
+ * of a "limit" with that of a "default" -- and the value of
+ * RLIMIT_STACK is used by NPTL as the _default_ stack size if
+ * it isn't specified. (!!) Even for a system that prides
+ * itself on slapdash castles of junk, this is an amazingly
+ * willful act of incompetence -- and one that is gleefully
+ * confessed in the pthread_create() man page: "if the
+ * RLIMIT_STACK soft resource limit at the time the program
+ * started has any value other than 'unlimited', then it
+ * determines the default stack size of new threads." A
+ * typical stack limit for us is 32TB; if it needs to be said,
+ * setting the default stack size to be 32TB doesn't work so
+ * well! Of course, glibc dropping a deuce in its pants
+ * becomes our problem -- so to prevent smelly accidents we
+ * tell Linux that any stack limit over the old (32-bit) values
+ * for infinity are just infinitely large.
+ */
+ cur_inf = B_TRUE;
+ max_inf = B_TRUE;
+ }
+
+ if (cur_inf) {
+ *rlim_curp = LX_RLIM64_INFINITY;
+ } else {
+ *rlim_curp = cur;
+ }
+
+ if (max_inf) {
+ *rlim_maxp = LX_RLIM64_INFINITY;
+ } else {
+ *rlim_maxp = max;
+ }
+
+ return (0);
+}
+
+/*
+ * This is the 'new' getrlimit, variously called getrlimit or ugetrlimit
+ * in Linux headers and code. The only difference between this and the old
+ * getrlimit (variously called getrlimit or old_getrlimit) is the value of
+ * RLIM_INFINITY, which is smaller for the older version. Modern code will
+ * use this version by default.
+ */
+long
+lx_getrlimit(int resource, lx_rlimit_t *rlp)
+{
+ int rv;
+ lx_rlimit_t rl;
+ uint64_t rlim_cur, rlim_max;
+
+ rv = lx_getrlimit_common(resource, &rlim_cur, &rlim_max);
+ if (rv != 0)
+ return (set_errno(rv));
+
+ if (get_udatamodel() == DATAMODEL_NATIVE) {
+ if (rlim_cur == LX_RLIM64_INFINITY)
+ rl.rlim_cur = LX_RLIM_INFINITY_N;
+ else if (rlim_cur > LX_RLIM_INFINITY_N)
+ rl.rlim_cur = LX_RLIM_INFINITY_N;
+ else
+ rl.rlim_cur = (ulong_t)rlim_cur;
+
+ if (rlim_max == LX_RLIM64_INFINITY)
+ rl.rlim_max = LX_RLIM_INFINITY_N;
+ else if (rlim_max > LX_RLIM_INFINITY_N)
+ rl.rlim_max = LX_RLIM_INFINITY_N;
+ else
+ rl.rlim_max = (ulong_t)rlim_max;
+
+ if (copyout(&rl, rlp, sizeof (rl)) != 0)
+ return (set_errno(EFAULT));
+ }
+#ifdef _SYSCALL32_IMPL
+ else {
+ lx_rlimit32_t rl32;
+
+ if (rlim_cur > UINT_MAX)
+ rl.rlim_cur = UINT_MAX;
+ else
+ rl.rlim_cur = (ulong_t)rlim_cur;
+
+ if (rlim_max > UINT_MAX)
+ rl.rlim_max = UINT_MAX;
+ else
+ rl.rlim_max = (ulong_t)rlim_max;
+
+ rl32.rlim_cur = rl.rlim_cur;
+ rl32.rlim_max = rl.rlim_max;
+
+ if (copyout(&rl32, rlp, sizeof (rl32)) != 0)
+ return (set_errno(EFAULT));
+ }
+#endif
+
+ return (0);
+}
+
+/*
+ * This is the 'old' getrlimit, variously called getrlimit or old_getrlimit
+ * in Linux headers and code. The only difference between this and the new
+ * getrlimit (variously called getrlimit or ugetrlimit) is the value of
+ * RLIM_INFINITY, which is smaller for the older version.
+ *
+ * This is only used for 32-bit code.
+ */
+long
+lx_oldgetrlimit(int resource, lx_rlimit_t *rlp)
+{
+ int rv;
+ lx_rlimit32_t rl32;
+ uint64_t rlim_cur, rlim_max;
+
+ rv = lx_getrlimit_common(resource, &rlim_cur, &rlim_max);
+ if (rv != 0)
+ return (set_errno(rv));
+
+ if (rlim_cur > INT_MAX)
+ rl32.rlim_cur = INT_MAX;
+ else
+ rl32.rlim_cur = (ulong_t)rlim_cur;
+
+ if (rlim_max > INT_MAX)
+ rl32.rlim_max = INT_MAX;
+ else
+ rl32.rlim_max = (ulong_t)rlim_cur;
+
+ if (copyout(&rl32, rlp, sizeof (rl32)) != 0)
+ return (set_errno(EFAULT));
+
+ return (0);
+}
+
+static int
+lx_set_rctl(char *nm, struct rlimit64 *rlp64)
+{
+ int err;
+ rctl_hndl_t hndl;
+ rctl_alloc_gp_t *gp;
+
+ gp = rctl_rlimit_set_prealloc(1);
+
+ mutex_enter(&curproc->p_lock);
+
+ hndl = rctl_hndl_lookup(nm);
+
+ /*
+ * We're not supposed to do this but since we want all our rctls to
+ * behave like rlimits, we take advantage of this function to set up
+ * this way.
+ */
+ err = rctl_rlimit_set(hndl, curproc, rlp64, gp, RCTL_LOCAL_DENY, 0,
+ CRED());
+
+ mutex_exit(&curproc->p_lock);
+
+ rctl_prealloc_destroy(gp);
+
+ return (err);
+}
+
+static int
+lx_setrlimit_common(int lx_resource, uint64_t rlim_cur, uint64_t rlim_max)
+{
+ lx_proc_data_t *pd = ptolxproc(curproc);
+ int err;
+ int resource;
+ rctl_alloc_gp_t *gp;
+ struct rlimit64 rl64;
+
+ if (lx_resource < 0 || lx_resource >= LX_RLIMIT_NLIMITS)
+ return (EINVAL);
+
+ switch (lx_resource) {
+ case LX_RLIMIT_LOCKS:
+ pd->l_fake_limits[LX_RLFAKE_LOCKS].rlim_cur = rlim_cur;
+ pd->l_fake_limits[LX_RLFAKE_LOCKS].rlim_max = rlim_max;
+ break;
+
+ case LX_RLIMIT_NICE:
+ pd->l_fake_limits[LX_RLFAKE_NICE].rlim_cur = rlim_cur;
+ pd->l_fake_limits[LX_RLFAKE_NICE].rlim_max = rlim_max;
+ break;
+
+ case LX_RLIMIT_RTPRIO:
+ pd->l_fake_limits[LX_RLFAKE_RTPRIO].rlim_cur = rlim_cur;
+ pd->l_fake_limits[LX_RLFAKE_RTPRIO].rlim_max = rlim_max;
+ break;
+
+ case LX_RLIMIT_RTTIME:
+ pd->l_fake_limits[LX_RLFAKE_RTTIME].rlim_cur = rlim_cur;
+ pd->l_fake_limits[LX_RLFAKE_RTTIME].rlim_max = rlim_max;
+ break;
+
+ case LX_RLIMIT_RSS:
+ /*
+ * zone.max-physical-memory
+ * Since we're emulating the value via a zone rctl, we can't
+ * set that from within the zone. Lie and say we set the value.
+ */
+ break;
+
+ case LX_RLIMIT_NPROC:
+ /*
+ * zone.max-lwps
+ * Since we're emulating the value via a zone rctl, we can't
+ * set that from within the zone. Lie and say we set the value.
+ */
+ break;
+
+ case LX_RLIMIT_MEMLOCK:
+ /*
+ * zone.max-locked-memory
+ * Since we're emulating the value via a zone rctl, we can't
+ * set that from within the zone. Lie and say we set the value.
+ */
+ break;
+
+ case LX_RLIMIT_SIGPENDING:
+ /*
+ * On Ubuntu at least, the login and sshd processes expect to
+ * set this limit to 16k and login will fail if this fails. On
+ * Illumos we have a system limit of 8k and normally the
+ * privileged limit is 512. We simply pretend this works to
+ * allow login to work.
+ */
+ if (rlim_max > 8192)
+ return (0);
+
+ rl64.rlim_cur = rlim_cur;
+ rl64.rlim_max = rlim_max;
+ if ((err = lx_set_rctl("process.max-sigqueue-size", &rl64))
+ != 0)
+ return (set_errno(err));
+ break;
+
+ case LX_RLIMIT_MSGQUEUE:
+ rl64.rlim_cur = rlim_cur;
+ rl64.rlim_max = rlim_max;
+ if ((err = lx_set_rctl("process.max-msg-messages", &rl64)) != 0)
+ return (set_errno(err));
+ break;
+
+ default:
+ resource = l_to_r[lx_resource];
+
+ /*
+ * Linux limits the max number of open files to 1m and there is
+ * a test for this.
+ */
+ if (lx_resource == LX_RLIMIT_NOFILE && rlim_max > (1024 * 1024))
+ return (EPERM);
+
+ rl64.rlim_cur = rlim_cur;
+ rl64.rlim_max = rlim_max;
+ gp = rctl_rlimit_set_prealloc(1);
+
+ mutex_enter(&curproc->p_lock);
+ err = rctl_rlimit_set(rctlproc_legacy[resource], curproc,
+ &rl64, gp, rctlproc_flags[resource],
+ rctlproc_signals[resource], CRED());
+ mutex_exit(&curproc->p_lock);
+
+ rctl_prealloc_destroy(gp);
+ if (err != 0)
+ return (set_errno(err));
+ break;
+ }
+
+ return (0);
+}
+
+long
+lx_setrlimit(int resource, lx_rlimit_t *rlp)
+{
+ int rv;
+ lx_rlimit_t rl;
+ uint64_t rlim_cur, rlim_max;
+
+ if (get_udatamodel() == DATAMODEL_NATIVE) {
+ if (copyin(rlp, &rl, sizeof (rl)) != 0)
+ return (set_errno(EFAULT));
+ }
+#ifdef _SYSCALL32_IMPL
+ else {
+ lx_rlimit32_t rl32;
+
+ if (copyin(rlp, &rl32, sizeof (rl32)) != 0)
+ return (set_errno(EFAULT));
+
+ rl.rlim_cur = rl32.rlim_cur;
+ rl.rlim_max = rl32.rlim_max;
+ }
+#endif
+
+ if ((rl.rlim_max != LX_RLIM_INFINITY_N &&
+ rl.rlim_cur == LX_RLIM_INFINITY_N) ||
+ rl.rlim_cur > rl.rlim_max)
+ return (set_errno(EINVAL));
+
+ if (rl.rlim_cur == LX_RLIM_INFINITY_N)
+ rlim_cur = LX_RLIM64_INFINITY;
+ else
+ rlim_cur = rl.rlim_cur;
+
+ if (rl.rlim_max == LX_RLIM_INFINITY_N)
+ rlim_max = LX_RLIM64_INFINITY;
+ else
+ rlim_max = rl.rlim_max;
+
+ rv = lx_setrlimit_common(resource, rlim_cur, rlim_max);
+ if (rv != 0)
+ return (set_errno(rv));
+ return (0);
+}
+
+/*
+ * From the man page:
+ * The Linux-specific prlimit() system call combines and extends the
+ * functionality of setrlimit() and getrlimit(). It can be used to both set
+ * and get the resource limits of an arbitrary process.
+ *
+ * If pid is 0, then the call applies to the calling process.
+ */
+long
+lx_prlimit64(pid_t pid, int resource, lx_rlimit64_t *nrlp, lx_rlimit64_t *orlp)
+{
+ int rv;
+ lx_rlimit64_t nrl, orl;
+
+ if (pid != 0) {
+ /* XXX TBD if needed */
+ char buf[80];
+
+ (void) snprintf(buf, sizeof (buf),
+ "setting prlimit %d for another process\n", resource);
+ lx_unsupported(buf);
+ return (ENOTSUP);
+ }
+
+ if (orlp != NULL) {
+ /* we first get the current limits */
+ rv = lx_getrlimit_common(resource, &orl.rlim_cur,
+ &orl.rlim_max);
+ if (rv != 0)
+ return (set_errno(rv));
+ }
+
+ if (nrlp != NULL) {
+ if (copyin(nrlp, &nrl, sizeof (nrl)) != 0)
+ return (set_errno(EFAULT));
+
+ if ((nrl.rlim_max != LX_RLIM64_INFINITY &&
+ nrl.rlim_cur == LX_RLIM64_INFINITY) ||
+ nrl.rlim_cur > nrl.rlim_max)
+ return (set_errno(EINVAL));
+
+ rv = lx_setrlimit_common(resource, nrl.rlim_cur, nrl.rlim_max);
+ if (rv != 0)
+ return (set_errno(rv));
+ }
+
+ if (orlp != NULL) {
+ /* now return the original limits, if necessary */
+ if (copyout(&orl, orlp, sizeof (orl)) != 0)
+ return (set_errno(EFAULT));
+ }
+
+ return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_rw.c b/usr/src/uts/common/brand/lx/syscall/lx_rw.c
new file mode 100644
index 0000000000..50d532ff51
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_rw.c
@@ -0,0 +1,949 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <sys/errno.h>
+#include <sys/systm.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_types.h>
+#include <sys/nbmlock.h>
+#include <sys/limits.h>
+
+/* uts/common/syscall/rw.c */
+extern size_t copyout_max_cached;
+
+
+/* Common routines */
+
+static int
+lx_iovec_copyin(void *uiovp, int iovcnt, iovec_t *kiovp, ssize_t *count)
+{
+#ifdef _SYSCALL32_IMPL
+ /*
+ * 32-bit callers need to have their iovec expanded, while ensuring
+ * that they can't move more than 2Gbytes of data in a single call.
+ */
+ if (get_udatamodel() == DATAMODEL_ILP32) {
+ struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
+ int aiov32len = 0;
+ ssize32_t total32 = 0;
+ int i;
+
+ if (iovcnt > IOV_MAX_STACK) {
+ aiov32len = iovcnt * sizeof (iovec32_t);
+ aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
+ }
+
+ if (copyin(uiovp, aiov32, iovcnt * sizeof (iovec32_t))) {
+ if (aiov32len != 0) {
+ kmem_free(aiov32, aiov32len);
+ }
+ return (EFAULT);
+ }
+
+ for (i = 0; i < iovcnt; i++) {
+ ssize32_t iovlen32 = aiov32[i].iov_len;
+ total32 += iovlen32;
+ if (iovlen32 < 0 || total32 < 0) {
+ if (aiov32len != 0) {
+ kmem_free(aiov32, aiov32len);
+ }
+ return (EINVAL);
+ }
+ kiovp[i].iov_len = iovlen32;
+ kiovp[i].iov_base =
+ (caddr_t)(uintptr_t)aiov32[i].iov_base;
+ /* Linux does a basic sanity test on the address */
+ if ((uintptr_t)kiovp[i].iov_base >= USERLIMIT32) {
+ if (aiov32len != 0) {
+ kmem_free(aiov32, aiov32len);
+ }
+ return (EFAULT);
+ }
+ }
+ *count = total32;
+
+ if (aiov32len != 0)
+ kmem_free(aiov32, aiov32len);
+ } else
+#endif
+ {
+ ssize_t total = 0;
+ int i;
+
+ if (copyin(uiovp, kiovp, iovcnt * sizeof (iovec_t)))
+ return (EFAULT);
+ for (i = 0; i < iovcnt; i++) {
+ ssize_t iovlen = kiovp[i].iov_len;
+ total += iovlen;
+ if (iovlen < 0 || total < 0) {
+ return (EINVAL);
+ }
+ /* Linux does a basic sanity test on the address */
+ if ((uintptr_t)kiovp[i].iov_base >= USERLIMIT) {
+ return (EFAULT);
+ }
+ }
+ *count = total;
+ }
+ return (0);
+}
+
+static int
+lx_read_common(file_t *fp, uio_t *uiop, size_t *nread, boolean_t positioned)
+{
+ vnode_t *vp = fp->f_vnode;
+ int error = 0, rwflag = 0, ioflag;
+ ssize_t count = uiop->uio_resid;
+ size_t rcount = 0;
+ struct cpu *cp;
+ boolean_t in_crit = B_FALSE;
+
+ /*
+ * We have to enter the critical region before calling VOP_RWLOCK
+ * to avoid a deadlock with ufs.
+ */
+ if (nbl_need_check(vp)) {
+ int svmand;
+
+ nbl_start_crit(vp, RW_READER);
+ in_crit = B_TRUE;
+ error = nbl_svmand(vp, fp->f_cred, &svmand);
+ if (error != 0)
+ goto out;
+ if (nbl_conflict(vp, NBL_READ, uiop->uio_offset, count, svmand,
+ NULL) != 0) {
+ error = EACCES;
+ goto out;
+ }
+ }
+
+ (void) VOP_RWLOCK(vp, rwflag, NULL);
+ /*
+ * For non-positioned reads, recheck offset/count validity inside
+ * VOP_WRLOCK to prevent filesize from changing during validation.
+ */
+ if (!positioned) {
+ u_offset_t uoffset = (u_offset_t)(ulong_t)fp->f_offset;
+
+ if ((vp->v_type == VREG) && (uoffset >= OFFSET_MAX(fp))) {
+ struct vattr va;
+
+ va.va_mask = AT_SIZE;
+ error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL);
+ VOP_RWUNLOCK(vp, rwflag, NULL);
+ if (error != 0)
+ goto out;
+ /* We have to return EOF if fileoff is >= file size. */
+ if (uoffset >= va.va_size)
+ goto out;
+ /*
+ * File is greater than or equal to maxoff and
+ * therefore we return EOVERFLOW.
+ */
+ error = EOVERFLOW;
+ goto out;
+ }
+ if ((vp->v_type == VREG) &&
+ (uoffset + count > OFFSET_MAX(fp))) {
+ count = (ssize_t)(OFFSET_MAX(fp) - uoffset);
+ uiop->uio_resid = count;
+ }
+ uiop->uio_offset = uoffset;
+ }
+ ioflag = uiop->uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
+ /* If read sync is not asked for, filter sync flags */
+ if ((ioflag & FRSYNC) == 0)
+ ioflag &= ~(FSYNC|FDSYNC);
+ error = VOP_READ(vp, uiop, ioflag, fp->f_cred, NULL);
+ rcount = count - uiop->uio_resid;
+ CPU_STATS_ENTER_K();
+ cp = CPU;
+ CPU_STATS_ADDQ(cp, sys, sysread, 1);
+ CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)rcount);
+ CPU_STATS_EXIT_K();
+ ttolwp(curthread)->lwp_ru.ioch += (ulong_t)rcount;
+ /* Store offset for non-positioned reads */
+ if (!positioned) {
+ if (vp->v_type == VFIFO) {
+ /* Backward compatibility */
+ fp->f_offset = rcount;
+ } else if (((fp->f_flag & FAPPEND) == 0) ||
+ (vp->v_type != VREG) || (count != 0)) {
+ /* POSIX */
+ fp->f_offset = uiop->uio_loffset;
+ }
+ }
+ VOP_RWUNLOCK(vp, rwflag, NULL);
+
+out:
+ if (in_crit)
+ nbl_end_crit(vp);
+ *nread = rcount;
+ return (error);
+}
+
+static int
+lx_write_common(file_t *fp, uio_t *uiop, size_t *nwrite, boolean_t positioned)
+{
+ vnode_t *vp = fp->f_vnode;
+ int error = 0, rwflag = 1, ioflag;
+ ssize_t count = uiop->uio_resid;
+ size_t wcount = 0;
+ struct cpu *cp;
+ boolean_t in_crit = B_FALSE;
+
+ /*
+ * We have to enter the critical region before calling VOP_RWLOCK
+ * to avoid a deadlock with ufs.
+ */
+ if (nbl_need_check(vp)) {
+ int svmand;
+
+ nbl_start_crit(vp, RW_READER);
+ in_crit = B_TRUE;
+ error = nbl_svmand(vp, fp->f_cred, &svmand);
+ if (error != 0)
+ goto out;
+ if (nbl_conflict(vp, NBL_WRITE, uiop->uio_loffset, count,
+ svmand, NULL) != 0) {
+ error = EACCES;
+ goto out;
+ }
+ }
+
+ (void) VOP_RWLOCK(vp, rwflag, NULL);
+
+ if (!positioned) {
+ /*
+ * For non-positioned writes, the value of fp->f_offset is
+ * re-queried while inside VOP_RWLOCK. This ensures that other
+ * writes which alter the filesize will be taken into account.
+ */
+ uiop->uio_loffset = fp->f_offset;
+ ioflag = uiop->uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
+ } else {
+ /*
+ * In a senseless departure from POSIX, positioned write calls
+ * on Linux do _not_ ignore the O_APPEND flag.
+ */
+ ioflag = uiop->uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
+ }
+ if (vp->v_type == VREG) {
+ u_offset_t fileoff = (u_offset_t)(ulong_t)uiop->uio_loffset;
+
+ if (fileoff >= curproc->p_fsz_ctl) {
+ VOP_RWUNLOCK(vp, rwflag, NULL);
+ mutex_enter(&curproc->p_lock);
+ (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
+ curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
+ mutex_exit(&curproc->p_lock);
+ error = EFBIG;
+ goto out;
+ }
+ if (fileoff >= OFFSET_MAX(fp)) {
+ VOP_RWUNLOCK(vp, rwflag, NULL);
+ error = EFBIG;
+ goto out;
+ }
+ if (fileoff + count > OFFSET_MAX(fp)) {
+ count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
+ uiop->uio_resid = count;
+ }
+ }
+
+ error = VOP_WRITE(vp, uiop, ioflag, fp->f_cred, NULL);
+ wcount = count - uiop->uio_resid;
+ CPU_STATS_ENTER_K();
+ cp = CPU;
+ CPU_STATS_ADDQ(cp, sys, syswrite, 1);
+ CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)wcount);
+ CPU_STATS_EXIT_K();
+ ttolwp(curthread)->lwp_ru.ioch += (ulong_t)wcount;
+
+ /* Store offset for non-positioned writes */
+ if (!positioned) {
+ if (vp->v_type == VFIFO) {
+ /* Backward compatibility */
+ fp->f_offset = wcount;
+ } else if (((fp->f_flag & FAPPEND) == 0) ||
+ (vp->v_type != VREG) || (count != 0)) {
+ /* POSIX */
+ fp->f_offset = uiop->uio_loffset;
+ }
+ }
+ VOP_RWUNLOCK(vp, rwflag, NULL);
+
+out:
+ if (in_crit)
+ nbl_end_crit(vp);
+ *nwrite = wcount;
+ return (error);
+}
+
+/*
+ * The Linux routines for reading and writing data from file descriptors behave
+ * differently from their SunOS counterparts in a few key ways:
+ *
+ * - Passing an iovcnt of 0 to the vectored functions results in an error on
+ * SunOS, but on Linux it yields return value of 0.
+ *
+ * - If any data is successfully read or written, Linux will return a success.
+ * This is unlike SunOS which would return an error code for the entire
+ * operation in cases where vectors had gone unprocessed.
+ *
+ * - Breaking from POSIX, Linux positioned writes (pwrite/pwritev) on Linux
+ * will obey the O_APPEND flag if it is set on the descriptor.
+ */
+
+ssize_t
+lx_read(int fdes, void *cbuf, size_t ccount)
+{
+ struct uio auio;
+ struct iovec aiov;
+ file_t *fp;
+ ssize_t count = (ssize_t)ccount;
+ size_t nread = 0;
+ int fflag, error = 0;
+
+ if (count < 0)
+ return (set_errno(EINVAL));
+ if ((fp = getf(fdes)) == NULL)
+ return (set_errno(EBADF));
+ if (((fflag = fp->f_flag) & FREAD) == 0) {
+ error = EBADF;
+ goto out;
+ }
+ if (fp->f_vnode->v_type == VREG && count == 0) {
+ goto out;
+ }
+ if (fp->f_vnode->v_type == VDIR) {
+ error = EISDIR;
+ goto out;
+ }
+
+ aiov.iov_base = cbuf;
+ aiov.iov_len = count;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_loffset = fp->f_offset;
+ auio.uio_resid = count;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_llimit = MAXOFFSET_T;
+ auio.uio_fmode = fflag;
+ if (count <= copyout_max_cached)
+ auio.uio_extflg = UIO_COPY_CACHED;
+ else
+ auio.uio_extflg = UIO_COPY_DEFAULT;
+
+ error = lx_read_common(fp, &auio, &nread, B_FALSE);
+
+ if (error == EINTR) {
+ if (nread != 0) {
+ error = 0;
+ } else {
+ ttolxlwp(curthread)->br_syscall_restart = B_TRUE;
+ }
+ }
+out:
+ releasef(fdes);
+ if (error != 0)
+ return (set_errno(error));
+ return ((ssize_t)nread);
+}
+
+ssize_t
+lx_write(int fdes, void *cbuf, size_t ccount)
+{
+ struct uio auio;
+ struct iovec aiov;
+ file_t *fp;
+ ssize_t count = (ssize_t)ccount;
+ size_t nwrite = 0;
+ int fflag, error = 0;
+
+ if (count < 0)
+ return (set_errno(EINVAL));
+ if ((fp = getf(fdes)) == NULL)
+ return (set_errno(EBADF));
+ if (((fflag = fp->f_flag) & FWRITE) == 0) {
+ error = EBADF;
+ goto out;
+ }
+ if (fp->f_vnode->v_type == VREG && count == 0) {
+ goto out;
+ }
+
+ aiov.iov_base = cbuf;
+ aiov.iov_len = count;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_loffset = fp->f_offset;
+ auio.uio_resid = count;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_llimit = curproc->p_fsz_ctl;
+ auio.uio_fmode = fflag;
+ auio.uio_extflg = UIO_COPY_DEFAULT;
+
+ error = lx_write_common(fp, &auio, &nwrite, B_FALSE);
+
+ if (error == EINTR) {
+ if (nwrite != 0) {
+ error = 0;
+ } else {
+ ttolxlwp(curthread)->br_syscall_restart = B_TRUE;
+ }
+ }
+out:
+ releasef(fdes);
+ if (error != 0)
+ return (set_errno(error));
+ return (nwrite);
+}
+
+ssize_t
+lx_readv(int fdes, struct iovec *iovp, int iovcnt)
+{
+ struct uio auio;
+ struct iovec buf[IOV_MAX_STACK], *aiov = buf;
+ int aiovlen = 0;
+ file_t *fp;
+ ssize_t count;
+ size_t nread = 0;
+ int fflag, error = 0;
+
+ if (iovcnt < 0 || iovcnt > IOV_MAX) {
+ return (set_errno(EINVAL));
+ } else if (iovcnt == 0) {
+ return (0);
+ }
+
+ if (iovcnt > IOV_MAX_STACK) {
+ aiovlen = iovcnt * sizeof (iovec_t);
+ aiov = kmem_alloc(aiovlen, KM_SLEEP);
+ }
+ if ((error = lx_iovec_copyin(iovp, iovcnt, aiov, &count)) != 0) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
+ return (set_errno(error));
+ }
+
+ if ((fp = getf(fdes)) == NULL) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
+ return (set_errno(EBADF));
+ }
+ if (((fflag = fp->f_flag) & FREAD) == 0) {
+ error = EBADF;
+ goto out;
+ }
+ if (fp->f_vnode->v_type == VREG && count == 0) {
+ goto out;
+ }
+ if (fp->f_vnode->v_type == VDIR) {
+ error = EISDIR;
+ goto out;
+ }
+
+ auio.uio_iov = aiov;
+ auio.uio_iovcnt = iovcnt;
+ auio.uio_loffset = fp->f_offset;
+ auio.uio_resid = count;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_llimit = MAXOFFSET_T;
+ auio.uio_fmode = fflag;
+ if (count <= copyout_max_cached)
+ auio.uio_extflg = UIO_COPY_CACHED;
+ else
+ auio.uio_extflg = UIO_COPY_DEFAULT;
+
+ error = lx_read_common(fp, &auio, &nread, B_FALSE);
+
+ if (error != 0) {
+ if (nread != 0) {
+ error = 0;
+ } else if (error == EINTR) {
+ ttolxlwp(curthread)->br_syscall_restart = B_TRUE;
+ }
+ }
+out:
+ releasef(fdes);
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (nread);
+}
+
+ssize_t
+lx_writev(int fdes, struct iovec *iovp, int iovcnt)
+{
+ struct uio auio;
+ struct iovec buf[IOV_MAX_STACK], *aiov = buf;
+ int aiovlen = 0;
+ file_t *fp;
+ ssize_t count;
+ size_t nwrite = 0;
+ int fflag, error = 0;
+
+ if (iovcnt < 0 || iovcnt > IOV_MAX) {
+ return (set_errno(EINVAL));
+ } else if (iovcnt == 0) {
+ return (0);
+ }
+
+ if (iovcnt > IOV_MAX_STACK) {
+ aiovlen = iovcnt * sizeof (iovec_t);
+ aiov = kmem_alloc(aiovlen, KM_SLEEP);
+ }
+ if ((error = lx_iovec_copyin(iovp, iovcnt, aiov, &count)) != 0) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
+ return (set_errno(error));
+ }
+
+ if ((fp = getf(fdes)) == NULL) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
+ return (set_errno(EBADF));
+ }
+ if (((fflag = fp->f_flag) & FWRITE) == 0) {
+ error = EBADF;
+ goto out;
+ }
+ if (fp->f_vnode->v_type == VREG && count == 0) {
+ goto out;
+ }
+
+ auio.uio_iov = aiov;
+ auio.uio_iovcnt = iovcnt;
+ auio.uio_loffset = fp->f_offset;
+ auio.uio_resid = count;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_llimit = curproc->p_fsz_ctl;
+ auio.uio_fmode = fflag;
+ auio.uio_extflg = UIO_COPY_DEFAULT;
+
+ error = lx_write_common(fp, &auio, &nwrite, B_FALSE);
+
+ if (error != 0) {
+ if (nwrite != 0) {
+ error = 0;
+ } else if (error == EINTR) {
+ ttolxlwp(curthread)->br_syscall_restart = B_TRUE;
+ }
+ }
+out:
+ releasef(fdes);
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (nwrite);
+}
+
+ssize_t
+lx_pread(int fdes, void *cbuf, size_t ccount, off64_t offset)
+{
+ struct uio auio;
+ struct iovec aiov;
+ file_t *fp;
+ ssize_t count = (ssize_t)ccount;
+ size_t nread = 0;
+ int fflag, error = 0;
+
+ if (count < 0)
+ return (set_errno(EINVAL));
+ if ((fp = getf(fdes)) == NULL)
+ return (set_errno(EBADF));
+ if (((fflag = fp->f_flag) & FREAD) == 0) {
+ error = EBADF;
+ goto out;
+ }
+ if (fp->f_vnode->v_type == VREG) {
+ u_offset_t fileoff = (u_offset_t)offset;
+
+ if (count == 0)
+ goto out;
+ /*
+ * Return EINVAL if an invalid offset comes to pread.
+ * Negative offset from user will cause this error.
+ */
+ if (fileoff > MAXOFFSET_T) {
+ error = EINVAL;
+ goto out;
+ }
+ /*
+ * Limit offset such that we don't read or write
+ * a file beyond the maximum offset representable in
+ * an off_t structure.
+ */
+ if (fileoff + count > MAXOFFSET_T)
+ count = (ssize_t)((offset_t)MAXOFFSET_T - fileoff);
+ } else if (fp->f_vnode->v_type == VFIFO) {
+ error = ESPIPE;
+ goto out;
+ } else if (fp->f_vnode->v_type == VDIR) {
+ error = EISDIR;
+ goto out;
+ }
+
+ aiov.iov_base = cbuf;
+ aiov.iov_len = count;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_loffset = offset;
+ auio.uio_resid = count;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_llimit = MAXOFFSET_T;
+ auio.uio_fmode = fflag;
+ auio.uio_extflg = UIO_COPY_CACHED;
+
+ error = lx_read_common(fp, &auio, &nread, B_TRUE);
+
+ if (error == EINTR) {
+ if (nread != 0) {
+ error = 0;
+ } else {
+ ttolxlwp(curthread)->br_syscall_restart = B_TRUE;
+ }
+ }
+out:
+ releasef(fdes);
+ if (error) {
+ return (set_errno(error));
+ }
+ return ((ssize_t)nread);
+
+}
+
+ssize_t
+lx_pwrite(int fdes, void *cbuf, size_t ccount, off64_t offset)
+{
+ struct uio auio;
+ struct iovec aiov;
+ file_t *fp;
+ ssize_t count = (ssize_t)ccount;
+ size_t nwrite = 0;
+ int fflag, error = 0;
+
+ if (count < 0)
+ return (set_errno(EINVAL));
+ if ((fp = getf(fdes)) == NULL)
+ return (set_errno(EBADF));
+ if (((fflag = fp->f_flag) & (FWRITE)) == 0) {
+ error = EBADF;
+ goto out;
+ }
+ if (fp->f_vnode->v_type == VREG) {
+ u_offset_t fileoff = (u_offset_t)offset;
+
+ if (count == 0)
+ goto out;
+ /*
+ * return EINVAL for offsets that cannot be
+ * represented in an off_t.
+ */
+ if (fileoff > MAXOFFSET_T) {
+ error = EINVAL;
+ goto out;
+ }
+ /*
+ * Take appropriate action if we are trying to write above the
+ * resource limit.
+ */
+ if (fileoff >= curproc->p_fsz_ctl) {
+ mutex_enter(&curproc->p_lock);
+ (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
+ curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
+ mutex_exit(&curproc->p_lock);
+
+ error = EFBIG;
+ goto out;
+ }
+ /*
+ * Don't allow pwrite to cause file sizes to exceed maxoffset.
+ */
+ if (fileoff == MAXOFFSET_T) {
+ error = EFBIG;
+ goto out;
+ }
+ if (fileoff + count > MAXOFFSET_T)
+ count = (ssize_t)((u_offset_t)MAXOFFSET_T - fileoff);
+ } else if (fp->f_vnode->v_type == VFIFO) {
+ error = ESPIPE;
+ goto out;
+ }
+
+ aiov.iov_base = cbuf;
+ aiov.iov_len = count;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_loffset = offset;
+ auio.uio_resid = count;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_llimit = curproc->p_fsz_ctl;
+ auio.uio_fmode = fflag;
+ auio.uio_extflg = UIO_COPY_CACHED;
+
+ error = lx_write_common(fp, &auio, &nwrite, B_TRUE);
+
+ if (error == EINTR) {
+ if (nwrite != 0) {
+ error = 0;
+ } else {
+ ttolxlwp(curthread)->br_syscall_restart = B_TRUE;
+ }
+ }
+out:
+ releasef(fdes);
+ if (error) {
+ return (set_errno(error));
+ }
+ return (nwrite);
+}
+
+ssize_t
+lx_pread32(int fdes, void *cbuf, size_t ccount, uint32_t off_lo,
+ uint32_t off_hi)
+{
+ return (lx_pread(fdes, cbuf, ccount, LX_32TO64(off_lo, off_hi)));
+}
+
+ssize_t
+lx_pwrite32(int fdes, void *cbuf, size_t ccount, uint32_t off_lo,
+ uint32_t off_hi)
+{
+ return (lx_pwrite(fdes, cbuf, ccount, LX_32TO64(off_lo, off_hi)));
+}
+
+ssize_t
+lx_preadv(int fdes, void *iovp, int iovcnt, off64_t offset)
+{
+ struct uio auio;
+ struct iovec buf[IOV_MAX_STACK], *aiov = buf;
+ int aiovlen = 0;
+ file_t *fp;
+ ssize_t count;
+ size_t nread = 0;
+ int fflag, error = 0;
+
+ if (iovcnt < 0 || iovcnt > IOV_MAX) {
+ return (set_errno(EINVAL));
+ } else if (iovcnt == 0) {
+ return (0);
+ }
+
+ if (iovcnt > IOV_MAX_STACK) {
+ aiovlen = iovcnt * sizeof (iovec_t);
+ aiov = kmem_alloc(aiovlen, KM_SLEEP);
+ }
+ if ((error = lx_iovec_copyin(iovp, iovcnt, aiov, &count)) != 0) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
+ return (set_errno(error));
+ }
+
+ if ((fp = getf(fdes)) == NULL) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
+ return (set_errno(EBADF));
+ }
+ if (((fflag = fp->f_flag) & FREAD) == 0) {
+ error = EBADF;
+ goto out;
+ }
+ if (fp->f_vnode->v_type == VREG) {
+ u_offset_t fileoff = (u_offset_t)offset;
+
+ if (count == 0)
+ goto out;
+ /*
+ * Return EINVAL if an invalid offset comes to pread.
+ * Negative offset from user will cause this error.
+ */
+ if (fileoff > MAXOFFSET_T) {
+ error = EINVAL;
+ goto out;
+ }
+ /*
+ * Limit offset such that we don't read or write a file beyond
+ * the maximum offset representable in an off_t structure.
+ */
+ if (fileoff + count > MAXOFFSET_T)
+ count = (ssize_t)((offset_t)MAXOFFSET_T - fileoff);
+ } else if (fp->f_vnode->v_type == VDIR) {
+ error = EISDIR;
+ goto out;
+ } else if (fp->f_vnode->v_type == VFIFO) {
+ error = ESPIPE;
+ goto out;
+ }
+
+ auio.uio_iov = aiov;
+ auio.uio_iovcnt = iovcnt;
+ auio.uio_loffset = offset;
+ auio.uio_resid = count;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_llimit = MAXOFFSET_T;
+ auio.uio_fmode = fflag;
+ if (count <= copyout_max_cached)
+ auio.uio_extflg = UIO_COPY_CACHED;
+ else
+ auio.uio_extflg = UIO_COPY_DEFAULT;
+
+ error = lx_read_common(fp, &auio, &nread, B_TRUE);
+
+ if (error != 0) {
+ if (nread != 0) {
+ error = 0;
+ } else if (error == EINTR) {
+ ttolxlwp(curthread)->br_syscall_restart = B_TRUE;
+ }
+ }
+out:
+ releasef(fdes);
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (nread);
+}
+
+ssize_t
+lx_pwritev(int fdes, void *iovp, int iovcnt, off64_t offset)
+{
+ struct uio auio;
+ struct iovec buf[IOV_MAX_STACK], *aiov = buf;
+ int aiovlen = 0;
+ file_t *fp;
+ ssize_t count;
+ size_t nwrite = 0;
+ int fflag, error = 0;
+
+ if (iovcnt < 0 || iovcnt > IOV_MAX) {
+ return (set_errno(EINVAL));
+ } else if (iovcnt == 0) {
+ return (0);
+ }
+
+ if (iovcnt > IOV_MAX_STACK) {
+ aiovlen = iovcnt * sizeof (iovec_t);
+ aiov = kmem_alloc(aiovlen, KM_SLEEP);
+ }
+ if ((error = lx_iovec_copyin(iovp, iovcnt, aiov, &count)) != 0) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
+ return (set_errno(error));
+ }
+
+ if ((fp = getf(fdes)) == NULL) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
+ return (set_errno(EBADF));
+ }
+ if (((fflag = fp->f_flag) & FWRITE) == 0) {
+ error = EBADF;
+ goto out;
+ }
+ if (fp->f_vnode->v_type == VREG) {
+ u_offset_t fileoff = (u_offset_t)offset;
+
+ if (count == 0)
+ goto out;
+ /*
+ * Return EINVAL if an invalid offset comes to pread.
+ * Negative offset from user will cause this error.
+ */
+ if (fileoff > MAXOFFSET_T) {
+ error = EINVAL;
+ goto out;
+ }
+ /*
+ * Take appropriate action if we are trying to write above the
+ * resource limit.
+ */
+ if (fileoff >= curproc->p_fsz_ctl) {
+ mutex_enter(&curproc->p_lock);
+ (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
+ curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
+ mutex_exit(&curproc->p_lock);
+
+ error = EFBIG;
+ goto out;
+ }
+ /*
+ * Don't allow pwritev to cause file sizes to exceed maxoffset.
+ */
+ if (fileoff == MAXOFFSET_T) {
+ error = EFBIG;
+ goto out;
+ }
+ /*
+ * Limit offset such that we don't read or write a file beyond
+ * the maximum offset representable in an off_t structure.
+ */
+ if (fileoff + count > MAXOFFSET_T)
+ count = (ssize_t)((u_offset_t)MAXOFFSET_T - fileoff);
+ } else if (fp->f_vnode->v_type == VFIFO) {
+ error = ESPIPE;
+ goto out;
+ }
+
+ auio.uio_iov = aiov;
+ auio.uio_iovcnt = iovcnt;
+ auio.uio_loffset = offset;
+ auio.uio_resid = count;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_llimit = curproc->p_fsz_ctl;
+ auio.uio_fmode = fflag;
+ auio.uio_extflg = UIO_COPY_DEFAULT;
+
+ error = lx_write_common(fp, &auio, &nwrite, B_TRUE);
+
+ if (error != 0) {
+ if (nwrite != 0) {
+ error = 0;
+ } else if (error == EINTR) {
+ ttolxlwp(curthread)->br_syscall_restart = B_TRUE;
+ }
+ }
+out:
+ releasef(fdes);
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (nwrite);
+}
+
+ssize_t
+lx_preadv32(int fdes, void *iovp, int iovcnt, uint32_t off_lo, uint32_t off_hi)
+{
+ return (lx_preadv(fdes, iovp, iovcnt, LX_32TO64(off_lo, off_hi)));
+}
+
+ssize_t
+lx_pwritev32(int fdes, void *iovp, int iovcnt, uint32_t off_lo,
+ uint32_t off_hi)
+{
+ return (lx_pwritev(fdes, iovp, iovcnt, LX_32TO64(off_lo, off_hi)));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_sched.c b/usr/src/uts/common/brand/lx/syscall/lx_sched.c
new file mode 100644
index 0000000000..0def559e29
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_sched.c
@@ -0,0 +1,524 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/proc.h>
+#include <sys/cpu.h>
+#include <sys/rtpriocntl.h>
+#include <sys/tspriocntl.h>
+#include <sys/processor.h>
+#include <sys/brand.h>
+#include <sys/lx_sched.h>
+#include <sys/lx_brand.h>
+
+extern int yield();
+extern long priocntl_common(int, procset_t *, int, caddr_t, caddr_t, uio_seg_t);
+
+long
+lx_sched_yield(void)
+{
+ yield();
+
+ return (0);
+}
+
+int
+lx_sched_affinity(int cmd, uintptr_t pid, int len, uintptr_t maskp,
+ int64_t *rval)
+{
+ pid_t s_pid;
+ id_t s_tid;
+ kthread_t *t = curthread;
+ lx_lwp_data_t *lx_lwp;
+
+ if (cmd != B_GET_AFFINITY_MASK && cmd != B_SET_AFFINITY_MASK)
+ return (set_errno(EINVAL));
+
+ /*
+ * The caller wants to know how large the mask should be.
+ */
+ if (cmd == B_GET_AFFINITY_MASK && len == 0) {
+ *rval = sizeof (lx_affmask_t);
+ return (0);
+ }
+
+ /*
+ * Otherwise, ensure they have a large enough mask.
+ */
+ if (cmd == B_GET_AFFINITY_MASK && len < sizeof (lx_affmask_t)) {
+ *rval = -1;
+ return (set_errno(EINVAL));
+ }
+
+ if (pid == 0) {
+ s_pid = curproc->p_pid;
+ s_tid = curthread->t_tid;
+ } else if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) == -1) {
+ return (set_errno(ESRCH));
+ }
+
+ /*
+ * For now, we only support manipulating threads in the
+ * same process.
+ */
+ if (curproc->p_pid != s_pid)
+ return (set_errno(EPERM));
+
+ /*
+ * We must hold the process lock so that the thread list
+ * doesn't change while we're looking at it. We'll hold
+ * the lock until we no longer reference the
+ * corresponding lwp.
+ */
+
+ mutex_enter(&curproc->p_lock);
+
+ do {
+ if (t->t_tid == s_tid)
+ break;
+ t = t->t_forw;
+ } while (t != curthread);
+
+ /*
+ * If the given PID is in the current thread's process,
+ * then we _must_ find it in the process's thread list.
+ */
+ ASSERT(t->t_tid == s_tid);
+
+ lx_lwp = t->t_lwp->lwp_brand;
+
+ if (cmd == B_SET_AFFINITY_MASK) {
+ if (copyin_nowatch((void *)maskp, &lx_lwp->br_affinitymask,
+ sizeof (lx_affmask_t)) != 0) {
+ mutex_exit(&curproc->p_lock);
+ return (set_errno(EFAULT));
+ }
+
+ *rval = 0;
+ } else {
+ if (copyout_nowatch(&lx_lwp->br_affinitymask, (void *)maskp,
+ sizeof (lx_affmask_t)) != 0) {
+ mutex_exit(&curproc->p_lock);
+ return (set_errno(EFAULT));
+ }
+
+ *rval = sizeof (lx_affmask_t);
+ }
+
+ mutex_exit(&curproc->p_lock);
+ return (0);
+}
+
+long
+lx_sched_setscheduler(l_pid_t pid, int policy, struct lx_sched_param *param)
+{
+ klwp_t *lwp = ttolwp(curthread);
+ procset_t procset;
+ procset_t procset_cid;
+ pcparms_t pcparm;
+ pcinfo_t pcinfo;
+ struct lx_sched_param sched_param;
+ tsparms_t *tsp;
+ int prio, maxupri;
+ int rv;
+
+ if (pid < 0)
+ return (set_errno(ESRCH));
+
+ if ((rv = sched_setprocset(&procset, pid)))
+ return (rv);
+
+ if (copyin(param, &sched_param, sizeof (sched_param)))
+ return (set_errno(EFAULT));
+
+ prio = sched_param.lx_sched_prio;
+
+ if (policy < 0) {
+ /*
+ * get the class id
+ */
+ pcparm.pc_cid = PC_CLNULL;
+ (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ /*
+ * get the current policy
+ */
+ bzero(&pcinfo, sizeof (pcinfo));
+ pcinfo.pc_cid = pcparm.pc_cid;
+ (void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ if (strcmp(pcinfo.pc_clname, "TS") == 0) {
+ policy = LX_SCHED_OTHER;
+ } else if (strcmp(pcinfo.pc_clname, "RT") == 0) {
+ policy = ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs ==
+ RT_TQINF ? LX_SCHED_FIFO : LX_SCHED_RR;
+ } else {
+ return (set_errno(EINVAL));
+ }
+ }
+
+ bzero(&pcinfo, sizeof (pcinfo));
+ bzero(&pcparm, sizeof (pcparm));
+ setprocset(&procset_cid, POP_AND, P_PID, 0, P_ALL, 0);
+ switch (policy) {
+ case LX_SCHED_FIFO:
+ case LX_SCHED_RR:
+ (void) strcpy(pcinfo.pc_clname, "RT");
+ (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ if (prio < 0 ||
+ prio > ((rtinfo_t *)pcinfo.pc_clinfo)->rt_maxpri)
+ return (set_errno(EINVAL));
+ pcparm.pc_cid = pcinfo.pc_cid;
+ ((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio;
+ ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs =
+ policy == LX_SCHED_RR ? RT_TQDEF : RT_TQINF;
+ break;
+
+ case LX_SCHED_OTHER:
+ (void) strcpy(pcinfo.pc_clname, "TS");
+ (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ maxupri = ((tsinfo_t *)pcinfo.pc_clinfo)->ts_maxupri;
+ if (prio > maxupri || prio < -maxupri)
+ return (set_errno(EINVAL));
+
+ pcparm.pc_cid = pcinfo.pc_cid;
+ tsp = (tsparms_t *)pcparm.pc_clparms;
+ tsp->ts_upri = prio;
+ tsp->ts_uprilim = TS_NOCHANGE;
+ break;
+
+ default:
+ return (set_errno(EINVAL));
+ }
+
+ /*
+ * finally set scheduling policy and parameters
+ */
+ (void) do_priocntlsys(PC_SETPARMS, &procset, &pcparm);
+
+ return (0);
+}
+
+long
+lx_sched_getscheduler(l_pid_t pid)
+{
+ klwp_t *lwp = ttolwp(curthread);
+ procset_t procset;
+ pcparms_t pcparm;
+ pcinfo_t pcinfo;
+ int policy;
+ int rv;
+
+ if (pid < 0)
+ return (set_errno(ESRCH));
+
+ if ((rv = sched_setprocset(&procset, pid)))
+ return (rv);
+
+ /*
+ * get the class id
+ */
+ pcparm.pc_cid = PC_CLNULL;
+ (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ /*
+ * get the class info and identify the equivalent linux policy
+ */
+ bzero(&pcinfo, sizeof (pcinfo));
+ pcinfo.pc_cid = pcparm.pc_cid;
+ (void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ if (strcmp(pcinfo.pc_clname, "TS") == 0)
+ policy = LX_SCHED_OTHER;
+ else if (strcmp(pcinfo.pc_clname, "RT") == 0)
+ policy = ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs ==
+ RT_TQINF ? LX_SCHED_FIFO : LX_SCHED_RR;
+ else
+ policy = set_errno(EINVAL);
+
+ return (policy);
+}
+
+long
+lx_sched_setparam(l_pid_t pid, struct lx_sched_param *param)
+{
+ klwp_t *lwp = ttolwp(curthread);
+ procset_t procset;
+ procset_t procset_cid;
+ pcparms_t pcparm;
+ pcinfo_t pcinfo;
+ struct lx_sched_param sched_param;
+ tsparms_t *tsp;
+ int policy;
+ int prio, maxupri;
+ int rv;
+
+ if (pid < 0)
+ return (set_errno(ESRCH));
+
+ if ((rv = sched_setprocset(&procset, pid)))
+ return (rv);
+
+ if (copyin(param, &sched_param, sizeof (sched_param)))
+ return (set_errno(EFAULT));
+
+ prio = sched_param.lx_sched_prio;
+
+ /*
+ * get the class id
+ */
+ pcparm.pc_cid = PC_CLNULL;
+ (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ /*
+ * get the current policy
+ */
+ bzero(&pcinfo, sizeof (pcinfo));
+ pcinfo.pc_cid = pcparm.pc_cid;
+ (void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ if (strcmp(pcinfo.pc_clname, "TS") == 0)
+ policy = LX_SCHED_OTHER;
+ else if (strcmp(pcinfo.pc_clname, "RT") == 0)
+ policy = ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs ==
+ RT_TQINF ? LX_SCHED_FIFO : LX_SCHED_RR;
+ else
+ return (set_errno(EINVAL));
+
+ bzero(&pcinfo, sizeof (pcinfo));
+ bzero(&pcparm, sizeof (pcparm));
+ setprocset(&procset_cid, POP_AND, P_PID, 0, P_ALL, 0);
+ switch (policy) {
+ case LX_SCHED_FIFO:
+ case LX_SCHED_RR:
+ (void) strcpy(pcinfo.pc_clname, "RT");
+ (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ if (prio < 0 ||
+ prio > ((rtinfo_t *)pcinfo.pc_clinfo)->rt_maxpri)
+ return (set_errno(EINVAL));
+ pcparm.pc_cid = pcinfo.pc_cid;
+ ((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio;
+ ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs =
+ policy == LX_SCHED_RR ? RT_TQDEF : RT_TQINF;
+ break;
+
+ case LX_SCHED_OTHER:
+ (void) strcpy(pcinfo.pc_clname, "TS");
+ (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ maxupri = ((tsinfo_t *)pcinfo.pc_clinfo)->ts_maxupri;
+ if (prio > maxupri || prio < -maxupri)
+ return (set_errno(EINVAL));
+
+ pcparm.pc_cid = pcinfo.pc_cid;
+ tsp = (tsparms_t *)pcparm.pc_clparms;
+ tsp->ts_upri = prio;
+ tsp->ts_uprilim = TS_NOCHANGE;
+ break;
+
+ default:
+ return (set_errno(EINVAL));
+ }
+
+ /*
+ * finally set scheduling policy and parameters
+ */
+ (void) do_priocntlsys(PC_SETPARMS, &procset, &pcparm);
+
+ return (0);
+}
+
+long
+lx_sched_getparam(l_pid_t pid, struct lx_sched_param *param)
+{
+ klwp_t *lwp = ttolwp(curthread);
+ struct lx_sched_param local_param;
+ procset_t procset;
+ pcparms_t pcparm;
+ pcinfo_t pcinfo;
+ tsinfo_t *tsi;
+ int prio, scale;
+ int rv;
+
+ if (pid < 0)
+ return (set_errno(ESRCH));
+
+ if ((rv = sched_setprocset(&procset, pid)))
+ return (rv);
+
+ /*
+ * get the class id
+ */
+ pcparm.pc_cid = PC_CLNULL;
+ (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ /*
+ * get the class info and identify the equivalent linux policy
+ */
+ bzero(&pcinfo, sizeof (pcinfo));
+ pcinfo.pc_cid = pcparm.pc_cid;
+ (void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ bzero(&local_param, sizeof (local_param));
+ if (strcmp(pcinfo.pc_clname, "TS") == 0) {
+ /*
+ * I don't know if we need to do this, coz it can't be
+ * changed from zero anyway.....
+ */
+ tsi = (tsinfo_t *)pcinfo.pc_clinfo;
+ prio = ((tsparms_t *)pcparm.pc_clparms)->ts_upri;
+ scale = tsi->ts_maxupri;
+ if (scale == 0)
+ local_param.lx_sched_prio = 0;
+ else
+ local_param.lx_sched_prio = -(prio * 20) / scale;
+ } else if (strcmp(pcinfo.pc_clname, "RT") == 0) {
+ local_param.lx_sched_prio =
+ ((rtparms_t *)pcparm.pc_clparms)->rt_pri;
+ } else {
+ rv = set_errno(EINVAL);
+ }
+
+ if (rv == 0)
+ if (copyout(&local_param, param, sizeof (local_param)))
+ return (set_errno(EFAULT));
+
+ return (rv);
+}
+
+long
+lx_sched_rr_get_interval(l_pid_t pid, struct timespec *ival)
+{
+ klwp_t *lwp = ttolwp(curthread);
+ struct timespec interval;
+ procset_t procset;
+ pcparms_t pcparm;
+ pcinfo_t pcinfo;
+ int rv;
+
+ if (pid < 0)
+ return (set_errno(ESRCH));
+
+ if ((rv = sched_setprocset(&procset, pid)))
+ return (rv);
+
+ /*
+ * get the class id
+ */
+ pcparm.pc_cid = PC_CLNULL;
+ (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ /*
+ * get the class info and identify the equivalent linux policy
+ */
+ setprocset(&procset, POP_AND, P_PID, 0, P_ALL, 0);
+ bzero(&pcinfo, sizeof (pcinfo));
+ (void) strcpy(pcinfo.pc_clname, "RT");
+ (void) do_priocntlsys(PC_GETCID, &procset, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ if (pcparm.pc_cid == pcinfo.pc_cid &&
+ ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs != RT_TQINF) {
+ interval.tv_sec = ((rtparms_t *)pcparm.pc_clparms)->rt_tqsecs;
+ interval.tv_nsec = ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs;
+
+ if (copyout(&interval, ival, sizeof (interval)))
+ return (set_errno(EFAULT));
+
+ return (0);
+ }
+
+ return (set_errno(EINVAL));
+}
+
+int
+sched_setprocset(procset_t *procset, l_pid_t pid)
+{
+ id_t lid, rid;
+ idtype_t lidtype, ridtype;
+
+ /*
+ * define the target lwp
+ */
+ if (pid == 0) {
+ ridtype = P_ALL;
+ lidtype = P_PID;
+ rid = 0;
+ lid = P_MYID;
+ } else {
+ if (lx_lpid_to_spair(pid, &pid, &lid) < 0)
+ return (set_errno(ESRCH));
+ if (pid != curproc->p_pid)
+ return (set_errno(ESRCH));
+ rid = 0;
+ ridtype = P_ALL;
+ lidtype = P_LWPID;
+ }
+ setprocset(procset, POP_AND, lidtype, lid, ridtype, rid);
+
+ return (0);
+}
+
+long
+do_priocntlsys(int cmd, procset_t *procset, void *arg)
+{
+ return (priocntl_common(PC_VERSION, procset, cmd, (caddr_t)arg, 0,
+ UIO_SYSSPACE));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_socket.c b/usr/src/uts/common/brand/lx/syscall/lx_socket.c
new file mode 100644
index 0000000000..e8e9714143
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_socket.c
@@ -0,0 +1,3750 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <sys/errno.h>
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/cmn_err.h>
+#include <sys/sockio.h>
+#include <sys/thread.h>
+#include <sys/stropts.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/kmem.h>
+#include <sys/un.h>
+#include <sys/sunddi.h>
+#include <sys/cred.h>
+#include <sys/ucred.h>
+#include <sys/model.h>
+#include <sys/brand.h>
+#include <sys/vmsystm.h>
+#include <sys/limits.h>
+#include <sys/fcntl.h>
+#include <sys/sysmacros.h>
+#include <netpacket/packet.h>
+#include <sockcommon.h>
+#include <socktpi_impl.h>
+#include <netinet/udp.h>
+#include <sys/sdt.h>
+#include <netinet/tcp.h>
+#include <netinet/igmp.h>
+#include <netinet/icmp6.h>
+#include <lx_errno.h>
+
+#include <sys/lx_brand.h>
+#include <sys/lx_socket.h>
+#include <sys/lx_types.h>
+#include <sys/lx_impl.h>
+
+
+typedef struct lx_ucred {
+ pid_t lxu_pid;
+ lx_uid_t lxu_uid;
+ lx_gid_t lxu_gid;
+} lx_ucred_t;
+
+typedef struct lx_socket_aux_data
+{
+ kmutex_t lxsad_lock;
+ enum lxsad_status_t {
+ LXSS_NONE = 0,
+ LXSS_CONNECTING,
+ LXSS_CONNECTED
+ } lxsad_status;
+ boolean_t lxsad_stream_cred;
+} lx_socket_aux_data_t;
+
+static lx_socket_aux_data_t *lx_sad_acquire(vnode_t *);
+
+/* VSD key for lx-specific socket information */
+static uint_t lx_socket_vsd = 0;
+
+/* Convenience enum to enforce translation direction */
+typedef enum lx_xlate_dir {
+ SUNOS_TO_LX,
+ LX_TO_SUNOS
+} lx_xlate_dir_t;
+
+/* enum for getpeername/getsockname handling */
+typedef enum lx_getname_type {
+ LX_GETPEERNAME,
+ LX_GETSOCKNAME
+} lx_getname_type_t;
+
+/*
+ * What follows are a series of tables we use to translate Linux constants
+ * into equivalent Illumos constants and back again. I wish this were
+ * cleaner, more programmatic, and generally nicer. Sadly, life is messy,
+ * and Unix networking even more so.
+ */
+static const int ltos_family[LX_AF_MAX + 1] = {
+ AF_UNSPEC, /* LX_AF_UNSPEC */
+ AF_UNIX, /* LX_AF_UNIX */
+ AF_INET, /* LX_AF_INET */
+ AF_NOTSUPPORTED, /* LX_AF_AX25 */
+ AF_NOTSUPPORTED, /* LX_AF_IPX */
+ AF_NOTSUPPORTED, /* LX_AF_APPLETALK */
+ AF_NOTSUPPORTED, /* LX_AF_NETROM */
+ AF_NOTSUPPORTED, /* LX_AF_BRIDGE */
+ AF_NOTSUPPORTED, /* LX_AF_ATMPVC */
+ AF_NOTSUPPORTED, /* LX_AF_X25 */
+ AF_INET6, /* LX_AF_INET6 */
+ AF_NOTSUPPORTED, /* LX_AF_ROSE */
+ AF_NOTSUPPORTED, /* LX_AF_DECNET */
+ AF_NOTSUPPORTED, /* LX_AF_NETBEUI */
+ AF_NOTSUPPORTED, /* LX_AF_SECURITY */
+ AF_NOTSUPPORTED, /* LX_AF_KEY */
+ AF_LX_NETLINK, /* LX_AF_NETLINK */
+ AF_PACKET, /* LX_AF_PACKET */
+ AF_NOTSUPPORTED, /* LX_AF_ASH */
+ AF_NOTSUPPORTED, /* LX_AF_ECONET */
+ AF_NOTSUPPORTED, /* LX_AF_ATMSVC */
+ AF_NOTSUPPORTED, /* LX_AF_RDS */
+ AF_NOTSUPPORTED, /* LX_AF_SNA */
+ AF_NOTSUPPORTED, /* LX_AF_IRDA */
+ AF_NOTSUPPORTED, /* LX_AF_PPOX */
+ AF_NOTSUPPORTED, /* LX_AF_WANPIPE */
+ AF_NOTSUPPORTED, /* LX_AF_LLC */
+ AF_NOTSUPPORTED, /* EMPTY */
+ AF_NOTSUPPORTED, /* EMPTY */
+ AF_NOTSUPPORTED, /* LX_AF_CAN */
+ AF_NOTSUPPORTED, /* LX_AF_TIPC */
+ AF_NOTSUPPORTED, /* LX_AF_BLUETOOTH */
+ AF_NOTSUPPORTED, /* LX_AF_IUCV */
+ AF_NOTSUPPORTED /* LX_AF_RXRPC */
+ /* LX_AF_ISDN */
+ /* LX_AF_PHONET */
+ /* LX_AF_IEEE802154 */
+ /* LX_AF_CAIF */
+ /* LX_AF_ALG */
+ /* LX_AF_NFC */
+ /* LX_AF_VSOCK */
+};
+
+static const int stol_family[LX_AF_MAX + 1] = {
+ AF_UNSPEC, /* AF_UNSPEC */
+ AF_UNIX, /* AF_UNIX */
+ AF_INET, /* AF_INET */
+ AF_NOTSUPPORTED, /* AF_IMPLINK */
+ AF_NOTSUPPORTED, /* AF_PUP */
+ AF_NOTSUPPORTED, /* AF_CHAOS */
+ AF_NOTSUPPORTED, /* AF_NS */
+ AF_NOTSUPPORTED, /* AF_NBS */
+ AF_NOTSUPPORTED, /* AF_ECMA */
+ AF_NOTSUPPORTED, /* AF_DATAKIT */
+ AF_NOTSUPPORTED, /* AF_CCITT */
+ AF_NOTSUPPORTED, /* AF_SNA */
+ AF_NOTSUPPORTED, /* AF_DECNET */
+ AF_NOTSUPPORTED, /* AF_DLI */
+ AF_NOTSUPPORTED, /* AF_LAT */
+ AF_NOTSUPPORTED, /* AF_HYLINK */
+ AF_NOTSUPPORTED, /* AF_APPLETALK */
+ AF_NOTSUPPORTED, /* AF_NIT */
+ AF_NOTSUPPORTED, /* AF_802 */
+ AF_NOTSUPPORTED, /* AF_OSI */
+ AF_NOTSUPPORTED, /* AF_X25 */
+ AF_NOTSUPPORTED, /* AF_OSINET */
+ AF_NOTSUPPORTED, /* AF_GOSIP */
+ AF_NOTSUPPORTED, /* AF_IPX */
+ AF_NOTSUPPORTED, /* AF_ROUTE */
+ AF_NOTSUPPORTED, /* AF_LINK */
+ LX_AF_INET6, /* AF_INET6 */
+ AF_NOTSUPPORTED, /* AF_KEY */
+ AF_NOTSUPPORTED, /* AF_NCA */
+ AF_NOTSUPPORTED, /* AF_POLICY */
+ AF_NOTSUPPORTED, /* AF_INET_OFFLOAD */
+ AF_NOTSUPPORTED, /* AF_TRILL */
+ LX_AF_PACKET, /* AF_PACKET */
+ LX_AF_NETLINK /* AF_LX_NETLINK */
+};
+
+#define LTOS_FAMILY(d) ((d) <= LX_AF_MAX ? ltos_family[(d)] : AF_INVAL)
+#define STOL_FAMILY(d) ((d) <= LX_AF_MAX ? stol_family[(d)] : AF_INVAL)
+
+
+static const int ltos_socktype[LX_SOCK_PACKET + 1] = {
+ SOCK_NOTSUPPORTED, SOCK_STREAM, SOCK_DGRAM, SOCK_RAW,
+ SOCK_RDM, SOCK_SEQPACKET, SOCK_NOTSUPPORTED, SOCK_NOTSUPPORTED,
+ SOCK_NOTSUPPORTED, SOCK_NOTSUPPORTED, SOCK_NOTSUPPORTED
+};
+
+static const int stol_socktype[SOCK_SEQPACKET + 1] = {
+ SOCK_NOTSUPPORTED, LX_SOCK_DGRAM, LX_SOCK_STREAM, SOCK_NOTSUPPORTED,
+ LX_SOCK_RAW, LX_SOCK_RDM, LX_SOCK_SEQPACKET
+};
+
+#define LTOS_SOCKTYPE(t) \
+ ((t) <= LX_SOCK_PACKET ? ltos_socktype[(t)] : SOCK_INVAL)
+#define STOL_SOCKTYPE(t) \
+ ((t) <= SOCK_SEQPACKET ? ltos_socktype[(t)] : SOCK_INVAL)
+
+
+/*
+ * This string is used to prefix all abstract namespace Unix sockets, ie all
+ * abstract namespace sockets are converted to regular sockets in the /tmp
+ * directory with .ABSK_ prefixed to their names.
+ */
+#define ABST_PRFX "/tmp/.ABSK_"
+#define ABST_PRFX_LEN (sizeof (ABST_PRFX) - 1)
+
+#define DATAFILT "datafilt"
+
+typedef enum {
+ lxa_none,
+ lxa_abstract,
+ lxa_devlog
+} lx_addr_type_t;
+
+static int
+ltos_pkt_proto(int protocol)
+{
+ switch (ntohs(protocol)) {
+ case LX_ETH_P_802_2:
+ return (ETH_P_802_2);
+ case LX_ETH_P_IP:
+ return (ETH_P_IP);
+ case LX_ETH_P_ARP:
+ return (ETH_P_ARP);
+ case LX_ETH_P_IPV6:
+ return (ETH_P_IPV6);
+ case LX_ETH_P_ALL:
+ case LX_ETH_P_802_3:
+ return (ETH_P_ALL);
+ default:
+ return (-1);
+ }
+}
+
+
+typedef struct lx_flag_map {
+ enum {
+ LXFM_MAP,
+ LXFM_IGNORE,
+ LXFM_UNSUP
+ } lxfm_action;
+ int lxfm_sunos_flag;
+ int lxfm_linux_flag;
+ char *lxfm_name;
+} lx_flag_map_t;
+
+static lx_flag_map_t lx_flag_map_tbl[] = {
+ { LXFM_MAP, MSG_OOB, LX_MSG_OOB, NULL },
+ { LXFM_MAP, MSG_PEEK, LX_MSG_PEEK, NULL },
+ { LXFM_MAP, MSG_DONTROUTE, LX_MSG_DONTROUTE, NULL },
+ { LXFM_MAP, MSG_CTRUNC, LX_MSG_CTRUNC, NULL },
+ { LXFM_MAP, MSG_TRUNC, LX_MSG_TRUNC, NULL },
+ { LXFM_MAP, MSG_DONTWAIT, LX_MSG_DONTWAIT, NULL },
+ { LXFM_MAP, MSG_EOR, LX_MSG_EOR, NULL },
+ { LXFM_MAP, MSG_WAITALL, LX_MSG_WAITALL, NULL },
+ /* MSG_CONFIRM is safe to ignore */
+ { LXFM_IGNORE, 0, LX_MSG_CONFIRM, NULL },
+ /*
+ * The NOSIGNAL and CMSG_CLOEXEC flags are handled by the emulation
+ * outside of the flag-conversion routine.
+ */
+ { LXFM_IGNORE, 0, LX_MSG_NOSIGNAL, NULL },
+ { LXFM_IGNORE, 0, LX_MSG_CMSG_CLOEXEC, NULL },
+ { LXFM_UNSUP, LX_MSG_PROXY, 0, "MSG_PROXY" },
+ { LXFM_UNSUP, LX_MSG_FIN, 0, "MSG_FIN" },
+ { LXFM_UNSUP, LX_MSG_SYN, 0, "MSG_SYN" },
+ { LXFM_UNSUP, LX_MSG_RST, 0, "MSG_RST" },
+ { LXFM_UNSUP, LX_MSG_ERRQUEUE, 0, "MSG_ERRQUEUE" },
+ { LXFM_UNSUP, LX_MSG_MORE, 0, "MSG_MORE" },
+ { LXFM_UNSUP, LX_MSG_WAITFORONE, 0, "MSG_WAITFORONE" },
+ { LXFM_UNSUP, LX_MSG_FASTOPEN, 0, "MSG_FASTOPEN" },
+};
+
+#define LX_FLAG_MAP_MAX \
+ (sizeof (lx_flag_map_tbl) / sizeof (lx_flag_map_tbl[0]))
+
+#define LX_UNSUP_BUFSZ 64
+
+static int
+lx_xlate_sock_flags(int inflags, lx_xlate_dir_t dir)
+{
+ int i, outflags = 0;
+ char buf[LX_UNSUP_BUFSZ];
+
+ VERIFY(dir == SUNOS_TO_LX || dir == LX_TO_SUNOS);
+
+ for (i = 0; i < LX_FLAG_MAP_MAX; i++) {
+ lx_flag_map_t *map = &lx_flag_map_tbl[i];
+ int match, out;
+
+ if (dir == SUNOS_TO_LX) {
+ match = inflags & map->lxfm_sunos_flag;
+ out = map->lxfm_linux_flag;
+ } else {
+ match = inflags & map->lxfm_linux_flag;
+ out = map->lxfm_sunos_flag;
+ }
+ switch (map->lxfm_action) {
+ case LXFM_MAP:
+ if (match != 0) {
+ inflags &= ~(match);
+ outflags |= out;
+ }
+ break;
+ case LXFM_IGNORE:
+ if (match != 0) {
+ inflags &= ~(match);
+ }
+ break;
+ case LXFM_UNSUP:
+ if (match != 0) {
+ snprintf(buf, LX_UNSUP_BUFSZ,
+ "unsupported sock flag %s", map->lxfm_name);
+ lx_unsupported(buf);
+ }
+ }
+ }
+ if (inflags != 0) {
+ snprintf(buf, LX_UNSUP_BUFSZ, "unsupported sock flags 0x%08x",
+ inflags);
+ lx_unsupported(buf);
+ }
+
+ return (outflags);
+}
+
+typedef enum lx_sun_type {
+ LX_SUN_NORMAL,
+ LX_SUN_ABSTRACT,
+} lx_sun_type_t;
+
+static void
+ltos_sockaddr_ux(const struct sockaddr *inaddr, const socklen_t inlen,
+ struct sockaddr **outaddr, socklen_t *outlen, lx_sun_type_t *sun_type)
+{
+ struct sockaddr_un buf;
+ /* Calculate size of (sun_family + any padding) in sockaddr */
+ int sizediff = (sizeof (buf) - sizeof (buf.sun_path));
+ int len = inlen - sizediff;
+
+ VERIFY(len > 0);
+ VERIFY(len <= sizeof (buf.sun_path));
+ bzero(&buf, sizeof (buf));
+
+ if (inaddr->sa_data[0] == '\0') {
+ /*
+ * Linux supports abstract Unix sockets, which are simply
+ * sockets that do not exist on the file system. These sockets
+ * are denoted by beginning the path with a NULL character. To
+ * support these, we strip out the leading NULL character and
+ * change the path to point to a real place in /tmp directory,
+ * by prepending ABST_PRFX and replacing all illegal characters
+ * with * '_'.
+ *
+ * Since these sockets are supposed to exist outside the
+ * filesystem, they must be cleaned up after use. This removal
+ * is performed during bind().
+ */
+ int idx, odx;
+
+ /* Add our abstract prefix */
+ (void) strcpy(buf.sun_path, ABST_PRFX);
+ for (idx = 1, odx = ABST_PRFX_LEN;
+ idx < len && odx < sizeof (buf.sun_path);
+ idx++, odx++) {
+ char c = inaddr->sa_data[idx];
+ if (c == '\0' || c == '/') {
+ buf.sun_path[odx] = '_';
+ } else {
+ buf.sun_path[odx] = c;
+ }
+ }
+
+ /*
+ * Since abstract socket addresses might not be NUL terminated,
+ * we must explicitly NUL terminate the translated path.
+ * Care is taken not to overflow the buffer.
+ */
+ if (odx == sizeof (buf.sun_path)) {
+ buf.sun_path[odx - 1] = '\0';
+ } else {
+ buf.sun_path[odx] = '\0';
+ }
+
+ if (sun_type != NULL) {
+ *sun_type = LX_SUN_ABSTRACT;
+ }
+ } else {
+ /* Copy the address directly, minding termination */
+ (void) strncpy(buf.sun_path, inaddr->sa_data, len);
+ len = strnlen(buf.sun_path, len);
+ if (len == sizeof (buf.sun_path)) {
+ buf.sun_path[len - 1] = '\0';
+ } else {
+ VERIFY(len < sizeof (buf.sun_path));
+ buf.sun_path[len] = '\0';
+ }
+
+ if (sun_type != NULL) {
+ *sun_type = LX_SUN_NORMAL;
+ }
+ }
+ buf.sun_family = AF_UNIX;
+ *outlen = strlen(buf.sun_path) + 1 + sizediff;
+ VERIFY(*outlen <= sizeof (struct sockaddr_un));
+
+ *outaddr = kmem_alloc(*outlen, KM_SLEEP);
+ bcopy(&buf, *outaddr, *outlen);
+}
+
+/*
+ * Copy in a Linux-native socket address from userspace and convert it into
+ * illumos format. When successful, it will allocate an appropriately sized
+ * struct to be freed by the caller.
+ */
+static long
+ltos_sockaddr_copyin(const struct sockaddr *inaddr, const socklen_t inlen,
+ struct sockaddr **outaddr, socklen_t *outlen, lx_sun_type_t *sun_type)
+{
+ sa_family_t family;
+ struct sockaddr *laddr;
+ struct sockaddr_ll *sal;
+ int proto, error = 0;
+
+ VERIFY(inaddr != NULL);
+
+ if (inlen < sizeof (sa_family_t) ||
+ inlen > sizeof (struct sockaddr_storage)) {
+ return (EINVAL);
+ }
+ laddr = kmem_alloc(inlen, KM_SLEEP);
+ if (copyin(inaddr, laddr, inlen) != 0) {
+ kmem_free(laddr, inlen);
+ return (EFAULT);
+ }
+
+ family = LTOS_FAMILY(laddr->sa_family);
+ switch (family) {
+ case (sa_family_t)AF_NOTSUPPORTED:
+ error = EPROTONOSUPPORT;
+ break;
+
+ case (sa_family_t)AF_INVAL:
+ error = EAFNOSUPPORT;
+ break;
+
+ case AF_UNIX:
+ if (inlen < sizeof (sa_family_t) + 2 ||
+ inlen > sizeof (struct sockaddr_un)) {
+ error = EINVAL;
+ break;
+ }
+ ltos_sockaddr_ux(laddr, inlen, outaddr, outlen,
+ sun_type);
+
+ /* AF_UNIX bypasses the standard copy logic */
+ kmem_free(laddr, inlen);
+ return (0);
+
+ case AF_PACKET:
+ if (inlen < sizeof (struct sockaddr_ll)) {
+ error = EINVAL;
+ break;
+ }
+ *outlen = sizeof (struct sockaddr_ll);
+
+ /* sll_protocol must be translated */
+ sal = (struct sockaddr_ll *)laddr;
+ proto = ltos_pkt_proto(sal->sll_protocol);
+ if (proto < 0) {
+ error = EINVAL;
+ }
+ sal->sll_protocol = proto;
+ break;
+
+ case AF_INET:
+ if (inlen < sizeof (struct sockaddr)) {
+ error = EINVAL;
+ break;
+ }
+ *outlen = sizeof (struct sockaddr);
+ break;
+
+ case AF_INET6:
+ /*
+ * The illumos sockaddr_in6 has one more 32-bit field
+ * than the Linux version. We simply zero that field
+ * via kmem_zalloc.
+ */
+ if (inlen < sizeof (lx_sockaddr_in6_t)) {
+ error = EINVAL;
+ break;
+ }
+ *outlen = sizeof (struct sockaddr_in6);
+ *outaddr = (struct sockaddr *)kmem_zalloc(*outlen,
+ KM_SLEEP);
+ bcopy(laddr, *outaddr, sizeof (lx_sockaddr_in6_t));
+ (*outaddr)->sa_family = AF_INET6;
+ /* AF_INET6 bypasses the standard copy logic */
+ kmem_free(laddr, inlen);
+ return (0);
+
+ default:
+ *outlen = inlen;
+ }
+
+ if (error == 0) {
+ /*
+ * For most address families, just copying into a sockaddr of
+ * the correct size and updating sa_family is adequate.
+ */
+ VERIFY(inlen >= *outlen);
+
+ *outaddr = (struct sockaddr *)kmem_zalloc(*outlen, KM_SLEEP);
+ bcopy(laddr, *outaddr, *outlen);
+ (*outaddr)->sa_family = family;
+ }
+ kmem_free(laddr, inlen);
+ return (error);
+}
+
+/*
+ * Convert an illumos-native socket address into Linux format and copy it out
+ * to userspace.
+ */
+static long
+stol_sockaddr_copyout(struct sockaddr *inaddr, socklen_t inlen,
+ struct sockaddr *outaddr, void *outlenp, socklen_t orig)
+{
+ socklen_t size = inlen;
+ struct sockaddr_storage buf;
+ struct sockaddr *bufaddr;
+
+ /*
+ * Either we were passed a valid sockaddr (with length) or the length
+ * is set to 0.
+ */
+ VERIFY(inaddr != NULL || inlen == 0);
+
+ if (inlen == 0) {
+ goto finish;
+ }
+
+
+ switch (inaddr->sa_family) {
+ case AF_INET:
+ if (inlen != sizeof (struct sockaddr)) {
+ return (EINVAL);
+ }
+ break;
+
+ case AF_INET6:
+ if (inlen != sizeof (struct sockaddr_in6)) {
+ return (EINVAL);
+ }
+ /*
+ * The linux sockaddr_in6 is shorter than illumos.
+ * Truncate the extra field on the way out.
+ */
+ size = (sizeof (lx_sockaddr_in6_t));
+ inlen = (sizeof (lx_sockaddr_in6_t));
+ break;
+
+ case AF_UNIX:
+ if (inlen > sizeof (struct sockaddr_un)) {
+ return (EINVAL);
+ }
+
+ /*
+ * On Linux an empty AF_UNIX address is returned as NULL, which
+ * means setting the returned length to only encompass the
+ * address family part of the buffer. However, some code also
+ * references the address portion of the buffer and uses it,
+ * even though the returned length has been shortened. Thus, we
+ * clear the buffer to ensure that the address portion is NULL.
+ */
+ if (inaddr->sa_data[0] == '\0') {
+ bzero(&buf, sizeof (buf));
+ inlen = sizeof (inaddr->sa_family);
+ }
+ break;
+
+ case (sa_family_t)AF_NOTSUPPORTED:
+ return (EPROTONOSUPPORT);
+
+ case (sa_family_t)AF_INVAL:
+ return (EAFNOSUPPORT);
+
+ default:
+ break;
+ }
+
+ /*
+ * The input should be smaller than sockaddr_storage, the largest
+ * sockaddr we support.
+ */
+ VERIFY(inlen <= sizeof (buf));
+
+ bufaddr = (struct sockaddr *)&buf;
+ bcopy(inaddr, bufaddr, inlen);
+ bufaddr->sa_family = STOL_FAMILY(bufaddr->sa_family);
+
+ /*
+ * It is possible that userspace passed us a smaller buffer than we
+ * hope to output. When this is the case, we will truncate our output
+ * to the max size of their buffer but report the true size of the
+ * sockaddr when outputting the outlen value.
+ */
+ size = (orig < size) ? orig : size;
+
+ if (copyout(bufaddr, outaddr, size) != 0) {
+ return (EFAULT);
+ }
+
+finish:
+#if defined(_LP64)
+ if (get_udatamodel() != DATAMODEL_NATIVE) {
+ int32_t len32 = (int32_t)inlen;
+ if (copyout(&len32, outlenp, sizeof (len32)) != 0) {
+ return (EFAULT);
+ }
+ } else
+#endif /* defined(_LP64) */
+ {
+ if (copyout(&inlen, outlenp, sizeof (inlen)) != 0) {
+ return (EFAULT);
+ }
+ }
+
+ return (0);
+}
+
+typedef struct lx_cmsg_xlate {
+ int lcx_sunos_level;
+ int lcx_sunos_type;
+ int (*lcx_stol_conv)(struct cmsghdr *, struct cmsghdr *);
+ int lcx_linux_level;
+ int lcx_linux_type;
+ int (*lcx_ltos_conv)(struct cmsghdr *, struct cmsghdr *);
+} lx_cmsg_xlate_t;
+
+static int cmsg_conv_generic(struct cmsghdr *, struct cmsghdr *);
+static int stol_conv_ucred(struct cmsghdr *, struct cmsghdr *);
+static int ltos_conv_ucred(struct cmsghdr *, struct cmsghdr *);
+static int stol_conv_recvttl(struct cmsghdr *, struct cmsghdr *);
+
+/*
+ * Table describing SunOS <-> Linux cmsg translation mappings.
+ * Certain types (IP_RECVTTL) are only converted in one direction and are
+ * indicated by one of the translation functions being set to NULL.
+ */
+static lx_cmsg_xlate_t lx_cmsg_xlate_tbl[] = {
+ { SOL_SOCKET, SCM_RIGHTS, cmsg_conv_generic,
+ LX_SOL_SOCKET, LX_SCM_RIGHTS, cmsg_conv_generic },
+ { SOL_SOCKET, SCM_UCRED, stol_conv_ucred,
+ LX_SOL_SOCKET, LX_SCM_CRED, ltos_conv_ucred },
+ { SOL_SOCKET, SCM_TIMESTAMP, cmsg_conv_generic,
+ LX_SOL_SOCKET, LX_SCM_TIMESTAMP, cmsg_conv_generic },
+ { IPPROTO_IP, IP_PKTINFO, cmsg_conv_generic,
+ LX_IPPROTO_IP, LX_IP_PKTINFO, cmsg_conv_generic },
+ { IPPROTO_IP, IP_RECVTTL, stol_conv_recvttl,
+ LX_IPPROTO_IP, LX_IP_TTL, NULL },
+ { IPPROTO_IP, IP_TTL, cmsg_conv_generic,
+ LX_IPPROTO_IP, LX_IP_TTL, cmsg_conv_generic },
+ { IPPROTO_IPV6, IPV6_HOPLIMIT, cmsg_conv_generic,
+ LX_IPPROTO_IPV6, LX_IPV6_HOPLIMIT, cmsg_conv_generic },
+ { IPPROTO_IPV6, IPV6_PKTINFO, cmsg_conv_generic,
+ LX_IPPROTO_IPV6, LX_IPV6_PKTINFO, cmsg_conv_generic }
+};
+
+#define LX_MAX_CMSG_XLATE \
+ (sizeof (lx_cmsg_xlate_tbl) / sizeof (lx_cmsg_xlate_tbl[0]))
+
+#if defined(_LP64)
+
+typedef struct {
+ int64_t cmsg_len;
+ int32_t cmsg_level;
+ int32_t cmsg_type;
+} lx_cmsghdr64_t;
+
+/* The alignment/padding for 64bit Linux cmsghdr is not the same. */
+#define LX_CMSG64_ALIGNMENT 8
+#define ISALIGNED_LX_CMSG64(addr) \
+ (((uintptr_t)(addr) & (LX_CMSG64_ALIGNMENT - 1)) == 0)
+#define ROUNDUP_LX_CMSG64_LEN(len) \
+ (((len) + LX_CMSG64_ALIGNMENT - 1) & ~(LX_CMSG64_ALIGNMENT - 1))
+
+#define LX_CMSG64_IS_ALIGNED(m) \
+ (((uintptr_t)(m) & (_CMSG_DATA_ALIGNMENT - 1)) == 0)
+#define LX_CMSG64_DATA(c) ((unsigned char *)(((lx_cmsghdr64_t *)(c)) + 1))
+/*
+ * LX_CMSG64_VALID is closely derived from CMSG_VALID with one particularly
+ * important addition. Since cmsg_len is 64bit, (cmsg + cmsg_len) is checked
+ * against the start address as well. This prevents bogus inputs from wrapping
+ * around the address space.
+ */
+#define LX_CMSG64_VALID(cmsg, start, end) \
+ (ISALIGNED_LX_CMSG64(cmsg) && \
+ ((uintptr_t)(cmsg) >= (uintptr_t)(start)) && \
+ ((uintptr_t)(cmsg) < (uintptr_t)(end)) && \
+ ((cmsg)->cmsg_len >= sizeof (lx_cmsghdr64_t)) && \
+ ((uintptr_t)(cmsg) + (cmsg)->cmsg_len <= (uintptr_t)(end)) && \
+ ((uintptr_t)(cmsg) + (cmsg)->cmsg_len >= (uintptr_t)(start)))
+#define LX_CMSG64_NEXT(cmsg) \
+ (lx_cmsghdr64_t *)((uintptr_t)(cmsg) + \
+ ROUNDUP_LX_CMSG64_LEN((cmsg)->cmsg_len))
+#define LX_CMSG64_DIFF sizeof (uint32_t)
+
+#endif /* defined(_LP64) */
+
+/*
+ * convert ucred_s to lx_ucred.
+ */
+static int
+stol_conv_ucred(struct cmsghdr *inmsg, struct cmsghdr *omsg)
+{
+ /*
+ * Format the data correctly in the omsg buffer.
+ */
+ if (omsg != NULL) {
+ struct ucred_s *scred = (struct ucred_s *)CMSG_CONTENT(inmsg);
+ prcred_t *cr;
+ lx_ucred_t lcred;
+
+ lcred.lxu_pid = scred->uc_pid;
+ cr = UCCRED(scred);
+ if (cr != NULL) {
+ lcred.lxu_uid = cr->pr_euid;
+ lcred.lxu_gid = cr->pr_egid;
+ } else {
+ lcred.lxu_uid = lcred.lxu_gid = 0;
+ }
+
+ bcopy(&lcred, CMSG_CONTENT(omsg), sizeof (lx_ucred_t));
+ }
+
+ return (sizeof (struct cmsghdr) + sizeof (lx_ucred_t));
+}
+
+static int
+ltos_conv_ucred(struct cmsghdr *inmsg, struct cmsghdr *omsg)
+{
+ if (omsg != NULL) {
+ struct ucred_s *uc;
+ prcred_t *pc;
+ lx_ucred_t *lcred;
+
+ uc = (struct ucred_s *)CMSG_CONTENT(omsg);
+ pc = (prcred_t *)((char *)uc + sizeof (struct ucred_s));
+
+ uc->uc_credoff = sizeof (struct ucred_s);
+
+ lcred = (lx_ucred_t *)CMSG_CONTENT(inmsg);
+
+ uc->uc_pid = lcred->lxu_pid;
+ pc->pr_euid = lcred->lxu_uid;
+ pc->pr_egid = lcred->lxu_gid;
+ }
+
+ return (sizeof (struct cmsghdr) + sizeof (struct ucred_s) +
+ sizeof (prcred_t));
+
+}
+
+static int
+stol_conv_recvttl(struct cmsghdr *inmsg, struct cmsghdr *omsg)
+{
+ /*
+ * SunOS communicates the TTL of incoming packets via IP_RECVTTL using
+ * a uint8_t value instead of IP_TTL using an int. This conversion is
+ * only needed in the one direction since Linux does not handle
+ * IP_RECVTTL in the sendmsg path.
+ */
+ if (omsg != NULL) {
+ uint8_t *inttl = (uint8_t *)CMSG_CONTENT(inmsg);
+ int *ottl = (int *)CMSG_CONTENT(omsg);
+
+ *ottl = (int)*inttl;
+ }
+
+ return (sizeof (struct cmsghdr) + sizeof (int));
+}
+
+static int
+cmsg_conv_generic(struct cmsghdr *inmsg, struct cmsghdr *omsg)
+{
+ if (omsg != NULL) {
+ size_t data_len;
+
+ data_len = inmsg->cmsg_len - sizeof (struct cmsghdr);
+ bcopy(CMSG_CONTENT(inmsg), CMSG_CONTENT(omsg), data_len);
+ }
+
+ return (inmsg->cmsg_len);
+}
+
+static int
+lx_xlate_cmsg(struct cmsghdr *inmsg, struct cmsghdr *omsg, lx_xlate_dir_t dir)
+{
+ int i;
+ int len;
+
+ VERIFY(dir == SUNOS_TO_LX || dir == LX_TO_SUNOS);
+
+ for (i = 0; i < LX_MAX_CMSG_XLATE; i++) {
+ lx_cmsg_xlate_t *xlate = &lx_cmsg_xlate_tbl[i];
+ if (dir == LX_TO_SUNOS &&
+ inmsg->cmsg_level == xlate->lcx_linux_level &&
+ inmsg->cmsg_type == xlate->lcx_linux_type &&
+ xlate->lcx_ltos_conv != NULL) {
+ len = xlate->lcx_ltos_conv(inmsg, omsg);
+ if (omsg != NULL) {
+ omsg->cmsg_len = len;
+ omsg->cmsg_level = xlate->lcx_sunos_level;
+ omsg->cmsg_type = xlate->lcx_sunos_type;
+ }
+ return (len);
+ } else if (dir == SUNOS_TO_LX &&
+ inmsg->cmsg_level == xlate->lcx_sunos_level &&
+ inmsg->cmsg_type == xlate->lcx_sunos_type &&
+ xlate->lcx_stol_conv != NULL) {
+ len = xlate->lcx_stol_conv(inmsg, omsg);
+ if (omsg != NULL) {
+ omsg->cmsg_len = len;
+ omsg->cmsg_level = xlate->lcx_linux_level;
+ omsg->cmsg_type = xlate->lcx_linux_type;
+ }
+ return (len);
+ }
+ }
+ /*
+ * The Linux man page for sendmsg does not define a specific error for
+ * unsupported cmsgs. While it is meant to indicated bad values for
+ * passed flags, EOPNOTSUPP appears to be the next closest choice.
+ */
+ return (-EOPNOTSUPP);
+}
+
+static long
+ltos_cmsgs_copyin(void *addr, socklen_t inlen, void **outmsg,
+ socklen_t *outlenp)
+{
+ void *inbuf, *obuf;
+ struct cmsghdr *inmsg, *omsg;
+ int slen = 0;
+
+ if (inlen < sizeof (struct cmsghdr) || inlen > SO_MAXARGSIZE) {
+ return (EINVAL);
+ }
+
+#if defined(_LP64)
+ if (get_udatamodel() == DATAMODEL_NATIVE &&
+ inlen < sizeof (lx_cmsghdr64_t)) {
+ /* The size requirements are more strict for 64bit. */
+ return (EINVAL);
+ }
+#endif /* defined(_LP64) */
+
+ inbuf = kmem_alloc(inlen, KM_SLEEP);
+ if (copyin(addr, inbuf, inlen) != 0) {
+ kmem_free(inbuf, inlen);
+ return (EFAULT);
+ }
+
+#if defined(_LP64)
+ if (get_udatamodel() == DATAMODEL_NATIVE) {
+ /*
+ * Linux cmsg headers are longer than illumos under x86_64.
+ * Convert to regular cmsgs first.
+ */
+ lx_cmsghdr64_t *lmsg;
+ struct cmsghdr *smsg;
+ void *newbuf;
+ int len = 0;
+
+ /* Inventory the new cmsg size */
+ for (lmsg = (lx_cmsghdr64_t *)inbuf;
+ LX_CMSG64_VALID(lmsg, inbuf, (uintptr_t)inbuf + inlen) != 0;
+ lmsg = LX_CMSG64_NEXT(lmsg)) {
+ len += ROUNDUP_cmsglen(lmsg->cmsg_len - LX_CMSG64_DIFF);
+ }
+
+ VERIFY(len < inlen);
+ if (len == 0) {
+ /* Input was bogus, so we can give up early. */
+ kmem_free(inbuf, inlen);
+ *outmsg = NULL;
+ *outlenp = 0;
+ return (EINVAL);
+ }
+
+ newbuf = kmem_alloc(len, KM_SLEEP);
+
+ for (lmsg = (lx_cmsghdr64_t *)inbuf,
+ smsg = (struct cmsghdr *)newbuf;
+ LX_CMSG64_VALID(lmsg, inbuf, (uintptr_t)inbuf + inlen) != 0;
+ lmsg = LX_CMSG64_NEXT(lmsg), smsg = CMSG_NEXT(smsg)) {
+ smsg->cmsg_level = lmsg->cmsg_level;
+ smsg->cmsg_type = lmsg->cmsg_type;
+ smsg->cmsg_len = lmsg->cmsg_len - LX_CMSG64_DIFF;
+
+ /* The above length measurement should ensure this */
+ ASSERT(CMSG_VALID(smsg, newbuf,
+ (uintptr_t)newbuf + len));
+
+ bcopy(LX_CMSG64_DATA(lmsg), CMSG_CONTENT(smsg),
+ smsg->cmsg_len - sizeof (*smsg));
+ }
+
+ kmem_free(inbuf, inlen);
+ inbuf = newbuf;
+ inlen = len;
+ }
+#endif /* defined(_LP64) */
+
+ /*
+ * Now determine how much space we need for the conversion.
+ */
+ for (inmsg = (struct cmsghdr *)inbuf;
+ CMSG_VALID(inmsg, inbuf, (uintptr_t)inbuf + inlen) != 0;
+ inmsg = CMSG_NEXT(inmsg)) {
+ int sz;
+
+ if ((sz = lx_xlate_cmsg(inmsg, NULL, LX_TO_SUNOS)) < 0) {
+ /* unsupported msg */
+ kmem_free(inbuf, inlen);
+ return (-sz);
+ }
+
+ slen += ROUNDUP_cmsglen(sz);
+ }
+
+ obuf = kmem_zalloc(slen, KM_SLEEP);
+
+ /*
+ * Now do the conversion.
+ */
+ for (inmsg = (struct cmsghdr *)inbuf, omsg = (struct cmsghdr *)obuf;
+ CMSG_VALID(inmsg, inbuf, (uintptr_t)inbuf + inlen) != 0;
+ inmsg = CMSG_NEXT(inmsg), omsg = CMSG_NEXT(omsg)) {
+ VERIFY(lx_xlate_cmsg(inmsg, omsg, LX_TO_SUNOS) >= 0);
+ }
+
+ kmem_free(inbuf, inlen);
+ *outmsg = obuf;
+ *outlenp = slen;
+ return (0);
+}
+
+static long
+stol_cmsgs_copyout(void *input, socklen_t inlen, void *addr,
+ void *outlenp, socklen_t orig_outlen)
+{
+ void *obuf;
+ struct cmsghdr *inmsg, *omsg;
+ int error = 0;
+ socklen_t lx_len = 0;
+#if defined(_LP64)
+ model_t model = get_udatamodel();
+#endif
+
+ if (inlen == 0) {
+ /* Simply output the zero controllen */
+ goto finish;
+ }
+
+ VERIFY(inlen > sizeof (struct cmsghdr));
+
+ /*
+ * First determine how much space we need for the conversion and
+ * make sure the caller has provided at least that much space to return
+ * results.
+ */
+ for (inmsg = (struct cmsghdr *)input;
+ CMSG_VALID(inmsg, input, (uintptr_t)input + inlen) != 0;
+ inmsg = CMSG_NEXT(inmsg)) {
+ int sz;
+
+ if ((sz = lx_xlate_cmsg(inmsg, NULL, SUNOS_TO_LX)) < 0) {
+ /* unsupported msg */
+ return (-sz);
+ }
+
+#if defined(_LP64)
+ if (model == DATAMODEL_NATIVE) {
+ /*
+ * The converted 64-bit cmsgs require an additional 4
+ * bytes of header space and must be aligned to 8 bytes
+ * (instead of the typical 4 for x86)
+ */
+ sz = ROUNDUP_LX_CMSG64_LEN(sz + LX_CMSG64_DIFF);
+ } else
+#endif /* defined(_LP64) */
+ {
+ /*
+ * The converted 32-bit cmsgs do not require additional
+ * header space or padding for Linux conversion.
+ */
+ sz = ROUNDUP_cmsglen(sz);
+ }
+
+ /*
+ * Unlike SunOS, Linux requires that the last cmsg be
+ * adequately padded for alignment.
+ */
+ lx_len += sz;
+ }
+
+ if (lx_len > orig_outlen || addr == NULL) {
+ /* This will be interpreted by the caller */
+ error = EMSGSIZE;
+ lx_len = 0;
+ goto finish;
+ }
+
+ /*
+ * Since cmsgs are often padded to an aligned size, kmem_zalloc is
+ * necessary to prevent leaking the contents of uninitialized memory.
+ */
+ obuf = kmem_zalloc(lx_len, KM_SLEEP);
+
+ /*
+ * Convert the msgs.
+ */
+ for (inmsg = (struct cmsghdr *)input, omsg = (struct cmsghdr *)obuf;
+ CMSG_VALID(inmsg, input, (uintptr_t)input + inlen) != 0;
+ inmsg = CMSG_NEXT(inmsg), omsg = CMSG_NEXT(omsg)) {
+ VERIFY(lx_xlate_cmsg(inmsg, omsg, SUNOS_TO_LX) >= 0);
+ }
+
+#if defined(_LP64)
+ if (model == DATAMODEL_NATIVE) {
+ /* Linux cmsg headers are longer than illumos under x86_64. */
+ struct cmsghdr *smsg;
+ lx_cmsghdr64_t *lmsg;
+ void *newbuf;
+
+ /*
+ * Once again, kmem_zalloc is needed to avoid leaking the
+ * contents of uninialized memory
+ */
+ newbuf = kmem_zalloc(lx_len, KM_SLEEP);
+ for (smsg = (struct cmsghdr *)obuf,
+ lmsg = (lx_cmsghdr64_t *)newbuf;
+ CMSG_VALID(smsg, obuf, (uintptr_t)obuf + inlen) != 0;
+ smsg = CMSG_NEXT(smsg), lmsg = LX_CMSG64_NEXT(lmsg)) {
+ lmsg->cmsg_level = smsg->cmsg_level;
+ lmsg->cmsg_type = smsg->cmsg_type;
+ lmsg->cmsg_len = smsg->cmsg_len + LX_CMSG64_DIFF;
+
+ ASSERT(LX_CMSG64_VALID(lmsg, newbuf,
+ (uintptr_t)newbuf + lx_len) != 0);
+
+ bcopy(CMSG_CONTENT(smsg), LX_CMSG64_DATA(lmsg),
+ smsg->cmsg_len - sizeof (*smsg));
+ }
+
+ kmem_free(obuf, lx_len);
+ obuf = newbuf;
+ }
+#endif /* defined(_LP64) */
+
+ if (copyout(obuf, addr, lx_len) != 0) {
+ kmem_free(obuf, lx_len);
+ return (EFAULT);
+ }
+ kmem_free(obuf, lx_len);
+
+finish:
+ if (outlenp != NULL) {
+#if defined(_LP64)
+ if (model != DATAMODEL_NATIVE) {
+ int32_t len32 = (int32_t)lx_len;
+ if (copyout(&len32, outlenp, sizeof (len32)) != 0) {
+ return (EFAULT);
+ }
+ } else
+#endif /* defined(_LP64) */
+ {
+ if (copyout(&lx_len, outlenp, sizeof (lx_len)) != 0) {
+ return (EFAULT);
+ }
+ }
+ }
+ return (error);
+}
+
+static void
+lx_cmsg_set_cloexec(void *input, socklen_t inlen)
+{
+ struct cmsghdr *inmsg;
+
+ if (inlen == 0) {
+ return;
+ }
+
+ for (inmsg = (struct cmsghdr *)input;
+ CMSG_VALID(inmsg, input, (uintptr_t)input + inlen) != 0;
+ inmsg = CMSG_NEXT(inmsg)) {
+ if (inmsg->cmsg_level == SOL_SOCKET &&
+ inmsg->cmsg_type == SCM_RIGHTS) {
+ int *fds = (int *)CMSG_CONTENT(inmsg);
+ int i, num = (int)CMSG_CONTENTLEN(inmsg) / sizeof (int);
+ for (i = 0; i < num; i++) {
+ char flags;
+ file_t *fp;
+ /* set CLOEXEC on the fd */
+ fp = getf(fds[i]);
+ VERIFY(fp != NULL);
+ flags = f_getfd(fds[i]);
+ flags |= FD_CLOEXEC;
+ f_setfd(fds[i], flags);
+ releasef(fds[i]);
+ }
+ }
+ }
+}
+
+static int
+lx_cmsg_try_ucred(sonode_t *so, struct nmsghdr *msg, socklen_t origlen)
+{
+ lx_socket_aux_data_t *sad;
+ struct cmsghdr *cmsg = NULL;
+ int msgsize;
+ cred_t *cred;
+
+ if (origlen == 0) {
+ return (0);
+ }
+ sad = lx_sad_acquire(SOTOV(so));
+ if (!sad->lxsad_stream_cred) {
+ mutex_exit(&sad->lxsad_lock);
+ return (0);
+ }
+ mutex_exit(&sad->lxsad_lock);
+
+ mutex_enter(&so->so_lock);
+ if (so->so_peercred == NULL) {
+ mutex_exit(&so->so_lock);
+ return (0);
+ }
+ crhold(cred = so->so_peercred);
+ mutex_exit(&so->so_lock);
+
+ msgsize = ucredminsize(cred) + sizeof (struct cmsghdr);
+ if (msg->msg_control == NULL) {
+ msg->msg_controllen = msgsize;
+ msg->msg_control = cmsg = kmem_zalloc(msgsize, KM_SLEEP);
+ } else {
+ /*
+ * The so_recvmsg operation may have allocated a msg_control
+ * buffer which precisely fits all returned cmsgs. We must
+ * manually verify the length of that cmsg data and reallocate
+ * the buffer if it lacks the necessary space.
+ */
+ uintptr_t start = (uintptr_t)msg->msg_control;
+ uintptr_t end = start + msg->msg_controllen;
+
+ ASSERT(msg->msg_controllen > 0);
+ cmsg = (struct cmsghdr *)msg->msg_control;
+ while (CMSG_VALID(cmsg, start, end) != 0) {
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_UCRED) {
+ /*
+ * If some later code change results in a ucred
+ * being attached anyways, there is no need for
+ * us to do it manually
+ */
+ crfree(cred);
+ return (0);
+ }
+ cmsg = CMSG_NEXT(cmsg);
+ }
+ if (((uintptr_t)cmsg + msgsize) > end) {
+ socklen_t offset = (uintptr_t)cmsg - start;
+ socklen_t newsize = offset + msgsize;
+ void *newbuf;
+
+ if (newsize < msg->msg_controllen) {
+ /* size overflow, bail */
+ crfree(cred);
+ return (-1);
+ }
+ newbuf = kmem_alloc(newsize, KM_SLEEP);
+ bcopy(msg->msg_control, newbuf, msg->msg_controllen);
+ kmem_free(msg->msg_control, msg->msg_controllen);
+
+ msg->msg_control = newbuf;
+ msg->msg_controllen = newsize;
+ cmsg = (struct cmsghdr *)((uintptr_t)newbuf + offset);
+ }
+ }
+
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_UCRED;
+ cmsg->cmsg_len = msgsize;
+ (void) cred2ucred(cred, so->so_cpid, CMSG_CONTENT(cmsg), CRED());
+ crfree(cred);
+ return (0);
+}
+
+static lx_socket_aux_data_t *
+lx_sad_acquire(vnode_t *vp)
+{
+ lx_socket_aux_data_t *cur, *created;
+
+ mutex_enter(&vp->v_vsd_lock);
+ cur = (lx_socket_aux_data_t *)vsd_get(vp, lx_socket_vsd);
+ if (cur == NULL) {
+ /* perform our allocation carefully */
+ mutex_exit(&vp->v_vsd_lock);
+
+ created = (lx_socket_aux_data_t *)kmem_zalloc(
+ sizeof (*created), KM_SLEEP);
+
+ mutex_enter(&vp->v_vsd_lock);
+ cur = (lx_socket_aux_data_t *)vsd_get(vp, lx_socket_vsd);
+ if (cur == NULL) {
+ mutex_init(&created->lxsad_lock, NULL, MUTEX_DEFAULT,
+ NULL);
+ (void) vsd_set(vp, lx_socket_vsd, created);
+ cur = created;
+ } else {
+ kmem_free(created, sizeof (*created));
+ }
+ }
+ mutex_exit(&vp->v_vsd_lock);
+ mutex_enter(&cur->lxsad_lock);
+ return (cur);
+}
+
+static int
+lx_convert_pkt_proto(int protocol)
+{
+ switch (ntohs(protocol)) {
+ case LX_ETH_P_802_2:
+ return (ETH_P_802_2);
+ case LX_ETH_P_IP:
+ return (ETH_P_IP);
+ case LX_ETH_P_ARP:
+ return (ETH_P_ARP);
+ case LX_ETH_P_IPV6:
+ return (ETH_P_IPV6);
+ case LX_ETH_P_ALL:
+ case LX_ETH_P_802_3:
+ return (ETH_P_ALL);
+ default:
+ return (-1);
+ }
+}
+
+static int
+lx_convert_sock_args(int in_dom, int in_type, int in_proto, int *out_dom,
+ int *out_type, int *out_options, int *out_proto)
+{
+ int domain, type, options;
+
+ if (in_dom < 0 || in_type < 0 || in_proto < 0)
+ return (EINVAL);
+
+ domain = LTOS_FAMILY(in_dom);
+ if (domain == AF_NOTSUPPORTED || domain == AF_UNSPEC)
+ return (EAFNOSUPPORT);
+ if (domain == AF_INVAL)
+ return (EINVAL);
+
+ type = LTOS_SOCKTYPE(in_type & LX_SOCK_TYPE_MASK);
+ if (type == SOCK_NOTSUPPORTED)
+ return (ESOCKTNOSUPPORT);
+ if (type == SOCK_INVAL)
+ return (EINVAL);
+
+ /*
+ * Linux does not allow the app to specify IP Protocol for raw sockets.
+ * SunOS does, so bail out here.
+ */
+ if (domain == AF_INET && type == SOCK_RAW && in_proto == IPPROTO_IP)
+ return (ESOCKTNOSUPPORT);
+
+ options = 0;
+ in_type &= ~(LX_SOCK_TYPE_MASK);
+ if (in_type & LX_SOCK_NONBLOCK) {
+ in_type ^= LX_SOCK_NONBLOCK;
+ options |= SOCK_NONBLOCK;
+ }
+ if (in_type & LX_SOCK_CLOEXEC) {
+ in_type ^= LX_SOCK_CLOEXEC;
+ options |= SOCK_CLOEXEC;
+ }
+ if (in_type != 0) {
+ return (EINVAL);
+ }
+
+ /* Protocol definitions for PF_PACKET differ between Linux and SunOS */
+ if (domain == PF_PACKET &&
+ (in_proto = lx_convert_pkt_proto(in_proto)) < 0)
+ return (EINVAL);
+
+ *out_dom = domain;
+ *out_type = type;
+ *out_options = options;
+ *out_proto = in_proto;
+ return (0);
+}
+
+long
+lx_socket(int domain, int type, int protocol)
+{
+ int fd, error, options;
+ sonode_t *so;
+ vnode_t *vp;
+ struct file *fp;
+
+ if ((error = lx_convert_sock_args(domain, type, protocol, &domain,
+ &type, &options, &protocol)) != 0) {
+ return (set_errno(error));
+ }
+
+ /* logic cloned from so_socket */
+ so = socket_create(domain, type, protocol, NULL, NULL, SOCKET_SLEEP,
+ SOV_DEFAULT, CRED(), &error);
+
+ if (so == NULL) {
+ if (error == EPROTOTYPE || error == EPROTONOSUPPORT) {
+ error = ESOCKTNOSUPPORT;
+ }
+ return (set_errno(error));
+ }
+
+ /* Allocate a file descriptor for the socket */
+ vp = SOTOV(so);
+ if ((error = falloc(vp, FWRITE|FREAD, &fp, &fd)) != 0) {
+ (void) socket_close(so, 0, CRED());
+ socket_destroy(so);
+ return (set_errno(error));
+ }
+
+ /*
+ * Linux programs do not tolerate errors appearing from asynchronous
+ * events (such as ICMP messages arriving). Setting SM_DEFERERR will
+ * prevent checking/delivery of such errors.
+ */
+ so->so_mode |= SM_DEFERERR;
+
+ /* Now fill in the entries that falloc reserved */
+ if (options & SOCK_NONBLOCK) {
+ so->so_state |= SS_NONBLOCK;
+ fp->f_flag |= FNONBLOCK;
+ }
+ mutex_exit(&fp->f_tlock);
+ setf(fd, fp);
+ if ((options & SOCK_CLOEXEC) != 0) {
+ f_setfd(fd, FD_CLOEXEC);
+ }
+ return (fd);
+}
+
+long
+lx_bind(long sock, uintptr_t name, socklen_t namelen)
+{
+ struct sonode *so;
+ struct sockaddr *addr = NULL;
+ socklen_t len = 0;
+ file_t *fp;
+ int error;
+ lx_sun_type_t sun_type;
+ boolean_t not_sock = B_FALSE;
+
+ if ((so = getsonode(sock, &error, &fp)) == NULL) {
+ return (set_errno(error));
+ }
+
+ if (namelen != 0) {
+ error = ltos_sockaddr_copyin((struct sockaddr *)name, namelen,
+ &addr, &len, &sun_type);
+ if (error != 0) {
+ releasef(sock);
+ return (set_errno(error));
+ }
+ }
+
+ if (addr != NULL && addr->sa_family == AF_UNIX) {
+ vnode_t *vp;
+
+ error = so_ux_lookup(so, (struct sockaddr_un *)addr, B_TRUE,
+ &vp);
+ if (error == 0) {
+ /* A valid socket exists and is open at this address. */
+ VN_RELE(vp);
+ } else {
+ /* Keep track of paths which are not valid sockets. */
+ if (error == ENOTSOCK) {
+ not_sock = B_TRUE;
+ }
+
+ /*
+ * When binding to an abstract namespace address or
+ * /dev/log, implicit clean-up must occur if there is
+ * not a valid socket at the specififed address. See
+ * ltos_sockaddr_copyin for details about why these
+ * socket types act differently.
+ */
+ if (sun_type == LX_SUN_ABSTRACT) {
+ (void) vn_removeat(NULL, addr->sa_data,
+ UIO_SYSSPACE, RMFILE);
+ }
+ }
+ }
+
+ error = socket_bind(so, addr, len, _SOBIND_XPG4_2, CRED());
+
+ /*
+ * Linux returns EADDRINUSE for attempts to bind to Unix domain
+ * sockets that aren't sockets.
+ */
+ if (error == EINVAL && addr != NULL && addr->sa_family == AF_UNIX &&
+ not_sock == B_TRUE) {
+ error = EADDRINUSE;
+ }
+
+ releasef(sock);
+
+ if (addr != NULL) {
+ kmem_free(addr, len);
+ }
+
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (0);
+}
+
+long
+lx_connect(long sock, uintptr_t name, socklen_t namelen)
+{
+ struct sonode *so;
+ struct sockaddr *addr = NULL;
+ lx_socket_aux_data_t *sad = NULL;
+ socklen_t len = 0;
+ file_t *fp;
+ int error;
+
+ if ((so = getsonode(sock, &error, &fp)) == NULL) {
+ return (set_errno(error));
+ }
+
+ /*
+ * Ensure the name is sized appropriately before we alloc memory and
+ * copy it in from userspace. We need at least the address family to
+ * make later sizing decisions.
+ */
+ if (namelen != 0) {
+ error = ltos_sockaddr_copyin((struct sockaddr *)name, namelen,
+ &addr, &len, NULL);
+ if (error != 0) {
+ releasef(sock);
+ return (set_errno(error));
+ }
+ }
+
+ error = socket_connect(so, addr, len, fp->f_flag,
+ _SOCONNECT_XPG4_2, CRED());
+
+ /*
+ * Linux connect(2) behavior is rather strange when using the
+ * O_NONBLOCK flag. The first call will return EINPROGRESS, as
+ * expected. Provided that is successful, a second call to connect
+ * will return 0 instead of EISCONN. Subsequent connect calls will
+ * return EISCONN.
+ */
+ if ((fp->f_flag & FNONBLOCK) != 0 && error != 0) {
+ sad = lx_sad_acquire(SOTOV(so));
+ if (error == EISCONN &&
+ sad->lxsad_status == LXSS_CONNECTING) {
+ /* Report the one success */
+ sad->lxsad_status = LXSS_CONNECTED;
+ error = 0;
+ } else if (error == EINPROGRESS) {
+ sad->lxsad_status = LXSS_CONNECTING;
+ }
+ mutex_exit(&sad->lxsad_lock);
+ }
+
+ /*
+ * When connecting to a UDP socket, configure it so that future
+ * sendto/sendmsg operations are allowed to specify a destination
+ * address. See the Posix spec. for sendto(2). Linux allows this while
+ * illumos would return EISCONN if the option is not set.
+ */
+ if (error == 0 && so->so_protocol == IPPROTO_UDP &&
+ (so->so_family == AF_INET || so->so_family == AF_INET6)) {
+ int val = 1;
+
+ DTRACE_PROBE(lx__connect__udp);
+ (void) socket_setsockopt(so, IPPROTO_UDP, UDP_SND_TO_CONNECTED,
+ &val, sizeof (val), CRED());
+ }
+
+ releasef(sock);
+
+ if (addr != NULL) {
+ kmem_free(addr, len);
+ }
+
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (0);
+}
+
+/*
+ * Custom version of socket_recvmsg for error-handling overrides.
+ */
+static int
+lx_socket_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
+ cred_t *cr)
+{
+ int error;
+ ssize_t orig_resid = uiop->uio_resid;
+
+ /*
+ * Do not bypass the cache when reading data, as the application
+ * is likely to access the data shortly.
+ */
+ uiop->uio_extflg |= UIO_COPY_CACHED;
+
+ error = SOP_RECVMSG(so, msg, uiop, cr);
+
+ switch (error) {
+ case EINTR:
+ /* EAGAIN is EWOULDBLOCK */
+ case EWOULDBLOCK:
+ /* We did a partial read */
+ if (uiop->uio_resid != orig_resid)
+ error = 0;
+ break;
+ case ENOTCONN:
+ /*
+ * The rules are different for non-blocking sockets which are
+ * still in the process of making a connection
+ */
+ if ((msg->msg_flags & MSG_DONTWAIT) != 0 ||
+ (uiop->uio_fmode & (FNONBLOCK|FNDELAY)) != 0) {
+ error = EAGAIN;
+ }
+ break;
+ default:
+ break;
+ }
+ return (error);
+}
+
+static long
+lx_recv_common(int sock, struct nmsghdr *msg, xuio_t *xuiop, int flags,
+ void *namelenp, void *controllenp, void *flagsp)
+{
+ struct sonode *so;
+ file_t *fp;
+ void *name;
+ socklen_t namelen;
+ void *control;
+ socklen_t controllen;
+ ssize_t len;
+ int error;
+ boolean_t fd_cloexec;
+ boolean_t is_peek_trunc;
+
+ if ((so = getsonode(sock, &error, &fp)) == NULL) {
+ return (set_errno(error));
+ }
+
+ fd_cloexec = ((flags & LX_MSG_CMSG_CLOEXEC) != 0);
+ flags = lx_xlate_sock_flags(flags, LX_TO_SUNOS);
+ is_peek_trunc = (flags & (MSG_PEEK|MSG_TRUNC)) == (MSG_PEEK|MSG_TRUNC);
+ len = xuiop->xu_uio.uio_resid;
+ xuiop->xu_uio.uio_fmode = fp->f_flag;
+ xuiop->xu_uio.uio_extflg = UIO_COPY_CACHED;
+
+ /*
+ * Linux accepts MSG_TRUNC as an input flag, unlike SunOS and many
+ * other UNIX distributions. When combined with MSG_PEEK, it causes
+ * recvmsg to return the size of the waiting message, regardless of
+ * buffer size. This behavior is commonly used with a 0-length buffer
+ * to interrogate the size of a queued message prior to allocating a
+ * buffer for it.
+ *
+ * In order to support this functionality, a custom XUIO type is used
+ * to communicate the total message size out from the depths of sockfs.
+ */
+ if (is_peek_trunc) {
+ xuiop->xu_uio.uio_extflg |= UIO_XUIO;
+ xuiop->xu_type = UIOTYPE_PEEKSIZE;
+ xuiop->xu_ext.xu_ps.xu_ps_set = B_FALSE;
+ xuiop->xu_ext.xu_ps.xu_ps_size = 0;
+ }
+
+ name = msg->msg_name;
+ namelen = msg->msg_namelen;
+ control = msg->msg_control;
+ controllen = msg->msg_controllen;
+
+ /*
+ * socket_recvmsg will allocate these if needed.
+ * NULL them out to prevent any confusion.
+ */
+ msg->msg_name = NULL;
+ msg->msg_control = NULL;
+
+ msg->msg_flags = flags & (MSG_OOB | MSG_PEEK | MSG_WAITALL |
+ MSG_DONTWAIT);
+ /* Default to XPG4.2 operation */
+ msg->msg_flags |= MSG_XPG4_2;
+
+ error = lx_socket_recvmsg(so, msg, (struct uio *)xuiop, CRED());
+ if (error) {
+ releasef(sock);
+ return (set_errno(error));
+ }
+ lwp_stat_update(LWP_STAT_MSGRCV, 1);
+ releasef(sock);
+
+ if (namelen != 0) {
+ error = stol_sockaddr_copyout(msg->msg_name, msg->msg_namelen,
+ name, namelenp, namelen);
+
+ if (msg->msg_namelen != 0) {
+ kmem_free(msg->msg_name, (size_t)msg->msg_namelen);
+ msg->msg_namelen = 0;
+ }
+
+ /*
+ * Errors during copyout of the name are not a concern to Linux
+ * callers at this point in the syscall
+ */
+ if (error != 0 && error != EFAULT) {
+ goto err;
+ }
+ }
+
+ if (controllen != 0) {
+ if (fd_cloexec) {
+ /*
+ * If CLOEXEC needs to set on file descriptors passed
+ * via SCM_RIGHTS, do so before formatting the cmsgs
+ * for Linux.
+ */
+ lx_cmsg_set_cloexec(msg->msg_control,
+ msg->msg_controllen);
+ }
+ if (so->so_family == AF_UNIX &&
+ (so->so_mode & SM_CONNREQUIRED) != 0) {
+ /*
+ * It may be necessary to append a SCM_UCRED cmsg to
+ * the controls if SO_PASSCRED is set on a
+ * connection-oriented AF_UNIX socket.
+ *
+ * See lx_setsockopt_socket for more details.
+ */
+ if (lx_cmsg_try_ucred(so, msg, controllen) != 0) {
+ msg->msg_flags |= MSG_CTRUNC;
+ }
+ }
+
+ error = stol_cmsgs_copyout(msg->msg_control,
+ msg->msg_controllen, control, controllenp, controllen);
+
+ if (error != 0) {
+ /*
+ * If there was an error during cmsg translation or
+ * copyout, we need to clean up any FDs that are being
+ * passed back via SCM_RIGHTS. This prevents us from
+ * leaking those open files.
+ */
+ so_closefds(msg->msg_control, msg->msg_controllen, 0,
+ 0);
+
+ /*
+ * An error during cmsg_copyout means we had
+ * _something_ to process.
+ */
+ VERIFY(msg->msg_controllen != 0);
+
+ kmem_free(msg->msg_control,
+ (size_t)msg->msg_controllen);
+ msg->msg_controllen = 0;
+
+ if (error == EMSGSIZE) {
+ /* Communicate that messages were truncated */
+ msg->msg_flags |= MSG_CTRUNC;
+ error = 0;
+ } else {
+ goto err;
+ }
+ } else if (msg->msg_controllen != 0) {
+ kmem_free(msg->msg_control,
+ (size_t)msg->msg_controllen);
+ msg->msg_controllen = 0;
+ }
+ }
+
+ if (flagsp != NULL) {
+ int flags;
+
+ /* Clear internal flag. */
+ flags = msg->msg_flags & ~MSG_XPG4_2;
+ flags = lx_xlate_sock_flags(flags, SUNOS_TO_LX);
+
+ if (copyout(&flags, flagsp, sizeof (flags) != 0)) {
+ error = EFAULT;
+ goto err;
+ }
+ }
+
+ /*
+ * If both MSG_PEEK|MSG_TRUNC were set on the input flags and the
+ * socket layer was able to calculate the total message size for us,
+ * return that instead of the copied size.
+ */
+ if (is_peek_trunc && xuiop->xu_ext.xu_ps.xu_ps_set == B_TRUE) {
+ return (xuiop->xu_ext.xu_ps.xu_ps_size);
+ }
+
+ return (len - xuiop->xu_uio.uio_resid);
+
+err:
+ if (msg->msg_controllen != 0) {
+ /* Prevent FD leakage (see above) */
+ so_closefds(msg->msg_control, msg->msg_controllen, 0, 0);
+ kmem_free(msg->msg_control, (size_t)msg->msg_controllen);
+ }
+ if (msg->msg_namelen != 0) {
+ kmem_free(msg->msg_name, (size_t)msg->msg_namelen);
+ }
+ return (set_errno(error));
+}
+
+long
+lx_recv(int sock, void *buffer, size_t len, int flags)
+{
+ struct nmsghdr smsg;
+ xuio_t xuio;
+ struct iovec uiov;
+
+ if ((ssize_t)len < 0) {
+ /*
+ * The input len is unsigned, so limit it to SSIZE_MAX since
+ * the return value is signed.
+ */
+ return (set_errno(EINVAL));
+ }
+
+ uiov.iov_base = buffer;
+ uiov.iov_len = len;
+ xuio.xu_uio.uio_loffset = 0;
+ xuio.xu_uio.uio_iov = &uiov;
+ xuio.xu_uio.uio_iovcnt = 1;
+ xuio.xu_uio.uio_resid = len;
+ xuio.xu_uio.uio_segflg = UIO_USERSPACE;
+ xuio.xu_uio.uio_limit = 0;
+
+ smsg.msg_namelen = 0;
+ smsg.msg_controllen = 0;
+ smsg.msg_flags = 0;
+ return (lx_recv_common(sock, &smsg, &xuio, flags, NULL, NULL, NULL));
+}
+
+long
+lx_recvfrom(int sock, void *buffer, size_t len, int flags,
+ struct sockaddr *srcaddr, socklen_t *addrlenp)
+{
+ struct nmsghdr smsg;
+ xuio_t xuio;
+ struct iovec uiov;
+
+ if ((ssize_t)len < 0) {
+ /* Keep len reasonably limited (see lx_recv) */
+ return (set_errno(EINVAL));
+ }
+
+ uiov.iov_base = buffer;
+ uiov.iov_len = len;
+ xuio.xu_uio.uio_loffset = 0;
+ xuio.xu_uio.uio_iov = &uiov;
+ xuio.xu_uio.uio_iovcnt = 1;
+ xuio.xu_uio.uio_resid = len;
+ xuio.xu_uio.uio_segflg = UIO_USERSPACE;
+ xuio.xu_uio.uio_limit = 0;
+
+ smsg.msg_name = (char *)srcaddr;
+ if (addrlenp != NULL && srcaddr != NULL) {
+ /*
+ * Despite addrlenp being defined as a socklen_t *, Linux
+ * treats it internally as an int *. Certain LTP tests depend
+ * upon this behavior, so we must emulate it as well.
+ */
+ int namelen;
+
+ if (copyin(addrlenp, &namelen, sizeof (namelen)) != 0) {
+ return (set_errno(EFAULT));
+ }
+ if (namelen < 0) {
+ return (set_errno(EINVAL));
+ }
+ smsg.msg_namelen = namelen;
+ } else {
+ smsg.msg_namelen = 0;
+ }
+ smsg.msg_controllen = 0;
+ smsg.msg_flags = 0;
+
+ return (lx_recv_common(sock, &smsg, &xuio, flags, addrlenp, NULL,
+ NULL));
+}
+
+long
+lx_recvmsg(int sock, void *msg, int flags)
+{
+ struct nmsghdr smsg;
+ xuio_t xuio;
+ struct iovec luiov[IOV_MAX_STACK], *uiov;
+ int i, iovcnt, iovsize;
+ long res;
+ ssize_t len = 0;
+ void *namelenp, *controllenp, *flagsp;
+
+#if defined(_LP64)
+ if (get_udatamodel() != DATAMODEL_NATIVE) {
+ lx_msghdr32_t lmsg32;
+ if (copyin(msg, &lmsg32, sizeof (lmsg32)) != 0) {
+ return (set_errno(EFAULT));
+ }
+ smsg.msg_name = (void *)(uintptr_t)lmsg32.msg_name;
+ smsg.msg_namelen = lmsg32.msg_namelen;
+ smsg.msg_iov = (struct iovec *)(uintptr_t)lmsg32.msg_iov;
+ smsg.msg_iovlen = lmsg32.msg_iovlen;
+ smsg.msg_control = (void *)(uintptr_t)lmsg32.msg_control;
+ smsg.msg_controllen = lmsg32.msg_controllen;
+ smsg.msg_flags = lmsg32.msg_flags;
+
+ namelenp = &((lx_msghdr32_t *)msg)->msg_namelen;
+ controllenp = &((lx_msghdr32_t *)msg)->msg_controllen;
+ flagsp = &((lx_msghdr32_t *)msg)->msg_flags;
+ } else
+#endif /* defined(_LP64) */
+ {
+ lx_msghdr_t lmsg;
+ if (copyin(msg, &lmsg, sizeof (lmsg)) != 0) {
+ return (set_errno(EFAULT));
+ }
+ smsg.msg_name = lmsg.msg_name;
+ smsg.msg_namelen = lmsg.msg_namelen;
+ smsg.msg_iov = lmsg.msg_iov;
+ smsg.msg_iovlen = lmsg.msg_iovlen;
+ smsg.msg_control = lmsg.msg_control;
+ smsg.msg_controllen = lmsg.msg_controllen;
+ smsg.msg_flags = lmsg.msg_flags;
+
+ namelenp = &((lx_msghdr_t *)msg)->msg_namelen;
+ controllenp = &((lx_msghdr_t *)msg)->msg_controllen;
+ flagsp = &((lx_msghdr_t *)msg)->msg_flags;
+ }
+
+ iovcnt = smsg.msg_iovlen;
+ if (iovcnt < 0 || iovcnt > IOV_MAX) {
+ return (set_errno(EMSGSIZE));
+ }
+ if (iovcnt > IOV_MAX_STACK) {
+ iovsize = iovcnt * sizeof (struct iovec);
+ uiov = kmem_alloc(iovsize, KM_SLEEP);
+ } else if (iovcnt > 0) {
+ iovsize = 0;
+ uiov = luiov;
+ } else {
+ iovsize = 0;
+ uiov = NULL;
+ goto noiov;
+ }
+
+#if defined(_LP64)
+ if (get_udatamodel() != DATAMODEL_NATIVE) {
+ /* convert from 32bit iovec structs */
+ struct iovec32 luiov32[IOV_MAX_STACK], *uiov32;
+ ssize_t iov32size;
+ ssize32_t count32;
+
+ iov32size = iovcnt * sizeof (struct iovec32);
+ if (iovsize != 0) {
+ uiov32 = kmem_alloc(iov32size, KM_SLEEP);
+ } else {
+ uiov32 = luiov32;
+ }
+
+ if (copyin((struct iovec32 *)smsg.msg_iov, uiov32, iov32size)) {
+ if (iovsize != 0) {
+ kmem_free(uiov32, iov32size);
+ kmem_free(uiov, iovsize);
+ }
+
+ return (set_errno(EFAULT));
+ }
+
+ count32 = 0;
+ for (i = 0; i < iovcnt; i++) {
+ ssize32_t iovlen32;
+
+ iovlen32 = uiov32[i].iov_len;
+ count32 += iovlen32;
+ if (iovlen32 < 0 || count32 < 0) {
+ if (iovsize != 0) {
+ kmem_free(uiov32, iov32size);
+ kmem_free(uiov, iovsize);
+ }
+
+ return (set_errno(EINVAL));
+ }
+
+ uiov[i].iov_len = iovlen32;
+ uiov[i].iov_base =
+ (caddr_t)(uintptr_t)uiov32[i].iov_base;
+ }
+ len = count32;
+
+ if (iovsize != 0) {
+ kmem_free(uiov32, iov32size);
+ }
+ } else
+#endif /* defined(_LP64) */
+ {
+ if (copyin(smsg.msg_iov, uiov,
+ iovcnt * sizeof (struct iovec)) != 0) {
+ if (iovsize != 0) {
+ kmem_free(uiov, iovsize);
+ }
+ return (set_errno(EFAULT));
+ }
+
+ len = 0;
+ for (i = 0; i < iovcnt; i++) {
+ ssize_t iovlen = uiov[i].iov_len;
+ len += iovlen;
+ if (iovlen < 0 || len < 0) {
+ if (iovsize != 0) {
+ kmem_free(uiov, iovsize);
+ }
+ return (set_errno(EINVAL));
+ }
+ }
+ }
+
+noiov:
+ /* Since the iovec is passed via the uio, NULL it out in the msg */
+ smsg.msg_iov = NULL;
+
+ xuio.xu_uio.uio_loffset = 0;
+ xuio.xu_uio.uio_iov = uiov;
+ xuio.xu_uio.uio_iovcnt = iovcnt;
+ xuio.xu_uio.uio_resid = len;
+ xuio.xu_uio.uio_segflg = UIO_USERSPACE;
+ xuio.xu_uio.uio_limit = 0;
+
+ res = lx_recv_common(sock, &smsg, &xuio, flags, namelenp, controllenp,
+ flagsp);
+
+ if (iovsize != 0) {
+ kmem_free(uiov, iovsize);
+ }
+
+ return (res);
+}
+
+/*
+ * Custom version of socket_sendmsg for error-handling overrides.
+ */
+static int
+lx_socket_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
+ cred_t *cr, boolean_t nosig)
+{
+ int error = 0;
+ ssize_t orig_resid = uiop->uio_resid;
+
+ /*
+ * Do not bypass the cache if we are doing a local (AF_UNIX) write.
+ */
+ if (so->so_family == AF_UNIX) {
+ uiop->uio_extflg |= UIO_COPY_CACHED;
+ } else {
+ uiop->uio_extflg &= ~UIO_COPY_CACHED;
+ }
+
+ error = SOP_SENDMSG(so, msg, uiop, cr);
+
+ switch (error) {
+ case EINTR:
+ case ENOMEM:
+ /* EAGAIN is EWOULDBLOCK */
+ case EWOULDBLOCK:
+ /* We did a partial send */
+ if (uiop->uio_resid != orig_resid) {
+ error = 0;
+ }
+ break;
+
+ case ENOTCONN:
+ /*
+ * The rules are different for non-blocking sockets which are
+ * still in the process of making a connection
+ */
+ if ((msg->msg_flags & MSG_DONTWAIT) != 0 ||
+ (uiop->uio_fmode & (FNONBLOCK|FNDELAY)) != 0) {
+ error = EAGAIN;
+ break;
+ }
+
+ /* Appease LTP and match behavior detailed in the man page */
+ error = EPIPE;
+ /* FALLTHROUGH */
+ case EPIPE:
+ if (nosig == B_FALSE) {
+ tsignal(curthread, SIGPIPE);
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return (error);
+}
+
+static long
+lx_send_common(int sock, struct nmsghdr *msg, struct uio *uiop, int flags)
+{
+ struct sonode *so;
+ file_t *fp;
+ struct sockaddr *name = NULL;
+ socklen_t namelen;
+ void *control = NULL;
+ socklen_t controllen;
+ ssize_t len = 0;
+ int error;
+ boolean_t nosig;
+
+ if ((so = getsonode(sock, &error, &fp)) == NULL) {
+ return (set_errno(error));
+ }
+
+ uiop->uio_fmode = fp->f_flag;
+
+ /* Allocate and copyin name and control */
+ if (msg->msg_name != NULL && msg->msg_namelen != 0) {
+ ASSERT(MUTEX_NOT_HELD(&so->so_lock));
+
+ error = ltos_sockaddr_copyin((struct sockaddr *)msg->msg_name,
+ msg->msg_namelen, &name, &namelen, NULL);
+ if (error != 0) {
+ goto done;
+ }
+ /* copyin_name null terminates addresses for AF_UNIX */
+ msg->msg_namelen = namelen;
+ msg->msg_name = name;
+ } else {
+ msg->msg_name = name = NULL;
+ msg->msg_namelen = namelen = 0;
+ }
+
+ if (msg->msg_control != NULL && msg->msg_controllen != 0) {
+ /*
+ * Verify that the length is not excessive to prevent
+ * an application from consuming all of kernel memory.
+ */
+ if (msg->msg_controllen > SO_MAXARGSIZE) {
+ error = EINVAL;
+ goto done;
+ }
+ if ((error = ltos_cmsgs_copyin(msg->msg_control,
+ msg->msg_controllen, &control, &controllen)) != 0) {
+ goto done;
+ }
+ msg->msg_control = control;
+ msg->msg_controllen = controllen;
+ } else {
+ msg->msg_control = control = NULL;
+ msg->msg_controllen = controllen = 0;
+ }
+
+ len = uiop->uio_resid;
+ msg->msg_flags = lx_xlate_sock_flags(flags, LX_TO_SUNOS);
+ /* Default to XPG4.2 operation */
+ msg->msg_flags |= MSG_XPG4_2;
+ nosig = ((flags & LX_MSG_NOSIGNAL) != 0);
+
+ error = lx_socket_sendmsg(so, msg, uiop, CRED(), nosig);
+done:
+ if (control != NULL) {
+ kmem_free(control, controllen);
+ }
+ if (name != NULL) {
+ kmem_free(name, namelen);
+ }
+ if (error != 0) {
+ releasef(sock);
+ return (set_errno(error));
+ }
+ lwp_stat_update(LWP_STAT_MSGSND, 1);
+ releasef(sock);
+ return (len - uiop->uio_resid);
+}
+
+long
+lx_send(int sock, void *buffer, size_t len, int flags)
+{
+ struct nmsghdr smsg;
+ struct uio auio;
+ struct iovec aiov[1];
+
+ if ((ssize_t)len < 0) {
+ /* Keep len reasonably limited (see lx_recv) */
+ return (set_errno(EINVAL));
+ }
+
+ aiov[0].iov_base = buffer;
+ aiov[0].iov_len = len;
+ auio.uio_loffset = 0;
+ auio.uio_iov = aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_resid = len;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_limit = 0;
+
+ smsg.msg_name = NULL;
+ smsg.msg_control = NULL;
+ return (lx_send_common(sock, &smsg, &auio, flags));
+}
+
+long
+lx_sendto(int sock, void *buffer, size_t len, int flags,
+ struct sockaddr *dstaddr, socklen_t addrlen)
+{
+ struct nmsghdr smsg;
+ struct uio auio;
+ struct iovec aiov[1];
+
+ if ((ssize_t)len < 0) {
+ /* Keep len reasonably limited (see lx_recv) */
+ return (set_errno(EINVAL));
+ }
+
+ aiov[0].iov_base = buffer;
+ aiov[0].iov_len = len;
+ auio.uio_loffset = 0;
+ auio.uio_iov = aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_resid = len;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_limit = 0;
+
+ smsg.msg_name = (char *)dstaddr;
+ smsg.msg_namelen = addrlen;
+ smsg.msg_control = NULL;
+ return (lx_send_common(sock, &smsg, &auio, flags));
+}
+
+long
+lx_sendmsg(int sock, void *msg, int flags)
+{
+ struct nmsghdr smsg;
+ struct uio auio;
+ struct iovec buf[IOV_MAX_STACK], *aiov;
+ int i, iovcnt, iovsize;
+ long res;
+ ssize_t len = 0;
+
+#if defined(_LP64)
+ if (get_udatamodel() != DATAMODEL_NATIVE) {
+ lx_msghdr32_t lmsg32;
+ if (copyin(msg, &lmsg32, sizeof (lmsg32)) != 0) {
+ return (set_errno(EFAULT));
+ }
+ smsg.msg_name = (void *)(uintptr_t)lmsg32.msg_name;
+ smsg.msg_namelen = lmsg32.msg_namelen;
+ smsg.msg_iov = (struct iovec *)(uintptr_t)lmsg32.msg_iov;
+ smsg.msg_iovlen = lmsg32.msg_iovlen;
+ smsg.msg_control = (void *)(uintptr_t)lmsg32.msg_control;
+ smsg.msg_controllen = lmsg32.msg_controllen;
+ smsg.msg_flags = lmsg32.msg_flags;
+ } else
+#endif /* defined(_LP64) */
+ {
+ lx_msghdr_t lmsg;
+ if (copyin(msg, &lmsg, sizeof (lmsg)) != 0) {
+ return (set_errno(EFAULT));
+ }
+ smsg.msg_name = lmsg.msg_name;
+ smsg.msg_namelen = lmsg.msg_namelen;
+ smsg.msg_iov = lmsg.msg_iov;
+ smsg.msg_iovlen = lmsg.msg_iovlen;
+ smsg.msg_control = lmsg.msg_control;
+ smsg.msg_controllen = lmsg.msg_controllen;
+ smsg.msg_flags = lmsg.msg_flags;
+ }
+
+ iovcnt = smsg.msg_iovlen;
+ if (iovcnt <= 0 || iovcnt > IOV_MAX) {
+ return (set_errno(EMSGSIZE));
+ }
+ if (iovcnt > IOV_MAX_STACK) {
+ iovsize = iovcnt * sizeof (struct iovec);
+ aiov = kmem_alloc(iovsize, KM_SLEEP);
+ } else {
+ iovsize = 0;
+ aiov = buf;
+ }
+
+#if defined(_LP64)
+ if (get_udatamodel() != DATAMODEL_NATIVE) {
+ /* convert from 32bit iovec structs */
+ struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
+ ssize_t iov32size;
+ ssize32_t count32;
+
+ iov32size = iovcnt * sizeof (struct iovec32);
+ if (iovsize != 0) {
+ aiov32 = kmem_alloc(iov32size, KM_SLEEP);
+ }
+
+ if (copyin((struct iovec32 *)smsg.msg_iov, aiov32, iov32size)) {
+ if (iovsize != 0) {
+ kmem_free(aiov32, iov32size);
+ kmem_free(aiov, iovsize);
+ }
+
+ return (set_errno(EFAULT));
+ }
+
+ count32 = 0;
+ for (i = 0; i < iovcnt; i++) {
+ ssize32_t iovlen32;
+
+ iovlen32 = aiov32[i].iov_len;
+ count32 += iovlen32;
+ if (iovlen32 < 0 || count32 < 0) {
+ if (iovsize != 0) {
+ kmem_free(aiov32, iov32size);
+ kmem_free(aiov, iovsize);
+ }
+
+ return (set_errno(EINVAL));
+ }
+
+ aiov[i].iov_len = iovlen32;
+ aiov[i].iov_base =
+ (caddr_t)(uintptr_t)aiov32[i].iov_base;
+ }
+ len = count32;
+
+ if (iovsize != 0) {
+ kmem_free(aiov32, iov32size);
+ }
+ } else
+#endif /* defined(_LP64) */
+ {
+ if (copyin(smsg.msg_iov, aiov,
+ iovcnt * sizeof (struct iovec)) != 0) {
+ if (iovsize != 0) {
+ kmem_free(aiov, iovsize);
+ }
+ return (set_errno(EFAULT));
+ }
+
+ len = 0;
+ for (i = 0; i < iovcnt; i++) {
+ ssize_t iovlen = aiov[i].iov_len;
+
+ len += iovlen;
+ if (iovlen < 0 || len < 0) {
+ if (iovsize != 0) {
+ kmem_free(aiov, iovsize);
+ }
+ return (set_errno(EINVAL));
+ }
+ }
+ }
+ /* Since the iovec is passed via the uio, NULL it out in the msg */
+ smsg.msg_iov = NULL;
+
+ auio.uio_loffset = 0;
+ auio.uio_iov = aiov;
+ auio.uio_iovcnt = iovcnt;
+ auio.uio_resid = len;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_limit = 0;
+
+ res = lx_send_common(sock, &smsg, &auio, flags);
+
+ if (iovsize != 0) {
+ kmem_free(aiov, iovsize);
+ }
+
+ return (res);
+}
+
+/*
+ * Linux socket option type definitions
+ *
+ * The protocol `levels` are well defined (see in.h) The option values are
+ * not so well defined. Linux often uses different values vs. Illumos
+ * although they mean the same thing. For example, IP_TOS in Linux is
+ * defined as value 1 but in Illumos it is defined as value 3. This table
+ * maps all the Protocol levels to their options and maps them between
+ * Linux and Illumos and vice versa. Hence the reason for the complexity.
+ *
+ * For a certain subset of sockopts, Linux will implicitly truncate optval
+ * input, so long as optlen meets a minimum size. Because illumos is strict
+ * about optlen, we must cap optlen for those options.
+ */
+
+typedef struct lx_sockopt_map {
+ const int lsm_opt; /* Illumos-native equivalent */
+ const int lsm_lcap; /* Cap optlen to this size. (Ignored if 0) */
+} lx_sockopt_map_t;
+
+typedef struct lx_proto_opts {
+ const lx_sockopt_map_t *lpo_entries; /* Linux->SunOS map entries */
+ unsigned int lpo_max; /* max entries in table */
+} lx_proto_opts_t;
+
+#define OPTNOTSUP -1 /* we don't support it */
+
+#define PROTO_SOCKOPTS(opts) \
+ { (opts), sizeof ((opts)) / sizeof ((opts)[0]) }
+
+/* Shorten name so the columns can line up */
+#define IP_MREQ_SZ sizeof (struct ip_mreq)
+
+static const lx_sockopt_map_t ltos_ip_sockopts[LX_IP_UNICAST_IF + 1] = {
+ { OPTNOTSUP, 0 },
+ { IP_TOS, sizeof (int) }, /* IP_TOS */
+ { IP_TTL, sizeof (int) }, /* IP_TTL */
+ { IP_HDRINCL, sizeof (int) }, /* IP_HDRINCL */
+ { IP_OPTIONS, 0 }, /* IP_OPTIONS */
+ { OPTNOTSUP, 0 }, /* IP_ROUTER_ALERT */
+ { IP_RECVOPTS, sizeof (int) }, /* IP_RECVOPTS */
+ { IP_RETOPTS, sizeof (int) }, /* IP_RETOPTS */
+ { IP_PKTINFO, sizeof (int) }, /* IP_PKTINFO */
+ { OPTNOTSUP, 0 }, /* IP_PKTOPTIONS */
+ { OPTNOTSUP, 0 }, /* IP_MTUDISCOVER */
+ { OPTNOTSUP, 0 }, /* IP_RECVERR */
+ { IP_RECVTTL, sizeof (int) }, /* IP_RECVTTL */
+ { OPTNOTSUP, 0 }, /* IP_RECVTOS */
+ { OPTNOTSUP, 0 }, /* IP_MTU */
+ { OPTNOTSUP, 0 }, /* IP_FREEBIND */
+ { OPTNOTSUP, 0 }, /* IP_IPSEC_POLICY */
+ { OPTNOTSUP, 0 }, /* IP_XFRM_POLICY */
+ { OPTNOTSUP, 0 }, /* IP_PASSSEC */
+ { OPTNOTSUP, 0 }, /* IP_TRANSPARENT */
+ { OPTNOTSUP, 0 }, /* IP_ORIGDSTADDR */
+ { OPTNOTSUP, 0 }, /* IP_MINTTL */
+ { OPTNOTSUP, 0 }, /* IP_NODEFRAG */
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { IP_MULTICAST_IF, sizeof (int) }, /* IP_MULTICAST_IF */
+ { IP_MULTICAST_TTL, sizeof (int) }, /* IP_MULTICAST_TTL */
+ { IP_MULTICAST_LOOP, sizeof (int) }, /* IP_MULTICAST_LOOP */
+ { IP_ADD_MEMBERSHIP, IP_MREQ_SZ }, /* IP_ADD_MEMBERSHIP */
+ { IP_DROP_MEMBERSHIP, IP_MREQ_SZ }, /* IP_DROP_MEMBERSHIP */
+ { IP_UNBLOCK_SOURCE, 0 }, /* IP_UNBLOCK_SOURCE */
+ { IP_BLOCK_SOURCE, 0 }, /* IP_BLOCK_SOURCE */
+ { IP_ADD_SOURCE_MEMBERSHIP, 0 }, /* IP_ADD_SOURCE_MEMBERSHIP */
+ { OPTNOTSUP, 0 }, /* IP_DROP_SOURCE_MEMBERSHIP */
+ { OPTNOTSUP, 0 }, /* IP_MSFILTER */
+ { OPTNOTSUP, 0 }, /* MCAST_JOIN_GROUP */
+ { OPTNOTSUP, 0 }, /* MCAST_BLOCK_SOURCE */
+ { OPTNOTSUP, 0 }, /* MCAST_UNBLOCK_SOURCE */
+ { OPTNOTSUP, 0 }, /* MCAST_LEAVE_GROUP */
+ { OPTNOTSUP, 0 }, /* MCAST_JOIN_SOURCE_GROUP */
+ { OPTNOTSUP, 0 }, /* MCAST_LEAVE_SOURCE_GROUP */
+ { OPTNOTSUP, 0 }, /* MCAST_MSFILTER */
+ { OPTNOTSUP, 0 }, /* IP_MULTICAST_ALL */
+ { OPTNOTSUP, 0 } /* IP_UNICAST_IF */
+};
+
+static const lx_sockopt_map_t ltos_ipv6_sockopts[LX_IPV6_TCLASS + 1] = {
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 }, /* IPV6_ADDRFORM */
+ { OPTNOTSUP, 0 }, /* IPV6_2292PKTINFO */
+ { OPTNOTSUP, 0 }, /* IPV6_2292HOPOPTS */
+ { OPTNOTSUP, 0 }, /* IPV6_2292DSTOPTS */
+ { OPTNOTSUP, 0 }, /* IPV6_2292RTHDR */
+ { OPTNOTSUP, 0 }, /* IPV6_2292PKTOPTIONS */
+ { IPV6_CHECKSUM, sizeof (int) }, /* IPV6_CHECKSUM */
+ { OPTNOTSUP, 0 }, /* IPV6_2292HOPLIMIT */
+ { OPTNOTSUP, 0 }, /* IPV6_NEXTHOP */
+ { OPTNOTSUP, 0 }, /* IPV6_AUTHHDR */
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { IPV6_UNICAST_HOPS, sizeof (int) }, /* IPV6_UNICAST_HOPS */
+ { IPV6_MULTICAST_IF, sizeof (int) }, /* IPV6_MULTICAST_IF */
+ { IPV6_MULTICAST_HOPS, sizeof (int) }, /* IPV6_MULTICAST_HOPS */
+ { IPV6_MULTICAST_LOOP, sizeof (int) }, /* IPV6_MULTICAST_LOOP */
+ { OPTNOTSUP, 0 }, /* IPV6_JOIN_GROUP */
+ { OPTNOTSUP, 0 }, /* IPV6_LEAVE_GROUP */
+ { OPTNOTSUP, 0 }, /* IPV6_ROUTER_ALERT */
+ { OPTNOTSUP, 0 }, /* IPV6_MTU_DISCOVER */
+ { OPTNOTSUP, 0 }, /* IPV6_MTU */
+ { OPTNOTSUP, 0 }, /* IPV6_RECVERR */
+ { IPV6_V6ONLY, sizeof (int) }, /* IPV6_V6ONLY */
+ { OPTNOTSUP, 0 }, /* IPV6_JOIN_ANYCAST */
+ { OPTNOTSUP, 0 }, /* IPV6_LEAVE_ANYCAST */
+ { OPTNOTSUP, 0 }, /* IPV6_IPSEC_POLICY */
+ { OPTNOTSUP, 0 }, /* IPV6_XFRM_POLICY */
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { IPV6_RECVPKTINFO, sizeof (int) }, /* IPV6_RECVPKTINFO */
+ { IPV6_PKTINFO, 0 }, /* IPV6_PKTINFO */
+ { IPV6_RECVHOPLIMIT, sizeof (int) }, /* IPV6_RECVHOPLIMIT */
+ { IPV6_HOPLIMIT, 0 }, /* IPV6_HOPLIMIT */
+ { OPTNOTSUP, 0 }, /* IPV6_RECVHOPOPTS */
+ { OPTNOTSUP, 0 }, /* IPV6_HOPOPTS */
+ { OPTNOTSUP, 0 }, /* IPV6_RTHDRDSTOPTS */
+ { OPTNOTSUP, 0 }, /* IPV6_RECVRTHDR */
+ { OPTNOTSUP, 0 }, /* IPV6_RTHDR */
+ { OPTNOTSUP, 0 }, /* IPV6_RECVDSTOPTS */
+ { OPTNOTSUP, 0 }, /* IPV6_DSTOPTS */
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 }, /* IPV6_RECVTCLASS */
+ { IPV6_TCLASS, sizeof (int) } /* IPV6_TCLASS */
+};
+
+static const lx_sockopt_map_t ltos_icmpv6_sockopts[LX_ICMP6_FILTER + 1] = {
+ { OPTNOTSUP, 0 },
+ { ICMP6_FILTER, 0 } /* ICMP6_FILTER */
+};
+
+static const lx_sockopt_map_t ltos_tcp_sockopts[LX_TCP_NOTSENT_LOWAT + 1] = {
+ { OPTNOTSUP, 0 },
+ { TCP_NODELAY, sizeof (int) }, /* TCP_NODELAY */
+ { TCP_MAXSEG, sizeof (int) }, /* TCP_MAXSEG */
+ { TCP_CORK, sizeof (int) }, /* TCP_CORK */
+ { TCP_KEEPIDLE, sizeof (int) }, /* TCP_KEEPIDLE */
+ { TCP_KEEPINTVL, sizeof (int) }, /* TCP_KEEPINTVL */
+ { TCP_KEEPCNT, sizeof (int) }, /* TCP_KEEPCNT */
+ { OPTNOTSUP, 0 }, /* TCP_SYNCNT */
+ { TCP_LINGER2, sizeof (int) }, /* TCP_LINGER2 */
+ { OPTNOTSUP, 0 }, /* TCP_DEFER_ACCEPT */
+ { OPTNOTSUP, 0 }, /* TCP_WINDOW_CLAMP */
+ { OPTNOTSUP, 0 }, /* TCP_INFO */
+ { OPTNOTSUP, 0 }, /* TCP_QUICKACK */
+ { OPTNOTSUP, 0 }, /* TCP_CONGESTION */
+ { OPTNOTSUP, 0 }, /* TCP_MD5SIG */
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 }, /* TCP_THIN_LINEAR_TIMEOUTS */
+ { OPTNOTSUP, 0 }, /* TCP_THIN_DUPACK */
+ { OPTNOTSUP, 0 }, /* TCP_USER_TIMEOUT */
+ { OPTNOTSUP, 0 }, /* TCP_REPAIR */
+ { OPTNOTSUP, 0 }, /* TCP_REPAIR_QUEUE */
+ { OPTNOTSUP, 0 }, /* TCP_QUEUE_SEQ */
+ { OPTNOTSUP, 0 }, /* TCP_REPAIR_OPTIONS */
+ { OPTNOTSUP, 0 }, /* TCP_FASTOPEN */
+ { OPTNOTSUP, 0 }, /* TCP_TIMESTAMP */
+ { OPTNOTSUP, 0 } /* TCP_NOTSENT_LOWAT */
+};
+
+static const lx_sockopt_map_t ltos_igmp_sockopts[IGMP_MTRACE + 1] = {
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { IGMP_MINLEN, 0 }, /* IGMP_MINLEN */
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { IGMP_MEMBERSHIP_QUERY, 0 }, /* IGMP_HOST_MEMBERSHIP_QUERY */
+ { IGMP_V1_MEMBERSHIP_REPORT, 0 }, /* IGMP_HOST_MEMBERSHIP_REPORT */
+ { IGMP_DVMRP, 0 }, /* IGMP_DVMRP */
+ { IGMP_PIM, 0 }, /* IGMP_PIM */
+ { OPTNOTSUP, 0 }, /* IGMP_TRACE */
+ { IGMP_V2_MEMBERSHIP_REPORT, 0 }, /* IGMPV2_HOST_MEMBERSHIP_REPORT */
+ { IGMP_V2_LEAVE_GROUP, 0 }, /* IGMP_HOST_LEAVE_MESSAGE */
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 },
+ { IGMP_MTRACE_RESP, 0 }, /* IGMP_MTRACE_RESP */
+ { IGMP_MTRACE, 0 } /* IGMP_MTRACE */
+};
+
+static const lx_sockopt_map_t ltos_socket_sockopts[LX_SO_BPF_EXTENSIONS + 1] = {
+ { OPTNOTSUP, 0 },
+ { SO_DEBUG, sizeof (int) }, /* SO_DEBUG */
+ { SO_REUSEADDR, sizeof (int) }, /* SO_REUSEADDR */
+ { SO_TYPE, 0 }, /* SO_TYPE */
+ { SO_ERROR, 0 }, /* SO_ERROR */
+ { SO_DONTROUTE, sizeof (int) }, /* SO_DONTROUTE */
+ { SO_BROADCAST, sizeof (int) }, /* SO_BROADCAST */
+ { SO_SNDBUF, sizeof (int) }, /* SO_SNDBUF */
+ { SO_RCVBUF, sizeof (int) }, /* SO_RCVBUF */
+ { SO_KEEPALIVE, sizeof (int) }, /* SO_KEEPALIVE */
+ { SO_OOBINLINE, sizeof (int) }, /* SO_OOBINLINE */
+ { OPTNOTSUP, 0 }, /* SO_NO_CHECK */
+ { OPTNOTSUP, 0 }, /* SO_PRIORITY */
+ { SO_LINGER, 0 }, /* SO_LINGER */
+ { OPTNOTSUP, 0 }, /* SO_BSDCOMPAT */
+ { SO_REUSEPORT, sizeof (int) }, /* SO_REUSEPORT */
+ { SO_RECVUCRED, sizeof (int) }, /* SO_PASSCRED */
+ { OPTNOTSUP, 0 }, /* SO_PEERCRED */
+ { SO_RCVLOWAT, sizeof (int) }, /* SO_RCVLOWAT */
+ { SO_SNDLOWAT, sizeof (int) }, /* SO_SNDLOWAT */
+ { SO_RCVTIMEO, 0 }, /* SO_RCVTIMEO */
+ { SO_SNDTIMEO, 0 }, /* SO_SNDTIMEO */
+ { OPTNOTSUP, 0 }, /* SO_SECURITY_AUTHENTICATION */
+ { OPTNOTSUP, 0 }, /* SO_SECURITY_ENCRYPTION_TRANSPORT */
+ { OPTNOTSUP, 0 }, /* SO_SECURITY_ENCRYPTION_NETWORK */
+ { OPTNOTSUP, 0 }, /* SO_BINDTODEVICE */
+ { SO_ATTACH_FILTER, 0 }, /* SO_ATTACH_FILTER */
+ { SO_DETACH_FILTER, 0 }, /* SO_DETACH_FILTER */
+ { OPTNOTSUP, 0 }, /* SO_PEERNAME */
+ { SO_TIMESTAMP, sizeof (int) }, /* SO_TIMESTAMP */
+ { SO_ACCEPTCONN, 0 }, /* SO_ACCEPTCONN */
+ { OPTNOTSUP, 0 }, /* SO_PEERSEC */
+ { SO_SNDBUF, sizeof (int) }, /* SO_SNDBUFFORCE */
+ { SO_RCVBUF, sizeof (int) }, /* SO_RCVBUFFORCE */
+ { OPTNOTSUP, 0 }, /* SO_PASSSEC */
+ { OPTNOTSUP, 0 }, /* SO_TIMESTAMPNS */
+ { OPTNOTSUP, 0 }, /* SO_MARK */
+ { OPTNOTSUP, 0 }, /* SO_TIMESTAMPING */
+ { SO_PROTOTYPE, 0 }, /* SO_PROTOCOL */
+ { SO_DOMAIN, 0 }, /* SO_DOMAIN */
+ { OPTNOTSUP, 0 }, /* SO_RXQ_OVFL */
+ { OPTNOTSUP, 0 }, /* SO_WIFI_STATUS */
+ { OPTNOTSUP, 0 }, /* SO_PEEK_OFF */
+ { OPTNOTSUP, 0 }, /* SO_NOFCS */
+ { OPTNOTSUP, 0 }, /* SO_LOCK_FILTER */
+ { OPTNOTSUP, 0 }, /* SO_SELECT_ERR_QUEUE */
+ { OPTNOTSUP, 0 }, /* SO_BUSY_POLL */
+ { OPTNOTSUP, 0 }, /* SO_MAX_PACING_RATE */
+ { OPTNOTSUP, 0 } /* SO_BPF_EXTENSIONS */
+};
+
+static const lx_sockopt_map_t ltos_raw_sockopts[LX_ICMP_FILTER + 1] = {
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 } /* ICMP_FILTER */
+};
+
+static const lx_sockopt_map_t ltos_packet_sockopts[LX_PACKET_STATISTICS + 1] = {
+ { OPTNOTSUP, 0 },
+ { PACKET_ADD_MEMBERSHIP, 0 }, /* PACKET_ADD_MEMBERSHIP */
+ { PACKET_DROP_MEMBERSHIP, 0 }, /* PACKET_DROP_MEMBERSHIP */
+ { OPTNOTSUP, 0 }, /* PACKET_RECV_OUTPUT */
+ { OPTNOTSUP, 0 },
+ { OPTNOTSUP, 0 }, /* PACKET_RX_RING */
+ { PACKET_STATISTICS, 0 } /* PACKET_STATISTICS */
+};
+
+/* Needed for SO_ATTACH_FILTER */
+struct lx_bpf_program {
+ unsigned short bf_len;
+ caddr_t bf_insns;
+};
+
+/* Invert filter fields as Linux expects */
+#define LX_ICMP6_FILTER_INVERT(filterp) ( \
+ ((filterp)->__icmp6_filt[0] ^= 0xFFFFFFFFU), \
+ ((filterp)->__icmp6_filt[1] ^= 0xFFFFFFFFU), \
+ ((filterp)->__icmp6_filt[2] ^= 0xFFFFFFFFU), \
+ ((filterp)->__icmp6_filt[3] ^= 0xFFFFFFFFU), \
+ ((filterp)->__icmp6_filt[4] ^= 0xFFFFFFFFU), \
+ ((filterp)->__icmp6_filt[5] ^= 0xFFFFFFFFU), \
+ ((filterp)->__icmp6_filt[6] ^= 0xFFFFFFFFU), \
+ ((filterp)->__icmp6_filt[7] ^= 0xFFFFFFFFU))
+
+static boolean_t
+lx_sockopt_lookup(lx_proto_opts_t tbl, int *optname, socklen_t *optlen)
+{
+ const lx_sockopt_map_t *entry;
+
+ if (*optname > tbl.lpo_max) {
+ return (B_FALSE);
+ }
+ entry = &tbl.lpo_entries[*optname];
+ if (entry->lsm_opt == OPTNOTSUP) {
+ return (B_FALSE);
+ }
+ *optname = entry->lsm_opt;
+ /* Truncate the optlen if needed/allowed */
+ if (entry->lsm_lcap != 0 && *optlen > entry->lsm_lcap) {
+ *optlen = entry->lsm_lcap;
+ }
+ return (B_TRUE);
+}
+
+static int
+lx_setsockopt_ip(sonode_t *so, int optname, void *optval, socklen_t optlen)
+{
+ int error;
+ int *intval = (int *)optval;
+ lx_proto_opts_t sockopts_tbl = PROTO_SOCKOPTS(ltos_ip_sockopts);
+
+ switch (optname) {
+ case LX_IP_RECVERR:
+ /*
+ * Ping sets this option to receive errors on raw sockets.
+ * Currently we just ignore it to make ping happy. From the
+ * Linux ip.7 man page:
+ *
+ * For raw sockets, IP_RECVERR enables passing of all
+ * received ICMP errors to the application.
+ *
+ * Programs known to depend upon this:
+ * - ping
+ * - traceroute
+ * - mount.nfs
+ */
+ return (0);
+
+ case LX_IP_MTU_DISCOVER: {
+ int val;
+
+ /*
+ * We translate Linux's IP_MTU_DISCOVER into our IP_DONTFRAG,
+ * allowing this be a byte or an integer and observing the
+ * inverted sense of the two relative to one another (and
+ * translating accordingly).
+ */
+ if (optlen < sizeof (int)) {
+ val = *((uint8_t *)optval);
+ } else {
+ val = *((int *)optval);
+ }
+
+ switch (val) {
+ case LX_IP_PMTUDISC_DONT:
+ val = 1;
+ break;
+
+ case LX_IP_PMTUDISC_DO:
+ case LX_IP_PMTUDISC_WANT:
+ val = 0;
+ break;
+
+ default:
+ return (EOPNOTSUPP);
+ }
+
+ error = socket_setsockopt(so, IPPROTO_IP, IP_DONTFRAG,
+ &val, sizeof (val), CRED());
+ return (error);
+ }
+
+ case LX_IP_MULTICAST_TTL:
+ case LX_IP_MULTICAST_LOOP:
+ /*
+ * For IP_MULTICAST_TTL and IP_MULTICAST_LOOP, Linux defines
+ * the option value to be an integer while we define it to be
+ * an unsigned character. To prevent the kernel from spitting
+ * back an error on an illegal length, verify that the option
+ * value is less than UCHAR_MAX before truncating optlen.
+ */
+ if (optlen <= 0 || optlen > sizeof (int) ||
+ *intval > UINT8_MAX) {
+ return (EINVAL);
+ }
+ optlen = sizeof (uchar_t);
+ break;
+
+ default:
+ break;
+ }
+
+ if (!lx_sockopt_lookup(sockopts_tbl, &optname, &optlen)) {
+ return (ENOPROTOOPT);
+ }
+
+ error = socket_setsockopt(so, IPPROTO_IP, optname, optval, optlen,
+ CRED());
+ return (error);
+}
+
+static int
+lx_setsockopt_ipv6(sonode_t *so, int optname, void *optval, socklen_t optlen)
+{
+ int error;
+ lx_proto_opts_t sockopts_tbl = PROTO_SOCKOPTS(ltos_ipv6_sockopts);
+
+ if (optname == LX_IPV6_MTU) {
+ /*
+ * There isn't a good translation for IPV6_MTU and certain apps
+ * such as bind9 will bail if it cannot be set.
+ * We just lie about the success for now.
+ */
+ return (0);
+ }
+
+ if (!lx_sockopt_lookup(sockopts_tbl, &optname, &optlen)) {
+ return (ENOPROTOOPT);
+ }
+ error = socket_setsockopt(so, IPPROTO_IPV6, optname, optval, optlen,
+ CRED());
+ return (error);
+}
+
+static int
+lx_setsockopt_icmpv6(sonode_t *so, int optname, void *optval, socklen_t optlen)
+{
+ int error;
+ lx_proto_opts_t sockopts_tbl = PROTO_SOCKOPTS(ltos_icmpv6_sockopts);
+
+ if (optname == LX_ICMP6_FILTER && optval != NULL) {
+ /*
+ * Surprise! The input to ICMP6_FILTER on Linux is inverted
+ * when compared to illumos.
+ */
+ if (optlen != sizeof (icmp6_filter_t)) {
+ return (EINVAL);
+ }
+ LX_ICMP6_FILTER_INVERT((icmp6_filter_t *)optval);
+ }
+
+ if (!lx_sockopt_lookup(sockopts_tbl, &optname, &optlen)) {
+ return (ENOPROTOOPT);
+ }
+ error = socket_setsockopt(so, IPPROTO_ICMPV6, optname, optval, optlen,
+ CRED());
+ return (error);
+}
+
+static int
+lx_setsockopt_tcp(sonode_t *so, int optname, void *optval, socklen_t optlen)
+{
+ int error;
+ lx_proto_opts_t sockopts_tbl = PROTO_SOCKOPTS(ltos_tcp_sockopts);
+
+ if (optname == LX_TCP_DEFER_ACCEPT) {
+ int *intval;
+ char *dfp;
+
+ /*
+ * Emulate TCP_DEFER_ACCEPT using the datafilt(7M) socket
+ * filter but we can't emulate the timeout aspect so treat any
+ * non-zero value as enabling and zero as disabling.
+ */
+ if (optlen != sizeof (int)) {
+ return (EINVAL);
+ }
+ intval = (int *)optval;
+
+ /*
+ * socket_setsockopt asserts that the optval is aligned, so
+ * we use kmem_alloc to ensure this.
+ */
+ dfp = (char *)kmem_alloc(sizeof (DATAFILT), KM_SLEEP);
+ (void) strcpy(dfp, DATAFILT);
+
+ if (*intval > 0) {
+ error = socket_setsockopt(so, SOL_FILTER, FIL_ATTACH,
+ dfp, 9, CRED());
+ if (error == EEXIST) {
+ error = 0;
+ }
+ } else {
+ error = socket_setsockopt(so, SOL_FILTER, FIL_DETACH,
+ dfp, 9, CRED());
+ if (error == ENXIO) {
+ error = 0;
+ }
+ }
+ kmem_free(dfp, sizeof (DATAFILT));
+ return (error);
+ }
+
+ if (!lx_sockopt_lookup(sockopts_tbl, &optname, &optlen)) {
+ return (ENOPROTOOPT);
+ }
+
+ error = socket_setsockopt(so, IPPROTO_TCP, optname, optval, optlen,
+ CRED());
+ return (error);
+}
+
+static int
+lx_setsockopt_socket(sonode_t *so, int optname, void *optval, socklen_t optlen)
+{
+ int error;
+ lx_proto_opts_t sockopts_tbl = PROTO_SOCKOPTS(ltos_socket_sockopts);
+ struct lx_bpf_program *lbp;
+ int *intval;
+ struct bpf_program bp;
+
+ switch (optname) {
+ case LX_SO_BSDCOMPAT:
+ /* Linux ignores this option. */
+ return (0);
+
+ case LX_SO_TIMESTAMP:
+ /*
+ * SO_TIMESTAMP is not supported on AF_UNIX sockets but we have
+ * some of those which apps use for logging, etc., so pretend
+ * this worked.
+ */
+ if (so->so_family == AF_UNIX) {
+ return (0);
+ }
+ break;
+
+ case LX_SO_ATTACH_FILTER:
+ /*
+ * Convert bpf program struct
+ */
+ if (optlen != sizeof (struct lx_bpf_program)) {
+ return (EINVAL);
+ }
+ lbp = (struct lx_bpf_program *)optval;
+ bp.bf_len = lbp->bf_len;
+ bp.bf_insns = (struct bpf_insn *)lbp->bf_insns;
+ optval = &bp;
+ break;
+
+ case LX_SO_PASSSEC:
+ /*
+ * SO_PASSSEC is very similar to SO_PASSCRED (emulated by
+ * SO_RECVUCRED) in that it requests that cmsgs containing
+ * identity information be attached to recieved messages.
+ * Instead of ucred information, security-module-specific
+ * information such as selinux label is expected
+ *
+ * Since LX does not at all support selinux today, the
+ * option is silently accepted.
+ */
+ return (0);
+
+ case LX_SO_PASSCRED:
+ /*
+ * In many cases, the Linux SO_PASSCRED is mapped to the SunOS
+ * SO_RECVUCRED to enable the passing of peer credential
+ * information via received cmsgs. One exception is for
+ * connection-oriented AF_UNIX sockets which do not yet support
+ * that option. Instead, we track the setting internally and,
+ * when there is appropriate cmsg space, emulate the credential
+ * passing by querying the STREAMS ioctl.
+ */
+ if (so->so_family == AF_UNIX &&
+ (so->so_mode & SM_CONNREQUIRED) != 0) {
+ lx_socket_aux_data_t *sad;
+
+ if (optlen != sizeof (int)) {
+ return (EINVAL);
+ }
+ intval = (int *)optval;
+ sad = lx_sad_acquire(SOTOV(so));
+ sad->lxsad_stream_cred = !(*intval == 0);
+ mutex_exit(&sad->lxsad_lock);
+ return (0);
+ }
+ break;
+ }
+
+ if (!lx_sockopt_lookup(sockopts_tbl, &optname, &optlen)) {
+ return (ENOPROTOOPT);
+ }
+
+ error = socket_setsockopt(so, SOL_SOCKET, optname, optval, optlen,
+ CRED());
+ return (error);
+}
+
+static int
+lx_setsockopt_raw(sonode_t *so, int optname, void *optval, socklen_t optlen)
+{
+ int error;
+ lx_proto_opts_t sockopts_tbl = PROTO_SOCKOPTS(ltos_raw_sockopts);
+
+ switch (optname) {
+ case LX_ICMP_FILTER:
+ /*
+ * This option is currently ignored to appease ping.
+ */
+ return (0);
+
+ case LX_IPV6_CHECKSUM:
+ /*
+ * Ping6 tries to set the IPV6_CHECKSUM offset in a way that
+ * illumos won't allow. Quietly ignore this to prevent it from
+ * complaining.
+ */
+ return (0);
+
+ default:
+ break;
+ }
+
+ if (!lx_sockopt_lookup(sockopts_tbl, &optname, &optlen)) {
+ return (ENOPROTOOPT);
+ }
+
+ error = socket_setsockopt(so, IPPROTO_TCP, optname, optval, optlen,
+ CRED());
+ return (error);
+}
+
+static int
+lx_setsockopt_packet(sonode_t *so, int optname, void *optval, socklen_t optlen)
+{
+ int error;
+ lx_proto_opts_t sockopts_tbl = PROTO_SOCKOPTS(ltos_packet_sockopts);
+ struct packet_mreq *mr;
+
+ switch (optname) {
+ case LX_PACKET_ADD_MEMBERSHIP:
+ case LX_PACKET_DROP_MEMBERSHIP:
+ /* Convert Linux mr_type to illumos */
+ if (optlen != sizeof (struct packet_mreq)) {
+ return (EINVAL);
+ }
+ mr = (struct packet_mreq *)optval;
+ if (--mr->mr_type > PACKET_MR_ALLMULTI)
+ return (EINVAL);
+ optval = mr;
+ break;
+
+ default:
+ break;
+ }
+
+ if (!lx_sockopt_lookup(sockopts_tbl, &optname, &optlen)) {
+ return (ENOPROTOOPT);
+ }
+
+ error = socket_setsockopt(so, SOL_PACKET, optname, optval, optlen,
+ CRED());
+ return (error);
+}
+
+static int
+lx_setsockopt_igmp(sonode_t *so, int optname, void *optval, socklen_t optlen)
+{
+ int error;
+ lx_proto_opts_t sockopts_tbl = PROTO_SOCKOPTS(ltos_igmp_sockopts);
+
+ if (!lx_sockopt_lookup(sockopts_tbl, &optname, &optlen)) {
+ return (ENOPROTOOPT);
+ }
+
+ error = socket_setsockopt(so, IPPROTO_IGMP, optname, optval, optlen,
+ CRED());
+ return (error);
+}
+
+static int
+lx_getsockopt_ip(sonode_t *so, int optname, void *optval, socklen_t *optlen)
+{
+ int error = 0;
+ lx_proto_opts_t sockopts_tbl = PROTO_SOCKOPTS(ltos_ip_sockopts);
+
+ if (!lx_sockopt_lookup(sockopts_tbl, &optname, optlen)) {
+ return (ENOPROTOOPT);
+ }
+
+ error = socket_getsockopt(so, IPPROTO_IP, optname, optval, optlen, 0,
+ CRED());
+ return (error);
+}
+
+static int
+lx_getsockopt_ipv6(sonode_t *so, int optname, void *optval, socklen_t *optlen)
+{
+ int error = 0;
+ lx_proto_opts_t sockopts_tbl = PROTO_SOCKOPTS(ltos_ipv6_sockopts);
+
+ if (!lx_sockopt_lookup(sockopts_tbl, &optname, optlen)) {
+ return (ENOPROTOOPT);
+ }
+
+ error = socket_getsockopt(so, IPPROTO_IPV6, optname, optval, optlen, 0,
+ CRED());
+ return (error);
+}
+
+static int
+lx_getsockopt_icmpv6(sonode_t *so, int optname, void *optval,
+ socklen_t *optlen)
+{
+ int error = 0;
+ lx_proto_opts_t sockopts_tbl = PROTO_SOCKOPTS(ltos_icmpv6_sockopts);
+
+ if (optname == LX_ICMP6_FILTER) {
+ error = socket_getsockopt(so, IPPROTO_ICMPV6, ICMP6_FILTER,
+ optval, optlen, 0, CRED());
+
+ /*
+ * ICMP6_FILTER is inverted on Linux. Make it so before copying
+ * back to caller's buffer.
+ */
+ if (error == 0) {
+ LX_ICMP6_FILTER_INVERT((icmp6_filter_t *)optval);
+ }
+ return (error);
+ }
+
+ if (!lx_sockopt_lookup(sockopts_tbl, &optname, optlen)) {
+ return (ENOPROTOOPT);
+ }
+
+ error = socket_getsockopt(so, IPPROTO_ICMPV6, optname, optval, optlen,
+ 0, CRED());
+ return (error);
+}
+
+static int
+lx_getsockopt_tcp(sonode_t *so, int optname, void *optval, socklen_t *optlen)
+{
+ int error = 0;
+ int *intval = (int *)optval;
+ lx_proto_opts_t sockopts_tbl = PROTO_SOCKOPTS(ltos_tcp_sockopts);
+
+ switch (optname) {
+ case LX_TCP_CORK:
+ /*
+ * We do not support TCP_CORK but some apps rely on it. Rather
+ * than return an error we just return 0. This isn't exactly a
+ * lie, since this option really isn't set, but it's not the
+ * whole truth either. Fortunately, we aren't under oath.
+ */
+ if (*optlen < sizeof (int)) {
+ error = EINVAL;
+ } else {
+ *intval = 0;
+ }
+ *optlen = sizeof (int);
+ return (error);
+
+ case LX_TCP_DEFER_ACCEPT:
+ /*
+ * We do support TCP_DEFER_ACCEPT using the datafilt(7M) socket
+ * filter but we don't emulate the timeout aspect so treat the
+ * existence as 1 and absence as 0.
+ */
+ if (*optlen < sizeof (int)) {
+ error = EINVAL;
+ } else {
+ struct fil_info fi[10];
+ int i;
+ socklen_t len = sizeof (fi);
+
+ if ((error = socket_getsockopt(so, SOL_FILTER,
+ FIL_LIST, fi, &len, 0, CRED()) != 0)) {
+ *optlen = sizeof (int);
+ return (error);
+ }
+
+ *intval = 0;
+ len = len / sizeof (struct fil_info);
+ for (i = 0; i < len; i++) {
+ if (fi[i].fi_flags == FILF_PROG &&
+ strcmp(fi[i].fi_name, "datafilt") == 0) {
+ *intval = 1;
+ break;
+ }
+ }
+ }
+ *optlen = sizeof (int);
+ return (error);
+ default:
+ break;
+ }
+
+ if (!lx_sockopt_lookup(sockopts_tbl, &optname, optlen)) {
+ return (ENOPROTOOPT);
+ }
+
+ error = socket_getsockopt(so, IPPROTO_TCP, optname, optval, optlen, 0,
+ CRED());
+ return (error);
+}
+
+static int
+lx_getsockopt_socket(sonode_t *so, int optname, void *optval,
+ socklen_t *optlen)
+{
+ int error = 0;
+ int *intval = (int *)optval;
+ lx_proto_opts_t sockopts_tbl = PROTO_SOCKOPTS(ltos_socket_sockopts);
+
+ switch (optname) {
+ case LX_SO_PASSSEC:
+ /*
+ * Communicate value of 0 since selinux-related functionality
+ * is not supported.
+ */
+ if (*optlen < sizeof (int)) {
+ error = EINVAL;
+ } else {
+ *intval = 0;
+ }
+ *optlen = sizeof (int);
+ return (error);
+
+ case LX_SO_PASSCRED:
+ /*
+ * Special handling for connection-oriented AF_UNIX sockets.
+ * See lx_setsockopt_socket for more details.
+ */
+ if (so->so_family == AF_UNIX &&
+ (so->so_mode & SM_CONNREQUIRED) != 0) {
+ lx_socket_aux_data_t *sad;
+
+ if (*optlen < sizeof (int)) {
+ return (EINVAL);
+ }
+ sad = lx_sad_acquire(SOTOV(so));
+ *intval = sad->lxsad_stream_cred;
+ *optlen = sizeof (int);
+ mutex_exit(&sad->lxsad_lock);
+ return (0);
+ }
+ break;
+
+ case LX_SO_PEERCRED:
+ if (*optlen < sizeof (struct lx_ucred)) {
+ error = EINVAL;
+ } else {
+ struct lx_ucred *lcred = (struct lx_ucred *)optval;
+
+ mutex_enter(&so->so_lock);
+ if ((so->so_mode & SM_CONNREQUIRED) == 0) {
+ error = ENOTSUP;
+ } else if (so->so_peercred == NULL) {
+ error = EINVAL;
+ } else {
+ lcred->lxu_uid = crgetuid(so->so_peercred);
+ lcred->lxu_gid = crgetgid(so->so_peercred);
+ lcred->lxu_pid = so->so_cpid;
+ }
+ mutex_exit(&so->so_lock);
+ }
+ *optlen = sizeof (struct lx_ucred);
+ return (error);
+
+ default:
+ break;
+ }
+
+ if (!lx_sockopt_lookup(sockopts_tbl, &optname, optlen)) {
+ return (ENOPROTOOPT);
+ }
+
+ error = socket_getsockopt(so, SOL_SOCKET, optname, optval, optlen, 0,
+ CRED());
+
+ if (error == 0) {
+ switch (optname) {
+ case SO_TYPE:
+ /* translate our type back to Linux */
+ *intval = STOL_SOCKTYPE(*intval);
+ break;
+
+ case SO_ERROR:
+ *intval = lx_errno(*intval, EINVAL);
+ break;
+ default:
+ break;
+ }
+ }
+ return (error);
+}
+
+static int
+lx_getsockopt_raw(sonode_t *so, int optname, void *optval, socklen_t *optlen)
+{
+ int error = 0;
+ lx_proto_opts_t sockopts_tbl = PROTO_SOCKOPTS(ltos_raw_sockopts);
+
+ if (!lx_sockopt_lookup(sockopts_tbl, &optname, optlen)) {
+ return (ENOPROTOOPT);
+ }
+
+ error = socket_getsockopt(so, IPPROTO_RAW, optname, optval, optlen, 0,
+ CRED());
+ return (error);
+}
+
+static int
+lx_getsockopt_packet(sonode_t *so, int optname, void *optval,
+ socklen_t *optlen)
+{
+ int error = 0;
+ lx_proto_opts_t sockopts_tbl = PROTO_SOCKOPTS(ltos_packet_sockopts);
+
+ if (!lx_sockopt_lookup(sockopts_tbl, &optname, optlen)) {
+ return (ENOPROTOOPT);
+ }
+
+ error = socket_getsockopt(so, SOL_PACKET, optname, optval, optlen, 0,
+ CRED());
+ return (error);
+}
+
+static int
+lx_getsockopt_igmp(sonode_t *so, int optname, void *optval, socklen_t *optlen)
+{
+ int error = 0;
+ lx_proto_opts_t sockopts_tbl = PROTO_SOCKOPTS(ltos_igmp_sockopts);
+
+ if (!lx_sockopt_lookup(sockopts_tbl, &optname, optlen)) {
+ return (ENOPROTOOPT);
+ }
+
+ error = socket_getsockopt(so, IPPROTO_IGMP, optname, optval, optlen, 0,
+ CRED());
+ return (error);
+}
+
+long
+lx_setsockopt(int sock, int level, int optname, void *optval, socklen_t optlen)
+{
+ struct sonode *so;
+ file_t *fp;
+ int buflen = 0;
+ intptr_t stkbuf[2];
+ void *optbuf = stkbuf;
+ int error = 0;
+
+ if (optlen != 0) {
+ if (optlen > SO_MAXARGSIZE) {
+ return (set_errno(EINVAL));
+ }
+ if (optlen > sizeof (stkbuf)) {
+ buflen = optlen;
+ optbuf = kmem_alloc(optlen, KM_SLEEP);
+ } else {
+ /*
+ * Zero the on-stack buffer to avoid poisoning smaller
+ * optvals with stack garbage.
+ */
+ stkbuf[0] = 0;
+ stkbuf[1] = 0;
+ }
+ if (copyin(optval, optbuf, optlen) != 0) {
+ if (buflen != 0) {
+ kmem_free(optbuf, buflen);
+ }
+ return (set_errno(EFAULT));
+ }
+ } else {
+ optbuf = NULL;
+ }
+ if ((so = getsonode(sock, &error, &fp)) == NULL) {
+ if (buflen != 0) {
+ kmem_free(optbuf, buflen);
+ }
+ return (set_errno(error));
+ }
+
+ switch (level) {
+ case LX_IPPROTO_IP:
+ error = lx_setsockopt_ip(so, optname, optbuf, optlen);
+ break;
+ case LX_IPPROTO_IPV6:
+ error = lx_setsockopt_ipv6(so, optname, optbuf, optlen);
+ break;
+ case LX_IPPROTO_ICMPV6:
+ error = lx_setsockopt_icmpv6(so, optname, optbuf, optlen);
+ break;
+ case LX_IPPROTO_TCP:
+ error = lx_setsockopt_tcp(so, optname, optbuf, optlen);
+ break;
+ case LX_SOL_SOCKET:
+ error = lx_setsockopt_socket(so, optname, optbuf, optlen);
+ break;
+ case LX_IPPROTO_RAW:
+ error = lx_setsockopt_raw(so, optname, optbuf, optlen);
+ break;
+ case LX_SOL_PACKET:
+ error = lx_setsockopt_packet(so, optname, optbuf, optlen);
+ break;
+ case LX_IPPROTO_IGMP:
+ error = lx_setsockopt_igmp(so, optname, optbuf, optlen);
+ break;
+ case LX_SOL_NETLINK:
+ /*
+ * Since our netlink implmentation is modeled after Linux,
+ * sockopts can be passed directly through.
+ */
+ error = socket_setsockopt(so, LX_SOL_NETLINK, optname, optval,
+ optlen, CRED());
+ break;
+ default:
+ error = ENOPROTOOPT;
+ break;
+ }
+
+ if (error == ENOPROTOOPT) {
+ char buf[LX_UNSUP_BUFSZ];
+
+ snprintf(buf, LX_UNSUP_BUFSZ, "setsockopt(%d, %d)", level,
+ optname);
+ lx_unsupported(buf);
+ }
+ if (buflen != 0) {
+ kmem_free(optbuf, buflen);
+ }
+ releasef(sock);
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (0);
+}
+
+long
+lx_getsockopt(int sock, int level, int optname, void *optval,
+ socklen_t *optlenp)
+{
+ struct sonode *so;
+ file_t *fp;
+ int error = 0, buflen = 0;
+ socklen_t optlen;
+ intptr_t stkbuf[2];
+ void *optbuf = stkbuf;
+
+ if (copyin(optlenp, &optlen, sizeof (optlen)) != 0) {
+ return (set_errno(EFAULT));
+ }
+ if (optlen != 0) {
+ if (optlen > SO_MAXARGSIZE) {
+ return (set_errno(EINVAL));
+ }
+ if (optlen > sizeof (stkbuf)) {
+ buflen = optlen;
+ optbuf = kmem_zalloc(optlen, KM_SLEEP);
+ } else {
+ /* zero the on-stack buffer, just in case */
+ stkbuf[0] = 0;
+ stkbuf[1] = 0;
+ }
+ } else {
+ optbuf = NULL;
+ }
+ if ((so = getsonode(sock, &error, &fp)) == NULL) {
+ if (buflen != 0) {
+ kmem_free(optbuf, buflen);
+ }
+ return (set_errno(error));
+ }
+
+ switch (level) {
+ case LX_IPPROTO_IP:
+ error = lx_getsockopt_ip(so, optname, optbuf, &optlen);
+ break;
+ case LX_IPPROTO_IPV6:
+ error = lx_getsockopt_ipv6(so, optname, optbuf, &optlen);
+ break;
+ case LX_IPPROTO_ICMPV6:
+ error = lx_getsockopt_icmpv6(so, optname, optbuf, &optlen);
+ break;
+ case LX_IPPROTO_TCP:
+ error = lx_getsockopt_tcp(so, optname, optbuf, &optlen);
+ break;
+ case LX_SOL_SOCKET:
+ error = lx_getsockopt_socket(so, optname, optbuf, &optlen);
+ break;
+ case LX_IPPROTO_RAW:
+ error = lx_getsockopt_raw(so, optname, optbuf, &optlen);
+ break;
+ case LX_SOL_PACKET:
+ error = lx_getsockopt_packet(so, optname, optbuf, &optlen);
+ break;
+ case LX_IPPROTO_IGMP:
+ error = lx_getsockopt_igmp(so, optname, optbuf, &optlen);
+ break;
+ case LX_SOL_NETLINK:
+ /*
+ * Since our netlink implmentation is modeled after Linux,
+ * sockopts can be passed directly through.
+ */
+ error = socket_getsockopt(so, LX_SOL_NETLINK, optname, optval,
+ &optlen, 0, CRED());
+ break;
+ default:
+ error = EOPNOTSUPP;
+ break;
+ }
+
+ if (error == ENOPROTOOPT) {
+ char buf[LX_UNSUP_BUFSZ];
+
+ snprintf(buf, LX_UNSUP_BUFSZ, "getsockopt(%d, %d)", level,
+ optname);
+ lx_unsupported(buf);
+ }
+ if (copyout(&optlen, optlenp, sizeof (optlen)) != 0) {
+ error = EFAULT;
+ }
+ if (error == 0 && optlen > 0) {
+ VERIFY(optlen <= sizeof (stkbuf) || optlen <= buflen);
+ if (copyout(optbuf, optval, optlen) != 0) {
+ error = EFAULT;
+ }
+ }
+ if (buflen != 0) {
+ kmem_free(optbuf, buflen);
+ }
+ releasef(sock);
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (0);
+}
+
+long
+lx_getname_common(lx_getname_type_t type, int sockfd, void *np, int *nlp)
+{
+ struct sockaddr_storage buf;
+ struct sockaddr *name = (struct sockaddr *)&buf;
+ socklen_t namelen, namelen_orig;
+ int err, tmp;
+ struct sonode *so;
+
+ /* We need to validate the name address up front to pass LTP. */
+ if (copyin(np, &tmp, sizeof (tmp)) != 0)
+ return (set_errno(EFAULT));
+
+ if (copyin(nlp, &namelen, sizeof (socklen_t)) != 0)
+ return (set_errno(EFAULT));
+ namelen_orig = namelen;
+
+ /* LTP can pass -1 */
+ if ((int)namelen < 0)
+ return (set_errno(EINVAL));
+
+ if ((so = getsonode(sockfd, &err, NULL)) == NULL)
+ return (set_errno(err));
+
+ bzero(&buf, sizeof (buf));
+ namelen = sizeof (struct sockaddr_storage);
+ if (type == LX_GETPEERNAME) {
+ err = socket_getpeername(so, name, &namelen, B_FALSE, CRED());
+ } else {
+ err = socket_getsockname(so, name, &namelen, CRED());
+ }
+
+ if (err == 0) {
+ ASSERT(namelen <= so->so_max_addr_len);
+ err = stol_sockaddr_copyout(name, namelen,
+ (struct sockaddr *)np, (socklen_t *)nlp, namelen_orig);
+ }
+
+ releasef(sockfd);
+ return (err != 0 ? set_errno(err) : 0);
+}
+
+long
+lx_getpeername(int sockfd, void *np, int *nlp)
+{
+ return (lx_getname_common(LX_GETPEERNAME, sockfd, np, nlp));
+}
+
+long
+lx_getsockname(int sockfd, void *np, int *nlp)
+{
+ return (lx_getname_common(LX_GETSOCKNAME, sockfd, np, nlp));
+}
+
+static int
+lx_accept_common(int sock, struct sockaddr *name, socklen_t *nlp, int flags)
+{
+ struct sonode *so;
+ file_t *fp;
+ int error;
+ socklen_t namelen;
+ struct sonode *nso;
+ struct vnode *nvp;
+ struct file *nfp;
+ int nfd;
+ int arg;
+
+ if (flags & ~(LX_SOCK_CLOEXEC | LX_SOCK_NONBLOCK)) {
+ return (set_errno(EINVAL));
+ }
+
+ if ((so = getsonode(sock, &error, &fp)) == NULL)
+ return (set_errno(error));
+
+ if (name != NULL) {
+ /*
+ * The Linux man page says that -1 is returned and errno is set
+ * to EFAULT if the "name" address is bad, but it is silent on
+ * what to set errno to if the "namelen" address is bad.
+ * LTP expects EINVAL.
+ *
+ * Note that we must first check the name pointer, as the Linux
+ * docs state nothing is copied out if the "name" pointer is
+ * NULL. If it is NULL, we don't care about the namelen
+ * pointer's value or about dereferencing it.
+ */
+ if (copyin(nlp, &namelen, sizeof (namelen))) {
+ releasef(sock);
+ return (set_errno(EINVAL));
+ }
+ if (namelen == 0) {
+ name = NULL;
+ }
+ } else {
+ namelen = 0;
+ }
+
+ /*
+ * Allocate the user fd before socket_accept() in order to
+ * catch EMFILE errors before calling socket_accept().
+ */
+ if ((error = falloc(NULL, FWRITE|FREAD, &nfp, &nfd)) != 0) {
+ eprintsoline(so, EMFILE);
+ releasef(sock);
+ return (set_errno(error));
+ }
+ if ((error = socket_accept(so, fp->f_flag, CRED(), &nso)) != 0) {
+ setf(nfd, NULL);
+ unfalloc(nfp);
+ releasef(sock);
+ return (set_errno(error));
+ }
+
+ nvp = SOTOV(nso);
+
+ if (namelen != 0) {
+ socklen_t addrlen = sizeof (struct sockaddr_storage);
+ struct sockaddr_storage buf;
+ struct sockaddr *addrp = (struct sockaddr *)&buf;
+
+ if ((error = socket_getpeername(nso, addrp, &addrlen, B_TRUE,
+ CRED())) == 0) {
+ error = stol_sockaddr_copyout(addrp, addrlen,
+ name, nlp, namelen);
+ /*
+ * Logic might dictate that we should check if we can
+ * write to the namelen pointer earlier so we don't
+ * accept a pending connection only to fail the call
+ * because we can't write the namelen value back out.
+ * However, testing shows Linux does indeed fail the
+ * call after accepting the connection so we must
+ * behave in a compatible manner.
+ */
+ } else {
+ ASSERT(error == EINVAL || error == ENOTCONN);
+ error = ECONNABORTED;
+ }
+ }
+
+ if (error != 0) {
+ setf(nfd, NULL);
+ unfalloc(nfp);
+ (void) socket_close(nso, 0, CRED());
+ socket_destroy(nso);
+ releasef(sock);
+ return (set_errno(error));
+ }
+
+ /* Fill in the entries that falloc reserved */
+ nfp->f_vnode = nvp;
+ mutex_exit(&nfp->f_tlock);
+ setf(nfd, nfp);
+
+ /* Act on LX_SOCK_CLOEXEC from flags */
+ if (flags & LX_SOCK_CLOEXEC) {
+ f_setfd(nfd, FD_CLOEXEC);
+ }
+
+ /*
+ * In Linux, accept()ed sockets do not inherit anything set by fcntl(),
+ * so either explicitly set the flags or filter those out.
+ *
+ * The VOP_SETFL code is a simplification of the F_SETFL code in
+ * fcntl(). Ignore any errors from VOP_SETFL.
+ */
+ arg = 0;
+ if (flags & LX_SOCK_NONBLOCK)
+ arg |= FNONBLOCK;
+
+ error = VOP_SETFL(nvp, nfp->f_flag, arg, nfp->f_cred, NULL);
+ if (error != 0) {
+ eprintsoline(so, error);
+ error = 0;
+ } else {
+ mutex_enter(&nfp->f_tlock);
+ nfp->f_flag &= ~FMASK | (FREAD|FWRITE);
+ nfp->f_flag |= arg;
+ mutex_exit(&nfp->f_tlock);
+ }
+
+ releasef(sock);
+ return (nfd);
+}
+
+long
+lx_accept(int sockfd, void *np, int *nlp)
+{
+ return (lx_accept_common(sockfd, (struct sockaddr *)np,
+ (socklen_t *)nlp, 0));
+}
+
+long
+lx_accept4(int sockfd, void *np, int *nlp, int flags)
+{
+ return (lx_accept_common(sockfd, (struct sockaddr *)np,
+ (socklen_t *)nlp, flags));
+}
+
+#if defined(_SYSCALL32_IMPL)
+
+#define LX_SYS_SOCKETCALL 102
+#define LX_SOCKETCALL_MAX 20
+
+typedef long (*lx_sockfn_t)();
+
+static struct {
+ lx_sockfn_t s_fn; /* Function implementing the subcommand */
+ int s_nargs; /* Number of arguments the function takes */
+} lx_socketcall_fns[] = {
+ lx_socket, 3, /* socket */
+ lx_bind, 3, /* bind */
+ lx_connect, 3, /* connect */
+ NULL, 2, /* listen */
+ lx_accept, 3, /* accept */
+ lx_getsockname, 3, /* getsockname */
+ lx_getpeername, 3, /* getpeername */
+ NULL, 4, /* socketpair */
+ lx_send, 4, /* send */
+ lx_recv, 4, /* recv */
+ lx_sendto, 6, /* sendto */
+ lx_recvfrom, 6, /* recvfrom */
+ NULL, 2, /* shutdown */
+ lx_setsockopt, 5, /* setsockopt */
+ lx_getsockopt, 5, /* getsockopt */
+ lx_sendmsg, 3, /* sendmsg */
+ lx_recvmsg, 3, /* recvmsg */
+ lx_accept4, 4, /* accept4 */
+ NULL, 5, /* recvmmsg */
+ NULL, 4 /* sendmmsg */
+};
+
+long
+lx_socketcall(long p1, uint32_t *p2)
+{
+ int subcmd, i;
+ unsigned long args[6] = { 0, 0, 0, 0, 0, 0 };
+ lx_lwp_data_t *lwpd = ttolxlwp(curthread);
+
+ /* incoming subcmds are 1-indexed */
+ subcmd = (int)p1 - 1;
+
+ if (subcmd < 0 || subcmd >= LX_SOCKETCALL_MAX) {
+ return (-EINVAL);
+ }
+
+ /* Vector back out to userland emulation if we lack IKE */
+ if (lx_socketcall_fns[subcmd].s_fn == NULL) {
+ uintptr_t uargs[2] = {p1, (uintptr_t)p2};
+ /* The userspace emulation will handle the syscall return */
+ lwpd->br_eosys = JUSTRETURN;
+ lx_emulate_user32(ttolwp(curthread), LX_SYS_SOCKETCALL, uargs);
+ return (0);
+ }
+
+ /*
+ * Copy the arguments to the subcommand in from the app's address
+ * space, returning EFAULT if we get a bogus pointer.
+ */
+ for (i = 0; i < lx_socketcall_fns[subcmd].s_nargs; i++) {
+ uint32_t arg;
+
+ if (copyin(&p2[i], &arg, sizeof (uint32_t)) != 0) {
+ return (set_errno(EFAULT));
+ }
+ args[i] = (unsigned long)arg;
+ }
+
+ return ((lx_socketcall_fns[subcmd].s_fn)(args[0], args[1], args[2],
+ args[3], args[4], args[5]));
+}
+
+#endif /* defined(_SYSCALL32_IMPL) */
+
+static void
+lx_socket_vsd_free(void *data)
+{
+ lx_socket_aux_data_t *entry;
+
+ entry = (lx_socket_aux_data_t *)data;
+ mutex_destroy(&entry->lxsad_lock);
+ kmem_free(entry, sizeof (*entry));
+}
+
+void
+lx_socket_init()
+{
+ vsd_create(&lx_socket_vsd, lx_socket_vsd_free);
+}
+
+void
+lx_socket_fini()
+{
+ vsd_destroy(&lx_socket_vsd);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_stat.c b/usr/src/uts/common/brand/lx/syscall/lx_stat.c
new file mode 100644
index 0000000000..2ec8a4542d
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_stat.c
@@ -0,0 +1,439 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/fcntl.h>
+#include <sys/debug.h>
+#include <sys/errno.h>
+#include <sys/model.h>
+#include <sys/mode.h>
+#include <sys/stat.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_fcntl.h>
+#include <sys/lx_types.h>
+#include <sys/lx_impl.h>
+#include <sys/brand.h>
+#include <sys/ddi.h>
+
+/* From "uts/common/syscall/stat.c" */
+extern int cstatat_getvp(int, char *, int, vnode_t **, cred_t **);
+
+typedef struct lx_timespec32 {
+ int32_t ts_sec;
+ int32_t ts_nsec;
+} lx_timespec32_t;
+
+typedef struct lx_timespec64 {
+ int64_t ts_sec;
+ int64_t ts_nsec;
+}lx_timespec64_t;
+
+struct lx_stat32 {
+ uint16_t st_dev;
+ uint16_t st_pad1;
+ uint32_t st_ino;
+ uint16_t st_mode;
+ uint16_t st_nlink;
+ uint16_t st_uid;
+ uint16_t st_gid;
+ uint16_t st_rdev;
+ uint16_t st_pad2;
+ uint32_t st_size;
+ uint32_t st_blksize;
+ uint32_t st_blocks;
+ lx_timespec32_t st_atime;
+ lx_timespec32_t st_mtime;
+ lx_timespec32_t st_ctime;
+ uint32_t st_pad3;
+ uint32_t st_pad4;
+};
+
+#pragma pack(4)
+struct lx_stat64_32 {
+ uint64_t st_dev;
+ uint32_t st_pad1;
+ uint32_t st_small_ino;
+ uint32_t st_mode;
+ uint32_t st_nlink;
+ uint32_t st_uid;
+ uint32_t st_gid;
+ uint64_t st_rdev;
+ uint32_t st_pad2;
+ uint64_t st_size;
+ uint32_t st_blksize;
+ uint64_t st_blocks;
+ lx_timespec32_t st_atime;
+ lx_timespec32_t st_mtime;
+ lx_timespec32_t st_ctime;
+ uint64_t st_ino;
+};
+#pragma pack()
+
+#if defined(_LP64)
+struct lx_stat64_64 {
+ uint64_t st_dev;
+ uint64_t st_ino;
+ uint64_t st_nlink; /* yes, the order really is */
+ uint32_t st_mode; /* different for these two */
+ uint32_t st_uid;
+ uint32_t st_gid;
+ uint32_t st_pad0;
+ uint64_t st_rdev;
+ int64_t st_size;
+ int64_t st_blksize;
+ int64_t st_blocks;
+ lx_timespec64_t st_atime;
+ lx_timespec64_t st_mtime;
+ lx_timespec64_t st_ctime;
+ int64_t st_unused[3];
+};
+#endif /* defined(_LP64) */
+
+typedef enum lx_stat_fmt {
+ LXF_STAT32,
+ LXF_STAT64_32,
+ LXF_STAT64_64
+} lx_stat_fmt_t;
+
+static void
+lx_stat_xlate_dev(vattr_t *vattr)
+{
+ lx_zone_data_t *lxzd = ztolxzd(curproc->p_zone);
+ dev_t dev = vattr->va_fsid;
+ lx_virt_disk_t *vd;
+
+ /* Substitute emulated major/minor on mounted datasets */
+ vd = list_head(lxzd->lxzd_vdisks);
+ while (vd != NULL) {
+ if (vd->lxvd_real_dev == dev) {
+ dev = vd->lxvd_emul_dev;
+ break;
+ }
+ vd = list_next(lxzd->lxzd_vdisks, vd);
+ }
+
+ /* Mangle st_dev into expected format */
+ vattr->va_fsid = LX_MAKEDEVICE(getmajor(dev), getminor(dev));
+}
+
+static long
+lx_stat_common(vnode_t *vp, cred_t *cr, void *outp, lx_stat_fmt_t fmt)
+{
+ vattr_t vattr;
+ mode_t mode;
+ int error;
+
+ vattr.va_mask = AT_STAT | AT_NBLOCKS | AT_BLKSIZE | AT_SIZE;
+ if ((error = VOP_GETATTR(vp, &vattr, 0, cr, NULL)) != 0) {
+ return (error);
+ }
+
+ mode = VTTOIF(vattr.va_type) | vattr.va_mode;
+ if ((mode & S_IFMT) == S_IFBLK) {
+ /* Linux seems to report a 0 st_size for all block devices */
+ vattr.va_size = 0;
+ }
+ if (vattr.va_rdev == NODEV) {
+ /* Linux leaves st_rdev zeroed when it is absent */
+ vattr.va_rdev = 0;
+ }
+
+ lx_stat_xlate_dev(&vattr);
+
+ if (fmt == LXF_STAT32) {
+ struct lx_stat32 sb;
+
+ if (vattr.va_fsid > USHRT_MAX || vattr.va_rdev > USHRT_MAX ||
+ vattr.va_nlink > USHRT_MAX || vattr.va_size > INT_MAX) {
+ return (EOVERFLOW);
+ }
+
+ bzero(&sb, sizeof (sb));
+ sb.st_dev = vattr.va_fsid;
+ sb.st_ino = vattr.va_nodeid;
+ sb.st_mode = mode;
+ sb.st_nlink = vattr.va_nlink;
+ sb.st_uid = LX_UID32_TO_UID16(vattr.va_uid);
+ sb.st_gid = LX_GID32_TO_GID16(vattr.va_gid);
+ sb.st_rdev = vattr.va_rdev;
+ sb.st_size = vattr.va_size;
+ sb.st_blksize = vattr.va_blksize;
+ sb.st_blocks = vattr.va_nblocks;
+ sb.st_atime.ts_sec = vattr.va_atime.tv_sec;
+ sb.st_atime.ts_nsec = vattr.va_atime.tv_nsec;
+ sb.st_mtime.ts_sec = vattr.va_mtime.tv_sec;
+ sb.st_mtime.ts_nsec = vattr.va_mtime.tv_nsec;
+ sb.st_ctime.ts_sec = vattr.va_ctime.tv_sec;
+ sb.st_ctime.ts_nsec = vattr.va_ctime.tv_nsec;
+ if (copyout(&sb, outp, sizeof (sb)) != 0) {
+ return (EFAULT);
+ }
+ return (0);
+ } else if (fmt == LXF_STAT64_32) {
+ struct lx_stat64_32 sb;
+
+ bzero(&sb, sizeof (sb));
+ sb.st_dev = vattr.va_fsid;
+ sb.st_ino = vattr.va_nodeid;
+ sb.st_small_ino = (vattr.va_nodeid & UINT_MAX);
+ sb.st_mode = mode;
+ sb.st_nlink = vattr.va_nlink;
+ sb.st_uid = vattr.va_uid;
+ sb.st_gid = vattr.va_gid;
+ sb.st_rdev = vattr.va_rdev;
+ sb.st_size = vattr.va_size;
+ sb.st_blksize = vattr.va_blksize;
+ sb.st_blocks = vattr.va_nblocks;
+ sb.st_atime.ts_sec = vattr.va_atime.tv_sec;
+ sb.st_atime.ts_nsec = vattr.va_atime.tv_nsec;
+ sb.st_mtime.ts_sec = vattr.va_mtime.tv_sec;
+ sb.st_mtime.ts_nsec = vattr.va_mtime.tv_nsec;
+ sb.st_ctime.ts_sec = vattr.va_ctime.tv_sec;
+ sb.st_ctime.ts_nsec = vattr.va_ctime.tv_nsec;
+ if (copyout(&sb, outp, sizeof (sb)) != 0) {
+ return (EFAULT);
+ }
+ return (0);
+ } else if (fmt == LXF_STAT64_64) {
+#if defined(_LP64)
+ struct lx_stat64_64 sb;
+
+ bzero(&sb, sizeof (sb));
+ sb.st_dev = vattr.va_fsid;
+ sb.st_ino = vattr.va_nodeid;
+ sb.st_mode = mode;
+ sb.st_nlink = vattr.va_nlink;
+ sb.st_uid = vattr.va_uid;
+ sb.st_gid = vattr.va_gid;
+ sb.st_rdev = vattr.va_rdev;
+ sb.st_size = vattr.va_size;
+ sb.st_blksize = vattr.va_blksize;
+ sb.st_blocks = vattr.va_nblocks;
+ sb.st_atime.ts_sec = vattr.va_atime.tv_sec;
+ sb.st_atime.ts_nsec = vattr.va_atime.tv_nsec;
+ sb.st_mtime.ts_sec = vattr.va_mtime.tv_sec;
+ sb.st_mtime.ts_nsec = vattr.va_mtime.tv_nsec;
+ sb.st_ctime.ts_sec = vattr.va_ctime.tv_sec;
+ sb.st_ctime.ts_nsec = vattr.va_ctime.tv_nsec;
+ if (copyout(&sb, outp, sizeof (sb)) != 0) {
+ return (EFAULT);
+ }
+ return (0);
+#else
+ /* Invalid output format on 32-bit */
+ VERIFY(0);
+#endif
+ }
+
+ /* Invalid output format */
+ VERIFY(0);
+ return (0);
+}
+
+long
+lx_stat32(char *name, void *outp)
+{
+ vnode_t *vp = NULL;
+ cred_t *cr = NULL;
+ int error;
+
+ if ((error = cstatat_getvp(AT_FDCWD, name, FOLLOW, &vp, &cr)) != 0) {
+ return (set_errno(error));
+ }
+ error = lx_stat_common(vp, cr, outp, LXF_STAT32);
+ VN_RELE(vp);
+ crfree(cr);
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (0);
+}
+
+long
+lx_fstat32(int fd, void *outp)
+{
+ file_t *fp;
+ int error;
+
+ if ((fp = getf(fd)) == NULL) {
+ return (set_errno(EBADF));
+ }
+ error = lx_stat_common(fp->f_vnode, fp->f_cred, outp, LXF_STAT32);
+ releasef(fd);
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (0);
+}
+
+long
+lx_lstat32(char *name, void *outp)
+{
+ vnode_t *vp = NULL;
+ cred_t *cr = NULL;
+ int error;
+
+ if ((error = cstatat_getvp(AT_FDCWD, name, NO_FOLLOW, &vp, &cr)) != 0) {
+ return (set_errno(error));
+ }
+ error = lx_stat_common(vp, cr, outp, LXF_STAT32);
+ VN_RELE(vp);
+ crfree(cr);
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (0);
+}
+
+long
+lx_stat64(char *name, void *outp)
+{
+ vnode_t *vp = NULL;
+ cred_t *cr = NULL;
+ model_t model = get_udatamodel();
+ int error;
+
+ if ((error = cstatat_getvp(AT_FDCWD, name, FOLLOW, &vp, &cr)) != 0) {
+ return (set_errno(error));
+ }
+ error = lx_stat_common(vp, cr, outp,
+ (model == DATAMODEL_LP64) ? LXF_STAT64_64 : LXF_STAT64_32);
+ VN_RELE(vp);
+ crfree(cr);
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (0);
+}
+
+long
+lx_fstat64(int fd, void *outp)
+{
+ file_t *fp;
+ model_t model = get_udatamodel();
+ int error;
+
+ if ((fp = getf(fd)) == NULL) {
+ return (set_errno(EBADF));
+ }
+ error = lx_stat_common(fp->f_vnode, fp->f_cred, outp,
+ (model == DATAMODEL_LP64) ? LXF_STAT64_64 : LXF_STAT64_32);
+ releasef(fd);
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (0);
+}
+
+#define LX_FSTATAT_ALLOWED (LX_AT_SYMLINK_NOFOLLOW | LX_AT_EMPTY_PATH | \
+ LX_AT_NO_AUTOMOUNT)
+
+long
+lx_fstatat64(int fd, char *name, void *outp, int flag)
+{
+ vnode_t *vp = NULL;
+ cred_t *cr = NULL;
+ model_t model = get_udatamodel();
+ enum symfollow follow = FOLLOW;
+ int error;
+ char c;
+
+ if (fd == LX_AT_FDCWD) {
+ fd = AT_FDCWD;
+ }
+ if ((flag & ~LX_FSTATAT_ALLOWED) != 0) {
+ return (set_errno(EINVAL));
+ }
+ if ((flag & LX_AT_NO_AUTOMOUNT) != 0) {
+ /*
+ * While AT_NO_AUTOMOUNT is a legal flag for fstatat64, it is
+ * not yet supported by lx_autofs.
+ */
+ lx_unsupported("fstatat(AT_NO_AUTOMOUNT)");
+ return (set_errno(EINVAL));
+ }
+ if ((flag & LX_AT_SYMLINK_NOFOLLOW) != 0) {
+ follow = NO_FOLLOW;
+ }
+
+ if (copyin(name, &c, sizeof (c)) != 0) {
+ return (set_errno(EFAULT));
+ }
+ if (c == '\0') {
+ if ((flag & LX_AT_EMPTY_PATH) == 0) {
+ return (set_errno(ENOENT));
+ }
+
+ /*
+ * When AT_EMPTY_PATH is set and and empty string has been
+ * passed for the name parameter, direct the lookup against the
+ * vnode for that fd.
+ */
+ if (fd == AT_FDCWD) {
+ vp = PTOU(curproc)->u_cdir;
+ VN_HOLD(vp);
+ cr = CRED();
+ crhold(cr);
+ } else {
+ file_t *fp;
+
+ if ((fp = getf(fd)) == NULL) {
+ return (set_errno(EBADF));
+ }
+ vp = fp->f_vnode;
+ VN_HOLD(vp);
+ cr = fp->f_cred;
+ crhold(cr);
+ releasef(fd);
+ }
+ } else {
+ if ((error = cstatat_getvp(fd, name, follow, &vp, &cr)) != 0) {
+ return (set_errno(error));
+ }
+ }
+
+ error = lx_stat_common(vp, cr, outp,
+ (model == DATAMODEL_LP64) ? LXF_STAT64_64 : LXF_STAT64_32);
+ VN_RELE(vp);
+ crfree(cr);
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (0);
+}
+
+long
+lx_lstat64(char *name, void *outp)
+{
+ vnode_t *vp = NULL;
+ cred_t *cr = NULL;
+ model_t model = get_udatamodel();
+ int error;
+
+ if ((error = cstatat_getvp(AT_FDCWD, name, NO_FOLLOW, &vp, &cr)) != 0) {
+ return (set_errno(error));
+ }
+ error = lx_stat_common(vp, cr, outp,
+ (model == DATAMODEL_LP64) ? LXF_STAT64_64 : LXF_STAT64_32);
+ VN_RELE(vp);
+ crfree(cr);
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_sync.c b/usr/src/uts/common/brand/lx/syscall/lx_sync.c
new file mode 100644
index 0000000000..614afca0b0
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_sync.c
@@ -0,0 +1,86 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/file.h>
+#include <sys/vfs.h>
+#include <sys/vnode.h>
+#include <sys/lx_impl.h>
+#include <sys/lx_brand.h>
+
+long
+lx_syncfs(int fd)
+{
+ file_t *fp;
+ vfs_t *vfsp;
+
+ if ((fp = getf(fd)) == NULL)
+ return (set_errno(EBADF));
+
+ vfsp = fp->f_vnode->v_vfsp;
+ releasef(fd);
+
+ (void) (vfsp->vfs_op->vfs_sync)(vfsp, 0, CRED());
+
+ return (0);
+}
+
+#define LX_SYNC_FILE_RANGE_WAIT_BEFORE 0x1
+#define LX_SYNC_FILE_RANGE_WRITE 0x2
+#define LX_SYNC_FILE_RANGE_WAIT_AFTER 0x4
+
+#define LX_SYNC_FILE_RANGE_VALID (LX_SYNC_FILE_RANGE_WAIT_BEFORE | \
+ LX_SYNC_FILE_RANGE_WRITE | LX_SYNC_FILE_RANGE_WAIT_AFTER)
+
+
+long
+lx_sync_file_range(int fd, off_t offset, off_t nbytes, int flags)
+{
+ file_t *fp;
+ int error, sflags = 0;
+
+ if ((flags & ~LX_SYNC_FILE_RANGE_VALID) != 0)
+ return (set_errno(EINVAL));
+ if (offset < 0 || nbytes < 0)
+ return (set_errno(EINVAL));
+
+ if ((fp = getf(fd)) == NULL)
+ return (set_errno(EBADF));
+
+ /*
+ * Since sync_file_range is implemented in terms of VOP_PUTPAGE, both
+ * SYNC_FILE_RANGE_WAIT flags are treated as forcing synchronous
+ * operation. While this differs from the Linux behavior where
+ * BEFORE/AFTER are distinct, it achieves an adequate level of safety
+ * since the requested data is synced out at the end of the call.
+ */
+ if ((flags & (LX_SYNC_FILE_RANGE_WAIT_BEFORE |
+ LX_SYNC_FILE_RANGE_WAIT_AFTER)) == 0) {
+ sflags |= B_ASYNC;
+ }
+
+ error = VOP_PUTPAGE(fp->f_vnode, offset, nbytes, sflags, CRED(), NULL);
+ if (error == ENOSYS) {
+ error = ESPIPE;
+ }
+
+ releasef(fd);
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_sysinfo.c b/usr/src/uts/common/brand/lx/syscall/lx_sysinfo.c
new file mode 100644
index 0000000000..449d5882d4
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_sysinfo.c
@@ -0,0 +1,218 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <vm/anon.h>
+#include <sys/systm.h>
+#include <sys/sysmacros.h>
+#include <sys/zone.h>
+#include <sys/time.h>
+
+typedef struct lx_sysinfo {
+ int64_t si_uptime; /* Seconds since boot */
+ uint64_t si_loads[3]; /* 1, 5, and 15 minute avg runq length */
+ uint64_t si_totalram; /* Total memory size */
+ uint64_t si_freeram; /* Available memory */
+ uint64_t si_sharedram; /* Shared memory */
+ uint64_t si_bufferram; /* Buffer memory */
+ uint64_t si_totalswap; /* Total swap space */
+ uint64_t si_freeswap; /* Avail swap space */
+ uint16_t si_procs; /* Process count */
+ uint16_t si_pad; /* Padding */
+ uint64_t si_totalhigh; /* High memory size */
+ uint64_t si_freehigh; /* Avail high memory */
+ uint32_t si_mem_unit; /* Unit size of memory fields */
+} lx_sysinfo_t;
+
+#if defined(_SYSCALL32_IMPL)
+/*
+ * 64-bit kernel view of the 32-bit usermode struct.
+ */
+#pragma pack(4)
+typedef struct lx_sysinfo32 {
+ int32_t si_uptime; /* Seconds since boot */
+ uint32_t si_loads[3]; /* 1, 5, and 15 minute avg runq length */
+ uint32_t si_totalram; /* Total memory size */
+ uint32_t si_freeram; /* Available memory */
+ uint32_t si_sharedram; /* Shared memory */
+ uint32_t si_bufferram; /* Buffer memory */
+ uint32_t si_totalswap; /* Total swap space */
+ uint32_t si_freeswap; /* Avail swap space */
+ uint16_t si_procs; /* Process count */
+ uint16_t si_pad; /* Padding */
+ uint32_t si_totalhigh; /* High memory size */
+ uint32_t si_freehigh; /* Avail high memory */
+ uint32_t si_mem_unit; /* Unit size of memory fields */
+ char __si_pad[8];
+} lx_sysinfo32_t;
+#pragma pack()
+#endif
+
+extern pgcnt_t swapfs_minfree;
+
+static void
+lx_sysinfo_common(lx_sysinfo_t *si)
+{
+ zone_t *zone = curthread->t_procp->p_zone;
+ uint64_t zphysmem, zfreemem, ztotswap, zfreeswap;
+
+ si->si_uptime = gethrestime_sec() - zone->zone_boot_time;
+
+ si->si_loads[0] = zone->zone_hp_avenrun[0];
+ si->si_loads[1] = zone->zone_hp_avenrun[1];
+ si->si_loads[2] = zone->zone_hp_avenrun[2];
+
+ /*
+ * In linux each thread looks like a process, so we conflate the
+ * two in this stat as well.
+ */
+ si->si_procs = (int32_t)zone->zone_nlwps;
+
+ /*
+ * If memory or swap limits are set on the zone, use those, otherwise
+ * use the system values. physmem and freemem are in pages, but the
+ * zone values are in bytes. Likewise, ani_max and ani_free are in
+ * pages.
+ */
+ if (zone->zone_phys_mem_ctl == UINT64_MAX) {
+ zphysmem = physmem;
+ zfreemem = freemem;
+ } else {
+ zphysmem = btop(zone->zone_phys_mem_ctl);
+ zfreemem = btop(zone->zone_phys_mem_ctl - zone->zone_phys_mem);
+ }
+
+ if (zone->zone_max_swap_ctl == UINT64_MAX) {
+ ztotswap = k_anoninfo.ani_max;
+ zfreeswap = k_anoninfo.ani_free;
+ } else {
+ /*
+ * See the comment in swapctl for a description of how free is
+ * calculated within a zone.
+ */
+ rctl_qty_t used;
+ spgcnt_t avail;
+ uint64_t max;
+
+ avail = MAX((spgcnt_t)(availrmem - swapfs_minfree), 0);
+ max = k_anoninfo.ani_max + k_anoninfo.ani_mem_resv + avail;
+
+ mutex_enter(&zone->zone_mem_lock);
+ ztotswap = btop(zone->zone_max_swap_ctl);
+ used = btop(zone->zone_max_swap);
+ mutex_exit(&zone->zone_mem_lock);
+
+ zfreeswap = MIN(ztotswap, max) - used;
+ }
+
+ /*
+ * If the maximum memory stat is less than 1^20 pages (i.e. 4GB),
+ * then we report the result in bytes. Otherwise we use pages.
+ * Once we start supporting >1TB systems/zones, we'll need a third
+ * option.
+ */
+ if (MAX(zphysmem, ztotswap) < 1024 * 1024) {
+ si->si_totalram = ptob(zphysmem);
+ si->si_freeram = ptob(zfreemem);
+ si->si_totalswap = ptob(ztotswap);
+ si->si_freeswap = ptob(zfreeswap);
+ si->si_mem_unit = 1;
+ } else {
+ si->si_totalram = zphysmem;
+ si->si_freeram = zfreemem;
+ si->si_totalswap = ztotswap;
+ si->si_freeswap = zfreeswap;
+ si->si_mem_unit = PAGESIZE;
+ }
+ si->si_bufferram = 0;
+ si->si_sharedram = 0;
+
+ /*
+ * These two stats refer to high physical memory. If an
+ * application running in a Linux zone cares about this, then
+ * either it or we are broken.
+ */
+ si->si_totalhigh = 0;
+ si->si_freehigh = 0;
+}
+
+long
+lx_sysinfo64(caddr_t sip)
+{
+ lx_sysinfo_t si;
+
+ bzero(&si, sizeof (si));
+ lx_sysinfo_common(&si);
+
+ if (copyout(&si, sip, sizeof (si)) != 0) {
+ return (set_errno(EFAULT));
+ }
+
+ return (0);
+}
+
+#if defined(_SYSCALL32_IMPL)
+long
+lx_sysinfo32(caddr_t sip)
+{
+ lx_sysinfo_t si;
+ lx_sysinfo32_t si32;
+ int i;
+
+ lx_sysinfo_common(&si);
+
+ /*
+ * Convert the lx_sysinfo_t into the legacy 32-bit view:
+ */
+ bzero(&si32, sizeof (si32));
+ si32.si_uptime = si.si_uptime;
+
+ for (i = 0; i < 3; i++) {
+ if ((si.si_loads[i]) > 0x7fffffff)
+ si32.si_loads[i] = 0x7fffffff;
+ else
+ si32.si_loads[i] = si.si_loads[i];
+ }
+
+ si32.si_procs = si.si_procs;
+ si32.si_totalram = si.si_totalram;
+ si32.si_freeram = si.si_freeram;
+ si32.si_totalswap = si.si_totalswap;
+ si32.si_freeswap = si.si_freeswap;
+ si32.si_mem_unit = si.si_mem_unit;
+
+ si32.si_bufferram = si.si_bufferram;
+ si32.si_sharedram = si.si_sharedram;
+
+ si32.si_totalhigh = si.si_totalhigh;
+ si32.si_freehigh = si.si_freehigh;
+
+ if (copyout(&si32, sip, sizeof (si32)) != 0) {
+ return (set_errno(EFAULT));
+ }
+
+ return (0);
+}
+#endif
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_thread_area.c b/usr/src/uts/common/brand/lx/syscall/lx_thread_area.c
new file mode 100644
index 0000000000..48d91b09cc
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_thread_area.c
@@ -0,0 +1,196 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/cpuvar.h>
+#include <sys/archsystm.h>
+#include <sys/proc.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_ldt.h>
+#include <sys/lx_misc.h>
+#include <sys/x86_archext.h>
+#include <sys/controlregs.h>
+#include <lx_syscall.h>
+
+long
+lx_arch_prctl(int code, ulong_t addr)
+{
+#if defined(__amd64)
+ klwp_t *lwp = ttolwp(curthread);
+ lx_lwp_data_t *llwp = lwptolxlwp(lwp);
+ pcb_t *pcb = &lwp->lwp_pcb;
+
+ switch (code) {
+ case LX_ARCH_GET_FS:
+ if (copyout(&llwp->br_lx_fsbase, (void *)addr,
+ sizeof (llwp->br_lx_fsbase)) != 0) {
+ return (set_errno(EFAULT));
+ }
+ break;
+
+ case LX_ARCH_SET_FS:
+ llwp->br_lx_fsbase = addr;
+
+ kpreempt_disable();
+ if (pcb->pcb_fsbase != llwp->br_lx_fsbase) {
+ pcb->pcb_fsbase = llwp->br_lx_fsbase;
+
+ /*
+ * Ensure we go out via update_sregs.
+ */
+ pcb->pcb_rupdate = 1;
+ }
+ kpreempt_enable();
+ break;
+
+ case LX_ARCH_GET_GS:
+ if (copyout(&llwp->br_lx_gsbase, (void *)addr,
+ sizeof (llwp->br_lx_gsbase)) != 0) {
+ return (set_errno(EFAULT));
+ }
+ break;
+
+ case LX_ARCH_SET_GS:
+ llwp->br_lx_gsbase = addr;
+
+ kpreempt_disable();
+ if (pcb->pcb_gsbase != llwp->br_lx_gsbase) {
+ pcb->pcb_gsbase = llwp->br_lx_gsbase;
+
+ /*
+ * Ensure we go out via update_sregs.
+ */
+ pcb->pcb_rupdate = 1;
+ }
+ kpreempt_enable();
+ break;
+
+ default:
+ return (set_errno(EINVAL));
+ }
+#endif
+
+ return (0);
+}
+
+long
+lx_get_thread_area(struct ldt_info *inf)
+{
+ struct lx_lwp_data *jlwp = ttolxlwp(curthread);
+ struct ldt_info ldt_inf;
+ user_desc_t *dscrp;
+ int entry;
+
+ if (fuword32(&inf->entry_number, (uint32_t *)&entry))
+ return (set_errno(EFAULT));
+
+ if (entry < GDT_TLSMIN || entry > GDT_TLSMAX)
+ return (set_errno(EINVAL));
+
+ dscrp = jlwp->br_tls + entry - GDT_TLSMIN;
+
+ /*
+ * convert the solaris ldt to the linux format expected by the
+ * caller
+ */
+ DESC_TO_LDT_INFO(dscrp, &ldt_inf);
+ ldt_inf.entry_number = entry;
+
+ if (copyout(&ldt_inf, inf, sizeof (struct ldt_info)))
+ return (set_errno(EFAULT));
+
+ return (0);
+}
+
+long
+lx_set_thread_area(struct ldt_info *inf)
+{
+ struct lx_lwp_data *jlwp = ttolxlwp(curthread);
+ struct ldt_info ldt_inf;
+ user_desc_t *dscrp;
+ int entry;
+ int i;
+
+ /* Check that casts for accessing the words in user_desc are valid */
+ ASSERT(sizeof (user_desc_t) == 8);
+
+ if (copyin(inf, &ldt_inf, sizeof (ldt_inf)))
+ return (set_errno(EFAULT));
+
+ entry = ldt_inf.entry_number;
+ if (entry == -1) {
+ /*
+ * Find an empty entry in the tls for this thread.
+ * The casts assume each user_desc_t entry is 8 bytes.
+ */
+ for (i = 0, dscrp = jlwp->br_tls; i < LX_TLSNUM; i++, dscrp++) {
+ if (((uint_t *)dscrp)[0] == 0 &&
+ ((uint_t *)dscrp)[1] == 0)
+ break;
+ }
+
+ if (i < LX_TLSNUM) {
+ /*
+ * found one
+ */
+ entry = i + GDT_TLSMIN;
+ if (suword32(&inf->entry_number, entry))
+ return (set_errno(EFAULT));
+ } else {
+ return (set_errno(ESRCH));
+ }
+ }
+
+ if (entry < GDT_TLSMIN || entry > GDT_TLSMAX)
+ return (set_errno(EINVAL));
+
+ /*
+ * convert the linux ldt info to standard intel descriptor
+ */
+ dscrp = jlwp->br_tls + entry - GDT_TLSMIN;
+
+ if (LDT_INFO_EMPTY(&ldt_inf)) {
+ ((uint_t *)dscrp)[0] = 0;
+ ((uint_t *)dscrp)[1] = 0;
+ } else {
+ LDT_INFO_TO_DESC(&ldt_inf, dscrp);
+ }
+
+ /*
+ * update the gdt with the new descriptor
+ */
+ kpreempt_disable();
+
+ for (i = 0, dscrp = jlwp->br_tls; i < LX_TLSNUM; i++, dscrp++)
+ lx_set_gdt(GDT_TLSMIN + i, dscrp);
+
+ kpreempt_enable();
+
+ return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_timer.c b/usr/src/uts/common/brand/lx/syscall/lx_timer.c
new file mode 100644
index 0000000000..c2fb4a4c7d
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_timer.c
@@ -0,0 +1,379 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * The illumos kernel provides two clock backends: CLOCK_REALTIME, the
+ * adjustable system wall clock; and CLOCK_HIGHRES, the monotonically
+ * increasing time source that is not subject to drift or adjustment. By
+ * contrast, the Linux kernel is furnished with an overabundance of narrowly
+ * differentiated clock types.
+ *
+ * Fortunately, most of the commonly used Linux clock types are either similar
+ * enough to the native clock backends that they can be directly mapped, or
+ * represent queries to the per-process and per-LWP microstate counters.
+ *
+ * CLOCK_BOOTTIME is identical to CLOCK_MONOTONIC, except that it takes into
+ * account time that the system is suspended. Since that is uninteresting to
+ * us, we treat it the same.
+ */
+
+#include <sys/time.h>
+#include <sys/systm.h>
+#include <sys/cmn_err.h>
+#include <sys/lx_impl.h>
+
+/*
+ * From "uts/common/os/timer.c":
+ */
+extern int clock_settime(clockid_t, timespec_t *);
+extern int clock_gettime(clockid_t, timespec_t *);
+extern int clock_getres(clockid_t, timespec_t *);
+extern int nanosleep(timespec_t *, timespec_t *);
+
+
+static int lx_emul_clock_getres(clockid_t, timespec_t *);
+static int lx_emul_clock_gettime(clockid_t, timespec_t *);
+static int lx_emul_clock_settime(clockid_t, timespec_t *);
+
+typedef struct lx_clock_backend {
+ clockid_t lclk_ntv_id;
+ int (*lclk_clock_getres)(clockid_t, timespec_t *);
+ int (*lclk_clock_gettime)(clockid_t, timespec_t *);
+ int (*lclk_clock_settime)(clockid_t, timespec_t *);
+} lx_clock_backend_t;
+
+/*
+ * NOTE: The Linux man pages state this structure is obsolete and is
+ * unsupported, so it is declared here for sizing purposes only.
+ */
+struct lx_timezone {
+ int tz_minuteswest; /* minutes W of Greenwich */
+ int tz_dsttime; /* type of dst correction */
+};
+
+/*
+ * Use the native clock_* system call implementation, but with a translated
+ * clock identifier:
+ */
+#define NATIVE(ntv_id) \
+ { ntv_id, clock_getres, clock_gettime, clock_settime }
+
+/*
+ * This backend is not supported, so we provide an emulation handler:
+ */
+#define EMUL(ntv_id) \
+ { ntv_id, lx_emul_clock_getres, lx_emul_clock_gettime, \
+ lx_emul_clock_settime }
+
+static lx_clock_backend_t lx_clock_backends[] = {
+ NATIVE(CLOCK_REALTIME), /* LX_CLOCK_REALTIME */
+ NATIVE(CLOCK_HIGHRES), /* LX_CLOCK_MONOTONIC */
+ EMUL(CLOCK_PROCESS_CPUTIME_ID), /* LX_CLOCK_PROCESS_CPUTIME_ID */
+ EMUL(CLOCK_THREAD_CPUTIME_ID), /* LX_CLOCK_THREAD_CPUTIME_ID */
+ NATIVE(CLOCK_HIGHRES), /* LX_CLOCK_MONOTONIC_RAW */
+ NATIVE(CLOCK_REALTIME), /* LX_CLOCK_REALTIME_COARSE */
+ NATIVE(CLOCK_HIGHRES), /* LX_CLOCK_MONOTONIC_COARSE */
+ NATIVE(CLOCK_HIGHRES) /* LX_CLOCK_BOOTTIME */
+};
+
+#define LX_CLOCK_MAX \
+ (sizeof (lx_clock_backends) / sizeof (lx_clock_backends[0]))
+#define LX_CLOCK_BACKEND(clk) \
+ ((clk) < LX_CLOCK_MAX && (clk) >= 0 ? &lx_clock_backends[(clk)] : NULL)
+
+static int
+lx_emul_clock_settime(clockid_t clock, timespec_t *tp)
+{
+ return (set_errno(EINVAL));
+}
+
+static int
+lx_emul_clock_gettime(clockid_t clock, timespec_t *tp)
+{
+ timespec_t t;
+
+ switch (clock) {
+ case CLOCK_PROCESS_CPUTIME_ID: {
+ proc_t *p = ttoproc(curthread);
+ hrtime_t snsecs, unsecs;
+
+ /*
+ * Based on getrusage() in "rusagesys.c":
+ */
+ mutex_enter(&p->p_lock);
+ unsecs = mstate_aggr_state(p, LMS_USER);
+ snsecs = mstate_aggr_state(p, LMS_SYSTEM);
+ mutex_exit(&p->p_lock);
+
+ hrt2ts(unsecs + snsecs, &t);
+ break;
+ }
+
+ case CLOCK_THREAD_CPUTIME_ID: {
+ klwp_t *lwp = ttolwp(curthread);
+ struct mstate *ms = &lwp->lwp_mstate;
+ hrtime_t snsecs, unsecs;
+
+ /*
+ * Based on getrusage_lwp() in "rusagesys.c":
+ */
+ unsecs = ms->ms_acct[LMS_USER];
+ snsecs = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
+
+ scalehrtime(&unsecs);
+ scalehrtime(&snsecs);
+
+ hrt2ts(unsecs + snsecs, &t);
+ break;
+ }
+
+ default:
+ return (set_errno(EINVAL));
+ }
+
+#if defined(_SYSCALL32_IMPL)
+ if (get_udatamodel() != DATAMODEL_NATIVE) {
+ timespec32_t t32;
+
+ if (TIMESPEC_OVERFLOW(&t)) {
+ return (set_errno(EOVERFLOW));
+ }
+ TIMESPEC_TO_TIMESPEC32(&t32, &t);
+
+ if (copyout(&t32, tp, sizeof (t32)) != 0) {
+ return (set_errno(EFAULT));
+ }
+
+ return (0);
+ }
+#endif
+
+ if (copyout(&t, tp, sizeof (t)) != 0) {
+ return (set_errno(EFAULT));
+ }
+
+ return (0);
+}
+
+static int
+lx_emul_clock_getres(clockid_t clock, timespec_t *tp)
+{
+ timespec_t t;
+
+ if (tp == NULL) {
+ return (0);
+ }
+
+ switch (clock) {
+ case CLOCK_PROCESS_CPUTIME_ID:
+ case CLOCK_THREAD_CPUTIME_ID:
+ /*
+ * These clock backends return microstate accounting values for
+ * the LWP or the entire process. The Linux kernel claims they
+ * have nanosecond resolution; so will we.
+ */
+ t.tv_sec = 0;
+ t.tv_nsec = 1;
+ break;
+
+ default:
+ return (set_errno(EINVAL));
+ }
+
+#if defined(_SYSCALL32_IMPL)
+ if (get_udatamodel() != DATAMODEL_NATIVE) {
+ timespec32_t t32;
+
+ if (TIMESPEC_OVERFLOW(&t)) {
+ return (set_errno(EOVERFLOW));
+ }
+ TIMESPEC_TO_TIMESPEC32(&t32, &t);
+
+ if (copyout(&t32, tp, sizeof (t32)) != 0) {
+ return (set_errno(EFAULT));
+ }
+
+ return (0);
+ }
+#endif
+
+ if (copyout(&t, tp, sizeof (t)) != 0) {
+ return (set_errno(EFAULT));
+ }
+
+ return (0);
+}
+
+static void
+lx_clock_unsupported(int clock)
+{
+ char buf[100];
+
+ (void) snprintf(buf, sizeof (buf), "unsupported clock: %d", clock);
+ lx_unsupported(buf);
+}
+
+long
+lx_clock_settime(int clock, timespec_t *tp)
+{
+ lx_clock_backend_t *backend;
+
+ if ((backend = LX_CLOCK_BACKEND(clock)) == NULL) {
+ lx_clock_unsupported(clock);
+ return (set_errno(EINVAL));
+ }
+
+ return (backend->lclk_clock_settime(backend->lclk_ntv_id, tp));
+}
+
+long
+lx_clock_gettime(int clock, timespec_t *tp)
+{
+ lx_clock_backend_t *backend;
+
+ if ((backend = LX_CLOCK_BACKEND(clock)) == NULL) {
+ lx_clock_unsupported(clock);
+ return (set_errno(EINVAL));
+ }
+
+ return (backend->lclk_clock_gettime(backend->lclk_ntv_id, tp));
+}
+
+long
+lx_clock_getres(int clock, timespec_t *tp)
+{
+ lx_clock_backend_t *backend;
+
+ if (tp == NULL) {
+ return (0);
+ }
+
+ if ((backend = LX_CLOCK_BACKEND(clock)) == NULL) {
+ lx_clock_unsupported(clock);
+ return (set_errno(EINVAL));
+ }
+
+ return (backend->lclk_clock_getres(backend->lclk_ntv_id, tp));
+}
+
+
+long
+lx_gettimeofday(struct timeval *tvp, struct lx_timezone *tzp)
+{
+ struct lx_timezone tz;
+
+ bzero(&tz, sizeof (tz));
+
+ /*
+ * We want to be similar to libc which just does a fasttrap to
+ * gethrestime and simply converts that result. We follow how uniqtime
+ * does the conversion but we can't use that code since it does some
+ * extra work which can cause the result to bounce around based on which
+ * CPU we run on.
+ */
+ if (tvp != NULL) {
+ struct timeval tv;
+ timestruc_t ts;
+ int usec, nsec;
+
+ gethrestime(&ts);
+ nsec = ts.tv_nsec;
+ usec = nsec + (nsec >> 2);
+ usec = nsec + (usec >> 1);
+ usec = nsec + (usec >> 2);
+ usec = nsec + (usec >> 4);
+ usec = nsec - (usec >> 3);
+ usec = nsec + (usec >> 2);
+ usec = nsec + (usec >> 3);
+ usec = nsec + (usec >> 4);
+ usec = nsec + (usec >> 1);
+ usec = nsec + (usec >> 6);
+ usec = usec >> 10;
+
+ tv.tv_sec = ts.tv_sec;
+ tv.tv_usec = usec;
+
+ if (get_udatamodel() == DATAMODEL_NATIVE) {
+ if (copyout(&tv, tvp, sizeof (tv)) != 0)
+ return (set_errno(EFAULT));
+ }
+#ifdef _SYSCALL32_IMPL
+ else {
+ struct timeval32 tv32;
+
+ if (TIMEVAL_OVERFLOW(&tv))
+ return (set_errno(EOVERFLOW));
+ TIMEVAL_TO_TIMEVAL32(&tv32, &tv);
+
+ if (copyout(&tv32, tvp, sizeof (tv32)))
+ return (set_errno(EFAULT));
+ }
+#endif
+ }
+
+ /*
+ * The Linux man page states use of the second parameter is obsolete,
+ * but gettimeofday(2) should still return EFAULT if it is set
+ * to a bad non-NULL pointer (sigh...)
+ */
+ if (tzp != NULL && copyout(&tz, tzp, sizeof (tz)) != 0)
+ return (set_errno(EFAULT));
+
+ return (0);
+}
+
+/*
+ * On Linux a bad buffer will set errno to EFAULT, and on Illumos the failure
+ * mode is documented as "undefined."
+ */
+long
+lx_time(time_t *tp)
+{
+ timestruc_t ts;
+ struct timeval tv;
+
+ gethrestime(&ts);
+ tv.tv_sec = ts.tv_sec;
+ tv.tv_usec = 0;
+
+ if (get_udatamodel() == DATAMODEL_NATIVE) {
+ if (tp != NULL &&
+ copyout(&tv.tv_sec, tp, sizeof (tv.tv_sec)) != 0)
+ return (set_errno(EFAULT));
+
+ return (tv.tv_sec);
+ }
+#ifdef _SYSCALL32_IMPL
+ else {
+ struct timeval32 tv32;
+
+ if (TIMEVAL_OVERFLOW(&tv))
+ return (set_errno(EOVERFLOW));
+ TIMEVAL_TO_TIMEVAL32(&tv32, &tv);
+
+ if (tp != NULL &&
+ copyout(&tv32.tv_sec, tp, sizeof (tv32.tv_sec)))
+ return (set_errno(EFAULT));
+
+ return (tv32.tv_sec);
+ }
+#endif
+}
+
+long
+lx_nanosleep(timespec_t *rqtp, timespec_t *rmtp)
+{
+ return (nanosleep(rqtp, rmtp));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_uname.c b/usr/src/uts/common/brand/lx/syscall/lx_uname.c
new file mode 100644
index 0000000000..2d18408eaa
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_uname.c
@@ -0,0 +1,82 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/thread.h>
+#include <sys/proc.h>
+#include <sys/zone.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_types.h>
+
+struct lx_utsname {
+ char lxu_sysname[LX_SYS_UTS_LN];
+ char lxu_nodename[LX_SYS_UTS_LN];
+ char lxu_release[LX_SYS_UTS_LN];
+ char lxu_version[LX_SYS_UTS_LN];
+ char lxu_machine[LX_SYS_UTS_LN];
+ char lxu_domainname[LX_SYS_UTS_LN];
+};
+
+long
+lx_uname(void *uptr)
+{
+ proc_t *p = curproc;
+ lx_proc_data_t *lxpd = ptolxproc(p);
+ lx_zone_data_t *lxzd = ztolxzd(p->p_zone);
+ struct lx_utsname un;
+
+ bzero(&un, sizeof (un));
+
+ (void) strlcpy(un.lxu_sysname, LX_UNAME_SYSNAME, LX_SYS_UTS_LN);
+ (void) strlcpy(un.lxu_nodename, p->p_zone->zone_nodename,
+ LX_SYS_UTS_LN);
+
+ mutex_enter(&lxzd->lxzd_lock);
+
+ if (lxpd->l_uname_release[0] != '\0') {
+ (void) strlcpy(un.lxu_release, lxpd->l_uname_release,
+ LX_SYS_UTS_LN);
+ } else {
+ (void) strlcpy(un.lxu_release, lxzd->lxzd_kernel_release,
+ LX_SYS_UTS_LN);
+ }
+ if (lxpd->l_uname_version[0] != '\0') {
+ (void) strlcpy(un.lxu_version, lxpd->l_uname_version,
+ LX_SYS_UTS_LN);
+ } else {
+ (void) strlcpy(un.lxu_version, lxzd->lxzd_kernel_version,
+ LX_SYS_UTS_LN);
+ }
+
+ mutex_exit(&lxzd->lxzd_lock);
+
+ if (get_udatamodel() == DATAMODEL_LP64) {
+ (void) strlcpy(un.lxu_machine, LX_UNAME_MACHINE64,
+ LX_SYS_UTS_LN);
+ } else {
+ (void) strlcpy(un.lxu_machine, LX_UNAME_MACHINE32,
+ LX_SYS_UTS_LN);
+ }
+ (void) strlcpy(un.lxu_domainname, p->p_zone->zone_domain,
+ LX_SYS_UTS_LN);
+
+ if (copyout(&un, uptr, sizeof (un)) != 0) {
+ return (set_errno(EFAULT));
+ }
+
+ return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_wait.c b/usr/src/uts/common/brand/lx/syscall/lx_wait.c
new file mode 100644
index 0000000000..e8358f9f69
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_wait.c
@@ -0,0 +1,377 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * wait() family of functions.
+ *
+ * The first minor difference between the Linux and Solaris family of wait()
+ * calls is that the values for WNOHANG and WUNTRACED are different. Thankfully,
+ * the exit status values are identical between the two implementations.
+ *
+ * Things get very different and very complicated when we introduce the Linux
+ * threading model. Under linux, both threads and child processes are
+ * represented as processes. However, the behavior of wait() with respect to
+ * each child varies according to the flags given to clone()
+ *
+ * SIGCHLD The SIGCHLD signal should be sent on termination
+ * CLONE_THREAD The child shares the same thread group as the parent
+ * CLONE_DETACHED The parent receives no notification when the child exits
+ *
+ * The following flags control the Linux behavior w.r.t. the above attributes:
+ *
+ * __WALL Wait on all children, regardless of type
+ * __WCLONE Wait only on non-SIGCHLD children
+ * __WNOTHREAD Don't wait on children of other threads in this group
+ *
+ * The following chart shows whether wait() returns when the child exits:
+ *
+ * default __WCLONE __WALL
+ * no SIGCHLD - X X
+ * SIGCHLD X - X
+ *
+ * The following chart shows whether wait() returns when the grandchild exits:
+ *
+ * default __WNOTHREAD
+ * no CLONE_THREAD - -
+ * CLONE_THREAD X -
+ *
+ * The CLONE_DETACHED flag is universal - when the child exits, no state is
+ * stored and wait() has no effect.
+ *
+ * XXX Support the above combination of options, or some reasonable subset that
+ * covers at least fork() and pthread_create().
+ */
+
+#include <sys/wait.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_types.h>
+#include <sys/lx_misc.h>
+#include <lx_signum.h>
+#include <lx_errno.h>
+#include <lx_syscall.h>
+
+/*
+ * From "uts/common/os/exit.c" and "uts/common/syscall/rusagesys.c":
+ */
+extern int waitid(idtype_t, id_t, k_siginfo_t *, int);
+extern int rusagesys(int, void *, void *, void *, void *);
+
+/*
+ * Convert between Linux options and Solaris options, returning -1 if any
+ * invalid flags are found.
+ */
+#define LX_WNOHANG 0x00000001
+#define LX_WUNTRACED 0x00000002
+#define LX_WSTOPPED LX_WUNTRACED
+#define LX_WEXITED 0x00000004
+#define LX_WCONTINUED 0x00000008
+#define LX_WNOWAIT 0x01000000
+
+#define LX_WNOTHREAD 0x20000000
+#define LX_WALL 0x40000000
+#define LX_WCLONE 0x80000000
+
+#define LX_P_ALL 0x0
+#define LX_P_PID 0x1
+#define LX_P_GID 0x2
+
+/*
+ * Split the passed waitpid/waitid options into two separate variables:
+ * those for the native illumos waitid(2), and the extra Linux-specific
+ * options we will handle in our brand-specific code.
+ */
+static int
+ltos_options(uintptr_t options, int *native_options, int *extra_options)
+{
+ int newoptions = 0;
+
+ if (((options) & ~(LX_WNOHANG | LX_WUNTRACED | LX_WEXITED |
+ LX_WCONTINUED | LX_WNOWAIT | LX_WNOTHREAD | LX_WALL |
+ LX_WCLONE)) != 0) {
+ return (-1);
+ }
+
+ *extra_options = options & (LX_WNOTHREAD | LX_WALL | LX_WCLONE);
+
+ if (options & LX_WNOHANG)
+ newoptions |= WNOHANG;
+ if (options & LX_WUNTRACED)
+ newoptions |= WUNTRACED;
+ if (options & LX_WEXITED)
+ newoptions |= WEXITED;
+ if (options & LX_WCONTINUED)
+ newoptions |= WCONTINUED;
+ if (options & LX_WNOWAIT)
+ newoptions |= WNOWAIT;
+
+ /*
+ * The trapped option is implicit on Linux.
+ */
+ newoptions |= WTRAPPED;
+
+ *native_options = newoptions;
+ return (0);
+}
+
+static int
+lx_wstat(int code, int status)
+{
+ int stat = 0;
+
+ switch (code) {
+ case CLD_EXITED:
+ stat = status << 8;
+ break;
+ case CLD_DUMPED:
+ stat = lx_stol_signo(status, SIGKILL) | WCOREFLG;
+ break;
+ case CLD_KILLED:
+ stat = lx_stol_signo(status, SIGKILL);
+ break;
+ case CLD_TRAPPED:
+ case CLD_STOPPED:
+ stat = (lx_stol_status(status, SIGKILL) << 8) | WSTOPFLG;
+ break;
+ case CLD_CONTINUED:
+ stat = WCONTFLG;
+ break;
+ }
+
+ return (stat);
+}
+
+static int
+lx_call_waitid(idtype_t idtype, id_t id, k_siginfo_t *sip, int native_options,
+ int extra_options)
+{
+ lx_lwp_data_t *lwpd = ttolxlwp(curthread);
+ int error;
+
+ /*
+ * Our brand-specific waitid helper only understands a subset of
+ * the possible idtypes. Ensure we keep to that subset here:
+ */
+ if (idtype != P_ALL && idtype != P_PID && idtype != P_PGID) {
+ return (EINVAL);
+ }
+
+ /*
+ * Enable the return of emulated ptrace(2) stop conditions
+ * through lx_waitid_helper, and stash the Linux-specific
+ * extra waitid() flags.
+ */
+ lwpd->br_waitid_emulate = B_TRUE;
+ lwpd->br_waitid_flags = extra_options;
+
+ if ((error = waitid(idtype, id, sip, native_options)) == EINTR) {
+ /*
+ * According to signal(7), the wait4(2), waitid(2), and
+ * waitpid(2) system calls are restartable.
+ */
+ ttolxlwp(curthread)->br_syscall_restart = B_TRUE;
+ }
+
+ lwpd->br_waitid_emulate = B_FALSE;
+ lwpd->br_waitid_flags = 0;
+
+ return (error);
+}
+
+long
+lx_wait4(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
+{
+ k_siginfo_t info = { 0 };
+ idtype_t idtype;
+ id_t id;
+ int status = 0;
+ pid_t pid = (pid_t)p1;
+ int error;
+ int native_options, extra_options;
+ int *statusp = (int *)p2;
+ void *rup = (void *)p4;
+
+ if (ltos_options(p3, &native_options, &extra_options) == -1) {
+ return (set_errno(EINVAL));
+ }
+
+ if (pid > maxpid) {
+ return (set_errno(ECHILD));
+ }
+
+ /*
+ * While not listed as a valid return code, Linux's wait4(2) does,
+ * in fact, get an EFAULT if either the status pointer or rusage
+ * pointer is invalid. Since a failed waitpid should leave child
+ * process in a state where a future wait4(2) will succeed, we
+ * check them by copying out the values their buffers originally
+ * contained. (We need to do this as a failed system call should
+ * never affect the contents of a passed buffer.)
+ *
+ * This will fail if the buffers in question are write-only.
+ */
+ if (statusp != NULL) {
+ if (copyin(statusp, &status, sizeof (status)) != 0 ||
+ copyout(&status, statusp, sizeof (status)) != 0) {
+ return (set_errno(EFAULT));
+ }
+ }
+
+ /*
+ * Do the same check for the "struct rusage" pointer, which differs
+ * in size for 32- and 64-bit processes.
+ */
+ if (rup != NULL) {
+ struct rusage ru;
+ void *krup = &ru;
+ size_t rusz = sizeof (ru);
+#if defined(_SYSCALL32_IMPL)
+ struct rusage32 ru32;
+
+ if (get_udatamodel() != DATAMODEL_NATIVE) {
+ krup = &ru32;
+ rusz = sizeof (ru32);
+ }
+#endif
+
+ if (copyin(rup, krup, rusz) != 0 ||
+ copyout(krup, rup, rusz) != 0) {
+ return (set_errno(EFAULT));
+ }
+ }
+
+ if (pid < -1) {
+ idtype = P_PGID;
+ id = -pid;
+ } else if (pid == -1) {
+ idtype = P_ALL;
+ id = 0;
+ } else if (pid == 0) {
+ idtype = P_PGID;
+ mutex_enter(&pidlock);
+ id = curproc->p_pgrp;
+ mutex_exit(&pidlock);
+ } else {
+ idtype = P_PID;
+ id = pid;
+ }
+
+ native_options |= (WEXITED | WTRAPPED);
+
+ if ((error = lx_call_waitid(idtype, id, &info, native_options,
+ extra_options)) != 0) {
+ return (set_errno(error));
+ }
+
+ /*
+ * If the WNOHANG flag was specified and no child was found return 0.
+ */
+ if ((native_options & WNOHANG) && info.si_pid == 0) {
+ return (0);
+ }
+
+ status = lx_wstat(info.si_code, info.si_status);
+
+ /*
+ * Unfortunately if this attempt to copy out either the status or the
+ * rusage fails, the process will be in an inconsistent state as
+ * subsequent calls to wait for the same child will fail where they
+ * should succeed on a Linux system. This, however, is rather
+ * unlikely since we tested the validity of both above.
+ */
+ if (statusp != NULL) {
+ if (copyout(&status, statusp, sizeof (status)) != 0) {
+ return (set_errno(EFAULT));
+ }
+ }
+
+ if (rup != NULL) {
+ if ((error = rusagesys(_RUSAGESYS_GETRUSAGE_CHLD, rup, NULL,
+ NULL, NULL)) != 0) {
+ return (set_errno(error));
+ }
+ }
+
+ return (info.si_pid);
+}
+
+long
+lx_waitpid(uintptr_t p1, uintptr_t p2, uintptr_t p3)
+{
+ return (lx_wait4(p1, p2, p3, NULL));
+}
+
+long
+lx_waitid(uintptr_t idtype, uintptr_t id, uintptr_t infop, uintptr_t opt)
+{
+ int error;
+ int native_options, extra_options;
+ k_siginfo_t info = { 0 };
+
+ if (ltos_options(opt, &native_options, &extra_options) == -1) {
+ return (set_errno(EINVAL));
+ }
+
+ if (((opt) & (LX_WEXITED | LX_WSTOPPED | LX_WCONTINUED)) == 0) {
+ return (set_errno(EINVAL));
+ }
+
+ switch (idtype) {
+ case LX_P_ALL:
+ idtype = P_ALL;
+ break;
+ case LX_P_PID:
+ idtype = P_PID;
+ break;
+ case LX_P_GID:
+ idtype = P_PGID;
+ break;
+ default:
+ return (set_errno(EINVAL));
+ }
+
+ if ((error = lx_call_waitid(idtype, id, &info, native_options,
+ extra_options)) != 0) {
+ return (set_errno(error));
+ }
+
+ /*
+ * If the WNOHANG flag was specified and no child was found return 0.
+ */
+ if ((native_options & WNOHANG) && info.si_pid == 0) {
+ return (0);
+ }
+
+#if defined(_SYSCALL32_IMPL)
+ if (get_udatamodel() != DATAMODEL_NATIVE) {
+ return (stol_ksiginfo32_copyout(&info, (void *)infop));
+ } else
+#endif
+ {
+ return (stol_ksiginfo_copyout(&info, (void *)infop));
+ }
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_xattr.c b/usr/src/uts/common/brand/lx/syscall/lx_xattr.c
new file mode 100644
index 0000000000..bd7667226f
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_xattr.c
@@ -0,0 +1,371 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <sys/errno.h>
+#include <sys/systm.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/pathname.h>
+
+
+#define LX_XATTR_NAME_MAX 255
+#define LX_XATTR_SIZE_MAX 65536
+#define LX_XATTR_LIST_MAX 65536
+
+#define LX_XATTR_FLAG_CREATE 0x1
+#define LX_XATTR_FLAG_REPLACE 0x2
+#define LX_XATTR_FLAGS_VALID (LX_XATTR_FLAG_CREATE | LX_XATTR_FLAG_REPLACE)
+
+#define LX_CAP_XATTR_NAME "security.capability"
+
+/*
+ * *xattr() family of functions.
+ *
+ * These are largely unimplemented. In most cases we return EOPNOTSUPP, rather
+ * than using NOSYS_NO_EQUIV to avoid unwanted stderr output from ls(1).
+ *
+ * Note that CRED() is used instead of f_cred in the f*xattr functions. This
+ * is intentional as Linux does not have the same notion of per-fd credentials.
+ */
+
+/* ARGSUSED */
+static int
+lx_setxattr_common(vnode_t *vp, char *name, void *value, size_t size,
+ int flags)
+{
+ int error;
+ char name_buf[LX_XATTR_NAME_MAX + 1];
+ size_t name_len;
+
+ if ((flags & ~LX_XATTR_FLAGS_VALID) != 0) {
+ return (EINVAL);
+ }
+ error = copyinstr(name, name_buf, sizeof (name_buf), &name_len);
+ if (error == ENAMETOOLONG || name_len == sizeof (name_buf)) {
+ return (ERANGE);
+ } else if (error != 0) {
+ return (EFAULT);
+ }
+ if (size > LX_XATTR_SIZE_MAX) {
+ return (E2BIG);
+ }
+
+ /*
+ * In order to keep package management software happy, despite lacking
+ * support for file-based Linux capabilities via xattrs, we fake
+ * success when root attempts a setxattr on that attribute.
+ */
+ if (crgetuid(CRED()) == 0 &&
+ strcmp(name_buf, LX_CAP_XATTR_NAME) == 0) {
+ return (0);
+ }
+
+
+ return (EOPNOTSUPP);
+}
+
+/* ARGSUSED */
+static int
+lx_getxattr_common(vnode_t *vp, char *name, char *value, size_t size,
+ ssize_t *osize)
+{
+ int error;
+ char name_buf[LX_XATTR_NAME_MAX + 1];
+ size_t name_len;
+
+ error = copyinstr(name, name_buf, sizeof (name_buf), &name_len);
+ if (error == ENAMETOOLONG || name_len == sizeof (name_buf)) {
+ return (ERANGE);
+ } else if (error != 0) {
+ return (EFAULT);
+ }
+
+ /*
+ * Only parameter validation is attempted for now.
+ */
+ return (EOPNOTSUPP);
+}
+
+/* ARGSUSED */
+static int
+lx_listxattr_common(vnode_t *vp, char *list, size_t size, ssize_t *osize)
+{
+ return (EOPNOTSUPP);
+}
+
+/* ARGSUSED */
+static int
+lx_removexattr_common(vnode_t *vp, char *name)
+{
+ int error;
+ char name_buf[LX_XATTR_NAME_MAX + 1];
+ size_t name_len;
+
+ error = copyinstr(name, name_buf, sizeof (name_buf), &name_len);
+ if (error == ENAMETOOLONG || name_len == sizeof (name_buf)) {
+ return (ERANGE);
+ } else if (error != 0) {
+ return (EFAULT);
+ }
+
+ /*
+ * Only parameter validation is attempted for now.
+ */
+ return (EOPNOTSUPP);
+}
+
+
+long
+lx_setxattr(char *path, char *name, void *value, size_t size, int flags)
+{
+ int error;
+ vnode_t *vp = NULL;
+
+ error = lookupname(path, UIO_USERSPACE, FOLLOW, NULLVPP, &vp);
+ if (error != 0) {
+ return (set_errno(error));
+ }
+
+ error = lx_setxattr_common(vp, name, value, size, flags);
+ VN_RELE(vp);
+
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (0);
+}
+
+long
+lx_lsetxattr(char *path, char *name, void *value, size_t size, int flags)
+{
+ int error;
+ vnode_t *vp = NULL;
+
+ error = lookupname(path, UIO_USERSPACE, NO_FOLLOW, NULLVPP, &vp);
+ if (error != 0) {
+ return (set_errno(error));
+ }
+
+ error = lx_setxattr_common(vp, name, value, size, flags);
+ VN_RELE(vp);
+
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (0);
+}
+
+long
+lx_fsetxattr(int fd, char *name, void *value, size_t size, int flags)
+{
+ int error;
+ file_t *fp;
+
+ if ((fp = getf(fd)) == NULL) {
+ return (set_errno(EBADF));
+ }
+
+ error = lx_setxattr_common(fp->f_vnode, name, value, size, flags);
+ releasef(fd);
+
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (0);
+}
+
+ssize_t
+lx_getxattr(char *path, char *name, void *value, size_t size)
+{
+ int error;
+ vnode_t *vp = NULL;
+ ssize_t osize;
+
+ error = lookupname(path, UIO_USERSPACE, FOLLOW, NULLVPP, &vp);
+ if (error != 0) {
+ return (set_errno(error));
+ }
+
+ error = lx_getxattr_common(vp, name, value, size, &osize);
+ VN_RELE(vp);
+
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (osize);
+}
+
+ssize_t
+lx_lgetxattr(char *path, char *name, void *value, size_t size)
+{
+
+ int error;
+ vnode_t *vp = NULL;
+ ssize_t osize;
+
+ error = lookupname(path, UIO_USERSPACE, NO_FOLLOW, NULLVPP, &vp);
+ if (error != 0) {
+ return (set_errno(error));
+ }
+
+ error = lx_getxattr_common(vp, name, value, size, &osize);
+ VN_RELE(vp);
+
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (osize);
+}
+
+ssize_t
+lx_fgetxattr(int fd, char *name, void *value, size_t size)
+{
+ int error;
+ file_t *fp;
+ ssize_t osize;
+
+ if ((fp = getf(fd)) == NULL) {
+ return (set_errno(EBADF));
+ }
+
+ error = lx_getxattr_common(fp->f_vnode, name, value, size, &osize);
+ releasef(fd);
+
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (osize);
+}
+
+ssize_t
+lx_listxattr(char *path, char *list, size_t size)
+{
+ int error;
+ vnode_t *vp = NULL;
+ ssize_t osize;
+
+ error = lookupname(path, UIO_USERSPACE, FOLLOW, NULLVPP, &vp);
+ if (error != 0) {
+ return (set_errno(error));
+ }
+
+ error = lx_listxattr_common(vp, list, size, &osize);
+ VN_RELE(vp);
+
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (osize);
+}
+
+ssize_t
+lx_llistxattr(char *path, char *list, size_t size)
+{
+ int error;
+ vnode_t *vp = NULL;
+ ssize_t osize;
+
+ error = lookupname(path, UIO_USERSPACE, NO_FOLLOW, NULLVPP, &vp);
+ if (error != 0) {
+ return (set_errno(error));
+ }
+
+ error = lx_listxattr_common(vp, list, size, &osize);
+ VN_RELE(vp);
+
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (osize);
+}
+
+ssize_t
+lx_flistxattr(int fd, char *list, size_t size)
+{
+ int error;
+ file_t *fp;
+ ssize_t osize;
+
+ if ((fp = getf(fd)) == NULL) {
+ return (set_errno(EBADF));
+ }
+
+ error = lx_listxattr_common(fp->f_vnode, list, size, &osize);
+ releasef(fd);
+
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (osize);
+}
+
+int
+lx_removexattr(char *path, char *name)
+{
+ int error;
+ vnode_t *vp = NULL;
+
+ error = lookupname(path, UIO_USERSPACE, FOLLOW, NULLVPP, &vp);
+ if (error != 0) {
+ return (set_errno(error));
+ }
+
+ error = lx_removexattr_common(vp, name);
+ VN_RELE(vp);
+
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (0);
+}
+
+int
+lx_lremovexattr(char *path, char *name)
+{
+ int error;
+ vnode_t *vp = NULL;
+
+ error = lookupname(path, UIO_USERSPACE, NO_FOLLOW, NULLVPP, &vp);
+ if (error != 0) {
+ return (set_errno(error));
+ }
+
+ error = lx_removexattr_common(vp, name);
+ VN_RELE(vp);
+
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (0);
+}
+
+int
+lx_fremovexattr(int fd, char *name)
+{
+ int error;
+ file_t *fp;
+
+ if ((fp = getf(fd)) == NULL) {
+ return (set_errno(EBADF));
+ }
+
+ error = lx_removexattr_common(fp->f_vnode, name);
+ releasef(fd);
+
+ if (error != 0) {
+ return (set_errno(error));
+ }
+ return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/sysfs/lx_sysfs.h b/usr/src/uts/common/brand/lx/sysfs/lx_sysfs.h
new file mode 100644
index 0000000000..93dc316c1e
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sysfs/lx_sysfs.h
@@ -0,0 +1,196 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#ifndef _LXSYSFS_H
+#define _LXSYSFS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * lx_sysfs.h: declarations, data structures and macros for lx_sysfs
+ */
+
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/policy.h>
+#include <sys/debug.h>
+#include <sys/dirent.h>
+#include <sys/errno.h>
+#include <sys/file.h>
+#include <sys/kmem.h>
+#include <sys/pathname.h>
+#include <sys/systm.h>
+#include <sys/var.h>
+#include <sys/user.h>
+#include <sys/t_lock.h>
+#include <sys/sysmacros.h>
+#include <sys/cred.h>
+#include <sys/priv.h>
+#include <sys/vnode.h>
+#include <sys/vfs.h>
+#include <sys/statvfs.h>
+#include <sys/cmn_err.h>
+#include <sys/zone.h>
+#include <sys/uio.h>
+#include <sys/utsname.h>
+#include <sys/dnlc.h>
+#include <sys/atomic.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+#include <vm/as.h>
+#include <vm/anon.h>
+#include <sys/netstack.h>
+#include <inet/ip.h>
+#include <inet/ip_if.h>
+
+/*
+ * Convert a vnode into an lxsys_mnt_t
+ */
+#define VTOLXSM(vp) ((lxsys_mnt_t *)(vp)->v_vfsp->vfs_data)
+
+/*
+ * convert a vnode into an lxsys_node
+ */
+#define VTOLXS(vp) ((lxsys_node_t *)(vp)->v_data)
+
+/*
+ * convert a lxsys_node into a vnode
+ */
+#define LXSTOV(lxsnp) ((lxsnp)->lxsys_vnode)
+
+/*
+ * convert a lxsys_node into zone for fs
+ */
+#define LXSTOZ(lxsnp) \
+ (((lxsys_mnt_t *)(lxsnp)->lxsys_vnode->v_vfsp->vfs_data)->lxsysm_zone)
+
+#define LXSNSIZ 256 /* max size of lx /sys file name entries */
+
+/*
+ * Pretend that a directory entry takes 16 bytes
+ */
+#define LXSYS_SDSIZE 16
+
+/* Root sysfs lxsys_instance */
+#define LXSYS_INST_ROOT 0
+
+/*
+ * Node/file types for lx /sys files
+ * (directories and files contained therein).
+ */
+typedef enum lxsys_nodetype {
+ LXSYS_NONE, /* None-type to keep inodes non-zero */
+ LXSYS_STATIC, /* Statically defined entries */
+ LXSYS_CLASS_NET, /* /sys/class/net/<iface> */
+ LXSYS_DEV_NET, /* /sys/devices/virtual/net/<iface> */
+ LXSYS_BLOCK, /* /sys/block/<dev> */
+ LXSYS_DEV_ZFS, /* /sys/devices/zfs/<dev> */
+ LXSYS_DEV_SYS_CPU, /* /sys/devices/system/cpu/<cpu> */
+ LXSYS_DEV_SYS_CPUINFO, /* /sys/devices/system/cpu/cpuN/<info> */
+ LXSYS_DEV_SYS_NODE, /* /sys/devices/system/node/node0/<info> */
+ LXSYS_MAXTYPE, /* type limit */
+} lxsys_nodetype_t;
+
+/*
+ * external dirent characteristics
+ */
+typedef struct {
+ unsigned int d_idnum;
+ char *d_name;
+} lxsys_dirent_t;
+
+typedef struct {
+ unsigned int dl_instance;
+ lxsys_dirent_t *dl_list;
+ int dl_length;
+} lxsys_dirlookup_t;
+
+/*
+ * This is the lx sysfs private data object
+ * which is attached to v_data in the vnode structure
+ */
+struct lxsys_node;
+typedef struct lxsys_node lxsys_node_t;
+struct lxsys_node {
+ lxsys_nodetype_t lxsys_type; /* type ID of node */
+ unsigned int lxsys_instance; /* instance ID node */
+ unsigned int lxsys_endpoint; /* endpoint ID node */
+ vnode_t *lxsys_vnode; /* vnode for the node */
+ vnode_t *lxsys_parentvp; /* parent directory */
+ lxsys_node_t *lxsys_next; /* next list entry */
+ timestruc_t lxsys_time; /* creation time */
+ mode_t lxsys_mode; /* file mode bits */
+ uid_t lxsys_uid; /* file owner */
+ gid_t lxsys_gid; /* file group owner */
+ ino_t lxsys_ino; /* node id */
+};
+
+/*
+ * This is the lxsysfs private data object
+ * which is attached to vfs_data in the vfs structure
+ */
+typedef struct lxsys_mnt {
+ kmutex_t lxsysm_lock; /* protects fields */
+ lxsys_node_t *lxsysm_node; /* node at root of sys mount */
+ zone_t *lxsysm_zone; /* zone for this mount */
+} lxsys_mnt_t;
+
+extern vnodeops_t *lxsys_vnodeops;
+
+typedef struct mounta mounta_t;
+
+extern void lxsys_initnodecache();
+extern void lxsys_fininodecache();
+extern ino_t lxsys_inode(lxsys_nodetype_t, unsigned int, unsigned int);
+extern ino_t lxsys_parentinode(lxsys_node_t *);
+extern lxsys_node_t *lxsys_getnode(vnode_t *, lxsys_nodetype_t, unsigned int,
+ unsigned int);
+extern lxsys_node_t *lxsys_getnode_static(vnode_t *, unsigned int);
+extern void lxsys_freenode(lxsys_node_t *);
+
+extern netstack_t *lxsys_netstack(lxsys_node_t *);
+extern ill_t *lxsys_find_ill(ip_stack_t *, uint_t);
+
+typedef struct lxpr_uiobuf {
+ uio_t *uiop;
+ char *buffer;
+ uint32_t bufsize;
+ char *pos;
+ size_t beg;
+ int error;
+} lxsys_uiobuf_t;
+
+extern lxsys_uiobuf_t *lxsys_uiobuf_new(uio_t *);
+extern void lxsys_uiobuf_free(lxsys_uiobuf_t *);
+extern void lxsys_uiobuf_seterr(lxsys_uiobuf_t *, int);
+extern int lxsys_uiobuf_flush(lxsys_uiobuf_t *);
+extern void lxsys_uiobuf_write(lxsys_uiobuf_t *, const char *, size_t);
+extern void lxsys_uiobuf_printf(lxsys_uiobuf_t *uiobuf, const char *fmt, ...);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef islower
+#define islower(x) (((unsigned)(x) >= 'a') && ((unsigned)(x) <= 'z'))
+#endif
+#ifndef toupper
+#define toupper(x) (islower(x) ? (x) - 'a' + 'A' : (x))
+#endif
+
+#endif /* _LXSYSFS_H */
diff --git a/usr/src/uts/common/brand/lx/sysfs/lx_syssubr.c b/usr/src/uts/common/brand/lx/sysfs/lx_syssubr.c
new file mode 100644
index 0000000000..3184b34d08
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sysfs/lx_syssubr.c
@@ -0,0 +1,457 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * lx_syssubr.c: Various functions for the /sys vnodeops.
+ */
+
+#include <sys/varargs.h>
+
+#include <sys/cpuvar.h>
+#include <sys/mman.h>
+#include <sys/vmsystm.h>
+#include <sys/prsystm.h>
+
+#include "lx_sysfs.h"
+
+#define LXSYSCACHE_NAME "lxsys_cache"
+
+static int lxsys_node_constructor(void *, void *, int);
+static void lxsys_node_destructor(void *, void *);
+
+static kmem_cache_t *lxsys_node_cache;
+
+void
+lxsys_initnodecache()
+{
+ lxsys_node_cache = kmem_cache_create(LXSYSCACHE_NAME,
+ sizeof (lxsys_node_t), 0,
+ lxsys_node_constructor, lxsys_node_destructor, NULL, NULL, NULL, 0);
+}
+
+void
+lxsys_fininodecache()
+{
+ kmem_cache_destroy(lxsys_node_cache);
+}
+
+/* ARGSUSED */
+static int
+lxsys_node_constructor(void *buf, void *un, int kmflags)
+{
+ lxsys_node_t *lxsnp = buf;
+ vnode_t *vp;
+
+ vp = lxsnp->lxsys_vnode = vn_alloc(kmflags);
+ if (vp == NULL)
+ return (-1);
+
+ (void) vn_setops(vp, lxsys_vnodeops);
+ vp->v_data = lxsnp;
+
+ return (0);
+}
+
+/* ARGSUSED */
+static void
+lxsys_node_destructor(void *buf, void *un)
+{
+ lxsys_node_t *lxsnp = buf;
+
+ vn_free(LXSTOV(lxsnp));
+}
+
+/*
+ * Calculate an inode number
+ *
+ * This takes various bits of info and munges them
+ * to give the inode number for an lxsys node
+ */
+ino_t
+lxsys_inode(lxsys_nodetype_t type, unsigned int instance,
+ unsigned int endpoint)
+{
+ /*
+ * Sysfs Inode format:
+ * 0000AABBBBCC
+ *
+ * AA - TYPE
+ * BBBB - INSTANCE
+ * CC - ENDPOINT
+ */
+ ASSERT(instance <= 0xffff);
+ ASSERT(endpoint <= 0xff);
+
+ return ((ino_t)(type << 24)|(instance << 8)|endpoint);
+}
+
+/*
+ * Return inode number of parent (directory)
+ */
+ino_t
+lxsys_parentinode(lxsys_node_t *lxsnp)
+{
+ /*
+ * If the input node is the root then the parent inode
+ * is the mounted on inode so just return our inode number
+ */
+ if (lxsnp->lxsys_type == LXSYS_STATIC &&
+ lxsnp->lxsys_instance == LXSYS_INST_ROOT) {
+ return (lxsnp->lxsys_ino);
+ } else {
+ return (VTOLXS(lxsnp->lxsys_parentvp)->lxsys_ino);
+ }
+}
+
+/*
+ * Allocate a new lxsys node
+ *
+ * This also allocates the vnode associated with it
+ */
+lxsys_node_t *
+lxsys_getnode(vnode_t *dp, lxsys_nodetype_t type, unsigned int instance,
+ unsigned int endpoint)
+{
+ lxsys_node_t *lxsnp;
+ vnode_t *vp;
+ timestruc_t now;
+
+ /*
+ * Allocate a new node. It is deallocated in vop_innactive
+ */
+ lxsnp = kmem_cache_alloc(lxsys_node_cache, KM_SLEEP);
+
+ /*
+ * Set defaults (may be overridden below)
+ */
+ gethrestime(&now);
+ lxsnp->lxsys_type = type;
+ lxsnp->lxsys_instance = instance;
+ lxsnp->lxsys_endpoint = endpoint;
+ lxsnp->lxsys_next = NULL;
+ lxsnp->lxsys_parentvp = dp;
+ VN_HOLD(dp);
+
+ lxsnp->lxsys_time = now;
+ lxsnp->lxsys_uid = lxsnp->lxsys_gid = 0;
+ lxsnp->lxsys_ino = lxsys_inode(type, instance, endpoint);
+
+ /* initialize the vnode data */
+ vp = lxsnp->lxsys_vnode;
+ vn_reinit(vp);
+ vp->v_flag = VNOCACHE|VNOMAP|VNOSWAP|VNOMOUNT;
+ vp->v_vfsp = dp->v_vfsp;
+
+ /*
+ * Default to a directory with open permissions.
+ * Specific components will override this
+ */
+ if (type == LXSYS_STATIC && instance == LXSYS_INST_ROOT) {
+ vp->v_flag |= VROOT;
+ }
+ vp->v_type = VDIR;
+ lxsnp->lxsys_mode = 0555;
+
+ return (lxsnp);
+}
+
+lxsys_node_t *
+lxsys_getnode_static(vnode_t *dp, unsigned int instance)
+{
+ lxsys_mnt_t *lxsm = VTOLXSM(dp);
+ lxsys_node_t *lnp;
+
+ mutex_enter(&lxsm->lxsysm_lock);
+ lnp = lxsm->lxsysm_node;
+ while (1) {
+ if (lnp->lxsys_instance == instance) {
+ VERIFY(lnp->lxsys_parentvp == dp);
+
+ VN_HOLD(lnp->lxsys_vnode);
+ mutex_exit(&lxsm->lxsysm_lock);
+ return (lnp);
+ } else if (lnp->lxsys_next == NULL) {
+ break;
+ }
+ lnp = lnp->lxsys_next;
+ }
+
+ /*
+ * No persistent node found.
+ * Create one and add it to the end of the list.
+ */
+ lnp->lxsys_next = lxsys_getnode(dp, LXSYS_STATIC, instance, 0);
+ lnp = lnp->lxsys_next;
+ /* Allow mounts on static entries */
+ LXSTOV(lnp)->v_flag &= (~VNOMOUNT);
+
+ mutex_exit(&lxsm->lxsysm_lock);
+ return (lnp);
+}
+
+/* Clean up persistence for static lxsys_node */
+int
+lxsys_freenode_static(lxsys_node_t *lnp)
+{
+ lxsys_node_t *plnp;
+ vnode_t *vp = LXSTOV(lnp);
+ lxsys_mnt_t *lxsm = VTOLXSM(vp);
+
+ if (lnp->lxsys_instance == LXSYS_INST_ROOT) {
+ /*
+ * The root vnode does not need special cleanup since it
+ * anchors the list and is freed by lxsys_unmount.
+ */
+ return (0);
+ }
+
+ mutex_enter(&lxsm->lxsysm_lock);
+
+ /*
+ * It is possible that a different process acquired a fresh reference
+ * to this vnode via lookup while we were waiting on the lxsysm_lock.
+ * To avoid freeing the vnode out from under them, we will double-check
+ * v_count and bail from the fop_inactive if it was grabbed.
+ */
+ mutex_enter(&vp->v_lock);
+ if (vp->v_count != 1) {
+ VERIFY(vp->v_count > 0);
+
+ /* Release our hold before bailing out of lxsys_inactive */
+ vp->v_count--;
+
+ mutex_exit(&vp->v_lock);
+ mutex_exit(&lxsm->lxsysm_lock);
+ return (-1);
+ }
+ mutex_exit(&vp->v_lock);
+
+ /* search for the record pointing to lnp */
+ plnp = lxsm->lxsysm_node;
+ while (plnp != NULL && plnp->lxsys_next != lnp) {
+ plnp = plnp->lxsys_next;
+ }
+ /* entry should always be found */
+ VERIFY(plnp != NULL);
+ plnp->lxsys_next = lnp->lxsys_next;
+
+ mutex_exit(&lxsm->lxsysm_lock);
+ return (0);
+}
+
+/*
+ * Free the storage obtained from lxsys_getnode().
+ */
+void
+lxsys_freenode(lxsys_node_t *lxsnp)
+{
+ vnode_t *vp = LXSTOV(lxsnp);
+
+ VERIFY(vp != NULL);
+
+ if (lxsnp->lxsys_type == LXSYS_STATIC) {
+ if (lxsys_freenode_static(lxsnp) != 0) {
+ return;
+ }
+ }
+
+ /*
+ * delete any association with parent vp
+ */
+ if (lxsnp->lxsys_parentvp != NULL)
+ VN_RELE(lxsnp->lxsys_parentvp);
+
+ /*
+ * Release the lxsysnode.
+ */
+ kmem_cache_free(lxsys_node_cache, lxsnp);
+}
+
+/*
+ * Get the netstack associated with this lxsys mount
+ */
+netstack_t *
+lxsys_netstack(lxsys_node_t *lnp)
+{
+ zone_t *zone = VTOLXSM(LXSTOV(lnp))->lxsysm_zone;
+ netstack_t *ns = zone->zone_netstack;
+
+ VERIFY(ns != NULL);
+
+ if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) {
+ ns = NULL;
+ } else {
+ netstack_hold(ns);
+ }
+
+ return (ns);
+}
+
+ill_t *
+lxsys_find_ill(ip_stack_t *ipst, uint_t ifindex)
+{
+ ill_t *ill;
+ phyint_t *phyi;
+
+ rw_enter(&ipst->ips_ill_g_lock, RW_READER);
+ phyi = avl_find(&ipst->ips_phyint_g_list->phyint_list_avl_by_index,
+ (void *) &ifindex, NULL);
+ if (phyi != NULL) {
+ /*
+ * Since interface information presented via /sys is not
+ * specific to IPv4 or IPv6, an ill reference from either
+ * protocol will be adequate. Check both, starting with IPv4
+ * for a valid reference to use.
+ */
+ for (ill = phyi->phyint_illv4; ill != phyi->phyint_illv6;
+ ill = phyi->phyint_illv6) {
+ if (ill != NULL) {
+ mutex_enter(&ill->ill_lock);
+ if (!ILL_IS_CONDEMNED(ill)) {
+ ill_refhold_locked(ill);
+ mutex_exit(&ill->ill_lock);
+ rw_exit(&ipst->ips_ill_g_lock);
+ return (ill);
+ }
+ mutex_exit(&ill->ill_lock);
+ }
+ }
+ }
+ rw_exit(&ipst->ips_ill_g_lock);
+ return (NULL);
+}
+
+
+#define LXSYSUIOBUFSZ 4096
+
+lxsys_uiobuf_t *
+lxsys_uiobuf_new(uio_t *uiop)
+{
+ /* Allocate memory for both lxsys_uiobuf and output buffer */
+ int bufsize = LXSYSUIOBUFSZ;
+ lxsys_uiobuf_t *uiobuf =
+ kmem_alloc(sizeof (lxsys_uiobuf_t) + bufsize, KM_SLEEP);
+
+ uiobuf->uiop = uiop;
+ uiobuf->buffer = (char *)&uiobuf[1];
+ uiobuf->bufsize = bufsize;
+ uiobuf->pos = uiobuf->buffer;
+ uiobuf->beg = 0;
+ uiobuf->error = 0;
+
+ return (uiobuf);
+}
+
+void
+lxsys_uiobuf_free(lxsys_uiobuf_t *uiobuf)
+{
+ ASSERT(uiobuf != NULL);
+ ASSERT(uiobuf->pos == uiobuf->buffer);
+
+ kmem_free(uiobuf, sizeof (lxsys_uiobuf_t) + uiobuf->bufsize);
+}
+
+void
+lxsys_uiobuf_seterr(lxsys_uiobuf_t *uiobuf, int err)
+{
+ ASSERT(uiobuf->error == 0);
+
+ uiobuf->error = err;
+}
+
+int
+lxsys_uiobuf_flush(lxsys_uiobuf_t *uiobuf)
+{
+ off_t off = uiobuf->uiop->uio_offset;
+ caddr_t uaddr = uiobuf->buffer;
+ size_t beg = uiobuf->beg;
+ size_t size = (uintptr_t)uiobuf->pos - (uintptr_t)uaddr;
+
+ if (uiobuf->error == 0 && uiobuf->uiop->uio_resid != 0) {
+ ASSERT(off >= beg);
+
+ if (beg + size > off && off >= 0)
+ uiobuf->error =
+ uiomove(uaddr + (off - beg), size - (off - beg),
+ UIO_READ, uiobuf->uiop);
+
+ uiobuf->beg += size;
+ }
+
+ uiobuf->pos = uaddr;
+
+ return (uiobuf->error);
+}
+
+void
+lxsys_uiobuf_write(lxsys_uiobuf_t *uiobuf, const char *buf, size_t size)
+{
+ /* While we can still carry on */
+ while (uiobuf->error == 0 && uiobuf->uiop->uio_resid != 0) {
+ uintptr_t remain = (uintptr_t)uiobuf->bufsize -
+ ((uintptr_t)uiobuf->pos - (uintptr_t)uiobuf->buffer);
+
+ /* Enough space in buffer? */
+ if (remain >= size) {
+ bcopy(buf, uiobuf->pos, size);
+ uiobuf->pos += size;
+ return;
+ }
+
+ /* Not enough space, so copy all we can and try again */
+ bcopy(buf, uiobuf->pos, remain);
+ uiobuf->pos += remain;
+ (void) lxsys_uiobuf_flush(uiobuf);
+ buf += remain;
+ size -= remain;
+ }
+}
+
+#define TYPBUFFSIZE 256
+
+void
+lxsys_uiobuf_printf(lxsys_uiobuf_t *uiobuf, const char *fmt, ...)
+{
+ va_list args;
+ char buff[TYPBUFFSIZE];
+ int len;
+ char *buffer;
+
+ /* Can we still do any output */
+ if (uiobuf->error != 0 || uiobuf->uiop->uio_resid == 0)
+ return;
+
+ va_start(args, fmt);
+
+ /* Try using stack allocated buffer */
+ len = vsnprintf(buff, TYPBUFFSIZE, fmt, args);
+ if (len < TYPBUFFSIZE) {
+ va_end(args);
+ lxsys_uiobuf_write(uiobuf, buff, len);
+ return;
+ }
+
+ /* Not enough space in pre-allocated buffer */
+ buffer = kmem_alloc(len + 1, KM_SLEEP);
+
+ /*
+ * We know we allocated the correct amount of space
+ * so no check on the return value
+ */
+ (void) vsnprintf(buffer, len+1, fmt, args);
+ lxsys_uiobuf_write(uiobuf, buffer, len);
+ va_end(args);
+ kmem_free(buffer, len+1);
+}
diff --git a/usr/src/uts/common/brand/lx/sysfs/lx_sysvfsops.c b/usr/src/uts/common/brand/lx/sysfs/lx_sysvfsops.c
new file mode 100644
index 0000000000..9bb1d70527
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sysfs/lx_sysvfsops.c
@@ -0,0 +1,348 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+/*
+ * lxsysvfsops.c: vfs operations for lx sysfs.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/cmn_err.h>
+#include <sys/cred.h>
+#include <sys/debug.h>
+#include <sys/errno.h>
+#include <sys/proc.h>
+#include <sys/stat.h>
+#include <sys/statvfs.h>
+#include <sys/sysmacros.h>
+#include <sys/systm.h>
+#include <sys/var.h>
+#include <sys/vfs.h>
+#include <sys/vfs_opreg.h>
+#include <sys/vnode.h>
+#include <sys/mode.h>
+#include <sys/signal.h>
+#include <sys/user.h>
+#include <sys/mount.h>
+#include <sys/bitmap.h>
+#include <sys/kmem.h>
+#include <sys/policy.h>
+#include <sys/modctl.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+#include <sys/lx_impl.h>
+
+#include "lx_sysfs.h"
+
+/* Module level parameters */
+static int lxsysfstype;
+static dev_t lxsysdev;
+static kmutex_t lxsys_mount_lock;
+
+static int lxsys_mount(vfs_t *, vnode_t *, mounta_t *, cred_t *);
+static int lxsys_unmount(vfs_t *, int, cred_t *);
+static int lxsys_root(vfs_t *, vnode_t **);
+static int lxsys_statvfs(vfs_t *, statvfs64_t *);
+static int lxsys_init(int, char *);
+
+static vfsdef_t vfw = {
+ VFSDEF_VERSION,
+ "lx_sysfs",
+ lxsys_init,
+ VSW_ZMOUNT,
+ NULL
+};
+
+/*
+ * Module linkage information for the kernel.
+ */
+extern struct mod_ops mod_fsops;
+
+static struct modlfs modlfs = {
+ &mod_fsops, "lx brand sysfs", &vfw
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, (void *)&modlfs, NULL
+};
+
+int
+_init(void)
+{
+ return (mod_install(&modlinkage));
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ int retval;
+
+ /*
+ * attempt to unload the module
+ */
+ if ((retval = mod_remove(&modlinkage)) != 0)
+ goto done;
+
+ /*
+ * destroy lxsys_node cache
+ */
+ lxsys_fininodecache();
+
+ /*
+ * clean out the vfsops and vnodeops
+ */
+ (void) vfs_freevfsops_by_type(lxsysfstype);
+ vn_freevnodeops(lxsys_vnodeops);
+
+ mutex_destroy(&lxsys_mount_lock);
+done:
+ return (retval);
+}
+
+static int
+lxsys_init(int fstype, char *name)
+{
+ static const fs_operation_def_t lxsys_vfsops_template[] = {
+ VFSNAME_MOUNT, { .vfs_mount = lxsys_mount },
+ VFSNAME_UNMOUNT, { .vfs_unmount = lxsys_unmount },
+ VFSNAME_ROOT, { .vfs_root = lxsys_root },
+ VFSNAME_STATVFS, { .vfs_statvfs = lxsys_statvfs },
+ NULL, NULL
+ };
+ extern const fs_operation_def_t lxsys_vnodeops_template[];
+ int error;
+ major_t dev;
+
+ lxsysfstype = fstype;
+ ASSERT(lxsysfstype != 0);
+
+ mutex_init(&lxsys_mount_lock, NULL, MUTEX_DEFAULT, NULL);
+
+ /*
+ * Associate VFS ops vector with this fstype.
+ */
+ error = vfs_setfsops(fstype, lxsys_vfsops_template, NULL);
+ if (error != 0) {
+ cmn_err(CE_WARN, "lxsys_init: bad vfs ops template");
+ return (error);
+ }
+
+ /*
+ * Set up vnode ops vector too.
+ */
+ error = vn_make_ops(name, lxsys_vnodeops_template, &lxsys_vnodeops);
+ if (error != 0) {
+ (void) vfs_freevfsops_by_type(fstype);
+ cmn_err(CE_WARN, "lxsys_init: bad vnode ops template");
+ return (error);
+ }
+
+ /*
+ * Assign a unique "device" number (used by stat(2)).
+ */
+ if ((dev = getudev()) == (major_t)-1) {
+ cmn_err(CE_WARN, "lxsys_init: can't get unique device number");
+ dev = 0;
+ }
+
+ /*
+ * Make the pseudo device
+ */
+ lxsysdev = makedevice(dev, 0);
+
+ /*
+ * Initialise cache for lxsys_nodes
+ */
+ lxsys_initnodecache();
+
+ return (0);
+}
+
+static int
+lxsys_mount(vfs_t *vfsp, vnode_t *mvp, mounta_t *uap, cred_t *cr)
+{
+ lxsys_mnt_t *lxsys_mnt;
+ zone_t *zone = curproc->p_zone;
+
+ /*
+ * must be root to mount
+ */
+ if (secpolicy_fs_mount(cr, mvp, vfsp) != 0)
+ return (EPERM);
+
+ /*
+ * mount point must be a directory
+ */
+ if (mvp->v_type != VDIR)
+ return (ENOTDIR);
+
+ if (zone == global_zone) {
+ zone_t *mntzone;
+
+ mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));
+ zone_rele(mntzone);
+ if (zone != mntzone)
+ return (EBUSY);
+ }
+
+ /*
+ * Having the resource be anything but "lxsys" doesn't make sense
+ */
+ vfs_setresource(vfsp, "lxsys", 0);
+
+ lxsys_mnt = kmem_alloc(sizeof (*lxsys_mnt), KM_SLEEP);
+
+ mutex_enter(&lxsys_mount_lock);
+
+ /*
+ * Ensure we don't allow overlaying mounts
+ */
+ mutex_enter(&mvp->v_lock);
+ if ((uap->flags & MS_OVERLAY) == 0 &&
+ (mvp->v_count > 1 || (mvp->v_flag & VROOT))) {
+ mutex_exit(&mvp->v_lock);
+ mutex_exit(&lxsys_mount_lock);
+ kmem_free(lxsys_mnt, sizeof ((*lxsys_mnt)));
+ return (EBUSY);
+ }
+ mutex_exit(&mvp->v_lock);
+
+
+ mutex_init(&lxsys_mnt->lxsysm_lock, NULL, MUTEX_DEFAULT, NULL);
+ zone_hold(lxsys_mnt->lxsysm_zone = zone);
+
+ /* Arbitrarily set the parent vnode to the mounted over directory */
+ lxsys_mnt->lxsysm_node = lxsys_getnode(mvp, LXSYS_STATIC,
+ LXSYS_INST_ROOT, 0);
+ lxsys_mnt->lxsysm_node->lxsys_next = NULL;
+
+ /* Correctly set the fs for the root node */
+ lxsys_mnt->lxsysm_node->lxsys_vnode->v_vfsp = vfsp;
+
+ vfs_make_fsid(&vfsp->vfs_fsid, lxsysdev, lxsysfstype);
+ vfsp->vfs_bsize = DEV_BSIZE;
+ vfsp->vfs_fstype = lxsysfstype;
+ vfsp->vfs_data = (caddr_t)lxsys_mnt;
+ vfsp->vfs_dev = lxsysdev;
+
+ mutex_exit(&lxsys_mount_lock);
+
+ return (0);
+}
+
+static int
+lxsys_unmount(vfs_t *vfsp, int flag, cred_t *cr)
+{
+ lxsys_mnt_t *lxsys_mnt = (lxsys_mnt_t *)vfsp->vfs_data;
+ lxsys_node_t *lnp;
+ vnode_t *vp;
+ int count;
+
+ VERIFY(lxsys_mnt != NULL);
+
+ mutex_enter(&lxsys_mount_lock);
+
+ /* must be root to unmount */
+ if (secpolicy_fs_unmount(cr, vfsp) != 0) {
+ mutex_exit(&lxsys_mount_lock);
+ return (EPERM);
+ }
+
+ /* forced unmount is not supported by this fs */
+ if (flag & MS_FORCE) {
+ mutex_exit(&lxsys_mount_lock);
+ return (ENOTSUP);
+ }
+
+ /* Ensure that no vnodes are in use on this mount point. */
+ lnp = lxsys_mnt->lxsysm_node;
+ vp = LXSTOV(lnp);
+ mutex_enter(&vp->v_lock);
+ count = vp->v_count;
+ mutex_exit(&vp->v_lock);
+ if (count > 1) {
+ mutex_exit(&lxsys_mount_lock);
+ return (EBUSY);
+ }
+
+ /*
+ * If there are no references to the root vnode the list of persistent
+ * static vnodes should be empty
+ */
+ VERIFY(lnp->lxsys_next == NULL);
+
+ (void) dnlc_purge_vfsp(vfsp, 0);
+
+ lxsys_mnt->lxsysm_node = NULL;
+ lxsys_freenode(lnp);
+ zone_rele(lxsys_mnt->lxsysm_zone);
+ vfsp->vfs_data = NULL;
+ kmem_free(lxsys_mnt, sizeof (*lxsys_mnt));
+
+ mutex_exit(&lxsys_mount_lock);
+
+ return (0);
+}
+
+static int
+lxsys_root(vfs_t *vfsp, vnode_t **vpp)
+{
+ lxsys_mnt_t *lxsm = (lxsys_mnt_t *)vfsp->vfs_data;
+ vnode_t *vp;
+
+ VERIFY(lxsm != NULL);
+ VERIFY(lxsm->lxsysm_node != NULL);
+
+ vp = LXSTOV(lxsm->lxsysm_node);
+ VN_HOLD(vp);
+ *vpp = vp;
+
+ return (0);
+}
+
+static int
+lxsys_statvfs(vfs_t *vfsp, statvfs64_t *sp)
+{
+ dev32_t d32;
+
+ bzero((caddr_t)sp, sizeof (*sp));
+ sp->f_bsize = DEV_BSIZE;
+ sp->f_frsize = DEV_BSIZE;
+ sp->f_blocks = (fsblkcnt64_t)0;
+ sp->f_bfree = (fsblkcnt64_t)0;
+ sp->f_bavail = (fsblkcnt64_t)0;
+ sp->f_files = (fsfilcnt64_t)3;
+ sp->f_ffree = (fsfilcnt64_t)0; /* none */
+ sp->f_favail = (fsfilcnt64_t)0; /* none */
+ (void) cmpldev(&d32, vfsp->vfs_dev);
+ sp->f_fsid = d32;
+ /* It is guaranteed that vsw_name will fit in f_basetype */
+ (void) strcpy(sp->f_basetype, vfssw[lxsysfstype].vsw_name);
+ sp->f_flag = vf_to_stf(vfsp->vfs_flag);
+ sp->f_namemax = 64; /* quite arbitrary */
+ bzero(sp->f_fstr, sizeof (sp->f_fstr));
+
+ /* We know f_fstr is 32 chars */
+ (void) strcpy(sp->f_fstr, "/sys");
+ (void) strcpy(&sp->f_fstr[6], "/sys");
+
+ return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/sysfs/lx_sysvnops.c b/usr/src/uts/common/brand/lx/sysfs/lx_sysvnops.c
new file mode 100644
index 0000000000..f3df77428c
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sysfs/lx_sysvnops.c
@@ -0,0 +1,1796 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+/*
+ * lx_sysfs -- a Linux-compatible /sys for the LX brand
+ */
+
+#include <vm/seg_vn.h>
+#include <sys/sdt.h>
+#include <sys/strlog.h>
+#include <sys/stropts.h>
+#include <sys/cmn_err.h>
+#include <sys/lx_brand.h>
+#include <sys/x86_archext.h>
+#include <sys/archsystm.h>
+#include <sys/fp.h>
+#include <sys/pool_pset.h>
+#include <sys/pset.h>
+#include <sys/zone.h>
+#include <sys/pghw.h>
+#include <sys/vfs_opreg.h>
+#include <sys/param.h>
+#include <sys/utsname.h>
+#include <sys/lx_misc.h>
+#include <sys/brand.h>
+#include <sys/cred_impl.h>
+#include <sys/tihdr.h>
+#include <sys/sunddi.h>
+#include <sys/vnode.h>
+#include <sys/netstack.h>
+#include <sys/ethernet.h>
+#include <inet/ip_arp.h>
+
+#include "lx_sysfs.h"
+
+/*
+ * Pointer to the vnode ops vector for this fs.
+ * This is instantiated in lxsys_init() in lx_sysvfsops.c
+ */
+vnodeops_t *lxsys_vnodeops;
+
+static int lxsys_open(vnode_t **, int, cred_t *, caller_context_t *);
+static int lxsys_close(vnode_t *, int, int, offset_t, cred_t *,
+ caller_context_t *);
+static int lxsys_read(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
+static int lxsys_getattr(vnode_t *, vattr_t *, int, cred_t *,
+ caller_context_t *);
+static int lxsys_access(vnode_t *, int, int, cred_t *, caller_context_t *);
+static int lxsys_lookup(vnode_t *, char *, vnode_t **,
+ pathname_t *, int, vnode_t *, cred_t *, caller_context_t *, int *,
+ pathname_t *);
+static int lxsys_readdir(vnode_t *, uio_t *, cred_t *, int *,
+ caller_context_t *, int);
+static int lxsys_readlink(vnode_t *, uio_t *, cred_t *, caller_context_t *);
+static int lxsys_cmp(vnode_t *, vnode_t *, caller_context_t *);
+static int lxsys_sync(void);
+static void lxsys_inactive(vnode_t *, cred_t *, caller_context_t *);
+
+static vnode_t *lxsys_lookup_static(lxsys_node_t *, char *);
+static vnode_t *lxsys_lookup_class_netdir(lxsys_node_t *, char *);
+static vnode_t *lxsys_lookup_devices_virtual_netdir(lxsys_node_t *, char *);
+static vnode_t *lxsys_lookup_blockdir(lxsys_node_t *, char *);
+static vnode_t *lxsys_lookup_devices_zfsdir(lxsys_node_t *, char *);
+static vnode_t *lxsys_lookup_devices_syscpu(lxsys_node_t *, char *);
+static vnode_t *lxsys_lookup_devices_syscpuinfo(lxsys_node_t *, char *);
+static vnode_t *lxsys_lookup_devices_sysnode(lxsys_node_t *, char *);
+
+static int lxsys_read_static(lxsys_node_t *, lxsys_uiobuf_t *);
+static int lxsys_read_devices_virtual_net(lxsys_node_t *, lxsys_uiobuf_t *);
+static int lxsys_read_devices_zfs_block(lxsys_node_t *, lxsys_uiobuf_t *);
+static int lxsys_read_devices_sysnode(lxsys_node_t *, lxsys_uiobuf_t *);
+
+static int lxsys_readdir_devices_syscpu(lxsys_node_t *, uio_t *, int *);
+static int lxsys_readdir_devices_syscpuinfo(lxsys_node_t *, uio_t *, int *);
+static int lxsys_readdir_devices_sysnode(lxsys_node_t *, uio_t *, int *);
+static int lxsys_readdir_static(lxsys_node_t *, uio_t *, int *);
+static int lxsys_readdir_class_netdir(lxsys_node_t *, uio_t *, int *);
+static int lxsys_readdir_devices_virtual_netdir(lxsys_node_t *, uio_t *, int *);
+static int lxsys_readdir_blockdir(lxsys_node_t *, uio_t *, int *);
+static int lxsys_readdir_devices_zfsdir(lxsys_node_t *, uio_t *, int *);
+
+static int lxsys_readlink_class_net(lxsys_node_t *, char *, size_t);
+static int lxsys_readlink_block(lxsys_node_t *, char *, size_t);
+
+/*
+ * The lx /sys vnode operations vector
+ */
+const fs_operation_def_t lxsys_vnodeops_template[] = {
+ VOPNAME_OPEN, { .vop_open = lxsys_open },
+ VOPNAME_CLOSE, { .vop_close = lxsys_close },
+ VOPNAME_READ, { .vop_read = lxsys_read },
+ VOPNAME_GETATTR, { .vop_getattr = lxsys_getattr },
+ VOPNAME_ACCESS, { .vop_access = lxsys_access },
+ VOPNAME_LOOKUP, { .vop_lookup = lxsys_lookup },
+ VOPNAME_READDIR, { .vop_readdir = lxsys_readdir },
+ VOPNAME_READLINK, { .vop_readlink = lxsys_readlink },
+ VOPNAME_FSYNC, { .error = lxsys_sync },
+ VOPNAME_SEEK, { .error = lxsys_sync },
+ VOPNAME_INACTIVE, { .vop_inactive = lxsys_inactive },
+ VOPNAME_CMP, { .vop_cmp = lxsys_cmp },
+ NULL, NULL
+};
+
+/*
+ * Sysfs Inode format:
+ * 0000AABBBBCC
+ *
+ * AA - TYPE
+ * BBBB - INSTANCE
+ * CC - ENDPOINT
+ *
+ * Where TYPE is one of:
+ * 1 - SYS_STATIC
+ * 2 - SYS_CLASS_NET
+ * 3 - SYS_DEV_NET
+ * 4 - SYS_BLOCK
+ * 5 - SYS_DEV_ZFS
+ * 6 - SYS_DEV_SYS_CPU
+ * 7 - SYS_DEV_SYS_CPUINFO
+ * 8 - SYS_DEV_SYS_NODE
+ *
+ * Static entries will have assigned INSTANCE identifiers:
+ * - 0x00: /sys
+ * - 0x01: /sys/class
+ * - 0x02: /sys/devices
+ * - 0x03: /sys/fs
+ * - 0x04: /sys/class/net
+ * - 0x05: /sys/devices/virtual
+ * - 0x06: /sys/devices/system
+ * - 0x07: /sys/fs/cgroup
+ * - 0x08: /sys/devices/virtual/net
+ * - 0x09: /sys/block
+ * - 0x0a: /sys/devices/zfs
+ * - 0x0b: /sys/devices/system/cpu
+ * - 0x0c: /sys/devices/system/cpu/kernel_max
+ * - 0x0d: /sys/devices/system/node
+ *
+ * Dynamic /sys/class/net/<interface> symlinks will use an INSTANCE derived
+ * from the corresonding ifindex.
+ *
+ * Dynamic /sys/devices/virtual/net/<interface>/<entries> directories will use
+ * an INSTANCE derived from the ifindex and statically assigned ENDPOINT IDs
+ * for the contained entries.
+ *
+ * Dynamic /sys/block/<dev> symlinks will use an INSTANCE derived from the
+ * device major and instance from records listed in kstat or zvols.
+ *
+ * Dynamic /sys/devices/zfs/<dev> directories will use an INSTANCE derived from
+ * the emulated minor number.
+ *
+ * Static/Dynamic /sys/devices/system/cpu contains a static kernel_max file
+ * and a dynamic set of cpuN subdirectories.
+ *
+ * Static/Dynamic /sys/devices/system/node/node0 currently only contains a
+ * static cpulist file, but will likely need future dynamic entries for cpuN
+ * symlinks, and perhaps other static files. By only providing 'node0' we
+ * pretend that there is only a single NUMA node available to a zone (trying to
+ * be NUMA-aware inside a zone is generally not going to work anyway).
+ */
+
+#define LXSYS_INST_CLASSDIR 0x1
+#define LXSYS_INST_DEVICESDIR 0x2
+#define LXSYS_INST_FSDIR 0x3
+#define LXSYS_INST_CLASS_NETDIR 0x4
+#define LXSYS_INST_DEVICES_VIRTUALDIR 0x5
+#define LXSYS_INST_DEVICES_SYSTEMDIR 0x6
+#define LXSYS_INST_FS_CGROUPDIR 0x7
+#define LXSYS_INST_DEVICES_VIRTUAL_NETDIR 0x8
+#define LXSYS_INST_BLOCKDIR 0x9
+#define LXSYS_INST_DEVICES_ZFSDIR 0xa
+#define LXSYS_INST_DEVICES_SYSCPU 0xb
+#define LXSYS_INST_DEV_SYSCPU_KMAX 0xc
+#define LXSYS_INST_DEVICES_SYSNODE 0xd
+
+/*
+ * file contents of an lx /sys directory.
+ */
+static lxsys_dirent_t dirlist_root[] = {
+ { LXSYS_INST_BLOCKDIR, "block" },
+ { LXSYS_INST_CLASSDIR, "class" },
+ { LXSYS_INST_DEVICESDIR, "devices" },
+ { LXSYS_INST_FSDIR, "fs" }
+};
+static lxsys_dirent_t dirlist_empty[] = {};
+static lxsys_dirent_t dirlist_class[] = {
+ { LXSYS_INST_CLASS_NETDIR, "net" }
+};
+static lxsys_dirent_t dirlist_fs[] = {
+ { LXSYS_INST_FS_CGROUPDIR, "cgroup" }
+};
+static lxsys_dirent_t dirlist_devices[] = {
+ { LXSYS_INST_DEVICES_SYSTEMDIR, "system" },
+ { LXSYS_INST_DEVICES_VIRTUALDIR, "virtual" },
+ { LXSYS_INST_DEVICES_ZFSDIR, "zfs" }
+};
+static lxsys_dirent_t dirlist_devices_virtual[] = {
+ { LXSYS_INST_DEVICES_VIRTUAL_NETDIR, "net" }
+};
+
+/*
+ * XXX: The presence of the cpu tree in sysfs triggers new behavior in various
+ * applications. The glibc code which accesses this part of the tree expects
+ * dirents to have the d_type field populated. We cannot implement the 'cpu'
+ * hierarchy until that is addressed. One such application is java, which
+ * becomes unstable due to the incorrect data from glibc.
+ */
+static lxsys_dirent_t dirlist_devices_system[] = {
+ /* { LXSYS_INST_DEVICES_SYSCPU, "cpu" }, */
+ { LXSYS_INST_DEVICES_SYSNODE, "node" }
+};
+
+#define LXSYS_ENDP_NET_ADDRESS 1
+#define LXSYS_ENDP_NET_ADDRLEN 2
+#define LXSYS_ENDP_NET_FLAGS 3
+#define LXSYS_ENDP_NET_IFINDEX 4
+#define LXSYS_ENDP_NET_MTU 5
+#define LXSYS_ENDP_NET_TXQLEN 6
+#define LXSYS_ENDP_NET_TYPE 7
+
+#define LXSYS_ENDP_BLOCK_DEVICE 1
+
+#define LXSYS_ENDP_NODE_CPULIST 1
+
+static lxsys_dirent_t dirlist_devices_virtual_net[] = {
+ { LXSYS_ENDP_NET_ADDRESS, "address" },
+ { LXSYS_ENDP_NET_ADDRLEN, "addr_len" },
+ { LXSYS_ENDP_NET_FLAGS, "flags" },
+ { LXSYS_ENDP_NET_IFINDEX, "ifindex" },
+ { LXSYS_ENDP_NET_MTU, "mtu" },
+ { LXSYS_ENDP_NET_TXQLEN, "tx_queue_len" },
+ { LXSYS_ENDP_NET_TYPE, "type" }
+};
+
+static lxsys_dirent_t dirlist_devices_zfs_block[] = {
+ { LXSYS_ENDP_BLOCK_DEVICE, "device" }
+};
+
+static lxsys_dirent_t dirlist_devices_sysnode[] = {
+ { LXSYS_ENDP_NODE_CPULIST, "cpulist" }
+};
+
+#define SYSDIRLISTSZ(l) (sizeof (l) / sizeof ((l)[0]))
+
+#define SYSDLENT(i, l) { i, l, SYSDIRLISTSZ(l) }
+static lxsys_dirlookup_t lxsys_dirlookup[] = {
+ SYSDLENT(LXSYS_INST_ROOT, dirlist_root),
+ SYSDLENT(LXSYS_INST_CLASSDIR, dirlist_class),
+ SYSDLENT(LXSYS_INST_FSDIR, dirlist_fs),
+ SYSDLENT(LXSYS_INST_FS_CGROUPDIR, dirlist_empty),
+ SYSDLENT(LXSYS_INST_DEVICESDIR, dirlist_devices),
+ SYSDLENT(LXSYS_INST_DEVICES_SYSTEMDIR, dirlist_devices_system),
+ SYSDLENT(LXSYS_INST_DEVICES_VIRTUALDIR, dirlist_devices_virtual),
+ SYSDLENT(LXSYS_INST_DEVICES_SYSNODE, dirlist_devices_sysnode)
+};
+
+
+/*
+ * Array of lookup functions, indexed by lx /sys file type.
+ */
+static vnode_t *(*lxsys_lookup_function[LXSYS_MAXTYPE])() = {
+ NULL, /* LXSYS_NONE */
+ lxsys_lookup_static, /* LXSYS_STATIC */
+ lxsys_lookup_class_netdir, /* LXSYS_CLASS_NET */
+ lxsys_lookup_devices_virtual_netdir, /* LXSYS_DEV_NET */
+ lxsys_lookup_blockdir, /* LXSYS_BLOCK */
+ lxsys_lookup_devices_zfsdir, /* LXSYS_DEV_ZFS */
+ lxsys_lookup_devices_syscpu, /* LXSYS_DEV_SYS_CPU */
+ lxsys_lookup_devices_syscpuinfo, /* LXSYS_DEV_SYS_CPUINFO */
+ lxsys_lookup_devices_sysnode, /* LXSYS_DEV_SYS_NODE */
+};
+
+/*
+ * Array of readdir functions, indexed by /sys file type.
+ */
+static int (*lxsys_readdir_function[LXSYS_MAXTYPE])() = {
+ NULL, /* LXSYS_NONE */
+ lxsys_readdir_static, /* LXSYS_STATIC */
+ lxsys_readdir_class_netdir, /* LXSYS_CLASS_NET */
+ lxsys_readdir_devices_virtual_netdir, /* LXSYS_DEV_NET */
+ lxsys_readdir_blockdir, /* LXSYS_BLOCK */
+ lxsys_readdir_devices_zfsdir, /* LXSYS_DEV_ZFS */
+ lxsys_readdir_devices_syscpu, /* LXSYS_DEV_SYS_CPU */
+ lxsys_readdir_devices_syscpuinfo, /* LXSYS_DEV_SYS_CPUINFO */
+ lxsys_readdir_devices_sysnode, /* LXSYS_DEV_SYS_NODE */
+};
+
+/*
+ * Array of read functions, indexed by /sys file type.
+ */
+static int (*lxsys_read_function[LXSYS_MAXTYPE])() = {
+ NULL, /* LXSYS_NONE */
+ lxsys_read_static, /* LXSYS_STATIC */
+ NULL, /* LXSYS_CLASS_NET */
+ lxsys_read_devices_virtual_net, /* LXSYS_DEV_NET */
+ NULL, /* LXSYS_BLOCK */
+ lxsys_read_devices_zfs_block, /* LXSYS_DEV_ZFS */
+ NULL, /* LXSYS_DEV_SYS_CPU */
+ NULL, /* LXSYS_DEV_SYS_CPUINFO */
+ lxsys_read_devices_sysnode, /* LXSYS_DEV_SYS_NODE */
+};
+
+/*
+ * Array of readlink functions, indexed by /sys file type.
+ */
+static int (*lxsys_readlink_function[LXSYS_MAXTYPE])() = {
+ NULL, /* LXSYS_NONE */
+ NULL, /* LXSYS_STATIC */
+ lxsys_readlink_class_net, /* LXSYS_CLASS_NET */
+ NULL, /* LXSYS_DEV_NET */
+ lxsys_readlink_block, /* LXSYS_BLOCK */
+ NULL, /* LXSYS_DEV_ZFS */
+ NULL, /* LXSYS_DEV_SYS_CPU */
+ NULL, /* LXSYS_DEV_SYS_CPUINFO */
+ NULL, /* LXSYS_DEV_SYS_NODE */
+};
+
+typedef struct lxsys_cpu_info {
+ processorid_t cpu_id;
+ processorid_t cpu_seqid;
+} lxsys_cpu_info_t;
+
+/*
+ * lxsys_open(): Vnode operation for VOP_OPEN()
+ */
+static int
+lxsys_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
+{
+ /*
+ * We only allow reading in this file system
+ */
+ if (flag & FWRITE)
+ return (EROFS);
+
+ return (0);
+}
+
+
+/*
+ * lxsys_close(): Vnode operation for VOP_CLOSE()
+ */
+/* ARGSUSED */
+static int
+lxsys_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
+ caller_context_t *ct)
+{
+ return (0);
+}
+
+
+/*
+ * lxsys_read(): Vnode operation for VOP_READ()
+ * All we currently have in this fs are directories.
+ */
+/* ARGSUSED */
+static int
+lxsys_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
+ caller_context_t *ct)
+{
+ lxsys_node_t *lnp = VTOLXS(vp);
+ lxsys_nodetype_t type = lnp->lxsys_type;
+ int (*rlfunc)();
+ int error;
+ lxsys_uiobuf_t *luio;
+
+ VERIFY(type > LXSYS_NONE && type < LXSYS_MAXTYPE);
+
+ if (vp->v_type == VDIR) {
+ return (EISDIR);
+ }
+
+ rlfunc = lxsys_read_function[type];
+ if (rlfunc != NULL) {
+ luio = lxsys_uiobuf_new(uiop);
+ if ((error = rlfunc(lnp, luio)) == 0) {
+ error = lxsys_uiobuf_flush(luio);
+ }
+ lxsys_uiobuf_free(luio);
+ } else {
+ error = EIO;
+ }
+
+ return (error);
+}
+
+/*
+ * lxsys_getattr(): Vnode operation for VOP_GETATTR()
+ */
+static int
+lxsys_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
+ caller_context_t *ct)
+{
+ register lxsys_node_t *lxsnp = VTOLXS(vp);
+
+ /* Default attributes, that may be overridden below */
+ bzero(vap, sizeof (*vap));
+ vap->va_atime = vap->va_mtime = vap->va_ctime = lxsnp->lxsys_time;
+ vap->va_nlink = 1;
+ vap->va_type = vp->v_type;
+ vap->va_mode = lxsnp->lxsys_mode;
+ vap->va_fsid = vp->v_vfsp->vfs_dev;
+ vap->va_blksize = DEV_BSIZE;
+ vap->va_uid = lxsnp->lxsys_uid;
+ vap->va_gid = lxsnp->lxsys_gid;
+ vap->va_nodeid = lxsnp->lxsys_ino;
+
+ vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size);
+ return (0);
+}
+
+/*
+ * lxsys_access(): Vnode operation for VOP_ACCESS()
+ */
+static int
+lxsys_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct)
+{
+ lxsys_node_t *lxsnp = VTOLXS(vp);
+ int shift = 0;
+
+ /*
+ * Although our lx sysfs is basically a read only file system, Linux
+ * expects it to be writable so we can't just error if (mode & VWRITE).
+ */
+
+ /* If user is root allow access regardless of permission bits */
+ if (secpolicy_proc_access(cr) == 0)
+ return (0);
+
+ /*
+ * Access check is based on only one of owner, group, public. If not
+ * owner, then check group. If not a member of the group, then check
+ * public access.
+ */
+ if (crgetuid(cr) != lxsnp->lxsys_uid) {
+ shift += 3;
+ if (!groupmember((uid_t)lxsnp->lxsys_gid, cr))
+ shift += 3;
+ }
+
+ mode &= ~(lxsnp->lxsys_mode << shift);
+
+ if (mode == 0)
+ return (0);
+
+ return (EACCES);
+}
+
+/*
+ * lxsys_lookup(): Vnode operation for VOP_LOOKUP()
+ */
+/* ARGSUSED */
+static int
+lxsys_lookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pathp,
+ int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
+ int *direntflags, pathname_t *realpnp)
+{
+ lxsys_node_t *lxsnp = VTOLXS(dp);
+ lxsys_nodetype_t type = lxsnp->lxsys_type;
+ int error;
+
+ VERIFY(dp->v_type == VDIR);
+ VERIFY(type > LXSYS_NONE && type < LXSYS_MAXTYPE);
+
+ /*
+ * restrict lookup permission to owner or root
+ */
+ if ((error = lxsys_access(dp, VEXEC, 0, cr, ct)) != 0) {
+ return (error);
+ }
+
+ /*
+ * Just return the parent vnode if that's where we are trying to go.
+ */
+ if (strcmp(comp, "..") == 0) {
+ VN_HOLD(lxsnp->lxsys_parentvp);
+ *vpp = lxsnp->lxsys_parentvp;
+ return (0);
+ }
+
+ /*
+ * Special handling for directory searches. Note: null component name
+ * denotes that the current directory is being searched.
+ */
+ if ((dp->v_type == VDIR) && (*comp == '\0' || strcmp(comp, ".") == 0)) {
+ VN_HOLD(dp);
+ *vpp = dp;
+ return (0);
+ }
+
+ *vpp = (lxsys_lookup_function[type](lxsnp, comp));
+ return ((*vpp == NULL) ? ENOENT : 0);
+}
+
+static lxsys_node_t *
+lxsys_lookup_disk(lxsys_node_t *ldp, char *comp, lxsys_nodetype_t type)
+{
+ lxsys_node_t *lnp = NULL;
+ lx_zone_data_t *lxzdata;
+ lx_virt_disk_t *vd;
+
+ lxzdata = ztolxzd(curproc->p_zone);
+ if (lxzdata == NULL)
+ return (NULL);
+ ASSERT(lxzdata->lxzd_vdisks != NULL);
+
+ vd = list_head(lxzdata->lxzd_vdisks);
+ while (vd != NULL) {
+ int inst = getminor(vd->lxvd_emul_dev) & 0xffff;
+
+ if (strcmp(vd->lxvd_name, comp) == 0 && inst != 0) {
+ lnp = lxsys_getnode(ldp->lxsys_vnode, type, inst, 0);
+ break;
+ }
+
+ vd = list_next(lxzdata->lxzd_vdisks, vd);
+ }
+
+ return (lnp);
+}
+
+static vnode_t *
+lxsys_lookup_static(lxsys_node_t *ldp, char *comp)
+{
+ lxsys_dirent_t *dirent = NULL;
+ int i, len = 0;
+
+ for (i = 0; i < SYSDIRLISTSZ(lxsys_dirlookup); i++) {
+ if (ldp->lxsys_instance == lxsys_dirlookup[i].dl_instance) {
+ dirent = lxsys_dirlookup[i].dl_list;
+ len = lxsys_dirlookup[i].dl_length;
+ break;
+ }
+ }
+ if (dirent == NULL) {
+ return (NULL);
+ }
+
+ for (i = 0; i < len; i++) {
+ if (strncmp(comp, dirent[i].d_name, MAXPATHLEN) == 0) {
+ lxsys_nodetype_t node_type = ldp->lxsys_type;
+ unsigned int node_instance = 0;
+ lxsys_node_t *lnp;
+
+ switch (dirent[i].d_idnum) {
+ case LXSYS_INST_BLOCKDIR:
+ node_type = LXSYS_BLOCK;
+ break;
+ case LXSYS_INST_CLASS_NETDIR:
+ node_type = LXSYS_CLASS_NET;
+ break;
+ case LXSYS_INST_DEVICES_VIRTUAL_NETDIR:
+ node_type = LXSYS_DEV_NET;
+ break;
+ case LXSYS_INST_DEVICES_ZFSDIR:
+ node_type = LXSYS_DEV_ZFS;
+ break;
+ case LXSYS_INST_DEVICES_SYSCPU:
+ node_type = LXSYS_DEV_SYS_CPU;
+ break;
+ case LXSYS_INST_DEVICES_SYSNODE:
+ node_type = LXSYS_DEV_SYS_NODE;
+ break;
+ default:
+ /* Another static node */
+ node_instance = dirent[i].d_idnum;
+ }
+ if (node_type == LXSYS_STATIC) {
+ lnp = lxsys_getnode_static(ldp->lxsys_vnode,
+ node_instance);
+ } else {
+ lnp = lxsys_getnode(ldp->lxsys_vnode,
+ node_type, node_instance, 0);
+ }
+ return (lnp->lxsys_vnode);
+ }
+ }
+ return (NULL);
+}
+
+static vnode_t *
+lxsys_lookup_class_netdir(lxsys_node_t *ldp, char *comp)
+{
+ vnode_t *result = NULL;
+ lxsys_node_t *lnp;
+ netstack_t *ns;
+ ip_stack_t *ipst;
+ avl_tree_t *phytree;
+ phyint_t *phyi;
+ char ifname[LIFNAMSIZ];
+
+ if (ldp->lxsys_type != LXSYS_CLASS_NET ||
+ ldp->lxsys_instance != 0) {
+ /* Lookups only allowed at directory level */
+ return (NULL);
+ }
+
+ (void) strncpy(ifname, comp, LIFNAMSIZ);
+ lx_ifname_convert(ifname, LX_IF_TONATIVE);
+
+ if ((ns = lxsys_netstack(ldp)) == NULL) {
+ return (NULL);
+ }
+ ipst = ns->netstack_ip;
+ rw_enter(&ipst->ips_ill_g_lock, RW_READER);
+
+ phytree = &ipst->ips_phyint_g_list->phyint_list_avl_by_name;
+ phyi = avl_find(phytree, ifname, NULL);
+ if (phyi != NULL) {
+ lnp = lxsys_getnode(ldp->lxsys_vnode, ldp->lxsys_type,
+ phyi->phyint_ifindex, 0);
+ result = lnp->lxsys_vnode;
+ result->v_type = VLNK;
+ }
+
+ rw_exit(&ipst->ips_ill_g_lock);
+ netstack_rele(ns);
+
+ return (result);
+}
+
+static vnode_t *
+lxsys_lookup_devices_virtual_netdir(lxsys_node_t *ldp, char *comp)
+{
+ lxsys_node_t *lnp;
+
+ if (ldp->lxsys_instance == 0) {
+ /* top-level interface listing */
+ vnode_t *result = NULL;
+ netstack_t *ns;
+ ip_stack_t *ipst;
+ avl_tree_t *phytree;
+ phyint_t *phyi;
+ char ifname[LIFNAMSIZ];
+
+ (void) strncpy(ifname, comp, LIFNAMSIZ);
+ lx_ifname_convert(ifname, LX_IF_TONATIVE);
+
+ if ((ns = lxsys_netstack(ldp)) == NULL) {
+ return (NULL);
+ }
+ ipst = ns->netstack_ip;
+ rw_enter(&ipst->ips_ill_g_lock, RW_READER);
+
+ phytree = &ipst->ips_phyint_g_list->phyint_list_avl_by_name;
+ phyi = avl_find(phytree, ifname, NULL);
+ if (phyi != NULL) {
+ lnp = lxsys_getnode(ldp->lxsys_vnode, ldp->lxsys_type,
+ phyi->phyint_ifindex, 0);
+ result = lnp->lxsys_vnode;
+ }
+
+ rw_exit(&ipst->ips_ill_g_lock);
+ netstack_rele(ns);
+
+ return (result);
+ } else if (ldp->lxsys_endpoint == 0) {
+ /* interface-level sub-item listing */
+ int i, size;
+ lxsys_dirent_t *dirent;
+
+ size = SYSDIRLISTSZ(dirlist_devices_virtual_net);
+ for (i = 0; i < size; i++) {
+ dirent = &dirlist_devices_virtual_net[i];
+ if (strncmp(comp, dirent->d_name, LXSNSIZ) == 0) {
+ lnp = lxsys_getnode(ldp->lxsys_vnode,
+ ldp->lxsys_type, ldp->lxsys_instance,
+ dirent->d_idnum);
+ lnp->lxsys_vnode->v_type = VREG;
+ lnp->lxsys_mode = 0444;
+ return (lnp->lxsys_vnode);
+ }
+ }
+ }
+
+ return (NULL);
+}
+
+static vnode_t *
+lxsys_lookup_blockdir(lxsys_node_t *ldp, char *comp)
+{
+ lxsys_node_t *lnp;
+
+ if (ldp->lxsys_instance == 0) {
+ /* top-level dev listing */
+ lnp = lxsys_lookup_disk(ldp, comp, LXSYS_BLOCK);
+
+ if (lnp != NULL) {
+ lnp->lxsys_vnode->v_type = VLNK;
+ return (lnp->lxsys_vnode);
+ }
+ }
+
+ return (NULL);
+}
+
+static vnode_t *
+lxsys_lookup_devices_zfsdir(lxsys_node_t *ldp, char *comp)
+{
+ lxsys_node_t *lnp;
+
+ if (ldp->lxsys_instance == 0) {
+ /* top-level dev listing */
+ lnp = lxsys_lookup_disk(ldp, comp, LXSYS_DEV_ZFS);
+
+ if (lnp != NULL) {
+ return (lnp->lxsys_vnode);
+ }
+ } else if (ldp->lxsys_endpoint == 0) {
+ /* disk-level sub-item listing */
+ int i, size;
+ lxsys_dirent_t *dirent;
+
+ /*
+ * All of these entries currently look like regular files
+ * but on a real Linux system some will be subdirs. This should
+ * be fixed when we populate the directory for real.
+ */
+ size = SYSDIRLISTSZ(dirlist_devices_zfs_block);
+ for (i = 0; i < size; i++) {
+ dirent = &dirlist_devices_zfs_block[i];
+ if (strncmp(comp, dirent->d_name, LXSNSIZ) == 0) {
+ lnp = lxsys_getnode(ldp->lxsys_vnode,
+ ldp->lxsys_type, ldp->lxsys_instance,
+ dirent->d_idnum);
+ lnp->lxsys_vnode->v_type = VREG;
+ lnp->lxsys_mode = 0444;
+ return (lnp->lxsys_vnode);
+ }
+ }
+ }
+
+ return (NULL);
+}
+
+static vnode_t *
+lxsys_lookup_devices_syscpu(lxsys_node_t *ldp, char *comp)
+{
+ lxsys_node_t *lnp = NULL;
+
+ if (ldp->lxsys_instance == 0) {
+ /* top-level cpu listing */
+
+ /* If fixed entry */
+ if (strcmp(comp, "kernel_max") == 0) {
+ lnp = lxsys_getnode_static(ldp->lxsys_vnode,
+ LXSYS_INST_DEV_SYSCPU_KMAX);
+ lnp->lxsys_vnode->v_type = VREG;
+ lnp->lxsys_mode = 0444;
+ } else {
+ /* Else dynamic cpuN entry */
+ cpu_t *cp, *cpstart;
+ int pools_enabled;
+
+ mutex_enter(&cpu_lock);
+ pools_enabled = pool_pset_enabled();
+
+ cp = cpstart = CPU->cpu_part->cp_cpulist;
+ do {
+ char cpunm[16];
+
+ (void) snprintf(cpunm, sizeof (cpunm), "cpu%d",
+ cp->cpu_seqid);
+
+ if (strcmp(comp, cpunm) == 0) {
+ lnp = lxsys_getnode(ldp->lxsys_vnode,
+ LXSYS_DEV_SYS_CPUINFO,
+ cp->cpu_id + 1, 0);
+ break;
+ }
+ if (pools_enabled) {
+ cp = cp->cpu_next_part;
+ } else {
+ cp = cp->cpu_next;
+ }
+ } while (cp != cpstart);
+
+ mutex_exit(&cpu_lock);
+ }
+
+ if (lnp != NULL) {
+ return (lnp->lxsys_vnode);
+ }
+ } else if (ldp->lxsys_endpoint == 0) {
+ /* cpu-level sub-item listing, currently empty */
+ }
+
+ return (NULL);
+}
+
+static vnode_t *
+lxsys_lookup_devices_syscpuinfo(lxsys_node_t *ldp, char *comp)
+{
+ return (NULL);
+}
+
+static vnode_t *
+lxsys_lookup_devices_sysnode(lxsys_node_t *ldp, char *comp)
+{
+ lxsys_node_t *lnp = NULL;
+
+ if (ldp->lxsys_instance == 0) {
+ /*
+ * The system is presently represented as a single node,
+ * regardless of any NUMA topology which exists.
+ * The instances are offset by 1 to account for the top level
+ * directory occupying instance 0.
+ */
+ if (strcmp(comp, "node0") == 0) {
+ lnp = lxsys_getnode(ldp->lxsys_vnode, ldp->lxsys_type,
+ 1, 0);
+ return (lnp->lxsys_vnode);
+ }
+ } else {
+ /* interface-level sub-item listing */
+ int i, size;
+ lxsys_dirent_t *dirent;
+
+ size = SYSDIRLISTSZ(dirlist_devices_sysnode);
+ for (i = 0; i < size; i++) {
+ dirent = &dirlist_devices_sysnode[i];
+ if (strncmp(comp, dirent->d_name, LXSNSIZ) == 0) {
+ lnp = lxsys_getnode(ldp->lxsys_vnode,
+ ldp->lxsys_type, ldp->lxsys_instance,
+ dirent->d_idnum);
+ lnp->lxsys_vnode->v_type = VREG;
+ lnp->lxsys_mode = 0444;
+ return (lnp->lxsys_vnode);
+ }
+ }
+ }
+
+ return (NULL);
+}
+
+static int
+lxsys_read_devices_virtual_net(lxsys_node_t *lnp, lxsys_uiobuf_t *luio)
+{
+ netstack_t *ns;
+ ill_t *ill;
+ uint_t ifindex = lnp->lxsys_instance;
+ uint8_t *addr;
+ uint64_t flags;
+ int error = 0;
+
+ if (ifindex == 0 || lnp->lxsys_endpoint == 0) {
+ return (EISDIR);
+ }
+
+ if ((ns = lxsys_netstack(lnp)) == NULL) {
+ return (EIO);
+ }
+
+ ill = lxsys_find_ill(ns->netstack_ip, ifindex);
+ if (ill == NULL) {
+ netstack_rele(ns);
+ return (EIO);
+ }
+
+ switch (lnp->lxsys_endpoint) {
+ case LXSYS_ENDP_NET_ADDRESS:
+ if (ill->ill_phys_addr_length != ETHERADDRL) {
+ lxsys_uiobuf_printf(luio, "00:00:00:00:00:00\n");
+ break;
+ }
+ addr = ill->ill_phys_addr;
+ lxsys_uiobuf_printf(luio,
+ "%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx\n",
+ addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
+ break;
+ case LXSYS_ENDP_NET_ADDRLEN:
+ lxsys_uiobuf_printf(luio, "%u\n",
+ IS_LOOPBACK(ill) ? ETHERADDRL : ill->ill_phys_addr_length);
+ break;
+ case LXSYS_ENDP_NET_FLAGS:
+ flags = (ill->ill_flags | ill->ill_ipif->ipif_flags |
+ ill->ill_phyint->phyint_flags) & 0xffff;
+ lx_ifflags_convert(&flags, LX_IF_FROMNATIVE);
+ lxsys_uiobuf_printf(luio, "0x%x\n", flags);
+ break;
+ case LXSYS_ENDP_NET_IFINDEX:
+ lxsys_uiobuf_printf(luio, "%u\n", ifindex);
+ break;
+ case LXSYS_ENDP_NET_MTU:
+ lxsys_uiobuf_printf(luio, "%u\n", ill->ill_mtu);
+ break;
+ case LXSYS_ENDP_NET_TXQLEN:
+ /* perpetuate the txqlen lie */
+ if (IS_LOOPBACK(ill)) {
+ lxsys_uiobuf_printf(luio, "0\n");
+ } else {
+ lxsys_uiobuf_printf(luio, "1\n");
+ }
+ break;
+ case LXSYS_ENDP_NET_TYPE:
+ lxsys_uiobuf_printf(luio, "%u\n",
+ IS_LOOPBACK(ill) ? LX_ARPHRD_LOOPBACK :
+ arp_hw_type(ill->ill_mactype));
+ break;
+ default:
+ error = EIO;
+ }
+
+ ill_refrele(ill);
+ netstack_rele(ns);
+ return (error);
+}
+
+static int
+lxsys_read_devices_zfs_block(lxsys_node_t *lnp, lxsys_uiobuf_t *luio)
+{
+ uint_t dskindex = lnp->lxsys_instance;
+
+ if (dskindex == 0 || lnp->lxsys_endpoint == 0) {
+ return (EISDIR);
+ }
+
+ return (EIO);
+}
+
+static int
+lxsys_read_devices_sysnode(lxsys_node_t *lnp, lxsys_uiobuf_t *luio)
+{
+ if (lnp->lxsys_instance == 1 &&
+ lnp->lxsys_endpoint == LXSYS_ENDP_NODE_CPULIST) {
+ /* Show the range of CPUs */
+ cpu_t *cp, *cpstart;
+ int pools_enabled, maxid = -1;
+
+ mutex_enter(&cpu_lock);
+ pools_enabled = pool_pset_enabled();
+
+ cp = cpstart = CPU->cpu_part->cp_cpulist;
+ do {
+ if (cp->cpu_seqid > maxid)
+ maxid = cp->cpu_seqid;
+
+ if (pools_enabled) {
+ cp = cp->cpu_next_part;
+ } else {
+ cp = cp->cpu_next;
+ }
+ } while (cp != cpstart);
+
+ mutex_exit(&cpu_lock);
+
+ lxsys_uiobuf_printf(luio, "0-%d\n", maxid);
+ return (0);
+ }
+ return (EISDIR);
+
+}
+
+static int
+lxsys_read_static(lxsys_node_t *lnp, lxsys_uiobuf_t *luio)
+{
+ uint_t inst = lnp->lxsys_instance;
+
+ if (inst == LXSYS_INST_DEV_SYSCPU_KMAX) {
+ lxsys_uiobuf_printf(luio, "%d\n", NCPU);
+ return (0);
+ }
+
+ /* All other static nodes are directories */
+ return (EISDIR);
+}
+
+/*
+ * lxsys_readdir(): Vnode operation for VOP_READDIR()
+ */
+/* ARGSUSED */
+static int
+lxsys_readdir(vnode_t *dp, uio_t *uiop, cred_t *cr, int *eofp,
+ caller_context_t *ct, int flags)
+{
+ lxsys_node_t *lxsnp = VTOLXS(dp);
+ lxsys_nodetype_t type = lxsnp->lxsys_type;
+ ssize_t uresid;
+ off_t uoffset;
+ int error, leof;
+
+ ASSERT(dp->v_type == VDIR);
+ VERIFY(type > LXSYS_NONE && type < LXSYS_MAXTYPE);
+
+ /*
+ * restrict readdir permission to owner or root
+ */
+ if ((error = lxsys_access(dp, VREAD, 0, cr, ct)) != 0)
+ return (error);
+
+ uoffset = uiop->uio_offset;
+ uresid = uiop->uio_resid;
+
+ /* can't do negative reads */
+ if (uoffset < 0 || uresid <= 0)
+ return (EINVAL);
+
+ /* can't read directory entries that don't exist! */
+ if (uoffset % LXSYS_SDSIZE)
+ return (ENOENT);
+
+ /* Free lower functions from having to check eofp == NULL */
+ if (eofp == NULL) {
+ eofp = &leof;
+ }
+
+ return (lxsys_readdir_function[lxsnp->lxsys_type](lxsnp, uiop, eofp));
+}
+
+static int
+lxsys_dirent_out(dirent64_t *d, ushort_t n, struct uio *uio)
+{
+ int error;
+ off_t offset = uio->uio_offset;
+
+ /*
+ * uiomove() updates both uiop->uio_resid and uiop->uio_offset by the
+ * same amount. But we want uiop->uio_offset to change in increments
+ * of LXSYS_SDSIZE, which is different from the number of bytes being
+ * returned to the user. To accomplish this, we set uiop->uio_offset
+ * separately on success, overriding what uiomove() does.
+ */
+ d->d_off = (off64_t)(offset + LXSYS_SDSIZE);
+ d->d_reclen = n;
+ if ((error = uiomove(d, n, UIO_READ, uio)) != 0) {
+ return (error);
+ }
+ uio->uio_offset = offset + LXSYS_SDSIZE;
+ return (0);
+}
+
+/*
+ * This has the common logic for returning directory entries
+ */
+static int
+lxsys_readdir_common(lxsys_node_t *lxsnp, uio_t *uiop, int *eofp,
+ lxsys_dirent_t *dirtab, int dirtablen)
+{
+ /* bp holds one dirent64 structure */
+ longlong_t bp[DIRENT64_RECLEN(LXSNSIZ) / sizeof (longlong_t)];
+ dirent64_t *dirent = (dirent64_t *)bp;
+ ssize_t oresid; /* save a copy for testing later */
+ ssize_t uresid;
+
+ oresid = uiop->uio_resid;
+
+ /* clear out the dirent buffer */
+ bzero(bp, sizeof (bp));
+
+ /* Satisfy user request */
+ while ((uresid = uiop->uio_resid) > 0) {
+ int dirindex;
+ off_t uoffset;
+ int reclen;
+ int error;
+
+ uoffset = uiop->uio_offset;
+ dirindex = (uoffset / LXSYS_SDSIZE) - 2;
+
+ if (uoffset == 0) {
+
+ dirent->d_ino = lxsnp->lxsys_ino;
+ dirent->d_name[0] = '.';
+ dirent->d_name[1] = '\0';
+ reclen = DIRENT64_RECLEN(1);
+
+ } else if (uoffset == LXSYS_SDSIZE) {
+
+ dirent->d_ino = lxsys_parentinode(lxsnp);
+ dirent->d_name[0] = '.';
+ dirent->d_name[1] = '.';
+ dirent->d_name[2] = '\0';
+ reclen = DIRENT64_RECLEN(2);
+
+ } else if (dirindex >= 0 && dirindex < dirtablen) {
+
+ int slen = strlen(dirtab[dirindex].d_name);
+
+ dirent->d_ino = lxsys_inode(LXSYS_STATIC,
+ dirtab[dirindex].d_idnum, 0);
+ (void) strcpy(dirent->d_name, dirtab[dirindex].d_name);
+ reclen = DIRENT64_RECLEN(slen);
+
+ } else {
+ /* Run out of table entries */
+ *eofp = 1;
+ return (0);
+ }
+
+ /*
+ * If the size of the data to transfer is greater than the
+ * user-provided buffer, we cannot continue.
+ */
+ if (reclen > uresid) {
+ /* Error if no entries have been returned yet. */
+ if (uresid == oresid) {
+ return (EINVAL);
+ }
+ break;
+ }
+
+ if ((error = lxsys_dirent_out(dirent, reclen, uiop)) != 0) {
+ return (error);
+ }
+ }
+
+ /* Have run out of space, but could have just done last table entry */
+ *eofp = (uiop->uio_offset >= ((dirtablen+2) * LXSYS_SDSIZE)) ? 1 : 0;
+ return (0);
+}
+
+static int
+lxsys_readdir_subdir(lxsys_node_t *lxsnp, uio_t *uiop, int *eofp,
+ lxsys_dirent_t *dirtab, int dirtablen)
+{
+ /* bp holds one dirent64 structure */
+ longlong_t bp[DIRENT64_RECLEN(LXSNSIZ) / sizeof (longlong_t)];
+ dirent64_t *dirent = (dirent64_t *)bp;
+ ssize_t oresid; /* save a copy for testing later */
+ ssize_t uresid;
+
+ oresid = uiop->uio_resid;
+
+ /* clear out the dirent buffer */
+ bzero(bp, sizeof (bp));
+
+ /* Satisfy user request */
+ while ((uresid = uiop->uio_resid) > 0) {
+ int dirindex;
+ off_t uoffset;
+ int reclen;
+ int error;
+
+ uoffset = uiop->uio_offset;
+ dirindex = (uoffset / LXSYS_SDSIZE) - 2;
+
+ if (uoffset == 0) {
+
+ dirent->d_ino = lxsnp->lxsys_ino;
+ dirent->d_name[0] = '.';
+ dirent->d_name[1] = '\0';
+ reclen = DIRENT64_RECLEN(1);
+
+ } else if (uoffset == LXSYS_SDSIZE) {
+
+ dirent->d_ino = lxsys_parentinode(lxsnp);
+ dirent->d_name[0] = '.';
+ dirent->d_name[1] = '.';
+ dirent->d_name[2] = '\0';
+ reclen = DIRENT64_RECLEN(2);
+
+ } else if (dirindex >= 0 && dirindex < dirtablen) {
+
+ int slen = strlen(dirtab[dirindex].d_name);
+
+ dirent->d_ino = lxsys_inode(lxsnp->lxsys_type,
+ lxsnp->lxsys_instance, dirtab[dirindex].d_idnum);
+ (void) strcpy(dirent->d_name, dirtab[dirindex].d_name);
+ reclen = DIRENT64_RECLEN(slen);
+
+ } else {
+ /* Run out of table entries */
+ *eofp = 1;
+ return (0);
+ }
+
+ /*
+ * If the size of the data to transfer is greater than the
+ * user-provided buffer, we cannot continue.
+ */
+ if (reclen > uresid) {
+ /* Error if no entries have been returned yet. */
+ if (uresid == oresid) {
+ return (EINVAL);
+ }
+ break;
+ }
+
+ if ((error = lxsys_dirent_out(dirent, reclen, uiop)) != 0) {
+ return (error);
+ }
+ }
+
+ /* Have run out of space, but could have just done last table entry */
+ *eofp = (uiop->uio_offset >= ((dirtablen+2) * LXSYS_SDSIZE)) ? 1 : 0;
+ return (0);
+}
+
+static int
+lxsys_readdir_ifaces(lxsys_node_t *ldp, struct uio *uiop, int *eofp,
+ lxsys_nodetype_t type)
+{
+ longlong_t bp[DIRENT64_RECLEN(LXSNSIZ) / sizeof (longlong_t)];
+ dirent64_t *dirent = (dirent64_t *)bp;
+ ssize_t oresid, uresid;
+ netstack_t *ns;
+ ip_stack_t *ipst;
+ avl_tree_t *phytree;
+ phyint_t *phyi;
+ int error, i;
+
+
+ /* Emit "." and ".." entries */
+ oresid = uiop->uio_resid;
+ error = lxsys_readdir_common(ldp, uiop, eofp, NULL, 0);
+ if (error != 0 || *eofp == 0) {
+ return (error);
+ }
+
+ if ((ns = lxsys_netstack(ldp)) == NULL) {
+ *eofp = 1;
+ return (0);
+ }
+ ipst = ns->netstack_ip;
+
+ rw_enter(&ipst->ips_ill_g_lock, RW_READER);
+ phytree = &ipst->ips_phyint_g_list->phyint_list_avl_by_index;
+ phyi = avl_first(phytree);
+ if (phyi == NULL) {
+ *eofp = 1;
+ }
+ bzero(bp, sizeof (bp));
+
+ /*
+ * Skip records we have already passed with the offset.
+ * This accounts for the two "." and ".." records already seen.
+ */
+ for (i = (uiop->uio_offset/LXSYS_SDSIZE) - 2; i > 0; i--) {
+ if ((phyi = avl_walk(phytree, phyi, AVL_AFTER)) == NULL) {
+ *eofp = 1;
+ break;
+ }
+ }
+
+ while ((uresid = uiop->uio_resid) > 0 && phyi != NULL) {
+ uint_t ifindex;
+ int reclen;
+
+ ifindex = phyi->phyint_ifindex;
+ (void) strncpy(dirent->d_name, phyi->phyint_name, LIFNAMSIZ);
+ lx_ifname_convert(dirent->d_name, LX_IF_FROMNATIVE);
+ dirent->d_ino = lxsys_inode(type, ifindex, 0);
+ reclen = DIRENT64_RECLEN(strlen(dirent->d_name));
+
+ if (reclen > uresid) {
+ if (uresid == oresid) {
+ /* Not enough space for one record */
+ error = EINVAL;
+ }
+ break;
+ }
+ if ((error = lxsys_dirent_out(dirent, reclen, uiop)) != 0) {
+ break;
+ }
+
+ if ((phyi = avl_walk(phytree, phyi, AVL_AFTER)) == NULL) {
+ *eofp = 1;
+ break;
+ }
+ }
+
+ rw_exit(&ipst->ips_ill_g_lock);
+ netstack_rele(ns);
+ return (error);
+}
+
+static int
+lxsys_readdir_disks(lxsys_node_t *ldp, struct uio *uiop, int *eofp,
+ lxsys_nodetype_t type)
+{
+ longlong_t bp[DIRENT64_RECLEN(LXSNSIZ) / sizeof (longlong_t)];
+ dirent64_t *dirent = (dirent64_t *)bp;
+ ssize_t oresid, uresid;
+ int skip, error;
+ int reclen;
+ uint_t instance;
+ lx_zone_data_t *lxzdata;
+ lx_virt_disk_t *vd;
+
+ /* Emit "." and ".." entries */
+ oresid = uiop->uio_resid;
+ error = lxsys_readdir_common(ldp, uiop, eofp, NULL, 0);
+ if (error != 0 || *eofp == 0) {
+ return (error);
+ }
+
+ skip = (uiop->uio_offset/LXSYS_SDSIZE) - 2;
+
+ lxzdata = ztolxzd(curproc->p_zone);
+ if (lxzdata == NULL)
+ return (EINVAL);
+ ASSERT(lxzdata->lxzd_vdisks != NULL);
+
+ vd = list_head(lxzdata->lxzd_vdisks);
+ while (vd != NULL) {
+ if (skip > 0) {
+ skip--;
+ goto next;
+ }
+
+ if (strnlen(vd->lxvd_name, sizeof (vd->lxvd_name)) > LXSNSIZ)
+ goto next;
+
+ (void) strncpy(dirent->d_name, vd->lxvd_name, LXSNSIZ);
+
+ instance = getminor(vd->lxvd_emul_dev) & 0xffff;
+ if (instance == 0)
+ goto next;
+
+ dirent->d_ino = lxsys_inode(type, instance, 0);
+ reclen = DIRENT64_RECLEN(strlen(dirent->d_name));
+
+ uresid = uiop->uio_resid;
+ if (reclen > uresid) {
+ if (uresid == oresid) {
+ /* Not enough space for one record */
+ error = EINVAL;
+ }
+ break;
+ }
+ if ((error = lxsys_dirent_out(dirent, reclen, uiop)) != 0) {
+ break;
+ }
+
+next:
+ vd = list_next(lxzdata->lxzd_vdisks, vd);
+ }
+
+ /* Indicate EOF if we reached the end of the virtual disks. */
+ if (vd == NULL) {
+ *eofp = 1;
+ }
+
+ return (error);
+}
+
+
+static int
+lxsys_readdir_static(lxsys_node_t *lnp, uio_t *uiop, int *eofp)
+{
+ lxsys_dirent_t *dirent = NULL;
+ int i, len = 0;
+
+ for (i = 0; i < SYSDIRLISTSZ(lxsys_dirlookup); i++) {
+ if (lnp->lxsys_instance == lxsys_dirlookup[i].dl_instance) {
+ dirent = lxsys_dirlookup[i].dl_list;
+ len = lxsys_dirlookup[i].dl_length;
+ break;
+ }
+ }
+
+ if (dirent == NULL) {
+ return (ENOTDIR);
+ }
+
+ return (lxsys_readdir_common(lnp, uiop, eofp, dirent, len));
+}
+
+static int
+lxsys_readdir_class_netdir(lxsys_node_t *lnp, uio_t *uiop, int *eofp)
+{
+ if (lnp->lxsys_type != LXSYS_CLASS_NET ||
+ lnp->lxsys_instance != 0) {
+ /*
+ * Since /sys/class/net contains only symlinks, readdir
+ * operations should not be performed anywhere except the top
+ * level (instance == 0).
+ */
+ return (ENOTDIR);
+ }
+
+ return (lxsys_readdir_ifaces(lnp, uiop, eofp, LXSYS_CLASS_NET));
+}
+
+static int
+lxsys_readdir_devices_virtual_netdir(lxsys_node_t *lnp, uio_t *uiop, int *eofp)
+{
+ int error;
+
+ if (lnp->lxsys_instance == 0) {
+ /* top-level interface listing */
+ error = lxsys_readdir_ifaces(lnp, uiop, eofp,
+ LXSYS_DEV_NET);
+ } else if (lnp->lxsys_endpoint == 0) {
+ /* interface-level sub-item listing */
+ error = lxsys_readdir_subdir(lnp, uiop, eofp,
+ dirlist_devices_virtual_net,
+ SYSDIRLISTSZ(dirlist_devices_virtual_net));
+ } else {
+ /* there shouldn't be subdirs below this */
+ error = ENOTDIR;
+ }
+
+ return (error);
+}
+
+static int
+lxsys_readdir_blockdir(lxsys_node_t *lnp, uio_t *uiop, int *eofp)
+{
+ if (lnp->lxsys_type != LXSYS_BLOCK ||
+ lnp->lxsys_instance != 0) {
+ /*
+ * Since /sys/block contains only symlinks, readdir operations
+ * should not be performed anywhere except the top level
+ * (instance == 0).
+ */
+ return (ENOTDIR);
+ }
+
+ return (lxsys_readdir_disks(lnp, uiop, eofp, LXSYS_BLOCK));
+}
+
+static int
+lxsys_readdir_devices_zfsdir(lxsys_node_t *lnp, uio_t *uiop, int *eofp)
+{
+ int error;
+
+ if (lnp->lxsys_instance == 0) {
+ /* top-level dev listing */
+ error = lxsys_readdir_disks(lnp, uiop, eofp,
+ LXSYS_DEV_ZFS);
+ } else if (lnp->lxsys_endpoint == 0) {
+ /* disk-level sub-item listing */
+ error = lxsys_readdir_subdir(lnp, uiop, eofp,
+ dirlist_devices_zfs_block,
+ SYSDIRLISTSZ(dirlist_devices_zfs_block));
+ } else {
+ /*
+ * Currently there shouldn't be subdirs below this but
+ * on a real Linux system some will be subdirs. This should
+ * be fixed when we populate the directory for real.
+ */
+ error = ENOTDIR;
+ }
+
+ return (error);
+}
+
+static int
+lxsys_readdir_cpu(lxsys_node_t *ldp, struct uio *uiop, int *eofp)
+{
+ longlong_t bp[DIRENT64_RECLEN(LXSNSIZ) / sizeof (longlong_t)];
+ dirent64_t *dirent = (dirent64_t *)bp;
+ ssize_t oresid, uresid;
+ int skip, error;
+ int reclen;
+ cpu_t *cp, *cpstart;
+ int pools_enabled;
+ int i, cpucnt;
+ lxsys_cpu_info_t cpu_info[NCPU];
+
+ /* Emit "." and ".." entries */
+ oresid = uiop->uio_resid;
+ error = lxsys_readdir_common(ldp, uiop, eofp, NULL, 0);
+ if (error != 0 || *eofp == 0) {
+ return (error);
+ }
+
+ skip = (uiop->uio_offset/LXSYS_SDSIZE) - 2;
+
+ /* Fixed entries */
+ if (skip > 0) {
+ skip--;
+ } else {
+ (void) strncpy(dirent->d_name, "kernel_max", LXSNSIZ);
+
+ dirent->d_ino = lxsys_inode(LXSYS_STATIC,
+ LXSYS_INST_DEV_SYSCPU_KMAX, 0);
+ reclen = DIRENT64_RECLEN(strlen(dirent->d_name));
+
+ uresid = uiop->uio_resid;
+ if (reclen > uresid) {
+ if (uresid == oresid) {
+ /* Not enough space for one record */
+ error = EINVAL;
+ }
+ goto done;
+ }
+ if ((error = lxsys_dirent_out(dirent, reclen, uiop)) != 0) {
+ goto done;
+ }
+ }
+
+ /* Collect a list of CPU info */
+ mutex_enter(&cpu_lock);
+ pools_enabled = pool_pset_enabled();
+
+ cpucnt = 0;
+ cp = cpstart = CPU->cpu_part->cp_cpulist;
+ do {
+ cpu_info[cpucnt].cpu_id = cp->cpu_id;
+ cpu_info[cpucnt++].cpu_seqid = cp->cpu_seqid;
+ ASSERT(cpucnt < NCPU);
+ if (pools_enabled) {
+ cp = cp->cpu_next_part;
+ } else {
+ cp = cp->cpu_next;
+ }
+ } while (cp != cpstart);
+
+ mutex_exit(&cpu_lock);
+
+ /* Output dynamic CPU info */
+ for (i = 0; i < cpucnt; i++) {
+ char cpunm[16];
+
+ if (skip > 0) {
+ skip--;
+ continue;
+ }
+
+ (void) snprintf(cpunm, sizeof (cpunm), "cpu%d",
+ cpu_info[i].cpu_seqid);
+ (void) strncpy(dirent->d_name, cpunm, LXSNSIZ);
+
+ dirent->d_ino = lxsys_inode(LXSYS_DEV_SYS_CPU,
+ cpu_info[i].cpu_id + 1, 0);
+ reclen = DIRENT64_RECLEN(strlen(dirent->d_name));
+
+ uresid = uiop->uio_resid;
+ if (reclen > uresid) {
+ if (uresid == oresid) {
+ /* Not enough space for one record */
+ error = EINVAL;
+ }
+ break;
+ }
+ if ((error = lxsys_dirent_out(dirent, reclen, uiop)) != 0) {
+ break;
+ }
+ }
+
+ /* Indicate EOF if we reached the end of the CPU list. */
+ if (i == cpucnt) {
+ *eofp = 1;
+ }
+
+done:
+ return (error);
+}
+
+static int
+lxsys_readdir_devices_syscpu(lxsys_node_t *lnp, uio_t *uiop, int *eofp)
+{
+ int error;
+
+ if (lnp->lxsys_instance == 0) {
+ /* top-level cpu listing */
+ error = lxsys_readdir_cpu(lnp, uiop, eofp);
+ } else if (lnp->lxsys_endpoint == 0) {
+ /* cpu-level sub-item listing */
+ error = lxsys_readdir_subdir(lnp, uiop, eofp,
+ dirlist_empty, SYSDIRLISTSZ(dirlist_empty));
+ } else {
+ /*
+ * Currently there shouldn't be subdirs below this but
+ * on a real Linux system some will be subdirs. This should
+ * be fixed when we populate the directory for real.
+ */
+ error = ENOTDIR;
+ }
+
+ return (error);
+}
+
+static int
+lxsys_readdir_devices_syscpuinfo(lxsys_node_t *lnp, uio_t *uiop, int *eofp)
+{
+ int error;
+
+ if (lnp->lxsys_type != LXSYS_DEV_SYS_CPUINFO) {
+ /*
+ * Since /sys/devices/system/cpu/cpuN is empty, readdir
+ * operations should not be performed anywhere except the top
+ * level.
+ */
+ return (ENOTDIR);
+ }
+
+ /*
+ * Emit "." and ".." entries
+ * All cpuN directories are currently empty.
+ */
+ error = lxsys_readdir_common(lnp, uiop, eofp, NULL, 0);
+ if (error != 0 || *eofp == 0) {
+ return (error);
+ }
+
+ /* Indicate EOF */
+ *eofp = 1;
+
+ return (error);
+}
+
+static int
+lxsys_readdir_devices_sysnode(lxsys_node_t *lnp, uio_t *uiop, int *eofp)
+{
+ int error;
+
+ if (lnp->lxsys_instance == 0) {
+ /* top-level node listing */
+ longlong_t bp[DIRENT64_RECLEN(LXSNSIZ) / sizeof (longlong_t)];
+ dirent64_t *dirent = (dirent64_t *)bp;
+ ssize_t oresid, uresid;
+ int reclen, skip;
+
+ /* Emit "." and ".." entries */
+ oresid = uiop->uio_resid;
+ error = lxsys_readdir_common(lnp, uiop, eofp, NULL, 0);
+ if (error != 0 || *eofp == 0) {
+ return (error);
+ }
+ skip = (uiop->uio_offset/LXSYS_SDSIZE) - 2;
+
+ /* Fixed entries */
+ if (skip > 0) {
+ skip--;
+ } else {
+ (void) strncpy(dirent->d_name, "node0", LXSNSIZ);
+
+ dirent->d_ino = lxsys_inode(LXSYS_DEV_SYS_NODE,
+ 1, 0);
+ reclen = DIRENT64_RECLEN(strlen(dirent->d_name));
+
+ uresid = uiop->uio_resid;
+ if (reclen > uresid) {
+ if (uresid == oresid) {
+ /* Not enough space for one record */
+ return (EINVAL);
+ }
+ return (0);
+ }
+ error = lxsys_dirent_out(dirent, reclen, uiop);
+ }
+ /* Indicate EOF */
+ if (error == 0) {
+ *eofp = 1;
+ }
+ } else if (lnp->lxsys_endpoint == 0) {
+ /* node-level sub-item listing */
+ error = lxsys_readdir_subdir(lnp, uiop, eofp,
+ dirlist_devices_sysnode,
+ SYSDIRLISTSZ(dirlist_devices_sysnode));
+ } else {
+ /* there shouldn't be subdirs below this */
+ error = ENOTDIR;
+ }
+
+ return (error);
+}
+
+/*
+ * lxsys_readlink(): Vnode operation for VOP_READLINK()
+ */
+/* ARGSUSED */
+static int
+lxsys_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
+{
+ char buf[MAXPATHLEN + 1];
+ lxsys_node_t *lnp = VTOLXS(vp);
+ lxsys_nodetype_t type = lnp->lxsys_type;
+ int (*rlfunc)();
+ int error;
+
+ VERIFY(type > LXSYS_NONE && type < LXSYS_MAXTYPE);
+
+ if (vp->v_type != VLNK) {
+ return (EINVAL);
+ }
+
+ rlfunc = lxsys_readlink_function[lnp->lxsys_type];
+ if (rlfunc != NULL) {
+ if ((error = rlfunc(lnp, buf, sizeof (buf))) == 0) {
+ error = uiomove(buf, strlen(buf), UIO_READ, uiop);
+ }
+ } else {
+ error = EINVAL;
+ }
+
+ return (error);
+}
+
+
+static int
+lxsys_readlink_class_net(lxsys_node_t *lnp, char *buf, size_t len)
+{
+ netstack_t *ns;
+ ip_stack_t *ipst;
+ avl_tree_t *phytree;
+ phyint_t *phyi;
+ uint_t ifindex;
+ char ifname[LIFNAMSIZ];
+ int error = EINVAL;
+
+ if ((ifindex = lnp->lxsys_instance) == 0) {
+ return (error);
+ }
+
+ if ((ns = lxsys_netstack(lnp)) == NULL) {
+ return (error);
+ }
+ ipst = ns->netstack_ip;
+ rw_enter(&ipst->ips_ill_g_lock, RW_READER);
+
+ phytree = &ipst->ips_phyint_g_list->phyint_list_avl_by_index;
+ phyi = avl_find(phytree, &ifindex, NULL);
+ if (phyi != NULL) {
+ (void) strncpy(ifname, phyi->phyint_name, LIFNAMSIZ);
+ lx_ifname_convert(ifname, LX_IF_FROMNATIVE);
+ (void) snprintf(buf, len, "/sys/devices/virtual/net/%s",
+ ifname);
+ error = 0;
+ }
+
+ rw_exit(&ipst->ips_ill_g_lock);
+ netstack_rele(ns);
+ return (error);
+}
+
+static int
+lxsys_readlink_block(lxsys_node_t *lnp, char *buf, size_t len)
+{
+ int inst, error = EINVAL;
+ lx_zone_data_t *lxzdata;
+ lx_virt_disk_t *vd;
+
+ if ((inst = lnp->lxsys_instance) == 0) {
+ return (error);
+ }
+
+ lxzdata = ztolxzd(curproc->p_zone);
+ if (lxzdata == NULL)
+ return (error);
+ ASSERT(lxzdata->lxzd_vdisks != NULL);
+
+ vd = list_head(lxzdata->lxzd_vdisks);
+ while (vd != NULL) {
+ int vinst = getminor(vd->lxvd_emul_dev) & 0xffff;
+
+ if (vinst == inst) {
+ (void) snprintf(buf, len,
+ "../devices/zfs/%s", vd->lxvd_name);
+ error = 0;
+ break;
+ }
+ vd = list_next(lxzdata->lxzd_vdisks, vd);
+ }
+
+ return (error);
+}
+
+/*
+ * lxsys_inactive(): Vnode operation for VOP_INACTIVE()
+ * Vnode is no longer referenced, deallocate the file
+ * and all its resources.
+ */
+/* ARGSUSED */
+static void
+lxsys_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
+{
+ lxsys_freenode(VTOLXS(vp));
+}
+
+/*
+ * lxsys_sync(): Vnode operation for VOP_SYNC()
+ */
+static int
+lxsys_sync()
+{
+ /*
+ * Nothing to sync but this function must never fail
+ */
+ return (0);
+}
+
+/*
+ * lxsys_cmp(): Vnode operation for VOP_CMP()
+ */
+static int
+lxsys_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
+{
+ if (vn_matchops(vp1, lxsys_vnodeops) ||
+ vn_matchops(vp2, lxsys_vnodeops))
+ return (vp1 == vp2);
+ return (VOP_CMP(vp1, vp2, ct));
+}
diff --git a/usr/src/uts/common/brand/sn1/sn1_brand.c b/usr/src/uts/common/brand/sn1/sn1_brand.c
index d61928d578..32fb7d9127 100644
--- a/usr/src/uts/common/brand/sn1/sn1_brand.c
+++ b/usr/src/uts/common/brand/sn1/sn1_brand.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
*/
#include <sys/errno.h>
@@ -42,43 +43,66 @@
char *sn1_emulation_table = NULL;
-void sn1_init_brand_data(zone_t *);
+void sn1_init_brand_data(zone_t *, kmutex_t *);
void sn1_free_brand_data(zone_t *);
void sn1_setbrand(proc_t *);
int sn1_getattr(zone_t *, int, void *, size_t *);
int sn1_setattr(zone_t *, int, void *, size_t);
int sn1_brandsys(int, int64_t *, uintptr_t, uintptr_t, uintptr_t,
- uintptr_t, uintptr_t, uintptr_t);
+ uintptr_t, uintptr_t);
void sn1_copy_procdata(proc_t *, proc_t *);
-void sn1_proc_exit(struct proc *, klwp_t *);
+void sn1_proc_exit(struct proc *);
void sn1_exec();
-int sn1_initlwp(klwp_t *);
+void sn1_initlwp(klwp_t *, void *);
void sn1_forklwp(klwp_t *, klwp_t *);
void sn1_freelwp(klwp_t *);
void sn1_lwpexit(klwp_t *);
int sn1_elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int,
- long *, int, caddr_t, cred_t *, int);
+ long *, int, caddr_t, cred_t *, int *);
/* sn1 brand */
struct brand_ops sn1_brops = {
- sn1_init_brand_data,
- sn1_free_brand_data,
- sn1_brandsys,
- sn1_setbrand,
- sn1_getattr,
- sn1_setattr,
- sn1_copy_procdata,
- sn1_proc_exit,
- sn1_exec,
- lwp_setrval,
- sn1_initlwp,
- sn1_forklwp,
- sn1_freelwp,
- sn1_lwpexit,
- sn1_elfexec,
- NULL,
- NULL,
- NSIG,
+ sn1_init_brand_data, /* b_init_brand_data */
+ sn1_free_brand_data, /* b_free_brand_data */
+ sn1_brandsys, /* b_brandsys */
+ sn1_setbrand, /* b_setbrand */
+ sn1_getattr, /* b_getattr */
+ sn1_setattr, /* b_setattr */
+ sn1_copy_procdata, /* b_copy_procdata */
+ sn1_proc_exit, /* b_proc_exit */
+ sn1_exec, /* b_exec */
+ lwp_setrval, /* b_lwp_setrval */
+ NULL, /* b_lwpdata_alloc */
+ NULL, /* b_lwpdata_free */
+ sn1_initlwp, /* b_initlwp */
+ NULL, /* b_initlwp_post */
+ sn1_forklwp, /* b_forklwp */
+ sn1_freelwp, /* b_freelwp */
+ sn1_lwpexit, /* b_lwpexit */
+ sn1_elfexec, /* b_elfexec */
+ NULL, /* b_sigset_native_to_brand */
+ NULL, /* b_sigset_brand_to_native */
+ NULL, /* b_sigfd_translate */
+ NSIG, /* b_nsig */
+ NULL, /* b_exit_with_sig */
+ NULL, /* b_wait_filter */
+ NULL, /* b_native_exec */
+ NULL, /* b_map32limit */
+ NULL, /* b_stop_notify */
+ NULL, /* b_waitid_helper */
+ NULL, /* b_sigcld_repost */
+ NULL, /* b_issig_stop */
+ NULL, /* b_sig_ignorable */
+ NULL, /* b_savecontext */
+#if defined(_SYSCALL32_IMPL)
+ NULL, /* b_savecontext32 */
+#endif
+ NULL, /* b_restorecontext */
+ NULL, /* b_sendsig_stack */
+ NULL, /* b_sendsig */
+ NULL, /* b_setid_clear */
+ NULL, /* b_pagefault */
+ B_TRUE /* b_intp_parse_arg */
};
#ifdef sparc
@@ -94,9 +118,12 @@ struct brand_mach_ops sn1_mops = {
struct brand_mach_ops sn1_mops = {
sn1_brand_sysenter_callback,
+ NULL,
sn1_brand_int91_callback,
sn1_brand_syscall_callback,
- sn1_brand_syscall32_callback
+ sn1_brand_syscall32_callback,
+ NULL,
+ NULL
};
#else /* ! __amd64 */
@@ -104,7 +131,10 @@ struct brand_mach_ops sn1_mops = {
struct brand_mach_ops sn1_mops = {
sn1_brand_sysenter_callback,
NULL,
+ NULL,
sn1_brand_syscall_callback,
+ NULL,
+ NULL,
NULL
};
#endif /* __amd64 */
@@ -115,7 +145,8 @@ struct brand sn1_brand = {
BRAND_VER_1,
"sn1",
&sn1_brops,
- &sn1_mops
+ &sn1_mops,
+ sizeof (brand_proc_data_t),
};
static struct modlbrand modlbrand = {
@@ -151,7 +182,7 @@ sn1_setattr(zone_t *zone, int attr, void *buf, size_t bufsize)
/*ARGSUSED*/
int
sn1_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
- uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6)
+ uintptr_t arg3, uintptr_t arg4, uintptr_t arg5)
{
int res;
@@ -171,9 +202,9 @@ sn1_copy_procdata(proc_t *child, proc_t *parent)
}
void
-sn1_proc_exit(struct proc *p, klwp_t *l)
+sn1_proc_exit(struct proc *p)
{
- brand_solaris_proc_exit(p, l, &sn1_brand);
+ brand_solaris_proc_exit(p, &sn1_brand);
}
void
@@ -182,10 +213,10 @@ sn1_exec()
brand_solaris_exec(&sn1_brand);
}
-int
-sn1_initlwp(klwp_t *l)
+void
+sn1_initlwp(klwp_t *l, void *bd)
{
- return (brand_solaris_initlwp(l, &sn1_brand));
+ brand_solaris_initlwp(l, &sn1_brand);
}
void
@@ -214,18 +245,18 @@ sn1_free_brand_data(zone_t *zone)
/*ARGSUSED*/
void
-sn1_init_brand_data(zone_t *zone)
+sn1_init_brand_data(zone_t *zone, kmutex_t *zsl)
{
}
int
sn1_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred,
- int brand_action)
+ int *brand_action)
{
return (brand_solaris_elfexec(vp, uap, args, idatap, level, execsz,
setid, exec_file, cred, brand_action, &sn1_brand, SN1_BRANDNAME,
- SN1_LIB, SN1_LIB32, SN1_LINKER, SN1_LINKER32));
+ SN1_LIB, SN1_LIB32));
}
int
diff --git a/usr/src/uts/common/brand/sn1/sn1_brand.h b/usr/src/uts/common/brand/sn1/sn1_brand.h
index b487745e21..fef9dc128b 100644
--- a/usr/src/uts/common/brand/sn1/sn1_brand.h
+++ b/usr/src/uts/common/brand/sn1/sn1_brand.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#ifndef _SN1_BRAND_H
@@ -37,20 +38,14 @@ extern "C" {
#define SN1_VERSION SN1_VERSION_1
#define SN1_LIB_NAME "sn1_brand.so.1"
-#define SN1_LINKER_NAME "ld.so.1"
#define SN1_LIB32 BRAND_NATIVE_DIR "usr/lib/" SN1_LIB_NAME
-#define SN1_LINKER32 "/lib/" SN1_LINKER_NAME
-
#define SN1_LIB64 BRAND_NATIVE_DIR "usr/lib/64/" SN1_LIB_NAME
-#define SN1_LINKER64 "/lib/64/" SN1_LINKER_NAME
#if defined(_LP64)
#define SN1_LIB SN1_LIB64
-#define SN1_LINKER SN1_LINKER64
#else /* !_LP64 */
#define SN1_LIB SN1_LIB32
-#define SN1_LINKER SN1_LINKER32
#endif /* !_LP64 */
#if defined(_KERNEL)
diff --git a/usr/src/uts/common/brand/solaris10/s10_brand.c b/usr/src/uts/common/brand/solaris10/s10_brand.c
index f24b864eef..a02ee7de3d 100644
--- a/usr/src/uts/common/brand/solaris10/s10_brand.c
+++ b/usr/src/uts/common/brand/solaris10/s10_brand.c
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2016, Joyent, Inc.
*/
#include <sys/errno.h>
@@ -45,45 +46,68 @@
char *s10_emulation_table = NULL;
-void s10_init_brand_data(zone_t *);
+void s10_init_brand_data(zone_t *, kmutex_t *);
void s10_free_brand_data(zone_t *);
void s10_setbrand(proc_t *);
int s10_getattr(zone_t *, int, void *, size_t *);
int s10_setattr(zone_t *, int, void *, size_t);
int s10_brandsys(int, int64_t *, uintptr_t, uintptr_t, uintptr_t,
- uintptr_t, uintptr_t, uintptr_t);
+ uintptr_t, uintptr_t);
void s10_copy_procdata(proc_t *, proc_t *);
-void s10_proc_exit(struct proc *, klwp_t *);
+void s10_proc_exit(struct proc *);
void s10_exec();
-int s10_initlwp(klwp_t *);
+void s10_initlwp(klwp_t *, void *);
void s10_forklwp(klwp_t *, klwp_t *);
void s10_freelwp(klwp_t *);
void s10_lwpexit(klwp_t *);
int s10_elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int,
- long *, int, caddr_t, cred_t *, int);
+ long *, int, caddr_t, cred_t *, int *);
void s10_sigset_native_to_s10(sigset_t *);
void s10_sigset_s10_to_native(sigset_t *);
/* s10 brand */
struct brand_ops s10_brops = {
- s10_init_brand_data,
- s10_free_brand_data,
- s10_brandsys,
- s10_setbrand,
- s10_getattr,
- s10_setattr,
- s10_copy_procdata,
- s10_proc_exit,
- s10_exec,
- lwp_setrval,
- s10_initlwp,
- s10_forklwp,
- s10_freelwp,
- s10_lwpexit,
- s10_elfexec,
- s10_sigset_native_to_s10,
- s10_sigset_s10_to_native,
- S10_NSIG,
+ s10_init_brand_data, /* b_init_brand_data */
+ s10_free_brand_data, /* b_free_brand_data */
+ s10_brandsys, /* b_brandsys */
+ s10_setbrand, /* b_setbrand */
+ s10_getattr, /* b_getattr */
+ s10_setattr, /* b_setattr */
+ s10_copy_procdata, /* b_copy_procdata */
+ s10_proc_exit, /* b_proc_exit */
+ s10_exec, /* b_exec */
+ lwp_setrval, /* b_lwp_setrval */
+ NULL, /* b_lwpdata_alloc */
+ NULL, /* b_lwpdata_free */
+ s10_initlwp, /* b_initlwp */
+ NULL, /* b_initlwp_post */
+ s10_forklwp, /* b_forklwp */
+ s10_freelwp, /* b_freelwp */
+ s10_lwpexit, /* b_lwpexit */
+ s10_elfexec, /* b_elfexec */
+ s10_sigset_native_to_s10, /* b_sigset_native_to_brand */
+ s10_sigset_s10_to_native, /* b_sigset_brand_to_native */
+ NULL, /* b_sigfd_translate */
+ S10_NSIG, /* b_nsig */
+ NULL, /* b_exit_with_sig */
+ NULL, /* b_wait_filter */
+ NULL, /* b_native_exec */
+ NULL, /* b_map32limit */
+ NULL, /* b_stop_notify */
+ NULL, /* b_waitid_helper */
+ NULL, /* b_sigcld_repost */
+ NULL, /* b_issig_stop */
+ NULL, /* b_sig_ignorable */
+ NULL, /* b_savecontext */
+#if defined(_SYSCALL32_IMPL)
+ NULL, /* b_savecontext32 */
+#endif
+ NULL, /* b_restorecontext */
+ NULL, /* b_sendsig_stack */
+ NULL, /* b_sendsig */
+ NULL, /* b_setid_clear */
+ NULL, /* b_pagefault */
+ B_TRUE /* b_intp_parse_arg */
};
#ifdef sparc
@@ -99,9 +123,12 @@ struct brand_mach_ops s10_mops = {
struct brand_mach_ops s10_mops = {
s10_brand_sysenter_callback,
+ NULL,
s10_brand_int91_callback,
s10_brand_syscall_callback,
- s10_brand_syscall32_callback
+ s10_brand_syscall32_callback,
+ NULL,
+ NULL
};
#else /* ! __amd64 */
@@ -109,7 +136,10 @@ struct brand_mach_ops s10_mops = {
struct brand_mach_ops s10_mops = {
s10_brand_sysenter_callback,
NULL,
+ NULL,
s10_brand_syscall_callback,
+ NULL,
+ NULL,
NULL
};
#endif /* __amd64 */
@@ -120,7 +150,8 @@ struct brand s10_brand = {
BRAND_VER_1,
"solaris10",
&s10_brops,
- &s10_mops
+ &s10_mops,
+ sizeof (brand_proc_data_t),
};
static struct modlbrand modlbrand = {
@@ -252,7 +283,7 @@ s10_native(void *cmd, void *args)
/*ARGSUSED*/
int
s10_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
- uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6)
+ uintptr_t arg3, uintptr_t arg4, uintptr_t arg5)
{
proc_t *p = curproc;
int res;
@@ -326,9 +357,9 @@ s10_copy_procdata(proc_t *child, proc_t *parent)
}
void
-s10_proc_exit(struct proc *p, klwp_t *l)
+s10_proc_exit(struct proc *p)
{
- brand_solaris_proc_exit(p, l, &s10_brand);
+ brand_solaris_proc_exit(p, &s10_brand);
}
void
@@ -337,10 +368,10 @@ s10_exec()
brand_solaris_exec(&s10_brand);
}
-int
-s10_initlwp(klwp_t *l)
+void
+s10_initlwp(klwp_t *l, void *bd)
{
- return (brand_solaris_initlwp(l, &s10_brand));
+ brand_solaris_initlwp(l, &s10_brand);
}
void
@@ -380,7 +411,7 @@ s10_free_brand_data(zone_t *zone)
}
void
-s10_init_brand_data(zone_t *zone)
+s10_init_brand_data(zone_t *zone, kmutex_t *zsl)
{
ASSERT(zone->zone_brand == &s10_brand);
ASSERT(zone->zone_brand_data == NULL);
@@ -390,11 +421,11 @@ s10_init_brand_data(zone_t *zone)
int
s10_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred,
- int brand_action)
+ int *brand_action)
{
return (brand_solaris_elfexec(vp, uap, args, idatap, level, execsz,
setid, exec_file, cred, brand_action, &s10_brand, S10_BRANDNAME,
- S10_LIB, S10_LIB32, S10_LINKER, S10_LINKER32));
+ S10_LIB, S10_LIB32));
}
void
diff --git a/usr/src/uts/common/brand/solaris10/s10_brand.h b/usr/src/uts/common/brand/solaris10/s10_brand.h
index 11f9853f48..ffef485e12 100644
--- a/usr/src/uts/common/brand/solaris10/s10_brand.h
+++ b/usr/src/uts/common/brand/solaris10/s10_brand.h
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#ifndef _S10_BRAND_H
@@ -42,17 +43,12 @@ extern "C" {
#define S10_LINKER_NAME "ld.so.1"
#define S10_LIB32 BRAND_NATIVE_DIR "usr/lib/" S10_LIB_NAME
-#define S10_LINKER32 "/lib/" S10_LINKER_NAME
-
#define S10_LIB64 BRAND_NATIVE_DIR "usr/lib/64/" S10_LIB_NAME
-#define S10_LINKER64 "/lib/64/" S10_LINKER_NAME
#if defined(_LP64)
#define S10_LIB S10_LIB64
-#define S10_LINKER S10_LINKER64
#else /* !_LP64 */
#define S10_LIB S10_LIB32
-#define S10_LINKER S10_LINKER32
#endif /* !_LP64 */
/*
diff --git a/usr/src/uts/common/conf/param.c b/usr/src/uts/common/conf/param.c
index 6b3ba51f31..a71be771fd 100644
--- a/usr/src/uts/common/conf/param.c
+++ b/usr/src/uts/common/conf/param.c
@@ -22,6 +22,7 @@
/*
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1983, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright 2012 Milan Jurik. All rights reserved.
*/
@@ -559,8 +560,8 @@ char *isa_list = architecture;
static pgcnt_t original_physmem = 0;
#define MIN_DEFAULT_MAXUSERS 8u
-#define MAX_DEFAULT_MAXUSERS 2048u
-#define MAX_MAXUSERS 4096u
+#define MAX_DEFAULT_MAXUSERS 10000u
+#define MAX_MAXUSERS 20000u
void
param_preset(void)
@@ -572,7 +573,7 @@ void
param_calc(int platform_max_nprocs)
{
/*
- * Default to about one "user" per megabyte, taking into
+ * Default to about one "user" per 8MB, taking into
* account both physical and virtual constraints.
* Note: 2^20 is a meg; shifting right by (20 - PAGESHIFT)
* converts pages to megs without integer overflow.
@@ -586,8 +587,9 @@ param_calc(int platform_max_nprocs)
if (maxusers == 0) {
pgcnt_t physmegs = physmem >> (20 - PAGESHIFT);
pgcnt_t virtmegs = vmem_size(heap_arena, VMEM_FREE) >> 20;
- maxusers = MIN(MAX(MIN(physmegs, virtmegs),
- MIN_DEFAULT_MAXUSERS), MAX_DEFAULT_MAXUSERS);
+ maxusers = MIN(physmegs, virtmegs) >> 3; /* divide by 8 */
+ maxusers = MAX(maxusers, MIN_DEFAULT_MAXUSERS);
+ maxusers = MIN(maxusers, MAX_DEFAULT_MAXUSERS);
}
if (maxusers > MAX_MAXUSERS) {
maxusers = MAX_MAXUSERS;
diff --git a/usr/src/uts/common/contract/process.c b/usr/src/uts/common/contract/process.c
index 9fd23fdb61..cad5d7f955 100644
--- a/usr/src/uts/common/contract/process.c
+++ b/usr/src/uts/common/contract/process.c
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
*/
#include <sys/mutex.h>
@@ -955,6 +956,18 @@ contract_process_exit(cont_process_t *ctp, proc_t *p, int exitstatus)
(void) cte_publish_all(ct, event, nvl, NULL);
mutex_enter(&ct->ct_lock);
}
+
+ /*
+ * CT_PR_EV_EXIT is not part of the CT_PR_ALLFATAL definition since
+ * we never allow including this in the fatal set via a user-land
+ * application, but we do allow CT_PR_EV_EXIT in the contract's fatal
+ * set for a process setup for zone init. See zone_start_init().
+ */
+ if (EVFATALP(ctp, CT_PR_EV_EXIT)) {
+ ASSERT(MUTEX_HELD(&ct->ct_lock));
+ contract_process_kill(ct, p, B_TRUE);
+ }
+
if (empty) {
/*
* Send EMPTY message.
diff --git a/usr/src/uts/common/crypto/api/kcf_random.c b/usr/src/uts/common/crypto/api/kcf_random.c
index bc72fa984a..75072fb686 100644
--- a/usr/src/uts/common/crypto/api/kcf_random.c
+++ b/usr/src/uts/common/crypto/api/kcf_random.c
@@ -70,6 +70,7 @@
#include <sys/cpuvar.h>
#include <sys/taskq.h>
#include <rng/fips_random.h>
+#include <sys/strlog.h>
#define RNDPOOLSIZE 1024 /* Pool size in bytes */
#define MINEXTRACTBYTES 20
@@ -933,7 +934,8 @@ rnd_handler(void *arg)
int len = 0;
if (!rng_prov_found && rng_ok_to_log) {
- cmn_err(CE_WARN, "No randomness provider enabled for "
+ (void) strlog(0, 0, 0, SL_NOTE,
+ "No randomness provider enabled for "
"/dev/random. Use cryptoadm(1M) to enable a provider.");
rng_ok_to_log = B_FALSE;
}
diff --git a/usr/src/uts/common/crypto/core/kcf_sched.c b/usr/src/uts/common/crypto/core/kcf_sched.c
index f461fe048c..8b2760b237 100644
--- a/usr/src/uts/common/crypto/core/kcf_sched.c
+++ b/usr/src/uts/common/crypto/core/kcf_sched.c
@@ -1027,9 +1027,9 @@ kcfpool_svc(void *arg)
case 0:
case -1:
/*
- * Woke up with no work to do. Check
- * if this thread should exit. We keep
- * at least kcf_minthreads.
+ * Woke up with no work to do. Check if we
+ * should lwp_exit() (which won't return). We
+ * keep at least kcf_minthreads.
*/
if (kcfpool->kp_threads > kcf_minthreads) {
KCF_ATOMIC_DECR(kcfpool->kp_threads);
diff --git a/usr/src/uts/common/ctf/ctf_mod.c b/usr/src/uts/common/ctf/ctf_mod.c
index b34cf400cd..421b922c96 100644
--- a/usr/src/uts/common/ctf/ctf_mod.c
+++ b/usr/src/uts/common/ctf/ctf_mod.c
@@ -24,8 +24,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/sysmacros.h>
#include <sys/modctl.h>
#include <sys/debug.h>
@@ -117,6 +115,15 @@ ctf_version(int version)
/*ARGSUSED*/
ctf_file_t *
+ctf_fdcreate_int(int fd, int *errp, ctf_sect_t *ctfp)
+{
+ if (errp != NULL)
+ *errp = ENOTSUP;
+ return (NULL);
+}
+
+/*ARGSUSED*/
+ctf_file_t *
ctf_modopen(struct module *mp, int *error)
{
ctf_sect_t ctfsect, symsect, strsect;
diff --git a/usr/src/uts/common/disp/cmt.c b/usr/src/uts/common/disp/cmt.c
index 1c5e1f79a9..3ecbf39393 100644
--- a/usr/src/uts/common/disp/cmt.c
+++ b/usr/src/uts/common/disp/cmt.c
@@ -201,13 +201,15 @@ pg_cmt_cpu_startup(cpu_t *cp)
/*
* Return non-zero if thread can migrate between "from" and "to"
- * without a performance penalty
+ * without a performance penalty. This is true only if we share a core on
+ * virtually any CPU; sharing the last-level cache is insufficient to make
+ * migration possible without penalty.
*/
int
pg_cmt_can_migrate(cpu_t *from, cpu_t *to)
{
- if (from->cpu_physid->cpu_cacheid ==
- to->cpu_physid->cpu_cacheid)
+ if (from->cpu_physid->cpu_coreid ==
+ to->cpu_physid->cpu_coreid)
return (1);
return (0);
}
diff --git a/usr/src/uts/common/disp/cpucaps.c b/usr/src/uts/common/disp/cpucaps.c
index 46f53faab6..2a4365ff73 100644
--- a/usr/src/uts/common/disp/cpucaps.c
+++ b/usr/src/uts/common/disp/cpucaps.c
@@ -22,6 +22,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2013 Joyent, Inc. All rights reserved.
*/
#include <sys/disp.h>
@@ -74,6 +75,32 @@
* Putting threads on wait queues in random places while running in the
* kernel might lead to all kinds of locking problems.
*
+ * Bursting
+ * ========
+ *
+ * CPU bursting occurs when the CPU usage is over the baseline but under the
+ * cap. The baseline CPU (zone.cpu-baseline) is set in a multi-tenant
+ * environment so that we know how much CPU is allocated for a tenant under
+ * normal utilization. We can then track how much time a zone is spending
+ * over the "normal" CPU utilization expected for that zone using the
+ * "above_base_sec" kstat. This kstat is cumulative.
+ *
+ * If the zone has a burst limit (zone.cpu-burst-time) then the zone can
+ * burst for that period of time (in seconds) before the effective cap is
+ * lowered to the baseline. Once the effective cap is lowered, the zone
+ * will run at the baseline for the burst limit before the effective cap is
+ * raised again to the full value. This will allow the zone to burst again.
+ * We can watch this behavior using the kstats. The "effective" kstat shows
+ * which cap is being used, the baseline value or the burst value. The
+ * "burst_limit_sec" shows the value of the zone.cpu-burst-time rctl and the
+ * "bursting_sec" kstat shows how many seconds the zone has currently been
+ * bursting. When the CPU load is continuously greater than the baseline,
+ * bursting_sec will increase, up to the burst_limit_sec value, then the
+ * effective kstat will drop to the baseline and the bursting_sec value will
+ * decrease until it hits 0, at which time the effective kstat will return to
+ * the full burst value and the bursting_sec value will begin to increase
+ * again.
+ *
* Accounting
* ==========
*
@@ -203,18 +230,28 @@ static void caps_update();
*/
struct cap_kstat {
kstat_named_t cap_value;
+ kstat_named_t cap_baseline;
+ kstat_named_t cap_effective;
+ kstat_named_t cap_burst_limit;
+ kstat_named_t cap_bursting;
kstat_named_t cap_usage;
kstat_named_t cap_nwait;
kstat_named_t cap_below;
kstat_named_t cap_above;
+ kstat_named_t cap_above_base;
kstat_named_t cap_maxusage;
kstat_named_t cap_zonename;
} cap_kstat = {
{ "value", KSTAT_DATA_UINT64 },
+ { "baseline", KSTAT_DATA_UINT64 },
+ { "effective", KSTAT_DATA_UINT64 },
+ { "burst_limit_sec", KSTAT_DATA_UINT64 },
+ { "bursting_sec", KSTAT_DATA_UINT64 },
{ "usage", KSTAT_DATA_UINT64 },
{ "nwait", KSTAT_DATA_UINT64 },
{ "below_sec", KSTAT_DATA_UINT64 },
{ "above_sec", KSTAT_DATA_UINT64 },
+ { "above_base_sec", KSTAT_DATA_UINT64 },
{ "maxusage", KSTAT_DATA_UINT64 },
{ "zonename", KSTAT_DATA_STRING },
};
@@ -311,7 +348,7 @@ cap_enable(list_t *l, cpucap_t *cap, hrtime_t value)
cap->cap_below = cap->cap_above = 0;
cap->cap_maxusage = 0;
cap->cap_usage = 0;
- cap->cap_value = value;
+ cap->cap_value = cap->cap_chk_value = value;
waitq_unblock(&cap->cap_waitq);
if (CPUCAPS_OFF()) {
cpucaps_enabled = B_TRUE;
@@ -340,19 +377,21 @@ cap_disable(list_t *l, cpucap_t *cap)
ASSERT(CAP_ENABLED(cap));
waitq_block(&cap->cap_waitq);
+
+ /* do this first to avoid race with cap_kstat_update */
+ if (cap->cap_kstat != NULL) {
+ kstat_delete(cap->cap_kstat);
+ cap->cap_kstat = NULL;
+ }
+
list_remove(l, cap);
if (list_is_empty(&capped_projects) && list_is_empty(&capped_zones)) {
cpucaps_enabled = B_FALSE;
cpucaps_clock_callout = NULL;
}
- cap->cap_value = 0;
+ cap->cap_value = cap->cap_chk_value = 0;
cap->cap_project = NULL;
cap->cap_zone = NULL;
- if (cap->cap_kstat != NULL) {
- kstat_delete(cap->cap_kstat);
- cap->cap_kstat = NULL;
- }
-
}
/*
@@ -487,6 +526,8 @@ cap_walk(list_t *l, void (*cb)(cpucap_t *, int64_t))
* The waitq_isempty check is performed without the waitq lock. If a new thread
* is placed on the waitq right after the check, it will be picked up during the
* next invocation of cap_poke_waitq().
+ *
+ * Called once per tick for zones.
*/
/* ARGSUSED */
static void
@@ -494,15 +535,92 @@ cap_poke_waitq(cpucap_t *cap, int64_t gen)
{
ASSERT(MUTEX_HELD(&caps_lock));
- if (cap->cap_usage >= cap->cap_value) {
+ if (cap->cap_base != 0) {
+ /*
+ * Because of the way usage is calculated and decayed, its
+ * possible for the zone to be slightly over its cap, but we
+ * don't want to count that after we have reduced the effective
+ * cap to the baseline. That way the zone will be able to
+ * burst again after the burst_limit has expired.
+ */
+ if (cap->cap_usage > cap->cap_base &&
+ cap->cap_chk_value == cap->cap_value) {
+ cap->cap_above_base++;
+
+ /*
+ * If bursting is limited and we've been bursting
+ * longer than we're supposed to, then set the
+ * effective cap to the baseline.
+ */
+ if (cap->cap_burst_limit != 0) {
+ cap->cap_bursting++;
+ if (cap->cap_bursting >= cap->cap_burst_limit)
+ cap->cap_chk_value = cap->cap_base;
+ }
+ } else if (cap->cap_bursting > 0) {
+ /*
+ * We're not bursting now, but we were, decay the
+ * bursting timer.
+ */
+ cap->cap_bursting--;
+ /*
+ * Reset the effective cap once we decay to 0 so we
+ * can burst again.
+ */
+ if (cap->cap_bursting == 0 &&
+ cap->cap_chk_value != cap->cap_value)
+ cap->cap_chk_value = cap->cap_value;
+ }
+ }
+
+ if (cap->cap_usage >= cap->cap_chk_value) {
cap->cap_above++;
} else {
waitq_t *wq = &cap->cap_waitq;
cap->cap_below++;
- if (!waitq_isempty(wq))
- waitq_runone(wq);
+ if (!waitq_isempty(wq)) {
+ int i, ndequeue, p;
+
+ /*
+ * Since this function is only called once per tick,
+ * we can hit a situation where we have artificially
+ * limited the project/zone below its cap. This would
+ * happen if we have multiple threads queued up but
+ * only dequeued one thread/tick. To avoid this we
+ * dequeue multiple threads, calculated based on the
+ * usage percentage of the cap. It is possible that we
+ * could dequeue too many threads and some of them
+ * might be put back on the wait queue quickly, but
+ * since we know that threads are on the wait queue
+ * because we're capping, we know that there is unused
+ * CPU cycles anyway, so this extra work would not
+ * hurt. Also, the ndequeue number is only an upper
+ * bound and we might dequeue less, depending on how
+ * many threads are actually in the wait queue. The
+ * ndequeue values are empirically derived and could be
+ * adjusted or calculated in another way if necessary.
+ */
+ p = (int)((100 * cap->cap_usage) / cap->cap_chk_value);
+ if (p >= 98)
+ ndequeue = 10;
+ else if (p >= 95)
+ ndequeue = 20;
+ else if (p >= 90)
+ ndequeue = 40;
+ else if (p >= 85)
+ ndequeue = 80;
+ else
+ ndequeue = 160;
+
+ for (i = 0; i < ndequeue; i++) {
+ waitq_runone(wq);
+ if (waitq_isempty(wq))
+ break;
+ }
+ DTRACE_PROBE2(cpucaps__pokeq, int, p, int, i);
+ }
}
}
@@ -629,14 +747,14 @@ cap_project_zone_modify_walker(kproject_t *kpj, void *arg)
* Remove all projects in this zone without caps
* from the capped_projects list.
*/
- if (project_cap->cap_value == MAX_USAGE) {
+ if (project_cap->cap_chk_value == MAX_USAGE) {
cap_project_disable(kpj);
}
} else if (CAP_DISABLED(project_cap)) {
/*
* Add the project to capped_projects list.
*/
- ASSERT(project_cap->cap_value == 0);
+ ASSERT(project_cap->cap_chk_value == 0);
cap_project_enable(kpj, MAX_USAGE);
}
mutex_exit(&caps_lock);
@@ -746,7 +864,7 @@ cpucaps_zone_set(zone_t *zone, rctl_qty_t cap_val)
/*
* No state transitions, just change the value
*/
- cap->cap_value = value;
+ cap->cap_value = cap->cap_chk_value = value;
}
ASSERT(MUTEX_HELD(&caps_lock));
@@ -757,6 +875,108 @@ cpucaps_zone_set(zone_t *zone, rctl_qty_t cap_val)
}
/*
+ * Set zone's base cpu value to base_val
+ */
+int
+cpucaps_zone_set_base(zone_t *zone, rctl_qty_t base_val)
+{
+ cpucap_t *cap = NULL;
+ hrtime_t value;
+
+ ASSERT(base_val <= MAXCAP);
+ if (base_val > MAXCAP)
+ base_val = MAXCAP;
+
+ if (CPUCAPS_OFF() || !ZONE_IS_CAPPED(zone))
+ return (0);
+
+ if (zone->zone_cpucap == NULL)
+ cap = cap_alloc();
+
+ mutex_enter(&caps_lock);
+
+ if (cpucaps_busy) {
+ mutex_exit(&caps_lock);
+ return (EBUSY);
+ }
+
+ /*
+ * Double-check whether zone->zone_cpucap is NULL, now with caps_lock
+ * held. If it is still NULL, assign a newly allocated cpucap to it.
+ */
+ if (zone->zone_cpucap == NULL) {
+ zone->zone_cpucap = cap;
+ } else if (cap != NULL) {
+ cap_free(cap);
+ }
+
+ cap = zone->zone_cpucap;
+
+ value = base_val * cap_tick_cost;
+ if (value < 0 || value > cap->cap_value)
+ value = 0;
+
+ cap->cap_base = value;
+
+ mutex_exit(&caps_lock);
+
+ return (0);
+}
+
+/*
+ * Set zone's maximum burst time in seconds. A burst time of 0 means that
+ * the zone can run over its baseline indefinitely.
+ */
+int
+cpucaps_zone_set_burst_time(zone_t *zone, rctl_qty_t base_val)
+{
+ cpucap_t *cap = NULL;
+ hrtime_t value;
+
+ ASSERT(base_val <= INT_MAX);
+ /* Treat the default as 0 - no limit */
+ if (base_val == INT_MAX)
+ base_val = 0;
+ if (base_val > INT_MAX)
+ base_val = INT_MAX;
+
+ if (CPUCAPS_OFF() || !ZONE_IS_CAPPED(zone))
+ return (0);
+
+ if (zone->zone_cpucap == NULL)
+ cap = cap_alloc();
+
+ mutex_enter(&caps_lock);
+
+ if (cpucaps_busy) {
+ mutex_exit(&caps_lock);
+ return (EBUSY);
+ }
+
+ /*
+ * Double-check whether zone->zone_cpucap is NULL, now with caps_lock
+ * held. If it is still NULL, assign a newly allocated cpucap to it.
+ */
+ if (zone->zone_cpucap == NULL) {
+ zone->zone_cpucap = cap;
+ } else if (cap != NULL) {
+ cap_free(cap);
+ }
+
+ cap = zone->zone_cpucap;
+
+ value = SEC_TO_TICK(base_val);
+ if (value < 0)
+ value = 0;
+
+ cap->cap_burst_limit = value;
+
+ mutex_exit(&caps_lock);
+
+ return (0);
+}
+
+/*
* The project is going away so disable its cap.
*/
void
@@ -902,7 +1122,7 @@ cpucaps_project_set(kproject_t *kpj, rctl_qty_t cap_val)
if (CAP_DISABLED(cap))
cap_project_enable(kpj, value);
else
- cap->cap_value = value;
+ cap->cap_value = cap->cap_chk_value = value;
} else if (CAP_ENABLED(cap)) {
/*
* User requested to drop a cap on the project. If it is part of
@@ -910,7 +1130,7 @@ cpucaps_project_set(kproject_t *kpj, rctl_qty_t cap_val)
* otherwise disable the cap.
*/
if (ZONE_IS_CAPPED(kpj->kpj_zone)) {
- cap->cap_value = MAX_USAGE;
+ cap->cap_value = cap->cap_chk_value = MAX_USAGE;
} else {
cap_project_disable(kpj);
}
@@ -948,6 +1168,26 @@ cpucaps_zone_get(zone_t *zone)
}
/*
+ * Get current zone baseline.
+ */
+rctl_qty_t
+cpucaps_zone_get_base(zone_t *zone)
+{
+ return (zone->zone_cpucap != NULL ?
+ (rctl_qty_t)(zone->zone_cpucap->cap_base / cap_tick_cost) : 0);
+}
+
+/*
+ * Get current zone maximum burst time.
+ */
+rctl_qty_t
+cpucaps_zone_get_burst_time(zone_t *zone)
+{
+ return (zone->zone_cpucap != NULL ?
+ (rctl_qty_t)(TICK_TO_SEC(zone->zone_cpucap->cap_burst_limit)) : 0);
+}
+
+/*
* Charge project of thread t the time thread t spent on CPU since previously
* adjusted.
*
@@ -1045,7 +1285,7 @@ cpucaps_charge(kthread_id_t t, caps_sc_t *csc, cpucaps_charge_t charge_type)
project_cap = kpj->kpj_cpucap;
- if (project_cap->cap_usage >= project_cap->cap_value) {
+ if (project_cap->cap_usage >= project_cap->cap_chk_value) {
t->t_schedflag |= TS_PROJWAITQ;
rc = B_TRUE;
} else if (t->t_schedflag & TS_PROJWAITQ) {
@@ -1059,7 +1299,7 @@ cpucaps_charge(kthread_id_t t, caps_sc_t *csc, cpucaps_charge_t charge_type)
} else {
cpucap_t *zone_cap = zone->zone_cpucap;
- if (zone_cap->cap_usage >= zone_cap->cap_value) {
+ if (zone_cap->cap_usage >= zone_cap->cap_chk_value) {
t->t_schedflag |= TS_ZONEWAITQ;
rc = B_TRUE;
} else if (t->t_schedflag & TS_ZONEWAITQ) {
@@ -1119,6 +1359,7 @@ cpucaps_enforce(kthread_t *t)
/*
* Convert internal cap statistics into values exported by cap kstat.
+ * Note that the kstat is held throughout this function but caps_lock is not.
*/
static int
cap_kstat_update(kstat_t *ksp, int rw)
@@ -1133,6 +1374,12 @@ cap_kstat_update(kstat_t *ksp, int rw)
capsp->cap_value.value.ui64 =
ROUND_SCALE(cap->cap_value, cap_tick_cost);
+ capsp->cap_baseline.value.ui64 =
+ ROUND_SCALE(cap->cap_base, cap_tick_cost);
+ capsp->cap_effective.value.ui64 =
+ ROUND_SCALE(cap->cap_chk_value, cap_tick_cost);
+ capsp->cap_burst_limit.value.ui64 =
+ ROUND_SCALE(cap->cap_burst_limit, tick_sec);
capsp->cap_usage.value.ui64 =
ROUND_SCALE(cap->cap_usage, cap_tick_cost);
capsp->cap_maxusage.value.ui64 =
@@ -1140,6 +1387,10 @@ cap_kstat_update(kstat_t *ksp, int rw)
capsp->cap_nwait.value.ui64 = cap->cap_waitq.wq_count;
capsp->cap_below.value.ui64 = ROUND_SCALE(cap->cap_below, tick_sec);
capsp->cap_above.value.ui64 = ROUND_SCALE(cap->cap_above, tick_sec);
+ capsp->cap_above_base.value.ui64 =
+ ROUND_SCALE(cap->cap_above_base, tick_sec);
+ capsp->cap_bursting.value.ui64 =
+ ROUND_SCALE(cap->cap_bursting, tick_sec);
kstat_named_setstr(&capsp->cap_zonename, zonename);
return (0);
diff --git a/usr/src/uts/common/disp/disp.c b/usr/src/uts/common/disp/disp.c
index 0c2c0b4993..5f9c2c68a2 100644
--- a/usr/src/uts/common/disp/disp.c
+++ b/usr/src/uts/common/disp/disp.c
@@ -23,6 +23,10 @@
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ */
+
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
@@ -105,7 +109,7 @@ static void cpu_resched(cpu_t *cp, pri_t tpri);
/*
* If this is set, only interrupt threads will cause kernel preemptions.
* This is done by changing the value of kpreemptpri. kpreemptpri
- * will either be the max sysclass pri + 1 or the min interrupt pri.
+ * will either be the max sysclass pri or the min interrupt pri.
*/
int only_intr_kpreempt;
@@ -252,7 +256,23 @@ dispinit(void)
maxglobpri = cl_maxglobpri;
}
}
- kpreemptpri = (pri_t)v.v_maxsyspri + 1;
+
+ /*
+ * Historically, kpreemptpri was set to v_maxsyspri + 1 -- which is
+ * to say, maxclsyspri + 1. However, over time, the system has used
+ * more and more asynchronous kernel threads, with an increasing number
+ * of these doing work on direct behalf of higher-level software (e.g.,
+ * network processing). This has led to potential priority inversions:
+ * threads doing low-priority lengthy kernel work can effectively
+ * delay kernel-level processing of higher-priority data. To minimize
+ * such inversions, we set kpreemptpri to be v_maxsyspri; anything in
+ * the kernel that runs at maxclsyspri will therefore induce kernel
+ * preemption, and this priority should be used if/when an asynchronous
+ * thread (or, as is often the case, task queue) is performing a task
+ * on behalf of higher-level software (or any task that is otherwise
+ * latency-sensitve).
+ */
+ kpreemptpri = (pri_t)v.v_maxsyspri;
if (kpqpri == KPQPRI)
kpqpri = kpreemptpri;
@@ -2258,7 +2278,7 @@ disp_getbest(disp_t *dp)
* placed earlier.
*/
if (tcp == NULL ||
- pri >= minclsyspri ||
+ (pri >= minclsyspri && tp->t_procp == &p0) ||
tp->t_cpu != tcp)
break;
diff --git a/usr/src/uts/common/disp/fx.c b/usr/src/uts/common/disp/fx.c
index ab5ba278a0..8260680a07 100644
--- a/usr/src/uts/common/disp/fx.c
+++ b/usr/src/uts/common/disp/fx.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#include <sys/types.h>
@@ -71,16 +71,6 @@ static struct modlinkage modlinkage = {
};
-/*
- * control flags (kparms->fx_cflags).
- */
-#define FX_DOUPRILIM 0x01 /* change user priority limit */
-#define FX_DOUPRI 0x02 /* change user priority */
-#define FX_DOTQ 0x04 /* change FX time quantum */
-
-
-#define FXMAXUPRI 60 /* maximum user priority setting */
-
#define FX_MAX_UNPRIV_PRI 0 /* maximum unpriviledge priority */
/*
diff --git a/usr/src/uts/common/disp/rt.c b/usr/src/uts/common/disp/rt.c
index f87f8c56ce..115e42ccb8 100644
--- a/usr/src/uts/common/disp/rt.c
+++ b/usr/src/uts/common/disp/rt.c
@@ -22,7 +22,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright 2013 Joyent, Inc. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -103,13 +103,6 @@ _info(struct modinfo *modinfop)
pri_t rt_maxpri = RTMAXPRI; /* maximum real-time priority */
rtdpent_t *rt_dptbl; /* real-time dispatcher parameter table */
-/*
- * control flags (kparms->rt_cflags).
- */
-#define RT_DOPRI 0x01 /* change priority */
-#define RT_DOTQ 0x02 /* change RT time quantum */
-#define RT_DOSIG 0x04 /* change RT time quantum signal */
-
static int rt_admin(caddr_t, cred_t *);
static int rt_enterclass(kthread_t *, id_t, void *, cred_t *, void *);
static int rt_fork(kthread_t *, kthread_t *, void *);
diff --git a/usr/src/uts/common/disp/rt_dptbl.c b/usr/src/uts/common/disp/rt_dptbl.c
index 768b499ef2..cc88ed72fc 100644
--- a/usr/src/uts/common/disp/rt_dptbl.c
+++ b/usr/src/uts/common/disp/rt_dptbl.c
@@ -28,8 +28,6 @@
/* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
/* All Rights Reserved */
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/proc.h>
#include <sys/priocntl.h>
#include <sys/class.h>
@@ -70,8 +68,6 @@ _info(struct modinfo *modinfop)
return (mod_info(&modlinkage, modinfop));
}
-#define RTGPPRIO0 100 /* Global priority for RT priority 0 */
-
rtdpent_t config_rt_dptbl[] = {
/* prilevel Time quantum */
diff --git a/usr/src/uts/common/disp/thread.c b/usr/src/uts/common/disp/thread.c
index f2685af534..ae6c5eef16 100644
--- a/usr/src/uts/common/disp/thread.c
+++ b/usr/src/uts/common/disp/thread.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -75,6 +75,10 @@
#include <sys/cpucaps.h>
#include <sys/kiconv.h>
+#ifndef STACK_GROWTH_DOWN
+#error Stacks do not grow downward; 3b2 zombie attack detected!
+#endif
+
struct kmem_cache *thread_cache; /* cache of free threads */
struct kmem_cache *lwp_cache; /* cache of free lwps */
struct kmem_cache *turnstile_cache; /* cache of free turnstiles */
@@ -372,7 +376,7 @@ thread_create(
if (stksize <= sizeof (kthread_t) + PTR24_ALIGN)
cmn_err(CE_PANIC, "thread_create: proposed stack size"
" too small to hold thread.");
-#ifdef STACK_GROWTH_DOWN
+
stksize -= SA(sizeof (kthread_t) + PTR24_ALIGN - 1);
stksize &= -PTR24_ALIGN; /* make thread aligned */
t = (kthread_t *)(stk + stksize);
@@ -381,13 +385,6 @@ thread_create(
audit_thread_create(t);
t->t_stk = stk + stksize;
t->t_stkbase = stk;
-#else /* stack grows to larger addresses */
- stksize -= SA(sizeof (kthread_t));
- t = (kthread_t *)(stk);
- bzero(t, sizeof (kthread_t));
- t->t_stk = stk + sizeof (kthread_t);
- t->t_stkbase = stk + stksize + sizeof (kthread_t);
-#endif /* STACK_GROWTH_DOWN */
t->t_flag |= T_TALLOCSTK;
t->t_swap = stk;
} else {
@@ -400,13 +397,8 @@ thread_create(
* Initialize t_stk to the kernel stack pointer to use
* upon entry to the kernel
*/
-#ifdef STACK_GROWTH_DOWN
t->t_stk = stk + stksize;
t->t_stkbase = stk;
-#else
- t->t_stk = stk; /* 3b2-like */
- t->t_stkbase = stk + stksize;
-#endif /* STACK_GROWTH_DOWN */
}
if (kmem_stackinfo != 0) {
@@ -589,6 +581,9 @@ thread_exit(void)
if ((t->t_proc_flag & TP_ZTHREAD) != 0)
cmn_err(CE_PANIC, "thread_exit: zthread_exit() not called");
+ if ((t->t_flag & T_SPLITSTK) != 0)
+ cmn_err(CE_PANIC, "thread_exit: called when stack is split");
+
tsd_exit(); /* Clean up this thread's TSD */
kcpc_passivate(); /* clean up performance counter state */
@@ -1050,6 +1045,8 @@ installctx(
ctx->free_op = free;
ctx->arg = arg;
ctx->next = t->t_ctx;
+ ctx->save_ts = 0;
+ ctx->restore_ts = 0;
t->t_ctx = ctx;
}
@@ -1124,9 +1121,12 @@ savectx(kthread_t *t)
struct ctxop *ctx;
ASSERT(t == curthread);
- for (ctx = t->t_ctx; ctx != 0; ctx = ctx->next)
- if (ctx->save_op != NULL)
+ for (ctx = t->t_ctx; ctx != 0; ctx = ctx->next) {
+ if (ctx->save_op != NULL) {
+ ctx->save_ts = gethrtime_unscaled();
(ctx->save_op)(ctx->arg);
+ }
+ }
}
void
@@ -1135,9 +1135,12 @@ restorectx(kthread_t *t)
struct ctxop *ctx;
ASSERT(t == curthread);
- for (ctx = t->t_ctx; ctx != 0; ctx = ctx->next)
- if (ctx->restore_op != NULL)
+ for (ctx = t->t_ctx; ctx != 0; ctx = ctx->next) {
+ if (ctx->restore_op != NULL) {
+ ctx->restore_ts = gethrtime_unscaled();
(ctx->restore_op)(ctx->arg);
+ }
+ }
}
void
@@ -1883,6 +1886,103 @@ thread_change_pri(kthread_t *t, pri_t disp_pri, int front)
return (on_rq);
}
+
+/*
+ * There are occasions in the kernel when we need much more stack than we
+ * allocate by default, but we do not wish to have that work done
+ * asynchronously by another thread. To accommodate these scenarios, we allow
+ * for a split stack (also known as a "segmented stack") whereby a new stack
+ * is dynamically allocated and the current thread jumps onto it for purposes
+ * of executing the specified function. After the specified function returns,
+ * the stack is deallocated and control is returned to the caller. This
+ * functionality is implemented by thread_splitstack(), below; there are a few
+ * constraints on its use:
+ *
+ * - The caller must be in a context where it is safe to block for memory.
+ * - The caller cannot be in a t_onfault context
+ * - The called function must not call thread_exit() while on the split stack
+ *
+ * The code will explicitly panic if these constraints are violated. Notably,
+ * however, thread_splitstack() _can_ be called on a split stack -- there
+ * is no limit to the level that split stacks can nest.
+ *
+ * When the stack is split, it is constructed such that stack backtraces
+ * from kernel debuggers continue to function -- though note that DTrace's
+ * stack() action and stackdepth function will only show the stack up to and
+ * including thread_splitstack_run(); DTrace explicitly bounds itself to
+ * pointers that exist within the current declared stack as a safety
+ * mechanism.
+ */
+void
+thread_splitstack(void (*func)(void *), void *arg, size_t stksize)
+{
+ kthread_t *t = curthread;
+ caddr_t ostk, ostkbase, stk;
+ ushort_t otflag;
+
+ if (t->t_onfault != NULL)
+ panic("thread_splitstack: called with non-NULL t_onfault");
+
+ ostk = t->t_stk;
+ ostkbase = t->t_stkbase;
+ otflag = t->t_flag;
+
+ stksize = roundup(stksize, PAGESIZE);
+
+ if (stksize < default_stksize)
+ stksize = default_stksize;
+
+ if (stksize == default_stksize) {
+ stk = (caddr_t)segkp_cache_get(segkp_thread);
+ } else {
+ stksize = roundup(stksize, PAGESIZE);
+ stk = (caddr_t)segkp_get(segkp, stksize,
+ (KPD_HASREDZONE | KPD_NO_ANON | KPD_LOCKED));
+ }
+
+ /*
+ * We're going to lock ourselves before we set T_SPLITSTK to assure
+ * that we're not swapped out in the meantime. (Note that we don't
+ * bother to set t_swap, as we're not going to be swapped out.)
+ */
+ thread_lock(t);
+
+ if (!(otflag & T_SPLITSTK))
+ t->t_flag |= T_SPLITSTK;
+
+ t->t_stk = stk + stksize;
+ t->t_stkbase = stk;
+
+ thread_unlock(t);
+
+ /*
+ * Now actually run on the new (split) stack...
+ */
+ thread_splitstack_run(t->t_stk, func, arg);
+
+ /*
+ * We're back onto our own stack; lock ourselves and restore our
+ * pre-split state.
+ */
+ thread_lock(t);
+
+ t->t_stk = ostk;
+ t->t_stkbase = ostkbase;
+
+ if (!(otflag & T_SPLITSTK))
+ t->t_flag &= ~T_SPLITSTK;
+
+ thread_unlock(t);
+
+ /*
+ * Now that we are entirely back on our own stack, call back into
+ * the platform layer to perform any platform-specific cleanup.
+ */
+ thread_splitstack_cleanup();
+
+ segkp_release(segkp, stk);
+}
+
/*
* Tunable kmem_stackinfo is set, fill the kernel thread stack with a
* specific pattern.
diff --git a/usr/src/uts/common/disp/thread_intr.c b/usr/src/uts/common/disp/thread_intr.c
index 67ccc6922f..c840bdf31a 100644
--- a/usr/src/uts/common/disp/thread_intr.c
+++ b/usr/src/uts/common/disp/thread_intr.c
@@ -23,19 +23,10 @@
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-
/*
- * FILE NOTICE BEGIN
- *
- * This file should not be modified. If you wish to modify it or have it
- * modified, please contact Sun Microsystems at <LFI149367@-sun-.-com->
- * (without anti-spam dashes)
- *
- * FILE NOTICE END
+ * Copyright 2015, Joyent, Inc.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/cpuvar.h>
#include <sys/stack.h>
#include <vm/seg_kp.h>
@@ -44,6 +35,17 @@
#include <sys/sysmacros.h>
/*
+ * Use a slightly larger thread stack size for interrupt threads rather than the
+ * default. This is useful for cases where the networking stack may do an rx and
+ * a tx in the context of a single interrupt and when combined with various
+ * promisc hooks that need memory, can cause us to get dangerously close to the
+ * edge of the traditional stack sizes. This is only a few pages more than a
+ * traditional stack and given that we don't have that many interrupt threads,
+ * the memory costs end up being more than worthwhile.
+ */
+#define LL_INTR_STKSZ (32 * 1024)
+
+/*
* Create and initialize an interrupt thread.
*/
static void
@@ -51,7 +53,7 @@ thread_create_intr(cpu_t *cp)
{
kthread_t *tp;
- tp = thread_create(NULL, 0,
+ tp = thread_create(NULL, LL_INTR_STKSZ,
(void (*)())thread_create_intr, NULL, 0, &p0, TS_ONPROC, 0);
/*
@@ -97,9 +99,12 @@ thread_create_intr(cpu_t *cp)
}
/*
- * Allocate a given number of interrupt threads for a given CPU.
- * These threads will get freed by cpu_destroy_bound_threads()
- * when CPU gets unconfigured.
+ * Allocate a given number of interrupt threads for a given CPU. These threads
+ * will get freed by cpu_destroy_bound_threads() when CPU gets unconfigured.
+ *
+ * Note, high level interrupts are always serviced using cpu_intr_stack and are
+ * not allowed to block. Low level interrupts or soft-interrupts use the
+ * kthread_t's that we create through the calls to thread_create_intr().
*/
void
cpu_intr_alloc(cpu_t *cp, int n)
@@ -110,6 +115,6 @@ cpu_intr_alloc(cpu_t *cp, int n)
thread_create_intr(cp);
cp->cpu_intr_stack = (caddr_t)segkp_get(segkp, INTR_STACK_SIZE,
- KPD_HASREDZONE | KPD_NO_ANON | KPD_LOCKED) +
- INTR_STACK_SIZE - SA(MINFRAME);
+ KPD_HASREDZONE | KPD_NO_ANON | KPD_LOCKED) +
+ INTR_STACK_SIZE - SA(MINFRAME);
}
diff --git a/usr/src/uts/common/dtrace/dtrace.c b/usr/src/uts/common/dtrace/dtrace.c
index c775224d86..fc0206da29 100644
--- a/usr/src/uts/common/dtrace/dtrace.c
+++ b/usr/src/uts/common/dtrace/dtrace.c
@@ -7710,7 +7710,7 @@ dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp)
priv = DTRACE_PRIV_ALL;
} else {
*uidp = crgetuid(cr);
- *zoneidp = crgetzoneid(cr);
+ *zoneidp = crgetzonedid(cr);
priv = 0;
if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE))
@@ -8206,7 +8206,7 @@ dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv,
provider->dtpv_priv.dtpp_flags = priv;
if (cr != NULL) {
provider->dtpv_priv.dtpp_uid = crgetuid(cr);
- provider->dtpv_priv.dtpp_zoneid = crgetzoneid(cr);
+ provider->dtpv_priv.dtpp_zoneid = crgetzonedid(cr);
}
provider->dtpv_pops = *pops;
@@ -8817,6 +8817,7 @@ dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab)
uint32_t priv;
uid_t uid;
zoneid_t zoneid;
+ dtrace_state_t *state = enab->dten_vstate->dtvs_state;
ASSERT(MUTEX_HELD(&dtrace_lock));
dtrace_ecb_create_cache = NULL;
@@ -8831,8 +8832,22 @@ dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab)
}
dtrace_probekey(desc, &pkey);
- dtrace_cred2priv(enab->dten_vstate->dtvs_state->dts_cred.dcr_cred,
- &priv, &uid, &zoneid);
+ dtrace_cred2priv(state->dts_cred.dcr_cred, &priv, &uid, &zoneid);
+
+ if ((priv & DTRACE_PRIV_ZONEOWNER) &&
+ state->dts_options[DTRACEOPT_ZONE] != DTRACEOPT_UNSET) {
+ /*
+ * If we have the privilege of instrumenting all zones but we
+ * have been told to instrument but one, we will spoof this up
+ * depriving ourselves of DTRACE_PRIV_ZONEOWNER for purposes
+ * of dtrace_match(). (Note that DTRACEOPT_ZONE is not for
+ * security but rather for performance: it allows the global
+ * zone to instrument USDT probes in a local zone without
+ * requiring all zones to be instrumented.)
+ */
+ priv &= ~DTRACE_PRIV_ZONEOWNER;
+ zoneid = state->dts_options[DTRACEOPT_ZONE];
+ }
return (dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable,
enab));
diff --git a/usr/src/uts/common/dtrace/sdt_subr.c b/usr/src/uts/common/dtrace/sdt_subr.c
index 157acc25fc..3d350ff278 100644
--- a/usr/src/uts/common/dtrace/sdt_subr.c
+++ b/usr/src/uts/common/dtrace/sdt_subr.c
@@ -97,6 +97,10 @@ static dtrace_pattr_t iscsi_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
};
+/*
+ * When adding a new provider you must add it before sdt as sdt is a catch all
+ * for remaining probes.
+ */
sdt_provider_t sdt_providers[] = {
{ "vtrace", "__vtrace_", &vtrace_attr },
{ "sysinfo", "__cpu_sysinfo_", &info_attr, DTRACE_PRIV_USER },
@@ -117,6 +121,7 @@ sdt_provider_t sdt_providers[] = {
{ "fc", "__fc_", &fc_attr },
{ "srp", "__srp_", &fc_attr },
{ "sysevent", "__sysevent_", &stab_attr },
+ { "vnd", "__vnd_", &stab_attr },
{ "sdt", NULL, &sdt_attr },
{ NULL }
};
@@ -1151,6 +1156,34 @@ sdt_argdesc_t sdt_args[] = {
{ "fc", "abts-receive", 2, 2, "fct_i_remote_port_t *",
"fc_port_info_t *" },
+ { "vnd", "flow-blocked", 0, 0, "vnd_str_t *", "ifinfo_t *" },
+ { "vnd", "flow-blocked", 1, 1, "uint64_t", "uint64_t" },
+ { "vnd", "flow-blocked", 2, 2, "uintptr_t", "uintptr_t" },
+ { "vnd", "flow-resumed", 0, 0, "vnd_str_t *", "ifinfo_t *" },
+ { "vnd", "flow-resumed", 1, 1, "uint64_t", "uint64_t" },
+ { "vnd", "flow-resumed", 2, 2, "uintptr_t", "uintptr_t" },
+ { "vnd", "drop-in", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "vnd", "drop-in", 1, 1, "vnd_str_t *", "ifinfo_t *" },
+ { "vnd", "drop-in", 2, 2, "mblk_t *", "etherinfo_t *" },
+ { "vnd", "drop-in", 3, 3, "const char *", "const char *" },
+ { "vnd", "drop-out", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "vnd", "drop-out", 1, 1, "vnd_str_t *", "ifinfo_t *" },
+ { "vnd", "drop-out", 2, 2, "mblk_t *", "etherinfo_t *" },
+ { "vnd", "drop-out", 3, 3, "const char *", "const char *" },
+ { "vnd", "drop-ctl", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "vnd", "drop-ctl", 1, 1, "vnd_str_t *", "ifinfo_t *" },
+ { "vnd", "drop-ctl", 2, 2, "mblk_t *", "etherinfo_t *" },
+ { "vnd", "drop-ctl", 3, 3, "const char *", "const char *" },
+ { "vnd", "send", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "vnd", "send", 1, 1, "void *", "csinfo_t *" },
+ { "vnd", "send", 2, 2, "void *", "ipinfo_t *" },
+ { "vnd", "send", 3, 3, "vnd_str_t *", "ifinfo_t *" },
+ { "vnd", "send", 4, 4, "mblk_t *", "etherinfo_t *" },
+ { "vnd", "recv", 0, 0, "mblk_t *", "pktinfo_t *" },
+ { "vnd", "recv", 1, 1, "void *", "csinfo_t *" },
+ { "vnd", "recv", 2, 2, "void *", "ipinfo_t *" },
+ { "vnd", "recv", 3, 3, "vnd_str_t *", "ifinfo_t *" },
+ { "vnd", "recv", 4, 4, "mblk_t *", "etherinfo_t *" },
{ NULL }
};
diff --git a/usr/src/uts/common/exec/aout/aout.c b/usr/src/uts/common/exec/aout/aout.c
index fc45bd9544..5dbb2ed28c 100644
--- a/usr/src/uts/common/exec/aout/aout.c
+++ b/usr/src/uts/common/exec/aout/aout.c
@@ -22,6 +22,7 @@
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2011 Bayard G. Bell. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#include <sys/types.h>
@@ -54,7 +55,7 @@
static int aoutexec(vnode_t *vp, execa_t *uap, uarg_t *args,
intpdata_t *idatap, int level, long *execsz, int setid,
- caddr_t exec_file, cred_t *cred, int brand_action);
+ caddr_t exec_file, cred_t *cred, int *brand_action);
static int get_aout_head(struct vnode **vpp, struct exdata *edp, long *execsz,
int *isdyn);
static int aoutcore(vnode_t *vp, proc_t *pp, cred_t *credp,
@@ -130,7 +131,7 @@ _info(struct modinfo *modinfop)
static int
aoutexec(vnode_t *vp, struct execa *uap, struct uarg *args,
struct intpdata *idatap, int level, long *execsz, int setid,
- caddr_t exec_file, cred_t *cred, int brand_action)
+ caddr_t exec_file, cred_t *cred, int *brand_action)
{
auxv32_t auxflags_auxv32;
int error;
diff --git a/usr/src/uts/common/exec/elf/elf.c b/usr/src/uts/common/exec/elf/elf.c
index dc04b292b0..d74737dead 100644
--- a/usr/src/uts/common/exec/elf/elf.c
+++ b/usr/src/uts/common/exec/elf/elf.c
@@ -26,7 +26,7 @@
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
*/
#include <sys/types.h>
@@ -66,6 +66,11 @@
#include <sys/sdt.h>
#include <sys/siginfo.h>
+#if defined(__x86) && !defined(__xpv)
+#include <sys/comm_page.h>
+#endif /* defined(__x86) && !defined(__xpv) */
+
+
extern int at_flags;
#define ORIGIN_STR "ORIGIN"
@@ -163,12 +168,16 @@ dtrace_safe_phdr(Phdr *phdrp, struct uarg *args, uintptr_t base)
}
/*
- * Map in the executable pointed to by vp. Returns 0 on success.
+ * Map in the executable pointed to by vp. Returns 0 on success. Note that
+ * this function currently has the maximum number of arguments allowed by
+ * modstubs on x86 (MAXNARG)! Do _not_ add to this function signature without
+ * adding to MAXNARG. (Better yet, do not add to this monster of a function
+ * signature!)
*/
int
mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
- intptr_t *voffset, caddr_t exec_file, int *interp, caddr_t *bssbase,
- caddr_t *brkbase, size_t *brksize, uintptr_t *lddatap)
+ intptr_t *voffset, caddr_t exec_file, char **interpp, caddr_t *bssbase,
+ caddr_t *brkbase, size_t *brksize, uintptr_t *lddatap, uintptr_t *minaddrp)
{
size_t len;
struct vattr vat;
@@ -180,6 +189,7 @@ mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
Phdr *junk = NULL;
Phdr *dynphdr = NULL;
Phdr *dtrphdr = NULL;
+ char *interp = NULL;
uintptr_t lddata;
long execsz;
intptr_t minaddr;
@@ -187,6 +197,9 @@ mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
if (lddatap != NULL)
*lddatap = NULL;
+ if (minaddrp != NULL)
+ *minaddrp = NULL;
+
if (error = execpermissions(vp, &vat, args)) {
uprintf("%s: Cannot execute %s\n", exec_file, args->pathname);
return (error);
@@ -212,25 +225,89 @@ mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
&junk, &dtrphdr, NULL, bssbase, brkbase, voffset, &minaddr,
len, &execsz, brksize)) {
uprintf("%s: Cannot map %s\n", exec_file, args->pathname);
+ if (uphdr != NULL && uphdr->p_flags == 0)
+ kmem_free(uphdr, sizeof (Phdr));
kmem_free(phdrbase, phdrsize);
return (error);
}
+ if (minaddrp != NULL)
+ *minaddrp = minaddr;
+
/*
- * Inform our caller if the executable needs an interpreter.
+ * If the executable requires an interpreter, determine its name.
*/
- *interp = (dynphdr == NULL) ? 0 : 1;
+ if (dynphdr != NULL) {
+ ssize_t resid;
+
+ if (dynphdr->p_filesz > MAXPATHLEN || dynphdr->p_filesz == 0) {
+ uprintf("%s: Invalid interpreter\n", exec_file);
+ kmem_free(phdrbase, phdrsize);
+ return (ENOEXEC);
+ }
+
+ interp = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+ if ((error = vn_rdwr(UIO_READ, vp, interp, dynphdr->p_filesz,
+ (offset_t)dynphdr->p_offset, UIO_SYSSPACE, 0,
+ (rlim64_t)0, CRED(), &resid)) != 0 || resid != 0 ||
+ interp[dynphdr->p_filesz - 1] != '\0') {
+ uprintf("%s: Cannot obtain interpreter pathname\n",
+ exec_file);
+ kmem_free(interp, MAXPATHLEN);
+ kmem_free(phdrbase, phdrsize);
+ return (error != 0 ? error : ENOEXEC);
+ }
+ }
/*
* If this is a statically linked executable, voffset should indicate
* the address of the executable itself (it normally holds the address
* of the interpreter).
*/
- if (ehdr->e_type == ET_EXEC && *interp == 0)
+ if (ehdr->e_type == ET_EXEC && interp == NULL)
*voffset = minaddr;
+ /*
+ * If the caller has asked for the interpreter name, return it (it's
+ * up to the caller to free it); if the caller hasn't asked for it,
+ * free it ourselves.
+ */
+ if (interpp != NULL) {
+ *interpp = interp;
+ } else if (interp != NULL) {
+ kmem_free(interp, MAXPATHLEN);
+ }
+
if (uphdr != NULL) {
*uphdr_vaddr = uphdr->p_vaddr;
+
+ if (uphdr->p_flags == 0)
+ kmem_free(uphdr, sizeof (Phdr));
+ } else if (ehdr->e_type == ET_DYN) {
+ /*
+ * If we don't have a uphdr, we'll apply the logic found
+ * in mapelfexec() and use the p_vaddr of the first PT_LOAD
+ * section as the base address of the object.
+ */
+ Phdr *phdr = (Phdr *)phdrbase;
+ int i, hsize = ehdr->e_phentsize;
+
+ for (i = nphdrs; i > 0; i--) {
+ if (phdr->p_type == PT_LOAD) {
+ *uphdr_vaddr = (uintptr_t)phdr->p_vaddr +
+ ehdr->e_phoff;
+ break;
+ }
+
+ phdr = (Phdr *)((caddr_t)phdr + hsize);
+ }
+
+ /*
+ * If we don't have a PT_LOAD segment, we should have returned
+ * ENOEXEC when elfsize() returned 0, above.
+ */
+ VERIFY(i > 0);
} else {
*uphdr_vaddr = (Addr)-1;
}
@@ -243,13 +320,13 @@ mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Addr *uphdr_vaddr,
int
elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred,
- int brand_action)
+ int *brand_action)
{
caddr_t phdrbase = NULL;
caddr_t bssbase = 0;
caddr_t brkbase = 0;
size_t brksize = 0;
- ssize_t dlnsize;
+ ssize_t dlnsize, nsize = 0;
aux_entry_t *aux;
int error;
ssize_t resid;
@@ -273,6 +350,7 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
int hasauxv = 0;
int hasdy = 0;
int branded = 0;
+ int dynuphdr = 0;
struct proc *p = ttoproc(curthread);
struct user *up = PTOU(p);
@@ -327,7 +405,9 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
*execsz = btopr(SINCR) + btopr(SSIZE) + btopr(NCARGS32-1);
} else {
args->to_model = DATAMODEL_LP64;
- args->stk_prot &= ~PROT_EXEC;
+ if (!args->stk_prot_override) {
+ args->stk_prot &= ~PROT_EXEC;
+ }
#if defined(__i386) || defined(__amd64)
args->dat_prot &= ~PROT_EXEC;
#endif
@@ -339,11 +419,25 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
#endif /* _LP64 */
/*
- * We delay invoking the brand callback until we've figured out
- * what kind of elf binary we're trying to run, 32-bit or 64-bit.
- * We do this because now the brand library can just check
- * args->to_model to see if the target is 32-bit or 64-bit without
- * having do duplicate all the code above.
+ * We delay invoking the brand callback until we've figured out what
+ * kind of elf binary we're trying to run, 32-bit or 64-bit. We do this
+ * because now the brand library can just check args->to_model to see if
+ * the target is 32-bit or 64-bit without having do duplicate all the
+ * code above.
+ *
+ * We also give the brand a chance to indicate that based on the ELF
+ * OSABI of the target binary it should become unbranded and optionally
+ * indicate that it should be treated as existing in a specific prefix.
+ *
+ * Note that if a brand opts to go down this route it does not actually
+ * end up being debranded. In other words, future programs that exec
+ * will still be considered for branding unless this escape hatch is
+ * used. Consider the case of lx brand for example. If a user runs
+ * /native/usr/sbin/dtrace -c /bin/ls, the isaexec and normal executable
+ * of DTrace that's in /native will take this escape hatch and be run
+ * and interpreted using the normal system call table; however, the
+ * execution of a non-illumos binary in the form of /bin/ls will still
+ * be branded and be subject to all of the normal actions of the brand.
*
* The level checks associated with brand handling below are used to
* prevent a loop since the brand elfexec function typically comes back
@@ -351,8 +445,20 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
* handling in the #! interpreter code will increment the level before
* calling gexec to run the final elfexec interpreter.
*/
+ if ((level <= INTP_MAXDEPTH) && (*brand_action != EBA_NATIVE) &&
+ (PROC_IS_BRANDED(p)) && (BROP(p)->b_native_exec != NULL)) {
+ if (BROP(p)->b_native_exec(ehdrp->e_ident[EI_OSABI],
+ &args->brand_nroot) == B_TRUE) {
+ ASSERT(ehdrp->e_ident[EI_OSABI]);
+ *brand_action = EBA_NATIVE;
+ /* Add one for the trailing '/' in the path */
+ if (args->brand_nroot != NULL)
+ nsize = strlen(args->brand_nroot) + 1;
+ }
+ }
+
if ((level <= INTP_MAXDEPTH) &&
- (brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
+ (*brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
error = BROP(p)->b_elfexec(vp, uap, args,
idatap, level + 1, execsz, setid, exec_file, cred,
brand_action);
@@ -423,14 +529,15 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
* AT_BASE
* AT_FLAGS
* AT_PAGESZ
+ * AT_RANDOM (added in stk_copyout)
* AT_SUN_AUXFLAGS
* AT_SUN_HWCAP
* AT_SUN_HWCAP2
- * AT_SUN_PLATFORM (added in stk_copyout)
- * AT_SUN_EXECNAME (added in stk_copyout)
+ * AT_SUN_PLATFORM (added in stk_copyout)
+ * AT_SUN_EXECNAME (added in stk_copyout)
* AT_NULL
*
- * total == 9
+ * total == 10
*/
if (hasdy && hasu) {
/*
@@ -445,7 +552,7 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
*
* total = 5
*/
- args->auxsize = (9 + 5) * sizeof (aux_entry_t);
+ args->auxsize = (10 + 5) * sizeof (aux_entry_t);
} else if (hasdy) {
/*
* Has PT_INTERP but no PT_PHDR
@@ -455,9 +562,9 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
*
* total = 2
*/
- args->auxsize = (9 + 2) * sizeof (aux_entry_t);
+ args->auxsize = (10 + 2) * sizeof (aux_entry_t);
} else {
- args->auxsize = 9 * sizeof (aux_entry_t);
+ args->auxsize = 10 * sizeof (aux_entry_t);
}
} else {
args->auxsize = 0;
@@ -470,13 +577,41 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
if (args->emulator != NULL)
args->auxsize += sizeof (aux_entry_t);
- if ((brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
+ /*
+ * If this is a native binary that's been given a modified interpreter
+ * root, inform it that the native system exists at that root.
+ */
+ if (args->brand_nroot != NULL) {
+ args->auxsize += sizeof (aux_entry_t);
+ }
+
+
+ /*
+ * On supported kernels (64-bit, non-xpv) make room in the auxv for the
+ * AT_SUN_COMMPAGE entry.
+ */
+#if defined(__amd64) && !defined(__xpv)
+ args->auxsize += sizeof (aux_entry_t);
+#endif /* defined(__amd64) && !defined(__xpv) */
+
+ /*
+ * If we have user credentials, we'll supply the following entries:
+ * AT_SUN_UID
+ * AT_SUN_RUID
+ * AT_SUN_GID
+ * AT_SUN_RGID
+ */
+ if (cred != NULL) {
+ args->auxsize += 4 * sizeof (aux_entry_t);
+ }
+
+ if ((*brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
branded = 1;
/*
- * We will be adding 4 entries to the aux vectors. One for
- * the the brandname and 3 for the brand specific aux vectors.
+ * We will be adding 5 entries to the aux vectors. One for
+ * the the brandname and 4 for the brand specific aux vectors.
*/
- args->auxsize += 4 * sizeof (aux_entry_t);
+ args->auxsize += 5 * sizeof (aux_entry_t);
}
/* Hardware/Software capabilities */
@@ -507,7 +642,8 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
aux = bigwad->elfargs;
/*
* Move args to the user's stack.
- * This can fill in the AT_SUN_PLATFORM and AT_SUN_EXECNAME aux entries.
+ * This can fill in the AT_SUN_PLATFORM, AT_SUN_EXECNAME and AT_RANDOM
+ * aux entries.
*/
if ((error = exec_args(uap, args, idatap, (void **)&aux)) != 0) {
if (error == -1) {
@@ -534,6 +670,14 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
len, execsz, &brksize)) != 0)
goto bad;
+ if (uphdr != NULL) {
+ /*
+ * Our uphdr has been dynamically allocated if (and only if)
+ * its program header flags are clear.
+ */
+ dynuphdr = (uphdr->p_flags == 0);
+ }
+
if (uphdr != NULL && dyphdr == NULL)
goto bad;
@@ -548,17 +692,22 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
char *p;
struct vnode *nvp;
- dlnsize = dyphdr->p_filesz;
+ dlnsize = dyphdr->p_filesz + nsize;
if (dlnsize > MAXPATHLEN || dlnsize <= 0)
goto bad;
+ if (nsize != 0) {
+ bcopy(args->brand_nroot, dlnp, nsize - 1);
+ dlnp[nsize - 1] = '/';
+ }
+
/*
* Read in "interpreter" pathname.
*/
- if ((error = vn_rdwr(UIO_READ, vp, dlnp, dyphdr->p_filesz,
- (offset_t)dyphdr->p_offset, UIO_SYSSPACE, 0, (rlim64_t)0,
- CRED(), &resid)) != 0) {
+ if ((error = vn_rdwr(UIO_READ, vp, dlnp + nsize,
+ dyphdr->p_filesz, (offset_t)dyphdr->p_offset, UIO_SYSSPACE,
+ 0, (rlim64_t)0, CRED(), &resid)) != 0) {
uprintf("%s: Cannot obtain interpreter pathname\n",
exec_file);
goto bad;
@@ -703,9 +852,10 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
dtrphdr = NULL;
- error = mapelfexec(nvp, ehdrp, nphdrs, phdrbase, &junk, &junk,
+ error = mapelfexec(nvp, ehdrp, nphdrs, phdrbase, NULL, &junk,
&junk, &dtrphdr, NULL, NULL, NULL, &voffset, NULL, len,
execsz, NULL);
+
if (error || junk != NULL) {
VN_RELE(nvp);
uprintf("%s: Cannot map %s\n", exec_file, dlnp);
@@ -732,9 +882,10 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
if (hasauxv) {
int auxf = AF_SUN_HWCAPVERIFY;
+
/*
- * Note: AT_SUN_PLATFORM and AT_SUN_EXECNAME were filled in via
- * exec_args()
+ * Note: AT_SUN_PLATFORM, AT_SUN_EXECNAME and AT_RANDOM were
+ * filled in via exec_args()
*/
ADDAUX(aux, AT_BASE, voffset)
ADDAUX(aux, AT_FLAGS, at_flags)
@@ -762,7 +913,7 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
* malicious user within the zone from crafting a wrapper to
* run native suid commands with unsecure libraries interposed.
*/
- if ((brand_action == EBA_NATIVE) && (PROC_IS_BRANDED(p) &&
+ if ((*brand_action == EBA_NATIVE) && (PROC_IS_BRANDED(p) &&
(setid &= ~EXECSETID_SETID) != 0))
auxf &= ~AF_SUN_SETUGID;
@@ -775,6 +926,18 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
((char *)&aux->a_type -
(char *)bigwad->elfargs));
ADDAUX(aux, AT_SUN_AUXFLAGS, auxf);
+
+ /*
+ * Record information about the real and effective user and
+ * group IDs.
+ */
+ if (cred != NULL) {
+ ADDAUX(aux, AT_SUN_UID, crgetuid(cred));
+ ADDAUX(aux, AT_SUN_RUID, crgetruid(cred));
+ ADDAUX(aux, AT_SUN_GID, crgetgid(cred));
+ ADDAUX(aux, AT_SUN_RGID, crgetrgid(cred));
+ }
+
/*
* Hardware capability flag word (performance hints)
* Used for choosing faster library routines.
@@ -804,8 +967,19 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
ADDAUX(aux, AT_SUN_BRAND_AUX1, 0)
ADDAUX(aux, AT_SUN_BRAND_AUX2, 0)
ADDAUX(aux, AT_SUN_BRAND_AUX3, 0)
+ ADDAUX(aux, AT_SUN_BRAND_AUX4, 0)
}
+ /*
+ * Add the comm page auxv entry, mapping it in if needed.
+ */
+#if defined(__amd64) && !defined(__xpv)
+ if (args->commpage != NULL ||
+ (args->commpage = (uintptr_t)comm_page_mapin()) != NULL) {
+ ADDAUX(aux, AT_SUN_COMMPAGE, args->commpage)
+ }
+#endif /* defined(__amd64) && !defined(__xpv) */
+
ADDAUX(aux, AT_NULL, 0)
postfixsize = (char *)aux - (char *)bigwad->elfargs;
@@ -845,6 +1019,7 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
}
bzero(up->u_auxv, sizeof (up->u_auxv));
+ up->u_commpagep = args->commpage;
if (postfixsize) {
int num_auxv;
@@ -911,6 +1086,8 @@ bad:
if (error == 0)
error = ENOEXEC;
out:
+ if (dynuphdr)
+ kmem_free(uphdr, sizeof (Phdr));
if (phdrbase != NULL)
kmem_free(phdrbase, phdrsize);
if (cap != NULL)
@@ -1177,6 +1354,29 @@ getelfshdr(vnode_t *vp, cred_t *credp, const Ehdr *ehdr,
return (0);
}
+
+#ifdef _ELF32_COMPAT
+int
+elf32readhdr(vnode_t *vp, cred_t *credp, Ehdr *ehdrp, int *nphdrs,
+ caddr_t *phbasep, ssize_t *phsizep)
+#else
+int
+elfreadhdr(vnode_t *vp, cred_t *credp, Ehdr *ehdrp, int *nphdrs,
+ caddr_t *phbasep, ssize_t *phsizep)
+#endif
+{
+ int error, nshdrs, shstrndx;
+
+ if ((error = getelfhead(vp, credp, ehdrp, &nshdrs, &shstrndx,
+ nphdrs)) != 0 ||
+ (error = getelfphdr(vp, credp, ehdrp, *nphdrs, phbasep,
+ phsizep)) != 0) {
+ return (error);
+ }
+ return (0);
+}
+
+
static int
mapelfexec(
vnode_t *vp,
@@ -1197,7 +1397,7 @@ mapelfexec(
size_t *brksize)
{
Phdr *phdr;
- int i, prot, error;
+ int i, prot, error, lastprot = 0;
caddr_t addr = NULL;
size_t zfodsz;
int ptload = 0;
@@ -1205,43 +1405,78 @@ mapelfexec(
off_t offset;
int hsize = ehdr->e_phentsize;
caddr_t mintmp = (caddr_t)-1;
+ uintptr_t lastaddr = NULL;
extern int use_brk_lpg;
if (ehdr->e_type == ET_DYN) {
- /*
- * Obtain the virtual address of a hole in the
- * address space to map the "interpreter".
- */
- map_addr(&addr, len, (offset_t)0, 1, 0);
- if (addr == NULL)
- return (ENOMEM);
- *voffset = (intptr_t)addr;
+ caddr_t vaddr;
/*
- * Calculate the minimum vaddr so it can be subtracted out.
- * According to the ELF specification, since PT_LOAD sections
- * must be sorted by increasing p_vaddr values, this is
- * guaranteed to be the first PT_LOAD section.
+ * Despite the fact that mmapobj(2) refuses to load them, we
+ * need to support executing ET_DYN objects that have a
+ * non-NULL p_vaddr. When found in the wild, these objects
+ * are likely to be due to an old (and largely obviated) Linux
+ * facility, prelink(8), that rewrites shared objects to
+ * prefer specific (disjoint) virtual address ranges. (Yes,
+ * this is putatively for performance -- and yes, it has
+ * limited applicability, many edge conditions and grisly
+ * failure modes; even for Linux, it's insane.) As ELF
+ * mandates that the PT_LOAD segments be in p_vaddr order, we
+ * find the lowest p_vaddr by finding the first PT_LOAD
+ * segment.
*/
phdr = (Phdr *)phdrbase;
for (i = nphdrs; i > 0; i--) {
if (phdr->p_type == PT_LOAD) {
- *voffset -= (uintptr_t)phdr->p_vaddr;
+ addr = (caddr_t)(uintptr_t)phdr->p_vaddr;
break;
}
phdr = (Phdr *)((caddr_t)phdr + hsize);
}
+ /*
+ * We have a non-zero p_vaddr in the first PT_LOAD segment --
+ * presumably because we're directly executing a prelink(8)'d
+ * ld-linux.so. While we could correctly execute such an
+ * object without locating it at its desired p_vaddr (it is,
+ * after all, still relocatable), our inner antiquarian
+ * derives a perverse pleasure in accommodating the steampunk
+ * prelink(8) contraption -- goggles on!
+ */
+ if ((vaddr = addr) != NULL) {
+ if (as_gap(curproc->p_as, len,
+ &addr, &len, AH_LO, NULL) == -1 || addr != vaddr) {
+ addr = NULL;
+ }
+ }
+
+ if (addr == NULL) {
+ /*
+ * We either have a NULL p_vaddr (the common case, by
+ * many orders of magnitude) or we have a non-NULL
+ * p_vaddr and we were unable to obtain the specified
+ * VA range (presumably because it's an illegal
+ * address). Either way, obtain an address in which
+ * to map the interpreter.
+ */
+ map_addr(&addr, len, (offset_t)0, 1, 0);
+ if (addr == NULL)
+ return (ENOMEM);
+ }
+
+ /*
+ * Our voffset is the difference between where we landed and
+ * where we wanted to be.
+ */
+ *voffset = (uintptr_t)addr - (uintptr_t)vaddr;
} else {
*voffset = 0;
}
+
phdr = (Phdr *)phdrbase;
for (i = nphdrs; i > 0; i--) {
switch (phdr->p_type) {
case PT_LOAD:
- if ((*dyphdr != NULL) && (*uphdr == NULL))
- return (0);
-
ptload = 1;
prot = PROT_USER;
if (phdr->p_flags & PF_R)
@@ -1253,6 +1488,34 @@ mapelfexec(
addr = (caddr_t)((uintptr_t)phdr->p_vaddr + *voffset);
+ if ((*dyphdr != NULL) && uphdr != NULL &&
+ (*uphdr == NULL)) {
+ /*
+ * The PT_PHDR program header is, strictly
+ * speaking, optional. If we find that this
+ * is missing, we will determine the location
+ * of the program headers based on the address
+ * of the lowest PT_LOAD segment (namely, this
+ * one): we subtract the p_offset to get to
+ * the ELF header and then add back the program
+ * header offset to get to the program headers.
+ * We then cons up a Phdr that corresponds to
+ * the (missing) PT_PHDR, setting the flags
+ * to 0 to denote that this is artificial and
+ * should (must) be freed by the caller.
+ */
+ Phdr *cons;
+
+ cons = kmem_zalloc(sizeof (Phdr), KM_SLEEP);
+
+ cons->p_flags = 0;
+ cons->p_type = PT_PHDR;
+ cons->p_vaddr = ((uintptr_t)addr -
+ phdr->p_offset) + ehdr->e_phoff;
+
+ *uphdr = cons;
+ }
+
/*
* Keep track of the segment with the lowest starting
* address.
@@ -1260,6 +1523,41 @@ mapelfexec(
if (addr < mintmp)
mintmp = addr;
+ /*
+ * Segments need not correspond to page boundaries:
+ * they are permitted to share a page. If two PT_LOAD
+ * segments share the same page, and the permissions
+ * of the segments differ, the behavior is historically
+ * that the permissions of the latter segment are used
+ * for the page that the two segments share. This is
+ * also historically a non-issue: binaries generated
+ * by most anything will make sure that two PT_LOAD
+ * segments with differing permissions don't actually
+ * share any pages. However, there exist some crazy
+ * things out there (including at least an obscure
+ * Portuguese teaching language called G-Portugol) that
+ * actually do the wrong thing and expect it to work:
+ * they have a segment with execute permission share
+ * a page with a subsequent segment that does not
+ * have execute permissions and expect the resulting
+ * shared page to in fact be executable. To accommodate
+ * such broken link editors, we take advantage of a
+ * latitude explicitly granted to the loader: it is
+ * permitted to make _any_ PT_LOAD segment executable
+ * (provided that it is readable or writable). If we
+ * see that we're sharing a page and that the previous
+ * page was executable, we will add execute permissions
+ * to our segment.
+ */
+ if (btop(lastaddr) == btop((uintptr_t)addr) &&
+ (phdr->p_flags & (PF_R | PF_W)) &&
+ (lastprot & PROT_EXEC)) {
+ prot |= PROT_EXEC;
+ }
+
+ lastaddr = (uintptr_t)addr + phdr->p_filesz;
+ lastprot = prot;
+
zfodsz = (size_t)phdr->p_memsz - phdr->p_filesz;
offset = phdr->p_offset;
@@ -1324,8 +1622,22 @@ mapelfexec(
break;
case PT_INTERP:
- if (ptload)
- goto bad;
+ /*
+ * The ELF specification is unequivocal about the
+ * PT_INTERP program header with respect to any PT_LOAD
+ * program header: "If it is present, it must precede
+ * any loadable segment entry." Linux, however, makes
+ * no attempt to enforce this -- which has allowed some
+ * binary editing tools to get away with generating
+ * invalid ELF binaries in the respect that PT_INTERP
+ * occurs after the first PT_LOAD program header. This
+ * is unfortunate (and of course, disappointing) but
+ * it's no worse than that: there is no reason that we
+ * can't process the PT_INTERP entry (if present) after
+ * one or more PT_LOAD entries. We therefore
+ * deliberately do not check ptload here and always
+ * store dyphdr to be the PT_INTERP program header.
+ */
*dyphdr = phdr;
break;
@@ -1334,9 +1646,12 @@ mapelfexec(
break;
case PT_PHDR:
- if (ptload)
+ if (ptload || phdr->p_flags == 0)
goto bad;
- *uphdr = phdr;
+
+ if (uphdr != NULL)
+ *uphdr = phdr;
+
break;
case PT_NULL:
@@ -2185,7 +2500,7 @@ static struct modlexec modlexec = {
extern int elf32exec(vnode_t *vp, execa_t *uap, uarg_t *args,
intpdata_t *idatap, int level, long *execsz,
int setid, caddr_t exec_file, cred_t *cred,
- int brand_action);
+ int *brand_action);
extern int elf32core(vnode_t *vp, proc_t *p, cred_t *credp,
rlim64_t rlimit, int sig, core_content_t content);
diff --git a/usr/src/uts/common/exec/intp/intp.c b/usr/src/uts/common/exec/intp/intp.c
index 269ba86b1b..512cab2b66 100644
--- a/usr/src/uts/common/exec/intp/intp.c
+++ b/usr/src/uts/common/exec/intp/intp.c
@@ -22,6 +22,7 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright 2012 Milan Jurik. All rights reserved.
+ * Copyright 2016, Joyent, Inc.
*/
/* Copyright (c) 1988 AT&T */
@@ -47,6 +48,7 @@
#include <sys/kmem.h>
#include <sys/note.h>
#include <sys/sdt.h>
+#include <sys/brand.h>
/*
* This is the loadable module wrapper.
@@ -54,7 +56,7 @@
#include <sys/modctl.h>
extern int intpexec(struct vnode *, struct execa *, struct uarg *,
- struct intpdata *, int, long *, int, caddr_t, struct cred *, int);
+ struct intpdata *, int, long *, int, caddr_t, struct cred *, int *);
static struct execsw esw = {
intpmagicstr,
@@ -126,13 +128,20 @@ getintphead(struct vnode *vp, struct intpdata *idatap)
*cp = '\0';
/*
- * Locate the beginning and end of the interpreter name.
- * In addition to the name, one additional argument may
- * optionally be included here, to be prepended to the
- * arguments provided on the command line. Thus, for
- * example, you can say
+ * Locate the beginning and end of the interpreter name. Historically,
+ * for illumos and its predecessors, in addition to the name, one
+ * additional argument may optionally be included here, to be prepended
+ * to the arguments provided on the command line. Thus, for example,
+ * you can say
*
* #! /usr/bin/awk -f
+ *
+ * However, handling of interpreter arguments varies across operating
+ * systems and other systems allow more than one argument. In
+ * particular, Linux allows more than one and delivers all arguments
+ * as a single string (argv[1] is "-arg1 -arg2 ..."). We support this
+ * style of argument handling as a brand-specific option (setting
+ * b_intp_parse_arg to B_FALSE).
*/
for (cp = &linep[2]; *cp == ' '; cp++)
;
@@ -151,9 +160,12 @@ getintphead(struct vnode *vp, struct intpdata *idatap)
idatap->intp_arg[0] = NULL;
else {
idatap->intp_arg[0] = cp;
- while (*cp && *cp != ' ')
- cp++;
- *cp = '\0';
+ if (!PROC_IS_BRANDED(curproc) ||
+ BROP(curproc)->b_intp_parse_arg) {
+ while (*cp && *cp != ' ')
+ cp++;
+ *cp = '\0';
+ }
}
}
return (0);
@@ -188,9 +200,8 @@ intpexec(
int setid,
caddr_t exec_file,
struct cred *cred,
- int brand_action)
+ int *brand_action)
{
- _NOTE(ARGUNUSED(brand_action))
vnode_t *nvp;
int error = 0;
struct intpdata idata;
@@ -281,7 +292,7 @@ intpexec(
}
error = gexec(&nvp, uap, args, &idata, ++level, execsz, exec_file, cred,
- EBA_NONE);
+ brand_action);
if (!error) {
/*
diff --git a/usr/src/uts/common/exec/java/java.c b/usr/src/uts/common/exec/java/java.c
index fdc327dcbb..5170fda5cb 100644
--- a/usr/src/uts/common/exec/java/java.c
+++ b/usr/src/uts/common/exec/java/java.c
@@ -21,6 +21,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015, Joyent, Inc.
*/
/*
@@ -85,7 +86,7 @@ char *jexec_arg = "-jar";
static int
javaexec(vnode_t *vp, struct execa *uap, struct uarg *args,
struct intpdata *idatap, int level, long *execsz, int setid,
- caddr_t execfile, cred_t *cred, int brand_action)
+ caddr_t execfile, cred_t *cred, int *brand_action)
{
struct intpdata idata;
int error;
diff --git a/usr/src/uts/common/exec/shbin/shbin.c b/usr/src/uts/common/exec/shbin/shbin.c
index ee5060a07e..016d87b9ef 100644
--- a/usr/src/uts/common/exec/shbin/shbin.c
+++ b/usr/src/uts/common/exec/shbin/shbin.c
@@ -22,6 +22,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015, Joyent, Inc.
*/
#include <sys/types.h>
@@ -58,7 +59,7 @@ shbinexec(
int setid,
caddr_t exec_file,
struct cred *cred,
- int brand_action);
+ int *brand_action);
#define SHBIN_CNTL(x) ((x)&037)
#define SHBINMAGIC_LEN 4
@@ -162,7 +163,7 @@ shbinexec(
int setid,
caddr_t exec_file,
struct cred *cred,
- int brand_action)
+ int *brand_action)
{
_NOTE(ARGUNUSED(brand_action))
vnode_t *nvp;
diff --git a/usr/src/uts/common/fs/dev/sdev_netops.c b/usr/src/uts/common/fs/dev/sdev_netops.c
index 4eaf38f484..41441ec52d 100644
--- a/usr/src/uts/common/fs/dev/sdev_netops.c
+++ b/usr/src/uts/common/fs/dev/sdev_netops.c
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
/*
@@ -41,8 +42,102 @@
#include <sys/zone.h>
#include <sys/dls.h>
+static const char *devnet_zpath = "/dev/net/zone/";
struct vnodeops *devnet_vnodeops;
+static zoneid_t
+devnet_nodetozone(sdev_node_t *dv)
+{
+ char *zname = NULL, *dup;
+ zone_t *zone;
+ int duplen;
+ zoneid_t zid;
+
+ /*
+ * If in a non-global zone, always return it's zid no matter what the
+ * node is.
+ */
+ zid = getzoneid();
+ if (zid != GLOBAL_ZONEID)
+ return (zid);
+
+ /*
+ * If it doesn't have /dev/net/zone/ then it can't be a specific zone
+ * we're targetting.
+ */
+ if (strncmp(devnet_zpath, dv->sdev_path, strlen(devnet_zpath)) != 0)
+ return (GLOBAL_ZONEID);
+
+ if (dv->sdev_vnode->v_type == VDIR) {
+ zone = zone_find_by_name(dv->sdev_name);
+ } else {
+ /* Non directories have the form /dev/net/zone/%z/%s */
+ dup = strdup(dv->sdev_path);
+ duplen = strlen(dup);
+ zname = strrchr(dup, '/');
+ *zname = '\0';
+ zname--;
+ zname = strrchr(dup, '/');
+ zname++;
+ zone = zone_find_by_name(zname);
+ kmem_free(dup, duplen + 1);
+ }
+ if (zone == NULL)
+ return (GLOBAL_ZONEID);
+ zid = zone->zone_id;
+ zone_rele(zone);
+ return (zid);
+}
+
+static int
+devnet_mkdir(struct sdev_node *ddv, char *name)
+{
+ sdev_node_t *dv;
+ struct vattr va;
+ int ret;
+
+ ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
+ dv = sdev_cache_lookup(ddv, name);
+ if (dv != NULL) {
+ SDEV_SIMPLE_RELE(dv);
+ return (EEXIST);
+ }
+
+ va = *sdev_getdefault_attr(VDIR);
+ gethrestime(&va.va_atime);
+ va.va_mtime = va.va_atime;
+ va.va_ctime = va.va_atime;
+
+ ret = sdev_mknode(ddv, name, &dv, &va, NULL, NULL, kcred, SDEV_READY);
+ if (ret != 0)
+ return (ret);
+ SDEV_SIMPLE_RELE(dv);
+ return (0);
+}
+
+/*
+ * We basically need to walk down the directory path to determine what we should
+ * do. At the top level of /dev/net, only the directory /dev/net/zone is valid,
+ * and it is always valid. Following on that, /dev/net/zone/%zonename is valid
+ * if and only if we can look up that zone name. If it's not, or it's some other
+ * name, then it's SDEV_VTOR_INVALID.
+ */
+static int
+devnet_dirvalidate(struct sdev_node *dv)
+{
+ zone_t *zonep;
+ char *path = "/dev/net/zone";
+
+ if (strcmp(path, dv->sdev_path) == 0)
+ return (SDEV_VTOR_VALID);
+
+ zonep = zone_find_by_name(dv->sdev_name);
+ if (zonep == NULL)
+ return (SDEV_VTOR_INVALID);
+ zone_rele(zonep);
+ return (SDEV_VTOR_VALID);
+}
+
/*
* Check if a net sdev_node is still valid - i.e. it represents a current
* network link.
@@ -60,11 +155,20 @@ devnet_validate(struct sdev_node *dv)
ASSERT(dv->sdev_state == SDEV_READY);
- if (dls_mgmt_get_linkid(dv->sdev_name, &linkid) != 0)
+ if (dv->sdev_vnode->v_type == VDIR)
+ return (devnet_dirvalidate(dv));
+
+ if (strncmp(devnet_zpath, dv->sdev_path, strlen(devnet_zpath)) == 0) {
+ ASSERT(SDEV_IS_GLOBAL(dv));
+ zoneid = devnet_nodetozone(dv);
+ } else {
+ zoneid = getzoneid();
+ }
+
+ if (dls_mgmt_get_linkid_in_zone(dv->sdev_name, &linkid, zoneid) != 0)
return (SDEV_VTOR_INVALID);
- if (SDEV_IS_GLOBAL(dv))
+ if (zoneid == GLOBAL_ZONEID)
return (SDEV_VTOR_VALID);
- zoneid = getzoneid();
return (zone_check_datalink(&zoneid, linkid) == 0 ?
SDEV_VTOR_VALID : SDEV_VTOR_INVALID);
}
@@ -74,13 +178,14 @@ devnet_validate(struct sdev_node *dv)
* a net entry when the node is not found in the cache.
*/
static int
-devnet_create_rvp(const char *nm, struct vattr *vap, dls_dl_handle_t *ddhp)
+devnet_create_rvp(const char *nm, struct vattr *vap, dls_dl_handle_t *ddhp,
+ zoneid_t zid)
{
timestruc_t now;
dev_t dev;
int error;
- if ((error = dls_devnet_open(nm, ddhp, &dev)) != 0) {
+ if ((error = dls_devnet_open_in_zone(nm, ddhp, &dev, zid)) != 0) {
sdcmn_err12(("devnet_create_rvp: not a valid vanity name "
"network node: %s\n", nm));
return (error);
@@ -116,6 +221,7 @@ devnet_lookup(struct vnode *dvp, char *nm, struct vnode **vpp,
struct sdev_node *ddv = VTOSDEV(dvp);
struct sdev_node *dv = NULL;
dls_dl_handle_t ddh = NULL;
+ zone_t *zone;
struct vattr vattr;
int nmlen;
int error = ENOENT;
@@ -123,6 +229,9 @@ devnet_lookup(struct vnode *dvp, char *nm, struct vnode **vpp,
if (SDEVTOV(ddv)->v_type != VDIR)
return (ENOTDIR);
+ if (!SDEV_IS_GLOBAL(ddv) && crgetzoneid(cred) == GLOBAL_ZONEID)
+ return (EPERM);
+
/*
* Empty name or ., return node itself.
*/
@@ -145,6 +254,12 @@ devnet_lookup(struct vnode *dvp, char *nm, struct vnode **vpp,
rw_enter(&ddv->sdev_contents, RW_WRITER);
/*
+ * ZOMBIED parent does not allow new node creation, bail out early.
+ */
+ if (ddv->sdev_state == SDEV_ZOMBIE)
+ goto failed;
+
+ /*
* directory cache lookup:
*/
if ((dv = sdev_cache_lookup(ddv, nm)) != NULL) {
@@ -153,13 +268,42 @@ devnet_lookup(struct vnode *dvp, char *nm, struct vnode **vpp,
goto found;
}
+ if (SDEV_IS_GLOBAL(ddv)) {
+ /*
+ * Check for /dev/net/zone
+ */
+ if (strcmp("zone", nm) == 0 && strcmp("/dev/net",
+ ddv->sdev_path) == 0) {
+ (void) devnet_mkdir(ddv, nm);
+ dv = sdev_cache_lookup(ddv, nm);
+ ASSERT(dv != NULL);
+ goto found;
+ }
+
+ /*
+ * Check for /dev/net/zone/%z. We can't use devnet_zpath due to
+ * its trailing slash.
+ */
+ if (strcmp("/dev/net/zone", ddv->sdev_path) == 0) {
+ zone = zone_find_by_name(nm);
+ if (zone == NULL)
+ goto failed;
+ (void) devnet_mkdir(ddv, nm);
+ zone_rele(zone);
+ dv = sdev_cache_lookup(ddv, nm);
+ ASSERT(dv != NULL);
+ goto found;
+ }
+ } else if (strcmp("/dev/net", ddv->sdev_path) != 0) {
+ goto failed;
+ }
+
/*
- * ZOMBIED parent does not allow new node creation, bail out early.
+ * We didn't find what we were looking for. What that is depends a lot
+ * on what directory we're in.
*/
- if (ddv->sdev_state == SDEV_ZOMBIE)
- goto failed;
- error = devnet_create_rvp(nm, &vattr, &ddh);
+ error = devnet_create_rvp(nm, &vattr, &ddh, devnet_nodetozone(ddv));
if (error != 0)
goto failed;
@@ -219,7 +363,7 @@ devnet_filldir_datalink(datalink_id_t linkid, void *arg)
if ((dv = sdev_cache_lookup(ddv, (char *)link)) != NULL)
goto found;
- if (devnet_create_rvp(link, &vattr, &ddh) != 0)
+ if (devnet_create_rvp(link, &vattr, &ddh, devnet_nodetozone(arg)) != 0)
return (0);
ASSERT(ddh != NULL);
@@ -244,16 +388,77 @@ found:
return (0);
}
+/*
+ * Fill in all the entries for the current zone.
+ */
static void
-devnet_filldir(struct sdev_node *ddv)
+devnet_fillzone(struct sdev_node *ddv, zoneid_t zid)
{
- sdev_node_t *dv, *next;
datalink_id_t linkid;
+ ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
+ if (zid == GLOBAL_ZONEID) {
+ ASSERT(SDEV_IS_GLOBAL(ddv));
+ linkid = DATALINK_INVALID_LINKID;
+ do {
+ linkid = dls_mgmt_get_next(linkid, DATALINK_CLASS_ALL,
+ DATALINK_ANY_MEDIATYPE, DLMGMT_ACTIVE);
+ if (linkid != DATALINK_INVALID_LINKID)
+ (void) devnet_filldir_datalink(linkid, ddv);
+ } while (linkid != DATALINK_INVALID_LINKID);
+ } else {
+ (void) zone_datalink_walk(zid, devnet_filldir_datalink, ddv);
+ }
+}
+
+/*
+ * Callback for zone_walk when filling up /dev/net/zone/...
+ */
+static int
+devnet_fillzdir_cb(zone_t *zonep, void *arg)
+{
+ sdev_node_t *ddv = arg;
+
+ ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
+ (void) devnet_mkdir(ddv, zonep->zone_name);
+ return (0);
+}
+
+/*
+ * Fill in a directory that isn't the top level /dev/net.
+ */
+static void
+devnet_fillzdir(struct sdev_node *ddv)
+{
+ zone_t *zonep;
+ char *path = "/dev/net/zone";
+
+ if (strcmp(path, ddv->sdev_path) == 0) {
+ (void) zone_walk(devnet_fillzdir_cb, ddv);
+ return;
+ }
+
+ zonep = zone_find_by_name(ddv->sdev_name);
+ if (zonep == NULL)
+ return;
+ devnet_fillzone(ddv, zonep->zone_id);
+ zone_rele(zonep);
+}
+
+static void
+devnet_filldir(struct sdev_node *ddv)
+{
+ int ret;
+ sdev_node_t *dv, *next;
+
ASSERT(RW_READ_HELD(&ddv->sdev_contents));
if (rw_tryupgrade(&ddv->sdev_contents) == NULL) {
rw_exit(&ddv->sdev_contents);
rw_enter(&ddv->sdev_contents, RW_WRITER);
+ if (ddv->sdev_state == SDEV_ZOMBIE) {
+ rw_exit(&ddv->sdev_contents);
+ return;
+ }
}
for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = next) {
@@ -276,31 +481,38 @@ devnet_filldir(struct sdev_node *ddv)
if (SDEVTOV(dv)->v_count > 0)
continue;
+
SDEV_HOLD(dv);
+
+ /*
+ * Clean out everything underneath before we remove ourselves.
+ */
+ if (SDEVTOV(ddv)->v_type == VDIR) {
+ ret = sdev_cleandir(dv, NULL, 0);
+ ASSERT(ret == 0);
+ }
/* remove the cache node */
(void) sdev_cache_update(ddv, &dv, dv->sdev_name,
SDEV_CACHE_DELETE);
SDEV_RELE(dv);
}
+ if (strcmp(ddv->sdev_path, "/dev/net") != 0) {
+ devnet_fillzdir(ddv);
+ goto done;
+ }
+
if (((ddv->sdev_flags & SDEV_BUILD) == 0) && !dls_devnet_rebuild())
goto done;
if (SDEV_IS_GLOBAL(ddv)) {
- linkid = DATALINK_INVALID_LINKID;
- do {
- linkid = dls_mgmt_get_next(linkid, DATALINK_CLASS_ALL,
- DATALINK_ANY_MEDIATYPE, DLMGMT_ACTIVE);
- if (linkid != DATALINK_INVALID_LINKID)
- (void) devnet_filldir_datalink(linkid, ddv);
- } while (linkid != DATALINK_INVALID_LINKID);
+ devnet_fillzone(ddv, GLOBAL_ZONEID);
+ (void) devnet_mkdir(ddv, "zone");
} else {
- (void) zone_datalink_walk(getzoneid(),
- devnet_filldir_datalink, ddv);
+ devnet_fillzone(ddv, getzoneid());
}
ddv->sdev_flags &= ~SDEV_BUILD;
-
done:
rw_downgrade(&ddv->sdev_contents);
}
@@ -319,6 +531,9 @@ devnet_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred,
ASSERT(sdvp);
+ if (crgetzoneid(cred) == GLOBAL_ZONEID && !SDEV_IS_GLOBAL(sdvp))
+ return (EPERM);
+
if (uiop->uio_offset == 0)
devnet_filldir(sdvp);
diff --git a/usr/src/uts/common/fs/dev/sdev_plugin.c b/usr/src/uts/common/fs/dev/sdev_plugin.c
new file mode 100644
index 0000000000..885191175f
--- /dev/null
+++ b/usr/src/uts/common/fs/dev/sdev_plugin.c
@@ -0,0 +1,913 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Dynamic directory plugin interface for sdev.
+ *
+ * The sdev plugin interfaces provides a means for a dynamic directory based on
+ * in-kernel state to be simply created. Traditionally, dynamic directories were
+ * built into sdev itself. While these legacy plugins are useful, it makes more
+ * sense for these pieces of functionality to live with the individual drivers.
+ *
+ * The plugin interface requires folks to implement three interfaces and
+ * provides a series of callbacks that can be made in the context of those
+ * interfaces to interrogate the sdev_node_t without having to leak
+ * implementation details of the sdev_node_t. These interfaces are:
+ *
+ * o spo_validate
+ *
+ * Given a particular node, answer the question as to whether or not this
+ * entry is still valid. Here, plugins should use the name and the dev_t
+ * associated with the node to verify that it matches something that still
+ * exists.
+ *
+ * o spo_filldir
+ *
+ * Fill all the entries inside of a directory. Note that some of these entries
+ * may already exist.
+ *
+ * o spo_inactive
+ *
+ * The given node is no longer being used. This allows the consumer to
+ * potentially tear down anything that was being held open related to this.
+ * Note that this only fires when the given sdev_node_t becomes a zombie.
+ *
+ * During these callbacks a consumer is not allowed to register or unregister a
+ * plugin, especially their own. They may call the sdev_ctx style functions. All
+ * callbacks fire in a context where blocking is allowed (eg. the spl is below
+ * LOCK_LEVEL).
+ *
+ * When a plugin is added, we create its directory in the global zone. By doing
+ * that, we ensure that something isn't already there and that nothing else can
+ * come along and try and create something without our knowledge. We only have
+ * to create it in the GZ and not for all other instances of sdev because an
+ * instance of sdev that isn't at /dev does not have dynamic directories, and
+ * second, any instance of sdev present in a non-global zone cannot create
+ * anything, therefore we know that by it not being in the global zone's
+ * instance of sdev that we're good to go.
+ *
+ * Lock Ordering
+ * -------------
+ *
+ * The global sdev_plugin_lock must be held before any of the individual
+ * sdev_plugin_t`sp_lock. Further, once any plugin related lock has been held,
+ * it is not legal to take any holds on any sdev_node_t or to grab the
+ * sdev_node_t`contents_lock in any way.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/fs/sdev_impl.h>
+#include <sys/fs/sdev_plugin.h>
+#include <fs/fs_subr.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/ksynch.h>
+#include <sys/sysmacros.h>
+#include <sys/list.h>
+#include <sys/ctype.h>
+
+kmutex_t sdev_plugin_lock;
+list_t sdev_plugin_list;
+kmem_cache_t *sdev_plugin_cache;
+struct vnodeops *sdev_plugin_vnops;
+
+#define SDEV_PLUGIN_NAMELEN 64
+
+typedef struct sdev_plugin {
+ list_node_t sp_link;
+ char sp_name[SDEV_PLUGIN_NAMELEN]; /* E */
+ int sp_nflags; /* E */
+ struct vnodeops *sp_vnops; /* E */
+ sdev_plugin_ops_t *sp_pops; /* E */
+ boolean_t sp_islegacy; /* E */
+ int (*sp_lvtor)(sdev_node_t *); /* E */
+ kmutex_t sp_lock; /* Protects everything below */
+ kcondvar_t sp_nodecv;
+ size_t sp_nnodes;
+} sdev_plugin_t;
+
+/* ARGSUSED */
+static int
+sdev_plugin_cache_constructor(void *buf, void *arg, int tags)
+{
+ sdev_plugin_t *spp = buf;
+ mutex_init(&spp->sp_lock, NULL, MUTEX_DRIVER, 0);
+ cv_init(&spp->sp_nodecv, NULL, CV_DRIVER, NULL);
+ return (0);
+}
+
+/* ARGSUSED */
+static void
+sdev_plugin_cache_destructor(void *buf, void *arg)
+{
+ sdev_plugin_t *spp = buf;
+ cv_destroy(&spp->sp_nodecv);
+ mutex_destroy(&spp->sp_lock);
+}
+
+enum vtype
+sdev_ctx_vtype(sdev_ctx_t ctx)
+{
+ sdev_node_t *sdp = (sdev_node_t *)ctx;
+
+ ASSERT(RW_LOCK_HELD(&sdp->sdev_contents));
+ return (sdp->sdev_vnode->v_type);
+}
+
+const char *
+sdev_ctx_path(sdev_ctx_t ctx)
+{
+ sdev_node_t *sdp = (sdev_node_t *)ctx;
+
+ ASSERT(RW_LOCK_HELD(&sdp->sdev_contents));
+ return (sdp->sdev_path);
+}
+
+const char *
+sdev_ctx_name(sdev_ctx_t ctx)
+{
+ sdev_node_t *sdp = (sdev_node_t *)ctx;
+
+ ASSERT(RW_LOCK_HELD(&sdp->sdev_contents));
+ return (sdp->sdev_name);
+}
+
+/*
+ * Currently we only support psasing through a single flag -- SDEV_IS_GLOBAL.
+ */
+sdev_ctx_flags_t
+sdev_ctx_flags(sdev_ctx_t ctx)
+{
+ sdev_node_t *sdp = (sdev_node_t *)ctx;
+
+ ASSERT(RW_LOCK_HELD(&sdp->sdev_contents));
+ return (sdp->sdev_flags & SDEV_GLOBAL);
+}
+
+/*
+ * Return some amount of private data specific to the vtype. In the case of a
+ * character or block device this is the device number.
+ */
+const void *
+sdev_ctx_vtype_data(sdev_ctx_t ctx)
+{
+ sdev_node_t *sdp = (sdev_node_t *)ctx;
+ void *ret;
+
+ ASSERT(RW_LOCK_HELD(&sdp->sdev_contents));
+ switch (sdp->sdev_vnode->v_type) {
+ case VCHR:
+ case VBLK:
+ ret = (void *)(uintptr_t)(sdp->sdev_vnode->v_rdev);
+ break;
+ default:
+ ret = NULL;
+ break;
+ }
+
+ return (ret);
+}
+
+/*
+ * Use the same rules as zones for a name. isalphanum + '-', '_', and '.'.
+ */
+static int
+sdev_plugin_name_isvalid(const char *c, int buflen)
+{
+ int i;
+
+ for (i = 0; i < buflen; i++, c++) {
+ if (*c == '\0')
+ return (1);
+
+ if (!isalnum(*c) && *c != '-' && *c != '_' && *c != '.')
+ return (0);
+ }
+ /* Never found a null terminator */
+ return (0);
+}
+
+static int
+sdev_plugin_mknode(sdev_plugin_t *spp, sdev_node_t *sdvp, char *name,
+ vattr_t *vap)
+{
+ int ret;
+ sdev_node_t *svp;
+
+ ASSERT(RW_WRITE_HELD(&sdvp->sdev_contents));
+ ASSERT(spp != NULL);
+ svp = sdev_cache_lookup(sdvp, name);
+ if (svp != NULL) {
+ SDEV_SIMPLE_RELE(svp);
+ return (EEXIST);
+ }
+
+ ret = sdev_mknode(sdvp, name, &svp, vap, NULL, NULL, kcred,
+ SDEV_READY);
+ if (ret != 0)
+ return (ret);
+ SDEV_SIMPLE_RELE(svp);
+
+ return (0);
+}
+
+/*
+ * Plugin node creation callbacks
+ */
+int
+sdev_plugin_mkdir(sdev_ctx_t ctx, char *name)
+{
+ sdev_node_t *sdvp;
+ timestruc_t now;
+ struct vattr vap;
+
+ if (sdev_plugin_name_isvalid(name, SDEV_PLUGIN_NAMELEN) == 0)
+ return (EINVAL);
+
+ sdvp = (sdev_node_t *)ctx;
+ ASSERT(sdvp->sdev_private != NULL);
+ ASSERT(RW_WRITE_HELD(&sdvp->sdev_contents));
+
+ vap = *sdev_getdefault_attr(VDIR);
+ gethrestime(&now);
+ vap.va_atime = now;
+ vap.va_mtime = now;
+ vap.va_ctime = now;
+
+ return (sdev_plugin_mknode(sdvp->sdev_private, sdvp, name, &vap));
+}
+
+int
+sdev_plugin_mknod(sdev_ctx_t ctx, char *name, mode_t mode, dev_t dev)
+{
+ sdev_node_t *sdvp;
+ timestruc_t now;
+ struct vattr vap;
+
+ if (sdev_plugin_name_isvalid(name, SDEV_PLUGIN_NAMELEN) == 0)
+ return (EINVAL);
+
+ sdvp = (sdev_node_t *)ctx;
+ ASSERT(RW_WRITE_HELD(&sdvp->sdev_contents));
+ if (mode != S_IFCHR && mode != S_IFBLK)
+ return (EINVAL);
+
+ ASSERT(sdvp->sdev_private != NULL);
+
+ vap = *sdev_getdefault_attr(mode == S_IFCHR ? VCHR : VBLK);
+ gethrestime(&now);
+ vap.va_atime = now;
+ vap.va_mtime = now;
+ vap.va_ctime = now;
+ vap.va_rdev = dev;
+ vap.va_mode = mode | 0666;
+
+ /* Despite the similar name, this is in fact a different function */
+ return (sdev_plugin_mknode(sdvp->sdev_private, sdvp, name, &vap));
+
+}
+
+static int
+sdev_plugin_validate(sdev_node_t *sdp)
+{
+ int ret;
+ sdev_plugin_t *spp;
+
+ ASSERT(sdp->sdev_private != NULL);
+ spp = sdp->sdev_private;
+ ASSERT(spp->sp_islegacy == B_FALSE);
+ ASSERT(spp->sp_pops != NULL);
+ rw_enter(&sdp->sdev_contents, RW_READER);
+ ret = spp->sp_pops->spo_validate((uintptr_t)sdp);
+ rw_exit(&sdp->sdev_contents);
+ return (ret);
+}
+
+static void
+sdev_plugin_validate_dir(sdev_node_t *sdvp)
+{
+ int ret;
+ sdev_node_t *svp, *next;
+
+ ASSERT(RW_WRITE_HELD(&sdvp->sdev_contents));
+
+ for (svp = SDEV_FIRST_ENTRY(sdvp); svp != NULL; svp = next) {
+
+ next = SDEV_NEXT_ENTRY(sdvp, svp);
+ ASSERT(svp->sdev_state != SDEV_ZOMBIE);
+ /* skip nodes that aren't ready */
+ if (svp->sdev_state == SDEV_INIT)
+ continue;
+
+ switch (sdev_plugin_validate(svp)) {
+ case SDEV_VTOR_VALID:
+ case SDEV_VTOR_SKIP:
+ continue;
+ case SDEV_VTOR_INVALID:
+ case SDEV_VTOR_STALE:
+ break;
+ }
+
+ SDEV_HOLD(svp);
+
+ /*
+ * Clean out everything underneath this node before we
+ * remove it.
+ */
+ if (svp->sdev_vnode->v_type == VDIR) {
+ ret = sdev_cleandir(svp, NULL, 0);
+ ASSERT(ret == 0);
+ }
+ /* remove the cache node */
+ (void) sdev_cache_update(sdvp, &svp, svp->sdev_name,
+ SDEV_CACHE_DELETE);
+ SDEV_RELE(svp);
+ }
+}
+
+/* ARGSUSED */
+static int
+sdev_plugin_vop_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred,
+ int *eofp, caller_context_t *ct_unused, int flags_unused)
+{
+ int ret;
+ sdev_node_t *sdvp = VTOSDEV(dvp);
+ sdev_plugin_t *spp;
+
+ ASSERT(RW_READ_HELD(&sdvp->sdev_contents));
+
+ /* Sanity check we're not a zombie before we do anyting else */
+ if (sdvp->sdev_state == SDEV_ZOMBIE)
+ return (ENOENT);
+
+ spp = sdvp->sdev_private;
+ ASSERT(spp != NULL);
+ ASSERT(spp->sp_islegacy == B_FALSE);
+ ASSERT(spp->sp_pops != NULL);
+
+ if (crgetzoneid(cred) == GLOBAL_ZONEID && !SDEV_IS_GLOBAL(sdvp))
+ return (EPERM);
+
+ if (uiop->uio_offset == 0) {
+ /*
+ * We upgrade to a write lock and grab the plugin's lock along
+ * the way. We're almost certainly going to get creation
+ * callbacks, so this is the only safe way to go.
+ */
+ if (rw_tryupgrade(&sdvp->sdev_contents) == 0) {
+ rw_exit(&sdvp->sdev_contents);
+ rw_enter(&sdvp->sdev_contents, RW_WRITER);
+ if (sdvp->sdev_state == SDEV_ZOMBIE) {
+ rw_downgrade(&sdvp->sdev_contents);
+ return (ENOENT);
+ }
+ }
+
+ sdev_plugin_validate_dir(sdvp);
+ ret = spp->sp_pops->spo_filldir((uintptr_t)sdvp);
+ rw_downgrade(&sdvp->sdev_contents);
+ if (ret != 0)
+ return (ret);
+ }
+
+ return (devname_readdir_func(dvp, uiop, cred, eofp, 0));
+}
+
+/*
+ * If we don't have a callback function that returns a failure, then sdev will
+ * try to create a node for us which violates all of our basic assertions. To
+ * work around that we create our own callback for devname_lookup_func which
+ * always returns ENOENT as at this point either it was created with the filldir
+ * callback or it was not.
+ */
+/*ARGSUSED*/
+static int
+sdev_plugin_vop_lookup_cb(sdev_node_t *ddv, char *nm, void **arg, cred_t *cred,
+ void *unused, char *unused2)
+{
+ return (ENOENT);
+}
+
+/* ARGSUSED */
+static int
+sdev_plugin_vop_lookup(struct vnode *dvp, char *nm, struct vnode **vpp,
+ struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred,
+ caller_context_t *ct, int *direntflags, pathname_t *realpnp)
+{
+ int ret;
+ sdev_node_t *sdvp;
+ sdev_plugin_t *spp;
+
+ /* execute access is required to search the directory */
+ if ((ret = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0)
+ return (ret);
+
+ sdvp = VTOSDEV(dvp);
+ spp = sdvp->sdev_private;
+ ASSERT(spp != NULL);
+ ASSERT(spp->sp_islegacy == B_FALSE);
+ ASSERT(spp->sp_pops != NULL);
+
+ if (crgetzoneid(cred) == GLOBAL_ZONEID && !SDEV_IS_GLOBAL(sdvp))
+ return (EPERM);
+
+ /*
+ * Go straight for the write lock.
+ */
+ rw_enter(&sdvp->sdev_contents, RW_WRITER);
+ if (sdvp->sdev_state == SDEV_ZOMBIE) {
+ rw_exit(&sdvp->sdev_contents);
+ return (ENOENT);
+ }
+ sdev_plugin_validate_dir(sdvp);
+ ret = spp->sp_pops->spo_filldir((uintptr_t)sdvp);
+ rw_exit(&sdvp->sdev_contents);
+ if (ret != 0)
+ return (ret);
+
+ return (devname_lookup_func(sdvp, nm, vpp, cred,
+ sdev_plugin_vop_lookup_cb, SDEV_VATTR));
+}
+
+/*
+ * sdev is not a good citizen. We get inactive callbacks whenever a vnode goes
+ * to zero, but isn't necessairily a zombie yet. As such, to make things easier
+ * for users, we only fire the inactive callback when the node becomes a zombie
+ * and thus will be torn down here.
+ */
+static void
+sdev_plugin_vop_inactive_cb(struct vnode *dvp)
+{
+ sdev_node_t *sdp = VTOSDEV(dvp);
+ sdev_plugin_t *spp = sdp->sdev_private;
+
+ rw_enter(&sdp->sdev_contents, RW_READER);
+ if (sdp->sdev_state != SDEV_ZOMBIE) {
+ rw_exit(&sdp->sdev_contents);
+ return;
+ }
+ spp->sp_pops->spo_inactive((uintptr_t)sdp);
+ mutex_enter(&spp->sp_lock);
+ VERIFY(spp->sp_nnodes > 0);
+ spp->sp_nnodes--;
+ cv_signal(&spp->sp_nodecv);
+ mutex_exit(&spp->sp_lock);
+ rw_exit(&sdp->sdev_contents);
+}
+
+/*ARGSUSED*/
+static void
+sdev_plugin_vop_inactive(struct vnode *dvp, struct cred *cred,
+ caller_context_t *ct)
+{
+ sdev_node_t *sdp = VTOSDEV(dvp);
+ sdev_plugin_t *spp = sdp->sdev_private;
+ ASSERT(sdp->sdev_private != NULL);
+ ASSERT(spp->sp_islegacy == B_FALSE);
+ devname_inactive_func(dvp, cred, sdev_plugin_vop_inactive_cb);
+}
+
+const fs_operation_def_t sdev_plugin_vnodeops_tbl[] = {
+ VOPNAME_READDIR, { .vop_readdir = sdev_plugin_vop_readdir },
+ VOPNAME_LOOKUP, { .vop_lookup = sdev_plugin_vop_lookup },
+ VOPNAME_INACTIVE, { .vop_inactive = sdev_plugin_vop_inactive },
+ VOPNAME_CREATE, { .error = fs_nosys },
+ VOPNAME_REMOVE, { .error = fs_nosys },
+ VOPNAME_MKDIR, { .error = fs_nosys },
+ VOPNAME_RMDIR, { .error = fs_nosys },
+ VOPNAME_SYMLINK, { .error = fs_nosys },
+ VOPNAME_SETSECATTR, { .error = fs_nosys },
+ NULL, NULL
+};
+
+/*
+ * construct a new template with overrides from vtab
+ */
+static fs_operation_def_t *
+sdev_merge_vtab(const fs_operation_def_t tab[])
+{
+ fs_operation_def_t *new;
+ const fs_operation_def_t *tab_entry;
+
+ /* make a copy of standard vnode ops table */
+ new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP);
+ bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size);
+
+ /* replace the overrides from tab */
+ for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) {
+ fs_operation_def_t *std_entry = new;
+ while (std_entry->name) {
+ if (strcmp(tab_entry->name, std_entry->name) == 0) {
+ std_entry->func = tab_entry->func;
+ break;
+ }
+ std_entry++;
+ }
+ }
+
+ return (new);
+}
+
+/* free memory allocated by sdev_merge_vtab */
+static void
+sdev_free_vtab(fs_operation_def_t *new)
+{
+ kmem_free(new, sdev_vnodeops_tbl_size);
+}
+
+/*
+ * Register a new plugin.
+ */
+sdev_plugin_hdl_t
+sdev_plugin_register(const char *name, sdev_plugin_ops_t *ops, int *errp)
+{
+ int ret, err;
+ sdev_plugin_t *spp, *iter;
+ vnode_t *vp, *nvp;
+ sdev_node_t *sdp, *slp;
+ timestruc_t now;
+ struct vattr vap;
+
+ /*
+ * Some consumers don't care about why they failed. To keep the code
+ * simple, we'll just pretend they gave us something.
+ */
+ if (errp == NULL)
+ errp = &err;
+
+ if (sdev_plugin_name_isvalid(name, SDEV_PLUGIN_NAMELEN) == 0) {
+ *errp = EINVAL;
+ return (NULL);
+ }
+
+ if (ops->spo_version != 1) {
+ *errp = EINVAL;
+ return (NULL);
+ }
+
+ if (ops->spo_validate == NULL || ops->spo_filldir == NULL ||
+ ops->spo_inactive == NULL) {
+ *errp = EINVAL;
+ return (NULL);
+ }
+
+ if ((ops->spo_flags & ~SDEV_PLUGIN_FLAGS_MASK) != 0) {
+ *errp = EINVAL;
+ return (NULL);
+ }
+
+ spp = kmem_cache_alloc(sdev_plugin_cache, KM_SLEEP);
+ (void) strlcpy(spp->sp_name, name, SDEV_PLUGIN_NAMELEN);
+
+ spp->sp_pops = ops;
+ spp->sp_nflags = SDEV_DYNAMIC | SDEV_VTOR;
+ if (ops->spo_flags & SDEV_PLUGIN_NO_NCACHE)
+ spp->sp_nflags |= SDEV_NO_NCACHE;
+ if (ops->spo_flags & SDEV_PLUGIN_SUBDIR)
+ spp->sp_nflags |= SDEV_SUBDIR;
+ spp->sp_vnops = sdev_plugin_vnops;
+ spp->sp_islegacy = B_FALSE;
+ spp->sp_lvtor = NULL;
+ spp->sp_nnodes = 0;
+
+ /*
+ * Make sure it's unique, nothing exists with this name already, and add
+ * it to the list. We also need to go through and grab the sdev
+ * root node as we cannot grab any sdev node locks once we've grabbed
+ * the sdev_plugin_lock. We effectively assert that if a directory is
+ * not present in the GZ's /dev, then it doesn't exist in any of the
+ * local zones.
+ */
+ ret = vn_openat("/dev", UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir, -1);
+ if (ret != 0) {
+ *errp = ret;
+ kmem_cache_free(sdev_plugin_cache, spp);
+ return (NULL);
+ }
+ /* Make sure we have the real vnode */
+ if (VOP_REALVP(vp, &nvp, NULL) == 0) {
+ VN_HOLD(nvp);
+ VN_RELE(vp);
+ vp = nvp;
+ nvp = NULL;
+ }
+ VERIFY(vp->v_op == sdev_vnodeops);
+ sdp = VTOSDEV(vp);
+ rw_enter(&sdp->sdev_contents, RW_WRITER);
+ slp = sdev_cache_lookup(sdp, spp->sp_name);
+ if (slp != NULL) {
+ SDEV_RELE(slp);
+ rw_exit(&sdp->sdev_contents);
+ VN_RELE(vp);
+ *errp = EEXIST;
+ kmem_cache_free(sdev_plugin_cache, spp);
+ return (NULL);
+ }
+
+ mutex_enter(&sdev_plugin_lock);
+ for (iter = list_head(&sdev_plugin_list); iter != NULL;
+ iter = list_next(&sdev_plugin_list, iter)) {
+ if (strcmp(spp->sp_name, iter->sp_name) == 0) {
+ mutex_exit(&sdev_plugin_lock);
+ rw_exit(&sdp->sdev_contents);
+ VN_RELE(vp);
+ *errp = EEXIST;
+ kmem_cache_free(sdev_plugin_cache, spp);
+ return (NULL);
+ }
+ }
+
+ list_insert_tail(&sdev_plugin_list, spp);
+ mutex_exit(&sdev_plugin_lock);
+
+ /*
+ * Now go ahead and create the top level directory for the global zone.
+ */
+ vap = *sdev_getdefault_attr(VDIR);
+ gethrestime(&now);
+ vap.va_atime = now;
+ vap.va_mtime = now;
+ vap.va_ctime = now;
+
+ (void) sdev_plugin_mknode(spp, sdp, spp->sp_name, &vap);
+
+ rw_exit(&sdp->sdev_contents);
+ VN_RELE(vp);
+
+ return ((sdev_plugin_hdl_t)spp);
+}
+
+static void
+sdev_plugin_unregister_cb(sdev_node_t *rdp, void *arg)
+{
+ sdev_plugin_t *spp = arg;
+ sdev_node_t *sdp;
+
+ rw_enter(&rdp->sdev_contents, RW_WRITER);
+ sdp = sdev_cache_lookup(rdp, spp->sp_name);
+ /* If it doesn't exist, we're done here */
+ if (sdp == NULL) {
+ rw_exit(&rdp->sdev_contents);
+ return;
+ }
+
+ /*
+ * We first delete the directory before recursively marking everything
+ * else stale. This ordering should ensure that we don't accidentally
+ * miss anything.
+ */
+ sdev_cache_update(rdp, &sdp, spp->sp_name, SDEV_CACHE_DELETE);
+ sdev_stale(sdp);
+ SDEV_RELE(sdp);
+ rw_exit(&rdp->sdev_contents);
+}
+
+/*
+ * Remove a plugin. This will block until everything has become a zombie, thus
+ * guaranteeing the caller that nothing will call into them again once this call
+ * returns. While the call is ongoing, it could be called into. Note that while
+ * this is ongoing, it will block other mounts.
+ */
+int
+sdev_plugin_unregister(sdev_plugin_hdl_t hdl)
+{
+ sdev_plugin_t *spp = (sdev_plugin_t *)hdl;
+ if (spp->sp_islegacy)
+ return (EINVAL);
+
+ mutex_enter(&sdev_plugin_lock);
+ list_remove(&sdev_plugin_list, spp);
+ mutex_exit(&sdev_plugin_lock);
+
+ sdev_mnt_walk(sdev_plugin_unregister_cb, spp);
+ mutex_enter(&spp->sp_lock);
+ while (spp->sp_nnodes > 0)
+ cv_wait(&spp->sp_nodecv, &spp->sp_lock);
+ mutex_exit(&spp->sp_lock);
+ kmem_cache_free(sdev_plugin_cache, spp);
+ return (0);
+}
+
+/*
+ * Register an old sdev style plugin to deal with what used to be in the vtab.
+ */
+static int
+sdev_plugin_register_legacy(struct sdev_vop_table *vtp)
+{
+ sdev_plugin_t *spp;
+
+ spp = kmem_cache_alloc(sdev_plugin_cache, KM_SLEEP);
+ (void) strlcpy(spp->sp_name, vtp->vt_name, SDEV_PLUGIN_NAMELEN);
+ spp->sp_islegacy = B_TRUE;
+ spp->sp_pops = NULL;
+ spp->sp_nflags = vtp->vt_flags;
+ spp->sp_lvtor = vtp->vt_vtor;
+ spp->sp_nnodes = 0;
+
+ if (vtp->vt_service != NULL) {
+ fs_operation_def_t *templ;
+ templ = sdev_merge_vtab(vtp->vt_service);
+ if (vn_make_ops(vtp->vt_name,
+ (const fs_operation_def_t *)templ,
+ &spp->sp_vnops) != 0) {
+ cmn_err(CE_WARN, "%s: malformed vnode ops\n",
+ vtp->vt_name);
+ sdev_free_vtab(templ);
+ kmem_cache_free(sdev_plugin_cache, spp);
+ return (1);
+ }
+
+ if (vtp->vt_global_vops) {
+ *(vtp->vt_global_vops) = spp->sp_vnops;
+ }
+
+ sdev_free_vtab(templ);
+ } else {
+ spp->sp_vnops = sdev_vnodeops;
+ }
+
+ /*
+ * No need to check for EEXIST here. These are loaded as a part of the
+ * sdev's initialization function. Further, we don't have to create them
+ * as that's taken care of in sdev's mount for the GZ.
+ */
+ mutex_enter(&sdev_plugin_lock);
+ list_insert_tail(&sdev_plugin_list, spp);
+ mutex_exit(&sdev_plugin_lock);
+
+ return (0);
+}
+
+/*
+ * We need to match off of the sdev_path, not the sdev_name. We are only allowed
+ * to exist directly under /dev.
+ */
+static sdev_plugin_t *
+sdev_match(sdev_node_t *dv)
+{
+ int vlen;
+ const char *path;
+ sdev_plugin_t *spp;
+
+ if (strlen(dv->sdev_path) <= 5)
+ return (NULL);
+
+ if (strncmp(dv->sdev_path, "/dev/", 5) != 0)
+ return (NULL);
+ path = dv->sdev_path + 5;
+
+ mutex_enter(&sdev_plugin_lock);
+
+ for (spp = list_head(&sdev_plugin_list); spp != NULL;
+ spp = list_next(&sdev_plugin_list, spp)) {
+ if (strcmp(spp->sp_name, path) == 0) {
+ mutex_exit(&sdev_plugin_lock);
+ return (spp);
+ }
+
+ if (spp->sp_nflags & SDEV_SUBDIR) {
+ vlen = strlen(spp->sp_name);
+ if ((strncmp(spp->sp_name, path,
+ vlen - 1) == 0) && path[vlen] == '/') {
+ mutex_exit(&sdev_plugin_lock);
+ return (spp);
+ }
+
+ }
+ }
+
+ mutex_exit(&sdev_plugin_lock);
+ return (NULL);
+}
+
+void
+sdev_set_no_negcache(sdev_node_t *dv)
+{
+ char *path;
+ sdev_plugin_t *spp;
+
+ ASSERT(dv->sdev_path);
+ path = dv->sdev_path + strlen("/dev/");
+
+ mutex_enter(&sdev_plugin_lock);
+ for (spp = list_head(&sdev_plugin_list); spp != NULL;
+ spp = list_next(&sdev_plugin_list, spp)) {
+ if (strcmp(spp->sp_name, path) == 0) {
+ if (spp->sp_nflags & SDEV_NO_NCACHE)
+ dv->sdev_flags |= SDEV_NO_NCACHE;
+ break;
+ }
+ }
+ mutex_exit(&sdev_plugin_lock);
+}
+
+struct vnodeops *
+sdev_get_vop(sdev_node_t *dv)
+{
+ char *path;
+ sdev_plugin_t *spp;
+
+ path = dv->sdev_path;
+ ASSERT(path);
+
+ /* gets the relative path to /dev/ */
+ path += 5;
+
+ if ((spp = sdev_match(dv)) != NULL) {
+ dv->sdev_flags |= spp->sp_nflags;
+ if (SDEV_IS_PERSIST(dv->sdev_dotdot) &&
+ (SDEV_IS_PERSIST(dv) || !SDEV_IS_DYNAMIC(dv)))
+ dv->sdev_flags |= SDEV_PERSIST;
+ return (spp->sp_vnops);
+ }
+
+ /* child inherits the persistence of the parent */
+ if (SDEV_IS_PERSIST(dv->sdev_dotdot))
+ dv->sdev_flags |= SDEV_PERSIST;
+ return (sdev_vnodeops);
+}
+
+void *
+sdev_get_vtor(sdev_node_t *dv)
+{
+ sdev_plugin_t *spp;
+
+ if (dv->sdev_private == NULL) {
+ spp = sdev_match(dv);
+ if (spp == NULL)
+ return (NULL);
+ } else {
+ spp = dv->sdev_private;
+ }
+
+ if (spp->sp_islegacy)
+ return ((void *)spp->sp_lvtor);
+ else
+ return ((void *)sdev_plugin_validate);
+}
+
+void
+sdev_plugin_nodeready(sdev_node_t *sdp)
+{
+ sdev_plugin_t *spp;
+
+ ASSERT(RW_WRITE_HELD(&sdp->sdev_contents));
+ ASSERT(sdp->sdev_private == NULL);
+
+ spp = sdev_match(sdp);
+ if (spp == NULL)
+ return;
+ if (spp->sp_islegacy)
+ return;
+ sdp->sdev_private = spp;
+ mutex_enter(&spp->sp_lock);
+ spp->sp_nnodes++;
+ mutex_exit(&spp->sp_lock);
+}
+
+int
+sdev_plugin_init(void)
+{
+ sdev_vop_table_t *vtp;
+ fs_operation_def_t *templ;
+
+ sdev_plugin_cache = kmem_cache_create("sdev_plugin",
+ sizeof (sdev_plugin_t), 0, sdev_plugin_cache_constructor,
+ sdev_plugin_cache_destructor, NULL, NULL, NULL, 0);
+ if (sdev_plugin_cache == NULL)
+ return (1);
+ mutex_init(&sdev_plugin_lock, NULL, MUTEX_DRIVER, NULL);
+ list_create(&sdev_plugin_list, sizeof (sdev_plugin_t),
+ offsetof(sdev_plugin_t, sp_link));
+
+ /*
+ * Register all of the legacy vnops
+ */
+ for (vtp = &vtab[0]; vtp->vt_name != NULL; vtp++)
+ if (sdev_plugin_register_legacy(vtp) != 0)
+ return (1);
+
+ templ = sdev_merge_vtab(sdev_plugin_vnodeops_tbl);
+ if (vn_make_ops("sdev_plugin",
+ (const fs_operation_def_t *)templ,
+ &sdev_plugin_vnops) != 0) {
+ sdev_free_vtab(templ);
+ return (1);
+ }
+
+ sdev_free_vtab(templ);
+ return (0);
+}
diff --git a/usr/src/uts/common/fs/dev/sdev_subr.c b/usr/src/uts/common/fs/dev/sdev_subr.c
index 9234cc4a0c..511432453f 100644
--- a/usr/src/uts/common/fs/dev/sdev_subr.c
+++ b/usr/src/uts/common/fs/dev/sdev_subr.c
@@ -150,12 +150,6 @@ vattr_t sdev_vattr_chr = {
kmem_cache_t *sdev_node_cache; /* sdev_node cache */
int devtype; /* fstype */
-/* static */
-static struct vnodeops *sdev_get_vop(struct sdev_node *);
-static void sdev_set_no_negcache(struct sdev_node *);
-static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []);
-static void sdev_free_vtab(fs_operation_def_t *);
-
static void
sdev_prof_free(struct sdev_node *dv)
{
@@ -313,6 +307,7 @@ sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
(void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm);
/* overwritten for VLNK nodes */
dv->sdev_symlink = NULL;
+ list_link_init(&dv->sdev_plist);
vp = SDEVTOV(dv);
vn_reinit(vp);
@@ -401,6 +396,7 @@ sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
} else {
dv->sdev_nlink = 1;
}
+ sdev_plugin_nodeready(dv);
if (!(SDEV_IS_GLOBAL(dv))) {
dv->sdev_origin = (struct sdev_node *)args;
@@ -497,37 +493,22 @@ sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp,
return (dv);
}
-/* directory dependent vop table */
-struct sdev_vop_table {
- char *vt_name; /* subdirectory name */
- const fs_operation_def_t *vt_service; /* vnodeops table */
- struct vnodeops *vt_vops; /* constructed vop */
- struct vnodeops **vt_global_vops; /* global container for vop */
- int (*vt_vtor)(struct sdev_node *); /* validate sdev_node */
- int vt_flags;
-};
-
-/*
- * A nice improvement would be to provide a plug-in mechanism
- * for this table instead of a const table.
- */
-static struct sdev_vop_table vtab[] =
-{
- { "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate,
+struct sdev_vop_table vtab[] = {
+ { "pts", devpts_vnodeops_tbl, &devpts_vnodeops, devpts_validate,
SDEV_DYNAMIC | SDEV_VTOR },
- { "vt", devvt_vnodeops_tbl, NULL, &devvt_vnodeops, devvt_validate,
+ { "vt", devvt_vnodeops_tbl, &devvt_vnodeops, devvt_validate,
SDEV_DYNAMIC | SDEV_VTOR },
- { "zvol", devzvol_vnodeops_tbl, NULL, &devzvol_vnodeops,
+ { "zvol", devzvol_vnodeops_tbl, &devzvol_vnodeops,
devzvol_validate, SDEV_ZONED | SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR },
- { "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE },
+ { "zcons", NULL, NULL, NULL, SDEV_NO_NCACHE },
- { "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate,
- SDEV_DYNAMIC | SDEV_VTOR },
+ { "net", devnet_vnodeops_tbl, &devnet_vnodeops, devnet_validate,
+ SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR },
- { "ipnet", devipnet_vnodeops_tbl, NULL, &devipnet_vnodeops,
+ { "ipnet", devipnet_vnodeops_tbl, &devipnet_vnodeops,
devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE },
/*
@@ -542,132 +523,14 @@ static struct sdev_vop_table vtab[] =
* preventing a mkdir.
*/
- { "lofi", NULL, NULL, NULL, NULL,
+ { "lofi", NULL, NULL, NULL,
SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },
- { "rlofi", NULL, NULL, NULL, NULL,
+ { "rlofi", NULL, NULL, NULL,
SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },
- { NULL, NULL, NULL, NULL, NULL, 0}
+ { NULL, NULL, NULL, NULL, 0}
};
-/*
- * We need to match off of the sdev_path, not the sdev_name. We are only allowed
- * to exist directly under /dev.
- */
-struct sdev_vop_table *
-sdev_match(struct sdev_node *dv)
-{
- int vlen;
- int i;
- const char *path;
-
- if (strlen(dv->sdev_path) <= 5)
- return (NULL);
-
- if (strncmp(dv->sdev_path, "/dev/", 5) != 0)
- return (NULL);
- path = dv->sdev_path + 5;
-
- for (i = 0; vtab[i].vt_name; i++) {
- if (strcmp(vtab[i].vt_name, path) == 0)
- return (&vtab[i]);
- if (vtab[i].vt_flags & SDEV_SUBDIR) {
- vlen = strlen(vtab[i].vt_name);
- if ((strncmp(vtab[i].vt_name, path,
- vlen - 1) == 0) && path[vlen] == '/')
- return (&vtab[i]);
- }
-
- }
- return (NULL);
-}
-
-/*
- * sets a directory's vnodeops if the directory is in the vtab;
- */
-static struct vnodeops *
-sdev_get_vop(struct sdev_node *dv)
-{
- struct sdev_vop_table *vtp;
- char *path;
-
- path = dv->sdev_path;
- ASSERT(path);
-
- /* gets the relative path to /dev/ */
- path += 5;
-
- /* gets the vtab entry it matches */
- if ((vtp = sdev_match(dv)) != NULL) {
- dv->sdev_flags |= vtp->vt_flags;
- if (SDEV_IS_PERSIST(dv->sdev_dotdot) &&
- (SDEV_IS_PERSIST(dv) || !SDEV_IS_DYNAMIC(dv)))
- dv->sdev_flags |= SDEV_PERSIST;
-
- if (vtp->vt_vops) {
- if (vtp->vt_global_vops)
- *(vtp->vt_global_vops) = vtp->vt_vops;
-
- return (vtp->vt_vops);
- }
-
- if (vtp->vt_service) {
- fs_operation_def_t *templ;
- templ = sdev_merge_vtab(vtp->vt_service);
- if (vn_make_ops(vtp->vt_name,
- (const fs_operation_def_t *)templ,
- &vtp->vt_vops) != 0) {
- cmn_err(CE_PANIC, "%s: malformed vnode ops\n",
- vtp->vt_name);
- /*NOTREACHED*/
- }
- if (vtp->vt_global_vops) {
- *(vtp->vt_global_vops) = vtp->vt_vops;
- }
- sdev_free_vtab(templ);
-
- return (vtp->vt_vops);
- }
-
- return (sdev_vnodeops);
- }
-
- /* child inherits the persistence of the parent */
- if (SDEV_IS_PERSIST(dv->sdev_dotdot))
- dv->sdev_flags |= SDEV_PERSIST;
-
- return (sdev_vnodeops);
-}
-
-static void
-sdev_set_no_negcache(struct sdev_node *dv)
-{
- int i;
- char *path;
-
- ASSERT(dv->sdev_path);
- path = dv->sdev_path + strlen("/dev/");
-
- for (i = 0; vtab[i].vt_name; i++) {
- if (strcmp(vtab[i].vt_name, path) == 0) {
- if (vtab[i].vt_flags & SDEV_NO_NCACHE)
- dv->sdev_flags |= SDEV_NO_NCACHE;
- break;
- }
- }
-}
-
-void *
-sdev_get_vtor(struct sdev_node *dv)
-{
- struct sdev_vop_table *vtp;
-
- vtp = sdev_match(dv);
- if (vtp)
- return ((void *)vtp->vt_vtor);
- else
- return (NULL);
-}
/*
* Build the base root inode
@@ -947,8 +810,11 @@ sdev_nodedestroy(struct sdev_node *dv, uint_t flags)
dv->sdev_path = NULL;
}
- if (!SDEV_IS_GLOBAL(dv))
+ if (!SDEV_IS_GLOBAL(dv)) {
sdev_prof_free(dv);
+ if (dv->sdev_vnode->v_type != VLNK && dv->sdev_origin != NULL)
+ SDEV_RELE(dv->sdev_origin);
+ }
if (SDEVTOV(dv)->v_type == VDIR) {
ASSERT(SDEV_FIRST_ENTRY(dv) == NULL);
@@ -962,6 +828,7 @@ sdev_nodedestroy(struct sdev_node *dv, uint_t flags)
(void) memset((void *)&dv->sdev_instance_data, 0,
sizeof (dv->sdev_instance_data));
vn_invalid(SDEVTOV(dv));
+ dv->sdev_private = NULL;
kmem_cache_free(sdev_node_cache, dv);
}
@@ -2944,46 +2811,6 @@ sdev_modctl_devexists(const char *path)
return (error);
}
-extern int sdev_vnodeops_tbl_size;
-
-/*
- * construct a new template with overrides from vtab
- */
-static fs_operation_def_t *
-sdev_merge_vtab(const fs_operation_def_t tab[])
-{
- fs_operation_def_t *new;
- const fs_operation_def_t *tab_entry;
-
- /* make a copy of standard vnode ops table */
- new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP);
- bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size);
-
- /* replace the overrides from tab */
- for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) {
- fs_operation_def_t *std_entry = new;
- while (std_entry->name) {
- if (strcmp(tab_entry->name, std_entry->name) == 0) {
- std_entry->func = tab_entry->func;
- break;
- }
- std_entry++;
- }
- if (std_entry->name == NULL)
- cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.",
- tab_entry->name);
- }
-
- return (new);
-}
-
-/* free memory allocated by sdev_merge_vtab */
-static void
-sdev_free_vtab(fs_operation_def_t *new)
-{
- kmem_free(new, sdev_vnodeops_tbl_size);
-}
-
/*
* a generic setattr() function
*
diff --git a/usr/src/uts/common/fs/dev/sdev_vfsops.c b/usr/src/uts/common/fs/dev/sdev_vfsops.c
index 00e981ce9c..8de16926cd 100644
--- a/usr/src/uts/common/fs/dev/sdev_vfsops.c
+++ b/usr/src/uts/common/fs/dev/sdev_vfsops.c
@@ -172,7 +172,13 @@ devinit(int fstype, char *name)
if ((devmajor = getudev()) == (major_t)-1) {
cmn_err(CE_WARN, "%s: can't get unique dev", sdev_vfssw.name);
- return (1);
+ return (ENXIO);
+ }
+
+ if (sdev_plugin_init() != 0) {
+ cmn_err(CE_WARN, "%s: failed to set init plugin subsystem",
+ sdev_vfssw.name);
+ return (EIO);
}
/* initialize negative cache */
@@ -349,6 +355,7 @@ sdev_mount(struct vfs *vfsp, struct vnode *mvp, struct mounta *uap,
ASSERT(sdev_origins);
dv->sdev_flags &= ~SDEV_GLOBAL;
dv->sdev_origin = sdev_origins->sdev_root;
+ SDEV_HOLD(dv->sdev_origin);
} else {
sdev_ncache_setup();
rw_enter(&dv->sdev_contents, RW_WRITER);
@@ -521,3 +528,17 @@ sdev_mntinfo_rele(struct sdev_data *mntinfo)
SDEVTOV(mntinfo->sdev_root)->v_count--;
mutex_exit(&sdev_lock);
}
+
+void
+sdev_mnt_walk(void (*func)(struct sdev_node *, void *), void *arg)
+{
+ struct sdev_data *mntinfo;
+
+ mutex_enter(&sdev_lock);
+ mntinfo = sdev_mntinfo;
+ while (mntinfo != NULL) {
+ func(mntinfo->sdev_root, arg);
+ mntinfo = mntinfo->sdev_next;
+ }
+ mutex_exit(&sdev_lock);
+}
diff --git a/usr/src/uts/common/fs/dev/sdev_vnops.c b/usr/src/uts/common/fs/dev/sdev_vnops.c
index 59a3c9f17a..6ce4b0b174 100644
--- a/usr/src/uts/common/fs/dev/sdev_vnops.c
+++ b/usr/src/uts/common/fs/dev/sdev_vnops.c
@@ -22,7 +22,7 @@
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2016, Joyent, Inc.
*/
/*
@@ -864,6 +864,9 @@ sdev_remove(struct vnode *dvp, char *nm, struct cred *cred,
}
}
+ if (error == 0)
+ i_ddi_di_cache_invalidate();
+
return (error);
}
@@ -1188,6 +1191,7 @@ sdev_symlink(struct vnode *dvp, char *lnm, struct vattr *tva,
sdev_update_timestamps(dvp, kcred, AT_MTIME|AT_ATIME);
if (SDEV_IS_GLOBAL(parent))
atomic_inc_ulong(&parent->sdev_gdir_gen);
+ i_ddi_di_cache_invalidate();
/* wake up other threads blocked on looking up this node */
mutex_enter(&self->sdev_lookup_lock);
@@ -1260,6 +1264,7 @@ sdev_mkdir(struct vnode *dvp, char *nm, struct vattr *va, struct vnode **vpp,
sdev_update_timestamps(dvp, kcred, AT_MTIME|AT_ATIME);
if (SDEV_IS_GLOBAL(parent))
atomic_inc_ulong(&parent->sdev_gdir_gen);
+ i_ddi_di_cache_invalidate();
/* wake up other threads blocked on looking up this node */
mutex_enter(&self->sdev_lookup_lock);
@@ -1375,6 +1380,9 @@ sdev_rmdir(struct vnode *dvp, char *nm, struct vnode *cdir, struct cred *cred,
}
+ if (error == 0)
+ i_ddi_di_cache_invalidate();
+
return (error);
}
diff --git a/usr/src/uts/common/fs/dev/sdev_zvolops.c b/usr/src/uts/common/fs/dev/sdev_zvolops.c
index 11384e33d3..407ad1d55b 100644
--- a/usr/src/uts/common/fs/dev/sdev_zvolops.c
+++ b/usr/src/uts/common/fs/dev/sdev_zvolops.c
@@ -459,8 +459,10 @@ devzvol_create_pool_dirs(struct vnode *dvp)
ASSERT(dvp->v_count > 0);
rc = VOP_LOOKUP(dvp, nvpair_name(elem), &vp, NULL, 0,
NULL, kcred, NULL, 0, NULL);
- /* should either work, or not be visible from a zone */
- ASSERT(rc == 0 || rc == ENOENT);
+ /*
+ * should either work or we should get an error if this should
+ * not be visible from the zone, or disallowed in the zone
+ */
if (rc == 0)
VN_RELE(vp);
pools++;
diff --git a/usr/src/uts/common/fs/dnlc.c b/usr/src/uts/common/fs/dnlc.c
index 25327d2852..c949117da6 100644
--- a/usr/src/uts/common/fs/dnlc.c
+++ b/usr/src/uts/common/fs/dnlc.c
@@ -921,50 +921,6 @@ dnlc_fs_purge1(vnodeops_t *vop)
}
/*
- * Perform a reverse lookup in the DNLC. This will find the first occurrence of
- * the vnode. If successful, it will return the vnode of the parent, and the
- * name of the entry in the given buffer. If it cannot be found, or the buffer
- * is too small, then it will return NULL. Note that this is a highly
- * inefficient function, since the DNLC is constructed solely for forward
- * lookups.
- */
-vnode_t *
-dnlc_reverse_lookup(vnode_t *vp, char *buf, size_t buflen)
-{
- nc_hash_t *nch;
- ncache_t *ncp;
- vnode_t *pvp;
-
- if (!doingcache)
- return (NULL);
-
- for (nch = nc_hash; nch < &nc_hash[nc_hashsz]; nch++) {
- mutex_enter(&nch->hash_lock);
- ncp = nch->hash_next;
- while (ncp != (ncache_t *)nch) {
- /*
- * We ignore '..' entries since it can create
- * confusion and infinite loops.
- */
- if (ncp->vp == vp && !(ncp->namlen == 2 &&
- 0 == bcmp(ncp->name, "..", 2)) &&
- ncp->namlen < buflen) {
- bcopy(ncp->name, buf, ncp->namlen);
- buf[ncp->namlen] = '\0';
- pvp = ncp->dp;
- /* VN_HOLD 2 of 2 in this file */
- VN_HOLD_CALLER(pvp);
- mutex_exit(&nch->hash_lock);
- return (pvp);
- }
- ncp = ncp->hash_next;
- }
- mutex_exit(&nch->hash_lock);
- }
-
- return (NULL);
-}
-/*
* Utility routine to search for a cache entry. Return the
* ncache entry if found, NULL otherwise.
*/
diff --git a/usr/src/uts/common/fs/fem.c b/usr/src/uts/common/fs/fem.c
index b4e28cc860..5f524def30 100644
--- a/usr/src/uts/common/fs/fem.c
+++ b/usr/src/uts/common/fs/fem.c
@@ -23,6 +23,10 @@
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
+ */
+
#include <sys/types.h>
#include <sys/atomic.h>
#include <sys/kmem.h>
@@ -33,11 +37,12 @@
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
-
#include <sys/fem.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/vfs_opreg.h>
+#include <sys/stack.h>
+#include <sys/archsystm.h>
#define NNODES_DEFAULT 8 /* Default number of nodes in a fem_list */
/*
@@ -291,6 +296,536 @@ _op_find(femarg_t *ap, void **fp, int offs0, int offs1)
}
#endif
+/*
+ * File event monitoring handoffs
+ *
+ * File event monitoring relies on being able to inject stack frames between
+ * vnode consumers and the underlying file systems. This becomes problematic
+ * when there exist many monitors, as kernel stack depth is finite. The model
+ * very much encodes this injected frame: the flow of control deliberately
+ * lies with the monitor, not with the monitoring system. While we could
+ * conceivably address this by allowing each subsystem to install at most
+ * one monitor per vnode (and impose on subsystems that they handle any
+ * of their own consumer multiplexing internally), this in fact exports a
+ * substantial amount of run-time complexity to deal with an uncommon case
+ * (and, it must be said, assumes a small number of consuming subsystems).
+ * To allow our abstraction to remain clean, we instead check our remaining
+ * stack in every vnext_*() call; if the amount of stack remaining is lower
+ * than a threshold (fem_stack_needed), we call thread_splitstack() to carry
+ * on the execution of the monitors and the underlying vnode operation on a
+ * split stack. Because we can only pass a single argument to our split stack
+ * function, we must marshal our arguments, the mechanics of which are somewhat
+ * ornate in terms of the code: to marshal in a type-safe manner, we define a
+ * baton that is a union of payload structures for each kind of operation,
+ * loading the per-operation payload explicitly and calling into common handoff
+ * code that itself calls thread_splitstack(). The function passed to
+ * thread_splitstack() is a per-entry point function that continues monitor
+ * processing given the specified (marshalled) arguments. While this method
+ * is a little verbose to implement, it has the advantage of being relatively
+ * robust (that is, broadly type-safe) while imposing minimal burden on each
+ * vnext_*() entry point.
+ *
+ * In terms of the implementation:
+ *
+ * - The FEM_BATON_n macros define the per-entry point baton structures
+ * - The fem_baton_payload_t contains the union of these structures
+ * - The FEM_VNEXTn_DECL macros declare the post-handoff entry point
+ * - The FEM_VNEXTn macros constitute the per-handoff entry point
+ *
+ * Note that we don't use variadic macros -- we define a variant of these
+ * macros for each of our relevant argument counts. This may seem overly
+ * explicit, but it is deliberate: the object here is to minimize the
+ * future maintenance burden by minimizing the likelihood of introduced
+ * error -- not to minimize the number of characters in this source file.
+ */
+
+#ifndef STACK_GROWTH_DOWN
+#error Downward stack growth assumed.
+#endif
+
+int fem_stack_toodeep;
+uintptr_t fem_stack_needed = 8 * 1024;
+size_t fem_handoff_stacksize = 128 * 1024;
+
+#define FEM_TOODEEP() (STACK_BIAS + (uintptr_t)getfp() - \
+ (uintptr_t)curthread->t_stkbase < fem_stack_needed)
+
+#define FEM_BATON_1(what, t0, l0) \
+ struct { \
+ void *fb_##what##_arg0; \
+ caller_context_t *fb_##what##_ct; \
+ t0 fb_##what##_##l0; \
+ } fb_##what
+
+#define FEM_BATON_2(what, t0, l0, t1, l1) \
+ struct { \
+ void *fb_##what##_arg0; \
+ caller_context_t *fb_##what##_ct; \
+ t0 fb_##what##_##l0; \
+ t1 fb_##what##_##l1; \
+ } fb_##what
+
+#define FEM_BATON_3(what, t0, l0, t1, l1, t2, l2) \
+ struct { \
+ void *fb_##what##_arg0; \
+ caller_context_t *fb_##what##_ct; \
+ t0 fb_##what##_##l0; \
+ t1 fb_##what##_##l1; \
+ t2 fb_##what##_##l2; \
+ } fb_##what
+
+#define FEM_BATON_4(what, t0, l0, t1, l1, t2, l2, t3, l3) \
+ struct { \
+ void *fb_##what##_arg0; \
+ caller_context_t *fb_##what##_ct; \
+ t0 fb_##what##_##l0; \
+ t1 fb_##what##_##l1; \
+ t2 fb_##what##_##l2; \
+ t3 fb_##what##_##l3; \
+ } fb_##what
+
+#define FEM_BATON_5(what, t0, l0, t1, l1, t2, l2, t3, l3, t4, l4) \
+ struct { \
+ void *fb_##what##_arg0; \
+ caller_context_t *fb_##what##_ct; \
+ t0 fb_##what##_##l0; \
+ t1 fb_##what##_##l1; \
+ t2 fb_##what##_##l2; \
+ t3 fb_##what##_##l3; \
+ t4 fb_##what##_##l4; \
+ } fb_##what
+
+#define FEM_BATON_6(what, t0, l0, t1, l1, t2, l2, t3, l3, t4, l4, t5, l5) \
+ struct { \
+ void *fb_##what##_arg0; \
+ caller_context_t *fb_##what##_ct; \
+ t0 fb_##what##_##l0; \
+ t1 fb_##what##_##l1; \
+ t2 fb_##what##_##l2; \
+ t3 fb_##what##_##l3; \
+ t4 fb_##what##_##l4; \
+ t5 fb_##what##_##l5; \
+ } fb_##what
+
+#define FEM_BATON_8(what, t0, l0, t1, l1, t2, l2, t3, l3, t4, l4, t5, l5, \
+ t6, l6, t7, l7) \
+ struct { \
+ void *fb_##what##_arg0; \
+ caller_context_t *fb_##what##_ct; \
+ t0 fb_##what##_##l0; \
+ t1 fb_##what##_##l1; \
+ t2 fb_##what##_##l2; \
+ t3 fb_##what##_##l3; \
+ t4 fb_##what##_##l4; \
+ t5 fb_##what##_##l5; \
+ t6 fb_##what##_##l6; \
+ t7 fb_##what##_##l7; \
+ } fb_##what
+
+#define FEM_BATON_9(what, t0, l0, t1, l1, t2, l2, t3, l3, t4, l4, t5, l5, \
+ t6, l6, t7, l7, t8, l8) \
+ struct { \
+ void *fb_##what##_arg0; \
+ caller_context_t *fb_##what##_ct; \
+ t0 fb_##what##_##l0; \
+ t1 fb_##what##_##l1; \
+ t2 fb_##what##_##l2; \
+ t3 fb_##what##_##l3; \
+ t4 fb_##what##_##l4; \
+ t5 fb_##what##_##l5; \
+ t6 fb_##what##_##l6; \
+ t7 fb_##what##_##l7; \
+ t8 fb_##what##_##l8; \
+ } fb_##what
+
+typedef union {
+ FEM_BATON_2(open, int, mode, cred_t *, cr);
+ FEM_BATON_4(close, int, flag, int, count,
+ offset_t, offset, cred_t *, cr);
+ FEM_BATON_3(read, uio_t *, uiop, int, ioflag, cred_t *, cr);
+ FEM_BATON_3(write, uio_t *, uiop, int, ioflag, cred_t *, cr);
+ FEM_BATON_5(ioctl, int, cmd, intptr_t, arg,
+ int, flag, cred_t *, cr, int *, rvalp);
+ FEM_BATON_3(setfl, int, oflags, int, nflags, cred_t *, cr);
+ FEM_BATON_3(getattr, vattr_t *, vap, int, flags, cred_t *, cr);
+ FEM_BATON_3(setattr, vattr_t *, vap, int, flags, cred_t *, cr);
+ FEM_BATON_3(access, int, mode, int, flags, cred_t *, cr);
+ FEM_BATON_8(lookup, char *, nm, vnode_t **, vpp,
+ pathname_t *, pnp, int, flags, vnode_t *, rdir,
+ cred_t *, cr, int *, direntflags, pathname_t *, realpnp);
+ FEM_BATON_8(create, char *, name, vattr_t *, vap,
+ vcexcl_t, excl, int, mode, vnode_t **, vpp,
+ cred_t *, cr, int, flag, vsecattr_t *, vsecp);
+ FEM_BATON_3(remove, char *, nm, cred_t *, cr, int, flags);
+ FEM_BATON_4(link, vnode_t *, svp, char *, tnm,
+ cred_t *, cr, int, flags);
+ FEM_BATON_5(rename, char *, snm, vnode_t *, tdvp,
+ char *, tnm, cred_t *, cr, int, flags);
+ FEM_BATON_6(mkdir, char *, dirname, vattr_t *, vap,
+ vnode_t **, vpp, cred_t *, cr, int, flags,
+ vsecattr_t *, vsecp);
+ FEM_BATON_4(rmdir, char *, nm, vnode_t *, cdir,
+ cred_t *, cr, int, flags);
+ FEM_BATON_4(readdir, uio_t *, uiop, cred_t *, cr,
+ int *, eofp, int, flags);
+ FEM_BATON_5(symlink, char *, linkname, vattr_t *, vap,
+ char *, target, cred_t *, cr, int, flags);
+ FEM_BATON_2(readlink, uio_t *, uiop, cred_t *, cr);
+ FEM_BATON_2(fsync, int, syncflag, cred_t *, cr);
+ FEM_BATON_1(inactive, cred_t *, cr);
+ FEM_BATON_1(fid, fid_t *, fidp);
+ FEM_BATON_1(rwlock, int, write_lock);
+ FEM_BATON_1(rwunlock, int, write_lock);
+ FEM_BATON_2(seek, offset_t, ooff, offset_t *, noffp);
+ FEM_BATON_1(cmp, vnode_t *, vp2);
+ FEM_BATON_6(frlock, int, cmd, struct flock64 *, bfp,
+ int, flag, offset_t, offset, struct flk_callback *, flk_cbp,
+ cred_t *, cr);
+ FEM_BATON_5(space, int, cmd, struct flock64 *, bfp,
+ int, flag, offset_t, offset, cred_t *, cr);
+ FEM_BATON_1(realvp, vnode_t **, vpp);
+ FEM_BATON_9(getpage, offset_t, off, size_t, len,
+ uint_t *, protp, struct page **, plarr, size_t, plsz,
+ struct seg *, seg, caddr_t, addr, enum seg_rw, rw,
+ cred_t *, cr);
+ FEM_BATON_4(putpage, offset_t, off, size_t, len,
+ int, flags, cred_t *, cr);
+ FEM_BATON_8(map, offset_t, off, struct as *, as,
+ caddr_t *, addrp, size_t, len, uchar_t, prot,
+ uchar_t, maxprot, uint_t, flags, cred_t *, cr);
+ FEM_BATON_8(addmap, offset_t, off, struct as *, as,
+ caddr_t, addr, size_t, len, uchar_t, prot,
+ uchar_t, maxprot, uint_t, flags, cred_t *, cr);
+ FEM_BATON_8(delmap, offset_t, off, struct as *, as,
+ caddr_t, addr, size_t, len, uint_t, prot,
+ uint_t, maxprot, uint_t, flags, cred_t *, cr);
+ FEM_BATON_4(poll, short, events, int, anyyet,
+ short *, reventsp, struct pollhead **, phpp);
+ FEM_BATON_3(dump, caddr_t, addr, offset_t, lbdn, offset_t, dblks);
+ FEM_BATON_3(pathconf, int, cmd, ulong_t *, valp, cred_t *, cr);
+ FEM_BATON_5(pageio, struct page *, pp, u_offset_t, io_off,
+ size_t, io_len, int, flags, cred_t *, cr);
+ FEM_BATON_2(dumpctl, int, action, offset_t *, blkp);
+ FEM_BATON_4(dispose, struct page *, pp, int, flag,
+ int, dn, cred_t *, cr);
+ FEM_BATON_3(setsecattr, vsecattr_t *, vsap, int, flag, cred_t *, cr);
+ FEM_BATON_3(getsecattr, vsecattr_t *, vsap, int, flag, cred_t *, cr);
+ FEM_BATON_4(shrlock, int, cmd, struct shrlock *, shr,
+ int, flag, cred_t *, cr);
+ FEM_BATON_3(vnevent, vnevent_t, vnevent, vnode_t *, dvp, char *, cname);
+ FEM_BATON_3(reqzcbuf, enum uio_rw, ioflag,
+ xuio_t *, xuiop, cred_t *, cr);
+ FEM_BATON_2(retzcbuf, xuio_t *, xuiop, cred_t *, cr);
+} fem_baton_payload_t;
+
+typedef struct {
+ fem_baton_payload_t fb_payload;
+ int (*fb_func)();
+ void (*fb_handoff)();
+ int fb_rval;
+} fem_baton_t;
+
+static int
+fem_handoff(fem_baton_t *bp)
+{
+ fem_stack_toodeep++;
+ thread_splitstack(bp->fb_handoff, bp, fem_handoff_stacksize);
+
+ return (bp->fb_rval);
+}
+
+#define FEM_VNEXT3_DECL(what, a0, a1, a2) \
+void \
+fem_handoff_##what(fem_baton_t *bp) \
+{ \
+ bp->fb_rval = bp->fb_func( \
+ bp->fb_payload.fb_##what.fb_##what##_##a0, \
+ bp->fb_payload.fb_##what.fb_##what##_##a1, \
+ bp->fb_payload.fb_##what.fb_##what##_##a2); \
+}
+
+#define FEM_VNEXT4_DECL(what, a0, a1, a2, a3) \
+void \
+fem_handoff_##what(fem_baton_t *bp) \
+{ \
+ bp->fb_rval = bp->fb_func( \
+ bp->fb_payload.fb_##what.fb_##what##_##a0, \
+ bp->fb_payload.fb_##what.fb_##what##_##a1, \
+ bp->fb_payload.fb_##what.fb_##what##_##a2, \
+ bp->fb_payload.fb_##what.fb_##what##_##a3); \
+}
+
+#define FEM_VNEXT5_DECL(what, a0, a1, a2, a3, a4) \
+void \
+fem_handoff_##what(fem_baton_t *bp) \
+{ \
+ bp->fb_rval = bp->fb_func( \
+ bp->fb_payload.fb_##what.fb_##what##_##a0, \
+ bp->fb_payload.fb_##what.fb_##what##_##a1, \
+ bp->fb_payload.fb_##what.fb_##what##_##a2, \
+ bp->fb_payload.fb_##what.fb_##what##_##a3, \
+ bp->fb_payload.fb_##what.fb_##what##_##a4); \
+}
+
+#define FEM_VNEXT6_DECL(what, a0, a1, a2, a3, a4, a5) \
+void \
+fem_handoff_##what(fem_baton_t *bp) \
+{ \
+ bp->fb_rval = bp->fb_func( \
+ bp->fb_payload.fb_##what.fb_##what##_##a0, \
+ bp->fb_payload.fb_##what.fb_##what##_##a1, \
+ bp->fb_payload.fb_##what.fb_##what##_##a2, \
+ bp->fb_payload.fb_##what.fb_##what##_##a3, \
+ bp->fb_payload.fb_##what.fb_##what##_##a4, \
+ bp->fb_payload.fb_##what.fb_##what##_##a5); \
+}
+
+#define FEM_VNEXT7_DECL(what, a0, a1, a2, a3, a4, a5, a6) \
+void \
+fem_handoff_##what(fem_baton_t *bp) \
+{ \
+ bp->fb_rval = bp->fb_func( \
+ bp->fb_payload.fb_##what.fb_##what##_##a0, \
+ bp->fb_payload.fb_##what.fb_##what##_##a1, \
+ bp->fb_payload.fb_##what.fb_##what##_##a2, \
+ bp->fb_payload.fb_##what.fb_##what##_##a3, \
+ bp->fb_payload.fb_##what.fb_##what##_##a4, \
+ bp->fb_payload.fb_##what.fb_##what##_##a5, \
+ bp->fb_payload.fb_##what.fb_##what##_##a6); \
+}
+
+#define FEM_VNEXT8_DECL(what, a0, a1, a2, a3, a4, a5, a6, a7) \
+void \
+fem_handoff_##what(fem_baton_t *bp) \
+{ \
+ bp->fb_rval = bp->fb_func( \
+ bp->fb_payload.fb_##what.fb_##what##_##a0, \
+ bp->fb_payload.fb_##what.fb_##what##_##a1, \
+ bp->fb_payload.fb_##what.fb_##what##_##a2, \
+ bp->fb_payload.fb_##what.fb_##what##_##a3, \
+ bp->fb_payload.fb_##what.fb_##what##_##a4, \
+ bp->fb_payload.fb_##what.fb_##what##_##a5, \
+ bp->fb_payload.fb_##what.fb_##what##_##a6, \
+ bp->fb_payload.fb_##what.fb_##what##_##a7); \
+}
+
+#define FEM_VNEXT10_DECL(what, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9) \
+void \
+fem_handoff_##what(fem_baton_t *bp) \
+{ \
+ bp->fb_rval = bp->fb_func( \
+ bp->fb_payload.fb_##what.fb_##what##_##a0, \
+ bp->fb_payload.fb_##what.fb_##what##_##a1, \
+ bp->fb_payload.fb_##what.fb_##what##_##a2, \
+ bp->fb_payload.fb_##what.fb_##what##_##a3, \
+ bp->fb_payload.fb_##what.fb_##what##_##a4, \
+ bp->fb_payload.fb_##what.fb_##what##_##a5, \
+ bp->fb_payload.fb_##what.fb_##what##_##a6, \
+ bp->fb_payload.fb_##what.fb_##what##_##a7, \
+ bp->fb_payload.fb_##what.fb_##what##_##a8, \
+ bp->fb_payload.fb_##what.fb_##what##_##a9); \
+}
+
+#define FEM_VNEXT11_DECL(what, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10) \
+void \
+fem_handoff_##what(fem_baton_t *bp) \
+{ \
+ bp->fb_rval = bp->fb_func( \
+ bp->fb_payload.fb_##what.fb_##what##_##a0, \
+ bp->fb_payload.fb_##what.fb_##what##_##a1, \
+ bp->fb_payload.fb_##what.fb_##what##_##a2, \
+ bp->fb_payload.fb_##what.fb_##what##_##a3, \
+ bp->fb_payload.fb_##what.fb_##what##_##a4, \
+ bp->fb_payload.fb_##what.fb_##what##_##a5, \
+ bp->fb_payload.fb_##what.fb_##what##_##a6, \
+ bp->fb_payload.fb_##what.fb_##what##_##a7, \
+ bp->fb_payload.fb_##what.fb_##what##_##a8, \
+ bp->fb_payload.fb_##what.fb_##what##_##a9, \
+ bp->fb_payload.fb_##what.fb_##what##_##a10); \
+}
+
+#define FEM_VNEXT3(what, func, a0, a1, a2) \
+ if (FEM_TOODEEP()) { \
+ fem_baton_t *baton; \
+ int rval; \
+ \
+ baton = kmem_alloc(sizeof (fem_baton_t), KM_SLEEP); \
+ baton->fb_payload.fb_##what.fb_##what##_##a0 = a0; \
+ baton->fb_payload.fb_##what.fb_##what##_##a1 = a1; \
+ baton->fb_payload.fb_##what.fb_##what##_##a2 = a2; \
+ baton->fb_handoff = fem_handoff_##what; \
+ baton->fb_func = func; \
+ \
+ rval = fem_handoff(baton); \
+ kmem_free(baton, sizeof (fem_baton_t)); \
+ \
+ return (rval); \
+ } \
+ return (func(a0, a1, a2))
+
+#define FEM_VNEXT4(what, func, a0, a1, a2, a3) \
+ if (FEM_TOODEEP()) { \
+ fem_baton_t *baton; \
+ int rval; \
+ \
+ baton = kmem_alloc(sizeof (fem_baton_t), KM_SLEEP); \
+ baton->fb_payload.fb_##what.fb_##what##_##a0 = a0; \
+ baton->fb_payload.fb_##what.fb_##what##_##a1 = a1; \
+ baton->fb_payload.fb_##what.fb_##what##_##a2 = a2; \
+ baton->fb_payload.fb_##what.fb_##what##_##a3 = a3; \
+ baton->fb_handoff = fem_handoff_##what; \
+ baton->fb_func = func; \
+ \
+ rval = fem_handoff(baton); \
+ kmem_free(baton, sizeof (fem_baton_t)); \
+ \
+ return (rval); \
+ } \
+ return (func(a0, a1, a2, a3))
+
+#define FEM_VNEXT5(what, func, a0, a1, a2, a3, a4) \
+ if (FEM_TOODEEP()) { \
+ fem_baton_t *baton; \
+ int rval; \
+ \
+ baton = kmem_alloc(sizeof (fem_baton_t), KM_SLEEP); \
+ baton->fb_payload.fb_##what.fb_##what##_##a0 = a0; \
+ baton->fb_payload.fb_##what.fb_##what##_##a1 = a1; \
+ baton->fb_payload.fb_##what.fb_##what##_##a2 = a2; \
+ baton->fb_payload.fb_##what.fb_##what##_##a3 = a3; \
+ baton->fb_payload.fb_##what.fb_##what##_##a4 = a4; \
+ baton->fb_handoff = fem_handoff_##what; \
+ baton->fb_func = func; \
+ \
+ rval = fem_handoff(baton); \
+ kmem_free(baton, sizeof (fem_baton_t)); \
+ \
+ return (rval); \
+ } \
+ return (func(a0, a1, a2, a3, a4))
+
+#define FEM_VNEXT6(what, func, a0, a1, a2, a3, a4, a5) \
+ if (FEM_TOODEEP()) { \
+ fem_baton_t *baton; \
+ int rval; \
+ \
+ baton = kmem_alloc(sizeof (fem_baton_t), KM_SLEEP); \
+ baton->fb_payload.fb_##what.fb_##what##_##a0 = a0; \
+ baton->fb_payload.fb_##what.fb_##what##_##a1 = a1; \
+ baton->fb_payload.fb_##what.fb_##what##_##a2 = a2; \
+ baton->fb_payload.fb_##what.fb_##what##_##a3 = a3; \
+ baton->fb_payload.fb_##what.fb_##what##_##a4 = a4; \
+ baton->fb_payload.fb_##what.fb_##what##_##a5 = a5; \
+ baton->fb_handoff = fem_handoff_##what; \
+ baton->fb_func = func; \
+ \
+ rval = fem_handoff(baton); \
+ kmem_free(baton, sizeof (fem_baton_t)); \
+ \
+ return (rval); \
+ } \
+ return (func(a0, a1, a2, a3, a4, a5))
+
+#define FEM_VNEXT7(what, func, a0, a1, a2, a3, a4, a5, a6) \
+ if (FEM_TOODEEP()) { \
+ fem_baton_t *baton; \
+ int rval; \
+ \
+ baton = kmem_alloc(sizeof (fem_baton_t), KM_SLEEP); \
+ baton->fb_payload.fb_##what.fb_##what##_##a0 = a0; \
+ baton->fb_payload.fb_##what.fb_##what##_##a1 = a1; \
+ baton->fb_payload.fb_##what.fb_##what##_##a2 = a2; \
+ baton->fb_payload.fb_##what.fb_##what##_##a3 = a3; \
+ baton->fb_payload.fb_##what.fb_##what##_##a4 = a4; \
+ baton->fb_payload.fb_##what.fb_##what##_##a5 = a5; \
+ baton->fb_payload.fb_##what.fb_##what##_##a6 = a6; \
+ baton->fb_handoff = fem_handoff_##what; \
+ baton->fb_func = func; \
+ \
+ rval = fem_handoff(baton); \
+ kmem_free(baton, sizeof (fem_baton_t)); \
+ \
+ return (rval); \
+ } \
+ return (func(a0, a1, a2, a3, a4, a5, a6))
+
+#define FEM_VNEXT8(what, func, a0, a1, a2, a3, a4, a5, a6, a7) \
+ if (FEM_TOODEEP()) { \
+ fem_baton_t *baton; \
+ int rval; \
+ \
+ baton = kmem_alloc(sizeof (fem_baton_t), KM_SLEEP); \
+ baton->fb_payload.fb_##what.fb_##what##_##a0 = a0; \
+ baton->fb_payload.fb_##what.fb_##what##_##a1 = a1; \
+ baton->fb_payload.fb_##what.fb_##what##_##a2 = a2; \
+ baton->fb_payload.fb_##what.fb_##what##_##a3 = a3; \
+ baton->fb_payload.fb_##what.fb_##what##_##a4 = a4; \
+ baton->fb_payload.fb_##what.fb_##what##_##a5 = a5; \
+ baton->fb_payload.fb_##what.fb_##what##_##a6 = a6; \
+ baton->fb_payload.fb_##what.fb_##what##_##a7 = a7; \
+ baton->fb_handoff = fem_handoff_##what; \
+ baton->fb_func = func; \
+ \
+ rval = fem_handoff(baton); \
+ kmem_free(baton, sizeof (fem_baton_t)); \
+ \
+ return (rval); \
+ } \
+ return (func(a0, a1, a2, a3, a4, a5, a6, a7))
+
+#define FEM_VNEXT10(what, func, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9) \
+ if (FEM_TOODEEP()) { \
+ fem_baton_t *baton; \
+ int rval; \
+ \
+ baton = kmem_alloc(sizeof (fem_baton_t), KM_SLEEP); \
+ baton->fb_payload.fb_##what.fb_##what##_##a0 = a0; \
+ baton->fb_payload.fb_##what.fb_##what##_##a1 = a1; \
+ baton->fb_payload.fb_##what.fb_##what##_##a2 = a2; \
+ baton->fb_payload.fb_##what.fb_##what##_##a3 = a3; \
+ baton->fb_payload.fb_##what.fb_##what##_##a4 = a4; \
+ baton->fb_payload.fb_##what.fb_##what##_##a5 = a5; \
+ baton->fb_payload.fb_##what.fb_##what##_##a6 = a6; \
+ baton->fb_payload.fb_##what.fb_##what##_##a7 = a7; \
+ baton->fb_payload.fb_##what.fb_##what##_##a8 = a8; \
+ baton->fb_payload.fb_##what.fb_##what##_##a9 = a9; \
+ baton->fb_handoff = fem_handoff_##what; \
+ baton->fb_func = func; \
+ \
+ rval = fem_handoff(baton); \
+ kmem_free(baton, sizeof (fem_baton_t)); \
+ \
+ return (rval); \
+ } \
+ return (func(a0, a1, a2, a3, a4, a5, a6, a7, a8, a9))
+
+#define FEM_VNEXT11(what, func, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10) \
+ if (FEM_TOODEEP()) { \
+ fem_baton_t *baton; \
+ int rval; \
+ \
+ baton = kmem_alloc(sizeof (fem_baton_t), KM_SLEEP); \
+ baton->fb_payload.fb_##what.fb_##what##_##a0 = a0; \
+ baton->fb_payload.fb_##what.fb_##what##_##a1 = a1; \
+ baton->fb_payload.fb_##what.fb_##what##_##a2 = a2; \
+ baton->fb_payload.fb_##what.fb_##what##_##a3 = a3; \
+ baton->fb_payload.fb_##what.fb_##what##_##a4 = a4; \
+ baton->fb_payload.fb_##what.fb_##what##_##a5 = a5; \
+ baton->fb_payload.fb_##what.fb_##what##_##a6 = a6; \
+ baton->fb_payload.fb_##what.fb_##what##_##a7 = a7; \
+ baton->fb_payload.fb_##what.fb_##what##_##a8 = a8; \
+ baton->fb_payload.fb_##what.fb_##what##_##a9 = a9; \
+ baton->fb_payload.fb_##what.fb_##what##_##a10 = a10; \
+ baton->fb_handoff = fem_handoff_##what; \
+ baton->fb_func = func; \
+ \
+ rval = fem_handoff(baton); \
+ kmem_free(baton, sizeof (fem_baton_t)); \
+ \
+ return (rval); \
+ } \
+ return (func(a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10))
+
static fem_t *
fem_alloc()
{
@@ -2036,10 +2571,60 @@ static struct fs_operation_def fshead_vfs_spec[] = {
* 5. Return by invoking the base operation with the base object.
*
* for each classification, there needs to be at least one "next" operation
- * for each "head"operation.
- *
+ * for each "head" operation. Note that we also use the FEM_VNEXTn_DECL macros
+ * to define the function to run when the stack is split; see the discussion
+ * on "File event monitoring handoffs", above.
*/
+FEM_VNEXT4_DECL(open, arg0, mode, cr, ct)
+FEM_VNEXT6_DECL(close, arg0, flag, count, offset, cr, ct)
+FEM_VNEXT5_DECL(read, arg0, uiop, ioflag, cr, ct)
+FEM_VNEXT5_DECL(write, arg0, uiop, ioflag, cr, ct)
+FEM_VNEXT7_DECL(ioctl, arg0, cmd, arg, flag, cr, rvalp, ct)
+FEM_VNEXT5_DECL(setfl, arg0, oflags, nflags, cr, ct)
+FEM_VNEXT5_DECL(getattr, arg0, vap, flags, cr, ct)
+FEM_VNEXT5_DECL(setattr, arg0, vap, flags, cr, ct)
+FEM_VNEXT5_DECL(access, arg0, mode, flags, cr, ct)
+FEM_VNEXT10_DECL(lookup, arg0, nm, vpp, pnp, flags, rdir,
+ cr, ct, direntflags, realpnp)
+FEM_VNEXT10_DECL(create, arg0, name, vap, excl, mode, vpp, cr, flag, ct, vsecp)
+FEM_VNEXT5_DECL(remove, arg0, nm, cr, ct, flags)
+FEM_VNEXT6_DECL(link, arg0, svp, tnm, cr, ct, flags)
+FEM_VNEXT7_DECL(rename, arg0, snm, tdvp, tnm, cr, ct, flags)
+FEM_VNEXT8_DECL(mkdir, arg0, dirname, vap, vpp, cr, ct, flags, vsecp)
+FEM_VNEXT6_DECL(rmdir, arg0, nm, cdir, cr, ct, flags)
+FEM_VNEXT6_DECL(readdir, arg0, uiop, cr, eofp, ct, flags)
+FEM_VNEXT7_DECL(symlink, arg0, linkname, vap, target, cr, ct, flags)
+FEM_VNEXT4_DECL(readlink, arg0, uiop, cr, ct)
+FEM_VNEXT4_DECL(fsync, arg0, syncflag, cr, ct)
+FEM_VNEXT3_DECL(fid, arg0, fidp, ct)
+FEM_VNEXT3_DECL(rwlock, arg0, write_lock, ct)
+FEM_VNEXT4_DECL(seek, arg0, ooff, noffp, ct)
+FEM_VNEXT3_DECL(cmp, arg0, vp2, ct)
+FEM_VNEXT8_DECL(frlock, arg0, cmd, bfp, flag, offset, flk_cbp, cr, ct)
+FEM_VNEXT7_DECL(space, arg0, cmd, bfp, flag, offset, cr, ct)
+FEM_VNEXT3_DECL(realvp, arg0, vpp, ct)
+FEM_VNEXT11_DECL(getpage, arg0, off, len, protp, plarr, plsz,
+ seg, addr, rw, cr, ct)
+FEM_VNEXT6_DECL(putpage, arg0, off, len, flags, cr, ct)
+FEM_VNEXT10_DECL(map, arg0, off, as, addrp, len, prot, maxprot,
+ flags, cr, ct)
+FEM_VNEXT10_DECL(addmap, arg0, off, as, addr, len, prot, maxprot,
+ flags, cr, ct)
+FEM_VNEXT10_DECL(delmap, arg0, off, as, addr, len, prot, maxprot,
+ flags, cr, ct)
+FEM_VNEXT6_DECL(poll, arg0, events, anyyet, reventsp, phpp, ct)
+FEM_VNEXT5_DECL(dump, arg0, addr, lbdn, dblks, ct)
+FEM_VNEXT5_DECL(pathconf, arg0, cmd, valp, cr, ct)
+FEM_VNEXT7_DECL(pageio, arg0, pp, io_off, io_len, flags, cr, ct)
+FEM_VNEXT4_DECL(dumpctl, arg0, action, blkp, ct)
+FEM_VNEXT5_DECL(setsecattr, arg0, vsap, flag, cr, ct)
+FEM_VNEXT5_DECL(getsecattr, arg0, vsap, flag, cr, ct)
+FEM_VNEXT6_DECL(shrlock, arg0, cmd, shr, flag, cr, ct)
+FEM_VNEXT5_DECL(vnevent, arg0, vnevent, dvp, cname, ct)
+FEM_VNEXT5_DECL(reqzcbuf, arg0, ioflag, xuiop, cr, ct)
+FEM_VNEXT4_DECL(retzcbuf, arg0, xuiop, cr, ct)
+
int
vnext_open(femarg_t *vf, int mode, cred_t *cr, caller_context_t *ct)
{
@@ -2051,7 +2636,7 @@ vnext_open(femarg_t *vf, int mode, cred_t *cr, caller_context_t *ct)
vsop_find(vf, &func, int, &arg0, vop_open, femop_open);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, mode, cr, ct));
+ FEM_VNEXT4(open, func, arg0, mode, cr, ct);
}
int
@@ -2066,7 +2651,7 @@ vnext_close(femarg_t *vf, int flag, int count, offset_t offset, cred_t *cr,
vsop_find(vf, &func, int, &arg0, vop_close, femop_close);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, flag, count, offset, cr, ct));
+ FEM_VNEXT6(close, func, arg0, flag, count, offset, cr, ct);
}
int
@@ -2081,7 +2666,7 @@ vnext_read(femarg_t *vf, uio_t *uiop, int ioflag, cred_t *cr,
vsop_find(vf, &func, int, &arg0, vop_read, femop_read);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, uiop, ioflag, cr, ct));
+ FEM_VNEXT5(read, func, arg0, uiop, ioflag, cr, ct);
}
int
@@ -2096,7 +2681,7 @@ vnext_write(femarg_t *vf, uio_t *uiop, int ioflag, cred_t *cr,
vsop_find(vf, &func, int, &arg0, vop_write, femop_write);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, uiop, ioflag, cr, ct));
+ FEM_VNEXT5(write, func, arg0, uiop, ioflag, cr, ct);
}
int
@@ -2111,7 +2696,7 @@ vnext_ioctl(femarg_t *vf, int cmd, intptr_t arg, int flag, cred_t *cr,
vsop_find(vf, &func, int, &arg0, vop_ioctl, femop_ioctl);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, cmd, arg, flag, cr, rvalp, ct));
+ FEM_VNEXT7(ioctl, func, arg0, cmd, arg, flag, cr, rvalp, ct);
}
int
@@ -2126,7 +2711,7 @@ vnext_setfl(femarg_t *vf, int oflags, int nflags, cred_t *cr,
vsop_find(vf, &func, int, &arg0, vop_setfl, femop_setfl);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, oflags, nflags, cr, ct));
+ FEM_VNEXT5(setfl, func, arg0, oflags, nflags, cr, ct);
}
int
@@ -2141,7 +2726,7 @@ vnext_getattr(femarg_t *vf, vattr_t *vap, int flags, cred_t *cr,
vsop_find(vf, &func, int, &arg0, vop_getattr, femop_getattr);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, vap, flags, cr, ct));
+ FEM_VNEXT5(getattr, func, arg0, vap, flags, cr, ct);
}
int
@@ -2156,7 +2741,7 @@ vnext_setattr(femarg_t *vf, vattr_t *vap, int flags, cred_t *cr,
vsop_find(vf, &func, int, &arg0, vop_setattr, femop_setattr);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, vap, flags, cr, ct));
+ FEM_VNEXT5(setattr, func, arg0, vap, flags, cr, ct);
}
int
@@ -2171,7 +2756,7 @@ vnext_access(femarg_t *vf, int mode, int flags, cred_t *cr,
vsop_find(vf, &func, int, &arg0, vop_access, femop_access);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, mode, flags, cr, ct));
+ FEM_VNEXT5(access, func, arg0, mode, flags, cr, ct);
}
int
@@ -2187,8 +2772,8 @@ vnext_lookup(femarg_t *vf, char *nm, vnode_t **vpp, pathname_t *pnp,
vsop_find(vf, &func, int, &arg0, vop_lookup, femop_lookup);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, nm, vpp, pnp, flags, rdir, cr, ct,
- direntflags, realpnp));
+ FEM_VNEXT10(lookup, func, arg0, nm, vpp, pnp, flags, rdir, cr, ct,
+ direntflags, realpnp);
}
int
@@ -2204,7 +2789,8 @@ vnext_create(femarg_t *vf, char *name, vattr_t *vap, vcexcl_t excl,
vsop_find(vf, &func, int, &arg0, vop_create, femop_create);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, name, vap, excl, mode, vpp, cr, flag, ct, vsecp));
+ FEM_VNEXT10(create, func, arg0, name, vap, excl,
+ mode, vpp, cr, flag, ct, vsecp);
}
int
@@ -2219,7 +2805,7 @@ vnext_remove(femarg_t *vf, char *nm, cred_t *cr, caller_context_t *ct,
vsop_find(vf, &func, int, &arg0, vop_remove, femop_remove);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, nm, cr, ct, flags));
+ FEM_VNEXT5(remove, func, arg0, nm, cr, ct, flags);
}
int
@@ -2234,7 +2820,7 @@ vnext_link(femarg_t *vf, vnode_t *svp, char *tnm, cred_t *cr,
vsop_find(vf, &func, int, &arg0, vop_link, femop_link);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, svp, tnm, cr, ct, flags));
+ FEM_VNEXT6(link, func, arg0, svp, tnm, cr, ct, flags);
}
int
@@ -2249,7 +2835,7 @@ vnext_rename(femarg_t *vf, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr,
vsop_find(vf, &func, int, &arg0, vop_rename, femop_rename);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, snm, tdvp, tnm, cr, ct, flags));
+ FEM_VNEXT7(rename, func, arg0, snm, tdvp, tnm, cr, ct, flags);
}
int
@@ -2264,7 +2850,7 @@ vnext_mkdir(femarg_t *vf, char *dirname, vattr_t *vap, vnode_t **vpp,
vsop_find(vf, &func, int, &arg0, vop_mkdir, femop_mkdir);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, dirname, vap, vpp, cr, ct, flags, vsecp));
+ FEM_VNEXT8(mkdir, func, arg0, dirname, vap, vpp, cr, ct, flags, vsecp);
}
int
@@ -2279,7 +2865,7 @@ vnext_rmdir(femarg_t *vf, char *nm, vnode_t *cdir, cred_t *cr,
vsop_find(vf, &func, int, &arg0, vop_rmdir, femop_rmdir);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, nm, cdir, cr, ct, flags));
+ FEM_VNEXT6(rmdir, func, arg0, nm, cdir, cr, ct, flags);
}
int
@@ -2294,7 +2880,7 @@ vnext_readdir(femarg_t *vf, uio_t *uiop, cred_t *cr, int *eofp,
vsop_find(vf, &func, int, &arg0, vop_readdir, femop_readdir);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, uiop, cr, eofp, ct, flags));
+ FEM_VNEXT6(readdir, func, arg0, uiop, cr, eofp, ct, flags);
}
int
@@ -2309,7 +2895,7 @@ vnext_symlink(femarg_t *vf, char *linkname, vattr_t *vap, char *target,
vsop_find(vf, &func, int, &arg0, vop_symlink, femop_symlink);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, linkname, vap, target, cr, ct, flags));
+ FEM_VNEXT7(symlink, func, arg0, linkname, vap, target, cr, ct, flags);
}
int
@@ -2323,7 +2909,7 @@ vnext_readlink(femarg_t *vf, uio_t *uiop, cred_t *cr, caller_context_t *ct)
vsop_find(vf, &func, int, &arg0, vop_readlink, femop_readlink);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, uiop, cr, ct));
+ FEM_VNEXT4(readlink, func, arg0, uiop, cr, ct);
}
int
@@ -2337,7 +2923,7 @@ vnext_fsync(femarg_t *vf, int syncflag, cred_t *cr, caller_context_t *ct)
vsop_find(vf, &func, int, &arg0, vop_fsync, femop_fsync);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, syncflag, cr, ct));
+ FEM_VNEXT4(fsync, func, arg0, syncflag, cr, ct);
}
void
@@ -2365,7 +2951,7 @@ vnext_fid(femarg_t *vf, fid_t *fidp, caller_context_t *ct)
vsop_find(vf, &func, int, &arg0, vop_fid, femop_fid);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, fidp, ct));
+ FEM_VNEXT3(fid, func, arg0, fidp, ct);
}
int
@@ -2379,7 +2965,7 @@ vnext_rwlock(femarg_t *vf, int write_lock, caller_context_t *ct)
vsop_find(vf, &func, int, &arg0, vop_rwlock, femop_rwlock);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, write_lock, ct));
+ FEM_VNEXT3(rwlock, func, arg0, write_lock, ct);
}
void
@@ -2407,7 +2993,7 @@ vnext_seek(femarg_t *vf, offset_t ooff, offset_t *noffp, caller_context_t *ct)
vsop_find(vf, &func, int, &arg0, vop_seek, femop_seek);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, ooff, noffp, ct));
+ FEM_VNEXT4(seek, func, arg0, ooff, noffp, ct);
}
int
@@ -2421,7 +3007,7 @@ vnext_cmp(femarg_t *vf, vnode_t *vp2, caller_context_t *ct)
vsop_find(vf, &func, int, &arg0, vop_cmp, femop_cmp);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, vp2, ct));
+ FEM_VNEXT3(cmp, func, arg0, vp2, ct);
}
int
@@ -2437,7 +3023,7 @@ vnext_frlock(femarg_t *vf, int cmd, struct flock64 *bfp, int flag,
vsop_find(vf, &func, int, &arg0, vop_frlock, femop_frlock);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, cmd, bfp, flag, offset, flk_cbp, cr, ct));
+ FEM_VNEXT8(frlock, func, arg0, cmd, bfp, flag, offset, flk_cbp, cr, ct);
}
int
@@ -2452,7 +3038,7 @@ vnext_space(femarg_t *vf, int cmd, struct flock64 *bfp, int flag,
vsop_find(vf, &func, int, &arg0, vop_space, femop_space);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, cmd, bfp, flag, offset, cr, ct));
+ FEM_VNEXT7(space, func, arg0, cmd, bfp, flag, offset, cr, ct);
}
int
@@ -2466,7 +3052,7 @@ vnext_realvp(femarg_t *vf, vnode_t **vpp, caller_context_t *ct)
vsop_find(vf, &func, int, &arg0, vop_realvp, femop_realvp);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, vpp, ct));
+ FEM_VNEXT3(realvp, func, arg0, vpp, ct);
}
int
@@ -2482,8 +3068,8 @@ vnext_getpage(femarg_t *vf, offset_t off, size_t len, uint_t *protp,
vsop_find(vf, &func, int, &arg0, vop_getpage, femop_getpage);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, off, len, protp, plarr, plsz, seg, addr, rw,
- cr, ct));
+ FEM_VNEXT11(getpage, func, arg0, off, len, protp,
+ plarr, plsz, seg, addr, rw, cr, ct);
}
int
@@ -2498,7 +3084,7 @@ vnext_putpage(femarg_t *vf, offset_t off, size_t len, int flags,
vsop_find(vf, &func, int, &arg0, vop_putpage, femop_putpage);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, off, len, flags, cr, ct));
+ FEM_VNEXT6(putpage, func, arg0, off, len, flags, cr, ct);
}
int
@@ -2514,8 +3100,8 @@ vnext_map(femarg_t *vf, offset_t off, struct as *as, caddr_t *addrp,
vsop_find(vf, &func, int, &arg0, vop_map, femop_map);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, off, as, addrp, len, prot, maxprot, flags,
- cr, ct));
+ FEM_VNEXT10(map, func, arg0, off, as, addrp, len, prot, maxprot, flags,
+ cr, ct);
}
int
@@ -2531,8 +3117,8 @@ vnext_addmap(femarg_t *vf, offset_t off, struct as *as, caddr_t addr,
vsop_find(vf, &func, int, &arg0, vop_addmap, femop_addmap);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, off, as, addr, len, prot, maxprot, flags,
- cr, ct));
+ FEM_VNEXT10(addmap, func, arg0, off, as, addr, len, prot, maxprot,
+ flags, cr, ct);
}
int
@@ -2548,8 +3134,8 @@ vnext_delmap(femarg_t *vf, offset_t off, struct as *as, caddr_t addr,
vsop_find(vf, &func, int, &arg0, vop_delmap, femop_delmap);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, off, as, addr, len, prot, maxprot, flags,
- cr, ct));
+ FEM_VNEXT10(delmap, func, arg0, off, as, addr, len, prot, maxprot,
+ flags, cr, ct);
}
int
@@ -2564,7 +3150,7 @@ vnext_poll(femarg_t *vf, short events, int anyyet, short *reventsp,
vsop_find(vf, &func, int, &arg0, vop_poll, femop_poll);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, events, anyyet, reventsp, phpp, ct));
+ FEM_VNEXT6(poll, func, arg0, events, anyyet, reventsp, phpp, ct);
}
int
@@ -2579,7 +3165,7 @@ vnext_dump(femarg_t *vf, caddr_t addr, offset_t lbdn, offset_t dblks,
vsop_find(vf, &func, int, &arg0, vop_dump, femop_dump);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, addr, lbdn, dblks, ct));
+ FEM_VNEXT5(dump, func, arg0, addr, lbdn, dblks, ct);
}
int
@@ -2594,7 +3180,7 @@ vnext_pathconf(femarg_t *vf, int cmd, ulong_t *valp, cred_t *cr,
vsop_find(vf, &func, int, &arg0, vop_pathconf, femop_pathconf);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, cmd, valp, cr, ct));
+ FEM_VNEXT5(pathconf, func, arg0, cmd, valp, cr, ct);
}
int
@@ -2609,7 +3195,7 @@ vnext_pageio(femarg_t *vf, struct page *pp, u_offset_t io_off,
vsop_find(vf, &func, int, &arg0, vop_pageio, femop_pageio);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, pp, io_off, io_len, flags, cr, ct));
+ FEM_VNEXT7(pageio, func, arg0, pp, io_off, io_len, flags, cr, ct);
}
int
@@ -2623,7 +3209,7 @@ vnext_dumpctl(femarg_t *vf, int action, offset_t *blkp, caller_context_t *ct)
vsop_find(vf, &func, int, &arg0, vop_dumpctl, femop_dumpctl);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, action, blkp, ct));
+ FEM_VNEXT4(dumpctl, func, arg0, action, blkp, ct);
}
void
@@ -2653,7 +3239,7 @@ vnext_setsecattr(femarg_t *vf, vsecattr_t *vsap, int flag, cred_t *cr,
vsop_find(vf, &func, int, &arg0, vop_setsecattr, femop_setsecattr);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, vsap, flag, cr, ct));
+ FEM_VNEXT5(setsecattr, func, arg0, vsap, flag, cr, ct);
}
int
@@ -2668,7 +3254,7 @@ vnext_getsecattr(femarg_t *vf, vsecattr_t *vsap, int flag, cred_t *cr,
vsop_find(vf, &func, int, &arg0, vop_getsecattr, femop_getsecattr);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, vsap, flag, cr, ct));
+ FEM_VNEXT5(getsecattr, func, arg0, vsap, flag, cr, ct);
}
int
@@ -2683,7 +3269,7 @@ vnext_shrlock(femarg_t *vf, int cmd, struct shrlock *shr, int flag,
vsop_find(vf, &func, int, &arg0, vop_shrlock, femop_shrlock);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, cmd, shr, flag, cr, ct));
+ FEM_VNEXT6(shrlock, func, arg0, cmd, shr, flag, cr, ct);
}
int
@@ -2698,7 +3284,7 @@ vnext_vnevent(femarg_t *vf, vnevent_t vnevent, vnode_t *dvp, char *cname,
vsop_find(vf, &func, int, &arg0, vop_vnevent, femop_vnevent);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, vnevent, dvp, cname, ct));
+ FEM_VNEXT5(vnevent, func, arg0, vnevent, dvp, cname, ct);
}
int
@@ -2713,7 +3299,7 @@ vnext_reqzcbuf(femarg_t *vf, enum uio_rw ioflag, xuio_t *xuiop, cred_t *cr,
vsop_find(vf, &func, int, &arg0, vop_reqzcbuf, femop_reqzcbuf);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, ioflag, xuiop, cr, ct));
+ FEM_VNEXT5(reqzcbuf, func, arg0, ioflag, xuiop, cr, ct);
}
int
@@ -2727,7 +3313,7 @@ vnext_retzcbuf(femarg_t *vf, xuio_t *xuiop, cred_t *cr, caller_context_t *ct)
vsop_find(vf, &func, int, &arg0, vop_retzcbuf, femop_retzcbuf);
ASSERT(func != NULL);
ASSERT(arg0 != NULL);
- return ((*func)(arg0, xuiop, cr, ct));
+ FEM_VNEXT4(retzcbuf, func, arg0, xuiop, cr, ct);
}
int
diff --git a/usr/src/uts/common/fs/fifofs/fifosubr.c b/usr/src/uts/common/fs/fifofs/fifosubr.c
index 6e56000ffe..56204c6741 100644
--- a/usr/src/uts/common/fs/fifofs/fifosubr.c
+++ b/usr/src/uts/common/fs/fifofs/fifosubr.c
@@ -614,9 +614,12 @@ fifo_stropen(vnode_t **vpp, int flag, cred_t *crp, int dotwist, int lockheld)
/*
* The other end of the pipe is almost closed so
* reject any other open on this end of the pipe
- * This only happens with a pipe mounted under namefs
+ * This normally only happens with a pipe mounted under namefs, but
+ * we can also see an open via proc/fd, which should still succeed.
+ * To indicate the proc/fd case the FKLYR flag is passed.
*/
- if ((fnp->fn_flag & (FIFOCLOSE|ISPIPE)) == (FIFOCLOSE|ISPIPE)) {
+ if ((fnp->fn_flag & (FIFOCLOSE|ISPIPE)) == (FIFOCLOSE|ISPIPE) &&
+ (flag & FKLYR) == 0) {
fifo_cleanup(oldvp, flag);
cv_broadcast(&fnp->fn_wait_cv);
if (!lockheld)
diff --git a/usr/src/uts/common/fs/hyprlofs/hyprlofs_dir.c b/usr/src/uts/common/fs/hyprlofs/hyprlofs_dir.c
new file mode 100644
index 0000000000..05ee2c6e09
--- /dev/null
+++ b/usr/src/uts/common/fs/hyprlofs/hyprlofs_dir.c
@@ -0,0 +1,640 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2012, Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/vfs.h>
+#include <sys/vnode.h>
+#include <sys/errno.h>
+#include <sys/cmn_err.h>
+#include <sys/cred.h>
+#include <sys/stat.h>
+#include <sys/policy.h>
+#include <sys/fs/hyprlofs_info.h>
+
+static int hldir_make_hlnode(hlnode_t *, hlfsmount_t *, vattr_t *, enum de_op,
+ vnode_t *, hlnode_t **, cred_t *);
+static int hldiraddentry(hlnode_t *, hlnode_t *, char *);
+
+
+#define HL_HASH_SIZE 8192 /* must be power of 2 */
+#define HL_MUTEX_SIZE 64
+
+static hldirent_t *hl_hashtable[HL_HASH_SIZE];
+static kmutex_t hl_hashmutex[HL_MUTEX_SIZE];
+
+#define HL_HASH_INDEX(a) ((a) & (HL_HASH_SIZE-1))
+#define HL_MUTEX_INDEX(a) ((a) & (HL_MUTEX_SIZE-1))
+
+#define HYPRLOFS_HASH(tp, name, hash) \
+ { \
+ char Xc, *Xcp; \
+ hash = (uint_t)(uintptr_t)(tp) >> 8; \
+ for (Xcp = (name); (Xc = *Xcp) != 0; Xcp++) \
+ hash = (hash << 4) + hash + (uint_t)Xc; \
+ }
+
+void
+hyprlofs_hash_init(void)
+{
+ int ix;
+
+ for (ix = 0; ix < HL_MUTEX_SIZE; ix++)
+ mutex_init(&hl_hashmutex[ix], NULL, MUTEX_DEFAULT, NULL);
+}
+
+static void
+hyprlofs_hash_in(hldirent_t *h)
+{
+ uint_t hash;
+ hldirent_t **prevpp;
+ kmutex_t *hmtx;
+
+ HYPRLOFS_HASH(h->hld_parent, h->hld_name, hash);
+ h->hld_hash = hash;
+ prevpp = &hl_hashtable[HL_HASH_INDEX(hash)];
+ hmtx = &hl_hashmutex[HL_MUTEX_INDEX(hash)];
+ mutex_enter(hmtx);
+ h->hld_link = *prevpp;
+ *prevpp = h;
+ mutex_exit(hmtx);
+}
+
+/* Remove hldirent *h from the hash list. */
+static void
+hyprlofs_hash_out(hldirent_t *h)
+{
+ uint_t hash;
+ hldirent_t **prevpp;
+ kmutex_t *hmtx;
+
+ hash = h->hld_hash;
+ prevpp = &hl_hashtable[HL_HASH_INDEX(hash)];
+ hmtx = &hl_hashmutex[HL_MUTEX_INDEX(hash)];
+ mutex_enter(hmtx);
+ while (*prevpp != h)
+ prevpp = &(*prevpp)->hld_link;
+ *prevpp = h->hld_link;
+ mutex_exit(hmtx);
+}
+
+static hldirent_t *
+hyprlofs_hash_lookup(char *name, hlnode_t *parent, uint_t hold,
+ hlnode_t **found)
+{
+ hldirent_t *l;
+ uint_t hash;
+ kmutex_t *hmtx;
+ hlnode_t *hnp;
+
+ HYPRLOFS_HASH(parent, name, hash);
+ hmtx = &hl_hashmutex[HL_MUTEX_INDEX(hash)];
+ mutex_enter(hmtx);
+ l = hl_hashtable[HL_HASH_INDEX(hash)];
+ while (l) {
+ if (l->hld_hash == hash && l->hld_parent == parent &&
+ strcmp(l->hld_name, name) == 0) {
+ /*
+ * Ensure that the hlnode that we put a hold on is the
+ * same one that we pass back. Thus the temp. var
+ * hnp is necessary.
+ */
+ hnp = l->hld_hlnode;
+ if (hold) {
+ ASSERT(hnp);
+ hlnode_hold(hnp);
+ }
+ if (found)
+ *found = hnp;
+ mutex_exit(hmtx);
+ return (l);
+ } else {
+ l = l->hld_link;
+ }
+ }
+ mutex_exit(hmtx);
+ return (NULL);
+}
+
+/*
+ * Search directory 'parent' for entry 'name'.
+ *
+ * The calling thread can't hold the write version of the rwlock for the
+ * directory being searched
+ *
+ * On success *foundtp points to the found hlnode with its vnode held.
+ */
+int
+hyprlofs_dirlookup(hlnode_t *parent, char *name, hlnode_t **foundtp, cred_t *cr)
+{
+ int error;
+
+ *foundtp = NULL;
+ if (parent->hln_type != VDIR)
+ return (ENOTDIR);
+
+ if ((error = hyprlofs_taccess(parent, VEXEC, cr)))
+ return (error);
+
+ if (*name == '\0') {
+ hlnode_hold(parent);
+ *foundtp = parent;
+ return (0);
+ }
+
+ /*
+ * Search the directory for the matching name. We need the lock
+ * protecting the hln_dir list so that it doesn't change out from
+ * underneath us. hyprlofs_hash_lookup() will pass back the hlnode
+ * with a hold on it.
+ */
+ if (hyprlofs_hash_lookup(name, parent, 1, foundtp) != NULL) {
+ ASSERT(*foundtp);
+ return (0);
+ }
+
+ return (ENOENT);
+}
+
+/*
+ * Enter a directory entry (either a file or subdir, depending on op) for
+ * 'name' and 'hp' into directory 'dir'
+ */
+int
+hyprlofs_direnter(
+ hlfsmount_t *hm,
+ hlnode_t *dir, /* target directory to make entry in */
+ char *name, /* name of entry */
+ enum de_op op, /* entry operation */
+ vnode_t *realvp, /* real vnode */
+ vattr_t *va,
+ hlnode_t **hpp, /* return hlnode */
+ cred_t *cr)
+{
+ hldirent_t *hdp;
+ hlnode_t *found = NULL;
+ hlnode_t *hp;
+ int error = 0;
+ char *s;
+
+ /* hln_rwlock is held to serialize direnter and dirdeletes */
+ ASSERT(RW_WRITE_HELD(&dir->hln_rwlock));
+ ASSERT(dir->hln_type == VDIR);
+
+ /* Don't allow '/' characters in pathname component */
+ for (s = name; *s; s++)
+ if (*s == '/')
+ return (EACCES);
+
+ if (name[0] == '\0')
+ panic("hyprlofs_direnter: NULL name");
+
+ /*
+ * This might be a "dangling detached directory". It could have been
+ * removed, but a reference to it kept in u_cwd. Don't bother searching
+ * it, and with any luck the user will get tired of dealing with us and
+ * cd to some absolute pathway. This is in ufs, too.
+ */
+ if (dir->hln_nlink == 0) {
+ return (ENOENT);
+ }
+
+ /* Search for the entry. Return "found" if it exists. */
+ hdp = hyprlofs_hash_lookup(name, dir, 1, &found);
+
+ if (hdp) {
+ ASSERT(found);
+ switch (op) {
+ case DE_CREATE:
+ case DE_MKDIR:
+ if (hpp) {
+ *hpp = found;
+ error = EEXIST;
+ } else {
+ hlnode_rele(found);
+ }
+ break;
+ }
+ } else {
+
+ /*
+ * The entry does not exist. Check write perms in dir to see if
+ * entry can be created.
+ */
+ if ((error = hyprlofs_taccess(dir, VWRITE, cr)))
+ return (error);
+
+ /* Make new hlnode and directory entry as required. */
+ if ((error = hldir_make_hlnode(dir, hm, va, op, realvp, &hp,
+ cr)))
+ return (error);
+
+ if ((error = hldiraddentry(dir, hp, name))) {
+ /* Unmake the inode we just made. */
+ rw_enter(&hp->hln_rwlock, RW_WRITER);
+ if ((hp->hln_type) == VDIR) {
+ ASSERT(hdp == NULL);
+ /* cleanup allocs made by hyprlofs_dirinit() */
+ hyprlofs_dirtrunc(hp);
+ }
+ mutex_enter(&hp->hln_tlock);
+ hp->hln_nlink = 0;
+ mutex_exit(&hp->hln_tlock);
+ gethrestime(&hp->hln_ctime);
+ rw_exit(&hp->hln_rwlock);
+ hlnode_rele(hp);
+ hp = NULL;
+ } else if (hpp) {
+ *hpp = hp;
+ } else {
+ hlnode_rele(hp);
+ }
+ }
+
+ return (error);
+}
+
+/*
+ * Delete entry hp of name "nm" from dir. Free dir entry space and decrement
+ * link count on hlnode(s).
+ */
+int
+hyprlofs_dirdelete(hlnode_t *dir, hlnode_t *hp, char *nm, enum dr_op op,
+ cred_t *cr)
+{
+ hldirent_t *hpdp;
+ int error;
+ size_t namelen;
+ hlnode_t *hnp;
+ timestruc_t now;
+
+ ASSERT(RW_WRITE_HELD(&dir->hln_rwlock));
+ ASSERT(RW_WRITE_HELD(&hp->hln_rwlock));
+ ASSERT(dir->hln_type == VDIR);
+
+ if (nm[0] == '\0')
+ panic("hyprlofs_dirdelete: NULL name for %p", (void *)hp);
+
+ /* return error if removing . or .. */
+ if (nm[0] == '.') {
+ if (nm[1] == '\0')
+ return (EINVAL);
+ if (nm[1] == '.' && nm[2] == '\0')
+ return (EEXIST); /* thus in ufs */
+ }
+
+ if ((error = hyprlofs_taccess(dir, VEXEC|VWRITE, cr)) != 0)
+ return (error);
+
+ if (dir->hln_dir == NULL)
+ return (ENOENT);
+
+ hpdp = hyprlofs_hash_lookup(nm, dir, 0, &hnp);
+ if (hpdp == NULL) {
+ /*
+ * If it is gone, some other thread got here first!
+ * Return error ENOENT.
+ */
+ return (ENOENT);
+ }
+
+ /*
+ * If the hlnode in the hldirent changed (shouldn't happen since we
+ * don't support rename) then original is gone, so return that status
+ * (same as UFS).
+ */
+ if (hp != hnp)
+ return (ENOENT);
+
+ hyprlofs_hash_out(hpdp);
+
+ /* Take hpdp out of the directory list. */
+ ASSERT(hpdp->hld_next != hpdp);
+ ASSERT(hpdp->hld_prev != hpdp);
+ if (hpdp->hld_prev) {
+ hpdp->hld_prev->hld_next = hpdp->hld_next;
+ }
+ if (hpdp->hld_next) {
+ hpdp->hld_next->hld_prev = hpdp->hld_prev;
+ }
+
+ /*
+ * If the roving slot pointer happens to match hpdp, point it at the
+ * previous dirent.
+ */
+ if (dir->hln_dir->hld_prev == hpdp) {
+ dir->hln_dir->hld_prev = hpdp->hld_prev;
+ }
+ ASSERT(hpdp->hld_next != hpdp);
+ ASSERT(hpdp->hld_prev != hpdp);
+
+ /* hpdp points to the correct directory entry */
+ namelen = strlen(hpdp->hld_name) + 1;
+
+ kmem_free(hpdp, sizeof (hldirent_t) + namelen);
+ dir->hln_size -= (sizeof (hldirent_t) + namelen);
+ dir->hln_dirents--;
+
+ gethrestime(&now);
+ dir->hln_mtime = now;
+ dir->hln_ctime = now;
+ hp->hln_ctime = now;
+
+ ASSERT(hp->hln_nlink > 0);
+ DECR_COUNT(&hp->hln_nlink, &hp->hln_tlock);
+ if (op == DR_RMDIR && hp->hln_type == VDIR) {
+ hyprlofs_dirtrunc(hp);
+ ASSERT(hp->hln_nlink == 0);
+ }
+ return (0);
+}
+
+/*
+ * hyprlofs_dirinit initializes a dir with '.' and '..' entries without
+ * checking perms and locking
+ */
+void
+hyprlofs_dirinit(
+ hlnode_t *parent, /* parent of directory to initialize */
+ hlnode_t *dir) /* the new directory */
+{
+ hldirent_t *dot, *dotdot;
+ timestruc_t now;
+
+ ASSERT(RW_WRITE_HELD(&parent->hln_rwlock));
+ ASSERT(dir->hln_type == VDIR);
+
+ dot = kmem_zalloc(sizeof (hldirent_t) + 2, KM_SLEEP);
+ dotdot = kmem_zalloc(sizeof (hldirent_t) + 3, KM_SLEEP);
+
+ /* Initialize the entries */
+ dot->hld_hlnode = dir;
+ dot->hld_offset = 0;
+ dot->hld_name = (char *)dot + sizeof (hldirent_t);
+ dot->hld_name[0] = '.';
+ dot->hld_parent = dir;
+ hyprlofs_hash_in(dot);
+
+ dotdot->hld_hlnode = parent;
+ dotdot->hld_offset = 1;
+ dotdot->hld_name = (char *)dotdot + sizeof (hldirent_t);
+ dotdot->hld_name[0] = '.';
+ dotdot->hld_name[1] = '.';
+ dotdot->hld_parent = dir;
+ hyprlofs_hash_in(dotdot);
+
+ /* Initialize directory entry list. */
+ dot->hld_next = dotdot;
+ dot->hld_prev = dotdot;
+ dotdot->hld_next = NULL;
+ dotdot->hld_prev = dot;
+
+ gethrestime(&now);
+ dir->hln_mtime = now;
+ dir->hln_ctime = now;
+
+ /*
+ * Since hyprlofs_dirinit is called with both dir and parent being the
+ * same for the root vnode, we need to increment this before we set
+ * hln_nlink = 2 below.
+ */
+ INCR_COUNT(&parent->hln_nlink, &parent->hln_tlock);
+ parent->hln_ctime = now;
+
+ dir->hln_dir = dot;
+ dir->hln_size = 2 * sizeof (hldirent_t) + 5; /* dot and dotdot */
+ dir->hln_dirents = 2;
+ dir->hln_nlink = 2;
+}
+
+
+/*
+ * hyprlofs_dirtrunc removes all dir entries under this dir.
+ */
+void
+hyprlofs_dirtrunc(hlnode_t *dir)
+{
+ hldirent_t *hdp;
+ hlnode_t *tp;
+ size_t namelen;
+ timestruc_t now;
+
+ ASSERT(RW_WRITE_HELD(&dir->hln_rwlock));
+ ASSERT(dir->hln_type == VDIR);
+
+ if (dir->hln_looped)
+ return;
+
+ for (hdp = dir->hln_dir; hdp; hdp = dir->hln_dir) {
+ ASSERT(hdp->hld_next != hdp);
+ ASSERT(hdp->hld_prev != hdp);
+ ASSERT(hdp->hld_hlnode);
+
+ dir->hln_dir = hdp->hld_next;
+ namelen = strlen(hdp->hld_name) + 1;
+
+ /*
+ * Adjust the link counts to account for this dir entry removal.
+ */
+ tp = hdp->hld_hlnode;
+
+ ASSERT(tp->hln_nlink > 0);
+ DECR_COUNT(&tp->hln_nlink, &tp->hln_tlock);
+
+ hyprlofs_hash_out(hdp);
+
+ kmem_free(hdp, sizeof (hldirent_t) + namelen);
+ dir->hln_size -= (sizeof (hldirent_t) + namelen);
+ dir->hln_dirents--;
+ }
+
+ gethrestime(&now);
+ dir->hln_mtime = now;
+ dir->hln_ctime = now;
+
+ ASSERT(dir->hln_dir == NULL);
+ ASSERT(dir->hln_size == 0);
+ ASSERT(dir->hln_dirents == 0);
+}
+
+static int
+hldiraddentry(
+ hlnode_t *dir, /* target directory to make entry in */
+ hlnode_t *hp, /* new hlnode */
+ char *name)
+{
+ hldirent_t *hdp, *hpdp;
+ size_t namelen, alloc_size;
+ timestruc_t now;
+
+ /*
+ * Make sure the parent dir wasn't removed from underneath the caller.
+ */
+ if (dir->hln_dir == NULL)
+ return (ENOENT);
+
+ /* Check that everything is on the same FS. */
+ if (hp->hln_vnode->v_vfsp != dir->hln_vnode->v_vfsp)
+ return (EXDEV);
+
+ /* Alloc and init dir entry */
+ namelen = strlen(name) + 1;
+ alloc_size = namelen + sizeof (hldirent_t);
+ hdp = kmem_zalloc(alloc_size, KM_NORMALPRI | KM_NOSLEEP);
+ if (hdp == NULL)
+ return (ENOSPC);
+
+ dir->hln_size += alloc_size;
+ dir->hln_dirents++;
+ hdp->hld_hlnode = hp;
+ hdp->hld_parent = dir;
+
+ /* The dir entry and its name were allocated sequentially. */
+ hdp->hld_name = (char *)hdp + sizeof (hldirent_t);
+ (void) strcpy(hdp->hld_name, name);
+
+ hyprlofs_hash_in(hdp);
+
+ /*
+ * Some utilities expect the size of a directory to remain fairly
+ * static. For example, a routine which unlinks files between calls to
+ * readdir(); the size of the dir changes from underneath it and so the
+ * real dir offset in bytes is invalid. To circumvent this problem, we
+ * initialize a dir entry with a phony offset, and use this offset to
+ * determine end of file in hyprlofs_readdir.
+ */
+ hpdp = dir->hln_dir->hld_prev;
+ /*
+ * Install at first empty "slot" in directory list.
+ */
+ while (hpdp->hld_next != NULL && (hpdp->hld_next->hld_offset -
+ hpdp->hld_offset) <= 1) {
+ ASSERT(hpdp->hld_next != hpdp);
+ ASSERT(hpdp->hld_prev != hpdp);
+ ASSERT(hpdp->hld_next->hld_offset > hpdp->hld_offset);
+ hpdp = hpdp->hld_next;
+ }
+ hdp->hld_offset = hpdp->hld_offset + 1;
+
+ /*
+ * If we're at the end of the dirent list and the offset (which is
+ * necessarily the largest offset in this dir) is more than twice the
+ * number of dirents, that means the dir is 50% holes. At this point
+ * we reset the slot pointer back to the beginning of the dir so we
+ * start using the holes. The idea is that if there are N dirents,
+ * there must also be N holes, so we can satisfy the next N creates by
+ * walking at most 2N entries; thus the average cost of a create is
+ * constant. Note that we use the first dirent's hld_prev as the roving
+ * slot pointer. This saves a word in every dirent.
+ */
+ if (hpdp->hld_next == NULL && hpdp->hld_offset > 2 * dir->hln_dirents)
+ dir->hln_dir->hld_prev = dir->hln_dir->hld_next;
+ else
+ dir->hln_dir->hld_prev = hdp;
+
+ ASSERT(hpdp->hld_next != hpdp);
+ ASSERT(hpdp->hld_prev != hpdp);
+
+ hdp->hld_next = hpdp->hld_next;
+ if (hdp->hld_next) {
+ hdp->hld_next->hld_prev = hdp;
+ }
+ hdp->hld_prev = hpdp;
+ hpdp->hld_next = hdp;
+
+ ASSERT(hdp->hld_next != hdp);
+ ASSERT(hdp->hld_prev != hdp);
+ ASSERT(hpdp->hld_next != hpdp);
+ ASSERT(hpdp->hld_prev != hpdp);
+
+ gethrestime(&now);
+ dir->hln_mtime = now;
+ dir->hln_ctime = now;
+
+ return (0);
+}
+
+static int
+hldir_make_hlnode(hlnode_t *dir, hlfsmount_t *hm, vattr_t *va, enum de_op op,
+ vnode_t *realvp, hlnode_t **newnode, cred_t *cr)
+{
+ hlnode_t *hp;
+ enum vtype type;
+
+ ASSERT(va != NULL);
+ ASSERT(op == DE_CREATE || op == DE_MKDIR);
+ if (((va->va_mask & AT_ATIME) && TIMESPEC_OVERFLOW(&va->va_atime)) ||
+ ((va->va_mask & AT_MTIME) && TIMESPEC_OVERFLOW(&va->va_mtime)))
+ return (EOVERFLOW);
+ type = va->va_type;
+ hp = kmem_zalloc(sizeof (hlnode_t), KM_SLEEP);
+ hyprlofs_node_init(hm, hp, va, cr);
+
+ hp->hln_vnode->v_rdev = hp->hln_rdev = NODEV;
+ hp->hln_vnode->v_type = type;
+ hp->hln_uid = crgetuid(cr);
+
+ /*
+ * To determine the gid of the created file:
+ * If the directory's set-gid bit is set, set the gid to the gid
+ * of the parent dir, otherwise, use the process's gid.
+ */
+ if (dir->hln_mode & VSGID)
+ hp->hln_gid = dir->hln_gid;
+ else
+ hp->hln_gid = crgetgid(cr);
+
+ /*
+ * If we're creating a dir and the parent dir has the set-GID bit set,
+ * set it on the new dir. Otherwise, if the user is neither privileged
+ * nor a member of the file's new group, clear the file's set-GID bit.
+ */
+ if (dir->hln_mode & VSGID && type == VDIR)
+ hp->hln_mode |= VSGID;
+ else {
+ if ((hp->hln_mode & VSGID) &&
+ secpolicy_vnode_setids_setgids(cr, hp->hln_gid) != 0)
+ hp->hln_mode &= ~VSGID;
+ }
+
+ if (va->va_mask & AT_ATIME)
+ hp->hln_atime = va->va_atime;
+ if (va->va_mask & AT_MTIME)
+ hp->hln_mtime = va->va_mtime;
+
+ if (op == DE_MKDIR) {
+ hyprlofs_dirinit(dir, hp);
+ hp->hln_looped = 0;
+ } else {
+ hp->hln_realvp = realvp;
+ hp->hln_size = va->va_size;
+ hp->hln_looped = 1;
+ }
+
+ *newnode = hp;
+ return (0);
+}
diff --git a/usr/src/uts/common/fs/hyprlofs/hyprlofs_subr.c b/usr/src/uts/common/fs/hyprlofs/hyprlofs_subr.c
new file mode 100644
index 0000000000..1d857309f3
--- /dev/null
+++ b/usr/src/uts/common/fs/hyprlofs/hyprlofs_subr.c
@@ -0,0 +1,127 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/param.h>
+#include <sys/t_lock.h>
+#include <sys/systm.h>
+#include <sys/sysmacros.h>
+#include <sys/debug.h>
+#include <sys/time.h>
+#include <sys/cmn_err.h>
+#include <sys/vnode.h>
+#include <sys/stat.h>
+#include <sys/mode.h>
+#include <sys/vfs.h>
+#include <sys/cred.h>
+#include <sys/kmem.h>
+#include <sys/atomic.h>
+#include <sys/policy.h>
+#include <sys/fs/hyprlofs_info.h>
+
+#define MODESHIFT 3
+
+/* Initialize a hlnode and add it to file list under mount point. */
+void
+hyprlofs_node_init(hlfsmount_t *hm, hlnode_t *h, vattr_t *vap, cred_t *cr)
+{
+ vnode_t *vp;
+ timestruc_t now;
+
+ ASSERT(vap != NULL);
+
+ rw_init(&h->hln_rwlock, NULL, RW_DEFAULT, NULL);
+ mutex_init(&h->hln_tlock, NULL, MUTEX_DEFAULT, NULL);
+ h->hln_mode = MAKEIMODE(vap->va_type, vap->va_mode);
+ h->hln_mask = 0;
+ h->hln_type = vap->va_type;
+ h->hln_nodeid = (ino64_t)(uint32_t)((uintptr_t)h >> 3);
+ h->hln_nlink = 1;
+ h->hln_size = 0;
+
+ if (cr == NULL) {
+ h->hln_uid = vap->va_uid;
+ h->hln_gid = vap->va_gid;
+ } else {
+ h->hln_uid = crgetuid(cr);
+ h->hln_gid = crgetgid(cr);
+ }
+
+ h->hln_fsid = hm->hlm_dev;
+ h->hln_rdev = vap->va_rdev;
+ h->hln_blksize = PAGESIZE;
+ h->hln_nblocks = 0;
+ gethrestime(&now);
+ h->hln_atime = now;
+ h->hln_mtime = now;
+ h->hln_ctime = now;
+ h->hln_seq = 0;
+ h->hln_dir = NULL;
+
+ h->hln_vnode = vn_alloc(KM_SLEEP);
+ vp = HLNTOV(h);
+ vn_setops(vp, hyprlofs_vnodeops);
+ vp->v_vfsp = hm->hlm_vfsp;
+ vp->v_type = vap->va_type;
+ vp->v_rdev = vap->va_rdev;
+ vp->v_data = (caddr_t)h;
+ mutex_enter(&hm->hlm_contents);
+ /*
+ * Increment the pseudo generation number for this hlnode. Since
+ * hlnodes are allocated and freed, there really is no particular
+ * generation number for a new hlnode. Just fake it by using a
+ * counter in each file system.
+ */
+ h->hln_gen = hm->hlm_gen++;
+
+ /*
+ * Add new hlnode to end of linked list of hlnodes for this hyprlofs
+ * Root dir is handled specially in hyprlofs_mount.
+ */
+ if (hm->hlm_rootnode != (hlnode_t *)NULL) {
+ h->hln_forw = NULL;
+ h->hln_back = hm->hlm_rootnode->hln_back;
+ h->hln_back->hln_forw = hm->hlm_rootnode->hln_back = h;
+ }
+ mutex_exit(&hm->hlm_contents);
+ vn_exists(vp);
+}
+
+int
+hyprlofs_taccess(void *vtp, int mode, cred_t *cr)
+{
+ hlnode_t *hp = vtp;
+ int shift = 0;
+
+ /* Check access based on owner, group and public perms in hlnode. */
+ if (crgetuid(cr) != hp->hln_uid) {
+ shift += MODESHIFT;
+ if (groupmember(hp->hln_gid, cr) == 0)
+ shift += MODESHIFT;
+ }
+
+ return (secpolicy_vnode_access2(cr, HLNTOV(hp), hp->hln_uid,
+ hp->hln_mode << shift, mode));
+}
diff --git a/usr/src/uts/common/fs/hyprlofs/hyprlofs_vfsops.c b/usr/src/uts/common/fs/hyprlofs/hyprlofs_vfsops.c
new file mode 100644
index 0000000000..c582a8cac2
--- /dev/null
+++ b/usr/src/uts/common/fs/hyprlofs/hyprlofs_vfsops.c
@@ -0,0 +1,614 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Hyperlofs is a hybrid file system combining features of the tmpfs(7FS) and
+ * lofs(7FS) file systems. It is modeled on code from both of these file
+ * systems.
+ *
+ * The purpose is to create a high performance name space for files on which
+ * applications will compute. Given a large number of data files with various
+ * owners, we want to construct a view onto those files such that only a subset
+ * is visible to the applications and such that the view can be changed very
+ * quickly as compute progresses. Entries in the name space are not mounts and
+ * thus do not appear in the mnttab. Entries in the name space are allowed to
+ * refer to files on different backing file systems. Intermediate directories
+ * in the name space exist only in-memory, ala tmpfs. There are no leaf nodes
+ * in the name space except for entries that refer to backing files ala lofs.
+ *
+ * The name space is managed via ioctls issued on the mounted file system and
+ * is mostly read-only for the compute applications. That is, applications
+ * cannot create new files in the name space. If a file is unlinked by an
+ * application, that only removes the file from the name space, the backing
+ * file remains in place. It is possible for applications to write-through to
+ * the backing files if the file system is mounted read-write.
+ *
+ * The name space is managed via the HYPRLOFS_ADD_ENTRIES, HYPRLOFS_RM_ENTRIES,
+ * and HYPRLOFS_RM_ALL ioctls on the top-level mount.
+ *
+ * The HYPRLOFS_ADD_ENTRIES ioctl specifies path(s) to the backing file(s) and
+ * the name(s) for the file(s) in the name space. The name(s) may be path(s)
+ * which will be relative to the root of the mount and thus cannot begin with
+ * a /. If the name is a path, it does not have to correspond to any backing
+ * path. The intermediate directories will only exist in the name space. The
+ * entry(ies) will be added to the name space.
+ *
+ * The HYPRLOFS_RM_ENTRIES ioctl specifies the name(s) of the file(s) in the
+ * name space which should be removed. The name(s) may be path(s) which will
+ * be relative to the root of the mount and thus cannot begin with a /. The
+ * named entry(ies) will be removed.
+ *
+ * The HYPRLOFS_RM_ALL ioctl will remove all mappings from the name space.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+#include <sys/kmem.h>
+#include <sys/time.h>
+#include <sys/pathname.h>
+#include <sys/vfs.h>
+#include <sys/vfs_opreg.h>
+#include <sys/vnode.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+#include <sys/stat.h>
+#include <sys/errno.h>
+#include <sys/cmn_err.h>
+#include <sys/cred.h>
+#include <sys/statvfs.h>
+#include <sys/mount.h>
+#include <sys/debug.h>
+#include <sys/systm.h>
+#include <sys/mntent.h>
+#include <fs/fs_subr.h>
+#include <vm/page.h>
+#include <vm/anon.h>
+#include <sys/model.h>
+#include <sys/policy.h>
+
+#include <sys/fs/swapnode.h>
+#include <sys/fs/hyprlofs_info.h>
+
+static int hyprlofsfstype;
+
+/*
+ * hyprlofs vfs operations.
+ */
+static int hyprlofsinit(int, char *);
+static int hyprlofs_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *);
+static int hyprlofs_unmount(vfs_t *, int, cred_t *);
+static int hyprlofs_root(vfs_t *, vnode_t **);
+static int hyprlofs_statvfs(vfs_t *, struct statvfs64 *);
+static int hyprlofs_vget(vfs_t *, vnode_t **, struct fid *);
+
+/*
+ * Loadable module wrapper
+ */
+#include <sys/modctl.h>
+
+static mntopts_t hyprlofs_mntopts;
+
+static vfsdef_t vfw = {
+ VFSDEF_VERSION,
+ "hyprlofs",
+ hyprlofsinit,
+ VSW_HASPROTO|VSW_CANREMOUNT|VSW_STATS|VSW_ZMOUNT,
+ &hyprlofs_mntopts
+};
+
+static mntopts_t hyprlofs_mntopts = {
+ 0, NULL
+};
+
+/*
+ * Module linkage information
+ */
+static struct modlfs modlfs = {
+ &mod_fsops, "filesystem for hyprlofs", &vfw
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, &modlfs, NULL
+};
+
+int
+_init()
+{
+ return (mod_install(&modlinkage));
+}
+
+int
+_fini()
+{
+ int error;
+
+ error = mod_remove(&modlinkage);
+ if (error)
+ return (error);
+ /*
+ * Tear down the operations vectors
+ */
+ (void) vfs_freevfsops_by_type(hyprlofsfstype);
+ vn_freevnodeops(hyprlofs_vnodeops);
+ return (0);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+/*
+ * The following are patchable variables limiting the amount of system
+ * resources hyprlofs can use.
+ *
+ * hyprlofs_maxkmem limits the amount of kernel kmem_alloc memory hyprlofs can
+ * use for it's data structures (e.g. hlnodes, directory entries). It is set
+ * as a percentage of physical memory which is determined when hyprlofs is
+ * first used in the system.
+ *
+ * hyprlofs_minfree is the minimum amount of swap space that hyprlofs leaves for
+ * the rest of the system. If the amount of free swap space in the system
+ * (i.e. anoninfo.ani_free) drops below hyprlofs_minfree, hyprlofs anon
+ * allocations will fail.
+ */
+size_t hyprlofs_maxkmem = 0;
+size_t hyprlofs_minfree = 0;
+size_t hyprlofs_kmemspace; /* bytes of kernel heap used by all hyprlofs */
+
+static major_t hyprlofs_major;
+static minor_t hyprlofs_minor;
+static kmutex_t hyprlofs_minor_lock;
+
+/*
+ * initialize global hyprlofs locks and hashes when loading hyprlofs module
+ */
+static int
+hyprlofsinit(int fstype, char *name)
+{
+ static const fs_operation_def_t hl_vfsops_template[] = {
+ VFSNAME_MOUNT, { .vfs_mount = hyprlofs_mount },
+ VFSNAME_UNMOUNT, { .vfs_unmount = hyprlofs_unmount },
+ VFSNAME_ROOT, { .vfs_root = hyprlofs_root },
+ VFSNAME_STATVFS, { .vfs_statvfs = hyprlofs_statvfs },
+ VFSNAME_VGET, { .vfs_vget = hyprlofs_vget },
+ NULL, NULL
+ };
+ int error;
+ extern void hyprlofs_hash_init();
+
+ hyprlofs_hash_init();
+ hyprlofsfstype = fstype;
+ ASSERT(hyprlofsfstype != 0);
+
+ error = vfs_setfsops(fstype, hl_vfsops_template, NULL);
+ if (error != 0) {
+ cmn_err(CE_WARN, "hyprlofsinit: bad vfs ops template");
+ return (error);
+ }
+
+ error = vn_make_ops(name, hyprlofs_vnodeops_template,
+ &hyprlofs_vnodeops);
+ if (error != 0) {
+ (void) vfs_freevfsops_by_type(fstype);
+ cmn_err(CE_WARN, "hyprlofsinit: bad vnode ops template");
+ return (error);
+ }
+
+ /*
+ * hyprlofs_minfree is an absolute limit of swap space which still
+ * allows other processes to execute. Set it if its not patched.
+ */
+ if (hyprlofs_minfree == 0)
+ hyprlofs_minfree = btopr(HYPRLOFSMINFREE);
+
+ if ((hyprlofs_major = getudev()) == (major_t)-1) {
+ cmn_err(CE_WARN,
+ "hyprlofsinit: Can't get unique device number.");
+ hyprlofs_major = 0;
+ }
+ mutex_init(&hyprlofs_minor_lock, NULL, MUTEX_DEFAULT, NULL);
+ return (0);
+}
+
+static int
+hyprlofs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
+{
+ hlfsmount_t *hm = NULL;
+ hlnode_t *hp;
+ struct pathname dpn;
+ int error;
+ vattr_t rattr;
+ int got_attrs;
+
+ if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
+ return (error);
+ if (secpolicy_hyprlofs_control(cr) != 0)
+ return (EPERM);
+
+ if (mvp->v_type != VDIR)
+ return (ENOTDIR);
+
+ if (uap->flags & MS_REMOUNT)
+ return (EBUSY);
+
+ mutex_enter(&mvp->v_lock);
+ if ((uap->flags & MS_OVERLAY) == 0 &&
+ (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
+ mutex_exit(&mvp->v_lock);
+ return (EBUSY);
+ }
+ mutex_exit(&mvp->v_lock);
+
+ /* Having the resource be anything but "swap" doesn't make sense. */
+ vfs_setresource(vfsp, "swap", 0);
+
+ if ((error = pn_get(uap->dir,
+ (uap->flags & MS_SYSSPACE) ? UIO_SYSSPACE : UIO_USERSPACE,
+ &dpn)) != 0)
+ goto out;
+
+ if ((hm = kmem_zalloc(sizeof (hlfsmount_t),
+ KM_NORMALPRI | KM_NOSLEEP)) == NULL) {
+ pn_free(&dpn);
+ error = ENOMEM;
+ goto out;
+ }
+
+ /* Get an available minor device number for this mount */
+ mutex_enter(&hyprlofs_minor_lock);
+ do {
+ hyprlofs_minor = (hyprlofs_minor + 1) & L_MAXMIN32;
+ hm->hlm_dev = makedevice(hyprlofs_major, hyprlofs_minor);
+ } while (vfs_devismounted(hm->hlm_dev));
+ mutex_exit(&hyprlofs_minor_lock);
+
+ /*
+ * Set but don't bother entering the mutex since hlfsmount is not on
+ * the mount list yet.
+ */
+ mutex_init(&hm->hlm_contents, NULL, MUTEX_DEFAULT, NULL);
+
+ hm->hlm_vfsp = vfsp;
+
+ vfsp->vfs_data = (caddr_t)hm;
+ vfsp->vfs_fstype = hyprlofsfstype;
+ vfsp->vfs_dev = hm->hlm_dev;
+ vfsp->vfs_bsize = PAGESIZE;
+ vfsp->vfs_flag |= VFS_NOTRUNC;
+ vfs_make_fsid(&vfsp->vfs_fsid, hm->hlm_dev, hyprlofsfstype);
+ hm->hlm_mntpath = kmem_zalloc(dpn.pn_pathlen + 1, KM_SLEEP);
+ (void) strcpy(hm->hlm_mntpath, dpn.pn_path);
+
+ /* allocate and initialize root hlnode structure */
+ bzero(&rattr, sizeof (vattr_t));
+ rattr.va_mode = (mode_t)(S_IFDIR | 0777);
+ rattr.va_type = VDIR;
+ rattr.va_rdev = 0;
+ hp = kmem_zalloc(sizeof (hlnode_t), KM_SLEEP);
+ hyprlofs_node_init(hm, hp, &rattr, cr);
+
+ /* Get the mode, uid, and gid from the underlying mount point. */
+ rattr.va_mask = AT_MODE|AT_UID|AT_GID;
+ got_attrs = VOP_GETATTR(mvp, &rattr, 0, cr, NULL);
+
+ rw_enter(&hp->hln_rwlock, RW_WRITER);
+ HLNTOV(hp)->v_flag |= VROOT;
+
+ /*
+ * If the getattr succeeded, use its results, otherwise allow the
+ * previously set defaults to prevail.
+ */
+ if (got_attrs == 0) {
+ hp->hln_mode = rattr.va_mode;
+ hp->hln_uid = rattr.va_uid;
+ hp->hln_gid = rattr.va_gid;
+ }
+
+ /*
+ * Initialize linked list of hlnodes so that the back pointer of the
+ * root hlnode always points to the last one on the list and the
+ * forward pointer of the last node is null
+ */
+ hp->hln_back = hp;
+ hp->hln_forw = NULL;
+ hp->hln_nlink = 0;
+ hm->hlm_rootnode = hp;
+
+ hyprlofs_dirinit(hp, hp);
+
+ rw_exit(&hp->hln_rwlock);
+
+ pn_free(&dpn);
+ error = 0;
+
+out:
+ return (error);
+}
+
+static int
+hyprlofs_unmount(vfs_t *vfsp, int flag, cred_t *cr)
+{
+ hlfsmount_t *hm = (hlfsmount_t *)VFSTOHLM(vfsp);
+ hlnode_t *hnp, *cancel;
+ vnode_t *vp;
+ int error;
+
+ if ((error = secpolicy_fs_unmount(cr, vfsp)) != 0)
+ return (error);
+ if (secpolicy_hyprlofs_control(cr) != 0)
+ return (EPERM);
+
+ /*
+ * forced unmount is not supported by this file system
+ * and thus, ENOTSUP, is being returned.
+ */
+ if (flag & MS_FORCE)
+ return (ENOTSUP);
+
+ mutex_enter(&hm->hlm_contents);
+
+ /*
+ * If there are no open files, only the root node should have a ref cnt.
+ * With hlm_contents held, nothing can be added or removed. There may
+ * be some dirty pages. To prevent fsflush from disrupting the unmount,
+ * put a hold on each node while scanning. If we find a previously
+ * referenced node, undo the holds we have placed and fail EBUSY.
+ */
+ hnp = hm->hlm_rootnode;
+ if (HLNTOV(hnp)->v_count > 1) {
+ mutex_exit(&hm->hlm_contents);
+ return (EBUSY);
+ }
+
+ for (hnp = hnp->hln_forw; hnp; hnp = hnp->hln_forw) {
+ if ((vp = HLNTOV(hnp))->v_count > 0) {
+ cancel = hm->hlm_rootnode->hln_forw;
+ while (cancel != hnp) {
+ vp = HLNTOV(cancel);
+ ASSERT(vp->v_count > 0);
+ VN_RELE(vp);
+ cancel = cancel->hln_forw;
+ }
+ mutex_exit(&hm->hlm_contents);
+ return (EBUSY);
+ }
+ VN_HOLD(vp);
+ }
+
+ /* We can drop the mutex now because no one can find this mount */
+ mutex_exit(&hm->hlm_contents);
+
+ /*
+ * Free all alloc'd memory associated with this FS. To do this, we go
+ * through the file list twice, once to remove all the dir entries, and
+ * then to remove all the files.
+ */
+
+ /* Remove all directory entries */
+ for (hnp = hm->hlm_rootnode; hnp; hnp = hnp->hln_forw) {
+ rw_enter(&hnp->hln_rwlock, RW_WRITER);
+ if (hnp->hln_type == VDIR)
+ hyprlofs_dirtrunc(hnp);
+ rw_exit(&hnp->hln_rwlock);
+ }
+
+ ASSERT(hm->hlm_rootnode);
+
+ /*
+ * All links are gone, v_count is keeping nodes in place. VN_RELE
+ * should make the node disappear, unless somebody is holding pages
+ * against it. Wait and retry until it disappears.
+ *
+ * We re-acquire the lock to prevent others who have a HOLD on a hlnode
+ * from blowing it away (in hyprlofs_inactive) while we're trying to
+ * get to it here. Once we have a HOLD on it we know it'll stick around.
+ */
+ mutex_enter(&hm->hlm_contents);
+
+ /* Remove all the files (except the rootnode) backwards. */
+ while ((hnp = hm->hlm_rootnode->hln_back) != hm->hlm_rootnode) {
+ mutex_exit(&hm->hlm_contents);
+ /* Note we handled the link count in pass 2 above. */
+ vp = HLNTOV(hnp);
+ VN_RELE(vp);
+ mutex_enter(&hm->hlm_contents);
+ /*
+ * It's still there after the RELE. Someone else like pageout
+ * has a hold on it so wait a bit and then try again.
+ */
+ if (hnp == hm->hlm_rootnode->hln_back) {
+ VN_HOLD(vp);
+ mutex_exit(&hm->hlm_contents);
+ delay(hz / 4);
+ mutex_enter(&hm->hlm_contents);
+ }
+ }
+ mutex_exit(&hm->hlm_contents);
+
+ VN_RELE(HLNTOV(hm->hlm_rootnode));
+
+ ASSERT(hm->hlm_mntpath);
+
+ kmem_free(hm->hlm_mntpath, strlen(hm->hlm_mntpath) + 1);
+
+ mutex_destroy(&hm->hlm_contents);
+ kmem_free(hm, sizeof (hlfsmount_t));
+
+ return (0);
+}
+
+/* Return root hlnode for given vnode */
+static int
+hyprlofs_root(vfs_t *vfsp, vnode_t **vpp)
+{
+ hlfsmount_t *hm = (hlfsmount_t *)VFSTOHLM(vfsp);
+ hlnode_t *hp = hm->hlm_rootnode;
+ vnode_t *vp;
+
+ ASSERT(hp);
+
+ vp = HLNTOV(hp);
+ VN_HOLD(vp);
+ *vpp = vp;
+ return (0);
+}
+
+static int
+hyprlofs_statvfs(vfs_t *vfsp, struct statvfs64 *sbp)
+{
+ hlfsmount_t *hm = (hlfsmount_t *)VFSTOHLM(vfsp);
+ ulong_t blocks;
+ dev32_t d32;
+ zoneid_t eff_zid;
+ struct zone *zp;
+
+ /*
+ * The FS may have been mounted by the GZ on behalf of the NGZ. In
+ * that case, the hlfsmount zone_id will be the global zone. We want
+ * to show the swap cap inside the zone in this case, even though the
+ * FS was mounted by the GZ.
+ */
+ if (curproc->p_zone->zone_id != GLOBAL_ZONEUNIQID)
+ zp = curproc->p_zone;
+ else
+ zp = hm->hlm_vfsp->vfs_zone;
+
+ if (zp == NULL)
+ eff_zid = GLOBAL_ZONEUNIQID;
+ else
+ eff_zid = zp->zone_id;
+
+ sbp->f_bsize = PAGESIZE;
+ sbp->f_frsize = PAGESIZE;
+
+ /*
+ * Find the amount of available physical and memory swap
+ */
+ mutex_enter(&anoninfo_lock);
+ ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);
+ blocks = (ulong_t)CURRENT_TOTAL_AVAILABLE_SWAP;
+ mutex_exit(&anoninfo_lock);
+
+ if (blocks > hyprlofs_minfree)
+ sbp->f_bfree = blocks - hyprlofs_minfree;
+ else
+ sbp->f_bfree = 0;
+
+ sbp->f_bavail = sbp->f_bfree;
+
+ /*
+ * Total number of blocks is what's available plus what's been used
+ */
+ sbp->f_blocks = (fsblkcnt64_t)(sbp->f_bfree);
+
+ if (eff_zid != GLOBAL_ZONEUNIQID &&
+ zp->zone_max_swap_ctl != UINT64_MAX) {
+ /*
+ * If the fs is used by a NGZ with a swap cap, then report the
+ * capped size.
+ */
+ rctl_qty_t cap, used;
+ pgcnt_t pgcap, pgused;
+
+ mutex_enter(&zp->zone_mem_lock);
+ cap = zp->zone_max_swap_ctl;
+ used = zp->zone_max_swap;
+ mutex_exit(&zp->zone_mem_lock);
+
+ pgcap = btop(cap);
+ pgused = btop(used);
+
+ sbp->f_bfree = MIN(pgcap - pgused, sbp->f_bfree);
+ sbp->f_bavail = sbp->f_bfree;
+ sbp->f_blocks = MIN(pgcap, sbp->f_blocks);
+ }
+
+ /*
+ * This is fairly inaccurate since it doesn't take into account the
+ * names stored in the directory entries.
+ */
+ sbp->f_ffree = sbp->f_files = ptob(availrmem) /
+ (sizeof (hlnode_t) + sizeof (hldirent_t));
+
+ sbp->f_favail = (fsfilcnt64_t)(sbp->f_ffree);
+ (void) cmpldev(&d32, vfsp->vfs_dev);
+ sbp->f_fsid = d32;
+ (void) strcpy(sbp->f_basetype, vfssw[hyprlofsfstype].vsw_name);
+ (void) strncpy(sbp->f_fstr, hm->hlm_mntpath, sizeof (sbp->f_fstr));
+ /*
+ * ensure null termination
+ */
+ sbp->f_fstr[sizeof (sbp->f_fstr) - 1] = '\0';
+ sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
+ sbp->f_namemax = MAXNAMELEN - 1;
+ return (0);
+}
+
+static int
+hyprlofs_vget(vfs_t *vfsp, vnode_t **vpp, struct fid *fidp)
+{
+ hlfid_t *hfid;
+ hlfsmount_t *hm = (hlfsmount_t *)VFSTOHLM(vfsp);
+ hlnode_t *hp = NULL;
+
+ hfid = (hlfid_t *)fidp;
+ *vpp = NULL;
+
+ mutex_enter(&hm->hlm_contents);
+ for (hp = hm->hlm_rootnode; hp; hp = hp->hln_forw) {
+ mutex_enter(&hp->hln_tlock);
+ if (hp->hln_nodeid == hfid->hlfid_ino) {
+ /*
+ * If the gen numbers don't match we know the file
+ * won't be found since only one hlnode can have this
+ * number at a time.
+ */
+ if (hp->hln_gen != hfid->hlfid_gen ||
+ hp->hln_nlink == 0) {
+ mutex_exit(&hp->hln_tlock);
+ mutex_exit(&hm->hlm_contents);
+ return (0);
+ }
+ *vpp = (vnode_t *)HLNTOV(hp);
+
+ VN_HOLD(*vpp);
+
+ if ((hp->hln_mode & S_ISVTX) &&
+ !(hp->hln_mode & (S_IXUSR | S_IFDIR))) {
+ mutex_enter(&(*vpp)->v_lock);
+ (*vpp)->v_flag |= VISSWAP;
+ mutex_exit(&(*vpp)->v_lock);
+ }
+ mutex_exit(&hp->hln_tlock);
+ mutex_exit(&hm->hlm_contents);
+ return (0);
+ }
+ mutex_exit(&hp->hln_tlock);
+ }
+ mutex_exit(&hm->hlm_contents);
+ return (0);
+}
diff --git a/usr/src/uts/common/fs/hyprlofs/hyprlofs_vnops.c b/usr/src/uts/common/fs/hyprlofs/hyprlofs_vnops.c
new file mode 100644
index 0000000000..a2064dfa1f
--- /dev/null
+++ b/usr/src/uts/common/fs/hyprlofs/hyprlofs_vnops.c
@@ -0,0 +1,1441 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/t_lock.h>
+#include <sys/systm.h>
+#include <sys/sysmacros.h>
+#include <sys/user.h>
+#include <sys/time.h>
+#include <sys/vfs.h>
+#include <sys/vfs_opreg.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/fcntl.h>
+#include <sys/flock.h>
+#include <sys/kmem.h>
+#include <sys/errno.h>
+#include <sys/stat.h>
+#include <sys/cred.h>
+#include <sys/dirent.h>
+#include <sys/pathname.h>
+#include <sys/fs/hyprlofs.h>
+#include <sys/fs/hyprlofs_info.h>
+#include <sys/mman.h>
+#include <vm/pvn.h>
+#include <sys/cmn_err.h>
+#include <sys/buf.h>
+#include <sys/policy.h>
+#include <fs/fs_subr.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+
+static int hyprlofs_add_entry(vnode_t *, char *, char *, cred_t *,
+ caller_context_t *);
+static int hyprlofs_rm_entry(vnode_t *, char *, cred_t *, caller_context_t *,
+ int);
+static int hyprlofs_rm_all(vnode_t *, cred_t *, caller_context_t *, int);
+static int hyprlofs_remove(vnode_t *, char *, cred_t *, caller_context_t *,
+ int);
+static int hyprlofs_get_all(vnode_t *, intptr_t, cred_t *, caller_context_t *,
+ int);
+
+/*
+ * This is a somewhat arbitrary upper limit on the number of entries we can
+ * pass in on a single add/rm ioctl call. This is only used to validate that
+ * the input list looks sane.
+ */
+#define MAX_IOCTL_PARAMS 100000
+
+static int
+hyprlofs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
+{
+ vnode_t *rvp;
+ int error;
+
+ rvp = REALVP(*vpp);
+
+ if (VTOHLN(*vpp)->hln_looped == 0)
+ return (0);
+
+ /*
+ * looped back, pass through to real vnode. Need to hold new reference
+ * to vp since VOP_OPEN() may decide to release it.
+ */
+ VN_HOLD(rvp);
+ error = VOP_OPEN(&rvp, flag, cr, ct);
+ ASSERT(rvp->v_count > 1);
+ VN_RELE(rvp);
+
+ return (error);
+}
+
+static int
+hyprlofs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
+ caller_context_t *ct)
+{
+ if (VTOHLN(vp)->hln_looped == 0) {
+ cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
+ cleanshares(vp, ttoproc(curthread)->p_pid);
+ return (0);
+ }
+
+ return (VOP_CLOSE(REALVP(vp), flag, count, offset, cr, ct));
+}
+
+static int
+hyprlofs_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
+ caller_context_t *ct)
+{
+ if (vp->v_type == VDIR)
+ return (EISDIR);
+ return (VOP_READ(REALVP(vp), uiop, ioflag, cr, ct));
+}
+
+static int
+hyprlofs_write(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
+ caller_context_t *ct)
+{
+ /* We don't support writing to non-regular files */
+ if (vp->v_type != VREG)
+ return (EINVAL);
+
+ if (vn_is_readonly(vp))
+ return (EROFS);
+
+ return (VOP_WRITE(REALVP(vp), uiop, ioflag, cr, ct));
+}
+
+/* ARGSUSED */
+static int
+hyprlofs_ioctl(vnode_t *vp, int cmd, intptr_t data, int flag,
+ cred_t *cr, int *rvalp, caller_context_t *ct)
+{
+ int len, cnt, error;
+ int i;
+ model_t model;
+ char path[MAXPATHLEN];
+ char nm[MAXPATHLEN];
+
+ /* We only support the hyprlofs ioctls on the root vnode */
+ if (!(vp->v_flag & VROOT))
+ return (ENOTTY);
+
+ /*
+ * Check if managing hyprlofs is allowed.
+ */
+ if (secpolicy_hyprlofs_control(cr) != 0)
+ return (EPERM);
+
+ if (cmd == HYPRLOFS_ADD_ENTRIES || cmd == HYPRLOFS_RM_ENTRIES) {
+ model = get_udatamodel();
+
+ if (model == DATAMODEL_NATIVE) {
+ hyprlofs_entries_t ebuf;
+ hyprlofs_entry_t *e;
+
+ if (copyin((void *)data, &ebuf, sizeof (ebuf)))
+ return (EFAULT);
+ cnt = ebuf.hle_len;
+ if (cnt > MAX_IOCTL_PARAMS)
+ return (EINVAL);
+ len = sizeof (hyprlofs_entry_t) * cnt;
+
+ e = kmem_alloc(len, KM_SLEEP);
+ if (copyin((void *)(ebuf.hle_entries), e, len)) {
+ kmem_free(e, len);
+ return (EFAULT);
+ }
+
+ for (i = 0; i < cnt; i++) {
+ if (e[i].hle_nlen == 0 ||
+ e[i].hle_nlen > MAXPATHLEN)
+ return (EINVAL);
+
+ if (copyin(e[i].hle_name, nm, e[i].hle_nlen)
+ != 0) {
+ kmem_free(e, len);
+ return (EFAULT);
+ }
+ nm[e[i].hle_nlen] = '\0';
+
+ if (cmd == HYPRLOFS_ADD_ENTRIES) {
+ if (e[i].hle_plen == 0 ||
+ e[i].hle_plen > MAXPATHLEN)
+ return (EINVAL);
+
+ if (copyin(e[i].hle_path, path,
+ e[i].hle_plen) != 0) {
+ kmem_free(e, len);
+ return (EFAULT);
+ }
+ path[e[i].hle_plen] = '\0';
+
+ if ((error = hyprlofs_add_entry(vp,
+ path, nm, cr, ct)) != 0) {
+ kmem_free(e, len);
+ return (error);
+ }
+ } else {
+ if ((error = hyprlofs_rm_entry(vp, nm,
+ cr, ct, flag)) != 0) {
+ kmem_free(e, len);
+ return (error);
+ }
+ }
+ }
+
+ kmem_free(e, len);
+ return (0);
+
+ } else {
+ hyprlofs_entries32_t ebuf32;
+ hyprlofs_entry32_t *e32;
+
+ if (copyin((void *)data, &ebuf32, sizeof (ebuf32)))
+ return (EFAULT);
+
+ cnt = ebuf32.hle_len;
+ if (cnt > MAX_IOCTL_PARAMS)
+ return (EINVAL);
+ len = sizeof (hyprlofs_entry32_t) * cnt;
+
+ e32 = kmem_alloc(len, KM_SLEEP);
+ if (copyin((void *)(unsigned long)(ebuf32.hle_entries),
+ e32, len)) {
+ kmem_free(e32, len);
+ return (EFAULT);
+ }
+
+ for (i = 0; i < cnt; i++) {
+ if (e32[i].hle_nlen == 0 ||
+ e32[i].hle_nlen > MAXPATHLEN)
+ return (EINVAL);
+
+ if (copyin((void *)(unsigned long)
+ e32[i].hle_name, nm,
+ e32[i].hle_nlen) != 0) {
+ kmem_free(e32, len);
+ return (EFAULT);
+ }
+ nm[e32[i].hle_nlen] = '\0';
+
+ if (cmd == HYPRLOFS_ADD_ENTRIES) {
+ if (e32[i].hle_plen == 0 ||
+ e32[i].hle_plen > MAXPATHLEN)
+ return (EINVAL);
+
+ if (copyin((void *)(unsigned long)
+ e32[i].hle_path, path,
+ e32[i].hle_plen) != 0) {
+ kmem_free(e32, len);
+ return (EFAULT);
+ }
+ path[e32[i].hle_plen] = '\0';
+
+ if ((error = hyprlofs_add_entry(vp,
+ path, nm, cr, ct)) != 0) {
+ kmem_free(e32, len);
+ return (error);
+ }
+ } else {
+ if ((error = hyprlofs_rm_entry(vp, nm,
+ cr, ct, flag)) != 0) {
+ kmem_free(e32, len);
+ return (error);
+ }
+ }
+ }
+
+ kmem_free(e32, len);
+ return (0);
+ }
+ }
+
+ if (cmd == HYPRLOFS_RM_ALL) {
+ return (hyprlofs_rm_all(vp, cr, ct, flag));
+ }
+
+ if (cmd == HYPRLOFS_GET_ENTRIES) {
+ return (hyprlofs_get_all(vp, data, cr, ct, flag));
+ }
+
+ return (ENOTTY);
+}
+
+static int
+hyprlofs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
+ caller_context_t *ct)
+{
+ hlnode_t *tp = (hlnode_t *)VTOHLN(vp);
+ vattr_t tmp_va;
+
+ if (tp->hln_looped == 1) {
+ int error;
+
+ if ((error = VOP_GETATTR(REALVP(vp), &tmp_va, flags, cr,
+ ct)) != 0)
+ return (error);
+ }
+
+ mutex_enter(&tp->hln_tlock);
+ vap->va_type = vp->v_type;
+ vap->va_mode = tp->hln_mode & MODEMASK;
+ vap->va_uid = tp->hln_uid;
+ vap->va_gid = tp->hln_gid;
+ vap->va_fsid = tp->hln_fsid;
+ vap->va_nodeid = (ino64_t)tp->hln_nodeid;
+ vap->va_nlink = tp->hln_nlink;
+ vap->va_size = (u_offset_t)tp->hln_size;
+ vap->va_atime = tp->hln_atime;
+ vap->va_mtime = tp->hln_mtime;
+ vap->va_ctime = tp->hln_ctime;
+ vap->va_blksize = PAGESIZE;
+ vap->va_rdev = tp->hln_rdev;
+ vap->va_seq = tp->hln_seq;
+
+ if (tp->hln_looped == 1) {
+ vap->va_nblocks = tmp_va.va_nblocks;
+ } else {
+ vap->va_nblocks =
+ (fsblkcnt64_t)btodb(ptob(btopr(vap->va_size)));
+ }
+ mutex_exit(&tp->hln_tlock);
+ return (0);
+}
+
+/*ARGSUSED4*/
+static int
+hyprlofs_setattr(vnode_t *vp, vattr_t *vap, int flags,
+ cred_t *cr, caller_context_t *ct)
+{
+ hlnode_t *tp = (hlnode_t *)VTOHLN(vp);
+ int error = 0;
+ vattr_t *get;
+ long mask;
+
+ /*
+ * Cannot set these attributes
+ */
+ if ((vap->va_mask & AT_NOSET) || (vap->va_mask & AT_XVATTR))
+ return (EINVAL);
+
+ mutex_enter(&tp->hln_tlock);
+
+ get = &tp->hln_attr;
+ /*
+ * Change file access modes. Must be owner or have sufficient
+ * privileges.
+ */
+ error = secpolicy_vnode_setattr(cr, vp, vap, get, flags,
+ hyprlofs_taccess, tp);
+
+ if (error)
+ goto out;
+
+ mask = vap->va_mask;
+
+ if (mask & AT_MODE) {
+ get->va_mode &= S_IFMT;
+ get->va_mode |= vap->va_mode & ~S_IFMT;
+ }
+
+ if (mask & AT_UID)
+ get->va_uid = vap->va_uid;
+ if (mask & AT_GID)
+ get->va_gid = vap->va_gid;
+ if (mask & AT_ATIME)
+ get->va_atime = vap->va_atime;
+ if (mask & AT_MTIME)
+ get->va_mtime = vap->va_mtime;
+
+ if (mask & (AT_UID | AT_GID | AT_MODE | AT_MTIME))
+ gethrestime(&tp->hln_ctime);
+
+out:
+ mutex_exit(&tp->hln_tlock);
+ return (error);
+}
+
+static int
+hyprlofs_access(vnode_t *vp, int mode, int flags, cred_t *cr,
+ caller_context_t *ct)
+{
+ hlnode_t *tp = (hlnode_t *)VTOHLN(vp);
+ int error;
+
+ if (mode & VWRITE) {
+ if (vp->v_type == VREG && vn_is_readonly(vp))
+ return (EROFS);
+ }
+ if (VTOHLN(vp)->hln_looped == 1)
+ return (VOP_ACCESS(REALVP(vp), mode, flags, cr, ct));
+
+ mutex_enter(&tp->hln_tlock);
+ error = hyprlofs_taccess(tp, mode, cr);
+ mutex_exit(&tp->hln_tlock);
+ return (error);
+}
+
+/* ARGSUSED3 */
+static int
+hyprlofs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
+ int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
+ int *direntflags, pathname_t *realpnp)
+{
+ hlnode_t *tp = (hlnode_t *)VTOHLN(dvp);
+ hlnode_t *ntp = NULL;
+ int error;
+
+ if (VTOHLN(dvp)->hln_looped == 1)
+ return (VOP_LOOKUP(REALVP(dvp), nm, vpp, pnp, flags, rdir,
+ cr, ct, direntflags, realpnp));
+
+ if (flags & LOOKUP_XATTR)
+ return (EINVAL);
+
+ /* Null component name is a synonym for directory being searched. */
+ if (*nm == '\0') {
+ VN_HOLD(dvp);
+ *vpp = dvp;
+ return (0);
+ }
+ ASSERT(tp);
+
+ if ((error = hyprlofs_dirlookup(tp, nm, &ntp, cr)) == 0) {
+ ASSERT(ntp);
+ *vpp = HLNTOV(ntp);
+ }
+ return (error);
+}
+
+/*
+ * Create the loopback from the hyprlofs vnode to the real vnode.
+ */
+static int
+hyprlofs_loopback(vnode_t *dvp, vnode_t *rvp, char *nm, vattr_t *vap,
+ int mode, cred_t *cr, caller_context_t *ct)
+{
+ hlnode_t *parent;
+ hlfsmount_t *tm;
+ int error;
+ hlnode_t *oldtp;
+ vnode_t *vp;
+
+ parent = (hlnode_t *)VTOHLN(dvp);
+ tm = (hlfsmount_t *)VTOHLM(dvp);
+ error = 0;
+ oldtp = NULL;
+
+ if (vap->va_type == VREG && (vap->va_mode & VSVTX)) {
+ /* we don't support the sticky bit */
+ vap->va_mode &= ~VSVTX;
+ } else if (vap->va_type == VNON) {
+ return (EINVAL);
+ }
+
+ /* Null component name is a synonym for directory being searched. */
+ if (*nm == '\0') {
+ VN_HOLD(dvp);
+ oldtp = parent;
+ } else {
+ error = hyprlofs_dirlookup(parent, nm, &oldtp, cr);
+ }
+
+ if (error == 0) { /* name found */
+ ASSERT(oldtp);
+
+ rw_enter(&oldtp->hln_rwlock, RW_WRITER);
+
+ /*
+ * if create/read-only an existing directory, allow it
+ */
+ if ((oldtp->hln_type == VDIR) && (mode & VWRITE))
+ error = EISDIR;
+ else {
+ error = hyprlofs_taccess(oldtp, mode, cr);
+ }
+
+ if (error) {
+ rw_exit(&oldtp->hln_rwlock);
+ hlnode_rele(oldtp);
+ return (error);
+ }
+
+ vp = HLNTOV(oldtp);
+ rw_exit(&oldtp->hln_rwlock);
+
+ if (vp->v_type == VREG) {
+ hlnode_rele(oldtp);
+ return (EEXIST);
+ }
+
+ vnevent_create(vp, ct);
+ return (0);
+ }
+
+ if (error != ENOENT)
+ return (error);
+
+ rw_enter(&parent->hln_rwlock, RW_WRITER);
+ error = hyprlofs_direnter(tm, parent, nm, DE_CREATE, rvp, vap, NULL,
+ cr);
+ rw_exit(&parent->hln_rwlock);
+
+ return (error);
+}
+
+/*
+ * Create an in-memory directory based on the add-entry ioctl name.
+ * If the dir exists, return EEXIST but still also return node in vpp.
+ */
+static int
+hyprlofs_mkdir(vnode_t *dvp, char *nm, vattr_t *va, vnode_t **vpp, cred_t *cr)
+{
+ hlnode_t *parent = (hlnode_t *)VTOHLN(dvp);
+ hlnode_t *self = NULL;
+ hlfsmount_t *tm = (hlfsmount_t *)VTOHLM(dvp);
+ int error;
+
+ /*
+ * Might be dangling directory. Catch it here, because a ENOENT return
+ * from hyprlofs_dirlookup() is a valid return.
+ */
+ if (parent->hln_nlink == 0)
+ return (ENOENT);
+
+ error = hyprlofs_dirlookup(parent, nm, &self, cr);
+ if (error == 0) {
+ ASSERT(self);
+ hlnode_rele(self);
+ /* We can't loop in under a looped in directory */
+ if (self->hln_looped)
+ return (EACCES);
+ *vpp = HLNTOV(self);
+ return (EEXIST);
+ }
+ if (error != ENOENT)
+ return (error);
+
+ rw_enter(&parent->hln_rwlock, RW_WRITER);
+ error = hyprlofs_direnter(tm, parent, nm, DE_MKDIR, (vnode_t *)NULL,
+ va, &self, cr);
+ rw_exit(&parent->hln_rwlock);
+
+ if (error == 0 || error == EEXIST) {
+ hlnode_rele(self);
+ *vpp = HLNTOV(self);
+ }
+
+ return (error);
+}
+
+/*
+ * Loop in a file or directory into the namespace.
+ */
+static int
+hyprlofs_add_entry(vnode_t *vp, char *fspath, char *fsname,
+ cred_t *cr, caller_context_t *ct)
+{
+ int error;
+ char *p, *pnm;
+ vnode_t *realvp, *dvp;
+ vattr_t va;
+
+ /*
+ * Get vnode for the real file/dir. We'll have a hold on realvp which
+ * we won't vn_rele until hyprlofs_inactive.
+ */
+ if ((error = lookupname(fspath, UIO_SYSSPACE, FOLLOW, NULLVPP,
+ &realvp)) != 0)
+ return (error);
+
+ /* no devices allowed */
+ if (IS_DEVVP(realvp)) {
+ VN_RELE(realvp);
+ return (ENODEV);
+ }
+
+ /*
+ * realvp may be an AUTOFS node, in which case we perform a VOP_ACCESS
+ * to trigger the mount of the intended filesystem. This causes a
+ * loopback mount of the intended filesystem instead of the AUTOFS
+ * filesystem.
+ */
+ if ((error = VOP_ACCESS(realvp, 0, 0, cr, NULL)) != 0) {
+ VN_RELE(realvp);
+ return (error);
+ }
+
+ /*
+ * We're interested in the top most filesystem. This is specially
+ * important when fspath is a trigger AUTOFS node, since we're really
+ * interested in mounting the filesystem AUTOFS mounted as result of
+ * the VOP_ACCESS() call not the AUTOFS node itself.
+ */
+ if (vn_mountedvfs(realvp) != NULL) {
+ if ((error = traverse(&realvp)) != 0) {
+ VN_RELE(realvp);
+ return (error);
+ }
+ }
+
+ va.va_type = VNON;
+ /*
+ * If the target name is a path, make sure we have all of the
+ * intermediate directories, creating them if necessary.
+ */
+ dvp = vp;
+ pnm = p = fsname;
+
+ /* path cannot be absolute */
+ if (*p == '/') {
+ VN_RELE(realvp);
+ return (EINVAL);
+ }
+
+ for (p = strchr(pnm, '/'); p != NULL; p = strchr(pnm, '/')) {
+ if (va.va_type == VNON)
+ /* use the top-level dir as the template va for mkdir */
+ if ((error = VOP_GETATTR(vp, &va, 0, cr, NULL)) != 0) {
+ VN_RELE(realvp);
+ return (error);
+ }
+
+ *p = '\0';
+
+ /* Path component cannot be empty or relative */
+ if (pnm[0] == '\0' ||
+ (pnm[0] == '.' && pnm[1] == '.' && pnm[2] == '\0')) {
+ VN_RELE(realvp);
+ return (EINVAL);
+ }
+
+ if ((error = hyprlofs_mkdir(dvp, pnm, &va, &dvp, cr)) != 0 &&
+ error != EEXIST) {
+ VN_RELE(realvp);
+ return (error);
+ }
+
+ *p = '/';
+ pnm = p + 1;
+ }
+
+ /* The file name is required */
+ if (pnm[0] == '\0') {
+ VN_RELE(realvp);
+ return (EINVAL);
+ }
+
+ /* Now use the real file's va as the template va */
+ if ((error = VOP_GETATTR(realvp, &va, 0, cr, NULL)) != 0) {
+ VN_RELE(realvp);
+ return (error);
+ }
+
+ /* Make the vnode */
+ error = hyprlofs_loopback(dvp, realvp, pnm, &va, va.va_mode, cr, ct);
+ if (error != 0)
+ VN_RELE(realvp);
+ return (error);
+}
+
+/*
+ * Remove a looped in file from the namespace.
+ */
+static int
+hyprlofs_rm_entry(vnode_t *dvp, char *fsname, cred_t *cr, caller_context_t *ct,
+ int flags)
+{
+ int error;
+ char *p, *pnm;
+ hlnode_t *parent;
+ hlnode_t *fndtp;
+
+ pnm = p = fsname;
+
+ /* path cannot be absolute */
+ if (*p == '/')
+ return (EINVAL);
+
+ /*
+ * If the target name is a path, get the containing dir and simple
+ * file name.
+ */
+ parent = (hlnode_t *)VTOHLN(dvp);
+ for (p = strchr(pnm, '/'); p != NULL; p = strchr(pnm, '/')) {
+ *p = '\0';
+
+ /* Path component cannot be empty or relative */
+ if (pnm[0] == '\0' ||
+ (pnm[0] == '.' && pnm[1] == '.' && pnm[2] == '\0'))
+ return (EINVAL);
+
+ if ((error = hyprlofs_dirlookup(parent, pnm, &fndtp, cr)) != 0)
+ return (error);
+
+ dvp = HLNTOV(fndtp);
+ parent = fndtp;
+ pnm = p + 1;
+ }
+
+ /* The file name is required */
+ if (pnm[0] == '\0')
+ return (EINVAL);
+
+ /* Remove the entry from the parent dir */
+ return (hyprlofs_remove(dvp, pnm, cr, ct, flags));
+}
+
+/*
+ * Remove all looped in files from the namespace.
+ */
+static int
+hyprlofs_rm_all(vnode_t *dvp, cred_t *cr, caller_context_t *ct,
+ int flags)
+{
+ int error = 0;
+ hlnode_t *hp = (hlnode_t *)VTOHLN(dvp);
+ hldirent_t *hdp;
+
+ hlnode_hold(hp);
+
+ /*
+ * There's a window here where someone could have removed
+ * all the entries in the directory after we put a hold on the
+ * vnode but before we grabbed the rwlock. Just return.
+ */
+ if (hp->hln_dir == NULL) {
+ if (hp->hln_nlink) {
+ panic("empty directory 0x%p", (void *)hp);
+ /*NOTREACHED*/
+ }
+ goto done;
+ }
+
+ hdp = hp->hln_dir;
+ while (hdp) {
+ hlnode_t *fndhp;
+
+ if (strcmp(hdp->hld_name, ".") == 0 ||
+ strcmp(hdp->hld_name, "..") == 0) {
+ hdp = hdp->hld_next;
+ continue;
+ }
+
+ /* This holds the fndhp vnode */
+ error = hyprlofs_dirlookup(hp, hdp->hld_name, &fndhp, cr);
+ if (error != 0)
+ goto done;
+ hlnode_rele(fndhp);
+
+ if (fndhp->hln_looped == 0) {
+ /* recursively remove contents of this subdir */
+ if (fndhp->hln_type == VDIR) {
+ vnode_t *tvp = HLNTOV(fndhp);
+
+ error = hyprlofs_rm_all(tvp, cr, ct, flags);
+ if (error != 0)
+ goto done;
+ }
+ }
+
+ /* remove the entry */
+ error = hyprlofs_remove(dvp, hdp->hld_name, cr, ct, flags);
+ if (error != 0)
+ goto done;
+
+ hdp = hp->hln_dir;
+ }
+
+done:
+ hlnode_rele(hp);
+ return (error);
+}
+
+/*
+ * Get a list of all looped in files in the namespace.
+ */
+static int
+hyprlofs_get_all_entries(vnode_t *dvp, hyprlofs_curr_entry_t *hcp,
+ char *prefix, int *pcnt, int n_max,
+ cred_t *cr, caller_context_t *ct, int flags)
+{
+ int error = 0;
+ int too_big = 0;
+ int cnt;
+ int len;
+ hlnode_t *hp = (hlnode_t *)VTOHLN(dvp);
+ hldirent_t *hdp;
+ char *path;
+
+ cnt = *pcnt;
+ path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+ hlnode_hold(hp);
+
+ /*
+ * There's a window here where someone could have removed
+ * all the entries in the directory after we put a hold on the
+ * vnode but before we grabbed the rwlock. Just return.
+ */
+ if (hp->hln_dir == NULL) {
+ if (hp->hln_nlink) {
+ panic("empty directory 0x%p", (void *)hp);
+ /*NOTREACHED*/
+ }
+ goto done;
+ }
+
+ hdp = hp->hln_dir;
+ while (hdp) {
+ hlnode_t *fndhp;
+ vnode_t *tvp;
+
+ if (strcmp(hdp->hld_name, ".") == 0 ||
+ strcmp(hdp->hld_name, "..") == 0) {
+ hdp = hdp->hld_next;
+ continue;
+ }
+
+ /* This holds the fndhp vnode */
+ error = hyprlofs_dirlookup(hp, hdp->hld_name, &fndhp, cr);
+ if (error != 0)
+ goto done;
+ hlnode_rele(fndhp);
+
+ if (fndhp->hln_looped == 0) {
+ /* recursively get contents of this subdir */
+ VERIFY(fndhp->hln_type == VDIR);
+ tvp = HLNTOV(fndhp);
+
+ if (*prefix == '\0')
+ (void) strlcpy(path, hdp->hld_name, MAXPATHLEN);
+ else
+ (void) snprintf(path, MAXPATHLEN, "%s/%s",
+ prefix, hdp->hld_name);
+
+ error = hyprlofs_get_all_entries(tvp, hcp, path,
+ &cnt, n_max, cr, ct, flags);
+
+ if (error == E2BIG) {
+ too_big = 1;
+ error = 0;
+ }
+ if (error != 0)
+ goto done;
+ } else {
+ if (cnt < n_max) {
+ char *p;
+
+ if (*prefix == '\0')
+ (void) strlcpy(path, hdp->hld_name,
+ MAXPATHLEN);
+ else
+ (void) snprintf(path, MAXPATHLEN,
+ "%s/%s", prefix, hdp->hld_name);
+
+ len = strlen(path);
+ ASSERT(len <= MAXPATHLEN);
+ if (copyout(path, (void *)(hcp[cnt].hce_name),
+ len)) {
+ error = EFAULT;
+ goto done;
+ }
+
+ tvp = REALVP(HLNTOV(fndhp));
+ if (tvp->v_path == vn_vpath_empty) {
+ p = "<unknown>";
+ } else {
+ p = tvp->v_path;
+ }
+ len = strlen(p);
+ ASSERT(len <= MAXPATHLEN);
+ if (copyout(p, (void *)(hcp[cnt].hce_path),
+ len)) {
+ error = EFAULT;
+ goto done;
+ }
+ }
+
+ cnt++;
+ if (cnt > n_max)
+ too_big = 1;
+ }
+
+ hdp = hdp->hld_next;
+ }
+
+done:
+ hlnode_rele(hp);
+ kmem_free(path, MAXPATHLEN);
+
+ *pcnt = cnt;
+ if (error == 0 && too_big == 1)
+ error = E2BIG;
+
+ return (error);
+}
+
+/*
+ * Return a list of all looped in files in the namespace.
+ */
+static int
+hyprlofs_get_all(vnode_t *dvp, intptr_t data, cred_t *cr, caller_context_t *ct,
+ int flags)
+{
+ int limit, cnt, error;
+ model_t model;
+ hyprlofs_curr_entry_t *e;
+
+ model = get_udatamodel();
+
+ if (model == DATAMODEL_NATIVE) {
+ hyprlofs_curr_entries_t ebuf;
+
+ if (copyin((void *)data, &ebuf, sizeof (ebuf)))
+ return (EFAULT);
+ limit = ebuf.hce_cnt;
+ e = ebuf.hce_entries;
+ if (limit > MAX_IOCTL_PARAMS)
+ return (EINVAL);
+
+ } else {
+ hyprlofs_curr_entries32_t ebuf32;
+
+ if (copyin((void *)data, &ebuf32, sizeof (ebuf32)))
+ return (EFAULT);
+
+ limit = ebuf32.hce_cnt;
+ e = (hyprlofs_curr_entry_t *)(unsigned long)
+ (ebuf32.hce_entries);
+ if (limit > MAX_IOCTL_PARAMS)
+ return (EINVAL);
+ }
+
+ cnt = 0;
+ error = hyprlofs_get_all_entries(dvp, e, "", &cnt, limit, cr, ct,
+ flags);
+
+ if (error == 0 || error == E2BIG) {
+ if (model == DATAMODEL_NATIVE) {
+ hyprlofs_curr_entries_t ebuf;
+
+ ebuf.hce_cnt = cnt;
+ if (copyout(&ebuf, (void *)data, sizeof (ebuf)))
+ return (EFAULT);
+
+ } else {
+ hyprlofs_curr_entries32_t ebuf32;
+
+ ebuf32.hce_cnt = cnt;
+ if (copyout(&ebuf32, (void *)data, sizeof (ebuf32)))
+ return (EFAULT);
+ }
+ }
+
+ return (error);
+}
+
+/* ARGSUSED3 */
+static int
+hyprlofs_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
+ int flags)
+{
+ hlnode_t *parent = (hlnode_t *)VTOHLN(dvp);
+ int error;
+ hlnode_t *hp = NULL;
+
+ /* This holds the hp vnode */
+ error = hyprlofs_dirlookup(parent, nm, &hp, cr);
+ if (error)
+ return (error);
+
+ ASSERT(hp);
+ rw_enter(&parent->hln_rwlock, RW_WRITER);
+ rw_enter(&hp->hln_rwlock, RW_WRITER);
+
+ error = hyprlofs_dirdelete(parent, hp, nm, DR_REMOVE, cr);
+
+ rw_exit(&hp->hln_rwlock);
+ rw_exit(&parent->hln_rwlock);
+ vnevent_remove(HLNTOV(hp), dvp, nm, ct);
+
+ /*
+ * We've now dropped the dir link so by rele-ing our vnode we should
+ * clean up in hyprlofs_inactive.
+ */
+ hlnode_rele(hp);
+
+ return (error);
+}
+
+/* ARGSUSED4 */
+static int
+hyprlofs_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
+ caller_context_t *ct, int flags)
+{
+ hlnode_t *parent = (hlnode_t *)VTOHLN(dvp);
+ hlnode_t *self = NULL;
+ vnode_t *vp;
+ int error = 0;
+
+ /* Return error if removing . or .. */
+ if (strcmp(nm, ".") == 0)
+ return (EINVAL);
+ if (strcmp(nm, "..") == 0)
+ return (EEXIST); /* Should be ENOTEMPTY */
+ error = hyprlofs_dirlookup(parent, nm, &self, cr);
+ if (error)
+ return (error);
+
+ rw_enter(&parent->hln_rwlock, RW_WRITER);
+ rw_enter(&self->hln_rwlock, RW_WRITER);
+
+ vp = HLNTOV(self);
+ if (vp == dvp || vp == cdir) {
+ error = EINVAL;
+ goto done1;
+ }
+ if (self->hln_type != VDIR) {
+ error = ENOTDIR;
+ goto done1;
+ }
+
+ /*
+ * When a dir is looped in, we only remove the in-memory dir, not the
+ * backing dir.
+ */
+ if (self->hln_looped == 0) {
+ mutex_enter(&self->hln_tlock);
+ if (self->hln_nlink > 2) {
+ mutex_exit(&self->hln_tlock);
+ error = EEXIST;
+ goto done1;
+ }
+ mutex_exit(&self->hln_tlock);
+
+ if (vn_vfswlock(vp)) {
+ error = EBUSY;
+ goto done1;
+ }
+ if (vn_mountedvfs(vp) != NULL) {
+ error = EBUSY;
+ goto done;
+ }
+
+ /*
+ * Check for an empty directory, i.e. only includes entries for
+ * "." and ".."
+ */
+ if (self->hln_dirents > 2) {
+ error = EEXIST; /* SIGH should be ENOTEMPTY */
+ /*
+ * Update atime because checking hln_dirents is
+ * equivalent to reading the directory
+ */
+ gethrestime(&self->hln_atime);
+ goto done;
+ }
+
+ error = hyprlofs_dirdelete(parent, self, nm, DR_RMDIR, cr);
+ } else {
+ error = hyprlofs_dirdelete(parent, self, nm, DR_REMOVE, cr);
+ }
+
+done:
+ if (self->hln_looped == 0)
+ vn_vfsunlock(vp);
+done1:
+ rw_exit(&self->hln_rwlock);
+ rw_exit(&parent->hln_rwlock);
+ vnevent_rmdir(HLNTOV(self), dvp, nm, ct);
+
+ /*
+ * We've now dropped the dir link so by rele-ing our vnode we should
+ * clean up in hyprlofs_inactive.
+ */
+ hlnode_rele(self);
+
+ return (error);
+}
+
+static int
+hyprlofs_readdir(vnode_t *vp, struct uio *uiop, cred_t *cr, int *eofp,
+ caller_context_t *ct, int flags)
+{
+ hlnode_t *hp = (hlnode_t *)VTOHLN(vp);
+ hldirent_t *hdp;
+ int error = 0;
+ size_t namelen;
+ struct dirent64 *dp;
+ ulong_t offset;
+ ulong_t total_bytes_wanted;
+ long outcount = 0;
+ long bufsize;
+ int reclen;
+ caddr_t outbuf;
+
+ if (VTOHLN(vp)->hln_looped == 1)
+ return (VOP_READDIR(REALVP(vp), uiop, cr, eofp, ct, flags));
+
+ if (uiop->uio_loffset >= MAXOFF_T) {
+ if (eofp)
+ *eofp = 1;
+ return (0);
+ }
+ /* assuming syscall has already called hln_rwlock */
+ ASSERT(RW_READ_HELD(&hp->hln_rwlock));
+
+ if (uiop->uio_iovcnt != 1)
+ return (EINVAL);
+
+ if (vp->v_type != VDIR)
+ return (ENOTDIR);
+
+ /*
+ * There's a window here where someone could have removed
+ * all the entries in the directory after we put a hold on the
+ * vnode but before we grabbed the rwlock. Just return.
+ */
+ if (hp->hln_dir == NULL) {
+ if (hp->hln_nlink) {
+ panic("empty directory 0x%p", (void *)hp);
+ /*NOTREACHED*/
+ }
+ return (0);
+ }
+
+ /* Get space for multiple dir entries */
+ total_bytes_wanted = uiop->uio_iov->iov_len;
+ bufsize = total_bytes_wanted + sizeof (struct dirent64);
+ outbuf = kmem_alloc(bufsize, KM_SLEEP);
+
+ dp = (struct dirent64 *)((uintptr_t)outbuf);
+
+ offset = 0;
+ hdp = hp->hln_dir;
+ while (hdp) {
+ namelen = strlen(hdp->hld_name); /* no +1 needed */
+ offset = hdp->hld_offset;
+ if (offset >= uiop->uio_offset) {
+ reclen = (int)DIRENT64_RECLEN(namelen);
+ if (outcount + reclen > total_bytes_wanted) {
+ if (!outcount)
+ /* Buffer too small for any entries. */
+ error = EINVAL;
+ break;
+ }
+ ASSERT(hdp->hld_hlnode != NULL);
+
+ /* zero out uninitialized bytes */
+ (void) strncpy(dp->d_name, hdp->hld_name,
+ DIRENT64_NAMELEN(reclen));
+ dp->d_reclen = (ushort_t)reclen;
+ dp->d_ino = (ino64_t)hdp->hld_hlnode->hln_nodeid;
+ dp->d_off = (offset_t)hdp->hld_offset + 1;
+ dp = (struct dirent64 *)
+ ((uintptr_t)dp + dp->d_reclen);
+ outcount += reclen;
+ ASSERT(outcount <= bufsize);
+ }
+ hdp = hdp->hld_next;
+ }
+
+ if (!error)
+ error = uiomove(outbuf, outcount, UIO_READ, uiop);
+
+ if (!error) {
+ /*
+ * If we reached the end of the list our offset should now be
+ * just past the end.
+ */
+ if (!hdp) {
+ offset += 1;
+ if (eofp)
+ *eofp = 1;
+ } else if (eofp)
+ *eofp = 0;
+ uiop->uio_offset = offset;
+ }
+ gethrestime(&hp->hln_atime);
+ kmem_free(outbuf, bufsize);
+ return (error);
+}
+
+static int
+hyprlofs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
+{
+ if (VTOHLN(vp)->hln_looped == 1)
+ return (VOP_FSYNC(REALVP(vp), syncflag, cr, ct));
+ return (0);
+}
+
+/* ARGSUSED */
+static void
+hyprlofs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
+{
+ hlnode_t *hp = (hlnode_t *)VTOHLN(vp);
+ hlfsmount_t *hm = (hlfsmount_t *)VFSTOHLM(vp->v_vfsp);
+
+ rw_enter(&hp->hln_rwlock, RW_WRITER);
+
+ mutex_enter(&hp->hln_tlock);
+ mutex_enter(&vp->v_lock);
+ ASSERT(vp->v_count >= 1);
+
+ /*
+ * If we don't have the last hold or the link count is non-zero,
+ * there's nothing to do except drop our hold.
+ */
+ if (vp->v_count > 1 || hp->hln_nlink != 0) {
+ vp->v_count--;
+ mutex_exit(&vp->v_lock);
+ mutex_exit(&hp->hln_tlock);
+ rw_exit(&hp->hln_rwlock);
+ return;
+ }
+
+ mutex_exit(&vp->v_lock);
+ mutex_exit(&hp->hln_tlock);
+
+ /* release hold on the real vnode now */
+ if (hp->hln_looped == 1 && hp->hln_realvp != NULL)
+ VN_RELE(hp->hln_realvp);
+
+ /* Here's our chance to send invalid event while we're between locks */
+ vn_invalid(HLNTOV(hp));
+
+ mutex_enter(&hm->hlm_contents);
+ if (hp->hln_forw == NULL)
+ hm->hlm_rootnode->hln_back = hp->hln_back;
+ else
+ hp->hln_forw->hln_back = hp->hln_back;
+ hp->hln_back->hln_forw = hp->hln_forw;
+ mutex_exit(&hm->hlm_contents);
+ rw_exit(&hp->hln_rwlock);
+ rw_destroy(&hp->hln_rwlock);
+ mutex_destroy(&hp->hln_tlock);
+ vn_free(HLNTOV(hp));
+ kmem_free(hp, sizeof (hlnode_t));
+}
+
+static int
+hyprlofs_fid(vnode_t *vp, struct fid *fidp, caller_context_t *ct)
+{
+ hlnode_t *hp = (hlnode_t *)VTOHLN(vp);
+ hlfid_t *hfid;
+
+ if (VTOHLN(vp)->hln_looped == 1)
+ return (VOP_FID(REALVP(vp), fidp, ct));
+
+ if (fidp->fid_len < (sizeof (hlfid_t) - sizeof (ushort_t))) {
+ fidp->fid_len = sizeof (hlfid_t) - sizeof (ushort_t);
+ return (ENOSPC);
+ }
+
+ hfid = (hlfid_t *)fidp;
+ bzero(hfid, sizeof (hlfid_t));
+ hfid->hlfid_len = (int)sizeof (hlfid_t) - sizeof (ushort_t);
+
+ hfid->hlfid_ino = hp->hln_nodeid;
+ hfid->hlfid_gen = hp->hln_gen;
+
+ return (0);
+}
+
+static int
+hyprlofs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
+ page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw,
+ cred_t *cr, caller_context_t *ct)
+{
+ /* return EACCES to be consistent with mmap */
+ if (VTOHLN(vp)->hln_looped != 1)
+ return (EACCES);
+ return (VOP_GETPAGE(REALVP(vp), off, len, protp, pl, plsz, seg, addr,
+ rw, cr, ct));
+}
+
+int
+hyprlofs_putpage(vnode_t *vp, offset_t off, size_t len, int flags,
+ cred_t *cr, caller_context_t *ct)
+{
+ /* return EACCES to be consistent with mmap */
+ if (VTOHLN(vp)->hln_looped != 1)
+ return (EACCES);
+ return (VOP_PUTPAGE(REALVP(vp), off, len, flags, cr, ct));
+}
+
+static int
+hyprlofs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp,
+ size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
+ caller_context_t *ct)
+{
+ /* return EACCES to be consistent with mmap */
+ if (VTOHLN(vp)->hln_looped != 1)
+ return (EACCES);
+ return (VOP_MAP(REALVP(vp), off, as, addrp, len, prot, maxprot, flags,
+ cr, ct));
+}
+
+static int
+hyprlofs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
+ size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
+ caller_context_t *ct)
+{
+ /* return EACCES to be consistent with mmap */
+ if (VTOHLN(vp)->hln_looped != 1)
+ return (EACCES);
+ return (VOP_ADDMAP(REALVP(vp), off, as, addr, len, prot, maxprot,
+ flags, cr, ct));
+}
+
+static int
+hyprlofs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
+ size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr,
+ caller_context_t *ct)
+{
+ /* return EACCES to be consistent with mmap */
+ if (VTOHLN(vp)->hln_looped != 1)
+ return (EACCES);
+ return (VOP_DELMAP(REALVP(vp), off, as, addr, len, prot, maxprot,
+ flags, cr, ct));
+}
+
+static int
+hyprlofs_space(vnode_t *vp, int cmd, struct flock64 *bfp, int flag,
+ offset_t offset, cred_t *cr, caller_context_t *ct)
+{
+ /* return EACCES to be consistent with mmap */
+ if (VTOHLN(vp)->hln_looped != 1)
+ return (EACCES);
+ return (VOP_SPACE(REALVP(vp), cmd, bfp, flag, offset, cr, ct));
+}
+
+static int
+hyprlofs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp,
+ caller_context_t *ct)
+{
+ if (VTOHLN(vp)->hln_looped == 0)
+ return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
+
+ return (VOP_SEEK(REALVP(vp), ooff, noffp, ct));
+}
+
+static int
+hyprlofs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ct)
+{
+ hlnode_t *hp = VTOHLN(vp);
+
+ if (hp->hln_looped == 1)
+ return (VOP_RWLOCK(REALVP(vp), write_lock, ct));
+
+ if (write_lock) {
+ rw_enter(&hp->hln_rwlock, RW_WRITER);
+ } else {
+ rw_enter(&hp->hln_rwlock, RW_READER);
+ }
+ return (write_lock);
+}
+
+static void
+hyprlofs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ct)
+{
+ hlnode_t *hp = VTOHLN(vp);
+
+ if (hp->hln_looped == 1) {
+ VOP_RWUNLOCK(REALVP(vp), write_lock, ct);
+ return;
+ }
+
+ rw_exit(&hp->hln_rwlock);
+}
+
+static int
+hyprlofs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
+ caller_context_t *ct)
+{
+ int error;
+
+ if (VTOHLN(vp)->hln_looped == 1)
+ return (VOP_PATHCONF(REALVP(vp), cmd, valp, cr, ct));
+
+ switch (cmd) {
+ case _PC_XATTR_ENABLED:
+ case _PC_XATTR_EXISTS:
+ case _PC_SATTR_ENABLED:
+ case _PC_SATTR_EXISTS:
+ error = EINVAL;
+ break;
+ case _PC_TIMESTAMP_RESOLUTION:
+ /* nanosecond timestamp resolution */
+ *valp = 1L;
+ error = 0;
+ break;
+ default:
+ error = fs_pathconf(vp, cmd, valp, cr, ct);
+ }
+ return (error);
+}
+
+
+struct vnodeops *hyprlofs_vnodeops;
+
+const fs_operation_def_t hyprlofs_vnodeops_template[] = {
+ VOPNAME_OPEN, { .vop_open = hyprlofs_open },
+ VOPNAME_CLOSE, { .vop_close = hyprlofs_close },
+ VOPNAME_READ, { .vop_read = hyprlofs_read },
+ VOPNAME_WRITE, { .vop_write = hyprlofs_write },
+ VOPNAME_IOCTL, { .vop_ioctl = hyprlofs_ioctl },
+ VOPNAME_GETATTR, { .vop_getattr = hyprlofs_getattr },
+ VOPNAME_SETATTR, { .vop_setattr = hyprlofs_setattr },
+ VOPNAME_ACCESS, { .vop_access = hyprlofs_access },
+ VOPNAME_LOOKUP, { .vop_lookup = hyprlofs_lookup },
+ VOPNAME_CREATE, { .error = fs_error },
+ VOPNAME_REMOVE, { .vop_remove = hyprlofs_remove },
+ VOPNAME_LINK, { .error = fs_error },
+ VOPNAME_RENAME, { .error = fs_error },
+ VOPNAME_MKDIR, { .error = fs_error },
+ VOPNAME_RMDIR, { .vop_rmdir = hyprlofs_rmdir },
+ VOPNAME_READDIR, { .vop_readdir = hyprlofs_readdir },
+ VOPNAME_SYMLINK, { .error = fs_error },
+ VOPNAME_READLINK, { .error = fs_error },
+ VOPNAME_FSYNC, { .vop_fsync = hyprlofs_fsync },
+ VOPNAME_INACTIVE, { .vop_inactive = hyprlofs_inactive },
+ VOPNAME_FID, { .vop_fid = hyprlofs_fid },
+ VOPNAME_RWLOCK, { .vop_rwlock = hyprlofs_rwlock },
+ VOPNAME_RWUNLOCK, { .vop_rwunlock = hyprlofs_rwunlock },
+ VOPNAME_SEEK, { .vop_seek = hyprlofs_seek },
+ VOPNAME_SPACE, { .vop_space = hyprlofs_space },
+ VOPNAME_GETPAGE, { .vop_getpage = hyprlofs_getpage },
+ VOPNAME_PUTPAGE, { .vop_putpage = hyprlofs_putpage },
+ VOPNAME_MAP, { .vop_map = hyprlofs_map },
+ VOPNAME_ADDMAP, { .vop_addmap = hyprlofs_addmap },
+ VOPNAME_DELMAP, { .vop_delmap = hyprlofs_delmap },
+ VOPNAME_PATHCONF, { .vop_pathconf = hyprlofs_pathconf },
+ VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support },
+ NULL, NULL
+};
diff --git a/usr/src/uts/common/fs/lookup.c b/usr/src/uts/common/fs/lookup.c
index 55ffb94805..59ec5d1829 100644
--- a/usr/src/uts/common/fs/lookup.c
+++ b/usr/src/uts/common/fs/lookup.c
@@ -21,6 +21,7 @@
/*
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
*/
@@ -57,6 +58,7 @@
#include <sys/zone.h>
#include <sys/dnlc.h>
#include <sys/fs/snode.h>
+#include <sys/brand.h>
/* Controls whether paths are stored with vnodes. */
int vfs_vnode_path = 1;
@@ -977,6 +979,96 @@ localpath(char *path, struct vnode *vrootp, cred_t *cr)
}
/*
+ * Clean a stale v_path from a vnode. This is only performed if the v_path has
+ * not been altered since it was found to be stale
+ */
+static void
+vnode_clear_vpath(vnode_t *vp, char *vpath_old)
+{
+ mutex_enter(&vp->v_lock);
+ if (vp->v_path != vn_vpath_empty && vp->v_path == vpath_old) {
+ vp->v_path = vn_vpath_empty;
+ mutex_exit(&vp->v_lock);
+ kmem_free(vpath_old, strlen(vpath_old) + 1);
+ } else {
+ mutex_exit(&vp->v_lock);
+ }
+}
+
+/*
+ * Validate that a pathname refers to a given vnode.
+ */
+static int
+vnode_valid_pn(vnode_t *vp, vnode_t *vrootp, pathname_t *pn, pathname_t *rpn,
+ int flags, cred_t *cr)
+{
+ vnode_t *compvp;
+ /*
+ * If we are in a zone or a chroot environment, then we have to
+ * take additional steps, since the path to the root might not
+ * be readable with the current credentials, even though the
+ * process can legitmately access the file. In this case, we
+ * do the following:
+ *
+ * lookuppnvp() with all privileges to get the resolved path.
+ * call localpath() to get the local portion of the path, and
+ * continue as normal.
+ *
+ * If the the conversion to a local path fails, then we continue
+ * as normal. This is a heuristic to make process object file
+ * paths available from within a zone. Because lofs doesn't
+ * support page operations, the vnode stored in the seg_t is
+ * actually the underlying real vnode, not the lofs node itself.
+ * Most of the time, the lofs path is the same as the underlying
+ * vnode (for example, /usr/lib/libc.so.1).
+ */
+ if (vrootp != rootdir) {
+ char *local = NULL;
+
+ VN_HOLD(rootdir);
+ if (lookuppnvp(pn, rpn, FOLLOW, NULL, &compvp, rootdir,
+ rootdir, kcred) == 0) {
+ local = localpath(rpn->pn_path, vrootp, kcred);
+ VN_RELE(compvp);
+ }
+
+ /*
+ * The original pn was changed through lookuppnvp().
+ * Set it to local for next validation attempt.
+ */
+ if (local) {
+ (void) pn_set(pn, local);
+ } else {
+ return (1);
+ }
+ }
+
+ /*
+ * We should have a local path at this point, so start the search from
+ * the root of the current process.
+ */
+ VN_HOLD(vrootp);
+ if (vrootp != rootdir)
+ VN_HOLD(vrootp);
+ if (lookuppnvp(pn, rpn, FOLLOW | flags, NULL, &compvp, vrootp, vrootp,
+ cr) == 0) {
+ /*
+ * Check to see if the returned vnode is the same as the one we
+ * expect.
+ */
+ if (vn_compare(vp, compvp) ||
+ vnode_match(vp, compvp, cr)) {
+ VN_RELE(compvp);
+ return (0);
+ } else {
+ VN_RELE(compvp);
+ }
+ }
+
+ return (1);
+}
+
+/*
* Given a directory, return the full, resolved path. This looks up "..",
* searches for the given vnode in the parent, appends the component, etc. It
* is used to implement vnodetopath() and getcwd() when the cached path fails.
@@ -995,6 +1087,8 @@ dirtopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, int flags,
char *bufloc;
size_t dlen = DIRENT64_RECLEN(MAXPATHLEN);
refstr_t *mntpt;
+ char *vpath_cached;
+ boolean_t vpath_stale;
/* Operation only allowed on directories */
ASSERT(vp->v_type == VDIR);
@@ -1088,40 +1182,28 @@ dirtopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, int flags,
* Shortcut: see if this vnode has correct v_path. If so,
* we have the work done.
*/
+ vpath_cached = NULL;
+ vpath_stale = B_FALSE;
mutex_enter(&vp->v_lock);
- if (vp->v_path != NULL) {
-
- if ((err = pn_set(&pn, vp->v_path)) == 0) {
- mutex_exit(&vp->v_lock);
- rpn.pn_path = rpn.pn_buf;
-
- /*
- * Ensure the v_path pointing to correct vnode
- */
- VN_HOLD(vrootp);
- if (vrootp != rootdir)
- VN_HOLD(vrootp);
- if (lookuppnvp(&pn, &rpn, flags, NULL,
- &cmpvp, vrootp, vrootp, cr) == 0) {
-
- if (VN_CMP(vp, cmpvp)) {
- VN_RELE(cmpvp);
+ if (vp->v_path != vn_vpath_empty &&
+ pn_set(&pn, vp->v_path) == 0) {
+ vpath_cached = vp->v_path;
+ mutex_exit(&vp->v_lock);
+ rpn.pn_path = rpn.pn_buf;
- complen = strlen(rpn.pn_path);
- bufloc -= complen;
- if (bufloc < buf) {
- err = ERANGE;
- goto out;
- }
- bcopy(rpn.pn_path, bufloc,
- complen);
- break;
- } else {
- VN_RELE(cmpvp);
- }
+ /* Ensure the v_path pointing to correct vnode */
+ if (vnode_valid_pn(vp, vrootp, &pn, &rpn, flags,
+ cr) == 0) {
+ complen = strlen(rpn.pn_path);
+ bufloc -= complen;
+ if (bufloc < buf) {
+ err = ERANGE;
+ goto out;
}
+ bcopy(rpn.pn_path, bufloc, complen);
+ break;
} else {
- mutex_exit(&vp->v_lock);
+ vpath_stale = B_TRUE;
}
} else {
mutex_exit(&vp->v_lock);
@@ -1166,38 +1248,6 @@ dirtopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, int flags,
}
/*
- * Try to obtain the path component from dnlc cache
- * before searching through the directory.
- */
- if ((cmpvp = dnlc_reverse_lookup(vp, dbuf, dlen)) != NULL) {
- /*
- * If we got parent vnode as a result,
- * then the answered path is correct.
- */
- if (VN_CMP(cmpvp, pvp)) {
- VN_RELE(cmpvp);
- complen = strlen(dbuf);
- bufloc -= complen;
- if (bufloc <= buf) {
- err = ENAMETOOLONG;
- goto out;
- }
- bcopy(dbuf, bufloc, complen);
-
- /* Prepend a slash to the current path */
- *--bufloc = '/';
-
- /* And continue with the next component */
- VN_RELE(vp);
- vp = pvp;
- pvp = NULL;
- continue;
- } else {
- VN_RELE(cmpvp);
- }
- }
-
- /*
* Search the parent directory for the entry corresponding to
* this vnode.
*/
@@ -1215,6 +1265,11 @@ dirtopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, int flags,
/* Prepend a slash to the current path. */
*--bufloc = '/';
+ /* Clear vp->v_path if it was found to be stale. */
+ if (vpath_stale == B_TRUE) {
+ vnode_clear_vpath(vp, vpath_cached);
+ }
+
/* And continue with the next component */
VN_RELE(vp);
vp = pvp;
@@ -1306,144 +1361,49 @@ vnodetopath_common(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen,
VN_RELE(vp);
}
- pn_alloc(&pn);
/*
- * Check to see if we have a cached path in the vnode.
+ * Check to see if we have a valid cached path in the vnode.
*/
+ pn_alloc(&pn);
mutex_enter(&vp->v_lock);
- if (vp->v_path != NULL) {
+ if (vp->v_path != vn_vpath_empty) {
(void) pn_set(&pn, vp->v_path);
mutex_exit(&vp->v_lock);
- pn_alloc(&rpn);
-
/* We should only cache absolute paths */
ASSERT(pn.pn_buf[0] == '/');
- /*
- * If we are in a zone or a chroot environment, then we have to
- * take additional steps, since the path to the root might not
- * be readable with the current credentials, even though the
- * process can legitmately access the file. In this case, we
- * do the following:
- *
- * lookuppnvp() with all privileges to get the resolved path.
- * call localpath() to get the local portion of the path, and
- * continue as normal.
- *
- * If the the conversion to a local path fails, then we continue
- * as normal. This is a heuristic to make process object file
- * paths available from within a zone. Because lofs doesn't
- * support page operations, the vnode stored in the seg_t is
- * actually the underlying real vnode, not the lofs node itself.
- * Most of the time, the lofs path is the same as the underlying
- * vnode (for example, /usr/lib/libc.so.1).
- */
- if (vrootp != rootdir) {
- char *local = NULL;
- VN_HOLD(rootdir);
- if (lookuppnvp(&pn, &rpn, FOLLOW,
- NULL, &compvp, rootdir, rootdir, kcred) == 0) {
- local = localpath(rpn.pn_path, vrootp,
- kcred);
- VN_RELE(compvp);
- }
-
- /*
- * The original pn was changed through lookuppnvp().
- * Set it to local for next validation attempt.
- */
- if (local) {
- (void) pn_set(&pn, local);
- } else {
- goto notcached;
+ pn_alloc(&rpn);
+ if (vnode_valid_pn(vp, vrootp, &pn, &rpn, flags, cr) == 0) {
+ /* Return the result, if we're able. */
+ if (buflen > rpn.pn_pathlen) {
+ bcopy(rpn.pn_path, buf, rpn.pn_pathlen + 1);
+ pn_free(&pn);
+ pn_free(&rpn);
+ VN_RELE(vrootp);
+ if (doclose) {
+ (void) VOP_CLOSE(vp, FREAD, 1, 0, cr,
+ NULL);
+ VN_RELE(vp);
+ }
+ return (0);
}
}
-
/*
- * We should have a local path at this point, so start the
- * search from the root of the current process.
+ * A stale v_path will be purged by the later dirtopath lookup.
*/
- VN_HOLD(vrootp);
- if (vrootp != rootdir)
- VN_HOLD(vrootp);
- ret = lookuppnvp(&pn, &rpn, FOLLOW | flags, NULL,
- &compvp, vrootp, vrootp, cr);
- if (ret == 0) {
- /*
- * Check to see if the returned vnode is the same as
- * the one we expect. If not, give up.
- */
- if (!vn_compare(vp, compvp) &&
- !vnode_match(vp, compvp, cr)) {
- VN_RELE(compvp);
- goto notcached;
- }
-
- VN_RELE(compvp);
-
- /*
- * Return the result.
- */
- if (buflen <= rpn.pn_pathlen)
- goto notcached;
-
- bcopy(rpn.pn_path, buf, rpn.pn_pathlen + 1);
- pn_free(&pn);
- pn_free(&rpn);
- VN_RELE(vrootp);
- if (doclose) {
- (void) VOP_CLOSE(vp, FREAD, 1, 0, cr, NULL);
- VN_RELE(vp);
- }
- return (0);
- }
-
-notcached:
pn_free(&rpn);
} else {
mutex_exit(&vp->v_lock);
}
-
pn_free(&pn);
if (vp->v_type != VDIR) {
- /*
- * If we don't have a directory, try to find it in the dnlc via
- * reverse lookup. Once this is found, we can use the regular
- * directory search to find the full path.
- */
- if ((pvp = dnlc_reverse_lookup(vp, path, MAXNAMELEN)) != NULL) {
- /*
- * Check if we have read privilege so, that
- * we can lookup the path in the directory
- */
- ret = 0;
- if ((flags & LOOKUP_CHECKREAD)) {
- ret = VOP_ACCESS(pvp, VREAD, 0, cr, NULL);
- }
- if (ret == 0) {
- ret = dirtopath(vrootp, pvp, buf, buflen,
- flags, cr);
- }
- if (ret == 0) {
- len = strlen(buf);
- if (len + strlen(path) + 1 >= buflen) {
- ret = ENAMETOOLONG;
- } else {
- if (buf[len - 1] != '/')
- buf[len++] = '/';
- bcopy(path, buf + len,
- strlen(path) + 1);
- }
- }
-
- VN_RELE(pvp);
- } else
- ret = ENOENT;
- } else
+ ret = ENOENT;
+ } else {
ret = dirtopath(vrootp, vp, buf, buflen, flags, cr);
+ }
VN_RELE(vrootp);
if (doclose) {
diff --git a/usr/src/uts/common/fs/lxproc/lxpr_subr.c b/usr/src/uts/common/fs/lxproc/lxpr_subr.c
new file mode 100644
index 0000000000..3c1405d4af
--- /dev/null
+++ b/usr/src/uts/common/fs/lxproc/lxpr_subr.c
@@ -0,0 +1,524 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/varargs.h>
+#include <sys/cpuvar.h>
+#include <sys/mman.h>
+#include <sys/vmsystm.h>
+#include <sys/prsystm.h>
+
+#include "lxproc.h"
+
+#define LXPRCACHE_NAME "lxpr_cache"
+
+static int lxpr_node_constructor(void *, void *, int);
+static void lxpr_node_destructor(void *, void *);
+
+static kmem_cache_t *lxpr_node_cache;
+
+struct lxpr_uiobuf {
+ uio_t *uiop;
+ char *buffer;
+ uint32_t buffsize;
+ char *pos;
+ size_t beg;
+ int error;
+};
+
+int lxpr_bufsize = 4000;
+
+struct lxpr_uiobuf *
+lxpr_uiobuf_new(uio_t *uiop)
+{
+ /* Allocate memory for both lxpr_uiobuf and output buffer */
+ int bufsize = lxpr_bufsize;
+ struct lxpr_uiobuf *uiobuf =
+ kmem_alloc(sizeof (struct lxpr_uiobuf) + bufsize, KM_SLEEP);
+
+ uiobuf->uiop = uiop;
+ uiobuf->buffer = (char *)&uiobuf[1];
+ uiobuf->buffsize = bufsize;
+ uiobuf->pos = uiobuf->buffer;
+ uiobuf->beg = 0;
+ uiobuf->error = 0;
+
+ return (uiobuf);
+}
+
+void
+lxpr_uiobuf_free(struct lxpr_uiobuf *uiobuf)
+{
+ ASSERT(uiobuf != NULL);
+ ASSERT(uiobuf->pos == uiobuf->buffer);
+
+ kmem_free(uiobuf, sizeof (struct lxpr_uiobuf) + uiobuf->buffsize);
+}
+
+void
+lxpr_uiobuf_seek(struct lxpr_uiobuf *uiobuf, offset_t offset)
+{
+ uiobuf->uiop->uio_offset = (off_t)offset;
+}
+
+void
+lxpr_uiobuf_seterr(struct lxpr_uiobuf *uiobuf, int err)
+{
+ ASSERT(uiobuf->error == 0);
+
+ uiobuf->error = err;
+}
+
+int
+lxpr_uiobuf_flush(struct lxpr_uiobuf *uiobuf)
+{
+ off_t off = uiobuf->uiop->uio_offset;
+ caddr_t uaddr = uiobuf->buffer;
+ size_t beg = uiobuf->beg;
+ size_t size = (uintptr_t)uiobuf->pos - (uintptr_t)uaddr;
+
+ if (uiobuf->error == 0 && uiobuf->uiop->uio_resid != 0) {
+ ASSERT(off >= beg);
+
+ if (beg + size > off && off >= 0)
+ uiobuf->error =
+ uiomove(uaddr + (off - beg), size - (off - beg),
+ UIO_READ, uiobuf->uiop);
+
+ uiobuf->beg += size;
+ }
+
+ uiobuf->pos = uaddr;
+
+ return (uiobuf->error);
+}
+
+void
+lxpr_uiobuf_write(struct lxpr_uiobuf *uiobuf, const char *buf, size_t size)
+{
+ /* While we can still carry on */
+ while (uiobuf->error == 0 && uiobuf->uiop->uio_resid != 0) {
+ uintptr_t remain = (uintptr_t)uiobuf->buffsize -
+ ((uintptr_t)uiobuf->pos - (uintptr_t)uiobuf->buffer);
+
+ /* Enough space in buffer? */
+ if (remain >= size) {
+ bcopy(buf, uiobuf->pos, size);
+ uiobuf->pos += size;
+ return;
+ }
+
+ /* Not enough space, so copy all we can and try again */
+ bcopy(buf, uiobuf->pos, remain);
+ uiobuf->pos += remain;
+ (void) lxpr_uiobuf_flush(uiobuf);
+ buf += remain;
+ size -= remain;
+ }
+}
+
+#define TYPBUFFSIZE 256
+
+void
+lxpr_uiobuf_printf(struct lxpr_uiobuf *uiobuf, const char *fmt, ...)
+{
+ va_list args;
+ char buff[TYPBUFFSIZE];
+ int len;
+ char *buffer;
+
+ /* Can we still do any output */
+ if (uiobuf->error != 0 || uiobuf->uiop->uio_resid == 0)
+ return;
+
+ va_start(args, fmt);
+
+ /* Try using stack allocated buffer */
+ len = vsnprintf(buff, TYPBUFFSIZE, fmt, args);
+ if (len < TYPBUFFSIZE) {
+ va_end(args);
+ lxpr_uiobuf_write(uiobuf, buff, len);
+ return;
+ }
+
+ /* Not enough space in pre-allocated buffer */
+ buffer = kmem_alloc(len + 1, KM_SLEEP);
+
+ /*
+ * We know we allocated the correct amount of space
+ * so no check on the return value
+ */
+ (void) vsnprintf(buffer, len+1, fmt, args);
+ lxpr_uiobuf_write(uiobuf, buffer, len);
+ va_end(args);
+ kmem_free(buffer, len+1);
+}
+
+/*
+ * lxpr_lock():
+ *
+ * Lookup process from pid and return with p_plock and P_PR_LOCK held.
+ */
+proc_t *
+lxpr_lock(pid_t pid)
+{
+ proc_t *p;
+ kmutex_t *mp;
+
+ ASSERT(!MUTEX_HELD(&pidlock));
+
+ for (;;) {
+ mutex_enter(&pidlock);
+
+ /*
+ * If the pid is 1, we really want the zone's init process
+ */
+ p = prfind((pid == 1) ?
+ curproc->p_zone->zone_proc_initpid : pid);
+
+ if (p == NULL || p->p_stat == SIDL) {
+ mutex_exit(&pidlock);
+ return (NULL);
+ }
+
+ /*
+ * p_lock is persistent, but p itself is not -- it could
+ * vanish during cv_wait(). Load p->p_lock now so we can
+ * drop it after cv_wait() without referencing p.
+ */
+ mp = &p->p_lock;
+ mutex_enter(mp);
+
+ mutex_exit(&pidlock);
+
+ if (p->p_flag & SEXITING) {
+ /*
+ * This process is exiting -- let it go.
+ */
+ mutex_exit(mp);
+ return (NULL);
+ }
+
+ if (!(p->p_proc_flag & P_PR_LOCK))
+ break;
+
+ cv_wait(&pr_pid_cv[p->p_slot], mp);
+ mutex_exit(mp);
+ }
+
+ p->p_proc_flag |= P_PR_LOCK;
+ THREAD_KPRI_REQUEST();
+ return (p);
+}
+
+/*
+ * lxpr_unlock()
+ *
+ * Unlock locked process
+ */
+void
+lxpr_unlock(proc_t *p)
+{
+ ASSERT(p->p_proc_flag & P_PR_LOCK);
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ ASSERT(!MUTEX_HELD(&pidlock));
+
+ cv_signal(&pr_pid_cv[p->p_slot]);
+ p->p_proc_flag &= ~P_PR_LOCK;
+ mutex_exit(&p->p_lock);
+ THREAD_KPRI_RELEASE();
+}
+
+void
+lxpr_initnodecache()
+{
+ lxpr_node_cache = kmem_cache_create(LXPRCACHE_NAME,
+ sizeof (lxpr_node_t), 0,
+ lxpr_node_constructor, lxpr_node_destructor, NULL, NULL, NULL, 0);
+}
+
+void
+lxpr_fininodecache()
+{
+ kmem_cache_destroy(lxpr_node_cache);
+}
+
+/* ARGSUSED */
+static int
+lxpr_node_constructor(void *buf, void *un, int kmflags)
+{
+ lxpr_node_t *lxpnp = buf;
+ vnode_t *vp;
+
+ vp = lxpnp->lxpr_vnode = vn_alloc(kmflags);
+ if (vp == NULL)
+ return (-1);
+
+ (void) vn_setops(vp, lxpr_vnodeops);
+ vp->v_data = lxpnp;
+
+ return (0);
+}
+
+/* ARGSUSED */
+static void
+lxpr_node_destructor(void *buf, void *un)
+{
+ lxpr_node_t *lxpnp = buf;
+
+ vn_free(LXPTOV(lxpnp));
+}
+
+/*
+ * Calculate an inode number
+ *
+ * This takes various bits of info and munges them
+ * to give the inode number for an lxproc node
+ */
+ino_t
+lxpr_inode(lxpr_nodetype_t type, pid_t pid, int fd)
+{
+ if (pid == 1)
+ pid = curproc->p_zone->zone_proc_initpid;
+
+ switch (type) {
+ case LXPR_PIDDIR:
+ return (pid + 1);
+ case LXPR_PROCDIR:
+ return (maxpid + 2);
+ case LXPR_PID_FD_FD:
+ return (maxpid + 2 +
+ (pid * (LXPR_FD_PERPROC + LXPR_NFILES)) +
+ LXPR_NFILES + fd);
+ default:
+ return (maxpid + 2 +
+ (pid * (LXPR_FD_PERPROC + LXPR_NFILES)) +
+ type);
+ }
+}
+
+/*
+ * Return inode number of parent (directory)
+ */
+ino_t
+lxpr_parentinode(lxpr_node_t *lxpnp)
+{
+ /*
+ * If the input node is the root then the parent inode
+ * is the mounted on inode so just return our inode number
+ */
+ if (lxpnp->lxpr_type != LXPR_PROCDIR)
+ return (VTOLXP(lxpnp->lxpr_parent)->lxpr_ino);
+ else
+ return (lxpnp->lxpr_ino);
+}
+
+/*
+ * Allocate a new lxproc node
+ *
+ * This also allocates the vnode associated with it
+ */
+lxpr_node_t *
+lxpr_getnode(vnode_t *dp, lxpr_nodetype_t type, proc_t *p, int fd)
+{
+ lxpr_node_t *lxpnp;
+ vnode_t *vp;
+ user_t *up;
+ timestruc_t now;
+
+ /*
+ * Allocate a new node. It is deallocated in vop_innactive
+ */
+ lxpnp = kmem_cache_alloc(lxpr_node_cache, KM_SLEEP);
+
+ /*
+ * Set defaults (may be overridden below)
+ */
+ gethrestime(&now);
+ lxpnp->lxpr_type = type;
+ lxpnp->lxpr_realvp = NULL;
+ lxpnp->lxpr_parent = dp;
+ VN_HOLD(dp);
+ if (p != NULL) {
+ lxpnp->lxpr_pid = ((p->p_pid ==
+ curproc->p_zone->zone_proc_initpid) ? 1 : p->p_pid);
+
+ lxpnp->lxpr_time = PTOU(p)->u_start;
+ lxpnp->lxpr_uid = crgetruid(p->p_cred);
+ lxpnp->lxpr_gid = crgetrgid(p->p_cred);
+ lxpnp->lxpr_ino = lxpr_inode(type, p->p_pid, fd);
+ } else {
+ /* Pretend files without a proc belong to sched */
+ lxpnp->lxpr_pid = 0;
+ lxpnp->lxpr_time = now;
+ lxpnp->lxpr_uid = lxpnp->lxpr_gid = 0;
+ lxpnp->lxpr_ino = lxpr_inode(type, 0, 0);
+ }
+
+ /* initialize the vnode data */
+ vp = lxpnp->lxpr_vnode;
+ vn_reinit(vp);
+ vp->v_flag = VNOCACHE|VNOMAP|VNOSWAP|VNOMOUNT;
+ vp->v_vfsp = dp->v_vfsp;
+
+ /*
+ * Do node specific stuff
+ */
+ switch (type) {
+ case LXPR_PROCDIR:
+ vp->v_flag |= VROOT;
+ vp->v_type = VDIR;
+ lxpnp->lxpr_mode = 0555; /* read-search by everyone */
+ break;
+
+ case LXPR_PID_CURDIR:
+ ASSERT(p != NULL);
+
+ /*
+ * Zombie check. p_stat is officially protected by pidlock,
+ * but we can't grab pidlock here because we already hold
+ * p_lock. Luckily if we look at the process exit code
+ * we see that p_stat only transisions from SRUN to SZOMB
+ * while p_lock is held. Aside from this, the only other
+ * p_stat transition that we need to be aware about is
+ * SIDL to SRUN, but that's not a problem since lxpr_lock()
+ * ignores nodes in the SIDL state so we'll never get a node
+ * that isn't already in the SRUN state.
+ */
+ if (p->p_stat == SZOMB) {
+ lxpnp->lxpr_realvp = NULL;
+ } else {
+ up = PTOU(p);
+ lxpnp->lxpr_realvp = up->u_cdir;
+ ASSERT(lxpnp->lxpr_realvp != NULL);
+ VN_HOLD(lxpnp->lxpr_realvp);
+ }
+ vp->v_type = VLNK;
+ lxpnp->lxpr_mode = 0777; /* anyone does anything ! */
+ break;
+
+ case LXPR_PID_ROOTDIR:
+ ASSERT(p != NULL);
+ /* Zombie check. see locking comment above */
+ if (p->p_stat == SZOMB) {
+ lxpnp->lxpr_realvp = NULL;
+ } else {
+ up = PTOU(p);
+ lxpnp->lxpr_realvp =
+ up->u_rdir != NULL ? up->u_rdir : rootdir;
+ ASSERT(lxpnp->lxpr_realvp != NULL);
+ VN_HOLD(lxpnp->lxpr_realvp);
+ }
+ vp->v_type = VLNK;
+ lxpnp->lxpr_mode = 0777; /* anyone does anything ! */
+ break;
+
+ case LXPR_PID_EXE:
+ ASSERT(p != NULL);
+ lxpnp->lxpr_realvp = p->p_exec;
+ if (lxpnp->lxpr_realvp != NULL) {
+ VN_HOLD(lxpnp->lxpr_realvp);
+ }
+ vp->v_type = VLNK;
+ lxpnp->lxpr_mode = 0777;
+ break;
+
+ case LXPR_SELF:
+ vp->v_type = VLNK;
+ lxpnp->lxpr_mode = 0777; /* anyone does anything ! */
+ break;
+
+ case LXPR_PID_FD_FD:
+ ASSERT(p != NULL);
+ /* lxpr_realvp is set after we return */
+ vp->v_type = VLNK;
+ lxpnp->lxpr_mode = 0700; /* read-write-exe owner only */
+ break;
+
+ case LXPR_PID_FDDIR:
+ ASSERT(p != NULL);
+ vp->v_type = VDIR;
+ lxpnp->lxpr_mode = 0500; /* read-search by owner only */
+ break;
+
+ case LXPR_PIDDIR:
+ ASSERT(p != NULL);
+ vp->v_type = VDIR;
+ lxpnp->lxpr_mode = 0511;
+ break;
+
+ case LXPR_NETDIR:
+ vp->v_type = VDIR;
+ lxpnp->lxpr_mode = 0555; /* read-search by all */
+ break;
+
+ case LXPR_PID_ENV:
+ case LXPR_PID_MEM:
+ ASSERT(p != NULL);
+ /*FALLTHRU*/
+ case LXPR_KCORE:
+ vp->v_type = VREG;
+ lxpnp->lxpr_mode = 0400; /* read-only by owner only */
+ break;
+
+ default:
+ vp->v_type = VREG;
+ lxpnp->lxpr_mode = 0444; /* read-only by all */
+ break;
+ }
+
+ return (lxpnp);
+}
+
+
+/*
+ * Free the storage obtained from lxpr_getnode().
+ */
+void
+lxpr_freenode(lxpr_node_t *lxpnp)
+{
+ ASSERT(lxpnp != NULL);
+ ASSERT(LXPTOV(lxpnp) != NULL);
+
+ /*
+ * delete any association with realvp
+ */
+ if (lxpnp->lxpr_realvp != NULL)
+ VN_RELE(lxpnp->lxpr_realvp);
+
+ /*
+ * delete any association with parent vp
+ */
+ if (lxpnp->lxpr_parent != NULL)
+ VN_RELE(lxpnp->lxpr_parent);
+
+ /*
+ * Release the lxprnode.
+ */
+ kmem_cache_free(lxpr_node_cache, lxpnp);
+}
diff --git a/usr/src/uts/common/fs/lxproc/lxpr_vfsops.c b/usr/src/uts/common/fs/lxproc/lxpr_vfsops.c
new file mode 100644
index 0000000000..1bb7bd3823
--- /dev/null
+++ b/usr/src/uts/common/fs/lxproc/lxpr_vfsops.c
@@ -0,0 +1,367 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/cmn_err.h>
+#include <sys/cred.h>
+#include <sys/debug.h>
+#include <sys/errno.h>
+#include <sys/proc.h>
+#include <sys/stat.h>
+#include <sys/statvfs.h>
+#include <sys/sysmacros.h>
+#include <sys/systm.h>
+#include <sys/var.h>
+#include <sys/vfs.h>
+#include <sys/vfs_opreg.h>
+#include <sys/vnode.h>
+#include <sys/mode.h>
+#include <sys/signal.h>
+#include <sys/user.h>
+#include <sys/mount.h>
+#include <sys/bitmap.h>
+#include <sys/kmem.h>
+#include <sys/policy.h>
+#include <sys/modctl.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+
+#include "lxproc.h"
+
+/* Module level parameters */
+static int lxprocfstype;
+static dev_t lxprocdev;
+static kmutex_t lxpr_mount_lock;
+
+int nproc_highbit; /* highbit(v.v_nproc) */
+
+static int lxpr_mount(vfs_t *, vnode_t *, mounta_t *, cred_t *);
+static int lxpr_unmount(vfs_t *, int, cred_t *);
+static int lxpr_root(vfs_t *, vnode_t **);
+static int lxpr_statvfs(vfs_t *, statvfs64_t *);
+static int lxpr_init(int, char *);
+
+static vfsdef_t vfw = {
+ VFSDEF_VERSION,
+ "lxproc",
+ lxpr_init,
+ VSW_ZMOUNT,
+ NULL
+};
+
+/*
+ * Module linkage information for the kernel.
+ */
+extern struct mod_ops mod_fsops;
+
+static struct modlfs modlfs = {
+ &mod_fsops, "generic linux procfs", &vfw
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, (void *)&modlfs, NULL
+};
+
+int
+_init(void)
+{
+ return (mod_install(&modlinkage));
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ int retval;
+
+ /*
+ * attempt to unload the module
+ */
+ if ((retval = mod_remove(&modlinkage)) != 0)
+ goto done;
+
+ /*
+ * destroy lxpr_node cache
+ */
+ lxpr_fininodecache();
+
+ /*
+ * clean out the vfsops and vnodeops
+ */
+ (void) vfs_freevfsops_by_type(lxprocfstype);
+ vn_freevnodeops(lxpr_vnodeops);
+
+ mutex_destroy(&lxpr_mount_lock);
+done:
+ return (retval);
+}
+
+static int
+lxpr_init(int fstype, char *name)
+{
+ static const fs_operation_def_t lxpr_vfsops_template[] = {
+ VFSNAME_MOUNT, { .vfs_mount = lxpr_mount },
+ VFSNAME_UNMOUNT, { .vfs_unmount = lxpr_unmount },
+ VFSNAME_ROOT, { .vfs_root = lxpr_root },
+ VFSNAME_STATVFS, { .vfs_statvfs = lxpr_statvfs },
+ NULL, NULL
+ };
+ extern const fs_operation_def_t lxpr_vnodeops_template[];
+ int error;
+ major_t dev;
+
+ nproc_highbit = highbit(v.v_proc);
+ lxprocfstype = fstype;
+ ASSERT(lxprocfstype != 0);
+
+ mutex_init(&lxpr_mount_lock, NULL, MUTEX_DEFAULT, NULL);
+
+ /*
+ * Associate VFS ops vector with this fstype.
+ */
+ error = vfs_setfsops(fstype, lxpr_vfsops_template, NULL);
+ if (error != 0) {
+ cmn_err(CE_WARN, "lxpr_init: bad vfs ops template");
+ return (error);
+ }
+
+ /*
+ * Set up vnode ops vector too.
+ */
+ error = vn_make_ops(name, lxpr_vnodeops_template, &lxpr_vnodeops);
+ if (error != 0) {
+ (void) vfs_freevfsops_by_type(fstype);
+ cmn_err(CE_WARN, "lxpr_init: bad vnode ops template");
+ return (error);
+ }
+
+ /*
+ * Assign a unique "device" number (used by stat(2)).
+ */
+ if ((dev = getudev()) == (major_t)-1) {
+ cmn_err(CE_WARN, "lxpr_init: can't get unique device number");
+ dev = 0;
+ }
+
+ /*
+ * Make the pseudo device
+ */
+ lxprocdev = makedevice(dev, 0);
+
+ /*
+ * Initialize cache for lxpr_nodes
+ */
+ lxpr_initnodecache();
+
+ return (0);
+}
+
+static int
+lxpr_mount(vfs_t *vfsp, vnode_t *mvp, mounta_t *uap, cred_t *cr)
+{
+ lxpr_mnt_t *lxpr_mnt;
+ zone_t *zone = curproc->p_zone;
+ ldi_ident_t li;
+ int err;
+
+ /*
+ * must be root to mount
+ */
+ if (secpolicy_fs_mount(cr, mvp, vfsp) != 0)
+ return (EPERM);
+
+ /*
+ * mount point must be a directory
+ */
+ if (mvp->v_type != VDIR)
+ return (ENOTDIR);
+
+ if (zone == global_zone) {
+ zone_t *mntzone;
+
+ mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));
+ zone_rele(mntzone);
+ if (zone != mntzone)
+ return (EBUSY);
+ }
+
+ /*
+ * Having the resource be anything but "lxproc" doesn't make sense
+ */
+ vfs_setresource(vfsp, "lxproc", 0);
+
+ lxpr_mnt = kmem_alloc(sizeof (*lxpr_mnt), KM_SLEEP);
+
+ if ((err = ldi_ident_from_mod(&modlinkage, &li)) != 0) {
+ kmem_free(lxpr_mnt, sizeof (*lxpr_mnt));
+ return (err);
+ }
+
+ lxpr_mnt->lxprm_li = li;
+
+ mutex_enter(&lxpr_mount_lock);
+
+ /*
+ * Ensure we don't allow overlaying mounts
+ */
+ mutex_enter(&mvp->v_lock);
+ if ((uap->flags & MS_OVERLAY) == 0 &&
+ (mvp->v_count > 1 || (mvp->v_flag & VROOT))) {
+ mutex_exit(&mvp->v_lock);
+ mutex_exit(&lxpr_mount_lock);
+ kmem_free(lxpr_mnt, sizeof ((*lxpr_mnt)));
+ return (EBUSY);
+ }
+ mutex_exit(&mvp->v_lock);
+
+ /*
+ * allocate the first vnode
+ */
+ zone_hold(lxpr_mnt->lxprm_zone = zone);
+
+ /* Arbitrarily set the parent vnode to the mounted over directory */
+ lxpr_mnt->lxprm_node = lxpr_getnode(mvp, LXPR_PROCDIR, NULL, 0);
+
+ /* Correctly set the fs for the root node */
+ lxpr_mnt->lxprm_node->lxpr_vnode->v_vfsp = vfsp;
+
+ vfs_make_fsid(&vfsp->vfs_fsid, lxprocdev, lxprocfstype);
+ vfsp->vfs_bsize = DEV_BSIZE;
+ vfsp->vfs_fstype = lxprocfstype;
+ vfsp->vfs_data = (caddr_t)lxpr_mnt;
+ vfsp->vfs_dev = lxprocdev;
+
+ mutex_exit(&lxpr_mount_lock);
+
+ return (0);
+}
+
+static int
+lxpr_unmount(vfs_t *vfsp, int flag, cred_t *cr)
+{
+ lxpr_mnt_t *lxpr_mnt = (lxpr_mnt_t *)vfsp->vfs_data;
+ vnode_t *vp;
+ int count;
+
+ ASSERT(lxpr_mnt != NULL);
+ vp = LXPTOV(lxpr_mnt->lxprm_node);
+
+ mutex_enter(&lxpr_mount_lock);
+
+ /*
+ * must be root to unmount
+ */
+ if (secpolicy_fs_unmount(cr, vfsp) != 0) {
+ mutex_exit(&lxpr_mount_lock);
+ return (EPERM);
+ }
+
+ /*
+ * forced unmount is not supported by this file system
+ */
+ if (flag & MS_FORCE) {
+ mutex_exit(&lxpr_mount_lock);
+ return (ENOTSUP);
+ }
+
+ /*
+ * Ensure that no vnodes are in use on this mount point.
+ */
+ mutex_enter(&vp->v_lock);
+ count = vp->v_count;
+ mutex_exit(&vp->v_lock);
+ if (count > 1) {
+ mutex_exit(&lxpr_mount_lock);
+ return (EBUSY);
+ }
+
+ /*
+ * purge the dnlc cache for vnode entries
+ * associated with this file system
+ */
+ count = dnlc_purge_vfsp(vfsp, 0);
+
+ /*
+ * free up the lxprnode
+ */
+ lxpr_freenode(lxpr_mnt->lxprm_node);
+ zone_rele(lxpr_mnt->lxprm_zone);
+ kmem_free(lxpr_mnt, sizeof (*lxpr_mnt));
+
+ mutex_exit(&lxpr_mount_lock);
+
+ return (0);
+}
+
+static int
+lxpr_root(vfs_t *vfsp, vnode_t **vpp)
+{
+ lxpr_node_t *lxpnp = ((lxpr_mnt_t *)vfsp->vfs_data)->lxprm_node;
+ vnode_t *vp = LXPTOV(lxpnp);
+
+ VN_HOLD(vp);
+ *vpp = vp;
+ return (0);
+}
+
+static int
+lxpr_statvfs(vfs_t *vfsp, statvfs64_t *sp)
+{
+ int n;
+ dev32_t d32;
+ extern uint_t nproc;
+
+ n = v.v_proc - nproc;
+
+ bzero((caddr_t)sp, sizeof (*sp));
+ sp->f_bsize = DEV_BSIZE;
+ sp->f_frsize = DEV_BSIZE;
+ sp->f_blocks = (fsblkcnt64_t)0;
+ sp->f_bfree = (fsblkcnt64_t)0;
+ sp->f_bavail = (fsblkcnt64_t)0;
+ sp->f_files = (fsfilcnt64_t)v.v_proc + 2;
+ sp->f_ffree = (fsfilcnt64_t)n;
+ sp->f_favail = (fsfilcnt64_t)n;
+ (void) cmpldev(&d32, vfsp->vfs_dev);
+ sp->f_fsid = d32;
+ /* It is guaranteed that vsw_name will fit in f_basetype */
+ (void) strcpy(sp->f_basetype, vfssw[lxprocfstype].vsw_name);
+ sp->f_flag = vf_to_stf(vfsp->vfs_flag);
+ sp->f_namemax = 64; /* quite arbitrary */
+
+ (void) strcpy(sp->f_fstr, "lxproc");
+
+ return (0);
+}
diff --git a/usr/src/uts/common/fs/lxproc/lxpr_vnops.c b/usr/src/uts/common/fs/lxproc/lxpr_vnops.c
new file mode 100644
index 0000000000..9c996891f3
--- /dev/null
+++ b/usr/src/uts/common/fs/lxproc/lxpr_vnops.c
@@ -0,0 +1,3099 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+/*
+ * lxproc -- a loosely Linux-compatible /proc
+ *
+ * We have -- confusingly -- two implementations of Linux /proc. One is to
+ * support the LX brand with a Linux /proc entirely compatible with the Linux
+ * world view; the other -- this one -- is to support native (but Linux-borne)
+ * programs that wish to view the native system via the Linux /proc model. So
+ * the aspiration here is to provide something that sufficiently approximates
+ * the Linux /proc implementation for purposes of offering some compatibility
+ * for simple Linux /proc readers (e.g., ps/top/htop). However, it is not
+ * intended to exactly mimic Linux semantics; when choosing between offering
+ * compatibility and telling the truth, we emphatically pick the truth. A
+ * particular glaring example of this is the Linux notion of "tasks" (that is,
+ * threads), which -- due to historical misadventures on Linux -- allocate their
+ * identifiers from the process identifier space. (That is, each thread has in
+ * effect a pid.) Some Linux /proc readers have come to depend on this
+ * attribute, and become confused when threads appear with proper identifiers,
+ * so we simply opt for the pre-2.6 behavior, and do not present the tasks
+ * directory at all. Similarly, when choosing between offering compatibility
+ * and remaining consistent with our broader security model, we (obviously)
+ * choose security over compatibility. In short, this is meant to be a best
+ * effort -- no more -- and as such, it should not be unified with the much
+ * more complete Linux /proc implementation found in the LX brand.
+ */
+
+#include <sys/cpupart.h>
+#include <sys/cpuvar.h>
+#include <sys/session.h>
+#include <sys/vmparam.h>
+#include <sys/mman.h>
+#include <vm/rm.h>
+#include <vm/seg_vn.h>
+#include <sys/sdt.h>
+#include <sys/strlog.h>
+#include <sys/stropts.h>
+#include <sys/cmn_err.h>
+#include <sys/x86_archext.h>
+#include <sys/archsystm.h>
+#include <sys/fp.h>
+#include <sys/pool_pset.h>
+#include <sys/pset.h>
+#include <sys/zone.h>
+#include <sys/pghw.h>
+#include <sys/vfs_opreg.h>
+
+/* Dependent on procfs */
+extern kthread_t *prchoose(proc_t *);
+
+#include "lxproc.h"
+
+extern pgcnt_t swapfs_minfree;
+extern time_t boot_time;
+
+/*
+ * Pointer to the vnode ops vector for this fs.
+ * This is instantiated in lxprinit() in lxpr_vfsops.c
+ */
+vnodeops_t *lxpr_vnodeops;
+
+static int lxpr_open(vnode_t **, int, cred_t *, caller_context_t *);
+static int lxpr_close(vnode_t *, int, int, offset_t, cred_t *,
+ caller_context_t *);
+static int lxpr_read(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
+static int lxpr_getattr(vnode_t *, vattr_t *, int, cred_t *,
+ caller_context_t *);
+static int lxpr_access(vnode_t *, int, int, cred_t *, caller_context_t *);
+static int lxpr_lookup(vnode_t *, char *, vnode_t **,
+ pathname_t *, int, vnode_t *, cred_t *, caller_context_t *, int *,
+ pathname_t *);
+static int lxpr_readdir(vnode_t *, uio_t *, cred_t *, int *,
+ caller_context_t *, int);
+static int lxpr_readlink(vnode_t *, uio_t *, cred_t *, caller_context_t *);
+static int lxpr_cmp(vnode_t *, vnode_t *, caller_context_t *);
+static int lxpr_realvp(vnode_t *, vnode_t **, caller_context_t *);
+static int lxpr_sync(void);
+static void lxpr_inactive(vnode_t *, cred_t *, caller_context_t *);
+
+static vnode_t *lxpr_lookup_procdir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_piddir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_not_a_dir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_fddir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_netdir(vnode_t *, char *);
+
+static int lxpr_readdir_procdir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_piddir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_not_a_dir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_fddir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_netdir(lxpr_node_t *, uio_t *, int *);
+
+static void lxpr_read_invalid(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_empty(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_cpuinfo(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_isdir(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_fd(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_kmsg(lxpr_node_t *, lxpr_uiobuf_t *, ldi_handle_t);
+static void lxpr_read_loadavg(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_meminfo(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_mounts(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_partitions(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_stat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_uptime(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_version(lxpr_node_t *, lxpr_uiobuf_t *);
+
+static void lxpr_read_pid_cmdline(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_maps(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_stat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_statm(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_status(lxpr_node_t *, lxpr_uiobuf_t *);
+
+static void lxpr_read_net_arp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_dev(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_dev_mcast(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_igmp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_ip_mr_cache(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_ip_mr_vif(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_mcfilter(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_netstat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_raw(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_route(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_rpc(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_rt_cache(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_sockstat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_snmp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_stat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_tcp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_udp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_unix(lxpr_node_t *, lxpr_uiobuf_t *);
+
+/*
+ * Simple conversion
+ */
+#define btok(x) ((x) >> 10) /* bytes to kbytes */
+#define ptok(x) ((x) << (PAGESHIFT - 10)) /* pages to kbytes */
+
+/*
+ * The lxproc vnode operations vector
+ */
+const fs_operation_def_t lxpr_vnodeops_template[] = {
+ VOPNAME_OPEN, { .vop_open = lxpr_open },
+ VOPNAME_CLOSE, { .vop_close = lxpr_close },
+ VOPNAME_READ, { .vop_read = lxpr_read },
+ VOPNAME_GETATTR, { .vop_getattr = lxpr_getattr },
+ VOPNAME_ACCESS, { .vop_access = lxpr_access },
+ VOPNAME_LOOKUP, { .vop_lookup = lxpr_lookup },
+ VOPNAME_READDIR, { .vop_readdir = lxpr_readdir },
+ VOPNAME_READLINK, { .vop_readlink = lxpr_readlink },
+ VOPNAME_FSYNC, { .error = lxpr_sync },
+ VOPNAME_SEEK, { .error = lxpr_sync },
+ VOPNAME_INACTIVE, { .vop_inactive = lxpr_inactive },
+ VOPNAME_CMP, { .vop_cmp = lxpr_cmp },
+ VOPNAME_REALVP, { .vop_realvp = lxpr_realvp },
+ NULL, NULL
+};
+
+/*
+ * file contents of an lxproc directory.
+ */
+static lxpr_dirent_t lxpr_dir[] = {
+ { LXPR_CMDLINE, "cmdline" },
+ { LXPR_CPUINFO, "cpuinfo" },
+ { LXPR_DEVICES, "devices" },
+ { LXPR_DMA, "dma" },
+ { LXPR_FILESYSTEMS, "filesystems" },
+ { LXPR_INTERRUPTS, "interrupts" },
+ { LXPR_IOPORTS, "ioports" },
+ { LXPR_KCORE, "kcore" },
+ { LXPR_KMSG, "kmsg" },
+ { LXPR_LOADAVG, "loadavg" },
+ { LXPR_MEMINFO, "meminfo" },
+ { LXPR_MOUNTS, "mounts" },
+ { LXPR_NETDIR, "net" },
+ { LXPR_PARTITIONS, "partitions" },
+ { LXPR_SELF, "self" },
+ { LXPR_STAT, "stat" },
+ { LXPR_UPTIME, "uptime" },
+ { LXPR_VERSION, "version" }
+};
+
+#define PROCDIRFILES (sizeof (lxpr_dir) / sizeof (lxpr_dir[0]))
+
+/*
+ * Contents of an /lxproc/<pid> directory.
+ */
+static lxpr_dirent_t piddir[] = {
+ { LXPR_PID_CMDLINE, "cmdline" },
+ { LXPR_PID_CPU, "cpu" },
+ { LXPR_PID_CURDIR, "cwd" },
+ { LXPR_PID_ENV, "environ" },
+ { LXPR_PID_EXE, "exe" },
+ { LXPR_PID_MAPS, "maps" },
+ { LXPR_PID_MEM, "mem" },
+ { LXPR_PID_ROOTDIR, "root" },
+ { LXPR_PID_STAT, "stat" },
+ { LXPR_PID_STATM, "statm" },
+ { LXPR_PID_STATUS, "status" },
+ { LXPR_PID_FDDIR, "fd" }
+};
+
+#define PIDDIRFILES (sizeof (piddir) / sizeof (piddir[0]))
+
+/*
+ * contents of /lxproc/net directory
+ */
+static lxpr_dirent_t netdir[] = {
+ { LXPR_NET_ARP, "arp" },
+ { LXPR_NET_DEV, "dev" },
+ { LXPR_NET_DEV_MCAST, "dev_mcast" },
+ { LXPR_NET_IGMP, "igmp" },
+ { LXPR_NET_IP_MR_CACHE, "ip_mr_cache" },
+ { LXPR_NET_IP_MR_VIF, "ip_mr_vif" },
+ { LXPR_NET_MCFILTER, "mcfilter" },
+ { LXPR_NET_NETSTAT, "netstat" },
+ { LXPR_NET_RAW, "raw" },
+ { LXPR_NET_ROUTE, "route" },
+ { LXPR_NET_RPC, "rpc" },
+ { LXPR_NET_RT_CACHE, "rt_cache" },
+ { LXPR_NET_SOCKSTAT, "sockstat" },
+ { LXPR_NET_SNMP, "snmp" },
+ { LXPR_NET_STAT, "stat" },
+ { LXPR_NET_TCP, "tcp" },
+ { LXPR_NET_UDP, "udp" },
+ { LXPR_NET_UNIX, "unix" }
+};
+
+#define NETDIRFILES (sizeof (netdir) / sizeof (netdir[0]))
+
+/*
+ * These are the major signal number differences between Linux and native:
+ *
+ * ====================================
+ * | Number | Linux | Native |
+ * | ====== | ========= | ========== |
+ * | 7 | SIGBUS | SIGEMT |
+ * | 10 | SIGUSR1 | SIGBUS |
+ * | 12 | SIGUSR2 | SIGSYS |
+ * | 16 | SIGSTKFLT | SIGUSR1 |
+ * | 17 | SIGCHLD | SIGUSR2 |
+ * | 18 | SIGCONT | SIGCHLD |
+ * | 19 | SIGSTOP | SIGPWR |
+ * | 20 | SIGTSTP | SIGWINCH |
+ * | 21 | SIGTTIN | SIGURG |
+ * | 22 | SIGTTOU | SIGPOLL |
+ * | 23 | SIGURG | SIGSTOP |
+ * | 24 | SIGXCPU | SIGTSTP |
+ * | 25 | SIGXFSZ | SIGCONT |
+ * | 26 | SIGVTALARM | SIGTTIN |
+ * | 27 | SIGPROF | SIGTTOU |
+ * | 28 | SIGWINCH | SIGVTALARM |
+ * | 29 | SIGPOLL | SIGPROF |
+ * | 30 | SIGPWR | SIGXCPU |
+ * | 31 | SIGSYS | SIGXFSZ |
+ * ====================================
+ *
+ * Not every Linux signal maps to a native signal, nor does every native
+ * signal map to a Linux counterpart. However, when signals do map, the
+ * mapping is unique.
+ */
+static int
+lxpr_sigmap[NSIG] = {
+ 0,
+ LX_SIGHUP,
+ LX_SIGINT,
+ LX_SIGQUIT,
+ LX_SIGILL,
+ LX_SIGTRAP,
+ LX_SIGABRT,
+ LX_SIGSTKFLT,
+ LX_SIGFPE,
+ LX_SIGKILL,
+ LX_SIGBUS,
+ LX_SIGSEGV,
+ LX_SIGSYS,
+ LX_SIGPIPE,
+ LX_SIGALRM,
+ LX_SIGTERM,
+ LX_SIGUSR1,
+ LX_SIGUSR2,
+ LX_SIGCHLD,
+ LX_SIGPWR,
+ LX_SIGWINCH,
+ LX_SIGURG,
+ LX_SIGPOLL,
+ LX_SIGSTOP,
+ LX_SIGTSTP,
+ LX_SIGCONT,
+ LX_SIGTTIN,
+ LX_SIGTTOU,
+ LX_SIGVTALRM,
+ LX_SIGPROF,
+ LX_SIGXCPU,
+ LX_SIGXFSZ,
+ -1, /* 32: illumos SIGWAITING */
+ -1, /* 33: illumos SIGLWP */
+ -1, /* 34: illumos SIGFREEZE */
+ -1, /* 35: illumos SIGTHAW */
+ -1, /* 36: illumos SIGCANCEL */
+ -1, /* 37: illumos SIGLOST */
+ -1, /* 38: illumos SIGXRES */
+ -1, /* 39: illumos SIGJVM1 */
+ -1, /* 40: illumos SIGJVM2 */
+ -1, /* 41: illumos SIGINFO */
+ LX_SIGRTMIN, /* 42: illumos _SIGRTMIN */
+ LX_SIGRTMIN + 1,
+ LX_SIGRTMIN + 2,
+ LX_SIGRTMIN + 3,
+ LX_SIGRTMIN + 4,
+ LX_SIGRTMIN + 5,
+ LX_SIGRTMIN + 6,
+ LX_SIGRTMIN + 7,
+ LX_SIGRTMIN + 8,
+ LX_SIGRTMIN + 9,
+ LX_SIGRTMIN + 10,
+ LX_SIGRTMIN + 11,
+ LX_SIGRTMIN + 12,
+ LX_SIGRTMIN + 13,
+ LX_SIGRTMIN + 14,
+ LX_SIGRTMIN + 15,
+ LX_SIGRTMIN + 16,
+ LX_SIGRTMIN + 17,
+ LX_SIGRTMIN + 18,
+ LX_SIGRTMIN + 19,
+ LX_SIGRTMIN + 20,
+ LX_SIGRTMIN + 21,
+ LX_SIGRTMIN + 22,
+ LX_SIGRTMIN + 23,
+ LX_SIGRTMIN + 24,
+ LX_SIGRTMIN + 25,
+ LX_SIGRTMIN + 26,
+ LX_SIGRTMIN + 27,
+ LX_SIGRTMIN + 28,
+ LX_SIGRTMIN + 29,
+ LX_SIGRTMIN + 30,
+ LX_SIGRTMAX
+};
+
+/*
+ * lxpr_open(): Vnode operation for VOP_OPEN()
+ */
+static int
+lxpr_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
+{
+ vnode_t *vp = *vpp;
+ lxpr_node_t *lxpnp = VTOLXP(vp);
+ lxpr_nodetype_t type = lxpnp->lxpr_type;
+ vnode_t *rvp;
+ int error = 0;
+
+ /*
+ * We only allow reading in this file systrem
+ */
+ if (flag & FWRITE)
+ return (EROFS);
+
+ /*
+ * If we are opening an underlying file only allow regular files
+ * reject the open for anything but a regular file.
+ * Just do it if we are opening the current or root directory.
+ */
+ if (lxpnp->lxpr_realvp != NULL) {
+ rvp = lxpnp->lxpr_realvp;
+
+ if (type == LXPR_PID_FD_FD && rvp->v_type != VREG)
+ error = EACCES;
+ else {
+ /*
+ * Need to hold rvp since VOP_OPEN() may release it.
+ */
+ VN_HOLD(rvp);
+ error = VOP_OPEN(&rvp, flag, cr, ct);
+ if (error) {
+ VN_RELE(rvp);
+ } else {
+ *vpp = rvp;
+ VN_RELE(vp);
+ }
+ }
+ }
+
+ return (error);
+}
+
+
+/*
+ * lxpr_close(): Vnode operation for VOP_CLOSE()
+ */
+/* ARGSUSED */
+static int
+lxpr_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
+ caller_context_t *ct)
+{
+ lxpr_node_t *lxpr = VTOLXP(vp);
+ lxpr_nodetype_t type = lxpr->lxpr_type;
+
+ /*
+ * we should never get here because the close is done on the realvp
+ * for these nodes
+ */
+ ASSERT(type != LXPR_PID_FD_FD &&
+ type != LXPR_PID_CURDIR &&
+ type != LXPR_PID_ROOTDIR &&
+ type != LXPR_PID_EXE);
+
+ return (0);
+}
+
+static void (*lxpr_read_function[LXPR_NFILES])() = {
+ lxpr_read_isdir, /* /proc */
+ lxpr_read_isdir, /* /proc/<pid> */
+ lxpr_read_pid_cmdline, /* /proc/<pid>/cmdline */
+ lxpr_read_empty, /* /proc/<pid>/cpu */
+ lxpr_read_invalid, /* /proc/<pid>/cwd */
+ lxpr_read_empty, /* /proc/<pid>/environ */
+ lxpr_read_invalid, /* /proc/<pid>/exe */
+ lxpr_read_pid_maps, /* /proc/<pid>/maps */
+ lxpr_read_empty, /* /proc/<pid>/mem */
+ lxpr_read_invalid, /* /proc/<pid>/root */
+ lxpr_read_pid_stat, /* /proc/<pid>/stat */
+ lxpr_read_pid_statm, /* /proc/<pid>/statm */
+ lxpr_read_pid_status, /* /proc/<pid>/status */
+ lxpr_read_isdir, /* /proc/<pid>/fd */
+ lxpr_read_fd, /* /proc/<pid>/fd/nn */
+ lxpr_read_empty, /* /proc/cmdline */
+ lxpr_read_cpuinfo, /* /proc/cpuinfo */
+ lxpr_read_empty, /* /proc/devices */
+ lxpr_read_empty, /* /proc/dma */
+ lxpr_read_empty, /* /proc/filesystems */
+ lxpr_read_empty, /* /proc/interrupts */
+ lxpr_read_empty, /* /proc/ioports */
+ lxpr_read_empty, /* /proc/kcore */
+ lxpr_read_invalid, /* /proc/kmsg -- see lxpr_read() */
+ lxpr_read_loadavg, /* /proc/loadavg */
+ lxpr_read_meminfo, /* /proc/meminfo */
+ lxpr_read_mounts, /* /proc/mounts */
+ lxpr_read_isdir, /* /proc/net */
+ lxpr_read_net_arp, /* /proc/net/arp */
+ lxpr_read_net_dev, /* /proc/net/dev */
+ lxpr_read_net_dev_mcast, /* /proc/net/dev_mcast */
+ lxpr_read_net_igmp, /* /proc/net/igmp */
+ lxpr_read_net_ip_mr_cache, /* /proc/net/ip_mr_cache */
+ lxpr_read_net_ip_mr_vif, /* /proc/net/ip_mr_vif */
+ lxpr_read_net_mcfilter, /* /proc/net/mcfilter */
+ lxpr_read_net_netstat, /* /proc/net/netstat */
+ lxpr_read_net_raw, /* /proc/net/raw */
+ lxpr_read_net_route, /* /proc/net/route */
+ lxpr_read_net_rpc, /* /proc/net/rpc */
+ lxpr_read_net_rt_cache, /* /proc/net/rt_cache */
+ lxpr_read_net_sockstat, /* /proc/net/sockstat */
+ lxpr_read_net_snmp, /* /proc/net/snmp */
+ lxpr_read_net_stat, /* /proc/net/stat */
+ lxpr_read_net_tcp, /* /proc/net/tcp */
+ lxpr_read_net_udp, /* /proc/net/udp */
+ lxpr_read_net_unix, /* /proc/net/unix */
+ lxpr_read_partitions, /* /proc/partitions */
+ lxpr_read_invalid, /* /proc/self */
+ lxpr_read_stat, /* /proc/stat */
+ lxpr_read_uptime, /* /proc/uptime */
+ lxpr_read_version, /* /proc/version */
+};
+
+/*
+ * Array of lookup functions, indexed by /lxproc file type.
+ */
+static vnode_t *(*lxpr_lookup_function[LXPR_NFILES])() = {
+ lxpr_lookup_procdir, /* /proc */
+ lxpr_lookup_piddir, /* /proc/<pid> */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/cmdline */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/cpu */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/cwd */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/environ */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/exe */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/maps */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/mem */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/root */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/stat */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/statm */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/status */
+ lxpr_lookup_fddir, /* /proc/<pid>/fd */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/fd/nn */
+ lxpr_lookup_not_a_dir, /* /proc/cmdline */
+ lxpr_lookup_not_a_dir, /* /proc/cpuinfo */
+ lxpr_lookup_not_a_dir, /* /proc/devices */
+ lxpr_lookup_not_a_dir, /* /proc/dma */
+ lxpr_lookup_not_a_dir, /* /proc/filesystems */
+ lxpr_lookup_not_a_dir, /* /proc/interrupts */
+ lxpr_lookup_not_a_dir, /* /proc/ioports */
+ lxpr_lookup_not_a_dir, /* /proc/kcore */
+ lxpr_lookup_not_a_dir, /* /proc/kmsg */
+ lxpr_lookup_not_a_dir, /* /proc/loadavg */
+ lxpr_lookup_not_a_dir, /* /proc/meminfo */
+ lxpr_lookup_not_a_dir, /* /proc/mounts */
+ lxpr_lookup_netdir, /* /proc/net */
+ lxpr_lookup_not_a_dir, /* /proc/net/arp */
+ lxpr_lookup_not_a_dir, /* /proc/net/dev */
+ lxpr_lookup_not_a_dir, /* /proc/net/dev_mcast */
+ lxpr_lookup_not_a_dir, /* /proc/net/igmp */
+ lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_cache */
+ lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_vif */
+ lxpr_lookup_not_a_dir, /* /proc/net/mcfilter */
+ lxpr_lookup_not_a_dir, /* /proc/net/netstat */
+ lxpr_lookup_not_a_dir, /* /proc/net/raw */
+ lxpr_lookup_not_a_dir, /* /proc/net/route */
+ lxpr_lookup_not_a_dir, /* /proc/net/rpc */
+ lxpr_lookup_not_a_dir, /* /proc/net/rt_cache */
+ lxpr_lookup_not_a_dir, /* /proc/net/sockstat */
+ lxpr_lookup_not_a_dir, /* /proc/net/snmp */
+ lxpr_lookup_not_a_dir, /* /proc/net/stat */
+ lxpr_lookup_not_a_dir, /* /proc/net/tcp */
+ lxpr_lookup_not_a_dir, /* /proc/net/udp */
+ lxpr_lookup_not_a_dir, /* /proc/net/unix */
+ lxpr_lookup_not_a_dir, /* /proc/partitions */
+ lxpr_lookup_not_a_dir, /* /proc/self */
+ lxpr_lookup_not_a_dir, /* /proc/stat */
+ lxpr_lookup_not_a_dir, /* /proc/uptime */
+ lxpr_lookup_not_a_dir, /* /proc/version */
+};
+
+/*
+ * Array of readdir functions, indexed by /proc file type.
+ */
+static int (*lxpr_readdir_function[LXPR_NFILES])() = {
+ lxpr_readdir_procdir, /* /proc */
+ lxpr_readdir_piddir, /* /proc/<pid> */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/cmdline */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/cpu */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/cwd */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/environ */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/exe */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/maps */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/mem */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/root */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/stat */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/statm */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/status */
+ lxpr_readdir_fddir, /* /proc/<pid>/fd */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/fd/nn */
+ lxpr_readdir_not_a_dir, /* /proc/cmdline */
+ lxpr_readdir_not_a_dir, /* /proc/cpuinfo */
+ lxpr_readdir_not_a_dir, /* /proc/devices */
+ lxpr_readdir_not_a_dir, /* /proc/dma */
+ lxpr_readdir_not_a_dir, /* /proc/filesystems */
+ lxpr_readdir_not_a_dir, /* /proc/interrupts */
+ lxpr_readdir_not_a_dir, /* /proc/ioports */
+ lxpr_readdir_not_a_dir, /* /proc/kcore */
+ lxpr_readdir_not_a_dir, /* /proc/kmsg */
+ lxpr_readdir_not_a_dir, /* /proc/loadavg */
+ lxpr_readdir_not_a_dir, /* /proc/meminfo */
+ lxpr_readdir_not_a_dir, /* /proc/mounts */
+ lxpr_readdir_netdir, /* /proc/net */
+ lxpr_readdir_not_a_dir, /* /proc/net/arp */
+ lxpr_readdir_not_a_dir, /* /proc/net/dev */
+ lxpr_readdir_not_a_dir, /* /proc/net/dev_mcast */
+ lxpr_readdir_not_a_dir, /* /proc/net/igmp */
+ lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_cache */
+ lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_vif */
+ lxpr_readdir_not_a_dir, /* /proc/net/mcfilter */
+ lxpr_readdir_not_a_dir, /* /proc/net/netstat */
+ lxpr_readdir_not_a_dir, /* /proc/net/raw */
+ lxpr_readdir_not_a_dir, /* /proc/net/route */
+ lxpr_readdir_not_a_dir, /* /proc/net/rpc */
+ lxpr_readdir_not_a_dir, /* /proc/net/rt_cache */
+ lxpr_readdir_not_a_dir, /* /proc/net/sockstat */
+ lxpr_readdir_not_a_dir, /* /proc/net/snmp */
+ lxpr_readdir_not_a_dir, /* /proc/net/stat */
+ lxpr_readdir_not_a_dir, /* /proc/net/tcp */
+ lxpr_readdir_not_a_dir, /* /proc/net/udp */
+ lxpr_readdir_not_a_dir, /* /proc/net/unix */
+ lxpr_readdir_not_a_dir, /* /proc/partitions */
+ lxpr_readdir_not_a_dir, /* /proc/self */
+ lxpr_readdir_not_a_dir, /* /proc/stat */
+ lxpr_readdir_not_a_dir, /* /proc/uptime */
+ lxpr_readdir_not_a_dir, /* /proc/version */
+};
+
+
+/*
+ * lxpr_read(): Vnode operation for VOP_READ()
+ *
+ * As the format of all the files that can be read in lxproc is human readable
+ * and not binary structures there do not have to be different read variants
+ * depending on whether the reading process model is 32- or 64-bit.
+ */
+/* ARGSUSED */
+static int
+lxpr_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
+ caller_context_t *ct)
+{
+ lxpr_node_t *lxpnp = VTOLXP(vp);
+ lxpr_nodetype_t type = lxpnp->lxpr_type;
+ lxpr_uiobuf_t *uiobuf = lxpr_uiobuf_new(uiop);
+ int error;
+
+ ASSERT(type < LXPR_NFILES);
+
+ if (type == LXPR_KMSG) {
+ ldi_ident_t li = VTOLXPM(vp)->lxprm_li;
+ ldi_handle_t ldih;
+ struct strioctl str;
+ int rv;
+
+ /*
+ * Open the zone's console device using the layered driver
+ * interface.
+ */
+ if ((error =
+ ldi_open_by_name("/dev/log", FREAD, cr, &ldih, li)) != 0)
+ return (error);
+
+ /*
+ * Send an ioctl to the underlying console device, letting it
+ * know we're interested in getting console messages.
+ */
+ str.ic_cmd = I_CONSLOG;
+ str.ic_timout = 0;
+ str.ic_len = 0;
+ str.ic_dp = NULL;
+ if ((error = ldi_ioctl(ldih, I_STR,
+ (intptr_t)&str, FKIOCTL, cr, &rv)) != 0)
+ return (error);
+
+ lxpr_read_kmsg(lxpnp, uiobuf, ldih);
+
+ if ((error = ldi_close(ldih, FREAD, cr)) != 0)
+ return (error);
+ } else {
+ lxpr_read_function[type](lxpnp, uiobuf);
+ }
+
+ error = lxpr_uiobuf_flush(uiobuf);
+ lxpr_uiobuf_free(uiobuf);
+
+ return (error);
+}
+
+/*
+ * lxpr_read_invalid(), lxpr_read_isdir(), lxpr_read_empty()
+ *
+ * Various special case reads:
+ * - trying to read a directory
+ * - invalid file (used to mean a file that should be implemented,
+ * but isn't yet)
+ * - empty file
+ * - wait to be able to read a file that will never have anything to read
+ */
+/* ARGSUSED */
+static void
+lxpr_read_isdir(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_uiobuf_seterr(uiobuf, EISDIR);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_invalid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_empty(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_pid_cmdline():
+ *
+ * This is not precisely compatible with Linux: the Linux cmdline returns argv
+ * with the correct separation using \0 between the arguments, but we cannot do
+ * that without copying the real argv from the correct process context. This
+ * is too difficult to attempt so we pretend that the entire cmdline is just
+ * argv[0]. This is good enough for ps and htop to display correctly, but might
+ * cause some other things not to work correctly.
+ */
+static void
+lxpr_read_pid_cmdline(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+ char *buf;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_CMDLINE);
+
+ p = lxpr_lock(lxpnp->lxpr_pid);
+ if (p == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ return;
+ }
+
+ buf = PTOU(p)->u_argv != 0 ? PTOU(p)->u_psargs : PTOU(p)->u_comm;
+
+ lxpr_uiobuf_write(uiobuf, buf, strlen(buf) + 1);
+ lxpr_unlock(p);
+}
+
+/*
+ * lxpr_read_pid_maps(): memory map file
+ */
+static void
+lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+ struct as *as;
+ struct seg *seg;
+ char *buf;
+ int buflen = MAXPATHLEN;
+ struct print_data {
+ caddr_t saddr;
+ caddr_t eaddr;
+ int type;
+ char prot[5];
+ uint32_t offset;
+ vnode_t *vp;
+ struct print_data *next;
+ } *print_head = NULL;
+ struct print_data **print_tail = &print_head;
+ struct print_data *pbuf;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_MAPS);
+
+ p = lxpr_lock(lxpnp->lxpr_pid);
+ if (p == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ return;
+ }
+
+ as = p->p_as;
+
+ if (as == &kas) {
+ lxpr_unlock(p);
+ return;
+ }
+
+ mutex_exit(&p->p_lock);
+
+ /* Iterate over all segments in the address space */
+ AS_LOCK_ENTER(as, RW_READER);
+ for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
+ vnode_t *vp;
+ uint_t protbits;
+
+ pbuf = kmem_alloc(sizeof (*pbuf), KM_SLEEP);
+
+ pbuf->saddr = seg->s_base;
+ pbuf->eaddr = seg->s_base+seg->s_size;
+ pbuf->type = SEGOP_GETTYPE(seg, seg->s_base);
+
+ /*
+ * Cheat and only use the protection bits of the first page
+ * in the segment
+ */
+ (void) strncpy(pbuf->prot, "----", sizeof (pbuf->prot));
+ (void) SEGOP_GETPROT(seg, seg->s_base, 0, &protbits);
+
+ if (protbits & PROT_READ) pbuf->prot[0] = 'r';
+ if (protbits & PROT_WRITE) pbuf->prot[1] = 'w';
+ if (protbits & PROT_EXEC) pbuf->prot[2] = 'x';
+ if (pbuf->type & MAP_SHARED) pbuf->prot[3] = 's';
+ else if (pbuf->type & MAP_PRIVATE) pbuf->prot[3] = 'p';
+
+ if (seg->s_ops == &segvn_ops &&
+ SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
+ vp != NULL && vp->v_type == VREG) {
+ VN_HOLD(vp);
+ pbuf->vp = vp;
+ } else {
+ pbuf->vp = NULL;
+ }
+
+ pbuf->offset = (uint32_t)SEGOP_GETOFFSET(seg, pbuf->saddr);
+
+ pbuf->next = NULL;
+ *print_tail = pbuf;
+ print_tail = &pbuf->next;
+ }
+ AS_LOCK_EXIT(as);
+ mutex_enter(&p->p_lock);
+ lxpr_unlock(p);
+
+ buf = kmem_alloc(buflen, KM_SLEEP);
+
+ /* print the data we've extracted */
+ pbuf = print_head;
+ while (pbuf != NULL) {
+ struct print_data *pbuf_next;
+ vattr_t vattr;
+
+ int maj = 0;
+ int min = 0;
+ u_longlong_t inode = 0;
+
+ *buf = '\0';
+ if (pbuf->vp != NULL) {
+ vattr.va_mask = AT_FSID | AT_NODEID;
+ if (VOP_GETATTR(pbuf->vp, &vattr, 0, CRED(),
+ NULL) == 0) {
+ maj = getmajor(vattr.va_fsid);
+ min = getminor(vattr.va_fsid);
+ inode = vattr.va_nodeid;
+ }
+ (void) vnodetopath(NULL, pbuf->vp, buf, buflen, CRED());
+ VN_RELE(pbuf->vp);
+ }
+
+ if (*buf != '\0') {
+ lxpr_uiobuf_printf(uiobuf,
+ "%08x-%08x %s %08x %02d:%03d %lld %s\n",
+ pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset,
+ maj, min, inode, buf);
+ } else {
+ lxpr_uiobuf_printf(uiobuf,
+ "%08x-%08x %s %08x %02d:%03d %lld\n",
+ pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset,
+ maj, min, inode);
+ }
+
+ pbuf_next = pbuf->next;
+ kmem_free(pbuf, sizeof (*pbuf));
+ pbuf = pbuf_next;
+ }
+
+ kmem_free(buf, buflen);
+}
+
+/*
+ * lxpr_read_pid_statm(): memory status file
+ */
+static void
+lxpr_read_pid_statm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+ struct as *as;
+ size_t vsize;
+ size_t rss;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_STATM);
+
+ p = lxpr_lock(lxpnp->lxpr_pid);
+ if (p == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ return;
+ }
+
+ as = p->p_as;
+
+ mutex_exit(&p->p_lock);
+
+ AS_LOCK_ENTER(as, RW_READER);
+ vsize = btopr(as->a_resvsize);
+ rss = rm_asrss(as);
+ AS_LOCK_EXIT(as);
+
+ mutex_enter(&p->p_lock);
+ lxpr_unlock(p);
+
+ lxpr_uiobuf_printf(uiobuf,
+ "%lu %lu %lu %lu %lu %lu %lu\n",
+ vsize, rss, 0l, rss, 0l, 0l, 0l);
+}
+
+/*
+ * lxpr_read_pid_status(): status file
+ */
+static void
+lxpr_read_pid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+ kthread_t *t;
+ user_t *up;
+ cred_t *cr;
+ const gid_t *groups;
+ int ngroups;
+ struct as *as;
+ char *status;
+ pid_t pid, ppid;
+ size_t vsize;
+ size_t rss;
+ k_sigset_t current, ignore, handle;
+ int i, lx_sig;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_STATUS);
+
+ p = lxpr_lock(lxpnp->lxpr_pid);
+ if (p == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ return;
+ }
+
+ pid = p->p_pid;
+
+ /*
+ * Convert pid to the Linux default of 1 if we're the zone's init
+ * process
+ */
+ if (pid == curproc->p_zone->zone_proc_initpid) {
+ pid = 1;
+ ppid = 0; /* parent pid for init is 0 */
+ } else {
+ /*
+ * Make sure not to reference parent PIDs that reside outside
+ * the zone
+ */
+ ppid = ((p->p_flag & SZONETOP)
+ ? curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
+
+ /*
+ * Convert ppid to the Linux default of 1 if our parent is the
+ * zone's init process
+ */
+ if (ppid == curproc->p_zone->zone_proc_initpid)
+ ppid = 1;
+ }
+
+ t = prchoose(p);
+ if (t != NULL) {
+ switch (t->t_state) {
+ case TS_SLEEP:
+ status = "S (sleeping)";
+ break;
+ case TS_RUN:
+ case TS_ONPROC:
+ status = "R (running)";
+ break;
+ case TS_ZOMB:
+ status = "Z (zombie)";
+ break;
+ case TS_STOPPED:
+ status = "T (stopped)";
+ break;
+ default:
+ status = "! (unknown)";
+ break;
+ }
+ thread_unlock(t);
+ } else {
+ /*
+ * there is a hole in the exit code, where a proc can have
+ * no threads but it is yet to be flagged SZOMB. We will
+ * assume we are about to become a zombie
+ */
+ status = "Z (zombie)";
+ }
+
+ up = PTOU(p);
+ mutex_enter(&p->p_crlock);
+ crhold(cr = p->p_cred);
+ mutex_exit(&p->p_crlock);
+
+ lxpr_uiobuf_printf(uiobuf,
+ "Name:\t%s\n"
+ "State:\t%s\n"
+ "Tgid:\t%d\n"
+ "Pid:\t%d\n"
+ "PPid:\t%d\n"
+ "TracerPid:\t%d\n"
+ "Uid:\t%u\t%u\t%u\t%u\n"
+ "Gid:\t%u\t%u\t%u\t%u\n"
+ "FDSize:\t%d\n"
+ "Groups:\t",
+ up->u_comm,
+ status,
+ pid, /* thread group id - same as pid */
+ pid,
+ ppid,
+ 0,
+ crgetruid(cr), crgetuid(cr), crgetsuid(cr), crgetuid(cr),
+ crgetrgid(cr), crgetgid(cr), crgetsgid(cr), crgetgid(cr),
+ p->p_fno_ctl);
+
+ ngroups = crgetngroups(cr);
+ groups = crgetgroups(cr);
+ for (i = 0; i < ngroups; i++) {
+ lxpr_uiobuf_printf(uiobuf,
+ "%u ",
+ groups[i]);
+ }
+ crfree(cr);
+
+ as = p->p_as;
+ if ((p->p_stat != SZOMB) && !(p->p_flag & SSYS) && (as != &kas)) {
+ mutex_exit(&p->p_lock);
+ AS_LOCK_ENTER(as, RW_READER);
+ vsize = as->a_resvsize;
+ rss = rm_asrss(as);
+ AS_LOCK_EXIT(as);
+ mutex_enter(&p->p_lock);
+
+ lxpr_uiobuf_printf(uiobuf,
+ "\n"
+ "VmSize:\t%8lu kB\n"
+ "VmLck:\t%8lu kB\n"
+ "VmRSS:\t%8lu kB\n"
+ "VmData:\t%8lu kB\n"
+ "VmStk:\t%8lu kB\n"
+ "VmExe:\t%8lu kB\n"
+ "VmLib:\t%8lu kB",
+ btok(vsize),
+ 0l,
+ ptok(rss),
+ 0l,
+ btok(p->p_stksize),
+ ptok(rss),
+ 0l);
+ }
+
+ sigemptyset(&current);
+ sigemptyset(&ignore);
+ sigemptyset(&handle);
+
+ for (i = 1; i < NSIG; i++) {
+ lx_sig = lxpr_sigmap[i];
+
+ if ((lx_sig > 0) && (lx_sig <= LX_NSIG)) {
+ if (sigismember(&p->p_sig, i))
+ sigaddset(&current, lx_sig);
+
+ if (up->u_signal[i - 1] == SIG_IGN)
+ sigaddset(&ignore, lx_sig);
+ else if (up->u_signal[i - 1] != SIG_DFL)
+ sigaddset(&handle, lx_sig);
+ }
+ }
+
+ lxpr_uiobuf_printf(uiobuf,
+ "\n"
+ "SigPnd:\t%08x%08x\n"
+ "SigBlk:\t%08x%08x\n"
+ "SigIgn:\t%08x%08x\n"
+ "SigCgt:\t%08x%08x\n"
+ "CapInh:\t%016x\n"
+ "CapPrm:\t%016x\n"
+ "CapEff:\t%016x\n",
+ current.__sigbits[1], current.__sigbits[0],
+ 0, 0, /* signals blocked on per thread basis */
+ ignore.__sigbits[1], ignore.__sigbits[0],
+ handle.__sigbits[1], handle.__sigbits[0],
+ /* Can't do anything with linux capabilities */
+ 0,
+ 0,
+ 0);
+
+ lxpr_unlock(p);
+}
+
+
+/*
+ * lxpr_read_pid_stat(): pid stat file
+ */
+static void
+lxpr_read_pid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+ kthread_t *t;
+ struct as *as;
+ char stat;
+ pid_t pid, ppid, pgpid, spid;
+ gid_t psgid;
+ dev_t psdev;
+ size_t rss, vsize;
+ int nice, pri;
+ caddr_t wchan;
+ processorid_t cpu;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_STAT);
+
+ p = lxpr_lock(lxpnp->lxpr_pid);
+ if (p == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ return;
+ }
+
+ pid = p->p_pid;
+
+ /*
+ * Set Linux defaults if we're the zone's init process
+ */
+ if (pid == curproc->p_zone->zone_proc_initpid) {
+ pid = 1; /* PID for init */
+ ppid = 0; /* parent PID for init is 0 */
+ pgpid = 0; /* process group for init is 0 */
+ psgid = (gid_t)-1; /* credential GID for init is -1 */
+ spid = 0; /* session id for init is 0 */
+ psdev = 0; /* session device for init is 0 */
+ } else {
+ /*
+ * Make sure not to reference parent PIDs that reside outside
+ * the zone
+ */
+ ppid = ((p->p_flag & SZONETOP) ?
+ curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
+
+ /*
+ * Convert ppid to the Linux default of 1 if our parent is the
+ * zone's init process
+ */
+ if (ppid == curproc->p_zone->zone_proc_initpid)
+ ppid = 1;
+
+ pgpid = p->p_pgrp;
+
+ mutex_enter(&p->p_splock);
+ mutex_enter(&p->p_sessp->s_lock);
+ spid = p->p_sessp->s_sid;
+ psdev = p->p_sessp->s_dev;
+ if (p->p_sessp->s_cred)
+ psgid = crgetgid(p->p_sessp->s_cred);
+ else
+ psgid = crgetgid(p->p_cred);
+
+ mutex_exit(&p->p_sessp->s_lock);
+ mutex_exit(&p->p_splock);
+ }
+
+ t = prchoose(p);
+ if (t != NULL) {
+ switch (t->t_state) {
+ case TS_SLEEP:
+ stat = 'S'; break;
+ case TS_RUN:
+ case TS_ONPROC:
+ stat = 'R'; break;
+ case TS_ZOMB:
+ stat = 'Z'; break;
+ case TS_STOPPED:
+ stat = 'T'; break;
+ default:
+ stat = '!'; break;
+ }
+
+ if (CL_DONICE(t, NULL, 0, &nice) != 0)
+ nice = 0;
+
+ pri = t->t_pri;
+ wchan = t->t_wchan;
+ cpu = t->t_cpu->cpu_id;
+ thread_unlock(t);
+ } else {
+ /* Only zombies have no threads */
+ stat = 'Z';
+ nice = 0;
+ pri = 0;
+ wchan = 0;
+ cpu = 0;
+ }
+ as = p->p_as;
+ mutex_exit(&p->p_lock);
+ AS_LOCK_ENTER(as, RW_READER);
+ vsize = as->a_resvsize;
+ rss = rm_asrss(as);
+ AS_LOCK_EXIT(as);
+ mutex_enter(&p->p_lock);
+
+ lxpr_uiobuf_printf(uiobuf,
+ "%d (%s) %c %d %d %d %d %d "
+ "%lu %lu %lu %lu %lu "
+ "%lu %lu %ld %ld "
+ "%d %d %d "
+ "%lu "
+ "%lu "
+ "%lu %ld %llu "
+ "%lu %lu %u "
+ "%lu %lu "
+ "%lu %lu %lu %lu "
+ "%lu "
+ "%lu %lu "
+ "%d "
+ "%d"
+ "\n",
+ pid, PTOU(p)->u_comm, stat, ppid, pgpid, spid, psdev, psgid,
+ 0l, 0l, 0l, 0l, 0l, /* flags, minflt, cminflt, majflt, cmajflt */
+ p->p_utime, p->p_stime, p->p_cutime, p->p_cstime,
+ pri, nice, p->p_lwpcnt,
+ 0l, /* itrealvalue (time before next SIGALRM) */
+ PTOU(p)->u_ticks,
+ vsize, rss, p->p_vmem_ctl,
+ 0l, 0l, USRSTACK, /* startcode, endcode, startstack */
+ 0l, 0l, /* kstkesp, kstkeip */
+ 0l, 0l, 0l, 0l, /* signal, blocked, sigignore, sigcatch */
+ wchan,
+ 0l, 0l, /* nswap, cnswap */
+ 0, /* exit_signal */
+ cpu);
+
+ lxpr_unlock(p);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_arp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_dev(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_uiobuf_printf(uiobuf, "Inter-| Receive "
+ " | Transmit\n");
+ lxpr_uiobuf_printf(uiobuf, " face |bytes packets errs drop fifo"
+ " frame compressed multicast|bytes packets errs drop fifo"
+ " colls carrier compressed\n");
+
+ /*
+ * Data about each interface should go here, but that shouldn't be added
+ * unless there is an lxproc reader that actually makes use of it (and
+ * doesn't need anything else that we refuse to provide)...
+ */
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_dev_mcast(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_igmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_ip_mr_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_ip_mr_vif(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_mcfilter(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_netstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_raw(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_rpc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_rt_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_sockstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_snmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_tcp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_udp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_unix(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_kmsg(): read the contents of the kernel message queue. We
+ * translate this into the reception of console messages for this zone; each
+ * read copies out a single zone console message, or blocks until the next one
+ * is produced.
+ */
+
+#define LX_KMSG_PRI "<0>"
+
+static void
+lxpr_read_kmsg(lxpr_node_t *lxpnp, struct lxpr_uiobuf *uiobuf, ldi_handle_t lh)
+{
+ mblk_t *mp;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_KMSG);
+
+ if (ldi_getmsg(lh, &mp, NULL) == 0) {
+ /*
+ * lxproc doesn't like successive reads to the same file
+ * descriptor unless we do an explicit rewind each time.
+ */
+ lxpr_uiobuf_seek(uiobuf, 0);
+
+ lxpr_uiobuf_printf(uiobuf, "%s%s", LX_KMSG_PRI,
+ mp->b_cont->b_rptr);
+
+ freemsg(mp);
+ }
+}
+
+/*
+ * lxpr_read_loadavg(): read the contents of the "loadavg" file. We do just
+ * enough for uptime and other simple lxproc readers to work
+ */
+extern int nthread;
+
+static void
+lxpr_read_loadavg(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ ulong_t avenrun1;
+ ulong_t avenrun5;
+ ulong_t avenrun15;
+ ulong_t avenrun1_cs;
+ ulong_t avenrun5_cs;
+ ulong_t avenrun15_cs;
+ int loadavg[3];
+ int *loadbuf;
+ cpupart_t *cp;
+ zone_t *zone = LXPTOZ(lxpnp);
+
+ uint_t nrunnable = 0;
+ rctl_qty_t nlwps;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_LOADAVG);
+
+ mutex_enter(&cpu_lock);
+
+ /*
+ * Need to add up values over all CPU partitions. If pools are active,
+ * only report the values of the zone's partition, which by definition
+ * includes the current CPU.
+ */
+ if (pool_pset_enabled()) {
+ psetid_t psetid = zone_pset_get(curproc->p_zone);
+
+ ASSERT(curproc->p_zone != &zone0);
+ cp = CPU->cpu_part;
+
+ nrunnable = cp->cp_nrunning + cp->cp_nrunnable;
+ (void) cpupart_get_loadavg(psetid, &loadavg[0], 3);
+ loadbuf = &loadavg[0];
+ } else {
+ cp = cp_list_head;
+ do {
+ nrunnable += cp->cp_nrunning + cp->cp_nrunnable;
+ } while ((cp = cp->cp_next) != cp_list_head);
+
+ loadbuf = zone == global_zone ?
+ &avenrun[0] : zone->zone_avenrun;
+ }
+
+ /*
+ * If we're in the non-global zone, we'll report the total number of
+ * LWPs in the zone for the "nproc" parameter of /proc/loadavg,
+ * otherwise will just use nthread (which will include kernel threads,
+ * but should be good enough for lxproc).
+ */
+ nlwps = zone == global_zone ? nthread : zone->zone_nlwps;
+
+ mutex_exit(&cpu_lock);
+
+ avenrun1 = loadbuf[0] >> FSHIFT;
+ avenrun1_cs = ((loadbuf[0] & (FSCALE-1)) * 100) >> FSHIFT;
+ avenrun5 = loadbuf[1] >> FSHIFT;
+ avenrun5_cs = ((loadbuf[1] & (FSCALE-1)) * 100) >> FSHIFT;
+ avenrun15 = loadbuf[2] >> FSHIFT;
+ avenrun15_cs = ((loadbuf[2] & (FSCALE-1)) * 100) >> FSHIFT;
+
+ lxpr_uiobuf_printf(uiobuf,
+ "%ld.%02d %ld.%02d %ld.%02d %d/%d %d\n",
+ avenrun1, avenrun1_cs,
+ avenrun5, avenrun5_cs,
+ avenrun15, avenrun15_cs,
+ nrunnable, nlwps, 0);
+}
+
+/*
+ * lxpr_read_meminfo(): read the contents of the "meminfo" file.
+ */
+static void
+lxpr_read_meminfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ zone_t *zone = LXPTOZ(lxpnp);
+ int global = zone == global_zone;
+ long total_mem, free_mem, total_swap, used_swap;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_MEMINFO);
+
+ if (global || zone->zone_phys_mem_ctl == UINT64_MAX) {
+ total_mem = physmem * PAGESIZE;
+ free_mem = freemem * PAGESIZE;
+ } else {
+ total_mem = zone->zone_phys_mem_ctl;
+ free_mem = zone->zone_phys_mem_ctl - zone->zone_phys_mem;
+ }
+
+ if (global || zone->zone_max_swap_ctl == UINT64_MAX) {
+ total_swap = k_anoninfo.ani_max * PAGESIZE;
+ used_swap = k_anoninfo.ani_phys_resv * PAGESIZE;
+ } else {
+ mutex_enter(&zone->zone_mem_lock);
+ total_swap = zone->zone_max_swap_ctl;
+ used_swap = zone->zone_max_swap;
+ mutex_exit(&zone->zone_mem_lock);
+ }
+
+ lxpr_uiobuf_printf(uiobuf,
+ " total: used: free: shared: buffers: cached:\n"
+ "Mem: %8lu %8lu %8lu %8u %8u %8u\n"
+ "Swap: %8lu %8lu %8lu\n"
+ "MemTotal: %8lu kB\n"
+ "MemFree: %8lu kB\n"
+ "MemShared: %8u kB\n"
+ "Buffers: %8u kB\n"
+ "Cached: %8u kB\n"
+ "SwapCached:%8u kB\n"
+ "Active: %8u kB\n"
+ "Inactive: %8u kB\n"
+ "HighTotal: %8u kB\n"
+ "HighFree: %8u kB\n"
+ "LowTotal: %8u kB\n"
+ "LowFree: %8u kB\n"
+ "SwapTotal: %8lu kB\n"
+ "SwapFree: %8lu kB\n",
+ total_mem, total_mem - free_mem, free_mem, 0, 0, 0,
+ total_swap, used_swap, total_swap - used_swap,
+ btok(total_mem), /* MemTotal */
+ btok(free_mem), /* MemFree */
+ 0, /* MemShared */
+ 0, /* Buffers */
+ 0, /* Cached */
+ 0, /* SwapCached */
+ 0, /* Active */
+ 0, /* Inactive */
+ 0, /* HighTotal */
+ 0, /* HighFree */
+ btok(total_mem), /* LowTotal */
+ btok(free_mem), /* LowFree */
+ btok(total_swap), /* SwapTotal */
+ btok(total_swap - used_swap)); /* SwapFree */
+}
+
+/*
+ * lxpr_read_mounts():
+ */
+/* ARGSUSED */
+static void
+lxpr_read_mounts(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ struct vfs *vfsp;
+ struct vfs *vfslist;
+ zone_t *zone = LXPTOZ(lxpnp);
+ struct print_data {
+ refstr_t *vfs_mntpt;
+ refstr_t *vfs_resource;
+ uint_t vfs_flag;
+ int vfs_fstype;
+ struct print_data *next;
+ } *print_head = NULL;
+ struct print_data **print_tail = &print_head;
+ struct print_data *printp;
+
+ vfs_list_read_lock();
+
+ if (zone == global_zone) {
+ vfsp = vfslist = rootvfs;
+ } else {
+ vfsp = vfslist = zone->zone_vfslist;
+ /*
+ * If the zone has a root entry, it will be the first in
+ * the list. If it doesn't, we conjure one up.
+ */
+ if (vfslist == NULL || strcmp(refstr_value(vfsp->vfs_mntpt),
+ zone->zone_rootpath) != 0) {
+ struct vfs *tvfsp;
+ /*
+ * The root of the zone is not a mount point. The vfs
+ * we want to report is that of the zone's root vnode.
+ */
+ tvfsp = zone->zone_rootvp->v_vfsp;
+
+ lxpr_uiobuf_printf(uiobuf,
+ "/ / %s %s 0 0\n",
+ vfssw[tvfsp->vfs_fstype].vsw_name,
+ tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
+
+ }
+ if (vfslist == NULL) {
+ vfs_list_unlock();
+ return;
+ }
+ }
+
+ /*
+ * Later on we have to do a lookupname, which can end up causing
+ * another vfs_list_read_lock() to be called. Which can lead to a
+ * deadlock. To avoid this, we extract the data we need into a local
+ * list, then we can run this list without holding vfs_list_read_lock()
+ * We keep the list in the same order as the vfs_list
+ */
+ do {
+ /* Skip mounts we shouldn't show */
+ if (vfsp->vfs_flag & VFS_NOMNTTAB) {
+ goto nextfs;
+ }
+
+ printp = kmem_alloc(sizeof (*printp), KM_SLEEP);
+ refstr_hold(vfsp->vfs_mntpt);
+ printp->vfs_mntpt = vfsp->vfs_mntpt;
+ refstr_hold(vfsp->vfs_resource);
+ printp->vfs_resource = vfsp->vfs_resource;
+ printp->vfs_flag = vfsp->vfs_flag;
+ printp->vfs_fstype = vfsp->vfs_fstype;
+ printp->next = NULL;
+
+ *print_tail = printp;
+ print_tail = &printp->next;
+
+nextfs:
+ vfsp = (zone == global_zone) ?
+ vfsp->vfs_next : vfsp->vfs_zone_next;
+
+ } while (vfsp != vfslist);
+
+ vfs_list_unlock();
+
+ /*
+ * now we can run through what we've extracted without holding
+ * vfs_list_read_lock()
+ */
+ printp = print_head;
+ while (printp != NULL) {
+ struct print_data *printp_next;
+ const char *resource;
+ char *mntpt;
+ struct vnode *vp;
+ int error;
+
+ mntpt = (char *)refstr_value(printp->vfs_mntpt);
+ resource = refstr_value(printp->vfs_resource);
+
+ if (mntpt != NULL && mntpt[0] != '\0')
+ mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
+ else
+ mntpt = "-";
+
+ error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
+
+ if (error != 0)
+ goto nextp;
+
+ if (!(vp->v_flag & VROOT)) {
+ VN_RELE(vp);
+ goto nextp;
+ }
+ VN_RELE(vp);
+
+ if (resource != NULL && resource[0] != '\0') {
+ if (resource[0] == '/') {
+ resource = ZONE_PATH_VISIBLE(resource, zone) ?
+ ZONE_PATH_TRANSLATE(resource, zone) :
+ mntpt;
+ }
+ } else {
+ resource = "-";
+ }
+
+ lxpr_uiobuf_printf(uiobuf,
+ "%s %s %s %s 0 0\n",
+ resource, mntpt, vfssw[printp->vfs_fstype].vsw_name,
+ printp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
+
+nextp:
+ printp_next = printp->next;
+ refstr_rele(printp->vfs_mntpt);
+ refstr_rele(printp->vfs_resource);
+ kmem_free(printp, sizeof (*printp));
+ printp = printp_next;
+
+ }
+}
+
+/*
+ * lxpr_read_partitions():
+ *
+ * We don't support partitions in a local zone because it requires access to
+ * physical devices. But we need to fake up enough of the file to show that we
+ * have no partitions.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_partitions(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_uiobuf_printf(uiobuf,
+ "major minor #blocks name rio rmerge rsect ruse "
+ "wio wmerge wsect wuse running use aveq\n\n");
+}
+
+/*
+ * lxpr_read_version(): read the contents of the "version" file. Note that
+ * we don't lie here -- we don't pretend that we're Linux. If lxproc is to
+ * be used in a Linux-branded zone, there will need to be a mount option to
+ * indicate that Linux should be more fully mimicked.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_version(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_uiobuf_printf(uiobuf,
+ "%s version %s (%s version %d.%d.%d) "
+ "#%s SMP %s\n",
+ utsname.sysname, utsname.release,
+#if defined(__GNUC__)
+ "gcc",
+ __GNUC__,
+ __GNUC_MINOR__,
+ __GNUC_PATCHLEVEL__,
+#else
+ "Sun C",
+ __SUNPRO_C / 0x100,
+ (__SUNPRO_C & 0xff) / 0x10,
+ __SUNPRO_C & 0xf,
+#endif
+ utsname.version,
+ "00:00:00 00/00/00");
+}
+
+/*
+ * lxpr_read_stat(): read the contents of the "stat" file.
+ *
+ */
+/* ARGSUSED */
+static void
+lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ cpu_t *cp, *cpstart;
+ int pools_enabled;
+ ulong_t idle_cum = 0;
+ ulong_t sys_cum = 0;
+ ulong_t user_cum = 0;
+ ulong_t irq_cum = 0;
+ ulong_t cpu_nrunnable_cum = 0;
+ ulong_t w_io_cum = 0;
+
+ ulong_t pgpgin_cum = 0;
+ ulong_t pgpgout_cum = 0;
+ ulong_t pgswapout_cum = 0;
+ ulong_t pgswapin_cum = 0;
+ ulong_t intr_cum = 0;
+ ulong_t pswitch_cum = 0;
+ ulong_t forks_cum = 0;
+ hrtime_t msnsecs[NCMSTATES];
+
+ /* temporary variable since scalehrtime modifies data in place */
+ hrtime_t tmptime;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_STAT);
+
+ mutex_enter(&cpu_lock);
+ pools_enabled = pool_pset_enabled();
+
+ /* Calculate cumulative stats */
+ cp = cpstart = CPU->cpu_part->cp_cpulist;
+ do {
+ int i;
+
+ /*
+ * Don't count CPUs that aren't even in the system
+ * or aren't up yet.
+ */
+ if ((cp->cpu_flags & CPU_EXISTS) == 0) {
+ continue;
+ }
+
+ get_cpu_mstate(cp, msnsecs);
+
+ idle_cum += NSEC_TO_TICK(msnsecs[CMS_IDLE]);
+ sys_cum += NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
+ user_cum += NSEC_TO_TICK(msnsecs[CMS_USER]);
+
+ pgpgin_cum += CPU_STATS(cp, vm.pgpgin);
+ pgpgout_cum += CPU_STATS(cp, vm.pgpgout);
+ pgswapin_cum += CPU_STATS(cp, vm.pgswapin);
+ pgswapout_cum += CPU_STATS(cp, vm.pgswapout);
+
+ cpu_nrunnable_cum += cp->cpu_disp->disp_nrunnable;
+ w_io_cum += CPU_STATS(cp, sys.iowait);
+ for (i = 0; i < NCMSTATES; i++) {
+ tmptime = cp->cpu_intracct[i];
+ scalehrtime(&tmptime);
+ irq_cum += NSEC_TO_TICK(tmptime);
+ }
+
+ for (i = 0; i < PIL_MAX; i++)
+ intr_cum += CPU_STATS(cp, sys.intr[i]);
+
+ pswitch_cum += CPU_STATS(cp, sys.pswitch);
+ forks_cum += CPU_STATS(cp, sys.sysfork);
+ forks_cum += CPU_STATS(cp, sys.sysvfork);
+
+ if (pools_enabled)
+ cp = cp->cpu_next_part;
+ else
+ cp = cp->cpu_next;
+ } while (cp != cpstart);
+
+ lxpr_uiobuf_printf(uiobuf, "cpu %lu %lu %lu %lu %lu %lu %lu\n",
+ user_cum, 0L, sys_cum, idle_cum, 0L, irq_cum, 0L);
+
+ /* Do per processor stats */
+ do {
+ int i;
+
+ ulong_t idle_ticks;
+ ulong_t sys_ticks;
+ ulong_t user_ticks;
+ ulong_t irq_ticks = 0;
+
+ /*
+ * Don't count CPUs that aren't even in the system
+ * or aren't up yet.
+ */
+ if ((cp->cpu_flags & CPU_EXISTS) == 0) {
+ continue;
+ }
+
+ get_cpu_mstate(cp, msnsecs);
+
+ idle_ticks = NSEC_TO_TICK(msnsecs[CMS_IDLE]);
+ sys_ticks = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
+ user_ticks = NSEC_TO_TICK(msnsecs[CMS_USER]);
+
+ for (i = 0; i < NCMSTATES; i++) {
+ tmptime = cp->cpu_intracct[i];
+ scalehrtime(&tmptime);
+ irq_ticks += NSEC_TO_TICK(tmptime);
+ }
+
+ lxpr_uiobuf_printf(uiobuf,
+ "cpu%d %lu %lu %lu %lu %lu %lu %lu\n",
+ cp->cpu_id, user_ticks, 0L, sys_ticks, idle_ticks,
+ 0L, irq_ticks, 0L);
+
+ if (pools_enabled)
+ cp = cp->cpu_next_part;
+ else
+ cp = cp->cpu_next;
+ } while (cp != cpstart);
+
+ mutex_exit(&cpu_lock);
+
+ lxpr_uiobuf_printf(uiobuf,
+ "page %lu %lu\n"
+ "swap %lu %lu\n"
+ "intr %lu\n"
+ "ctxt %lu\n"
+ "btime %lu\n"
+ "processes %lu\n"
+ "procs_running %lu\n"
+ "procs_blocked %lu\n",
+ pgpgin_cum, pgpgout_cum,
+ pgswapin_cum, pgswapout_cum,
+ intr_cum,
+ pswitch_cum,
+ boot_time,
+ forks_cum,
+ cpu_nrunnable_cum,
+ w_io_cum);
+}
+
+/*
+ * lxpr_read_uptime(): read the contents of the "uptime" file.
+ *
+ * format is: "%.2lf, %.2lf",uptime_secs, idle_secs
+ * Use fixed point arithmetic to get 2 decimal places
+ */
+/* ARGSUSED */
+static void
+lxpr_read_uptime(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ cpu_t *cp, *cpstart;
+ int pools_enabled;
+ ulong_t idle_cum = 0;
+ ulong_t cpu_count = 0;
+ ulong_t idle_s;
+ ulong_t idle_cs;
+ ulong_t up_s;
+ ulong_t up_cs;
+ hrtime_t birthtime;
+ hrtime_t centi_sec = 10000000; /* 10^7 */
+
+ ASSERT(lxpnp->lxpr_type == LXPR_UPTIME);
+
+ /* Calculate cumulative stats */
+ mutex_enter(&cpu_lock);
+ pools_enabled = pool_pset_enabled();
+
+ cp = cpstart = CPU->cpu_part->cp_cpulist;
+ do {
+ /*
+ * Don't count CPUs that aren't even in the system
+ * or aren't up yet.
+ */
+ if ((cp->cpu_flags & CPU_EXISTS) == 0) {
+ continue;
+ }
+
+ idle_cum += CPU_STATS(cp, sys.cpu_ticks_idle);
+ idle_cum += CPU_STATS(cp, sys.cpu_ticks_wait);
+ cpu_count += 1;
+
+ if (pools_enabled)
+ cp = cp->cpu_next_part;
+ else
+ cp = cp->cpu_next;
+ } while (cp != cpstart);
+ mutex_exit(&cpu_lock);
+
+ /* Getting the Zone zsched process startup time */
+ birthtime = LXPTOZ(lxpnp)->zone_zsched->p_mstart;
+ up_cs = (gethrtime() - birthtime) / centi_sec;
+ up_s = up_cs / 100;
+ up_cs %= 100;
+
+ ASSERT(cpu_count > 0);
+ idle_cum /= cpu_count;
+ idle_s = idle_cum / hz;
+ idle_cs = idle_cum % hz;
+ idle_cs *= 100;
+ idle_cs /= hz;
+
+ lxpr_uiobuf_printf(uiobuf,
+ "%ld.%02d %ld.%02d\n", up_s, up_cs, idle_s, idle_cs);
+}
+
+static const char *amd_x_edx[] = {
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, "syscall",
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, "mp",
+ "nx", NULL, "mmxext", NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, "lm", "3dnowext", "3dnow"
+};
+
+static const char *amd_x_ecx[] = {
+ "lahf_lm", NULL, "svm", NULL,
+ "altmovcr8"
+};
+
+static const char *tm_x_edx[] = {
+ "recovery", "longrun", NULL, "lrti"
+};
+
+/*
+ * Intel calls no-execute "xd" in its docs, but Linux still reports it as "nx."
+ */
+static const char *intc_x_edx[] = {
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, "syscall",
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ "nx", NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, "lm", NULL, NULL
+};
+
+static const char *intc_edx[] = {
+ "fpu", "vme", "de", "pse",
+ "tsc", "msr", "pae", "mce",
+ "cx8", "apic", NULL, "sep",
+ "mtrr", "pge", "mca", "cmov",
+ "pat", "pse36", "pn", "clflush",
+ NULL, "dts", "acpi", "mmx",
+ "fxsr", "sse", "sse2", "ss",
+ "ht", "tm", "ia64", "pbe"
+};
+
+/*
+ * "sse3" on linux is called "pni" (Prescott New Instructions).
+ */
+static const char *intc_ecx[] = {
+ "pni", NULL, NULL, "monitor",
+ "ds_cpl", NULL, NULL, "est",
+ "tm2", NULL, "cid", NULL,
+ NULL, "cx16", "xtpr"
+};
+
+static void
+lxpr_read_cpuinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ int i;
+ uint32_t bits;
+ cpu_t *cp, *cpstart;
+ int pools_enabled;
+ const char **fp;
+ char brandstr[CPU_IDSTRLEN];
+ struct cpuid_regs cpr;
+ int maxeax;
+ int std_ecx, std_edx, ext_ecx, ext_edx;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_CPUINFO);
+
+ mutex_enter(&cpu_lock);
+ pools_enabled = pool_pset_enabled();
+
+ cp = cpstart = CPU->cpu_part->cp_cpulist;
+ do {
+ /*
+ * This returns the maximum eax value for standard cpuid
+ * functions in eax.
+ */
+ cpr.cp_eax = 0;
+ (void) cpuid_insn(cp, &cpr);
+ maxeax = cpr.cp_eax;
+
+ /*
+ * Get standard x86 feature flags.
+ */
+ cpr.cp_eax = 1;
+ (void) cpuid_insn(cp, &cpr);
+ std_ecx = cpr.cp_ecx;
+ std_edx = cpr.cp_edx;
+
+ /*
+ * Now get extended feature flags.
+ */
+ cpr.cp_eax = 0x80000001;
+ (void) cpuid_insn(cp, &cpr);
+ ext_ecx = cpr.cp_ecx;
+ ext_edx = cpr.cp_edx;
+
+ (void) cpuid_getbrandstr(cp, brandstr, CPU_IDSTRLEN);
+
+ lxpr_uiobuf_printf(uiobuf,
+ "processor\t: %d\n"
+ "vendor_id\t: %s\n"
+ "cpu family\t: %d\n"
+ "model\t\t: %d\n"
+ "model name\t: %s\n"
+ "stepping\t: %d\n"
+ "cpu MHz\t\t: %u.%03u\n",
+ cp->cpu_id, cpuid_getvendorstr(cp), cpuid_getfamily(cp),
+ cpuid_getmodel(cp), brandstr, cpuid_getstep(cp),
+ (uint32_t)(cpu_freq_hz / 1000000),
+ ((uint32_t)(cpu_freq_hz / 1000)) % 1000);
+
+ lxpr_uiobuf_printf(uiobuf, "cache size\t: %u KB\n",
+ getl2cacheinfo(cp, NULL, NULL, NULL) / 1024);
+
+ if (is_x86_feature(x86_featureset, X86FSET_HTT)) {
+ /*
+ * 'siblings' is used for HT-style threads
+ */
+ lxpr_uiobuf_printf(uiobuf,
+ "physical id\t: %lu\n"
+ "siblings\t: %u\n",
+ pg_plat_hw_instance_id(cp, PGHW_CHIP),
+ cpuid_get_ncpu_per_chip(cp));
+ }
+
+ /*
+ * Since we're relatively picky about running on older hardware,
+ * we can be somewhat cavalier about the answers to these ones.
+ *
+ * In fact, given the hardware we support, we just say:
+ *
+ * fdiv_bug : no (if we're on a 64-bit kernel)
+ * hlt_bug : no
+ * f00f_bug : no
+ * coma_bug : no
+ * wp : yes (write protect in supervsr mode)
+ */
+ lxpr_uiobuf_printf(uiobuf,
+ "fdiv_bug\t: %s\n"
+ "hlt_bug \t: no\n"
+ "f00f_bug\t: no\n"
+ "coma_bug\t: no\n"
+ "fpu\t\t: %s\n"
+ "fpu_exception\t: %s\n"
+ "cpuid level\t: %d\n"
+ "flags\t\t:",
+#if defined(__i386)
+ fpu_pentium_fdivbug ? "yes" : "no",
+#else
+ "no",
+#endif /* __i386 */
+ fpu_exists ? "yes" : "no", fpu_exists ? "yes" : "no",
+ maxeax);
+
+ for (bits = std_edx, fp = intc_edx, i = 0;
+ i < sizeof (intc_edx) / sizeof (intc_edx[0]); fp++, i++)
+ if ((bits & (1 << i)) != 0 && *fp)
+ lxpr_uiobuf_printf(uiobuf, " %s", *fp);
+
+ /*
+ * name additional features where appropriate
+ */
+ switch (x86_vendor) {
+ case X86_VENDOR_Intel:
+ for (bits = ext_edx, fp = intc_x_edx, i = 0;
+ i < sizeof (intc_x_edx) / sizeof (intc_x_edx[0]);
+ fp++, i++)
+ if ((bits & (1 << i)) != 0 && *fp)
+ lxpr_uiobuf_printf(uiobuf, " %s", *fp);
+ break;
+
+ case X86_VENDOR_AMD:
+ for (bits = ext_edx, fp = amd_x_edx, i = 0;
+ i < sizeof (amd_x_edx) / sizeof (amd_x_edx[0]);
+ fp++, i++)
+ if ((bits & (1 << i)) != 0 && *fp)
+ lxpr_uiobuf_printf(uiobuf, " %s", *fp);
+
+ for (bits = ext_ecx, fp = amd_x_ecx, i = 0;
+ i < sizeof (amd_x_ecx) / sizeof (amd_x_ecx[0]);
+ fp++, i++)
+ if ((bits & (1 << i)) != 0 && *fp)
+ lxpr_uiobuf_printf(uiobuf, " %s", *fp);
+ break;
+
+ case X86_VENDOR_TM:
+ for (bits = ext_edx, fp = tm_x_edx, i = 0;
+ i < sizeof (tm_x_edx) / sizeof (tm_x_edx[0]);
+ fp++, i++)
+ if ((bits & (1 << i)) != 0 && *fp)
+ lxpr_uiobuf_printf(uiobuf, " %s", *fp);
+ break;
+ default:
+ break;
+ }
+
+ for (bits = std_ecx, fp = intc_ecx, i = 0;
+ i < sizeof (intc_ecx) / sizeof (intc_ecx[0]); fp++, i++)
+ if ((bits & (1 << i)) != 0 && *fp)
+ lxpr_uiobuf_printf(uiobuf, " %s", *fp);
+
+ lxpr_uiobuf_printf(uiobuf, "\n\n");
+
+ if (pools_enabled)
+ cp = cp->cpu_next_part;
+ else
+ cp = cp->cpu_next;
+ } while (cp != cpstart);
+
+ mutex_exit(&cpu_lock);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_fd(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_FD_FD);
+ lxpr_uiobuf_seterr(uiobuf, EFAULT);
+}
+
+/*
+ * lxpr_getattr(): Vnode operation for VOP_GETATTR()
+ */
+static int
+lxpr_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
+ caller_context_t *ct)
+{
+ register lxpr_node_t *lxpnp = VTOLXP(vp);
+ lxpr_nodetype_t type = lxpnp->lxpr_type;
+ extern uint_t nproc;
+ int error;
+
+ /*
+ * Return attributes of underlying vnode if ATTR_REAL
+ *
+ * but keep fd files with the symlink permissions
+ */
+ if (lxpnp->lxpr_realvp != NULL && (flags & ATTR_REAL)) {
+ vnode_t *rvp = lxpnp->lxpr_realvp;
+
+ /*
+ * withold attribute information to owner or root
+ */
+ if ((error = VOP_ACCESS(rvp, 0, 0, cr, ct)) != 0) {
+ return (error);
+ }
+
+ /*
+ * now its attributes
+ */
+ if ((error = VOP_GETATTR(rvp, vap, flags, cr, ct)) != 0) {
+ return (error);
+ }
+
+ /*
+ * if it's a file in lx /proc/pid/fd/xx then set its
+ * mode and keep it looking like a symlink
+ */
+ if (type == LXPR_PID_FD_FD) {
+ vap->va_mode = lxpnp->lxpr_mode;
+ vap->va_type = vp->v_type;
+ vap->va_size = 0;
+ vap->va_nlink = 1;
+ }
+ return (0);
+ }
+
+ /* Default attributes, that may be overridden below */
+ bzero(vap, sizeof (*vap));
+ vap->va_atime = vap->va_mtime = vap->va_ctime = lxpnp->lxpr_time;
+ vap->va_nlink = 1;
+ vap->va_type = vp->v_type;
+ vap->va_mode = lxpnp->lxpr_mode;
+ vap->va_fsid = vp->v_vfsp->vfs_dev;
+ vap->va_blksize = DEV_BSIZE;
+ vap->va_uid = lxpnp->lxpr_uid;
+ vap->va_gid = lxpnp->lxpr_gid;
+ vap->va_nodeid = lxpnp->lxpr_ino;
+
+ switch (type) {
+ case LXPR_PROCDIR:
+ vap->va_nlink = nproc + 2 + PROCDIRFILES;
+ vap->va_size = (nproc + 2 + PROCDIRFILES) * LXPR_SDSIZE;
+ break;
+ case LXPR_PIDDIR:
+ vap->va_nlink = PIDDIRFILES;
+ vap->va_size = PIDDIRFILES * LXPR_SDSIZE;
+ break;
+ case LXPR_SELF:
+ vap->va_uid = crgetruid(curproc->p_cred);
+ vap->va_gid = crgetrgid(curproc->p_cred);
+ break;
+ default:
+ break;
+ }
+
+ vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size);
+ return (0);
+}
+
+/*
+ * lxpr_access(): Vnode operation for VOP_ACCESS()
+ */
+static int
+lxpr_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct)
+{
+ lxpr_node_t *lxpnp = VTOLXP(vp);
+ int shift = 0;
+ proc_t *tp;
+
+ /* lx /proc is a read only file system */
+ if (mode & VWRITE)
+ return (EROFS);
+
+ /*
+ * If this is a restricted file, check access permissions.
+ */
+ switch (lxpnp->lxpr_type) {
+ case LXPR_PIDDIR:
+ return (0);
+ case LXPR_PID_CURDIR:
+ case LXPR_PID_ENV:
+ case LXPR_PID_EXE:
+ case LXPR_PID_MAPS:
+ case LXPR_PID_MEM:
+ case LXPR_PID_ROOTDIR:
+ case LXPR_PID_FDDIR:
+ case LXPR_PID_FD_FD:
+ if ((tp = lxpr_lock(lxpnp->lxpr_pid)) == NULL)
+ return (ENOENT);
+ if (tp != curproc && secpolicy_proc_access(cr) != 0 &&
+ priv_proc_cred_perm(cr, tp, NULL, mode) != 0) {
+ lxpr_unlock(tp);
+ return (EACCES);
+ }
+ lxpr_unlock(tp);
+ default:
+ break;
+ }
+
+ if (lxpnp->lxpr_realvp != NULL) {
+ /*
+ * For these we use the underlying vnode's accessibility.
+ */
+ return (VOP_ACCESS(lxpnp->lxpr_realvp, mode, flags, cr, ct));
+ }
+
+ /* If user is root allow access regardless of permission bits */
+ if (secpolicy_proc_access(cr) == 0)
+ return (0);
+
+ /*
+ * Access check is based on only one of owner, group, public. If not
+ * owner, then check group. If not a member of the group, then check
+ * public access.
+ */
+ if (crgetuid(cr) != lxpnp->lxpr_uid) {
+ shift += 3;
+ if (!groupmember((uid_t)lxpnp->lxpr_gid, cr))
+ shift += 3;
+ }
+
+ mode &= ~(lxpnp->lxpr_mode << shift);
+
+ if (mode == 0)
+ return (0);
+
+ return (EACCES);
+}
+
+/* ARGSUSED */
+static vnode_t *
+lxpr_lookup_not_a_dir(vnode_t *dp, char *comp)
+{
+ return (NULL);
+}
+
+/*
+ * lxpr_lookup(): Vnode operation for VOP_LOOKUP()
+ */
+/* ARGSUSED */
+static int
+lxpr_lookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pathp,
+ int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
+ int *direntflags, pathname_t *realpnp)
+{
+ lxpr_node_t *lxpnp = VTOLXP(dp);
+ lxpr_nodetype_t type = lxpnp->lxpr_type;
+ int error;
+
+ ASSERT(dp->v_type == VDIR);
+ ASSERT(type < LXPR_NFILES);
+
+ /*
+ * we should never get here because the lookup
+ * is done on the realvp for these nodes
+ */
+ ASSERT(type != LXPR_PID_FD_FD &&
+ type != LXPR_PID_CURDIR &&
+ type != LXPR_PID_ROOTDIR);
+
+ /*
+ * restrict lookup permission to owner or root
+ */
+ if ((error = lxpr_access(dp, VEXEC, 0, cr, ct)) != 0) {
+ return (error);
+ }
+
+ /*
+ * Just return the parent vnode if that's where we are trying to go.
+ */
+ if (strcmp(comp, "..") == 0) {
+ VN_HOLD(lxpnp->lxpr_parent);
+ *vpp = lxpnp->lxpr_parent;
+ return (0);
+ }
+
+ /*
+ * Special handling for directory searches. Note: null component name
+ * denotes that the current directory is being searched.
+ */
+ if ((dp->v_type == VDIR) && (*comp == '\0' || strcmp(comp, ".") == 0)) {
+ VN_HOLD(dp);
+ *vpp = dp;
+ return (0);
+ }
+
+ *vpp = (lxpr_lookup_function[type](dp, comp));
+ return ((*vpp == NULL) ? ENOENT : 0);
+}
+
+/*
+ * Do a sequential search on the given directory table
+ */
+static vnode_t *
+lxpr_lookup_common(vnode_t *dp, char *comp, proc_t *p,
+ lxpr_dirent_t *dirtab, int dirtablen)
+{
+ lxpr_node_t *lxpnp;
+ int count;
+
+ for (count = 0; count < dirtablen; count++) {
+ if (strcmp(dirtab[count].d_name, comp) == 0) {
+ lxpnp = lxpr_getnode(dp, dirtab[count].d_type, p, 0);
+ dp = LXPTOV(lxpnp);
+ ASSERT(dp != NULL);
+ return (dp);
+ }
+ }
+ return (NULL);
+}
+
+static vnode_t *
+lxpr_lookup_piddir(vnode_t *dp, char *comp)
+{
+ proc_t *p;
+
+ ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PIDDIR);
+
+ p = lxpr_lock(VTOLXP(dp)->lxpr_pid);
+ if (p == NULL)
+ return (NULL);
+
+ dp = lxpr_lookup_common(dp, comp, p, piddir, PIDDIRFILES);
+
+ lxpr_unlock(p);
+
+ return (dp);
+}
+
+/*
+ * Lookup one of the process's open files.
+ */
+static vnode_t *
+lxpr_lookup_fddir(vnode_t *dp, char *comp)
+{
+ lxpr_node_t *dlxpnp = VTOLXP(dp);
+ lxpr_node_t *lxpnp;
+ vnode_t *vp = NULL;
+ proc_t *p;
+ file_t *fp;
+ uint_t fd;
+ int c;
+ uf_entry_t *ufp;
+ uf_info_t *fip;
+
+ ASSERT(dlxpnp->lxpr_type == LXPR_PID_FDDIR);
+
+ /*
+ * convert the string rendition of the filename
+ * to a file descriptor
+ */
+ fd = 0;
+ while ((c = *comp++) != '\0') {
+ int ofd;
+ if (c < '0' || c > '9')
+ return (NULL);
+
+ ofd = fd;
+ fd = 10*fd + c - '0';
+ /* integer overflow */
+ if (fd / 10 != ofd)
+ return (NULL);
+ }
+
+ /*
+ * get the proc to work with and lock it
+ */
+ p = lxpr_lock(dlxpnp->lxpr_pid);
+ if ((p == NULL))
+ return (NULL);
+
+ /*
+ * If the process is a zombie or system process
+ * it can't have any open files.
+ */
+ if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) {
+ lxpr_unlock(p);
+ return (NULL);
+ }
+
+ /*
+ * get us a fresh node/vnode
+ */
+ lxpnp = lxpr_getnode(dp, LXPR_PID_FD_FD, p, fd);
+
+ /*
+ * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from
+ * going away while we dereference into fi_list.
+ */
+ mutex_exit(&p->p_lock);
+
+ /*
+ * get open file info
+ */
+ fip = (&(p)->p_user.u_finfo);
+ mutex_enter(&fip->fi_lock);
+
+ if (fd < fip->fi_nfiles) {
+ UF_ENTER(ufp, fip, fd);
+ /*
+ * ensure the fd is still kosher.
+ * it may have gone between the readdir and
+ * the lookup
+ */
+ if (fip->fi_list[fd].uf_file == NULL) {
+ mutex_exit(&fip->fi_lock);
+ UF_EXIT(ufp);
+ mutex_enter(&p->p_lock);
+ lxpr_unlock(p);
+ lxpr_freenode(lxpnp);
+ return (NULL);
+ }
+
+ if ((fp = ufp->uf_file) != NULL)
+ vp = fp->f_vnode;
+ UF_EXIT(ufp);
+ }
+ mutex_exit(&fip->fi_lock);
+
+ if (vp == NULL) {
+ mutex_enter(&p->p_lock);
+ lxpr_unlock(p);
+ lxpr_freenode(lxpnp);
+ return (NULL);
+ } else {
+ /*
+ * Fill in the lxpr_node so future references will be able to
+ * find the underlying vnode. The vnode is held on the realvp.
+ */
+ lxpnp->lxpr_realvp = vp;
+ VN_HOLD(lxpnp->lxpr_realvp);
+ }
+
+ mutex_enter(&p->p_lock);
+ lxpr_unlock(p);
+ dp = LXPTOV(lxpnp);
+ ASSERT(dp != NULL);
+
+ return (dp);
+}
+
+static vnode_t *
+lxpr_lookup_netdir(vnode_t *dp, char *comp)
+{
+ ASSERT(VTOLXP(dp)->lxpr_type == LXPR_NETDIR);
+
+ dp = lxpr_lookup_common(dp, comp, NULL, netdir, NETDIRFILES);
+
+ return (dp);
+}
+
+static vnode_t *
+lxpr_lookup_procdir(vnode_t *dp, char *comp)
+{
+ ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PROCDIR);
+
+ /*
+ * We know all the names of files & dirs in our file system structure
+ * except those that are pid names. These change as pids are created/
+ * deleted etc., so we just look for a number as the first char to see
+ * if we are we doing pid lookups.
+ *
+ * Don't need to check for "self" as it is implemented as a symlink
+ */
+ if (*comp >= '0' && *comp <= '9') {
+ pid_t pid = 0;
+ lxpr_node_t *lxpnp = NULL;
+ proc_t *p;
+ int c;
+
+ while ((c = *comp++) != '\0')
+ pid = 10 * pid + c - '0';
+
+ /*
+ * Can't continue if the process is still loading or it doesn't
+ * really exist yet (or maybe it just died!)
+ */
+ p = lxpr_lock(pid);
+ if (p == NULL)
+ return (NULL);
+
+ if (secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
+ lxpr_unlock(p);
+ return (NULL);
+ }
+
+ /*
+ * allocate and fill in a new lxpr node
+ */
+ lxpnp = lxpr_getnode(dp, LXPR_PIDDIR, p, 0);
+
+ lxpr_unlock(p);
+
+ dp = LXPTOV(lxpnp);
+ ASSERT(dp != NULL);
+
+ return (dp);
+ }
+
+ /* Lookup fixed names */
+ return (lxpr_lookup_common(dp, comp, NULL, lxpr_dir, PROCDIRFILES));
+}
+
+/*
+ * lxpr_readdir(): Vnode operation for VOP_READDIR()
+ */
+/* ARGSUSED */
+static int
+lxpr_readdir(vnode_t *dp, uio_t *uiop, cred_t *cr, int *eofp,
+ caller_context_t *ct, int flags)
+{
+ lxpr_node_t *lxpnp = VTOLXP(dp);
+ lxpr_nodetype_t type = lxpnp->lxpr_type;
+ ssize_t uresid;
+ off_t uoffset;
+ int error;
+
+ ASSERT(dp->v_type == VDIR);
+ ASSERT(type < LXPR_NFILES);
+
+ /*
+ * we should never get here because the readdir
+ * is done on the realvp for these nodes
+ */
+ ASSERT(type != LXPR_PID_FD_FD &&
+ type != LXPR_PID_CURDIR &&
+ type != LXPR_PID_ROOTDIR);
+
+ /*
+ * restrict readdir permission to owner or root
+ */
+ if ((error = lxpr_access(dp, VREAD, 0, cr, ct)) != 0)
+ return (error);
+
+ uoffset = uiop->uio_offset;
+ uresid = uiop->uio_resid;
+
+ /* can't do negative reads */
+ if (uoffset < 0 || uresid <= 0)
+ return (EINVAL);
+
+ /* can't read directory entries that don't exist! */
+ if (uoffset % LXPR_SDSIZE)
+ return (ENOENT);
+
+ return (lxpr_readdir_function[lxpnp->lxpr_type](lxpnp, uiop, eofp));
+}
+
+/* ARGSUSED */
+static int
+lxpr_readdir_not_a_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ return (ENOTDIR);
+}
+
+/*
+ * This has the common logic for returning directory entries
+ */
+static int
+lxpr_readdir_common(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp,
+ lxpr_dirent_t *dirtab, int dirtablen)
+{
+ /* bp holds one dirent64 structure */
+ longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
+ dirent64_t *dirent = (dirent64_t *)bp;
+ ssize_t oresid; /* save a copy for testing later */
+ ssize_t uresid;
+
+ oresid = uiop->uio_resid;
+
+ /* clear out the dirent buffer */
+ bzero(bp, sizeof (bp));
+
+ /*
+ * Satisfy user request
+ */
+ while ((uresid = uiop->uio_resid) > 0) {
+ int dirindex;
+ off_t uoffset;
+ int reclen;
+ int error;
+
+ uoffset = uiop->uio_offset;
+ dirindex = (uoffset / LXPR_SDSIZE) - 2;
+
+ if (uoffset == 0) {
+
+ dirent->d_ino = lxpnp->lxpr_ino;
+ dirent->d_name[0] = '.';
+ dirent->d_name[1] = '\0';
+ reclen = DIRENT64_RECLEN(1);
+
+ } else if (uoffset == LXPR_SDSIZE) {
+
+ dirent->d_ino = lxpr_parentinode(lxpnp);
+ dirent->d_name[0] = '.';
+ dirent->d_name[1] = '.';
+ dirent->d_name[2] = '\0';
+ reclen = DIRENT64_RECLEN(2);
+
+ } else if (dirindex >= 0 && dirindex < dirtablen) {
+ int slen = strlen(dirtab[dirindex].d_name);
+
+ dirent->d_ino = lxpr_inode(dirtab[dirindex].d_type,
+ lxpnp->lxpr_pid, 0);
+
+ VERIFY(slen < LXPNSIZ);
+ (void) strcpy(dirent->d_name, dirtab[dirindex].d_name);
+ reclen = DIRENT64_RECLEN(slen);
+
+ } else {
+ /* Run out of table entries */
+ if (eofp) {
+ *eofp = 1;
+ }
+ return (0);
+ }
+
+ dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
+ dirent->d_reclen = (ushort_t)reclen;
+
+ /*
+ * if the size of the data to transfer is greater
+ * that that requested then we can't do it this transfer.
+ */
+ if (reclen > uresid) {
+ /*
+ * Error if no entries have been returned yet.
+ */
+ if (uresid == oresid) {
+ return (EINVAL);
+ }
+ break;
+ }
+
+ /*
+ * uiomove() updates both uiop->uio_resid and uiop->uio_offset
+ * by the same amount. But we want uiop->uio_offset to change
+ * in increments of LXPR_SDSIZE, which is different from the
+ * number of bytes being returned to the user. So we set
+ * uiop->uio_offset separately, ignoring what uiomove() does.
+ */
+ if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
+ uiop)) != 0)
+ return (error);
+
+ uiop->uio_offset = uoffset + LXPR_SDSIZE;
+ }
+
+ /* Have run out of space, but could have just done last table entry */
+ if (eofp) {
+ *eofp =
+ (uiop->uio_offset >= ((dirtablen+2) * LXPR_SDSIZE)) ? 1 : 0;
+ }
+ return (0);
+}
+
+
+static int
+lxpr_readdir_procdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ /* bp holds one dirent64 structure */
+ longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
+ dirent64_t *dirent = (dirent64_t *)bp;
+ ssize_t oresid; /* save a copy for testing later */
+ ssize_t uresid;
+ off_t uoffset;
+ zoneid_t zoneid;
+ pid_t pid;
+ int error;
+ int ceof;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PROCDIR);
+
+ oresid = uiop->uio_resid;
+ zoneid = LXPTOZ(lxpnp)->zone_id;
+
+ /*
+ * We return directory entries in the order: "." and ".." then the
+ * unique lxproc files, then the directories corresponding to the
+ * running processes. We have defined this as the ordering because
+ * it allows us to more easily keep track of where we are betwen calls
+ * to getdents(). If the number of processes changes between calls
+ * then we can't lose track of where we are in the lxproc files.
+ */
+
+ /* Do the fixed entries */
+ error = lxpr_readdir_common(lxpnp, uiop, &ceof, lxpr_dir,
+ PROCDIRFILES);
+
+ /* Finished if we got an error or if we couldn't do all the table */
+ if (error != 0 || ceof == 0)
+ return (error);
+
+ /* clear out the dirent buffer */
+ bzero(bp, sizeof (bp));
+
+ /* Do the process entries */
+ while ((uresid = uiop->uio_resid) > 0) {
+ proc_t *p;
+ int len;
+ int reclen;
+ int i;
+
+ uoffset = uiop->uio_offset;
+
+ /*
+ * Stop when entire proc table has been examined.
+ */
+ i = (uoffset / LXPR_SDSIZE) - 2 - PROCDIRFILES;
+ if (i < 0 || i >= v.v_proc) {
+ /* Run out of table entries */
+ if (eofp) {
+ *eofp = 1;
+ }
+ return (0);
+ }
+ mutex_enter(&pidlock);
+
+ /*
+ * Skip indices for which there is no pid_entry, PIDs for
+ * which there is no corresponding process, a PID of 0,
+ * and anything the security policy doesn't allow
+ * us to look at.
+ */
+ if ((p = pid_entry(i)) == NULL || p->p_stat == SIDL ||
+ p->p_pid == 0 ||
+ secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
+ mutex_exit(&pidlock);
+ goto next;
+ }
+ mutex_exit(&pidlock);
+
+ /*
+ * Convert pid to the Linux default of 1 if we're the zone's
+ * init process, otherwise use the value from the proc
+ * structure
+ */
+ pid = ((p->p_pid != curproc->p_zone->zone_proc_initpid) ?
+ p->p_pid : 1);
+
+ /*
+ * If this /proc was mounted in the global zone, view
+ * all procs; otherwise, only view zone member procs.
+ */
+ if (zoneid != GLOBAL_ZONEID && p->p_zone->zone_id != zoneid) {
+ goto next;
+ }
+
+ ASSERT(p->p_stat != 0);
+
+ dirent->d_ino = lxpr_inode(LXPR_PIDDIR, pid, 0);
+ len = snprintf(dirent->d_name, LXPNSIZ, "%d", pid);
+ ASSERT(len < LXPNSIZ);
+ reclen = DIRENT64_RECLEN(len);
+
+ dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
+ dirent->d_reclen = (ushort_t)reclen;
+
+ /*
+ * if the size of the data to transfer is greater
+ * that that requested then we can't do it this transfer.
+ */
+ if (reclen > uresid) {
+ /*
+ * Error if no entries have been returned yet.
+ */
+ if (uresid == oresid)
+ return (EINVAL);
+ break;
+ }
+
+ /*
+ * uiomove() updates both uiop->uio_resid and uiop->uio_offset
+ * by the same amount. But we want uiop->uio_offset to change
+ * in increments of LXPR_SDSIZE, which is different from the
+ * number of bytes being returned to the user. So we set
+ * uiop->uio_offset separately, in the increment of this for
+ * the loop, ignoring what uiomove() does.
+ */
+ if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
+ uiop)) != 0)
+ return (error);
+next:
+ uiop->uio_offset = uoffset + LXPR_SDSIZE;
+ }
+
+ if (eofp != NULL) {
+ *eofp = (uiop->uio_offset >=
+ ((v.v_proc + PROCDIRFILES + 2) * LXPR_SDSIZE)) ? 1 : 0;
+ }
+
+ return (0);
+}
+
+static int
+lxpr_readdir_piddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ proc_t *p;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PIDDIR);
+
+ /* can't read its contents if it died */
+ mutex_enter(&pidlock);
+
+ p = prfind((lxpnp->lxpr_pid == 1) ?
+ curproc->p_zone->zone_proc_initpid : lxpnp->lxpr_pid);
+
+ if (p == NULL || p->p_stat == SIDL) {
+ mutex_exit(&pidlock);
+ return (ENOENT);
+ }
+ mutex_exit(&pidlock);
+
+ return (lxpr_readdir_common(lxpnp, uiop, eofp, piddir, PIDDIRFILES));
+}
+
+static int
+lxpr_readdir_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_NETDIR);
+ return (lxpr_readdir_common(lxpnp, uiop, eofp, netdir, NETDIRFILES));
+}
+
+static int
+lxpr_readdir_fddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ /* bp holds one dirent64 structure */
+ longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
+ dirent64_t *dirent = (dirent64_t *)bp;
+ ssize_t oresid; /* save a copy for testing later */
+ ssize_t uresid;
+ off_t uoffset;
+ int error;
+ int ceof;
+ proc_t *p;
+ int fddirsize = -1;
+ uf_info_t *fip;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_FDDIR);
+
+ oresid = uiop->uio_resid;
+
+ /* can't read its contents if it died */
+ p = lxpr_lock(lxpnp->lxpr_pid);
+ if (p == NULL)
+ return (ENOENT);
+
+ if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas))
+ fddirsize = 0;
+
+ /*
+ * Drop p_lock, but keep the process P_PR_LOCK'd to prevent it from
+ * going away while we iterate over its fi_list.
+ */
+ mutex_exit(&p->p_lock);
+
+ /* Get open file info */
+ fip = (&(p)->p_user.u_finfo);
+ mutex_enter(&fip->fi_lock);
+
+ if (fddirsize == -1)
+ fddirsize = fip->fi_nfiles;
+
+ /* Do the fixed entries (in this case just "." & "..") */
+ error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0);
+
+ /* Finished if we got an error or if we couldn't do all the table */
+ if (error != 0 || ceof == 0)
+ goto out;
+
+ /* clear out the dirent buffer */
+ bzero(bp, sizeof (bp));
+
+ /*
+ * Loop until user's request is satisfied or until
+ * all file descriptors have been examined.
+ */
+ for (; (uresid = uiop->uio_resid) > 0;
+ uiop->uio_offset = uoffset + LXPR_SDSIZE) {
+ int reclen;
+ int fd;
+ int len;
+
+ uoffset = uiop->uio_offset;
+
+ /*
+ * Stop at the end of the fd list
+ */
+ fd = (uoffset / LXPR_SDSIZE) - 2;
+ if (fd < 0 || fd >= fddirsize) {
+ if (eofp) {
+ *eofp = 1;
+ }
+ goto out;
+ }
+
+ if (fip->fi_list[fd].uf_file == NULL)
+ continue;
+
+ dirent->d_ino = lxpr_inode(LXPR_PID_FD_FD, lxpnp->lxpr_pid, fd);
+ len = snprintf(dirent->d_name, LXPNSIZ, "%d", fd);
+ ASSERT(len < LXPNSIZ);
+ reclen = DIRENT64_RECLEN(len);
+
+ dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
+ dirent->d_reclen = (ushort_t)reclen;
+
+ if (reclen > uresid) {
+ /*
+ * Error if no entries have been returned yet.
+ */
+ if (uresid == oresid)
+ error = EINVAL;
+ goto out;
+ }
+
+ if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
+ uiop)) != 0)
+ goto out;
+ }
+
+ if (eofp != NULL) {
+ *eofp =
+ (uiop->uio_offset >= ((fddirsize+2) * LXPR_SDSIZE)) ? 1 : 0;
+ }
+
+out:
+ mutex_exit(&fip->fi_lock);
+ mutex_enter(&p->p_lock);
+ lxpr_unlock(p);
+ return (error);
+}
+
+
+/*
+ * lxpr_readlink(): Vnode operation for VOP_READLINK()
+ */
+/* ARGSUSED */
+static int
+lxpr_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
+{
+ char bp[MAXPATHLEN + 1];
+ size_t buflen = sizeof (bp);
+ lxpr_node_t *lxpnp = VTOLXP(vp);
+ vnode_t *rvp = lxpnp->lxpr_realvp;
+ pid_t pid;
+ int error = 0;
+
+ /* must be a symbolic link file */
+ if (vp->v_type != VLNK)
+ return (EINVAL);
+
+ /* Try to produce a symlink name for anything that has a realvp */
+ if (rvp != NULL) {
+ if ((error = lxpr_access(vp, VREAD, 0, CRED(), ct)) != 0)
+ return (error);
+ if ((error = vnodetopath(NULL, rvp, bp, buflen, CRED())) != 0)
+ return (error);
+ } else {
+ switch (lxpnp->lxpr_type) {
+ case LXPR_SELF:
+ /*
+ * Convert pid to the Linux default of 1 if we're the
+ * zone's init process
+ */
+ pid = ((curproc->p_pid !=
+ curproc->p_zone->zone_proc_initpid)
+ ? curproc->p_pid : 1);
+
+ /*
+ * Don't need to check result as every possible int
+ * will fit within MAXPATHLEN bytes.
+ */
+ (void) snprintf(bp, buflen, "%d", pid);
+ break;
+ case LXPR_PID_CURDIR:
+ case LXPR_PID_ROOTDIR:
+ case LXPR_PID_EXE:
+ return (EACCES);
+ default:
+ /*
+ * Need to return error so that nothing thinks
+ * that the symlink is empty and hence "."
+ */
+ return (EINVAL);
+ }
+ }
+
+ /* copy the link data to user space */
+ return (uiomove(bp, strlen(bp), UIO_READ, uiop));
+}
+
+/*
+ * lxpr_inactive(): Vnode operation for VOP_INACTIVE()
+ * Vnode is no longer referenced, deallocate the file
+ * and all its resources.
+ */
+/* ARGSUSED */
+static void
+lxpr_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
+{
+ lxpr_freenode(VTOLXP(vp));
+}
+
+/*
+ * lxpr_sync(): Vnode operation for VOP_SYNC()
+ */
+static int
+lxpr_sync()
+{
+ /*
+ * Nothing to sync but this function must never fail
+ */
+ return (0);
+}
+
+/*
+ * lxpr_cmp(): Vnode operation for VOP_CMP()
+ */
+static int
+lxpr_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
+{
+ vnode_t *rvp;
+
+ while (vn_matchops(vp1, lxpr_vnodeops) &&
+ (rvp = VTOLXP(vp1)->lxpr_realvp) != NULL) {
+ vp1 = rvp;
+ }
+
+ while (vn_matchops(vp2, lxpr_vnodeops) &&
+ (rvp = VTOLXP(vp2)->lxpr_realvp) != NULL) {
+ vp2 = rvp;
+ }
+
+ if (vn_matchops(vp1, lxpr_vnodeops) || vn_matchops(vp2, lxpr_vnodeops))
+ return (vp1 == vp2);
+
+ return (VOP_CMP(vp1, vp2, ct));
+}
+
+/*
+ * lxpr_realvp(): Vnode operation for VOP_REALVP()
+ */
+static int
+lxpr_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
+{
+ vnode_t *rvp;
+
+ if ((rvp = VTOLXP(vp)->lxpr_realvp) != NULL) {
+ vp = rvp;
+ if (VOP_REALVP(vp, &rvp, ct) == 0)
+ vp = rvp;
+ }
+
+ *vpp = vp;
+ return (0);
+}
diff --git a/usr/src/uts/common/fs/lxproc/lxproc.h b/usr/src/uts/common/fs/lxproc/lxproc.h
new file mode 100644
index 0000000000..eadb2ccd27
--- /dev/null
+++ b/usr/src/uts/common/fs/lxproc/lxproc.h
@@ -0,0 +1,278 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _LXPROC_H
+#define _LXPROC_H
+
+#ifdef _LXPROC_BRANDED_H
+#error Attempted to include native lxproc.h after branded lx_proc.h
+#endif
+
+#define _LXPROC_NATIVE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * lxproc.h: declarations, data structures and macros for lxprocfs
+ */
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/policy.h>
+#include <sys/debug.h>
+#include <sys/dirent.h>
+#include <sys/errno.h>
+#include <sys/file.h>
+#include <sys/kmem.h>
+#include <sys/pathname.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/var.h>
+#include <sys/user.h>
+#include <sys/t_lock.h>
+#include <sys/sysmacros.h>
+#include <sys/cred.h>
+#include <sys/priv.h>
+#include <sys/vnode.h>
+#include <sys/vfs.h>
+#include <sys/statvfs.h>
+#include <sys/cmn_err.h>
+#include <sys/zone.h>
+#include <sys/uio.h>
+#include <sys/utsname.h>
+#include <sys/dnlc.h>
+#include <sys/atomic.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+#include <vm/as.h>
+#include <vm/anon.h>
+
+#define LX_SIGHUP 1
+#define LX_SIGINT 2
+#define LX_SIGQUIT 3
+#define LX_SIGILL 4
+#define LX_SIGTRAP 5
+#define LX_SIGABRT 6
+#define LX_SIGIOT 6
+#define LX_SIGBUS 7
+#define LX_SIGFPE 8
+#define LX_SIGKILL 9
+#define LX_SIGUSR1 10
+#define LX_SIGSEGV 11
+#define LX_SIGUSR2 12
+#define LX_SIGPIPE 13
+#define LX_SIGALRM 14
+#define LX_SIGTERM 15
+#define LX_SIGSTKFLT 16
+#define LX_SIGCHLD 17
+#define LX_SIGCONT 18
+#define LX_SIGSTOP 19
+#define LX_SIGTSTP 20
+#define LX_SIGTTIN 21
+#define LX_SIGTTOU 22
+#define LX_SIGURG 23
+#define LX_SIGXCPU 24
+#define LX_SIGXFSZ 25
+#define LX_SIGVTALRM 26
+#define LX_SIGPROF 27
+#define LX_SIGWINCH 28
+#define LX_SIGIO 29
+#define LX_SIGPOLL LX_SIGIO
+#define LX_SIGPWR 30
+#define LX_SIGSYS 31
+#define LX_SIGUNUSED 31
+
+#define LX_NSIG 64 /* Linux _NSIG */
+
+#define LX_SIGRTMIN 32
+#define LX_SIGRTMAX LX_NSIG
+
+/*
+ * Convert a vnode into an lxpr_mnt_t
+ */
+#define VTOLXPM(vp) ((lxpr_mnt_t *)(vp)->v_vfsp->vfs_data)
+
+/*
+ * convert a vnode into an lxpr_node
+ */
+#define VTOLXP(vp) ((lxpr_node_t *)(vp)->v_data)
+
+/*
+ * convert a lxprnode into a vnode
+ */
+#define LXPTOV(lxpnp) ((lxpnp)->lxpr_vnode)
+
+/*
+ * convert a lxpr_node into zone for fs
+ */
+#define LXPTOZ(lxpnp) \
+ (((lxpr_mnt_t *)(lxpnp)->lxpr_vnode->v_vfsp->vfs_data)->lxprm_zone)
+
+#define LXPNSIZ 256 /* max size of lx /proc file name entries */
+
+/*
+ * Pretend that a directory entry takes 16 bytes
+ */
+#define LXPR_SDSIZE 16
+
+/*
+ * Node/file types for lx /proc files
+ * (directories and files contained therein).
+ */
+typedef enum lxpr_nodetype {
+ LXPR_PROCDIR, /* /proc */
+ LXPR_PIDDIR, /* /proc/<pid> */
+ LXPR_PID_CMDLINE, /* /proc/<pid>/cmdline */
+ LXPR_PID_CPU, /* /proc/<pid>/cpu */
+ LXPR_PID_CURDIR, /* /proc/<pid>/cwd */
+ LXPR_PID_ENV, /* /proc/<pid>/environ */
+ LXPR_PID_EXE, /* /proc/<pid>/exe */
+ LXPR_PID_MAPS, /* /proc/<pid>/maps */
+ LXPR_PID_MEM, /* /proc/<pid>/mem */
+ LXPR_PID_ROOTDIR, /* /proc/<pid>/root */
+ LXPR_PID_STAT, /* /proc/<pid>/stat */
+ LXPR_PID_STATM, /* /proc/<pid>/statm */
+ LXPR_PID_STATUS, /* /proc/<pid>/status */
+ LXPR_PID_FDDIR, /* /proc/<pid>/fd */
+ LXPR_PID_FD_FD, /* /proc/<pid>/fd/nn */
+ LXPR_CMDLINE, /* /proc/cmdline */
+ LXPR_CPUINFO, /* /proc/cpuinfo */
+ LXPR_DEVICES, /* /proc/devices */
+ LXPR_DMA, /* /proc/dma */
+ LXPR_FILESYSTEMS, /* /proc/filesystems */
+ LXPR_INTERRUPTS, /* /proc/interrupts */
+ LXPR_IOPORTS, /* /proc/ioports */
+ LXPR_KCORE, /* /proc/kcore */
+ LXPR_KMSG, /* /proc/kmsg */
+ LXPR_LOADAVG, /* /proc/loadavg */
+ LXPR_MEMINFO, /* /proc/meminfo */
+ LXPR_MOUNTS, /* /proc/mounts */
+ LXPR_NETDIR, /* /proc/net */
+ LXPR_NET_ARP, /* /proc/net/arp */
+ LXPR_NET_DEV, /* /proc/net/dev */
+ LXPR_NET_DEV_MCAST, /* /proc/net/dev_mcast */
+ LXPR_NET_IGMP, /* /proc/net/igmp */
+ LXPR_NET_IP_MR_CACHE, /* /proc/net/ip_mr_cache */
+ LXPR_NET_IP_MR_VIF, /* /proc/net/ip_mr_vif */
+ LXPR_NET_MCFILTER, /* /proc/net/mcfilter */
+ LXPR_NET_NETSTAT, /* /proc/net/netstat */
+ LXPR_NET_RAW, /* /proc/net/raw */
+ LXPR_NET_ROUTE, /* /proc/net/route */
+ LXPR_NET_RPC, /* /proc/net/rpc */
+ LXPR_NET_RT_CACHE, /* /proc/net/rt_cache */
+ LXPR_NET_SOCKSTAT, /* /proc/net/sockstat */
+ LXPR_NET_SNMP, /* /proc/net/snmp */
+ LXPR_NET_STAT, /* /proc/net/stat */
+ LXPR_NET_TCP, /* /proc/net/tcp */
+ LXPR_NET_UDP, /* /proc/net/udp */
+ LXPR_NET_UNIX, /* /proc/net/unix */
+ LXPR_PARTITIONS, /* /proc/partitions */
+ LXPR_SELF, /* /proc/self */
+ LXPR_STAT, /* /proc/stat */
+ LXPR_UPTIME, /* /proc/uptime */
+ LXPR_VERSION, /* /proc/version */
+ LXPR_NFILES /* number of lx /proc file types */
+} lxpr_nodetype_t;
+
+/*
+ * Number of fds allowed for in the inode number calculation
+ * per process (if a process has more fds then inode numbers
+ * may be duplicated)
+ */
+#define LXPR_FD_PERPROC 2000
+
+/*
+ * external dirent characteristics
+ */
+#define LXPRMAXNAMELEN 14
+typedef struct {
+ lxpr_nodetype_t d_type;
+ char d_name[LXPRMAXNAMELEN];
+} lxpr_dirent_t;
+
+/*
+ * This is the lxprocfs private data object
+ * which is attached to v_data in the vnode structure
+ */
+typedef struct lxpr_node {
+ lxpr_nodetype_t lxpr_type; /* type of this node */
+ vnode_t *lxpr_vnode; /* vnode for the node */
+ vnode_t *lxpr_parent; /* parent directory */
+ vnode_t *lxpr_realvp; /* real vnode, file in dirs */
+ timestruc_t lxpr_time; /* creation etc time for file */
+ mode_t lxpr_mode; /* file mode bits */
+ uid_t lxpr_uid; /* file owner */
+ gid_t lxpr_gid; /* file group owner */
+ pid_t lxpr_pid; /* pid of proc referred to */
+ ino_t lxpr_ino; /* node id */
+} lxpr_node_t;
+
+struct zone; /* forward declaration */
+
+/*
+ * This is the lxprocfs private data object
+ * which is attached to vfs_data in the vfs structure
+ */
+typedef struct lxpr_mnt {
+ lxpr_node_t *lxprm_node; /* node at root of proc mount */
+ struct zone *lxprm_zone; /* zone for this mount */
+ ldi_ident_t lxprm_li; /* ident for ldi */
+} lxpr_mnt_t;
+
+extern vnodeops_t *lxpr_vnodeops;
+extern int nproc_highbit; /* highbit(v.v_nproc) */
+
+typedef struct mounta mounta_t;
+
+extern void lxpr_initnodecache();
+extern void lxpr_fininodecache();
+extern void lxpr_initrootnode(lxpr_node_t **, vfs_t *);
+extern ino_t lxpr_inode(lxpr_nodetype_t, pid_t, int);
+extern ino_t lxpr_parentinode(lxpr_node_t *);
+extern lxpr_node_t *lxpr_getnode(vnode_t *, lxpr_nodetype_t, proc_t *, int);
+extern void lxpr_freenode(lxpr_node_t *);
+
+typedef struct lxpr_uiobuf lxpr_uiobuf_t;
+extern lxpr_uiobuf_t *lxpr_uiobuf_new(uio_t *);
+extern void lxpr_uiobuf_free(lxpr_uiobuf_t *);
+extern int lxpr_uiobuf_flush(lxpr_uiobuf_t *);
+extern void lxpr_uiobuf_seek(lxpr_uiobuf_t *, offset_t);
+extern void lxpr_uiobuf_write(lxpr_uiobuf_t *, const char *, size_t);
+extern void lxpr_uiobuf_printf(lxpr_uiobuf_t *, const char *, ...);
+extern void lxpr_uiobuf_seterr(lxpr_uiobuf_t *, int);
+
+proc_t *lxpr_lock(pid_t);
+void lxpr_unlock(proc_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LXPROC_H */
diff --git a/usr/src/uts/common/fs/nfs/nfs3_vfsops.c b/usr/src/uts/common/fs/nfs/nfs3_vfsops.c
index 207a708771..2176dcb9de 100644
--- a/usr/src/uts/common/fs/nfs/nfs3_vfsops.c
+++ b/usr/src/uts/common/fs/nfs/nfs3_vfsops.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2013, Joyent, Inc. All rights reserved.
*/
/*
diff --git a/usr/src/uts/common/fs/nfs/nfs3_vnops.c b/usr/src/uts/common/fs/nfs/nfs3_vnops.c
index b7354c168a..d3b12817ba 100644
--- a/usr/src/uts/common/fs/nfs/nfs3_vnops.c
+++ b/usr/src/uts/common/fs/nfs/nfs3_vnops.c
@@ -29,7 +29,7 @@
*/
/*
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*/
@@ -3353,10 +3353,9 @@ nfs3rename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, cred_t *cr,
if (nvp)
vnevent_rename_dest(nvp, ndvp, nnm, ct);
- if (odvp != ndvp)
- vnevent_rename_dest_dir(ndvp, ct);
ASSERT(ovp != NULL);
vnevent_rename_src(ovp, odvp, onm, ct);
+ vnevent_rename_dest_dir(ndvp, ovp, nnm, ct);
}
if (nvp) {
@@ -5523,8 +5522,13 @@ nfs3_space(vnode_t *vp, int cmd, struct flock64 *bfp, int flag,
va.va_size = bfp->l_start;
error = nfs3setattr(vp, &va, 0, cr);
- if (error == 0 && bfp->l_start == 0)
- vnevent_truncate(vp, ct);
+ if (error == 0) {
+ if (bfp->l_start == 0) {
+ vnevent_truncate(vp, ct);
+ } else {
+ vnevent_resize(vp, ct);
+ }
+ }
} else
error = EINVAL;
}
diff --git a/usr/src/uts/common/fs/nfs/nfs4_srv_ns.c b/usr/src/uts/common/fs/nfs/nfs4_srv_ns.c
index bc19d5a116..7b97b090af 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_srv_ns.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_srv_ns.c
@@ -22,6 +22,7 @@
/*
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc.
*/
#include <sys/systm.h>
@@ -178,12 +179,12 @@ pseudo_exportfs(vnode_t *vp, fid_t *fid, struct exp_visible *vis_head,
kex = &exi->exi_export;
kex->ex_flags = EX_PSEUDO;
- vpathlen = vp->v_path ? strlen(vp->v_path) : 0;
+ vpathlen = strlen(vp->v_path);
kex->ex_pathlen = vpathlen + strlen(PSEUDOFS_SUFFIX);
kex->ex_path = kmem_alloc(kex->ex_pathlen + 1, KM_SLEEP);
if (vpathlen)
- (void) strcpy(kex->ex_path, vp->v_path);
+ (void) strncpy(kex->ex_path, vp->v_path, vpathlen);
(void) strcpy(kex->ex_path + vpathlen, PSEUDOFS_SUFFIX);
/* Transfer the secinfo data from exdata to this new pseudo node */
diff --git a/usr/src/uts/common/fs/nfs/nfs4_vfsops.c b/usr/src/uts/common/fs/nfs/nfs4_vfsops.c
index 151cb62403..55f6c95289 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_vfsops.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_vfsops.c
@@ -22,6 +22,7 @@
/*
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2013, Joyent, Inc. All rights reserved.
*/
/*
diff --git a/usr/src/uts/common/fs/nfs/nfs4_vnops.c b/usr/src/uts/common/fs/nfs/nfs4_vnops.c
index d6bf384a8b..107fe97b95 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_vnops.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_vnops.c
@@ -34,7 +34,7 @@
*/
/*
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#include <sys/param.h>
@@ -3737,8 +3737,13 @@ nfs4_setattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr,
*/
error = nfs4setattr(vp, vap, flags, cr, NULL);
- if (error == 0 && (vap->va_mask & AT_SIZE) && vap->va_size == 0)
- vnevent_truncate(vp, ct);
+ if (error == 0 && (vap->va_mask & AT_SIZE)) {
+ if (vap->va_size == 0) {
+ vnevent_truncate(vp, ct);
+ } else {
+ vnevent_resize(vp, ct);
+ }
+ }
return (error);
}
@@ -8061,8 +8066,9 @@ link_call:
* vnode if it already existed.
*/
if (error == 0) {
- vnode_t *tvp;
+ vnode_t *tvp, *tovp;
rnode4_t *trp;
+
/*
* Notify the vnode. Each links is represented by
* a different vnode, in nfsv4.
@@ -8075,23 +8081,20 @@ link_call:
vnevent_rename_dest(tvp, ndvp, nnm, ct);
}
- /*
- * if the source and destination directory are not the
- * same notify the destination directory.
- */
- if (VTOR4(odvp) != VTOR4(ndvp)) {
- trp = VTOR4(ndvp);
- tvp = ndvp;
- if (IS_SHADOW(ndvp, trp))
- tvp = RTOV4(trp);
- vnevent_rename_dest_dir(tvp, ct);
- }
-
trp = VTOR4(ovp);
- tvp = ovp;
+ tovp = ovp;
if (IS_SHADOW(ovp, trp))
+ tovp = RTOV4(trp);
+
+ vnevent_rename_src(tovp, odvp, onm, ct);
+
+ trp = VTOR4(ndvp);
+ tvp = ndvp;
+
+ if (IS_SHADOW(ndvp, trp))
tvp = RTOV4(trp);
- vnevent_rename_src(tvp, odvp, onm, ct);
+
+ vnevent_rename_dest_dir(tvp, tovp, nnm, ct);
}
if (nvp) {
@@ -11000,8 +11003,13 @@ nfs4_space(vnode_t *vp, int cmd, struct flock64 *bfp, int flag,
va.va_size = bfp->l_start;
error = nfs4setattr(vp, &va, 0, cr, NULL);
- if (error == 0 && bfp->l_start == 0)
- vnevent_truncate(vp, ct);
+ if (error == 0) {
+ if (bfp->l_start == 0) {
+ vnevent_truncate(vp, ct);
+ } else {
+ vnevent_resize(vp, ct);
+ }
+ }
} else
error = EINVAL;
}
diff --git a/usr/src/uts/common/fs/nfs/nfs_auth.c b/usr/src/uts/common/fs/nfs/nfs_auth.c
index 268badd6c0..da60a0ccd0 100644
--- a/usr/src/uts/common/fs/nfs/nfs_auth.c
+++ b/usr/src/uts/common/fs/nfs/nfs_auth.c
@@ -22,6 +22,7 @@
/*
* Copyright 2016 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015 Joyent, Inc. All rights reserved.
*/
#include <sys/param.h>
@@ -558,11 +559,16 @@ retry:
*access = res.ares.auth_perm;
*srv_uid = res.ares.auth_srv_uid;
*srv_gid = res.ares.auth_srv_gid;
- *srv_gids_cnt = res.ares.auth_srv_gids.len;
- *srv_gids = kmem_alloc(*srv_gids_cnt * sizeof (gid_t),
- KM_SLEEP);
- bcopy(res.ares.auth_srv_gids.val, *srv_gids,
- *srv_gids_cnt * sizeof (gid_t));
+
+ if ((*srv_gids_cnt = res.ares.auth_srv_gids.len) != 0) {
+ *srv_gids = kmem_alloc(*srv_gids_cnt *
+ sizeof (gid_t), KM_SLEEP);
+ bcopy(res.ares.auth_srv_gids.val, *srv_gids,
+ *srv_gids_cnt * sizeof (gid_t));
+ } else {
+ *srv_gids = NULL;
+ }
+
break;
case NFSAUTH_DR_EFAIL:
@@ -1051,9 +1057,13 @@ nfsauth_cache_get(struct exportinfo *exi, struct svc_req *req, int flavor,
if (gid != NULL)
*gid = p->auth_srv_gid;
if (ngids != NULL && gids != NULL) {
- *ngids = p->auth_srv_ngids;
- *gids = kmem_alloc(*ngids * sizeof (gid_t), KM_SLEEP);
- bcopy(p->auth_srv_gids, *gids, *ngids * sizeof (gid_t));
+ if ((*ngids = p->auth_srv_ngids) != 0) {
+ size_t sz = *ngids * sizeof (gid_t);
+ *gids = kmem_alloc(sz, KM_SLEEP);
+ bcopy(p->auth_srv_gids, *gids, sz);
+ } else {
+ *gids = NULL;
+ }
}
access = p->auth_access;
diff --git a/usr/src/uts/common/fs/nfs/nfs_server.c b/usr/src/uts/common/fs/nfs/nfs_server.c
index be28ac9071..5d2efc71b2 100644
--- a/usr/src/uts/common/fs/nfs/nfs_server.c
+++ b/usr/src/uts/common/fs/nfs/nfs_server.c
@@ -24,6 +24,7 @@
* Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 Bayard G. Bell. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc.
*/
/*
@@ -2649,6 +2650,9 @@ nfs_srvinit(void)
{
int error;
+ if (getzoneid() != GLOBAL_ZONEID)
+ return (EACCES);
+
error = nfs_exportinit();
if (error != 0)
return (error);
@@ -3287,7 +3291,7 @@ nfs_getflabel(vnode_t *vp, struct exportinfo *exi)
char *path;
mutex_enter(&vp->v_lock);
- if (vp->v_path != NULL) {
+ if (vp->v_path != vn_vpath_empty) {
zone = zone_find_by_any_path(vp->v_path, B_FALSE);
mutex_exit(&vp->v_lock);
} else {
diff --git a/usr/src/uts/common/fs/nfs/nfs_vfsops.c b/usr/src/uts/common/fs/nfs/nfs_vfsops.c
index 57b21778b4..ffd5380a86 100644
--- a/usr/src/uts/common/fs/nfs/nfs_vfsops.c
+++ b/usr/src/uts/common/fs/nfs/nfs_vfsops.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2013, Joyent, Inc. All rights reserved.
*
* Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
* All rights reserved.
diff --git a/usr/src/uts/common/fs/nfs/nfs_vnops.c b/usr/src/uts/common/fs/nfs/nfs_vnops.c
index 1a1082bcb8..ee3bac484f 100644
--- a/usr/src/uts/common/fs/nfs/nfs_vnops.c
+++ b/usr/src/uts/common/fs/nfs/nfs_vnops.c
@@ -26,7 +26,7 @@
*/
/*
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*/
@@ -1174,8 +1174,13 @@ nfs_setattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr,
error = nfssetattr(vp, vap, flags, cr);
- if (error == 0 && (mask & AT_SIZE) && vap->va_size == 0)
- vnevent_truncate(vp, ct);
+ if (error == 0 && (mask & AT_SIZE)) {
+ if (vap->va_size == 0) {
+ vnevent_truncate(vp, ct);
+ } else {
+ vnevent_resize(vp, ct);
+ }
+ }
return (error);
}
@@ -2688,11 +2693,9 @@ nfsrename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, cred_t *cr,
if (nvp)
vnevent_rename_dest(nvp, ndvp, nnm, ct);
- if (odvp != ndvp)
- vnevent_rename_dest_dir(ndvp, ct);
-
ASSERT(ovp != NULL);
vnevent_rename_src(ovp, odvp, onm, ct);
+ vnevent_rename_dest_dir(ndvp, ovp, nnm, ct);
}
if (nvp) {
@@ -4620,8 +4623,13 @@ nfs_space(vnode_t *vp, int cmd, struct flock64 *bfp, int flag,
va.va_size = bfp->l_start;
error = nfssetattr(vp, &va, 0, cr);
- if (error == 0 && bfp->l_start == 0)
- vnevent_truncate(vp, ct);
+ if (error == 0) {
+ if (bfp->l_start == 0) {
+ vnevent_truncate(vp, ct);
+ } else {
+ vnevent_resize(vp, ct);
+ }
+ }
} else
error = EINVAL;
}
diff --git a/usr/src/uts/common/fs/pcfs/pc_dir.c b/usr/src/uts/common/fs/pcfs/pc_dir.c
index 976715e346..275330a0ae 100644
--- a/usr/src/uts/common/fs/pcfs/pc_dir.c
+++ b/usr/src/uts/common/fs/pcfs/pc_dir.c
@@ -22,7 +22,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright 2015 Joyent, Inc.
+ * Copyright 2016 Joyent, Inc.
*/
#include <sys/param.h>
@@ -826,8 +826,7 @@ top:
if (error == 0) {
vnevent_rename_src(PCTOV(pcp), PCTOV(dp), snm, ctp);
- if (dp != tdp)
- vnevent_rename_dest_dir(PCTOV(tdp), ctp);
+ vnevent_rename_dest_dir(PCTOV(tdp), PCTOV(pcp), tnm, ctp);
}
done:
diff --git a/usr/src/uts/common/fs/pcfs/pc_vnops.c b/usr/src/uts/common/fs/pcfs/pc_vnops.c
index a8743b245a..ae72cada7a 100644
--- a/usr/src/uts/common/fs/pcfs/pc_vnops.c
+++ b/usr/src/uts/common/fs/pcfs/pc_vnops.c
@@ -781,8 +781,11 @@ pcfs_setattr(
if (error)
goto out;
- if (vap->va_size == 0)
+ if (vap->va_size == 0) {
vnevent_truncate(vp, ct);
+ } else {
+ vnevent_resize(vp, ct);
+ }
}
/*
* Change file modified times.
diff --git a/usr/src/uts/common/fs/portfs/port.c b/usr/src/uts/common/fs/portfs/port.c
index 14be8cbbae..11b7386269 100644
--- a/usr/src/uts/common/fs/portfs/port.c
+++ b/usr/src/uts/common/fs/portfs/port.c
@@ -24,7 +24,9 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2015 Joyent, Inc. All rights reserved.
+ */
#include <sys/types.h>
#include <sys/systm.h>
@@ -1381,12 +1383,18 @@ portnowait:
if (model == DATAMODEL_NATIVE) {
eventsz = sizeof (port_event_t);
- kevp = kmem_alloc(eventsz * nmax, KM_NOSLEEP);
- if (kevp == NULL) {
- if (nmax > pp->port_max_list)
- nmax = pp->port_max_list;
- kevp = kmem_alloc(eventsz * nmax, KM_SLEEP);
+
+ if (nmax == 0) {
+ kevp = NULL;
+ } else {
+ kevp = kmem_alloc(eventsz * nmax, KM_NOSLEEP);
+ if (kevp == NULL) {
+ if (nmax > pp->port_max_list)
+ nmax = pp->port_max_list;
+ kevp = kmem_alloc(eventsz * nmax, KM_SLEEP);
+ }
}
+
results = kevp;
lev = NULL; /* start with first event in the queue */
for (nevents = 0; nevents < nmax; ) {
@@ -1423,12 +1431,18 @@ portnowait:
port_event32_t *kevp32;
eventsz = sizeof (port_event32_t);
- kevp32 = kmem_alloc(eventsz * nmax, KM_NOSLEEP);
- if (kevp32 == NULL) {
- if (nmax > pp->port_max_list)
- nmax = pp->port_max_list;
- kevp32 = kmem_alloc(eventsz * nmax, KM_SLEEP);
+
+ if (nmax == 0) {
+ kevp32 = NULL;
+ } else {
+ kevp32 = kmem_alloc(eventsz * nmax, KM_NOSLEEP);
+ if (kevp32 == NULL) {
+ if (nmax > pp->port_max_list)
+ nmax = pp->port_max_list;
+ kevp32 = kmem_alloc(eventsz * nmax, KM_SLEEP);
+ }
}
+
results = kevp32;
lev = NULL; /* start with first event in the queue */
for (nevents = 0; nevents < nmax; ) {
diff --git a/usr/src/uts/common/fs/proc/prargv.c b/usr/src/uts/common/fs/proc/prargv.c
new file mode 100644
index 0000000000..b09a9c8afc
--- /dev/null
+++ b/usr/src/uts/common/fs/proc/prargv.c
@@ -0,0 +1,441 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015, Joyent, Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/sunddi.h>
+#include <sys/proc.h>
+#include <sys/procfs.h>
+#include <sys/sysmacros.h>
+#include <vm/as.h>
+
+/*
+ * Safely read a contiguous region of memory from 'addr' in the address space
+ * of a particular process into the supplied kernel buffer (*buf, sz).
+ * Partially mapped regions will result in a partial read terminating at the
+ * first hole in the address space. The number of bytes actually read is
+ * returned to the caller via 'rdsz'.
+ */
+int
+prreadbuf(proc_t *p, uintptr_t ustart, uint8_t *buf, size_t sz, size_t *rdsz)
+{
+ int error = 0;
+ size_t rem = sz;
+ off_t pos = 0;
+
+ if (rdsz != NULL)
+ *rdsz = 0;
+
+ while (rem != 0) {
+ uintptr_t addr = ustart + pos;
+ size_t len = MIN(rem, PAGESIZE - (addr & PAGEOFFSET));
+
+ if ((error = uread(p, buf + pos, len, addr)) != 0) {
+ if (error == ENXIO) {
+ /*
+ * ENXIO from uread() indicates that the page
+ * does not exist. This will simply be a
+ * partial read.
+ */
+ error = 0;
+ }
+ break;
+ }
+
+ rem -= len;
+ pos += len;
+ }
+
+ if (rdsz != NULL)
+ *rdsz = pos;
+
+ return (error);
+}
+
+/*
+ * Attempt to read the argument vector (argv) from this process. The caller
+ * must hold the p_lock mutex, and have marked the process P_PR_LOCK (e.g. via
+ * prlock or lx_prlock).
+ *
+ * The caller must provide a buffer (buf, buflen). We will concatenate each
+ * argument string (including the NUL terminator) into this buffer. The number
+ * of characters written to this buffer (including the final NUL terminator)
+ * will be stored in 'slen'.
+ */
+int
+prreadargv(proc_t *p, char *buf, size_t bufsz, size_t *slen)
+{
+ int error;
+ user_t *up;
+ struct as *as;
+ size_t pos = 0;
+ caddr_t *argv = NULL;
+ size_t argvsz = 0;
+ int i;
+
+ VERIFY(MUTEX_HELD(&p->p_lock));
+ VERIFY(p->p_proc_flag & P_PR_LOCK);
+
+ up = PTOU(p);
+ as = p->p_as;
+
+ if ((p->p_flag & SSYS) || as == &kas || up->u_argv == NULL) {
+ /*
+ * Return the regular psargs string to the caller.
+ */
+ bcopy(up->u_psargs, buf, MIN(bufsz, sizeof (up->u_psargs)));
+ buf[bufsz - 1] = '\0';
+ *slen = strlen(buf) + 1;
+
+ return (0);
+ }
+
+ /*
+ * Allocate space to store argv array.
+ */
+ argvsz = up->u_argc * (p->p_model == DATAMODEL_ILP32 ?
+ sizeof (caddr32_t) : sizeof (caddr_t));
+ argv = kmem_alloc(argvsz, KM_SLEEP);
+
+ /*
+ * Extract the argv array from the target process. Drop p_lock
+ * while we do I/O to avoid deadlock with the clock thread.
+ */
+ mutex_exit(&p->p_lock);
+ if ((error = prreadbuf(p, up->u_argv, (uint8_t *)argv, argvsz,
+ NULL)) != 0) {
+ kmem_free(argv, argvsz);
+ mutex_enter(&p->p_lock);
+ VERIFY(p->p_proc_flag & P_PR_LOCK);
+ return (-1);
+ }
+
+ /*
+ * Read each argument string from the pointers in the argv array.
+ */
+ pos = 0;
+ for (i = 0; i < up->u_argc; i++) {
+ size_t rdsz, trysz;
+ uintptr_t arg;
+ off_t j;
+ boolean_t found_nul;
+ boolean_t do_retry = B_TRUE;
+
+#ifdef _SYSCALL32_IMPL
+ if (p->p_model == DATAMODEL_ILP32) {
+ arg = (uintptr_t)((caddr32_t *)argv)[i];
+ } else {
+ arg = (uintptr_t)argv[i];
+ }
+#else
+ arg = (uintptr_t)argv[i];
+#endif
+
+ /*
+ * Stop trying to read arguments if we reach a NULL
+ * pointer in the vector.
+ */
+ if (arg == NULL)
+ break;
+
+ /*
+ * Stop reading if we have read the maximum length
+ * we can return to the user.
+ */
+ if (pos >= bufsz)
+ break;
+
+ /*
+ * Initially we try a short read, on the assumption that
+ * most individual argument strings are less than 80
+ * characters long.
+ */
+ if ((trysz = MIN(80, bufsz - pos - 1)) < 80) {
+ /*
+ * We don't have room in the target buffer for even
+ * an entire short read, so there is no need to retry
+ * with a longer read.
+ */
+ do_retry = B_FALSE;
+ }
+
+retry:
+ /*
+ * Read string data for this argument. Leave room
+ * in the buffer for a final NUL terminator.
+ */
+ if ((error = prreadbuf(p, arg, (uint8_t *)&buf[pos], trysz,
+ &rdsz)) != 0) {
+ /*
+ * There was a problem reading this string
+ * from the process. Give up.
+ */
+ break;
+ }
+
+ /*
+ * Find the NUL terminator.
+ */
+ found_nul = B_FALSE;
+ for (j = 0; j < rdsz; j++) {
+ if (buf[pos + j] == '\0') {
+ found_nul = B_TRUE;
+ break;
+ }
+ }
+
+ if (!found_nul && do_retry) {
+ /*
+ * We did not find a NUL terminator, but this
+ * was a first pass short read. Try once more
+ * with feeling.
+ */
+ trysz = bufsz - pos - 1;
+ do_retry = B_FALSE;
+ goto retry;
+ }
+
+ /*
+ * Commit the string we read to the buffer.
+ */
+ pos += j + 1;
+ if (!found_nul && pos < bufsz) {
+ /*
+ * A NUL terminator was not found; add one.
+ */
+ buf[pos++] = '\0';
+ }
+ }
+
+ /*
+ * Ensure the entire string is NUL-terminated.
+ */
+ buf[bufsz - 1] = '\0';
+
+ mutex_enter(&p->p_lock);
+ VERIFY(p->p_proc_flag & P_PR_LOCK);
+ kmem_free(argv, argvsz);
+
+ /*
+ * If the operation was a success, return the copied string length
+ * to the caller.
+ */
+ *slen = (error == 0) ? pos : 0;
+
+ return (error);
+}
+
+/*
+ * Similar to prreadargv except reads the env vector. This is slightly more
+ * complex because there is no count for the env vector that corresponds to
+ * u_argc.
+ */
+int
+prreadenvv(proc_t *p, char *buf, size_t bufsz, size_t *slen)
+{
+ int error;
+ user_t *up;
+ struct as *as;
+ size_t pos = 0;
+ caddr_t *envp = NULL;
+ uintptr_t tmpp = NULL;
+ size_t envpsz = 0, rdsz = 0;
+ int i;
+ int cnt, bound;
+
+ VERIFY(MUTEX_HELD(&p->p_lock));
+ VERIFY(p->p_proc_flag & P_PR_LOCK);
+
+ up = PTOU(p);
+ as = p->p_as;
+
+ if ((p->p_flag & SSYS) || as == &kas || up->u_envp == NULL) {
+ /*
+ * Return empty string.
+ */
+ buf[0] = '\0';
+ *slen = 1;
+
+ return (0);
+ }
+
+ /*
+ * Drop p_lock while we do I/O to avoid deadlock with the clock thread.
+ */
+ mutex_exit(&p->p_lock);
+
+ /*
+ * We first have to count how many env entries we have. This is
+ * somewhat painful. We extract the env entries from the target process
+ * one entry at a time. Stop trying to read env entries if we reach a
+ * NULL pointer in the vector or hit our upper bound (which we take
+ * as the bufsz/4) to ensure we don't run off.
+ */
+ rdsz = (p->p_model == DATAMODEL_ILP32 ?
+ sizeof (caddr32_t) : sizeof (caddr_t));
+ bound = (int)(bufsz / 4);
+ for (cnt = 0, tmpp = up->u_envp; cnt < bound; cnt++, tmpp += rdsz) {
+ caddr_t tmp = NULL;
+
+ if ((error = prreadbuf(p, tmpp, (uint8_t *)&tmp, rdsz,
+ NULL)) != 0) {
+ mutex_enter(&p->p_lock);
+ VERIFY(p->p_proc_flag & P_PR_LOCK);
+ return (-1);
+ }
+
+ if (tmp == NULL)
+ break;
+ }
+ if (cnt == 0) {
+ /* Return empty string. */
+ buf[0] = '\0';
+ *slen = 1;
+ mutex_enter(&p->p_lock);
+ VERIFY(p->p_proc_flag & P_PR_LOCK);
+ return (0);
+ }
+
+ /*
+ * Allocate space to store env array.
+ */
+ envpsz = cnt * (p->p_model == DATAMODEL_ILP32 ?
+ sizeof (caddr32_t) : sizeof (caddr_t));
+ envp = kmem_alloc(envpsz, KM_SLEEP);
+
+ /*
+ * Extract the env array from the target process.
+ */
+ if ((error = prreadbuf(p, up->u_envp, (uint8_t *)envp, envpsz,
+ NULL)) != 0) {
+ kmem_free(envp, envpsz);
+ mutex_enter(&p->p_lock);
+ VERIFY(p->p_proc_flag & P_PR_LOCK);
+ return (-1);
+ }
+
+ /*
+ * Read each env string from the pointers in the env array.
+ */
+ pos = 0;
+ for (i = 0; i < cnt; i++) {
+ size_t rdsz, trysz;
+ uintptr_t ev;
+ off_t j;
+ boolean_t found_nul;
+ boolean_t do_retry = B_TRUE;
+
+#ifdef _SYSCALL32_IMPL
+ if (p->p_model == DATAMODEL_ILP32) {
+ ev = (uintptr_t)((caddr32_t *)envp)[i];
+ } else {
+ ev = (uintptr_t)envp[i];
+ }
+#else
+ ev = (uintptr_t)envp[i];
+#endif
+
+ /*
+ * Stop trying to read env entries if we reach a NULL
+ * pointer in the vector.
+ */
+ if (ev == NULL)
+ break;
+
+ /*
+ * Stop reading if we have read the maximum length
+ * we can return to the user.
+ */
+ if (pos >= bufsz)
+ break;
+
+ /*
+ * Initially we try a short read, on the assumption that
+ * most individual env strings are less than 80
+ * characters long.
+ */
+ if ((trysz = MIN(80, bufsz - pos - 1)) < 80) {
+ /*
+ * We don't have room in the target buffer for even
+ * an entire short read, so there is no need to retry
+ * with a longer read.
+ */
+ do_retry = B_FALSE;
+ }
+
+retry:
+ /*
+ * Read string data for this env var. Leave room
+ * in the buffer for a final NUL terminator.
+ */
+ if ((error = prreadbuf(p, ev, (uint8_t *)&buf[pos], trysz,
+ &rdsz)) != 0) {
+ /*
+ * There was a problem reading this string
+ * from the process. Give up.
+ */
+ break;
+ }
+
+ /*
+ * Find the NUL terminator.
+ */
+ found_nul = B_FALSE;
+ for (j = 0; j < rdsz; j++) {
+ if (buf[pos + j] == '\0') {
+ found_nul = B_TRUE;
+ break;
+ }
+ }
+
+ if (!found_nul && do_retry) {
+ /*
+ * We did not find a NUL terminator, but this
+ * was a first pass short read. Try once more
+ * with feeling.
+ */
+ trysz = bufsz - pos - 1;
+ do_retry = B_FALSE;
+ goto retry;
+ }
+
+ /*
+ * Commit the string we read to the buffer.
+ */
+ pos += j + 1;
+ if (!found_nul && pos < bufsz) {
+ /*
+ * A NUL terminator was not found; add one.
+ */
+ buf[pos++] = '\0';
+ }
+ }
+
+ /*
+ * Ensure the entire string is NUL-terminated.
+ */
+ buf[bufsz - 1] = '\0';
+
+ mutex_enter(&p->p_lock);
+ VERIFY(p->p_proc_flag & P_PR_LOCK);
+ kmem_free(envp, envpsz);
+
+ /*
+ * If the operation was a success, return the copied string length
+ * to the caller.
+ */
+ *slen = (error == 0) ? pos : 0;
+
+ return (error);
+}
diff --git a/usr/src/uts/common/fs/proc/prcontrol.c b/usr/src/uts/common/fs/proc/prcontrol.c
index 6b151a6369..07dcb1e7db 100644
--- a/usr/src/uts/common/fs/proc/prcontrol.c
+++ b/usr/src/uts/common/fs/proc/prcontrol.c
@@ -25,7 +25,7 @@
*/
/*
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#include <sys/types.h>
@@ -1481,7 +1481,7 @@ pr_setsig(prnode_t *pnp, siginfo_t *sip)
} else if (t->t_state == TS_STOPPED && sig == SIGKILL) {
/* If SIGKILL, set stopped lwp running */
p->p_stopsig = 0;
- t->t_schedflag |= TS_XSTART | TS_PSTART;
+ t->t_schedflag |= TS_XSTART | TS_PSTART | TS_BSTART;
t->t_dtrace_stop = 0;
setrun_locked(t);
}
@@ -2276,9 +2276,17 @@ pr_szoneid(proc_t *p, zoneid_t zoneid, cred_t *cr)
return (EPERM);
if (zoneid != GLOBAL_ZONEID && zoneid != p->p_zone->zone_id)
return (EINVAL);
- if ((zptr = zone_find_by_id(zoneid)) == NULL)
- return (EINVAL);
+ /*
+ * We cannot hold p_lock when we call zone_find_by_id since that can
+ * lead to a deadlock. zone_find_by_id() takes zonehash_lock.
+ * zone_enter() can hold the zonehash_lock and needs p_lock when it
+ * calls task_join.
+ */
mutex_exit(&p->p_lock);
+ if ((zptr = zone_find_by_id(zoneid)) == NULL) {
+ mutex_enter(&p->p_lock);
+ return (EINVAL);
+ }
mutex_enter(&p->p_crlock);
oldcred = p->p_cred;
crhold(oldcred);
diff --git a/usr/src/uts/common/fs/proc/prdata.h b/usr/src/uts/common/fs/proc/prdata.h
index 8ea516bf82..72f26b3c05 100644
--- a/usr/src/uts/common/fs/proc/prdata.h
+++ b/usr/src/uts/common/fs/proc/prdata.h
@@ -27,7 +27,7 @@
/* All Rights Reserved */
/*
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#ifndef _SYS_PROC_PRDATA_H
@@ -123,6 +123,7 @@ typedef enum prnodetype {
#if defined(__i386) || defined(__amd64)
PR_LDT, /* /proc/<pid>/ldt */
#endif
+ PR_ARGV, /* /proc/<pid>/argv */
PR_USAGE, /* /proc/<pid>/usage */
PR_LUSAGE, /* /proc/<pid>/lusage */
PR_PAGEDATA, /* /proc/<pid>/pagedata */
@@ -347,6 +348,8 @@ extern int pr_unset(proc_t *, long);
extern void pr_sethold(prnode_t *, sigset_t *);
extern void pr_setfault(proc_t *, fltset_t *);
extern int prusrio(proc_t *, enum uio_rw, struct uio *, int);
+extern int prreadargv(proc_t *, char *, size_t, size_t *);
+extern int prreadenvv(proc_t *, char *, size_t, size_t *);
extern int prwritectl(vnode_t *, struct uio *, cred_t *);
extern int prlock(prnode_t *, int);
extern void prunmark(proc_t *);
diff --git a/usr/src/uts/common/fs/proc/prsubr.c b/usr/src/uts/common/fs/proc/prsubr.c
index 28950bf972..21c25a01e3 100644
--- a/usr/src/uts/common/fs/proc/prsubr.c
+++ b/usr/src/uts/common/fs/proc/prsubr.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2016, Joyent, Inc.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -201,6 +201,7 @@ prchoose(proc_t *p)
case PR_SYSEXIT:
case PR_SIGNALLED:
case PR_FAULTED:
+ case PR_BRAND:
/*
* Make an lwp calling exit() be the
* last lwp seen in the process.
@@ -534,6 +535,12 @@ prexecend(void)
pcp->prc_tslot = tslot;
}
}
+
+ /*
+ * There may be threads waiting for the flag change blocked behind the
+ * pr_pid_cv as well.
+ */
+ cv_signal(&pr_pid_cv[p->p_slot]);
}
/*
diff --git a/usr/src/uts/common/fs/proc/prvnops.c b/usr/src/uts/common/fs/proc/prvnops.c
index 39f2abbc32..245133abf4 100644
--- a/usr/src/uts/common/fs/proc/prvnops.c
+++ b/usr/src/uts/common/fs/proc/prvnops.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -96,6 +96,11 @@ struct prdirect {
#define PRSDSIZE (sizeof (struct prdirect))
/*
+ * Maximum length of the /proc/$$/argv file:
+ */
+int prmaxargvlen = 4096;
+
+/*
* Directory characteristics.
*/
typedef struct prdirent {
@@ -166,6 +171,8 @@ static prdirent_t piddir[] = {
{ PR_LDT, 27 * sizeof (prdirent_t), sizeof (prdirent_t),
"ldt" },
#endif
+ { PR_ARGV, 28 * sizeof (prdirent_t), sizeof (prdirent_t),
+ "argv" },
};
#define NPIDDIRFILES (sizeof (piddir) / sizeof (piddir[0]) - 2)
@@ -582,6 +589,7 @@ static int pr_read_inval(), pr_read_as(), pr_read_status(),
#if defined(__x86)
pr_read_ldt(),
#endif
+ pr_read_argv(),
pr_read_usage(), pr_read_lusage(), pr_read_pagedata(),
pr_read_watch(), pr_read_lwpstatus(), pr_read_lwpsinfo(),
pr_read_lwpusage(), pr_read_xregs(), pr_read_priv(),
@@ -610,6 +618,7 @@ static int (*pr_read_function[PR_NFILES])() = {
#if defined(__x86)
pr_read_ldt, /* /proc/<pid>/ldt */
#endif
+ pr_read_argv, /* /proc/<pid>/argv */
pr_read_usage, /* /proc/<pid>/usage */
pr_read_lusage, /* /proc/<pid>/lusage */
pr_read_pagedata, /* /proc/<pid>/pagedata */
@@ -672,6 +681,41 @@ pr_uioread(void *base, long count, uio_t *uiop)
}
static int
+pr_read_argv(prnode_t *pnp, uio_t *uiop)
+{
+ char *args;
+ int error;
+ size_t asz = prmaxargvlen, sz;
+
+ /*
+ * Allocate a scratch buffer for collection of the process arguments.
+ */
+ args = kmem_alloc(asz, KM_SLEEP);
+
+ ASSERT(pnp->pr_type == PR_ARGV);
+
+ if ((error = prlock(pnp, ZNO)) != 0) {
+ kmem_free(args, asz);
+ return (error);
+ }
+
+ if ((error = prreadargv(pnp->pr_common->prc_proc, args, asz,
+ &sz)) != 0) {
+ prunlock(pnp);
+ kmem_free(args, asz);
+ return (error);
+ }
+
+ prunlock(pnp);
+
+ error = pr_uioread(args, sz, uiop);
+
+ kmem_free(args, asz);
+
+ return (error);
+}
+
+static int
pr_read_as(prnode_t *pnp, uio_t *uiop)
{
int error;
@@ -1767,6 +1811,7 @@ static int (*pr_read_function_32[PR_NFILES])() = {
#if defined(__x86)
pr_read_ldt, /* /proc/<pid>/ldt */
#endif
+ pr_read_argv, /* /proc/<pid>/argv */
pr_read_usage_32, /* /proc/<pid>/usage */
pr_read_lusage_32, /* /proc/<pid>/lusage */
pr_read_pagedata_32, /* /proc/<pid>/pagedata */
@@ -2686,6 +2731,103 @@ prread(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, caller_context_t *ct)
#endif
}
+/*
+ * We make pr_write_psinfo_fname() somewhat simpler by asserting at compile
+ * time that PRFNSZ has the same definition as MAXCOMLEN.
+ */
+#if PRFNSZ != MAXCOMLEN
+#error PRFNSZ/MAXCOMLEN mismatch
+#endif
+
+static int
+pr_write_psinfo_fname(prnode_t *pnp, uio_t *uiop)
+{
+ char fname[PRFNSZ];
+ int offset = offsetof(psinfo_t, pr_fname), error;
+
+#ifdef _SYSCALL32_IMPL
+ if (curproc->p_model != DATAMODEL_LP64)
+ offset = offsetof(psinfo32_t, pr_fname);
+#endif
+
+ /*
+ * If this isn't a write to pr_fname (or if the size doesn't match
+ * PRFNSZ) return.
+ */
+ if (uiop->uio_offset != offset || uiop->uio_resid != PRFNSZ)
+ return (0);
+
+ if ((error = uiomove(fname, PRFNSZ, UIO_WRITE, uiop)) != 0)
+ return (error);
+
+ fname[PRFNSZ - 1] = '\0';
+
+ if ((error = prlock(pnp, ZNO)) != 0)
+ return (error);
+
+ bcopy(fname, pnp->pr_common->prc_proc->p_user.u_comm, PRFNSZ);
+
+ prunlock(pnp);
+
+ return (0);
+}
+
+/*
+ * We make pr_write_psinfo_psargs() somewhat simpler by asserting at compile
+ * time that PRARGSZ has the same definition as PSARGSZ.
+ */
+#if PRARGSZ != PSARGSZ
+#error PRARGSZ/PSARGSZ mismatch
+#endif
+
+static int
+pr_write_psinfo_psargs(prnode_t *pnp, uio_t *uiop)
+{
+ char psargs[PRARGSZ];
+ int offset = offsetof(psinfo_t, pr_psargs), error;
+
+#ifdef _SYSCALL32_IMPL
+ if (curproc->p_model != DATAMODEL_LP64)
+ offset = offsetof(psinfo32_t, pr_psargs);
+#endif
+
+ /*
+ * If this isn't a write to pr_psargs (or if the size doesn't match
+ * PRARGSZ) return.
+ */
+ if (uiop->uio_offset != offset || uiop->uio_resid != PRARGSZ)
+ return (0);
+
+ if ((error = uiomove(psargs, PRARGSZ, UIO_WRITE, uiop)) != 0)
+ return (error);
+
+ psargs[PRARGSZ - 1] = '\0';
+
+ if ((error = prlock(pnp, ZNO)) != 0)
+ return (error);
+
+ bcopy(psargs, pnp->pr_common->prc_proc->p_user.u_psargs, PRARGSZ);
+
+ prunlock(pnp);
+
+ return (0);
+}
+
+int
+pr_write_psinfo(prnode_t *pnp, uio_t *uiop)
+{
+ int error;
+
+ if ((error = pr_write_psinfo_fname(pnp, uiop)) != 0)
+ return (error);
+
+ if ((error = pr_write_psinfo_psargs(pnp, uiop)) != 0)
+ return (error);
+
+ return (0);
+}
+
+
/* ARGSUSED */
static int
prwrite(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, caller_context_t *ct)
@@ -2764,6 +2906,9 @@ prwrite(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, caller_context_t *ct)
uiop->uio_resid = resid;
return (error);
+ case PR_PSINFO:
+ return (pr_write_psinfo(pnp, uiop));
+
default:
return ((vp->v_type == VDIR)? EISDIR : EBADF);
}
@@ -3047,6 +3192,13 @@ prgetattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
case PR_AUXV:
vap->va_size = __KERN_NAUXV_IMPL * PR_OBJSIZE(auxv32_t, auxv_t);
break;
+ case PR_ARGV:
+ if ((p->p_flag & SSYS) || p->p_as == &kas) {
+ vap->va_size = PSARGSZ;
+ } else {
+ vap->va_size = prmaxargvlen;
+ }
+ break;
#if defined(__x86)
case PR_LDT:
mutex_exit(&p->p_lock);
@@ -3222,6 +3374,7 @@ praccess(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct)
case PR_USAGE:
case PR_LUSAGE:
case PR_LWPUSAGE:
+ case PR_ARGV:
p = pr_p_lock(pnp);
mutex_exit(&pr_pidlock);
if (p == NULL)
@@ -3307,6 +3460,7 @@ static vnode_t *(*pr_lookup_function[PR_NFILES])() = {
#if defined(__x86)
pr_lookup_notdir, /* /proc/<pid>/ldt */
#endif
+ pr_lookup_notdir, /* /proc/<pid>/argv */
pr_lookup_notdir, /* /proc/<pid>/usage */
pr_lookup_notdir, /* /proc/<pid>/lusage */
pr_lookup_notdir, /* /proc/<pid>/pagedata */
@@ -4546,11 +4700,15 @@ prgetnode(vnode_t *dp, prnodetype_t type)
break;
case PR_PSINFO:
+ pnp->pr_mode = 0644; /* readable by all + owner can write */
+ break;
+
case PR_LPSINFO:
case PR_LWPSINFO:
case PR_USAGE:
case PR_LUSAGE:
case PR_LWPUSAGE:
+ case PR_ARGV:
pnp->pr_mode = 0444; /* read-only by all */
break;
@@ -4656,6 +4814,7 @@ static int (*pr_readdir_function[PR_NFILES])() = {
#if defined(__x86)
pr_readdir_notdir, /* /proc/<pid>/ldt */
#endif
+ pr_readdir_notdir, /* /proc/<pid>/argv */
pr_readdir_notdir, /* /proc/<pid>/usage */
pr_readdir_notdir, /* /proc/<pid>/lusage */
pr_readdir_notdir, /* /proc/<pid>/pagedata */
@@ -4805,6 +4964,7 @@ pr_readdir_piddir(prnode_t *pnp, uio_t *uiop, int *eofp)
case PR_PROCDIR:
case PR_PSINFO:
case PR_USAGE:
+ case PR_ARGV:
break;
default:
continue;
diff --git a/usr/src/uts/common/fs/sockfs/sockcommon.c b/usr/src/uts/common/fs/sockfs/sockcommon.c
index 703e26ea61..682f1d867b 100644
--- a/usr/src/uts/common/fs/sockfs/sockcommon.c
+++ b/usr/src/uts/common/fs/sockfs/sockcommon.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#include <sys/types.h>
@@ -501,6 +502,9 @@ sonode_constructor(void *buf, void *cdrarg, int kmflags)
cv_init(&so->so_copy_cv, NULL, CV_DEFAULT, NULL);
cv_init(&so->so_closing_cv, NULL, CV_DEFAULT, NULL);
+ so->so_krecv_cb = NULL;
+ so->so_krecv_arg = NULL;
+
return (0);
}
@@ -654,6 +658,10 @@ sonode_fini(struct sonode *so)
if (so->so_filter_top != NULL)
sof_sonode_cleanup(so);
+ /* Clean up any remnants of krecv callbacks */
+ so->so_krecv_cb = NULL;
+ so->so_krecv_arg = NULL;
+
ASSERT(list_is_empty(&so->so_acceptq_list));
ASSERT(list_is_empty(&so->so_acceptq_defer));
ASSERT(!list_link_active(&so->so_acceptq_node));
diff --git a/usr/src/uts/common/fs/sockfs/sockcommon_sops.c b/usr/src/uts/common/fs/sockfs/sockcommon_sops.c
index e5bc6dc845..9b8186a8a0 100644
--- a/usr/src/uts/common/fs/sockfs/sockcommon_sops.c
+++ b/usr/src/uts/common/fs/sockfs/sockcommon_sops.c
@@ -24,7 +24,7 @@
*/
/*
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -128,7 +128,7 @@ so_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
{
int error;
- SO_BLOCK_FALLBACK(so, SOP_BIND(so, name, namelen, flags, cr));
+ SO_BLOCK_FALLBACK_SAFE(so, SOP_BIND(so, name, namelen, flags, cr));
ASSERT(flags == _SOBIND_XPG4_2 || flags == _SOBIND_SOCKBSD);
@@ -305,7 +305,7 @@ so_connect(struct sonode *so, struct sockaddr *name,
* This can happen if a non blocking operation caused an error.
*/
- if (so->so_error != 0) {
+ if (so->so_error != 0 && (so->so_mode & SM_DEFERERR) == 0) {
mutex_enter(&so->so_lock);
error = sogeterr(so, B_TRUE);
mutex_exit(&so->so_lock);
@@ -404,7 +404,7 @@ so_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
break;
}
- if (so->so_error != 0) {
+ if (so->so_error != 0 && (so->so_mode & SM_DEFERERR) == 0) {
mutex_enter(&so->so_lock);
error = sogeterr(so, B_TRUE);
mutex_exit(&so->so_lock);
@@ -513,7 +513,7 @@ so_sendmblk_impl(struct sonode *so, struct nmsghdr *msg, int fflag,
error = EPIPE;
break;
}
- if (so->so_error != 0) {
+ if (so->so_error != 0 && (so->so_mode & SM_DEFERERR) == 0) {
mutex_enter(&so->so_lock);
error = sogeterr(so, B_TRUE);
mutex_exit(&so->so_lock);
@@ -586,11 +586,6 @@ so_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag,
SO_BLOCK_FALLBACK(so, SOP_SENDMBLK(so, msg, fflag, cr, mpp));
- if ((so->so_mode & SM_SENDFILESUPP) == 0) {
- SO_UNBLOCK_FALLBACK(so);
- return (EOPNOTSUPP);
- }
-
error = so_sendmblk_impl(so, msg, fflag, cr, mpp, so->so_filter_top,
B_FALSE);
@@ -653,7 +648,7 @@ so_getsockname(struct sonode *so, struct sockaddr *addr,
{
int error;
- SO_BLOCK_FALLBACK(so, SOP_GETSOCKNAME(so, addr, addrlen, cr));
+ SO_BLOCK_FALLBACK_SAFE(so, SOP_GETSOCKNAME(so, addr, addrlen, cr));
if (so->so_filter_active == 0 ||
(error = sof_filter_getsockname(so, addr, addrlen, cr)) < 0)
@@ -702,7 +697,7 @@ so_getsockopt(struct sonode *so, int level, int option_name,
if (level == SOL_FILTER)
return (sof_getsockopt(so, option_name, optval, optlenp, cr));
- SO_BLOCK_FALLBACK(so,
+ SO_BLOCK_FALLBACK_SAFE(so,
SOP_GETSOCKOPT(so, level, option_name, optval, optlenp, flags, cr));
if ((so->so_filter_active == 0 ||
@@ -791,7 +786,7 @@ so_setsockopt(struct sonode *so, int level, int option_name,
if (level == SOL_FILTER)
return (sof_setsockopt(so, option_name, optval, optlen, cr));
- SO_BLOCK_FALLBACK(so,
+ SO_BLOCK_FALLBACK_SAFE(so,
SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr));
/* X/Open requires this check */
@@ -876,7 +871,7 @@ so_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
* If there is a pending error, return error
* This can happen if a non blocking operation caused an error.
*/
- if (so->so_error != 0) {
+ if (so->so_error != 0 && (so->so_mode & SM_DEFERERR) == 0) {
mutex_enter(&so->so_lock);
error = sogeterr(so, B_TRUE);
mutex_exit(&so->so_lock);
@@ -1329,6 +1324,26 @@ so_queue_msg_impl(struct sonode *so, mblk_t *mp,
}
}
+ mutex_enter(&so->so_lock);
+ if (so->so_krecv_cb != NULL) {
+ boolean_t cont;
+ so_krecv_f func = so->so_krecv_cb;
+ void *arg = so->so_krecv_arg;
+
+ mutex_exit(&so->so_lock);
+ cont = func(so, mp, msg_size, flags & MSG_OOB, arg);
+ mutex_enter(&so->so_lock);
+ if (cont == B_TRUE) {
+ space_left = so->so_rcvbuf;
+ } else {
+ so->so_rcv_queued = so->so_rcvlowat;
+ *errorp = ENOSPC;
+ space_left = -1;
+ }
+ goto done_unlock;
+ }
+ mutex_exit(&so->so_lock);
+
if (flags & MSG_OOB) {
so_queue_oob(so, mp, msg_size);
mutex_enter(&so->so_lock);
@@ -1607,6 +1622,13 @@ so_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
return (ENOTCONN);
}
+ mutex_enter(&so->so_lock);
+ if (so->so_krecv_cb != NULL) {
+ mutex_exit(&so->so_lock);
+ return (EOPNOTSUPP);
+ }
+ mutex_exit(&so->so_lock);
+
if (msg->msg_flags & MSG_PEEK)
msg->msg_flags &= ~MSG_WAITALL;
diff --git a/usr/src/uts/common/fs/sockfs/sockcommon_subr.c b/usr/src/uts/common/fs/sockfs/sockcommon_subr.c
index 957c8f93b4..7bdd64393b 100644
--- a/usr/src/uts/common/fs/sockfs/sockcommon_subr.c
+++ b/usr/src/uts/common/fs/sockfs/sockcommon_subr.c
@@ -24,6 +24,7 @@
*/
/*
* Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
#include <sys/types.h>
@@ -670,10 +671,15 @@ so_dequeue_msg(struct sonode *so, mblk_t **mctlp, struct uio *uiop,
int more = 0;
int error;
ssize_t oobmark;
+ ssize_t copied = 0;
sodirect_t *sodp = so->so_direct;
+ xuio_t *xuio = NULL;
partial_read = B_FALSE;
*mctlp = NULL;
+ if ((uiop->uio_extflg & UIO_XUIO) != 0) {
+ xuio = (xuio_t *)uiop;
+ }
again:
mutex_enter(&so->so_lock);
again1:
@@ -784,8 +790,6 @@ again1:
* enabled socket, uio_resid can be 0.
*/
if (uiop->uio_resid >= 0) {
- ssize_t copied = 0;
-
if (sodp != NULL && (DB_FLAGS(mp) & DBLK_UIOA)) {
mutex_enter(&so->so_lock);
ASSERT(uiop == (uio_t *)&sodp->sod_uioa);
@@ -843,6 +847,18 @@ again1:
}
if (mp != NULL) { /* more data blocks in msg */
more |= MOREDATA;
+
+ /*
+ * If requested, tally up remaining data along with the
+ * amount already copied.
+ */
+ if (xuio != NULL &&
+ xuio->xu_type == UIOTYPE_PEEKSIZE) {
+ xuio->xu_ext.xu_ps.xu_ps_set = B_TRUE;
+ xuio->xu_ext.xu_ps.xu_ps_size =
+ copied + msgdsize(mp);
+ }
+
if ((flags & (MSG_PEEK|MSG_TRUNC))) {
if (flags & MSG_PEEK) {
freemsg(mp);
@@ -2276,9 +2292,9 @@ so_tpi_fallback(struct sonode *so, struct cred *cr)
fbfunc = sp->sp_smod_info->smod_proto_fallback_func;
/*
- * Cannot fallback if the socket has active filters
+ * Cannot fallback if the socket has active filters or a krecv callback.
*/
- if (so->so_filter_active > 0)
+ if (so->so_filter_active > 0 || so->so_krecv_cb != NULL)
return (EINVAL);
switch (so->so_family) {
@@ -2456,3 +2472,50 @@ out:
return (error);
}
+
+int
+so_krecv_set(sonode_t *so, so_krecv_f cb, void *arg)
+{
+ int ret;
+
+ if (cb == NULL && arg != NULL)
+ return (EINVAL);
+
+ SO_BLOCK_FALLBACK(so, so_krecv_set(so, cb, arg));
+
+ mutex_enter(&so->so_lock);
+ if (so->so_state & SS_FALLBACK_COMP) {
+ mutex_exit(&so->so_lock);
+ SO_UNBLOCK_FALLBACK(so);
+ return (ENOTSUP);
+ }
+
+ ret = so_lock_read(so, 0);
+ VERIFY(ret == 0);
+ /*
+ * Other consumers may actually care about getting extant data delivered
+ * to them, when they come along, they should figure out the best API
+ * for that.
+ */
+ so_rcv_flush(so);
+
+ so->so_krecv_cb = cb;
+ so->so_krecv_arg = arg;
+
+ so_unlock_read(so);
+ mutex_exit(&so->so_lock);
+ SO_UNBLOCK_FALLBACK(so);
+
+ return (0);
+}
+
+void
+so_krecv_unblock(sonode_t *so)
+{
+ mutex_enter(&so->so_lock);
+ VERIFY(so->so_krecv_cb != NULL);
+
+ so->so_rcv_queued = 0;
+ (void) so_check_flow_control(so);
+ mutex_exit(&so->so_lock);
+}
diff --git a/usr/src/uts/common/fs/sockfs/sockfilter.c b/usr/src/uts/common/fs/sockfs/sockfilter.c
index 971523945e..7dca6ae6fc 100644
--- a/usr/src/uts/common/fs/sockfs/sockfilter.c
+++ b/usr/src/uts/common/fs/sockfs/sockfilter.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
#include <sys/systm.h>
@@ -246,6 +247,18 @@ sof_setsockopt_impl(struct sonode *so, int option_name,
/* Module loaded OK, so there must be an ops vector */
ASSERT(ent->sofe_mod != NULL);
+
+ /*
+ * Check again to confirm ATTACH is ok. See if the the module
+ * is not SOF_ATT_SAFE after an unsafe operation has taken
+ * place.
+ */
+ if ((ent->sofe_mod->sofm_flags & SOF_ATT_SAFE) == 0 &&
+ so->so_state & SS_FILOP_UNSF) {
+ sof_instance_destroy(inst);
+ return (EINVAL);
+ }
+
inst->sofi_ops = &ent->sofe_mod->sofm_ops;
SOF_STAT_ADD(inst, tot_active_attach, 1);
@@ -1444,7 +1457,13 @@ sof_filter_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
* sof_register(version, name, ops, flags)
*
* Register a socket filter identified by name `name' and which should use
- * the ops vector `ops' for event notification. `flags' should be set to 0.
+ * the ops vector `ops' for event notification. `flags' should be set to 0
+ * by default for "unsafe" modules or SOF_ATT_SAFE for "safe" modules. An
+ * unsafe filter is one that cannot be attached after any socket operation has
+ * occured. This is the legacy default. A "safe" filter can be attached even
+ * after some basic initial socket operations have taken place. This set is
+ * currently bind, getsockname, getsockopt and setsockopt. The order in which
+ * a "safe" filter can be attached is more relaxed, and thus more flexible.
* On success 0 is returned, otherwise an errno is returned.
*/
int
@@ -1452,14 +1471,13 @@ sof_register(int version, const char *name, const sof_ops_t *ops, int flags)
{
sof_module_t *mod;
- _NOTE(ARGUNUSED(flags));
-
if (version != SOF_VERSION)
return (EINVAL);
mod = kmem_zalloc(sizeof (sof_module_t), KM_SLEEP);
mod->sofm_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
(void) strcpy(mod->sofm_name, name);
+ mod->sofm_flags = flags;
mod->sofm_ops = *ops;
mutex_enter(&sof_module_lock);
diff --git a/usr/src/uts/common/fs/sockfs/sockfilter_impl.h b/usr/src/uts/common/fs/sockfs/sockfilter_impl.h
index 7f7aece1f1..cf2ad8b20d 100644
--- a/usr/src/uts/common/fs/sockfs/sockfilter_impl.h
+++ b/usr/src/uts/common/fs/sockfs/sockfilter_impl.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
#ifndef _SOCKFS_SOCKFILTER_H
@@ -51,6 +52,7 @@ typedef struct sof_kstat sof_kstat_t;
struct sof_module {
char *sofm_name;
+ int sofm_flags;
sof_ops_t sofm_ops;
uint_t sofm_refcnt;
list_node_t sofm_node;
diff --git a/usr/src/uts/common/fs/sockfs/socksubr.c b/usr/src/uts/common/fs/sockfs/socksubr.c
index 30027200b6..eea86672b8 100644
--- a/usr/src/uts/common/fs/sockfs/socksubr.c
+++ b/usr/src/uts/common/fs/sockfs/socksubr.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015, Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -436,10 +437,12 @@ sogetoff(mblk_t *mp, t_uscalar_t offset,
*
* The underlying filesystem VSOCK vnode has a v_stream pointer that
* references the actual stream head (hence indirectly the actual sonode).
+ *
+ * This function is non-static so it can be used by brand emulation.
*/
-static int
+int
so_ux_lookup(struct sonode *so, struct sockaddr_un *soun, int checkaccess,
- vnode_t **vpp)
+ vnode_t **vpp)
{
vnode_t *vp; /* Underlying filesystem vnode */
vnode_t *rvp; /* real vnode */
@@ -1879,7 +1882,7 @@ ssize_t
soreadfile(file_t *fp, uchar_t *buf, u_offset_t fileoff, int *err, size_t size)
{
struct uio auio;
- struct iovec aiov[MSG_MAXIOVLEN];
+ struct iovec aiov[1];
register vnode_t *vp;
int ioflag, rwflag;
ssize_t cnt;
diff --git a/usr/src/uts/common/fs/sockfs/socksyscalls.c b/usr/src/uts/common/fs/sockfs/socksyscalls.c
index 21f3744895..854dd040b5 100644
--- a/usr/src/uts/common/fs/sockfs/socksyscalls.c
+++ b/usr/src/uts/common/fs/sockfs/socksyscalls.c
@@ -21,6 +21,8 @@
/*
* Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved. */
@@ -54,6 +56,7 @@
#include <sys/cmn_err.h>
#include <sys/vmsystm.h>
#include <sys/policy.h>
+#include <sys/limits.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
@@ -86,12 +89,6 @@ extern void nl7c_init(void);
extern int sockfs_defer_nl7c_init;
/*
- * Note: DEF_IOV_MAX is defined and used as it is in "fs/vncalls.c"
- * as there isn't a formal definition of IOV_MAX ???
- */
-#define MSG_MAXIOVLEN 16
-
-/*
* Kernel component of socket creation.
*
* The socket library determines which version number to use.
@@ -1026,9 +1023,10 @@ recvmsg(int sock, struct nmsghdr *msg, int flags)
STRUCT_HANDLE(nmsghdr, umsgptr);
struct nmsghdr lmsg;
struct uio auio;
- struct iovec aiov[MSG_MAXIOVLEN];
+ struct iovec buf[IOV_MAX_STACK], *aiov = buf;
+ ssize_t iovsize = 0;
int iovcnt;
- ssize_t len;
+ ssize_t len, rval;
int i;
int *flagsp;
model_t model;
@@ -1071,22 +1069,37 @@ recvmsg(int sock, struct nmsghdr *msg, int flags)
iovcnt = lmsg.msg_iovlen;
- if (iovcnt <= 0 || iovcnt > MSG_MAXIOVLEN) {
+ if (iovcnt <= 0 || iovcnt > IOV_MAX) {
return (set_errno(EMSGSIZE));
}
+ if (iovcnt > IOV_MAX_STACK) {
+ iovsize = iovcnt * sizeof (struct iovec);
+ aiov = kmem_alloc(iovsize, KM_SLEEP);
+ }
+
#ifdef _SYSCALL32_IMPL
/*
* 32-bit callers need to have their iovec expanded, while ensuring
* that they can't move more than 2Gbytes of data in a single call.
*/
if (model == DATAMODEL_ILP32) {
- struct iovec32 aiov32[MSG_MAXIOVLEN];
+ struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
+ ssize_t iov32size;
ssize32_t count32;
- if (copyin((struct iovec32 *)lmsg.msg_iov, aiov32,
- iovcnt * sizeof (struct iovec32)))
+ iov32size = iovcnt * sizeof (struct iovec32);
+ if (iovsize != 0)
+ aiov32 = kmem_alloc(iov32size, KM_SLEEP);
+
+ if (copyin((struct iovec32 *)lmsg.msg_iov, aiov32, iov32size)) {
+ if (iovsize != 0) {
+ kmem_free(aiov32, iov32size);
+ kmem_free(aiov, iovsize);
+ }
+
return (set_errno(EFAULT));
+ }
count32 = 0;
for (i = 0; i < iovcnt; i++) {
@@ -1094,15 +1107,28 @@ recvmsg(int sock, struct nmsghdr *msg, int flags)
iovlen32 = aiov32[i].iov_len;
count32 += iovlen32;
- if (iovlen32 < 0 || count32 < 0)
+ if (iovlen32 < 0 || count32 < 0) {
+ if (iovsize != 0) {
+ kmem_free(aiov32, iov32size);
+ kmem_free(aiov, iovsize);
+ }
+
return (set_errno(EINVAL));
+ }
+
aiov[i].iov_len = iovlen32;
aiov[i].iov_base =
(caddr_t)(uintptr_t)aiov32[i].iov_base;
}
+
+ if (iovsize != 0)
+ kmem_free(aiov32, iov32size);
} else
#endif /* _SYSCALL32_IMPL */
if (copyin(lmsg.msg_iov, aiov, iovcnt * sizeof (struct iovec))) {
+ if (iovsize != 0)
+ kmem_free(aiov, iovsize);
+
return (set_errno(EFAULT));
}
len = 0;
@@ -1110,6 +1136,9 @@ recvmsg(int sock, struct nmsghdr *msg, int flags)
ssize_t iovlen = aiov[i].iov_len;
len += iovlen;
if (iovlen < 0 || len < 0) {
+ if (iovsize != 0)
+ kmem_free(aiov, iovsize);
+
return (set_errno(EINVAL));
}
}
@@ -1124,12 +1153,20 @@ recvmsg(int sock, struct nmsghdr *msg, int flags)
(do_useracc == 0 ||
useracc(lmsg.msg_control, lmsg.msg_controllen,
B_WRITE) != 0)) {
+ if (iovsize != 0)
+ kmem_free(aiov, iovsize);
+
return (set_errno(EFAULT));
}
- return (recvit(sock, &lmsg, &auio, flags,
+ rval = recvit(sock, &lmsg, &auio, flags,
STRUCT_FADDR(umsgptr, msg_namelen),
- STRUCT_FADDR(umsgptr, msg_controllen), flagsp));
+ STRUCT_FADDR(umsgptr, msg_controllen), flagsp);
+
+ if (iovsize != 0)
+ kmem_free(aiov, iovsize);
+
+ return (rval);
}
/*
@@ -1267,9 +1304,10 @@ sendmsg(int sock, struct nmsghdr *msg, int flags)
struct nmsghdr lmsg;
STRUCT_DECL(nmsghdr, u_lmsg);
struct uio auio;
- struct iovec aiov[MSG_MAXIOVLEN];
+ struct iovec buf[IOV_MAX_STACK], *aiov = buf;
+ ssize_t iovsize = 0;
int iovcnt;
- ssize_t len;
+ ssize_t len, rval;
int i;
model_t model;
@@ -1312,7 +1350,7 @@ sendmsg(int sock, struct nmsghdr *msg, int flags)
iovcnt = lmsg.msg_iovlen;
- if (iovcnt <= 0 || iovcnt > MSG_MAXIOVLEN) {
+ if (iovcnt <= 0 || iovcnt > IOV_MAX) {
/*
* Unless this is XPG 4.2 we allow iovcnt == 0 to
* be compatible with SunOS 4.X and 4.4BSD.
@@ -1321,19 +1359,34 @@ sendmsg(int sock, struct nmsghdr *msg, int flags)
return (set_errno(EMSGSIZE));
}
+ if (iovcnt > IOV_MAX_STACK) {
+ iovsize = iovcnt * sizeof (struct iovec);
+ aiov = kmem_alloc(iovsize, KM_SLEEP);
+ }
+
#ifdef _SYSCALL32_IMPL
/*
* 32-bit callers need to have their iovec expanded, while ensuring
* that they can't move more than 2Gbytes of data in a single call.
*/
if (model == DATAMODEL_ILP32) {
- struct iovec32 aiov32[MSG_MAXIOVLEN];
+ struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
+ ssize_t iov32size;
ssize32_t count32;
+ iov32size = iovcnt * sizeof (struct iovec32);
+ if (iovsize != 0)
+ aiov32 = kmem_alloc(iov32size, KM_SLEEP);
+
if (iovcnt != 0 &&
- copyin((struct iovec32 *)lmsg.msg_iov, aiov32,
- iovcnt * sizeof (struct iovec32)))
+ copyin((struct iovec32 *)lmsg.msg_iov, aiov32, iov32size)) {
+ if (iovsize != 0) {
+ kmem_free(aiov32, iov32size);
+ kmem_free(aiov, iovsize);
+ }
+
return (set_errno(EFAULT));
+ }
count32 = 0;
for (i = 0; i < iovcnt; i++) {
@@ -1341,17 +1394,30 @@ sendmsg(int sock, struct nmsghdr *msg, int flags)
iovlen32 = aiov32[i].iov_len;
count32 += iovlen32;
- if (iovlen32 < 0 || count32 < 0)
+ if (iovlen32 < 0 || count32 < 0) {
+ if (iovsize != 0) {
+ kmem_free(aiov32, iov32size);
+ kmem_free(aiov, iovsize);
+ }
+
return (set_errno(EINVAL));
+ }
+
aiov[i].iov_len = iovlen32;
aiov[i].iov_base =
(caddr_t)(uintptr_t)aiov32[i].iov_base;
}
+
+ if (iovsize != 0)
+ kmem_free(aiov32, iov32size);
} else
#endif /* _SYSCALL32_IMPL */
if (iovcnt != 0 &&
copyin(lmsg.msg_iov, aiov,
(unsigned)iovcnt * sizeof (struct iovec))) {
+ if (iovsize != 0)
+ kmem_free(aiov, iovsize);
+
return (set_errno(EFAULT));
}
len = 0;
@@ -1359,6 +1425,9 @@ sendmsg(int sock, struct nmsghdr *msg, int flags)
ssize_t iovlen = aiov[i].iov_len;
len += iovlen;
if (iovlen < 0 || len < 0) {
+ if (iovsize != 0)
+ kmem_free(aiov, iovsize);
+
return (set_errno(EINVAL));
}
}
@@ -1369,7 +1438,12 @@ sendmsg(int sock, struct nmsghdr *msg, int flags)
auio.uio_segflg = UIO_USERSPACE;
auio.uio_limit = 0;
- return (sendit(sock, &lmsg, &auio, flags));
+ rval = sendit(sock, &lmsg, &auio, flags);
+
+ if (iovsize != 0)
+ kmem_free(aiov, iovsize);
+
+ return (rval);
}
ssize_t
diff --git a/usr/src/uts/common/fs/sockfs/socktpi_impl.h b/usr/src/uts/common/fs/sockfs/socktpi_impl.h
index 6a515be122..24acb81a0a 100644
--- a/usr/src/uts/common/fs/sockfs/socktpi_impl.h
+++ b/usr/src/uts/common/fs/sockfs/socktpi_impl.h
@@ -22,6 +22,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
*/
#ifndef _SOCKFS_SOCKTPI_IMPL_H
@@ -56,6 +57,8 @@ extern int sogetrderr(vnode_t *, int, int *);
extern int sogetwrerr(vnode_t *, int, int *);
extern int so_addr_verify(struct sonode *, const struct sockaddr *,
socklen_t);
+extern int so_ux_lookup(struct sonode *, struct sockaddr_un *, int,
+ vnode_t **);
extern int so_ux_addr_xlate(struct sonode *, struct sockaddr *,
socklen_t, int, void **, socklen_t *);
extern void so_unix_close(struct sonode *);
diff --git a/usr/src/uts/common/fs/swapfs/swap_subr.c b/usr/src/uts/common/fs/swapfs/swap_subr.c
index 74c4302da9..a4d983665b 100644
--- a/usr/src/uts/common/fs/swapfs/swap_subr.c
+++ b/usr/src/uts/common/fs/swapfs/swap_subr.c
@@ -110,9 +110,11 @@ swapfs_recalc(pgcnt_t pgs)
* memory that can be used as swap space should do so by
* setting swapfs_desfree at boot time, not swapfs_minfree.
* However, swapfs_minfree is tunable by install as a
- * workaround for bugid 1147463.
+ * workaround for bugid 1147463. Note swapfs_minfree is set
+ * to 1/8th of memory, but clamped at the limit of 256 MB.
*/
- new_swapfs_minfree = MAX(btopr(2 * 1024 * 1024), pgs >> 3);
+ new_swapfs_minfree = MIN(MAX(btopr(2 * 1024 * 1024), pgs >> 3),
+ btopr(256 * 1024 * 1024));
}
/*
diff --git a/usr/src/uts/common/fs/tmpfs/tmp_dir.c b/usr/src/uts/common/fs/tmpfs/tmp_dir.c
index f6621c8097..387cc6ae54 100644
--- a/usr/src/uts/common/fs/tmpfs/tmp_dir.c
+++ b/usr/src/uts/common/fs/tmpfs/tmp_dir.c
@@ -516,7 +516,7 @@ tdirdelete(
*/
namelen = strlen(tpdp->td_name) + 1;
- tmp_memfree(tpdp, sizeof (struct tdirent) + namelen);
+ kmem_free(tpdp, sizeof (struct tdirent) + namelen);
dir->tn_size -= (sizeof (struct tdirent) + namelen);
dir->tn_dirents--;
@@ -549,8 +549,8 @@ tdirinit(
ASSERT(RW_WRITE_HELD(&parent->tn_rwlock));
ASSERT(dir->tn_type == VDIR);
- dot = tmp_memalloc(sizeof (struct tdirent) + 2, TMP_MUSTHAVE);
- dotdot = tmp_memalloc(sizeof (struct tdirent) + 3, TMP_MUSTHAVE);
+ dot = kmem_zalloc(sizeof (struct tdirent) + 2, KM_SLEEP);
+ dotdot = kmem_zalloc(sizeof (struct tdirent) + 3, KM_SLEEP);
/*
* Initialize the entries
@@ -650,7 +650,7 @@ tdirtrunc(struct tmpnode *dir)
tmpfs_hash_out(tdp);
- tmp_memfree(tdp, sizeof (struct tdirent) + namelen);
+ kmem_free(tdp, sizeof (struct tdirent) + namelen);
dir->tn_size -= (sizeof (struct tdirent) + namelen);
dir->tn_dirents--;
}
@@ -925,7 +925,7 @@ tdiraddentry(
*/
namelen = strlen(name) + 1;
alloc_size = namelen + sizeof (struct tdirent);
- tdp = tmp_memalloc(alloc_size, 0);
+ tdp = kmem_zalloc(alloc_size, KM_NOSLEEP | KM_NORMALPRI);
if (tdp == NULL)
return (ENOSPC);
@@ -1025,7 +1025,7 @@ tdirmaketnode(
((va->va_mask & AT_MTIME) && TIMESPEC_OVERFLOW(&va->va_mtime)))
return (EOVERFLOW);
type = va->va_type;
- tp = tmp_memalloc(sizeof (struct tmpnode), TMP_MUSTHAVE);
+ tp = kmem_zalloc(sizeof (struct tmpnode), KM_SLEEP);
tmpnode_init(tm, tp, va, cred);
/* setup normal file/dir's extended attribute directory */
diff --git a/usr/src/uts/common/fs/tmpfs/tmp_subr.c b/usr/src/uts/common/fs/tmpfs/tmp_subr.c
index 2e59d28d80..e6e2b392fe 100644
--- a/usr/src/uts/common/fs/tmpfs/tmp_subr.c
+++ b/usr/src/uts/common/fs/tmpfs/tmp_subr.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
#include <sys/types.h>
@@ -40,9 +41,19 @@
#include <sys/policy.h>
#include <sys/fs/tmp.h>
#include <sys/fs/tmpnode.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+
+#define KILOBYTE 1024
+#define MEGABYTE (1024 * KILOBYTE)
+#define GIGABYTE (1024 * MEGABYTE)
#define MODESHIFT 3
+#define VALIDMODEBITS 07777
+
+extern pgcnt_t swapfs_minfree;
+
int
tmp_taccess(void *vtp, int mode, struct cred *cred)
{
@@ -71,7 +82,6 @@ tmp_taccess(void *vtp, int mode, struct cred *cred)
* a plain file and you have write access to that file.
* Function returns 0 if remove access is granted.
*/
-
int
tmp_sticky_remove_access(struct tmpnode *dir, struct tmpnode *entry,
struct cred *cr)
@@ -89,111 +99,122 @@ tmp_sticky_remove_access(struct tmpnode *dir, struct tmpnode *entry,
}
/*
- * Allocate zeroed memory if tmpfs_maxkmem has not been exceeded
- * or the 'musthave' flag is set. 'musthave' allocations should
- * always be subordinate to normal allocations so that tmpfs_maxkmem
- * can't be exceeded by more than a few KB. Example: when creating
- * a new directory, the tmpnode is a normal allocation; if that
- * succeeds, the dirents for "." and ".." are 'musthave' allocations.
- */
-void *
-tmp_memalloc(size_t size, int musthave)
-{
- static time_t last_warning;
- time_t now;
-
- if (atomic_add_long_nv(&tmp_kmemspace, size) < tmpfs_maxkmem ||
- musthave)
- return (kmem_zalloc(size, KM_SLEEP));
-
- atomic_add_long(&tmp_kmemspace, -size);
- now = gethrestime_sec();
- if (last_warning != now) {
- last_warning = now;
- cmn_err(CE_WARN, "tmp_memalloc: tmpfs over memory limit");
- }
- return (NULL);
-}
-
-void
-tmp_memfree(void *cp, size_t size)
-{
- kmem_free(cp, size);
- atomic_add_long(&tmp_kmemspace, -size);
-}
-
-/*
* Convert a string containing a number (number of bytes) to a pgcnt_t,
* containing the corresponding number of pages. On 32-bit kernels, the
* maximum value encoded in 'str' is PAGESIZE * ULONG_MAX, while the value
* returned in 'maxpg' is at most ULONG_MAX.
*
- * If the number is followed by a "k" or "K", the value is converted from
- * kilobytes to bytes. If it is followed by an "m" or "M" it is converted
- * from megabytes to bytes. If it is not followed by a character it is
- * assumed to be in bytes. Multiple letter options are allowed, so for instance
- * '2mk' is interpreted as 2gb.
+ * The number may be followed by a magnitude suffix: "k" or "K" for kilobytes;
+ * "m" or "M" for megabytes; "g" or "G" for gigabytes. This interface allows
+ * for an arguably esoteric interpretation of multiple suffix characters:
+ * namely, they cascade. For example, the caller may specify "2mk", which is
+ * interpreted as 2 gigabytes. It would seem, at this late stage, that the
+ * horse has left not only the barn but indeed the country, and possibly the
+ * entire planetary system. Alternatively, the number may be followed by a
+ * single '%' sign, indicating the size is a percentage of either the zone's
+ * swap limit or the system's overall swap size.
*
* Parse and overflow errors are detected and a non-zero number returned on
* error.
*/
-
int
tmp_convnum(char *str, pgcnt_t *maxpg)
{
- uint64_t num = 0, oldnum;
+ u_longlong_t num = 0;
#ifdef _LP64
- uint64_t max_bytes = ULONG_MAX;
+ u_longlong_t max_bytes = ULONG_MAX;
#else
- uint64_t max_bytes = PAGESIZE * (uint64_t)ULONG_MAX;
+ u_longlong_t max_bytes = PAGESIZE * (uint64_t)ULONG_MAX;
#endif
char *c;
-
- if (str == NULL)
+ const struct convchar {
+ char *cc_char;
+ uint64_t cc_factor;
+ } convchars[] = {
+ { "kK", KILOBYTE },
+ { "mM", MEGABYTE },
+ { "gG", GIGABYTE },
+ { NULL, 0 }
+ };
+
+ if (str == NULL) {
return (EINVAL);
+ }
c = str;
/*
- * Convert str to number
+ * Convert the initial numeric portion of the input string.
*/
- while ((*c >= '0') && (*c <= '9')) {
- oldnum = num;
- num = num * 10 + (*c++ - '0');
- if (oldnum > num) /* overflow */
+ if (ddi_strtoull(str, &c, 10, &num) != 0) {
+ return (EINVAL);
+ }
+
+ /*
+ * Handle a size in percent. Anything other than a single percent
+ * modifier is invalid. We use either the zone's swap limit or the
+ * system's total available swap size as the initial value. Perform the
+ * intermediate calculation in pages to avoid overflow.
+ */
+ if (*c == '\%') {
+ u_longlong_t cap;
+
+ if (*(c + 1) != '\0')
+ return (EINVAL);
+
+ if (num > 100)
return (EINVAL);
+
+ cap = (u_longlong_t)curproc->p_zone->zone_max_swap_ctl;
+ if (cap == UINT64_MAX) {
+ /*
+ * Use the amount of available physical and memory swap
+ */
+ mutex_enter(&anoninfo_lock);
+ cap = TOTAL_AVAILABLE_SWAP;
+ mutex_exit(&anoninfo_lock);
+ } else {
+ cap = btop(cap);
+ }
+
+ num = ptob(cap * num / 100);
+ goto done;
}
/*
- * Terminate on null
+ * Apply the (potentially cascading) magnitude suffixes until an
+ * invalid character is found, or the string comes to an end.
*/
- while (*c != '\0') {
- switch (*c++) {
+ for (; *c != '\0'; c++) {
+ int i;
+
+ for (i = 0; convchars[i].cc_char != NULL; i++) {
+ /*
+ * Check if this character matches this multiplier
+ * class:
+ */
+ if (strchr(convchars[i].cc_char, *c) != NULL) {
+ /*
+ * Check for overflow:
+ */
+ if (num > max_bytes / convchars[i].cc_factor) {
+ return (EINVAL);
+ }
+
+ num *= convchars[i].cc_factor;
+ goto valid_char;
+ }
+ }
/*
- * convert from kilobytes
+ * This was not a valid multiplier suffix character.
*/
- case 'k':
- case 'K':
- if (num > max_bytes / 1024) /* will overflow */
- return (EINVAL);
- num *= 1024;
- break;
+ return (EINVAL);
- /*
- * convert from megabytes
- */
- case 'm':
- case 'M':
- if (num > max_bytes / (1024 * 1024)) /* will overflow */
- return (EINVAL);
- num *= 1024 * 1024;
- break;
-
- default:
- return (EINVAL);
- }
+valid_char:
+ continue;
}
+done:
/*
* Since btopr() rounds up to page granularity, this round-up can
* cause an overflow only if 'num' is between (max_bytes - PAGESIZE)
@@ -204,3 +225,29 @@ tmp_convnum(char *str, pgcnt_t *maxpg)
return (EINVAL);
return (0);
}
+
+/*
+ * Parse an octal mode string for use as the permissions set for the root
+ * of the tmpfs mount.
+ */
+int
+tmp_convmode(char *str, mode_t *mode)
+{
+ ulong_t num;
+ char *c;
+
+ if (str == NULL) {
+ return (EINVAL);
+ }
+
+ if (ddi_strtoul(str, &c, 8, &num) != 0) {
+ return (EINVAL);
+ }
+
+ if ((num & ~VALIDMODEBITS) != 0) {
+ return (EINVAL);
+ }
+
+ *mode = VALIDMODEBITS & num;
+ return (0);
+}
diff --git a/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c b/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c
index f8a36a528f..3c088c442c 100644
--- a/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c
+++ b/usr/src/uts/common/fs/tmpfs/tmp_vfsops.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
#include <sys/types.h>
@@ -55,6 +56,15 @@
static int tmpfsfstype;
/*
+ * tmpfs_mountcount is used to prevent module unloads while there is still
+ * state from a former mount hanging around. With forced umount support, the
+ * filesystem module must not be allowed to go away before the last
+ * VFS_FREEVFS() call has been made. Since this is just an atomic counter,
+ * there's no need for locking.
+ */
+static uint32_t tmpfs_mountcount;
+
+/*
* tmpfs vfs operations.
*/
static int tmpfsinit(int, char *);
@@ -64,6 +74,7 @@ static int tmp_unmount(struct vfs *, int, struct cred *);
static int tmp_root(struct vfs *, struct vnode **);
static int tmp_statvfs(struct vfs *, struct statvfs64 *);
static int tmp_vget(struct vfs *, struct vnode **, struct fid *);
+static void tmp_freevfs(vfs_t *vfsp);
/*
* Loadable module wrapper
@@ -76,7 +87,7 @@ static vfsdef_t vfw = {
VFSDEF_VERSION,
"tmpfs",
tmpfsinit,
- VSW_HASPROTO|VSW_STATS|VSW_ZMOUNT,
+ VSW_HASPROTO|VSW_CANREMOUNT|VSW_STATS|VSW_ZMOUNT,
&tmpfs_proto_opttbl
};
@@ -90,7 +101,8 @@ static mntopt_t tmpfs_options[] = {
/* Option name Cancel Opt Arg Flags Data */
{ MNTOPT_XATTR, xattr_cancel, NULL, MO_DEFAULT, NULL},
{ MNTOPT_NOXATTR, noxattr_cancel, NULL, NULL, NULL},
- { "size", NULL, "0", MO_HASVALUE, NULL}
+ { "size", NULL, "0", MO_HASVALUE, NULL},
+ { "mode", NULL, NULL, MO_HASVALUE, NULL}
};
@@ -121,6 +133,14 @@ _fini()
{
int error;
+ /*
+ * If a forceably unmounted instance is still hanging around, we cannot
+ * allow the module to be unloaded because that would cause panics once
+ * the VFS framework decides it's time to call into VFS_FREEVFS().
+ */
+ if (tmpfs_mountcount)
+ return (EBUSY);
+
error = mod_remove(&modlinkage);
if (error)
return (error);
@@ -139,14 +159,6 @@ _info(struct modinfo *modinfop)
}
/*
- * The following are patchable variables limiting the amount of system
- * resources tmpfs can use.
- *
- * tmpfs_maxkmem limits the amount of kernel kmem_alloc memory
- * tmpfs can use for it's data structures (e.g. tmpnodes, directory entries)
- * It is not determined by setting a hard limit but rather as a percentage of
- * physical memory which is determined when tmpfs is first used in the system.
- *
* tmpfs_minfree is the minimum amount of swap space that tmpfs leaves for
* the rest of the system. In other words, if the amount of free swap space
* in the system (i.e. anoninfo.ani_free) drops below tmpfs_minfree, tmpfs
@@ -155,9 +167,7 @@ _info(struct modinfo *modinfop)
* There is also a per mount limit on the amount of swap space
* (tmount.tm_anonmax) settable via a mount option.
*/
-size_t tmpfs_maxkmem = 0;
size_t tmpfs_minfree = 0;
-size_t tmp_kmemspace; /* bytes of kernel heap used by all tmpfs */
static major_t tmpfs_major;
static minor_t tmpfs_minor;
@@ -176,6 +186,7 @@ tmpfsinit(int fstype, char *name)
VFSNAME_ROOT, { .vfs_root = tmp_root },
VFSNAME_STATVFS, { .vfs_statvfs = tmp_statvfs },
VFSNAME_VGET, { .vfs_vget = tmp_vget },
+ VFSNAME_FREEVFS, { .vfs_freevfs = tmp_freevfs },
NULL, NULL
};
int error;
@@ -210,27 +221,17 @@ tmpfsinit(int fstype, char *name)
tmpfs_minfree = btopr(TMPMINFREE);
}
- /*
- * The maximum amount of space tmpfs can allocate is
- * TMPMAXPROCKMEM percent of kernel memory
- */
- if (tmpfs_maxkmem == 0)
- tmpfs_maxkmem = MAX(PAGESIZE, kmem_maxavail() / TMPMAXFRACKMEM);
-
if ((tmpfs_major = getudev()) == (major_t)-1) {
cmn_err(CE_WARN, "tmpfsinit: Can't get unique device number.");
tmpfs_major = 0;
}
mutex_init(&tmpfs_minor_lock, NULL, MUTEX_DEFAULT, NULL);
+ tmpfs_mountcount = 0;
return (0);
}
static int
-tmp_mount(
- struct vfs *vfsp,
- struct vnode *mvp,
- struct mounta *uap,
- struct cred *cr)
+tmp_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
{
struct tmount *tm = NULL;
struct tmpnode *tp;
@@ -239,8 +240,9 @@ tmp_mount(
pgcnt_t anonmax;
struct vattr rattr;
int got_attrs;
-
- char *sizestr;
+ boolean_t mode_arg = B_FALSE;
+ mode_t root_mode = 0777;
+ char *argstr;
if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
return (error);
@@ -249,7 +251,7 @@ tmp_mount(
return (ENOTDIR);
mutex_enter(&mvp->v_lock);
- if ((uap->flags & MS_OVERLAY) == 0 &&
+ if ((uap->flags & MS_REMOUNT) == 0 && (uap->flags & MS_OVERLAY) == 0 &&
(mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
mutex_exit(&mvp->v_lock);
return (EBUSY);
@@ -275,18 +277,45 @@ tmp_mount(
* tm_anonmax is set according to the mount arguments
* if any. Otherwise, it is set to a maximum value.
*/
- if (vfs_optionisset(vfsp, "size", &sizestr)) {
- if ((error = tmp_convnum(sizestr, &anonmax)) != 0)
+ if (vfs_optionisset(vfsp, "size", &argstr)) {
+ if ((error = tmp_convnum(argstr, &anonmax)) != 0)
goto out;
} else {
anonmax = ULONG_MAX;
}
+ /*
+ * The "mode" mount argument allows the operator to override the
+ * permissions of the root of the tmpfs mount.
+ */
+ if (vfs_optionisset(vfsp, "mode", &argstr)) {
+ if ((error = tmp_convmode(argstr, &root_mode)) != 0) {
+ goto out;
+ }
+ mode_arg = B_TRUE;
+ }
+
if (error = pn_get(uap->dir,
(uap->flags & MS_SYSSPACE) ? UIO_SYSSPACE : UIO_USERSPACE, &dpn))
goto out;
- if ((tm = tmp_memalloc(sizeof (struct tmount), 0)) == NULL) {
+ if (uap->flags & MS_REMOUNT) {
+ tm = (struct tmount *)VFSTOTM(vfsp);
+
+ /*
+ * If we change the size so its less than what is currently
+ * being used, we allow that. The file system will simply be
+ * full until enough files have been removed to get below the
+ * new max.
+ */
+ mutex_enter(&tm->tm_contents);
+ tm->tm_anonmax = anonmax;
+ mutex_exit(&tm->tm_contents);
+ goto out;
+ }
+
+ if ((tm = kmem_zalloc(sizeof (struct tmount),
+ KM_NOSLEEP | KM_NORMALPRI)) == NULL) {
pn_free(&dpn);
error = ENOMEM;
goto out;
@@ -318,17 +347,17 @@ tmp_mount(
vfsp->vfs_bsize = PAGESIZE;
vfsp->vfs_flag |= VFS_NOTRUNC;
vfs_make_fsid(&vfsp->vfs_fsid, tm->tm_dev, tmpfsfstype);
- tm->tm_mntpath = tmp_memalloc(dpn.pn_pathlen + 1, TMP_MUSTHAVE);
+ tm->tm_mntpath = kmem_zalloc(dpn.pn_pathlen + 1, KM_SLEEP);
(void) strcpy(tm->tm_mntpath, dpn.pn_path);
/*
* allocate and initialize root tmpnode structure
*/
bzero(&rattr, sizeof (struct vattr));
- rattr.va_mode = (mode_t)(S_IFDIR | 0777); /* XXX modes */
+ rattr.va_mode = (mode_t)(S_IFDIR | root_mode);
rattr.va_type = VDIR;
rattr.va_rdev = 0;
- tp = tmp_memalloc(sizeof (struct tmpnode), TMP_MUSTHAVE);
+ tp = kmem_zalloc(sizeof (struct tmpnode), KM_SLEEP);
tmpnode_init(tm, tp, &rattr, cr);
/*
@@ -345,7 +374,14 @@ tmp_mount(
* the previously set hardwired defaults to prevail.
*/
if (got_attrs == 0) {
- tp->tn_mode = rattr.va_mode;
+ if (!mode_arg) {
+ /*
+ * Only use the underlying mount point for the
+ * mode if the "mode" mount argument was not
+ * provided.
+ */
+ tp->tn_mode = rattr.va_mode;
+ }
tp->tn_uid = rattr.va_uid;
tp->tn_gid = rattr.va_gid;
}
@@ -366,6 +402,7 @@ tmp_mount(
pn_free(&dpn);
error = 0;
+ atomic_inc_32(&tmpfs_mountcount);
out:
if (error == 0)
@@ -381,36 +418,107 @@ tmp_unmount(struct vfs *vfsp, int flag, struct cred *cr)
struct tmpnode *tnp, *cancel;
struct vnode *vp;
int error;
+ uint_t cnt;
+ int i;
if ((error = secpolicy_fs_unmount(cr, vfsp)) != 0)
return (error);
- /*
- * forced unmount is not supported by this file system
- * and thus, ENOTSUP, is being returned.
- */
- if (flag & MS_FORCE)
- return (ENOTSUP);
-
mutex_enter(&tm->tm_contents);
/*
- * If there are no open files, only the root node should have
- * a reference count.
+ * In the normal unmount case (non-forced unmount), if there are no
+ * open files, only the root node should have a reference count.
+ *
* With tm_contents held, nothing can be added or removed.
* There may be some dirty pages. To prevent fsflush from
* disrupting the unmount, put a hold on each node while scanning.
* If we find a previously referenced node, undo the holds we have
* placed and fail EBUSY.
+ *
+ * However, in the case of a forced umount, things are a bit different.
+ * An additional VFS_HOLD is added for each outstanding VN_HOLD to
+ * ensure that the file system is not cleaned up (tmp_freevfs) until
+ * the last vfs hold is dropped. This happens in tmp_inactive as the
+ * vnodes are released. Also, we can't add an additional VN_HOLD in
+ * this case since that would prevent tmp_inactive from ever being
+ * called. Finally, we do need to drop the zone ref now (zone_rele_ref)
+ * so that the zone is not blocked waiting for the final file system
+ * cleanup.
*/
tnp = tm->tm_rootnode;
- if (TNTOV(tnp)->v_count > 1) {
+
+ vp = TNTOV(tnp);
+ mutex_enter(&vp->v_lock);
+ cnt = vp->v_count;
+ if (flag & MS_FORCE) {
+ vfsp->vfs_flag |= VFS_UNMOUNTED;
+ /* Extra hold which we rele below when we drop the zone ref */
+ VFS_HOLD(vfsp);
+
+ for (i = 1; i < cnt; i++)
+ VFS_HOLD(vfsp);
+
+ /* drop the mutex now because no one can find this mount */
+ mutex_exit(&tm->tm_contents);
+ } else if (cnt > 1) {
+ mutex_exit(&vp->v_lock);
mutex_exit(&tm->tm_contents);
return (EBUSY);
}
+ mutex_exit(&vp->v_lock);
+ /*
+ * Check for open files. An open file causes everything to unwind
+ * unless this is a forced umount.
+ */
for (tnp = tnp->tn_forw; tnp; tnp = tnp->tn_forw) {
- if ((vp = TNTOV(tnp))->v_count > 0) {
+ vp = TNTOV(tnp);
+ mutex_enter(&vp->v_lock);
+ cnt = vp->v_count;
+ if (flag & MS_FORCE) {
+ for (i = 0; i < cnt; i++)
+ VFS_HOLD(vfsp);
+
+ /*
+ * In the case of a forced umount don't add an
+ * additional VN_HOLD on the already held vnodes, like
+ * we do in the non-forced unmount case. If the
+ * cnt > 0, then the vnode already has at least one
+ * hold and we need tmp_inactive to get called when the
+ * last pre-existing hold on the node is released so
+ * that we can VFS_RELE the VFS holds we just added.
+ */
+ if (cnt == 0) {
+ /* directly add VN_HOLD since have the lock */
+ vp->v_count++;
+ }
+
+ mutex_exit(&vp->v_lock);
+
+ /*
+ * If the tmpnode has any pages associated with it
+ * (i.e. if it's a normal file with non-zero size), the
+ * tmpnode could still be discovered by pageout or
+ * fsflush via the page vnode pointers. To prevent this
+ * from interfering with the tmp_freevfs, truncate the
+ * tmpnode now.
+ */
+ if (tnp->tn_size != 0 && tnp->tn_type == VREG) {
+ rw_enter(&tnp->tn_rwlock, RW_WRITER);
+ rw_enter(&tnp->tn_contents, RW_WRITER);
+
+ (void) tmpnode_trunc(tm, tnp, 0);
+
+ rw_exit(&tnp->tn_contents);
+ rw_exit(&tnp->tn_rwlock);
+
+ ASSERT(tnp->tn_size == 0);
+ ASSERT(tnp->tn_nblocks == 0);
+ }
+ } else if (cnt > 0) {
+ /* An open file; unwind the holds we've been adding. */
+ mutex_exit(&vp->v_lock);
cancel = tm->tm_rootnode->tn_forw;
while (cancel != tnp) {
vp = TNTOV(cancel);
@@ -420,14 +528,50 @@ tmp_unmount(struct vfs *vfsp, int flag, struct cred *cr)
}
mutex_exit(&tm->tm_contents);
return (EBUSY);
+ } else {
+ /* directly add a VN_HOLD since we have the lock */
+ vp->v_count++;
+ mutex_exit(&vp->v_lock);
}
- VN_HOLD(vp);
}
- /*
- * We can drop the mutex now because no one can find this mount
- */
- mutex_exit(&tm->tm_contents);
+ if (flag & MS_FORCE) {
+ /*
+ * Drop the zone ref now since we don't know how long it will
+ * be until the final vfs_rele is called by tmp_inactive.
+ */
+ if (vfsp->vfs_zone) {
+ zone_rele_ref(&vfsp->vfs_implp->vi_zone_ref,
+ ZONE_REF_VFS);
+ vfsp->vfs_zone = 0;
+ }
+ /* We can now drop the extra hold we added above. */
+ VFS_RELE(vfsp);
+ } else {
+ /*
+ * For the non-forced case, we can drop the mutex now because
+ * no one can find this mount anymore
+ */
+ vfsp->vfs_flag |= VFS_UNMOUNTED;
+ mutex_exit(&tm->tm_contents);
+ }
+
+ return (0);
+}
+
+/*
+ * Implementation of VFS_FREEVFS() to support forced umounts. This is called by
+ * the vfs framework after umount and the last VFS_RELE, to trigger the release
+ * of any resources still associated with the given vfs_t. We only add
+ * additional VFS_HOLDs during the forced umount case, so this is normally
+ * called immediately after tmp_umount.
+ */
+void
+tmp_freevfs(vfs_t *vfsp)
+{
+ struct tmount *tm = (struct tmount *)VFSTOTM(vfsp);
+ struct tmpnode *tnp;
+ struct vnode *vp;
/*
* Free all kmemalloc'd and anonalloc'd memory associated with
@@ -437,6 +581,16 @@ tmp_unmount(struct vfs *vfsp, int flag, struct cred *cr)
* tmpnode_free which assumes that the directory entry has been
* removed before the file.
*/
+
+ /*
+ * Now that we are tearing ourselves down we need to remove the
+ * UNMOUNTED flag. If we don't, we'll later hit a VN_RELE when we remove
+ * files from the system causing us to have a negative value. Doing this
+ * seems a bit better than trying to set a flag on the tmount that says
+ * we're tearing down.
+ */
+ vfsp->vfs_flag &= ~VFS_UNMOUNTED;
+
/*
* Remove all directory entries
*/
@@ -503,15 +657,16 @@ tmp_unmount(struct vfs *vfsp, int flag, struct cred *cr)
ASSERT(tm->tm_mntpath);
- tmp_memfree(tm->tm_mntpath, strlen(tm->tm_mntpath) + 1);
+ kmem_free(tm->tm_mntpath, strlen(tm->tm_mntpath) + 1);
ASSERT(tm->tm_anonmem == 0);
mutex_destroy(&tm->tm_contents);
mutex_destroy(&tm->tm_renamelck);
- tmp_memfree(tm, sizeof (struct tmount));
+ kmem_free(tm, sizeof (struct tmount));
- return (0);
+ /* Allow _fini() to succeed now */
+ atomic_dec_32(&tmpfs_mountcount);
}
/*
@@ -614,13 +769,7 @@ tmp_statvfs(struct vfs *vfsp, struct statvfs64 *sbp)
* available to tmpfs. This is fairly inaccurate since it doesn't
* take into account the names stored in the directory entries.
*/
- if (tmpfs_maxkmem > tmp_kmemspace)
- sbp->f_ffree = (tmpfs_maxkmem - tmp_kmemspace) /
- (sizeof (struct tmpnode) + sizeof (struct tdirent));
- else
- sbp->f_ffree = 0;
-
- sbp->f_files = tmpfs_maxkmem /
+ sbp->f_ffree = sbp->f_files = ptob(availrmem) /
(sizeof (struct tmpnode) + sizeof (struct tdirent));
sbp->f_favail = (fsfilcnt64_t)(sbp->f_ffree);
(void) cmpldev(&d32, vfsp->vfs_dev);
diff --git a/usr/src/uts/common/fs/tmpfs/tmp_vnops.c b/usr/src/uts/common/fs/tmpfs/tmp_vnops.c
index 3c251df0cc..18e037ee22 100644
--- a/usr/src/uts/common/fs/tmpfs/tmp_vnops.c
+++ b/usr/src/uts/common/fs/tmpfs/tmp_vnops.c
@@ -25,7 +25,7 @@
*/
/*
- * Copyright (c) 2015, Joyent, Inc. All rights reserved.
+ * Copyright 2016, Joyent, Inc.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*/
@@ -584,6 +584,10 @@ tmp_read(struct vnode *vp, struct uio *uiop, int ioflag, cred_t *cred,
struct tmount *tm = (struct tmount *)VTOTM(vp);
int error;
+ /* If the filesystem was umounted by force, return immediately. */
+ if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
+ return (EIO);
+
/*
* We don't currently support reading non-regular files
*/
@@ -613,6 +617,10 @@ tmp_write(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cred,
struct tmount *tm = (struct tmount *)VTOTM(vp);
int error;
+ /* If the filesystem was umounted by force, return immediately. */
+ if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
+ return (EIO);
+
/*
* We don't currently support writing to non-regular files
*/
@@ -786,8 +794,13 @@ tmp_setattr(
rw_exit(&tp->tn_contents);
rw_exit(&tp->tn_rwlock);
- if (error == 0 && vap->va_size == 0)
- vnevent_truncate(vp, ct);
+ if (error == 0) {
+ if (vap->va_size == 0) {
+ vnevent_truncate(vp, ct);
+ } else {
+ vnevent_resize(vp, ct);
+ }
+ }
goto out1;
}
@@ -833,6 +846,9 @@ tmp_lookup(
struct tmpnode *ntp = NULL;
int error;
+ /* If the filesystem was umounted by force, return immediately. */
+ if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
+ return (EIO);
/* allow cd into @ dir */
if (flags & LOOKUP_XATTR) {
@@ -871,8 +887,7 @@ tmp_lookup(
return (error);
}
- xdp = tmp_memalloc(sizeof (struct tmpnode),
- TMP_MUSTHAVE);
+ xdp = kmem_zalloc(sizeof (struct tmpnode), KM_SLEEP);
tm = VTOTM(dvp);
tmpnode_init(tm, xdp, &tp->tn_attr, NULL);
/*
@@ -1302,10 +1317,8 @@ tmp_rename(
vnevent_rename_src(TNTOV(fromtp), odvp, onm, ct);
/*
* vnevent_rename_dest is called in tdirenter().
- * Notify the target dir if not same as source dir.
*/
- if (ndvp != odvp)
- vnevent_rename_dest_dir(ndvp, ct);
+ vnevent_rename_dest_dir(ndvp, TNTOV(fromtp), nnm, ct);
}
done:
@@ -1474,6 +1487,10 @@ tmp_readdir(
int reclen;
caddr_t outbuf;
+ /* If the filesystem was umounted by force, return immediately. */
+ if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
+ return (EIO);
+
if (uiop->uio_loffset >= MAXOFF_T) {
if (eofp)
*eofp = 1;
@@ -1612,7 +1629,7 @@ tmp_symlink(
return (error);
}
len = strlen(tnm) + 1;
- cp = tmp_memalloc(len, 0);
+ cp = kmem_alloc(len, KM_NOSLEEP | KM_NORMALPRI);
if (cp == NULL) {
tmpnode_rele(self);
return (ENOSPC);
@@ -1677,10 +1694,27 @@ top:
* there's little to do -- just drop our hold.
*/
if (vp->v_count > 1 || tp->tn_nlink != 0) {
- vp->v_count--;
+ if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED) {
+ /*
+ * Since the file system was forcibly unmounted, we can
+ * have a case (v_count == 1, tn_nlink != 0) where this
+ * file was open so we didn't add an extra hold on the
+ * file in tmp_unmount. We are counting on the
+ * interaction of the hold made in tmp_unmount and
+ * rele-ed in tmp_vfsfree so we need to be sure we
+ * don't decrement in this case.
+ */
+ if (vp->v_count > 1)
+ vp->v_count--;
+ } else {
+ vp->v_count--;
+ }
mutex_exit(&vp->v_lock);
mutex_exit(&tp->tn_tlock);
rw_exit(&tp->tn_rwlock);
+ /* If the filesystem was umounted by force, rele the vfs ref */
+ if (tm->tm_vfsp->vfs_flag & VFS_UNMOUNTED)
+ VFS_RELE(tm->tm_vfsp);
return;
}
@@ -1705,7 +1739,7 @@ top:
goto top;
}
if (tp->tn_type == VLNK)
- tmp_memfree(tp->tn_symlink, tp->tn_size + 1);
+ kmem_free(tp->tn_symlink, tp->tn_size + 1);
}
/*
@@ -1739,7 +1773,11 @@ top:
rw_destroy(&tp->tn_rwlock);
mutex_destroy(&tp->tn_tlock);
vn_free(TNTOV(tp));
- tmp_memfree(tp, sizeof (struct tmpnode));
+ kmem_free(tp, sizeof (struct tmpnode));
+
+ /* If the filesystem was umounted by force, rele the vfs ref */
+ if (tm->tm_vfsp->vfs_flag & VFS_UNMOUNTED)
+ VFS_RELE(tm->tm_vfsp);
}
/* ARGSUSED2 */
@@ -1861,6 +1899,10 @@ tmp_getapage(
struct vnode *pvp;
u_offset_t poff;
+ /* If the filesystem was umounted by force, return immediately. */
+ if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
+ return (EIO);
+
if (protp != NULL)
*protp = PROT_ALL;
again:
@@ -2082,6 +2124,10 @@ tmp_putapage(
u_offset_t offset;
u_offset_t tmpoff;
+ /* If the filesystem was umounted by force, return immediately. */
+ if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
+ return (EIO);
+
ASSERT(PAGE_LOCKED(pp));
/* Kluster in tmp_klustsize chunks */
@@ -2342,8 +2388,13 @@ tmp_space(
return (EFBIG);
error = tmp_freesp(vp, bfp, flag);
- if (error == 0 && bfp->l_start == 0)
- vnevent_truncate(vp, ct);
+ if (error == 0) {
+ if (bfp->l_start == 0) {
+ vnevent_truncate(vp, ct);
+ } else {
+ vnevent_resize(vp, ct);
+ }
+ }
}
return (error);
}
diff --git a/usr/src/uts/common/fs/udfs/udf_dir.c b/usr/src/uts/common/fs/udfs/udf_dir.c
index c1e2c74a87..def046a0bf 100644
--- a/usr/src/uts/common/fs/udfs/udf_dir.c
+++ b/usr/src/uts/common/fs/udfs/udf_dir.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -562,9 +563,8 @@ out:
namep, ctp);
}
- if (sdp != tdp) {
- vnevent_rename_dest_dir(ITOV(tdp), ctp);
- }
+ vnevent_rename_dest_dir(ITOV(tdp), ITOV(tip),
+ namep, ctp);
}
/*
diff --git a/usr/src/uts/common/fs/udfs/udf_vnops.c b/usr/src/uts/common/fs/udfs/udf_vnops.c
index 93cc4d49e8..7e17f16ce2 100644
--- a/usr/src/uts/common/fs/udfs/udf_vnops.c
+++ b/usr/src/uts/common/fs/udfs/udf_vnops.c
@@ -569,8 +569,11 @@ udf_setattr(
goto update_inode;
}
- if (vap->va_size == 0)
+ if (vap->va_size == 0) {
vnevent_truncate(vp, ct);
+ } else {
+ vnevent_resize(vp, ct);
+ }
}
/*
* Change file access or modified times.
@@ -1649,8 +1652,13 @@ udf_space(
} else if ((error = convoff(vp, bfp, 0, offset)) == 0) {
error = ud_freesp(vp, bfp, flag, cr);
- if (error == 0 && bfp->l_start == 0)
- vnevent_truncate(vp, ct);
+ if (error == 0) {
+ if (bfp->l_start == 0) {
+ vnevent_truncate(vp, ct);
+ } else {
+ vnevent_resize(vp, ct);
+ }
+ }
}
return (error);
diff --git a/usr/src/uts/common/fs/ufs/ufs_vnops.c b/usr/src/uts/common/fs/ufs/ufs_vnops.c
index cf45b48e3c..d689a8173b 100644
--- a/usr/src/uts/common/fs/ufs/ufs_vnops.c
+++ b/usr/src/uts/common/fs/ufs/ufs_vnops.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 1984, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2015, Joyent, Inc.
+ * Copyright 2016, Joyent, Inc.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
@@ -2193,8 +2193,13 @@ again:
goto update_inode;
}
- if (error == 0 && vap->va_size)
- vnevent_truncate(vp, ct);
+ if (error == 0) {
+ if (vap->va_size) {
+ vnevent_truncate(vp, ct);
+ } else {
+ vnevent_resize(vp, ct);
+ }
+ }
}
if (ulp) {
@@ -3726,12 +3731,7 @@ retry_firstlock:
if (error == 0) {
vnevent_rename_src(ITOV(sip), sdvp, snm, ct);
- /*
- * Notify the target directory of the rename event
- * if source and target directories are not the same.
- */
- if (sdvp != tdvp)
- vnevent_rename_dest_dir(tdvp, ct);
+ vnevent_rename_dest_dir(tdvp, ITOV(sip), tnm, ct);
}
errout:
@@ -4478,8 +4478,13 @@ ufs_space(struct vnode *vp, int cmd, struct flock64 *bfp, int flag,
return (error);
error = ufs_freesp(vp, bfp, flag, cr);
- if (error == 0 && bfp->l_start == 0)
- vnevent_truncate(vp, ct);
+ if (error == 0) {
+ if (bfp->l_start == 0) {
+ vnevent_truncate(vp, ct);
+ } else {
+ vnevent_resize(vp, ct);
+ }
+ }
} else if (cmd == F_ALLOCSP) {
error = ufs_lockfs_begin(ufsvfsp, &ulp,
ULOCKFS_FALLOCATE_MASK);
diff --git a/usr/src/uts/common/fs/vfs.c b/usr/src/uts/common/fs/vfs.c
index e179d934ed..35e65f15e6 100644
--- a/usr/src/uts/common/fs/vfs.c
+++ b/usr/src/uts/common/fs/vfs.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*/
@@ -236,7 +236,8 @@ fsop_root(vfs_t *vfsp, vnode_t **vpp)
* Make sure this root has a path. With lofs, it is possible to have
* a NULL mountpoint.
*/
- if (ret == 0 && vfsp->vfs_mntpt != NULL && (*vpp)->v_path == NULL) {
+ if (ret == 0 && vfsp->vfs_mntpt != NULL &&
+ (*vpp)->v_path == vn_vpath_empty) {
mntpt = vfs_getmntpoint(vfsp);
vn_setpath_str(*vpp, refstr_value(mntpt),
strlen(refstr_value(mntpt)));
@@ -3901,6 +3902,8 @@ vfs_to_modname(const char *vfstype)
vfstype = "fdfs";
} else if (strncmp(vfstype, "nfs", 3) == 0) {
vfstype = "nfs";
+ } else if (strcmp(vfstype, "lxproc") == 0) {
+ vfstype = "lxprocfs";
}
return (vfstype);
diff --git a/usr/src/uts/common/fs/vnode.c b/usr/src/uts/common/fs/vnode.c
index e6b6adf56b..77b30da871 100644
--- a/usr/src/uts/common/fs/vnode.c
+++ b/usr/src/uts/common/fs/vnode.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
@@ -66,6 +66,7 @@
#include <fs/fs_subr.h>
#include <sys/taskq.h>
#include <fs/fs_reparse.h>
+#include <sys/time.h>
/* Determine if this vnode is a file that is read-only */
#define ISROFILE(vp) \
@@ -102,6 +103,9 @@ kmutex_t vskstat_tree_lock;
/* Global variable which enables/disables the vopstats collection */
int vopstats_enabled = 1;
+/* Global used for empty/invalid v_path */
+char *vn_vpath_empty = "";
+
/*
* forward declarations for internal vnode specific data (vsd)
*/
@@ -200,6 +204,11 @@ static void (**vsd_destructor)(void *);
cr = crgetmapped(cr); \
}
+#define VOP_LATENCY_10MS 10000000
+#define VOP_LATENCY_100MS 100000000
+#define VOP_LATENCY_1S 1000000000
+#define VOP_LATENCY_10S 10000000000
+
/*
* Convert stat(2) formats to vnode types and vice versa. (Knows about
* numerical order of S_IFMT and vnode types.)
@@ -2284,7 +2293,7 @@ vn_cache_constructor(void *buf, void *cdrarg, int kmflags)
cv_init(&vp->v_cv, NULL, CV_DEFAULT, NULL);
rw_init(&vp->v_nbllock, NULL, RW_DEFAULT, NULL);
vp->v_femhead = NULL; /* Must be done before vn_reinit() */
- vp->v_path = NULL;
+ vp->v_path = vn_vpath_empty;
vp->v_mpssdata = NULL;
vp->v_vsd = NULL;
vp->v_fopdata = NULL;
@@ -2331,6 +2340,7 @@ void
vn_recycle(vnode_t *vp)
{
ASSERT(vp->v_pages == NULL);
+ VERIFY(vp->v_path != NULL);
/*
* XXX - This really belongs in vn_reinit(), but we have some issues
@@ -2353,9 +2363,9 @@ vn_recycle(vnode_t *vp)
kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead)));
vp->v_femhead = NULL;
}
- if (vp->v_path) {
+ if (vp->v_path != vn_vpath_empty) {
kmem_free(vp->v_path, strlen(vp->v_path) + 1);
- vp->v_path = NULL;
+ vp->v_path = vn_vpath_empty;
}
if (vp->v_fopdata != NULL) {
@@ -2427,9 +2437,10 @@ vn_free(vnode_t *vp)
*/
ASSERT((vp->v_count == 0) || (vp->v_count == 1));
ASSERT(vp->v_count_dnlc == 0);
- if (vp->v_path != NULL) {
+ VERIFY(vp->v_path != NULL);
+ if (vp->v_path != vn_vpath_empty) {
kmem_free(vp->v_path, strlen(vp->v_path) + 1);
- vp->v_path = NULL;
+ vp->v_path = vn_vpath_empty;
}
/* If FEM was in use, make sure everything gets cleaned up */
@@ -2516,6 +2527,7 @@ vnevent_rename_src(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
if (vp == NULL || vp->v_femhead == NULL) {
return;
}
+ (void) VOP_VNEVENT(dvp, VE_RENAME_SRC_DIR, vp, name, ct);
(void) VOP_VNEVENT(vp, VE_RENAME_SRC, dvp, name, ct);
}
@@ -2530,12 +2542,13 @@ vnevent_rename_dest(vnode_t *vp, vnode_t *dvp, char *name,
}
void
-vnevent_rename_dest_dir(vnode_t *vp, caller_context_t *ct)
+vnevent_rename_dest_dir(vnode_t *vp, vnode_t *nvp, char *name,
+ caller_context_t *ct)
{
if (vp == NULL || vp->v_femhead == NULL) {
return;
}
- (void) VOP_VNEVENT(vp, VE_RENAME_DEST_DIR, NULL, NULL, ct);
+ (void) VOP_VNEVENT(vp, VE_RENAME_DEST_DIR, nvp, name, ct);
}
void
@@ -2622,6 +2635,15 @@ vnevent_truncate(vnode_t *vp, caller_context_t *ct)
(void) VOP_VNEVENT(vp, VE_TRUNCATE, NULL, NULL, ct);
}
+void
+vnevent_resize(vnode_t *vp, caller_context_t *ct)
+{
+ if (vp == NULL || vp->v_femhead == NULL) {
+ return;
+ }
+ (void) VOP_VNEVENT(vp, VE_RESIZE, NULL, NULL, ct);
+}
+
/*
* Vnode accessors.
*/
@@ -2981,7 +3003,7 @@ vn_setpath(vnode_t *rootvp, struct vnode *startvp, struct vnode *vp,
* the potential for deadlock.
*/
mutex_enter(&base->v_lock);
- if (base->v_path == NULL) {
+ if (base->v_path == vn_vpath_empty) {
mutex_exit(&base->v_lock);
return;
}
@@ -3008,7 +3030,8 @@ vn_setpath(vnode_t *rootvp, struct vnode *startvp, struct vnode *vp,
rpath = kmem_alloc(rpathalloc, KM_SLEEP);
mutex_enter(&base->v_lock);
- if (base->v_path == NULL || strlen(base->v_path) != rpathlen) {
+ if (base->v_path == vn_vpath_empty ||
+ strlen(base->v_path) != rpathlen) {
mutex_exit(&base->v_lock);
kmem_free(rpath, rpathalloc);
return;
@@ -3022,7 +3045,7 @@ vn_setpath(vnode_t *rootvp, struct vnode *startvp, struct vnode *vp,
rpath[rpathlen + plen] = '\0';
mutex_enter(&vp->v_lock);
- if (vp->v_path != NULL) {
+ if (vp->v_path != vn_vpath_empty) {
mutex_exit(&vp->v_lock);
kmem_free(rpath, rpathalloc);
} else {
@@ -3042,7 +3065,7 @@ vn_setpath_str(struct vnode *vp, const char *str, size_t len)
char *buf = kmem_alloc(len + 1, KM_SLEEP);
mutex_enter(&vp->v_lock);
- if (vp->v_path != NULL) {
+ if (vp->v_path != vn_vpath_empty) {
mutex_exit(&vp->v_lock);
kmem_free(buf, len + 1);
return;
@@ -3066,10 +3089,10 @@ vn_renamepath(vnode_t *dvp, vnode_t *vp, const char *nm, size_t len)
mutex_enter(&vp->v_lock);
tmp = vp->v_path;
- vp->v_path = NULL;
+ vp->v_path = vn_vpath_empty;
mutex_exit(&vp->v_lock);
vn_setpath(rootdir, dvp, vp, nm, len);
- if (tmp != NULL)
+ if (tmp != vn_vpath_empty)
kmem_free(tmp, strlen(tmp) + 1);
}
@@ -3084,7 +3107,7 @@ vn_copypath(struct vnode *src, struct vnode *dst)
int alloc;
mutex_enter(&src->v_lock);
- if (src->v_path == NULL) {
+ if (src->v_path == vn_vpath_empty) {
mutex_exit(&src->v_lock);
return;
}
@@ -3094,7 +3117,7 @@ vn_copypath(struct vnode *src, struct vnode *dst)
mutex_exit(&src->v_lock);
buf = kmem_alloc(alloc, KM_SLEEP);
mutex_enter(&src->v_lock);
- if (src->v_path == NULL || strlen(src->v_path) + 1 != alloc) {
+ if (src->v_path == vn_vpath_empty || strlen(src->v_path) + 1 != alloc) {
mutex_exit(&src->v_lock);
kmem_free(buf, alloc);
return;
@@ -3103,7 +3126,7 @@ vn_copypath(struct vnode *src, struct vnode *dst)
mutex_exit(&src->v_lock);
mutex_enter(&dst->v_lock);
- if (dst->v_path != NULL) {
+ if (dst->v_path != vn_vpath_empty) {
mutex_exit(&dst->v_lock);
kmem_free(buf, alloc);
return;
@@ -3261,14 +3284,57 @@ fop_read(
cred_t *cr,
caller_context_t *ct)
{
- int err;
ssize_t resid_start = uiop->uio_resid;
+ zone_t *zonep = curzone;
+ zone_vfs_kstat_t *zvp = zonep->zone_vfs_stats;
+
+ hrtime_t start, lat;
+ ssize_t len;
+ int err;
+
+ if (vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VBLK) {
+ start = gethrtime();
+
+ mutex_enter(&zonep->zone_vfs_lock);
+ kstat_runq_enter(&zonep->zone_vfs_rwstats);
+ mutex_exit(&zonep->zone_vfs_lock);
+ }
VOPXID_MAP_CR(vp, cr);
err = (*(vp)->v_op->vop_read)(vp, uiop, ioflag, cr, ct);
- VOPSTATS_UPDATE_IO(vp, read,
- read_bytes, (resid_start - uiop->uio_resid));
+ len = resid_start - uiop->uio_resid;
+
+ VOPSTATS_UPDATE_IO(vp, read, read_bytes, len);
+
+ if (vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VBLK) {
+ mutex_enter(&zonep->zone_vfs_lock);
+ zonep->zone_vfs_rwstats.reads++;
+ zonep->zone_vfs_rwstats.nread += len;
+ kstat_runq_exit(&zonep->zone_vfs_rwstats);
+ mutex_exit(&zonep->zone_vfs_lock);
+
+ lat = gethrtime() - start;
+
+ if (lat >= VOP_LATENCY_10MS) {
+ if (lat < VOP_LATENCY_100MS)
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ else if (lat < VOP_LATENCY_1S) {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ } else if (lat < VOP_LATENCY_10S) {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
+ } else {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_10s_ops.value.ui64);
+ }
+ }
+ }
+
return (err);
}
@@ -3280,14 +3346,62 @@ fop_write(
cred_t *cr,
caller_context_t *ct)
{
- int err;
ssize_t resid_start = uiop->uio_resid;
+ zone_t *zonep = curzone;
+ zone_vfs_kstat_t *zvp = zonep->zone_vfs_stats;
+
+ hrtime_t start, lat;
+ ssize_t len;
+ int err;
+
+ /*
+ * For the purposes of VFS kstat consumers, the "waitq" calculation is
+ * repurposed as the active queue for VFS write operations. There's no
+ * actual wait queue for VFS operations.
+ */
+ if (vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VBLK) {
+ start = gethrtime();
+
+ mutex_enter(&zonep->zone_vfs_lock);
+ kstat_waitq_enter(&zonep->zone_vfs_rwstats);
+ mutex_exit(&zonep->zone_vfs_lock);
+ }
VOPXID_MAP_CR(vp, cr);
err = (*(vp)->v_op->vop_write)(vp, uiop, ioflag, cr, ct);
- VOPSTATS_UPDATE_IO(vp, write,
- write_bytes, (resid_start - uiop->uio_resid));
+ len = resid_start - uiop->uio_resid;
+
+ VOPSTATS_UPDATE_IO(vp, write, write_bytes, len);
+
+ if (vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VBLK) {
+ mutex_enter(&zonep->zone_vfs_lock);
+ zonep->zone_vfs_rwstats.writes++;
+ zonep->zone_vfs_rwstats.nwritten += len;
+ kstat_waitq_exit(&zonep->zone_vfs_rwstats);
+ mutex_exit(&zonep->zone_vfs_lock);
+
+ lat = gethrtime() - start;
+
+ if (lat >= VOP_LATENCY_10MS) {
+ if (lat < VOP_LATENCY_100MS)
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ else if (lat < VOP_LATENCY_1S) {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ } else if (lat < VOP_LATENCY_10S) {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
+ } else {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_10s_ops.value.ui64);
+ }
+ }
+ }
+
return (err);
}
@@ -3451,7 +3565,7 @@ fop_lookup(
}
if (ret == 0 && *vpp) {
VOPSTATS_UPDATE(*vpp, lookup);
- if ((*vpp)->v_path == NULL) {
+ if ((*vpp)->v_path == vn_vpath_empty) {
vn_setpath(rootdir, dvp, *vpp, nm, strlen(nm));
}
}
@@ -3493,7 +3607,7 @@ fop_create(
(dvp, name, vap, excl, mode, vpp, cr, flags, ct, vsecp);
if (ret == 0 && *vpp) {
VOPSTATS_UPDATE(*vpp, create);
- if ((*vpp)->v_path == NULL) {
+ if ((*vpp)->v_path == vn_vpath_empty) {
vn_setpath(rootdir, dvp, *vpp, name, strlen(name));
}
}
@@ -3615,7 +3729,7 @@ fop_mkdir(
(dvp, dirname, vap, vpp, cr, ct, flags, vsecp);
if (ret == 0 && *vpp) {
VOPSTATS_UPDATE(*vpp, mkdir);
- if ((*vpp)->v_path == NULL) {
+ if ((*vpp)->v_path == vn_vpath_empty) {
vn_setpath(rootdir, dvp, *vpp, dirname,
strlen(dirname));
}
diff --git a/usr/src/uts/common/fs/zfs/arc.c b/usr/src/uts/common/fs/zfs/arc.c
index 3c3cbdf4c1..5c06a1bb29 100644
--- a/usr/src/uts/common/fs/zfs/arc.c
+++ b/usr/src/uts/common/fs/zfs/arc.c
@@ -129,6 +129,7 @@
#include <sys/vdev.h>
#include <sys/vdev_impl.h>
#include <sys/dsl_pool.h>
+#include <sys/zfs_zone.h>
#include <sys/multilist.h>
#ifdef _KERNEL
#include <sys/vmsystm.h>
@@ -4343,6 +4344,14 @@ top:
rzio = zio_read(pio, spa, bp, buf->b_data, size,
arc_read_done, buf, priority, zio_flags, zb);
+ /*
+ * At this point, this read I/O has already missed in the ARC
+ * and will be going through to the disk. The I/O throttle
+ * should delay this I/O if this zone is using more than its I/O
+ * priority allows.
+ */
+ zfs_zone_io_throttle(ZFS_ZONE_IOP_READ);
+
if (*arc_flags & ARC_FLAG_WAIT)
return (zio_wait(rzio));
diff --git a/usr/src/uts/common/fs/zfs/dbuf.c b/usr/src/uts/common/fs/zfs/dbuf.c
index 4f469fc750..64a4cb74d0 100644
--- a/usr/src/uts/common/fs/zfs/dbuf.c
+++ b/usr/src/uts/common/fs/zfs/dbuf.c
@@ -678,8 +678,19 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
arc_space_consume(DN_MAX_BONUSLEN, ARC_SPACE_OTHER);
if (bonuslen < DN_MAX_BONUSLEN)
bzero(db->db.db_data, DN_MAX_BONUSLEN);
- if (bonuslen)
- bcopy(DN_BONUS(dn->dn_phys), db->db.db_data, bonuslen);
+
+ if (bonuslen) {
+ /*
+ * Absent byzantine on-disk corruption, we fully expect
+ * our bonuslen to be no more than DN_MAX_BONUSLEN --
+ * but we nonetheless explicitly clamp it on the bcopy()
+ * to prevent any on-disk corruption from becoming
+ * rampant in-kernel corruption.
+ */
+ bcopy(DN_BONUS(dn->dn_phys), db->db.db_data,
+ MIN(bonuslen, DN_MAX_BONUSLEN));
+ }
+
DB_DNODE_EXIT(db);
db->db_state = DB_CACHED;
mutex_exit(&db->db_mtx);
diff --git a/usr/src/uts/common/fs/zfs/dmu.c b/usr/src/uts/common/fs/zfs/dmu.c
index 35015825b4..8ce9178ad2 100644
--- a/usr/src/uts/common/fs/zfs/dmu.c
+++ b/usr/src/uts/common/fs/zfs/dmu.c
@@ -1818,7 +1818,6 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
ZCHECKSUM_FLAG_DEDUP))
dedup_verify = B_TRUE;
}
-
/*
* Enable nopwrite if we have secure enough checksum
* algorithm (see comment in zio_nop_write) and
diff --git a/usr/src/uts/common/fs/zfs/dmu_send.c b/usr/src/uts/common/fs/zfs/dmu_send.c
index 50b8aba876..f54d67202b 100644
--- a/usr/src/uts/common/fs/zfs/dmu_send.c
+++ b/usr/src/uts/common/fs/zfs/dmu_send.c
@@ -22,7 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
* Copyright 2014 HybridCluster. All rights reserved.
* Copyright 2016 RackTop Systems.
* Copyright (c) 2014 Integros [integros.com]
@@ -2492,8 +2492,12 @@ receive_read_record(struct receive_arg *ra)
{
struct drr_object *drro = &ra->rrd->header.drr_u.drr_object;
uint32_t size = P2ROUNDUP(drro->drr_bonuslen, 8);
- void *buf = kmem_zalloc(size, KM_SLEEP);
+ void *buf = NULL;
dmu_object_info_t doi;
+
+ if (size > 0)
+ buf = kmem_zalloc(size, KM_SLEEP);
+
err = receive_read_payload_and_next_header(ra, size, buf);
if (err != 0) {
kmem_free(buf, size);
diff --git a/usr/src/uts/common/fs/zfs/dmu_tx.c b/usr/src/uts/common/fs/zfs/dmu_tx.c
index 15b9459ce2..9d3db4212e 100644
--- a/usr/src/uts/common/fs/zfs/dmu_tx.c
+++ b/usr/src/uts/common/fs/zfs/dmu_tx.c
@@ -39,11 +39,11 @@
#include <sys/sa_impl.h>
#include <sys/zfs_context.h>
#include <sys/varargs.h>
+#include <sys/zfs_zone.h>
typedef void (*dmu_tx_hold_func_t)(dmu_tx_t *tx, struct dnode *dn,
uint64_t arg1, uint64_t arg2);
-
dmu_tx_t *
dmu_tx_create_dd(dsl_dir_t *dd)
{
@@ -224,6 +224,8 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
if (len == 0)
return;
+ zfs_zone_io_throttle(ZFS_ZONE_IOP_LOGICAL_WRITE);
+
min_bs = SPA_MINBLOCKSHIFT;
max_bs = highbit64(txh->txh_tx->tx_objset->os_recordsize) - 1;
min_ibs = DN_MIN_INDBLKSHIFT;
diff --git a/usr/src/uts/common/fs/zfs/dsl_dir.c b/usr/src/uts/common/fs/zfs/dsl_dir.c
index 7d86f72ad1..ca7d8b9bee 100644
--- a/usr/src/uts/common/fs/zfs/dsl_dir.c
+++ b/usr/src/uts/common/fs/zfs/dsl_dir.c
@@ -42,6 +42,7 @@
#include <sys/zio.h>
#include <sys/arc.h>
#include <sys/sunddi.h>
+#include <sys/zfs_zone.h>
#include <sys/zfeature.h>
#include <sys/policy.h>
#include <sys/zfs_znode.h>
@@ -1262,7 +1263,7 @@ dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize,
* locks are held.
*/
txg_delay(dd->dd_pool, tx->tx_txg,
- MSEC2NSEC(10), MSEC2NSEC(10));
+ zfs_zone_txg_delay(), MSEC2NSEC(10));
err = SET_ERROR(ERESTART);
}
}
diff --git a/usr/src/uts/common/fs/zfs/dsl_pool.c b/usr/src/uts/common/fs/zfs/dsl_pool.c
index ee2d8ee9eb..242db1c9f1 100644
--- a/usr/src/uts/common/fs/zfs/dsl_pool.c
+++ b/usr/src/uts/common/fs/zfs/dsl_pool.c
@@ -43,6 +43,7 @@
#include <sys/zfs_znode.h>
#include <sys/spa_impl.h>
#include <sys/dsl_deadlist.h>
+#include <sys/zfs_zone.h>
#include <sys/bptree.h>
#include <sys/zfeature.h>
#include <sys/zil_impl.h>
diff --git a/usr/src/uts/common/fs/zfs/metaslab.c b/usr/src/uts/common/fs/zfs/metaslab.c
index 9030b855a1..296f0006fc 100644
--- a/usr/src/uts/common/fs/zfs/metaslab.c
+++ b/usr/src/uts/common/fs/zfs/metaslab.c
@@ -64,6 +64,11 @@ uint64_t metaslab_gang_bang = SPA_MAXBLOCKSIZE + 1; /* force gang blocks */
int zfs_condense_pct = 200;
/*
+ * Never condense any space map. This is for debugging/recovery only.
+ */
+int zfs_condense_never = 0;
+
+/*
* Condensing a metaslab is not guaranteed to actually reduce the amount of
* space used on disk. In particular, a space map uses data in increments of
* MAX(1 << ashift, space_map_blksize), so a metaslab might use the
@@ -1657,6 +1662,9 @@ metaslab_should_condense(metaslab_t *msp)
ASSERT(MUTEX_HELD(&msp->ms_lock));
ASSERT(msp->ms_loaded);
+ if (zfs_condense_never != 0)
+ return (B_FALSE);
+
/*
* Use the ms_size_tree range tree, which is ordered by size, to
* obtain the largest segment in the free tree. We always condense
diff --git a/usr/src/uts/common/fs/zfs/sa.c b/usr/src/uts/common/fs/zfs/sa.c
index 7ddf806ec5..3168b47304 100644
--- a/usr/src/uts/common/fs/zfs/sa.c
+++ b/usr/src/uts/common/fs/zfs/sa.c
@@ -24,6 +24,7 @@
* Portions Copyright 2011 iXsystems, Inc
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
+ * Copyright (c) 2015 Joyent, Inc. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
*/
@@ -407,15 +408,18 @@ sa_add_layout_entry(objset_t *os, sa_attr_type_t *attrs, int attr_count,
{
sa_os_t *sa = os->os_sa;
sa_lot_t *tb, *findtb;
- int i;
+ int i, size;
avl_index_t loc;
ASSERT(MUTEX_HELD(&sa->sa_lock));
tb = kmem_zalloc(sizeof (sa_lot_t), KM_SLEEP);
tb->lot_attr_count = attr_count;
- tb->lot_attrs = kmem_alloc(sizeof (sa_attr_type_t) * attr_count,
- KM_SLEEP);
- bcopy(attrs, tb->lot_attrs, sizeof (sa_attr_type_t) * attr_count);
+
+ if ((size = sizeof (sa_attr_type_t) * attr_count) != 0) {
+ tb->lot_attrs = kmem_alloc(size, KM_SLEEP);
+ bcopy(attrs, tb->lot_attrs, size);
+ }
+
tb->lot_num = lot_num;
tb->lot_hash = hash;
tb->lot_instance = 0;
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
index 3f1b7d8a54..0b99d08e72 100644
--- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
@@ -116,6 +116,7 @@ struct vdev_queue {
avl_tree_t vq_read_offset_tree;
avl_tree_t vq_write_offset_tree;
uint64_t vq_last_offset;
+ zoneid_t vq_last_zone_id;
hrtime_t vq_io_complete_ts; /* time last i/o completed */
kmutex_t vq_lock;
};
diff --git a/usr/src/uts/common/fs/zfs/sys/zfs_zone.h b/usr/src/uts/common/fs/zfs/sys/zfs_zone.h
new file mode 100644
index 0000000000..f1431b3f55
--- /dev/null
+++ b/usr/src/uts/common/fs/zfs/sys/zfs_zone.h
@@ -0,0 +1,63 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2015, Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_FS_ZFS_ZONE_H
+#define _SYS_FS_ZFS_ZONE_H
+
+#ifdef _KERNEL
+#include <sys/isa_defs.h>
+#include <sys/types32.h>
+#include <sys/vdev_impl.h>
+#include <sys/zio.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum {
+ ZFS_ZONE_IOP_READ = 0,
+ ZFS_ZONE_IOP_WRITE,
+ ZFS_ZONE_IOP_LOGICAL_WRITE,
+} zfs_zone_iop_type_t;
+
+extern void zfs_zone_io_throttle(zfs_zone_iop_type_t);
+
+extern void zfs_zone_zio_init(zio_t *);
+extern void zfs_zone_zio_start(zio_t *);
+extern void zfs_zone_zio_done(zio_t *);
+extern void zfs_zone_zio_dequeue(zio_t *);
+extern void zfs_zone_zio_enqueue(zio_t *);
+extern void zfs_zone_report_txg_sync(void *);
+extern hrtime_t zfs_zone_txg_delay();
+#ifdef _KERNEL
+extern zio_t *zfs_zone_schedule(vdev_queue_t *, zio_priority_t, avl_index_t,
+ avl_tree_t *);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_FS_ZFS_ZONE_H */
diff --git a/usr/src/uts/common/fs/zfs/sys/zio.h b/usr/src/uts/common/fs/zfs/sys/zio.h
index 6d8f7601f3..6d02c95b22 100644
--- a/usr/src/uts/common/fs/zfs/sys/zio.h
+++ b/usr/src/uts/common/fs/zfs/sys/zio.h
@@ -421,6 +421,7 @@ struct zio {
uint64_t io_offset;
hrtime_t io_timestamp;
hrtime_t io_target_timestamp;
+ hrtime_t io_dispatched; /* time I/O was dispatched to disk */
avl_node_t io_queue_node;
avl_node_t io_offset_node;
@@ -449,6 +450,7 @@ struct zio {
zio_cksum_report_t *io_cksum_report;
uint64_t io_ena;
+ zoneid_t io_zoneid; /* zone which originated this I/O */
/* Taskq dispatching state */
taskq_ent_t io_tqent;
};
diff --git a/usr/src/uts/common/fs/zfs/txg.c b/usr/src/uts/common/fs/zfs/txg.c
index 191259e75b..915c9bb4b2 100644
--- a/usr/src/uts/common/fs/zfs/txg.c
+++ b/usr/src/uts/common/fs/zfs/txg.c
@@ -31,6 +31,7 @@
#include <sys/dsl_pool.h>
#include <sys/dsl_scan.h>
#include <sys/callb.h>
+#include <sys/zfs_zone.h>
/*
* ZFS Transaction Groups
@@ -506,6 +507,8 @@ txg_sync_thread(dsl_pool_t *dp)
txg, tx->tx_quiesce_txg_waiting, tx->tx_sync_txg_waiting);
mutex_exit(&tx->tx_sync_lock);
+ zfs_zone_report_txg_sync(dp);
+
start = ddi_get_lbolt();
spa_sync(spa, txg);
delta = ddi_get_lbolt() - start;
diff --git a/usr/src/uts/common/fs/zfs/vdev_disk.c b/usr/src/uts/common/fs/zfs/vdev_disk.c
index a6af0101e7..ab305ed694 100644
--- a/usr/src/uts/common/fs/zfs/vdev_disk.c
+++ b/usr/src/uts/common/fs/zfs/vdev_disk.c
@@ -26,6 +26,7 @@
*/
#include <sys/zfs_context.h>
+#include <sys/zfs_zone.h>
#include <sys/spa_impl.h>
#include <sys/refcount.h>
#include <sys/vdev_disk.h>
@@ -44,6 +45,11 @@ extern ldi_ident_t zfs_li;
static void vdev_disk_close(vdev_t *);
+typedef struct vdev_disk_buf {
+ buf_t vdb_buf;
+ zio_t *vdb_io;
+} vdev_disk_buf_t;
+
typedef struct vdev_disk_ldi_cb {
list_node_t lcb_next;
ldi_callback_id_t lcb_id;
@@ -127,6 +133,8 @@ vdev_disk_off_finalize(ldi_handle_t lh, ldi_ev_cookie_t ecookie,
int ldi_result, void *arg, void *ev_data)
{
vdev_t *vd = (vdev_t *)arg;
+ vdev_disk_t *dvd = vd->vdev_tsd;
+ vdev_disk_ldi_cb_t *lcb;
/*
* Ignore events other than offline.
@@ -586,6 +594,7 @@ static void
vdev_disk_close(vdev_t *vd)
{
vdev_disk_t *dvd = vd->vdev_tsd;
+ vdev_disk_ldi_cb_t *lcb;
if (vd->vdev_reopening || dvd == NULL)
return;
@@ -815,6 +824,8 @@ vdev_disk_io_start(zio_t *zio)
bp->b_bufsize = zio->io_size;
bp->b_iodone = (int (*)())vdev_disk_io_intr;
+ zfs_zone_zio_start(zio);
+
/* ldi_strategy() will return non-zero only on programming errors */
VERIFY(ldi_strategy(dvd->vd_lh, bp) == 0);
}
@@ -824,6 +835,8 @@ vdev_disk_io_done(zio_t *zio)
{
vdev_t *vd = zio->io_vd;
+ zfs_zone_zio_done(zio);
+
/*
* If the device returned EIO, then attempt a DKIOCSTATE ioctl to see if
* the device has been removed. If this is the case, then we trigger an
diff --git a/usr/src/uts/common/fs/zfs/vdev_queue.c b/usr/src/uts/common/fs/zfs/vdev_queue.c
index 4917cc9284..cc415e2ca0 100644
--- a/usr/src/uts/common/fs/zfs/vdev_queue.c
+++ b/usr/src/uts/common/fs/zfs/vdev_queue.c
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2013, Joyent, Inc. All rights reserved.
*/
/*
@@ -34,6 +35,7 @@
#include <sys/zio.h>
#include <sys/avl.h>
#include <sys/dsl_pool.h>
+#include <sys/zfs_zone.h>
/*
* ZFS I/O Scheduler
@@ -142,7 +144,7 @@ uint32_t zfs_vdev_sync_write_min_active = 10;
uint32_t zfs_vdev_sync_write_max_active = 10;
uint32_t zfs_vdev_async_read_min_active = 1;
uint32_t zfs_vdev_async_read_max_active = 3;
-uint32_t zfs_vdev_async_write_min_active = 1;
+uint32_t zfs_vdev_async_write_min_active = 3;
uint32_t zfs_vdev_async_write_max_active = 10;
uint32_t zfs_vdev_scrub_min_active = 1;
uint32_t zfs_vdev_scrub_max_active = 2;
@@ -238,6 +240,8 @@ vdev_queue_init(vdev_t *vd)
vdev_queue_offset_compare, sizeof (zio_t),
offsetof(struct zio, io_offset_node));
+ vq->vq_last_zone_id = 0;
+
for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
int (*compfn) (const void *, const void *);
@@ -275,6 +279,7 @@ vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio)
{
spa_t *spa = zio->io_spa;
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
+ zfs_zone_zio_enqueue(zio);
avl_add(vdev_queue_class_tree(vq, zio->io_priority), zio);
avl_add(vdev_queue_type_tree(vq, zio->io_type), zio);
@@ -290,6 +295,7 @@ vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio)
{
spa_t *spa = zio->io_spa;
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
+ zfs_zone_zio_dequeue(zio);
avl_remove(vdev_queue_class_tree(vq, zio->io_priority), zio);
avl_remove(vdev_queue_type_tree(vq, zio->io_type), zio);
@@ -649,7 +655,11 @@ again:
search.io_timestamp = 0;
search.io_offset = vq->vq_last_offset + 1;
VERIFY3P(avl_find(tree, &search, &idx), ==, NULL);
+#ifdef _KERNEL
+ zio = zfs_zone_schedule(vq, p, idx, tree);
+#else
zio = avl_nearest(tree, idx, AVL_AFTER);
+#endif
if (zio == NULL)
zio = avl_first(tree);
ASSERT3U(zio->io_priority, ==, p);
diff --git a/usr/src/uts/common/fs/zfs/zfs_dir.c b/usr/src/uts/common/fs/zfs/zfs_dir.c
index 2b18ecb01c..132e84b111 100644
--- a/usr/src/uts/common/fs/zfs/zfs_dir.c
+++ b/usr/src/uts/common/fs/zfs/zfs_dir.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc.
* Copyright (c) 2013, 2015 by Delphix. All rights reserved.
*/
@@ -853,9 +854,9 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
if (zp->z_links <= zp_is_dir) {
zfs_panic_recover("zfs: link count on %s is %u, "
"should be at least %u",
- zp->z_vnode->v_path ? zp->z_vnode->v_path :
- "<unknown>", (int)zp->z_links,
- zp_is_dir + 1);
+ zp->z_vnode->v_path != vn_vpath_empty ?
+ zp->z_vnode->v_path : "<unknown>",
+ (int)zp->z_links, zp_is_dir + 1);
zp->z_links = zp_is_dir + 1;
}
if (--zp->z_links == zp_is_dir) {
diff --git a/usr/src/uts/common/fs/zfs/zfs_ioctl.c b/usr/src/uts/common/fs/zfs/zfs_ioctl.c
index a7feada44f..6d28956707 100644
--- a/usr/src/uts/common/fs/zfs/zfs_ioctl.c
+++ b/usr/src/uts/common/fs/zfs/zfs_ioctl.c
@@ -615,9 +615,10 @@ zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
* Check permissions for special properties.
*/
switch (prop) {
+ case ZFS_PROP_DEDUP:
case ZFS_PROP_ZONED:
/*
- * Disallow setting of 'zoned' from within a local zone.
+ * Disallow setting these properties from within a local zone.
*/
if (!INGLOBALZONE(curproc))
return (SET_ERROR(EPERM));
@@ -947,6 +948,9 @@ zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
int error;
+ if (secpolicy_fs_import(cr) != 0)
+ return (set_errno(EPERM));
+
if ((error = zfs_secpolicy_write_perms(zc->zc_name,
ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
return (error);
@@ -2037,7 +2041,8 @@ zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
}
static int
-zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
+zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os,
+ boolean_t cachedpropsonly)
{
int error = 0;
nvlist_t *nv;
@@ -2055,7 +2060,8 @@ zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
* XXX reading with out owning
*/
if (!zc->zc_objset_stats.dds_inconsistent &&
- dmu_objset_type(os) == DMU_OST_ZVOL) {
+ dmu_objset_type(os) == DMU_OST_ZVOL &&
+ !cachedpropsonly) {
error = zvol_get_stats(os, nv);
if (error == EIO)
return (error);
@@ -2082,11 +2088,24 @@ static int
zfs_ioc_objset_stats(zfs_cmd_t *zc)
{
objset_t *os;
+ nvlist_t *nvl = NULL;
+ boolean_t cachedpropsonly = B_FALSE;
int error;
+ if (zc->zc_nvlist_src != NULL &&
+ (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
+ zc->zc_iflags, &nvl) != 0))
+ return (error);
+
+ if (nvl != NULL) {
+ (void) nvlist_lookup_boolean_value(nvl, "cachedpropsonly",
+ &cachedpropsonly);
+ nvlist_free(nvl);
+ }
+
error = dmu_objset_hold(zc->zc_name, FTAG, &os);
if (error == 0) {
- error = zfs_ioc_objset_stats_impl(zc, os);
+ error = zfs_ioc_objset_stats_impl(zc, os, cachedpropsonly);
dmu_objset_rele(os, FTAG);
}
@@ -2281,8 +2300,21 @@ static int
zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
{
objset_t *os;
+ nvlist_t *nvl = NULL;
+ boolean_t cachedpropsonly = B_FALSE;
int error;
+ if (zc->zc_nvlist_src != NULL &&
+ (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
+ zc->zc_iflags, &nvl) != 0))
+ return (error);
+
+ if (nvl != NULL) {
+ (void) nvlist_lookup_boolean_value(nvl, "cachedpropsonly",
+ &cachedpropsonly);
+ nvlist_free(nvl);
+ }
+
error = dmu_objset_hold(zc->zc_name, FTAG, &os);
if (error != 0) {
return (error == ENOENT ? ESRCH : error);
@@ -2311,8 +2343,10 @@ zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
objset_t *ossnap;
error = dmu_objset_from_ds(ds, &ossnap);
- if (error == 0)
- error = zfs_ioc_objset_stats_impl(zc, ossnap);
+ if (error == 0) {
+ error = zfs_ioc_objset_stats_impl(zc,
+ ossnap, cachedpropsonly);
+ }
dsl_dataset_rele(ds, FTAG);
}
} else if (error == ENOENT) {
@@ -3022,6 +3056,7 @@ zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
uint64_t sense = ZFS_PROP_UNDEFINED;
uint64_t norm = ZFS_PROP_UNDEFINED;
uint64_t u8 = ZFS_PROP_UNDEFINED;
+ int error;
ASSERT(zplprops != NULL);
@@ -3065,8 +3100,9 @@ zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
VERIFY(nvlist_add_uint64(zplprops,
zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
- if (norm == ZFS_PROP_UNDEFINED)
- VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
+ if (norm == ZFS_PROP_UNDEFINED &&
+ (error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm)) != 0)
+ return (error);
VERIFY(nvlist_add_uint64(zplprops,
zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
@@ -3075,13 +3111,15 @@ zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
*/
if (norm)
u8 = 1;
- if (u8 == ZFS_PROP_UNDEFINED)
- VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
+ if (u8 == ZFS_PROP_UNDEFINED &&
+ (error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8)) != 0)
+ return (error);
VERIFY(nvlist_add_uint64(zplprops,
zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
- if (sense == ZFS_PROP_UNDEFINED)
- VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
+ if (sense == ZFS_PROP_UNDEFINED &&
+ (error = zfs_get_zplprop(os, ZFS_PROP_CASE, &sense)) != 0)
+ return (error);
VERIFY(nvlist_add_uint64(zplprops,
zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
diff --git a/usr/src/uts/common/fs/zfs/zfs_vfsops.c b/usr/src/uts/common/fs/zfs/zfs_vfsops.c
index 0d02fd5bec..ace4bf8173 100644
--- a/usr/src/uts/common/fs/zfs/zfs_vfsops.c
+++ b/usr/src/uts/common/fs/zfs/zfs_vfsops.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
*/
@@ -1903,6 +1904,17 @@ zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr)
if (zfsvfs->z_ctldir != NULL)
zfsctl_destroy(zfsvfs);
+ /*
+ * If we're doing a forced unmount on a dataset which still has
+ * references and is in a zone, then we need to cleanup the zone
+ * reference at this point or else the zone will never be able to
+ * shutdown.
+ */
+ if ((fflag & MS_FORCE) && vfsp->vfs_count > 1 && vfsp->vfs_zone) {
+ zone_rele_ref(&vfsp->vfs_implp->vi_zone_ref, ZONE_REF_VFS);
+ vfsp->vfs_zone = NULL;
+ }
+
return (0);
}
diff --git a/usr/src/uts/common/fs/zfs/zfs_vnops.c b/usr/src/uts/common/fs/zfs/zfs_vnops.c
index 9cba49b402..829d57b760 100644
--- a/usr/src/uts/common/fs/zfs/zfs_vnops.c
+++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c
@@ -23,7 +23,7 @@
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
- * Copyright 2015 Joyent, Inc.
+ * Copyright 2016 Joyent, Inc.
*/
/* Portions Copyright 2007 Jeremy Teo */
@@ -664,6 +664,7 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
rl_t *rl;
int max_blksz = zfsvfs->z_max_blksz;
int error = 0;
+ int prev_error;
arc_buf_t *abuf;
iovec_t *aiov = NULL;
xuio_t *xuio = NULL;
@@ -685,6 +686,17 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
limit = MAXOFFSET_T;
+ /*
+ * Pre-fault the pages to ensure slow (eg NFS) pages
+ * don't hold up txg.
+ * Skip this if uio contains loaned arc_buf.
+ */
+ if ((uio->uio_extflg == UIO_XUIO) &&
+ (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY))
+ xuio = (xuio_t *)uio;
+ else
+ uio_prefaultpages(n, uio);
+
ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
@@ -737,17 +749,6 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
}
/*
- * Pre-fault the pages to ensure slow (eg NFS) pages
- * don't hold up txg.
- * Skip this if uio contains loaned arc_buf.
- */
- if ((uio->uio_extflg == UIO_XUIO) &&
- (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY))
- xuio = (xuio_t *)uio;
- else
- uio_prefaultpages(MIN(n, max_blksz), uio);
-
- /*
* If in append mode, set the io offset pointer to eof.
*/
if (ioflag & FAPPEND) {
@@ -968,7 +969,6 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
while ((end_size = zp->z_size) < uio->uio_loffset) {
(void) atomic_cas_64(&zp->z_size, end_size,
uio->uio_loffset);
- ASSERT(error == 0);
}
/*
* If we are replaying and eof is non zero then force
@@ -978,18 +978,20 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0)
zp->z_size = zfsvfs->z_replay_eof;
+ /*
+ * Keep track of a possible pre-existing error from a partial
+ * write via dmu_write_uio_dbuf above.
+ */
+ prev_error = error;
error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag);
dmu_tx_commit(tx);
- if (error != 0)
+ if (prev_error != 0 || error != 0)
break;
ASSERT(tx_bytes == nbytes);
n -= nbytes;
-
- if (!xuio && n > 0)
- uio_prefaultpages(MIN(n, max_blksz), uio);
}
zfs_range_unlock(rl);
@@ -2832,8 +2834,11 @@ top:
return (err);
}
- if (vap->va_size == 0)
+ if (vap->va_size == 0) {
vnevent_truncate(ZTOV(zp), ct);
+ } else {
+ vnevent_resize(ZTOV(zp), ct);
+ }
}
if (mask & (AT_ATIME|AT_MTIME) ||
@@ -3761,9 +3766,7 @@ top:
if (error == 0) {
vnevent_rename_src(ZTOV(szp), sdvp, snm, ct);
- /* notify the target dir if it is not the same as source dir */
- if (tdvp != sdvp)
- vnevent_rename_dest_dir(tdvp, ct);
+ vnevent_rename_dest_dir(tdvp, ZTOV(szp), tnm, ct);
}
out:
if (zl != NULL)
@@ -4255,6 +4258,8 @@ zfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp,
&zp->z_pflags, 8);
zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
B_TRUE);
+ err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
+
zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0);
}
dmu_tx_commit(tx);
@@ -4790,10 +4795,6 @@ zfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
ASSERT3U(VTOZ(vp)->z_mapcnt, >=, pages);
atomic_add_64(&VTOZ(vp)->z_mapcnt, -pages);
- if ((flags & MAP_SHARED) && (prot & PROT_WRITE) &&
- vn_has_cached_data(vp))
- (void) VOP_PUTPAGE(vp, off, len, B_ASYNC, cr, ct);
-
return (0);
}
@@ -4859,8 +4860,13 @@ zfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag,
error = zfs_freesp(zp, off, len, flag, TRUE);
- if (error == 0 && off == 0 && len == 0)
- vnevent_truncate(ZTOV(zp), ct);
+ if (error == 0 && len == 0) {
+ if (off == 0) {
+ vnevent_truncate(ZTOV(zp), ct);
+ } else {
+ vnevent_resize(ZTOV(zp), ct);
+ }
+ }
ZFS_EXIT(zfsvfs);
return (error);
diff --git a/usr/src/uts/common/fs/zfs/zfs_zone.c b/usr/src/uts/common/fs/zfs/zfs_zone.c
new file mode 100644
index 0000000000..4861c64f8e
--- /dev/null
+++ b/usr/src/uts/common/fs/zfs/zfs_zone.c
@@ -0,0 +1,1336 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015, Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * The ZFS/Zone I/O throttle and scheduler attempts to ensure fair access to
+ * ZFS I/O resources for each zone.
+ *
+ * I/O contention can be major pain point on a multi-tenant system. A single
+ * zone can issue a stream of I/O operations, usually synchronous writes, which
+ * disrupt I/O performance for all other zones. This problem is further
+ * exacerbated by ZFS, which buffers all asynchronous writes in a single TXG,
+ * a set of blocks which are atomically synced to disk. The process of
+ * syncing a TXG can occupy all of a device's I/O bandwidth, thereby starving
+ * out any pending read operations.
+ *
+ * There are two facets to this capability; the throttle and the scheduler.
+ *
+ * Throttle
+ *
+ * The requirements on the throttle are:
+ *
+ * 1) Ensure consistent and predictable I/O latency across all zones.
+ * 2) Sequential and random workloads have very different characteristics,
+ * so it is a non-starter to track IOPS or throughput.
+ * 3) A zone should be able to use the full disk bandwidth if no other zone
+ * is actively using the disk.
+ *
+ * The throttle has two components: one to track and account for each zone's
+ * I/O requests, and another to throttle each zone's operations when it
+ * exceeds its fair share of disk I/O. When the throttle detects that a zone is
+ * consuming more than is appropriate, each read or write system call is
+ * delayed by up to 100 microseconds, which we've found is sufficient to allow
+ * other zones to interleave I/O requests during those delays.
+ *
+ * Note: The throttle will delay each logical I/O (as opposed to the physical
+ * I/O which will likely be issued asynchronously), so it may be easier to
+ * think of the I/O throttle delaying each read/write syscall instead of the
+ * actual I/O operation. For each zone, the throttle tracks an ongoing average
+ * of read and write operations performed to determine the overall I/O
+ * utilization for each zone.
+ *
+ * The throttle calculates a I/O utilization metric for each zone using the
+ * following formula:
+ *
+ * (# of read syscalls) x (Average read latency) +
+ * (# of write syscalls) x (Average write latency)
+ *
+ * Once each zone has its utilization metric, the I/O throttle will compare I/O
+ * utilization across all zones, and if a zone has a higher-than-average I/O
+ * utilization, system calls from that zone are throttled. That is, if one
+ * zone has a much higher utilization, that zone's delay is increased by 5
+ * microseconds, up to a maximum of 100 microseconds. Conversely, if a zone is
+ * already throttled and has a lower utilization than average, its delay will
+ * be lowered by 5 microseconds.
+ *
+ * The throttle calculation is driven by IO activity, but since IO does not
+ * happen at fixed intervals, timestamps are used to track when the last update
+ * was made and to drive recalculation.
+ *
+ * The throttle recalculates each zone's I/O usage and throttle delay (if any)
+ * on the zfs_zone_adjust_time interval. Overall I/O latency is maintained as
+ * a decayed average which is updated on the zfs_zone_sys_avg_cycle interval.
+ *
+ * Scheduler
+ *
+ * The I/O scheduler manages the vdev queues – the queues of pending I/Os to
+ * issue to the disks. It only makes scheduling decisions for the two
+ * synchronous I/O queues (read & write).
+ *
+ * The scheduler maintains how many I/Os in the queue are from each zone, and
+ * if one zone has a disproportionately large number of I/Os in the queue, the
+ * scheduler will allow certain I/Os from the underutilized zones to be "bumped"
+ * and pulled from the middle of the queue. This bump allows zones with a small
+ * number of I/Os (so small they may not even be taken into account by the
+ * throttle) to complete quickly instead of waiting behind dozens of I/Os from
+ * other zones.
+ */
+
+#include <sys/spa.h>
+#include <sys/vdev_impl.h>
+#include <sys/zfs_zone.h>
+
+#ifndef _KERNEL
+
+/*
+ * Stubs for when compiling for user-land.
+ */
+
+void
+zfs_zone_io_throttle(zfs_zone_iop_type_t type)
+{
+}
+
+void
+zfs_zone_zio_init(zio_t *zp)
+{
+}
+
+void
+zfs_zone_zio_start(zio_t *zp)
+{
+}
+
+void
+zfs_zone_zio_done(zio_t *zp)
+{
+}
+
+void
+zfs_zone_zio_dequeue(zio_t *zp)
+{
+}
+
+void
+zfs_zone_zio_enqueue(zio_t *zp)
+{
+}
+
+/*ARGSUSED*/
+void
+zfs_zone_report_txg_sync(void *dp)
+{
+}
+
+hrtime_t
+zfs_zone_txg_delay()
+{
+ return (MSEC2NSEC(10));
+}
+
+#else
+
+/*
+ * The real code.
+ */
+
+#include <sys/systm.h>
+#include <sys/thread.h>
+#include <sys/proc.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/atomic.h>
+#include <sys/zio.h>
+#include <sys/zone.h>
+#include <sys/avl.h>
+#include <sys/sdt.h>
+#include <sys/ddi.h>
+
+/*
+ * The zone throttle delays read and write operations from certain zones based
+ * on each zone's IO utilitzation. Once a cycle (defined by zfs_zone_cycle_time
+ * below), the delays for each zone are recalculated based on the utilization
+ * over the previous window.
+ */
+boolean_t zfs_zone_delay_enable = B_TRUE; /* enable IO throttle */
+uint16_t zfs_zone_delay_step = 5; /* usec amnt to change delay */
+uint16_t zfs_zone_delay_ceiling = 100; /* usec delay max */
+
+boolean_t zfs_zone_priority_enable = B_TRUE; /* enable IO priority */
+
+/*
+ * For certain workloads, one zone may be issuing primarily sequential I/O and
+ * another primarily random I/O. The sequential I/O will complete much more
+ * quickly than the random I/O, driving the average system latency for those
+ * operations way down. As a result, the random I/O may be throttled back, even
+ * though the sequential I/O should be throttled to allow the random I/O more
+ * access to the disk.
+ *
+ * This tunable limits the discrepancy between the read and write system
+ * latency. If one becomes excessively high, this tunable prevents the I/O
+ * throttler from exacerbating the imbalance.
+ */
+uint_t zfs_zone_rw_lat_limit = 10;
+
+/*
+ * The I/O throttle will only start delaying zones when it detects disk
+ * utilization has reached a certain level. This tunable controls the
+ * threshold at which the throttle will start delaying zones. When the number
+ * of vdevs is small, the calculation should correspond closely with the %b
+ * column from iostat -- but as the number of vdevs becomes large, it will
+ * correlate less and less to any single device (therefore making it a poor
+ * approximation for the actual I/O utilization on such systems). We
+ * therefore use our derived utilization conservatively: we know that low
+ * derived utilization does indeed correlate to low I/O use -- but that a high
+ * rate of derived utilization does not necesarily alone denote saturation;
+ * where we see a high rate of utilization, we also look for laggard I/Os to
+ * attempt to detect saturation.
+ */
+uint_t zfs_zone_util_threshold = 80;
+uint_t zfs_zone_underutil_threshold = 60;
+
+/*
+ * There are three important tunables here: zfs_zone_laggard_threshold denotes
+ * the threshold at which an I/O is considered to be of notably high latency;
+ * zfs_zone_laggard_recent denotes the number of microseconds before the
+ * current time after which the last laggard is considered to be sufficiently
+ * recent to merit increasing the throttle; zfs_zone_laggard_ancient denotes
+ * the microseconds before the current time before which the last laggard is
+ * considered to be sufficiently old to merit decreasing the throttle. The
+ * most important tunable of these three is the zfs_zone_laggard_threshold: in
+ * modeling data from a large public cloud, this tunable was found to have a
+ * much greater effect on the throttle than the two time-based thresholds.
+ * This must be set high enough to not result in spurious throttling, but not
+ * so high as to allow pathological I/O to persist in the system.
+ */
+uint_t zfs_zone_laggard_threshold = 50000; /* 50 ms */
+uint_t zfs_zone_laggard_recent = 1000000; /* 1000 ms */
+uint_t zfs_zone_laggard_ancient = 5000000; /* 5000 ms */
+
+/*
+ * Throughout this subsystem, our timestamps are in microseconds. Our system
+ * average cycle is one second or 1 million microseconds. Our zone counter
+ * update cycle is two seconds or 2 million microseconds. We use a longer
+ * duration for that cycle because some ops can see a little over two seconds of
+ * latency when they are being starved by another zone.
+ */
+uint_t zfs_zone_sys_avg_cycle = 1000000; /* 1 s */
+uint_t zfs_zone_cycle_time = 2000000; /* 2 s */
+
+/*
+ * How often the I/O throttle will reevaluate each zone's utilization, in
+ * microseconds. Default is 1/4 sec.
+ */
+uint_t zfs_zone_adjust_time = 250000; /* 250 ms */
+
+typedef struct {
+ hrtime_t cycle_start;
+ int cycle_cnt;
+ hrtime_t cycle_lat;
+ hrtime_t sys_avg_lat;
+} sys_lat_cycle_t;
+
+typedef struct {
+ hrtime_t zi_now;
+ uint_t zi_avgrlat;
+ uint_t zi_avgwlat;
+ uint64_t zi_totpri;
+ uint64_t zi_totutil;
+ int zi_active;
+ uint_t zi_diskutil;
+ boolean_t zi_underutil;
+ boolean_t zi_overutil;
+} zoneio_stats_t;
+
+static sys_lat_cycle_t rd_lat;
+static sys_lat_cycle_t wr_lat;
+
+/*
+ * Some basic disk stats to determine disk utilization. The utilization info
+ * for all disks on the system is aggregated into these values.
+ *
+ * Overall disk utilization for the current cycle is calculated as:
+ *
+ * ((zfs_disk_rtime - zfs_disk_last_rtime) * 100)
+ * ----------------------------------------------
+ * ((now - zfs_zone_last_checked) * 1000);
+ */
+kmutex_t zfs_disk_lock; /* protects the following: */
+uint_t zfs_disk_rcnt; /* Number of outstanding IOs */
+hrtime_t zfs_disk_rtime = 0; /* cummulative sum of time performing IO */
+hrtime_t zfs_disk_rlastupdate = 0; /* time last IO dispatched */
+
+hrtime_t zfs_disk_last_rtime = 0; /* prev. cycle's zfs_disk_rtime val */
+/* time that we last updated per-zone throttle info */
+hrtime_t zfs_zone_last_checked = 0;
+hrtime_t zfs_disk_last_laggard = 0;
+
+/*
+ * Data used to keep track of how often txg sync is running.
+ */
+extern int zfs_txg_timeout;
+static uint_t txg_last_check;
+static uint_t txg_cnt;
+static uint_t txg_sync_rate;
+
+boolean_t zfs_zone_schedule_enable = B_TRUE; /* enable IO sched. */
+/*
+ * Threshold for when zio scheduling should kick in.
+ *
+ * This threshold is based on the zfs_vdev_sync_read_max_active value for the
+ * number of I/Os that can be pending on a device. If there are more than the
+ * max_active ops already queued up, beyond those already issued to the vdev,
+ * then use zone-based scheduling to get the next synchronous zio.
+ */
+uint32_t zfs_zone_schedule_thresh = 10;
+
+/*
+ * On each pass of the scheduler we increment the zone's weight (up to this
+ * maximum). The weight is used by the scheduler to prevent starvation so
+ * that zones which haven't been able to do any IO over many iterations
+ * will max out thier weight to this value.
+ */
+#define SCHED_WEIGHT_MAX 20
+
+/*
+ * Tunables for delay throttling when TXG sync is occurring.
+ *
+ * If the zone is performing a write and we're doing above normal TXG syncing,
+ * then throttle for longer than normal. The zone's wait time is multiplied
+ * by the scale (zfs_zone_txg_throttle_scale).
+ */
+int zfs_zone_txg_throttle_scale = 2;
+hrtime_t zfs_zone_txg_delay_nsec = MSEC2NSEC(20);
+
+typedef struct {
+ int zq_qdepth;
+ zio_priority_t zq_queue;
+ int zq_priority;
+ int zq_wt;
+ zoneid_t zq_zoneid;
+} zone_q_bump_t;
+
+/*
+ * This uses gethrtime() but returns a value in usecs.
+ */
+#define GET_USEC_TIME (gethrtime() / 1000)
+#define NANO_TO_MICRO(x) (x / (NANOSEC / MICROSEC))
+
+/*
+ * Keep track of the zone's ZFS IOPs.
+ *
+ * See the comment on the zfs_zone_io_throttle function for which/how IOPs are
+ * accounted for.
+ *
+ * If the number of ops is >1 then we can just use that value. However,
+ * if the number of ops is <2 then we might have a zone which is trying to do
+ * IO but is not able to get any ops through the system. We don't want to lose
+ * track of this zone so we factor in its decayed count into the current count.
+ *
+ * Each cycle (zfs_zone_sys_avg_cycle) we want to update the decayed count.
+ * However, since this calculation is driven by IO activity and since IO does
+ * not happen at fixed intervals, we use a timestamp to see when the last update
+ * was made. If it was more than one cycle ago, then we need to decay the
+ * historical count by the proper number of additional cycles in which no IO was
+ * performed.
+ *
+ * Return a time delta indicating how far into the current cycle we are or 0
+ * if the last IO was more than a cycle ago.
+ */
+static hrtime_t
+compute_historical_zone_cnt(hrtime_t unow, sys_zio_cntr_t *cp)
+{
+ hrtime_t delta;
+ int gen_cnt;
+
+ /*
+ * Check if its time to recompute a new zone count.
+ * If we're still collecting data for the current cycle, return false.
+ */
+ delta = unow - cp->cycle_start;
+ if (delta < zfs_zone_cycle_time)
+ return (delta);
+
+ /* A previous cycle is past, compute the new zone count. */
+
+ /*
+ * Figure out how many generations we have to decay the historical
+ * count, since multiple cycles may have elapsed since our last IO.
+ * We depend on int rounding here.
+ */
+ gen_cnt = (int)(delta / zfs_zone_cycle_time);
+
+ /* If more than 5 cycles since last the IO, reset count. */
+ if (gen_cnt > 5) {
+ cp->zone_avg_cnt = 0;
+ } else {
+ /* Update the count. */
+ int i;
+
+ /*
+ * If the zone did more than 1 IO, just use its current count
+ * as the historical value, otherwise decay the historical
+ * count and factor that into the new historical count. We
+ * pick a threshold > 1 so that we don't lose track of IO due
+ * to int rounding.
+ */
+ if (cp->cycle_cnt > 1)
+ cp->zone_avg_cnt = cp->cycle_cnt;
+ else
+ cp->zone_avg_cnt = cp->cycle_cnt +
+ (cp->zone_avg_cnt / 2);
+
+ /*
+ * If more than one generation has elapsed since the last
+ * update, decay the values further.
+ */
+ for (i = 1; i < gen_cnt; i++)
+ cp->zone_avg_cnt = cp->zone_avg_cnt / 2;
+ }
+
+ /* A new cycle begins. */
+ cp->cycle_start = unow;
+ cp->cycle_cnt = 0;
+
+ return (0);
+}
+
+/*
+ * Add IO op data to the zone.
+ */
+static void
+add_zone_iop(zone_t *zonep, hrtime_t unow, zfs_zone_iop_type_t op)
+{
+ switch (op) {
+ case ZFS_ZONE_IOP_READ:
+ (void) compute_historical_zone_cnt(unow, &zonep->zone_rd_ops);
+ zonep->zone_rd_ops.cycle_cnt++;
+ break;
+ case ZFS_ZONE_IOP_WRITE:
+ (void) compute_historical_zone_cnt(unow, &zonep->zone_wr_ops);
+ zonep->zone_wr_ops.cycle_cnt++;
+ break;
+ case ZFS_ZONE_IOP_LOGICAL_WRITE:
+ (void) compute_historical_zone_cnt(unow, &zonep->zone_lwr_ops);
+ zonep->zone_lwr_ops.cycle_cnt++;
+ break;
+ }
+}
+
+/*
+ * Use a decaying average to keep track of the overall system latency.
+ *
+ * We want to have the recent activity heavily weighted, but if the
+ * activity decreases or stops, then the average should quickly decay
+ * down to the new value.
+ *
+ * Each cycle (zfs_zone_sys_avg_cycle) we want to update the decayed average.
+ * However, since this calculation is driven by IO activity and since IO does
+ * not happen at fixed intervals, we use a timestamp to see when the last
+ * update was made. If it was more than one cycle ago, then we need to decay
+ * the average by the proper number of additional cycles in which no IO was
+ * performed.
+ *
+ * Return true if we actually computed a new system average.
+ * If we're still within an active cycle there is nothing to do, return false.
+ */
+static boolean_t
+compute_new_sys_avg(hrtime_t unow, sys_lat_cycle_t *cp)
+{
+ hrtime_t delta;
+ int gen_cnt;
+
+ /*
+ * Check if its time to recompute a new average.
+ * If we're still collecting data for the current cycle, return false.
+ */
+ delta = unow - cp->cycle_start;
+ if (delta < zfs_zone_sys_avg_cycle)
+ return (B_FALSE);
+
+ /* A previous cycle is past, compute a new system average. */
+
+ /*
+ * Figure out how many generations we have to decay, since multiple
+ * cycles may have elapsed since our last IO.
+ * We count on int rounding here.
+ */
+ gen_cnt = (int)(delta / zfs_zone_sys_avg_cycle);
+
+ /* If more than 5 cycles since last the IO, reset average. */
+ if (gen_cnt > 5) {
+ cp->sys_avg_lat = 0;
+ } else {
+ /* Update the average. */
+ int i;
+
+ cp->sys_avg_lat =
+ (cp->sys_avg_lat + cp->cycle_lat) / (1 + cp->cycle_cnt);
+
+ /*
+ * If more than one generation has elapsed since the last
+ * update, decay the values further.
+ */
+ for (i = 1; i < gen_cnt; i++)
+ cp->sys_avg_lat = cp->sys_avg_lat / 2;
+ }
+
+ /* A new cycle begins. */
+ cp->cycle_start = unow;
+ cp->cycle_cnt = 0;
+ cp->cycle_lat = 0;
+
+ return (B_TRUE);
+}
+
+static void
+add_sys_iop(hrtime_t unow, int op, int lat)
+{
+ switch (op) {
+ case ZFS_ZONE_IOP_READ:
+ (void) compute_new_sys_avg(unow, &rd_lat);
+ rd_lat.cycle_cnt++;
+ rd_lat.cycle_lat += lat;
+ break;
+ case ZFS_ZONE_IOP_WRITE:
+ (void) compute_new_sys_avg(unow, &wr_lat);
+ wr_lat.cycle_cnt++;
+ wr_lat.cycle_lat += lat;
+ break;
+ }
+}
+
+/*
+ * Get the zone IO counts.
+ */
+static uint_t
+calc_zone_cnt(hrtime_t unow, sys_zio_cntr_t *cp)
+{
+ hrtime_t delta;
+ uint_t cnt;
+
+ if ((delta = compute_historical_zone_cnt(unow, cp)) == 0) {
+ /*
+ * No activity in the current cycle, we already have the
+ * historical data so we'll use that.
+ */
+ cnt = cp->zone_avg_cnt;
+ } else {
+ /*
+ * If we're less than half way through the cycle then use
+ * the current count plus half the historical count, otherwise
+ * just use the current count.
+ */
+ if (delta < (zfs_zone_cycle_time / 2))
+ cnt = cp->cycle_cnt + (cp->zone_avg_cnt / 2);
+ else
+ cnt = cp->cycle_cnt;
+ }
+
+ return (cnt);
+}
+
+/*
+ * Get the average read/write latency in usecs for the system.
+ */
+static uint_t
+calc_avg_lat(hrtime_t unow, sys_lat_cycle_t *cp)
+{
+ if (compute_new_sys_avg(unow, cp)) {
+ /*
+ * No activity in the current cycle, we already have the
+ * historical data so we'll use that.
+ */
+ return (cp->sys_avg_lat);
+ } else {
+ /*
+ * We're within a cycle; weight the current activity higher
+ * compared to the historical data and use that.
+ */
+ DTRACE_PROBE3(zfs__zone__calc__wt__avg,
+ uintptr_t, cp->sys_avg_lat,
+ uintptr_t, cp->cycle_lat,
+ uintptr_t, cp->cycle_cnt);
+
+ return ((cp->sys_avg_lat + (cp->cycle_lat * 8)) /
+ (1 + (cp->cycle_cnt * 8)));
+ }
+}
+
+/*
+ * Account for the current IOP on the zone and for the system as a whole.
+ * The latency parameter is in usecs.
+ */
+static void
+add_iop(zone_t *zonep, hrtime_t unow, zfs_zone_iop_type_t op, hrtime_t lat)
+{
+ /* Add op to zone */
+ add_zone_iop(zonep, unow, op);
+
+ /* Track system latency */
+ if (op != ZFS_ZONE_IOP_LOGICAL_WRITE)
+ add_sys_iop(unow, op, lat);
+}
+
+/*
+ * Calculate and return the total number of read ops, write ops and logical
+ * write ops for the given zone. If the zone has issued operations of any type
+ * return a non-zero value, otherwise return 0.
+ */
+static int
+get_zone_io_cnt(hrtime_t unow, zone_t *zonep, uint_t *rops, uint_t *wops,
+ uint_t *lwops)
+{
+ *rops = calc_zone_cnt(unow, &zonep->zone_rd_ops);
+ *wops = calc_zone_cnt(unow, &zonep->zone_wr_ops);
+ *lwops = calc_zone_cnt(unow, &zonep->zone_lwr_ops);
+
+ DTRACE_PROBE4(zfs__zone__io__cnt, uintptr_t, zonep->zone_id,
+ uintptr_t, *rops, uintptr_t, *wops, uintptr_t, *lwops);
+
+ return (*rops | *wops | *lwops);
+}
+
+/*
+ * Get the average read/write latency in usecs for the system.
+ */
+static void
+get_sys_avg_lat(hrtime_t unow, uint_t *rlat, uint_t *wlat)
+{
+ *rlat = calc_avg_lat(unow, &rd_lat);
+ *wlat = calc_avg_lat(unow, &wr_lat);
+
+ /*
+ * In an attempt to improve the accuracy of the throttling algorithm,
+ * assume that IO operations can't have zero latency. Instead, assume
+ * a reasonable lower bound for each operation type. If the actual
+ * observed latencies are non-zero, use those latency values instead.
+ */
+ if (*rlat == 0)
+ *rlat = 1000;
+ if (*wlat == 0)
+ *wlat = 1000;
+
+ DTRACE_PROBE2(zfs__zone__sys__avg__lat, uintptr_t, *rlat,
+ uintptr_t, *wlat);
+}
+
+/*
+ * Find disk utilization for each zone and average utilization for all active
+ * zones.
+ */
+static int
+zfs_zone_wait_adjust_calculate_cb(zone_t *zonep, void *arg)
+{
+ zoneio_stats_t *sp = arg;
+ uint_t rops, wops, lwops;
+
+ if (zonep->zone_id == GLOBAL_ZONEID ||
+ get_zone_io_cnt(sp->zi_now, zonep, &rops, &wops, &lwops) == 0) {
+ zonep->zone_io_util = 0;
+ return (0);
+ }
+
+ zonep->zone_io_util = (rops * sp->zi_avgrlat) +
+ (wops * sp->zi_avgwlat) + (lwops * sp->zi_avgwlat);
+ sp->zi_totutil += zonep->zone_io_util;
+
+ if (zonep->zone_io_util > 0) {
+ sp->zi_active++;
+ sp->zi_totpri += zonep->zone_zfs_io_pri;
+ }
+
+ /*
+ * sdt:::zfs-zone-utilization
+ *
+ * arg0: zone ID
+ * arg1: read operations observed during time window
+ * arg2: physical write operations observed during time window
+ * arg3: logical write ops observed during time window
+ * arg4: calculated utilization given read and write ops
+ * arg5: I/O priority assigned to this zone
+ */
+ DTRACE_PROBE6(zfs__zone__utilization, uint_t, zonep->zone_id,
+ uint_t, rops, uint_t, wops, uint_t, lwops,
+ uint_t, zonep->zone_io_util, uint_t, zonep->zone_zfs_io_pri);
+
+ return (0);
+}
+
+static void
+zfs_zone_delay_inc(zone_t *zonep)
+{
+ if (zonep->zone_io_delay < zfs_zone_delay_ceiling)
+ zonep->zone_io_delay += zfs_zone_delay_step;
+}
+
+static void
+zfs_zone_delay_dec(zone_t *zonep)
+{
+ if (zonep->zone_io_delay > 0)
+ zonep->zone_io_delay -= zfs_zone_delay_step;
+}
+
+/*
+ * For all zones "far enough" away from the average utilization, increase that
+ * zones delay. Otherwise, reduce its delay.
+ */
+static int
+zfs_zone_wait_adjust_delay_cb(zone_t *zonep, void *arg)
+{
+ zoneio_stats_t *sp = arg;
+ uint16_t delay = zonep->zone_io_delay;
+ uint_t fairutil = 0;
+
+ zonep->zone_io_util_above_avg = B_FALSE;
+
+ /*
+ * Given the calculated total utilitzation for all zones, calculate the
+ * fair share of I/O for this zone.
+ */
+ if (zfs_zone_priority_enable && sp->zi_totpri > 0) {
+ fairutil = (sp->zi_totutil * zonep->zone_zfs_io_pri) /
+ sp->zi_totpri;
+ } else if (sp->zi_active > 0) {
+ fairutil = sp->zi_totutil / sp->zi_active;
+ }
+
+ /*
+ * Adjust each IO's delay. If the overall delay becomes too high, avoid
+ * increasing beyond the ceiling value.
+ */
+ if (zonep->zone_io_util > fairutil && sp->zi_overutil) {
+ zonep->zone_io_util_above_avg = B_TRUE;
+
+ if (sp->zi_active > 1)
+ zfs_zone_delay_inc(zonep);
+ } else if (zonep->zone_io_util < fairutil || sp->zi_underutil ||
+ sp->zi_active <= 1) {
+ zfs_zone_delay_dec(zonep);
+ }
+
+ /*
+ * sdt:::zfs-zone-throttle
+ *
+ * arg0: zone ID
+ * arg1: old delay for this zone
+ * arg2: new delay for this zone
+ * arg3: calculated fair I/O utilization
+ * arg4: actual I/O utilization
+ */
+ DTRACE_PROBE5(zfs__zone__throttle, uintptr_t, zonep->zone_id,
+ uintptr_t, delay, uintptr_t, zonep->zone_io_delay,
+ uintptr_t, fairutil, uintptr_t, zonep->zone_io_util);
+
+ return (0);
+}
+
+/*
+ * Examine the utilization between different zones, and adjust the delay for
+ * each zone appropriately.
+ */
+static void
+zfs_zone_wait_adjust(hrtime_t unow, hrtime_t last_checked)
+{
+ zoneio_stats_t stats;
+ hrtime_t laggard_udelta = 0;
+
+ (void) bzero(&stats, sizeof (stats));
+
+ stats.zi_now = unow;
+ get_sys_avg_lat(unow, &stats.zi_avgrlat, &stats.zi_avgwlat);
+
+ if (stats.zi_avgrlat > stats.zi_avgwlat * zfs_zone_rw_lat_limit)
+ stats.zi_avgrlat = stats.zi_avgwlat * zfs_zone_rw_lat_limit;
+ else if (stats.zi_avgrlat * zfs_zone_rw_lat_limit < stats.zi_avgwlat)
+ stats.zi_avgwlat = stats.zi_avgrlat * zfs_zone_rw_lat_limit;
+
+ if (zone_walk(zfs_zone_wait_adjust_calculate_cb, &stats) != 0)
+ return;
+
+ /*
+ * Calculate disk utilization for the most recent period.
+ */
+ if (zfs_disk_last_rtime == 0 || unow - last_checked <= 0) {
+ stats.zi_diskutil = 0;
+ } else {
+ stats.zi_diskutil =
+ ((zfs_disk_rtime - zfs_disk_last_rtime) * 100) /
+ ((unow - last_checked) * 1000);
+ }
+ zfs_disk_last_rtime = zfs_disk_rtime;
+
+ if (unow > zfs_disk_last_laggard)
+ laggard_udelta = unow - zfs_disk_last_laggard;
+
+ /*
+ * To minimize porpoising, we have three separate states for our
+ * assessment of I/O performance: overutilized, underutilized, and
+ * neither overutilized nor underutilized. We will increment the
+ * throttle if a zone is using more than its fair share _and_ I/O
+ * is overutilized; we will decrement the throttle if a zone is using
+ * less than its fair share _or_ I/O is underutilized.
+ */
+ stats.zi_underutil = stats.zi_diskutil < zfs_zone_underutil_threshold ||
+ laggard_udelta > zfs_zone_laggard_ancient;
+
+ stats.zi_overutil = stats.zi_diskutil > zfs_zone_util_threshold &&
+ laggard_udelta < zfs_zone_laggard_recent;
+
+ /*
+ * sdt:::zfs-zone-stats
+ *
+ * Statistics observed over the last period:
+ *
+ * arg0: average system read latency
+ * arg1: average system write latency
+ * arg2: number of active zones
+ * arg3: total I/O 'utilization' for all zones
+ * arg4: total I/O priority of all active zones
+ * arg5: calculated disk utilization
+ */
+ DTRACE_PROBE6(zfs__zone__stats, uintptr_t, stats.zi_avgrlat,
+ uintptr_t, stats.zi_avgwlat, uintptr_t, stats.zi_active,
+ uintptr_t, stats.zi_totutil, uintptr_t, stats.zi_totpri,
+ uintptr_t, stats.zi_diskutil);
+
+ (void) zone_walk(zfs_zone_wait_adjust_delay_cb, &stats);
+}
+
+/*
+ * Callback used to calculate a zone's IO schedule priority.
+ *
+ * We scan the zones looking for ones with ops in the queue. Out of those,
+ * we pick the one that calculates to the highest schedule priority.
+ */
+static int
+get_sched_pri_cb(zone_t *zonep, void *arg)
+{
+ int pri;
+ uint_t cnt;
+ zone_q_bump_t *qbp = arg;
+ zio_priority_t p = qbp->zq_queue;
+
+ cnt = zonep->zone_zfs_queued[p];
+ if (cnt == 0) {
+ zonep->zone_zfs_weight = 0;
+ return (0);
+ }
+
+ /*
+ * On each pass, increment the zone's weight. We use this as input
+ * to the calculation to prevent starvation. The value is reset
+ * each time we issue an IO for this zone so zones which haven't
+ * done any IO over several iterations will see their weight max
+ * out.
+ */
+ if (zonep->zone_zfs_weight < SCHED_WEIGHT_MAX)
+ zonep->zone_zfs_weight++;
+
+ /*
+ * This zone's IO priority is the inverse of the number of IOs
+ * the zone has enqueued * zone's configured priority * weight.
+ * The queue depth has already been scaled by 10 to avoid problems
+ * with int rounding.
+ *
+ * This means that zones with fewer IOs in the queue will get
+ * preference unless other zone's assigned priority pulls them
+ * ahead. The weight is factored in to help ensure that zones
+ * which haven't done IO in a while aren't getting starved.
+ */
+ pri = (qbp->zq_qdepth / cnt) *
+ zonep->zone_zfs_io_pri * zonep->zone_zfs_weight;
+
+ /*
+ * If this zone has a higher priority than what we found so far,
+ * it becomes the new leading contender.
+ */
+ if (pri > qbp->zq_priority) {
+ qbp->zq_zoneid = zonep->zone_id;
+ qbp->zq_priority = pri;
+ qbp->zq_wt = zonep->zone_zfs_weight;
+ }
+ return (0);
+}
+
+/*
+ * See if we need to bump a zone's zio to the head of the queue. This is only
+ * done on the two synchronous I/O queues (see the block comment on the
+ * zfs_zone_schedule function). We get the correct vdev_queue_class_t and
+ * queue depth from our caller.
+ *
+ * For single-threaded synchronous processes a zone cannot get more than
+ * 1 op into the queue at a time unless the zone is running multiple processes
+ * in parallel. This can cause an imbalance in performance if there are zones
+ * with many parallel processes (and ops in the queue) vs. other zones which
+ * are doing simple single-threaded processes, such as interactive tasks in the
+ * shell. These zones can get backed up behind a deep queue and their IO
+ * performance will appear to be very poor as a result. This can make the
+ * zone work badly for interactive behavior.
+ *
+ * The scheduling algorithm kicks in once we start to get a deeper queue.
+ * Once that occurs, we look at all of the zones to see which one calculates
+ * to the highest priority. We bump that zone's first zio to the head of the
+ * queue.
+ *
+ * We use a counter on the zone so that we can quickly find how many ops each
+ * zone has in the queue without having to search the entire queue itself.
+ * This scales better since the number of zones is expected to be on the
+ * order of 10-100 whereas the queue depth can be in the range of 50-2000.
+ * In addition, since the zio's in the queue only have the zoneid, we would
+ * have to look up the zone for each zio enqueued and that means the overhead
+ * for scanning the queue each time would be much higher.
+ *
+ * In all cases, we fall back to simply pulling the next op off the queue
+ * if something should go wrong.
+ */
+static zio_t *
+get_next_zio(vdev_queue_class_t *vqc, int qdepth, zio_priority_t p,
+ avl_tree_t *tree)
+{
+ zone_q_bump_t qbump;
+ zio_t *zp = NULL, *zphead;
+ int cnt = 0;
+
+ /* To avoid problems with int rounding, scale the queue depth by 10 */
+ qbump.zq_qdepth = qdepth * 10;
+ qbump.zq_priority = 0;
+ qbump.zq_zoneid = 0;
+ qbump.zq_queue = p;
+ (void) zone_walk(get_sched_pri_cb, &qbump);
+
+ zphead = avl_first(tree);
+
+ /* Check if the scheduler didn't pick a zone for some reason!? */
+ if (qbump.zq_zoneid != 0) {
+ for (zp = avl_first(tree); zp != NULL;
+ zp = avl_walk(tree, zp, AVL_AFTER)) {
+ if (zp->io_zoneid == qbump.zq_zoneid)
+ break;
+ cnt++;
+ }
+ }
+
+ if (zp == NULL) {
+ zp = zphead;
+ } else if (zp != zphead) {
+ /*
+ * Only fire the probe if we actually picked a different zio
+ * than the one already at the head of the queue.
+ */
+ DTRACE_PROBE4(zfs__zone__sched__bump, uint_t, zp->io_zoneid,
+ uint_t, cnt, int, qbump.zq_priority, int, qbump.zq_wt);
+ }
+
+ return (zp);
+}
+
+/*
+ * Add our zone ID to the zio so we can keep track of which zones are doing
+ * what, even when the current thread processing the zio is not associated
+ * with the zone (e.g. the kernel taskq which pushes out TX groups).
+ */
+void
+zfs_zone_zio_init(zio_t *zp)
+{
+ zone_t *zonep = curzone;
+
+ zp->io_zoneid = zonep->zone_id;
+}
+
+/*
+ * Track and throttle IO operations per zone. Called from:
+ * - dmu_tx_count_write for (logical) write ops (both dataset and zvol writes
+ * go through this path)
+ * - arc_read for read ops that miss the ARC (both dataset and zvol)
+ * For each operation, increment that zone's counter based on the type of
+ * operation, then delay the operation, if necessary.
+ *
+ * There are three basic ways that we can see write ops:
+ * 1) An application does write syscalls. Those ops go into a TXG which
+ * we'll count here. Sometime later a kernel taskq thread (we'll see the
+ * vdev IO as zone 0) will perform some number of physical writes to commit
+ * the TXG to disk. Those writes are not associated with the zone which
+ * made the write syscalls and the number of operations is not correlated
+ * between the taskq and the zone. We only see logical writes in this
+ * function, we see the physcial writes in the zfs_zone_zio_start and
+ * zfs_zone_zio_done functions.
+ * 2) An application opens a file with O_SYNC. Each write will result in
+ * an operation which we'll see here plus a low-level vdev write from
+ * that zone.
+ * 3) An application does write syscalls followed by an fsync(). We'll
+ * count the writes going into a TXG here. We'll also see some number
+ * (usually much smaller, maybe only 1) of low-level vdev writes from this
+ * zone when the fsync is performed, plus some other low-level vdev writes
+ * from the taskq in zone 0 (are these metadata writes?).
+ *
+ * 4) In addition to the above, there are misc. system-level writes, such as
+ * writing out dirty pages to swap, or sync(2) calls, which will be handled
+ * by the global zone and which we count but don't generally worry about.
+ *
+ * Because of the above, we can see writes twice; first because this function
+ * is always called by a zone thread for logical writes, but then we also will
+ * count the physical writes that are performed at a low level via
+ * zfs_zone_zio_start. Without this, it can look like a non-global zone never
+ * writes (case 1). Depending on when the TXG is synced, the counts may be in
+ * the same sample bucket or in a different one.
+ *
+ * Tracking read operations is simpler due to their synchronous semantics. The
+ * zfs_read function -- called as a result of a read(2) syscall -- will always
+ * retrieve the data to be read through arc_read and we only come into this
+ * function when we have an arc miss.
+ */
+void
+zfs_zone_io_throttle(zfs_zone_iop_type_t type)
+{
+ zone_t *zonep = curzone;
+ hrtime_t unow, last_checked;
+ uint16_t wait;
+
+ unow = GET_USEC_TIME;
+
+ /*
+ * Only bump the counter for logical writes here. The counters for
+ * tracking physical IO operations are handled in zfs_zone_zio_done.
+ */
+ if (type == ZFS_ZONE_IOP_LOGICAL_WRITE) {
+ mutex_enter(&zonep->zone_stg_io_lock);
+ add_iop(zonep, unow, type, 0);
+ mutex_exit(&zonep->zone_stg_io_lock);
+ }
+
+ if (!zfs_zone_delay_enable)
+ return;
+
+ /*
+ * If the zone's I/O priority is set to zero, don't throttle that zone's
+ * operations at all.
+ */
+ if (zonep->zone_zfs_io_pri == 0)
+ return;
+
+ /*
+ * XXX There's a potential race here in that more than one thread may
+ * update the zone delays concurrently. The worst outcome is corruption
+ * of our data to track each zone's IO, so the algorithm may make
+ * incorrect throttling decisions until the data is refreshed.
+ */
+ last_checked = zfs_zone_last_checked;
+ if ((unow - last_checked) > zfs_zone_adjust_time) {
+ zfs_zone_last_checked = unow;
+ zfs_zone_wait_adjust(unow, last_checked);
+ }
+
+ if ((wait = zonep->zone_io_delay) > 0) {
+ /*
+ * If this is a write and we're doing above normal TXG
+ * syncing, then throttle for longer than normal.
+ */
+ if (type == ZFS_ZONE_IOP_LOGICAL_WRITE &&
+ (txg_cnt > 1 || txg_sync_rate > 1))
+ wait *= zfs_zone_txg_throttle_scale;
+
+ /*
+ * sdt:::zfs-zone-wait
+ *
+ * arg0: zone ID
+ * arg1: type of IO operation
+ * arg2: time to delay (in us)
+ */
+ DTRACE_PROBE3(zfs__zone__wait, uintptr_t, zonep->zone_id,
+ uintptr_t, type, uintptr_t, wait);
+
+ drv_usecwait(wait);
+
+ if (zonep->zone_vfs_stats != NULL) {
+ atomic_inc_64(&zonep->zone_vfs_stats->
+ zv_delay_cnt.value.ui64);
+ atomic_add_64(&zonep->zone_vfs_stats->
+ zv_delay_time.value.ui64, wait);
+ }
+ }
+}
+
+/*
+ * XXX Ignore the pool pointer parameter for now.
+ *
+ * Keep track to see if the TXG sync rate is running above the expected rate.
+ * If so, this implies that we are filling TXG's at a high rate due to a heavy
+ * write workload. We use this as input into the zone throttle.
+ *
+ * This function is called every 5 seconds (zfs_txg_timeout) under a normal
+ * write load. In this case, the sync rate is going to be 1. When there
+ * is a heavy write load, TXG's fill up fast and the sync thread will write
+ * the TXG more frequently (perhaps once a second). In this case the rate
+ * will be > 1. The sync rate is a lagging indicator since it can be up
+ * to 5 seconds old. We use the txg_cnt to keep track of the rate in the
+ * current 5 second interval and txg_sync_rate to keep track of the previous
+ * 5 second interval. In that way we don't have a period (1 or more seconds)
+ * where the txg_cnt == 0 and we cut back on throttling even though the rate
+ * is still high.
+ */
+/*ARGSUSED*/
+void
+zfs_zone_report_txg_sync(void *dp)
+{
+ uint_t now;
+
+ txg_cnt++;
+ now = (uint_t)(gethrtime() / NANOSEC);
+ if ((now - txg_last_check) >= zfs_txg_timeout) {
+ txg_sync_rate = txg_cnt / 2;
+ txg_cnt = 0;
+ txg_last_check = now;
+ }
+}
+
+hrtime_t
+zfs_zone_txg_delay()
+{
+ if (curzone->zone_io_util_above_avg)
+ return (zfs_zone_txg_delay_nsec);
+
+ return (MSEC2NSEC(10));
+}
+
+/*
+ * Called from vdev_disk_io_start when an IO hits the end of the zio pipeline
+ * and is issued.
+ * Keep track of start time for latency calculation in zfs_zone_zio_done.
+ */
+void
+zfs_zone_zio_start(zio_t *zp)
+{
+ zone_t *zonep;
+
+ /*
+ * I/Os of type ZIO_TYPE_IOCTL are used to flush the disk cache, not for
+ * an actual I/O operation. Ignore those operations as they relate to
+ * throttling and scheduling.
+ */
+ if (zp->io_type == ZIO_TYPE_IOCTL)
+ return;
+
+ if ((zonep = zone_find_by_id(zp->io_zoneid)) == NULL)
+ return;
+
+ mutex_enter(&zonep->zone_zfs_lock);
+ if (zp->io_type == ZIO_TYPE_READ)
+ kstat_runq_enter(&zonep->zone_zfs_rwstats);
+ zonep->zone_zfs_weight = 0;
+ mutex_exit(&zonep->zone_zfs_lock);
+
+ mutex_enter(&zfs_disk_lock);
+ zp->io_dispatched = gethrtime();
+
+ if (zfs_disk_rcnt++ != 0)
+ zfs_disk_rtime += (zp->io_dispatched - zfs_disk_rlastupdate);
+ zfs_disk_rlastupdate = zp->io_dispatched;
+ mutex_exit(&zfs_disk_lock);
+
+ zone_rele(zonep);
+}
+
+/*
+ * Called from vdev_disk_io_done when an IO completes.
+ * Increment our counter for zone ops.
+ * Calculate the IO latency avg. for this zone.
+ */
+void
+zfs_zone_zio_done(zio_t *zp)
+{
+ zone_t *zonep;
+ hrtime_t now, unow, udelta;
+
+ if (zp->io_type == ZIO_TYPE_IOCTL)
+ return;
+
+ if (zp->io_dispatched == 0)
+ return;
+
+ if ((zonep = zone_find_by_id(zp->io_zoneid)) == NULL)
+ return;
+
+ now = gethrtime();
+ unow = NANO_TO_MICRO(now);
+ udelta = unow - NANO_TO_MICRO(zp->io_dispatched);
+
+ mutex_enter(&zonep->zone_zfs_lock);
+
+ /*
+ * To calculate the wsvc_t average, keep a cumulative sum of all the
+ * wait time before each I/O was dispatched. Since most writes are
+ * asynchronous, only track the wait time for read I/Os.
+ */
+ if (zp->io_type == ZIO_TYPE_READ) {
+ zonep->zone_zfs_rwstats.reads++;
+ zonep->zone_zfs_rwstats.nread += zp->io_size;
+
+ zonep->zone_zfs_stats->zz_waittime.value.ui64 +=
+ zp->io_dispatched - zp->io_timestamp;
+
+ kstat_runq_exit(&zonep->zone_zfs_rwstats);
+ } else {
+ zonep->zone_zfs_rwstats.writes++;
+ zonep->zone_zfs_rwstats.nwritten += zp->io_size;
+ }
+
+ mutex_exit(&zonep->zone_zfs_lock);
+
+ mutex_enter(&zfs_disk_lock);
+ zfs_disk_rcnt--;
+ zfs_disk_rtime += (now - zfs_disk_rlastupdate);
+ zfs_disk_rlastupdate = now;
+
+ if (udelta > zfs_zone_laggard_threshold)
+ zfs_disk_last_laggard = unow;
+
+ mutex_exit(&zfs_disk_lock);
+
+ if (zfs_zone_delay_enable) {
+ mutex_enter(&zonep->zone_stg_io_lock);
+ add_iop(zonep, unow, zp->io_type == ZIO_TYPE_READ ?
+ ZFS_ZONE_IOP_READ : ZFS_ZONE_IOP_WRITE, udelta);
+ mutex_exit(&zonep->zone_stg_io_lock);
+ }
+
+ zone_rele(zonep);
+
+ /*
+ * sdt:::zfs-zone-latency
+ *
+ * arg0: zone ID
+ * arg1: type of I/O operation
+ * arg2: I/O latency (in us)
+ */
+ DTRACE_PROBE3(zfs__zone__latency, uintptr_t, zp->io_zoneid,
+ uintptr_t, zp->io_type, uintptr_t, udelta);
+}
+
+void
+zfs_zone_zio_dequeue(zio_t *zp)
+{
+ zio_priority_t p;
+ zone_t *zonep;
+
+ p = zp->io_priority;
+ if (p != ZIO_PRIORITY_SYNC_READ && p != ZIO_PRIORITY_SYNC_WRITE)
+ return;
+
+ /* We depend on p being defined as either 0 or 1 */
+ ASSERT(p < 2);
+
+ if ((zonep = zone_find_by_id(zp->io_zoneid)) == NULL)
+ return;
+
+ mutex_enter(&zonep->zone_stg_io_lock);
+ ASSERT(zonep->zone_zfs_queued[p] > 0);
+ if (zonep->zone_zfs_queued[p] == 0)
+ cmn_err(CE_WARN, "zfs_zone_zio_dequeue: count==0");
+ else
+ zonep->zone_zfs_queued[p]--;
+ mutex_exit(&zonep->zone_stg_io_lock);
+ zone_rele(zonep);
+}
+
+void
+zfs_zone_zio_enqueue(zio_t *zp)
+{
+ zio_priority_t p;
+ zone_t *zonep;
+
+ p = zp->io_priority;
+ if (p != ZIO_PRIORITY_SYNC_READ && p != ZIO_PRIORITY_SYNC_WRITE)
+ return;
+
+ /* We depend on p being defined as either 0 or 1 */
+ ASSERT(p < 2);
+
+ if ((zonep = zone_find_by_id(zp->io_zoneid)) == NULL)
+ return;
+
+ mutex_enter(&zonep->zone_stg_io_lock);
+ zonep->zone_zfs_queued[p]++;
+ mutex_exit(&zonep->zone_stg_io_lock);
+ zone_rele(zonep);
+}
+
+/*
+ * Called from vdev_queue_io_to_issue. That function is where zio's are listed
+ * in FIFO order on one of the sync queues, then pulled off (by
+ * vdev_queue_io_remove) and issued. We potentially do zone-based scheduling
+ * here to find a zone's zio deeper in the sync queue and issue that instead
+ * of simply doing FIFO.
+ *
+ * We only do zone-based zio scheduling for the two synchronous I/O queues
+ * (read & write). These queues are normally serviced in FIFO order but we
+ * may decide to move a zone's zio to the head of the line. A typical I/O
+ * load will be mostly synchronous reads and some asynchronous writes (which
+ * are scheduled differently due to transaction groups). There will also be
+ * some synchronous writes for those apps which want to ensure their data is on
+ * disk. We want to make sure that a zone with a single-threaded app (e.g. the
+ * shell) that is doing synchronous I/O (typically reads) isn't penalized by
+ * other zones which are doing lots of synchronous I/O because they have many
+ * running threads.
+ *
+ * The vq->vq_lock mutex is held when we're executing this function so we
+ * can safely access the "last zone" variable on the queue.
+ */
+zio_t *
+zfs_zone_schedule(vdev_queue_t *vq, zio_priority_t p, avl_index_t idx,
+ avl_tree_t *tree)
+{
+ vdev_queue_class_t *vqc = &vq->vq_class[p];
+ uint_t cnt;
+ zoneid_t last_zone;
+ zio_t *zio;
+
+ ASSERT(MUTEX_HELD(&vq->vq_lock));
+
+ /* Don't change the order on the LBA ordered queues. */
+ if (p != ZIO_PRIORITY_SYNC_READ && p != ZIO_PRIORITY_SYNC_WRITE)
+ return (avl_nearest(tree, idx, AVL_AFTER));
+
+ /* We depend on p being defined as either 0 or 1 */
+ ASSERT(p < 2);
+
+ cnt = avl_numnodes(tree);
+ last_zone = vq->vq_last_zone_id;
+
+ /*
+ * If there are only a few zios in the queue then just issue the head.
+ * If there are more than a few zios already queued up, then use
+ * scheduling to get the next zio.
+ */
+ if (!zfs_zone_schedule_enable || cnt < zfs_zone_schedule_thresh)
+ zio = avl_nearest(tree, idx, AVL_AFTER);
+ else
+ zio = get_next_zio(vqc, cnt, p, tree);
+
+ vq->vq_last_zone_id = zio->io_zoneid;
+
+ /*
+ * Probe with 4 args; the number of IOs in the queue, the zone that
+ * was last scheduled off this queue, the zone that was associated
+ * with the next IO that is scheduled, and which queue (priority).
+ */
+ DTRACE_PROBE4(zfs__zone__sched, uint_t, cnt, uint_t, last_zone,
+ uint_t, zio->io_zoneid, uint_t, p);
+
+ return (zio);
+}
+
+#endif
diff --git a/usr/src/uts/common/fs/zfs/zil.c b/usr/src/uts/common/fs/zfs/zil.c
index e3889b3a30..abbb31a199 100644
--- a/usr/src/uts/common/fs/zfs/zil.c
+++ b/usr/src/uts/common/fs/zfs/zil.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
*/
@@ -1802,9 +1803,18 @@ zil_close(zilog_t *zilog)
if (lwb != NULL)
txg = lwb->lwb_max_txg;
mutex_exit(&zilog->zl_lock);
- if (txg)
+
+ if (zilog_is_dirty(zilog)) {
+ /*
+ * If we're dirty, always wait for the current transaction --
+ * our lwb_max_txg may be in the past.
+ */
+ txg_wait_synced(zilog->zl_dmu_pool, 0);
+ } else if (txg) {
txg_wait_synced(zilog->zl_dmu_pool, txg);
- ASSERT(!zilog_is_dirty(zilog));
+ }
+
+ VERIFY(!zilog_is_dirty(zilog));
taskq_destroy(zilog->zl_clean_taskq);
zilog->zl_clean_taskq = NULL;
diff --git a/usr/src/uts/common/fs/zfs/zio.c b/usr/src/uts/common/fs/zfs/zio.c
index 1acc8b2e6a..bfbcdfb511 100644
--- a/usr/src/uts/common/fs/zfs/zio.c
+++ b/usr/src/uts/common/fs/zfs/zio.c
@@ -22,6 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2016 by Delphix. All rights reserved.
* Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
*/
@@ -40,6 +41,7 @@
#include <sys/ddt.h>
#include <sys/blkptr.h>
#include <sys/zfeature.h>
+#include <sys/zfs_zone.h>
/*
* ==========================================================================
@@ -561,11 +563,14 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
zio->io_bookmark = *zb;
if (pio != NULL) {
+ zio->io_zoneid = pio->io_zoneid;
if (zio->io_logical == NULL)
zio->io_logical = pio->io_logical;
if (zio->io_child_type == ZIO_CHILD_GANG)
zio->io_gang_leader = pio->io_gang_leader;
zio_add_child(pio, zio);
+ } else {
+ zfs_zone_zio_init(zio);
}
return (zio);
diff --git a/usr/src/uts/common/fs/zfs/zvol.c b/usr/src/uts/common/fs/zfs/zvol.c
index 95bb26c211..535bc057b9 100644
--- a/usr/src/uts/common/fs/zfs/zvol.c
+++ b/usr/src/uts/common/fs/zfs/zvol.c
@@ -25,7 +25,7 @@
*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
*/
@@ -84,6 +84,7 @@
#include <sys/zvol.h>
#include <sys/dumphdr.h>
#include <sys/zil_impl.h>
+#include <sys/sdt.h>
#include <sys/dbuf.h>
#include <sys/dmu_tx.h>
#include <sys/zfeature.h>
@@ -138,6 +139,11 @@ typedef struct zvol_state {
#define ZVOL_EXCL 0x4
#define ZVOL_WCE 0x8
+#define VOP_LATENCY_10MS 10000000
+#define VOP_LATENCY_100MS 100000000
+#define VOP_LATENCY_1S 1000000000
+#define VOP_LATENCY_10S 10000000000
+
/*
* zvol maximum transfer in one DMU tx.
*/
@@ -1379,6 +1385,9 @@ zvol_read(dev_t dev, uio_t *uio, cred_t *cr)
uint64_t volsize;
rl_t *rl;
int error = 0;
+ zone_t *zonep = curzone;
+ uint64_t tot_bytes;
+ hrtime_t start, lat;
zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
if (zv == NULL)
@@ -1395,6 +1404,14 @@ zvol_read(dev_t dev, uio_t *uio, cred_t *cr)
return (error);
}
+ DTRACE_PROBE3(zvol__uio__start, dev_t, dev, uio_t *, uio, int, 0);
+
+ mutex_enter(&zonep->zone_vfs_lock);
+ kstat_runq_enter(&zonep->zone_vfs_rwstats);
+ mutex_exit(&zonep->zone_vfs_lock);
+ start = gethrtime();
+ tot_bytes = 0;
+
rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid,
RL_READER);
while (uio->uio_resid > 0 && uio->uio_loffset < volsize) {
@@ -1404,6 +1421,7 @@ zvol_read(dev_t dev, uio_t *uio, cred_t *cr)
if (bytes > volsize - uio->uio_loffset)
bytes = volsize - uio->uio_loffset;
+ tot_bytes += bytes;
error = dmu_read_uio(zv->zv_objset, ZVOL_OBJ, uio, bytes);
if (error) {
/* convert checksum errors into IO errors */
@@ -1413,6 +1431,39 @@ zvol_read(dev_t dev, uio_t *uio, cred_t *cr)
}
}
zfs_range_unlock(rl);
+
+ mutex_enter(&zonep->zone_vfs_lock);
+ zonep->zone_vfs_rwstats.reads++;
+ zonep->zone_vfs_rwstats.nread += tot_bytes;
+ kstat_runq_exit(&zonep->zone_vfs_rwstats);
+ mutex_exit(&zonep->zone_vfs_lock);
+
+ lat = gethrtime() - start;
+
+ if (lat >= VOP_LATENCY_10MS) {
+ zone_vfs_kstat_t *zvp;
+
+ zvp = zonep->zone_vfs_stats;
+ if (lat < VOP_LATENCY_100MS) {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ } else if (lat < VOP_LATENCY_1S) {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ } else if (lat < VOP_LATENCY_10S) {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
+ } else {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_10s_ops.value.ui64);
+ }
+ }
+
+ DTRACE_PROBE4(zvol__uio__done, dev_t, dev, uio_t *, uio, int, 0, int,
+ error);
+
return (error);
}
@@ -1426,6 +1477,9 @@ zvol_write(dev_t dev, uio_t *uio, cred_t *cr)
rl_t *rl;
int error = 0;
boolean_t sync;
+ zone_t *zonep = curzone;
+ uint64_t tot_bytes;
+ hrtime_t start, lat;
zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
if (zv == NULL)
@@ -1442,6 +1496,19 @@ zvol_write(dev_t dev, uio_t *uio, cred_t *cr)
return (error);
}
+ DTRACE_PROBE3(zvol__uio__start, dev_t, dev, uio_t *, uio, int, 1);
+
+ /*
+ * For the purposes of VFS kstat consumers, the "waitq" calculation is
+ * repurposed as the active queue for zvol write operations. There's no
+ * actual wait queue for zvol operations.
+ */
+ mutex_enter(&zonep->zone_vfs_lock);
+ kstat_waitq_enter(&zonep->zone_vfs_rwstats);
+ mutex_exit(&zonep->zone_vfs_lock);
+ start = gethrtime();
+ tot_bytes = 0;
+
sync = !(zv->zv_flags & ZVOL_WCE) ||
(zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS);
@@ -1455,6 +1522,7 @@ zvol_write(dev_t dev, uio_t *uio, cred_t *cr)
if (bytes > volsize - off) /* don't write past the end */
bytes = volsize - off;
+ tot_bytes += bytes;
dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes);
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
@@ -1472,6 +1540,39 @@ zvol_write(dev_t dev, uio_t *uio, cred_t *cr)
zfs_range_unlock(rl);
if (sync)
zil_commit(zv->zv_zilog, ZVOL_OBJ);
+
+ DTRACE_PROBE4(zvol__uio__done, dev_t, dev, uio_t *, uio, int, 1, int,
+ error);
+
+ mutex_enter(&zonep->zone_vfs_lock);
+ zonep->zone_vfs_rwstats.writes++;
+ zonep->zone_vfs_rwstats.nwritten += tot_bytes;
+ kstat_waitq_exit(&zonep->zone_vfs_rwstats);
+ mutex_exit(&zonep->zone_vfs_lock);
+
+ lat = gethrtime() - start;
+
+ if (lat >= VOP_LATENCY_10MS) {
+ zone_vfs_kstat_t *zvp;
+
+ zvp = zonep->zone_vfs_stats;
+ if (lat < VOP_LATENCY_100MS) {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ } else if (lat < VOP_LATENCY_1S) {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ } else if (lat < VOP_LATENCY_10S) {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
+ } else {
+ atomic_inc_64(&zvp->zv_10ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_100ms_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_1s_ops.value.ui64);
+ atomic_inc_64(&zvp->zv_10s_ops.value.ui64);
+ }
+ }
+
return (error);
}
diff --git a/usr/src/uts/common/inet/inet_hash.h b/usr/src/uts/common/inet/inet_hash.h
new file mode 100644
index 0000000000..a790a797d1
--- /dev/null
+++ b/usr/src/uts/common/inet/inet_hash.h
@@ -0,0 +1,37 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _INET_INET_HASH_H
+#define _INET_INET_HASH_H
+
+/*
+ * Common packet hashing routines shared across MAC, UDP, and others.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define INET_PKT_HASH_L2 0x01
+#define INET_PKT_HASH_L3 0x02
+#define INET_PKT_HASH_L4 0x04
+
+extern uint64_t inet_pkt_hash(uint_t, mblk_t *, uint8_t);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _INET_INET_HASH_H */
diff --git a/usr/src/uts/common/inet/ip/conn_opt.c b/usr/src/uts/common/inet/ip/conn_opt.c
index bcbc1c4949..b4bff4d7b4 100644
--- a/usr/src/uts/common/inet/ip/conn_opt.c
+++ b/usr/src/uts/common/inet/ip/conn_opt.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
*/
/* Copyright (c) 1990 Mentat Inc. */
@@ -619,6 +620,9 @@ conn_opt_get(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
case SO_REUSEADDR:
*i1 = connp->conn_reuseaddr ? SO_REUSEADDR : 0;
break; /* goto sizeof (int) option return */
+ case SO_REUSEPORT:
+ *i1 = connp->conn_reuseport;
+ break; /* goto sizeof (int) option return */
case SO_TYPE:
*i1 = connp->conn_so_type;
break; /* goto sizeof (int) option return */
@@ -1186,8 +1190,24 @@ conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
int error;
- if (connp->conn_family != AF_INET)
+ if (connp->conn_family == AF_INET6 &&
+ connp->conn_ipversion == IPV4_VERSION) {
+ /*
+ * Allow certain IPv4 options to be set on an AF_INET6 socket
+ * if the connection is still IPv4.
+ */
+ switch (name) {
+ case IP_TOS:
+ case T_IP_TOS:
+ case IP_TTL:
+ case IP_DONTFRAG:
+ break;
+ default:
+ return (EINVAL);
+ }
+ } else if (connp->conn_family != AF_INET) {
return (EINVAL);
+ }
switch (name) {
case IP_TTL:
diff --git a/usr/src/uts/common/inet/ip/ip.c b/usr/src/uts/common/inet/ip/ip.c
index f006e83a1f..73081b9c1c 100644
--- a/usr/src/uts/common/inet/ip/ip.c
+++ b/usr/src/uts/common/inet/ip/ip.c
@@ -12577,6 +12577,7 @@ ip_process_ioctl(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *arg)
struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
ip_ioctl_cmd_t *ipip = arg;
ip_extract_func_t *extract_funcp;
+ ill_t *ill;
cmd_info_t ci;
int err;
boolean_t entered_ipsq = B_FALSE;
@@ -12697,6 +12698,13 @@ ip_process_ioctl(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *arg)
ipsq_current_start(ipsq, ci.ci_ipif, ipip->ipi_cmd);
/*
+ * We need to cache the ill_t that we're going to use as the argument
+ * to the ipif-ioctl DTrace probe (below) because the ci_ipif can be
+ * blown away by calling ipi_func.
+ */
+ ill = ci.ci_ipif == NULL ? NULL : ci.ci_ipif->ipif_ill;
+
+ /*
* A return value of EINPROGRESS means the ioctl is
* either queued and waiting for some reason or has
* already completed.
@@ -12704,9 +12712,7 @@ ip_process_ioctl(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *arg)
err = (*ipip->ipi_func)(ci.ci_ipif, ci.ci_sin, q, mp, ipip, ci.ci_lifr);
DTRACE_PROBE4(ipif__ioctl, char *, "ip_process_ioctl finish WR",
- int, ipip->ipi_cmd,
- ill_t *, ci.ci_ipif == NULL ? NULL : ci.ci_ipif->ipif_ill,
- ipif_t *, ci.ci_ipif);
+ int, ipip->ipi_cmd, ill_t *, ill, ipif_t *, ci.ci_ipif);
ip_ioctl_finish(q, mp, err, IPI2MODE(ipip), ipsq);
if (entered_ipsq)
diff --git a/usr/src/uts/common/inet/ip/ip_attr.c b/usr/src/uts/common/inet/ip/ip_attr.c
index 85ee142dfc..c350d67c2d 100644
--- a/usr/src/uts/common/inet/ip/ip_attr.c
+++ b/usr/src/uts/common/inet/ip/ip_attr.c
@@ -909,6 +909,11 @@ ixa_safe_copy(ip_xmit_attr_t *src, ip_xmit_attr_t *ixa)
*/
if (ixa->ixa_free_flags & IXA_FREE_CRED)
crhold(ixa->ixa_cred);
+
+ /*
+ * There is no cleanup in progress on this new copy.
+ */
+ ixa->ixa_tcpcleanup = IXATC_IDLE;
}
/*
diff --git a/usr/src/uts/common/inet/ip/ip_squeue.c b/usr/src/uts/common/inet/ip/ip_squeue.c
index 33a2fa5935..dedb4dadcc 100644
--- a/usr/src/uts/common/inet/ip/ip_squeue.c
+++ b/usr/src/uts/common/inet/ip/ip_squeue.c
@@ -163,7 +163,7 @@ ip_squeue_create(pri_t pri)
{
squeue_t *sqp;
- sqp = squeue_create(ip_squeue_worker_wait, pri);
+ sqp = squeue_create(ip_squeue_worker_wait, pri, B_TRUE);
ASSERT(sqp != NULL);
if (ip_squeue_create_callback != NULL)
ip_squeue_create_callback(sqp);
diff --git a/usr/src/uts/common/inet/ip/ipclassifier.c b/usr/src/uts/common/inet/ip/ipclassifier.c
index bc2173ff24..3a12e58c3a 100644
--- a/usr/src/uts/common/inet/ip/ipclassifier.c
+++ b/usr/src/uts/common/inet/ip/ipclassifier.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
/*
@@ -868,67 +869,91 @@ ipcl_hash_remove_locked(conn_t *connp, connf_t *connfp)
mutex_exit(&(connfp)->connf_lock); \
}
-#define IPCL_HASH_INSERT_BOUND(connfp, connp) { \
- conn_t *pconnp = NULL, *nconnp; \
- IPCL_HASH_REMOVE((connp)); \
- mutex_enter(&(connfp)->connf_lock); \
- nconnp = (connfp)->connf_head; \
- while (nconnp != NULL && \
- !_IPCL_V4_MATCH_ANY(nconnp->conn_laddr_v6)) { \
- pconnp = nconnp; \
- nconnp = nconnp->conn_next; \
- } \
- if (pconnp != NULL) { \
- pconnp->conn_next = (connp); \
- (connp)->conn_prev = pconnp; \
- } else { \
- (connfp)->connf_head = (connp); \
- } \
- if (nconnp != NULL) { \
- (connp)->conn_next = nconnp; \
- nconnp->conn_prev = (connp); \
- } \
- (connp)->conn_fanout = (connfp); \
- (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \
- IPCL_BOUND; \
- CONN_INC_REF(connp); \
- mutex_exit(&(connfp)->connf_lock); \
-}
+/*
+ * When inserting bound or wildcard entries into the hash, ordering rules are
+ * used to facilitate timely and correct lookups. The order is as follows:
+ * 1. Entries bound to a specific address
+ * 2. Entries bound to INADDR_ANY
+ * 3. Entries bound to ADDR_UNSPECIFIED
+ * Entries in a category which share conn_lport (such as those using
+ * SO_REUSEPORT) will be ordered such that the newest inserted is first.
+ */
-#define IPCL_HASH_INSERT_WILDCARD(connfp, connp) { \
- conn_t **list, *prev, *next; \
- boolean_t isv4mapped = \
- IN6_IS_ADDR_V4MAPPED(&(connp)->conn_laddr_v6); \
- IPCL_HASH_REMOVE((connp)); \
- mutex_enter(&(connfp)->connf_lock); \
- list = &(connfp)->connf_head; \
- prev = NULL; \
- while ((next = *list) != NULL) { \
- if (isv4mapped && \
- IN6_IS_ADDR_UNSPECIFIED(&next->conn_laddr_v6) && \
- connp->conn_zoneid == next->conn_zoneid) { \
- (connp)->conn_next = next; \
- if (prev != NULL) \
- prev = next->conn_prev; \
- next->conn_prev = (connp); \
- break; \
- } \
- list = &next->conn_next; \
- prev = next; \
- } \
- (connp)->conn_prev = prev; \
- *list = (connp); \
- (connp)->conn_fanout = (connfp); \
- (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \
- IPCL_BOUND; \
- CONN_INC_REF((connp)); \
- mutex_exit(&(connfp)->connf_lock); \
+void
+ipcl_hash_insert_bound(connf_t *connfp, conn_t *connp)
+{
+ conn_t *pconnp, *nconnp;
+
+ IPCL_HASH_REMOVE(connp);
+ mutex_enter(&connfp->connf_lock);
+ nconnp = connfp->connf_head;
+ pconnp = NULL;
+ while (nconnp != NULL) {
+ /*
+ * Walk though entries associated with the fanout until one is
+ * found which fulfills any of these conditions:
+ * 1. Listen address of ADDR_ANY/ADDR_UNSPECIFIED
+ * 2. Listen port the same as connp
+ */
+ if (_IPCL_V4_MATCH_ANY(nconnp->conn_laddr_v6) ||
+ connp->conn_lport == nconnp->conn_lport)
+ break;
+ pconnp = nconnp;
+ nconnp = nconnp->conn_next;
+ }
+ if (pconnp != NULL) {
+ pconnp->conn_next = connp;
+ connp->conn_prev = pconnp;
+ } else {
+ connfp->connf_head = connp;
+ }
+ if (nconnp != NULL) {
+ connp->conn_next = nconnp;
+ nconnp->conn_prev = connp;
+ }
+ connp->conn_fanout = connfp;
+ connp->conn_flags = (connp->conn_flags & ~IPCL_REMOVED) | IPCL_BOUND;
+ CONN_INC_REF(connp);
+ mutex_exit(&connfp->connf_lock);
}
void
ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp)
{
- IPCL_HASH_INSERT_WILDCARD(connfp, connp);
+ conn_t **list, *prev, *next;
+ conn_t *pconnp = NULL, *nconnp;
+ boolean_t isv4mapped = IN6_IS_ADDR_V4MAPPED(&connp->conn_laddr_v6);
+
+ IPCL_HASH_REMOVE(connp);
+ mutex_enter(&connfp->connf_lock);
+ nconnp = connfp->connf_head;
+ pconnp = NULL;
+ while (nconnp != NULL) {
+ if (IN6_IS_ADDR_V4MAPPED_ANY(&nconnp->conn_laddr_v6) &&
+ isv4mapped && connp->conn_lport == nconnp->conn_lport)
+ break;
+ if (IN6_IS_ADDR_UNSPECIFIED(&nconnp->conn_laddr_v6) &&
+ (isv4mapped ||
+ connp->conn_lport == nconnp->conn_lport))
+ break;
+
+ pconnp = nconnp;
+ nconnp = nconnp->conn_next;
+ }
+ if (pconnp != NULL) {
+ pconnp->conn_next = connp;
+ connp->conn_prev = pconnp;
+ } else {
+ connfp->connf_head = connp;
+ }
+ if (nconnp != NULL) {
+ connp->conn_next = nconnp;
+ nconnp->conn_prev = connp;
+ }
+ connp->conn_fanout = connfp;
+ connp->conn_flags = (connp->conn_flags & ~IPCL_REMOVED) | IPCL_BOUND;
+ CONN_INC_REF(connp);
+ mutex_exit(&connfp->connf_lock);
}
/*
@@ -1034,9 +1059,9 @@ ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport)
IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6)) {
- IPCL_HASH_INSERT_WILDCARD(connfp, connp);
+ ipcl_hash_insert_wildcard(connfp, connp);
} else {
- IPCL_HASH_INSERT_BOUND(connfp, connp);
+ ipcl_hash_insert_bound(connfp, connp);
}
} else {
IPCL_HASH_INSERT_CONNECTED(connfp, connp);
@@ -1205,9 +1230,9 @@ ipcl_bind_insert_v4(conn_t *connp)
if (connp->conn_faddr_v4 != INADDR_ANY) {
IPCL_HASH_INSERT_CONNECTED(connfp, connp);
} else if (connp->conn_laddr_v4 != INADDR_ANY) {
- IPCL_HASH_INSERT_BOUND(connfp, connp);
+ ipcl_hash_insert_bound(connfp, connp);
} else {
- IPCL_HASH_INSERT_WILDCARD(connfp, connp);
+ ipcl_hash_insert_wildcard(connfp, connp);
}
if (protocol == IPPROTO_RSVP)
ill_set_inputfn_all(ipst);
@@ -1219,9 +1244,9 @@ ipcl_bind_insert_v4(conn_t *connp)
connfp = &ipst->ips_ipcl_bind_fanout[
IPCL_BIND_HASH(lport, ipst)];
if (connp->conn_laddr_v4 != INADDR_ANY) {
- IPCL_HASH_INSERT_BOUND(connfp, connp);
+ ipcl_hash_insert_bound(connfp, connp);
} else {
- IPCL_HASH_INSERT_WILDCARD(connfp, connp);
+ ipcl_hash_insert_wildcard(connfp, connp);
}
if (cl_inet_listen != NULL) {
ASSERT(connp->conn_ipversion == IPV4_VERSION);
@@ -1271,9 +1296,9 @@ ipcl_bind_insert_v6(conn_t *connp)
if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
IPCL_HASH_INSERT_CONNECTED(connfp, connp);
} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
- IPCL_HASH_INSERT_BOUND(connfp, connp);
+ ipcl_hash_insert_bound(connfp, connp);
} else {
- IPCL_HASH_INSERT_WILDCARD(connfp, connp);
+ ipcl_hash_insert_wildcard(connfp, connp);
}
break;
@@ -1283,9 +1308,9 @@ ipcl_bind_insert_v6(conn_t *connp)
connfp = &ipst->ips_ipcl_bind_fanout[
IPCL_BIND_HASH(lport, ipst)];
if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
- IPCL_HASH_INSERT_BOUND(connfp, connp);
+ ipcl_hash_insert_bound(connfp, connp);
} else {
- IPCL_HASH_INSERT_WILDCARD(connfp, connp);
+ ipcl_hash_insert_wildcard(connfp, connp);
}
if (cl_inet_listen != NULL) {
sa_family_t addr_family;
@@ -1416,9 +1441,9 @@ ipcl_conn_insert_v4(conn_t *connp)
if (connp->conn_faddr_v4 != INADDR_ANY) {
IPCL_HASH_INSERT_CONNECTED(connfp, connp);
} else if (connp->conn_laddr_v4 != INADDR_ANY) {
- IPCL_HASH_INSERT_BOUND(connfp, connp);
+ ipcl_hash_insert_bound(connfp, connp);
} else {
- IPCL_HASH_INSERT_WILDCARD(connfp, connp);
+ ipcl_hash_insert_wildcard(connfp, connp);
}
break;
}
@@ -1504,9 +1529,9 @@ ipcl_conn_insert_v6(conn_t *connp)
if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
IPCL_HASH_INSERT_CONNECTED(connfp, connp);
} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
- IPCL_HASH_INSERT_BOUND(connfp, connp);
+ ipcl_hash_insert_bound(connfp, connp);
} else {
- IPCL_HASH_INSERT_WILDCARD(connfp, connp);
+ ipcl_hash_insert_wildcard(connfp, connp);
}
break;
}
diff --git a/usr/src/uts/common/inet/ip/ipsecesp.c b/usr/src/uts/common/inet/ip/ipsecesp.c
index c325e8dc26..2ca770ebe9 100644
--- a/usr/src/uts/common/inet/ip/ipsecesp.c
+++ b/usr/src/uts/common/inet/ip/ipsecesp.c
@@ -234,8 +234,7 @@ esp_kstat_init(ipsecesp_stack_t *espstack, netstackid_t stackid)
{
espstack->esp_ksp = kstat_create_netstack("ipsecesp", 0, "esp_stat",
"net", KSTAT_TYPE_NAMED,
- sizeof (esp_kstats_t) / sizeof (kstat_named_t),
- KSTAT_FLAG_PERSISTENT, stackid);
+ sizeof (esp_kstats_t) / sizeof (kstat_named_t), 0, stackid);
if (espstack->esp_ksp == NULL || espstack->esp_ksp->ks_data == NULL)
return (B_FALSE);
diff --git a/usr/src/uts/common/inet/ipclassifier.h b/usr/src/uts/common/inet/ipclassifier.h
index f6466434f6..c3139d9288 100644
--- a/usr/src/uts/common/inet/ipclassifier.h
+++ b/usr/src/uts/common/inet/ipclassifier.h
@@ -21,6 +21,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
*/
#ifndef _INET_IPCLASSIFIER_H
@@ -293,7 +294,8 @@ struct conn_s {
conn_ipv6_recvpathmtu : 1, /* IPV6_RECVPATHMTU */
conn_mcbc_bind : 1, /* Bound to multi/broadcast */
- conn_pad_to_bit_31 : 12;
+ conn_reuseport : 1, /* SO_REUSEPORT state */
+ conn_pad_to_bit_31 : 11;
boolean_t conn_blocked; /* conn is flow-controlled */
diff --git a/usr/src/uts/common/inet/ipf/ip_fil_solaris.c b/usr/src/uts/common/inet/ipf/ip_fil_solaris.c
index f958ca2261..227d2075f8 100644
--- a/usr/src/uts/common/inet/ipf/ip_fil_solaris.c
+++ b/usr/src/uts/common/inet/ipf/ip_fil_solaris.c
@@ -83,6 +83,14 @@ static int ipf_hook6_loop_out __P((hook_event_token_t, hook_data_t,
static int ipf_hook6_loop_in __P((hook_event_token_t, hook_data_t,
void *));
static int ipf_hook6 __P((hook_data_t, int, int, void *));
+static int ipf_hookvndl3v4_in __P((hook_event_token_t, hook_data_t,
+ void *));
+static int ipf_hookvndl3v6_in __P((hook_event_token_t, hook_data_t,
+ void *));
+static int ipf_hookvndl3v4_out __P((hook_event_token_t, hook_data_t,
+ void *));
+static int ipf_hookvndl3v6_out __P((hook_event_token_t, hook_data_t,
+ void *));
extern int ipf_geniter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
extern int ipf_frruleiter __P((void *, int, void *, ipf_stack_t *));
@@ -152,6 +160,16 @@ char *hook6_loop_in_gz = "ipfilter_hook6_loop_in_gz";
char *hook6_loop_out = "ipfilter_hook6_loop_out";
char *hook6_loop_out_gz = "ipfilter_hook6_loop_out_gz";
+/* vnd IPv4/v6 hook names */
+char *hook4_vnd_in = "ipfilter_hookvndl3v4_in";
+char *hook4_vnd_in_gz = "ipfilter_hookvndl3v4_in_gz";
+char *hook6_vnd_in = "ipfilter_hookvndl3v6_in";
+char *hook6_vnd_in_gz = "ipfilter_hookvndl3v6_in_gz";
+char *hook4_vnd_out = "ipfilter_hookvndl3v4_out";
+char *hook4_vnd_out_gz = "ipfilter_hookvndl3v4_out_gz";
+char *hook6_vnd_out = "ipfilter_hookvndl3v6_out";
+char *hook6_vnd_out_gz = "ipfilter_hookvndl3v6_out_gz";
+
/* ------------------------------------------------------------------------ */
/* Function: ipldetach */
/* Returns: int - 0 == success, else error. */
@@ -248,6 +266,31 @@ ipf_stack_t *ifs;
ifs->ifs_ipf_ipv4 = NULL;
}
+ /*
+ * Remove VND hooks
+ */
+ if (ifs->ifs_ipf_vndl3v4 != NULL) {
+ UNDO_HOOK(ifs_ipf_vndl3v4, ifs_hookvndl3v4_physical_in,
+ NH_PHYSICAL_IN, ifs_ipfhookvndl3v4_in);
+ UNDO_HOOK(ifs_ipf_vndl3v4, ifs_hookvndl3v4_physical_out,
+ NH_PHYSICAL_OUT, ifs_ipfhookvndl3v4_out);
+
+ if (net_protocol_release(ifs->ifs_ipf_vndl3v4) != 0)
+ goto detach_failed;
+ ifs->ifs_ipf_vndl3v4 = NULL;
+ }
+
+ if (ifs->ifs_ipf_vndl3v6 != NULL) {
+ UNDO_HOOK(ifs_ipf_vndl3v6, ifs_hookvndl3v6_physical_in,
+ NH_PHYSICAL_IN, ifs_ipfhookvndl3v6_in);
+ UNDO_HOOK(ifs_ipf_vndl3v6, ifs_hookvndl3v6_physical_out,
+ NH_PHYSICAL_OUT, ifs_ipfhookvndl3v6_out);
+
+ if (net_protocol_release(ifs->ifs_ipf_vndl3v6) != 0)
+ goto detach_failed;
+ ifs->ifs_ipf_vndl3v6 = NULL;
+ }
+
#undef UNDO_HOOK
#ifdef IPFDEBUG
@@ -445,6 +488,48 @@ ipf_stack_t *ifs;
}
/*
+ * Add VND INET hooks
+ */
+ ifs->ifs_ipf_vndl3v4 = net_protocol_lookup(id, NHF_VND_INET);
+ if (ifs->ifs_ipf_vndl3v4 == NULL)
+ goto hookup_failed;
+
+ HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhookvndl3v4_in, ipf_hookvndl3v4_in,
+ hook4_vnd_in, hook4_vnd_in_gz, ifs);
+ HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhookvndl3v4_out, ipf_hookvndl3v4_out,
+ hook4_vnd_out, hook4_vnd_out_gz, ifs);
+ ifs->ifs_hookvndl3v4_physical_in = (net_hook_register(ifs->ifs_ipf_vndl3v4,
+ NH_PHYSICAL_IN, ifs->ifs_ipfhookvndl3v4_in) == 0);
+ if (!ifs->ifs_hookvndl3v4_physical_in)
+ goto hookup_failed;
+
+ ifs->ifs_hookvndl3v4_physical_out = (net_hook_register(ifs->ifs_ipf_vndl3v4,
+ NH_PHYSICAL_OUT, ifs->ifs_ipfhookvndl3v4_out) == 0);
+ if (!ifs->ifs_hookvndl3v4_physical_out)
+ goto hookup_failed;
+
+
+ /*
+ * VND INET6 hooks
+ */
+ ifs->ifs_ipf_vndl3v6 = net_protocol_lookup(id, NHF_VND_INET6);
+ if (ifs->ifs_ipf_vndl3v6 == NULL)
+ goto hookup_failed;
+
+ HOOK_INIT_GZ_BEFORE(ifs->ifs_ipfhookvndl3v6_in, ipf_hookvndl3v6_in,
+ hook6_vnd_in, hook6_vnd_in_gz, ifs);
+ HOOK_INIT_GZ_AFTER(ifs->ifs_ipfhookvndl3v6_out, ipf_hookvndl3v6_out,
+ hook6_vnd_out, hook6_vnd_out_gz, ifs);
+ ifs->ifs_hookvndl3v6_physical_in = (net_hook_register(ifs->ifs_ipf_vndl3v6,
+ NH_PHYSICAL_IN, ifs->ifs_ipfhookvndl3v6_in) == 0);
+ if (!ifs->ifs_hookvndl3v6_physical_in)
+ goto hookup_failed;
+
+ ifs->ifs_hookvndl3v6_physical_out = (net_hook_register(ifs->ifs_ipf_vndl3v6,
+ NH_PHYSICAL_OUT, ifs->ifs_ipfhookvndl3v6_out) == 0);
+ if (!ifs->ifs_hookvndl3v6_physical_out)
+ goto hookup_failed;
+ /*
* Reacquire ipf_global, now it is safe.
*/
WRITE_ENTER(&ifs->ifs_ipf_global);
@@ -1011,7 +1096,6 @@ cred_t *cp;
return ENXIO;
unit = isp->ipfs_minor;
-
/*
* ipf_find_stack returns with a read lock on ifs_ipf_global
*/
@@ -2045,6 +2129,42 @@ int ipf_hook6_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
}
/* ------------------------------------------------------------------------ */
+/* Function: ipf_hookvndl3_in */
+/* Returns: int - 0 == packet ok, else problem, free packet if not done */
+/* Parameters: event(I) - pointer to event */
+/* info(I) - pointer to hook information for firewalling */
+/* */
+/* The vnd hooks are private hooks to ON. They represents a layer 2 */
+/* datapath generally used to implement virtual machines. The driver sends */
+/* along L3 packets of either type IP or IPv6. The ethertype to distinguish */
+/* them is in the upper 16 bits while the remaining bits are the */
+/* traditional packet hook flags. */
+/* */
+/* They end up calling the appropriate traditional ip hooks. */
+/* ------------------------------------------------------------------------ */
+/*ARGSUSED*/
+int ipf_hookvndl3v4_in(hook_event_token_t token, hook_data_t info, void *arg)
+{
+ return ipf_hook4_in(token, info, arg);
+}
+
+int ipf_hookvndl3v6_in(hook_event_token_t token, hook_data_t info, void *arg)
+{
+ return ipf_hook6_in(token, info, arg);
+}
+
+/*ARGSUSED*/
+int ipf_hookvndl3v4_out(hook_event_token_t token, hook_data_t info, void *arg)
+{
+ return ipf_hook4_out(token, info, arg);
+}
+
+int ipf_hookvndl3v6_out(hook_event_token_t token, hook_data_t info, void *arg)
+{
+ return ipf_hook6_out(token, info, arg);
+}
+
+/* ------------------------------------------------------------------------ */
/* Function: ipf_hook4_loop_in */
/* Returns: int - 0 == packet ok, else problem, free packet if not done */
/* Parameters: event(I) - pointer to event */
diff --git a/usr/src/uts/common/inet/ipf/ipf.conf b/usr/src/uts/common/inet/ipf/ipf.conf
index 6b36f9fdbf..f49e024a72 100644
--- a/usr/src/uts/common/inet/ipf/ipf.conf
+++ b/usr/src/uts/common/inet/ipf/ipf.conf
@@ -1,3 +1,8 @@
#
#
name="ipf" parent="pseudo" instance=0;
+
+# Increase the state table limits. fr_statemax should be ~70% of fr_statesize,
+# and both should be prime numbers
+fr_statesize=151007;
+fr_statemax=113279;
diff --git a/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h b/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h
index a239f1c1ca..9aa2478c6a 100644
--- a/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h
+++ b/usr/src/uts/common/inet/ipf/netinet/ipf_stack.h
@@ -125,6 +125,10 @@ struct ipf_stack {
hook_t *ifs_ipfhook6_loop_in;
hook_t *ifs_ipfhook6_loop_out;
hook_t *ifs_ipfhook6_nicevents;
+ hook_t *ifs_ipfhookvndl3v4_in;
+ hook_t *ifs_ipfhookvndl3v6_in;
+ hook_t *ifs_ipfhookvndl3v4_out;
+ hook_t *ifs_ipfhookvndl3v6_out;
/* flags to indicate whether hooks are registered. */
boolean_t ifs_hook4_physical_in;
@@ -137,10 +141,16 @@ struct ipf_stack {
boolean_t ifs_hook6_nic_events;
boolean_t ifs_hook6_loopback_in;
boolean_t ifs_hook6_loopback_out;
+ boolean_t ifs_hookvndl3v4_physical_in;
+ boolean_t ifs_hookvndl3v6_physical_in;
+ boolean_t ifs_hookvndl3v4_physical_out;
+ boolean_t ifs_hookvndl3v6_physical_out;
int ifs_ipf_loopback;
net_handle_t ifs_ipf_ipv4;
net_handle_t ifs_ipf_ipv6;
+ net_handle_t ifs_ipf_vndl3v4;
+ net_handle_t ifs_ipf_vndl3v6;
/* ip_auth.c */
int ifs_fr_authsize;
diff --git a/usr/src/uts/common/inet/ipf/solaris.c b/usr/src/uts/common/inet/ipf/solaris.c
index c541f4dddc..5d56debc31 100644
--- a/usr/src/uts/common/inet/ipf/solaris.c
+++ b/usr/src/uts/common/inet/ipf/solaris.c
@@ -625,7 +625,6 @@ ipf_stack_shutdown(const netid_t id, void *arg)
/*
* Destroy things for ipf for one stack.
*/
-/* ARGSUSED */
static void
ipf_stack_destroy_one(const netid_t id, ipf_stack_t *ifs)
{
diff --git a/usr/src/uts/common/inet/sockmods/datafilt.c b/usr/src/uts/common/inet/sockmods/datafilt.c
new file mode 100644
index 0000000000..6e1171de46
--- /dev/null
+++ b/usr/src/uts/common/inet/sockmods/datafilt.c
@@ -0,0 +1,116 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2012, OmniTI Computer Consulting, Inc. All rights reserved.
+ */
+
+/*
+ * This file implements a socketfilter used to deter TCP connections.
+ * To defer a connection means to delay the return of accept(3SOCKET)
+ * until at least one byte is ready to be read(2). This filter may be
+ * applied automatically or programmatically through the use of
+ * soconfig(1M) and setsockopt(3SOCKET).
+ */
+
+#include <sys/kmem.h>
+#include <sys/systm.h>
+#include <sys/stropts.h>
+#include <sys/strsun.h>
+#include <sys/socketvar.h>
+#include <sys/sockfilter.h>
+#include <sys/note.h>
+#include <sys/taskq.h>
+
+#define DATAFILT_MODULE "datafilt"
+
+static struct modlmisc dataf_modlmisc = {
+ &mod_miscops,
+ "Kernel data-ready socket filter"
+};
+
+static struct modlinkage dataf_modlinkage = {
+ MODREV_1,
+ &dataf_modlmisc,
+ NULL
+};
+
+static sof_rval_t
+dataf_attach_passive_cb(sof_handle_t handle, sof_handle_t ph,
+ void *parg, struct sockaddr *laddr, socklen_t laddrlen,
+ struct sockaddr *faddr, socklen_t faddrlen, void **cookiep)
+{
+ _NOTE(ARGUNUSED(handle, ph, parg, laddr, laddrlen, faddr, faddrlen,
+ cookiep));
+ return (SOF_RVAL_DEFER);
+}
+
+static void
+dataf_detach_cb(sof_handle_t handle, void *cookie, cred_t *cr)
+{
+ _NOTE(ARGUNUSED(handle, cookie, cr));
+}
+
+static mblk_t *
+dataf_data_in_cb(sof_handle_t handle, void *cookie, mblk_t *mp, int flags,
+ size_t *lenp)
+{
+ _NOTE(ARGUNUSED(cookie, flags, lenp));
+
+ if (mp != NULL && MBLKL(mp) > 0) {
+ sof_newconn_ready(handle);
+ sof_bypass(handle);
+ }
+
+ return (mp);
+}
+
+static sof_ops_t dataf_ops = {
+ .sofop_attach_passive = dataf_attach_passive_cb,
+ .sofop_detach = dataf_detach_cb,
+ .sofop_data_in = dataf_data_in_cb
+};
+
+int
+_init(void)
+{
+ int err;
+
+ /*
+ * This module is safe to attach even after some preliminary socket
+ * setup calls have taken place. See the comment for SOF_ATT_SAFE.
+ */
+ err = sof_register(SOF_VERSION, DATAFILT_MODULE, &dataf_ops,
+ SOF_ATT_SAFE);
+ if (err != 0)
+ return (err);
+ if ((err = mod_install(&dataf_modlinkage)) != 0)
+ (void) sof_unregister(DATAFILT_MODULE);
+
+ return (err);
+}
+
+int
+_fini(void)
+{
+ int err;
+
+ if ((err = sof_unregister(DATAFILT_MODULE)) != 0)
+ return (err);
+
+ return (mod_remove(&dataf_modlinkage));
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&dataf_modlinkage, modinfop));
+}
diff --git a/usr/src/uts/common/inet/squeue.c b/usr/src/uts/common/inet/squeue.c
index 2e08dc359b..1009f0700f 100644
--- a/usr/src/uts/common/inet/squeue.c
+++ b/usr/src/uts/common/inet/squeue.c
@@ -23,7 +23,7 @@
*/
/*
- * Copyright 2012 Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
*/
/*
@@ -61,6 +61,10 @@
* connection are processed on that squeue. The connection ("conn") to
* squeue mapping is stored in "conn_t" member "conn_sqp".
*
+ * If the squeue is not related to TCP/IP, then the value of sqp->sq_isip is
+ * false and it will not have an associated conn_t, which means many aspects of
+ * the system, such as polling and swtiching squeues will not be used.
+ *
* Since the processing of the connection cuts across multiple layers
* but still allows packets for different connnection to be processed on
* other CPU/squeues, squeues are also termed as "Vertical Perimeter" or
@@ -244,7 +248,7 @@ squeue_init(void)
/* ARGSUSED */
squeue_t *
-squeue_create(clock_t wait, pri_t pri)
+squeue_create(clock_t wait, pri_t pri, boolean_t isip)
{
squeue_t *sqp = kmem_cache_alloc(squeue_cache, KM_SLEEP);
@@ -260,11 +264,36 @@ squeue_create(clock_t wait, pri_t pri)
sqp->sq_enter = squeue_enter;
sqp->sq_drain = squeue_drain;
+ sqp->sq_isip = isip;
return (sqp);
}
/*
+ * We need to kill the threads and then clean up. We should VERIFY that
+ * polling is disabled so we don't have to worry about disassociating from
+ * MAC/IP/etc.
+ */
+void
+squeue_destroy(squeue_t *sqp)
+{
+ kt_did_t worker, poll;
+ mutex_enter(&sqp->sq_lock);
+ VERIFY(!(sqp->sq_state & (SQS_POLL_THR_QUIESCED |
+ SQS_POLL_QUIESCE_DONE | SQS_PAUSE | SQS_EXIT)));
+ worker = sqp->sq_worker->t_did;
+ poll = sqp->sq_poll_thr->t_did;
+ sqp->sq_state |= SQS_EXIT;
+ cv_signal(&sqp->sq_poll_cv);
+ cv_signal(&sqp->sq_worker_cv);
+ mutex_exit(&sqp->sq_lock);
+
+ thread_join(poll);
+ thread_join(worker);
+ kmem_cache_free(squeue_cache, sqp);
+}
+
+/*
* Bind squeue worker thread to the specified CPU, given by CPU id.
* If the CPU id value is -1, bind the worker thread to the value
* specified in sq_bind field. If a thread is already bound to a
@@ -475,18 +504,21 @@ squeue_enter(squeue_t *sqp, mblk_t *mp, mblk_t *tail, uint32_t cnt,
* Handle squeue switching. More details in the
* block comment at the top of the file
*/
- if (connp->conn_sqp == sqp) {
+ if (sqp->sq_isip == B_FALSE || connp->conn_sqp == sqp) {
SQUEUE_DBG_SET(sqp, mp, proc, connp,
tag);
- connp->conn_on_sqp = B_TRUE;
+ if (sqp->sq_isip == B_TRUE)
+ connp->conn_on_sqp = B_TRUE;
DTRACE_PROBE3(squeue__proc__start, squeue_t *,
sqp, mblk_t *, mp, conn_t *, connp);
(*proc)(connp, mp, sqp, ira);
DTRACE_PROBE2(squeue__proc__end, squeue_t *,
sqp, conn_t *, connp);
- connp->conn_on_sqp = B_FALSE;
+ if (sqp->sq_isip == B_TRUE) {
+ connp->conn_on_sqp = B_FALSE;
+ CONN_DEC_REF(connp);
+ }
SQUEUE_DBG_CLEAR(sqp);
- CONN_DEC_REF(connp);
} else {
SQUEUE_ENTER_ONE(connp->conn_sqp, mp, proc,
connp, ira, SQ_FILL, SQTAG_SQUEUE_CHANGE);
@@ -513,7 +545,7 @@ squeue_enter(squeue_t *sqp, mblk_t *mp, mblk_t *tail, uint32_t cnt,
return;
}
} else {
- if (ira != NULL) {
+ if (sqp->sq_isip == B_TRUE && ira != NULL) {
mblk_t *attrmp;
ASSERT(cnt == 1);
@@ -587,7 +619,8 @@ squeue_enter(squeue_t *sqp, mblk_t *mp, mblk_t *tail, uint32_t cnt,
if (!(sqp->sq_state & SQS_REENTER) &&
(process_flag != SQ_FILL) && (sqp->sq_first == NULL) &&
(sqp->sq_run == curthread) && (cnt == 1) &&
- (connp->conn_on_sqp == B_FALSE)) {
+ (sqp->sq_isip == B_FALSE ||
+ connp->conn_on_sqp == B_FALSE)) {
sqp->sq_state |= SQS_REENTER;
mutex_exit(&sqp->sq_lock);
@@ -602,15 +635,21 @@ squeue_enter(squeue_t *sqp, mblk_t *mp, mblk_t *tail, uint32_t cnt,
* Handle squeue switching. More details in the
* block comment at the top of the file
*/
- if (connp->conn_sqp == sqp) {
- connp->conn_on_sqp = B_TRUE;
+ if (sqp->sq_isip == B_FALSE || connp->conn_sqp == sqp) {
+ SQUEUE_DBG_SET(sqp, mp, proc, connp,
+ tag);
+ if (sqp->sq_isip == B_TRUE)
+ connp->conn_on_sqp = B_TRUE;
DTRACE_PROBE3(squeue__proc__start, squeue_t *,
sqp, mblk_t *, mp, conn_t *, connp);
(*proc)(connp, mp, sqp, ira);
DTRACE_PROBE2(squeue__proc__end, squeue_t *,
sqp, conn_t *, connp);
- connp->conn_on_sqp = B_FALSE;
- CONN_DEC_REF(connp);
+ if (sqp->sq_isip == B_TRUE) {
+ connp->conn_on_sqp = B_FALSE;
+ CONN_DEC_REF(connp);
+ }
+ SQUEUE_DBG_CLEAR(sqp);
} else {
SQUEUE_ENTER_ONE(connp->conn_sqp, mp, proc,
connp, ira, SQ_FILL, SQTAG_SQUEUE_CHANGE);
@@ -631,7 +670,7 @@ squeue_enter(squeue_t *sqp, mblk_t *mp, mblk_t *tail, uint32_t cnt,
#ifdef DEBUG
mp->b_tag = tag;
#endif
- if (ira != NULL) {
+ if (sqp->sq_isip && ira != NULL) {
mblk_t *attrmp;
ASSERT(cnt == 1);
@@ -779,7 +818,7 @@ again:
mp->b_prev = NULL;
/* Is there an ip_recv_attr_t to handle? */
- if (ip_recv_attr_is_mblk(mp)) {
+ if (sqp->sq_isip == B_TRUE && ip_recv_attr_is_mblk(mp)) {
mblk_t *attrmp = mp;
ASSERT(attrmp->b_cont != NULL);
@@ -804,20 +843,25 @@ again:
/*
- * Handle squeue switching. More details in the
- * block comment at the top of the file
+ * Handle squeue switching. More details in the block comment at
+ * the top of the file. non-IP squeues cannot switch, as there
+ * is no conn_t.
*/
- if (connp->conn_sqp == sqp) {
+ if (sqp->sq_isip == B_FALSE || connp->conn_sqp == sqp) {
SQUEUE_DBG_SET(sqp, mp, proc, connp,
mp->b_tag);
- connp->conn_on_sqp = B_TRUE;
+ if (sqp->sq_isip == B_TRUE)
+ connp->conn_on_sqp = B_TRUE;
DTRACE_PROBE3(squeue__proc__start, squeue_t *,
sqp, mblk_t *, mp, conn_t *, connp);
(*proc)(connp, mp, sqp, ira);
DTRACE_PROBE2(squeue__proc__end, squeue_t *,
sqp, conn_t *, connp);
- connp->conn_on_sqp = B_FALSE;
- CONN_DEC_REF(connp);
+ if (sqp->sq_isip == B_TRUE) {
+ connp->conn_on_sqp = B_FALSE;
+ CONN_DEC_REF(connp);
+ }
+ SQUEUE_DBG_CLEAR(sqp);
} else {
SQUEUE_ENTER_ONE(connp->conn_sqp, mp, proc, connp, ira,
SQ_FILL, SQTAG_SQUEUE_CHANGE);
@@ -1051,6 +1095,11 @@ squeue_polling_thread(squeue_t *sqp)
cv_wait(async, lock);
CALLB_CPR_SAFE_END(&cprinfo, lock);
+ if (sqp->sq_state & SQS_EXIT) {
+ mutex_exit(lock);
+ thread_exit();
+ }
+
ctl_state = sqp->sq_state & (SQS_POLL_THR_CONTROL |
SQS_POLL_THR_QUIESCED);
if (ctl_state != 0) {
@@ -1076,6 +1125,9 @@ squeue_polling_thread(squeue_t *sqp)
(SQS_PROC|SQS_POLLING|SQS_GET_PKTS)) ==
(SQS_PROC|SQS_POLLING|SQS_GET_PKTS));
+ /* Only IP related squeues should reach this point */
+ VERIFY(sqp->sq_isip == B_TRUE);
+
poll_again:
sq_rx_ring = sqp->sq_rx_ring;
sq_get_pkts = sq_rx_ring->rr_rx;
@@ -1205,6 +1257,7 @@ squeue_worker_thr_control(squeue_t *sqp)
ill_rx_ring_t *rx_ring;
ASSERT(MUTEX_HELD(&sqp->sq_lock));
+ VERIFY(sqp->sq_isip == B_TRUE);
if (sqp->sq_state & SQS_POLL_RESTART) {
/* Restart implies a previous quiesce. */
@@ -1316,6 +1369,11 @@ squeue_worker(squeue_t *sqp)
for (;;) {
for (;;) {
+ if (sqp->sq_state & SQS_EXIT) {
+ mutex_exit(lock);
+ thread_exit();
+ }
+
/*
* If the poll thread has handed control to us
* we need to break out of the wait.
@@ -1412,6 +1470,7 @@ squeue_synch_enter(conn_t *connp, mblk_t *use_mp)
again:
sqp = connp->conn_sqp;
+ VERIFY(sqp->sq_isip == B_TRUE);
mutex_enter(&sqp->sq_lock);
if (sqp->sq_first == NULL && !(sqp->sq_state & SQS_PROC)) {
@@ -1487,6 +1546,7 @@ void
squeue_synch_exit(conn_t *connp)
{
squeue_t *sqp = connp->conn_sqp;
+ VERIFY(sqp->sq_isip == B_TRUE);
mutex_enter(&sqp->sq_lock);
if (sqp->sq_run == curthread) {
diff --git a/usr/src/uts/common/inet/tcp.h b/usr/src/uts/common/inet/tcp.h
index b2b9973291..6ec2e6b2d7 100644
--- a/usr/src/uts/common/inet/tcp.h
+++ b/usr/src/uts/common/inet/tcp.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, Joyent, Inc. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
* Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 by Delphix. All rights reserved.
*/
@@ -134,6 +134,7 @@ typedef struct tcphdra_s {
struct conn_s;
struct tcp_listen_cnt_s;
+struct tcp_rg_s;
/*
* Control structure for each open TCP stream,
@@ -404,6 +405,13 @@ typedef struct tcp_s {
struct tcp_s *tcp_bind_hash_port; /* tcp_t's bound to the same lport */
struct tcp_s **tcp_ptpbhn;
+ /*
+ * Group of tcp_t entries bound to the same adress and port via
+ * SO_REUSEPORT. The pointer itself is protected by tf_lock in the
+ * containing tcps_bind_fanout slot.
+ */
+ struct tcp_rg_s *tcp_rg_bind;
+
uint_t tcp_maxpsz_multiplier;
uint32_t tcp_lso_max; /* maximum LSO payload */
diff --git a/usr/src/uts/common/inet/tcp/tcp.c b/usr/src/uts/common/inet/tcp/tcp.c
index fba7125690..cf046c968e 100644
--- a/usr/src/uts/common/inet/tcp/tcp.c
+++ b/usr/src/uts/common/inet/tcp/tcp.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, Joyent Inc. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
* Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013,2014 by Delphix. All rights reserved.
* Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved.
@@ -1423,6 +1423,21 @@ tcp_free(tcp_t *tcp)
tcp_close_mpp(&tcp->tcp_conn.tcp_eager_conn_ind);
/*
+ * Destroy any association with SO_REUSEPORT group.
+ */
+ if (tcp->tcp_rg_bind != NULL) {
+ /*
+ * This is only necessary for connections which enabled
+ * SO_REUSEPORT but were never bound. Such connections should
+ * be the one and only member of the tcp_rg_tp to which they
+ * have been associated.
+ */
+ VERIFY(tcp_rg_remove(tcp->tcp_rg_bind, tcp));
+ tcp_rg_destroy(tcp->tcp_rg_bind);
+ tcp->tcp_rg_bind = NULL;
+ }
+
+ /*
* If this is a non-STREAM socket still holding on to an upper
* handle, release it. As a result of fallback we might also see
* STREAMS based conns with upper handles, in which case there is
@@ -2054,8 +2069,7 @@ tcp_reinit(tcp_t *tcp)
* structure!
*/
static void
-tcp_reinit_values(tcp)
- tcp_t *tcp;
+tcp_reinit_values(tcp_t *tcp)
{
tcp_stack_t *tcps = tcp->tcp_tcps;
conn_t *connp = tcp->tcp_connp;
diff --git a/usr/src/uts/common/inet/tcp/tcp_bind.c b/usr/src/uts/common/inet/tcp/tcp_bind.c
index c6df39b91e..adc201eebb 100644
--- a/usr/src/uts/common/inet/tcp/tcp_bind.c
+++ b/usr/src/uts/common/inet/tcp/tcp_bind.c
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
#include <sys/types.h>
@@ -55,6 +56,7 @@ static uint32_t tcp_random_anon_port = 1;
static int tcp_bind_select_lport(tcp_t *, in_port_t *, boolean_t,
cred_t *cr);
static in_port_t tcp_get_next_priv_port(const tcp_t *);
+static int tcp_rg_insert(tcp_rg_t *, struct tcp_s *);
/*
* Hash list insertion routine for tcp_t structures. Each hash bucket
@@ -172,6 +174,16 @@ tcp_bind_hash_remove(tcp_t *tcp)
ASSERT(lockp != NULL);
mutex_enter(lockp);
+
+ /* destroy any association with SO_REUSEPORT group */
+ if (tcp->tcp_rg_bind != NULL) {
+ if (tcp_rg_remove(tcp->tcp_rg_bind, tcp)) {
+ /* Last one out turns off the lights */
+ tcp_rg_destroy(tcp->tcp_rg_bind);
+ }
+ tcp->tcp_rg_bind = NULL;
+ }
+
if (tcp->tcp_ptpbhn) {
tcpnext = tcp->tcp_bind_hash_port;
if (tcpnext != NULL) {
@@ -636,13 +648,12 @@ tcp_bind_check(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
}
/*
- * If the "bind_to_req_port_only" parameter is set, if the requested port
- * number is available, return it, If not return 0
+ * If the "bind_to_req_port_only" parameter is set and the requested port
+ * number is available, return it (else return 0).
*
- * If "bind_to_req_port_only" parameter is not set and
- * If the requested port number is available, return it. If not, return
- * the first anonymous port we happen across. If no anonymous ports are
- * available, return 0. addr is the requested local address, if any.
+ * If "bind_to_req_port_only" parameter is not set and the requested port
+ * number is available, return it. If not, return the first anonymous port we
+ * happen across. If no anonymous ports are available, return 0.
*
* In either case, when succeeding update the tcp_t to record the port number
* and insert it in the bind hash table.
@@ -662,6 +673,7 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr,
int loopmax;
conn_t *connp = tcp->tcp_connp;
tcp_stack_t *tcps = tcp->tcp_tcps;
+ boolean_t reuseport = connp->conn_reuseport;
/*
* Lookup for free addresses is done in a loop and "loopmax"
@@ -698,6 +710,7 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr,
tf_t *tbf;
tcp_t *ltcp;
conn_t *lconnp;
+ boolean_t attempt_reuse = B_FALSE;
lport = htons(port);
@@ -724,6 +737,7 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr,
for (; ltcp != NULL; ltcp = ltcp->tcp_bind_hash_port) {
boolean_t not_socket;
boolean_t exclbind;
+ boolean_t addrmatch;
lconnp = ltcp->tcp_connp;
@@ -829,22 +843,34 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr,
&lconnp->conn_faddr_v6)))
continue;
+ addrmatch = IN6_ARE_ADDR_EQUAL(laddr,
+ &lconnp->conn_bound_addr_v6);
+
+ if (addrmatch && reuseport && bind_to_req_port_only &&
+ (ltcp->tcp_state == TCPS_BOUND ||
+ ltcp->tcp_state == TCPS_LISTEN)) {
+ /*
+ * This entry is bound to the exact same
+ * address and port. If SO_REUSEPORT is set on
+ * the calling socket, attempt to reuse this
+ * binding if it too appears to be willing.
+ */
+ attempt_reuse = B_TRUE;
+ break;
+ }
+
if (!reuseaddr) {
/*
- * No socket option SO_REUSEADDR.
- * If existing port is bound to
- * a non-wildcard IP address
- * and the requesting stream is
- * bound to a distinct
- * different IP addresses
- * (non-wildcard, also), keep
- * going.
+ * No socket option SO_REUSEADDR. If an
+ * existing port is bound to a non-wildcard IP
+ * address and the requesting stream is bound
+ * to a distinct different IP address
+ * (non-wildcard, also), keep going.
*/
if (!V6_OR_V4_INADDR_ANY(*laddr) &&
!V6_OR_V4_INADDR_ANY(
lconnp->conn_bound_addr_v6) &&
- !IN6_ARE_ADDR_EQUAL(laddr,
- &lconnp->conn_bound_addr_v6))
+ !addrmatch)
continue;
if (ltcp->tcp_state >= TCPS_BOUND) {
/*
@@ -859,27 +885,47 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr,
* socket option SO_REUSEADDR is set on the
* binding tcp_t.
*
- * If two streams are bound to
- * same IP address or both addr
- * and bound source are wildcards
- * (INADDR_ANY), we want to stop
- * searching.
- * We have found a match of IP source
- * address and source port, which is
- * refused regardless of the
- * SO_REUSEADDR setting, so we break.
+ * If two streams are bound to the same IP
+ * address or both addr and bound source are
+ * wildcards (INADDR_ANY), we want to stop
+ * searching. We have found a match of IP
+ * source address and source port, which is
+ * refused regardless of the SO_REUSEADDR
+ * setting, so we break.
*/
- if (IN6_ARE_ADDR_EQUAL(laddr,
- &lconnp->conn_bound_addr_v6) &&
+ if (addrmatch &&
(ltcp->tcp_state == TCPS_LISTEN ||
ltcp->tcp_state == TCPS_BOUND))
break;
}
}
- if (ltcp != NULL) {
+ if (ltcp != NULL && !attempt_reuse) {
/* The port number is busy */
mutex_exit(&tbf->tf_lock);
} else {
+ if (attempt_reuse) {
+ int err;
+
+ ASSERT(ltcp != NULL);
+ ASSERT(ltcp->tcp_rg_bind != NULL);
+ ASSERT(tcp->tcp_rg_bind != NULL);
+ ASSERT(ltcp->tcp_rg_bind != tcp->tcp_rg_bind);
+
+ err = tcp_rg_insert(ltcp->tcp_rg_bind, tcp);
+ if (err != 0) {
+ mutex_exit(&tbf->tf_lock);
+ return (0);
+ }
+ /*
+ * Now that the newly-binding socket has joined
+ * the existing reuseport group on ltcp, it
+ * should clean up its own (empty) group.
+ */
+ VERIFY(tcp_rg_remove(tcp->tcp_rg_bind, tcp));
+ tcp_rg_destroy(tcp->tcp_rg_bind);
+ tcp->tcp_rg_bind = ltcp->tcp_rg_bind;
+ }
+
/*
* This port is ours. Insert in fanout and mark as
* bound to prevent others from getting the port
@@ -944,3 +990,125 @@ tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr,
} while (++count < loopmax);
return (0);
}
+
+/* Max number of members in TCP SO_REUSEPORT group */
+#define TCP_RG_SIZE_MAX 64
+/* Step size when expanding members array */
+#define TCP_RG_SIZE_STEP 2
+
+
+tcp_rg_t *
+tcp_rg_init(tcp_t *tcp)
+{
+ tcp_rg_t *rg;
+ rg = kmem_alloc(sizeof (tcp_rg_t), KM_NOSLEEP|KM_NORMALPRI);
+ if (rg == NULL)
+ return (NULL);
+ rg->tcprg_members = kmem_zalloc(2 * sizeof (tcp_t *),
+ KM_NOSLEEP|KM_NORMALPRI);
+ if (rg->tcprg_members == NULL) {
+ kmem_free(rg, sizeof (tcp_rg_t));
+ return (NULL);
+ }
+
+ mutex_init(&rg->tcprg_lock, NULL, MUTEX_DEFAULT, NULL);
+ rg->tcprg_size = 2;
+ rg->tcprg_count = 1;
+ rg->tcprg_active = 1;
+ rg->tcprg_members[0] = tcp;
+ return (rg);
+}
+
+void
+tcp_rg_destroy(tcp_rg_t *rg)
+{
+ mutex_enter(&rg->tcprg_lock);
+ ASSERT(rg->tcprg_count == 0);
+ ASSERT(rg->tcprg_active == 0);
+ kmem_free(rg->tcprg_members, rg->tcprg_size * sizeof (tcp_t *));
+ mutex_destroy(&rg->tcprg_lock);
+ kmem_free(rg, sizeof (struct tcp_rg_s));
+}
+
+static int
+tcp_rg_insert(tcp_rg_t *rg, tcp_t *tcp)
+{
+ mutex_enter(&rg->tcprg_lock);
+
+ VERIFY(rg->tcprg_size > 0);
+ VERIFY(rg->tcprg_count <= rg->tcprg_size);
+ if (rg->tcprg_count != 0) {
+ cred_t *oldcred = rg->tcprg_members[0]->tcp_connp->conn_cred;
+ cred_t *newcred = tcp->tcp_connp->conn_cred;
+
+ if (crgetuid(oldcred) != crgetuid(newcred) ||
+ crgetzoneid(oldcred) != crgetzoneid(newcred)) {
+ mutex_exit(&rg->tcprg_lock);
+ return (EPERM);
+ }
+ }
+
+ if (rg->tcprg_count == rg->tcprg_size) {
+ unsigned int oldalloc = rg->tcprg_size * sizeof (tcp_t *);
+ unsigned int newsize = rg->tcprg_size + TCP_RG_SIZE_STEP;
+ tcp_t **newmembers;
+
+ if (newsize > TCP_RG_SIZE_MAX) {
+ mutex_exit(&rg->tcprg_lock);
+ return (EINVAL);
+ }
+ newmembers = kmem_zalloc(newsize * sizeof (tcp_t *),
+ KM_NOSLEEP|KM_NORMALPRI);
+ if (newmembers == NULL) {
+ mutex_exit(&rg->tcprg_lock);
+ return (ENOMEM);
+ }
+ bcopy(rg->tcprg_members, newmembers, oldalloc);
+ kmem_free(rg->tcprg_members, oldalloc);
+ rg->tcprg_members = newmembers;
+ rg->tcprg_size = newsize;
+ }
+
+ rg->tcprg_members[rg->tcprg_count] = tcp;
+ rg->tcprg_count++;
+ rg->tcprg_active++;
+
+ mutex_exit(&rg->tcprg_lock);
+ return (0);
+}
+
+boolean_t
+tcp_rg_remove(tcp_rg_t *rg, tcp_t *tcp)
+{
+ int i;
+ boolean_t is_empty;
+
+ mutex_enter(&rg->tcprg_lock);
+ for (i = 0; i < rg->tcprg_count; i++) {
+ if (rg->tcprg_members[i] == tcp)
+ break;
+ }
+ /* The item should be present */
+ ASSERT(i < rg->tcprg_count);
+ /* Move the last member into this position */
+ rg->tcprg_count--;
+ rg->tcprg_members[i] = rg->tcprg_members[rg->tcprg_count];
+ rg->tcprg_members[rg->tcprg_count] = NULL;
+ if (tcp->tcp_connp->conn_reuseport != 0)
+ rg->tcprg_active--;
+ is_empty = (rg->tcprg_count == 0);
+ mutex_exit(&rg->tcprg_lock);
+ return (is_empty);
+}
+
+void
+tcp_rg_setactive(tcp_rg_t *rg, boolean_t is_active)
+{
+ mutex_enter(&rg->tcprg_lock);
+ if (is_active) {
+ rg->tcprg_active++;
+ } else {
+ rg->tcprg_active--;
+ }
+ mutex_exit(&rg->tcprg_lock);
+}
diff --git a/usr/src/uts/common/inet/tcp/tcp_input.c b/usr/src/uts/common/inet/tcp/tcp_input.c
index cf8e0c6bd4..7cfdb9a4a2 100644
--- a/usr/src/uts/common/inet/tcp/tcp_input.c
+++ b/usr/src/uts/common/inet/tcp/tcp_input.c
@@ -22,7 +22,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2011 Joyent, Inc. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
* Copyright (c) 2014 by Delphix. All rights reserved.
*/
@@ -99,7 +99,7 @@
* tcps_time_wait_interval since the period before upper layer closes the
* connection is not accounted for when tcp_time_wait_append() is called.
*
- * If uppser layer has closed the connection, call tcp_time_wait_append()
+ * If upper layer has closed the connection, call tcp_time_wait_append()
* directly.
*
*/
diff --git a/usr/src/uts/common/inet/tcp/tcp_opt_data.c b/usr/src/uts/common/inet/tcp/tcp_opt_data.c
index 1a5363bedc..835acd1b12 100644
--- a/usr/src/uts/common/inet/tcp/tcp_opt_data.c
+++ b/usr/src/uts/common/inet/tcp/tcp_opt_data.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
*/
#include <sys/types.h>
@@ -62,7 +63,8 @@ opdes_t tcp_opt_arr[] = {
{ SO_USELOOPBACK, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
},
{ SO_BROADCAST, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
-{ SO_REUSEADDR, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
+{ SO_REUSEADDR, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
+{ SO_REUSEPORT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
{ SO_OOBINLINE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
{ SO_TYPE, SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
{ SO_SNDBUF, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
@@ -483,6 +485,42 @@ tcp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
return (retval);
}
+static int
+tcp_set_reuseport(conn_t *connp, boolean_t do_enable)
+{
+ tcp_t *tcp = connp->conn_tcp;
+ struct tcp_rg_s *rg;
+
+ if (do_enable && !IPCL_IS_NONSTR(connp)) {
+ /*
+ * SO_REUSEPORT cannot be enabled on sockets which have fallen
+ * back to the STREAMS API.
+ */
+ return (EINVAL);
+ }
+ if (connp->conn_reuseport == 0 && do_enable) {
+ /* disabled -> enabled */
+ if (tcp->tcp_rg_bind != NULL) {
+ tcp_rg_setactive(tcp->tcp_rg_bind, do_enable);
+ } else {
+ if (tcp->tcp_state >= TCPS_BOUND ||
+ tcp->tcp_state <= TCPS_CLOSED)
+ return (EINVAL);
+ if ((rg = tcp_rg_init(tcp)) == NULL)
+ return (ENOMEM);
+ tcp->tcp_rg_bind = rg;
+ }
+ connp->conn_reuseport = 1;
+ } else if (connp->conn_reuseport != 0 && !do_enable) {
+ /* enabled -> disabled */
+ if (tcp->tcp_rg_bind != NULL) {
+ tcp_rg_setactive(tcp->tcp_rg_bind, do_enable);
+ }
+ connp->conn_reuseport = 0;
+ }
+ return (0);
+}
+
/*
* We declare as 'int' rather than 'void' to satisfy pfi_t arg requirements.
* Parameters are assumed to be verified by the caller.
@@ -653,6 +691,11 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
}
*outlenp = inlen;
return (0);
+ case SO_REUSEPORT:
+ if (!checkonly) {
+ return (tcp_set_reuseport(connp, *i1 != 0));
+ }
+ return (0);
}
break;
case IPPROTO_TCP:
@@ -769,14 +812,37 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
if (*i1 == 0) {
return (EINVAL);
} else if (tcp->tcp_ka_rinterval == 0) {
- if ((tcp->tcp_ka_abort_thres / *i1) <
- tcp->tcp_rto_min ||
- (tcp->tcp_ka_abort_thres / *i1) >
- tcp->tcp_rto_max)
- return (EINVAL);
+ /*
+ * When TCP_KEEPCNT is specified without first
+ * specifying a TCP_KEEPINTVL, we infer an
+ * interval based on a tunable specific to our
+ * stack: the tcp_keepalive_abort_interval.
+ * (Or the TCP_KEEPALIVE_ABORT_THRESHOLD, in
+ * the unlikely event that that has been set.)
+ * Given the abort interval's default value of
+ * 480 seconds, low TCP_KEEPCNT values can
+ * result in intervals that exceed the default
+ * maximum RTO of 60 seconds. Rather than
+ * fail in these cases, we (implicitly) clamp
+ * the interval at the maximum RTO; if the
+ * TCP_KEEPCNT is shortly followed by a
+ * TCP_KEEPINTVL (as we expect), the abort
+ * threshold will be recalculated correctly --
+ * and if a TCP_KEEPINTVL is not forthcoming,
+ * keep-alive will at least operate reasonably
+ * given the underconfigured state.
+ */
+ uint32_t interval;
- tcp->tcp_ka_rinterval =
- tcp->tcp_ka_abort_thres / *i1;
+ interval = tcp->tcp_ka_abort_thres / *i1;
+
+ if (interval < tcp->tcp_rto_min)
+ interval = tcp->tcp_rto_min;
+
+ if (interval > tcp->tcp_rto_max)
+ interval = tcp->tcp_rto_max;
+
+ tcp->tcp_ka_rinterval = interval;
} else {
if ((*i1 * tcp->tcp_ka_rinterval) <
tcps->tcps_keepalive_abort_interval_low ||
@@ -953,10 +1019,6 @@ tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
}
break;
case IPPROTO_IP:
- if (connp->conn_family != AF_INET) {
- *outlenp = 0;
- return (EINVAL);
- }
switch (name) {
case IP_SEC_OPT:
/*
diff --git a/usr/src/uts/common/inet/tcp/tcp_socket.c b/usr/src/uts/common/inet/tcp/tcp_socket.c
index a431bf63d1..8f535a5dd1 100644
--- a/usr/src/uts/common/inet/tcp/tcp_socket.c
+++ b/usr/src/uts/common/inet/tcp/tcp_socket.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
/* This file contains all TCP kernel socket related functions. */
@@ -1022,6 +1023,16 @@ tcp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
}
/*
+ * Do not allow fallback on connections making use of SO_REUSEPORT.
+ */
+ if (tcp->tcp_rg_bind != NULL) {
+ freeb(stropt_mp);
+ freeb(ordrel_mp);
+ squeue_synch_exit(connp);
+ return (EINVAL);
+ }
+
+ /*
* Both endpoints must be of the same type (either STREAMS or
* non-STREAMS) for fusion to be enabled. So if we are fused,
* we have to unfuse.
diff --git a/usr/src/uts/common/inet/tcp/tcp_time_wait.c b/usr/src/uts/common/inet/tcp/tcp_time_wait.c
index b470934da0..6600296b18 100644
--- a/usr/src/uts/common/inet/tcp/tcp_time_wait.c
+++ b/usr/src/uts/common/inet/tcp/tcp_time_wait.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, Joyent Inc. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
*/
/*
@@ -41,13 +41,13 @@
#include <inet/tcp_impl.h>
#include <inet/tcp_cluster.h>
-static void tcp_timewait_close(void *, mblk_t *, void *, ip_recv_attr_t *);
+static void tcp_time_wait_purge(tcp_t *, tcp_squeue_priv_t *);
+
+#define TW_BUCKET(t) \
+ (((t) / MSEC_TO_TICK(TCP_TIME_WAIT_DELAY)) % TCP_TIME_WAIT_BUCKETS)
+
+#define TW_BUCKET_NEXT(b) (((b) + 1) % TCP_TIME_WAIT_BUCKETS)
-/*
- * TCP_TIME_WAIT_DELAY governs how often the time_wait_collector runs.
- * Running it every 5 seconds seems to give the best results.
- */
-#define TCP_TIME_WAIT_DELAY ((hrtime_t)5 * NANOSEC)
/*
* Remove a connection from the list of detached TIME_WAIT connections.
@@ -56,17 +56,17 @@ static void tcp_timewait_close(void *, mblk_t *, void *, ip_recv_attr_t *);
* earlier call to tcp_time_wait_remove(); otherwise it returns B_TRUE.
*/
boolean_t
-tcp_time_wait_remove(tcp_t *tcp, tcp_squeue_priv_t *tcp_time_wait)
+tcp_time_wait_remove(tcp_t *tcp, tcp_squeue_priv_t *tsp)
{
boolean_t locked = B_FALSE;
- if (tcp_time_wait == NULL) {
- tcp_time_wait = *((tcp_squeue_priv_t **)
+ if (tsp == NULL) {
+ tsp = *((tcp_squeue_priv_t **)
squeue_getprivate(tcp->tcp_connp->conn_sqp, SQPRIVATE_TCP));
- mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
+ mutex_enter(&tsp->tcp_time_wait_lock);
locked = B_TRUE;
} else {
- ASSERT(MUTEX_HELD(&tcp_time_wait->tcp_time_wait_lock));
+ ASSERT(MUTEX_HELD(&tsp->tcp_time_wait_lock));
}
/* 0 means that the tcp_t has not been added to the time wait list. */
@@ -74,40 +74,34 @@ tcp_time_wait_remove(tcp_t *tcp, tcp_squeue_priv_t *tcp_time_wait)
ASSERT(tcp->tcp_time_wait_next == NULL);
ASSERT(tcp->tcp_time_wait_prev == NULL);
if (locked)
- mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
+ mutex_exit(&tsp->tcp_time_wait_lock);
return (B_FALSE);
}
ASSERT(TCP_IS_DETACHED(tcp));
ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);
+ ASSERT(tsp->tcp_time_wait_cnt > 0);
- if (tcp == tcp_time_wait->tcp_time_wait_head) {
- ASSERT(tcp->tcp_time_wait_prev == NULL);
- tcp_time_wait->tcp_time_wait_head = tcp->tcp_time_wait_next;
- if (tcp_time_wait->tcp_time_wait_head != NULL) {
- tcp_time_wait->tcp_time_wait_head->tcp_time_wait_prev =
- NULL;
- } else {
- tcp_time_wait->tcp_time_wait_tail = NULL;
- }
- } else if (tcp == tcp_time_wait->tcp_time_wait_tail) {
- ASSERT(tcp->tcp_time_wait_next == NULL);
- tcp_time_wait->tcp_time_wait_tail = tcp->tcp_time_wait_prev;
- ASSERT(tcp_time_wait->tcp_time_wait_tail != NULL);
- tcp_time_wait->tcp_time_wait_tail->tcp_time_wait_next = NULL;
- } else {
- ASSERT(tcp->tcp_time_wait_prev->tcp_time_wait_next == tcp);
- ASSERT(tcp->tcp_time_wait_next->tcp_time_wait_prev == tcp);
- tcp->tcp_time_wait_prev->tcp_time_wait_next =
- tcp->tcp_time_wait_next;
+ if (tcp->tcp_time_wait_next != NULL) {
tcp->tcp_time_wait_next->tcp_time_wait_prev =
tcp->tcp_time_wait_prev;
}
+ if (tcp->tcp_time_wait_prev != NULL) {
+ tcp->tcp_time_wait_prev->tcp_time_wait_next =
+ tcp->tcp_time_wait_next;
+ } else {
+ unsigned int bucket;
+
+ bucket = TW_BUCKET(tcp->tcp_time_wait_expire);
+ ASSERT(tsp->tcp_time_wait_bucket[bucket] == tcp);
+ tsp->tcp_time_wait_bucket[bucket] = tcp->tcp_time_wait_next;
+ }
tcp->tcp_time_wait_next = NULL;
tcp->tcp_time_wait_prev = NULL;
tcp->tcp_time_wait_expire = 0;
+ tsp->tcp_time_wait_cnt--;
if (locked)
- mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
+ mutex_exit(&tsp->tcp_time_wait_lock);
return (B_TRUE);
}
@@ -126,6 +120,7 @@ tcp_time_wait_remove(tcp_t *tcp, tcp_squeue_priv_t *tcp_time_wait)
((x)->tcp_connp->conn_ipversion == IPV6_VERSION && \
IN6_IS_ADDR_LOOPBACK(&(x)->tcp_connp->conn_laddr_v6)))
+
/*
* Add a connection to the list of detached TIME_WAIT connections
* and set its time to expire.
@@ -135,9 +130,10 @@ tcp_time_wait_append(tcp_t *tcp)
{
tcp_stack_t *tcps = tcp->tcp_tcps;
squeue_t *sqp = tcp->tcp_connp->conn_sqp;
- tcp_squeue_priv_t *tcp_time_wait =
+ tcp_squeue_priv_t *tsp =
*((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));
- hrtime_t firetime = 0;
+ int64_t now, schedule;
+ unsigned int bucket;
tcp_timers_stop(tcp);
@@ -146,6 +142,8 @@ tcp_time_wait_append(tcp_t *tcp)
ASSERT(tcp->tcp_ack_tid == 0);
/* must have happened at the time of detaching the tcp */
+ ASSERT(TCP_IS_DETACHED(tcp));
+ ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);
ASSERT(tcp->tcp_ptpahn == NULL);
ASSERT(tcp->tcp_flow_stopped == 0);
ASSERT(tcp->tcp_time_wait_next == NULL);
@@ -153,97 +151,112 @@ tcp_time_wait_append(tcp_t *tcp)
ASSERT(tcp->tcp_time_wait_expire == 0);
ASSERT(tcp->tcp_listener == NULL);
- tcp->tcp_time_wait_expire = ddi_get_lbolt64();
- if (IS_LOCAL_HOST(tcp)) {
- /*
- * This is the fastpath for handling localhost connections.
- * Since we don't have to worry about packets on the localhost
- * showing up after a long network delay, we want to expire
- * these quickly so the port range on the localhost doesn't
- * get starved by short-running, local apps.
- *
- * Leave tcp_time_wait_expire at the current time. This
- * essentially means the connection is expired now and it will
- * clean up the next time tcp_time_wait_collector runs. We set
- * firetime to use a short delay so that if we have to start a
- * tcp_time_wait_collector thread below, it runs soon instead
- * of after a delay of time_wait_interval. firetime being set
- * to a non-0 value is also our indicator that we should add
- * this connection to the head of the time wait list (since we
- * are already expired) so that its sure to get cleaned up on
- * the next run of tcp_time_wait_collector (which expects the
- * entries to appear in time-order and stops when it hits the
- * first non-expired entry).
- */
- firetime = TCP_TIME_WAIT_DELAY;
- } else {
- /*
- * Since tcp_time_wait_expire is lbolt64, it should not wrap
- * around in practice. Hence it cannot be 0. Note that zero
- * means that the tcp_t is not in the TIME_WAIT list.
- */
- tcp->tcp_time_wait_expire += MSEC_TO_TICK(
- tcps->tcps_time_wait_interval);
+ TCP_DBGSTAT(tcps, tcp_time_wait);
+ mutex_enter(&tsp->tcp_time_wait_lock);
+
+ /*
+ * Immediately expire loopback connections. Since there is no worry
+ * about packets on the local host showing up after a long network
+ * delay, this is safe and allows much higher rates of connection churn
+ * for applications operating locally.
+ *
+ * This typically bypasses the tcp_free_list fast path due to squeue
+ * re-entry for the loopback close operation.
+ */
+ if (tcp->tcp_loopback) {
+ tcp_time_wait_purge(tcp, tsp);
+ mutex_exit(&tsp->tcp_time_wait_lock);
+ return;
}
- ASSERT(TCP_IS_DETACHED(tcp));
- ASSERT(tcp->tcp_state == TCPS_TIME_WAIT);
- ASSERT(tcp->tcp_time_wait_next == NULL);
- ASSERT(tcp->tcp_time_wait_prev == NULL);
- TCP_DBGSTAT(tcps, tcp_time_wait);
+ /*
+ * In order to reap TIME_WAITs reliably, we should use a source of time
+ * that is not adjustable by the user. While it would be more accurate
+ * to grab this timestamp before (potentially) sleeping on the
+ * tcp_time_wait_lock, doing so complicates bucket addressing later.
+ */
+ now = ddi_get_lbolt64();
- mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
- if (tcp_time_wait->tcp_time_wait_head == NULL) {
- ASSERT(tcp_time_wait->tcp_time_wait_tail == NULL);
- tcp_time_wait->tcp_time_wait_head = tcp;
+ /*
+ * Each squeue uses an arbitrary time offset when scheduling
+ * expiration timers. This prevents the bucketing from forcing
+ * tcp_time_wait_collector to run in locksetup across squeues.
+ *
+ * This offset is (re)initialized when a new TIME_WAIT connection is
+ * added to an squeue which has no connections waiting to expire.
+ */
+ if (tsp->tcp_time_wait_tid == 0) {
+ ASSERT(tsp->tcp_time_wait_cnt == 0);
+ tsp->tcp_time_wait_offset =
+ now % MSEC_TO_TICK(TCP_TIME_WAIT_DELAY);
+ }
+ now -= tsp->tcp_time_wait_offset;
+
+ /*
+ * Use the netstack-defined timeout, rounded up to the minimum
+ * time_wait_collector interval.
+ */
+ schedule = now + MSEC_TO_TICK(tcps->tcps_time_wait_interval);
+ tcp->tcp_time_wait_expire = schedule;
+
+ /*
+ * Append the connection into the appropriate bucket.
+ */
+ bucket = TW_BUCKET(tcp->tcp_time_wait_expire);
+ tcp->tcp_time_wait_next = tsp->tcp_time_wait_bucket[bucket];
+ tsp->tcp_time_wait_bucket[bucket] = tcp;
+ if (tcp->tcp_time_wait_next != NULL) {
+ ASSERT(tcp->tcp_time_wait_next->tcp_time_wait_prev == NULL);
+ tcp->tcp_time_wait_next->tcp_time_wait_prev = tcp;
+ }
+ tsp->tcp_time_wait_cnt++;
+
+ /*
+ * Round delay up to the nearest bucket boundary.
+ */
+ schedule += MSEC_TO_TICK(TCP_TIME_WAIT_DELAY);
+ schedule -= schedule % MSEC_TO_TICK(TCP_TIME_WAIT_DELAY);
+
+ /*
+ * The newly inserted entry may require a tighter schedule for the
+ * expiration timer.
+ */
+ if (schedule < tsp->tcp_time_wait_schedule) {
+ callout_id_t old_tid = tsp->tcp_time_wait_tid;
+
+ tsp->tcp_time_wait_schedule = schedule;
+ tsp->tcp_time_wait_tid =
+ timeout_generic(CALLOUT_NORMAL,
+ tcp_time_wait_collector, sqp,
+ TICK_TO_NSEC(schedule - now),
+ CALLOUT_TCP_RESOLUTION, CALLOUT_FLAG_ROUNDUP);
/*
- * Even if the list was empty before, there may be a timer
- * running since a tcp_t can be removed from the list
- * in other places, such as tcp_clean_death(). So check if
- * a timer is needed.
- */
- if (tcp_time_wait->tcp_time_wait_tid == 0) {
- if (firetime == 0)
- firetime = (hrtime_t)
- (tcps->tcps_time_wait_interval + 1) *
- MICROSEC;
-
- tcp_time_wait->tcp_time_wait_tid =
- timeout_generic(CALLOUT_NORMAL,
- tcp_time_wait_collector, sqp, firetime,
- CALLOUT_TCP_RESOLUTION, CALLOUT_FLAG_ROUNDUP);
- }
- tcp_time_wait->tcp_time_wait_tail = tcp;
- } else {
- /*
- * The list is not empty, so a timer must be running. If not,
- * tcp_time_wait_collector() must be running on this
- * tcp_time_wait list at the same time.
+ * It is possible for the timer to fire before the untimeout
+ * action is able to complete. In that case, the exclusion
+ * offered by the tcp_time_wait_collector_active flag will
+ * prevent multiple collector threads from processing records
+ * simultaneously from the same squeue.
*/
- ASSERT(tcp_time_wait->tcp_time_wait_tid != 0 ||
- tcp_time_wait->tcp_time_wait_running);
- ASSERT(tcp_time_wait->tcp_time_wait_tail != NULL);
- ASSERT(tcp_time_wait->tcp_time_wait_tail->tcp_state ==
- TCPS_TIME_WAIT);
-
- if (firetime == 0) {
- /* add at end */
- tcp_time_wait->tcp_time_wait_tail->tcp_time_wait_next =
- tcp;
- tcp->tcp_time_wait_prev =
- tcp_time_wait->tcp_time_wait_tail;
- tcp_time_wait->tcp_time_wait_tail = tcp;
- } else {
- /* add at head */
- tcp->tcp_time_wait_next =
- tcp_time_wait->tcp_time_wait_head;
- tcp_time_wait->tcp_time_wait_head->tcp_time_wait_prev =
- tcp;
- tcp_time_wait->tcp_time_wait_head = tcp;
- }
+ mutex_exit(&tsp->tcp_time_wait_lock);
+ (void) untimeout_default(old_tid, 0);
+ return;
+ }
+
+ /*
+ * Start a fresh timer if none exists.
+ */
+ if (tsp->tcp_time_wait_schedule == 0) {
+ ASSERT(tsp->tcp_time_wait_tid == 0);
+
+ tsp->tcp_time_wait_schedule = schedule;
+ tsp->tcp_time_wait_tid =
+ timeout_generic(CALLOUT_NORMAL,
+ tcp_time_wait_collector, sqp,
+ TICK_TO_NSEC(schedule - now),
+ CALLOUT_TCP_RESOLUTION, CALLOUT_FLAG_ROUNDUP);
}
- mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
+ mutex_exit(&tsp->tcp_time_wait_lock);
}
/*
@@ -278,216 +291,287 @@ tcp_timewait_close(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy)
tcp_close_detached(tcp);
}
+
+static void
+tcp_time_wait_purge(tcp_t *tcp, tcp_squeue_priv_t *tsp)
+{
+ mblk_t *mp;
+ conn_t *connp = tcp->tcp_connp;
+ kmutex_t *lock;
+
+ ASSERT(MUTEX_HELD(&tsp->tcp_time_wait_lock));
+ ASSERT(connp->conn_fanout != NULL);
+
+ lock = &connp->conn_fanout->connf_lock;
+
+ /*
+ * This is essentially a TIME_WAIT reclaim fast path optimization for
+ * performance where the connection is checked under the fanout lock
+ * (so that no one else can get access to the conn_t) that the refcnt
+ * is 2 (one each for TCP and the classifier hash list). That is the
+ * case and clustering callbacks are not enabled, the conn can be
+ * removed under the fanout lock and avoid clean-up under the squeue.
+ *
+ * This optimization is forgone when clustering is enabled since the
+ * clustering callback must be made before setting the CONDEMNED flag
+ * and after dropping all locks
+ *
+ * See the comments in tcp_closei_local for additional information
+ * regarding the refcnt logic.
+ */
+ if (mutex_tryenter(lock)) {
+ mutex_enter(&connp->conn_lock);
+ if (connp->conn_ref == 2 && cl_inet_disconnect == NULL) {
+ ipcl_hash_remove_locked(connp, connp->conn_fanout);
+ /*
+ * Set the CONDEMNED flag now itself so that the refcnt
+ * cannot increase due to any walker.
+ */
+ connp->conn_state_flags |= CONN_CONDEMNED;
+ mutex_exit(&connp->conn_lock);
+ mutex_exit(lock);
+ if (tsp->tcp_free_list_cnt < tcp_free_list_max_cnt) {
+ /*
+ * Add to head of tcp_free_list
+ */
+ tcp_cleanup(tcp);
+ ASSERT(connp->conn_latch == NULL);
+ ASSERT(connp->conn_policy == NULL);
+ ASSERT(tcp->tcp_tcps == NULL);
+ ASSERT(connp->conn_netstack == NULL);
+
+ tcp->tcp_time_wait_next = tsp->tcp_free_list;
+ tcp->tcp_in_free_list = B_TRUE;
+ tsp->tcp_free_list = tcp;
+ tsp->tcp_free_list_cnt++;
+ } else {
+ /*
+ * Do not add to tcp_free_list
+ */
+ tcp_bind_hash_remove(tcp);
+ ixa_cleanup(tcp->tcp_connp->conn_ixa);
+ tcp_ipsec_cleanup(tcp);
+ CONN_DEC_REF(tcp->tcp_connp);
+ }
+
+ /*
+ * With the fast-path complete, we can bail.
+ */
+ return;
+ } else {
+ /*
+ * Fall back to slow path.
+ */
+ CONN_INC_REF_LOCKED(connp);
+ mutex_exit(&connp->conn_lock);
+ mutex_exit(lock);
+ }
+ } else {
+ CONN_INC_REF(connp);
+ }
+
+ /*
+ * We can reuse the closemp here since conn has detached (otherwise we
+ * wouldn't even be in time_wait list). It is safe to change
+ * tcp_closemp_used without taking a lock as no other thread can
+ * concurrently access it at this point in the connection lifecycle.
+ */
+ if (tcp->tcp_closemp.b_prev == NULL) {
+ tcp->tcp_closemp_used = B_TRUE;
+ } else {
+ cmn_err(CE_PANIC,
+ "tcp_timewait_collector: concurrent use of tcp_closemp: "
+ "connp %p tcp %p\n", (void *)connp, (void *)tcp);
+ }
+
+ TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
+ mp = &tcp->tcp_closemp;
+ mutex_exit(&tsp->tcp_time_wait_lock);
+ SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_timewait_close, connp, NULL,
+ SQ_FILL, SQTAG_TCP_TIMEWAIT);
+ mutex_enter(&tsp->tcp_time_wait_lock);
+}
+
/*
- * Blows away all tcps whose TIME_WAIT has expired. List traversal
- * is done forwards from the head.
- * This walks all stack instances since
- * tcp_time_wait remains global across all stacks.
+ * Purge any tcp_t instances associated with this squeue which have expired
+ * from the TIME_WAIT state.
*/
-/* ARGSUSED */
void
tcp_time_wait_collector(void *arg)
{
tcp_t *tcp;
- int64_t now;
- mblk_t *mp;
- conn_t *connp;
- kmutex_t *lock;
- boolean_t removed;
- extern void (*cl_inet_disconnect)(netstackid_t, uint8_t, sa_family_t,
- uint8_t *, in_port_t, uint8_t *, in_port_t, void *);
+ int64_t now, active_schedule, new_schedule;
+ unsigned int idx;
squeue_t *sqp = (squeue_t *)arg;
- tcp_squeue_priv_t *tcp_time_wait =
+ tcp_squeue_priv_t *tsp =
*((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));
- mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
- tcp_time_wait->tcp_time_wait_tid = 0;
-#ifdef DEBUG
- tcp_time_wait->tcp_time_wait_running = B_TRUE;
-#endif
+ mutex_enter(&tsp->tcp_time_wait_lock);
+
+ /*
+ * Because of timer scheduling complexity and the fact that the
+ * tcp_time_wait_lock is dropped during tcp_time_wait_purge, it is
+ * possible for multiple tcp_time_wait_collector threads to run against
+ * the same squeue. This flag is used to exclude other collectors from
+ * the squeue during execution.
+ */
+ if (tsp->tcp_time_wait_collector_active) {
+ mutex_exit(&tsp->tcp_time_wait_lock);
+ return;
+ }
+ tsp->tcp_time_wait_collector_active = B_TRUE;
- if (tcp_time_wait->tcp_free_list != NULL &&
- tcp_time_wait->tcp_free_list->tcp_in_free_list == B_TRUE) {
+ /*
+ * Purge the free list if necessary
+ */
+ if (tsp->tcp_free_list != NULL) {
TCP_G_STAT(tcp_freelist_cleanup);
- while ((tcp = tcp_time_wait->tcp_free_list) != NULL) {
- tcp_time_wait->tcp_free_list = tcp->tcp_time_wait_next;
+ while ((tcp = tsp->tcp_free_list) != NULL) {
+ tsp->tcp_free_list = tcp->tcp_time_wait_next;
tcp->tcp_time_wait_next = NULL;
- tcp_time_wait->tcp_free_list_cnt--;
+ tsp->tcp_free_list_cnt--;
ASSERT(tcp->tcp_tcps == NULL);
CONN_DEC_REF(tcp->tcp_connp);
}
- ASSERT(tcp_time_wait->tcp_free_list_cnt == 0);
+ ASSERT(tsp->tcp_free_list_cnt == 0);
}
/*
- * In order to reap time waits reliably, we should use a
- * source of time that is not adjustable by the user -- hence
- * the call to ddi_get_lbolt64().
+ * If there are no connections pending, clear timer-related state to be
+ * reinitialized by the next caller.
*/
- now = ddi_get_lbolt64();
- while ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL) {
+ if (tsp->tcp_time_wait_cnt == 0) {
+ tsp->tcp_time_wait_offset = 0;
+ tsp->tcp_time_wait_schedule = 0;
+ tsp->tcp_time_wait_tid = 0;
+ tsp->tcp_time_wait_collector_active = B_FALSE;
+ mutex_exit(&tsp->tcp_time_wait_lock);
+ return;
+ }
+
+ /*
+ * Grab the bucket which we were scheduled to cleanse.
+ */
+ active_schedule = tsp->tcp_time_wait_schedule;
+ idx = TW_BUCKET(active_schedule - 1);
+ now = ddi_get_lbolt64() - tsp->tcp_time_wait_offset;
+retry:
+ tcp = tsp->tcp_time_wait_bucket[idx];
+
+ while (tcp != NULL) {
/*
- * lbolt64 should not wrap around in practice... So we can
- * do a direct comparison.
+ * Since the bucket count is sized to prevent wrap-around
+ * during typical operation and timers are schedule to process
+ * buckets with only expired connections, there is only one
+ * reason to encounter a connection expiring in the future:
+ * The tcp_time_wait_collector thread has been so delayed in
+ * its processing that connections have wrapped around the
+ * timing wheel into this bucket.
+ *
+ * In that case, the remaining entires in the bucket can be
+ * ignored since, being appended sequentially, they should all
+ * expire in the future.
*/
- if (now < tcp->tcp_time_wait_expire)
+ if (now < tcp->tcp_time_wait_expire) {
break;
+ }
- removed = tcp_time_wait_remove(tcp, tcp_time_wait);
- ASSERT(removed);
+ /*
+ * Pull the connection out of the bucket.
+ */
+ VERIFY(tcp_time_wait_remove(tcp, tsp));
- connp = tcp->tcp_connp;
- ASSERT(connp->conn_fanout != NULL);
- lock = &connp->conn_fanout->connf_lock;
/*
- * This is essentially a TW reclaim fast path optimization for
- * performance where the timewait collector checks under the
- * fanout lock (so that no one else can get access to the
- * conn_t) that the refcnt is 2 i.e. one for TCP and one for
- * the classifier hash list. If ref count is indeed 2, we can
- * just remove the conn under the fanout lock and avoid
- * cleaning up the conn under the squeue, provided that
- * clustering callbacks are not enabled. If clustering is
- * enabled, we need to make the clustering callback before
- * setting the CONDEMNED flag and after dropping all locks and
- * so we forego this optimization and fall back to the slow
- * path. Also please see the comments in tcp_closei_local
- * regarding the refcnt logic.
+ * Purge the connection.
*
- * Since we are holding the tcp_time_wait_lock, its better
- * not to block on the fanout_lock because other connections
- * can't add themselves to time_wait list. So we do a
- * tryenter instead of mutex_enter.
+ * While tcp_time_wait_lock will be temporarily dropped as part
+ * of the process, there is no risk of the timer being
+ * (re)scheduled while the collector is running since a value
+ * corresponding to the past is left in tcp_time_wait_schedule.
*/
- if (mutex_tryenter(lock)) {
- mutex_enter(&connp->conn_lock);
- if ((connp->conn_ref == 2) &&
- (cl_inet_disconnect == NULL)) {
- ipcl_hash_remove_locked(connp,
- connp->conn_fanout);
- /*
- * Set the CONDEMNED flag now itself so that
- * the refcnt cannot increase due to any
- * walker.
- */
- connp->conn_state_flags |= CONN_CONDEMNED;
- mutex_exit(lock);
- mutex_exit(&connp->conn_lock);
- if (tcp_time_wait->tcp_free_list_cnt <
- tcp_free_list_max_cnt) {
- /* Add to head of tcp_free_list */
- mutex_exit(
- &tcp_time_wait->tcp_time_wait_lock);
- tcp_cleanup(tcp);
- ASSERT(connp->conn_latch == NULL);
- ASSERT(connp->conn_policy == NULL);
- ASSERT(tcp->tcp_tcps == NULL);
- ASSERT(connp->conn_netstack == NULL);
-
- mutex_enter(
- &tcp_time_wait->tcp_time_wait_lock);
- tcp->tcp_time_wait_next =
- tcp_time_wait->tcp_free_list;
- tcp_time_wait->tcp_free_list = tcp;
- tcp_time_wait->tcp_free_list_cnt++;
- continue;
- } else {
- /* Do not add to tcp_free_list */
- mutex_exit(
- &tcp_time_wait->tcp_time_wait_lock);
- tcp_bind_hash_remove(tcp);
- ixa_cleanup(tcp->tcp_connp->conn_ixa);
- tcp_ipsec_cleanup(tcp);
- CONN_DEC_REF(tcp->tcp_connp);
- }
- } else {
- CONN_INC_REF_LOCKED(connp);
- mutex_exit(lock);
- mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
- mutex_exit(&connp->conn_lock);
- /*
- * We can reuse the closemp here since conn has
- * detached (otherwise we wouldn't even be in
- * time_wait list). tcp_closemp_used can safely
- * be changed without taking a lock as no other
- * thread can concurrently access it at this
- * point in the connection lifecycle.
- */
+ tcp_time_wait_purge(tcp, tsp);
- if (tcp->tcp_closemp.b_prev == NULL)
- tcp->tcp_closemp_used = B_TRUE;
- else
- cmn_err(CE_PANIC,
- "tcp_timewait_collector: "
- "concurrent use of tcp_closemp: "
- "connp %p tcp %p\n", (void *)connp,
- (void *)tcp);
-
- TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
- mp = &tcp->tcp_closemp;
- SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
- tcp_timewait_close, connp, NULL,
- SQ_FILL, SQTAG_TCP_TIMEWAIT);
- }
- } else {
- mutex_enter(&connp->conn_lock);
- CONN_INC_REF_LOCKED(connp);
- mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
- mutex_exit(&connp->conn_lock);
- /*
- * We can reuse the closemp here since conn has
- * detached (otherwise we wouldn't even be in
- * time_wait list). tcp_closemp_used can safely
- * be changed without taking a lock as no other
- * thread can concurrently access it at this
- * point in the connection lifecycle.
- */
+ /*
+ * Because tcp_time_wait_remove clears the tcp_time_wait_next
+ * field, the next item must be grabbed directly from the
+ * bucket itself.
+ */
+ tcp = tsp->tcp_time_wait_bucket[idx];
+ }
+
+ if (tsp->tcp_time_wait_cnt == 0) {
+ /*
+ * There is not a need for the collector to schedule a new
+ * timer if no pending items remain. The timer state can be
+ * cleared only if it was untouched while the collector dropped
+ * its locks during tcp_time_wait_purge.
+ */
+ if (tsp->tcp_time_wait_schedule == active_schedule) {
+ tsp->tcp_time_wait_offset = 0;
+ tsp->tcp_time_wait_schedule = 0;
+ tsp->tcp_time_wait_tid = 0;
+ }
+ tsp->tcp_time_wait_collector_active = B_FALSE;
+ mutex_exit(&tsp->tcp_time_wait_lock);
+ return;
+ } else {
+ unsigned int nidx;
- if (tcp->tcp_closemp.b_prev == NULL)
- tcp->tcp_closemp_used = B_TRUE;
- else
- cmn_err(CE_PANIC, "tcp_timewait_collector: "
- "concurrent use of tcp_closemp: "
- "connp %p tcp %p\n", (void *)connp,
- (void *)tcp);
-
- TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
- mp = &tcp->tcp_closemp;
- SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
- tcp_timewait_close, connp, NULL,
- SQ_FILL, SQTAG_TCP_TIMEWAIT);
+ /*
+ * Locate the next bucket containing entries.
+ */
+ new_schedule = active_schedule
+ + MSEC_TO_TICK(TCP_TIME_WAIT_DELAY);
+ nidx = TW_BUCKET_NEXT(idx);
+ while (tsp->tcp_time_wait_bucket[nidx] == NULL) {
+ if (nidx == idx) {
+ break;
+ }
+ nidx = TW_BUCKET_NEXT(nidx);
+ new_schedule += MSEC_TO_TICK(TCP_TIME_WAIT_DELAY);
}
- mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
+ ASSERT(tsp->tcp_time_wait_bucket[nidx] != NULL);
}
- if (tcp_time_wait->tcp_free_list != NULL)
- tcp_time_wait->tcp_free_list->tcp_in_free_list = B_TRUE;
+ /*
+ * It is possible that the system is under such dire load that between
+ * the timer scheduling and TIME_WAIT processing delay, execution
+ * overran the interval allocated to this bucket.
+ */
+ now = ddi_get_lbolt64() - tsp->tcp_time_wait_offset;
+ if (new_schedule <= now) {
+ /*
+ * Attempt to right the situation by immediately performing a
+ * purge on the next bucket. This loop will continue as needed
+ * until the schedule can be pushed out ahead of the clock.
+ */
+ idx = TW_BUCKET(new_schedule - 1);
+ goto retry;
+ }
/*
- * If the time wait list is not empty and there is no timer running,
- * restart it.
+ * Another thread may have snuck in to reschedule the timer while locks
+ * were dropped during tcp_time_wait_purge. Defer to the running timer
+ * if that is the case.
*/
- if ((tcp = tcp_time_wait->tcp_time_wait_head) != NULL &&
- tcp_time_wait->tcp_time_wait_tid == 0) {
- hrtime_t firetime;
-
- /* shouldn't be necessary, but just in case */
- if (tcp->tcp_time_wait_expire < now)
- tcp->tcp_time_wait_expire = now;
-
- firetime = TICK_TO_NSEC(tcp->tcp_time_wait_expire - now);
- /* This ensures that we won't wake up too often. */
- firetime = MAX(TCP_TIME_WAIT_DELAY, firetime);
- tcp_time_wait->tcp_time_wait_tid =
- timeout_generic(CALLOUT_NORMAL, tcp_time_wait_collector,
- sqp, firetime, CALLOUT_TCP_RESOLUTION,
- CALLOUT_FLAG_ROUNDUP);
+ if (tsp->tcp_time_wait_schedule != active_schedule) {
+ tsp->tcp_time_wait_collector_active = B_FALSE;
+ mutex_exit(&tsp->tcp_time_wait_lock);
+ return;
}
-#ifdef DEBUG
- tcp_time_wait->tcp_time_wait_running = B_FALSE;
-#endif
- mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
+
+ /*
+ * Schedule the next timer.
+ */
+ tsp->tcp_time_wait_schedule = new_schedule;
+ tsp->tcp_time_wait_tid =
+ timeout_generic(CALLOUT_NORMAL,
+ tcp_time_wait_collector, sqp,
+ TICK_TO_NSEC(new_schedule - now),
+ CALLOUT_TCP_RESOLUTION, CALLOUT_FLAG_ROUNDUP);
+ tsp->tcp_time_wait_collector_active = B_FALSE;
+ mutex_exit(&tsp->tcp_time_wait_lock);
}
/*
diff --git a/usr/src/uts/common/inet/tcp/tcp_tunables.c b/usr/src/uts/common/inet/tcp/tcp_tunables.c
index be75f1f663..f4d6c71914 100644
--- a/usr/src/uts/common/inet/tcp/tcp_tunables.c
+++ b/usr/src/uts/common/inet/tcp/tcp_tunables.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, Joyent Inc. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
*/
@@ -249,7 +249,7 @@ mod_prop_info_t tcp_propinfo_tbl[] = {
/* tunable - 0 */
{ "_time_wait_interval", MOD_PROTO_TCP,
mod_set_uint32, mod_get_uint32,
- {1*SECONDS, 10*MINUTES, 1*MINUTES}, {1*MINUTES} },
+ {1*SECONDS, TCP_TIME_WAIT_MAX, 1*MINUTES}, {1*MINUTES} },
{ "_conn_req_max_q", MOD_PROTO_TCP,
mod_set_uint32, mod_get_uint32,
@@ -307,7 +307,7 @@ mod_prop_info_t tcp_propinfo_tbl[] = {
{ "_keepalive_interval", MOD_PROTO_TCP,
mod_set_uint32, mod_get_uint32,
- {10*SECONDS, 10*DAYS, 2*HOURS}, {2*HOURS} },
+ {1*SECONDS, 10*DAYS, 2*HOURS}, {2*HOURS} },
{ "_maxpsz_multiplier", MOD_PROTO_TCP,
mod_set_uint32, mod_get_uint32,
diff --git a/usr/src/uts/common/inet/tcp_impl.h b/usr/src/uts/common/inet/tcp_impl.h
index 0f0f915a2b..cb83b91fad 100644
--- a/usr/src/uts/common/inet/tcp_impl.h
+++ b/usr/src/uts/common/inet/tcp_impl.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, Joyent Inc. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
* Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
* Copyright (c) 2013, 2014 by Delphix. All rights reserved.
*/
@@ -61,9 +61,9 @@ extern sock_downcalls_t sock_tcp_downcalls;
* by setting it to 0.
*/
#define TCP_XMIT_LOWATER 4096
-#define TCP_XMIT_HIWATER 49152
+#define TCP_XMIT_HIWATER 128000
#define TCP_RECV_LOWATER 2048
-#define TCP_RECV_HIWATER 128000
+#define TCP_RECV_HIWATER 1048576
/*
* Bind hash list size and has function. It has to be a power of 2 for
@@ -105,7 +105,7 @@ extern sock_downcalls_t sock_tcp_downcalls;
*/
#define TCP_IS_DETACHED(tcp) ((tcp)->tcp_detached)
-/* TCP timers related data strucutres. Refer to tcp_timers.c. */
+/* TCP timers related data structures. Refer to tcp_timers.c. */
typedef struct tcp_timer_s {
conn_t *connp;
void (*tcpt_proc)(void *);
@@ -132,48 +132,79 @@ extern kmem_cache_t *tcp_timercache;
(tcp)->tcp_timer_tid = TCP_TIMER((tcp), tcp_timer, (intvl)); \
}
+
+/*
+ * Maximum TIME_WAIT timeout. It is defined here (instead of tcp_tunables.c)
+ * so that other parameters can be derived from it.
+ */
+#define TCP_TIME_WAIT_MAX (10 * MINUTES)
+
+/*
+ * TCP_TIME_WAIT_DELAY governs how often the time_wait_collector runs.
+ * Running it every 5 seconds seems to yield a reasonable balance between
+ * cleanup liveliness and system load.
+ */
+#define TCP_TIME_WAIT_DELAY (5 * SECONDS)
+
+#define TCP_TIME_WAIT_BUCKETS ((TCP_TIME_WAIT_MAX / TCP_TIME_WAIT_DELAY) + 1)
+
/*
* For scalability, we must not run a timer for every TCP connection
* in TIME_WAIT state. To see why, consider (for time wait interval of
* 1 minutes):
* 10,000 connections/sec * 60 seconds/time wait = 600,000 active conn's
*
- * This list is ordered by time, so you need only delete from the head
- * until you get to entries which aren't old enough to delete yet.
- * The list consists of only the detached TIME_WAIT connections.
+ * Since TIME_WAIT expiration occurs on a per-squeue basis, handling
+ * connections from all netstacks on the system, a simple queue is inadequate
+ * for pending entries. This is because tcp_time_wait_interval may differ
+ * between connections, causing tail insertion to violate expiration order.
+ *
+ * Instead of performing expensive sorting or unnecessary list traversal to
+ * counteract interval variance between netstacks, a timing wheel structure is
+ * used. The duration covered by each bucket in the wheel is determined by the
+ * TCP_TIME_WAIT_DELAY (5 seconds). The number of buckets in the wheel is
+ * determined by dividing the maximum TIME_WAIT interval (10 minutes) by
+ * TCP_TIME_WAIT_DELAY, with one added bucket for rollover protection.
+ * (Yielding 121 buckets with the current parameters) When items are inserted
+ * into the set of buckets, they are indexed by using their expiration time
+ * divided by the bucket size, modulo the number of buckets. This means that
+ * when each bucket is processed, all items within should have expired within
+ * the last TCP_TIME_WAIT_DELAY interval.
+ *
+ * Since bucket timer schedules are rounded to the nearest TCP_TIME_WAIT_DELAY
+ * interval to ensure all connections in the pending bucket will be expired, a
+ * per-squeue offset is used when doing TIME_WAIT scheduling. This offset is
+ * between 0 and the TCP_TIME_WAIT_DELAY and is designed to avoid scheduling
+ * all of the tcp_time_wait_collector threads to run in lock-step. The offset
+ * is fixed while there are any connections present in the buckets.
*
* When a tcp_t enters TIME_WAIT state, a timer is started (timeout is
* tcps_time_wait_interval). When the tcp_t is detached (upper layer closes
- * the end point), it is moved to the time wait list and another timer is
- * started (expiry time is set at tcp_time_wait_expire, which is
- * also calculated using tcps_time_wait_interval). This means that the
- * TIME_WAIT state can be extended (up to doubled) if the tcp_t doesn't
- * become detached for a long time.
+ * the end point), it is scheduled to be cleaned up by the squeue-driving
+ * tcp_time_wait_collector (also using tcps_time_wait_interval). This means
+ * that the TIME_WAIT state can be extended (up to doubled) if the tcp_t
+ * doesn't become detached for a long time.
*
* The list manipulations (including tcp_time_wait_next/prev)
* are protected by the tcp_time_wait_lock. The content of the
* detached TIME_WAIT connections is protected by the normal perimeters.
*
- * This list is per squeue and squeues are shared across the tcp_stack_t's.
- * Things on tcp_time_wait_head remain associated with the tcp_stack_t
- * and conn_netstack.
- * The tcp_t's that are added to tcp_free_list are disassociated and
- * have NULL tcp_tcps and conn_netstack pointers.
+ * These connection lists are per squeue and squeues are shared across the
+ * tcp_stack_t instances. Things in a tcp_time_wait_bucket remain associated
+ * with the tcp_stack_t and conn_netstack. Any tcp_t connections stored in the
+ * tcp_free_list are disassociated and have NULL tcp_tcps and conn_netstack
+ * pointers.
*/
typedef struct tcp_squeue_priv_s {
kmutex_t tcp_time_wait_lock;
+ boolean_t tcp_time_wait_collector_active;
callout_id_t tcp_time_wait_tid;
- tcp_t *tcp_time_wait_head;
- tcp_t *tcp_time_wait_tail;
+ uint64_t tcp_time_wait_cnt;
+ int64_t tcp_time_wait_schedule;
+ int64_t tcp_time_wait_offset;
+ tcp_t *tcp_time_wait_bucket[TCP_TIME_WAIT_BUCKETS];
tcp_t *tcp_free_list;
uint_t tcp_free_list_cnt;
-#ifdef DEBUG
- /*
- * For debugging purpose, true when tcp_time_wait_collector() is
- * running.
- */
- boolean_t tcp_time_wait_running;
-#endif
} tcp_squeue_priv_t;
/*
@@ -375,6 +406,22 @@ typedef struct tcp_listen_cnt_s {
uint32_t tlc_drop;
} tcp_listen_cnt_t;
+/*
+ * Track tcp_t entities bound to the same port/address tuple via SO_REUSEPORT.
+ * - tcprg_lock: Protects the other fields
+ * - tcprg_size: Allocated size (in entries) of tcprg_members array
+ * - tcprg_count: Count of occupied tcprg_members slots
+ * - tcprg_active: Count of members which still have SO_REUSEPORT set
+ * - tcprg_members: Connections associated with address/port group
+ */
+typedef struct tcp_rg_s {
+ kmutex_t tcprg_lock;
+ unsigned int tcprg_size;
+ unsigned int tcprg_count;
+ unsigned int tcprg_active;
+ tcp_t **tcprg_members;
+} tcp_rg_t;
+
#define TCP_TLC_REPORT_INTERVAL (30 * MINUTES)
#define TCP_DECR_LISTEN_CNT(tcp) \
@@ -618,6 +665,10 @@ extern in_port_t tcp_bindi(tcp_t *, in_port_t, const in6_addr_t *,
int, boolean_t, boolean_t, boolean_t);
extern in_port_t tcp_update_next_port(in_port_t, const tcp_t *,
boolean_t);
+extern tcp_rg_t *tcp_rg_init(tcp_t *);
+extern boolean_t tcp_rg_remove(tcp_rg_t *, tcp_t *);
+extern void tcp_rg_destroy(tcp_rg_t *);
+extern void tcp_rg_setactive(tcp_rg_t *, boolean_t);
/*
* Fusion related functions in tcp_fusion.c.
diff --git a/usr/src/uts/common/inet/udp/udp.c b/usr/src/uts/common/inet/udp/udp.c
index 5a15aea4de..a88bac932c 100644
--- a/usr/src/uts/common/inet/udp/udp.c
+++ b/usr/src/uts/common/inet/udp/udp.c
@@ -22,6 +22,7 @@
* Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
* Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
/* Copyright (c) 1990 Mentat Inc. */
@@ -76,7 +77,8 @@
#include <inet/ipclassifier.h>
#include <sys/squeue_impl.h>
#include <inet/ipnet.h>
-#include <sys/ethernet.h>
+#include <sys/vxlan.h>
+#include <inet/inet_hash.h>
#include <sys/tsol/label.h>
#include <sys/tsol/tnet.h>
@@ -346,6 +348,89 @@ void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol,
typedef union T_primitives *t_primp_t;
/*
+ * Various protocols that encapsulate UDP have no real use for the source port.
+ * Instead, they want to vary the source port to provide better equal-cost
+ * multipathing and other systems that use fanout. Consider something like
+ * VXLAN. If you're actually sending multiple different streams to a single
+ * host, if you don't vary the source port, then the tuple of ( SRC IP, DST IP,
+ * SRC Port, DST Port) will always be the same.
+ *
+ * Here, we return a port to hash this to, if we know how to hash it. If for
+ * some reason we can't perform an L4 hash, then we just return the default
+ * value, usually the default port. After we determine the hash we transform it
+ * so that it's in the range of [ min, max ].
+ *
+ * We'd like to avoid a pull up for the sake of performing the hash. If the
+ * first mblk_t doesn't have the full protocol header, then we just send it to
+ * the default. If for some reason we have an encapsulated packet that has its
+ * protocol header in different parts of an mblk_t, then we'll go with the
+ * default port. This means that that if a driver isn't consistent about how it
+ * generates the frames for a given flow, it will not always be consistently
+ * hashed. That should be an uncommon event.
+ */
+uint16_t
+udp_srcport_hash(mblk_t *mp, int type, uint16_t min, uint16_t max,
+ uint16_t def)
+{
+ size_t szused = 0;
+ struct ether_header *ether;
+ struct ether_vlan_header *vether;
+ ip6_t *ip6h;
+ ipha_t *ipha;
+ uint16_t sap;
+ uint64_t hash;
+ uint32_t mod;
+
+ ASSERT(min <= max);
+
+ if (type != UDP_HASH_VXLAN)
+ return (def);
+
+ if (!IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)))
+ return (def);
+
+ /*
+ * The following logic is VXLAN specific to get at the header, if we
+ * have formats, eg. GENEVE, then we should ignore this.
+ *
+ * The kernel overlay device often puts a first mblk_t for the data
+ * which is just the encap. If so, then we're going to use that and try
+ * to avoid a pull up.
+ */
+ if (MBLKL(mp) == VXLAN_HDR_LEN) {
+ if (mp->b_cont == NULL)
+ return (def);
+ mp = mp->b_cont;
+ ether = (struct ether_header *)mp->b_rptr;
+ } else if (MBLKL(mp) < VXLAN_HDR_LEN) {
+ return (def);
+ } else {
+ szused = VXLAN_HDR_LEN;
+ ether = (struct ether_header *)((uintptr_t)mp->b_rptr + szused);
+ }
+
+ /* Can we hold a MAC header? */
+ if (MBLKL(mp) + szused < sizeof (struct ether_header))
+ return (def);
+
+ /*
+ * We need to lie about the starting offset into the message block for
+ * convenience. Undo it at the end. We know that inet_pkt_hash() won't
+ * modify the mblk_t.
+ */
+ mp->b_rptr += szused;
+ hash = inet_pkt_hash(DL_ETHER, mp, INET_PKT_HASH_L2 |
+ INET_PKT_HASH_L3 | INET_PKT_HASH_L4);
+ mp->b_rptr -= szused;
+
+ if (hash == 0)
+ return (def);
+
+ mod = max - min + 1;
+ return ((hash % mod) + min);
+}
+
+/*
* Return the next anonymous port in the privileged port range for
* bind checking.
*
@@ -1583,6 +1668,16 @@ udp_opt_get(conn_t *connp, t_scalar_t level, t_scalar_t name,
*i1 = udp->udp_rcvhdr ? 1 : 0;
mutex_exit(&connp->conn_lock);
return (sizeof (int));
+ case UDP_SRCPORT_HASH:
+ mutex_enter(&connp->conn_lock);
+ *i1 = udp->udp_vxlanhash;
+ mutex_exit(&connp->conn_lock);
+ return (sizeof (int));
+ case UDP_SND_TO_CONNECTED:
+ mutex_enter(&connp->conn_lock);
+ *i1 = udp->udp_snd_to_conn ? 1 : 0;
+ mutex_exit(&connp->conn_lock);
+ return (sizeof (int));
}
}
mutex_enter(&connp->conn_lock);
@@ -1718,6 +1813,31 @@ udp_do_opt_set(conn_opt_arg_t *coa, int level, int name,
udp->udp_rcvhdr = onoff;
mutex_exit(&connp->conn_lock);
return (0);
+ case UDP_SRCPORT_HASH:
+ /*
+ * This should have already been verified, but double
+ * check.
+ */
+ if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
+ return (error);
+ }
+
+ /* First see if the val is something we understand */
+ if (*i1 != UDP_HASH_DISABLE && *i1 != UDP_HASH_VXLAN)
+ return (EINVAL);
+
+ if (!checkonly) {
+ mutex_enter(&connp->conn_lock);
+ udp->udp_vxlanhash = *i1;
+ mutex_exit(&connp->conn_lock);
+ }
+ /* Fully handled this option. */
+ return (0);
+ case UDP_SND_TO_CONNECTED:
+ mutex_enter(&connp->conn_lock);
+ udp->udp_snd_to_conn = onoff;
+ mutex_exit(&connp->conn_lock);
+ return (0);
}
break;
}
@@ -2001,13 +2121,25 @@ udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
uint32_t cksum;
udp_t *udp = connp->conn_udp;
boolean_t insert_spi = udp->udp_nat_t_endpoint;
+ boolean_t hash_srcport = udp->udp_vxlanhash;
uint_t ulp_hdr_len;
+ uint16_t srcport;
data_len = msgdsize(data_mp);
ulp_hdr_len = UDPH_SIZE;
if (insert_spi)
ulp_hdr_len += sizeof (uint32_t);
+ /*
+ * If we have source port hashing going on, determine the hash before
+ * we modify the mblk_t.
+ */
+ if (hash_srcport == B_TRUE) {
+ srcport = udp_srcport_hash(mp, UDP_HASH_VXLAN,
+ IPPORT_DYNAMIC_MIN, IPPORT_DYNAMIC_MAX,
+ ntohs(connp->conn_lport));
+ }
+
mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo,
ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp);
if (mp == NULL) {
@@ -2019,7 +2151,11 @@ udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length;
udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length);
- udpha->uha_src_port = connp->conn_lport;
+ if (hash_srcport == B_TRUE) {
+ udpha->uha_src_port = htons(srcport);
+ } else {
+ udpha->uha_src_port = connp->conn_lport;
+ }
udpha->uha_dst_port = dstport;
udpha->uha_checksum = 0;
udpha->uha_length = htons(data_len);
@@ -3194,6 +3330,7 @@ udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp,
udp_t *udp = connp->conn_udp;
udp_stack_t *us = udp->udp_us;
boolean_t insert_spi = udp->udp_nat_t_endpoint;
+ boolean_t hash_srcport = udp->udp_vxlanhash;
uint_t pktlen;
uint_t alloclen;
uint_t copylen;
@@ -3202,10 +3339,21 @@ udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp,
udpha_t *udpha;
uint32_t cksum;
ip_pkt_t *ipp;
+ uint16_t srcport;
ASSERT(MUTEX_HELD(&connp->conn_lock));
/*
+ * If we have source port hashing going on, determine the hash before
+ * we modify the mblk_t.
+ */
+ if (hash_srcport == B_TRUE) {
+ srcport = udp_srcport_hash(mp, UDP_HASH_VXLAN,
+ IPPORT_DYNAMIC_MIN, IPPORT_DYNAMIC_MAX,
+ ntohs(connp->conn_lport));
+ }
+
+ /*
* Copy the header template and leave space for an SPI
*/
copylen = connp->conn_ht_iphc_len;
@@ -3303,6 +3451,9 @@ udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp,
*((uint32_t *)(udpha + 1)) = 0;
udpha->uha_dst_port = dstport;
+ if (hash_srcport == B_TRUE)
+ udpha->uha_src_port = htons(srcport);
+
return (mp);
}
@@ -5947,10 +6098,18 @@ udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
else
return (error);
}
- if (udp->udp_state == TS_DATA_XFER) {
+
+ /*
+ * Check if we're allowed to send to a connection on which we've
+ * already called 'connect'. The posix spec. allows both behaviors but
+ * historically we've returned an error if already connected. The
+ * client can allow this via a sockopt.
+ */
+ if (udp->udp_state == TS_DATA_XFER && !udp->udp_snd_to_conn) {
UDPS_BUMP_MIB(us, udpOutErrors);
return (EISCONN);
}
+
error = proto_verify_ip_addr(connp->conn_family,
(struct sockaddr *)msg->msg_name, msg->msg_namelen);
if (error != 0) {
diff --git a/usr/src/uts/common/inet/udp/udp_opt_data.c b/usr/src/uts/common/inet/udp/udp_opt_data.c
index c279bb4a21..847e2cdde6 100644
--- a/usr/src/uts/common/inet/udp/udp_opt_data.c
+++ b/usr/src/uts/common/inet/udp/udp_opt_data.c
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015, Joyent, Inc.
*/
#include <sys/types.h>
@@ -292,6 +293,9 @@ opdes_t udp_opt_arr[] = {
},
{ UDP_NAT_T_ENDPOINT, IPPROTO_UDP, OA_RW, OA_RW, OP_PRIVPORT, 0, sizeof (int),
0 },
+{ UDP_SRCPORT_HASH, IPPROTO_UDP, OA_R, OA_RW, OP_CONFIG, 0, sizeof (int), 0 },
+{ UDP_SND_TO_CONNECTED, IPPROTO_UDP, OA_R, OA_RW, OP_CONFIG, 0, sizeof (int),
+ 0 }
};
/*
diff --git a/usr/src/uts/common/inet/udp_impl.h b/usr/src/uts/common/inet/udp_impl.h
index 6a31ce5c22..ebba10c0f7 100644
--- a/usr/src/uts/common/inet/udp_impl.h
+++ b/usr/src/uts/common/inet/udp_impl.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#ifndef _UDP_IMPL_H
@@ -178,8 +179,12 @@ typedef struct udp_s {
udp_issocket : 1, /* socket mode; sockfs is on top */
udp_nat_t_endpoint : 1, /* UDP_NAT_T_ENDPOINT option */
udp_rcvhdr : 1, /* UDP_RCVHDR option */
+ udp_vxlanhash: 1, /* UDP_SRCPORT_HASH option */
+ /* Because there's only VXLAN, cheat */
+ /* and only use a single bit */
+ udp_snd_to_conn: 1, /* UDP_SND_TO_CONNECTED option */
- udp_pad_to_bit_31 : 29;
+ udp_pad_to_bit_31 : 27;
/* Following 2 fields protected by the uf_lock */
struct udp_s *udp_bind_hash; /* Bind hash chain */
diff --git a/usr/src/uts/common/io/aggr/aggr_port.c b/usr/src/uts/common/io/aggr/aggr_port.c
index 00545d2c03..a39110255a 100644
--- a/usr/src/uts/common/io/aggr/aggr_port.c
+++ b/usr/src/uts/common/io/aggr/aggr_port.c
@@ -21,6 +21,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012 OmniTI Computer Consulting, Inc All rights reserved.
*/
/*
@@ -528,8 +529,13 @@ aggr_port_promisc(aggr_port_t *port, boolean_t on)
if (on) {
mac_rx_clear(port->lp_mch);
+ /* We use the promisc callback because without hardware
+ * rings, we deliver through flows that will cause duplicate
+ * delivery of packets when we've flipped into this mode
+ * to compensate for the lack of hardware MAC matching
+ */
rc = mac_promisc_add(port->lp_mch, MAC_CLIENT_PROMISC_ALL,
- aggr_recv_cb, port, &port->lp_mphp,
+ aggr_recv_promisc_cb, port, &port->lp_mphp,
MAC_PROMISC_FLAGS_NO_TX_LOOP);
if (rc != 0) {
mac_rx_set(port->lp_mch, aggr_recv_cb, port);
diff --git a/usr/src/uts/common/io/aggr/aggr_recv.c b/usr/src/uts/common/io/aggr/aggr_recv.c
index 2bdb7872e3..0dfe234b70 100644
--- a/usr/src/uts/common/io/aggr/aggr_recv.c
+++ b/usr/src/uts/common/io/aggr/aggr_recv.c
@@ -21,6 +21,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012 OmniTI Computer Consulting, Inc All rights reserved.
*/
/*
@@ -68,16 +69,27 @@ aggr_recv_lacp(aggr_port_t *port, mac_resource_handle_t mrh, mblk_t *mp)
/*
* Callback function invoked by MAC service module when packets are
- * made available by a MAC port.
+ * made available by a MAC port, both in promisc_on mode and not.
*/
/* ARGSUSED */
-void
-aggr_recv_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
- boolean_t loopback)
+static void
+aggr_recv_path_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
+ boolean_t loopback, boolean_t promisc_path)
{
aggr_port_t *port = (aggr_port_t *)arg;
aggr_grp_t *grp = port->lp_grp;
+ /* In the case where lp_promisc_on has been turned on to
+ * compensate for insufficient hardware MAC matching and
+ * hardware rings are not in use we will fall back to
+ * using flows for delivery which can result in duplicates
+ * pushed up the stack. Only respect the chosen path.
+ */
+ if (port->lp_promisc_on != promisc_path) {
+ freemsgchain(mp);
+ return;
+ }
+
if (grp->lg_lacp_mode == AGGR_LACP_OFF) {
aggr_mac_rx(grp->lg_mh, mrh, mp);
} else {
@@ -161,3 +173,19 @@ aggr_recv_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
}
}
}
+
+/* ARGSUSED */
+void
+aggr_recv_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
+ boolean_t loopback)
+{
+ aggr_recv_path_cb(arg, mrh, mp, loopback, B_FALSE);
+}
+
+/* ARGSUSED */
+void
+aggr_recv_promisc_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
+ boolean_t loopback)
+{
+ aggr_recv_path_cb(arg, mrh, mp, loopback, B_TRUE);
+}
diff --git a/usr/src/uts/common/io/axf/ax88172reg.h b/usr/src/uts/common/io/axf/ax88172reg.h
new file mode 100644
index 0000000000..8ca6ebc187
--- /dev/null
+++ b/usr/src/uts/common/io/axf/ax88172reg.h
@@ -0,0 +1,163 @@
+/*
+ * @(#)ax88172reg.h 1.1 09/06/15
+ * Macro definitions for ASIX AX88172 USB to fast ethernet controler
+ * based on ASIX AX88172/88772 data sheet
+ * This file is public domain. Coded by M.Murayama (KHF04453@nifty.com)
+ */
+
+#ifndef __AX88172_H__
+#define __AX88172_H__
+
+/*
+ * Vendor command definitions
+ */
+#define VCMD_READ_SRAM 0x02
+#define VCMD_WRITE_RXSRAM 0x03
+#define VCMD_WRITE_TXSRAM 0x04
+#define VCMD_SOFTWARE_MII_OP 0x06
+#define VCMD_READ_MII_REG 0x07
+#define VCMD_WRITE_MII_REG 0x08
+#define VCMD_READ_MII_OPMODE 0x09
+#define VCMD_HARDWARE_MII_OP 0x0a
+#define VCMD_READ_SROM 0x0b
+#define VCMD_WRITE_SROM 0x0c
+#define VCMD_WRITE_SROM_ENABLE 0x0d
+#define VCMD_WRITE_SROM_DISABLE 0x0e
+#define VCMD_READ_RXCTRL 0x0f
+#define VCMD_WRITE_RXCTRL 0x10
+#define VCMD_READ_IPGS 0x11
+#define VCMD_WRITE_IPG 0x12
+#define VCMD_WRITE_IPG1 0x13
+#define VCMD_WRITE_IPG2 0x14
+#define VCMD_READ_MCAST_FILTER 0x15
+#define VCMD_WRITE_MCAST_FILTER 0x16
+#define VCMD_READ_NODE_ID 0x17
+#define VCMD_READ_PHY_IDS 0x19
+#define VCMD_READ_MEDIUM_STATUS 0x1a
+#define VCMD_WRITE_MEDIUM_STATUS 0x1b
+#define VCMD_SET_MONITOR_MODE 0x1c
+#define VCMD_GET_MONITOR_MODE 0x1d
+#define VCMD_READ_GPIO 0x1e
+#define VCMD_WRITE_GPIO 0x1f
+
+/* ax88772 only, currently not supported */
+#define VCMD_WRITE_IPGS_88772 0x12
+#define VCMD_READ_NODE_ID_88772 0x13
+#define VCMD_WRITE_NODE_ID_88772 0x14
+#define VCMD_WRITE_TEST_REG_88772 0x17
+#define VCMD_SOFTWARE_RESET_88772 0x20
+#define VCMD_READ_PHY_SELECT_88772 0x21
+#define VCMD_WRITE_PHY_SELECT_88772 0x22
+
+
+/*
+ * Register definitions
+ */
+
+/* Rx control register */
+#define RCR_SO 0x80 /* Start Operation */
+#define RCR_AP_88772 0x20 /* accept physical address from mcast filter */
+#define RCR_AM 0x10 /* accept multicast address */
+#define RCR_AB 0x08 /* accept broadcast address */
+#define RCR_SEP 0x04 /* save error packet */
+#define RCR_AMALL 0x02 /* accept all multicast address */
+#define RCR_PRO 0x01 /* promiscious, all frames received */
+
+#define RCR_MFB 0x0300
+#define RCR_MFB_SHIFT 8
+#define RCR_MFB_2K (0U << RCR_MFB_SHIFT)
+#define RCR_MFB_4K (1U << RCR_MFB_SHIFT)
+#define RCR_MFB_8K (2U << RCR_MFB_SHIFT)
+#define RCR_MFB_16K (3U << RCR_MFB_SHIFT)
+
+#define RCR_BITS \
+ "\020" \
+ "\010SO" \
+ "\006AP" \
+ "\005AM" \
+ "\004AB" \
+ "\003SEP" \
+ "\002AMALL" \
+ "\001PRO"
+
+/* Medium status register */
+#define MSR_SM 0x1000 /* super mac support */
+#define MSR_SBP 0x0800 /* stop backpressure */
+#define MSR_PS 0x0200 /* port speed in mii mode */
+#define MSR_RE 0x0100 /* rx enable */
+#define MSR_PF 0x0080 /* check only length/type for pause frame */
+#define MSR_JFE 0x0040 /* jumbo frame enable */
+#define MSR_TFC 0x0020 /* tx flow control enable */
+#define MSR_RFC 0x0010 /* rx flow control enable (178) */
+#define MSR_FCEN 0x0010 /* flow control enable (172/772) */
+#define MSR_ENCK 0x0008 /* Enable GTX_CLK and TXC clock output (178) */
+#define MSR_TXABT 0x0004 /* Tx abort allow, always set */
+#define MSR_FDPX 0x0002 /* full duplex */
+#define MSR_GM 0x0001 /* Gigabit mode (178) */
+
+#define MSR_BITS \
+ "\020" \
+ "\015SM" \
+ "\014SBP" \
+ "\012PS" \
+ "\011RE" \
+ "\005FCEN" \
+ "\004ENCK" \
+ "\003TXABT" \
+ "\002FDPX" \
+ "\001GM"
+
+/* monitor mode register */
+#define MMR_RWMP 0x04 /* remote wakeup by magic pkt */
+#define MMR_RWLU 0x02 /* remote wakeup by linkup */
+#define MMR_MOM 0x01 /* monitor mode 1:en, 0:dis */
+
+#define MMR_BITS \
+ "\020" \
+ "\003RWMP" \
+ "\002RWLU" \
+ "\001MOM"
+
+/* GPIO register */
+#define GPIO_RSE 0x80 /* reload serial eeprom (88772)*/
+#define GPIO_DATA2 0x20
+#define GPIO_EN2 0x10
+#define GPIO_DATA1 0x08
+#define GPIO_EN1 0x04
+#define GPIO_DATA0 0x02
+#define GPIO_EN0 0x01
+
+#define GPIO_BITS \
+ "\020" \
+ "\010RSE" \
+ "\006DATA2" \
+ "\005EN2" \
+ "\004DATA1" \
+ "\003EN1" \
+ "\002DATA0" \
+ "\001EN0"
+
+/* Software reset register */
+#define SWRST_IPPD 0x40 /* internal phy power down control */
+#define SWRST_IPRL 0x20 /* internal phy reset control */
+#define SWRST_BZ 0x10 /* force Bulk In to return zero-length pkt */
+#define SWRST_PRL 0x08 /* external phy reset pin level */
+#define SWRST_PRTE 0x04 /* external phy tri-state enable */
+#define SWRST_RT 0x02 /* clear frame length error for Bulk-Out */
+#define SWRST_RR 0x01 /* clear frame length error for Bulk-In */
+
+#define SWRST_BITS \
+ "\020" \
+ "\007IPPD" \
+ "\006IPRL" \
+ "\005BZ" \
+ "\004PRL" \
+ "\003PRTE" \
+ "\002RT" \
+ "\001RR"
+
+/* Software PHY Select Status register */
+#define SPSS_ASEL 0x02 /* 1:auto select 0:manual select */
+#define SPSS_PSEL 0x01 /* 1:intenal phy, 0:external (when ASEL=0) */
+
+#endif /* __AX88172_H__ */
diff --git a/usr/src/uts/common/io/axf/axf_usbgem.c b/usr/src/uts/common/io/axf/axf_usbgem.c
new file mode 100644
index 0000000000..28963f6849
--- /dev/null
+++ b/usr/src/uts/common/io/axf/axf_usbgem.c
@@ -0,0 +1,1539 @@
+/*
+ * axf_usbgem.c : ASIX AX88172/772 USB to Fast Ethernet Driver for Solaris
+ *
+ * Copyright (c) 2004-2012 Masayuki Murayama. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the author nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+#pragma ident "@(#)axf_usbgem.c 1.3 12/02/09"
+
+/*
+ * Changelog:
+ */
+
+/*
+ * TODO
+ * handle RXMODE_ENABLE in set_rx_filter()
+ */
+/* ======================================================= */
+
+/*
+ * Solaris system header files and macros
+ */
+
+/* minimum kernel headers for drivers */
+#include <sys/types.h>
+#include <sys/conf.h>
+#include <sys/debug.h>
+#include <sys/kmem.h>
+#include <sys/modctl.h>
+#include <sys/errno.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/byteorder.h>
+
+/* ethernet stuff */
+#include <sys/ethernet.h>
+
+/* interface card depend stuff */
+#include <sys/stropts.h>
+#include <sys/stream.h>
+#include <sys/strlog.h>
+#include <sys/usb/usba.h>
+#include "usbgem.h"
+
+/* hardware stuff */
+#include "usbgem_mii.h"
+#include "ax88172reg.h"
+
+char ident[] = "ax88x72 usbnic driver v" VERSION;
+
+/*
+ * Useful macros
+ */
+#define CHECK_AND_JUMP(err, label) if (err != USB_SUCCESS) goto label
+#define LE16P(p) ((((uint8_t *)(p))[1] << 8) | ((uint8_t *)(p))[0])
+
+#define AX88172(dp) \
+ (((struct axf_dev *)(dp)->private)->chip->type == CHIP_TYPE_AX88172)
+
+#define AX88772(dp) \
+ (((struct axf_dev *)(dp)->private)->chip->type == CHIP_TYPE_AX88772)
+
+/*
+ * Debugging
+ */
+#ifdef DEBUG_LEVEL
+static int axf_debug = DEBUG_LEVEL;
+#define DPRINTF(n, args) if (axf_debug > (n)) cmn_err args
+#else
+#define DPRINTF(n, args)
+#endif
+
+/*
+ * Our configration for ax88172
+ */
+/* timeouts */
+#define ONESEC (drv_usectohz(1*1000000))
+
+/*
+ * RX/TX buffer size
+ */
+
+/*
+ * Local device definitions
+ */
+struct chip_info {
+ uint16_t vid; /* usb vendor id */
+ uint16_t pid; /* usb product id */
+ int type;
+ uint8_t gpio_reset[2];
+ uint8_t gpio_speed[2];
+ uint8_t gpio_duplex[2];
+ char *name;
+#define CHIP_TYPE_AX88172 0
+#define CHIP_TYPE_AX88772 1
+#define CHIP_TYPE_AX88178 2
+};
+
+#define GPIO_DEFAULT {0x00, 0x15}, {0, 0}, {0, 0}
+struct chip_info chiptbl_88x7x[] = {
+/* AX88172 */
+{
+ /* Planex UE2-100TX, Hawking UF200, TrendNet TU2-ET100 */
+ 0x07b8, 0x420a, CHIP_TYPE_AX88172,
+
+ /*
+ * the default setting covers below:
+ * gpio bit2 has to be 0 and gpio bit0 has to be 1
+ */
+ {0, 0},
+ {GPIO_EN1, GPIO_DATA1 | GPIO_EN1},
+ {0, 0},
+ "Planex UE2-100TX", /* tested */
+},
+{
+ 0x2001, 0x1a00, CHIP_TYPE_AX88172,
+ {0x9f, 0x9e}, {0, 0}, {0, 0},
+ "D-Link dube100", /* XXX */
+},
+{
+ 0x077b, 0x2226, CHIP_TYPE_AX88172,
+ GPIO_DEFAULT,
+ "Linksys USB200M",
+},
+{
+ 0x0846, 0x1040, CHIP_TYPE_AX88172,
+ GPIO_DEFAULT,
+ "Netgear FA120",
+},
+{
+ 0x0b95, 0x1720, CHIP_TYPE_AX88172,
+ GPIO_DEFAULT,
+ "Intellinet, ST Lab USB Ethernet",
+},
+{
+ 0x08dd, 0x90ff, CHIP_TYPE_AX88172,
+ GPIO_DEFAULT,
+ "Billionton Systems, USB2AR",
+},
+{
+ 0x0557, 0x2009, CHIP_TYPE_AX88172,
+ GPIO_DEFAULT,
+ "ATEN UC210T",
+},
+{
+ 0x0411, 0x003d, CHIP_TYPE_AX88172,
+ GPIO_DEFAULT,
+ "Buffalo LUA-U2-KTX",
+},
+{
+ 0x6189, 0x182d, CHIP_TYPE_AX88172,
+ GPIO_DEFAULT,
+ "Sitecom LN-029 USB 2.0 10/100 Ethernet adapter",
+},
+{
+ 0x07aa, 0x0017, CHIP_TYPE_AX88172,
+ GPIO_DEFAULT,
+ "corega FEther USB2-TX",
+},
+{
+ 0x1189, 0x0893, CHIP_TYPE_AX88172,
+ GPIO_DEFAULT,
+ "Surecom EP-1427X-2",
+},
+{
+ 0x1631, 0x6200, CHIP_TYPE_AX88172,
+ GPIO_DEFAULT,
+ "goodway corp usb gwusb2e",
+},
+/* AX88772 and AX88178 */
+{
+ 0x13b1, 0x0018, CHIP_TYPE_AX88772,
+ {0, 0}, {0, 0}, {0, 0},
+ "Linksys USB200M rev.2",
+},
+{
+ 0x1557, 0x7720, CHIP_TYPE_AX88772,
+ {0, 0}, {0, 0}, {0, 0},
+ "0Q0 cable ethernet",
+},
+{
+ 0x07d1, 0x3c05, CHIP_TYPE_AX88772,
+ {0, 0}, {0, 0}, {0, 0},
+ "DLink DUB E100 ver B1",
+},
+{
+ 0x2001, 0x3c05, CHIP_TYPE_AX88772,
+ {0, 0}, {0, 0}, {0, 0},
+ "DLink DUB E100 ver B1(2)",
+},
+{
+ 0x05ac, 0x1402, CHIP_TYPE_AX88772,
+ {0, 0}, {0, 0}, {0, 0},
+ "Apple Ethernet USB Adapter",
+},
+{
+ 0x1737, 0x0039, CHIP_TYPE_AX88178,
+ {0, 0}, {0, 0}, {0, 0},
+ "Linksys USB1000",
+},
+{
+ 0x0411, 0x006e, CHIP_TYPE_AX88178,
+ {0, 0}, {0, 0}, {0, 0},
+ "Buffalo LUA-U2-KGT/LUA-U2-GT",
+},
+{
+ 0x04bb, 0x0930, CHIP_TYPE_AX88178,
+ {0, 0}, {0, 0}, {0, 0},
+ "I/O DATA ETG-US2",
+},
+{
+ 0x050d, 0x5055, CHIP_TYPE_AX88178,
+ {0, 0}, {0, 0}, {0, 0},
+ "Belkin F5D5055",
+},
+{
+ /* generic ax88772 must be the last entry */
+ /* planex UE-200TX-G */
+ 0x0b95, 0x7720, CHIP_TYPE_AX88772,
+ {0, 0}, {0, 0}, {0, 0},
+ "ASIX AX88772/AX88178", /* tested */
+},
+};
+
+#define CHIPTABLESIZE (sizeof (chiptbl_88x7x) / sizeof (struct chip_info))
+
+struct axf_dev {
+ /*
+ * Misc HW information
+ */
+ struct chip_info *chip;
+ uint8_t ipg[3];
+ uint8_t gpio;
+ uint16_t rcr;
+ uint16_t msr;
+ uint8_t last_link_state;
+ boolean_t phy_has_reset;
+};
+
+/*
+ * private functions
+ */
+
+/* mii operations */
+static uint16_t axf_mii_read(struct usbgem_dev *, uint_t, int *errp);
+static void axf_mii_write(struct usbgem_dev *, uint_t, uint16_t, int *errp);
+
+/* nic operations */
+static int axf_reset_chip(struct usbgem_dev *);
+static int axf_init_chip(struct usbgem_dev *);
+static int axf_start_chip(struct usbgem_dev *);
+static int axf_stop_chip(struct usbgem_dev *);
+static int axf_set_media(struct usbgem_dev *);
+static int axf_set_rx_filter(struct usbgem_dev *);
+static int axf_get_stats(struct usbgem_dev *);
+static void axf_interrupt(struct usbgem_dev *, mblk_t *);
+
+/* packet operations */
+static mblk_t *axf_tx_make_packet(struct usbgem_dev *, mblk_t *);
+static mblk_t *axf_rx_make_packet(struct usbgem_dev *, mblk_t *);
+
+/* =============================================================== */
+/*
+ * I/O functions
+ */
+/* =============================================================== */
+#define OUT(dp, req, val, ix, len, buf, errp, label) \
+ if ((*(errp) = usbgem_ctrl_out((dp), \
+ /* bmRequestType */ USB_DEV_REQ_HOST_TO_DEV \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ (req), \
+ /* wValue */ (val), \
+ /* wIndex */ (ix), \
+ /* wLength */ (len), \
+ /* value */ (buf), \
+ /* size */ (len))) != USB_SUCCESS) goto label
+
+#define IN(dp, req, val, ix, len, buf, errp, label) \
+ if ((*(errp) = usbgem_ctrl_in((dp), \
+ /* bmRequestType */ USB_DEV_REQ_DEV_TO_HOST \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ (req), \
+ /* wValue */ (val), \
+ /* wIndex */ (ix), \
+ /* wLength */ (len), \
+ /* valuep */ (buf), \
+ /* size */ (len))) != USB_SUCCESS) goto label
+
+/* =============================================================== */
+/*
+ * Hardware manupilation
+ */
+/* =============================================================== */
+static int
+axf_reset_phy(struct usbgem_dev *dp)
+{
+ uint8_t phys[2];
+ uint8_t val8;
+ int err;
+ struct axf_dev *lp = dp->private;
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ if (AX88172(dp)) {
+ delay(drv_usectohz(5000));
+ IN(dp, VCMD_READ_GPIO, 0, 0, 1, &val8, &err, usberr);
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: gpio 0x%b",
+ dp->name, __func__, val8, GPIO_BITS));
+
+ /* reset MII PHY */
+ val8 = lp->chip->gpio_reset[1]
+ | lp->chip->gpio_speed[dp->speed]
+ | lp->chip->gpio_duplex[dp->full_duplex];
+
+ OUT(dp, VCMD_WRITE_GPIO,
+ val8, 0, 0, NULL, &err, usberr);
+ delay(drv_usectohz(5000));
+
+ val8 = lp->chip->gpio_reset[0]
+ | lp->chip->gpio_speed[dp->speed]
+ | lp->chip->gpio_duplex[dp->full_duplex];
+
+ OUT(dp, VCMD_WRITE_GPIO,
+ val8, 0, 0, NULL, &err, usberr);
+ delay(drv_usectohz(5000));
+ } else {
+ lp->gpio = GPIO_RSE | GPIO_DATA2 | GPIO_EN2;
+ OUT(dp, VCMD_WRITE_GPIO, lp->gpio, 0,
+ 0, NULL, &err, usberr);
+ drv_usecwait(1000);
+
+ OUT(dp, VCMD_WRITE_PHY_SELECT_88772,
+ dp->mii_phy_addr == 16 ? 1 : 0, 0, 0, NULL, &err, usberr);
+
+ OUT(dp, VCMD_SOFTWARE_RESET_88772,
+ SWRST_IPPD | SWRST_PRL, 0, 0, NULL, &err, usberr);
+ delay(drv_usectohz(150*1000));
+ OUT(dp, VCMD_SOFTWARE_RESET_88772,
+ 0, 0, 0, NULL, &err, usberr);
+
+ OUT(dp, VCMD_SOFTWARE_RESET_88772,
+ dp->mii_phy_addr == 16 ? SWRST_IPRL : SWRST_PRTE,
+ 0, 0, NULL, &err, usberr);
+ delay(drv_usectohz(150*1000));
+ }
+
+
+ return (USB_SUCCESS);
+
+usberr:
+ return (USB_FAILURE);
+}
+
+static int
+axf_reset_chip(struct usbgem_dev *dp)
+{
+ int err = USB_SUCCESS;
+
+ if (AX88172(dp)) {
+ /* there are no ways to reset nic */
+ return (USB_SUCCESS);
+ }
+#ifdef NEVER
+ OUT(dp, VCMD_SOFTWARE_RESET_88772,
+ SWRST_RR | SWRST_RT, 0, 0, NULL, &err, usberr);
+ OUT(dp, VCMD_SOFTWARE_RESET_88772,
+ 0, 0, 0, NULL, &err, usberr);
+usberr:
+#endif
+ return (err);
+}
+
+/*
+ * Setup ax88172
+ */
+static int
+axf_init_chip(struct usbgem_dev *dp)
+{
+ int i;
+ uint32_t val;
+ int err = USB_SUCCESS;
+ uint16_t reg;
+ uint8_t buf[2];
+ uint16_t tmp16;
+ struct axf_dev *lp = dp->private;
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /* rx conrol register: read default value */
+ if (!AX88172(dp)) {
+ /* clear rx control */
+ OUT(dp, VCMD_WRITE_RXCTRL, 0, 0, 0, NULL, &err, usberr);
+ }
+
+ IN(dp, VCMD_READ_RXCTRL, 0, 0, 2, buf, &err, usberr);
+ lp->rcr = LE16P(buf);
+ DPRINTF(0, (CE_CONT, "!%s: %s: rcr(default):%b",
+ dp->name, __func__, lp->rcr, RCR_BITS));
+
+ lp->rcr &= ~RCR_SO;
+
+ /* Media status register */
+ if (AX88172(dp)) {
+#ifdef notdef
+ lp->msr = MSR_TXABT;
+#else
+ lp->msr = 0;
+#endif
+ } else {
+ lp->msr = MSR_RE | MSR_TXABT;
+ }
+ DPRINTF(0, (CE_CONT, "!%s: %s: msr:%b",
+ dp->name, __func__, lp->msr, MSR_BITS));
+ err = axf_set_media(dp);
+ CHECK_AND_JUMP(err, usberr);
+
+ /* write IPG0-2 registers */
+ if (AX88172(dp)) {
+ OUT(dp, VCMD_WRITE_IPG, lp->ipg[0], 0, 0, NULL, &err, usberr);
+ OUT(dp, VCMD_WRITE_IPG1, lp->ipg[1], 0, 0, NULL, &err, usberr);
+ OUT(dp, VCMD_WRITE_IPG2, lp->ipg[2], 0, 0, NULL, &err, usberr);
+ } else {
+ /* EMPTY */
+ }
+#ifdef ENABLE_RX_IN_INIT_CHIP
+ /* enable Rx */
+ lp->rcr |= RCR_SO;
+ OUT(dp, VCMD_WRITE_RXCTRL, lp->rcr, 0, 0, NULL, &err, usberr);
+#endif
+usberr:
+ DPRINTF(2, (CE_CONT, "!%s: %s: end (%s)",
+ dp->name, __func__,
+ err, err == USB_SUCCESS ? "success" : "error"));
+ return (err);
+}
+
+static int
+axf_start_chip(struct usbgem_dev *dp)
+{
+ int err = USB_SUCCESS;
+ struct axf_dev *lp = dp->private;
+#ifndef ENABLE_RX_IN_INIT_CHIP
+ /* enable Rx */
+ lp->rcr |= RCR_SO;
+ OUT(dp, VCMD_WRITE_RXCTRL, lp->rcr, 0, 0, NULL, &err, usberr);
+
+usberr:
+ DPRINTF(2, (CE_CONT, "!%s: %s: end (%s)",
+ dp->name, __func__,
+ err, err == USB_SUCCESS ? "success" : "error"));
+#endif
+ return (err);
+}
+
+static int
+axf_stop_chip(struct usbgem_dev *dp)
+{
+ int err = USB_SUCCESS;
+ struct axf_dev *lp = dp->private;
+
+ /* Disable Rx */
+ lp->rcr &= ~RCR_SO;
+ OUT(dp, VCMD_WRITE_RXCTRL, lp->rcr, 0, 0, NULL, &err, usberr);
+
+ /*
+ * Restore factory mac address
+ * if we have changed current mac address
+ */
+ if (!AX88172(dp) &&
+ bcmp(dp->dev_addr.ether_addr_octet,
+ dp->cur_addr.ether_addr_octet,
+ ETHERADDRL) != 0) {
+ OUT(dp, VCMD_WRITE_NODE_ID_88772, 0, 0,
+ ETHERADDRL, dp->cur_addr.ether_addr_octet, &err, usberr);
+ }
+usberr:
+ return (axf_reset_chip(dp));
+}
+
+static int
+axf_get_stats(struct usbgem_dev *dp)
+{
+ /* EMPTY */
+ return (USB_SUCCESS);
+}
+
+static uint_t
+axf_mcast_hash(struct usbgem_dev *dp, const uint8_t *addr)
+{
+ return (usbgem_ether_crc_be(addr) >> (32 - 6));
+}
+
+static int
+axf_set_rx_filter(struct usbgem_dev *dp)
+{
+ int i;
+ uint8_t mode;
+ uint8_t mhash[8];
+ uint8_t buf[2];
+ uint_t h;
+ int err = USB_SUCCESS;
+ struct axf_dev *lp = dp->private;
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: called, rxmode:%x",
+ dp->name, __func__, dp->rxmode));
+
+ if (lp->rcr & RCR_SO) {
+ /* set promiscuous mode before changing it. */
+ OUT(dp, VCMD_WRITE_RXCTRL,
+ lp->rcr | RCR_PRO, 0, 0, NULL, &err, usberr);
+ }
+
+ lp->rcr &= ~(RCR_AP_88772 | RCR_AM | RCR_SEP | RCR_AMALL | RCR_PRO);
+ mode = RCR_AB; /* accept broadcast packets */
+
+ bzero(mhash, sizeof (mhash));
+
+ if (dp->rxmode & RXMODE_PROMISC) {
+ /* promiscious mode implies all multicast and all physical */
+ mode |= RCR_PRO;
+ } else if ((dp->rxmode & RXMODE_ALLMULTI) || dp->mc_count > 32) {
+ /* accept all multicast packets */
+ mode |= RCR_AMALL;
+ } else if (dp->mc_count > 0) {
+ /*
+ * make hash table to select interresting
+ * multicast address only.
+ */
+ mode |= RCR_AM;
+ for (i = 0; i < dp->mc_count; i++) {
+ h = dp->mc_list[i].hash;
+ mhash[h / 8] |= 1 << (h % 8);
+ }
+ }
+ if (AX88172(dp)) {
+ if (bcmp(dp->dev_addr.ether_addr_octet,
+ dp->cur_addr.ether_addr_octet, ETHERADDRL) != 0) {
+ /*
+ * we use promiscious mode instead of changing the
+ * mac address in ax88172
+ */
+ mode |= RCR_PRO;
+ }
+ } else {
+ OUT(dp, VCMD_WRITE_NODE_ID_88772, 0, 0,
+ ETHERADDRL, dp->cur_addr.ether_addr_octet, &err, usberr);
+ }
+ lp->rcr |= mode;
+
+ /* set multicast hash table */
+ if (mode & RCR_AM) {
+ /* need to set up multicast hash table */
+ OUT(dp, VCMD_WRITE_MCAST_FILTER, 0, 0,
+ sizeof (mhash), mhash, &err, usberr);
+ }
+
+ /* update rcr */
+ OUT(dp, VCMD_WRITE_RXCTRL, lp->rcr, 0,
+ 0, NULL, &err, usberr);
+
+#if DEBUG_LEVEL > 1
+ /* verify rxctrl reg */
+ IN(dp, VCMD_READ_RXCTRL, 0, 0, 2, buf, &err, usberr);
+ cmn_err(CE_CONT, "!%s: %s: rcr:%b returned",
+ dp->name, __func__, LE16P(buf), RCR_BITS);
+#endif
+usberr:
+ DPRINTF(2, (CE_CONT, "!%s: %s: end (%s)",
+ dp->name, __func__,
+ err, err == USB_SUCCESS ? "success" : "error"));
+ return (err);
+}
+
+static int
+axf_set_media(struct usbgem_dev *dp)
+{
+ uint8_t val8;
+ uint8_t gpio;
+ uint8_t gpio_old;
+ int err = USB_SUCCESS;
+ uint16_t msr;
+ struct axf_dev *lp = dp->private;
+
+ IN(dp, VCMD_READ_GPIO, 0, 0, 1, &gpio, &err, usberr);
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called, gpio:%b",
+ dp->name, __func__, gpio, GPIO_BITS));
+
+ msr = lp->msr;
+ gpio_old = gpio;
+ gpio = lp->chip->gpio_reset[0];
+
+ /* setup speed */
+ if (AX88172(dp)) {
+ /* EMPTY */
+ } else {
+ msr &= ~(MSR_PS | MSR_GM | MSR_ENCK);
+
+ switch (dp->speed) {
+ case USBGEM_SPD_1000:
+ msr |= MSR_GM | MSR_ENCK;
+ break;
+
+ case USBGEM_SPD_100:
+ msr |= MSR_PS;
+ break;
+
+ case USBGEM_SPD_10:
+ break;
+ }
+ }
+ gpio |= lp->chip->gpio_speed[dp->speed == USBGEM_SPD_100 ? 1 : 0];
+
+ /* select duplex */
+ msr &= ~MSR_FDPX;
+ if (dp->full_duplex) {
+ msr |= MSR_FDPX;
+
+ /* select flow control */
+ if (AX88172(dp)) {
+ msr &= ~MSR_FCEN;
+ switch (dp->flow_control) {
+ case FLOW_CONTROL_TX_PAUSE:
+ case FLOW_CONTROL_SYMMETRIC:
+ case FLOW_CONTROL_RX_PAUSE:
+ msr |= MSR_FCEN;
+ break;
+ }
+ } else {
+ msr &= ~(MSR_RFC | MSR_TFC);
+ switch (dp->flow_control) {
+ case FLOW_CONTROL_TX_PAUSE:
+ msr |= MSR_TFC;
+ break;
+
+ case FLOW_CONTROL_SYMMETRIC:
+ msr |= MSR_TFC | MSR_RFC;
+ break;
+
+ case FLOW_CONTROL_RX_PAUSE:
+ msr |= MSR_RFC;
+ break;
+ }
+ }
+ }
+ gpio |= lp->chip->gpio_duplex[dp->full_duplex ? 1 : 0];
+
+ /* update medium status register */
+ lp->msr = msr;
+ OUT(dp, VCMD_WRITE_MEDIUM_STATUS, lp->msr, 0,
+ 0, NULL, &err, usberr);
+
+ if (gpio != gpio_old) {
+ /* LED control required for some products */
+ OUT(dp, VCMD_WRITE_GPIO,
+ gpio, 0, 0, NULL, &err, usberr);
+ }
+
+usberr:
+ DPRINTF(2, (CE_CONT, "!%s: %s: end (%s)",
+ dp->name, __func__,
+ err, err == USB_SUCCESS ? "success" : "error"));
+ return (err);
+}
+
+#define FILL_PKT_HEADER(bp, len) { \
+ (bp)[0] = (uint8_t)(len); \
+ (bp)[1] = (uint8_t)((len) >> 8); \
+ (bp)[2] = (uint8_t)(~(len)); \
+ (bp)[3] = (uint8_t)((~(len)) >> 8); \
+}
+
+#define PKT_HEADER_SIZE 4
+
+/*
+ * send/receive packet check
+ */
+static mblk_t *
+axf_tx_make_packet(struct usbgem_dev *dp, mblk_t *mp)
+{
+ int n;
+ size_t len;
+ size_t pkt_size;
+ mblk_t *new;
+ mblk_t *tp;
+ uint8_t *bp;
+ uint8_t *last_pos;
+ uint_t align_mask;
+ size_t header_size;
+ int pad_size;
+
+ len = msgdsize(mp);
+
+ if (AX88172(dp)) {
+#ifdef notdef
+ align_mask = 63;
+#else
+ align_mask = 511;
+#endif
+ header_size = 0;
+
+ if (len >= ETHERMIN && mp->b_cont == NULL &&
+ (len & align_mask) != 0) {
+ /* use the mp "as is" */
+ return (mp);
+ }
+ } else {
+ align_mask = 511;
+ header_size = PKT_HEADER_SIZE;
+ }
+
+ /*
+ * re-allocate the mp
+ */
+ /* minimum ethernet packet size of ETHERMIN */
+ pkt_size = max(len, ETHERMIN);
+
+ if (((pkt_size + header_size) & align_mask) == 0) {
+ /* padding is required in usb communication */
+ pad_size = PKT_HEADER_SIZE;
+ } else {
+ pad_size = 0;
+ }
+
+ if ((new = allocb(header_size + pkt_size + pad_size, 0)) == NULL) {
+ return (NULL);
+ }
+
+ bp = new->b_rptr;
+ if (header_size) {
+ uint16_t tmp;
+
+ /* add a header */
+ tmp = (uint16_t)pkt_size;
+ FILL_PKT_HEADER(bp, tmp);
+ bp += header_size;
+ }
+
+ /* copy contents of the buffer */
+ for (tp = mp; tp; tp = tp->b_cont) {
+ n = tp->b_wptr - tp->b_rptr;
+ bcopy(tp->b_rptr, bp, n);
+ bp += n;
+ }
+
+ /* add pads for ethernet packets */
+ last_pos = new->b_rptr + header_size + pkt_size;
+ while (bp < last_pos) {
+ *bp++ = 0;
+ }
+
+ /* add a zero-length pad segment for usb communications */
+ if (pad_size) {
+ /* add a dummy header for zero-length packet */
+ FILL_PKT_HEADER(bp, 0);
+ bp += pad_size;
+ }
+
+ /* close the payload of the packet */
+ new->b_wptr = bp;
+
+ return (new);
+}
+
+static void
+axf_dump_packet(struct usbgem_dev *dp, uint8_t *bp, int n)
+{
+ int i;
+
+ for (i = 0; i < n; i += 8, bp += 8) {
+ cmn_err(CE_CONT, "%02x %02x %02x %02x %02x %02x %02x %02x",
+ bp[0], bp[1], bp[2], bp[3], bp[4], bp[5], bp[6], bp[7]);
+ }
+}
+
+static mblk_t *
+axf_rx_make_packet(struct usbgem_dev *dp, mblk_t *mp)
+{
+ mblk_t *tp;
+ int rest;
+
+ if (AX88172(dp)) {
+ return (mp);
+ }
+
+ tp = mp;
+ rest = tp->b_wptr - tp->b_rptr;
+
+ if (rest <= PKT_HEADER_SIZE) {
+ /*
+ * the usb bulk-in frame doesn't include any valid
+ * ethernet packets.
+ */
+ return (NULL);
+ }
+
+ for (; ; ) {
+ uint16_t len;
+ uint16_t cksum;
+
+ /* analyse the header of the received usb frame */
+ len = LE16P(tp->b_rptr + 0);
+ cksum = LE16P(tp->b_rptr + 2);
+
+ /* test if the header is valid */
+ if (len + cksum != 0xffff) {
+ /* discard whole the packet */
+ cmn_err(CE_WARN,
+ "!%s: %s: corrupted header:%04x %04x",
+ dp->name, __func__, len, cksum);
+ return (NULL);
+ }
+#if DEBUG_LEVEL > 0
+ if (len < ETHERMIN || len > ETHERMAX) {
+ cmn_err(CE_NOTE,
+ "!%s: %s: incorrect pktsize:%d",
+ dp->name, __func__, len);
+ }
+#endif
+ /* extract a ethernet packet from the bulk-in frame */
+ tp->b_rptr += PKT_HEADER_SIZE;
+ tp->b_wptr = tp->b_rptr + len;
+
+ if (len & 1) {
+ /*
+ * skip a tailing pad byte if the packet
+ * length is odd
+ */
+ len++;
+ }
+ rest -= len + PKT_HEADER_SIZE;
+
+ if (rest <= PKT_HEADER_SIZE) {
+ /* no more vaild ethernet packets */
+ break;
+ }
+
+#if DEBUG_LEVEL > 10
+ axf_dump_packet(dp, tp->b_wptr, 18);
+#endif
+ /* allocate a mblk_t header for the next ethernet packet */
+ tp->b_next = dupb(mp);
+ tp->b_next->b_rptr = tp->b_rptr + len;
+ tp = tp->b_next;
+ }
+
+ return (mp);
+}
+
+/*
+ * MII Interfaces
+ */
+static uint16_t
+axf_mii_read(struct usbgem_dev *dp, uint_t index, int *errp)
+{
+ uint8_t buf[2];
+ uint16_t val;
+
+ DPRINTF(4, (CE_CONT, "!%s: %s: called, ix:%d",
+ dp->name, __func__, index));
+
+ /* switch to software MII operation mode */
+ OUT(dp, VCMD_SOFTWARE_MII_OP, 0, 0, 0, NULL, errp, usberr);
+
+ /* Read MII register */
+ IN(dp, VCMD_READ_MII_REG, dp->mii_phy_addr, index,
+ 2, buf, errp, usberr);
+
+ /* switch to hardware MII operation mode */
+ OUT(dp, VCMD_HARDWARE_MII_OP, 0, 0, 0, NULL, errp, usberr);
+
+ return (LE16P(buf));
+
+usberr:
+ cmn_err(CE_CONT,
+ "!%s: %s: usberr(%d) detected", dp->name, __func__, *errp);
+ return (0);
+}
+
+static void
+axf_mii_write(struct usbgem_dev *dp, uint_t index, uint16_t val, int *errp)
+{
+ uint8_t buf[2];
+
+ DPRINTF(4, (CE_CONT, "!%s: %s called, reg:%x val:%x",
+ dp->name, __func__, index, val));
+
+ /* switch software MII operation mode */
+ OUT(dp, VCMD_SOFTWARE_MII_OP, 0, 0, 0, NULL, errp, usberr);
+
+ /* Write to the specified MII register */
+ buf[0] = (uint8_t)val;
+ buf[1] = (uint8_t)(val >> 8);
+ OUT(dp, VCMD_WRITE_MII_REG, dp->mii_phy_addr, index,
+ 2, buf, errp, usberr);
+
+ /* switch to hardware MII operation mode */
+ OUT(dp, VCMD_HARDWARE_MII_OP, 0, 0, 0, NULL, errp, usberr);
+
+usberr:
+ ;
+}
+
+static void
+axf_interrupt(struct usbgem_dev *dp, mblk_t *mp)
+{
+ uint8_t *bp;
+ struct axf_dev *lp = dp->private;
+
+ bp = mp->b_rptr;
+
+ DPRINTF(2, (CE_CONT,
+ "!%s: %s: size:%d, %02x %02x %02x %02x %02x %02x %02x %02x",
+ dp->name, __func__, mp->b_wptr - mp->b_rptr,
+ bp[0], bp[1], bp[2], bp[3], bp[4], bp[5], bp[6], bp[7]));
+
+ if (lp->last_link_state ^ bp[2]) {
+ usbgem_mii_update_link(dp);
+ }
+
+ lp->last_link_state = bp[2];
+}
+
+/* ======================================================== */
+/*
+ * OS depend (device driver DKI) routine
+ */
+/* ======================================================== */
+#ifdef DEBUG_LEVEL
+static void
+axf_eeprom_dump(struct usbgem_dev *dp, int size)
+{
+ int i;
+ int err;
+ uint8_t w0[2], w1[2], w2[2], w3[2];
+
+ cmn_err(CE_CONT, "!%s: eeprom dump:", dp->name);
+
+ err = USB_SUCCESS;
+
+ for (i = 0; i < size; i += 4) {
+ IN(dp, VCMD_READ_SROM, i + 0, 0, 2, w0, &err, usberr);
+ IN(dp, VCMD_READ_SROM, i + 1, 0, 2, w1, &err, usberr);
+ IN(dp, VCMD_READ_SROM, i + 2, 0, 2, w2, &err, usberr);
+ IN(dp, VCMD_READ_SROM, i + 3, 0, 2, w3, &err, usberr);
+ cmn_err(CE_CONT, "!0x%02x: 0x%04x 0x%04x 0x%04x 0x%04x",
+ i,
+ (w0[1] << 8) | w0[0],
+ (w1[1] << 8) | w1[0],
+ (w2[1] << 8) | w2[0],
+ (w3[1] << 8) | w3[0]);
+ }
+usberr:
+ ;
+}
+#endif
+
+static int
+axf_attach_chip(struct usbgem_dev *dp)
+{
+ uint8_t phys[2];
+ int err;
+ uint_t vcmd;
+ int ret;
+#ifdef CONFIG_FULLSIZE_VLAN
+ uint8_t maxpktsize[2];
+ uint16_t vlan_pktsize;
+#endif
+#ifdef DEBUG_LEVEL
+ uint8_t val8;
+#endif
+ struct axf_dev *lp = dp->private;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s enter", dp->name, __func__));
+
+ ret = USB_SUCCESS;
+ /*
+ * mac address in EEPROM has loaded to ID registers.
+ */
+ vcmd = AX88172(dp) ? VCMD_READ_NODE_ID : VCMD_READ_NODE_ID_88772;
+ IN(dp, vcmd, 0, 0,
+ ETHERADDRL, dp->dev_addr.ether_addr_octet, &err, usberr);
+
+ /*
+ * setup IPG values
+ */
+ lp->ipg[0] = 0x15;
+ lp->ipg[1] = 0x0c;
+ lp->ipg[2] = 0x12;
+
+ /*
+ * We cannot scan phy because the nic returns undefined
+ * value, i.e. remained garbage, when MII phy is not at the
+ * specified index.
+ */
+#ifdef DEBUG_LEVELx
+ if (lp->chip->vid == 0x07b8 && lp->chip->pid == 0x420a) {
+ /*
+ * restore the original phy address of brain
+ * damaged Planex UE2-100TX
+ */
+ OUT(dp, VCMD_WRITE_SROM_ENABLE, 0, 0, 0, NULL, &err, usberr);
+ OUT(dp, VCMD_WRITE_SROM, 0x11, 0xe004, 0, NULL, &err, usberr);
+ OUT(dp, VCMD_WRITE_SROM_DISABLE, 0, 0, 0, NULL, &err, usberr);
+ }
+#endif
+ if (AX88172(dp)) {
+ IN(dp, VCMD_READ_PHY_IDS, 0, 0, 2, &phys, &err, usberr);
+ dp->mii_phy_addr = phys[1];
+ DPRINTF(0, (CE_CONT, "!%s: %s: phys_addr:%d %d",
+ dp->name, __func__, phys[0], phys[1]));
+ } else {
+ /* use built-in phy */
+ dp->mii_phy_addr = 0x10;
+ }
+
+ dp->misc_flag |= USBGEM_VLAN;
+#ifdef CONFIG_FULLSIZE_VLAN
+ if (AX88172(dp) || AX88772(dp)) {
+ /* check max packet size in srom */
+ IN(dp, VCMD_READ_SROM, 0x10, 0, 2, maxpktsize, &err, usberr);
+ vlan_pktsize = ETHERMAX + ETHERFCSL + 4 /* VTAG_SIZE */;
+
+ if (LE16P(maxpktsize) < vlan_pktsize) {
+ cmn_err(CE_CONT,
+ "!%s: %s: max packet size in srom is too small, "
+ "changing %d -> %d, do power cycle for the device",
+ dp->name, __func__,
+ LE16P(maxpktsize), vlan_pktsize);
+ OUT(dp, VCMD_WRITE_SROM_ENABLE,
+ 0, 0, 0, NULL, &err, usberr);
+ OUT(dp, VCMD_WRITE_SROM, 0x10,
+ vlan_pktsize, 0, NULL, &err, usberr);
+ OUT(dp, VCMD_WRITE_SROM_DISABLE,
+ 0, 0, 0, NULL, &err, usberr);
+
+ /* need to power off the device */
+ ret = USB_FAILURE;
+ }
+ }
+#endif
+#ifdef DEBUG_LEVEL
+ IN(dp, VCMD_READ_GPIO, 0, 0, 1, &val8, &err, usberr);
+ cmn_err(CE_CONT,
+ "!%s: %s: ipg 0x%02x 0x%02x 0x%02x, gpio 0x%b",
+ dp->name, __func__, lp->ipg[0], lp->ipg[1], lp->ipg[2],
+ val8, GPIO_BITS);
+#endif
+ /* fix rx buffer size */
+ if (!AX88172(dp)) {
+ dp->rx_buf_len = 2048;
+ }
+
+#if DEBUG_LEVEL > 0
+ axf_eeprom_dump(dp, 0x20);
+#endif
+ return (ret);
+
+usberr:
+ cmn_err(CE_WARN, "%s: %s: usb error detected (%d)",
+ dp->name, __func__, err);
+ return (USB_FAILURE);
+}
+
+static boolean_t
+axf_scan_phy(struct usbgem_dev *dp)
+{
+ int i;
+ int err;
+ uint16_t val;
+ int phy_addr_saved;
+ struct axf_dev *lp = dp->private;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ phy_addr_saved = dp->mii_phy_addr;
+
+ /* special probe routine for unreliable MII addr */
+#define PROBE_PAT \
+ (MII_ABILITY_100BASE_TX_FD | \
+ MII_ABILITY_100BASE_TX | \
+ MII_ABILITY_10BASE_T_FD | \
+ MII_ABILITY_10BASE_T)
+
+ for (i = 0; i < 32; i++) {
+ dp->mii_phy_addr = i;
+ axf_mii_write(dp, MII_AN_ADVERT, 0, &err);
+ if (err != USBGEM_SUCCESS) {
+ break;
+ }
+ val = axf_mii_read(dp, MII_AN_ADVERT, &err);
+ if (err != USBGEM_SUCCESS) {
+ break;
+ }
+ if (val != 0) {
+ DPRINTF(0, (CE_CONT, "!%s: %s: index:%d, val %b != 0",
+ dp->name, __func__, i, val, MII_ABILITY_BITS));
+ continue;
+ }
+
+ axf_mii_write(dp, MII_AN_ADVERT, PROBE_PAT, &err);
+ if (err != USBGEM_SUCCESS) {
+ break;
+ }
+ val = axf_mii_read(dp, MII_AN_ADVERT, &err);
+ if (err != USBGEM_SUCCESS) {
+ break;
+ }
+ if ((val & MII_ABILITY_TECH) != PROBE_PAT) {
+ DPRINTF(0, (CE_CONT, "!%s: %s: "
+ "index:%d, pat:%x != val:%b",
+ dp->name, __func__, i,
+ PROBE_PAT, val, MII_ABILITY_BITS));
+ continue;
+ }
+
+ /* found */
+ dp->mii_phy_addr = phy_addr_saved;
+ return (i);
+ }
+#undef PROBE_PAT
+ if (i == 32) {
+ cmn_err(CE_CONT, "!%s: %s: no mii phy found",
+ dp->name, __func__);
+ } else {
+ cmn_err(CE_CONT, "!%s: %s: i/o error while scanning phy",
+ dp->name, __func__);
+ }
+ dp->mii_phy_addr = phy_addr_saved;
+ return (-1);
+}
+
+static int
+axf_mii_probe(struct usbgem_dev *dp)
+{
+ int my_guess;
+ int err;
+ uint8_t old_11th[2];
+ uint8_t new_11th[2];
+ struct axf_dev *lp = dp->private;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+ (void) axf_reset_phy(dp);
+ lp->phy_has_reset = B_TRUE;
+
+ if (AX88172(dp)) {
+ my_guess = axf_scan_phy(dp);
+ if (my_guess >= 0 && my_guess < 32 &&
+ my_guess != dp->mii_phy_addr) {
+ /*
+ * phy addr in srom is wrong, need to fix it
+ */
+ IN(dp, VCMD_READ_SROM,
+ 0x11, 0, 2, old_11th, &err, usberr);
+
+ new_11th[0] = my_guess;
+ new_11th[1] = old_11th[1];
+
+ OUT(dp, VCMD_WRITE_SROM_ENABLE,
+ 0, 0, 0, NULL, &err, usberr);
+ OUT(dp, VCMD_WRITE_SROM,
+ 0x11, LE16P(new_11th), 0, NULL, &err, usberr);
+ OUT(dp, VCMD_WRITE_SROM_DISABLE,
+ 0, 0, 0, NULL, &err, usberr);
+#if 1
+ /* XXX - read back, but it doesn't work, why? */
+ delay(drv_usectohz(1000*1000));
+ IN(dp, VCMD_READ_SROM,
+ 0x11, 0, 2, new_11th, &err, usberr);
+#endif
+ cmn_err(CE_NOTE, "!%s: %s: phy addr in srom fixed: "
+ "%04x -> %04x",
+ dp->name, __func__,
+ LE16P(old_11th), LE16P(new_11th));
+ return (USBGEM_FAILURE);
+usberr:
+ cmn_err(CE_NOTE,
+ "!%s: %s: failed to patch phy addr, "
+ "current: %04x",
+ dp->name, __func__, LE16P(old_11th));
+ return (USBGEM_FAILURE);
+ }
+ }
+ return (usbgem_mii_probe_default(dp));
+}
+
+static int
+axf_mii_init(struct usbgem_dev *dp)
+{
+ struct axf_dev *lp = dp->private;
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ if (!lp->phy_has_reset) {
+ (void) axf_reset_phy(dp);
+ }
+
+ /* prepare to reset phy on the next reconnect or resume */
+ lp->phy_has_reset = B_FALSE;
+
+ return (USB_SUCCESS);
+}
+
+static int
+axfattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ int i;
+ ddi_iblock_cookie_t c;
+ int ret;
+ int revid;
+ int unit;
+ int vid;
+ int pid;
+ struct chip_info *p;
+ int len;
+ const char *drv_name;
+ struct usbgem_dev *dp;
+ void *base;
+ struct usbgem_conf *ugcp;
+ struct axf_dev *lp;
+
+ unit = ddi_get_instance(dip);
+ drv_name = ddi_driver_name(dip);
+
+ DPRINTF(3, (CE_CONT, "!%s%d: %s: called, cmd:%d",
+ drv_name, unit, __func__, cmd));
+
+ if (cmd == DDI_ATTACH) {
+ /*
+ * Check if the chip is supported.
+ */
+ vid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
+ "usb-vendor-id", -1);
+ pid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
+ "usb-product-id", -1);
+ revid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
+ "usb-revision-id", -1);
+
+ for (i = 0, p = chiptbl_88x7x; i < CHIPTABLESIZE; i++, p++) {
+ if (p->vid == vid && p->pid == pid) {
+ /* found */
+ cmn_err(CE_CONT, "!%s%d: %s "
+ "(vid: 0x%04x, did: 0x%04x, revid: 0x%02x)",
+ drv_name, unit, p->name, vid, pid, revid);
+ goto chip_found;
+ }
+ }
+
+ /* Not found */
+ cmn_err(CE_WARN, "!%s: %s: wrong usb venid/prodid (0x%x, 0x%x)",
+ drv_name, __func__, vid, pid);
+
+ /* assume 88772 */
+ p = &chiptbl_88x7x[CHIPTABLESIZE - 1];
+chip_found:
+ /*
+ * construct usbgem configration
+ */
+ ugcp = kmem_zalloc(sizeof (*ugcp), KM_SLEEP);
+
+ /* name */
+ /*
+ * softmac requires that ppa is the instance number
+ * of the device, otherwise it hangs in seaching the device.
+ */
+ sprintf(ugcp->usbgc_name, "%s%d", drv_name, unit);
+ ugcp->usbgc_ppa = unit;
+
+ ugcp->usbgc_ifnum = 0;
+ ugcp->usbgc_alt = 0;
+
+ ugcp->usbgc_tx_list_max = 64;
+
+ ugcp->usbgc_rx_header_len = 0;
+ ugcp->usbgc_rx_list_max = 64;
+
+ /* time out parameters */
+ ugcp->usbgc_tx_timeout = USBGEM_TX_TIMEOUT;
+ ugcp->usbgc_tx_timeout_interval = USBGEM_TX_TIMEOUT_INTERVAL;
+
+ /* flow control */
+ /*
+ * XXX - flow control caused link down frequently under
+ * heavy traffic
+ */
+ ugcp->usbgc_flow_control = FLOW_CONTROL_RX_PAUSE;
+
+ /* MII timeout parameters */
+ ugcp->usbgc_mii_link_watch_interval = ONESEC;
+ ugcp->usbgc_mii_an_watch_interval = ONESEC/5;
+ ugcp->usbgc_mii_reset_timeout = MII_RESET_TIMEOUT; /* 1 sec */
+ ugcp->usbgc_mii_an_timeout = MII_AN_TIMEOUT; /* 5 sec */
+ ugcp->usbgc_mii_an_wait = 0;
+ ugcp->usbgc_mii_linkdown_timeout = MII_LINKDOWN_TIMEOUT;
+
+ ugcp->usbgc_mii_an_delay = ONESEC/10;
+ ugcp->usbgc_mii_linkdown_action = MII_ACTION_RSA;
+ ugcp->usbgc_mii_linkdown_timeout_action = MII_ACTION_RESET;
+ ugcp->usbgc_mii_dont_reset = B_FALSE;
+ ugcp->usbgc_mii_hw_link_detection = B_TRUE;
+ ugcp->usbgc_mii_stop_mac_on_linkdown = B_FALSE;
+
+ /* I/O methods */
+
+ /* mac operation */
+ ugcp->usbgc_attach_chip = &axf_attach_chip;
+ ugcp->usbgc_reset_chip = &axf_reset_chip;
+ ugcp->usbgc_init_chip = &axf_init_chip;
+ ugcp->usbgc_start_chip = &axf_start_chip;
+ ugcp->usbgc_stop_chip = &axf_stop_chip;
+ ugcp->usbgc_multicast_hash = &axf_mcast_hash;
+
+ ugcp->usbgc_set_rx_filter = &axf_set_rx_filter;
+ ugcp->usbgc_set_media = &axf_set_media;
+ ugcp->usbgc_get_stats = &axf_get_stats;
+ ugcp->usbgc_interrupt = &axf_interrupt;
+
+ /* packet operation */
+ ugcp->usbgc_tx_make_packet = &axf_tx_make_packet;
+ ugcp->usbgc_rx_make_packet = &axf_rx_make_packet;
+
+ /* mii operations */
+ ugcp->usbgc_mii_probe = &axf_mii_probe;
+ ugcp->usbgc_mii_init = &axf_mii_init;
+ ugcp->usbgc_mii_config = &usbgem_mii_config_default;
+ ugcp->usbgc_mii_read = &axf_mii_read;
+ ugcp->usbgc_mii_write = &axf_mii_write;
+
+ /* mtu */
+ ugcp->usbgc_min_mtu = ETHERMTU;
+ ugcp->usbgc_max_mtu = ETHERMTU;
+ ugcp->usbgc_default_mtu = ETHERMTU;
+
+ lp = kmem_zalloc(sizeof (struct axf_dev), KM_SLEEP);
+ lp->chip = p;
+ lp->last_link_state = 0;
+ lp->phy_has_reset = B_FALSE;
+
+ dp = usbgem_do_attach(dip, ugcp, lp, sizeof (struct axf_dev));
+
+ kmem_free(ugcp, sizeof (*ugcp));
+
+ if (dp != NULL) {
+ return (DDI_SUCCESS);
+ }
+
+err_free_mem:
+ kmem_free(lp, sizeof (struct axf_dev));
+err_close_pipe:
+err:
+ return (DDI_FAILURE);
+ }
+
+ if (cmd == DDI_RESUME) {
+ return (usbgem_resume(dip));
+ }
+
+ return (DDI_FAILURE);
+}
+
+static int
+axfdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ int ret;
+
+ if (cmd == DDI_DETACH) {
+ ret = usbgem_do_detach(dip);
+ if (ret != DDI_SUCCESS) {
+ return (DDI_FAILURE);
+ }
+ return (DDI_SUCCESS);
+ }
+ if (cmd == DDI_SUSPEND) {
+ return (usbgem_suspend(dip));
+ }
+ return (DDI_FAILURE);
+}
+
+/* ======================================================== */
+/*
+ * OS depend (loadable streams driver) routine
+ */
+/* ======================================================== */
+#ifdef USBGEM_CONFIG_GLDv3
+USBGEM_STREAM_OPS(axf_ops, axfattach, axfdetach);
+#else
+static struct module_info axfminfo = {
+ 0, /* mi_idnum */
+ "axf", /* mi_idname */
+ 0, /* mi_minpsz */
+ ETHERMTU, /* mi_maxpsz */
+ ETHERMTU*128, /* mi_hiwat */
+ 1, /* mi_lowat */
+};
+
+static struct qinit axfrinit = {
+ (int (*)()) NULL, /* qi_putp */
+ usbgem_rsrv, /* qi_srvp */
+ usbgem_open, /* qi_qopen */
+ usbgem_close, /* qi_qclose */
+ (int (*)()) NULL, /* qi_qadmin */
+ &axfminfo, /* qi_minfo */
+ NULL /* qi_mstat */
+};
+
+static struct qinit axfwinit = {
+ usbgem_wput, /* qi_putp */
+ usbgem_wsrv, /* qi_srvp */
+ (int (*)()) NULL, /* qi_qopen */
+ (int (*)()) NULL, /* qi_qclose */
+ (int (*)()) NULL, /* qi_qadmin */
+ &axfminfo, /* qi_minfo */
+ NULL /* qi_mstat */
+};
+
+static struct streamtab axf_info = {
+ &axfrinit, /* st_rdinit */
+ &axfwinit, /* st_wrinit */
+ NULL, /* st_muxrinit */
+ NULL /* st_muxwrinit */
+};
+
+static struct cb_ops cb_axf_ops = {
+ nulldev, /* cb_open */
+ nulldev, /* cb_close */
+ nodev, /* cb_strategy */
+ nodev, /* cb_print */
+ nodev, /* cb_dump */
+ nodev, /* cb_read */
+ nodev, /* cb_write */
+ nodev, /* cb_ioctl */
+ nodev, /* cb_devmap */
+ nodev, /* cb_mmap */
+ nodev, /* cb_segmap */
+ nochpoll, /* cb_chpoll */
+ ddi_prop_op, /* cb_prop_op */
+ &axf_info, /* cb_stream */
+ D_NEW|D_MP /* cb_flag */
+};
+
+static struct dev_ops axf_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* devo_refcnt */
+ usbgem_getinfo, /* devo_getinfo */
+ nulldev, /* devo_identify */
+ nulldev, /* devo_probe */
+ axfattach, /* devo_attach */
+ axfdetach, /* devo_detach */
+ nodev, /* devo_reset */
+ &cb_axf_ops, /* devo_cb_ops */
+ NULL, /* devo_bus_ops */
+ usbgem_power, /* devo_power */
+#if DEVO_REV >= 4
+ usbgem_quiesce, /* devo_quiesce */
+#endif
+};
+#endif
+
+static struct modldrv modldrv = {
+ &mod_driverops, /* Type of module. This one is a driver */
+ ident,
+ &axf_ops, /* driver ops */
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, &modldrv, NULL
+};
+
+/* ======================================================== */
+/*
+ * _init : done
+ */
+/* ======================================================== */
+int
+_init(void)
+{
+ int status;
+
+ DPRINTF(2, (CE_CONT, "!axf: _init: called"));
+
+ status = usbgem_mod_init(&axf_ops, "axf");
+ if (status != DDI_SUCCESS) {
+ return (status);
+ }
+ status = mod_install(&modlinkage);
+ if (status != DDI_SUCCESS) {
+ usbgem_mod_fini(&axf_ops);
+ }
+ return (status);
+}
+
+/*
+ * _fini : done
+ */
+int
+_fini(void)
+{
+ int status;
+
+ DPRINTF(2, (CE_CONT, "!axf: _fini: called"));
+ status = mod_remove(&modlinkage);
+ if (status == DDI_SUCCESS) {
+ usbgem_mod_fini(&axf_ops);
+ }
+ return (status);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
diff --git a/usr/src/uts/common/io/cons.c b/usr/src/uts/common/io/cons.c
index 6ef1b0b9f7..495ae93cf9 100644
--- a/usr/src/uts/common/io/cons.c
+++ b/usr/src/uts/common/io/cons.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 1982, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015, Joyent, Inc. All rights reserved.
*/
/*
@@ -53,6 +54,7 @@
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/stat.h>
+#include <sys/limits.h>
#include <sys/console.h>
#include <sys/consdev.h>
@@ -414,14 +416,24 @@ cnwrite(dev_t dev, struct uio *uio, struct cred *cred)
*/
if (vsconsvp != NULL && vsconsvp->v_stream != NULL) {
struiod_t uiod;
+ struct iovec buf[IOV_MAX_STACK];
+ int iovlen = 0;
+
+ if (uio->uio_iovcnt > IOV_MAX_STACK) {
+ iovlen = uio->uio_iovcnt * sizeof (iovec_t);
+ uiod.d_iov = kmem_alloc(iovlen, KM_SLEEP);
+ } else {
+ uiod.d_iov = buf;
+ }
/*
* strwrite modifies uio so need to make copy.
*/
- (void) uiodup(uio, &uiod.d_uio, uiod.d_iov,
- sizeof (uiod.d_iov) / sizeof (*uiod.d_iov));
+ (void) uiodup(uio, &uiod.d_uio, uiod.d_iov, uio->uio_iovcnt);
(void) strwrite(vsconsvp, &uiod.d_uio, cred);
+ if (iovlen != 0)
+ kmem_free(uiod.d_iov, iovlen);
}
if (rconsvp->v_stream != NULL)
diff --git a/usr/src/uts/common/io/devpoll.c b/usr/src/uts/common/io/devpoll.c
index e00ac1d1e9..f8d9f1cff8 100644
--- a/usr/src/uts/common/io/devpoll.c
+++ b/usr/src/uts/common/io/devpoll.c
@@ -670,15 +670,26 @@ dpwrite(dev_t dev, struct uio *uiop, cred_t *credp)
uiosize = uiop->uio_resid;
pollfdnum = uiosize / size;
- mutex_enter(&curproc->p_lock);
- if (pollfdnum > (uint_t)rctl_enforced_value(
- rctlproc_legacy[RLIMIT_NOFILE], curproc->p_rctls, curproc)) {
- (void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
- curproc->p_rctls, curproc, RCA_SAFE);
+
+ /*
+ * We want to make sure that pollfdnum isn't large enough to DoS us,
+ * but we also don't want to grab p_lock unnecessarily -- so we
+ * perform the full check against our resource limits if and only if
+ * pollfdnum is larger than the known-to-be-sane value of UINT8_MAX.
+ */
+ if (pollfdnum > UINT8_MAX) {
+ mutex_enter(&curproc->p_lock);
+ if (pollfdnum >
+ (uint_t)rctl_enforced_value(rctlproc_legacy[RLIMIT_NOFILE],
+ curproc->p_rctls, curproc)) {
+ (void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
+ curproc->p_rctls, curproc, RCA_SAFE);
+ mutex_exit(&curproc->p_lock);
+ return (EINVAL);
+ }
mutex_exit(&curproc->p_lock);
- return (EINVAL);
}
- mutex_exit(&curproc->p_lock);
+
/*
* Copy in the pollfd array. Walk through the array and add
* each polled fd to the cached set.
@@ -1112,14 +1123,18 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
void *setp = STRUCT_FGETP(dvpoll, dp_setp);
if (setp != NULL) {
- if (copyin(setp, &set, sizeof (set))) {
- DP_REFRELE(dpep);
- return (EFAULT);
+ if ((mode & FKIOCTL) != 0) {
+ /* Use the signal set directly */
+ ksetp = (k_sigset_t *)setp;
+ } else {
+ if (copyin(setp, &set, sizeof (set))) {
+ DP_REFRELE(dpep);
+ return (EFAULT);
+ }
+ sigutok(&set, &kset);
+ ksetp = &kset;
}
- sigutok(&set, &kset);
- ksetp = &kset;
-
mutex_enter(&p->p_lock);
schedctl_finish_sigblock(t);
lwp->lwp_sigoldmask = t->t_hold;
@@ -1268,6 +1283,10 @@ dpioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
DP_SIGMASK_RESTORE(ksetp);
if (error == 0 && fdcnt > 0) {
+ /*
+ * It should be noted that FKIOCTL does not influence
+ * the copyout (vs bcopy) of dp_fds at this time.
+ */
if (copyout(ps->ps_dpbuf,
STRUCT_FGETP(dvpoll, dp_fds), fdcnt * fdsize)) {
DP_REFRELE(dpep);
diff --git a/usr/src/uts/common/io/dld/dld_drv.c b/usr/src/uts/common/io/dld/dld_drv.c
index 40cbe86170..62bc4a8ecf 100644
--- a/usr/src/uts/common/io/dld/dld_drv.c
+++ b/usr/src/uts/common/io/dld/dld_drv.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015, Joyent Inc.
*/
/*
@@ -701,7 +702,8 @@ drv_ioc_prop_common(dld_ioc_macprop_t *prop, intptr_t arg, boolean_t set,
err = EACCES;
goto done;
}
- err = dls_devnet_setzid(dlh, dzp->diz_zid);
+ err = dls_devnet_setzid(dlh, dzp->diz_zid,
+ dzp->diz_transient);
} else {
kprop->pr_perm_flags = MAC_PROP_PERM_RW;
(*(zoneid_t *)kprop->pr_val) = dls_devnet_getzid(dlh);
@@ -865,7 +867,7 @@ drv_ioc_rename(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp)
return (err);
if ((err = dls_devnet_rename(dir->dir_linkid1, dir->dir_linkid2,
- dir->dir_link)) != 0)
+ dir->dir_link, dir->dir_zoneinit)) != 0)
return (err);
if (dir->dir_linkid2 == DATALINK_INVALID_LINKID)
@@ -1376,7 +1378,8 @@ static dld_ioc_modentry_t dld_ioc_modtable[] = {
{SIMNET_IOC, "simnet", 0, NULL, 0},
{BRIDGE_IOC, "bridge", 0, NULL, 0},
{IPTUN_IOC, "iptun", 0, NULL, 0},
- {IBPART_IOC, "ibp", -1, NULL, 0}
+ {IBPART_IOC, "ibp", -1, NULL, 0},
+ {OVERLAY_IOC, "overlay", 0, NULL, 0}
};
#define DLDIOC_CNT \
(sizeof (dld_ioc_modtable) / sizeof (dld_ioc_modentry_t))
diff --git a/usr/src/uts/common/io/dld/dld_proto.c b/usr/src/uts/common/io/dld/dld_proto.c
index a438e43d91..661d8b2f4f 100644
--- a/usr/src/uts/common/io/dld/dld_proto.c
+++ b/usr/src/uts/common/io/dld/dld_proto.c
@@ -41,7 +41,7 @@ static proto_reqfunc_t proto_info_req, proto_attach_req, proto_detach_req,
proto_bind_req, proto_unbind_req, proto_promiscon_req, proto_promiscoff_req,
proto_enabmulti_req, proto_disabmulti_req, proto_physaddr_req,
proto_setphysaddr_req, proto_udqos_req, proto_req, proto_capability_req,
- proto_notify_req, proto_passive_req;
+ proto_notify_req, proto_passive_req, proto_exclusive_req;
static void proto_capability_advertise(dld_str_t *, mblk_t *);
static int dld_capab_poll_disable(dld_str_t *, dld_capab_poll_t *);
@@ -121,6 +121,9 @@ dld_proto(dld_str_t *dsp, mblk_t *mp)
case DL_PASSIVE_REQ:
proto_passive_req(dsp, mp);
break;
+ case DL_EXCLUSIVE_REQ:
+ proto_exclusive_req(dsp, mp);
+ break;
default:
proto_req(dsp, mp);
break;
@@ -605,6 +608,14 @@ proto_promiscon_req(dld_str_t *dsp, mblk_t *mp)
new_flags |= DLS_PROMISC_PHYS;
break;
+ case DL_PROMISC_RX_ONLY:
+ new_flags |= DLS_PROMISC_RX_ONLY;
+ break;
+
+ case DL_PROMISC_FIXUPS:
+ new_flags |= DLS_PROMISC_FIXUPS;
+ break;
+
default:
dl_err = DL_NOTSUPPORTED;
goto failed2;
@@ -692,6 +703,22 @@ proto_promiscoff_req(dld_str_t *dsp, mblk_t *mp)
new_flags &= ~DLS_PROMISC_PHYS;
break;
+ case DL_PROMISC_RX_ONLY:
+ if (!(dsp->ds_promisc & DLS_PROMISC_RX_ONLY)) {
+ dl_err = DL_NOTENAB;
+ goto failed;
+ }
+ new_flags &= ~DLS_PROMISC_RX_ONLY;
+ break;
+
+ case DL_PROMISC_FIXUPS:
+ if (!(dsp->ds_promisc & DLS_PROMISC_FIXUPS)) {
+ dl_err = DL_NOTENAB;
+ goto failed;
+ }
+ new_flags &= ~DLS_PROMISC_FIXUPS;
+ break;
+
default:
dl_err = DL_NOTSUPPORTED;
mac_perim_exit(mph);
@@ -1295,7 +1322,8 @@ proto_passive_req(dld_str_t *dsp, mblk_t *mp)
* If we've already become active by issuing an active primitive,
* then it's too late to try to become passive.
*/
- if (dsp->ds_passivestate == DLD_ACTIVE) {
+ if (dsp->ds_passivestate == DLD_ACTIVE ||
+ dsp->ds_passivestate == DLD_EXCLUSIVE) {
dl_err = DL_OUTSTATE;
goto failed;
}
@@ -1354,7 +1382,12 @@ dld_capab_direct(dld_str_t *dsp, void *data, uint_t flags)
dls_rx_set(dsp, (dls_rx_t)direct->di_rx_cf,
direct->di_rx_ch);
- direct->di_tx_df = (uintptr_t)str_mdata_fastpath_put;
+ if (direct->di_flags & DI_DIRECT_RAW) {
+ direct->di_tx_df =
+ (uintptr_t)str_mdata_raw_fastpath_put;
+ } else {
+ direct->di_tx_df = (uintptr_t)str_mdata_fastpath_put;
+ }
direct->di_tx_dh = dsp;
direct->di_tx_cb_df = (uintptr_t)mac_client_tx_notify;
direct->di_tx_cb_dh = dsp->ds_mch;
@@ -1516,8 +1549,9 @@ dld_capab(dld_str_t *dsp, uint_t type, void *data, uint_t flags)
* completes. So we limit the check to DLD_ENABLE case.
*/
if ((flags == DLD_ENABLE && type != DLD_CAPAB_PERIM) &&
- (dsp->ds_sap != ETHERTYPE_IP ||
- !check_mod_above(dsp->ds_rq, "ip"))) {
+ ((dsp->ds_sap != ETHERTYPE_IP ||
+ !check_mod_above(dsp->ds_rq, "ip")) &&
+ !check_mod_above(dsp->ds_rq, "vnd"))) {
return (ENOTSUP);
}
@@ -1599,9 +1633,15 @@ proto_capability_advertise(dld_str_t *dsp, mblk_t *mp)
}
/*
- * Direct capability negotiation interface between IP and DLD
+ * Direct capability negotiation interface between IP/VND and DLD. Note
+ * that for vnd we only allow the case where the media type is the
+ * native media type so we know that there are no transformations that
+ * would have to happen to the mac header that it receives.
*/
- if (dsp->ds_sap == ETHERTYPE_IP && check_mod_above(dsp->ds_rq, "ip")) {
+ if ((dsp->ds_sap == ETHERTYPE_IP &&
+ check_mod_above(dsp->ds_rq, "ip")) ||
+ (check_mod_above(dsp->ds_rq, "vnd") &&
+ dsp->ds_mip->mi_media == dsp->ds_mip->mi_nativemedia)) {
dld_capable = B_TRUE;
subsize += sizeof (dl_capability_sub_t) +
sizeof (dl_capab_dld_t);
@@ -1720,3 +1760,36 @@ dld_capabilities_disable(dld_str_t *dsp)
if (dsp->ds_polling)
(void) dld_capab_poll_disable(dsp, NULL);
}
+
+static void
+proto_exclusive_req(dld_str_t *dsp, mblk_t *mp)
+{
+ int ret = 0;
+ t_uscalar_t dl_err;
+ mac_perim_handle_t mph;
+
+ if (dsp->ds_passivestate != DLD_UNINITIALIZED) {
+ dl_err = DL_OUTSTATE;
+ goto failed;
+ }
+
+ if (MBLKL(mp) < DL_EXCLUSIVE_REQ_SIZE) {
+ dl_err = DL_BADPRIM;
+ goto failed;
+ }
+
+ mac_perim_enter_by_mh(dsp->ds_mh, &mph);
+ ret = dls_exclusive_set(dsp, B_TRUE);
+ mac_perim_exit(mph);
+
+ if (ret != 0) {
+ dl_err = DL_SYSERR;
+ goto failed;
+ }
+
+ dsp->ds_passivestate = DLD_EXCLUSIVE;
+ dlokack(dsp->ds_wq, mp, DL_EXCLUSIVE_REQ);
+ return;
+failed:
+ dlerrorack(dsp->ds_wq, mp, DL_EXCLUSIVE_REQ, dl_err, (t_uscalar_t)ret);
+}
diff --git a/usr/src/uts/common/io/dld/dld_str.c b/usr/src/uts/common/io/dld/dld_str.c
index 6f0d0b9a6c..f5308e70ff 100644
--- a/usr/src/uts/common/io/dld/dld_str.c
+++ b/usr/src/uts/common/io/dld/dld_str.c
@@ -854,6 +854,77 @@ i_dld_ether_header_update_tag(mblk_t *mp, uint_t pri, uint16_t vid,
return (mp);
}
+static boolean_t
+i_dld_raw_ether_check(dld_str_t *dsp, mac_header_info_t *mhip, mblk_t **mpp)
+{
+ mblk_t *mp = *mpp;
+ mblk_t *newmp;
+ uint_t pri, vid, dvid;
+
+ dvid = mac_client_vid(dsp->ds_mch);
+
+ /*
+ * Discard the packet if this is a VLAN stream but the VID in
+ * the packet is not correct.
+ */
+ vid = VLAN_ID(mhip->mhi_tci);
+ if ((dvid != VLAN_ID_NONE) && (vid != VLAN_ID_NONE))
+ return (B_FALSE);
+
+ /*
+ * Discard the packet if this packet is a tagged packet
+ * but both pri and VID are 0.
+ */
+ pri = VLAN_PRI(mhip->mhi_tci);
+ if (mhip->mhi_istagged && !mhip->mhi_ispvid && pri == 0 &&
+ vid == VLAN_ID_NONE)
+ return (B_FALSE);
+
+ /*
+ * Update the priority bits to the per-stream priority if
+ * priority is not set in the packet. Update the VID for
+ * packets on a VLAN stream.
+ */
+ pri = (pri == 0) ? dsp->ds_pri : 0;
+ if ((pri != 0) || (dvid != VLAN_ID_NONE)) {
+ if ((newmp = i_dld_ether_header_update_tag(mp, pri,
+ dvid, dsp->ds_dlp->dl_tagmode)) == NULL) {
+ return (B_FALSE);
+ }
+ *mpp = newmp;
+ }
+
+ return (B_TRUE);
+}
+
+mac_tx_cookie_t
+str_mdata_raw_fastpath_put(dld_str_t *dsp, mblk_t *mp, uintptr_t f_hint,
+ uint16_t flag)
+{
+ boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER);
+ mac_header_info_t mhi;
+ mac_tx_cookie_t cookie;
+
+ if (mac_vlan_header_info(dsp->ds_mh, mp, &mhi) != 0)
+ goto discard;
+
+ if (is_ethernet) {
+ if (i_dld_raw_ether_check(dsp, &mhi, &mp) == B_FALSE)
+ goto discard;
+ }
+
+ if ((cookie = DLD_TX(dsp, mp, f_hint, flag)) != NULL) {
+ DLD_SETQFULL(dsp);
+ }
+ return (cookie);
+discard:
+ /* TODO: bump kstat? */
+ freemsg(mp);
+ return (NULL);
+}
+
+
+
/*
* M_DATA put (IP fast-path mode)
*/
@@ -902,7 +973,6 @@ str_mdata_raw_put(dld_str_t *dsp, mblk_t *mp)
mblk_t *bp, *newmp;
size_t size;
mac_header_info_t mhi;
- uint_t pri, vid, dvid;
uint_t max_sdu;
/*
@@ -948,38 +1018,8 @@ str_mdata_raw_put(dld_str_t *dsp, mblk_t *mp)
goto discard;
if (is_ethernet) {
- dvid = mac_client_vid(dsp->ds_mch);
-
- /*
- * Discard the packet if this is a VLAN stream but the VID in
- * the packet is not correct.
- */
- vid = VLAN_ID(mhi.mhi_tci);
- if ((dvid != VLAN_ID_NONE) && (vid != VLAN_ID_NONE))
- goto discard;
-
- /*
- * Discard the packet if this packet is a tagged packet
- * but both pri and VID are 0.
- */
- pri = VLAN_PRI(mhi.mhi_tci);
- if (mhi.mhi_istagged && !mhi.mhi_ispvid && pri == 0 &&
- vid == VLAN_ID_NONE)
+ if (i_dld_raw_ether_check(dsp, &mhi, &mp) == B_FALSE)
goto discard;
-
- /*
- * Update the priority bits to the per-stream priority if
- * priority is not set in the packet. Update the VID for
- * packets on a VLAN stream.
- */
- pri = (pri == 0) ? dsp->ds_pri : 0;
- if ((pri != 0) || (dvid != VLAN_ID_NONE)) {
- if ((newmp = i_dld_ether_header_update_tag(mp, pri,
- dvid, dsp->ds_dlp->dl_tagmode)) == NULL) {
- goto discard;
- }
- mp = newmp;
- }
}
if (DLD_TX(dsp, mp, 0, 0) != NULL) {
diff --git a/usr/src/uts/common/io/dls/dls.c b/usr/src/uts/common/io/dls/dls.c
index 92993ada58..0f8dbcb57a 100644
--- a/usr/src/uts/common/io/dls/dls.c
+++ b/usr/src/uts/common/io/dls/dls.c
@@ -25,7 +25,7 @@
*/
/*
- * Copyright (c) 2013 Joyent, Inc. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
*/
/*
@@ -248,19 +248,69 @@ dls_promisc(dld_str_t *dsp, uint32_t new_flags)
{
int err = 0;
uint32_t old_flags = dsp->ds_promisc;
+ uint32_t new_type = new_flags &
+ ~(DLS_PROMISC_RX_ONLY | DLS_PROMISC_FIXUPS);
mac_client_promisc_type_t mptype = MAC_CLIENT_PROMISC_ALL;
+ uint16_t mac_flags = 0;
+ boolean_t doremove = B_FALSE;
ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
ASSERT(!(new_flags & ~(DLS_PROMISC_SAP | DLS_PROMISC_MULTI |
- DLS_PROMISC_PHYS)));
+ DLS_PROMISC_PHYS | DLS_PROMISC_RX_ONLY | DLS_PROMISC_FIXUPS)));
+
+ /*
+ * If we only have the non-data receive flags set or are only changing
+ * them, then there's nothing to do other than update the flags here.
+ * Basically when we only have something in the set of
+ * DLS_PROMISC_RX_ONLY and DLS_PROMISC_FIXUPS around, then there's
+ * nothing else for us to do other than toggle it, as there's no need to
+ * talk to MAC and we don't have to do anything else.
+ */
+ if ((old_flags & ~(DLS_PROMISC_RX_ONLY | DLS_PROMISC_FIXUPS)) == 0 &&
+ (new_flags & ~(DLS_PROMISC_RX_ONLY | DLS_PROMISC_FIXUPS)) == 0) {
+ dsp->ds_promisc = new_flags;
+ return (0);
+ }
/*
* If the user has only requested DLS_PROMISC_MULTI then we need to make
* sure that they don't see all packets.
*/
- if (new_flags == DLS_PROMISC_MULTI)
+ if (new_type == DLS_PROMISC_MULTI)
mptype = MAC_CLIENT_PROMISC_MULTI;
+ /*
+ * Look at new flags and figure out the correct mac promisc flags.
+ * If we've only requested DLS_PROMISC_SAP and not _MULTI or _PHYS,
+ * don't turn on physical promisc mode.
+ */
+ if (new_flags & DLS_PROMISC_RX_ONLY)
+ mac_flags |= MAC_PROMISC_FLAGS_NO_TX_LOOP;
+ if (new_flags & DLS_PROMISC_FIXUPS)
+ mac_flags |= MAC_PROMISC_FLAGS_DO_FIXUPS;
+ if (new_type == DLS_PROMISC_SAP)
+ mac_flags |= MAC_PROMISC_FLAGS_NO_PHYS;
+
+ /*
+ * If we're coming in and we're being asked to transition to a state
+ * where the only DLS flags would be enabled are flags that change what
+ * we do with promiscuous packets (DLS_PROMISC_RX_ONLY and
+ * DLS_PROMISC_FIXUPS) and not which packets we should receive, then we
+ * need to remove the MAC layer promiscuous handler.
+ */
+ if ((new_flags & ~(DLS_PROMISC_RX_ONLY | DLS_PROMISC_FIXUPS)) == 0 &&
+ (old_flags & ~(DLS_PROMISC_RX_ONLY | DLS_PROMISC_FIXUPS)) != 0 &&
+ new_flags != 0) {
+ doremove = B_TRUE;
+ }
+
+ /*
+ * There are three cases we care about here with respect to MAC. Going
+ * from nothing to something, something to nothing, something to
+ * something where we need to change how we're getting stuff from mac.
+ * In the last case, as long as they're not equal, we need to assume
+ * something has changed and do something about it.
+ */
if (dsp->ds_promisc == 0 && new_flags != 0) {
/*
* If only DLS_PROMISC_SAP, we don't turn on the
@@ -268,9 +318,7 @@ dls_promisc(dld_str_t *dsp, uint32_t new_flags)
*/
dsp->ds_promisc = new_flags;
err = mac_promisc_add(dsp->ds_mch, mptype,
- dls_rx_promisc, dsp, &dsp->ds_mph,
- (new_flags != DLS_PROMISC_SAP) ? 0 :
- MAC_PROMISC_FLAGS_NO_PHYS);
+ dls_rx_promisc, dsp, &dsp->ds_mph, mac_flags);
if (err != 0) {
dsp->ds_promisc = old_flags;
return (err);
@@ -281,7 +329,8 @@ dls_promisc(dld_str_t *dsp, uint32_t new_flags)
mac_promisc_remove(dsp->ds_vlan_mph);
dsp->ds_vlan_mph = NULL;
}
- } else if (dsp->ds_promisc != 0 && new_flags == 0) {
+ } else if (dsp->ds_promisc != 0 &&
+ (new_flags == 0 || doremove == B_TRUE)) {
ASSERT(dsp->ds_mph != NULL);
mac_promisc_remove(dsp->ds_mph);
@@ -296,19 +345,13 @@ dls_promisc(dld_str_t *dsp, uint32_t new_flags)
MAC_CLIENT_PROMISC_ALL, dls_rx_vlan_promisc, dsp,
&dsp->ds_vlan_mph, MAC_PROMISC_FLAGS_NO_PHYS);
}
- } else if (dsp->ds_promisc == DLS_PROMISC_SAP && new_flags != 0 &&
- new_flags != dsp->ds_promisc) {
- /*
- * If the old flag is PROMISC_SAP, but the current flag has
- * changed to some new non-zero value, we need to turn the
- * physical promiscuous mode.
- */
+ } else if (new_flags != 0 && new_flags != old_flags) {
ASSERT(dsp->ds_mph != NULL);
mac_promisc_remove(dsp->ds_mph);
/* Honors both after-remove and before-add semantics! */
dsp->ds_promisc = new_flags;
err = mac_promisc_add(dsp->ds_mch, mptype,
- dls_rx_promisc, dsp, &dsp->ds_mph, 0);
+ dls_rx_promisc, dsp, &dsp->ds_mph, mac_flags);
if (err != 0)
dsp->ds_promisc = old_flags;
} else {
@@ -629,6 +672,22 @@ boolean_t
dls_accept_promisc(dld_str_t *dsp, mac_header_info_t *mhip, dls_rx_t *ds_rx,
void **ds_rx_arg, boolean_t loopback)
{
+ if (dsp->ds_promisc == 0) {
+ /*
+ * If there are active walkers of the mi_promisc_list when
+ * promiscuousness is disabled, ds_promisc will be cleared,
+ * but the DLS will remain on the mi_promisc_list until the
+ * walk is completed. If we do not recognize this case here,
+ * we won't properly execute the ds_promisc case in the common
+ * accept routine -- and we will potentially accept a packet
+ * that has originated with this DLS (which in turn can
+ * induce recursion and death by stack overflow). If
+ * ds_promisc is zero, we know that we are in this window --
+ * and we refuse to accept the packet.
+ */
+ return (B_FALSE);
+ }
+
return (dls_accept_common(dsp, mhip, ds_rx, ds_rx_arg, B_TRUE,
loopback));
}
@@ -659,7 +718,10 @@ dls_mac_active_set(dls_link_t *dlp)
* Set the function to start receiving packets.
*/
mac_rx_set(dlp->dl_mch, i_dls_link_rx, dlp);
+ } else if (dlp->dl_exclusive == B_TRUE) {
+ return (EBUSY);
}
+
dlp->dl_nactive++;
return (0);
}
@@ -685,7 +747,11 @@ dls_active_set(dld_str_t *dsp)
if (dsp->ds_passivestate == DLD_PASSIVE)
return (0);
- /* If we're already active, then there's nothing more to do. */
+ if (dsp->ds_dlp->dl_exclusive == B_TRUE &&
+ dsp->ds_passivestate != DLD_EXCLUSIVE)
+ return (EBUSY);
+
+ /* If we're already active, we need to check the link's exclusivity */
if ((dsp->ds_nactive == 0) &&
((err = dls_mac_active_set(dsp->ds_dlp)) != 0)) {
/* except for ENXIO all other errors are mapped to EBUSY */
@@ -694,7 +760,8 @@ dls_active_set(dld_str_t *dsp)
return (err);
}
- dsp->ds_passivestate = DLD_ACTIVE;
+ dsp->ds_passivestate = dsp->ds_dlp->dl_exclusive == B_TRUE ?
+ DLD_EXCLUSIVE : DLD_ACTIVE;
dsp->ds_nactive++;
return (0);
}
@@ -725,7 +792,32 @@ dls_active_clear(dld_str_t *dsp, boolean_t all)
if (dsp->ds_nactive != 0)
return;
- ASSERT(dsp->ds_passivestate == DLD_ACTIVE);
+ ASSERT(dsp->ds_passivestate == DLD_ACTIVE ||
+ dsp->ds_passivestate == DLD_EXCLUSIVE);
dls_mac_active_clear(dsp->ds_dlp);
+ /*
+ * We verify below to ensure that no other part of DLS has mucked with
+ * our exclusive state.
+ */
+ if (dsp->ds_passivestate == DLD_EXCLUSIVE)
+ VERIFY(dls_exclusive_set(dsp, B_FALSE) == 0);
dsp->ds_passivestate = DLD_UNINITIALIZED;
}
+
+int
+dls_exclusive_set(dld_str_t *dsp, boolean_t enable)
+{
+ ASSERT(MAC_PERIM_HELD(dsp->ds_mh));
+
+ if (enable == B_FALSE) {
+ dsp->ds_dlp->dl_exclusive = B_FALSE;
+ return (0);
+ }
+
+ if (dsp->ds_dlp->dl_nactive != 0)
+ return (EBUSY);
+
+ dsp->ds_dlp->dl_exclusive = B_TRUE;
+
+ return (0);
+}
diff --git a/usr/src/uts/common/io/dls/dls_link.c b/usr/src/uts/common/io/dls/dls_link.c
index 6b92a81e77..4a735d870e 100644
--- a/usr/src/uts/common/io/dls/dls_link.c
+++ b/usr/src/uts/common/io/dls/dls_link.c
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
*/
/*
@@ -34,6 +35,9 @@
#include <sys/dld_impl.h>
#include <sys/sdt.h>
#include <sys/atomic.h>
+#include <sys/sysevent.h>
+#include <sys/sysevent/eventdefs.h>
+#include <sys/sysevent/datalink.h>
static kmem_cache_t *i_dls_link_cachep;
mod_hash_t *i_dls_link_hash;
@@ -579,6 +583,67 @@ drop:
freemsg(mp);
}
+/*
+ * We'd like to notify via sysevents that a link state change has occurred.
+ * There are a couple of challenges associated with this. The first is that if
+ * the link is flapping a lot, we may not see an accurate state when we launch
+ * the notification, we're told it changed, not what it changed to.
+ *
+ * The next problem is that all of the information that a user has associated
+ * with this device is the exact opposite of what we have on the dls_link_t. We
+ * have the name of the mac device, which has no bearing on what users see.
+ * Likewise, we don't have the datalink id either. So we're going to have to get
+ * this from dls.
+ *
+ * This is all further complicated by the fact that this could be going on in
+ * another thread at the same time as someone is tearing down the dls_link_t
+ * that we're associated with. We need to be careful not to grab the mac
+ * perimeter, otherwise we stand a good chance of deadlock.
+ */
+static void
+dls_link_notify(void *arg, mac_notify_type_t type)
+{
+ dls_link_t *dlp = arg;
+ dls_dl_handle_t dhp;
+ nvlist_t *nvp;
+ sysevent_t *event;
+ sysevent_id_t eid;
+
+ if (type != MAC_NOTE_LINK && type != MAC_NOTE_LOWLINK)
+ return;
+
+ /*
+ * If we can't find a devnet handle for this link, then there is no user
+ * knowable device for this at the moment and there's nothing we can
+ * really share with them that will make sense.
+ */
+ if (dls_devnet_hold_tmp_by_link(dlp, &dhp) != 0)
+ return;
+
+ /*
+ * Because we're attaching this nvlist_t to the sysevent, it'll get
+ * cleaned up when we call sysevent_free.
+ */
+ VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY(nvlist_add_int32(nvp, DATALINK_EV_LINK_ID,
+ dls_devnet_linkid(dhp)) == 0);
+ VERIFY(nvlist_add_string(nvp, DATALINK_EV_LINK_NAME,
+ dls_devnet_link(dhp)) == 0);
+ VERIFY(nvlist_add_int32(nvp, DATALINK_EV_ZONE_ID,
+ dls_devnet_getzid(dhp)) == 0);
+
+ dls_devnet_rele_tmp(dhp);
+
+ event = sysevent_alloc(EC_DATALINK, ESC_DATALINK_LINK_STATE,
+ ILLUMOS_KERN_PUB"dls", SE_SLEEP);
+ VERIFY(event != NULL);
+ (void) sysevent_attach_attributes(event, (sysevent_attr_list_t *)nvp);
+
+ (void) log_sysevent(event, SE_SLEEP, &eid);
+ sysevent_free(event);
+
+}
+
static void
i_dls_link_destroy(dls_link_t *dlp)
{
@@ -589,6 +654,9 @@ i_dls_link_destroy(dls_link_t *dlp)
/*
* Free the structure back to the cache.
*/
+ if (dlp->dl_mnh != NULL)
+ mac_notify_remove(dlp->dl_mnh, B_TRUE);
+
if (dlp->dl_mch != NULL)
mac_client_close(dlp->dl_mch, 0);
@@ -600,8 +668,10 @@ i_dls_link_destroy(dls_link_t *dlp)
dlp->dl_mh = NULL;
dlp->dl_mch = NULL;
dlp->dl_mip = NULL;
+ dlp->dl_mnh = NULL;
dlp->dl_unknowns = 0;
dlp->dl_nonip_cnt = 0;
+ dlp->dl_exclusive = B_FALSE;
kmem_cache_free(i_dls_link_cachep, dlp);
}
@@ -640,6 +710,8 @@ i_dls_link_create(const char *name, dls_link_t **dlpp)
if (err != 0)
goto bail;
+ dlp->dl_mnh = mac_notify_add(dlp->dl_mh, dls_link_notify, dlp);
+
DTRACE_PROBE2(dls__primary__client, char *, dlp->dl_name, void *,
dlp->dl_mch);
diff --git a/usr/src/uts/common/io/dls/dls_mgmt.c b/usr/src/uts/common/io/dls/dls_mgmt.c
index 049c4bd757..105c55c7ce 100644
--- a/usr/src/uts/common/io/dls/dls_mgmt.c
+++ b/usr/src/uts/common/io/dls/dls_mgmt.c
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
*/
/*
@@ -105,12 +106,13 @@ typedef struct dls_devnet_s {
zoneid_t dd_zid; /* current zone */
boolean_t dd_prop_loaded;
taskqid_t dd_prop_taskid;
+ boolean_t dd_transient; /* link goes away when zone does */
} dls_devnet_t;
static int i_dls_devnet_create_iptun(const char *, const char *,
datalink_id_t *);
static int i_dls_devnet_destroy_iptun(datalink_id_t);
-static int i_dls_devnet_setzid(dls_devnet_t *, zoneid_t, boolean_t);
+static int i_dls_devnet_setzid(dls_devnet_t *, zoneid_t, boolean_t, boolean_t);
static int dls_devnet_unset(const char *, datalink_id_t *, boolean_t);
/*ARGSUSED*/
@@ -145,7 +147,12 @@ dls_zone_remove(datalink_id_t linkid, void *arg)
dls_devnet_t *ddp;
if (dls_devnet_hold_tmp(linkid, &ddp) == 0) {
- (void) dls_devnet_setzid(ddp, GLOBAL_ZONEID);
+ /*
+ * Don't bother moving transient links back to the global zone
+ * since we will simply delete them in dls_devnet_unset.
+ */
+ if (!ddp->dd_transient)
+ (void) dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE);
dls_devnet_rele_tmp(ddp);
}
return (0);
@@ -526,6 +533,7 @@ dls_mgmt_get_linkid(const char *link, datalink_id_t *linkid)
getlinkid.ld_cmd = DLMGMT_CMD_GETLINKID;
(void) strlcpy(getlinkid.ld_link, link, MAXLINKNAMELEN);
+ getlinkid.ld_zoneid = getzoneid();
if ((err = i_dls_mgmt_upcall(&getlinkid, sizeof (getlinkid), &retval,
sizeof (retval))) == 0) {
@@ -534,6 +542,27 @@ dls_mgmt_get_linkid(const char *link, datalink_id_t *linkid)
return (err);
}
+int
+dls_mgmt_get_linkid_in_zone(const char *link, datalink_id_t *linkid,
+ zoneid_t zid)
+{
+ dlmgmt_door_getlinkid_t getlinkid;
+ dlmgmt_getlinkid_retval_t retval;
+ int err;
+
+ ASSERT(getzoneid() == GLOBAL_ZONEID || zid == getzoneid());
+ getlinkid.ld_cmd = DLMGMT_CMD_GETLINKID;
+ (void) strlcpy(getlinkid.ld_link, link, MAXLINKNAMELEN);
+ getlinkid.ld_zoneid = zid;
+
+ if ((err = i_dls_mgmt_upcall(&getlinkid, sizeof (getlinkid), &retval,
+ sizeof (retval))) == 0) {
+ *linkid = retval.lr_linkid;
+ }
+ return (err);
+}
+
+
datalink_id_t
dls_mgmt_get_next(datalink_id_t linkid, datalink_class_t class,
datalink_media_t dmedia, uint32_t flags)
@@ -740,12 +769,23 @@ dls_devnet_stat_update(kstat_t *ksp, int rw)
* Create the "link" kstats.
*/
static void
-dls_devnet_stat_create(dls_devnet_t *ddp, zoneid_t zoneid)
+dls_devnet_stat_create(dls_devnet_t *ddp, zoneid_t zoneid, zoneid_t newzoneid)
{
kstat_t *ksp;
+ char *nm;
+ char kname[MAXLINKNAMELEN];
+
+ if (zoneid != newzoneid) {
+ ASSERT(zoneid == GLOBAL_ZONEID);
+ (void) snprintf(kname, sizeof (kname), "z%d_%s", newzoneid,
+ ddp->dd_linkname);
+ nm = kname;
+ } else {
+ nm = ddp->dd_linkname;
+ }
- if (dls_stat_create("link", 0, ddp->dd_linkname, zoneid,
- dls_devnet_stat_update, ddp, &ksp) == 0) {
+ if (dls_stat_create("link", 0, nm, zoneid,
+ dls_devnet_stat_update, ddp, &ksp, newzoneid) == 0) {
ASSERT(ksp != NULL);
if (zoneid == ddp->dd_owner_zid) {
ASSERT(ddp->dd_ksp == NULL);
@@ -765,12 +805,12 @@ dls_devnet_stat_destroy(dls_devnet_t *ddp, zoneid_t zoneid)
{
if (zoneid == ddp->dd_owner_zid) {
if (ddp->dd_ksp != NULL) {
- kstat_delete(ddp->dd_ksp);
+ dls_stat_delete(ddp->dd_ksp);
ddp->dd_ksp = NULL;
}
} else {
if (ddp->dd_zone_ksp != NULL) {
- kstat_delete(ddp->dd_zone_ksp);
+ dls_stat_delete(ddp->dd_zone_ksp);
ddp->dd_zone_ksp = NULL;
}
}
@@ -781,15 +821,25 @@ dls_devnet_stat_destroy(dls_devnet_t *ddp, zoneid_t zoneid)
* and create the new set using the new name.
*/
static void
-dls_devnet_stat_rename(dls_devnet_t *ddp)
+dls_devnet_stat_rename(dls_devnet_t *ddp, boolean_t zoneinit)
{
if (ddp->dd_ksp != NULL) {
- kstat_delete(ddp->dd_ksp);
+ dls_stat_delete(ddp->dd_ksp);
ddp->dd_ksp = NULL;
}
- /* We can't rename a link while it's assigned to a non-global zone. */
+ if (zoneinit && ddp->dd_zone_ksp != NULL) {
+ dls_stat_delete(ddp->dd_zone_ksp);
+ ddp->dd_zone_ksp = NULL;
+ }
+ /*
+ * We can't rename a link while it's assigned to a non-global zone
+ * unless we're first initializing the zone while readying it.
+ */
ASSERT(ddp->dd_zone_ksp == NULL);
- dls_devnet_stat_create(ddp, ddp->dd_owner_zid);
+ dls_devnet_stat_create(ddp, ddp->dd_owner_zid,
+ (zoneinit ? ddp->dd_zid : ddp->dd_owner_zid));
+ if (zoneinit)
+ dls_devnet_stat_create(ddp, ddp->dd_zid, ddp->dd_zid);
}
/*
@@ -878,7 +928,8 @@ done:
rw_exit(&i_dls_devnet_lock);
if (err == 0) {
if (zoneid != GLOBAL_ZONEID &&
- (err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE)) != 0)
+ (err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE,
+ B_FALSE)) != 0)
(void) dls_devnet_unset(macname, &linkid, B_TRUE);
/*
* The kstat subsystem holds its own locks (rather perimeter)
@@ -887,7 +938,7 @@ done:
* lock hierarchy is kstat locks -> i_dls_devnet_lock.
*/
if (stat_create)
- dls_devnet_stat_create(ddp, zoneid);
+ dls_devnet_stat_create(ddp, zoneid, zoneid);
if (ddpp != NULL)
*ddpp = ddp;
}
@@ -924,17 +975,78 @@ dls_devnet_unset(const char *macname, datalink_id_t *id, boolean_t wait)
ASSERT(ddp->dd_ref != 0);
if ((ddp->dd_ref != 1) || (!wait &&
(ddp->dd_tref != 0 || ddp->dd_prop_taskid != NULL))) {
- mutex_exit(&ddp->dd_mutex);
- rw_exit(&i_dls_devnet_lock);
- return (EBUSY);
+ int zstatus = 0;
+
+ /*
+ * There are a couple of alternatives that might be going on
+ * here; a) the zone is shutting down and it has a transient
+ * link assigned, in which case we want to clean it up instead
+ * of moving it back to the global zone, or b) its possible
+ * that we're trying to clean up an orphaned vnic that was
+ * delegated to a zone and which wasn't cleaned up properly
+ * when the zone went away. Check for either of these cases
+ * before we simply return EBUSY.
+ *
+ * zstatus indicates which situation we are dealing with:
+ * 0 - means return EBUSY
+ * 1 - means case (a), cleanup transient link
+ * -1 - means case (b), orphained VNIC
+ */
+ if (ddp->dd_ref > 1 && ddp->dd_zid != GLOBAL_ZONEID) {
+ zone_t *zp;
+
+ if ((zp = zone_find_by_id(ddp->dd_zid)) == NULL) {
+ zstatus = -1;
+ } else {
+ if (ddp->dd_transient) {
+ zone_status_t s = zone_status_get(zp);
+
+ if (s >= ZONE_IS_SHUTTING_DOWN)
+ zstatus = 1;
+ }
+ zone_rele(zp);
+ }
+ }
+
+ if (zstatus == 0) {
+ mutex_exit(&ddp->dd_mutex);
+ rw_exit(&i_dls_devnet_lock);
+ return (EBUSY);
+ }
+
+ /*
+ * We want to delete the link, reset ref to 1;
+ */
+ if (zstatus == -1)
+ /* Log a warning, but continue in this case */
+ cmn_err(CE_WARN, "clear orphaned datalink: %s\n",
+ ddp->dd_linkname);
+ ddp->dd_ref = 1;
}
ddp->dd_flags |= DD_CONDEMNED;
ddp->dd_ref--;
*id = ddp->dd_linkid;
- if (ddp->dd_zid != GLOBAL_ZONEID)
- (void) i_dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE);
+ if (ddp->dd_zid != GLOBAL_ZONEID) {
+ /*
+ * We need to release the dd_mutex before we try and destroy the
+ * stat. When we destroy it, we'll need to grab the lock for the
+ * kstat but if there's a concurrent reader of the kstat, we'll
+ * be blocked on it. This will lead to deadlock because these
+ * kstats employ a ks_update function (dls_devnet_stat_update)
+ * which needs the dd_mutex that we currently hold.
+ *
+ * Because we've already flagged the dls_devnet_t as
+ * DD_CONDEMNED and we still have a write lock on
+ * i_dls_devnet_lock, we should be able to release the dd_mutex.
+ */
+ mutex_exit(&ddp->dd_mutex);
+ dls_devnet_stat_destroy(ddp, ddp->dd_zid);
+ mutex_enter(&ddp->dd_mutex);
+ (void) i_dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE,
+ B_FALSE);
+ }
/*
* Remove this dls_devnet_t from the hash table.
@@ -960,8 +1072,15 @@ dls_devnet_unset(const char *macname, datalink_id_t *id, boolean_t wait)
ASSERT(ddp->dd_tref == 0 && ddp->dd_prop_taskid == NULL);
}
- if (ddp->dd_linkid != DATALINK_INVALID_LINKID)
+ if (ddp->dd_linkid != DATALINK_INVALID_LINKID) {
+ /*
+ * See the earlier call in this function for an explanation.
+ */
+ mutex_exit(&ddp->dd_mutex);
dls_devnet_stat_destroy(ddp, ddp->dd_owner_zid);
+ mutex_enter(&ddp->dd_mutex);
+ }
+
ddp->dd_prop_loaded = B_FALSE;
ddp->dd_linkid = DATALINK_INVALID_LINKID;
@@ -972,6 +1091,39 @@ dls_devnet_unset(const char *macname, datalink_id_t *id, boolean_t wait)
return (0);
}
+/*
+ * This is a private hold routine used when we already have the dls_link_t, thus
+ * we know that it cannot go away.
+ */
+int
+dls_devnet_hold_tmp_by_link(dls_link_t *dlp, dls_dl_handle_t *ddhp)
+{
+ int err;
+ dls_devnet_t *ddp = NULL;
+
+ rw_enter(&i_dls_devnet_lock, RW_WRITER);
+ if ((err = mod_hash_find(i_dls_devnet_hash,
+ (mod_hash_key_t)dlp->dl_name, (mod_hash_val_t *)&ddp)) != 0) {
+ ASSERT(err == MH_ERR_NOTFOUND);
+ rw_exit(&i_dls_devnet_lock);
+ return (ENOENT);
+ }
+
+ mutex_enter(&ddp->dd_mutex);
+ ASSERT(ddp->dd_ref > 0);
+ if (ddp->dd_flags & DD_CONDEMNED) {
+ mutex_exit(&ddp->dd_mutex);
+ rw_exit(&i_dls_devnet_lock);
+ return (ENOENT);
+ }
+ ddp->dd_tref++;
+ mutex_exit(&ddp->dd_mutex);
+ rw_exit(&i_dls_devnet_lock);
+
+ *ddhp = ddp;
+ return (0);
+}
+
static int
dls_devnet_hold_common(datalink_id_t linkid, dls_devnet_t **ddpp,
boolean_t tmp_hold)
@@ -1111,7 +1263,7 @@ dls_devnet_rele(dls_devnet_t *ddp)
}
static int
-dls_devnet_hold_by_name(const char *link, dls_devnet_t **ddpp)
+dls_devnet_hold_by_name(const char *link, dls_devnet_t **ddpp, zoneid_t zid)
{
char drv[MAXLINKNAMELEN];
uint_t ppa;
@@ -1121,7 +1273,7 @@ dls_devnet_hold_by_name(const char *link, dls_devnet_t **ddpp)
dls_dev_handle_t ddh;
int err;
- if ((err = dls_mgmt_get_linkid(link, &linkid)) == 0)
+ if ((err = dls_mgmt_get_linkid_in_zone(link, &linkid, zid)) == 0)
return (dls_devnet_hold(linkid, ddpp));
/*
@@ -1261,9 +1413,15 @@ dls_devnet_phydev(datalink_id_t vlanid, dev_t *devp)
*
* This case does not change the <link name, linkid> mapping, so the link's
* kstats need to be updated with using name associated the given id2.
+ *
+ * The zonename parameter is used to allow us to create a VNIC in the global
+ * zone which is assigned to a non-global zone. Since there is a race condition
+ * in the create process if two VNICs have the same name, we need to rename it
+ * after it has been assigned to the zone.
*/
int
-dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link)
+dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link,
+ boolean_t zoneinit)
{
dls_dev_handle_t ddh = NULL;
int err = 0;
@@ -1313,13 +1471,16 @@ dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link)
* is currently accessing the link kstats, or if the link is on-loan
* to a non-global zone. Then set the DD_KSTAT_CHANGING flag to
* prevent any access to the kstats while we delete and recreate
- * kstats below.
+ * kstats below. However, we skip this check if we're renaming the
+ * vnic as part of bringing it up for a zone.
*/
mutex_enter(&ddp->dd_mutex);
- if (ddp->dd_ref > 1) {
- mutex_exit(&ddp->dd_mutex);
- err = EBUSY;
- goto done;
+ if (!zoneinit) {
+ if (ddp->dd_ref > 1) {
+ mutex_exit(&ddp->dd_mutex);
+ err = EBUSY;
+ goto done;
+ }
}
ddp->dd_flags |= DD_KSTAT_CHANGING;
@@ -1333,7 +1494,15 @@ dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link)
/* rename mac client name and its flow if exists */
if ((err = mac_open(ddp->dd_mac, &mh)) != 0)
goto done;
- (void) mac_rename_primary(mh, link);
+ if (zoneinit) {
+ char tname[MAXLINKNAMELEN];
+
+ (void) snprintf(tname, sizeof (tname), "z%d_%s",
+ ddp->dd_zid, link);
+ (void) mac_rename_primary(mh, tname);
+ } else {
+ (void) mac_rename_primary(mh, link);
+ }
mac_close(mh);
goto done;
}
@@ -1406,7 +1575,7 @@ done:
*/
rw_exit(&i_dls_devnet_lock);
if (err == 0)
- dls_devnet_stat_rename(ddp);
+ dls_devnet_stat_rename(ddp, zoneinit);
if (clear_dd_flag) {
mutex_enter(&ddp->dd_mutex);
@@ -1421,7 +1590,8 @@ done:
}
static int
-i_dls_devnet_setzid(dls_devnet_t *ddp, zoneid_t new_zoneid, boolean_t setprop)
+i_dls_devnet_setzid(dls_devnet_t *ddp, zoneid_t new_zoneid, boolean_t setprop,
+ boolean_t transient)
{
int err;
mac_perim_handle_t mph;
@@ -1454,6 +1624,7 @@ i_dls_devnet_setzid(dls_devnet_t *ddp, zoneid_t new_zoneid, boolean_t setprop)
}
if ((err = dls_link_setzid(ddp->dd_mac, new_zoneid)) == 0) {
ddp->dd_zid = new_zoneid;
+ ddp->dd_transient = transient;
devnet_need_rebuild = B_TRUE;
}
@@ -1468,7 +1639,7 @@ done:
}
int
-dls_devnet_setzid(dls_dl_handle_t ddh, zoneid_t new_zid)
+dls_devnet_setzid(dls_dl_handle_t ddh, zoneid_t new_zid, boolean_t transient)
{
dls_devnet_t *ddp;
int err;
@@ -1490,7 +1661,7 @@ dls_devnet_setzid(dls_dl_handle_t ddh, zoneid_t new_zid)
refheld = B_TRUE;
}
- if ((err = i_dls_devnet_setzid(ddh, new_zid, B_TRUE)) != 0) {
+ if ((err = i_dls_devnet_setzid(ddh, new_zid, B_TRUE, transient)) != 0) {
if (refheld)
dls_devnet_rele(ddp);
return (err);
@@ -1507,7 +1678,7 @@ dls_devnet_setzid(dls_dl_handle_t ddh, zoneid_t new_zid)
if (old_zid != GLOBAL_ZONEID)
dls_devnet_stat_destroy(ddh, old_zid);
if (new_zid != GLOBAL_ZONEID)
- dls_devnet_stat_create(ddh, new_zid);
+ dls_devnet_stat_create(ddh, new_zid, new_zid);
return (0);
}
@@ -1545,15 +1716,19 @@ dls_devnet_islinkvisible(datalink_id_t linkid, zoneid_t zoneid)
* Access a vanity naming node.
*/
int
-dls_devnet_open(const char *link, dls_dl_handle_t *dhp, dev_t *devp)
+dls_devnet_open_in_zone(const char *link, dls_dl_handle_t *dhp, dev_t *devp,
+ zoneid_t zid)
{
dls_devnet_t *ddp;
dls_link_t *dlp;
- zoneid_t zid = getzoneid();
+ zoneid_t czid = getzoneid();
int err;
mac_perim_handle_t mph;
- if ((err = dls_devnet_hold_by_name(link, &ddp)) != 0)
+ if (czid != GLOBAL_ZONEID && czid != zid)
+ return (ENOENT);
+
+ if ((err = dls_devnet_hold_by_name(link, &ddp, zid)) != 0)
return (err);
dls_devnet_prop_task_wait(ddp);
@@ -1586,6 +1761,12 @@ dls_devnet_open(const char *link, dls_dl_handle_t *dhp, dev_t *devp)
return (0);
}
+int
+dls_devnet_open(const char *link, dls_dl_handle_t *dhp, dev_t *devp)
+{
+ return (dls_devnet_open_in_zone(link, dhp, devp, getzoneid()));
+}
+
/*
* Close access to a vanity naming node.
*/
@@ -1765,6 +1946,12 @@ i_dls_devnet_destroy_iptun(datalink_id_t linkid)
}
const char *
+dls_devnet_link(dls_dl_handle_t ddh)
+{
+ return (ddh->dd_linkname);
+}
+
+const char *
dls_devnet_mac(dls_dl_handle_t ddh)
{
return (ddh->dd_mac);
diff --git a/usr/src/uts/common/io/dls/dls_stat.c b/usr/src/uts/common/io/dls/dls_stat.c
index 51e4be7260..82dceff278 100644
--- a/usr/src/uts/common/io/dls/dls_stat.c
+++ b/usr/src/uts/common/io/dls/dls_stat.c
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011 Joyent, Inc. All rights reserved.
*/
/*
@@ -30,30 +31,33 @@
#include <sys/dld_impl.h>
#include <sys/mac_ether.h>
-static mac_stat_info_t i_dls_si[] = {
- { MAC_STAT_IFSPEED, "ifspeed", KSTAT_DATA_UINT64, 0 },
- { MAC_STAT_MULTIRCV, "multircv", KSTAT_DATA_UINT32, 0 },
- { MAC_STAT_BRDCSTRCV, "brdcstrcv", KSTAT_DATA_UINT32, 0 },
- { MAC_STAT_MULTIXMT, "multixmt", KSTAT_DATA_UINT32, 0 },
- { MAC_STAT_BRDCSTXMT, "brdcstxmt", KSTAT_DATA_UINT32, 0 },
- { MAC_STAT_NORCVBUF, "norcvbuf", KSTAT_DATA_UINT32, 0 },
- { MAC_STAT_IERRORS, "ierrors", KSTAT_DATA_UINT32, 0 },
- { MAC_STAT_NOXMTBUF, "noxmtbuf", KSTAT_DATA_UINT32, 0 },
- { MAC_STAT_OERRORS, "oerrors", KSTAT_DATA_UINT32, 0 },
- { MAC_STAT_COLLISIONS, "collisions", KSTAT_DATA_UINT32, 0 },
- { MAC_STAT_RBYTES, "rbytes", KSTAT_DATA_UINT32, 0 },
- { MAC_STAT_IPACKETS, "ipackets", KSTAT_DATA_UINT32, 0 },
- { MAC_STAT_OBYTES, "obytes", KSTAT_DATA_UINT32, 0 },
- { MAC_STAT_OPACKETS, "opackets", KSTAT_DATA_UINT32, 0 },
- { MAC_STAT_RBYTES, "rbytes64", KSTAT_DATA_UINT64, 0 },
- { MAC_STAT_IPACKETS, "ipackets64", KSTAT_DATA_UINT64, 0 },
- { MAC_STAT_OBYTES, "obytes64", KSTAT_DATA_UINT64, 0 },
- { MAC_STAT_OPACKETS, "opackets64", KSTAT_DATA_UINT64, 0 },
- { MAC_STAT_LINK_STATE, "link_state", KSTAT_DATA_UINT32,
- (uint64_t)LINK_STATE_UNKNOWN}
-};
-
-#define STAT_INFO_COUNT (sizeof (i_dls_si) / sizeof (i_dls_si[0]))
+/*
+ * structure for link kstats
+ */
+typedef struct {
+ kstat_named_t dk_ifspeed;
+ kstat_named_t dk_multircv;
+ kstat_named_t dk_brdcstrcv;
+ kstat_named_t dk_multixmt;
+ kstat_named_t dk_brdcstxmt;
+ kstat_named_t dk_norcvbuf;
+ kstat_named_t dk_ierrors;
+ kstat_named_t dk_noxmtbuf;
+ kstat_named_t dk_oerrors;
+ kstat_named_t dk_collisions;
+ kstat_named_t dk_rbytes;
+ kstat_named_t dk_ipackets;
+ kstat_named_t dk_obytes;
+ kstat_named_t dk_opackets;
+ kstat_named_t dk_rbytes64;
+ kstat_named_t dk_ipackets64;
+ kstat_named_t dk_obytes64;
+ kstat_named_t dk_opackets64;
+ kstat_named_t dk_link_state;
+ kstat_named_t dk_link_duplex;
+ kstat_named_t dk_unknowns;
+ kstat_named_t dk_zonename;
+} dls_kstat_t;
/*
* Exported functions.
@@ -61,42 +65,54 @@ static mac_stat_info_t i_dls_si[] = {
int
dls_stat_update(kstat_t *ksp, dls_link_t *dlp, int rw)
{
- kstat_named_t *knp;
- uint_t i;
- uint64_t val;
+ dls_kstat_t *dkp = ksp->ks_data;
if (rw != KSTAT_READ)
return (EACCES);
- knp = (kstat_named_t *)ksp->ks_data;
- for (i = 0; i < STAT_INFO_COUNT; i++) {
- val = mac_stat_get(dlp->dl_mh, i_dls_si[i].msi_stat);
-
- switch (i_dls_si[i].msi_type) {
- case KSTAT_DATA_UINT64:
- knp->value.ui64 = val;
- break;
- case KSTAT_DATA_UINT32:
- knp->value.ui32 = (uint32_t)val;
- break;
- default:
- ASSERT(B_FALSE);
- }
-
- knp++;
- }
+ dkp->dk_ifspeed.value.ui64 = mac_stat_get(dlp->dl_mh, MAC_STAT_IFSPEED);
+ dkp->dk_multircv.value.ui32 = mac_stat_get(dlp->dl_mh,
+ MAC_STAT_MULTIRCV);
+ dkp->dk_brdcstrcv.value.ui32 = mac_stat_get(dlp->dl_mh,
+ MAC_STAT_BRDCSTRCV);
+ dkp->dk_multixmt.value.ui32 = mac_stat_get(dlp->dl_mh,
+ MAC_STAT_MULTIXMT);
+ dkp->dk_brdcstxmt.value.ui32 = mac_stat_get(dlp->dl_mh,
+ MAC_STAT_BRDCSTXMT);
+ dkp->dk_norcvbuf.value.ui32 = mac_stat_get(dlp->dl_mh,
+ MAC_STAT_NORCVBUF);
+ dkp->dk_ierrors.value.ui32 = mac_stat_get(dlp->dl_mh, MAC_STAT_IERRORS);
+ dkp->dk_noxmtbuf.value.ui32 = mac_stat_get(dlp->dl_mh,
+ MAC_STAT_NOXMTBUF);
+ dkp->dk_oerrors.value.ui32 = mac_stat_get(dlp->dl_mh, MAC_STAT_OERRORS);
+ dkp->dk_collisions.value.ui32 = mac_stat_get(dlp->dl_mh,
+ MAC_STAT_COLLISIONS);
+ dkp->dk_rbytes.value.ui32 = mac_stat_get(dlp->dl_mh, MAC_STAT_RBYTES);
+ dkp->dk_ipackets.value.ui32 = mac_stat_get(dlp->dl_mh,
+ MAC_STAT_IPACKETS);
+ dkp->dk_obytes.value.ui32 = mac_stat_get(dlp->dl_mh, MAC_STAT_OBYTES);
+ dkp->dk_opackets.value.ui32 = mac_stat_get(dlp->dl_mh,
+ MAC_STAT_OPACKETS);
+ dkp->dk_rbytes64.value.ui64 = mac_stat_get(dlp->dl_mh, MAC_STAT_RBYTES);
+ dkp->dk_ipackets64.value.ui64 = mac_stat_get(dlp->dl_mh,
+ MAC_STAT_IPACKETS);
+ dkp->dk_obytes64.value.ui64 = mac_stat_get(dlp->dl_mh, MAC_STAT_OBYTES);
+ dkp->dk_opackets64.value.ui64 = mac_stat_get(dlp->dl_mh,
+ MAC_STAT_OPACKETS);
+ dkp->dk_link_state.value.ui32 = mac_stat_get(dlp->dl_mh,
+ MAC_STAT_LINK_STATE);
/*
* Ethernet specific kstat "link_duplex"
*/
if (dlp->dl_mip->mi_nativemedia != DL_ETHER) {
- knp->value.ui32 = LINK_DUPLEX_UNKNOWN;
+ dkp->dk_link_duplex.value.ui32 = LINK_DUPLEX_UNKNOWN;
} else {
- val = mac_stat_get(dlp->dl_mh, ETHER_STAT_LINK_DUPLEX);
- knp->value.ui32 = (uint32_t)val;
+ dkp->dk_link_duplex.value.ui32 =
+ (uint32_t)mac_stat_get(dlp->dl_mh, ETHER_STAT_LINK_DUPLEX);
}
- knp++;
- knp->value.ui32 = dlp->dl_unknowns;
+
+ dkp->dk_unknowns.value.ui32 = dlp->dl_unknowns;
return (0);
}
@@ -104,30 +120,66 @@ dls_stat_update(kstat_t *ksp, dls_link_t *dlp, int rw)
int
dls_stat_create(const char *module, int instance, const char *name,
zoneid_t zoneid, int (*update)(struct kstat *, int), void *private,
- kstat_t **kspp)
+ kstat_t **kspp, zoneid_t newzoneid)
{
kstat_t *ksp;
- kstat_named_t *knp;
- uint_t i;
+ zone_t *zone;
+ dls_kstat_t *dkp;
if ((ksp = kstat_create_zone(module, instance, name, "net",
- KSTAT_TYPE_NAMED, STAT_INFO_COUNT + 2, 0, zoneid)) == NULL) {
+ KSTAT_TYPE_NAMED, sizeof (dls_kstat_t) / sizeof (kstat_named_t),
+ KSTAT_FLAG_VIRTUAL, zoneid)) == NULL) {
return (EINVAL);
}
ksp->ks_update = update;
ksp->ks_private = private;
+ dkp = ksp->ks_data = kmem_zalloc(sizeof (dls_kstat_t), KM_SLEEP);
+ if ((zone = zone_find_by_id(newzoneid)) != NULL) {
+ ksp->ks_data_size += strlen(zone->zone_name) + 1;
+ }
- knp = (kstat_named_t *)ksp->ks_data;
- for (i = 0; i < STAT_INFO_COUNT; i++) {
- kstat_named_init(knp, i_dls_si[i].msi_name,
- i_dls_si[i].msi_type);
- knp++;
+ kstat_named_init(&dkp->dk_ifspeed, "ifspeed", KSTAT_DATA_UINT64);
+ kstat_named_init(&dkp->dk_multircv, "multircv", KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_brdcstrcv, "brdcstrcv", KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_multixmt, "multixmt", KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_brdcstxmt, "brdcstxmt", KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_norcvbuf, "norcvbuf", KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_ierrors, "ierrors", KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_noxmtbuf, "noxmtbuf", KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_oerrors, "oerrors", KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_collisions, "collisions", KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_rbytes, "rbytes", KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_ipackets, "ipackets", KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_obytes, "obytes", KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_opackets, "opackets", KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_rbytes64, "rbytes64", KSTAT_DATA_UINT64);
+ kstat_named_init(&dkp->dk_ipackets64, "ipackets64", KSTAT_DATA_UINT64);
+ kstat_named_init(&dkp->dk_obytes64, "obytes64", KSTAT_DATA_UINT64);
+ kstat_named_init(&dkp->dk_opackets64, "opackets64", KSTAT_DATA_UINT64);
+ kstat_named_init(&dkp->dk_link_state, "link_state", KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_link_duplex, "link_duplex",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_unknowns, "unknowns", KSTAT_DATA_UINT32);
+ kstat_named_init(&dkp->dk_zonename, "zonename", KSTAT_DATA_STRING);
+
+ if (zone != NULL) {
+ kstat_named_setstr(&dkp->dk_zonename, zone->zone_name);
+ zone_rele(zone);
}
- kstat_named_init(knp++, "link_duplex", KSTAT_DATA_UINT32);
- kstat_named_init(knp, "unknowns", KSTAT_DATA_UINT32);
kstat_install(ksp);
*kspp = ksp;
return (0);
}
+
+void
+dls_stat_delete(kstat_t *ksp)
+{
+ void *data;
+ if (ksp != NULL) {
+ data = ksp->ks_data;
+ kstat_delete(ksp);
+ kmem_free(data, sizeof (dls_kstat_t));
+ }
+}
diff --git a/usr/src/uts/common/io/dr_sas/THIRDPARTYLICENSE b/usr/src/uts/common/io/dr_sas/THIRDPARTYLICENSE
new file mode 100644
index 0000000000..00aefb6f51
--- /dev/null
+++ b/usr/src/uts/common/io/dr_sas/THIRDPARTYLICENSE
@@ -0,0 +1,32 @@
+/*
+ * MegaRAID device driver for SAS2.0 controllers
+ * Copyright (c) 2009, LSI Logic Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the author nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
diff --git a/usr/src/uts/common/io/dr_sas/THIRDPARTYLICENSE.descrip b/usr/src/uts/common/io/dr_sas/THIRDPARTYLICENSE.descrip
new file mode 100644
index 0000000000..ac6d2d1b15
--- /dev/null
+++ b/usr/src/uts/common/io/dr_sas/THIRDPARTYLICENSE.descrip
@@ -0,0 +1 @@
+DR_SAS DRIVER
diff --git a/usr/src/uts/common/io/dr_sas/dr_sas.c b/usr/src/uts/common/io/dr_sas/dr_sas.c
new file mode 100644
index 0000000000..5b1dc82938
--- /dev/null
+++ b/usr/src/uts/common/io/dr_sas/dr_sas.c
@@ -0,0 +1,5506 @@
+/*
+ * dr_sas.c: source for dr_sas driver
+ *
+ * MegaRAID device driver for SAS2.0 controllers
+ * Copyright (c) 2008-2009, LSI Logic Corporation.
+ * All rights reserved.
+ *
+ * Version:
+ * Author:
+ * Arun Chandrashekhar
+ * Manju R
+ * Rajesh Prabhakaran
+ * Seokmann Ju
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the author nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/file.h>
+#include <sys/errno.h>
+#include <sys/open.h>
+#include <sys/cred.h>
+#include <sys/modctl.h>
+#include <sys/conf.h>
+#include <sys/devops.h>
+#include <sys/cmn_err.h>
+#include <sys/kmem.h>
+#include <sys/stat.h>
+#include <sys/mkdev.h>
+#include <sys/pci.h>
+#include <sys/scsi/scsi.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/atomic.h>
+#include <sys/signal.h>
+#include <sys/fs/dv_node.h> /* devfs_clean */
+
+#include "dr_sas.h"
+
+/*
+ * FMA header files
+ */
+#include <sys/ddifm.h>
+#include <sys/fm/protocol.h>
+#include <sys/fm/util.h>
+#include <sys/fm/io/ddi.h>
+
+/*
+ * Local static data
+ */
+static void *drsas_state = NULL;
+static int debug_level_g = CL_NONE;
+
+#pragma weak scsi_hba_open
+#pragma weak scsi_hba_close
+#pragma weak scsi_hba_ioctl
+
+static ddi_dma_attr_t drsas_generic_dma_attr = {
+ DMA_ATTR_V0, /* dma_attr_version */
+ 0, /* low DMA address range */
+ 0xFFFFFFFFU, /* high DMA address range */
+ 0xFFFFFFFFU, /* DMA counter register */
+ 8, /* DMA address alignment */
+ 0x07, /* DMA burstsizes */
+ 1, /* min DMA size */
+ 0xFFFFFFFFU, /* max DMA size */
+ 0xFFFFFFFFU, /* segment boundary */
+ DRSAS_MAX_SGE_CNT, /* dma_attr_sglen */
+ 512, /* granularity of device */
+ 0 /* bus specific DMA flags */
+};
+
+int32_t drsas_max_cap_maxxfer = 0x1000000;
+
+/*
+ * cb_ops contains base level routines
+ */
+static struct cb_ops drsas_cb_ops = {
+ drsas_open, /* open */
+ drsas_close, /* close */
+ nodev, /* strategy */
+ nodev, /* print */
+ nodev, /* dump */
+ nodev, /* read */
+ nodev, /* write */
+ drsas_ioctl, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ nodev, /* segmap */
+ nochpoll, /* poll */
+ nodev, /* cb_prop_op */
+ 0, /* streamtab */
+ D_NEW | D_HOTPLUG, /* cb_flag */
+ CB_REV, /* cb_rev */
+ nodev, /* cb_aread */
+ nodev /* cb_awrite */
+};
+
+/*
+ * dev_ops contains configuration routines
+ */
+static struct dev_ops drsas_ops = {
+ DEVO_REV, /* rev, */
+ 0, /* refcnt */
+ drsas_getinfo, /* getinfo */
+ nulldev, /* identify */
+ nulldev, /* probe */
+ drsas_attach, /* attach */
+ drsas_detach, /* detach */
+ drsas_reset, /* reset */
+ &drsas_cb_ops, /* char/block ops */
+ NULL, /* bus ops */
+ NULL, /* power */
+ ddi_quiesce_not_supported, /* quiesce */
+};
+
+char _depends_on[] = "misc/scsi";
+
+static struct modldrv modldrv = {
+ &mod_driverops, /* module type - driver */
+ DRSAS_VERSION,
+ &drsas_ops, /* driver ops */
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, /* ml_rev - must be MODREV_1 */
+ &modldrv, /* ml_linkage */
+ NULL /* end of driver linkage */
+};
+
+static struct ddi_device_acc_attr endian_attr = {
+ DDI_DEVICE_ATTR_V0,
+ DDI_STRUCTURE_LE_ACC,
+ DDI_STRICTORDER_ACC
+};
+
+
+/*
+ * ************************************************************************** *
+ * *
+ * common entry points - for loadable kernel modules *
+ * *
+ * ************************************************************************** *
+ */
+
+int
+_init(void)
+{
+ int ret;
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ ret = ddi_soft_state_init(&drsas_state,
+ sizeof (struct drsas_instance), 0);
+
+ if (ret != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN, "dr_sas: could not init state"));
+ return (ret);
+ }
+
+ if ((ret = scsi_hba_init(&modlinkage)) != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN, "dr_sas: could not init scsi hba"));
+ ddi_soft_state_fini(&drsas_state);
+ return (ret);
+ }
+
+ ret = mod_install(&modlinkage);
+
+ if (ret != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN, "dr_sas: mod_install failed"));
+ scsi_hba_fini(&modlinkage);
+ ddi_soft_state_fini(&drsas_state);
+ }
+
+ return (ret);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ return (mod_info(&modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ int ret;
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ if ((ret = mod_remove(&modlinkage)) != DDI_SUCCESS)
+ return (ret);
+
+ scsi_hba_fini(&modlinkage);
+
+ ddi_soft_state_fini(&drsas_state);
+
+ return (ret);
+}
+
+
+/*
+ * ************************************************************************** *
+ * *
+ * common entry points - for autoconfiguration *
+ * *
+ * ************************************************************************** *
+ */
+
+static int
+drsas_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ int instance_no;
+ int nregs;
+ uint8_t added_isr_f = 0;
+ uint8_t added_soft_isr_f = 0;
+ uint8_t create_devctl_node_f = 0;
+ uint8_t create_scsi_node_f = 0;
+ uint8_t create_ioc_node_f = 0;
+ uint8_t tran_alloc_f = 0;
+ uint8_t irq;
+ uint16_t vendor_id;
+ uint16_t device_id;
+ uint16_t subsysvid;
+ uint16_t subsysid;
+ uint16_t command;
+ off_t reglength = 0;
+ int intr_types = 0;
+ char *data;
+ int msi_enable = 0;
+
+ scsi_hba_tran_t *tran;
+ ddi_dma_attr_t tran_dma_attr;
+ struct drsas_instance *instance;
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ /* CONSTCOND */
+ ASSERT(NO_COMPETING_THREADS);
+
+ instance_no = ddi_get_instance(dip);
+
+ /*
+ * check to see whether this device is in a DMA-capable slot.
+ */
+ if (ddi_slaveonly(dip) == DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas%d: Device in slave-only slot, unused",
+ instance_no));
+ return (DDI_FAILURE);
+ }
+
+ switch (cmd) {
+ case DDI_ATTACH:
+ con_log(CL_DLEVEL1, (CE_NOTE, "dr_sas: DDI_ATTACH"));
+ /* allocate the soft state for the instance */
+ if (ddi_soft_state_zalloc(drsas_state, instance_no)
+ != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas%d: Failed to allocate soft state",
+ instance_no));
+
+ return (DDI_FAILURE);
+ }
+
+ instance = (struct drsas_instance *)ddi_get_soft_state
+ (drsas_state, instance_no);
+
+ if (instance == NULL) {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas%d: Bad soft state", instance_no));
+
+ ddi_soft_state_free(drsas_state, instance_no);
+
+ return (DDI_FAILURE);
+ }
+
+ bzero((caddr_t)instance,
+ sizeof (struct drsas_instance));
+
+ instance->func_ptr = kmem_zalloc(
+ sizeof (struct drsas_func_ptr), KM_SLEEP);
+ ASSERT(instance->func_ptr);
+
+ /* Setup the PCI configuration space handles */
+ if (pci_config_setup(dip, &instance->pci_handle) !=
+ DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas%d: pci config setup failed ",
+ instance_no));
+
+ kmem_free(instance->func_ptr,
+ sizeof (struct drsas_func_ptr));
+ ddi_soft_state_free(drsas_state, instance_no);
+
+ return (DDI_FAILURE);
+ }
+
+ if (ddi_dev_nregs(dip, &nregs) != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas: failed to get registers."));
+
+ pci_config_teardown(&instance->pci_handle);
+ kmem_free(instance->func_ptr,
+ sizeof (struct drsas_func_ptr));
+ ddi_soft_state_free(drsas_state, instance_no);
+
+ return (DDI_FAILURE);
+ }
+
+ vendor_id = pci_config_get16(instance->pci_handle,
+ PCI_CONF_VENID);
+ device_id = pci_config_get16(instance->pci_handle,
+ PCI_CONF_DEVID);
+
+ subsysvid = pci_config_get16(instance->pci_handle,
+ PCI_CONF_SUBVENID);
+ subsysid = pci_config_get16(instance->pci_handle,
+ PCI_CONF_SUBSYSID);
+
+ pci_config_put16(instance->pci_handle, PCI_CONF_COMM,
+ (pci_config_get16(instance->pci_handle,
+ PCI_CONF_COMM) | PCI_COMM_ME));
+ irq = pci_config_get8(instance->pci_handle,
+ PCI_CONF_ILINE);
+
+ con_log(CL_DLEVEL1, (CE_CONT, "dr_sas%d: "
+ "0x%x:0x%x 0x%x:0x%x, irq:%d drv-ver:%s",
+ instance_no, vendor_id, device_id, subsysvid,
+ subsysid, irq, DRSAS_VERSION));
+
+ /* enable bus-mastering */
+ command = pci_config_get16(instance->pci_handle,
+ PCI_CONF_COMM);
+
+ if (!(command & PCI_COMM_ME)) {
+ command |= PCI_COMM_ME;
+
+ pci_config_put16(instance->pci_handle,
+ PCI_CONF_COMM, command);
+
+ con_log(CL_ANN, (CE_CONT, "dr_sas%d: "
+ "enable bus-mastering", instance_no));
+ } else {
+ con_log(CL_DLEVEL1, (CE_CONT, "dr_sas%d: "
+ "bus-mastering already set", instance_no));
+ }
+
+ /* initialize function pointers */
+ if ((device_id == PCI_DEVICE_ID_LSI_2108VDE) ||
+ (device_id == PCI_DEVICE_ID_LSI_2108V)) {
+ con_log(CL_DLEVEL1, (CE_CONT, "dr_sas%d: "
+ "2108V/DE detected", instance_no));
+ instance->func_ptr->read_fw_status_reg =
+ read_fw_status_reg_ppc;
+ instance->func_ptr->issue_cmd = issue_cmd_ppc;
+ instance->func_ptr->issue_cmd_in_sync_mode =
+ issue_cmd_in_sync_mode_ppc;
+ instance->func_ptr->issue_cmd_in_poll_mode =
+ issue_cmd_in_poll_mode_ppc;
+ instance->func_ptr->enable_intr =
+ enable_intr_ppc;
+ instance->func_ptr->disable_intr =
+ disable_intr_ppc;
+ instance->func_ptr->intr_ack = intr_ack_ppc;
+ } else {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas: Invalid device detected"));
+
+ pci_config_teardown(&instance->pci_handle);
+ kmem_free(instance->func_ptr,
+ sizeof (struct drsas_func_ptr));
+ ddi_soft_state_free(drsas_state, instance_no);
+
+ return (DDI_FAILURE);
+ }
+
+ instance->baseaddress = pci_config_get32(
+ instance->pci_handle, PCI_CONF_BASE0);
+ instance->baseaddress &= 0x0fffc;
+
+ instance->dip = dip;
+ instance->vendor_id = vendor_id;
+ instance->device_id = device_id;
+ instance->subsysvid = subsysvid;
+ instance->subsysid = subsysid;
+ instance->instance = instance_no;
+
+ /* Initialize FMA */
+ instance->fm_capabilities = ddi_prop_get_int(
+ DDI_DEV_T_ANY, instance->dip, DDI_PROP_DONTPASS,
+ "fm-capable", DDI_FM_EREPORT_CAPABLE |
+ DDI_FM_ACCCHK_CAPABLE | DDI_FM_DMACHK_CAPABLE
+ | DDI_FM_ERRCB_CAPABLE);
+
+ drsas_fm_init(instance);
+
+ /* Initialize Interrupts */
+ if ((ddi_dev_regsize(instance->dip,
+ REGISTER_SET_IO_2108, &reglength) != DDI_SUCCESS) ||
+ reglength < MINIMUM_MFI_MEM_SZ) {
+ return (DDI_FAILURE);
+ }
+ if (reglength > DEFAULT_MFI_MEM_SZ) {
+ reglength = DEFAULT_MFI_MEM_SZ;
+ con_log(CL_DLEVEL1, (CE_NOTE,
+ "dr_sas: register length to map is "
+ "0x%lx bytes", reglength));
+ }
+ if (ddi_regs_map_setup(instance->dip,
+ REGISTER_SET_IO_2108, &instance->regmap, 0,
+ reglength, &endian_attr, &instance->regmap_handle)
+ != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_NOTE,
+ "dr_sas: couldn't map control registers"));
+ goto fail_attach;
+ }
+
+ /*
+ * Disable Interrupt Now.
+ * Setup Software interrupt
+ */
+ instance->func_ptr->disable_intr(instance);
+
+ msi_enable = 0;
+ if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip, 0,
+ "drsas-enable-msi", &data) == DDI_SUCCESS) {
+ if (strncmp(data, "yes", 3) == 0) {
+ msi_enable = 1;
+ con_log(CL_ANN, (CE_WARN,
+ "msi_enable = %d ENABLED",
+ msi_enable));
+ }
+ ddi_prop_free(data);
+ }
+
+ con_log(CL_DLEVEL1, (CE_WARN, "msi_enable = %d",
+ msi_enable));
+
+ /* Check for all supported interrupt types */
+ if (ddi_intr_get_supported_types(
+ dip, &intr_types) != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN,
+ "ddi_intr_get_supported_types() failed"));
+ goto fail_attach;
+ }
+
+ con_log(CL_DLEVEL1, (CE_NOTE,
+ "ddi_intr_get_supported_types() ret: 0x%x",
+ intr_types));
+
+ /* Initialize and Setup Interrupt handler */
+ if (msi_enable && (intr_types & DDI_INTR_TYPE_MSIX)) {
+ if (drsas_add_intrs(instance,
+ DDI_INTR_TYPE_MSIX) != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN,
+ "MSIX interrupt query failed"));
+ goto fail_attach;
+ }
+ instance->intr_type = DDI_INTR_TYPE_MSIX;
+ } else if (msi_enable && (intr_types &
+ DDI_INTR_TYPE_MSI)) {
+ if (drsas_add_intrs(instance,
+ DDI_INTR_TYPE_MSI) != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN,
+ "MSI interrupt query failed"));
+ goto fail_attach;
+ }
+ instance->intr_type = DDI_INTR_TYPE_MSI;
+ } else if (intr_types & DDI_INTR_TYPE_FIXED) {
+ msi_enable = 0;
+ if (drsas_add_intrs(instance,
+ DDI_INTR_TYPE_FIXED) != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN,
+ "FIXED interrupt query failed"));
+ goto fail_attach;
+ }
+ instance->intr_type = DDI_INTR_TYPE_FIXED;
+ } else {
+ con_log(CL_ANN, (CE_WARN, "Device cannot "
+ "suppport either FIXED or MSI/X "
+ "interrupts"));
+ goto fail_attach;
+ }
+
+ added_isr_f = 1;
+
+ /* setup the mfi based low level driver */
+ if (init_mfi(instance) != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN, "dr_sas: "
+ "could not initialize the low level driver"));
+
+ goto fail_attach;
+ }
+
+ /* Initialize all Mutex */
+ INIT_LIST_HEAD(&instance->completed_pool_list);
+ mutex_init(&instance->completed_pool_mtx,
+ "completed_pool_mtx", MUTEX_DRIVER,
+ DDI_INTR_PRI(instance->intr_pri));
+
+ mutex_init(&instance->int_cmd_mtx, "int_cmd_mtx",
+ MUTEX_DRIVER, DDI_INTR_PRI(instance->intr_pri));
+ cv_init(&instance->int_cmd_cv, NULL, CV_DRIVER, NULL);
+
+ mutex_init(&instance->cmd_pool_mtx, "cmd_pool_mtx",
+ MUTEX_DRIVER, DDI_INTR_PRI(instance->intr_pri));
+
+ /* Register our soft-isr for highlevel interrupts. */
+ instance->isr_level = instance->intr_pri;
+ if (instance->isr_level == HIGH_LEVEL_INTR) {
+ if (ddi_add_softintr(dip, DDI_SOFTINT_HIGH,
+ &instance->soft_intr_id, NULL, NULL,
+ drsas_softintr, (caddr_t)instance) !=
+ DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN,
+ " Software ISR did not register"));
+
+ goto fail_attach;
+ }
+
+ added_soft_isr_f = 1;
+ }
+
+ /* Allocate a transport structure */
+ tran = scsi_hba_tran_alloc(dip, SCSI_HBA_CANSLEEP);
+
+ if (tran == NULL) {
+ con_log(CL_ANN, (CE_WARN,
+ "scsi_hba_tran_alloc failed"));
+ goto fail_attach;
+ }
+
+ tran_alloc_f = 1;
+
+ instance->tran = tran;
+
+ tran->tran_hba_private = instance;
+ tran->tran_tgt_init = drsas_tran_tgt_init;
+ tran->tran_tgt_probe = scsi_hba_probe;
+ tran->tran_tgt_free = drsas_tran_tgt_free;
+ tran->tran_init_pkt = drsas_tran_init_pkt;
+ tran->tran_start = drsas_tran_start;
+ tran->tran_abort = drsas_tran_abort;
+ tran->tran_reset = drsas_tran_reset;
+ tran->tran_getcap = drsas_tran_getcap;
+ tran->tran_setcap = drsas_tran_setcap;
+ tran->tran_destroy_pkt = drsas_tran_destroy_pkt;
+ tran->tran_dmafree = drsas_tran_dmafree;
+ tran->tran_sync_pkt = drsas_tran_sync_pkt;
+ tran->tran_bus_config = drsas_tran_bus_config;
+
+ tran_dma_attr = drsas_generic_dma_attr;
+ tran_dma_attr.dma_attr_sgllen = instance->max_num_sge;
+
+ /* Attach this instance of the hba */
+ if (scsi_hba_attach_setup(dip, &tran_dma_attr, tran, 0)
+ != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN,
+ "scsi_hba_attach failed"));
+
+ goto fail_attach;
+ }
+
+ /* create devctl node for cfgadm command */
+ if (ddi_create_minor_node(dip, "devctl",
+ S_IFCHR, INST2DEVCTL(instance_no),
+ DDI_NT_SCSI_NEXUS, 0) == DDI_FAILURE) {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas: failed to create devctl node."));
+
+ goto fail_attach;
+ }
+
+ create_devctl_node_f = 1;
+
+ /* create scsi node for cfgadm command */
+ if (ddi_create_minor_node(dip, "scsi", S_IFCHR,
+ INST2SCSI(instance_no),
+ DDI_NT_SCSI_ATTACHMENT_POINT, 0) ==
+ DDI_FAILURE) {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas: failed to create scsi node."));
+
+ goto fail_attach;
+ }
+
+ create_scsi_node_f = 1;
+
+ (void) sprintf(instance->iocnode, "%d:lsirdctl",
+ instance_no);
+
+ /*
+ * Create a node for applications
+ * for issuing ioctl to the driver.
+ */
+ if (ddi_create_minor_node(dip, instance->iocnode,
+ S_IFCHR, INST2LSIRDCTL(instance_no),
+ DDI_PSEUDO, 0) == DDI_FAILURE) {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas: failed to create ioctl node."));
+
+ goto fail_attach;
+ }
+
+ create_ioc_node_f = 1;
+
+ /* Create a taskq to handle dr events */
+ if ((instance->taskq = ddi_taskq_create(dip,
+ "drsas_dr_taskq", 1,
+ TASKQ_DEFAULTPRI, 0)) == NULL) {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas: failed to create taskq "));
+ instance->taskq = NULL;
+ goto fail_attach;
+ }
+
+ /* enable interrupt */
+ instance->func_ptr->enable_intr(instance);
+
+ /* initiate AEN */
+ if (start_mfi_aen(instance)) {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas: failed to initiate AEN."));
+ goto fail_initiate_aen;
+ }
+
+ con_log(CL_DLEVEL1, (CE_NOTE,
+ "AEN started for instance %d.", instance_no));
+
+ /* Finally! We are on the air. */
+ ddi_report_dev(dip);
+
+ if (drsas_check_acc_handle(instance->regmap_handle) !=
+ DDI_SUCCESS) {
+ goto fail_attach;
+ }
+ if (drsas_check_acc_handle(instance->pci_handle) !=
+ DDI_SUCCESS) {
+ goto fail_attach;
+ }
+ instance->dr_ld_list =
+ kmem_zalloc(MRDRV_MAX_LD * sizeof (struct drsas_ld),
+ KM_SLEEP);
+ break;
+ case DDI_PM_RESUME:
+ con_log(CL_ANN, (CE_NOTE,
+ "dr_sas: DDI_PM_RESUME"));
+ break;
+ case DDI_RESUME:
+ con_log(CL_ANN, (CE_NOTE,
+ "dr_sas: DDI_RESUME"));
+ break;
+ default:
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas: invalid attach cmd=%x", cmd));
+ return (DDI_FAILURE);
+ }
+
+ return (DDI_SUCCESS);
+
+fail_initiate_aen:
+fail_attach:
+ if (create_devctl_node_f) {
+ ddi_remove_minor_node(dip, "devctl");
+ }
+
+ if (create_scsi_node_f) {
+ ddi_remove_minor_node(dip, "scsi");
+ }
+
+ if (create_ioc_node_f) {
+ ddi_remove_minor_node(dip, instance->iocnode);
+ }
+
+ if (tran_alloc_f) {
+ scsi_hba_tran_free(tran);
+ }
+
+
+ if (added_soft_isr_f) {
+ ddi_remove_softintr(instance->soft_intr_id);
+ }
+
+ if (added_isr_f) {
+ drsas_rem_intrs(instance);
+ }
+
+ if (instance && instance->taskq) {
+ ddi_taskq_destroy(instance->taskq);
+ }
+
+ drsas_fm_ereport(instance, DDI_FM_DEVICE_NO_RESPONSE);
+ ddi_fm_service_impact(instance->dip, DDI_SERVICE_LOST);
+
+ drsas_fm_fini(instance);
+
+ pci_config_teardown(&instance->pci_handle);
+
+ ddi_soft_state_free(drsas_state, instance_no);
+
+ con_log(CL_ANN, (CE_NOTE,
+ "dr_sas: return failure from drsas_attach"));
+
+ return (DDI_FAILURE);
+}
+
+/*ARGSUSED*/
+static int
+drsas_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
+{
+ int rval;
+ int drsas_minor = getminor((dev_t)arg);
+
+ struct drsas_instance *instance;
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ switch (cmd) {
+ case DDI_INFO_DEVT2DEVINFO:
+ instance = (struct drsas_instance *)
+ ddi_get_soft_state(drsas_state,
+ MINOR2INST(drsas_minor));
+
+ if (instance == NULL) {
+ *resultp = NULL;
+ rval = DDI_FAILURE;
+ } else {
+ *resultp = instance->dip;
+ rval = DDI_SUCCESS;
+ }
+ break;
+ case DDI_INFO_DEVT2INSTANCE:
+ *resultp = (void *)instance;
+ rval = DDI_SUCCESS;
+ break;
+ default:
+ *resultp = NULL;
+ rval = DDI_FAILURE;
+ }
+
+ return (rval);
+}
+
+static int
+drsas_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ int instance_no;
+
+ struct drsas_instance *instance;
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ /* CONSTCOND */
+ ASSERT(NO_COMPETING_THREADS);
+
+ instance_no = ddi_get_instance(dip);
+
+ instance = (struct drsas_instance *)ddi_get_soft_state(drsas_state,
+ instance_no);
+
+ if (!instance) {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas:%d could not get instance in detach",
+ instance_no));
+
+ return (DDI_FAILURE);
+ }
+
+ con_log(CL_ANN, (CE_NOTE,
+ "dr_sas%d: detaching device 0x%4x:0x%4x:0x%4x:0x%4x",
+ instance_no, instance->vendor_id, instance->device_id,
+ instance->subsysvid, instance->subsysid));
+
+ switch (cmd) {
+ case DDI_DETACH:
+ con_log(CL_ANN, (CE_NOTE,
+ "drsas_detach: DDI_DETACH"));
+
+ if (scsi_hba_detach(dip) != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas:%d failed to detach",
+ instance_no));
+
+ return (DDI_FAILURE);
+ }
+
+ scsi_hba_tran_free(instance->tran);
+
+ flush_cache(instance);
+
+ if (abort_aen_cmd(instance, instance->aen_cmd)) {
+ con_log(CL_ANN, (CE_WARN, "drsas_detach: "
+ "failed to abort prevous AEN command"));
+
+ return (DDI_FAILURE);
+ }
+
+ instance->func_ptr->disable_intr(instance);
+
+ if (instance->isr_level == HIGH_LEVEL_INTR) {
+ ddi_remove_softintr(instance->soft_intr_id);
+ }
+
+ drsas_rem_intrs(instance);
+
+ if (instance->taskq) {
+ ddi_taskq_destroy(instance->taskq);
+ }
+ kmem_free(instance->dr_ld_list, MRDRV_MAX_LD
+ * sizeof (struct drsas_ld));
+ free_space_for_mfi(instance);
+
+ drsas_fm_fini(instance);
+
+ pci_config_teardown(&instance->pci_handle);
+
+ kmem_free(instance->func_ptr,
+ sizeof (struct drsas_func_ptr));
+
+ ddi_soft_state_free(drsas_state, instance_no);
+ break;
+ case DDI_PM_SUSPEND:
+ con_log(CL_ANN, (CE_NOTE,
+ "drsas_detach: DDI_PM_SUSPEND"));
+
+ break;
+ case DDI_SUSPEND:
+ con_log(CL_ANN, (CE_NOTE,
+ "drsas_detach: DDI_SUSPEND"));
+
+ break;
+ default:
+ con_log(CL_ANN, (CE_WARN,
+ "invalid detach command:0x%x", cmd));
+ return (DDI_FAILURE);
+ }
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * ************************************************************************** *
+ * *
+ * common entry points - for character driver types *
+ * *
+ * ************************************************************************** *
+ */
+static int
+drsas_open(dev_t *dev, int openflags, int otyp, cred_t *credp)
+{
+ int rval = 0;
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ /* Check root permissions */
+ if (drv_priv(credp) != 0) {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas: Non-root ioctl access denied!"));
+ return (EPERM);
+ }
+
+ /* Verify we are being opened as a character device */
+ if (otyp != OTYP_CHR) {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas: ioctl node must be a char node"));
+ return (EINVAL);
+ }
+
+ if (ddi_get_soft_state(drsas_state, MINOR2INST(getminor(*dev)))
+ == NULL) {
+ return (ENXIO);
+ }
+
+ if (scsi_hba_open) {
+ rval = scsi_hba_open(dev, openflags, otyp, credp);
+ }
+
+ return (rval);
+}
+
+static int
+drsas_close(dev_t dev, int openflags, int otyp, cred_t *credp)
+{
+ int rval = 0;
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ /* no need for locks! */
+
+ if (scsi_hba_close) {
+ rval = scsi_hba_close(dev, openflags, otyp, credp);
+ }
+
+ return (rval);
+}
+
+static int
+drsas_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
+ int *rvalp)
+{
+ int rval = 0;
+
+ struct drsas_instance *instance;
+ struct drsas_ioctl *ioctl;
+ struct drsas_aen aen;
+ int i;
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ instance = ddi_get_soft_state(drsas_state, MINOR2INST(getminor(dev)));
+
+ if (instance == NULL) {
+ /* invalid minor number */
+ con_log(CL_ANN, (CE_WARN, "dr_sas: adapter not found."));
+ return (ENXIO);
+ }
+
+ ioctl = (struct drsas_ioctl *)kmem_zalloc(sizeof (struct drsas_ioctl),
+ KM_SLEEP);
+ ASSERT(ioctl);
+
+ switch ((uint_t)cmd) {
+ case DRSAS_IOCTL_FIRMWARE:
+ for (i = 0; i < sizeof (struct drsas_ioctl); i++) {
+ if (ddi_copyin((uint8_t *)arg+i,
+ (uint8_t *)ioctl+i, 1, mode)) {
+ con_log(CL_ANN, (CE_WARN, "drsas_ioctl "
+ "ERROR IOCTL copyin"));
+ kmem_free(ioctl,
+ sizeof (struct drsas_ioctl));
+ return (EFAULT);
+ }
+ }
+ if (ioctl->control_code == DRSAS_DRIVER_IOCTL_COMMON) {
+ rval = handle_drv_ioctl(instance, ioctl, mode);
+ } else {
+ rval = handle_mfi_ioctl(instance, ioctl, mode);
+ }
+ for (i = 0; i < sizeof (struct drsas_ioctl) - 1; i++) {
+ if (ddi_copyout((uint8_t *)ioctl+i,
+ (uint8_t *)arg+i, 1, mode)) {
+ con_log(CL_ANN, (CE_WARN,
+ "drsas_ioctl: ddi_copyout "
+ "failed"));
+ rval = 1;
+ break;
+ }
+ }
+
+ break;
+ case DRSAS_IOCTL_AEN:
+ for (i = 0; i < sizeof (struct drsas_aen); i++) {
+ if (ddi_copyin((uint8_t *)arg+i,
+ (uint8_t *)&aen+i, 1, mode)) {
+ con_log(CL_ANN, (CE_WARN,
+ "drsas_ioctl: "
+ "ERROR AEN copyin"));
+ kmem_free(ioctl,
+ sizeof (struct drsas_ioctl));
+ return (EFAULT);
+ }
+ }
+
+ rval = handle_mfi_aen(instance, &aen);
+ for (i = 0; i < sizeof (struct drsas_aen); i++) {
+ if (ddi_copyout((uint8_t *)&aen + i,
+ (uint8_t *)arg + i, 1, mode)) {
+ con_log(CL_ANN, (CE_WARN,
+ "drsas_ioctl: "
+ "ddi_copyout failed"));
+ rval = 1;
+ break;
+ }
+ }
+
+ break;
+ default:
+ rval = scsi_hba_ioctl(dev, cmd, arg,
+ mode, credp, rvalp);
+
+ con_log(CL_DLEVEL1, (CE_NOTE, "drsas_ioctl: "
+ "scsi_hba_ioctl called, ret = %x.", rval));
+ }
+
+ kmem_free(ioctl, sizeof (struct drsas_ioctl));
+ return (rval);
+}
+
+/*
+ * ************************************************************************** *
+ * *
+ * common entry points - for block driver types *
+ * *
+ * ************************************************************************** *
+ */
+/*ARGSUSED*/
+static int
+drsas_reset(dev_info_t *dip, ddi_reset_cmd_t cmd)
+{
+ int instance_no;
+
+ struct drsas_instance *instance;
+
+ instance_no = ddi_get_instance(dip);
+ instance = (struct drsas_instance *)ddi_get_soft_state
+ (drsas_state, instance_no);
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ if (!instance) {
+ con_log(CL_ANN, (CE_WARN, "dr_sas:%d could not get adapter "
+ "in reset", instance_no));
+ return (DDI_FAILURE);
+ }
+
+ instance->func_ptr->disable_intr(instance);
+
+ con_log(CL_ANN1, (CE_NOTE, "flushing cache for instance %d",
+ instance_no));
+
+ flush_cache(instance);
+
+ return (DDI_SUCCESS);
+}
+
+
+/*
+ * ************************************************************************** *
+ * *
+ * entry points (SCSI HBA) *
+ * *
+ * ************************************************************************** *
+ */
+/*ARGSUSED*/
+static int
+drsas_tran_tgt_init(dev_info_t *hba_dip, dev_info_t *tgt_dip,
+ scsi_hba_tran_t *tran, struct scsi_device *sd)
+{
+ struct drsas_instance *instance;
+ uint16_t tgt = sd->sd_address.a_target;
+ uint8_t lun = sd->sd_address.a_lun;
+
+ con_log(CL_ANN1, (CE_NOTE, "drsas_tgt_init target %d lun %d",
+ tgt, lun));
+
+ instance = ADDR2MR(&sd->sd_address);
+
+ if (ndi_dev_is_persistent_node(tgt_dip) == 0) {
+ (void) ndi_merge_node(tgt_dip, drsas_name_node);
+ ddi_set_name_addr(tgt_dip, NULL);
+
+ con_log(CL_ANN1, (CE_NOTE, "drsas_tgt_init in "
+ "ndi_dev_is_persistent_node DDI_FAILURE t = %d l = %d",
+ tgt, lun));
+ return (DDI_FAILURE);
+ }
+
+ con_log(CL_ANN1, (CE_NOTE, "drsas_tgt_init dev_dip %p tgt_dip %p",
+ (void *)instance->dr_ld_list[tgt].dip, (void *)tgt_dip));
+
+ if (tgt < MRDRV_MAX_LD && lun == 0) {
+ if (instance->dr_ld_list[tgt].dip == NULL &&
+ strcmp(ddi_driver_name(sd->sd_dev), "sd") == 0) {
+ instance->dr_ld_list[tgt].dip = tgt_dip;
+ instance->dr_ld_list[tgt].lun_type = DRSAS_LD_LUN;
+ }
+ }
+ return (DDI_SUCCESS);
+}
+
+/*ARGSUSED*/
+static void
+drsas_tran_tgt_free(dev_info_t *hba_dip, dev_info_t *tgt_dip,
+ scsi_hba_tran_t *hba_tran, struct scsi_device *sd)
+{
+ struct drsas_instance *instance;
+ int tgt = sd->sd_address.a_target;
+ int lun = sd->sd_address.a_lun;
+
+ instance = ADDR2MR(&sd->sd_address);
+
+ con_log(CL_ANN1, (CE_NOTE, "tgt_free t = %d l = %d", tgt, lun));
+
+ if (tgt < MRDRV_MAX_LD && lun == 0) {
+ if (instance->dr_ld_list[tgt].dip == tgt_dip) {
+ instance->dr_ld_list[tgt].dip = NULL;
+ }
+ }
+}
+
+static dev_info_t *
+drsas_find_child(struct drsas_instance *instance, uint16_t tgt, uint8_t lun)
+{
+ dev_info_t *child = NULL;
+ char addr[SCSI_MAXNAMELEN];
+ char tmp[MAXNAMELEN];
+
+ (void) sprintf(addr, "%x,%x", tgt, lun);
+ for (child = ddi_get_child(instance->dip); child;
+ child = ddi_get_next_sibling(child)) {
+
+ if (drsas_name_node(child, tmp, MAXNAMELEN) !=
+ DDI_SUCCESS) {
+ continue;
+ }
+
+ if (strcmp(addr, tmp) == 0) {
+ break;
+ }
+ }
+ con_log(CL_ANN1, (CE_NOTE, "drsas_find_child: return child = %p",
+ (void *)child));
+ return (child);
+}
+
+static int
+drsas_name_node(dev_info_t *dip, char *name, int len)
+{
+ int tgt, lun;
+
+ tgt = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
+ DDI_PROP_DONTPASS, "target", -1);
+ con_log(CL_ANN1, (CE_NOTE,
+ "drsas_name_node: dip %p tgt %d", (void *)dip, tgt));
+ if (tgt == -1) {
+ return (DDI_FAILURE);
+ }
+ lun = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
+ "lun", -1);
+ con_log(CL_ANN1,
+ (CE_NOTE, "drsas_name_node: tgt %d lun %d", tgt, lun));
+ if (lun == -1) {
+ return (DDI_FAILURE);
+ }
+ (void) snprintf(name, len, "%x,%x", tgt, lun);
+ return (DDI_SUCCESS);
+}
+
+static struct scsi_pkt *
+drsas_tran_init_pkt(struct scsi_address *ap, register struct scsi_pkt *pkt,
+ struct buf *bp, int cmdlen, int statuslen, int tgtlen,
+ int flags, int (*callback)(), caddr_t arg)
+{
+ struct scsa_cmd *acmd;
+ struct drsas_instance *instance;
+ struct scsi_pkt *new_pkt;
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ instance = ADDR2MR(ap);
+
+ /* step #1 : pkt allocation */
+ if (pkt == NULL) {
+ pkt = scsi_hba_pkt_alloc(instance->dip, ap, cmdlen, statuslen,
+ tgtlen, sizeof (struct scsa_cmd), callback, arg);
+ if (pkt == NULL) {
+ return (NULL);
+ }
+
+ acmd = PKT2CMD(pkt);
+
+ /*
+ * Initialize the new pkt - we redundantly initialize
+ * all the fields for illustrative purposes.
+ */
+ acmd->cmd_pkt = pkt;
+ acmd->cmd_flags = 0;
+ acmd->cmd_scblen = statuslen;
+ acmd->cmd_cdblen = cmdlen;
+ acmd->cmd_dmahandle = NULL;
+ acmd->cmd_ncookies = 0;
+ acmd->cmd_cookie = 0;
+ acmd->cmd_cookiecnt = 0;
+ acmd->cmd_nwin = 0;
+
+ pkt->pkt_address = *ap;
+ pkt->pkt_comp = (void (*)())NULL;
+ pkt->pkt_flags = 0;
+ pkt->pkt_time = 0;
+ pkt->pkt_resid = 0;
+ pkt->pkt_state = 0;
+ pkt->pkt_statistics = 0;
+ pkt->pkt_reason = 0;
+ new_pkt = pkt;
+ } else {
+ acmd = PKT2CMD(pkt);
+ new_pkt = NULL;
+ }
+
+ /* step #2 : dma allocation/move */
+ if (bp && bp->b_bcount != 0) {
+ if (acmd->cmd_dmahandle == NULL) {
+ if (drsas_dma_alloc(instance, pkt, bp, flags,
+ callback) == DDI_FAILURE) {
+ if (new_pkt) {
+ scsi_hba_pkt_free(ap, new_pkt);
+ }
+ return ((struct scsi_pkt *)NULL);
+ }
+ } else {
+ if (drsas_dma_move(instance, pkt, bp) == DDI_FAILURE) {
+ return ((struct scsi_pkt *)NULL);
+ }
+ }
+ }
+
+ return (pkt);
+}
+
+static int
+drsas_tran_start(struct scsi_address *ap, register struct scsi_pkt *pkt)
+{
+ uchar_t cmd_done = 0;
+
+ struct drsas_instance *instance = ADDR2MR(ap);
+ struct drsas_cmd *cmd;
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d:SCSI CDB[0]=0x%x",
+ __func__, __LINE__, pkt->pkt_cdbp[0]));
+
+ pkt->pkt_reason = CMD_CMPLT;
+ *pkt->pkt_scbp = STATUS_GOOD; /* clear arq scsi_status */
+
+ cmd = build_cmd(instance, ap, pkt, &cmd_done);
+
+ /*
+ * Check if the command is already completed by the drsas_build_cmd()
+ * routine. In which case the busy_flag would be clear and scb will be
+ * NULL and appropriate reason provided in pkt_reason field
+ */
+ if (cmd_done) {
+ pkt->pkt_reason = CMD_CMPLT;
+ pkt->pkt_scbp[0] = STATUS_GOOD;
+ pkt->pkt_state |= STATE_GOT_BUS | STATE_GOT_TARGET
+ | STATE_SENT_CMD;
+ if (((pkt->pkt_flags & FLAG_NOINTR) == 0) && pkt->pkt_comp) {
+ (*pkt->pkt_comp)(pkt);
+ }
+
+ return (TRAN_ACCEPT);
+ }
+
+ if (cmd == NULL) {
+ return (TRAN_BUSY);
+ }
+
+ if ((pkt->pkt_flags & FLAG_NOINTR) == 0) {
+ if (instance->fw_outstanding > instance->max_fw_cmds) {
+ con_log(CL_ANN, (CE_CONT, "dr_sas:Firmware busy"));
+ return_mfi_pkt(instance, cmd);
+ return (TRAN_BUSY);
+ }
+
+ /* Synchronize the Cmd frame for the controller */
+ (void) ddi_dma_sync(cmd->frame_dma_obj.dma_handle, 0, 0,
+ DDI_DMA_SYNC_FORDEV);
+
+ instance->func_ptr->issue_cmd(cmd, instance);
+
+ } else {
+ struct drsas_header *hdr = &cmd->frame->hdr;
+
+ cmd->sync_cmd = DRSAS_TRUE;
+
+ instance->func_ptr-> issue_cmd_in_poll_mode(instance, cmd);
+
+ pkt->pkt_reason = CMD_CMPLT;
+ pkt->pkt_statistics = 0;
+ pkt->pkt_state |= STATE_XFERRED_DATA | STATE_GOT_STATUS;
+
+ switch (ddi_get8(cmd->frame_dma_obj.acc_handle,
+ &hdr->cmd_status)) {
+ case MFI_STAT_OK:
+ pkt->pkt_scbp[0] = STATUS_GOOD;
+ break;
+
+ case MFI_STAT_SCSI_DONE_WITH_ERROR:
+
+ pkt->pkt_reason = CMD_CMPLT;
+ pkt->pkt_statistics = 0;
+
+ ((struct scsi_status *)pkt->pkt_scbp)->sts_chk = 1;
+ break;
+
+ case MFI_STAT_DEVICE_NOT_FOUND:
+ pkt->pkt_reason = CMD_DEV_GONE;
+ pkt->pkt_statistics = STAT_DISCON;
+ break;
+
+ default:
+ ((struct scsi_status *)pkt->pkt_scbp)->sts_busy = 1;
+ }
+
+ return_mfi_pkt(instance, cmd);
+ (void) drsas_common_check(instance, cmd);
+
+ if (pkt->pkt_comp) {
+ (*pkt->pkt_comp)(pkt);
+ }
+
+ }
+
+ return (TRAN_ACCEPT);
+}
+
+/*ARGSUSED*/
+static int
+drsas_tran_abort(struct scsi_address *ap, struct scsi_pkt *pkt)
+{
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ /* abort command not supported by H/W */
+
+ return (DDI_FAILURE);
+}
+
+/*ARGSUSED*/
+static int
+drsas_tran_reset(struct scsi_address *ap, int level)
+{
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ /* reset command not supported by H/W */
+
+ return (DDI_FAILURE);
+
+}
+
+/*ARGSUSED*/
+static int
+drsas_tran_getcap(struct scsi_address *ap, char *cap, int whom)
+{
+ int rval = 0;
+
+ struct drsas_instance *instance = ADDR2MR(ap);
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ /* we do allow inquiring about capabilities for other targets */
+ if (cap == NULL) {
+ return (-1);
+ }
+
+ switch (scsi_hba_lookup_capstr(cap)) {
+ case SCSI_CAP_DMA_MAX:
+ /* Limit to 16MB max transfer */
+ rval = drsas_max_cap_maxxfer;
+ break;
+ case SCSI_CAP_MSG_OUT:
+ rval = 1;
+ break;
+ case SCSI_CAP_DISCONNECT:
+ rval = 0;
+ break;
+ case SCSI_CAP_SYNCHRONOUS:
+ rval = 0;
+ break;
+ case SCSI_CAP_WIDE_XFER:
+ rval = 1;
+ break;
+ case SCSI_CAP_TAGGED_QING:
+ rval = 1;
+ break;
+ case SCSI_CAP_UNTAGGED_QING:
+ rval = 1;
+ break;
+ case SCSI_CAP_PARITY:
+ rval = 1;
+ break;
+ case SCSI_CAP_INITIATOR_ID:
+ rval = instance->init_id;
+ break;
+ case SCSI_CAP_ARQ:
+ rval = 1;
+ break;
+ case SCSI_CAP_LINKED_CMDS:
+ rval = 0;
+ break;
+ case SCSI_CAP_RESET_NOTIFICATION:
+ rval = 1;
+ break;
+ case SCSI_CAP_GEOMETRY:
+ rval = -1;
+
+ break;
+ default:
+ con_log(CL_DLEVEL2, (CE_NOTE, "Default cap coming 0x%x",
+ scsi_hba_lookup_capstr(cap)));
+ rval = -1;
+ break;
+ }
+
+ return (rval);
+}
+
+/*ARGSUSED*/
+static int
+drsas_tran_setcap(struct scsi_address *ap, char *cap, int value, int whom)
+{
+ int rval = 1;
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ /* We don't allow setting capabilities for other targets */
+ if (cap == NULL || whom == 0) {
+ return (-1);
+ }
+
+ switch (scsi_hba_lookup_capstr(cap)) {
+ case SCSI_CAP_DMA_MAX:
+ case SCSI_CAP_MSG_OUT:
+ case SCSI_CAP_PARITY:
+ case SCSI_CAP_LINKED_CMDS:
+ case SCSI_CAP_RESET_NOTIFICATION:
+ case SCSI_CAP_DISCONNECT:
+ case SCSI_CAP_SYNCHRONOUS:
+ case SCSI_CAP_UNTAGGED_QING:
+ case SCSI_CAP_WIDE_XFER:
+ case SCSI_CAP_INITIATOR_ID:
+ case SCSI_CAP_ARQ:
+ /*
+ * None of these are settable via
+ * the capability interface.
+ */
+ break;
+ case SCSI_CAP_TAGGED_QING:
+ rval = 1;
+ break;
+ case SCSI_CAP_SECTOR_SIZE:
+ rval = 1;
+ break;
+
+ case SCSI_CAP_TOTAL_SECTORS:
+ rval = 1;
+ break;
+ default:
+ rval = -1;
+ break;
+ }
+
+ return (rval);
+}
+
+static void
+drsas_tran_destroy_pkt(struct scsi_address *ap, struct scsi_pkt *pkt)
+{
+ struct scsa_cmd *acmd = PKT2CMD(pkt);
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ if (acmd->cmd_flags & CFLAG_DMAVALID) {
+ acmd->cmd_flags &= ~CFLAG_DMAVALID;
+
+ (void) ddi_dma_unbind_handle(acmd->cmd_dmahandle);
+
+ ddi_dma_free_handle(&acmd->cmd_dmahandle);
+
+ acmd->cmd_dmahandle = NULL;
+ }
+
+ /* free the pkt */
+ scsi_hba_pkt_free(ap, pkt);
+}
+
+/*ARGSUSED*/
+static void
+drsas_tran_dmafree(struct scsi_address *ap, struct scsi_pkt *pkt)
+{
+ register struct scsa_cmd *acmd = PKT2CMD(pkt);
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ if (acmd->cmd_flags & CFLAG_DMAVALID) {
+ acmd->cmd_flags &= ~CFLAG_DMAVALID;
+
+ (void) ddi_dma_unbind_handle(acmd->cmd_dmahandle);
+
+ ddi_dma_free_handle(&acmd->cmd_dmahandle);
+
+ acmd->cmd_dmahandle = NULL;
+ }
+}
+
+/*ARGSUSED*/
+static void
+drsas_tran_sync_pkt(struct scsi_address *ap, struct scsi_pkt *pkt)
+{
+ register struct scsa_cmd *acmd = PKT2CMD(pkt);
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ if (acmd->cmd_flags & CFLAG_DMAVALID) {
+ (void) ddi_dma_sync(acmd->cmd_dmahandle, acmd->cmd_dma_offset,
+ acmd->cmd_dma_len, (acmd->cmd_flags & CFLAG_DMASEND) ?
+ DDI_DMA_SYNC_FORDEV : DDI_DMA_SYNC_FORCPU);
+ }
+}
+
+/*
+ * drsas_isr(caddr_t)
+ *
+ * The Interrupt Service Routine
+ *
+ * Collect status for all completed commands and do callback
+ *
+ */
+static uint_t
+drsas_isr(struct drsas_instance *instance)
+{
+ int need_softintr;
+ uint32_t producer;
+ uint32_t consumer;
+ uint32_t context;
+
+ struct drsas_cmd *cmd;
+
+ con_log(CL_ANN1, (CE_NOTE, "chkpnt:%s:%d", __func__, __LINE__));
+
+ ASSERT(instance);
+ if ((instance->intr_type == DDI_INTR_TYPE_FIXED) &&
+ !instance->func_ptr->intr_ack(instance)) {
+ return (DDI_INTR_UNCLAIMED);
+ }
+
+ (void) ddi_dma_sync(instance->mfi_internal_dma_obj.dma_handle,
+ 0, 0, DDI_DMA_SYNC_FORCPU);
+
+ if (drsas_check_dma_handle(instance->mfi_internal_dma_obj.dma_handle)
+ != DDI_SUCCESS) {
+ drsas_fm_ereport(instance, DDI_FM_DEVICE_NO_RESPONSE);
+ ddi_fm_service_impact(instance->dip, DDI_SERVICE_LOST);
+ return (DDI_INTR_UNCLAIMED);
+ }
+
+ producer = ddi_get32(instance->mfi_internal_dma_obj.acc_handle,
+ instance->producer);
+ consumer = ddi_get32(instance->mfi_internal_dma_obj.acc_handle,
+ instance->consumer);
+
+ con_log(CL_ANN1, (CE_CONT, " producer %x consumer %x ",
+ producer, consumer));
+ if (producer == consumer) {
+ con_log(CL_ANN1, (CE_WARN, "producer = consumer case"));
+ return (DDI_INTR_UNCLAIMED);
+ }
+ mutex_enter(&instance->completed_pool_mtx);
+
+ while (consumer != producer) {
+ context = ddi_get32(instance->mfi_internal_dma_obj.acc_handle,
+ &instance->reply_queue[consumer]);
+ cmd = instance->cmd_list[context];
+ mlist_add_tail(&cmd->list, &instance->completed_pool_list);
+
+ consumer++;
+ if (consumer == (instance->max_fw_cmds + 1)) {
+ consumer = 0;
+ }
+ }
+
+ mutex_exit(&instance->completed_pool_mtx);
+
+ ddi_put32(instance->mfi_internal_dma_obj.acc_handle,
+ instance->consumer, consumer);
+ (void) ddi_dma_sync(instance->mfi_internal_dma_obj.dma_handle,
+ 0, 0, DDI_DMA_SYNC_FORDEV);
+
+ if (instance->softint_running) {
+ need_softintr = 0;
+ } else {
+ need_softintr = 1;
+ }
+
+ if (instance->isr_level == HIGH_LEVEL_INTR) {
+ if (need_softintr) {
+ ddi_trigger_softintr(instance->soft_intr_id);
+ }
+ } else {
+ /*
+ * Not a high-level interrupt, therefore call the soft level
+ * interrupt explicitly
+ */
+ (void) drsas_softintr(instance);
+ }
+
+ return (DDI_INTR_CLAIMED);
+}
+
+
+/*
+ * ************************************************************************** *
+ * *
+ * libraries *
+ * *
+ * ************************************************************************** *
+ */
+/*
+ * get_mfi_pkt : Get a command from the free pool
+ * After successful allocation, the caller of this routine
+ * must clear the frame buffer (memset to zero) before
+ * using the packet further.
+ *
+ * ***** Note *****
+ * After clearing the frame buffer the context id of the
+ * frame buffer SHOULD be restored back.
+ */
+static struct drsas_cmd *
+get_mfi_pkt(struct drsas_instance *instance)
+{
+ mlist_t *head = &instance->cmd_pool_list;
+ struct drsas_cmd *cmd = NULL;
+
+ mutex_enter(&instance->cmd_pool_mtx);
+ ASSERT(mutex_owned(&instance->cmd_pool_mtx));
+
+ if (!mlist_empty(head)) {
+ cmd = mlist_entry(head->next, struct drsas_cmd, list);
+ mlist_del_init(head->next);
+ }
+ if (cmd != NULL)
+ cmd->pkt = NULL;
+ mutex_exit(&instance->cmd_pool_mtx);
+
+ return (cmd);
+}
+
+/*
+ * return_mfi_pkt : Return a cmd to free command pool
+ */
+static void
+return_mfi_pkt(struct drsas_instance *instance, struct drsas_cmd *cmd)
+{
+ mutex_enter(&instance->cmd_pool_mtx);
+ ASSERT(mutex_owned(&instance->cmd_pool_mtx));
+
+ mlist_add(&cmd->list, &instance->cmd_pool_list);
+
+ mutex_exit(&instance->cmd_pool_mtx);
+}
+
+/*
+ * destroy_mfi_frame_pool
+ */
+static void
+destroy_mfi_frame_pool(struct drsas_instance *instance)
+{
+ int i;
+ uint32_t max_cmd = instance->max_fw_cmds;
+
+ struct drsas_cmd *cmd;
+
+ /* return all frames to pool */
+ for (i = 0; i < max_cmd+1; i++) {
+
+ cmd = instance->cmd_list[i];
+
+ if (cmd->frame_dma_obj_status == DMA_OBJ_ALLOCATED)
+ (void) drsas_free_dma_obj(instance, cmd->frame_dma_obj);
+
+ cmd->frame_dma_obj_status = DMA_OBJ_FREED;
+ }
+
+}
+
+/*
+ * create_mfi_frame_pool
+ */
+static int
+create_mfi_frame_pool(struct drsas_instance *instance)
+{
+ int i = 0;
+ int cookie_cnt;
+ uint16_t max_cmd;
+ uint16_t sge_sz;
+ uint32_t sgl_sz;
+ uint32_t tot_frame_size;
+
+ struct drsas_cmd *cmd;
+
+ max_cmd = instance->max_fw_cmds;
+
+ sge_sz = sizeof (struct drsas_sge64);
+
+ /* calculated the number of 64byte frames required for SGL */
+ sgl_sz = sge_sz * instance->max_num_sge;
+ tot_frame_size = sgl_sz + MRMFI_FRAME_SIZE + SENSE_LENGTH;
+
+ con_log(CL_DLEVEL3, (CE_NOTE, "create_mfi_frame_pool: "
+ "sgl_sz %x tot_frame_size %x", sgl_sz, tot_frame_size));
+
+ while (i < max_cmd+1) {
+ cmd = instance->cmd_list[i];
+
+ cmd->frame_dma_obj.size = tot_frame_size;
+ cmd->frame_dma_obj.dma_attr = drsas_generic_dma_attr;
+ cmd->frame_dma_obj.dma_attr.dma_attr_addr_hi = 0xFFFFFFFFU;
+ cmd->frame_dma_obj.dma_attr.dma_attr_count_max = 0xFFFFFFFFU;
+ cmd->frame_dma_obj.dma_attr.dma_attr_sgllen = 1;
+ cmd->frame_dma_obj.dma_attr.dma_attr_align = 64;
+
+
+ cookie_cnt = drsas_alloc_dma_obj(instance, &cmd->frame_dma_obj,
+ (uchar_t)DDI_STRUCTURE_LE_ACC);
+
+ if (cookie_cnt == -1 || cookie_cnt > 1) {
+ con_log(CL_ANN, (CE_WARN,
+ "create_mfi_frame_pool: could not alloc."));
+ return (DDI_FAILURE);
+ }
+
+ bzero(cmd->frame_dma_obj.buffer, tot_frame_size);
+
+ cmd->frame_dma_obj_status = DMA_OBJ_ALLOCATED;
+ cmd->frame = (union drsas_frame *)cmd->frame_dma_obj.buffer;
+ cmd->frame_phys_addr =
+ cmd->frame_dma_obj.dma_cookie[0].dmac_address;
+
+ cmd->sense = (uint8_t *)(((unsigned long)
+ cmd->frame_dma_obj.buffer) +
+ tot_frame_size - SENSE_LENGTH);
+ cmd->sense_phys_addr =
+ cmd->frame_dma_obj.dma_cookie[0].dmac_address +
+ tot_frame_size - SENSE_LENGTH;
+
+ if (!cmd->frame || !cmd->sense) {
+ con_log(CL_ANN, (CE_NOTE,
+ "dr_sas: pci_pool_alloc failed"));
+
+ return (ENOMEM);
+ }
+
+ ddi_put32(cmd->frame_dma_obj.acc_handle,
+ &cmd->frame->io.context, cmd->index);
+ i++;
+
+ con_log(CL_DLEVEL3, (CE_NOTE, "[%x]-%x",
+ cmd->index, cmd->frame_phys_addr));
+ }
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * free_additional_dma_buffer
+ */
+static void
+free_additional_dma_buffer(struct drsas_instance *instance)
+{
+ if (instance->mfi_internal_dma_obj.status == DMA_OBJ_ALLOCATED) {
+ (void) drsas_free_dma_obj(instance,
+ instance->mfi_internal_dma_obj);
+ instance->mfi_internal_dma_obj.status = DMA_OBJ_FREED;
+ }
+
+ if (instance->mfi_evt_detail_obj.status == DMA_OBJ_ALLOCATED) {
+ (void) drsas_free_dma_obj(instance,
+ instance->mfi_evt_detail_obj);
+ instance->mfi_evt_detail_obj.status = DMA_OBJ_FREED;
+ }
+}
+
+/*
+ * alloc_additional_dma_buffer
+ */
+static int
+alloc_additional_dma_buffer(struct drsas_instance *instance)
+{
+ uint32_t reply_q_sz;
+ uint32_t internal_buf_size = PAGESIZE*2;
+
+ /* max cmds plus 1 + producer & consumer */
+ reply_q_sz = sizeof (uint32_t) * (instance->max_fw_cmds + 1 + 2);
+
+ instance->mfi_internal_dma_obj.size = internal_buf_size;
+ instance->mfi_internal_dma_obj.dma_attr = drsas_generic_dma_attr;
+ instance->mfi_internal_dma_obj.dma_attr.dma_attr_addr_hi = 0xFFFFFFFFU;
+ instance->mfi_internal_dma_obj.dma_attr.dma_attr_count_max =
+ 0xFFFFFFFFU;
+ instance->mfi_internal_dma_obj.dma_attr.dma_attr_sgllen = 1;
+
+ if (drsas_alloc_dma_obj(instance, &instance->mfi_internal_dma_obj,
+ (uchar_t)DDI_STRUCTURE_LE_ACC) != 1) {
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas: could not alloc reply queue"));
+ return (DDI_FAILURE);
+ }
+
+ bzero(instance->mfi_internal_dma_obj.buffer, internal_buf_size);
+
+ instance->mfi_internal_dma_obj.status |= DMA_OBJ_ALLOCATED;
+
+ instance->producer = (uint32_t *)((unsigned long)
+ instance->mfi_internal_dma_obj.buffer);
+ instance->consumer = (uint32_t *)((unsigned long)
+ instance->mfi_internal_dma_obj.buffer + 4);
+ instance->reply_queue = (uint32_t *)((unsigned long)
+ instance->mfi_internal_dma_obj.buffer + 8);
+ instance->internal_buf = (caddr_t)(((unsigned long)
+ instance->mfi_internal_dma_obj.buffer) + reply_q_sz + 8);
+ instance->internal_buf_dmac_add =
+ instance->mfi_internal_dma_obj.dma_cookie[0].dmac_address +
+ (reply_q_sz + 8);
+ instance->internal_buf_size = internal_buf_size -
+ (reply_q_sz + 8);
+
+ /* allocate evt_detail */
+ instance->mfi_evt_detail_obj.size = sizeof (struct drsas_evt_detail);
+ instance->mfi_evt_detail_obj.dma_attr = drsas_generic_dma_attr;
+ instance->mfi_evt_detail_obj.dma_attr.dma_attr_addr_hi = 0xFFFFFFFFU;
+ instance->mfi_evt_detail_obj.dma_attr.dma_attr_count_max = 0xFFFFFFFFU;
+ instance->mfi_evt_detail_obj.dma_attr.dma_attr_sgllen = 1;
+ instance->mfi_evt_detail_obj.dma_attr.dma_attr_align = 1;
+
+ if (drsas_alloc_dma_obj(instance, &instance->mfi_evt_detail_obj,
+ (uchar_t)DDI_STRUCTURE_LE_ACC) != 1) {
+ con_log(CL_ANN, (CE_WARN, "alloc_additional_dma_buffer: "
+ "could not allocate data transfer buffer."));
+ return (DDI_FAILURE);
+ }
+
+ bzero(instance->mfi_evt_detail_obj.buffer,
+ sizeof (struct drsas_evt_detail));
+
+ instance->mfi_evt_detail_obj.status |= DMA_OBJ_ALLOCATED;
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * free_space_for_mfi
+ */
+static void
+free_space_for_mfi(struct drsas_instance *instance)
+{
+ int i;
+ uint32_t max_cmd = instance->max_fw_cmds;
+
+ /* already freed */
+ if (instance->cmd_list == NULL) {
+ return;
+ }
+
+ free_additional_dma_buffer(instance);
+
+ /* first free the MFI frame pool */
+ destroy_mfi_frame_pool(instance);
+
+ /* free all the commands in the cmd_list */
+ for (i = 0; i < instance->max_fw_cmds+1; i++) {
+ kmem_free(instance->cmd_list[i],
+ sizeof (struct drsas_cmd));
+
+ instance->cmd_list[i] = NULL;
+ }
+
+ /* free the cmd_list buffer itself */
+ kmem_free(instance->cmd_list,
+ sizeof (struct drsas_cmd *) * (max_cmd+1));
+
+ instance->cmd_list = NULL;
+
+ INIT_LIST_HEAD(&instance->cmd_pool_list);
+}
+
+/*
+ * alloc_space_for_mfi
+ */
+static int
+alloc_space_for_mfi(struct drsas_instance *instance)
+{
+ int i;
+ uint32_t max_cmd;
+ size_t sz;
+
+ struct drsas_cmd *cmd;
+
+ max_cmd = instance->max_fw_cmds;
+
+ /* reserve 1 more slot for flush_cache */
+ sz = sizeof (struct drsas_cmd *) * (max_cmd+1);
+
+ /*
+ * instance->cmd_list is an array of struct drsas_cmd pointers.
+ * Allocate the dynamic array first and then allocate individual
+ * commands.
+ */
+ instance->cmd_list = kmem_zalloc(sz, KM_SLEEP);
+ ASSERT(instance->cmd_list);
+
+ for (i = 0; i < max_cmd+1; i++) {
+ instance->cmd_list[i] = kmem_zalloc(sizeof (struct drsas_cmd),
+ KM_SLEEP);
+ ASSERT(instance->cmd_list[i]);
+ }
+
+ INIT_LIST_HEAD(&instance->cmd_pool_list);
+
+ /* add all the commands to command pool (instance->cmd_pool) */
+ for (i = 0; i < max_cmd; i++) {
+ cmd = instance->cmd_list[i];
+ cmd->index = i;
+
+ mlist_add_tail(&cmd->list, &instance->cmd_pool_list);
+ }
+
+ /* single slot for flush_cache won't be added in command pool */
+ cmd = instance->cmd_list[max_cmd];
+ cmd->index = i;
+
+ /* create a frame pool and assign one frame to each cmd */
+ if (create_mfi_frame_pool(instance)) {
+ con_log(CL_ANN, (CE_NOTE, "error creating frame DMA pool"));
+ return (DDI_FAILURE);
+ }
+
+ /* create a frame pool and assign one frame to each cmd */
+ if (alloc_additional_dma_buffer(instance)) {
+ con_log(CL_ANN, (CE_NOTE, "error creating frame DMA pool"));
+ return (DDI_FAILURE);
+ }
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * get_ctrl_info
+ */
+static int
+get_ctrl_info(struct drsas_instance *instance,
+ struct drsas_ctrl_info *ctrl_info)
+{
+ int ret = 0;
+
+ struct drsas_cmd *cmd;
+ struct drsas_dcmd_frame *dcmd;
+ struct drsas_ctrl_info *ci;
+
+ cmd = get_mfi_pkt(instance);
+
+ if (!cmd) {
+ con_log(CL_ANN, (CE_WARN,
+ "Failed to get a cmd for ctrl info"));
+ return (DDI_FAILURE);
+ }
+ /* Clear the frame buffer and assign back the context id */
+ (void) memset((char *)&cmd->frame[0], 0, sizeof (union drsas_frame));
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &cmd->frame->hdr.context,
+ cmd->index);
+
+ dcmd = &cmd->frame->dcmd;
+
+ ci = (struct drsas_ctrl_info *)instance->internal_buf;
+
+ if (!ci) {
+ con_log(CL_ANN, (CE_WARN,
+ "Failed to alloc mem for ctrl info"));
+ return_mfi_pkt(instance, cmd);
+ return (DDI_FAILURE);
+ }
+
+ (void) memset(ci, 0, sizeof (struct drsas_ctrl_info));
+
+ /* for( i = 0; i < DCMD_MBOX_SZ; i++ ) dcmd->mbox.b[i] = 0; */
+ (void) memset(dcmd->mbox.b, 0, DCMD_MBOX_SZ);
+
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &dcmd->cmd, MFI_CMD_OP_DCMD);
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &dcmd->cmd_status,
+ MFI_CMD_STATUS_POLL_MODE);
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &dcmd->sge_count, 1);
+ ddi_put16(cmd->frame_dma_obj.acc_handle, &dcmd->flags,
+ MFI_FRAME_DIR_READ);
+ ddi_put16(cmd->frame_dma_obj.acc_handle, &dcmd->timeout, 0);
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->data_xfer_len,
+ sizeof (struct drsas_ctrl_info));
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->opcode,
+ DR_DCMD_CTRL_GET_INFO);
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->sgl.sge32[0].phys_addr,
+ instance->internal_buf_dmac_add);
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->sgl.sge32[0].length,
+ sizeof (struct drsas_ctrl_info));
+
+ cmd->frame_count = 1;
+
+ if (!instance->func_ptr->issue_cmd_in_poll_mode(instance, cmd)) {
+ ret = 0;
+ ddi_rep_get8(cmd->frame_dma_obj.acc_handle,
+ (uint8_t *)ctrl_info, (uint8_t *)ci,
+ sizeof (struct drsas_ctrl_info), DDI_DEV_AUTOINCR);
+ } else {
+ con_log(CL_ANN, (CE_WARN, "get_ctrl_info: Ctrl info failed"));
+ ret = -1;
+ }
+
+ return_mfi_pkt(instance, cmd);
+ if (drsas_common_check(instance, cmd) != DDI_SUCCESS) {
+ ret = -1;
+ }
+
+ return (ret);
+}
+
+/*
+ * abort_aen_cmd
+ */
+static int
+abort_aen_cmd(struct drsas_instance *instance,
+ struct drsas_cmd *cmd_to_abort)
+{
+ int ret = 0;
+
+ struct drsas_cmd *cmd;
+ struct drsas_abort_frame *abort_fr;
+
+ cmd = get_mfi_pkt(instance);
+
+ if (!cmd) {
+ con_log(CL_ANN, (CE_WARN,
+ "Failed to get a cmd for ctrl info"));
+ return (DDI_FAILURE);
+ }
+ /* Clear the frame buffer and assign back the context id */
+ (void) memset((char *)&cmd->frame[0], 0, sizeof (union drsas_frame));
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &cmd->frame->hdr.context,
+ cmd->index);
+
+ abort_fr = &cmd->frame->abort;
+
+ /* prepare and issue the abort frame */
+ ddi_put8(cmd->frame_dma_obj.acc_handle,
+ &abort_fr->cmd, MFI_CMD_OP_ABORT);
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &abort_fr->cmd_status,
+ MFI_CMD_STATUS_SYNC_MODE);
+ ddi_put16(cmd->frame_dma_obj.acc_handle, &abort_fr->flags, 0);
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &abort_fr->abort_context,
+ cmd_to_abort->index);
+ ddi_put32(cmd->frame_dma_obj.acc_handle,
+ &abort_fr->abort_mfi_phys_addr_lo, cmd_to_abort->frame_phys_addr);
+ ddi_put32(cmd->frame_dma_obj.acc_handle,
+ &abort_fr->abort_mfi_phys_addr_hi, 0);
+
+ instance->aen_cmd->abort_aen = 1;
+
+ cmd->sync_cmd = DRSAS_TRUE;
+ cmd->frame_count = 1;
+
+ if (instance->func_ptr->issue_cmd_in_sync_mode(instance, cmd)) {
+ con_log(CL_ANN, (CE_WARN,
+ "abort_aen_cmd: issue_cmd_in_sync_mode failed"));
+ ret = -1;
+ } else {
+ ret = 0;
+ }
+
+ instance->aen_cmd->abort_aen = 1;
+ instance->aen_cmd = 0;
+
+ return_mfi_pkt(instance, cmd);
+ (void) drsas_common_check(instance, cmd);
+
+ return (ret);
+}
+
+/*
+ * init_mfi
+ */
+static int
+init_mfi(struct drsas_instance *instance)
+{
+ struct drsas_cmd *cmd;
+ struct drsas_ctrl_info ctrl_info;
+ struct drsas_init_frame *init_frame;
+ struct drsas_init_queue_info *initq_info;
+
+ /* we expect the FW state to be READY */
+ if (mfi_state_transition_to_ready(instance)) {
+ con_log(CL_ANN, (CE_WARN, "dr_sas: F/W is not ready"));
+ goto fail_ready_state;
+ }
+
+ /* get various operational parameters from status register */
+ instance->max_num_sge =
+ (instance->func_ptr->read_fw_status_reg(instance) &
+ 0xFF0000) >> 0x10;
+ /*
+ * Reduce the max supported cmds by 1. This is to ensure that the
+ * reply_q_sz (1 more than the max cmd that driver may send)
+ * does not exceed max cmds that the FW can support
+ */
+ instance->max_fw_cmds =
+ instance->func_ptr->read_fw_status_reg(instance) & 0xFFFF;
+ instance->max_fw_cmds = instance->max_fw_cmds - 1;
+
+ instance->max_num_sge =
+ (instance->max_num_sge > DRSAS_MAX_SGE_CNT) ?
+ DRSAS_MAX_SGE_CNT : instance->max_num_sge;
+
+ /* create a pool of commands */
+ if (alloc_space_for_mfi(instance) != DDI_SUCCESS)
+ goto fail_alloc_fw_space;
+
+ /*
+ * Prepare a init frame. Note the init frame points to queue info
+ * structure. Each frame has SGL allocated after first 64 bytes. For
+ * this frame - since we don't need any SGL - we use SGL's space as
+ * queue info structure
+ */
+ cmd = get_mfi_pkt(instance);
+ /* Clear the frame buffer and assign back the context id */
+ (void) memset((char *)&cmd->frame[0], 0, sizeof (union drsas_frame));
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &cmd->frame->hdr.context,
+ cmd->index);
+
+ init_frame = (struct drsas_init_frame *)cmd->frame;
+ initq_info = (struct drsas_init_queue_info *)
+ ((unsigned long)init_frame + 64);
+
+ (void) memset(init_frame, 0, MRMFI_FRAME_SIZE);
+ (void) memset(initq_info, 0, sizeof (struct drsas_init_queue_info));
+
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &initq_info->init_flags, 0);
+
+ ddi_put32(cmd->frame_dma_obj.acc_handle,
+ &initq_info->reply_queue_entries, instance->max_fw_cmds + 1);
+
+ ddi_put32(cmd->frame_dma_obj.acc_handle,
+ &initq_info->producer_index_phys_addr_hi, 0);
+ ddi_put32(cmd->frame_dma_obj.acc_handle,
+ &initq_info->producer_index_phys_addr_lo,
+ instance->mfi_internal_dma_obj.dma_cookie[0].dmac_address);
+
+ ddi_put32(cmd->frame_dma_obj.acc_handle,
+ &initq_info->consumer_index_phys_addr_hi, 0);
+ ddi_put32(cmd->frame_dma_obj.acc_handle,
+ &initq_info->consumer_index_phys_addr_lo,
+ instance->mfi_internal_dma_obj.dma_cookie[0].dmac_address + 4);
+
+ ddi_put32(cmd->frame_dma_obj.acc_handle,
+ &initq_info->reply_queue_start_phys_addr_hi, 0);
+ ddi_put32(cmd->frame_dma_obj.acc_handle,
+ &initq_info->reply_queue_start_phys_addr_lo,
+ instance->mfi_internal_dma_obj.dma_cookie[0].dmac_address + 8);
+
+ ddi_put8(cmd->frame_dma_obj.acc_handle,
+ &init_frame->cmd, MFI_CMD_OP_INIT);
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &init_frame->cmd_status,
+ MFI_CMD_STATUS_POLL_MODE);
+ ddi_put16(cmd->frame_dma_obj.acc_handle, &init_frame->flags, 0);
+ ddi_put32(cmd->frame_dma_obj.acc_handle,
+ &init_frame->queue_info_new_phys_addr_lo,
+ cmd->frame_phys_addr + 64);
+ ddi_put32(cmd->frame_dma_obj.acc_handle,
+ &init_frame->queue_info_new_phys_addr_hi, 0);
+
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &init_frame->data_xfer_len,
+ sizeof (struct drsas_init_queue_info));
+
+ cmd->frame_count = 1;
+
+ /* issue the init frame in polled mode */
+ if (instance->func_ptr->issue_cmd_in_poll_mode(instance, cmd)) {
+ con_log(CL_ANN, (CE_WARN, "failed to init firmware"));
+ goto fail_fw_init;
+ }
+
+ return_mfi_pkt(instance, cmd);
+ if (drsas_common_check(instance, cmd) != DDI_SUCCESS) {
+ goto fail_fw_init;
+ }
+
+ /* gather misc FW related information */
+ if (!get_ctrl_info(instance, &ctrl_info)) {
+ instance->max_sectors_per_req = ctrl_info.max_request_size;
+ con_log(CL_ANN1, (CE_NOTE, "product name %s ld present %d",
+ ctrl_info.product_name, ctrl_info.ld_present_count));
+ } else {
+ instance->max_sectors_per_req = instance->max_num_sge *
+ PAGESIZE / 512;
+ }
+
+ if (drsas_check_acc_handle(instance->regmap_handle) != DDI_SUCCESS) {
+ goto fail_fw_init;
+ }
+
+ return (DDI_SUCCESS);
+
+fail_fw_init:
+fail_alloc_fw_space:
+
+ free_space_for_mfi(instance);
+
+fail_ready_state:
+ ddi_regs_map_free(&instance->regmap_handle);
+
+fail_mfi_reg_setup:
+ return (DDI_FAILURE);
+}
+
+/*
+ * mfi_state_transition_to_ready : Move the FW to READY state
+ *
+ * @reg_set : MFI register set
+ */
+static int
+mfi_state_transition_to_ready(struct drsas_instance *instance)
+{
+ int i;
+ uint8_t max_wait;
+ uint32_t fw_ctrl;
+ uint32_t fw_state;
+ uint32_t cur_state;
+
+ fw_state =
+ instance->func_ptr->read_fw_status_reg(instance) & MFI_STATE_MASK;
+ con_log(CL_ANN1, (CE_NOTE,
+ "mfi_state_transition_to_ready:FW state = 0x%x", fw_state));
+
+ while (fw_state != MFI_STATE_READY) {
+ con_log(CL_ANN, (CE_NOTE,
+ "mfi_state_transition_to_ready:FW state%x", fw_state));
+
+ switch (fw_state) {
+ case MFI_STATE_FAULT:
+ con_log(CL_ANN, (CE_NOTE,
+ "dr_sas: FW in FAULT state!!"));
+
+ return (ENODEV);
+ case MFI_STATE_WAIT_HANDSHAKE:
+ /* set the CLR bit in IMR0 */
+ con_log(CL_ANN, (CE_NOTE,
+ "dr_sas: FW waiting for HANDSHAKE"));
+ /*
+ * PCI_Hot Plug: MFI F/W requires
+ * (MFI_INIT_CLEAR_HANDSHAKE|MFI_INIT_HOTPLUG)
+ * to be set
+ */
+ /* WR_IB_MSG_0(MFI_INIT_CLEAR_HANDSHAKE, instance); */
+ WR_IB_DOORBELL(MFI_INIT_CLEAR_HANDSHAKE |
+ MFI_INIT_HOTPLUG, instance);
+
+ max_wait = 2;
+ cur_state = MFI_STATE_WAIT_HANDSHAKE;
+ break;
+ case MFI_STATE_BOOT_MESSAGE_PENDING:
+ /* set the CLR bit in IMR0 */
+ con_log(CL_ANN, (CE_NOTE,
+ "dr_sas: FW state boot message pending"));
+ /*
+ * PCI_Hot Plug: MFI F/W requires
+ * (MFI_INIT_CLEAR_HANDSHAKE|MFI_INIT_HOTPLUG)
+ * to be set
+ */
+ WR_IB_DOORBELL(MFI_INIT_HOTPLUG, instance);
+
+ max_wait = 10;
+ cur_state = MFI_STATE_BOOT_MESSAGE_PENDING;
+ break;
+ case MFI_STATE_OPERATIONAL:
+ /* bring it to READY state; assuming max wait 2 secs */
+ instance->func_ptr->disable_intr(instance);
+ con_log(CL_ANN1, (CE_NOTE,
+ "dr_sas: FW in OPERATIONAL state"));
+ /*
+ * PCI_Hot Plug: MFI F/W requires
+ * (MFI_INIT_READY | MFI_INIT_MFIMODE | MFI_INIT_ABORT)
+ * to be set
+ */
+ /* WR_IB_DOORBELL(MFI_INIT_READY, instance); */
+ WR_IB_DOORBELL(MFI_RESET_FLAGS, instance);
+
+ max_wait = 10;
+ cur_state = MFI_STATE_OPERATIONAL;
+ break;
+ case MFI_STATE_UNDEFINED:
+ /* this state should not last for more than 2 seconds */
+ con_log(CL_ANN, (CE_NOTE, "FW state undefined"));
+
+ max_wait = 2;
+ cur_state = MFI_STATE_UNDEFINED;
+ break;
+ case MFI_STATE_BB_INIT:
+ max_wait = 2;
+ cur_state = MFI_STATE_BB_INIT;
+ break;
+ case MFI_STATE_FW_INIT:
+ max_wait = 2;
+ cur_state = MFI_STATE_FW_INIT;
+ break;
+ case MFI_STATE_DEVICE_SCAN:
+ max_wait = 10;
+ cur_state = MFI_STATE_DEVICE_SCAN;
+ break;
+ default:
+ con_log(CL_ANN, (CE_NOTE,
+ "dr_sas: Unknown state 0x%x", fw_state));
+ return (ENODEV);
+ }
+
+ /* the cur_state should not last for more than max_wait secs */
+ for (i = 0; i < (max_wait * MILLISEC); i++) {
+ /* fw_state = RD_OB_MSG_0(instance) & MFI_STATE_MASK; */
+ fw_state =
+ instance->func_ptr->read_fw_status_reg(instance) &
+ MFI_STATE_MASK;
+
+ if (fw_state == cur_state) {
+ delay(1 * drv_usectohz(MILLISEC));
+ } else {
+ break;
+ }
+ }
+
+ /* return error if fw_state hasn't changed after max_wait */
+ if (fw_state == cur_state) {
+ con_log(CL_ANN, (CE_NOTE,
+ "FW state hasn't changed in %d secs", max_wait));
+ return (ENODEV);
+ }
+ };
+
+ fw_ctrl = RD_IB_DOORBELL(instance);
+
+ con_log(CL_ANN1, (CE_NOTE,
+ "mfi_state_transition_to_ready:FW ctrl = 0x%x", fw_ctrl));
+
+ /*
+ * Write 0xF to the doorbell register to do the following.
+ * - Abort all outstanding commands (bit 0).
+ * - Transition from OPERATIONAL to READY state (bit 1).
+ * - Discard (possible) low MFA posted in 64-bit mode (bit-2).
+ * - Set to release FW to continue running (i.e. BIOS handshake
+ * (bit 3).
+ */
+ WR_IB_DOORBELL(0xF, instance);
+
+ if (drsas_check_acc_handle(instance->regmap_handle) != DDI_SUCCESS) {
+ return (ENODEV);
+ }
+ return (DDI_SUCCESS);
+}
+
+/*
+ * get_seq_num
+ */
+static int
+get_seq_num(struct drsas_instance *instance,
+ struct drsas_evt_log_info *eli)
+{
+ int ret = DDI_SUCCESS;
+
+ dma_obj_t dcmd_dma_obj;
+ struct drsas_cmd *cmd;
+ struct drsas_dcmd_frame *dcmd;
+
+ cmd = get_mfi_pkt(instance);
+
+ if (!cmd) {
+ cmn_err(CE_WARN, "dr_sas: failed to get a cmd");
+ return (ENOMEM);
+ }
+ /* Clear the frame buffer and assign back the context id */
+ (void) memset((char *)&cmd->frame[0], 0, sizeof (union drsas_frame));
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &cmd->frame->hdr.context,
+ cmd->index);
+
+ dcmd = &cmd->frame->dcmd;
+
+ /* allocate the data transfer buffer */
+ dcmd_dma_obj.size = sizeof (struct drsas_evt_log_info);
+ dcmd_dma_obj.dma_attr = drsas_generic_dma_attr;
+ dcmd_dma_obj.dma_attr.dma_attr_addr_hi = 0xFFFFFFFFU;
+ dcmd_dma_obj.dma_attr.dma_attr_count_max = 0xFFFFFFFFU;
+ dcmd_dma_obj.dma_attr.dma_attr_sgllen = 1;
+ dcmd_dma_obj.dma_attr.dma_attr_align = 1;
+
+ if (drsas_alloc_dma_obj(instance, &dcmd_dma_obj,
+ (uchar_t)DDI_STRUCTURE_LE_ACC) != 1) {
+ con_log(CL_ANN, (CE_WARN,
+ "get_seq_num: could not allocate data transfer buffer."));
+ return (DDI_FAILURE);
+ }
+
+ (void) memset(dcmd_dma_obj.buffer, 0,
+ sizeof (struct drsas_evt_log_info));
+
+ (void) memset(dcmd->mbox.b, 0, DCMD_MBOX_SZ);
+
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &dcmd->cmd, MFI_CMD_OP_DCMD);
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &dcmd->cmd_status, 0);
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &dcmd->sge_count, 1);
+ ddi_put16(cmd->frame_dma_obj.acc_handle, &dcmd->flags,
+ MFI_FRAME_DIR_READ);
+ ddi_put16(cmd->frame_dma_obj.acc_handle, &dcmd->timeout, 0);
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->data_xfer_len,
+ sizeof (struct drsas_evt_log_info));
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->opcode,
+ DR_DCMD_CTRL_EVENT_GET_INFO);
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->sgl.sge32[0].length,
+ sizeof (struct drsas_evt_log_info));
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->sgl.sge32[0].phys_addr,
+ dcmd_dma_obj.dma_cookie[0].dmac_address);
+
+ cmd->sync_cmd = DRSAS_TRUE;
+ cmd->frame_count = 1;
+
+ if (instance->func_ptr->issue_cmd_in_sync_mode(instance, cmd)) {
+ cmn_err(CE_WARN, "get_seq_num: "
+ "failed to issue DRSAS_DCMD_CTRL_EVENT_GET_INFO");
+ ret = DDI_FAILURE;
+ } else {
+ /* copy the data back into callers buffer */
+ ddi_rep_get8(cmd->frame_dma_obj.acc_handle, (uint8_t *)eli,
+ (uint8_t *)dcmd_dma_obj.buffer,
+ sizeof (struct drsas_evt_log_info), DDI_DEV_AUTOINCR);
+ ret = DDI_SUCCESS;
+ }
+
+ if (drsas_free_dma_obj(instance, dcmd_dma_obj) != DDI_SUCCESS)
+ ret = DDI_FAILURE;
+
+ return_mfi_pkt(instance, cmd);
+ if (drsas_common_check(instance, cmd) != DDI_SUCCESS) {
+ ret = DDI_FAILURE;
+ }
+ return (ret);
+}
+
+/*
+ * start_mfi_aen
+ */
+static int
+start_mfi_aen(struct drsas_instance *instance)
+{
+ int ret = 0;
+
+ struct drsas_evt_log_info eli;
+ union drsas_evt_class_locale class_locale;
+
+ /* get the latest sequence number from FW */
+ (void) memset(&eli, 0, sizeof (struct drsas_evt_log_info));
+
+ if (get_seq_num(instance, &eli)) {
+ cmn_err(CE_WARN, "start_mfi_aen: failed to get seq num");
+ return (-1);
+ }
+
+ /* register AEN with FW for latest sequence number plus 1 */
+ class_locale.members.reserved = 0;
+ class_locale.members.locale = DR_EVT_LOCALE_ALL;
+ class_locale.members.class = DR_EVT_CLASS_INFO;
+ ret = register_mfi_aen(instance, eli.newest_seq_num + 1,
+ class_locale.word);
+
+ if (ret) {
+ cmn_err(CE_WARN, "start_mfi_aen: aen registration failed");
+ return (-1);
+ }
+
+ return (ret);
+}
+
+/*
+ * flush_cache
+ */
+static void
+flush_cache(struct drsas_instance *instance)
+{
+ struct drsas_cmd *cmd = NULL;
+ struct drsas_dcmd_frame *dcmd;
+ uint32_t max_cmd = instance->max_fw_cmds;
+
+ cmd = instance->cmd_list[max_cmd];
+
+ if (cmd == NULL)
+ return;
+
+ dcmd = &cmd->frame->dcmd;
+
+ (void) memset(dcmd->mbox.b, 0, DCMD_MBOX_SZ);
+
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &dcmd->cmd, MFI_CMD_OP_DCMD);
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &dcmd->cmd_status, 0x0);
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &dcmd->sge_count, 0);
+ ddi_put16(cmd->frame_dma_obj.acc_handle, &dcmd->flags,
+ MFI_FRAME_DIR_NONE);
+ ddi_put16(cmd->frame_dma_obj.acc_handle, &dcmd->timeout, 0);
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->data_xfer_len, 0);
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->opcode,
+ DR_DCMD_CTRL_CACHE_FLUSH);
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &dcmd->mbox.b[0],
+ DR_FLUSH_CTRL_CACHE | DR_FLUSH_DISK_CACHE);
+
+ cmd->frame_count = 1;
+
+ if (instance->func_ptr->issue_cmd_in_poll_mode(instance, cmd)) {
+ con_log(CL_ANN1, (CE_WARN,
+ "flush_cache: failed to issue MFI_DCMD_CTRL_CACHE_FLUSH"));
+ }
+ con_log(CL_DLEVEL1, (CE_NOTE, "done"));
+}
+
+/*
+ * service_mfi_aen- Completes an AEN command
+ * @instance: Adapter soft state
+ * @cmd: Command to be completed
+ *
+ */
+static void
+service_mfi_aen(struct drsas_instance *instance, struct drsas_cmd *cmd)
+{
+ uint32_t seq_num;
+ struct drsas_evt_detail *evt_detail =
+ (struct drsas_evt_detail *)instance->mfi_evt_detail_obj.buffer;
+ int rval = 0;
+ int tgt = 0;
+ ddi_acc_handle_t acc_handle;
+
+ acc_handle = cmd->frame_dma_obj.acc_handle;
+
+ cmd->cmd_status = ddi_get8(acc_handle, &cmd->frame->io.cmd_status);
+
+ if (cmd->cmd_status == ENODATA) {
+ cmd->cmd_status = 0;
+ }
+
+ /*
+ * log the MFI AEN event to the sysevent queue so that
+ * application will get noticed
+ */
+ if (ddi_log_sysevent(instance->dip, DDI_VENDOR_LSI, "LSIMEGA", "SAS",
+ NULL, NULL, DDI_NOSLEEP) != DDI_SUCCESS) {
+ int instance_no = ddi_get_instance(instance->dip);
+ con_log(CL_ANN, (CE_WARN,
+ "dr_sas%d: Failed to log AEN event", instance_no));
+ }
+ /*
+ * Check for any ld devices that has changed state. i.e. online
+ * or offline.
+ */
+ con_log(CL_ANN1, (CE_NOTE,
+ "AEN: code = %x class = %x locale = %x args = %x",
+ ddi_get32(acc_handle, &evt_detail->code),
+ evt_detail->cl.members.class,
+ ddi_get16(acc_handle, &evt_detail->cl.members.locale),
+ ddi_get8(acc_handle, &evt_detail->arg_type)));
+
+ switch (ddi_get32(acc_handle, &evt_detail->code)) {
+ case DR_EVT_CFG_CLEARED: {
+ for (tgt = 0; tgt < MRDRV_MAX_LD; tgt++) {
+ if (instance->dr_ld_list[tgt].dip != NULL) {
+ rval = drsas_service_evt(instance, tgt, 0,
+ DRSAS_EVT_UNCONFIG_TGT, NULL);
+ con_log(CL_ANN1, (CE_WARN,
+ "dr_sas: CFG CLEARED AEN rval = %d "
+ "tgt id = %d", rval, tgt));
+ }
+ }
+ break;
+ }
+
+ case DR_EVT_LD_DELETED: {
+ rval = drsas_service_evt(instance,
+ ddi_get16(acc_handle, &evt_detail->args.ld.target_id), 0,
+ DRSAS_EVT_UNCONFIG_TGT, NULL);
+ con_log(CL_ANN1, (CE_WARN, "dr_sas: LD DELETED AEN rval = %d "
+ "tgt id = %d index = %d", rval,
+ ddi_get16(acc_handle, &evt_detail->args.ld.target_id),
+ ddi_get8(acc_handle, &evt_detail->args.ld.ld_index)));
+ break;
+ } /* End of DR_EVT_LD_DELETED */
+
+ case DR_EVT_LD_CREATED: {
+ rval = drsas_service_evt(instance,
+ ddi_get16(acc_handle, &evt_detail->args.ld.target_id), 0,
+ DRSAS_EVT_CONFIG_TGT, NULL);
+ con_log(CL_ANN1, (CE_WARN, "dr_sas: LD CREATED AEN rval = %d "
+ "tgt id = %d index = %d", rval,
+ ddi_get16(acc_handle, &evt_detail->args.ld.target_id),
+ ddi_get8(acc_handle, &evt_detail->args.ld.ld_index)));
+ break;
+ } /* End of DR_EVT_LD_CREATED */
+ } /* End of Main Switch */
+
+ /* get copy of seq_num and class/locale for re-registration */
+ seq_num = ddi_get32(acc_handle, &evt_detail->seq_num);
+ seq_num++;
+ (void) memset(instance->mfi_evt_detail_obj.buffer, 0,
+ sizeof (struct drsas_evt_detail));
+
+ ddi_put8(acc_handle, &cmd->frame->dcmd.cmd_status, 0x0);
+ ddi_put32(acc_handle, &cmd->frame->dcmd.mbox.w[0], seq_num);
+
+ instance->aen_seq_num = seq_num;
+
+ cmd->frame_count = 1;
+
+ /* Issue the aen registration frame */
+ instance->func_ptr->issue_cmd(cmd, instance);
+}
+
+/*
+ * complete_cmd_in_sync_mode - Completes an internal command
+ * @instance: Adapter soft state
+ * @cmd: Command to be completed
+ *
+ * The issue_cmd_in_sync_mode() function waits for a command to complete
+ * after it issues a command. This function wakes up that waiting routine by
+ * calling wake_up() on the wait queue.
+ */
+static void
+complete_cmd_in_sync_mode(struct drsas_instance *instance,
+ struct drsas_cmd *cmd)
+{
+ cmd->cmd_status = ddi_get8(cmd->frame_dma_obj.acc_handle,
+ &cmd->frame->io.cmd_status);
+
+ cmd->sync_cmd = DRSAS_FALSE;
+
+ if (cmd->cmd_status == ENODATA) {
+ cmd->cmd_status = 0;
+ }
+
+ cv_broadcast(&instance->int_cmd_cv);
+}
+
+/*
+ * drsas_softintr - The Software ISR
+ * @param arg : HBA soft state
+ *
+ * called from high-level interrupt if hi-level interrupt are not there,
+ * otherwise triggered as a soft interrupt
+ */
+static uint_t
+drsas_softintr(struct drsas_instance *instance)
+{
+ struct scsi_pkt *pkt;
+ struct scsa_cmd *acmd;
+ struct drsas_cmd *cmd;
+ struct mlist_head *pos, *next;
+ mlist_t process_list;
+ struct drsas_header *hdr;
+ struct scsi_arq_status *arqstat;
+
+ con_log(CL_ANN1, (CE_CONT, "drsas_softintr called"));
+
+ ASSERT(instance);
+ mutex_enter(&instance->completed_pool_mtx);
+
+ if (mlist_empty(&instance->completed_pool_list)) {
+ mutex_exit(&instance->completed_pool_mtx);
+ return (DDI_INTR_UNCLAIMED);
+ }
+
+ instance->softint_running = 1;
+
+ INIT_LIST_HEAD(&process_list);
+ mlist_splice(&instance->completed_pool_list, &process_list);
+ INIT_LIST_HEAD(&instance->completed_pool_list);
+
+ mutex_exit(&instance->completed_pool_mtx);
+
+ /* perform all callbacks first, before releasing the SCBs */
+ mlist_for_each_safe(pos, next, &process_list) {
+ cmd = mlist_entry(pos, struct drsas_cmd, list);
+
+ /* syncronize the Cmd frame for the controller */
+ (void) ddi_dma_sync(cmd->frame_dma_obj.dma_handle,
+ 0, 0, DDI_DMA_SYNC_FORCPU);
+
+ if (drsas_check_dma_handle(cmd->frame_dma_obj.dma_handle) !=
+ DDI_SUCCESS) {
+ drsas_fm_ereport(instance, DDI_FM_DEVICE_NO_RESPONSE);
+ ddi_fm_service_impact(instance->dip, DDI_SERVICE_LOST);
+ return (DDI_INTR_UNCLAIMED);
+ }
+
+ hdr = &cmd->frame->hdr;
+
+ /* remove the internal command from the process list */
+ mlist_del_init(&cmd->list);
+
+ switch (ddi_get8(cmd->frame_dma_obj.acc_handle, &hdr->cmd)) {
+ case MFI_CMD_OP_PD_SCSI:
+ case MFI_CMD_OP_LD_SCSI:
+ case MFI_CMD_OP_LD_READ:
+ case MFI_CMD_OP_LD_WRITE:
+ /*
+ * MFI_CMD_OP_PD_SCSI and MFI_CMD_OP_LD_SCSI
+ * could have been issued either through an
+ * IO path or an IOCTL path. If it was via IOCTL,
+ * we will send it to internal completion.
+ */
+ if (cmd->sync_cmd == DRSAS_TRUE) {
+ complete_cmd_in_sync_mode(instance, cmd);
+ break;
+ }
+
+ /* regular commands */
+ acmd = cmd->cmd;
+ pkt = CMD2PKT(acmd);
+
+ if (acmd->cmd_flags & CFLAG_DMAVALID) {
+ if (acmd->cmd_flags & CFLAG_CONSISTENT) {
+ (void) ddi_dma_sync(acmd->cmd_dmahandle,
+ acmd->cmd_dma_offset,
+ acmd->cmd_dma_len,
+ DDI_DMA_SYNC_FORCPU);
+ }
+ }
+
+ pkt->pkt_reason = CMD_CMPLT;
+ pkt->pkt_statistics = 0;
+ pkt->pkt_state = STATE_GOT_BUS
+ | STATE_GOT_TARGET | STATE_SENT_CMD
+ | STATE_XFERRED_DATA | STATE_GOT_STATUS;
+
+ con_log(CL_ANN1, (CE_CONT,
+ "CDB[0] = %x completed for %s: size %lx context %x",
+ pkt->pkt_cdbp[0], ((acmd->islogical) ? "LD" : "PD"),
+ acmd->cmd_dmacount, hdr->context));
+
+ if (pkt->pkt_cdbp[0] == SCMD_INQUIRY) {
+ struct scsi_inquiry *inq;
+
+ if (acmd->cmd_dmacount != 0) {
+ bp_mapin(acmd->cmd_buf);
+ inq = (struct scsi_inquiry *)
+ acmd->cmd_buf->b_un.b_addr;
+
+ /* don't expose physical drives to OS */
+ if (acmd->islogical &&
+ (hdr->cmd_status == MFI_STAT_OK)) {
+ display_scsi_inquiry(
+ (caddr_t)inq);
+ } else if ((hdr->cmd_status ==
+ MFI_STAT_OK) && inq->inq_dtype ==
+ DTYPE_DIRECT) {
+
+ display_scsi_inquiry(
+ (caddr_t)inq);
+
+ /* for physical disk */
+ hdr->cmd_status =
+ MFI_STAT_DEVICE_NOT_FOUND;
+ }
+ }
+ }
+
+ switch (hdr->cmd_status) {
+ case MFI_STAT_OK:
+ pkt->pkt_scbp[0] = STATUS_GOOD;
+ break;
+ case MFI_STAT_LD_CC_IN_PROGRESS:
+ case MFI_STAT_LD_RECON_IN_PROGRESS:
+ pkt->pkt_scbp[0] = STATUS_GOOD;
+ break;
+ case MFI_STAT_LD_INIT_IN_PROGRESS:
+ con_log(CL_ANN,
+ (CE_WARN, "Initialization in Progress"));
+ pkt->pkt_reason = CMD_TRAN_ERR;
+
+ break;
+ case MFI_STAT_SCSI_DONE_WITH_ERROR:
+ con_log(CL_ANN1, (CE_CONT, "scsi_done error"));
+
+ pkt->pkt_reason = CMD_CMPLT;
+ ((struct scsi_status *)
+ pkt->pkt_scbp)->sts_chk = 1;
+
+ if (pkt->pkt_cdbp[0] == SCMD_TEST_UNIT_READY) {
+
+ con_log(CL_ANN,
+ (CE_WARN, "TEST_UNIT_READY fail"));
+
+ } else {
+ pkt->pkt_state |= STATE_ARQ_DONE;
+ arqstat = (void *)(pkt->pkt_scbp);
+ arqstat->sts_rqpkt_reason = CMD_CMPLT;
+ arqstat->sts_rqpkt_resid = 0;
+ arqstat->sts_rqpkt_state |=
+ STATE_GOT_BUS | STATE_GOT_TARGET
+ | STATE_SENT_CMD
+ | STATE_XFERRED_DATA;
+ *(uint8_t *)&arqstat->sts_rqpkt_status =
+ STATUS_GOOD;
+ ddi_rep_get8(
+ cmd->frame_dma_obj.acc_handle,
+ (uint8_t *)
+ &(arqstat->sts_sensedata),
+ cmd->sense,
+ acmd->cmd_scblen -
+ offsetof(struct scsi_arq_status,
+ sts_sensedata), DDI_DEV_AUTOINCR);
+ }
+ break;
+ case MFI_STAT_LD_OFFLINE:
+ case MFI_STAT_DEVICE_NOT_FOUND:
+ con_log(CL_ANN1, (CE_CONT,
+ "device not found error"));
+ pkt->pkt_reason = CMD_DEV_GONE;
+ pkt->pkt_statistics = STAT_DISCON;
+ break;
+ case MFI_STAT_LD_LBA_OUT_OF_RANGE:
+ pkt->pkt_state |= STATE_ARQ_DONE;
+ pkt->pkt_reason = CMD_CMPLT;
+ ((struct scsi_status *)
+ pkt->pkt_scbp)->sts_chk = 1;
+
+ arqstat = (void *)(pkt->pkt_scbp);
+ arqstat->sts_rqpkt_reason = CMD_CMPLT;
+ arqstat->sts_rqpkt_resid = 0;
+ arqstat->sts_rqpkt_state |= STATE_GOT_BUS
+ | STATE_GOT_TARGET | STATE_SENT_CMD
+ | STATE_XFERRED_DATA;
+ *(uint8_t *)&arqstat->sts_rqpkt_status =
+ STATUS_GOOD;
+
+ arqstat->sts_sensedata.es_valid = 1;
+ arqstat->sts_sensedata.es_key =
+ KEY_ILLEGAL_REQUEST;
+ arqstat->sts_sensedata.es_class =
+ CLASS_EXTENDED_SENSE;
+
+ /*
+ * LOGICAL BLOCK ADDRESS OUT OF RANGE:
+ * ASC: 0x21h; ASCQ: 0x00h;
+ */
+ arqstat->sts_sensedata.es_add_code = 0x21;
+ arqstat->sts_sensedata.es_qual_code = 0x00;
+
+ break;
+
+ default:
+ con_log(CL_ANN, (CE_CONT, "Unknown status!"));
+ pkt->pkt_reason = CMD_TRAN_ERR;
+
+ break;
+ }
+
+ atomic_add_16(&instance->fw_outstanding, (-1));
+
+ return_mfi_pkt(instance, cmd);
+
+ (void) drsas_common_check(instance, cmd);
+
+ if (acmd->cmd_dmahandle) {
+ if (drsas_check_dma_handle(
+ acmd->cmd_dmahandle) != DDI_SUCCESS) {
+ ddi_fm_service_impact(instance->dip,
+ DDI_SERVICE_UNAFFECTED);
+ pkt->pkt_reason = CMD_TRAN_ERR;
+ pkt->pkt_statistics = 0;
+ }
+ }
+
+ /* Call the callback routine */
+ if (((pkt->pkt_flags & FLAG_NOINTR) == 0) &&
+ pkt->pkt_comp) {
+ (*pkt->pkt_comp)(pkt);
+ }
+
+ break;
+ case MFI_CMD_OP_SMP:
+ case MFI_CMD_OP_STP:
+ complete_cmd_in_sync_mode(instance, cmd);
+ break;
+ case MFI_CMD_OP_DCMD:
+ /* see if got an event notification */
+ if (ddi_get32(cmd->frame_dma_obj.acc_handle,
+ &cmd->frame->dcmd.opcode) ==
+ DR_DCMD_CTRL_EVENT_WAIT) {
+ if ((instance->aen_cmd == cmd) &&
+ (instance->aen_cmd->abort_aen)) {
+ con_log(CL_ANN, (CE_WARN,
+ "drsas_softintr: "
+ "aborted_aen returned"));
+ } else {
+ atomic_add_16(&instance->fw_outstanding,
+ (-1));
+ service_mfi_aen(instance, cmd);
+ }
+ } else {
+ complete_cmd_in_sync_mode(instance, cmd);
+ }
+
+ break;
+ case MFI_CMD_OP_ABORT:
+ con_log(CL_ANN, (CE_WARN, "MFI_CMD_OP_ABORT complete"));
+ /*
+ * MFI_CMD_OP_ABORT successfully completed
+ * in the synchronous mode
+ */
+ complete_cmd_in_sync_mode(instance, cmd);
+ break;
+ default:
+ drsas_fm_ereport(instance, DDI_FM_DEVICE_NO_RESPONSE);
+ ddi_fm_service_impact(instance->dip, DDI_SERVICE_LOST);
+
+ if (cmd->pkt != NULL) {
+ pkt = cmd->pkt;
+ if (((pkt->pkt_flags & FLAG_NOINTR) == 0) &&
+ pkt->pkt_comp) {
+ (*pkt->pkt_comp)(pkt);
+ }
+ }
+ con_log(CL_ANN, (CE_WARN, "Cmd type unknown !"));
+ break;
+ }
+ }
+
+ instance->softint_running = 0;
+
+ return (DDI_INTR_CLAIMED);
+}
+
+/*
+ * drsas_alloc_dma_obj
+ *
+ * Allocate the memory and other resources for an dma object.
+ */
+static int
+drsas_alloc_dma_obj(struct drsas_instance *instance, dma_obj_t *obj,
+ uchar_t endian_flags)
+{
+ int i;
+ size_t alen = 0;
+ uint_t cookie_cnt;
+ struct ddi_device_acc_attr tmp_endian_attr;
+
+ tmp_endian_attr = endian_attr;
+ tmp_endian_attr.devacc_attr_endian_flags = endian_flags;
+
+ i = ddi_dma_alloc_handle(instance->dip, &obj->dma_attr,
+ DDI_DMA_SLEEP, NULL, &obj->dma_handle);
+ if (i != DDI_SUCCESS) {
+
+ switch (i) {
+ case DDI_DMA_BADATTR :
+ con_log(CL_ANN, (CE_WARN,
+ "Failed ddi_dma_alloc_handle- Bad attribute"));
+ break;
+ case DDI_DMA_NORESOURCES :
+ con_log(CL_ANN, (CE_WARN,
+ "Failed ddi_dma_alloc_handle- No Resources"));
+ break;
+ default :
+ con_log(CL_ANN, (CE_WARN,
+ "Failed ddi_dma_alloc_handle: "
+ "unknown status %d", i));
+ break;
+ }
+
+ return (-1);
+ }
+
+ if ((ddi_dma_mem_alloc(obj->dma_handle, obj->size, &tmp_endian_attr,
+ DDI_DMA_RDWR | DDI_DMA_STREAMING, DDI_DMA_SLEEP, NULL,
+ &obj->buffer, &alen, &obj->acc_handle) != DDI_SUCCESS) ||
+ alen < obj->size) {
+
+ ddi_dma_free_handle(&obj->dma_handle);
+
+ con_log(CL_ANN, (CE_WARN, "Failed : ddi_dma_mem_alloc"));
+
+ return (-1);
+ }
+
+ if (ddi_dma_addr_bind_handle(obj->dma_handle, NULL, obj->buffer,
+ obj->size, DDI_DMA_RDWR | DDI_DMA_STREAMING, DDI_DMA_SLEEP,
+ NULL, &obj->dma_cookie[0], &cookie_cnt) != DDI_SUCCESS) {
+
+ ddi_dma_mem_free(&obj->acc_handle);
+ ddi_dma_free_handle(&obj->dma_handle);
+
+ con_log(CL_ANN, (CE_WARN, "Failed : ddi_dma_addr_bind_handle"));
+
+ return (-1);
+ }
+
+ if (drsas_check_dma_handle(obj->dma_handle) != DDI_SUCCESS) {
+ ddi_fm_service_impact(instance->dip, DDI_SERVICE_LOST);
+ return (-1);
+ }
+
+ if (drsas_check_acc_handle(obj->acc_handle) != DDI_SUCCESS) {
+ ddi_fm_service_impact(instance->dip, DDI_SERVICE_LOST);
+ return (-1);
+ }
+
+ return (cookie_cnt);
+}
+
+/*
+ * drsas_free_dma_obj(struct drsas_instance *, dma_obj_t)
+ *
+ * De-allocate the memory and other resources for an dma object, which must
+ * have been alloated by a previous call to drsas_alloc_dma_obj()
+ */
+static int
+drsas_free_dma_obj(struct drsas_instance *instance, dma_obj_t obj)
+{
+
+ if (drsas_check_dma_handle(obj.dma_handle) != DDI_SUCCESS) {
+ ddi_fm_service_impact(instance->dip, DDI_SERVICE_UNAFFECTED);
+ return (DDI_FAILURE);
+ }
+
+ if (drsas_check_acc_handle(obj.acc_handle) != DDI_SUCCESS) {
+ ddi_fm_service_impact(instance->dip, DDI_SERVICE_UNAFFECTED);
+ return (DDI_FAILURE);
+ }
+
+ (void) ddi_dma_unbind_handle(obj.dma_handle);
+ ddi_dma_mem_free(&obj.acc_handle);
+ ddi_dma_free_handle(&obj.dma_handle);
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * drsas_dma_alloc(instance_t *, struct scsi_pkt *, struct buf *,
+ * int, int (*)())
+ *
+ * Allocate dma resources for a new scsi command
+ */
+static int
+drsas_dma_alloc(struct drsas_instance *instance, struct scsi_pkt *pkt,
+ struct buf *bp, int flags, int (*callback)())
+{
+ int dma_flags;
+ int (*cb)(caddr_t);
+ int i;
+
+ ddi_dma_attr_t tmp_dma_attr = drsas_generic_dma_attr;
+ struct scsa_cmd *acmd = PKT2CMD(pkt);
+
+ acmd->cmd_buf = bp;
+
+ if (bp->b_flags & B_READ) {
+ acmd->cmd_flags &= ~CFLAG_DMASEND;
+ dma_flags = DDI_DMA_READ;
+ } else {
+ acmd->cmd_flags |= CFLAG_DMASEND;
+ dma_flags = DDI_DMA_WRITE;
+ }
+
+ if (flags & PKT_CONSISTENT) {
+ acmd->cmd_flags |= CFLAG_CONSISTENT;
+ dma_flags |= DDI_DMA_CONSISTENT;
+ }
+
+ if (flags & PKT_DMA_PARTIAL) {
+ dma_flags |= DDI_DMA_PARTIAL;
+ }
+
+ dma_flags |= DDI_DMA_REDZONE;
+
+ cb = (callback == NULL_FUNC) ? DDI_DMA_DONTWAIT : DDI_DMA_SLEEP;
+
+ tmp_dma_attr.dma_attr_sgllen = instance->max_num_sge;
+ tmp_dma_attr.dma_attr_addr_hi = 0xffffffffffffffffull;
+
+ if ((i = ddi_dma_alloc_handle(instance->dip, &tmp_dma_attr,
+ cb, 0, &acmd->cmd_dmahandle)) != DDI_SUCCESS) {
+ switch (i) {
+ case DDI_DMA_BADATTR:
+ bioerror(bp, EFAULT);
+ return (DDI_FAILURE);
+
+ case DDI_DMA_NORESOURCES:
+ bioerror(bp, 0);
+ return (DDI_FAILURE);
+
+ default:
+ con_log(CL_ANN, (CE_PANIC, "ddi_dma_alloc_handle: "
+ "impossible result (0x%x)", i));
+ bioerror(bp, EFAULT);
+ return (DDI_FAILURE);
+ }
+ }
+
+ i = ddi_dma_buf_bind_handle(acmd->cmd_dmahandle, bp, dma_flags,
+ cb, 0, &acmd->cmd_dmacookies[0], &acmd->cmd_ncookies);
+
+ switch (i) {
+ case DDI_DMA_PARTIAL_MAP:
+ if ((dma_flags & DDI_DMA_PARTIAL) == 0) {
+ con_log(CL_ANN, (CE_PANIC, "ddi_dma_buf_bind_handle: "
+ "DDI_DMA_PARTIAL_MAP impossible"));
+ goto no_dma_cookies;
+ }
+
+ if (ddi_dma_numwin(acmd->cmd_dmahandle, &acmd->cmd_nwin) ==
+ DDI_FAILURE) {
+ con_log(CL_ANN, (CE_PANIC, "ddi_dma_numwin failed"));
+ goto no_dma_cookies;
+ }
+
+ if (ddi_dma_getwin(acmd->cmd_dmahandle, acmd->cmd_curwin,
+ &acmd->cmd_dma_offset, &acmd->cmd_dma_len,
+ &acmd->cmd_dmacookies[0], &acmd->cmd_ncookies) ==
+ DDI_FAILURE) {
+
+ con_log(CL_ANN, (CE_PANIC, "ddi_dma_getwin failed"));
+ goto no_dma_cookies;
+ }
+
+ goto get_dma_cookies;
+ case DDI_DMA_MAPPED:
+ acmd->cmd_nwin = 1;
+ acmd->cmd_dma_len = 0;
+ acmd->cmd_dma_offset = 0;
+
+get_dma_cookies:
+ i = 0;
+ acmd->cmd_dmacount = 0;
+ for (;;) {
+ acmd->cmd_dmacount +=
+ acmd->cmd_dmacookies[i++].dmac_size;
+
+ if (i == instance->max_num_sge ||
+ i == acmd->cmd_ncookies)
+ break;
+
+ ddi_dma_nextcookie(acmd->cmd_dmahandle,
+ &acmd->cmd_dmacookies[i]);
+ }
+
+ acmd->cmd_cookie = i;
+ acmd->cmd_cookiecnt = i;
+
+ acmd->cmd_flags |= CFLAG_DMAVALID;
+
+ if (bp->b_bcount >= acmd->cmd_dmacount) {
+ pkt->pkt_resid = bp->b_bcount - acmd->cmd_dmacount;
+ } else {
+ pkt->pkt_resid = 0;
+ }
+
+ return (DDI_SUCCESS);
+ case DDI_DMA_NORESOURCES:
+ bioerror(bp, 0);
+ break;
+ case DDI_DMA_NOMAPPING:
+ bioerror(bp, EFAULT);
+ break;
+ case DDI_DMA_TOOBIG:
+ bioerror(bp, EINVAL);
+ break;
+ case DDI_DMA_INUSE:
+ con_log(CL_ANN, (CE_PANIC, "ddi_dma_buf_bind_handle:"
+ " DDI_DMA_INUSE impossible"));
+ break;
+ default:
+ con_log(CL_ANN, (CE_PANIC, "ddi_dma_buf_bind_handle: "
+ "impossible result (0x%x)", i));
+ break;
+ }
+
+no_dma_cookies:
+ ddi_dma_free_handle(&acmd->cmd_dmahandle);
+ acmd->cmd_dmahandle = NULL;
+ acmd->cmd_flags &= ~CFLAG_DMAVALID;
+ return (DDI_FAILURE);
+}
+
+/*
+ * drsas_dma_move(struct drsas_instance *, struct scsi_pkt *, struct buf *)
+ *
+ * move dma resources to next dma window
+ *
+ */
+static int
+drsas_dma_move(struct drsas_instance *instance, struct scsi_pkt *pkt,
+ struct buf *bp)
+{
+ int i = 0;
+
+ struct scsa_cmd *acmd = PKT2CMD(pkt);
+
+ /*
+ * If there are no more cookies remaining in this window,
+ * must move to the next window first.
+ */
+ if (acmd->cmd_cookie == acmd->cmd_ncookies) {
+ if (acmd->cmd_curwin == acmd->cmd_nwin && acmd->cmd_nwin == 1) {
+ return (DDI_SUCCESS);
+ }
+
+ /* at last window, cannot move */
+ if (++acmd->cmd_curwin >= acmd->cmd_nwin) {
+ return (DDI_FAILURE);
+ }
+
+ if (ddi_dma_getwin(acmd->cmd_dmahandle, acmd->cmd_curwin,
+ &acmd->cmd_dma_offset, &acmd->cmd_dma_len,
+ &acmd->cmd_dmacookies[0], &acmd->cmd_ncookies) ==
+ DDI_FAILURE) {
+ return (DDI_FAILURE);
+ }
+
+ acmd->cmd_cookie = 0;
+ } else {
+ /* still more cookies in this window - get the next one */
+ ddi_dma_nextcookie(acmd->cmd_dmahandle,
+ &acmd->cmd_dmacookies[0]);
+ }
+
+ /* get remaining cookies in this window, up to our maximum */
+ for (;;) {
+ acmd->cmd_dmacount += acmd->cmd_dmacookies[i++].dmac_size;
+ acmd->cmd_cookie++;
+
+ if (i == instance->max_num_sge ||
+ acmd->cmd_cookie == acmd->cmd_ncookies) {
+ break;
+ }
+
+ ddi_dma_nextcookie(acmd->cmd_dmahandle,
+ &acmd->cmd_dmacookies[i]);
+ }
+
+ acmd->cmd_cookiecnt = i;
+
+ if (bp->b_bcount >= acmd->cmd_dmacount) {
+ pkt->pkt_resid = bp->b_bcount - acmd->cmd_dmacount;
+ } else {
+ pkt->pkt_resid = 0;
+ }
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * build_cmd
+ */
+static struct drsas_cmd *
+build_cmd(struct drsas_instance *instance, struct scsi_address *ap,
+ struct scsi_pkt *pkt, uchar_t *cmd_done)
+{
+ uint16_t flags = 0;
+ uint32_t i;
+ uint32_t context;
+ uint32_t sge_bytes;
+ ddi_acc_handle_t acc_handle;
+ struct drsas_cmd *cmd;
+ struct drsas_sge64 *mfi_sgl;
+ struct scsa_cmd *acmd = PKT2CMD(pkt);
+ struct drsas_pthru_frame *pthru;
+ struct drsas_io_frame *ldio;
+
+ /* find out if this is logical or physical drive command. */
+ acmd->islogical = MRDRV_IS_LOGICAL(ap);
+ acmd->device_id = MAP_DEVICE_ID(instance, ap);
+ *cmd_done = 0;
+
+ /* get the command packet */
+ if (!(cmd = get_mfi_pkt(instance))) {
+ return (NULL);
+ }
+
+ acc_handle = cmd->frame_dma_obj.acc_handle;
+
+ /* Clear the frame buffer and assign back the context id */
+ (void) memset((char *)&cmd->frame[0], 0, sizeof (union drsas_frame));
+ ddi_put32(acc_handle, &cmd->frame->hdr.context, cmd->index);
+
+ cmd->pkt = pkt;
+ cmd->cmd = acmd;
+
+ /* lets get the command directions */
+ if (acmd->cmd_flags & CFLAG_DMASEND) {
+ flags = MFI_FRAME_DIR_WRITE;
+
+ if (acmd->cmd_flags & CFLAG_CONSISTENT) {
+ (void) ddi_dma_sync(acmd->cmd_dmahandle,
+ acmd->cmd_dma_offset, acmd->cmd_dma_len,
+ DDI_DMA_SYNC_FORDEV);
+ }
+ } else if (acmd->cmd_flags & ~CFLAG_DMASEND) {
+ flags = MFI_FRAME_DIR_READ;
+
+ if (acmd->cmd_flags & CFLAG_CONSISTENT) {
+ (void) ddi_dma_sync(acmd->cmd_dmahandle,
+ acmd->cmd_dma_offset, acmd->cmd_dma_len,
+ DDI_DMA_SYNC_FORCPU);
+ }
+ } else {
+ flags = MFI_FRAME_DIR_NONE;
+ }
+
+ flags |= MFI_FRAME_SGL64;
+
+ switch (pkt->pkt_cdbp[0]) {
+
+ /*
+ * case SCMD_SYNCHRONIZE_CACHE:
+ * flush_cache(instance);
+ * return_mfi_pkt(instance, cmd);
+ * *cmd_done = 1;
+ *
+ * return (NULL);
+ */
+
+ case SCMD_READ:
+ case SCMD_WRITE:
+ case SCMD_READ_G1:
+ case SCMD_WRITE_G1:
+ if (acmd->islogical) {
+ ldio = (struct drsas_io_frame *)cmd->frame;
+
+ /*
+ * preare the Logical IO frame:
+ * 2nd bit is zero for all read cmds
+ */
+ ddi_put8(acc_handle, &ldio->cmd,
+ (pkt->pkt_cdbp[0] & 0x02) ? MFI_CMD_OP_LD_WRITE
+ : MFI_CMD_OP_LD_READ);
+ ddi_put8(acc_handle, &ldio->cmd_status, 0x0);
+ ddi_put8(acc_handle, &ldio->scsi_status, 0x0);
+ ddi_put8(acc_handle, &ldio->target_id, acmd->device_id);
+ ddi_put16(acc_handle, &ldio->timeout, 0);
+ ddi_put8(acc_handle, &ldio->reserved_0, 0);
+ ddi_put16(acc_handle, &ldio->pad_0, 0);
+ ddi_put16(acc_handle, &ldio->flags, flags);
+
+ /* Initialize sense Information */
+ bzero(cmd->sense, SENSE_LENGTH);
+ ddi_put8(acc_handle, &ldio->sense_len, SENSE_LENGTH);
+ ddi_put32(acc_handle, &ldio->sense_buf_phys_addr_hi, 0);
+ ddi_put32(acc_handle, &ldio->sense_buf_phys_addr_lo,
+ cmd->sense_phys_addr);
+ ddi_put32(acc_handle, &ldio->start_lba_hi, 0);
+ ddi_put8(acc_handle, &ldio->access_byte,
+ (acmd->cmd_cdblen != 6) ? pkt->pkt_cdbp[1] : 0);
+ ddi_put8(acc_handle, &ldio->sge_count,
+ acmd->cmd_cookiecnt);
+ mfi_sgl = (struct drsas_sge64 *)&ldio->sgl;
+
+ context = ddi_get32(acc_handle, &ldio->context);
+
+ if (acmd->cmd_cdblen == CDB_GROUP0) {
+ ddi_put32(acc_handle, &ldio->lba_count, (
+ (uint16_t)(pkt->pkt_cdbp[4])));
+
+ ddi_put32(acc_handle, &ldio->start_lba_lo, (
+ ((uint32_t)(pkt->pkt_cdbp[3])) |
+ ((uint32_t)(pkt->pkt_cdbp[2]) << 8) |
+ ((uint32_t)((pkt->pkt_cdbp[1]) & 0x1F)
+ << 16)));
+ } else if (acmd->cmd_cdblen == CDB_GROUP1) {
+ ddi_put32(acc_handle, &ldio->lba_count, (
+ ((uint16_t)(pkt->pkt_cdbp[8])) |
+ ((uint16_t)(pkt->pkt_cdbp[7]) << 8)));
+
+ ddi_put32(acc_handle, &ldio->start_lba_lo, (
+ ((uint32_t)(pkt->pkt_cdbp[5])) |
+ ((uint32_t)(pkt->pkt_cdbp[4]) << 8) |
+ ((uint32_t)(pkt->pkt_cdbp[3]) << 16) |
+ ((uint32_t)(pkt->pkt_cdbp[2]) << 24)));
+ } else if (acmd->cmd_cdblen == CDB_GROUP2) {
+ ddi_put32(acc_handle, &ldio->lba_count, (
+ ((uint16_t)(pkt->pkt_cdbp[9])) |
+ ((uint16_t)(pkt->pkt_cdbp[8]) << 8) |
+ ((uint16_t)(pkt->pkt_cdbp[7]) << 16) |
+ ((uint16_t)(pkt->pkt_cdbp[6]) << 24)));
+
+ ddi_put32(acc_handle, &ldio->start_lba_lo, (
+ ((uint32_t)(pkt->pkt_cdbp[5])) |
+ ((uint32_t)(pkt->pkt_cdbp[4]) << 8) |
+ ((uint32_t)(pkt->pkt_cdbp[3]) << 16) |
+ ((uint32_t)(pkt->pkt_cdbp[2]) << 24)));
+ } else if (acmd->cmd_cdblen == CDB_GROUP3) {
+ ddi_put32(acc_handle, &ldio->lba_count, (
+ ((uint16_t)(pkt->pkt_cdbp[13])) |
+ ((uint16_t)(pkt->pkt_cdbp[12]) << 8) |
+ ((uint16_t)(pkt->pkt_cdbp[11]) << 16) |
+ ((uint16_t)(pkt->pkt_cdbp[10]) << 24)));
+
+ ddi_put32(acc_handle, &ldio->start_lba_lo, (
+ ((uint32_t)(pkt->pkt_cdbp[9])) |
+ ((uint32_t)(pkt->pkt_cdbp[8]) << 8) |
+ ((uint32_t)(pkt->pkt_cdbp[7]) << 16) |
+ ((uint32_t)(pkt->pkt_cdbp[6]) << 24)));
+
+ ddi_put32(acc_handle, &ldio->start_lba_lo, (
+ ((uint32_t)(pkt->pkt_cdbp[5])) |
+ ((uint32_t)(pkt->pkt_cdbp[4]) << 8) |
+ ((uint32_t)(pkt->pkt_cdbp[3]) << 16) |
+ ((uint32_t)(pkt->pkt_cdbp[2]) << 24)));
+ }
+
+ break;
+ }
+ /* fall through For all non-rd/wr cmds */
+ default:
+
+ switch (pkt->pkt_cdbp[0]) {
+ case SCMD_MODE_SENSE:
+ case SCMD_MODE_SENSE_G1: {
+ union scsi_cdb *cdbp;
+ uint16_t page_code;
+
+ cdbp = (void *)pkt->pkt_cdbp;
+ page_code = (uint16_t)cdbp->cdb_un.sg.scsi[0];
+ switch (page_code) {
+ case 0x3:
+ case 0x4:
+ (void) drsas_mode_sense_build(pkt);
+ return_mfi_pkt(instance, cmd);
+ *cmd_done = 1;
+ return (NULL);
+ }
+ break;
+ }
+ default:
+ break;
+ }
+
+ pthru = (struct drsas_pthru_frame *)cmd->frame;
+
+ /* prepare the DCDB frame */
+ ddi_put8(acc_handle, &pthru->cmd, (acmd->islogical) ?
+ MFI_CMD_OP_LD_SCSI : MFI_CMD_OP_PD_SCSI);
+ ddi_put8(acc_handle, &pthru->cmd_status, 0x0);
+ ddi_put8(acc_handle, &pthru->scsi_status, 0x0);
+ ddi_put8(acc_handle, &pthru->target_id, acmd->device_id);
+ ddi_put8(acc_handle, &pthru->lun, 0);
+ ddi_put8(acc_handle, &pthru->cdb_len, acmd->cmd_cdblen);
+ ddi_put16(acc_handle, &pthru->timeout, 0);
+ ddi_put16(acc_handle, &pthru->flags, flags);
+ ddi_put32(acc_handle, &pthru->data_xfer_len,
+ acmd->cmd_dmacount);
+ ddi_put8(acc_handle, &pthru->sge_count, acmd->cmd_cookiecnt);
+ mfi_sgl = (struct drsas_sge64 *)&pthru->sgl;
+
+ bzero(cmd->sense, SENSE_LENGTH);
+ ddi_put8(acc_handle, &pthru->sense_len, SENSE_LENGTH);
+ ddi_put32(acc_handle, &pthru->sense_buf_phys_addr_hi, 0);
+ ddi_put32(acc_handle, &pthru->sense_buf_phys_addr_lo,
+ cmd->sense_phys_addr);
+
+ context = ddi_get32(acc_handle, &pthru->context);
+ ddi_rep_put8(acc_handle, (uint8_t *)pkt->pkt_cdbp,
+ (uint8_t *)pthru->cdb, acmd->cmd_cdblen, DDI_DEV_AUTOINCR);
+
+ break;
+ }
+#ifdef lint
+ context = context;
+#endif
+ /* prepare the scatter-gather list for the firmware */
+ for (i = 0; i < acmd->cmd_cookiecnt; i++, mfi_sgl++) {
+ ddi_put64(acc_handle, &mfi_sgl->phys_addr,
+ acmd->cmd_dmacookies[i].dmac_laddress);
+ ddi_put32(acc_handle, &mfi_sgl->length,
+ acmd->cmd_dmacookies[i].dmac_size);
+ }
+
+ sge_bytes = sizeof (struct drsas_sge64)*acmd->cmd_cookiecnt;
+
+ cmd->frame_count = (sge_bytes / MRMFI_FRAME_SIZE) +
+ ((sge_bytes % MRMFI_FRAME_SIZE) ? 1 : 0) + 1;
+
+ if (cmd->frame_count >= 8) {
+ cmd->frame_count = 8;
+ }
+
+ return (cmd);
+}
+
+/*
+ * issue_mfi_pthru
+ */
+static int
+issue_mfi_pthru(struct drsas_instance *instance, struct drsas_ioctl *ioctl,
+ struct drsas_cmd *cmd, int mode)
+{
+ void *ubuf;
+ uint32_t kphys_addr = 0;
+ uint32_t xferlen = 0;
+ uint_t model;
+ ddi_acc_handle_t acc_handle = cmd->frame_dma_obj.acc_handle;
+ dma_obj_t pthru_dma_obj;
+ struct drsas_pthru_frame *kpthru;
+ struct drsas_pthru_frame *pthru;
+ int i;
+ pthru = &cmd->frame->pthru;
+ kpthru = (struct drsas_pthru_frame *)&ioctl->frame[0];
+
+ model = ddi_model_convert_from(mode & FMODELS);
+ if (model == DDI_MODEL_ILP32) {
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_pthru: DDI_MODEL_LP32"));
+
+ xferlen = kpthru->sgl.sge32[0].length;
+
+ ubuf = (void *)(ulong_t)kpthru->sgl.sge32[0].phys_addr;
+ } else {
+#ifdef _ILP32
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_pthru: DDI_MODEL_LP32"));
+ xferlen = kpthru->sgl.sge32[0].length;
+ ubuf = (void *)(ulong_t)kpthru->sgl.sge32[0].phys_addr;
+#else
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_pthru: DDI_MODEL_LP64"));
+ xferlen = kpthru->sgl.sge64[0].length;
+ ubuf = (void *)(ulong_t)kpthru->sgl.sge64[0].phys_addr;
+#endif
+ }
+
+ if (xferlen) {
+ /* means IOCTL requires DMA */
+ /* allocate the data transfer buffer */
+ pthru_dma_obj.size = xferlen;
+ pthru_dma_obj.dma_attr = drsas_generic_dma_attr;
+ pthru_dma_obj.dma_attr.dma_attr_addr_hi = 0xFFFFFFFFU;
+ pthru_dma_obj.dma_attr.dma_attr_count_max = 0xFFFFFFFFU;
+ pthru_dma_obj.dma_attr.dma_attr_sgllen = 1;
+ pthru_dma_obj.dma_attr.dma_attr_align = 1;
+
+ /* allocate kernel buffer for DMA */
+ if (drsas_alloc_dma_obj(instance, &pthru_dma_obj,
+ (uchar_t)DDI_STRUCTURE_LE_ACC) != 1) {
+ con_log(CL_ANN, (CE_WARN, "issue_mfi_pthru: "
+ "could not allocate data transfer buffer."));
+ return (DDI_FAILURE);
+ }
+
+ /* If IOCTL requires DMA WRITE, do ddi_copyin IOCTL data copy */
+ if (kpthru->flags & MFI_FRAME_DIR_WRITE) {
+ for (i = 0; i < xferlen; i++) {
+ if (ddi_copyin((uint8_t *)ubuf+i,
+ (uint8_t *)pthru_dma_obj.buffer+i,
+ 1, mode)) {
+ con_log(CL_ANN, (CE_WARN,
+ "issue_mfi_pthru : "
+ "copy from user space failed"));
+ return (DDI_FAILURE);
+ }
+ }
+ }
+
+ kphys_addr = pthru_dma_obj.dma_cookie[0].dmac_address;
+ }
+
+ ddi_put8(acc_handle, &pthru->cmd, kpthru->cmd);
+ ddi_put8(acc_handle, &pthru->sense_len, kpthru->sense_len);
+ ddi_put8(acc_handle, &pthru->cmd_status, 0);
+ ddi_put8(acc_handle, &pthru->scsi_status, 0);
+ ddi_put8(acc_handle, &pthru->target_id, kpthru->target_id);
+ ddi_put8(acc_handle, &pthru->lun, kpthru->lun);
+ ddi_put8(acc_handle, &pthru->cdb_len, kpthru->cdb_len);
+ ddi_put8(acc_handle, &pthru->sge_count, kpthru->sge_count);
+ ddi_put16(acc_handle, &pthru->timeout, kpthru->timeout);
+ ddi_put32(acc_handle, &pthru->data_xfer_len, kpthru->data_xfer_len);
+
+ ddi_put32(acc_handle, &pthru->sense_buf_phys_addr_hi, 0);
+ /* pthru->sense_buf_phys_addr_lo = cmd->sense_phys_addr; */
+ ddi_put32(acc_handle, &pthru->sense_buf_phys_addr_lo, 0);
+
+ ddi_rep_put8(acc_handle, (uint8_t *)kpthru->cdb, (uint8_t *)pthru->cdb,
+ pthru->cdb_len, DDI_DEV_AUTOINCR);
+
+ ddi_put16(acc_handle, &pthru->flags, kpthru->flags & ~MFI_FRAME_SGL64);
+ ddi_put32(acc_handle, &pthru->sgl.sge32[0].length, xferlen);
+ ddi_put32(acc_handle, &pthru->sgl.sge32[0].phys_addr, kphys_addr);
+
+ cmd->sync_cmd = DRSAS_TRUE;
+ cmd->frame_count = 1;
+
+ if (instance->func_ptr->issue_cmd_in_sync_mode(instance, cmd)) {
+ con_log(CL_ANN, (CE_WARN,
+ "issue_mfi_pthru: fw_ioctl failed"));
+ } else {
+ if (xferlen && kpthru->flags & MFI_FRAME_DIR_READ) {
+ for (i = 0; i < xferlen; i++) {
+ if (ddi_copyout(
+ (uint8_t *)pthru_dma_obj.buffer+i,
+ (uint8_t *)ubuf+i, 1, mode)) {
+ con_log(CL_ANN, (CE_WARN,
+ "issue_mfi_pthru : "
+ "copy to user space failed"));
+ return (DDI_FAILURE);
+ }
+ }
+ }
+ }
+
+ kpthru->cmd_status = ddi_get8(acc_handle, &pthru->cmd_status);
+ kpthru->scsi_status = ddi_get8(acc_handle, &pthru->scsi_status);
+
+ con_log(CL_ANN, (CE_NOTE, "issue_mfi_pthru: cmd_status %x, "
+ "scsi_status %x", kpthru->cmd_status, kpthru->scsi_status));
+
+ if (xferlen) {
+ /* free kernel buffer */
+ if (drsas_free_dma_obj(instance, pthru_dma_obj) != DDI_SUCCESS)
+ return (DDI_FAILURE);
+ }
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * issue_mfi_dcmd
+ */
+static int
+issue_mfi_dcmd(struct drsas_instance *instance, struct drsas_ioctl *ioctl,
+ struct drsas_cmd *cmd, int mode)
+{
+ void *ubuf;
+ uint32_t kphys_addr = 0;
+ uint32_t xferlen = 0;
+ uint32_t model;
+ dma_obj_t dcmd_dma_obj;
+ struct drsas_dcmd_frame *kdcmd;
+ struct drsas_dcmd_frame *dcmd;
+ ddi_acc_handle_t acc_handle = cmd->frame_dma_obj.acc_handle;
+ int i;
+ dcmd = &cmd->frame->dcmd;
+ kdcmd = (struct drsas_dcmd_frame *)&ioctl->frame[0];
+
+ model = ddi_model_convert_from(mode & FMODELS);
+ if (model == DDI_MODEL_ILP32) {
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_dcmd: DDI_MODEL_ILP32"));
+
+ xferlen = kdcmd->sgl.sge32[0].length;
+
+ ubuf = (void *)(ulong_t)kdcmd->sgl.sge32[0].phys_addr;
+ } else {
+#ifdef _ILP32
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_dcmd: DDI_MODEL_ILP32"));
+ xferlen = kdcmd->sgl.sge32[0].length;
+ ubuf = (void *)(ulong_t)kdcmd->sgl.sge32[0].phys_addr;
+#else
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_dcmd: DDI_MODEL_LP64"));
+ xferlen = kdcmd->sgl.sge64[0].length;
+ ubuf = (void *)(ulong_t)kdcmd->sgl.sge64[0].phys_addr;
+#endif
+ }
+ if (xferlen) {
+ /* means IOCTL requires DMA */
+ /* allocate the data transfer buffer */
+ dcmd_dma_obj.size = xferlen;
+ dcmd_dma_obj.dma_attr = drsas_generic_dma_attr;
+ dcmd_dma_obj.dma_attr.dma_attr_addr_hi = 0xFFFFFFFFU;
+ dcmd_dma_obj.dma_attr.dma_attr_count_max = 0xFFFFFFFFU;
+ dcmd_dma_obj.dma_attr.dma_attr_sgllen = 1;
+ dcmd_dma_obj.dma_attr.dma_attr_align = 1;
+
+ /* allocate kernel buffer for DMA */
+ if (drsas_alloc_dma_obj(instance, &dcmd_dma_obj,
+ (uchar_t)DDI_STRUCTURE_LE_ACC) != 1) {
+ con_log(CL_ANN, (CE_WARN, "issue_mfi_dcmd: "
+ "could not allocate data transfer buffer."));
+ return (DDI_FAILURE);
+ }
+
+ /* If IOCTL requires DMA WRITE, do ddi_copyin IOCTL data copy */
+ if (kdcmd->flags & MFI_FRAME_DIR_WRITE) {
+ for (i = 0; i < xferlen; i++) {
+ if (ddi_copyin((uint8_t *)ubuf + i,
+ (uint8_t *)dcmd_dma_obj.buffer + i,
+ 1, mode)) {
+ con_log(CL_ANN, (CE_WARN,
+ "issue_mfi_dcmd : "
+ "copy from user space failed"));
+ return (DDI_FAILURE);
+ }
+ }
+ }
+
+ kphys_addr = dcmd_dma_obj.dma_cookie[0].dmac_address;
+ }
+
+ ddi_put8(acc_handle, &dcmd->cmd, kdcmd->cmd);
+ ddi_put8(acc_handle, &dcmd->cmd_status, 0);
+ ddi_put8(acc_handle, &dcmd->sge_count, kdcmd->sge_count);
+ ddi_put16(acc_handle, &dcmd->timeout, kdcmd->timeout);
+ ddi_put32(acc_handle, &dcmd->data_xfer_len, kdcmd->data_xfer_len);
+ ddi_put32(acc_handle, &dcmd->opcode, kdcmd->opcode);
+
+ ddi_rep_put8(acc_handle, (uint8_t *)kdcmd->mbox.b,
+ (uint8_t *)dcmd->mbox.b, DCMD_MBOX_SZ, DDI_DEV_AUTOINCR);
+
+ ddi_put16(acc_handle, &dcmd->flags, kdcmd->flags & ~MFI_FRAME_SGL64);
+ ddi_put32(acc_handle, &dcmd->sgl.sge32[0].length, xferlen);
+ ddi_put32(acc_handle, &dcmd->sgl.sge32[0].phys_addr, kphys_addr);
+
+ cmd->sync_cmd = DRSAS_TRUE;
+ cmd->frame_count = 1;
+
+ if (instance->func_ptr->issue_cmd_in_sync_mode(instance, cmd)) {
+ con_log(CL_ANN, (CE_WARN, "issue_mfi_dcmd: fw_ioctl failed"));
+ } else {
+ if (xferlen && (kdcmd->flags & MFI_FRAME_DIR_READ)) {
+ for (i = 0; i < xferlen; i++) {
+ if (ddi_copyout(
+ (uint8_t *)dcmd_dma_obj.buffer + i,
+ (uint8_t *)ubuf + i,
+ 1, mode)) {
+ con_log(CL_ANN, (CE_WARN,
+ "issue_mfi_dcmd : "
+ "copy to user space failed"));
+ return (DDI_FAILURE);
+ }
+ }
+ }
+ }
+
+ kdcmd->cmd_status = ddi_get8(acc_handle, &dcmd->cmd_status);
+
+ if (xferlen) {
+ /* free kernel buffer */
+ if (drsas_free_dma_obj(instance, dcmd_dma_obj) != DDI_SUCCESS)
+ return (DDI_FAILURE);
+ }
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * issue_mfi_smp
+ */
+static int
+issue_mfi_smp(struct drsas_instance *instance, struct drsas_ioctl *ioctl,
+ struct drsas_cmd *cmd, int mode)
+{
+ void *request_ubuf;
+ void *response_ubuf;
+ uint32_t request_xferlen = 0;
+ uint32_t response_xferlen = 0;
+ uint_t model;
+ dma_obj_t request_dma_obj;
+ dma_obj_t response_dma_obj;
+ ddi_acc_handle_t acc_handle = cmd->frame_dma_obj.acc_handle;
+ struct drsas_smp_frame *ksmp;
+ struct drsas_smp_frame *smp;
+ struct drsas_sge32 *sge32;
+#ifndef _ILP32
+ struct drsas_sge64 *sge64;
+#endif
+ int i;
+ uint64_t tmp_sas_addr;
+
+ smp = &cmd->frame->smp;
+ ksmp = (struct drsas_smp_frame *)&ioctl->frame[0];
+
+ model = ddi_model_convert_from(mode & FMODELS);
+ if (model == DDI_MODEL_ILP32) {
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_smp: DDI_MODEL_ILP32"));
+
+ sge32 = &ksmp->sgl[0].sge32[0];
+ response_xferlen = sge32[0].length;
+ request_xferlen = sge32[1].length;
+ con_log(CL_ANN, (CE_NOTE, "issue_mfi_smp: "
+ "response_xferlen = %x, request_xferlen = %x",
+ response_xferlen, request_xferlen));
+
+ response_ubuf = (void *)(ulong_t)sge32[0].phys_addr;
+ request_ubuf = (void *)(ulong_t)sge32[1].phys_addr;
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_smp: "
+ "response_ubuf = %p, request_ubuf = %p",
+ response_ubuf, request_ubuf));
+ } else {
+#ifdef _ILP32
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_smp: DDI_MODEL_ILP32"));
+
+ sge32 = &ksmp->sgl[0].sge32[0];
+ response_xferlen = sge32[0].length;
+ request_xferlen = sge32[1].length;
+ con_log(CL_ANN, (CE_NOTE, "issue_mfi_smp: "
+ "response_xferlen = %x, request_xferlen = %x",
+ response_xferlen, request_xferlen));
+
+ response_ubuf = (void *)(ulong_t)sge32[0].phys_addr;
+ request_ubuf = (void *)(ulong_t)sge32[1].phys_addr;
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_smp: "
+ "response_ubuf = %p, request_ubuf = %p",
+ response_ubuf, request_ubuf));
+#else
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_smp: DDI_MODEL_LP64"));
+
+ sge64 = &ksmp->sgl[0].sge64[0];
+ response_xferlen = sge64[0].length;
+ request_xferlen = sge64[1].length;
+
+ response_ubuf = (void *)(ulong_t)sge64[0].phys_addr;
+ request_ubuf = (void *)(ulong_t)sge64[1].phys_addr;
+#endif
+ }
+ if (request_xferlen) {
+ /* means IOCTL requires DMA */
+ /* allocate the data transfer buffer */
+ request_dma_obj.size = request_xferlen;
+ request_dma_obj.dma_attr = drsas_generic_dma_attr;
+ request_dma_obj.dma_attr.dma_attr_addr_hi = 0xFFFFFFFFU;
+ request_dma_obj.dma_attr.dma_attr_count_max = 0xFFFFFFFFU;
+ request_dma_obj.dma_attr.dma_attr_sgllen = 1;
+ request_dma_obj.dma_attr.dma_attr_align = 1;
+
+ /* allocate kernel buffer for DMA */
+ if (drsas_alloc_dma_obj(instance, &request_dma_obj,
+ (uchar_t)DDI_STRUCTURE_LE_ACC) != 1) {
+ con_log(CL_ANN, (CE_WARN, "issue_mfi_smp: "
+ "could not allocate data transfer buffer."));
+ return (DDI_FAILURE);
+ }
+
+ /* If IOCTL requires DMA WRITE, do ddi_copyin IOCTL data copy */
+ for (i = 0; i < request_xferlen; i++) {
+ if (ddi_copyin((uint8_t *)request_ubuf + i,
+ (uint8_t *)request_dma_obj.buffer + i,
+ 1, mode)) {
+ con_log(CL_ANN, (CE_WARN, "issue_mfi_smp: "
+ "copy from user space failed"));
+ return (DDI_FAILURE);
+ }
+ }
+ }
+
+ if (response_xferlen) {
+ /* means IOCTL requires DMA */
+ /* allocate the data transfer buffer */
+ response_dma_obj.size = response_xferlen;
+ response_dma_obj.dma_attr = drsas_generic_dma_attr;
+ response_dma_obj.dma_attr.dma_attr_addr_hi = 0xFFFFFFFFU;
+ response_dma_obj.dma_attr.dma_attr_count_max = 0xFFFFFFFFU;
+ response_dma_obj.dma_attr.dma_attr_sgllen = 1;
+ response_dma_obj.dma_attr.dma_attr_align = 1;
+
+ /* allocate kernel buffer for DMA */
+ if (drsas_alloc_dma_obj(instance, &response_dma_obj,
+ (uchar_t)DDI_STRUCTURE_LE_ACC) != 1) {
+ con_log(CL_ANN, (CE_WARN, "issue_mfi_smp: "
+ "could not allocate data transfer buffer."));
+ return (DDI_FAILURE);
+ }
+
+ /* If IOCTL requires DMA WRITE, do ddi_copyin IOCTL data copy */
+ for (i = 0; i < response_xferlen; i++) {
+ if (ddi_copyin((uint8_t *)response_ubuf + i,
+ (uint8_t *)response_dma_obj.buffer + i,
+ 1, mode)) {
+ con_log(CL_ANN, (CE_WARN, "issue_mfi_smp: "
+ "copy from user space failed"));
+ return (DDI_FAILURE);
+ }
+ }
+ }
+
+ ddi_put8(acc_handle, &smp->cmd, ksmp->cmd);
+ ddi_put8(acc_handle, &smp->cmd_status, 0);
+ ddi_put8(acc_handle, &smp->connection_status, 0);
+ ddi_put8(acc_handle, &smp->sge_count, ksmp->sge_count);
+ /* smp->context = ksmp->context; */
+ ddi_put16(acc_handle, &smp->timeout, ksmp->timeout);
+ ddi_put32(acc_handle, &smp->data_xfer_len, ksmp->data_xfer_len);
+
+ bcopy((void *)&ksmp->sas_addr, (void *)&tmp_sas_addr,
+ sizeof (uint64_t));
+ ddi_put64(acc_handle, &smp->sas_addr, tmp_sas_addr);
+
+ ddi_put16(acc_handle, &smp->flags, ksmp->flags & ~MFI_FRAME_SGL64);
+
+ model = ddi_model_convert_from(mode & FMODELS);
+ if (model == DDI_MODEL_ILP32) {
+ con_log(CL_ANN1, (CE_NOTE,
+ "handle_drv_ioctl: DDI_MODEL_ILP32"));
+
+ sge32 = &smp->sgl[0].sge32[0];
+ ddi_put32(acc_handle, &sge32[0].length, response_xferlen);
+ ddi_put32(acc_handle, &sge32[0].phys_addr,
+ response_dma_obj.dma_cookie[0].dmac_address);
+ ddi_put32(acc_handle, &sge32[1].length, request_xferlen);
+ ddi_put32(acc_handle, &sge32[1].phys_addr,
+ request_dma_obj.dma_cookie[0].dmac_address);
+ } else {
+#ifdef _ILP32
+ con_log(CL_ANN1, (CE_NOTE,
+ "handle_drv_ioctl: DDI_MODEL_ILP32"));
+ sge32 = &smp->sgl[0].sge32[0];
+ ddi_put32(acc_handle, &sge32[0].length, response_xferlen);
+ ddi_put32(acc_handle, &sge32[0].phys_addr,
+ response_dma_obj.dma_cookie[0].dmac_address);
+ ddi_put32(acc_handle, &sge32[1].length, request_xferlen);
+ ddi_put32(acc_handle, &sge32[1].phys_addr,
+ request_dma_obj.dma_cookie[0].dmac_address);
+#else
+ con_log(CL_ANN1, (CE_NOTE,
+ "issue_mfi_smp: DDI_MODEL_LP64"));
+ sge64 = &smp->sgl[0].sge64[0];
+ ddi_put32(acc_handle, &sge64[0].length, response_xferlen);
+ ddi_put64(acc_handle, &sge64[0].phys_addr,
+ response_dma_obj.dma_cookie[0].dmac_address);
+ ddi_put32(acc_handle, &sge64[1].length, request_xferlen);
+ ddi_put64(acc_handle, &sge64[1].phys_addr,
+ request_dma_obj.dma_cookie[0].dmac_address);
+#endif
+ }
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_smp : "
+ "smp->response_xferlen = %d, smp->request_xferlen = %d "
+ "smp->data_xfer_len = %d", ddi_get32(acc_handle, &sge32[0].length),
+ ddi_get32(acc_handle, &sge32[1].length),
+ ddi_get32(acc_handle, &smp->data_xfer_len)));
+
+ cmd->sync_cmd = DRSAS_TRUE;
+ cmd->frame_count = 1;
+
+ if (instance->func_ptr->issue_cmd_in_sync_mode(instance, cmd)) {
+ con_log(CL_ANN, (CE_WARN,
+ "issue_mfi_smp: fw_ioctl failed"));
+ } else {
+ con_log(CL_ANN1, (CE_NOTE,
+ "issue_mfi_smp: copy to user space"));
+
+ if (request_xferlen) {
+ for (i = 0; i < request_xferlen; i++) {
+ if (ddi_copyout(
+ (uint8_t *)request_dma_obj.buffer +
+ i, (uint8_t *)request_ubuf + i,
+ 1, mode)) {
+ con_log(CL_ANN, (CE_WARN,
+ "issue_mfi_smp : copy to user space"
+ " failed"));
+ return (DDI_FAILURE);
+ }
+ }
+ }
+
+ if (response_xferlen) {
+ for (i = 0; i < response_xferlen; i++) {
+ if (ddi_copyout(
+ (uint8_t *)response_dma_obj.buffer
+ + i, (uint8_t *)response_ubuf
+ + i, 1, mode)) {
+ con_log(CL_ANN, (CE_WARN,
+ "issue_mfi_smp : copy to "
+ "user space failed"));
+ return (DDI_FAILURE);
+ }
+ }
+ }
+ }
+
+ ksmp->cmd_status = ddi_get8(acc_handle, &smp->cmd_status);
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_smp: smp->cmd_status = %d",
+ ddi_get8(acc_handle, &smp->cmd_status)));
+
+
+ if (request_xferlen) {
+ /* free kernel buffer */
+ if (drsas_free_dma_obj(instance, request_dma_obj) !=
+ DDI_SUCCESS)
+ return (DDI_FAILURE);
+ }
+
+ if (response_xferlen) {
+ /* free kernel buffer */
+ if (drsas_free_dma_obj(instance, response_dma_obj) !=
+ DDI_SUCCESS)
+ return (DDI_FAILURE);
+ }
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * issue_mfi_stp
+ */
+static int
+issue_mfi_stp(struct drsas_instance *instance, struct drsas_ioctl *ioctl,
+ struct drsas_cmd *cmd, int mode)
+{
+ void *fis_ubuf;
+ void *data_ubuf;
+ uint32_t fis_xferlen = 0;
+ uint32_t data_xferlen = 0;
+ uint_t model;
+ dma_obj_t fis_dma_obj;
+ dma_obj_t data_dma_obj;
+ struct drsas_stp_frame *kstp;
+ struct drsas_stp_frame *stp;
+ ddi_acc_handle_t acc_handle = cmd->frame_dma_obj.acc_handle;
+ int i;
+
+ stp = &cmd->frame->stp;
+ kstp = (struct drsas_stp_frame *)&ioctl->frame[0];
+
+ model = ddi_model_convert_from(mode & FMODELS);
+ if (model == DDI_MODEL_ILP32) {
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_stp: DDI_MODEL_ILP32"));
+
+ fis_xferlen = kstp->sgl.sge32[0].length;
+ data_xferlen = kstp->sgl.sge32[1].length;
+
+ fis_ubuf = (void *)(ulong_t)kstp->sgl.sge32[0].phys_addr;
+ data_ubuf = (void *)(ulong_t)kstp->sgl.sge32[1].phys_addr;
+ }
+ else
+ {
+#ifdef _ILP32
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_stp: DDI_MODEL_ILP32"));
+
+ fis_xferlen = kstp->sgl.sge32[0].length;
+ data_xferlen = kstp->sgl.sge32[1].length;
+
+ fis_ubuf = (void *)(ulong_t)kstp->sgl.sge32[0].phys_addr;
+ data_ubuf = (void *)(ulong_t)kstp->sgl.sge32[1].phys_addr;
+#else
+ con_log(CL_ANN1, (CE_NOTE, "issue_mfi_stp: DDI_MODEL_LP64"));
+
+ fis_xferlen = kstp->sgl.sge64[0].length;
+ data_xferlen = kstp->sgl.sge64[1].length;
+
+ fis_ubuf = (void *)(ulong_t)kstp->sgl.sge64[0].phys_addr;
+ data_ubuf = (void *)(ulong_t)kstp->sgl.sge64[1].phys_addr;
+#endif
+ }
+
+
+ if (fis_xferlen) {
+ con_log(CL_ANN, (CE_NOTE, "issue_mfi_stp: "
+ "fis_ubuf = %p fis_xferlen = %x", fis_ubuf, fis_xferlen));
+
+ /* means IOCTL requires DMA */
+ /* allocate the data transfer buffer */
+ fis_dma_obj.size = fis_xferlen;
+ fis_dma_obj.dma_attr = drsas_generic_dma_attr;
+ fis_dma_obj.dma_attr.dma_attr_addr_hi = 0xFFFFFFFFU;
+ fis_dma_obj.dma_attr.dma_attr_count_max = 0xFFFFFFFFU;
+ fis_dma_obj.dma_attr.dma_attr_sgllen = 1;
+ fis_dma_obj.dma_attr.dma_attr_align = 1;
+
+ /* allocate kernel buffer for DMA */
+ if (drsas_alloc_dma_obj(instance, &fis_dma_obj,
+ (uchar_t)DDI_STRUCTURE_LE_ACC) != 1) {
+ con_log(CL_ANN, (CE_WARN, "issue_mfi_stp : "
+ "could not allocate data transfer buffer."));
+ return (DDI_FAILURE);
+ }
+
+ /* If IOCTL requires DMA WRITE, do ddi_copyin IOCTL data copy */
+ for (i = 0; i < fis_xferlen; i++) {
+ if (ddi_copyin((uint8_t *)fis_ubuf + i,
+ (uint8_t *)fis_dma_obj.buffer + i, 1, mode)) {
+ con_log(CL_ANN, (CE_WARN, "issue_mfi_stp: "
+ "copy from user space failed"));
+ return (DDI_FAILURE);
+ }
+ }
+ }
+
+ if (data_xferlen) {
+ con_log(CL_ANN, (CE_NOTE, "issue_mfi_stp: data_ubuf = %p "
+ "data_xferlen = %x", data_ubuf, data_xferlen));
+
+ /* means IOCTL requires DMA */
+ /* allocate the data transfer buffer */
+ data_dma_obj.size = data_xferlen;
+ data_dma_obj.dma_attr = drsas_generic_dma_attr;
+ data_dma_obj.dma_attr.dma_attr_addr_hi = 0xFFFFFFFFU;
+ data_dma_obj.dma_attr.dma_attr_count_max = 0xFFFFFFFFU;
+ data_dma_obj.dma_attr.dma_attr_sgllen = 1;
+ data_dma_obj.dma_attr.dma_attr_align = 1;
+
+/* allocate kernel buffer for DMA */
+ if (drsas_alloc_dma_obj(instance, &data_dma_obj,
+ (uchar_t)DDI_STRUCTURE_LE_ACC) != 1) {
+ con_log(CL_ANN, (CE_WARN, "issue_mfi_stp: "
+ "could not allocate data transfer buffer."));
+ return (DDI_FAILURE);
+ }
+
+ /* If IOCTL requires DMA WRITE, do ddi_copyin IOCTL data copy */
+ for (i = 0; i < data_xferlen; i++) {
+ if (ddi_copyin((uint8_t *)data_ubuf + i,
+ (uint8_t *)data_dma_obj.buffer + i, 1, mode)) {
+ con_log(CL_ANN, (CE_WARN, "issue_mfi_stp: "
+ "copy from user space failed"));
+ return (DDI_FAILURE);
+ }
+ }
+ }
+
+ ddi_put8(acc_handle, &stp->cmd, kstp->cmd);
+ ddi_put8(acc_handle, &stp->cmd_status, 0);
+ ddi_put8(acc_handle, &stp->connection_status, 0);
+ ddi_put8(acc_handle, &stp->target_id, kstp->target_id);
+ ddi_put8(acc_handle, &stp->sge_count, kstp->sge_count);
+
+ ddi_put16(acc_handle, &stp->timeout, kstp->timeout);
+ ddi_put32(acc_handle, &stp->data_xfer_len, kstp->data_xfer_len);
+
+ ddi_rep_put8(acc_handle, (uint8_t *)kstp->fis, (uint8_t *)stp->fis, 10,
+ DDI_DEV_AUTOINCR);
+
+ ddi_put16(acc_handle, &stp->flags, kstp->flags & ~MFI_FRAME_SGL64);
+ ddi_put32(acc_handle, &stp->stp_flags, kstp->stp_flags);
+ ddi_put32(acc_handle, &stp->sgl.sge32[0].length, fis_xferlen);
+ ddi_put32(acc_handle, &stp->sgl.sge32[0].phys_addr,
+ fis_dma_obj.dma_cookie[0].dmac_address);
+ ddi_put32(acc_handle, &stp->sgl.sge32[1].length, data_xferlen);
+ ddi_put32(acc_handle, &stp->sgl.sge32[1].phys_addr,
+ data_dma_obj.dma_cookie[0].dmac_address);
+
+ cmd->sync_cmd = DRSAS_TRUE;
+ cmd->frame_count = 1;
+
+ if (instance->func_ptr->issue_cmd_in_sync_mode(instance, cmd)) {
+ con_log(CL_ANN, (CE_WARN, "issue_mfi_stp: fw_ioctl failed"));
+ } else {
+
+ if (fis_xferlen) {
+ for (i = 0; i < fis_xferlen; i++) {
+ if (ddi_copyout(
+ (uint8_t *)fis_dma_obj.buffer + i,
+ (uint8_t *)fis_ubuf + i, 1, mode)) {
+ con_log(CL_ANN, (CE_WARN,
+ "issue_mfi_stp : copy to "
+ "user space failed"));
+ return (DDI_FAILURE);
+ }
+ }
+ }
+ }
+ if (data_xferlen) {
+ for (i = 0; i < data_xferlen; i++) {
+ if (ddi_copyout(
+ (uint8_t *)data_dma_obj.buffer + i,
+ (uint8_t *)data_ubuf + i, 1, mode)) {
+ con_log(CL_ANN, (CE_WARN,
+ "issue_mfi_stp : copy to"
+ " user space failed"));
+ return (DDI_FAILURE);
+ }
+ }
+ }
+
+ kstp->cmd_status = ddi_get8(acc_handle, &stp->cmd_status);
+
+ if (fis_xferlen) {
+ /* free kernel buffer */
+ if (drsas_free_dma_obj(instance, fis_dma_obj) != DDI_SUCCESS)
+ return (DDI_FAILURE);
+ }
+
+ if (data_xferlen) {
+ /* free kernel buffer */
+ if (drsas_free_dma_obj(instance, data_dma_obj) != DDI_SUCCESS)
+ return (DDI_FAILURE);
+ }
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * fill_up_drv_ver
+ */
+static void
+fill_up_drv_ver(struct drsas_drv_ver *dv)
+{
+ (void) memset(dv, 0, sizeof (struct drsas_drv_ver));
+
+ (void) memcpy(dv->signature, "$LSI LOGIC$", strlen("$LSI LOGIC$"));
+ (void) memcpy(dv->os_name, "Solaris", strlen("Solaris"));
+ (void) memcpy(dv->drv_name, "dr_sas", strlen("dr_sas"));
+ (void) memcpy(dv->drv_ver, DRSAS_VERSION, strlen(DRSAS_VERSION));
+ (void) memcpy(dv->drv_rel_date, DRSAS_RELDATE,
+ strlen(DRSAS_RELDATE));
+}
+
+/*
+ * handle_drv_ioctl
+ */
+static int
+handle_drv_ioctl(struct drsas_instance *instance, struct drsas_ioctl *ioctl,
+ int mode)
+{
+ int i;
+ int rval = DDI_SUCCESS;
+ int *props = NULL;
+ void *ubuf;
+
+ uint8_t *pci_conf_buf;
+ uint32_t xferlen;
+ uint32_t num_props;
+ uint_t model;
+ struct drsas_dcmd_frame *kdcmd;
+ struct drsas_drv_ver dv;
+ struct drsas_pci_information pi;
+
+ kdcmd = (struct drsas_dcmd_frame *)&ioctl->frame[0];
+
+ model = ddi_model_convert_from(mode & FMODELS);
+ if (model == DDI_MODEL_ILP32) {
+ con_log(CL_ANN1, (CE_NOTE,
+ "handle_drv_ioctl: DDI_MODEL_ILP32"));
+
+ xferlen = kdcmd->sgl.sge32[0].length;
+
+ ubuf = (void *)(ulong_t)kdcmd->sgl.sge32[0].phys_addr;
+ } else {
+#ifdef _ILP32
+ con_log(CL_ANN1, (CE_NOTE,
+ "handle_drv_ioctl: DDI_MODEL_ILP32"));
+ xferlen = kdcmd->sgl.sge32[0].length;
+ ubuf = (void *)(ulong_t)kdcmd->sgl.sge32[0].phys_addr;
+#else
+ con_log(CL_ANN1, (CE_NOTE,
+ "handle_drv_ioctl: DDI_MODEL_LP64"));
+ xferlen = kdcmd->sgl.sge64[0].length;
+ ubuf = (void *)(ulong_t)kdcmd->sgl.sge64[0].phys_addr;
+#endif
+ }
+ con_log(CL_ANN1, (CE_NOTE, "handle_drv_ioctl: "
+ "dataBuf=%p size=%d bytes", ubuf, xferlen));
+
+ switch (kdcmd->opcode) {
+ case DRSAS_DRIVER_IOCTL_DRIVER_VERSION:
+ con_log(CL_ANN1, (CE_NOTE, "handle_drv_ioctl: "
+ "DRSAS_DRIVER_IOCTL_DRIVER_VERSION"));
+
+ fill_up_drv_ver(&dv);
+ for (i = 0; i < xferlen; i++) {
+ if (ddi_copyout((uint8_t *)&dv + i, (uint8_t *)ubuf + i,
+ 1, mode)) {
+ con_log(CL_ANN, (CE_WARN, "handle_drv_ioctl: "
+ "DRSAS_DRIVER_IOCTL_DRIVER_VERSION"
+ " : copy to user space failed"));
+ kdcmd->cmd_status = 1;
+ rval = DDI_FAILURE;
+ break;
+ }
+ }
+ if (i == xferlen)
+ kdcmd->cmd_status = 0;
+ break;
+ case DRSAS_DRIVER_IOCTL_PCI_INFORMATION:
+ con_log(CL_ANN1, (CE_NOTE, "handle_drv_ioctl: "
+ "DRSAS_DRIVER_IOCTL_PCI_INFORMAITON"));
+
+ if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, instance->dip,
+ 0, "reg", &props, &num_props)) {
+ con_log(CL_ANN, (CE_WARN, "handle_drv_ioctl: "
+ "DRSAS_DRIVER_IOCTL_PCI_INFORMATION : "
+ "ddi_prop_look_int_array failed"));
+ rval = DDI_FAILURE;
+ } else {
+
+ pi.busNumber = (props[0] >> 16) & 0xFF;
+ pi.deviceNumber = (props[0] >> 11) & 0x1f;
+ pi.functionNumber = (props[0] >> 8) & 0x7;
+ ddi_prop_free((void *)props);
+ }
+
+ pci_conf_buf = (uint8_t *)&pi.pciHeaderInfo;
+
+ for (i = 0; i < (sizeof (struct drsas_pci_information) -
+ offsetof(struct drsas_pci_information, pciHeaderInfo));
+ i++) {
+ pci_conf_buf[i] =
+ pci_config_get8(instance->pci_handle, i);
+ }
+ for (i = 0; i < xferlen; i++) {
+ if (ddi_copyout((uint8_t *)&pi + i, (uint8_t *)ubuf + i,
+ 1, mode)) {
+ con_log(CL_ANN, (CE_WARN, "handle_drv_ioctl: "
+ "DRSAS_DRIVER_IOCTL_PCI_INFORMATION"
+ " : copy to user space failed"));
+ kdcmd->cmd_status = 1;
+ rval = DDI_FAILURE;
+ break;
+ }
+ }
+
+ if (i == xferlen)
+ kdcmd->cmd_status = 0;
+
+ break;
+ default:
+ con_log(CL_ANN, (CE_WARN, "handle_drv_ioctl: "
+ "invalid driver specific IOCTL opcode = 0x%x",
+ kdcmd->opcode));
+ kdcmd->cmd_status = 1;
+ rval = DDI_FAILURE;
+ break;
+ }
+
+ return (rval);
+}
+
+/*
+ * handle_mfi_ioctl
+ */
+static int
+handle_mfi_ioctl(struct drsas_instance *instance, struct drsas_ioctl *ioctl,
+ int mode)
+{
+ int rval = DDI_SUCCESS;
+
+ struct drsas_header *hdr;
+ struct drsas_cmd *cmd;
+
+ cmd = get_mfi_pkt(instance);
+
+ if (!cmd) {
+ con_log(CL_ANN, (CE_WARN, "dr_sas: "
+ "failed to get a cmd packet"));
+ return (DDI_FAILURE);
+ }
+
+ /* Clear the frame buffer and assign back the context id */
+ (void) memset((char *)&cmd->frame[0], 0, sizeof (union drsas_frame));
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &cmd->frame->hdr.context,
+ cmd->index);
+
+ hdr = (struct drsas_header *)&ioctl->frame[0];
+
+ switch (hdr->cmd) {
+ case MFI_CMD_OP_DCMD:
+ rval = issue_mfi_dcmd(instance, ioctl, cmd, mode);
+ break;
+ case MFI_CMD_OP_SMP:
+ rval = issue_mfi_smp(instance, ioctl, cmd, mode);
+ break;
+ case MFI_CMD_OP_STP:
+ rval = issue_mfi_stp(instance, ioctl, cmd, mode);
+ break;
+ case MFI_CMD_OP_LD_SCSI:
+ case MFI_CMD_OP_PD_SCSI:
+ rval = issue_mfi_pthru(instance, ioctl, cmd, mode);
+ break;
+ default:
+ con_log(CL_ANN, (CE_WARN, "handle_mfi_ioctl: "
+ "invalid mfi ioctl hdr->cmd = %d", hdr->cmd));
+ rval = DDI_FAILURE;
+ break;
+ }
+
+
+ return_mfi_pkt(instance, cmd);
+ if (drsas_common_check(instance, cmd) != DDI_SUCCESS)
+ rval = DDI_FAILURE;
+ return (rval);
+}
+
+/*
+ * AEN
+ */
+static int
+handle_mfi_aen(struct drsas_instance *instance, struct drsas_aen *aen)
+{
+ int rval = 0;
+
+ rval = register_mfi_aen(instance, instance->aen_seq_num,
+ aen->class_locale_word);
+
+ aen->cmd_status = (uint8_t)rval;
+
+ return (rval);
+}
+
+static int
+register_mfi_aen(struct drsas_instance *instance, uint32_t seq_num,
+ uint32_t class_locale_word)
+{
+ int ret_val;
+
+ struct drsas_cmd *cmd, *aen_cmd;
+ struct drsas_dcmd_frame *dcmd;
+ union drsas_evt_class_locale curr_aen;
+ union drsas_evt_class_locale prev_aen;
+
+ /*
+ * If there an AEN pending already (aen_cmd), check if the
+ * class_locale of that pending AEN is inclusive of the new
+ * AEN request we currently have. If it is, then we don't have
+ * to do anything. In other words, whichever events the current
+ * AEN request is subscribing to, have already been subscribed
+ * to.
+ *
+ * If the old_cmd is _not_ inclusive, then we have to abort
+ * that command, form a class_locale that is superset of both
+ * old and current and re-issue to the FW
+ */
+
+ curr_aen.word = class_locale_word;
+ aen_cmd = instance->aen_cmd;
+ if (aen_cmd) {
+ prev_aen.word = ddi_get32(aen_cmd->frame_dma_obj.acc_handle,
+ &aen_cmd->frame->dcmd.mbox.w[1]);
+
+ /*
+ * A class whose enum value is smaller is inclusive of all
+ * higher values. If a PROGRESS (= -1) was previously
+ * registered, then a new registration requests for higher
+ * classes need not be sent to FW. They are automatically
+ * included.
+ *
+ * Locale numbers don't have such hierarchy. They are bitmap
+ * values
+ */
+ if ((prev_aen.members.class <= curr_aen.members.class) &&
+ !((prev_aen.members.locale & curr_aen.members.locale) ^
+ curr_aen.members.locale)) {
+ /*
+ * Previously issued event registration includes
+ * current request. Nothing to do.
+ */
+
+ return (0);
+ } else {
+ curr_aen.members.locale |= prev_aen.members.locale;
+
+ if (prev_aen.members.class < curr_aen.members.class)
+ curr_aen.members.class = prev_aen.members.class;
+
+ ret_val = abort_aen_cmd(instance, aen_cmd);
+
+ if (ret_val) {
+ con_log(CL_ANN, (CE_WARN, "register_mfi_aen: "
+ "failed to abort prevous AEN command"));
+
+ return (ret_val);
+ }
+ }
+ } else {
+ curr_aen.word = class_locale_word;
+ }
+
+ cmd = get_mfi_pkt(instance);
+
+ if (!cmd)
+ return (ENOMEM);
+ /* Clear the frame buffer and assign back the context id */
+ (void) memset((char *)&cmd->frame[0], 0, sizeof (union drsas_frame));
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &cmd->frame->hdr.context,
+ cmd->index);
+
+ dcmd = &cmd->frame->dcmd;
+
+ /* for(i = 0; i < DCMD_MBOX_SZ; i++) dcmd->mbox.b[i] = 0; */
+ (void) memset(dcmd->mbox.b, 0, DCMD_MBOX_SZ);
+
+ (void) memset(instance->mfi_evt_detail_obj.buffer, 0,
+ sizeof (struct drsas_evt_detail));
+
+ /* Prepare DCMD for aen registration */
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &dcmd->cmd, MFI_CMD_OP_DCMD);
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &dcmd->cmd_status, 0x0);
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &dcmd->sge_count, 1);
+ ddi_put16(cmd->frame_dma_obj.acc_handle, &dcmd->flags,
+ MFI_FRAME_DIR_READ);
+ ddi_put16(cmd->frame_dma_obj.acc_handle, &dcmd->timeout, 0);
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->data_xfer_len,
+ sizeof (struct drsas_evt_detail));
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->opcode,
+ DR_DCMD_CTRL_EVENT_WAIT);
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->mbox.w[0], seq_num);
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->mbox.w[1],
+ curr_aen.word);
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->sgl.sge32[0].phys_addr,
+ instance->mfi_evt_detail_obj.dma_cookie[0].dmac_address);
+ ddi_put32(cmd->frame_dma_obj.acc_handle, &dcmd->sgl.sge32[0].length,
+ sizeof (struct drsas_evt_detail));
+
+ instance->aen_seq_num = seq_num;
+
+
+ /*
+ * Store reference to the cmd used to register for AEN. When an
+ * application wants us to register for AEN, we have to abort this
+ * cmd and re-register with a new EVENT LOCALE supplied by that app
+ */
+ instance->aen_cmd = cmd;
+
+ cmd->frame_count = 1;
+
+ /* Issue the aen registration frame */
+ /* atomic_add_16 (&instance->fw_outstanding, 1); */
+ instance->func_ptr->issue_cmd(cmd, instance);
+
+ return (0);
+}
+
+static void
+display_scsi_inquiry(caddr_t scsi_inq)
+{
+#define MAX_SCSI_DEVICE_CODE 14
+ int i;
+ char inquiry_buf[256] = {0};
+ int len;
+ const char *const scsi_device_types[] = {
+ "Direct-Access ",
+ "Sequential-Access",
+ "Printer ",
+ "Processor ",
+ "WORM ",
+ "CD-ROM ",
+ "Scanner ",
+ "Optical Device ",
+ "Medium Changer ",
+ "Communications ",
+ "Unknown ",
+ "Unknown ",
+ "Unknown ",
+ "Enclosure ",
+ };
+
+ len = 0;
+
+ len += snprintf(inquiry_buf + len, 265 - len, " Vendor: ");
+ for (i = 8; i < 16; i++) {
+ len += snprintf(inquiry_buf + len, 265 - len, "%c",
+ scsi_inq[i]);
+ }
+
+ len += snprintf(inquiry_buf + len, 265 - len, " Model: ");
+
+ for (i = 16; i < 32; i++) {
+ len += snprintf(inquiry_buf + len, 265 - len, "%c",
+ scsi_inq[i]);
+ }
+
+ len += snprintf(inquiry_buf + len, 265 - len, " Rev: ");
+
+ for (i = 32; i < 36; i++) {
+ len += snprintf(inquiry_buf + len, 265 - len, "%c",
+ scsi_inq[i]);
+ }
+
+ len += snprintf(inquiry_buf + len, 265 - len, "\n");
+
+
+ i = scsi_inq[0] & 0x1f;
+
+
+ len += snprintf(inquiry_buf + len, 265 - len, " Type: %s ",
+ i < MAX_SCSI_DEVICE_CODE ? scsi_device_types[i] :
+ "Unknown ");
+
+
+ len += snprintf(inquiry_buf + len, 265 - len,
+ " ANSI SCSI revision: %02x", scsi_inq[2] & 0x07);
+
+ if ((scsi_inq[2] & 0x07) == 1 && (scsi_inq[3] & 0x0f) == 1) {
+ len += snprintf(inquiry_buf + len, 265 - len, " CCS\n");
+ } else {
+ len += snprintf(inquiry_buf + len, 265 - len, "\n");
+ }
+
+ con_log(CL_ANN1, (CE_CONT, inquiry_buf));
+}
+
+static int
+read_fw_status_reg_ppc(struct drsas_instance *instance)
+{
+ return ((int)RD_OB_SCRATCH_PAD_0(instance));
+}
+
+static void
+issue_cmd_ppc(struct drsas_cmd *cmd, struct drsas_instance *instance)
+{
+ atomic_add_16(&instance->fw_outstanding, 1);
+
+ /* Issue the command to the FW */
+ WR_IB_QPORT((cmd->frame_phys_addr) |
+ (((cmd->frame_count - 1) << 1) | 1), instance);
+}
+
+/*
+ * issue_cmd_in_sync_mode
+ */
+static int
+issue_cmd_in_sync_mode_ppc(struct drsas_instance *instance,
+ struct drsas_cmd *cmd)
+{
+ int i;
+ uint32_t msecs = MFI_POLL_TIMEOUT_SECS * (10 * MILLISEC);
+
+ con_log(CL_ANN1, (CE_NOTE, "issue_cmd_in_sync_mode_ppc: called"));
+
+ cmd->cmd_status = ENODATA;
+
+ WR_IB_QPORT((cmd->frame_phys_addr) |
+ (((cmd->frame_count - 1) << 1) | 1), instance);
+
+ mutex_enter(&instance->int_cmd_mtx);
+
+ for (i = 0; i < msecs && (cmd->cmd_status == ENODATA); i++) {
+ cv_wait(&instance->int_cmd_cv, &instance->int_cmd_mtx);
+ }
+
+ mutex_exit(&instance->int_cmd_mtx);
+
+ con_log(CL_ANN1, (CE_NOTE, "issue_cmd_in_sync_mode_ppc: done"));
+
+ if (i < (msecs -1)) {
+ return (DDI_SUCCESS);
+ } else {
+ return (DDI_FAILURE);
+ }
+}
+
+/*
+ * issue_cmd_in_poll_mode
+ */
+static int
+issue_cmd_in_poll_mode_ppc(struct drsas_instance *instance,
+ struct drsas_cmd *cmd)
+{
+ int i;
+ uint16_t flags;
+ uint32_t msecs = MFI_POLL_TIMEOUT_SECS * MILLISEC;
+ struct drsas_header *frame_hdr;
+
+ con_log(CL_ANN1, (CE_NOTE, "issue_cmd_in_poll_mode_ppc: called"));
+
+ frame_hdr = (struct drsas_header *)cmd->frame;
+ ddi_put8(cmd->frame_dma_obj.acc_handle, &frame_hdr->cmd_status,
+ MFI_CMD_STATUS_POLL_MODE);
+ flags = ddi_get16(cmd->frame_dma_obj.acc_handle, &frame_hdr->flags);
+ flags |= MFI_FRAME_DONT_POST_IN_REPLY_QUEUE;
+
+ ddi_put16(cmd->frame_dma_obj.acc_handle, &frame_hdr->flags, flags);
+
+ /* issue the frame using inbound queue port */
+ WR_IB_QPORT((cmd->frame_phys_addr) |
+ (((cmd->frame_count - 1) << 1) | 1), instance);
+
+ /* wait for cmd_status to change from 0xFF */
+ for (i = 0; i < msecs && (
+ ddi_get8(cmd->frame_dma_obj.acc_handle, &frame_hdr->cmd_status)
+ == MFI_CMD_STATUS_POLL_MODE); i++) {
+ drv_usecwait(MILLISEC); /* wait for 1000 usecs */
+ }
+
+ if (ddi_get8(cmd->frame_dma_obj.acc_handle, &frame_hdr->cmd_status)
+ == MFI_CMD_STATUS_POLL_MODE) {
+ con_log(CL_ANN, (CE_NOTE, "issue_cmd_in_poll_mode: "
+ "cmd polling timed out"));
+ return (DDI_FAILURE);
+ }
+
+ return (DDI_SUCCESS);
+}
+
+static void
+enable_intr_ppc(struct drsas_instance *instance)
+{
+ uint32_t mask;
+
+ con_log(CL_ANN1, (CE_NOTE, "enable_intr_ppc: called"));
+
+ /* WR_OB_DOORBELL_CLEAR(0xFFFFFFFF, instance); */
+ WR_OB_DOORBELL_CLEAR(OB_DOORBELL_CLEAR_MASK, instance);
+
+ /* WR_OB_INTR_MASK(~0x80000000, instance); */
+ WR_OB_INTR_MASK(~(MFI_REPLY_2108_MESSAGE_INTR_MASK), instance);
+
+ /* dummy read to force PCI flush */
+ mask = RD_OB_INTR_MASK(instance);
+
+ con_log(CL_ANN1, (CE_NOTE, "enable_intr_ppc: "
+ "outbound_intr_mask = 0x%x", mask));
+}
+
+static void
+disable_intr_ppc(struct drsas_instance *instance)
+{
+ uint32_t mask;
+
+ con_log(CL_ANN1, (CE_NOTE, "disable_intr_ppc: called"));
+
+ con_log(CL_ANN1, (CE_NOTE, "disable_intr_ppc: before : "
+ "outbound_intr_mask = 0x%x", RD_OB_INTR_MASK(instance)));
+
+ /* WR_OB_INTR_MASK(0xFFFFFFFF, instance); */
+ WR_OB_INTR_MASK(OB_INTR_MASK, instance);
+
+ con_log(CL_ANN1, (CE_NOTE, "disable_intr_ppc: after : "
+ "outbound_intr_mask = 0x%x", RD_OB_INTR_MASK(instance)));
+
+ /* dummy read to force PCI flush */
+ mask = RD_OB_INTR_MASK(instance);
+#ifdef lint
+ mask = mask;
+#endif
+}
+
+static int
+intr_ack_ppc(struct drsas_instance *instance)
+{
+ uint32_t status;
+
+ con_log(CL_ANN1, (CE_NOTE, "intr_ack_ppc: called"));
+
+ /* check if it is our interrupt */
+ status = RD_OB_INTR_STATUS(instance);
+
+ con_log(CL_ANN1, (CE_NOTE, "intr_ack_ppc: status = 0x%x", status));
+
+ if (!(status & MFI_REPLY_2108_MESSAGE_INTR)) {
+ return (DDI_INTR_UNCLAIMED);
+ }
+
+ /* clear the interrupt by writing back the same value */
+ WR_OB_DOORBELL_CLEAR(status, instance);
+
+ /* dummy READ */
+ status = RD_OB_INTR_STATUS(instance);
+
+ con_log(CL_ANN1, (CE_NOTE, "intr_ack_ppc: interrupt cleared"));
+
+ return (DDI_INTR_CLAIMED);
+}
+
+static int
+drsas_common_check(struct drsas_instance *instance,
+ struct drsas_cmd *cmd)
+{
+ int ret = DDI_SUCCESS;
+
+ if (drsas_check_dma_handle(cmd->frame_dma_obj.dma_handle) !=
+ DDI_SUCCESS) {
+ ddi_fm_service_impact(instance->dip, DDI_SERVICE_UNAFFECTED);
+ if (cmd->pkt != NULL) {
+ cmd->pkt->pkt_reason = CMD_TRAN_ERR;
+ cmd->pkt->pkt_statistics = 0;
+ }
+ ret = DDI_FAILURE;
+ }
+ if (drsas_check_dma_handle(instance->mfi_internal_dma_obj.dma_handle)
+ != DDI_SUCCESS) {
+ ddi_fm_service_impact(instance->dip, DDI_SERVICE_UNAFFECTED);
+ if (cmd->pkt != NULL) {
+ cmd->pkt->pkt_reason = CMD_TRAN_ERR;
+ cmd->pkt->pkt_statistics = 0;
+ }
+ ret = DDI_FAILURE;
+ }
+ if (drsas_check_dma_handle(instance->mfi_evt_detail_obj.dma_handle) !=
+ DDI_SUCCESS) {
+ ddi_fm_service_impact(instance->dip, DDI_SERVICE_UNAFFECTED);
+ if (cmd->pkt != NULL) {
+ cmd->pkt->pkt_reason = CMD_TRAN_ERR;
+ cmd->pkt->pkt_statistics = 0;
+ }
+ ret = DDI_FAILURE;
+ }
+ if (drsas_check_acc_handle(instance->regmap_handle) != DDI_SUCCESS) {
+ ddi_fm_service_impact(instance->dip, DDI_SERVICE_UNAFFECTED);
+
+ ddi_fm_acc_err_clear(instance->regmap_handle, DDI_FME_VER0);
+
+ if (cmd->pkt != NULL) {
+ cmd->pkt->pkt_reason = CMD_TRAN_ERR;
+ cmd->pkt->pkt_statistics = 0;
+ }
+ ret = DDI_FAILURE;
+ }
+
+ return (ret);
+}
+
+/*ARGSUSED*/
+static int
+drsas_fm_error_cb(dev_info_t *dip, ddi_fm_error_t *err, const void *impl_data)
+{
+ /*
+ * as the driver can always deal with an error in any dma or
+ * access handle, we can just return the fme_status value.
+ */
+ pci_ereport_post(dip, err, NULL);
+ return (err->fme_status);
+}
+
+static void
+drsas_fm_init(struct drsas_instance *instance)
+{
+ /* Need to change iblock to priority for new MSI intr */
+ ddi_iblock_cookie_t fm_ibc;
+
+ /* Only register with IO Fault Services if we have some capability */
+ if (instance->fm_capabilities) {
+ /* Adjust access and dma attributes for FMA */
+ endian_attr.devacc_attr_access = DDI_FLAGERR_ACC;
+ drsas_generic_dma_attr.dma_attr_flags = DDI_DMA_FLAGERR;
+
+ /*
+ * Register capabilities with IO Fault Services.
+ * fm_capabilities will be updated to indicate
+ * capabilities actually supported (not requested.)
+ */
+
+ ddi_fm_init(instance->dip, &instance->fm_capabilities, &fm_ibc);
+
+ /*
+ * Initialize pci ereport capabilities if ereport
+ * capable (should always be.)
+ */
+
+ if (DDI_FM_EREPORT_CAP(instance->fm_capabilities) ||
+ DDI_FM_ERRCB_CAP(instance->fm_capabilities)) {
+ pci_ereport_setup(instance->dip);
+ }
+
+ /*
+ * Register error callback if error callback capable.
+ */
+ if (DDI_FM_ERRCB_CAP(instance->fm_capabilities)) {
+ ddi_fm_handler_register(instance->dip,
+ drsas_fm_error_cb, (void*) instance);
+ }
+ } else {
+ endian_attr.devacc_attr_access = DDI_DEFAULT_ACC;
+ drsas_generic_dma_attr.dma_attr_flags = 0;
+ }
+}
+
+static void
+drsas_fm_fini(struct drsas_instance *instance)
+{
+ /* Only unregister FMA capabilities if registered */
+ if (instance->fm_capabilities) {
+ /*
+ * Un-register error callback if error callback capable.
+ */
+ if (DDI_FM_ERRCB_CAP(instance->fm_capabilities)) {
+ ddi_fm_handler_unregister(instance->dip);
+ }
+
+ /*
+ * Release any resources allocated by pci_ereport_setup()
+ */
+ if (DDI_FM_EREPORT_CAP(instance->fm_capabilities) ||
+ DDI_FM_ERRCB_CAP(instance->fm_capabilities)) {
+ pci_ereport_teardown(instance->dip);
+ }
+
+ /* Unregister from IO Fault Services */
+ ddi_fm_fini(instance->dip);
+
+ /* Adjust access and dma attributes for FMA */
+ endian_attr.devacc_attr_access = DDI_DEFAULT_ACC;
+ drsas_generic_dma_attr.dma_attr_flags = 0;
+ }
+}
+
+int
+drsas_check_acc_handle(ddi_acc_handle_t handle)
+{
+ ddi_fm_error_t de;
+
+ if (handle == NULL) {
+ return (DDI_FAILURE);
+ }
+
+ ddi_fm_acc_err_get(handle, &de, DDI_FME_VERSION);
+
+ return (de.fme_status);
+}
+
+int
+drsas_check_dma_handle(ddi_dma_handle_t handle)
+{
+ ddi_fm_error_t de;
+
+ if (handle == NULL) {
+ return (DDI_FAILURE);
+ }
+
+ ddi_fm_dma_err_get(handle, &de, DDI_FME_VERSION);
+
+ return (de.fme_status);
+}
+
+void
+drsas_fm_ereport(struct drsas_instance *instance, char *detail)
+{
+ uint64_t ena;
+ char buf[FM_MAX_CLASS];
+
+ (void) snprintf(buf, FM_MAX_CLASS, "%s.%s", DDI_FM_DEVICE, detail);
+ ena = fm_ena_generate(0, FM_ENA_FMT1);
+ if (DDI_FM_EREPORT_CAP(instance->fm_capabilities)) {
+ ddi_fm_ereport_post(instance->dip, buf, ena, DDI_NOSLEEP,
+ FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERSION, NULL);
+ }
+}
+
+static int
+drsas_add_intrs(struct drsas_instance *instance, int intr_type)
+{
+
+ dev_info_t *dip = instance->dip;
+ int avail, actual, count;
+ int i, flag, ret;
+
+ con_log(CL_DLEVEL1, (CE_WARN, "drsas_add_intrs: intr_type = %x",
+ intr_type));
+
+ /* Get number of interrupts */
+ ret = ddi_intr_get_nintrs(dip, intr_type, &count);
+ if ((ret != DDI_SUCCESS) || (count == 0)) {
+ con_log(CL_ANN, (CE_WARN, "ddi_intr_get_nintrs() failed:"
+ "ret %d count %d", ret, count));
+
+ return (DDI_FAILURE);
+ }
+
+ con_log(CL_DLEVEL1, (CE_WARN, "drsas_add_intrs: count = %d ", count));
+
+ /* Get number of available interrupts */
+ ret = ddi_intr_get_navail(dip, intr_type, &avail);
+ if ((ret != DDI_SUCCESS) || (avail == 0)) {
+ con_log(CL_ANN, (CE_WARN, "ddi_intr_get_navail() failed:"
+ "ret %d avail %d", ret, avail));
+
+ return (DDI_FAILURE);
+ }
+ con_log(CL_DLEVEL1, (CE_WARN, "drsas_add_intrs: avail = %d ", avail));
+
+ /* Only one interrupt routine. So limit the count to 1 */
+ if (count > 1) {
+ count = 1;
+ }
+
+ /*
+ * Allocate an array of interrupt handlers. Currently we support
+ * only one interrupt. The framework can be extended later.
+ */
+ instance->intr_size = count * sizeof (ddi_intr_handle_t);
+ instance->intr_htable = kmem_zalloc(instance->intr_size, KM_SLEEP);
+ ASSERT(instance->intr_htable);
+
+ flag = ((intr_type == DDI_INTR_TYPE_MSI) || (intr_type ==
+ DDI_INTR_TYPE_MSIX)) ? DDI_INTR_ALLOC_STRICT:DDI_INTR_ALLOC_NORMAL;
+
+ /* Allocate interrupt */
+ ret = ddi_intr_alloc(dip, instance->intr_htable, intr_type, 0,
+ count, &actual, flag);
+
+ if ((ret != DDI_SUCCESS) || (actual == 0)) {
+ con_log(CL_ANN, (CE_WARN, "drsas_add_intrs: "
+ "avail = %d", avail));
+ kmem_free(instance->intr_htable, instance->intr_size);
+ return (DDI_FAILURE);
+ }
+ if (actual < count) {
+ con_log(CL_ANN, (CE_WARN, "drsas_add_intrs: "
+ "Requested = %d Received = %d", count, actual));
+ }
+ instance->intr_cnt = actual;
+
+ /*
+ * Get the priority of the interrupt allocated.
+ */
+ if ((ret = ddi_intr_get_pri(instance->intr_htable[0],
+ &instance->intr_pri)) != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN, "drsas_add_intrs: "
+ "get priority call failed"));
+
+ for (i = 0; i < actual; i++) {
+ (void) ddi_intr_free(instance->intr_htable[i]);
+ }
+ kmem_free(instance->intr_htable, instance->intr_size);
+ return (DDI_FAILURE);
+ }
+
+ /*
+ * Test for high level mutex. we don't support them.
+ */
+ if (instance->intr_pri >= ddi_intr_get_hilevel_pri()) {
+ con_log(CL_ANN, (CE_WARN, "drsas_add_intrs: "
+ "High level interrupts not supported."));
+
+ for (i = 0; i < actual; i++) {
+ (void) ddi_intr_free(instance->intr_htable[i]);
+ }
+ kmem_free(instance->intr_htable, instance->intr_size);
+ return (DDI_FAILURE);
+ }
+
+ con_log(CL_DLEVEL1, (CE_NOTE, "drsas_add_intrs: intr_pri = 0x%x ",
+ instance->intr_pri));
+
+ /* Call ddi_intr_add_handler() */
+ for (i = 0; i < actual; i++) {
+ ret = ddi_intr_add_handler(instance->intr_htable[i],
+ (ddi_intr_handler_t *)drsas_isr, (caddr_t)instance,
+ (caddr_t)(uintptr_t)i);
+
+ if (ret != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN, "drsas_add_intrs:"
+ "failed %d", ret));
+
+ for (i = 0; i < actual; i++) {
+ (void) ddi_intr_free(instance->intr_htable[i]);
+ }
+ kmem_free(instance->intr_htable, instance->intr_size);
+ return (DDI_FAILURE);
+ }
+
+ }
+
+ con_log(CL_DLEVEL1, (CE_WARN, " ddi_intr_add_handler done"));
+
+ if ((ret = ddi_intr_get_cap(instance->intr_htable[0],
+ &instance->intr_cap)) != DDI_SUCCESS) {
+ con_log(CL_ANN, (CE_WARN, "ddi_intr_get_cap() failed %d",
+ ret));
+
+ /* Free already allocated intr */
+ for (i = 0; i < actual; i++) {
+ (void) ddi_intr_remove_handler(
+ instance->intr_htable[i]);
+ (void) ddi_intr_free(instance->intr_htable[i]);
+ }
+ kmem_free(instance->intr_htable, instance->intr_size);
+ return (DDI_FAILURE);
+ }
+
+ if (instance->intr_cap & DDI_INTR_FLAG_BLOCK) {
+ con_log(CL_ANN, (CE_WARN, "Calling ddi_intr_block _enable"));
+
+ (void) ddi_intr_block_enable(instance->intr_htable,
+ instance->intr_cnt);
+ } else {
+ con_log(CL_ANN, (CE_NOTE, " calling ddi_intr_enable"));
+
+ for (i = 0; i < instance->intr_cnt; i++) {
+ (void) ddi_intr_enable(instance->intr_htable[i]);
+ con_log(CL_ANN, (CE_NOTE, "ddi intr enable returns "
+ "%d", i));
+ }
+ }
+
+ return (DDI_SUCCESS);
+
+}
+
+
+static void
+drsas_rem_intrs(struct drsas_instance *instance)
+{
+ int i;
+
+ con_log(CL_ANN, (CE_NOTE, "drsas_rem_intrs called"));
+
+ /* Disable all interrupts first */
+ if (instance->intr_cap & DDI_INTR_FLAG_BLOCK) {
+ (void) ddi_intr_block_disable(instance->intr_htable,
+ instance->intr_cnt);
+ } else {
+ for (i = 0; i < instance->intr_cnt; i++) {
+ (void) ddi_intr_disable(instance->intr_htable[i]);
+ }
+ }
+
+ /* Remove all the handlers */
+
+ for (i = 0; i < instance->intr_cnt; i++) {
+ (void) ddi_intr_remove_handler(instance->intr_htable[i]);
+ (void) ddi_intr_free(instance->intr_htable[i]);
+ }
+
+ kmem_free(instance->intr_htable, instance->intr_size);
+}
+
+static int
+drsas_tran_bus_config(dev_info_t *parent, uint_t flags,
+ ddi_bus_config_op_t op, void *arg, dev_info_t **childp)
+{
+ struct drsas_instance *instance;
+ int config;
+ int rval;
+
+ char *ptr = NULL;
+ int tgt, lun;
+
+ con_log(CL_ANN1, (CE_NOTE, "Bus config called for op = %x", op));
+
+ if ((instance = ddi_get_soft_state(drsas_state,
+ ddi_get_instance(parent))) == NULL) {
+ return (NDI_FAILURE);
+ }
+
+ /* Hold nexus during bus_config */
+ ndi_devi_enter(parent, &config);
+ switch (op) {
+ case BUS_CONFIG_ONE: {
+
+ /* parse wwid/target name out of name given */
+ if ((ptr = strchr((char *)arg, '@')) == NULL) {
+ rval = NDI_FAILURE;
+ break;
+ }
+ ptr++;
+
+ if (drsas_parse_devname(arg, &tgt, &lun) != 0) {
+ rval = NDI_FAILURE;
+ break;
+ }
+
+ if (lun == 0) {
+ rval = drsas_config_ld(instance, tgt, lun, childp);
+ } else {
+ rval = NDI_FAILURE;
+ }
+
+ break;
+ }
+ case BUS_CONFIG_DRIVER:
+ case BUS_CONFIG_ALL: {
+
+ rval = drsas_config_all_devices(instance);
+
+ rval = NDI_SUCCESS;
+ break;
+ }
+ }
+
+ if (rval == NDI_SUCCESS) {
+ rval = ndi_busop_bus_config(parent, flags, op, arg, childp, 0);
+
+ }
+ ndi_devi_exit(parent, config);
+
+ con_log(CL_ANN1, (CE_NOTE, "drsas_tran_bus_config: rval = %x",
+ rval));
+ return (rval);
+}
+
+static int
+drsas_config_all_devices(struct drsas_instance *instance)
+{
+ int rval, tgt;
+
+ for (tgt = 0; tgt < MRDRV_MAX_LD; tgt++) {
+ (void) drsas_config_ld(instance, tgt, 0, NULL);
+
+ }
+
+ rval = NDI_SUCCESS;
+ return (rval);
+}
+
+static int
+drsas_parse_devname(char *devnm, int *tgt, int *lun)
+{
+ char devbuf[SCSI_MAXNAMELEN];
+ char *addr;
+ char *p, *tp, *lp;
+ long num;
+
+ /* Parse dev name and address */
+ (void) strcpy(devbuf, devnm);
+ addr = "";
+ for (p = devbuf; *p != '\0'; p++) {
+ if (*p == '@') {
+ addr = p + 1;
+ *p = '\0';
+ } else if (*p == ':') {
+ *p = '\0';
+ break;
+ }
+ }
+
+ /* Parse target and lun */
+ for (p = tp = addr, lp = NULL; *p != '\0'; p++) {
+ if (*p == ',') {
+ lp = p + 1;
+ *p = '\0';
+ break;
+ }
+ }
+ if (tgt && tp) {
+ if (ddi_strtol(tp, NULL, 0x10, &num)) {
+ return (DDI_FAILURE); /* Can declare this as constant */
+ }
+ *tgt = (int)num;
+ }
+ if (lun && lp) {
+ if (ddi_strtol(lp, NULL, 0x10, &num)) {
+ return (DDI_FAILURE);
+ }
+ *lun = (int)num;
+ }
+ return (DDI_SUCCESS); /* Success case */
+}
+
+static int
+drsas_config_ld(struct drsas_instance *instance, uint16_t tgt,
+ uint8_t lun, dev_info_t **ldip)
+{
+ struct scsi_device *sd;
+ dev_info_t *child;
+ int rval;
+
+ con_log(CL_ANN1, (CE_NOTE, "drsas_config_ld: t = %d l = %d",
+ tgt, lun));
+
+ if ((child = drsas_find_child(instance, tgt, lun)) != NULL) {
+ if (ldip) {
+ *ldip = child;
+ }
+ con_log(CL_ANN1, (CE_NOTE,
+ "drsas_config_ld: Child = %p found t = %d l = %d",
+ (void *)child, tgt, lun));
+ return (NDI_SUCCESS);
+ }
+
+ sd = kmem_zalloc(sizeof (struct scsi_device), KM_SLEEP);
+ sd->sd_address.a_hba_tran = instance->tran;
+ sd->sd_address.a_target = (uint16_t)tgt;
+ sd->sd_address.a_lun = (uint8_t)lun;
+
+ if (scsi_hba_probe(sd, NULL) == SCSIPROBE_EXISTS)
+ rval = drsas_config_scsi_device(instance, sd, ldip);
+ else
+ rval = NDI_FAILURE;
+
+ /* sd_unprobe is blank now. Free buffer manually */
+ if (sd->sd_inq) {
+ kmem_free(sd->sd_inq, SUN_INQSIZE);
+ sd->sd_inq = (struct scsi_inquiry *)NULL;
+ }
+
+ kmem_free(sd, sizeof (struct scsi_device));
+ con_log(CL_ANN1, (CE_NOTE, "drsas_config_ld: return rval = %d",
+ rval));
+ return (rval);
+}
+
+static int
+drsas_config_scsi_device(struct drsas_instance *instance,
+ struct scsi_device *sd, dev_info_t **dipp)
+{
+ char *nodename = NULL;
+ char **compatible = NULL;
+ int ncompatible = 0;
+ char *childname;
+ dev_info_t *ldip = NULL;
+ int tgt = sd->sd_address.a_target;
+ int lun = sd->sd_address.a_lun;
+ int dtype = sd->sd_inq->inq_dtype & DTYPE_MASK;
+ int rval;
+
+ con_log(CL_ANN1, (CE_WARN, "dr_sas: scsi_device t%dL%d", tgt, lun));
+ scsi_hba_nodename_compatible_get(sd->sd_inq, NULL, dtype,
+ NULL, &nodename, &compatible, &ncompatible);
+
+ if (nodename == NULL) {
+ con_log(CL_ANN1, (CE_WARN, "dr_sas: Found no compatible driver "
+ "for t%dL%d", tgt, lun));
+ rval = NDI_FAILURE;
+ goto finish;
+ }
+
+ childname = (dtype == DTYPE_DIRECT) ? "sd" : nodename;
+ con_log(CL_ANN1, (CE_WARN,
+ "dr_sas: Childname = %2s nodename = %s", childname, nodename));
+
+ /* Create a dev node */
+ rval = ndi_devi_alloc(instance->dip, childname, DEVI_SID_NODEID, &ldip);
+ con_log(CL_ANN1, (CE_WARN,
+ "dr_sas_config_scsi_device: ndi_devi_alloc rval = %x", rval));
+ if (rval == NDI_SUCCESS) {
+ if (ndi_prop_update_int(DDI_DEV_T_NONE, ldip, "target", tgt) !=
+ DDI_PROP_SUCCESS) {
+ con_log(CL_ANN1, (CE_WARN, "dr_sas: unable to create "
+ "property for t%dl%d target", tgt, lun));
+ rval = NDI_FAILURE;
+ goto finish;
+ }
+ if (ndi_prop_update_int(DDI_DEV_T_NONE, ldip, "lun", lun) !=
+ DDI_PROP_SUCCESS) {
+ con_log(CL_ANN1, (CE_WARN, "dr_sas: unable to create "
+ "property for t%dl%d lun", tgt, lun));
+ rval = NDI_FAILURE;
+ goto finish;
+ }
+
+ if (ndi_prop_update_string_array(DDI_DEV_T_NONE, ldip,
+ "compatible", compatible, ncompatible) !=
+ DDI_PROP_SUCCESS) {
+ con_log(CL_ANN1, (CE_WARN, "dr_sas: unable to create "
+ "property for t%dl%d compatible", tgt, lun));
+ rval = NDI_FAILURE;
+ goto finish;
+ }
+
+ rval = ndi_devi_online(ldip, NDI_ONLINE_ATTACH);
+ if (rval != NDI_SUCCESS) {
+ con_log(CL_ANN1, (CE_WARN, "dr_sas: unable to online "
+ "t%dl%d", tgt, lun));
+ ndi_prop_remove_all(ldip);
+ (void) ndi_devi_free(ldip);
+ } else {
+ con_log(CL_ANN1, (CE_WARN, "dr_sas: online Done :"
+ "0 t%dl%d", tgt, lun));
+ }
+
+ }
+finish:
+ if (dipp) {
+ *dipp = ldip;
+ }
+
+ con_log(CL_DLEVEL1, (CE_WARN,
+ "dr_sas: config_scsi_device rval = %d t%dL%d",
+ rval, tgt, lun));
+ scsi_hba_nodename_compatible_free(nodename, compatible);
+ return (rval);
+}
+
+/*ARGSUSED*/
+static int
+drsas_service_evt(struct drsas_instance *instance, int tgt, int lun, int event,
+ uint64_t wwn)
+{
+ struct drsas_eventinfo *mrevt = NULL;
+
+ con_log(CL_ANN1, (CE_NOTE,
+ "drsas_service_evt called for t%dl%d event = %d",
+ tgt, lun, event));
+
+ if ((instance->taskq == NULL) || (mrevt =
+ kmem_zalloc(sizeof (struct drsas_eventinfo), KM_NOSLEEP)) == NULL) {
+ return (ENOMEM);
+ }
+
+ mrevt->instance = instance;
+ mrevt->tgt = tgt;
+ mrevt->lun = lun;
+ mrevt->event = event;
+
+ if ((ddi_taskq_dispatch(instance->taskq,
+ (void (*)(void *))drsas_issue_evt_taskq, mrevt, DDI_NOSLEEP)) !=
+ DDI_SUCCESS) {
+ con_log(CL_ANN1, (CE_NOTE,
+ "dr_sas: Event task failed for t%dl%d event = %d",
+ tgt, lun, event));
+ kmem_free(mrevt, sizeof (struct drsas_eventinfo));
+ return (DDI_FAILURE);
+ }
+ return (DDI_SUCCESS);
+}
+
+static void
+drsas_issue_evt_taskq(struct drsas_eventinfo *mrevt)
+{
+ struct drsas_instance *instance = mrevt->instance;
+ dev_info_t *dip, *pdip;
+ int circ1 = 0;
+ char *devname;
+
+ con_log(CL_ANN1, (CE_NOTE, "drsas_issue_evt_taskq: called for"
+ " tgt %d lun %d event %d",
+ mrevt->tgt, mrevt->lun, mrevt->event));
+
+ if (mrevt->tgt < MRDRV_MAX_LD && mrevt->lun == 0) {
+ dip = instance->dr_ld_list[mrevt->tgt].dip;
+ } else {
+ return;
+ }
+
+ ndi_devi_enter(instance->dip, &circ1);
+ switch (mrevt->event) {
+ case DRSAS_EVT_CONFIG_TGT:
+ if (dip == NULL) {
+
+ if (mrevt->lun == 0) {
+ (void) drsas_config_ld(instance, mrevt->tgt,
+ 0, NULL);
+ }
+ con_log(CL_ANN1, (CE_NOTE,
+ "dr_sas: EVT_CONFIG_TGT called:"
+ " for tgt %d lun %d event %d",
+ mrevt->tgt, mrevt->lun, mrevt->event));
+
+ } else {
+ con_log(CL_ANN1, (CE_NOTE,
+ "dr_sas: EVT_CONFIG_TGT dip != NULL:"
+ " for tgt %d lun %d event %d",
+ mrevt->tgt, mrevt->lun, mrevt->event));
+ }
+ break;
+ case DRSAS_EVT_UNCONFIG_TGT:
+ if (dip) {
+ if (i_ddi_devi_attached(dip)) {
+
+ pdip = ddi_get_parent(dip);
+
+ devname = kmem_zalloc(MAXNAMELEN + 1, KM_SLEEP);
+ (void) ddi_deviname(dip, devname);
+
+ (void) devfs_clean(pdip, devname + 1,
+ DV_CLEAN_FORCE);
+ kmem_free(devname, MAXNAMELEN + 1);
+ }
+ (void) ndi_devi_offline(dip, NDI_DEVI_REMOVE);
+ con_log(CL_ANN1, (CE_NOTE,
+ "dr_sas: EVT_UNCONFIG_TGT called:"
+ " for tgt %d lun %d event %d",
+ mrevt->tgt, mrevt->lun, mrevt->event));
+ } else {
+ con_log(CL_ANN1, (CE_NOTE,
+ "dr_sas: EVT_UNCONFIG_TGT dip == NULL:"
+ " for tgt %d lun %d event %d",
+ mrevt->tgt, mrevt->lun, mrevt->event));
+ }
+ break;
+ }
+ kmem_free(mrevt, sizeof (struct drsas_eventinfo));
+ ndi_devi_exit(instance->dip, circ1);
+}
+
+static int
+drsas_mode_sense_build(struct scsi_pkt *pkt)
+{
+ union scsi_cdb *cdbp;
+ uint16_t page_code;
+ struct scsa_cmd *acmd;
+ struct buf *bp;
+ struct mode_header *modehdrp;
+
+ cdbp = (void *)pkt->pkt_cdbp;
+ page_code = cdbp->cdb_un.sg.scsi[0];
+ acmd = PKT2CMD(pkt);
+ bp = acmd->cmd_buf;
+ if ((!bp) && bp->b_un.b_addr && bp->b_bcount && acmd->cmd_dmacount) {
+ con_log(CL_ANN1, (CE_WARN, "Failing MODESENSE Command"));
+ /* ADD pkt statistics as Command failed. */
+ return (NULL);
+ }
+
+ bp_mapin(bp);
+ bzero(bp->b_un.b_addr, bp->b_bcount);
+
+ switch (page_code) {
+ case 0x3: {
+ struct mode_format *page3p = NULL;
+ modehdrp = (struct mode_header *)(bp->b_un.b_addr);
+ modehdrp->bdesc_length = MODE_BLK_DESC_LENGTH;
+
+ page3p = (void *)((caddr_t)modehdrp +
+ MODE_HEADER_LENGTH + MODE_BLK_DESC_LENGTH);
+ page3p->mode_page.code = 0x3;
+ page3p->mode_page.length =
+ (uchar_t)(sizeof (struct mode_format));
+ page3p->data_bytes_sect = 512;
+ page3p->sect_track = 63;
+ break;
+ }
+ case 0x4: {
+ struct mode_geometry *page4p = NULL;
+ modehdrp = (struct mode_header *)(bp->b_un.b_addr);
+ modehdrp->bdesc_length = MODE_BLK_DESC_LENGTH;
+
+ page4p = (void *)((caddr_t)modehdrp +
+ MODE_HEADER_LENGTH + MODE_BLK_DESC_LENGTH);
+ page4p->mode_page.code = 0x4;
+ page4p->mode_page.length =
+ (uchar_t)(sizeof (struct mode_geometry));
+ page4p->heads = 255;
+ page4p->rpm = 10000;
+ break;
+ }
+ default:
+ break;
+ }
+ return (NULL);
+}
diff --git a/usr/src/uts/common/io/dr_sas/dr_sas.conf b/usr/src/uts/common/io/dr_sas/dr_sas.conf
new file mode 100644
index 0000000000..3792f43ca4
--- /dev/null
+++ b/usr/src/uts/common/io/dr_sas/dr_sas.conf
@@ -0,0 +1,15 @@
+#
+# Copyright (c) 2008-2009, LSI Logic Corporation.
+# All rights reserved.
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# dr_sas.conf for sol 10 (and later) for all supported architectures
+#
+# global definitions
+
+# MSI specific flag. user can uncomment this line and set flag "yes" to enable MSI
+#drsas-enable-msi="yes";
diff --git a/usr/src/uts/common/io/dr_sas/dr_sas.h b/usr/src/uts/common/io/dr_sas/dr_sas.h
new file mode 100644
index 0000000000..8f78658edf
--- /dev/null
+++ b/usr/src/uts/common/io/dr_sas/dr_sas.h
@@ -0,0 +1,1766 @@
+/*
+ * dr_sas.h: header for dr_sas
+ *
+ * Solaris MegaRAID driver for SAS2.0 controllers
+ * Copyright (c) 2008-2009, LSI Logic Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the author nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _DR_SAS_H_
+#define _DR_SAS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/scsi/scsi.h>
+#include "dr_sas_list.h"
+
+/*
+ * MegaRAID SAS2.0 Driver meta data
+ */
+#define DRSAS_VERSION "LSIv2.0"
+#define DRSAS_RELDATE "Jan 9, 2009"
+
+#define DRSAS_TRUE 1
+#define DRSAS_FALSE 0
+
+/*
+ * MegaRAID SAS2.0 device id conversion definitions.
+ */
+#define INST2LSIRDCTL(x) ((x) << INST_MINOR_SHIFT)
+
+/*
+ * MegaRAID SAS2.0 supported controllers
+ */
+#define PCI_DEVICE_ID_LSI_2108VDE 0x0078
+#define PCI_DEVICE_ID_LSI_2108V 0x0079
+
+/*
+ * Register Index for 2108 Controllers.
+ */
+#define REGISTER_SET_IO_2108 (2)
+
+#define DRSAS_MAX_SGE_CNT 0x50
+
+#define DRSAS_IOCTL_DRIVER 0x12341234
+#define DRSAS_IOCTL_FIRMWARE 0x12345678
+#define DRSAS_IOCTL_AEN 0x87654321
+
+#define DRSAS_1_SECOND 1000000
+
+/* Dynamic Enumeration Flags */
+#define DRSAS_PD_LUN 1
+#define DRSAS_LD_LUN 0
+#define DRSAS_PD_TGT_MAX 255
+#define DRSAS_GET_PD_MAX(s) ((s)->dr_pd_max)
+#define WWN_STRLEN 17
+
+/*
+ * =====================================
+ * MegaRAID SAS2.0 MFI firmware definitions
+ * =====================================
+ */
+/*
+ * MFI stands for MegaRAID SAS2.0 FW Interface. This is just a moniker for
+ * protocol between the software and firmware. Commands are issued using
+ * "message frames"
+ */
+
+/*
+ * FW posts its state in upper 4 bits of outbound_msg_0 register
+ */
+#define MFI_STATE_SHIFT 28
+#define MFI_STATE_MASK ((uint32_t)0xF<<MFI_STATE_SHIFT)
+#define MFI_STATE_UNDEFINED ((uint32_t)0x0<<MFI_STATE_SHIFT)
+#define MFI_STATE_BB_INIT ((uint32_t)0x1<<MFI_STATE_SHIFT)
+#define MFI_STATE_FW_INIT ((uint32_t)0x4<<MFI_STATE_SHIFT)
+#define MFI_STATE_WAIT_HANDSHAKE ((uint32_t)0x6<<MFI_STATE_SHIFT)
+#define MFI_STATE_FW_INIT_2 ((uint32_t)0x7<<MFI_STATE_SHIFT)
+#define MFI_STATE_DEVICE_SCAN ((uint32_t)0x8<<MFI_STATE_SHIFT)
+#define MFI_STATE_BOOT_MESSAGE_PENDING ((uint32_t)0x9<<MFI_STATE_SHIFT)
+#define MFI_STATE_FLUSH_CACHE ((uint32_t)0xA<<MFI_STATE_SHIFT)
+#define MFI_STATE_READY ((uint32_t)0xB<<MFI_STATE_SHIFT)
+#define MFI_STATE_OPERATIONAL ((uint32_t)0xC<<MFI_STATE_SHIFT)
+#define MFI_STATE_FAULT ((uint32_t)0xF<<MFI_STATE_SHIFT)
+
+#define MRMFI_FRAME_SIZE 64
+
+/*
+ * During FW init, clear pending cmds & reset state using inbound_msg_0
+ *
+ * ABORT : Abort all pending cmds
+ * READY : Move from OPERATIONAL to READY state; discard queue info
+ * MFIMODE : Discard (possible) low MFA posted in 64-bit mode (??)
+ * CLR_HANDSHAKE: FW is waiting for HANDSHAKE from BIOS or Driver
+ */
+#define MFI_INIT_ABORT 0x00000001
+#define MFI_INIT_READY 0x00000002
+#define MFI_INIT_MFIMODE 0x00000004
+#define MFI_INIT_CLEAR_HANDSHAKE 0x00000008
+#define MFI_INIT_HOTPLUG 0x00000010
+#define MFI_STOP_ADP 0x00000020
+#define MFI_RESET_FLAGS MFI_INIT_READY|MFI_INIT_MFIMODE|MFI_INIT_ABORT
+
+/*
+ * MFI frame flags
+ */
+#define MFI_FRAME_POST_IN_REPLY_QUEUE 0x0000
+#define MFI_FRAME_DONT_POST_IN_REPLY_QUEUE 0x0001
+#define MFI_FRAME_SGL32 0x0000
+#define MFI_FRAME_SGL64 0x0002
+#define MFI_FRAME_SENSE32 0x0000
+#define MFI_FRAME_SENSE64 0x0004
+#define MFI_FRAME_DIR_NONE 0x0000
+#define MFI_FRAME_DIR_WRITE 0x0008
+#define MFI_FRAME_DIR_READ 0x0010
+#define MFI_FRAME_DIR_BOTH 0x0018
+
+/*
+ * Definition for cmd_status
+ */
+#define MFI_CMD_STATUS_POLL_MODE 0xFF
+#define MFI_CMD_STATUS_SYNC_MODE 0xFF
+
+/*
+ * MFI command opcodes
+ */
+#define MFI_CMD_OP_INIT 0x00
+#define MFI_CMD_OP_LD_READ 0x01
+#define MFI_CMD_OP_LD_WRITE 0x02
+#define MFI_CMD_OP_LD_SCSI 0x03
+#define MFI_CMD_OP_PD_SCSI 0x04
+#define MFI_CMD_OP_DCMD 0x05
+#define MFI_CMD_OP_ABORT 0x06
+#define MFI_CMD_OP_SMP 0x07
+#define MFI_CMD_OP_STP 0x08
+
+#define DR_DCMD_CTRL_GET_INFO 0x01010000
+
+#define DR_DCMD_CTRL_CACHE_FLUSH 0x01101000
+#define DR_FLUSH_CTRL_CACHE 0x01
+#define DR_FLUSH_DISK_CACHE 0x02
+
+#define DR_DCMD_CTRL_SHUTDOWN 0x01050000
+#define DRSAS_ENABLE_DRIVE_SPINDOWN 0x01
+
+#define DR_DCMD_CTRL_EVENT_GET_INFO 0x01040100
+#define DR_DCMD_CTRL_EVENT_GET 0x01040300
+#define DR_DCMD_CTRL_EVENT_WAIT 0x01040500
+#define DR_DCMD_LD_GET_PROPERTIES 0x03030000
+#define DR_DCMD_PD_GET_INFO 0x02020000
+
+/*
+ * Solaris Specific MAX values
+ */
+#define MAX_SGL 24
+/*
+ * MFI command completion codes
+ */
+enum MFI_STAT {
+ MFI_STAT_OK = 0x00,
+ MFI_STAT_INVALID_CMD = 0x01,
+ MFI_STAT_INVALID_DCMD = 0x02,
+ MFI_STAT_INVALID_PARAMETER = 0x03,
+ MFI_STAT_INVALID_SEQUENCE_NUMBER = 0x04,
+ MFI_STAT_ABORT_NOT_POSSIBLE = 0x05,
+ MFI_STAT_APP_HOST_CODE_NOT_FOUND = 0x06,
+ MFI_STAT_APP_IN_USE = 0x07,
+ MFI_STAT_APP_NOT_INITIALIZED = 0x08,
+ MFI_STAT_ARRAY_INDEX_INVALID = 0x09,
+ MFI_STAT_ARRAY_ROW_NOT_EMPTY = 0x0a,
+ MFI_STAT_CONFIG_RESOURCE_CONFLICT = 0x0b,
+ MFI_STAT_DEVICE_NOT_FOUND = 0x0c,
+ MFI_STAT_DRIVE_TOO_SMALL = 0x0d,
+ MFI_STAT_FLASH_ALLOC_FAIL = 0x0e,
+ MFI_STAT_FLASH_BUSY = 0x0f,
+ MFI_STAT_FLASH_ERROR = 0x10,
+ MFI_STAT_FLASH_IMAGE_BAD = 0x11,
+ MFI_STAT_FLASH_IMAGE_INCOMPLETE = 0x12,
+ MFI_STAT_FLASH_NOT_OPEN = 0x13,
+ MFI_STAT_FLASH_NOT_STARTED = 0x14,
+ MFI_STAT_FLUSH_FAILED = 0x15,
+ MFI_STAT_HOST_CODE_NOT_FOUNT = 0x16,
+ MFI_STAT_LD_CC_IN_PROGRESS = 0x17,
+ MFI_STAT_LD_INIT_IN_PROGRESS = 0x18,
+ MFI_STAT_LD_LBA_OUT_OF_RANGE = 0x19,
+ MFI_STAT_LD_MAX_CONFIGURED = 0x1a,
+ MFI_STAT_LD_NOT_OPTIMAL = 0x1b,
+ MFI_STAT_LD_RBLD_IN_PROGRESS = 0x1c,
+ MFI_STAT_LD_RECON_IN_PROGRESS = 0x1d,
+ MFI_STAT_LD_WRONG_RAID_LEVEL = 0x1e,
+ MFI_STAT_MAX_SPARES_EXCEEDED = 0x1f,
+ MFI_STAT_MEMORY_NOT_AVAILABLE = 0x20,
+ MFI_STAT_MFC_HW_ERROR = 0x21,
+ MFI_STAT_NO_HW_PRESENT = 0x22,
+ MFI_STAT_NOT_FOUND = 0x23,
+ MFI_STAT_NOT_IN_ENCL = 0x24,
+ MFI_STAT_PD_CLEAR_IN_PROGRESS = 0x25,
+ MFI_STAT_PD_TYPE_WRONG = 0x26,
+ MFI_STAT_PR_DISABLED = 0x27,
+ MFI_STAT_ROW_INDEX_INVALID = 0x28,
+ MFI_STAT_SAS_CONFIG_INVALID_ACTION = 0x29,
+ MFI_STAT_SAS_CONFIG_INVALID_DATA = 0x2a,
+ MFI_STAT_SAS_CONFIG_INVALID_PAGE = 0x2b,
+ MFI_STAT_SAS_CONFIG_INVALID_TYPE = 0x2c,
+ MFI_STAT_SCSI_DONE_WITH_ERROR = 0x2d,
+ MFI_STAT_SCSI_IO_FAILED = 0x2e,
+ MFI_STAT_SCSI_RESERVATION_CONFLICT = 0x2f,
+ MFI_STAT_SHUTDOWN_FAILED = 0x30,
+ MFI_STAT_TIME_NOT_SET = 0x31,
+ MFI_STAT_WRONG_STATE = 0x32,
+ MFI_STAT_LD_OFFLINE = 0x33,
+ /* UNUSED: 0x34 to 0xfe */
+ MFI_STAT_INVALID_STATUS = 0xFF
+};
+
+enum DR_EVT_CLASS {
+ DR_EVT_CLASS_DEBUG = -2,
+ DR_EVT_CLASS_PROGRESS = -1,
+ DR_EVT_CLASS_INFO = 0,
+ DR_EVT_CLASS_WARNING = 1,
+ DR_EVT_CLASS_CRITICAL = 2,
+ DR_EVT_CLASS_FATAL = 3,
+ DR_EVT_CLASS_DEAD = 4
+};
+
+enum DR_EVT_LOCALE {
+ DR_EVT_LOCALE_LD = 0x0001,
+ DR_EVT_LOCALE_PD = 0x0002,
+ DR_EVT_LOCALE_ENCL = 0x0004,
+ DR_EVT_LOCALE_BBU = 0x0008,
+ DR_EVT_LOCALE_SAS = 0x0010,
+ DR_EVT_LOCALE_CTRL = 0x0020,
+ DR_EVT_LOCALE_CONFIG = 0x0040,
+ DR_EVT_LOCALE_CLUSTER = 0x0080,
+ DR_EVT_LOCALE_ALL = 0xffff
+};
+
+#define DR_EVT_CFG_CLEARED 0x0004
+#define DR_EVT_LD_CREATED 0x008a
+#define DR_EVT_LD_DELETED 0x008b
+#define DR_EVT_PD_REMOVED_EXT 0x00f8
+#define DR_EVT_PD_INSERTED_EXT 0x00f7
+
+enum LD_STATE {
+ LD_OFFLINE = 0,
+ LD_PARTIALLY_DEGRADED = 1,
+ LD_DEGRADED = 2,
+ LD_OPTIMAL = 3,
+ LD_INVALID = 0xFF
+};
+
+enum DRSAS_EVT {
+ DRSAS_EVT_CONFIG_TGT = 0,
+ DRSAS_EVT_UNCONFIG_TGT = 1,
+ DRSAS_EVT_UNCONFIG_SMP = 2
+};
+
+#define DMA_OBJ_ALLOCATED 1
+#define DMA_OBJ_REALLOCATED 2
+#define DMA_OBJ_FREED 3
+
+/*
+ * dma_obj_t - Our DMA object
+ * @param buffer : kernel virtual address
+ * @param size : size of the data to be allocated
+ * @param acc_handle : access handle
+ * @param dma_handle : dma handle
+ * @param dma_cookie : scatter-gather list
+ * @param dma_attr : dma attributes for this buffer
+ * Our DMA object. The caller must initialize the size and dma attributes
+ * (dma_attr) fields before allocating the resources.
+ */
+typedef struct {
+ caddr_t buffer;
+ uint32_t size;
+ ddi_acc_handle_t acc_handle;
+ ddi_dma_handle_t dma_handle;
+ ddi_dma_cookie_t dma_cookie[DRSAS_MAX_SGE_CNT];
+ ddi_dma_attr_t dma_attr;
+ uint8_t status;
+ uint8_t reserved[3];
+} dma_obj_t;
+
+struct drsas_eventinfo {
+ struct drsas_instance *instance;
+ int tgt;
+ int lun;
+ int event;
+};
+
+struct drsas_ld {
+ dev_info_t *dip;
+ uint8_t lun_type;
+ uint8_t reserved[3];
+};
+
+struct drsas_pd {
+ dev_info_t *dip;
+ uint8_t lun_type;
+ uint8_t dev_id;
+ uint8_t flags;
+ uint8_t reserved;
+};
+
+struct drsas_pd_info {
+ uint16_t deviceId;
+ uint16_t seqNum;
+ uint8_t inquiryData[96];
+ uint8_t vpdPage83[64];
+ uint8_t notSupported;
+ uint8_t scsiDevType;
+ uint8_t a;
+ uint8_t device_speed;
+ uint32_t mediaerrcnt;
+ uint32_t other;
+ uint32_t pred;
+ uint32_t lastpred;
+ uint16_t fwState;
+ uint8_t disabled;
+ uint8_t linkspwwd;
+ uint32_t ddfType;
+ struct {
+ uint8_t count;
+ uint8_t isPathBroken;
+ uint8_t connectorIndex[2];
+ uint8_t reserved[4];
+ uint64_t sasAddr[2];
+ uint8_t reserved2[16];
+ } pathInfo;
+};
+
+typedef struct drsas_instance {
+ uint32_t *producer;
+ uint32_t *consumer;
+
+ uint32_t *reply_queue;
+ dma_obj_t mfi_internal_dma_obj;
+
+ uint8_t init_id;
+ uint8_t reserved[3];
+
+ uint16_t max_num_sge;
+ uint16_t max_fw_cmds;
+ uint32_t max_sectors_per_req;
+
+ struct drsas_cmd **cmd_list;
+
+ mlist_t cmd_pool_list;
+ kmutex_t cmd_pool_mtx;
+
+ mlist_t cmd_pend_list;
+ kmutex_t cmd_pend_mtx;
+
+ dma_obj_t mfi_evt_detail_obj;
+ struct drsas_cmd *aen_cmd;
+
+ uint32_t aen_seq_num;
+ uint32_t aen_class_locale_word;
+
+ scsi_hba_tran_t *tran;
+
+ kcondvar_t int_cmd_cv;
+ kmutex_t int_cmd_mtx;
+
+ kcondvar_t aen_cmd_cv;
+ kmutex_t aen_cmd_mtx;
+
+ kcondvar_t abort_cmd_cv;
+ kmutex_t abort_cmd_mtx;
+
+ dev_info_t *dip;
+ ddi_acc_handle_t pci_handle;
+
+ timeout_id_t timeout_id;
+ uint32_t unique_id;
+ uint16_t fw_outstanding;
+ caddr_t regmap;
+ ddi_acc_handle_t regmap_handle;
+ uint8_t isr_level;
+ ddi_iblock_cookie_t iblock_cookie;
+ ddi_iblock_cookie_t soft_iblock_cookie;
+ ddi_softintr_t soft_intr_id;
+ uint8_t softint_running;
+ kmutex_t completed_pool_mtx;
+ mlist_t completed_pool_list;
+
+ caddr_t internal_buf;
+ uint32_t internal_buf_dmac_add;
+ uint32_t internal_buf_size;
+
+ uint16_t vendor_id;
+ uint16_t device_id;
+ uint16_t subsysvid;
+ uint16_t subsysid;
+ int instance;
+ int baseaddress;
+ char iocnode[16];
+
+ int fm_capabilities;
+
+ struct drsas_func_ptr *func_ptr;
+ /* MSI interrupts specific */
+ ddi_intr_handle_t *intr_htable;
+ int intr_type;
+ int intr_cnt;
+ size_t intr_size;
+ uint_t intr_pri;
+ int intr_cap;
+
+ ddi_taskq_t *taskq;
+ struct drsas_ld *dr_ld_list;
+} drsas_t;
+
+struct drsas_func_ptr {
+ int (*read_fw_status_reg)(struct drsas_instance *);
+ void (*issue_cmd)(struct drsas_cmd *, struct drsas_instance *);
+ int (*issue_cmd_in_sync_mode)(struct drsas_instance *,
+ struct drsas_cmd *);
+ int (*issue_cmd_in_poll_mode)(struct drsas_instance *,
+ struct drsas_cmd *);
+ void (*enable_intr)(struct drsas_instance *);
+ void (*disable_intr)(struct drsas_instance *);
+ int (*intr_ack)(struct drsas_instance *);
+};
+
+/*
+ * ### Helper routines ###
+ */
+
+/*
+ * con_log() - console log routine
+ * @param level : indicates the severity of the message.
+ * @fparam mt : format string
+ *
+ * con_log displays the error messages on the console based on the current
+ * debug level. Also it attaches the appropriate kernel severity level with
+ * the message.
+ *
+ *
+ * console messages debug levels
+ */
+#define CL_NONE 0 /* No debug information */
+#define CL_ANN 1 /* print unconditionally, announcements */
+#define CL_ANN1 2 /* No o/p */
+#define CL_DLEVEL1 3 /* debug level 1, informative */
+#define CL_DLEVEL2 4 /* debug level 2, verbose */
+#define CL_DLEVEL3 5 /* debug level 3, very verbose */
+
+#ifdef __SUNPRO_C
+#define __func__ ""
+#endif
+
+#define con_log(level, fmt) { if (debug_level_g >= level) cmn_err fmt; }
+
+/*
+ * ### SCSA definitions ###
+ */
+#define PKT2TGT(pkt) ((pkt)->pkt_address.a_target)
+#define PKT2LUN(pkt) ((pkt)->pkt_address.a_lun)
+#define PKT2TRAN(pkt) ((pkt)->pkt_adress.a_hba_tran)
+#define ADDR2TRAN(ap) ((ap)->a_hba_tran)
+
+#define TRAN2MR(tran) (struct drsas_instance *)(tran)->tran_hba_private)
+#define ADDR2MR(ap) (TRAN2MR(ADDR2TRAN(ap))
+
+#define PKT2CMD(pkt) ((struct scsa_cmd *)(pkt)->pkt_ha_private)
+#define CMD2PKT(sp) ((sp)->cmd_pkt)
+#define PKT2REQ(pkt) (&(PKT2CMD(pkt)->request))
+
+#define CMD2ADDR(cmd) (&CMD2PKT(cmd)->pkt_address)
+#define CMD2TRAN(cmd) (CMD2PKT(cmd)->pkt_address.a_hba_tran)
+#define CMD2MR(cmd) (TRAN2MR(CMD2TRAN(cmd)))
+
+#define CFLAG_DMAVALID 0x0001 /* requires a dma operation */
+#define CFLAG_DMASEND 0x0002 /* Transfer from the device */
+#define CFLAG_CONSISTENT 0x0040 /* consistent data transfer */
+
+/*
+ * ### Data structures for ioctl inteface and internal commands ###
+ */
+
+/*
+ * Data direction flags
+ */
+#define UIOC_RD 0x00001
+#define UIOC_WR 0x00002
+
+#define SCP2HOST(scp) (scp)->device->host /* to host */
+#define SCP2HOSTDATA(scp) SCP2HOST(scp)->hostdata /* to soft state */
+#define SCP2CHANNEL(scp) (scp)->device->channel /* to channel */
+#define SCP2TARGET(scp) (scp)->device->id /* to target */
+#define SCP2LUN(scp) (scp)->device->lun /* to LUN */
+
+#define SCSIHOST2ADAP(host) (((caddr_t *)(host->hostdata))[0])
+#define SCP2ADAPTER(scp) \
+ (struct drsas_instance *)SCSIHOST2ADAP(SCP2HOST(scp))
+
+#define MRDRV_IS_LOGICAL_SCSA(instance, acmd) \
+ (acmd->device_id < MRDRV_MAX_LD) ? 1 : 0
+#define MRDRV_IS_LOGICAL(ap) \
+ ((ap->a_target < MRDRV_MAX_LD) && (ap->a_lun == 0)) ? 1 : 0
+#define MAP_DEVICE_ID(instance, ap) \
+ (ap->a_target)
+
+#define HIGH_LEVEL_INTR 1
+#define NORMAL_LEVEL_INTR 0
+
+/*
+ * scsa_cmd - Per-command mr private data
+ * @param cmd_dmahandle : dma handle
+ * @param cmd_dmacookies : current dma cookies
+ * @param cmd_pkt : scsi_pkt reference
+ * @param cmd_dmacount : dma count
+ * @param cmd_cookie : next cookie
+ * @param cmd_ncookies : cookies per window
+ * @param cmd_cookiecnt : cookies per sub-win
+ * @param cmd_nwin : number of dma windows
+ * @param cmd_curwin : current dma window
+ * @param cmd_dma_offset : current window offset
+ * @param cmd_dma_len : current window length
+ * @param cmd_flags : private flags
+ * @param cmd_cdblen : length of cdb
+ * @param cmd_scblen : length of scb
+ * @param cmd_buf : command buffer
+ * @param channel : channel for scsi sub-system
+ * @param target : target for scsi sub-system
+ * @param lun : LUN for scsi sub-system
+ *
+ * - Allocated at same time as scsi_pkt by scsi_hba_pkt_alloc(9E)
+ * - Pointed to by pkt_ha_private field in scsi_pkt
+ */
+struct scsa_cmd {
+ ddi_dma_handle_t cmd_dmahandle;
+ ddi_dma_cookie_t cmd_dmacookies[DRSAS_MAX_SGE_CNT];
+ struct scsi_pkt *cmd_pkt;
+ ulong_t cmd_dmacount;
+ uint_t cmd_cookie;
+ uint_t cmd_ncookies;
+ uint_t cmd_cookiecnt;
+ uint_t cmd_nwin;
+ uint_t cmd_curwin;
+ off_t cmd_dma_offset;
+ ulong_t cmd_dma_len;
+ ulong_t cmd_flags;
+ uint_t cmd_cdblen;
+ uint_t cmd_scblen;
+ struct buf *cmd_buf;
+ ushort_t device_id;
+ uchar_t islogical;
+ uchar_t lun;
+ struct drsas_device *drsas_dev;
+};
+
+
+struct drsas_cmd {
+ union drsas_frame *frame;
+ uint32_t frame_phys_addr;
+ uint8_t *sense;
+ uint32_t sense_phys_addr;
+ dma_obj_t frame_dma_obj;
+ uint8_t frame_dma_obj_status;
+
+ uint32_t index;
+ uint8_t sync_cmd;
+ uint8_t cmd_status;
+ uint16_t abort_aen;
+ mlist_t list;
+ uint32_t frame_count;
+ struct scsa_cmd *cmd;
+ struct scsi_pkt *pkt;
+};
+
+#define MAX_MGMT_ADAPTERS 1024
+#define IOC_SIGNATURE "MR-SAS"
+
+#define IOC_CMD_FIRMWARE 0x0
+#define DRSAS_DRIVER_IOCTL_COMMON 0xF0010000
+#define DRSAS_DRIVER_IOCTL_DRIVER_VERSION 0xF0010100
+#define DRSAS_DRIVER_IOCTL_PCI_INFORMATION 0xF0010200
+#define DRSAS_DRIVER_IOCTL_MRRAID_STATISTICS 0xF0010300
+
+
+#define DRSAS_MAX_SENSE_LENGTH 32
+
+struct drsas_mgmt_info {
+
+ uint16_t count;
+ struct drsas_instance *instance[MAX_MGMT_ADAPTERS];
+ uint16_t map[MAX_MGMT_ADAPTERS];
+ int max_index;
+};
+
+#pragma pack(1)
+
+/*
+ * SAS controller properties
+ */
+struct drsas_ctrl_prop {
+ uint16_t seq_num;
+ uint16_t pred_fail_poll_interval;
+ uint16_t intr_throttle_count;
+ uint16_t intr_throttle_timeouts;
+
+ uint8_t rebuild_rate;
+ uint8_t patrol_read_rate;
+ uint8_t bgi_rate;
+ uint8_t cc_rate;
+ uint8_t recon_rate;
+
+ uint8_t cache_flush_interval;
+
+ uint8_t spinup_drv_count;
+ uint8_t spinup_delay;
+
+ uint8_t cluster_enable;
+ uint8_t coercion_mode;
+ uint8_t disk_write_cache_disable;
+ uint8_t alarm_enable;
+
+ uint8_t reserved[44];
+};
+
+/*
+ * SAS controller information
+ */
+struct drsas_ctrl_info {
+ /* PCI device information */
+ struct {
+ uint16_t vendor_id;
+ uint16_t device_id;
+ uint16_t sub_vendor_id;
+ uint16_t sub_device_id;
+ uint8_t reserved[24];
+ } pci;
+
+ /* Host interface information */
+ struct {
+ uint8_t PCIX : 1;
+ uint8_t PCIE : 1;
+ uint8_t iSCSI : 1;
+ uint8_t SAS_3G : 1;
+ uint8_t reserved_0 : 4;
+ uint8_t reserved_1[6];
+ uint8_t port_count;
+ uint64_t port_addr[8];
+ } host_interface;
+
+ /* Device (backend) interface information */
+ struct {
+ uint8_t SPI : 1;
+ uint8_t SAS_3G : 1;
+ uint8_t SATA_1_5G : 1;
+ uint8_t SATA_3G : 1;
+ uint8_t reserved_0 : 4;
+ uint8_t reserved_1[6];
+ uint8_t port_count;
+ uint64_t port_addr[8];
+ } device_interface;
+
+ /* List of components residing in flash. All str are null terminated */
+ uint32_t image_check_word;
+ uint32_t image_component_count;
+
+ struct {
+ char name[8];
+ char version[32];
+ char build_date[16];
+ char built_time[16];
+ } image_component[8];
+
+ /*
+ * List of flash components that have been flashed on the card, but
+ * are not in use, pending reset of the adapter. This list will be
+ * empty if a flash operation has not occurred. All stings are null
+ * terminated
+ */
+ uint32_t pending_image_component_count;
+
+ struct {
+ char name[8];
+ char version[32];
+ char build_date[16];
+ char build_time[16];
+ } pending_image_component[8];
+
+ uint8_t max_arms;
+ uint8_t max_spans;
+ uint8_t max_arrays;
+ uint8_t max_lds;
+
+ char product_name[80];
+ char serial_no[32];
+
+ /*
+ * Other physical/controller/operation information. Indicates the
+ * presence of the hardware
+ */
+ struct {
+ uint32_t bbu : 1;
+ uint32_t alarm : 1;
+ uint32_t nvram : 1;
+ uint32_t uart : 1;
+ uint32_t reserved : 28;
+ } hw_present;
+
+ uint32_t current_fw_time;
+
+ /* Maximum data transfer sizes */
+ uint16_t max_concurrent_cmds;
+ uint16_t max_sge_count;
+ uint32_t max_request_size;
+
+ /* Logical and physical device counts */
+ uint16_t ld_present_count;
+ uint16_t ld_degraded_count;
+ uint16_t ld_offline_count;
+
+ uint16_t pd_present_count;
+ uint16_t pd_disk_present_count;
+ uint16_t pd_disk_pred_failure_count;
+ uint16_t pd_disk_failed_count;
+
+ /* Memory size information */
+ uint16_t nvram_size;
+ uint16_t memory_size;
+ uint16_t flash_size;
+
+ /* Error counters */
+ uint16_t mem_correctable_error_count;
+ uint16_t mem_uncorrectable_error_count;
+
+ /* Cluster information */
+ uint8_t cluster_permitted;
+ uint8_t cluster_active;
+ uint8_t reserved_1[2];
+
+ /* Controller capabilities structures */
+ struct {
+ uint32_t raid_level_0 : 1;
+ uint32_t raid_level_1 : 1;
+ uint32_t raid_level_5 : 1;
+ uint32_t raid_level_1E : 1;
+ uint32_t reserved : 28;
+ } raid_levels;
+
+ struct {
+ uint32_t rbld_rate : 1;
+ uint32_t cc_rate : 1;
+ uint32_t bgi_rate : 1;
+ uint32_t recon_rate : 1;
+ uint32_t patrol_rate : 1;
+ uint32_t alarm_control : 1;
+ uint32_t cluster_supported : 1;
+ uint32_t bbu : 1;
+ uint32_t spanning_allowed : 1;
+ uint32_t dedicated_hotspares : 1;
+ uint32_t revertible_hotspares : 1;
+ uint32_t foreign_config_import : 1;
+ uint32_t self_diagnostic : 1;
+ uint32_t reserved : 19;
+ } adapter_operations;
+
+ struct {
+ uint32_t read_policy : 1;
+ uint32_t write_policy : 1;
+ uint32_t io_policy : 1;
+ uint32_t access_policy : 1;
+ uint32_t reserved : 28;
+ } ld_operations;
+
+ struct {
+ uint8_t min;
+ uint8_t max;
+ uint8_t reserved[2];
+ } stripe_size_operations;
+
+ struct {
+ uint32_t force_online : 1;
+ uint32_t force_offline : 1;
+ uint32_t force_rebuild : 1;
+ uint32_t reserved : 29;
+ } pd_operations;
+
+ struct {
+ uint32_t ctrl_supports_sas : 1;
+ uint32_t ctrl_supports_sata : 1;
+ uint32_t allow_mix_in_encl : 1;
+ uint32_t allow_mix_in_ld : 1;
+ uint32_t allow_sata_in_cluster : 1;
+ uint32_t reserved : 27;
+ } pd_mix_support;
+
+ /* Include the controller properties (changeable items) */
+ uint8_t reserved_2[12];
+ struct drsas_ctrl_prop properties;
+
+ uint8_t pad[0x800 - 0x640];
+};
+
+/*
+ * ==================================
+ * MegaRAID SAS2.0 driver definitions
+ * ==================================
+ */
+#define MRDRV_MAX_NUM_CMD 1024
+
+#define MRDRV_MAX_PD_CHANNELS 2
+#define MRDRV_MAX_LD_CHANNELS 2
+#define MRDRV_MAX_CHANNELS (MRDRV_MAX_PD_CHANNELS + \
+ MRDRV_MAX_LD_CHANNELS)
+#define MRDRV_MAX_DEV_PER_CHANNEL 128
+#define MRDRV_DEFAULT_INIT_ID -1
+#define MRDRV_MAX_CMD_PER_LUN 1000
+#define MRDRV_MAX_LUN 1
+#define MRDRV_MAX_LD 64
+
+#define MRDRV_RESET_WAIT_TIME 300
+#define MRDRV_RESET_NOTICE_INTERVAL 5
+
+#define DRSAS_IOCTL_CMD 0
+
+/*
+ * FW can accept both 32 and 64 bit SGLs. We want to allocate 32/64 bit
+ * SGLs based on the size of dma_addr_t
+ */
+#define IS_DMA64 (sizeof (dma_addr_t) == 8)
+
+#define IB_MSG_0_OFF 0x10 /* XScale */
+#define OB_MSG_0_OFF 0x18 /* XScale */
+#define IB_DOORBELL_OFF 0x20 /* XScale & ROC */
+#define OB_INTR_STATUS_OFF 0x30 /* XScale & ROC */
+#define OB_INTR_MASK_OFF 0x34 /* XScale & ROC */
+#define IB_QPORT_OFF 0x40 /* XScale & ROC */
+#define OB_DOORBELL_CLEAR_OFF 0xA0 /* ROC */
+#define OB_SCRATCH_PAD_0_OFF 0xB0 /* ROC */
+#define OB_INTR_MASK 0xFFFFFFFF
+#define OB_DOORBELL_CLEAR_MASK 0xFFFFFFFF
+
+/*
+ * All MFI register set macros accept drsas_register_set*
+ */
+#define WR_IB_MSG_0(v, instance) ddi_put32((instance)->regmap_handle, \
+ (uint32_t *)((uintptr_t)(instance)->regmap + IB_MSG_0_OFF), (v))
+
+#define RD_OB_MSG_0(instance) ddi_get32((instance)->regmap_handle, \
+ (uint32_t *)((uintptr_t)(instance)->regmap + OB_MSG_0_OFF))
+
+#define WR_IB_DOORBELL(v, instance) ddi_put32((instance)->regmap_handle, \
+ (uint32_t *)((uintptr_t)(instance)->regmap + IB_DOORBELL_OFF), (v))
+
+#define RD_IB_DOORBELL(instance) ddi_get32((instance)->regmap_handle, \
+ (uint32_t *)((uintptr_t)(instance)->regmap + IB_DOORBELL_OFF))
+
+#define WR_OB_INTR_STATUS(v, instance) ddi_put32((instance)->regmap_handle, \
+ (uint32_t *)((uintptr_t)(instance)->regmap + OB_INTR_STATUS_OFF), (v))
+
+#define RD_OB_INTR_STATUS(instance) ddi_get32((instance)->regmap_handle, \
+ (uint32_t *)((uintptr_t)(instance)->regmap + OB_INTR_STATUS_OFF))
+
+#define WR_OB_INTR_MASK(v, instance) ddi_put32((instance)->regmap_handle, \
+ (uint32_t *)((uintptr_t)(instance)->regmap + OB_INTR_MASK_OFF), (v))
+
+#define RD_OB_INTR_MASK(instance) ddi_get32((instance)->regmap_handle, \
+ (uint32_t *)((uintptr_t)(instance)->regmap + OB_INTR_MASK_OFF))
+
+#define WR_IB_QPORT(v, instance) ddi_put32((instance)->regmap_handle, \
+ (uint32_t *)((uintptr_t)(instance)->regmap + IB_QPORT_OFF), (v))
+
+#define WR_OB_DOORBELL_CLEAR(v, instance) ddi_put32((instance)->regmap_handle, \
+ (uint32_t *)((uintptr_t)(instance)->regmap + OB_DOORBELL_CLEAR_OFF), \
+ (v))
+
+#define RD_OB_SCRATCH_PAD_0(instance) ddi_get32((instance)->regmap_handle, \
+ (uint32_t *)((uintptr_t)(instance)->regmap + OB_SCRATCH_PAD_0_OFF))
+
+/*
+ * When FW is in MFI_STATE_READY or MFI_STATE_OPERATIONAL, the state data
+ * of Outbound Msg Reg 0 indicates max concurrent cmds supported, max SGEs
+ * supported per cmd and if 64-bit MFAs (M64) is enabled or disabled.
+ */
+#define MFI_OB_INTR_STATUS_MASK 0x00000002
+
+/*
+ * This MFI_REPLY_2108_MESSAGE_INTR flag is used also
+ * in enable_intr_ppc also. Hence bit 2, i.e. 0x4 has
+ * been set in this flag along with bit 1.
+ */
+#define MFI_REPLY_2108_MESSAGE_INTR 0x00000001
+#define MFI_REPLY_2108_MESSAGE_INTR_MASK 0x00000005
+
+#define MFI_POLL_TIMEOUT_SECS 60
+
+#define MFI_ENABLE_INTR(instance) ddi_put32((instance)->regmap_handle, \
+ (uint32_t *)((uintptr_t)(instance)->regmap + OB_INTR_MASK_OFF), 1)
+#define MFI_DISABLE_INTR(instance) \
+{ \
+ uint32_t disable = 1; \
+ uint32_t mask = ddi_get32((instance)->regmap_handle, \
+ (uint32_t *)((uintptr_t)(instance)->regmap + OB_INTR_MASK_OFF));\
+ mask &= ~disable; \
+ ddi_put32((instance)->regmap_handle, (uint32_t *) \
+ (uintptr_t)((instance)->regmap + OB_INTR_MASK_OFF), mask); \
+}
+
+/* By default, the firmware programs for 8 Kbytes of memory */
+#define DEFAULT_MFI_MEM_SZ 8192
+#define MINIMUM_MFI_MEM_SZ 4096
+
+/* DCMD Message Frame MAILBOX0-11 */
+#define DCMD_MBOX_SZ 12
+
+
+struct drsas_register_set {
+ uint32_t reserved_0[4];
+
+ uint32_t inbound_msg_0;
+ uint32_t inbound_msg_1;
+ uint32_t outbound_msg_0;
+ uint32_t outbound_msg_1;
+
+ uint32_t inbound_doorbell;
+ uint32_t inbound_intr_status;
+ uint32_t inbound_intr_mask;
+
+ uint32_t outbound_doorbell;
+ uint32_t outbound_intr_status;
+ uint32_t outbound_intr_mask;
+
+ uint32_t reserved_1[2];
+
+ uint32_t inbound_queue_port;
+ uint32_t outbound_queue_port;
+
+ uint32_t reserved_2[22];
+
+ uint32_t outbound_doorbell_clear;
+
+ uint32_t reserved_3[3];
+
+ uint32_t outbound_scratch_pad;
+
+ uint32_t reserved_4[3];
+
+ uint32_t inbound_low_queue_port;
+
+ uint32_t inbound_high_queue_port;
+
+ uint32_t reserved_5;
+ uint32_t index_registers[820];
+};
+
+struct drsas_sge32 {
+ uint32_t phys_addr;
+ uint32_t length;
+};
+
+struct drsas_sge64 {
+ uint64_t phys_addr;
+ uint32_t length;
+};
+
+union drsas_sgl {
+ struct drsas_sge32 sge32[1];
+ struct drsas_sge64 sge64[1];
+};
+
+struct drsas_header {
+ uint8_t cmd;
+ uint8_t sense_len;
+ uint8_t cmd_status;
+ uint8_t scsi_status;
+
+ uint8_t target_id;
+ uint8_t lun;
+ uint8_t cdb_len;
+ uint8_t sge_count;
+
+ uint32_t context;
+ uint8_t req_id;
+ uint8_t msgvector;
+ uint16_t pad_0;
+
+ uint16_t flags;
+ uint16_t timeout;
+ uint32_t data_xferlen;
+};
+
+union drsas_sgl_frame {
+ struct drsas_sge32 sge32[8];
+ struct drsas_sge64 sge64[5];
+};
+
+struct drsas_init_frame {
+ uint8_t cmd;
+ uint8_t reserved_0;
+ uint8_t cmd_status;
+
+ uint8_t reserved_1;
+ uint32_t reserved_2;
+
+ uint32_t context;
+ uint8_t req_id;
+ uint8_t msgvector;
+ uint16_t pad_0;
+
+ uint16_t flags;
+ uint16_t reserved_3;
+ uint32_t data_xfer_len;
+
+ uint32_t queue_info_new_phys_addr_lo;
+ uint32_t queue_info_new_phys_addr_hi;
+ uint32_t queue_info_old_phys_addr_lo;
+ uint32_t queue_info_old_phys_addr_hi;
+
+ uint32_t reserved_4[6];
+};
+
+struct drsas_init_queue_info {
+ uint32_t init_flags;
+ uint32_t reply_queue_entries;
+
+ uint32_t reply_queue_start_phys_addr_lo;
+ uint32_t reply_queue_start_phys_addr_hi;
+ uint32_t producer_index_phys_addr_lo;
+ uint32_t producer_index_phys_addr_hi;
+ uint32_t consumer_index_phys_addr_lo;
+ uint32_t consumer_index_phys_addr_hi;
+};
+
+struct drsas_io_frame {
+ uint8_t cmd;
+ uint8_t sense_len;
+ uint8_t cmd_status;
+ uint8_t scsi_status;
+
+ uint8_t target_id;
+ uint8_t access_byte;
+ uint8_t reserved_0;
+ uint8_t sge_count;
+
+ uint32_t context;
+ uint8_t req_id;
+ uint8_t msgvector;
+ uint16_t pad_0;
+
+ uint16_t flags;
+ uint16_t timeout;
+ uint32_t lba_count;
+
+ uint32_t sense_buf_phys_addr_lo;
+ uint32_t sense_buf_phys_addr_hi;
+
+ uint32_t start_lba_lo;
+ uint32_t start_lba_hi;
+
+ union drsas_sgl sgl;
+};
+
+struct drsas_pthru_frame {
+ uint8_t cmd;
+ uint8_t sense_len;
+ uint8_t cmd_status;
+ uint8_t scsi_status;
+
+ uint8_t target_id;
+ uint8_t lun;
+ uint8_t cdb_len;
+ uint8_t sge_count;
+
+ uint32_t context;
+ uint8_t req_id;
+ uint8_t msgvector;
+ uint16_t pad_0;
+
+ uint16_t flags;
+ uint16_t timeout;
+ uint32_t data_xfer_len;
+
+ uint32_t sense_buf_phys_addr_lo;
+ uint32_t sense_buf_phys_addr_hi;
+
+ uint8_t cdb[16];
+ union drsas_sgl sgl;
+};
+
+struct drsas_dcmd_frame {
+ uint8_t cmd;
+ uint8_t reserved_0;
+ uint8_t cmd_status;
+ uint8_t reserved_1[4];
+ uint8_t sge_count;
+
+ uint32_t context;
+ uint8_t req_id;
+ uint8_t msgvector;
+ uint16_t pad_0;
+
+ uint16_t flags;
+ uint16_t timeout;
+
+ uint32_t data_xfer_len;
+ uint32_t opcode;
+
+ union {
+ uint8_t b[DCMD_MBOX_SZ];
+ uint16_t s[6];
+ uint32_t w[3];
+ } mbox;
+
+ union drsas_sgl sgl;
+};
+
+struct drsas_abort_frame {
+ uint8_t cmd;
+ uint8_t reserved_0;
+ uint8_t cmd_status;
+
+ uint8_t reserved_1;
+ uint32_t reserved_2;
+
+ uint32_t context;
+ uint8_t req_id;
+ uint8_t msgvector;
+ uint16_t pad_0;
+
+ uint16_t flags;
+ uint16_t reserved_3;
+ uint32_t reserved_4;
+
+ uint32_t abort_context;
+ uint32_t pad_1;
+
+ uint32_t abort_mfi_phys_addr_lo;
+ uint32_t abort_mfi_phys_addr_hi;
+
+ uint32_t reserved_5[6];
+};
+
+struct drsas_smp_frame {
+ uint8_t cmd;
+ uint8_t reserved_1;
+ uint8_t cmd_status;
+ uint8_t connection_status;
+
+ uint8_t reserved_2[3];
+ uint8_t sge_count;
+
+ uint32_t context;
+ uint8_t req_id;
+ uint8_t msgvector;
+ uint16_t pad_0;
+
+ uint16_t flags;
+ uint16_t timeout;
+
+ uint32_t data_xfer_len;
+
+ uint64_t sas_addr;
+
+ union drsas_sgl sgl[2];
+};
+
+struct drsas_stp_frame {
+ uint8_t cmd;
+ uint8_t reserved_1;
+ uint8_t cmd_status;
+ uint8_t connection_status;
+
+ uint8_t target_id;
+ uint8_t reserved_2[2];
+ uint8_t sge_count;
+
+ uint32_t context;
+ uint8_t req_id;
+ uint8_t msgvector;
+ uint16_t pad_0;
+
+ uint16_t flags;
+ uint16_t timeout;
+
+ uint32_t data_xfer_len;
+
+ uint16_t fis[10];
+ uint32_t stp_flags;
+ union drsas_sgl sgl;
+};
+
+union drsas_frame {
+ struct drsas_header hdr;
+ struct drsas_init_frame init;
+ struct drsas_io_frame io;
+ struct drsas_pthru_frame pthru;
+ struct drsas_dcmd_frame dcmd;
+ struct drsas_abort_frame abort;
+ struct drsas_smp_frame smp;
+ struct drsas_stp_frame stp;
+
+ uint8_t raw_bytes[64];
+};
+
+typedef struct drsas_pd_address {
+ uint16_t device_id;
+ uint16_t encl_id;
+
+ union {
+ struct {
+ uint8_t encl_index;
+ uint8_t slot_number;
+ } pd_address;
+ struct {
+ uint8_t encl_position;
+ uint8_t encl_connector_index;
+ } encl_address;
+ }address;
+
+ uint8_t scsi_dev_type;
+
+ union {
+ uint8_t port_bitmap;
+ uint8_t port_numbers;
+ } connected;
+
+ uint64_t sas_addr[2];
+} drsas_pd_address_t;
+
+union drsas_evt_class_locale {
+ struct {
+ uint16_t locale;
+ uint8_t reserved;
+ int8_t class;
+ } members;
+
+ uint32_t word;
+};
+
+struct drsas_evt_log_info {
+ uint32_t newest_seq_num;
+ uint32_t oldest_seq_num;
+ uint32_t clear_seq_num;
+ uint32_t shutdown_seq_num;
+ uint32_t boot_seq_num;
+};
+
+struct drsas_progress {
+ uint16_t progress;
+ uint16_t elapsed_seconds;
+};
+
+struct drsas_evtarg_ld {
+ uint16_t target_id;
+ uint8_t ld_index;
+ uint8_t reserved;
+};
+
+struct drsas_evtarg_pd {
+ uint16_t device_id;
+ uint8_t encl_index;
+ uint8_t slot_number;
+};
+
+struct drsas_evt_detail {
+ uint32_t seq_num;
+ uint32_t time_stamp;
+ uint32_t code;
+ union drsas_evt_class_locale cl;
+ uint8_t arg_type;
+ uint8_t reserved1[15];
+
+ union {
+ struct {
+ struct drsas_evtarg_pd pd;
+ uint8_t cdb_length;
+ uint8_t sense_length;
+ uint8_t reserved[2];
+ uint8_t cdb[16];
+ uint8_t sense[64];
+ } cdbSense;
+
+ struct drsas_evtarg_ld ld;
+
+ struct {
+ struct drsas_evtarg_ld ld;
+ uint64_t count;
+ } ld_count;
+
+ struct {
+ uint64_t lba;
+ struct drsas_evtarg_ld ld;
+ } ld_lba;
+
+ struct {
+ struct drsas_evtarg_ld ld;
+ uint32_t prevOwner;
+ uint32_t newOwner;
+ } ld_owner;
+
+ struct {
+ uint64_t ld_lba;
+ uint64_t pd_lba;
+ struct drsas_evtarg_ld ld;
+ struct drsas_evtarg_pd pd;
+ } ld_lba_pd_lba;
+
+ struct {
+ struct drsas_evtarg_ld ld;
+ struct drsas_progress prog;
+ } ld_prog;
+
+ struct {
+ struct drsas_evtarg_ld ld;
+ uint32_t prev_state;
+ uint32_t new_state;
+ } ld_state;
+
+ struct {
+ uint64_t strip;
+ struct drsas_evtarg_ld ld;
+ } ld_strip;
+
+ struct drsas_evtarg_pd pd;
+
+ struct {
+ struct drsas_evtarg_pd pd;
+ uint32_t err;
+ } pd_err;
+
+ struct {
+ uint64_t lba;
+ struct drsas_evtarg_pd pd;
+ } pd_lba;
+
+ struct {
+ uint64_t lba;
+ struct drsas_evtarg_pd pd;
+ struct drsas_evtarg_ld ld;
+ } pd_lba_ld;
+
+ struct {
+ struct drsas_evtarg_pd pd;
+ struct drsas_progress prog;
+ } pd_prog;
+
+ struct {
+ struct drsas_evtarg_pd pd;
+ uint32_t prevState;
+ uint32_t newState;
+ } pd_state;
+
+ struct {
+ uint16_t vendorId;
+ uint16_t deviceId;
+ uint16_t subVendorId;
+ uint16_t subDeviceId;
+ } pci;
+
+ uint32_t rate;
+ char str[96];
+
+ struct {
+ uint32_t rtc;
+ uint32_t elapsedSeconds;
+ } time;
+
+ struct {
+ uint32_t ecar;
+ uint32_t elog;
+ char str[64];
+ } ecc;
+
+ drsas_pd_address_t pd_addr;
+
+ uint8_t b[96];
+ uint16_t s[48];
+ uint32_t w[24];
+ uint64_t d[12];
+ } args;
+
+ char description[128];
+
+};
+
+/* only 63 are usable by the application */
+#define MAX_LOGICAL_DRIVES 64
+/* only 255 physical devices may be used */
+#define MAX_PHYSICAL_DEVICES 256
+#define MAX_PD_PER_ENCLOSURE 64
+/* maximum disks per array */
+#define MAX_ROW_SIZE 32
+/* maximum spans per logical drive */
+#define MAX_SPAN_DEPTH 8
+/* maximum number of arrays a hot spare may be dedicated to */
+#define MAX_ARRAYS_DEDICATED 16
+/* maximum number of arrays which may exist */
+#define MAX_ARRAYS 128
+/* maximum number of foreign configs that may ha managed at once */
+#define MAX_FOREIGN_CONFIGS 8
+/* maximum spares (global and dedicated combined) */
+#define MAX_SPARES_FOR_THE_CONTROLLER MAX_PHYSICAL_DEVICES
+/* maximum possible Target IDs (i.e. 0 to 63) */
+#define MAX_TARGET_ID 63
+/* maximum number of supported enclosures */
+#define MAX_ENCLOSURES 32
+/* maximum number of PHYs per controller */
+#define MAX_PHYS_PER_CONTROLLER 16
+/* maximum number of LDs per array (due to DDF limitations) */
+#define MAX_LDS_PER_ARRAY 16
+
+/*
+ * -----------------------------------------------------------------------------
+ * -----------------------------------------------------------------------------
+ *
+ * Logical Drive commands
+ *
+ * -----------------------------------------------------------------------------
+ * -----------------------------------------------------------------------------
+ */
+#define DR_DCMD_LD 0x03000000, /* Logical Device (LD) opcodes */
+
+/*
+ * Input: dcmd.opcode - DR_DCMD_LD_GET_LIST
+ * dcmd.mbox - reserved
+ * dcmd.sge IN - ptr to returned DR_LD_LIST structure
+ * Desc: Return the logical drive list structure
+ * Status: No error
+ */
+
+/*
+ * defines the logical drive reference structure
+ */
+typedef union _DR_LD_REF { /* LD reference structure */
+ struct {
+ uint8_t targetId; /* LD target id (0 to MAX_TARGET_ID) */
+ uint8_t reserved; /* reserved for in line with DR_PD_REF */
+ uint16_t seqNum; /* Sequence Number */
+ } ld_ref;
+ uint32_t ref; /* shorthand reference to full 32-bits */
+} DR_LD_REF; /* 4 bytes */
+
+/*
+ * defines the logical drive list structure
+ */
+typedef struct _DR_LD_LIST {
+ uint32_t ldCount; /* number of LDs */
+ uint32_t reserved; /* pad to 8-byte boundary */
+ struct {
+ DR_LD_REF ref; /* LD reference */
+ uint8_t state; /* current LD state (DR_LD_STATE) */
+ uint8_t reserved[3]; /* pad to 8-byte boundary */
+ uint64_t size; /* LD size */
+ } ldList[MAX_LOGICAL_DRIVES];
+} DR_LD_LIST;
+
+struct drsas_drv_ver {
+ uint8_t signature[12];
+ uint8_t os_name[16];
+ uint8_t os_ver[12];
+ uint8_t drv_name[20];
+ uint8_t drv_ver[32];
+ uint8_t drv_rel_date[20];
+};
+
+#define PCI_TYPE0_ADDRESSES 6
+#define PCI_TYPE1_ADDRESSES 2
+#define PCI_TYPE2_ADDRESSES 5
+
+struct drsas_pci_common_header {
+ uint16_t vendorID; /* (ro) */
+ uint16_t deviceID; /* (ro) */
+ uint16_t command; /* Device control */
+ uint16_t status;
+ uint8_t revisionID; /* (ro) */
+ uint8_t progIf; /* (ro) */
+ uint8_t subClass; /* (ro) */
+ uint8_t baseClass; /* (ro) */
+ uint8_t cacheLineSize; /* (ro+) */
+ uint8_t latencyTimer; /* (ro+) */
+ uint8_t headerType; /* (ro) */
+ uint8_t bist; /* Built in self test */
+
+ union {
+ struct {
+ uint32_t baseAddresses[PCI_TYPE0_ADDRESSES];
+ uint32_t cis;
+ uint16_t subVendorID;
+ uint16_t subSystemID;
+ uint32_t romBaseAddress;
+ uint8_t capabilitiesPtr;
+ uint8_t reserved1[3];
+ uint32_t reserved2;
+ uint8_t interruptLine;
+ uint8_t interruptPin; /* (ro) */
+ uint8_t minimumGrant; /* (ro) */
+ uint8_t maximumLatency; /* (ro) */
+ } type_0;
+
+ struct {
+ uint32_t baseAddresses[PCI_TYPE1_ADDRESSES];
+ uint8_t primaryBus;
+ uint8_t secondaryBus;
+ uint8_t subordinateBus;
+ uint8_t secondaryLatency;
+ uint8_t ioBase;
+ uint8_t ioLimit;
+ uint16_t secondaryStatus;
+ uint16_t memoryBase;
+ uint16_t memoryLimit;
+ uint16_t prefetchBase;
+ uint16_t prefetchLimit;
+ uint32_t prefetchBaseUpper32;
+ uint32_t prefetchLimitUpper32;
+ uint16_t ioBaseUpper16;
+ uint16_t ioLimitUpper16;
+ uint8_t capabilitiesPtr;
+ uint8_t reserved1[3];
+ uint32_t romBaseAddress;
+ uint8_t interruptLine;
+ uint8_t interruptPin;
+ uint16_t bridgeControl;
+ } type_1;
+
+ struct {
+ uint32_t socketRegistersBaseAddress;
+ uint8_t capabilitiesPtr;
+ uint8_t reserved;
+ uint16_t secondaryStatus;
+ uint8_t primaryBus;
+ uint8_t secondaryBus;
+ uint8_t subordinateBus;
+ uint8_t secondaryLatency;
+ struct {
+ uint32_t base;
+ uint32_t limit;
+ } range[PCI_TYPE2_ADDRESSES-1];
+ uint8_t interruptLine;
+ uint8_t interruptPin;
+ uint16_t bridgeControl;
+ } type_2;
+ } header;
+};
+
+struct drsas_pci_link_capability {
+ union {
+ struct {
+ uint32_t linkSpeed :4;
+ uint32_t linkWidth :6;
+ uint32_t aspmSupport :2;
+ uint32_t losExitLatency :3;
+ uint32_t l1ExitLatency :3;
+ uint32_t rsvdp :6;
+ uint32_t portNumber :8;
+ } bits;
+
+ uint32_t asUlong;
+ } cap;
+
+};
+
+struct drsas_pci_link_status_capability {
+ union {
+ struct {
+ uint16_t linkSpeed :4;
+ uint16_t negotiatedLinkWidth :6;
+ uint16_t linkTrainingError :1;
+ uint16_t linkTraning :1;
+ uint16_t slotClockConfig :1;
+ uint16_t rsvdZ :3;
+ } bits;
+
+ uint16_t asUshort;
+ } stat_cap;
+
+ uint16_t reserved;
+
+};
+
+struct drsas_pci_capabilities {
+ struct drsas_pci_link_capability linkCapability;
+ struct drsas_pci_link_status_capability linkStatusCapability;
+};
+
+struct drsas_pci_information
+{
+ uint32_t busNumber;
+ uint8_t deviceNumber;
+ uint8_t functionNumber;
+ uint8_t interruptVector;
+ uint8_t reserved;
+ struct drsas_pci_common_header pciHeaderInfo;
+ struct drsas_pci_capabilities capability;
+ uint8_t reserved2[32];
+};
+
+struct drsas_ioctl {
+ uint16_t version;
+ uint16_t controller_id;
+ uint8_t signature[8];
+ uint32_t reserved_1;
+ uint32_t control_code;
+ uint32_t reserved_2[2];
+ uint8_t frame[64];
+ union drsas_sgl_frame sgl_frame;
+ uint8_t sense_buff[DRSAS_MAX_SENSE_LENGTH];
+ uint8_t data[1];
+};
+
+struct drsas_aen {
+ uint16_t host_no;
+ uint16_t cmd_status;
+ uint32_t seq_num;
+ uint32_t class_locale_word;
+};
+#pragma pack()
+
+#ifndef DDI_VENDOR_LSI
+#define DDI_VENDOR_LSI "LSI"
+#endif /* DDI_VENDOR_LSI */
+
+static int drsas_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
+static int drsas_attach(dev_info_t *, ddi_attach_cmd_t);
+static int drsas_reset(dev_info_t *, ddi_reset_cmd_t);
+static int drsas_detach(dev_info_t *, ddi_detach_cmd_t);
+static int drsas_open(dev_t *, int, int, cred_t *);
+static int drsas_close(dev_t, int, int, cred_t *);
+static int drsas_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
+
+static int drsas_tran_tgt_init(dev_info_t *, dev_info_t *,
+ scsi_hba_tran_t *, struct scsi_device *);
+static struct scsi_pkt *drsas_tran_init_pkt(struct scsi_address *, register
+ struct scsi_pkt *, struct buf *, int, int, int, int,
+ int (*)(), caddr_t);
+static int drsas_tran_start(struct scsi_address *,
+ register struct scsi_pkt *);
+static int drsas_tran_abort(struct scsi_address *, struct scsi_pkt *);
+static int drsas_tran_reset(struct scsi_address *, int);
+static int drsas_tran_getcap(struct scsi_address *, char *, int);
+static int drsas_tran_setcap(struct scsi_address *, char *, int, int);
+static void drsas_tran_destroy_pkt(struct scsi_address *,
+ struct scsi_pkt *);
+static void drsas_tran_dmafree(struct scsi_address *, struct scsi_pkt *);
+static void drsas_tran_sync_pkt(struct scsi_address *, struct scsi_pkt *);
+static uint_t drsas_isr();
+static uint_t drsas_softintr();
+
+static int init_mfi(struct drsas_instance *);
+static int drsas_free_dma_obj(struct drsas_instance *, dma_obj_t);
+static int drsas_alloc_dma_obj(struct drsas_instance *, dma_obj_t *,
+ uchar_t);
+static struct drsas_cmd *get_mfi_pkt(struct drsas_instance *);
+static void return_mfi_pkt(struct drsas_instance *,
+ struct drsas_cmd *);
+
+static void free_space_for_mfi(struct drsas_instance *);
+static void free_additional_dma_buffer(struct drsas_instance *);
+static int alloc_additional_dma_buffer(struct drsas_instance *);
+static int read_fw_status_reg_ppc(struct drsas_instance *);
+static void issue_cmd_ppc(struct drsas_cmd *, struct drsas_instance *);
+static int issue_cmd_in_poll_mode_ppc(struct drsas_instance *,
+ struct drsas_cmd *);
+static int issue_cmd_in_sync_mode_ppc(struct drsas_instance *,
+ struct drsas_cmd *);
+static void enable_intr_ppc(struct drsas_instance *);
+static void disable_intr_ppc(struct drsas_instance *);
+static int intr_ack_ppc(struct drsas_instance *);
+static int mfi_state_transition_to_ready(struct drsas_instance *);
+static void destroy_mfi_frame_pool(struct drsas_instance *);
+static int create_mfi_frame_pool(struct drsas_instance *);
+static int drsas_dma_alloc(struct drsas_instance *, struct scsi_pkt *,
+ struct buf *, int, int (*)());
+static int drsas_dma_move(struct drsas_instance *,
+ struct scsi_pkt *, struct buf *);
+static void flush_cache(struct drsas_instance *instance);
+static void display_scsi_inquiry(caddr_t);
+static int start_mfi_aen(struct drsas_instance *instance);
+static int handle_drv_ioctl(struct drsas_instance *instance,
+ struct drsas_ioctl *ioctl, int mode);
+static int handle_mfi_ioctl(struct drsas_instance *instance,
+ struct drsas_ioctl *ioctl, int mode);
+static int handle_mfi_aen(struct drsas_instance *instance,
+ struct drsas_aen *aen);
+static void fill_up_drv_ver(struct drsas_drv_ver *dv);
+static struct drsas_cmd *build_cmd(struct drsas_instance *instance,
+ struct scsi_address *ap, struct scsi_pkt *pkt,
+ uchar_t *cmd_done);
+static int register_mfi_aen(struct drsas_instance *instance,
+ uint32_t seq_num, uint32_t class_locale_word);
+static int issue_mfi_pthru(struct drsas_instance *instance, struct
+ drsas_ioctl *ioctl, struct drsas_cmd *cmd, int mode);
+static int issue_mfi_dcmd(struct drsas_instance *instance, struct
+ drsas_ioctl *ioctl, struct drsas_cmd *cmd, int mode);
+static int issue_mfi_smp(struct drsas_instance *instance, struct
+ drsas_ioctl *ioctl, struct drsas_cmd *cmd, int mode);
+static int issue_mfi_stp(struct drsas_instance *instance, struct
+ drsas_ioctl *ioctl, struct drsas_cmd *cmd, int mode);
+static int abort_aen_cmd(struct drsas_instance *instance,
+ struct drsas_cmd *cmd_to_abort);
+
+static int drsas_common_check(struct drsas_instance *instance,
+ struct drsas_cmd *cmd);
+static void drsas_fm_init(struct drsas_instance *instance);
+static void drsas_fm_fini(struct drsas_instance *instance);
+static int drsas_fm_error_cb(dev_info_t *, ddi_fm_error_t *,
+ const void *);
+static void drsas_fm_ereport(struct drsas_instance *instance,
+ char *detail);
+static int drsas_check_dma_handle(ddi_dma_handle_t handle);
+static int drsas_check_acc_handle(ddi_acc_handle_t handle);
+
+static void drsas_rem_intrs(struct drsas_instance *instance);
+static int drsas_add_intrs(struct drsas_instance *instance, int intr_type);
+
+static void drsas_tran_tgt_free(dev_info_t *, dev_info_t *,
+ scsi_hba_tran_t *, struct scsi_device *);
+static int drsas_tran_bus_config(dev_info_t *, uint_t,
+ ddi_bus_config_op_t, void *, dev_info_t **);
+static int drsas_parse_devname(char *, int *, int *);
+static int drsas_config_all_devices(struct drsas_instance *);
+static int drsas_config_scsi_device(struct drsas_instance *,
+ struct scsi_device *, dev_info_t **);
+static int drsas_config_ld(struct drsas_instance *, uint16_t,
+ uint8_t, dev_info_t **);
+static dev_info_t *drsas_find_child(struct drsas_instance *, uint16_t,
+ uint8_t);
+static int drsas_name_node(dev_info_t *, char *, int);
+static void drsas_issue_evt_taskq(struct drsas_eventinfo *);
+static int drsas_service_evt(struct drsas_instance *, int, int, int,
+ uint64_t);
+static int drsas_mode_sense_build(struct scsi_pkt *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _DR_SAS_H_ */
diff --git a/usr/src/uts/common/io/dr_sas/dr_sas_list.h b/usr/src/uts/common/io/dr_sas/dr_sas_list.h
new file mode 100644
index 0000000000..4154a77796
--- /dev/null
+++ b/usr/src/uts/common/io/dr_sas/dr_sas_list.h
@@ -0,0 +1,212 @@
+/*
+ * dr_sas_list.h: header for dr_sas
+ *
+ * Solaris MegaRAID driver for SAS2.0 controllers
+ * Copyright (c) 2008-2009, LSI Logic Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the author nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _DR_SAS_LIST_H_
+#define _DR_SAS_LIST_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+struct mlist_head {
+ struct mlist_head *next, *prev;
+};
+
+typedef struct mlist_head mlist_t;
+
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+#define LIST_HEAD(name) \
+ struct mlist_head name = LIST_HEAD_INIT(name)
+
+#define INIT_LIST_HEAD(ptr) { \
+ (ptr)->next = (ptr); (ptr)->prev = (ptr); \
+}
+
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static void __list_add(struct mlist_head *new,
+ struct mlist_head *prev,
+ struct mlist_head *next)
+{
+ next->prev = new;
+ new->next = next;
+ new->prev = prev;
+ prev->next = new;
+}
+
+
+/*
+ * mlist_add - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ */
+static void mlist_add(struct mlist_head *new, struct mlist_head *head)
+{
+ __list_add(new, head, head->next);
+}
+
+
+/*
+ * mlist_add_tail - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head.
+ * This is useful for implementing queues.
+ */
+static void mlist_add_tail(struct mlist_head *new, struct mlist_head *head)
+{
+ __list_add(new, head->prev, head);
+}
+
+
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static void __list_del(struct mlist_head *prev,
+ struct mlist_head *next)
+{
+ next->prev = prev;
+ prev->next = next;
+}
+
+
+/*
+ * mlist_del_init - deletes entry from list and reinitialize it.
+ * @entry: the element to delete from the list.
+ */
+static void mlist_del_init(struct mlist_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+ INIT_LIST_HEAD(entry);
+}
+
+
+/*
+ * mlist_empty - tests whether a list is empty
+ * @head: the list to test.
+ */
+static int mlist_empty(struct mlist_head *head)
+{
+ return (head->next == head);
+}
+
+
+/*
+ * mlist_splice - join two lists
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ */
+static void mlist_splice(struct mlist_head *list, struct mlist_head *head)
+{
+ struct mlist_head *first = list->next;
+
+ if (first != list) {
+ struct mlist_head *last = list->prev;
+ struct mlist_head *at = head->next;
+
+ first->prev = head;
+ head->next = first;
+
+ last->next = at;
+ at->prev = last;
+ }
+}
+
+
+/*
+ * mlist_entry - get the struct for this entry
+ * @ptr: the &struct mlist_head pointer.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_struct within the struct.
+ */
+#define mlist_entry(ptr, type, member) \
+ ((type *)((size_t)(ptr) - offsetof(type, member)))
+
+
+/*
+ * mlist_for_each - iterate over a list
+ * @pos: the &struct mlist_head to use as a loop counter.
+ * @head: the head for your list.
+ */
+#define mlist_for_each(pos, head) \
+ for (pos = (head)->next, prefetch(pos->next); pos != (head); \
+ pos = pos->next, prefetch(pos->next))
+
+
+/*
+ * mlist_for_each_safe - iterate over a list safe against removal of list entry
+ * @pos: the &struct mlist_head to use as a loop counter.
+ * @n: another &struct mlist_head to use as temporary storage
+ * @head: the head for your list.
+ */
+#define mlist_for_each_safe(pos, n, head) \
+ for (pos = (head)->next, n = pos->next; pos != (head); \
+ pos = n, n = pos->next)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _DR_SAS_LIST_H_ */
diff --git a/usr/src/uts/common/io/fibre-channel/impl/fctl.c b/usr/src/uts/common/io/fibre-channel/impl/fctl.c
index 634de6c6dd..87105e779d 100644
--- a/usr/src/uts/common/io/fibre-channel/impl/fctl.c
+++ b/usr/src/uts/common/io/fibre-channel/impl/fctl.c
@@ -24,6 +24,7 @@
*/
/*
* Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved.
+ * Copyright (c) 2015 Joyent, Inc. All rights reserved.
*/
/*
* Fibre channel Transport Library (fctl)
@@ -5500,6 +5501,11 @@ fc_ulp_get_adapter_paths(char *pathList, int count)
maxPorts ++;
}
+ if (maxPorts == 0) {
+ mutex_exit(&fctl_port_lock);
+ return (0);
+ }
+
/* Now allocate a buffer to store all the pointers for comparisons */
portList = kmem_zalloc(sizeof (fc_local_port_t *) * maxPorts, KM_SLEEP);
diff --git a/usr/src/uts/common/io/gsqueue/gsqueue.c b/usr/src/uts/common/io/gsqueue/gsqueue.c
new file mode 100644
index 0000000000..b484b16142
--- /dev/null
+++ b/usr/src/uts/common/io/gsqueue/gsqueue.c
@@ -0,0 +1,612 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Serialization queues are a technique used in illumos to provide what's
+ * commonly known as a 'vertical' perimeter. The idea (described a bit in
+ * uts/common/inet/squeue.c) is to provide a means to make sure that message
+ * blocks (mblk_t) are processed in a specific order. Subsystems like ip and vnd
+ * consume these on different policies, ip on a conn_t basis, vnd on a per
+ * device basis, and use this to ensure that only one packet is being processed
+ * at a given time.
+ *
+ * Serialization queues were originally used by ip. As part of that
+ * implementation, many of the details of ip were baked into it. That includes
+ * things like conn_t, ip receive attributes, and the notion of sets. While an
+ * individual serialization queue, or gsqueue_t, is a useful level of
+ * abstraction, it isn't the basis on which monst consumers want to manage them.
+ * Instead, we have the notion of a set of serialization queues. These sets are
+ * DR (CPU Dynamic reconfiguration) aware, and allow consumers to have a
+ * gsqueue_t per CPU to fanout on without managing them all itself. In the
+ * original implementation, this existed, but they were heavily tied into the
+ * infrastructure of IP, and its notion of polling on the underlying MAC
+ * devices.
+ *
+ * The result of that past is a new interface to serialization queues and a
+ * similar, but slightly different, abstraction to sets of these
+ * (gsqueue_set_t). When designing this there are two different approaches that
+ * one could consider. The first is that the system has one gsqueue_set_t that
+ * the entire world shares, whether IP or some other consumer. The other is that
+ * every consumer has their own set.
+ *
+ * The trade offs between these two failure modes are the pathological failure
+ * modes. There is no guarantee that any two consumers here are equivalent. In
+ * fact, they very likely have very different latency profiles. If they are
+ * being processed in the same queue, that can lead to very odd behaviors. More
+ * generally, if we have a series of processing functions from one consumer
+ * which are generally short, and another which are generally long, that'll
+ * cause undue latency that's harder to observe. If we instead take the approach
+ * that each consumer should have its own set that it fans out over then we
+ * won't end up with the problem that a given serialization queue will have
+ * multiple latency profiles, but instead we'll see cpu contention for the bound
+ * gsqueue_t worker thread. Keep in mind though, that only the gsqueue_t worker
+ * thread is bound and it is in fact possible for it to be processed by other
+ * threads on other CPUs.
+ *
+ * We've opted to go down the second path, so each consumer has its own
+ * independent set of serialization queues that it is bound over.
+ *
+ * Structure Hierarchies
+ * ---------------------
+ *
+ * At the top level, we have a single list of gsqueue_set_t. The gsqueue_set_t
+ * encapsulates all the per-CPU gsqueue_t that exist in the form of
+ * gsqueue_cpu_t. The gsqueue_cpu_t has been designed such that it could
+ * accommodate more than one gsqueue_t, but today there is a one to one mapping.
+ *
+ * We maintain two different lists of gsqueue_cpu_t, the active and defunct
+ * sets. The active set is maintained in the array `gs_cpus`. There are NCPU
+ * entries available in `gs_cpus` with the total number of currently active cpus
+ * described in `gs_ncpus`. The ordering of `gs_cpus` is unimportant. When
+ * there is no longer a need for a given binding (see the following section for
+ * more explanation on when this is the case) then we move the entry to the
+ * `gs_defunct` list which is just a singly linked list of gsqueue_cpu_t.
+ *
+ * In addition, each gsqueue_set_t can have a series of callbacks registered
+ * with it. These are described in the following section. Graphically, a given
+ * gsqueue_set_t looks roughly like the following:
+ *
+ * +---------------+
+ * | gsqueue_set_t |
+ * +---------------+
+ * | | |
+ * | | * . . . gs_cpus
+ * | | |
+ * | | | +-------------------------------------------------+
+ * | | +----->| gsqueue_cpu_t || gsqueue_cpu_t || gsqueue_cpu_t |...
+ * | | +-------------------------------------------------+
+ * | |
+ * | * . . . gs_defunct
+ * | |
+ * | | +---------------+ +---------------+ +---------------+
+ * | +--->| gsqueue_cpu_t |-->| gsqueue_cpu_t |-->| gsqueue_cpu_t |...
+ * | +---------------+ +---------------+ +---------------+
+ * * . . . gs_cbs
+ * |
+ * | +--------------+ +--------------+ +--------------+
+ * +--->| gsqueue_cb_t |-->| gsqueue_cb_t |->| gsqueue_cb_t |...
+ * +--------------+ +--------------+ +--------------+
+ *
+ * CPU DR, gsqueue_t, and gsqueue_t
+ * --------------------------------
+ *
+ * Recall, that every serialization queue (gsqueue_t or squeue_t) has a worker
+ * thread that may end up doing work. As part of supporting fanout, we have one
+ * gsqueue_t per CPU, and its worker thread is bound to that CPU. Because of
+ * this binding, we need to deal with CPU DR changes.
+ *
+ * The gsqueue driver maintains a single CPU DR callback that is used for the
+ * entire sub-system. We break down CPU DR events into three groups. Offline
+ * events, online events, and events we can ignore. When the first group occurs,
+ * we need to go through every gsqueue_t, find the gsqueue_cpu_t that
+ * corresponds to that processor id, and unbind all of its gsqueue_t's. It's
+ * rather important that we only unbind the gsqueue_t's and not actually destroy
+ * them. When this happens, they could very easily have data queued inside of
+ * them and it's unreasonable to just throw out everything in them at this
+ * point. The data remains intact and service continues uinterrupted.
+ *
+ * When we receive an online event, we do the opposite. We try to find a
+ * gsqueue_cpu_t that previously was bound to this CPU (by leaving its gqc_cpuid
+ * field intact) in the defunct list. If we find one, we remove it from the
+ * defunct list and add it to the active list as well as binding the gsqueue_t
+ * to the CPU in question. If we don't find one, then we create a new one.
+ *
+ * To deal with these kinds of situations, we allow a consumer to register
+ * callbacks for the gsqueue_t that they are interested in. These callbacks will
+ * fire whenever we are handling a topology change. The design of the callbacks
+ * is not that the user can take any administrative action during them, but
+ * rather set something for them to do asynchronously. It is illegal to make any
+ * calls into the gsqueue system while you are in a callback.
+ *
+ * Locking
+ * -------
+ *
+ * The lock ordering here is fairly straightforward. Due to our use of CPU
+ * binding and the CPU DR callbacks, we have an additional lock to consider
+ * cpu_lock. Because of that, the following are the rules for locking:
+ *
+ *
+ * o If performing binding operations, you must grab cpu_lock. cpu_lock is
+ * also at the top of the order.
+ *
+ * o cpu_lock > gsqueue_lock > gsqueue_t`gs_lock > squeue_t`sq_lock
+ * If you need to take multiple locks, you must take the greatest
+ * (left-most) one first.
+ */
+
+#include <sys/types.h>
+#include <sys/conf.h>
+#include <sys/stat.h>
+#include <sys/kmem.h>
+#include <sys/stream.h>
+#include <sys/modctl.h>
+#include <sys/cpuvar.h>
+#include <sys/list.h>
+#include <sys/sysmacros.h>
+
+#include <sys/gsqueue.h>
+#include <sys/squeue_impl.h>
+
+typedef struct gsqueue_cb {
+ struct gsqueue_cb *gcb_next;
+ gsqueue_cb_f gcb_func;
+ void *gcb_arg;
+} gsqueue_cb_t;
+
+typedef struct gsqueue_cpu {
+ struct gsqueue_cpu *gqc_next;
+ squeue_t *gqc_head;
+ processorid_t gqc_cpuid;
+} gsqueue_cpu_t;
+
+struct gsqueue_set {
+ list_node_t gs_next;
+ uint_t gs_wwait;
+ pri_t gs_wpri;
+ kmutex_t gs_lock;
+ int gs_ncpus;
+ gsqueue_cpu_t **gs_cpus;
+ gsqueue_cpu_t *gs_defunct;
+ gsqueue_cb_t *gs_cbs;
+};
+
+static kmutex_t gsqueue_lock;
+static list_t gsqueue_list;
+static kmem_cache_t *gsqueue_cb_cache;
+static kmem_cache_t *gsqueue_cpu_cache;
+static kmem_cache_t *gsqueue_set_cache;
+
+static gsqueue_cpu_t *
+gsqueue_cpu_create(uint_t wwait, pri_t wpri, processorid_t cpuid)
+{
+ gsqueue_cpu_t *scp;
+
+ scp = kmem_cache_alloc(gsqueue_cpu_cache, KM_SLEEP);
+
+ scp->gqc_next = NULL;
+ scp->gqc_cpuid = cpuid;
+ scp->gqc_head = squeue_create(wwait, wpri, B_FALSE);
+ scp->gqc_head->sq_state = SQS_DEFAULT;
+ squeue_bind(scp->gqc_head, cpuid);
+
+ return (scp);
+}
+
+static void
+gsqueue_cpu_destroy(gsqueue_cpu_t *scp)
+{
+ squeue_destroy(scp->gqc_head);
+ kmem_cache_free(gsqueue_cpu_cache, scp);
+}
+
+gsqueue_set_t *
+gsqueue_set_create(uint_t wwait, pri_t wpri)
+{
+ int i;
+ gsqueue_set_t *gssp;
+
+ gssp = kmem_cache_alloc(gsqueue_set_cache, KM_SLEEP);
+ gssp->gs_wwait = wwait;
+ gssp->gs_wpri = wpri;
+ gssp->gs_ncpus = 0;
+
+ /*
+ * We're grabbing CPU lock. Once we let go of it we have to ensure all
+ * set up of the gsqueue_set_t is complete, as it'll be in there for the
+ * various CPU DR bits.
+ */
+ mutex_enter(&cpu_lock);
+
+ for (i = 0; i < NCPU; i++) {
+ gsqueue_cpu_t *scp;
+ cpu_t *cp = cpu_get(i);
+ if (cp != NULL && CPU_ACTIVE(cp) &&
+ cp->cpu_flags & CPU_EXISTS) {
+ scp = gsqueue_cpu_create(wwait, wpri, cp->cpu_id);
+ gssp->gs_cpus[gssp->gs_ncpus] = scp;
+ gssp->gs_ncpus++;
+ }
+ }
+
+ /* Finally we can add it to our global list and be done */
+ mutex_enter(&gsqueue_lock);
+ list_insert_tail(&gsqueue_list, gssp);
+ mutex_exit(&gsqueue_lock);
+ mutex_exit(&cpu_lock);
+
+ return (gssp);
+}
+
+void
+gsqueue_set_destroy(gsqueue_set_t *gssp)
+{
+ int i;
+ gsqueue_cpu_t *scp;
+
+ /*
+ * Go through and unbind all of the squeues while cpu_lock is held and
+ * move them to the defunct list. Once that's done, we don't need to do
+ * anything else with cpu_lock.
+ */
+ mutex_enter(&cpu_lock);
+ mutex_enter(&gsqueue_lock);
+ list_remove(&gsqueue_list, gssp);
+ mutex_exit(&gsqueue_lock);
+
+ mutex_enter(&gssp->gs_lock);
+
+ for (i = 0; i < gssp->gs_ncpus; i++) {
+ scp = gssp->gs_cpus[i];
+ squeue_unbind(scp->gqc_head);
+ scp->gqc_next = gssp->gs_defunct;
+ gssp->gs_defunct = scp;
+ gssp->gs_cpus[i] = NULL;
+ }
+ gssp->gs_ncpus = 0;
+
+ mutex_exit(&gssp->gs_lock);
+ mutex_exit(&cpu_lock);
+
+ while (gssp->gs_defunct != NULL) {
+ gsqueue_cpu_t *scp;
+
+ scp = gssp->gs_defunct;
+ gssp->gs_defunct = scp->gqc_next;
+ gsqueue_cpu_destroy(scp);
+ }
+
+ while (gssp->gs_cbs != NULL) {
+ gsqueue_cb_t *cbp;
+
+ cbp = gssp->gs_cbs;
+ gssp->gs_cbs = cbp->gcb_next;
+ kmem_cache_free(gsqueue_cb_cache, cbp);
+ }
+
+ ASSERT(gssp->gs_ncpus == 0);
+ ASSERT(gssp->gs_defunct == NULL);
+ ASSERT(gssp->gs_cbs == NULL);
+ kmem_cache_free(gsqueue_set_cache, gssp);
+}
+
+gsqueue_t *
+gsqueue_set_get(gsqueue_set_t *gssp, uint_t index)
+{
+ squeue_t *sqp;
+ gsqueue_cpu_t *scp;
+
+ mutex_enter(&gssp->gs_lock);
+ scp = gssp->gs_cpus[index % gssp->gs_ncpus];
+ sqp = scp->gqc_head;
+ mutex_exit(&gssp->gs_lock);
+ return ((gsqueue_t *)sqp);
+}
+
+uintptr_t
+gsqueue_set_cb_add(gsqueue_set_t *gssp, gsqueue_cb_f cb, void *arg)
+{
+ gsqueue_cb_t *cbp;
+
+ cbp = kmem_cache_alloc(gsqueue_cb_cache, KM_SLEEP);
+ cbp->gcb_func = cb;
+ cbp->gcb_arg = arg;
+
+ mutex_enter(&gssp->gs_lock);
+ cbp->gcb_next = gssp->gs_cbs;
+ gssp->gs_cbs = cbp;
+ mutex_exit(&gssp->gs_lock);
+ return ((uintptr_t)cbp);
+}
+
+int
+gsqueue_set_cb_remove(gsqueue_set_t *gssp, uintptr_t id)
+{
+ gsqueue_cb_t *cbp, *prev;
+ mutex_enter(&gssp->gs_lock);
+ cbp = gssp->gs_cbs;
+ prev = NULL;
+ while (cbp != NULL) {
+ if ((uintptr_t)cbp != id) {
+ prev = cbp;
+ cbp = cbp->gcb_next;
+ continue;
+ }
+
+ if (prev == NULL) {
+ gssp->gs_cbs = cbp->gcb_next;
+ } else {
+ prev->gcb_next = cbp->gcb_next;
+ }
+
+ mutex_exit(&gssp->gs_lock);
+ kmem_cache_free(gsqueue_cb_cache, cbp);
+ return (0);
+ }
+ mutex_exit(&gssp->gs_lock);
+ return (-1);
+}
+
+void
+gsqueue_enter_one(gsqueue_t *gsp, mblk_t *mp, gsqueue_proc_f func, void *arg,
+ int flags, uint8_t tag)
+{
+ squeue_t *sqp = (squeue_t *)gsp;
+
+ ASSERT(mp->b_next == NULL);
+ ASSERT(mp->b_prev == NULL);
+ mp->b_queue = (queue_t *)func;
+ mp->b_prev = arg;
+ sqp->sq_enter(sqp, mp, mp, 1, NULL, flags, tag);
+}
+
+static void
+gsqueue_notify(gsqueue_set_t *gssp, squeue_t *sqp, boolean_t online)
+{
+ gsqueue_cb_t *cbp;
+
+ ASSERT(MUTEX_HELD(&gssp->gs_lock));
+ cbp = gssp->gs_cbs;
+ while (cbp != NULL) {
+ cbp->gcb_func(gssp, (gsqueue_t *)sqp, cbp->gcb_arg, online);
+ cbp = cbp->gcb_next;
+ }
+
+}
+
+/*
+ * When we online a processor we need to go through and either bind a defunct
+ * squeue or create a new one. We'll try to reuse a gsqueue_cpu_t from the
+ * defunct list that used to be on that processor. If no such gsqueue_cpu_t
+ * exists, then we'll create a new one. We'd rather avoid taking over an
+ * existing defunct one that used to be on another CPU, as its not unreasonable
+ * to believe that its CPU will come back. More CPUs are offlined and onlined by
+ * the administrator or by creating cpu sets than actually get offlined by FMA.
+ */
+static void
+gsqueue_handle_online(processorid_t id)
+{
+ gsqueue_set_t *gssp;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+ mutex_enter(&gsqueue_lock);
+ for (gssp = list_head(&gsqueue_list); gssp != NULL;
+ gssp = list_next(&gsqueue_list, gssp)) {
+ gsqueue_cpu_t *scp;
+
+ mutex_enter(&gssp->gs_lock);
+ scp = gssp->gs_defunct;
+ while (scp != NULL) {
+ if (scp->gqc_cpuid == id)
+ break;
+ scp = scp->gqc_next;
+ }
+
+ if (scp == NULL) {
+ scp = gsqueue_cpu_create(gssp->gs_wwait,
+ gssp->gs_wpri, id);
+ } else {
+ squeue_bind(scp->gqc_head, id);
+ }
+ ASSERT(gssp->gs_ncpus < NCPU);
+ gssp->gs_cpus[gssp->gs_ncpus] = scp;
+ gssp->gs_ncpus++;
+ gsqueue_notify(gssp, scp->gqc_head, B_TRUE);
+ mutex_exit(&gssp->gs_lock);
+ }
+ mutex_exit(&gsqueue_lock);
+}
+
+static void
+gsqueue_handle_offline(processorid_t id)
+{
+ gsqueue_set_t *gssp;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+ mutex_enter(&gsqueue_lock);
+ for (gssp = list_head(&gsqueue_list); gssp != NULL;
+ gssp = list_next(&gsqueue_list, gssp)) {
+ int i;
+ gsqueue_cpu_t *scp = NULL;
+
+ mutex_enter(&gssp->gs_lock);
+ for (i = 0; i < gssp->gs_ncpus; i++) {
+ if (gssp->gs_cpus[i]->gqc_cpuid == id) {
+ scp = gssp->gs_cpus[i];
+ break;
+ }
+ }
+
+ if (scp != NULL) {
+ squeue_unbind(scp->gqc_head);
+ scp->gqc_next = gssp->gs_defunct;
+ gssp->gs_defunct = scp;
+ gssp->gs_cpus[i] = gssp->gs_cpus[gssp->gs_ncpus-1];
+ gssp->gs_ncpus--;
+ gsqueue_notify(gssp, scp->gqc_head, B_FALSE);
+ }
+ mutex_exit(&gssp->gs_lock);
+ }
+ mutex_exit(&gsqueue_lock);
+}
+
+/* ARGSUSED */
+static int
+gsqueue_cpu_setup(cpu_setup_t what, int id, void *unused)
+{
+ cpu_t *cp;
+
+ ASSERT(MUTEX_HELD(&cpu_lock));
+ cp = cpu_get(id);
+ switch (what) {
+ case CPU_CONFIG:
+ case CPU_ON:
+ case CPU_INIT:
+ case CPU_CPUPART_IN:
+ if (cp != NULL && CPU_ACTIVE(cp) && cp->cpu_flags & CPU_EXISTS)
+ gsqueue_handle_online(cp->cpu_id);
+ break;
+ case CPU_UNCONFIG:
+ case CPU_OFF:
+ case CPU_CPUPART_OUT:
+ gsqueue_handle_offline(cp->cpu_id);
+ break;
+ default:
+ break;
+ }
+
+ return (0);
+}
+
+
+/* ARGSUSED */
+static int
+gsqueue_set_cache_construct(void *buf, void *arg, int kmflags)
+{
+ gsqueue_set_t *gssp = buf;
+
+ gssp->gs_cpus = kmem_alloc(sizeof (gsqueue_cpu_t *) * NCPU, kmflags);
+ if (gssp->gs_cpus == NULL)
+ return (-1);
+
+ mutex_init(&gssp->gs_lock, NULL, MUTEX_DRIVER, NULL);
+ gssp->gs_ncpus = 0;
+ gssp->gs_defunct = NULL;
+ gssp->gs_cbs = NULL;
+
+ return (0);
+}
+
+static void
+gsqueue_set_cache_destruct(void *buf, void *arg)
+{
+ gsqueue_set_t *gssp = buf;
+
+ kmem_free(gssp->gs_cpus, sizeof (gsqueue_cpu_t *) * NCPU);
+ gssp->gs_cpus = NULL;
+ mutex_destroy(&gssp->gs_lock);
+}
+
+static void
+gsqueue_ddiinit(void)
+{
+ list_create(&gsqueue_list, sizeof (gsqueue_set_t),
+ offsetof(gsqueue_set_t, gs_next));
+ mutex_init(&gsqueue_lock, NULL, MUTEX_DRIVER, NULL);
+
+ gsqueue_cb_cache = kmem_cache_create("gsqueue_cb_cache",
+ sizeof (gsqueue_cb_t),
+ 0, NULL, NULL, NULL, NULL, NULL, 0);
+ gsqueue_cpu_cache = kmem_cache_create("gsqueue_cpu_cache",
+ sizeof (gsqueue_cpu_t),
+ 0, NULL, NULL, NULL, NULL, NULL, 0);
+ gsqueue_set_cache = kmem_cache_create("squeue_set_cache",
+ sizeof (gsqueue_set_t),
+ 0, gsqueue_set_cache_construct, gsqueue_set_cache_destruct,
+ NULL, NULL, NULL, 0);
+
+
+ mutex_enter(&cpu_lock);
+ register_cpu_setup_func(gsqueue_cpu_setup, NULL);
+ mutex_exit(&cpu_lock);
+}
+
+static int
+gsqueue_ddifini(void)
+{
+ mutex_enter(&gsqueue_lock);
+ if (list_is_empty(&gsqueue_list) == 0) {
+ mutex_exit(&gsqueue_lock);
+ return (EBUSY);
+ }
+ list_destroy(&gsqueue_list);
+ mutex_exit(&gsqueue_lock);
+
+ mutex_enter(&cpu_lock);
+ register_cpu_setup_func(gsqueue_cpu_setup, NULL);
+ mutex_exit(&cpu_lock);
+
+ kmem_cache_destroy(gsqueue_set_cache);
+ kmem_cache_destroy(gsqueue_cpu_cache);
+ kmem_cache_destroy(gsqueue_cb_cache);
+
+ mutex_destroy(&gsqueue_lock);
+
+ return (0);
+}
+
+static struct modlmisc gsqueue_modmisc = {
+ &mod_miscops,
+ "gsqueue"
+};
+
+static struct modlinkage gsqueue_modlinkage = {
+ MODREV_1,
+ &gsqueue_modmisc,
+ NULL
+};
+
+int
+_init(void)
+{
+ int ret;
+
+ gsqueue_ddiinit();
+ if ((ret = mod_install(&gsqueue_modlinkage)) != 0) {
+ VERIFY(gsqueue_ddifini() == 0);
+ return (ret);
+ }
+
+ return (ret);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&gsqueue_modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ int ret;
+
+ if ((ret = gsqueue_ddifini()) != 0)
+ return (ret);
+
+ if ((ret = mod_remove(&gsqueue_modlinkage)) != 0)
+ return (ret);
+
+ return (0);
+}
diff --git a/usr/src/uts/common/io/i40e/core/README b/usr/src/uts/common/io/i40e/core/README
new file mode 100644
index 0000000000..dc0149ce62
--- /dev/null
+++ b/usr/src/uts/common/io/i40e/core/README
@@ -0,0 +1,410 @@
+ ixl FreeBSD* Base Driver and ixlv VF Driver for the
+ Intel XL710 Ethernet Controller Family
+
+/*$FreeBSD$*/
+================================================================
+
+August 26, 2014
+
+
+Contents
+========
+
+- Overview
+- Supported Adapters
+- The VF Driver
+- Building and Installation
+- Additional Configurations
+- Known Limitations
+
+
+Overview
+========
+
+This file describes the IXL FreeBSD* Base driver and the IXLV VF Driver
+for the XL710 Ethernet Family of Adapters. The Driver has been developed
+for use with FreeBSD 10.0 or later, but should be compatible with any
+supported release.
+
+For questions related to hardware requirements, refer to the documentation
+supplied with your Intel XL710 adapter. All hardware requirements listed
+apply for use with FreeBSD.
+
+
+Supported Adapters
+==================
+
+The drivers in this release are compatible with XL710 and X710-based
+Intel Ethernet Network Connections.
+
+
+SFP+ Devices with Pluggable Optics
+----------------------------------
+
+SR Modules
+----------
+ Intel DUAL RATE 1G/10G SFP+ SR (bailed) FTLX8571D3BCV-IT
+ Intel DUAL RATE 1G/10G SFP+ SR (bailed) AFBR-703SDZ-IN2
+
+LR Modules
+----------
+ Intel DUAL RATE 1G/10G SFP+ LR (bailed) FTLX1471D3BCV-IT
+ Intel DUAL RATE 1G/10G SFP+ LR (bailed) AFCT-701SDZ-IN2
+
+QSFP+ Modules
+-------------
+ Intel TRIPLE RATE 1G/10G/40G QSFP+ SR (bailed) E40GQSFPSR
+ Intel TRIPLE RATE 1G/10G/40G QSFP+ LR (bailed) E40GQSFPLR
+ QSFP+ 1G speed is not supported on XL710 based devices.
+
+X710/XL710 Based SFP+ adapters support all passive and active limiting direct
+attach cables that comply with SFF-8431 v4.1 and SFF-8472 v10.4 specifications.
+
+The VF Driver
+==================
+The VF driver is normally used in a virtualized environment where a host
+driver manages SRIOV, and provides a VF device to the guest. With this
+first release the only host environment tested was using Linux QEMU/KVM.
+Support is planned for Xen and VMWare hosts at a later time.
+
+In the FreeBSD guest the IXLV driver would be loaded and will function
+using the VF device assigned to it.
+
+The VF driver provides most of the same functionality as the CORE driver,
+but is actually a slave to the Host, access to many controls are actually
+accomplished by a request to the Host via what is called the "Admin queue".
+These are startup and initialization events however, once in operation
+the device is self-contained and should achieve near native performance.
+
+Some notable limitations of the VF environment: for security reasons
+the driver is never permitted to be promiscuous, therefore a tcpdump
+will not behave the same with the interface. Second, media info is not
+available from the PF, so it will always appear as auto.
+
+Tarball Building and Installation
+=========================
+
+NOTE: You must have kernel sources installed to compile the driver tarball.
+
+These instructions assume a standalone driver tarball, building the driver
+already in the kernel source is simply a matter of adding the device entry
+to the kernel config file, or building in the ixl or ixlv module directory.
+
+In the instructions below, x.x.x is the driver version
+as indicated in the name of the driver tarball. The example is
+for ixl, the same procedure applies for ixlv.
+
+1. Move the base driver tar file to the directory of your choice.
+ For example, use /home/username/ixl or /usr/local/src/ixl.
+
+2. Untar/unzip the archive:
+ tar xfz ixl-x.x.x.tar.gz
+
+3. To install man page:
+ cd ixl-x.x.x
+ gzip -c ixl.4 > /usr/share/man/man4/ixl.4.gz
+
+4. To load the driver onto a running system:
+ cd ixl-x.x.x/src
+ make load
+
+5. To assign an IP address to the interface, enter the following:
+ ifconfig ixl<interface_num> <IP_address>
+
+6. Verify that the interface works. Enter the following, where <IP_address>
+ is the IP address for another machine on the same subnet as the interface
+ that is being tested:
+
+ ping <IP_address>
+
+7. If you want the driver to load automatically when the system is booted:
+
+ cd ixl-x.x.x/src
+ make
+ make install
+
+ Edit /boot/loader.conf, and add the following line:
+ if_ixl_load="YES"
+
+ Edit /etc/rc.conf, and create the appropriate
+ ifconfig_ixl<interface_num> entry:
+
+ ifconfig_ixl<interface_num>="<ifconfig_settings>"
+
+ Example usage:
+
+ ifconfig_ixl0="inet 192.168.10.1 netmask 255.255.255.0"
+
+ NOTE: For assistance, see the ifconfig man page.
+
+
+
+Configuration and Tuning
+=========================
+
+Both drivers supports Transmit/Receive Checksum Offload for IPv4 and IPv6,
+TSO forIPv4 and IPv6, LRO, and Jumbo Frames on all 40 Gigabit adapters.
+
+ Jumbo Frames
+ ------------
+ To enable Jumbo Frames, use the ifconfig utility to increase
+ the MTU beyond 1500 bytes.
+
+ - The Jumbo Frames setting on the switch must be set to at least
+ 22 byteslarger than that of the adapter.
+
+ - The maximum MTU setting for Jumbo Frames is 9706. This value
+ coincides with the maximum jumbo frames size of 9728.
+ To modify the setting, enter the following:
+
+ ifconfig ixl<interface_num> <hostname or IP address> mtu 9000
+
+ - To confirm an interface's MTU value, use the ifconfig command.
+ To confirm the MTU used between two specific devices, use:
+
+ route get <destination_IP_address>
+
+ VLANs
+ -----
+ To create a new VLAN pseudo-interface:
+
+ ifconfig <vlan_name> create
+
+ To associate the VLAN pseudo-interface with a physical interface
+ and assign a VLAN ID, IP address, and netmask:
+
+ ifconfig <vlan_name> <ip_address> netmask <subnet_mask> vlan
+ <vlan_id> vlandev <physical_interface>
+
+ Example:
+
+ ifconfig vlan10 10.0.0.1 netmask 255.255.255.0 vlan 10 vlandev ixl0
+
+ In this example, all packets will be marked on egress with
+ 802.1Q VLAN tags, specifying a VLAN ID of 10.
+
+ To remove a VLAN pseudo-interface:
+
+ ifconfig <vlan_name> destroy
+
+
+ Checksum Offload
+ ----------------
+
+ Checksum offloading supports IPv4 and IPv6 with TCP and UDP packets
+ and is supported for both transmit and receive. Checksum offloading
+ for transmit and recieve is enabled by default for both IPv4 and IPv6.
+
+ Checksum offloading can be enabled or disabled using ifconfig.
+ Transmit and receive offloading for IPv4 and Ipv6 are enabled
+ and disabled seperately.
+
+ NOTE: TSO requires Tx checksum, so when Tx checksum
+ is disabled, TSO will also be disabled.
+
+ To enable Tx checksum offloading for ipv4:
+
+ ifconfig ixl<interface_num> txcsum4
+
+ To disable Tx checksum offloading for ipv4:
+
+ ifconfig ixl<interface_num> -txcsum4
+ (NOTE: This will disable TSO4)
+
+ To enable Rx checksum offloading for ipv6:
+
+ ifconfig ixl<interface_num> rxcsum6
+
+ To disable Rx checksum offloading for ipv6:
+
+ ifconfig ixl<interface_num> -rxcsum6
+ (NOTE: This will disable TSO6)
+
+
+ To confirm the current settings:
+
+ ifconfig ixl<interface_num>
+
+
+ TSO
+ ---
+
+ TSO supports both IPv4 and IPv6 and is enabled by default. TSO can
+ be disabled and enabled using the ifconfig utility.
+
+ NOTE: TSO requires Tx checksum, so when Tx checksum is
+ disabled, TSO will also be disabled.
+
+ To disable TSO IPv4:
+
+ ifconfig ixl<interface_num> -tso4
+
+ To enable TSO IPv4:
+
+ ifconfig ixl<interface_num> tso4
+
+ To disable TSO IPv6:
+
+ ifconfig ixl<interface_num> -tso6
+
+ To enable TSO IPv6:
+
+ ifconfig ixl<interface_num> tso6
+
+ To disable BOTH TSO IPv4 and IPv6:
+
+ ifconfig ixl<interface_num> -tso
+
+ To enable BOTH TSO IPv4 and IPv6:
+
+ ifconfig ixl<interface_num> tso
+
+
+ LRO
+ ---
+
+ Large Receive Offload is enabled by default. It can be enabled
+ or disabled by using the ifconfig utility.
+
+ NOTE: LRO should be disabled when forwarding packets.
+
+ To disable LRO:
+
+ ifconfig ixl<interface_num> -lro
+
+ To enable LRO:
+
+ ifconfig ixl<interface_num> lro
+
+
+Flow Control (IXL only)
+------------
+Flow control is disabled by default. To change flow control settings use sysctl.
+
+To enable flow control to Rx pause frames:
+
+ sysctl dev.ixl.<interface_num>.fc=1
+
+To enable flow control to Tx pause frames:
+
+ sysctl dev.ixl.<interface_num>.fc=2
+
+To enable flow control to Rx and Tx pause frames:
+
+ sysctl dev.ixl.<interface_num>.fc=3
+
+To disable flow control:
+
+ sysctl dev.ixl.<interface_num>.fc=0
+
+
+NOTE: You must have a flow control capable link partner.
+
+NOTE: The VF driver does not have access to flow control, it must be
+ managed from the host side.
+
+
+ Important system configuration changes:
+ =======================================
+
+-Change the file /etc/sysctl.conf, and add the line:
+
+ hw.intr_storm_threshold: 0 (the default is 1000)
+
+-Best throughput results are seen with a large MTU; use 9706 if possible.
+
+-The default number of descriptors per ring is 1024, increasing this may
+improve performance depending on the use case.
+
+-The VF driver uses a relatively large buf ring, this was found to eliminate
+ UDP transmit errors, it is a tuneable, and if no UDP traffic is used it can
+ be reduced. It is memory used per queue.
+
+
+Known Limitations
+=================
+
+Network Memory Buffer allocation
+--------------------------------
+ FreeBSD may have a low number of network memory buffers (mbufs) by default.
+If your mbuf value is too low, it may cause the driver to fail to initialize
+and/or cause the system to become unresponsive. You can check to see if the
+system is mbuf-starved by running 'netstat -m'. Increase the number of mbufs
+by editing the lines below in /etc/sysctl.conf:
+
+ kern.ipc.nmbclusters
+ kern.ipc.nmbjumbop
+ kern.ipc.nmbjumbo9
+ kern.ipc.nmbjumbo16
+ kern.ipc.nmbufs
+
+The amount of memory that you allocate is system specific, and may
+require some trial and error.
+
+Also, increasing the follwing in /etc/sysctl.conf could help increase
+network performance:
+
+ kern.ipc.maxsockbuf
+ net.inet.tcp.sendspace
+ net.inet.tcp.recvspace
+ net.inet.udp.maxdgram
+ net.inet.udp.recvspace
+
+
+UDP Stress Test Dropped Packet Issue
+------------------------------------
+Under small packet UDP stress test with the ixl driver, the FreeBSD system
+may drop UDP packets due to the fullness of socket buffers. You may want to
+change the driver's Flow Control variables to the minimum value for
+controlling packet reception.
+
+
+Disable LRO when routing/bridging
+---------------------------------
+LRO must be turned off when forwarding traffic.
+
+
+Lower than expected performance
+-------------------------------
+Some PCIe x8 slots are actually configured as x4 slots. These slots have
+insufficient bandwidth for full line rate with dual port and quad port
+devices.
+
+In addition, if you put a PCIe Generation 3-capable adapter into a PCIe
+Generation 2 slot, you cannot get full bandwidth. The driver detects this
+situation and writes the following message in the system log:
+
+ "PCI-Express bandwidth available for this card is not sufficient for
+ optimal performance. For optimal performance a x8 PCI-Express slot
+ is required."
+
+If this error occurs, moving your adapter to a true PCIe Generation 3 x8
+slot will resolve the issue.
+
+
+Support
+=======
+
+For general information and support, go to the Intel support website at:
+
+ http://support.intel.com
+
+If an issue is identified with the released source code on the supported kernel
+with a supported adapter, email the specific information related to the issue
+to freebsdnic@mailbox.intel.com.
+
+
+License
+=======
+
+This software program is released under the terms of a license agreement
+between you ('Licensee') and Intel. Do not use or load this software or any
+associated materials (collectively, the 'Software') until you have carefully
+read the full terms and conditions of the LICENSE located in this software
+package. By loadingor using the Software, you agree to the terms of this
+Agreement. If you do not agree with the terms of this Agreement, do not
+install or use the Software.
+
+* Other names and brands may be claimed as the property of others.
+
+
diff --git a/usr/src/uts/common/io/i40e/core/THIRDPARTYLICENSE b/usr/src/uts/common/io/i40e/core/THIRDPARTYLICENSE
new file mode 100644
index 0000000000..04c551f1b2
--- /dev/null
+++ b/usr/src/uts/common/io/i40e/core/THIRDPARTYLICENSE
@@ -0,0 +1,29 @@
+ Copyright (c) 2013-2015, Intel Corporation
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/usr/src/uts/common/io/i40e/core/THIRDPARTYLICENSE.descrip b/usr/src/uts/common/io/i40e/core/THIRDPARTYLICENSE.descrip
new file mode 100644
index 0000000000..7a9537b10e
--- /dev/null
+++ b/usr/src/uts/common/io/i40e/core/THIRDPARTYLICENSE.descrip
@@ -0,0 +1 @@
+i40e DRIVER
diff --git a/usr/src/uts/common/io/i40e/core/i40e_adminq.c b/usr/src/uts/common/io/i40e/core/i40e_adminq.c
new file mode 100644
index 0000000000..67b72fd9f2
--- /dev/null
+++ b/usr/src/uts/common/io/i40e/core/i40e_adminq.c
@@ -0,0 +1,1101 @@
+/******************************************************************************
+
+ Copyright (c) 2013-2015, Intel Corporation
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+
+******************************************************************************/
+/*$FreeBSD: head/sys/dev/ixl/i40e_adminq.c 284049 2015-06-05 22:52:42Z jfv $*/
+
+#include "i40e_status.h"
+#include "i40e_type.h"
+#include "i40e_register.h"
+#include "i40e_adminq.h"
+#include "i40e_prototype.h"
+
+/**
+ * i40e_is_nvm_update_op - return TRUE if this is an NVM update operation
+ * @desc: API request descriptor
+ **/
+static INLINE bool i40e_is_nvm_update_op(struct i40e_aq_desc *desc)
+{
+ return (desc->opcode == CPU_TO_LE16(i40e_aqc_opc_nvm_erase) ||
+ desc->opcode == CPU_TO_LE16(i40e_aqc_opc_nvm_update));
+}
+
+/**
+ * i40e_adminq_init_regs - Initialize AdminQ registers
+ * @hw: pointer to the hardware structure
+ *
+ * This assumes the alloc_asq and alloc_arq functions have already been called
+ **/
+static void i40e_adminq_init_regs(struct i40e_hw *hw)
+{
+ /* set head and tail registers in our local struct */
+ if (i40e_is_vf(hw)) {
+ hw->aq.asq.tail = I40E_VF_ATQT1;
+ hw->aq.asq.head = I40E_VF_ATQH1;
+ hw->aq.asq.len = I40E_VF_ATQLEN1;
+ hw->aq.asq.bal = I40E_VF_ATQBAL1;
+ hw->aq.asq.bah = I40E_VF_ATQBAH1;
+ hw->aq.arq.tail = I40E_VF_ARQT1;
+ hw->aq.arq.head = I40E_VF_ARQH1;
+ hw->aq.arq.len = I40E_VF_ARQLEN1;
+ hw->aq.arq.bal = I40E_VF_ARQBAL1;
+ hw->aq.arq.bah = I40E_VF_ARQBAH1;
+ } else {
+ hw->aq.asq.tail = I40E_PF_ATQT;
+ hw->aq.asq.head = I40E_PF_ATQH;
+ hw->aq.asq.len = I40E_PF_ATQLEN;
+ hw->aq.asq.bal = I40E_PF_ATQBAL;
+ hw->aq.asq.bah = I40E_PF_ATQBAH;
+ hw->aq.arq.tail = I40E_PF_ARQT;
+ hw->aq.arq.head = I40E_PF_ARQH;
+ hw->aq.arq.len = I40E_PF_ARQLEN;
+ hw->aq.arq.bal = I40E_PF_ARQBAL;
+ hw->aq.arq.bah = I40E_PF_ARQBAH;
+ }
+}
+
+/**
+ * i40e_alloc_adminq_asq_ring - Allocate Admin Queue send rings
+ * @hw: pointer to the hardware structure
+ **/
+enum i40e_status_code i40e_alloc_adminq_asq_ring(struct i40e_hw *hw)
+{
+ enum i40e_status_code ret_code;
+
+ ret_code = i40e_allocate_dma_mem(hw, &hw->aq.asq.desc_buf,
+ i40e_mem_atq_ring,
+ (hw->aq.num_asq_entries *
+ sizeof(struct i40e_aq_desc)),
+ I40E_ADMINQ_DESC_ALIGNMENT);
+ if (ret_code)
+ return ret_code;
+
+ ret_code = i40e_allocate_virt_mem(hw, &hw->aq.asq.cmd_buf,
+ (hw->aq.num_asq_entries *
+ sizeof(struct i40e_asq_cmd_details)));
+ if (ret_code) {
+ i40e_free_dma_mem(hw, &hw->aq.asq.desc_buf);
+ return ret_code;
+ }
+
+ return ret_code;
+}
+
+/**
+ * i40e_alloc_adminq_arq_ring - Allocate Admin Queue receive rings
+ * @hw: pointer to the hardware structure
+ **/
+enum i40e_status_code i40e_alloc_adminq_arq_ring(struct i40e_hw *hw)
+{
+ enum i40e_status_code ret_code;
+
+ ret_code = i40e_allocate_dma_mem(hw, &hw->aq.arq.desc_buf,
+ i40e_mem_arq_ring,
+ (hw->aq.num_arq_entries *
+ sizeof(struct i40e_aq_desc)),
+ I40E_ADMINQ_DESC_ALIGNMENT);
+
+ return ret_code;
+}
+
+/**
+ * i40e_free_adminq_asq - Free Admin Queue send rings
+ * @hw: pointer to the hardware structure
+ *
+ * This assumes the posted send buffers have already been cleaned
+ * and de-allocated
+ **/
+void i40e_free_adminq_asq(struct i40e_hw *hw)
+{
+ i40e_free_dma_mem(hw, &hw->aq.asq.desc_buf);
+}
+
+/**
+ * i40e_free_adminq_arq - Free Admin Queue receive rings
+ * @hw: pointer to the hardware structure
+ *
+ * This assumes the posted receive buffers have already been cleaned
+ * and de-allocated
+ **/
+void i40e_free_adminq_arq(struct i40e_hw *hw)
+{
+ i40e_free_dma_mem(hw, &hw->aq.arq.desc_buf);
+}
+
+/**
+ * i40e_alloc_arq_bufs - Allocate pre-posted buffers for the receive queue
+ * @hw: pointer to the hardware structure
+ **/
+static enum i40e_status_code i40e_alloc_arq_bufs(struct i40e_hw *hw)
+{
+ enum i40e_status_code ret_code;
+ struct i40e_aq_desc *desc;
+ struct i40e_dma_mem *bi;
+ int i;
+
+ /* We'll be allocating the buffer info memory first, then we can
+ * allocate the mapped buffers for the event processing
+ */
+
+ /* buffer_info structures do not need alignment */
+ ret_code = i40e_allocate_virt_mem(hw, &hw->aq.arq.dma_head,
+ (hw->aq.num_arq_entries * sizeof(struct i40e_dma_mem)));
+ if (ret_code)
+ goto alloc_arq_bufs;
+ hw->aq.arq.r.arq_bi = (struct i40e_dma_mem *)hw->aq.arq.dma_head.va;
+
+ /* allocate the mapped buffers */
+ for (i = 0; i < hw->aq.num_arq_entries; i++) {
+ bi = &hw->aq.arq.r.arq_bi[i];
+ ret_code = i40e_allocate_dma_mem(hw, bi,
+ i40e_mem_arq_buf,
+ hw->aq.arq_buf_size,
+ I40E_ADMINQ_DESC_ALIGNMENT);
+ if (ret_code)
+ goto unwind_alloc_arq_bufs;
+
+ /* now configure the descriptors for use */
+ desc = I40E_ADMINQ_DESC(hw->aq.arq, i);
+
+ desc->flags = CPU_TO_LE16(I40E_AQ_FLAG_BUF);
+ if (hw->aq.arq_buf_size > I40E_AQ_LARGE_BUF)
+ desc->flags |= CPU_TO_LE16(I40E_AQ_FLAG_LB);
+ desc->opcode = 0;
+ /* This is in accordance with Admin queue design, there is no
+ * register for buffer size configuration
+ */
+ desc->datalen = CPU_TO_LE16((u16)bi->size);
+ desc->retval = 0;
+ desc->cookie_high = 0;
+ desc->cookie_low = 0;
+ desc->params.external.addr_high =
+ CPU_TO_LE32(I40E_HI_DWORD(bi->pa));
+ desc->params.external.addr_low =
+ CPU_TO_LE32(I40E_LO_DWORD(bi->pa));
+ desc->params.external.param0 = 0;
+ desc->params.external.param1 = 0;
+ }
+
+alloc_arq_bufs:
+ return ret_code;
+
+unwind_alloc_arq_bufs:
+ /* don't try to free the one that failed... */
+ i--;
+ for (; i >= 0; i--)
+ i40e_free_dma_mem(hw, &hw->aq.arq.r.arq_bi[i]);
+ i40e_free_virt_mem(hw, &hw->aq.arq.dma_head);
+
+ return ret_code;
+}
+
+/**
+ * i40e_alloc_asq_bufs - Allocate empty buffer structs for the send queue
+ * @hw: pointer to the hardware structure
+ **/
+static enum i40e_status_code i40e_alloc_asq_bufs(struct i40e_hw *hw)
+{
+ enum i40e_status_code ret_code;
+ struct i40e_dma_mem *bi;
+ int i;
+
+ /* No mapped memory needed yet, just the buffer info structures */
+ ret_code = i40e_allocate_virt_mem(hw, &hw->aq.asq.dma_head,
+ (hw->aq.num_asq_entries * sizeof(struct i40e_dma_mem)));
+ if (ret_code)
+ goto alloc_asq_bufs;
+ hw->aq.asq.r.asq_bi = (struct i40e_dma_mem *)hw->aq.asq.dma_head.va;
+
+ /* allocate the mapped buffers */
+ for (i = 0; i < hw->aq.num_asq_entries; i++) {
+ bi = &hw->aq.asq.r.asq_bi[i];
+ ret_code = i40e_allocate_dma_mem(hw, bi,
+ i40e_mem_asq_buf,
+ hw->aq.asq_buf_size,
+ I40E_ADMINQ_DESC_ALIGNMENT);
+ if (ret_code)
+ goto unwind_alloc_asq_bufs;
+ }
+alloc_asq_bufs:
+ return ret_code;
+
+unwind_alloc_asq_bufs:
+ /* don't try to free the one that failed... */
+ i--;
+ for (; i >= 0; i--)
+ i40e_free_dma_mem(hw, &hw->aq.asq.r.asq_bi[i]);
+ i40e_free_virt_mem(hw, &hw->aq.asq.dma_head);
+
+ return ret_code;
+}
+
+/**
+ * i40e_free_arq_bufs - Free receive queue buffer info elements
+ * @hw: pointer to the hardware structure
+ **/
+static void i40e_free_arq_bufs(struct i40e_hw *hw)
+{
+ int i;
+
+ /* free descriptors */
+ for (i = 0; i < hw->aq.num_arq_entries; i++)
+ i40e_free_dma_mem(hw, &hw->aq.arq.r.arq_bi[i]);
+
+ /* free the descriptor memory */
+ i40e_free_dma_mem(hw, &hw->aq.arq.desc_buf);
+
+ /* free the dma header */
+ i40e_free_virt_mem(hw, &hw->aq.arq.dma_head);
+}
+
+/**
+ * i40e_free_asq_bufs - Free send queue buffer info elements
+ * @hw: pointer to the hardware structure
+ **/
+static void i40e_free_asq_bufs(struct i40e_hw *hw)
+{
+ int i;
+
+ /* only unmap if the address is non-NULL */
+ for (i = 0; i < hw->aq.num_asq_entries; i++)
+ if (hw->aq.asq.r.asq_bi[i].pa)
+ i40e_free_dma_mem(hw, &hw->aq.asq.r.asq_bi[i]);
+
+ /* free the buffer info list */
+ i40e_free_virt_mem(hw, &hw->aq.asq.cmd_buf);
+
+ /* free the descriptor memory */
+ i40e_free_dma_mem(hw, &hw->aq.asq.desc_buf);
+
+ /* free the dma header */
+ i40e_free_virt_mem(hw, &hw->aq.asq.dma_head);
+}
+
+/**
+ * i40e_config_asq_regs - configure ASQ registers
+ * @hw: pointer to the hardware structure
+ *
+ * Configure base address and length registers for the transmit queue
+ **/
+static enum i40e_status_code i40e_config_asq_regs(struct i40e_hw *hw)
+{
+ enum i40e_status_code ret_code = I40E_SUCCESS;
+ u32 reg = 0;
+
+ /* Clear Head and Tail */
+ wr32(hw, hw->aq.asq.head, 0);
+ wr32(hw, hw->aq.asq.tail, 0);
+
+ /* set starting point */
+ if (!i40e_is_vf(hw))
+ wr32(hw, hw->aq.asq.len, (hw->aq.num_asq_entries |
+ I40E_PF_ATQLEN_ATQENABLE_MASK));
+ if (i40e_is_vf(hw))
+ wr32(hw, hw->aq.asq.len, (hw->aq.num_asq_entries |
+ I40E_VF_ATQLEN1_ATQENABLE_MASK));
+ wr32(hw, hw->aq.asq.bal, I40E_LO_DWORD(hw->aq.asq.desc_buf.pa));
+ wr32(hw, hw->aq.asq.bah, I40E_HI_DWORD(hw->aq.asq.desc_buf.pa));
+
+ /* Check one register to verify that config was applied */
+ reg = rd32(hw, hw->aq.asq.bal);
+ if (reg != I40E_LO_DWORD(hw->aq.asq.desc_buf.pa))
+ ret_code = I40E_ERR_ADMIN_QUEUE_ERROR;
+
+ return ret_code;
+}
+
+/**
+ * i40e_config_arq_regs - ARQ register configuration
+ * @hw: pointer to the hardware structure
+ *
+ * Configure base address and length registers for the receive (event queue)
+ **/
+static enum i40e_status_code i40e_config_arq_regs(struct i40e_hw *hw)
+{
+ enum i40e_status_code ret_code = I40E_SUCCESS;
+ u32 reg = 0;
+
+ /* Clear Head and Tail */
+ wr32(hw, hw->aq.arq.head, 0);
+ wr32(hw, hw->aq.arq.tail, 0);
+
+ /* set starting point */
+ if (!i40e_is_vf(hw))
+ wr32(hw, hw->aq.arq.len, (hw->aq.num_arq_entries |
+ I40E_PF_ARQLEN_ARQENABLE_MASK));
+ if (i40e_is_vf(hw))
+ wr32(hw, hw->aq.arq.len, (hw->aq.num_arq_entries |
+ I40E_VF_ARQLEN1_ARQENABLE_MASK));
+ wr32(hw, hw->aq.arq.bal, I40E_LO_DWORD(hw->aq.arq.desc_buf.pa));
+ wr32(hw, hw->aq.arq.bah, I40E_HI_DWORD(hw->aq.arq.desc_buf.pa));
+
+ /* Update tail in the HW to post pre-allocated buffers */
+ wr32(hw, hw->aq.arq.tail, hw->aq.num_arq_entries - 1);
+
+ /* Check one register to verify that config was applied */
+ reg = rd32(hw, hw->aq.arq.bal);
+ if (reg != I40E_LO_DWORD(hw->aq.arq.desc_buf.pa))
+ ret_code = I40E_ERR_ADMIN_QUEUE_ERROR;
+
+ return ret_code;
+}
+
+/**
+ * i40e_init_asq - main initialization routine for ASQ
+ * @hw: pointer to the hardware structure
+ *
+ * This is the main initialization routine for the Admin Send Queue
+ * Prior to calling this function, drivers *MUST* set the following fields
+ * in the hw->aq structure:
+ * - hw->aq.num_asq_entries
+ * - hw->aq.arq_buf_size
+ *
+ * Do *NOT* hold the lock when calling this as the memory allocation routines
+ * called are not going to be atomic context safe
+ **/
+enum i40e_status_code i40e_init_asq(struct i40e_hw *hw)
+{
+ enum i40e_status_code ret_code = I40E_SUCCESS;
+
+ if (hw->aq.asq.count > 0) {
+ /* queue already initialized */
+ ret_code = I40E_ERR_NOT_READY;
+ goto init_adminq_exit;
+ }
+
+ /* verify input for valid configuration */
+ if ((hw->aq.num_asq_entries == 0) ||
+ (hw->aq.asq_buf_size == 0)) {
+ ret_code = I40E_ERR_CONFIG;
+ goto init_adminq_exit;
+ }
+
+ hw->aq.asq.next_to_use = 0;
+ hw->aq.asq.next_to_clean = 0;
+ hw->aq.asq.count = hw->aq.num_asq_entries;
+
+ /* allocate the ring memory */
+ ret_code = i40e_alloc_adminq_asq_ring(hw);
+ if (ret_code != I40E_SUCCESS)
+ goto init_adminq_exit;
+
+ /* allocate buffers in the rings */
+ ret_code = i40e_alloc_asq_bufs(hw);
+ if (ret_code != I40E_SUCCESS)
+ goto init_adminq_free_rings;
+
+ /* initialize base registers */
+ ret_code = i40e_config_asq_regs(hw);
+ if (ret_code != I40E_SUCCESS)
+ goto init_adminq_free_rings;
+
+ /* success! */
+ goto init_adminq_exit;
+
+init_adminq_free_rings:
+ i40e_free_adminq_asq(hw);
+
+init_adminq_exit:
+ return ret_code;
+}
+
+/**
+ * i40e_init_arq - initialize ARQ
+ * @hw: pointer to the hardware structure
+ *
+ * The main initialization routine for the Admin Receive (Event) Queue.
+ * Prior to calling this function, drivers *MUST* set the following fields
+ * in the hw->aq structure:
+ * - hw->aq.num_asq_entries
+ * - hw->aq.arq_buf_size
+ *
+ * Do *NOT* hold the lock when calling this as the memory allocation routines
+ * called are not going to be atomic context safe
+ **/
+enum i40e_status_code i40e_init_arq(struct i40e_hw *hw)
+{
+ enum i40e_status_code ret_code = I40E_SUCCESS;
+
+ if (hw->aq.arq.count > 0) {
+ /* queue already initialized */
+ ret_code = I40E_ERR_NOT_READY;
+ goto init_adminq_exit;
+ }
+
+ /* verify input for valid configuration */
+ if ((hw->aq.num_arq_entries == 0) ||
+ (hw->aq.arq_buf_size == 0)) {
+ ret_code = I40E_ERR_CONFIG;
+ goto init_adminq_exit;
+ }
+
+ hw->aq.arq.next_to_use = 0;
+ hw->aq.arq.next_to_clean = 0;
+ hw->aq.arq.count = hw->aq.num_arq_entries;
+
+ /* allocate the ring memory */
+ ret_code = i40e_alloc_adminq_arq_ring(hw);
+ if (ret_code != I40E_SUCCESS)
+ goto init_adminq_exit;
+
+ /* allocate buffers in the rings */
+ ret_code = i40e_alloc_arq_bufs(hw);
+ if (ret_code != I40E_SUCCESS)
+ goto init_adminq_free_rings;
+
+ /* initialize base registers */
+ ret_code = i40e_config_arq_regs(hw);
+ if (ret_code != I40E_SUCCESS)
+ goto init_adminq_free_rings;
+
+ /* success! */
+ goto init_adminq_exit;
+
+init_adminq_free_rings:
+ i40e_free_adminq_arq(hw);
+
+init_adminq_exit:
+ return ret_code;
+}
+
+/**
+ * i40e_shutdown_asq - shutdown the ASQ
+ * @hw: pointer to the hardware structure
+ *
+ * The main shutdown routine for the Admin Send Queue
+ **/
+enum i40e_status_code i40e_shutdown_asq(struct i40e_hw *hw)
+{
+ enum i40e_status_code ret_code = I40E_SUCCESS;
+
+ if (hw->aq.asq.count == 0)
+ return I40E_ERR_NOT_READY;
+
+ /* Stop firmware AdminQ processing */
+ wr32(hw, hw->aq.asq.head, 0);
+ wr32(hw, hw->aq.asq.tail, 0);
+ wr32(hw, hw->aq.asq.len, 0);
+ wr32(hw, hw->aq.asq.bal, 0);
+ wr32(hw, hw->aq.asq.bah, 0);
+
+ /* make sure spinlock is available */
+ i40e_acquire_spinlock(&hw->aq.asq_spinlock);
+
+ hw->aq.asq.count = 0; /* to indicate uninitialized queue */
+
+ /* free ring buffers */
+ i40e_free_asq_bufs(hw);
+
+ i40e_release_spinlock(&hw->aq.asq_spinlock);
+
+ return ret_code;
+}
+
+/**
+ * i40e_shutdown_arq - shutdown ARQ
+ * @hw: pointer to the hardware structure
+ *
+ * The main shutdown routine for the Admin Receive Queue
+ **/
+enum i40e_status_code i40e_shutdown_arq(struct i40e_hw *hw)
+{
+ enum i40e_status_code ret_code = I40E_SUCCESS;
+
+ if (hw->aq.arq.count == 0)
+ return I40E_ERR_NOT_READY;
+
+ /* Stop firmware AdminQ processing */
+ wr32(hw, hw->aq.arq.head, 0);
+ wr32(hw, hw->aq.arq.tail, 0);
+ wr32(hw, hw->aq.arq.len, 0);
+ wr32(hw, hw->aq.arq.bal, 0);
+ wr32(hw, hw->aq.arq.bah, 0);
+
+ /* make sure spinlock is available */
+ i40e_acquire_spinlock(&hw->aq.arq_spinlock);
+
+ hw->aq.arq.count = 0; /* to indicate uninitialized queue */
+
+ /* free ring buffers */
+ i40e_free_arq_bufs(hw);
+
+ i40e_release_spinlock(&hw->aq.arq_spinlock);
+
+ return ret_code;
+}
+
+/**
+ * i40e_init_adminq - main initialization routine for Admin Queue
+ * @hw: pointer to the hardware structure
+ *
+ * Prior to calling this function, drivers *MUST* set the following fields
+ * in the hw->aq structure:
+ * - hw->aq.num_asq_entries
+ * - hw->aq.num_arq_entries
+ * - hw->aq.arq_buf_size
+ * - hw->aq.asq_buf_size
+ **/
+enum i40e_status_code i40e_init_adminq(struct i40e_hw *hw)
+{
+ enum i40e_status_code ret_code;
+ u16 eetrack_lo, eetrack_hi;
+ u16 cfg_ptr, oem_hi, oem_lo;
+ int retry = 0;
+ /* verify input for valid configuration */
+ if ((hw->aq.num_arq_entries == 0) ||
+ (hw->aq.num_asq_entries == 0) ||
+ (hw->aq.arq_buf_size == 0) ||
+ (hw->aq.asq_buf_size == 0)) {
+ ret_code = I40E_ERR_CONFIG;
+ goto init_adminq_exit;
+ }
+
+ /* initialize spin locks */
+ i40e_init_spinlock(&hw->aq.asq_spinlock);
+ i40e_init_spinlock(&hw->aq.arq_spinlock);
+
+ /* Set up register offsets */
+ i40e_adminq_init_regs(hw);
+
+ /* setup ASQ command write back timeout */
+ hw->aq.asq_cmd_timeout = I40E_ASQ_CMD_TIMEOUT;
+
+ /* allocate the ASQ */
+ ret_code = i40e_init_asq(hw);
+ if (ret_code != I40E_SUCCESS)
+ goto init_adminq_destroy_spinlocks;
+
+ /* allocate the ARQ */
+ ret_code = i40e_init_arq(hw);
+ if (ret_code != I40E_SUCCESS)
+ goto init_adminq_free_asq;
+
+ /* VF has no need of firmware */
+ if (i40e_is_vf(hw))
+ goto init_adminq_exit;
+ /* There are some cases where the firmware may not be quite ready
+ * for AdminQ operations, so we retry the AdminQ setup a few times
+ * if we see timeouts in this first AQ call.
+ */
+ do {
+ ret_code = i40e_aq_get_firmware_version(hw,
+ &hw->aq.fw_maj_ver,
+ &hw->aq.fw_min_ver,
+ &hw->aq.fw_build,
+ &hw->aq.api_maj_ver,
+ &hw->aq.api_min_ver,
+ NULL);
+ if (ret_code != I40E_ERR_ADMIN_QUEUE_TIMEOUT)
+ break;
+ retry++;
+ i40e_msec_delay(100);
+ i40e_resume_aq(hw);
+ } while (retry < 10);
+ if (ret_code != I40E_SUCCESS)
+ goto init_adminq_free_arq;
+
+ /* get the NVM version info */
+ i40e_read_nvm_word(hw, I40E_SR_NVM_DEV_STARTER_VERSION,
+ &hw->nvm.version);
+ i40e_read_nvm_word(hw, I40E_SR_NVM_EETRACK_LO, &eetrack_lo);
+ i40e_read_nvm_word(hw, I40E_SR_NVM_EETRACK_HI, &eetrack_hi);
+ hw->nvm.eetrack = (eetrack_hi << 16) | eetrack_lo;
+ i40e_read_nvm_word(hw, I40E_SR_BOOT_CONFIG_PTR, &cfg_ptr);
+ i40e_read_nvm_word(hw, (cfg_ptr + I40E_NVM_OEM_VER_OFF),
+ &oem_hi);
+ i40e_read_nvm_word(hw, (cfg_ptr + (I40E_NVM_OEM_VER_OFF + 1)),
+ &oem_lo);
+ hw->nvm.oem_ver = ((u32)oem_hi << 16) | oem_lo;
+
+ if (hw->aq.api_maj_ver > I40E_FW_API_VERSION_MAJOR) {
+ ret_code = I40E_ERR_FIRMWARE_API_VERSION;
+ goto init_adminq_free_arq;
+ }
+
+ /* pre-emptive resource lock release */
+ i40e_aq_release_resource(hw, I40E_NVM_RESOURCE_ID, 0, NULL);
+ hw->aq.nvm_release_on_done = FALSE;
+ hw->nvmupd_state = I40E_NVMUPD_STATE_INIT;
+
+ ret_code = i40e_aq_set_hmc_resource_profile(hw,
+ I40E_HMC_PROFILE_DEFAULT,
+ 0,
+ NULL);
+ ret_code = I40E_SUCCESS;
+
+ /* success! */
+ goto init_adminq_exit;
+
+init_adminq_free_arq:
+ i40e_shutdown_arq(hw);
+init_adminq_free_asq:
+ i40e_shutdown_asq(hw);
+init_adminq_destroy_spinlocks:
+ i40e_destroy_spinlock(&hw->aq.asq_spinlock);
+ i40e_destroy_spinlock(&hw->aq.arq_spinlock);
+
+init_adminq_exit:
+ return ret_code;
+}
+
+/**
+ * i40e_shutdown_adminq - shutdown routine for the Admin Queue
+ * @hw: pointer to the hardware structure
+ **/
+enum i40e_status_code i40e_shutdown_adminq(struct i40e_hw *hw)
+{
+ enum i40e_status_code ret_code = I40E_SUCCESS;
+
+ if (i40e_check_asq_alive(hw))
+ i40e_aq_queue_shutdown(hw, TRUE);
+
+ i40e_shutdown_asq(hw);
+ i40e_shutdown_arq(hw);
+
+ /* destroy the spinlocks */
+ i40e_destroy_spinlock(&hw->aq.asq_spinlock);
+ i40e_destroy_spinlock(&hw->aq.arq_spinlock);
+
+ if (hw->nvm_buff.va)
+ i40e_free_virt_mem(hw, &hw->nvm_buff);
+
+ return ret_code;
+}
+
+/**
+ * i40e_clean_asq - cleans Admin send queue
+ * @hw: pointer to the hardware structure
+ *
+ * returns the number of free desc
+ **/
+u16 i40e_clean_asq(struct i40e_hw *hw)
+{
+ struct i40e_adminq_ring *asq = &(hw->aq.asq);
+ struct i40e_asq_cmd_details *details;
+ u16 ntc = asq->next_to_clean;
+ struct i40e_aq_desc desc_cb;
+ struct i40e_aq_desc *desc;
+
+ desc = I40E_ADMINQ_DESC(*asq, ntc);
+ details = I40E_ADMINQ_DETAILS(*asq, ntc);
+
+ while (rd32(hw, hw->aq.asq.head) != ntc) {
+ i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE,
+ "ntc %d head %d.\n", ntc, rd32(hw, hw->aq.asq.head));
+
+ if (details->callback) {
+ I40E_ADMINQ_CALLBACK cb_func =
+ (I40E_ADMINQ_CALLBACK)details->callback;
+ i40e_memcpy(&desc_cb, desc, sizeof(struct i40e_aq_desc),
+ I40E_DMA_TO_DMA);
+ cb_func(hw, &desc_cb);
+ }
+ i40e_memset(desc, 0, sizeof(*desc), I40E_DMA_MEM);
+ i40e_memset(details, 0, sizeof(*details), I40E_NONDMA_MEM);
+ ntc++;
+ if (ntc == asq->count)
+ ntc = 0;
+ desc = I40E_ADMINQ_DESC(*asq, ntc);
+ details = I40E_ADMINQ_DETAILS(*asq, ntc);
+ }
+
+ asq->next_to_clean = ntc;
+
+ return I40E_DESC_UNUSED(asq);
+}
+
+/**
+ * i40e_asq_done - check if FW has processed the Admin Send Queue
+ * @hw: pointer to the hw struct
+ *
+ * Returns TRUE if the firmware has processed all descriptors on the
+ * admin send queue. Returns FALSE if there are still requests pending.
+ **/
+bool i40e_asq_done(struct i40e_hw *hw)
+{
+ /* AQ designers suggest use of head for better
+ * timing reliability than DD bit
+ */
+ return rd32(hw, hw->aq.asq.head) == hw->aq.asq.next_to_use;
+
+}
+
+/**
+ * i40e_asq_send_command - send command to Admin Queue
+ * @hw: pointer to the hw struct
+ * @desc: prefilled descriptor describing the command (non DMA mem)
+ * @buff: buffer to use for indirect commands
+ * @buff_size: size of buffer for indirect commands
+ * @cmd_details: pointer to command details structure
+ *
+ * This is the main send command driver routine for the Admin Queue send
+ * queue. It runs the queue, cleans the queue, etc
+ **/
+enum i40e_status_code i40e_asq_send_command(struct i40e_hw *hw,
+ struct i40e_aq_desc *desc,
+ void *buff, /* can be NULL */
+ u16 buff_size,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ enum i40e_status_code status = I40E_SUCCESS;
+ struct i40e_dma_mem *dma_buff = NULL;
+ struct i40e_asq_cmd_details *details;
+ struct i40e_aq_desc *desc_on_ring;
+ bool cmd_completed = FALSE;
+ u16 retval = 0;
+ u32 val = 0;
+
+ hw->aq.asq_last_status = I40E_AQ_RC_OK;
+
+ val = rd32(hw, hw->aq.asq.head);
+ if (val >= hw->aq.num_asq_entries) {
+ i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE,
+ "AQTX: head overrun at %d\n", val);
+ status = I40E_ERR_QUEUE_EMPTY;
+ goto asq_send_command_exit;
+ }
+
+ if (hw->aq.asq.count == 0) {
+ i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE,
+ "AQTX: Admin queue not initialized.\n");
+ status = I40E_ERR_QUEUE_EMPTY;
+ goto asq_send_command_exit;
+ }
+
+ details = I40E_ADMINQ_DETAILS(hw->aq.asq, hw->aq.asq.next_to_use);
+ if (cmd_details) {
+ i40e_memcpy(details,
+ cmd_details,
+ sizeof(struct i40e_asq_cmd_details),
+ I40E_NONDMA_TO_NONDMA);
+
+ /* If the cmd_details are defined copy the cookie. The
+ * CPU_TO_LE32 is not needed here because the data is ignored
+ * by the FW, only used by the driver
+ */
+ if (details->cookie) {
+ desc->cookie_high =
+ CPU_TO_LE32(I40E_HI_DWORD(details->cookie));
+ desc->cookie_low =
+ CPU_TO_LE32(I40E_LO_DWORD(details->cookie));
+ }
+ } else {
+ i40e_memset(details, 0,
+ sizeof(struct i40e_asq_cmd_details),
+ I40E_NONDMA_MEM);
+ }
+
+ /* clear requested flags and then set additional flags if defined */
+ desc->flags &= ~CPU_TO_LE16(details->flags_dis);
+ desc->flags |= CPU_TO_LE16(details->flags_ena);
+
+ i40e_acquire_spinlock(&hw->aq.asq_spinlock);
+
+ if (buff_size > hw->aq.asq_buf_size) {
+ i40e_debug(hw,
+ I40E_DEBUG_AQ_MESSAGE,
+ "AQTX: Invalid buffer size: %d.\n",
+ buff_size);
+ status = I40E_ERR_INVALID_SIZE;
+ goto asq_send_command_error;
+ }
+
+ if (details->postpone && !details->async) {
+ i40e_debug(hw,
+ I40E_DEBUG_AQ_MESSAGE,
+ "AQTX: Async flag not set along with postpone flag");
+ status = I40E_ERR_PARAM;
+ goto asq_send_command_error;
+ }
+
+ /* call clean and check queue available function to reclaim the
+ * descriptors that were processed by FW, the function returns the
+ * number of desc available
+ */
+ /* the clean function called here could be called in a separate thread
+ * in case of asynchronous completions
+ */
+ if (i40e_clean_asq(hw) == 0) {
+ i40e_debug(hw,
+ I40E_DEBUG_AQ_MESSAGE,
+ "AQTX: Error queue is full.\n");
+ status = I40E_ERR_ADMIN_QUEUE_FULL;
+ goto asq_send_command_error;
+ }
+
+ /* initialize the temp desc pointer with the right desc */
+ desc_on_ring = I40E_ADMINQ_DESC(hw->aq.asq, hw->aq.asq.next_to_use);
+
+ /* if the desc is available copy the temp desc to the right place */
+ i40e_memcpy(desc_on_ring, desc, sizeof(struct i40e_aq_desc),
+ I40E_NONDMA_TO_DMA);
+
+ /* if buff is not NULL assume indirect command */
+ if (buff != NULL) {
+ dma_buff = &(hw->aq.asq.r.asq_bi[hw->aq.asq.next_to_use]);
+ /* copy the user buff into the respective DMA buff */
+ i40e_memcpy(dma_buff->va, buff, buff_size,
+ I40E_NONDMA_TO_DMA);
+ desc_on_ring->datalen = CPU_TO_LE16(buff_size);
+
+ /* Update the address values in the desc with the pa value
+ * for respective buffer
+ */
+ desc_on_ring->params.external.addr_high =
+ CPU_TO_LE32(I40E_HI_DWORD(dma_buff->pa));
+ desc_on_ring->params.external.addr_low =
+ CPU_TO_LE32(I40E_LO_DWORD(dma_buff->pa));
+ }
+
+ /* bump the tail */
+ i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, "AQTX: desc and buffer:\n");
+ i40e_debug_aq(hw, I40E_DEBUG_AQ_COMMAND, (void *)desc_on_ring,
+ buff, buff_size);
+ (hw->aq.asq.next_to_use)++;
+ if (hw->aq.asq.next_to_use == hw->aq.asq.count)
+ hw->aq.asq.next_to_use = 0;
+ if (!details->postpone)
+ wr32(hw, hw->aq.asq.tail, hw->aq.asq.next_to_use);
+
+ /* if cmd_details are not defined or async flag is not set,
+ * we need to wait for desc write back
+ */
+ if (!details->async && !details->postpone) {
+ u32 total_delay = 0;
+
+ do {
+ /* AQ designers suggest use of head for better
+ * timing reliability than DD bit
+ */
+ if (i40e_asq_done(hw))
+ break;
+ /* ugh! delay while spin_lock */
+ i40e_msec_delay(1);
+ total_delay++;
+ } while (total_delay < hw->aq.asq_cmd_timeout);
+ }
+
+ /* if ready, copy the desc back to temp */
+ if (i40e_asq_done(hw)) {
+ i40e_memcpy(desc, desc_on_ring, sizeof(struct i40e_aq_desc),
+ I40E_DMA_TO_NONDMA);
+ if (buff != NULL)
+ i40e_memcpy(buff, dma_buff->va, buff_size,
+ I40E_DMA_TO_NONDMA);
+ retval = LE16_TO_CPU(desc->retval);
+ if (retval != 0) {
+ i40e_debug(hw,
+ I40E_DEBUG_AQ_MESSAGE,
+ "AQTX: Command completed with error 0x%X.\n",
+ retval);
+
+ /* strip off FW internal code */
+ retval &= 0xff;
+ }
+ cmd_completed = TRUE;
+ if ((enum i40e_admin_queue_err)retval == I40E_AQ_RC_OK)
+ status = I40E_SUCCESS;
+ else
+ status = I40E_ERR_ADMIN_QUEUE_ERROR;
+ hw->aq.asq_last_status = (enum i40e_admin_queue_err)retval;
+ }
+
+ i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE,
+ "AQTX: desc and buffer writeback:\n");
+ i40e_debug_aq(hw, I40E_DEBUG_AQ_COMMAND, (void *)desc, buff, buff_size);
+
+ /* save writeback aq if requested */
+ if (details->wb_desc)
+ i40e_memcpy(details->wb_desc, desc_on_ring,
+ sizeof(struct i40e_aq_desc), I40E_DMA_TO_NONDMA);
+
+ /* update the error if time out occurred */
+ if ((!cmd_completed) &&
+ (!details->async && !details->postpone)) {
+ i40e_debug(hw,
+ I40E_DEBUG_AQ_MESSAGE,
+ "AQTX: Writeback timeout.\n");
+ status = I40E_ERR_ADMIN_QUEUE_TIMEOUT;
+ }
+
+asq_send_command_error:
+ i40e_release_spinlock(&hw->aq.asq_spinlock);
+asq_send_command_exit:
+ return status;
+}
+
+/**
+ * i40e_fill_default_direct_cmd_desc - AQ descriptor helper function
+ * @desc: pointer to the temp descriptor (non DMA mem)
+ * @opcode: the opcode can be used to decide which flags to turn off or on
+ *
+ * Fill the desc with default values
+ **/
+void i40e_fill_default_direct_cmd_desc(struct i40e_aq_desc *desc,
+ u16 opcode)
+{
+ /* zero out the desc */
+ i40e_memset((void *)desc, 0, sizeof(struct i40e_aq_desc),
+ I40E_NONDMA_MEM);
+ desc->opcode = CPU_TO_LE16(opcode);
+ desc->flags = CPU_TO_LE16(I40E_AQ_FLAG_SI);
+}
+
+/**
+ * i40e_clean_arq_element
+ * @hw: pointer to the hw struct
+ * @e: event info from the receive descriptor, includes any buffers
+ * @pending: number of events that could be left to process
+ *
+ * This function cleans one Admin Receive Queue element and returns
+ * the contents through e. It can also return how many events are
+ * left to process through 'pending'
+ **/
+enum i40e_status_code i40e_clean_arq_element(struct i40e_hw *hw,
+ struct i40e_arq_event_info *e,
+ u16 *pending)
+{
+ enum i40e_status_code ret_code = I40E_SUCCESS;
+ u16 ntc = hw->aq.arq.next_to_clean;
+ struct i40e_aq_desc *desc;
+ struct i40e_dma_mem *bi;
+ u16 desc_idx;
+ u16 datalen;
+ u16 flags;
+ u16 ntu;
+
+ /* take the lock before we start messing with the ring */
+ i40e_acquire_spinlock(&hw->aq.arq_spinlock);
+
+ /* set next_to_use to head */
+ if (!i40e_is_vf(hw))
+ ntu = (rd32(hw, hw->aq.arq.head) & I40E_PF_ARQH_ARQH_MASK);
+ if (i40e_is_vf(hw))
+ ntu = (rd32(hw, hw->aq.arq.head) & I40E_VF_ARQH1_ARQH_MASK);
+ if (ntu == ntc) {
+ /* nothing to do - shouldn't need to update ring's values */
+ ret_code = I40E_ERR_ADMIN_QUEUE_NO_WORK;
+ goto clean_arq_element_out;
+ }
+
+ /* now clean the next descriptor */
+ desc = I40E_ADMINQ_DESC(hw->aq.arq, ntc);
+ desc_idx = ntc;
+
+ flags = LE16_TO_CPU(desc->flags);
+ if (flags & I40E_AQ_FLAG_ERR) {
+ ret_code = I40E_ERR_ADMIN_QUEUE_ERROR;
+ hw->aq.arq_last_status =
+ (enum i40e_admin_queue_err)LE16_TO_CPU(desc->retval);
+ i40e_debug(hw,
+ I40E_DEBUG_AQ_MESSAGE,
+ "AQRX: Event received with error 0x%X.\n",
+ hw->aq.arq_last_status);
+ }
+
+ i40e_memcpy(&e->desc, desc, sizeof(struct i40e_aq_desc),
+ I40E_DMA_TO_NONDMA);
+ datalen = LE16_TO_CPU(desc->datalen);
+ e->msg_len = min(datalen, e->buf_len);
+ if (e->msg_buf != NULL && (e->msg_len != 0))
+ i40e_memcpy(e->msg_buf,
+ hw->aq.arq.r.arq_bi[desc_idx].va,
+ e->msg_len, I40E_DMA_TO_NONDMA);
+
+ i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, "AQRX: desc and buffer:\n");
+ i40e_debug_aq(hw, I40E_DEBUG_AQ_COMMAND, (void *)desc, e->msg_buf,
+ hw->aq.arq_buf_size);
+
+ /* Restore the original datalen and buffer address in the desc,
+ * FW updates datalen to indicate the event message
+ * size
+ */
+ bi = &hw->aq.arq.r.arq_bi[ntc];
+ i40e_memset((void *)desc, 0, sizeof(struct i40e_aq_desc), I40E_DMA_MEM);
+
+ desc->flags = CPU_TO_LE16(I40E_AQ_FLAG_BUF);
+ if (hw->aq.arq_buf_size > I40E_AQ_LARGE_BUF)
+ desc->flags |= CPU_TO_LE16(I40E_AQ_FLAG_LB);
+ desc->datalen = CPU_TO_LE16((u16)bi->size);
+ desc->params.external.addr_high = CPU_TO_LE32(I40E_HI_DWORD(bi->pa));
+ desc->params.external.addr_low = CPU_TO_LE32(I40E_LO_DWORD(bi->pa));
+
+ /* set tail = the last cleaned desc index. */
+ wr32(hw, hw->aq.arq.tail, ntc);
+ /* ntc is updated to tail + 1 */
+ ntc++;
+ if (ntc == hw->aq.num_arq_entries)
+ ntc = 0;
+ hw->aq.arq.next_to_clean = ntc;
+ hw->aq.arq.next_to_use = ntu;
+
+clean_arq_element_out:
+ /* Set pending if needed, unlock and return */
+ if (pending != NULL)
+ *pending = (ntc > ntu ? hw->aq.arq.count : 0) + (ntu - ntc);
+ i40e_release_spinlock(&hw->aq.arq_spinlock);
+
+ if (i40e_is_nvm_update_op(&e->desc)) {
+ if (hw->aq.nvm_release_on_done) {
+ i40e_release_nvm(hw);
+ hw->aq.nvm_release_on_done = FALSE;
+ }
+
+ switch (hw->nvmupd_state) {
+ case I40E_NVMUPD_STATE_INIT_WAIT:
+ hw->nvmupd_state = I40E_NVMUPD_STATE_INIT;
+ break;
+
+ case I40E_NVMUPD_STATE_WRITE_WAIT:
+ hw->nvmupd_state = I40E_NVMUPD_STATE_WRITING;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ return ret_code;
+}
+
+void i40e_resume_aq(struct i40e_hw *hw)
+{
+ /* Registers are reset after PF reset */
+ hw->aq.asq.next_to_use = 0;
+ hw->aq.asq.next_to_clean = 0;
+
+ i40e_config_asq_regs(hw);
+
+ hw->aq.arq.next_to_use = 0;
+ hw->aq.arq.next_to_clean = 0;
+
+ i40e_config_arq_regs(hw);
+}
diff --git a/usr/src/uts/common/io/i40e/core/i40e_adminq.h b/usr/src/uts/common/io/i40e/core/i40e_adminq.h
new file mode 100644
index 0000000000..e20d6893ed
--- /dev/null
+++ b/usr/src/uts/common/io/i40e/core/i40e_adminq.h
@@ -0,0 +1,125 @@
+/******************************************************************************
+
+ Copyright (c) 2013-2015, Intel Corporation
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+
+******************************************************************************/
+/*$FreeBSD: head/sys/dev/ixl/i40e_adminq.h 284049 2015-06-05 22:52:42Z jfv $*/
+
+#ifndef _I40E_ADMINQ_H_
+#define _I40E_ADMINQ_H_
+
+#include "i40e_osdep.h"
+#include "i40e_status.h"
+#include "i40e_adminq_cmd.h"
+
+#define I40E_ADMINQ_DESC(R, i) \
+ (&(((struct i40e_aq_desc *)((R).desc_buf.va))[i]))
+
+#define I40E_ADMINQ_DESC_ALIGNMENT 4096
+
+struct i40e_adminq_ring {
+ struct i40e_virt_mem dma_head; /* space for dma structures */
+ struct i40e_dma_mem desc_buf; /* descriptor ring memory */
+ struct i40e_virt_mem cmd_buf; /* command buffer memory */
+
+ union {
+ struct i40e_dma_mem *asq_bi;
+ struct i40e_dma_mem *arq_bi;
+ } r;
+
+ u16 count; /* Number of descriptors */
+ u16 rx_buf_len; /* Admin Receive Queue buffer length */
+
+ /* used for interrupt processing */
+ u16 next_to_use;
+ u16 next_to_clean;
+
+ /* used for queue tracking */
+ u32 head;
+ u32 tail;
+ u32 len;
+ u32 bah;
+ u32 bal;
+};
+
+/* ASQ transaction details */
+struct i40e_asq_cmd_details {
+ void *callback; /* cast from type I40E_ADMINQ_CALLBACK */
+ u64 cookie;
+ u16 flags_ena;
+ u16 flags_dis;
+ bool async;
+ bool postpone;
+ struct i40e_aq_desc *wb_desc;
+};
+
+#define I40E_ADMINQ_DETAILS(R, i) \
+ (&(((struct i40e_asq_cmd_details *)((R).cmd_buf.va))[i]))
+
+/* ARQ event information */
+struct i40e_arq_event_info {
+ struct i40e_aq_desc desc;
+ u16 msg_len;
+ u16 buf_len;
+ u8 *msg_buf;
+};
+
+/* Admin Queue information */
+struct i40e_adminq_info {
+ struct i40e_adminq_ring arq; /* receive queue */
+ struct i40e_adminq_ring asq; /* send queue */
+ u32 asq_cmd_timeout; /* send queue cmd write back timeout*/
+ u16 num_arq_entries; /* receive queue depth */
+ u16 num_asq_entries; /* send queue depth */
+ u16 arq_buf_size; /* receive queue buffer size */
+ u16 asq_buf_size; /* send queue buffer size */
+ u16 fw_maj_ver; /* firmware major version */
+ u16 fw_min_ver; /* firmware minor version */
+ u32 fw_build; /* firmware build number */
+ u16 api_maj_ver; /* api major version */
+ u16 api_min_ver; /* api minor version */
+ bool nvm_release_on_done;
+
+ struct i40e_spinlock asq_spinlock; /* Send queue spinlock */
+ struct i40e_spinlock arq_spinlock; /* Receive queue spinlock */
+
+ /* last status values on send and receive queues */
+ enum i40e_admin_queue_err asq_last_status;
+ enum i40e_admin_queue_err arq_last_status;
+};
+
+/* general information */
+#define I40E_AQ_LARGE_BUF 512
+#define I40E_ASQ_CMD_TIMEOUT 250 /* msecs */
+
+void i40e_fill_default_direct_cmd_desc(struct i40e_aq_desc *desc,
+ u16 opcode);
+
+#endif /* _I40E_ADMINQ_H_ */
diff --git a/usr/src/uts/common/io/i40e/core/i40e_adminq_cmd.h b/usr/src/uts/common/io/i40e/core/i40e_adminq_cmd.h
new file mode 100644
index 0000000000..af9f107597
--- /dev/null
+++ b/usr/src/uts/common/io/i40e/core/i40e_adminq_cmd.h
@@ -0,0 +1,2424 @@
+/******************************************************************************
+
+ Copyright (c) 2013-2015, Intel Corporation
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+
+******************************************************************************/
+/*$FreeBSD: head/sys/dev/ixl/i40e_adminq_cmd.h 284049 2015-06-05 22:52:42Z jfv $*/
+
+#ifndef _I40E_ADMINQ_CMD_H_
+#define _I40E_ADMINQ_CMD_H_
+
+/* This header file defines the i40e Admin Queue commands and is shared between
+ * i40e Firmware and Software.
+ *
+ * This file needs to comply with the Linux Kernel coding style.
+ */
+
+#define I40E_FW_API_VERSION_MAJOR 0x0001
+#ifdef X722_SUPPORT
+#define I40E_FW_API_VERSION_MINOR 0x0003
+#else
+#define I40E_FW_API_VERSION_MINOR 0x0004
+#endif
+
+struct i40e_aq_desc {
+ __le16 flags;
+ __le16 opcode;
+ __le16 datalen;
+ __le16 retval;
+ __le32 cookie_high;
+ __le32 cookie_low;
+ union {
+ struct {
+ __le32 param0;
+ __le32 param1;
+ __le32 param2;
+ __le32 param3;
+ } internal;
+ struct {
+ __le32 param0;
+ __le32 param1;
+ __le32 addr_high;
+ __le32 addr_low;
+ } external;
+ u8 raw[16];
+ } params;
+};
+
+/* Flags sub-structure
+ * |0 |1 |2 |3 |4 |5 |6 |7 |8 |9 |10 |11 |12 |13 |14 |15 |
+ * |DD |CMP|ERR|VFE| * * RESERVED * * |LB |RD |VFC|BUF|SI |EI |FE |
+ */
+
+/* command flags and offsets*/
+#define I40E_AQ_FLAG_DD_SHIFT 0
+#define I40E_AQ_FLAG_CMP_SHIFT 1
+#define I40E_AQ_FLAG_ERR_SHIFT 2
+#define I40E_AQ_FLAG_VFE_SHIFT 3
+#define I40E_AQ_FLAG_LB_SHIFT 9
+#define I40E_AQ_FLAG_RD_SHIFT 10
+#define I40E_AQ_FLAG_VFC_SHIFT 11
+#define I40E_AQ_FLAG_BUF_SHIFT 12
+#define I40E_AQ_FLAG_SI_SHIFT 13
+#define I40E_AQ_FLAG_EI_SHIFT 14
+#define I40E_AQ_FLAG_FE_SHIFT 15
+
+#define I40E_AQ_FLAG_DD (1 << I40E_AQ_FLAG_DD_SHIFT) /* 0x1 */
+#define I40E_AQ_FLAG_CMP (1 << I40E_AQ_FLAG_CMP_SHIFT) /* 0x2 */
+#define I40E_AQ_FLAG_ERR (1 << I40E_AQ_FLAG_ERR_SHIFT) /* 0x4 */
+#define I40E_AQ_FLAG_VFE (1 << I40E_AQ_FLAG_VFE_SHIFT) /* 0x8 */
+#define I40E_AQ_FLAG_LB (1 << I40E_AQ_FLAG_LB_SHIFT) /* 0x200 */
+#define I40E_AQ_FLAG_RD (1 << I40E_AQ_FLAG_RD_SHIFT) /* 0x400 */
+#define I40E_AQ_FLAG_VFC (1 << I40E_AQ_FLAG_VFC_SHIFT) /* 0x800 */
+#define I40E_AQ_FLAG_BUF (1 << I40E_AQ_FLAG_BUF_SHIFT) /* 0x1000 */
+#define I40E_AQ_FLAG_SI (1 << I40E_AQ_FLAG_SI_SHIFT) /* 0x2000 */
+#define I40E_AQ_FLAG_EI (1 << I40E_AQ_FLAG_EI_SHIFT) /* 0x4000 */
+#define I40E_AQ_FLAG_FE (1 << I40E_AQ_FLAG_FE_SHIFT) /* 0x8000 */
+
+/* error codes */
+enum i40e_admin_queue_err {
+ I40E_AQ_RC_OK = 0, /* success */
+ I40E_AQ_RC_EPERM = 1, /* Operation not permitted */
+ I40E_AQ_RC_ENOENT = 2, /* No such element */
+ I40E_AQ_RC_ESRCH = 3, /* Bad opcode */
+ I40E_AQ_RC_EINTR = 4, /* operation interrupted */
+ I40E_AQ_RC_EIO = 5, /* I/O error */
+ I40E_AQ_RC_ENXIO = 6, /* No such resource */
+ I40E_AQ_RC_E2BIG = 7, /* Arg too long */
+ I40E_AQ_RC_EAGAIN = 8, /* Try again */
+ I40E_AQ_RC_ENOMEM = 9, /* Out of memory */
+ I40E_AQ_RC_EACCES = 10, /* Permission denied */
+ I40E_AQ_RC_EFAULT = 11, /* Bad address */
+ I40E_AQ_RC_EBUSY = 12, /* Device or resource busy */
+ I40E_AQ_RC_EEXIST = 13, /* object already exists */
+ I40E_AQ_RC_EINVAL = 14, /* Invalid argument */
+ I40E_AQ_RC_ENOTTY = 15, /* Not a typewriter */
+ I40E_AQ_RC_ENOSPC = 16, /* No space left or alloc failure */
+ I40E_AQ_RC_ENOSYS = 17, /* Function not implemented */
+ I40E_AQ_RC_ERANGE = 18, /* Parameter out of range */
+ I40E_AQ_RC_EFLUSHED = 19, /* Cmd flushed due to prev cmd error */
+ I40E_AQ_RC_BAD_ADDR = 20, /* Descriptor contains a bad pointer */
+ I40E_AQ_RC_EMODE = 21, /* Op not allowed in current dev mode */
+ I40E_AQ_RC_EFBIG = 22, /* File too large */
+};
+
+/* Admin Queue command opcodes */
+enum i40e_admin_queue_opc {
+ /* aq commands */
+ i40e_aqc_opc_get_version = 0x0001,
+ i40e_aqc_opc_driver_version = 0x0002,
+ i40e_aqc_opc_queue_shutdown = 0x0003,
+ i40e_aqc_opc_set_pf_context = 0x0004,
+
+ /* resource ownership */
+ i40e_aqc_opc_request_resource = 0x0008,
+ i40e_aqc_opc_release_resource = 0x0009,
+
+ i40e_aqc_opc_list_func_capabilities = 0x000A,
+ i40e_aqc_opc_list_dev_capabilities = 0x000B,
+
+ /* LAA */
+ i40e_aqc_opc_mac_address_read = 0x0107,
+ i40e_aqc_opc_mac_address_write = 0x0108,
+
+ /* PXE */
+ i40e_aqc_opc_clear_pxe_mode = 0x0110,
+
+ /* internal switch commands */
+ i40e_aqc_opc_get_switch_config = 0x0200,
+ i40e_aqc_opc_add_statistics = 0x0201,
+ i40e_aqc_opc_remove_statistics = 0x0202,
+ i40e_aqc_opc_set_port_parameters = 0x0203,
+ i40e_aqc_opc_get_switch_resource_alloc = 0x0204,
+
+ i40e_aqc_opc_add_vsi = 0x0210,
+ i40e_aqc_opc_update_vsi_parameters = 0x0211,
+ i40e_aqc_opc_get_vsi_parameters = 0x0212,
+
+ i40e_aqc_opc_add_pv = 0x0220,
+ i40e_aqc_opc_update_pv_parameters = 0x0221,
+ i40e_aqc_opc_get_pv_parameters = 0x0222,
+
+ i40e_aqc_opc_add_veb = 0x0230,
+ i40e_aqc_opc_update_veb_parameters = 0x0231,
+ i40e_aqc_opc_get_veb_parameters = 0x0232,
+
+ i40e_aqc_opc_delete_element = 0x0243,
+
+ i40e_aqc_opc_add_macvlan = 0x0250,
+ i40e_aqc_opc_remove_macvlan = 0x0251,
+ i40e_aqc_opc_add_vlan = 0x0252,
+ i40e_aqc_opc_remove_vlan = 0x0253,
+ i40e_aqc_opc_set_vsi_promiscuous_modes = 0x0254,
+ i40e_aqc_opc_add_tag = 0x0255,
+ i40e_aqc_opc_remove_tag = 0x0256,
+ i40e_aqc_opc_add_multicast_etag = 0x0257,
+ i40e_aqc_opc_remove_multicast_etag = 0x0258,
+ i40e_aqc_opc_update_tag = 0x0259,
+ i40e_aqc_opc_add_control_packet_filter = 0x025A,
+ i40e_aqc_opc_remove_control_packet_filter = 0x025B,
+ i40e_aqc_opc_add_cloud_filters = 0x025C,
+ i40e_aqc_opc_remove_cloud_filters = 0x025D,
+
+ i40e_aqc_opc_add_mirror_rule = 0x0260,
+ i40e_aqc_opc_delete_mirror_rule = 0x0261,
+
+ /* DCB commands */
+ i40e_aqc_opc_dcb_ignore_pfc = 0x0301,
+ i40e_aqc_opc_dcb_updated = 0x0302,
+
+ /* TX scheduler */
+ i40e_aqc_opc_configure_vsi_bw_limit = 0x0400,
+ i40e_aqc_opc_configure_vsi_ets_sla_bw_limit = 0x0406,
+ i40e_aqc_opc_configure_vsi_tc_bw = 0x0407,
+ i40e_aqc_opc_query_vsi_bw_config = 0x0408,
+ i40e_aqc_opc_query_vsi_ets_sla_config = 0x040A,
+ i40e_aqc_opc_configure_switching_comp_bw_limit = 0x0410,
+
+ i40e_aqc_opc_enable_switching_comp_ets = 0x0413,
+ i40e_aqc_opc_modify_switching_comp_ets = 0x0414,
+ i40e_aqc_opc_disable_switching_comp_ets = 0x0415,
+ i40e_aqc_opc_configure_switching_comp_ets_bw_limit = 0x0416,
+ i40e_aqc_opc_configure_switching_comp_bw_config = 0x0417,
+ i40e_aqc_opc_query_switching_comp_ets_config = 0x0418,
+ i40e_aqc_opc_query_port_ets_config = 0x0419,
+ i40e_aqc_opc_query_switching_comp_bw_config = 0x041A,
+ i40e_aqc_opc_suspend_port_tx = 0x041B,
+ i40e_aqc_opc_resume_port_tx = 0x041C,
+ i40e_aqc_opc_configure_partition_bw = 0x041D,
+
+ /* hmc */
+ i40e_aqc_opc_query_hmc_resource_profile = 0x0500,
+ i40e_aqc_opc_set_hmc_resource_profile = 0x0501,
+
+ /* phy commands*/
+ i40e_aqc_opc_get_phy_abilities = 0x0600,
+ i40e_aqc_opc_set_phy_config = 0x0601,
+ i40e_aqc_opc_set_mac_config = 0x0603,
+ i40e_aqc_opc_set_link_restart_an = 0x0605,
+ i40e_aqc_opc_get_link_status = 0x0607,
+ i40e_aqc_opc_set_phy_int_mask = 0x0613,
+ i40e_aqc_opc_get_local_advt_reg = 0x0614,
+ i40e_aqc_opc_set_local_advt_reg = 0x0615,
+ i40e_aqc_opc_get_partner_advt = 0x0616,
+ i40e_aqc_opc_set_lb_modes = 0x0618,
+ i40e_aqc_opc_get_phy_wol_caps = 0x0621,
+ i40e_aqc_opc_set_phy_debug = 0x0622,
+ i40e_aqc_opc_upload_ext_phy_fm = 0x0625,
+
+ /* NVM commands */
+ i40e_aqc_opc_nvm_read = 0x0701,
+ i40e_aqc_opc_nvm_erase = 0x0702,
+ i40e_aqc_opc_nvm_update = 0x0703,
+ i40e_aqc_opc_nvm_config_read = 0x0704,
+ i40e_aqc_opc_nvm_config_write = 0x0705,
+ i40e_aqc_opc_oem_post_update = 0x0720,
+
+ /* virtualization commands */
+ i40e_aqc_opc_send_msg_to_pf = 0x0801,
+ i40e_aqc_opc_send_msg_to_vf = 0x0802,
+ i40e_aqc_opc_send_msg_to_peer = 0x0803,
+
+ /* alternate structure */
+ i40e_aqc_opc_alternate_write = 0x0900,
+ i40e_aqc_opc_alternate_write_indirect = 0x0901,
+ i40e_aqc_opc_alternate_read = 0x0902,
+ i40e_aqc_opc_alternate_read_indirect = 0x0903,
+ i40e_aqc_opc_alternate_write_done = 0x0904,
+ i40e_aqc_opc_alternate_set_mode = 0x0905,
+ i40e_aqc_opc_alternate_clear_port = 0x0906,
+
+ /* LLDP commands */
+ i40e_aqc_opc_lldp_get_mib = 0x0A00,
+ i40e_aqc_opc_lldp_update_mib = 0x0A01,
+ i40e_aqc_opc_lldp_add_tlv = 0x0A02,
+ i40e_aqc_opc_lldp_update_tlv = 0x0A03,
+ i40e_aqc_opc_lldp_delete_tlv = 0x0A04,
+ i40e_aqc_opc_lldp_stop = 0x0A05,
+ i40e_aqc_opc_lldp_start = 0x0A06,
+ i40e_aqc_opc_get_cee_dcb_cfg = 0x0A07,
+ i40e_aqc_opc_lldp_set_local_mib = 0x0A08,
+ i40e_aqc_opc_lldp_stop_start_spec_agent = 0x0A09,
+
+ /* Tunnel commands */
+ i40e_aqc_opc_add_udp_tunnel = 0x0B00,
+ i40e_aqc_opc_del_udp_tunnel = 0x0B01,
+#ifdef X722_SUPPORT
+ i40e_aqc_opc_set_rss_key = 0x0B02,
+ i40e_aqc_opc_set_rss_lut = 0x0B03,
+ i40e_aqc_opc_get_rss_key = 0x0B04,
+ i40e_aqc_opc_get_rss_lut = 0x0B05,
+#endif
+
+ /* Async Events */
+ i40e_aqc_opc_event_lan_overflow = 0x1001,
+
+ /* OEM commands */
+ i40e_aqc_opc_oem_parameter_change = 0xFE00,
+ i40e_aqc_opc_oem_device_status_change = 0xFE01,
+ i40e_aqc_opc_oem_ocsd_initialize = 0xFE02,
+ i40e_aqc_opc_oem_ocbb_initialize = 0xFE03,
+
+ /* debug commands */
+ i40e_aqc_opc_debug_read_reg = 0xFF03,
+ i40e_aqc_opc_debug_write_reg = 0xFF04,
+ i40e_aqc_opc_debug_modify_reg = 0xFF07,
+ i40e_aqc_opc_debug_dump_internals = 0xFF08,
+};
+
+/* command structures and indirect data structures */
+
+/* Structure naming conventions:
+ * - no suffix for direct command descriptor structures
+ * - _data for indirect sent data
+ * - _resp for indirect return data (data which is both will use _data)
+ * - _completion for direct return data
+ * - _element_ for repeated elements (may also be _data or _resp)
+ *
+ * Command structures are expected to overlay the params.raw member of the basic
+ * descriptor, and as such cannot exceed 16 bytes in length.
+ */
+
+/* This macro is used to generate a compilation error if a structure
+ * is not exactly the correct length. It gives a divide by zero error if the
+ * structure is not of the correct size, otherwise it creates an enum that is
+ * never used.
+ */
+#define I40E_CHECK_STRUCT_LEN(n, X) enum i40e_static_assert_enum_##X \
+ { i40e_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) }
+
+/* This macro is used extensively to ensure that command structures are 16
+ * bytes in length as they have to map to the raw array of that size.
+ */
+#define I40E_CHECK_CMD_LENGTH(X) I40E_CHECK_STRUCT_LEN(16, X)
+
+/* internal (0x00XX) commands */
+
+/* Get version (direct 0x0001) */
+struct i40e_aqc_get_version {
+ __le32 rom_ver;
+ __le32 fw_build;
+ __le16 fw_major;
+ __le16 fw_minor;
+ __le16 api_major;
+ __le16 api_minor;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_get_version);
+
+/* Send driver version (indirect 0x0002) */
+struct i40e_aqc_driver_version {
+ u8 driver_major_ver;
+ u8 driver_minor_ver;
+ u8 driver_build_ver;
+ u8 driver_subbuild_ver;
+ u8 reserved[4];
+ __le32 address_high;
+ __le32 address_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_driver_version);
+
+/* Queue Shutdown (direct 0x0003) */
+struct i40e_aqc_queue_shutdown {
+ __le32 driver_unloading;
+#define I40E_AQ_DRIVER_UNLOADING 0x1
+ u8 reserved[12];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_queue_shutdown);
+
+/* Set PF context (0x0004, direct) */
+struct i40e_aqc_set_pf_context {
+ u8 pf_id;
+ u8 reserved[15];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_pf_context);
+
+/* Request resource ownership (direct 0x0008)
+ * Release resource ownership (direct 0x0009)
+ */
+#define I40E_AQ_RESOURCE_NVM 1
+#define I40E_AQ_RESOURCE_SDP 2
+#define I40E_AQ_RESOURCE_ACCESS_READ 1
+#define I40E_AQ_RESOURCE_ACCESS_WRITE 2
+#define I40E_AQ_RESOURCE_NVM_READ_TIMEOUT 3000
+#define I40E_AQ_RESOURCE_NVM_WRITE_TIMEOUT 180000
+
+struct i40e_aqc_request_resource {
+ __le16 resource_id;
+ __le16 access_type;
+ __le32 timeout;
+ __le32 resource_number;
+ u8 reserved[4];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_request_resource);
+
+/* Get function capabilities (indirect 0x000A)
+ * Get device capabilities (indirect 0x000B)
+ */
+struct i40e_aqc_list_capabilites {
+ u8 command_flags;
+#define I40E_AQ_LIST_CAP_PF_INDEX_EN 1
+ u8 pf_index;
+ u8 reserved[2];
+ __le32 count;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_list_capabilites);
+
+struct i40e_aqc_list_capabilities_element_resp {
+ __le16 id;
+ u8 major_rev;
+ u8 minor_rev;
+ __le32 number;
+ __le32 logical_id;
+ __le32 phys_id;
+ u8 reserved[16];
+};
+
+/* list of caps */
+
+#define I40E_AQ_CAP_ID_SWITCH_MODE 0x0001
+#define I40E_AQ_CAP_ID_MNG_MODE 0x0002
+#define I40E_AQ_CAP_ID_NPAR_ACTIVE 0x0003
+#define I40E_AQ_CAP_ID_OS2BMC_CAP 0x0004
+#define I40E_AQ_CAP_ID_FUNCTIONS_VALID 0x0005
+#define I40E_AQ_CAP_ID_ALTERNATE_RAM 0x0006
+#define I40E_AQ_CAP_ID_SRIOV 0x0012
+#define I40E_AQ_CAP_ID_VF 0x0013
+#define I40E_AQ_CAP_ID_VMDQ 0x0014
+#define I40E_AQ_CAP_ID_8021QBG 0x0015
+#define I40E_AQ_CAP_ID_8021QBR 0x0016
+#define I40E_AQ_CAP_ID_VSI 0x0017
+#define I40E_AQ_CAP_ID_DCB 0x0018
+#define I40E_AQ_CAP_ID_FCOE 0x0021
+#define I40E_AQ_CAP_ID_ISCSI 0x0022
+#define I40E_AQ_CAP_ID_RSS 0x0040
+#define I40E_AQ_CAP_ID_RXQ 0x0041
+#define I40E_AQ_CAP_ID_TXQ 0x0042
+#define I40E_AQ_CAP_ID_MSIX 0x0043
+#define I40E_AQ_CAP_ID_VF_MSIX 0x0044
+#define I40E_AQ_CAP_ID_FLOW_DIRECTOR 0x0045
+#define I40E_AQ_CAP_ID_1588 0x0046
+#define I40E_AQ_CAP_ID_IWARP 0x0051
+#define I40E_AQ_CAP_ID_LED 0x0061
+#define I40E_AQ_CAP_ID_SDP 0x0062
+#define I40E_AQ_CAP_ID_MDIO 0x0063
+#define I40E_AQ_CAP_ID_FLEX10 0x00F1
+#define I40E_AQ_CAP_ID_CEM 0x00F2
+
+/* Set CPPM Configuration (direct 0x0103) */
+struct i40e_aqc_cppm_configuration {
+ __le16 command_flags;
+#define I40E_AQ_CPPM_EN_LTRC 0x0800
+#define I40E_AQ_CPPM_EN_DMCTH 0x1000
+#define I40E_AQ_CPPM_EN_DMCTLX 0x2000
+#define I40E_AQ_CPPM_EN_HPTC 0x4000
+#define I40E_AQ_CPPM_EN_DMARC 0x8000
+ __le16 ttlx;
+ __le32 dmacr;
+ __le16 dmcth;
+ u8 hptc;
+ u8 reserved;
+ __le32 pfltrc;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_cppm_configuration);
+
+/* Set ARP Proxy command / response (indirect 0x0104) */
+struct i40e_aqc_arp_proxy_data {
+ __le16 command_flags;
+#define I40E_AQ_ARP_INIT_IPV4 0x0008
+#define I40E_AQ_ARP_UNSUP_CTL 0x0010
+#define I40E_AQ_ARP_ENA 0x0020
+#define I40E_AQ_ARP_ADD_IPV4 0x0040
+#define I40E_AQ_ARP_DEL_IPV4 0x0080
+ __le16 table_id;
+ __le32 pfpm_proxyfc;
+ __le32 ip_addr;
+ u8 mac_addr[6];
+ u8 reserved[2];
+};
+
+I40E_CHECK_STRUCT_LEN(0x14, i40e_aqc_arp_proxy_data);
+
+/* Set NS Proxy Table Entry Command (indirect 0x0105) */
+struct i40e_aqc_ns_proxy_data {
+ __le16 table_idx_mac_addr_0;
+ __le16 table_idx_mac_addr_1;
+ __le16 table_idx_ipv6_0;
+ __le16 table_idx_ipv6_1;
+ __le16 control;
+#define I40E_AQ_NS_PROXY_ADD_0 0x0100
+#define I40E_AQ_NS_PROXY_DEL_0 0x0200
+#define I40E_AQ_NS_PROXY_ADD_1 0x0400
+#define I40E_AQ_NS_PROXY_DEL_1 0x0800
+#define I40E_AQ_NS_PROXY_ADD_IPV6_0 0x1000
+#define I40E_AQ_NS_PROXY_DEL_IPV6_0 0x2000
+#define I40E_AQ_NS_PROXY_ADD_IPV6_1 0x4000
+#define I40E_AQ_NS_PROXY_DEL_IPV6_1 0x8000
+#define I40E_AQ_NS_PROXY_COMMAND_SEQ 0x0001
+#define I40E_AQ_NS_PROXY_INIT_IPV6_TBL 0x0002
+#define I40E_AQ_NS_PROXY_INIT_MAC_TBL 0x0004
+ u8 mac_addr_0[6];
+ u8 mac_addr_1[6];
+ u8 local_mac_addr[6];
+ u8 ipv6_addr_0[16]; /* Warning! spec specifies BE byte order */
+ u8 ipv6_addr_1[16];
+};
+
+I40E_CHECK_STRUCT_LEN(0x3c, i40e_aqc_ns_proxy_data);
+
+/* Manage LAA Command (0x0106) - obsolete */
+struct i40e_aqc_mng_laa {
+ __le16 command_flags;
+#define I40E_AQ_LAA_FLAG_WR 0x8000
+ u8 reserved[2];
+ __le32 sal;
+ __le16 sah;
+ u8 reserved2[6];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_mng_laa);
+
+/* Manage MAC Address Read Command (indirect 0x0107) */
+struct i40e_aqc_mac_address_read {
+ __le16 command_flags;
+#define I40E_AQC_LAN_ADDR_VALID 0x10
+#define I40E_AQC_SAN_ADDR_VALID 0x20
+#define I40E_AQC_PORT_ADDR_VALID 0x40
+#define I40E_AQC_WOL_ADDR_VALID 0x80
+#define I40E_AQC_MC_MAG_EN_VALID 0x100
+#define I40E_AQC_ADDR_VALID_MASK 0x1F0
+ u8 reserved[6];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_mac_address_read);
+
+struct i40e_aqc_mac_address_read_data {
+ u8 pf_lan_mac[6];
+ u8 pf_san_mac[6];
+ u8 port_mac[6];
+ u8 pf_wol_mac[6];
+};
+
+I40E_CHECK_STRUCT_LEN(24, i40e_aqc_mac_address_read_data);
+
+/* Manage MAC Address Write Command (0x0108) */
+struct i40e_aqc_mac_address_write {
+ __le16 command_flags;
+#define I40E_AQC_WRITE_TYPE_LAA_ONLY 0x0000
+#define I40E_AQC_WRITE_TYPE_LAA_WOL 0x4000
+#define I40E_AQC_WRITE_TYPE_PORT 0x8000
+#define I40E_AQC_WRITE_TYPE_UPDATE_MC_MAG 0xC000
+#define I40E_AQC_WRITE_TYPE_MASK 0xC000
+
+ __le16 mac_sah;
+ __le32 mac_sal;
+ u8 reserved[8];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_mac_address_write);
+
+/* PXE commands (0x011x) */
+
+/* Clear PXE Command and response (direct 0x0110) */
+struct i40e_aqc_clear_pxe {
+ u8 rx_cnt;
+ u8 reserved[15];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_clear_pxe);
+
+/* Switch configuration commands (0x02xx) */
+
+/* Used by many indirect commands that only pass an seid and a buffer in the
+ * command
+ */
+struct i40e_aqc_switch_seid {
+ __le16 seid;
+ u8 reserved[6];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_switch_seid);
+
+/* Get Switch Configuration command (indirect 0x0200)
+ * uses i40e_aqc_switch_seid for the descriptor
+ */
+struct i40e_aqc_get_switch_config_header_resp {
+ __le16 num_reported;
+ __le16 num_total;
+ u8 reserved[12];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_get_switch_config_header_resp);
+
+struct i40e_aqc_switch_config_element_resp {
+ u8 element_type;
+#define I40E_AQ_SW_ELEM_TYPE_MAC 1
+#define I40E_AQ_SW_ELEM_TYPE_PF 2
+#define I40E_AQ_SW_ELEM_TYPE_VF 3
+#define I40E_AQ_SW_ELEM_TYPE_EMP 4
+#define I40E_AQ_SW_ELEM_TYPE_BMC 5
+#define I40E_AQ_SW_ELEM_TYPE_PV 16
+#define I40E_AQ_SW_ELEM_TYPE_VEB 17
+#define I40E_AQ_SW_ELEM_TYPE_PA 18
+#define I40E_AQ_SW_ELEM_TYPE_VSI 19
+ u8 revision;
+#define I40E_AQ_SW_ELEM_REV_1 1
+ __le16 seid;
+ __le16 uplink_seid;
+ __le16 downlink_seid;
+ u8 reserved[3];
+ u8 connection_type;
+#define I40E_AQ_CONN_TYPE_REGULAR 0x1
+#define I40E_AQ_CONN_TYPE_DEFAULT 0x2
+#define I40E_AQ_CONN_TYPE_CASCADED 0x3
+ __le16 scheduler_id;
+ __le16 element_info;
+};
+
+I40E_CHECK_STRUCT_LEN(0x10, i40e_aqc_switch_config_element_resp);
+
+/* Get Switch Configuration (indirect 0x0200)
+ * an array of elements are returned in the response buffer
+ * the first in the array is the header, remainder are elements
+ */
+struct i40e_aqc_get_switch_config_resp {
+ struct i40e_aqc_get_switch_config_header_resp header;
+ struct i40e_aqc_switch_config_element_resp element[1];
+};
+
+I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_get_switch_config_resp);
+
+/* Add Statistics (direct 0x0201)
+ * Remove Statistics (direct 0x0202)
+ */
+struct i40e_aqc_add_remove_statistics {
+ __le16 seid;
+ __le16 vlan;
+ __le16 stat_index;
+ u8 reserved[10];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_statistics);
+
+/* Set Port Parameters command (direct 0x0203) */
+struct i40e_aqc_set_port_parameters {
+ __le16 command_flags;
+#define I40E_AQ_SET_P_PARAMS_SAVE_BAD_PACKETS 1
+#define I40E_AQ_SET_P_PARAMS_PAD_SHORT_PACKETS 2 /* must set! */
+#define I40E_AQ_SET_P_PARAMS_DOUBLE_VLAN_ENA 4
+ __le16 bad_frame_vsi;
+ __le16 default_seid; /* reserved for command */
+ u8 reserved[10];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_port_parameters);
+
+/* Get Switch Resource Allocation (indirect 0x0204) */
+struct i40e_aqc_get_switch_resource_alloc {
+ u8 num_entries; /* reserved for command */
+ u8 reserved[7];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_get_switch_resource_alloc);
+
+/* expect an array of these structs in the response buffer */
+struct i40e_aqc_switch_resource_alloc_element_resp {
+ u8 resource_type;
+#define I40E_AQ_RESOURCE_TYPE_VEB 0x0
+#define I40E_AQ_RESOURCE_TYPE_VSI 0x1
+#define I40E_AQ_RESOURCE_TYPE_MACADDR 0x2
+#define I40E_AQ_RESOURCE_TYPE_STAG 0x3
+#define I40E_AQ_RESOURCE_TYPE_ETAG 0x4
+#define I40E_AQ_RESOURCE_TYPE_MULTICAST_HASH 0x5
+#define I40E_AQ_RESOURCE_TYPE_UNICAST_HASH 0x6
+#define I40E_AQ_RESOURCE_TYPE_VLAN 0x7
+#define I40E_AQ_RESOURCE_TYPE_VSI_LIST_ENTRY 0x8
+#define I40E_AQ_RESOURCE_TYPE_ETAG_LIST_ENTRY 0x9
+#define I40E_AQ_RESOURCE_TYPE_VLAN_STAT_POOL 0xA
+#define I40E_AQ_RESOURCE_TYPE_MIRROR_RULE 0xB
+#define I40E_AQ_RESOURCE_TYPE_QUEUE_SETS 0xC
+#define I40E_AQ_RESOURCE_TYPE_VLAN_FILTERS 0xD
+#define I40E_AQ_RESOURCE_TYPE_INNER_MAC_FILTERS 0xF
+#define I40E_AQ_RESOURCE_TYPE_IP_FILTERS 0x10
+#define I40E_AQ_RESOURCE_TYPE_GRE_VN_KEYS 0x11
+#define I40E_AQ_RESOURCE_TYPE_VN2_KEYS 0x12
+#define I40E_AQ_RESOURCE_TYPE_TUNNEL_PORTS 0x13
+ u8 reserved1;
+ __le16 guaranteed;
+ __le16 total;
+ __le16 used;
+ __le16 total_unalloced;
+ u8 reserved2[6];
+};
+
+I40E_CHECK_STRUCT_LEN(0x10, i40e_aqc_switch_resource_alloc_element_resp);
+
+/* Add VSI (indirect 0x0210)
+ * this indirect command uses struct i40e_aqc_vsi_properties_data
+ * as the indirect buffer (128 bytes)
+ *
+ * Update VSI (indirect 0x211)
+ * uses the same data structure as Add VSI
+ *
+ * Get VSI (indirect 0x0212)
+ * uses the same completion and data structure as Add VSI
+ */
+struct i40e_aqc_add_get_update_vsi {
+ __le16 uplink_seid;
+ u8 connection_type;
+#define I40E_AQ_VSI_CONN_TYPE_NORMAL 0x1
+#define I40E_AQ_VSI_CONN_TYPE_DEFAULT 0x2
+#define I40E_AQ_VSI_CONN_TYPE_CASCADED 0x3
+ u8 reserved1;
+ u8 vf_id;
+ u8 reserved2;
+ __le16 vsi_flags;
+#define I40E_AQ_VSI_TYPE_SHIFT 0x0
+#define I40E_AQ_VSI_TYPE_MASK (0x3 << I40E_AQ_VSI_TYPE_SHIFT)
+#define I40E_AQ_VSI_TYPE_VF 0x0
+#define I40E_AQ_VSI_TYPE_VMDQ2 0x1
+#define I40E_AQ_VSI_TYPE_PF 0x2
+#define I40E_AQ_VSI_TYPE_EMP_MNG 0x3
+#define I40E_AQ_VSI_FLAG_CASCADED_PV 0x4
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_get_update_vsi);
+
+struct i40e_aqc_add_get_update_vsi_completion {
+ __le16 seid;
+ __le16 vsi_number;
+ __le16 vsi_used;
+ __le16 vsi_free;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_get_update_vsi_completion);
+
+struct i40e_aqc_vsi_properties_data {
+ /* first 96 byte are written by SW */
+ __le16 valid_sections;
+#define I40E_AQ_VSI_PROP_SWITCH_VALID 0x0001
+#define I40E_AQ_VSI_PROP_SECURITY_VALID 0x0002
+#define I40E_AQ_VSI_PROP_VLAN_VALID 0x0004
+#define I40E_AQ_VSI_PROP_CAS_PV_VALID 0x0008
+#define I40E_AQ_VSI_PROP_INGRESS_UP_VALID 0x0010
+#define I40E_AQ_VSI_PROP_EGRESS_UP_VALID 0x0020
+#define I40E_AQ_VSI_PROP_QUEUE_MAP_VALID 0x0040
+#define I40E_AQ_VSI_PROP_QUEUE_OPT_VALID 0x0080
+#define I40E_AQ_VSI_PROP_OUTER_UP_VALID 0x0100
+#define I40E_AQ_VSI_PROP_SCHED_VALID 0x0200
+ /* switch section */
+ __le16 switch_id; /* 12bit id combined with flags below */
+#define I40E_AQ_VSI_SW_ID_SHIFT 0x0000
+#define I40E_AQ_VSI_SW_ID_MASK (0xFFF << I40E_AQ_VSI_SW_ID_SHIFT)
+#define I40E_AQ_VSI_SW_ID_FLAG_NOT_STAG 0x1000
+#define I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB 0x2000
+#define I40E_AQ_VSI_SW_ID_FLAG_LOCAL_LB 0x4000
+ u8 sw_reserved[2];
+ /* security section */
+ u8 sec_flags;
+#define I40E_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD 0x01
+#define I40E_AQ_VSI_SEC_FLAG_ENABLE_VLAN_CHK 0x02
+#define I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK 0x04
+ u8 sec_reserved;
+ /* VLAN section */
+ __le16 pvid; /* VLANS include priority bits */
+ __le16 fcoe_pvid;
+ u8 port_vlan_flags;
+#define I40E_AQ_VSI_PVLAN_MODE_SHIFT 0x00
+#define I40E_AQ_VSI_PVLAN_MODE_MASK (0x03 << \
+ I40E_AQ_VSI_PVLAN_MODE_SHIFT)
+#define I40E_AQ_VSI_PVLAN_MODE_TAGGED 0x01
+#define I40E_AQ_VSI_PVLAN_MODE_UNTAGGED 0x02
+#define I40E_AQ_VSI_PVLAN_MODE_ALL 0x03
+#define I40E_AQ_VSI_PVLAN_INSERT_PVID 0x04
+#define I40E_AQ_VSI_PVLAN_EMOD_SHIFT 0x03
+#define I40E_AQ_VSI_PVLAN_EMOD_MASK (0x3 << \
+ I40E_AQ_VSI_PVLAN_EMOD_SHIFT)
+#define I40E_AQ_VSI_PVLAN_EMOD_STR_BOTH 0x0
+#define I40E_AQ_VSI_PVLAN_EMOD_STR_UP 0x08
+#define I40E_AQ_VSI_PVLAN_EMOD_STR 0x10
+#define I40E_AQ_VSI_PVLAN_EMOD_NOTHING 0x18
+ u8 pvlan_reserved[3];
+ /* ingress egress up sections */
+ __le32 ingress_table; /* bitmap, 3 bits per up */
+#define I40E_AQ_VSI_UP_TABLE_UP0_SHIFT 0
+#define I40E_AQ_VSI_UP_TABLE_UP0_MASK (0x7 << \
+ I40E_AQ_VSI_UP_TABLE_UP0_SHIFT)
+#define I40E_AQ_VSI_UP_TABLE_UP1_SHIFT 3
+#define I40E_AQ_VSI_UP_TABLE_UP1_MASK (0x7 << \
+ I40E_AQ_VSI_UP_TABLE_UP1_SHIFT)
+#define I40E_AQ_VSI_UP_TABLE_UP2_SHIFT 6
+#define I40E_AQ_VSI_UP_TABLE_UP2_MASK (0x7 << \
+ I40E_AQ_VSI_UP_TABLE_UP2_SHIFT)
+#define I40E_AQ_VSI_UP_TABLE_UP3_SHIFT 9
+#define I40E_AQ_VSI_UP_TABLE_UP3_MASK (0x7 << \
+ I40E_AQ_VSI_UP_TABLE_UP3_SHIFT)
+#define I40E_AQ_VSI_UP_TABLE_UP4_SHIFT 12
+#define I40E_AQ_VSI_UP_TABLE_UP4_MASK (0x7 << \
+ I40E_AQ_VSI_UP_TABLE_UP4_SHIFT)
+#define I40E_AQ_VSI_UP_TABLE_UP5_SHIFT 15
+#define I40E_AQ_VSI_UP_TABLE_UP5_MASK (0x7 << \
+ I40E_AQ_VSI_UP_TABLE_UP5_SHIFT)
+#define I40E_AQ_VSI_UP_TABLE_UP6_SHIFT 18
+#define I40E_AQ_VSI_UP_TABLE_UP6_MASK (0x7 << \
+ I40E_AQ_VSI_UP_TABLE_UP6_SHIFT)
+#define I40E_AQ_VSI_UP_TABLE_UP7_SHIFT 21
+#define I40E_AQ_VSI_UP_TABLE_UP7_MASK (0x7 << \
+ I40E_AQ_VSI_UP_TABLE_UP7_SHIFT)
+ __le32 egress_table; /* same defines as for ingress table */
+ /* cascaded PV section */
+ __le16 cas_pv_tag;
+ u8 cas_pv_flags;
+#define I40E_AQ_VSI_CAS_PV_TAGX_SHIFT 0x00
+#define I40E_AQ_VSI_CAS_PV_TAGX_MASK (0x03 << \
+ I40E_AQ_VSI_CAS_PV_TAGX_SHIFT)
+#define I40E_AQ_VSI_CAS_PV_TAGX_LEAVE 0x00
+#define I40E_AQ_VSI_CAS_PV_TAGX_REMOVE 0x01
+#define I40E_AQ_VSI_CAS_PV_TAGX_COPY 0x02
+#define I40E_AQ_VSI_CAS_PV_INSERT_TAG 0x10
+#define I40E_AQ_VSI_CAS_PV_ETAG_PRUNE 0x20
+#define I40E_AQ_VSI_CAS_PV_ACCEPT_HOST_TAG 0x40
+ u8 cas_pv_reserved;
+ /* queue mapping section */
+ __le16 mapping_flags;
+#define I40E_AQ_VSI_QUE_MAP_CONTIG 0x0
+#define I40E_AQ_VSI_QUE_MAP_NONCONTIG 0x1
+ __le16 queue_mapping[16];
+#define I40E_AQ_VSI_QUEUE_SHIFT 0x0
+#define I40E_AQ_VSI_QUEUE_MASK (0x7FF << I40E_AQ_VSI_QUEUE_SHIFT)
+ __le16 tc_mapping[8];
+#define I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT 0
+#define I40E_AQ_VSI_TC_QUE_OFFSET_MASK (0x1FF << \
+ I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT)
+#define I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT 9
+#define I40E_AQ_VSI_TC_QUE_NUMBER_MASK (0x7 << \
+ I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT)
+ /* queueing option section */
+ u8 queueing_opt_flags;
+#ifdef X722_SUPPORT
+#define I40E_AQ_VSI_QUE_OPT_MULTICAST_UDP_ENA 0x04
+#define I40E_AQ_VSI_QUE_OPT_UNICAST_UDP_ENA 0x08
+#endif
+#define I40E_AQ_VSI_QUE_OPT_TCP_ENA 0x10
+#define I40E_AQ_VSI_QUE_OPT_FCOE_ENA 0x20
+#ifdef X722_SUPPORT
+#define I40E_AQ_VSI_QUE_OPT_RSS_LUT_PF 0x00
+#define I40E_AQ_VSI_QUE_OPT_RSS_LUT_VSI 0x40
+#endif
+ u8 queueing_opt_reserved[3];
+ /* scheduler section */
+ u8 up_enable_bits;
+ u8 sched_reserved;
+ /* outer up section */
+ __le32 outer_up_table; /* same structure and defines as ingress table */
+ u8 cmd_reserved[8];
+ /* last 32 bytes are written by FW */
+ __le16 qs_handle[8];
+#define I40E_AQ_VSI_QS_HANDLE_INVALID 0xFFFF
+ __le16 stat_counter_idx;
+ __le16 sched_id;
+ u8 resp_reserved[12];
+};
+
+I40E_CHECK_STRUCT_LEN(128, i40e_aqc_vsi_properties_data);
+
+/* Add Port Virtualizer (direct 0x0220)
+ * also used for update PV (direct 0x0221) but only flags are used
+ * (IS_CTRL_PORT only works on add PV)
+ */
+struct i40e_aqc_add_update_pv {
+ __le16 command_flags;
+#define I40E_AQC_PV_FLAG_PV_TYPE 0x1
+#define I40E_AQC_PV_FLAG_FWD_UNKNOWN_STAG_EN 0x2
+#define I40E_AQC_PV_FLAG_FWD_UNKNOWN_ETAG_EN 0x4
+#define I40E_AQC_PV_FLAG_IS_CTRL_PORT 0x8
+ __le16 uplink_seid;
+ __le16 connected_seid;
+ u8 reserved[10];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_update_pv);
+
+struct i40e_aqc_add_update_pv_completion {
+ /* reserved for update; for add also encodes error if rc == ENOSPC */
+ __le16 pv_seid;
+#define I40E_AQC_PV_ERR_FLAG_NO_PV 0x1
+#define I40E_AQC_PV_ERR_FLAG_NO_SCHED 0x2
+#define I40E_AQC_PV_ERR_FLAG_NO_COUNTER 0x4
+#define I40E_AQC_PV_ERR_FLAG_NO_ENTRY 0x8
+ u8 reserved[14];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_update_pv_completion);
+
+/* Get PV Params (direct 0x0222)
+ * uses i40e_aqc_switch_seid for the descriptor
+ */
+
+struct i40e_aqc_get_pv_params_completion {
+ __le16 seid;
+ __le16 default_stag;
+ __le16 pv_flags; /* same flags as add_pv */
+#define I40E_AQC_GET_PV_PV_TYPE 0x1
+#define I40E_AQC_GET_PV_FRWD_UNKNOWN_STAG 0x2
+#define I40E_AQC_GET_PV_FRWD_UNKNOWN_ETAG 0x4
+ u8 reserved[8];
+ __le16 default_port_seid;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_get_pv_params_completion);
+
+/* Add VEB (direct 0x0230) */
+struct i40e_aqc_add_veb {
+ __le16 uplink_seid;
+ __le16 downlink_seid;
+ __le16 veb_flags;
+#define I40E_AQC_ADD_VEB_FLOATING 0x1
+#define I40E_AQC_ADD_VEB_PORT_TYPE_SHIFT 1
+#define I40E_AQC_ADD_VEB_PORT_TYPE_MASK (0x3 << \
+ I40E_AQC_ADD_VEB_PORT_TYPE_SHIFT)
+#define I40E_AQC_ADD_VEB_PORT_TYPE_DEFAULT 0x2
+#define I40E_AQC_ADD_VEB_PORT_TYPE_DATA 0x4
+#define I40E_AQC_ADD_VEB_ENABLE_L2_FILTER 0x8
+ u8 enable_tcs;
+ u8 reserved[9];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_veb);
+
+struct i40e_aqc_add_veb_completion {
+ u8 reserved[6];
+ __le16 switch_seid;
+ /* also encodes error if rc == ENOSPC; codes are the same as add_pv */
+ __le16 veb_seid;
+#define I40E_AQC_VEB_ERR_FLAG_NO_VEB 0x1
+#define I40E_AQC_VEB_ERR_FLAG_NO_SCHED 0x2
+#define I40E_AQC_VEB_ERR_FLAG_NO_COUNTER 0x4
+#define I40E_AQC_VEB_ERR_FLAG_NO_ENTRY 0x8
+ __le16 statistic_index;
+ __le16 vebs_used;
+ __le16 vebs_free;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_veb_completion);
+
+/* Get VEB Parameters (direct 0x0232)
+ * uses i40e_aqc_switch_seid for the descriptor
+ */
+struct i40e_aqc_get_veb_parameters_completion {
+ __le16 seid;
+ __le16 switch_id;
+ __le16 veb_flags; /* only the first/last flags from 0x0230 is valid */
+ __le16 statistic_index;
+ __le16 vebs_used;
+ __le16 vebs_free;
+ u8 reserved[4];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_get_veb_parameters_completion);
+
+/* Delete Element (direct 0x0243)
+ * uses the generic i40e_aqc_switch_seid
+ */
+
+/* Add MAC-VLAN (indirect 0x0250) */
+
+/* used for the command for most vlan commands */
+struct i40e_aqc_macvlan {
+ __le16 num_addresses;
+ __le16 seid[3];
+#define I40E_AQC_MACVLAN_CMD_SEID_NUM_SHIFT 0
+#define I40E_AQC_MACVLAN_CMD_SEID_NUM_MASK (0x3FF << \
+ I40E_AQC_MACVLAN_CMD_SEID_NUM_SHIFT)
+#define I40E_AQC_MACVLAN_CMD_SEID_VALID 0x8000
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_macvlan);
+
+/* indirect data for command and response */
+struct i40e_aqc_add_macvlan_element_data {
+ u8 mac_addr[6];
+ __le16 vlan_tag;
+ __le16 flags;
+#define I40E_AQC_MACVLAN_ADD_PERFECT_MATCH 0x0001
+#define I40E_AQC_MACVLAN_ADD_HASH_MATCH 0x0002
+#define I40E_AQC_MACVLAN_ADD_IGNORE_VLAN 0x0004
+#define I40E_AQC_MACVLAN_ADD_TO_QUEUE 0x0008
+ __le16 queue_number;
+#define I40E_AQC_MACVLAN_CMD_QUEUE_SHIFT 0
+#define I40E_AQC_MACVLAN_CMD_QUEUE_MASK (0x7FF << \
+ I40E_AQC_MACVLAN_CMD_SEID_NUM_SHIFT)
+ /* response section */
+ u8 match_method;
+#define I40E_AQC_MM_PERFECT_MATCH 0x01
+#define I40E_AQC_MM_HASH_MATCH 0x02
+#define I40E_AQC_MM_ERR_NO_RES 0xFF
+ u8 reserved1[3];
+};
+
+struct i40e_aqc_add_remove_macvlan_completion {
+ __le16 perfect_mac_used;
+ __le16 perfect_mac_free;
+ __le16 unicast_hash_free;
+ __le16 multicast_hash_free;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_macvlan_completion);
+
+/* Remove MAC-VLAN (indirect 0x0251)
+ * uses i40e_aqc_macvlan for the descriptor
+ * data points to an array of num_addresses of elements
+ */
+
+struct i40e_aqc_remove_macvlan_element_data {
+ u8 mac_addr[6];
+ __le16 vlan_tag;
+ u8 flags;
+#define I40E_AQC_MACVLAN_DEL_PERFECT_MATCH 0x01
+#define I40E_AQC_MACVLAN_DEL_HASH_MATCH 0x02
+#define I40E_AQC_MACVLAN_DEL_IGNORE_VLAN 0x08
+#define I40E_AQC_MACVLAN_DEL_ALL_VSIS 0x10
+ u8 reserved[3];
+ /* reply section */
+ u8 error_code;
+#define I40E_AQC_REMOVE_MACVLAN_SUCCESS 0x0
+#define I40E_AQC_REMOVE_MACVLAN_FAIL 0xFF
+ u8 reply_reserved[3];
+};
+
+/* Add VLAN (indirect 0x0252)
+ * Remove VLAN (indirect 0x0253)
+ * use the generic i40e_aqc_macvlan for the command
+ */
+struct i40e_aqc_add_remove_vlan_element_data {
+ __le16 vlan_tag;
+ u8 vlan_flags;
+/* flags for add VLAN */
+#define I40E_AQC_ADD_VLAN_LOCAL 0x1
+#define I40E_AQC_ADD_PVLAN_TYPE_SHIFT 1
+#define I40E_AQC_ADD_PVLAN_TYPE_MASK (0x3 << I40E_AQC_ADD_PVLAN_TYPE_SHIFT)
+#define I40E_AQC_ADD_PVLAN_TYPE_REGULAR 0x0
+#define I40E_AQC_ADD_PVLAN_TYPE_PRIMARY 0x2
+#define I40E_AQC_ADD_PVLAN_TYPE_SECONDARY 0x4
+#define I40E_AQC_VLAN_PTYPE_SHIFT 3
+#define I40E_AQC_VLAN_PTYPE_MASK (0x3 << I40E_AQC_VLAN_PTYPE_SHIFT)
+#define I40E_AQC_VLAN_PTYPE_REGULAR_VSI 0x0
+#define I40E_AQC_VLAN_PTYPE_PROMISC_VSI 0x8
+#define I40E_AQC_VLAN_PTYPE_COMMUNITY_VSI 0x10
+#define I40E_AQC_VLAN_PTYPE_ISOLATED_VSI 0x18
+/* flags for remove VLAN */
+#define I40E_AQC_REMOVE_VLAN_ALL 0x1
+ u8 reserved;
+ u8 result;
+/* flags for add VLAN */
+#define I40E_AQC_ADD_VLAN_SUCCESS 0x0
+#define I40E_AQC_ADD_VLAN_FAIL_REQUEST 0xFE
+#define I40E_AQC_ADD_VLAN_FAIL_RESOURCE 0xFF
+/* flags for remove VLAN */
+#define I40E_AQC_REMOVE_VLAN_SUCCESS 0x0
+#define I40E_AQC_REMOVE_VLAN_FAIL 0xFF
+ u8 reserved1[3];
+};
+
+struct i40e_aqc_add_remove_vlan_completion {
+ u8 reserved[4];
+ __le16 vlans_used;
+ __le16 vlans_free;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Set VSI Promiscuous Modes (direct 0x0254) */
+struct i40e_aqc_set_vsi_promiscuous_modes {
+ __le16 promiscuous_flags;
+ __le16 valid_flags;
+/* flags used for both fields above */
+#define I40E_AQC_SET_VSI_PROMISC_UNICAST 0x01
+#define I40E_AQC_SET_VSI_PROMISC_MULTICAST 0x02
+#define I40E_AQC_SET_VSI_PROMISC_BROADCAST 0x04
+#define I40E_AQC_SET_VSI_DEFAULT 0x08
+#define I40E_AQC_SET_VSI_PROMISC_VLAN 0x10
+ __le16 seid;
+#define I40E_AQC_VSI_PROM_CMD_SEID_MASK 0x3FF
+ __le16 vlan_tag;
+#define I40E_AQC_SET_VSI_VLAN_MASK 0x0FFF
+#define I40E_AQC_SET_VSI_VLAN_VALID 0x8000
+ u8 reserved[8];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_vsi_promiscuous_modes);
+
+/* Add S/E-tag command (direct 0x0255)
+ * Uses generic i40e_aqc_add_remove_tag_completion for completion
+ */
+struct i40e_aqc_add_tag {
+ __le16 flags;
+#define I40E_AQC_ADD_TAG_FLAG_TO_QUEUE 0x0001
+ __le16 seid;
+#define I40E_AQC_ADD_TAG_CMD_SEID_NUM_SHIFT 0
+#define I40E_AQC_ADD_TAG_CMD_SEID_NUM_MASK (0x3FF << \
+ I40E_AQC_ADD_TAG_CMD_SEID_NUM_SHIFT)
+ __le16 tag;
+ __le16 queue_number;
+ u8 reserved[8];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_tag);
+
+struct i40e_aqc_add_remove_tag_completion {
+ u8 reserved[12];
+ __le16 tags_used;
+ __le16 tags_free;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_tag_completion);
+
+/* Remove S/E-tag command (direct 0x0256)
+ * Uses generic i40e_aqc_add_remove_tag_completion for completion
+ */
+struct i40e_aqc_remove_tag {
+ __le16 seid;
+#define I40E_AQC_REMOVE_TAG_CMD_SEID_NUM_SHIFT 0
+#define I40E_AQC_REMOVE_TAG_CMD_SEID_NUM_MASK (0x3FF << \
+ I40E_AQC_REMOVE_TAG_CMD_SEID_NUM_SHIFT)
+ __le16 tag;
+ u8 reserved[12];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_remove_tag);
+
+/* Add multicast E-Tag (direct 0x0257)
+ * del multicast E-Tag (direct 0x0258) only uses pv_seid and etag fields
+ * and no external data
+ */
+struct i40e_aqc_add_remove_mcast_etag {
+ __le16 pv_seid;
+ __le16 etag;
+ u8 num_unicast_etags;
+ u8 reserved[3];
+ __le32 addr_high; /* address of array of 2-byte s-tags */
+ __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_mcast_etag);
+
+struct i40e_aqc_add_remove_mcast_etag_completion {
+ u8 reserved[4];
+ __le16 mcast_etags_used;
+ __le16 mcast_etags_free;
+ __le32 addr_high;
+ __le32 addr_low;
+
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_mcast_etag_completion);
+
+/* Update S/E-Tag (direct 0x0259) */
+struct i40e_aqc_update_tag {
+ __le16 seid;
+#define I40E_AQC_UPDATE_TAG_CMD_SEID_NUM_SHIFT 0
+#define I40E_AQC_UPDATE_TAG_CMD_SEID_NUM_MASK (0x3FF << \
+ I40E_AQC_UPDATE_TAG_CMD_SEID_NUM_SHIFT)
+ __le16 old_tag;
+ __le16 new_tag;
+ u8 reserved[10];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_update_tag);
+
+struct i40e_aqc_update_tag_completion {
+ u8 reserved[12];
+ __le16 tags_used;
+ __le16 tags_free;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_update_tag_completion);
+
+/* Add Control Packet filter (direct 0x025A)
+ * Remove Control Packet filter (direct 0x025B)
+ * uses the i40e_aqc_add_oveb_cloud,
+ * and the generic direct completion structure
+ */
+struct i40e_aqc_add_remove_control_packet_filter {
+ u8 mac[6];
+ __le16 etype;
+ __le16 flags;
+#define I40E_AQC_ADD_CONTROL_PACKET_FLAGS_IGNORE_MAC 0x0001
+#define I40E_AQC_ADD_CONTROL_PACKET_FLAGS_DROP 0x0002
+#define I40E_AQC_ADD_CONTROL_PACKET_FLAGS_TO_QUEUE 0x0004
+#define I40E_AQC_ADD_CONTROL_PACKET_FLAGS_TX 0x0008
+#define I40E_AQC_ADD_CONTROL_PACKET_FLAGS_RX 0x0000
+ __le16 seid;
+#define I40E_AQC_ADD_CONTROL_PACKET_CMD_SEID_NUM_SHIFT 0
+#define I40E_AQC_ADD_CONTROL_PACKET_CMD_SEID_NUM_MASK (0x3FF << \
+ I40E_AQC_ADD_CONTROL_PACKET_CMD_SEID_NUM_SHIFT)
+ __le16 queue;
+ u8 reserved[2];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_control_packet_filter);
+
+struct i40e_aqc_add_remove_control_packet_filter_completion {
+ __le16 mac_etype_used;
+ __le16 etype_used;
+ __le16 mac_etype_free;
+ __le16 etype_free;
+ u8 reserved[8];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_control_packet_filter_completion);
+
+/* Add Cloud filters (indirect 0x025C)
+ * Remove Cloud filters (indirect 0x025D)
+ * uses the i40e_aqc_add_remove_cloud_filters,
+ * and the generic indirect completion structure
+ */
+struct i40e_aqc_add_remove_cloud_filters {
+ u8 num_filters;
+ u8 reserved;
+ __le16 seid;
+#define I40E_AQC_ADD_CLOUD_CMD_SEID_NUM_SHIFT 0
+#define I40E_AQC_ADD_CLOUD_CMD_SEID_NUM_MASK (0x3FF << \
+ I40E_AQC_ADD_CLOUD_CMD_SEID_NUM_SHIFT)
+ u8 reserved2[4];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_cloud_filters);
+
+struct i40e_aqc_add_remove_cloud_filters_element_data {
+ u8 outer_mac[6];
+ u8 inner_mac[6];
+ __le16 inner_vlan;
+ union {
+ struct {
+ u8 reserved[12];
+ u8 data[4];
+ } v4;
+ struct {
+ u8 data[16];
+ } v6;
+ } ipaddr;
+ __le16 flags;
+#define I40E_AQC_ADD_CLOUD_FILTER_SHIFT 0
+#define I40E_AQC_ADD_CLOUD_FILTER_MASK (0x3F << \
+ I40E_AQC_ADD_CLOUD_FILTER_SHIFT)
+/* 0x0000 reserved */
+#define I40E_AQC_ADD_CLOUD_FILTER_OIP 0x0001
+/* 0x0002 reserved */
+#define I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN 0x0003
+#define I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN_TEN_ID 0x0004
+/* 0x0005 reserved */
+#define I40E_AQC_ADD_CLOUD_FILTER_IMAC_TEN_ID 0x0006
+/* 0x0007 reserved */
+/* 0x0008 reserved */
+#define I40E_AQC_ADD_CLOUD_FILTER_OMAC 0x0009
+#define I40E_AQC_ADD_CLOUD_FILTER_IMAC 0x000A
+#define I40E_AQC_ADD_CLOUD_FILTER_OMAC_TEN_ID_IMAC 0x000B
+#define I40E_AQC_ADD_CLOUD_FILTER_IIP 0x000C
+
+#define I40E_AQC_ADD_CLOUD_FLAGS_TO_QUEUE 0x0080
+#define I40E_AQC_ADD_CLOUD_VNK_SHIFT 6
+#define I40E_AQC_ADD_CLOUD_VNK_MASK 0x00C0
+#define I40E_AQC_ADD_CLOUD_FLAGS_IPV4 0
+#define I40E_AQC_ADD_CLOUD_FLAGS_IPV6 0x0100
+
+#define I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT 9
+#define I40E_AQC_ADD_CLOUD_TNL_TYPE_MASK 0x1E00
+#define I40E_AQC_ADD_CLOUD_TNL_TYPE_XVLAN 0
+#define I40E_AQC_ADD_CLOUD_TNL_TYPE_NVGRE_OMAC 1
+#define I40E_AQC_ADD_CLOUD_TNL_TYPE_NGE 2
+#define I40E_AQC_ADD_CLOUD_TNL_TYPE_IP 3
+
+ __le32 tenant_id;
+ u8 reserved[4];
+ __le16 queue_number;
+#define I40E_AQC_ADD_CLOUD_QUEUE_SHIFT 0
+#define I40E_AQC_ADD_CLOUD_QUEUE_MASK (0x7FF << \
+ I40E_AQC_ADD_CLOUD_QUEUE_SHIFT)
+ u8 reserved2[14];
+ /* response section */
+ u8 allocation_result;
+#define I40E_AQC_ADD_CLOUD_FILTER_SUCCESS 0x0
+#define I40E_AQC_ADD_CLOUD_FILTER_FAIL 0xFF
+ u8 response_reserved[7];
+};
+
+struct i40e_aqc_remove_cloud_filters_completion {
+ __le16 perfect_ovlan_used;
+ __le16 perfect_ovlan_free;
+ __le16 vlan_used;
+ __le16 vlan_free;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_remove_cloud_filters_completion);
+
+/* Add Mirror Rule (indirect or direct 0x0260)
+ * Delete Mirror Rule (indirect or direct 0x0261)
+ * note: some rule types (4,5) do not use an external buffer.
+ * take care to set the flags correctly.
+ */
+struct i40e_aqc_add_delete_mirror_rule {
+ __le16 seid;
+ __le16 rule_type;
+#define I40E_AQC_MIRROR_RULE_TYPE_SHIFT 0
+#define I40E_AQC_MIRROR_RULE_TYPE_MASK (0x7 << \
+ I40E_AQC_MIRROR_RULE_TYPE_SHIFT)
+#define I40E_AQC_MIRROR_RULE_TYPE_VPORT_INGRESS 1
+#define I40E_AQC_MIRROR_RULE_TYPE_VPORT_EGRESS 2
+#define I40E_AQC_MIRROR_RULE_TYPE_VLAN 3
+#define I40E_AQC_MIRROR_RULE_TYPE_ALL_INGRESS 4
+#define I40E_AQC_MIRROR_RULE_TYPE_ALL_EGRESS 5
+ __le16 num_entries;
+ __le16 destination; /* VSI for add, rule id for delete */
+ __le32 addr_high; /* address of array of 2-byte VSI or VLAN ids */
+ __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_delete_mirror_rule);
+
+struct i40e_aqc_add_delete_mirror_rule_completion {
+ u8 reserved[2];
+ __le16 rule_id; /* only used on add */
+ __le16 mirror_rules_used;
+ __le16 mirror_rules_free;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_delete_mirror_rule_completion);
+
+/* DCB 0x03xx*/
+
+/* PFC Ignore (direct 0x0301)
+ * the command and response use the same descriptor structure
+ */
+struct i40e_aqc_pfc_ignore {
+ u8 tc_bitmap;
+ u8 command_flags; /* unused on response */
+#define I40E_AQC_PFC_IGNORE_SET 0x80
+#define I40E_AQC_PFC_IGNORE_CLEAR 0x0
+ u8 reserved[14];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_pfc_ignore);
+
+/* DCB Update (direct 0x0302) uses the i40e_aq_desc structure
+ * with no parameters
+ */
+
+/* TX scheduler 0x04xx */
+
+/* Almost all the indirect commands use
+ * this generic struct to pass the SEID in param0
+ */
+struct i40e_aqc_tx_sched_ind {
+ __le16 vsi_seid;
+ u8 reserved[6];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_tx_sched_ind);
+
+/* Several commands respond with a set of queue set handles */
+struct i40e_aqc_qs_handles_resp {
+ __le16 qs_handles[8];
+};
+
+/* Configure VSI BW limits (direct 0x0400) */
+struct i40e_aqc_configure_vsi_bw_limit {
+ __le16 vsi_seid;
+ u8 reserved[2];
+ __le16 credit;
+ u8 reserved1[2];
+ u8 max_credit; /* 0-3, limit = 2^max */
+ u8 reserved2[7];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_configure_vsi_bw_limit);
+
+/* Configure VSI Bandwidth Limit per Traffic Type (indirect 0x0406)
+ * responds with i40e_aqc_qs_handles_resp
+ */
+struct i40e_aqc_configure_vsi_ets_sla_bw_data {
+ u8 tc_valid_bits;
+ u8 reserved[15];
+ __le16 tc_bw_credits[8]; /* FW writesback QS handles here */
+
+ /* 4 bits per tc 0-7, 4th bit is reserved, limit = 2^max */
+ __le16 tc_bw_max[2];
+ u8 reserved1[28];
+};
+
+I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_configure_vsi_ets_sla_bw_data);
+
+/* Configure VSI Bandwidth Allocation per Traffic Type (indirect 0x0407)
+ * responds with i40e_aqc_qs_handles_resp
+ */
+struct i40e_aqc_configure_vsi_tc_bw_data {
+ u8 tc_valid_bits;
+ u8 reserved[3];
+ u8 tc_bw_credits[8];
+ u8 reserved1[4];
+ __le16 qs_handles[8];
+};
+
+I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_configure_vsi_tc_bw_data);
+
+/* Query vsi bw configuration (indirect 0x0408) */
+struct i40e_aqc_query_vsi_bw_config_resp {
+ u8 tc_valid_bits;
+ u8 tc_suspended_bits;
+ u8 reserved[14];
+ __le16 qs_handles[8];
+ u8 reserved1[4];
+ __le16 port_bw_limit;
+ u8 reserved2[2];
+ u8 max_bw; /* 0-3, limit = 2^max */
+ u8 reserved3[23];
+};
+
+I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_query_vsi_bw_config_resp);
+
+/* Query VSI Bandwidth Allocation per Traffic Type (indirect 0x040A) */
+struct i40e_aqc_query_vsi_ets_sla_config_resp {
+ u8 tc_valid_bits;
+ u8 reserved[3];
+ u8 share_credits[8];
+ __le16 credits[8];
+
+ /* 4 bits per tc 0-7, 4th bit is reserved, limit = 2^max */
+ __le16 tc_bw_max[2];
+};
+
+I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_query_vsi_ets_sla_config_resp);
+
+/* Configure Switching Component Bandwidth Limit (direct 0x0410) */
+struct i40e_aqc_configure_switching_comp_bw_limit {
+ __le16 seid;
+ u8 reserved[2];
+ __le16 credit;
+ u8 reserved1[2];
+ u8 max_bw; /* 0-3, limit = 2^max */
+ u8 reserved2[7];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_configure_switching_comp_bw_limit);
+
+/* Enable Physical Port ETS (indirect 0x0413)
+ * Modify Physical Port ETS (indirect 0x0414)
+ * Disable Physical Port ETS (indirect 0x0415)
+ */
+struct i40e_aqc_configure_switching_comp_ets_data {
+ u8 reserved[4];
+ u8 tc_valid_bits;
+ u8 seepage;
+#define I40E_AQ_ETS_SEEPAGE_EN_MASK 0x1
+ u8 tc_strict_priority_flags;
+ u8 reserved1[17];
+ u8 tc_bw_share_credits[8];
+ u8 reserved2[96];
+};
+
+I40E_CHECK_STRUCT_LEN(0x80, i40e_aqc_configure_switching_comp_ets_data);
+
+/* Configure Switching Component Bandwidth Limits per Tc (indirect 0x0416) */
+struct i40e_aqc_configure_switching_comp_ets_bw_limit_data {
+ u8 tc_valid_bits;
+ u8 reserved[15];
+ __le16 tc_bw_credit[8];
+
+ /* 4 bits per tc 0-7, 4th bit is reserved, limit = 2^max */
+ __le16 tc_bw_max[2];
+ u8 reserved1[28];
+};
+
+I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_configure_switching_comp_ets_bw_limit_data);
+
+/* Configure Switching Component Bandwidth Allocation per Tc
+ * (indirect 0x0417)
+ */
+struct i40e_aqc_configure_switching_comp_bw_config_data {
+ u8 tc_valid_bits;
+ u8 reserved[2];
+ u8 absolute_credits; /* bool */
+ u8 tc_bw_share_credits[8];
+ u8 reserved1[20];
+};
+
+I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_configure_switching_comp_bw_config_data);
+
+/* Query Switching Component Configuration (indirect 0x0418) */
+struct i40e_aqc_query_switching_comp_ets_config_resp {
+ u8 tc_valid_bits;
+ u8 reserved[35];
+ __le16 port_bw_limit;
+ u8 reserved1[2];
+ u8 tc_bw_max; /* 0-3, limit = 2^max */
+ u8 reserved2[23];
+};
+
+I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_query_switching_comp_ets_config_resp);
+
+/* Query PhysicalPort ETS Configuration (indirect 0x0419) */
+struct i40e_aqc_query_port_ets_config_resp {
+ u8 reserved[4];
+ u8 tc_valid_bits;
+ u8 reserved1;
+ u8 tc_strict_priority_bits;
+ u8 reserved2;
+ u8 tc_bw_share_credits[8];
+ __le16 tc_bw_limits[8];
+
+ /* 4 bits per tc 0-7, 4th bit reserved, limit = 2^max */
+ __le16 tc_bw_max[2];
+ u8 reserved3[32];
+};
+
+I40E_CHECK_STRUCT_LEN(0x44, i40e_aqc_query_port_ets_config_resp);
+
+/* Query Switching Component Bandwidth Allocation per Traffic Type
+ * (indirect 0x041A)
+ */
+struct i40e_aqc_query_switching_comp_bw_config_resp {
+ u8 tc_valid_bits;
+ u8 reserved[2];
+ u8 absolute_credits_enable; /* bool */
+ u8 tc_bw_share_credits[8];
+ __le16 tc_bw_limits[8];
+
+ /* 4 bits per tc 0-7, 4th bit is reserved, limit = 2^max */
+ __le16 tc_bw_max[2];
+};
+
+I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_query_switching_comp_bw_config_resp);
+
+/* Suspend/resume port TX traffic
+ * (direct 0x041B and 0x041C) uses the generic SEID struct
+ */
+
+/* Configure partition BW
+ * (indirect 0x041D)
+ */
+struct i40e_aqc_configure_partition_bw_data {
+ __le16 pf_valid_bits;
+ u8 min_bw[16]; /* guaranteed bandwidth */
+ u8 max_bw[16]; /* bandwidth limit */
+};
+
+I40E_CHECK_STRUCT_LEN(0x22, i40e_aqc_configure_partition_bw_data);
+
+/* Get and set the active HMC resource profile and status.
+ * (direct 0x0500) and (direct 0x0501)
+ */
+struct i40e_aq_get_set_hmc_resource_profile {
+ u8 pm_profile;
+ u8 pe_vf_enabled;
+ u8 reserved[14];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aq_get_set_hmc_resource_profile);
+
+enum i40e_aq_hmc_profile {
+ /* I40E_HMC_PROFILE_NO_CHANGE = 0, reserved */
+ I40E_HMC_PROFILE_DEFAULT = 1,
+ I40E_HMC_PROFILE_FAVOR_VF = 2,
+ I40E_HMC_PROFILE_EQUAL = 3,
+};
+
+#define I40E_AQ_GET_HMC_RESOURCE_PROFILE_PM_MASK 0xF
+#define I40E_AQ_GET_HMC_RESOURCE_PROFILE_COUNT_MASK 0x3F
+
+/* Get PHY Abilities (indirect 0x0600) uses the generic indirect struct */
+
+/* set in param0 for get phy abilities to report qualified modules */
+#define I40E_AQ_PHY_REPORT_QUALIFIED_MODULES 0x0001
+#define I40E_AQ_PHY_REPORT_INITIAL_VALUES 0x0002
+
+enum i40e_aq_phy_type {
+ I40E_PHY_TYPE_SGMII = 0x0,
+ I40E_PHY_TYPE_1000BASE_KX = 0x1,
+ I40E_PHY_TYPE_10GBASE_KX4 = 0x2,
+ I40E_PHY_TYPE_10GBASE_KR = 0x3,
+ I40E_PHY_TYPE_40GBASE_KR4 = 0x4,
+ I40E_PHY_TYPE_XAUI = 0x5,
+ I40E_PHY_TYPE_XFI = 0x6,
+ I40E_PHY_TYPE_SFI = 0x7,
+ I40E_PHY_TYPE_XLAUI = 0x8,
+ I40E_PHY_TYPE_XLPPI = 0x9,
+ I40E_PHY_TYPE_40GBASE_CR4_CU = 0xA,
+ I40E_PHY_TYPE_10GBASE_CR1_CU = 0xB,
+ I40E_PHY_TYPE_10GBASE_AOC = 0xC,
+ I40E_PHY_TYPE_40GBASE_AOC = 0xD,
+ I40E_PHY_TYPE_100BASE_TX = 0x11,
+ I40E_PHY_TYPE_1000BASE_T = 0x12,
+ I40E_PHY_TYPE_10GBASE_T = 0x13,
+ I40E_PHY_TYPE_10GBASE_SR = 0x14,
+ I40E_PHY_TYPE_10GBASE_LR = 0x15,
+ I40E_PHY_TYPE_10GBASE_SFPP_CU = 0x16,
+ I40E_PHY_TYPE_10GBASE_CR1 = 0x17,
+ I40E_PHY_TYPE_40GBASE_CR4 = 0x18,
+ I40E_PHY_TYPE_40GBASE_SR4 = 0x19,
+ I40E_PHY_TYPE_40GBASE_LR4 = 0x1A,
+ I40E_PHY_TYPE_1000BASE_SX = 0x1B,
+ I40E_PHY_TYPE_1000BASE_LX = 0x1C,
+ I40E_PHY_TYPE_1000BASE_T_OPTICAL = 0x1D,
+ I40E_PHY_TYPE_20GBASE_KR2 = 0x1E,
+ I40E_PHY_TYPE_MAX
+};
+
+#define I40E_LINK_SPEED_100MB_SHIFT 0x1
+#define I40E_LINK_SPEED_1000MB_SHIFT 0x2
+#define I40E_LINK_SPEED_10GB_SHIFT 0x3
+#define I40E_LINK_SPEED_40GB_SHIFT 0x4
+#define I40E_LINK_SPEED_20GB_SHIFT 0x5
+
+enum i40e_aq_link_speed {
+ I40E_LINK_SPEED_UNKNOWN = 0,
+ I40E_LINK_SPEED_100MB = (1 << I40E_LINK_SPEED_100MB_SHIFT),
+ I40E_LINK_SPEED_1GB = (1 << I40E_LINK_SPEED_1000MB_SHIFT),
+ I40E_LINK_SPEED_10GB = (1 << I40E_LINK_SPEED_10GB_SHIFT),
+ I40E_LINK_SPEED_40GB = (1 << I40E_LINK_SPEED_40GB_SHIFT),
+ I40E_LINK_SPEED_20GB = (1 << I40E_LINK_SPEED_20GB_SHIFT)
+};
+
+struct i40e_aqc_module_desc {
+ u8 oui[3];
+ u8 reserved1;
+ u8 part_number[16];
+ u8 revision[4];
+ u8 reserved2[8];
+};
+
+I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_module_desc);
+
+struct i40e_aq_get_phy_abilities_resp {
+ __le32 phy_type; /* bitmap using the above enum for offsets */
+ u8 link_speed; /* bitmap using the above enum bit patterns */
+ u8 abilities;
+#define I40E_AQ_PHY_FLAG_PAUSE_TX 0x01
+#define I40E_AQ_PHY_FLAG_PAUSE_RX 0x02
+#define I40E_AQ_PHY_FLAG_LOW_POWER 0x04
+#define I40E_AQ_PHY_LINK_ENABLED 0x08
+#define I40E_AQ_PHY_AN_ENABLED 0x10
+#define I40E_AQ_PHY_FLAG_MODULE_QUAL 0x20
+ __le16 eee_capability;
+#define I40E_AQ_EEE_100BASE_TX 0x0002
+#define I40E_AQ_EEE_1000BASE_T 0x0004
+#define I40E_AQ_EEE_10GBASE_T 0x0008
+#define I40E_AQ_EEE_1000BASE_KX 0x0010
+#define I40E_AQ_EEE_10GBASE_KX4 0x0020
+#define I40E_AQ_EEE_10GBASE_KR 0x0040
+ __le32 eeer_val;
+ u8 d3_lpan;
+#define I40E_AQ_SET_PHY_D3_LPAN_ENA 0x01
+ u8 reserved[3];
+ u8 phy_id[4];
+ u8 module_type[3];
+ u8 qualified_module_count;
+#define I40E_AQ_PHY_MAX_QMS 16
+ struct i40e_aqc_module_desc qualified_module[I40E_AQ_PHY_MAX_QMS];
+};
+
+I40E_CHECK_STRUCT_LEN(0x218, i40e_aq_get_phy_abilities_resp);
+
+/* Set PHY Config (direct 0x0601) */
+struct i40e_aq_set_phy_config { /* same bits as above in all */
+ __le32 phy_type;
+ u8 link_speed;
+ u8 abilities;
+/* bits 0-2 use the values from get_phy_abilities_resp */
+#define I40E_AQ_PHY_ENABLE_LINK 0x08
+#define I40E_AQ_PHY_ENABLE_AN 0x10
+#define I40E_AQ_PHY_ENABLE_ATOMIC_LINK 0x20
+ __le16 eee_capability;
+ __le32 eeer;
+ u8 low_power_ctrl;
+ u8 reserved[3];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aq_set_phy_config);
+
+/* Set MAC Config command data structure (direct 0x0603) */
+struct i40e_aq_set_mac_config {
+ __le16 max_frame_size;
+ u8 params;
+#define I40E_AQ_SET_MAC_CONFIG_CRC_EN 0x04
+#define I40E_AQ_SET_MAC_CONFIG_PACING_MASK 0x78
+#define I40E_AQ_SET_MAC_CONFIG_PACING_SHIFT 3
+#define I40E_AQ_SET_MAC_CONFIG_PACING_NONE 0x0
+#define I40E_AQ_SET_MAC_CONFIG_PACING_1B_13TX 0xF
+#define I40E_AQ_SET_MAC_CONFIG_PACING_1DW_9TX 0x9
+#define I40E_AQ_SET_MAC_CONFIG_PACING_1DW_4TX 0x8
+#define I40E_AQ_SET_MAC_CONFIG_PACING_3DW_7TX 0x7
+#define I40E_AQ_SET_MAC_CONFIG_PACING_2DW_3TX 0x6
+#define I40E_AQ_SET_MAC_CONFIG_PACING_1DW_1TX 0x5
+#define I40E_AQ_SET_MAC_CONFIG_PACING_3DW_2TX 0x4
+#define I40E_AQ_SET_MAC_CONFIG_PACING_7DW_3TX 0x3
+#define I40E_AQ_SET_MAC_CONFIG_PACING_4DW_1TX 0x2
+#define I40E_AQ_SET_MAC_CONFIG_PACING_9DW_1TX 0x1
+ u8 tx_timer_priority; /* bitmap */
+ __le16 tx_timer_value;
+ __le16 fc_refresh_threshold;
+ u8 reserved[8];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aq_set_mac_config);
+
+/* Restart Auto-Negotiation (direct 0x605) */
+struct i40e_aqc_set_link_restart_an {
+ u8 command;
+#define I40E_AQ_PHY_RESTART_AN 0x02
+#define I40E_AQ_PHY_LINK_ENABLE 0x04
+ u8 reserved[15];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_link_restart_an);
+
+/* Get Link Status cmd & response data structure (direct 0x0607) */
+struct i40e_aqc_get_link_status {
+ __le16 command_flags; /* only field set on command */
+#define I40E_AQ_LSE_MASK 0x3
+#define I40E_AQ_LSE_NOP 0x0
+#define I40E_AQ_LSE_DISABLE 0x2
+#define I40E_AQ_LSE_ENABLE 0x3
+/* only response uses this flag */
+#define I40E_AQ_LSE_IS_ENABLED 0x1
+ u8 phy_type; /* i40e_aq_phy_type */
+ u8 link_speed; /* i40e_aq_link_speed */
+ u8 link_info;
+#define I40E_AQ_LINK_UP 0x01 /* obsolete */
+#define I40E_AQ_LINK_UP_FUNCTION 0x01
+#define I40E_AQ_LINK_FAULT 0x02
+#define I40E_AQ_LINK_FAULT_TX 0x04
+#define I40E_AQ_LINK_FAULT_RX 0x08
+#define I40E_AQ_LINK_FAULT_REMOTE 0x10
+#define I40E_AQ_LINK_UP_PORT 0x20
+#define I40E_AQ_MEDIA_AVAILABLE 0x40
+#define I40E_AQ_SIGNAL_DETECT 0x80
+ u8 an_info;
+#define I40E_AQ_AN_COMPLETED 0x01
+#define I40E_AQ_LP_AN_ABILITY 0x02
+#define I40E_AQ_PD_FAULT 0x04
+#define I40E_AQ_FEC_EN 0x08
+#define I40E_AQ_PHY_LOW_POWER 0x10
+#define I40E_AQ_LINK_PAUSE_TX 0x20
+#define I40E_AQ_LINK_PAUSE_RX 0x40
+#define I40E_AQ_QUALIFIED_MODULE 0x80
+ u8 ext_info;
+#define I40E_AQ_LINK_PHY_TEMP_ALARM 0x01
+#define I40E_AQ_LINK_XCESSIVE_ERRORS 0x02
+#define I40E_AQ_LINK_TX_SHIFT 0x02
+#define I40E_AQ_LINK_TX_MASK (0x03 << I40E_AQ_LINK_TX_SHIFT)
+#define I40E_AQ_LINK_TX_ACTIVE 0x00
+#define I40E_AQ_LINK_TX_DRAINED 0x01
+#define I40E_AQ_LINK_TX_FLUSHED 0x03
+#define I40E_AQ_LINK_FORCED_40G 0x10
+ u8 loopback; /* use defines from i40e_aqc_set_lb_mode */
+ __le16 max_frame_size;
+ u8 config;
+#define I40E_AQ_CONFIG_CRC_ENA 0x04
+#define I40E_AQ_CONFIG_PACING_MASK 0x78
+ u8 reserved[5];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_get_link_status);
+
+/* Set event mask command (direct 0x613) */
+struct i40e_aqc_set_phy_int_mask {
+ u8 reserved[8];
+ __le16 event_mask;
+#define I40E_AQ_EVENT_LINK_UPDOWN 0x0002
+#define I40E_AQ_EVENT_MEDIA_NA 0x0004
+#define I40E_AQ_EVENT_LINK_FAULT 0x0008
+#define I40E_AQ_EVENT_PHY_TEMP_ALARM 0x0010
+#define I40E_AQ_EVENT_EXCESSIVE_ERRORS 0x0020
+#define I40E_AQ_EVENT_SIGNAL_DETECT 0x0040
+#define I40E_AQ_EVENT_AN_COMPLETED 0x0080
+#define I40E_AQ_EVENT_MODULE_QUAL_FAIL 0x0100
+#define I40E_AQ_EVENT_PORT_TX_SUSPENDED 0x0200
+ u8 reserved1[6];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_phy_int_mask);
+
+/* Get Local AN advt register (direct 0x0614)
+ * Set Local AN advt register (direct 0x0615)
+ * Get Link Partner AN advt register (direct 0x0616)
+ */
+struct i40e_aqc_an_advt_reg {
+ __le32 local_an_reg0;
+ __le16 local_an_reg1;
+ u8 reserved[10];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_an_advt_reg);
+
+/* Set Loopback mode (0x0618) */
+struct i40e_aqc_set_lb_mode {
+ __le16 lb_mode;
+#define I40E_AQ_LB_PHY_LOCAL 0x01
+#define I40E_AQ_LB_PHY_REMOTE 0x02
+#define I40E_AQ_LB_MAC_LOCAL 0x04
+ u8 reserved[14];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_lb_mode);
+
+/* Set PHY Debug command (0x0622) */
+struct i40e_aqc_set_phy_debug {
+ u8 command_flags;
+#define I40E_AQ_PHY_DEBUG_RESET_INTERNAL 0x02
+#define I40E_AQ_PHY_DEBUG_RESET_EXTERNAL_SHIFT 2
+#define I40E_AQ_PHY_DEBUG_RESET_EXTERNAL_MASK (0x03 << \
+ I40E_AQ_PHY_DEBUG_RESET_EXTERNAL_SHIFT)
+#define I40E_AQ_PHY_DEBUG_RESET_EXTERNAL_NONE 0x00
+#define I40E_AQ_PHY_DEBUG_RESET_EXTERNAL_HARD 0x01
+#define I40E_AQ_PHY_DEBUG_RESET_EXTERNAL_SOFT 0x02
+#define I40E_AQ_PHY_DEBUG_DISABLE_LINK_FW 0x10
+ u8 reserved[15];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_phy_debug);
+
+enum i40e_aq_phy_reg_type {
+ I40E_AQC_PHY_REG_INTERNAL = 0x1,
+ I40E_AQC_PHY_REG_EXERNAL_BASET = 0x2,
+ I40E_AQC_PHY_REG_EXERNAL_MODULE = 0x3
+};
+
+/* NVM Read command (indirect 0x0701)
+ * NVM Erase commands (direct 0x0702)
+ * NVM Update commands (indirect 0x0703)
+ */
+struct i40e_aqc_nvm_update {
+ u8 command_flags;
+#define I40E_AQ_NVM_LAST_CMD 0x01
+#define I40E_AQ_NVM_FLASH_ONLY 0x80
+ u8 module_pointer;
+ __le16 length;
+ __le32 offset;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_nvm_update);
+
+/* NVM Config Read (indirect 0x0704) */
+struct i40e_aqc_nvm_config_read {
+ __le16 cmd_flags;
+#define I40E_AQ_ANVM_SINGLE_OR_MULTIPLE_FEATURES_MASK 1
+#define I40E_AQ_ANVM_READ_SINGLE_FEATURE 0
+#define I40E_AQ_ANVM_READ_MULTIPLE_FEATURES 1
+ __le16 element_count;
+ __le16 element_id; /* Feature/field ID */
+ __le16 element_id_msw; /* MSWord of field ID */
+ __le32 address_high;
+ __le32 address_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_nvm_config_read);
+
+/* NVM Config Write (indirect 0x0705) */
+struct i40e_aqc_nvm_config_write {
+ __le16 cmd_flags;
+ __le16 element_count;
+ u8 reserved[4];
+ __le32 address_high;
+ __le32 address_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_nvm_config_write);
+
+/* Used for 0x0704 as well as for 0x0705 commands */
+#define I40E_AQ_ANVM_FEATURE_OR_IMMEDIATE_SHIFT 1
+#define I40E_AQ_ANVM_FEATURE_OR_IMMEDIATE_MASK (1 << I40E_AQ_ANVM_FEATURE_OR_IMMEDIATE_SHIFT)
+#define I40E_AQ_ANVM_FEATURE 0
+#define I40E_AQ_ANVM_IMMEDIATE_FIELD (1 << FEATURE_OR_IMMEDIATE_SHIFT)
+struct i40e_aqc_nvm_config_data_feature {
+ __le16 feature_id;
+#define I40E_AQ_ANVM_FEATURE_OPTION_OEM_ONLY 0x01
+#define I40E_AQ_ANVM_FEATURE_OPTION_DWORD_MAP 0x08
+#define I40E_AQ_ANVM_FEATURE_OPTION_POR_CSR 0x10
+ __le16 feature_options;
+ __le16 feature_selection;
+};
+
+I40E_CHECK_STRUCT_LEN(0x6, i40e_aqc_nvm_config_data_feature);
+
+struct i40e_aqc_nvm_config_data_immediate_field {
+ __le32 field_id;
+ __le32 field_value;
+ __le16 field_options;
+ __le16 reserved;
+};
+
+I40E_CHECK_STRUCT_LEN(0xc, i40e_aqc_nvm_config_data_immediate_field);
+
+/* OEM Post Update (indirect 0x0720)
+ * no command data struct used
+ */
+ struct i40e_aqc_nvm_oem_post_update {
+#define I40E_AQ_NVM_OEM_POST_UPDATE_EXTERNAL_DATA 0x01
+ u8 sel_data;
+ u8 reserved[7];
+};
+
+I40E_CHECK_STRUCT_LEN(0x8, i40e_aqc_nvm_oem_post_update);
+
+struct i40e_aqc_nvm_oem_post_update_buffer {
+ u8 str_len;
+ u8 dev_addr;
+ __le16 eeprom_addr;
+ u8 data[36];
+};
+
+I40E_CHECK_STRUCT_LEN(0x28, i40e_aqc_nvm_oem_post_update_buffer);
+
+/* Send to PF command (indirect 0x0801) id is only used by PF
+ * Send to VF command (indirect 0x0802) id is only used by PF
+ * Send to Peer PF command (indirect 0x0803)
+ */
+struct i40e_aqc_pf_vf_message {
+ __le32 id;
+ u8 reserved[4];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_pf_vf_message);
+
+/* Alternate structure */
+
+/* Direct write (direct 0x0900)
+ * Direct read (direct 0x0902)
+ */
+struct i40e_aqc_alternate_write {
+ __le32 address0;
+ __le32 data0;
+ __le32 address1;
+ __le32 data1;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_alternate_write);
+
+/* Indirect write (indirect 0x0901)
+ * Indirect read (indirect 0x0903)
+ */
+
+struct i40e_aqc_alternate_ind_write {
+ __le32 address;
+ __le32 length;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_alternate_ind_write);
+
+/* Done alternate write (direct 0x0904)
+ * uses i40e_aq_desc
+ */
+struct i40e_aqc_alternate_write_done {
+ __le16 cmd_flags;
+#define I40E_AQ_ALTERNATE_MODE_BIOS_MASK 1
+#define I40E_AQ_ALTERNATE_MODE_BIOS_LEGACY 0
+#define I40E_AQ_ALTERNATE_MODE_BIOS_UEFI 1
+#define I40E_AQ_ALTERNATE_RESET_NEEDED 2
+ u8 reserved[14];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_alternate_write_done);
+
+/* Set OEM mode (direct 0x0905) */
+struct i40e_aqc_alternate_set_mode {
+ __le32 mode;
+#define I40E_AQ_ALTERNATE_MODE_NONE 0
+#define I40E_AQ_ALTERNATE_MODE_OEM 1
+ u8 reserved[12];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_alternate_set_mode);
+
+/* Clear port Alternate RAM (direct 0x0906) uses i40e_aq_desc */
+
+/* async events 0x10xx */
+
+/* Lan Queue Overflow Event (direct, 0x1001) */
+struct i40e_aqc_lan_overflow {
+ __le32 prtdcb_rupto;
+ __le32 otx_ctl;
+ u8 reserved[8];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lan_overflow);
+
+/* Get LLDP MIB (indirect 0x0A00) */
+struct i40e_aqc_lldp_get_mib {
+ u8 type;
+ u8 reserved1;
+#define I40E_AQ_LLDP_MIB_TYPE_MASK 0x3
+#define I40E_AQ_LLDP_MIB_LOCAL 0x0
+#define I40E_AQ_LLDP_MIB_REMOTE 0x1
+#define I40E_AQ_LLDP_MIB_LOCAL_AND_REMOTE 0x2
+#define I40E_AQ_LLDP_BRIDGE_TYPE_MASK 0xC
+#define I40E_AQ_LLDP_BRIDGE_TYPE_SHIFT 0x2
+#define I40E_AQ_LLDP_BRIDGE_TYPE_NEAREST_BRIDGE 0x0
+#define I40E_AQ_LLDP_BRIDGE_TYPE_NON_TPMR 0x1
+#define I40E_AQ_LLDP_TX_SHIFT 0x4
+#define I40E_AQ_LLDP_TX_MASK (0x03 << I40E_AQ_LLDP_TX_SHIFT)
+/* TX pause flags use I40E_AQ_LINK_TX_* above */
+ __le16 local_len;
+ __le16 remote_len;
+ u8 reserved2[2];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_get_mib);
+
+/* Configure LLDP MIB Change Event (direct 0x0A01)
+ * also used for the event (with type in the command field)
+ */
+struct i40e_aqc_lldp_update_mib {
+ u8 command;
+#define I40E_AQ_LLDP_MIB_UPDATE_ENABLE 0x0
+#define I40E_AQ_LLDP_MIB_UPDATE_DISABLE 0x1
+ u8 reserved[7];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_update_mib);
+
+/* Add LLDP TLV (indirect 0x0A02)
+ * Delete LLDP TLV (indirect 0x0A04)
+ */
+struct i40e_aqc_lldp_add_tlv {
+ u8 type; /* only nearest bridge and non-TPMR from 0x0A00 */
+ u8 reserved1[1];
+ __le16 len;
+ u8 reserved2[4];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_add_tlv);
+
+/* Update LLDP TLV (indirect 0x0A03) */
+struct i40e_aqc_lldp_update_tlv {
+ u8 type; /* only nearest bridge and non-TPMR from 0x0A00 */
+ u8 reserved;
+ __le16 old_len;
+ __le16 new_offset;
+ __le16 new_len;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_update_tlv);
+
+/* Stop LLDP (direct 0x0A05) */
+struct i40e_aqc_lldp_stop {
+ u8 command;
+#define I40E_AQ_LLDP_AGENT_STOP 0x0
+#define I40E_AQ_LLDP_AGENT_SHUTDOWN 0x1
+ u8 reserved[15];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_stop);
+
+/* Start LLDP (direct 0x0A06) */
+
+struct i40e_aqc_lldp_start {
+ u8 command;
+#define I40E_AQ_LLDP_AGENT_START 0x1
+ u8 reserved[15];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_start);
+
+/* Get CEE DCBX Oper Config (0x0A07)
+ * uses the generic descriptor struct
+ * returns below as indirect response
+ */
+
+#define I40E_AQC_CEE_APP_FCOE_SHIFT 0x0
+#define I40E_AQC_CEE_APP_FCOE_MASK (0x7 << I40E_AQC_CEE_APP_FCOE_SHIFT)
+#define I40E_AQC_CEE_APP_ISCSI_SHIFT 0x3
+#define I40E_AQC_CEE_APP_ISCSI_MASK (0x7 << I40E_AQC_CEE_APP_ISCSI_SHIFT)
+#define I40E_AQC_CEE_APP_FIP_SHIFT 0x8
+#define I40E_AQC_CEE_APP_FIP_MASK (0x7 << I40E_AQC_CEE_APP_FIP_SHIFT)
+
+#define I40E_AQC_CEE_PG_STATUS_SHIFT 0x0
+#define I40E_AQC_CEE_PG_STATUS_MASK (0x7 << I40E_AQC_CEE_PG_STATUS_SHIFT)
+#define I40E_AQC_CEE_PFC_STATUS_SHIFT 0x3
+#define I40E_AQC_CEE_PFC_STATUS_MASK (0x7 << I40E_AQC_CEE_PFC_STATUS_SHIFT)
+#define I40E_AQC_CEE_APP_STATUS_SHIFT 0x8
+#define I40E_AQC_CEE_APP_STATUS_MASK (0x7 << I40E_AQC_CEE_APP_STATUS_SHIFT)
+#define I40E_AQC_CEE_FCOE_STATUS_SHIFT 0x8
+#define I40E_AQC_CEE_FCOE_STATUS_MASK (0x7 << I40E_AQC_CEE_FCOE_STATUS_SHIFT)
+#define I40E_AQC_CEE_ISCSI_STATUS_SHIFT 0xB
+#define I40E_AQC_CEE_ISCSI_STATUS_MASK (0x7 << I40E_AQC_CEE_ISCSI_STATUS_SHIFT)
+#define I40E_AQC_CEE_FIP_STATUS_SHIFT 0x10
+#define I40E_AQC_CEE_FIP_STATUS_MASK (0x7 << I40E_AQC_CEE_FIP_STATUS_SHIFT)
+
+/* struct i40e_aqc_get_cee_dcb_cfg_v1_resp was originally defined with
+ * word boundary layout issues, which the Linux compilers silently deal
+ * with by adding padding, making the actual struct larger than designed.
+ * However, the FW compiler for the NIC is less lenient and complains
+ * about the struct. Hence, the struct defined here has an extra byte in
+ * fields reserved3 and reserved4 to directly acknowledge that padding,
+ * and the new length is used in the length check macro.
+ */
+struct i40e_aqc_get_cee_dcb_cfg_v1_resp {
+ u8 reserved1;
+ u8 oper_num_tc;
+ u8 oper_prio_tc[4];
+ u8 reserved2;
+ u8 oper_tc_bw[8];
+ u8 oper_pfc_en;
+ u8 reserved3[2];
+ __le16 oper_app_prio;
+ u8 reserved4[2];
+ __le16 tlv_status;
+};
+
+I40E_CHECK_STRUCT_LEN(0x18, i40e_aqc_get_cee_dcb_cfg_v1_resp);
+
+struct i40e_aqc_get_cee_dcb_cfg_resp {
+ u8 oper_num_tc;
+ u8 oper_prio_tc[4];
+ u8 oper_tc_bw[8];
+ u8 oper_pfc_en;
+ __le16 oper_app_prio;
+ __le32 tlv_status;
+ u8 reserved[12];
+};
+
+I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_get_cee_dcb_cfg_resp);
+
+/* Set Local LLDP MIB (indirect 0x0A08)
+ * Used to replace the local MIB of a given LLDP agent. e.g. DCBx
+ */
+struct i40e_aqc_lldp_set_local_mib {
+#define SET_LOCAL_MIB_AC_TYPE_DCBX_SHIFT 0
+#define SET_LOCAL_MIB_AC_TYPE_DCBX_MASK (1 << SET_LOCAL_MIB_AC_TYPE_DCBX_SHIFT)
+ u8 type;
+ u8 reserved0;
+ __le16 length;
+ u8 reserved1[4];
+ __le32 address_high;
+ __le32 address_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_set_local_mib);
+
+/* Stop/Start LLDP Agent (direct 0x0A09)
+ * Used for stopping/starting specific LLDP agent. e.g. DCBx
+ */
+struct i40e_aqc_lldp_stop_start_specific_agent {
+#define I40E_AQC_START_SPECIFIC_AGENT_SHIFT 0
+#define I40E_AQC_START_SPECIFIC_AGENT_MASK (1 << I40E_AQC_START_SPECIFIC_AGENT_SHIFT)
+ u8 command;
+ u8 reserved[15];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_stop_start_specific_agent);
+
+/* Add Udp Tunnel command and completion (direct 0x0B00) */
+struct i40e_aqc_add_udp_tunnel {
+ __le16 udp_port;
+ u8 reserved0[3];
+ u8 protocol_type;
+#define I40E_AQC_TUNNEL_TYPE_VXLAN 0x00
+#define I40E_AQC_TUNNEL_TYPE_NGE 0x01
+#define I40E_AQC_TUNNEL_TYPE_TEREDO 0x10
+ u8 reserved1[10];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_udp_tunnel);
+
+struct i40e_aqc_add_udp_tunnel_completion {
+ __le16 udp_port;
+ u8 filter_entry_index;
+ u8 multiple_pfs;
+#define I40E_AQC_SINGLE_PF 0x0
+#define I40E_AQC_MULTIPLE_PFS 0x1
+ u8 total_filters;
+ u8 reserved[11];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_udp_tunnel_completion);
+
+/* remove UDP Tunnel command (0x0B01) */
+struct i40e_aqc_remove_udp_tunnel {
+ u8 reserved[2];
+ u8 index; /* 0 to 15 */
+ u8 reserved2[13];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_remove_udp_tunnel);
+
+struct i40e_aqc_del_udp_tunnel_completion {
+ __le16 udp_port;
+ u8 index; /* 0 to 15 */
+ u8 multiple_pfs;
+ u8 total_filters_used;
+ u8 reserved1[11];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_del_udp_tunnel_completion);
+#ifdef X722_SUPPORT
+
+struct i40e_aqc_get_set_rss_key {
+#define I40E_AQC_SET_RSS_KEY_VSI_VALID (0x1 << 15)
+#define I40E_AQC_SET_RSS_KEY_VSI_ID_SHIFT 0
+#define I40E_AQC_SET_RSS_KEY_VSI_ID_MASK (0x3FF << \
+ I40E_AQC_SET_RSS_KEY_VSI_ID_SHIFT)
+ __le16 vsi_id;
+ u8 reserved[6];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_get_set_rss_key);
+
+struct i40e_aqc_get_set_rss_key_data {
+ u8 standard_rss_key[0x28];
+ u8 extended_hash_key[0xc];
+};
+
+I40E_CHECK_STRUCT_LEN(0x34, i40e_aqc_get_set_rss_key_data);
+
+struct i40e_aqc_get_set_rss_lut {
+#define I40E_AQC_SET_RSS_LUT_VSI_VALID (0x1 << 15)
+#define I40E_AQC_SET_RSS_LUT_VSI_ID_SHIFT 0
+#define I40E_AQC_SET_RSS_LUT_VSI_ID_MASK (0x3FF << \
+ I40E_AQC_SET_RSS_LUT_VSI_ID_SHIFT)
+ __le16 vsi_id;
+#define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT 0
+#define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_MASK (0x1 << \
+ I40E_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT)
+
+#define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_VSI 0
+#define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_PF 1
+ __le16 flags;
+ u8 reserved[4];
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_get_set_rss_lut);
+#endif
+
+/* tunnel key structure 0x0B10 */
+
+struct i40e_aqc_tunnel_key_structure {
+ u8 key1_off;
+ u8 key2_off;
+ u8 key1_len; /* 0 to 15 */
+ u8 key2_len; /* 0 to 15 */
+ u8 flags;
+#define I40E_AQC_TUNNEL_KEY_STRUCT_OVERRIDE 0x01
+/* response flags */
+#define I40E_AQC_TUNNEL_KEY_STRUCT_SUCCESS 0x01
+#define I40E_AQC_TUNNEL_KEY_STRUCT_MODIFIED 0x02
+#define I40E_AQC_TUNNEL_KEY_STRUCT_OVERRIDDEN 0x03
+ u8 network_key_index;
+#define I40E_AQC_NETWORK_KEY_INDEX_VXLAN 0x0
+#define I40E_AQC_NETWORK_KEY_INDEX_NGE 0x1
+#define I40E_AQC_NETWORK_KEY_INDEX_FLEX_MAC_IN_UDP 0x2
+#define I40E_AQC_NETWORK_KEY_INDEX_GRE 0x3
+ u8 reserved[10];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_tunnel_key_structure);
+
+/* OEM mode commands (direct 0xFE0x) */
+struct i40e_aqc_oem_param_change {
+ __le32 param_type;
+#define I40E_AQ_OEM_PARAM_TYPE_PF_CTL 0
+#define I40E_AQ_OEM_PARAM_TYPE_BW_CTL 1
+#define I40E_AQ_OEM_PARAM_MAC 2
+ __le32 param_value1;
+ __le16 param_value2;
+ u8 reserved[6];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_oem_param_change);
+
+struct i40e_aqc_oem_state_change {
+ __le32 state;
+#define I40E_AQ_OEM_STATE_LINK_DOWN 0x0
+#define I40E_AQ_OEM_STATE_LINK_UP 0x1
+ u8 reserved[12];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_oem_state_change);
+
+/* Initialize OCSD (0xFE02, direct) */
+struct i40e_aqc_opc_oem_ocsd_initialize {
+ u8 type_status;
+ u8 reserved1[3];
+ __le32 ocsd_memory_block_addr_high;
+ __le32 ocsd_memory_block_addr_low;
+ __le32 requested_update_interval;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_opc_oem_ocsd_initialize);
+
+/* Initialize OCBB (0xFE03, direct) */
+struct i40e_aqc_opc_oem_ocbb_initialize {
+ u8 type_status;
+ u8 reserved1[3];
+ __le32 ocbb_memory_block_addr_high;
+ __le32 ocbb_memory_block_addr_low;
+ u8 reserved2[4];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_opc_oem_ocbb_initialize);
+
+/* debug commands */
+
+/* get device id (0xFF00) uses the generic structure */
+
+/* set test more (0xFF01, internal) */
+
+struct i40e_acq_set_test_mode {
+ u8 mode;
+#define I40E_AQ_TEST_PARTIAL 0
+#define I40E_AQ_TEST_FULL 1
+#define I40E_AQ_TEST_NVM 2
+ u8 reserved[3];
+ u8 command;
+#define I40E_AQ_TEST_OPEN 0
+#define I40E_AQ_TEST_CLOSE 1
+#define I40E_AQ_TEST_INC 2
+ u8 reserved2[3];
+ __le32 address_high;
+ __le32 address_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_acq_set_test_mode);
+
+/* Debug Read Register command (0xFF03)
+ * Debug Write Register command (0xFF04)
+ */
+struct i40e_aqc_debug_reg_read_write {
+ __le32 reserved;
+ __le32 address;
+ __le32 value_high;
+ __le32 value_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_debug_reg_read_write);
+
+/* Scatter/gather Reg Read (indirect 0xFF05)
+ * Scatter/gather Reg Write (indirect 0xFF06)
+ */
+
+/* i40e_aq_desc is used for the command */
+struct i40e_aqc_debug_reg_sg_element_data {
+ __le32 address;
+ __le32 value;
+};
+
+/* Debug Modify register (direct 0xFF07) */
+struct i40e_aqc_debug_modify_reg {
+ __le32 address;
+ __le32 value;
+ __le32 clear_mask;
+ __le32 set_mask;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_debug_modify_reg);
+
+/* dump internal data (0xFF08, indirect) */
+
+#define I40E_AQ_CLUSTER_ID_AUX 0
+#define I40E_AQ_CLUSTER_ID_SWITCH_FLU 1
+#define I40E_AQ_CLUSTER_ID_TXSCHED 2
+#define I40E_AQ_CLUSTER_ID_HMC 3
+#define I40E_AQ_CLUSTER_ID_MAC0 4
+#define I40E_AQ_CLUSTER_ID_MAC1 5
+#define I40E_AQ_CLUSTER_ID_MAC2 6
+#define I40E_AQ_CLUSTER_ID_MAC3 7
+#define I40E_AQ_CLUSTER_ID_DCB 8
+#define I40E_AQ_CLUSTER_ID_EMP_MEM 9
+#define I40E_AQ_CLUSTER_ID_PKT_BUF 10
+#define I40E_AQ_CLUSTER_ID_ALTRAM 11
+
+struct i40e_aqc_debug_dump_internals {
+ u8 cluster_id;
+ u8 table_id;
+ __le16 data_size;
+ __le32 idx;
+ __le32 address_high;
+ __le32 address_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_debug_dump_internals);
+
+struct i40e_aqc_debug_modify_internals {
+ u8 cluster_id;
+ u8 cluster_specific_params[7];
+ __le32 address_high;
+ __le32 address_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_debug_modify_internals);
+
+#endif
diff --git a/usr/src/uts/common/io/i40e/core/i40e_alloc.h b/usr/src/uts/common/io/i40e/core/i40e_alloc.h
new file mode 100644
index 0000000000..4428287f83
--- /dev/null
+++ b/usr/src/uts/common/io/i40e/core/i40e_alloc.h
@@ -0,0 +1,66 @@
+/******************************************************************************
+
+ Copyright (c) 2013-2014, Intel Corporation
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+
+******************************************************************************/
+/*$FreeBSD: head/sys/dev/ixl/i40e_alloc.h 283119 2015-05-19 18:35:18Z jhb $*/
+
+#ifndef _I40E_ALLOC_H_
+#define _I40E_ALLOC_H_
+
+struct i40e_hw;
+
+/* Memory allocation types */
+enum i40e_memory_type {
+ i40e_mem_arq_buf = 0, /* ARQ indirect command buffer */
+ i40e_mem_asq_buf = 1,
+ i40e_mem_atq_buf = 2, /* ATQ indirect command buffer */
+ i40e_mem_arq_ring = 3, /* ARQ descriptor ring */
+ i40e_mem_atq_ring = 4, /* ATQ descriptor ring */
+ i40e_mem_pd = 5, /* Page Descriptor */
+ i40e_mem_bp = 6, /* Backing Page - 4KB */
+ i40e_mem_bp_jumbo = 7, /* Backing Page - > 4KB */
+ i40e_mem_reserved
+};
+
+/* prototype for functions used for dynamic memory allocation */
+enum i40e_status_code i40e_allocate_dma_mem(struct i40e_hw *hw,
+ struct i40e_dma_mem *mem,
+ enum i40e_memory_type type,
+ u64 size, u32 alignment);
+enum i40e_status_code i40e_free_dma_mem(struct i40e_hw *hw,
+ struct i40e_dma_mem *mem);
+enum i40e_status_code i40e_allocate_virt_mem(struct i40e_hw *hw,
+ struct i40e_virt_mem *mem,
+ u32 size);
+enum i40e_status_code i40e_free_virt_mem(struct i40e_hw *hw,
+ struct i40e_virt_mem *mem);
+
+#endif /* _I40E_ALLOC_H_ */
diff --git a/usr/src/uts/common/io/i40e/core/i40e_common.c b/usr/src/uts/common/io/i40e/core/i40e_common.c
new file mode 100644
index 0000000000..c58eb9de1e
--- /dev/null
+++ b/usr/src/uts/common/io/i40e/core/i40e_common.c
@@ -0,0 +1,5708 @@
+/******************************************************************************
+
+ Copyright (c) 2013-2015, Intel Corporation
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+
+******************************************************************************/
+/*$FreeBSD: head/sys/dev/ixl/i40e_common.c 284049 2015-06-05 22:52:42Z jfv $*/
+
+#include "i40e_type.h"
+#include "i40e_adminq.h"
+#include "i40e_prototype.h"
+#include "i40e_virtchnl.h"
+
+
+/**
+ * i40e_set_mac_type - Sets MAC type
+ * @hw: pointer to the HW structure
+ *
+ * This function sets the mac type of the adapter based on the
+ * vendor ID and device ID stored in the hw structure.
+ **/
+enum i40e_status_code i40e_set_mac_type(struct i40e_hw *hw)
+{
+ enum i40e_status_code status = I40E_SUCCESS;
+
+ DEBUGFUNC("i40e_set_mac_type\n");
+
+ if (hw->vendor_id == I40E_INTEL_VENDOR_ID) {
+ switch (hw->device_id) {
+ case I40E_DEV_ID_SFP_XL710:
+ case I40E_DEV_ID_QEMU:
+ case I40E_DEV_ID_KX_A:
+ case I40E_DEV_ID_KX_B:
+ case I40E_DEV_ID_KX_C:
+ case I40E_DEV_ID_QSFP_A:
+ case I40E_DEV_ID_QSFP_B:
+ case I40E_DEV_ID_QSFP_C:
+ case I40E_DEV_ID_10G_BASE_T:
+ case I40E_DEV_ID_10G_BASE_T4:
+ case I40E_DEV_ID_20G_KR2:
+ case I40E_DEV_ID_20G_KR2_A:
+ hw->mac.type = I40E_MAC_XL710;
+ break;
+#ifdef X722_SUPPORT
+ case I40E_DEV_ID_SFP_X722:
+ case I40E_DEV_ID_1G_BASE_T_X722:
+ case I40E_DEV_ID_10G_BASE_T_X722:
+ hw->mac.type = I40E_MAC_X722;
+ break;
+#endif
+#ifdef X722_SUPPORT
+ case I40E_DEV_ID_X722_VF:
+ case I40E_DEV_ID_X722_VF_HV:
+ hw->mac.type = I40E_MAC_X722_VF;
+ break;
+#endif
+ case I40E_DEV_ID_VF:
+ case I40E_DEV_ID_VF_HV:
+ hw->mac.type = I40E_MAC_VF;
+ break;
+ default:
+ hw->mac.type = I40E_MAC_GENERIC;
+ break;
+ }
+ } else {
+ status = I40E_ERR_DEVICE_NOT_SUPPORTED;
+ }
+
+ DEBUGOUT2("i40e_set_mac_type found mac: %d, returns: %d\n",
+ hw->mac.type, status);
+ return status;
+}
+
+/**
+ * i40e_aq_str - convert AQ err code to a string
+ * @hw: pointer to the HW structure
+ * @aq_err: the AQ error code to convert
+ **/
+char *i40e_aq_str(struct i40e_hw *hw, enum i40e_admin_queue_err aq_err)
+{
+ switch (aq_err) {
+ case I40E_AQ_RC_OK:
+ return "OK";
+ case I40E_AQ_RC_EPERM:
+ return "I40E_AQ_RC_EPERM";
+ case I40E_AQ_RC_ENOENT:
+ return "I40E_AQ_RC_ENOENT";
+ case I40E_AQ_RC_ESRCH:
+ return "I40E_AQ_RC_ESRCH";
+ case I40E_AQ_RC_EINTR:
+ return "I40E_AQ_RC_EINTR";
+ case I40E_AQ_RC_EIO:
+ return "I40E_AQ_RC_EIO";
+ case I40E_AQ_RC_ENXIO:
+ return "I40E_AQ_RC_ENXIO";
+ case I40E_AQ_RC_E2BIG:
+ return "I40E_AQ_RC_E2BIG";
+ case I40E_AQ_RC_EAGAIN:
+ return "I40E_AQ_RC_EAGAIN";
+ case I40E_AQ_RC_ENOMEM:
+ return "I40E_AQ_RC_ENOMEM";
+ case I40E_AQ_RC_EACCES:
+ return "I40E_AQ_RC_EACCES";
+ case I40E_AQ_RC_EFAULT:
+ return "I40E_AQ_RC_EFAULT";
+ case I40E_AQ_RC_EBUSY:
+ return "I40E_AQ_RC_EBUSY";
+ case I40E_AQ_RC_EEXIST:
+ return "I40E_AQ_RC_EEXIST";
+ case I40E_AQ_RC_EINVAL:
+ return "I40E_AQ_RC_EINVAL";
+ case I40E_AQ_RC_ENOTTY:
+ return "I40E_AQ_RC_ENOTTY";
+ case I40E_AQ_RC_ENOSPC:
+ return "I40E_AQ_RC_ENOSPC";
+ case I40E_AQ_RC_ENOSYS:
+ return "I40E_AQ_RC_ENOSYS";
+ case I40E_AQ_RC_ERANGE:
+ return "I40E_AQ_RC_ERANGE";
+ case I40E_AQ_RC_EFLUSHED:
+ return "I40E_AQ_RC_EFLUSHED";
+ case I40E_AQ_RC_BAD_ADDR:
+ return "I40E_AQ_RC_BAD_ADDR";
+ case I40E_AQ_RC_EMODE:
+ return "I40E_AQ_RC_EMODE";
+ case I40E_AQ_RC_EFBIG:
+ return "I40E_AQ_RC_EFBIG";
+ }
+
+ snprintf(hw->err_str, sizeof(hw->err_str), "%d", aq_err);
+ return hw->err_str;
+}
+
+/**
+ * i40e_stat_str - convert status err code to a string
+ * @hw: pointer to the HW structure
+ * @stat_err: the status error code to convert
+ **/
+char *i40e_stat_str(struct i40e_hw *hw, enum i40e_status_code stat_err)
+{
+ switch (stat_err) {
+ case I40E_SUCCESS:
+ return "OK";
+ case I40E_ERR_NVM:
+ return "I40E_ERR_NVM";
+ case I40E_ERR_NVM_CHECKSUM:
+ return "I40E_ERR_NVM_CHECKSUM";
+ case I40E_ERR_PHY:
+ return "I40E_ERR_PHY";
+ case I40E_ERR_CONFIG:
+ return "I40E_ERR_CONFIG";
+ case I40E_ERR_PARAM:
+ return "I40E_ERR_PARAM";
+ case I40E_ERR_MAC_TYPE:
+ return "I40E_ERR_MAC_TYPE";
+ case I40E_ERR_UNKNOWN_PHY:
+ return "I40E_ERR_UNKNOWN_PHY";
+ case I40E_ERR_LINK_SETUP:
+ return "I40E_ERR_LINK_SETUP";
+ case I40E_ERR_ADAPTER_STOPPED:
+ return "I40E_ERR_ADAPTER_STOPPED";
+ case I40E_ERR_INVALID_MAC_ADDR:
+ return "I40E_ERR_INVALID_MAC_ADDR";
+ case I40E_ERR_DEVICE_NOT_SUPPORTED:
+ return "I40E_ERR_DEVICE_NOT_SUPPORTED";
+ case I40E_ERR_MASTER_REQUESTS_PENDING:
+ return "I40E_ERR_MASTER_REQUESTS_PENDING";
+ case I40E_ERR_INVALID_LINK_SETTINGS:
+ return "I40E_ERR_INVALID_LINK_SETTINGS";
+ case I40E_ERR_AUTONEG_NOT_COMPLETE:
+ return "I40E_ERR_AUTONEG_NOT_COMPLETE";
+ case I40E_ERR_RESET_FAILED:
+ return "I40E_ERR_RESET_FAILED";
+ case I40E_ERR_SWFW_SYNC:
+ return "I40E_ERR_SWFW_SYNC";
+ case I40E_ERR_NO_AVAILABLE_VSI:
+ return "I40E_ERR_NO_AVAILABLE_VSI";
+ case I40E_ERR_NO_MEMORY:
+ return "I40E_ERR_NO_MEMORY";
+ case I40E_ERR_BAD_PTR:
+ return "I40E_ERR_BAD_PTR";
+ case I40E_ERR_RING_FULL:
+ return "I40E_ERR_RING_FULL";
+ case I40E_ERR_INVALID_PD_ID:
+ return "I40E_ERR_INVALID_PD_ID";
+ case I40E_ERR_INVALID_QP_ID:
+ return "I40E_ERR_INVALID_QP_ID";
+ case I40E_ERR_INVALID_CQ_ID:
+ return "I40E_ERR_INVALID_CQ_ID";
+ case I40E_ERR_INVALID_CEQ_ID:
+ return "I40E_ERR_INVALID_CEQ_ID";
+ case I40E_ERR_INVALID_AEQ_ID:
+ return "I40E_ERR_INVALID_AEQ_ID";
+ case I40E_ERR_INVALID_SIZE:
+ return "I40E_ERR_INVALID_SIZE";
+ case I40E_ERR_INVALID_ARP_INDEX:
+ return "I40E_ERR_INVALID_ARP_INDEX";
+ case I40E_ERR_INVALID_FPM_FUNC_ID:
+ return "I40E_ERR_INVALID_FPM_FUNC_ID";
+ case I40E_ERR_QP_INVALID_MSG_SIZE:
+ return "I40E_ERR_QP_INVALID_MSG_SIZE";
+ case I40E_ERR_QP_TOOMANY_WRS_POSTED:
+ return "I40E_ERR_QP_TOOMANY_WRS_POSTED";
+ case I40E_ERR_INVALID_FRAG_COUNT:
+ return "I40E_ERR_INVALID_FRAG_COUNT";
+ case I40E_ERR_QUEUE_EMPTY:
+ return "I40E_ERR_QUEUE_EMPTY";
+ case I40E_ERR_INVALID_ALIGNMENT:
+ return "I40E_ERR_INVALID_ALIGNMENT";
+ case I40E_ERR_FLUSHED_QUEUE:
+ return "I40E_ERR_FLUSHED_QUEUE";
+ case I40E_ERR_INVALID_PUSH_PAGE_INDEX:
+ return "I40E_ERR_INVALID_PUSH_PAGE_INDEX";
+ case I40E_ERR_INVALID_IMM_DATA_SIZE:
+ return "I40E_ERR_INVALID_IMM_DATA_SIZE";
+ case I40E_ERR_TIMEOUT:
+ return "I40E_ERR_TIMEOUT";
+ case I40E_ERR_OPCODE_MISMATCH:
+ return "I40E_ERR_OPCODE_MISMATCH";
+ case I40E_ERR_CQP_COMPL_ERROR:
+ return "I40E_ERR_CQP_COMPL_ERROR";
+ case I40E_ERR_INVALID_VF_ID:
+ return "I40E_ERR_INVALID_VF_ID";
+ case I40E_ERR_INVALID_HMCFN_ID:
+ return "I40E_ERR_INVALID_HMCFN_ID";
+ case I40E_ERR_BACKING_PAGE_ERROR:
+ return "I40E_ERR_BACKING_PAGE_ERROR";
+ case I40E_ERR_NO_PBLCHUNKS_AVAILABLE:
+ return "I40E_ERR_NO_PBLCHUNKS_AVAILABLE";
+ case I40E_ERR_INVALID_PBLE_INDEX:
+ return "I40E_ERR_INVALID_PBLE_INDEX";
+ case I40E_ERR_INVALID_SD_INDEX:
+ return "I40E_ERR_INVALID_SD_INDEX";
+ case I40E_ERR_INVALID_PAGE_DESC_INDEX:
+ return "I40E_ERR_INVALID_PAGE_DESC_INDEX";
+ case I40E_ERR_INVALID_SD_TYPE:
+ return "I40E_ERR_INVALID_SD_TYPE";
+ case I40E_ERR_MEMCPY_FAILED:
+ return "I40E_ERR_MEMCPY_FAILED";
+ case I40E_ERR_INVALID_HMC_OBJ_INDEX:
+ return "I40E_ERR_INVALID_HMC_OBJ_INDEX";
+ case I40E_ERR_INVALID_HMC_OBJ_COUNT:
+ return "I40E_ERR_INVALID_HMC_OBJ_COUNT";
+ case I40E_ERR_INVALID_SRQ_ARM_LIMIT:
+ return "I40E_ERR_INVALID_SRQ_ARM_LIMIT";
+ case I40E_ERR_SRQ_ENABLED:
+ return "I40E_ERR_SRQ_ENABLED";
+ case I40E_ERR_ADMIN_QUEUE_ERROR:
+ return "I40E_ERR_ADMIN_QUEUE_ERROR";
+ case I40E_ERR_ADMIN_QUEUE_TIMEOUT:
+ return "I40E_ERR_ADMIN_QUEUE_TIMEOUT";
+ case I40E_ERR_BUF_TOO_SHORT:
+ return "I40E_ERR_BUF_TOO_SHORT";
+ case I40E_ERR_ADMIN_QUEUE_FULL:
+ return "I40E_ERR_ADMIN_QUEUE_FULL";
+ case I40E_ERR_ADMIN_QUEUE_NO_WORK:
+ return "I40E_ERR_ADMIN_QUEUE_NO_WORK";
+ case I40E_ERR_BAD_IWARP_CQE:
+ return "I40E_ERR_BAD_IWARP_CQE";
+ case I40E_ERR_NVM_BLANK_MODE:
+ return "I40E_ERR_NVM_BLANK_MODE";
+ case I40E_ERR_NOT_IMPLEMENTED:
+ return "I40E_ERR_NOT_IMPLEMENTED";
+ case I40E_ERR_PE_DOORBELL_NOT_ENABLED:
+ return "I40E_ERR_PE_DOORBELL_NOT_ENABLED";
+ case I40E_ERR_DIAG_TEST_FAILED:
+ return "I40E_ERR_DIAG_TEST_FAILED";
+ case I40E_ERR_NOT_READY:
+ return "I40E_ERR_NOT_READY";
+ case I40E_NOT_SUPPORTED:
+ return "I40E_NOT_SUPPORTED";
+ case I40E_ERR_FIRMWARE_API_VERSION:
+ return "I40E_ERR_FIRMWARE_API_VERSION";
+ }
+
+ snprintf(hw->err_str, sizeof(hw->err_str), "%d", stat_err);
+ return hw->err_str;
+}
+
+/**
+ * i40e_debug_aq
+ * @hw: debug mask related to admin queue
+ * @mask: debug mask
+ * @desc: pointer to admin queue descriptor
+ * @buffer: pointer to command buffer
+ * @buf_len: max length of buffer
+ *
+ * Dumps debug log about adminq command with descriptor contents.
+ **/
+void i40e_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, void *desc,
+ void *buffer, u16 buf_len)
+{
+ struct i40e_aq_desc *aq_desc = (struct i40e_aq_desc *)desc;
+ u16 len = LE16_TO_CPU(aq_desc->datalen);
+ u8 *buf = (u8 *)buffer;
+ u16 i = 0;
+
+ if ((!(mask & hw->debug_mask)) || (desc == NULL))
+ return;
+
+ i40e_debug(hw, mask,
+ "AQ CMD: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n",
+ LE16_TO_CPU(aq_desc->opcode),
+ LE16_TO_CPU(aq_desc->flags),
+ LE16_TO_CPU(aq_desc->datalen),
+ LE16_TO_CPU(aq_desc->retval));
+ i40e_debug(hw, mask, "\tcookie (h,l) 0x%08X 0x%08X\n",
+ LE32_TO_CPU(aq_desc->cookie_high),
+ LE32_TO_CPU(aq_desc->cookie_low));
+ i40e_debug(hw, mask, "\tparam (0,1) 0x%08X 0x%08X\n",
+ LE32_TO_CPU(aq_desc->params.internal.param0),
+ LE32_TO_CPU(aq_desc->params.internal.param1));
+ i40e_debug(hw, mask, "\taddr (h,l) 0x%08X 0x%08X\n",
+ LE32_TO_CPU(aq_desc->params.external.addr_high),
+ LE32_TO_CPU(aq_desc->params.external.addr_low));
+
+ if ((buffer != NULL) && (aq_desc->datalen != 0)) {
+ i40e_debug(hw, mask, "AQ CMD Buffer:\n");
+ if (buf_len < len)
+ len = buf_len;
+ /* write the full 16-byte chunks */
+ for (i = 0; i < (len - 16); i += 16)
+ i40e_debug(hw, mask,
+ "\t0x%04X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X\n",
+ i, buf[i], buf[i+1], buf[i+2], buf[i+3],
+ buf[i+4], buf[i+5], buf[i+6], buf[i+7],
+ buf[i+8], buf[i+9], buf[i+10], buf[i+11],
+ buf[i+12], buf[i+13], buf[i+14], buf[i+15]);
+ /* write whatever's left over without overrunning the buffer */
+ if (i < len) {
+ char d_buf[80];
+ int j = 0;
+
+ memset(d_buf, 0, sizeof(d_buf));
+#ifdef I40E_ILLUMOS
+ /*
+ * Sigh.
+ *
+ * The illumos DDI (inherited from OpenSolaris) says
+ * sprintf() returns the pointer to its first
+ * argument, NOT the length of bytes printed. A better
+ * solution would be to have the kernel provide
+ * something like real_sprintf() but for now, we
+ * hack around it.
+ */
+ (void) sprintf(d_buf, "\t0x%04X ", i);
+ j += strlen(d_buf);
+ /* Bounds-check at 77, because " XX" emits 4 chars. */
+ while (i < len && j < 77) {
+ (void) sprintf(&d_buf[j], " %02X", buf[i++]);
+ j += strlen(&d_buf[j]);
+ }
+#else
+ j += sprintf(d_buf, "\t0x%04X ", i);
+ while (i < len)
+ j += sprintf(&d_buf[j], " %02X", buf[i++]);
+#endif
+ i40e_debug(hw, mask, "%s\n", d_buf);
+ }
+ }
+}
+
+/**
+ * i40e_check_asq_alive
+ * @hw: pointer to the hw struct
+ *
+ * Returns TRUE if Queue is enabled else FALSE.
+ **/
+bool i40e_check_asq_alive(struct i40e_hw *hw)
+{
+ if (hw->aq.asq.len)
+ if (!i40e_is_vf(hw))
+ return !!(rd32(hw, hw->aq.asq.len) &
+ I40E_PF_ATQLEN_ATQENABLE_MASK);
+ if (i40e_is_vf(hw))
+ return !!(rd32(hw, hw->aq.asq.len) &
+ I40E_VF_ATQLEN1_ATQENABLE_MASK);
+ return FALSE;
+}
+
+/**
+ * i40e_aq_queue_shutdown
+ * @hw: pointer to the hw struct
+ * @unloading: is the driver unloading itself
+ *
+ * Tell the Firmware that we're shutting down the AdminQ and whether
+ * or not the driver is unloading as well.
+ **/
+enum i40e_status_code i40e_aq_queue_shutdown(struct i40e_hw *hw,
+ bool unloading)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_queue_shutdown *cmd =
+ (struct i40e_aqc_queue_shutdown *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_queue_shutdown);
+
+ if (unloading)
+ cmd->driver_unloading = CPU_TO_LE32(I40E_AQ_DRIVER_UNLOADING);
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, NULL);
+
+ return status;
+}
+#ifdef X722_SUPPORT
+
+/**
+ * i40e_aq_get_set_rss_lut
+ * @hw: pointer to the hardware structure
+ * @vsi_id: vsi fw index
+ * @pf_lut: for PF table set TRUE, for VSI table set FALSE
+ * @lut: pointer to the lut buffer provided by the caller
+ * @lut_size: size of the lut buffer
+ * @set: set TRUE to set the table, FALSE to get the table
+ *
+ * Internal function to get or set RSS look up table
+ **/
+static enum i40e_status_code i40e_aq_get_set_rss_lut(struct i40e_hw *hw,
+ u16 vsi_id, bool pf_lut,
+ u8 *lut, u16 lut_size,
+ bool set)
+{
+ enum i40e_status_code status;
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_get_set_rss_lut *cmd_resp =
+ (struct i40e_aqc_get_set_rss_lut *)&desc.params.raw;
+
+ if (set)
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_rss_lut);
+ else
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_get_rss_lut);
+
+ /* Indirect command */
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_BUF);
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_RD);
+
+ cmd_resp->vsi_id =
+ CPU_TO_LE16((u16)((vsi_id <<
+ I40E_AQC_SET_RSS_LUT_VSI_ID_SHIFT) &
+ I40E_AQC_SET_RSS_LUT_VSI_ID_MASK));
+ cmd_resp->vsi_id |= CPU_TO_LE16((u16)I40E_AQC_SET_RSS_LUT_VSI_VALID);
+
+ if (pf_lut)
+ cmd_resp->flags |= CPU_TO_LE16((u16)
+ ((I40E_AQC_SET_RSS_LUT_TABLE_TYPE_PF <<
+ I40E_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT) &
+ I40E_AQC_SET_RSS_LUT_TABLE_TYPE_MASK));
+ else
+ cmd_resp->flags |= CPU_TO_LE16((u16)
+ ((I40E_AQC_SET_RSS_LUT_TABLE_TYPE_VSI <<
+ I40E_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT) &
+ I40E_AQC_SET_RSS_LUT_TABLE_TYPE_MASK));
+
+ cmd_resp->addr_high = CPU_TO_LE32(I40E_HI_WORD((u64)lut));
+ cmd_resp->addr_low = CPU_TO_LE32(I40E_LO_DWORD((u64)lut));
+
+ status = i40e_asq_send_command(hw, &desc, lut, lut_size, NULL);
+
+ return status;
+}
+
+/**
+ * i40e_aq_get_rss_lut
+ * @hw: pointer to the hardware structure
+ * @vsi_id: vsi fw index
+ * @pf_lut: for PF table set TRUE, for VSI table set FALSE
+ * @lut: pointer to the lut buffer provided by the caller
+ * @lut_size: size of the lut buffer
+ *
+ * get the RSS lookup table, PF or VSI type
+ **/
+enum i40e_status_code i40e_aq_get_rss_lut(struct i40e_hw *hw, u16 vsi_id,
+ bool pf_lut, u8 *lut, u16 lut_size)
+{
+ return i40e_aq_get_set_rss_lut(hw, vsi_id, pf_lut, lut, lut_size,
+ FALSE);
+}
+
+/**
+ * i40e_aq_set_rss_lut
+ * @hw: pointer to the hardware structure
+ * @vsi_id: vsi fw index
+ * @pf_lut: for PF table set TRUE, for VSI table set FALSE
+ * @lut: pointer to the lut buffer provided by the caller
+ * @lut_size: size of the lut buffer
+ *
+ * set the RSS lookup table, PF or VSI type
+ **/
+enum i40e_status_code i40e_aq_set_rss_lut(struct i40e_hw *hw, u16 vsi_id,
+ bool pf_lut, u8 *lut, u16 lut_size)
+{
+ return i40e_aq_get_set_rss_lut(hw, vsi_id, pf_lut, lut, lut_size, TRUE);
+}
+
+/**
+ * i40e_aq_get_set_rss_key
+ * @hw: pointer to the hw struct
+ * @vsi_id: vsi fw index
+ * @key: pointer to key info struct
+ * @set: set TRUE to set the key, FALSE to get the key
+ *
+ * get the RSS key per VSI
+ **/
+static enum i40e_status_code i40e_aq_get_set_rss_key(struct i40e_hw *hw,
+ u16 vsi_id,
+ struct i40e_aqc_get_set_rss_key_data *key,
+ bool set)
+{
+ enum i40e_status_code status;
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_get_set_rss_key *cmd_resp =
+ (struct i40e_aqc_get_set_rss_key *)&desc.params.raw;
+ u16 key_size = sizeof(struct i40e_aqc_get_set_rss_key_data);
+
+ if (set)
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_rss_key);
+ else
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_get_rss_key);
+
+ /* Indirect command */
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_BUF);
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_RD);
+
+ cmd_resp->vsi_id =
+ CPU_TO_LE16((u16)((vsi_id <<
+ I40E_AQC_SET_RSS_KEY_VSI_ID_SHIFT) &
+ I40E_AQC_SET_RSS_KEY_VSI_ID_MASK));
+ cmd_resp->vsi_id |= CPU_TO_LE16((u16)I40E_AQC_SET_RSS_KEY_VSI_VALID);
+ cmd_resp->addr_high = CPU_TO_LE32(I40E_HI_WORD((u64)key));
+ cmd_resp->addr_low = CPU_TO_LE32(I40E_LO_DWORD((u64)key));
+
+ status = i40e_asq_send_command(hw, &desc, key, key_size, NULL);
+
+ return status;
+}
+
+/**
+ * i40e_aq_get_rss_key
+ * @hw: pointer to the hw struct
+ * @vsi_id: vsi fw index
+ * @key: pointer to key info struct
+ *
+ **/
+enum i40e_status_code i40e_aq_get_rss_key(struct i40e_hw *hw,
+ u16 vsi_id,
+ struct i40e_aqc_get_set_rss_key_data *key)
+{
+ return i40e_aq_get_set_rss_key(hw, vsi_id, key, FALSE);
+}
+
+/**
+ * i40e_aq_set_rss_key
+ * @hw: pointer to the hw struct
+ * @vsi_id: vsi fw index
+ * @key: pointer to key info struct
+ *
+ * set the RSS key per VSI
+ **/
+enum i40e_status_code i40e_aq_set_rss_key(struct i40e_hw *hw,
+ u16 vsi_id,
+ struct i40e_aqc_get_set_rss_key_data *key)
+{
+ return i40e_aq_get_set_rss_key(hw, vsi_id, key, TRUE);
+}
+#endif /* X722_SUPPORT */
+
+/* The i40e_ptype_lookup table is used to convert from the 8-bit ptype in the
+ * hardware to a bit-field that can be used by SW to more easily determine the
+ * packet type.
+ *
+ * Macros are used to shorten the table lines and make this table human
+ * readable.
+ *
+ * We store the PTYPE in the top byte of the bit field - this is just so that
+ * we can check that the table doesn't have a row missing, as the index into
+ * the table should be the PTYPE.
+ *
+ * Typical work flow:
+ *
+ * IF NOT i40e_ptype_lookup[ptype].known
+ * THEN
+ * Packet is unknown
+ * ELSE IF i40e_ptype_lookup[ptype].outer_ip == I40E_RX_PTYPE_OUTER_IP
+ * Use the rest of the fields to look at the tunnels, inner protocols, etc
+ * ELSE
+ * Use the enum i40e_rx_l2_ptype to decode the packet type
+ * ENDIF
+ */
+
+/* macro to make the table lines short */
+#define I40E_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\
+ { PTYPE, \
+ 1, \
+ I40E_RX_PTYPE_OUTER_##OUTER_IP, \
+ I40E_RX_PTYPE_OUTER_##OUTER_IP_VER, \
+ I40E_RX_PTYPE_##OUTER_FRAG, \
+ I40E_RX_PTYPE_TUNNEL_##T, \
+ I40E_RX_PTYPE_TUNNEL_END_##TE, \
+ I40E_RX_PTYPE_##TEF, \
+ I40E_RX_PTYPE_INNER_PROT_##I, \
+ I40E_RX_PTYPE_PAYLOAD_LAYER_##PL }
+
+#define I40E_PTT_UNUSED_ENTRY(PTYPE) \
+ { PTYPE, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+
+/* shorter macros makes the table fit but are terse */
+#define I40E_RX_PTYPE_NOF I40E_RX_PTYPE_NOT_FRAG
+#define I40E_RX_PTYPE_FRG I40E_RX_PTYPE_FRAG
+#define I40E_RX_PTYPE_INNER_PROT_TS I40E_RX_PTYPE_INNER_PROT_TIMESYNC
+
+/* Lookup table mapping the HW PTYPE to the bit field for decoding */
+struct i40e_rx_ptype_decoded i40e_ptype_lookup[] = {
+ /* L2 Packet types */
+ I40E_PTT_UNUSED_ENTRY(0),
+ I40E_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+ I40E_PTT(2, L2, NONE, NOF, NONE, NONE, NOF, TS, PAY2),
+ I40E_PTT(3, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+ I40E_PTT_UNUSED_ENTRY(4),
+ I40E_PTT_UNUSED_ENTRY(5),
+ I40E_PTT(6, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+ I40E_PTT(7, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+ I40E_PTT_UNUSED_ENTRY(8),
+ I40E_PTT_UNUSED_ENTRY(9),
+ I40E_PTT(10, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+ I40E_PTT(11, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE),
+ I40E_PTT(12, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(13, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(14, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(15, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(16, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(17, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(18, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(19, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(20, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(21, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+
+ /* Non Tunneled IPv4 */
+ I40E_PTT(22, IP, IPV4, FRG, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(23, IP, IPV4, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(24, IP, IPV4, NOF, NONE, NONE, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(25),
+ I40E_PTT(26, IP, IPV4, NOF, NONE, NONE, NOF, TCP, PAY4),
+ I40E_PTT(27, IP, IPV4, NOF, NONE, NONE, NOF, SCTP, PAY4),
+ I40E_PTT(28, IP, IPV4, NOF, NONE, NONE, NOF, ICMP, PAY4),
+
+ /* IPv4 --> IPv4 */
+ I40E_PTT(29, IP, IPV4, NOF, IP_IP, IPV4, FRG, NONE, PAY3),
+ I40E_PTT(30, IP, IPV4, NOF, IP_IP, IPV4, NOF, NONE, PAY3),
+ I40E_PTT(31, IP, IPV4, NOF, IP_IP, IPV4, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(32),
+ I40E_PTT(33, IP, IPV4, NOF, IP_IP, IPV4, NOF, TCP, PAY4),
+ I40E_PTT(34, IP, IPV4, NOF, IP_IP, IPV4, NOF, SCTP, PAY4),
+ I40E_PTT(35, IP, IPV4, NOF, IP_IP, IPV4, NOF, ICMP, PAY4),
+
+ /* IPv4 --> IPv6 */
+ I40E_PTT(36, IP, IPV4, NOF, IP_IP, IPV6, FRG, NONE, PAY3),
+ I40E_PTT(37, IP, IPV4, NOF, IP_IP, IPV6, NOF, NONE, PAY3),
+ I40E_PTT(38, IP, IPV4, NOF, IP_IP, IPV6, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(39),
+ I40E_PTT(40, IP, IPV4, NOF, IP_IP, IPV6, NOF, TCP, PAY4),
+ I40E_PTT(41, IP, IPV4, NOF, IP_IP, IPV6, NOF, SCTP, PAY4),
+ I40E_PTT(42, IP, IPV4, NOF, IP_IP, IPV6, NOF, ICMP, PAY4),
+
+ /* IPv4 --> GRE/NAT */
+ I40E_PTT(43, IP, IPV4, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3),
+
+ /* IPv4 --> GRE/NAT --> IPv4 */
+ I40E_PTT(44, IP, IPV4, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3),
+ I40E_PTT(45, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3),
+ I40E_PTT(46, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(47),
+ I40E_PTT(48, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4),
+ I40E_PTT(49, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4),
+ I40E_PTT(50, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4),
+
+ /* IPv4 --> GRE/NAT --> IPv6 */
+ I40E_PTT(51, IP, IPV4, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3),
+ I40E_PTT(52, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3),
+ I40E_PTT(53, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(54),
+ I40E_PTT(55, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4),
+ I40E_PTT(56, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4),
+ I40E_PTT(57, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4),
+
+ /* IPv4 --> GRE/NAT --> MAC */
+ I40E_PTT(58, IP, IPV4, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3),
+
+ /* IPv4 --> GRE/NAT --> MAC --> IPv4 */
+ I40E_PTT(59, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3),
+ I40E_PTT(60, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3),
+ I40E_PTT(61, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(62),
+ I40E_PTT(63, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4),
+ I40E_PTT(64, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4),
+ I40E_PTT(65, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4),
+
+ /* IPv4 --> GRE/NAT -> MAC --> IPv6 */
+ I40E_PTT(66, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3),
+ I40E_PTT(67, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3),
+ I40E_PTT(68, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(69),
+ I40E_PTT(70, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4),
+ I40E_PTT(71, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4),
+ I40E_PTT(72, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4),
+
+ /* IPv4 --> GRE/NAT --> MAC/VLAN */
+ I40E_PTT(73, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3),
+
+ /* IPv4 ---> GRE/NAT -> MAC/VLAN --> IPv4 */
+ I40E_PTT(74, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3),
+ I40E_PTT(75, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3),
+ I40E_PTT(76, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(77),
+ I40E_PTT(78, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4),
+ I40E_PTT(79, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4),
+ I40E_PTT(80, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4),
+
+ /* IPv4 -> GRE/NAT -> MAC/VLAN --> IPv6 */
+ I40E_PTT(81, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3),
+ I40E_PTT(82, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3),
+ I40E_PTT(83, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(84),
+ I40E_PTT(85, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4),
+ I40E_PTT(86, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4),
+ I40E_PTT(87, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4),
+
+ /* Non Tunneled IPv6 */
+ I40E_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(91),
+ I40E_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP, PAY4),
+ I40E_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4),
+ I40E_PTT(94, IP, IPV6, NOF, NONE, NONE, NOF, ICMP, PAY4),
+
+ /* IPv6 --> IPv4 */
+ I40E_PTT(95, IP, IPV6, NOF, IP_IP, IPV4, FRG, NONE, PAY3),
+ I40E_PTT(96, IP, IPV6, NOF, IP_IP, IPV4, NOF, NONE, PAY3),
+ I40E_PTT(97, IP, IPV6, NOF, IP_IP, IPV4, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(98),
+ I40E_PTT(99, IP, IPV6, NOF, IP_IP, IPV4, NOF, TCP, PAY4),
+ I40E_PTT(100, IP, IPV6, NOF, IP_IP, IPV4, NOF, SCTP, PAY4),
+ I40E_PTT(101, IP, IPV6, NOF, IP_IP, IPV4, NOF, ICMP, PAY4),
+
+ /* IPv6 --> IPv6 */
+ I40E_PTT(102, IP, IPV6, NOF, IP_IP, IPV6, FRG, NONE, PAY3),
+ I40E_PTT(103, IP, IPV6, NOF, IP_IP, IPV6, NOF, NONE, PAY3),
+ I40E_PTT(104, IP, IPV6, NOF, IP_IP, IPV6, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(105),
+ I40E_PTT(106, IP, IPV6, NOF, IP_IP, IPV6, NOF, TCP, PAY4),
+ I40E_PTT(107, IP, IPV6, NOF, IP_IP, IPV6, NOF, SCTP, PAY4),
+ I40E_PTT(108, IP, IPV6, NOF, IP_IP, IPV6, NOF, ICMP, PAY4),
+
+ /* IPv6 --> GRE/NAT */
+ I40E_PTT(109, IP, IPV6, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3),
+
+ /* IPv6 --> GRE/NAT -> IPv4 */
+ I40E_PTT(110, IP, IPV6, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3),
+ I40E_PTT(111, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3),
+ I40E_PTT(112, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(113),
+ I40E_PTT(114, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4),
+ I40E_PTT(115, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4),
+ I40E_PTT(116, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4),
+
+ /* IPv6 --> GRE/NAT -> IPv6 */
+ I40E_PTT(117, IP, IPV6, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3),
+ I40E_PTT(118, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3),
+ I40E_PTT(119, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(120),
+ I40E_PTT(121, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4),
+ I40E_PTT(122, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4),
+ I40E_PTT(123, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4),
+
+ /* IPv6 --> GRE/NAT -> MAC */
+ I40E_PTT(124, IP, IPV6, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3),
+
+ /* IPv6 --> GRE/NAT -> MAC -> IPv4 */
+ I40E_PTT(125, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3),
+ I40E_PTT(126, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3),
+ I40E_PTT(127, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(128),
+ I40E_PTT(129, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4),
+ I40E_PTT(130, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4),
+ I40E_PTT(131, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4),
+
+ /* IPv6 --> GRE/NAT -> MAC -> IPv6 */
+ I40E_PTT(132, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3),
+ I40E_PTT(133, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3),
+ I40E_PTT(134, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(135),
+ I40E_PTT(136, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4),
+ I40E_PTT(137, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4),
+ I40E_PTT(138, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4),
+
+ /* IPv6 --> GRE/NAT -> MAC/VLAN */
+ I40E_PTT(139, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3),
+
+ /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv4 */
+ I40E_PTT(140, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3),
+ I40E_PTT(141, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3),
+ I40E_PTT(142, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(143),
+ I40E_PTT(144, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4),
+ I40E_PTT(145, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4),
+ I40E_PTT(146, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4),
+
+ /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv6 */
+ I40E_PTT(147, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3),
+ I40E_PTT(148, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3),
+ I40E_PTT(149, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(150),
+ I40E_PTT(151, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4),
+ I40E_PTT(152, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4),
+ I40E_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4),
+
+ /* unused entries */
+ I40E_PTT_UNUSED_ENTRY(154),
+ I40E_PTT_UNUSED_ENTRY(155),
+ I40E_PTT_UNUSED_ENTRY(156),
+ I40E_PTT_UNUSED_ENTRY(157),
+ I40E_PTT_UNUSED_ENTRY(158),
+ I40E_PTT_UNUSED_ENTRY(159),
+
+ I40E_PTT_UNUSED_ENTRY(160),
+ I40E_PTT_UNUSED_ENTRY(161),
+ I40E_PTT_UNUSED_ENTRY(162),
+ I40E_PTT_UNUSED_ENTRY(163),
+ I40E_PTT_UNUSED_ENTRY(164),
+ I40E_PTT_UNUSED_ENTRY(165),
+ I40E_PTT_UNUSED_ENTRY(166),
+ I40E_PTT_UNUSED_ENTRY(167),
+ I40E_PTT_UNUSED_ENTRY(168),
+ I40E_PTT_UNUSED_ENTRY(169),
+
+ I40E_PTT_UNUSED_ENTRY(170),
+ I40E_PTT_UNUSED_ENTRY(171),
+ I40E_PTT_UNUSED_ENTRY(172),
+ I40E_PTT_UNUSED_ENTRY(173),
+ I40E_PTT_UNUSED_ENTRY(174),
+ I40E_PTT_UNUSED_ENTRY(175),
+ I40E_PTT_UNUSED_ENTRY(176),
+ I40E_PTT_UNUSED_ENTRY(177),
+ I40E_PTT_UNUSED_ENTRY(178),
+ I40E_PTT_UNUSED_ENTRY(179),
+
+ I40E_PTT_UNUSED_ENTRY(180),
+ I40E_PTT_UNUSED_ENTRY(181),
+ I40E_PTT_UNUSED_ENTRY(182),
+ I40E_PTT_UNUSED_ENTRY(183),
+ I40E_PTT_UNUSED_ENTRY(184),
+ I40E_PTT_UNUSED_ENTRY(185),
+ I40E_PTT_UNUSED_ENTRY(186),
+ I40E_PTT_UNUSED_ENTRY(187),
+ I40E_PTT_UNUSED_ENTRY(188),
+ I40E_PTT_UNUSED_ENTRY(189),
+
+ I40E_PTT_UNUSED_ENTRY(190),
+ I40E_PTT_UNUSED_ENTRY(191),
+ I40E_PTT_UNUSED_ENTRY(192),
+ I40E_PTT_UNUSED_ENTRY(193),
+ I40E_PTT_UNUSED_ENTRY(194),
+ I40E_PTT_UNUSED_ENTRY(195),
+ I40E_PTT_UNUSED_ENTRY(196),
+ I40E_PTT_UNUSED_ENTRY(197),
+ I40E_PTT_UNUSED_ENTRY(198),
+ I40E_PTT_UNUSED_ENTRY(199),
+
+ I40E_PTT_UNUSED_ENTRY(200),
+ I40E_PTT_UNUSED_ENTRY(201),
+ I40E_PTT_UNUSED_ENTRY(202),
+ I40E_PTT_UNUSED_ENTRY(203),
+ I40E_PTT_UNUSED_ENTRY(204),
+ I40E_PTT_UNUSED_ENTRY(205),
+ I40E_PTT_UNUSED_ENTRY(206),
+ I40E_PTT_UNUSED_ENTRY(207),
+ I40E_PTT_UNUSED_ENTRY(208),
+ I40E_PTT_UNUSED_ENTRY(209),
+
+ I40E_PTT_UNUSED_ENTRY(210),
+ I40E_PTT_UNUSED_ENTRY(211),
+ I40E_PTT_UNUSED_ENTRY(212),
+ I40E_PTT_UNUSED_ENTRY(213),
+ I40E_PTT_UNUSED_ENTRY(214),
+ I40E_PTT_UNUSED_ENTRY(215),
+ I40E_PTT_UNUSED_ENTRY(216),
+ I40E_PTT_UNUSED_ENTRY(217),
+ I40E_PTT_UNUSED_ENTRY(218),
+ I40E_PTT_UNUSED_ENTRY(219),
+
+ I40E_PTT_UNUSED_ENTRY(220),
+ I40E_PTT_UNUSED_ENTRY(221),
+ I40E_PTT_UNUSED_ENTRY(222),
+ I40E_PTT_UNUSED_ENTRY(223),
+ I40E_PTT_UNUSED_ENTRY(224),
+ I40E_PTT_UNUSED_ENTRY(225),
+ I40E_PTT_UNUSED_ENTRY(226),
+ I40E_PTT_UNUSED_ENTRY(227),
+ I40E_PTT_UNUSED_ENTRY(228),
+ I40E_PTT_UNUSED_ENTRY(229),
+
+ I40E_PTT_UNUSED_ENTRY(230),
+ I40E_PTT_UNUSED_ENTRY(231),
+ I40E_PTT_UNUSED_ENTRY(232),
+ I40E_PTT_UNUSED_ENTRY(233),
+ I40E_PTT_UNUSED_ENTRY(234),
+ I40E_PTT_UNUSED_ENTRY(235),
+ I40E_PTT_UNUSED_ENTRY(236),
+ I40E_PTT_UNUSED_ENTRY(237),
+ I40E_PTT_UNUSED_ENTRY(238),
+ I40E_PTT_UNUSED_ENTRY(239),
+
+ I40E_PTT_UNUSED_ENTRY(240),
+ I40E_PTT_UNUSED_ENTRY(241),
+ I40E_PTT_UNUSED_ENTRY(242),
+ I40E_PTT_UNUSED_ENTRY(243),
+ I40E_PTT_UNUSED_ENTRY(244),
+ I40E_PTT_UNUSED_ENTRY(245),
+ I40E_PTT_UNUSED_ENTRY(246),
+ I40E_PTT_UNUSED_ENTRY(247),
+ I40E_PTT_UNUSED_ENTRY(248),
+ I40E_PTT_UNUSED_ENTRY(249),
+
+ I40E_PTT_UNUSED_ENTRY(250),
+ I40E_PTT_UNUSED_ENTRY(251),
+ I40E_PTT_UNUSED_ENTRY(252),
+ I40E_PTT_UNUSED_ENTRY(253),
+ I40E_PTT_UNUSED_ENTRY(254),
+ I40E_PTT_UNUSED_ENTRY(255)
+};
+
+
+/**
+ * i40e_validate_mac_addr - Validate unicast MAC address
+ * @mac_addr: pointer to MAC address
+ *
+ * Tests a MAC address to ensure it is a valid Individual Address
+ **/
+enum i40e_status_code i40e_validate_mac_addr(u8 *mac_addr)
+{
+ enum i40e_status_code status = I40E_SUCCESS;
+
+ DEBUGFUNC("i40e_validate_mac_addr");
+
+ /* Broadcast addresses ARE multicast addresses
+ * Make sure it is not a multicast address
+ * Reject the zero address
+ */
+ if (I40E_IS_MULTICAST(mac_addr) ||
+ (mac_addr[0] == 0 && mac_addr[1] == 0 && mac_addr[2] == 0 &&
+ mac_addr[3] == 0 && mac_addr[4] == 0 && mac_addr[5] == 0))
+ status = I40E_ERR_INVALID_MAC_ADDR;
+
+ return status;
+}
+
+/**
+ * i40e_init_shared_code - Initialize the shared code
+ * @hw: pointer to hardware structure
+ *
+ * This assigns the MAC type and PHY code and inits the NVM.
+ * Does not touch the hardware. This function must be called prior to any
+ * other function in the shared code. The i40e_hw structure should be
+ * memset to 0 prior to calling this function. The following fields in
+ * hw structure should be filled in prior to calling this function:
+ * hw_addr, back, device_id, vendor_id, subsystem_device_id,
+ * subsystem_vendor_id, and revision_id
+ **/
+enum i40e_status_code i40e_init_shared_code(struct i40e_hw *hw)
+{
+ enum i40e_status_code status = I40E_SUCCESS;
+ u32 port, ari, func_rid;
+
+ DEBUGFUNC("i40e_init_shared_code");
+
+ i40e_set_mac_type(hw);
+
+ switch (hw->mac.type) {
+ case I40E_MAC_XL710:
+#ifdef X722_SUPPORT
+ case I40E_MAC_X722:
+#endif
+ break;
+ default:
+ return I40E_ERR_DEVICE_NOT_SUPPORTED;
+ }
+
+ hw->phy.get_link_info = TRUE;
+
+ /* Determine port number and PF number*/
+ port = (rd32(hw, I40E_PFGEN_PORTNUM) & I40E_PFGEN_PORTNUM_PORT_NUM_MASK)
+ >> I40E_PFGEN_PORTNUM_PORT_NUM_SHIFT;
+ hw->port = (u8)port;
+ ari = (rd32(hw, I40E_GLPCI_CAPSUP) & I40E_GLPCI_CAPSUP_ARI_EN_MASK) >>
+ I40E_GLPCI_CAPSUP_ARI_EN_SHIFT;
+ func_rid = rd32(hw, I40E_PF_FUNC_RID);
+ if (ari)
+ hw->pf_id = (u8)(func_rid & 0xff);
+ else
+ hw->pf_id = (u8)(func_rid & 0x7);
+
+ status = i40e_init_nvm(hw);
+ return status;
+}
+
+/**
+ * i40e_aq_mac_address_read - Retrieve the MAC addresses
+ * @hw: pointer to the hw struct
+ * @flags: a return indicator of what addresses were added to the addr store
+ * @addrs: the requestor's mac addr store
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+static enum i40e_status_code i40e_aq_mac_address_read(struct i40e_hw *hw,
+ u16 *flags,
+ struct i40e_aqc_mac_address_read_data *addrs,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_mac_address_read *cmd_data =
+ (struct i40e_aqc_mac_address_read *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_mac_address_read);
+ desc.flags |= CPU_TO_LE16(I40E_AQ_FLAG_BUF);
+
+ status = i40e_asq_send_command(hw, &desc, addrs,
+ sizeof(*addrs), cmd_details);
+ *flags = LE16_TO_CPU(cmd_data->command_flags);
+
+ return status;
+}
+
+/**
+ * i40e_aq_mac_address_write - Change the MAC addresses
+ * @hw: pointer to the hw struct
+ * @flags: indicates which MAC to be written
+ * @mac_addr: address to write
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+enum i40e_status_code i40e_aq_mac_address_write(struct i40e_hw *hw,
+ u16 flags, u8 *mac_addr,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_mac_address_write *cmd_data =
+ (struct i40e_aqc_mac_address_write *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_mac_address_write);
+ cmd_data->command_flags = CPU_TO_LE16(flags);
+ cmd_data->mac_sah = CPU_TO_LE16((u16)mac_addr[0] << 8 | mac_addr[1]);
+ cmd_data->mac_sal = CPU_TO_LE32(((u32)mac_addr[2] << 24) |
+ ((u32)mac_addr[3] << 16) |
+ ((u32)mac_addr[4] << 8) |
+ mac_addr[5]);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_get_mac_addr - get MAC address
+ * @hw: pointer to the HW structure
+ * @mac_addr: pointer to MAC address
+ *
+ * Reads the adapter's MAC address from register
+ **/
+enum i40e_status_code i40e_get_mac_addr(struct i40e_hw *hw, u8 *mac_addr)
+{
+ struct i40e_aqc_mac_address_read_data addrs;
+ enum i40e_status_code status;
+ u16 flags = 0;
+
+ status = i40e_aq_mac_address_read(hw, &flags, &addrs, NULL);
+
+ if (flags & I40E_AQC_LAN_ADDR_VALID)
+ memcpy(mac_addr, &addrs.pf_lan_mac, sizeof(addrs.pf_lan_mac));
+
+ return status;
+}
+
+/**
+ * i40e_get_port_mac_addr - get Port MAC address
+ * @hw: pointer to the HW structure
+ * @mac_addr: pointer to Port MAC address
+ *
+ * Reads the adapter's Port MAC address
+ **/
+enum i40e_status_code i40e_get_port_mac_addr(struct i40e_hw *hw, u8 *mac_addr)
+{
+ struct i40e_aqc_mac_address_read_data addrs;
+ enum i40e_status_code status;
+ u16 flags = 0;
+
+ status = i40e_aq_mac_address_read(hw, &flags, &addrs, NULL);
+ if (status)
+ return status;
+
+ if (flags & I40E_AQC_PORT_ADDR_VALID)
+ memcpy(mac_addr, &addrs.port_mac, sizeof(addrs.port_mac));
+ else
+ status = I40E_ERR_INVALID_MAC_ADDR;
+
+ return status;
+}
+
+/**
+ * i40e_pre_tx_queue_cfg - pre tx queue configure
+ * @hw: pointer to the HW structure
+ * @queue: target pf queue index
+ * @enable: state change request
+ *
+ * Handles hw requirement to indicate intention to enable
+ * or disable target queue.
+ **/
+void i40e_pre_tx_queue_cfg(struct i40e_hw *hw, u32 queue, bool enable)
+{
+ u32 abs_queue_idx = hw->func_caps.base_queue + queue;
+ u32 reg_block = 0;
+ u32 reg_val;
+
+ if (abs_queue_idx >= 128) {
+ reg_block = abs_queue_idx / 128;
+ abs_queue_idx %= 128;
+ }
+
+ reg_val = rd32(hw, I40E_GLLAN_TXPRE_QDIS(reg_block));
+ reg_val &= ~I40E_GLLAN_TXPRE_QDIS_QINDX_MASK;
+ reg_val |= (abs_queue_idx << I40E_GLLAN_TXPRE_QDIS_QINDX_SHIFT);
+
+ if (enable)
+ reg_val |= I40E_GLLAN_TXPRE_QDIS_CLEAR_QDIS_MASK;
+ else
+ reg_val |= I40E_GLLAN_TXPRE_QDIS_SET_QDIS_MASK;
+
+ wr32(hw, I40E_GLLAN_TXPRE_QDIS(reg_block), reg_val);
+}
+
+/**
+ * i40e_read_pba_string - Reads part number string from EEPROM
+ * @hw: pointer to hardware structure
+ * @pba_num: stores the part number string from the EEPROM
+ * @pba_num_size: part number string buffer length
+ *
+ * Reads the part number string from the EEPROM.
+ **/
+enum i40e_status_code i40e_read_pba_string(struct i40e_hw *hw, u8 *pba_num,
+ u32 pba_num_size)
+{
+ enum i40e_status_code status = I40E_SUCCESS;
+ u16 pba_word = 0;
+ u16 pba_size = 0;
+ u16 pba_ptr = 0;
+ u16 i = 0;
+
+ status = i40e_read_nvm_word(hw, I40E_SR_PBA_FLAGS, &pba_word);
+ if ((status != I40E_SUCCESS) || (pba_word != 0xFAFA)) {
+ DEBUGOUT("Failed to read PBA flags or flag is invalid.\n");
+ return status;
+ }
+
+ status = i40e_read_nvm_word(hw, I40E_SR_PBA_BLOCK_PTR, &pba_ptr);
+ if (status != I40E_SUCCESS) {
+ DEBUGOUT("Failed to read PBA Block pointer.\n");
+ return status;
+ }
+
+ status = i40e_read_nvm_word(hw, pba_ptr, &pba_size);
+ if (status != I40E_SUCCESS) {
+ DEBUGOUT("Failed to read PBA Block size.\n");
+ return status;
+ }
+
+ /* Subtract one to get PBA word count (PBA Size word is included in
+ * total size)
+ */
+ pba_size--;
+ if (pba_num_size < (((u32)pba_size * 2) + 1)) {
+ DEBUGOUT("Buffer to small for PBA data.\n");
+ return I40E_ERR_PARAM;
+ }
+
+ for (i = 0; i < pba_size; i++) {
+ status = i40e_read_nvm_word(hw, (pba_ptr + 1) + i, &pba_word);
+ if (status != I40E_SUCCESS) {
+ DEBUGOUT1("Failed to read PBA Block word %d.\n", i);
+ return status;
+ }
+
+ pba_num[(i * 2)] = (pba_word >> 8) & 0xFF;
+ pba_num[(i * 2) + 1] = pba_word & 0xFF;
+ }
+ pba_num[(pba_size * 2)] = '\0';
+
+ return status;
+}
+
+/**
+ * i40e_get_media_type - Gets media type
+ * @hw: pointer to the hardware structure
+ **/
+static enum i40e_media_type i40e_get_media_type(struct i40e_hw *hw)
+{
+ enum i40e_media_type media;
+
+ switch (hw->phy.link_info.phy_type) {
+ case I40E_PHY_TYPE_10GBASE_SR:
+ case I40E_PHY_TYPE_10GBASE_LR:
+ case I40E_PHY_TYPE_1000BASE_SX:
+ case I40E_PHY_TYPE_1000BASE_LX:
+ case I40E_PHY_TYPE_40GBASE_SR4:
+ case I40E_PHY_TYPE_40GBASE_LR4:
+ media = I40E_MEDIA_TYPE_FIBER;
+ break;
+ case I40E_PHY_TYPE_100BASE_TX:
+ case I40E_PHY_TYPE_1000BASE_T:
+ case I40E_PHY_TYPE_10GBASE_T:
+ media = I40E_MEDIA_TYPE_BASET;
+ break;
+ case I40E_PHY_TYPE_10GBASE_CR1_CU:
+ case I40E_PHY_TYPE_40GBASE_CR4_CU:
+ case I40E_PHY_TYPE_10GBASE_CR1:
+ case I40E_PHY_TYPE_40GBASE_CR4:
+ case I40E_PHY_TYPE_10GBASE_SFPP_CU:
+ case I40E_PHY_TYPE_40GBASE_AOC:
+ case I40E_PHY_TYPE_10GBASE_AOC:
+ media = I40E_MEDIA_TYPE_DA;
+ break;
+ case I40E_PHY_TYPE_1000BASE_KX:
+ case I40E_PHY_TYPE_10GBASE_KX4:
+ case I40E_PHY_TYPE_10GBASE_KR:
+ case I40E_PHY_TYPE_40GBASE_KR4:
+ case I40E_PHY_TYPE_20GBASE_KR2:
+ media = I40E_MEDIA_TYPE_BACKPLANE;
+ break;
+ case I40E_PHY_TYPE_SGMII:
+ case I40E_PHY_TYPE_XAUI:
+ case I40E_PHY_TYPE_XFI:
+ case I40E_PHY_TYPE_XLAUI:
+ case I40E_PHY_TYPE_XLPPI:
+ default:
+ media = I40E_MEDIA_TYPE_UNKNOWN;
+ break;
+ }
+
+ return media;
+}
+
+#define I40E_PF_RESET_WAIT_COUNT 200
+/**
+ * i40e_pf_reset - Reset the PF
+ * @hw: pointer to the hardware structure
+ *
+ * Assuming someone else has triggered a global reset,
+ * assure the global reset is complete and then reset the PF
+ **/
+enum i40e_status_code i40e_pf_reset(struct i40e_hw *hw)
+{
+ u32 cnt = 0;
+ u32 cnt1 = 0;
+ u32 reg = 0;
+ u32 grst_del;
+
+ /* Poll for Global Reset steady state in case of recent GRST.
+ * The grst delay value is in 100ms units, and we'll wait a
+ * couple counts longer to be sure we don't just miss the end.
+ */
+ grst_del = (rd32(hw, I40E_GLGEN_RSTCTL) &
+ I40E_GLGEN_RSTCTL_GRSTDEL_MASK) >>
+ I40E_GLGEN_RSTCTL_GRSTDEL_SHIFT;
+ for (cnt = 0; cnt < grst_del + 10; cnt++) {
+ reg = rd32(hw, I40E_GLGEN_RSTAT);
+ if (!(reg & I40E_GLGEN_RSTAT_DEVSTATE_MASK))
+ break;
+ i40e_msec_delay(100);
+ }
+ if (reg & I40E_GLGEN_RSTAT_DEVSTATE_MASK) {
+ DEBUGOUT("Global reset polling failed to complete.\n");
+ return I40E_ERR_RESET_FAILED;
+ }
+
+ /* Now Wait for the FW to be ready */
+ for (cnt1 = 0; cnt1 < I40E_PF_RESET_WAIT_COUNT; cnt1++) {
+ reg = rd32(hw, I40E_GLNVM_ULD);
+ reg &= (I40E_GLNVM_ULD_CONF_CORE_DONE_MASK |
+ I40E_GLNVM_ULD_CONF_GLOBAL_DONE_MASK);
+ if (reg == (I40E_GLNVM_ULD_CONF_CORE_DONE_MASK |
+ I40E_GLNVM_ULD_CONF_GLOBAL_DONE_MASK)) {
+ DEBUGOUT1("Core and Global modules ready %d\n", cnt1);
+ break;
+ }
+ i40e_msec_delay(10);
+ }
+ if (!(reg & (I40E_GLNVM_ULD_CONF_CORE_DONE_MASK |
+ I40E_GLNVM_ULD_CONF_GLOBAL_DONE_MASK))) {
+ DEBUGOUT("wait for FW Reset complete timedout\n");
+ DEBUGOUT1("I40E_GLNVM_ULD = 0x%x\n", reg);
+ return I40E_ERR_RESET_FAILED;
+ }
+
+ /* If there was a Global Reset in progress when we got here,
+ * we don't need to do the PF Reset
+ */
+ if (!cnt) {
+ reg = rd32(hw, I40E_PFGEN_CTRL);
+ wr32(hw, I40E_PFGEN_CTRL,
+ (reg | I40E_PFGEN_CTRL_PFSWR_MASK));
+ for (cnt = 0; cnt < I40E_PF_RESET_WAIT_COUNT; cnt++) {
+ reg = rd32(hw, I40E_PFGEN_CTRL);
+ if (!(reg & I40E_PFGEN_CTRL_PFSWR_MASK))
+ break;
+ i40e_msec_delay(1);
+ }
+ if (reg & I40E_PFGEN_CTRL_PFSWR_MASK) {
+ DEBUGOUT("PF reset polling failed to complete.\n");
+ return I40E_ERR_RESET_FAILED;
+ }
+ }
+
+ i40e_clear_pxe_mode(hw);
+
+
+ return I40E_SUCCESS;
+}
+
+/**
+ * i40e_clear_hw - clear out any left over hw state
+ * @hw: pointer to the hw struct
+ *
+ * Clear queues and interrupts, typically called at init time,
+ * but after the capabilities have been found so we know how many
+ * queues and msix vectors have been allocated.
+ **/
+void i40e_clear_hw(struct i40e_hw *hw)
+{
+ u32 num_queues, base_queue;
+ u32 num_pf_int;
+ u32 num_vf_int;
+ u32 num_vfs;
+ u32 i, j;
+ u32 val;
+ u32 eol = 0x7ff;
+
+ /* get number of interrupts, queues, and vfs */
+ val = rd32(hw, I40E_GLPCI_CNF2);
+ num_pf_int = (val & I40E_GLPCI_CNF2_MSI_X_PF_N_MASK) >>
+ I40E_GLPCI_CNF2_MSI_X_PF_N_SHIFT;
+ num_vf_int = (val & I40E_GLPCI_CNF2_MSI_X_VF_N_MASK) >>
+ I40E_GLPCI_CNF2_MSI_X_VF_N_SHIFT;
+
+ val = rd32(hw, I40E_PFLAN_QALLOC);
+ base_queue = (val & I40E_PFLAN_QALLOC_FIRSTQ_MASK) >>
+ I40E_PFLAN_QALLOC_FIRSTQ_SHIFT;
+ j = (val & I40E_PFLAN_QALLOC_LASTQ_MASK) >>
+ I40E_PFLAN_QALLOC_LASTQ_SHIFT;
+ if (val & I40E_PFLAN_QALLOC_VALID_MASK)
+ num_queues = (j - base_queue) + 1;
+ else
+ num_queues = 0;
+
+ val = rd32(hw, I40E_PF_VT_PFALLOC);
+ i = (val & I40E_PF_VT_PFALLOC_FIRSTVF_MASK) >>
+ I40E_PF_VT_PFALLOC_FIRSTVF_SHIFT;
+ j = (val & I40E_PF_VT_PFALLOC_LASTVF_MASK) >>
+ I40E_PF_VT_PFALLOC_LASTVF_SHIFT;
+ if (val & I40E_PF_VT_PFALLOC_VALID_MASK)
+ num_vfs = (j - i) + 1;
+ else
+ num_vfs = 0;
+
+ /* stop all the interrupts */
+ wr32(hw, I40E_PFINT_ICR0_ENA, 0);
+ val = 0x3 << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT;
+ for (i = 0; i < num_pf_int - 2; i++)
+ wr32(hw, I40E_PFINT_DYN_CTLN(i), val);
+
+ /* Set the FIRSTQ_INDX field to 0x7FF in PFINT_LNKLSTx */
+ val = eol << I40E_PFINT_LNKLST0_FIRSTQ_INDX_SHIFT;
+ wr32(hw, I40E_PFINT_LNKLST0, val);
+ for (i = 0; i < num_pf_int - 2; i++)
+ wr32(hw, I40E_PFINT_LNKLSTN(i), val);
+ val = eol << I40E_VPINT_LNKLST0_FIRSTQ_INDX_SHIFT;
+ for (i = 0; i < num_vfs; i++)
+ wr32(hw, I40E_VPINT_LNKLST0(i), val);
+ for (i = 0; i < num_vf_int - 2; i++)
+ wr32(hw, I40E_VPINT_LNKLSTN(i), val);
+
+ /* warn the HW of the coming Tx disables */
+ for (i = 0; i < num_queues; i++) {
+ u32 abs_queue_idx = base_queue + i;
+ u32 reg_block = 0;
+
+ if (abs_queue_idx >= 128) {
+ reg_block = abs_queue_idx / 128;
+ abs_queue_idx %= 128;
+ }
+
+ val = rd32(hw, I40E_GLLAN_TXPRE_QDIS(reg_block));
+ val &= ~I40E_GLLAN_TXPRE_QDIS_QINDX_MASK;
+ val |= (abs_queue_idx << I40E_GLLAN_TXPRE_QDIS_QINDX_SHIFT);
+ val |= I40E_GLLAN_TXPRE_QDIS_SET_QDIS_MASK;
+
+ wr32(hw, I40E_GLLAN_TXPRE_QDIS(reg_block), val);
+ }
+ i40e_usec_delay(400);
+
+ /* stop all the queues */
+ for (i = 0; i < num_queues; i++) {
+ wr32(hw, I40E_QINT_TQCTL(i), 0);
+ wr32(hw, I40E_QTX_ENA(i), 0);
+ wr32(hw, I40E_QINT_RQCTL(i), 0);
+ wr32(hw, I40E_QRX_ENA(i), 0);
+ }
+
+ /* short wait for all queue disables to settle */
+ i40e_usec_delay(50);
+}
+
+/**
+ * i40e_clear_pxe_mode - clear pxe operations mode
+ * @hw: pointer to the hw struct
+ *
+ * Make sure all PXE mode settings are cleared, including things
+ * like descriptor fetch/write-back mode.
+ **/
+void i40e_clear_pxe_mode(struct i40e_hw *hw)
+{
+ if (i40e_check_asq_alive(hw))
+ i40e_aq_clear_pxe_mode(hw, NULL);
+}
+
+/**
+ * i40e_led_is_mine - helper to find matching led
+ * @hw: pointer to the hw struct
+ * @idx: index into GPIO registers
+ *
+ * returns: 0 if no match, otherwise the value of the GPIO_CTL register
+ */
+static u32 i40e_led_is_mine(struct i40e_hw *hw, int idx)
+{
+ u32 gpio_val = 0;
+ u32 port;
+
+ if (!hw->func_caps.led[idx])
+ return 0;
+
+ gpio_val = rd32(hw, I40E_GLGEN_GPIO_CTL(idx));
+ port = (gpio_val & I40E_GLGEN_GPIO_CTL_PRT_NUM_MASK) >>
+ I40E_GLGEN_GPIO_CTL_PRT_NUM_SHIFT;
+
+ /* if PRT_NUM_NA is 1 then this LED is not port specific, OR
+ * if it is not our port then ignore
+ */
+ if ((gpio_val & I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_MASK) ||
+ (port != hw->port))
+ return 0;
+
+ return gpio_val;
+}
+
+#define I40E_COMBINED_ACTIVITY 0xA
+#define I40E_FILTER_ACTIVITY 0xE
+#define I40E_LINK_ACTIVITY 0xC
+#define I40E_MAC_ACTIVITY 0xD
+#define I40E_LED0 22
+
+/**
+ * i40e_led_get - return current on/off mode
+ * @hw: pointer to the hw struct
+ *
+ * The value returned is the 'mode' field as defined in the
+ * GPIO register definitions: 0x0 = off, 0xf = on, and other
+ * values are variations of possible behaviors relating to
+ * blink, link, and wire.
+ **/
+u32 i40e_led_get(struct i40e_hw *hw)
+{
+ u32 current_mode = 0;
+ u32 mode = 0;
+ int i;
+
+ /* as per the documentation GPIO 22-29 are the LED
+ * GPIO pins named LED0..LED7
+ */
+ for (i = I40E_LED0; i <= I40E_GLGEN_GPIO_CTL_MAX_INDEX; i++) {
+ u32 gpio_val = i40e_led_is_mine(hw, i);
+
+ if (!gpio_val)
+ continue;
+
+ /* ignore gpio LED src mode entries related to the activity
+ * LEDs
+ */
+ current_mode = ((gpio_val & I40E_GLGEN_GPIO_CTL_LED_MODE_MASK)
+ >> I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT);
+ switch (current_mode) {
+ case I40E_COMBINED_ACTIVITY:
+ case I40E_FILTER_ACTIVITY:
+ case I40E_MAC_ACTIVITY:
+ continue;
+ default:
+ break;
+ }
+
+ mode = (gpio_val & I40E_GLGEN_GPIO_CTL_LED_MODE_MASK) >>
+ I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT;
+ break;
+ }
+
+ return mode;
+}
+
+/**
+ * i40e_led_set - set new on/off mode
+ * @hw: pointer to the hw struct
+ * @mode: 0=off, 0xf=on (else see manual for mode details)
+ * @blink: TRUE if the LED should blink when on, FALSE if steady
+ *
+ * if this function is used to turn on the blink it should
+ * be used to disable the blink when restoring the original state.
+ **/
+void i40e_led_set(struct i40e_hw *hw, u32 mode, bool blink)
+{
+ u32 current_mode = 0;
+ int i;
+
+ if (mode & 0xfffffff0) {
+ DEBUGOUT1("invalid mode passed in %X\n", mode);
+ }
+
+ /* as per the documentation GPIO 22-29 are the LED
+ * GPIO pins named LED0..LED7
+ */
+ for (i = I40E_LED0; i <= I40E_GLGEN_GPIO_CTL_MAX_INDEX; i++) {
+ u32 gpio_val = i40e_led_is_mine(hw, i);
+
+ if (!gpio_val)
+ continue;
+
+ /* ignore gpio LED src mode entries related to the activity
+ * LEDs
+ */
+ current_mode = ((gpio_val & I40E_GLGEN_GPIO_CTL_LED_MODE_MASK)
+ >> I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT);
+ switch (current_mode) {
+ case I40E_COMBINED_ACTIVITY:
+ case I40E_FILTER_ACTIVITY:
+ case I40E_MAC_ACTIVITY:
+ continue;
+ default:
+ break;
+ }
+
+ gpio_val &= ~I40E_GLGEN_GPIO_CTL_LED_MODE_MASK;
+ /* this & is a bit of paranoia, but serves as a range check */
+ gpio_val |= ((mode << I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT) &
+ I40E_GLGEN_GPIO_CTL_LED_MODE_MASK);
+
+ if (mode == I40E_LINK_ACTIVITY)
+ blink = FALSE;
+
+ if (blink)
+ gpio_val |= BIT(I40E_GLGEN_GPIO_CTL_LED_BLINK_SHIFT);
+ else
+ gpio_val &= ~BIT(I40E_GLGEN_GPIO_CTL_LED_BLINK_SHIFT);
+
+ wr32(hw, I40E_GLGEN_GPIO_CTL(i), gpio_val);
+ break;
+ }
+}
+
+/* Admin command wrappers */
+
+/**
+ * i40e_aq_get_phy_capabilities
+ * @hw: pointer to the hw struct
+ * @abilities: structure for PHY capabilities to be filled
+ * @qualified_modules: report Qualified Modules
+ * @report_init: report init capabilities (active are default)
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Returns the various PHY abilities supported on the Port.
+ **/
+enum i40e_status_code i40e_aq_get_phy_capabilities(struct i40e_hw *hw,
+ bool qualified_modules, bool report_init,
+ struct i40e_aq_get_phy_abilities_resp *abilities,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ enum i40e_status_code status;
+ u16 abilities_size = sizeof(struct i40e_aq_get_phy_abilities_resp);
+
+ if (!abilities)
+ return I40E_ERR_PARAM;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_get_phy_abilities);
+
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_BUF);
+ if (abilities_size > I40E_AQ_LARGE_BUF)
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_LB);
+
+ if (qualified_modules)
+ desc.params.external.param0 |=
+ CPU_TO_LE32(I40E_AQ_PHY_REPORT_QUALIFIED_MODULES);
+
+ if (report_init)
+ desc.params.external.param0 |=
+ CPU_TO_LE32(I40E_AQ_PHY_REPORT_INITIAL_VALUES);
+
+ status = i40e_asq_send_command(hw, &desc, abilities, abilities_size,
+ cmd_details);
+
+ if (hw->aq.asq_last_status == I40E_AQ_RC_EIO)
+ status = I40E_ERR_UNKNOWN_PHY;
+
+ return status;
+}
+
+/**
+ * i40e_aq_set_phy_config
+ * @hw: pointer to the hw struct
+ * @config: structure with PHY configuration to be set
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Set the various PHY configuration parameters
+ * supported on the Port.One or more of the Set PHY config parameters may be
+ * ignored in an MFP mode as the PF may not have the privilege to set some
+ * of the PHY Config parameters. This status will be indicated by the
+ * command response.
+ **/
+enum i40e_status_code i40e_aq_set_phy_config(struct i40e_hw *hw,
+ struct i40e_aq_set_phy_config *config,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aq_set_phy_config *cmd =
+ (struct i40e_aq_set_phy_config *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ if (!config)
+ return I40E_ERR_PARAM;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_phy_config);
+
+ *cmd = *config;
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_set_fc
+ * @hw: pointer to the hw struct
+ *
+ * Set the requested flow control mode using set_phy_config.
+ **/
+enum i40e_status_code i40e_set_fc(struct i40e_hw *hw, u8 *aq_failures,
+ bool atomic_restart)
+{
+ enum i40e_fc_mode fc_mode = hw->fc.requested_mode;
+ struct i40e_aq_get_phy_abilities_resp abilities;
+ struct i40e_aq_set_phy_config config;
+ enum i40e_status_code status;
+ u8 pause_mask = 0x0;
+
+ *aq_failures = 0x0;
+
+ switch (fc_mode) {
+ case I40E_FC_FULL:
+ pause_mask |= I40E_AQ_PHY_FLAG_PAUSE_TX;
+ pause_mask |= I40E_AQ_PHY_FLAG_PAUSE_RX;
+ break;
+ case I40E_FC_RX_PAUSE:
+ pause_mask |= I40E_AQ_PHY_FLAG_PAUSE_RX;
+ break;
+ case I40E_FC_TX_PAUSE:
+ pause_mask |= I40E_AQ_PHY_FLAG_PAUSE_TX;
+ break;
+ default:
+ break;
+ }
+
+ /* Get the current phy config */
+ status = i40e_aq_get_phy_capabilities(hw, FALSE, false, &abilities,
+ NULL);
+ if (status) {
+ *aq_failures |= I40E_SET_FC_AQ_FAIL_GET;
+ return status;
+ }
+
+ memset(&config, 0, sizeof(config));
+ /* clear the old pause settings */
+ config.abilities = abilities.abilities & ~(I40E_AQ_PHY_FLAG_PAUSE_TX) &
+ ~(I40E_AQ_PHY_FLAG_PAUSE_RX);
+ /* set the new abilities */
+ config.abilities |= pause_mask;
+ /* If the abilities have changed, then set the new config */
+ if (config.abilities != abilities.abilities) {
+ /* Auto restart link so settings take effect */
+ if (atomic_restart)
+ config.abilities |= I40E_AQ_PHY_ENABLE_ATOMIC_LINK;
+ /* Copy over all the old settings */
+ config.phy_type = abilities.phy_type;
+ config.link_speed = abilities.link_speed;
+ config.eee_capability = abilities.eee_capability;
+ config.eeer = abilities.eeer_val;
+ config.low_power_ctrl = abilities.d3_lpan;
+ status = i40e_aq_set_phy_config(hw, &config, NULL);
+
+ if (status)
+ *aq_failures |= I40E_SET_FC_AQ_FAIL_SET;
+ }
+ /* Update the link info */
+ status = i40e_update_link_info(hw);
+ if (status) {
+ /* Wait a little bit (on 40G cards it sometimes takes a really
+ * long time for link to come back from the atomic reset)
+ * and try once more
+ */
+ i40e_msec_delay(1000);
+ status = i40e_update_link_info(hw);
+ }
+ if (status)
+ *aq_failures |= I40E_SET_FC_AQ_FAIL_UPDATE;
+
+ return status;
+}
+
+/**
+ * i40e_aq_set_mac_config
+ * @hw: pointer to the hw struct
+ * @max_frame_size: Maximum Frame Size to be supported by the port
+ * @crc_en: Tell HW to append a CRC to outgoing frames
+ * @pacing: Pacing configurations
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Configure MAC settings for frame size, jumbo frame support and the
+ * addition of a CRC by the hardware.
+ **/
+enum i40e_status_code i40e_aq_set_mac_config(struct i40e_hw *hw,
+ u16 max_frame_size,
+ bool crc_en, u16 pacing,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aq_set_mac_config *cmd =
+ (struct i40e_aq_set_mac_config *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ if (max_frame_size == 0)
+ return I40E_ERR_PARAM;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_mac_config);
+
+ cmd->max_frame_size = CPU_TO_LE16(max_frame_size);
+ cmd->params = ((u8)pacing & 0x0F) << 3;
+ if (crc_en)
+ cmd->params |= I40E_AQ_SET_MAC_CONFIG_CRC_EN;
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_clear_pxe_mode
+ * @hw: pointer to the hw struct
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Tell the firmware that the driver is taking over from PXE
+ **/
+enum i40e_status_code i40e_aq_clear_pxe_mode(struct i40e_hw *hw,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ enum i40e_status_code status;
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_clear_pxe *cmd =
+ (struct i40e_aqc_clear_pxe *)&desc.params.raw;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_clear_pxe_mode);
+
+ cmd->rx_cnt = 0x2;
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ wr32(hw, I40E_GLLAN_RCTL_0, 0x1);
+
+ return status;
+}
+
+/**
+ * i40e_aq_set_link_restart_an
+ * @hw: pointer to the hw struct
+ * @enable_link: if TRUE: enable link, if FALSE: disable link
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Sets up the link and restarts the Auto-Negotiation over the link.
+ **/
+enum i40e_status_code i40e_aq_set_link_restart_an(struct i40e_hw *hw,
+ bool enable_link, struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_set_link_restart_an *cmd =
+ (struct i40e_aqc_set_link_restart_an *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_link_restart_an);
+
+ cmd->command = I40E_AQ_PHY_RESTART_AN;
+ if (enable_link)
+ cmd->command |= I40E_AQ_PHY_LINK_ENABLE;
+ else
+ cmd->command &= ~I40E_AQ_PHY_LINK_ENABLE;
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_get_link_info
+ * @hw: pointer to the hw struct
+ * @enable_lse: enable/disable LinkStatusEvent reporting
+ * @link: pointer to link status structure - optional
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Returns the link status of the adapter.
+ **/
+enum i40e_status_code i40e_aq_get_link_info(struct i40e_hw *hw,
+ bool enable_lse, struct i40e_link_status *link,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_get_link_status *resp =
+ (struct i40e_aqc_get_link_status *)&desc.params.raw;
+ struct i40e_link_status *hw_link_info = &hw->phy.link_info;
+ enum i40e_status_code status;
+ bool tx_pause, rx_pause;
+ u16 command_flags;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_get_link_status);
+
+ if (enable_lse)
+ command_flags = I40E_AQ_LSE_ENABLE;
+ else
+ command_flags = I40E_AQ_LSE_DISABLE;
+ resp->command_flags = CPU_TO_LE16(command_flags);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ if (status != I40E_SUCCESS)
+ goto aq_get_link_info_exit;
+
+ /* save off old link status information */
+ i40e_memcpy(&hw->phy.link_info_old, hw_link_info,
+ sizeof(*hw_link_info), I40E_NONDMA_TO_NONDMA);
+
+ /* update link status */
+ hw_link_info->phy_type = (enum i40e_aq_phy_type)resp->phy_type;
+ hw->phy.media_type = i40e_get_media_type(hw);
+ hw_link_info->link_speed = (enum i40e_aq_link_speed)resp->link_speed;
+ hw_link_info->link_info = resp->link_info;
+ hw_link_info->an_info = resp->an_info;
+ hw_link_info->ext_info = resp->ext_info;
+ hw_link_info->loopback = resp->loopback;
+ hw_link_info->max_frame_size = LE16_TO_CPU(resp->max_frame_size);
+ hw_link_info->pacing = resp->config & I40E_AQ_CONFIG_PACING_MASK;
+
+ /* update fc info */
+ tx_pause = !!(resp->an_info & I40E_AQ_LINK_PAUSE_TX);
+ rx_pause = !!(resp->an_info & I40E_AQ_LINK_PAUSE_RX);
+ if (tx_pause & rx_pause)
+ hw->fc.current_mode = I40E_FC_FULL;
+ else if (tx_pause)
+ hw->fc.current_mode = I40E_FC_TX_PAUSE;
+ else if (rx_pause)
+ hw->fc.current_mode = I40E_FC_RX_PAUSE;
+ else
+ hw->fc.current_mode = I40E_FC_NONE;
+
+ if (resp->config & I40E_AQ_CONFIG_CRC_ENA)
+ hw_link_info->crc_enable = TRUE;
+ else
+ hw_link_info->crc_enable = FALSE;
+
+ if (resp->command_flags & CPU_TO_LE16(I40E_AQ_LSE_ENABLE))
+ hw_link_info->lse_enable = TRUE;
+ else
+ hw_link_info->lse_enable = FALSE;
+
+ if ((hw->aq.fw_maj_ver < 4 || (hw->aq.fw_maj_ver == 4 &&
+ hw->aq.fw_min_ver < 40)) && hw_link_info->phy_type == 0xE)
+ hw_link_info->phy_type = I40E_PHY_TYPE_10GBASE_SFPP_CU;
+
+ /* save link status information */
+ if (link)
+ i40e_memcpy(link, hw_link_info, sizeof(*hw_link_info),
+ I40E_NONDMA_TO_NONDMA);
+
+ /* flag cleared so helper functions don't call AQ again */
+ hw->phy.get_link_info = FALSE;
+
+aq_get_link_info_exit:
+ return status;
+}
+
+/**
+ * i40e_aq_set_phy_int_mask
+ * @hw: pointer to the hw struct
+ * @mask: interrupt mask to be set
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Set link interrupt mask.
+ **/
+enum i40e_status_code i40e_aq_set_phy_int_mask(struct i40e_hw *hw,
+ u16 mask,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_set_phy_int_mask *cmd =
+ (struct i40e_aqc_set_phy_int_mask *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_phy_int_mask);
+
+ cmd->event_mask = CPU_TO_LE16(mask);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_get_local_advt_reg
+ * @hw: pointer to the hw struct
+ * @advt_reg: local AN advertisement register value
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Get the Local AN advertisement register value.
+ **/
+enum i40e_status_code i40e_aq_get_local_advt_reg(struct i40e_hw *hw,
+ u64 *advt_reg,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_an_advt_reg *resp =
+ (struct i40e_aqc_an_advt_reg *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_get_local_advt_reg);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ if (status != I40E_SUCCESS)
+ goto aq_get_local_advt_reg_exit;
+
+ *advt_reg = (u64)(LE16_TO_CPU(resp->local_an_reg1)) << 32;
+ *advt_reg |= LE32_TO_CPU(resp->local_an_reg0);
+
+aq_get_local_advt_reg_exit:
+ return status;
+}
+
+/**
+ * i40e_aq_set_local_advt_reg
+ * @hw: pointer to the hw struct
+ * @advt_reg: local AN advertisement register value
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Get the Local AN advertisement register value.
+ **/
+enum i40e_status_code i40e_aq_set_local_advt_reg(struct i40e_hw *hw,
+ u64 advt_reg,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_an_advt_reg *cmd =
+ (struct i40e_aqc_an_advt_reg *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_get_local_advt_reg);
+
+ cmd->local_an_reg0 = CPU_TO_LE32(I40E_LO_DWORD(advt_reg));
+ cmd->local_an_reg1 = CPU_TO_LE16(I40E_HI_DWORD(advt_reg));
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_get_partner_advt
+ * @hw: pointer to the hw struct
+ * @advt_reg: AN partner advertisement register value
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Get the link partner AN advertisement register value.
+ **/
+enum i40e_status_code i40e_aq_get_partner_advt(struct i40e_hw *hw,
+ u64 *advt_reg,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_an_advt_reg *resp =
+ (struct i40e_aqc_an_advt_reg *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_get_partner_advt);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ if (status != I40E_SUCCESS)
+ goto aq_get_partner_advt_exit;
+
+ *advt_reg = (u64)(LE16_TO_CPU(resp->local_an_reg1)) << 32;
+ *advt_reg |= LE32_TO_CPU(resp->local_an_reg0);
+
+aq_get_partner_advt_exit:
+ return status;
+}
+
+/**
+ * i40e_aq_set_lb_modes
+ * @hw: pointer to the hw struct
+ * @lb_modes: loopback mode to be set
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Sets loopback modes.
+ **/
+enum i40e_status_code i40e_aq_set_lb_modes(struct i40e_hw *hw,
+ u16 lb_modes,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_set_lb_mode *cmd =
+ (struct i40e_aqc_set_lb_mode *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_lb_modes);
+
+ cmd->lb_mode = CPU_TO_LE16(lb_modes);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_set_phy_debug
+ * @hw: pointer to the hw struct
+ * @cmd_flags: debug command flags
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Reset the external PHY.
+ **/
+enum i40e_status_code i40e_aq_set_phy_debug(struct i40e_hw *hw, u8 cmd_flags,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_set_phy_debug *cmd =
+ (struct i40e_aqc_set_phy_debug *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_phy_debug);
+
+ cmd->command_flags = cmd_flags;
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_add_vsi
+ * @hw: pointer to the hw struct
+ * @vsi_ctx: pointer to a vsi context struct
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Add a VSI context to the hardware.
+**/
+enum i40e_status_code i40e_aq_add_vsi(struct i40e_hw *hw,
+ struct i40e_vsi_context *vsi_ctx,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_add_get_update_vsi *cmd =
+ (struct i40e_aqc_add_get_update_vsi *)&desc.params.raw;
+ struct i40e_aqc_add_get_update_vsi_completion *resp =
+ (struct i40e_aqc_add_get_update_vsi_completion *)
+ &desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_add_vsi);
+
+ cmd->uplink_seid = CPU_TO_LE16(vsi_ctx->uplink_seid);
+ cmd->connection_type = vsi_ctx->connection_type;
+ cmd->vf_id = vsi_ctx->vf_num;
+ cmd->vsi_flags = CPU_TO_LE16(vsi_ctx->flags);
+
+ desc.flags |= CPU_TO_LE16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+
+ status = i40e_asq_send_command(hw, &desc, &vsi_ctx->info,
+ sizeof(vsi_ctx->info), cmd_details);
+
+ if (status != I40E_SUCCESS)
+ goto aq_add_vsi_exit;
+
+ vsi_ctx->seid = LE16_TO_CPU(resp->seid);
+ vsi_ctx->vsi_number = LE16_TO_CPU(resp->vsi_number);
+ vsi_ctx->vsis_allocated = LE16_TO_CPU(resp->vsi_used);
+ vsi_ctx->vsis_unallocated = LE16_TO_CPU(resp->vsi_free);
+
+aq_add_vsi_exit:
+ return status;
+}
+
+/**
+ * i40e_aq_set_default_vsi
+ * @hw: pointer to the hw struct
+ * @seid: vsi number
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+enum i40e_status_code i40e_aq_set_default_vsi(struct i40e_hw *hw,
+ u16 seid,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+ (struct i40e_aqc_set_vsi_promiscuous_modes *)
+ &desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_vsi_promiscuous_modes);
+
+ cmd->promiscuous_flags = CPU_TO_LE16(I40E_AQC_SET_VSI_DEFAULT);
+ cmd->valid_flags = CPU_TO_LE16(I40E_AQC_SET_VSI_DEFAULT);
+ cmd->seid = CPU_TO_LE16(seid);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_set_vsi_unicast_promiscuous
+ * @hw: pointer to the hw struct
+ * @seid: vsi number
+ * @set: set unicast promiscuous enable/disable
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+enum i40e_status_code i40e_aq_set_vsi_unicast_promiscuous(struct i40e_hw *hw,
+ u16 seid, bool set,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+ (struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw;
+ enum i40e_status_code status;
+ u16 flags = 0;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_vsi_promiscuous_modes);
+
+ if (set)
+ flags |= I40E_AQC_SET_VSI_PROMISC_UNICAST;
+
+ cmd->promiscuous_flags = CPU_TO_LE16(flags);
+
+ cmd->valid_flags = CPU_TO_LE16(I40E_AQC_SET_VSI_PROMISC_UNICAST);
+
+ cmd->seid = CPU_TO_LE16(seid);
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_set_vsi_multicast_promiscuous
+ * @hw: pointer to the hw struct
+ * @seid: vsi number
+ * @set: set multicast promiscuous enable/disable
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+enum i40e_status_code i40e_aq_set_vsi_multicast_promiscuous(struct i40e_hw *hw,
+ u16 seid, bool set, struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+ (struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw;
+ enum i40e_status_code status;
+ u16 flags = 0;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_vsi_promiscuous_modes);
+
+ if (set)
+ flags |= I40E_AQC_SET_VSI_PROMISC_MULTICAST;
+
+ cmd->promiscuous_flags = CPU_TO_LE16(flags);
+
+ cmd->valid_flags = CPU_TO_LE16(I40E_AQC_SET_VSI_PROMISC_MULTICAST);
+
+ cmd->seid = CPU_TO_LE16(seid);
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_set_vsi_mc_promisc_on_vlan
+ * @hw: pointer to the hw struct
+ * @seid: vsi number
+ * @enable: set MAC L2 layer unicast promiscuous enable/disable for a given VLAN
+ * @vid: The VLAN tag filter - capture any multicast packet with this VLAN tag
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+enum i40e_status_code i40e_aq_set_vsi_mc_promisc_on_vlan(struct i40e_hw *hw,
+ u16 seid, bool enable, u16 vid,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+ (struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw;
+ enum i40e_status_code status;
+ u16 flags = 0;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_vsi_promiscuous_modes);
+
+ if (enable)
+ flags |= I40E_AQC_SET_VSI_PROMISC_MULTICAST;
+
+ cmd->promiscuous_flags = CPU_TO_LE16(flags);
+ cmd->valid_flags = CPU_TO_LE16(I40E_AQC_SET_VSI_PROMISC_MULTICAST);
+ cmd->seid = CPU_TO_LE16(seid);
+ cmd->vlan_tag = CPU_TO_LE16(vid | I40E_AQC_SET_VSI_VLAN_VALID);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_set_vsi_uc_promisc_on_vlan
+ * @hw: pointer to the hw struct
+ * @seid: vsi number
+ * @enable: set MAC L2 layer unicast promiscuous enable/disable for a given VLAN
+ * @vid: The VLAN tag filter - capture any unicast packet with this VLAN tag
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+enum i40e_status_code i40e_aq_set_vsi_uc_promisc_on_vlan(struct i40e_hw *hw,
+ u16 seid, bool enable, u16 vid,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+ (struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw;
+ enum i40e_status_code status;
+ u16 flags = 0;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_vsi_promiscuous_modes);
+
+ if (enable)
+ flags |= I40E_AQC_SET_VSI_PROMISC_UNICAST;
+
+ cmd->promiscuous_flags = CPU_TO_LE16(flags);
+ cmd->valid_flags = CPU_TO_LE16(I40E_AQC_SET_VSI_PROMISC_UNICAST);
+ cmd->seid = CPU_TO_LE16(seid);
+ cmd->vlan_tag = CPU_TO_LE16(vid | I40E_AQC_SET_VSI_VLAN_VALID);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_set_vsi_broadcast
+ * @hw: pointer to the hw struct
+ * @seid: vsi number
+ * @set_filter: TRUE to set filter, FALSE to clear filter
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Set or clear the broadcast promiscuous flag (filter) for a given VSI.
+ **/
+enum i40e_status_code i40e_aq_set_vsi_broadcast(struct i40e_hw *hw,
+ u16 seid, bool set_filter,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+ (struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_vsi_promiscuous_modes);
+
+ if (set_filter)
+ cmd->promiscuous_flags
+ |= CPU_TO_LE16(I40E_AQC_SET_VSI_PROMISC_BROADCAST);
+ else
+ cmd->promiscuous_flags
+ &= CPU_TO_LE16(~I40E_AQC_SET_VSI_PROMISC_BROADCAST);
+
+ cmd->valid_flags = CPU_TO_LE16(I40E_AQC_SET_VSI_PROMISC_BROADCAST);
+ cmd->seid = CPU_TO_LE16(seid);
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_get_vsi_params - get VSI configuration info
+ * @hw: pointer to the hw struct
+ * @vsi_ctx: pointer to a vsi context struct
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+enum i40e_status_code i40e_aq_get_vsi_params(struct i40e_hw *hw,
+ struct i40e_vsi_context *vsi_ctx,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_add_get_update_vsi *cmd =
+ (struct i40e_aqc_add_get_update_vsi *)&desc.params.raw;
+ struct i40e_aqc_add_get_update_vsi_completion *resp =
+ (struct i40e_aqc_add_get_update_vsi_completion *)
+ &desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_get_vsi_parameters);
+
+ cmd->uplink_seid = CPU_TO_LE16(vsi_ctx->seid);
+
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_BUF);
+
+ status = i40e_asq_send_command(hw, &desc, &vsi_ctx->info,
+ sizeof(vsi_ctx->info), NULL);
+
+ if (status != I40E_SUCCESS)
+ goto aq_get_vsi_params_exit;
+
+ vsi_ctx->seid = LE16_TO_CPU(resp->seid);
+ vsi_ctx->vsi_number = LE16_TO_CPU(resp->vsi_number);
+ vsi_ctx->vsis_allocated = LE16_TO_CPU(resp->vsi_used);
+ vsi_ctx->vsis_unallocated = LE16_TO_CPU(resp->vsi_free);
+
+aq_get_vsi_params_exit:
+ return status;
+}
+
+/**
+ * i40e_aq_update_vsi_params
+ * @hw: pointer to the hw struct
+ * @vsi_ctx: pointer to a vsi context struct
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Update a VSI context.
+ **/
+enum i40e_status_code i40e_aq_update_vsi_params(struct i40e_hw *hw,
+ struct i40e_vsi_context *vsi_ctx,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_add_get_update_vsi *cmd =
+ (struct i40e_aqc_add_get_update_vsi *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_update_vsi_parameters);
+ cmd->uplink_seid = CPU_TO_LE16(vsi_ctx->seid);
+
+ desc.flags |= CPU_TO_LE16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+
+ status = i40e_asq_send_command(hw, &desc, &vsi_ctx->info,
+ sizeof(vsi_ctx->info), cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_get_switch_config
+ * @hw: pointer to the hardware structure
+ * @buf: pointer to the result buffer
+ * @buf_size: length of input buffer
+ * @start_seid: seid to start for the report, 0 == beginning
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Fill the buf with switch configuration returned from AdminQ command
+ **/
+enum i40e_status_code i40e_aq_get_switch_config(struct i40e_hw *hw,
+ struct i40e_aqc_get_switch_config_resp *buf,
+ u16 buf_size, u16 *start_seid,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_switch_seid *scfg =
+ (struct i40e_aqc_switch_seid *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_get_switch_config);
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_BUF);
+ if (buf_size > I40E_AQ_LARGE_BUF)
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_LB);
+ scfg->seid = CPU_TO_LE16(*start_seid);
+
+ status = i40e_asq_send_command(hw, &desc, buf, buf_size, cmd_details);
+ *start_seid = LE16_TO_CPU(scfg->seid);
+
+ return status;
+}
+
+/**
+ * i40e_aq_get_firmware_version
+ * @hw: pointer to the hw struct
+ * @fw_major_version: firmware major version
+ * @fw_minor_version: firmware minor version
+ * @fw_build: firmware build number
+ * @api_major_version: major queue version
+ * @api_minor_version: minor queue version
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Get the firmware version from the admin queue commands
+ **/
+enum i40e_status_code i40e_aq_get_firmware_version(struct i40e_hw *hw,
+ u16 *fw_major_version, u16 *fw_minor_version,
+ u32 *fw_build,
+ u16 *api_major_version, u16 *api_minor_version,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_get_version *resp =
+ (struct i40e_aqc_get_version *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_get_version);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ if (status == I40E_SUCCESS) {
+ if (fw_major_version != NULL)
+ *fw_major_version = LE16_TO_CPU(resp->fw_major);
+ if (fw_minor_version != NULL)
+ *fw_minor_version = LE16_TO_CPU(resp->fw_minor);
+ if (fw_build != NULL)
+ *fw_build = LE32_TO_CPU(resp->fw_build);
+ if (api_major_version != NULL)
+ *api_major_version = LE16_TO_CPU(resp->api_major);
+ if (api_minor_version != NULL)
+ *api_minor_version = LE16_TO_CPU(resp->api_minor);
+
+ /* A workaround to fix the API version in SW */
+ if (api_major_version && api_minor_version &&
+ fw_major_version && fw_minor_version &&
+ ((*api_major_version == 1) && (*api_minor_version == 1)) &&
+ (((*fw_major_version == 4) && (*fw_minor_version >= 2)) ||
+ (*fw_major_version > 4)))
+ *api_minor_version = 2;
+ }
+
+ return status;
+}
+
+/**
+ * i40e_aq_send_driver_version
+ * @hw: pointer to the hw struct
+ * @dv: driver's major, minor version
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Send the driver version to the firmware
+ **/
+enum i40e_status_code i40e_aq_send_driver_version(struct i40e_hw *hw,
+ struct i40e_driver_version *dv,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_driver_version *cmd =
+ (struct i40e_aqc_driver_version *)&desc.params.raw;
+ enum i40e_status_code status;
+ u16 len;
+
+ if (dv == NULL)
+ return I40E_ERR_PARAM;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_driver_version);
+
+ desc.flags |= CPU_TO_LE16(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD);
+ cmd->driver_major_ver = dv->major_version;
+ cmd->driver_minor_ver = dv->minor_version;
+ cmd->driver_build_ver = dv->build_version;
+ cmd->driver_subbuild_ver = dv->subbuild_version;
+
+ len = 0;
+ while (len < sizeof(dv->driver_string) &&
+ (dv->driver_string[len] < 0x80) &&
+ dv->driver_string[len])
+ len++;
+ status = i40e_asq_send_command(hw, &desc, dv->driver_string,
+ len, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_get_link_status - get status of the HW network link
+ * @hw: pointer to the hw struct
+ * @link_up: pointer to bool (TRUE/FALSE = linkup/linkdown)
+ *
+ * Variable link_up TRUE if link is up, FALSE if link is down.
+ * The variable link_up is invalid if returned value of status != I40E_SUCCESS
+ *
+ * Side effect: LinkStatusEvent reporting becomes enabled
+ **/
+enum i40e_status_code i40e_get_link_status(struct i40e_hw *hw, bool *link_up)
+{
+ enum i40e_status_code status = I40E_SUCCESS;
+
+ if (hw->phy.get_link_info) {
+ status = i40e_update_link_info(hw);
+
+ if (status != I40E_SUCCESS)
+ i40e_debug(hw, I40E_DEBUG_LINK, "get link failed: status %d\n",
+ status);
+ }
+
+ *link_up = hw->phy.link_info.link_info & I40E_AQ_LINK_UP;
+
+ return status;
+}
+
+/**
+ * i40e_updatelink_status - update status of the HW network link
+ * @hw: pointer to the hw struct
+ **/
+enum i40e_status_code i40e_update_link_info(struct i40e_hw *hw)
+{
+ struct i40e_aq_get_phy_abilities_resp abilities;
+ enum i40e_status_code status = I40E_SUCCESS;
+
+ status = i40e_aq_get_link_info(hw, TRUE, NULL, NULL);
+ if (status)
+ return status;
+
+ status = i40e_aq_get_phy_capabilities(hw, FALSE, false, &abilities,
+ NULL);
+ if (status)
+ return status;
+
+ memcpy(hw->phy.link_info.module_type, &abilities.module_type,
+ sizeof(hw->phy.link_info.module_type));
+
+ return status;
+}
+
+
+/**
+ * i40e_get_link_speed
+ * @hw: pointer to the hw struct
+ *
+ * Returns the link speed of the adapter.
+ **/
+enum i40e_aq_link_speed i40e_get_link_speed(struct i40e_hw *hw)
+{
+ enum i40e_aq_link_speed speed = I40E_LINK_SPEED_UNKNOWN;
+ enum i40e_status_code status = I40E_SUCCESS;
+
+ if (hw->phy.get_link_info) {
+ status = i40e_aq_get_link_info(hw, TRUE, NULL, NULL);
+
+ if (status != I40E_SUCCESS)
+ goto i40e_link_speed_exit;
+ }
+
+ speed = hw->phy.link_info.link_speed;
+
+i40e_link_speed_exit:
+ return speed;
+}
+
+/**
+ * i40e_aq_add_veb - Insert a VEB between the VSI and the MAC
+ * @hw: pointer to the hw struct
+ * @uplink_seid: the MAC or other gizmo SEID
+ * @downlink_seid: the VSI SEID
+ * @enabled_tc: bitmap of TCs to be enabled
+ * @default_port: TRUE for default port VSI, FALSE for control port
+ * @enable_l2_filtering: TRUE to add L2 filter table rules to regular forwarding rules for cloud support
+ * @veb_seid: pointer to where to put the resulting VEB SEID
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * This asks the FW to add a VEB between the uplink and downlink
+ * elements. If the uplink SEID is 0, this will be a floating VEB.
+ **/
+enum i40e_status_code i40e_aq_add_veb(struct i40e_hw *hw, u16 uplink_seid,
+ u16 downlink_seid, u8 enabled_tc,
+ bool default_port, bool enable_l2_filtering,
+ u16 *veb_seid,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_add_veb *cmd =
+ (struct i40e_aqc_add_veb *)&desc.params.raw;
+ struct i40e_aqc_add_veb_completion *resp =
+ (struct i40e_aqc_add_veb_completion *)&desc.params.raw;
+ enum i40e_status_code status;
+ u16 veb_flags = 0;
+
+ /* SEIDs need to either both be set or both be 0 for floating VEB */
+ if (!!uplink_seid != !!downlink_seid)
+ return I40E_ERR_PARAM;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_add_veb);
+
+ cmd->uplink_seid = CPU_TO_LE16(uplink_seid);
+ cmd->downlink_seid = CPU_TO_LE16(downlink_seid);
+ cmd->enable_tcs = enabled_tc;
+ if (!uplink_seid)
+ veb_flags |= I40E_AQC_ADD_VEB_FLOATING;
+ if (default_port)
+ veb_flags |= I40E_AQC_ADD_VEB_PORT_TYPE_DEFAULT;
+ else
+ veb_flags |= I40E_AQC_ADD_VEB_PORT_TYPE_DATA;
+
+ if (enable_l2_filtering)
+ veb_flags |= I40E_AQC_ADD_VEB_ENABLE_L2_FILTER;
+
+ cmd->veb_flags = CPU_TO_LE16(veb_flags);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ if (!status && veb_seid)
+ *veb_seid = LE16_TO_CPU(resp->veb_seid);
+
+ return status;
+}
+
+/**
+ * i40e_aq_get_veb_parameters - Retrieve VEB parameters
+ * @hw: pointer to the hw struct
+ * @veb_seid: the SEID of the VEB to query
+ * @switch_id: the uplink switch id
+ * @floating: set to TRUE if the VEB is floating
+ * @statistic_index: index of the stats counter block for this VEB
+ * @vebs_used: number of VEB's used by function
+ * @vebs_free: total VEB's not reserved by any function
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * This retrieves the parameters for a particular VEB, specified by
+ * uplink_seid, and returns them to the caller.
+ **/
+enum i40e_status_code i40e_aq_get_veb_parameters(struct i40e_hw *hw,
+ u16 veb_seid, u16 *switch_id,
+ bool *floating, u16 *statistic_index,
+ u16 *vebs_used, u16 *vebs_free,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_get_veb_parameters_completion *cmd_resp =
+ (struct i40e_aqc_get_veb_parameters_completion *)
+ &desc.params.raw;
+ enum i40e_status_code status;
+
+ if (veb_seid == 0)
+ return I40E_ERR_PARAM;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_get_veb_parameters);
+ cmd_resp->seid = CPU_TO_LE16(veb_seid);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+ if (status)
+ goto get_veb_exit;
+
+ if (switch_id)
+ *switch_id = LE16_TO_CPU(cmd_resp->switch_id);
+ if (statistic_index)
+ *statistic_index = LE16_TO_CPU(cmd_resp->statistic_index);
+ if (vebs_used)
+ *vebs_used = LE16_TO_CPU(cmd_resp->vebs_used);
+ if (vebs_free)
+ *vebs_free = LE16_TO_CPU(cmd_resp->vebs_free);
+ if (floating) {
+ u16 flags = LE16_TO_CPU(cmd_resp->veb_flags);
+
+ if (flags & I40E_AQC_ADD_VEB_FLOATING)
+ *floating = TRUE;
+ else
+ *floating = FALSE;
+ }
+
+get_veb_exit:
+ return status;
+}
+
+/**
+ * i40e_aq_add_macvlan
+ * @hw: pointer to the hw struct
+ * @seid: VSI for the mac address
+ * @mv_list: list of macvlans to be added
+ * @count: length of the list
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Add MAC/VLAN addresses to the HW filtering
+ **/
+enum i40e_status_code i40e_aq_add_macvlan(struct i40e_hw *hw, u16 seid,
+ struct i40e_aqc_add_macvlan_element_data *mv_list,
+ u16 count, struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_macvlan *cmd =
+ (struct i40e_aqc_macvlan *)&desc.params.raw;
+ enum i40e_status_code status;
+ u16 buf_size;
+
+ if (count == 0 || !mv_list || !hw)
+ return I40E_ERR_PARAM;
+
+ buf_size = count * sizeof(*mv_list);
+
+ /* prep the rest of the request */
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_add_macvlan);
+ cmd->num_addresses = CPU_TO_LE16(count);
+ cmd->seid[0] = CPU_TO_LE16(I40E_AQC_MACVLAN_CMD_SEID_VALID | seid);
+ cmd->seid[1] = 0;
+ cmd->seid[2] = 0;
+
+ desc.flags |= CPU_TO_LE16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+ if (buf_size > I40E_AQ_LARGE_BUF)
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_LB);
+
+ status = i40e_asq_send_command(hw, &desc, mv_list, buf_size,
+ cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_remove_macvlan
+ * @hw: pointer to the hw struct
+ * @seid: VSI for the mac address
+ * @mv_list: list of macvlans to be removed
+ * @count: length of the list
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Remove MAC/VLAN addresses from the HW filtering
+ **/
+enum i40e_status_code i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 seid,
+ struct i40e_aqc_remove_macvlan_element_data *mv_list,
+ u16 count, struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_macvlan *cmd =
+ (struct i40e_aqc_macvlan *)&desc.params.raw;
+ enum i40e_status_code status;
+ u16 buf_size;
+
+ if (count == 0 || !mv_list || !hw)
+ return I40E_ERR_PARAM;
+
+ buf_size = count * sizeof(*mv_list);
+
+ /* prep the rest of the request */
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_remove_macvlan);
+ cmd->num_addresses = CPU_TO_LE16(count);
+ cmd->seid[0] = CPU_TO_LE16(I40E_AQC_MACVLAN_CMD_SEID_VALID | seid);
+ cmd->seid[1] = 0;
+ cmd->seid[2] = 0;
+
+ desc.flags |= CPU_TO_LE16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+ if (buf_size > I40E_AQ_LARGE_BUF)
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_LB);
+
+ status = i40e_asq_send_command(hw, &desc, mv_list, buf_size,
+ cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_add_vlan - Add VLAN ids to the HW filtering
+ * @hw: pointer to the hw struct
+ * @seid: VSI for the vlan filters
+ * @v_list: list of vlan filters to be added
+ * @count: length of the list
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+enum i40e_status_code i40e_aq_add_vlan(struct i40e_hw *hw, u16 seid,
+ struct i40e_aqc_add_remove_vlan_element_data *v_list,
+ u8 count, struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_macvlan *cmd =
+ (struct i40e_aqc_macvlan *)&desc.params.raw;
+ enum i40e_status_code status;
+ u16 buf_size;
+
+ if (count == 0 || !v_list || !hw)
+ return I40E_ERR_PARAM;
+
+ buf_size = count * sizeof(*v_list);
+
+ /* prep the rest of the request */
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_add_vlan);
+ cmd->num_addresses = CPU_TO_LE16(count);
+ cmd->seid[0] = CPU_TO_LE16(seid | I40E_AQC_MACVLAN_CMD_SEID_VALID);
+ cmd->seid[1] = 0;
+ cmd->seid[2] = 0;
+
+ desc.flags |= CPU_TO_LE16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+ if (buf_size > I40E_AQ_LARGE_BUF)
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_LB);
+
+ status = i40e_asq_send_command(hw, &desc, v_list, buf_size,
+ cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_remove_vlan - Remove VLANs from the HW filtering
+ * @hw: pointer to the hw struct
+ * @seid: VSI for the vlan filters
+ * @v_list: list of macvlans to be removed
+ * @count: length of the list
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+enum i40e_status_code i40e_aq_remove_vlan(struct i40e_hw *hw, u16 seid,
+ struct i40e_aqc_add_remove_vlan_element_data *v_list,
+ u8 count, struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_macvlan *cmd =
+ (struct i40e_aqc_macvlan *)&desc.params.raw;
+ enum i40e_status_code status;
+ u16 buf_size;
+
+ if (count == 0 || !v_list || !hw)
+ return I40E_ERR_PARAM;
+
+ buf_size = count * sizeof(*v_list);
+
+ /* prep the rest of the request */
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_remove_vlan);
+ cmd->num_addresses = CPU_TO_LE16(count);
+ cmd->seid[0] = CPU_TO_LE16(seid | I40E_AQC_MACVLAN_CMD_SEID_VALID);
+ cmd->seid[1] = 0;
+ cmd->seid[2] = 0;
+
+ desc.flags |= CPU_TO_LE16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+ if (buf_size > I40E_AQ_LARGE_BUF)
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_LB);
+
+ status = i40e_asq_send_command(hw, &desc, v_list, buf_size,
+ cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_send_msg_to_vf
+ * @hw: pointer to the hardware structure
+ * @vfid: vf id to send msg
+ * @v_opcode: opcodes for VF-PF communication
+ * @v_retval: return error code
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ * @cmd_details: pointer to command details
+ *
+ * send msg to vf
+ **/
+enum i40e_status_code i40e_aq_send_msg_to_vf(struct i40e_hw *hw, u16 vfid,
+ u32 v_opcode, u32 v_retval, u8 *msg, u16 msglen,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_pf_vf_message *cmd =
+ (struct i40e_aqc_pf_vf_message *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_send_msg_to_vf);
+ cmd->id = CPU_TO_LE32(vfid);
+ desc.cookie_high = CPU_TO_LE32(v_opcode);
+ desc.cookie_low = CPU_TO_LE32(v_retval);
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_SI);
+ if (msglen) {
+ desc.flags |= CPU_TO_LE16((u16)(I40E_AQ_FLAG_BUF |
+ I40E_AQ_FLAG_RD));
+ if (msglen > I40E_AQ_LARGE_BUF)
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_LB);
+ desc.datalen = CPU_TO_LE16(msglen);
+ }
+ status = i40e_asq_send_command(hw, &desc, msg, msglen, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_debug_read_register
+ * @hw: pointer to the hw struct
+ * @reg_addr: register address
+ * @reg_val: register value
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Read the register using the admin queue commands
+ **/
+enum i40e_status_code i40e_aq_debug_read_register(struct i40e_hw *hw,
+ u32 reg_addr, u64 *reg_val,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_debug_reg_read_write *cmd_resp =
+ (struct i40e_aqc_debug_reg_read_write *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ if (reg_val == NULL)
+ return I40E_ERR_PARAM;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_debug_read_reg);
+
+ cmd_resp->address = CPU_TO_LE32(reg_addr);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ if (status == I40E_SUCCESS) {
+ *reg_val = ((u64)LE32_TO_CPU(cmd_resp->value_high) << 32) |
+ (u64)LE32_TO_CPU(cmd_resp->value_low);
+ }
+
+ return status;
+}
+
+/**
+ * i40e_aq_debug_write_register
+ * @hw: pointer to the hw struct
+ * @reg_addr: register address
+ * @reg_val: register value
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Write to a register using the admin queue commands
+ **/
+enum i40e_status_code i40e_aq_debug_write_register(struct i40e_hw *hw,
+ u32 reg_addr, u64 reg_val,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_debug_reg_read_write *cmd =
+ (struct i40e_aqc_debug_reg_read_write *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_debug_write_reg);
+
+ cmd->address = CPU_TO_LE32(reg_addr);
+ cmd->value_high = CPU_TO_LE32((u32)(reg_val >> 32));
+ cmd->value_low = CPU_TO_LE32((u32)(reg_val & 0xFFFFFFFF));
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_get_hmc_resource_profile
+ * @hw: pointer to the hw struct
+ * @profile: type of profile the HMC is to be set as
+ * @pe_vf_enabled_count: the number of PE enabled VFs the system has
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * query the HMC profile of the device.
+ **/
+enum i40e_status_code i40e_aq_get_hmc_resource_profile(struct i40e_hw *hw,
+ enum i40e_aq_hmc_profile *profile,
+ u8 *pe_vf_enabled_count,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aq_get_set_hmc_resource_profile *resp =
+ (struct i40e_aq_get_set_hmc_resource_profile *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_query_hmc_resource_profile);
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ *profile = (enum i40e_aq_hmc_profile)(resp->pm_profile &
+ I40E_AQ_GET_HMC_RESOURCE_PROFILE_PM_MASK);
+ *pe_vf_enabled_count = resp->pe_vf_enabled &
+ I40E_AQ_GET_HMC_RESOURCE_PROFILE_COUNT_MASK;
+
+ return status;
+}
+
+/**
+ * i40e_aq_set_hmc_resource_profile
+ * @hw: pointer to the hw struct
+ * @profile: type of profile the HMC is to be set as
+ * @pe_vf_enabled_count: the number of PE enabled VFs the system has
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * set the HMC profile of the device.
+ **/
+enum i40e_status_code i40e_aq_set_hmc_resource_profile(struct i40e_hw *hw,
+ enum i40e_aq_hmc_profile profile,
+ u8 pe_vf_enabled_count,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aq_get_set_hmc_resource_profile *cmd =
+ (struct i40e_aq_get_set_hmc_resource_profile *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_hmc_resource_profile);
+
+ cmd->pm_profile = (u8)profile;
+ cmd->pe_vf_enabled = pe_vf_enabled_count;
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_request_resource
+ * @hw: pointer to the hw struct
+ * @resource: resource id
+ * @access: access type
+ * @sdp_number: resource number
+ * @timeout: the maximum time in ms that the driver may hold the resource
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * requests common resource using the admin queue commands
+ **/
+enum i40e_status_code i40e_aq_request_resource(struct i40e_hw *hw,
+ enum i40e_aq_resources_ids resource,
+ enum i40e_aq_resource_access_type access,
+ u8 sdp_number, u64 *timeout,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_request_resource *cmd_resp =
+ (struct i40e_aqc_request_resource *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ DEBUGFUNC("i40e_aq_request_resource");
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_request_resource);
+
+ cmd_resp->resource_id = CPU_TO_LE16(resource);
+ cmd_resp->access_type = CPU_TO_LE16(access);
+ cmd_resp->resource_number = CPU_TO_LE32(sdp_number);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+ /* The completion specifies the maximum time in ms that the driver
+ * may hold the resource in the Timeout field.
+ * If the resource is held by someone else, the command completes with
+ * busy return value and the timeout field indicates the maximum time
+ * the current owner of the resource has to free it.
+ */
+ if (status == I40E_SUCCESS || hw->aq.asq_last_status == I40E_AQ_RC_EBUSY)
+ *timeout = LE32_TO_CPU(cmd_resp->timeout);
+
+ return status;
+}
+
+/**
+ * i40e_aq_release_resource
+ * @hw: pointer to the hw struct
+ * @resource: resource id
+ * @sdp_number: resource number
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * release common resource using the admin queue commands
+ **/
+enum i40e_status_code i40e_aq_release_resource(struct i40e_hw *hw,
+ enum i40e_aq_resources_ids resource,
+ u8 sdp_number,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_request_resource *cmd =
+ (struct i40e_aqc_request_resource *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ DEBUGFUNC("i40e_aq_release_resource");
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_release_resource);
+
+ cmd->resource_id = CPU_TO_LE16(resource);
+ cmd->resource_number = CPU_TO_LE32(sdp_number);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_read_nvm
+ * @hw: pointer to the hw struct
+ * @module_pointer: module pointer location in words from the NVM beginning
+ * @offset: byte offset from the module beginning
+ * @length: length of the section to be read (in bytes from the offset)
+ * @data: command buffer (size [bytes] = length)
+ * @last_command: tells if this is the last command in a series
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Read the NVM using the admin queue commands
+ **/
+enum i40e_status_code i40e_aq_read_nvm(struct i40e_hw *hw, u8 module_pointer,
+ u32 offset, u16 length, void *data,
+ bool last_command,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_nvm_update *cmd =
+ (struct i40e_aqc_nvm_update *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ DEBUGFUNC("i40e_aq_read_nvm");
+
+ /* In offset the highest byte must be zeroed. */
+ if (offset & 0xFF000000) {
+ status = I40E_ERR_PARAM;
+ goto i40e_aq_read_nvm_exit;
+ }
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_nvm_read);
+
+ /* If this is the last command in a series, set the proper flag. */
+ if (last_command)
+ cmd->command_flags |= I40E_AQ_NVM_LAST_CMD;
+ cmd->module_pointer = module_pointer;
+ cmd->offset = CPU_TO_LE32(offset);
+ cmd->length = CPU_TO_LE16(length);
+
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_BUF);
+ if (length > I40E_AQ_LARGE_BUF)
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_LB);
+
+ status = i40e_asq_send_command(hw, &desc, data, length, cmd_details);
+
+i40e_aq_read_nvm_exit:
+ return status;
+}
+
+/**
+ * i40e_aq_read_nvm_config - read an nvm config block
+ * @hw: pointer to the hw struct
+ * @cmd_flags: NVM access admin command bits
+ * @field_id: field or feature id
+ * @data: buffer for result
+ * @buf_size: buffer size
+ * @element_count: pointer to count of elements read by FW
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+enum i40e_status_code i40e_aq_read_nvm_config(struct i40e_hw *hw,
+ u8 cmd_flags, u32 field_id, void *data,
+ u16 buf_size, u16 *element_count,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_nvm_config_read *cmd =
+ (struct i40e_aqc_nvm_config_read *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_nvm_config_read);
+ desc.flags |= CPU_TO_LE16((u16)(I40E_AQ_FLAG_BUF));
+ if (buf_size > I40E_AQ_LARGE_BUF)
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_LB);
+
+ cmd->cmd_flags = CPU_TO_LE16(cmd_flags);
+ cmd->element_id = CPU_TO_LE16((u16)(0xffff & field_id));
+ if (cmd_flags & I40E_AQ_ANVM_FEATURE_OR_IMMEDIATE_MASK)
+ cmd->element_id_msw = CPU_TO_LE16((u16)(field_id >> 16));
+ else
+ cmd->element_id_msw = 0;
+
+ status = i40e_asq_send_command(hw, &desc, data, buf_size, cmd_details);
+
+ if (!status && element_count)
+ *element_count = LE16_TO_CPU(cmd->element_count);
+
+ return status;
+}
+
+/**
+ * i40e_aq_write_nvm_config - write an nvm config block
+ * @hw: pointer to the hw struct
+ * @cmd_flags: NVM access admin command bits
+ * @data: buffer for result
+ * @buf_size: buffer size
+ * @element_count: count of elements to be written
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+enum i40e_status_code i40e_aq_write_nvm_config(struct i40e_hw *hw,
+ u8 cmd_flags, void *data, u16 buf_size,
+ u16 element_count,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_nvm_config_write *cmd =
+ (struct i40e_aqc_nvm_config_write *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_nvm_config_write);
+ desc.flags |= CPU_TO_LE16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+ if (buf_size > I40E_AQ_LARGE_BUF)
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_LB);
+
+ cmd->element_count = CPU_TO_LE16(element_count);
+ cmd->cmd_flags = CPU_TO_LE16(cmd_flags);
+ status = i40e_asq_send_command(hw, &desc, data, buf_size, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_oem_post_update - triggers an OEM specific flow after update
+ * @hw: pointer to the hw struct
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+enum i40e_status_code i40e_aq_oem_post_update(struct i40e_hw *hw,
+ void *buff, u16 buff_size,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ enum i40e_status_code status;
+
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_oem_post_update);
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+ if (status && LE16_TO_CPU(desc.retval) == I40E_AQ_RC_ESRCH)
+ status = I40E_ERR_NOT_IMPLEMENTED;
+
+ return status;
+}
+
+/**
+ * i40e_aq_erase_nvm
+ * @hw: pointer to the hw struct
+ * @module_pointer: module pointer location in words from the NVM beginning
+ * @offset: offset in the module (expressed in 4 KB from module's beginning)
+ * @length: length of the section to be erased (expressed in 4 KB)
+ * @last_command: tells if this is the last command in a series
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Erase the NVM sector using the admin queue commands
+ **/
+enum i40e_status_code i40e_aq_erase_nvm(struct i40e_hw *hw, u8 module_pointer,
+ u32 offset, u16 length, bool last_command,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_nvm_update *cmd =
+ (struct i40e_aqc_nvm_update *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ DEBUGFUNC("i40e_aq_erase_nvm");
+
+ /* In offset the highest byte must be zeroed. */
+ if (offset & 0xFF000000) {
+ status = I40E_ERR_PARAM;
+ goto i40e_aq_erase_nvm_exit;
+ }
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_nvm_erase);
+
+ /* If this is the last command in a series, set the proper flag. */
+ if (last_command)
+ cmd->command_flags |= I40E_AQ_NVM_LAST_CMD;
+ cmd->module_pointer = module_pointer;
+ cmd->offset = CPU_TO_LE32(offset);
+ cmd->length = CPU_TO_LE16(length);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+i40e_aq_erase_nvm_exit:
+ return status;
+}
+
+#define I40E_DEV_FUNC_CAP_SWITCH_MODE 0x01
+#define I40E_DEV_FUNC_CAP_MGMT_MODE 0x02
+#define I40E_DEV_FUNC_CAP_NPAR 0x03
+#define I40E_DEV_FUNC_CAP_OS2BMC 0x04
+#define I40E_DEV_FUNC_CAP_VALID_FUNC 0x05
+#define I40E_DEV_FUNC_CAP_SRIOV_1_1 0x12
+#define I40E_DEV_FUNC_CAP_VF 0x13
+#define I40E_DEV_FUNC_CAP_VMDQ 0x14
+#define I40E_DEV_FUNC_CAP_802_1_QBG 0x15
+#define I40E_DEV_FUNC_CAP_802_1_QBH 0x16
+#define I40E_DEV_FUNC_CAP_VSI 0x17
+#define I40E_DEV_FUNC_CAP_DCB 0x18
+#define I40E_DEV_FUNC_CAP_FCOE 0x21
+#define I40E_DEV_FUNC_CAP_ISCSI 0x22
+#define I40E_DEV_FUNC_CAP_RSS 0x40
+#define I40E_DEV_FUNC_CAP_RX_QUEUES 0x41
+#define I40E_DEV_FUNC_CAP_TX_QUEUES 0x42
+#define I40E_DEV_FUNC_CAP_MSIX 0x43
+#define I40E_DEV_FUNC_CAP_MSIX_VF 0x44
+#define I40E_DEV_FUNC_CAP_FLOW_DIRECTOR 0x45
+#define I40E_DEV_FUNC_CAP_IEEE_1588 0x46
+#define I40E_DEV_FUNC_CAP_FLEX10 0xF1
+#define I40E_DEV_FUNC_CAP_CEM 0xF2
+#define I40E_DEV_FUNC_CAP_IWARP 0x51
+#define I40E_DEV_FUNC_CAP_LED 0x61
+#define I40E_DEV_FUNC_CAP_SDP 0x62
+#define I40E_DEV_FUNC_CAP_MDIO 0x63
+#define I40E_DEV_FUNC_CAP_WR_CSR_PROT 0x64
+
+/**
+ * i40e_parse_discover_capabilities
+ * @hw: pointer to the hw struct
+ * @buff: pointer to a buffer containing device/function capability records
+ * @cap_count: number of capability records in the list
+ * @list_type_opc: type of capabilities list to parse
+ *
+ * Parse the device/function capabilities list.
+ **/
+static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff,
+ u32 cap_count,
+ enum i40e_admin_queue_opc list_type_opc)
+{
+ struct i40e_aqc_list_capabilities_element_resp *cap;
+ u32 valid_functions, num_functions;
+ u32 number, logical_id, phys_id;
+ struct i40e_hw_capabilities *p;
+ u8 major_rev;
+ u32 i = 0;
+ u16 id;
+
+ cap = (struct i40e_aqc_list_capabilities_element_resp *) buff;
+
+ if (list_type_opc == i40e_aqc_opc_list_dev_capabilities)
+ p = (struct i40e_hw_capabilities *)&hw->dev_caps;
+ else if (list_type_opc == i40e_aqc_opc_list_func_capabilities)
+ p = (struct i40e_hw_capabilities *)&hw->func_caps;
+ else
+ return;
+
+ for (i = 0; i < cap_count; i++, cap++) {
+ id = LE16_TO_CPU(cap->id);
+ number = LE32_TO_CPU(cap->number);
+ logical_id = LE32_TO_CPU(cap->logical_id);
+ phys_id = LE32_TO_CPU(cap->phys_id);
+ major_rev = cap->major_rev;
+
+ switch (id) {
+ case I40E_DEV_FUNC_CAP_SWITCH_MODE:
+ p->switch_mode = number;
+ break;
+ case I40E_DEV_FUNC_CAP_MGMT_MODE:
+ p->management_mode = number;
+ break;
+ case I40E_DEV_FUNC_CAP_NPAR:
+ p->npar_enable = number;
+ break;
+ case I40E_DEV_FUNC_CAP_OS2BMC:
+ p->os2bmc = number;
+ break;
+ case I40E_DEV_FUNC_CAP_VALID_FUNC:
+ p->valid_functions = number;
+ break;
+ case I40E_DEV_FUNC_CAP_SRIOV_1_1:
+ if (number == 1)
+ p->sr_iov_1_1 = TRUE;
+ break;
+ case I40E_DEV_FUNC_CAP_VF:
+ p->num_vfs = number;
+ p->vf_base_id = logical_id;
+ break;
+ case I40E_DEV_FUNC_CAP_VMDQ:
+ if (number == 1)
+ p->vmdq = TRUE;
+ break;
+ case I40E_DEV_FUNC_CAP_802_1_QBG:
+ if (number == 1)
+ p->evb_802_1_qbg = TRUE;
+ break;
+ case I40E_DEV_FUNC_CAP_802_1_QBH:
+ if (number == 1)
+ p->evb_802_1_qbh = TRUE;
+ break;
+ case I40E_DEV_FUNC_CAP_VSI:
+ p->num_vsis = number;
+ break;
+ case I40E_DEV_FUNC_CAP_DCB:
+ if (number == 1) {
+ p->dcb = TRUE;
+ p->enabled_tcmap = logical_id;
+ p->maxtc = phys_id;
+ }
+ break;
+ case I40E_DEV_FUNC_CAP_FCOE:
+ if (number == 1)
+ p->fcoe = TRUE;
+ break;
+ case I40E_DEV_FUNC_CAP_ISCSI:
+ if (number == 1)
+ p->iscsi = TRUE;
+ break;
+ case I40E_DEV_FUNC_CAP_RSS:
+ p->rss = TRUE;
+ p->rss_table_size = number;
+ p->rss_table_entry_width = logical_id;
+ break;
+ case I40E_DEV_FUNC_CAP_RX_QUEUES:
+ p->num_rx_qp = number;
+ p->base_queue = phys_id;
+ break;
+ case I40E_DEV_FUNC_CAP_TX_QUEUES:
+ p->num_tx_qp = number;
+ p->base_queue = phys_id;
+ break;
+ case I40E_DEV_FUNC_CAP_MSIX:
+ p->num_msix_vectors = number;
+ break;
+ case I40E_DEV_FUNC_CAP_MSIX_VF:
+ p->num_msix_vectors_vf = number;
+ break;
+ case I40E_DEV_FUNC_CAP_FLEX10:
+ if (major_rev == 1) {
+ if (number == 1) {
+ p->flex10_enable = TRUE;
+ p->flex10_capable = TRUE;
+ }
+ } else {
+ /* Capability revision >= 2 */
+ if (number & 1)
+ p->flex10_enable = TRUE;
+ if (number & 2)
+ p->flex10_capable = TRUE;
+ }
+ p->flex10_mode = logical_id;
+ p->flex10_status = phys_id;
+ break;
+ case I40E_DEV_FUNC_CAP_CEM:
+ if (number == 1)
+ p->mgmt_cem = TRUE;
+ break;
+ case I40E_DEV_FUNC_CAP_IWARP:
+ if (number == 1)
+ p->iwarp = TRUE;
+ break;
+ case I40E_DEV_FUNC_CAP_LED:
+ if (phys_id < I40E_HW_CAP_MAX_GPIO)
+ p->led[phys_id] = TRUE;
+ break;
+ case I40E_DEV_FUNC_CAP_SDP:
+ if (phys_id < I40E_HW_CAP_MAX_GPIO)
+ p->sdp[phys_id] = TRUE;
+ break;
+ case I40E_DEV_FUNC_CAP_MDIO:
+ if (number == 1) {
+ p->mdio_port_num = phys_id;
+ p->mdio_port_mode = logical_id;
+ }
+ break;
+ case I40E_DEV_FUNC_CAP_IEEE_1588:
+ if (number == 1)
+ p->ieee_1588 = TRUE;
+ break;
+ case I40E_DEV_FUNC_CAP_FLOW_DIRECTOR:
+ p->fd = TRUE;
+ p->fd_filters_guaranteed = number;
+ p->fd_filters_best_effort = logical_id;
+ break;
+ case I40E_DEV_FUNC_CAP_WR_CSR_PROT:
+ p->wr_csr_prot = (u64)number;
+ p->wr_csr_prot |= (u64)logical_id << 32;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (p->fcoe)
+ i40e_debug(hw, I40E_DEBUG_ALL, "device is FCoE capable\n");
+
+ /* Always disable FCoE if compiled without the I40E_FCOE_ENA flag */
+ p->fcoe = FALSE;
+
+ /* count the enabled ports (aka the "not disabled" ports) */
+ hw->num_ports = 0;
+ for (i = 0; i < 4; i++) {
+ u32 port_cfg_reg = I40E_PRTGEN_CNF + (4 * i);
+ u64 port_cfg = 0;
+
+ /* use AQ read to get the physical register offset instead
+ * of the port relative offset
+ */
+ i40e_aq_debug_read_register(hw, port_cfg_reg, &port_cfg, NULL);
+ if (!(port_cfg & I40E_PRTGEN_CNF_PORT_DIS_MASK))
+ hw->num_ports++;
+ }
+
+ valid_functions = p->valid_functions;
+ num_functions = 0;
+ while (valid_functions) {
+ if (valid_functions & 1)
+ num_functions++;
+ valid_functions >>= 1;
+ }
+
+ /* partition id is 1-based, and functions are evenly spread
+ * across the ports as partitions
+ */
+ hw->partition_id = (hw->pf_id / hw->num_ports) + 1;
+ hw->num_partitions = num_functions / hw->num_ports;
+
+ /* additional HW specific goodies that might
+ * someday be HW version specific
+ */
+ p->rx_buf_chain_len = I40E_MAX_CHAINED_RX_BUFFERS;
+}
+
+/**
+ * i40e_aq_discover_capabilities
+ * @hw: pointer to the hw struct
+ * @buff: a virtual buffer to hold the capabilities
+ * @buff_size: Size of the virtual buffer
+ * @data_size: Size of the returned data, or buff size needed if AQ err==ENOMEM
+ * @list_type_opc: capabilities type to discover - pass in the command opcode
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Get the device capabilities descriptions from the firmware
+ **/
+enum i40e_status_code i40e_aq_discover_capabilities(struct i40e_hw *hw,
+ void *buff, u16 buff_size, u16 *data_size,
+ enum i40e_admin_queue_opc list_type_opc,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aqc_list_capabilites *cmd;
+ struct i40e_aq_desc desc;
+ enum i40e_status_code status = I40E_SUCCESS;
+
+ cmd = (struct i40e_aqc_list_capabilites *)&desc.params.raw;
+
+ if (list_type_opc != i40e_aqc_opc_list_func_capabilities &&
+ list_type_opc != i40e_aqc_opc_list_dev_capabilities) {
+ status = I40E_ERR_PARAM;
+ goto exit;
+ }
+
+ i40e_fill_default_direct_cmd_desc(&desc, list_type_opc);
+
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_BUF);
+ if (buff_size > I40E_AQ_LARGE_BUF)
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_LB);
+
+ status = i40e_asq_send_command(hw, &desc, buff, buff_size, cmd_details);
+ *data_size = LE16_TO_CPU(desc.datalen);
+
+ if (status)
+ goto exit;
+
+ i40e_parse_discover_capabilities(hw, buff, LE32_TO_CPU(cmd->count),
+ list_type_opc);
+
+exit:
+ return status;
+}
+
+/**
+ * i40e_aq_update_nvm
+ * @hw: pointer to the hw struct
+ * @module_pointer: module pointer location in words from the NVM beginning
+ * @offset: byte offset from the module beginning
+ * @length: length of the section to be written (in bytes from the offset)
+ * @data: command buffer (size [bytes] = length)
+ * @last_command: tells if this is the last command in a series
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Update the NVM using the admin queue commands
+ **/
+enum i40e_status_code i40e_aq_update_nvm(struct i40e_hw *hw, u8 module_pointer,
+ u32 offset, u16 length, void *data,
+ bool last_command,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_nvm_update *cmd =
+ (struct i40e_aqc_nvm_update *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ DEBUGFUNC("i40e_aq_update_nvm");
+
+ /* In offset the highest byte must be zeroed. */
+ if (offset & 0xFF000000) {
+ status = I40E_ERR_PARAM;
+ goto i40e_aq_update_nvm_exit;
+ }
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_nvm_update);
+
+ /* If this is the last command in a series, set the proper flag. */
+ if (last_command)
+ cmd->command_flags |= I40E_AQ_NVM_LAST_CMD;
+ cmd->module_pointer = module_pointer;
+ cmd->offset = CPU_TO_LE32(offset);
+ cmd->length = CPU_TO_LE16(length);
+
+ desc.flags |= CPU_TO_LE16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+ if (length > I40E_AQ_LARGE_BUF)
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_LB);
+
+ status = i40e_asq_send_command(hw, &desc, data, length, cmd_details);
+
+i40e_aq_update_nvm_exit:
+ return status;
+}
+
+/**
+ * i40e_aq_get_lldp_mib
+ * @hw: pointer to the hw struct
+ * @bridge_type: type of bridge requested
+ * @mib_type: Local, Remote or both Local and Remote MIBs
+ * @buff: pointer to a user supplied buffer to store the MIB block
+ * @buff_size: size of the buffer (in bytes)
+ * @local_len : length of the returned Local LLDP MIB
+ * @remote_len: length of the returned Remote LLDP MIB
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Requests the complete LLDP MIB (entire packet).
+ **/
+enum i40e_status_code i40e_aq_get_lldp_mib(struct i40e_hw *hw, u8 bridge_type,
+ u8 mib_type, void *buff, u16 buff_size,
+ u16 *local_len, u16 *remote_len,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_lldp_get_mib *cmd =
+ (struct i40e_aqc_lldp_get_mib *)&desc.params.raw;
+ struct i40e_aqc_lldp_get_mib *resp =
+ (struct i40e_aqc_lldp_get_mib *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ if (buff_size == 0 || !buff)
+ return I40E_ERR_PARAM;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_lldp_get_mib);
+ /* Indirect Command */
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_BUF);
+
+ cmd->type = mib_type & I40E_AQ_LLDP_MIB_TYPE_MASK;
+ cmd->type |= ((bridge_type << I40E_AQ_LLDP_BRIDGE_TYPE_SHIFT) &
+ I40E_AQ_LLDP_BRIDGE_TYPE_MASK);
+
+ desc.datalen = CPU_TO_LE16(buff_size);
+
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_BUF);
+ if (buff_size > I40E_AQ_LARGE_BUF)
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_LB);
+
+ status = i40e_asq_send_command(hw, &desc, buff, buff_size, cmd_details);
+ if (!status) {
+ if (local_len != NULL)
+ *local_len = LE16_TO_CPU(resp->local_len);
+ if (remote_len != NULL)
+ *remote_len = LE16_TO_CPU(resp->remote_len);
+ }
+
+ return status;
+}
+
+ /**
+ * i40e_aq_set_lldp_mib - Set the LLDP MIB
+ * @hw: pointer to the hw struct
+ * @mib_type: Local, Remote or both Local and Remote MIBs
+ * @buff: pointer to a user supplied buffer to store the MIB block
+ * @buff_size: size of the buffer (in bytes)
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Set the LLDP MIB.
+ **/
+enum i40e_status_code i40e_aq_set_lldp_mib(struct i40e_hw *hw,
+ u8 mib_type, void *buff, u16 buff_size,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_lldp_set_local_mib *cmd =
+ (struct i40e_aqc_lldp_set_local_mib *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ if (buff_size == 0 || !buff)
+ return I40E_ERR_PARAM;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_lldp_set_local_mib);
+ /* Indirect Command */
+ desc.flags |= CPU_TO_LE16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+ if (buff_size > I40E_AQ_LARGE_BUF)
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_LB);
+ desc.datalen = CPU_TO_LE16(buff_size);
+
+ cmd->type = mib_type;
+ cmd->length = CPU_TO_LE16(buff_size);
+ cmd->address_high = CPU_TO_LE32(I40E_HI_WORD((uintptr_t)buff));
+ cmd->address_low = CPU_TO_LE32(I40E_LO_DWORD((uintptr_t)buff));
+
+ status = i40e_asq_send_command(hw, &desc, buff, buff_size, cmd_details);
+ return status;
+}
+
+/**
+ * i40e_aq_cfg_lldp_mib_change_event
+ * @hw: pointer to the hw struct
+ * @enable_update: Enable or Disable event posting
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Enable or Disable posting of an event on ARQ when LLDP MIB
+ * associated with the interface changes
+ **/
+enum i40e_status_code i40e_aq_cfg_lldp_mib_change_event(struct i40e_hw *hw,
+ bool enable_update,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_lldp_update_mib *cmd =
+ (struct i40e_aqc_lldp_update_mib *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_lldp_update_mib);
+
+ if (!enable_update)
+ cmd->command |= I40E_AQ_LLDP_MIB_UPDATE_DISABLE;
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_add_lldp_tlv
+ * @hw: pointer to the hw struct
+ * @bridge_type: type of bridge
+ * @buff: buffer with TLV to add
+ * @buff_size: length of the buffer
+ * @tlv_len: length of the TLV to be added
+ * @mib_len: length of the LLDP MIB returned in response
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Add the specified TLV to LLDP Local MIB for the given bridge type,
+ * it is responsibility of the caller to make sure that the TLV is not
+ * already present in the LLDPDU.
+ * In return firmware will write the complete LLDP MIB with the newly
+ * added TLV in the response buffer.
+ **/
+enum i40e_status_code i40e_aq_add_lldp_tlv(struct i40e_hw *hw, u8 bridge_type,
+ void *buff, u16 buff_size, u16 tlv_len,
+ u16 *mib_len,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_lldp_add_tlv *cmd =
+ (struct i40e_aqc_lldp_add_tlv *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ if (buff_size == 0 || !buff || tlv_len == 0)
+ return I40E_ERR_PARAM;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_lldp_add_tlv);
+
+ /* Indirect Command */
+ desc.flags |= CPU_TO_LE16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+ if (buff_size > I40E_AQ_LARGE_BUF)
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_LB);
+ desc.datalen = CPU_TO_LE16(buff_size);
+
+ cmd->type = ((bridge_type << I40E_AQ_LLDP_BRIDGE_TYPE_SHIFT) &
+ I40E_AQ_LLDP_BRIDGE_TYPE_MASK);
+ cmd->len = CPU_TO_LE16(tlv_len);
+
+ status = i40e_asq_send_command(hw, &desc, buff, buff_size, cmd_details);
+ if (!status) {
+ if (mib_len != NULL)
+ *mib_len = LE16_TO_CPU(desc.datalen);
+ }
+
+ return status;
+}
+
+/**
+ * i40e_aq_update_lldp_tlv
+ * @hw: pointer to the hw struct
+ * @bridge_type: type of bridge
+ * @buff: buffer with TLV to update
+ * @buff_size: size of the buffer holding original and updated TLVs
+ * @old_len: Length of the Original TLV
+ * @new_len: Length of the Updated TLV
+ * @offset: offset of the updated TLV in the buff
+ * @mib_len: length of the returned LLDP MIB
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Update the specified TLV to the LLDP Local MIB for the given bridge type.
+ * Firmware will place the complete LLDP MIB in response buffer with the
+ * updated TLV.
+ **/
+enum i40e_status_code i40e_aq_update_lldp_tlv(struct i40e_hw *hw,
+ u8 bridge_type, void *buff, u16 buff_size,
+ u16 old_len, u16 new_len, u16 offset,
+ u16 *mib_len,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_lldp_update_tlv *cmd =
+ (struct i40e_aqc_lldp_update_tlv *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ if (buff_size == 0 || !buff || offset == 0 ||
+ old_len == 0 || new_len == 0)
+ return I40E_ERR_PARAM;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_lldp_update_tlv);
+
+ /* Indirect Command */
+ desc.flags |= CPU_TO_LE16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+ if (buff_size > I40E_AQ_LARGE_BUF)
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_LB);
+ desc.datalen = CPU_TO_LE16(buff_size);
+
+ cmd->type = ((bridge_type << I40E_AQ_LLDP_BRIDGE_TYPE_SHIFT) &
+ I40E_AQ_LLDP_BRIDGE_TYPE_MASK);
+ cmd->old_len = CPU_TO_LE16(old_len);
+ cmd->new_offset = CPU_TO_LE16(offset);
+ cmd->new_len = CPU_TO_LE16(new_len);
+
+ status = i40e_asq_send_command(hw, &desc, buff, buff_size, cmd_details);
+ if (!status) {
+ if (mib_len != NULL)
+ *mib_len = LE16_TO_CPU(desc.datalen);
+ }
+
+ return status;
+}
+
+/**
+ * i40e_aq_delete_lldp_tlv
+ * @hw: pointer to the hw struct
+ * @bridge_type: type of bridge
+ * @buff: pointer to a user supplied buffer that has the TLV
+ * @buff_size: length of the buffer
+ * @tlv_len: length of the TLV to be deleted
+ * @mib_len: length of the returned LLDP MIB
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Delete the specified TLV from LLDP Local MIB for the given bridge type.
+ * The firmware places the entire LLDP MIB in the response buffer.
+ **/
+enum i40e_status_code i40e_aq_delete_lldp_tlv(struct i40e_hw *hw,
+ u8 bridge_type, void *buff, u16 buff_size,
+ u16 tlv_len, u16 *mib_len,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_lldp_add_tlv *cmd =
+ (struct i40e_aqc_lldp_add_tlv *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ if (buff_size == 0 || !buff)
+ return I40E_ERR_PARAM;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_lldp_delete_tlv);
+
+ /* Indirect Command */
+ desc.flags |= CPU_TO_LE16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+ if (buff_size > I40E_AQ_LARGE_BUF)
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_LB);
+ desc.datalen = CPU_TO_LE16(buff_size);
+ cmd->len = CPU_TO_LE16(tlv_len);
+ cmd->type = ((bridge_type << I40E_AQ_LLDP_BRIDGE_TYPE_SHIFT) &
+ I40E_AQ_LLDP_BRIDGE_TYPE_MASK);
+
+ status = i40e_asq_send_command(hw, &desc, buff, buff_size, cmd_details);
+ if (!status) {
+ if (mib_len != NULL)
+ *mib_len = LE16_TO_CPU(desc.datalen);
+ }
+
+ return status;
+}
+
+/**
+ * i40e_aq_stop_lldp
+ * @hw: pointer to the hw struct
+ * @shutdown_agent: True if LLDP Agent needs to be Shutdown
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Stop or Shutdown the embedded LLDP Agent
+ **/
+enum i40e_status_code i40e_aq_stop_lldp(struct i40e_hw *hw, bool shutdown_agent,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_lldp_stop *cmd =
+ (struct i40e_aqc_lldp_stop *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_lldp_stop);
+
+ if (shutdown_agent)
+ cmd->command |= I40E_AQ_LLDP_AGENT_SHUTDOWN;
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_start_lldp
+ * @hw: pointer to the hw struct
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Start the embedded LLDP Agent on all ports.
+ **/
+enum i40e_status_code i40e_aq_start_lldp(struct i40e_hw *hw,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_lldp_start *cmd =
+ (struct i40e_aqc_lldp_start *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_lldp_start);
+
+ cmd->command = I40E_AQ_LLDP_AGENT_START;
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_get_cee_dcb_config
+ * @hw: pointer to the hw struct
+ * @buff: response buffer that stores CEE operational configuration
+ * @buff_size: size of the buffer passed
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Get CEE DCBX mode operational configuration from firmware
+ **/
+enum i40e_status_code i40e_aq_get_cee_dcb_config(struct i40e_hw *hw,
+ void *buff, u16 buff_size,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ enum i40e_status_code status;
+
+ if (buff_size == 0 || !buff)
+ return I40E_ERR_PARAM;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_get_cee_dcb_cfg);
+
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_BUF);
+ status = i40e_asq_send_command(hw, &desc, (void *)buff, buff_size,
+ cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_start_stop_dcbx - Start/Stop DCBx service in FW
+ * @hw: pointer to the hw struct
+ * @start_agent: True if DCBx Agent needs to be Started
+ * False if DCBx Agent needs to be Stopped
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Start/Stop the embedded dcbx Agent
+ **/
+enum i40e_status_code i40e_aq_start_stop_dcbx(struct i40e_hw *hw,
+ bool start_agent,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_lldp_stop_start_specific_agent *cmd =
+ (struct i40e_aqc_lldp_stop_start_specific_agent *)
+ &desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_lldp_stop_start_spec_agent);
+
+ if (start_agent)
+ cmd->command = I40E_AQC_START_SPECIFIC_AGENT_MASK;
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_add_udp_tunnel
+ * @hw: pointer to the hw struct
+ * @udp_port: the UDP port to add
+ * @header_len: length of the tunneling header length in DWords
+ * @protocol_index: protocol index type
+ * @filter_index: pointer to filter index
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+enum i40e_status_code i40e_aq_add_udp_tunnel(struct i40e_hw *hw,
+ u16 udp_port, u8 protocol_index,
+ u8 *filter_index,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_add_udp_tunnel *cmd =
+ (struct i40e_aqc_add_udp_tunnel *)&desc.params.raw;
+ struct i40e_aqc_del_udp_tunnel_completion *resp =
+ (struct i40e_aqc_del_udp_tunnel_completion *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_add_udp_tunnel);
+
+ cmd->udp_port = CPU_TO_LE16(udp_port);
+ cmd->protocol_type = protocol_index;
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ if (!status && filter_index)
+ *filter_index = resp->index;
+
+ return status;
+}
+
+/**
+ * i40e_aq_del_udp_tunnel
+ * @hw: pointer to the hw struct
+ * @index: filter index
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+enum i40e_status_code i40e_aq_del_udp_tunnel(struct i40e_hw *hw, u8 index,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_remove_udp_tunnel *cmd =
+ (struct i40e_aqc_remove_udp_tunnel *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_del_udp_tunnel);
+
+ cmd->index = index;
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_get_switch_resource_alloc (0x0204)
+ * @hw: pointer to the hw struct
+ * @num_entries: pointer to u8 to store the number of resource entries returned
+ * @buf: pointer to a user supplied buffer. This buffer must be large enough
+ * to store the resource information for all resource types. Each
+ * resource type is a i40e_aqc_switch_resource_alloc_data structure.
+ * @count: size, in bytes, of the buffer provided
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Query the resources allocated to a function.
+ **/
+enum i40e_status_code i40e_aq_get_switch_resource_alloc(struct i40e_hw *hw,
+ u8 *num_entries,
+ struct i40e_aqc_switch_resource_alloc_element_resp *buf,
+ u16 count,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_get_switch_resource_alloc *cmd_resp =
+ (struct i40e_aqc_get_switch_resource_alloc *)&desc.params.raw;
+ enum i40e_status_code status;
+ u16 length = count * sizeof(*buf);
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_get_switch_resource_alloc);
+
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_BUF);
+ if (length > I40E_AQ_LARGE_BUF)
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_LB);
+
+ status = i40e_asq_send_command(hw, &desc, buf, length, cmd_details);
+
+ if (!status && num_entries)
+ *num_entries = cmd_resp->num_entries;
+
+ return status;
+}
+
+/**
+ * i40e_aq_delete_element - Delete switch element
+ * @hw: pointer to the hw struct
+ * @seid: the SEID to delete from the switch
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * This deletes a switch element from the switch.
+ **/
+enum i40e_status_code i40e_aq_delete_element(struct i40e_hw *hw, u16 seid,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_switch_seid *cmd =
+ (struct i40e_aqc_switch_seid *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ if (seid == 0)
+ return I40E_ERR_PARAM;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_delete_element);
+
+ cmd->seid = CPU_TO_LE16(seid);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40_aq_add_pvirt - Instantiate a Port Virtualizer on a port
+ * @hw: pointer to the hw struct
+ * @flags: component flags
+ * @mac_seid: uplink seid (MAC SEID)
+ * @vsi_seid: connected vsi seid
+ * @ret_seid: seid of create pv component
+ *
+ * This instantiates an i40e port virtualizer with specified flags.
+ * Depending on specified flags the port virtualizer can act as a
+ * 802.1Qbr port virtualizer or a 802.1Qbg S-component.
+ */
+enum i40e_status_code i40e_aq_add_pvirt(struct i40e_hw *hw, u16 flags,
+ u16 mac_seid, u16 vsi_seid,
+ u16 *ret_seid)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_add_update_pv *cmd =
+ (struct i40e_aqc_add_update_pv *)&desc.params.raw;
+ struct i40e_aqc_add_update_pv_completion *resp =
+ (struct i40e_aqc_add_update_pv_completion *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ if (vsi_seid == 0)
+ return I40E_ERR_PARAM;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_add_pv);
+ cmd->command_flags = CPU_TO_LE16(flags);
+ cmd->uplink_seid = CPU_TO_LE16(mac_seid);
+ cmd->connected_seid = CPU_TO_LE16(vsi_seid);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, NULL);
+ if (!status && ret_seid)
+ *ret_seid = LE16_TO_CPU(resp->pv_seid);
+
+ return status;
+}
+
+/**
+ * i40e_aq_add_tag - Add an S/E-tag
+ * @hw: pointer to the hw struct
+ * @direct_to_queue: should s-tag direct flow to a specific queue
+ * @vsi_seid: VSI SEID to use this tag
+ * @tag: value of the tag
+ * @queue_num: queue number, only valid is direct_to_queue is TRUE
+ * @tags_used: return value, number of tags in use by this PF
+ * @tags_free: return value, number of unallocated tags
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * This associates an S- or E-tag to a VSI in the switch complex. It returns
+ * the number of tags allocated by the PF, and the number of unallocated
+ * tags available.
+ **/
+enum i40e_status_code i40e_aq_add_tag(struct i40e_hw *hw, bool direct_to_queue,
+ u16 vsi_seid, u16 tag, u16 queue_num,
+ u16 *tags_used, u16 *tags_free,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_add_tag *cmd =
+ (struct i40e_aqc_add_tag *)&desc.params.raw;
+ struct i40e_aqc_add_remove_tag_completion *resp =
+ (struct i40e_aqc_add_remove_tag_completion *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ if (vsi_seid == 0)
+ return I40E_ERR_PARAM;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_add_tag);
+
+ cmd->seid = CPU_TO_LE16(vsi_seid);
+ cmd->tag = CPU_TO_LE16(tag);
+ if (direct_to_queue) {
+ cmd->flags = CPU_TO_LE16(I40E_AQC_ADD_TAG_FLAG_TO_QUEUE);
+ cmd->queue_number = CPU_TO_LE16(queue_num);
+ }
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ if (!status) {
+ if (tags_used != NULL)
+ *tags_used = LE16_TO_CPU(resp->tags_used);
+ if (tags_free != NULL)
+ *tags_free = LE16_TO_CPU(resp->tags_free);
+ }
+
+ return status;
+}
+
+/**
+ * i40e_aq_remove_tag - Remove an S- or E-tag
+ * @hw: pointer to the hw struct
+ * @vsi_seid: VSI SEID this tag is associated with
+ * @tag: value of the S-tag to delete
+ * @tags_used: return value, number of tags in use by this PF
+ * @tags_free: return value, number of unallocated tags
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * This deletes an S- or E-tag from a VSI in the switch complex. It returns
+ * the number of tags allocated by the PF, and the number of unallocated
+ * tags available.
+ **/
+enum i40e_status_code i40e_aq_remove_tag(struct i40e_hw *hw, u16 vsi_seid,
+ u16 tag, u16 *tags_used, u16 *tags_free,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_remove_tag *cmd =
+ (struct i40e_aqc_remove_tag *)&desc.params.raw;
+ struct i40e_aqc_add_remove_tag_completion *resp =
+ (struct i40e_aqc_add_remove_tag_completion *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ if (vsi_seid == 0)
+ return I40E_ERR_PARAM;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_remove_tag);
+
+ cmd->seid = CPU_TO_LE16(vsi_seid);
+ cmd->tag = CPU_TO_LE16(tag);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ if (!status) {
+ if (tags_used != NULL)
+ *tags_used = LE16_TO_CPU(resp->tags_used);
+ if (tags_free != NULL)
+ *tags_free = LE16_TO_CPU(resp->tags_free);
+ }
+
+ return status;
+}
+
+/**
+ * i40e_aq_add_mcast_etag - Add a multicast E-tag
+ * @hw: pointer to the hw struct
+ * @pv_seid: Port Virtualizer of this SEID to associate E-tag with
+ * @etag: value of E-tag to add
+ * @num_tags_in_buf: number of unicast E-tags in indirect buffer
+ * @buf: address of indirect buffer
+ * @tags_used: return value, number of E-tags in use by this port
+ * @tags_free: return value, number of unallocated M-tags
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * This associates a multicast E-tag to a port virtualizer. It will return
+ * the number of tags allocated by the PF, and the number of unallocated
+ * tags available.
+ *
+ * The indirect buffer pointed to by buf is a list of 2-byte E-tags,
+ * num_tags_in_buf long.
+ **/
+enum i40e_status_code i40e_aq_add_mcast_etag(struct i40e_hw *hw, u16 pv_seid,
+ u16 etag, u8 num_tags_in_buf, void *buf,
+ u16 *tags_used, u16 *tags_free,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_add_remove_mcast_etag *cmd =
+ (struct i40e_aqc_add_remove_mcast_etag *)&desc.params.raw;
+ struct i40e_aqc_add_remove_mcast_etag_completion *resp =
+ (struct i40e_aqc_add_remove_mcast_etag_completion *)&desc.params.raw;
+ enum i40e_status_code status;
+ u16 length = sizeof(u16) * num_tags_in_buf;
+
+ if ((pv_seid == 0) || (buf == NULL) || (num_tags_in_buf == 0))
+ return I40E_ERR_PARAM;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_add_multicast_etag);
+
+ cmd->pv_seid = CPU_TO_LE16(pv_seid);
+ cmd->etag = CPU_TO_LE16(etag);
+ cmd->num_unicast_etags = num_tags_in_buf;
+
+ desc.flags |= CPU_TO_LE16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+ if (length > I40E_AQ_LARGE_BUF)
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_LB);
+
+ status = i40e_asq_send_command(hw, &desc, buf, length, cmd_details);
+
+ if (!status) {
+ if (tags_used != NULL)
+ *tags_used = LE16_TO_CPU(resp->mcast_etags_used);
+ if (tags_free != NULL)
+ *tags_free = LE16_TO_CPU(resp->mcast_etags_free);
+ }
+
+ return status;
+}
+
+/**
+ * i40e_aq_remove_mcast_etag - Remove a multicast E-tag
+ * @hw: pointer to the hw struct
+ * @pv_seid: Port Virtualizer SEID this M-tag is associated with
+ * @etag: value of the E-tag to remove
+ * @tags_used: return value, number of tags in use by this port
+ * @tags_free: return value, number of unallocated tags
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * This deletes an E-tag from the port virtualizer. It will return
+ * the number of tags allocated by the port, and the number of unallocated
+ * tags available.
+ **/
+enum i40e_status_code i40e_aq_remove_mcast_etag(struct i40e_hw *hw, u16 pv_seid,
+ u16 etag, u16 *tags_used, u16 *tags_free,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_add_remove_mcast_etag *cmd =
+ (struct i40e_aqc_add_remove_mcast_etag *)&desc.params.raw;
+ struct i40e_aqc_add_remove_mcast_etag_completion *resp =
+ (struct i40e_aqc_add_remove_mcast_etag_completion *)&desc.params.raw;
+ enum i40e_status_code status;
+
+
+ if (pv_seid == 0)
+ return I40E_ERR_PARAM;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_remove_multicast_etag);
+
+ cmd->pv_seid = CPU_TO_LE16(pv_seid);
+ cmd->etag = CPU_TO_LE16(etag);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ if (!status) {
+ if (tags_used != NULL)
+ *tags_used = LE16_TO_CPU(resp->mcast_etags_used);
+ if (tags_free != NULL)
+ *tags_free = LE16_TO_CPU(resp->mcast_etags_free);
+ }
+
+ return status;
+}
+
+/**
+ * i40e_aq_update_tag - Update an S/E-tag
+ * @hw: pointer to the hw struct
+ * @vsi_seid: VSI SEID using this S-tag
+ * @old_tag: old tag value
+ * @new_tag: new tag value
+ * @tags_used: return value, number of tags in use by this PF
+ * @tags_free: return value, number of unallocated tags
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * This updates the value of the tag currently attached to this VSI
+ * in the switch complex. It will return the number of tags allocated
+ * by the PF, and the number of unallocated tags available.
+ **/
+enum i40e_status_code i40e_aq_update_tag(struct i40e_hw *hw, u16 vsi_seid,
+ u16 old_tag, u16 new_tag, u16 *tags_used,
+ u16 *tags_free,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_update_tag *cmd =
+ (struct i40e_aqc_update_tag *)&desc.params.raw;
+ struct i40e_aqc_update_tag_completion *resp =
+ (struct i40e_aqc_update_tag_completion *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ if (vsi_seid == 0)
+ return I40E_ERR_PARAM;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_update_tag);
+
+ cmd->seid = CPU_TO_LE16(vsi_seid);
+ cmd->old_tag = CPU_TO_LE16(old_tag);
+ cmd->new_tag = CPU_TO_LE16(new_tag);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ if (!status) {
+ if (tags_used != NULL)
+ *tags_used = LE16_TO_CPU(resp->tags_used);
+ if (tags_free != NULL)
+ *tags_free = LE16_TO_CPU(resp->tags_free);
+ }
+
+ return status;
+}
+
+/**
+ * i40e_aq_dcb_ignore_pfc - Ignore PFC for given TCs
+ * @hw: pointer to the hw struct
+ * @tcmap: TC map for request/release any ignore PFC condition
+ * @request: request or release ignore PFC condition
+ * @tcmap_ret: return TCs for which PFC is currently ignored
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * This sends out request/release to ignore PFC condition for a TC.
+ * It will return the TCs for which PFC is currently ignored.
+ **/
+enum i40e_status_code i40e_aq_dcb_ignore_pfc(struct i40e_hw *hw, u8 tcmap,
+ bool request, u8 *tcmap_ret,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_pfc_ignore *cmd_resp =
+ (struct i40e_aqc_pfc_ignore *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_dcb_ignore_pfc);
+
+ if (request)
+ cmd_resp->command_flags = I40E_AQC_PFC_IGNORE_SET;
+
+ cmd_resp->tc_bitmap = tcmap;
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ if (!status) {
+ if (tcmap_ret != NULL)
+ *tcmap_ret = cmd_resp->tc_bitmap;
+ }
+
+ return status;
+}
+
+/**
+ * i40e_aq_dcb_updated - DCB Updated Command
+ * @hw: pointer to the hw struct
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * When LLDP is handled in PF this command is used by the PF
+ * to notify EMP that a DCB setting is modified.
+ * When LLDP is handled in EMP this command is used by the PF
+ * to notify EMP whenever one of the following parameters get
+ * modified:
+ * - PFCLinkDelayAllowance in PRTDCB_GENC.PFCLDA
+ * - PCIRTT in PRTDCB_GENC.PCIRTT
+ * - Maximum Frame Size for non-FCoE TCs set by PRTDCB_TDPUC.MAX_TXFRAME.
+ * EMP will return when the shared RPB settings have been
+ * recomputed and modified. The retval field in the descriptor
+ * will be set to 0 when RPB is modified.
+ **/
+enum i40e_status_code i40e_aq_dcb_updated(struct i40e_hw *hw,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_dcb_updated);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_add_statistics - Add a statistics block to a VLAN in a switch.
+ * @hw: pointer to the hw struct
+ * @seid: defines the SEID of the switch for which the stats are requested
+ * @vlan_id: the VLAN ID for which the statistics are requested
+ * @stat_index: index of the statistics counters block assigned to this VLAN
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * XL710 supports 128 smonVlanStats counters.This command is used to
+ * allocate a set of smonVlanStats counters to a specific VLAN in a specific
+ * switch.
+ **/
+enum i40e_status_code i40e_aq_add_statistics(struct i40e_hw *hw, u16 seid,
+ u16 vlan_id, u16 *stat_index,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_add_remove_statistics *cmd_resp =
+ (struct i40e_aqc_add_remove_statistics *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ if ((seid == 0) || (stat_index == NULL))
+ return I40E_ERR_PARAM;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_add_statistics);
+
+ cmd_resp->seid = CPU_TO_LE16(seid);
+ cmd_resp->vlan = CPU_TO_LE16(vlan_id);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ if (!status && stat_index)
+ *stat_index = LE16_TO_CPU(cmd_resp->stat_index);
+
+ return status;
+}
+
+/**
+ * i40e_aq_remove_statistics - Remove a statistics block to a VLAN in a switch.
+ * @hw: pointer to the hw struct
+ * @seid: defines the SEID of the switch for which the stats are requested
+ * @vlan_id: the VLAN ID for which the statistics are requested
+ * @stat_index: index of the statistics counters block assigned to this VLAN
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * XL710 supports 128 smonVlanStats counters.This command is used to
+ * deallocate a set of smonVlanStats counters to a specific VLAN in a specific
+ * switch.
+ **/
+enum i40e_status_code i40e_aq_remove_statistics(struct i40e_hw *hw, u16 seid,
+ u16 vlan_id, u16 stat_index,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_add_remove_statistics *cmd =
+ (struct i40e_aqc_add_remove_statistics *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ if (seid == 0)
+ return I40E_ERR_PARAM;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_remove_statistics);
+
+ cmd->seid = CPU_TO_LE16(seid);
+ cmd->vlan = CPU_TO_LE16(vlan_id);
+ cmd->stat_index = CPU_TO_LE16(stat_index);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_set_port_parameters - set physical port parameters.
+ * @hw: pointer to the hw struct
+ * @bad_frame_vsi: defines the VSI to which bad frames are forwarded
+ * @save_bad_pac: if set packets with errors are forwarded to the bad frames VSI
+ * @pad_short_pac: if set transmit packets smaller than 60 bytes are padded
+ * @double_vlan: if set double VLAN is enabled
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+enum i40e_status_code i40e_aq_set_port_parameters(struct i40e_hw *hw,
+ u16 bad_frame_vsi, bool save_bad_pac,
+ bool pad_short_pac, bool double_vlan,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aqc_set_port_parameters *cmd;
+ enum i40e_status_code status;
+ struct i40e_aq_desc desc;
+ u16 command_flags = 0;
+
+ cmd = (struct i40e_aqc_set_port_parameters *)&desc.params.raw;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_port_parameters);
+
+ cmd->bad_frame_vsi = CPU_TO_LE16(bad_frame_vsi);
+ if (save_bad_pac)
+ command_flags |= I40E_AQ_SET_P_PARAMS_SAVE_BAD_PACKETS;
+ if (pad_short_pac)
+ command_flags |= I40E_AQ_SET_P_PARAMS_PAD_SHORT_PACKETS;
+ if (double_vlan)
+ command_flags |= I40E_AQ_SET_P_PARAMS_DOUBLE_VLAN_ENA;
+ cmd->command_flags = CPU_TO_LE16(command_flags);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_tx_sched_cmd - generic Tx scheduler AQ command handler
+ * @hw: pointer to the hw struct
+ * @seid: seid for the physical port/switching component/vsi
+ * @buff: Indirect buffer to hold data parameters and response
+ * @buff_size: Indirect buffer size
+ * @opcode: Tx scheduler AQ command opcode
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Generic command handler for Tx scheduler AQ commands
+ **/
+static enum i40e_status_code i40e_aq_tx_sched_cmd(struct i40e_hw *hw, u16 seid,
+ void *buff, u16 buff_size,
+ enum i40e_admin_queue_opc opcode,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_tx_sched_ind *cmd =
+ (struct i40e_aqc_tx_sched_ind *)&desc.params.raw;
+ enum i40e_status_code status;
+ bool cmd_param_flag = FALSE;
+
+ switch (opcode) {
+ case i40e_aqc_opc_configure_vsi_ets_sla_bw_limit:
+ case i40e_aqc_opc_configure_vsi_tc_bw:
+ case i40e_aqc_opc_enable_switching_comp_ets:
+ case i40e_aqc_opc_modify_switching_comp_ets:
+ case i40e_aqc_opc_disable_switching_comp_ets:
+ case i40e_aqc_opc_configure_switching_comp_ets_bw_limit:
+ case i40e_aqc_opc_configure_switching_comp_bw_config:
+ cmd_param_flag = TRUE;
+ break;
+ case i40e_aqc_opc_query_vsi_bw_config:
+ case i40e_aqc_opc_query_vsi_ets_sla_config:
+ case i40e_aqc_opc_query_switching_comp_ets_config:
+ case i40e_aqc_opc_query_port_ets_config:
+ case i40e_aqc_opc_query_switching_comp_bw_config:
+ cmd_param_flag = FALSE;
+ break;
+ default:
+ return I40E_ERR_PARAM;
+ }
+
+ i40e_fill_default_direct_cmd_desc(&desc, opcode);
+
+ /* Indirect command */
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_BUF);
+ if (cmd_param_flag)
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_RD);
+ if (buff_size > I40E_AQ_LARGE_BUF)
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_LB);
+
+ desc.datalen = CPU_TO_LE16(buff_size);
+
+ cmd->vsi_seid = CPU_TO_LE16(seid);
+
+ status = i40e_asq_send_command(hw, &desc, buff, buff_size, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_config_vsi_bw_limit - Configure VSI BW Limit
+ * @hw: pointer to the hw struct
+ * @seid: VSI seid
+ * @credit: BW limit credits (0 = disabled)
+ * @max_credit: Max BW limit credits
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+enum i40e_status_code i40e_aq_config_vsi_bw_limit(struct i40e_hw *hw,
+ u16 seid, u16 credit, u8 max_credit,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_configure_vsi_bw_limit *cmd =
+ (struct i40e_aqc_configure_vsi_bw_limit *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_configure_vsi_bw_limit);
+
+ cmd->vsi_seid = CPU_TO_LE16(seid);
+ cmd->credit = CPU_TO_LE16(credit);
+ cmd->max_credit = max_credit;
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_config_switch_comp_bw_limit - Configure Switching component BW Limit
+ * @hw: pointer to the hw struct
+ * @seid: switching component seid
+ * @credit: BW limit credits (0 = disabled)
+ * @max_bw: Max BW limit credits
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+enum i40e_status_code i40e_aq_config_switch_comp_bw_limit(struct i40e_hw *hw,
+ u16 seid, u16 credit, u8 max_bw,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_configure_switching_comp_bw_limit *cmd =
+ (struct i40e_aqc_configure_switching_comp_bw_limit *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_configure_switching_comp_bw_limit);
+
+ cmd->seid = CPU_TO_LE16(seid);
+ cmd->credit = CPU_TO_LE16(credit);
+ cmd->max_bw = max_bw;
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_config_vsi_ets_sla_bw_limit - Config VSI BW Limit per TC
+ * @hw: pointer to the hw struct
+ * @seid: VSI seid
+ * @bw_data: Buffer holding enabled TCs, per TC BW limit/credits
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+enum i40e_status_code i40e_aq_config_vsi_ets_sla_bw_limit(struct i40e_hw *hw,
+ u16 seid,
+ struct i40e_aqc_configure_vsi_ets_sla_bw_data *bw_data,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ return i40e_aq_tx_sched_cmd(hw, seid, (void *)bw_data, sizeof(*bw_data),
+ i40e_aqc_opc_configure_vsi_ets_sla_bw_limit,
+ cmd_details);
+}
+
+/**
+ * i40e_aq_config_vsi_tc_bw - Config VSI BW Allocation per TC
+ * @hw: pointer to the hw struct
+ * @seid: VSI seid
+ * @bw_data: Buffer holding enabled TCs, relative TC BW limit/credits
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+enum i40e_status_code i40e_aq_config_vsi_tc_bw(struct i40e_hw *hw,
+ u16 seid,
+ struct i40e_aqc_configure_vsi_tc_bw_data *bw_data,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ return i40e_aq_tx_sched_cmd(hw, seid, (void *)bw_data, sizeof(*bw_data),
+ i40e_aqc_opc_configure_vsi_tc_bw,
+ cmd_details);
+}
+
+/**
+ * i40e_aq_config_switch_comp_ets_bw_limit - Config Switch comp BW Limit per TC
+ * @hw: pointer to the hw struct
+ * @seid: seid of the switching component
+ * @bw_data: Buffer holding enabled TCs, per TC BW limit/credits
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+enum i40e_status_code i40e_aq_config_switch_comp_ets_bw_limit(
+ struct i40e_hw *hw, u16 seid,
+ struct i40e_aqc_configure_switching_comp_ets_bw_limit_data *bw_data,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ return i40e_aq_tx_sched_cmd(hw, seid, (void *)bw_data, sizeof(*bw_data),
+ i40e_aqc_opc_configure_switching_comp_ets_bw_limit,
+ cmd_details);
+}
+
+/**
+ * i40e_aq_query_vsi_bw_config - Query VSI BW configuration
+ * @hw: pointer to the hw struct
+ * @seid: seid of the VSI
+ * @bw_data: Buffer to hold VSI BW configuration
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+enum i40e_status_code i40e_aq_query_vsi_bw_config(struct i40e_hw *hw,
+ u16 seid,
+ struct i40e_aqc_query_vsi_bw_config_resp *bw_data,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ return i40e_aq_tx_sched_cmd(hw, seid, (void *)bw_data, sizeof(*bw_data),
+ i40e_aqc_opc_query_vsi_bw_config,
+ cmd_details);
+}
+
+/**
+ * i40e_aq_query_vsi_ets_sla_config - Query VSI BW configuration per TC
+ * @hw: pointer to the hw struct
+ * @seid: seid of the VSI
+ * @bw_data: Buffer to hold VSI BW configuration per TC
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+enum i40e_status_code i40e_aq_query_vsi_ets_sla_config(struct i40e_hw *hw,
+ u16 seid,
+ struct i40e_aqc_query_vsi_ets_sla_config_resp *bw_data,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ return i40e_aq_tx_sched_cmd(hw, seid, (void *)bw_data, sizeof(*bw_data),
+ i40e_aqc_opc_query_vsi_ets_sla_config,
+ cmd_details);
+}
+
+/**
+ * i40e_aq_query_switch_comp_ets_config - Query Switch comp BW config per TC
+ * @hw: pointer to the hw struct
+ * @seid: seid of the switching component
+ * @bw_data: Buffer to hold switching component's per TC BW config
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+enum i40e_status_code i40e_aq_query_switch_comp_ets_config(struct i40e_hw *hw,
+ u16 seid,
+ struct i40e_aqc_query_switching_comp_ets_config_resp *bw_data,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ return i40e_aq_tx_sched_cmd(hw, seid, (void *)bw_data, sizeof(*bw_data),
+ i40e_aqc_opc_query_switching_comp_ets_config,
+ cmd_details);
+}
+
+/**
+ * i40e_aq_query_port_ets_config - Query Physical Port ETS configuration
+ * @hw: pointer to the hw struct
+ * @seid: seid of the VSI or switching component connected to Physical Port
+ * @bw_data: Buffer to hold current ETS configuration for the Physical Port
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+enum i40e_status_code i40e_aq_query_port_ets_config(struct i40e_hw *hw,
+ u16 seid,
+ struct i40e_aqc_query_port_ets_config_resp *bw_data,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ return i40e_aq_tx_sched_cmd(hw, seid, (void *)bw_data, sizeof(*bw_data),
+ i40e_aqc_opc_query_port_ets_config,
+ cmd_details);
+}
+
+/**
+ * i40e_aq_query_switch_comp_bw_config - Query Switch comp BW configuration
+ * @hw: pointer to the hw struct
+ * @seid: seid of the switching component
+ * @bw_data: Buffer to hold switching component's BW configuration
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+enum i40e_status_code i40e_aq_query_switch_comp_bw_config(struct i40e_hw *hw,
+ u16 seid,
+ struct i40e_aqc_query_switching_comp_bw_config_resp *bw_data,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ return i40e_aq_tx_sched_cmd(hw, seid, (void *)bw_data, sizeof(*bw_data),
+ i40e_aqc_opc_query_switching_comp_bw_config,
+ cmd_details);
+}
+
+/**
+ * i40e_validate_filter_settings
+ * @hw: pointer to the hardware structure
+ * @settings: Filter control settings
+ *
+ * Check and validate the filter control settings passed.
+ * The function checks for the valid filter/context sizes being
+ * passed for FCoE and PE.
+ *
+ * Returns I40E_SUCCESS if the values passed are valid and within
+ * range else returns an error.
+ **/
+static enum i40e_status_code i40e_validate_filter_settings(struct i40e_hw *hw,
+ struct i40e_filter_control_settings *settings)
+{
+ u32 fcoe_cntx_size, fcoe_filt_size;
+ u32 pe_cntx_size, pe_filt_size;
+ u32 fcoe_fmax;
+
+ u32 val;
+
+ /* Validate FCoE settings passed */
+ switch (settings->fcoe_filt_num) {
+ case I40E_HASH_FILTER_SIZE_1K:
+ case I40E_HASH_FILTER_SIZE_2K:
+ case I40E_HASH_FILTER_SIZE_4K:
+ case I40E_HASH_FILTER_SIZE_8K:
+ case I40E_HASH_FILTER_SIZE_16K:
+ case I40E_HASH_FILTER_SIZE_32K:
+ fcoe_filt_size = I40E_HASH_FILTER_BASE_SIZE;
+ fcoe_filt_size <<= (u32)settings->fcoe_filt_num;
+ break;
+ default:
+ return I40E_ERR_PARAM;
+ }
+
+ switch (settings->fcoe_cntx_num) {
+ case I40E_DMA_CNTX_SIZE_512:
+ case I40E_DMA_CNTX_SIZE_1K:
+ case I40E_DMA_CNTX_SIZE_2K:
+ case I40E_DMA_CNTX_SIZE_4K:
+ fcoe_cntx_size = I40E_DMA_CNTX_BASE_SIZE;
+ fcoe_cntx_size <<= (u32)settings->fcoe_cntx_num;
+ break;
+ default:
+ return I40E_ERR_PARAM;
+ }
+
+ /* Validate PE settings passed */
+ switch (settings->pe_filt_num) {
+ case I40E_HASH_FILTER_SIZE_1K:
+ case I40E_HASH_FILTER_SIZE_2K:
+ case I40E_HASH_FILTER_SIZE_4K:
+ case I40E_HASH_FILTER_SIZE_8K:
+ case I40E_HASH_FILTER_SIZE_16K:
+ case I40E_HASH_FILTER_SIZE_32K:
+ case I40E_HASH_FILTER_SIZE_64K:
+ case I40E_HASH_FILTER_SIZE_128K:
+ case I40E_HASH_FILTER_SIZE_256K:
+ case I40E_HASH_FILTER_SIZE_512K:
+ case I40E_HASH_FILTER_SIZE_1M:
+ pe_filt_size = I40E_HASH_FILTER_BASE_SIZE;
+ pe_filt_size <<= (u32)settings->pe_filt_num;
+ break;
+ default:
+ return I40E_ERR_PARAM;
+ }
+
+ switch (settings->pe_cntx_num) {
+ case I40E_DMA_CNTX_SIZE_512:
+ case I40E_DMA_CNTX_SIZE_1K:
+ case I40E_DMA_CNTX_SIZE_2K:
+ case I40E_DMA_CNTX_SIZE_4K:
+ case I40E_DMA_CNTX_SIZE_8K:
+ case I40E_DMA_CNTX_SIZE_16K:
+ case I40E_DMA_CNTX_SIZE_32K:
+ case I40E_DMA_CNTX_SIZE_64K:
+ case I40E_DMA_CNTX_SIZE_128K:
+ case I40E_DMA_CNTX_SIZE_256K:
+ pe_cntx_size = I40E_DMA_CNTX_BASE_SIZE;
+ pe_cntx_size <<= (u32)settings->pe_cntx_num;
+ break;
+ default:
+ return I40E_ERR_PARAM;
+ }
+
+ /* FCHSIZE + FCDSIZE should not be greater than PMFCOEFMAX */
+ val = rd32(hw, I40E_GLHMC_FCOEFMAX);
+ fcoe_fmax = (val & I40E_GLHMC_FCOEFMAX_PMFCOEFMAX_MASK)
+ >> I40E_GLHMC_FCOEFMAX_PMFCOEFMAX_SHIFT;
+ if (fcoe_filt_size + fcoe_cntx_size > fcoe_fmax)
+ return I40E_ERR_INVALID_SIZE;
+
+ return I40E_SUCCESS;
+}
+
+/**
+ * i40e_set_filter_control
+ * @hw: pointer to the hardware structure
+ * @settings: Filter control settings
+ *
+ * Set the Queue Filters for PE/FCoE and enable filters required
+ * for a single PF. It is expected that these settings are programmed
+ * at the driver initialization time.
+ **/
+enum i40e_status_code i40e_set_filter_control(struct i40e_hw *hw,
+ struct i40e_filter_control_settings *settings)
+{
+ enum i40e_status_code ret = I40E_SUCCESS;
+ u32 hash_lut_size = 0;
+ u32 val;
+
+ if (!settings)
+ return I40E_ERR_PARAM;
+
+ /* Validate the input settings */
+ ret = i40e_validate_filter_settings(hw, settings);
+ if (ret)
+ return ret;
+
+ /* Read the PF Queue Filter control register */
+ val = rd32(hw, I40E_PFQF_CTL_0);
+
+ /* Program required PE hash buckets for the PF */
+ val &= ~I40E_PFQF_CTL_0_PEHSIZE_MASK;
+ val |= ((u32)settings->pe_filt_num << I40E_PFQF_CTL_0_PEHSIZE_SHIFT) &
+ I40E_PFQF_CTL_0_PEHSIZE_MASK;
+ /* Program required PE contexts for the PF */
+ val &= ~I40E_PFQF_CTL_0_PEDSIZE_MASK;
+ val |= ((u32)settings->pe_cntx_num << I40E_PFQF_CTL_0_PEDSIZE_SHIFT) &
+ I40E_PFQF_CTL_0_PEDSIZE_MASK;
+
+ /* Program required FCoE hash buckets for the PF */
+ val &= ~I40E_PFQF_CTL_0_PFFCHSIZE_MASK;
+ val |= ((u32)settings->fcoe_filt_num <<
+ I40E_PFQF_CTL_0_PFFCHSIZE_SHIFT) &
+ I40E_PFQF_CTL_0_PFFCHSIZE_MASK;
+ /* Program required FCoE DDP contexts for the PF */
+ val &= ~I40E_PFQF_CTL_0_PFFCDSIZE_MASK;
+ val |= ((u32)settings->fcoe_cntx_num <<
+ I40E_PFQF_CTL_0_PFFCDSIZE_SHIFT) &
+ I40E_PFQF_CTL_0_PFFCDSIZE_MASK;
+
+ /* Program Hash LUT size for the PF */
+ val &= ~I40E_PFQF_CTL_0_HASHLUTSIZE_MASK;
+ if (settings->hash_lut_size == I40E_HASH_LUT_SIZE_512)
+ hash_lut_size = 1;
+ val |= (hash_lut_size << I40E_PFQF_CTL_0_HASHLUTSIZE_SHIFT) &
+ I40E_PFQF_CTL_0_HASHLUTSIZE_MASK;
+
+ /* Enable FDIR, Ethertype and MACVLAN filters for PF and VFs */
+ if (settings->enable_fdir)
+ val |= I40E_PFQF_CTL_0_FD_ENA_MASK;
+ if (settings->enable_ethtype)
+ val |= I40E_PFQF_CTL_0_ETYPE_ENA_MASK;
+ if (settings->enable_macvlan)
+ val |= I40E_PFQF_CTL_0_MACVLAN_ENA_MASK;
+
+ wr32(hw, I40E_PFQF_CTL_0, val);
+
+ return I40E_SUCCESS;
+}
+
+/**
+ * i40e_aq_add_rem_control_packet_filter - Add or Remove Control Packet Filter
+ * @hw: pointer to the hw struct
+ * @mac_addr: MAC address to use in the filter
+ * @ethtype: Ethertype to use in the filter
+ * @flags: Flags that needs to be applied to the filter
+ * @vsi_seid: seid of the control VSI
+ * @queue: VSI queue number to send the packet to
+ * @is_add: Add control packet filter if True else remove
+ * @stats: Structure to hold information on control filter counts
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * This command will Add or Remove control packet filter for a control VSI.
+ * In return it will update the total number of perfect filter count in
+ * the stats member.
+ **/
+enum i40e_status_code i40e_aq_add_rem_control_packet_filter(struct i40e_hw *hw,
+ u8 *mac_addr, u16 ethtype, u16 flags,
+ u16 vsi_seid, u16 queue, bool is_add,
+ struct i40e_control_filter_stats *stats,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_add_remove_control_packet_filter *cmd =
+ (struct i40e_aqc_add_remove_control_packet_filter *)
+ &desc.params.raw;
+ struct i40e_aqc_add_remove_control_packet_filter_completion *resp =
+ (struct i40e_aqc_add_remove_control_packet_filter_completion *)
+ &desc.params.raw;
+ enum i40e_status_code status;
+
+ if (vsi_seid == 0)
+ return I40E_ERR_PARAM;
+
+ if (is_add) {
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_add_control_packet_filter);
+ cmd->queue = CPU_TO_LE16(queue);
+ } else {
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_remove_control_packet_filter);
+ }
+
+ if (mac_addr)
+ i40e_memcpy(cmd->mac, mac_addr, I40E_ETH_LENGTH_OF_ADDRESS,
+ I40E_NONDMA_TO_NONDMA);
+
+ cmd->etype = CPU_TO_LE16(ethtype);
+ cmd->flags = CPU_TO_LE16(flags);
+ cmd->seid = CPU_TO_LE16(vsi_seid);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ if (!status && stats) {
+ stats->mac_etype_used = LE16_TO_CPU(resp->mac_etype_used);
+ stats->etype_used = LE16_TO_CPU(resp->etype_used);
+ stats->mac_etype_free = LE16_TO_CPU(resp->mac_etype_free);
+ stats->etype_free = LE16_TO_CPU(resp->etype_free);
+ }
+
+ return status;
+}
+
+/**
+ * i40e_aq_add_cloud_filters
+ * @hw: pointer to the hardware structure
+ * @seid: VSI seid to add cloud filters from
+ * @filters: Buffer which contains the filters to be added
+ * @filter_count: number of filters contained in the buffer
+ *
+ * Set the cloud filters for a given VSI. The contents of the
+ * i40e_aqc_add_remove_cloud_filters_element_data are filled
+ * in by the caller of the function.
+ *
+ **/
+enum i40e_status_code i40e_aq_add_cloud_filters(struct i40e_hw *hw,
+ u16 seid,
+ struct i40e_aqc_add_remove_cloud_filters_element_data *filters,
+ u8 filter_count)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_add_remove_cloud_filters *cmd =
+ (struct i40e_aqc_add_remove_cloud_filters *)&desc.params.raw;
+ u16 buff_len;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_add_cloud_filters);
+
+ buff_len = filter_count * sizeof(*filters);
+ desc.datalen = CPU_TO_LE16(buff_len);
+ desc.flags |= CPU_TO_LE16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+ cmd->num_filters = filter_count;
+ cmd->seid = CPU_TO_LE16(seid);
+
+ status = i40e_asq_send_command(hw, &desc, filters, buff_len, NULL);
+
+ return status;
+}
+
+/**
+ * i40e_aq_remove_cloud_filters
+ * @hw: pointer to the hardware structure
+ * @seid: VSI seid to remove cloud filters from
+ * @filters: Buffer which contains the filters to be removed
+ * @filter_count: number of filters contained in the buffer
+ *
+ * Remove the cloud filters for a given VSI. The contents of the
+ * i40e_aqc_add_remove_cloud_filters_element_data are filled
+ * in by the caller of the function.
+ *
+ **/
+enum i40e_status_code i40e_aq_remove_cloud_filters(struct i40e_hw *hw,
+ u16 seid,
+ struct i40e_aqc_add_remove_cloud_filters_element_data *filters,
+ u8 filter_count)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_add_remove_cloud_filters *cmd =
+ (struct i40e_aqc_add_remove_cloud_filters *)&desc.params.raw;
+ enum i40e_status_code status;
+ u16 buff_len;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_remove_cloud_filters);
+
+ buff_len = filter_count * sizeof(*filters);
+ desc.datalen = CPU_TO_LE16(buff_len);
+ desc.flags |= CPU_TO_LE16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+ cmd->num_filters = filter_count;
+ cmd->seid = CPU_TO_LE16(seid);
+
+ status = i40e_asq_send_command(hw, &desc, filters, buff_len, NULL);
+
+ return status;
+}
+
+/**
+ * i40e_aq_alternate_write
+ * @hw: pointer to the hardware structure
+ * @reg_addr0: address of first dword to be read
+ * @reg_val0: value to be written under 'reg_addr0'
+ * @reg_addr1: address of second dword to be read
+ * @reg_val1: value to be written under 'reg_addr1'
+ *
+ * Write one or two dwords to alternate structure. Fields are indicated
+ * by 'reg_addr0' and 'reg_addr1' register numbers.
+ *
+ **/
+enum i40e_status_code i40e_aq_alternate_write(struct i40e_hw *hw,
+ u32 reg_addr0, u32 reg_val0,
+ u32 reg_addr1, u32 reg_val1)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_alternate_write *cmd_resp =
+ (struct i40e_aqc_alternate_write *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_alternate_write);
+ cmd_resp->address0 = CPU_TO_LE32(reg_addr0);
+ cmd_resp->address1 = CPU_TO_LE32(reg_addr1);
+ cmd_resp->data0 = CPU_TO_LE32(reg_val0);
+ cmd_resp->data1 = CPU_TO_LE32(reg_val1);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, NULL);
+
+ return status;
+}
+
+/**
+ * i40e_aq_alternate_write_indirect
+ * @hw: pointer to the hardware structure
+ * @addr: address of a first register to be modified
+ * @dw_count: number of alternate structure fields to write
+ * @buffer: pointer to the command buffer
+ *
+ * Write 'dw_count' dwords from 'buffer' to alternate structure
+ * starting at 'addr'.
+ *
+ **/
+enum i40e_status_code i40e_aq_alternate_write_indirect(struct i40e_hw *hw,
+ u32 addr, u32 dw_count, void *buffer)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_alternate_ind_write *cmd_resp =
+ (struct i40e_aqc_alternate_ind_write *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ if (buffer == NULL)
+ return I40E_ERR_PARAM;
+
+ /* Indirect command */
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_alternate_write_indirect);
+
+ desc.flags |= CPU_TO_LE16(I40E_AQ_FLAG_RD);
+ desc.flags |= CPU_TO_LE16(I40E_AQ_FLAG_BUF);
+ if (dw_count > (I40E_AQ_LARGE_BUF/4))
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_LB);
+
+ cmd_resp->address = CPU_TO_LE32(addr);
+ cmd_resp->length = CPU_TO_LE32(dw_count);
+ cmd_resp->addr_high = CPU_TO_LE32(I40E_HI_WORD((uintptr_t)buffer));
+ cmd_resp->addr_low = CPU_TO_LE32(I40E_LO_DWORD((uintptr_t)buffer));
+
+ status = i40e_asq_send_command(hw, &desc, buffer,
+ I40E_LO_DWORD(4*dw_count), NULL);
+
+ return status;
+}
+
+/**
+ * i40e_aq_alternate_read
+ * @hw: pointer to the hardware structure
+ * @reg_addr0: address of first dword to be read
+ * @reg_val0: pointer for data read from 'reg_addr0'
+ * @reg_addr1: address of second dword to be read
+ * @reg_val1: pointer for data read from 'reg_addr1'
+ *
+ * Read one or two dwords from alternate structure. Fields are indicated
+ * by 'reg_addr0' and 'reg_addr1' register numbers. If 'reg_val1' pointer
+ * is not passed then only register at 'reg_addr0' is read.
+ *
+ **/
+enum i40e_status_code i40e_aq_alternate_read(struct i40e_hw *hw,
+ u32 reg_addr0, u32 *reg_val0,
+ u32 reg_addr1, u32 *reg_val1)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_alternate_write *cmd_resp =
+ (struct i40e_aqc_alternate_write *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ if (reg_val0 == NULL)
+ return I40E_ERR_PARAM;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_alternate_read);
+ cmd_resp->address0 = CPU_TO_LE32(reg_addr0);
+ cmd_resp->address1 = CPU_TO_LE32(reg_addr1);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, NULL);
+
+ if (status == I40E_SUCCESS) {
+ *reg_val0 = LE32_TO_CPU(cmd_resp->data0);
+
+ if (reg_val1 != NULL)
+ *reg_val1 = LE32_TO_CPU(cmd_resp->data1);
+ }
+
+ return status;
+}
+
+/**
+ * i40e_aq_alternate_read_indirect
+ * @hw: pointer to the hardware structure
+ * @addr: address of the alternate structure field
+ * @dw_count: number of alternate structure fields to read
+ * @buffer: pointer to the command buffer
+ *
+ * Read 'dw_count' dwords from alternate structure starting at 'addr' and
+ * place them in 'buffer'. The buffer should be allocated by caller.
+ *
+ **/
+enum i40e_status_code i40e_aq_alternate_read_indirect(struct i40e_hw *hw,
+ u32 addr, u32 dw_count, void *buffer)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_alternate_ind_write *cmd_resp =
+ (struct i40e_aqc_alternate_ind_write *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ if (buffer == NULL)
+ return I40E_ERR_PARAM;
+
+ /* Indirect command */
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_alternate_read_indirect);
+
+ desc.flags |= CPU_TO_LE16(I40E_AQ_FLAG_RD);
+ desc.flags |= CPU_TO_LE16(I40E_AQ_FLAG_BUF);
+ if (dw_count > (I40E_AQ_LARGE_BUF/4))
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_LB);
+
+ cmd_resp->address = CPU_TO_LE32(addr);
+ cmd_resp->length = CPU_TO_LE32(dw_count);
+ cmd_resp->addr_high = CPU_TO_LE32(I40E_HI_DWORD((uintptr_t)buffer));
+ cmd_resp->addr_low = CPU_TO_LE32(I40E_LO_DWORD((uintptr_t)buffer));
+
+ status = i40e_asq_send_command(hw, &desc, buffer,
+ I40E_LO_DWORD(4*dw_count), NULL);
+
+ return status;
+}
+
+/**
+ * i40e_aq_alternate_clear
+ * @hw: pointer to the HW structure.
+ *
+ * Clear the alternate structures of the port from which the function
+ * is called.
+ *
+ **/
+enum i40e_status_code i40e_aq_alternate_clear(struct i40e_hw *hw)
+{
+ struct i40e_aq_desc desc;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_alternate_clear_port);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, NULL);
+
+ return status;
+}
+
+/**
+ * i40e_aq_alternate_write_done
+ * @hw: pointer to the HW structure.
+ * @bios_mode: indicates whether the command is executed by UEFI or legacy BIOS
+ * @reset_needed: indicates the SW should trigger GLOBAL reset
+ *
+ * Indicates to the FW that alternate structures have been changed.
+ *
+ **/
+enum i40e_status_code i40e_aq_alternate_write_done(struct i40e_hw *hw,
+ u8 bios_mode, bool *reset_needed)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_alternate_write_done *cmd =
+ (struct i40e_aqc_alternate_write_done *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ if (reset_needed == NULL)
+ return I40E_ERR_PARAM;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_alternate_write_done);
+
+ cmd->cmd_flags = CPU_TO_LE16(bios_mode);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, NULL);
+ if (!status && reset_needed)
+ *reset_needed = ((LE16_TO_CPU(cmd->cmd_flags) &
+ I40E_AQ_ALTERNATE_RESET_NEEDED) != 0);
+
+ return status;
+}
+
+/**
+ * i40e_aq_set_oem_mode
+ * @hw: pointer to the HW structure.
+ * @oem_mode: the OEM mode to be used
+ *
+ * Sets the device to a specific operating mode. Currently the only supported
+ * mode is no_clp, which causes FW to refrain from using Alternate RAM.
+ *
+ **/
+enum i40e_status_code i40e_aq_set_oem_mode(struct i40e_hw *hw,
+ u8 oem_mode)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_alternate_write_done *cmd =
+ (struct i40e_aqc_alternate_write_done *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_alternate_set_mode);
+
+ cmd->cmd_flags = CPU_TO_LE16(oem_mode);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, NULL);
+
+ return status;
+}
+
+/**
+ * i40e_aq_resume_port_tx
+ * @hw: pointer to the hardware structure
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Resume port's Tx traffic
+ **/
+enum i40e_status_code i40e_aq_resume_port_tx(struct i40e_hw *hw,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_resume_port_tx);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_set_pci_config_data - store PCI bus info
+ * @hw: pointer to hardware structure
+ * @link_status: the link status word from PCI config space
+ *
+ * Stores the PCI bus info (speed, width, type) within the i40e_hw structure
+ **/
+void i40e_set_pci_config_data(struct i40e_hw *hw, u16 link_status)
+{
+ hw->bus.type = i40e_bus_type_pci_express;
+
+ switch (link_status & I40E_PCI_LINK_WIDTH) {
+ case I40E_PCI_LINK_WIDTH_1:
+ hw->bus.width = i40e_bus_width_pcie_x1;
+ break;
+ case I40E_PCI_LINK_WIDTH_2:
+ hw->bus.width = i40e_bus_width_pcie_x2;
+ break;
+ case I40E_PCI_LINK_WIDTH_4:
+ hw->bus.width = i40e_bus_width_pcie_x4;
+ break;
+ case I40E_PCI_LINK_WIDTH_8:
+ hw->bus.width = i40e_bus_width_pcie_x8;
+ break;
+ default:
+ hw->bus.width = i40e_bus_width_unknown;
+ break;
+ }
+
+ switch (link_status & I40E_PCI_LINK_SPEED) {
+ case I40E_PCI_LINK_SPEED_2500:
+ hw->bus.speed = i40e_bus_speed_2500;
+ break;
+ case I40E_PCI_LINK_SPEED_5000:
+ hw->bus.speed = i40e_bus_speed_5000;
+ break;
+ case I40E_PCI_LINK_SPEED_8000:
+ hw->bus.speed = i40e_bus_speed_8000;
+ break;
+ default:
+ hw->bus.speed = i40e_bus_speed_unknown;
+ break;
+ }
+}
+
+/**
+ * i40e_aq_debug_dump
+ * @hw: pointer to the hardware structure
+ * @cluster_id: specific cluster to dump
+ * @table_id: table id within cluster
+ * @start_index: index of line in the block to read
+ * @buff_size: dump buffer size
+ * @buff: dump buffer
+ * @ret_buff_size: actual buffer size returned
+ * @ret_next_table: next block to read
+ * @ret_next_index: next index to read
+ *
+ * Dump internal FW/HW data for debug purposes.
+ *
+ **/
+enum i40e_status_code i40e_aq_debug_dump(struct i40e_hw *hw, u8 cluster_id,
+ u8 table_id, u32 start_index, u16 buff_size,
+ void *buff, u16 *ret_buff_size,
+ u8 *ret_next_table, u32 *ret_next_index,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_debug_dump_internals *cmd =
+ (struct i40e_aqc_debug_dump_internals *)&desc.params.raw;
+ struct i40e_aqc_debug_dump_internals *resp =
+ (struct i40e_aqc_debug_dump_internals *)&desc.params.raw;
+ enum i40e_status_code status;
+
+ if (buff_size == 0 || !buff)
+ return I40E_ERR_PARAM;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_debug_dump_internals);
+ /* Indirect Command */
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_BUF);
+ if (buff_size > I40E_AQ_LARGE_BUF)
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_LB);
+
+ cmd->cluster_id = cluster_id;
+ cmd->table_id = table_id;
+ cmd->idx = CPU_TO_LE32(start_index);
+
+ desc.datalen = CPU_TO_LE16(buff_size);
+
+ status = i40e_asq_send_command(hw, &desc, buff, buff_size, cmd_details);
+ if (!status) {
+ if (ret_buff_size != NULL)
+ *ret_buff_size = LE16_TO_CPU(desc.datalen);
+ if (ret_next_table != NULL)
+ *ret_next_table = resp->table_id;
+ if (ret_next_index != NULL)
+ *ret_next_index = LE32_TO_CPU(resp->idx);
+ }
+
+ return status;
+}
+
+/**
+ * i40e_read_bw_from_alt_ram
+ * @hw: pointer to the hardware structure
+ * @max_bw: pointer for max_bw read
+ * @min_bw: pointer for min_bw read
+ * @min_valid: pointer for bool that is TRUE if min_bw is a valid value
+ * @max_valid: pointer for bool that is TRUE if max_bw is a valid value
+ *
+ * Read bw from the alternate ram for the given pf
+ **/
+enum i40e_status_code i40e_read_bw_from_alt_ram(struct i40e_hw *hw,
+ u32 *max_bw, u32 *min_bw,
+ bool *min_valid, bool *max_valid)
+{
+ enum i40e_status_code status;
+ u32 max_bw_addr, min_bw_addr;
+
+ /* Calculate the address of the min/max bw registers */
+ max_bw_addr = I40E_ALT_STRUCT_FIRST_PF_OFFSET +
+ I40E_ALT_STRUCT_MAX_BW_OFFSET +
+ (I40E_ALT_STRUCT_DWORDS_PER_PF * hw->pf_id);
+ min_bw_addr = I40E_ALT_STRUCT_FIRST_PF_OFFSET +
+ I40E_ALT_STRUCT_MIN_BW_OFFSET +
+ (I40E_ALT_STRUCT_DWORDS_PER_PF * hw->pf_id);
+
+ /* Read the bandwidths from alt ram */
+ status = i40e_aq_alternate_read(hw, max_bw_addr, max_bw,
+ min_bw_addr, min_bw);
+
+ if (*min_bw & I40E_ALT_BW_VALID_MASK)
+ *min_valid = TRUE;
+ else
+ *min_valid = FALSE;
+
+ if (*max_bw & I40E_ALT_BW_VALID_MASK)
+ *max_valid = TRUE;
+ else
+ *max_valid = FALSE;
+
+ return status;
+}
+
+/**
+ * i40e_aq_configure_partition_bw
+ * @hw: pointer to the hardware structure
+ * @bw_data: Buffer holding valid pfs and bw limits
+ * @cmd_details: pointer to command details
+ *
+ * Configure partitions guaranteed/max bw
+ **/
+enum i40e_status_code i40e_aq_configure_partition_bw(struct i40e_hw *hw,
+ struct i40e_aqc_configure_partition_bw_data *bw_data,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ enum i40e_status_code status;
+ struct i40e_aq_desc desc;
+ u16 bwd_size = sizeof(*bw_data);
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_configure_partition_bw);
+
+ /* Indirect command */
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_BUF);
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_RD);
+
+ if (bwd_size > I40E_AQ_LARGE_BUF)
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_LB);
+
+ desc.datalen = CPU_TO_LE16(bwd_size);
+
+ status = i40e_asq_send_command(hw, &desc, bw_data, bwd_size, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_send_msg_to_pf
+ * @hw: pointer to the hardware structure
+ * @v_opcode: opcodes for VF-PF communication
+ * @v_retval: return error code
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ * @cmd_details: pointer to command details
+ *
+ * Send message to PF driver using admin queue. By default, this message
+ * is sent asynchronously, i.e. i40e_asq_send_command() does not wait for
+ * completion before returning.
+ **/
+enum i40e_status_code i40e_aq_send_msg_to_pf(struct i40e_hw *hw,
+ enum i40e_virtchnl_ops v_opcode,
+ enum i40e_status_code v_retval,
+ u8 *msg, u16 msglen,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_asq_cmd_details details;
+ enum i40e_status_code status;
+
+ i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_send_msg_to_pf);
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_SI);
+ desc.cookie_high = CPU_TO_LE32(v_opcode);
+ desc.cookie_low = CPU_TO_LE32(v_retval);
+ if (msglen) {
+ desc.flags |= CPU_TO_LE16((u16)(I40E_AQ_FLAG_BUF
+ | I40E_AQ_FLAG_RD));
+ if (msglen > I40E_AQ_LARGE_BUF)
+ desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_LB);
+ desc.datalen = CPU_TO_LE16(msglen);
+ }
+ if (!cmd_details) {
+ i40e_memset(&details, 0, sizeof(details), I40E_NONDMA_MEM);
+ details.async = TRUE;
+ cmd_details = &details;
+ }
+ status = i40e_asq_send_command(hw, (struct i40e_aq_desc *)&desc, msg,
+ msglen, cmd_details);
+ return status;
+}
+
+/**
+ * i40e_vf_parse_hw_config
+ * @hw: pointer to the hardware structure
+ * @msg: pointer to the virtual channel VF resource structure
+ *
+ * Given a VF resource message from the PF, populate the hw struct
+ * with appropriate information.
+ **/
+void i40e_vf_parse_hw_config(struct i40e_hw *hw,
+ struct i40e_virtchnl_vf_resource *msg)
+{
+ struct i40e_virtchnl_vsi_resource *vsi_res;
+ int i;
+
+ vsi_res = &msg->vsi_res[0];
+
+ hw->dev_caps.num_vsis = msg->num_vsis;
+ hw->dev_caps.num_rx_qp = msg->num_queue_pairs;
+ hw->dev_caps.num_tx_qp = msg->num_queue_pairs;
+ hw->dev_caps.num_msix_vectors_vf = msg->max_vectors;
+ hw->dev_caps.dcb = msg->vf_offload_flags &
+ I40E_VIRTCHNL_VF_OFFLOAD_L2;
+ hw->dev_caps.fcoe = (msg->vf_offload_flags &
+ I40E_VIRTCHNL_VF_OFFLOAD_FCOE) ? 1 : 0;
+ hw->dev_caps.iwarp = (msg->vf_offload_flags &
+ I40E_VIRTCHNL_VF_OFFLOAD_IWARP) ? 1 : 0;
+ for (i = 0; i < msg->num_vsis; i++) {
+ if (vsi_res->vsi_type == I40E_VSI_SRIOV) {
+ i40e_memcpy(hw->mac.perm_addr,
+ vsi_res->default_mac_addr,
+ I40E_ETH_LENGTH_OF_ADDRESS,
+ I40E_NONDMA_TO_NONDMA);
+ i40e_memcpy(hw->mac.addr, vsi_res->default_mac_addr,
+ I40E_ETH_LENGTH_OF_ADDRESS,
+ I40E_NONDMA_TO_NONDMA);
+ }
+ vsi_res++;
+ }
+}
+
+/**
+ * i40e_vf_reset
+ * @hw: pointer to the hardware structure
+ *
+ * Send a VF_RESET message to the PF. Does not wait for response from PF
+ * as none will be forthcoming. Immediately after calling this function,
+ * the admin queue should be shut down and (optionally) reinitialized.
+ **/
+enum i40e_status_code i40e_vf_reset(struct i40e_hw *hw)
+{
+ return i40e_aq_send_msg_to_pf(hw, I40E_VIRTCHNL_OP_RESET_VF,
+ I40E_SUCCESS, NULL, 0, NULL);
+}
diff --git a/usr/src/uts/common/io/i40e/core/i40e_devids.h b/usr/src/uts/common/io/i40e/core/i40e_devids.h
new file mode 100644
index 0000000000..5b927bed9f
--- /dev/null
+++ b/usr/src/uts/common/io/i40e/core/i40e_devids.h
@@ -0,0 +1,68 @@
+/******************************************************************************
+
+ Copyright (c) 2013-2015, Intel Corporation
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+
+******************************************************************************/
+/*$FreeBSD: head/sys/dev/ixl/i40e_devids.h 284049 2015-06-05 22:52:42Z jfv $*/
+
+#ifndef _I40E_DEVIDS_H_
+#define _I40E_DEVIDS_H_
+
+/* Vendor ID */
+#define I40E_INTEL_VENDOR_ID 0x8086
+
+/* Device IDs */
+#define I40E_DEV_ID_SFP_XL710 0x1572
+#define I40E_DEV_ID_QEMU 0x1574
+#define I40E_DEV_ID_KX_A 0x157F
+#define I40E_DEV_ID_KX_B 0x1580
+#define I40E_DEV_ID_KX_C 0x1581
+#define I40E_DEV_ID_QSFP_A 0x1583
+#define I40E_DEV_ID_QSFP_B 0x1584
+#define I40E_DEV_ID_QSFP_C 0x1585
+#define I40E_DEV_ID_10G_BASE_T 0x1586
+#define I40E_DEV_ID_20G_KR2 0x1587
+#define I40E_DEV_ID_20G_KR2_A 0x1588
+#define I40E_DEV_ID_10G_BASE_T4 0x1589
+#define I40E_DEV_ID_VF 0x154C
+#define I40E_DEV_ID_VF_HV 0x1571
+#ifdef X722_SUPPORT
+#define I40E_DEV_ID_SFP_X722 0x37D0
+#define I40E_DEV_ID_1G_BASE_T_X722 0x37D1
+#define I40E_DEV_ID_10G_BASE_T_X722 0x37D2
+#define I40E_DEV_ID_X722_VF 0x37CD
+#define I40E_DEV_ID_X722_VF_HV 0x37D9
+#endif /* X722_SUPPORT */
+
+#define i40e_is_40G_device(d) ((d) == I40E_DEV_ID_QSFP_A || \
+ (d) == I40E_DEV_ID_QSFP_B || \
+ (d) == I40E_DEV_ID_QSFP_C)
+
+#endif /* _I40E_DEVIDS_H_ */
diff --git a/usr/src/uts/common/io/i40e/core/i40e_hmc.c b/usr/src/uts/common/io/i40e/core/i40e_hmc.c
new file mode 100644
index 0000000000..3f0e6e8d5b
--- /dev/null
+++ b/usr/src/uts/common/io/i40e/core/i40e_hmc.c
@@ -0,0 +1,373 @@
+/******************************************************************************
+
+ Copyright (c) 2013-2015, Intel Corporation
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+
+******************************************************************************/
+/*$FreeBSD: head/sys/dev/ixl/i40e_hmc.c 284049 2015-06-05 22:52:42Z jfv $*/
+
+#include "i40e_osdep.h"
+#include "i40e_register.h"
+#include "i40e_status.h"
+#include "i40e_alloc.h"
+#include "i40e_hmc.h"
+#ifndef I40E_NO_TYPE_HEADER
+#include "i40e_type.h"
+#endif
+
+/**
+ * i40e_add_sd_table_entry - Adds a segment descriptor to the table
+ * @hw: pointer to our hw struct
+ * @hmc_info: pointer to the HMC configuration information struct
+ * @sd_index: segment descriptor index to manipulate
+ * @type: what type of segment descriptor we're manipulating
+ * @direct_mode_sz: size to alloc in direct mode
+ **/
+enum i40e_status_code i40e_add_sd_table_entry(struct i40e_hw *hw,
+ struct i40e_hmc_info *hmc_info,
+ u32 sd_index,
+ enum i40e_sd_entry_type type,
+ u64 direct_mode_sz)
+{
+ enum i40e_status_code ret_code = I40E_SUCCESS;
+ struct i40e_hmc_sd_entry *sd_entry;
+ enum i40e_memory_type mem_type;
+ bool dma_mem_alloc_done = FALSE;
+ struct i40e_dma_mem mem;
+ u64 alloc_len;
+
+ if (NULL == hmc_info->sd_table.sd_entry) {
+ ret_code = I40E_ERR_BAD_PTR;
+ DEBUGOUT("i40e_add_sd_table_entry: bad sd_entry\n");
+ goto exit;
+ }
+
+ if (sd_index >= hmc_info->sd_table.sd_cnt) {
+ ret_code = I40E_ERR_INVALID_SD_INDEX;
+ DEBUGOUT("i40e_add_sd_table_entry: bad sd_index\n");
+ goto exit;
+ }
+
+ sd_entry = &hmc_info->sd_table.sd_entry[sd_index];
+ if (!sd_entry->valid) {
+ if (I40E_SD_TYPE_PAGED == type) {
+ mem_type = i40e_mem_pd;
+ alloc_len = I40E_HMC_PAGED_BP_SIZE;
+ } else {
+ mem_type = i40e_mem_bp_jumbo;
+ alloc_len = direct_mode_sz;
+ }
+
+ /* allocate a 4K pd page or 2M backing page */
+ ret_code = i40e_allocate_dma_mem(hw, &mem, mem_type, alloc_len,
+ I40E_HMC_PD_BP_BUF_ALIGNMENT);
+ if (ret_code)
+ goto exit;
+ dma_mem_alloc_done = TRUE;
+ if (I40E_SD_TYPE_PAGED == type) {
+ ret_code = i40e_allocate_virt_mem(hw,
+ &sd_entry->u.pd_table.pd_entry_virt_mem,
+ sizeof(struct i40e_hmc_pd_entry) * 512);
+ if (ret_code)
+ goto exit;
+ sd_entry->u.pd_table.pd_entry =
+ (struct i40e_hmc_pd_entry *)
+ sd_entry->u.pd_table.pd_entry_virt_mem.va;
+ i40e_memcpy(&sd_entry->u.pd_table.pd_page_addr,
+ &mem, sizeof(struct i40e_dma_mem),
+ I40E_NONDMA_TO_NONDMA);
+ } else {
+ i40e_memcpy(&sd_entry->u.bp.addr,
+ &mem, sizeof(struct i40e_dma_mem),
+ I40E_NONDMA_TO_NONDMA);
+ sd_entry->u.bp.sd_pd_index = sd_index;
+ }
+ /* initialize the sd entry */
+ hmc_info->sd_table.sd_entry[sd_index].entry_type = type;
+
+ /* increment the ref count */
+ I40E_INC_SD_REFCNT(&hmc_info->sd_table);
+ }
+ /* Increment backing page reference count */
+ if (I40E_SD_TYPE_DIRECT == sd_entry->entry_type)
+ I40E_INC_BP_REFCNT(&sd_entry->u.bp);
+exit:
+ if (I40E_SUCCESS != ret_code)
+ if (dma_mem_alloc_done)
+ i40e_free_dma_mem(hw, &mem);
+
+ return ret_code;
+}
+
+/**
+ * i40e_add_pd_table_entry - Adds page descriptor to the specified table
+ * @hw: pointer to our HW structure
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @pd_index: which page descriptor index to manipulate
+ * @rsrc_pg: if not NULL, use preallocated page instead of allocating new one.
+ *
+ * This function:
+ * 1. Initializes the pd entry
+ * 2. Adds pd_entry in the pd_table
+ * 3. Mark the entry valid in i40e_hmc_pd_entry structure
+ * 4. Initializes the pd_entry's ref count to 1
+ * assumptions:
+ * 1. The memory for pd should be pinned down, physically contiguous and
+ * aligned on 4K boundary and zeroed memory.
+ * 2. It should be 4K in size.
+ **/
+enum i40e_status_code i40e_add_pd_table_entry(struct i40e_hw *hw,
+ struct i40e_hmc_info *hmc_info,
+ u32 pd_index,
+ struct i40e_dma_mem *rsrc_pg)
+{
+ enum i40e_status_code ret_code = I40E_SUCCESS;
+ struct i40e_hmc_pd_table *pd_table;
+ struct i40e_hmc_pd_entry *pd_entry;
+ struct i40e_dma_mem mem;
+ struct i40e_dma_mem *page = &mem;
+ u32 sd_idx, rel_pd_idx;
+ u64 *pd_addr;
+ u64 page_desc;
+
+ if (pd_index / I40E_HMC_PD_CNT_IN_SD >= hmc_info->sd_table.sd_cnt) {
+ ret_code = I40E_ERR_INVALID_PAGE_DESC_INDEX;
+ DEBUGOUT("i40e_add_pd_table_entry: bad pd_index\n");
+ goto exit;
+ }
+
+ /* find corresponding sd */
+ sd_idx = (pd_index / I40E_HMC_PD_CNT_IN_SD);
+ if (I40E_SD_TYPE_PAGED !=
+ hmc_info->sd_table.sd_entry[sd_idx].entry_type)
+ goto exit;
+
+ rel_pd_idx = (pd_index % I40E_HMC_PD_CNT_IN_SD);
+ pd_table = &hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
+ pd_entry = &pd_table->pd_entry[rel_pd_idx];
+ if (!pd_entry->valid) {
+ if (rsrc_pg) {
+ pd_entry->rsrc_pg = TRUE;
+ page = rsrc_pg;
+ } else {
+ /* allocate a 4K backing page */
+ ret_code = i40e_allocate_dma_mem(hw, page, i40e_mem_bp,
+ I40E_HMC_PAGED_BP_SIZE,
+ I40E_HMC_PD_BP_BUF_ALIGNMENT);
+ if (ret_code)
+ goto exit;
+ pd_entry->rsrc_pg = FALSE;
+ }
+
+ i40e_memcpy(&pd_entry->bp.addr, page,
+ sizeof(struct i40e_dma_mem), I40E_NONDMA_TO_NONDMA);
+ pd_entry->bp.sd_pd_index = pd_index;
+ pd_entry->bp.entry_type = I40E_SD_TYPE_PAGED;
+ /* Set page address and valid bit */
+ page_desc = page->pa | 0x1;
+
+ pd_addr = (u64 *)pd_table->pd_page_addr.va;
+ pd_addr += rel_pd_idx;
+
+ /* Add the backing page physical address in the pd entry */
+ i40e_memcpy(pd_addr, &page_desc, sizeof(u64),
+ I40E_NONDMA_TO_DMA);
+
+ pd_entry->sd_index = sd_idx;
+ pd_entry->valid = TRUE;
+ I40E_INC_PD_REFCNT(pd_table);
+ }
+ I40E_INC_BP_REFCNT(&pd_entry->bp);
+exit:
+ return ret_code;
+}
+
+/**
+ * i40e_remove_pd_bp - remove a backing page from a page descriptor
+ * @hw: pointer to our HW structure
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: the page index
+ * @is_pf: distinguishes a VF from a PF
+ *
+ * This function:
+ * 1. Marks the entry in pd tabe (for paged address mode) or in sd table
+ * (for direct address mode) invalid.
+ * 2. Write to register PMPDINV to invalidate the backing page in FV cache
+ * 3. Decrement the ref count for the pd _entry
+ * assumptions:
+ * 1. Caller can deallocate the memory used by backing storage after this
+ * function returns.
+ **/
+enum i40e_status_code i40e_remove_pd_bp(struct i40e_hw *hw,
+ struct i40e_hmc_info *hmc_info,
+ u32 idx)
+{
+ enum i40e_status_code ret_code = I40E_SUCCESS;
+ struct i40e_hmc_pd_entry *pd_entry;
+ struct i40e_hmc_pd_table *pd_table;
+ struct i40e_hmc_sd_entry *sd_entry;
+ u32 sd_idx, rel_pd_idx;
+ u64 *pd_addr;
+
+ /* calculate index */
+ sd_idx = idx / I40E_HMC_PD_CNT_IN_SD;
+ rel_pd_idx = idx % I40E_HMC_PD_CNT_IN_SD;
+ if (sd_idx >= hmc_info->sd_table.sd_cnt) {
+ ret_code = I40E_ERR_INVALID_PAGE_DESC_INDEX;
+ DEBUGOUT("i40e_remove_pd_bp: bad idx\n");
+ goto exit;
+ }
+ sd_entry = &hmc_info->sd_table.sd_entry[sd_idx];
+ if (I40E_SD_TYPE_PAGED != sd_entry->entry_type) {
+ ret_code = I40E_ERR_INVALID_SD_TYPE;
+ DEBUGOUT("i40e_remove_pd_bp: wrong sd_entry type\n");
+ goto exit;
+ }
+ /* get the entry and decrease its ref counter */
+ pd_table = &hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
+ pd_entry = &pd_table->pd_entry[rel_pd_idx];
+ I40E_DEC_BP_REFCNT(&pd_entry->bp);
+ if (pd_entry->bp.ref_cnt)
+ goto exit;
+
+ /* mark the entry invalid */
+ pd_entry->valid = FALSE;
+ I40E_DEC_PD_REFCNT(pd_table);
+ pd_addr = (u64 *)pd_table->pd_page_addr.va;
+ pd_addr += rel_pd_idx;
+ i40e_memset(pd_addr, 0, sizeof(u64), I40E_DMA_MEM);
+ I40E_INVALIDATE_PF_HMC_PD(hw, sd_idx, idx);
+
+ /* free memory here */
+ if (!pd_entry->rsrc_pg)
+ ret_code = i40e_free_dma_mem(hw, &(pd_entry->bp.addr));
+ if (I40E_SUCCESS != ret_code)
+ goto exit;
+ if (!pd_table->ref_cnt)
+ i40e_free_virt_mem(hw, &pd_table->pd_entry_virt_mem);
+exit:
+ return ret_code;
+}
+
+/**
+ * i40e_prep_remove_sd_bp - Prepares to remove a backing page from a sd entry
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: the page index
+ **/
+enum i40e_status_code i40e_prep_remove_sd_bp(struct i40e_hmc_info *hmc_info,
+ u32 idx)
+{
+ enum i40e_status_code ret_code = I40E_SUCCESS;
+ struct i40e_hmc_sd_entry *sd_entry;
+
+ /* get the entry and decrease its ref counter */
+ sd_entry = &hmc_info->sd_table.sd_entry[idx];
+ I40E_DEC_BP_REFCNT(&sd_entry->u.bp);
+ if (sd_entry->u.bp.ref_cnt) {
+ ret_code = I40E_ERR_NOT_READY;
+ goto exit;
+ }
+ I40E_DEC_SD_REFCNT(&hmc_info->sd_table);
+
+ /* mark the entry invalid */
+ sd_entry->valid = FALSE;
+exit:
+ return ret_code;
+}
+
+/**
+ * i40e_remove_sd_bp_new - Removes a backing page from a segment descriptor
+ * @hw: pointer to our hw struct
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: the page index
+ * @is_pf: used to distinguish between VF and PF
+ **/
+enum i40e_status_code i40e_remove_sd_bp_new(struct i40e_hw *hw,
+ struct i40e_hmc_info *hmc_info,
+ u32 idx, bool is_pf)
+{
+ struct i40e_hmc_sd_entry *sd_entry;
+
+ if (!is_pf)
+ return I40E_NOT_SUPPORTED;
+
+ /* get the entry and decrease its ref counter */
+ sd_entry = &hmc_info->sd_table.sd_entry[idx];
+ I40E_CLEAR_PF_SD_ENTRY(hw, idx, I40E_SD_TYPE_DIRECT);
+
+ return i40e_free_dma_mem(hw, &(sd_entry->u.bp.addr));
+}
+
+/**
+ * i40e_prep_remove_pd_page - Prepares to remove a PD page from sd entry.
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: segment descriptor index to find the relevant page descriptor
+ **/
+enum i40e_status_code i40e_prep_remove_pd_page(struct i40e_hmc_info *hmc_info,
+ u32 idx)
+{
+ enum i40e_status_code ret_code = I40E_SUCCESS;
+ struct i40e_hmc_sd_entry *sd_entry;
+
+ sd_entry = &hmc_info->sd_table.sd_entry[idx];
+
+ if (sd_entry->u.pd_table.ref_cnt) {
+ ret_code = I40E_ERR_NOT_READY;
+ goto exit;
+ }
+
+ /* mark the entry invalid */
+ sd_entry->valid = FALSE;
+
+ I40E_DEC_SD_REFCNT(&hmc_info->sd_table);
+exit:
+ return ret_code;
+}
+
+/**
+ * i40e_remove_pd_page_new - Removes a PD page from sd entry.
+ * @hw: pointer to our hw struct
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: segment descriptor index to find the relevant page descriptor
+ * @is_pf: used to distinguish between VF and PF
+ **/
+enum i40e_status_code i40e_remove_pd_page_new(struct i40e_hw *hw,
+ struct i40e_hmc_info *hmc_info,
+ u32 idx, bool is_pf)
+{
+ struct i40e_hmc_sd_entry *sd_entry;
+
+ if (!is_pf)
+ return I40E_NOT_SUPPORTED;
+
+ sd_entry = &hmc_info->sd_table.sd_entry[idx];
+ I40E_CLEAR_PF_SD_ENTRY(hw, idx, I40E_SD_TYPE_PAGED);
+
+ return i40e_free_dma_mem(hw, &(sd_entry->u.pd_table.pd_page_addr));
+}
diff --git a/usr/src/uts/common/io/i40e/core/i40e_hmc.h b/usr/src/uts/common/io/i40e/core/i40e_hmc.h
new file mode 100644
index 0000000000..d6e1f93421
--- /dev/null
+++ b/usr/src/uts/common/io/i40e/core/i40e_hmc.h
@@ -0,0 +1,246 @@
+/******************************************************************************
+
+ Copyright (c) 2013-2015, Intel Corporation
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+
+******************************************************************************/
+/*$FreeBSD: head/sys/dev/ixl/i40e_hmc.h 284049 2015-06-05 22:52:42Z jfv $*/
+
+#ifndef _I40E_HMC_H_
+#define _I40E_HMC_H_
+
+#define I40E_HMC_MAX_BP_COUNT 512
+
+/* forward-declare the HW struct for the compiler */
+struct i40e_hw;
+
+#define I40E_HMC_INFO_SIGNATURE 0x484D5347 /* HMSG */
+#define I40E_HMC_PD_CNT_IN_SD 512
+#define I40E_HMC_DIRECT_BP_SIZE 0x200000 /* 2M */
+#define I40E_HMC_PAGED_BP_SIZE 4096
+#define I40E_HMC_PD_BP_BUF_ALIGNMENT 4096
+#define I40E_FIRST_VF_FPM_ID 16
+
+struct i40e_hmc_obj_info {
+ u64 base; /* base addr in FPM */
+ u32 max_cnt; /* max count available for this hmc func */
+ u32 cnt; /* count of objects driver actually wants to create */
+ u64 size; /* size in bytes of one object */
+};
+
+enum i40e_sd_entry_type {
+ I40E_SD_TYPE_INVALID = 0,
+ I40E_SD_TYPE_PAGED = 1,
+ I40E_SD_TYPE_DIRECT = 2
+};
+
+struct i40e_hmc_bp {
+ enum i40e_sd_entry_type entry_type;
+ struct i40e_dma_mem addr; /* populate to be used by hw */
+ u32 sd_pd_index;
+ u32 ref_cnt;
+};
+
+struct i40e_hmc_pd_entry {
+ struct i40e_hmc_bp bp;
+ u32 sd_index;
+ bool rsrc_pg;
+ bool valid;
+};
+
+struct i40e_hmc_pd_table {
+ struct i40e_dma_mem pd_page_addr; /* populate to be used by hw */
+ struct i40e_hmc_pd_entry *pd_entry; /* [512] for sw book keeping */
+ struct i40e_virt_mem pd_entry_virt_mem; /* virt mem for pd_entry */
+
+ u32 ref_cnt;
+ u32 sd_index;
+};
+
+struct i40e_hmc_sd_entry {
+ enum i40e_sd_entry_type entry_type;
+ bool valid;
+
+ union {
+ struct i40e_hmc_pd_table pd_table;
+ struct i40e_hmc_bp bp;
+ } u;
+};
+
+struct i40e_hmc_sd_table {
+ struct i40e_virt_mem addr; /* used to track sd_entry allocations */
+ u32 sd_cnt;
+ u32 ref_cnt;
+ struct i40e_hmc_sd_entry *sd_entry; /* (sd_cnt*512) entries max */
+};
+
+struct i40e_hmc_info {
+ u32 signature;
+ /* equals to pci func num for PF and dynamically allocated for VFs */
+ u8 hmc_fn_id;
+ u16 first_sd_index; /* index of the first available SD */
+
+ /* hmc objects */
+ struct i40e_hmc_obj_info *hmc_obj;
+ struct i40e_virt_mem hmc_obj_virt_mem;
+ struct i40e_hmc_sd_table sd_table;
+};
+
+#define I40E_INC_SD_REFCNT(sd_table) ((sd_table)->ref_cnt++)
+#define I40E_INC_PD_REFCNT(pd_table) ((pd_table)->ref_cnt++)
+#define I40E_INC_BP_REFCNT(bp) ((bp)->ref_cnt++)
+
+#define I40E_DEC_SD_REFCNT(sd_table) ((sd_table)->ref_cnt--)
+#define I40E_DEC_PD_REFCNT(pd_table) ((pd_table)->ref_cnt--)
+#define I40E_DEC_BP_REFCNT(bp) ((bp)->ref_cnt--)
+
+/**
+ * I40E_SET_PF_SD_ENTRY - marks the sd entry as valid in the hardware
+ * @hw: pointer to our hw struct
+ * @pa: pointer to physical address
+ * @sd_index: segment descriptor index
+ * @type: if sd entry is direct or paged
+ **/
+#define I40E_SET_PF_SD_ENTRY(hw, pa, sd_index, type) \
+{ \
+ u32 val1, val2, val3; \
+ val1 = (u32)(I40E_HI_DWORD(pa)); \
+ val2 = (u32)(pa) | (I40E_HMC_MAX_BP_COUNT << \
+ I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT) | \
+ ((((type) == I40E_SD_TYPE_PAGED) ? 0 : 1) << \
+ I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT) | \
+ BIT(I40E_PFHMC_SDDATALOW_PMSDVALID_SHIFT); \
+ val3 = (sd_index) | BIT_ULL(I40E_PFHMC_SDCMD_PMSDWR_SHIFT); \
+ wr32((hw), I40E_PFHMC_SDDATAHIGH, val1); \
+ wr32((hw), I40E_PFHMC_SDDATALOW, val2); \
+ wr32((hw), I40E_PFHMC_SDCMD, val3); \
+}
+
+/**
+ * I40E_CLEAR_PF_SD_ENTRY - marks the sd entry as invalid in the hardware
+ * @hw: pointer to our hw struct
+ * @sd_index: segment descriptor index
+ * @type: if sd entry is direct or paged
+ **/
+#define I40E_CLEAR_PF_SD_ENTRY(hw, sd_index, type) \
+{ \
+ u32 val2, val3; \
+ val2 = (I40E_HMC_MAX_BP_COUNT << \
+ I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT) | \
+ ((((type) == I40E_SD_TYPE_PAGED) ? 0 : 1) << \
+ I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT); \
+ val3 = (sd_index) | BIT_ULL(I40E_PFHMC_SDCMD_PMSDWR_SHIFT); \
+ wr32((hw), I40E_PFHMC_SDDATAHIGH, 0); \
+ wr32((hw), I40E_PFHMC_SDDATALOW, val2); \
+ wr32((hw), I40E_PFHMC_SDCMD, val3); \
+}
+
+/**
+ * I40E_INVALIDATE_PF_HMC_PD - Invalidates the pd cache in the hardware
+ * @hw: pointer to our hw struct
+ * @sd_idx: segment descriptor index
+ * @pd_idx: page descriptor index
+ **/
+#define I40E_INVALIDATE_PF_HMC_PD(hw, sd_idx, pd_idx) \
+ wr32((hw), I40E_PFHMC_PDINV, \
+ (((sd_idx) << I40E_PFHMC_PDINV_PMSDIDX_SHIFT) | \
+ ((pd_idx) << I40E_PFHMC_PDINV_PMPDIDX_SHIFT)))
+
+/**
+ * I40E_FIND_SD_INDEX_LIMIT - finds segment descriptor index limit
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @type: type of HMC resources we're searching
+ * @index: starting index for the object
+ * @cnt: number of objects we're trying to create
+ * @sd_idx: pointer to return index of the segment descriptor in question
+ * @sd_limit: pointer to return the maximum number of segment descriptors
+ *
+ * This function calculates the segment descriptor index and index limit
+ * for the resource defined by i40e_hmc_rsrc_type.
+ **/
+#define I40E_FIND_SD_INDEX_LIMIT(hmc_info, type, index, cnt, sd_idx, sd_limit)\
+{ \
+ u64 fpm_addr, fpm_limit; \
+ fpm_addr = (hmc_info)->hmc_obj[(type)].base + \
+ (hmc_info)->hmc_obj[(type)].size * (index); \
+ fpm_limit = fpm_addr + (hmc_info)->hmc_obj[(type)].size * (cnt);\
+ *(sd_idx) = (u32)(fpm_addr / I40E_HMC_DIRECT_BP_SIZE); \
+ *(sd_limit) = (u32)((fpm_limit - 1) / I40E_HMC_DIRECT_BP_SIZE); \
+ /* add one more to the limit to correct our range */ \
+ *(sd_limit) += 1; \
+}
+
+/**
+ * I40E_FIND_PD_INDEX_LIMIT - finds page descriptor index limit
+ * @hmc_info: pointer to the HMC configuration information struct
+ * @type: HMC resource type we're examining
+ * @idx: starting index for the object
+ * @cnt: number of objects we're trying to create
+ * @pd_index: pointer to return page descriptor index
+ * @pd_limit: pointer to return page descriptor index limit
+ *
+ * Calculates the page descriptor index and index limit for the resource
+ * defined by i40e_hmc_rsrc_type.
+ **/
+#define I40E_FIND_PD_INDEX_LIMIT(hmc_info, type, idx, cnt, pd_index, pd_limit)\
+{ \
+ u64 fpm_adr, fpm_limit; \
+ fpm_adr = (hmc_info)->hmc_obj[(type)].base + \
+ (hmc_info)->hmc_obj[(type)].size * (idx); \
+ fpm_limit = fpm_adr + (hmc_info)->hmc_obj[(type)].size * (cnt); \
+ *(pd_index) = (u32)(fpm_adr / I40E_HMC_PAGED_BP_SIZE); \
+ *(pd_limit) = (u32)((fpm_limit - 1) / I40E_HMC_PAGED_BP_SIZE); \
+ /* add one more to the limit to correct our range */ \
+ *(pd_limit) += 1; \
+}
+enum i40e_status_code i40e_add_sd_table_entry(struct i40e_hw *hw,
+ struct i40e_hmc_info *hmc_info,
+ u32 sd_index,
+ enum i40e_sd_entry_type type,
+ u64 direct_mode_sz);
+
+enum i40e_status_code i40e_add_pd_table_entry(struct i40e_hw *hw,
+ struct i40e_hmc_info *hmc_info,
+ u32 pd_index,
+ struct i40e_dma_mem *rsrc_pg);
+enum i40e_status_code i40e_remove_pd_bp(struct i40e_hw *hw,
+ struct i40e_hmc_info *hmc_info,
+ u32 idx);
+enum i40e_status_code i40e_prep_remove_sd_bp(struct i40e_hmc_info *hmc_info,
+ u32 idx);
+enum i40e_status_code i40e_remove_sd_bp_new(struct i40e_hw *hw,
+ struct i40e_hmc_info *hmc_info,
+ u32 idx, bool is_pf);
+enum i40e_status_code i40e_prep_remove_pd_page(struct i40e_hmc_info *hmc_info,
+ u32 idx);
+enum i40e_status_code i40e_remove_pd_page_new(struct i40e_hw *hw,
+ struct i40e_hmc_info *hmc_info,
+ u32 idx, bool is_pf);
+
+#endif /* _I40E_HMC_H_ */
diff --git a/usr/src/uts/common/io/i40e/core/i40e_lan_hmc.c b/usr/src/uts/common/io/i40e/core/i40e_lan_hmc.c
new file mode 100644
index 0000000000..2b2fa4f8f9
--- /dev/null
+++ b/usr/src/uts/common/io/i40e/core/i40e_lan_hmc.c
@@ -0,0 +1,1412 @@
+/******************************************************************************
+
+ Copyright (c) 2013-2015, Intel Corporation
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+
+******************************************************************************/
+/*$FreeBSD: head/sys/dev/ixl/i40e_lan_hmc.c 284049 2015-06-05 22:52:42Z jfv $*/
+
+#include "i40e_osdep.h"
+#include "i40e_register.h"
+#include "i40e_type.h"
+#include "i40e_hmc.h"
+#include "i40e_lan_hmc.h"
+#include "i40e_prototype.h"
+
+/* lan specific interface functions */
+
+/**
+ * i40e_align_l2obj_base - aligns base object pointer to 512 bytes
+ * @offset: base address offset needing alignment
+ *
+ * Aligns the layer 2 function private memory so it's 512-byte aligned.
+ **/
+static u64 i40e_align_l2obj_base(u64 offset)
+{
+ u64 aligned_offset = offset;
+
+ if ((offset % I40E_HMC_L2OBJ_BASE_ALIGNMENT) > 0)
+ aligned_offset += (I40E_HMC_L2OBJ_BASE_ALIGNMENT -
+ (offset % I40E_HMC_L2OBJ_BASE_ALIGNMENT));
+
+ return aligned_offset;
+}
+
+/**
+ * i40e_calculate_l2fpm_size - calculates layer 2 FPM memory size
+ * @txq_num: number of Tx queues needing backing context
+ * @rxq_num: number of Rx queues needing backing context
+ * @fcoe_cntx_num: amount of FCoE statefull contexts needing backing context
+ * @fcoe_filt_num: number of FCoE filters needing backing context
+ *
+ * Calculates the maximum amount of memory for the function required, based
+ * on the number of resources it must provide context for.
+ **/
+u64 i40e_calculate_l2fpm_size(u32 txq_num, u32 rxq_num,
+ u32 fcoe_cntx_num, u32 fcoe_filt_num)
+{
+ u64 fpm_size = 0;
+
+ fpm_size = txq_num * I40E_HMC_OBJ_SIZE_TXQ;
+ fpm_size = i40e_align_l2obj_base(fpm_size);
+
+ fpm_size += (rxq_num * I40E_HMC_OBJ_SIZE_RXQ);
+ fpm_size = i40e_align_l2obj_base(fpm_size);
+
+ fpm_size += (fcoe_cntx_num * I40E_HMC_OBJ_SIZE_FCOE_CNTX);
+ fpm_size = i40e_align_l2obj_base(fpm_size);
+
+ fpm_size += (fcoe_filt_num * I40E_HMC_OBJ_SIZE_FCOE_FILT);
+ fpm_size = i40e_align_l2obj_base(fpm_size);
+
+ return fpm_size;
+}
+
+/**
+ * i40e_init_lan_hmc - initialize i40e_hmc_info struct
+ * @hw: pointer to the HW structure
+ * @txq_num: number of Tx queues needing backing context
+ * @rxq_num: number of Rx queues needing backing context
+ * @fcoe_cntx_num: amount of FCoE statefull contexts needing backing context
+ * @fcoe_filt_num: number of FCoE filters needing backing context
+ *
+ * This function will be called once per physical function initialization.
+ * It will fill out the i40e_hmc_obj_info structure for LAN objects based on
+ * the driver's provided input, as well as information from the HMC itself
+ * loaded from NVRAM.
+ *
+ * Assumptions:
+ * - HMC Resource Profile has been selected before calling this function.
+ **/
+enum i40e_status_code i40e_init_lan_hmc(struct i40e_hw *hw, u32 txq_num,
+ u32 rxq_num, u32 fcoe_cntx_num,
+ u32 fcoe_filt_num)
+{
+ struct i40e_hmc_obj_info *obj, *full_obj;
+ enum i40e_status_code ret_code = I40E_SUCCESS;
+ u64 l2fpm_size;
+ u32 size_exp;
+
+ hw->hmc.signature = I40E_HMC_INFO_SIGNATURE;
+ hw->hmc.hmc_fn_id = hw->pf_id;
+
+ /* allocate memory for hmc_obj */
+ ret_code = i40e_allocate_virt_mem(hw, &hw->hmc.hmc_obj_virt_mem,
+ sizeof(struct i40e_hmc_obj_info) * I40E_HMC_LAN_MAX);
+ if (ret_code)
+ goto init_lan_hmc_out;
+ hw->hmc.hmc_obj = (struct i40e_hmc_obj_info *)
+ hw->hmc.hmc_obj_virt_mem.va;
+
+ /* The full object will be used to create the LAN HMC SD */
+ full_obj = &hw->hmc.hmc_obj[I40E_HMC_LAN_FULL];
+ full_obj->max_cnt = 0;
+ full_obj->cnt = 0;
+ full_obj->base = 0;
+ full_obj->size = 0;
+
+ /* Tx queue context information */
+ obj = &hw->hmc.hmc_obj[I40E_HMC_LAN_TX];
+ obj->max_cnt = rd32(hw, I40E_GLHMC_LANQMAX);
+ obj->cnt = txq_num;
+ obj->base = 0;
+ size_exp = rd32(hw, I40E_GLHMC_LANTXOBJSZ);
+ obj->size = BIT_ULL(size_exp);
+
+ /* validate values requested by driver don't exceed HMC capacity */
+ if (txq_num > obj->max_cnt) {
+ ret_code = I40E_ERR_INVALID_HMC_OBJ_COUNT;
+ DEBUGOUT3("i40e_init_lan_hmc: Tx context: asks for 0x%x but max allowed is 0x%x, returns error %d\n",
+ txq_num, obj->max_cnt, ret_code);
+ goto init_lan_hmc_out;
+ }
+
+ /* aggregate values into the full LAN object for later */
+ full_obj->max_cnt += obj->max_cnt;
+ full_obj->cnt += obj->cnt;
+
+ /* Rx queue context information */
+ obj = &hw->hmc.hmc_obj[I40E_HMC_LAN_RX];
+ obj->max_cnt = rd32(hw, I40E_GLHMC_LANQMAX);
+ obj->cnt = rxq_num;
+ obj->base = hw->hmc.hmc_obj[I40E_HMC_LAN_TX].base +
+ (hw->hmc.hmc_obj[I40E_HMC_LAN_TX].cnt *
+ hw->hmc.hmc_obj[I40E_HMC_LAN_TX].size);
+ obj->base = i40e_align_l2obj_base(obj->base);
+ size_exp = rd32(hw, I40E_GLHMC_LANRXOBJSZ);
+ obj->size = BIT_ULL(size_exp);
+
+ /* validate values requested by driver don't exceed HMC capacity */
+ if (rxq_num > obj->max_cnt) {
+ ret_code = I40E_ERR_INVALID_HMC_OBJ_COUNT;
+ DEBUGOUT3("i40e_init_lan_hmc: Rx context: asks for 0x%x but max allowed is 0x%x, returns error %d\n",
+ rxq_num, obj->max_cnt, ret_code);
+ goto init_lan_hmc_out;
+ }
+
+ /* aggregate values into the full LAN object for later */
+ full_obj->max_cnt += obj->max_cnt;
+ full_obj->cnt += obj->cnt;
+
+ /* FCoE context information */
+ obj = &hw->hmc.hmc_obj[I40E_HMC_FCOE_CTX];
+ obj->max_cnt = rd32(hw, I40E_GLHMC_FCOEMAX);
+ obj->cnt = fcoe_cntx_num;
+ obj->base = hw->hmc.hmc_obj[I40E_HMC_LAN_RX].base +
+ (hw->hmc.hmc_obj[I40E_HMC_LAN_RX].cnt *
+ hw->hmc.hmc_obj[I40E_HMC_LAN_RX].size);
+ obj->base = i40e_align_l2obj_base(obj->base);
+ size_exp = rd32(hw, I40E_GLHMC_FCOEDDPOBJSZ);
+ obj->size = BIT_ULL(size_exp);
+
+ /* validate values requested by driver don't exceed HMC capacity */
+ if (fcoe_cntx_num > obj->max_cnt) {
+ ret_code = I40E_ERR_INVALID_HMC_OBJ_COUNT;
+ DEBUGOUT3("i40e_init_lan_hmc: FCoE context: asks for 0x%x but max allowed is 0x%x, returns error %d\n",
+ fcoe_cntx_num, obj->max_cnt, ret_code);
+ goto init_lan_hmc_out;
+ }
+
+ /* aggregate values into the full LAN object for later */
+ full_obj->max_cnt += obj->max_cnt;
+ full_obj->cnt += obj->cnt;
+
+ /* FCoE filter information */
+ obj = &hw->hmc.hmc_obj[I40E_HMC_FCOE_FILT];
+ obj->max_cnt = rd32(hw, I40E_GLHMC_FCOEFMAX);
+ obj->cnt = fcoe_filt_num;
+ obj->base = hw->hmc.hmc_obj[I40E_HMC_FCOE_CTX].base +
+ (hw->hmc.hmc_obj[I40E_HMC_FCOE_CTX].cnt *
+ hw->hmc.hmc_obj[I40E_HMC_FCOE_CTX].size);
+ obj->base = i40e_align_l2obj_base(obj->base);
+ size_exp = rd32(hw, I40E_GLHMC_FCOEFOBJSZ);
+ obj->size = BIT_ULL(size_exp);
+
+ /* validate values requested by driver don't exceed HMC capacity */
+ if (fcoe_filt_num > obj->max_cnt) {
+ ret_code = I40E_ERR_INVALID_HMC_OBJ_COUNT;
+ DEBUGOUT3("i40e_init_lan_hmc: FCoE filter: asks for 0x%x but max allowed is 0x%x, returns error %d\n",
+ fcoe_filt_num, obj->max_cnt, ret_code);
+ goto init_lan_hmc_out;
+ }
+
+ /* aggregate values into the full LAN object for later */
+ full_obj->max_cnt += obj->max_cnt;
+ full_obj->cnt += obj->cnt;
+
+ hw->hmc.first_sd_index = 0;
+ hw->hmc.sd_table.ref_cnt = 0;
+ l2fpm_size = i40e_calculate_l2fpm_size(txq_num, rxq_num, fcoe_cntx_num,
+ fcoe_filt_num);
+ if (NULL == hw->hmc.sd_table.sd_entry) {
+ hw->hmc.sd_table.sd_cnt = (u32)
+ (l2fpm_size + I40E_HMC_DIRECT_BP_SIZE - 1) /
+ I40E_HMC_DIRECT_BP_SIZE;
+
+ /* allocate the sd_entry members in the sd_table */
+ ret_code = i40e_allocate_virt_mem(hw, &hw->hmc.sd_table.addr,
+ (sizeof(struct i40e_hmc_sd_entry) *
+ hw->hmc.sd_table.sd_cnt));
+ if (ret_code)
+ goto init_lan_hmc_out;
+ hw->hmc.sd_table.sd_entry =
+ (struct i40e_hmc_sd_entry *)hw->hmc.sd_table.addr.va;
+ }
+ /* store in the LAN full object for later */
+ full_obj->size = l2fpm_size;
+
+init_lan_hmc_out:
+ return ret_code;
+}
+
+/**
+ * i40e_remove_pd_page - Remove a page from the page descriptor table
+ * @hw: pointer to the HW structure
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: segment descriptor index to find the relevant page descriptor
+ *
+ * This function:
+ * 1. Marks the entry in pd table (for paged address mode) invalid
+ * 2. write to register PMPDINV to invalidate the backing page in FV cache
+ * 3. Decrement the ref count for pd_entry
+ * assumptions:
+ * 1. caller can deallocate the memory used by pd after this function
+ * returns.
+ **/
+static enum i40e_status_code i40e_remove_pd_page(struct i40e_hw *hw,
+ struct i40e_hmc_info *hmc_info,
+ u32 idx)
+{
+ enum i40e_status_code ret_code = I40E_SUCCESS;
+
+ if (i40e_prep_remove_pd_page(hmc_info, idx) == I40E_SUCCESS)
+ ret_code = i40e_remove_pd_page_new(hw, hmc_info, idx, TRUE);
+
+ return ret_code;
+}
+
+/**
+ * i40e_remove_sd_bp - remove a backing page from a segment descriptor
+ * @hw: pointer to our HW structure
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: the page index
+ *
+ * This function:
+ * 1. Marks the entry in sd table (for direct address mode) invalid
+ * 2. write to register PMSDCMD, PMSDDATALOW(PMSDDATALOW.PMSDVALID set
+ * to 0) and PMSDDATAHIGH to invalidate the sd page
+ * 3. Decrement the ref count for the sd_entry
+ * assumptions:
+ * 1. caller can deallocate the memory used by backing storage after this
+ * function returns.
+ **/
+static enum i40e_status_code i40e_remove_sd_bp(struct i40e_hw *hw,
+ struct i40e_hmc_info *hmc_info,
+ u32 idx)
+{
+ enum i40e_status_code ret_code = I40E_SUCCESS;
+
+ if (i40e_prep_remove_sd_bp(hmc_info, idx) == I40E_SUCCESS)
+ ret_code = i40e_remove_sd_bp_new(hw, hmc_info, idx, TRUE);
+
+ return ret_code;
+}
+
+/**
+ * i40e_create_lan_hmc_object - allocate backing store for hmc objects
+ * @hw: pointer to the HW structure
+ * @info: pointer to i40e_hmc_create_obj_info struct
+ *
+ * This will allocate memory for PDs and backing pages and populate
+ * the sd and pd entries.
+ **/
+enum i40e_status_code i40e_create_lan_hmc_object(struct i40e_hw *hw,
+ struct i40e_hmc_lan_create_obj_info *info)
+{
+ enum i40e_status_code ret_code = I40E_SUCCESS;
+ struct i40e_hmc_sd_entry *sd_entry;
+ u32 pd_idx1 = 0, pd_lmt1 = 0;
+ u32 pd_idx = 0, pd_lmt = 0;
+ bool pd_error = FALSE;
+ u32 sd_idx, sd_lmt;
+ u64 sd_size;
+ u32 i, j;
+
+ if (NULL == info) {
+ ret_code = I40E_ERR_BAD_PTR;
+ DEBUGOUT("i40e_create_lan_hmc_object: bad info ptr\n");
+ goto exit;
+ }
+ if (NULL == info->hmc_info) {
+ ret_code = I40E_ERR_BAD_PTR;
+ DEBUGOUT("i40e_create_lan_hmc_object: bad hmc_info ptr\n");
+ goto exit;
+ }
+ if (I40E_HMC_INFO_SIGNATURE != info->hmc_info->signature) {
+ ret_code = I40E_ERR_BAD_PTR;
+ DEBUGOUT("i40e_create_lan_hmc_object: bad signature\n");
+ goto exit;
+ }
+
+ if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
+ ret_code = I40E_ERR_INVALID_HMC_OBJ_INDEX;
+ DEBUGOUT1("i40e_create_lan_hmc_object: returns error %d\n",
+ ret_code);
+ goto exit;
+ }
+ if ((info->start_idx + info->count) >
+ info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
+ ret_code = I40E_ERR_INVALID_HMC_OBJ_COUNT;
+ DEBUGOUT1("i40e_create_lan_hmc_object: returns error %d\n",
+ ret_code);
+ goto exit;
+ }
+
+ /* find sd index and limit */
+ I40E_FIND_SD_INDEX_LIMIT(info->hmc_info, info->rsrc_type,
+ info->start_idx, info->count,
+ &sd_idx, &sd_lmt);
+ if (sd_idx >= info->hmc_info->sd_table.sd_cnt ||
+ sd_lmt > info->hmc_info->sd_table.sd_cnt) {
+ ret_code = I40E_ERR_INVALID_SD_INDEX;
+ goto exit;
+ }
+ /* find pd index */
+ I40E_FIND_PD_INDEX_LIMIT(info->hmc_info, info->rsrc_type,
+ info->start_idx, info->count, &pd_idx,
+ &pd_lmt);
+
+ /* This is to cover for cases where you may not want to have an SD with
+ * the full 2M memory but something smaller. By not filling out any
+ * size, the function will default the SD size to be 2M.
+ */
+ if (info->direct_mode_sz == 0)
+ sd_size = I40E_HMC_DIRECT_BP_SIZE;
+ else
+ sd_size = info->direct_mode_sz;
+
+ /* check if all the sds are valid. If not, allocate a page and
+ * initialize it.
+ */
+ for (j = sd_idx; j < sd_lmt; j++) {
+ /* update the sd table entry */
+ ret_code = i40e_add_sd_table_entry(hw, info->hmc_info, j,
+ info->entry_type,
+ sd_size);
+ if (I40E_SUCCESS != ret_code)
+ goto exit_sd_error;
+ sd_entry = &info->hmc_info->sd_table.sd_entry[j];
+ if (I40E_SD_TYPE_PAGED == sd_entry->entry_type) {
+ /* check if all the pds in this sd are valid. If not,
+ * allocate a page and initialize it.
+ */
+
+ /* find pd_idx and pd_lmt in this sd */
+ pd_idx1 = max(pd_idx, (j * I40E_HMC_MAX_BP_COUNT));
+ pd_lmt1 = min(pd_lmt,
+ ((j + 1) * I40E_HMC_MAX_BP_COUNT));
+ for (i = pd_idx1; i < pd_lmt1; i++) {
+ /* update the pd table entry */
+ ret_code = i40e_add_pd_table_entry(hw,
+ info->hmc_info,
+ i, NULL);
+ if (I40E_SUCCESS != ret_code) {
+ pd_error = TRUE;
+ break;
+ }
+ }
+ if (pd_error) {
+ /* remove the backing pages from pd_idx1 to i */
+ while (i && (i > pd_idx1)) {
+ i40e_remove_pd_bp(hw, info->hmc_info,
+ (i - 1));
+ i--;
+ }
+ }
+ }
+ if (!sd_entry->valid) {
+ sd_entry->valid = TRUE;
+ switch (sd_entry->entry_type) {
+ case I40E_SD_TYPE_PAGED:
+ I40E_SET_PF_SD_ENTRY(hw,
+ sd_entry->u.pd_table.pd_page_addr.pa,
+ j, sd_entry->entry_type);
+ break;
+ case I40E_SD_TYPE_DIRECT:
+ I40E_SET_PF_SD_ENTRY(hw, sd_entry->u.bp.addr.pa,
+ j, sd_entry->entry_type);
+ break;
+ default:
+ ret_code = I40E_ERR_INVALID_SD_TYPE;
+ goto exit;
+ }
+ }
+ }
+ goto exit;
+
+exit_sd_error:
+ /* cleanup for sd entries from j to sd_idx */
+ while (j && (j > sd_idx)) {
+ sd_entry = &info->hmc_info->sd_table.sd_entry[j - 1];
+ switch (sd_entry->entry_type) {
+ case I40E_SD_TYPE_PAGED:
+ pd_idx1 = max(pd_idx,
+ ((j - 1) * I40E_HMC_MAX_BP_COUNT));
+ pd_lmt1 = min(pd_lmt, (j * I40E_HMC_MAX_BP_COUNT));
+ for (i = pd_idx1; i < pd_lmt1; i++)
+ i40e_remove_pd_bp(hw, info->hmc_info, i);
+ i40e_remove_pd_page(hw, info->hmc_info, (j - 1));
+ break;
+ case I40E_SD_TYPE_DIRECT:
+ i40e_remove_sd_bp(hw, info->hmc_info, (j - 1));
+ break;
+ default:
+ ret_code = I40E_ERR_INVALID_SD_TYPE;
+ break;
+ }
+ j--;
+ }
+exit:
+ return ret_code;
+}
+
+/**
+ * i40e_configure_lan_hmc - prepare the HMC backing store
+ * @hw: pointer to the hw structure
+ * @model: the model for the layout of the SD/PD tables
+ *
+ * - This function will be called once per physical function initialization.
+ * - This function will be called after i40e_init_lan_hmc() and before
+ * any LAN/FCoE HMC objects can be created.
+ **/
+enum i40e_status_code i40e_configure_lan_hmc(struct i40e_hw *hw,
+ enum i40e_hmc_model model)
+{
+ struct i40e_hmc_lan_create_obj_info info;
+ u8 hmc_fn_id = hw->hmc.hmc_fn_id;
+ struct i40e_hmc_obj_info *obj;
+ enum i40e_status_code ret_code = I40E_SUCCESS;
+
+ /* Initialize part of the create object info struct */
+ info.hmc_info = &hw->hmc;
+ info.rsrc_type = I40E_HMC_LAN_FULL;
+ info.start_idx = 0;
+ info.direct_mode_sz = hw->hmc.hmc_obj[I40E_HMC_LAN_FULL].size;
+
+ /* Build the SD entry for the LAN objects */
+ switch (model) {
+ case I40E_HMC_MODEL_DIRECT_PREFERRED:
+ case I40E_HMC_MODEL_DIRECT_ONLY:
+ info.entry_type = I40E_SD_TYPE_DIRECT;
+ /* Make one big object, a single SD */
+ info.count = 1;
+ ret_code = i40e_create_lan_hmc_object(hw, &info);
+ if ((ret_code != I40E_SUCCESS) && (model == I40E_HMC_MODEL_DIRECT_PREFERRED))
+ goto try_type_paged;
+ else if (ret_code != I40E_SUCCESS)
+ goto configure_lan_hmc_out;
+ /* else clause falls through the break */
+ break;
+ case I40E_HMC_MODEL_PAGED_ONLY:
+try_type_paged:
+ info.entry_type = I40E_SD_TYPE_PAGED;
+ /* Make one big object in the PD table */
+ info.count = 1;
+ ret_code = i40e_create_lan_hmc_object(hw, &info);
+ if (ret_code != I40E_SUCCESS)
+ goto configure_lan_hmc_out;
+ break;
+ default:
+ /* unsupported type */
+ ret_code = I40E_ERR_INVALID_SD_TYPE;
+ DEBUGOUT1("i40e_configure_lan_hmc: Unknown SD type: %d\n",
+ ret_code);
+ goto configure_lan_hmc_out;
+ }
+
+ /* Configure and program the FPM registers so objects can be created */
+
+ /* Tx contexts */
+ obj = &hw->hmc.hmc_obj[I40E_HMC_LAN_TX];
+ wr32(hw, I40E_GLHMC_LANTXBASE(hmc_fn_id),
+ (u32)((obj->base & I40E_GLHMC_LANTXBASE_FPMLANTXBASE_MASK) / 512));
+ wr32(hw, I40E_GLHMC_LANTXCNT(hmc_fn_id), obj->cnt);
+
+ /* Rx contexts */
+ obj = &hw->hmc.hmc_obj[I40E_HMC_LAN_RX];
+ wr32(hw, I40E_GLHMC_LANRXBASE(hmc_fn_id),
+ (u32)((obj->base & I40E_GLHMC_LANRXBASE_FPMLANRXBASE_MASK) / 512));
+ wr32(hw, I40E_GLHMC_LANRXCNT(hmc_fn_id), obj->cnt);
+
+ /* FCoE contexts */
+ obj = &hw->hmc.hmc_obj[I40E_HMC_FCOE_CTX];
+ wr32(hw, I40E_GLHMC_FCOEDDPBASE(hmc_fn_id),
+ (u32)((obj->base & I40E_GLHMC_FCOEDDPBASE_FPMFCOEDDPBASE_MASK) / 512));
+ wr32(hw, I40E_GLHMC_FCOEDDPCNT(hmc_fn_id), obj->cnt);
+
+ /* FCoE filters */
+ obj = &hw->hmc.hmc_obj[I40E_HMC_FCOE_FILT];
+ wr32(hw, I40E_GLHMC_FCOEFBASE(hmc_fn_id),
+ (u32)((obj->base & I40E_GLHMC_FCOEFBASE_FPMFCOEFBASE_MASK) / 512));
+ wr32(hw, I40E_GLHMC_FCOEFCNT(hmc_fn_id), obj->cnt);
+
+configure_lan_hmc_out:
+ return ret_code;
+}
+
+/**
+ * i40e_delete_hmc_object - remove hmc objects
+ * @hw: pointer to the HW structure
+ * @info: pointer to i40e_hmc_delete_obj_info struct
+ *
+ * This will de-populate the SDs and PDs. It frees
+ * the memory for PDS and backing storage. After this function is returned,
+ * caller should deallocate memory allocated previously for
+ * book-keeping information about PDs and backing storage.
+ **/
+enum i40e_status_code i40e_delete_lan_hmc_object(struct i40e_hw *hw,
+ struct i40e_hmc_lan_delete_obj_info *info)
+{
+ enum i40e_status_code ret_code = I40E_SUCCESS;
+ struct i40e_hmc_pd_table *pd_table;
+ u32 pd_idx, pd_lmt, rel_pd_idx;
+ u32 sd_idx, sd_lmt;
+ u32 i, j;
+
+ if (NULL == info) {
+ ret_code = I40E_ERR_BAD_PTR;
+ DEBUGOUT("i40e_delete_hmc_object: bad info ptr\n");
+ goto exit;
+ }
+ if (NULL == info->hmc_info) {
+ ret_code = I40E_ERR_BAD_PTR;
+ DEBUGOUT("i40e_delete_hmc_object: bad info->hmc_info ptr\n");
+ goto exit;
+ }
+ if (I40E_HMC_INFO_SIGNATURE != info->hmc_info->signature) {
+ ret_code = I40E_ERR_BAD_PTR;
+ DEBUGOUT("i40e_delete_hmc_object: bad hmc_info->signature\n");
+ goto exit;
+ }
+
+ if (NULL == info->hmc_info->sd_table.sd_entry) {
+ ret_code = I40E_ERR_BAD_PTR;
+ DEBUGOUT("i40e_delete_hmc_object: bad sd_entry\n");
+ goto exit;
+ }
+
+ if (NULL == info->hmc_info->hmc_obj) {
+ ret_code = I40E_ERR_BAD_PTR;
+ DEBUGOUT("i40e_delete_hmc_object: bad hmc_info->hmc_obj\n");
+ goto exit;
+ }
+ if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
+ ret_code = I40E_ERR_INVALID_HMC_OBJ_INDEX;
+ DEBUGOUT1("i40e_delete_hmc_object: returns error %d\n",
+ ret_code);
+ goto exit;
+ }
+
+ if ((info->start_idx + info->count) >
+ info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
+ ret_code = I40E_ERR_INVALID_HMC_OBJ_COUNT;
+ DEBUGOUT1("i40e_delete_hmc_object: returns error %d\n",
+ ret_code);
+ goto exit;
+ }
+
+ I40E_FIND_PD_INDEX_LIMIT(info->hmc_info, info->rsrc_type,
+ info->start_idx, info->count, &pd_idx,
+ &pd_lmt);
+
+ for (j = pd_idx; j < pd_lmt; j++) {
+ sd_idx = j / I40E_HMC_PD_CNT_IN_SD;
+
+ if (I40E_SD_TYPE_PAGED !=
+ info->hmc_info->sd_table.sd_entry[sd_idx].entry_type)
+ continue;
+
+ rel_pd_idx = j % I40E_HMC_PD_CNT_IN_SD;
+
+ pd_table =
+ &info->hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
+ if (pd_table->pd_entry[rel_pd_idx].valid) {
+ ret_code = i40e_remove_pd_bp(hw, info->hmc_info, j);
+ if (I40E_SUCCESS != ret_code)
+ goto exit;
+ }
+ }
+
+ /* find sd index and limit */
+ I40E_FIND_SD_INDEX_LIMIT(info->hmc_info, info->rsrc_type,
+ info->start_idx, info->count,
+ &sd_idx, &sd_lmt);
+ if (sd_idx >= info->hmc_info->sd_table.sd_cnt ||
+ sd_lmt > info->hmc_info->sd_table.sd_cnt) {
+ ret_code = I40E_ERR_INVALID_SD_INDEX;
+ goto exit;
+ }
+
+ for (i = sd_idx; i < sd_lmt; i++) {
+ if (!info->hmc_info->sd_table.sd_entry[i].valid)
+ continue;
+ switch (info->hmc_info->sd_table.sd_entry[i].entry_type) {
+ case I40E_SD_TYPE_DIRECT:
+ ret_code = i40e_remove_sd_bp(hw, info->hmc_info, i);
+ if (I40E_SUCCESS != ret_code)
+ goto exit;
+ break;
+ case I40E_SD_TYPE_PAGED:
+ ret_code = i40e_remove_pd_page(hw, info->hmc_info, i);
+ if (I40E_SUCCESS != ret_code)
+ goto exit;
+ break;
+ default:
+ break;
+ }
+ }
+exit:
+ return ret_code;
+}
+
+/**
+ * i40e_shutdown_lan_hmc - Remove HMC backing store, free allocated memory
+ * @hw: pointer to the hw structure
+ *
+ * This must be called by drivers as they are shutting down and being
+ * removed from the OS.
+ **/
+enum i40e_status_code i40e_shutdown_lan_hmc(struct i40e_hw *hw)
+{
+ struct i40e_hmc_lan_delete_obj_info info;
+ enum i40e_status_code ret_code;
+
+ info.hmc_info = &hw->hmc;
+ info.rsrc_type = I40E_HMC_LAN_FULL;
+ info.start_idx = 0;
+ info.count = 1;
+
+ /* delete the object */
+ ret_code = i40e_delete_lan_hmc_object(hw, &info);
+
+ /* free the SD table entry for LAN */
+ i40e_free_virt_mem(hw, &hw->hmc.sd_table.addr);
+ hw->hmc.sd_table.sd_cnt = 0;
+ hw->hmc.sd_table.sd_entry = NULL;
+
+ /* free memory used for hmc_obj */
+ i40e_free_virt_mem(hw, &hw->hmc.hmc_obj_virt_mem);
+ hw->hmc.hmc_obj = NULL;
+
+ return ret_code;
+}
+
+#define I40E_HMC_STORE(_struct, _ele) \
+ offsetof(struct _struct, _ele), \
+ FIELD_SIZEOF(struct _struct, _ele)
+
+struct i40e_context_ele {
+ u16 offset;
+ u16 size_of;
+ u16 width;
+ u16 lsb;
+};
+
+/* LAN Tx Queue Context */
+static struct i40e_context_ele i40e_hmc_txq_ce_info[] = {
+ /* Field Width LSB */
+ {I40E_HMC_STORE(i40e_hmc_obj_txq, head), 13, 0 },
+ {I40E_HMC_STORE(i40e_hmc_obj_txq, new_context), 1, 30 },
+ {I40E_HMC_STORE(i40e_hmc_obj_txq, base), 57, 32 },
+ {I40E_HMC_STORE(i40e_hmc_obj_txq, fc_ena), 1, 89 },
+ {I40E_HMC_STORE(i40e_hmc_obj_txq, timesync_ena), 1, 90 },
+ {I40E_HMC_STORE(i40e_hmc_obj_txq, fd_ena), 1, 91 },
+ {I40E_HMC_STORE(i40e_hmc_obj_txq, alt_vlan_ena), 1, 92 },
+ {I40E_HMC_STORE(i40e_hmc_obj_txq, cpuid), 8, 96 },
+/* line 1 */
+ {I40E_HMC_STORE(i40e_hmc_obj_txq, thead_wb), 13, 0 + 128 },
+ {I40E_HMC_STORE(i40e_hmc_obj_txq, head_wb_ena), 1, 32 + 128 },
+ {I40E_HMC_STORE(i40e_hmc_obj_txq, qlen), 13, 33 + 128 },
+ {I40E_HMC_STORE(i40e_hmc_obj_txq, tphrdesc_ena), 1, 46 + 128 },
+ {I40E_HMC_STORE(i40e_hmc_obj_txq, tphrpacket_ena), 1, 47 + 128 },
+ {I40E_HMC_STORE(i40e_hmc_obj_txq, tphwdesc_ena), 1, 48 + 128 },
+ {I40E_HMC_STORE(i40e_hmc_obj_txq, head_wb_addr), 64, 64 + 128 },
+/* line 7 */
+ {I40E_HMC_STORE(i40e_hmc_obj_txq, crc), 32, 0 + (7 * 128) },
+ {I40E_HMC_STORE(i40e_hmc_obj_txq, rdylist), 10, 84 + (7 * 128) },
+ {I40E_HMC_STORE(i40e_hmc_obj_txq, rdylist_act), 1, 94 + (7 * 128) },
+ { 0 }
+};
+
+/* LAN Rx Queue Context */
+static struct i40e_context_ele i40e_hmc_rxq_ce_info[] = {
+ /* Field Width LSB */
+ { I40E_HMC_STORE(i40e_hmc_obj_rxq, head), 13, 0 },
+ { I40E_HMC_STORE(i40e_hmc_obj_rxq, cpuid), 8, 13 },
+ { I40E_HMC_STORE(i40e_hmc_obj_rxq, base), 57, 32 },
+ { I40E_HMC_STORE(i40e_hmc_obj_rxq, qlen), 13, 89 },
+ { I40E_HMC_STORE(i40e_hmc_obj_rxq, dbuff), 7, 102 },
+ { I40E_HMC_STORE(i40e_hmc_obj_rxq, hbuff), 5, 109 },
+ { I40E_HMC_STORE(i40e_hmc_obj_rxq, dtype), 2, 114 },
+ { I40E_HMC_STORE(i40e_hmc_obj_rxq, dsize), 1, 116 },
+ { I40E_HMC_STORE(i40e_hmc_obj_rxq, crcstrip), 1, 117 },
+ { I40E_HMC_STORE(i40e_hmc_obj_rxq, fc_ena), 1, 118 },
+ { I40E_HMC_STORE(i40e_hmc_obj_rxq, l2tsel), 1, 119 },
+ { I40E_HMC_STORE(i40e_hmc_obj_rxq, hsplit_0), 4, 120 },
+ { I40E_HMC_STORE(i40e_hmc_obj_rxq, hsplit_1), 2, 124 },
+ { I40E_HMC_STORE(i40e_hmc_obj_rxq, showiv), 1, 127 },
+ { I40E_HMC_STORE(i40e_hmc_obj_rxq, rxmax), 14, 174 },
+ { I40E_HMC_STORE(i40e_hmc_obj_rxq, tphrdesc_ena), 1, 193 },
+ { I40E_HMC_STORE(i40e_hmc_obj_rxq, tphwdesc_ena), 1, 194 },
+ { I40E_HMC_STORE(i40e_hmc_obj_rxq, tphdata_ena), 1, 195 },
+ { I40E_HMC_STORE(i40e_hmc_obj_rxq, tphhead_ena), 1, 196 },
+ { I40E_HMC_STORE(i40e_hmc_obj_rxq, lrxqthresh), 3, 198 },
+ { I40E_HMC_STORE(i40e_hmc_obj_rxq, prefena), 1, 201 },
+ { 0 }
+};
+
+/**
+ * i40e_write_byte - replace HMC context byte
+ * @hmc_bits: pointer to the HMC memory
+ * @ce_info: a description of the struct to be read from
+ * @src: the struct to be read from
+ **/
+static void i40e_write_byte(u8 *hmc_bits,
+ struct i40e_context_ele *ce_info,
+ u8 *src)
+{
+ u8 src_byte, dest_byte, mask;
+ u8 *from, *dest;
+ u16 shift_width;
+
+ /* copy from the next struct field */
+ from = src + ce_info->offset;
+
+ /* prepare the bits and mask */
+ shift_width = ce_info->lsb % 8;
+ mask = BIT(ce_info->width) - 1;
+
+ src_byte = *from;
+ src_byte &= mask;
+
+ /* shift to correct alignment */
+ mask <<= shift_width;
+ src_byte <<= shift_width;
+
+ /* get the current bits from the target bit string */
+ dest = hmc_bits + (ce_info->lsb / 8);
+
+ i40e_memcpy(&dest_byte, dest, sizeof(dest_byte), I40E_DMA_TO_NONDMA);
+
+ dest_byte &= ~mask; /* get the bits not changing */
+ dest_byte |= src_byte; /* add in the new bits */
+
+ /* put it all back */
+ i40e_memcpy(dest, &dest_byte, sizeof(dest_byte), I40E_NONDMA_TO_DMA);
+}
+
+/**
+ * i40e_write_word - replace HMC context word
+ * @hmc_bits: pointer to the HMC memory
+ * @ce_info: a description of the struct to be read from
+ * @src: the struct to be read from
+ **/
+static void i40e_write_word(u8 *hmc_bits,
+ struct i40e_context_ele *ce_info,
+ u8 *src)
+{
+ u16 src_word, mask;
+ u8 *from, *dest;
+ u16 shift_width;
+ __le16 dest_word;
+
+ /* copy from the next struct field */
+ from = src + ce_info->offset;
+
+ /* prepare the bits and mask */
+ shift_width = ce_info->lsb % 8;
+ mask = BIT(ce_info->width) - 1;
+
+ /* don't swizzle the bits until after the mask because the mask bits
+ * will be in a different bit position on big endian machines
+ */
+ src_word = *(u16 *)from;
+ src_word &= mask;
+
+ /* shift to correct alignment */
+ mask <<= shift_width;
+ src_word <<= shift_width;
+
+ /* get the current bits from the target bit string */
+ dest = hmc_bits + (ce_info->lsb / 8);
+
+ i40e_memcpy(&dest_word, dest, sizeof(dest_word), I40E_DMA_TO_NONDMA);
+
+ dest_word &= ~(CPU_TO_LE16(mask)); /* get the bits not changing */
+ dest_word |= CPU_TO_LE16(src_word); /* add in the new bits */
+
+ /* put it all back */
+ i40e_memcpy(dest, &dest_word, sizeof(dest_word), I40E_NONDMA_TO_DMA);
+}
+
+/**
+ * i40e_write_dword - replace HMC context dword
+ * @hmc_bits: pointer to the HMC memory
+ * @ce_info: a description of the struct to be read from
+ * @src: the struct to be read from
+ **/
+static void i40e_write_dword(u8 *hmc_bits,
+ struct i40e_context_ele *ce_info,
+ u8 *src)
+{
+ u32 src_dword, mask;
+ u8 *from, *dest;
+ u16 shift_width;
+ __le32 dest_dword;
+
+ /* copy from the next struct field */
+ from = src + ce_info->offset;
+
+ /* prepare the bits and mask */
+ shift_width = ce_info->lsb % 8;
+
+ /* if the field width is exactly 32 on an x86 machine, then the shift
+ * operation will not work because the SHL instructions count is masked
+ * to 5 bits so the shift will do nothing
+ */
+ if (ce_info->width < 32)
+ mask = BIT(ce_info->width) - 1;
+ else
+ mask = ~(u32)0;
+
+ /* don't swizzle the bits until after the mask because the mask bits
+ * will be in a different bit position on big endian machines
+ */
+ src_dword = *(u32 *)from;
+ src_dword &= mask;
+
+ /* shift to correct alignment */
+ mask <<= shift_width;
+ src_dword <<= shift_width;
+
+ /* get the current bits from the target bit string */
+ dest = hmc_bits + (ce_info->lsb / 8);
+
+ i40e_memcpy(&dest_dword, dest, sizeof(dest_dword), I40E_DMA_TO_NONDMA);
+
+ dest_dword &= ~(CPU_TO_LE32(mask)); /* get the bits not changing */
+ dest_dword |= CPU_TO_LE32(src_dword); /* add in the new bits */
+
+ /* put it all back */
+ i40e_memcpy(dest, &dest_dword, sizeof(dest_dword), I40E_NONDMA_TO_DMA);
+}
+
+/**
+ * i40e_write_qword - replace HMC context qword
+ * @hmc_bits: pointer to the HMC memory
+ * @ce_info: a description of the struct to be read from
+ * @src: the struct to be read from
+ **/
+static void i40e_write_qword(u8 *hmc_bits,
+ struct i40e_context_ele *ce_info,
+ u8 *src)
+{
+ u64 src_qword, mask;
+ u8 *from, *dest;
+ u16 shift_width;
+ __le64 dest_qword;
+
+ /* copy from the next struct field */
+ from = src + ce_info->offset;
+
+ /* prepare the bits and mask */
+ shift_width = ce_info->lsb % 8;
+
+ /* if the field width is exactly 64 on an x86 machine, then the shift
+ * operation will not work because the SHL instructions count is masked
+ * to 6 bits so the shift will do nothing
+ */
+ if (ce_info->width < 64)
+ mask = BIT_ULL(ce_info->width) - 1;
+ else
+ mask = ~(u64)0;
+
+ /* don't swizzle the bits until after the mask because the mask bits
+ * will be in a different bit position on big endian machines
+ */
+ src_qword = *(u64 *)from;
+ src_qword &= mask;
+
+ /* shift to correct alignment */
+ mask <<= shift_width;
+ src_qword <<= shift_width;
+
+ /* get the current bits from the target bit string */
+ dest = hmc_bits + (ce_info->lsb / 8);
+
+ i40e_memcpy(&dest_qword, dest, sizeof(dest_qword), I40E_DMA_TO_NONDMA);
+
+ dest_qword &= ~(CPU_TO_LE64(mask)); /* get the bits not changing */
+ dest_qword |= CPU_TO_LE64(src_qword); /* add in the new bits */
+
+ /* put it all back */
+ i40e_memcpy(dest, &dest_qword, sizeof(dest_qword), I40E_NONDMA_TO_DMA);
+}
+
+/**
+ * i40e_read_byte - read HMC context byte into struct
+ * @hmc_bits: pointer to the HMC memory
+ * @ce_info: a description of the struct to be filled
+ * @dest: the struct to be filled
+ **/
+static void i40e_read_byte(u8 *hmc_bits,
+ struct i40e_context_ele *ce_info,
+ u8 *dest)
+{
+ u8 dest_byte, mask;
+ u8 *src, *target;
+ u16 shift_width;
+
+ /* prepare the bits and mask */
+ shift_width = ce_info->lsb % 8;
+ mask = BIT(ce_info->width) - 1;
+
+ /* shift to correct alignment */
+ mask <<= shift_width;
+
+ /* get the current bits from the src bit string */
+ src = hmc_bits + (ce_info->lsb / 8);
+
+ i40e_memcpy(&dest_byte, src, sizeof(dest_byte), I40E_DMA_TO_NONDMA);
+
+ dest_byte &= ~(mask);
+
+ dest_byte >>= shift_width;
+
+ /* get the address from the struct field */
+ target = dest + ce_info->offset;
+
+ /* put it back in the struct */
+ i40e_memcpy(target, &dest_byte, sizeof(dest_byte), I40E_NONDMA_TO_DMA);
+}
+
+/**
+ * i40e_read_word - read HMC context word into struct
+ * @hmc_bits: pointer to the HMC memory
+ * @ce_info: a description of the struct to be filled
+ * @dest: the struct to be filled
+ **/
+static void i40e_read_word(u8 *hmc_bits,
+ struct i40e_context_ele *ce_info,
+ u8 *dest)
+{
+ u16 dest_word, mask;
+ u8 *src, *target;
+ u16 shift_width;
+ __le16 src_word;
+
+ /* prepare the bits and mask */
+ shift_width = ce_info->lsb % 8;
+ mask = BIT(ce_info->width) - 1;
+
+ /* shift to correct alignment */
+ mask <<= shift_width;
+
+ /* get the current bits from the src bit string */
+ src = hmc_bits + (ce_info->lsb / 8);
+
+ i40e_memcpy(&src_word, src, sizeof(src_word), I40E_DMA_TO_NONDMA);
+
+ /* the data in the memory is stored as little endian so mask it
+ * correctly
+ */
+ src_word &= ~(CPU_TO_LE16(mask));
+
+ /* get the data back into host order before shifting */
+ dest_word = LE16_TO_CPU(src_word);
+
+ dest_word >>= shift_width;
+
+ /* get the address from the struct field */
+ target = dest + ce_info->offset;
+
+ /* put it back in the struct */
+ i40e_memcpy(target, &dest_word, sizeof(dest_word), I40E_NONDMA_TO_DMA);
+}
+
+/**
+ * i40e_read_dword - read HMC context dword into struct
+ * @hmc_bits: pointer to the HMC memory
+ * @ce_info: a description of the struct to be filled
+ * @dest: the struct to be filled
+ **/
+static void i40e_read_dword(u8 *hmc_bits,
+ struct i40e_context_ele *ce_info,
+ u8 *dest)
+{
+ u32 dest_dword, mask;
+ u8 *src, *target;
+ u16 shift_width;
+ __le32 src_dword;
+
+ /* prepare the bits and mask */
+ shift_width = ce_info->lsb % 8;
+
+ /* if the field width is exactly 32 on an x86 machine, then the shift
+ * operation will not work because the SHL instructions count is masked
+ * to 5 bits so the shift will do nothing
+ */
+ if (ce_info->width < 32)
+ mask = BIT(ce_info->width) - 1;
+ else
+ mask = ~(u32)0;
+
+ /* shift to correct alignment */
+ mask <<= shift_width;
+
+ /* get the current bits from the src bit string */
+ src = hmc_bits + (ce_info->lsb / 8);
+
+ i40e_memcpy(&src_dword, src, sizeof(src_dword), I40E_DMA_TO_NONDMA);
+
+ /* the data in the memory is stored as little endian so mask it
+ * correctly
+ */
+ src_dword &= ~(CPU_TO_LE32(mask));
+
+ /* get the data back into host order before shifting */
+ dest_dword = LE32_TO_CPU(src_dword);
+
+ dest_dword >>= shift_width;
+
+ /* get the address from the struct field */
+ target = dest + ce_info->offset;
+
+ /* put it back in the struct */
+ i40e_memcpy(target, &dest_dword, sizeof(dest_dword),
+ I40E_NONDMA_TO_DMA);
+}
+
+/**
+ * i40e_read_qword - read HMC context qword into struct
+ * @hmc_bits: pointer to the HMC memory
+ * @ce_info: a description of the struct to be filled
+ * @dest: the struct to be filled
+ **/
+static void i40e_read_qword(u8 *hmc_bits,
+ struct i40e_context_ele *ce_info,
+ u8 *dest)
+{
+ u64 dest_qword, mask;
+ u8 *src, *target;
+ u16 shift_width;
+ __le64 src_qword;
+
+ /* prepare the bits and mask */
+ shift_width = ce_info->lsb % 8;
+
+ /* if the field width is exactly 64 on an x86 machine, then the shift
+ * operation will not work because the SHL instructions count is masked
+ * to 6 bits so the shift will do nothing
+ */
+ if (ce_info->width < 64)
+ mask = BIT_ULL(ce_info->width) - 1;
+ else
+ mask = ~(u64)0;
+
+ /* shift to correct alignment */
+ mask <<= shift_width;
+
+ /* get the current bits from the src bit string */
+ src = hmc_bits + (ce_info->lsb / 8);
+
+ i40e_memcpy(&src_qword, src, sizeof(src_qword), I40E_DMA_TO_NONDMA);
+
+ /* the data in the memory is stored as little endian so mask it
+ * correctly
+ */
+ src_qword &= ~(CPU_TO_LE64(mask));
+
+ /* get the data back into host order before shifting */
+ dest_qword = LE64_TO_CPU(src_qword);
+
+ dest_qword >>= shift_width;
+
+ /* get the address from the struct field */
+ target = dest + ce_info->offset;
+
+ /* put it back in the struct */
+ i40e_memcpy(target, &dest_qword, sizeof(dest_qword),
+ I40E_NONDMA_TO_DMA);
+}
+
+/**
+ * i40e_get_hmc_context - extract HMC context bits
+ * @context_bytes: pointer to the context bit array
+ * @ce_info: a description of the struct to be filled
+ * @dest: the struct to be filled
+ **/
+static enum i40e_status_code i40e_get_hmc_context(u8 *context_bytes,
+ struct i40e_context_ele *ce_info,
+ u8 *dest)
+{
+ int f;
+
+ for (f = 0; ce_info[f].width != 0; f++) {
+ switch (ce_info[f].size_of) {
+ case 1:
+ i40e_read_byte(context_bytes, &ce_info[f], dest);
+ break;
+ case 2:
+ i40e_read_word(context_bytes, &ce_info[f], dest);
+ break;
+ case 4:
+ i40e_read_dword(context_bytes, &ce_info[f], dest);
+ break;
+ case 8:
+ i40e_read_qword(context_bytes, &ce_info[f], dest);
+ break;
+ default:
+ /* nothing to do, just keep going */
+ break;
+ }
+ }
+
+ return I40E_SUCCESS;
+}
+
+/**
+ * i40e_clear_hmc_context - zero out the HMC context bits
+ * @hw: the hardware struct
+ * @context_bytes: pointer to the context bit array (DMA memory)
+ * @hmc_type: the type of HMC resource
+ **/
+static enum i40e_status_code i40e_clear_hmc_context(struct i40e_hw *hw,
+ u8 *context_bytes,
+ enum i40e_hmc_lan_rsrc_type hmc_type)
+{
+ /* clean the bit array */
+ i40e_memset(context_bytes, 0, (u32)hw->hmc.hmc_obj[hmc_type].size,
+ I40E_DMA_MEM);
+
+ return I40E_SUCCESS;
+}
+
+/**
+ * i40e_set_hmc_context - replace HMC context bits
+ * @context_bytes: pointer to the context bit array
+ * @ce_info: a description of the struct to be filled
+ * @dest: the struct to be filled
+ **/
+static enum i40e_status_code i40e_set_hmc_context(u8 *context_bytes,
+ struct i40e_context_ele *ce_info,
+ u8 *dest)
+{
+ int f;
+
+ for (f = 0; ce_info[f].width != 0; f++) {
+
+ /* we have to deal with each element of the HMC using the
+ * correct size so that we are correct regardless of the
+ * endianness of the machine
+ */
+ switch (ce_info[f].size_of) {
+ case 1:
+ i40e_write_byte(context_bytes, &ce_info[f], dest);
+ break;
+ case 2:
+ i40e_write_word(context_bytes, &ce_info[f], dest);
+ break;
+ case 4:
+ i40e_write_dword(context_bytes, &ce_info[f], dest);
+ break;
+ case 8:
+ i40e_write_qword(context_bytes, &ce_info[f], dest);
+ break;
+ }
+ }
+
+ return I40E_SUCCESS;
+}
+
+/**
+ * i40e_hmc_get_object_va - retrieves an object's virtual address
+ * @hw: pointer to the hw structure
+ * @object_base: pointer to u64 to get the va
+ * @rsrc_type: the hmc resource type
+ * @obj_idx: hmc object index
+ *
+ * This function retrieves the object's virtual address from the object
+ * base pointer. This function is used for LAN Queue contexts.
+ **/
+static
+enum i40e_status_code i40e_hmc_get_object_va(struct i40e_hw *hw,
+ u8 **object_base,
+ enum i40e_hmc_lan_rsrc_type rsrc_type,
+ u32 obj_idx)
+{
+ u32 obj_offset_in_sd, obj_offset_in_pd;
+ struct i40e_hmc_info *hmc_info = &hw->hmc;
+ struct i40e_hmc_sd_entry *sd_entry;
+ struct i40e_hmc_pd_entry *pd_entry;
+ u32 pd_idx, pd_lmt, rel_pd_idx;
+ enum i40e_status_code ret_code = I40E_SUCCESS;
+ u64 obj_offset_in_fpm;
+ u32 sd_idx, sd_lmt;
+
+ if (NULL == hmc_info) {
+ ret_code = I40E_ERR_BAD_PTR;
+ DEBUGOUT("i40e_hmc_get_object_va: bad hmc_info ptr\n");
+ goto exit;
+ }
+ if (NULL == hmc_info->hmc_obj) {
+ ret_code = I40E_ERR_BAD_PTR;
+ DEBUGOUT("i40e_hmc_get_object_va: bad hmc_info->hmc_obj ptr\n");
+ goto exit;
+ }
+ if (NULL == object_base) {
+ ret_code = I40E_ERR_BAD_PTR;
+ DEBUGOUT("i40e_hmc_get_object_va: bad object_base ptr\n");
+ goto exit;
+ }
+ if (I40E_HMC_INFO_SIGNATURE != hmc_info->signature) {
+ ret_code = I40E_ERR_BAD_PTR;
+ DEBUGOUT("i40e_hmc_get_object_va: bad hmc_info->signature\n");
+ goto exit;
+ }
+ if (obj_idx >= hmc_info->hmc_obj[rsrc_type].cnt) {
+ DEBUGOUT1("i40e_hmc_get_object_va: returns error %d\n",
+ ret_code);
+ ret_code = I40E_ERR_INVALID_HMC_OBJ_INDEX;
+ goto exit;
+ }
+ /* find sd index and limit */
+ I40E_FIND_SD_INDEX_LIMIT(hmc_info, rsrc_type, obj_idx, 1,
+ &sd_idx, &sd_lmt);
+
+ sd_entry = &hmc_info->sd_table.sd_entry[sd_idx];
+ obj_offset_in_fpm = hmc_info->hmc_obj[rsrc_type].base +
+ hmc_info->hmc_obj[rsrc_type].size * obj_idx;
+
+ if (I40E_SD_TYPE_PAGED == sd_entry->entry_type) {
+ I40E_FIND_PD_INDEX_LIMIT(hmc_info, rsrc_type, obj_idx, 1,
+ &pd_idx, &pd_lmt);
+ rel_pd_idx = pd_idx % I40E_HMC_PD_CNT_IN_SD;
+ pd_entry = &sd_entry->u.pd_table.pd_entry[rel_pd_idx];
+ obj_offset_in_pd = (u32)(obj_offset_in_fpm %
+ I40E_HMC_PAGED_BP_SIZE);
+ *object_base = (u8 *)pd_entry->bp.addr.va + obj_offset_in_pd;
+ } else {
+ obj_offset_in_sd = (u32)(obj_offset_in_fpm %
+ I40E_HMC_DIRECT_BP_SIZE);
+ *object_base = (u8 *)sd_entry->u.bp.addr.va + obj_offset_in_sd;
+ }
+exit:
+ return ret_code;
+}
+
+/**
+ * i40e_get_lan_tx_queue_context - return the HMC context for the queue
+ * @hw: the hardware struct
+ * @queue: the queue we care about
+ * @s: the struct to be filled
+ **/
+enum i40e_status_code i40e_get_lan_tx_queue_context(struct i40e_hw *hw,
+ u16 queue,
+ struct i40e_hmc_obj_txq *s)
+{
+ enum i40e_status_code err;
+ u8 *context_bytes;
+
+ err = i40e_hmc_get_object_va(hw, &context_bytes, I40E_HMC_LAN_TX, queue);
+ if (err < 0)
+ return err;
+
+ return i40e_get_hmc_context(context_bytes,
+ i40e_hmc_txq_ce_info, (u8 *)s);
+}
+
+/**
+ * i40e_clear_lan_tx_queue_context - clear the HMC context for the queue
+ * @hw: the hardware struct
+ * @queue: the queue we care about
+ **/
+enum i40e_status_code i40e_clear_lan_tx_queue_context(struct i40e_hw *hw,
+ u16 queue)
+{
+ enum i40e_status_code err;
+ u8 *context_bytes;
+
+ err = i40e_hmc_get_object_va(hw, &context_bytes, I40E_HMC_LAN_TX, queue);
+ if (err < 0)
+ return err;
+
+ return i40e_clear_hmc_context(hw, context_bytes, I40E_HMC_LAN_TX);
+}
+
+/**
+ * i40e_set_lan_tx_queue_context - set the HMC context for the queue
+ * @hw: the hardware struct
+ * @queue: the queue we care about
+ * @s: the struct to be filled
+ **/
+enum i40e_status_code i40e_set_lan_tx_queue_context(struct i40e_hw *hw,
+ u16 queue,
+ struct i40e_hmc_obj_txq *s)
+{
+ enum i40e_status_code err;
+ u8 *context_bytes;
+
+ err = i40e_hmc_get_object_va(hw, &context_bytes, I40E_HMC_LAN_TX, queue);
+ if (err < 0)
+ return err;
+
+ return i40e_set_hmc_context(context_bytes,
+ i40e_hmc_txq_ce_info, (u8 *)s);
+}
+
+/**
+ * i40e_get_lan_rx_queue_context - return the HMC context for the queue
+ * @hw: the hardware struct
+ * @queue: the queue we care about
+ * @s: the struct to be filled
+ **/
+enum i40e_status_code i40e_get_lan_rx_queue_context(struct i40e_hw *hw,
+ u16 queue,
+ struct i40e_hmc_obj_rxq *s)
+{
+ enum i40e_status_code err;
+ u8 *context_bytes;
+
+ err = i40e_hmc_get_object_va(hw, &context_bytes, I40E_HMC_LAN_RX, queue);
+ if (err < 0)
+ return err;
+
+ return i40e_get_hmc_context(context_bytes,
+ i40e_hmc_rxq_ce_info, (u8 *)s);
+}
+
+/**
+ * i40e_clear_lan_rx_queue_context - clear the HMC context for the queue
+ * @hw: the hardware struct
+ * @queue: the queue we care about
+ **/
+enum i40e_status_code i40e_clear_lan_rx_queue_context(struct i40e_hw *hw,
+ u16 queue)
+{
+ enum i40e_status_code err;
+ u8 *context_bytes;
+
+ err = i40e_hmc_get_object_va(hw, &context_bytes, I40E_HMC_LAN_RX, queue);
+ if (err < 0)
+ return err;
+
+ return i40e_clear_hmc_context(hw, context_bytes, I40E_HMC_LAN_RX);
+}
+
+/**
+ * i40e_set_lan_rx_queue_context - set the HMC context for the queue
+ * @hw: the hardware struct
+ * @queue: the queue we care about
+ * @s: the struct to be filled
+ **/
+enum i40e_status_code i40e_set_lan_rx_queue_context(struct i40e_hw *hw,
+ u16 queue,
+ struct i40e_hmc_obj_rxq *s)
+{
+ enum i40e_status_code err;
+ u8 *context_bytes;
+
+ err = i40e_hmc_get_object_va(hw, &context_bytes, I40E_HMC_LAN_RX, queue);
+ if (err < 0)
+ return err;
+
+ return i40e_set_hmc_context(context_bytes,
+ i40e_hmc_rxq_ce_info, (u8 *)s);
+}
diff --git a/usr/src/uts/common/io/i40e/core/i40e_lan_hmc.h b/usr/src/uts/common/io/i40e/core/i40e_lan_hmc.h
new file mode 100644
index 0000000000..2a575264ab
--- /dev/null
+++ b/usr/src/uts/common/io/i40e/core/i40e_lan_hmc.h
@@ -0,0 +1,201 @@
+/******************************************************************************
+
+ Copyright (c) 2013-2014, Intel Corporation
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+
+******************************************************************************/
+/*$FreeBSD: head/sys/dev/ixl/i40e_lan_hmc.h 283119 2015-05-19 18:35:18Z jhb $*/
+
+#ifndef _I40E_LAN_HMC_H_
+#define _I40E_LAN_HMC_H_
+
+/* forward-declare the HW struct for the compiler */
+struct i40e_hw;
+
+/* HMC element context information */
+
+/* Rx queue context data
+ *
+ * The sizes of the variables may be larger than needed due to crossing byte
+ * boundaries. If we do not have the width of the variable set to the correct
+ * size then we could end up shifting bits off the top of the variable when the
+ * variable is at the top of a byte and crosses over into the next byte.
+ */
+struct i40e_hmc_obj_rxq {
+ u16 head;
+ u16 cpuid; /* bigger than needed, see above for reason */
+ u64 base;
+ u16 qlen;
+#define I40E_RXQ_CTX_DBUFF_SHIFT 7
+ u16 dbuff; /* bigger than needed, see above for reason */
+#define I40E_RXQ_CTX_HBUFF_SHIFT 6
+ u16 hbuff; /* bigger than needed, see above for reason */
+ u8 dtype;
+ u8 dsize;
+ u8 crcstrip;
+ u8 fc_ena;
+ u8 l2tsel;
+ u8 hsplit_0;
+ u8 hsplit_1;
+ u8 showiv;
+ u32 rxmax; /* bigger than needed, see above for reason */
+ u8 tphrdesc_ena;
+ u8 tphwdesc_ena;
+ u8 tphdata_ena;
+ u8 tphhead_ena;
+ u16 lrxqthresh; /* bigger than needed, see above for reason */
+ u8 prefena; /* NOTE: normally must be set to 1 at init */
+};
+
+/* Tx queue context data
+*
+* The sizes of the variables may be larger than needed due to crossing byte
+* boundaries. If we do not have the width of the variable set to the correct
+* size then we could end up shifting bits off the top of the variable when the
+* variable is at the top of a byte and crosses over into the next byte.
+*/
+struct i40e_hmc_obj_txq {
+ u16 head;
+ u8 new_context;
+ u64 base;
+ u8 fc_ena;
+ u8 timesync_ena;
+ u8 fd_ena;
+ u8 alt_vlan_ena;
+ u16 thead_wb;
+ u8 cpuid;
+ u8 head_wb_ena;
+ u16 qlen;
+ u8 tphrdesc_ena;
+ u8 tphrpacket_ena;
+ u8 tphwdesc_ena;
+ u64 head_wb_addr;
+ u32 crc;
+ u16 rdylist;
+ u8 rdylist_act;
+};
+
+/* for hsplit_0 field of Rx HMC context */
+enum i40e_hmc_obj_rx_hsplit_0 {
+ I40E_HMC_OBJ_RX_HSPLIT_0_NO_SPLIT = 0,
+ I40E_HMC_OBJ_RX_HSPLIT_0_SPLIT_L2 = 1,
+ I40E_HMC_OBJ_RX_HSPLIT_0_SPLIT_IP = 2,
+ I40E_HMC_OBJ_RX_HSPLIT_0_SPLIT_TCP_UDP = 4,
+ I40E_HMC_OBJ_RX_HSPLIT_0_SPLIT_SCTP = 8,
+};
+
+/* fcoe_cntx and fcoe_filt are for debugging purpose only */
+struct i40e_hmc_obj_fcoe_cntx {
+ u32 rsv[32];
+};
+
+struct i40e_hmc_obj_fcoe_filt {
+ u32 rsv[8];
+};
+
+/* Context sizes for LAN objects */
+enum i40e_hmc_lan_object_size {
+ I40E_HMC_LAN_OBJ_SZ_8 = 0x3,
+ I40E_HMC_LAN_OBJ_SZ_16 = 0x4,
+ I40E_HMC_LAN_OBJ_SZ_32 = 0x5,
+ I40E_HMC_LAN_OBJ_SZ_64 = 0x6,
+ I40E_HMC_LAN_OBJ_SZ_128 = 0x7,
+ I40E_HMC_LAN_OBJ_SZ_256 = 0x8,
+ I40E_HMC_LAN_OBJ_SZ_512 = 0x9,
+};
+
+#define I40E_HMC_L2OBJ_BASE_ALIGNMENT 512
+#define I40E_HMC_OBJ_SIZE_TXQ 128
+#define I40E_HMC_OBJ_SIZE_RXQ 32
+#define I40E_HMC_OBJ_SIZE_FCOE_CNTX 64
+#define I40E_HMC_OBJ_SIZE_FCOE_FILT 64
+
+enum i40e_hmc_lan_rsrc_type {
+ I40E_HMC_LAN_FULL = 0,
+ I40E_HMC_LAN_TX = 1,
+ I40E_HMC_LAN_RX = 2,
+ I40E_HMC_FCOE_CTX = 3,
+ I40E_HMC_FCOE_FILT = 4,
+ I40E_HMC_LAN_MAX = 5
+};
+
+enum i40e_hmc_model {
+ I40E_HMC_MODEL_DIRECT_PREFERRED = 0,
+ I40E_HMC_MODEL_DIRECT_ONLY = 1,
+ I40E_HMC_MODEL_PAGED_ONLY = 2,
+ I40E_HMC_MODEL_UNKNOWN,
+};
+
+struct i40e_hmc_lan_create_obj_info {
+ struct i40e_hmc_info *hmc_info;
+ u32 rsrc_type;
+ u32 start_idx;
+ u32 count;
+ enum i40e_sd_entry_type entry_type;
+ u64 direct_mode_sz;
+};
+
+struct i40e_hmc_lan_delete_obj_info {
+ struct i40e_hmc_info *hmc_info;
+ u32 rsrc_type;
+ u32 start_idx;
+ u32 count;
+};
+
+enum i40e_status_code i40e_init_lan_hmc(struct i40e_hw *hw, u32 txq_num,
+ u32 rxq_num, u32 fcoe_cntx_num,
+ u32 fcoe_filt_num);
+enum i40e_status_code i40e_configure_lan_hmc(struct i40e_hw *hw,
+ enum i40e_hmc_model model);
+enum i40e_status_code i40e_shutdown_lan_hmc(struct i40e_hw *hw);
+
+u64 i40e_calculate_l2fpm_size(u32 txq_num, u32 rxq_num,
+ u32 fcoe_cntx_num, u32 fcoe_filt_num);
+enum i40e_status_code i40e_get_lan_tx_queue_context(struct i40e_hw *hw,
+ u16 queue,
+ struct i40e_hmc_obj_txq *s);
+enum i40e_status_code i40e_clear_lan_tx_queue_context(struct i40e_hw *hw,
+ u16 queue);
+enum i40e_status_code i40e_set_lan_tx_queue_context(struct i40e_hw *hw,
+ u16 queue,
+ struct i40e_hmc_obj_txq *s);
+enum i40e_status_code i40e_get_lan_rx_queue_context(struct i40e_hw *hw,
+ u16 queue,
+ struct i40e_hmc_obj_rxq *s);
+enum i40e_status_code i40e_clear_lan_rx_queue_context(struct i40e_hw *hw,
+ u16 queue);
+enum i40e_status_code i40e_set_lan_rx_queue_context(struct i40e_hw *hw,
+ u16 queue,
+ struct i40e_hmc_obj_rxq *s);
+enum i40e_status_code i40e_create_lan_hmc_object(struct i40e_hw *hw,
+ struct i40e_hmc_lan_create_obj_info *info);
+enum i40e_status_code i40e_delete_lan_hmc_object(struct i40e_hw *hw,
+ struct i40e_hmc_lan_delete_obj_info *info);
+
+#endif /* _I40E_LAN_HMC_H_ */
diff --git a/usr/src/uts/common/io/i40e/core/i40e_nvm.c b/usr/src/uts/common/io/i40e/core/i40e_nvm.c
new file mode 100644
index 0000000000..04d61bb969
--- /dev/null
+++ b/usr/src/uts/common/io/i40e/core/i40e_nvm.c
@@ -0,0 +1,712 @@
+/******************************************************************************
+
+ Copyright (c) 2013-2015, Intel Corporation
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+
+******************************************************************************/
+/*$FreeBSD: head/sys/dev/ixl/i40e_nvm.c 284049 2015-06-05 22:52:42Z jfv $*/
+
+#include "i40e_prototype.h"
+
+enum i40e_status_code i40e_read_nvm_word_srctl(struct i40e_hw *hw, u16 offset,
+ u16 *data);
+enum i40e_status_code i40e_read_nvm_word_aq(struct i40e_hw *hw, u16 offset,
+ u16 *data);
+enum i40e_status_code i40e_read_nvm_buffer_srctl(struct i40e_hw *hw, u16 offset,
+ u16 *words, u16 *data);
+enum i40e_status_code i40e_read_nvm_buffer_aq(struct i40e_hw *hw, u16 offset,
+ u16 *words, u16 *data);
+enum i40e_status_code i40e_read_nvm_aq(struct i40e_hw *hw, u8 module_pointer,
+ u32 offset, u16 words, void *data,
+ bool last_command);
+
+/**
+ * i40e_init_nvm_ops - Initialize NVM function pointers
+ * @hw: pointer to the HW structure
+ *
+ * Setup the function pointers and the NVM info structure. Should be called
+ * once per NVM initialization, e.g. inside the i40e_init_shared_code().
+ * Please notice that the NVM term is used here (& in all methods covered
+ * in this file) as an equivalent of the FLASH part mapped into the SR.
+ * We are accessing FLASH always thru the Shadow RAM.
+ **/
+enum i40e_status_code i40e_init_nvm(struct i40e_hw *hw)
+{
+ struct i40e_nvm_info *nvm = &hw->nvm;
+ enum i40e_status_code ret_code = I40E_SUCCESS;
+ u32 fla, gens;
+ u8 sr_size;
+
+ DEBUGFUNC("i40e_init_nvm");
+
+ /* The SR size is stored regardless of the nvm programming mode
+ * as the blank mode may be used in the factory line.
+ */
+ gens = rd32(hw, I40E_GLNVM_GENS);
+ sr_size = ((gens & I40E_GLNVM_GENS_SR_SIZE_MASK) >>
+ I40E_GLNVM_GENS_SR_SIZE_SHIFT);
+ /* Switching to words (sr_size contains power of 2KB) */
+ nvm->sr_size = BIT(sr_size) * I40E_SR_WORDS_IN_1KB;
+
+ /* Check if we are in the normal or blank NVM programming mode */
+ fla = rd32(hw, I40E_GLNVM_FLA);
+ if (fla & I40E_GLNVM_FLA_LOCKED_MASK) { /* Normal programming mode */
+ /* Max NVM timeout */
+ nvm->timeout = I40E_MAX_NVM_TIMEOUT;
+ nvm->blank_nvm_mode = FALSE;
+ } else { /* Blank programming mode */
+ nvm->blank_nvm_mode = TRUE;
+ ret_code = I40E_ERR_NVM_BLANK_MODE;
+ i40e_debug(hw, I40E_DEBUG_NVM, "NVM init error: unsupported blank mode.\n");
+ }
+
+ return ret_code;
+}
+
+/**
+ * i40e_acquire_nvm - Generic request for acquiring the NVM ownership
+ * @hw: pointer to the HW structure
+ * @access: NVM access type (read or write)
+ *
+ * This function will request NVM ownership for reading
+ * via the proper Admin Command.
+ **/
+enum i40e_status_code i40e_acquire_nvm(struct i40e_hw *hw,
+ enum i40e_aq_resource_access_type access)
+{
+ enum i40e_status_code ret_code = I40E_SUCCESS;
+ u64 gtime, timeout;
+ u64 time_left = 0;
+
+ DEBUGFUNC("i40e_acquire_nvm");
+
+ if (hw->nvm.blank_nvm_mode)
+ goto i40e_i40e_acquire_nvm_exit;
+
+ ret_code = i40e_aq_request_resource(hw, I40E_NVM_RESOURCE_ID, access,
+ 0, &time_left, NULL);
+ /* Reading the Global Device Timer */
+ gtime = rd32(hw, I40E_GLVFGEN_TIMER);
+
+ /* Store the timeout */
+ hw->nvm.hw_semaphore_timeout = I40E_MS_TO_GTIME(time_left) + gtime;
+
+ if (ret_code)
+ i40e_debug(hw, I40E_DEBUG_NVM,
+ "NVM acquire type %d failed time_left=%llu ret=%d aq_err=%d\n",
+ access, time_left, ret_code, hw->aq.asq_last_status);
+
+ if (ret_code && time_left) {
+ /* Poll until the current NVM owner timeouts */
+ timeout = I40E_MS_TO_GTIME(I40E_MAX_NVM_TIMEOUT) + gtime;
+ while ((gtime < timeout) && time_left) {
+ i40e_msec_delay(10);
+ gtime = rd32(hw, I40E_GLVFGEN_TIMER);
+ ret_code = i40e_aq_request_resource(hw,
+ I40E_NVM_RESOURCE_ID,
+ access, 0, &time_left,
+ NULL);
+ if (ret_code == I40E_SUCCESS) {
+ hw->nvm.hw_semaphore_timeout =
+ I40E_MS_TO_GTIME(time_left) + gtime;
+ break;
+ }
+ }
+ if (ret_code != I40E_SUCCESS) {
+ hw->nvm.hw_semaphore_timeout = 0;
+ i40e_debug(hw, I40E_DEBUG_NVM,
+ "NVM acquire timed out, wait %llu ms before trying again. status=%d aq_err=%d\n",
+ time_left, ret_code, hw->aq.asq_last_status);
+ }
+ }
+
+i40e_i40e_acquire_nvm_exit:
+ return ret_code;
+}
+
+/**
+ * i40e_release_nvm - Generic request for releasing the NVM ownership
+ * @hw: pointer to the HW structure
+ *
+ * This function will release NVM resource via the proper Admin Command.
+ **/
+void i40e_release_nvm(struct i40e_hw *hw)
+{
+ enum i40e_status_code ret_code = I40E_SUCCESS;
+ u32 total_delay = 0;
+
+ DEBUGFUNC("i40e_release_nvm");
+
+ if (hw->nvm.blank_nvm_mode)
+ return;
+
+ ret_code = i40e_aq_release_resource(hw, I40E_NVM_RESOURCE_ID, 0, NULL);
+
+ /* there are some rare cases when trying to release the resource
+ * results in an admin Q timeout, so handle them correctly
+ */
+ while ((ret_code == I40E_ERR_ADMIN_QUEUE_TIMEOUT) &&
+ (total_delay < hw->aq.asq_cmd_timeout)) {
+ i40e_msec_delay(1);
+ ret_code = i40e_aq_release_resource(hw,
+ I40E_NVM_RESOURCE_ID, 0, NULL);
+ total_delay++;
+ }
+}
+
+/**
+ * i40e_poll_sr_srctl_done_bit - Polls the GLNVM_SRCTL done bit
+ * @hw: pointer to the HW structure
+ *
+ * Polls the SRCTL Shadow RAM register done bit.
+ **/
+static enum i40e_status_code i40e_poll_sr_srctl_done_bit(struct i40e_hw *hw)
+{
+ enum i40e_status_code ret_code = I40E_ERR_TIMEOUT;
+ u32 srctl, wait_cnt;
+
+ DEBUGFUNC("i40e_poll_sr_srctl_done_bit");
+
+ /* Poll the I40E_GLNVM_SRCTL until the done bit is set */
+ for (wait_cnt = 0; wait_cnt < I40E_SRRD_SRCTL_ATTEMPTS; wait_cnt++) {
+ srctl = rd32(hw, I40E_GLNVM_SRCTL);
+ if (srctl & I40E_GLNVM_SRCTL_DONE_MASK) {
+ ret_code = I40E_SUCCESS;
+ break;
+ }
+ i40e_usec_delay(5);
+ }
+ if (ret_code == I40E_ERR_TIMEOUT)
+ i40e_debug(hw, I40E_DEBUG_NVM, "Done bit in GLNVM_SRCTL not set");
+ return ret_code;
+}
+
+/**
+ * i40e_read_nvm_word - Reads Shadow RAM
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
+ * @data: word read from the Shadow RAM
+ *
+ * Reads one 16 bit word from the Shadow RAM using the GLNVM_SRCTL register.
+ **/
+enum i40e_status_code i40e_read_nvm_word(struct i40e_hw *hw, u16 offset,
+ u16 *data)
+{
+#ifdef X722_SUPPORT
+ if (hw->mac.type == I40E_MAC_X722)
+ return i40e_read_nvm_word_aq(hw, offset, data);
+#endif
+ return i40e_read_nvm_word_srctl(hw, offset, data);
+}
+
+/**
+ * i40e_read_nvm_word_srctl - Reads Shadow RAM via SRCTL register
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
+ * @data: word read from the Shadow RAM
+ *
+ * Reads one 16 bit word from the Shadow RAM using the GLNVM_SRCTL register.
+ **/
+enum i40e_status_code i40e_read_nvm_word_srctl(struct i40e_hw *hw, u16 offset,
+ u16 *data)
+{
+ enum i40e_status_code ret_code = I40E_ERR_TIMEOUT;
+ u32 sr_reg;
+
+ DEBUGFUNC("i40e_read_nvm_word_srctl");
+
+ if (offset >= hw->nvm.sr_size) {
+ i40e_debug(hw, I40E_DEBUG_NVM,
+ "NVM read error: Offset %d beyond Shadow RAM limit %d\n",
+ offset, hw->nvm.sr_size);
+ ret_code = I40E_ERR_PARAM;
+ goto read_nvm_exit;
+ }
+
+ /* Poll the done bit first */
+ ret_code = i40e_poll_sr_srctl_done_bit(hw);
+ if (ret_code == I40E_SUCCESS) {
+ /* Write the address and start reading */
+ sr_reg = ((u32)offset << I40E_GLNVM_SRCTL_ADDR_SHIFT) |
+ BIT(I40E_GLNVM_SRCTL_START_SHIFT);
+ wr32(hw, I40E_GLNVM_SRCTL, sr_reg);
+
+ /* Poll I40E_GLNVM_SRCTL until the done bit is set */
+ ret_code = i40e_poll_sr_srctl_done_bit(hw);
+ if (ret_code == I40E_SUCCESS) {
+ sr_reg = rd32(hw, I40E_GLNVM_SRDATA);
+ *data = (u16)((sr_reg &
+ I40E_GLNVM_SRDATA_RDDATA_MASK)
+ >> I40E_GLNVM_SRDATA_RDDATA_SHIFT);
+ }
+ }
+ if (ret_code != I40E_SUCCESS)
+ i40e_debug(hw, I40E_DEBUG_NVM,
+ "NVM read error: Couldn't access Shadow RAM address: 0x%x\n",
+ offset);
+
+read_nvm_exit:
+ return ret_code;
+}
+
+/**
+ * i40e_read_nvm_word_aq - Reads Shadow RAM via AQ
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
+ * @data: word read from the Shadow RAM
+ *
+ * Reads one 16 bit word from the Shadow RAM using the GLNVM_SRCTL register.
+ **/
+enum i40e_status_code i40e_read_nvm_word_aq(struct i40e_hw *hw, u16 offset,
+ u16 *data)
+{
+ enum i40e_status_code ret_code = I40E_ERR_TIMEOUT;
+
+ DEBUGFUNC("i40e_read_nvm_word_aq");
+
+ ret_code = i40e_read_nvm_aq(hw, 0x0, offset, 1, data, TRUE);
+ *data = LE16_TO_CPU(*(__le16 *)data);
+
+ return ret_code;
+}
+
+/**
+ * i40e_read_nvm_buffer - Reads Shadow RAM buffer
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF).
+ * @words: (in) number of words to read; (out) number of words actually read
+ * @data: words read from the Shadow RAM
+ *
+ * Reads 16 bit words (data buffer) from the SR using the i40e_read_nvm_srrd()
+ * method. The buffer read is preceded by the NVM ownership take
+ * and followed by the release.
+ **/
+enum i40e_status_code i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
+ u16 *words, u16 *data)
+{
+#ifdef X722_SUPPORT
+ if (hw->mac.type == I40E_MAC_X722)
+ return i40e_read_nvm_buffer_aq(hw, offset, words, data);
+#endif
+ return i40e_read_nvm_buffer_srctl(hw, offset, words, data);
+}
+
+/**
+ * i40e_read_nvm_buffer_srctl - Reads Shadow RAM buffer via SRCTL register
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF).
+ * @words: (in) number of words to read; (out) number of words actually read
+ * @data: words read from the Shadow RAM
+ *
+ * Reads 16 bit words (data buffer) from the SR using the i40e_read_nvm_srrd()
+ * method. The buffer read is preceded by the NVM ownership take
+ * and followed by the release.
+ **/
+enum i40e_status_code i40e_read_nvm_buffer_srctl(struct i40e_hw *hw, u16 offset,
+ u16 *words, u16 *data)
+{
+ enum i40e_status_code ret_code = I40E_SUCCESS;
+ u16 index, word;
+
+ DEBUGFUNC("i40e_read_nvm_buffer_srctl");
+
+ /* Loop thru the selected region */
+ for (word = 0; word < *words; word++) {
+ index = offset + word;
+ ret_code = i40e_read_nvm_word_srctl(hw, index, &data[word]);
+ if (ret_code != I40E_SUCCESS)
+ break;
+ }
+
+ /* Update the number of words read from the Shadow RAM */
+ *words = word;
+
+ return ret_code;
+}
+
+/**
+ * i40e_read_nvm_buffer_aq - Reads Shadow RAM buffer via AQ
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF).
+ * @words: (in) number of words to read; (out) number of words actually read
+ * @data: words read from the Shadow RAM
+ *
+ * Reads 16 bit words (data buffer) from the SR using the i40e_read_nvm_aq()
+ * method. The buffer read is preceded by the NVM ownership take
+ * and followed by the release.
+ **/
+enum i40e_status_code i40e_read_nvm_buffer_aq(struct i40e_hw *hw, u16 offset,
+ u16 *words, u16 *data)
+{
+ enum i40e_status_code ret_code;
+ u16 read_size = *words;
+ bool last_cmd = FALSE;
+ u16 words_read = 0;
+ u16 i = 0;
+
+ DEBUGFUNC("i40e_read_nvm_buffer_aq");
+
+ do {
+ /* Calculate number of bytes we should read in this step.
+ * FVL AQ do not allow to read more than one page at a time or
+ * to cross page boundaries.
+ */
+ if (offset % I40E_SR_SECTOR_SIZE_IN_WORDS)
+ read_size = min(*words,
+ (u16)(I40E_SR_SECTOR_SIZE_IN_WORDS -
+ (offset % I40E_SR_SECTOR_SIZE_IN_WORDS)));
+ else
+ read_size = min((*words - words_read),
+ I40E_SR_SECTOR_SIZE_IN_WORDS);
+
+ /* Check if this is last command, if so set proper flag */
+ if ((words_read + read_size) >= *words)
+ last_cmd = TRUE;
+
+ ret_code = i40e_read_nvm_aq(hw, 0x0, offset, read_size,
+ data + words_read, last_cmd);
+ if (ret_code != I40E_SUCCESS)
+ goto read_nvm_buffer_aq_exit;
+
+ /* Increment counter for words already read and move offset to
+ * new read location
+ */
+ words_read += read_size;
+ offset += read_size;
+ } while (words_read < *words);
+
+ for (i = 0; i < *words; i++)
+ data[i] = LE16_TO_CPU(((__le16 *)data)[i]);
+
+read_nvm_buffer_aq_exit:
+ *words = words_read;
+ return ret_code;
+}
+
+/**
+ * i40e_read_nvm_aq - Read Shadow RAM.
+ * @hw: pointer to the HW structure.
+ * @module_pointer: module pointer location in words from the NVM beginning
+ * @offset: offset in words from module start
+ * @words: number of words to write
+ * @data: buffer with words to write to the Shadow RAM
+ * @last_command: tells the AdminQ that this is the last command
+ *
+ * Writes a 16 bit words buffer to the Shadow RAM using the admin command.
+ **/
+enum i40e_status_code i40e_read_nvm_aq(struct i40e_hw *hw, u8 module_pointer,
+ u32 offset, u16 words, void *data,
+ bool last_command)
+{
+ enum i40e_status_code ret_code = I40E_ERR_NVM;
+ struct i40e_asq_cmd_details cmd_details;
+
+ DEBUGFUNC("i40e_read_nvm_aq");
+
+ memset(&cmd_details, 0, sizeof(cmd_details));
+ cmd_details.wb_desc = &hw->nvm_wb_desc;
+
+ /* Here we are checking the SR limit only for the flat memory model.
+ * We cannot do it for the module-based model, as we did not acquire
+ * the NVM resource yet (we cannot get the module pointer value).
+ * Firmware will check the module-based model.
+ */
+ if ((offset + words) > hw->nvm.sr_size)
+ i40e_debug(hw, I40E_DEBUG_NVM,
+ "NVM write error: offset %d beyond Shadow RAM limit %d\n",
+ (offset + words), hw->nvm.sr_size);
+ else if (words > I40E_SR_SECTOR_SIZE_IN_WORDS)
+ /* We can write only up to 4KB (one sector), in one AQ write */
+ i40e_debug(hw, I40E_DEBUG_NVM,
+ "NVM write fail error: tried to write %d words, limit is %d.\n",
+ words, I40E_SR_SECTOR_SIZE_IN_WORDS);
+ else if (((offset + (words - 1)) / I40E_SR_SECTOR_SIZE_IN_WORDS)
+ != (offset / I40E_SR_SECTOR_SIZE_IN_WORDS))
+ /* A single write cannot spread over two sectors */
+ i40e_debug(hw, I40E_DEBUG_NVM,
+ "NVM write error: cannot spread over two sectors in a single write offset=%d words=%d\n",
+ offset, words);
+ else
+ ret_code = i40e_aq_read_nvm(hw, module_pointer,
+ 2 * offset, /*bytes*/
+ 2 * words, /*bytes*/
+ data, last_command, &cmd_details);
+
+ return ret_code;
+}
+
+/**
+ * i40e_write_nvm_aq - Writes Shadow RAM.
+ * @hw: pointer to the HW structure.
+ * @module_pointer: module pointer location in words from the NVM beginning
+ * @offset: offset in words from module start
+ * @words: number of words to write
+ * @data: buffer with words to write to the Shadow RAM
+ * @last_command: tells the AdminQ that this is the last command
+ *
+ * Writes a 16 bit words buffer to the Shadow RAM using the admin command.
+ **/
+enum i40e_status_code i40e_write_nvm_aq(struct i40e_hw *hw, u8 module_pointer,
+ u32 offset, u16 words, void *data,
+ bool last_command)
+{
+ enum i40e_status_code ret_code = I40E_ERR_NVM;
+ struct i40e_asq_cmd_details cmd_details;
+
+ DEBUGFUNC("i40e_write_nvm_aq");
+
+ memset(&cmd_details, 0, sizeof(cmd_details));
+ cmd_details.wb_desc = &hw->nvm_wb_desc;
+
+ /* Here we are checking the SR limit only for the flat memory model.
+ * We cannot do it for the module-based model, as we did not acquire
+ * the NVM resource yet (we cannot get the module pointer value).
+ * Firmware will check the module-based model.
+ */
+ if ((offset + words) > hw->nvm.sr_size)
+ DEBUGOUT("NVM write error: offset beyond Shadow RAM limit.\n");
+ else if (words > I40E_SR_SECTOR_SIZE_IN_WORDS)
+ /* We can write only up to 4KB (one sector), in one AQ write */
+ DEBUGOUT("NVM write fail error: cannot write more than 4KB in a single write.\n");
+ else if (((offset + (words - 1)) / I40E_SR_SECTOR_SIZE_IN_WORDS)
+ != (offset / I40E_SR_SECTOR_SIZE_IN_WORDS))
+ /* A single write cannot spread over two sectors */
+ DEBUGOUT("NVM write error: cannot spread over two sectors in a single write.\n");
+ else
+ ret_code = i40e_aq_update_nvm(hw, module_pointer,
+ 2 * offset, /*bytes*/
+ 2 * words, /*bytes*/
+ data, last_command, &cmd_details);
+
+ return ret_code;
+}
+
+/**
+ * i40e_write_nvm_word - Writes Shadow RAM word
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to write
+ * @data: word to write to the Shadow RAM
+ *
+ * Writes a 16 bit word to the SR using the i40e_write_nvm_aq() method.
+ * NVM ownership have to be acquired and released (on ARQ completion event
+ * reception) by caller. To commit SR to NVM update checksum function
+ * should be called.
+ **/
+enum i40e_status_code i40e_write_nvm_word(struct i40e_hw *hw, u32 offset,
+ void *data)
+{
+ DEBUGFUNC("i40e_write_nvm_word");
+
+ *((__le16 *)data) = CPU_TO_LE16(*((u16 *)data));
+
+ /* Value 0x00 below means that we treat SR as a flat mem */
+ return i40e_write_nvm_aq(hw, 0x00, offset, 1, data, FALSE);
+}
+
+/**
+ * i40e_write_nvm_buffer - Writes Shadow RAM buffer
+ * @hw: pointer to the HW structure
+ * @module_pointer: module pointer location in words from the NVM beginning
+ * @offset: offset of the Shadow RAM buffer to write
+ * @words: number of words to write
+ * @data: words to write to the Shadow RAM
+ *
+ * Writes a 16 bit words buffer to the Shadow RAM using the admin command.
+ * NVM ownership must be acquired before calling this function and released
+ * on ARQ completion event reception by caller. To commit SR to NVM update
+ * checksum function should be called.
+ **/
+enum i40e_status_code i40e_write_nvm_buffer(struct i40e_hw *hw,
+ u8 module_pointer, u32 offset,
+ u16 words, void *data)
+{
+ __le16 *le_word_ptr = (__le16 *)data;
+ u16 *word_ptr = (u16 *)data;
+ u32 i = 0;
+
+ DEBUGFUNC("i40e_write_nvm_buffer");
+
+ for (i = 0; i < words; i++)
+ le_word_ptr[i] = CPU_TO_LE16(word_ptr[i]);
+
+ /* Here we will only write one buffer as the size of the modules
+ * mirrored in the Shadow RAM is always less than 4K.
+ */
+ return i40e_write_nvm_aq(hw, module_pointer, offset, words,
+ data, FALSE);
+}
+
+/**
+ * i40e_calc_nvm_checksum - Calculates and returns the checksum
+ * @hw: pointer to hardware structure
+ * @checksum: pointer to the checksum
+ *
+ * This function calculates SW Checksum that covers the whole 64kB shadow RAM
+ * except the VPD and PCIe ALT Auto-load modules. The structure and size of VPD
+ * is customer specific and unknown. Therefore, this function skips all maximum
+ * possible size of VPD (1kB).
+ **/
+enum i40e_status_code i40e_calc_nvm_checksum(struct i40e_hw *hw, u16 *checksum)
+{
+ enum i40e_status_code ret_code = I40E_SUCCESS;
+ struct i40e_virt_mem vmem;
+ u16 pcie_alt_module = 0;
+ u16 checksum_local = 0;
+ u16 vpd_module = 0;
+ u16 *data;
+ u16 i = 0;
+
+ DEBUGFUNC("i40e_calc_nvm_checksum");
+
+ ret_code = i40e_allocate_virt_mem(hw, &vmem,
+ I40E_SR_SECTOR_SIZE_IN_WORDS * sizeof(u16));
+ if (ret_code)
+ goto i40e_calc_nvm_checksum_exit;
+ data = (u16 *)vmem.va;
+
+ /* read pointer to VPD area */
+ ret_code = i40e_read_nvm_word(hw, I40E_SR_VPD_PTR, &vpd_module);
+ if (ret_code != I40E_SUCCESS) {
+ ret_code = I40E_ERR_NVM_CHECKSUM;
+ goto i40e_calc_nvm_checksum_exit;
+ }
+
+ /* read pointer to PCIe Alt Auto-load module */
+ ret_code = i40e_read_nvm_word(hw, I40E_SR_PCIE_ALT_AUTO_LOAD_PTR,
+ &pcie_alt_module);
+ if (ret_code != I40E_SUCCESS) {
+ ret_code = I40E_ERR_NVM_CHECKSUM;
+ goto i40e_calc_nvm_checksum_exit;
+ }
+
+ /* Calculate SW checksum that covers the whole 64kB shadow RAM
+ * except the VPD and PCIe ALT Auto-load modules
+ */
+ for (i = 0; i < hw->nvm.sr_size; i++) {
+ /* Read SR page */
+ if ((i % I40E_SR_SECTOR_SIZE_IN_WORDS) == 0) {
+ u16 words = I40E_SR_SECTOR_SIZE_IN_WORDS;
+
+ ret_code = i40e_read_nvm_buffer(hw, i, &words, data);
+ if (ret_code != I40E_SUCCESS) {
+ ret_code = I40E_ERR_NVM_CHECKSUM;
+ goto i40e_calc_nvm_checksum_exit;
+ }
+ }
+
+ /* Skip Checksum word */
+ if (i == I40E_SR_SW_CHECKSUM_WORD)
+ continue;
+ /* Skip VPD module (convert byte size to word count) */
+ if ((i >= (u32)vpd_module) &&
+ (i < ((u32)vpd_module +
+ (I40E_SR_VPD_MODULE_MAX_SIZE / 2)))) {
+ continue;
+ }
+ /* Skip PCIe ALT module (convert byte size to word count) */
+ if ((i >= (u32)pcie_alt_module) &&
+ (i < ((u32)pcie_alt_module +
+ (I40E_SR_PCIE_ALT_MODULE_MAX_SIZE / 2)))) {
+ continue;
+ }
+
+ checksum_local += data[i % I40E_SR_SECTOR_SIZE_IN_WORDS];
+ }
+
+ *checksum = (u16)I40E_SR_SW_CHECKSUM_BASE - checksum_local;
+
+i40e_calc_nvm_checksum_exit:
+ i40e_free_virt_mem(hw, &vmem);
+ return ret_code;
+}
+
+/**
+ * i40e_update_nvm_checksum - Updates the NVM checksum
+ * @hw: pointer to hardware structure
+ *
+ * NVM ownership must be acquired before calling this function and released
+ * on ARQ completion event reception by caller.
+ * This function will commit SR to NVM.
+ **/
+enum i40e_status_code i40e_update_nvm_checksum(struct i40e_hw *hw)
+{
+ enum i40e_status_code ret_code = I40E_SUCCESS;
+ u16 checksum;
+ __le16 le_sum;
+
+ DEBUGFUNC("i40e_update_nvm_checksum");
+
+ ret_code = i40e_calc_nvm_checksum(hw, &checksum);
+ le_sum = CPU_TO_LE16(checksum);
+ if (ret_code == I40E_SUCCESS)
+ ret_code = i40e_write_nvm_aq(hw, 0x00, I40E_SR_SW_CHECKSUM_WORD,
+ 1, &le_sum, TRUE);
+
+ return ret_code;
+}
+
+/**
+ * i40e_validate_nvm_checksum - Validate EEPROM checksum
+ * @hw: pointer to hardware structure
+ * @checksum: calculated checksum
+ *
+ * Performs checksum calculation and validates the NVM SW checksum. If the
+ * caller does not need checksum, the value can be NULL.
+ **/
+enum i40e_status_code i40e_validate_nvm_checksum(struct i40e_hw *hw,
+ u16 *checksum)
+{
+ enum i40e_status_code ret_code = I40E_SUCCESS;
+ u16 checksum_sr = 0;
+ u16 checksum_local = 0;
+
+ DEBUGFUNC("i40e_validate_nvm_checksum");
+
+ ret_code = i40e_calc_nvm_checksum(hw, &checksum_local);
+ if (ret_code != I40E_SUCCESS)
+ goto i40e_validate_nvm_checksum_exit;
+
+ /* Do not use i40e_read_nvm_word() because we do not want to take
+ * the synchronization semaphores twice here.
+ */
+ i40e_read_nvm_word(hw, I40E_SR_SW_CHECKSUM_WORD, &checksum_sr);
+
+ /* Verify read checksum from EEPROM is the same as
+ * calculated checksum
+ */
+ if (checksum_local != checksum_sr)
+ ret_code = I40E_ERR_NVM_CHECKSUM;
+
+ /* If the user cares, return the calculated checksum */
+ if (checksum)
+ *checksum = checksum_local;
+
+i40e_validate_nvm_checksum_exit:
+ return ret_code;
+}
diff --git a/usr/src/uts/common/io/i40e/core/i40e_prototype.h b/usr/src/uts/common/io/i40e/core/i40e_prototype.h
new file mode 100644
index 0000000000..6f1cfc3afe
--- /dev/null
+++ b/usr/src/uts/common/io/i40e/core/i40e_prototype.h
@@ -0,0 +1,478 @@
+/******************************************************************************
+
+ Copyright (c) 2013-2015, Intel Corporation
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+
+******************************************************************************/
+/*$FreeBSD: head/sys/dev/ixl/i40e_prototype.h 284049 2015-06-05 22:52:42Z jfv $*/
+
+#ifndef _I40E_PROTOTYPE_H_
+#define _I40E_PROTOTYPE_H_
+
+#include "i40e_type.h"
+#include "i40e_alloc.h"
+#include "i40e_virtchnl.h"
+
+/* Prototypes for shared code functions that are not in
+ * the standard function pointer structures. These are
+ * mostly because they are needed even before the init
+ * has happened and will assist in the early SW and FW
+ * setup.
+ */
+
+/* adminq functions */
+enum i40e_status_code i40e_init_adminq(struct i40e_hw *hw);
+enum i40e_status_code i40e_shutdown_adminq(struct i40e_hw *hw);
+enum i40e_status_code i40e_init_asq(struct i40e_hw *hw);
+enum i40e_status_code i40e_init_arq(struct i40e_hw *hw);
+enum i40e_status_code i40e_alloc_adminq_asq_ring(struct i40e_hw *hw);
+enum i40e_status_code i40e_alloc_adminq_arq_ring(struct i40e_hw *hw);
+enum i40e_status_code i40e_shutdown_asq(struct i40e_hw *hw);
+enum i40e_status_code i40e_shutdown_arq(struct i40e_hw *hw);
+u16 i40e_clean_asq(struct i40e_hw *hw);
+void i40e_free_adminq_asq(struct i40e_hw *hw);
+void i40e_free_adminq_arq(struct i40e_hw *hw);
+enum i40e_status_code i40e_validate_mac_addr(u8 *mac_addr);
+void i40e_adminq_init_ring_data(struct i40e_hw *hw);
+enum i40e_status_code i40e_clean_arq_element(struct i40e_hw *hw,
+ struct i40e_arq_event_info *e,
+ u16 *events_pending);
+enum i40e_status_code i40e_asq_send_command(struct i40e_hw *hw,
+ struct i40e_aq_desc *desc,
+ void *buff, /* can be NULL */
+ u16 buff_size,
+ struct i40e_asq_cmd_details *cmd_details);
+bool i40e_asq_done(struct i40e_hw *hw);
+
+/* debug function for adminq */
+void i40e_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask,
+ void *desc, void *buffer, u16 buf_len);
+
+void i40e_idle_aq(struct i40e_hw *hw);
+void i40e_resume_aq(struct i40e_hw *hw);
+bool i40e_check_asq_alive(struct i40e_hw *hw);
+enum i40e_status_code i40e_aq_queue_shutdown(struct i40e_hw *hw, bool unloading);
+#ifdef X722_SUPPORT
+
+enum i40e_status_code i40e_aq_get_rss_lut(struct i40e_hw *hw, u16 seid,
+ bool pf_lut, u8 *lut, u16 lut_size);
+enum i40e_status_code i40e_aq_set_rss_lut(struct i40e_hw *hw, u16 seid,
+ bool pf_lut, u8 *lut, u16 lut_size);
+enum i40e_status_code i40e_aq_get_rss_key(struct i40e_hw *hw,
+ u16 seid,
+ struct i40e_aqc_get_set_rss_key_data *key);
+enum i40e_status_code i40e_aq_set_rss_key(struct i40e_hw *hw,
+ u16 seid,
+ struct i40e_aqc_get_set_rss_key_data *key);
+#endif
+char *i40e_aq_str(struct i40e_hw *hw, enum i40e_admin_queue_err aq_err);
+char *i40e_stat_str(struct i40e_hw *hw, enum i40e_status_code stat_err);
+
+
+u32 i40e_led_get(struct i40e_hw *hw);
+void i40e_led_set(struct i40e_hw *hw, u32 mode, bool blink);
+
+/* admin send queue commands */
+
+enum i40e_status_code i40e_aq_get_firmware_version(struct i40e_hw *hw,
+ u16 *fw_major_version, u16 *fw_minor_version,
+ u32 *fw_build,
+ u16 *api_major_version, u16 *api_minor_version,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_debug_write_register(struct i40e_hw *hw,
+ u32 reg_addr, u64 reg_val,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_debug_read_register(struct i40e_hw *hw,
+ u32 reg_addr, u64 *reg_val,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_set_phy_debug(struct i40e_hw *hw, u8 cmd_flags,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_set_default_vsi(struct i40e_hw *hw, u16 vsi_id,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_get_phy_capabilities(struct i40e_hw *hw,
+ bool qualified_modules, bool report_init,
+ struct i40e_aq_get_phy_abilities_resp *abilities,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_set_phy_config(struct i40e_hw *hw,
+ struct i40e_aq_set_phy_config *config,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_set_fc(struct i40e_hw *hw, u8 *aq_failures,
+ bool atomic_reset);
+enum i40e_status_code i40e_aq_set_phy_int_mask(struct i40e_hw *hw, u16 mask,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_set_mac_config(struct i40e_hw *hw,
+ u16 max_frame_size, bool crc_en, u16 pacing,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_get_local_advt_reg(struct i40e_hw *hw,
+ u64 *advt_reg,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_get_partner_advt(struct i40e_hw *hw,
+ u64 *advt_reg,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_set_lb_modes(struct i40e_hw *hw, u16 lb_modes,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_clear_pxe_mode(struct i40e_hw *hw,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_set_link_restart_an(struct i40e_hw *hw,
+ bool enable_link, struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_get_link_info(struct i40e_hw *hw,
+ bool enable_lse, struct i40e_link_status *link,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_set_local_advt_reg(struct i40e_hw *hw,
+ u64 advt_reg,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_send_driver_version(struct i40e_hw *hw,
+ struct i40e_driver_version *dv,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_add_vsi(struct i40e_hw *hw,
+ struct i40e_vsi_context *vsi_ctx,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_set_vsi_broadcast(struct i40e_hw *hw,
+ u16 vsi_id, bool set_filter,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_set_vsi_unicast_promiscuous(struct i40e_hw *hw,
+ u16 vsi_id, bool set, struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_set_vsi_multicast_promiscuous(struct i40e_hw *hw,
+ u16 vsi_id, bool set, struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_set_vsi_mc_promisc_on_vlan(struct i40e_hw *hw,
+ u16 seid, bool enable, u16 vid,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_set_vsi_uc_promisc_on_vlan(struct i40e_hw *hw,
+ u16 seid, bool enable, u16 vid,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_get_vsi_params(struct i40e_hw *hw,
+ struct i40e_vsi_context *vsi_ctx,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_update_vsi_params(struct i40e_hw *hw,
+ struct i40e_vsi_context *vsi_ctx,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_add_veb(struct i40e_hw *hw, u16 uplink_seid,
+ u16 downlink_seid, u8 enabled_tc,
+ bool default_port, bool enable_l2_filtering,
+ u16 *pveb_seid,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_get_veb_parameters(struct i40e_hw *hw,
+ u16 veb_seid, u16 *switch_id, bool *floating,
+ u16 *statistic_index, u16 *vebs_used,
+ u16 *vebs_free,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_add_macvlan(struct i40e_hw *hw, u16 vsi_id,
+ struct i40e_aqc_add_macvlan_element_data *mv_list,
+ u16 count, struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 vsi_id,
+ struct i40e_aqc_remove_macvlan_element_data *mv_list,
+ u16 count, struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_add_vlan(struct i40e_hw *hw, u16 vsi_id,
+ struct i40e_aqc_add_remove_vlan_element_data *v_list,
+ u8 count, struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_remove_vlan(struct i40e_hw *hw, u16 vsi_id,
+ struct i40e_aqc_add_remove_vlan_element_data *v_list,
+ u8 count, struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_send_msg_to_vf(struct i40e_hw *hw, u16 vfid,
+ u32 v_opcode, u32 v_retval, u8 *msg, u16 msglen,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_get_switch_config(struct i40e_hw *hw,
+ struct i40e_aqc_get_switch_config_resp *buf,
+ u16 buf_size, u16 *start_seid,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_request_resource(struct i40e_hw *hw,
+ enum i40e_aq_resources_ids resource,
+ enum i40e_aq_resource_access_type access,
+ u8 sdp_number, u64 *timeout,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_release_resource(struct i40e_hw *hw,
+ enum i40e_aq_resources_ids resource,
+ u8 sdp_number,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_read_nvm(struct i40e_hw *hw, u8 module_pointer,
+ u32 offset, u16 length, void *data,
+ bool last_command,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_erase_nvm(struct i40e_hw *hw, u8 module_pointer,
+ u32 offset, u16 length, bool last_command,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_read_nvm_config(struct i40e_hw *hw,
+ u8 cmd_flags, u32 field_id, void *data,
+ u16 buf_size, u16 *element_count,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_write_nvm_config(struct i40e_hw *hw,
+ u8 cmd_flags, void *data, u16 buf_size,
+ u16 element_count,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_oem_post_update(struct i40e_hw *hw,
+ void *buff, u16 buff_size,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_discover_capabilities(struct i40e_hw *hw,
+ void *buff, u16 buff_size, u16 *data_size,
+ enum i40e_admin_queue_opc list_type_opc,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_update_nvm(struct i40e_hw *hw, u8 module_pointer,
+ u32 offset, u16 length, void *data,
+ bool last_command,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_get_lldp_mib(struct i40e_hw *hw, u8 bridge_type,
+ u8 mib_type, void *buff, u16 buff_size,
+ u16 *local_len, u16 *remote_len,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_set_lldp_mib(struct i40e_hw *hw,
+ u8 mib_type, void *buff, u16 buff_size,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_cfg_lldp_mib_change_event(struct i40e_hw *hw,
+ bool enable_update,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_add_lldp_tlv(struct i40e_hw *hw, u8 bridge_type,
+ void *buff, u16 buff_size, u16 tlv_len,
+ u16 *mib_len,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_update_lldp_tlv(struct i40e_hw *hw,
+ u8 bridge_type, void *buff, u16 buff_size,
+ u16 old_len, u16 new_len, u16 offset,
+ u16 *mib_len,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_delete_lldp_tlv(struct i40e_hw *hw,
+ u8 bridge_type, void *buff, u16 buff_size,
+ u16 tlv_len, u16 *mib_len,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_stop_lldp(struct i40e_hw *hw, bool shutdown_agent,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_start_lldp(struct i40e_hw *hw,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_get_cee_dcb_config(struct i40e_hw *hw,
+ void *buff, u16 buff_size,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_start_stop_dcbx(struct i40e_hw *hw,
+ bool start_agent,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_add_udp_tunnel(struct i40e_hw *hw,
+ u16 udp_port, u8 protocol_index,
+ u8 *filter_index,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_del_udp_tunnel(struct i40e_hw *hw, u8 index,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_get_switch_resource_alloc(struct i40e_hw *hw,
+ u8 *num_entries,
+ struct i40e_aqc_switch_resource_alloc_element_resp *buf,
+ u16 count,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_add_pvirt(struct i40e_hw *hw, u16 flags,
+ u16 mac_seid, u16 vsi_seid,
+ u16 *ret_seid);
+enum i40e_status_code i40e_aq_add_tag(struct i40e_hw *hw, bool direct_to_queue,
+ u16 vsi_seid, u16 tag, u16 queue_num,
+ u16 *tags_used, u16 *tags_free,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_remove_tag(struct i40e_hw *hw, u16 vsi_seid,
+ u16 tag, u16 *tags_used, u16 *tags_free,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_add_mcast_etag(struct i40e_hw *hw, u16 pe_seid,
+ u16 etag, u8 num_tags_in_buf, void *buf,
+ u16 *tags_used, u16 *tags_free,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_remove_mcast_etag(struct i40e_hw *hw, u16 pe_seid,
+ u16 etag, u16 *tags_used, u16 *tags_free,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_update_tag(struct i40e_hw *hw, u16 vsi_seid,
+ u16 old_tag, u16 new_tag, u16 *tags_used,
+ u16 *tags_free,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_add_statistics(struct i40e_hw *hw, u16 seid,
+ u16 vlan_id, u16 *stat_index,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_remove_statistics(struct i40e_hw *hw, u16 seid,
+ u16 vlan_id, u16 stat_index,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_set_port_parameters(struct i40e_hw *hw,
+ u16 bad_frame_vsi, bool save_bad_pac,
+ bool pad_short_pac, bool double_vlan,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_delete_element(struct i40e_hw *hw, u16 seid,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_mac_address_write(struct i40e_hw *hw,
+ u16 flags, u8 *mac_addr,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_config_vsi_bw_limit(struct i40e_hw *hw,
+ u16 seid, u16 credit, u8 max_credit,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_dcb_ignore_pfc(struct i40e_hw *hw,
+ u8 tcmap, bool request, u8 *tcmap_ret,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_get_hmc_resource_profile(struct i40e_hw *hw,
+ enum i40e_aq_hmc_profile *profile,
+ u8 *pe_vf_enabled_count,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_config_switch_comp_ets_bw_limit(
+ struct i40e_hw *hw, u16 seid,
+ struct i40e_aqc_configure_switching_comp_ets_bw_limit_data *bw_data,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_config_vsi_ets_sla_bw_limit(struct i40e_hw *hw,
+ u16 seid,
+ struct i40e_aqc_configure_vsi_ets_sla_bw_data *bw_data,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_dcb_updated(struct i40e_hw *hw,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_set_hmc_resource_profile(struct i40e_hw *hw,
+ enum i40e_aq_hmc_profile profile,
+ u8 pe_vf_enabled_count,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_config_switch_comp_bw_limit(struct i40e_hw *hw,
+ u16 seid, u16 credit, u8 max_bw,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_config_vsi_tc_bw(struct i40e_hw *hw, u16 seid,
+ struct i40e_aqc_configure_vsi_tc_bw_data *bw_data,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_query_vsi_bw_config(struct i40e_hw *hw,
+ u16 seid,
+ struct i40e_aqc_query_vsi_bw_config_resp *bw_data,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_query_vsi_ets_sla_config(struct i40e_hw *hw,
+ u16 seid,
+ struct i40e_aqc_query_vsi_ets_sla_config_resp *bw_data,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_query_switch_comp_ets_config(struct i40e_hw *hw,
+ u16 seid,
+ struct i40e_aqc_query_switching_comp_ets_config_resp *bw_data,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_query_port_ets_config(struct i40e_hw *hw,
+ u16 seid,
+ struct i40e_aqc_query_port_ets_config_resp *bw_data,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_query_switch_comp_bw_config(struct i40e_hw *hw,
+ u16 seid,
+ struct i40e_aqc_query_switching_comp_bw_config_resp *bw_data,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_resume_port_tx(struct i40e_hw *hw,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_read_lldp_cfg(struct i40e_hw *hw,
+ struct i40e_lldp_variables *lldp_cfg);
+enum i40e_status_code i40e_aq_add_cloud_filters(struct i40e_hw *hw,
+ u16 vsi,
+ struct i40e_aqc_add_remove_cloud_filters_element_data *filters,
+ u8 filter_count);
+
+enum i40e_status_code i40e_aq_remove_cloud_filters(struct i40e_hw *hw,
+ u16 vsi,
+ struct i40e_aqc_add_remove_cloud_filters_element_data *filters,
+ u8 filter_count);
+
+enum i40e_status_code i40e_aq_alternate_read(struct i40e_hw *hw,
+ u32 reg_addr0, u32 *reg_val0,
+ u32 reg_addr1, u32 *reg_val1);
+enum i40e_status_code i40e_aq_alternate_read_indirect(struct i40e_hw *hw,
+ u32 addr, u32 dw_count, void *buffer);
+enum i40e_status_code i40e_aq_alternate_write(struct i40e_hw *hw,
+ u32 reg_addr0, u32 reg_val0,
+ u32 reg_addr1, u32 reg_val1);
+enum i40e_status_code i40e_aq_alternate_write_indirect(struct i40e_hw *hw,
+ u32 addr, u32 dw_count, void *buffer);
+enum i40e_status_code i40e_aq_alternate_clear(struct i40e_hw *hw);
+enum i40e_status_code i40e_aq_alternate_write_done(struct i40e_hw *hw,
+ u8 bios_mode, bool *reset_needed);
+enum i40e_status_code i40e_aq_set_oem_mode(struct i40e_hw *hw,
+ u8 oem_mode);
+
+/* i40e_common */
+enum i40e_status_code i40e_init_shared_code(struct i40e_hw *hw);
+enum i40e_status_code i40e_pf_reset(struct i40e_hw *hw);
+void i40e_clear_hw(struct i40e_hw *hw);
+void i40e_clear_pxe_mode(struct i40e_hw *hw);
+enum i40e_status_code i40e_get_link_status(struct i40e_hw *hw, bool *link_up);
+enum i40e_status_code i40e_update_link_info(struct i40e_hw *hw);
+enum i40e_status_code i40e_get_mac_addr(struct i40e_hw *hw, u8 *mac_addr);
+enum i40e_status_code i40e_read_bw_from_alt_ram(struct i40e_hw *hw,
+ u32 *max_bw, u32 *min_bw, bool *min_valid, bool *max_valid);
+enum i40e_status_code i40e_aq_configure_partition_bw(struct i40e_hw *hw,
+ struct i40e_aqc_configure_partition_bw_data *bw_data,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_get_port_mac_addr(struct i40e_hw *hw, u8 *mac_addr);
+enum i40e_status_code i40e_read_pba_string(struct i40e_hw *hw, u8 *pba_num,
+ u32 pba_num_size);
+void i40e_pre_tx_queue_cfg(struct i40e_hw *hw, u32 queue, bool enable);
+enum i40e_aq_link_speed i40e_get_link_speed(struct i40e_hw *hw);
+/* prototype for functions used for NVM access */
+enum i40e_status_code i40e_init_nvm(struct i40e_hw *hw);
+enum i40e_status_code i40e_acquire_nvm(struct i40e_hw *hw,
+ enum i40e_aq_resource_access_type access);
+void i40e_release_nvm(struct i40e_hw *hw);
+enum i40e_status_code i40e_read_nvm_word(struct i40e_hw *hw, u16 offset,
+ u16 *data);
+enum i40e_status_code i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
+ u16 *words, u16 *data);
+enum i40e_status_code i40e_write_nvm_aq(struct i40e_hw *hw, u8 module,
+ u32 offset, u16 words, void *data,
+ bool last_command);
+enum i40e_status_code i40e_write_nvm_word(struct i40e_hw *hw, u32 offset,
+ void *data);
+enum i40e_status_code i40e_write_nvm_buffer(struct i40e_hw *hw, u8 module,
+ u32 offset, u16 words, void *data);
+enum i40e_status_code i40e_calc_nvm_checksum(struct i40e_hw *hw, u16 *checksum);
+enum i40e_status_code i40e_update_nvm_checksum(struct i40e_hw *hw);
+enum i40e_status_code i40e_validate_nvm_checksum(struct i40e_hw *hw,
+ u16 *checksum);
+enum i40e_status_code i40e_nvmupd_command(struct i40e_hw *hw,
+ struct i40e_nvm_access *cmd,
+ u8 *bytes, int *);
+void i40e_set_pci_config_data(struct i40e_hw *hw, u16 link_status);
+
+enum i40e_status_code i40e_set_mac_type(struct i40e_hw *hw);
+
+extern struct i40e_rx_ptype_decoded i40e_ptype_lookup[];
+
+static INLINE struct i40e_rx_ptype_decoded decode_rx_desc_ptype(u8 ptype)
+{
+ return i40e_ptype_lookup[ptype];
+}
+
+/* prototype for functions used for SW spinlocks */
+void i40e_init_spinlock(struct i40e_spinlock *sp);
+void i40e_acquire_spinlock(struct i40e_spinlock *sp);
+void i40e_release_spinlock(struct i40e_spinlock *sp);
+void i40e_destroy_spinlock(struct i40e_spinlock *sp);
+
+/* i40e_common for VF drivers*/
+void i40e_vf_parse_hw_config(struct i40e_hw *hw,
+ struct i40e_virtchnl_vf_resource *msg);
+enum i40e_status_code i40e_vf_reset(struct i40e_hw *hw);
+enum i40e_status_code i40e_aq_send_msg_to_pf(struct i40e_hw *hw,
+ enum i40e_virtchnl_ops v_opcode,
+ enum i40e_status_code v_retval,
+ u8 *msg, u16 msglen,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_set_filter_control(struct i40e_hw *hw,
+ struct i40e_filter_control_settings *settings);
+enum i40e_status_code i40e_aq_add_rem_control_packet_filter(struct i40e_hw *hw,
+ u8 *mac_addr, u16 ethtype, u16 flags,
+ u16 vsi_seid, u16 queue, bool is_add,
+ struct i40e_control_filter_stats *stats,
+ struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_debug_dump(struct i40e_hw *hw, u8 cluster_id,
+ u8 table_id, u32 start_index, u16 buff_size,
+ void *buff, u16 *ret_buff_size,
+ u8 *ret_next_table, u32 *ret_next_index,
+ struct i40e_asq_cmd_details *cmd_details);
+#endif /* _I40E_PROTOTYPE_H_ */
diff --git a/usr/src/uts/common/io/i40e/core/i40e_register.h b/usr/src/uts/common/io/i40e/core/i40e_register.h
new file mode 100644
index 0000000000..ff4b8a54f2
--- /dev/null
+++ b/usr/src/uts/common/io/i40e/core/i40e_register.h
@@ -0,0 +1,5317 @@
+/******************************************************************************
+
+ Copyright (c) 2013-2015, Intel Corporation
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+
+******************************************************************************/
+/*$FreeBSD: head/sys/dev/ixl/i40e_register.h 284049 2015-06-05 22:52:42Z jfv $*/
+
+#ifndef _I40E_REGISTER_H_
+#define _I40E_REGISTER_H_
+
+
+#define I40E_GL_ARQBAH 0x000801C0 /* Reset: EMPR */
+#define I40E_GL_ARQBAH_ARQBAH_SHIFT 0
+#define I40E_GL_ARQBAH_ARQBAH_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_ARQBAH_ARQBAH_SHIFT)
+#define I40E_GL_ARQBAL 0x000800C0 /* Reset: EMPR */
+#define I40E_GL_ARQBAL_ARQBAL_SHIFT 0
+#define I40E_GL_ARQBAL_ARQBAL_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_ARQBAL_ARQBAL_SHIFT)
+#define I40E_GL_ARQH 0x000803C0 /* Reset: EMPR */
+#define I40E_GL_ARQH_ARQH_SHIFT 0
+#define I40E_GL_ARQH_ARQH_MASK I40E_MASK(0x3FF, I40E_GL_ARQH_ARQH_SHIFT)
+#define I40E_GL_ARQT 0x000804C0 /* Reset: EMPR */
+#define I40E_GL_ARQT_ARQT_SHIFT 0
+#define I40E_GL_ARQT_ARQT_MASK I40E_MASK(0x3FF, I40E_GL_ARQT_ARQT_SHIFT)
+#define I40E_GL_ATQBAH 0x00080140 /* Reset: EMPR */
+#define I40E_GL_ATQBAH_ATQBAH_SHIFT 0
+#define I40E_GL_ATQBAH_ATQBAH_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_ATQBAH_ATQBAH_SHIFT)
+#define I40E_GL_ATQBAL 0x00080040 /* Reset: EMPR */
+#define I40E_GL_ATQBAL_ATQBAL_SHIFT 0
+#define I40E_GL_ATQBAL_ATQBAL_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_ATQBAL_ATQBAL_SHIFT)
+#define I40E_GL_ATQH 0x00080340 /* Reset: EMPR */
+#define I40E_GL_ATQH_ATQH_SHIFT 0
+#define I40E_GL_ATQH_ATQH_MASK I40E_MASK(0x3FF, I40E_GL_ATQH_ATQH_SHIFT)
+#define I40E_GL_ATQLEN 0x00080240 /* Reset: EMPR */
+#define I40E_GL_ATQLEN_ATQLEN_SHIFT 0
+#define I40E_GL_ATQLEN_ATQLEN_MASK I40E_MASK(0x3FF, I40E_GL_ATQLEN_ATQLEN_SHIFT)
+#define I40E_GL_ATQLEN_ATQVFE_SHIFT 28
+#define I40E_GL_ATQLEN_ATQVFE_MASK I40E_MASK(0x1, I40E_GL_ATQLEN_ATQVFE_SHIFT)
+#define I40E_GL_ATQLEN_ATQOVFL_SHIFT 29
+#define I40E_GL_ATQLEN_ATQOVFL_MASK I40E_MASK(0x1, I40E_GL_ATQLEN_ATQOVFL_SHIFT)
+#define I40E_GL_ATQLEN_ATQCRIT_SHIFT 30
+#define I40E_GL_ATQLEN_ATQCRIT_MASK I40E_MASK(0x1, I40E_GL_ATQLEN_ATQCRIT_SHIFT)
+#define I40E_GL_ATQLEN_ATQENABLE_SHIFT 31
+#define I40E_GL_ATQLEN_ATQENABLE_MASK I40E_MASK(0x1, I40E_GL_ATQLEN_ATQENABLE_SHIFT)
+#define I40E_GL_ATQT 0x00080440 /* Reset: EMPR */
+#define I40E_GL_ATQT_ATQT_SHIFT 0
+#define I40E_GL_ATQT_ATQT_MASK I40E_MASK(0x3FF, I40E_GL_ATQT_ATQT_SHIFT)
+#define I40E_PF_ARQBAH 0x00080180 /* Reset: EMPR */
+#define I40E_PF_ARQBAH_ARQBAH_SHIFT 0
+#define I40E_PF_ARQBAH_ARQBAH_MASK I40E_MASK(0xFFFFFFFF, I40E_PF_ARQBAH_ARQBAH_SHIFT)
+#define I40E_PF_ARQBAL 0x00080080 /* Reset: EMPR */
+#define I40E_PF_ARQBAL_ARQBAL_SHIFT 0
+#define I40E_PF_ARQBAL_ARQBAL_MASK I40E_MASK(0xFFFFFFFF, I40E_PF_ARQBAL_ARQBAL_SHIFT)
+#define I40E_PF_ARQH 0x00080380 /* Reset: EMPR */
+#define I40E_PF_ARQH_ARQH_SHIFT 0
+#define I40E_PF_ARQH_ARQH_MASK I40E_MASK(0x3FF, I40E_PF_ARQH_ARQH_SHIFT)
+#define I40E_PF_ARQLEN 0x00080280 /* Reset: EMPR */
+#define I40E_PF_ARQLEN_ARQLEN_SHIFT 0
+#define I40E_PF_ARQLEN_ARQLEN_MASK I40E_MASK(0x3FF, I40E_PF_ARQLEN_ARQLEN_SHIFT)
+#define I40E_PF_ARQLEN_ARQVFE_SHIFT 28
+#define I40E_PF_ARQLEN_ARQVFE_MASK I40E_MASK(0x1, I40E_PF_ARQLEN_ARQVFE_SHIFT)
+#define I40E_PF_ARQLEN_ARQOVFL_SHIFT 29
+#define I40E_PF_ARQLEN_ARQOVFL_MASK I40E_MASK(0x1, I40E_PF_ARQLEN_ARQOVFL_SHIFT)
+#define I40E_PF_ARQLEN_ARQCRIT_SHIFT 30
+#define I40E_PF_ARQLEN_ARQCRIT_MASK I40E_MASK(0x1, I40E_PF_ARQLEN_ARQCRIT_SHIFT)
+#define I40E_PF_ARQLEN_ARQENABLE_SHIFT 31
+#define I40E_PF_ARQLEN_ARQENABLE_MASK I40E_MASK(0x1, I40E_PF_ARQLEN_ARQENABLE_SHIFT)
+#define I40E_PF_ARQT 0x00080480 /* Reset: EMPR */
+#define I40E_PF_ARQT_ARQT_SHIFT 0
+#define I40E_PF_ARQT_ARQT_MASK I40E_MASK(0x3FF, I40E_PF_ARQT_ARQT_SHIFT)
+#define I40E_PF_ATQBAH 0x00080100 /* Reset: EMPR */
+#define I40E_PF_ATQBAH_ATQBAH_SHIFT 0
+#define I40E_PF_ATQBAH_ATQBAH_MASK I40E_MASK(0xFFFFFFFF, I40E_PF_ATQBAH_ATQBAH_SHIFT)
+#define I40E_PF_ATQBAL 0x00080000 /* Reset: EMPR */
+#define I40E_PF_ATQBAL_ATQBAL_SHIFT 0
+#define I40E_PF_ATQBAL_ATQBAL_MASK I40E_MASK(0xFFFFFFFF, I40E_PF_ATQBAL_ATQBAL_SHIFT)
+#define I40E_PF_ATQH 0x00080300 /* Reset: EMPR */
+#define I40E_PF_ATQH_ATQH_SHIFT 0
+#define I40E_PF_ATQH_ATQH_MASK I40E_MASK(0x3FF, I40E_PF_ATQH_ATQH_SHIFT)
+#define I40E_PF_ATQLEN 0x00080200 /* Reset: EMPR */
+#define I40E_PF_ATQLEN_ATQLEN_SHIFT 0
+#define I40E_PF_ATQLEN_ATQLEN_MASK I40E_MASK(0x3FF, I40E_PF_ATQLEN_ATQLEN_SHIFT)
+#define I40E_PF_ATQLEN_ATQVFE_SHIFT 28
+#define I40E_PF_ATQLEN_ATQVFE_MASK I40E_MASK(0x1, I40E_PF_ATQLEN_ATQVFE_SHIFT)
+#define I40E_PF_ATQLEN_ATQOVFL_SHIFT 29
+#define I40E_PF_ATQLEN_ATQOVFL_MASK I40E_MASK(0x1, I40E_PF_ATQLEN_ATQOVFL_SHIFT)
+#define I40E_PF_ATQLEN_ATQCRIT_SHIFT 30
+#define I40E_PF_ATQLEN_ATQCRIT_MASK I40E_MASK(0x1, I40E_PF_ATQLEN_ATQCRIT_SHIFT)
+#define I40E_PF_ATQLEN_ATQENABLE_SHIFT 31
+#define I40E_PF_ATQLEN_ATQENABLE_MASK I40E_MASK(0x1, I40E_PF_ATQLEN_ATQENABLE_SHIFT)
+#define I40E_PF_ATQT 0x00080400 /* Reset: EMPR */
+#define I40E_PF_ATQT_ATQT_SHIFT 0
+#define I40E_PF_ATQT_ATQT_MASK I40E_MASK(0x3FF, I40E_PF_ATQT_ATQT_SHIFT)
+#define I40E_VF_ARQBAH(_VF) (0x00081400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: EMPR */
+#define I40E_VF_ARQBAH_MAX_INDEX 127
+#define I40E_VF_ARQBAH_ARQBAH_SHIFT 0
+#define I40E_VF_ARQBAH_ARQBAH_MASK I40E_MASK(0xFFFFFFFF, I40E_VF_ARQBAH_ARQBAH_SHIFT)
+#define I40E_VF_ARQBAL(_VF) (0x00080C00 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: EMPR */
+#define I40E_VF_ARQBAL_MAX_INDEX 127
+#define I40E_VF_ARQBAL_ARQBAL_SHIFT 0
+#define I40E_VF_ARQBAL_ARQBAL_MASK I40E_MASK(0xFFFFFFFF, I40E_VF_ARQBAL_ARQBAL_SHIFT)
+#define I40E_VF_ARQH(_VF) (0x00082400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: EMPR */
+#define I40E_VF_ARQH_MAX_INDEX 127
+#define I40E_VF_ARQH_ARQH_SHIFT 0
+#define I40E_VF_ARQH_ARQH_MASK I40E_MASK(0x3FF, I40E_VF_ARQH_ARQH_SHIFT)
+#define I40E_VF_ARQLEN(_VF) (0x00081C00 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: EMPR */
+#define I40E_VF_ARQLEN_MAX_INDEX 127
+#define I40E_VF_ARQLEN_ARQLEN_SHIFT 0
+#define I40E_VF_ARQLEN_ARQLEN_MASK I40E_MASK(0x3FF, I40E_VF_ARQLEN_ARQLEN_SHIFT)
+#define I40E_VF_ARQLEN_ARQVFE_SHIFT 28
+#define I40E_VF_ARQLEN_ARQVFE_MASK I40E_MASK(0x1, I40E_VF_ARQLEN_ARQVFE_SHIFT)
+#define I40E_VF_ARQLEN_ARQOVFL_SHIFT 29
+#define I40E_VF_ARQLEN_ARQOVFL_MASK I40E_MASK(0x1, I40E_VF_ARQLEN_ARQOVFL_SHIFT)
+#define I40E_VF_ARQLEN_ARQCRIT_SHIFT 30
+#define I40E_VF_ARQLEN_ARQCRIT_MASK I40E_MASK(0x1, I40E_VF_ARQLEN_ARQCRIT_SHIFT)
+#define I40E_VF_ARQLEN_ARQENABLE_SHIFT 31
+#define I40E_VF_ARQLEN_ARQENABLE_MASK I40E_MASK(0x1, I40E_VF_ARQLEN_ARQENABLE_SHIFT)
+#define I40E_VF_ARQT(_VF) (0x00082C00 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: EMPR */
+#define I40E_VF_ARQT_MAX_INDEX 127
+#define I40E_VF_ARQT_ARQT_SHIFT 0
+#define I40E_VF_ARQT_ARQT_MASK I40E_MASK(0x3FF, I40E_VF_ARQT_ARQT_SHIFT)
+#define I40E_VF_ATQBAH(_VF) (0x00081000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: EMPR */
+#define I40E_VF_ATQBAH_MAX_INDEX 127
+#define I40E_VF_ATQBAH_ATQBAH_SHIFT 0
+#define I40E_VF_ATQBAH_ATQBAH_MASK I40E_MASK(0xFFFFFFFF, I40E_VF_ATQBAH_ATQBAH_SHIFT)
+#define I40E_VF_ATQBAL(_VF) (0x00080800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: EMPR */
+#define I40E_VF_ATQBAL_MAX_INDEX 127
+#define I40E_VF_ATQBAL_ATQBAL_SHIFT 0
+#define I40E_VF_ATQBAL_ATQBAL_MASK I40E_MASK(0xFFFFFFFF, I40E_VF_ATQBAL_ATQBAL_SHIFT)
+#define I40E_VF_ATQH(_VF) (0x00082000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: EMPR */
+#define I40E_VF_ATQH_MAX_INDEX 127
+#define I40E_VF_ATQH_ATQH_SHIFT 0
+#define I40E_VF_ATQH_ATQH_MASK I40E_MASK(0x3FF, I40E_VF_ATQH_ATQH_SHIFT)
+#define I40E_VF_ATQLEN(_VF) (0x00081800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: EMPR */
+#define I40E_VF_ATQLEN_MAX_INDEX 127
+#define I40E_VF_ATQLEN_ATQLEN_SHIFT 0
+#define I40E_VF_ATQLEN_ATQLEN_MASK I40E_MASK(0x3FF, I40E_VF_ATQLEN_ATQLEN_SHIFT)
+#define I40E_VF_ATQLEN_ATQVFE_SHIFT 28
+#define I40E_VF_ATQLEN_ATQVFE_MASK I40E_MASK(0x1, I40E_VF_ATQLEN_ATQVFE_SHIFT)
+#define I40E_VF_ATQLEN_ATQOVFL_SHIFT 29
+#define I40E_VF_ATQLEN_ATQOVFL_MASK I40E_MASK(0x1, I40E_VF_ATQLEN_ATQOVFL_SHIFT)
+#define I40E_VF_ATQLEN_ATQCRIT_SHIFT 30
+#define I40E_VF_ATQLEN_ATQCRIT_MASK I40E_MASK(0x1, I40E_VF_ATQLEN_ATQCRIT_SHIFT)
+#define I40E_VF_ATQLEN_ATQENABLE_SHIFT 31
+#define I40E_VF_ATQLEN_ATQENABLE_MASK I40E_MASK(0x1, I40E_VF_ATQLEN_ATQENABLE_SHIFT)
+#define I40E_VF_ATQT(_VF) (0x00082800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: EMPR */
+#define I40E_VF_ATQT_MAX_INDEX 127
+#define I40E_VF_ATQT_ATQT_SHIFT 0
+#define I40E_VF_ATQT_ATQT_MASK I40E_MASK(0x3FF, I40E_VF_ATQT_ATQT_SHIFT)
+#define I40E_PRT_L2TAGSEN 0x001C0B20 /* Reset: CORER */
+#define I40E_PRT_L2TAGSEN_ENABLE_SHIFT 0
+#define I40E_PRT_L2TAGSEN_ENABLE_MASK I40E_MASK(0xFF, I40E_PRT_L2TAGSEN_ENABLE_SHIFT)
+#define I40E_PFCM_LAN_ERRDATA 0x0010C080 /* Reset: PFR */
+#define I40E_PFCM_LAN_ERRDATA_ERROR_CODE_SHIFT 0
+#define I40E_PFCM_LAN_ERRDATA_ERROR_CODE_MASK I40E_MASK(0xF, I40E_PFCM_LAN_ERRDATA_ERROR_CODE_SHIFT)
+#define I40E_PFCM_LAN_ERRDATA_Q_TYPE_SHIFT 4
+#define I40E_PFCM_LAN_ERRDATA_Q_TYPE_MASK I40E_MASK(0x7, I40E_PFCM_LAN_ERRDATA_Q_TYPE_SHIFT)
+#define I40E_PFCM_LAN_ERRDATA_Q_NUM_SHIFT 8
+#define I40E_PFCM_LAN_ERRDATA_Q_NUM_MASK I40E_MASK(0xFFF, I40E_PFCM_LAN_ERRDATA_Q_NUM_SHIFT)
+#define I40E_PFCM_LAN_ERRINFO 0x0010C000 /* Reset: PFR */
+#define I40E_PFCM_LAN_ERRINFO_ERROR_VALID_SHIFT 0
+#define I40E_PFCM_LAN_ERRINFO_ERROR_VALID_MASK I40E_MASK(0x1, I40E_PFCM_LAN_ERRINFO_ERROR_VALID_SHIFT)
+#define I40E_PFCM_LAN_ERRINFO_ERROR_INST_SHIFT 4
+#define I40E_PFCM_LAN_ERRINFO_ERROR_INST_MASK I40E_MASK(0x7, I40E_PFCM_LAN_ERRINFO_ERROR_INST_SHIFT)
+#define I40E_PFCM_LAN_ERRINFO_DBL_ERROR_CNT_SHIFT 8
+#define I40E_PFCM_LAN_ERRINFO_DBL_ERROR_CNT_MASK I40E_MASK(0xFF, I40E_PFCM_LAN_ERRINFO_DBL_ERROR_CNT_SHIFT)
+#define I40E_PFCM_LAN_ERRINFO_RLU_ERROR_CNT_SHIFT 16
+#define I40E_PFCM_LAN_ERRINFO_RLU_ERROR_CNT_MASK I40E_MASK(0xFF, I40E_PFCM_LAN_ERRINFO_RLU_ERROR_CNT_SHIFT)
+#define I40E_PFCM_LAN_ERRINFO_RLS_ERROR_CNT_SHIFT 24
+#define I40E_PFCM_LAN_ERRINFO_RLS_ERROR_CNT_MASK I40E_MASK(0xFF, I40E_PFCM_LAN_ERRINFO_RLS_ERROR_CNT_SHIFT)
+#define I40E_PFCM_LANCTXCTL 0x0010C300 /* Reset: CORER */
+#define I40E_PFCM_LANCTXCTL_QUEUE_NUM_SHIFT 0
+#define I40E_PFCM_LANCTXCTL_QUEUE_NUM_MASK I40E_MASK(0xFFF, I40E_PFCM_LANCTXCTL_QUEUE_NUM_SHIFT)
+#define I40E_PFCM_LANCTXCTL_SUB_LINE_SHIFT 12
+#define I40E_PFCM_LANCTXCTL_SUB_LINE_MASK I40E_MASK(0x7, I40E_PFCM_LANCTXCTL_SUB_LINE_SHIFT)
+#define I40E_PFCM_LANCTXCTL_QUEUE_TYPE_SHIFT 15
+#define I40E_PFCM_LANCTXCTL_QUEUE_TYPE_MASK I40E_MASK(0x3, I40E_PFCM_LANCTXCTL_QUEUE_TYPE_SHIFT)
+#define I40E_PFCM_LANCTXCTL_OP_CODE_SHIFT 17
+#define I40E_PFCM_LANCTXCTL_OP_CODE_MASK I40E_MASK(0x3, I40E_PFCM_LANCTXCTL_OP_CODE_SHIFT)
+#define I40E_PFCM_LANCTXDATA(_i) (0x0010C100 + ((_i) * 128)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_PFCM_LANCTXDATA_MAX_INDEX 3
+#define I40E_PFCM_LANCTXDATA_DATA_SHIFT 0
+#define I40E_PFCM_LANCTXDATA_DATA_MASK I40E_MASK(0xFFFFFFFF, I40E_PFCM_LANCTXDATA_DATA_SHIFT)
+#define I40E_PFCM_LANCTXSTAT 0x0010C380 /* Reset: CORER */
+#define I40E_PFCM_LANCTXSTAT_CTX_DONE_SHIFT 0
+#define I40E_PFCM_LANCTXSTAT_CTX_DONE_MASK I40E_MASK(0x1, I40E_PFCM_LANCTXSTAT_CTX_DONE_SHIFT)
+#define I40E_PFCM_LANCTXSTAT_CTX_MISS_SHIFT 1
+#define I40E_PFCM_LANCTXSTAT_CTX_MISS_MASK I40E_MASK(0x1, I40E_PFCM_LANCTXSTAT_CTX_MISS_SHIFT)
+#define I40E_VFCM_PE_ERRDATA1(_VF) (0x00138800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFCM_PE_ERRDATA1_MAX_INDEX 127
+#define I40E_VFCM_PE_ERRDATA1_ERROR_CODE_SHIFT 0
+#define I40E_VFCM_PE_ERRDATA1_ERROR_CODE_MASK I40E_MASK(0xF, I40E_VFCM_PE_ERRDATA1_ERROR_CODE_SHIFT)
+#define I40E_VFCM_PE_ERRDATA1_Q_TYPE_SHIFT 4
+#define I40E_VFCM_PE_ERRDATA1_Q_TYPE_MASK I40E_MASK(0x7, I40E_VFCM_PE_ERRDATA1_Q_TYPE_SHIFT)
+#define I40E_VFCM_PE_ERRDATA1_Q_NUM_SHIFT 8
+#define I40E_VFCM_PE_ERRDATA1_Q_NUM_MASK I40E_MASK(0x3FFFF, I40E_VFCM_PE_ERRDATA1_Q_NUM_SHIFT)
+#define I40E_VFCM_PE_ERRINFO1(_VF) (0x00138400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFCM_PE_ERRINFO1_MAX_INDEX 127
+#define I40E_VFCM_PE_ERRINFO1_ERROR_VALID_SHIFT 0
+#define I40E_VFCM_PE_ERRINFO1_ERROR_VALID_MASK I40E_MASK(0x1, I40E_VFCM_PE_ERRINFO1_ERROR_VALID_SHIFT)
+#define I40E_VFCM_PE_ERRINFO1_ERROR_INST_SHIFT 4
+#define I40E_VFCM_PE_ERRINFO1_ERROR_INST_MASK I40E_MASK(0x7, I40E_VFCM_PE_ERRINFO1_ERROR_INST_SHIFT)
+#define I40E_VFCM_PE_ERRINFO1_DBL_ERROR_CNT_SHIFT 8
+#define I40E_VFCM_PE_ERRINFO1_DBL_ERROR_CNT_MASK I40E_MASK(0xFF, I40E_VFCM_PE_ERRINFO1_DBL_ERROR_CNT_SHIFT)
+#define I40E_VFCM_PE_ERRINFO1_RLU_ERROR_CNT_SHIFT 16
+#define I40E_VFCM_PE_ERRINFO1_RLU_ERROR_CNT_MASK I40E_MASK(0xFF, I40E_VFCM_PE_ERRINFO1_RLU_ERROR_CNT_SHIFT)
+#define I40E_VFCM_PE_ERRINFO1_RLS_ERROR_CNT_SHIFT 24
+#define I40E_VFCM_PE_ERRINFO1_RLS_ERROR_CNT_MASK I40E_MASK(0xFF, I40E_VFCM_PE_ERRINFO1_RLS_ERROR_CNT_SHIFT)
+#define I40E_GLDCB_GENC 0x00083044 /* Reset: CORER */
+#define I40E_GLDCB_GENC_PCIRTT_SHIFT 0
+#define I40E_GLDCB_GENC_PCIRTT_MASK I40E_MASK(0xFFFF, I40E_GLDCB_GENC_PCIRTT_SHIFT)
+#define I40E_GLDCB_RUPTI 0x00122618 /* Reset: CORER */
+#define I40E_GLDCB_RUPTI_PFCTIMEOUT_UP_SHIFT 0
+#define I40E_GLDCB_RUPTI_PFCTIMEOUT_UP_MASK I40E_MASK(0xFFFFFFFF, I40E_GLDCB_RUPTI_PFCTIMEOUT_UP_SHIFT)
+#define I40E_PRTDCB_FCCFG 0x001E4640 /* Reset: GLOBR */
+#define I40E_PRTDCB_FCCFG_TFCE_SHIFT 3
+#define I40E_PRTDCB_FCCFG_TFCE_MASK I40E_MASK(0x3, I40E_PRTDCB_FCCFG_TFCE_SHIFT)
+#define I40E_PRTDCB_FCRTV 0x001E4600 /* Reset: GLOBR */
+#define I40E_PRTDCB_FCRTV_FC_REFRESH_TH_SHIFT 0
+#define I40E_PRTDCB_FCRTV_FC_REFRESH_TH_MASK I40E_MASK(0xFFFF, I40E_PRTDCB_FCRTV_FC_REFRESH_TH_SHIFT)
+#define I40E_PRTDCB_FCTTVN(_i) (0x001E4580 + ((_i) * 32)) /* _i=0...3 */ /* Reset: GLOBR */
+#define I40E_PRTDCB_FCTTVN_MAX_INDEX 3
+#define I40E_PRTDCB_FCTTVN_TTV_2N_SHIFT 0
+#define I40E_PRTDCB_FCTTVN_TTV_2N_MASK I40E_MASK(0xFFFF, I40E_PRTDCB_FCTTVN_TTV_2N_SHIFT)
+#define I40E_PRTDCB_FCTTVN_TTV_2N_P1_SHIFT 16
+#define I40E_PRTDCB_FCTTVN_TTV_2N_P1_MASK I40E_MASK(0xFFFF, I40E_PRTDCB_FCTTVN_TTV_2N_P1_SHIFT)
+#define I40E_PRTDCB_GENC 0x00083000 /* Reset: CORER */
+#define I40E_PRTDCB_GENC_RESERVED_1_SHIFT 0
+#define I40E_PRTDCB_GENC_RESERVED_1_MASK I40E_MASK(0x3, I40E_PRTDCB_GENC_RESERVED_1_SHIFT)
+#define I40E_PRTDCB_GENC_NUMTC_SHIFT 2
+#define I40E_PRTDCB_GENC_NUMTC_MASK I40E_MASK(0xF, I40E_PRTDCB_GENC_NUMTC_SHIFT)
+#define I40E_PRTDCB_GENC_FCOEUP_SHIFT 6
+#define I40E_PRTDCB_GENC_FCOEUP_MASK I40E_MASK(0x7, I40E_PRTDCB_GENC_FCOEUP_SHIFT)
+#define I40E_PRTDCB_GENC_FCOEUP_VALID_SHIFT 9
+#define I40E_PRTDCB_GENC_FCOEUP_VALID_MASK I40E_MASK(0x1, I40E_PRTDCB_GENC_FCOEUP_VALID_SHIFT)
+#define I40E_PRTDCB_GENC_PFCLDA_SHIFT 16
+#define I40E_PRTDCB_GENC_PFCLDA_MASK I40E_MASK(0xFFFF, I40E_PRTDCB_GENC_PFCLDA_SHIFT)
+#define I40E_PRTDCB_GENS 0x00083020 /* Reset: CORER */
+#define I40E_PRTDCB_GENS_DCBX_STATUS_SHIFT 0
+#define I40E_PRTDCB_GENS_DCBX_STATUS_MASK I40E_MASK(0x7, I40E_PRTDCB_GENS_DCBX_STATUS_SHIFT)
+#define I40E_PRTDCB_MFLCN 0x001E2400 /* Reset: GLOBR */
+#define I40E_PRTDCB_MFLCN_PMCF_SHIFT 0
+#define I40E_PRTDCB_MFLCN_PMCF_MASK I40E_MASK(0x1, I40E_PRTDCB_MFLCN_PMCF_SHIFT)
+#define I40E_PRTDCB_MFLCN_DPF_SHIFT 1
+#define I40E_PRTDCB_MFLCN_DPF_MASK I40E_MASK(0x1, I40E_PRTDCB_MFLCN_DPF_SHIFT)
+#define I40E_PRTDCB_MFLCN_RPFCM_SHIFT 2
+#define I40E_PRTDCB_MFLCN_RPFCM_MASK I40E_MASK(0x1, I40E_PRTDCB_MFLCN_RPFCM_SHIFT)
+#define I40E_PRTDCB_MFLCN_RFCE_SHIFT 3
+#define I40E_PRTDCB_MFLCN_RFCE_MASK I40E_MASK(0x1, I40E_PRTDCB_MFLCN_RFCE_SHIFT)
+#define I40E_PRTDCB_MFLCN_RPFCE_SHIFT 4
+#define I40E_PRTDCB_MFLCN_RPFCE_MASK I40E_MASK(0xFF, I40E_PRTDCB_MFLCN_RPFCE_SHIFT)
+#define I40E_PRTDCB_RETSC 0x001223E0 /* Reset: CORER */
+#define I40E_PRTDCB_RETSC_ETS_MODE_SHIFT 0
+#define I40E_PRTDCB_RETSC_ETS_MODE_MASK I40E_MASK(0x1, I40E_PRTDCB_RETSC_ETS_MODE_SHIFT)
+#define I40E_PRTDCB_RETSC_NON_ETS_MODE_SHIFT 1
+#define I40E_PRTDCB_RETSC_NON_ETS_MODE_MASK I40E_MASK(0x1, I40E_PRTDCB_RETSC_NON_ETS_MODE_SHIFT)
+#define I40E_PRTDCB_RETSC_ETS_MAX_EXP_SHIFT 2
+#define I40E_PRTDCB_RETSC_ETS_MAX_EXP_MASK I40E_MASK(0xF, I40E_PRTDCB_RETSC_ETS_MAX_EXP_SHIFT)
+#define I40E_PRTDCB_RETSC_LLTC_SHIFT 8
+#define I40E_PRTDCB_RETSC_LLTC_MASK I40E_MASK(0xFF, I40E_PRTDCB_RETSC_LLTC_SHIFT)
+#define I40E_PRTDCB_RETSTCC(_i) (0x00122180 + ((_i) * 32)) /* _i=0...7 */ /* Reset: CORER */
+#define I40E_PRTDCB_RETSTCC_MAX_INDEX 7
+#define I40E_PRTDCB_RETSTCC_BWSHARE_SHIFT 0
+#define I40E_PRTDCB_RETSTCC_BWSHARE_MASK I40E_MASK(0x7F, I40E_PRTDCB_RETSTCC_BWSHARE_SHIFT)
+#define I40E_PRTDCB_RETSTCC_UPINTC_MODE_SHIFT 30
+#define I40E_PRTDCB_RETSTCC_UPINTC_MODE_MASK I40E_MASK(0x1, I40E_PRTDCB_RETSTCC_UPINTC_MODE_SHIFT)
+#define I40E_PRTDCB_RETSTCC_ETSTC_SHIFT 31
+#define I40E_PRTDCB_RETSTCC_ETSTC_MASK I40E_MASK(0x1, I40E_PRTDCB_RETSTCC_ETSTC_SHIFT)
+#define I40E_PRTDCB_RPPMC 0x001223A0 /* Reset: CORER */
+#define I40E_PRTDCB_RPPMC_LANRPPM_SHIFT 0
+#define I40E_PRTDCB_RPPMC_LANRPPM_MASK I40E_MASK(0xFF, I40E_PRTDCB_RPPMC_LANRPPM_SHIFT)
+#define I40E_PRTDCB_RPPMC_RDMARPPM_SHIFT 8
+#define I40E_PRTDCB_RPPMC_RDMARPPM_MASK I40E_MASK(0xFF, I40E_PRTDCB_RPPMC_RDMARPPM_SHIFT)
+#define I40E_PRTDCB_RPPMC_RX_FIFO_SIZE_SHIFT 16
+#define I40E_PRTDCB_RPPMC_RX_FIFO_SIZE_MASK I40E_MASK(0xFF, I40E_PRTDCB_RPPMC_RX_FIFO_SIZE_SHIFT)
+#define I40E_PRTDCB_RUP 0x001C0B00 /* Reset: CORER */
+#define I40E_PRTDCB_RUP_NOVLANUP_SHIFT 0
+#define I40E_PRTDCB_RUP_NOVLANUP_MASK I40E_MASK(0x7, I40E_PRTDCB_RUP_NOVLANUP_SHIFT)
+#define I40E_PRTDCB_RUP2TC 0x001C09A0 /* Reset: CORER */
+#define I40E_PRTDCB_RUP2TC_UP0TC_SHIFT 0
+#define I40E_PRTDCB_RUP2TC_UP0TC_MASK I40E_MASK(0x7, I40E_PRTDCB_RUP2TC_UP0TC_SHIFT)
+#define I40E_PRTDCB_RUP2TC_UP1TC_SHIFT 3
+#define I40E_PRTDCB_RUP2TC_UP1TC_MASK I40E_MASK(0x7, I40E_PRTDCB_RUP2TC_UP1TC_SHIFT)
+#define I40E_PRTDCB_RUP2TC_UP2TC_SHIFT 6
+#define I40E_PRTDCB_RUP2TC_UP2TC_MASK I40E_MASK(0x7, I40E_PRTDCB_RUP2TC_UP2TC_SHIFT)
+#define I40E_PRTDCB_RUP2TC_UP3TC_SHIFT 9
+#define I40E_PRTDCB_RUP2TC_UP3TC_MASK I40E_MASK(0x7, I40E_PRTDCB_RUP2TC_UP3TC_SHIFT)
+#define I40E_PRTDCB_RUP2TC_UP4TC_SHIFT 12
+#define I40E_PRTDCB_RUP2TC_UP4TC_MASK I40E_MASK(0x7, I40E_PRTDCB_RUP2TC_UP4TC_SHIFT)
+#define I40E_PRTDCB_RUP2TC_UP5TC_SHIFT 15
+#define I40E_PRTDCB_RUP2TC_UP5TC_MASK I40E_MASK(0x7, I40E_PRTDCB_RUP2TC_UP5TC_SHIFT)
+#define I40E_PRTDCB_RUP2TC_UP6TC_SHIFT 18
+#define I40E_PRTDCB_RUP2TC_UP6TC_MASK I40E_MASK(0x7, I40E_PRTDCB_RUP2TC_UP6TC_SHIFT)
+#define I40E_PRTDCB_RUP2TC_UP7TC_SHIFT 21
+#define I40E_PRTDCB_RUP2TC_UP7TC_MASK I40E_MASK(0x7, I40E_PRTDCB_RUP2TC_UP7TC_SHIFT)
+#define I40E_PRTDCB_RUPTQ(_i) (0x00122400 + ((_i) * 32)) /* _i=0...7 */ /* Reset: CORER */
+#define I40E_PRTDCB_RUPTQ_MAX_INDEX 7
+#define I40E_PRTDCB_RUPTQ_RXQNUM_SHIFT 0
+#define I40E_PRTDCB_RUPTQ_RXQNUM_MASK I40E_MASK(0x3FFF, I40E_PRTDCB_RUPTQ_RXQNUM_SHIFT)
+#define I40E_PRTDCB_TC2PFC 0x001C0980 /* Reset: CORER */
+#define I40E_PRTDCB_TC2PFC_TC2PFC_SHIFT 0
+#define I40E_PRTDCB_TC2PFC_TC2PFC_MASK I40E_MASK(0xFF, I40E_PRTDCB_TC2PFC_TC2PFC_SHIFT)
+#define I40E_PRTDCB_TCMSTC(_i) (0x000A0040 + ((_i) * 32)) /* _i=0...7 */ /* Reset: CORER */
+#define I40E_PRTDCB_TCMSTC_MAX_INDEX 7
+#define I40E_PRTDCB_TCMSTC_MSTC_SHIFT 0
+#define I40E_PRTDCB_TCMSTC_MSTC_MASK I40E_MASK(0xFFFFF, I40E_PRTDCB_TCMSTC_MSTC_SHIFT)
+#define I40E_PRTDCB_TCPMC 0x000A21A0 /* Reset: CORER */
+#define I40E_PRTDCB_TCPMC_CPM_SHIFT 0
+#define I40E_PRTDCB_TCPMC_CPM_MASK I40E_MASK(0x1FFF, I40E_PRTDCB_TCPMC_CPM_SHIFT)
+#define I40E_PRTDCB_TCPMC_LLTC_SHIFT 13
+#define I40E_PRTDCB_TCPMC_LLTC_MASK I40E_MASK(0xFF, I40E_PRTDCB_TCPMC_LLTC_SHIFT)
+#define I40E_PRTDCB_TCPMC_TCPM_MODE_SHIFT 30
+#define I40E_PRTDCB_TCPMC_TCPM_MODE_MASK I40E_MASK(0x1, I40E_PRTDCB_TCPMC_TCPM_MODE_SHIFT)
+#define I40E_PRTDCB_TCWSTC(_i) (0x000A2040 + ((_i) * 32)) /* _i=0...7 */ /* Reset: CORER */
+#define I40E_PRTDCB_TCWSTC_MAX_INDEX 7
+#define I40E_PRTDCB_TCWSTC_MSTC_SHIFT 0
+#define I40E_PRTDCB_TCWSTC_MSTC_MASK I40E_MASK(0xFFFFF, I40E_PRTDCB_TCWSTC_MSTC_SHIFT)
+#define I40E_PRTDCB_TDPMC 0x000A0180 /* Reset: CORER */
+#define I40E_PRTDCB_TDPMC_DPM_SHIFT 0
+#define I40E_PRTDCB_TDPMC_DPM_MASK I40E_MASK(0xFF, I40E_PRTDCB_TDPMC_DPM_SHIFT)
+#define I40E_PRTDCB_TDPMC_TCPM_MODE_SHIFT 30
+#define I40E_PRTDCB_TDPMC_TCPM_MODE_MASK I40E_MASK(0x1, I40E_PRTDCB_TDPMC_TCPM_MODE_SHIFT)
+#define I40E_PRTDCB_TETSC_TCB 0x000AE060 /* Reset: CORER */
+#define I40E_PRTDCB_TETSC_TCB_EN_LL_STRICT_PRIORITY_SHIFT 0
+#define I40E_PRTDCB_TETSC_TCB_EN_LL_STRICT_PRIORITY_MASK I40E_MASK(0x1, I40E_PRTDCB_TETSC_TCB_EN_LL_STRICT_PRIORITY_SHIFT)
+#define I40E_PRTDCB_TETSC_TCB_LLTC_SHIFT 8
+#define I40E_PRTDCB_TETSC_TCB_LLTC_MASK I40E_MASK(0xFF, I40E_PRTDCB_TETSC_TCB_LLTC_SHIFT)
+#define I40E_PRTDCB_TETSC_TPB 0x00098060 /* Reset: CORER */
+#define I40E_PRTDCB_TETSC_TPB_EN_LL_STRICT_PRIORITY_SHIFT 0
+#define I40E_PRTDCB_TETSC_TPB_EN_LL_STRICT_PRIORITY_MASK I40E_MASK(0x1, I40E_PRTDCB_TETSC_TPB_EN_LL_STRICT_PRIORITY_SHIFT)
+#define I40E_PRTDCB_TETSC_TPB_LLTC_SHIFT 8
+#define I40E_PRTDCB_TETSC_TPB_LLTC_MASK I40E_MASK(0xFF, I40E_PRTDCB_TETSC_TPB_LLTC_SHIFT)
+#define I40E_PRTDCB_TFCS 0x001E4560 /* Reset: GLOBR */
+#define I40E_PRTDCB_TFCS_TXOFF_SHIFT 0
+#define I40E_PRTDCB_TFCS_TXOFF_MASK I40E_MASK(0x1, I40E_PRTDCB_TFCS_TXOFF_SHIFT)
+#define I40E_PRTDCB_TFCS_TXOFF0_SHIFT 8
+#define I40E_PRTDCB_TFCS_TXOFF0_MASK I40E_MASK(0x1, I40E_PRTDCB_TFCS_TXOFF0_SHIFT)
+#define I40E_PRTDCB_TFCS_TXOFF1_SHIFT 9
+#define I40E_PRTDCB_TFCS_TXOFF1_MASK I40E_MASK(0x1, I40E_PRTDCB_TFCS_TXOFF1_SHIFT)
+#define I40E_PRTDCB_TFCS_TXOFF2_SHIFT 10
+#define I40E_PRTDCB_TFCS_TXOFF2_MASK I40E_MASK(0x1, I40E_PRTDCB_TFCS_TXOFF2_SHIFT)
+#define I40E_PRTDCB_TFCS_TXOFF3_SHIFT 11
+#define I40E_PRTDCB_TFCS_TXOFF3_MASK I40E_MASK(0x1, I40E_PRTDCB_TFCS_TXOFF3_SHIFT)
+#define I40E_PRTDCB_TFCS_TXOFF4_SHIFT 12
+#define I40E_PRTDCB_TFCS_TXOFF4_MASK I40E_MASK(0x1, I40E_PRTDCB_TFCS_TXOFF4_SHIFT)
+#define I40E_PRTDCB_TFCS_TXOFF5_SHIFT 13
+#define I40E_PRTDCB_TFCS_TXOFF5_MASK I40E_MASK(0x1, I40E_PRTDCB_TFCS_TXOFF5_SHIFT)
+#define I40E_PRTDCB_TFCS_TXOFF6_SHIFT 14
+#define I40E_PRTDCB_TFCS_TXOFF6_MASK I40E_MASK(0x1, I40E_PRTDCB_TFCS_TXOFF6_SHIFT)
+#define I40E_PRTDCB_TFCS_TXOFF7_SHIFT 15
+#define I40E_PRTDCB_TFCS_TXOFF7_MASK I40E_MASK(0x1, I40E_PRTDCB_TFCS_TXOFF7_SHIFT)
+#define I40E_PRTDCB_TPFCTS(_i) (0x001E4660 + ((_i) * 32)) /* _i=0...7 */ /* Reset: GLOBR */
+#define I40E_PRTDCB_TPFCTS_MAX_INDEX 7
+#define I40E_PRTDCB_TPFCTS_PFCTIMER_SHIFT 0
+#define I40E_PRTDCB_TPFCTS_PFCTIMER_MASK I40E_MASK(0x3FFF, I40E_PRTDCB_TPFCTS_PFCTIMER_SHIFT)
+#define I40E_GLFCOE_RCTL 0x00269B94 /* Reset: CORER */
+#define I40E_GLFCOE_RCTL_FCOEVER_SHIFT 0
+#define I40E_GLFCOE_RCTL_FCOEVER_MASK I40E_MASK(0xF, I40E_GLFCOE_RCTL_FCOEVER_SHIFT)
+#define I40E_GLFCOE_RCTL_SAVBAD_SHIFT 4
+#define I40E_GLFCOE_RCTL_SAVBAD_MASK I40E_MASK(0x1, I40E_GLFCOE_RCTL_SAVBAD_SHIFT)
+#define I40E_GLFCOE_RCTL_ICRC_SHIFT 5
+#define I40E_GLFCOE_RCTL_ICRC_MASK I40E_MASK(0x1, I40E_GLFCOE_RCTL_ICRC_SHIFT)
+#define I40E_GLFCOE_RCTL_MAX_SIZE_SHIFT 16
+#define I40E_GLFCOE_RCTL_MAX_SIZE_MASK I40E_MASK(0x3FFF, I40E_GLFCOE_RCTL_MAX_SIZE_SHIFT)
+#define I40E_GL_FWSTS 0x00083048 /* Reset: POR */
+#define I40E_GL_FWSTS_FWS0B_SHIFT 0
+#define I40E_GL_FWSTS_FWS0B_MASK I40E_MASK(0xFF, I40E_GL_FWSTS_FWS0B_SHIFT)
+#define I40E_GL_FWSTS_FWRI_SHIFT 9
+#define I40E_GL_FWSTS_FWRI_MASK I40E_MASK(0x1, I40E_GL_FWSTS_FWRI_SHIFT)
+#define I40E_GL_FWSTS_FWS1B_SHIFT 16
+#define I40E_GL_FWSTS_FWS1B_MASK I40E_MASK(0xFF, I40E_GL_FWSTS_FWS1B_SHIFT)
+#define I40E_GLGEN_CLKSTAT 0x000B8184 /* Reset: POR */
+#define I40E_GLGEN_CLKSTAT_CLKMODE_SHIFT 0
+#define I40E_GLGEN_CLKSTAT_CLKMODE_MASK I40E_MASK(0x1, I40E_GLGEN_CLKSTAT_CLKMODE_SHIFT)
+#define I40E_GLGEN_CLKSTAT_U_CLK_SPEED_SHIFT 4
+#define I40E_GLGEN_CLKSTAT_U_CLK_SPEED_MASK I40E_MASK(0x3, I40E_GLGEN_CLKSTAT_U_CLK_SPEED_SHIFT)
+#define I40E_GLGEN_CLKSTAT_P0_CLK_SPEED_SHIFT 8
+#define I40E_GLGEN_CLKSTAT_P0_CLK_SPEED_MASK I40E_MASK(0x7, I40E_GLGEN_CLKSTAT_P0_CLK_SPEED_SHIFT)
+#define I40E_GLGEN_CLKSTAT_P1_CLK_SPEED_SHIFT 12
+#define I40E_GLGEN_CLKSTAT_P1_CLK_SPEED_MASK I40E_MASK(0x7, I40E_GLGEN_CLKSTAT_P1_CLK_SPEED_SHIFT)
+#define I40E_GLGEN_CLKSTAT_P2_CLK_SPEED_SHIFT 16
+#define I40E_GLGEN_CLKSTAT_P2_CLK_SPEED_MASK I40E_MASK(0x7, I40E_GLGEN_CLKSTAT_P2_CLK_SPEED_SHIFT)
+#define I40E_GLGEN_CLKSTAT_P3_CLK_SPEED_SHIFT 20
+#define I40E_GLGEN_CLKSTAT_P3_CLK_SPEED_MASK I40E_MASK(0x7, I40E_GLGEN_CLKSTAT_P3_CLK_SPEED_SHIFT)
+#define I40E_GLGEN_GPIO_CTL(_i) (0x00088100 + ((_i) * 4)) /* _i=0...29 */ /* Reset: POR */
+#define I40E_GLGEN_GPIO_CTL_MAX_INDEX 29
+#define I40E_GLGEN_GPIO_CTL_PRT_NUM_SHIFT 0
+#define I40E_GLGEN_GPIO_CTL_PRT_NUM_MASK I40E_MASK(0x3, I40E_GLGEN_GPIO_CTL_PRT_NUM_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_SHIFT 3
+#define I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_MASK I40E_MASK(0x1, I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_PIN_DIR_SHIFT 4
+#define I40E_GLGEN_GPIO_CTL_PIN_DIR_MASK I40E_MASK(0x1, I40E_GLGEN_GPIO_CTL_PIN_DIR_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_TRI_CTL_SHIFT 5
+#define I40E_GLGEN_GPIO_CTL_TRI_CTL_MASK I40E_MASK(0x1, I40E_GLGEN_GPIO_CTL_TRI_CTL_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_OUT_CTL_SHIFT 6
+#define I40E_GLGEN_GPIO_CTL_OUT_CTL_MASK I40E_MASK(0x1, I40E_GLGEN_GPIO_CTL_OUT_CTL_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_PIN_FUNC_SHIFT 7
+#define I40E_GLGEN_GPIO_CTL_PIN_FUNC_MASK I40E_MASK(0x7, I40E_GLGEN_GPIO_CTL_PIN_FUNC_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_LED_INVRT_SHIFT 10
+#define I40E_GLGEN_GPIO_CTL_LED_INVRT_MASK I40E_MASK(0x1, I40E_GLGEN_GPIO_CTL_LED_INVRT_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_LED_BLINK_SHIFT 11
+#define I40E_GLGEN_GPIO_CTL_LED_BLINK_MASK I40E_MASK(0x1, I40E_GLGEN_GPIO_CTL_LED_BLINK_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT 12
+#define I40E_GLGEN_GPIO_CTL_LED_MODE_MASK I40E_MASK(0x1F, I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_INT_MODE_SHIFT 17
+#define I40E_GLGEN_GPIO_CTL_INT_MODE_MASK I40E_MASK(0x3, I40E_GLGEN_GPIO_CTL_INT_MODE_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_OUT_DEFAULT_SHIFT 19
+#define I40E_GLGEN_GPIO_CTL_OUT_DEFAULT_MASK I40E_MASK(0x1, I40E_GLGEN_GPIO_CTL_OUT_DEFAULT_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_PHY_PIN_NAME_SHIFT 20
+#define I40E_GLGEN_GPIO_CTL_PHY_PIN_NAME_MASK I40E_MASK(0x3F, I40E_GLGEN_GPIO_CTL_PHY_PIN_NAME_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_PRT_BIT_MAP_SHIFT 26
+#define I40E_GLGEN_GPIO_CTL_PRT_BIT_MAP_MASK I40E_MASK(0xF, I40E_GLGEN_GPIO_CTL_PRT_BIT_MAP_SHIFT)
+#define I40E_GLGEN_GPIO_SET 0x00088184 /* Reset: POR */
+#define I40E_GLGEN_GPIO_SET_GPIO_INDX_SHIFT 0
+#define I40E_GLGEN_GPIO_SET_GPIO_INDX_MASK I40E_MASK(0x1F, I40E_GLGEN_GPIO_SET_GPIO_INDX_SHIFT)
+#define I40E_GLGEN_GPIO_SET_SDP_DATA_SHIFT 5
+#define I40E_GLGEN_GPIO_SET_SDP_DATA_MASK I40E_MASK(0x1, I40E_GLGEN_GPIO_SET_SDP_DATA_SHIFT)
+#define I40E_GLGEN_GPIO_SET_DRIVE_SDP_SHIFT 6
+#define I40E_GLGEN_GPIO_SET_DRIVE_SDP_MASK I40E_MASK(0x1, I40E_GLGEN_GPIO_SET_DRIVE_SDP_SHIFT)
+#define I40E_GLGEN_GPIO_STAT 0x0008817C /* Reset: POR */
+#define I40E_GLGEN_GPIO_STAT_GPIO_VALUE_SHIFT 0
+#define I40E_GLGEN_GPIO_STAT_GPIO_VALUE_MASK I40E_MASK(0x3FFFFFFF, I40E_GLGEN_GPIO_STAT_GPIO_VALUE_SHIFT)
+#define I40E_GLGEN_GPIO_TRANSIT 0x00088180 /* Reset: POR */
+#define I40E_GLGEN_GPIO_TRANSIT_GPIO_TRANSITION_SHIFT 0
+#define I40E_GLGEN_GPIO_TRANSIT_GPIO_TRANSITION_MASK I40E_MASK(0x3FFFFFFF, I40E_GLGEN_GPIO_TRANSIT_GPIO_TRANSITION_SHIFT)
+#define I40E_GLGEN_I2CCMD(_i) (0x000881E0 + ((_i) * 4)) /* _i=0...3 */ /* Reset: POR */
+#define I40E_GLGEN_I2CCMD_MAX_INDEX 3
+#define I40E_GLGEN_I2CCMD_DATA_SHIFT 0
+#define I40E_GLGEN_I2CCMD_DATA_MASK I40E_MASK(0xFFFF, I40E_GLGEN_I2CCMD_DATA_SHIFT)
+#define I40E_GLGEN_I2CCMD_REGADD_SHIFT 16
+#define I40E_GLGEN_I2CCMD_REGADD_MASK I40E_MASK(0xFF, I40E_GLGEN_I2CCMD_REGADD_SHIFT)
+#define I40E_GLGEN_I2CCMD_PHYADD_SHIFT 24
+#define I40E_GLGEN_I2CCMD_PHYADD_MASK I40E_MASK(0x7, I40E_GLGEN_I2CCMD_PHYADD_SHIFT)
+#define I40E_GLGEN_I2CCMD_OP_SHIFT 27
+#define I40E_GLGEN_I2CCMD_OP_MASK I40E_MASK(0x1, I40E_GLGEN_I2CCMD_OP_SHIFT)
+#define I40E_GLGEN_I2CCMD_RESET_SHIFT 28
+#define I40E_GLGEN_I2CCMD_RESET_MASK I40E_MASK(0x1, I40E_GLGEN_I2CCMD_RESET_SHIFT)
+#define I40E_GLGEN_I2CCMD_R_SHIFT 29
+#define I40E_GLGEN_I2CCMD_R_MASK I40E_MASK(0x1, I40E_GLGEN_I2CCMD_R_SHIFT)
+#define I40E_GLGEN_I2CCMD_E_SHIFT 31
+#define I40E_GLGEN_I2CCMD_E_MASK I40E_MASK(0x1, I40E_GLGEN_I2CCMD_E_SHIFT)
+#define I40E_GLGEN_I2CPARAMS(_i) (0x000881AC + ((_i) * 4)) /* _i=0...3 */ /* Reset: POR */
+#define I40E_GLGEN_I2CPARAMS_MAX_INDEX 3
+#define I40E_GLGEN_I2CPARAMS_WRITE_TIME_SHIFT 0
+#define I40E_GLGEN_I2CPARAMS_WRITE_TIME_MASK I40E_MASK(0x1F, I40E_GLGEN_I2CPARAMS_WRITE_TIME_SHIFT)
+#define I40E_GLGEN_I2CPARAMS_READ_TIME_SHIFT 5
+#define I40E_GLGEN_I2CPARAMS_READ_TIME_MASK I40E_MASK(0x7, I40E_GLGEN_I2CPARAMS_READ_TIME_SHIFT)
+#define I40E_GLGEN_I2CPARAMS_I2CBB_EN_SHIFT 8
+#define I40E_GLGEN_I2CPARAMS_I2CBB_EN_MASK I40E_MASK(0x1, I40E_GLGEN_I2CPARAMS_I2CBB_EN_SHIFT)
+#define I40E_GLGEN_I2CPARAMS_CLK_SHIFT 9
+#define I40E_GLGEN_I2CPARAMS_CLK_MASK I40E_MASK(0x1, I40E_GLGEN_I2CPARAMS_CLK_SHIFT)
+#define I40E_GLGEN_I2CPARAMS_DATA_OUT_SHIFT 10
+#define I40E_GLGEN_I2CPARAMS_DATA_OUT_MASK I40E_MASK(0x1, I40E_GLGEN_I2CPARAMS_DATA_OUT_SHIFT)
+#define I40E_GLGEN_I2CPARAMS_DATA_OE_N_SHIFT 11
+#define I40E_GLGEN_I2CPARAMS_DATA_OE_N_MASK I40E_MASK(0x1, I40E_GLGEN_I2CPARAMS_DATA_OE_N_SHIFT)
+#define I40E_GLGEN_I2CPARAMS_DATA_IN_SHIFT 12
+#define I40E_GLGEN_I2CPARAMS_DATA_IN_MASK I40E_MASK(0x1, I40E_GLGEN_I2CPARAMS_DATA_IN_SHIFT)
+#define I40E_GLGEN_I2CPARAMS_CLK_OE_N_SHIFT 13
+#define I40E_GLGEN_I2CPARAMS_CLK_OE_N_MASK I40E_MASK(0x1, I40E_GLGEN_I2CPARAMS_CLK_OE_N_SHIFT)
+#define I40E_GLGEN_I2CPARAMS_CLK_IN_SHIFT 14
+#define I40E_GLGEN_I2CPARAMS_CLK_IN_MASK I40E_MASK(0x1, I40E_GLGEN_I2CPARAMS_CLK_IN_SHIFT)
+#define I40E_GLGEN_I2CPARAMS_CLK_STRETCH_DIS_SHIFT 15
+#define I40E_GLGEN_I2CPARAMS_CLK_STRETCH_DIS_MASK I40E_MASK(0x1, I40E_GLGEN_I2CPARAMS_CLK_STRETCH_DIS_SHIFT)
+#define I40E_GLGEN_I2CPARAMS_I2C_DATA_ORDER_SHIFT 31
+#define I40E_GLGEN_I2CPARAMS_I2C_DATA_ORDER_MASK I40E_MASK(0x1, I40E_GLGEN_I2CPARAMS_I2C_DATA_ORDER_SHIFT)
+#define I40E_GLGEN_LED_CTL 0x00088178 /* Reset: POR */
+#define I40E_GLGEN_LED_CTL_GLOBAL_BLINK_MODE_SHIFT 0
+#define I40E_GLGEN_LED_CTL_GLOBAL_BLINK_MODE_MASK I40E_MASK(0x1, I40E_GLGEN_LED_CTL_GLOBAL_BLINK_MODE_SHIFT)
+#define I40E_GLGEN_MDIO_CTRL(_i) (0x000881D0 + ((_i) * 4)) /* _i=0...3 */ /* Reset: POR */
+#define I40E_GLGEN_MDIO_CTRL_MAX_INDEX 3
+#define I40E_GLGEN_MDIO_CTRL_LEGACY_RSVD2_SHIFT 0
+#define I40E_GLGEN_MDIO_CTRL_LEGACY_RSVD2_MASK I40E_MASK(0x1FFFF, I40E_GLGEN_MDIO_CTRL_LEGACY_RSVD2_SHIFT)
+#define I40E_GLGEN_MDIO_CTRL_CONTMDC_SHIFT 17
+#define I40E_GLGEN_MDIO_CTRL_CONTMDC_MASK I40E_MASK(0x1, I40E_GLGEN_MDIO_CTRL_CONTMDC_SHIFT)
+#define I40E_GLGEN_MDIO_CTRL_LEGACY_RSVD1_SHIFT 18
+#define I40E_GLGEN_MDIO_CTRL_LEGACY_RSVD1_MASK I40E_MASK(0x7FF, I40E_GLGEN_MDIO_CTRL_LEGACY_RSVD1_SHIFT)
+#define I40E_GLGEN_MDIO_CTRL_LEGACY_RSVD0_SHIFT 29
+#define I40E_GLGEN_MDIO_CTRL_LEGACY_RSVD0_MASK I40E_MASK(0x7, I40E_GLGEN_MDIO_CTRL_LEGACY_RSVD0_SHIFT)
+#define I40E_GLGEN_MDIO_I2C_SEL(_i) (0x000881C0 + ((_i) * 4)) /* _i=0...3 */ /* Reset: POR */
+#define I40E_GLGEN_MDIO_I2C_SEL_MAX_INDEX 3
+#define I40E_GLGEN_MDIO_I2C_SEL_MDIO_I2C_SEL_SHIFT 0
+#define I40E_GLGEN_MDIO_I2C_SEL_MDIO_I2C_SEL_MASK I40E_MASK(0x1, I40E_GLGEN_MDIO_I2C_SEL_MDIO_I2C_SEL_SHIFT)
+#define I40E_GLGEN_MDIO_I2C_SEL_PHY_PORT_NUM_SHIFT 1
+#define I40E_GLGEN_MDIO_I2C_SEL_PHY_PORT_NUM_MASK I40E_MASK(0xF, I40E_GLGEN_MDIO_I2C_SEL_PHY_PORT_NUM_SHIFT)
+#define I40E_GLGEN_MDIO_I2C_SEL_PHY0_ADDRESS_SHIFT 5
+#define I40E_GLGEN_MDIO_I2C_SEL_PHY0_ADDRESS_MASK I40E_MASK(0x1F, I40E_GLGEN_MDIO_I2C_SEL_PHY0_ADDRESS_SHIFT)
+#define I40E_GLGEN_MDIO_I2C_SEL_PHY1_ADDRESS_SHIFT 10
+#define I40E_GLGEN_MDIO_I2C_SEL_PHY1_ADDRESS_MASK I40E_MASK(0x1F, I40E_GLGEN_MDIO_I2C_SEL_PHY1_ADDRESS_SHIFT)
+#define I40E_GLGEN_MDIO_I2C_SEL_PHY2_ADDRESS_SHIFT 15
+#define I40E_GLGEN_MDIO_I2C_SEL_PHY2_ADDRESS_MASK I40E_MASK(0x1F, I40E_GLGEN_MDIO_I2C_SEL_PHY2_ADDRESS_SHIFT)
+#define I40E_GLGEN_MDIO_I2C_SEL_PHY3_ADDRESS_SHIFT 20
+#define I40E_GLGEN_MDIO_I2C_SEL_PHY3_ADDRESS_MASK I40E_MASK(0x1F, I40E_GLGEN_MDIO_I2C_SEL_PHY3_ADDRESS_SHIFT)
+#define I40E_GLGEN_MDIO_I2C_SEL_MDIO_IF_MODE_SHIFT 25
+#define I40E_GLGEN_MDIO_I2C_SEL_MDIO_IF_MODE_MASK I40E_MASK(0xF, I40E_GLGEN_MDIO_I2C_SEL_MDIO_IF_MODE_SHIFT)
+#define I40E_GLGEN_MDIO_I2C_SEL_EN_FAST_MODE_SHIFT 31
+#define I40E_GLGEN_MDIO_I2C_SEL_EN_FAST_MODE_MASK I40E_MASK(0x1, I40E_GLGEN_MDIO_I2C_SEL_EN_FAST_MODE_SHIFT)
+#define I40E_GLGEN_MSCA(_i) (0x0008818C + ((_i) * 4)) /* _i=0...3 */ /* Reset: POR */
+#define I40E_GLGEN_MSCA_MAX_INDEX 3
+#define I40E_GLGEN_MSCA_MDIADD_SHIFT 0
+#define I40E_GLGEN_MSCA_MDIADD_MASK I40E_MASK(0xFFFF, I40E_GLGEN_MSCA_MDIADD_SHIFT)
+#define I40E_GLGEN_MSCA_DEVADD_SHIFT 16
+#define I40E_GLGEN_MSCA_DEVADD_MASK I40E_MASK(0x1F, I40E_GLGEN_MSCA_DEVADD_SHIFT)
+#define I40E_GLGEN_MSCA_PHYADD_SHIFT 21
+#define I40E_GLGEN_MSCA_PHYADD_MASK I40E_MASK(0x1F, I40E_GLGEN_MSCA_PHYADD_SHIFT)
+#define I40E_GLGEN_MSCA_OPCODE_SHIFT 26
+#define I40E_GLGEN_MSCA_OPCODE_MASK I40E_MASK(0x3, I40E_GLGEN_MSCA_OPCODE_SHIFT)
+#define I40E_GLGEN_MSCA_STCODE_SHIFT 28
+#define I40E_GLGEN_MSCA_STCODE_MASK I40E_MASK(0x3, I40E_GLGEN_MSCA_STCODE_SHIFT)
+#define I40E_GLGEN_MSCA_MDICMD_SHIFT 30
+#define I40E_GLGEN_MSCA_MDICMD_MASK I40E_MASK(0x1, I40E_GLGEN_MSCA_MDICMD_SHIFT)
+#define I40E_GLGEN_MSCA_MDIINPROGEN_SHIFT 31
+#define I40E_GLGEN_MSCA_MDIINPROGEN_MASK I40E_MASK(0x1, I40E_GLGEN_MSCA_MDIINPROGEN_SHIFT)
+#define I40E_GLGEN_MSRWD(_i) (0x0008819C + ((_i) * 4)) /* _i=0...3 */ /* Reset: POR */
+#define I40E_GLGEN_MSRWD_MAX_INDEX 3
+#define I40E_GLGEN_MSRWD_MDIWRDATA_SHIFT 0
+#define I40E_GLGEN_MSRWD_MDIWRDATA_MASK I40E_MASK(0xFFFF, I40E_GLGEN_MSRWD_MDIWRDATA_SHIFT)
+#define I40E_GLGEN_MSRWD_MDIRDDATA_SHIFT 16
+#define I40E_GLGEN_MSRWD_MDIRDDATA_MASK I40E_MASK(0xFFFF, I40E_GLGEN_MSRWD_MDIRDDATA_SHIFT)
+#define I40E_GLGEN_PCIFCNCNT 0x001C0AB4 /* Reset: PCIR */
+#define I40E_GLGEN_PCIFCNCNT_PCIPFCNT_SHIFT 0
+#define I40E_GLGEN_PCIFCNCNT_PCIPFCNT_MASK I40E_MASK(0x1F, I40E_GLGEN_PCIFCNCNT_PCIPFCNT_SHIFT)
+#define I40E_GLGEN_PCIFCNCNT_PCIVFCNT_SHIFT 16
+#define I40E_GLGEN_PCIFCNCNT_PCIVFCNT_MASK I40E_MASK(0xFF, I40E_GLGEN_PCIFCNCNT_PCIVFCNT_SHIFT)
+#define I40E_GLGEN_RSTAT 0x000B8188 /* Reset: POR */
+#define I40E_GLGEN_RSTAT_DEVSTATE_SHIFT 0
+#define I40E_GLGEN_RSTAT_DEVSTATE_MASK I40E_MASK(0x3, I40E_GLGEN_RSTAT_DEVSTATE_SHIFT)
+#define I40E_GLGEN_RSTAT_RESET_TYPE_SHIFT 2
+#define I40E_GLGEN_RSTAT_RESET_TYPE_MASK I40E_MASK(0x3, I40E_GLGEN_RSTAT_RESET_TYPE_SHIFT)
+#define I40E_GLGEN_RSTAT_CORERCNT_SHIFT 4
+#define I40E_GLGEN_RSTAT_CORERCNT_MASK I40E_MASK(0x3, I40E_GLGEN_RSTAT_CORERCNT_SHIFT)
+#define I40E_GLGEN_RSTAT_GLOBRCNT_SHIFT 6
+#define I40E_GLGEN_RSTAT_GLOBRCNT_MASK I40E_MASK(0x3, I40E_GLGEN_RSTAT_GLOBRCNT_SHIFT)
+#define I40E_GLGEN_RSTAT_EMPRCNT_SHIFT 8
+#define I40E_GLGEN_RSTAT_EMPRCNT_MASK I40E_MASK(0x3, I40E_GLGEN_RSTAT_EMPRCNT_SHIFT)
+#define I40E_GLGEN_RSTAT_TIME_TO_RST_SHIFT 10
+#define I40E_GLGEN_RSTAT_TIME_TO_RST_MASK I40E_MASK(0x3F, I40E_GLGEN_RSTAT_TIME_TO_RST_SHIFT)
+#define I40E_GLGEN_RSTCTL 0x000B8180 /* Reset: POR */
+#define I40E_GLGEN_RSTCTL_GRSTDEL_SHIFT 0
+#define I40E_GLGEN_RSTCTL_GRSTDEL_MASK I40E_MASK(0x3F, I40E_GLGEN_RSTCTL_GRSTDEL_SHIFT)
+#define I40E_GLGEN_RSTCTL_ECC_RST_ENA_SHIFT 8
+#define I40E_GLGEN_RSTCTL_ECC_RST_ENA_MASK I40E_MASK(0x1, I40E_GLGEN_RSTCTL_ECC_RST_ENA_SHIFT)
+#define I40E_GLGEN_RTRIG 0x000B8190 /* Reset: CORER */
+#define I40E_GLGEN_RTRIG_CORER_SHIFT 0
+#define I40E_GLGEN_RTRIG_CORER_MASK I40E_MASK(0x1, I40E_GLGEN_RTRIG_CORER_SHIFT)
+#define I40E_GLGEN_RTRIG_GLOBR_SHIFT 1
+#define I40E_GLGEN_RTRIG_GLOBR_MASK I40E_MASK(0x1, I40E_GLGEN_RTRIG_GLOBR_SHIFT)
+#define I40E_GLGEN_RTRIG_EMPFWR_SHIFT 2
+#define I40E_GLGEN_RTRIG_EMPFWR_MASK I40E_MASK(0x1, I40E_GLGEN_RTRIG_EMPFWR_SHIFT)
+#define I40E_GLGEN_STAT 0x000B612C /* Reset: POR */
+#define I40E_GLGEN_STAT_HWRSVD0_SHIFT 0
+#define I40E_GLGEN_STAT_HWRSVD0_MASK I40E_MASK(0x3, I40E_GLGEN_STAT_HWRSVD0_SHIFT)
+#define I40E_GLGEN_STAT_DCBEN_SHIFT 2
+#define I40E_GLGEN_STAT_DCBEN_MASK I40E_MASK(0x1, I40E_GLGEN_STAT_DCBEN_SHIFT)
+#define I40E_GLGEN_STAT_VTEN_SHIFT 3
+#define I40E_GLGEN_STAT_VTEN_MASK I40E_MASK(0x1, I40E_GLGEN_STAT_VTEN_SHIFT)
+#define I40E_GLGEN_STAT_FCOEN_SHIFT 4
+#define I40E_GLGEN_STAT_FCOEN_MASK I40E_MASK(0x1, I40E_GLGEN_STAT_FCOEN_SHIFT)
+#define I40E_GLGEN_STAT_EVBEN_SHIFT 5
+#define I40E_GLGEN_STAT_EVBEN_MASK I40E_MASK(0x1, I40E_GLGEN_STAT_EVBEN_SHIFT)
+#define I40E_GLGEN_STAT_HWRSVD1_SHIFT 6
+#define I40E_GLGEN_STAT_HWRSVD1_MASK I40E_MASK(0x3, I40E_GLGEN_STAT_HWRSVD1_SHIFT)
+#define I40E_GLGEN_VFLRSTAT(_i) (0x00092600 + ((_i) * 4)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLGEN_VFLRSTAT_MAX_INDEX 3
+#define I40E_GLGEN_VFLRSTAT_VFLRE_SHIFT 0
+#define I40E_GLGEN_VFLRSTAT_VFLRE_MASK I40E_MASK(0xFFFFFFFF, I40E_GLGEN_VFLRSTAT_VFLRE_SHIFT)
+#define I40E_GLVFGEN_TIMER 0x000881BC /* Reset: CORER */
+#define I40E_GLVFGEN_TIMER_GTIME_SHIFT 0
+#define I40E_GLVFGEN_TIMER_GTIME_MASK I40E_MASK(0xFFFFFFFF, I40E_GLVFGEN_TIMER_GTIME_SHIFT)
+#define I40E_PFGEN_CTRL 0x00092400 /* Reset: PFR */
+#define I40E_PFGEN_CTRL_PFSWR_SHIFT 0
+#define I40E_PFGEN_CTRL_PFSWR_MASK I40E_MASK(0x1, I40E_PFGEN_CTRL_PFSWR_SHIFT)
+#define I40E_PFGEN_DRUN 0x00092500 /* Reset: CORER */
+#define I40E_PFGEN_DRUN_DRVUNLD_SHIFT 0
+#define I40E_PFGEN_DRUN_DRVUNLD_MASK I40E_MASK(0x1, I40E_PFGEN_DRUN_DRVUNLD_SHIFT)
+#define I40E_PFGEN_PORTNUM 0x001C0480 /* Reset: CORER */
+#define I40E_PFGEN_PORTNUM_PORT_NUM_SHIFT 0
+#define I40E_PFGEN_PORTNUM_PORT_NUM_MASK I40E_MASK(0x3, I40E_PFGEN_PORTNUM_PORT_NUM_SHIFT)
+#define I40E_PFGEN_STATE 0x00088000 /* Reset: CORER */
+#define I40E_PFGEN_STATE_RESERVED_0_SHIFT 0
+#define I40E_PFGEN_STATE_RESERVED_0_MASK I40E_MASK(0x1, I40E_PFGEN_STATE_RESERVED_0_SHIFT)
+#define I40E_PFGEN_STATE_PFFCEN_SHIFT 1
+#define I40E_PFGEN_STATE_PFFCEN_MASK I40E_MASK(0x1, I40E_PFGEN_STATE_PFFCEN_SHIFT)
+#define I40E_PFGEN_STATE_PFLINKEN_SHIFT 2
+#define I40E_PFGEN_STATE_PFLINKEN_MASK I40E_MASK(0x1, I40E_PFGEN_STATE_PFLINKEN_SHIFT)
+#define I40E_PFGEN_STATE_PFSCEN_SHIFT 3
+#define I40E_PFGEN_STATE_PFSCEN_MASK I40E_MASK(0x1, I40E_PFGEN_STATE_PFSCEN_SHIFT)
+#define I40E_PRTGEN_CNF 0x000B8120 /* Reset: POR */
+#define I40E_PRTGEN_CNF_PORT_DIS_SHIFT 0
+#define I40E_PRTGEN_CNF_PORT_DIS_MASK I40E_MASK(0x1, I40E_PRTGEN_CNF_PORT_DIS_SHIFT)
+#define I40E_PRTGEN_CNF_ALLOW_PORT_DIS_SHIFT 1
+#define I40E_PRTGEN_CNF_ALLOW_PORT_DIS_MASK I40E_MASK(0x1, I40E_PRTGEN_CNF_ALLOW_PORT_DIS_SHIFT)
+#define I40E_PRTGEN_CNF_EMP_PORT_DIS_SHIFT 2
+#define I40E_PRTGEN_CNF_EMP_PORT_DIS_MASK I40E_MASK(0x1, I40E_PRTGEN_CNF_EMP_PORT_DIS_SHIFT)
+#define I40E_PRTGEN_CNF2 0x000B8160 /* Reset: POR */
+#define I40E_PRTGEN_CNF2_ACTIVATE_PORT_LINK_SHIFT 0
+#define I40E_PRTGEN_CNF2_ACTIVATE_PORT_LINK_MASK I40E_MASK(0x1, I40E_PRTGEN_CNF2_ACTIVATE_PORT_LINK_SHIFT)
+#define I40E_PRTGEN_STATUS 0x000B8100 /* Reset: POR */
+#define I40E_PRTGEN_STATUS_PORT_VALID_SHIFT 0
+#define I40E_PRTGEN_STATUS_PORT_VALID_MASK I40E_MASK(0x1, I40E_PRTGEN_STATUS_PORT_VALID_SHIFT)
+#define I40E_PRTGEN_STATUS_PORT_ACTIVE_SHIFT 1
+#define I40E_PRTGEN_STATUS_PORT_ACTIVE_MASK I40E_MASK(0x1, I40E_PRTGEN_STATUS_PORT_ACTIVE_SHIFT)
+#define I40E_VFGEN_RSTAT1(_VF) (0x00074400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFGEN_RSTAT1_MAX_INDEX 127
+#define I40E_VFGEN_RSTAT1_VFR_STATE_SHIFT 0
+#define I40E_VFGEN_RSTAT1_VFR_STATE_MASK I40E_MASK(0x3, I40E_VFGEN_RSTAT1_VFR_STATE_SHIFT)
+#define I40E_VPGEN_VFRSTAT(_VF) (0x00091C00 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: CORER */
+#define I40E_VPGEN_VFRSTAT_MAX_INDEX 127
+#define I40E_VPGEN_VFRSTAT_VFRD_SHIFT 0
+#define I40E_VPGEN_VFRSTAT_VFRD_MASK I40E_MASK(0x1, I40E_VPGEN_VFRSTAT_VFRD_SHIFT)
+#define I40E_VPGEN_VFRTRIG(_VF) (0x00091800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: CORER */
+#define I40E_VPGEN_VFRTRIG_MAX_INDEX 127
+#define I40E_VPGEN_VFRTRIG_VFSWR_SHIFT 0
+#define I40E_VPGEN_VFRTRIG_VFSWR_MASK I40E_MASK(0x1, I40E_VPGEN_VFRTRIG_VFSWR_SHIFT)
+#define I40E_VSIGEN_RSTAT(_VSI) (0x00090800 + ((_VSI) * 4)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_VSIGEN_RSTAT_MAX_INDEX 383
+#define I40E_VSIGEN_RSTAT_VMRD_SHIFT 0
+#define I40E_VSIGEN_RSTAT_VMRD_MASK I40E_MASK(0x1, I40E_VSIGEN_RSTAT_VMRD_SHIFT)
+#define I40E_VSIGEN_RTRIG(_VSI) (0x00090000 + ((_VSI) * 4)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_VSIGEN_RTRIG_MAX_INDEX 383
+#define I40E_VSIGEN_RTRIG_VMSWR_SHIFT 0
+#define I40E_VSIGEN_RTRIG_VMSWR_MASK I40E_MASK(0x1, I40E_VSIGEN_RTRIG_VMSWR_SHIFT)
+#define I40E_GLHMC_FCOEDDPBASE(_i) (0x000C6600 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_FCOEDDPBASE_MAX_INDEX 15
+#define I40E_GLHMC_FCOEDDPBASE_FPMFCOEDDPBASE_SHIFT 0
+#define I40E_GLHMC_FCOEDDPBASE_FPMFCOEDDPBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_FCOEDDPBASE_FPMFCOEDDPBASE_SHIFT)
+#define I40E_GLHMC_FCOEDDPCNT(_i) (0x000C6700 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_FCOEDDPCNT_MAX_INDEX 15
+#define I40E_GLHMC_FCOEDDPCNT_FPMFCOEDDPCNT_SHIFT 0
+#define I40E_GLHMC_FCOEDDPCNT_FPMFCOEDDPCNT_MASK I40E_MASK(0xFFFFF, I40E_GLHMC_FCOEDDPCNT_FPMFCOEDDPCNT_SHIFT)
+#define I40E_GLHMC_FCOEDDPOBJSZ 0x000C2010 /* Reset: CORER */
+#define I40E_GLHMC_FCOEDDPOBJSZ_PMFCOEDDPOBJSZ_SHIFT 0
+#define I40E_GLHMC_FCOEDDPOBJSZ_PMFCOEDDPOBJSZ_MASK I40E_MASK(0xF, I40E_GLHMC_FCOEDDPOBJSZ_PMFCOEDDPOBJSZ_SHIFT)
+#define I40E_GLHMC_FCOEFBASE(_i) (0x000C6800 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_FCOEFBASE_MAX_INDEX 15
+#define I40E_GLHMC_FCOEFBASE_FPMFCOEFBASE_SHIFT 0
+#define I40E_GLHMC_FCOEFBASE_FPMFCOEFBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_FCOEFBASE_FPMFCOEFBASE_SHIFT)
+#define I40E_GLHMC_FCOEFCNT(_i) (0x000C6900 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_FCOEFCNT_MAX_INDEX 15
+#define I40E_GLHMC_FCOEFCNT_FPMFCOEFCNT_SHIFT 0
+#define I40E_GLHMC_FCOEFCNT_FPMFCOEFCNT_MASK I40E_MASK(0x7FFFFF, I40E_GLHMC_FCOEFCNT_FPMFCOEFCNT_SHIFT)
+#define I40E_GLHMC_FCOEFMAX 0x000C20D0 /* Reset: CORER */
+#define I40E_GLHMC_FCOEFMAX_PMFCOEFMAX_SHIFT 0
+#define I40E_GLHMC_FCOEFMAX_PMFCOEFMAX_MASK I40E_MASK(0xFFFF, I40E_GLHMC_FCOEFMAX_PMFCOEFMAX_SHIFT)
+#define I40E_GLHMC_FCOEFOBJSZ 0x000C2018 /* Reset: CORER */
+#define I40E_GLHMC_FCOEFOBJSZ_PMFCOEFOBJSZ_SHIFT 0
+#define I40E_GLHMC_FCOEFOBJSZ_PMFCOEFOBJSZ_MASK I40E_MASK(0xF, I40E_GLHMC_FCOEFOBJSZ_PMFCOEFOBJSZ_SHIFT)
+#define I40E_GLHMC_FCOEMAX 0x000C2014 /* Reset: CORER */
+#define I40E_GLHMC_FCOEMAX_PMFCOEMAX_SHIFT 0
+#define I40E_GLHMC_FCOEMAX_PMFCOEMAX_MASK I40E_MASK(0x1FFF, I40E_GLHMC_FCOEMAX_PMFCOEMAX_SHIFT)
+#define I40E_GLHMC_FSIAVBASE(_i) (0x000C5600 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_FSIAVBASE_MAX_INDEX 15
+#define I40E_GLHMC_FSIAVBASE_FPMFSIAVBASE_SHIFT 0
+#define I40E_GLHMC_FSIAVBASE_FPMFSIAVBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_FSIAVBASE_FPMFSIAVBASE_SHIFT)
+#define I40E_GLHMC_FSIAVCNT(_i) (0x000C5700 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_FSIAVCNT_MAX_INDEX 15
+#define I40E_GLHMC_FSIAVCNT_FPMFSIAVCNT_SHIFT 0
+#define I40E_GLHMC_FSIAVCNT_FPMFSIAVCNT_MASK I40E_MASK(0x1FFFFFFF, I40E_GLHMC_FSIAVCNT_FPMFSIAVCNT_SHIFT)
+#define I40E_GLHMC_FSIAVCNT_RSVD_SHIFT 29
+#define I40E_GLHMC_FSIAVCNT_RSVD_MASK I40E_MASK(0x7, I40E_GLHMC_FSIAVCNT_RSVD_SHIFT)
+#define I40E_GLHMC_FSIAVMAX 0x000C2068 /* Reset: CORER */
+#define I40E_GLHMC_FSIAVMAX_PMFSIAVMAX_SHIFT 0
+#define I40E_GLHMC_FSIAVMAX_PMFSIAVMAX_MASK I40E_MASK(0x1FFFF, I40E_GLHMC_FSIAVMAX_PMFSIAVMAX_SHIFT)
+#define I40E_GLHMC_FSIAVOBJSZ 0x000C2064 /* Reset: CORER */
+#define I40E_GLHMC_FSIAVOBJSZ_PMFSIAVOBJSZ_SHIFT 0
+#define I40E_GLHMC_FSIAVOBJSZ_PMFSIAVOBJSZ_MASK I40E_MASK(0xF, I40E_GLHMC_FSIAVOBJSZ_PMFSIAVOBJSZ_SHIFT)
+#define I40E_GLHMC_FSIMCBASE(_i) (0x000C6000 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_FSIMCBASE_MAX_INDEX 15
+#define I40E_GLHMC_FSIMCBASE_FPMFSIMCBASE_SHIFT 0
+#define I40E_GLHMC_FSIMCBASE_FPMFSIMCBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_FSIMCBASE_FPMFSIMCBASE_SHIFT)
+#define I40E_GLHMC_FSIMCCNT(_i) (0x000C6100 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_FSIMCCNT_MAX_INDEX 15
+#define I40E_GLHMC_FSIMCCNT_FPMFSIMCSZ_SHIFT 0
+#define I40E_GLHMC_FSIMCCNT_FPMFSIMCSZ_MASK I40E_MASK(0x1FFFFFFF, I40E_GLHMC_FSIMCCNT_FPMFSIMCSZ_SHIFT)
+#define I40E_GLHMC_FSIMCMAX 0x000C2060 /* Reset: CORER */
+#define I40E_GLHMC_FSIMCMAX_PMFSIMCMAX_SHIFT 0
+#define I40E_GLHMC_FSIMCMAX_PMFSIMCMAX_MASK I40E_MASK(0x3FFF, I40E_GLHMC_FSIMCMAX_PMFSIMCMAX_SHIFT)
+#define I40E_GLHMC_FSIMCOBJSZ 0x000C205c /* Reset: CORER */
+#define I40E_GLHMC_FSIMCOBJSZ_PMFSIMCOBJSZ_SHIFT 0
+#define I40E_GLHMC_FSIMCOBJSZ_PMFSIMCOBJSZ_MASK I40E_MASK(0xF, I40E_GLHMC_FSIMCOBJSZ_PMFSIMCOBJSZ_SHIFT)
+#define I40E_GLHMC_LANQMAX 0x000C2008 /* Reset: CORER */
+#define I40E_GLHMC_LANQMAX_PMLANQMAX_SHIFT 0
+#define I40E_GLHMC_LANQMAX_PMLANQMAX_MASK I40E_MASK(0x7FF, I40E_GLHMC_LANQMAX_PMLANQMAX_SHIFT)
+#define I40E_GLHMC_LANRXBASE(_i) (0x000C6400 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_LANRXBASE_MAX_INDEX 15
+#define I40E_GLHMC_LANRXBASE_FPMLANRXBASE_SHIFT 0
+#define I40E_GLHMC_LANRXBASE_FPMLANRXBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_LANRXBASE_FPMLANRXBASE_SHIFT)
+#define I40E_GLHMC_LANRXCNT(_i) (0x000C6500 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_LANRXCNT_MAX_INDEX 15
+#define I40E_GLHMC_LANRXCNT_FPMLANRXCNT_SHIFT 0
+#define I40E_GLHMC_LANRXCNT_FPMLANRXCNT_MASK I40E_MASK(0x7FF, I40E_GLHMC_LANRXCNT_FPMLANRXCNT_SHIFT)
+#define I40E_GLHMC_LANRXOBJSZ 0x000C200c /* Reset: CORER */
+#define I40E_GLHMC_LANRXOBJSZ_PMLANRXOBJSZ_SHIFT 0
+#define I40E_GLHMC_LANRXOBJSZ_PMLANRXOBJSZ_MASK I40E_MASK(0xF, I40E_GLHMC_LANRXOBJSZ_PMLANRXOBJSZ_SHIFT)
+#define I40E_GLHMC_LANTXBASE(_i) (0x000C6200 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_LANTXBASE_MAX_INDEX 15
+#define I40E_GLHMC_LANTXBASE_FPMLANTXBASE_SHIFT 0
+#define I40E_GLHMC_LANTXBASE_FPMLANTXBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_LANTXBASE_FPMLANTXBASE_SHIFT)
+#define I40E_GLHMC_LANTXBASE_RSVD_SHIFT 24
+#define I40E_GLHMC_LANTXBASE_RSVD_MASK I40E_MASK(0xFF, I40E_GLHMC_LANTXBASE_RSVD_SHIFT)
+#define I40E_GLHMC_LANTXCNT(_i) (0x000C6300 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_LANTXCNT_MAX_INDEX 15
+#define I40E_GLHMC_LANTXCNT_FPMLANTXCNT_SHIFT 0
+#define I40E_GLHMC_LANTXCNT_FPMLANTXCNT_MASK I40E_MASK(0x7FF, I40E_GLHMC_LANTXCNT_FPMLANTXCNT_SHIFT)
+#define I40E_GLHMC_LANTXOBJSZ 0x000C2004 /* Reset: CORER */
+#define I40E_GLHMC_LANTXOBJSZ_PMLANTXOBJSZ_SHIFT 0
+#define I40E_GLHMC_LANTXOBJSZ_PMLANTXOBJSZ_MASK I40E_MASK(0xF, I40E_GLHMC_LANTXOBJSZ_PMLANTXOBJSZ_SHIFT)
+#define I40E_GLHMC_PFASSIGN(_i) (0x000C0c00 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_PFASSIGN_MAX_INDEX 15
+#define I40E_GLHMC_PFASSIGN_PMFCNPFASSIGN_SHIFT 0
+#define I40E_GLHMC_PFASSIGN_PMFCNPFASSIGN_MASK I40E_MASK(0xF, I40E_GLHMC_PFASSIGN_PMFCNPFASSIGN_SHIFT)
+#define I40E_GLHMC_SDPART(_i) (0x000C0800 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_SDPART_MAX_INDEX 15
+#define I40E_GLHMC_SDPART_PMSDBASE_SHIFT 0
+#define I40E_GLHMC_SDPART_PMSDBASE_MASK I40E_MASK(0xFFF, I40E_GLHMC_SDPART_PMSDBASE_SHIFT)
+#define I40E_GLHMC_SDPART_PMSDSIZE_SHIFT 16
+#define I40E_GLHMC_SDPART_PMSDSIZE_MASK I40E_MASK(0x1FFF, I40E_GLHMC_SDPART_PMSDSIZE_SHIFT)
+#define I40E_PFHMC_ERRORDATA 0x000C0500 /* Reset: PFR */
+#define I40E_PFHMC_ERRORDATA_HMC_ERROR_DATA_SHIFT 0
+#define I40E_PFHMC_ERRORDATA_HMC_ERROR_DATA_MASK I40E_MASK(0x3FFFFFFF, I40E_PFHMC_ERRORDATA_HMC_ERROR_DATA_SHIFT)
+#define I40E_PFHMC_ERRORINFO 0x000C0400 /* Reset: PFR */
+#define I40E_PFHMC_ERRORINFO_PMF_INDEX_SHIFT 0
+#define I40E_PFHMC_ERRORINFO_PMF_INDEX_MASK I40E_MASK(0x1F, I40E_PFHMC_ERRORINFO_PMF_INDEX_SHIFT)
+#define I40E_PFHMC_ERRORINFO_PMF_ISVF_SHIFT 7
+#define I40E_PFHMC_ERRORINFO_PMF_ISVF_MASK I40E_MASK(0x1, I40E_PFHMC_ERRORINFO_PMF_ISVF_SHIFT)
+#define I40E_PFHMC_ERRORINFO_HMC_ERROR_TYPE_SHIFT 8
+#define I40E_PFHMC_ERRORINFO_HMC_ERROR_TYPE_MASK I40E_MASK(0xF, I40E_PFHMC_ERRORINFO_HMC_ERROR_TYPE_SHIFT)
+#define I40E_PFHMC_ERRORINFO_HMC_OBJECT_TYPE_SHIFT 16
+#define I40E_PFHMC_ERRORINFO_HMC_OBJECT_TYPE_MASK I40E_MASK(0x1F, I40E_PFHMC_ERRORINFO_HMC_OBJECT_TYPE_SHIFT)
+#define I40E_PFHMC_ERRORINFO_ERROR_DETECTED_SHIFT 31
+#define I40E_PFHMC_ERRORINFO_ERROR_DETECTED_MASK I40E_MASK(0x1, I40E_PFHMC_ERRORINFO_ERROR_DETECTED_SHIFT)
+#define I40E_PFHMC_PDINV 0x000C0300 /* Reset: PFR */
+#define I40E_PFHMC_PDINV_PMSDIDX_SHIFT 0
+#define I40E_PFHMC_PDINV_PMSDIDX_MASK I40E_MASK(0xFFF, I40E_PFHMC_PDINV_PMSDIDX_SHIFT)
+#define I40E_PFHMC_PDINV_PMPDIDX_SHIFT 16
+#define I40E_PFHMC_PDINV_PMPDIDX_MASK I40E_MASK(0x1FF, I40E_PFHMC_PDINV_PMPDIDX_SHIFT)
+#define I40E_PFHMC_SDCMD 0x000C0000 /* Reset: PFR */
+#define I40E_PFHMC_SDCMD_PMSDIDX_SHIFT 0
+#define I40E_PFHMC_SDCMD_PMSDIDX_MASK I40E_MASK(0xFFF, I40E_PFHMC_SDCMD_PMSDIDX_SHIFT)
+#define I40E_PFHMC_SDCMD_PMSDWR_SHIFT 31
+#define I40E_PFHMC_SDCMD_PMSDWR_MASK I40E_MASK(0x1, I40E_PFHMC_SDCMD_PMSDWR_SHIFT)
+#define I40E_PFHMC_SDDATAHIGH 0x000C0200 /* Reset: PFR */
+#define I40E_PFHMC_SDDATAHIGH_PMSDDATAHIGH_SHIFT 0
+#define I40E_PFHMC_SDDATAHIGH_PMSDDATAHIGH_MASK I40E_MASK(0xFFFFFFFF, I40E_PFHMC_SDDATAHIGH_PMSDDATAHIGH_SHIFT)
+#define I40E_PFHMC_SDDATALOW 0x000C0100 /* Reset: PFR */
+#define I40E_PFHMC_SDDATALOW_PMSDVALID_SHIFT 0
+#define I40E_PFHMC_SDDATALOW_PMSDVALID_MASK I40E_MASK(0x1, I40E_PFHMC_SDDATALOW_PMSDVALID_SHIFT)
+#define I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT 1
+#define I40E_PFHMC_SDDATALOW_PMSDTYPE_MASK I40E_MASK(0x1, I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT)
+#define I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT 2
+#define I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_MASK I40E_MASK(0x3FF, I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT)
+#define I40E_PFHMC_SDDATALOW_PMSDDATALOW_SHIFT 12
+#define I40E_PFHMC_SDDATALOW_PMSDDATALOW_MASK I40E_MASK(0xFFFFF, I40E_PFHMC_SDDATALOW_PMSDDATALOW_SHIFT)
+#define I40E_GL_GP_FUSE(_i) (0x0009400C + ((_i) * 4)) /* _i=0...28 */ /* Reset: POR */
+#define I40E_GL_GP_FUSE_MAX_INDEX 28
+#define I40E_GL_GP_FUSE_GL_GP_FUSE_SHIFT 0
+#define I40E_GL_GP_FUSE_GL_GP_FUSE_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_GP_FUSE_GL_GP_FUSE_SHIFT)
+#define I40E_GL_UFUSE 0x00094008 /* Reset: POR */
+#define I40E_GL_UFUSE_FOUR_PORT_ENABLE_SHIFT 1
+#define I40E_GL_UFUSE_FOUR_PORT_ENABLE_MASK I40E_MASK(0x1, I40E_GL_UFUSE_FOUR_PORT_ENABLE_SHIFT)
+#define I40E_GL_UFUSE_NIC_ID_SHIFT 2
+#define I40E_GL_UFUSE_NIC_ID_MASK I40E_MASK(0x1, I40E_GL_UFUSE_NIC_ID_SHIFT)
+#define I40E_GL_UFUSE_ULT_LOCKOUT_SHIFT 10
+#define I40E_GL_UFUSE_ULT_LOCKOUT_MASK I40E_MASK(0x1, I40E_GL_UFUSE_ULT_LOCKOUT_SHIFT)
+#define I40E_GL_UFUSE_CLS_LOCKOUT_SHIFT 11
+#define I40E_GL_UFUSE_CLS_LOCKOUT_MASK I40E_MASK(0x1, I40E_GL_UFUSE_CLS_LOCKOUT_SHIFT)
+#define I40E_EMPINT_GPIO_ENA 0x00088188 /* Reset: POR */
+#define I40E_EMPINT_GPIO_ENA_GPIO0_ENA_SHIFT 0
+#define I40E_EMPINT_GPIO_ENA_GPIO0_ENA_MASK I40E_MASK(0x1, I40E_EMPINT_GPIO_ENA_GPIO0_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO1_ENA_SHIFT 1
+#define I40E_EMPINT_GPIO_ENA_GPIO1_ENA_MASK I40E_MASK(0x1, I40E_EMPINT_GPIO_ENA_GPIO1_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO2_ENA_SHIFT 2
+#define I40E_EMPINT_GPIO_ENA_GPIO2_ENA_MASK I40E_MASK(0x1, I40E_EMPINT_GPIO_ENA_GPIO2_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO3_ENA_SHIFT 3
+#define I40E_EMPINT_GPIO_ENA_GPIO3_ENA_MASK I40E_MASK(0x1, I40E_EMPINT_GPIO_ENA_GPIO3_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO4_ENA_SHIFT 4
+#define I40E_EMPINT_GPIO_ENA_GPIO4_ENA_MASK I40E_MASK(0x1, I40E_EMPINT_GPIO_ENA_GPIO4_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO5_ENA_SHIFT 5
+#define I40E_EMPINT_GPIO_ENA_GPIO5_ENA_MASK I40E_MASK(0x1, I40E_EMPINT_GPIO_ENA_GPIO5_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO6_ENA_SHIFT 6
+#define I40E_EMPINT_GPIO_ENA_GPIO6_ENA_MASK I40E_MASK(0x1, I40E_EMPINT_GPIO_ENA_GPIO6_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO7_ENA_SHIFT 7
+#define I40E_EMPINT_GPIO_ENA_GPIO7_ENA_MASK I40E_MASK(0x1, I40E_EMPINT_GPIO_ENA_GPIO7_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO8_ENA_SHIFT 8
+#define I40E_EMPINT_GPIO_ENA_GPIO8_ENA_MASK I40E_MASK(0x1, I40E_EMPINT_GPIO_ENA_GPIO8_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO9_ENA_SHIFT 9
+#define I40E_EMPINT_GPIO_ENA_GPIO9_ENA_MASK I40E_MASK(0x1, I40E_EMPINT_GPIO_ENA_GPIO9_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO10_ENA_SHIFT 10
+#define I40E_EMPINT_GPIO_ENA_GPIO10_ENA_MASK I40E_MASK(0x1, I40E_EMPINT_GPIO_ENA_GPIO10_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO11_ENA_SHIFT 11
+#define I40E_EMPINT_GPIO_ENA_GPIO11_ENA_MASK I40E_MASK(0x1, I40E_EMPINT_GPIO_ENA_GPIO11_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO12_ENA_SHIFT 12
+#define I40E_EMPINT_GPIO_ENA_GPIO12_ENA_MASK I40E_MASK(0x1, I40E_EMPINT_GPIO_ENA_GPIO12_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO13_ENA_SHIFT 13
+#define I40E_EMPINT_GPIO_ENA_GPIO13_ENA_MASK I40E_MASK(0x1, I40E_EMPINT_GPIO_ENA_GPIO13_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO14_ENA_SHIFT 14
+#define I40E_EMPINT_GPIO_ENA_GPIO14_ENA_MASK I40E_MASK(0x1, I40E_EMPINT_GPIO_ENA_GPIO14_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO15_ENA_SHIFT 15
+#define I40E_EMPINT_GPIO_ENA_GPIO15_ENA_MASK I40E_MASK(0x1, I40E_EMPINT_GPIO_ENA_GPIO15_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO16_ENA_SHIFT 16
+#define I40E_EMPINT_GPIO_ENA_GPIO16_ENA_MASK I40E_MASK(0x1, I40E_EMPINT_GPIO_ENA_GPIO16_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO17_ENA_SHIFT 17
+#define I40E_EMPINT_GPIO_ENA_GPIO17_ENA_MASK I40E_MASK(0x1, I40E_EMPINT_GPIO_ENA_GPIO17_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO18_ENA_SHIFT 18
+#define I40E_EMPINT_GPIO_ENA_GPIO18_ENA_MASK I40E_MASK(0x1, I40E_EMPINT_GPIO_ENA_GPIO18_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO19_ENA_SHIFT 19
+#define I40E_EMPINT_GPIO_ENA_GPIO19_ENA_MASK I40E_MASK(0x1, I40E_EMPINT_GPIO_ENA_GPIO19_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO20_ENA_SHIFT 20
+#define I40E_EMPINT_GPIO_ENA_GPIO20_ENA_MASK I40E_MASK(0x1, I40E_EMPINT_GPIO_ENA_GPIO20_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO21_ENA_SHIFT 21
+#define I40E_EMPINT_GPIO_ENA_GPIO21_ENA_MASK I40E_MASK(0x1, I40E_EMPINT_GPIO_ENA_GPIO21_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO22_ENA_SHIFT 22
+#define I40E_EMPINT_GPIO_ENA_GPIO22_ENA_MASK I40E_MASK(0x1, I40E_EMPINT_GPIO_ENA_GPIO22_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO23_ENA_SHIFT 23
+#define I40E_EMPINT_GPIO_ENA_GPIO23_ENA_MASK I40E_MASK(0x1, I40E_EMPINT_GPIO_ENA_GPIO23_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO24_ENA_SHIFT 24
+#define I40E_EMPINT_GPIO_ENA_GPIO24_ENA_MASK I40E_MASK(0x1, I40E_EMPINT_GPIO_ENA_GPIO24_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO25_ENA_SHIFT 25
+#define I40E_EMPINT_GPIO_ENA_GPIO25_ENA_MASK I40E_MASK(0x1, I40E_EMPINT_GPIO_ENA_GPIO25_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO26_ENA_SHIFT 26
+#define I40E_EMPINT_GPIO_ENA_GPIO26_ENA_MASK I40E_MASK(0x1, I40E_EMPINT_GPIO_ENA_GPIO26_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO27_ENA_SHIFT 27
+#define I40E_EMPINT_GPIO_ENA_GPIO27_ENA_MASK I40E_MASK(0x1, I40E_EMPINT_GPIO_ENA_GPIO27_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO28_ENA_SHIFT 28
+#define I40E_EMPINT_GPIO_ENA_GPIO28_ENA_MASK I40E_MASK(0x1, I40E_EMPINT_GPIO_ENA_GPIO28_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO29_ENA_SHIFT 29
+#define I40E_EMPINT_GPIO_ENA_GPIO29_ENA_MASK I40E_MASK(0x1, I40E_EMPINT_GPIO_ENA_GPIO29_ENA_SHIFT)
+#define I40E_PFGEN_PORTMDIO_NUM 0x0003F100 /* Reset: CORER */
+#define I40E_PFGEN_PORTMDIO_NUM_PORT_NUM_SHIFT 0
+#define I40E_PFGEN_PORTMDIO_NUM_PORT_NUM_MASK I40E_MASK(0x3, I40E_PFGEN_PORTMDIO_NUM_PORT_NUM_SHIFT)
+#define I40E_PFGEN_PORTMDIO_NUM_VFLINK_STAT_ENA_SHIFT 4
+#define I40E_PFGEN_PORTMDIO_NUM_VFLINK_STAT_ENA_MASK I40E_MASK(0x1, I40E_PFGEN_PORTMDIO_NUM_VFLINK_STAT_ENA_SHIFT)
+#define I40E_PFINT_AEQCTL 0x00038700 /* Reset: CORER */
+#define I40E_PFINT_AEQCTL_MSIX_INDX_SHIFT 0
+#define I40E_PFINT_AEQCTL_MSIX_INDX_MASK I40E_MASK(0xFF, I40E_PFINT_AEQCTL_MSIX_INDX_SHIFT)
+#define I40E_PFINT_AEQCTL_ITR_INDX_SHIFT 11
+#define I40E_PFINT_AEQCTL_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_AEQCTL_ITR_INDX_SHIFT)
+#define I40E_PFINT_AEQCTL_MSIX0_INDX_SHIFT 13
+#define I40E_PFINT_AEQCTL_MSIX0_INDX_MASK I40E_MASK(0x7, I40E_PFINT_AEQCTL_MSIX0_INDX_SHIFT)
+#define I40E_PFINT_AEQCTL_CAUSE_ENA_SHIFT 30
+#define I40E_PFINT_AEQCTL_CAUSE_ENA_MASK I40E_MASK(0x1, I40E_PFINT_AEQCTL_CAUSE_ENA_SHIFT)
+#define I40E_PFINT_AEQCTL_INTEVENT_SHIFT 31
+#define I40E_PFINT_AEQCTL_INTEVENT_MASK I40E_MASK(0x1, I40E_PFINT_AEQCTL_INTEVENT_SHIFT)
+#define I40E_PFINT_CEQCTL(_INTPF) (0x00036800 + ((_INTPF) * 4)) /* _i=0...511 */ /* Reset: CORER */
+#define I40E_PFINT_CEQCTL_MAX_INDEX 511
+#define I40E_PFINT_CEQCTL_MSIX_INDX_SHIFT 0
+#define I40E_PFINT_CEQCTL_MSIX_INDX_MASK I40E_MASK(0xFF, I40E_PFINT_CEQCTL_MSIX_INDX_SHIFT)
+#define I40E_PFINT_CEQCTL_ITR_INDX_SHIFT 11
+#define I40E_PFINT_CEQCTL_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_CEQCTL_ITR_INDX_SHIFT)
+#define I40E_PFINT_CEQCTL_MSIX0_INDX_SHIFT 13
+#define I40E_PFINT_CEQCTL_MSIX0_INDX_MASK I40E_MASK(0x7, I40E_PFINT_CEQCTL_MSIX0_INDX_SHIFT)
+#define I40E_PFINT_CEQCTL_NEXTQ_INDX_SHIFT 16
+#define I40E_PFINT_CEQCTL_NEXTQ_INDX_MASK I40E_MASK(0x7FF, I40E_PFINT_CEQCTL_NEXTQ_INDX_SHIFT)
+#define I40E_PFINT_CEQCTL_NEXTQ_TYPE_SHIFT 27
+#define I40E_PFINT_CEQCTL_NEXTQ_TYPE_MASK I40E_MASK(0x3, I40E_PFINT_CEQCTL_NEXTQ_TYPE_SHIFT)
+#define I40E_PFINT_CEQCTL_CAUSE_ENA_SHIFT 30
+#define I40E_PFINT_CEQCTL_CAUSE_ENA_MASK I40E_MASK(0x1, I40E_PFINT_CEQCTL_CAUSE_ENA_SHIFT)
+#define I40E_PFINT_CEQCTL_INTEVENT_SHIFT 31
+#define I40E_PFINT_CEQCTL_INTEVENT_MASK I40E_MASK(0x1, I40E_PFINT_CEQCTL_INTEVENT_SHIFT)
+#define I40E_GLINT_CTL 0x0003F800 /* Reset: CORER */
+#define I40E_GLINT_CTL_DIS_AUTOMASK_PF0_SHIFT 0
+#define I40E_GLINT_CTL_DIS_AUTOMASK_PF0_MASK I40E_MASK(0x1, I40E_GLINT_CTL_DIS_AUTOMASK_PF0_SHIFT)
+#define I40E_GLINT_CTL_DIS_AUTOMASK_VF0_SHIFT 1
+#define I40E_GLINT_CTL_DIS_AUTOMASK_VF0_MASK I40E_MASK(0x1, I40E_GLINT_CTL_DIS_AUTOMASK_VF0_SHIFT)
+#define I40E_GLINT_CTL_DIS_AUTOMASK_N_SHIFT 2
+#define I40E_GLINT_CTL_DIS_AUTOMASK_N_MASK I40E_MASK(0x1, I40E_GLINT_CTL_DIS_AUTOMASK_N_SHIFT)
+#define I40E_PFINT_DYN_CTL0 0x00038480 /* Reset: PFR */
+#define I40E_PFINT_DYN_CTL0_INTENA_SHIFT 0
+#define I40E_PFINT_DYN_CTL0_INTENA_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTL0_INTENA_SHIFT)
+#define I40E_PFINT_DYN_CTL0_CLEARPBA_SHIFT 1
+#define I40E_PFINT_DYN_CTL0_CLEARPBA_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTL0_CLEARPBA_SHIFT)
+#define I40E_PFINT_DYN_CTL0_SWINT_TRIG_SHIFT 2
+#define I40E_PFINT_DYN_CTL0_SWINT_TRIG_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTL0_SWINT_TRIG_SHIFT)
+#define I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT 3
+#define I40E_PFINT_DYN_CTL0_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT)
+#define I40E_PFINT_DYN_CTL0_INTERVAL_SHIFT 5
+#define I40E_PFINT_DYN_CTL0_INTERVAL_MASK I40E_MASK(0xFFF, I40E_PFINT_DYN_CTL0_INTERVAL_SHIFT)
+#define I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_SHIFT 24
+#define I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_SHIFT)
+#define I40E_PFINT_DYN_CTL0_SW_ITR_INDX_SHIFT 25
+#define I40E_PFINT_DYN_CTL0_SW_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTL0_SW_ITR_INDX_SHIFT)
+#define I40E_PFINT_DYN_CTL0_INTENA_MSK_SHIFT 31
+#define I40E_PFINT_DYN_CTL0_INTENA_MSK_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTL0_INTENA_MSK_SHIFT)
+#define I40E_PFINT_DYN_CTLN(_INTPF) (0x00034800 + ((_INTPF) * 4)) /* _i=0...511 */ /* Reset: PFR */
+#define I40E_PFINT_DYN_CTLN_MAX_INDEX 511
+#define I40E_PFINT_DYN_CTLN_INTENA_SHIFT 0
+#define I40E_PFINT_DYN_CTLN_INTENA_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTLN_INTENA_SHIFT)
+#define I40E_PFINT_DYN_CTLN_CLEARPBA_SHIFT 1
+#define I40E_PFINT_DYN_CTLN_CLEARPBA_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTLN_CLEARPBA_SHIFT)
+#define I40E_PFINT_DYN_CTLN_SWINT_TRIG_SHIFT 2
+#define I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTLN_SWINT_TRIG_SHIFT)
+#define I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT 3
+#define I40E_PFINT_DYN_CTLN_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT)
+#define I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT 5
+#define I40E_PFINT_DYN_CTLN_INTERVAL_MASK I40E_MASK(0xFFF, I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT)
+#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT 24
+#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT)
+#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT 25
+#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT)
+#define I40E_PFINT_DYN_CTLN_INTENA_MSK_SHIFT 31
+#define I40E_PFINT_DYN_CTLN_INTENA_MSK_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTLN_INTENA_MSK_SHIFT)
+#define I40E_PFINT_GPIO_ENA 0x00088080 /* Reset: CORER */
+#define I40E_PFINT_GPIO_ENA_GPIO0_ENA_SHIFT 0
+#define I40E_PFINT_GPIO_ENA_GPIO0_ENA_MASK I40E_MASK(0x1, I40E_PFINT_GPIO_ENA_GPIO0_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO1_ENA_SHIFT 1
+#define I40E_PFINT_GPIO_ENA_GPIO1_ENA_MASK I40E_MASK(0x1, I40E_PFINT_GPIO_ENA_GPIO1_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO2_ENA_SHIFT 2
+#define I40E_PFINT_GPIO_ENA_GPIO2_ENA_MASK I40E_MASK(0x1, I40E_PFINT_GPIO_ENA_GPIO2_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO3_ENA_SHIFT 3
+#define I40E_PFINT_GPIO_ENA_GPIO3_ENA_MASK I40E_MASK(0x1, I40E_PFINT_GPIO_ENA_GPIO3_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO4_ENA_SHIFT 4
+#define I40E_PFINT_GPIO_ENA_GPIO4_ENA_MASK I40E_MASK(0x1, I40E_PFINT_GPIO_ENA_GPIO4_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO5_ENA_SHIFT 5
+#define I40E_PFINT_GPIO_ENA_GPIO5_ENA_MASK I40E_MASK(0x1, I40E_PFINT_GPIO_ENA_GPIO5_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO6_ENA_SHIFT 6
+#define I40E_PFINT_GPIO_ENA_GPIO6_ENA_MASK I40E_MASK(0x1, I40E_PFINT_GPIO_ENA_GPIO6_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO7_ENA_SHIFT 7
+#define I40E_PFINT_GPIO_ENA_GPIO7_ENA_MASK I40E_MASK(0x1, I40E_PFINT_GPIO_ENA_GPIO7_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO8_ENA_SHIFT 8
+#define I40E_PFINT_GPIO_ENA_GPIO8_ENA_MASK I40E_MASK(0x1, I40E_PFINT_GPIO_ENA_GPIO8_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO9_ENA_SHIFT 9
+#define I40E_PFINT_GPIO_ENA_GPIO9_ENA_MASK I40E_MASK(0x1, I40E_PFINT_GPIO_ENA_GPIO9_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO10_ENA_SHIFT 10
+#define I40E_PFINT_GPIO_ENA_GPIO10_ENA_MASK I40E_MASK(0x1, I40E_PFINT_GPIO_ENA_GPIO10_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO11_ENA_SHIFT 11
+#define I40E_PFINT_GPIO_ENA_GPIO11_ENA_MASK I40E_MASK(0x1, I40E_PFINT_GPIO_ENA_GPIO11_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO12_ENA_SHIFT 12
+#define I40E_PFINT_GPIO_ENA_GPIO12_ENA_MASK I40E_MASK(0x1, I40E_PFINT_GPIO_ENA_GPIO12_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO13_ENA_SHIFT 13
+#define I40E_PFINT_GPIO_ENA_GPIO13_ENA_MASK I40E_MASK(0x1, I40E_PFINT_GPIO_ENA_GPIO13_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO14_ENA_SHIFT 14
+#define I40E_PFINT_GPIO_ENA_GPIO14_ENA_MASK I40E_MASK(0x1, I40E_PFINT_GPIO_ENA_GPIO14_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO15_ENA_SHIFT 15
+#define I40E_PFINT_GPIO_ENA_GPIO15_ENA_MASK I40E_MASK(0x1, I40E_PFINT_GPIO_ENA_GPIO15_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO16_ENA_SHIFT 16
+#define I40E_PFINT_GPIO_ENA_GPIO16_ENA_MASK I40E_MASK(0x1, I40E_PFINT_GPIO_ENA_GPIO16_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO17_ENA_SHIFT 17
+#define I40E_PFINT_GPIO_ENA_GPIO17_ENA_MASK I40E_MASK(0x1, I40E_PFINT_GPIO_ENA_GPIO17_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO18_ENA_SHIFT 18
+#define I40E_PFINT_GPIO_ENA_GPIO18_ENA_MASK I40E_MASK(0x1, I40E_PFINT_GPIO_ENA_GPIO18_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO19_ENA_SHIFT 19
+#define I40E_PFINT_GPIO_ENA_GPIO19_ENA_MASK I40E_MASK(0x1, I40E_PFINT_GPIO_ENA_GPIO19_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO20_ENA_SHIFT 20
+#define I40E_PFINT_GPIO_ENA_GPIO20_ENA_MASK I40E_MASK(0x1, I40E_PFINT_GPIO_ENA_GPIO20_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO21_ENA_SHIFT 21
+#define I40E_PFINT_GPIO_ENA_GPIO21_ENA_MASK I40E_MASK(0x1, I40E_PFINT_GPIO_ENA_GPIO21_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO22_ENA_SHIFT 22
+#define I40E_PFINT_GPIO_ENA_GPIO22_ENA_MASK I40E_MASK(0x1, I40E_PFINT_GPIO_ENA_GPIO22_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO23_ENA_SHIFT 23
+#define I40E_PFINT_GPIO_ENA_GPIO23_ENA_MASK I40E_MASK(0x1, I40E_PFINT_GPIO_ENA_GPIO23_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO24_ENA_SHIFT 24
+#define I40E_PFINT_GPIO_ENA_GPIO24_ENA_MASK I40E_MASK(0x1, I40E_PFINT_GPIO_ENA_GPIO24_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO25_ENA_SHIFT 25
+#define I40E_PFINT_GPIO_ENA_GPIO25_ENA_MASK I40E_MASK(0x1, I40E_PFINT_GPIO_ENA_GPIO25_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO26_ENA_SHIFT 26
+#define I40E_PFINT_GPIO_ENA_GPIO26_ENA_MASK I40E_MASK(0x1, I40E_PFINT_GPIO_ENA_GPIO26_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO27_ENA_SHIFT 27
+#define I40E_PFINT_GPIO_ENA_GPIO27_ENA_MASK I40E_MASK(0x1, I40E_PFINT_GPIO_ENA_GPIO27_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO28_ENA_SHIFT 28
+#define I40E_PFINT_GPIO_ENA_GPIO28_ENA_MASK I40E_MASK(0x1, I40E_PFINT_GPIO_ENA_GPIO28_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO29_ENA_SHIFT 29
+#define I40E_PFINT_GPIO_ENA_GPIO29_ENA_MASK I40E_MASK(0x1, I40E_PFINT_GPIO_ENA_GPIO29_ENA_SHIFT)
+#define I40E_PFINT_ICR0 0x00038780 /* Reset: CORER */
+#define I40E_PFINT_ICR0_INTEVENT_SHIFT 0
+#define I40E_PFINT_ICR0_INTEVENT_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_INTEVENT_SHIFT)
+#define I40E_PFINT_ICR0_QUEUE_0_SHIFT 1
+#define I40E_PFINT_ICR0_QUEUE_0_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_QUEUE_0_SHIFT)
+#define I40E_PFINT_ICR0_QUEUE_1_SHIFT 2
+#define I40E_PFINT_ICR0_QUEUE_1_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_QUEUE_1_SHIFT)
+#define I40E_PFINT_ICR0_QUEUE_2_SHIFT 3
+#define I40E_PFINT_ICR0_QUEUE_2_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_QUEUE_2_SHIFT)
+#define I40E_PFINT_ICR0_QUEUE_3_SHIFT 4
+#define I40E_PFINT_ICR0_QUEUE_3_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_QUEUE_3_SHIFT)
+#define I40E_PFINT_ICR0_QUEUE_4_SHIFT 5
+#define I40E_PFINT_ICR0_QUEUE_4_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_QUEUE_4_SHIFT)
+#define I40E_PFINT_ICR0_QUEUE_5_SHIFT 6
+#define I40E_PFINT_ICR0_QUEUE_5_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_QUEUE_5_SHIFT)
+#define I40E_PFINT_ICR0_QUEUE_6_SHIFT 7
+#define I40E_PFINT_ICR0_QUEUE_6_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_QUEUE_6_SHIFT)
+#define I40E_PFINT_ICR0_QUEUE_7_SHIFT 8
+#define I40E_PFINT_ICR0_QUEUE_7_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_QUEUE_7_SHIFT)
+#define I40E_PFINT_ICR0_ECC_ERR_SHIFT 16
+#define I40E_PFINT_ICR0_ECC_ERR_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_ECC_ERR_SHIFT)
+#define I40E_PFINT_ICR0_MAL_DETECT_SHIFT 19
+#define I40E_PFINT_ICR0_MAL_DETECT_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_MAL_DETECT_SHIFT)
+#define I40E_PFINT_ICR0_GRST_SHIFT 20
+#define I40E_PFINT_ICR0_GRST_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_GRST_SHIFT)
+#define I40E_PFINT_ICR0_PCI_EXCEPTION_SHIFT 21
+#define I40E_PFINT_ICR0_PCI_EXCEPTION_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_PCI_EXCEPTION_SHIFT)
+#define I40E_PFINT_ICR0_GPIO_SHIFT 22
+#define I40E_PFINT_ICR0_GPIO_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_GPIO_SHIFT)
+#define I40E_PFINT_ICR0_TIMESYNC_SHIFT 23
+#define I40E_PFINT_ICR0_TIMESYNC_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_TIMESYNC_SHIFT)
+#define I40E_PFINT_ICR0_STORM_DETECT_SHIFT 24
+#define I40E_PFINT_ICR0_STORM_DETECT_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_STORM_DETECT_SHIFT)
+#define I40E_PFINT_ICR0_LINK_STAT_CHANGE_SHIFT 25
+#define I40E_PFINT_ICR0_LINK_STAT_CHANGE_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_LINK_STAT_CHANGE_SHIFT)
+#define I40E_PFINT_ICR0_HMC_ERR_SHIFT 26
+#define I40E_PFINT_ICR0_HMC_ERR_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_HMC_ERR_SHIFT)
+#define I40E_PFINT_ICR0_PE_CRITERR_SHIFT 28
+#define I40E_PFINT_ICR0_PE_CRITERR_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_PE_CRITERR_SHIFT)
+#define I40E_PFINT_ICR0_VFLR_SHIFT 29
+#define I40E_PFINT_ICR0_VFLR_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_VFLR_SHIFT)
+#define I40E_PFINT_ICR0_ADMINQ_SHIFT 30
+#define I40E_PFINT_ICR0_ADMINQ_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_ADMINQ_SHIFT)
+#define I40E_PFINT_ICR0_SWINT_SHIFT 31
+#define I40E_PFINT_ICR0_SWINT_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_SWINT_SHIFT)
+#define I40E_PFINT_ICR0_ENA 0x00038800 /* Reset: CORER */
+#define I40E_PFINT_ICR0_ENA_ECC_ERR_SHIFT 16
+#define I40E_PFINT_ICR0_ENA_ECC_ERR_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_ENA_ECC_ERR_SHIFT)
+#define I40E_PFINT_ICR0_ENA_MAL_DETECT_SHIFT 19
+#define I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_ENA_MAL_DETECT_SHIFT)
+#define I40E_PFINT_ICR0_ENA_GRST_SHIFT 20
+#define I40E_PFINT_ICR0_ENA_GRST_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_ENA_GRST_SHIFT)
+#define I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_SHIFT 21
+#define I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_SHIFT)
+#define I40E_PFINT_ICR0_ENA_GPIO_SHIFT 22
+#define I40E_PFINT_ICR0_ENA_GPIO_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_ENA_GPIO_SHIFT)
+#define I40E_PFINT_ICR0_ENA_TIMESYNC_SHIFT 23
+#define I40E_PFINT_ICR0_ENA_TIMESYNC_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_ENA_TIMESYNC_SHIFT)
+#define I40E_PFINT_ICR0_ENA_STORM_DETECT_SHIFT 24
+#define I40E_PFINT_ICR0_ENA_STORM_DETECT_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_ENA_STORM_DETECT_SHIFT)
+#define I40E_PFINT_ICR0_ENA_LINK_STAT_CHANGE_SHIFT 25
+#define I40E_PFINT_ICR0_ENA_LINK_STAT_CHANGE_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_ENA_LINK_STAT_CHANGE_SHIFT)
+#define I40E_PFINT_ICR0_ENA_HMC_ERR_SHIFT 26
+#define I40E_PFINT_ICR0_ENA_HMC_ERR_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_ENA_HMC_ERR_SHIFT)
+#define I40E_PFINT_ICR0_ENA_PE_CRITERR_SHIFT 28
+#define I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_ENA_PE_CRITERR_SHIFT)
+#define I40E_PFINT_ICR0_ENA_VFLR_SHIFT 29
+#define I40E_PFINT_ICR0_ENA_VFLR_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_ENA_VFLR_SHIFT)
+#define I40E_PFINT_ICR0_ENA_ADMINQ_SHIFT 30
+#define I40E_PFINT_ICR0_ENA_ADMINQ_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_ENA_ADMINQ_SHIFT)
+#define I40E_PFINT_ICR0_ENA_RSVD_SHIFT 31
+#define I40E_PFINT_ICR0_ENA_RSVD_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_ENA_RSVD_SHIFT)
+#define I40E_PFINT_ITR0(_i) (0x00038000 + ((_i) * 128)) /* _i=0...2 */ /* Reset: PFR */
+#define I40E_PFINT_ITR0_MAX_INDEX 2
+#define I40E_PFINT_ITR0_INTERVAL_SHIFT 0
+#define I40E_PFINT_ITR0_INTERVAL_MASK I40E_MASK(0xFFF, I40E_PFINT_ITR0_INTERVAL_SHIFT)
+#define I40E_PFINT_ITRN(_i, _INTPF) (0x00030000 + ((_i) * 2048 + (_INTPF) * 4)) /* _i=0...2, _INTPF=0...511 */ /* Reset: PFR */
+#define I40E_PFINT_ITRN_MAX_INDEX 2
+#define I40E_PFINT_ITRN_INTERVAL_SHIFT 0
+#define I40E_PFINT_ITRN_INTERVAL_MASK I40E_MASK(0xFFF, I40E_PFINT_ITRN_INTERVAL_SHIFT)
+#define I40E_PFINT_LNKLST0 0x00038500 /* Reset: PFR */
+#define I40E_PFINT_LNKLST0_FIRSTQ_INDX_SHIFT 0
+#define I40E_PFINT_LNKLST0_FIRSTQ_INDX_MASK I40E_MASK(0x7FF, I40E_PFINT_LNKLST0_FIRSTQ_INDX_SHIFT)
+#define I40E_PFINT_LNKLST0_FIRSTQ_TYPE_SHIFT 11
+#define I40E_PFINT_LNKLST0_FIRSTQ_TYPE_MASK I40E_MASK(0x3, I40E_PFINT_LNKLST0_FIRSTQ_TYPE_SHIFT)
+#define I40E_PFINT_LNKLSTN(_INTPF) (0x00035000 + ((_INTPF) * 4)) /* _i=0...511 */ /* Reset: PFR */
+#define I40E_PFINT_LNKLSTN_MAX_INDEX 511
+#define I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT 0
+#define I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK I40E_MASK(0x7FF, I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT)
+#define I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT 11
+#define I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_MASK I40E_MASK(0x3, I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT)
+#define I40E_PFINT_RATE0 0x00038580 /* Reset: PFR */
+#define I40E_PFINT_RATE0_INTERVAL_SHIFT 0
+#define I40E_PFINT_RATE0_INTERVAL_MASK I40E_MASK(0x3F, I40E_PFINT_RATE0_INTERVAL_SHIFT)
+#define I40E_PFINT_RATE0_INTRL_ENA_SHIFT 6
+#define I40E_PFINT_RATE0_INTRL_ENA_MASK I40E_MASK(0x1, I40E_PFINT_RATE0_INTRL_ENA_SHIFT)
+#define I40E_PFINT_RATEN(_INTPF) (0x00035800 + ((_INTPF) * 4)) /* _i=0...511 */ /* Reset: PFR */
+#define I40E_PFINT_RATEN_MAX_INDEX 511
+#define I40E_PFINT_RATEN_INTERVAL_SHIFT 0
+#define I40E_PFINT_RATEN_INTERVAL_MASK I40E_MASK(0x3F, I40E_PFINT_RATEN_INTERVAL_SHIFT)
+#define I40E_PFINT_RATEN_INTRL_ENA_SHIFT 6
+#define I40E_PFINT_RATEN_INTRL_ENA_MASK I40E_MASK(0x1, I40E_PFINT_RATEN_INTRL_ENA_SHIFT)
+#define I40E_PFINT_STAT_CTL0 0x00038400 /* Reset: CORER */
+#define I40E_PFINT_STAT_CTL0_OTHER_ITR_INDX_SHIFT 2
+#define I40E_PFINT_STAT_CTL0_OTHER_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_STAT_CTL0_OTHER_ITR_INDX_SHIFT)
+#define I40E_QINT_RQCTL(_Q) (0x0003A000 + ((_Q) * 4)) /* _i=0...1535 */ /* Reset: CORER */
+#define I40E_QINT_RQCTL_MAX_INDEX 1535
+#define I40E_QINT_RQCTL_MSIX_INDX_SHIFT 0
+#define I40E_QINT_RQCTL_MSIX_INDX_MASK I40E_MASK(0xFF, I40E_QINT_RQCTL_MSIX_INDX_SHIFT)
+#define I40E_QINT_RQCTL_ITR_INDX_SHIFT 11
+#define I40E_QINT_RQCTL_ITR_INDX_MASK I40E_MASK(0x3, I40E_QINT_RQCTL_ITR_INDX_SHIFT)
+#define I40E_QINT_RQCTL_MSIX0_INDX_SHIFT 13
+#define I40E_QINT_RQCTL_MSIX0_INDX_MASK I40E_MASK(0x7, I40E_QINT_RQCTL_MSIX0_INDX_SHIFT)
+#define I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT 16
+#define I40E_QINT_RQCTL_NEXTQ_INDX_MASK I40E_MASK(0x7FF, I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT)
+#define I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT 27
+#define I40E_QINT_RQCTL_NEXTQ_TYPE_MASK I40E_MASK(0x3, I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT)
+#define I40E_QINT_RQCTL_CAUSE_ENA_SHIFT 30
+#define I40E_QINT_RQCTL_CAUSE_ENA_MASK I40E_MASK(0x1, I40E_QINT_RQCTL_CAUSE_ENA_SHIFT)
+#define I40E_QINT_RQCTL_INTEVENT_SHIFT 31
+#define I40E_QINT_RQCTL_INTEVENT_MASK I40E_MASK(0x1, I40E_QINT_RQCTL_INTEVENT_SHIFT)
+#define I40E_QINT_TQCTL(_Q) (0x0003C000 + ((_Q) * 4)) /* _i=0...1535 */ /* Reset: CORER */
+#define I40E_QINT_TQCTL_MAX_INDEX 1535
+#define I40E_QINT_TQCTL_MSIX_INDX_SHIFT 0
+#define I40E_QINT_TQCTL_MSIX_INDX_MASK I40E_MASK(0xFF, I40E_QINT_TQCTL_MSIX_INDX_SHIFT)
+#define I40E_QINT_TQCTL_ITR_INDX_SHIFT 11
+#define I40E_QINT_TQCTL_ITR_INDX_MASK I40E_MASK(0x3, I40E_QINT_TQCTL_ITR_INDX_SHIFT)
+#define I40E_QINT_TQCTL_MSIX0_INDX_SHIFT 13
+#define I40E_QINT_TQCTL_MSIX0_INDX_MASK I40E_MASK(0x7, I40E_QINT_TQCTL_MSIX0_INDX_SHIFT)
+#define I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT 16
+#define I40E_QINT_TQCTL_NEXTQ_INDX_MASK I40E_MASK(0x7FF, I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT)
+#define I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT 27
+#define I40E_QINT_TQCTL_NEXTQ_TYPE_MASK I40E_MASK(0x3, I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT)
+#define I40E_QINT_TQCTL_CAUSE_ENA_SHIFT 30
+#define I40E_QINT_TQCTL_CAUSE_ENA_MASK I40E_MASK(0x1, I40E_QINT_TQCTL_CAUSE_ENA_SHIFT)
+#define I40E_QINT_TQCTL_INTEVENT_SHIFT 31
+#define I40E_QINT_TQCTL_INTEVENT_MASK I40E_MASK(0x1, I40E_QINT_TQCTL_INTEVENT_SHIFT)
+#define I40E_VFINT_DYN_CTL0(_VF) (0x0002A400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFINT_DYN_CTL0_MAX_INDEX 127
+#define I40E_VFINT_DYN_CTL0_INTENA_SHIFT 0
+#define I40E_VFINT_DYN_CTL0_INTENA_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTL0_INTENA_SHIFT)
+#define I40E_VFINT_DYN_CTL0_CLEARPBA_SHIFT 1
+#define I40E_VFINT_DYN_CTL0_CLEARPBA_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTL0_CLEARPBA_SHIFT)
+#define I40E_VFINT_DYN_CTL0_SWINT_TRIG_SHIFT 2
+#define I40E_VFINT_DYN_CTL0_SWINT_TRIG_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTL0_SWINT_TRIG_SHIFT)
+#define I40E_VFINT_DYN_CTL0_ITR_INDX_SHIFT 3
+#define I40E_VFINT_DYN_CTL0_ITR_INDX_MASK I40E_MASK(0x3, I40E_VFINT_DYN_CTL0_ITR_INDX_SHIFT)
+#define I40E_VFINT_DYN_CTL0_INTERVAL_SHIFT 5
+#define I40E_VFINT_DYN_CTL0_INTERVAL_MASK I40E_MASK(0xFFF, I40E_VFINT_DYN_CTL0_INTERVAL_SHIFT)
+#define I40E_VFINT_DYN_CTL0_SW_ITR_INDX_ENA_SHIFT 24
+#define I40E_VFINT_DYN_CTL0_SW_ITR_INDX_ENA_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTL0_SW_ITR_INDX_ENA_SHIFT)
+#define I40E_VFINT_DYN_CTL0_SW_ITR_INDX_SHIFT 25
+#define I40E_VFINT_DYN_CTL0_SW_ITR_INDX_MASK I40E_MASK(0x3, I40E_VFINT_DYN_CTL0_SW_ITR_INDX_SHIFT)
+#define I40E_VFINT_DYN_CTL0_INTENA_MSK_SHIFT 31
+#define I40E_VFINT_DYN_CTL0_INTENA_MSK_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTL0_INTENA_MSK_SHIFT)
+#define I40E_VFINT_DYN_CTLN(_INTVF) (0x00024800 + ((_INTVF) * 4)) /* _i=0...511 */ /* Reset: VFR */
+#define I40E_VFINT_DYN_CTLN_MAX_INDEX 511
+#define I40E_VFINT_DYN_CTLN_INTENA_SHIFT 0
+#define I40E_VFINT_DYN_CTLN_INTENA_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTLN_INTENA_SHIFT)
+#define I40E_VFINT_DYN_CTLN_CLEARPBA_SHIFT 1
+#define I40E_VFINT_DYN_CTLN_CLEARPBA_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTLN_CLEARPBA_SHIFT)
+#define I40E_VFINT_DYN_CTLN_SWINT_TRIG_SHIFT 2
+#define I40E_VFINT_DYN_CTLN_SWINT_TRIG_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTLN_SWINT_TRIG_SHIFT)
+#define I40E_VFINT_DYN_CTLN_ITR_INDX_SHIFT 3
+#define I40E_VFINT_DYN_CTLN_ITR_INDX_MASK I40E_MASK(0x3, I40E_VFINT_DYN_CTLN_ITR_INDX_SHIFT)
+#define I40E_VFINT_DYN_CTLN_INTERVAL_SHIFT 5
+#define I40E_VFINT_DYN_CTLN_INTERVAL_MASK I40E_MASK(0xFFF, I40E_VFINT_DYN_CTLN_INTERVAL_SHIFT)
+#define I40E_VFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT 24
+#define I40E_VFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT)
+#define I40E_VFINT_DYN_CTLN_SW_ITR_INDX_SHIFT 25
+#define I40E_VFINT_DYN_CTLN_SW_ITR_INDX_MASK I40E_MASK(0x3, I40E_VFINT_DYN_CTLN_SW_ITR_INDX_SHIFT)
+#define I40E_VFINT_DYN_CTLN_INTENA_MSK_SHIFT 31
+#define I40E_VFINT_DYN_CTLN_INTENA_MSK_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTLN_INTENA_MSK_SHIFT)
+#define I40E_VFINT_ICR0(_VF) (0x0002BC00 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: CORER */
+#define I40E_VFINT_ICR0_MAX_INDEX 127
+#define I40E_VFINT_ICR0_INTEVENT_SHIFT 0
+#define I40E_VFINT_ICR0_INTEVENT_MASK I40E_MASK(0x1, I40E_VFINT_ICR0_INTEVENT_SHIFT)
+#define I40E_VFINT_ICR0_QUEUE_0_SHIFT 1
+#define I40E_VFINT_ICR0_QUEUE_0_MASK I40E_MASK(0x1, I40E_VFINT_ICR0_QUEUE_0_SHIFT)
+#define I40E_VFINT_ICR0_QUEUE_1_SHIFT 2
+#define I40E_VFINT_ICR0_QUEUE_1_MASK I40E_MASK(0x1, I40E_VFINT_ICR0_QUEUE_1_SHIFT)
+#define I40E_VFINT_ICR0_QUEUE_2_SHIFT 3
+#define I40E_VFINT_ICR0_QUEUE_2_MASK I40E_MASK(0x1, I40E_VFINT_ICR0_QUEUE_2_SHIFT)
+#define I40E_VFINT_ICR0_QUEUE_3_SHIFT 4
+#define I40E_VFINT_ICR0_QUEUE_3_MASK I40E_MASK(0x1, I40E_VFINT_ICR0_QUEUE_3_SHIFT)
+#define I40E_VFINT_ICR0_LINK_STAT_CHANGE_SHIFT 25
+#define I40E_VFINT_ICR0_LINK_STAT_CHANGE_MASK I40E_MASK(0x1, I40E_VFINT_ICR0_LINK_STAT_CHANGE_SHIFT)
+#define I40E_VFINT_ICR0_ADMINQ_SHIFT 30
+#define I40E_VFINT_ICR0_ADMINQ_MASK I40E_MASK(0x1, I40E_VFINT_ICR0_ADMINQ_SHIFT)
+#define I40E_VFINT_ICR0_SWINT_SHIFT 31
+#define I40E_VFINT_ICR0_SWINT_MASK I40E_MASK(0x1, I40E_VFINT_ICR0_SWINT_SHIFT)
+#define I40E_VFINT_ICR0_ENA(_VF) (0x0002C000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: CORER */
+#define I40E_VFINT_ICR0_ENA_MAX_INDEX 127
+#define I40E_VFINT_ICR0_ENA_LINK_STAT_CHANGE_SHIFT 25
+#define I40E_VFINT_ICR0_ENA_LINK_STAT_CHANGE_MASK I40E_MASK(0x1, I40E_VFINT_ICR0_ENA_LINK_STAT_CHANGE_SHIFT)
+#define I40E_VFINT_ICR0_ENA_ADMINQ_SHIFT 30
+#define I40E_VFINT_ICR0_ENA_ADMINQ_MASK I40E_MASK(0x1, I40E_VFINT_ICR0_ENA_ADMINQ_SHIFT)
+#define I40E_VFINT_ICR0_ENA_RSVD_SHIFT 31
+#define I40E_VFINT_ICR0_ENA_RSVD_MASK I40E_MASK(0x1, I40E_VFINT_ICR0_ENA_RSVD_SHIFT)
+#define I40E_VFINT_ITR0(_i, _VF) (0x00028000 + ((_i) * 1024 + (_VF) * 4)) /* _i=0...2, _VF=0...127 */ /* Reset: VFR */
+#define I40E_VFINT_ITR0_MAX_INDEX 2
+#define I40E_VFINT_ITR0_INTERVAL_SHIFT 0
+#define I40E_VFINT_ITR0_INTERVAL_MASK I40E_MASK(0xFFF, I40E_VFINT_ITR0_INTERVAL_SHIFT)
+#define I40E_VFINT_ITRN(_i, _INTVF) (0x00020000 + ((_i) * 2048 + (_INTVF) * 4)) /* _i=0...2, _INTVF=0...511 */ /* Reset: VFR */
+#define I40E_VFINT_ITRN_MAX_INDEX 2
+#define I40E_VFINT_ITRN_INTERVAL_SHIFT 0
+#define I40E_VFINT_ITRN_INTERVAL_MASK I40E_MASK(0xFFF, I40E_VFINT_ITRN_INTERVAL_SHIFT)
+#define I40E_VFINT_STAT_CTL0(_VF) (0x0002A000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: CORER */
+#define I40E_VFINT_STAT_CTL0_MAX_INDEX 127
+#define I40E_VFINT_STAT_CTL0_OTHER_ITR_INDX_SHIFT 2
+#define I40E_VFINT_STAT_CTL0_OTHER_ITR_INDX_MASK I40E_MASK(0x3, I40E_VFINT_STAT_CTL0_OTHER_ITR_INDX_SHIFT)
+#define I40E_VPINT_AEQCTL(_VF) (0x0002B800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: CORER */
+#define I40E_VPINT_AEQCTL_MAX_INDEX 127
+#define I40E_VPINT_AEQCTL_MSIX_INDX_SHIFT 0
+#define I40E_VPINT_AEQCTL_MSIX_INDX_MASK I40E_MASK(0xFF, I40E_VPINT_AEQCTL_MSIX_INDX_SHIFT)
+#define I40E_VPINT_AEQCTL_ITR_INDX_SHIFT 11
+#define I40E_VPINT_AEQCTL_ITR_INDX_MASK I40E_MASK(0x3, I40E_VPINT_AEQCTL_ITR_INDX_SHIFT)
+#define I40E_VPINT_AEQCTL_MSIX0_INDX_SHIFT 13
+#define I40E_VPINT_AEQCTL_MSIX0_INDX_MASK I40E_MASK(0x7, I40E_VPINT_AEQCTL_MSIX0_INDX_SHIFT)
+#define I40E_VPINT_AEQCTL_CAUSE_ENA_SHIFT 30
+#define I40E_VPINT_AEQCTL_CAUSE_ENA_MASK I40E_MASK(0x1, I40E_VPINT_AEQCTL_CAUSE_ENA_SHIFT)
+#define I40E_VPINT_AEQCTL_INTEVENT_SHIFT 31
+#define I40E_VPINT_AEQCTL_INTEVENT_MASK I40E_MASK(0x1, I40E_VPINT_AEQCTL_INTEVENT_SHIFT)
+#define I40E_VPINT_CEQCTL(_INTVF) (0x00026800 + ((_INTVF) * 4)) /* _i=0...511 */ /* Reset: CORER */
+#define I40E_VPINT_CEQCTL_MAX_INDEX 511
+#define I40E_VPINT_CEQCTL_MSIX_INDX_SHIFT 0
+#define I40E_VPINT_CEQCTL_MSIX_INDX_MASK I40E_MASK(0xFF, I40E_VPINT_CEQCTL_MSIX_INDX_SHIFT)
+#define I40E_VPINT_CEQCTL_ITR_INDX_SHIFT 11
+#define I40E_VPINT_CEQCTL_ITR_INDX_MASK I40E_MASK(0x3, I40E_VPINT_CEQCTL_ITR_INDX_SHIFT)
+#define I40E_VPINT_CEQCTL_MSIX0_INDX_SHIFT 13
+#define I40E_VPINT_CEQCTL_MSIX0_INDX_MASK I40E_MASK(0x7, I40E_VPINT_CEQCTL_MSIX0_INDX_SHIFT)
+#define I40E_VPINT_CEQCTL_NEXTQ_INDX_SHIFT 16
+#define I40E_VPINT_CEQCTL_NEXTQ_INDX_MASK I40E_MASK(0x7FF, I40E_VPINT_CEQCTL_NEXTQ_INDX_SHIFT)
+#define I40E_VPINT_CEQCTL_NEXTQ_TYPE_SHIFT 27
+#define I40E_VPINT_CEQCTL_NEXTQ_TYPE_MASK I40E_MASK(0x3, I40E_VPINT_CEQCTL_NEXTQ_TYPE_SHIFT)
+#define I40E_VPINT_CEQCTL_CAUSE_ENA_SHIFT 30
+#define I40E_VPINT_CEQCTL_CAUSE_ENA_MASK I40E_MASK(0x1, I40E_VPINT_CEQCTL_CAUSE_ENA_SHIFT)
+#define I40E_VPINT_CEQCTL_INTEVENT_SHIFT 31
+#define I40E_VPINT_CEQCTL_INTEVENT_MASK I40E_MASK(0x1, I40E_VPINT_CEQCTL_INTEVENT_SHIFT)
+#define I40E_VPINT_LNKLST0(_VF) (0x0002A800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VPINT_LNKLST0_MAX_INDEX 127
+#define I40E_VPINT_LNKLST0_FIRSTQ_INDX_SHIFT 0
+#define I40E_VPINT_LNKLST0_FIRSTQ_INDX_MASK I40E_MASK(0x7FF, I40E_VPINT_LNKLST0_FIRSTQ_INDX_SHIFT)
+#define I40E_VPINT_LNKLST0_FIRSTQ_TYPE_SHIFT 11
+#define I40E_VPINT_LNKLST0_FIRSTQ_TYPE_MASK I40E_MASK(0x3, I40E_VPINT_LNKLST0_FIRSTQ_TYPE_SHIFT)
+#define I40E_VPINT_LNKLSTN(_INTVF) (0x00025000 + ((_INTVF) * 4)) /* _i=0...511 */ /* Reset: VFR */
+#define I40E_VPINT_LNKLSTN_MAX_INDEX 511
+#define I40E_VPINT_LNKLSTN_FIRSTQ_INDX_SHIFT 0
+#define I40E_VPINT_LNKLSTN_FIRSTQ_INDX_MASK I40E_MASK(0x7FF, I40E_VPINT_LNKLSTN_FIRSTQ_INDX_SHIFT)
+#define I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT 11
+#define I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_MASK I40E_MASK(0x3, I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT)
+#define I40E_VPINT_RATE0(_VF) (0x0002AC00 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VPINT_RATE0_MAX_INDEX 127
+#define I40E_VPINT_RATE0_INTERVAL_SHIFT 0
+#define I40E_VPINT_RATE0_INTERVAL_MASK I40E_MASK(0x3F, I40E_VPINT_RATE0_INTERVAL_SHIFT)
+#define I40E_VPINT_RATE0_INTRL_ENA_SHIFT 6
+#define I40E_VPINT_RATE0_INTRL_ENA_MASK I40E_MASK(0x1, I40E_VPINT_RATE0_INTRL_ENA_SHIFT)
+#define I40E_VPINT_RATEN(_INTVF) (0x00025800 + ((_INTVF) * 4)) /* _i=0...511 */ /* Reset: VFR */
+#define I40E_VPINT_RATEN_MAX_INDEX 511
+#define I40E_VPINT_RATEN_INTERVAL_SHIFT 0
+#define I40E_VPINT_RATEN_INTERVAL_MASK I40E_MASK(0x3F, I40E_VPINT_RATEN_INTERVAL_SHIFT)
+#define I40E_VPINT_RATEN_INTRL_ENA_SHIFT 6
+#define I40E_VPINT_RATEN_INTRL_ENA_MASK I40E_MASK(0x1, I40E_VPINT_RATEN_INTRL_ENA_SHIFT)
+#define I40E_GL_RDPU_CNTRL 0x00051060 /* Reset: CORER */
+#define I40E_GL_RDPU_CNTRL_RX_PAD_EN_SHIFT 0
+#define I40E_GL_RDPU_CNTRL_RX_PAD_EN_MASK I40E_MASK(0x1, I40E_GL_RDPU_CNTRL_RX_PAD_EN_SHIFT)
+#define I40E_GL_RDPU_CNTRL_ECO_SHIFT 1
+#define I40E_GL_RDPU_CNTRL_ECO_MASK I40E_MASK(0x7FFFFFFF, I40E_GL_RDPU_CNTRL_ECO_SHIFT)
+#define I40E_GLLAN_RCTL_0 0x0012A500 /* Reset: CORER */
+#define I40E_GLLAN_RCTL_0_PXE_MODE_SHIFT 0
+#define I40E_GLLAN_RCTL_0_PXE_MODE_MASK I40E_MASK(0x1, I40E_GLLAN_RCTL_0_PXE_MODE_SHIFT)
+#define I40E_GLLAN_TSOMSK_F 0x000442D8 /* Reset: CORER */
+#define I40E_GLLAN_TSOMSK_F_TCPMSKF_SHIFT 0
+#define I40E_GLLAN_TSOMSK_F_TCPMSKF_MASK I40E_MASK(0xFFF, I40E_GLLAN_TSOMSK_F_TCPMSKF_SHIFT)
+#define I40E_GLLAN_TSOMSK_L 0x000442E0 /* Reset: CORER */
+#define I40E_GLLAN_TSOMSK_L_TCPMSKL_SHIFT 0
+#define I40E_GLLAN_TSOMSK_L_TCPMSKL_MASK I40E_MASK(0xFFF, I40E_GLLAN_TSOMSK_L_TCPMSKL_SHIFT)
+#define I40E_GLLAN_TSOMSK_M 0x000442DC /* Reset: CORER */
+#define I40E_GLLAN_TSOMSK_M_TCPMSKM_SHIFT 0
+#define I40E_GLLAN_TSOMSK_M_TCPMSKM_MASK I40E_MASK(0xFFF, I40E_GLLAN_TSOMSK_M_TCPMSKM_SHIFT)
+#define I40E_GLLAN_TXPRE_QDIS(_i) (0x000e6500 + ((_i) * 4)) /* _i=0...11 */ /* Reset: CORER */
+#define I40E_GLLAN_TXPRE_QDIS_MAX_INDEX 11
+#define I40E_GLLAN_TXPRE_QDIS_QINDX_SHIFT 0
+#define I40E_GLLAN_TXPRE_QDIS_QINDX_MASK I40E_MASK(0x7FF, I40E_GLLAN_TXPRE_QDIS_QINDX_SHIFT)
+#define I40E_GLLAN_TXPRE_QDIS_QDIS_STAT_SHIFT 16
+#define I40E_GLLAN_TXPRE_QDIS_QDIS_STAT_MASK I40E_MASK(0x1, I40E_GLLAN_TXPRE_QDIS_QDIS_STAT_SHIFT)
+#define I40E_GLLAN_TXPRE_QDIS_SET_QDIS_SHIFT 30
+#define I40E_GLLAN_TXPRE_QDIS_SET_QDIS_MASK I40E_MASK(0x1, I40E_GLLAN_TXPRE_QDIS_SET_QDIS_SHIFT)
+#define I40E_GLLAN_TXPRE_QDIS_CLEAR_QDIS_SHIFT 31
+#define I40E_GLLAN_TXPRE_QDIS_CLEAR_QDIS_MASK I40E_MASK(0x1, I40E_GLLAN_TXPRE_QDIS_CLEAR_QDIS_SHIFT)
+#define I40E_PFLAN_QALLOC 0x001C0400 /* Reset: CORER */
+#define I40E_PFLAN_QALLOC_FIRSTQ_SHIFT 0
+#define I40E_PFLAN_QALLOC_FIRSTQ_MASK I40E_MASK(0x7FF, I40E_PFLAN_QALLOC_FIRSTQ_SHIFT)
+#define I40E_PFLAN_QALLOC_LASTQ_SHIFT 16
+#define I40E_PFLAN_QALLOC_LASTQ_MASK I40E_MASK(0x7FF, I40E_PFLAN_QALLOC_LASTQ_SHIFT)
+#define I40E_PFLAN_QALLOC_VALID_SHIFT 31
+#define I40E_PFLAN_QALLOC_VALID_MASK I40E_MASK(0x1, I40E_PFLAN_QALLOC_VALID_SHIFT)
+#define I40E_QRX_ENA(_Q) (0x00120000 + ((_Q) * 4)) /* _i=0...1535 */ /* Reset: PFR */
+#define I40E_QRX_ENA_MAX_INDEX 1535
+#define I40E_QRX_ENA_QENA_REQ_SHIFT 0
+#define I40E_QRX_ENA_QENA_REQ_MASK I40E_MASK(0x1, I40E_QRX_ENA_QENA_REQ_SHIFT)
+#define I40E_QRX_ENA_FAST_QDIS_SHIFT 1
+#define I40E_QRX_ENA_FAST_QDIS_MASK I40E_MASK(0x1, I40E_QRX_ENA_FAST_QDIS_SHIFT)
+#define I40E_QRX_ENA_QENA_STAT_SHIFT 2
+#define I40E_QRX_ENA_QENA_STAT_MASK I40E_MASK(0x1, I40E_QRX_ENA_QENA_STAT_SHIFT)
+#define I40E_QRX_TAIL(_Q) (0x00128000 + ((_Q) * 4)) /* _i=0...1535 */ /* Reset: CORER */
+#define I40E_QRX_TAIL_MAX_INDEX 1535
+#define I40E_QRX_TAIL_TAIL_SHIFT 0
+#define I40E_QRX_TAIL_TAIL_MASK I40E_MASK(0x1FFF, I40E_QRX_TAIL_TAIL_SHIFT)
+#define I40E_QTX_CTL(_Q) (0x00104000 + ((_Q) * 4)) /* _i=0...1535 */ /* Reset: CORER */
+#define I40E_QTX_CTL_MAX_INDEX 1535
+#define I40E_QTX_CTL_PFVF_Q_SHIFT 0
+#define I40E_QTX_CTL_PFVF_Q_MASK I40E_MASK(0x3, I40E_QTX_CTL_PFVF_Q_SHIFT)
+#define I40E_QTX_CTL_PF_INDX_SHIFT 2
+#define I40E_QTX_CTL_PF_INDX_MASK I40E_MASK(0xF, I40E_QTX_CTL_PF_INDX_SHIFT)
+#define I40E_QTX_CTL_VFVM_INDX_SHIFT 7
+#define I40E_QTX_CTL_VFVM_INDX_MASK I40E_MASK(0x1FF, I40E_QTX_CTL_VFVM_INDX_SHIFT)
+#define I40E_QTX_ENA(_Q) (0x00100000 + ((_Q) * 4)) /* _i=0...1535 */ /* Reset: PFR */
+#define I40E_QTX_ENA_MAX_INDEX 1535
+#define I40E_QTX_ENA_QENA_REQ_SHIFT 0
+#define I40E_QTX_ENA_QENA_REQ_MASK I40E_MASK(0x1, I40E_QTX_ENA_QENA_REQ_SHIFT)
+#define I40E_QTX_ENA_FAST_QDIS_SHIFT 1
+#define I40E_QTX_ENA_FAST_QDIS_MASK I40E_MASK(0x1, I40E_QTX_ENA_FAST_QDIS_SHIFT)
+#define I40E_QTX_ENA_QENA_STAT_SHIFT 2
+#define I40E_QTX_ENA_QENA_STAT_MASK I40E_MASK(0x1, I40E_QTX_ENA_QENA_STAT_SHIFT)
+#define I40E_QTX_HEAD(_Q) (0x000E4000 + ((_Q) * 4)) /* _i=0...1535 */ /* Reset: CORER */
+#define I40E_QTX_HEAD_MAX_INDEX 1535
+#define I40E_QTX_HEAD_HEAD_SHIFT 0
+#define I40E_QTX_HEAD_HEAD_MASK I40E_MASK(0x1FFF, I40E_QTX_HEAD_HEAD_SHIFT)
+#define I40E_QTX_HEAD_RS_PENDING_SHIFT 16
+#define I40E_QTX_HEAD_RS_PENDING_MASK I40E_MASK(0x1, I40E_QTX_HEAD_RS_PENDING_SHIFT)
+#define I40E_QTX_TAIL(_Q) (0x00108000 + ((_Q) * 4)) /* _i=0...1535 */ /* Reset: PFR */
+#define I40E_QTX_TAIL_MAX_INDEX 1535
+#define I40E_QTX_TAIL_TAIL_SHIFT 0
+#define I40E_QTX_TAIL_TAIL_MASK I40E_MASK(0x1FFF, I40E_QTX_TAIL_TAIL_SHIFT)
+#define I40E_VPLAN_MAPENA(_VF) (0x00074000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VPLAN_MAPENA_MAX_INDEX 127
+#define I40E_VPLAN_MAPENA_TXRX_ENA_SHIFT 0
+#define I40E_VPLAN_MAPENA_TXRX_ENA_MASK I40E_MASK(0x1, I40E_VPLAN_MAPENA_TXRX_ENA_SHIFT)
+#define I40E_VPLAN_QTABLE(_i, _VF) (0x00070000 + ((_i) * 1024 + (_VF) * 4)) /* _i=0...15, _VF=0...127 */ /* Reset: VFR */
+#define I40E_VPLAN_QTABLE_MAX_INDEX 15
+#define I40E_VPLAN_QTABLE_QINDEX_SHIFT 0
+#define I40E_VPLAN_QTABLE_QINDEX_MASK I40E_MASK(0x7FF, I40E_VPLAN_QTABLE_QINDEX_SHIFT)
+#define I40E_VSILAN_QBASE(_VSI) (0x0020C800 + ((_VSI) * 4)) /* _i=0...383 */ /* Reset: PFR */
+#define I40E_VSILAN_QBASE_MAX_INDEX 383
+#define I40E_VSILAN_QBASE_VSIBASE_SHIFT 0
+#define I40E_VSILAN_QBASE_VSIBASE_MASK I40E_MASK(0x7FF, I40E_VSILAN_QBASE_VSIBASE_SHIFT)
+#define I40E_VSILAN_QBASE_VSIQTABLE_ENA_SHIFT 11
+#define I40E_VSILAN_QBASE_VSIQTABLE_ENA_MASK I40E_MASK(0x1, I40E_VSILAN_QBASE_VSIQTABLE_ENA_SHIFT)
+#define I40E_VSILAN_QTABLE(_i, _VSI) (0x00200000 + ((_i) * 2048 + (_VSI) * 4)) /* _i=0...7, _VSI=0...383 */ /* Reset: PFR */
+#define I40E_VSILAN_QTABLE_MAX_INDEX 7
+#define I40E_VSILAN_QTABLE_QINDEX_0_SHIFT 0
+#define I40E_VSILAN_QTABLE_QINDEX_0_MASK I40E_MASK(0x7FF, I40E_VSILAN_QTABLE_QINDEX_0_SHIFT)
+#define I40E_VSILAN_QTABLE_QINDEX_1_SHIFT 16
+#define I40E_VSILAN_QTABLE_QINDEX_1_MASK I40E_MASK(0x7FF, I40E_VSILAN_QTABLE_QINDEX_1_SHIFT)
+#define I40E_PRTGL_SAH 0x001E2140 /* Reset: GLOBR */
+#define I40E_PRTGL_SAH_FC_SAH_SHIFT 0
+#define I40E_PRTGL_SAH_FC_SAH_MASK I40E_MASK(0xFFFF, I40E_PRTGL_SAH_FC_SAH_SHIFT)
+#define I40E_PRTGL_SAH_MFS_SHIFT 16
+#define I40E_PRTGL_SAH_MFS_MASK I40E_MASK(0xFFFF, I40E_PRTGL_SAH_MFS_SHIFT)
+#define I40E_PRTGL_SAL 0x001E2120 /* Reset: GLOBR */
+#define I40E_PRTGL_SAL_FC_SAL_SHIFT 0
+#define I40E_PRTGL_SAL_FC_SAL_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTGL_SAL_FC_SAL_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GCP 0x001E30E0 /* Reset: GLOBR */
+#define I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GCP_HSEC_CTL_RX_ENABLE_GCP_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GCP_HSEC_CTL_RX_ENABLE_GCP_MASK I40E_MASK(0x1, I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GCP_HSEC_CTL_RX_ENABLE_GCP_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GPP 0x001E3260 /* Reset: GLOBR */
+#define I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GPP_HSEC_CTL_RX_ENABLE_GPP_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GPP_HSEC_CTL_RX_ENABLE_GPP_MASK I40E_MASK(0x1, I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GPP_HSEC_CTL_RX_ENABLE_GPP_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_RX_ENABLE_PPP 0x001E32E0 /* Reset: GLOBR */
+#define I40E_PRTMAC_HSEC_CTL_RX_ENABLE_PPP_HSEC_CTL_RX_ENABLE_PPP_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_RX_ENABLE_PPP_HSEC_CTL_RX_ENABLE_PPP_MASK I40E_MASK(0x1, I40E_PRTMAC_HSEC_CTL_RX_ENABLE_PPP_HSEC_CTL_RX_ENABLE_PPP_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_RX_FORWARD_CONTROL 0x001E3360 /* Reset: GLOBR */
+#define I40E_PRTMAC_HSEC_CTL_RX_FORWARD_CONTROL_HSEC_CTL_RX_FORWARD_CONTROL_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_RX_FORWARD_CONTROL_HSEC_CTL_RX_FORWARD_CONTROL_MASK I40E_MASK(0x1, I40E_PRTMAC_HSEC_CTL_RX_FORWARD_CONTROL_HSEC_CTL_RX_FORWARD_CONTROL_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_DA_UCAST_PART1 0x001E3110 /* Reset: GLOBR */
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_DA_UCAST_PART1_HSEC_CTL_RX_PAUSE_DA_UCAST_PART1_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_DA_UCAST_PART1_HSEC_CTL_RX_PAUSE_DA_UCAST_PART1_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTMAC_HSEC_CTL_RX_PAUSE_DA_UCAST_PART1_HSEC_CTL_RX_PAUSE_DA_UCAST_PART1_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_DA_UCAST_PART2 0x001E3120 /* Reset: GLOBR */
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_DA_UCAST_PART2_HSEC_CTL_RX_PAUSE_DA_UCAST_PART2_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_DA_UCAST_PART2_HSEC_CTL_RX_PAUSE_DA_UCAST_PART2_MASK I40E_MASK(0xFFFF, I40E_PRTMAC_HSEC_CTL_RX_PAUSE_DA_UCAST_PART2_HSEC_CTL_RX_PAUSE_DA_UCAST_PART2_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE 0x001E30C0 /* Reset: GLOBR */
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_HSEC_CTL_RX_PAUSE_ENABLE_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_HSEC_CTL_RX_PAUSE_ENABLE_MASK I40E_MASK(0x1FF, I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_HSEC_CTL_RX_PAUSE_ENABLE_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_SA_PART1 0x001E3140 /* Reset: GLOBR */
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_SA_PART1_HSEC_CTL_RX_PAUSE_SA_PART1_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_SA_PART1_HSEC_CTL_RX_PAUSE_SA_PART1_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTMAC_HSEC_CTL_RX_PAUSE_SA_PART1_HSEC_CTL_RX_PAUSE_SA_PART1_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_SA_PART2 0x001E3150 /* Reset: GLOBR */
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_SA_PART2_HSEC_CTL_RX_PAUSE_SA_PART2_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_SA_PART2_HSEC_CTL_RX_PAUSE_SA_PART2_MASK I40E_MASK(0xFFFF, I40E_PRTMAC_HSEC_CTL_RX_PAUSE_SA_PART2_HSEC_CTL_RX_PAUSE_SA_PART2_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE 0x001E30D0 /* Reset: GLOBR */
+#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_HSEC_CTL_TX_PAUSE_ENABLE_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_HSEC_CTL_TX_PAUSE_ENABLE_MASK I40E_MASK(0x1FF, I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_HSEC_CTL_TX_PAUSE_ENABLE_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA(_i) (0x001E3370 + ((_i) * 16)) /* _i=0...8 */ /* Reset: GLOBR */
+#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA_MAX_INDEX 8
+#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA_HSEC_CTL_TX_PAUSE_QUANTA_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA_HSEC_CTL_TX_PAUSE_QUANTA_MASK I40E_MASK(0xFFFF, I40E_PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA_HSEC_CTL_TX_PAUSE_QUANTA_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER(_i) (0x001E3400 + ((_i) * 16)) /* _i=0...8 */ /* Reset: GLOBR */
+#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_MAX_INDEX 8
+#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_MASK I40E_MASK(0xFFFF, I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_TX_SA_PART1 0x001E34B0 /* Reset: GLOBR */
+#define I40E_PRTMAC_HSEC_CTL_TX_SA_PART1_HSEC_CTL_TX_SA_PART1_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_TX_SA_PART1_HSEC_CTL_TX_SA_PART1_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTMAC_HSEC_CTL_TX_SA_PART1_HSEC_CTL_TX_SA_PART1_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_TX_SA_PART2 0x001E34C0 /* Reset: GLOBR */
+#define I40E_PRTMAC_HSEC_CTL_TX_SA_PART2_HSEC_CTL_TX_SA_PART2_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_TX_SA_PART2_HSEC_CTL_TX_SA_PART2_MASK I40E_MASK(0xFFFF, I40E_PRTMAC_HSEC_CTL_TX_SA_PART2_HSEC_CTL_TX_SA_PART2_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A 0x0008C480 /* Reset: GLOBR */
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_TX_LANE3_SHIFT 0
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_TX_LANE3_MASK I40E_MASK(0x3, I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_TX_LANE3_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_TX_LANE2_SHIFT 2
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_TX_LANE2_MASK I40E_MASK(0x3, I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_TX_LANE2_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_TX_LANE1_SHIFT 4
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_TX_LANE1_MASK I40E_MASK(0x3, I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_TX_LANE1_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_TX_LANE0_SHIFT 6
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_TX_LANE0_MASK I40E_MASK(0x3, I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_TX_LANE0_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_RX_LANE3_SHIFT 8
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_RX_LANE3_MASK I40E_MASK(0x3, I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_RX_LANE3_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_RX_LANE2_SHIFT 10
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_RX_LANE2_MASK I40E_MASK(0x3, I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_RX_LANE2_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_RX_LANE1_SHIFT 12
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_RX_LANE1_MASK I40E_MASK(0x3, I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_RX_LANE1_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_RX_LANE0_SHIFT 14
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_RX_LANE0_MASK I40E_MASK(0x3, I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_RX_LANE0_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B 0x0008C484 /* Reset: GLOBR */
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_TX_LANE3_SHIFT 0
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_TX_LANE3_MASK I40E_MASK(0x3, I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_TX_LANE3_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_TX_LANE2_SHIFT 2
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_TX_LANE2_MASK I40E_MASK(0x3, I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_TX_LANE2_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_TX_LANE1_SHIFT 4
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_TX_LANE1_MASK I40E_MASK(0x3, I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_TX_LANE1_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_TX_LANE0_SHIFT 6
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_TX_LANE0_MASK I40E_MASK(0x3, I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_TX_LANE0_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_RX_LANE3_SHIFT 8
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_RX_LANE3_MASK I40E_MASK(0x3, I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_RX_LANE3_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_RX_LANE2_SHIFT 10
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_RX_LANE2_MASK I40E_MASK(0x3, I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_RX_LANE2_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_RX_LANE1_SHIFT 12
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_RX_LANE1_MASK I40E_MASK(0x3, I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_RX_LANE1_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_RX_LANE0_SHIFT 14
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_RX_LANE0_MASK I40E_MASK(0x3, I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_RX_LANE0_SHIFT)
+#define I40E_GL_FWRESETCNT 0x00083100 /* Reset: POR */
+#define I40E_GL_FWRESETCNT_FWRESETCNT_SHIFT 0
+#define I40E_GL_FWRESETCNT_FWRESETCNT_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_FWRESETCNT_FWRESETCNT_SHIFT)
+#define I40E_GL_MNG_FWSM 0x000B6134 /* Reset: POR */
+#define I40E_GL_MNG_FWSM_FW_MODES_SHIFT 0
+#define I40E_GL_MNG_FWSM_FW_MODES_MASK I40E_MASK(0x3, I40E_GL_MNG_FWSM_FW_MODES_SHIFT)
+#define I40E_GL_MNG_FWSM_EEP_RELOAD_IND_SHIFT 10
+#define I40E_GL_MNG_FWSM_EEP_RELOAD_IND_MASK I40E_MASK(0x1, I40E_GL_MNG_FWSM_EEP_RELOAD_IND_SHIFT)
+#define I40E_GL_MNG_FWSM_CRC_ERROR_MODULE_SHIFT 11
+#define I40E_GL_MNG_FWSM_CRC_ERROR_MODULE_MASK I40E_MASK(0xF, I40E_GL_MNG_FWSM_CRC_ERROR_MODULE_SHIFT)
+#define I40E_GL_MNG_FWSM_FW_STATUS_VALID_SHIFT 15
+#define I40E_GL_MNG_FWSM_FW_STATUS_VALID_MASK I40E_MASK(0x1, I40E_GL_MNG_FWSM_FW_STATUS_VALID_SHIFT)
+#define I40E_GL_MNG_FWSM_RESET_CNT_SHIFT 16
+#define I40E_GL_MNG_FWSM_RESET_CNT_MASK I40E_MASK(0x7, I40E_GL_MNG_FWSM_RESET_CNT_SHIFT)
+#define I40E_GL_MNG_FWSM_EXT_ERR_IND_SHIFT 19
+#define I40E_GL_MNG_FWSM_EXT_ERR_IND_MASK I40E_MASK(0x3F, I40E_GL_MNG_FWSM_EXT_ERR_IND_SHIFT)
+#define I40E_GL_MNG_FWSM_PHY_SERDES0_CONFIG_ERR_SHIFT 26
+#define I40E_GL_MNG_FWSM_PHY_SERDES0_CONFIG_ERR_MASK I40E_MASK(0x1, I40E_GL_MNG_FWSM_PHY_SERDES0_CONFIG_ERR_SHIFT)
+#define I40E_GL_MNG_FWSM_PHY_SERDES1_CONFIG_ERR_SHIFT 27
+#define I40E_GL_MNG_FWSM_PHY_SERDES1_CONFIG_ERR_MASK I40E_MASK(0x1, I40E_GL_MNG_FWSM_PHY_SERDES1_CONFIG_ERR_SHIFT)
+#define I40E_GL_MNG_FWSM_PHY_SERDES2_CONFIG_ERR_SHIFT 28
+#define I40E_GL_MNG_FWSM_PHY_SERDES2_CONFIG_ERR_MASK I40E_MASK(0x1, I40E_GL_MNG_FWSM_PHY_SERDES2_CONFIG_ERR_SHIFT)
+#define I40E_GL_MNG_FWSM_PHY_SERDES3_CONFIG_ERR_SHIFT 29
+#define I40E_GL_MNG_FWSM_PHY_SERDES3_CONFIG_ERR_MASK I40E_MASK(0x1, I40E_GL_MNG_FWSM_PHY_SERDES3_CONFIG_ERR_SHIFT)
+#define I40E_GL_MNG_HWARB_CTRL 0x000B6130 /* Reset: POR */
+#define I40E_GL_MNG_HWARB_CTRL_NCSI_ARB_EN_SHIFT 0
+#define I40E_GL_MNG_HWARB_CTRL_NCSI_ARB_EN_MASK I40E_MASK(0x1, I40E_GL_MNG_HWARB_CTRL_NCSI_ARB_EN_SHIFT)
+#define I40E_PRT_MNG_FTFT_DATA(_i) (0x000852A0 + ((_i) * 32)) /* _i=0...31 */ /* Reset: POR */
+#define I40E_PRT_MNG_FTFT_DATA_MAX_INDEX 31
+#define I40E_PRT_MNG_FTFT_DATA_DWORD_SHIFT 0
+#define I40E_PRT_MNG_FTFT_DATA_DWORD_MASK I40E_MASK(0xFFFFFFFF, I40E_PRT_MNG_FTFT_DATA_DWORD_SHIFT)
+#define I40E_PRT_MNG_FTFT_LENGTH 0x00085260 /* Reset: POR */
+#define I40E_PRT_MNG_FTFT_LENGTH_LENGTH_SHIFT 0
+#define I40E_PRT_MNG_FTFT_LENGTH_LENGTH_MASK I40E_MASK(0xFF, I40E_PRT_MNG_FTFT_LENGTH_LENGTH_SHIFT)
+#define I40E_PRT_MNG_FTFT_MASK(_i) (0x00085160 + ((_i) * 32)) /* _i=0...7 */ /* Reset: POR */
+#define I40E_PRT_MNG_FTFT_MASK_MAX_INDEX 7
+#define I40E_PRT_MNG_FTFT_MASK_MASK_SHIFT 0
+#define I40E_PRT_MNG_FTFT_MASK_MASK_MASK I40E_MASK(0xFFFF, I40E_PRT_MNG_FTFT_MASK_MASK_SHIFT)
+#define I40E_PRT_MNG_MANC 0x00256A20 /* Reset: POR */
+#define I40E_PRT_MNG_MANC_FLOW_CONTROL_DISCARD_SHIFT 0
+#define I40E_PRT_MNG_MANC_FLOW_CONTROL_DISCARD_MASK I40E_MASK(0x1, I40E_PRT_MNG_MANC_FLOW_CONTROL_DISCARD_SHIFT)
+#define I40E_PRT_MNG_MANC_NCSI_DISCARD_SHIFT 1
+#define I40E_PRT_MNG_MANC_NCSI_DISCARD_MASK I40E_MASK(0x1, I40E_PRT_MNG_MANC_NCSI_DISCARD_SHIFT)
+#define I40E_PRT_MNG_MANC_RCV_TCO_EN_SHIFT 17
+#define I40E_PRT_MNG_MANC_RCV_TCO_EN_MASK I40E_MASK(0x1, I40E_PRT_MNG_MANC_RCV_TCO_EN_SHIFT)
+#define I40E_PRT_MNG_MANC_RCV_ALL_SHIFT 19
+#define I40E_PRT_MNG_MANC_RCV_ALL_MASK I40E_MASK(0x1, I40E_PRT_MNG_MANC_RCV_ALL_SHIFT)
+#define I40E_PRT_MNG_MANC_FIXED_NET_TYPE_SHIFT 25
+#define I40E_PRT_MNG_MANC_FIXED_NET_TYPE_MASK I40E_MASK(0x1, I40E_PRT_MNG_MANC_FIXED_NET_TYPE_SHIFT)
+#define I40E_PRT_MNG_MANC_NET_TYPE_SHIFT 26
+#define I40E_PRT_MNG_MANC_NET_TYPE_MASK I40E_MASK(0x1, I40E_PRT_MNG_MANC_NET_TYPE_SHIFT)
+#define I40E_PRT_MNG_MANC_EN_BMC2OS_SHIFT 28
+#define I40E_PRT_MNG_MANC_EN_BMC2OS_MASK I40E_MASK(0x1, I40E_PRT_MNG_MANC_EN_BMC2OS_SHIFT)
+#define I40E_PRT_MNG_MANC_EN_BMC2NET_SHIFT 29
+#define I40E_PRT_MNG_MANC_EN_BMC2NET_MASK I40E_MASK(0x1, I40E_PRT_MNG_MANC_EN_BMC2NET_SHIFT)
+#define I40E_PRT_MNG_MAVTV(_i) (0x00255900 + ((_i) * 32)) /* _i=0...7 */ /* Reset: POR */
+#define I40E_PRT_MNG_MAVTV_MAX_INDEX 7
+#define I40E_PRT_MNG_MAVTV_VID_SHIFT 0
+#define I40E_PRT_MNG_MAVTV_VID_MASK I40E_MASK(0xFFF, I40E_PRT_MNG_MAVTV_VID_SHIFT)
+#define I40E_PRT_MNG_MDEF(_i) (0x00255D00 + ((_i) * 32)) /* _i=0...7 */ /* Reset: POR */
+#define I40E_PRT_MNG_MDEF_MAX_INDEX 7
+#define I40E_PRT_MNG_MDEF_MAC_EXACT_AND_SHIFT 0
+#define I40E_PRT_MNG_MDEF_MAC_EXACT_AND_MASK I40E_MASK(0xF, I40E_PRT_MNG_MDEF_MAC_EXACT_AND_SHIFT)
+#define I40E_PRT_MNG_MDEF_BROADCAST_AND_SHIFT 4
+#define I40E_PRT_MNG_MDEF_BROADCAST_AND_MASK I40E_MASK(0x1, I40E_PRT_MNG_MDEF_BROADCAST_AND_SHIFT)
+#define I40E_PRT_MNG_MDEF_VLAN_AND_SHIFT 5
+#define I40E_PRT_MNG_MDEF_VLAN_AND_MASK I40E_MASK(0xFF, I40E_PRT_MNG_MDEF_VLAN_AND_SHIFT)
+#define I40E_PRT_MNG_MDEF_IPV4_ADDRESS_AND_SHIFT 13
+#define I40E_PRT_MNG_MDEF_IPV4_ADDRESS_AND_MASK I40E_MASK(0xF, I40E_PRT_MNG_MDEF_IPV4_ADDRESS_AND_SHIFT)
+#define I40E_PRT_MNG_MDEF_IPV6_ADDRESS_AND_SHIFT 17
+#define I40E_PRT_MNG_MDEF_IPV6_ADDRESS_AND_MASK I40E_MASK(0xF, I40E_PRT_MNG_MDEF_IPV6_ADDRESS_AND_SHIFT)
+#define I40E_PRT_MNG_MDEF_MAC_EXACT_OR_SHIFT 21
+#define I40E_PRT_MNG_MDEF_MAC_EXACT_OR_MASK I40E_MASK(0xF, I40E_PRT_MNG_MDEF_MAC_EXACT_OR_SHIFT)
+#define I40E_PRT_MNG_MDEF_BROADCAST_OR_SHIFT 25
+#define I40E_PRT_MNG_MDEF_BROADCAST_OR_MASK I40E_MASK(0x1, I40E_PRT_MNG_MDEF_BROADCAST_OR_SHIFT)
+#define I40E_PRT_MNG_MDEF_MULTICAST_AND_SHIFT 26
+#define I40E_PRT_MNG_MDEF_MULTICAST_AND_MASK I40E_MASK(0x1, I40E_PRT_MNG_MDEF_MULTICAST_AND_SHIFT)
+#define I40E_PRT_MNG_MDEF_ARP_REQUEST_OR_SHIFT 27
+#define I40E_PRT_MNG_MDEF_ARP_REQUEST_OR_MASK I40E_MASK(0x1, I40E_PRT_MNG_MDEF_ARP_REQUEST_OR_SHIFT)
+#define I40E_PRT_MNG_MDEF_ARP_RESPONSE_OR_SHIFT 28
+#define I40E_PRT_MNG_MDEF_ARP_RESPONSE_OR_MASK I40E_MASK(0x1, I40E_PRT_MNG_MDEF_ARP_RESPONSE_OR_SHIFT)
+#define I40E_PRT_MNG_MDEF_NEIGHBOR_DISCOVERY_134_OR_SHIFT 29
+#define I40E_PRT_MNG_MDEF_NEIGHBOR_DISCOVERY_134_OR_MASK I40E_MASK(0x1, I40E_PRT_MNG_MDEF_NEIGHBOR_DISCOVERY_134_OR_SHIFT)
+#define I40E_PRT_MNG_MDEF_PORT_0X298_OR_SHIFT 30
+#define I40E_PRT_MNG_MDEF_PORT_0X298_OR_MASK I40E_MASK(0x1, I40E_PRT_MNG_MDEF_PORT_0X298_OR_SHIFT)
+#define I40E_PRT_MNG_MDEF_PORT_0X26F_OR_SHIFT 31
+#define I40E_PRT_MNG_MDEF_PORT_0X26F_OR_MASK I40E_MASK(0x1, I40E_PRT_MNG_MDEF_PORT_0X26F_OR_SHIFT)
+#define I40E_PRT_MNG_MDEF_EXT(_i) (0x00255F00 + ((_i) * 32)) /* _i=0...7 */ /* Reset: POR */
+#define I40E_PRT_MNG_MDEF_EXT_MAX_INDEX 7
+#define I40E_PRT_MNG_MDEF_EXT_L2_ETHERTYPE_AND_SHIFT 0
+#define I40E_PRT_MNG_MDEF_EXT_L2_ETHERTYPE_AND_MASK I40E_MASK(0xF, I40E_PRT_MNG_MDEF_EXT_L2_ETHERTYPE_AND_SHIFT)
+#define I40E_PRT_MNG_MDEF_EXT_L2_ETHERTYPE_OR_SHIFT 4
+#define I40E_PRT_MNG_MDEF_EXT_L2_ETHERTYPE_OR_MASK I40E_MASK(0xF, I40E_PRT_MNG_MDEF_EXT_L2_ETHERTYPE_OR_SHIFT)
+#define I40E_PRT_MNG_MDEF_EXT_FLEX_PORT_OR_SHIFT 8
+#define I40E_PRT_MNG_MDEF_EXT_FLEX_PORT_OR_MASK I40E_MASK(0xFFFF, I40E_PRT_MNG_MDEF_EXT_FLEX_PORT_OR_SHIFT)
+#define I40E_PRT_MNG_MDEF_EXT_FLEX_TCO_SHIFT 24
+#define I40E_PRT_MNG_MDEF_EXT_FLEX_TCO_MASK I40E_MASK(0x1, I40E_PRT_MNG_MDEF_EXT_FLEX_TCO_SHIFT)
+#define I40E_PRT_MNG_MDEF_EXT_NEIGHBOR_DISCOVERY_135_OR_SHIFT 25
+#define I40E_PRT_MNG_MDEF_EXT_NEIGHBOR_DISCOVERY_135_OR_MASK I40E_MASK(0x1, I40E_PRT_MNG_MDEF_EXT_NEIGHBOR_DISCOVERY_135_OR_SHIFT)
+#define I40E_PRT_MNG_MDEF_EXT_NEIGHBOR_DISCOVERY_136_OR_SHIFT 26
+#define I40E_PRT_MNG_MDEF_EXT_NEIGHBOR_DISCOVERY_136_OR_MASK I40E_MASK(0x1, I40E_PRT_MNG_MDEF_EXT_NEIGHBOR_DISCOVERY_136_OR_SHIFT)
+#define I40E_PRT_MNG_MDEF_EXT_NEIGHBOR_DISCOVERY_137_OR_SHIFT 27
+#define I40E_PRT_MNG_MDEF_EXT_NEIGHBOR_DISCOVERY_137_OR_MASK I40E_MASK(0x1, I40E_PRT_MNG_MDEF_EXT_NEIGHBOR_DISCOVERY_137_OR_SHIFT)
+#define I40E_PRT_MNG_MDEF_EXT_ICMP_OR_SHIFT 28
+#define I40E_PRT_MNG_MDEF_EXT_ICMP_OR_MASK I40E_MASK(0x1, I40E_PRT_MNG_MDEF_EXT_ICMP_OR_SHIFT)
+#define I40E_PRT_MNG_MDEF_EXT_MLD_SHIFT 29
+#define I40E_PRT_MNG_MDEF_EXT_MLD_MASK I40E_MASK(0x1, I40E_PRT_MNG_MDEF_EXT_MLD_SHIFT)
+#define I40E_PRT_MNG_MDEF_EXT_APPLY_TO_NETWORK_TRAFFIC_SHIFT 30
+#define I40E_PRT_MNG_MDEF_EXT_APPLY_TO_NETWORK_TRAFFIC_MASK I40E_MASK(0x1, I40E_PRT_MNG_MDEF_EXT_APPLY_TO_NETWORK_TRAFFIC_SHIFT)
+#define I40E_PRT_MNG_MDEF_EXT_APPLY_TO_HOST_TRAFFIC_SHIFT 31
+#define I40E_PRT_MNG_MDEF_EXT_APPLY_TO_HOST_TRAFFIC_MASK I40E_MASK(0x1, I40E_PRT_MNG_MDEF_EXT_APPLY_TO_HOST_TRAFFIC_SHIFT)
+#define I40E_PRT_MNG_MDEFVSI(_i) (0x00256580 + ((_i) * 32)) /* _i=0...3 */ /* Reset: POR */
+#define I40E_PRT_MNG_MDEFVSI_MAX_INDEX 3
+#define I40E_PRT_MNG_MDEFVSI_MDEFVSI_2N_SHIFT 0
+#define I40E_PRT_MNG_MDEFVSI_MDEFVSI_2N_MASK I40E_MASK(0xFFFF, I40E_PRT_MNG_MDEFVSI_MDEFVSI_2N_SHIFT)
+#define I40E_PRT_MNG_MDEFVSI_MDEFVSI_2NP1_SHIFT 16
+#define I40E_PRT_MNG_MDEFVSI_MDEFVSI_2NP1_MASK I40E_MASK(0xFFFF, I40E_PRT_MNG_MDEFVSI_MDEFVSI_2NP1_SHIFT)
+#define I40E_PRT_MNG_METF(_i) (0x00256780 + ((_i) * 32)) /* _i=0...3 */ /* Reset: POR */
+#define I40E_PRT_MNG_METF_MAX_INDEX 3
+#define I40E_PRT_MNG_METF_ETYPE_SHIFT 0
+#define I40E_PRT_MNG_METF_ETYPE_MASK I40E_MASK(0xFFFF, I40E_PRT_MNG_METF_ETYPE_SHIFT)
+#define I40E_PRT_MNG_METF_POLARITY_SHIFT 30
+#define I40E_PRT_MNG_METF_POLARITY_MASK I40E_MASK(0x1, I40E_PRT_MNG_METF_POLARITY_SHIFT)
+#define I40E_PRT_MNG_MFUTP(_i) (0x00254E00 + ((_i) * 32)) /* _i=0...15 */ /* Reset: POR */
+#define I40E_PRT_MNG_MFUTP_MAX_INDEX 15
+#define I40E_PRT_MNG_MFUTP_MFUTP_N_SHIFT 0
+#define I40E_PRT_MNG_MFUTP_MFUTP_N_MASK I40E_MASK(0xFFFF, I40E_PRT_MNG_MFUTP_MFUTP_N_SHIFT)
+#define I40E_PRT_MNG_MFUTP_UDP_SHIFT 16
+#define I40E_PRT_MNG_MFUTP_UDP_MASK I40E_MASK(0x1, I40E_PRT_MNG_MFUTP_UDP_SHIFT)
+#define I40E_PRT_MNG_MFUTP_TCP_SHIFT 17
+#define I40E_PRT_MNG_MFUTP_TCP_MASK I40E_MASK(0x1, I40E_PRT_MNG_MFUTP_TCP_SHIFT)
+#define I40E_PRT_MNG_MFUTP_SOURCE_DESTINATION_SHIFT 18
+#define I40E_PRT_MNG_MFUTP_SOURCE_DESTINATION_MASK I40E_MASK(0x1, I40E_PRT_MNG_MFUTP_SOURCE_DESTINATION_SHIFT)
+#define I40E_PRT_MNG_MIPAF4(_i) (0x00256280 + ((_i) * 32)) /* _i=0...3 */ /* Reset: POR */
+#define I40E_PRT_MNG_MIPAF4_MAX_INDEX 3
+#define I40E_PRT_MNG_MIPAF4_MIPAF_SHIFT 0
+#define I40E_PRT_MNG_MIPAF4_MIPAF_MASK I40E_MASK(0xFFFFFFFF, I40E_PRT_MNG_MIPAF4_MIPAF_SHIFT)
+#define I40E_PRT_MNG_MIPAF6(_i) (0x00254200 + ((_i) * 32)) /* _i=0...15 */ /* Reset: POR */
+#define I40E_PRT_MNG_MIPAF6_MAX_INDEX 15
+#define I40E_PRT_MNG_MIPAF6_MIPAF_SHIFT 0
+#define I40E_PRT_MNG_MIPAF6_MIPAF_MASK I40E_MASK(0xFFFFFFFF, I40E_PRT_MNG_MIPAF6_MIPAF_SHIFT)
+#define I40E_PRT_MNG_MMAH(_i) (0x00256380 + ((_i) * 32)) /* _i=0...3 */ /* Reset: POR */
+#define I40E_PRT_MNG_MMAH_MAX_INDEX 3
+#define I40E_PRT_MNG_MMAH_MMAH_SHIFT 0
+#define I40E_PRT_MNG_MMAH_MMAH_MASK I40E_MASK(0xFFFF, I40E_PRT_MNG_MMAH_MMAH_SHIFT)
+#define I40E_PRT_MNG_MMAL(_i) (0x00256480 + ((_i) * 32)) /* _i=0...3 */ /* Reset: POR */
+#define I40E_PRT_MNG_MMAL_MAX_INDEX 3
+#define I40E_PRT_MNG_MMAL_MMAL_SHIFT 0
+#define I40E_PRT_MNG_MMAL_MMAL_MASK I40E_MASK(0xFFFFFFFF, I40E_PRT_MNG_MMAL_MMAL_SHIFT)
+#define I40E_PRT_MNG_MNGONLY 0x00256A60 /* Reset: POR */
+#define I40E_PRT_MNG_MNGONLY_EXCLUSIVE_TO_MANAGEABILITY_SHIFT 0
+#define I40E_PRT_MNG_MNGONLY_EXCLUSIVE_TO_MANAGEABILITY_MASK I40E_MASK(0xFF, I40E_PRT_MNG_MNGONLY_EXCLUSIVE_TO_MANAGEABILITY_SHIFT)
+#define I40E_PRT_MNG_MSFM 0x00256AA0 /* Reset: POR */
+#define I40E_PRT_MNG_MSFM_PORT_26F_UDP_SHIFT 0
+#define I40E_PRT_MNG_MSFM_PORT_26F_UDP_MASK I40E_MASK(0x1, I40E_PRT_MNG_MSFM_PORT_26F_UDP_SHIFT)
+#define I40E_PRT_MNG_MSFM_PORT_26F_TCP_SHIFT 1
+#define I40E_PRT_MNG_MSFM_PORT_26F_TCP_MASK I40E_MASK(0x1, I40E_PRT_MNG_MSFM_PORT_26F_TCP_SHIFT)
+#define I40E_PRT_MNG_MSFM_PORT_298_UDP_SHIFT 2
+#define I40E_PRT_MNG_MSFM_PORT_298_UDP_MASK I40E_MASK(0x1, I40E_PRT_MNG_MSFM_PORT_298_UDP_SHIFT)
+#define I40E_PRT_MNG_MSFM_PORT_298_TCP_SHIFT 3
+#define I40E_PRT_MNG_MSFM_PORT_298_TCP_MASK I40E_MASK(0x1, I40E_PRT_MNG_MSFM_PORT_298_TCP_SHIFT)
+#define I40E_PRT_MNG_MSFM_IPV6_0_MASK_SHIFT 4
+#define I40E_PRT_MNG_MSFM_IPV6_0_MASK_MASK I40E_MASK(0x1, I40E_PRT_MNG_MSFM_IPV6_0_MASK_SHIFT)
+#define I40E_PRT_MNG_MSFM_IPV6_1_MASK_SHIFT 5
+#define I40E_PRT_MNG_MSFM_IPV6_1_MASK_MASK I40E_MASK(0x1, I40E_PRT_MNG_MSFM_IPV6_1_MASK_SHIFT)
+#define I40E_PRT_MNG_MSFM_IPV6_2_MASK_SHIFT 6
+#define I40E_PRT_MNG_MSFM_IPV6_2_MASK_MASK I40E_MASK(0x1, I40E_PRT_MNG_MSFM_IPV6_2_MASK_SHIFT)
+#define I40E_PRT_MNG_MSFM_IPV6_3_MASK_SHIFT 7
+#define I40E_PRT_MNG_MSFM_IPV6_3_MASK_MASK I40E_MASK(0x1, I40E_PRT_MNG_MSFM_IPV6_3_MASK_SHIFT)
+#define I40E_MSIX_PBA(_i) (0x00001000 + ((_i) * 4)) /* _i=0...5 */ /* Reset: FLR */
+#define I40E_MSIX_PBA_MAX_INDEX 5
+#define I40E_MSIX_PBA_PENBIT_SHIFT 0
+#define I40E_MSIX_PBA_PENBIT_MASK I40E_MASK(0xFFFFFFFF, I40E_MSIX_PBA_PENBIT_SHIFT)
+#define I40E_MSIX_TADD(_i) (0x00000000 + ((_i) * 16)) /* _i=0...128 */ /* Reset: FLR */
+#define I40E_MSIX_TADD_MAX_INDEX 128
+#define I40E_MSIX_TADD_MSIXTADD10_SHIFT 0
+#define I40E_MSIX_TADD_MSIXTADD10_MASK I40E_MASK(0x3, I40E_MSIX_TADD_MSIXTADD10_SHIFT)
+#define I40E_MSIX_TADD_MSIXTADD_SHIFT 2
+#define I40E_MSIX_TADD_MSIXTADD_MASK I40E_MASK(0x3FFFFFFF, I40E_MSIX_TADD_MSIXTADD_SHIFT)
+#define I40E_MSIX_TMSG(_i) (0x00000008 + ((_i) * 16)) /* _i=0...128 */ /* Reset: FLR */
+#define I40E_MSIX_TMSG_MAX_INDEX 128
+#define I40E_MSIX_TMSG_MSIXTMSG_SHIFT 0
+#define I40E_MSIX_TMSG_MSIXTMSG_MASK I40E_MASK(0xFFFFFFFF, I40E_MSIX_TMSG_MSIXTMSG_SHIFT)
+#define I40E_MSIX_TUADD(_i) (0x00000004 + ((_i) * 16)) /* _i=0...128 */ /* Reset: FLR */
+#define I40E_MSIX_TUADD_MAX_INDEX 128
+#define I40E_MSIX_TUADD_MSIXTUADD_SHIFT 0
+#define I40E_MSIX_TUADD_MSIXTUADD_MASK I40E_MASK(0xFFFFFFFF, I40E_MSIX_TUADD_MSIXTUADD_SHIFT)
+#define I40E_MSIX_TVCTRL(_i) (0x0000000C + ((_i) * 16)) /* _i=0...128 */ /* Reset: FLR */
+#define I40E_MSIX_TVCTRL_MAX_INDEX 128
+#define I40E_MSIX_TVCTRL_MASK_SHIFT 0
+#define I40E_MSIX_TVCTRL_MASK_MASK I40E_MASK(0x1, I40E_MSIX_TVCTRL_MASK_SHIFT)
+#define I40E_VFMSIX_PBA1(_i) (0x00002000 + ((_i) * 4)) /* _i=0...19 */ /* Reset: VFLR */
+#define I40E_VFMSIX_PBA1_MAX_INDEX 19
+#define I40E_VFMSIX_PBA1_PENBIT_SHIFT 0
+#define I40E_VFMSIX_PBA1_PENBIT_MASK I40E_MASK(0xFFFFFFFF, I40E_VFMSIX_PBA1_PENBIT_SHIFT)
+#define I40E_VFMSIX_TADD1(_i) (0x00002100 + ((_i) * 16)) /* _i=0...639 */ /* Reset: VFLR */
+#define I40E_VFMSIX_TADD1_MAX_INDEX 639
+#define I40E_VFMSIX_TADD1_MSIXTADD10_SHIFT 0
+#define I40E_VFMSIX_TADD1_MSIXTADD10_MASK I40E_MASK(0x3, I40E_VFMSIX_TADD1_MSIXTADD10_SHIFT)
+#define I40E_VFMSIX_TADD1_MSIXTADD_SHIFT 2
+#define I40E_VFMSIX_TADD1_MSIXTADD_MASK I40E_MASK(0x3FFFFFFF, I40E_VFMSIX_TADD1_MSIXTADD_SHIFT)
+#define I40E_VFMSIX_TMSG1(_i) (0x00002108 + ((_i) * 16)) /* _i=0...639 */ /* Reset: VFLR */
+#define I40E_VFMSIX_TMSG1_MAX_INDEX 639
+#define I40E_VFMSIX_TMSG1_MSIXTMSG_SHIFT 0
+#define I40E_VFMSIX_TMSG1_MSIXTMSG_MASK I40E_MASK(0xFFFFFFFF, I40E_VFMSIX_TMSG1_MSIXTMSG_SHIFT)
+#define I40E_VFMSIX_TUADD1(_i) (0x00002104 + ((_i) * 16)) /* _i=0...639 */ /* Reset: VFLR */
+#define I40E_VFMSIX_TUADD1_MAX_INDEX 639
+#define I40E_VFMSIX_TUADD1_MSIXTUADD_SHIFT 0
+#define I40E_VFMSIX_TUADD1_MSIXTUADD_MASK I40E_MASK(0xFFFFFFFF, I40E_VFMSIX_TUADD1_MSIXTUADD_SHIFT)
+#define I40E_VFMSIX_TVCTRL1(_i) (0x0000210C + ((_i) * 16)) /* _i=0...639 */ /* Reset: VFLR */
+#define I40E_VFMSIX_TVCTRL1_MAX_INDEX 639
+#define I40E_VFMSIX_TVCTRL1_MASK_SHIFT 0
+#define I40E_VFMSIX_TVCTRL1_MASK_MASK I40E_MASK(0x1, I40E_VFMSIX_TVCTRL1_MASK_SHIFT)
+#define I40E_GLNVM_FLA 0x000B6108 /* Reset: POR */
+#define I40E_GLNVM_FLA_FL_SCK_SHIFT 0
+#define I40E_GLNVM_FLA_FL_SCK_MASK I40E_MASK(0x1, I40E_GLNVM_FLA_FL_SCK_SHIFT)
+#define I40E_GLNVM_FLA_FL_CE_SHIFT 1
+#define I40E_GLNVM_FLA_FL_CE_MASK I40E_MASK(0x1, I40E_GLNVM_FLA_FL_CE_SHIFT)
+#define I40E_GLNVM_FLA_FL_SI_SHIFT 2
+#define I40E_GLNVM_FLA_FL_SI_MASK I40E_MASK(0x1, I40E_GLNVM_FLA_FL_SI_SHIFT)
+#define I40E_GLNVM_FLA_FL_SO_SHIFT 3
+#define I40E_GLNVM_FLA_FL_SO_MASK I40E_MASK(0x1, I40E_GLNVM_FLA_FL_SO_SHIFT)
+#define I40E_GLNVM_FLA_FL_REQ_SHIFT 4
+#define I40E_GLNVM_FLA_FL_REQ_MASK I40E_MASK(0x1, I40E_GLNVM_FLA_FL_REQ_SHIFT)
+#define I40E_GLNVM_FLA_FL_GNT_SHIFT 5
+#define I40E_GLNVM_FLA_FL_GNT_MASK I40E_MASK(0x1, I40E_GLNVM_FLA_FL_GNT_SHIFT)
+#define I40E_GLNVM_FLA_LOCKED_SHIFT 6
+#define I40E_GLNVM_FLA_LOCKED_MASK I40E_MASK(0x1, I40E_GLNVM_FLA_LOCKED_SHIFT)
+#define I40E_GLNVM_FLA_FL_SADDR_SHIFT 18
+#define I40E_GLNVM_FLA_FL_SADDR_MASK I40E_MASK(0x7FF, I40E_GLNVM_FLA_FL_SADDR_SHIFT)
+#define I40E_GLNVM_FLA_FL_BUSY_SHIFT 30
+#define I40E_GLNVM_FLA_FL_BUSY_MASK I40E_MASK(0x1, I40E_GLNVM_FLA_FL_BUSY_SHIFT)
+#define I40E_GLNVM_FLA_FL_DER_SHIFT 31
+#define I40E_GLNVM_FLA_FL_DER_MASK I40E_MASK(0x1, I40E_GLNVM_FLA_FL_DER_SHIFT)
+#define I40E_GLNVM_FLASHID 0x000B6104 /* Reset: POR */
+#define I40E_GLNVM_FLASHID_FLASHID_SHIFT 0
+#define I40E_GLNVM_FLASHID_FLASHID_MASK I40E_MASK(0xFFFFFF, I40E_GLNVM_FLASHID_FLASHID_SHIFT)
+#define I40E_GLNVM_FLASHID_FLEEP_PERF_SHIFT 31
+#define I40E_GLNVM_FLASHID_FLEEP_PERF_MASK I40E_MASK(0x1, I40E_GLNVM_FLASHID_FLEEP_PERF_SHIFT)
+#define I40E_GLNVM_GENS 0x000B6100 /* Reset: POR */
+#define I40E_GLNVM_GENS_NVM_PRES_SHIFT 0
+#define I40E_GLNVM_GENS_NVM_PRES_MASK I40E_MASK(0x1, I40E_GLNVM_GENS_NVM_PRES_SHIFT)
+#define I40E_GLNVM_GENS_SR_SIZE_SHIFT 5
+#define I40E_GLNVM_GENS_SR_SIZE_MASK I40E_MASK(0x7, I40E_GLNVM_GENS_SR_SIZE_SHIFT)
+#define I40E_GLNVM_GENS_BANK1VAL_SHIFT 8
+#define I40E_GLNVM_GENS_BANK1VAL_MASK I40E_MASK(0x1, I40E_GLNVM_GENS_BANK1VAL_SHIFT)
+#define I40E_GLNVM_GENS_ALT_PRST_SHIFT 23
+#define I40E_GLNVM_GENS_ALT_PRST_MASK I40E_MASK(0x1, I40E_GLNVM_GENS_ALT_PRST_SHIFT)
+#define I40E_GLNVM_GENS_FL_AUTO_RD_SHIFT 25
+#define I40E_GLNVM_GENS_FL_AUTO_RD_MASK I40E_MASK(0x1, I40E_GLNVM_GENS_FL_AUTO_RD_SHIFT)
+#define I40E_GLNVM_PROTCSR(_i) (0x000B6010 + ((_i) * 4)) /* _i=0...59 */ /* Reset: POR */
+#define I40E_GLNVM_PROTCSR_MAX_INDEX 59
+#define I40E_GLNVM_PROTCSR_ADDR_BLOCK_SHIFT 0
+#define I40E_GLNVM_PROTCSR_ADDR_BLOCK_MASK I40E_MASK(0xFFFFFF, I40E_GLNVM_PROTCSR_ADDR_BLOCK_SHIFT)
+#define I40E_GLNVM_SRCTL 0x000B6110 /* Reset: POR */
+#define I40E_GLNVM_SRCTL_SRBUSY_SHIFT 0
+#define I40E_GLNVM_SRCTL_SRBUSY_MASK I40E_MASK(0x1, I40E_GLNVM_SRCTL_SRBUSY_SHIFT)
+#define I40E_GLNVM_SRCTL_ADDR_SHIFT 14
+#define I40E_GLNVM_SRCTL_ADDR_MASK I40E_MASK(0x7FFF, I40E_GLNVM_SRCTL_ADDR_SHIFT)
+#define I40E_GLNVM_SRCTL_WRITE_SHIFT 29
+#define I40E_GLNVM_SRCTL_WRITE_MASK I40E_MASK(0x1, I40E_GLNVM_SRCTL_WRITE_SHIFT)
+#define I40E_GLNVM_SRCTL_START_SHIFT 30
+#define I40E_GLNVM_SRCTL_START_MASK I40E_MASK(0x1, I40E_GLNVM_SRCTL_START_SHIFT)
+#define I40E_GLNVM_SRCTL_DONE_SHIFT 31
+#define I40E_GLNVM_SRCTL_DONE_MASK I40E_MASK(0x1, I40E_GLNVM_SRCTL_DONE_SHIFT)
+#define I40E_GLNVM_SRDATA 0x000B6114 /* Reset: POR */
+#define I40E_GLNVM_SRDATA_WRDATA_SHIFT 0
+#define I40E_GLNVM_SRDATA_WRDATA_MASK I40E_MASK(0xFFFF, I40E_GLNVM_SRDATA_WRDATA_SHIFT)
+#define I40E_GLNVM_SRDATA_RDDATA_SHIFT 16
+#define I40E_GLNVM_SRDATA_RDDATA_MASK I40E_MASK(0xFFFF, I40E_GLNVM_SRDATA_RDDATA_SHIFT)
+#define I40E_GLNVM_ULD 0x000B6008 /* Reset: POR */
+#define I40E_GLNVM_ULD_CONF_PCIR_DONE_SHIFT 0
+#define I40E_GLNVM_ULD_CONF_PCIR_DONE_MASK I40E_MASK(0x1, I40E_GLNVM_ULD_CONF_PCIR_DONE_SHIFT)
+#define I40E_GLNVM_ULD_CONF_PCIRTL_DONE_SHIFT 1
+#define I40E_GLNVM_ULD_CONF_PCIRTL_DONE_MASK I40E_MASK(0x1, I40E_GLNVM_ULD_CONF_PCIRTL_DONE_SHIFT)
+#define I40E_GLNVM_ULD_CONF_LCB_DONE_SHIFT 2
+#define I40E_GLNVM_ULD_CONF_LCB_DONE_MASK I40E_MASK(0x1, I40E_GLNVM_ULD_CONF_LCB_DONE_SHIFT)
+#define I40E_GLNVM_ULD_CONF_CORE_DONE_SHIFT 3
+#define I40E_GLNVM_ULD_CONF_CORE_DONE_MASK I40E_MASK(0x1, I40E_GLNVM_ULD_CONF_CORE_DONE_SHIFT)
+#define I40E_GLNVM_ULD_CONF_GLOBAL_DONE_SHIFT 4
+#define I40E_GLNVM_ULD_CONF_GLOBAL_DONE_MASK I40E_MASK(0x1, I40E_GLNVM_ULD_CONF_GLOBAL_DONE_SHIFT)
+#define I40E_GLNVM_ULD_CONF_POR_DONE_SHIFT 5
+#define I40E_GLNVM_ULD_CONF_POR_DONE_MASK I40E_MASK(0x1, I40E_GLNVM_ULD_CONF_POR_DONE_SHIFT)
+#define I40E_GLNVM_ULD_CONF_PCIE_ANA_DONE_SHIFT 6
+#define I40E_GLNVM_ULD_CONF_PCIE_ANA_DONE_MASK I40E_MASK(0x1, I40E_GLNVM_ULD_CONF_PCIE_ANA_DONE_SHIFT)
+#define I40E_GLNVM_ULD_CONF_PHY_ANA_DONE_SHIFT 7
+#define I40E_GLNVM_ULD_CONF_PHY_ANA_DONE_MASK I40E_MASK(0x1, I40E_GLNVM_ULD_CONF_PHY_ANA_DONE_SHIFT)
+#define I40E_GLNVM_ULD_CONF_EMP_DONE_SHIFT 8
+#define I40E_GLNVM_ULD_CONF_EMP_DONE_MASK I40E_MASK(0x1, I40E_GLNVM_ULD_CONF_EMP_DONE_SHIFT)
+#define I40E_GLNVM_ULD_CONF_PCIALT_DONE_SHIFT 9
+#define I40E_GLNVM_ULD_CONF_PCIALT_DONE_MASK I40E_MASK(0x1, I40E_GLNVM_ULD_CONF_PCIALT_DONE_SHIFT)
+#define I40E_GLPCI_BYTCTH 0x0009C484 /* Reset: PCIR */
+#define I40E_GLPCI_BYTCTH_PCI_COUNT_BW_BCT_SHIFT 0
+#define I40E_GLPCI_BYTCTH_PCI_COUNT_BW_BCT_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPCI_BYTCTH_PCI_COUNT_BW_BCT_SHIFT)
+#define I40E_GLPCI_BYTCTL 0x0009C488 /* Reset: PCIR */
+#define I40E_GLPCI_BYTCTL_PCI_COUNT_BW_BCT_SHIFT 0
+#define I40E_GLPCI_BYTCTL_PCI_COUNT_BW_BCT_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPCI_BYTCTL_PCI_COUNT_BW_BCT_SHIFT)
+#define I40E_GLPCI_CAPCTRL 0x000BE4A4 /* Reset: PCIR */
+#define I40E_GLPCI_CAPCTRL_VPD_EN_SHIFT 0
+#define I40E_GLPCI_CAPCTRL_VPD_EN_MASK I40E_MASK(0x1, I40E_GLPCI_CAPCTRL_VPD_EN_SHIFT)
+#define I40E_GLPCI_CAPSUP 0x000BE4A8 /* Reset: PCIR */
+#define I40E_GLPCI_CAPSUP_PCIE_VER_SHIFT 0
+#define I40E_GLPCI_CAPSUP_PCIE_VER_MASK I40E_MASK(0x1, I40E_GLPCI_CAPSUP_PCIE_VER_SHIFT)
+#define I40E_GLPCI_CAPSUP_LTR_EN_SHIFT 2
+#define I40E_GLPCI_CAPSUP_LTR_EN_MASK I40E_MASK(0x1, I40E_GLPCI_CAPSUP_LTR_EN_SHIFT)
+#define I40E_GLPCI_CAPSUP_TPH_EN_SHIFT 3
+#define I40E_GLPCI_CAPSUP_TPH_EN_MASK I40E_MASK(0x1, I40E_GLPCI_CAPSUP_TPH_EN_SHIFT)
+#define I40E_GLPCI_CAPSUP_ARI_EN_SHIFT 4
+#define I40E_GLPCI_CAPSUP_ARI_EN_MASK I40E_MASK(0x1, I40E_GLPCI_CAPSUP_ARI_EN_SHIFT)
+#define I40E_GLPCI_CAPSUP_IOV_EN_SHIFT 5
+#define I40E_GLPCI_CAPSUP_IOV_EN_MASK I40E_MASK(0x1, I40E_GLPCI_CAPSUP_IOV_EN_SHIFT)
+#define I40E_GLPCI_CAPSUP_ACS_EN_SHIFT 6
+#define I40E_GLPCI_CAPSUP_ACS_EN_MASK I40E_MASK(0x1, I40E_GLPCI_CAPSUP_ACS_EN_SHIFT)
+#define I40E_GLPCI_CAPSUP_SEC_EN_SHIFT 7
+#define I40E_GLPCI_CAPSUP_SEC_EN_MASK I40E_MASK(0x1, I40E_GLPCI_CAPSUP_SEC_EN_SHIFT)
+#define I40E_GLPCI_CAPSUP_ECRC_GEN_EN_SHIFT 16
+#define I40E_GLPCI_CAPSUP_ECRC_GEN_EN_MASK I40E_MASK(0x1, I40E_GLPCI_CAPSUP_ECRC_GEN_EN_SHIFT)
+#define I40E_GLPCI_CAPSUP_ECRC_CHK_EN_SHIFT 17
+#define I40E_GLPCI_CAPSUP_ECRC_CHK_EN_MASK I40E_MASK(0x1, I40E_GLPCI_CAPSUP_ECRC_CHK_EN_SHIFT)
+#define I40E_GLPCI_CAPSUP_IDO_EN_SHIFT 18
+#define I40E_GLPCI_CAPSUP_IDO_EN_MASK I40E_MASK(0x1, I40E_GLPCI_CAPSUP_IDO_EN_SHIFT)
+#define I40E_GLPCI_CAPSUP_MSI_MASK_SHIFT 19
+#define I40E_GLPCI_CAPSUP_MSI_MASK_MASK I40E_MASK(0x1, I40E_GLPCI_CAPSUP_MSI_MASK_SHIFT)
+#define I40E_GLPCI_CAPSUP_CSR_CONF_EN_SHIFT 20
+#define I40E_GLPCI_CAPSUP_CSR_CONF_EN_MASK I40E_MASK(0x1, I40E_GLPCI_CAPSUP_CSR_CONF_EN_SHIFT)
+#define I40E_GLPCI_CAPSUP_LOAD_SUBSYS_ID_SHIFT 30
+#define I40E_GLPCI_CAPSUP_LOAD_SUBSYS_ID_MASK I40E_MASK(0x1, I40E_GLPCI_CAPSUP_LOAD_SUBSYS_ID_SHIFT)
+#define I40E_GLPCI_CAPSUP_LOAD_DEV_ID_SHIFT 31
+#define I40E_GLPCI_CAPSUP_LOAD_DEV_ID_MASK I40E_MASK(0x1, I40E_GLPCI_CAPSUP_LOAD_DEV_ID_SHIFT)
+#define I40E_GLPCI_CNF 0x000BE4C0 /* Reset: POR */
+#define I40E_GLPCI_CNF_FLEX10_SHIFT 1
+#define I40E_GLPCI_CNF_FLEX10_MASK I40E_MASK(0x1, I40E_GLPCI_CNF_FLEX10_SHIFT)
+#define I40E_GLPCI_CNF_WAKE_PIN_EN_SHIFT 2
+#define I40E_GLPCI_CNF_WAKE_PIN_EN_MASK I40E_MASK(0x1, I40E_GLPCI_CNF_WAKE_PIN_EN_SHIFT)
+#define I40E_GLPCI_CNF2 0x000BE494 /* Reset: PCIR */
+#define I40E_GLPCI_CNF2_RO_DIS_SHIFT 0
+#define I40E_GLPCI_CNF2_RO_DIS_MASK I40E_MASK(0x1, I40E_GLPCI_CNF2_RO_DIS_SHIFT)
+#define I40E_GLPCI_CNF2_CACHELINE_SIZE_SHIFT 1
+#define I40E_GLPCI_CNF2_CACHELINE_SIZE_MASK I40E_MASK(0x1, I40E_GLPCI_CNF2_CACHELINE_SIZE_SHIFT)
+#define I40E_GLPCI_CNF2_MSI_X_PF_N_SHIFT 2
+#define I40E_GLPCI_CNF2_MSI_X_PF_N_MASK I40E_MASK(0x7FF, I40E_GLPCI_CNF2_MSI_X_PF_N_SHIFT)
+#define I40E_GLPCI_CNF2_MSI_X_VF_N_SHIFT 13
+#define I40E_GLPCI_CNF2_MSI_X_VF_N_MASK I40E_MASK(0x7FF, I40E_GLPCI_CNF2_MSI_X_VF_N_SHIFT)
+#define I40E_GLPCI_DREVID 0x0009C480 /* Reset: PCIR */
+#define I40E_GLPCI_DREVID_DEFAULT_REVID_SHIFT 0
+#define I40E_GLPCI_DREVID_DEFAULT_REVID_MASK I40E_MASK(0xFF, I40E_GLPCI_DREVID_DEFAULT_REVID_SHIFT)
+#define I40E_GLPCI_GSCL_1 0x0009C48C /* Reset: PCIR */
+#define I40E_GLPCI_GSCL_1_GIO_COUNT_EN_0_SHIFT 0
+#define I40E_GLPCI_GSCL_1_GIO_COUNT_EN_0_MASK I40E_MASK(0x1, I40E_GLPCI_GSCL_1_GIO_COUNT_EN_0_SHIFT)
+#define I40E_GLPCI_GSCL_1_GIO_COUNT_EN_1_SHIFT 1
+#define I40E_GLPCI_GSCL_1_GIO_COUNT_EN_1_MASK I40E_MASK(0x1, I40E_GLPCI_GSCL_1_GIO_COUNT_EN_1_SHIFT)
+#define I40E_GLPCI_GSCL_1_GIO_COUNT_EN_2_SHIFT 2
+#define I40E_GLPCI_GSCL_1_GIO_COUNT_EN_2_MASK I40E_MASK(0x1, I40E_GLPCI_GSCL_1_GIO_COUNT_EN_2_SHIFT)
+#define I40E_GLPCI_GSCL_1_GIO_COUNT_EN_3_SHIFT 3
+#define I40E_GLPCI_GSCL_1_GIO_COUNT_EN_3_MASK I40E_MASK(0x1, I40E_GLPCI_GSCL_1_GIO_COUNT_EN_3_SHIFT)
+#define I40E_GLPCI_GSCL_1_LBC_ENABLE_0_SHIFT 4
+#define I40E_GLPCI_GSCL_1_LBC_ENABLE_0_MASK I40E_MASK(0x1, I40E_GLPCI_GSCL_1_LBC_ENABLE_0_SHIFT)
+#define I40E_GLPCI_GSCL_1_LBC_ENABLE_1_SHIFT 5
+#define I40E_GLPCI_GSCL_1_LBC_ENABLE_1_MASK I40E_MASK(0x1, I40E_GLPCI_GSCL_1_LBC_ENABLE_1_SHIFT)
+#define I40E_GLPCI_GSCL_1_LBC_ENABLE_2_SHIFT 6
+#define I40E_GLPCI_GSCL_1_LBC_ENABLE_2_MASK I40E_MASK(0x1, I40E_GLPCI_GSCL_1_LBC_ENABLE_2_SHIFT)
+#define I40E_GLPCI_GSCL_1_LBC_ENABLE_3_SHIFT 7
+#define I40E_GLPCI_GSCL_1_LBC_ENABLE_3_MASK I40E_MASK(0x1, I40E_GLPCI_GSCL_1_LBC_ENABLE_3_SHIFT)
+#define I40E_GLPCI_GSCL_1_PCI_COUNT_LAT_EN_SHIFT 8
+#define I40E_GLPCI_GSCL_1_PCI_COUNT_LAT_EN_MASK I40E_MASK(0x1, I40E_GLPCI_GSCL_1_PCI_COUNT_LAT_EN_SHIFT)
+#define I40E_GLPCI_GSCL_1_PCI_COUNT_LAT_EV_SHIFT 9
+#define I40E_GLPCI_GSCL_1_PCI_COUNT_LAT_EV_MASK I40E_MASK(0x1F, I40E_GLPCI_GSCL_1_PCI_COUNT_LAT_EV_SHIFT)
+#define I40E_GLPCI_GSCL_1_PCI_COUNT_BW_EN_SHIFT 14
+#define I40E_GLPCI_GSCL_1_PCI_COUNT_BW_EN_MASK I40E_MASK(0x1, I40E_GLPCI_GSCL_1_PCI_COUNT_BW_EN_SHIFT)
+#define I40E_GLPCI_GSCL_1_PCI_COUNT_BW_EV_SHIFT 15
+#define I40E_GLPCI_GSCL_1_PCI_COUNT_BW_EV_MASK I40E_MASK(0x1F, I40E_GLPCI_GSCL_1_PCI_COUNT_BW_EV_SHIFT)
+#define I40E_GLPCI_GSCL_1_GIO_64_BIT_EN_SHIFT 28
+#define I40E_GLPCI_GSCL_1_GIO_64_BIT_EN_MASK I40E_MASK(0x1, I40E_GLPCI_GSCL_1_GIO_64_BIT_EN_SHIFT)
+#define I40E_GLPCI_GSCL_1_GIO_COUNT_RESET_SHIFT 29
+#define I40E_GLPCI_GSCL_1_GIO_COUNT_RESET_MASK I40E_MASK(0x1, I40E_GLPCI_GSCL_1_GIO_COUNT_RESET_SHIFT)
+#define I40E_GLPCI_GSCL_1_GIO_COUNT_STOP_SHIFT 30
+#define I40E_GLPCI_GSCL_1_GIO_COUNT_STOP_MASK I40E_MASK(0x1, I40E_GLPCI_GSCL_1_GIO_COUNT_STOP_SHIFT)
+#define I40E_GLPCI_GSCL_1_GIO_COUNT_START_SHIFT 31
+#define I40E_GLPCI_GSCL_1_GIO_COUNT_START_MASK I40E_MASK(0x1, I40E_GLPCI_GSCL_1_GIO_COUNT_START_SHIFT)
+#define I40E_GLPCI_GSCL_2 0x0009C490 /* Reset: PCIR */
+#define I40E_GLPCI_GSCL_2_GIO_EVENT_NUM_0_SHIFT 0
+#define I40E_GLPCI_GSCL_2_GIO_EVENT_NUM_0_MASK I40E_MASK(0xFF, I40E_GLPCI_GSCL_2_GIO_EVENT_NUM_0_SHIFT)
+#define I40E_GLPCI_GSCL_2_GIO_EVENT_NUM_1_SHIFT 8
+#define I40E_GLPCI_GSCL_2_GIO_EVENT_NUM_1_MASK I40E_MASK(0xFF, I40E_GLPCI_GSCL_2_GIO_EVENT_NUM_1_SHIFT)
+#define I40E_GLPCI_GSCL_2_GIO_EVENT_NUM_2_SHIFT 16
+#define I40E_GLPCI_GSCL_2_GIO_EVENT_NUM_2_MASK I40E_MASK(0xFF, I40E_GLPCI_GSCL_2_GIO_EVENT_NUM_2_SHIFT)
+#define I40E_GLPCI_GSCL_2_GIO_EVENT_NUM_3_SHIFT 24
+#define I40E_GLPCI_GSCL_2_GIO_EVENT_NUM_3_MASK I40E_MASK(0xFF, I40E_GLPCI_GSCL_2_GIO_EVENT_NUM_3_SHIFT)
+#define I40E_GLPCI_GSCL_5_8(_i) (0x0009C494 + ((_i) * 4)) /* _i=0...3 */ /* Reset: PCIR */
+#define I40E_GLPCI_GSCL_5_8_MAX_INDEX 3
+#define I40E_GLPCI_GSCL_5_8_LBC_THRESHOLD_N_SHIFT 0
+#define I40E_GLPCI_GSCL_5_8_LBC_THRESHOLD_N_MASK I40E_MASK(0xFFFF, I40E_GLPCI_GSCL_5_8_LBC_THRESHOLD_N_SHIFT)
+#define I40E_GLPCI_GSCL_5_8_LBC_TIMER_N_SHIFT 16
+#define I40E_GLPCI_GSCL_5_8_LBC_TIMER_N_MASK I40E_MASK(0xFFFF, I40E_GLPCI_GSCL_5_8_LBC_TIMER_N_SHIFT)
+#define I40E_GLPCI_GSCN_0_3(_i) (0x0009C4A4 + ((_i) * 4)) /* _i=0...3 */ /* Reset: PCIR */
+#define I40E_GLPCI_GSCN_0_3_MAX_INDEX 3
+#define I40E_GLPCI_GSCN_0_3_EVENT_COUNTER_SHIFT 0
+#define I40E_GLPCI_GSCN_0_3_EVENT_COUNTER_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPCI_GSCN_0_3_EVENT_COUNTER_SHIFT)
+#define I40E_GLPCI_LBARCTRL 0x000BE484 /* Reset: POR */
+#define I40E_GLPCI_LBARCTRL_PREFBAR_SHIFT 0
+#define I40E_GLPCI_LBARCTRL_PREFBAR_MASK I40E_MASK(0x1, I40E_GLPCI_LBARCTRL_PREFBAR_SHIFT)
+#define I40E_GLPCI_LBARCTRL_BAR32_SHIFT 1
+#define I40E_GLPCI_LBARCTRL_BAR32_MASK I40E_MASK(0x1, I40E_GLPCI_LBARCTRL_BAR32_SHIFT)
+#define I40E_GLPCI_LBARCTRL_FLASH_EXPOSE_SHIFT 3
+#define I40E_GLPCI_LBARCTRL_FLASH_EXPOSE_MASK I40E_MASK(0x1, I40E_GLPCI_LBARCTRL_FLASH_EXPOSE_SHIFT)
+#define I40E_GLPCI_LBARCTRL_RSVD_4_SHIFT 4
+#define I40E_GLPCI_LBARCTRL_RSVD_4_MASK I40E_MASK(0x3, I40E_GLPCI_LBARCTRL_RSVD_4_SHIFT)
+#define I40E_GLPCI_LBARCTRL_FL_SIZE_SHIFT 6
+#define I40E_GLPCI_LBARCTRL_FL_SIZE_MASK I40E_MASK(0x7, I40E_GLPCI_LBARCTRL_FL_SIZE_SHIFT)
+#define I40E_GLPCI_LBARCTRL_RSVD_10_SHIFT 10
+#define I40E_GLPCI_LBARCTRL_RSVD_10_MASK I40E_MASK(0x1, I40E_GLPCI_LBARCTRL_RSVD_10_SHIFT)
+#define I40E_GLPCI_LBARCTRL_EXROM_SIZE_SHIFT 11
+#define I40E_GLPCI_LBARCTRL_EXROM_SIZE_MASK I40E_MASK(0x7, I40E_GLPCI_LBARCTRL_EXROM_SIZE_SHIFT)
+#define I40E_GLPCI_LINKCAP 0x000BE4AC /* Reset: PCIR */
+#define I40E_GLPCI_LINKCAP_LINK_SPEEDS_VECTOR_SHIFT 0
+#define I40E_GLPCI_LINKCAP_LINK_SPEEDS_VECTOR_MASK I40E_MASK(0x3F, I40E_GLPCI_LINKCAP_LINK_SPEEDS_VECTOR_SHIFT)
+#define I40E_GLPCI_LINKCAP_MAX_PAYLOAD_SHIFT 6
+#define I40E_GLPCI_LINKCAP_MAX_PAYLOAD_MASK I40E_MASK(0x7, I40E_GLPCI_LINKCAP_MAX_PAYLOAD_SHIFT)
+#define I40E_GLPCI_LINKCAP_MAX_LINK_WIDTH_SHIFT 9
+#define I40E_GLPCI_LINKCAP_MAX_LINK_WIDTH_MASK I40E_MASK(0xF, I40E_GLPCI_LINKCAP_MAX_LINK_WIDTH_SHIFT)
+#define I40E_GLPCI_PCIERR 0x000BE4FC /* Reset: PCIR */
+#define I40E_GLPCI_PCIERR_PCIE_ERR_REP_SHIFT 0
+#define I40E_GLPCI_PCIERR_PCIE_ERR_REP_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPCI_PCIERR_PCIE_ERR_REP_SHIFT)
+#define I40E_GLPCI_PKTCT 0x0009C4BC /* Reset: PCIR */
+#define I40E_GLPCI_PKTCT_PCI_COUNT_BW_PCT_SHIFT 0
+#define I40E_GLPCI_PKTCT_PCI_COUNT_BW_PCT_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPCI_PKTCT_PCI_COUNT_BW_PCT_SHIFT)
+#define I40E_GLPCI_PM_MUX_NPQ 0x0009C4F4 /* Reset: PCIR */
+#define I40E_GLPCI_PM_MUX_NPQ_NPQ_NUM_PORT_SEL_SHIFT 0
+#define I40E_GLPCI_PM_MUX_NPQ_NPQ_NUM_PORT_SEL_MASK I40E_MASK(0x7, I40E_GLPCI_PM_MUX_NPQ_NPQ_NUM_PORT_SEL_SHIFT)
+#define I40E_GLPCI_PM_MUX_NPQ_INNER_NPQ_SEL_SHIFT 16
+#define I40E_GLPCI_PM_MUX_NPQ_INNER_NPQ_SEL_MASK I40E_MASK(0x1F, I40E_GLPCI_PM_MUX_NPQ_INNER_NPQ_SEL_SHIFT)
+#define I40E_GLPCI_PM_MUX_PFB 0x0009C4F0 /* Reset: PCIR */
+#define I40E_GLPCI_PM_MUX_PFB_PFB_PORT_SEL_SHIFT 0
+#define I40E_GLPCI_PM_MUX_PFB_PFB_PORT_SEL_MASK I40E_MASK(0x1F, I40E_GLPCI_PM_MUX_PFB_PFB_PORT_SEL_SHIFT)
+#define I40E_GLPCI_PM_MUX_PFB_INNER_PORT_SEL_SHIFT 16
+#define I40E_GLPCI_PM_MUX_PFB_INNER_PORT_SEL_MASK I40E_MASK(0x7, I40E_GLPCI_PM_MUX_PFB_INNER_PORT_SEL_SHIFT)
+#define I40E_GLPCI_PMSUP 0x000BE4B0 /* Reset: PCIR */
+#define I40E_GLPCI_PMSUP_ASPM_SUP_SHIFT 0
+#define I40E_GLPCI_PMSUP_ASPM_SUP_MASK I40E_MASK(0x3, I40E_GLPCI_PMSUP_ASPM_SUP_SHIFT)
+#define I40E_GLPCI_PMSUP_L0S_EXIT_LAT_SHIFT 2
+#define I40E_GLPCI_PMSUP_L0S_EXIT_LAT_MASK I40E_MASK(0x7, I40E_GLPCI_PMSUP_L0S_EXIT_LAT_SHIFT)
+#define I40E_GLPCI_PMSUP_L1_EXIT_LAT_SHIFT 5
+#define I40E_GLPCI_PMSUP_L1_EXIT_LAT_MASK I40E_MASK(0x7, I40E_GLPCI_PMSUP_L1_EXIT_LAT_SHIFT)
+#define I40E_GLPCI_PMSUP_L0S_ACC_LAT_SHIFT 8
+#define I40E_GLPCI_PMSUP_L0S_ACC_LAT_MASK I40E_MASK(0x7, I40E_GLPCI_PMSUP_L0S_ACC_LAT_SHIFT)
+#define I40E_GLPCI_PMSUP_L1_ACC_LAT_SHIFT 11
+#define I40E_GLPCI_PMSUP_L1_ACC_LAT_MASK I40E_MASK(0x7, I40E_GLPCI_PMSUP_L1_ACC_LAT_SHIFT)
+#define I40E_GLPCI_PMSUP_SLOT_CLK_SHIFT 14
+#define I40E_GLPCI_PMSUP_SLOT_CLK_MASK I40E_MASK(0x1, I40E_GLPCI_PMSUP_SLOT_CLK_SHIFT)
+#define I40E_GLPCI_PMSUP_OBFF_SUP_SHIFT 15
+#define I40E_GLPCI_PMSUP_OBFF_SUP_MASK I40E_MASK(0x3, I40E_GLPCI_PMSUP_OBFF_SUP_SHIFT)
+#define I40E_GLPCI_PQ_MAX_USED_SPC 0x0009C4EC /* Reset: PCIR */
+#define I40E_GLPCI_PQ_MAX_USED_SPC_GLPCI_PQ_MAX_USED_SPC_12_SHIFT 0
+#define I40E_GLPCI_PQ_MAX_USED_SPC_GLPCI_PQ_MAX_USED_SPC_12_MASK I40E_MASK(0xFF, I40E_GLPCI_PQ_MAX_USED_SPC_GLPCI_PQ_MAX_USED_SPC_12_SHIFT)
+#define I40E_GLPCI_PQ_MAX_USED_SPC_GLPCI_PQ_MAX_USED_SPC_13_SHIFT 8
+#define I40E_GLPCI_PQ_MAX_USED_SPC_GLPCI_PQ_MAX_USED_SPC_13_MASK I40E_MASK(0xFF, I40E_GLPCI_PQ_MAX_USED_SPC_GLPCI_PQ_MAX_USED_SPC_13_SHIFT)
+#define I40E_GLPCI_PWRDATA 0x000BE490 /* Reset: PCIR */
+#define I40E_GLPCI_PWRDATA_D0_POWER_SHIFT 0
+#define I40E_GLPCI_PWRDATA_D0_POWER_MASK I40E_MASK(0xFF, I40E_GLPCI_PWRDATA_D0_POWER_SHIFT)
+#define I40E_GLPCI_PWRDATA_COMM_POWER_SHIFT 8
+#define I40E_GLPCI_PWRDATA_COMM_POWER_MASK I40E_MASK(0xFF, I40E_GLPCI_PWRDATA_COMM_POWER_SHIFT)
+#define I40E_GLPCI_PWRDATA_D3_POWER_SHIFT 16
+#define I40E_GLPCI_PWRDATA_D3_POWER_MASK I40E_MASK(0xFF, I40E_GLPCI_PWRDATA_D3_POWER_SHIFT)
+#define I40E_GLPCI_PWRDATA_DATA_SCALE_SHIFT 24
+#define I40E_GLPCI_PWRDATA_DATA_SCALE_MASK I40E_MASK(0x3, I40E_GLPCI_PWRDATA_DATA_SCALE_SHIFT)
+#define I40E_GLPCI_REVID 0x000BE4B4 /* Reset: PCIR */
+#define I40E_GLPCI_REVID_NVM_REVID_SHIFT 0
+#define I40E_GLPCI_REVID_NVM_REVID_MASK I40E_MASK(0xFF, I40E_GLPCI_REVID_NVM_REVID_SHIFT)
+#define I40E_GLPCI_SERH 0x000BE49C /* Reset: PCIR */
+#define I40E_GLPCI_SERH_SER_NUM_H_SHIFT 0
+#define I40E_GLPCI_SERH_SER_NUM_H_MASK I40E_MASK(0xFFFF, I40E_GLPCI_SERH_SER_NUM_H_SHIFT)
+#define I40E_GLPCI_SERL 0x000BE498 /* Reset: PCIR */
+#define I40E_GLPCI_SERL_SER_NUM_L_SHIFT 0
+#define I40E_GLPCI_SERL_SER_NUM_L_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPCI_SERL_SER_NUM_L_SHIFT)
+#define I40E_GLPCI_SPARE_BITS_0 0x0009C4F8 /* Reset: PCIR */
+#define I40E_GLPCI_SPARE_BITS_0_SPARE_BITS_SHIFT 0
+#define I40E_GLPCI_SPARE_BITS_0_SPARE_BITS_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPCI_SPARE_BITS_0_SPARE_BITS_SHIFT)
+#define I40E_GLPCI_SPARE_BITS_1 0x0009C4FC /* Reset: PCIR */
+#define I40E_GLPCI_SPARE_BITS_1_SPARE_BITS_SHIFT 0
+#define I40E_GLPCI_SPARE_BITS_1_SPARE_BITS_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPCI_SPARE_BITS_1_SPARE_BITS_SHIFT)
+#define I40E_GLPCI_SUBVENID 0x000BE48C /* Reset: PCIR */
+#define I40E_GLPCI_SUBVENID_SUB_VEN_ID_SHIFT 0
+#define I40E_GLPCI_SUBVENID_SUB_VEN_ID_MASK I40E_MASK(0xFFFF, I40E_GLPCI_SUBVENID_SUB_VEN_ID_SHIFT)
+#define I40E_GLPCI_UPADD 0x000BE4F8 /* Reset: PCIR */
+#define I40E_GLPCI_UPADD_ADDRESS_SHIFT 1
+#define I40E_GLPCI_UPADD_ADDRESS_MASK I40E_MASK(0x7FFFFFFF, I40E_GLPCI_UPADD_ADDRESS_SHIFT)
+#define I40E_GLPCI_VENDORID 0x000BE518 /* Reset: PCIR */
+#define I40E_GLPCI_VENDORID_VENDORID_SHIFT 0
+#define I40E_GLPCI_VENDORID_VENDORID_MASK I40E_MASK(0xFFFF, I40E_GLPCI_VENDORID_VENDORID_SHIFT)
+#define I40E_GLPCI_VFSUP 0x000BE4B8 /* Reset: PCIR */
+#define I40E_GLPCI_VFSUP_VF_PREFETCH_SHIFT 0
+#define I40E_GLPCI_VFSUP_VF_PREFETCH_MASK I40E_MASK(0x1, I40E_GLPCI_VFSUP_VF_PREFETCH_SHIFT)
+#define I40E_GLPCI_VFSUP_VR_BAR_TYPE_SHIFT 1
+#define I40E_GLPCI_VFSUP_VR_BAR_TYPE_MASK I40E_MASK(0x1, I40E_GLPCI_VFSUP_VR_BAR_TYPE_SHIFT)
+#define I40E_GLTPH_CTRL 0x000BE480 /* Reset: PCIR */
+#define I40E_GLTPH_CTRL_DESC_PH_SHIFT 9
+#define I40E_GLTPH_CTRL_DESC_PH_MASK I40E_MASK(0x3, I40E_GLTPH_CTRL_DESC_PH_SHIFT)
+#define I40E_GLTPH_CTRL_DATA_PH_SHIFT 11
+#define I40E_GLTPH_CTRL_DATA_PH_MASK I40E_MASK(0x3, I40E_GLTPH_CTRL_DATA_PH_SHIFT)
+#define I40E_PF_FUNC_RID 0x0009C000 /* Reset: PCIR */
+#define I40E_PF_FUNC_RID_FUNCTION_NUMBER_SHIFT 0
+#define I40E_PF_FUNC_RID_FUNCTION_NUMBER_MASK I40E_MASK(0x7, I40E_PF_FUNC_RID_FUNCTION_NUMBER_SHIFT)
+#define I40E_PF_FUNC_RID_DEVICE_NUMBER_SHIFT 3
+#define I40E_PF_FUNC_RID_DEVICE_NUMBER_MASK I40E_MASK(0x1F, I40E_PF_FUNC_RID_DEVICE_NUMBER_SHIFT)
+#define I40E_PF_FUNC_RID_BUS_NUMBER_SHIFT 8
+#define I40E_PF_FUNC_RID_BUS_NUMBER_MASK I40E_MASK(0xFF, I40E_PF_FUNC_RID_BUS_NUMBER_SHIFT)
+#define I40E_PF_PCI_CIAA 0x0009C080 /* Reset: FLR */
+#define I40E_PF_PCI_CIAA_ADDRESS_SHIFT 0
+#define I40E_PF_PCI_CIAA_ADDRESS_MASK I40E_MASK(0xFFF, I40E_PF_PCI_CIAA_ADDRESS_SHIFT)
+#define I40E_PF_PCI_CIAA_VF_NUM_SHIFT 12
+#define I40E_PF_PCI_CIAA_VF_NUM_MASK I40E_MASK(0x7F, I40E_PF_PCI_CIAA_VF_NUM_SHIFT)
+#define I40E_PF_PCI_CIAD 0x0009C100 /* Reset: FLR */
+#define I40E_PF_PCI_CIAD_DATA_SHIFT 0
+#define I40E_PF_PCI_CIAD_DATA_MASK I40E_MASK(0xFFFFFFFF, I40E_PF_PCI_CIAD_DATA_SHIFT)
+#define I40E_PFPCI_CLASS 0x000BE400 /* Reset: PCIR */
+#define I40E_PFPCI_CLASS_STORAGE_CLASS_SHIFT 0
+#define I40E_PFPCI_CLASS_STORAGE_CLASS_MASK I40E_MASK(0x1, I40E_PFPCI_CLASS_STORAGE_CLASS_SHIFT)
+#define I40E_PFPCI_CLASS_RESERVED_1_SHIFT 1
+#define I40E_PFPCI_CLASS_RESERVED_1_MASK I40E_MASK(0x1, I40E_PFPCI_CLASS_RESERVED_1_SHIFT)
+#define I40E_PFPCI_CLASS_PF_IS_LAN_SHIFT 2
+#define I40E_PFPCI_CLASS_PF_IS_LAN_MASK I40E_MASK(0x1, I40E_PFPCI_CLASS_PF_IS_LAN_SHIFT)
+#define I40E_PFPCI_CNF 0x000BE000 /* Reset: PCIR */
+#define I40E_PFPCI_CNF_MSI_EN_SHIFT 2
+#define I40E_PFPCI_CNF_MSI_EN_MASK I40E_MASK(0x1, I40E_PFPCI_CNF_MSI_EN_SHIFT)
+#define I40E_PFPCI_CNF_EXROM_DIS_SHIFT 3
+#define I40E_PFPCI_CNF_EXROM_DIS_MASK I40E_MASK(0x1, I40E_PFPCI_CNF_EXROM_DIS_SHIFT)
+#define I40E_PFPCI_CNF_IO_BAR_SHIFT 4
+#define I40E_PFPCI_CNF_IO_BAR_MASK I40E_MASK(0x1, I40E_PFPCI_CNF_IO_BAR_SHIFT)
+#define I40E_PFPCI_CNF_INT_PIN_SHIFT 5
+#define I40E_PFPCI_CNF_INT_PIN_MASK I40E_MASK(0x3, I40E_PFPCI_CNF_INT_PIN_SHIFT)
+#define I40E_PFPCI_DEVID 0x000BE080 /* Reset: PCIR */
+#define I40E_PFPCI_DEVID_PF_DEV_ID_SHIFT 0
+#define I40E_PFPCI_DEVID_PF_DEV_ID_MASK I40E_MASK(0xFFFF, I40E_PFPCI_DEVID_PF_DEV_ID_SHIFT)
+#define I40E_PFPCI_DEVID_VF_DEV_ID_SHIFT 16
+#define I40E_PFPCI_DEVID_VF_DEV_ID_MASK I40E_MASK(0xFFFF, I40E_PFPCI_DEVID_VF_DEV_ID_SHIFT)
+#define I40E_PFPCI_FACTPS 0x0009C180 /* Reset: FLR */
+#define I40E_PFPCI_FACTPS_FUNC_POWER_STATE_SHIFT 0
+#define I40E_PFPCI_FACTPS_FUNC_POWER_STATE_MASK I40E_MASK(0x3, I40E_PFPCI_FACTPS_FUNC_POWER_STATE_SHIFT)
+#define I40E_PFPCI_FACTPS_FUNC_AUX_EN_SHIFT 3
+#define I40E_PFPCI_FACTPS_FUNC_AUX_EN_MASK I40E_MASK(0x1, I40E_PFPCI_FACTPS_FUNC_AUX_EN_SHIFT)
+#define I40E_PFPCI_FUNC 0x000BE200 /* Reset: POR */
+#define I40E_PFPCI_FUNC_FUNC_DIS_SHIFT 0
+#define I40E_PFPCI_FUNC_FUNC_DIS_MASK I40E_MASK(0x1, I40E_PFPCI_FUNC_FUNC_DIS_SHIFT)
+#define I40E_PFPCI_FUNC_ALLOW_FUNC_DIS_SHIFT 1
+#define I40E_PFPCI_FUNC_ALLOW_FUNC_DIS_MASK I40E_MASK(0x1, I40E_PFPCI_FUNC_ALLOW_FUNC_DIS_SHIFT)
+#define I40E_PFPCI_FUNC_DIS_FUNC_ON_PORT_DIS_SHIFT 2
+#define I40E_PFPCI_FUNC_DIS_FUNC_ON_PORT_DIS_MASK I40E_MASK(0x1, I40E_PFPCI_FUNC_DIS_FUNC_ON_PORT_DIS_SHIFT)
+#define I40E_PFPCI_FUNC2 0x000BE180 /* Reset: PCIR */
+#define I40E_PFPCI_FUNC2_EMP_FUNC_DIS_SHIFT 0
+#define I40E_PFPCI_FUNC2_EMP_FUNC_DIS_MASK I40E_MASK(0x1, I40E_PFPCI_FUNC2_EMP_FUNC_DIS_SHIFT)
+#define I40E_PFPCI_ICAUSE 0x0009C200 /* Reset: PFR */
+#define I40E_PFPCI_ICAUSE_PCIE_ERR_CAUSE_SHIFT 0
+#define I40E_PFPCI_ICAUSE_PCIE_ERR_CAUSE_MASK I40E_MASK(0xFFFFFFFF, I40E_PFPCI_ICAUSE_PCIE_ERR_CAUSE_SHIFT)
+#define I40E_PFPCI_IENA 0x0009C280 /* Reset: PFR */
+#define I40E_PFPCI_IENA_PCIE_ERR_EN_SHIFT 0
+#define I40E_PFPCI_IENA_PCIE_ERR_EN_MASK I40E_MASK(0xFFFFFFFF, I40E_PFPCI_IENA_PCIE_ERR_EN_SHIFT)
+#define I40E_PFPCI_PF_FLUSH_DONE 0x0009C800 /* Reset: PCIR */
+#define I40E_PFPCI_PF_FLUSH_DONE_FLUSH_DONE_SHIFT 0
+#define I40E_PFPCI_PF_FLUSH_DONE_FLUSH_DONE_MASK I40E_MASK(0x1, I40E_PFPCI_PF_FLUSH_DONE_FLUSH_DONE_SHIFT)
+#define I40E_PFPCI_PM 0x000BE300 /* Reset: POR */
+#define I40E_PFPCI_PM_PME_EN_SHIFT 0
+#define I40E_PFPCI_PM_PME_EN_MASK I40E_MASK(0x1, I40E_PFPCI_PM_PME_EN_SHIFT)
+#define I40E_PFPCI_STATUS1 0x000BE280 /* Reset: POR */
+#define I40E_PFPCI_STATUS1_FUNC_VALID_SHIFT 0
+#define I40E_PFPCI_STATUS1_FUNC_VALID_MASK I40E_MASK(0x1, I40E_PFPCI_STATUS1_FUNC_VALID_SHIFT)
+#define I40E_PFPCI_SUBSYSID 0x000BE100 /* Reset: PCIR */
+#define I40E_PFPCI_SUBSYSID_PF_SUBSYS_ID_SHIFT 0
+#define I40E_PFPCI_SUBSYSID_PF_SUBSYS_ID_MASK I40E_MASK(0xFFFF, I40E_PFPCI_SUBSYSID_PF_SUBSYS_ID_SHIFT)
+#define I40E_PFPCI_SUBSYSID_VF_SUBSYS_ID_SHIFT 16
+#define I40E_PFPCI_SUBSYSID_VF_SUBSYS_ID_MASK I40E_MASK(0xFFFF, I40E_PFPCI_SUBSYSID_VF_SUBSYS_ID_SHIFT)
+#define I40E_PFPCI_VF_FLUSH_DONE 0x0000E400 /* Reset: PCIR */
+#define I40E_PFPCI_VF_FLUSH_DONE_FLUSH_DONE_SHIFT 0
+#define I40E_PFPCI_VF_FLUSH_DONE_FLUSH_DONE_MASK I40E_MASK(0x1, I40E_PFPCI_VF_FLUSH_DONE_FLUSH_DONE_SHIFT)
+#define I40E_PFPCI_VF_FLUSH_DONE1(_VF) (0x0009C600 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: PCIR */
+#define I40E_PFPCI_VF_FLUSH_DONE1_MAX_INDEX 127
+#define I40E_PFPCI_VF_FLUSH_DONE1_FLUSH_DONE_SHIFT 0
+#define I40E_PFPCI_VF_FLUSH_DONE1_FLUSH_DONE_MASK I40E_MASK(0x1, I40E_PFPCI_VF_FLUSH_DONE1_FLUSH_DONE_SHIFT)
+#define I40E_PFPCI_VM_FLUSH_DONE 0x0009C880 /* Reset: PCIR */
+#define I40E_PFPCI_VM_FLUSH_DONE_FLUSH_DONE_SHIFT 0
+#define I40E_PFPCI_VM_FLUSH_DONE_FLUSH_DONE_MASK I40E_MASK(0x1, I40E_PFPCI_VM_FLUSH_DONE_FLUSH_DONE_SHIFT)
+#define I40E_PFPCI_VMINDEX 0x0009C300 /* Reset: PCIR */
+#define I40E_PFPCI_VMINDEX_VMINDEX_SHIFT 0
+#define I40E_PFPCI_VMINDEX_VMINDEX_MASK I40E_MASK(0x1FF, I40E_PFPCI_VMINDEX_VMINDEX_SHIFT)
+#define I40E_PFPCI_VMPEND 0x0009C380 /* Reset: PCIR */
+#define I40E_PFPCI_VMPEND_PENDING_SHIFT 0
+#define I40E_PFPCI_VMPEND_PENDING_MASK I40E_MASK(0x1, I40E_PFPCI_VMPEND_PENDING_SHIFT)
+#define I40E_PRTPM_EEE_STAT 0x001E4320 /* Reset: GLOBR */
+#define I40E_PRTPM_EEE_STAT_EEE_NEG_SHIFT 29
+#define I40E_PRTPM_EEE_STAT_EEE_NEG_MASK I40E_MASK(0x1, I40E_PRTPM_EEE_STAT_EEE_NEG_SHIFT)
+#define I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_SHIFT 30
+#define I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_MASK I40E_MASK(0x1, I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_SHIFT)
+#define I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_SHIFT 31
+#define I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_MASK I40E_MASK(0x1, I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_SHIFT)
+#define I40E_PRTPM_EEEC 0x001E4380 /* Reset: GLOBR */
+#define I40E_PRTPM_EEEC_TW_WAKE_MIN_SHIFT 16
+#define I40E_PRTPM_EEEC_TW_WAKE_MIN_MASK I40E_MASK(0x3F, I40E_PRTPM_EEEC_TW_WAKE_MIN_SHIFT)
+#define I40E_PRTPM_EEEC_TX_LU_LPI_DLY_SHIFT 24
+#define I40E_PRTPM_EEEC_TX_LU_LPI_DLY_MASK I40E_MASK(0x3, I40E_PRTPM_EEEC_TX_LU_LPI_DLY_SHIFT)
+#define I40E_PRTPM_EEEC_TEEE_DLY_SHIFT 26
+#define I40E_PRTPM_EEEC_TEEE_DLY_MASK I40E_MASK(0x3F, I40E_PRTPM_EEEC_TEEE_DLY_SHIFT)
+#define I40E_PRTPM_EEEFWD 0x001E4400 /* Reset: GLOBR */
+#define I40E_PRTPM_EEEFWD_EEE_FW_CONFIG_DONE_SHIFT 31
+#define I40E_PRTPM_EEEFWD_EEE_FW_CONFIG_DONE_MASK I40E_MASK(0x1, I40E_PRTPM_EEEFWD_EEE_FW_CONFIG_DONE_SHIFT)
+#define I40E_PRTPM_EEER 0x001E4360 /* Reset: GLOBR */
+#define I40E_PRTPM_EEER_TW_SYSTEM_SHIFT 0
+#define I40E_PRTPM_EEER_TW_SYSTEM_MASK I40E_MASK(0xFFFF, I40E_PRTPM_EEER_TW_SYSTEM_SHIFT)
+#define I40E_PRTPM_EEER_TX_LPI_EN_SHIFT 16
+#define I40E_PRTPM_EEER_TX_LPI_EN_MASK I40E_MASK(0x1, I40E_PRTPM_EEER_TX_LPI_EN_SHIFT)
+#define I40E_PRTPM_EEETXC 0x001E43E0 /* Reset: GLOBR */
+#define I40E_PRTPM_EEETXC_TW_PHY_SHIFT 0
+#define I40E_PRTPM_EEETXC_TW_PHY_MASK I40E_MASK(0xFFFF, I40E_PRTPM_EEETXC_TW_PHY_SHIFT)
+#define I40E_PRTPM_GC 0x000B8140 /* Reset: POR */
+#define I40E_PRTPM_GC_EMP_LINK_ON_SHIFT 0
+#define I40E_PRTPM_GC_EMP_LINK_ON_MASK I40E_MASK(0x1, I40E_PRTPM_GC_EMP_LINK_ON_SHIFT)
+#define I40E_PRTPM_GC_MNG_VETO_SHIFT 1
+#define I40E_PRTPM_GC_MNG_VETO_MASK I40E_MASK(0x1, I40E_PRTPM_GC_MNG_VETO_SHIFT)
+#define I40E_PRTPM_GC_RATD_SHIFT 2
+#define I40E_PRTPM_GC_RATD_MASK I40E_MASK(0x1, I40E_PRTPM_GC_RATD_SHIFT)
+#define I40E_PRTPM_GC_LCDMP_SHIFT 3
+#define I40E_PRTPM_GC_LCDMP_MASK I40E_MASK(0x1, I40E_PRTPM_GC_LCDMP_SHIFT)
+#define I40E_PRTPM_GC_LPLU_ASSERTED_SHIFT 31
+#define I40E_PRTPM_GC_LPLU_ASSERTED_MASK I40E_MASK(0x1, I40E_PRTPM_GC_LPLU_ASSERTED_SHIFT)
+#define I40E_PRTPM_RLPIC 0x001E43A0 /* Reset: GLOBR */
+#define I40E_PRTPM_RLPIC_ERLPIC_SHIFT 0
+#define I40E_PRTPM_RLPIC_ERLPIC_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTPM_RLPIC_ERLPIC_SHIFT)
+#define I40E_PRTPM_TLPIC 0x001E43C0 /* Reset: GLOBR */
+#define I40E_PRTPM_TLPIC_ETLPIC_SHIFT 0
+#define I40E_PRTPM_TLPIC_ETLPIC_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTPM_TLPIC_ETLPIC_SHIFT)
+#define I40E_GLRPB_DPSS 0x000AC828 /* Reset: CORER */
+#define I40E_GLRPB_DPSS_DPS_TCN_SHIFT 0
+#define I40E_GLRPB_DPSS_DPS_TCN_MASK I40E_MASK(0xFFFFF, I40E_GLRPB_DPSS_DPS_TCN_SHIFT)
+#define I40E_GLRPB_GHW 0x000AC830 /* Reset: CORER */
+#define I40E_GLRPB_GHW_GHW_SHIFT 0
+#define I40E_GLRPB_GHW_GHW_MASK I40E_MASK(0xFFFFF, I40E_GLRPB_GHW_GHW_SHIFT)
+#define I40E_GLRPB_GLW 0x000AC834 /* Reset: CORER */
+#define I40E_GLRPB_GLW_GLW_SHIFT 0
+#define I40E_GLRPB_GLW_GLW_MASK I40E_MASK(0xFFFFF, I40E_GLRPB_GLW_GLW_SHIFT)
+#define I40E_GLRPB_PHW 0x000AC844 /* Reset: CORER */
+#define I40E_GLRPB_PHW_PHW_SHIFT 0
+#define I40E_GLRPB_PHW_PHW_MASK I40E_MASK(0xFFFFF, I40E_GLRPB_PHW_PHW_SHIFT)
+#define I40E_GLRPB_PLW 0x000AC848 /* Reset: CORER */
+#define I40E_GLRPB_PLW_PLW_SHIFT 0
+#define I40E_GLRPB_PLW_PLW_MASK I40E_MASK(0xFFFFF, I40E_GLRPB_PLW_PLW_SHIFT)
+#define I40E_PRTRPB_DHW(_i) (0x000AC100 + ((_i) * 32)) /* _i=0...7 */ /* Reset: CORER */
+#define I40E_PRTRPB_DHW_MAX_INDEX 7
+#define I40E_PRTRPB_DHW_DHW_TCN_SHIFT 0
+#define I40E_PRTRPB_DHW_DHW_TCN_MASK I40E_MASK(0xFFFFF, I40E_PRTRPB_DHW_DHW_TCN_SHIFT)
+#define I40E_PRTRPB_DLW(_i) (0x000AC220 + ((_i) * 32)) /* _i=0...7 */ /* Reset: CORER */
+#define I40E_PRTRPB_DLW_MAX_INDEX 7
+#define I40E_PRTRPB_DLW_DLW_TCN_SHIFT 0
+#define I40E_PRTRPB_DLW_DLW_TCN_MASK I40E_MASK(0xFFFFF, I40E_PRTRPB_DLW_DLW_TCN_SHIFT)
+#define I40E_PRTRPB_DPS(_i) (0x000AC320 + ((_i) * 32)) /* _i=0...7 */ /* Reset: CORER */
+#define I40E_PRTRPB_DPS_MAX_INDEX 7
+#define I40E_PRTRPB_DPS_DPS_TCN_SHIFT 0
+#define I40E_PRTRPB_DPS_DPS_TCN_MASK I40E_MASK(0xFFFFF, I40E_PRTRPB_DPS_DPS_TCN_SHIFT)
+#define I40E_PRTRPB_SHT(_i) (0x000AC480 + ((_i) * 32)) /* _i=0...7 */ /* Reset: CORER */
+#define I40E_PRTRPB_SHT_MAX_INDEX 7
+#define I40E_PRTRPB_SHT_SHT_TCN_SHIFT 0
+#define I40E_PRTRPB_SHT_SHT_TCN_MASK I40E_MASK(0xFFFFF, I40E_PRTRPB_SHT_SHT_TCN_SHIFT)
+#define I40E_PRTRPB_SHW 0x000AC580 /* Reset: CORER */
+#define I40E_PRTRPB_SHW_SHW_SHIFT 0
+#define I40E_PRTRPB_SHW_SHW_MASK I40E_MASK(0xFFFFF, I40E_PRTRPB_SHW_SHW_SHIFT)
+#define I40E_PRTRPB_SLT(_i) (0x000AC5A0 + ((_i) * 32)) /* _i=0...7 */ /* Reset: CORER */
+#define I40E_PRTRPB_SLT_MAX_INDEX 7
+#define I40E_PRTRPB_SLT_SLT_TCN_SHIFT 0
+#define I40E_PRTRPB_SLT_SLT_TCN_MASK I40E_MASK(0xFFFFF, I40E_PRTRPB_SLT_SLT_TCN_SHIFT)
+#define I40E_PRTRPB_SLW 0x000AC6A0 /* Reset: CORER */
+#define I40E_PRTRPB_SLW_SLW_SHIFT 0
+#define I40E_PRTRPB_SLW_SLW_MASK I40E_MASK(0xFFFFF, I40E_PRTRPB_SLW_SLW_SHIFT)
+#define I40E_PRTRPB_SPS 0x000AC7C0 /* Reset: CORER */
+#define I40E_PRTRPB_SPS_SPS_SHIFT 0
+#define I40E_PRTRPB_SPS_SPS_MASK I40E_MASK(0xFFFFF, I40E_PRTRPB_SPS_SPS_SHIFT)
+#define I40E_GLQF_CTL 0x00269BA4 /* Reset: CORER */
+#define I40E_GLQF_CTL_HTOEP_SHIFT 1
+#define I40E_GLQF_CTL_HTOEP_MASK I40E_MASK(0x1, I40E_GLQF_CTL_HTOEP_SHIFT)
+#define I40E_GLQF_CTL_HTOEP_FCOE_SHIFT 2
+#define I40E_GLQF_CTL_HTOEP_FCOE_MASK I40E_MASK(0x1, I40E_GLQF_CTL_HTOEP_FCOE_SHIFT)
+#define I40E_GLQF_CTL_PCNT_ALLOC_SHIFT 3
+#define I40E_GLQF_CTL_PCNT_ALLOC_MASK I40E_MASK(0x7, I40E_GLQF_CTL_PCNT_ALLOC_SHIFT)
+#define I40E_GLQF_CTL_FD_AUTO_PCTYPE_SHIFT 6
+#define I40E_GLQF_CTL_FD_AUTO_PCTYPE_MASK I40E_MASK(0x1, I40E_GLQF_CTL_FD_AUTO_PCTYPE_SHIFT)
+#define I40E_GLQF_CTL_RSVD_SHIFT 7
+#define I40E_GLQF_CTL_RSVD_MASK I40E_MASK(0x1, I40E_GLQF_CTL_RSVD_SHIFT)
+#define I40E_GLQF_CTL_MAXPEBLEN_SHIFT 8
+#define I40E_GLQF_CTL_MAXPEBLEN_MASK I40E_MASK(0x7, I40E_GLQF_CTL_MAXPEBLEN_SHIFT)
+#define I40E_GLQF_CTL_MAXFCBLEN_SHIFT 11
+#define I40E_GLQF_CTL_MAXFCBLEN_MASK I40E_MASK(0x7, I40E_GLQF_CTL_MAXFCBLEN_SHIFT)
+#define I40E_GLQF_CTL_MAXFDBLEN_SHIFT 14
+#define I40E_GLQF_CTL_MAXFDBLEN_MASK I40E_MASK(0x7, I40E_GLQF_CTL_MAXFDBLEN_SHIFT)
+#define I40E_GLQF_CTL_FDBEST_SHIFT 17
+#define I40E_GLQF_CTL_FDBEST_MASK I40E_MASK(0xFF, I40E_GLQF_CTL_FDBEST_SHIFT)
+#define I40E_GLQF_CTL_PROGPRIO_SHIFT 25
+#define I40E_GLQF_CTL_PROGPRIO_MASK I40E_MASK(0x1, I40E_GLQF_CTL_PROGPRIO_SHIFT)
+#define I40E_GLQF_CTL_INVALPRIO_SHIFT 26
+#define I40E_GLQF_CTL_INVALPRIO_MASK I40E_MASK(0x1, I40E_GLQF_CTL_INVALPRIO_SHIFT)
+#define I40E_GLQF_CTL_IGNORE_IP_SHIFT 27
+#define I40E_GLQF_CTL_IGNORE_IP_MASK I40E_MASK(0x1, I40E_GLQF_CTL_IGNORE_IP_SHIFT)
+#define I40E_GLQF_FDCNT_0 0x00269BAC /* Reset: CORER */
+#define I40E_GLQF_FDCNT_0_GUARANT_CNT_SHIFT 0
+#define I40E_GLQF_FDCNT_0_GUARANT_CNT_MASK I40E_MASK(0x1FFF, I40E_GLQF_FDCNT_0_GUARANT_CNT_SHIFT)
+#define I40E_GLQF_FDCNT_0_BESTCNT_SHIFT 13
+#define I40E_GLQF_FDCNT_0_BESTCNT_MASK I40E_MASK(0x1FFF, I40E_GLQF_FDCNT_0_BESTCNT_SHIFT)
+#define I40E_GLQF_HKEY(_i) (0x00270140 + ((_i) * 4)) /* _i=0...12 */ /* Reset: CORER */
+#define I40E_GLQF_HKEY_MAX_INDEX 12
+#define I40E_GLQF_HKEY_KEY_0_SHIFT 0
+#define I40E_GLQF_HKEY_KEY_0_MASK I40E_MASK(0xFF, I40E_GLQF_HKEY_KEY_0_SHIFT)
+#define I40E_GLQF_HKEY_KEY_1_SHIFT 8
+#define I40E_GLQF_HKEY_KEY_1_MASK I40E_MASK(0xFF, I40E_GLQF_HKEY_KEY_1_SHIFT)
+#define I40E_GLQF_HKEY_KEY_2_SHIFT 16
+#define I40E_GLQF_HKEY_KEY_2_MASK I40E_MASK(0xFF, I40E_GLQF_HKEY_KEY_2_SHIFT)
+#define I40E_GLQF_HKEY_KEY_3_SHIFT 24
+#define I40E_GLQF_HKEY_KEY_3_MASK I40E_MASK(0xFF, I40E_GLQF_HKEY_KEY_3_SHIFT)
+#define I40E_GLQF_HSYM(_i) (0x00269D00 + ((_i) * 4)) /* _i=0...63 */ /* Reset: CORER */
+#define I40E_GLQF_HSYM_MAX_INDEX 63
+#define I40E_GLQF_HSYM_SYMH_ENA_SHIFT 0
+#define I40E_GLQF_HSYM_SYMH_ENA_MASK I40E_MASK(0x1, I40E_GLQF_HSYM_SYMH_ENA_SHIFT)
+#define I40E_GLQF_PCNT(_i) (0x00266800 + ((_i) * 4)) /* _i=0...511 */ /* Reset: CORER */
+#define I40E_GLQF_PCNT_MAX_INDEX 511
+#define I40E_GLQF_PCNT_PCNT_SHIFT 0
+#define I40E_GLQF_PCNT_PCNT_MASK I40E_MASK(0xFFFFFFFF, I40E_GLQF_PCNT_PCNT_SHIFT)
+#define I40E_GLQF_SWAP(_i, _j) (0x00267E00 + ((_i) * 4 + (_j) * 8)) /* _i=0...1, _j=0...63 */ /* Reset: CORER */
+#define I40E_GLQF_SWAP_MAX_INDEX 1
+#define I40E_GLQF_SWAP_OFF0_SRC0_SHIFT 0
+#define I40E_GLQF_SWAP_OFF0_SRC0_MASK I40E_MASK(0x3F, I40E_GLQF_SWAP_OFF0_SRC0_SHIFT)
+#define I40E_GLQF_SWAP_OFF0_SRC1_SHIFT 6
+#define I40E_GLQF_SWAP_OFF0_SRC1_MASK I40E_MASK(0x3F, I40E_GLQF_SWAP_OFF0_SRC1_SHIFT)
+#define I40E_GLQF_SWAP_FLEN0_SHIFT 12
+#define I40E_GLQF_SWAP_FLEN0_MASK I40E_MASK(0xF, I40E_GLQF_SWAP_FLEN0_SHIFT)
+#define I40E_GLQF_SWAP_OFF1_SRC0_SHIFT 16
+#define I40E_GLQF_SWAP_OFF1_SRC0_MASK I40E_MASK(0x3F, I40E_GLQF_SWAP_OFF1_SRC0_SHIFT)
+#define I40E_GLQF_SWAP_OFF1_SRC1_SHIFT 22
+#define I40E_GLQF_SWAP_OFF1_SRC1_MASK I40E_MASK(0x3F, I40E_GLQF_SWAP_OFF1_SRC1_SHIFT)
+#define I40E_GLQF_SWAP_FLEN1_SHIFT 28
+#define I40E_GLQF_SWAP_FLEN1_MASK I40E_MASK(0xF, I40E_GLQF_SWAP_FLEN1_SHIFT)
+#define I40E_PFQF_CTL_0 0x001C0AC0 /* Reset: CORER */
+#define I40E_PFQF_CTL_0_PEHSIZE_SHIFT 0
+#define I40E_PFQF_CTL_0_PEHSIZE_MASK I40E_MASK(0x1F, I40E_PFQF_CTL_0_PEHSIZE_SHIFT)
+#define I40E_PFQF_CTL_0_PEDSIZE_SHIFT 5
+#define I40E_PFQF_CTL_0_PEDSIZE_MASK I40E_MASK(0x1F, I40E_PFQF_CTL_0_PEDSIZE_SHIFT)
+#define I40E_PFQF_CTL_0_PFFCHSIZE_SHIFT 10
+#define I40E_PFQF_CTL_0_PFFCHSIZE_MASK I40E_MASK(0xF, I40E_PFQF_CTL_0_PFFCHSIZE_SHIFT)
+#define I40E_PFQF_CTL_0_PFFCDSIZE_SHIFT 14
+#define I40E_PFQF_CTL_0_PFFCDSIZE_MASK I40E_MASK(0x3, I40E_PFQF_CTL_0_PFFCDSIZE_SHIFT)
+#define I40E_PFQF_CTL_0_HASHLUTSIZE_SHIFT 16
+#define I40E_PFQF_CTL_0_HASHLUTSIZE_MASK I40E_MASK(0x1, I40E_PFQF_CTL_0_HASHLUTSIZE_SHIFT)
+#define I40E_PFQF_CTL_0_FD_ENA_SHIFT 17
+#define I40E_PFQF_CTL_0_FD_ENA_MASK I40E_MASK(0x1, I40E_PFQF_CTL_0_FD_ENA_SHIFT)
+#define I40E_PFQF_CTL_0_ETYPE_ENA_SHIFT 18
+#define I40E_PFQF_CTL_0_ETYPE_ENA_MASK I40E_MASK(0x1, I40E_PFQF_CTL_0_ETYPE_ENA_SHIFT)
+#define I40E_PFQF_CTL_0_MACVLAN_ENA_SHIFT 19
+#define I40E_PFQF_CTL_0_MACVLAN_ENA_MASK I40E_MASK(0x1, I40E_PFQF_CTL_0_MACVLAN_ENA_SHIFT)
+#define I40E_PFQF_CTL_0_VFFCHSIZE_SHIFT 20
+#define I40E_PFQF_CTL_0_VFFCHSIZE_MASK I40E_MASK(0xF, I40E_PFQF_CTL_0_VFFCHSIZE_SHIFT)
+#define I40E_PFQF_CTL_0_VFFCDSIZE_SHIFT 24
+#define I40E_PFQF_CTL_0_VFFCDSIZE_MASK I40E_MASK(0x3, I40E_PFQF_CTL_0_VFFCDSIZE_SHIFT)
+#define I40E_PFQF_CTL_1 0x00245D80 /* Reset: CORER */
+#define I40E_PFQF_CTL_1_CLEARFDTABLE_SHIFT 0
+#define I40E_PFQF_CTL_1_CLEARFDTABLE_MASK I40E_MASK(0x1, I40E_PFQF_CTL_1_CLEARFDTABLE_SHIFT)
+#define I40E_PFQF_FDALLOC 0x00246280 /* Reset: CORER */
+#define I40E_PFQF_FDALLOC_FDALLOC_SHIFT 0
+#define I40E_PFQF_FDALLOC_FDALLOC_MASK I40E_MASK(0xFF, I40E_PFQF_FDALLOC_FDALLOC_SHIFT)
+#define I40E_PFQF_FDALLOC_FDBEST_SHIFT 8
+#define I40E_PFQF_FDALLOC_FDBEST_MASK I40E_MASK(0xFF, I40E_PFQF_FDALLOC_FDBEST_SHIFT)
+#define I40E_PFQF_FDSTAT 0x00246380 /* Reset: CORER */
+#define I40E_PFQF_FDSTAT_GUARANT_CNT_SHIFT 0
+#define I40E_PFQF_FDSTAT_GUARANT_CNT_MASK I40E_MASK(0x1FFF, I40E_PFQF_FDSTAT_GUARANT_CNT_SHIFT)
+#define I40E_PFQF_FDSTAT_BEST_CNT_SHIFT 16
+#define I40E_PFQF_FDSTAT_BEST_CNT_MASK I40E_MASK(0x1FFF, I40E_PFQF_FDSTAT_BEST_CNT_SHIFT)
+#define I40E_PFQF_HENA(_i) (0x00245900 + ((_i) * 128)) /* _i=0...1 */ /* Reset: CORER */
+#define I40E_PFQF_HENA_MAX_INDEX 1
+#define I40E_PFQF_HENA_PTYPE_ENA_SHIFT 0
+#define I40E_PFQF_HENA_PTYPE_ENA_MASK I40E_MASK(0xFFFFFFFF, I40E_PFQF_HENA_PTYPE_ENA_SHIFT)
+#define I40E_PFQF_HKEY(_i) (0x00244800 + ((_i) * 128)) /* _i=0...12 */ /* Reset: CORER */
+#define I40E_PFQF_HKEY_MAX_INDEX 12
+#define I40E_PFQF_HKEY_KEY_0_SHIFT 0
+#define I40E_PFQF_HKEY_KEY_0_MASK I40E_MASK(0xFF, I40E_PFQF_HKEY_KEY_0_SHIFT)
+#define I40E_PFQF_HKEY_KEY_1_SHIFT 8
+#define I40E_PFQF_HKEY_KEY_1_MASK I40E_MASK(0xFF, I40E_PFQF_HKEY_KEY_1_SHIFT)
+#define I40E_PFQF_HKEY_KEY_2_SHIFT 16
+#define I40E_PFQF_HKEY_KEY_2_MASK I40E_MASK(0xFF, I40E_PFQF_HKEY_KEY_2_SHIFT)
+#define I40E_PFQF_HKEY_KEY_3_SHIFT 24
+#define I40E_PFQF_HKEY_KEY_3_MASK I40E_MASK(0xFF, I40E_PFQF_HKEY_KEY_3_SHIFT)
+#define I40E_PFQF_HLUT(_i) (0x00240000 + ((_i) * 128)) /* _i=0...127 */ /* Reset: CORER */
+#define I40E_PFQF_HLUT_MAX_INDEX 127
+#define I40E_PFQF_HLUT_LUT0_SHIFT 0
+#define I40E_PFQF_HLUT_LUT0_MASK I40E_MASK(0x3F, I40E_PFQF_HLUT_LUT0_SHIFT)
+#define I40E_PFQF_HLUT_LUT1_SHIFT 8
+#define I40E_PFQF_HLUT_LUT1_MASK I40E_MASK(0x3F, I40E_PFQF_HLUT_LUT1_SHIFT)
+#define I40E_PFQF_HLUT_LUT2_SHIFT 16
+#define I40E_PFQF_HLUT_LUT2_MASK I40E_MASK(0x3F, I40E_PFQF_HLUT_LUT2_SHIFT)
+#define I40E_PFQF_HLUT_LUT3_SHIFT 24
+#define I40E_PFQF_HLUT_LUT3_MASK I40E_MASK(0x3F, I40E_PFQF_HLUT_LUT3_SHIFT)
+#define I40E_PRTQF_CTL_0 0x00256E60 /* Reset: CORER */
+#define I40E_PRTQF_CTL_0_HSYM_ENA_SHIFT 0
+#define I40E_PRTQF_CTL_0_HSYM_ENA_MASK I40E_MASK(0x1, I40E_PRTQF_CTL_0_HSYM_ENA_SHIFT)
+#define I40E_PRTQF_FD_FLXINSET(_i) (0x00253800 + ((_i) * 32)) /* _i=0...63 */ /* Reset: CORER */
+#define I40E_PRTQF_FD_FLXINSET_MAX_INDEX 63
+#define I40E_PRTQF_FD_FLXINSET_INSET_SHIFT 0
+#define I40E_PRTQF_FD_FLXINSET_INSET_MASK I40E_MASK(0xFF, I40E_PRTQF_FD_FLXINSET_INSET_SHIFT)
+#define I40E_PRTQF_FD_MSK(_i, _j) (0x00252000 + ((_i) * 64 + (_j) * 32)) /* _i=0...63, _j=0...1 */ /* Reset: CORER */
+#define I40E_PRTQF_FD_MSK_MAX_INDEX 63
+#define I40E_PRTQF_FD_MSK_MASK_SHIFT 0
+#define I40E_PRTQF_FD_MSK_MASK_MASK I40E_MASK(0xFFFF, I40E_PRTQF_FD_MSK_MASK_SHIFT)
+#define I40E_PRTQF_FD_MSK_OFFSET_SHIFT 16
+#define I40E_PRTQF_FD_MSK_OFFSET_MASK I40E_MASK(0x3F, I40E_PRTQF_FD_MSK_OFFSET_SHIFT)
+#define I40E_PRTQF_FLX_PIT(_i) (0x00255200 + ((_i) * 32)) /* _i=0...8 */ /* Reset: CORER */
+#define I40E_PRTQF_FLX_PIT_MAX_INDEX 8
+#define I40E_PRTQF_FLX_PIT_SOURCE_OFF_SHIFT 0
+#define I40E_PRTQF_FLX_PIT_SOURCE_OFF_MASK I40E_MASK(0x1F, I40E_PRTQF_FLX_PIT_SOURCE_OFF_SHIFT)
+#define I40E_PRTQF_FLX_PIT_FSIZE_SHIFT 5
+#define I40E_PRTQF_FLX_PIT_FSIZE_MASK I40E_MASK(0x1F, I40E_PRTQF_FLX_PIT_FSIZE_SHIFT)
+#define I40E_PRTQF_FLX_PIT_DEST_OFF_SHIFT 10
+#define I40E_PRTQF_FLX_PIT_DEST_OFF_MASK I40E_MASK(0x3F, I40E_PRTQF_FLX_PIT_DEST_OFF_SHIFT)
+#define I40E_VFQF_HENA1(_i, _VF) (0x00230800 + ((_i) * 1024 + (_VF) * 4)) /* _i=0...1, _VF=0...127 */ /* Reset: CORER */
+#define I40E_VFQF_HENA1_MAX_INDEX 1
+#define I40E_VFQF_HENA1_PTYPE_ENA_SHIFT 0
+#define I40E_VFQF_HENA1_PTYPE_ENA_MASK I40E_MASK(0xFFFFFFFF, I40E_VFQF_HENA1_PTYPE_ENA_SHIFT)
+#define I40E_VFQF_HKEY1(_i, _VF) (0x00228000 + ((_i) * 1024 + (_VF) * 4)) /* _i=0...12, _VF=0...127 */ /* Reset: CORER */
+#define I40E_VFQF_HKEY1_MAX_INDEX 12
+#define I40E_VFQF_HKEY1_KEY_0_SHIFT 0
+#define I40E_VFQF_HKEY1_KEY_0_MASK I40E_MASK(0xFF, I40E_VFQF_HKEY1_KEY_0_SHIFT)
+#define I40E_VFQF_HKEY1_KEY_1_SHIFT 8
+#define I40E_VFQF_HKEY1_KEY_1_MASK I40E_MASK(0xFF, I40E_VFQF_HKEY1_KEY_1_SHIFT)
+#define I40E_VFQF_HKEY1_KEY_2_SHIFT 16
+#define I40E_VFQF_HKEY1_KEY_2_MASK I40E_MASK(0xFF, I40E_VFQF_HKEY1_KEY_2_SHIFT)
+#define I40E_VFQF_HKEY1_KEY_3_SHIFT 24
+#define I40E_VFQF_HKEY1_KEY_3_MASK I40E_MASK(0xFF, I40E_VFQF_HKEY1_KEY_3_SHIFT)
+#define I40E_VFQF_HLUT1(_i, _VF) (0x00220000 + ((_i) * 1024 + (_VF) * 4)) /* _i=0...15, _VF=0...127 */ /* Reset: CORER */
+#define I40E_VFQF_HLUT1_MAX_INDEX 15
+#define I40E_VFQF_HLUT1_LUT0_SHIFT 0
+#define I40E_VFQF_HLUT1_LUT0_MASK I40E_MASK(0xF, I40E_VFQF_HLUT1_LUT0_SHIFT)
+#define I40E_VFQF_HLUT1_LUT1_SHIFT 8
+#define I40E_VFQF_HLUT1_LUT1_MASK I40E_MASK(0xF, I40E_VFQF_HLUT1_LUT1_SHIFT)
+#define I40E_VFQF_HLUT1_LUT2_SHIFT 16
+#define I40E_VFQF_HLUT1_LUT2_MASK I40E_MASK(0xF, I40E_VFQF_HLUT1_LUT2_SHIFT)
+#define I40E_VFQF_HLUT1_LUT3_SHIFT 24
+#define I40E_VFQF_HLUT1_LUT3_MASK I40E_MASK(0xF, I40E_VFQF_HLUT1_LUT3_SHIFT)
+#define I40E_VFQF_HREGION1(_i, _VF) (0x0022E000 + ((_i) * 1024 + (_VF) * 4)) /* _i=0...7, _VF=0...127 */ /* Reset: CORER */
+#define I40E_VFQF_HREGION1_MAX_INDEX 7
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_0_SHIFT 0
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_0_MASK I40E_MASK(0x1, I40E_VFQF_HREGION1_OVERRIDE_ENA_0_SHIFT)
+#define I40E_VFQF_HREGION1_REGION_0_SHIFT 1
+#define I40E_VFQF_HREGION1_REGION_0_MASK I40E_MASK(0x7, I40E_VFQF_HREGION1_REGION_0_SHIFT)
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_1_SHIFT 4
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_1_MASK I40E_MASK(0x1, I40E_VFQF_HREGION1_OVERRIDE_ENA_1_SHIFT)
+#define I40E_VFQF_HREGION1_REGION_1_SHIFT 5
+#define I40E_VFQF_HREGION1_REGION_1_MASK I40E_MASK(0x7, I40E_VFQF_HREGION1_REGION_1_SHIFT)
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_2_SHIFT 8
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_2_MASK I40E_MASK(0x1, I40E_VFQF_HREGION1_OVERRIDE_ENA_2_SHIFT)
+#define I40E_VFQF_HREGION1_REGION_2_SHIFT 9
+#define I40E_VFQF_HREGION1_REGION_2_MASK I40E_MASK(0x7, I40E_VFQF_HREGION1_REGION_2_SHIFT)
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_3_SHIFT 12
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_3_MASK I40E_MASK(0x1, I40E_VFQF_HREGION1_OVERRIDE_ENA_3_SHIFT)
+#define I40E_VFQF_HREGION1_REGION_3_SHIFT 13
+#define I40E_VFQF_HREGION1_REGION_3_MASK I40E_MASK(0x7, I40E_VFQF_HREGION1_REGION_3_SHIFT)
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_4_SHIFT 16
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_4_MASK I40E_MASK(0x1, I40E_VFQF_HREGION1_OVERRIDE_ENA_4_SHIFT)
+#define I40E_VFQF_HREGION1_REGION_4_SHIFT 17
+#define I40E_VFQF_HREGION1_REGION_4_MASK I40E_MASK(0x7, I40E_VFQF_HREGION1_REGION_4_SHIFT)
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_5_SHIFT 20
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_5_MASK I40E_MASK(0x1, I40E_VFQF_HREGION1_OVERRIDE_ENA_5_SHIFT)
+#define I40E_VFQF_HREGION1_REGION_5_SHIFT 21
+#define I40E_VFQF_HREGION1_REGION_5_MASK I40E_MASK(0x7, I40E_VFQF_HREGION1_REGION_5_SHIFT)
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_6_SHIFT 24
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_6_MASK I40E_MASK(0x1, I40E_VFQF_HREGION1_OVERRIDE_ENA_6_SHIFT)
+#define I40E_VFQF_HREGION1_REGION_6_SHIFT 25
+#define I40E_VFQF_HREGION1_REGION_6_MASK I40E_MASK(0x7, I40E_VFQF_HREGION1_REGION_6_SHIFT)
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_7_SHIFT 28
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_7_MASK I40E_MASK(0x1, I40E_VFQF_HREGION1_OVERRIDE_ENA_7_SHIFT)
+#define I40E_VFQF_HREGION1_REGION_7_SHIFT 29
+#define I40E_VFQF_HREGION1_REGION_7_MASK I40E_MASK(0x7, I40E_VFQF_HREGION1_REGION_7_SHIFT)
+#define I40E_VPQF_CTL(_VF) (0x001C0000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VPQF_CTL_MAX_INDEX 127
+#define I40E_VPQF_CTL_PEHSIZE_SHIFT 0
+#define I40E_VPQF_CTL_PEHSIZE_MASK I40E_MASK(0x1F, I40E_VPQF_CTL_PEHSIZE_SHIFT)
+#define I40E_VPQF_CTL_PEDSIZE_SHIFT 5
+#define I40E_VPQF_CTL_PEDSIZE_MASK I40E_MASK(0x1F, I40E_VPQF_CTL_PEDSIZE_SHIFT)
+#define I40E_VPQF_CTL_FCHSIZE_SHIFT 10
+#define I40E_VPQF_CTL_FCHSIZE_MASK I40E_MASK(0xF, I40E_VPQF_CTL_FCHSIZE_SHIFT)
+#define I40E_VPQF_CTL_FCDSIZE_SHIFT 14
+#define I40E_VPQF_CTL_FCDSIZE_MASK I40E_MASK(0x3, I40E_VPQF_CTL_FCDSIZE_SHIFT)
+#define I40E_VSIQF_CTL(_VSI) (0x0020D800 + ((_VSI) * 4)) /* _i=0...383 */ /* Reset: PFR */
+#define I40E_VSIQF_CTL_MAX_INDEX 383
+#define I40E_VSIQF_CTL_FCOE_ENA_SHIFT 0
+#define I40E_VSIQF_CTL_FCOE_ENA_MASK I40E_MASK(0x1, I40E_VSIQF_CTL_FCOE_ENA_SHIFT)
+#define I40E_VSIQF_CTL_PETCP_ENA_SHIFT 1
+#define I40E_VSIQF_CTL_PETCP_ENA_MASK I40E_MASK(0x1, I40E_VSIQF_CTL_PETCP_ENA_SHIFT)
+#define I40E_VSIQF_CTL_PEUUDP_ENA_SHIFT 2
+#define I40E_VSIQF_CTL_PEUUDP_ENA_MASK I40E_MASK(0x1, I40E_VSIQF_CTL_PEUUDP_ENA_SHIFT)
+#define I40E_VSIQF_CTL_PEMUDP_ENA_SHIFT 3
+#define I40E_VSIQF_CTL_PEMUDP_ENA_MASK I40E_MASK(0x1, I40E_VSIQF_CTL_PEMUDP_ENA_SHIFT)
+#define I40E_VSIQF_CTL_PEUFRAG_ENA_SHIFT 4
+#define I40E_VSIQF_CTL_PEUFRAG_ENA_MASK I40E_MASK(0x1, I40E_VSIQF_CTL_PEUFRAG_ENA_SHIFT)
+#define I40E_VSIQF_CTL_PEMFRAG_ENA_SHIFT 5
+#define I40E_VSIQF_CTL_PEMFRAG_ENA_MASK I40E_MASK(0x1, I40E_VSIQF_CTL_PEMFRAG_ENA_SHIFT)
+#define I40E_VSIQF_TCREGION(_i, _VSI) (0x00206000 + ((_i) * 2048 + (_VSI) * 4)) /* _i=0...3, _VSI=0...383 */ /* Reset: PFR */
+#define I40E_VSIQF_TCREGION_MAX_INDEX 3
+#define I40E_VSIQF_TCREGION_TC_OFFSET_SHIFT 0
+#define I40E_VSIQF_TCREGION_TC_OFFSET_MASK I40E_MASK(0x1FF, I40E_VSIQF_TCREGION_TC_OFFSET_SHIFT)
+#define I40E_VSIQF_TCREGION_TC_SIZE_SHIFT 9
+#define I40E_VSIQF_TCREGION_TC_SIZE_MASK I40E_MASK(0x7, I40E_VSIQF_TCREGION_TC_SIZE_SHIFT)
+#define I40E_VSIQF_TCREGION_TC_OFFSET2_SHIFT 16
+#define I40E_VSIQF_TCREGION_TC_OFFSET2_MASK I40E_MASK(0x1FF, I40E_VSIQF_TCREGION_TC_OFFSET2_SHIFT)
+#define I40E_VSIQF_TCREGION_TC_SIZE2_SHIFT 25
+#define I40E_VSIQF_TCREGION_TC_SIZE2_MASK I40E_MASK(0x7, I40E_VSIQF_TCREGION_TC_SIZE2_SHIFT)
+#define I40E_GL_FCOECRC(_i) (0x00314d80 + ((_i) * 8)) /* _i=0...143 */ /* Reset: CORER */
+#define I40E_GL_FCOECRC_MAX_INDEX 143
+#define I40E_GL_FCOECRC_FCOECRC_SHIFT 0
+#define I40E_GL_FCOECRC_FCOECRC_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_FCOECRC_FCOECRC_SHIFT)
+#define I40E_GL_FCOEDDPC(_i) (0x00314480 + ((_i) * 8)) /* _i=0...143 */ /* Reset: CORER */
+#define I40E_GL_FCOEDDPC_MAX_INDEX 143
+#define I40E_GL_FCOEDDPC_FCOEDDPC_SHIFT 0
+#define I40E_GL_FCOEDDPC_FCOEDDPC_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_FCOEDDPC_FCOEDDPC_SHIFT)
+#define I40E_GL_FCOEDIFEC(_i) (0x00318480 + ((_i) * 8)) /* _i=0...143 */ /* Reset: CORER */
+#define I40E_GL_FCOEDIFEC_MAX_INDEX 143
+#define I40E_GL_FCOEDIFEC_FCOEDIFRC_SHIFT 0
+#define I40E_GL_FCOEDIFEC_FCOEDIFRC_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_FCOEDIFEC_FCOEDIFRC_SHIFT)
+#define I40E_GL_FCOEDIFTCL(_i) (0x00354000 + ((_i) * 8)) /* _i=0...143 */ /* Reset: CORER */
+#define I40E_GL_FCOEDIFTCL_MAX_INDEX 143
+#define I40E_GL_FCOEDIFTCL_FCOEDIFTC_SHIFT 0
+#define I40E_GL_FCOEDIFTCL_FCOEDIFTC_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_FCOEDIFTCL_FCOEDIFTC_SHIFT)
+#define I40E_GL_FCOEDIXEC(_i) (0x0034c000 + ((_i) * 8)) /* _i=0...143 */ /* Reset: CORER */
+#define I40E_GL_FCOEDIXEC_MAX_INDEX 143
+#define I40E_GL_FCOEDIXEC_FCOEDIXEC_SHIFT 0
+#define I40E_GL_FCOEDIXEC_FCOEDIXEC_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_FCOEDIXEC_FCOEDIXEC_SHIFT)
+#define I40E_GL_FCOEDIXVC(_i) (0x00350000 + ((_i) * 8)) /* _i=0...143 */ /* Reset: CORER */
+#define I40E_GL_FCOEDIXVC_MAX_INDEX 143
+#define I40E_GL_FCOEDIXVC_FCOEDIXVC_SHIFT 0
+#define I40E_GL_FCOEDIXVC_FCOEDIXVC_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_FCOEDIXVC_FCOEDIXVC_SHIFT)
+#define I40E_GL_FCOEDWRCH(_i) (0x00320004 + ((_i) * 8)) /* _i=0...143 */ /* Reset: CORER */
+#define I40E_GL_FCOEDWRCH_MAX_INDEX 143
+#define I40E_GL_FCOEDWRCH_FCOEDWRCH_SHIFT 0
+#define I40E_GL_FCOEDWRCH_FCOEDWRCH_MASK I40E_MASK(0xFFFF, I40E_GL_FCOEDWRCH_FCOEDWRCH_SHIFT)
+#define I40E_GL_FCOEDWRCL(_i) (0x00320000 + ((_i) * 8)) /* _i=0...143 */ /* Reset: CORER */
+#define I40E_GL_FCOEDWRCL_MAX_INDEX 143
+#define I40E_GL_FCOEDWRCL_FCOEDWRCL_SHIFT 0
+#define I40E_GL_FCOEDWRCL_FCOEDWRCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_FCOEDWRCL_FCOEDWRCL_SHIFT)
+#define I40E_GL_FCOEDWTCH(_i) (0x00348084 + ((_i) * 8)) /* _i=0...143 */ /* Reset: CORER */
+#define I40E_GL_FCOEDWTCH_MAX_INDEX 143
+#define I40E_GL_FCOEDWTCH_FCOEDWTCH_SHIFT 0
+#define I40E_GL_FCOEDWTCH_FCOEDWTCH_MASK I40E_MASK(0xFFFF, I40E_GL_FCOEDWTCH_FCOEDWTCH_SHIFT)
+#define I40E_GL_FCOEDWTCL(_i) (0x00348080 + ((_i) * 8)) /* _i=0...143 */ /* Reset: CORER */
+#define I40E_GL_FCOEDWTCL_MAX_INDEX 143
+#define I40E_GL_FCOEDWTCL_FCOEDWTCL_SHIFT 0
+#define I40E_GL_FCOEDWTCL_FCOEDWTCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_FCOEDWTCL_FCOEDWTCL_SHIFT)
+#define I40E_GL_FCOELAST(_i) (0x00314000 + ((_i) * 8)) /* _i=0...143 */ /* Reset: CORER */
+#define I40E_GL_FCOELAST_MAX_INDEX 143
+#define I40E_GL_FCOELAST_FCOELAST_SHIFT 0
+#define I40E_GL_FCOELAST_FCOELAST_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_FCOELAST_FCOELAST_SHIFT)
+#define I40E_GL_FCOEPRC(_i) (0x00315200 + ((_i) * 8)) /* _i=0...143 */ /* Reset: CORER */
+#define I40E_GL_FCOEPRC_MAX_INDEX 143
+#define I40E_GL_FCOEPRC_FCOEPRC_SHIFT 0
+#define I40E_GL_FCOEPRC_FCOEPRC_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_FCOEPRC_FCOEPRC_SHIFT)
+#define I40E_GL_FCOEPTC(_i) (0x00344C00 + ((_i) * 8)) /* _i=0...143 */ /* Reset: CORER */
+#define I40E_GL_FCOEPTC_MAX_INDEX 143
+#define I40E_GL_FCOEPTC_FCOEPTC_SHIFT 0
+#define I40E_GL_FCOEPTC_FCOEPTC_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_FCOEPTC_FCOEPTC_SHIFT)
+#define I40E_GL_FCOERPDC(_i) (0x00324000 + ((_i) * 8)) /* _i=0...143 */ /* Reset: CORER */
+#define I40E_GL_FCOERPDC_MAX_INDEX 143
+#define I40E_GL_FCOERPDC_FCOERPDC_SHIFT 0
+#define I40E_GL_FCOERPDC_FCOERPDC_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_FCOERPDC_FCOERPDC_SHIFT)
+#define I40E_GL_RXERR1_L(_i) (0x00318000 + ((_i) * 8)) /* _i=0...143 */ /* Reset: CORER */
+#define I40E_GL_RXERR1_L_MAX_INDEX 143
+#define I40E_GL_RXERR1_L_FCOEDIFRC_SHIFT 0
+#define I40E_GL_RXERR1_L_FCOEDIFRC_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_RXERR1_L_FCOEDIFRC_SHIFT)
+#define I40E_GL_RXERR2_L(_i) (0x0031c000 + ((_i) * 8)) /* _i=0...143 */ /* Reset: CORER */
+#define I40E_GL_RXERR2_L_MAX_INDEX 143
+#define I40E_GL_RXERR2_L_FCOEDIXAC_SHIFT 0
+#define I40E_GL_RXERR2_L_FCOEDIXAC_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_RXERR2_L_FCOEDIXAC_SHIFT)
+#define I40E_GLPRT_BPRCH(_i) (0x003005E4 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_BPRCH_MAX_INDEX 3
+#define I40E_GLPRT_BPRCH_BPRCH_SHIFT 0
+#define I40E_GLPRT_BPRCH_BPRCH_MASK I40E_MASK(0xFFFF, I40E_GLPRT_BPRCH_BPRCH_SHIFT)
+#define I40E_GLPRT_BPRCL(_i) (0x003005E0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_BPRCL_MAX_INDEX 3
+#define I40E_GLPRT_BPRCL_BPRCL_SHIFT 0
+#define I40E_GLPRT_BPRCL_BPRCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_BPRCL_BPRCL_SHIFT)
+#define I40E_GLPRT_BPTCH(_i) (0x00300A04 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_BPTCH_MAX_INDEX 3
+#define I40E_GLPRT_BPTCH_BPTCH_SHIFT 0
+#define I40E_GLPRT_BPTCH_BPTCH_MASK I40E_MASK(0xFFFF, I40E_GLPRT_BPTCH_BPTCH_SHIFT)
+#define I40E_GLPRT_BPTCL(_i) (0x00300A00 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_BPTCL_MAX_INDEX 3
+#define I40E_GLPRT_BPTCL_BPTCL_SHIFT 0
+#define I40E_GLPRT_BPTCL_BPTCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_BPTCL_BPTCL_SHIFT)
+#define I40E_GLPRT_CRCERRS(_i) (0x00300080 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_CRCERRS_MAX_INDEX 3
+#define I40E_GLPRT_CRCERRS_CRCERRS_SHIFT 0
+#define I40E_GLPRT_CRCERRS_CRCERRS_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_CRCERRS_CRCERRS_SHIFT)
+#define I40E_GLPRT_GORCH(_i) (0x00300004 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_GORCH_MAX_INDEX 3
+#define I40E_GLPRT_GORCH_GORCH_SHIFT 0
+#define I40E_GLPRT_GORCH_GORCH_MASK I40E_MASK(0xFFFF, I40E_GLPRT_GORCH_GORCH_SHIFT)
+#define I40E_GLPRT_GORCL(_i) (0x00300000 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_GORCL_MAX_INDEX 3
+#define I40E_GLPRT_GORCL_GORCL_SHIFT 0
+#define I40E_GLPRT_GORCL_GORCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_GORCL_GORCL_SHIFT)
+#define I40E_GLPRT_GOTCH(_i) (0x00300684 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_GOTCH_MAX_INDEX 3
+#define I40E_GLPRT_GOTCH_GOTCH_SHIFT 0
+#define I40E_GLPRT_GOTCH_GOTCH_MASK I40E_MASK(0xFFFF, I40E_GLPRT_GOTCH_GOTCH_SHIFT)
+#define I40E_GLPRT_GOTCL(_i) (0x00300680 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_GOTCL_MAX_INDEX 3
+#define I40E_GLPRT_GOTCL_GOTCL_SHIFT 0
+#define I40E_GLPRT_GOTCL_GOTCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_GOTCL_GOTCL_SHIFT)
+#define I40E_GLPRT_ILLERRC(_i) (0x003000E0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_ILLERRC_MAX_INDEX 3
+#define I40E_GLPRT_ILLERRC_ILLERRC_SHIFT 0
+#define I40E_GLPRT_ILLERRC_ILLERRC_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_ILLERRC_ILLERRC_SHIFT)
+#define I40E_GLPRT_LDPC(_i) (0x00300620 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_LDPC_MAX_INDEX 3
+#define I40E_GLPRT_LDPC_LDPC_SHIFT 0
+#define I40E_GLPRT_LDPC_LDPC_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_LDPC_LDPC_SHIFT)
+#define I40E_GLPRT_LXOFFRXC(_i) (0x00300160 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_LXOFFRXC_MAX_INDEX 3
+#define I40E_GLPRT_LXOFFRXC_LXOFFRXCNT_SHIFT 0
+#define I40E_GLPRT_LXOFFRXC_LXOFFRXCNT_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_LXOFFRXC_LXOFFRXCNT_SHIFT)
+#define I40E_GLPRT_LXOFFTXC(_i) (0x003009A0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_LXOFFTXC_MAX_INDEX 3
+#define I40E_GLPRT_LXOFFTXC_LXOFFTXC_SHIFT 0
+#define I40E_GLPRT_LXOFFTXC_LXOFFTXC_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_LXOFFTXC_LXOFFTXC_SHIFT)
+#define I40E_GLPRT_LXONRXC(_i) (0x00300140 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_LXONRXC_MAX_INDEX 3
+#define I40E_GLPRT_LXONRXC_LXONRXCNT_SHIFT 0
+#define I40E_GLPRT_LXONRXC_LXONRXCNT_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_LXONRXC_LXONRXCNT_SHIFT)
+#define I40E_GLPRT_LXONTXC(_i) (0x00300980 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_LXONTXC_MAX_INDEX 3
+#define I40E_GLPRT_LXONTXC_LXONTXC_SHIFT 0
+#define I40E_GLPRT_LXONTXC_LXONTXC_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_LXONTXC_LXONTXC_SHIFT)
+#define I40E_GLPRT_MLFC(_i) (0x00300020 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_MLFC_MAX_INDEX 3
+#define I40E_GLPRT_MLFC_MLFC_SHIFT 0
+#define I40E_GLPRT_MLFC_MLFC_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_MLFC_MLFC_SHIFT)
+#define I40E_GLPRT_MPRCH(_i) (0x003005C4 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_MPRCH_MAX_INDEX 3
+#define I40E_GLPRT_MPRCH_MPRCH_SHIFT 0
+#define I40E_GLPRT_MPRCH_MPRCH_MASK I40E_MASK(0xFFFF, I40E_GLPRT_MPRCH_MPRCH_SHIFT)
+#define I40E_GLPRT_MPRCL(_i) (0x003005C0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_MPRCL_MAX_INDEX 3
+#define I40E_GLPRT_MPRCL_MPRCL_SHIFT 0
+#define I40E_GLPRT_MPRCL_MPRCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_MPRCL_MPRCL_SHIFT)
+#define I40E_GLPRT_MPTCH(_i) (0x003009E4 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_MPTCH_MAX_INDEX 3
+#define I40E_GLPRT_MPTCH_MPTCH_SHIFT 0
+#define I40E_GLPRT_MPTCH_MPTCH_MASK I40E_MASK(0xFFFF, I40E_GLPRT_MPTCH_MPTCH_SHIFT)
+#define I40E_GLPRT_MPTCL(_i) (0x003009E0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_MPTCL_MAX_INDEX 3
+#define I40E_GLPRT_MPTCL_MPTCL_SHIFT 0
+#define I40E_GLPRT_MPTCL_MPTCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_MPTCL_MPTCL_SHIFT)
+#define I40E_GLPRT_MRFC(_i) (0x00300040 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_MRFC_MAX_INDEX 3
+#define I40E_GLPRT_MRFC_MRFC_SHIFT 0
+#define I40E_GLPRT_MRFC_MRFC_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_MRFC_MRFC_SHIFT)
+#define I40E_GLPRT_PRC1023H(_i) (0x00300504 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PRC1023H_MAX_INDEX 3
+#define I40E_GLPRT_PRC1023H_PRC1023H_SHIFT 0
+#define I40E_GLPRT_PRC1023H_PRC1023H_MASK I40E_MASK(0xFFFF, I40E_GLPRT_PRC1023H_PRC1023H_SHIFT)
+#define I40E_GLPRT_PRC1023L(_i) (0x00300500 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PRC1023L_MAX_INDEX 3
+#define I40E_GLPRT_PRC1023L_PRC1023L_SHIFT 0
+#define I40E_GLPRT_PRC1023L_PRC1023L_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_PRC1023L_PRC1023L_SHIFT)
+#define I40E_GLPRT_PRC127H(_i) (0x003004A4 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PRC127H_MAX_INDEX 3
+#define I40E_GLPRT_PRC127H_PRC127H_SHIFT 0
+#define I40E_GLPRT_PRC127H_PRC127H_MASK I40E_MASK(0xFFFF, I40E_GLPRT_PRC127H_PRC127H_SHIFT)
+#define I40E_GLPRT_PRC127L(_i) (0x003004A0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PRC127L_MAX_INDEX 3
+#define I40E_GLPRT_PRC127L_PRC127L_SHIFT 0
+#define I40E_GLPRT_PRC127L_PRC127L_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_PRC127L_PRC127L_SHIFT)
+#define I40E_GLPRT_PRC1522H(_i) (0x00300524 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PRC1522H_MAX_INDEX 3
+#define I40E_GLPRT_PRC1522H_PRC1522H_SHIFT 0
+#define I40E_GLPRT_PRC1522H_PRC1522H_MASK I40E_MASK(0xFFFF, I40E_GLPRT_PRC1522H_PRC1522H_SHIFT)
+#define I40E_GLPRT_PRC1522L(_i) (0x00300520 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PRC1522L_MAX_INDEX 3
+#define I40E_GLPRT_PRC1522L_PRC1522L_SHIFT 0
+#define I40E_GLPRT_PRC1522L_PRC1522L_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_PRC1522L_PRC1522L_SHIFT)
+#define I40E_GLPRT_PRC255H(_i) (0x003004C4 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PRC255H_MAX_INDEX 3
+#define I40E_GLPRT_PRC255H_PRTPRC255H_SHIFT 0
+#define I40E_GLPRT_PRC255H_PRTPRC255H_MASK I40E_MASK(0xFFFF, I40E_GLPRT_PRC255H_PRTPRC255H_SHIFT)
+#define I40E_GLPRT_PRC255L(_i) (0x003004C0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PRC255L_MAX_INDEX 3
+#define I40E_GLPRT_PRC255L_PRC255L_SHIFT 0
+#define I40E_GLPRT_PRC255L_PRC255L_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_PRC255L_PRC255L_SHIFT)
+#define I40E_GLPRT_PRC511H(_i) (0x003004E4 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PRC511H_MAX_INDEX 3
+#define I40E_GLPRT_PRC511H_PRC511H_SHIFT 0
+#define I40E_GLPRT_PRC511H_PRC511H_MASK I40E_MASK(0xFFFF, I40E_GLPRT_PRC511H_PRC511H_SHIFT)
+#define I40E_GLPRT_PRC511L(_i) (0x003004E0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PRC511L_MAX_INDEX 3
+#define I40E_GLPRT_PRC511L_PRC511L_SHIFT 0
+#define I40E_GLPRT_PRC511L_PRC511L_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_PRC511L_PRC511L_SHIFT)
+#define I40E_GLPRT_PRC64H(_i) (0x00300484 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PRC64H_MAX_INDEX 3
+#define I40E_GLPRT_PRC64H_PRC64H_SHIFT 0
+#define I40E_GLPRT_PRC64H_PRC64H_MASK I40E_MASK(0xFFFF, I40E_GLPRT_PRC64H_PRC64H_SHIFT)
+#define I40E_GLPRT_PRC64L(_i) (0x00300480 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PRC64L_MAX_INDEX 3
+#define I40E_GLPRT_PRC64L_PRC64L_SHIFT 0
+#define I40E_GLPRT_PRC64L_PRC64L_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_PRC64L_PRC64L_SHIFT)
+#define I40E_GLPRT_PRC9522H(_i) (0x00300544 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PRC9522H_MAX_INDEX 3
+#define I40E_GLPRT_PRC9522H_PRC1522H_SHIFT 0
+#define I40E_GLPRT_PRC9522H_PRC1522H_MASK I40E_MASK(0xFFFF, I40E_GLPRT_PRC9522H_PRC1522H_SHIFT)
+#define I40E_GLPRT_PRC9522L(_i) (0x00300540 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PRC9522L_MAX_INDEX 3
+#define I40E_GLPRT_PRC9522L_PRC1522L_SHIFT 0
+#define I40E_GLPRT_PRC9522L_PRC1522L_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_PRC9522L_PRC1522L_SHIFT)
+#define I40E_GLPRT_PTC1023H(_i) (0x00300724 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PTC1023H_MAX_INDEX 3
+#define I40E_GLPRT_PTC1023H_PTC1023H_SHIFT 0
+#define I40E_GLPRT_PTC1023H_PTC1023H_MASK I40E_MASK(0xFFFF, I40E_GLPRT_PTC1023H_PTC1023H_SHIFT)
+#define I40E_GLPRT_PTC1023L(_i) (0x00300720 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PTC1023L_MAX_INDEX 3
+#define I40E_GLPRT_PTC1023L_PTC1023L_SHIFT 0
+#define I40E_GLPRT_PTC1023L_PTC1023L_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_PTC1023L_PTC1023L_SHIFT)
+#define I40E_GLPRT_PTC127H(_i) (0x003006C4 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PTC127H_MAX_INDEX 3
+#define I40E_GLPRT_PTC127H_PTC127H_SHIFT 0
+#define I40E_GLPRT_PTC127H_PTC127H_MASK I40E_MASK(0xFFFF, I40E_GLPRT_PTC127H_PTC127H_SHIFT)
+#define I40E_GLPRT_PTC127L(_i) (0x003006C0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PTC127L_MAX_INDEX 3
+#define I40E_GLPRT_PTC127L_PTC127L_SHIFT 0
+#define I40E_GLPRT_PTC127L_PTC127L_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_PTC127L_PTC127L_SHIFT)
+#define I40E_GLPRT_PTC1522H(_i) (0x00300744 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PTC1522H_MAX_INDEX 3
+#define I40E_GLPRT_PTC1522H_PTC1522H_SHIFT 0
+#define I40E_GLPRT_PTC1522H_PTC1522H_MASK I40E_MASK(0xFFFF, I40E_GLPRT_PTC1522H_PTC1522H_SHIFT)
+#define I40E_GLPRT_PTC1522L(_i) (0x00300740 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PTC1522L_MAX_INDEX 3
+#define I40E_GLPRT_PTC1522L_PTC1522L_SHIFT 0
+#define I40E_GLPRT_PTC1522L_PTC1522L_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_PTC1522L_PTC1522L_SHIFT)
+#define I40E_GLPRT_PTC255H(_i) (0x003006E4 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PTC255H_MAX_INDEX 3
+#define I40E_GLPRT_PTC255H_PTC255H_SHIFT 0
+#define I40E_GLPRT_PTC255H_PTC255H_MASK I40E_MASK(0xFFFF, I40E_GLPRT_PTC255H_PTC255H_SHIFT)
+#define I40E_GLPRT_PTC255L(_i) (0x003006E0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PTC255L_MAX_INDEX 3
+#define I40E_GLPRT_PTC255L_PTC255L_SHIFT 0
+#define I40E_GLPRT_PTC255L_PTC255L_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_PTC255L_PTC255L_SHIFT)
+#define I40E_GLPRT_PTC511H(_i) (0x00300704 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PTC511H_MAX_INDEX 3
+#define I40E_GLPRT_PTC511H_PTC511H_SHIFT 0
+#define I40E_GLPRT_PTC511H_PTC511H_MASK I40E_MASK(0xFFFF, I40E_GLPRT_PTC511H_PTC511H_SHIFT)
+#define I40E_GLPRT_PTC511L(_i) (0x00300700 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PTC511L_MAX_INDEX 3
+#define I40E_GLPRT_PTC511L_PTC511L_SHIFT 0
+#define I40E_GLPRT_PTC511L_PTC511L_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_PTC511L_PTC511L_SHIFT)
+#define I40E_GLPRT_PTC64H(_i) (0x003006A4 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PTC64H_MAX_INDEX 3
+#define I40E_GLPRT_PTC64H_PTC64H_SHIFT 0
+#define I40E_GLPRT_PTC64H_PTC64H_MASK I40E_MASK(0xFFFF, I40E_GLPRT_PTC64H_PTC64H_SHIFT)
+#define I40E_GLPRT_PTC64L(_i) (0x003006A0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PTC64L_MAX_INDEX 3
+#define I40E_GLPRT_PTC64L_PTC64L_SHIFT 0
+#define I40E_GLPRT_PTC64L_PTC64L_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_PTC64L_PTC64L_SHIFT)
+#define I40E_GLPRT_PTC9522H(_i) (0x00300764 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PTC9522H_MAX_INDEX 3
+#define I40E_GLPRT_PTC9522H_PTC9522H_SHIFT 0
+#define I40E_GLPRT_PTC9522H_PTC9522H_MASK I40E_MASK(0xFFFF, I40E_GLPRT_PTC9522H_PTC9522H_SHIFT)
+#define I40E_GLPRT_PTC9522L(_i) (0x00300760 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PTC9522L_MAX_INDEX 3
+#define I40E_GLPRT_PTC9522L_PTC9522L_SHIFT 0
+#define I40E_GLPRT_PTC9522L_PTC9522L_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_PTC9522L_PTC9522L_SHIFT)
+#define I40E_GLPRT_PXOFFRXC(_i, _j) (0x00300280 + ((_i) * 8 + (_j) * 32)) /* _i=0...3, _j=0...7 */ /* Reset: CORER */
+#define I40E_GLPRT_PXOFFRXC_MAX_INDEX 3
+#define I40E_GLPRT_PXOFFRXC_PRPXOFFRXCNT_SHIFT 0
+#define I40E_GLPRT_PXOFFRXC_PRPXOFFRXCNT_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_PXOFFRXC_PRPXOFFRXCNT_SHIFT)
+#define I40E_GLPRT_PXOFFTXC(_i, _j) (0x00300880 + ((_i) * 8 + (_j) * 32)) /* _i=0...3, _j=0...7 */ /* Reset: CORER */
+#define I40E_GLPRT_PXOFFTXC_MAX_INDEX 3
+#define I40E_GLPRT_PXOFFTXC_PRPXOFFTXCNT_SHIFT 0
+#define I40E_GLPRT_PXOFFTXC_PRPXOFFTXCNT_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_PXOFFTXC_PRPXOFFTXCNT_SHIFT)
+#define I40E_GLPRT_PXONRXC(_i, _j) (0x00300180 + ((_i) * 8 + (_j) * 32)) /* _i=0...3, _j=0...7 */ /* Reset: CORER */
+#define I40E_GLPRT_PXONRXC_MAX_INDEX 3
+#define I40E_GLPRT_PXONRXC_PRPXONRXCNT_SHIFT 0
+#define I40E_GLPRT_PXONRXC_PRPXONRXCNT_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_PXONRXC_PRPXONRXCNT_SHIFT)
+#define I40E_GLPRT_PXONTXC(_i, _j) (0x00300780 + ((_i) * 8 + (_j) * 32)) /* _i=0...3, _j=0...7 */ /* Reset: CORER */
+#define I40E_GLPRT_PXONTXC_MAX_INDEX 3
+#define I40E_GLPRT_PXONTXC_PRPXONTXC_SHIFT 0
+#define I40E_GLPRT_PXONTXC_PRPXONTXC_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_PXONTXC_PRPXONTXC_SHIFT)
+#define I40E_GLPRT_RDPC(_i) (0x00300600 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_RDPC_MAX_INDEX 3
+#define I40E_GLPRT_RDPC_RDPC_SHIFT 0
+#define I40E_GLPRT_RDPC_RDPC_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_RDPC_RDPC_SHIFT)
+#define I40E_GLPRT_RFC(_i) (0x00300560 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_RFC_MAX_INDEX 3
+#define I40E_GLPRT_RFC_RFC_SHIFT 0
+#define I40E_GLPRT_RFC_RFC_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_RFC_RFC_SHIFT)
+#define I40E_GLPRT_RJC(_i) (0x00300580 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_RJC_MAX_INDEX 3
+#define I40E_GLPRT_RJC_RJC_SHIFT 0
+#define I40E_GLPRT_RJC_RJC_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_RJC_RJC_SHIFT)
+#define I40E_GLPRT_RLEC(_i) (0x003000A0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_RLEC_MAX_INDEX 3
+#define I40E_GLPRT_RLEC_RLEC_SHIFT 0
+#define I40E_GLPRT_RLEC_RLEC_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_RLEC_RLEC_SHIFT)
+#define I40E_GLPRT_ROC(_i) (0x00300120 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_ROC_MAX_INDEX 3
+#define I40E_GLPRT_ROC_ROC_SHIFT 0
+#define I40E_GLPRT_ROC_ROC_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_ROC_ROC_SHIFT)
+#define I40E_GLPRT_RUC(_i) (0x00300100 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_RUC_MAX_INDEX 3
+#define I40E_GLPRT_RUC_RUC_SHIFT 0
+#define I40E_GLPRT_RUC_RUC_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_RUC_RUC_SHIFT)
+#define I40E_GLPRT_RUPP(_i) (0x00300660 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_RUPP_MAX_INDEX 3
+#define I40E_GLPRT_RUPP_RUPP_SHIFT 0
+#define I40E_GLPRT_RUPP_RUPP_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_RUPP_RUPP_SHIFT)
+#define I40E_GLPRT_RXON2OFFCNT(_i, _j) (0x00300380 + ((_i) * 8 + (_j) * 32)) /* _i=0...3, _j=0...7 */ /* Reset: CORER */
+#define I40E_GLPRT_RXON2OFFCNT_MAX_INDEX 3
+#define I40E_GLPRT_RXON2OFFCNT_PRRXON2OFFCNT_SHIFT 0
+#define I40E_GLPRT_RXON2OFFCNT_PRRXON2OFFCNT_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_RXON2OFFCNT_PRRXON2OFFCNT_SHIFT)
+#define I40E_GLPRT_TDOLD(_i) (0x00300A20 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_TDOLD_MAX_INDEX 3
+#define I40E_GLPRT_TDOLD_GLPRT_TDOLD_SHIFT 0
+#define I40E_GLPRT_TDOLD_GLPRT_TDOLD_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_TDOLD_GLPRT_TDOLD_SHIFT)
+#define I40E_GLPRT_UPRCH(_i) (0x003005A4 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_UPRCH_MAX_INDEX 3
+#define I40E_GLPRT_UPRCH_UPRCH_SHIFT 0
+#define I40E_GLPRT_UPRCH_UPRCH_MASK I40E_MASK(0xFFFF, I40E_GLPRT_UPRCH_UPRCH_SHIFT)
+#define I40E_GLPRT_UPRCL(_i) (0x003005A0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_UPRCL_MAX_INDEX 3
+#define I40E_GLPRT_UPRCL_UPRCL_SHIFT 0
+#define I40E_GLPRT_UPRCL_UPRCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_UPRCL_UPRCL_SHIFT)
+#define I40E_GLPRT_UPTCH(_i) (0x003009C4 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_UPTCH_MAX_INDEX 3
+#define I40E_GLPRT_UPTCH_UPTCH_SHIFT 0
+#define I40E_GLPRT_UPTCH_UPTCH_MASK I40E_MASK(0xFFFF, I40E_GLPRT_UPTCH_UPTCH_SHIFT)
+#define I40E_GLPRT_UPTCL(_i) (0x003009C0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_UPTCL_MAX_INDEX 3
+#define I40E_GLPRT_UPTCL_VUPTCH_SHIFT 0
+#define I40E_GLPRT_UPTCL_VUPTCH_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPRT_UPTCL_VUPTCH_SHIFT)
+#define I40E_GLSW_BPRCH(_i) (0x00370104 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_BPRCH_MAX_INDEX 15
+#define I40E_GLSW_BPRCH_BPRCH_SHIFT 0
+#define I40E_GLSW_BPRCH_BPRCH_MASK I40E_MASK(0xFFFF, I40E_GLSW_BPRCH_BPRCH_SHIFT)
+#define I40E_GLSW_BPRCL(_i) (0x00370100 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_BPRCL_MAX_INDEX 15
+#define I40E_GLSW_BPRCL_BPRCL_SHIFT 0
+#define I40E_GLSW_BPRCL_BPRCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLSW_BPRCL_BPRCL_SHIFT)
+#define I40E_GLSW_BPTCH(_i) (0x00340104 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_BPTCH_MAX_INDEX 15
+#define I40E_GLSW_BPTCH_BPTCH_SHIFT 0
+#define I40E_GLSW_BPTCH_BPTCH_MASK I40E_MASK(0xFFFF, I40E_GLSW_BPTCH_BPTCH_SHIFT)
+#define I40E_GLSW_BPTCL(_i) (0x00340100 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_BPTCL_MAX_INDEX 15
+#define I40E_GLSW_BPTCL_BPTCL_SHIFT 0
+#define I40E_GLSW_BPTCL_BPTCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLSW_BPTCL_BPTCL_SHIFT)
+#define I40E_GLSW_GORCH(_i) (0x0035C004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_GORCH_MAX_INDEX 15
+#define I40E_GLSW_GORCH_GORCH_SHIFT 0
+#define I40E_GLSW_GORCH_GORCH_MASK I40E_MASK(0xFFFF, I40E_GLSW_GORCH_GORCH_SHIFT)
+#define I40E_GLSW_GORCL(_i) (0x0035c000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_GORCL_MAX_INDEX 15
+#define I40E_GLSW_GORCL_GORCL_SHIFT 0
+#define I40E_GLSW_GORCL_GORCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLSW_GORCL_GORCL_SHIFT)
+#define I40E_GLSW_GOTCH(_i) (0x0032C004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_GOTCH_MAX_INDEX 15
+#define I40E_GLSW_GOTCH_GOTCH_SHIFT 0
+#define I40E_GLSW_GOTCH_GOTCH_MASK I40E_MASK(0xFFFF, I40E_GLSW_GOTCH_GOTCH_SHIFT)
+#define I40E_GLSW_GOTCL(_i) (0x0032c000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_GOTCL_MAX_INDEX 15
+#define I40E_GLSW_GOTCL_GOTCL_SHIFT 0
+#define I40E_GLSW_GOTCL_GOTCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLSW_GOTCL_GOTCL_SHIFT)
+#define I40E_GLSW_MPRCH(_i) (0x00370084 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_MPRCH_MAX_INDEX 15
+#define I40E_GLSW_MPRCH_MPRCH_SHIFT 0
+#define I40E_GLSW_MPRCH_MPRCH_MASK I40E_MASK(0xFFFF, I40E_GLSW_MPRCH_MPRCH_SHIFT)
+#define I40E_GLSW_MPRCL(_i) (0x00370080 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_MPRCL_MAX_INDEX 15
+#define I40E_GLSW_MPRCL_MPRCL_SHIFT 0
+#define I40E_GLSW_MPRCL_MPRCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLSW_MPRCL_MPRCL_SHIFT)
+#define I40E_GLSW_MPTCH(_i) (0x00340084 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_MPTCH_MAX_INDEX 15
+#define I40E_GLSW_MPTCH_MPTCH_SHIFT 0
+#define I40E_GLSW_MPTCH_MPTCH_MASK I40E_MASK(0xFFFF, I40E_GLSW_MPTCH_MPTCH_SHIFT)
+#define I40E_GLSW_MPTCL(_i) (0x00340080 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_MPTCL_MAX_INDEX 15
+#define I40E_GLSW_MPTCL_MPTCL_SHIFT 0
+#define I40E_GLSW_MPTCL_MPTCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLSW_MPTCL_MPTCL_SHIFT)
+#define I40E_GLSW_RUPP(_i) (0x00370180 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_RUPP_MAX_INDEX 15
+#define I40E_GLSW_RUPP_RUPP_SHIFT 0
+#define I40E_GLSW_RUPP_RUPP_MASK I40E_MASK(0xFFFFFFFF, I40E_GLSW_RUPP_RUPP_SHIFT)
+#define I40E_GLSW_TDPC(_i) (0x00348000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_TDPC_MAX_INDEX 15
+#define I40E_GLSW_TDPC_TDPC_SHIFT 0
+#define I40E_GLSW_TDPC_TDPC_MASK I40E_MASK(0xFFFFFFFF, I40E_GLSW_TDPC_TDPC_SHIFT)
+#define I40E_GLSW_UPRCH(_i) (0x00370004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_UPRCH_MAX_INDEX 15
+#define I40E_GLSW_UPRCH_UPRCH_SHIFT 0
+#define I40E_GLSW_UPRCH_UPRCH_MASK I40E_MASK(0xFFFF, I40E_GLSW_UPRCH_UPRCH_SHIFT)
+#define I40E_GLSW_UPRCL(_i) (0x00370000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_UPRCL_MAX_INDEX 15
+#define I40E_GLSW_UPRCL_UPRCL_SHIFT 0
+#define I40E_GLSW_UPRCL_UPRCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLSW_UPRCL_UPRCL_SHIFT)
+#define I40E_GLSW_UPTCH(_i) (0x00340004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_UPTCH_MAX_INDEX 15
+#define I40E_GLSW_UPTCH_UPTCH_SHIFT 0
+#define I40E_GLSW_UPTCH_UPTCH_MASK I40E_MASK(0xFFFF, I40E_GLSW_UPTCH_UPTCH_SHIFT)
+#define I40E_GLSW_UPTCL(_i) (0x00340000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_UPTCL_MAX_INDEX 15
+#define I40E_GLSW_UPTCL_UPTCL_SHIFT 0
+#define I40E_GLSW_UPTCL_UPTCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLSW_UPTCL_UPTCL_SHIFT)
+#define I40E_GLV_BPRCH(_i) (0x0036D804 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_BPRCH_MAX_INDEX 383
+#define I40E_GLV_BPRCH_BPRCH_SHIFT 0
+#define I40E_GLV_BPRCH_BPRCH_MASK I40E_MASK(0xFFFF, I40E_GLV_BPRCH_BPRCH_SHIFT)
+#define I40E_GLV_BPRCL(_i) (0x0036d800 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_BPRCL_MAX_INDEX 383
+#define I40E_GLV_BPRCL_BPRCL_SHIFT 0
+#define I40E_GLV_BPRCL_BPRCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLV_BPRCL_BPRCL_SHIFT)
+#define I40E_GLV_BPTCH(_i) (0x0033D804 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_BPTCH_MAX_INDEX 383
+#define I40E_GLV_BPTCH_BPTCH_SHIFT 0
+#define I40E_GLV_BPTCH_BPTCH_MASK I40E_MASK(0xFFFF, I40E_GLV_BPTCH_BPTCH_SHIFT)
+#define I40E_GLV_BPTCL(_i) (0x0033d800 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_BPTCL_MAX_INDEX 383
+#define I40E_GLV_BPTCL_BPTCL_SHIFT 0
+#define I40E_GLV_BPTCL_BPTCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLV_BPTCL_BPTCL_SHIFT)
+#define I40E_GLV_GORCH(_i) (0x00358004 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_GORCH_MAX_INDEX 383
+#define I40E_GLV_GORCH_GORCH_SHIFT 0
+#define I40E_GLV_GORCH_GORCH_MASK I40E_MASK(0xFFFF, I40E_GLV_GORCH_GORCH_SHIFT)
+#define I40E_GLV_GORCL(_i) (0x00358000 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_GORCL_MAX_INDEX 383
+#define I40E_GLV_GORCL_GORCL_SHIFT 0
+#define I40E_GLV_GORCL_GORCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLV_GORCL_GORCL_SHIFT)
+#define I40E_GLV_GOTCH(_i) (0x00328004 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_GOTCH_MAX_INDEX 383
+#define I40E_GLV_GOTCH_GOTCH_SHIFT 0
+#define I40E_GLV_GOTCH_GOTCH_MASK I40E_MASK(0xFFFF, I40E_GLV_GOTCH_GOTCH_SHIFT)
+#define I40E_GLV_GOTCL(_i) (0x00328000 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_GOTCL_MAX_INDEX 383
+#define I40E_GLV_GOTCL_GOTCL_SHIFT 0
+#define I40E_GLV_GOTCL_GOTCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLV_GOTCL_GOTCL_SHIFT)
+#define I40E_GLV_MPRCH(_i) (0x0036CC04 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_MPRCH_MAX_INDEX 383
+#define I40E_GLV_MPRCH_MPRCH_SHIFT 0
+#define I40E_GLV_MPRCH_MPRCH_MASK I40E_MASK(0xFFFF, I40E_GLV_MPRCH_MPRCH_SHIFT)
+#define I40E_GLV_MPRCL(_i) (0x0036cc00 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_MPRCL_MAX_INDEX 383
+#define I40E_GLV_MPRCL_MPRCL_SHIFT 0
+#define I40E_GLV_MPRCL_MPRCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLV_MPRCL_MPRCL_SHIFT)
+#define I40E_GLV_MPTCH(_i) (0x0033CC04 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_MPTCH_MAX_INDEX 383
+#define I40E_GLV_MPTCH_MPTCH_SHIFT 0
+#define I40E_GLV_MPTCH_MPTCH_MASK I40E_MASK(0xFFFF, I40E_GLV_MPTCH_MPTCH_SHIFT)
+#define I40E_GLV_MPTCL(_i) (0x0033cc00 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_MPTCL_MAX_INDEX 383
+#define I40E_GLV_MPTCL_MPTCL_SHIFT 0
+#define I40E_GLV_MPTCL_MPTCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLV_MPTCL_MPTCL_SHIFT)
+#define I40E_GLV_RDPC(_i) (0x00310000 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_RDPC_MAX_INDEX 383
+#define I40E_GLV_RDPC_RDPC_SHIFT 0
+#define I40E_GLV_RDPC_RDPC_MASK I40E_MASK(0xFFFFFFFF, I40E_GLV_RDPC_RDPC_SHIFT)
+#define I40E_GLV_RUPP(_i) (0x0036E400 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_RUPP_MAX_INDEX 383
+#define I40E_GLV_RUPP_RUPP_SHIFT 0
+#define I40E_GLV_RUPP_RUPP_MASK I40E_MASK(0xFFFFFFFF, I40E_GLV_RUPP_RUPP_SHIFT)
+#define I40E_GLV_TEPC(_VSI) (0x00344000 + ((_VSI) * 4)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_TEPC_MAX_INDEX 383
+#define I40E_GLV_TEPC_TEPC_SHIFT 0
+#define I40E_GLV_TEPC_TEPC_MASK I40E_MASK(0xFFFFFFFF, I40E_GLV_TEPC_TEPC_SHIFT)
+#define I40E_GLV_UPRCH(_i) (0x0036C004 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_UPRCH_MAX_INDEX 383
+#define I40E_GLV_UPRCH_UPRCH_SHIFT 0
+#define I40E_GLV_UPRCH_UPRCH_MASK I40E_MASK(0xFFFF, I40E_GLV_UPRCH_UPRCH_SHIFT)
+#define I40E_GLV_UPRCL(_i) (0x0036c000 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_UPRCL_MAX_INDEX 383
+#define I40E_GLV_UPRCL_UPRCL_SHIFT 0
+#define I40E_GLV_UPRCL_UPRCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLV_UPRCL_UPRCL_SHIFT)
+#define I40E_GLV_UPTCH(_i) (0x0033C004 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_UPTCH_MAX_INDEX 383
+#define I40E_GLV_UPTCH_GLVUPTCH_SHIFT 0
+#define I40E_GLV_UPTCH_GLVUPTCH_MASK I40E_MASK(0xFFFF, I40E_GLV_UPTCH_GLVUPTCH_SHIFT)
+#define I40E_GLV_UPTCL(_i) (0x0033c000 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_UPTCL_MAX_INDEX 383
+#define I40E_GLV_UPTCL_UPTCL_SHIFT 0
+#define I40E_GLV_UPTCL_UPTCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLV_UPTCL_UPTCL_SHIFT)
+#define I40E_GLVEBTC_RBCH(_i, _j) (0x00364004 + ((_i) * 8 + (_j) * 64)) /* _i=0...7, _j=0...15 */ /* Reset: CORER */
+#define I40E_GLVEBTC_RBCH_MAX_INDEX 7
+#define I40E_GLVEBTC_RBCH_TCBCH_SHIFT 0
+#define I40E_GLVEBTC_RBCH_TCBCH_MASK I40E_MASK(0xFFFF, I40E_GLVEBTC_RBCH_TCBCH_SHIFT)
+#define I40E_GLVEBTC_RBCL(_i, _j) (0x00364000 + ((_i) * 8 + (_j) * 64)) /* _i=0...7, _j=0...15 */ /* Reset: CORER */
+#define I40E_GLVEBTC_RBCL_MAX_INDEX 7
+#define I40E_GLVEBTC_RBCL_TCBCL_SHIFT 0
+#define I40E_GLVEBTC_RBCL_TCBCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLVEBTC_RBCL_TCBCL_SHIFT)
+#define I40E_GLVEBTC_RPCH(_i, _j) (0x00368004 + ((_i) * 8 + (_j) * 64)) /* _i=0...7, _j=0...15 */ /* Reset: CORER */
+#define I40E_GLVEBTC_RPCH_MAX_INDEX 7
+#define I40E_GLVEBTC_RPCH_TCPCH_SHIFT 0
+#define I40E_GLVEBTC_RPCH_TCPCH_MASK I40E_MASK(0xFFFF, I40E_GLVEBTC_RPCH_TCPCH_SHIFT)
+#define I40E_GLVEBTC_RPCL(_i, _j) (0x00368000 + ((_i) * 8 + (_j) * 64)) /* _i=0...7, _j=0...15 */ /* Reset: CORER */
+#define I40E_GLVEBTC_RPCL_MAX_INDEX 7
+#define I40E_GLVEBTC_RPCL_TCPCL_SHIFT 0
+#define I40E_GLVEBTC_RPCL_TCPCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLVEBTC_RPCL_TCPCL_SHIFT)
+#define I40E_GLVEBTC_TBCH(_i, _j) (0x00334004 + ((_i) * 8 + (_j) * 64)) /* _i=0...7, _j=0...15 */ /* Reset: CORER */
+#define I40E_GLVEBTC_TBCH_MAX_INDEX 7
+#define I40E_GLVEBTC_TBCH_TCBCH_SHIFT 0
+#define I40E_GLVEBTC_TBCH_TCBCH_MASK I40E_MASK(0xFFFF, I40E_GLVEBTC_TBCH_TCBCH_SHIFT)
+#define I40E_GLVEBTC_TBCL(_i, _j) (0x00334000 + ((_i) * 8 + (_j) * 64)) /* _i=0...7, _j=0...15 */ /* Reset: CORER */
+#define I40E_GLVEBTC_TBCL_MAX_INDEX 7
+#define I40E_GLVEBTC_TBCL_TCBCL_SHIFT 0
+#define I40E_GLVEBTC_TBCL_TCBCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLVEBTC_TBCL_TCBCL_SHIFT)
+#define I40E_GLVEBTC_TPCH(_i, _j) (0x00338004 + ((_i) * 8 + (_j) * 64)) /* _i=0...7, _j=0...15 */ /* Reset: CORER */
+#define I40E_GLVEBTC_TPCH_MAX_INDEX 7
+#define I40E_GLVEBTC_TPCH_TCPCH_SHIFT 0
+#define I40E_GLVEBTC_TPCH_TCPCH_MASK I40E_MASK(0xFFFF, I40E_GLVEBTC_TPCH_TCPCH_SHIFT)
+#define I40E_GLVEBTC_TPCL(_i, _j) (0x00338000 + ((_i) * 8 + (_j) * 64)) /* _i=0...7, _j=0...15 */ /* Reset: CORER */
+#define I40E_GLVEBTC_TPCL_MAX_INDEX 7
+#define I40E_GLVEBTC_TPCL_TCPCL_SHIFT 0
+#define I40E_GLVEBTC_TPCL_TCPCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLVEBTC_TPCL_TCPCL_SHIFT)
+#define I40E_GLVEBVL_BPCH(_i) (0x00374804 + ((_i) * 8)) /* _i=0...127 */ /* Reset: CORER */
+#define I40E_GLVEBVL_BPCH_MAX_INDEX 127
+#define I40E_GLVEBVL_BPCH_VLBPCH_SHIFT 0
+#define I40E_GLVEBVL_BPCH_VLBPCH_MASK I40E_MASK(0xFFFF, I40E_GLVEBVL_BPCH_VLBPCH_SHIFT)
+#define I40E_GLVEBVL_BPCL(_i) (0x00374800 + ((_i) * 8)) /* _i=0...127 */ /* Reset: CORER */
+#define I40E_GLVEBVL_BPCL_MAX_INDEX 127
+#define I40E_GLVEBVL_BPCL_VLBPCL_SHIFT 0
+#define I40E_GLVEBVL_BPCL_VLBPCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLVEBVL_BPCL_VLBPCL_SHIFT)
+#define I40E_GLVEBVL_GORCH(_i) (0x00360004 + ((_i) * 8)) /* _i=0...127 */ /* Reset: CORER */
+#define I40E_GLVEBVL_GORCH_MAX_INDEX 127
+#define I40E_GLVEBVL_GORCH_VLBCH_SHIFT 0
+#define I40E_GLVEBVL_GORCH_VLBCH_MASK I40E_MASK(0xFFFF, I40E_GLVEBVL_GORCH_VLBCH_SHIFT)
+#define I40E_GLVEBVL_GORCL(_i) (0x00360000 + ((_i) * 8)) /* _i=0...127 */ /* Reset: CORER */
+#define I40E_GLVEBVL_GORCL_MAX_INDEX 127
+#define I40E_GLVEBVL_GORCL_VLBCL_SHIFT 0
+#define I40E_GLVEBVL_GORCL_VLBCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLVEBVL_GORCL_VLBCL_SHIFT)
+#define I40E_GLVEBVL_GOTCH(_i) (0x00330004 + ((_i) * 8)) /* _i=0...127 */ /* Reset: CORER */
+#define I40E_GLVEBVL_GOTCH_MAX_INDEX 127
+#define I40E_GLVEBVL_GOTCH_VLBCH_SHIFT 0
+#define I40E_GLVEBVL_GOTCH_VLBCH_MASK I40E_MASK(0xFFFF, I40E_GLVEBVL_GOTCH_VLBCH_SHIFT)
+#define I40E_GLVEBVL_GOTCL(_i) (0x00330000 + ((_i) * 8)) /* _i=0...127 */ /* Reset: CORER */
+#define I40E_GLVEBVL_GOTCL_MAX_INDEX 127
+#define I40E_GLVEBVL_GOTCL_VLBCL_SHIFT 0
+#define I40E_GLVEBVL_GOTCL_VLBCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLVEBVL_GOTCL_VLBCL_SHIFT)
+#define I40E_GLVEBVL_MPCH(_i) (0x00374404 + ((_i) * 8)) /* _i=0...127 */ /* Reset: CORER */
+#define I40E_GLVEBVL_MPCH_MAX_INDEX 127
+#define I40E_GLVEBVL_MPCH_VLMPCH_SHIFT 0
+#define I40E_GLVEBVL_MPCH_VLMPCH_MASK I40E_MASK(0xFFFF, I40E_GLVEBVL_MPCH_VLMPCH_SHIFT)
+#define I40E_GLVEBVL_MPCL(_i) (0x00374400 + ((_i) * 8)) /* _i=0...127 */ /* Reset: CORER */
+#define I40E_GLVEBVL_MPCL_MAX_INDEX 127
+#define I40E_GLVEBVL_MPCL_VLMPCL_SHIFT 0
+#define I40E_GLVEBVL_MPCL_VLMPCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLVEBVL_MPCL_VLMPCL_SHIFT)
+#define I40E_GLVEBVL_UPCH(_i) (0x00374004 + ((_i) * 8)) /* _i=0...127 */ /* Reset: CORER */
+#define I40E_GLVEBVL_UPCH_MAX_INDEX 127
+#define I40E_GLVEBVL_UPCH_VLUPCH_SHIFT 0
+#define I40E_GLVEBVL_UPCH_VLUPCH_MASK I40E_MASK(0xFFFF, I40E_GLVEBVL_UPCH_VLUPCH_SHIFT)
+#define I40E_GLVEBVL_UPCL(_i) (0x00374000 + ((_i) * 8)) /* _i=0...127 */ /* Reset: CORER */
+#define I40E_GLVEBVL_UPCL_MAX_INDEX 127
+#define I40E_GLVEBVL_UPCL_VLUPCL_SHIFT 0
+#define I40E_GLVEBVL_UPCL_VLUPCL_MASK I40E_MASK(0xFFFFFFFF, I40E_GLVEBVL_UPCL_VLUPCL_SHIFT)
+#define I40E_GL_MTG_FLU_MSK_H 0x00269F4C /* Reset: CORER */
+#define I40E_GL_MTG_FLU_MSK_H_MASK_HIGH_SHIFT 0
+#define I40E_GL_MTG_FLU_MSK_H_MASK_HIGH_MASK I40E_MASK(0xFFFF, I40E_GL_MTG_FLU_MSK_H_MASK_HIGH_SHIFT)
+#define I40E_GL_SWR_DEF_ACT(_i) (0x00270200 + ((_i) * 4)) /* _i=0...35 */ /* Reset: CORER */
+#define I40E_GL_SWR_DEF_ACT_MAX_INDEX 35
+#define I40E_GL_SWR_DEF_ACT_DEF_ACTION_SHIFT 0
+#define I40E_GL_SWR_DEF_ACT_DEF_ACTION_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_SWR_DEF_ACT_DEF_ACTION_SHIFT)
+#define I40E_GL_SWR_DEF_ACT_EN(_i) (0x0026CFB8 + ((_i) * 4)) /* _i=0...1 */ /* Reset: CORER */
+#define I40E_GL_SWR_DEF_ACT_EN_MAX_INDEX 1
+#define I40E_GL_SWR_DEF_ACT_EN_DEF_ACT_EN_BITMAP_SHIFT 0
+#define I40E_GL_SWR_DEF_ACT_EN_DEF_ACT_EN_BITMAP_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_SWR_DEF_ACT_EN_DEF_ACT_EN_BITMAP_SHIFT)
+#define I40E_PRTTSYN_ADJ 0x001E4280 /* Reset: GLOBR */
+#define I40E_PRTTSYN_ADJ_TSYNADJ_SHIFT 0
+#define I40E_PRTTSYN_ADJ_TSYNADJ_MASK I40E_MASK(0x7FFFFFFF, I40E_PRTTSYN_ADJ_TSYNADJ_SHIFT)
+#define I40E_PRTTSYN_ADJ_SIGN_SHIFT 31
+#define I40E_PRTTSYN_ADJ_SIGN_MASK I40E_MASK(0x1, I40E_PRTTSYN_ADJ_SIGN_SHIFT)
+#define I40E_PRTTSYN_AUX_0(_i) (0x001E42A0 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */
+#define I40E_PRTTSYN_AUX_0_MAX_INDEX 1
+#define I40E_PRTTSYN_AUX_0_OUT_ENA_SHIFT 0
+#define I40E_PRTTSYN_AUX_0_OUT_ENA_MASK I40E_MASK(0x1, I40E_PRTTSYN_AUX_0_OUT_ENA_SHIFT)
+#define I40E_PRTTSYN_AUX_0_OUTMOD_SHIFT 1
+#define I40E_PRTTSYN_AUX_0_OUTMOD_MASK I40E_MASK(0x3, I40E_PRTTSYN_AUX_0_OUTMOD_SHIFT)
+#define I40E_PRTTSYN_AUX_0_OUTLVL_SHIFT 3
+#define I40E_PRTTSYN_AUX_0_OUTLVL_MASK I40E_MASK(0x1, I40E_PRTTSYN_AUX_0_OUTLVL_SHIFT)
+#define I40E_PRTTSYN_AUX_0_PULSEW_SHIFT 8
+#define I40E_PRTTSYN_AUX_0_PULSEW_MASK I40E_MASK(0xF, I40E_PRTTSYN_AUX_0_PULSEW_SHIFT)
+#define I40E_PRTTSYN_AUX_0_EVNTLVL_SHIFT 16
+#define I40E_PRTTSYN_AUX_0_EVNTLVL_MASK I40E_MASK(0x3, I40E_PRTTSYN_AUX_0_EVNTLVL_SHIFT)
+#define I40E_PRTTSYN_AUX_1(_i) (0x001E42E0 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */
+#define I40E_PRTTSYN_AUX_1_MAX_INDEX 1
+#define I40E_PRTTSYN_AUX_1_INSTNT_SHIFT 0
+#define I40E_PRTTSYN_AUX_1_INSTNT_MASK I40E_MASK(0x1, I40E_PRTTSYN_AUX_1_INSTNT_SHIFT)
+#define I40E_PRTTSYN_AUX_1_SAMPLE_TIME_SHIFT 1
+#define I40E_PRTTSYN_AUX_1_SAMPLE_TIME_MASK I40E_MASK(0x1, I40E_PRTTSYN_AUX_1_SAMPLE_TIME_SHIFT)
+#define I40E_PRTTSYN_CLKO(_i) (0x001E4240 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */
+#define I40E_PRTTSYN_CLKO_MAX_INDEX 1
+#define I40E_PRTTSYN_CLKO_TSYNCLKO_SHIFT 0
+#define I40E_PRTTSYN_CLKO_TSYNCLKO_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTTSYN_CLKO_TSYNCLKO_SHIFT)
+#define I40E_PRTTSYN_CTL0 0x001E4200 /* Reset: GLOBR */
+#define I40E_PRTTSYN_CTL0_CLEAR_TSYNTIMER_SHIFT 0
+#define I40E_PRTTSYN_CTL0_CLEAR_TSYNTIMER_MASK I40E_MASK(0x1, I40E_PRTTSYN_CTL0_CLEAR_TSYNTIMER_SHIFT)
+#define I40E_PRTTSYN_CTL0_TXTIME_INT_ENA_SHIFT 1
+#define I40E_PRTTSYN_CTL0_TXTIME_INT_ENA_MASK I40E_MASK(0x1, I40E_PRTTSYN_CTL0_TXTIME_INT_ENA_SHIFT)
+#define I40E_PRTTSYN_CTL0_EVENT_INT_ENA_SHIFT 2
+#define I40E_PRTTSYN_CTL0_EVENT_INT_ENA_MASK I40E_MASK(0x1, I40E_PRTTSYN_CTL0_EVENT_INT_ENA_SHIFT)
+#define I40E_PRTTSYN_CTL0_TGT_INT_ENA_SHIFT 3
+#define I40E_PRTTSYN_CTL0_TGT_INT_ENA_MASK I40E_MASK(0x1, I40E_PRTTSYN_CTL0_TGT_INT_ENA_SHIFT)
+#define I40E_PRTTSYN_CTL0_PF_ID_SHIFT 8
+#define I40E_PRTTSYN_CTL0_PF_ID_MASK I40E_MASK(0xF, I40E_PRTTSYN_CTL0_PF_ID_SHIFT)
+#define I40E_PRTTSYN_CTL0_TSYNACT_SHIFT 12
+#define I40E_PRTTSYN_CTL0_TSYNACT_MASK I40E_MASK(0x3, I40E_PRTTSYN_CTL0_TSYNACT_SHIFT)
+#define I40E_PRTTSYN_CTL0_TSYNENA_SHIFT 31
+#define I40E_PRTTSYN_CTL0_TSYNENA_MASK I40E_MASK(0x1, I40E_PRTTSYN_CTL0_TSYNENA_SHIFT)
+#define I40E_PRTTSYN_CTL1 0x00085020 /* Reset: CORER */
+#define I40E_PRTTSYN_CTL1_V1MESSTYPE0_SHIFT 0
+#define I40E_PRTTSYN_CTL1_V1MESSTYPE0_MASK I40E_MASK(0xFF, I40E_PRTTSYN_CTL1_V1MESSTYPE0_SHIFT)
+#define I40E_PRTTSYN_CTL1_V1MESSTYPE1_SHIFT 8
+#define I40E_PRTTSYN_CTL1_V1MESSTYPE1_MASK I40E_MASK(0xFF, I40E_PRTTSYN_CTL1_V1MESSTYPE1_SHIFT)
+#define I40E_PRTTSYN_CTL1_V2MESSTYPE0_SHIFT 16
+#define I40E_PRTTSYN_CTL1_V2MESSTYPE0_MASK I40E_MASK(0xF, I40E_PRTTSYN_CTL1_V2MESSTYPE0_SHIFT)
+#define I40E_PRTTSYN_CTL1_V2MESSTYPE1_SHIFT 20
+#define I40E_PRTTSYN_CTL1_V2MESSTYPE1_MASK I40E_MASK(0xF, I40E_PRTTSYN_CTL1_V2MESSTYPE1_SHIFT)
+#define I40E_PRTTSYN_CTL1_TSYNTYPE_SHIFT 24
+#define I40E_PRTTSYN_CTL1_TSYNTYPE_MASK I40E_MASK(0x3, I40E_PRTTSYN_CTL1_TSYNTYPE_SHIFT)
+#define I40E_PRTTSYN_CTL1_UDP_ENA_SHIFT 26
+#define I40E_PRTTSYN_CTL1_UDP_ENA_MASK I40E_MASK(0x3, I40E_PRTTSYN_CTL1_UDP_ENA_SHIFT)
+#define I40E_PRTTSYN_CTL1_TSYNENA_SHIFT 31
+#define I40E_PRTTSYN_CTL1_TSYNENA_MASK I40E_MASK(0x1, I40E_PRTTSYN_CTL1_TSYNENA_SHIFT)
+#define I40E_PRTTSYN_EVNT_H(_i) (0x001E40C0 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */
+#define I40E_PRTTSYN_EVNT_H_MAX_INDEX 1
+#define I40E_PRTTSYN_EVNT_H_TSYNEVNT_H_SHIFT 0
+#define I40E_PRTTSYN_EVNT_H_TSYNEVNT_H_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTTSYN_EVNT_H_TSYNEVNT_H_SHIFT)
+#define I40E_PRTTSYN_EVNT_L(_i) (0x001E4080 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */
+#define I40E_PRTTSYN_EVNT_L_MAX_INDEX 1
+#define I40E_PRTTSYN_EVNT_L_TSYNEVNT_L_SHIFT 0
+#define I40E_PRTTSYN_EVNT_L_TSYNEVNT_L_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTTSYN_EVNT_L_TSYNEVNT_L_SHIFT)
+#define I40E_PRTTSYN_INC_H 0x001E4060 /* Reset: GLOBR */
+#define I40E_PRTTSYN_INC_H_TSYNINC_H_SHIFT 0
+#define I40E_PRTTSYN_INC_H_TSYNINC_H_MASK I40E_MASK(0x3F, I40E_PRTTSYN_INC_H_TSYNINC_H_SHIFT)
+#define I40E_PRTTSYN_INC_L 0x001E4040 /* Reset: GLOBR */
+#define I40E_PRTTSYN_INC_L_TSYNINC_L_SHIFT 0
+#define I40E_PRTTSYN_INC_L_TSYNINC_L_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTTSYN_INC_L_TSYNINC_L_SHIFT)
+#define I40E_PRTTSYN_RXTIME_H(_i) (0x00085040 + ((_i) * 32)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_PRTTSYN_RXTIME_H_MAX_INDEX 3
+#define I40E_PRTTSYN_RXTIME_H_RXTIEM_H_SHIFT 0
+#define I40E_PRTTSYN_RXTIME_H_RXTIEM_H_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTTSYN_RXTIME_H_RXTIEM_H_SHIFT)
+#define I40E_PRTTSYN_RXTIME_L(_i) (0x000850C0 + ((_i) * 32)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_PRTTSYN_RXTIME_L_MAX_INDEX 3
+#define I40E_PRTTSYN_RXTIME_L_RXTIEM_L_SHIFT 0
+#define I40E_PRTTSYN_RXTIME_L_RXTIEM_L_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTTSYN_RXTIME_L_RXTIEM_L_SHIFT)
+#define I40E_PRTTSYN_STAT_0 0x001E4220 /* Reset: GLOBR */
+#define I40E_PRTTSYN_STAT_0_EVENT0_SHIFT 0
+#define I40E_PRTTSYN_STAT_0_EVENT0_MASK I40E_MASK(0x1, I40E_PRTTSYN_STAT_0_EVENT0_SHIFT)
+#define I40E_PRTTSYN_STAT_0_EVENT1_SHIFT 1
+#define I40E_PRTTSYN_STAT_0_EVENT1_MASK I40E_MASK(0x1, I40E_PRTTSYN_STAT_0_EVENT1_SHIFT)
+#define I40E_PRTTSYN_STAT_0_TGT0_SHIFT 2
+#define I40E_PRTTSYN_STAT_0_TGT0_MASK I40E_MASK(0x1, I40E_PRTTSYN_STAT_0_TGT0_SHIFT)
+#define I40E_PRTTSYN_STAT_0_TGT1_SHIFT 3
+#define I40E_PRTTSYN_STAT_0_TGT1_MASK I40E_MASK(0x1, I40E_PRTTSYN_STAT_0_TGT1_SHIFT)
+#define I40E_PRTTSYN_STAT_0_TXTIME_SHIFT 4
+#define I40E_PRTTSYN_STAT_0_TXTIME_MASK I40E_MASK(0x1, I40E_PRTTSYN_STAT_0_TXTIME_SHIFT)
+#define I40E_PRTTSYN_STAT_1 0x00085140 /* Reset: CORER */
+#define I40E_PRTTSYN_STAT_1_RXT0_SHIFT 0
+#define I40E_PRTTSYN_STAT_1_RXT0_MASK I40E_MASK(0x1, I40E_PRTTSYN_STAT_1_RXT0_SHIFT)
+#define I40E_PRTTSYN_STAT_1_RXT1_SHIFT 1
+#define I40E_PRTTSYN_STAT_1_RXT1_MASK I40E_MASK(0x1, I40E_PRTTSYN_STAT_1_RXT1_SHIFT)
+#define I40E_PRTTSYN_STAT_1_RXT2_SHIFT 2
+#define I40E_PRTTSYN_STAT_1_RXT2_MASK I40E_MASK(0x1, I40E_PRTTSYN_STAT_1_RXT2_SHIFT)
+#define I40E_PRTTSYN_STAT_1_RXT3_SHIFT 3
+#define I40E_PRTTSYN_STAT_1_RXT3_MASK I40E_MASK(0x1, I40E_PRTTSYN_STAT_1_RXT3_SHIFT)
+#define I40E_PRTTSYN_TGT_H(_i) (0x001E4180 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */
+#define I40E_PRTTSYN_TGT_H_MAX_INDEX 1
+#define I40E_PRTTSYN_TGT_H_TSYNTGTT_H_SHIFT 0
+#define I40E_PRTTSYN_TGT_H_TSYNTGTT_H_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTTSYN_TGT_H_TSYNTGTT_H_SHIFT)
+#define I40E_PRTTSYN_TGT_L(_i) (0x001E4140 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */
+#define I40E_PRTTSYN_TGT_L_MAX_INDEX 1
+#define I40E_PRTTSYN_TGT_L_TSYNTGTT_L_SHIFT 0
+#define I40E_PRTTSYN_TGT_L_TSYNTGTT_L_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTTSYN_TGT_L_TSYNTGTT_L_SHIFT)
+#define I40E_PRTTSYN_TIME_H 0x001E4120 /* Reset: GLOBR */
+#define I40E_PRTTSYN_TIME_H_TSYNTIME_H_SHIFT 0
+#define I40E_PRTTSYN_TIME_H_TSYNTIME_H_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTTSYN_TIME_H_TSYNTIME_H_SHIFT)
+#define I40E_PRTTSYN_TIME_L 0x001E4100 /* Reset: GLOBR */
+#define I40E_PRTTSYN_TIME_L_TSYNTIME_L_SHIFT 0
+#define I40E_PRTTSYN_TIME_L_TSYNTIME_L_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTTSYN_TIME_L_TSYNTIME_L_SHIFT)
+#define I40E_PRTTSYN_TXTIME_H 0x001E41E0 /* Reset: GLOBR */
+#define I40E_PRTTSYN_TXTIME_H_TXTIEM_H_SHIFT 0
+#define I40E_PRTTSYN_TXTIME_H_TXTIEM_H_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTTSYN_TXTIME_H_TXTIEM_H_SHIFT)
+#define I40E_PRTTSYN_TXTIME_L 0x001E41C0 /* Reset: GLOBR */
+#define I40E_PRTTSYN_TXTIME_L_TXTIEM_L_SHIFT 0
+#define I40E_PRTTSYN_TXTIME_L_TXTIEM_L_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTTSYN_TXTIME_L_TXTIEM_L_SHIFT)
+#define I40E_GL_MDET_RX 0x0012A510 /* Reset: CORER */
+#define I40E_GL_MDET_RX_FUNCTION_SHIFT 0
+#define I40E_GL_MDET_RX_FUNCTION_MASK I40E_MASK(0xFF, I40E_GL_MDET_RX_FUNCTION_SHIFT)
+#define I40E_GL_MDET_RX_EVENT_SHIFT 8
+#define I40E_GL_MDET_RX_EVENT_MASK I40E_MASK(0x1FF, I40E_GL_MDET_RX_EVENT_SHIFT)
+#define I40E_GL_MDET_RX_QUEUE_SHIFT 17
+#define I40E_GL_MDET_RX_QUEUE_MASK I40E_MASK(0x3FFF, I40E_GL_MDET_RX_QUEUE_SHIFT)
+#define I40E_GL_MDET_RX_VALID_SHIFT 31
+#define I40E_GL_MDET_RX_VALID_MASK I40E_MASK(0x1, I40E_GL_MDET_RX_VALID_SHIFT)
+#define I40E_GL_MDET_TX 0x000E6480 /* Reset: CORER */
+#define I40E_GL_MDET_TX_QUEUE_SHIFT 0
+#define I40E_GL_MDET_TX_QUEUE_MASK I40E_MASK(0xFFF, I40E_GL_MDET_TX_QUEUE_SHIFT)
+#define I40E_GL_MDET_TX_VF_NUM_SHIFT 12
+#define I40E_GL_MDET_TX_VF_NUM_MASK I40E_MASK(0x1FF, I40E_GL_MDET_TX_VF_NUM_SHIFT)
+#define I40E_GL_MDET_TX_PF_NUM_SHIFT 21
+#define I40E_GL_MDET_TX_PF_NUM_MASK I40E_MASK(0xF, I40E_GL_MDET_TX_PF_NUM_SHIFT)
+#define I40E_GL_MDET_TX_EVENT_SHIFT 25
+#define I40E_GL_MDET_TX_EVENT_MASK I40E_MASK(0x1F, I40E_GL_MDET_TX_EVENT_SHIFT)
+#define I40E_GL_MDET_TX_VALID_SHIFT 31
+#define I40E_GL_MDET_TX_VALID_MASK I40E_MASK(0x1, I40E_GL_MDET_TX_VALID_SHIFT)
+#define I40E_PF_MDET_RX 0x0012A400 /* Reset: CORER */
+#define I40E_PF_MDET_RX_VALID_SHIFT 0
+#define I40E_PF_MDET_RX_VALID_MASK I40E_MASK(0x1, I40E_PF_MDET_RX_VALID_SHIFT)
+#define I40E_PF_MDET_TX 0x000E6400 /* Reset: CORER */
+#define I40E_PF_MDET_TX_VALID_SHIFT 0
+#define I40E_PF_MDET_TX_VALID_MASK I40E_MASK(0x1, I40E_PF_MDET_TX_VALID_SHIFT)
+#define I40E_PF_VT_PFALLOC 0x001C0500 /* Reset: CORER */
+#define I40E_PF_VT_PFALLOC_FIRSTVF_SHIFT 0
+#define I40E_PF_VT_PFALLOC_FIRSTVF_MASK I40E_MASK(0xFF, I40E_PF_VT_PFALLOC_FIRSTVF_SHIFT)
+#define I40E_PF_VT_PFALLOC_LASTVF_SHIFT 8
+#define I40E_PF_VT_PFALLOC_LASTVF_MASK I40E_MASK(0xFF, I40E_PF_VT_PFALLOC_LASTVF_SHIFT)
+#define I40E_PF_VT_PFALLOC_VALID_SHIFT 31
+#define I40E_PF_VT_PFALLOC_VALID_MASK I40E_MASK(0x1, I40E_PF_VT_PFALLOC_VALID_SHIFT)
+#define I40E_VP_MDET_RX(_VF) (0x0012A000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: CORER */
+#define I40E_VP_MDET_RX_MAX_INDEX 127
+#define I40E_VP_MDET_RX_VALID_SHIFT 0
+#define I40E_VP_MDET_RX_VALID_MASK I40E_MASK(0x1, I40E_VP_MDET_RX_VALID_SHIFT)
+#define I40E_VP_MDET_TX(_VF) (0x000E6000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: CORER */
+#define I40E_VP_MDET_TX_MAX_INDEX 127
+#define I40E_VP_MDET_TX_VALID_SHIFT 0
+#define I40E_VP_MDET_TX_VALID_MASK I40E_MASK(0x1, I40E_VP_MDET_TX_VALID_SHIFT)
+#define I40E_GLPM_WUMC 0x0006C800 /* Reset: POR */
+#define I40E_GLPM_WUMC_NOTCO_SHIFT 0
+#define I40E_GLPM_WUMC_NOTCO_MASK I40E_MASK(0x1, I40E_GLPM_WUMC_NOTCO_SHIFT)
+#define I40E_GLPM_WUMC_SRST_PIN_VAL_SHIFT 1
+#define I40E_GLPM_WUMC_SRST_PIN_VAL_MASK I40E_MASK(0x1, I40E_GLPM_WUMC_SRST_PIN_VAL_SHIFT)
+#define I40E_GLPM_WUMC_ROL_MODE_SHIFT 2
+#define I40E_GLPM_WUMC_ROL_MODE_MASK I40E_MASK(0x1, I40E_GLPM_WUMC_ROL_MODE_SHIFT)
+#define I40E_GLPM_WUMC_RESERVED_4_SHIFT 3
+#define I40E_GLPM_WUMC_RESERVED_4_MASK I40E_MASK(0x1FFF, I40E_GLPM_WUMC_RESERVED_4_SHIFT)
+#define I40E_GLPM_WUMC_MNG_WU_PF_SHIFT 16
+#define I40E_GLPM_WUMC_MNG_WU_PF_MASK I40E_MASK(0xFFFF, I40E_GLPM_WUMC_MNG_WU_PF_SHIFT)
+#define I40E_PFPM_APM 0x000B8080 /* Reset: POR */
+#define I40E_PFPM_APM_APME_SHIFT 0
+#define I40E_PFPM_APM_APME_MASK I40E_MASK(0x1, I40E_PFPM_APM_APME_SHIFT)
+#define I40E_PFPM_FHFT_LENGTH(_i) (0x0006A000 + ((_i) * 128)) /* _i=0...7 */ /* Reset: POR */
+#define I40E_PFPM_FHFT_LENGTH_MAX_INDEX 7
+#define I40E_PFPM_FHFT_LENGTH_LENGTH_SHIFT 0
+#define I40E_PFPM_FHFT_LENGTH_LENGTH_MASK I40E_MASK(0xFF, I40E_PFPM_FHFT_LENGTH_LENGTH_SHIFT)
+#define I40E_PFPM_WUC 0x0006B200 /* Reset: POR */
+#define I40E_PFPM_WUC_EN_APM_D0_SHIFT 5
+#define I40E_PFPM_WUC_EN_APM_D0_MASK I40E_MASK(0x1, I40E_PFPM_WUC_EN_APM_D0_SHIFT)
+#define I40E_PFPM_WUFC 0x0006B400 /* Reset: POR */
+#define I40E_PFPM_WUFC_LNKC_SHIFT 0
+#define I40E_PFPM_WUFC_LNKC_MASK I40E_MASK(0x1, I40E_PFPM_WUFC_LNKC_SHIFT)
+#define I40E_PFPM_WUFC_MAG_SHIFT 1
+#define I40E_PFPM_WUFC_MAG_MASK I40E_MASK(0x1, I40E_PFPM_WUFC_MAG_SHIFT)
+#define I40E_PFPM_WUFC_MNG_SHIFT 3
+#define I40E_PFPM_WUFC_MNG_MASK I40E_MASK(0x1, I40E_PFPM_WUFC_MNG_SHIFT)
+#define I40E_PFPM_WUFC_FLX0_ACT_SHIFT 4
+#define I40E_PFPM_WUFC_FLX0_ACT_MASK I40E_MASK(0x1, I40E_PFPM_WUFC_FLX0_ACT_SHIFT)
+#define I40E_PFPM_WUFC_FLX1_ACT_SHIFT 5
+#define I40E_PFPM_WUFC_FLX1_ACT_MASK I40E_MASK(0x1, I40E_PFPM_WUFC_FLX1_ACT_SHIFT)
+#define I40E_PFPM_WUFC_FLX2_ACT_SHIFT 6
+#define I40E_PFPM_WUFC_FLX2_ACT_MASK I40E_MASK(0x1, I40E_PFPM_WUFC_FLX2_ACT_SHIFT)
+#define I40E_PFPM_WUFC_FLX3_ACT_SHIFT 7
+#define I40E_PFPM_WUFC_FLX3_ACT_MASK I40E_MASK(0x1, I40E_PFPM_WUFC_FLX3_ACT_SHIFT)
+#define I40E_PFPM_WUFC_FLX4_ACT_SHIFT 8
+#define I40E_PFPM_WUFC_FLX4_ACT_MASK I40E_MASK(0x1, I40E_PFPM_WUFC_FLX4_ACT_SHIFT)
+#define I40E_PFPM_WUFC_FLX5_ACT_SHIFT 9
+#define I40E_PFPM_WUFC_FLX5_ACT_MASK I40E_MASK(0x1, I40E_PFPM_WUFC_FLX5_ACT_SHIFT)
+#define I40E_PFPM_WUFC_FLX6_ACT_SHIFT 10
+#define I40E_PFPM_WUFC_FLX6_ACT_MASK I40E_MASK(0x1, I40E_PFPM_WUFC_FLX6_ACT_SHIFT)
+#define I40E_PFPM_WUFC_FLX7_ACT_SHIFT 11
+#define I40E_PFPM_WUFC_FLX7_ACT_MASK I40E_MASK(0x1, I40E_PFPM_WUFC_FLX7_ACT_SHIFT)
+#define I40E_PFPM_WUFC_FLX0_SHIFT 16
+#define I40E_PFPM_WUFC_FLX0_MASK I40E_MASK(0x1, I40E_PFPM_WUFC_FLX0_SHIFT)
+#define I40E_PFPM_WUFC_FLX1_SHIFT 17
+#define I40E_PFPM_WUFC_FLX1_MASK I40E_MASK(0x1, I40E_PFPM_WUFC_FLX1_SHIFT)
+#define I40E_PFPM_WUFC_FLX2_SHIFT 18
+#define I40E_PFPM_WUFC_FLX2_MASK I40E_MASK(0x1, I40E_PFPM_WUFC_FLX2_SHIFT)
+#define I40E_PFPM_WUFC_FLX3_SHIFT 19
+#define I40E_PFPM_WUFC_FLX3_MASK I40E_MASK(0x1, I40E_PFPM_WUFC_FLX3_SHIFT)
+#define I40E_PFPM_WUFC_FLX4_SHIFT 20
+#define I40E_PFPM_WUFC_FLX4_MASK I40E_MASK(0x1, I40E_PFPM_WUFC_FLX4_SHIFT)
+#define I40E_PFPM_WUFC_FLX5_SHIFT 21
+#define I40E_PFPM_WUFC_FLX5_MASK I40E_MASK(0x1, I40E_PFPM_WUFC_FLX5_SHIFT)
+#define I40E_PFPM_WUFC_FLX6_SHIFT 22
+#define I40E_PFPM_WUFC_FLX6_MASK I40E_MASK(0x1, I40E_PFPM_WUFC_FLX6_SHIFT)
+#define I40E_PFPM_WUFC_FLX7_SHIFT 23
+#define I40E_PFPM_WUFC_FLX7_MASK I40E_MASK(0x1, I40E_PFPM_WUFC_FLX7_SHIFT)
+#define I40E_PFPM_WUFC_FW_RST_WK_SHIFT 31
+#define I40E_PFPM_WUFC_FW_RST_WK_MASK I40E_MASK(0x1, I40E_PFPM_WUFC_FW_RST_WK_SHIFT)
+#define I40E_PFPM_WUS 0x0006B600 /* Reset: POR */
+#define I40E_PFPM_WUS_LNKC_SHIFT 0
+#define I40E_PFPM_WUS_LNKC_MASK I40E_MASK(0x1, I40E_PFPM_WUS_LNKC_SHIFT)
+#define I40E_PFPM_WUS_MAG_SHIFT 1
+#define I40E_PFPM_WUS_MAG_MASK I40E_MASK(0x1, I40E_PFPM_WUS_MAG_SHIFT)
+#define I40E_PFPM_WUS_PME_STATUS_SHIFT 2
+#define I40E_PFPM_WUS_PME_STATUS_MASK I40E_MASK(0x1, I40E_PFPM_WUS_PME_STATUS_SHIFT)
+#define I40E_PFPM_WUS_MNG_SHIFT 3
+#define I40E_PFPM_WUS_MNG_MASK I40E_MASK(0x1, I40E_PFPM_WUS_MNG_SHIFT)
+#define I40E_PFPM_WUS_FLX0_SHIFT 16
+#define I40E_PFPM_WUS_FLX0_MASK I40E_MASK(0x1, I40E_PFPM_WUS_FLX0_SHIFT)
+#define I40E_PFPM_WUS_FLX1_SHIFT 17
+#define I40E_PFPM_WUS_FLX1_MASK I40E_MASK(0x1, I40E_PFPM_WUS_FLX1_SHIFT)
+#define I40E_PFPM_WUS_FLX2_SHIFT 18
+#define I40E_PFPM_WUS_FLX2_MASK I40E_MASK(0x1, I40E_PFPM_WUS_FLX2_SHIFT)
+#define I40E_PFPM_WUS_FLX3_SHIFT 19
+#define I40E_PFPM_WUS_FLX3_MASK I40E_MASK(0x1, I40E_PFPM_WUS_FLX3_SHIFT)
+#define I40E_PFPM_WUS_FLX4_SHIFT 20
+#define I40E_PFPM_WUS_FLX4_MASK I40E_MASK(0x1, I40E_PFPM_WUS_FLX4_SHIFT)
+#define I40E_PFPM_WUS_FLX5_SHIFT 21
+#define I40E_PFPM_WUS_FLX5_MASK I40E_MASK(0x1, I40E_PFPM_WUS_FLX5_SHIFT)
+#define I40E_PFPM_WUS_FLX6_SHIFT 22
+#define I40E_PFPM_WUS_FLX6_MASK I40E_MASK(0x1, I40E_PFPM_WUS_FLX6_SHIFT)
+#define I40E_PFPM_WUS_FLX7_SHIFT 23
+#define I40E_PFPM_WUS_FLX7_MASK I40E_MASK(0x1, I40E_PFPM_WUS_FLX7_SHIFT)
+#define I40E_PFPM_WUS_FW_RST_WK_SHIFT 31
+#define I40E_PFPM_WUS_FW_RST_WK_MASK I40E_MASK(0x1, I40E_PFPM_WUS_FW_RST_WK_SHIFT)
+#define I40E_PRTPM_FHFHR 0x0006C000 /* Reset: POR */
+#define I40E_PRTPM_FHFHR_UNICAST_SHIFT 0
+#define I40E_PRTPM_FHFHR_UNICAST_MASK I40E_MASK(0x1, I40E_PRTPM_FHFHR_UNICAST_SHIFT)
+#define I40E_PRTPM_FHFHR_MULTICAST_SHIFT 1
+#define I40E_PRTPM_FHFHR_MULTICAST_MASK I40E_MASK(0x1, I40E_PRTPM_FHFHR_MULTICAST_SHIFT)
+#define I40E_PRTPM_SAH(_i) (0x001E44C0 + ((_i) * 32)) /* _i=0...3 */ /* Reset: PFR */
+#define I40E_PRTPM_SAH_MAX_INDEX 3
+#define I40E_PRTPM_SAH_PFPM_SAH_SHIFT 0
+#define I40E_PRTPM_SAH_PFPM_SAH_MASK I40E_MASK(0xFFFF, I40E_PRTPM_SAH_PFPM_SAH_SHIFT)
+#define I40E_PRTPM_SAH_PF_NUM_SHIFT 26
+#define I40E_PRTPM_SAH_PF_NUM_MASK I40E_MASK(0xF, I40E_PRTPM_SAH_PF_NUM_SHIFT)
+#define I40E_PRTPM_SAH_MC_MAG_EN_SHIFT 30
+#define I40E_PRTPM_SAH_MC_MAG_EN_MASK I40E_MASK(0x1, I40E_PRTPM_SAH_MC_MAG_EN_SHIFT)
+#define I40E_PRTPM_SAH_AV_SHIFT 31
+#define I40E_PRTPM_SAH_AV_MASK I40E_MASK(0x1, I40E_PRTPM_SAH_AV_SHIFT)
+#define I40E_PRTPM_SAL(_i) (0x001E4440 + ((_i) * 32)) /* _i=0...3 */ /* Reset: PFR */
+#define I40E_PRTPM_SAL_MAX_INDEX 3
+#define I40E_PRTPM_SAL_PFPM_SAL_SHIFT 0
+#define I40E_PRTPM_SAL_PFPM_SAL_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTPM_SAL_PFPM_SAL_SHIFT)
+#define I40E_VF_ARQBAH1 0x00006000 /* Reset: EMPR */
+#define I40E_VF_ARQBAH1_ARQBAH_SHIFT 0
+#define I40E_VF_ARQBAH1_ARQBAH_MASK I40E_MASK(0xFFFFFFFF, I40E_VF_ARQBAH1_ARQBAH_SHIFT)
+#define I40E_VF_ARQBAL1 0x00006C00 /* Reset: EMPR */
+#define I40E_VF_ARQBAL1_ARQBAL_SHIFT 0
+#define I40E_VF_ARQBAL1_ARQBAL_MASK I40E_MASK(0xFFFFFFFF, I40E_VF_ARQBAL1_ARQBAL_SHIFT)
+#define I40E_VF_ARQH1 0x00007400 /* Reset: EMPR */
+#define I40E_VF_ARQH1_ARQH_SHIFT 0
+#define I40E_VF_ARQH1_ARQH_MASK I40E_MASK(0x3FF, I40E_VF_ARQH1_ARQH_SHIFT)
+#define I40E_VF_ARQLEN1 0x00008000 /* Reset: EMPR */
+#define I40E_VF_ARQLEN1_ARQLEN_SHIFT 0
+#define I40E_VF_ARQLEN1_ARQLEN_MASK I40E_MASK(0x3FF, I40E_VF_ARQLEN1_ARQLEN_SHIFT)
+#define I40E_VF_ARQLEN1_ARQVFE_SHIFT 28
+#define I40E_VF_ARQLEN1_ARQVFE_MASK I40E_MASK(0x1, I40E_VF_ARQLEN1_ARQVFE_SHIFT)
+#define I40E_VF_ARQLEN1_ARQOVFL_SHIFT 29
+#define I40E_VF_ARQLEN1_ARQOVFL_MASK I40E_MASK(0x1, I40E_VF_ARQLEN1_ARQOVFL_SHIFT)
+#define I40E_VF_ARQLEN1_ARQCRIT_SHIFT 30
+#define I40E_VF_ARQLEN1_ARQCRIT_MASK I40E_MASK(0x1, I40E_VF_ARQLEN1_ARQCRIT_SHIFT)
+#define I40E_VF_ARQLEN1_ARQENABLE_SHIFT 31
+#define I40E_VF_ARQLEN1_ARQENABLE_MASK I40E_MASK(0x1, I40E_VF_ARQLEN1_ARQENABLE_SHIFT)
+#define I40E_VF_ARQT1 0x00007000 /* Reset: EMPR */
+#define I40E_VF_ARQT1_ARQT_SHIFT 0
+#define I40E_VF_ARQT1_ARQT_MASK I40E_MASK(0x3FF, I40E_VF_ARQT1_ARQT_SHIFT)
+#define I40E_VF_ATQBAH1 0x00007800 /* Reset: EMPR */
+#define I40E_VF_ATQBAH1_ATQBAH_SHIFT 0
+#define I40E_VF_ATQBAH1_ATQBAH_MASK I40E_MASK(0xFFFFFFFF, I40E_VF_ATQBAH1_ATQBAH_SHIFT)
+#define I40E_VF_ATQBAL1 0x00007C00 /* Reset: EMPR */
+#define I40E_VF_ATQBAL1_ATQBAL_SHIFT 0
+#define I40E_VF_ATQBAL1_ATQBAL_MASK I40E_MASK(0xFFFFFFFF, I40E_VF_ATQBAL1_ATQBAL_SHIFT)
+#define I40E_VF_ATQH1 0x00006400 /* Reset: EMPR */
+#define I40E_VF_ATQH1_ATQH_SHIFT 0
+#define I40E_VF_ATQH1_ATQH_MASK I40E_MASK(0x3FF, I40E_VF_ATQH1_ATQH_SHIFT)
+#define I40E_VF_ATQLEN1 0x00006800 /* Reset: EMPR */
+#define I40E_VF_ATQLEN1_ATQLEN_SHIFT 0
+#define I40E_VF_ATQLEN1_ATQLEN_MASK I40E_MASK(0x3FF, I40E_VF_ATQLEN1_ATQLEN_SHIFT)
+#define I40E_VF_ATQLEN1_ATQVFE_SHIFT 28
+#define I40E_VF_ATQLEN1_ATQVFE_MASK I40E_MASK(0x1, I40E_VF_ATQLEN1_ATQVFE_SHIFT)
+#define I40E_VF_ATQLEN1_ATQOVFL_SHIFT 29
+#define I40E_VF_ATQLEN1_ATQOVFL_MASK I40E_MASK(0x1, I40E_VF_ATQLEN1_ATQOVFL_SHIFT)
+#define I40E_VF_ATQLEN1_ATQCRIT_SHIFT 30
+#define I40E_VF_ATQLEN1_ATQCRIT_MASK I40E_MASK(0x1, I40E_VF_ATQLEN1_ATQCRIT_SHIFT)
+#define I40E_VF_ATQLEN1_ATQENABLE_SHIFT 31
+#define I40E_VF_ATQLEN1_ATQENABLE_MASK I40E_MASK(0x1, I40E_VF_ATQLEN1_ATQENABLE_SHIFT)
+#define I40E_VF_ATQT1 0x00008400 /* Reset: EMPR */
+#define I40E_VF_ATQT1_ATQT_SHIFT 0
+#define I40E_VF_ATQT1_ATQT_MASK I40E_MASK(0x3FF, I40E_VF_ATQT1_ATQT_SHIFT)
+#define I40E_VFGEN_RSTAT 0x00008800 /* Reset: VFR */
+#define I40E_VFGEN_RSTAT_VFR_STATE_SHIFT 0
+#define I40E_VFGEN_RSTAT_VFR_STATE_MASK I40E_MASK(0x3, I40E_VFGEN_RSTAT_VFR_STATE_SHIFT)
+#define I40E_VFINT_DYN_CTL01 0x00005C00 /* Reset: VFR */
+#define I40E_VFINT_DYN_CTL01_INTENA_SHIFT 0
+#define I40E_VFINT_DYN_CTL01_INTENA_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTL01_INTENA_SHIFT)
+#define I40E_VFINT_DYN_CTL01_CLEARPBA_SHIFT 1
+#define I40E_VFINT_DYN_CTL01_CLEARPBA_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTL01_CLEARPBA_SHIFT)
+#define I40E_VFINT_DYN_CTL01_SWINT_TRIG_SHIFT 2
+#define I40E_VFINT_DYN_CTL01_SWINT_TRIG_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTL01_SWINT_TRIG_SHIFT)
+#define I40E_VFINT_DYN_CTL01_ITR_INDX_SHIFT 3
+#define I40E_VFINT_DYN_CTL01_ITR_INDX_MASK I40E_MASK(0x3, I40E_VFINT_DYN_CTL01_ITR_INDX_SHIFT)
+#define I40E_VFINT_DYN_CTL01_INTERVAL_SHIFT 5
+#define I40E_VFINT_DYN_CTL01_INTERVAL_MASK I40E_MASK(0xFFF, I40E_VFINT_DYN_CTL01_INTERVAL_SHIFT)
+#define I40E_VFINT_DYN_CTL01_SW_ITR_INDX_ENA_SHIFT 24
+#define I40E_VFINT_DYN_CTL01_SW_ITR_INDX_ENA_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTL01_SW_ITR_INDX_ENA_SHIFT)
+#define I40E_VFINT_DYN_CTL01_SW_ITR_INDX_SHIFT 25
+#define I40E_VFINT_DYN_CTL01_SW_ITR_INDX_MASK I40E_MASK(0x3, I40E_VFINT_DYN_CTL01_SW_ITR_INDX_SHIFT)
+#define I40E_VFINT_DYN_CTL01_INTENA_MSK_SHIFT 31
+#define I40E_VFINT_DYN_CTL01_INTENA_MSK_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTL01_INTENA_MSK_SHIFT)
+#define I40E_VFINT_DYN_CTLN1(_INTVF) (0x00003800 + ((_INTVF) * 4)) /* _i=0...15 */ /* Reset: VFR */
+#define I40E_VFINT_DYN_CTLN1_MAX_INDEX 15
+#define I40E_VFINT_DYN_CTLN1_INTENA_SHIFT 0
+#define I40E_VFINT_DYN_CTLN1_INTENA_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTLN1_INTENA_SHIFT)
+#define I40E_VFINT_DYN_CTLN1_CLEARPBA_SHIFT 1
+#define I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTLN1_CLEARPBA_SHIFT)
+#define I40E_VFINT_DYN_CTLN1_SWINT_TRIG_SHIFT 2
+#define I40E_VFINT_DYN_CTLN1_SWINT_TRIG_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTLN1_SWINT_TRIG_SHIFT)
+#define I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT 3
+#define I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK I40E_MASK(0x3, I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT)
+#define I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT 5
+#define I40E_VFINT_DYN_CTLN1_INTERVAL_MASK I40E_MASK(0xFFF, I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT)
+#define I40E_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_SHIFT 24
+#define I40E_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_SHIFT)
+#define I40E_VFINT_DYN_CTLN1_SW_ITR_INDX_SHIFT 25
+#define I40E_VFINT_DYN_CTLN1_SW_ITR_INDX_MASK I40E_MASK(0x3, I40E_VFINT_DYN_CTLN1_SW_ITR_INDX_SHIFT)
+#define I40E_VFINT_DYN_CTLN1_INTENA_MSK_SHIFT 31
+#define I40E_VFINT_DYN_CTLN1_INTENA_MSK_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTLN1_INTENA_MSK_SHIFT)
+#define I40E_VFINT_ICR0_ENA1 0x00005000 /* Reset: CORER */
+#define I40E_VFINT_ICR0_ENA1_LINK_STAT_CHANGE_SHIFT 25
+#define I40E_VFINT_ICR0_ENA1_LINK_STAT_CHANGE_MASK I40E_MASK(0x1, I40E_VFINT_ICR0_ENA1_LINK_STAT_CHANGE_SHIFT)
+#define I40E_VFINT_ICR0_ENA1_ADMINQ_SHIFT 30
+#define I40E_VFINT_ICR0_ENA1_ADMINQ_MASK I40E_MASK(0x1, I40E_VFINT_ICR0_ENA1_ADMINQ_SHIFT)
+#define I40E_VFINT_ICR0_ENA1_RSVD_SHIFT 31
+#define I40E_VFINT_ICR0_ENA1_RSVD_MASK I40E_MASK(0x1, I40E_VFINT_ICR0_ENA1_RSVD_SHIFT)
+#define I40E_VFINT_ICR01 0x00004800 /* Reset: CORER */
+#define I40E_VFINT_ICR01_INTEVENT_SHIFT 0
+#define I40E_VFINT_ICR01_INTEVENT_MASK I40E_MASK(0x1, I40E_VFINT_ICR01_INTEVENT_SHIFT)
+#define I40E_VFINT_ICR01_QUEUE_0_SHIFT 1
+#define I40E_VFINT_ICR01_QUEUE_0_MASK I40E_MASK(0x1, I40E_VFINT_ICR01_QUEUE_0_SHIFT)
+#define I40E_VFINT_ICR01_QUEUE_1_SHIFT 2
+#define I40E_VFINT_ICR01_QUEUE_1_MASK I40E_MASK(0x1, I40E_VFINT_ICR01_QUEUE_1_SHIFT)
+#define I40E_VFINT_ICR01_QUEUE_2_SHIFT 3
+#define I40E_VFINT_ICR01_QUEUE_2_MASK I40E_MASK(0x1, I40E_VFINT_ICR01_QUEUE_2_SHIFT)
+#define I40E_VFINT_ICR01_QUEUE_3_SHIFT 4
+#define I40E_VFINT_ICR01_QUEUE_3_MASK I40E_MASK(0x1, I40E_VFINT_ICR01_QUEUE_3_SHIFT)
+#define I40E_VFINT_ICR01_LINK_STAT_CHANGE_SHIFT 25
+#define I40E_VFINT_ICR01_LINK_STAT_CHANGE_MASK I40E_MASK(0x1, I40E_VFINT_ICR01_LINK_STAT_CHANGE_SHIFT)
+#define I40E_VFINT_ICR01_ADMINQ_SHIFT 30
+#define I40E_VFINT_ICR01_ADMINQ_MASK I40E_MASK(0x1, I40E_VFINT_ICR01_ADMINQ_SHIFT)
+#define I40E_VFINT_ICR01_SWINT_SHIFT 31
+#define I40E_VFINT_ICR01_SWINT_MASK I40E_MASK(0x1, I40E_VFINT_ICR01_SWINT_SHIFT)
+#define I40E_VFINT_ITR01(_i) (0x00004C00 + ((_i) * 4)) /* _i=0...2 */ /* Reset: VFR */
+#define I40E_VFINT_ITR01_MAX_INDEX 2
+#define I40E_VFINT_ITR01_INTERVAL_SHIFT 0
+#define I40E_VFINT_ITR01_INTERVAL_MASK I40E_MASK(0xFFF, I40E_VFINT_ITR01_INTERVAL_SHIFT)
+#define I40E_VFINT_ITRN1(_i, _INTVF) (0x00002800 + ((_i) * 64 + (_INTVF) * 4)) /* _i=0...2, _INTVF=0...15 */ /* Reset: VFR */
+#define I40E_VFINT_ITRN1_MAX_INDEX 2
+#define I40E_VFINT_ITRN1_INTERVAL_SHIFT 0
+#define I40E_VFINT_ITRN1_INTERVAL_MASK I40E_MASK(0xFFF, I40E_VFINT_ITRN1_INTERVAL_SHIFT)
+#define I40E_VFINT_STAT_CTL01 0x00005400 /* Reset: CORER */
+#define I40E_VFINT_STAT_CTL01_OTHER_ITR_INDX_SHIFT 2
+#define I40E_VFINT_STAT_CTL01_OTHER_ITR_INDX_MASK I40E_MASK(0x3, I40E_VFINT_STAT_CTL01_OTHER_ITR_INDX_SHIFT)
+#define I40E_QRX_TAIL1(_Q) (0x00002000 + ((_Q) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_QRX_TAIL1_MAX_INDEX 15
+#define I40E_QRX_TAIL1_TAIL_SHIFT 0
+#define I40E_QRX_TAIL1_TAIL_MASK I40E_MASK(0x1FFF, I40E_QRX_TAIL1_TAIL_SHIFT)
+#define I40E_QTX_TAIL1(_Q) (0x00000000 + ((_Q) * 4)) /* _i=0...15 */ /* Reset: PFR */
+#define I40E_QTX_TAIL1_MAX_INDEX 15
+#define I40E_QTX_TAIL1_TAIL_SHIFT 0
+#define I40E_QTX_TAIL1_TAIL_MASK I40E_MASK(0x1FFF, I40E_QTX_TAIL1_TAIL_SHIFT)
+#define I40E_VFMSIX_PBA 0x00002000 /* Reset: VFLR */
+#define I40E_VFMSIX_PBA_PENBIT_SHIFT 0
+#define I40E_VFMSIX_PBA_PENBIT_MASK I40E_MASK(0xFFFFFFFF, I40E_VFMSIX_PBA_PENBIT_SHIFT)
+#define I40E_VFMSIX_TADD(_i) (0x00000000 + ((_i) * 16)) /* _i=0...16 */ /* Reset: VFLR */
+#define I40E_VFMSIX_TADD_MAX_INDEX 16
+#define I40E_VFMSIX_TADD_MSIXTADD10_SHIFT 0
+#define I40E_VFMSIX_TADD_MSIXTADD10_MASK I40E_MASK(0x3, I40E_VFMSIX_TADD_MSIXTADD10_SHIFT)
+#define I40E_VFMSIX_TADD_MSIXTADD_SHIFT 2
+#define I40E_VFMSIX_TADD_MSIXTADD_MASK I40E_MASK(0x3FFFFFFF, I40E_VFMSIX_TADD_MSIXTADD_SHIFT)
+#define I40E_VFMSIX_TMSG(_i) (0x00000008 + ((_i) * 16)) /* _i=0...16 */ /* Reset: VFLR */
+#define I40E_VFMSIX_TMSG_MAX_INDEX 16
+#define I40E_VFMSIX_TMSG_MSIXTMSG_SHIFT 0
+#define I40E_VFMSIX_TMSG_MSIXTMSG_MASK I40E_MASK(0xFFFFFFFF, I40E_VFMSIX_TMSG_MSIXTMSG_SHIFT)
+#define I40E_VFMSIX_TUADD(_i) (0x00000004 + ((_i) * 16)) /* _i=0...16 */ /* Reset: VFLR */
+#define I40E_VFMSIX_TUADD_MAX_INDEX 16
+#define I40E_VFMSIX_TUADD_MSIXTUADD_SHIFT 0
+#define I40E_VFMSIX_TUADD_MSIXTUADD_MASK I40E_MASK(0xFFFFFFFF, I40E_VFMSIX_TUADD_MSIXTUADD_SHIFT)
+#define I40E_VFMSIX_TVCTRL(_i) (0x0000000C + ((_i) * 16)) /* _i=0...16 */ /* Reset: VFLR */
+#define I40E_VFMSIX_TVCTRL_MAX_INDEX 16
+#define I40E_VFMSIX_TVCTRL_MASK_SHIFT 0
+#define I40E_VFMSIX_TVCTRL_MASK_MASK I40E_MASK(0x1, I40E_VFMSIX_TVCTRL_MASK_SHIFT)
+#define I40E_VFCM_PE_ERRDATA 0x0000DC00 /* Reset: VFR */
+#define I40E_VFCM_PE_ERRDATA_ERROR_CODE_SHIFT 0
+#define I40E_VFCM_PE_ERRDATA_ERROR_CODE_MASK I40E_MASK(0xF, I40E_VFCM_PE_ERRDATA_ERROR_CODE_SHIFT)
+#define I40E_VFCM_PE_ERRDATA_Q_TYPE_SHIFT 4
+#define I40E_VFCM_PE_ERRDATA_Q_TYPE_MASK I40E_MASK(0x7, I40E_VFCM_PE_ERRDATA_Q_TYPE_SHIFT)
+#define I40E_VFCM_PE_ERRDATA_Q_NUM_SHIFT 8
+#define I40E_VFCM_PE_ERRDATA_Q_NUM_MASK I40E_MASK(0x3FFFF, I40E_VFCM_PE_ERRDATA_Q_NUM_SHIFT)
+#define I40E_VFCM_PE_ERRINFO 0x0000D800 /* Reset: VFR */
+#define I40E_VFCM_PE_ERRINFO_ERROR_VALID_SHIFT 0
+#define I40E_VFCM_PE_ERRINFO_ERROR_VALID_MASK I40E_MASK(0x1, I40E_VFCM_PE_ERRINFO_ERROR_VALID_SHIFT)
+#define I40E_VFCM_PE_ERRINFO_ERROR_INST_SHIFT 4
+#define I40E_VFCM_PE_ERRINFO_ERROR_INST_MASK I40E_MASK(0x7, I40E_VFCM_PE_ERRINFO_ERROR_INST_SHIFT)
+#define I40E_VFCM_PE_ERRINFO_DBL_ERROR_CNT_SHIFT 8
+#define I40E_VFCM_PE_ERRINFO_DBL_ERROR_CNT_MASK I40E_MASK(0xFF, I40E_VFCM_PE_ERRINFO_DBL_ERROR_CNT_SHIFT)
+#define I40E_VFCM_PE_ERRINFO_RLU_ERROR_CNT_SHIFT 16
+#define I40E_VFCM_PE_ERRINFO_RLU_ERROR_CNT_MASK I40E_MASK(0xFF, I40E_VFCM_PE_ERRINFO_RLU_ERROR_CNT_SHIFT)
+#define I40E_VFCM_PE_ERRINFO_RLS_ERROR_CNT_SHIFT 24
+#define I40E_VFCM_PE_ERRINFO_RLS_ERROR_CNT_MASK I40E_MASK(0xFF, I40E_VFCM_PE_ERRINFO_RLS_ERROR_CNT_SHIFT)
+#define I40E_VFQF_HENA(_i) (0x0000C400 + ((_i) * 4)) /* _i=0...1 */ /* Reset: CORER */
+#define I40E_VFQF_HENA_MAX_INDEX 1
+#define I40E_VFQF_HENA_PTYPE_ENA_SHIFT 0
+#define I40E_VFQF_HENA_PTYPE_ENA_MASK I40E_MASK(0xFFFFFFFF, I40E_VFQF_HENA_PTYPE_ENA_SHIFT)
+#define I40E_VFQF_HKEY(_i) (0x0000CC00 + ((_i) * 4)) /* _i=0...12 */ /* Reset: CORER */
+#define I40E_VFQF_HKEY_MAX_INDEX 12
+#define I40E_VFQF_HKEY_KEY_0_SHIFT 0
+#define I40E_VFQF_HKEY_KEY_0_MASK I40E_MASK(0xFF, I40E_VFQF_HKEY_KEY_0_SHIFT)
+#define I40E_VFQF_HKEY_KEY_1_SHIFT 8
+#define I40E_VFQF_HKEY_KEY_1_MASK I40E_MASK(0xFF, I40E_VFQF_HKEY_KEY_1_SHIFT)
+#define I40E_VFQF_HKEY_KEY_2_SHIFT 16
+#define I40E_VFQF_HKEY_KEY_2_MASK I40E_MASK(0xFF, I40E_VFQF_HKEY_KEY_2_SHIFT)
+#define I40E_VFQF_HKEY_KEY_3_SHIFT 24
+#define I40E_VFQF_HKEY_KEY_3_MASK I40E_MASK(0xFF, I40E_VFQF_HKEY_KEY_3_SHIFT)
+#define I40E_VFQF_HLUT(_i) (0x0000D000 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_VFQF_HLUT_MAX_INDEX 15
+#define I40E_VFQF_HLUT_LUT0_SHIFT 0
+#define I40E_VFQF_HLUT_LUT0_MASK I40E_MASK(0xF, I40E_VFQF_HLUT_LUT0_SHIFT)
+#define I40E_VFQF_HLUT_LUT1_SHIFT 8
+#define I40E_VFQF_HLUT_LUT1_MASK I40E_MASK(0xF, I40E_VFQF_HLUT_LUT1_SHIFT)
+#define I40E_VFQF_HLUT_LUT2_SHIFT 16
+#define I40E_VFQF_HLUT_LUT2_MASK I40E_MASK(0xF, I40E_VFQF_HLUT_LUT2_SHIFT)
+#define I40E_VFQF_HLUT_LUT3_SHIFT 24
+#define I40E_VFQF_HLUT_LUT3_MASK I40E_MASK(0xF, I40E_VFQF_HLUT_LUT3_SHIFT)
+#define I40E_VFQF_HREGION(_i) (0x0000D400 + ((_i) * 4)) /* _i=0...7 */ /* Reset: CORER */
+#define I40E_VFQF_HREGION_MAX_INDEX 7
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_0_SHIFT 0
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_0_MASK I40E_MASK(0x1, I40E_VFQF_HREGION_OVERRIDE_ENA_0_SHIFT)
+#define I40E_VFQF_HREGION_REGION_0_SHIFT 1
+#define I40E_VFQF_HREGION_REGION_0_MASK I40E_MASK(0x7, I40E_VFQF_HREGION_REGION_0_SHIFT)
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_1_SHIFT 4
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_1_MASK I40E_MASK(0x1, I40E_VFQF_HREGION_OVERRIDE_ENA_1_SHIFT)
+#define I40E_VFQF_HREGION_REGION_1_SHIFT 5
+#define I40E_VFQF_HREGION_REGION_1_MASK I40E_MASK(0x7, I40E_VFQF_HREGION_REGION_1_SHIFT)
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_2_SHIFT 8
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_2_MASK I40E_MASK(0x1, I40E_VFQF_HREGION_OVERRIDE_ENA_2_SHIFT)
+#define I40E_VFQF_HREGION_REGION_2_SHIFT 9
+#define I40E_VFQF_HREGION_REGION_2_MASK I40E_MASK(0x7, I40E_VFQF_HREGION_REGION_2_SHIFT)
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_3_SHIFT 12
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_3_MASK I40E_MASK(0x1, I40E_VFQF_HREGION_OVERRIDE_ENA_3_SHIFT)
+#define I40E_VFQF_HREGION_REGION_3_SHIFT 13
+#define I40E_VFQF_HREGION_REGION_3_MASK I40E_MASK(0x7, I40E_VFQF_HREGION_REGION_3_SHIFT)
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_4_SHIFT 16
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_4_MASK I40E_MASK(0x1, I40E_VFQF_HREGION_OVERRIDE_ENA_4_SHIFT)
+#define I40E_VFQF_HREGION_REGION_4_SHIFT 17
+#define I40E_VFQF_HREGION_REGION_4_MASK I40E_MASK(0x7, I40E_VFQF_HREGION_REGION_4_SHIFT)
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_5_SHIFT 20
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_5_MASK I40E_MASK(0x1, I40E_VFQF_HREGION_OVERRIDE_ENA_5_SHIFT)
+#define I40E_VFQF_HREGION_REGION_5_SHIFT 21
+#define I40E_VFQF_HREGION_REGION_5_MASK I40E_MASK(0x7, I40E_VFQF_HREGION_REGION_5_SHIFT)
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_6_SHIFT 24
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_6_MASK I40E_MASK(0x1, I40E_VFQF_HREGION_OVERRIDE_ENA_6_SHIFT)
+#define I40E_VFQF_HREGION_REGION_6_SHIFT 25
+#define I40E_VFQF_HREGION_REGION_6_MASK I40E_MASK(0x7, I40E_VFQF_HREGION_REGION_6_SHIFT)
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_7_SHIFT 28
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_7_MASK I40E_MASK(0x1, I40E_VFQF_HREGION_OVERRIDE_ENA_7_SHIFT)
+#define I40E_VFQF_HREGION_REGION_7_SHIFT 29
+#define I40E_VFQF_HREGION_REGION_7_MASK I40E_MASK(0x7, I40E_VFQF_HREGION_REGION_7_SHIFT)
+#ifdef X722_SUPPORT
+
+#define I40E_MNGSB_FDCRC 0x000B7050 /* Reset: POR */
+#define I40E_MNGSB_FDCRC_CRC_RES_SHIFT 0
+#define I40E_MNGSB_FDCRC_CRC_RES_MASK I40E_MASK(0xFF, I40E_MNGSB_FDCRC_CRC_RES_SHIFT)
+#define I40E_MNGSB_FDCS 0x000B7040 /* Reset: POR */
+#define I40E_MNGSB_FDCS_CRC_CONT_SHIFT 2
+#define I40E_MNGSB_FDCS_CRC_CONT_MASK I40E_MASK(0x1, I40E_MNGSB_FDCS_CRC_CONT_SHIFT)
+#define I40E_MNGSB_FDCS_CRC_SEED_EN_SHIFT 3
+#define I40E_MNGSB_FDCS_CRC_SEED_EN_MASK I40E_MASK(0x1, I40E_MNGSB_FDCS_CRC_SEED_EN_SHIFT)
+#define I40E_MNGSB_FDCS_CRC_WR_INH_SHIFT 4
+#define I40E_MNGSB_FDCS_CRC_WR_INH_MASK I40E_MASK(0x1, I40E_MNGSB_FDCS_CRC_WR_INH_SHIFT)
+#define I40E_MNGSB_FDCS_CRC_SEED_SHIFT 8
+#define I40E_MNGSB_FDCS_CRC_SEED_MASK I40E_MASK(0xFF, I40E_MNGSB_FDCS_CRC_SEED_SHIFT)
+#define I40E_MNGSB_FDS 0x000B7048 /* Reset: POR */
+#define I40E_MNGSB_FDS_START_BC_SHIFT 0
+#define I40E_MNGSB_FDS_START_BC_MASK I40E_MASK(0xFFF, I40E_MNGSB_FDS_START_BC_SHIFT)
+#define I40E_MNGSB_FDS_LAST_BC_SHIFT 16
+#define I40E_MNGSB_FDS_LAST_BC_MASK I40E_MASK(0xFFF, I40E_MNGSB_FDS_LAST_BC_SHIFT)
+
+#define I40E_GL_VF_CTRL_RX(_VF) (0x00083600 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: EMPR */
+#define I40E_GL_VF_CTRL_RX_MAX_INDEX 127
+#define I40E_GL_VF_CTRL_RX_AQ_RX_EN_SHIFT 0
+#define I40E_GL_VF_CTRL_RX_AQ_RX_EN_MASK I40E_MASK(0x1, I40E_GL_VF_CTRL_RX_AQ_RX_EN_SHIFT)
+#define I40E_GL_VF_CTRL_TX(_VF) (0x00083400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: EMPR */
+#define I40E_GL_VF_CTRL_TX_MAX_INDEX 127
+#define I40E_GL_VF_CTRL_TX_AQ_TX_EN_SHIFT 0
+#define I40E_GL_VF_CTRL_TX_AQ_TX_EN_MASK I40E_MASK(0x1, I40E_GL_VF_CTRL_TX_AQ_TX_EN_SHIFT)
+
+#define I40E_GLCM_LAN_CACHESIZE 0x0010C4D8 /* Reset: CORER */
+#define I40E_GLCM_LAN_CACHESIZE_WORD_SIZE_SHIFT 0
+#define I40E_GLCM_LAN_CACHESIZE_WORD_SIZE_MASK I40E_MASK(0xFFF, I40E_GLCM_LAN_CACHESIZE_WORD_SIZE_SHIFT)
+#define I40E_GLCM_LAN_CACHESIZE_SETS_SHIFT 12
+#define I40E_GLCM_LAN_CACHESIZE_SETS_MASK I40E_MASK(0xF, I40E_GLCM_LAN_CACHESIZE_SETS_SHIFT)
+#define I40E_GLCM_LAN_CACHESIZE_WAYS_SHIFT 16
+#define I40E_GLCM_LAN_CACHESIZE_WAYS_MASK I40E_MASK(0x3FF, I40E_GLCM_LAN_CACHESIZE_WAYS_SHIFT)
+#define I40E_GLCM_PE_CACHESIZE 0x00138FE4 /* Reset: CORER */
+#define I40E_GLCM_PE_CACHESIZE_WORD_SIZE_SHIFT 0
+#define I40E_GLCM_PE_CACHESIZE_WORD_SIZE_MASK I40E_MASK(0xFFF, I40E_GLCM_PE_CACHESIZE_WORD_SIZE_SHIFT)
+#define I40E_GLCM_PE_CACHESIZE_SETS_SHIFT 12
+#define I40E_GLCM_PE_CACHESIZE_SETS_MASK I40E_MASK(0xF, I40E_GLCM_PE_CACHESIZE_SETS_SHIFT)
+#define I40E_GLCM_PE_CACHESIZE_WAYS_SHIFT 16
+#define I40E_GLCM_PE_CACHESIZE_WAYS_MASK I40E_MASK(0x1FF, I40E_GLCM_PE_CACHESIZE_WAYS_SHIFT)
+#define I40E_PFCM_PE_ERRDATA 0x00138D00 /* Reset: PFR */
+#define I40E_PFCM_PE_ERRDATA_ERROR_CODE_SHIFT 0
+#define I40E_PFCM_PE_ERRDATA_ERROR_CODE_MASK I40E_MASK(0xF, I40E_PFCM_PE_ERRDATA_ERROR_CODE_SHIFT)
+#define I40E_PFCM_PE_ERRDATA_Q_TYPE_SHIFT 4
+#define I40E_PFCM_PE_ERRDATA_Q_TYPE_MASK I40E_MASK(0x7, I40E_PFCM_PE_ERRDATA_Q_TYPE_SHIFT)
+#define I40E_PFCM_PE_ERRDATA_Q_NUM_SHIFT 8
+#define I40E_PFCM_PE_ERRDATA_Q_NUM_MASK I40E_MASK(0x3FFFF, I40E_PFCM_PE_ERRDATA_Q_NUM_SHIFT)
+#define I40E_PFCM_PE_ERRINFO 0x00138C80 /* Reset: PFR */
+#define I40E_PFCM_PE_ERRINFO_ERROR_VALID_SHIFT 0
+#define I40E_PFCM_PE_ERRINFO_ERROR_VALID_MASK I40E_MASK(0x1, I40E_PFCM_PE_ERRINFO_ERROR_VALID_SHIFT)
+#define I40E_PFCM_PE_ERRINFO_ERROR_INST_SHIFT 4
+#define I40E_PFCM_PE_ERRINFO_ERROR_INST_MASK I40E_MASK(0x7, I40E_PFCM_PE_ERRINFO_ERROR_INST_SHIFT)
+#define I40E_PFCM_PE_ERRINFO_DBL_ERROR_CNT_SHIFT 8
+#define I40E_PFCM_PE_ERRINFO_DBL_ERROR_CNT_MASK I40E_MASK(0xFF, I40E_PFCM_PE_ERRINFO_DBL_ERROR_CNT_SHIFT)
+#define I40E_PFCM_PE_ERRINFO_RLU_ERROR_CNT_SHIFT 16
+#define I40E_PFCM_PE_ERRINFO_RLU_ERROR_CNT_MASK I40E_MASK(0xFF, I40E_PFCM_PE_ERRINFO_RLU_ERROR_CNT_SHIFT)
+#define I40E_PFCM_PE_ERRINFO_RLS_ERROR_CNT_SHIFT 24
+#define I40E_PFCM_PE_ERRINFO_RLS_ERROR_CNT_MASK I40E_MASK(0xFF, I40E_PFCM_PE_ERRINFO_RLS_ERROR_CNT_SHIFT)
+
+#define I40E_PRTDCB_TFMSTC(_i) (0x000A0040 + ((_i) * 32)) /* _i=0...7 */ /* Reset: CORER */
+#define I40E_PRTDCB_TFMSTC_MAX_INDEX 7
+#define I40E_PRTDCB_TFMSTC_MSTC_SHIFT 0
+#define I40E_PRTDCB_TFMSTC_MSTC_MASK I40E_MASK(0xFFFFF, I40E_PRTDCB_TFMSTC_MSTC_SHIFT)
+#define I40E_GL_FWSTS_FWROWD_SHIFT 8
+#define I40E_GL_FWSTS_FWROWD_MASK I40E_MASK(0x1, I40E_GL_FWSTS_FWROWD_SHIFT)
+#define I40E_GLFOC_CACHESIZE 0x000AA0DC /* Reset: CORER */
+#define I40E_GLFOC_CACHESIZE_WORD_SIZE_SHIFT 0
+#define I40E_GLFOC_CACHESIZE_WORD_SIZE_MASK I40E_MASK(0xFF, I40E_GLFOC_CACHESIZE_WORD_SIZE_SHIFT)
+#define I40E_GLFOC_CACHESIZE_SETS_SHIFT 8
+#define I40E_GLFOC_CACHESIZE_SETS_MASK I40E_MASK(0xFFF, I40E_GLFOC_CACHESIZE_SETS_SHIFT)
+#define I40E_GLFOC_CACHESIZE_WAYS_SHIFT 20
+#define I40E_GLFOC_CACHESIZE_WAYS_MASK I40E_MASK(0xF, I40E_GLFOC_CACHESIZE_WAYS_SHIFT)
+#define I40E_GLHMC_APBVTINUSEBASE(_i) (0x000C4a00 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_APBVTINUSEBASE_MAX_INDEX 15
+#define I40E_GLHMC_APBVTINUSEBASE_FPMAPBINUSEBASE_SHIFT 0
+#define I40E_GLHMC_APBVTINUSEBASE_FPMAPBINUSEBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_APBVTINUSEBASE_FPMAPBINUSEBASE_SHIFT)
+#define I40E_GLHMC_CEQPART(_i) (0x001312C0 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_CEQPART_MAX_INDEX 15
+#define I40E_GLHMC_CEQPART_PMCEQBASE_SHIFT 0
+#define I40E_GLHMC_CEQPART_PMCEQBASE_MASK I40E_MASK(0xFF, I40E_GLHMC_CEQPART_PMCEQBASE_SHIFT)
+#define I40E_GLHMC_CEQPART_PMCEQSIZE_SHIFT 16
+#define I40E_GLHMC_CEQPART_PMCEQSIZE_MASK I40E_MASK(0x1FF, I40E_GLHMC_CEQPART_PMCEQSIZE_SHIFT)
+#define I40E_GLHMC_DBCQMAX 0x000C20F0 /* Reset: CORER */
+#define I40E_GLHMC_DBCQMAX_GLHMC_DBCQMAX_SHIFT 0
+#define I40E_GLHMC_DBCQMAX_GLHMC_DBCQMAX_MASK I40E_MASK(0x3FFFF, I40E_GLHMC_DBCQMAX_GLHMC_DBCQMAX_SHIFT)
+#define I40E_GLHMC_DBCQPART(_i) (0x00131240 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_DBCQPART_MAX_INDEX 15
+#define I40E_GLHMC_DBCQPART_PMDBCQBASE_SHIFT 0
+#define I40E_GLHMC_DBCQPART_PMDBCQBASE_MASK I40E_MASK(0x3FFF, I40E_GLHMC_DBCQPART_PMDBCQBASE_SHIFT)
+#define I40E_GLHMC_DBCQPART_PMDBCQSIZE_SHIFT 16
+#define I40E_GLHMC_DBCQPART_PMDBCQSIZE_MASK I40E_MASK(0x7FFF, I40E_GLHMC_DBCQPART_PMDBCQSIZE_SHIFT)
+#define I40E_GLHMC_DBQPMAX 0x000C20EC /* Reset: CORER */
+#define I40E_GLHMC_DBQPMAX_GLHMC_DBQPMAX_SHIFT 0
+#define I40E_GLHMC_DBQPMAX_GLHMC_DBQPMAX_MASK I40E_MASK(0x7FFFF, I40E_GLHMC_DBQPMAX_GLHMC_DBQPMAX_SHIFT)
+#define I40E_GLHMC_DBQPPART(_i) (0x00138D80 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_DBQPPART_MAX_INDEX 15
+#define I40E_GLHMC_DBQPPART_PMDBQPBASE_SHIFT 0
+#define I40E_GLHMC_DBQPPART_PMDBQPBASE_MASK I40E_MASK(0x3FFF, I40E_GLHMC_DBQPPART_PMDBQPBASE_SHIFT)
+#define I40E_GLHMC_DBQPPART_PMDBQPSIZE_SHIFT 16
+#define I40E_GLHMC_DBQPPART_PMDBQPSIZE_MASK I40E_MASK(0x7FFF, I40E_GLHMC_DBQPPART_PMDBQPSIZE_SHIFT)
+#define I40E_GLHMC_PEARPBASE(_i) (0x000C4800 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_PEARPBASE_MAX_INDEX 15
+#define I40E_GLHMC_PEARPBASE_FPMPEARPBASE_SHIFT 0
+#define I40E_GLHMC_PEARPBASE_FPMPEARPBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_PEARPBASE_FPMPEARPBASE_SHIFT)
+#define I40E_GLHMC_PEARPCNT(_i) (0x000C4900 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_PEARPCNT_MAX_INDEX 15
+#define I40E_GLHMC_PEARPCNT_FPMPEARPCNT_SHIFT 0
+#define I40E_GLHMC_PEARPCNT_FPMPEARPCNT_MASK I40E_MASK(0x1FFFFFFF, I40E_GLHMC_PEARPCNT_FPMPEARPCNT_SHIFT)
+#define I40E_GLHMC_PEARPMAX 0x000C2038 /* Reset: CORER */
+#define I40E_GLHMC_PEARPMAX_PMPEARPMAX_SHIFT 0
+#define I40E_GLHMC_PEARPMAX_PMPEARPMAX_MASK I40E_MASK(0x1FFFF, I40E_GLHMC_PEARPMAX_PMPEARPMAX_SHIFT)
+#define I40E_GLHMC_PEARPOBJSZ 0x000C2034 /* Reset: CORER */
+#define I40E_GLHMC_PEARPOBJSZ_PMPEARPOBJSZ_SHIFT 0
+#define I40E_GLHMC_PEARPOBJSZ_PMPEARPOBJSZ_MASK I40E_MASK(0x7, I40E_GLHMC_PEARPOBJSZ_PMPEARPOBJSZ_SHIFT)
+#define I40E_GLHMC_PECQBASE(_i) (0x000C4200 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_PECQBASE_MAX_INDEX 15
+#define I40E_GLHMC_PECQBASE_FPMPECQBASE_SHIFT 0
+#define I40E_GLHMC_PECQBASE_FPMPECQBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_PECQBASE_FPMPECQBASE_SHIFT)
+#define I40E_GLHMC_PECQCNT(_i) (0x000C4300 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_PECQCNT_MAX_INDEX 15
+#define I40E_GLHMC_PECQCNT_FPMPECQCNT_SHIFT 0
+#define I40E_GLHMC_PECQCNT_FPMPECQCNT_MASK I40E_MASK(0x1FFFFFFF, I40E_GLHMC_PECQCNT_FPMPECQCNT_SHIFT)
+#define I40E_GLHMC_PECQOBJSZ 0x000C2020 /* Reset: CORER */
+#define I40E_GLHMC_PECQOBJSZ_PMPECQOBJSZ_SHIFT 0
+#define I40E_GLHMC_PECQOBJSZ_PMPECQOBJSZ_MASK I40E_MASK(0xF, I40E_GLHMC_PECQOBJSZ_PMPECQOBJSZ_SHIFT)
+#define I40E_GLHMC_PEHTCNT(_i) (0x000C4700 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_PEHTCNT_MAX_INDEX 15
+#define I40E_GLHMC_PEHTCNT_FPMPEHTCNT_SHIFT 0
+#define I40E_GLHMC_PEHTCNT_FPMPEHTCNT_MASK I40E_MASK(0x1FFFFFFF, I40E_GLHMC_PEHTCNT_FPMPEHTCNT_SHIFT)
+#define I40E_GLHMC_PEHTEBASE(_i) (0x000C4600 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_PEHTEBASE_MAX_INDEX 15
+#define I40E_GLHMC_PEHTEBASE_FPMPEHTEBASE_SHIFT 0
+#define I40E_GLHMC_PEHTEBASE_FPMPEHTEBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_PEHTEBASE_FPMPEHTEBASE_SHIFT)
+#define I40E_GLHMC_PEHTEOBJSZ 0x000C202c /* Reset: CORER */
+#define I40E_GLHMC_PEHTEOBJSZ_PMPEHTEOBJSZ_SHIFT 0
+#define I40E_GLHMC_PEHTEOBJSZ_PMPEHTEOBJSZ_MASK I40E_MASK(0xF, I40E_GLHMC_PEHTEOBJSZ_PMPEHTEOBJSZ_SHIFT)
+#define I40E_GLHMC_PEHTMAX 0x000C2030 /* Reset: CORER */
+#define I40E_GLHMC_PEHTMAX_PMPEHTMAX_SHIFT 0
+#define I40E_GLHMC_PEHTMAX_PMPEHTMAX_MASK I40E_MASK(0x1FFFFF, I40E_GLHMC_PEHTMAX_PMPEHTMAX_SHIFT)
+#define I40E_GLHMC_PEMRBASE(_i) (0x000C4c00 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_PEMRBASE_MAX_INDEX 15
+#define I40E_GLHMC_PEMRBASE_FPMPEMRBASE_SHIFT 0
+#define I40E_GLHMC_PEMRBASE_FPMPEMRBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_PEMRBASE_FPMPEMRBASE_SHIFT)
+#define I40E_GLHMC_PEMRCNT(_i) (0x000C4d00 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_PEMRCNT_MAX_INDEX 15
+#define I40E_GLHMC_PEMRCNT_FPMPEMRSZ_SHIFT 0
+#define I40E_GLHMC_PEMRCNT_FPMPEMRSZ_MASK I40E_MASK(0x1FFFFFFF, I40E_GLHMC_PEMRCNT_FPMPEMRSZ_SHIFT)
+#define I40E_GLHMC_PEMRMAX 0x000C2040 /* Reset: CORER */
+#define I40E_GLHMC_PEMRMAX_PMPEMRMAX_SHIFT 0
+#define I40E_GLHMC_PEMRMAX_PMPEMRMAX_MASK I40E_MASK(0x7FFFFF, I40E_GLHMC_PEMRMAX_PMPEMRMAX_SHIFT)
+#define I40E_GLHMC_PEMROBJSZ 0x000C203c /* Reset: CORER */
+#define I40E_GLHMC_PEMROBJSZ_PMPEMROBJSZ_SHIFT 0
+#define I40E_GLHMC_PEMROBJSZ_PMPEMROBJSZ_MASK I40E_MASK(0xF, I40E_GLHMC_PEMROBJSZ_PMPEMROBJSZ_SHIFT)
+#define I40E_GLHMC_PEPBLBASE(_i) (0x000C5800 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_PEPBLBASE_MAX_INDEX 15
+#define I40E_GLHMC_PEPBLBASE_FPMPEPBLBASE_SHIFT 0
+#define I40E_GLHMC_PEPBLBASE_FPMPEPBLBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_PEPBLBASE_FPMPEPBLBASE_SHIFT)
+#define I40E_GLHMC_PEPBLCNT(_i) (0x000C5900 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_PEPBLCNT_MAX_INDEX 15
+#define I40E_GLHMC_PEPBLCNT_FPMPEPBLCNT_SHIFT 0
+#define I40E_GLHMC_PEPBLCNT_FPMPEPBLCNT_MASK I40E_MASK(0x1FFFFFFF, I40E_GLHMC_PEPBLCNT_FPMPEPBLCNT_SHIFT)
+#define I40E_GLHMC_PEPBLMAX 0x000C206c /* Reset: CORER */
+#define I40E_GLHMC_PEPBLMAX_PMPEPBLMAX_SHIFT 0
+#define I40E_GLHMC_PEPBLMAX_PMPEPBLMAX_MASK I40E_MASK(0x1FFFFFFF, I40E_GLHMC_PEPBLMAX_PMPEPBLMAX_SHIFT)
+#define I40E_GLHMC_PEPFFIRSTSD 0x000C20E4 /* Reset: CORER */
+#define I40E_GLHMC_PEPFFIRSTSD_GLHMC_PEPFFIRSTSD_SHIFT 0
+#define I40E_GLHMC_PEPFFIRSTSD_GLHMC_PEPFFIRSTSD_MASK I40E_MASK(0xFFF, I40E_GLHMC_PEPFFIRSTSD_GLHMC_PEPFFIRSTSD_SHIFT)
+#define I40E_GLHMC_PEQ1BASE(_i) (0x000C5200 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_PEQ1BASE_MAX_INDEX 15
+#define I40E_GLHMC_PEQ1BASE_FPMPEQ1BASE_SHIFT 0
+#define I40E_GLHMC_PEQ1BASE_FPMPEQ1BASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_PEQ1BASE_FPMPEQ1BASE_SHIFT)
+#define I40E_GLHMC_PEQ1CNT(_i) (0x000C5300 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_PEQ1CNT_MAX_INDEX 15
+#define I40E_GLHMC_PEQ1CNT_FPMPEQ1CNT_SHIFT 0
+#define I40E_GLHMC_PEQ1CNT_FPMPEQ1CNT_MASK I40E_MASK(0x1FFFFFFF, I40E_GLHMC_PEQ1CNT_FPMPEQ1CNT_SHIFT)
+#define I40E_GLHMC_PEQ1FLBASE(_i) (0x000C5400 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_PEQ1FLBASE_MAX_INDEX 15
+#define I40E_GLHMC_PEQ1FLBASE_FPMPEQ1FLBASE_SHIFT 0
+#define I40E_GLHMC_PEQ1FLBASE_FPMPEQ1FLBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_PEQ1FLBASE_FPMPEQ1FLBASE_SHIFT)
+#define I40E_GLHMC_PEQ1FLMAX 0x000C2058 /* Reset: CORER */
+#define I40E_GLHMC_PEQ1FLMAX_PMPEQ1FLMAX_SHIFT 0
+#define I40E_GLHMC_PEQ1FLMAX_PMPEQ1FLMAX_MASK I40E_MASK(0x3FFFFFF, I40E_GLHMC_PEQ1FLMAX_PMPEQ1FLMAX_SHIFT)
+#define I40E_GLHMC_PEQ1MAX 0x000C2054 /* Reset: CORER */
+#define I40E_GLHMC_PEQ1MAX_PMPEQ1MAX_SHIFT 0
+#define I40E_GLHMC_PEQ1MAX_PMPEQ1MAX_MASK I40E_MASK(0x3FFFFFF, I40E_GLHMC_PEQ1MAX_PMPEQ1MAX_SHIFT)
+#define I40E_GLHMC_PEQ1OBJSZ 0x000C2050 /* Reset: CORER */
+#define I40E_GLHMC_PEQ1OBJSZ_PMPEQ1OBJSZ_SHIFT 0
+#define I40E_GLHMC_PEQ1OBJSZ_PMPEQ1OBJSZ_MASK I40E_MASK(0xF, I40E_GLHMC_PEQ1OBJSZ_PMPEQ1OBJSZ_SHIFT)
+#define I40E_GLHMC_PEQPBASE(_i) (0x000C4000 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_PEQPBASE_MAX_INDEX 15
+#define I40E_GLHMC_PEQPBASE_FPMPEQPBASE_SHIFT 0
+#define I40E_GLHMC_PEQPBASE_FPMPEQPBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_PEQPBASE_FPMPEQPBASE_SHIFT)
+#define I40E_GLHMC_PEQPCNT(_i) (0x000C4100 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_PEQPCNT_MAX_INDEX 15
+#define I40E_GLHMC_PEQPCNT_FPMPEQPCNT_SHIFT 0
+#define I40E_GLHMC_PEQPCNT_FPMPEQPCNT_MASK I40E_MASK(0x1FFFFFFF, I40E_GLHMC_PEQPCNT_FPMPEQPCNT_SHIFT)
+#define I40E_GLHMC_PEQPOBJSZ 0x000C201c /* Reset: CORER */
+#define I40E_GLHMC_PEQPOBJSZ_PMPEQPOBJSZ_SHIFT 0
+#define I40E_GLHMC_PEQPOBJSZ_PMPEQPOBJSZ_MASK I40E_MASK(0xF, I40E_GLHMC_PEQPOBJSZ_PMPEQPOBJSZ_SHIFT)
+#define I40E_GLHMC_PESRQBASE(_i) (0x000C4400 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_PESRQBASE_MAX_INDEX 15
+#define I40E_GLHMC_PESRQBASE_FPMPESRQBASE_SHIFT 0
+#define I40E_GLHMC_PESRQBASE_FPMPESRQBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_PESRQBASE_FPMPESRQBASE_SHIFT)
+#define I40E_GLHMC_PESRQCNT(_i) (0x000C4500 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_PESRQCNT_MAX_INDEX 15
+#define I40E_GLHMC_PESRQCNT_FPMPESRQCNT_SHIFT 0
+#define I40E_GLHMC_PESRQCNT_FPMPESRQCNT_MASK I40E_MASK(0x1FFFFFFF, I40E_GLHMC_PESRQCNT_FPMPESRQCNT_SHIFT)
+#define I40E_GLHMC_PESRQMAX 0x000C2028 /* Reset: CORER */
+#define I40E_GLHMC_PESRQMAX_PMPESRQMAX_SHIFT 0
+#define I40E_GLHMC_PESRQMAX_PMPESRQMAX_MASK I40E_MASK(0xFFFF, I40E_GLHMC_PESRQMAX_PMPESRQMAX_SHIFT)
+#define I40E_GLHMC_PESRQOBJSZ 0x000C2024 /* Reset: CORER */
+#define I40E_GLHMC_PESRQOBJSZ_PMPESRQOBJSZ_SHIFT 0
+#define I40E_GLHMC_PESRQOBJSZ_PMPESRQOBJSZ_MASK I40E_MASK(0xF, I40E_GLHMC_PESRQOBJSZ_PMPESRQOBJSZ_SHIFT)
+#define I40E_GLHMC_PETIMERBASE(_i) (0x000C5A00 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_PETIMERBASE_MAX_INDEX 15
+#define I40E_GLHMC_PETIMERBASE_FPMPETIMERBASE_SHIFT 0
+#define I40E_GLHMC_PETIMERBASE_FPMPETIMERBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_PETIMERBASE_FPMPETIMERBASE_SHIFT)
+#define I40E_GLHMC_PETIMERCNT(_i) (0x000C5B00 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_PETIMERCNT_MAX_INDEX 15
+#define I40E_GLHMC_PETIMERCNT_FPMPETIMERCNT_SHIFT 0
+#define I40E_GLHMC_PETIMERCNT_FPMPETIMERCNT_MASK I40E_MASK(0x1FFFFFFF, I40E_GLHMC_PETIMERCNT_FPMPETIMERCNT_SHIFT)
+#define I40E_GLHMC_PETIMERMAX 0x000C2084 /* Reset: CORER */
+#define I40E_GLHMC_PETIMERMAX_PMPETIMERMAX_SHIFT 0
+#define I40E_GLHMC_PETIMERMAX_PMPETIMERMAX_MASK I40E_MASK(0x1FFFFFFF, I40E_GLHMC_PETIMERMAX_PMPETIMERMAX_SHIFT)
+#define I40E_GLHMC_PETIMEROBJSZ 0x000C2080 /* Reset: CORER */
+#define I40E_GLHMC_PETIMEROBJSZ_PMPETIMEROBJSZ_SHIFT 0
+#define I40E_GLHMC_PETIMEROBJSZ_PMPETIMEROBJSZ_MASK I40E_MASK(0xF, I40E_GLHMC_PETIMEROBJSZ_PMPETIMEROBJSZ_SHIFT)
+#define I40E_GLHMC_PEXFBASE(_i) (0x000C4e00 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_PEXFBASE_MAX_INDEX 15
+#define I40E_GLHMC_PEXFBASE_FPMPEXFBASE_SHIFT 0
+#define I40E_GLHMC_PEXFBASE_FPMPEXFBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_PEXFBASE_FPMPEXFBASE_SHIFT)
+#define I40E_GLHMC_PEXFCNT(_i) (0x000C4f00 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_PEXFCNT_MAX_INDEX 15
+#define I40E_GLHMC_PEXFCNT_FPMPEXFCNT_SHIFT 0
+#define I40E_GLHMC_PEXFCNT_FPMPEXFCNT_MASK I40E_MASK(0x1FFFFFFF, I40E_GLHMC_PEXFCNT_FPMPEXFCNT_SHIFT)
+#define I40E_GLHMC_PEXFFLBASE(_i) (0x000C5000 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_PEXFFLBASE_MAX_INDEX 15
+#define I40E_GLHMC_PEXFFLBASE_FPMPEXFFLBASE_SHIFT 0
+#define I40E_GLHMC_PEXFFLBASE_FPMPEXFFLBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_PEXFFLBASE_FPMPEXFFLBASE_SHIFT)
+#define I40E_GLHMC_PEXFFLMAX 0x000C204c /* Reset: CORER */
+#define I40E_GLHMC_PEXFFLMAX_PMPEXFFLMAX_SHIFT 0
+#define I40E_GLHMC_PEXFFLMAX_PMPEXFFLMAX_MASK I40E_MASK(0x1FFFFFF, I40E_GLHMC_PEXFFLMAX_PMPEXFFLMAX_SHIFT)
+#define I40E_GLHMC_PEXFMAX 0x000C2048 /* Reset: CORER */
+#define I40E_GLHMC_PEXFMAX_PMPEXFMAX_SHIFT 0
+#define I40E_GLHMC_PEXFMAX_PMPEXFMAX_MASK I40E_MASK(0x3FFFFFF, I40E_GLHMC_PEXFMAX_PMPEXFMAX_SHIFT)
+#define I40E_GLHMC_PEXFOBJSZ 0x000C2044 /* Reset: CORER */
+#define I40E_GLHMC_PEXFOBJSZ_PMPEXFOBJSZ_SHIFT 0
+#define I40E_GLHMC_PEXFOBJSZ_PMPEXFOBJSZ_MASK I40E_MASK(0xF, I40E_GLHMC_PEXFOBJSZ_PMPEXFOBJSZ_SHIFT)
+#define I40E_GLHMC_PFPESDPART(_i) (0x000C0880 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_PFPESDPART_MAX_INDEX 15
+#define I40E_GLHMC_PFPESDPART_PMSDBASE_SHIFT 0
+#define I40E_GLHMC_PFPESDPART_PMSDBASE_MASK I40E_MASK(0xFFF, I40E_GLHMC_PFPESDPART_PMSDBASE_SHIFT)
+#define I40E_GLHMC_PFPESDPART_PMSDSIZE_SHIFT 16
+#define I40E_GLHMC_PFPESDPART_PMSDSIZE_MASK I40E_MASK(0x1FFF, I40E_GLHMC_PFPESDPART_PMSDSIZE_SHIFT)
+#define I40E_GLHMC_VFAPBVTINUSEBASE(_i) (0x000Cca00 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLHMC_VFAPBVTINUSEBASE_MAX_INDEX 31
+#define I40E_GLHMC_VFAPBVTINUSEBASE_FPMAPBINUSEBASE_SHIFT 0
+#define I40E_GLHMC_VFAPBVTINUSEBASE_FPMAPBINUSEBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_VFAPBVTINUSEBASE_FPMAPBINUSEBASE_SHIFT)
+#define I40E_GLHMC_VFCEQPART(_i) (0x00132240 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLHMC_VFCEQPART_MAX_INDEX 31
+#define I40E_GLHMC_VFCEQPART_PMCEQBASE_SHIFT 0
+#define I40E_GLHMC_VFCEQPART_PMCEQBASE_MASK I40E_MASK(0xFF, I40E_GLHMC_VFCEQPART_PMCEQBASE_SHIFT)
+#define I40E_GLHMC_VFCEQPART_PMCEQSIZE_SHIFT 16
+#define I40E_GLHMC_VFCEQPART_PMCEQSIZE_MASK I40E_MASK(0x1FF, I40E_GLHMC_VFCEQPART_PMCEQSIZE_SHIFT)
+#define I40E_GLHMC_VFDBCQPART(_i) (0x00132140 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLHMC_VFDBCQPART_MAX_INDEX 31
+#define I40E_GLHMC_VFDBCQPART_PMDBCQBASE_SHIFT 0
+#define I40E_GLHMC_VFDBCQPART_PMDBCQBASE_MASK I40E_MASK(0x3FFF, I40E_GLHMC_VFDBCQPART_PMDBCQBASE_SHIFT)
+#define I40E_GLHMC_VFDBCQPART_PMDBCQSIZE_SHIFT 16
+#define I40E_GLHMC_VFDBCQPART_PMDBCQSIZE_MASK I40E_MASK(0x7FFF, I40E_GLHMC_VFDBCQPART_PMDBCQSIZE_SHIFT)
+#define I40E_GLHMC_VFDBQPPART(_i) (0x00138E00 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLHMC_VFDBQPPART_MAX_INDEX 31
+#define I40E_GLHMC_VFDBQPPART_PMDBQPBASE_SHIFT 0
+#define I40E_GLHMC_VFDBQPPART_PMDBQPBASE_MASK I40E_MASK(0x3FFF, I40E_GLHMC_VFDBQPPART_PMDBQPBASE_SHIFT)
+#define I40E_GLHMC_VFDBQPPART_PMDBQPSIZE_SHIFT 16
+#define I40E_GLHMC_VFDBQPPART_PMDBQPSIZE_MASK I40E_MASK(0x7FFF, I40E_GLHMC_VFDBQPPART_PMDBQPSIZE_SHIFT)
+#define I40E_GLHMC_VFFSIAVBASE(_i) (0x000Cd600 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLHMC_VFFSIAVBASE_MAX_INDEX 31
+#define I40E_GLHMC_VFFSIAVBASE_FPMFSIAVBASE_SHIFT 0
+#define I40E_GLHMC_VFFSIAVBASE_FPMFSIAVBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_VFFSIAVBASE_FPMFSIAVBASE_SHIFT)
+#define I40E_GLHMC_VFFSIAVCNT(_i) (0x000Cd700 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLHMC_VFFSIAVCNT_MAX_INDEX 31
+#define I40E_GLHMC_VFFSIAVCNT_FPMFSIAVCNT_SHIFT 0
+#define I40E_GLHMC_VFFSIAVCNT_FPMFSIAVCNT_MASK I40E_MASK(0x1FFFFFFF, I40E_GLHMC_VFFSIAVCNT_FPMFSIAVCNT_SHIFT)
+#define I40E_GLHMC_VFPDINV(_i) (0x000C8300 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLHMC_VFPDINV_MAX_INDEX 31
+#define I40E_GLHMC_VFPDINV_PMSDIDX_SHIFT 0
+#define I40E_GLHMC_VFPDINV_PMSDIDX_MASK I40E_MASK(0xFFF, I40E_GLHMC_VFPDINV_PMSDIDX_SHIFT)
+#define I40E_GLHMC_VFPDINV_PMSDPARTSEL_SHIFT 15
+#define I40E_GLHMC_VFPDINV_PMSDPARTSEL_MASK I40E_MASK(0x1, I40E_GLHMC_VFPDINV_PMSDPARTSEL_SHIFT)
+#define I40E_GLHMC_VFPDINV_PMPDIDX_SHIFT 16
+#define I40E_GLHMC_VFPDINV_PMPDIDX_MASK I40E_MASK(0x1FF, I40E_GLHMC_VFPDINV_PMPDIDX_SHIFT)
+#define I40E_GLHMC_VFPEARPBASE(_i) (0x000Cc800 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLHMC_VFPEARPBASE_MAX_INDEX 31
+#define I40E_GLHMC_VFPEARPBASE_FPMPEARPBASE_SHIFT 0
+#define I40E_GLHMC_VFPEARPBASE_FPMPEARPBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_VFPEARPBASE_FPMPEARPBASE_SHIFT)
+#define I40E_GLHMC_VFPEARPCNT(_i) (0x000Cc900 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLHMC_VFPEARPCNT_MAX_INDEX 31
+#define I40E_GLHMC_VFPEARPCNT_FPMPEARPCNT_SHIFT 0
+#define I40E_GLHMC_VFPEARPCNT_FPMPEARPCNT_MASK I40E_MASK(0x1FFFFFFF, I40E_GLHMC_VFPEARPCNT_FPMPEARPCNT_SHIFT)
+#define I40E_GLHMC_VFPECQBASE(_i) (0x000Cc200 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLHMC_VFPECQBASE_MAX_INDEX 31
+#define I40E_GLHMC_VFPECQBASE_FPMPECQBASE_SHIFT 0
+#define I40E_GLHMC_VFPECQBASE_FPMPECQBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_VFPECQBASE_FPMPECQBASE_SHIFT)
+#define I40E_GLHMC_VFPECQCNT(_i) (0x000Cc300 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLHMC_VFPECQCNT_MAX_INDEX 31
+#define I40E_GLHMC_VFPECQCNT_FPMPECQCNT_SHIFT 0
+#define I40E_GLHMC_VFPECQCNT_FPMPECQCNT_MASK I40E_MASK(0x1FFFFFFF, I40E_GLHMC_VFPECQCNT_FPMPECQCNT_SHIFT)
+#define I40E_GLHMC_VFPEHTCNT(_i) (0x000Cc700 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLHMC_VFPEHTCNT_MAX_INDEX 31
+#define I40E_GLHMC_VFPEHTCNT_FPMPEHTCNT_SHIFT 0
+#define I40E_GLHMC_VFPEHTCNT_FPMPEHTCNT_MASK I40E_MASK(0x1FFFFFFF, I40E_GLHMC_VFPEHTCNT_FPMPEHTCNT_SHIFT)
+#define I40E_GLHMC_VFPEHTEBASE(_i) (0x000Cc600 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLHMC_VFPEHTEBASE_MAX_INDEX 31
+#define I40E_GLHMC_VFPEHTEBASE_FPMPEHTEBASE_SHIFT 0
+#define I40E_GLHMC_VFPEHTEBASE_FPMPEHTEBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_VFPEHTEBASE_FPMPEHTEBASE_SHIFT)
+#define I40E_GLHMC_VFPEMRBASE(_i) (0x000Ccc00 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLHMC_VFPEMRBASE_MAX_INDEX 31
+#define I40E_GLHMC_VFPEMRBASE_FPMPEMRBASE_SHIFT 0
+#define I40E_GLHMC_VFPEMRBASE_FPMPEMRBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_VFPEMRBASE_FPMPEMRBASE_SHIFT)
+#define I40E_GLHMC_VFPEMRCNT(_i) (0x000Ccd00 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLHMC_VFPEMRCNT_MAX_INDEX 31
+#define I40E_GLHMC_VFPEMRCNT_FPMPEMRSZ_SHIFT 0
+#define I40E_GLHMC_VFPEMRCNT_FPMPEMRSZ_MASK I40E_MASK(0x1FFFFFFF, I40E_GLHMC_VFPEMRCNT_FPMPEMRSZ_SHIFT)
+#define I40E_GLHMC_VFPEPBLBASE(_i) (0x000Cd800 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLHMC_VFPEPBLBASE_MAX_INDEX 31
+#define I40E_GLHMC_VFPEPBLBASE_FPMPEPBLBASE_SHIFT 0
+#define I40E_GLHMC_VFPEPBLBASE_FPMPEPBLBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_VFPEPBLBASE_FPMPEPBLBASE_SHIFT)
+#define I40E_GLHMC_VFPEPBLCNT(_i) (0x000Cd900 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLHMC_VFPEPBLCNT_MAX_INDEX 31
+#define I40E_GLHMC_VFPEPBLCNT_FPMPEPBLCNT_SHIFT 0
+#define I40E_GLHMC_VFPEPBLCNT_FPMPEPBLCNT_MASK I40E_MASK(0x1FFFFFFF, I40E_GLHMC_VFPEPBLCNT_FPMPEPBLCNT_SHIFT)
+#define I40E_GLHMC_VFPEQ1BASE(_i) (0x000Cd200 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLHMC_VFPEQ1BASE_MAX_INDEX 31
+#define I40E_GLHMC_VFPEQ1BASE_FPMPEQ1BASE_SHIFT 0
+#define I40E_GLHMC_VFPEQ1BASE_FPMPEQ1BASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_VFPEQ1BASE_FPMPEQ1BASE_SHIFT)
+#define I40E_GLHMC_VFPEQ1CNT(_i) (0x000Cd300 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLHMC_VFPEQ1CNT_MAX_INDEX 31
+#define I40E_GLHMC_VFPEQ1CNT_FPMPEQ1CNT_SHIFT 0
+#define I40E_GLHMC_VFPEQ1CNT_FPMPEQ1CNT_MASK I40E_MASK(0x1FFFFFFF, I40E_GLHMC_VFPEQ1CNT_FPMPEQ1CNT_SHIFT)
+#define I40E_GLHMC_VFPEQ1FLBASE(_i) (0x000Cd400 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLHMC_VFPEQ1FLBASE_MAX_INDEX 31
+#define I40E_GLHMC_VFPEQ1FLBASE_FPMPEQ1FLBASE_SHIFT 0
+#define I40E_GLHMC_VFPEQ1FLBASE_FPMPEQ1FLBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_VFPEQ1FLBASE_FPMPEQ1FLBASE_SHIFT)
+#define I40E_GLHMC_VFPEQPBASE(_i) (0x000Cc000 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLHMC_VFPEQPBASE_MAX_INDEX 31
+#define I40E_GLHMC_VFPEQPBASE_FPMPEQPBASE_SHIFT 0
+#define I40E_GLHMC_VFPEQPBASE_FPMPEQPBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_VFPEQPBASE_FPMPEQPBASE_SHIFT)
+#define I40E_GLHMC_VFPEQPCNT(_i) (0x000Cc100 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLHMC_VFPEQPCNT_MAX_INDEX 31
+#define I40E_GLHMC_VFPEQPCNT_FPMPEQPCNT_SHIFT 0
+#define I40E_GLHMC_VFPEQPCNT_FPMPEQPCNT_MASK I40E_MASK(0x1FFFFFFF, I40E_GLHMC_VFPEQPCNT_FPMPEQPCNT_SHIFT)
+#define I40E_GLHMC_VFPESRQBASE(_i) (0x000Cc400 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLHMC_VFPESRQBASE_MAX_INDEX 31
+#define I40E_GLHMC_VFPESRQBASE_FPMPESRQBASE_SHIFT 0
+#define I40E_GLHMC_VFPESRQBASE_FPMPESRQBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_VFPESRQBASE_FPMPESRQBASE_SHIFT)
+#define I40E_GLHMC_VFPESRQCNT(_i) (0x000Cc500 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLHMC_VFPESRQCNT_MAX_INDEX 31
+#define I40E_GLHMC_VFPESRQCNT_FPMPESRQCNT_SHIFT 0
+#define I40E_GLHMC_VFPESRQCNT_FPMPESRQCNT_MASK I40E_MASK(0x1FFFFFFF, I40E_GLHMC_VFPESRQCNT_FPMPESRQCNT_SHIFT)
+#define I40E_GLHMC_VFPETIMERBASE(_i) (0x000CDA00 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLHMC_VFPETIMERBASE_MAX_INDEX 31
+#define I40E_GLHMC_VFPETIMERBASE_FPMPETIMERBASE_SHIFT 0
+#define I40E_GLHMC_VFPETIMERBASE_FPMPETIMERBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_VFPETIMERBASE_FPMPETIMERBASE_SHIFT)
+#define I40E_GLHMC_VFPETIMERCNT(_i) (0x000CDB00 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLHMC_VFPETIMERCNT_MAX_INDEX 31
+#define I40E_GLHMC_VFPETIMERCNT_FPMPETIMERCNT_SHIFT 0
+#define I40E_GLHMC_VFPETIMERCNT_FPMPETIMERCNT_MASK I40E_MASK(0x1FFFFFFF, I40E_GLHMC_VFPETIMERCNT_FPMPETIMERCNT_SHIFT)
+#define I40E_GLHMC_VFPEXFBASE(_i) (0x000Cce00 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLHMC_VFPEXFBASE_MAX_INDEX 31
+#define I40E_GLHMC_VFPEXFBASE_FPMPEXFBASE_SHIFT 0
+#define I40E_GLHMC_VFPEXFBASE_FPMPEXFBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_VFPEXFBASE_FPMPEXFBASE_SHIFT)
+#define I40E_GLHMC_VFPEXFCNT(_i) (0x000Ccf00 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLHMC_VFPEXFCNT_MAX_INDEX 31
+#define I40E_GLHMC_VFPEXFCNT_FPMPEXFCNT_SHIFT 0
+#define I40E_GLHMC_VFPEXFCNT_FPMPEXFCNT_MASK I40E_MASK(0x1FFFFFFF, I40E_GLHMC_VFPEXFCNT_FPMPEXFCNT_SHIFT)
+#define I40E_GLHMC_VFPEXFFLBASE(_i) (0x000Cd000 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLHMC_VFPEXFFLBASE_MAX_INDEX 31
+#define I40E_GLHMC_VFPEXFFLBASE_FPMPEXFFLBASE_SHIFT 0
+#define I40E_GLHMC_VFPEXFFLBASE_FPMPEXFFLBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_VFPEXFFLBASE_FPMPEXFFLBASE_SHIFT)
+#define I40E_GLHMC_VFSDPART(_i) (0x000C8800 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLHMC_VFSDPART_MAX_INDEX 31
+#define I40E_GLHMC_VFSDPART_PMSDBASE_SHIFT 0
+#define I40E_GLHMC_VFSDPART_PMSDBASE_MASK I40E_MASK(0xFFF, I40E_GLHMC_VFSDPART_PMSDBASE_SHIFT)
+#define I40E_GLHMC_VFSDPART_PMSDSIZE_SHIFT 16
+#define I40E_GLHMC_VFSDPART_PMSDSIZE_MASK I40E_MASK(0x1FFF, I40E_GLHMC_VFSDPART_PMSDSIZE_SHIFT)
+#define I40E_GLPBLOC_CACHESIZE 0x000A80BC /* Reset: CORER */
+#define I40E_GLPBLOC_CACHESIZE_WORD_SIZE_SHIFT 0
+#define I40E_GLPBLOC_CACHESIZE_WORD_SIZE_MASK I40E_MASK(0xFF, I40E_GLPBLOC_CACHESIZE_WORD_SIZE_SHIFT)
+#define I40E_GLPBLOC_CACHESIZE_SETS_SHIFT 8
+#define I40E_GLPBLOC_CACHESIZE_SETS_MASK I40E_MASK(0xFFF, I40E_GLPBLOC_CACHESIZE_SETS_SHIFT)
+#define I40E_GLPBLOC_CACHESIZE_WAYS_SHIFT 20
+#define I40E_GLPBLOC_CACHESIZE_WAYS_MASK I40E_MASK(0xF, I40E_GLPBLOC_CACHESIZE_WAYS_SHIFT)
+#define I40E_GLPDOC_CACHESIZE 0x000D0088 /* Reset: CORER */
+#define I40E_GLPDOC_CACHESIZE_WORD_SIZE_SHIFT 0
+#define I40E_GLPDOC_CACHESIZE_WORD_SIZE_MASK I40E_MASK(0xFF, I40E_GLPDOC_CACHESIZE_WORD_SIZE_SHIFT)
+#define I40E_GLPDOC_CACHESIZE_SETS_SHIFT 8
+#define I40E_GLPDOC_CACHESIZE_SETS_MASK I40E_MASK(0xFFF, I40E_GLPDOC_CACHESIZE_SETS_SHIFT)
+#define I40E_GLPDOC_CACHESIZE_WAYS_SHIFT 20
+#define I40E_GLPDOC_CACHESIZE_WAYS_MASK I40E_MASK(0xF, I40E_GLPDOC_CACHESIZE_WAYS_SHIFT)
+#define I40E_GLPEOC_CACHESIZE 0x000A60E8 /* Reset: CORER */
+#define I40E_GLPEOC_CACHESIZE_WORD_SIZE_SHIFT 0
+#define I40E_GLPEOC_CACHESIZE_WORD_SIZE_MASK I40E_MASK(0xFF, I40E_GLPEOC_CACHESIZE_WORD_SIZE_SHIFT)
+#define I40E_GLPEOC_CACHESIZE_SETS_SHIFT 8
+#define I40E_GLPEOC_CACHESIZE_SETS_MASK I40E_MASK(0xFFF, I40E_GLPEOC_CACHESIZE_SETS_SHIFT)
+#define I40E_GLPEOC_CACHESIZE_WAYS_SHIFT 20
+#define I40E_GLPEOC_CACHESIZE_WAYS_MASK I40E_MASK(0xF, I40E_GLPEOC_CACHESIZE_WAYS_SHIFT)
+#define I40E_PFHMC_PDINV_PMSDPARTSEL_SHIFT 15
+#define I40E_PFHMC_PDINV_PMSDPARTSEL_MASK I40E_MASK(0x1, I40E_PFHMC_PDINV_PMSDPARTSEL_SHIFT)
+#define I40E_PFHMC_SDCMD_PMSDPARTSEL_SHIFT 15
+#define I40E_PFHMC_SDCMD_PMSDPARTSEL_MASK I40E_MASK(0x1, I40E_PFHMC_SDCMD_PMSDPARTSEL_SHIFT)
+#define I40E_GL_PPRS_SPARE 0x000856E0 /* Reset: CORER */
+#define I40E_GL_PPRS_SPARE_GL_PPRS_SPARE_SHIFT 0
+#define I40E_GL_PPRS_SPARE_GL_PPRS_SPARE_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_PPRS_SPARE_GL_PPRS_SPARE_SHIFT)
+#define I40E_GL_TLAN_SPARE 0x000E64E0 /* Reset: CORER */
+#define I40E_GL_TLAN_SPARE_GL_TLAN_SPARE_SHIFT 0
+#define I40E_GL_TLAN_SPARE_GL_TLAN_SPARE_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_TLAN_SPARE_GL_TLAN_SPARE_SHIFT)
+#define I40E_GL_TUPM_SPARE 0x000a2230 /* Reset: CORER */
+#define I40E_GL_TUPM_SPARE_GL_TUPM_SPARE_SHIFT 0
+#define I40E_GL_TUPM_SPARE_GL_TUPM_SPARE_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_TUPM_SPARE_GL_TUPM_SPARE_SHIFT)
+#define I40E_GLGEN_CAR_DEBUG 0x000B81C0 /* Reset: POR */
+#define I40E_GLGEN_CAR_DEBUG_CAR_UPPER_CORE_CLK_EN_SHIFT 0
+#define I40E_GLGEN_CAR_DEBUG_CAR_UPPER_CORE_CLK_EN_MASK I40E_MASK(0x1, I40E_GLGEN_CAR_DEBUG_CAR_UPPER_CORE_CLK_EN_SHIFT)
+#define I40E_GLGEN_CAR_DEBUG_CAR_PCIE_HIU_CLK_EN_SHIFT 1
+#define I40E_GLGEN_CAR_DEBUG_CAR_PCIE_HIU_CLK_EN_MASK I40E_MASK(0x1, I40E_GLGEN_CAR_DEBUG_CAR_PCIE_HIU_CLK_EN_SHIFT)
+#define I40E_GLGEN_CAR_DEBUG_CAR_PE_CLK_EN_SHIFT 2
+#define I40E_GLGEN_CAR_DEBUG_CAR_PE_CLK_EN_MASK I40E_MASK(0x1, I40E_GLGEN_CAR_DEBUG_CAR_PE_CLK_EN_SHIFT)
+#define I40E_GLGEN_CAR_DEBUG_CAR_PCIE_PRIM_CLK_ACTIVE_SHIFT 3
+#define I40E_GLGEN_CAR_DEBUG_CAR_PCIE_PRIM_CLK_ACTIVE_MASK I40E_MASK(0x1, I40E_GLGEN_CAR_DEBUG_CAR_PCIE_PRIM_CLK_ACTIVE_SHIFT)
+#define I40E_GLGEN_CAR_DEBUG_CDC_PE_ACTIVE_SHIFT 4
+#define I40E_GLGEN_CAR_DEBUG_CDC_PE_ACTIVE_MASK I40E_MASK(0x1, I40E_GLGEN_CAR_DEBUG_CDC_PE_ACTIVE_SHIFT)
+#define I40E_GLGEN_CAR_DEBUG_CAR_PCIE_RAW_PRST_RESET_N_SHIFT 5
+#define I40E_GLGEN_CAR_DEBUG_CAR_PCIE_RAW_PRST_RESET_N_MASK I40E_MASK(0x1, I40E_GLGEN_CAR_DEBUG_CAR_PCIE_RAW_PRST_RESET_N_SHIFT)
+#define I40E_GLGEN_CAR_DEBUG_CAR_PCIE_RAW_SCLR_RESET_N_SHIFT 6
+#define I40E_GLGEN_CAR_DEBUG_CAR_PCIE_RAW_SCLR_RESET_N_MASK I40E_MASK(0x1, I40E_GLGEN_CAR_DEBUG_CAR_PCIE_RAW_SCLR_RESET_N_SHIFT)
+#define I40E_GLGEN_CAR_DEBUG_CAR_PCIE_RAW_IB_RESET_N_SHIFT 7
+#define I40E_GLGEN_CAR_DEBUG_CAR_PCIE_RAW_IB_RESET_N_MASK I40E_MASK(0x1, I40E_GLGEN_CAR_DEBUG_CAR_PCIE_RAW_IB_RESET_N_SHIFT)
+#define I40E_GLGEN_CAR_DEBUG_CAR_PCIE_RAW_IMIB_RESET_N_SHIFT 8
+#define I40E_GLGEN_CAR_DEBUG_CAR_PCIE_RAW_IMIB_RESET_N_MASK I40E_MASK(0x1, I40E_GLGEN_CAR_DEBUG_CAR_PCIE_RAW_IMIB_RESET_N_SHIFT)
+#define I40E_GLGEN_CAR_DEBUG_CAR_RAW_EMP_RESET_N_SHIFT 9
+#define I40E_GLGEN_CAR_DEBUG_CAR_RAW_EMP_RESET_N_MASK I40E_MASK(0x1, I40E_GLGEN_CAR_DEBUG_CAR_RAW_EMP_RESET_N_SHIFT)
+#define I40E_GLGEN_CAR_DEBUG_CAR_RAW_GLOBAL_RESET_N_SHIFT 10
+#define I40E_GLGEN_CAR_DEBUG_CAR_RAW_GLOBAL_RESET_N_MASK I40E_MASK(0x1, I40E_GLGEN_CAR_DEBUG_CAR_RAW_GLOBAL_RESET_N_SHIFT)
+#define I40E_GLGEN_CAR_DEBUG_CAR_RAW_LAN_POWER_GOOD_SHIFT 11
+#define I40E_GLGEN_CAR_DEBUG_CAR_RAW_LAN_POWER_GOOD_MASK I40E_MASK(0x1, I40E_GLGEN_CAR_DEBUG_CAR_RAW_LAN_POWER_GOOD_SHIFT)
+#define I40E_GLGEN_CAR_DEBUG_CDC_IOSF_PRIMERY_RST_B_SHIFT 12
+#define I40E_GLGEN_CAR_DEBUG_CDC_IOSF_PRIMERY_RST_B_MASK I40E_MASK(0x1, I40E_GLGEN_CAR_DEBUG_CDC_IOSF_PRIMERY_RST_B_SHIFT)
+#define I40E_GLGEN_CAR_DEBUG_GBE_GLOBALRST_B_SHIFT 13
+#define I40E_GLGEN_CAR_DEBUG_GBE_GLOBALRST_B_MASK I40E_MASK(0x1, I40E_GLGEN_CAR_DEBUG_GBE_GLOBALRST_B_SHIFT)
+#define I40E_GLGEN_CAR_DEBUG_FLEEP_AL_GLOBR_DONE_SHIFT 14
+#define I40E_GLGEN_CAR_DEBUG_FLEEP_AL_GLOBR_DONE_MASK I40E_MASK(0x1, I40E_GLGEN_CAR_DEBUG_FLEEP_AL_GLOBR_DONE_SHIFT)
+#define I40E_GLGEN_MISC_SPARE 0x000880E0 /* Reset: POR */
+#define I40E_GLGEN_MISC_SPARE_GLGEN_MISC_SPARE_SHIFT 0
+#define I40E_GLGEN_MISC_SPARE_GLGEN_MISC_SPARE_MASK I40E_MASK(0xFFFFFFFF, I40E_GLGEN_MISC_SPARE_GLGEN_MISC_SPARE_SHIFT)
+#define I40E_GL_UFUSE_SOC 0x000BE550 /* Reset: POR */
+#define I40E_GL_UFUSE_SOC_PORT_MODE_SHIFT 0
+#define I40E_GL_UFUSE_SOC_PORT_MODE_MASK I40E_MASK(0x3, I40E_GL_UFUSE_SOC_PORT_MODE_SHIFT)
+#define I40E_GL_UFUSE_SOC_NIC_ID_SHIFT 2
+#define I40E_GL_UFUSE_SOC_NIC_ID_MASK I40E_MASK(0x1, I40E_GL_UFUSE_SOC_NIC_ID_SHIFT)
+#define I40E_GL_UFUSE_SOC_SPARE_FUSES_SHIFT 3
+#define I40E_GL_UFUSE_SOC_SPARE_FUSES_MASK I40E_MASK(0x1FFF, I40E_GL_UFUSE_SOC_SPARE_FUSES_SHIFT)
+#define I40E_PFINT_DYN_CTL0_WB_ON_ITR_SHIFT 30
+#define I40E_PFINT_DYN_CTL0_WB_ON_ITR_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTL0_WB_ON_ITR_SHIFT)
+#define I40E_PFINT_DYN_CTLN_WB_ON_ITR_SHIFT 30
+#define I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTLN_WB_ON_ITR_SHIFT)
+#define I40E_VFINT_DYN_CTL0_WB_ON_ITR_SHIFT 30
+#define I40E_VFINT_DYN_CTL0_WB_ON_ITR_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTL0_WB_ON_ITR_SHIFT)
+#define I40E_VFINT_DYN_CTLN_WB_ON_ITR_SHIFT 30
+#define I40E_VFINT_DYN_CTLN_WB_ON_ITR_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTLN_WB_ON_ITR_SHIFT)
+#define I40E_VPLAN_QBASE(_VF) (0x00074800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VPLAN_QBASE_MAX_INDEX 127
+#define I40E_VPLAN_QBASE_VFFIRSTQ_SHIFT 0
+#define I40E_VPLAN_QBASE_VFFIRSTQ_MASK I40E_MASK(0x7FF, I40E_VPLAN_QBASE_VFFIRSTQ_SHIFT)
+#define I40E_VPLAN_QBASE_VFNUMQ_SHIFT 11
+#define I40E_VPLAN_QBASE_VFNUMQ_MASK I40E_MASK(0xFF, I40E_VPLAN_QBASE_VFNUMQ_SHIFT)
+#define I40E_VPLAN_QBASE_VFQTABLE_ENA_SHIFT 31
+#define I40E_VPLAN_QBASE_VFQTABLE_ENA_MASK I40E_MASK(0x1, I40E_VPLAN_QBASE_VFQTABLE_ENA_SHIFT)
+#define I40E_PRTMAC_LINK_DOWN_COUNTER 0x001E2440 /* Reset: GLOBR */
+#define I40E_PRTMAC_LINK_DOWN_COUNTER_LINK_DOWN_COUNTER_SHIFT 0
+#define I40E_PRTMAC_LINK_DOWN_COUNTER_LINK_DOWN_COUNTER_MASK I40E_MASK(0xFFFF, I40E_PRTMAC_LINK_DOWN_COUNTER_LINK_DOWN_COUNTER_SHIFT)
+#define I40E_GLNVM_AL_REQ 0x000B6164 /* Reset: POR */
+#define I40E_GLNVM_AL_REQ_POR_SHIFT 0
+#define I40E_GLNVM_AL_REQ_POR_MASK I40E_MASK(0x1, I40E_GLNVM_AL_REQ_POR_SHIFT)
+#define I40E_GLNVM_AL_REQ_PCIE_IMIB_SHIFT 1
+#define I40E_GLNVM_AL_REQ_PCIE_IMIB_MASK I40E_MASK(0x1, I40E_GLNVM_AL_REQ_PCIE_IMIB_SHIFT)
+#define I40E_GLNVM_AL_REQ_GLOBR_SHIFT 2
+#define I40E_GLNVM_AL_REQ_GLOBR_MASK I40E_MASK(0x1, I40E_GLNVM_AL_REQ_GLOBR_SHIFT)
+#define I40E_GLNVM_AL_REQ_CORER_SHIFT 3
+#define I40E_GLNVM_AL_REQ_CORER_MASK I40E_MASK(0x1, I40E_GLNVM_AL_REQ_CORER_SHIFT)
+#define I40E_GLNVM_AL_REQ_PE_SHIFT 4
+#define I40E_GLNVM_AL_REQ_PE_MASK I40E_MASK(0x1, I40E_GLNVM_AL_REQ_PE_SHIFT)
+#define I40E_GLNVM_AL_REQ_PCIE_IMIB_ASSERT_SHIFT 5
+#define I40E_GLNVM_AL_REQ_PCIE_IMIB_ASSERT_MASK I40E_MASK(0x1, I40E_GLNVM_AL_REQ_PCIE_IMIB_ASSERT_SHIFT)
+#define I40E_GLNVM_ALTIMERS 0x000B6140 /* Reset: POR */
+#define I40E_GLNVM_ALTIMERS_PCI_ALTIMER_SHIFT 0
+#define I40E_GLNVM_ALTIMERS_PCI_ALTIMER_MASK I40E_MASK(0xFFF, I40E_GLNVM_ALTIMERS_PCI_ALTIMER_SHIFT)
+#define I40E_GLNVM_ALTIMERS_GEN_ALTIMER_SHIFT 12
+#define I40E_GLNVM_ALTIMERS_GEN_ALTIMER_MASK I40E_MASK(0xFFFFF, I40E_GLNVM_ALTIMERS_GEN_ALTIMER_SHIFT)
+#define I40E_GLNVM_FLA 0x000B6108 /* Reset: POR */
+#define I40E_GLNVM_FLA_LOCKED_SHIFT 6
+#define I40E_GLNVM_FLA_LOCKED_MASK I40E_MASK(0x1, I40E_GLNVM_FLA_LOCKED_SHIFT)
+
+#define I40E_GLNVM_ULD 0x000B6008 /* Reset: POR */
+#define I40E_GLNVM_ULD_PCIER_DONE_SHIFT 0
+#define I40E_GLNVM_ULD_PCIER_DONE_MASK I40E_MASK(0x1, I40E_GLNVM_ULD_PCIER_DONE_SHIFT)
+#define I40E_GLNVM_ULD_PCIER_DONE_1_SHIFT 1
+#define I40E_GLNVM_ULD_PCIER_DONE_1_MASK I40E_MASK(0x1, I40E_GLNVM_ULD_PCIER_DONE_1_SHIFT)
+#define I40E_GLNVM_ULD_CORER_DONE_SHIFT 3
+#define I40E_GLNVM_ULD_CORER_DONE_MASK I40E_MASK(0x1, I40E_GLNVM_ULD_CORER_DONE_SHIFT)
+#define I40E_GLNVM_ULD_GLOBR_DONE_SHIFT 4
+#define I40E_GLNVM_ULD_GLOBR_DONE_MASK I40E_MASK(0x1, I40E_GLNVM_ULD_GLOBR_DONE_SHIFT)
+#define I40E_GLNVM_ULD_POR_DONE_SHIFT 5
+#define I40E_GLNVM_ULD_POR_DONE_MASK I40E_MASK(0x1, I40E_GLNVM_ULD_POR_DONE_SHIFT)
+#define I40E_GLNVM_ULD_POR_DONE_1_SHIFT 8
+#define I40E_GLNVM_ULD_POR_DONE_1_MASK I40E_MASK(0x1, I40E_GLNVM_ULD_POR_DONE_1_SHIFT)
+#define I40E_GLNVM_ULD_PCIER_DONE_2_SHIFT 9
+#define I40E_GLNVM_ULD_PCIER_DONE_2_MASK I40E_MASK(0x1, I40E_GLNVM_ULD_PCIER_DONE_2_SHIFT)
+#define I40E_GLNVM_ULD_PE_DONE_SHIFT 10
+#define I40E_GLNVM_ULD_PE_DONE_MASK I40E_MASK(0x1, I40E_GLNVM_ULD_PE_DONE_SHIFT)
+#define I40E_GLNVM_ULT 0x000B6154 /* Reset: POR */
+#define I40E_GLNVM_ULT_CONF_PCIR_AE_SHIFT 0
+#define I40E_GLNVM_ULT_CONF_PCIR_AE_MASK I40E_MASK(0x1, I40E_GLNVM_ULT_CONF_PCIR_AE_SHIFT)
+#define I40E_GLNVM_ULT_CONF_PCIRTL_AE_SHIFT 1
+#define I40E_GLNVM_ULT_CONF_PCIRTL_AE_MASK I40E_MASK(0x1, I40E_GLNVM_ULT_CONF_PCIRTL_AE_SHIFT)
+#define I40E_GLNVM_ULT_RESERVED_1_SHIFT 2
+#define I40E_GLNVM_ULT_RESERVED_1_MASK I40E_MASK(0x1, I40E_GLNVM_ULT_RESERVED_1_SHIFT)
+#define I40E_GLNVM_ULT_CONF_CORE_AE_SHIFT 3
+#define I40E_GLNVM_ULT_CONF_CORE_AE_MASK I40E_MASK(0x1, I40E_GLNVM_ULT_CONF_CORE_AE_SHIFT)
+#define I40E_GLNVM_ULT_CONF_GLOBAL_AE_SHIFT 4
+#define I40E_GLNVM_ULT_CONF_GLOBAL_AE_MASK I40E_MASK(0x1, I40E_GLNVM_ULT_CONF_GLOBAL_AE_SHIFT)
+#define I40E_GLNVM_ULT_CONF_POR_AE_SHIFT 5
+#define I40E_GLNVM_ULT_CONF_POR_AE_MASK I40E_MASK(0x1, I40E_GLNVM_ULT_CONF_POR_AE_SHIFT)
+#define I40E_GLNVM_ULT_RESERVED_2_SHIFT 6
+#define I40E_GLNVM_ULT_RESERVED_2_MASK I40E_MASK(0x1, I40E_GLNVM_ULT_RESERVED_2_SHIFT)
+#define I40E_GLNVM_ULT_RESERVED_3_SHIFT 7
+#define I40E_GLNVM_ULT_RESERVED_3_MASK I40E_MASK(0x1, I40E_GLNVM_ULT_RESERVED_3_SHIFT)
+#define I40E_GLNVM_ULT_CONF_EMP_AE_SHIFT 8
+#define I40E_GLNVM_ULT_CONF_EMP_AE_MASK I40E_MASK(0x1, I40E_GLNVM_ULT_CONF_EMP_AE_SHIFT)
+#define I40E_GLNVM_ULT_CONF_PCIALT_AE_SHIFT 9
+#define I40E_GLNVM_ULT_CONF_PCIALT_AE_MASK I40E_MASK(0x1, I40E_GLNVM_ULT_CONF_PCIALT_AE_SHIFT)
+#define I40E_GLNVM_ULT_RESERVED_4_SHIFT 10
+#define I40E_GLNVM_ULT_RESERVED_4_MASK I40E_MASK(0x3FFFFF, I40E_GLNVM_ULT_RESERVED_4_SHIFT)
+#define I40E_MEM_INIT_DONE_STAT 0x000B615C /* Reset: POR */
+#define I40E_MEM_INIT_DONE_STAT_CMLAN_MEM_INIT_DONE_SHIFT 0
+#define I40E_MEM_INIT_DONE_STAT_CMLAN_MEM_INIT_DONE_MASK I40E_MASK(0x1, I40E_MEM_INIT_DONE_STAT_CMLAN_MEM_INIT_DONE_SHIFT)
+#define I40E_MEM_INIT_DONE_STAT_PMAT_MEM_INIT_DONE_SHIFT 1
+#define I40E_MEM_INIT_DONE_STAT_PMAT_MEM_INIT_DONE_MASK I40E_MASK(0x1, I40E_MEM_INIT_DONE_STAT_PMAT_MEM_INIT_DONE_SHIFT)
+#define I40E_MEM_INIT_DONE_STAT_RCU_MEM_INIT_DONE_SHIFT 2
+#define I40E_MEM_INIT_DONE_STAT_RCU_MEM_INIT_DONE_MASK I40E_MASK(0x1, I40E_MEM_INIT_DONE_STAT_RCU_MEM_INIT_DONE_SHIFT)
+#define I40E_MEM_INIT_DONE_STAT_TDPU_MEM_INIT_DONE_SHIFT 3
+#define I40E_MEM_INIT_DONE_STAT_TDPU_MEM_INIT_DONE_MASK I40E_MASK(0x1, I40E_MEM_INIT_DONE_STAT_TDPU_MEM_INIT_DONE_SHIFT)
+#define I40E_MEM_INIT_DONE_STAT_TLAN_MEM_INIT_DONE_SHIFT 4
+#define I40E_MEM_INIT_DONE_STAT_TLAN_MEM_INIT_DONE_MASK I40E_MASK(0x1, I40E_MEM_INIT_DONE_STAT_TLAN_MEM_INIT_DONE_SHIFT)
+#define I40E_MEM_INIT_DONE_STAT_RLAN_MEM_INIT_DONE_SHIFT 5
+#define I40E_MEM_INIT_DONE_STAT_RLAN_MEM_INIT_DONE_MASK I40E_MASK(0x1, I40E_MEM_INIT_DONE_STAT_RLAN_MEM_INIT_DONE_SHIFT)
+#define I40E_MEM_INIT_DONE_STAT_RDPU_MEM_INIT_DONE_SHIFT 6
+#define I40E_MEM_INIT_DONE_STAT_RDPU_MEM_INIT_DONE_MASK I40E_MASK(0x1, I40E_MEM_INIT_DONE_STAT_RDPU_MEM_INIT_DONE_SHIFT)
+#define I40E_MEM_INIT_DONE_STAT_PPRS_MEM_INIT_DONE_SHIFT 7
+#define I40E_MEM_INIT_DONE_STAT_PPRS_MEM_INIT_DONE_MASK I40E_MASK(0x1, I40E_MEM_INIT_DONE_STAT_PPRS_MEM_INIT_DONE_SHIFT)
+#define I40E_MEM_INIT_DONE_STAT_RPB_MEM_INIT_DONE_SHIFT 8
+#define I40E_MEM_INIT_DONE_STAT_RPB_MEM_INIT_DONE_MASK I40E_MASK(0x1, I40E_MEM_INIT_DONE_STAT_RPB_MEM_INIT_DONE_SHIFT)
+#define I40E_MEM_INIT_DONE_STAT_TPB_MEM_INIT_DONE_SHIFT 9
+#define I40E_MEM_INIT_DONE_STAT_TPB_MEM_INIT_DONE_MASK I40E_MASK(0x1, I40E_MEM_INIT_DONE_STAT_TPB_MEM_INIT_DONE_SHIFT)
+#define I40E_MEM_INIT_DONE_STAT_FOC_MEM_INIT_DONE_SHIFT 10
+#define I40E_MEM_INIT_DONE_STAT_FOC_MEM_INIT_DONE_MASK I40E_MASK(0x1, I40E_MEM_INIT_DONE_STAT_FOC_MEM_INIT_DONE_SHIFT)
+#define I40E_MEM_INIT_DONE_STAT_TSCD_MEM_INIT_DONE_SHIFT 11
+#define I40E_MEM_INIT_DONE_STAT_TSCD_MEM_INIT_DONE_MASK I40E_MASK(0x1, I40E_MEM_INIT_DONE_STAT_TSCD_MEM_INIT_DONE_SHIFT)
+#define I40E_MEM_INIT_DONE_STAT_TCB_MEM_INIT_DONE_SHIFT 12
+#define I40E_MEM_INIT_DONE_STAT_TCB_MEM_INIT_DONE_MASK I40E_MASK(0x1, I40E_MEM_INIT_DONE_STAT_TCB_MEM_INIT_DONE_SHIFT)
+#define I40E_MEM_INIT_DONE_STAT_RCB_MEM_INIT_DONE_SHIFT 13
+#define I40E_MEM_INIT_DONE_STAT_RCB_MEM_INIT_DONE_MASK I40E_MASK(0x1, I40E_MEM_INIT_DONE_STAT_RCB_MEM_INIT_DONE_SHIFT)
+#define I40E_MEM_INIT_DONE_STAT_WUC_MEM_INIT_DONE_SHIFT 14
+#define I40E_MEM_INIT_DONE_STAT_WUC_MEM_INIT_DONE_MASK I40E_MASK(0x1, I40E_MEM_INIT_DONE_STAT_WUC_MEM_INIT_DONE_SHIFT)
+#define I40E_MEM_INIT_DONE_STAT_STAT_MEM_INIT_DONE_SHIFT 15
+#define I40E_MEM_INIT_DONE_STAT_STAT_MEM_INIT_DONE_MASK I40E_MASK(0x1, I40E_MEM_INIT_DONE_STAT_STAT_MEM_INIT_DONE_SHIFT)
+#define I40E_MEM_INIT_DONE_STAT_ITR_MEM_INIT_DONE_SHIFT 16
+#define I40E_MEM_INIT_DONE_STAT_ITR_MEM_INIT_DONE_MASK I40E_MASK(0x1, I40E_MEM_INIT_DONE_STAT_ITR_MEM_INIT_DONE_SHIFT)
+#define I40E_MNGSB_DADD 0x000B7030 /* Reset: POR */
+#define I40E_MNGSB_DADD_ADDR_SHIFT 0
+#define I40E_MNGSB_DADD_ADDR_MASK I40E_MASK(0xFFFFFFFF, I40E_MNGSB_DADD_ADDR_SHIFT)
+#define I40E_MNGSB_DCNT 0x000B7034 /* Reset: POR */
+#define I40E_MNGSB_DCNT_BYTE_CNT_SHIFT 0
+#define I40E_MNGSB_DCNT_BYTE_CNT_MASK I40E_MASK(0xFFFFFFFF, I40E_MNGSB_DCNT_BYTE_CNT_SHIFT)
+#define I40E_MNGSB_MSGCTL 0x000B7020 /* Reset: POR */
+#define I40E_MNGSB_MSGCTL_HDR_DWS_SHIFT 0
+#define I40E_MNGSB_MSGCTL_HDR_DWS_MASK I40E_MASK(0x3, I40E_MNGSB_MSGCTL_HDR_DWS_SHIFT)
+#define I40E_MNGSB_MSGCTL_EXP_RDW_SHIFT 8
+#define I40E_MNGSB_MSGCTL_EXP_RDW_MASK I40E_MASK(0x1FF, I40E_MNGSB_MSGCTL_EXP_RDW_SHIFT)
+#define I40E_MNGSB_MSGCTL_MSG_MODE_SHIFT 26
+#define I40E_MNGSB_MSGCTL_MSG_MODE_MASK I40E_MASK(0x3, I40E_MNGSB_MSGCTL_MSG_MODE_SHIFT)
+#define I40E_MNGSB_MSGCTL_TOKEN_MODE_SHIFT 28
+#define I40E_MNGSB_MSGCTL_TOKEN_MODE_MASK I40E_MASK(0x3, I40E_MNGSB_MSGCTL_TOKEN_MODE_SHIFT)
+#define I40E_MNGSB_MSGCTL_BARCLR_SHIFT 30
+#define I40E_MNGSB_MSGCTL_BARCLR_MASK I40E_MASK(0x1, I40E_MNGSB_MSGCTL_BARCLR_SHIFT)
+#define I40E_MNGSB_MSGCTL_CMDV_SHIFT 31
+#define I40E_MNGSB_MSGCTL_CMDV_MASK I40E_MASK(0x1, I40E_MNGSB_MSGCTL_CMDV_SHIFT)
+#define I40E_MNGSB_RDATA 0x000B7300 /* Reset: POR */
+#define I40E_MNGSB_RDATA_DATA_SHIFT 0
+#define I40E_MNGSB_RDATA_DATA_MASK I40E_MASK(0xFFFFFFFF, I40E_MNGSB_RDATA_DATA_SHIFT)
+#define I40E_MNGSB_RHDR0 0x000B72FC /* Reset: POR */
+#define I40E_MNGSB_RHDR0_DESTINATION_SHIFT 0
+#define I40E_MNGSB_RHDR0_DESTINATION_MASK I40E_MASK(0xFF, I40E_MNGSB_RHDR0_DESTINATION_SHIFT)
+#define I40E_MNGSB_RHDR0_SOURCE_SHIFT 8
+#define I40E_MNGSB_RHDR0_SOURCE_MASK I40E_MASK(0xFF, I40E_MNGSB_RHDR0_SOURCE_SHIFT)
+#define I40E_MNGSB_RHDR0_OPCODE_SHIFT 16
+#define I40E_MNGSB_RHDR0_OPCODE_MASK I40E_MASK(0xFF, I40E_MNGSB_RHDR0_OPCODE_SHIFT)
+#define I40E_MNGSB_RHDR0_TAG_SHIFT 24
+#define I40E_MNGSB_RHDR0_TAG_MASK I40E_MASK(0x7, I40E_MNGSB_RHDR0_TAG_SHIFT)
+#define I40E_MNGSB_RHDR0_RESPONSE_SHIFT 27
+#define I40E_MNGSB_RHDR0_RESPONSE_MASK I40E_MASK(0x7, I40E_MNGSB_RHDR0_RESPONSE_SHIFT)
+#define I40E_MNGSB_RHDR0_EH_SHIFT 31
+#define I40E_MNGSB_RHDR0_EH_MASK I40E_MASK(0x1, I40E_MNGSB_RHDR0_EH_SHIFT)
+#define I40E_MNGSB_RSPCTL 0x000B7024 /* Reset: POR */
+#define I40E_MNGSB_RSPCTL_DMA_MSG_DWORDS_SHIFT 0
+#define I40E_MNGSB_RSPCTL_DMA_MSG_DWORDS_MASK I40E_MASK(0x1FF, I40E_MNGSB_RSPCTL_DMA_MSG_DWORDS_SHIFT)
+#define I40E_MNGSB_RSPCTL_RSP_MODE_SHIFT 26
+#define I40E_MNGSB_RSPCTL_RSP_MODE_MASK I40E_MASK(0x3, I40E_MNGSB_RSPCTL_RSP_MODE_SHIFT)
+#define I40E_MNGSB_RSPCTL_RSP_BAD_LEN_SHIFT 30
+#define I40E_MNGSB_RSPCTL_RSP_BAD_LEN_MASK I40E_MASK(0x1, I40E_MNGSB_RSPCTL_RSP_BAD_LEN_SHIFT)
+#define I40E_MNGSB_RSPCTL_RSP_ERR_SHIFT 31
+#define I40E_MNGSB_RSPCTL_RSP_ERR_MASK I40E_MASK(0x1, I40E_MNGSB_RSPCTL_RSP_ERR_SHIFT)
+#define I40E_MNGSB_WDATA 0x000B7100 /* Reset: POR */
+#define I40E_MNGSB_WDATA_DATA_SHIFT 0
+#define I40E_MNGSB_WDATA_DATA_MASK I40E_MASK(0xFFFFFFFF, I40E_MNGSB_WDATA_DATA_SHIFT)
+#define I40E_MNGSB_WHDR0 0x000B70F4 /* Reset: POR */
+#define I40E_MNGSB_WHDR0_RAW_DEST_SHIFT 0
+#define I40E_MNGSB_WHDR0_RAW_DEST_MASK I40E_MASK(0xFF, I40E_MNGSB_WHDR0_RAW_DEST_SHIFT)
+#define I40E_MNGSB_WHDR0_DEST_SEL_SHIFT 12
+#define I40E_MNGSB_WHDR0_DEST_SEL_MASK I40E_MASK(0xF, I40E_MNGSB_WHDR0_DEST_SEL_SHIFT)
+#define I40E_MNGSB_WHDR0_OPCODE_SEL_SHIFT 16
+#define I40E_MNGSB_WHDR0_OPCODE_SEL_MASK I40E_MASK(0xFF, I40E_MNGSB_WHDR0_OPCODE_SEL_SHIFT)
+#define I40E_MNGSB_WHDR0_TAG_SHIFT 24
+#define I40E_MNGSB_WHDR0_TAG_MASK I40E_MASK(0x7F, I40E_MNGSB_WHDR0_TAG_SHIFT)
+#define I40E_MNGSB_WHDR1 0x000B70F8 /* Reset: POR */
+#define I40E_MNGSB_WHDR1_ADDR_SHIFT 0
+#define I40E_MNGSB_WHDR1_ADDR_MASK I40E_MASK(0xFFFFFFFF, I40E_MNGSB_WHDR1_ADDR_SHIFT)
+#define I40E_MNGSB_WHDR2 0x000B70FC /* Reset: POR */
+#define I40E_MNGSB_WHDR2_LENGTH_SHIFT 0
+#define I40E_MNGSB_WHDR2_LENGTH_MASK I40E_MASK(0xFFFFFFFF, I40E_MNGSB_WHDR2_LENGTH_SHIFT)
+
+#define I40E_GLPCI_CAPSUP_WAKUP_EN_SHIFT 21
+#define I40E_GLPCI_CAPSUP_WAKUP_EN_MASK I40E_MASK(0x1, I40E_GLPCI_CAPSUP_WAKUP_EN_SHIFT)
+
+#define I40E_GLPCI_CUR_CLNT_COMMON 0x0009CA18 /* Reset: PCIR */
+#define I40E_GLPCI_CUR_CLNT_COMMON_DATA_LINES_SHIFT 0
+#define I40E_GLPCI_CUR_CLNT_COMMON_DATA_LINES_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_CLNT_COMMON_DATA_LINES_SHIFT)
+#define I40E_GLPCI_CUR_CLNT_COMMON_OSR_SHIFT 16
+#define I40E_GLPCI_CUR_CLNT_COMMON_OSR_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_CLNT_COMMON_OSR_SHIFT)
+#define I40E_GLPCI_CUR_CLNT_PIPEMON 0x0009CA20 /* Reset: PCIR */
+#define I40E_GLPCI_CUR_CLNT_PIPEMON_DATA_LINES_SHIFT 0
+#define I40E_GLPCI_CUR_CLNT_PIPEMON_DATA_LINES_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_CLNT_PIPEMON_DATA_LINES_SHIFT)
+#define I40E_GLPCI_CUR_MNG_ALWD 0x0009c514 /* Reset: PCIR */
+#define I40E_GLPCI_CUR_MNG_ALWD_DATA_LINES_SHIFT 0
+#define I40E_GLPCI_CUR_MNG_ALWD_DATA_LINES_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_MNG_ALWD_DATA_LINES_SHIFT)
+#define I40E_GLPCI_CUR_MNG_ALWD_OSR_SHIFT 16
+#define I40E_GLPCI_CUR_MNG_ALWD_OSR_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_MNG_ALWD_OSR_SHIFT)
+#define I40E_GLPCI_CUR_MNG_RSVD 0x0009c594 /* Reset: PCIR */
+#define I40E_GLPCI_CUR_MNG_RSVD_DATA_LINES_SHIFT 0
+#define I40E_GLPCI_CUR_MNG_RSVD_DATA_LINES_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_MNG_RSVD_DATA_LINES_SHIFT)
+#define I40E_GLPCI_CUR_MNG_RSVD_OSR_SHIFT 16
+#define I40E_GLPCI_CUR_MNG_RSVD_OSR_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_MNG_RSVD_OSR_SHIFT)
+#define I40E_GLPCI_CUR_PMAT_ALWD 0x0009c510 /* Reset: PCIR */
+#define I40E_GLPCI_CUR_PMAT_ALWD_DATA_LINES_SHIFT 0
+#define I40E_GLPCI_CUR_PMAT_ALWD_DATA_LINES_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_PMAT_ALWD_DATA_LINES_SHIFT)
+#define I40E_GLPCI_CUR_PMAT_ALWD_OSR_SHIFT 16
+#define I40E_GLPCI_CUR_PMAT_ALWD_OSR_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_PMAT_ALWD_OSR_SHIFT)
+#define I40E_GLPCI_CUR_PMAT_RSVD 0x0009c590 /* Reset: PCIR */
+#define I40E_GLPCI_CUR_PMAT_RSVD_DATA_LINES_SHIFT 0
+#define I40E_GLPCI_CUR_PMAT_RSVD_DATA_LINES_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_PMAT_RSVD_DATA_LINES_SHIFT)
+#define I40E_GLPCI_CUR_PMAT_RSVD_OSR_SHIFT 16
+#define I40E_GLPCI_CUR_PMAT_RSVD_OSR_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_PMAT_RSVD_OSR_SHIFT)
+#define I40E_GLPCI_CUR_RLAN_ALWD 0x0009c500 /* Reset: PCIR */
+#define I40E_GLPCI_CUR_RLAN_ALWD_DATA_LINES_SHIFT 0
+#define I40E_GLPCI_CUR_RLAN_ALWD_DATA_LINES_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_RLAN_ALWD_DATA_LINES_SHIFT)
+#define I40E_GLPCI_CUR_RLAN_ALWD_OSR_SHIFT 16
+#define I40E_GLPCI_CUR_RLAN_ALWD_OSR_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_RLAN_ALWD_OSR_SHIFT)
+#define I40E_GLPCI_CUR_RLAN_RSVD 0x0009c580 /* Reset: PCIR */
+#define I40E_GLPCI_CUR_RLAN_RSVD_DATA_LINES_SHIFT 0
+#define I40E_GLPCI_CUR_RLAN_RSVD_DATA_LINES_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_RLAN_RSVD_DATA_LINES_SHIFT)
+#define I40E_GLPCI_CUR_RLAN_RSVD_OSR_SHIFT 16
+#define I40E_GLPCI_CUR_RLAN_RSVD_OSR_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_RLAN_RSVD_OSR_SHIFT)
+#define I40E_GLPCI_CUR_RXPE_ALWD 0x0009c508 /* Reset: PCIR */
+#define I40E_GLPCI_CUR_RXPE_ALWD_DATA_LINES_SHIFT 0
+#define I40E_GLPCI_CUR_RXPE_ALWD_DATA_LINES_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_RXPE_ALWD_DATA_LINES_SHIFT)
+#define I40E_GLPCI_CUR_RXPE_ALWD_OSR_SHIFT 16
+#define I40E_GLPCI_CUR_RXPE_ALWD_OSR_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_RXPE_ALWD_OSR_SHIFT)
+#define I40E_GLPCI_CUR_RXPE_RSVD 0x0009c588 /* Reset: PCIR */
+#define I40E_GLPCI_CUR_RXPE_RSVD_DATA_LINES_SHIFT 0
+#define I40E_GLPCI_CUR_RXPE_RSVD_DATA_LINES_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_RXPE_RSVD_DATA_LINES_SHIFT)
+#define I40E_GLPCI_CUR_RXPE_RSVD_OSR_SHIFT 16
+#define I40E_GLPCI_CUR_RXPE_RSVD_OSR_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_RXPE_RSVD_OSR_SHIFT)
+#define I40E_GLPCI_CUR_TDPU_ALWD 0x0009c518 /* Reset: PCIR */
+#define I40E_GLPCI_CUR_TDPU_ALWD_DATA_LINES_SHIFT 0
+#define I40E_GLPCI_CUR_TDPU_ALWD_DATA_LINES_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_TDPU_ALWD_DATA_LINES_SHIFT)
+#define I40E_GLPCI_CUR_TDPU_ALWD_OSR_SHIFT 16
+#define I40E_GLPCI_CUR_TDPU_ALWD_OSR_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_TDPU_ALWD_OSR_SHIFT)
+#define I40E_GLPCI_CUR_TDPU_RSVD 0x0009c598 /* Reset: PCIR */
+#define I40E_GLPCI_CUR_TDPU_RSVD_DATA_LINES_SHIFT 0
+#define I40E_GLPCI_CUR_TDPU_RSVD_DATA_LINES_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_TDPU_RSVD_DATA_LINES_SHIFT)
+#define I40E_GLPCI_CUR_TDPU_RSVD_OSR_SHIFT 16
+#define I40E_GLPCI_CUR_TDPU_RSVD_OSR_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_TDPU_RSVD_OSR_SHIFT)
+#define I40E_GLPCI_CUR_TLAN_ALWD 0x0009c504 /* Reset: PCIR */
+#define I40E_GLPCI_CUR_TLAN_ALWD_DATA_LINES_SHIFT 0
+#define I40E_GLPCI_CUR_TLAN_ALWD_DATA_LINES_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_TLAN_ALWD_DATA_LINES_SHIFT)
+#define I40E_GLPCI_CUR_TLAN_ALWD_OSR_SHIFT 16
+#define I40E_GLPCI_CUR_TLAN_ALWD_OSR_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_TLAN_ALWD_OSR_SHIFT)
+#define I40E_GLPCI_CUR_TLAN_RSVD 0x0009c584 /* Reset: PCIR */
+#define I40E_GLPCI_CUR_TLAN_RSVD_DATA_LINES_SHIFT 0
+#define I40E_GLPCI_CUR_TLAN_RSVD_DATA_LINES_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_TLAN_RSVD_DATA_LINES_SHIFT)
+#define I40E_GLPCI_CUR_TLAN_RSVD_OSR_SHIFT 16
+#define I40E_GLPCI_CUR_TLAN_RSVD_OSR_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_TLAN_RSVD_OSR_SHIFT)
+#define I40E_GLPCI_CUR_TXPE_ALWD 0x0009c50C /* Reset: PCIR */
+#define I40E_GLPCI_CUR_TXPE_ALWD_DATA_LINES_SHIFT 0
+#define I40E_GLPCI_CUR_TXPE_ALWD_DATA_LINES_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_TXPE_ALWD_DATA_LINES_SHIFT)
+#define I40E_GLPCI_CUR_TXPE_ALWD_OSR_SHIFT 16
+#define I40E_GLPCI_CUR_TXPE_ALWD_OSR_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_TXPE_ALWD_OSR_SHIFT)
+#define I40E_GLPCI_CUR_TXPE_RSVD 0x0009c58c /* Reset: PCIR */
+#define I40E_GLPCI_CUR_TXPE_RSVD_DATA_LINES_SHIFT 0
+#define I40E_GLPCI_CUR_TXPE_RSVD_DATA_LINES_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_TXPE_RSVD_DATA_LINES_SHIFT)
+#define I40E_GLPCI_CUR_TXPE_RSVD_OSR_SHIFT 16
+#define I40E_GLPCI_CUR_TXPE_RSVD_OSR_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_TXPE_RSVD_OSR_SHIFT)
+#define I40E_GLPCI_CUR_WATMK_CLNT_COMMON 0x0009CA28 /* Reset: PCIR */
+#define I40E_GLPCI_CUR_WATMK_CLNT_COMMON_DATA_LINES_SHIFT 0
+#define I40E_GLPCI_CUR_WATMK_CLNT_COMMON_DATA_LINES_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_WATMK_CLNT_COMMON_DATA_LINES_SHIFT)
+#define I40E_GLPCI_CUR_WATMK_CLNT_COMMON_OSR_SHIFT 16
+#define I40E_GLPCI_CUR_WATMK_CLNT_COMMON_OSR_MASK I40E_MASK(0xFFFF, I40E_GLPCI_CUR_WATMK_CLNT_COMMON_OSR_SHIFT)
+
+#define I40E_GLPCI_LBARCTRL_PE_DB_SIZE_SHIFT 4
+#define I40E_GLPCI_LBARCTRL_PE_DB_SIZE_MASK I40E_MASK(0x3, I40E_GLPCI_LBARCTRL_PE_DB_SIZE_SHIFT)
+#define I40E_GLPCI_LBARCTRL_VF_PE_DB_SIZE_SHIFT 10
+#define I40E_GLPCI_LBARCTRL_VF_PE_DB_SIZE_MASK I40E_MASK(0x1, I40E_GLPCI_LBARCTRL_VF_PE_DB_SIZE_SHIFT)
+#define I40E_GLPCI_NPQ_CFG 0x0009CA00 /* Reset: PCIR */
+#define I40E_GLPCI_NPQ_CFG_EXTEND_TO_SHIFT 0
+#define I40E_GLPCI_NPQ_CFG_EXTEND_TO_MASK I40E_MASK(0x1, I40E_GLPCI_NPQ_CFG_EXTEND_TO_SHIFT)
+#define I40E_GLPCI_NPQ_CFG_SMALL_TO_SHIFT 1
+#define I40E_GLPCI_NPQ_CFG_SMALL_TO_MASK I40E_MASK(0x1, I40E_GLPCI_NPQ_CFG_SMALL_TO_SHIFT)
+#define I40E_GLPCI_NPQ_CFG_WEIGHT_AVG_SHIFT 2
+#define I40E_GLPCI_NPQ_CFG_WEIGHT_AVG_MASK I40E_MASK(0xF, I40E_GLPCI_NPQ_CFG_WEIGHT_AVG_SHIFT)
+#define I40E_GLPCI_NPQ_CFG_NPQ_SPARE_SHIFT 6
+#define I40E_GLPCI_NPQ_CFG_NPQ_SPARE_MASK I40E_MASK(0x3FF, I40E_GLPCI_NPQ_CFG_NPQ_SPARE_SHIFT)
+#define I40E_GLPCI_NPQ_CFG_NPQ_ERR_STAT_SHIFT 16
+#define I40E_GLPCI_NPQ_CFG_NPQ_ERR_STAT_MASK I40E_MASK(0xF, I40E_GLPCI_NPQ_CFG_NPQ_ERR_STAT_SHIFT)
+#define I40E_GLPCI_WATMK_CLNT_PIPEMON 0x0009CA30 /* Reset: PCIR */
+#define I40E_GLPCI_WATMK_CLNT_PIPEMON_DATA_LINES_SHIFT 0
+#define I40E_GLPCI_WATMK_CLNT_PIPEMON_DATA_LINES_MASK I40E_MASK(0xFFFF, I40E_GLPCI_WATMK_CLNT_PIPEMON_DATA_LINES_SHIFT)
+#define I40E_GLPCI_WATMK_MNG_ALWD 0x0009CB14 /* Reset: PCIR */
+#define I40E_GLPCI_WATMK_MNG_ALWD_DATA_LINES_SHIFT 0
+#define I40E_GLPCI_WATMK_MNG_ALWD_DATA_LINES_MASK I40E_MASK(0xFFFF, I40E_GLPCI_WATMK_MNG_ALWD_DATA_LINES_SHIFT)
+#define I40E_GLPCI_WATMK_MNG_ALWD_OSR_SHIFT 16
+#define I40E_GLPCI_WATMK_MNG_ALWD_OSR_MASK I40E_MASK(0xFFFF, I40E_GLPCI_WATMK_MNG_ALWD_OSR_SHIFT)
+#define I40E_GLPCI_WATMK_PMAT_ALWD 0x0009CB10 /* Reset: PCIR */
+#define I40E_GLPCI_WATMK_PMAT_ALWD_DATA_LINES_SHIFT 0
+#define I40E_GLPCI_WATMK_PMAT_ALWD_DATA_LINES_MASK I40E_MASK(0xFFFF, I40E_GLPCI_WATMK_PMAT_ALWD_DATA_LINES_SHIFT)
+#define I40E_GLPCI_WATMK_PMAT_ALWD_OSR_SHIFT 16
+#define I40E_GLPCI_WATMK_PMAT_ALWD_OSR_MASK I40E_MASK(0xFFFF, I40E_GLPCI_WATMK_PMAT_ALWD_OSR_SHIFT)
+#define I40E_GLPCI_WATMK_RLAN_ALWD 0x0009CB00 /* Reset: PCIR */
+#define I40E_GLPCI_WATMK_RLAN_ALWD_DATA_LINES_SHIFT 0
+#define I40E_GLPCI_WATMK_RLAN_ALWD_DATA_LINES_MASK I40E_MASK(0xFFFF, I40E_GLPCI_WATMK_RLAN_ALWD_DATA_LINES_SHIFT)
+#define I40E_GLPCI_WATMK_RLAN_ALWD_OSR_SHIFT 16
+#define I40E_GLPCI_WATMK_RLAN_ALWD_OSR_MASK I40E_MASK(0xFFFF, I40E_GLPCI_WATMK_RLAN_ALWD_OSR_SHIFT)
+#define I40E_GLPCI_WATMK_RXPE_ALWD 0x0009CB08 /* Reset: PCIR */
+#define I40E_GLPCI_WATMK_RXPE_ALWD_DATA_LINES_SHIFT 0
+#define I40E_GLPCI_WATMK_RXPE_ALWD_DATA_LINES_MASK I40E_MASK(0xFFFF, I40E_GLPCI_WATMK_RXPE_ALWD_DATA_LINES_SHIFT)
+#define I40E_GLPCI_WATMK_RXPE_ALWD_OSR_SHIFT 16
+#define I40E_GLPCI_WATMK_RXPE_ALWD_OSR_MASK I40E_MASK(0xFFFF, I40E_GLPCI_WATMK_RXPE_ALWD_OSR_SHIFT)
+#define I40E_GLPCI_WATMK_TLAN_ALWD 0x0009CB04 /* Reset: PCIR */
+#define I40E_GLPCI_WATMK_TLAN_ALWD_DATA_LINES_SHIFT 0
+#define I40E_GLPCI_WATMK_TLAN_ALWD_DATA_LINES_MASK I40E_MASK(0xFFFF, I40E_GLPCI_WATMK_TLAN_ALWD_DATA_LINES_SHIFT)
+#define I40E_GLPCI_WATMK_TLAN_ALWD_OSR_SHIFT 16
+#define I40E_GLPCI_WATMK_TLAN_ALWD_OSR_MASK I40E_MASK(0xFFFF, I40E_GLPCI_WATMK_TLAN_ALWD_OSR_SHIFT)
+#define I40E_GLPCI_WATMK_TPDU_ALWD 0x0009CB18 /* Reset: PCIR */
+#define I40E_GLPCI_WATMK_TPDU_ALWD_DATA_LINES_SHIFT 0
+#define I40E_GLPCI_WATMK_TPDU_ALWD_DATA_LINES_MASK I40E_MASK(0xFFFF, I40E_GLPCI_WATMK_TPDU_ALWD_DATA_LINES_SHIFT)
+#define I40E_GLPCI_WATMK_TPDU_ALWD_OSR_SHIFT 16
+#define I40E_GLPCI_WATMK_TPDU_ALWD_OSR_MASK I40E_MASK(0xFFFF, I40E_GLPCI_WATMK_TPDU_ALWD_OSR_SHIFT)
+#define I40E_GLPCI_WATMK_TXPE_ALWD 0x0009CB0c /* Reset: PCIR */
+#define I40E_GLPCI_WATMK_TXPE_ALWD_DATA_LINES_SHIFT 0
+#define I40E_GLPCI_WATMK_TXPE_ALWD_DATA_LINES_MASK I40E_MASK(0xFFFF, I40E_GLPCI_WATMK_TXPE_ALWD_DATA_LINES_SHIFT)
+#define I40E_GLPCI_WATMK_TXPE_ALWD_OSR_SHIFT 16
+#define I40E_GLPCI_WATMK_TXPE_ALWD_OSR_MASK I40E_MASK(0xFFFF, I40E_GLPCI_WATMK_TXPE_ALWD_OSR_SHIFT)
+#define I40E_GLPE_CPUSTATUS0 0x0000D040 /* Reset: PE_CORER */
+#define I40E_GLPE_CPUSTATUS0_PECPUSTATUS0_SHIFT 0
+#define I40E_GLPE_CPUSTATUS0_PECPUSTATUS0_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPE_CPUSTATUS0_PECPUSTATUS0_SHIFT)
+#define I40E_GLPE_CPUSTATUS1 0x0000D044 /* Reset: PE_CORER */
+#define I40E_GLPE_CPUSTATUS1_PECPUSTATUS1_SHIFT 0
+#define I40E_GLPE_CPUSTATUS1_PECPUSTATUS1_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPE_CPUSTATUS1_PECPUSTATUS1_SHIFT)
+#define I40E_GLPE_CPUSTATUS2 0x0000D048 /* Reset: PE_CORER */
+#define I40E_GLPE_CPUSTATUS2_PECPUSTATUS2_SHIFT 0
+#define I40E_GLPE_CPUSTATUS2_PECPUSTATUS2_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPE_CPUSTATUS2_PECPUSTATUS2_SHIFT)
+#define I40E_GLPE_CPUTRIG0 0x0000D060 /* Reset: PE_CORER */
+#define I40E_GLPE_CPUTRIG0_PECPUTRIG0_SHIFT 0
+#define I40E_GLPE_CPUTRIG0_PECPUTRIG0_MASK I40E_MASK(0xFFFF, I40E_GLPE_CPUTRIG0_PECPUTRIG0_SHIFT)
+#define I40E_GLPE_CPUTRIG0_TEPREQUEST0_SHIFT 17
+#define I40E_GLPE_CPUTRIG0_TEPREQUEST0_MASK I40E_MASK(0x1, I40E_GLPE_CPUTRIG0_TEPREQUEST0_SHIFT)
+#define I40E_GLPE_CPUTRIG0_OOPREQUEST0_SHIFT 18
+#define I40E_GLPE_CPUTRIG0_OOPREQUEST0_MASK I40E_MASK(0x1, I40E_GLPE_CPUTRIG0_OOPREQUEST0_SHIFT)
+#define I40E_GLPE_DUAL40_RUPM 0x0000DA04 /* Reset: PE_CORER */
+#define I40E_GLPE_DUAL40_RUPM_DUAL_40G_MODE_SHIFT 0
+#define I40E_GLPE_DUAL40_RUPM_DUAL_40G_MODE_MASK I40E_MASK(0x1, I40E_GLPE_DUAL40_RUPM_DUAL_40G_MODE_SHIFT)
+#define I40E_GLPE_PFAEQEDROPCNT(_i) (0x00131440 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLPE_PFAEQEDROPCNT_MAX_INDEX 15
+#define I40E_GLPE_PFAEQEDROPCNT_AEQEDROPCNT_SHIFT 0
+#define I40E_GLPE_PFAEQEDROPCNT_AEQEDROPCNT_MASK I40E_MASK(0xFFFF, I40E_GLPE_PFAEQEDROPCNT_AEQEDROPCNT_SHIFT)
+#define I40E_GLPE_PFCEQEDROPCNT(_i) (0x001313C0 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLPE_PFCEQEDROPCNT_MAX_INDEX 15
+#define I40E_GLPE_PFCEQEDROPCNT_CEQEDROPCNT_SHIFT 0
+#define I40E_GLPE_PFCEQEDROPCNT_CEQEDROPCNT_MASK I40E_MASK(0xFFFF, I40E_GLPE_PFCEQEDROPCNT_CEQEDROPCNT_SHIFT)
+#define I40E_GLPE_PFCQEDROPCNT(_i) (0x00131340 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLPE_PFCQEDROPCNT_MAX_INDEX 15
+#define I40E_GLPE_PFCQEDROPCNT_CQEDROPCNT_SHIFT 0
+#define I40E_GLPE_PFCQEDROPCNT_CQEDROPCNT_MASK I40E_MASK(0xFFFF, I40E_GLPE_PFCQEDROPCNT_CQEDROPCNT_SHIFT)
+#define I40E_GLPE_RUPM_CQPPOOL 0x0000DACC /* Reset: PE_CORER */
+#define I40E_GLPE_RUPM_CQPPOOL_CQPSPADS_SHIFT 0
+#define I40E_GLPE_RUPM_CQPPOOL_CQPSPADS_MASK I40E_MASK(0xFF, I40E_GLPE_RUPM_CQPPOOL_CQPSPADS_SHIFT)
+#define I40E_GLPE_RUPM_FLRPOOL 0x0000DAC4 /* Reset: PE_CORER */
+#define I40E_GLPE_RUPM_FLRPOOL_FLRSPADS_SHIFT 0
+#define I40E_GLPE_RUPM_FLRPOOL_FLRSPADS_MASK I40E_MASK(0xFF, I40E_GLPE_RUPM_FLRPOOL_FLRSPADS_SHIFT)
+#define I40E_GLPE_RUPM_GCTL 0x0000DA00 /* Reset: PE_CORER */
+#define I40E_GLPE_RUPM_GCTL_ALLOFFTH_SHIFT 0
+#define I40E_GLPE_RUPM_GCTL_ALLOFFTH_MASK I40E_MASK(0xFF, I40E_GLPE_RUPM_GCTL_ALLOFFTH_SHIFT)
+#define I40E_GLPE_RUPM_GCTL_RUPM_P0_DIS_SHIFT 26
+#define I40E_GLPE_RUPM_GCTL_RUPM_P0_DIS_MASK I40E_MASK(0x1, I40E_GLPE_RUPM_GCTL_RUPM_P0_DIS_SHIFT)
+#define I40E_GLPE_RUPM_GCTL_RUPM_P1_DIS_SHIFT 27
+#define I40E_GLPE_RUPM_GCTL_RUPM_P1_DIS_MASK I40E_MASK(0x1, I40E_GLPE_RUPM_GCTL_RUPM_P1_DIS_SHIFT)
+#define I40E_GLPE_RUPM_GCTL_RUPM_P2_DIS_SHIFT 28
+#define I40E_GLPE_RUPM_GCTL_RUPM_P2_DIS_MASK I40E_MASK(0x1, I40E_GLPE_RUPM_GCTL_RUPM_P2_DIS_SHIFT)
+#define I40E_GLPE_RUPM_GCTL_RUPM_P3_DIS_SHIFT 29
+#define I40E_GLPE_RUPM_GCTL_RUPM_P3_DIS_MASK I40E_MASK(0x1, I40E_GLPE_RUPM_GCTL_RUPM_P3_DIS_SHIFT)
+#define I40E_GLPE_RUPM_GCTL_RUPM_DIS_SHIFT 30
+#define I40E_GLPE_RUPM_GCTL_RUPM_DIS_MASK I40E_MASK(0x1, I40E_GLPE_RUPM_GCTL_RUPM_DIS_SHIFT)
+#define I40E_GLPE_RUPM_GCTL_SWLB_MODE_SHIFT 31
+#define I40E_GLPE_RUPM_GCTL_SWLB_MODE_MASK I40E_MASK(0x1, I40E_GLPE_RUPM_GCTL_SWLB_MODE_SHIFT)
+#define I40E_GLPE_RUPM_PTXPOOL 0x0000DAC8 /* Reset: PE_CORER */
+#define I40E_GLPE_RUPM_PTXPOOL_PTXSPADS_SHIFT 0
+#define I40E_GLPE_RUPM_PTXPOOL_PTXSPADS_MASK I40E_MASK(0xFF, I40E_GLPE_RUPM_PTXPOOL_PTXSPADS_SHIFT)
+#define I40E_GLPE_RUPM_PUSHPOOL 0x0000DAC0 /* Reset: PE_CORER */
+#define I40E_GLPE_RUPM_PUSHPOOL_PUSHSPADS_SHIFT 0
+#define I40E_GLPE_RUPM_PUSHPOOL_PUSHSPADS_MASK I40E_MASK(0xFF, I40E_GLPE_RUPM_PUSHPOOL_PUSHSPADS_SHIFT)
+#define I40E_GLPE_RUPM_TXHOST_EN 0x0000DA08 /* Reset: PE_CORER */
+#define I40E_GLPE_RUPM_TXHOST_EN_TXHOST_EN_SHIFT 0
+#define I40E_GLPE_RUPM_TXHOST_EN_TXHOST_EN_MASK I40E_MASK(0x1, I40E_GLPE_RUPM_TXHOST_EN_TXHOST_EN_SHIFT)
+#define I40E_GLPE_VFAEQEDROPCNT(_i) (0x00132540 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLPE_VFAEQEDROPCNT_MAX_INDEX 31
+#define I40E_GLPE_VFAEQEDROPCNT_AEQEDROPCNT_SHIFT 0
+#define I40E_GLPE_VFAEQEDROPCNT_AEQEDROPCNT_MASK I40E_MASK(0xFFFF, I40E_GLPE_VFAEQEDROPCNT_AEQEDROPCNT_SHIFT)
+#define I40E_GLPE_VFCEQEDROPCNT(_i) (0x00132440 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLPE_VFCEQEDROPCNT_MAX_INDEX 31
+#define I40E_GLPE_VFCEQEDROPCNT_CEQEDROPCNT_SHIFT 0
+#define I40E_GLPE_VFCEQEDROPCNT_CEQEDROPCNT_MASK I40E_MASK(0xFFFF, I40E_GLPE_VFCEQEDROPCNT_CEQEDROPCNT_SHIFT)
+#define I40E_GLPE_VFCQEDROPCNT(_i) (0x00132340 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_GLPE_VFCQEDROPCNT_MAX_INDEX 31
+#define I40E_GLPE_VFCQEDROPCNT_CQEDROPCNT_SHIFT 0
+#define I40E_GLPE_VFCQEDROPCNT_CQEDROPCNT_MASK I40E_MASK(0xFFFF, I40E_GLPE_VFCQEDROPCNT_CQEDROPCNT_SHIFT)
+#define I40E_GLPE_VFFLMOBJCTRL(_i) (0x0000D400 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPE_VFFLMOBJCTRL_MAX_INDEX 31
+#define I40E_GLPE_VFFLMOBJCTRL_XMIT_BLOCKSIZE_SHIFT 0
+#define I40E_GLPE_VFFLMOBJCTRL_XMIT_BLOCKSIZE_MASK I40E_MASK(0x7, I40E_GLPE_VFFLMOBJCTRL_XMIT_BLOCKSIZE_SHIFT)
+#define I40E_GLPE_VFFLMOBJCTRL_Q1_BLOCKSIZE_SHIFT 8
+#define I40E_GLPE_VFFLMOBJCTRL_Q1_BLOCKSIZE_MASK I40E_MASK(0x7, I40E_GLPE_VFFLMOBJCTRL_Q1_BLOCKSIZE_SHIFT)
+#define I40E_GLPE_VFFLMQ1ALLOCERR(_i) (0x0000C700 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPE_VFFLMQ1ALLOCERR_MAX_INDEX 31
+#define I40E_GLPE_VFFLMQ1ALLOCERR_ERROR_COUNT_SHIFT 0
+#define I40E_GLPE_VFFLMQ1ALLOCERR_ERROR_COUNT_MASK I40E_MASK(0xFFFF, I40E_GLPE_VFFLMQ1ALLOCERR_ERROR_COUNT_SHIFT)
+#define I40E_GLPE_VFFLMXMITALLOCERR(_i) (0x0000C600 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPE_VFFLMXMITALLOCERR_MAX_INDEX 31
+#define I40E_GLPE_VFFLMXMITALLOCERR_ERROR_COUNT_SHIFT 0
+#define I40E_GLPE_VFFLMXMITALLOCERR_ERROR_COUNT_MASK I40E_MASK(0xFFFF, I40E_GLPE_VFFLMXMITALLOCERR_ERROR_COUNT_SHIFT)
+#define I40E_GLPE_VFUDACTRL(_i) (0x0000C000 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPE_VFUDACTRL_MAX_INDEX 31
+#define I40E_GLPE_VFUDACTRL_IPV4MCFRAGRESBP_SHIFT 0
+#define I40E_GLPE_VFUDACTRL_IPV4MCFRAGRESBP_MASK I40E_MASK(0x1, I40E_GLPE_VFUDACTRL_IPV4MCFRAGRESBP_SHIFT)
+#define I40E_GLPE_VFUDACTRL_IPV4UCFRAGRESBP_SHIFT 1
+#define I40E_GLPE_VFUDACTRL_IPV4UCFRAGRESBP_MASK I40E_MASK(0x1, I40E_GLPE_VFUDACTRL_IPV4UCFRAGRESBP_SHIFT)
+#define I40E_GLPE_VFUDACTRL_IPV6MCFRAGRESBP_SHIFT 2
+#define I40E_GLPE_VFUDACTRL_IPV6MCFRAGRESBP_MASK I40E_MASK(0x1, I40E_GLPE_VFUDACTRL_IPV6MCFRAGRESBP_SHIFT)
+#define I40E_GLPE_VFUDACTRL_IPV6UCFRAGRESBP_SHIFT 3
+#define I40E_GLPE_VFUDACTRL_IPV6UCFRAGRESBP_MASK I40E_MASK(0x1, I40E_GLPE_VFUDACTRL_IPV6UCFRAGRESBP_SHIFT)
+#define I40E_GLPE_VFUDACTRL_UDPMCFRAGRESFAIL_SHIFT 4
+#define I40E_GLPE_VFUDACTRL_UDPMCFRAGRESFAIL_MASK I40E_MASK(0x1, I40E_GLPE_VFUDACTRL_UDPMCFRAGRESFAIL_SHIFT)
+#define I40E_GLPE_VFUDAUCFBQPN(_i) (0x0000C100 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPE_VFUDAUCFBQPN_MAX_INDEX 31
+#define I40E_GLPE_VFUDAUCFBQPN_QPN_SHIFT 0
+#define I40E_GLPE_VFUDAUCFBQPN_QPN_MASK I40E_MASK(0x3FFFF, I40E_GLPE_VFUDAUCFBQPN_QPN_SHIFT)
+#define I40E_GLPE_VFUDAUCFBQPN_VALID_SHIFT 31
+#define I40E_GLPE_VFUDAUCFBQPN_VALID_MASK I40E_MASK(0x1, I40E_GLPE_VFUDAUCFBQPN_VALID_SHIFT)
+#define I40E_PFPE_AEQALLOC 0x00131180 /* Reset: PFR */
+#define I40E_PFPE_AEQALLOC_AECOUNT_SHIFT 0
+#define I40E_PFPE_AEQALLOC_AECOUNT_MASK I40E_MASK(0xFFFFFFFF, I40E_PFPE_AEQALLOC_AECOUNT_SHIFT)
+#define I40E_PFPE_CCQPHIGH 0x00008200 /* Reset: PFR */
+#define I40E_PFPE_CCQPHIGH_PECCQPHIGH_SHIFT 0
+#define I40E_PFPE_CCQPHIGH_PECCQPHIGH_MASK I40E_MASK(0xFFFFFFFF, I40E_PFPE_CCQPHIGH_PECCQPHIGH_SHIFT)
+#define I40E_PFPE_CCQPLOW 0x00008180 /* Reset: PFR */
+#define I40E_PFPE_CCQPLOW_PECCQPLOW_SHIFT 0
+#define I40E_PFPE_CCQPLOW_PECCQPLOW_MASK I40E_MASK(0xFFFFFFFF, I40E_PFPE_CCQPLOW_PECCQPLOW_SHIFT)
+#define I40E_PFPE_CCQPSTATUS 0x00008100 /* Reset: PFR */
+#define I40E_PFPE_CCQPSTATUS_CCQP_DONE_SHIFT 0
+#define I40E_PFPE_CCQPSTATUS_CCQP_DONE_MASK I40E_MASK(0x1, I40E_PFPE_CCQPSTATUS_CCQP_DONE_SHIFT)
+#define I40E_PFPE_CCQPSTATUS_HMC_PROFILE_SHIFT 4
+#define I40E_PFPE_CCQPSTATUS_HMC_PROFILE_MASK I40E_MASK(0x7, I40E_PFPE_CCQPSTATUS_HMC_PROFILE_SHIFT)
+#define I40E_PFPE_CCQPSTATUS_RDMA_EN_VFS_SHIFT 16
+#define I40E_PFPE_CCQPSTATUS_RDMA_EN_VFS_MASK I40E_MASK(0x3F, I40E_PFPE_CCQPSTATUS_RDMA_EN_VFS_SHIFT)
+#define I40E_PFPE_CCQPSTATUS_CCQP_ERR_SHIFT 31
+#define I40E_PFPE_CCQPSTATUS_CCQP_ERR_MASK I40E_MASK(0x1, I40E_PFPE_CCQPSTATUS_CCQP_ERR_SHIFT)
+#define I40E_PFPE_CQACK 0x00131100 /* Reset: PFR */
+#define I40E_PFPE_CQACK_PECQID_SHIFT 0
+#define I40E_PFPE_CQACK_PECQID_MASK I40E_MASK(0x1FFFF, I40E_PFPE_CQACK_PECQID_SHIFT)
+#define I40E_PFPE_CQARM 0x00131080 /* Reset: PFR */
+#define I40E_PFPE_CQARM_PECQID_SHIFT 0
+#define I40E_PFPE_CQARM_PECQID_MASK I40E_MASK(0x1FFFF, I40E_PFPE_CQARM_PECQID_SHIFT)
+#define I40E_PFPE_CQPDB 0x00008000 /* Reset: PFR */
+#define I40E_PFPE_CQPDB_WQHEAD_SHIFT 0
+#define I40E_PFPE_CQPDB_WQHEAD_MASK I40E_MASK(0x7FF, I40E_PFPE_CQPDB_WQHEAD_SHIFT)
+#define I40E_PFPE_CQPERRCODES 0x00008880 /* Reset: PFR */
+#define I40E_PFPE_CQPERRCODES_CQP_MINOR_CODE_SHIFT 0
+#define I40E_PFPE_CQPERRCODES_CQP_MINOR_CODE_MASK I40E_MASK(0xFFFF, I40E_PFPE_CQPERRCODES_CQP_MINOR_CODE_SHIFT)
+#define I40E_PFPE_CQPERRCODES_CQP_MAJOR_CODE_SHIFT 16
+#define I40E_PFPE_CQPERRCODES_CQP_MAJOR_CODE_MASK I40E_MASK(0xFFFF, I40E_PFPE_CQPERRCODES_CQP_MAJOR_CODE_SHIFT)
+#define I40E_PFPE_CQPTAIL 0x00008080 /* Reset: PFR */
+#define I40E_PFPE_CQPTAIL_WQTAIL_SHIFT 0
+#define I40E_PFPE_CQPTAIL_WQTAIL_MASK I40E_MASK(0x7FF, I40E_PFPE_CQPTAIL_WQTAIL_SHIFT)
+#define I40E_PFPE_CQPTAIL_CQP_OP_ERR_SHIFT 31
+#define I40E_PFPE_CQPTAIL_CQP_OP_ERR_MASK I40E_MASK(0x1, I40E_PFPE_CQPTAIL_CQP_OP_ERR_SHIFT)
+#define I40E_PFPE_FLMQ1ALLOCERR 0x00008980 /* Reset: PFR */
+#define I40E_PFPE_FLMQ1ALLOCERR_ERROR_COUNT_SHIFT 0
+#define I40E_PFPE_FLMQ1ALLOCERR_ERROR_COUNT_MASK I40E_MASK(0xFFFF, I40E_PFPE_FLMQ1ALLOCERR_ERROR_COUNT_SHIFT)
+#define I40E_PFPE_FLMXMITALLOCERR 0x00008900 /* Reset: PFR */
+#define I40E_PFPE_FLMXMITALLOCERR_ERROR_COUNT_SHIFT 0
+#define I40E_PFPE_FLMXMITALLOCERR_ERROR_COUNT_MASK I40E_MASK(0xFFFF, I40E_PFPE_FLMXMITALLOCERR_ERROR_COUNT_SHIFT)
+#define I40E_PFPE_IPCONFIG0 0x00008280 /* Reset: PFR */
+#define I40E_PFPE_IPCONFIG0_PEIPID_SHIFT 0
+#define I40E_PFPE_IPCONFIG0_PEIPID_MASK I40E_MASK(0xFFFF, I40E_PFPE_IPCONFIG0_PEIPID_SHIFT)
+#define I40E_PFPE_IPCONFIG0_USEENTIREIDRANGE_SHIFT 16
+#define I40E_PFPE_IPCONFIG0_USEENTIREIDRANGE_MASK I40E_MASK(0x1, I40E_PFPE_IPCONFIG0_USEENTIREIDRANGE_SHIFT)
+#define I40E_PFPE_MRTEIDXMASK 0x00008600 /* Reset: PFR */
+#define I40E_PFPE_MRTEIDXMASK_MRTEIDXMASKBITS_SHIFT 0
+#define I40E_PFPE_MRTEIDXMASK_MRTEIDXMASKBITS_MASK I40E_MASK(0x1F, I40E_PFPE_MRTEIDXMASK_MRTEIDXMASKBITS_SHIFT)
+#define I40E_PFPE_RCVUNEXPECTEDERROR 0x00008680 /* Reset: PFR */
+#define I40E_PFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_SHIFT 0
+#define I40E_PFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_MASK I40E_MASK(0xFFFFFF, I40E_PFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_SHIFT)
+#define I40E_PFPE_TCPNOWTIMER 0x00008580 /* Reset: PFR */
+#define I40E_PFPE_TCPNOWTIMER_TCP_NOW_SHIFT 0
+#define I40E_PFPE_TCPNOWTIMER_TCP_NOW_MASK I40E_MASK(0xFFFFFFFF, I40E_PFPE_TCPNOWTIMER_TCP_NOW_SHIFT)
+#define I40E_PFPE_UDACTRL 0x00008700 /* Reset: PFR */
+#define I40E_PFPE_UDACTRL_IPV4MCFRAGRESBP_SHIFT 0
+#define I40E_PFPE_UDACTRL_IPV4MCFRAGRESBP_MASK I40E_MASK(0x1, I40E_PFPE_UDACTRL_IPV4MCFRAGRESBP_SHIFT)
+#define I40E_PFPE_UDACTRL_IPV4UCFRAGRESBP_SHIFT 1
+#define I40E_PFPE_UDACTRL_IPV4UCFRAGRESBP_MASK I40E_MASK(0x1, I40E_PFPE_UDACTRL_IPV4UCFRAGRESBP_SHIFT)
+#define I40E_PFPE_UDACTRL_IPV6MCFRAGRESBP_SHIFT 2
+#define I40E_PFPE_UDACTRL_IPV6MCFRAGRESBP_MASK I40E_MASK(0x1, I40E_PFPE_UDACTRL_IPV6MCFRAGRESBP_SHIFT)
+#define I40E_PFPE_UDACTRL_IPV6UCFRAGRESBP_SHIFT 3
+#define I40E_PFPE_UDACTRL_IPV6UCFRAGRESBP_MASK I40E_MASK(0x1, I40E_PFPE_UDACTRL_IPV6UCFRAGRESBP_SHIFT)
+#define I40E_PFPE_UDACTRL_UDPMCFRAGRESFAIL_SHIFT 4
+#define I40E_PFPE_UDACTRL_UDPMCFRAGRESFAIL_MASK I40E_MASK(0x1, I40E_PFPE_UDACTRL_UDPMCFRAGRESFAIL_SHIFT)
+#define I40E_PFPE_UDAUCFBQPN 0x00008780 /* Reset: PFR */
+#define I40E_PFPE_UDAUCFBQPN_QPN_SHIFT 0
+#define I40E_PFPE_UDAUCFBQPN_QPN_MASK I40E_MASK(0x3FFFF, I40E_PFPE_UDAUCFBQPN_QPN_SHIFT)
+#define I40E_PFPE_UDAUCFBQPN_VALID_SHIFT 31
+#define I40E_PFPE_UDAUCFBQPN_VALID_MASK I40E_MASK(0x1, I40E_PFPE_UDAUCFBQPN_VALID_SHIFT)
+#define I40E_PFPE_WQEALLOC 0x00138C00 /* Reset: PFR */
+#define I40E_PFPE_WQEALLOC_PEQPID_SHIFT 0
+#define I40E_PFPE_WQEALLOC_PEQPID_MASK I40E_MASK(0x3FFFF, I40E_PFPE_WQEALLOC_PEQPID_SHIFT)
+#define I40E_PFPE_WQEALLOC_WQE_DESC_INDEX_SHIFT 20
+#define I40E_PFPE_WQEALLOC_WQE_DESC_INDEX_MASK I40E_MASK(0xFFF, I40E_PFPE_WQEALLOC_WQE_DESC_INDEX_SHIFT)
+#define I40E_PRTDCB_RLPMC 0x0001F140 /* Reset: PE_CORER */
+#define I40E_PRTDCB_RLPMC_TC2PFC_SHIFT 0
+#define I40E_PRTDCB_RLPMC_TC2PFC_MASK I40E_MASK(0xFF, I40E_PRTDCB_RLPMC_TC2PFC_SHIFT)
+#define I40E_PRTDCB_TCMSTC_RLPM(_i) (0x0001F040 + ((_i) * 32)) /* _i=0...7 */ /* Reset: PE_CORER */
+#define I40E_PRTDCB_TCMSTC_RLPM_MAX_INDEX 7
+#define I40E_PRTDCB_TCMSTC_RLPM_MSTC_SHIFT 0
+#define I40E_PRTDCB_TCMSTC_RLPM_MSTC_MASK I40E_MASK(0xFFFFF, I40E_PRTDCB_TCMSTC_RLPM_MSTC_SHIFT)
+#define I40E_PRTDCB_TCPMC_RLPM 0x0001F1A0 /* Reset: PE_CORER */
+#define I40E_PRTDCB_TCPMC_RLPM_CPM_SHIFT 0
+#define I40E_PRTDCB_TCPMC_RLPM_CPM_MASK I40E_MASK(0x1FFF, I40E_PRTDCB_TCPMC_RLPM_CPM_SHIFT)
+#define I40E_PRTDCB_TCPMC_RLPM_LLTC_SHIFT 13
+#define I40E_PRTDCB_TCPMC_RLPM_LLTC_MASK I40E_MASK(0xFF, I40E_PRTDCB_TCPMC_RLPM_LLTC_SHIFT)
+#define I40E_PRTDCB_TCPMC_RLPM_TCPM_MODE_SHIFT 30
+#define I40E_PRTDCB_TCPMC_RLPM_TCPM_MODE_MASK I40E_MASK(0x1, I40E_PRTDCB_TCPMC_RLPM_TCPM_MODE_SHIFT)
+#define I40E_PRTE_RUPM_TCCNTR03 0x0000DAE0 /* Reset: PE_CORER */
+#define I40E_PRTE_RUPM_TCCNTR03_TC0COUNT_SHIFT 0
+#define I40E_PRTE_RUPM_TCCNTR03_TC0COUNT_MASK I40E_MASK(0xFF, I40E_PRTE_RUPM_TCCNTR03_TC0COUNT_SHIFT)
+#define I40E_PRTE_RUPM_TCCNTR03_TC1COUNT_SHIFT 8
+#define I40E_PRTE_RUPM_TCCNTR03_TC1COUNT_MASK I40E_MASK(0xFF, I40E_PRTE_RUPM_TCCNTR03_TC1COUNT_SHIFT)
+#define I40E_PRTE_RUPM_TCCNTR03_TC2COUNT_SHIFT 16
+#define I40E_PRTE_RUPM_TCCNTR03_TC2COUNT_MASK I40E_MASK(0xFF, I40E_PRTE_RUPM_TCCNTR03_TC2COUNT_SHIFT)
+#define I40E_PRTE_RUPM_TCCNTR03_TC3COUNT_SHIFT 24
+#define I40E_PRTE_RUPM_TCCNTR03_TC3COUNT_MASK I40E_MASK(0xFF, I40E_PRTE_RUPM_TCCNTR03_TC3COUNT_SHIFT)
+#define I40E_PRTPE_RUPM_CNTR 0x0000DB20 /* Reset: PE_CORER */
+#define I40E_PRTPE_RUPM_CNTR_COUNT_SHIFT 0
+#define I40E_PRTPE_RUPM_CNTR_COUNT_MASK I40E_MASK(0xFF, I40E_PRTPE_RUPM_CNTR_COUNT_SHIFT)
+#define I40E_PRTPE_RUPM_CTL 0x0000DA40 /* Reset: PE_CORER */
+#define I40E_PRTPE_RUPM_CTL_LLTC_SHIFT 13
+#define I40E_PRTPE_RUPM_CTL_LLTC_MASK I40E_MASK(0xFF, I40E_PRTPE_RUPM_CTL_LLTC_SHIFT)
+#define I40E_PRTPE_RUPM_CTL_RUPM_MODE_SHIFT 30
+#define I40E_PRTPE_RUPM_CTL_RUPM_MODE_MASK I40E_MASK(0x1, I40E_PRTPE_RUPM_CTL_RUPM_MODE_SHIFT)
+#define I40E_PRTPE_RUPM_PFCCTL 0x0000DA60 /* Reset: PE_CORER */
+#define I40E_PRTPE_RUPM_PFCCTL_TC2PFC_SHIFT 0
+#define I40E_PRTPE_RUPM_PFCCTL_TC2PFC_MASK I40E_MASK(0xFF, I40E_PRTPE_RUPM_PFCCTL_TC2PFC_SHIFT)
+#define I40E_PRTPE_RUPM_PFCPC 0x0000DA80 /* Reset: PE_CORER */
+#define I40E_PRTPE_RUPM_PFCPC_PORTOFFTH_SHIFT 0
+#define I40E_PRTPE_RUPM_PFCPC_PORTOFFTH_MASK I40E_MASK(0xFF, I40E_PRTPE_RUPM_PFCPC_PORTOFFTH_SHIFT)
+#define I40E_PRTPE_RUPM_PFCTCC 0x0000DAA0 /* Reset: PE_CORER */
+#define I40E_PRTPE_RUPM_PFCTCC_TCOFFTH_SHIFT 0
+#define I40E_PRTPE_RUPM_PFCTCC_TCOFFTH_MASK I40E_MASK(0xFF, I40E_PRTPE_RUPM_PFCTCC_TCOFFTH_SHIFT)
+#define I40E_PRTPE_RUPM_PFCTCC_LL_PRI_TH_SHIFT 16
+#define I40E_PRTPE_RUPM_PFCTCC_LL_PRI_TH_MASK I40E_MASK(0xFF, I40E_PRTPE_RUPM_PFCTCC_LL_PRI_TH_SHIFT)
+#define I40E_PRTPE_RUPM_PFCTCC_LL_PRI_EN_SHIFT 31
+#define I40E_PRTPE_RUPM_PFCTCC_LL_PRI_EN_MASK I40E_MASK(0x1, I40E_PRTPE_RUPM_PFCTCC_LL_PRI_EN_SHIFT)
+#define I40E_PRTPE_RUPM_PTCTCCNTR47 0x0000DB60 /* Reset: PE_CORER */
+#define I40E_PRTPE_RUPM_PTCTCCNTR47_TC4COUNT_SHIFT 0
+#define I40E_PRTPE_RUPM_PTCTCCNTR47_TC4COUNT_MASK I40E_MASK(0xFF, I40E_PRTPE_RUPM_PTCTCCNTR47_TC4COUNT_SHIFT)
+#define I40E_PRTPE_RUPM_PTCTCCNTR47_TC5COUNT_SHIFT 8
+#define I40E_PRTPE_RUPM_PTCTCCNTR47_TC5COUNT_MASK I40E_MASK(0xFF, I40E_PRTPE_RUPM_PTCTCCNTR47_TC5COUNT_SHIFT)
+#define I40E_PRTPE_RUPM_PTCTCCNTR47_TC6COUNT_SHIFT 16
+#define I40E_PRTPE_RUPM_PTCTCCNTR47_TC6COUNT_MASK I40E_MASK(0xFF, I40E_PRTPE_RUPM_PTCTCCNTR47_TC6COUNT_SHIFT)
+#define I40E_PRTPE_RUPM_PTCTCCNTR47_TC7COUNT_SHIFT 24
+#define I40E_PRTPE_RUPM_PTCTCCNTR47_TC7COUNT_MASK I40E_MASK(0xFF, I40E_PRTPE_RUPM_PTCTCCNTR47_TC7COUNT_SHIFT)
+#define I40E_PRTPE_RUPM_PTXTCCNTR03 0x0000DB40 /* Reset: PE_CORER */
+#define I40E_PRTPE_RUPM_PTXTCCNTR03_TC0COUNT_SHIFT 0
+#define I40E_PRTPE_RUPM_PTXTCCNTR03_TC0COUNT_MASK I40E_MASK(0xFF, I40E_PRTPE_RUPM_PTXTCCNTR03_TC0COUNT_SHIFT)
+#define I40E_PRTPE_RUPM_PTXTCCNTR03_TC1COUNT_SHIFT 8
+#define I40E_PRTPE_RUPM_PTXTCCNTR03_TC1COUNT_MASK I40E_MASK(0xFF, I40E_PRTPE_RUPM_PTXTCCNTR03_TC1COUNT_SHIFT)
+#define I40E_PRTPE_RUPM_PTXTCCNTR03_TC2COUNT_SHIFT 16
+#define I40E_PRTPE_RUPM_PTXTCCNTR03_TC2COUNT_MASK I40E_MASK(0xFF, I40E_PRTPE_RUPM_PTXTCCNTR03_TC2COUNT_SHIFT)
+#define I40E_PRTPE_RUPM_PTXTCCNTR03_TC3COUNT_SHIFT 24
+#define I40E_PRTPE_RUPM_PTXTCCNTR03_TC3COUNT_MASK I40E_MASK(0xFF, I40E_PRTPE_RUPM_PTXTCCNTR03_TC3COUNT_SHIFT)
+#define I40E_PRTPE_RUPM_TCCNTR47 0x0000DB00 /* Reset: PE_CORER */
+#define I40E_PRTPE_RUPM_TCCNTR47_TC4COUNT_SHIFT 0
+#define I40E_PRTPE_RUPM_TCCNTR47_TC4COUNT_MASK I40E_MASK(0xFF, I40E_PRTPE_RUPM_TCCNTR47_TC4COUNT_SHIFT)
+#define I40E_PRTPE_RUPM_TCCNTR47_TC5COUNT_SHIFT 8
+#define I40E_PRTPE_RUPM_TCCNTR47_TC5COUNT_MASK I40E_MASK(0xFF, I40E_PRTPE_RUPM_TCCNTR47_TC5COUNT_SHIFT)
+#define I40E_PRTPE_RUPM_TCCNTR47_TC6COUNT_SHIFT 16
+#define I40E_PRTPE_RUPM_TCCNTR47_TC6COUNT_MASK I40E_MASK(0xFF, I40E_PRTPE_RUPM_TCCNTR47_TC6COUNT_SHIFT)
+#define I40E_PRTPE_RUPM_TCCNTR47_TC7COUNT_SHIFT 24
+#define I40E_PRTPE_RUPM_TCCNTR47_TC7COUNT_MASK I40E_MASK(0xFF, I40E_PRTPE_RUPM_TCCNTR47_TC7COUNT_SHIFT)
+#define I40E_PRTPE_RUPM_THRES 0x0000DA20 /* Reset: PE_CORER */
+#define I40E_PRTPE_RUPM_THRES_MINSPADSPERTC_SHIFT 0
+#define I40E_PRTPE_RUPM_THRES_MINSPADSPERTC_MASK I40E_MASK(0xFF, I40E_PRTPE_RUPM_THRES_MINSPADSPERTC_SHIFT)
+#define I40E_PRTPE_RUPM_THRES_MAXSPADS_SHIFT 8
+#define I40E_PRTPE_RUPM_THRES_MAXSPADS_MASK I40E_MASK(0xFF, I40E_PRTPE_RUPM_THRES_MAXSPADS_SHIFT)
+#define I40E_PRTPE_RUPM_THRES_MAXSPADSPERTC_SHIFT 16
+#define I40E_PRTPE_RUPM_THRES_MAXSPADSPERTC_MASK I40E_MASK(0xFF, I40E_PRTPE_RUPM_THRES_MAXSPADSPERTC_SHIFT)
+#define I40E_VFPE_AEQALLOC(_VF) (0x00130C00 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_AEQALLOC_MAX_INDEX 127
+#define I40E_VFPE_AEQALLOC_AECOUNT_SHIFT 0
+#define I40E_VFPE_AEQALLOC_AECOUNT_MASK I40E_MASK(0xFFFFFFFF, I40E_VFPE_AEQALLOC_AECOUNT_SHIFT)
+#define I40E_VFPE_CCQPHIGH(_VF) (0x00001000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CCQPHIGH_MAX_INDEX 127
+#define I40E_VFPE_CCQPHIGH_PECCQPHIGH_SHIFT 0
+#define I40E_VFPE_CCQPHIGH_PECCQPHIGH_MASK I40E_MASK(0xFFFFFFFF, I40E_VFPE_CCQPHIGH_PECCQPHIGH_SHIFT)
+#define I40E_VFPE_CCQPLOW(_VF) (0x00000C00 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CCQPLOW_MAX_INDEX 127
+#define I40E_VFPE_CCQPLOW_PECCQPLOW_SHIFT 0
+#define I40E_VFPE_CCQPLOW_PECCQPLOW_MASK I40E_MASK(0xFFFFFFFF, I40E_VFPE_CCQPLOW_PECCQPLOW_SHIFT)
+#define I40E_VFPE_CCQPSTATUS(_VF) (0x00000800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CCQPSTATUS_MAX_INDEX 127
+#define I40E_VFPE_CCQPSTATUS_CCQP_DONE_SHIFT 0
+#define I40E_VFPE_CCQPSTATUS_CCQP_DONE_MASK I40E_MASK(0x1, I40E_VFPE_CCQPSTATUS_CCQP_DONE_SHIFT)
+#define I40E_VFPE_CCQPSTATUS_HMC_PROFILE_SHIFT 4
+#define I40E_VFPE_CCQPSTATUS_HMC_PROFILE_MASK I40E_MASK(0x7, I40E_VFPE_CCQPSTATUS_HMC_PROFILE_SHIFT)
+#define I40E_VFPE_CCQPSTATUS_RDMA_EN_VFS_SHIFT 16
+#define I40E_VFPE_CCQPSTATUS_RDMA_EN_VFS_MASK I40E_MASK(0x3F, I40E_VFPE_CCQPSTATUS_RDMA_EN_VFS_SHIFT)
+#define I40E_VFPE_CCQPSTATUS_CCQP_ERR_SHIFT 31
+#define I40E_VFPE_CCQPSTATUS_CCQP_ERR_MASK I40E_MASK(0x1, I40E_VFPE_CCQPSTATUS_CCQP_ERR_SHIFT)
+#define I40E_VFPE_CQACK(_VF) (0x00130800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CQACK_MAX_INDEX 127
+#define I40E_VFPE_CQACK_PECQID_SHIFT 0
+#define I40E_VFPE_CQACK_PECQID_MASK I40E_MASK(0x1FFFF, I40E_VFPE_CQACK_PECQID_SHIFT)
+#define I40E_VFPE_CQARM(_VF) (0x00130400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CQARM_MAX_INDEX 127
+#define I40E_VFPE_CQARM_PECQID_SHIFT 0
+#define I40E_VFPE_CQARM_PECQID_MASK I40E_MASK(0x1FFFF, I40E_VFPE_CQARM_PECQID_SHIFT)
+#define I40E_VFPE_CQPDB(_VF) (0x00000000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CQPDB_MAX_INDEX 127
+#define I40E_VFPE_CQPDB_WQHEAD_SHIFT 0
+#define I40E_VFPE_CQPDB_WQHEAD_MASK I40E_MASK(0x7FF, I40E_VFPE_CQPDB_WQHEAD_SHIFT)
+#define I40E_VFPE_CQPERRCODES(_VF) (0x00001800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CQPERRCODES_MAX_INDEX 127
+#define I40E_VFPE_CQPERRCODES_CQP_MINOR_CODE_SHIFT 0
+#define I40E_VFPE_CQPERRCODES_CQP_MINOR_CODE_MASK I40E_MASK(0xFFFF, I40E_VFPE_CQPERRCODES_CQP_MINOR_CODE_SHIFT)
+#define I40E_VFPE_CQPERRCODES_CQP_MAJOR_CODE_SHIFT 16
+#define I40E_VFPE_CQPERRCODES_CQP_MAJOR_CODE_MASK I40E_MASK(0xFFFF, I40E_VFPE_CQPERRCODES_CQP_MAJOR_CODE_SHIFT)
+#define I40E_VFPE_CQPTAIL(_VF) (0x00000400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_CQPTAIL_MAX_INDEX 127
+#define I40E_VFPE_CQPTAIL_WQTAIL_SHIFT 0
+#define I40E_VFPE_CQPTAIL_WQTAIL_MASK I40E_MASK(0x7FF, I40E_VFPE_CQPTAIL_WQTAIL_SHIFT)
+#define I40E_VFPE_CQPTAIL_CQP_OP_ERR_SHIFT 31
+#define I40E_VFPE_CQPTAIL_CQP_OP_ERR_MASK I40E_MASK(0x1, I40E_VFPE_CQPTAIL_CQP_OP_ERR_SHIFT)
+#define I40E_VFPE_IPCONFIG0(_VF) (0x00001400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_IPCONFIG0_MAX_INDEX 127
+#define I40E_VFPE_IPCONFIG0_PEIPID_SHIFT 0
+#define I40E_VFPE_IPCONFIG0_PEIPID_MASK I40E_MASK(0xFFFF, I40E_VFPE_IPCONFIG0_PEIPID_SHIFT)
+#define I40E_VFPE_IPCONFIG0_USEENTIREIDRANGE_SHIFT 16
+#define I40E_VFPE_IPCONFIG0_USEENTIREIDRANGE_MASK I40E_MASK(0x1, I40E_VFPE_IPCONFIG0_USEENTIREIDRANGE_SHIFT)
+#define I40E_VFPE_MRTEIDXMASK(_VF) (0x00003000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_MRTEIDXMASK_MAX_INDEX 127
+#define I40E_VFPE_MRTEIDXMASK_MRTEIDXMASKBITS_SHIFT 0
+#define I40E_VFPE_MRTEIDXMASK_MRTEIDXMASKBITS_MASK I40E_MASK(0x1F, I40E_VFPE_MRTEIDXMASK_MRTEIDXMASKBITS_SHIFT)
+#define I40E_VFPE_RCVUNEXPECTEDERROR(_VF) (0x00003400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_RCVUNEXPECTEDERROR_MAX_INDEX 127
+#define I40E_VFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_SHIFT 0
+#define I40E_VFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_MASK I40E_MASK(0xFFFFFF, I40E_VFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_SHIFT)
+#define I40E_VFPE_TCPNOWTIMER(_VF) (0x00002C00 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_TCPNOWTIMER_MAX_INDEX 127
+#define I40E_VFPE_TCPNOWTIMER_TCP_NOW_SHIFT 0
+#define I40E_VFPE_TCPNOWTIMER_TCP_NOW_MASK I40E_MASK(0xFFFFFFFF, I40E_VFPE_TCPNOWTIMER_TCP_NOW_SHIFT)
+#define I40E_VFPE_WQEALLOC(_VF) (0x00138000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFPE_WQEALLOC_MAX_INDEX 127
+#define I40E_VFPE_WQEALLOC_PEQPID_SHIFT 0
+#define I40E_VFPE_WQEALLOC_PEQPID_MASK I40E_MASK(0x3FFFF, I40E_VFPE_WQEALLOC_PEQPID_SHIFT)
+#define I40E_VFPE_WQEALLOC_WQE_DESC_INDEX_SHIFT 20
+#define I40E_VFPE_WQEALLOC_WQE_DESC_INDEX_MASK I40E_MASK(0xFFF, I40E_VFPE_WQEALLOC_WQE_DESC_INDEX_SHIFT)
+#define I40E_GLPES_PFIP4RXDISCARD(_i) (0x00010600 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXDISCARD_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXDISCARD_IP4RXDISCARD_SHIFT 0
+#define I40E_GLPES_PFIP4RXDISCARD_IP4RXDISCARD_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFIP4RXDISCARD_IP4RXDISCARD_SHIFT)
+#define I40E_GLPES_PFIP4RXFRAGSHI(_i) (0x00010804 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXFRAGSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXFRAGSHI_IP4RXFRAGSHI_SHIFT 0
+#define I40E_GLPES_PFIP4RXFRAGSHI_IP4RXFRAGSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_PFIP4RXFRAGSHI_IP4RXFRAGSHI_SHIFT)
+#define I40E_GLPES_PFIP4RXFRAGSLO(_i) (0x00010800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXFRAGSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXFRAGSLO_IP4RXFRAGSLO_SHIFT 0
+#define I40E_GLPES_PFIP4RXFRAGSLO_IP4RXFRAGSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFIP4RXFRAGSLO_IP4RXFRAGSLO_SHIFT)
+#define I40E_GLPES_PFIP4RXMCOCTSHI(_i) (0x00010A04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXMCOCTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXMCOCTSHI_IP4RXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4RXMCOCTSHI_IP4RXMCOCTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_PFIP4RXMCOCTSHI_IP4RXMCOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP4RXMCOCTSLO(_i) (0x00010A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXMCOCTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXMCOCTSLO_IP4RXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4RXMCOCTSLO_IP4RXMCOCTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFIP4RXMCOCTSLO_IP4RXMCOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP4RXMCPKTSHI(_i) (0x00010C04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXMCPKTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXMCPKTSHI_IP4RXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4RXMCPKTSHI_IP4RXMCPKTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_PFIP4RXMCPKTSHI_IP4RXMCPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP4RXMCPKTSLO(_i) (0x00010C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXMCPKTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXMCPKTSLO_IP4RXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4RXMCPKTSLO_IP4RXMCPKTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFIP4RXMCPKTSLO_IP4RXMCPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP4RXOCTSHI(_i) (0x00010204 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXOCTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXOCTSHI_IP4RXOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4RXOCTSHI_IP4RXOCTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_PFIP4RXOCTSHI_IP4RXOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP4RXOCTSLO(_i) (0x00010200 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXOCTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXOCTSLO_IP4RXOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4RXOCTSLO_IP4RXOCTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFIP4RXOCTSLO_IP4RXOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP4RXPKTSHI(_i) (0x00010404 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXPKTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXPKTSHI_IP4RXPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4RXPKTSHI_IP4RXPKTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_PFIP4RXPKTSHI_IP4RXPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP4RXPKTSLO(_i) (0x00010400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXPKTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXPKTSLO_IP4RXPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4RXPKTSLO_IP4RXPKTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFIP4RXPKTSLO_IP4RXPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP4RXTRUNC(_i) (0x00010700 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXTRUNC_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXTRUNC_IP4RXTRUNC_SHIFT 0
+#define I40E_GLPES_PFIP4RXTRUNC_IP4RXTRUNC_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFIP4RXTRUNC_IP4RXTRUNC_SHIFT)
+#define I40E_GLPES_PFIP4TXFRAGSHI(_i) (0x00011E04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXFRAGSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXFRAGSHI_IP4TXFRAGSHI_SHIFT 0
+#define I40E_GLPES_PFIP4TXFRAGSHI_IP4TXFRAGSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_PFIP4TXFRAGSHI_IP4TXFRAGSHI_SHIFT)
+#define I40E_GLPES_PFIP4TXFRAGSLO(_i) (0x00011E00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXFRAGSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXFRAGSLO_IP4TXFRAGSLO_SHIFT 0
+#define I40E_GLPES_PFIP4TXFRAGSLO_IP4TXFRAGSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFIP4TXFRAGSLO_IP4TXFRAGSLO_SHIFT)
+#define I40E_GLPES_PFIP4TXMCOCTSHI(_i) (0x00012004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXMCOCTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXMCOCTSHI_IP4TXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4TXMCOCTSHI_IP4TXMCOCTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_PFIP4TXMCOCTSHI_IP4TXMCOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP4TXMCOCTSLO(_i) (0x00012000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXMCOCTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXMCOCTSLO_IP4TXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4TXMCOCTSLO_IP4TXMCOCTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFIP4TXMCOCTSLO_IP4TXMCOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP4TXMCPKTSHI(_i) (0x00012204 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXMCPKTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXMCPKTSHI_IP4TXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4TXMCPKTSHI_IP4TXMCPKTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_PFIP4TXMCPKTSHI_IP4TXMCPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP4TXMCPKTSLO(_i) (0x00012200 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXMCPKTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXMCPKTSLO_IP4TXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4TXMCPKTSLO_IP4TXMCPKTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFIP4TXMCPKTSLO_IP4TXMCPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP4TXNOROUTE(_i) (0x00012E00 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXNOROUTE_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXNOROUTE_IP4TXNOROUTE_SHIFT 0
+#define I40E_GLPES_PFIP4TXNOROUTE_IP4TXNOROUTE_MASK I40E_MASK(0xFFFFFF, I40E_GLPES_PFIP4TXNOROUTE_IP4TXNOROUTE_SHIFT)
+#define I40E_GLPES_PFIP4TXOCTSHI(_i) (0x00011A04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXOCTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXOCTSHI_IP4TXOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4TXOCTSHI_IP4TXOCTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_PFIP4TXOCTSHI_IP4TXOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP4TXOCTSLO(_i) (0x00011A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXOCTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXOCTSLO_IP4TXOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4TXOCTSLO_IP4TXOCTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFIP4TXOCTSLO_IP4TXOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP4TXPKTSHI(_i) (0x00011C04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXPKTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXPKTSHI_IP4TXPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4TXPKTSHI_IP4TXPKTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_PFIP4TXPKTSHI_IP4TXPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP4TXPKTSLO(_i) (0x00011C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXPKTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXPKTSLO_IP4TXPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4TXPKTSLO_IP4TXPKTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFIP4TXPKTSLO_IP4TXPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP6RXDISCARD(_i) (0x00011200 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXDISCARD_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXDISCARD_IP6RXDISCARD_SHIFT 0
+#define I40E_GLPES_PFIP6RXDISCARD_IP6RXDISCARD_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFIP6RXDISCARD_IP6RXDISCARD_SHIFT)
+#define I40E_GLPES_PFIP6RXFRAGSHI(_i) (0x00011404 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXFRAGSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXFRAGSHI_IP6RXFRAGSHI_SHIFT 0
+#define I40E_GLPES_PFIP6RXFRAGSHI_IP6RXFRAGSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_PFIP6RXFRAGSHI_IP6RXFRAGSHI_SHIFT)
+#define I40E_GLPES_PFIP6RXFRAGSLO(_i) (0x00011400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXFRAGSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXFRAGSLO_IP6RXFRAGSLO_SHIFT 0
+#define I40E_GLPES_PFIP6RXFRAGSLO_IP6RXFRAGSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFIP6RXFRAGSLO_IP6RXFRAGSLO_SHIFT)
+#define I40E_GLPES_PFIP6RXMCOCTSHI(_i) (0x00011604 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXMCOCTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXMCOCTSHI_IP6RXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6RXMCOCTSHI_IP6RXMCOCTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_PFIP6RXMCOCTSHI_IP6RXMCOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP6RXMCOCTSLO(_i) (0x00011600 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXMCOCTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXMCOCTSLO_IP6RXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6RXMCOCTSLO_IP6RXMCOCTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFIP6RXMCOCTSLO_IP6RXMCOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP6RXMCPKTSHI(_i) (0x00011804 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXMCPKTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXMCPKTSHI_IP6RXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6RXMCPKTSHI_IP6RXMCPKTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_PFIP6RXMCPKTSHI_IP6RXMCPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP6RXMCPKTSLO(_i) (0x00011800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXMCPKTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXMCPKTSLO_IP6RXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6RXMCPKTSLO_IP6RXMCPKTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFIP6RXMCPKTSLO_IP6RXMCPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP6RXOCTSHI(_i) (0x00010E04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXOCTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXOCTSHI_IP6RXOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6RXOCTSHI_IP6RXOCTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_PFIP6RXOCTSHI_IP6RXOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP6RXOCTSLO(_i) (0x00010E00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXOCTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXOCTSLO_IP6RXOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6RXOCTSLO_IP6RXOCTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFIP6RXOCTSLO_IP6RXOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP6RXPKTSHI(_i) (0x00011004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXPKTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXPKTSHI_IP6RXPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6RXPKTSHI_IP6RXPKTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_PFIP6RXPKTSHI_IP6RXPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP6RXPKTSLO(_i) (0x00011000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXPKTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXPKTSLO_IP6RXPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6RXPKTSLO_IP6RXPKTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFIP6RXPKTSLO_IP6RXPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP6RXTRUNC(_i) (0x00011300 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXTRUNC_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXTRUNC_IP6RXTRUNC_SHIFT 0
+#define I40E_GLPES_PFIP6RXTRUNC_IP6RXTRUNC_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFIP6RXTRUNC_IP6RXTRUNC_SHIFT)
+#define I40E_GLPES_PFIP6TXFRAGSHI(_i) (0x00012804 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXFRAGSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXFRAGSHI_IP6TXFRAGSHI_SHIFT 0
+#define I40E_GLPES_PFIP6TXFRAGSHI_IP6TXFRAGSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_PFIP6TXFRAGSHI_IP6TXFRAGSHI_SHIFT)
+#define I40E_GLPES_PFIP6TXFRAGSLO(_i) (0x00012800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXFRAGSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXFRAGSLO_IP6TXFRAGSLO_SHIFT 0
+#define I40E_GLPES_PFIP6TXFRAGSLO_IP6TXFRAGSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFIP6TXFRAGSLO_IP6TXFRAGSLO_SHIFT)
+#define I40E_GLPES_PFIP6TXMCOCTSHI(_i) (0x00012A04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXMCOCTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXMCOCTSHI_IP6TXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6TXMCOCTSHI_IP6TXMCOCTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_PFIP6TXMCOCTSHI_IP6TXMCOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP6TXMCOCTSLO(_i) (0x00012A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXMCOCTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXMCOCTSLO_IP6TXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6TXMCOCTSLO_IP6TXMCOCTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFIP6TXMCOCTSLO_IP6TXMCOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP6TXMCPKTSHI(_i) (0x00012C04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXMCPKTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXMCPKTSHI_IP6TXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6TXMCPKTSHI_IP6TXMCPKTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_PFIP6TXMCPKTSHI_IP6TXMCPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP6TXMCPKTSLO(_i) (0x00012C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXMCPKTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXMCPKTSLO_IP6TXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6TXMCPKTSLO_IP6TXMCPKTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFIP6TXMCPKTSLO_IP6TXMCPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP6TXNOROUTE(_i) (0x00012F00 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXNOROUTE_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXNOROUTE_IP6TXNOROUTE_SHIFT 0
+#define I40E_GLPES_PFIP6TXNOROUTE_IP6TXNOROUTE_MASK I40E_MASK(0xFFFFFF, I40E_GLPES_PFIP6TXNOROUTE_IP6TXNOROUTE_SHIFT)
+#define I40E_GLPES_PFIP6TXOCTSHI(_i) (0x00012404 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXOCTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXOCTSHI_IP6TXOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6TXOCTSHI_IP6TXOCTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_PFIP6TXOCTSHI_IP6TXOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP6TXOCTSLO(_i) (0x00012400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXOCTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXOCTSLO_IP6TXOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6TXOCTSLO_IP6TXOCTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFIP6TXOCTSLO_IP6TXOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP6TXPKTSHI(_i) (0x00012604 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXPKTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXPKTSHI_IP6TXPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6TXPKTSHI_IP6TXPKTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_PFIP6TXPKTSHI_IP6TXPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP6TXPKTSLO(_i) (0x00012600 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXPKTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXPKTSLO_IP6TXPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6TXPKTSLO_IP6TXPKTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFIP6TXPKTSLO_IP6TXPKTSLO_SHIFT)
+#define I40E_GLPES_PFRDMARXRDSHI(_i) (0x00013E04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMARXRDSHI_MAX_INDEX 15
+#define I40E_GLPES_PFRDMARXRDSHI_RDMARXRDSHI_SHIFT 0
+#define I40E_GLPES_PFRDMARXRDSHI_RDMARXRDSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_PFRDMARXRDSHI_RDMARXRDSHI_SHIFT)
+#define I40E_GLPES_PFRDMARXRDSLO(_i) (0x00013E00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMARXRDSLO_MAX_INDEX 15
+#define I40E_GLPES_PFRDMARXRDSLO_RDMARXRDSLO_SHIFT 0
+#define I40E_GLPES_PFRDMARXRDSLO_RDMARXRDSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFRDMARXRDSLO_RDMARXRDSLO_SHIFT)
+#define I40E_GLPES_PFRDMARXSNDSHI(_i) (0x00014004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMARXSNDSHI_MAX_INDEX 15
+#define I40E_GLPES_PFRDMARXSNDSHI_RDMARXSNDSHI_SHIFT 0
+#define I40E_GLPES_PFRDMARXSNDSHI_RDMARXSNDSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_PFRDMARXSNDSHI_RDMARXSNDSHI_SHIFT)
+#define I40E_GLPES_PFRDMARXSNDSLO(_i) (0x00014000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMARXSNDSLO_MAX_INDEX 15
+#define I40E_GLPES_PFRDMARXSNDSLO_RDMARXSNDSLO_SHIFT 0
+#define I40E_GLPES_PFRDMARXSNDSLO_RDMARXSNDSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFRDMARXSNDSLO_RDMARXSNDSLO_SHIFT)
+#define I40E_GLPES_PFRDMARXWRSHI(_i) (0x00013C04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMARXWRSHI_MAX_INDEX 15
+#define I40E_GLPES_PFRDMARXWRSHI_RDMARXWRSHI_SHIFT 0
+#define I40E_GLPES_PFRDMARXWRSHI_RDMARXWRSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_PFRDMARXWRSHI_RDMARXWRSHI_SHIFT)
+#define I40E_GLPES_PFRDMARXWRSLO(_i) (0x00013C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMARXWRSLO_MAX_INDEX 15
+#define I40E_GLPES_PFRDMARXWRSLO_RDMARXWRSLO_SHIFT 0
+#define I40E_GLPES_PFRDMARXWRSLO_RDMARXWRSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFRDMARXWRSLO_RDMARXWRSLO_SHIFT)
+#define I40E_GLPES_PFRDMATXRDSHI(_i) (0x00014404 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMATXRDSHI_MAX_INDEX 15
+#define I40E_GLPES_PFRDMATXRDSHI_RDMARXRDSHI_SHIFT 0
+#define I40E_GLPES_PFRDMATXRDSHI_RDMARXRDSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_PFRDMATXRDSHI_RDMARXRDSHI_SHIFT)
+#define I40E_GLPES_PFRDMATXRDSLO(_i) (0x00014400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMATXRDSLO_MAX_INDEX 15
+#define I40E_GLPES_PFRDMATXRDSLO_RDMARXRDSLO_SHIFT 0
+#define I40E_GLPES_PFRDMATXRDSLO_RDMARXRDSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFRDMATXRDSLO_RDMARXRDSLO_SHIFT)
+#define I40E_GLPES_PFRDMATXSNDSHI(_i) (0x00014604 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMATXSNDSHI_MAX_INDEX 15
+#define I40E_GLPES_PFRDMATXSNDSHI_RDMARXSNDSHI_SHIFT 0
+#define I40E_GLPES_PFRDMATXSNDSHI_RDMARXSNDSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_PFRDMATXSNDSHI_RDMARXSNDSHI_SHIFT)
+#define I40E_GLPES_PFRDMATXSNDSLO(_i) (0x00014600 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMATXSNDSLO_MAX_INDEX 15
+#define I40E_GLPES_PFRDMATXSNDSLO_RDMARXSNDSLO_SHIFT 0
+#define I40E_GLPES_PFRDMATXSNDSLO_RDMARXSNDSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFRDMATXSNDSLO_RDMARXSNDSLO_SHIFT)
+#define I40E_GLPES_PFRDMATXWRSHI(_i) (0x00014204 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMATXWRSHI_MAX_INDEX 15
+#define I40E_GLPES_PFRDMATXWRSHI_RDMARXWRSHI_SHIFT 0
+#define I40E_GLPES_PFRDMATXWRSHI_RDMARXWRSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_PFRDMATXWRSHI_RDMARXWRSHI_SHIFT)
+#define I40E_GLPES_PFRDMATXWRSLO(_i) (0x00014200 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMATXWRSLO_MAX_INDEX 15
+#define I40E_GLPES_PFRDMATXWRSLO_RDMARXWRSLO_SHIFT 0
+#define I40E_GLPES_PFRDMATXWRSLO_RDMARXWRSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFRDMATXWRSLO_RDMARXWRSLO_SHIFT)
+#define I40E_GLPES_PFRDMAVBNDHI(_i) (0x00014804 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMAVBNDHI_MAX_INDEX 15
+#define I40E_GLPES_PFRDMAVBNDHI_RDMAVBNDHI_SHIFT 0
+#define I40E_GLPES_PFRDMAVBNDHI_RDMAVBNDHI_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFRDMAVBNDHI_RDMAVBNDHI_SHIFT)
+#define I40E_GLPES_PFRDMAVBNDLO(_i) (0x00014800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMAVBNDLO_MAX_INDEX 15
+#define I40E_GLPES_PFRDMAVBNDLO_RDMAVBNDLO_SHIFT 0
+#define I40E_GLPES_PFRDMAVBNDLO_RDMAVBNDLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFRDMAVBNDLO_RDMAVBNDLO_SHIFT)
+#define I40E_GLPES_PFRDMAVINVHI(_i) (0x00014A04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMAVINVHI_MAX_INDEX 15
+#define I40E_GLPES_PFRDMAVINVHI_RDMAVINVHI_SHIFT 0
+#define I40E_GLPES_PFRDMAVINVHI_RDMAVINVHI_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFRDMAVINVHI_RDMAVINVHI_SHIFT)
+#define I40E_GLPES_PFRDMAVINVLO(_i) (0x00014A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMAVINVLO_MAX_INDEX 15
+#define I40E_GLPES_PFRDMAVINVLO_RDMAVINVLO_SHIFT 0
+#define I40E_GLPES_PFRDMAVINVLO_RDMAVINVLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFRDMAVINVLO_RDMAVINVLO_SHIFT)
+#define I40E_GLPES_PFRXVLANERR(_i) (0x00010000 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRXVLANERR_MAX_INDEX 15
+#define I40E_GLPES_PFRXVLANERR_RXVLANERR_SHIFT 0
+#define I40E_GLPES_PFRXVLANERR_RXVLANERR_MASK I40E_MASK(0xFFFFFF, I40E_GLPES_PFRXVLANERR_RXVLANERR_SHIFT)
+#define I40E_GLPES_PFTCPRTXSEG(_i) (0x00013600 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPRTXSEG_MAX_INDEX 15
+#define I40E_GLPES_PFTCPRTXSEG_TCPRTXSEG_SHIFT 0
+#define I40E_GLPES_PFTCPRTXSEG_TCPRTXSEG_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFTCPRTXSEG_TCPRTXSEG_SHIFT)
+#define I40E_GLPES_PFTCPRXOPTERR(_i) (0x00013200 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPRXOPTERR_MAX_INDEX 15
+#define I40E_GLPES_PFTCPRXOPTERR_TCPRXOPTERR_SHIFT 0
+#define I40E_GLPES_PFTCPRXOPTERR_TCPRXOPTERR_MASK I40E_MASK(0xFFFFFF, I40E_GLPES_PFTCPRXOPTERR_TCPRXOPTERR_SHIFT)
+#define I40E_GLPES_PFTCPRXPROTOERR(_i) (0x00013300 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPRXPROTOERR_MAX_INDEX 15
+#define I40E_GLPES_PFTCPRXPROTOERR_TCPRXPROTOERR_SHIFT 0
+#define I40E_GLPES_PFTCPRXPROTOERR_TCPRXPROTOERR_MASK I40E_MASK(0xFFFFFF, I40E_GLPES_PFTCPRXPROTOERR_TCPRXPROTOERR_SHIFT)
+#define I40E_GLPES_PFTCPRXSEGSHI(_i) (0x00013004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPRXSEGSHI_MAX_INDEX 15
+#define I40E_GLPES_PFTCPRXSEGSHI_TCPRXSEGSHI_SHIFT 0
+#define I40E_GLPES_PFTCPRXSEGSHI_TCPRXSEGSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_PFTCPRXSEGSHI_TCPRXSEGSHI_SHIFT)
+#define I40E_GLPES_PFTCPRXSEGSLO(_i) (0x00013000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPRXSEGSLO_MAX_INDEX 15
+#define I40E_GLPES_PFTCPRXSEGSLO_TCPRXSEGSLO_SHIFT 0
+#define I40E_GLPES_PFTCPRXSEGSLO_TCPRXSEGSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFTCPRXSEGSLO_TCPRXSEGSLO_SHIFT)
+#define I40E_GLPES_PFTCPTXSEGHI(_i) (0x00013404 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPTXSEGHI_MAX_INDEX 15
+#define I40E_GLPES_PFTCPTXSEGHI_TCPTXSEGHI_SHIFT 0
+#define I40E_GLPES_PFTCPTXSEGHI_TCPTXSEGHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_PFTCPTXSEGHI_TCPTXSEGHI_SHIFT)
+#define I40E_GLPES_PFTCPTXSEGLO(_i) (0x00013400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPTXSEGLO_MAX_INDEX 15
+#define I40E_GLPES_PFTCPTXSEGLO_TCPTXSEGLO_SHIFT 0
+#define I40E_GLPES_PFTCPTXSEGLO_TCPTXSEGLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFTCPTXSEGLO_TCPTXSEGLO_SHIFT)
+#define I40E_GLPES_PFUDPRXPKTSHI(_i) (0x00013804 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFUDPRXPKTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFUDPRXPKTSHI_UDPRXPKTSHI_SHIFT 0
+#define I40E_GLPES_PFUDPRXPKTSHI_UDPRXPKTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_PFUDPRXPKTSHI_UDPRXPKTSHI_SHIFT)
+#define I40E_GLPES_PFUDPRXPKTSLO(_i) (0x00013800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFUDPRXPKTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFUDPRXPKTSLO_UDPRXPKTSLO_SHIFT 0
+#define I40E_GLPES_PFUDPRXPKTSLO_UDPRXPKTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFUDPRXPKTSLO_UDPRXPKTSLO_SHIFT)
+#define I40E_GLPES_PFUDPTXPKTSHI(_i) (0x00013A04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFUDPTXPKTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFUDPTXPKTSHI_UDPTXPKTSHI_SHIFT 0
+#define I40E_GLPES_PFUDPTXPKTSHI_UDPTXPKTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_PFUDPTXPKTSHI_UDPTXPKTSHI_SHIFT)
+#define I40E_GLPES_PFUDPTXPKTSLO(_i) (0x00013A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFUDPTXPKTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFUDPTXPKTSLO_UDPTXPKTSLO_SHIFT 0
+#define I40E_GLPES_PFUDPTXPKTSLO_UDPTXPKTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_PFUDPTXPKTSLO_UDPTXPKTSLO_SHIFT)
+#define I40E_GLPES_RDMARXMULTFPDUSHI 0x0001E014 /* Reset: PE_CORER */
+#define I40E_GLPES_RDMARXMULTFPDUSHI_RDMARXMULTFPDUSHI_SHIFT 0
+#define I40E_GLPES_RDMARXMULTFPDUSHI_RDMARXMULTFPDUSHI_MASK I40E_MASK(0xFFFFFF, I40E_GLPES_RDMARXMULTFPDUSHI_RDMARXMULTFPDUSHI_SHIFT)
+#define I40E_GLPES_RDMARXMULTFPDUSLO 0x0001E010 /* Reset: PE_CORER */
+#define I40E_GLPES_RDMARXMULTFPDUSLO_RDMARXMULTFPDUSLO_SHIFT 0
+#define I40E_GLPES_RDMARXMULTFPDUSLO_RDMARXMULTFPDUSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_RDMARXMULTFPDUSLO_RDMARXMULTFPDUSLO_SHIFT)
+#define I40E_GLPES_RDMARXOOODDPHI 0x0001E01C /* Reset: PE_CORER */
+#define I40E_GLPES_RDMARXOOODDPHI_RDMARXOOODDPHI_SHIFT 0
+#define I40E_GLPES_RDMARXOOODDPHI_RDMARXOOODDPHI_MASK I40E_MASK(0xFFFFFF, I40E_GLPES_RDMARXOOODDPHI_RDMARXOOODDPHI_SHIFT)
+#define I40E_GLPES_RDMARXOOODDPLO 0x0001E018 /* Reset: PE_CORER */
+#define I40E_GLPES_RDMARXOOODDPLO_RDMARXOOODDPLO_SHIFT 0
+#define I40E_GLPES_RDMARXOOODDPLO_RDMARXOOODDPLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_RDMARXOOODDPLO_RDMARXOOODDPLO_SHIFT)
+#define I40E_GLPES_RDMARXOOONOMARK 0x0001E004 /* Reset: PE_CORER */
+#define I40E_GLPES_RDMARXOOONOMARK_RDMAOOONOMARK_SHIFT 0
+#define I40E_GLPES_RDMARXOOONOMARK_RDMAOOONOMARK_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_RDMARXOOONOMARK_RDMAOOONOMARK_SHIFT)
+#define I40E_GLPES_RDMARXUNALIGN 0x0001E000 /* Reset: PE_CORER */
+#define I40E_GLPES_RDMARXUNALIGN_RDMRXAUNALIGN_SHIFT 0
+#define I40E_GLPES_RDMARXUNALIGN_RDMRXAUNALIGN_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_RDMARXUNALIGN_RDMRXAUNALIGN_SHIFT)
+#define I40E_GLPES_TCPRXFOURHOLEHI 0x0001E044 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXFOURHOLEHI_TCPRXFOURHOLEHI_SHIFT 0
+#define I40E_GLPES_TCPRXFOURHOLEHI_TCPRXFOURHOLEHI_MASK I40E_MASK(0xFFFFFF, I40E_GLPES_TCPRXFOURHOLEHI_TCPRXFOURHOLEHI_SHIFT)
+#define I40E_GLPES_TCPRXFOURHOLELO 0x0001E040 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXFOURHOLELO_TCPRXFOURHOLELO_SHIFT 0
+#define I40E_GLPES_TCPRXFOURHOLELO_TCPRXFOURHOLELO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_TCPRXFOURHOLELO_TCPRXFOURHOLELO_SHIFT)
+#define I40E_GLPES_TCPRXONEHOLEHI 0x0001E02C /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXONEHOLEHI_TCPRXONEHOLEHI_SHIFT 0
+#define I40E_GLPES_TCPRXONEHOLEHI_TCPRXONEHOLEHI_MASK I40E_MASK(0xFFFFFF, I40E_GLPES_TCPRXONEHOLEHI_TCPRXONEHOLEHI_SHIFT)
+#define I40E_GLPES_TCPRXONEHOLELO 0x0001E028 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXONEHOLELO_TCPRXONEHOLELO_SHIFT 0
+#define I40E_GLPES_TCPRXONEHOLELO_TCPRXONEHOLELO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_TCPRXONEHOLELO_TCPRXONEHOLELO_SHIFT)
+#define I40E_GLPES_TCPRXPUREACKHI 0x0001E024 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXPUREACKHI_TCPRXPUREACKSHI_SHIFT 0
+#define I40E_GLPES_TCPRXPUREACKHI_TCPRXPUREACKSHI_MASK I40E_MASK(0xFFFFFF, I40E_GLPES_TCPRXPUREACKHI_TCPRXPUREACKSHI_SHIFT)
+#define I40E_GLPES_TCPRXPUREACKSLO 0x0001E020 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXPUREACKSLO_TCPRXPUREACKLO_SHIFT 0
+#define I40E_GLPES_TCPRXPUREACKSLO_TCPRXPUREACKLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_TCPRXPUREACKSLO_TCPRXPUREACKLO_SHIFT)
+#define I40E_GLPES_TCPRXTHREEHOLEHI 0x0001E03C /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXTHREEHOLEHI_TCPRXTHREEHOLEHI_SHIFT 0
+#define I40E_GLPES_TCPRXTHREEHOLEHI_TCPRXTHREEHOLEHI_MASK I40E_MASK(0xFFFFFF, I40E_GLPES_TCPRXTHREEHOLEHI_TCPRXTHREEHOLEHI_SHIFT)
+#define I40E_GLPES_TCPRXTHREEHOLELO 0x0001E038 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXTHREEHOLELO_TCPRXTHREEHOLELO_SHIFT 0
+#define I40E_GLPES_TCPRXTHREEHOLELO_TCPRXTHREEHOLELO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_TCPRXTHREEHOLELO_TCPRXTHREEHOLELO_SHIFT)
+#define I40E_GLPES_TCPRXTWOHOLEHI 0x0001E034 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXTWOHOLEHI_TCPRXTWOHOLEHI_SHIFT 0
+#define I40E_GLPES_TCPRXTWOHOLEHI_TCPRXTWOHOLEHI_MASK I40E_MASK(0xFFFFFF, I40E_GLPES_TCPRXTWOHOLEHI_TCPRXTWOHOLEHI_SHIFT)
+#define I40E_GLPES_TCPRXTWOHOLELO 0x0001E030 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPRXTWOHOLELO_TCPRXTWOHOLELO_SHIFT 0
+#define I40E_GLPES_TCPRXTWOHOLELO_TCPRXTWOHOLELO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_TCPRXTWOHOLELO_TCPRXTWOHOLELO_SHIFT)
+#define I40E_GLPES_TCPTXRETRANSFASTHI 0x0001E04C /* Reset: PE_CORER */
+#define I40E_GLPES_TCPTXRETRANSFASTHI_TCPTXRETRANSFASTHI_SHIFT 0
+#define I40E_GLPES_TCPTXRETRANSFASTHI_TCPTXRETRANSFASTHI_MASK I40E_MASK(0xFFFFFF, I40E_GLPES_TCPTXRETRANSFASTHI_TCPTXRETRANSFASTHI_SHIFT)
+#define I40E_GLPES_TCPTXRETRANSFASTLO 0x0001E048 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPTXRETRANSFASTLO_TCPTXRETRANSFASTLO_SHIFT 0
+#define I40E_GLPES_TCPTXRETRANSFASTLO_TCPTXRETRANSFASTLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_TCPTXRETRANSFASTLO_TCPTXRETRANSFASTLO_SHIFT)
+#define I40E_GLPES_TCPTXTOUTSFASTHI 0x0001E054 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPTXTOUTSFASTHI_TCPTXTOUTSFASTHI_SHIFT 0
+#define I40E_GLPES_TCPTXTOUTSFASTHI_TCPTXTOUTSFASTHI_MASK I40E_MASK(0xFFFFFF, I40E_GLPES_TCPTXTOUTSFASTHI_TCPTXTOUTSFASTHI_SHIFT)
+#define I40E_GLPES_TCPTXTOUTSFASTLO 0x0001E050 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPTXTOUTSFASTLO_TCPTXTOUTSFASTLO_SHIFT 0
+#define I40E_GLPES_TCPTXTOUTSFASTLO_TCPTXTOUTSFASTLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_TCPTXTOUTSFASTLO_TCPTXTOUTSFASTLO_SHIFT)
+#define I40E_GLPES_TCPTXTOUTSHI 0x0001E05C /* Reset: PE_CORER */
+#define I40E_GLPES_TCPTXTOUTSHI_TCPTXTOUTSHI_SHIFT 0
+#define I40E_GLPES_TCPTXTOUTSHI_TCPTXTOUTSHI_MASK I40E_MASK(0xFFFFFF, I40E_GLPES_TCPTXTOUTSHI_TCPTXTOUTSHI_SHIFT)
+#define I40E_GLPES_TCPTXTOUTSLO 0x0001E058 /* Reset: PE_CORER */
+#define I40E_GLPES_TCPTXTOUTSLO_TCPTXTOUTSLO_SHIFT 0
+#define I40E_GLPES_TCPTXTOUTSLO_TCPTXTOUTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_TCPTXTOUTSLO_TCPTXTOUTSLO_SHIFT)
+#define I40E_GLPES_VFIP4RXDISCARD(_i) (0x00018600 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXDISCARD_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXDISCARD_IP4RXDISCARD_SHIFT 0
+#define I40E_GLPES_VFIP4RXDISCARD_IP4RXDISCARD_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFIP4RXDISCARD_IP4RXDISCARD_SHIFT)
+#define I40E_GLPES_VFIP4RXFRAGSHI(_i) (0x00018804 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXFRAGSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXFRAGSHI_IP4RXFRAGSHI_SHIFT 0
+#define I40E_GLPES_VFIP4RXFRAGSHI_IP4RXFRAGSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_VFIP4RXFRAGSHI_IP4RXFRAGSHI_SHIFT)
+#define I40E_GLPES_VFIP4RXFRAGSLO(_i) (0x00018800 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXFRAGSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXFRAGSLO_IP4RXFRAGSLO_SHIFT 0
+#define I40E_GLPES_VFIP4RXFRAGSLO_IP4RXFRAGSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFIP4RXFRAGSLO_IP4RXFRAGSLO_SHIFT)
+#define I40E_GLPES_VFIP4RXMCOCTSHI(_i) (0x00018A04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXMCOCTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXMCOCTSHI_IP4RXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4RXMCOCTSHI_IP4RXMCOCTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_VFIP4RXMCOCTSHI_IP4RXMCOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP4RXMCOCTSLO(_i) (0x00018A00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXMCOCTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXMCOCTSLO_IP4RXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4RXMCOCTSLO_IP4RXMCOCTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFIP4RXMCOCTSLO_IP4RXMCOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP4RXMCPKTSHI(_i) (0x00018C04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXMCPKTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXMCPKTSHI_IP4RXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4RXMCPKTSHI_IP4RXMCPKTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_VFIP4RXMCPKTSHI_IP4RXMCPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP4RXMCPKTSLO(_i) (0x00018C00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXMCPKTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXMCPKTSLO_IP4RXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4RXMCPKTSLO_IP4RXMCPKTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFIP4RXMCPKTSLO_IP4RXMCPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP4RXOCTSHI(_i) (0x00018204 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXOCTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXOCTSHI_IP4RXOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4RXOCTSHI_IP4RXOCTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_VFIP4RXOCTSHI_IP4RXOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP4RXOCTSLO(_i) (0x00018200 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXOCTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXOCTSLO_IP4RXOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4RXOCTSLO_IP4RXOCTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFIP4RXOCTSLO_IP4RXOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP4RXPKTSHI(_i) (0x00018404 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXPKTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXPKTSHI_IP4RXPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4RXPKTSHI_IP4RXPKTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_VFIP4RXPKTSHI_IP4RXPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP4RXPKTSLO(_i) (0x00018400 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXPKTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXPKTSLO_IP4RXPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4RXPKTSLO_IP4RXPKTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFIP4RXPKTSLO_IP4RXPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP4RXTRUNC(_i) (0x00018700 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4RXTRUNC_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXTRUNC_IP4RXTRUNC_SHIFT 0
+#define I40E_GLPES_VFIP4RXTRUNC_IP4RXTRUNC_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFIP4RXTRUNC_IP4RXTRUNC_SHIFT)
+#define I40E_GLPES_VFIP4TXFRAGSHI(_i) (0x00019E04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXFRAGSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXFRAGSHI_IP4TXFRAGSHI_SHIFT 0
+#define I40E_GLPES_VFIP4TXFRAGSHI_IP4TXFRAGSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_VFIP4TXFRAGSHI_IP4TXFRAGSHI_SHIFT)
+#define I40E_GLPES_VFIP4TXFRAGSLO(_i) (0x00019E00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXFRAGSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXFRAGSLO_IP4TXFRAGSLO_SHIFT 0
+#define I40E_GLPES_VFIP4TXFRAGSLO_IP4TXFRAGSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFIP4TXFRAGSLO_IP4TXFRAGSLO_SHIFT)
+#define I40E_GLPES_VFIP4TXMCOCTSHI(_i) (0x0001A004 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXMCOCTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXMCOCTSHI_IP4TXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4TXMCOCTSHI_IP4TXMCOCTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_VFIP4TXMCOCTSHI_IP4TXMCOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP4TXMCOCTSLO(_i) (0x0001A000 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXMCOCTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXMCOCTSLO_IP4TXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4TXMCOCTSLO_IP4TXMCOCTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFIP4TXMCOCTSLO_IP4TXMCOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP4TXMCPKTSHI(_i) (0x0001A204 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXMCPKTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXMCPKTSHI_IP4TXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4TXMCPKTSHI_IP4TXMCPKTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_VFIP4TXMCPKTSHI_IP4TXMCPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP4TXMCPKTSLO(_i) (0x0001A200 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXMCPKTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXMCPKTSLO_IP4TXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4TXMCPKTSLO_IP4TXMCPKTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFIP4TXMCPKTSLO_IP4TXMCPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP4TXNOROUTE(_i) (0x0001AE00 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXNOROUTE_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXNOROUTE_IP4TXNOROUTE_SHIFT 0
+#define I40E_GLPES_VFIP4TXNOROUTE_IP4TXNOROUTE_MASK I40E_MASK(0xFFFFFF, I40E_GLPES_VFIP4TXNOROUTE_IP4TXNOROUTE_SHIFT)
+#define I40E_GLPES_VFIP4TXOCTSHI(_i) (0x00019A04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXOCTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXOCTSHI_IP4TXOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4TXOCTSHI_IP4TXOCTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_VFIP4TXOCTSHI_IP4TXOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP4TXOCTSLO(_i) (0x00019A00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXOCTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXOCTSLO_IP4TXOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4TXOCTSLO_IP4TXOCTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFIP4TXOCTSLO_IP4TXOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP4TXPKTSHI(_i) (0x00019C04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXPKTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXPKTSHI_IP4TXPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4TXPKTSHI_IP4TXPKTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_VFIP4TXPKTSHI_IP4TXPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP4TXPKTSLO(_i) (0x00019C00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP4TXPKTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXPKTSLO_IP4TXPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4TXPKTSLO_IP4TXPKTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFIP4TXPKTSLO_IP4TXPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP6RXDISCARD(_i) (0x00019200 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXDISCARD_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXDISCARD_IP6RXDISCARD_SHIFT 0
+#define I40E_GLPES_VFIP6RXDISCARD_IP6RXDISCARD_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFIP6RXDISCARD_IP6RXDISCARD_SHIFT)
+#define I40E_GLPES_VFIP6RXFRAGSHI(_i) (0x00019404 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXFRAGSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXFRAGSHI_IP6RXFRAGSHI_SHIFT 0
+#define I40E_GLPES_VFIP6RXFRAGSHI_IP6RXFRAGSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_VFIP6RXFRAGSHI_IP6RXFRAGSHI_SHIFT)
+#define I40E_GLPES_VFIP6RXFRAGSLO(_i) (0x00019400 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXFRAGSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXFRAGSLO_IP6RXFRAGSLO_SHIFT 0
+#define I40E_GLPES_VFIP6RXFRAGSLO_IP6RXFRAGSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFIP6RXFRAGSLO_IP6RXFRAGSLO_SHIFT)
+#define I40E_GLPES_VFIP6RXMCOCTSHI(_i) (0x00019604 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXMCOCTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXMCOCTSHI_IP6RXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6RXMCOCTSHI_IP6RXMCOCTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_VFIP6RXMCOCTSHI_IP6RXMCOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP6RXMCOCTSLO(_i) (0x00019600 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXMCOCTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXMCOCTSLO_IP6RXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6RXMCOCTSLO_IP6RXMCOCTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFIP6RXMCOCTSLO_IP6RXMCOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP6RXMCPKTSHI(_i) (0x00019804 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXMCPKTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXMCPKTSHI_IP6RXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6RXMCPKTSHI_IP6RXMCPKTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_VFIP6RXMCPKTSHI_IP6RXMCPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP6RXMCPKTSLO(_i) (0x00019800 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXMCPKTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXMCPKTSLO_IP6RXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6RXMCPKTSLO_IP6RXMCPKTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFIP6RXMCPKTSLO_IP6RXMCPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP6RXOCTSHI(_i) (0x00018E04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXOCTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXOCTSHI_IP6RXOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6RXOCTSHI_IP6RXOCTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_VFIP6RXOCTSHI_IP6RXOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP6RXOCTSLO(_i) (0x00018E00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXOCTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXOCTSLO_IP6RXOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6RXOCTSLO_IP6RXOCTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFIP6RXOCTSLO_IP6RXOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP6RXPKTSHI(_i) (0x00019004 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXPKTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXPKTSHI_IP6RXPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6RXPKTSHI_IP6RXPKTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_VFIP6RXPKTSHI_IP6RXPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP6RXPKTSLO(_i) (0x00019000 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXPKTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXPKTSLO_IP6RXPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6RXPKTSLO_IP6RXPKTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFIP6RXPKTSLO_IP6RXPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP6RXTRUNC(_i) (0x00019300 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6RXTRUNC_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXTRUNC_IP6RXTRUNC_SHIFT 0
+#define I40E_GLPES_VFIP6RXTRUNC_IP6RXTRUNC_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFIP6RXTRUNC_IP6RXTRUNC_SHIFT)
+#define I40E_GLPES_VFIP6TXFRAGSHI(_i) (0x0001A804 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXFRAGSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXFRAGSHI_IP6TXFRAGSHI_SHIFT 0
+#define I40E_GLPES_VFIP6TXFRAGSHI_IP6TXFRAGSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_VFIP6TXFRAGSHI_IP6TXFRAGSHI_SHIFT)
+#define I40E_GLPES_VFIP6TXFRAGSLO(_i) (0x0001A800 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXFRAGSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXFRAGSLO_IP6TXFRAGSLO_SHIFT 0
+#define I40E_GLPES_VFIP6TXFRAGSLO_IP6TXFRAGSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFIP6TXFRAGSLO_IP6TXFRAGSLO_SHIFT)
+#define I40E_GLPES_VFIP6TXMCOCTSHI(_i) (0x0001AA04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXMCOCTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXMCOCTSHI_IP6TXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6TXMCOCTSHI_IP6TXMCOCTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_VFIP6TXMCOCTSHI_IP6TXMCOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP6TXMCOCTSLO(_i) (0x0001AA00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXMCOCTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXMCOCTSLO_IP6TXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6TXMCOCTSLO_IP6TXMCOCTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFIP6TXMCOCTSLO_IP6TXMCOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP6TXMCPKTSHI(_i) (0x0001AC04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXMCPKTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXMCPKTSHI_IP6TXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6TXMCPKTSHI_IP6TXMCPKTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_VFIP6TXMCPKTSHI_IP6TXMCPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP6TXMCPKTSLO(_i) (0x0001AC00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXMCPKTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXMCPKTSLO_IP6TXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6TXMCPKTSLO_IP6TXMCPKTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFIP6TXMCPKTSLO_IP6TXMCPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP6TXNOROUTE(_i) (0x0001AF00 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXNOROUTE_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXNOROUTE_IP6TXNOROUTE_SHIFT 0
+#define I40E_GLPES_VFIP6TXNOROUTE_IP6TXNOROUTE_MASK I40E_MASK(0xFFFFFF, I40E_GLPES_VFIP6TXNOROUTE_IP6TXNOROUTE_SHIFT)
+#define I40E_GLPES_VFIP6TXOCTSHI(_i) (0x0001A404 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXOCTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXOCTSHI_IP6TXOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6TXOCTSHI_IP6TXOCTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_VFIP6TXOCTSHI_IP6TXOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP6TXOCTSLO(_i) (0x0001A400 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXOCTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXOCTSLO_IP6TXOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6TXOCTSLO_IP6TXOCTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFIP6TXOCTSLO_IP6TXOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP6TXPKTSHI(_i) (0x0001A604 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXPKTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXPKTSHI_IP6TXPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6TXPKTSHI_IP6TXPKTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_VFIP6TXPKTSHI_IP6TXPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP6TXPKTSLO(_i) (0x0001A600 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFIP6TXPKTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXPKTSLO_IP6TXPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6TXPKTSLO_IP6TXPKTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFIP6TXPKTSLO_IP6TXPKTSLO_SHIFT)
+#define I40E_GLPES_VFRDMARXRDSHI(_i) (0x0001BE04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMARXRDSHI_MAX_INDEX 31
+#define I40E_GLPES_VFRDMARXRDSHI_RDMARXRDSHI_SHIFT 0
+#define I40E_GLPES_VFRDMARXRDSHI_RDMARXRDSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_VFRDMARXRDSHI_RDMARXRDSHI_SHIFT)
+#define I40E_GLPES_VFRDMARXRDSLO(_i) (0x0001BE00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMARXRDSLO_MAX_INDEX 31
+#define I40E_GLPES_VFRDMARXRDSLO_RDMARXRDSLO_SHIFT 0
+#define I40E_GLPES_VFRDMARXRDSLO_RDMARXRDSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFRDMARXRDSLO_RDMARXRDSLO_SHIFT)
+#define I40E_GLPES_VFRDMARXSNDSHI(_i) (0x0001C004 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMARXSNDSHI_MAX_INDEX 31
+#define I40E_GLPES_VFRDMARXSNDSHI_RDMARXSNDSHI_SHIFT 0
+#define I40E_GLPES_VFRDMARXSNDSHI_RDMARXSNDSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_VFRDMARXSNDSHI_RDMARXSNDSHI_SHIFT)
+#define I40E_GLPES_VFRDMARXSNDSLO(_i) (0x0001C000 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMARXSNDSLO_MAX_INDEX 31
+#define I40E_GLPES_VFRDMARXSNDSLO_RDMARXSNDSLO_SHIFT 0
+#define I40E_GLPES_VFRDMARXSNDSLO_RDMARXSNDSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFRDMARXSNDSLO_RDMARXSNDSLO_SHIFT)
+#define I40E_GLPES_VFRDMARXWRSHI(_i) (0x0001BC04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMARXWRSHI_MAX_INDEX 31
+#define I40E_GLPES_VFRDMARXWRSHI_RDMARXWRSHI_SHIFT 0
+#define I40E_GLPES_VFRDMARXWRSHI_RDMARXWRSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_VFRDMARXWRSHI_RDMARXWRSHI_SHIFT)
+#define I40E_GLPES_VFRDMARXWRSLO(_i) (0x0001BC00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMARXWRSLO_MAX_INDEX 31
+#define I40E_GLPES_VFRDMARXWRSLO_RDMARXWRSLO_SHIFT 0
+#define I40E_GLPES_VFRDMARXWRSLO_RDMARXWRSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFRDMARXWRSLO_RDMARXWRSLO_SHIFT)
+#define I40E_GLPES_VFRDMATXRDSHI(_i) (0x0001C404 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMATXRDSHI_MAX_INDEX 31
+#define I40E_GLPES_VFRDMATXRDSHI_RDMARXRDSHI_SHIFT 0
+#define I40E_GLPES_VFRDMATXRDSHI_RDMARXRDSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_VFRDMATXRDSHI_RDMARXRDSHI_SHIFT)
+#define I40E_GLPES_VFRDMATXRDSLO(_i) (0x0001C400 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMATXRDSLO_MAX_INDEX 31
+#define I40E_GLPES_VFRDMATXRDSLO_RDMARXRDSLO_SHIFT 0
+#define I40E_GLPES_VFRDMATXRDSLO_RDMARXRDSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFRDMATXRDSLO_RDMARXRDSLO_SHIFT)
+#define I40E_GLPES_VFRDMATXSNDSHI(_i) (0x0001C604 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMATXSNDSHI_MAX_INDEX 31
+#define I40E_GLPES_VFRDMATXSNDSHI_RDMARXSNDSHI_SHIFT 0
+#define I40E_GLPES_VFRDMATXSNDSHI_RDMARXSNDSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_VFRDMATXSNDSHI_RDMARXSNDSHI_SHIFT)
+#define I40E_GLPES_VFRDMATXSNDSLO(_i) (0x0001C600 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMATXSNDSLO_MAX_INDEX 31
+#define I40E_GLPES_VFRDMATXSNDSLO_RDMARXSNDSLO_SHIFT 0
+#define I40E_GLPES_VFRDMATXSNDSLO_RDMARXSNDSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFRDMATXSNDSLO_RDMARXSNDSLO_SHIFT)
+#define I40E_GLPES_VFRDMATXWRSHI(_i) (0x0001C204 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMATXWRSHI_MAX_INDEX 31
+#define I40E_GLPES_VFRDMATXWRSHI_RDMARXWRSHI_SHIFT 0
+#define I40E_GLPES_VFRDMATXWRSHI_RDMARXWRSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_VFRDMATXWRSHI_RDMARXWRSHI_SHIFT)
+#define I40E_GLPES_VFRDMATXWRSLO(_i) (0x0001C200 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMATXWRSLO_MAX_INDEX 31
+#define I40E_GLPES_VFRDMATXWRSLO_RDMARXWRSLO_SHIFT 0
+#define I40E_GLPES_VFRDMATXWRSLO_RDMARXWRSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFRDMATXWRSLO_RDMARXWRSLO_SHIFT)
+#define I40E_GLPES_VFRDMAVBNDHI(_i) (0x0001C804 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMAVBNDHI_MAX_INDEX 31
+#define I40E_GLPES_VFRDMAVBNDHI_RDMAVBNDHI_SHIFT 0
+#define I40E_GLPES_VFRDMAVBNDHI_RDMAVBNDHI_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFRDMAVBNDHI_RDMAVBNDHI_SHIFT)
+#define I40E_GLPES_VFRDMAVBNDLO(_i) (0x0001C800 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMAVBNDLO_MAX_INDEX 31
+#define I40E_GLPES_VFRDMAVBNDLO_RDMAVBNDLO_SHIFT 0
+#define I40E_GLPES_VFRDMAVBNDLO_RDMAVBNDLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFRDMAVBNDLO_RDMAVBNDLO_SHIFT)
+#define I40E_GLPES_VFRDMAVINVHI(_i) (0x0001CA04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMAVINVHI_MAX_INDEX 31
+#define I40E_GLPES_VFRDMAVINVHI_RDMAVINVHI_SHIFT 0
+#define I40E_GLPES_VFRDMAVINVHI_RDMAVINVHI_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFRDMAVINVHI_RDMAVINVHI_SHIFT)
+#define I40E_GLPES_VFRDMAVINVLO(_i) (0x0001CA00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRDMAVINVLO_MAX_INDEX 31
+#define I40E_GLPES_VFRDMAVINVLO_RDMAVINVLO_SHIFT 0
+#define I40E_GLPES_VFRDMAVINVLO_RDMAVINVLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFRDMAVINVLO_RDMAVINVLO_SHIFT)
+#define I40E_GLPES_VFRXVLANERR(_i) (0x00018000 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFRXVLANERR_MAX_INDEX 31
+#define I40E_GLPES_VFRXVLANERR_RXVLANERR_SHIFT 0
+#define I40E_GLPES_VFRXVLANERR_RXVLANERR_MASK I40E_MASK(0xFFFFFF, I40E_GLPES_VFRXVLANERR_RXVLANERR_SHIFT)
+#define I40E_GLPES_VFTCPRTXSEG(_i) (0x0001B600 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFTCPRTXSEG_MAX_INDEX 31
+#define I40E_GLPES_VFTCPRTXSEG_TCPRTXSEG_SHIFT 0
+#define I40E_GLPES_VFTCPRTXSEG_TCPRTXSEG_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFTCPRTXSEG_TCPRTXSEG_SHIFT)
+#define I40E_GLPES_VFTCPRXOPTERR(_i) (0x0001B200 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFTCPRXOPTERR_MAX_INDEX 31
+#define I40E_GLPES_VFTCPRXOPTERR_TCPRXOPTERR_SHIFT 0
+#define I40E_GLPES_VFTCPRXOPTERR_TCPRXOPTERR_MASK I40E_MASK(0xFFFFFF, I40E_GLPES_VFTCPRXOPTERR_TCPRXOPTERR_SHIFT)
+#define I40E_GLPES_VFTCPRXPROTOERR(_i) (0x0001B300 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFTCPRXPROTOERR_MAX_INDEX 31
+#define I40E_GLPES_VFTCPRXPROTOERR_TCPRXPROTOERR_SHIFT 0
+#define I40E_GLPES_VFTCPRXPROTOERR_TCPRXPROTOERR_MASK I40E_MASK(0xFFFFFF, I40E_GLPES_VFTCPRXPROTOERR_TCPRXPROTOERR_SHIFT)
+#define I40E_GLPES_VFTCPRXSEGSHI(_i) (0x0001B004 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFTCPRXSEGSHI_MAX_INDEX 31
+#define I40E_GLPES_VFTCPRXSEGSHI_TCPRXSEGSHI_SHIFT 0
+#define I40E_GLPES_VFTCPRXSEGSHI_TCPRXSEGSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_VFTCPRXSEGSHI_TCPRXSEGSHI_SHIFT)
+#define I40E_GLPES_VFTCPRXSEGSLO(_i) (0x0001B000 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFTCPRXSEGSLO_MAX_INDEX 31
+#define I40E_GLPES_VFTCPRXSEGSLO_TCPRXSEGSLO_SHIFT 0
+#define I40E_GLPES_VFTCPRXSEGSLO_TCPRXSEGSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFTCPRXSEGSLO_TCPRXSEGSLO_SHIFT)
+#define I40E_GLPES_VFTCPTXSEGHI(_i) (0x0001B404 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFTCPTXSEGHI_MAX_INDEX 31
+#define I40E_GLPES_VFTCPTXSEGHI_TCPTXSEGHI_SHIFT 0
+#define I40E_GLPES_VFTCPTXSEGHI_TCPTXSEGHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_VFTCPTXSEGHI_TCPTXSEGHI_SHIFT)
+#define I40E_GLPES_VFTCPTXSEGLO(_i) (0x0001B400 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFTCPTXSEGLO_MAX_INDEX 31
+#define I40E_GLPES_VFTCPTXSEGLO_TCPTXSEGLO_SHIFT 0
+#define I40E_GLPES_VFTCPTXSEGLO_TCPTXSEGLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFTCPTXSEGLO_TCPTXSEGLO_SHIFT)
+#define I40E_GLPES_VFUDPRXPKTSHI(_i) (0x0001B804 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFUDPRXPKTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFUDPRXPKTSHI_UDPRXPKTSHI_SHIFT 0
+#define I40E_GLPES_VFUDPRXPKTSHI_UDPRXPKTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_VFUDPRXPKTSHI_UDPRXPKTSHI_SHIFT)
+#define I40E_GLPES_VFUDPRXPKTSLO(_i) (0x0001B800 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFUDPRXPKTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFUDPRXPKTSLO_UDPRXPKTSLO_SHIFT 0
+#define I40E_GLPES_VFUDPRXPKTSLO_UDPRXPKTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFUDPRXPKTSLO_UDPRXPKTSLO_SHIFT)
+#define I40E_GLPES_VFUDPTXPKTSHI(_i) (0x0001BA04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFUDPTXPKTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFUDPTXPKTSHI_UDPTXPKTSHI_SHIFT 0
+#define I40E_GLPES_VFUDPTXPKTSHI_UDPTXPKTSHI_MASK I40E_MASK(0xFFFF, I40E_GLPES_VFUDPTXPKTSHI_UDPTXPKTSHI_SHIFT)
+#define I40E_GLPES_VFUDPTXPKTSLO(_i) (0x0001BA00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
+#define I40E_GLPES_VFUDPTXPKTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFUDPTXPKTSLO_UDPTXPKTSLO_SHIFT 0
+#define I40E_GLPES_VFUDPTXPKTSLO_UDPTXPKTSLO_MASK I40E_MASK(0xFFFFFFFF, I40E_GLPES_VFUDPTXPKTSLO_UDPTXPKTSLO_SHIFT)
+#define I40E_GLGEN_PME_TO 0x000B81BC /* Reset: POR */
+#define I40E_GLGEN_PME_TO_PME_TO_FOR_PE_SHIFT 0
+#define I40E_GLGEN_PME_TO_PME_TO_FOR_PE_MASK I40E_MASK(0x1, I40E_GLGEN_PME_TO_PME_TO_FOR_PE_SHIFT)
+#define I40E_GLQF_APBVT(_i) (0x00260000 + ((_i) * 4)) /* _i=0...2047 */ /* Reset: CORER */
+#define I40E_GLQF_APBVT_MAX_INDEX 2047
+#define I40E_GLQF_APBVT_APBVT_SHIFT 0
+#define I40E_GLQF_APBVT_APBVT_MASK I40E_MASK(0xFFFFFFFF, I40E_GLQF_APBVT_APBVT_SHIFT)
+#define I40E_GLQF_FD_PCTYPES(_i) (0x00268000 + ((_i) * 4)) /* _i=0...63 */ /* Reset: POR */
+#define I40E_GLQF_FD_PCTYPES_MAX_INDEX 63
+#define I40E_GLQF_FD_PCTYPES_FD_PCTYPE_SHIFT 0
+#define I40E_GLQF_FD_PCTYPES_FD_PCTYPE_MASK I40E_MASK(0x3F, I40E_GLQF_FD_PCTYPES_FD_PCTYPE_SHIFT)
+#define I40E_GLQF_FDEVICTENA(_i) (0x00270384 + ((_i) * 4)) /* _i=0...1 */ /* Reset: CORER */
+#define I40E_GLQF_FDEVICTENA_MAX_INDEX 1
+#define I40E_GLQF_FDEVICTENA_GLQF_FDEVICTENA_SHIFT 0
+#define I40E_GLQF_FDEVICTENA_GLQF_FDEVICTENA_MASK I40E_MASK(0xFFFFFFFF, I40E_GLQF_FDEVICTENA_GLQF_FDEVICTENA_SHIFT)
+#define I40E_GLQF_FDEVICTFLAG 0x00270280 /* Reset: CORER */
+#define I40E_GLQF_FDEVICTFLAG_TX_FLAGS_SHIFT 0
+#define I40E_GLQF_FDEVICTFLAG_TX_FLAGS_MASK I40E_MASK(0xFF, I40E_GLQF_FDEVICTFLAG_TX_FLAGS_SHIFT)
+#define I40E_GLQF_FDEVICTFLAG_RX_FLAGS_SHIFT 8
+#define I40E_GLQF_FDEVICTFLAG_RX_FLAGS_MASK I40E_MASK(0xFF, I40E_GLQF_FDEVICTFLAG_RX_FLAGS_SHIFT)
+#define I40E_PFQF_CTL_2 0x00270300 /* Reset: CORER */
+#define I40E_PFQF_CTL_2_PEHSIZE_SHIFT 0
+#define I40E_PFQF_CTL_2_PEHSIZE_MASK I40E_MASK(0x1F, I40E_PFQF_CTL_2_PEHSIZE_SHIFT)
+#define I40E_PFQF_CTL_2_PEDSIZE_SHIFT 5
+#define I40E_PFQF_CTL_2_PEDSIZE_MASK I40E_MASK(0x1F, I40E_PFQF_CTL_2_PEDSIZE_SHIFT)
+/* Redefined for X722 family */
+#define I40E_X722_PFQF_HLUT(_i) (0x00240000 + ((_i) * 128)) /* _i=0...127 */ /* Reset: CORER */
+#define I40E_X722_PFQF_HLUT_MAX_INDEX 127
+#define I40E_X722_PFQF_HLUT_LUT0_SHIFT 0
+#define I40E_X722_PFQF_HLUT_LUT0_MASK I40E_MASK(0x7F, I40E_X722_PFQF_HLUT_LUT0_SHIFT)
+#define I40E_X722_PFQF_HLUT_LUT1_SHIFT 8
+#define I40E_X722_PFQF_HLUT_LUT1_MASK I40E_MASK(0x7F, I40E_X722_PFQF_HLUT_LUT1_SHIFT)
+#define I40E_X722_PFQF_HLUT_LUT2_SHIFT 16
+#define I40E_X722_PFQF_HLUT_LUT2_MASK I40E_MASK(0x7F, I40E_X722_PFQF_HLUT_LUT2_SHIFT)
+#define I40E_X722_PFQF_HLUT_LUT3_SHIFT 24
+#define I40E_X722_PFQF_HLUT_LUT3_MASK I40E_MASK(0x7F, I40E_X722_PFQF_HLUT_LUT3_SHIFT)
+#define I40E_PFQF_HREGION(_i) (0x00245400 + ((_i) * 128)) /* _i=0...7 */ /* Reset: CORER */
+#define I40E_PFQF_HREGION_MAX_INDEX 7
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_0_SHIFT 0
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_0_MASK I40E_MASK(0x1, I40E_PFQF_HREGION_OVERRIDE_ENA_0_SHIFT)
+#define I40E_PFQF_HREGION_REGION_0_SHIFT 1
+#define I40E_PFQF_HREGION_REGION_0_MASK I40E_MASK(0x7, I40E_PFQF_HREGION_REGION_0_SHIFT)
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_1_SHIFT 4
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_1_MASK I40E_MASK(0x1, I40E_PFQF_HREGION_OVERRIDE_ENA_1_SHIFT)
+#define I40E_PFQF_HREGION_REGION_1_SHIFT 5
+#define I40E_PFQF_HREGION_REGION_1_MASK I40E_MASK(0x7, I40E_PFQF_HREGION_REGION_1_SHIFT)
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_2_SHIFT 8
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_2_MASK I40E_MASK(0x1, I40E_PFQF_HREGION_OVERRIDE_ENA_2_SHIFT)
+#define I40E_PFQF_HREGION_REGION_2_SHIFT 9
+#define I40E_PFQF_HREGION_REGION_2_MASK I40E_MASK(0x7, I40E_PFQF_HREGION_REGION_2_SHIFT)
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_3_SHIFT 12
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_3_MASK I40E_MASK(0x1, I40E_PFQF_HREGION_OVERRIDE_ENA_3_SHIFT)
+#define I40E_PFQF_HREGION_REGION_3_SHIFT 13
+#define I40E_PFQF_HREGION_REGION_3_MASK I40E_MASK(0x7, I40E_PFQF_HREGION_REGION_3_SHIFT)
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_4_SHIFT 16
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_4_MASK I40E_MASK(0x1, I40E_PFQF_HREGION_OVERRIDE_ENA_4_SHIFT)
+#define I40E_PFQF_HREGION_REGION_4_SHIFT 17
+#define I40E_PFQF_HREGION_REGION_4_MASK I40E_MASK(0x7, I40E_PFQF_HREGION_REGION_4_SHIFT)
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_5_SHIFT 20
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_5_MASK I40E_MASK(0x1, I40E_PFQF_HREGION_OVERRIDE_ENA_5_SHIFT)
+#define I40E_PFQF_HREGION_REGION_5_SHIFT 21
+#define I40E_PFQF_HREGION_REGION_5_MASK I40E_MASK(0x7, I40E_PFQF_HREGION_REGION_5_SHIFT)
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_6_SHIFT 24
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_6_MASK I40E_MASK(0x1, I40E_PFQF_HREGION_OVERRIDE_ENA_6_SHIFT)
+#define I40E_PFQF_HREGION_REGION_6_SHIFT 25
+#define I40E_PFQF_HREGION_REGION_6_MASK I40E_MASK(0x7, I40E_PFQF_HREGION_REGION_6_SHIFT)
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_7_SHIFT 28
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_7_MASK I40E_MASK(0x1, I40E_PFQF_HREGION_OVERRIDE_ENA_7_SHIFT)
+#define I40E_PFQF_HREGION_REGION_7_SHIFT 29
+#define I40E_PFQF_HREGION_REGION_7_MASK I40E_MASK(0x7, I40E_PFQF_HREGION_REGION_7_SHIFT)
+#define I40E_VSIQF_CTL_RSS_LUT_TYPE_SHIFT 8
+#define I40E_VSIQF_CTL_RSS_LUT_TYPE_MASK I40E_MASK(0x1, I40E_VSIQF_CTL_RSS_LUT_TYPE_SHIFT)
+#define I40E_VSIQF_HKEY(_i, _VSI) (0x002A0000 + ((_i) * 2048 + (_VSI) * 4)) /* _i=0...12, _VSI=0...383 */ /* Reset: CORER */
+#define I40E_VSIQF_HKEY_MAX_INDEX 12
+#define I40E_VSIQF_HKEY_KEY_0_SHIFT 0
+#define I40E_VSIQF_HKEY_KEY_0_MASK I40E_MASK(0xFF, I40E_VSIQF_HKEY_KEY_0_SHIFT)
+#define I40E_VSIQF_HKEY_KEY_1_SHIFT 8
+#define I40E_VSIQF_HKEY_KEY_1_MASK I40E_MASK(0xFF, I40E_VSIQF_HKEY_KEY_1_SHIFT)
+#define I40E_VSIQF_HKEY_KEY_2_SHIFT 16
+#define I40E_VSIQF_HKEY_KEY_2_MASK I40E_MASK(0xFF, I40E_VSIQF_HKEY_KEY_2_SHIFT)
+#define I40E_VSIQF_HKEY_KEY_3_SHIFT 24
+#define I40E_VSIQF_HKEY_KEY_3_MASK I40E_MASK(0xFF, I40E_VSIQF_HKEY_KEY_3_SHIFT)
+#define I40E_VSIQF_HLUT(_i, _VSI) (0x00220000 + ((_i) * 2048 + (_VSI) * 4)) /* _i=0...15, _VSI=0...383 */ /* Reset: CORER */
+#define I40E_VSIQF_HLUT_MAX_INDEX 15
+#define I40E_VSIQF_HLUT_LUT0_SHIFT 0
+#define I40E_VSIQF_HLUT_LUT0_MASK I40E_MASK(0xF, I40E_VSIQF_HLUT_LUT0_SHIFT)
+#define I40E_VSIQF_HLUT_LUT1_SHIFT 8
+#define I40E_VSIQF_HLUT_LUT1_MASK I40E_MASK(0xF, I40E_VSIQF_HLUT_LUT1_SHIFT)
+#define I40E_VSIQF_HLUT_LUT2_SHIFT 16
+#define I40E_VSIQF_HLUT_LUT2_MASK I40E_MASK(0xF, I40E_VSIQF_HLUT_LUT2_SHIFT)
+#define I40E_VSIQF_HLUT_LUT3_SHIFT 24
+#define I40E_VSIQF_HLUT_LUT3_MASK I40E_MASK(0xF, I40E_VSIQF_HLUT_LUT3_SHIFT)
+#define I40E_GLGEN_STAT_CLEAR 0x00390004 /* Reset: CORER */
+#define I40E_GLGEN_STAT_CLEAR_GLGEN_STAT_CLEAR_SHIFT 0
+#define I40E_GLGEN_STAT_CLEAR_GLGEN_STAT_CLEAR_MASK I40E_MASK(0x1, I40E_GLGEN_STAT_CLEAR_GLGEN_STAT_CLEAR_SHIFT)
+#define I40E_GLGEN_STAT_HALT 0x00390000 /* Reset: CORER */
+#define I40E_GLGEN_STAT_HALT_HALT_CELLS_SHIFT 0
+#define I40E_GLGEN_STAT_HALT_HALT_CELLS_MASK I40E_MASK(0x3FFFFFFF, I40E_GLGEN_STAT_HALT_HALT_CELLS_SHIFT)
+#define I40E_VFINT_DYN_CTL01_WB_ON_ITR_SHIFT 30
+#define I40E_VFINT_DYN_CTL01_WB_ON_ITR_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTL01_WB_ON_ITR_SHIFT)
+#define I40E_VFINT_DYN_CTLN1_WB_ON_ITR_SHIFT 30
+#define I40E_VFINT_DYN_CTLN1_WB_ON_ITR_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTLN1_WB_ON_ITR_SHIFT)
+#define I40E_VFPE_AEQALLOC1 0x0000A400 /* Reset: VFR */
+#define I40E_VFPE_AEQALLOC1_AECOUNT_SHIFT 0
+#define I40E_VFPE_AEQALLOC1_AECOUNT_MASK I40E_MASK(0xFFFFFFFF, I40E_VFPE_AEQALLOC1_AECOUNT_SHIFT)
+#define I40E_VFPE_CCQPHIGH1 0x00009800 /* Reset: VFR */
+#define I40E_VFPE_CCQPHIGH1_PECCQPHIGH_SHIFT 0
+#define I40E_VFPE_CCQPHIGH1_PECCQPHIGH_MASK I40E_MASK(0xFFFFFFFF, I40E_VFPE_CCQPHIGH1_PECCQPHIGH_SHIFT)
+#define I40E_VFPE_CCQPLOW1 0x0000AC00 /* Reset: VFR */
+#define I40E_VFPE_CCQPLOW1_PECCQPLOW_SHIFT 0
+#define I40E_VFPE_CCQPLOW1_PECCQPLOW_MASK I40E_MASK(0xFFFFFFFF, I40E_VFPE_CCQPLOW1_PECCQPLOW_SHIFT)
+#define I40E_VFPE_CCQPSTATUS1 0x0000B800 /* Reset: VFR */
+#define I40E_VFPE_CCQPSTATUS1_CCQP_DONE_SHIFT 0
+#define I40E_VFPE_CCQPSTATUS1_CCQP_DONE_MASK I40E_MASK(0x1, I40E_VFPE_CCQPSTATUS1_CCQP_DONE_SHIFT)
+#define I40E_VFPE_CCQPSTATUS1_HMC_PROFILE_SHIFT 4
+#define I40E_VFPE_CCQPSTATUS1_HMC_PROFILE_MASK I40E_MASK(0x7, I40E_VFPE_CCQPSTATUS1_HMC_PROFILE_SHIFT)
+#define I40E_VFPE_CCQPSTATUS1_RDMA_EN_VFS_SHIFT 16
+#define I40E_VFPE_CCQPSTATUS1_RDMA_EN_VFS_MASK I40E_MASK(0x3F, I40E_VFPE_CCQPSTATUS1_RDMA_EN_VFS_SHIFT)
+#define I40E_VFPE_CCQPSTATUS1_CCQP_ERR_SHIFT 31
+#define I40E_VFPE_CCQPSTATUS1_CCQP_ERR_MASK I40E_MASK(0x1, I40E_VFPE_CCQPSTATUS1_CCQP_ERR_SHIFT)
+#define I40E_VFPE_CQACK1 0x0000B000 /* Reset: VFR */
+#define I40E_VFPE_CQACK1_PECQID_SHIFT 0
+#define I40E_VFPE_CQACK1_PECQID_MASK I40E_MASK(0x1FFFF, I40E_VFPE_CQACK1_PECQID_SHIFT)
+#define I40E_VFPE_CQARM1 0x0000B400 /* Reset: VFR */
+#define I40E_VFPE_CQARM1_PECQID_SHIFT 0
+#define I40E_VFPE_CQARM1_PECQID_MASK I40E_MASK(0x1FFFF, I40E_VFPE_CQARM1_PECQID_SHIFT)
+#define I40E_VFPE_CQPDB1 0x0000BC00 /* Reset: VFR */
+#define I40E_VFPE_CQPDB1_WQHEAD_SHIFT 0
+#define I40E_VFPE_CQPDB1_WQHEAD_MASK I40E_MASK(0x7FF, I40E_VFPE_CQPDB1_WQHEAD_SHIFT)
+#define I40E_VFPE_CQPERRCODES1 0x00009C00 /* Reset: VFR */
+#define I40E_VFPE_CQPERRCODES1_CQP_MINOR_CODE_SHIFT 0
+#define I40E_VFPE_CQPERRCODES1_CQP_MINOR_CODE_MASK I40E_MASK(0xFFFF, I40E_VFPE_CQPERRCODES1_CQP_MINOR_CODE_SHIFT)
+#define I40E_VFPE_CQPERRCODES1_CQP_MAJOR_CODE_SHIFT 16
+#define I40E_VFPE_CQPERRCODES1_CQP_MAJOR_CODE_MASK I40E_MASK(0xFFFF, I40E_VFPE_CQPERRCODES1_CQP_MAJOR_CODE_SHIFT)
+#define I40E_VFPE_CQPTAIL1 0x0000A000 /* Reset: VFR */
+#define I40E_VFPE_CQPTAIL1_WQTAIL_SHIFT 0
+#define I40E_VFPE_CQPTAIL1_WQTAIL_MASK I40E_MASK(0x7FF, I40E_VFPE_CQPTAIL1_WQTAIL_SHIFT)
+#define I40E_VFPE_CQPTAIL1_CQP_OP_ERR_SHIFT 31
+#define I40E_VFPE_CQPTAIL1_CQP_OP_ERR_MASK I40E_MASK(0x1, I40E_VFPE_CQPTAIL1_CQP_OP_ERR_SHIFT)
+#define I40E_VFPE_IPCONFIG01 0x00008C00 /* Reset: VFR */
+#define I40E_VFPE_IPCONFIG01_PEIPID_SHIFT 0
+#define I40E_VFPE_IPCONFIG01_PEIPID_MASK I40E_MASK(0xFFFF, I40E_VFPE_IPCONFIG01_PEIPID_SHIFT)
+#define I40E_VFPE_IPCONFIG01_USEENTIREIDRANGE_SHIFT 16
+#define I40E_VFPE_IPCONFIG01_USEENTIREIDRANGE_MASK I40E_MASK(0x1, I40E_VFPE_IPCONFIG01_USEENTIREIDRANGE_SHIFT)
+#define I40E_VFPE_MRTEIDXMASK1 0x00009000 /* Reset: VFR */
+#define I40E_VFPE_MRTEIDXMASK1_MRTEIDXMASKBITS_SHIFT 0
+#define I40E_VFPE_MRTEIDXMASK1_MRTEIDXMASKBITS_MASK I40E_MASK(0x1F, I40E_VFPE_MRTEIDXMASK1_MRTEIDXMASKBITS_SHIFT)
+#define I40E_VFPE_RCVUNEXPECTEDERROR1 0x00009400 /* Reset: VFR */
+#define I40E_VFPE_RCVUNEXPECTEDERROR1_TCP_RX_UNEXP_ERR_SHIFT 0
+#define I40E_VFPE_RCVUNEXPECTEDERROR1_TCP_RX_UNEXP_ERR_MASK I40E_MASK(0xFFFFFF, I40E_VFPE_RCVUNEXPECTEDERROR1_TCP_RX_UNEXP_ERR_SHIFT)
+#define I40E_VFPE_TCPNOWTIMER1 0x0000A800 /* Reset: VFR */
+#define I40E_VFPE_TCPNOWTIMER1_TCP_NOW_SHIFT 0
+#define I40E_VFPE_TCPNOWTIMER1_TCP_NOW_MASK I40E_MASK(0xFFFFFFFF, I40E_VFPE_TCPNOWTIMER1_TCP_NOW_SHIFT)
+#define I40E_VFPE_WQEALLOC1 0x0000C000 /* Reset: VFR */
+#define I40E_VFPE_WQEALLOC1_PEQPID_SHIFT 0
+#define I40E_VFPE_WQEALLOC1_PEQPID_MASK I40E_MASK(0x3FFFF, I40E_VFPE_WQEALLOC1_PEQPID_SHIFT)
+#define I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_SHIFT 20
+#define I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_MASK I40E_MASK(0xFFF, I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_SHIFT)
+
+#endif /* X722_SUPPORT */
+#endif /* _I40E_REGISTER_H_ */
diff --git a/usr/src/uts/common/io/i40e/core/i40e_status.h b/usr/src/uts/common/io/i40e/core/i40e_status.h
new file mode 100644
index 0000000000..1f27507970
--- /dev/null
+++ b/usr/src/uts/common/io/i40e/core/i40e_status.h
@@ -0,0 +1,108 @@
+/******************************************************************************
+
+ Copyright (c) 2013-2014, Intel Corporation
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+
+******************************************************************************/
+/*$FreeBSD: head/sys/dev/ixl/i40e_status.h 283119 2015-05-19 18:35:18Z jhb $*/
+
+#ifndef _I40E_STATUS_H_
+#define _I40E_STATUS_H_
+
+/* Error Codes */
+enum i40e_status_code {
+ I40E_SUCCESS = 0,
+ I40E_ERR_NVM = -1,
+ I40E_ERR_NVM_CHECKSUM = -2,
+ I40E_ERR_PHY = -3,
+ I40E_ERR_CONFIG = -4,
+ I40E_ERR_PARAM = -5,
+ I40E_ERR_MAC_TYPE = -6,
+ I40E_ERR_UNKNOWN_PHY = -7,
+ I40E_ERR_LINK_SETUP = -8,
+ I40E_ERR_ADAPTER_STOPPED = -9,
+ I40E_ERR_INVALID_MAC_ADDR = -10,
+ I40E_ERR_DEVICE_NOT_SUPPORTED = -11,
+ I40E_ERR_MASTER_REQUESTS_PENDING = -12,
+ I40E_ERR_INVALID_LINK_SETTINGS = -13,
+ I40E_ERR_AUTONEG_NOT_COMPLETE = -14,
+ I40E_ERR_RESET_FAILED = -15,
+ I40E_ERR_SWFW_SYNC = -16,
+ I40E_ERR_NO_AVAILABLE_VSI = -17,
+ I40E_ERR_NO_MEMORY = -18,
+ I40E_ERR_BAD_PTR = -19,
+ I40E_ERR_RING_FULL = -20,
+ I40E_ERR_INVALID_PD_ID = -21,
+ I40E_ERR_INVALID_QP_ID = -22,
+ I40E_ERR_INVALID_CQ_ID = -23,
+ I40E_ERR_INVALID_CEQ_ID = -24,
+ I40E_ERR_INVALID_AEQ_ID = -25,
+ I40E_ERR_INVALID_SIZE = -26,
+ I40E_ERR_INVALID_ARP_INDEX = -27,
+ I40E_ERR_INVALID_FPM_FUNC_ID = -28,
+ I40E_ERR_QP_INVALID_MSG_SIZE = -29,
+ I40E_ERR_QP_TOOMANY_WRS_POSTED = -30,
+ I40E_ERR_INVALID_FRAG_COUNT = -31,
+ I40E_ERR_QUEUE_EMPTY = -32,
+ I40E_ERR_INVALID_ALIGNMENT = -33,
+ I40E_ERR_FLUSHED_QUEUE = -34,
+ I40E_ERR_INVALID_PUSH_PAGE_INDEX = -35,
+ I40E_ERR_INVALID_IMM_DATA_SIZE = -36,
+ I40E_ERR_TIMEOUT = -37,
+ I40E_ERR_OPCODE_MISMATCH = -38,
+ I40E_ERR_CQP_COMPL_ERROR = -39,
+ I40E_ERR_INVALID_VF_ID = -40,
+ I40E_ERR_INVALID_HMCFN_ID = -41,
+ I40E_ERR_BACKING_PAGE_ERROR = -42,
+ I40E_ERR_NO_PBLCHUNKS_AVAILABLE = -43,
+ I40E_ERR_INVALID_PBLE_INDEX = -44,
+ I40E_ERR_INVALID_SD_INDEX = -45,
+ I40E_ERR_INVALID_PAGE_DESC_INDEX = -46,
+ I40E_ERR_INVALID_SD_TYPE = -47,
+ I40E_ERR_MEMCPY_FAILED = -48,
+ I40E_ERR_INVALID_HMC_OBJ_INDEX = -49,
+ I40E_ERR_INVALID_HMC_OBJ_COUNT = -50,
+ I40E_ERR_INVALID_SRQ_ARM_LIMIT = -51,
+ I40E_ERR_SRQ_ENABLED = -52,
+ I40E_ERR_ADMIN_QUEUE_ERROR = -53,
+ I40E_ERR_ADMIN_QUEUE_TIMEOUT = -54,
+ I40E_ERR_BUF_TOO_SHORT = -55,
+ I40E_ERR_ADMIN_QUEUE_FULL = -56,
+ I40E_ERR_ADMIN_QUEUE_NO_WORK = -57,
+ I40E_ERR_BAD_IWARP_CQE = -58,
+ I40E_ERR_NVM_BLANK_MODE = -59,
+ I40E_ERR_NOT_IMPLEMENTED = -60,
+ I40E_ERR_PE_DOORBELL_NOT_ENABLED = -61,
+ I40E_ERR_DIAG_TEST_FAILED = -62,
+ I40E_ERR_NOT_READY = -63,
+ I40E_NOT_SUPPORTED = -64,
+ I40E_ERR_FIRMWARE_API_VERSION = -65,
+};
+
+#endif /* _I40E_STATUS_H_ */
diff --git a/usr/src/uts/common/io/i40e/core/i40e_type.h b/usr/src/uts/common/io/i40e/core/i40e_type.h
new file mode 100644
index 0000000000..b4a84993e9
--- /dev/null
+++ b/usr/src/uts/common/io/i40e/core/i40e_type.h
@@ -0,0 +1,1581 @@
+/******************************************************************************
+
+ Copyright (c) 2013-2015, Intel Corporation
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+
+******************************************************************************/
+/*$FreeBSD: head/sys/dev/ixl/i40e_type.h 284049 2015-06-05 22:52:42Z jfv $*/
+
+#ifndef _I40E_TYPE_H_
+#define _I40E_TYPE_H_
+
+#include "i40e_status.h"
+#include "i40e_osdep.h"
+#include "i40e_register.h"
+#include "i40e_adminq.h"
+#include "i40e_hmc.h"
+#include "i40e_lan_hmc.h"
+#include "i40e_devids.h"
+
+#define UNREFERENCED_XPARAMETER
+
+#define BIT(a) (1UL << (a))
+#define BIT_ULL(a) (1ULL << (a))
+
+#ifndef I40E_MASK
+/* I40E_MASK is a macro used on 32 bit registers */
+#define I40E_MASK(mask, shift) (((uint32_t)(mask)) << ((uint32_t)(shift)))
+#endif
+
+#define I40E_MAX_PF 16
+#define I40E_MAX_PF_VSI 64
+#define I40E_MAX_PF_QP 128
+#define I40E_MAX_VSI_QP 16
+#define I40E_MAX_VF_VSI 3
+#define I40E_MAX_CHAINED_RX_BUFFERS 5
+#define I40E_MAX_PF_UDP_OFFLOAD_PORTS 16
+
+/* something less than 1 minute */
+#define I40E_HEARTBEAT_TIMEOUT (HZ * 50)
+
+/* Max default timeout in ms, */
+#define I40E_MAX_NVM_TIMEOUT 18000
+
+/* Check whether address is multicast. */
+#define I40E_IS_MULTICAST(address) (bool)(((u8 *)(address))[0] & ((u8)0x01))
+
+/* Check whether an address is broadcast. */
+#define I40E_IS_BROADCAST(address) \
+ ((((u8 *)(address))[0] == ((u8)0xff)) && \
+ (((u8 *)(address))[1] == ((u8)0xff)))
+
+/* Switch from ms to the 1usec global time (this is the GTIME resolution) */
+#define I40E_MS_TO_GTIME(time) ((time) * 1000)
+
+/* forward declaration */
+struct i40e_hw;
+typedef void (*I40E_ADMINQ_CALLBACK)(struct i40e_hw *, struct i40e_aq_desc *);
+
+#define I40E_ETH_LENGTH_OF_ADDRESS 6
+/* Data type manipulation macros. */
+#define I40E_HI_DWORD(x) ((u32)((((x) >> 16) >> 16) & 0xFFFFFFFF))
+#define I40E_LO_DWORD(x) ((u32)((x) & 0xFFFFFFFF))
+
+#define I40E_HI_WORD(x) ((u16)(((x) >> 16) & 0xFFFF))
+#define I40E_LO_WORD(x) ((u16)((x) & 0xFFFF))
+
+#define I40E_HI_BYTE(x) ((u8)(((x) >> 8) & 0xFF))
+#define I40E_LO_BYTE(x) ((u8)((x) & 0xFF))
+
+/* Number of Transmit Descriptors must be a multiple of 8. */
+#define I40E_REQ_TX_DESCRIPTOR_MULTIPLE 8
+/* Number of Receive Descriptors must be a multiple of 32 if
+ * the number of descriptors is greater than 32.
+ */
+#define I40E_REQ_RX_DESCRIPTOR_MULTIPLE 32
+
+#define I40E_DESC_UNUSED(R) \
+ ((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
+ (R)->next_to_clean - (R)->next_to_use - 1)
+
+/* bitfields for Tx queue mapping in QTX_CTL */
+#define I40E_QTX_CTL_VF_QUEUE 0x0
+#define I40E_QTX_CTL_VM_QUEUE 0x1
+#define I40E_QTX_CTL_PF_QUEUE 0x2
+
+/* debug masks - set these bits in hw->debug_mask to control output */
+enum i40e_debug_mask {
+ I40E_DEBUG_INIT = 0x00000001,
+ I40E_DEBUG_RELEASE = 0x00000002,
+
+ I40E_DEBUG_LINK = 0x00000010,
+ I40E_DEBUG_PHY = 0x00000020,
+ I40E_DEBUG_HMC = 0x00000040,
+ I40E_DEBUG_NVM = 0x00000080,
+ I40E_DEBUG_LAN = 0x00000100,
+ I40E_DEBUG_FLOW = 0x00000200,
+ I40E_DEBUG_DCB = 0x00000400,
+ I40E_DEBUG_DIAG = 0x00000800,
+ I40E_DEBUG_FD = 0x00001000,
+
+ I40E_DEBUG_AQ_MESSAGE = 0x01000000,
+ I40E_DEBUG_AQ_DESCRIPTOR = 0x02000000,
+ I40E_DEBUG_AQ_DESC_BUFFER = 0x04000000,
+ I40E_DEBUG_AQ_COMMAND = 0x06000000,
+ I40E_DEBUG_AQ = 0x0F000000,
+
+ /*
+ * Ugggh, have to cast these because of enums being "int" and these
+ * overflow int.
+ */
+ I40E_DEBUG_USER = (int)0xF0000000,
+
+ I40E_DEBUG_ALL = (int)0xFFFFFFFF
+};
+
+/* PCI Bus Info */
+#define I40E_PCI_LINK_STATUS 0xB2
+#define I40E_PCI_LINK_WIDTH 0x3F0
+#define I40E_PCI_LINK_WIDTH_1 0x10
+#define I40E_PCI_LINK_WIDTH_2 0x20
+#define I40E_PCI_LINK_WIDTH_4 0x40
+#define I40E_PCI_LINK_WIDTH_8 0x80
+#define I40E_PCI_LINK_SPEED 0xF
+#define I40E_PCI_LINK_SPEED_2500 0x1
+#define I40E_PCI_LINK_SPEED_5000 0x2
+#define I40E_PCI_LINK_SPEED_8000 0x3
+
+/* Memory types */
+enum i40e_memset_type {
+ I40E_NONDMA_MEM = 0,
+ I40E_DMA_MEM
+};
+
+/* Memcpy types */
+enum i40e_memcpy_type {
+ I40E_NONDMA_TO_NONDMA = 0,
+ I40E_NONDMA_TO_DMA,
+ I40E_DMA_TO_DMA,
+ I40E_DMA_TO_NONDMA
+};
+
+/* These are structs for managing the hardware information and the operations.
+ * The structures of function pointers are filled out at init time when we
+ * know for sure exactly which hardware we're working with. This gives us the
+ * flexibility of using the same main driver code but adapting to slightly
+ * different hardware needs as new parts are developed. For this architecture,
+ * the Firmware and AdminQ are intended to insulate the driver from most of the
+ * future changes, but these structures will also do part of the job.
+ */
+enum i40e_mac_type {
+ I40E_MAC_UNKNOWN = 0,
+ I40E_MAC_X710,
+ I40E_MAC_XL710,
+ I40E_MAC_VF,
+#ifdef X722_SUPPORT
+ I40E_MAC_X722,
+ I40E_MAC_X722_VF,
+#endif
+ I40E_MAC_GENERIC,
+};
+
+enum i40e_media_type {
+ I40E_MEDIA_TYPE_UNKNOWN = 0,
+ I40E_MEDIA_TYPE_FIBER,
+ I40E_MEDIA_TYPE_BASET,
+ I40E_MEDIA_TYPE_BACKPLANE,
+ I40E_MEDIA_TYPE_CX4,
+ I40E_MEDIA_TYPE_DA,
+ I40E_MEDIA_TYPE_VIRTUAL
+};
+
+enum i40e_fc_mode {
+ I40E_FC_NONE = 0,
+ I40E_FC_RX_PAUSE,
+ I40E_FC_TX_PAUSE,
+ I40E_FC_FULL,
+ I40E_FC_PFC,
+ I40E_FC_DEFAULT
+};
+
+enum i40e_set_fc_aq_failures {
+ I40E_SET_FC_AQ_FAIL_NONE = 0,
+ I40E_SET_FC_AQ_FAIL_GET = 1,
+ I40E_SET_FC_AQ_FAIL_SET = 2,
+ I40E_SET_FC_AQ_FAIL_UPDATE = 4,
+ I40E_SET_FC_AQ_FAIL_SET_UPDATE = 6
+};
+
+enum i40e_vsi_type {
+ I40E_VSI_MAIN = 0,
+ I40E_VSI_VMDQ1 = 1,
+ I40E_VSI_VMDQ2 = 2,
+ I40E_VSI_CTRL = 3,
+ I40E_VSI_FCOE = 4,
+ I40E_VSI_MIRROR = 5,
+ I40E_VSI_SRIOV = 6,
+ I40E_VSI_FDIR = 7,
+ I40E_VSI_TYPE_UNKNOWN
+};
+
+enum i40e_queue_type {
+ I40E_QUEUE_TYPE_RX = 0,
+ I40E_QUEUE_TYPE_TX,
+ I40E_QUEUE_TYPE_PE_CEQ,
+ I40E_QUEUE_TYPE_UNKNOWN
+};
+
+struct i40e_link_status {
+ enum i40e_aq_phy_type phy_type;
+ enum i40e_aq_link_speed link_speed;
+ u8 link_info;
+ u8 an_info;
+ u8 ext_info;
+ u8 loopback;
+ /* is Link Status Event notification to SW enabled */
+ bool lse_enable;
+ u16 max_frame_size;
+ bool crc_enable;
+ u8 pacing;
+ u8 requested_speeds;
+ u8 module_type[3];
+ /* 1st byte: module identifier */
+#define I40E_MODULE_TYPE_SFP 0x03
+#define I40E_MODULE_TYPE_QSFP 0x0D
+ /* 2nd byte: ethernet compliance codes for 10/40G */
+#define I40E_MODULE_TYPE_40G_ACTIVE 0x01
+#define I40E_MODULE_TYPE_40G_LR4 0x02
+#define I40E_MODULE_TYPE_40G_SR4 0x04
+#define I40E_MODULE_TYPE_40G_CR4 0x08
+#define I40E_MODULE_TYPE_10G_BASE_SR 0x10
+#define I40E_MODULE_TYPE_10G_BASE_LR 0x20
+#define I40E_MODULE_TYPE_10G_BASE_LRM 0x40
+#define I40E_MODULE_TYPE_10G_BASE_ER 0x80
+ /* 3rd byte: ethernet compliance codes for 1G */
+#define I40E_MODULE_TYPE_1000BASE_SX 0x01
+#define I40E_MODULE_TYPE_1000BASE_LX 0x02
+#define I40E_MODULE_TYPE_1000BASE_CX 0x04
+#define I40E_MODULE_TYPE_1000BASE_T 0x08
+};
+
+enum i40e_aq_capabilities_phy_type {
+ I40E_CAP_PHY_TYPE_SGMII = BIT(I40E_PHY_TYPE_SGMII),
+ I40E_CAP_PHY_TYPE_1000BASE_KX = BIT(I40E_PHY_TYPE_1000BASE_KX),
+ I40E_CAP_PHY_TYPE_10GBASE_KX4 = BIT(I40E_PHY_TYPE_10GBASE_KX4),
+ I40E_CAP_PHY_TYPE_10GBASE_KR = BIT(I40E_PHY_TYPE_10GBASE_KR),
+ I40E_CAP_PHY_TYPE_40GBASE_KR4 = BIT(I40E_PHY_TYPE_40GBASE_KR4),
+ I40E_CAP_PHY_TYPE_XAUI = BIT(I40E_PHY_TYPE_XAUI),
+ I40E_CAP_PHY_TYPE_XFI = BIT(I40E_PHY_TYPE_XFI),
+ I40E_CAP_PHY_TYPE_SFI = BIT(I40E_PHY_TYPE_SFI),
+ I40E_CAP_PHY_TYPE_XLAUI = BIT(I40E_PHY_TYPE_XLAUI),
+ I40E_CAP_PHY_TYPE_XLPPI = BIT(I40E_PHY_TYPE_XLPPI),
+ I40E_CAP_PHY_TYPE_40GBASE_CR4_CU = BIT(I40E_PHY_TYPE_40GBASE_CR4_CU),
+ I40E_CAP_PHY_TYPE_10GBASE_CR1_CU = BIT(I40E_PHY_TYPE_10GBASE_CR1_CU),
+ I40E_CAP_PHY_TYPE_10GBASE_AOC = BIT(I40E_PHY_TYPE_10GBASE_AOC),
+ I40E_CAP_PHY_TYPE_40GBASE_AOC = BIT(I40E_PHY_TYPE_40GBASE_AOC),
+ I40E_CAP_PHY_TYPE_100BASE_TX = BIT(I40E_PHY_TYPE_100BASE_TX),
+ I40E_CAP_PHY_TYPE_1000BASE_T = BIT(I40E_PHY_TYPE_1000BASE_T),
+ I40E_CAP_PHY_TYPE_10GBASE_T = BIT(I40E_PHY_TYPE_10GBASE_T),
+ I40E_CAP_PHY_TYPE_10GBASE_SR = BIT(I40E_PHY_TYPE_10GBASE_SR),
+ I40E_CAP_PHY_TYPE_10GBASE_LR = BIT(I40E_PHY_TYPE_10GBASE_LR),
+ I40E_CAP_PHY_TYPE_10GBASE_SFPP_CU = BIT(I40E_PHY_TYPE_10GBASE_SFPP_CU),
+ I40E_CAP_PHY_TYPE_10GBASE_CR1 = BIT(I40E_PHY_TYPE_10GBASE_CR1),
+ I40E_CAP_PHY_TYPE_40GBASE_CR4 = BIT(I40E_PHY_TYPE_40GBASE_CR4),
+ I40E_CAP_PHY_TYPE_40GBASE_SR4 = BIT(I40E_PHY_TYPE_40GBASE_SR4),
+ I40E_CAP_PHY_TYPE_40GBASE_LR4 = BIT(I40E_PHY_TYPE_40GBASE_LR4),
+ I40E_CAP_PHY_TYPE_1000BASE_SX = BIT(I40E_PHY_TYPE_1000BASE_SX),
+ I40E_CAP_PHY_TYPE_1000BASE_LX = BIT(I40E_PHY_TYPE_1000BASE_LX),
+ I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL = BIT(I40E_PHY_TYPE_1000BASE_T_OPTICAL),
+ I40E_CAP_PHY_TYPE_20GBASE_KR2 = BIT(I40E_PHY_TYPE_20GBASE_KR2)
+};
+
+struct i40e_phy_info {
+ struct i40e_link_status link_info;
+ struct i40e_link_status link_info_old;
+ bool get_link_info;
+ enum i40e_media_type media_type;
+ /* all the phy types the NVM is capable of */
+ enum i40e_aq_capabilities_phy_type phy_types;
+};
+
+#define I40E_HW_CAP_MAX_GPIO 30
+#define I40E_HW_CAP_MDIO_PORT_MODE_MDIO 0
+#define I40E_HW_CAP_MDIO_PORT_MODE_I2C 1
+
+/* Capabilities of a PF or a VF or the whole device */
+struct i40e_hw_capabilities {
+ u32 switch_mode;
+#define I40E_NVM_IMAGE_TYPE_EVB 0x0
+#define I40E_NVM_IMAGE_TYPE_CLOUD 0x2
+#define I40E_NVM_IMAGE_TYPE_UDP_CLOUD 0x3
+
+ u32 management_mode;
+ u32 npar_enable;
+ u32 os2bmc;
+ u32 valid_functions;
+ bool sr_iov_1_1;
+ bool vmdq;
+ bool evb_802_1_qbg; /* Edge Virtual Bridging */
+ bool evb_802_1_qbh; /* Bridge Port Extension */
+ bool dcb;
+ bool fcoe;
+ bool iscsi; /* Indicates iSCSI enabled */
+ bool flex10_enable;
+ bool flex10_capable;
+ u32 flex10_mode;
+#define I40E_FLEX10_MODE_UNKNOWN 0x0
+#define I40E_FLEX10_MODE_DCC 0x1
+#define I40E_FLEX10_MODE_DCI 0x2
+
+ u32 flex10_status;
+#define I40E_FLEX10_STATUS_DCC_ERROR 0x1
+#define I40E_FLEX10_STATUS_VC_MODE 0x2
+
+ bool mgmt_cem;
+ bool ieee_1588;
+ bool iwarp;
+ bool fd;
+ u32 fd_filters_guaranteed;
+ u32 fd_filters_best_effort;
+ bool rss;
+ u32 rss_table_size;
+ u32 rss_table_entry_width;
+ bool led[I40E_HW_CAP_MAX_GPIO];
+ bool sdp[I40E_HW_CAP_MAX_GPIO];
+ u32 nvm_image_type;
+ u32 num_flow_director_filters;
+ u32 num_vfs;
+ u32 vf_base_id;
+ u32 num_vsis;
+ u32 num_rx_qp;
+ u32 num_tx_qp;
+ u32 base_queue;
+ u32 num_msix_vectors;
+ u32 num_msix_vectors_vf;
+ u32 led_pin_num;
+ u32 sdp_pin_num;
+ u32 mdio_port_num;
+ u32 mdio_port_mode;
+ u8 rx_buf_chain_len;
+ u32 enabled_tcmap;
+ u32 maxtc;
+ u64 wr_csr_prot;
+};
+
+struct i40e_mac_info {
+ enum i40e_mac_type type;
+ u8 addr[I40E_ETH_LENGTH_OF_ADDRESS];
+ u8 perm_addr[I40E_ETH_LENGTH_OF_ADDRESS];
+ u8 san_addr[I40E_ETH_LENGTH_OF_ADDRESS];
+ u8 port_addr[I40E_ETH_LENGTH_OF_ADDRESS];
+ u16 max_fcoeq;
+};
+
+enum i40e_aq_resources_ids {
+ I40E_NVM_RESOURCE_ID = 1
+};
+
+enum i40e_aq_resource_access_type {
+ I40E_RESOURCE_READ = 1,
+ I40E_RESOURCE_WRITE
+};
+
+struct i40e_nvm_info {
+ u64 hw_semaphore_timeout; /* usec global time (GTIME resolution) */
+ u32 timeout; /* [ms] */
+ u16 sr_size; /* Shadow RAM size in words */
+ bool blank_nvm_mode; /* is NVM empty (no FW present)*/
+ u16 version; /* NVM package version */
+ u32 eetrack; /* NVM data version */
+ u32 oem_ver; /* OEM version info */
+};
+
+/* definitions used in NVM update support */
+
+enum i40e_nvmupd_cmd {
+ I40E_NVMUPD_INVALID,
+ I40E_NVMUPD_READ_CON,
+ I40E_NVMUPD_READ_SNT,
+ I40E_NVMUPD_READ_LCB,
+ I40E_NVMUPD_READ_SA,
+ I40E_NVMUPD_WRITE_ERA,
+ I40E_NVMUPD_WRITE_CON,
+ I40E_NVMUPD_WRITE_SNT,
+ I40E_NVMUPD_WRITE_LCB,
+ I40E_NVMUPD_WRITE_SA,
+ I40E_NVMUPD_CSUM_CON,
+ I40E_NVMUPD_CSUM_SA,
+ I40E_NVMUPD_CSUM_LCB,
+ I40E_NVMUPD_STATUS,
+ I40E_NVMUPD_EXEC_AQ,
+ I40E_NVMUPD_GET_AQ_RESULT,
+};
+
+enum i40e_nvmupd_state {
+ I40E_NVMUPD_STATE_INIT,
+ I40E_NVMUPD_STATE_READING,
+ I40E_NVMUPD_STATE_WRITING,
+ I40E_NVMUPD_STATE_INIT_WAIT,
+ I40E_NVMUPD_STATE_WRITE_WAIT,
+};
+
+/* nvm_access definition and its masks/shifts need to be accessible to
+ * application, core driver, and shared code. Where is the right file?
+ */
+#define I40E_NVM_READ 0xB
+#define I40E_NVM_WRITE 0xC
+
+#define I40E_NVM_MOD_PNT_MASK 0xFF
+
+#define I40E_NVM_TRANS_SHIFT 8
+#define I40E_NVM_TRANS_MASK (0xf << I40E_NVM_TRANS_SHIFT)
+#define I40E_NVM_CON 0x0
+#define I40E_NVM_SNT 0x1
+#define I40E_NVM_LCB 0x2
+#define I40E_NVM_SA (I40E_NVM_SNT | I40E_NVM_LCB)
+#define I40E_NVM_ERA 0x4
+#define I40E_NVM_CSUM 0x8
+#define I40E_NVM_EXEC 0xf
+
+#define I40E_NVM_ADAPT_SHIFT 16
+#define I40E_NVM_ADAPT_MASK (0xffffULL << I40E_NVM_ADAPT_SHIFT)
+
+#define I40E_NVMUPD_MAX_DATA 4096
+#define I40E_NVMUPD_IFACE_TIMEOUT 2 /* seconds */
+
+struct i40e_nvm_access {
+ u32 command;
+ u32 config;
+ u32 offset; /* in bytes */
+ u32 data_size; /* in bytes */
+ u8 data[1];
+};
+
+/* PCI bus types */
+enum i40e_bus_type {
+ i40e_bus_type_unknown = 0,
+ i40e_bus_type_pci,
+ i40e_bus_type_pcix,
+ i40e_bus_type_pci_express,
+ i40e_bus_type_reserved
+};
+
+/* PCI bus speeds */
+enum i40e_bus_speed {
+ i40e_bus_speed_unknown = 0,
+ i40e_bus_speed_33 = 33,
+ i40e_bus_speed_66 = 66,
+ i40e_bus_speed_100 = 100,
+ i40e_bus_speed_120 = 120,
+ i40e_bus_speed_133 = 133,
+ i40e_bus_speed_2500 = 2500,
+ i40e_bus_speed_5000 = 5000,
+ i40e_bus_speed_8000 = 8000,
+ i40e_bus_speed_reserved
+};
+
+/* PCI bus widths */
+enum i40e_bus_width {
+ i40e_bus_width_unknown = 0,
+ i40e_bus_width_pcie_x1 = 1,
+ i40e_bus_width_pcie_x2 = 2,
+ i40e_bus_width_pcie_x4 = 4,
+ i40e_bus_width_pcie_x8 = 8,
+ i40e_bus_width_32 = 32,
+ i40e_bus_width_64 = 64,
+ i40e_bus_width_reserved
+};
+
+/* Bus parameters */
+struct i40e_bus_info {
+ enum i40e_bus_speed speed;
+ enum i40e_bus_width width;
+ enum i40e_bus_type type;
+
+ u16 func;
+ u16 device;
+ u16 lan_id;
+};
+
+/* Flow control (FC) parameters */
+struct i40e_fc_info {
+ enum i40e_fc_mode current_mode; /* FC mode in effect */
+ enum i40e_fc_mode requested_mode; /* FC mode requested by caller */
+};
+
+#define I40E_MAX_TRAFFIC_CLASS 8
+#define I40E_MAX_USER_PRIORITY 8
+#define I40E_DCBX_MAX_APPS 32
+#define I40E_LLDPDU_SIZE 1500
+#define I40E_TLV_STATUS_OPER 0x1
+#define I40E_TLV_STATUS_SYNC 0x2
+#define I40E_TLV_STATUS_ERR 0x4
+#define I40E_CEE_OPER_MAX_APPS 3
+#define I40E_APP_PROTOID_FCOE 0x8906
+#define I40E_APP_PROTOID_ISCSI 0x0cbc
+#define I40E_APP_PROTOID_FIP 0x8914
+#define I40E_APP_SEL_ETHTYPE 0x1
+#define I40E_APP_SEL_TCPIP 0x2
+#define I40E_CEE_APP_SEL_ETHTYPE 0x0
+#define I40E_CEE_APP_SEL_TCPIP 0x1
+
+/* CEE or IEEE 802.1Qaz ETS Configuration data */
+struct i40e_dcb_ets_config {
+ u8 willing;
+ u8 cbs;
+ u8 maxtcs;
+ u8 prioritytable[I40E_MAX_TRAFFIC_CLASS];
+ u8 tcbwtable[I40E_MAX_TRAFFIC_CLASS];
+ u8 tsatable[I40E_MAX_TRAFFIC_CLASS];
+};
+
+/* CEE or IEEE 802.1Qaz PFC Configuration data */
+struct i40e_dcb_pfc_config {
+ u8 willing;
+ u8 mbc;
+ u8 pfccap;
+ u8 pfcenable;
+};
+
+/* CEE or IEEE 802.1Qaz Application Priority data */
+struct i40e_dcb_app_priority_table {
+ u8 priority;
+ u8 selector;
+ u16 protocolid;
+};
+
+struct i40e_dcbx_config {
+ u8 dcbx_mode;
+#define I40E_DCBX_MODE_CEE 0x1
+#define I40E_DCBX_MODE_IEEE 0x2
+ u32 numapps;
+ u32 tlv_status; /* CEE mode TLV status */
+ struct i40e_dcb_ets_config etscfg;
+ struct i40e_dcb_ets_config etsrec;
+ struct i40e_dcb_pfc_config pfc;
+ struct i40e_dcb_app_priority_table app[I40E_DCBX_MAX_APPS];
+};
+
+/* Port hardware description */
+struct i40e_hw {
+ u8 *hw_addr;
+ void *back;
+
+ /* subsystem structs */
+ struct i40e_phy_info phy;
+ struct i40e_mac_info mac;
+ struct i40e_bus_info bus;
+ struct i40e_nvm_info nvm;
+ struct i40e_fc_info fc;
+
+ /* pci info */
+ u16 device_id;
+ u16 vendor_id;
+ u16 subsystem_device_id;
+ u16 subsystem_vendor_id;
+ u8 revision_id;
+ u8 port;
+ bool adapter_stopped;
+
+ /* capabilities for entire device and PCI func */
+ struct i40e_hw_capabilities dev_caps;
+ struct i40e_hw_capabilities func_caps;
+
+ /* Flow Director shared filter space */
+ u16 fdir_shared_filter_count;
+
+ /* device profile info */
+ u8 pf_id;
+ u16 main_vsi_seid;
+
+ /* for multi-function MACs */
+ u16 partition_id;
+ u16 num_partitions;
+ u16 num_ports;
+
+ /* Closest numa node to the device */
+ u16 numa_node;
+
+ /* Admin Queue info */
+ struct i40e_adminq_info aq;
+
+ /* state of nvm update process */
+ enum i40e_nvmupd_state nvmupd_state;
+ struct i40e_aq_desc nvm_wb_desc;
+ struct i40e_virt_mem nvm_buff;
+
+ /* HMC info */
+ struct i40e_hmc_info hmc; /* HMC info struct */
+
+ /* LLDP/DCBX Status */
+ u16 dcbx_status;
+
+ /* DCBX info */
+ struct i40e_dcbx_config local_dcbx_config; /* Oper/Local Cfg */
+ struct i40e_dcbx_config remote_dcbx_config; /* Peer Cfg */
+ struct i40e_dcbx_config desired_dcbx_config; /* CEE Desired Cfg */
+
+ /* debug mask */
+ u32 debug_mask;
+ char err_str[16];
+};
+
+static INLINE bool i40e_is_vf(struct i40e_hw *hw)
+{
+#ifdef X722_SUPPORT
+ return (hw->mac.type == I40E_MAC_VF ||
+ hw->mac.type == I40E_MAC_X722_VF);
+#else
+ return hw->mac.type == I40E_MAC_VF;
+#endif
+}
+
+struct i40e_driver_version {
+ u8 major_version;
+ u8 minor_version;
+ u8 build_version;
+ u8 subbuild_version;
+ u8 driver_string[32];
+};
+
+/* RX Descriptors */
+union i40e_16byte_rx_desc {
+ struct {
+ __le64 pkt_addr; /* Packet buffer address */
+ __le64 hdr_addr; /* Header buffer address */
+ } read;
+ struct {
+ struct {
+ struct {
+ union {
+ __le16 mirroring_status;
+ __le16 fcoe_ctx_id;
+ } mirr_fcoe;
+ __le16 l2tag1;
+ } lo_dword;
+ union {
+ __le32 rss; /* RSS Hash */
+ __le32 fd_id; /* Flow director filter id */
+ __le32 fcoe_param; /* FCoE DDP Context id */
+ } hi_dword;
+ } qword0;
+ struct {
+ /* ext status/error/pktype/length */
+ __le64 status_error_len;
+ } qword1;
+ } wb; /* writeback */
+};
+
+union i40e_32byte_rx_desc {
+ struct {
+ __le64 pkt_addr; /* Packet buffer address */
+ __le64 hdr_addr; /* Header buffer address */
+ /* bit 0 of hdr_buffer_addr is DD bit */
+ __le64 rsvd1;
+ __le64 rsvd2;
+ } read;
+ struct {
+ struct {
+ struct {
+ union {
+ __le16 mirroring_status;
+ __le16 fcoe_ctx_id;
+ } mirr_fcoe;
+ __le16 l2tag1;
+ } lo_dword;
+ union {
+ __le32 rss; /* RSS Hash */
+ __le32 fcoe_param; /* FCoE DDP Context id */
+ /* Flow director filter id in case of
+ * Programming status desc WB
+ */
+ __le32 fd_id;
+ } hi_dword;
+ } qword0;
+ struct {
+ /* status/error/pktype/length */
+ __le64 status_error_len;
+ } qword1;
+ struct {
+ __le16 ext_status; /* extended status */
+ __le16 rsvd;
+ __le16 l2tag2_1;
+ __le16 l2tag2_2;
+ } qword2;
+ struct {
+ union {
+ __le32 flex_bytes_lo;
+ __le32 pe_status;
+ } lo_dword;
+ union {
+ __le32 flex_bytes_hi;
+ __le32 fd_id;
+ } hi_dword;
+ } qword3;
+ } wb; /* writeback */
+};
+
+#define I40E_RXD_QW0_MIRROR_STATUS_SHIFT 8
+#define I40E_RXD_QW0_MIRROR_STATUS_MASK (0x3FUL << \
+ I40E_RXD_QW0_MIRROR_STATUS_SHIFT)
+#define I40E_RXD_QW0_FCOEINDX_SHIFT 0
+#define I40E_RXD_QW0_FCOEINDX_MASK (0xFFFUL << \
+ I40E_RXD_QW0_FCOEINDX_SHIFT)
+
+enum i40e_rx_desc_status_bits {
+ /* Note: These are predefined bit offsets */
+ I40E_RX_DESC_STATUS_DD_SHIFT = 0,
+ I40E_RX_DESC_STATUS_EOF_SHIFT = 1,
+ I40E_RX_DESC_STATUS_L2TAG1P_SHIFT = 2,
+ I40E_RX_DESC_STATUS_L3L4P_SHIFT = 3,
+ I40E_RX_DESC_STATUS_CRCP_SHIFT = 4,
+ I40E_RX_DESC_STATUS_TSYNINDX_SHIFT = 5, /* 2 BITS */
+ I40E_RX_DESC_STATUS_TSYNVALID_SHIFT = 7,
+#ifdef X722_SUPPORT
+ I40E_RX_DESC_STATUS_EXT_UDP_0_SHIFT = 8,
+#else
+ I40E_RX_DESC_STATUS_RESERVED1_SHIFT = 8,
+#endif
+
+ I40E_RX_DESC_STATUS_UMBCAST_SHIFT = 9, /* 2 BITS */
+ I40E_RX_DESC_STATUS_FLM_SHIFT = 11,
+ I40E_RX_DESC_STATUS_FLTSTAT_SHIFT = 12, /* 2 BITS */
+ I40E_RX_DESC_STATUS_LPBK_SHIFT = 14,
+ I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT = 15,
+ I40E_RX_DESC_STATUS_RESERVED2_SHIFT = 16, /* 2 BITS */
+#ifdef X722_SUPPORT
+ I40E_RX_DESC_STATUS_INT_UDP_0_SHIFT = 18,
+#else
+ I40E_RX_DESC_STATUS_UDP_0_SHIFT = 18,
+#endif
+ I40E_RX_DESC_STATUS_LAST /* this entry must be last!!! */
+};
+
+#define I40E_RXD_QW1_STATUS_SHIFT 0
+#define I40E_RXD_QW1_STATUS_MASK ((BIT(I40E_RX_DESC_STATUS_LAST) - 1) << \
+ I40E_RXD_QW1_STATUS_SHIFT)
+
+#define I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT I40E_RX_DESC_STATUS_TSYNINDX_SHIFT
+#define I40E_RXD_QW1_STATUS_TSYNINDX_MASK (0x3UL << \
+ I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT)
+
+#define I40E_RXD_QW1_STATUS_TSYNVALID_SHIFT I40E_RX_DESC_STATUS_TSYNVALID_SHIFT
+#define I40E_RXD_QW1_STATUS_TSYNVALID_MASK BIT_ULL(I40E_RXD_QW1_STATUS_TSYNVALID_SHIFT)
+
+#define I40E_RXD_QW1_STATUS_UMBCAST_SHIFT I40E_RX_DESC_STATUS_UMBCAST
+#define I40E_RXD_QW1_STATUS_UMBCAST_MASK (0x3UL << \
+ I40E_RXD_QW1_STATUS_UMBCAST_SHIFT)
+
+enum i40e_rx_desc_fltstat_values {
+ I40E_RX_DESC_FLTSTAT_NO_DATA = 0,
+ I40E_RX_DESC_FLTSTAT_RSV_FD_ID = 1, /* 16byte desc? FD_ID : RSV */
+ I40E_RX_DESC_FLTSTAT_RSV = 2,
+ I40E_RX_DESC_FLTSTAT_RSS_HASH = 3,
+};
+
+#define I40E_RXD_PACKET_TYPE_UNICAST 0
+#define I40E_RXD_PACKET_TYPE_MULTICAST 1
+#define I40E_RXD_PACKET_TYPE_BROADCAST 2
+#define I40E_RXD_PACKET_TYPE_MIRRORED 3
+
+#define I40E_RXD_QW1_ERROR_SHIFT 19
+#define I40E_RXD_QW1_ERROR_MASK (0xFFUL << I40E_RXD_QW1_ERROR_SHIFT)
+
+enum i40e_rx_desc_error_bits {
+ /* Note: These are predefined bit offsets */
+ I40E_RX_DESC_ERROR_RXE_SHIFT = 0,
+ I40E_RX_DESC_ERROR_RECIPE_SHIFT = 1,
+ I40E_RX_DESC_ERROR_HBO_SHIFT = 2,
+ I40E_RX_DESC_ERROR_L3L4E_SHIFT = 3, /* 3 BITS */
+ I40E_RX_DESC_ERROR_IPE_SHIFT = 3,
+ I40E_RX_DESC_ERROR_L4E_SHIFT = 4,
+ I40E_RX_DESC_ERROR_EIPE_SHIFT = 5,
+ I40E_RX_DESC_ERROR_OVERSIZE_SHIFT = 6,
+ I40E_RX_DESC_ERROR_PPRS_SHIFT = 7
+};
+
+enum i40e_rx_desc_error_l3l4e_fcoe_masks {
+ I40E_RX_DESC_ERROR_L3L4E_NONE = 0,
+ I40E_RX_DESC_ERROR_L3L4E_PROT = 1,
+ I40E_RX_DESC_ERROR_L3L4E_FC = 2,
+ I40E_RX_DESC_ERROR_L3L4E_DMAC_ERR = 3,
+ I40E_RX_DESC_ERROR_L3L4E_DMAC_WARN = 4
+};
+
+#define I40E_RXD_QW1_PTYPE_SHIFT 30
+#define I40E_RXD_QW1_PTYPE_MASK (0xFFULL << I40E_RXD_QW1_PTYPE_SHIFT)
+
+/* Packet type non-ip values */
+enum i40e_rx_l2_ptype {
+ I40E_RX_PTYPE_L2_RESERVED = 0,
+ I40E_RX_PTYPE_L2_MAC_PAY2 = 1,
+ I40E_RX_PTYPE_L2_TIMESYNC_PAY2 = 2,
+ I40E_RX_PTYPE_L2_FIP_PAY2 = 3,
+ I40E_RX_PTYPE_L2_OUI_PAY2 = 4,
+ I40E_RX_PTYPE_L2_MACCNTRL_PAY2 = 5,
+ I40E_RX_PTYPE_L2_LLDP_PAY2 = 6,
+ I40E_RX_PTYPE_L2_ECP_PAY2 = 7,
+ I40E_RX_PTYPE_L2_EVB_PAY2 = 8,
+ I40E_RX_PTYPE_L2_QCN_PAY2 = 9,
+ I40E_RX_PTYPE_L2_EAPOL_PAY2 = 10,
+ I40E_RX_PTYPE_L2_ARP = 11,
+ I40E_RX_PTYPE_L2_FCOE_PAY3 = 12,
+ I40E_RX_PTYPE_L2_FCOE_FCDATA_PAY3 = 13,
+ I40E_RX_PTYPE_L2_FCOE_FCRDY_PAY3 = 14,
+ I40E_RX_PTYPE_L2_FCOE_FCRSP_PAY3 = 15,
+ I40E_RX_PTYPE_L2_FCOE_FCOTHER_PA = 16,
+ I40E_RX_PTYPE_L2_FCOE_VFT_PAY3 = 17,
+ I40E_RX_PTYPE_L2_FCOE_VFT_FCDATA = 18,
+ I40E_RX_PTYPE_L2_FCOE_VFT_FCRDY = 19,
+ I40E_RX_PTYPE_L2_FCOE_VFT_FCRSP = 20,
+ I40E_RX_PTYPE_L2_FCOE_VFT_FCOTHER = 21,
+ I40E_RX_PTYPE_GRENAT4_MAC_PAY3 = 58,
+ I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4 = 87,
+ I40E_RX_PTYPE_GRENAT6_MAC_PAY3 = 124,
+ I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4 = 153
+};
+
+struct i40e_rx_ptype_decoded {
+ u32 ptype:8;
+ u32 known:1;
+ u32 outer_ip:1;
+ u32 outer_ip_ver:1;
+ u32 outer_frag:1;
+ u32 tunnel_type:3;
+ u32 tunnel_end_prot:2;
+ u32 tunnel_end_frag:1;
+ u32 inner_prot:4;
+ u32 payload_layer:3;
+};
+
+enum i40e_rx_ptype_outer_ip {
+ I40E_RX_PTYPE_OUTER_L2 = 0,
+ I40E_RX_PTYPE_OUTER_IP = 1
+};
+
+enum i40e_rx_ptype_outer_ip_ver {
+ I40E_RX_PTYPE_OUTER_NONE = 0,
+ I40E_RX_PTYPE_OUTER_IPV4 = 0,
+ I40E_RX_PTYPE_OUTER_IPV6 = 1
+};
+
+enum i40e_rx_ptype_outer_fragmented {
+ I40E_RX_PTYPE_NOT_FRAG = 0,
+ I40E_RX_PTYPE_FRAG = 1
+};
+
+enum i40e_rx_ptype_tunnel_type {
+ I40E_RX_PTYPE_TUNNEL_NONE = 0,
+ I40E_RX_PTYPE_TUNNEL_IP_IP = 1,
+ I40E_RX_PTYPE_TUNNEL_IP_GRENAT = 2,
+ I40E_RX_PTYPE_TUNNEL_IP_GRENAT_MAC = 3,
+ I40E_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN = 4,
+};
+
+enum i40e_rx_ptype_tunnel_end_prot {
+ I40E_RX_PTYPE_TUNNEL_END_NONE = 0,
+ I40E_RX_PTYPE_TUNNEL_END_IPV4 = 1,
+ I40E_RX_PTYPE_TUNNEL_END_IPV6 = 2,
+};
+
+enum i40e_rx_ptype_inner_prot {
+ I40E_RX_PTYPE_INNER_PROT_NONE = 0,
+ I40E_RX_PTYPE_INNER_PROT_UDP = 1,
+ I40E_RX_PTYPE_INNER_PROT_TCP = 2,
+ I40E_RX_PTYPE_INNER_PROT_SCTP = 3,
+ I40E_RX_PTYPE_INNER_PROT_ICMP = 4,
+ I40E_RX_PTYPE_INNER_PROT_TIMESYNC = 5
+};
+
+enum i40e_rx_ptype_payload_layer {
+ I40E_RX_PTYPE_PAYLOAD_LAYER_NONE = 0,
+ I40E_RX_PTYPE_PAYLOAD_LAYER_PAY2 = 1,
+ I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3 = 2,
+ I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4 = 3,
+};
+
+#define I40E_RX_PTYPE_BIT_MASK 0x0FFFFFFF
+#define I40E_RX_PTYPE_SHIFT 56
+
+#define I40E_RXD_QW1_LENGTH_PBUF_SHIFT 38
+#define I40E_RXD_QW1_LENGTH_PBUF_MASK (0x3FFFULL << \
+ I40E_RXD_QW1_LENGTH_PBUF_SHIFT)
+
+#define I40E_RXD_QW1_LENGTH_HBUF_SHIFT 52
+#define I40E_RXD_QW1_LENGTH_HBUF_MASK (0x7FFULL << \
+ I40E_RXD_QW1_LENGTH_HBUF_SHIFT)
+
+#define I40E_RXD_QW1_LENGTH_SPH_SHIFT 63
+#define I40E_RXD_QW1_LENGTH_SPH_MASK BIT_ULL(I40E_RXD_QW1_LENGTH_SPH_SHIFT)
+
+#define I40E_RXD_QW1_NEXTP_SHIFT 38
+#define I40E_RXD_QW1_NEXTP_MASK (0x1FFFULL << I40E_RXD_QW1_NEXTP_SHIFT)
+
+#define I40E_RXD_QW2_EXT_STATUS_SHIFT 0
+#define I40E_RXD_QW2_EXT_STATUS_MASK (0xFFFFFUL << \
+ I40E_RXD_QW2_EXT_STATUS_SHIFT)
+
+enum i40e_rx_desc_ext_status_bits {
+ /* Note: These are predefined bit offsets */
+ I40E_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT = 0,
+ I40E_RX_DESC_EXT_STATUS_L2TAG3P_SHIFT = 1,
+ I40E_RX_DESC_EXT_STATUS_FLEXBL_SHIFT = 2, /* 2 BITS */
+ I40E_RX_DESC_EXT_STATUS_FLEXBH_SHIFT = 4, /* 2 BITS */
+ I40E_RX_DESC_EXT_STATUS_FDLONGB_SHIFT = 9,
+ I40E_RX_DESC_EXT_STATUS_FCOELONGB_SHIFT = 10,
+ I40E_RX_DESC_EXT_STATUS_PELONGB_SHIFT = 11,
+};
+
+#define I40E_RXD_QW2_L2TAG2_SHIFT 0
+#define I40E_RXD_QW2_L2TAG2_MASK (0xFFFFUL << I40E_RXD_QW2_L2TAG2_SHIFT)
+
+#define I40E_RXD_QW2_L2TAG3_SHIFT 16
+#define I40E_RXD_QW2_L2TAG3_MASK (0xFFFFUL << I40E_RXD_QW2_L2TAG3_SHIFT)
+
+enum i40e_rx_desc_pe_status_bits {
+ /* Note: These are predefined bit offsets */
+ I40E_RX_DESC_PE_STATUS_QPID_SHIFT = 0, /* 18 BITS */
+ I40E_RX_DESC_PE_STATUS_L4PORT_SHIFT = 0, /* 16 BITS */
+ I40E_RX_DESC_PE_STATUS_IPINDEX_SHIFT = 16, /* 8 BITS */
+ I40E_RX_DESC_PE_STATUS_QPIDHIT_SHIFT = 24,
+ I40E_RX_DESC_PE_STATUS_APBVTHIT_SHIFT = 25,
+ I40E_RX_DESC_PE_STATUS_PORTV_SHIFT = 26,
+ I40E_RX_DESC_PE_STATUS_URG_SHIFT = 27,
+ I40E_RX_DESC_PE_STATUS_IPFRAG_SHIFT = 28,
+ I40E_RX_DESC_PE_STATUS_IPOPT_SHIFT = 29
+};
+
+#define I40E_RX_PROG_STATUS_DESC_LENGTH_SHIFT 38
+#define I40E_RX_PROG_STATUS_DESC_LENGTH 0x2000000
+
+#define I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT 2
+#define I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK (0x7UL << \
+ I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT)
+
+#define I40E_RX_PROG_STATUS_DESC_QW1_STATUS_SHIFT 0
+#define I40E_RX_PROG_STATUS_DESC_QW1_STATUS_MASK (0x7FFFUL << \
+ I40E_RX_PROG_STATUS_DESC_QW1_STATUS_SHIFT)
+
+#define I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT 19
+#define I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK (0x3FUL << \
+ I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT)
+
+enum i40e_rx_prog_status_desc_status_bits {
+ /* Note: These are predefined bit offsets */
+ I40E_RX_PROG_STATUS_DESC_DD_SHIFT = 0,
+ I40E_RX_PROG_STATUS_DESC_PROG_ID_SHIFT = 2 /* 3 BITS */
+};
+
+enum i40e_rx_prog_status_desc_prog_id_masks {
+ I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS = 1,
+ I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS = 2,
+ I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS = 4,
+};
+
+enum i40e_rx_prog_status_desc_error_bits {
+ /* Note: These are predefined bit offsets */
+ I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT = 0,
+ I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT = 1,
+ I40E_RX_PROG_STATUS_DESC_FCOE_TBL_FULL_SHIFT = 2,
+ I40E_RX_PROG_STATUS_DESC_FCOE_CONFLICT_SHIFT = 3
+};
+
+#define I40E_TWO_BIT_MASK 0x3
+#define I40E_THREE_BIT_MASK 0x7
+#define I40E_FOUR_BIT_MASK 0xF
+#define I40E_EIGHTEEN_BIT_MASK 0x3FFFF
+
+/* TX Descriptor */
+struct i40e_tx_desc {
+ __le64 buffer_addr; /* Address of descriptor's data buf */
+ __le64 cmd_type_offset_bsz;
+};
+
+#define I40E_TXD_QW1_DTYPE_SHIFT 0
+#define I40E_TXD_QW1_DTYPE_MASK (0xFUL << I40E_TXD_QW1_DTYPE_SHIFT)
+
+enum i40e_tx_desc_dtype_value {
+ I40E_TX_DESC_DTYPE_DATA = 0x0,
+ I40E_TX_DESC_DTYPE_NOP = 0x1, /* same as Context desc */
+ I40E_TX_DESC_DTYPE_CONTEXT = 0x1,
+ I40E_TX_DESC_DTYPE_FCOE_CTX = 0x2,
+ I40E_TX_DESC_DTYPE_FILTER_PROG = 0x8,
+ I40E_TX_DESC_DTYPE_DDP_CTX = 0x9,
+ I40E_TX_DESC_DTYPE_FLEX_DATA = 0xB,
+ I40E_TX_DESC_DTYPE_FLEX_CTX_1 = 0xC,
+ I40E_TX_DESC_DTYPE_FLEX_CTX_2 = 0xD,
+ I40E_TX_DESC_DTYPE_DESC_DONE = 0xF
+};
+
+#define I40E_TXD_QW1_CMD_SHIFT 4
+#define I40E_TXD_QW1_CMD_MASK (0x3FFUL << I40E_TXD_QW1_CMD_SHIFT)
+
+enum i40e_tx_desc_cmd_bits {
+ I40E_TX_DESC_CMD_EOP = 0x0001,
+ I40E_TX_DESC_CMD_RS = 0x0002,
+ I40E_TX_DESC_CMD_ICRC = 0x0004,
+ I40E_TX_DESC_CMD_IL2TAG1 = 0x0008,
+ I40E_TX_DESC_CMD_DUMMY = 0x0010,
+ I40E_TX_DESC_CMD_IIPT_NONIP = 0x0000, /* 2 BITS */
+ I40E_TX_DESC_CMD_IIPT_IPV6 = 0x0020, /* 2 BITS */
+ I40E_TX_DESC_CMD_IIPT_IPV4 = 0x0040, /* 2 BITS */
+ I40E_TX_DESC_CMD_IIPT_IPV4_CSUM = 0x0060, /* 2 BITS */
+ I40E_TX_DESC_CMD_FCOET = 0x0080,
+ I40E_TX_DESC_CMD_L4T_EOFT_UNK = 0x0000, /* 2 BITS */
+ I40E_TX_DESC_CMD_L4T_EOFT_TCP = 0x0100, /* 2 BITS */
+ I40E_TX_DESC_CMD_L4T_EOFT_SCTP = 0x0200, /* 2 BITS */
+ I40E_TX_DESC_CMD_L4T_EOFT_UDP = 0x0300, /* 2 BITS */
+ I40E_TX_DESC_CMD_L4T_EOFT_EOF_N = 0x0000, /* 2 BITS */
+ I40E_TX_DESC_CMD_L4T_EOFT_EOF_T = 0x0100, /* 2 BITS */
+ I40E_TX_DESC_CMD_L4T_EOFT_EOF_NI = 0x0200, /* 2 BITS */
+ I40E_TX_DESC_CMD_L4T_EOFT_EOF_A = 0x0300, /* 2 BITS */
+};
+
+#define I40E_TXD_QW1_OFFSET_SHIFT 16
+#define I40E_TXD_QW1_OFFSET_MASK (0x3FFFFULL << \
+ I40E_TXD_QW1_OFFSET_SHIFT)
+
+enum i40e_tx_desc_length_fields {
+ /* Note: These are predefined bit offsets */
+ I40E_TX_DESC_LENGTH_MACLEN_SHIFT = 0, /* 7 BITS */
+ I40E_TX_DESC_LENGTH_IPLEN_SHIFT = 7, /* 7 BITS */
+ I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT = 14 /* 4 BITS */
+};
+
+#define I40E_TXD_QW1_MACLEN_MASK (0x7FUL << I40E_TX_DESC_LENGTH_MACLEN_SHIFT)
+#define I40E_TXD_QW1_IPLEN_MASK (0x7FUL << I40E_TX_DESC_LENGTH_IPLEN_SHIFT)
+#define I40E_TXD_QW1_L4LEN_MASK (0xFUL << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT)
+#define I40E_TXD_QW1_FCLEN_MASK (0xFUL << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT)
+
+#define I40E_TXD_QW1_TX_BUF_SZ_SHIFT 34
+#define I40E_TXD_QW1_TX_BUF_SZ_MASK (0x3FFFULL << \
+ I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
+
+#define I40E_TXD_QW1_L2TAG1_SHIFT 48
+#define I40E_TXD_QW1_L2TAG1_MASK (0xFFFFULL << I40E_TXD_QW1_L2TAG1_SHIFT)
+
+/* Context descriptors */
+struct i40e_tx_context_desc {
+ __le32 tunneling_params;
+ __le16 l2tag2;
+ __le16 rsvd;
+ __le64 type_cmd_tso_mss;
+};
+
+#define I40E_TXD_CTX_QW1_DTYPE_SHIFT 0
+#define I40E_TXD_CTX_QW1_DTYPE_MASK (0xFUL << I40E_TXD_CTX_QW1_DTYPE_SHIFT)
+
+#define I40E_TXD_CTX_QW1_CMD_SHIFT 4
+#define I40E_TXD_CTX_QW1_CMD_MASK (0xFFFFUL << I40E_TXD_CTX_QW1_CMD_SHIFT)
+
+enum i40e_tx_ctx_desc_cmd_bits {
+ I40E_TX_CTX_DESC_TSO = 0x01,
+ I40E_TX_CTX_DESC_TSYN = 0x02,
+ I40E_TX_CTX_DESC_IL2TAG2 = 0x04,
+ I40E_TX_CTX_DESC_IL2TAG2_IL2H = 0x08,
+ I40E_TX_CTX_DESC_SWTCH_NOTAG = 0x00,
+ I40E_TX_CTX_DESC_SWTCH_UPLINK = 0x10,
+ I40E_TX_CTX_DESC_SWTCH_LOCAL = 0x20,
+ I40E_TX_CTX_DESC_SWTCH_VSI = 0x30,
+ I40E_TX_CTX_DESC_SWPE = 0x40
+};
+
+#define I40E_TXD_CTX_QW1_TSO_LEN_SHIFT 30
+#define I40E_TXD_CTX_QW1_TSO_LEN_MASK (0x3FFFFULL << \
+ I40E_TXD_CTX_QW1_TSO_LEN_SHIFT)
+
+#define I40E_TXD_CTX_QW1_MSS_SHIFT 50
+#define I40E_TXD_CTX_QW1_MSS_MASK (0x3FFFULL << \
+ I40E_TXD_CTX_QW1_MSS_SHIFT)
+
+#define I40E_TXD_CTX_QW1_VSI_SHIFT 50
+#define I40E_TXD_CTX_QW1_VSI_MASK (0x1FFULL << I40E_TXD_CTX_QW1_VSI_SHIFT)
+
+#define I40E_TXD_CTX_QW0_EXT_IP_SHIFT 0
+#define I40E_TXD_CTX_QW0_EXT_IP_MASK (0x3ULL << \
+ I40E_TXD_CTX_QW0_EXT_IP_SHIFT)
+
+enum i40e_tx_ctx_desc_eipt_offload {
+ I40E_TX_CTX_EXT_IP_NONE = 0x0,
+ I40E_TX_CTX_EXT_IP_IPV6 = 0x1,
+ I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM = 0x2,
+ I40E_TX_CTX_EXT_IP_IPV4 = 0x3
+};
+
+#define I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT 2
+#define I40E_TXD_CTX_QW0_EXT_IPLEN_MASK (0x3FULL << \
+ I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT)
+
+#define I40E_TXD_CTX_QW0_NATT_SHIFT 9
+#define I40E_TXD_CTX_QW0_NATT_MASK (0x3ULL << I40E_TXD_CTX_QW0_NATT_SHIFT)
+
+#define I40E_TXD_CTX_UDP_TUNNELING BIT_ULL(I40E_TXD_CTX_QW0_NATT_SHIFT)
+#define I40E_TXD_CTX_GRE_TUNNELING (0x2ULL << I40E_TXD_CTX_QW0_NATT_SHIFT)
+
+#define I40E_TXD_CTX_QW0_EIP_NOINC_SHIFT 11
+#define I40E_TXD_CTX_QW0_EIP_NOINC_MASK BIT_ULL(I40E_TXD_CTX_QW0_EIP_NOINC_SHIFT)
+
+#define I40E_TXD_CTX_EIP_NOINC_IPID_CONST I40E_TXD_CTX_QW0_EIP_NOINC_MASK
+
+#define I40E_TXD_CTX_QW0_NATLEN_SHIFT 12
+#define I40E_TXD_CTX_QW0_NATLEN_MASK (0X7FULL << \
+ I40E_TXD_CTX_QW0_NATLEN_SHIFT)
+
+#define I40E_TXD_CTX_QW0_DECTTL_SHIFT 19
+#define I40E_TXD_CTX_QW0_DECTTL_MASK (0xFULL << \
+ I40E_TXD_CTX_QW0_DECTTL_SHIFT)
+
+#ifdef X722_SUPPORT
+#define I40E_TXD_CTX_QW0_L4T_CS_SHIFT 23
+#define I40E_TXD_CTX_QW0_L4T_CS_MASK BIT_ULL(I40E_TXD_CTX_QW0_L4T_CS_SHIFT)
+#endif
+struct i40e_nop_desc {
+ __le64 rsvd;
+ __le64 dtype_cmd;
+};
+
+#define I40E_TXD_NOP_QW1_DTYPE_SHIFT 0
+#define I40E_TXD_NOP_QW1_DTYPE_MASK (0xFUL << I40E_TXD_NOP_QW1_DTYPE_SHIFT)
+
+#define I40E_TXD_NOP_QW1_CMD_SHIFT 4
+#define I40E_TXD_NOP_QW1_CMD_MASK (0x7FUL << I40E_TXD_NOP_QW1_CMD_SHIFT)
+
+enum i40e_tx_nop_desc_cmd_bits {
+ /* Note: These are predefined bit offsets */
+ I40E_TX_NOP_DESC_EOP_SHIFT = 0,
+ I40E_TX_NOP_DESC_RS_SHIFT = 1,
+ I40E_TX_NOP_DESC_RSV_SHIFT = 2 /* 5 bits */
+};
+
+struct i40e_filter_program_desc {
+ __le32 qindex_flex_ptype_vsi;
+ __le32 rsvd;
+ __le32 dtype_cmd_cntindex;
+ __le32 fd_id;
+};
+#define I40E_TXD_FLTR_QW0_QINDEX_SHIFT 0
+#define I40E_TXD_FLTR_QW0_QINDEX_MASK (0x7FFUL << \
+ I40E_TXD_FLTR_QW0_QINDEX_SHIFT)
+#define I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT 11
+#define I40E_TXD_FLTR_QW0_FLEXOFF_MASK (0x7UL << \
+ I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT)
+#define I40E_TXD_FLTR_QW0_PCTYPE_SHIFT 17
+#define I40E_TXD_FLTR_QW0_PCTYPE_MASK (0x3FUL << \
+ I40E_TXD_FLTR_QW0_PCTYPE_SHIFT)
+
+/* Packet Classifier Types for filters */
+enum i40e_filter_pctype {
+#ifdef X722_SUPPORT
+ /* Note: Values 0-28 are reserved for future use.
+ * Value 29, 30, 32 are not supported on XL710 and X710.
+ */
+ I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP = 29,
+ I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP = 30,
+#else
+ /* Note: Values 0-30 are reserved for future use */
+#endif
+ I40E_FILTER_PCTYPE_NONF_IPV4_UDP = 31,
+#ifdef X722_SUPPORT
+ I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK = 32,
+#else
+ /* Note: Value 32 is reserved for future use */
+#endif
+ I40E_FILTER_PCTYPE_NONF_IPV4_TCP = 33,
+ I40E_FILTER_PCTYPE_NONF_IPV4_SCTP = 34,
+ I40E_FILTER_PCTYPE_NONF_IPV4_OTHER = 35,
+ I40E_FILTER_PCTYPE_FRAG_IPV4 = 36,
+#ifdef X722_SUPPORT
+ /* Note: Values 37-38 are reserved for future use.
+ * Value 39, 40, 42 are not supported on XL710 and X710.
+ */
+ I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP = 39,
+ I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP = 40,
+#else
+ /* Note: Values 37-40 are reserved for future use */
+#endif
+ I40E_FILTER_PCTYPE_NONF_IPV6_UDP = 41,
+#ifdef X722_SUPPORT
+ I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK = 42,
+#endif
+ I40E_FILTER_PCTYPE_NONF_IPV6_TCP = 43,
+ I40E_FILTER_PCTYPE_NONF_IPV6_SCTP = 44,
+ I40E_FILTER_PCTYPE_NONF_IPV6_OTHER = 45,
+ I40E_FILTER_PCTYPE_FRAG_IPV6 = 46,
+ /* Note: Value 47 is reserved for future use */
+ I40E_FILTER_PCTYPE_FCOE_OX = 48,
+ I40E_FILTER_PCTYPE_FCOE_RX = 49,
+ I40E_FILTER_PCTYPE_FCOE_OTHER = 50,
+ /* Note: Values 51-62 are reserved for future use */
+ I40E_FILTER_PCTYPE_L2_PAYLOAD = 63,
+};
+
+enum i40e_filter_program_desc_dest {
+ I40E_FILTER_PROGRAM_DESC_DEST_DROP_PACKET = 0x0,
+ I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX = 0x1,
+ I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_OTHER = 0x2,
+};
+
+enum i40e_filter_program_desc_fd_status {
+ I40E_FILTER_PROGRAM_DESC_FD_STATUS_NONE = 0x0,
+ I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID = 0x1,
+ I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID_4FLEX_BYTES = 0x2,
+ I40E_FILTER_PROGRAM_DESC_FD_STATUS_8FLEX_BYTES = 0x3,
+};
+
+#define I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT 23
+#define I40E_TXD_FLTR_QW0_DEST_VSI_MASK BIT_ULL(I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT)
+
+#define I40E_TXD_FLTR_QW1_DTYPE_SHIFT 0
+#define I40E_TXD_FLTR_QW1_DTYPE_MASK (0xFUL << I40E_TXD_FLTR_QW1_DTYPE_SHIFT)
+
+#define I40E_TXD_FLTR_QW1_CMD_SHIFT 4
+#define I40E_TXD_FLTR_QW1_CMD_MASK (0xFFFFULL << \
+ I40E_TXD_FLTR_QW1_CMD_SHIFT)
+
+#define I40E_TXD_FLTR_QW1_PCMD_SHIFT (0x0ULL + I40E_TXD_FLTR_QW1_CMD_SHIFT)
+#define I40E_TXD_FLTR_QW1_PCMD_MASK (0x7ULL << I40E_TXD_FLTR_QW1_PCMD_SHIFT)
+
+enum i40e_filter_program_desc_pcmd {
+ I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE = 0x1,
+ I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE = 0x2,
+};
+
+#define I40E_TXD_FLTR_QW1_DEST_SHIFT (0x3ULL + I40E_TXD_FLTR_QW1_CMD_SHIFT)
+#define I40E_TXD_FLTR_QW1_DEST_MASK (0x3ULL << I40E_TXD_FLTR_QW1_DEST_SHIFT)
+
+#define I40E_TXD_FLTR_QW1_CNT_ENA_SHIFT (0x7ULL + I40E_TXD_FLTR_QW1_CMD_SHIFT)
+#define I40E_TXD_FLTR_QW1_CNT_ENA_MASK BIT_ULL(I40E_TXD_FLTR_QW1_CNT_ENA_SHIFT)
+
+#define I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT (0x9ULL + \
+ I40E_TXD_FLTR_QW1_CMD_SHIFT)
+#define I40E_TXD_FLTR_QW1_FD_STATUS_MASK (0x3ULL << \
+ I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT)
+#ifdef X722_SUPPORT
+
+#define I40E_TXD_FLTR_QW1_ATR_SHIFT (0xEULL + \
+ I40E_TXD_FLTR_QW1_CMD_SHIFT)
+#define I40E_TXD_FLTR_QW1_ATR_MASK BIT_ULL(I40E_TXD_FLTR_QW1_ATR_SHIFT)
+#endif
+
+#define I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT 20
+#define I40E_TXD_FLTR_QW1_CNTINDEX_MASK (0x1FFUL << \
+ I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT)
+
+enum i40e_filter_type {
+ I40E_FLOW_DIRECTOR_FLTR = 0,
+ I40E_PE_QUAD_HASH_FLTR = 1,
+ I40E_ETHERTYPE_FLTR,
+ I40E_FCOE_CTX_FLTR,
+ I40E_MAC_VLAN_FLTR,
+ I40E_HASH_FLTR
+};
+
+struct i40e_vsi_context {
+ u16 seid;
+ u16 uplink_seid;
+ u16 vsi_number;
+ u16 vsis_allocated;
+ u16 vsis_unallocated;
+ u16 flags;
+ u8 pf_num;
+ u8 vf_num;
+ u8 connection_type;
+ struct i40e_aqc_vsi_properties_data info;
+};
+
+struct i40e_veb_context {
+ u16 seid;
+ u16 uplink_seid;
+ u16 veb_number;
+ u16 vebs_allocated;
+ u16 vebs_unallocated;
+ u16 flags;
+ struct i40e_aqc_get_veb_parameters_completion info;
+};
+
+/* Statistics collected by each port, VSI, VEB, and S-channel */
+struct i40e_eth_stats {
+ u64 rx_bytes; /* gorc */
+ u64 rx_unicast; /* uprc */
+ u64 rx_multicast; /* mprc */
+ u64 rx_broadcast; /* bprc */
+ u64 rx_discards; /* rdpc */
+ u64 rx_unknown_protocol; /* rupp */
+ u64 tx_bytes; /* gotc */
+ u64 tx_unicast; /* uptc */
+ u64 tx_multicast; /* mptc */
+ u64 tx_broadcast; /* bptc */
+ u64 tx_discards; /* tdpc */
+ u64 tx_errors; /* tepc */
+};
+
+/* Statistics collected per VEB per TC */
+struct i40e_veb_tc_stats {
+ u64 tc_rx_packets[I40E_MAX_TRAFFIC_CLASS];
+ u64 tc_rx_bytes[I40E_MAX_TRAFFIC_CLASS];
+ u64 tc_tx_packets[I40E_MAX_TRAFFIC_CLASS];
+ u64 tc_tx_bytes[I40E_MAX_TRAFFIC_CLASS];
+};
+
+/* Statistics collected by the MAC */
+struct i40e_hw_port_stats {
+ /* eth stats collected by the port */
+ struct i40e_eth_stats eth;
+
+ /* additional port specific stats */
+ u64 tx_dropped_link_down; /* tdold */
+ u64 crc_errors; /* crcerrs */
+ u64 illegal_bytes; /* illerrc */
+ u64 error_bytes; /* errbc */
+ u64 mac_local_faults; /* mlfc */
+ u64 mac_remote_faults; /* mrfc */
+ u64 rx_length_errors; /* rlec */
+ u64 link_xon_rx; /* lxonrxc */
+ u64 link_xoff_rx; /* lxoffrxc */
+ u64 priority_xon_rx[8]; /* pxonrxc[8] */
+ u64 priority_xoff_rx[8]; /* pxoffrxc[8] */
+ u64 link_xon_tx; /* lxontxc */
+ u64 link_xoff_tx; /* lxofftxc */
+ u64 priority_xon_tx[8]; /* pxontxc[8] */
+ u64 priority_xoff_tx[8]; /* pxofftxc[8] */
+ u64 priority_xon_2_xoff[8]; /* rxon2offcnt[8] */
+ u64 rx_size_64; /* prc64 */
+ u64 rx_size_127; /* prc127 */
+ u64 rx_size_255; /* prc255 */
+ u64 rx_size_511; /* prc511 */
+ u64 rx_size_1023; /* prc1023 */
+ u64 rx_size_1522; /* prc1522 */
+ u64 rx_size_big; /* prc9522 */
+ u64 rx_undersize; /* ruc */
+ u64 rx_fragments; /* rfc */
+ u64 rx_oversize; /* roc */
+ u64 rx_jabber; /* rjc */
+ u64 tx_size_64; /* ptc64 */
+ u64 tx_size_127; /* ptc127 */
+ u64 tx_size_255; /* ptc255 */
+ u64 tx_size_511; /* ptc511 */
+ u64 tx_size_1023; /* ptc1023 */
+ u64 tx_size_1522; /* ptc1522 */
+ u64 tx_size_big; /* ptc9522 */
+ u64 mac_short_packet_dropped; /* mspdc */
+ u64 checksum_error; /* xec */
+ /* flow director stats */
+ u64 fd_atr_match;
+ u64 fd_sb_match;
+ u64 fd_atr_tunnel_match;
+ u32 fd_atr_status;
+ u32 fd_sb_status;
+ /* EEE LPI */
+ u32 tx_lpi_status;
+ u32 rx_lpi_status;
+ u64 tx_lpi_count; /* etlpic */
+ u64 rx_lpi_count; /* erlpic */
+};
+
+/* Checksum and Shadow RAM pointers */
+#define I40E_SR_NVM_CONTROL_WORD 0x00
+#define I40E_SR_PCIE_ANALOG_CONFIG_PTR 0x03
+#define I40E_SR_PHY_ANALOG_CONFIG_PTR 0x04
+#define I40E_SR_OPTION_ROM_PTR 0x05
+#define I40E_SR_RO_PCIR_REGS_AUTO_LOAD_PTR 0x06
+#define I40E_SR_AUTO_GENERATED_POINTERS_PTR 0x07
+#define I40E_SR_PCIR_REGS_AUTO_LOAD_PTR 0x08
+#define I40E_SR_EMP_GLOBAL_MODULE_PTR 0x09
+#define I40E_SR_RO_PCIE_LCB_PTR 0x0A
+#define I40E_SR_EMP_IMAGE_PTR 0x0B
+#define I40E_SR_PE_IMAGE_PTR 0x0C
+#define I40E_SR_CSR_PROTECTED_LIST_PTR 0x0D
+#define I40E_SR_MNG_CONFIG_PTR 0x0E
+#define I40E_SR_EMP_MODULE_PTR 0x0F
+#define I40E_SR_PBA_FLAGS 0x15
+#define I40E_SR_PBA_BLOCK_PTR 0x16
+#define I40E_SR_BOOT_CONFIG_PTR 0x17
+#define I40E_NVM_OEM_VER_OFF 0x83
+#define I40E_SR_NVM_DEV_STARTER_VERSION 0x18
+#define I40E_SR_NVM_WAKE_ON_LAN 0x19
+#define I40E_SR_ALTERNATE_SAN_MAC_ADDRESS_PTR 0x27
+#define I40E_SR_PERMANENT_SAN_MAC_ADDRESS_PTR 0x28
+#define I40E_SR_NVM_MAP_VERSION 0x29
+#define I40E_SR_NVM_IMAGE_VERSION 0x2A
+#define I40E_SR_NVM_STRUCTURE_VERSION 0x2B
+#define I40E_SR_NVM_EETRACK_LO 0x2D
+#define I40E_SR_NVM_EETRACK_HI 0x2E
+#define I40E_SR_VPD_PTR 0x2F
+#define I40E_SR_PXE_SETUP_PTR 0x30
+#define I40E_SR_PXE_CONFIG_CUST_OPTIONS_PTR 0x31
+#define I40E_SR_NVM_ORIGINAL_EETRACK_LO 0x34
+#define I40E_SR_NVM_ORIGINAL_EETRACK_HI 0x35
+#define I40E_SR_SW_ETHERNET_MAC_ADDRESS_PTR 0x37
+#define I40E_SR_POR_REGS_AUTO_LOAD_PTR 0x38
+#define I40E_SR_EMPR_REGS_AUTO_LOAD_PTR 0x3A
+#define I40E_SR_GLOBR_REGS_AUTO_LOAD_PTR 0x3B
+#define I40E_SR_CORER_REGS_AUTO_LOAD_PTR 0x3C
+#define I40E_SR_PCIE_ALT_AUTO_LOAD_PTR 0x3E
+#define I40E_SR_SW_CHECKSUM_WORD 0x3F
+#define I40E_SR_1ST_FREE_PROVISION_AREA_PTR 0x40
+#define I40E_SR_4TH_FREE_PROVISION_AREA_PTR 0x42
+#define I40E_SR_3RD_FREE_PROVISION_AREA_PTR 0x44
+#define I40E_SR_2ND_FREE_PROVISION_AREA_PTR 0x46
+#define I40E_SR_EMP_SR_SETTINGS_PTR 0x48
+#define I40E_SR_FEATURE_CONFIGURATION_PTR 0x49
+#define I40E_SR_CONFIGURATION_METADATA_PTR 0x4D
+#define I40E_SR_IMMEDIATE_VALUES_PTR 0x4E
+
+/* Auxiliary field, mask and shift definition for Shadow RAM and NVM Flash */
+#define I40E_SR_VPD_MODULE_MAX_SIZE 1024
+#define I40E_SR_PCIE_ALT_MODULE_MAX_SIZE 1024
+#define I40E_SR_CONTROL_WORD_1_SHIFT 0x06
+#define I40E_SR_CONTROL_WORD_1_MASK (0x03 << I40E_SR_CONTROL_WORD_1_SHIFT)
+
+/* Shadow RAM related */
+#define I40E_SR_SECTOR_SIZE_IN_WORDS 0x800
+#define I40E_SR_BUF_ALIGNMENT 4096
+#define I40E_SR_WORDS_IN_1KB 512
+/* Checksum should be calculated such that after adding all the words,
+ * including the checksum word itself, the sum should be 0xBABA.
+ */
+#define I40E_SR_SW_CHECKSUM_BASE 0xBABA
+
+#define I40E_SRRD_SRCTL_ATTEMPTS 100000
+
+enum i40e_switch_element_types {
+ I40E_SWITCH_ELEMENT_TYPE_MAC = 1,
+ I40E_SWITCH_ELEMENT_TYPE_PF = 2,
+ I40E_SWITCH_ELEMENT_TYPE_VF = 3,
+ I40E_SWITCH_ELEMENT_TYPE_EMP = 4,
+ I40E_SWITCH_ELEMENT_TYPE_BMC = 6,
+ I40E_SWITCH_ELEMENT_TYPE_PE = 16,
+ I40E_SWITCH_ELEMENT_TYPE_VEB = 17,
+ I40E_SWITCH_ELEMENT_TYPE_PA = 18,
+ I40E_SWITCH_ELEMENT_TYPE_VSI = 19,
+};
+
+/* Supported EtherType filters */
+enum i40e_ether_type_index {
+ I40E_ETHER_TYPE_1588 = 0,
+ I40E_ETHER_TYPE_FIP = 1,
+ I40E_ETHER_TYPE_OUI_EXTENDED = 2,
+ I40E_ETHER_TYPE_MAC_CONTROL = 3,
+ I40E_ETHER_TYPE_LLDP = 4,
+ I40E_ETHER_TYPE_EVB_PROTOCOL1 = 5,
+ I40E_ETHER_TYPE_EVB_PROTOCOL2 = 6,
+ I40E_ETHER_TYPE_QCN_CNM = 7,
+ I40E_ETHER_TYPE_8021X = 8,
+ I40E_ETHER_TYPE_ARP = 9,
+ I40E_ETHER_TYPE_RSV1 = 10,
+ I40E_ETHER_TYPE_RSV2 = 11,
+};
+
+/* Filter context base size is 1K */
+#define I40E_HASH_FILTER_BASE_SIZE 1024
+/* Supported Hash filter values */
+enum i40e_hash_filter_size {
+ I40E_HASH_FILTER_SIZE_1K = 0,
+ I40E_HASH_FILTER_SIZE_2K = 1,
+ I40E_HASH_FILTER_SIZE_4K = 2,
+ I40E_HASH_FILTER_SIZE_8K = 3,
+ I40E_HASH_FILTER_SIZE_16K = 4,
+ I40E_HASH_FILTER_SIZE_32K = 5,
+ I40E_HASH_FILTER_SIZE_64K = 6,
+ I40E_HASH_FILTER_SIZE_128K = 7,
+ I40E_HASH_FILTER_SIZE_256K = 8,
+ I40E_HASH_FILTER_SIZE_512K = 9,
+ I40E_HASH_FILTER_SIZE_1M = 10,
+};
+
+/* DMA context base size is 0.5K */
+#define I40E_DMA_CNTX_BASE_SIZE 512
+/* Supported DMA context values */
+enum i40e_dma_cntx_size {
+ I40E_DMA_CNTX_SIZE_512 = 0,
+ I40E_DMA_CNTX_SIZE_1K = 1,
+ I40E_DMA_CNTX_SIZE_2K = 2,
+ I40E_DMA_CNTX_SIZE_4K = 3,
+ I40E_DMA_CNTX_SIZE_8K = 4,
+ I40E_DMA_CNTX_SIZE_16K = 5,
+ I40E_DMA_CNTX_SIZE_32K = 6,
+ I40E_DMA_CNTX_SIZE_64K = 7,
+ I40E_DMA_CNTX_SIZE_128K = 8,
+ I40E_DMA_CNTX_SIZE_256K = 9,
+};
+
+/* Supported Hash look up table (LUT) sizes */
+enum i40e_hash_lut_size {
+ I40E_HASH_LUT_SIZE_128 = 0,
+ I40E_HASH_LUT_SIZE_512 = 1,
+};
+
+/* Structure to hold a per PF filter control settings */
+struct i40e_filter_control_settings {
+ /* number of PE Quad Hash filter buckets */
+ enum i40e_hash_filter_size pe_filt_num;
+ /* number of PE Quad Hash contexts */
+ enum i40e_dma_cntx_size pe_cntx_num;
+ /* number of FCoE filter buckets */
+ enum i40e_hash_filter_size fcoe_filt_num;
+ /* number of FCoE DDP contexts */
+ enum i40e_dma_cntx_size fcoe_cntx_num;
+ /* size of the Hash LUT */
+ enum i40e_hash_lut_size hash_lut_size;
+ /* enable FDIR filters for PF and its VFs */
+ bool enable_fdir;
+ /* enable Ethertype filters for PF and its VFs */
+ bool enable_ethtype;
+ /* enable MAC/VLAN filters for PF and its VFs */
+ bool enable_macvlan;
+};
+
+/* Structure to hold device level control filter counts */
+struct i40e_control_filter_stats {
+ u16 mac_etype_used; /* Used perfect match MAC/EtherType filters */
+ u16 etype_used; /* Used perfect EtherType filters */
+ u16 mac_etype_free; /* Un-used perfect match MAC/EtherType filters */
+ u16 etype_free; /* Un-used perfect EtherType filters */
+};
+
+enum i40e_reset_type {
+ I40E_RESET_POR = 0,
+ I40E_RESET_CORER = 1,
+ I40E_RESET_GLOBR = 2,
+ I40E_RESET_EMPR = 3,
+};
+
+/* IEEE 802.1AB LLDP Agent Variables from NVM */
+#define I40E_NVM_LLDP_CFG_PTR 0xD
+struct i40e_lldp_variables {
+ u16 length;
+ u16 adminstatus;
+ u16 msgfasttx;
+ u16 msgtxinterval;
+ u16 txparams;
+ u16 timers;
+ u16 crc8;
+};
+
+/* Offsets into Alternate Ram */
+#define I40E_ALT_STRUCT_FIRST_PF_OFFSET 0 /* in dwords */
+#define I40E_ALT_STRUCT_DWORDS_PER_PF 64 /* in dwords */
+#define I40E_ALT_STRUCT_OUTER_VLAN_TAG_OFFSET 0xD /* in dwords */
+#define I40E_ALT_STRUCT_USER_PRIORITY_OFFSET 0xC /* in dwords */
+#define I40E_ALT_STRUCT_MIN_BW_OFFSET 0xE /* in dwords */
+#define I40E_ALT_STRUCT_MAX_BW_OFFSET 0xF /* in dwords */
+
+/* Alternate Ram Bandwidth Masks */
+#define I40E_ALT_BW_VALUE_MASK 0xFF
+#define I40E_ALT_BW_RELATIVE_MASK 0x40000000
+#define I40E_ALT_BW_VALID_MASK 0x80000000
+
+/* RSS Hash Table Size */
+#define I40E_PFQF_CTL_0_HASHLUTSIZE_512 0x00010000
+
+/* PBA length (and one with additional zero-padding byte), see Table 6-2. */
+#define I40E_PBANUM_LENGTH 12
+#define I40E_PBANUM_STRLEN 13
+
+#endif /* _I40E_TYPE_H_ */
diff --git a/usr/src/uts/common/io/i40e/core/i40e_virtchnl.h b/usr/src/uts/common/io/i40e/core/i40e_virtchnl.h
new file mode 100644
index 0000000000..17b090f454
--- /dev/null
+++ b/usr/src/uts/common/io/i40e/core/i40e_virtchnl.h
@@ -0,0 +1,378 @@
+/******************************************************************************
+
+ Copyright (c) 2013-2015, Intel Corporation
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+
+******************************************************************************/
+/*$FreeBSD: head/sys/dev/ixl/i40e_virtchnl.h 284049 2015-06-05 22:52:42Z jfv $*/
+
+#ifndef _I40E_VIRTCHNL_H_
+#define _I40E_VIRTCHNL_H_
+
+#include "i40e_type.h"
+
+/* Description:
+ * This header file describes the VF-PF communication protocol used
+ * by the various i40e drivers.
+ *
+ * Admin queue buffer usage:
+ * desc->opcode is always i40e_aqc_opc_send_msg_to_pf
+ * flags, retval, datalen, and data addr are all used normally.
+ * Firmware copies the cookie fields when sending messages between the PF and
+ * VF, but uses all other fields internally. Due to this limitation, we
+ * must send all messages as "indirect", i.e. using an external buffer.
+ *
+ * All the vsi indexes are relative to the VF. Each VF can have maximum of
+ * three VSIs. All the queue indexes are relative to the VSI. Each VF can
+ * have a maximum of sixteen queues for all of its VSIs.
+ *
+ * The PF is required to return a status code in v_retval for all messages
+ * except RESET_VF, which does not require any response. The return value is of
+ * i40e_status_code type, defined in the i40e_type.h.
+ *
+ * In general, VF driver initialization should roughly follow the order of these
+ * opcodes. The VF driver must first validate the API version of the PF driver,
+ * then request a reset, then get resources, then configure queues and
+ * interrupts. After these operations are complete, the VF driver may start
+ * its queues, optionally add MAC and VLAN filters, and process traffic.
+ */
+
+/* Opcodes for VF-PF communication. These are placed in the v_opcode field
+ * of the virtchnl_msg structure.
+ */
+enum i40e_virtchnl_ops {
+/* The PF sends status change events to VFs using
+ * the I40E_VIRTCHNL_OP_EVENT opcode.
+ * VFs send requests to the PF using the other ops.
+ */
+ I40E_VIRTCHNL_OP_UNKNOWN = 0,
+ I40E_VIRTCHNL_OP_VERSION = 1, /* must ALWAYS be 1 */
+ I40E_VIRTCHNL_OP_RESET_VF = 2,
+ I40E_VIRTCHNL_OP_GET_VF_RESOURCES = 3,
+ I40E_VIRTCHNL_OP_CONFIG_TX_QUEUE = 4,
+ I40E_VIRTCHNL_OP_CONFIG_RX_QUEUE = 5,
+ I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES = 6,
+ I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP = 7,
+ I40E_VIRTCHNL_OP_ENABLE_QUEUES = 8,
+ I40E_VIRTCHNL_OP_DISABLE_QUEUES = 9,
+ I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS = 10,
+ I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS = 11,
+ I40E_VIRTCHNL_OP_ADD_VLAN = 12,
+ I40E_VIRTCHNL_OP_DEL_VLAN = 13,
+ I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE = 14,
+ I40E_VIRTCHNL_OP_GET_STATS = 15,
+ I40E_VIRTCHNL_OP_FCOE = 16,
+ I40E_VIRTCHNL_OP_EVENT = 17,
+};
+
+/* Virtual channel message descriptor. This overlays the admin queue
+ * descriptor. All other data is passed in external buffers.
+ */
+
+struct i40e_virtchnl_msg {
+ u8 pad[8]; /* AQ flags/opcode/len/retval fields */
+ enum i40e_virtchnl_ops v_opcode; /* avoid confusion with desc->opcode */
+ enum i40e_status_code v_retval; /* ditto for desc->retval */
+ u32 vfid; /* used by PF when sending to VF */
+};
+
+/* Message descriptions and data structures.*/
+
+/* I40E_VIRTCHNL_OP_VERSION
+ * VF posts its version number to the PF. PF responds with its version number
+ * in the same format, along with a return code.
+ * Reply from PF has its major/minor versions also in param0 and param1.
+ * If there is a major version mismatch, then the VF cannot operate.
+ * If there is a minor version mismatch, then the VF can operate but should
+ * add a warning to the system log.
+ *
+ * This enum element MUST always be specified as == 1, regardless of other
+ * changes in the API. The PF must always respond to this message without
+ * error regardless of version mismatch.
+ */
+#define I40E_VIRTCHNL_VERSION_MAJOR 1
+#define I40E_VIRTCHNL_VERSION_MINOR 1
+#define I40E_VIRTCHNL_VERSION_MINOR_NO_VF_CAPS 0
+
+struct i40e_virtchnl_version_info {
+ u32 major;
+ u32 minor;
+};
+
+/* I40E_VIRTCHNL_OP_RESET_VF
+ * VF sends this request to PF with no parameters
+ * PF does NOT respond! VF driver must delay then poll VFGEN_RSTAT register
+ * until reset completion is indicated. The admin queue must be reinitialized
+ * after this operation.
+ *
+ * When reset is complete, PF must ensure that all queues in all VSIs associated
+ * with the VF are stopped, all queue configurations in the HMC are set to 0,
+ * and all MAC and VLAN filters (except the default MAC address) on all VSIs
+ * are cleared.
+ */
+
+/* I40E_VIRTCHNL_OP_GET_VF_RESOURCES
+ * Version 1.0 VF sends this request to PF with no parameters
+ * Version 1.1 VF sends this request to PF with u32 bitmap of its capabilities
+ * PF responds with an indirect message containing
+ * i40e_virtchnl_vf_resource and one or more
+ * i40e_virtchnl_vsi_resource structures.
+ */
+
+struct i40e_virtchnl_vsi_resource {
+ u16 vsi_id;
+ u16 num_queue_pairs;
+ enum i40e_vsi_type vsi_type;
+ u16 qset_handle;
+ u8 default_mac_addr[I40E_ETH_LENGTH_OF_ADDRESS];
+};
+/* VF offload flags */
+#define I40E_VIRTCHNL_VF_OFFLOAD_L2 0x00000001
+#define I40E_VIRTCHNL_VF_OFFLOAD_IWARP 0x00000002
+#define I40E_VIRTCHNL_VF_OFFLOAD_FCOE 0x00000004
+#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_AQ 0x00000008
+#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_REG 0x00000010
+#define I40E_VIRTCHNL_VF_OFFLOAD_VLAN 0x00010000
+#define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING 0x00020000
+
+struct i40e_virtchnl_vf_resource {
+ u16 num_vsis;
+ u16 num_queue_pairs;
+ u16 max_vectors;
+ u16 max_mtu;
+
+ u32 vf_offload_flags;
+ u32 max_fcoe_contexts;
+ u32 max_fcoe_filters;
+
+ struct i40e_virtchnl_vsi_resource vsi_res[1];
+};
+
+/* I40E_VIRTCHNL_OP_CONFIG_TX_QUEUE
+ * VF sends this message to set up parameters for one TX queue.
+ * External data buffer contains one instance of i40e_virtchnl_txq_info.
+ * PF configures requested queue and returns a status code.
+ */
+
+/* Tx queue config info */
+struct i40e_virtchnl_txq_info {
+ u16 vsi_id;
+ u16 queue_id;
+ u16 ring_len; /* number of descriptors, multiple of 8 */
+ u16 headwb_enabled;
+ u64 dma_ring_addr;
+ u64 dma_headwb_addr;
+};
+
+/* I40E_VIRTCHNL_OP_CONFIG_RX_QUEUE
+ * VF sends this message to set up parameters for one RX queue.
+ * External data buffer contains one instance of i40e_virtchnl_rxq_info.
+ * PF configures requested queue and returns a status code.
+ */
+
+/* Rx queue config info */
+struct i40e_virtchnl_rxq_info {
+ u16 vsi_id;
+ u16 queue_id;
+ u32 ring_len; /* number of descriptors, multiple of 32 */
+ u16 hdr_size;
+ u16 splithdr_enabled;
+ u32 databuffer_size;
+ u32 max_pkt_size;
+ u64 dma_ring_addr;
+ enum i40e_hmc_obj_rx_hsplit_0 rx_split_pos;
+};
+
+/* I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES
+ * VF sends this message to set parameters for all active TX and RX queues
+ * associated with the specified VSI.
+ * PF configures queues and returns status.
+ * If the number of queues specified is greater than the number of queues
+ * associated with the VSI, an error is returned and no queues are configured.
+ */
+struct i40e_virtchnl_queue_pair_info {
+ /* NOTE: vsi_id and queue_id should be identical for both queues. */
+ struct i40e_virtchnl_txq_info txq;
+ struct i40e_virtchnl_rxq_info rxq;
+};
+
+struct i40e_virtchnl_vsi_queue_config_info {
+ u16 vsi_id;
+ u16 num_queue_pairs;
+ struct i40e_virtchnl_queue_pair_info qpair[1];
+};
+
+/* I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP
+ * VF uses this message to map vectors to queues.
+ * The rxq_map and txq_map fields are bitmaps used to indicate which queues
+ * are to be associated with the specified vector.
+ * The "other" causes are always mapped to vector 0.
+ * PF configures interrupt mapping and returns status.
+ */
+struct i40e_virtchnl_vector_map {
+ u16 vsi_id;
+ u16 vector_id;
+ u16 rxq_map;
+ u16 txq_map;
+ u16 rxitr_idx;
+ u16 txitr_idx;
+};
+
+struct i40e_virtchnl_irq_map_info {
+ u16 num_vectors;
+ struct i40e_virtchnl_vector_map vecmap[1];
+};
+
+/* I40E_VIRTCHNL_OP_ENABLE_QUEUES
+ * I40E_VIRTCHNL_OP_DISABLE_QUEUES
+ * VF sends these message to enable or disable TX/RX queue pairs.
+ * The queues fields are bitmaps indicating which queues to act upon.
+ * (Currently, we only support 16 queues per VF, but we make the field
+ * u32 to allow for expansion.)
+ * PF performs requested action and returns status.
+ */
+struct i40e_virtchnl_queue_select {
+ u16 vsi_id;
+ u16 pad;
+ u32 rx_queues;
+ u32 tx_queues;
+};
+
+/* I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS
+ * VF sends this message in order to add one or more unicast or multicast
+ * address filters for the specified VSI.
+ * PF adds the filters and returns status.
+ */
+
+/* I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS
+ * VF sends this message in order to remove one or more unicast or multicast
+ * filters for the specified VSI.
+ * PF removes the filters and returns status.
+ */
+
+struct i40e_virtchnl_ether_addr {
+ u8 addr[I40E_ETH_LENGTH_OF_ADDRESS];
+ u8 pad[2];
+};
+
+struct i40e_virtchnl_ether_addr_list {
+ u16 vsi_id;
+ u16 num_elements;
+ struct i40e_virtchnl_ether_addr list[1];
+};
+
+/* I40E_VIRTCHNL_OP_ADD_VLAN
+ * VF sends this message to add one or more VLAN tag filters for receives.
+ * PF adds the filters and returns status.
+ * If a port VLAN is configured by the PF, this operation will return an
+ * error to the VF.
+ */
+
+/* I40E_VIRTCHNL_OP_DEL_VLAN
+ * VF sends this message to remove one or more VLAN tag filters for receives.
+ * PF removes the filters and returns status.
+ * If a port VLAN is configured by the PF, this operation will return an
+ * error to the VF.
+ */
+
+struct i40e_virtchnl_vlan_filter_list {
+ u16 vsi_id;
+ u16 num_elements;
+ u16 vlan_id[1];
+};
+
+/* I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE
+ * VF sends VSI id and flags.
+ * PF returns status code in retval.
+ * Note: we assume that broadcast accept mode is always enabled.
+ */
+struct i40e_virtchnl_promisc_info {
+ u16 vsi_id;
+ u16 flags;
+};
+
+#define I40E_FLAG_VF_UNICAST_PROMISC 0x00000001
+#define I40E_FLAG_VF_MULTICAST_PROMISC 0x00000002
+
+/* I40E_VIRTCHNL_OP_GET_STATS
+ * VF sends this message to request stats for the selected VSI. VF uses
+ * the i40e_virtchnl_queue_select struct to specify the VSI. The queue_id
+ * field is ignored by the PF.
+ *
+ * PF replies with struct i40e_eth_stats in an external buffer.
+ */
+
+/* I40E_VIRTCHNL_OP_EVENT
+ * PF sends this message to inform the VF driver of events that may affect it.
+ * No direct response is expected from the VF, though it may generate other
+ * messages in response to this one.
+ */
+enum i40e_virtchnl_event_codes {
+ I40E_VIRTCHNL_EVENT_UNKNOWN = 0,
+ I40E_VIRTCHNL_EVENT_LINK_CHANGE,
+ I40E_VIRTCHNL_EVENT_RESET_IMPENDING,
+ I40E_VIRTCHNL_EVENT_PF_DRIVER_CLOSE,
+};
+#define I40E_PF_EVENT_SEVERITY_INFO 0
+#define I40E_PF_EVENT_SEVERITY_ATTENTION 1
+#define I40E_PF_EVENT_SEVERITY_ACTION_REQUIRED 2
+#define I40E_PF_EVENT_SEVERITY_CERTAIN_DOOM 255
+
+struct i40e_virtchnl_pf_event {
+ enum i40e_virtchnl_event_codes event;
+ union {
+ struct {
+ enum i40e_aq_link_speed link_speed;
+ bool link_status;
+ } link_event;
+ } event_data;
+
+ int severity;
+};
+
+/* VF reset states - these are written into the RSTAT register:
+ * I40E_VFGEN_RSTAT1 on the PF
+ * I40E_VFGEN_RSTAT on the VF
+ * When the PF initiates a reset, it writes 0
+ * When the reset is complete, it writes 1
+ * When the PF detects that the VF has recovered, it writes 2
+ * VF checks this register periodically to determine if a reset has occurred,
+ * then polls it to know when the reset is complete.
+ * If either the PF or VF reads the register while the hardware
+ * is in a reset state, it will return DEADBEEF, which, when masked
+ * will result in 3.
+ */
+enum i40e_vfr_states {
+ I40E_VFR_INPROGRESS = 0,
+ I40E_VFR_COMPLETED,
+ I40E_VFR_VFACTIVE,
+ I40E_VFR_UNKNOWN,
+};
+
+#endif /* _I40E_VIRTCHNL_H_ */
diff --git a/usr/src/uts/common/io/i40e/i40e.conf b/usr/src/uts/common/io/i40e/i40e.conf
new file mode 100644
index 0000000000..b4c3459931
--- /dev/null
+++ b/usr/src/uts/common/io/i40e/i40e.conf
@@ -0,0 +1,19 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2016 Joyent, Inc.
+#
+
+#
+# Driver.conf file for Intel XL710 PCIe NIC Driver (i40e)
+# See i40e(7D) for valid options.
+#
diff --git a/usr/src/uts/common/io/i40e/i40e_gld.c b/usr/src/uts/common/io/i40e/i40e_gld.c
new file mode 100644
index 0000000000..0af4c4c71f
--- /dev/null
+++ b/usr/src/uts/common/io/i40e/i40e_gld.c
@@ -0,0 +1,1097 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
+ */
+
+/*
+ * For more information, please see the big theory statement in i40e_main.c.
+ */
+
+#include "i40e_sw.h"
+
+#define I40E_PROP_RX_DMA_THRESH "_rx_dma_threshold"
+#define I40E_PROP_TX_DMA_THRESH "_tx_dma_threshold"
+#define I40E_PROP_RX_ITR "_rx_intr_throttle"
+#define I40E_PROP_TX_ITR "_tx_intr_throttle"
+#define I40E_PROP_OTHER_ITR "_other_intr_throttle"
+
+char *i40e_priv_props[] = {
+ I40E_PROP_RX_DMA_THRESH,
+ I40E_PROP_TX_DMA_THRESH,
+ I40E_PROP_RX_ITR,
+ I40E_PROP_TX_ITR,
+ I40E_PROP_OTHER_ITR,
+ NULL
+};
+
+static int
+i40e_group_remove_mac(void *arg, const uint8_t *mac_addr)
+{
+ i40e_t *i40e = arg;
+ struct i40e_aqc_remove_macvlan_element_data filt;
+ struct i40e_hw *hw = &i40e->i40e_hw_space;
+ int ret, i, last;
+ i40e_uaddr_t *iua;
+
+ if (I40E_IS_MULTICAST(mac_addr))
+ return (EINVAL);
+
+ mutex_enter(&i40e->i40e_general_lock);
+
+ if (i40e->i40e_state & I40E_SUSPENDED) {
+ ret = ECANCELED;
+ goto done;
+ }
+
+ for (i = 0; i < i40e->i40e_resources.ifr_nmacfilt_used; i++) {
+ if (bcmp(mac_addr, i40e->i40e_uaddrs[i].iua_mac,
+ ETHERADDRL) == 0)
+ break;
+ }
+
+ if (i == i40e->i40e_resources.ifr_nmacfilt_used) {
+ ret = ENOENT;
+ goto done;
+ }
+
+ iua = &i40e->i40e_uaddrs[i];
+ ASSERT(i40e->i40e_resources.ifr_nmacfilt_used > 0);
+
+ bzero(&filt, sizeof (filt));
+ bcopy(mac_addr, filt.mac_addr, ETHERADDRL);
+ filt.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH |
+ I40E_AQC_MACVLAN_DEL_IGNORE_VLAN;
+
+ if (i40e_aq_remove_macvlan(hw, iua->iua_vsi, &filt, 1, NULL) !=
+ I40E_SUCCESS) {
+ i40e_error(i40e, "failed to remove mac address "
+ "%2x:%2x:%2x:%2x:%2x:%2x from unicast filter: %d",
+ mac_addr[0], mac_addr[1], mac_addr[2], mac_addr[3],
+ mac_addr[4], mac_addr[5], filt.error_code);
+ ret = EIO;
+ goto done;
+ }
+
+ last = i40e->i40e_resources.ifr_nmacfilt_used - 1;
+ if (i != last) {
+ i40e_uaddr_t *src = &i40e->i40e_uaddrs[last];
+ bcopy(src, iua, sizeof (i40e_uaddr_t));
+ }
+
+ /*
+ * Set the multicast bit in the last one to indicate to ourselves that
+ * it's invalid.
+ */
+ bzero(&i40e->i40e_uaddrs[last], sizeof (i40e_uaddr_t));
+ i40e->i40e_uaddrs[last].iua_mac[0] = 0x01;
+ i40e->i40e_resources.ifr_nmacfilt_used--;
+ ret = 0;
+done:
+ mutex_exit(&i40e->i40e_general_lock);
+
+ return (ret);
+}
+
+static int
+i40e_group_add_mac(void *arg, const uint8_t *mac_addr)
+{
+ i40e_t *i40e = arg;
+ struct i40e_hw *hw = &i40e->i40e_hw_space;
+ int i, ret;
+ i40e_uaddr_t *iua;
+ struct i40e_aqc_add_macvlan_element_data filt;
+
+ if (I40E_IS_MULTICAST(mac_addr))
+ return (EINVAL);
+
+ mutex_enter(&i40e->i40e_general_lock);
+ if (i40e->i40e_state & I40E_SUSPENDED) {
+ ret = ECANCELED;
+ goto done;
+ }
+
+ if (i40e->i40e_resources.ifr_nmacfilt ==
+ i40e->i40e_resources.ifr_nmacfilt_used) {
+ ret = ENOSPC;
+ goto done;
+ }
+
+ for (i = 0; i < i40e->i40e_resources.ifr_nmacfilt_used; i++) {
+ if (bcmp(mac_addr, i40e->i40e_uaddrs[i].iua_mac,
+ ETHERADDRL) == 0) {
+ ret = EEXIST;
+ goto done;
+ }
+ }
+
+ /*
+ * Note, the general use of the i40e_vsi_id will have to be refactored
+ * when we have proper group support.
+ */
+ bzero(&filt, sizeof (filt));
+ bcopy(mac_addr, filt.mac_addr, ETHERADDRL);
+ filt.flags = I40E_AQC_MACVLAN_ADD_PERFECT_MATCH |
+ I40E_AQC_MACVLAN_ADD_IGNORE_VLAN;
+
+ if ((ret = i40e_aq_add_macvlan(hw, i40e->i40e_vsi_id, &filt, 1,
+ NULL)) != I40E_SUCCESS) {
+ i40e_error(i40e, "failed to add mac address "
+ "%2x:%2x:%2x:%2x:%2x:%2x to unicast filter: %d",
+ mac_addr[0], mac_addr[1], mac_addr[2], mac_addr[3],
+ mac_addr[4], mac_addr[5], ret);
+ ret = EIO;
+ goto done;
+ }
+
+ iua = &i40e->i40e_uaddrs[i40e->i40e_resources.ifr_nmacfilt_used];
+ bcopy(mac_addr, iua->iua_mac, ETHERADDRL);
+ iua->iua_vsi = i40e->i40e_vsi_id;
+ i40e->i40e_resources.ifr_nmacfilt_used++;
+ ASSERT(i40e->i40e_resources.ifr_nmacfilt_used <=
+ i40e->i40e_resources.ifr_nmacfilt);
+ ret = 0;
+done:
+ mutex_exit(&i40e->i40e_general_lock);
+ return (ret);
+}
+
+static int
+i40e_m_start(void *arg)
+{
+ i40e_t *i40e = arg;
+ int rc = 0;
+
+ mutex_enter(&i40e->i40e_general_lock);
+ if (i40e->i40e_state & I40E_SUSPENDED) {
+ rc = ECANCELED;
+ goto done;
+ }
+
+ if (!i40e_start(i40e, B_TRUE)) {
+ rc = EIO;
+ goto done;
+ }
+
+ atomic_or_32(&i40e->i40e_state, I40E_STARTED);
+done:
+ mutex_exit(&i40e->i40e_general_lock);
+
+ return (rc);
+}
+
+static void
+i40e_m_stop(void *arg)
+{
+ i40e_t *i40e = arg;
+
+ mutex_enter(&i40e->i40e_general_lock);
+
+ if (i40e->i40e_state & I40E_SUSPENDED)
+ goto done;
+
+ atomic_and_32(&i40e->i40e_state, ~I40E_STARTED);
+ i40e_stop(i40e, B_TRUE);
+done:
+ mutex_exit(&i40e->i40e_general_lock);
+}
+
+/*
+ * Enable and disable promiscuous mode as requested. We have to toggle both
+ * unicast and multicast. Note that multicast may already be enabled due to the
+ * i40e_m_multicast may toggle it itself. See i40e_main.c for more information
+ * on this.
+ */
+static int
+i40e_m_promisc(void *arg, boolean_t on)
+{
+ i40e_t *i40e = arg;
+ struct i40e_hw *hw = &i40e->i40e_hw_space;
+ int ret = 0, err = 0;
+
+ mutex_enter(&i40e->i40e_general_lock);
+ if (i40e->i40e_state & I40E_SUSPENDED) {
+ ret = ECANCELED;
+ goto done;
+ }
+
+
+ ret = i40e_aq_set_vsi_unicast_promiscuous(hw, i40e->i40e_vsi_id,
+ on, NULL);
+ if (ret != I40E_SUCCESS) {
+ i40e_error(i40e, "failed to %s unicast promiscuity on "
+ "the default VSI: %d", on == B_TRUE ? "enable" : "disable",
+ ret);
+ err = EIO;
+ goto done;
+ }
+
+ /*
+ * If we have a non-zero mcast_promisc_count, then it has already been
+ * enabled or we need to leave it that way and not touch it.
+ */
+ if (i40e->i40e_mcast_promisc_count > 0) {
+ i40e->i40e_promisc_on = on;
+ goto done;
+ }
+
+ ret = i40e_aq_set_vsi_multicast_promiscuous(hw, i40e->i40e_vsi_id,
+ on, NULL);
+ if (ret != I40E_SUCCESS) {
+ i40e_error(i40e, "failed to %s multicast promiscuity on "
+ "the default VSI: %d", on == B_TRUE ? "enable" : "disable",
+ ret);
+
+ /*
+ * Try our best to put us back into a state that MAC expects us
+ * to be in.
+ */
+ ret = i40e_aq_set_vsi_unicast_promiscuous(hw, i40e->i40e_vsi_id,
+ !on, NULL);
+ if (ret != I40E_SUCCESS) {
+ i40e_error(i40e, "failed to %s unicast promiscuity on "
+ "the default VSI after toggling multicast failed: "
+ "%d", on == B_TRUE ? "disable" : "enable", ret);
+ }
+
+ err = EIO;
+ goto done;
+ } else {
+ i40e->i40e_promisc_on = on;
+ }
+
+done:
+ mutex_exit(&i40e->i40e_general_lock);
+ return (err);
+}
+
+/*
+ * See the big theory statement in i40e_main.c for multicast address management.
+ */
+static int
+i40e_multicast_add(i40e_t *i40e, const uint8_t *multicast_address)
+{
+ struct i40e_hw *hw = &i40e->i40e_hw_space;
+ struct i40e_aqc_add_macvlan_element_data filt;
+ i40e_maddr_t *mc;
+ int ret;
+
+ ASSERT(MUTEX_HELD(&i40e->i40e_general_lock));
+
+ if (i40e->i40e_resources.ifr_nmcastfilt_used ==
+ i40e->i40e_resources.ifr_nmcastfilt) {
+ if (i40e->i40e_mcast_promisc_count == 0 &&
+ i40e->i40e_promisc_on == B_FALSE) {
+ ret = i40e_aq_set_vsi_multicast_promiscuous(hw,
+ i40e->i40e_vsi_id, B_TRUE, NULL);
+ if (ret != I40E_SUCCESS) {
+ i40e_error(i40e, "failed to enable promiscuous "
+ "mode on VSI %d: %d", i40e->i40e_vsi_id,
+ ret);
+ return (EIO);
+ }
+ }
+ i40e->i40e_mcast_promisc_count++;
+ return (0);
+ }
+
+ mc = &i40e->i40e_maddrs[i40e->i40e_resources.ifr_nmcastfilt_used];
+ bzero(&filt, sizeof (filt));
+ bcopy(multicast_address, filt.mac_addr, ETHERADDRL);
+ filt.flags = I40E_AQC_MACVLAN_ADD_HASH_MATCH |
+ I40E_AQC_MACVLAN_ADD_IGNORE_VLAN;
+
+ if ((ret = i40e_aq_add_macvlan(hw, i40e->i40e_vsi_id, &filt, 1,
+ NULL)) != I40E_SUCCESS) {
+ i40e_error(i40e, "failed to add mac address "
+ "%2x:%2x:%2x:%2x:%2x:%2x to multicast filter: %d",
+ multicast_address[0], multicast_address[1],
+ multicast_address[2], multicast_address[3],
+ multicast_address[4], multicast_address[5],
+ ret);
+ return (EIO);
+ }
+
+ bcopy(multicast_address, mc->ima_mac, ETHERADDRL);
+ i40e->i40e_resources.ifr_nmcastfilt_used++;
+ return (0);
+}
+
+/*
+ * See the big theory statement in i40e_main.c for multicast address management.
+ */
+static int
+i40e_multicast_remove(i40e_t *i40e, const uint8_t *multicast_address)
+{
+ int i, ret;
+ struct i40e_hw *hw = &i40e->i40e_hw_space;
+
+ ASSERT(MUTEX_HELD(&i40e->i40e_general_lock));
+
+ for (i = 0; i < i40e->i40e_resources.ifr_nmcastfilt_used; i++) {
+ struct i40e_aqc_remove_macvlan_element_data filt;
+ int last;
+
+ if (bcmp(multicast_address, i40e->i40e_maddrs[i].ima_mac,
+ ETHERADDRL) != 0) {
+ continue;
+ }
+
+ bzero(&filt, sizeof (filt));
+ bcopy(multicast_address, filt.mac_addr, ETHERADDRL);
+ filt.flags = I40E_AQC_MACVLAN_DEL_HASH_MATCH |
+ I40E_AQC_MACVLAN_DEL_IGNORE_VLAN;
+
+ if (i40e_aq_remove_macvlan(hw, i40e->i40e_vsi_id,
+ &filt, 1, NULL) != I40E_SUCCESS) {
+ i40e_error(i40e, "failed to remove mac address "
+ "%2x:%2x:%2x:%2x:%2x:%2x from multicast "
+ "filter: %d",
+ multicast_address[0], multicast_address[1],
+ multicast_address[2], multicast_address[3],
+ multicast_address[4], multicast_address[5],
+ filt.error_code);
+ return (EIO);
+ }
+
+ last = i40e->i40e_resources.ifr_nmcastfilt_used - 1;
+ if (i != last) {
+ bcopy(&i40e->i40e_maddrs[last], &i40e->i40e_maddrs[i],
+ sizeof (i40e_maddr_t));
+ bzero(&i40e->i40e_maddrs[last], sizeof (i40e_maddr_t));
+ }
+
+ ASSERT(i40e->i40e_resources.ifr_nmcastfilt_used > 0);
+ i40e->i40e_resources.ifr_nmcastfilt_used--;
+ return (0);
+ }
+
+ if (i40e->i40e_mcast_promisc_count > 0) {
+ if (i40e->i40e_mcast_promisc_count == 1 &&
+ i40e->i40e_promisc_on == B_FALSE) {
+ ret = i40e_aq_set_vsi_multicast_promiscuous(hw,
+ i40e->i40e_vsi_id, B_FALSE, NULL);
+ if (ret != I40E_SUCCESS) {
+ i40e_error(i40e, "failed to disable "
+ "promiscuous mode on VSI %d: %d",
+ i40e->i40e_vsi_id, ret);
+ return (EIO);
+ }
+ }
+ i40e->i40e_mcast_promisc_count--;
+
+ return (0);
+ }
+
+ return (ENOENT);
+}
+
+static int
+i40e_m_multicast(void *arg, boolean_t add, const uint8_t *multicast_address)
+{
+ i40e_t *i40e = arg;
+ int rc;
+
+ mutex_enter(&i40e->i40e_general_lock);
+
+ if (i40e->i40e_state & I40E_SUSPENDED) {
+ mutex_exit(&i40e->i40e_general_lock);
+ return (ECANCELED);
+ }
+
+ if (add == B_TRUE) {
+ rc = i40e_multicast_add(i40e, multicast_address);
+ } else {
+ rc = i40e_multicast_remove(i40e, multicast_address);
+ }
+
+ mutex_exit(&i40e->i40e_general_lock);
+ return (rc);
+}
+
+/* ARGSUSED */
+static void
+i40e_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
+{
+ /*
+ * At this time, we don't support toggling i40e into loopback mode. It's
+ * questionable how much value this has when there's no clear way to
+ * toggle this behavior from a supported way in userland.
+ */
+ miocnak(q, mp, 0, EINVAL);
+}
+
+static int
+i40e_ring_start(mac_ring_driver_t rh, uint64_t gen_num)
+{
+ i40e_trqpair_t *itrq = (i40e_trqpair_t *)rh;
+
+ /*
+ * GLDv3 requires we keep track of a generation number, as it uses
+ * that number to keep track of whether or not a ring is active.
+ */
+ mutex_enter(&itrq->itrq_rx_lock);
+ itrq->itrq_rxgen = gen_num;
+ mutex_exit(&itrq->itrq_rx_lock);
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+i40e_rx_ring_intr_enable(mac_intr_handle_t intrh)
+{
+ i40e_trqpair_t *itrq = (i40e_trqpair_t *)intrh;
+ i40e_t *i40e = itrq->itrq_i40e;
+
+ mutex_enter(&i40e->i40e_general_lock);
+ ASSERT(i40e->i40e_intr_poll == B_TRUE);
+ i40e_intr_rx_queue_enable(i40e, itrq->itrq_index);
+ i40e->i40e_intr_poll = B_FALSE;
+ mutex_exit(&i40e->i40e_general_lock);
+
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+i40e_rx_ring_intr_disable(mac_intr_handle_t intrh)
+{
+ i40e_trqpair_t *itrq = (i40e_trqpair_t *)intrh;
+ i40e_t *i40e = itrq->itrq_i40e;
+
+ mutex_enter(&i40e->i40e_general_lock);
+ i40e_intr_rx_queue_disable(i40e, itrq->itrq_index);
+ i40e->i40e_intr_poll = B_TRUE;
+ mutex_exit(&i40e->i40e_general_lock);
+
+ return (0);
+}
+
+/* ARGSUSED */
+static void
+i40e_fill_tx_ring(void *arg, mac_ring_type_t rtype, const int group_index,
+ const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh)
+{
+ i40e_t *i40e = arg;
+ mac_intr_t *mintr = &infop->mri_intr;
+ i40e_trqpair_t *itrq = &(i40e->i40e_trqpairs[ring_index]);
+
+ /*
+ * Note the group index here is expected to be -1 due to the fact that
+ * we're not actually grouping things tx-wise at this time.
+ */
+ ASSERT(group_index == -1);
+ ASSERT(ring_index < i40e->i40e_num_trqpairs);
+
+ itrq->itrq_mactxring = rh;
+ infop->mri_driver = (mac_ring_driver_t)itrq;
+ infop->mri_start = NULL;
+ infop->mri_stop = NULL;
+ infop->mri_tx = i40e_ring_tx;
+ infop->mri_stat = i40e_tx_ring_stat;
+
+ /*
+ * We only provide the handle in cases where we have MSI-X interrupts,
+ * to indicate that we'd actually support retargetting.
+ */
+ if (i40e->i40e_intr_type & DDI_INTR_TYPE_MSIX) {
+ mintr->mi_ddi_handle =
+ i40e->i40e_intr_handles[itrq->itrq_tx_intrvec];
+ }
+}
+
+/* ARGSUSED */
+static void
+i40e_fill_rx_ring(void *arg, mac_ring_type_t rtype, const int group_index,
+ const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh)
+{
+ i40e_t *i40e = arg;
+ mac_intr_t *mintr = &infop->mri_intr;
+ i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[ring_index];
+
+ /*
+ * We assert the group number and ring index to help sanity check
+ * ourselves and mark that we'll need to rework this when we have
+ * multiple groups.
+ */
+ ASSERT3S(group_index, ==, 0);
+ ASSERT3S(ring_index, <, i40e->i40e_num_trqpairs);
+
+ itrq->itrq_macrxring = rh;
+ infop->mri_driver = (mac_ring_driver_t)itrq;
+ infop->mri_start = i40e_ring_start;
+ infop->mri_stop = NULL;
+ infop->mri_poll = i40e_ring_rx_poll;
+ infop->mri_stat = i40e_rx_ring_stat;
+ mintr->mi_handle = (mac_intr_handle_t)itrq;
+ mintr->mi_enable = i40e_rx_ring_intr_enable;
+ mintr->mi_disable = i40e_rx_ring_intr_disable;
+
+ /*
+ * We only provide the handle in cases where we have MSI-X interrupts,
+ * to indicate that we'd actually support retargetting.
+ */
+ if (i40e->i40e_intr_type & DDI_INTR_TYPE_MSIX) {
+ mintr->mi_ddi_handle =
+ i40e->i40e_intr_handles[itrq->itrq_rx_intrvec];
+ }
+}
+
+/* ARGSUSED */
+static void
+i40e_fill_rx_group(void *arg, mac_ring_type_t rtype, const int index,
+ mac_group_info_t *infop, mac_group_handle_t gh)
+{
+ i40e_t *i40e = arg;
+
+ if (rtype != MAC_RING_TYPE_RX)
+ return;
+
+ /*
+ * Note, this is a simplified view of a group, given that we only have a
+ * single group and a single ring at the moment. We'll want to expand
+ * upon this as we leverage more hardware functionality.
+ */
+ i40e->i40e_rx_group_handle = gh;
+ infop->mgi_driver = (mac_group_driver_t)i40e;
+ infop->mgi_start = NULL;
+ infop->mgi_stop = NULL;
+ infop->mgi_addmac = i40e_group_add_mac;
+ infop->mgi_remmac = i40e_group_remove_mac;
+
+ ASSERT(i40e->i40e_num_rx_groups == I40E_GROUP_MAX);
+ infop->mgi_count = i40e->i40e_num_trqpairs;
+}
+
+static boolean_t
+i40e_m_getcapab(void *arg, mac_capab_t cap, void *cap_data)
+{
+ i40e_t *i40e = arg;
+ mac_capab_rings_t *cap_rings;
+
+ switch (cap) {
+ case MAC_CAPAB_HCKSUM: {
+ uint32_t *txflags = cap_data;
+
+ *txflags = 0;
+ if (i40e->i40e_tx_hcksum_enable == B_TRUE)
+ *txflags = HCKSUM_INET_PARTIAL | HCKSUM_IPHDRCKSUM;
+ break;
+ }
+
+ case MAC_CAPAB_RINGS:
+ cap_rings = cap_data;
+ cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
+ switch (cap_rings->mr_type) {
+ case MAC_RING_TYPE_TX:
+ /*
+ * Note, saying we have no rings, but some number of
+ * groups indicates to MAC that it should create
+ * psuedo-groups with one for each TX ring. This may not
+ * be the long term behavior we want, but it'll work for
+ * now.
+ */
+ cap_rings->mr_gnum = 0;
+ cap_rings->mr_rnum = i40e->i40e_num_trqpairs;
+ cap_rings->mr_rget = i40e_fill_tx_ring;
+ cap_rings->mr_gget = NULL;
+ cap_rings->mr_gaddring = NULL;
+ cap_rings->mr_gremring = NULL;
+ break;
+ case MAC_RING_TYPE_RX:
+ cap_rings->mr_rnum = i40e->i40e_num_trqpairs;
+ cap_rings->mr_rget = i40e_fill_rx_ring;
+ cap_rings->mr_gnum = I40E_GROUP_MAX;
+ cap_rings->mr_gget = i40e_fill_rx_group;
+ cap_rings->mr_gaddring = NULL;
+ cap_rings->mr_gremring = NULL;
+ break;
+ default:
+ return (B_FALSE);
+ }
+ break;
+ default:
+ return (B_FALSE);
+ }
+
+ return (B_TRUE);
+}
+
+/* ARGSUSED */
+static int
+i40e_m_setprop_private(i40e_t *i40e, const char *pr_name, uint_t pr_valsize,
+ const void *pr_val)
+{
+ int ret;
+ long val;
+ char *eptr;
+
+ ASSERT(MUTEX_HELD(&i40e->i40e_general_lock));
+
+ if ((ret = ddi_strtol(pr_val, &eptr, 10, &val)) != 0 ||
+ *eptr != '\0') {
+ return (ret);
+ }
+
+ if (strcmp(pr_name, I40E_PROP_RX_DMA_THRESH) == 0) {
+ if (val < I40E_MIN_RX_DMA_THRESH ||
+ val > I40E_MAX_RX_DMA_THRESH) {
+ return (EINVAL);
+ }
+ i40e->i40e_rx_dma_min = (uint32_t)val;
+ return (0);
+ }
+
+ if (strcmp(pr_name, I40E_PROP_TX_DMA_THRESH) == 0) {
+ if (val < I40E_MIN_TX_DMA_THRESH ||
+ val > I40E_MAX_TX_DMA_THRESH) {
+ return (EINVAL);
+ }
+ i40e->i40e_tx_dma_min = (uint32_t)val;
+ return (0);
+ }
+
+ if (strcmp(pr_name, I40E_PROP_RX_ITR) == 0) {
+ if (val < I40E_MIN_ITR ||
+ val > I40E_MAX_ITR) {
+ return (EINVAL);
+ }
+ i40e->i40e_rx_itr = (uint32_t)val;
+ i40e_intr_set_itr(i40e, I40E_ITR_INDEX_RX, i40e->i40e_rx_itr);
+ return (0);
+ }
+
+ if (strcmp(pr_name, I40E_PROP_TX_ITR) == 0) {
+ if (val < I40E_MIN_ITR ||
+ val > I40E_MAX_ITR) {
+ return (EINVAL);
+ }
+ i40e->i40e_tx_itr = (uint32_t)val;
+ i40e_intr_set_itr(i40e, I40E_ITR_INDEX_TX, i40e->i40e_tx_itr);
+ return (0);
+ }
+
+ if (strcmp(pr_name, I40E_PROP_OTHER_ITR) == 0) {
+ if (val < I40E_MIN_ITR ||
+ val > I40E_MAX_ITR) {
+ return (EINVAL);
+ }
+ i40e->i40e_tx_itr = (uint32_t)val;
+ i40e_intr_set_itr(i40e, I40E_ITR_INDEX_OTHER,
+ i40e->i40e_other_itr);
+ return (0);
+ }
+
+ return (ENOTSUP);
+}
+
+static int
+i40e_m_getprop_private(i40e_t *i40e, const char *pr_name, uint_t pr_valsize,
+ void *pr_val)
+{
+ uint32_t val;
+
+ ASSERT(MUTEX_HELD(&i40e->i40e_general_lock));
+
+ if (strcmp(pr_name, I40E_PROP_RX_DMA_THRESH) == 0) {
+ val = i40e->i40e_rx_dma_min;
+ } else if (strcmp(pr_name, I40E_PROP_TX_DMA_THRESH) == 0) {
+ val = i40e->i40e_tx_dma_min;
+ } else if (strcmp(pr_name, I40E_PROP_RX_ITR) == 0) {
+ val = i40e->i40e_rx_itr;
+ } else if (strcmp(pr_name, I40E_PROP_TX_ITR) == 0) {
+ val = i40e->i40e_tx_itr;
+ } else if (strcmp(pr_name, I40E_PROP_OTHER_ITR) == 0) {
+ val = i40e->i40e_other_itr;
+ } else {
+ return (ENOTSUP);
+ }
+
+ if (snprintf(pr_val, pr_valsize, "%d", val) >= pr_valsize)
+ return (ERANGE);
+ return (0);
+}
+
+/*
+ * Annoyingly for private properties MAC seems to ignore default values that
+ * aren't strings. That means that we have to translate all of these into
+ * uint32_t's and instead we size the buffer to be large enough to hold a
+ * uint32_t.
+ */
+/* ARGSUSED */
+static void
+i40e_m_propinfo_private(i40e_t *i40e, const char *pr_name,
+ mac_prop_info_handle_t prh)
+{
+ char buf[64];
+ uint32_t def;
+
+ if (strcmp(pr_name, I40E_PROP_RX_DMA_THRESH) == 0) {
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW);
+ def = I40E_DEF_RX_DMA_THRESH;
+ mac_prop_info_set_range_uint32(prh,
+ I40E_MIN_RX_DMA_THRESH,
+ I40E_MAX_RX_DMA_THRESH);
+ } else if (strcmp(pr_name, I40E_PROP_TX_DMA_THRESH) == 0) {
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW);
+ def = I40E_DEF_TX_DMA_THRESH;
+ mac_prop_info_set_range_uint32(prh,
+ I40E_MIN_TX_DMA_THRESH,
+ I40E_MAX_TX_DMA_THRESH);
+ } else if (strcmp(pr_name, I40E_PROP_RX_ITR) == 0) {
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW);
+ def = I40E_DEF_RX_ITR;
+ mac_prop_info_set_range_uint32(prh, I40E_MIN_ITR, I40E_MAX_ITR);
+ } else if (strcmp(pr_name, I40E_PROP_TX_ITR) == 0) {
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW);
+ def = I40E_DEF_TX_ITR;
+ mac_prop_info_set_range_uint32(prh, I40E_MIN_ITR, I40E_MAX_ITR);
+ } else if (strcmp(pr_name, I40E_PROP_OTHER_ITR) == 0) {
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW);
+ def = I40E_DEF_OTHER_ITR;
+ mac_prop_info_set_range_uint32(prh, I40E_MIN_ITR, I40E_MAX_ITR);
+ } else {
+ return;
+ }
+
+ (void) snprintf(buf, sizeof (buf), "%d", def);
+ mac_prop_info_set_default_str(prh, buf);
+}
+
+static int
+i40e_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+ uint_t pr_valsize, const void *pr_val)
+{
+ uint32_t new_mtu;
+ i40e_t *i40e = arg;
+ int ret = 0;
+
+ mutex_enter(&i40e->i40e_general_lock);
+ if (i40e->i40e_state & I40E_SUSPENDED) {
+ mutex_exit(&i40e->i40e_general_lock);
+ return (ECANCELED);
+ }
+
+ switch (pr_num) {
+ /*
+ * These properties are always read-only across every device.
+ */
+ case MAC_PROP_DUPLEX:
+ case MAC_PROP_SPEED:
+ case MAC_PROP_STATUS:
+ case MAC_PROP_ADV_100FDX_CAP:
+ case MAC_PROP_ADV_1000FDX_CAP:
+ case MAC_PROP_ADV_10GFDX_CAP:
+ case MAC_PROP_ADV_40GFDX_CAP:
+ ret = ENOTSUP;
+ break;
+ /*
+ * These are read-only at this time as we don't support configuring
+ * auto-negotiation. See the theory statement in i40e_main.c.
+ */
+ case MAC_PROP_EN_100FDX_CAP:
+ case MAC_PROP_EN_1000FDX_CAP:
+ case MAC_PROP_EN_10GFDX_CAP:
+ case MAC_PROP_EN_40GFDX_CAP:
+ case MAC_PROP_AUTONEG:
+ case MAC_PROP_FLOWCTRL:
+ ret = ENOTSUP;
+ break;
+
+ case MAC_PROP_MTU:
+ bcopy(pr_val, &new_mtu, sizeof (new_mtu));
+ if (new_mtu == i40e->i40e_sdu)
+ break;
+
+ if (new_mtu < I40E_MIN_MTU ||
+ new_mtu > I40E_MAX_MTU) {
+ ret = EINVAL;
+ break;
+ }
+
+ if (i40e->i40e_state & I40E_STARTED) {
+ ret = EBUSY;
+ break;
+ }
+
+ ret = mac_maxsdu_update(i40e->i40e_mac_hdl, new_mtu);
+ if (ret == 0) {
+ i40e->i40e_sdu = new_mtu;
+ i40e_update_mtu(i40e);
+ }
+ break;
+
+ case MAC_PROP_PRIVATE:
+ ret = i40e_m_setprop_private(i40e, pr_name, pr_valsize, pr_val);
+ break;
+ default:
+ ret = ENOTSUP;
+ break;
+ }
+
+ mutex_exit(&i40e->i40e_general_lock);
+ return (ret);
+}
+
+static int
+i40e_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+ uint_t pr_valsize, void *pr_val)
+{
+ i40e_t *i40e = arg;
+ uint64_t speed;
+ int ret = 0;
+ uint8_t *u8;
+ link_flowctrl_t fctl;
+
+ mutex_enter(&i40e->i40e_general_lock);
+
+ switch (pr_num) {
+ case MAC_PROP_DUPLEX:
+ if (pr_valsize < sizeof (link_duplex_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+ bcopy(&i40e->i40e_link_duplex, pr_val, sizeof (link_duplex_t));
+ break;
+ case MAC_PROP_SPEED:
+ if (pr_valsize < sizeof (uint64_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+ speed = i40e->i40e_link_speed * 1000000ULL;
+ bcopy(&speed, pr_val, sizeof (speed));
+ break;
+ case MAC_PROP_STATUS:
+ if (pr_valsize < sizeof (link_state_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+ bcopy(&i40e->i40e_link_state, pr_val, sizeof (link_state_t));
+ break;
+ case MAC_PROP_AUTONEG:
+ if (pr_valsize < sizeof (uint8_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+ u8 = pr_val;
+ *u8 = 1;
+ break;
+ case MAC_PROP_FLOWCTRL:
+ /*
+ * Because we don't currently support hardware flow control, we
+ * just hardcode this to be none.
+ */
+ if (pr_valsize < sizeof (link_flowctrl_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+ fctl = LINK_FLOWCTRL_NONE;
+ bcopy(&fctl, pr_val, sizeof (link_flowctrl_t));
+ break;
+ case MAC_PROP_MTU:
+ if (pr_valsize < sizeof (uint32_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+ bcopy(&i40e->i40e_sdu, pr_val, sizeof (uint32_t));
+ break;
+
+ /*
+ * Because we don't let users control the speeds we may auto-negotiate
+ * to, the values of the ADV_ and EN_ will always be the same.
+ */
+ case MAC_PROP_ADV_100FDX_CAP:
+ case MAC_PROP_EN_100FDX_CAP:
+ if (pr_valsize < sizeof (uint8_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+ u8 = pr_val;
+ *u8 = (i40e->i40e_phy.link_speed & I40E_LINK_SPEED_100MB) != 0;
+ break;
+ case MAC_PROP_ADV_1000FDX_CAP:
+ case MAC_PROP_EN_1000FDX_CAP:
+ if (pr_valsize < sizeof (uint8_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+ u8 = pr_val;
+ *u8 = (i40e->i40e_phy.link_speed & I40E_LINK_SPEED_1GB) != 0;
+ break;
+ case MAC_PROP_ADV_10GFDX_CAP:
+ case MAC_PROP_EN_10GFDX_CAP:
+ if (pr_valsize < sizeof (uint8_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+ u8 = pr_val;
+ *u8 = (i40e->i40e_phy.link_speed & I40E_LINK_SPEED_10GB) != 0;
+ break;
+ case MAC_PROP_ADV_40GFDX_CAP:
+ case MAC_PROP_EN_40GFDX_CAP:
+ if (pr_valsize < sizeof (uint8_t)) {
+ ret = EOVERFLOW;
+ break;
+ }
+ u8 = pr_val;
+ *u8 = (i40e->i40e_phy.link_speed & I40E_LINK_SPEED_40GB) != 0;
+ break;
+ case MAC_PROP_PRIVATE:
+ ret = i40e_m_getprop_private(i40e, pr_name, pr_valsize, pr_val);
+ break;
+ default:
+ ret = ENOTSUP;
+ break;
+ }
+
+ mutex_exit(&i40e->i40e_general_lock);
+
+ return (ret);
+}
+
+static void
+i40e_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+ mac_prop_info_handle_t prh)
+{
+ i40e_t *i40e = arg;
+
+ mutex_enter(&i40e->i40e_general_lock);
+
+ switch (pr_num) {
+ case MAC_PROP_DUPLEX:
+ case MAC_PROP_SPEED:
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ break;
+ case MAC_PROP_FLOWCTRL:
+ /*
+ * At the moment, the driver doesn't support flow control, hence
+ * why this is set to read-only and none.
+ */
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ mac_prop_info_set_default_link_flowctrl(prh,
+ LINK_FLOWCTRL_NONE);
+ break;
+ case MAC_PROP_MTU:
+ mac_prop_info_set_range_uint32(prh, I40E_MIN_MTU, I40E_MAX_MTU);
+ break;
+
+ /*
+ * We set the defaults for these based upon the phy's ability to
+ * support the speeds. Note, auto-negotiation is required for fiber,
+ * hence it is read-only and always enabled. When we have access to
+ * copper phys we can revisit this.
+ */
+ case MAC_PROP_AUTONEG:
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ mac_prop_info_set_default_uint8(prh, 1);
+ break;
+ case MAC_PROP_ADV_100FDX_CAP:
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ mac_prop_info_set_default_uint8(prh,
+ (i40e->i40e_phy.link_speed & I40E_LINK_SPEED_100MB) != 0);
+ break;
+ case MAC_PROP_EN_100FDX_CAP:
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ mac_prop_info_set_default_uint8(prh,
+ (i40e->i40e_phy.link_speed & I40E_LINK_SPEED_100MB) != 0);
+ break;
+ case MAC_PROP_ADV_1000FDX_CAP:
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ mac_prop_info_set_default_uint8(prh,
+ (i40e->i40e_phy.link_speed & I40E_LINK_SPEED_1GB) != 0);
+ break;
+ case MAC_PROP_EN_1000FDX_CAP:
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ mac_prop_info_set_default_uint8(prh,
+ (i40e->i40e_phy.link_speed & I40E_LINK_SPEED_1GB) != 0);
+ break;
+ case MAC_PROP_ADV_10GFDX_CAP:
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ mac_prop_info_set_default_uint8(prh,
+ (i40e->i40e_phy.link_speed & I40E_LINK_SPEED_10GB) != 0);
+ break;
+ case MAC_PROP_EN_10GFDX_CAP:
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ mac_prop_info_set_default_uint8(prh,
+ (i40e->i40e_phy.link_speed & I40E_LINK_SPEED_10GB) != 0);
+ break;
+ case MAC_PROP_ADV_40GFDX_CAP:
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ mac_prop_info_set_default_uint8(prh,
+ (i40e->i40e_phy.link_speed & I40E_LINK_SPEED_40GB) != 0);
+ break;
+ case MAC_PROP_EN_40GFDX_CAP:
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ mac_prop_info_set_default_uint8(prh,
+ (i40e->i40e_phy.link_speed & I40E_LINK_SPEED_40GB) != 0);
+ break;
+ case MAC_PROP_PRIVATE:
+ i40e_m_propinfo_private(i40e, pr_name, prh);
+ break;
+ default:
+ break;
+ }
+
+ mutex_exit(&i40e->i40e_general_lock);
+}
+
+#define I40E_M_CALLBACK_FLAGS \
+ (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO)
+
+static mac_callbacks_t i40e_m_callbacks = {
+ I40E_M_CALLBACK_FLAGS,
+ i40e_m_stat,
+ i40e_m_start,
+ i40e_m_stop,
+ i40e_m_promisc,
+ i40e_m_multicast,
+ NULL,
+ NULL,
+ NULL,
+ i40e_m_ioctl,
+ i40e_m_getcapab,
+ NULL,
+ NULL,
+ i40e_m_setprop,
+ i40e_m_getprop,
+ i40e_m_propinfo
+};
+
+boolean_t
+i40e_register_mac(i40e_t *i40e)
+{
+ struct i40e_hw *hw = &i40e->i40e_hw_space;
+ int status;
+ mac_register_t *mac = mac_alloc(MAC_VERSION);
+
+ if (mac == NULL)
+ return (B_FALSE);
+
+ mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
+ mac->m_driver = i40e;
+ mac->m_dip = i40e->i40e_dip;
+ mac->m_src_addr = hw->mac.addr;
+ mac->m_callbacks = &i40e_m_callbacks;
+ mac->m_min_sdu = 0;
+ mac->m_max_sdu = i40e->i40e_sdu;
+ mac->m_margin = VLAN_TAGSZ;
+ mac->m_priv_props = i40e_priv_props;
+ mac->m_v12n = MAC_VIRT_LEVEL1;
+
+ status = mac_register(mac, &i40e->i40e_mac_hdl);
+ if (status != 0)
+ i40e_error(i40e, "mac_register() returned %d", status);
+ mac_free(mac);
+
+ return (status == 0);
+}
diff --git a/usr/src/uts/common/io/i40e/i40e_intr.c b/usr/src/uts/common/io/i40e/i40e_intr.c
new file mode 100644
index 0000000000..ba9bea7b20
--- /dev/null
+++ b/usr/src/uts/common/io/i40e/i40e_intr.c
@@ -0,0 +1,757 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+/*
+ * -------------------------
+ * Interrupt Handling Theory
+ * -------------------------
+ *
+ * There are a couple different sets of interrupts that we need to worry about:
+ *
+ * - Interrupts from receive queues
+ * - Interrupts from transmit queues
+ * - 'Other Interrupts', such as the administrative queue
+ *
+ * 'Other Interrupts' are asynchronous events such as a link status change event
+ * being posted to the administrative queue, unrecoverable ECC errors, and more.
+ * If we have something being posted to the administrative queue, then we go
+ * through and process it, because it's generally enabled as a separate logical
+ * interrupt. Note, we may need to do more here eventually. To re-enable the
+ * interrupts from the 'Other Interrupts' section, we need to clear the PBA and
+ * write ENA to PFINT_ICR0.
+ *
+ * Interrupts from the transmit and receive queues indicates that our requests
+ * have been processed. In the rx case, it means that we have data that we
+ * should take a look at and send up the stack. In the tx case, it means that
+ * data which we got from MAC has now been sent out on the wire and we can free
+ * the associated data. Most of the logic for acting upon the presence of this
+ * data can be found in i40e_transciever.c which handles all of the DMA, rx, and
+ * tx operations. This file is dedicated to handling and dealing with interrupt
+ * processing.
+ *
+ * All devices supported by this driver support three kinds of interrupts:
+ *
+ * o Extended Message Signaled Interrupts (MSI-X)
+ * o Message Signaled Interrupts (MSI)
+ * o Legacy PCI interrupts (INTx)
+ *
+ * Generally speaking the hardware logically handles MSI and INTx the same and
+ * restricts us to only using a single interrupt, which isn't the interesting
+ * case. With MSI-X available, each physical function of the device provides the
+ * opportunity for multiple interrupts which is what we'll focus on.
+ *
+ * --------------------
+ * Interrupt Management
+ * --------------------
+ *
+ * By default, the admin queue, which consists of the asynchronous other
+ * interrupts is always bound to MSI-X vector zero. Next, we spread out all of
+ * the other interrupts that we have available to us over the remaining
+ * interrupt vectors.
+ *
+ * This means that there may be multiple queues, both tx and rx, which are
+ * mapped to the same interrupt. When the interrupt fires, we'll have to check
+ * all of them for servicing, before we go through and indicate that the
+ * interrupt is claimed.
+ *
+ * The hardware provides the means of mapping various queues to MSI-X interrupts
+ * by programming the I40E_QINT_RQCTL() and I4OE_QINT_TQCTL() registers. These
+ * registers can also be used to enable and disable whether or not the queue is
+ * a source of interrupts. As part of this, the hardware requires that we
+ * maintain a linked list of queues for each interrupt vector. While it may seem
+ * like this is only there for the purproses of ITRs, that's not the case. The
+ * first queue must be programmed in I40E_QINT_LNKLSTN(%vector) register. Each
+ * queue defines the next one in either the I40E_QINT_RQCTL or I40E_QINT_TQCTL
+ * register.
+ *
+ * Because we only have a single queue enabled at the moment and we always have
+ * two interrupts, we do something pretty simple and just know that there's one
+ * data queue in the interrupt handler. Longer term, we'll need to think harder
+ * about this, but for the moment it'll have to suffice.
+ *
+ * Finally, the individual interrupt vector itself has the ability to be enabled
+ * and disabled. The overall interrupt is controlled through the
+ * I40E_PFINT_DYN_CTLN() register. This is used to turn on and off the interrupt
+ * as a whole.
+ *
+ * Note that this means that both the individual queue and the interrupt as a
+ * whole can be toggled and re-enabled.
+ *
+ * -------------------
+ * Non-MSIX Management
+ * -------------------
+ *
+ * We may have a case where the Operating System is unable to actually allocate
+ * any MSI-X to the system. In such a world, there is only one transmit/receive
+ * queue pair and it is bound to the same interrupt with index zero. The
+ * hardware doesn't allow us access to additional interrupt vectors in these
+ * modes. Note that technically we could support more transmit/receive queues if
+ * we wanted.
+ *
+ * In this world, because the interrupts for the admin queue and traffic are
+ * mixed together, we have to consult ICR0 to determine what has occurred. The
+ * QINT_TQCTL and QINT_RQCTL registers have a field, 'MSI-X 0 index' which
+ * allows us to set a specific bit in ICR0. There are up to seven such bits;
+ * however, we only use the bit 0 and 1 for the rx and tx queue respectively.
+ * These are contained by the I40E_INTR_NOTX_{R|T}X_QUEUE and
+ * I40E_INTR_NOTX_{R|T}X_MASK registers respectively.
+ *
+ * Unfortunately, these corresponding queue bits have no corresponding entry in
+ * the ICR0_ENA register. So instead, when enabling interrupts on the queues, we
+ * end up enabling it on the queue registers rather than on the MSI-X registers.
+ * In the MSI-X world, because they can be enabled and disabled, this is
+ * different and the queues can always be enabled and disabled, but the
+ * interrupts themselves are toggled (ignoring the question of interrupt
+ * blanking for polling on rings).
+ *
+ * Finally, we still have to set up the interrupt linked list, but the list is
+ * instead rooted at the register I40E_PFINT_LNKLST0, rather than being tied to
+ * one of the other MSI-X registers.
+ *
+ * --------------------
+ * Interrupt Moderation
+ * --------------------
+ *
+ * The XL710 hardware has three different interrupt moderation registers per
+ * interrupt. Unsurprisingly, we use these for:
+ *
+ * o RX interrupts
+ * o TX interrupts
+ * o 'Other interrupts' (link status change, admin queue, etc.)
+ *
+ * By default, we throttle 'other interrupts' the most, then TX interrupts, and
+ * then RX interrupts. The default values for these were based on trying to
+ * reason about both the importance and frequency of events. Generally speaking
+ * 'other interrupts' are not very frequent and they're not important for the
+ * I/O data path in and of itself (though they may indicate issues with the I/O
+ * data path).
+ *
+ * On the flip side, when we're not polling, RX interrupts are very important.
+ * The longer we wait for them, the more latency that we inject into the system.
+ * However, if we allow interrupts to occur too frequently, we risk a few
+ * problems:
+ *
+ * 1) Abusing system resources. Without proper interrupt blanking and polling,
+ * we can see upwards of 200k-300k interrupts per second on the system.
+ *
+ * 2) Not enough data coalescing to enable polling. In other words, the more
+ * data that we allow to build up, the more likely we'll be able to enable
+ * polling mode and allowing us to better handle bulk data.
+ *
+ * In-between the 'other interrupts' and the TX interrupts we have the
+ * reclamation of TX buffers. This operation is not quite as important as we
+ * generally size the ring large enough that we should be able to reclaim a
+ * substantial amount of the descriptors that we have used per interrupt. So
+ * while it's important that this interrupt occur, we don't necessarily need it
+ * firing as frequently as RX; it doesn't, on its own, induce additional latency
+ * into the system.
+ *
+ * Based on all this we currently assign static ITR values for the system. While
+ * we could move to a dynamic system (the hardware supports that), we'd want to
+ * make sure that we're seeing problems from this that we believe would be
+ * generally helped by the added complexity.
+ *
+ * Based on this, the default values that we have allow for the following
+ * interrupt thresholds:
+ *
+ * o 20k interrupts/s for RX
+ * o 5k interrupts/s for TX
+ * o 2k interupts/s for 'Other Interrupts'
+ */
+
+#include "i40e_sw.h"
+
+#define I40E_INTR_NOTX_QUEUE 0
+#define I40E_INTR_NOTX_INTR 0
+#define I40E_INTR_NOTX_RX_QUEUE 0
+#define I40E_INTR_NOTX_RX_MASK (1 << I40E_PFINT_ICR0_QUEUE_0_SHIFT)
+#define I40E_INTR_NOTX_TX_QUEUE 1
+#define I40E_INTR_NOTX_TX_MASK (1 << I40E_PFINT_ICR0_QUEUE_1_SHIFT)
+
+void
+i40e_intr_set_itr(i40e_t *i40e, i40e_itr_index_t itr, uint_t val)
+{
+ int i;
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+
+ VERIFY3U(val, <=, I40E_MAX_ITR);
+ VERIFY3U(itr, <, I40E_ITR_INDEX_NONE);
+
+ /*
+ * No matter the interrupt mode, the ITR for other interrupts is always
+ * on interrupt zero and the same is true if we're not using MSI-X.
+ */
+ if (itr == I40E_ITR_INDEX_OTHER ||
+ i40e->i40e_intr_type != DDI_INTR_TYPE_MSIX) {
+ I40E_WRITE_REG(hw, I40E_PFINT_ITR0(itr), val);
+ return;
+ }
+
+ for (i = 1; i < i40e->i40e_intr_count; i++) {
+ I40E_WRITE_REG(hw, I40E_PFINT_ITRN(itr, i - 1), val);
+ }
+}
+
+/*
+ * Re-enable the adminq. Note that the adminq doesn't have a traditional queue
+ * associated with it from an interrupt perspective and just lives on ICR0.
+ * However when MSI-X interrupts are not being used, then this also enables and
+ * disables those interrupts.
+ */
+static void
+i40e_intr_adminq_enable(i40e_t *i40e)
+{
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+ uint32_t reg;
+
+ reg = I40E_PFINT_DYN_CTL0_INTENA_MASK |
+ I40E_PFINT_DYN_CTL0_CLEARPBA_MASK |
+ (I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT);
+ I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTL0, reg);
+ i40e_flush(hw);
+}
+
+static void
+i40e_intr_adminq_disable(i40e_t *i40e)
+{
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+ uint32_t reg;
+
+ reg = I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT;
+ I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTL0, reg);
+}
+
+static void
+i40e_intr_io_enable(i40e_t *i40e, int vector)
+{
+ uint32_t reg;
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+
+ reg = I40E_PFINT_DYN_CTLN_INTENA_MASK |
+ I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
+ (I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
+ I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(vector - 1), reg);
+}
+
+static void
+i40e_intr_io_disable(i40e_t *i40e, int vector)
+{
+ uint32_t reg;
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+
+ reg = I40E_ITR_INDEX_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT;
+ I40E_WRITE_REG(hw, I40E_PFINT_DYN_CTLN(vector - 1), reg);
+}
+
+/*
+ * When MSI-X interrupts are being used, then we can enable the actual
+ * interrupts themselves. However, when they are not, we instead have to turn
+ * towards the queue's CAUSE_ENA bit and enable that.
+ */
+void
+i40e_intr_io_enable_all(i40e_t *i40e)
+{
+ if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
+ int i;
+
+ for (i = 1; i < i40e->i40e_intr_count; i++) {
+ i40e_intr_io_enable(i40e, i);
+ }
+ } else {
+ uint32_t reg;
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+
+ reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE));
+ reg |= I40E_QINT_RQCTL_CAUSE_ENA_MASK;
+ I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), reg);
+
+ reg = I40E_READ_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE));
+ reg |= I40E_QINT_TQCTL_CAUSE_ENA_MASK;
+ I40E_WRITE_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE), reg);
+ }
+}
+
+/*
+ * When MSI-X interrupts are being used, then we can disable the actual
+ * interrupts themselves. However, when they are not, we instead have to turn
+ * towards the queue's CAUSE_ENA bit and disable that.
+ */
+void
+i40e_intr_io_disable_all(i40e_t *i40e)
+{
+ if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
+ int i;
+
+ for (i = 1; i < i40e->i40e_intr_count; i++) {
+ i40e_intr_io_disable(i40e, i);
+ }
+ } else {
+ uint32_t reg;
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+
+ reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE));
+ reg &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK;
+ I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), reg);
+
+ reg = I40E_READ_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE));
+ reg &= ~I40E_QINT_TQCTL_CAUSE_ENA_MASK;
+ I40E_WRITE_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE), reg);
+ }
+}
+
+/*
+ * As part of disabling the tx and rx queue's we're technically supposed to
+ * remove the linked list entries. The simplest way is to clear the LNKLSTN
+ * register by setting it to I40E_QUEUE_TYPE_EOL (0x7FF).
+ *
+ * Note all of the FM register access checks are performed by the caller.
+ */
+void
+i40e_intr_io_clear_cause(i40e_t *i40e)
+{
+ int i;
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+
+ if (i40e->i40e_intr_type != DDI_INTR_TYPE_MSIX) {
+ uint32_t reg;
+ reg = I40E_QUEUE_TYPE_EOL;
+ I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, reg);
+ return;
+ }
+
+ for (i = 1; i < i40e->i40e_intr_count; i++) {
+ uint32_t reg;
+#ifdef DEBUG
+ /*
+ * Verify that the interrupt in question is disabled. This is a
+ * prerequisite of modifying the data in question.
+ */
+ reg = I40E_READ_REG(hw, I40E_PFINT_DYN_CTLN(i - 1));
+ VERIFY0(reg & I40E_PFINT_DYN_CTLN_INTENA_MASK);
+#endif
+ reg = I40E_QUEUE_TYPE_EOL;
+ I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(i - 1), reg);
+ }
+
+ i40e_flush(hw);
+}
+
+/*
+ * Finalize interrupt handling. Mostly this disables the admin queue.
+ */
+void
+i40e_intr_chip_fini(i40e_t *i40e)
+{
+#ifdef DEBUG
+ int i;
+ uint32_t reg;
+
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+
+ /*
+ * Take a look and verify that all other interrupts have been disabled
+ * and the interrupt linked lists have been zeroed.
+ */
+ if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
+ for (i = 1; i < i40e->i40e_intr_count; i++) {
+ reg = I40E_READ_REG(hw, I40E_PFINT_DYN_CTLN(i - 1));
+ VERIFY0(reg & I40E_PFINT_DYN_CTLN_INTENA_MASK);
+
+ reg = I40E_READ_REG(hw, I40E_PFINT_LNKLSTN(i - 1));
+ VERIFY3U(reg, ==, I40E_QUEUE_TYPE_EOL);
+ }
+ }
+#endif
+
+ i40e_intr_adminq_disable(i40e);
+}
+
+/*
+ * Enable all of the queues and set the corresponding LNKLSTN registers. Note
+ * that we always enable queues as interrupt sources, even though we don't
+ * enable the MSI-X interrupt vectors.
+ */
+static void
+i40e_intr_init_queue_msix(i40e_t *i40e)
+{
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+ uint32_t reg;
+
+ /*
+ * Because we only have a single queue, just do something simple now.
+ * How this all works will need to really be properly redone based on
+ * the bit maps, etc. Note that we skip the ITR logic for the moment,
+ * just to make our lives as explicit and simple as possible.
+ */
+ reg = (0 << I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT) |
+ (I40E_QUEUE_TYPE_RX << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
+ I40E_WRITE_REG(hw, I40E_PFINT_LNKLSTN(0), reg);
+
+ reg = (1 << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) |
+ (I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
+ (0 << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) |
+ (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) |
+ I40E_QINT_RQCTL_CAUSE_ENA_MASK;
+
+ I40E_WRITE_REG(hw, I40E_QINT_RQCTL(0), reg);
+
+ reg = (1 << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) |
+ (I40E_ITR_INDEX_TX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
+ (I40E_QUEUE_TYPE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) |
+ (I40E_QUEUE_TYPE_RX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) |
+ I40E_QINT_TQCTL_CAUSE_ENA_MASK;
+
+ I40E_WRITE_REG(hw, I40E_QINT_TQCTL(0), reg);
+
+}
+
+/*
+ * Set up a single queue to share the admin queue interrupt in the non-MSI-X
+ * world. Note we do not enable the queue as an interrupt cause at this time. We
+ * don't have any other vector of control here, unlike with the MSI-X interrupt
+ * case.
+ */
+static void
+i40e_intr_init_queue_shared(i40e_t *i40e)
+{
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+ uint32_t reg;
+
+ VERIFY(i40e->i40e_intr_type == DDI_INTR_TYPE_FIXED ||
+ i40e->i40e_intr_type == DDI_INTR_TYPE_MSI);
+
+ reg = (I40E_INTR_NOTX_QUEUE << I40E_PFINT_LNKLST0_FIRSTQ_INDX_SHIFT) |
+ (I40E_QUEUE_TYPE_RX << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
+ I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, reg);
+
+ reg = (I40E_INTR_NOTX_INTR << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) |
+ (I40E_ITR_INDEX_RX << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
+ (I40E_INTR_NOTX_RX_QUEUE << I40E_QINT_RQCTL_MSIX0_INDX_SHIFT) |
+ (I40E_INTR_NOTX_QUEUE << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) |
+ (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT);
+
+ I40E_WRITE_REG(hw, I40E_QINT_RQCTL(I40E_INTR_NOTX_QUEUE), reg);
+
+ reg = (I40E_INTR_NOTX_INTR << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) |
+ (I40E_ITR_INDEX_TX << I40E_QINT_TQCTL_ITR_INDX_SHIFT) |
+ (I40E_INTR_NOTX_TX_QUEUE << I40E_QINT_TQCTL_MSIX0_INDX_SHIFT) |
+ (I40E_QUEUE_TYPE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) |
+ (I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT);
+
+ I40E_WRITE_REG(hw, I40E_QINT_TQCTL(I40E_INTR_NOTX_QUEUE), reg);
+}
+
+/*
+ * Enable the specified queue as a valid source of interrupts. Note, this should
+ * only be used as part of the GLDv3's interrupt blanking routines. The debug
+ * build assertions are specific to that.
+ */
+void
+i40e_intr_rx_queue_enable(i40e_t *i40e, uint_t queue)
+{
+ uint32_t reg;
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+
+ ASSERT(MUTEX_HELD(&i40e->i40e_general_lock));
+ ASSERT(queue < i40e->i40e_num_trqpairs);
+
+ reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(queue));
+ ASSERT0(reg & I40E_QINT_RQCTL_CAUSE_ENA_MASK);
+ reg |= I40E_QINT_RQCTL_CAUSE_ENA_MASK;
+ I40E_WRITE_REG(hw, I40E_QINT_RQCTL(queue), reg);
+}
+
+/*
+ * Disable the specified queue as a valid source of interrupts. Note, this
+ * should only be used as part of the GLDv3's interrupt blanking routines. The
+ * debug build assertions are specific to that.
+ */
+void
+i40e_intr_rx_queue_disable(i40e_t *i40e, uint_t queue)
+{
+ uint32_t reg;
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+
+ ASSERT(MUTEX_HELD(&i40e->i40e_general_lock));
+ ASSERT(queue < i40e->i40e_num_trqpairs);
+
+ reg = I40E_READ_REG(hw, I40E_QINT_RQCTL(queue));
+ ASSERT3U(reg & I40E_QINT_RQCTL_CAUSE_ENA_MASK, ==,
+ I40E_QINT_RQCTL_CAUSE_ENA_MASK);
+ reg &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK;
+ I40E_WRITE_REG(hw, I40E_QINT_RQCTL(queue), reg);
+}
+
+/*
+ * Start up the various chip's interrupt handling. We not only configure the
+ * adminq here, but we also go through and configure all of the actual queues,
+ * the interrupt linked lists, and others.
+ */
+void
+i40e_intr_chip_init(i40e_t *i40e)
+{
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+ uint32_t reg;
+
+ /*
+ * Ensure that all non adminq interrupts are disabled at the chip level.
+ */
+ i40e_intr_io_disable_all(i40e);
+
+ I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, 0);
+ (void) I40E_READ_REG(hw, I40E_PFINT_ICR0);
+
+ /*
+ * Always enable all of the other-class interrupts to be on their own
+ * ITR. This only needs to be set on interrupt zero, which has its own
+ * special setting.
+ */
+ reg = I40E_ITR_INDEX_OTHER << I40E_PFINT_STAT_CTL0_OTHER_ITR_INDX_SHIFT;
+ I40E_WRITE_REG(hw, I40E_PFINT_STAT_CTL0, reg);
+
+ /*
+ * Enable interrupt types we expect to receive. At the moment, this
+ * is limited to the adminq; however, we'll want to review 11.2.2.9.22
+ * for more types here as we add support for detecting them, handling
+ * them, and resetting the device as appropriate.
+ */
+ reg = I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
+ I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, reg);
+
+ /*
+ * Always set the interrupt linked list to empty. We'll come back and
+ * change this if MSI-X are actually on the scene.
+ */
+ I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, I40E_QUEUE_TYPE_EOL);
+
+ i40e_intr_adminq_enable(i40e);
+
+ /*
+ * Set up all of the queues and map them to interrupts based on the bit
+ * assignments.
+ */
+ if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
+ i40e_intr_init_queue_msix(i40e);
+ } else {
+ i40e_intr_init_queue_shared(i40e);
+ }
+
+ /*
+ * Finally set all of the default ITRs for the interrupts. Note that the
+ * queues will have been set up above.
+ */
+ i40e_intr_set_itr(i40e, I40E_ITR_INDEX_RX, i40e->i40e_rx_itr);
+ i40e_intr_set_itr(i40e, I40E_ITR_INDEX_TX, i40e->i40e_tx_itr);
+ i40e_intr_set_itr(i40e, I40E_ITR_INDEX_OTHER, i40e->i40e_other_itr);
+}
+
+static void
+i40e_intr_adminq_work(i40e_t *i40e)
+{
+ struct i40e_hw *hw = &i40e->i40e_hw_space;
+ struct i40e_arq_event_info evt;
+ uint16_t remain = 1;
+
+ bzero(&evt, sizeof (struct i40e_arq_event_info));
+ evt.buf_len = I40E_ADMINQ_BUFSZ;
+ evt.msg_buf = i40e->i40e_aqbuf;
+
+ while (remain != 0) {
+ enum i40e_status_code ret;
+ uint16_t opcode;
+
+ /*
+ * At the moment, the only error code that seems to be returned
+ * is one saying that there's no work. In such a case we leave
+ * this be.
+ */
+ ret = i40e_clean_arq_element(hw, &evt, &remain);
+ if (ret != I40E_SUCCESS)
+ break;
+
+ opcode = LE_16(evt.desc.opcode);
+ switch (opcode) {
+ case i40e_aqc_opc_get_link_status:
+ mutex_enter(&i40e->i40e_general_lock);
+ i40e_link_check(i40e);
+ mutex_exit(&i40e->i40e_general_lock);
+ break;
+ default:
+ /*
+ * Longer term we'll want to enable other causes here
+ * and get these cleaned up and doing something.
+ */
+ break;
+ }
+ }
+}
+
+static void
+i40e_intr_rx_work(i40e_t *i40e, int queue)
+{
+ mblk_t *mp;
+ i40e_trqpair_t *itrq;
+
+ ASSERT(queue < i40e->i40e_num_trqpairs);
+ itrq = &i40e->i40e_trqpairs[queue];
+
+ mutex_enter(&itrq->itrq_rx_lock);
+ mp = i40e_ring_rx(itrq, I40E_POLL_NULL);
+ mutex_exit(&itrq->itrq_rx_lock);
+
+ if (mp != NULL) {
+ mac_rx_ring(i40e->i40e_mac_hdl, itrq->itrq_macrxring, mp,
+ itrq->itrq_rxgen);
+ }
+}
+
+static void
+i40e_intr_tx_work(i40e_t *i40e, int queue)
+{
+ i40e_trqpair_t *itrq;
+
+ itrq = &i40e->i40e_trqpairs[queue];
+ i40e_tx_recycle_ring(itrq);
+}
+
+/*
+ * At the moment, the only 'other' interrupt on ICR0 that we handle is the
+ * adminq. We should go through and support the other notifications at some
+ * point.
+ */
+static void
+i40e_intr_other_work(i40e_t *i40e)
+{
+ struct i40e_hw *hw = &i40e->i40e_hw_space;
+ uint32_t reg;
+
+ reg = I40E_READ_REG(hw, I40E_PFINT_ICR0);
+ if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) !=
+ DDI_FM_OK) {
+ ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED);
+ atomic_or_32(&i40e->i40e_state, I40E_ERROR);
+ return;
+ }
+
+ if (reg & I40E_PFINT_ICR0_ADMINQ_MASK)
+ i40e_intr_adminq_work(i40e);
+
+ /*
+ * Make sure that the adminq interrupt is not masked and then explicitly
+ * enable the adminq and thus the other interrupt.
+ */
+ reg = I40E_READ_REG(hw, I40E_PFINT_ICR0_ENA);
+ reg |= I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
+ I40E_WRITE_REG(hw, I40E_PFINT_ICR0_ENA, reg);
+
+ i40e_intr_adminq_enable(i40e);
+}
+
+uint_t
+i40e_intr_msix(void *arg1, void *arg2)
+{
+ i40e_t *i40e = (i40e_t *)arg1;
+ int vector_idx = (int)(uintptr_t)arg2;
+
+ /*
+ * When using MSI-X interrupts, vector 0 is always reserved for the
+ * adminq at this time. Though longer term, we'll want to also bridge
+ * some I/O to them.
+ */
+ if (vector_idx == 0) {
+ i40e_intr_other_work(i40e);
+ return (DDI_INTR_CLAIMED);
+ }
+
+ VERIFY(vector_idx == 1);
+
+ /*
+ * Note that we explicitly do not check this value under the lock even
+ * though assignments to it are done so. In this case, the cost of
+ * getting this wrong is at worst a bit of additional contention and
+ * even more rarely, a duplicated packet. However, the cost on the other
+ * hand is a lot more. This is something that as we more generally
+ * implement ring support we should revisit.
+ */
+ if (i40e->i40e_intr_poll != B_TRUE)
+ i40e_intr_rx_work(i40e, 0);
+ i40e_intr_tx_work(i40e, 0);
+ i40e_intr_io_enable(i40e, 1);
+
+ return (DDI_INTR_CLAIMED);
+}
+
+static uint_t
+i40e_intr_notx(i40e_t *i40e, boolean_t shared)
+{
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+ uint32_t reg;
+ int ret = DDI_INTR_CLAIMED;
+
+ if (shared == B_TRUE) {
+ mutex_enter(&i40e->i40e_general_lock);
+ if (i40e->i40e_state & I40E_SUSPENDED) {
+ mutex_exit(&i40e->i40e_general_lock);
+ return (DDI_INTR_UNCLAIMED);
+ }
+ mutex_exit(&i40e->i40e_general_lock);
+ }
+
+ reg = I40E_READ_REG(hw, I40E_PFINT_ICR0);
+ if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) !=
+ DDI_FM_OK) {
+ ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED);
+ atomic_or_32(&i40e->i40e_state, I40E_ERROR);
+ return (DDI_INTR_CLAIMED);
+ }
+
+ if (reg == 0) {
+ if (shared == B_TRUE)
+ ret = DDI_INTR_UNCLAIMED;
+ goto done;
+ }
+
+ if (reg & I40E_PFINT_ICR0_ADMINQ_MASK)
+ i40e_intr_adminq_work(i40e);
+
+ if (reg & I40E_INTR_NOTX_RX_MASK)
+ i40e_intr_rx_work(i40e, 0);
+
+ if (reg & I40E_INTR_NOTX_TX_MASK)
+ i40e_intr_tx_work(i40e, 0);
+
+done:
+ i40e_intr_adminq_enable(i40e);
+ return (ret);
+
+}
+
+/* ARGSUSED */
+uint_t
+i40e_intr_msi(void *arg1, void *arg2)
+{
+ i40e_t *i40e = (i40e_t *)arg1;
+
+ return (i40e_intr_notx(i40e, B_FALSE));
+}
+
+/* ARGSUSED */
+uint_t
+i40e_intr_legacy(void *arg1, void *arg2)
+{
+ i40e_t *i40e = (i40e_t *)arg1;
+
+ return (i40e_intr_notx(i40e, B_TRUE));
+}
diff --git a/usr/src/uts/common/io/i40e/i40e_main.c b/usr/src/uts/common/io/i40e/i40e_main.c
new file mode 100644
index 0000000000..91164abf87
--- /dev/null
+++ b/usr/src/uts/common/io/i40e/i40e_main.c
@@ -0,0 +1,2883 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
+ */
+
+/*
+ * i40e - Intel 10/40 Gb Ethernet driver
+ *
+ * The i40e driver is the main software device driver for the Intel 40 Gb family
+ * of devices. Note that these devices come in many flavors with both 40 GbE
+ * ports and 10 GbE ports. This device is the successor to the 82599 family of
+ * devices (ixgbe).
+ *
+ * Unlike previous generations of Intel 1 GbE and 10 GbE devices, the 40 GbE
+ * devices defined in the XL710 controller (previously known as Fortville) are a
+ * rather different beast and have a small switch embedded inside of them. In
+ * addition, the way that most of the programming is done has been overhauled.
+ * As opposed to just using PCIe memory mapped registers, it also has an
+ * administrative queue which is used to communicate with firmware running on
+ * the chip.
+ *
+ * Each physical function in the hardware shows up as a device that this driver
+ * will bind to. The hardware splits many resources evenly across all of the
+ * physical functions present on the device, while other resources are instead
+ * shared across the entire card and its up to the device driver to
+ * intelligently partition them.
+ *
+ * ------------
+ * Organization
+ * ------------
+ *
+ * This driver is made up of several files which have their own theory
+ * statements spread across them. We'll touch on the high level purpose of each
+ * file here, and then we'll get into more discussion on how the device is
+ * generally modelled with respect to the interfaces in illumos.
+ *
+ * i40e_gld.c: This file contains all of the bindings to MAC and the networking
+ * stack.
+ *
+ * i40e_intr.c: This file contains all of the interrupt service routines and
+ * contains logic to enable and disable interrupts on the hardware.
+ * It also contains the logic to map hardware resources such as the
+ * rings to and from interrupts and controls their ability to fire.
+ *
+ * There is a big theory statement on interrupts present there.
+ *
+ * i40e_main.c: The file that you're currently in. It interfaces with the
+ * traditional OS DDI interfaces and is in charge of configuring
+ * the device.
+ *
+ * i40e_osdep.[ch]: These files contain interfaces and definitions needed to
+ * work with Intel's common code for the device.
+ *
+ * i40e_stats.c: This file contains the general work and logic around our
+ * kstats. A theory statement on their organization and use of the
+ * hardware exists there.
+ *
+ * i40e_sw.h: This header file contains all of the primary structure definitions
+ * and constants that are used across the entire driver.
+ *
+ * i40e_transceiver.c: This file contains all of the logic for sending and
+ * receiving data. It contains all of the ring and DMA
+ * allocation logic, as well as, the actual interfaces to
+ * send and receive data.
+ *
+ * A big theory statement on ring management, descriptors,
+ * and how it ties into the OS is present there.
+ *
+ * --------------
+ * General Design
+ * --------------
+ *
+ * Before we go too far into the general way we've laid out data structures and
+ * the like, it's worth taking some time to explain how the hardware is
+ * organized. This organization informs a lot of how we do things at this time
+ * in the driver.
+ *
+ * Each physical device consists of a number of one or more ports, which are
+ * considered physical functions in the PCI sense and thus each get enumerated
+ * by the system, resulting in an instance being created and attached to. While
+ * there are many resources that are unique to each physical function eg.
+ * instance of the device, there are many that are shared across all of them.
+ * Several resources have an amount reserved for each VSI and then a static pool
+ * of resources, available for all functions on the card.
+ *
+ * The most important resource in hardware are its transmit and receive queue
+ * pairs (i40e_trqpair_t). These should be thought of as rings in GLDv3
+ * parlance. There are a set number of these on each device; however, they are
+ * statically partitioned among all of the different physical functions.
+ *
+ * 'Fortville' (the code name for this device family) is basically a switch. To
+ * map MAC addresses and other things to queues, we end up having to create
+ * Virtual Station Interfaces (VSIs) and establish forwarding rules that direct
+ * traffic to a queue. A VSI owns a collection of queues and has a series of
+ * forwarding rules that point to it. One way to think of this is to treat it
+ * like MAC does a VNIC. When MAC refers to a group, a collection of rings and
+ * classification resources, that is a VSI in i40e.
+ *
+ * The sets of VSIs is shared across the entire device, though there may be some
+ * amount that are reserved to each PF. Because the GLDv3 does not let us change
+ * the number of groups dynamically, we instead statically divide this amount
+ * evenly between all the functions that exist. In addition, we have the same
+ * problem with the mac address forwarding rules. There are a static number that
+ * exist shared across all the functions.
+ *
+ * To handle both of these resources, what we end up doing is going through and
+ * determining which functions belong to the same device. Nominally one might do
+ * this by having a nexus driver; however, a prime requirement for a nexus
+ * driver is identifying the various children and activating them. While it is
+ * possible to get this information from NVRAM, we would end up duplicating a
+ * lot of the PCI enumeration logic. Really, at the end of the day, the device
+ * doesn't give us the traditional identification properties we want from a
+ * nexus driver.
+ *
+ * Instead, we rely on some properties that are guaranteed to be unique. While
+ * it might be tempting to leverage the PBA or serial number of the device from
+ * NVRAM, there is nothing that says that two devices can't be mis-programmed to
+ * have the same values in NVRAM. Instead, we uniquely identify a group of
+ * functions based on their parent in the /devices tree, their PCI bus and PCI
+ * function identifiers. Using either on their own may not be sufficient.
+ *
+ * For each unique PCI device that we encounter, we'll create a i40e_device_t.
+ * From there, because we don't have a good way to tell the GLDv3 about sharing
+ * resources between everything, we'll end up just dividing the resources
+ * evenly between all of the functions. Longer term, if we don't have to declare
+ * to the GLDv3 that these resources are shared, then we'll maintain a pool and
+ * hae each PF allocate from the pool in the device, thus if only two of four
+ * ports are being used, for example, then all of the resources can still be
+ * used.
+ *
+ * -------------------------------------------
+ * Transmit and Receive Queue Pair Allocations
+ * -------------------------------------------
+ *
+ * NVRAM ends up assigning each PF its own share of the transmit and receive LAN
+ * queue pairs, we have no way of modifying it, only observing it. From there,
+ * it's up to us to map these queues to VSIs and VFs. Since we don't support any
+ * VFs at this time, we only focus on assignments to VSIs.
+ *
+ * At the moment, we used a static mapping of transmit/receive queue pairs to a
+ * given VSI (eg. rings to a group). Though in the fullness of time, we want to
+ * make this something which is fully dynamic and take advantage of documented,
+ * but not yet available functionality for adding filters based on VXLAN and
+ * other encapsulation technologies.
+ *
+ * -------------------------------------
+ * Broadcast, Multicast, and Promiscuous
+ * -------------------------------------
+ *
+ * As part of the GLDv3, we need to make sure that we can handle receiving
+ * broadcast and multicast traffic. As well as enabling promiscuous mode when
+ * requested. GLDv3 requires that all broadcast and multicast traffic be
+ * retrieved by the default group, eg. the first one. This is the same thing as
+ * the default VSI.
+ *
+ * To receieve broadcast traffic, we enable it through the admin queue, rather
+ * than use one of our filters for it. For multicast traffic, we reserve a
+ * certain number of the hash filters and assign them to a given PF. When we
+ * exceed those, we then switch to using promicuous mode for multicast traffic.
+ *
+ * More specifically, once we exceed the number of filters (indicated because
+ * the i40e_t`i40e_resources.ifr_nmcastfilt ==
+ * i40e_t`i40e_resources.ifr_nmcastfilt_used), we then instead need to toggle
+ * promiscuous mode. If promiscuous mode is toggled then we keep track of the
+ * number of MACs added to it by incrementing i40e_t`i40e_mcast_promisc_count.
+ * That will stay enabled until that count reaches zero indicating that we have
+ * only added multicast addresses that we have a corresponding entry for.
+ *
+ * Because MAC itself wants to toggle promiscuous mode, which includes both
+ * unicast and multicast traffic, we go through and keep track of that
+ * ourselves. That is maintained through the use of the i40e_t`i40e_promisc_on
+ * member.
+ *
+ * --------------
+ * VSI Management
+ * --------------
+ *
+ * At this time, we currently only support a single MAC group, and thus a single
+ * VSI. This VSI is considered the default VSI and should be the only one that
+ * exists after a reset. Currently it is stored as the member
+ * i40e_t`i40e_vsi_id. While this works for the moment and for an initial
+ * driver, it's not sufficient for the longer-term path of the driver. Instead,
+ * we'll want to actually have a unique i40e_vsi_t structure which is used
+ * everywhere. Note that this means that every place that uses the
+ * i40e_t`i40e_vsi_id will need to be refactored.
+ *
+ * ----------------
+ * Structure Layout
+ * ----------------
+ *
+ * The following images relates the core data structures together. The primary
+ * structure in the system is the i40e_t. It itself contains multiple rings,
+ * i40e_trqpair_t's which contain the various transmit and receive data. The
+ * receive data is stored outside of the i40e_trqpair_t and instead in the
+ * i40e_rx_data_t. The i40e_t has a corresponding i40e_device_t which keeps
+ * track of per-physical device state. Finally, for every active descriptor,
+ * there is a corresponding control block, which is where the
+ * i40e_rx_control_block_t and the i40e_tx_control_block_t come from.
+ *
+ * +-----------------------+ +-----------------------+
+ * | Global i40e_t list | | Global Device list |
+ * | | +--| |
+ * | i40e_glist | | | i40e_dlist |
+ * +-----------------------+ | +-----------------------+
+ * | v
+ * | +------------------------+ +-----------------------+
+ * | | Device-wide Structure |----->| Device-wide Structure |--> ...
+ * | | i40e_device_t | | i40e_device_t |
+ * | | | +-----------------------+
+ * | | dev_info_t * ------+--> Parent in devices tree.
+ * | | uint_t ------+--> PCI bus number
+ * | | uint_t ------+--> PCI device number
+ * | | uint_t ------+--> Number of functions
+ * | | i40e_switch_rsrcs_t ---+--> Captured total switch resources
+ * | | list_t ------+-------------+
+ * | +------------------------+ |
+ * | ^ |
+ * | +--------+ |
+ * | | v
+ * | +---------------------------+ | +-------------------+
+ * +->| GLDv3 Device, per PF |-----|-->| GLDv3 Device (PF) |--> ...
+ * | i40e_t | | | i40e_t |
+ * | **Primary Structure** | | +-------------------+
+ * | | |
+ * | i40e_device_t * --+-----+
+ * | i40e_state_t --+---> Device State
+ * | i40e_hw_t --+---> Intel common code structure
+ * | mac_handle_t --+---> GLDv3 handle to MAC
+ * | ddi_periodic_t --+---> Link activity timer
+ * | int (vsi_id) --+---> VSI ID, main identifier
+ * | i40e_func_rsrc_t --+---> Available hardware resources
+ * | i40e_switch_rsrc_t * --+---> Switch resource snapshot
+ * | i40e_sdu --+---> Current MTU
+ * | i40e_frame_max --+---> Current HW frame size
+ * | i40e_uaddr_t * --+---> Array of assigned unicast MACs
+ * | i40e_maddr_t * --+---> Array of assigned multicast MACs
+ * | i40e_mcast_promisccount --+---> Active multicast state
+ * | i40e_promisc_on --+---> Current promiscuous mode state
+ * | int --+---> Number of transmit/receive pairs
+ * | kstat_t * --+---> PF kstats
+ * | kstat_t * --+---> VSI kstats
+ * | i40e_pf_stats_t --+---> PF kstat backing data
+ * | i40e_vsi_stats_t --+---> VSI kstat backing data
+ * | i40e_trqpair_t * --+---------+
+ * +---------------------------+ |
+ * |
+ * v
+ * +-------------------------------+ +-----------------------------+
+ * | Transmit/Receive Queue Pair |-------| Transmit/Receive Queue Pair |->...
+ * | i40e_trqpair_t | | i40e_trqpair_t |
+ * + Ring Data Structure | +-----------------------------+
+ * | |
+ * | mac_ring_handle_t +--> MAC RX ring handle
+ * | mac_ring_handle_t +--> MAC TX ring handle
+ * | i40e_rxq_stat_t --+--> RX Queue stats
+ * | i40e_txq_stat_t --+--> TX Queue stats
+ * | uint32_t (tx ring size) +--> TX Ring Size
+ * | uint32_t (tx free list size) +--> TX Free List Size
+ * | i40e_dma_buffer_t --------+--> TX Descriptor ring DMA
+ * | i40e_tx_desc_t * --------+--> TX descriptor ring
+ * | volatile unt32_t * +--> TX Write back head
+ * | uint32_t -------+--> TX ring head
+ * | uint32_t -------+--> TX ring tail
+ * | uint32_t -------+--> Num TX desc free
+ * | i40e_tx_control_block_t * --+--> TX control block array ---+
+ * | i40e_tx_control_block_t ** --+--> TCB work list ----+
+ * | i40e_tx_control_block_t ** --+--> TCB free list ---+
+ * | uint32_t -------+--> Free TCB count |
+ * | i40e_rx_data_t * -------+--+ v
+ * +-------------------------------+ | +---------------------------+
+ * | | Per-TX Frame Metadata |
+ * | | i40e_tx_control_block_t |
+ * +--------------------+ | |
+ * | mblk to transmit <--+--- mblk_t * |
+ * | type of transmit <--+--- i40e_tx_type_t |
+ * | TX DMA handle <--+--- ddi_dma_handle_t |
+ * v TX DMA buffer <--+--- i40e_dma_buffer_t |
+ * +------------------------------+ +---------------------------+
+ * | Core Receive Data |
+ * | i40e_rx_data_t |
+ * | |
+ * | i40e_dma_buffer_t --+--> RX descriptor DMA Data
+ * | i40e_rx_desc_t --+--> RX descriptor ring
+ * | uint32_t --+--> Next free desc.
+ * | i40e_rx_control_block_t * --+--> RX Control Block Array ---+
+ * | i40e_rx_control_block_t ** --+--> RCB work list ---+
+ * | i40e_rx_control_block_t ** --+--> RCB free list ---+
+ * +------------------------------+ |
+ * ^ |
+ * | +---------------------------+ |
+ * | | Per-RX Frame Metadata |<---------------+
+ * | | i40e_rx_control_block_t |
+ * | | |
+ * | | mblk_t * ----+--> Received mblk_t data
+ * | | uint32_t ----+--> Reference count
+ * | | i40e_dma_buffer_t ----+--> Receive data DMA info
+ * | | frtn_t ----+--> mblk free function info
+ * +-----+-- i40e_rx_data_t * |
+ * +---------------------------+
+ *
+ * -------------
+ * Lock Ordering
+ * -------------
+ *
+ * In order to ensure that we don't deadlock, the following represents the
+ * lock oder being used. When grabbing locks, follow the following order. Lower
+ * numbers are more important. Thus, the i40e_glock which is number 0, must be
+ * taken before any other locks in the driver. On the other hand, the
+ * i40e_t`i40e_stat_lock, has the highest number because it's the least
+ * important lock. Note, that just because one lock is higher than another does
+ * not mean that all intermediary locks are required.
+ *
+ * 0) i40e_glock
+ * 1) i40e_t`i40e_general_lock
+ *
+ * 2) i40e_trqpair_t`itrq_rx_lock
+ * 3) i40e_trqpair_t`itrq_tx_lock
+ * 4) i40e_t`i40e_rx_pending_lock
+ * 5) i40e_trqpair_t`itrq_tcb_lock
+ *
+ * 6) i40e_t`i40e_stat_lock
+ *
+ * Rules and expectations:
+ *
+ * 1) A thread holding locks belong to one PF should not hold locks belonging to
+ * a second. If for some reason this becomes necessary, locks should be grabbed
+ * based on the list order in the i40e_device_t, which implies that the
+ * i40e_glock is held.
+ *
+ * 2) When grabbing locks between multiple transmit and receive queues, the
+ * locks for the lowest number transmit/receive queue should be grabbed first.
+ *
+ * 3) When grabbing both the transmit and receive lock for a given queue, always
+ * grab i40e_trqpair_t`itrq_rx_lock before the i40e_trqpair_t`itrq_tx_lock.
+ *
+ * 4) The following pairs of locks are not expected to be held at the same time:
+ *
+ * o i40e_t`i40e_rx_pending_lock and i40e_trqpair_t`itrq_tcb_lock
+ *
+ * -----------
+ * Future Work
+ * -----------
+ *
+ * At the moment the i40e_t driver is rather bare bones, allowing us to start
+ * getting data flowing and folks using it while we develop additional features.
+ * While bugs have been filed to cover this future work, the following gives an
+ * overview of expected work:
+ *
+ * o TSO support
+ * o RSS / multiple ring support
+ * o Multiple group support
+ * o DMA binding and breaking up the locking in ring recycling.
+ * o Enhanced detection of device errors
+ * o Participation in IRM
+ * o FMA device reset
+ * o Stall detection, temperature error detection, etc.
+ * o More dynamic resource pools
+ */
+
+#include "i40e_sw.h"
+
+static char i40e_ident[] = "Intel 10/40Gb Ethernet v1.0.0";
+
+/*
+ * The i40e_glock primarily protects the lists below and the i40e_device_t
+ * structures.
+ */
+static kmutex_t i40e_glock;
+static list_t i40e_glist;
+static list_t i40e_dlist;
+
+/*
+ * Access attributes for register mapping.
+ */
+static ddi_device_acc_attr_t i40e_regs_acc_attr = {
+ DDI_DEVICE_ATTR_V1,
+ DDI_STRUCTURE_LE_ACC,
+ DDI_STRICTORDER_ACC,
+ DDI_FLAGERR_ACC
+};
+
+/*
+ * Logging function for this driver.
+ */
+static void
+i40e_dev_err(i40e_t *i40e, int level, boolean_t console, const char *fmt,
+ va_list ap)
+{
+ char buf[1024];
+
+ (void) vsnprintf(buf, sizeof (buf), fmt, ap);
+
+ if (i40e == NULL) {
+ cmn_err(level, (console) ? "%s: %s" : "!%s: %s",
+ I40E_MODULE_NAME, buf);
+ } else {
+ dev_err(i40e->i40e_dip, level, (console) ? "%s" : "!%s",
+ buf);
+ }
+}
+
+/*
+ * Because there's the stupid trailing-comma problem with the C preprocessor
+ * and variable arguments, I need to instantiate these. Pardon the redundant
+ * code.
+ */
+void
+i40e_error(i40e_t *i40e, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ i40e_dev_err(i40e, CE_WARN, B_FALSE, fmt, ap);
+ va_end(ap);
+}
+
+void
+i40e_log(i40e_t *i40e, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ i40e_dev_err(i40e, CE_NOTE, B_FALSE, fmt, ap);
+ va_end(ap);
+}
+
+void
+i40e_notice(i40e_t *i40e, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ i40e_dev_err(i40e, CE_NOTE, B_TRUE, fmt, ap);
+ va_end(ap);
+}
+
+static void
+i40e_device_rele(i40e_t *i40e)
+{
+ i40e_device_t *idp = i40e->i40e_device;
+
+ if (idp == NULL)
+ return;
+
+ mutex_enter(&i40e_glock);
+ VERIFY(idp->id_nreg > 0);
+ list_remove(&idp->id_i40e_list, i40e);
+ idp->id_nreg--;
+ if (idp->id_nreg == 0) {
+ list_remove(&i40e_dlist, idp);
+ list_destroy(&idp->id_i40e_list);
+ kmem_free(idp->id_rsrcs, sizeof (i40e_switch_rsrc_t) *
+ idp->id_rsrcs_alloc);
+ kmem_free(idp, sizeof (i40e_device_t));
+ }
+ i40e->i40e_device = NULL;
+ mutex_exit(&i40e_glock);
+}
+
+static i40e_device_t *
+i40e_device_find(i40e_t *i40e, dev_info_t *parent, uint_t bus, uint_t device)
+{
+ i40e_device_t *idp;
+ mutex_enter(&i40e_glock);
+ for (idp = list_head(&i40e_dlist); idp != NULL;
+ idp = list_next(&i40e_dlist, idp)) {
+ if (idp->id_parent == parent && idp->id_pci_bus == bus &&
+ idp->id_pci_device == device) {
+ break;
+ }
+ }
+
+ if (idp != NULL) {
+ VERIFY(idp->id_nreg < idp->id_nfuncs);
+ idp->id_nreg++;
+ } else {
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+ ASSERT(hw->num_ports > 0);
+ ASSERT(hw->num_partitions > 0);
+
+ /*
+ * The Intel common code doesn't exactly keep the number of PCI
+ * functions. But it calculates it during discovery of
+ * partitions and ports. So what we do is undo the calculation
+ * that it does originally, as functions are evenly spread
+ * across ports in the rare case of partitions.
+ */
+ idp = kmem_alloc(sizeof (i40e_device_t), KM_SLEEP);
+ idp->id_parent = parent;
+ idp->id_pci_bus = bus;
+ idp->id_pci_device = device;
+ idp->id_nfuncs = hw->num_ports * hw->num_partitions;
+ idp->id_nreg = 1;
+ idp->id_rsrcs_alloc = i40e->i40e_switch_rsrc_alloc;
+ idp->id_rsrcs_act = i40e->i40e_switch_rsrc_actual;
+ idp->id_rsrcs = kmem_alloc(sizeof (i40e_switch_rsrc_t) *
+ idp->id_rsrcs_alloc, KM_SLEEP);
+ bcopy(i40e->i40e_switch_rsrcs, idp->id_rsrcs,
+ sizeof (i40e_switch_rsrc_t) * idp->id_rsrcs_alloc);
+ list_create(&idp->id_i40e_list, sizeof (i40e_t),
+ offsetof(i40e_t, i40e_dlink));
+
+ list_insert_tail(&i40e_dlist, idp);
+ }
+
+ list_insert_tail(&idp->id_i40e_list, i40e);
+ mutex_exit(&i40e_glock);
+
+ return (idp);
+}
+
+static void
+i40e_link_state_set(i40e_t *i40e, link_state_t state)
+{
+ if (i40e->i40e_link_state == state)
+ return;
+
+ i40e->i40e_link_state = state;
+ mac_link_update(i40e->i40e_mac_hdl, i40e->i40e_link_state);
+}
+
+/*
+ * This is a basic link check routine. Mostly we're using this just to see
+ * if we can get any accurate information about the state of the link being
+ * up or down, as well as updating the link state, speed, etc. information.
+ */
+void
+i40e_link_check(i40e_t *i40e)
+{
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+ boolean_t ls;
+ int ret;
+
+ ASSERT(MUTEX_HELD(&i40e->i40e_general_lock));
+
+ hw->phy.get_link_info = B_TRUE;
+ if ((ret = i40e_get_link_status(hw, &ls)) != I40E_SUCCESS) {
+ i40e->i40e_s_link_status_errs++;
+ i40e->i40e_s_link_status_lasterr = ret;
+ return;
+ }
+
+ /*
+ * Firmware abstracts all of the mac and phy information for us, so we
+ * can use i40e_get_link_status to determine the current state.
+ */
+ if (ls == B_TRUE) {
+ enum i40e_aq_link_speed speed;
+
+ speed = i40e_get_link_speed(hw);
+
+ /*
+ * Translate from an i40e value to a value in Mbits/s.
+ */
+ switch (speed) {
+ case I40E_LINK_SPEED_100MB:
+ i40e->i40e_link_speed = 100;
+ break;
+ case I40E_LINK_SPEED_1GB:
+ i40e->i40e_link_speed = 1000;
+ break;
+ case I40E_LINK_SPEED_10GB:
+ i40e->i40e_link_speed = 10000;
+ break;
+ case I40E_LINK_SPEED_20GB:
+ i40e->i40e_link_speed = 20000;
+ break;
+ case I40E_LINK_SPEED_40GB:
+ i40e->i40e_link_speed = 40000;
+ break;
+ default:
+ i40e->i40e_link_speed = 0;
+ break;
+ }
+
+ /*
+ * At this time, hardware does not support half-duplex
+ * operation, hence why we don't ask the hardware about our
+ * current speed.
+ */
+ i40e->i40e_link_duplex = LINK_DUPLEX_FULL;
+ i40e_link_state_set(i40e, LINK_STATE_UP);
+ } else {
+ i40e->i40e_link_speed = 0;
+ i40e->i40e_link_duplex = 0;
+ i40e_link_state_set(i40e, LINK_STATE_DOWN);
+ }
+}
+
+static void
+i40e_rem_intrs(i40e_t *i40e)
+{
+ int i, rc;
+
+ for (i = 0; i < i40e->i40e_intr_count; i++) {
+ rc = ddi_intr_free(i40e->i40e_intr_handles[i]);
+ if (rc != DDI_SUCCESS) {
+ i40e_log(i40e, "failed to free interrupt %d: %d",
+ i, rc);
+ }
+ }
+
+ kmem_free(i40e->i40e_intr_handles, i40e->i40e_intr_size);
+ i40e->i40e_intr_handles = NULL;
+}
+
+static void
+i40e_rem_intr_handlers(i40e_t *i40e)
+{
+ int i, rc;
+
+ for (i = 0; i < i40e->i40e_intr_count; i++) {
+ rc = ddi_intr_remove_handler(i40e->i40e_intr_handles[i]);
+ if (rc != DDI_SUCCESS) {
+ i40e_log(i40e, "failed to remove interrupt %d: %d",
+ i, rc);
+ }
+ }
+}
+
+/*
+ * illumos Fault Management Architecture (FMA) support.
+ */
+
+int
+i40e_check_acc_handle(ddi_acc_handle_t handle)
+{
+ ddi_fm_error_t de;
+
+ ddi_fm_acc_err_get(handle, &de, DDI_FME_VERSION);
+ ddi_fm_acc_err_clear(handle, DDI_FME_VERSION);
+ return (de.fme_status);
+}
+
+int
+i40e_check_dma_handle(ddi_dma_handle_t handle)
+{
+ ddi_fm_error_t de;
+
+ ddi_fm_dma_err_get(handle, &de, DDI_FME_VERSION);
+ return (de.fme_status);
+}
+
+/*
+ * Fault service error handling callback function.
+ */
+/* ARGSUSED */
+static int
+i40e_fm_error_cb(dev_info_t *dip, ddi_fm_error_t *err, const void *impl_data)
+{
+ pci_ereport_post(dip, err, NULL);
+ return (err->fme_status);
+}
+
+static void
+i40e_fm_init(i40e_t *i40e)
+{
+ ddi_iblock_cookie_t iblk;
+
+ i40e->i40e_fm_capabilities = ddi_prop_get_int(DDI_DEV_T_ANY,
+ i40e->i40e_dip, DDI_PROP_DONTPASS, "fm_capable",
+ DDI_FM_EREPORT_CAPABLE | DDI_FM_ACCCHK_CAPABLE |
+ DDI_FM_DMACHK_CAPABLE | DDI_FM_ERRCB_CAPABLE);
+
+ if (i40e->i40e_fm_capabilities < 0) {
+ i40e->i40e_fm_capabilities = 0;
+ } else if (i40e->i40e_fm_capabilities > 0xf) {
+ i40e->i40e_fm_capabilities = DDI_FM_EREPORT_CAPABLE |
+ DDI_FM_ACCCHK_CAPABLE | DDI_FM_DMACHK_CAPABLE |
+ DDI_FM_ERRCB_CAPABLE;
+ }
+
+ /*
+ * Only register with IO Fault Services if we have some capability
+ */
+ if (i40e->i40e_fm_capabilities & DDI_FM_ACCCHK_CAPABLE) {
+ i40e_regs_acc_attr.devacc_attr_access = DDI_FLAGERR_ACC;
+ } else {
+ i40e_regs_acc_attr.devacc_attr_access = DDI_DEFAULT_ACC;
+ }
+
+ if (i40e->i40e_fm_capabilities) {
+ ddi_fm_init(i40e->i40e_dip, &i40e->i40e_fm_capabilities, &iblk);
+
+ if (DDI_FM_EREPORT_CAP(i40e->i40e_fm_capabilities) ||
+ DDI_FM_ERRCB_CAP(i40e->i40e_fm_capabilities)) {
+ pci_ereport_setup(i40e->i40e_dip);
+ }
+
+ if (DDI_FM_ERRCB_CAP(i40e->i40e_fm_capabilities)) {
+ ddi_fm_handler_register(i40e->i40e_dip,
+ i40e_fm_error_cb, (void*)i40e);
+ }
+ }
+
+ if (i40e->i40e_fm_capabilities & DDI_FM_DMACHK_CAPABLE) {
+ i40e_init_dma_attrs(i40e, B_TRUE);
+ } else {
+ i40e_init_dma_attrs(i40e, B_FALSE);
+ }
+}
+
+static void
+i40e_fm_fini(i40e_t *i40e)
+{
+ if (i40e->i40e_fm_capabilities) {
+
+ if (DDI_FM_EREPORT_CAP(i40e->i40e_fm_capabilities) ||
+ DDI_FM_ERRCB_CAP(i40e->i40e_fm_capabilities))
+ pci_ereport_teardown(i40e->i40e_dip);
+
+ if (DDI_FM_ERRCB_CAP(i40e->i40e_fm_capabilities))
+ ddi_fm_handler_unregister(i40e->i40e_dip);
+
+ ddi_fm_fini(i40e->i40e_dip);
+ }
+}
+
+void
+i40e_fm_ereport(i40e_t *i40e, char *detail)
+{
+ uint64_t ena;
+ char buf[FM_MAX_CLASS];
+
+ (void) snprintf(buf, FM_MAX_CLASS, "%s.%s", DDI_FM_DEVICE, detail);
+ ena = fm_ena_generate(0, FM_ENA_FMT1);
+ if (DDI_FM_EREPORT_CAP(i40e->i40e_fm_capabilities)) {
+ ddi_fm_ereport_post(i40e->i40e_dip, buf, ena, DDI_NOSLEEP,
+ FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0, NULL);
+ }
+}
+
+/*
+ * Here we're trying to get the ID of the default VSI. In general, when we come
+ * through and look at this shortly after attach, we expect there to only be a
+ * single element present, which is the default VSI. Importantly, each PF seems
+ * to not see any other devices, in part because of the simple switch mode that
+ * we're using. If for some reason, we see more artifact, we'll need to revisit
+ * what we're doing here.
+ */
+static int
+i40e_get_vsi_id(i40e_t *i40e)
+{
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+ struct i40e_aqc_get_switch_config_resp *sw_config;
+ uint8_t aq_buf[I40E_AQ_LARGE_BUF];
+ uint16_t next = 0;
+ int rc;
+
+ /* LINTED: E_BAD_PTR_CAST_ALIGN */
+ sw_config = (struct i40e_aqc_get_switch_config_resp *)aq_buf;
+ rc = i40e_aq_get_switch_config(hw, sw_config, sizeof (aq_buf), &next,
+ NULL);
+ if (rc != I40E_SUCCESS) {
+ i40e_error(i40e, "i40e_aq_get_switch_config() failed %d: %d\n",
+ rc, hw->aq.asq_last_status);
+ return (-1);
+ }
+
+ if (LE_16(sw_config->header.num_reported) != 1) {
+ i40e_error(i40e, "encountered multiple (%d) switching units "
+ "during attach, not proceeding",
+ LE_16(sw_config->header.num_reported));
+ return (-1);
+ }
+
+ return (sw_config->element[0].seid);
+}
+
+/*
+ * We need to fill the i40e_hw_t structure with the capabilities of this PF. We
+ * must also provide the memory for it; however, we don't need to keep it around
+ * to the call to the common code. It takes it and parses it into an internal
+ * structure.
+ */
+static boolean_t
+i40e_get_hw_capabilities(i40e_t *i40e, i40e_hw_t *hw)
+{
+ struct i40e_aqc_list_capabilities_element_resp *buf;
+ int rc;
+ size_t len;
+ uint16_t needed;
+ int nelems = I40E_HW_CAP_DEFAULT;
+
+ for (;;) {
+ len = nelems * sizeof (*buf);
+ ASSERT(len > 0);
+ buf = kmem_alloc(len, KM_SLEEP);
+ rc = i40e_aq_discover_capabilities(hw, buf, len,
+ &needed, i40e_aqc_opc_list_func_capabilities, NULL);
+ kmem_free(buf, len);
+
+ if (hw->aq.asq_last_status == I40E_AQ_RC_ENOMEM &&
+ nelems == I40E_HW_CAP_DEFAULT) {
+ if (nelems == needed) {
+ i40e_error(i40e, "Capability discovery failed "
+ "due to byzantine common code");
+ return (B_FALSE);
+ }
+ nelems = needed;
+ continue;
+ } else if (hw->aq.asq_last_status != I40E_AQ_RC_OK) {
+ i40e_error(i40e, "Capability discovery failed: %d", rc);
+ return (B_FALSE);
+ }
+
+ break;
+ }
+
+ return (B_TRUE);
+}
+
+/*
+ * Obtain the switch's capabilities as seen by this PF and keep it around for
+ * our later use.
+ */
+static boolean_t
+i40e_get_switch_resources(i40e_t *i40e)
+{
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+ uint8_t cnt = 2;
+ uint8_t act;
+ size_t size;
+ i40e_switch_rsrc_t *buf;
+
+ for (;;) {
+ enum i40e_status_code ret;
+ size = cnt * sizeof (i40e_switch_rsrc_t);
+ ASSERT(size > 0);
+ if (size > UINT16_MAX)
+ return (B_FALSE);
+ buf = kmem_alloc(size, KM_SLEEP);
+
+ ret = i40e_aq_get_switch_resource_alloc(hw, &act, buf,
+ cnt, NULL);
+ if (ret == I40E_ERR_ADMIN_QUEUE_ERROR &&
+ hw->aq.asq_last_status == I40E_AQ_RC_EINVAL) {
+ kmem_free(buf, size);
+ cnt += I40E_SWITCH_CAP_DEFAULT;
+ continue;
+ } else if (ret != I40E_SUCCESS) {
+ kmem_free(buf, size);
+ i40e_error(i40e,
+ "failed to retrieve switch statistics: %d\n", ret);
+ return (B_FALSE);
+ }
+
+ break;
+ }
+
+ i40e->i40e_switch_rsrc_alloc = cnt;
+ i40e->i40e_switch_rsrc_actual = act;
+ i40e->i40e_switch_rsrcs = buf;
+
+ return (B_TRUE);
+}
+
+static void
+i40e_cleanup_resources(i40e_t *i40e)
+{
+ if (i40e->i40e_uaddrs != NULL) {
+ kmem_free(i40e->i40e_uaddrs, sizeof (i40e_uaddr_t) *
+ i40e->i40e_resources.ifr_nmacfilt);
+ i40e->i40e_uaddrs = NULL;
+ }
+
+ if (i40e->i40e_maddrs != NULL) {
+ kmem_free(i40e->i40e_maddrs, sizeof (i40e_maddr_t) *
+ i40e->i40e_resources.ifr_nmcastfilt);
+ i40e->i40e_maddrs = NULL;
+ }
+
+ if (i40e->i40e_switch_rsrcs != NULL) {
+ size_t sz = sizeof (i40e_switch_rsrc_t) *
+ i40e->i40e_switch_rsrc_alloc;
+ ASSERT(sz > 0);
+ kmem_free(i40e->i40e_switch_rsrcs, sz);
+ i40e->i40e_switch_rsrcs = NULL;
+ }
+
+ if (i40e->i40e_device != NULL)
+ i40e_device_rele(i40e);
+}
+
+static boolean_t
+i40e_get_available_resources(i40e_t *i40e)
+{
+ dev_info_t *parent;
+ uint16_t bus, device, func;
+ uint_t nregs;
+ int *regs, i;
+ i40e_device_t *idp;
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+
+ parent = ddi_get_parent(i40e->i40e_dip);
+
+ if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, i40e->i40e_dip, 0, "reg",
+ &regs, &nregs) != DDI_PROP_SUCCESS) {
+ return (B_FALSE);
+ }
+
+ if (nregs < 1) {
+ ddi_prop_free(regs);
+ return (B_FALSE);
+ }
+
+ bus = PCI_REG_BUS_G(regs[0]);
+ device = PCI_REG_DEV_G(regs[0]);
+ func = PCI_REG_FUNC_G(regs[0]);
+ ddi_prop_free(regs);
+
+ i40e->i40e_hw_space.bus.func = func;
+ i40e->i40e_hw_space.bus.device = device;
+
+ if (i40e_get_switch_resources(i40e) == B_FALSE) {
+ return (B_FALSE);
+ }
+
+ /*
+ * To calculate the total amount of a resource we have available, we
+ * need to add how many our i40e_t thinks it has guaranteed, if any, and
+ * then we need to go through and divide the number of available on the
+ * device, which was snapshotted before anyone should have allocated
+ * anything, and use that to derive how many are available from the
+ * pool. Longer term, we may want to turn this into something that's
+ * more of a pool-like resource that everything can share (though that
+ * may require some more assistance from MAC).
+ *
+ * Though for transmit and receive queue pairs, we just have to ask
+ * firmware instead.
+ */
+ idp = i40e_device_find(i40e, parent, bus, device);
+ i40e->i40e_device = idp;
+ i40e->i40e_resources.ifr_nvsis = 0;
+ i40e->i40e_resources.ifr_nvsis_used = 0;
+ i40e->i40e_resources.ifr_nmacfilt = 0;
+ i40e->i40e_resources.ifr_nmacfilt_used = 0;
+ i40e->i40e_resources.ifr_nmcastfilt = 0;
+ i40e->i40e_resources.ifr_nmcastfilt_used = 0;
+
+ for (i = 0; i < i40e->i40e_switch_rsrc_actual; i++) {
+ i40e_switch_rsrc_t *srp = &i40e->i40e_switch_rsrcs[i];
+
+ switch (srp->resource_type) {
+ case I40E_AQ_RESOURCE_TYPE_VSI:
+ i40e->i40e_resources.ifr_nvsis +=
+ LE_16(srp->guaranteed);
+ i40e->i40e_resources.ifr_nvsis_used = LE_16(srp->used);
+ break;
+ case I40E_AQ_RESOURCE_TYPE_MACADDR:
+ i40e->i40e_resources.ifr_nmacfilt +=
+ LE_16(srp->guaranteed);
+ i40e->i40e_resources.ifr_nmacfilt_used =
+ LE_16(srp->used);
+ break;
+ case I40E_AQ_RESOURCE_TYPE_MULTICAST_HASH:
+ i40e->i40e_resources.ifr_nmcastfilt +=
+ LE_16(srp->guaranteed);
+ i40e->i40e_resources.ifr_nmcastfilt_used =
+ LE_16(srp->used);
+ break;
+ default:
+ break;
+ }
+ }
+
+ for (i = 0; i < idp->id_rsrcs_act; i++) {
+ i40e_switch_rsrc_t *srp = &i40e->i40e_switch_rsrcs[i];
+ switch (srp->resource_type) {
+ case I40E_AQ_RESOURCE_TYPE_VSI:
+ i40e->i40e_resources.ifr_nvsis +=
+ LE_16(srp->total_unalloced) / idp->id_nfuncs;
+ break;
+ case I40E_AQ_RESOURCE_TYPE_MACADDR:
+ i40e->i40e_resources.ifr_nmacfilt +=
+ LE_16(srp->total_unalloced) / idp->id_nfuncs;
+ break;
+ case I40E_AQ_RESOURCE_TYPE_MULTICAST_HASH:
+ i40e->i40e_resources.ifr_nmcastfilt +=
+ LE_16(srp->total_unalloced) / idp->id_nfuncs;
+ default:
+ break;
+ }
+ }
+
+ i40e->i40e_resources.ifr_nrx_queue = hw->func_caps.num_rx_qp;
+ i40e->i40e_resources.ifr_ntx_queue = hw->func_caps.num_tx_qp;
+
+ i40e->i40e_uaddrs = kmem_zalloc(sizeof (i40e_uaddr_t) *
+ i40e->i40e_resources.ifr_nmacfilt, KM_SLEEP);
+ i40e->i40e_maddrs = kmem_zalloc(sizeof (i40e_maddr_t) *
+ i40e->i40e_resources.ifr_nmcastfilt, KM_SLEEP);
+
+ /*
+ * Initialize these a multicast address to indicate it's invalid for
+ * sanity purposes. Think of it like 0xdeadbeef.
+ */
+ for (i = 0; i < i40e->i40e_resources.ifr_nmacfilt; i++)
+ i40e->i40e_uaddrs[i].iua_mac[0] = 0x01;
+
+ return (B_TRUE);
+}
+
+static boolean_t
+i40e_enable_interrupts(i40e_t *i40e)
+{
+ int i, rc;
+
+ if (i40e->i40e_intr_cap & DDI_INTR_FLAG_BLOCK) {
+ rc = ddi_intr_block_enable(i40e->i40e_intr_handles,
+ i40e->i40e_intr_count);
+ if (rc != DDI_SUCCESS) {
+ i40e_error(i40e, "Interrupt block-enable failed: %d",
+ rc);
+ return (B_FALSE);
+ }
+ } else {
+ for (i = 0; i < i40e->i40e_intr_count; i++) {
+ rc = ddi_intr_enable(i40e->i40e_intr_handles[i]);
+ if (rc != DDI_SUCCESS) {
+ i40e_error(i40e,
+ "Failed to enable interrupt %d: %d", i, rc);
+ while (--i >= 0) {
+ (void) ddi_intr_disable(
+ i40e->i40e_intr_handles[i]);
+ }
+ return (B_FALSE);
+ }
+ }
+ }
+
+ return (B_TRUE);
+}
+
+static boolean_t
+i40e_disable_interrupts(i40e_t *i40e)
+{
+ int i, rc;
+
+ if (i40e->i40e_intr_cap & DDI_INTR_FLAG_BLOCK) {
+ rc = ddi_intr_block_disable(i40e->i40e_intr_handles,
+ i40e->i40e_intr_count);
+ if (rc != DDI_SUCCESS) {
+ i40e_error(i40e,
+ "Interrupt block-disabled failed: %d", rc);
+ return (B_FALSE);
+ }
+ } else {
+ for (i = 0; i < i40e->i40e_intr_count; i++) {
+ rc = ddi_intr_disable(i40e->i40e_intr_handles[i]);
+ if (rc != DDI_SUCCESS) {
+ i40e_error(i40e,
+ "Failed to disable interrupt %d: %d",
+ i, rc);
+ return (B_FALSE);
+ }
+ }
+ }
+
+ return (B_TRUE);
+}
+
+/*
+ * Free receive & transmit rings.
+ */
+static void
+i40e_free_trqpairs(i40e_t *i40e)
+{
+ int i;
+ i40e_trqpair_t *itrq;
+
+ if (i40e->i40e_trqpairs != NULL) {
+ for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
+ itrq = &i40e->i40e_trqpairs[i];
+ mutex_destroy(&itrq->itrq_rx_lock);
+ mutex_destroy(&itrq->itrq_tx_lock);
+ mutex_destroy(&itrq->itrq_tcb_lock);
+
+ /*
+ * Should have already been cleaned up by start/stop,
+ * etc.
+ */
+ ASSERT(itrq->itrq_txkstat == NULL);
+ ASSERT(itrq->itrq_rxkstat == NULL);
+ }
+
+ kmem_free(i40e->i40e_trqpairs,
+ sizeof (i40e_trqpair_t) * i40e->i40e_num_trqpairs);
+ i40e->i40e_trqpairs = NULL;
+ }
+
+ cv_destroy(&i40e->i40e_rx_pending_cv);
+ mutex_destroy(&i40e->i40e_rx_pending_lock);
+ mutex_destroy(&i40e->i40e_general_lock);
+}
+
+/*
+ * Allocate transmit and receive rings, as well as other data structures that we
+ * need.
+ */
+static boolean_t
+i40e_alloc_trqpairs(i40e_t *i40e)
+{
+ int i;
+ void *mutexpri = DDI_INTR_PRI(i40e->i40e_intr_pri);
+
+ /*
+ * Now that we have the priority for the interrupts, initialize
+ * all relevant locks.
+ */
+ mutex_init(&i40e->i40e_general_lock, NULL, MUTEX_DRIVER, mutexpri);
+ mutex_init(&i40e->i40e_rx_pending_lock, NULL, MUTEX_DRIVER, mutexpri);
+ cv_init(&i40e->i40e_rx_pending_cv, NULL, CV_DRIVER, NULL);
+
+ i40e->i40e_trqpairs = kmem_zalloc(sizeof (i40e_trqpair_t) *
+ i40e->i40e_num_trqpairs, KM_SLEEP);
+ for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
+ i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[i];
+
+ itrq->itrq_i40e = i40e;
+ mutex_init(&itrq->itrq_rx_lock, NULL, MUTEX_DRIVER, mutexpri);
+ mutex_init(&itrq->itrq_tx_lock, NULL, MUTEX_DRIVER, mutexpri);
+ mutex_init(&itrq->itrq_tcb_lock, NULL, MUTEX_DRIVER, mutexpri);
+ itrq->itrq_index = i;
+ }
+
+ return (B_TRUE);
+}
+
+
+
+/*
+ * Unless a .conf file already overrode i40e_t structure values, they will
+ * be 0, and need to be set in conjunction with the now-available HW report.
+ *
+ * However, at the moment, we cap all of these resources as we only support a
+ * single receive ring and a single group.
+ */
+/* ARGSUSED */
+static void
+i40e_hw_to_instance(i40e_t *i40e, i40e_hw_t *hw)
+{
+ if (i40e->i40e_num_trqpairs == 0) {
+ i40e->i40e_num_trqpairs = I40E_TRQPAIR_MAX;
+ }
+
+ if (i40e->i40e_num_rx_groups == 0) {
+ i40e->i40e_num_rx_groups = I40E_GROUP_MAX;
+ }
+}
+
+/*
+ * Free any resources required by, or setup by, the Intel common code.
+ */
+static void
+i40e_common_code_fini(i40e_t *i40e)
+{
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+ int rc;
+
+ rc = i40e_shutdown_lan_hmc(hw);
+ if (rc != I40E_SUCCESS)
+ i40e_error(i40e, "failed to shutdown LAN hmc: %d", rc);
+
+ rc = i40e_shutdown_adminq(hw);
+ if (rc != I40E_SUCCESS)
+ i40e_error(i40e, "failed to shutdown admin queue: %d", rc);
+}
+
+/*
+ * Initialize and call Intel common-code routines, includes some setup
+ * the common code expects from the driver. Also prints on failure, so
+ * the caller doesn't have to.
+ */
+static boolean_t
+i40e_common_code_init(i40e_t *i40e, i40e_hw_t *hw)
+{
+ int rc;
+
+ i40e_clear_hw(hw);
+ rc = i40e_pf_reset(hw);
+ if (rc != 0) {
+ i40e_error(i40e, "failed to reset hardware: %d", rc);
+ i40e_fm_ereport(i40e, DDI_FM_DEVICE_NO_RESPONSE);
+ return (B_FALSE);
+ }
+
+ rc = i40e_init_shared_code(hw);
+ if (rc != 0) {
+ i40e_error(i40e, "failed to initialize i40e core: %d", rc);
+ return (B_FALSE);
+ }
+
+ hw->aq.num_arq_entries = I40E_DEF_ADMINQ_SIZE;
+ hw->aq.num_asq_entries = I40E_DEF_ADMINQ_SIZE;
+ hw->aq.arq_buf_size = I40E_ADMINQ_BUFSZ;
+ hw->aq.asq_buf_size = I40E_ADMINQ_BUFSZ;
+
+ rc = i40e_init_adminq(hw);
+ if (rc != 0) {
+ i40e_error(i40e, "failed to initialize firmware admin queue: "
+ "%d, potential firmware version mismatch", rc);
+ i40e_fm_ereport(i40e, DDI_FM_DEVICE_INVAL_STATE);
+ return (B_FALSE);
+ }
+
+ if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
+ hw->aq.api_min_ver > I40E_FW_API_VERSION_MINOR) {
+ i40e_notice(i40e, "The driver for the device detected a newer "
+ "version of the NVM image (%d.%d) than expected (%d.%d).\n"
+ "Please install the most recent version of the network "
+ "driver.\n", hw->aq.api_maj_ver, hw->aq.api_min_ver,
+ I40E_FW_API_VERSION_MAJOR, I40E_FW_API_VERSION_MINOR);
+ } else if (hw->aq.api_maj_ver < I40E_FW_API_VERSION_MAJOR ||
+ hw->aq.api_min_ver < (I40E_FW_API_VERSION_MINOR - 1)) {
+ i40e_notice(i40e, "The driver for the device detected an older"
+ " version of the NVM image (%d.%d) than expected (%d.%d)."
+ "\nPlease update the NVM image.\n",
+ hw->aq.api_maj_ver, hw->aq.api_min_ver,
+ I40E_FW_API_VERSION_MAJOR, I40E_FW_API_VERSION_MINOR - 1);
+ }
+
+ i40e_clear_pxe_mode(hw);
+
+ /*
+ * We need to call this so that the common code can discover
+ * capabilities of the hardware, which it uses throughout the rest.
+ */
+ if (!i40e_get_hw_capabilities(i40e, hw)) {
+ i40e_error(i40e, "failed to obtain hardware capabilities");
+ return (B_FALSE);
+ }
+
+ if (i40e_get_available_resources(i40e) == B_FALSE) {
+ i40e_error(i40e, "failed to obtain hardware resources");
+ return (B_FALSE);
+ }
+
+ i40e_hw_to_instance(i40e, hw);
+
+ rc = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp,
+ hw->func_caps.num_rx_qp, 0, 0);
+ if (rc != 0) {
+ i40e_error(i40e, "failed to initialize hardware memory cache: "
+ "%d\n", rc);
+ return (B_FALSE);
+ }
+
+ rc = i40e_configure_lan_hmc(hw, I40E_HMC_MODEL_DIRECT_ONLY);
+ if (rc != 0) {
+ i40e_error(i40e, "failed to configure hardware memory cache: "
+ "%d\n", rc);
+ return (B_FALSE);
+ }
+
+ (void) i40e_aq_stop_lldp(hw, TRUE, NULL);
+
+ rc = i40e_get_mac_addr(hw, hw->mac.addr);
+ if (rc != I40E_SUCCESS) {
+ i40e_error(i40e, "failed to retrieve hardware mac address: %d",
+ rc);
+ return (B_FALSE);
+ }
+
+ rc = i40e_validate_mac_addr(hw->mac.addr);
+ if (rc != 0) {
+ i40e_error(i40e, "failed to validate internal mac address: "
+ "%d\n", rc);
+ return (B_FALSE);
+ }
+ bcopy(hw->mac.addr, hw->mac.perm_addr, ETHERADDRL);
+ if ((rc = i40e_get_port_mac_addr(hw, hw->mac.port_addr)) !=
+ I40E_SUCCESS) {
+ i40e_error(i40e, "failed to retrieve port mac address: %d",
+ rc);
+ return (B_FALSE);
+ }
+
+ /*
+ * We need to obtain the Virtual Station ID (VSI) before we can
+ * perform other operations on the device.
+ */
+ i40e->i40e_vsi_id = i40e_get_vsi_id(i40e);
+ if (i40e->i40e_vsi_id == -1) {
+ i40e_error(i40e, "failed to obtain VSI ID");
+ return (B_FALSE);
+ }
+
+ return (B_TRUE);
+}
+
+static void
+i40e_unconfigure(dev_info_t *devinfo, i40e_t *i40e)
+{
+ int rc;
+
+ if (i40e->i40e_attach_progress & I40E_ATTACH_ENABLE_INTR)
+ (void) i40e_disable_interrupts(i40e);
+
+ if ((i40e->i40e_attach_progress & I40E_ATTACH_LINK_TIMER) &&
+ i40e->i40e_periodic_id != 0) {
+ ddi_periodic_delete(i40e->i40e_periodic_id);
+ i40e->i40e_periodic_id = 0;
+ }
+
+ if (i40e->i40e_attach_progress & I40E_ATTACH_MAC) {
+ rc = mac_unregister(i40e->i40e_mac_hdl);
+ if (rc != 0) {
+ i40e_error(i40e, "failed to unregister from mac: %d",
+ rc);
+ }
+ }
+
+ if (i40e->i40e_attach_progress & I40E_ATTACH_STATS) {
+ i40e_stats_fini(i40e);
+ }
+
+ if (i40e->i40e_attach_progress & I40E_ATTACH_ADD_INTR)
+ i40e_rem_intr_handlers(i40e);
+
+ if (i40e->i40e_attach_progress & I40E_ATTACH_ALLOC_RINGSLOCKS)
+ i40e_free_trqpairs(i40e);
+
+ if (i40e->i40e_attach_progress & I40E_ATTACH_ALLOC_INTR)
+ i40e_rem_intrs(i40e);
+
+ if (i40e->i40e_attach_progress & I40E_ATTACH_COMMON_CODE)
+ i40e_common_code_fini(i40e);
+
+ i40e_cleanup_resources(i40e);
+
+ if (i40e->i40e_attach_progress & I40E_ATTACH_PROPS)
+ (void) ddi_prop_remove_all(devinfo);
+
+ if (i40e->i40e_attach_progress & I40E_ATTACH_REGS_MAP &&
+ i40e->i40e_osdep_space.ios_reg_handle != NULL) {
+ ddi_regs_map_free(&i40e->i40e_osdep_space.ios_reg_handle);
+ i40e->i40e_osdep_space.ios_reg_handle = NULL;
+ }
+
+ if ((i40e->i40e_attach_progress & I40E_ATTACH_PCI_CONFIG) &&
+ i40e->i40e_osdep_space.ios_cfg_handle != NULL) {
+ pci_config_teardown(&i40e->i40e_osdep_space.ios_cfg_handle);
+ i40e->i40e_osdep_space.ios_cfg_handle = NULL;
+ }
+
+ if (i40e->i40e_attach_progress & I40E_ATTACH_FM_INIT)
+ i40e_fm_fini(i40e);
+
+ kmem_free(i40e->i40e_aqbuf, I40E_ADMINQ_BUFSZ);
+ kmem_free(i40e, sizeof (i40e_t));
+
+ ddi_set_driver_private(devinfo, NULL);
+}
+
+static boolean_t
+i40e_final_init(i40e_t *i40e)
+{
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+ struct i40e_osdep *osdep = OS_DEP(hw);
+ uint8_t pbanum[I40E_PBANUM_STRLEN];
+ enum i40e_status_code irc;
+ char buf[I40E_DDI_PROP_LEN];
+
+ pbanum[0] = '\0';
+ irc = i40e_read_pba_string(hw, pbanum, sizeof (pbanum));
+ if (irc != I40E_SUCCESS) {
+ i40e_log(i40e, "failed to read PBA string: %d", irc);
+ } else {
+ (void) ddi_prop_update_string(DDI_DEV_T_NONE, i40e->i40e_dip,
+ "printed-board-assembly", (char *)pbanum);
+ }
+
+#ifdef DEBUG
+ ASSERT(snprintf(NULL, 0, "%d.%d", hw->aq.fw_maj_ver,
+ hw->aq.fw_min_ver) < sizeof (buf));
+ ASSERT(snprintf(NULL, 0, "%x", hw->aq.fw_build) < sizeof (buf));
+ ASSERT(snprintf(NULL, 0, "%d.%d", hw->aq.api_maj_ver,
+ hw->aq.api_min_ver) < sizeof (buf));
+#endif
+
+ (void) snprintf(buf, sizeof (buf), "%d.%d", hw->aq.fw_maj_ver,
+ hw->aq.fw_min_ver);
+ (void) ddi_prop_update_string(DDI_DEV_T_NONE, i40e->i40e_dip,
+ "firmware-version", buf);
+ (void) snprintf(buf, sizeof (buf), "%x", hw->aq.fw_build);
+ (void) ddi_prop_update_string(DDI_DEV_T_NONE, i40e->i40e_dip,
+ "firmware-build", buf);
+ (void) snprintf(buf, sizeof (buf), "%d.%d", hw->aq.api_maj_ver,
+ hw->aq.api_min_ver);
+ (void) ddi_prop_update_string(DDI_DEV_T_NONE, i40e->i40e_dip,
+ "api-version", buf);
+
+ if (!i40e_set_hw_bus_info(hw))
+ return (B_FALSE);
+
+ if (i40e_check_acc_handle(osdep->ios_reg_handle) != DDI_FM_OK) {
+ ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_LOST);
+ return (B_FALSE);
+ }
+
+ return (B_TRUE);
+}
+
+static boolean_t
+i40e_identify_hardware(i40e_t *i40e)
+{
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+ struct i40e_osdep *osdep = &i40e->i40e_osdep_space;
+
+ hw->vendor_id = pci_config_get16(osdep->ios_cfg_handle, PCI_CONF_VENID);
+ hw->device_id = pci_config_get16(osdep->ios_cfg_handle, PCI_CONF_DEVID);
+ hw->revision_id = pci_config_get8(osdep->ios_cfg_handle,
+ PCI_CONF_REVID);
+ hw->subsystem_device_id =
+ pci_config_get16(osdep->ios_cfg_handle, PCI_CONF_SUBSYSID);
+ hw->subsystem_vendor_id =
+ pci_config_get16(osdep->ios_cfg_handle, PCI_CONF_SUBVENID);
+
+ /*
+ * Note that we set the hardware's bus information later on, in
+ * i40e_get_available_resources(). The common code doesn't seem to
+ * require that it be set in any ways, it seems to be mostly for
+ * book-keeping.
+ */
+
+ /* Call common code to set the MAC type for this adapter. */
+ if (i40e_set_mac_type(hw) != I40E_SUCCESS)
+ return (B_FALSE);
+
+ return (B_TRUE);
+}
+
+static boolean_t
+i40e_regs_map(i40e_t *i40e)
+{
+ dev_info_t *devinfo = i40e->i40e_dip;
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+ struct i40e_osdep *osdep = &i40e->i40e_osdep_space;
+ off_t memsize;
+ int ret;
+
+ if (ddi_dev_regsize(devinfo, I40E_ADAPTER_REGSET, &memsize) !=
+ DDI_SUCCESS) {
+ i40e_error(i40e, "Used invalid register set to map PCIe regs");
+ return (B_FALSE);
+ }
+
+ if ((ret = ddi_regs_map_setup(devinfo, I40E_ADAPTER_REGSET,
+ (caddr_t *)&hw->hw_addr, 0, memsize, &i40e_regs_acc_attr,
+ &osdep->ios_reg_handle)) != DDI_SUCCESS) {
+ i40e_error(i40e, "failed to map device registers: %d", ret);
+ return (B_FALSE);
+ }
+
+ osdep->ios_reg_size = memsize;
+ return (B_TRUE);
+}
+
+/*
+ * Update parameters required when a new MTU has been configured. Calculate the
+ * maximum frame size, as well as, size our DMA buffers which we size in
+ * increments of 1K.
+ */
+void
+i40e_update_mtu(i40e_t *i40e)
+{
+ uint32_t rx, tx;
+
+ i40e->i40e_frame_max = i40e->i40e_sdu +
+ sizeof (struct ether_vlan_header) + ETHERFCSL;
+
+ rx = i40e->i40e_frame_max + I40E_BUF_IPHDR_ALIGNMENT;
+ i40e->i40e_rx_buf_size = ((rx >> 10) +
+ ((rx & (((uint32_t)1 << 10) -1)) > 0 ? 1 : 0)) << 10;
+
+ tx = i40e->i40e_frame_max;
+ i40e->i40e_tx_buf_size = ((tx >> 10) +
+ ((tx & (((uint32_t)1 << 10) -1)) > 0 ? 1 : 0)) << 10;
+}
+
+static int
+i40e_get_prop(i40e_t *i40e, char *prop, int min, int max, int def)
+{
+ int val;
+
+ val = ddi_prop_get_int(DDI_DEV_T_ANY, i40e->i40e_dip, DDI_PROP_DONTPASS,
+ prop, def);
+ if (val > max)
+ val = max;
+ if (val < min)
+ val = min;
+ return (val);
+}
+
+static void
+i40e_init_properties(i40e_t *i40e)
+{
+ i40e->i40e_sdu = i40e_get_prop(i40e, "default_mtu",
+ I40E_MIN_MTU, I40E_MAX_MTU, I40E_DEF_MTU);
+
+ i40e->i40e_intr_force = i40e_get_prop(i40e, "intr_force",
+ I40E_INTR_NONE, I40E_INTR_LEGACY, I40E_INTR_NONE);
+
+ i40e->i40e_mr_enable = i40e_get_prop(i40e, "mr_enable",
+ B_FALSE, B_TRUE, B_TRUE);
+
+ i40e->i40e_tx_ring_size = i40e_get_prop(i40e, "tx_ring_size",
+ I40E_MIN_TX_RING_SIZE, I40E_MAX_TX_RING_SIZE,
+ I40E_DEF_TX_RING_SIZE);
+ if ((i40e->i40e_tx_ring_size % I40E_DESC_ALIGN) != 0) {
+ i40e->i40e_tx_ring_size = P2ROUNDUP(i40e->i40e_tx_ring_size,
+ I40E_DESC_ALIGN);
+ }
+
+ i40e->i40e_tx_block_thresh = i40e_get_prop(i40e, "tx_resched_threshold",
+ I40E_MIN_TX_BLOCK_THRESH,
+ i40e->i40e_tx_ring_size - I40E_TX_MAX_COOKIE,
+ I40E_DEF_TX_BLOCK_THRESH);
+
+ i40e->i40e_rx_ring_size = i40e_get_prop(i40e, "rx_ring_size",
+ I40E_MIN_RX_RING_SIZE, I40E_MAX_RX_RING_SIZE,
+ I40E_DEF_RX_RING_SIZE);
+ if ((i40e->i40e_rx_ring_size % I40E_DESC_ALIGN) != 0) {
+ i40e->i40e_rx_ring_size = P2ROUNDUP(i40e->i40e_rx_ring_size,
+ I40E_DESC_ALIGN);
+ }
+
+ i40e->i40e_rx_limit_per_intr = i40e_get_prop(i40e, "rx_limit_per_intr",
+ I40E_MIN_RX_LIMIT_PER_INTR, I40E_MAX_RX_LIMIT_PER_INTR,
+ I40E_DEF_RX_LIMIT_PER_INTR);
+
+ i40e->i40e_tx_hcksum_enable = i40e_get_prop(i40e, "tx_hcksum_enable",
+ B_FALSE, B_TRUE, B_TRUE);
+
+ i40e->i40e_rx_hcksum_enable = i40e_get_prop(i40e, "rx_hcksum_enable",
+ B_FALSE, B_TRUE, B_TRUE);
+
+ i40e->i40e_rx_dma_min = i40e_get_prop(i40e, "rx_dma_threshold",
+ I40E_MIN_RX_DMA_THRESH, I40E_MAX_RX_DMA_THRESH,
+ I40E_DEF_RX_DMA_THRESH);
+
+ i40e->i40e_tx_dma_min = i40e_get_prop(i40e, "tx_dma_threshold",
+ I40E_MIN_TX_DMA_THRESH, I40E_MAX_TX_DMA_THRESH,
+ I40E_DEF_TX_DMA_THRESH);
+
+ i40e->i40e_tx_itr = i40e_get_prop(i40e, "tx_intr_throttle",
+ I40E_MIN_ITR, I40E_MAX_ITR, I40E_DEF_TX_ITR);
+
+ i40e->i40e_rx_itr = i40e_get_prop(i40e, "rx_intr_throttle",
+ I40E_MIN_ITR, I40E_MAX_ITR, I40E_DEF_RX_ITR);
+
+ i40e->i40e_other_itr = i40e_get_prop(i40e, "other_intr_throttle",
+ I40E_MIN_ITR, I40E_MAX_ITR, I40E_DEF_OTHER_ITR);
+
+ if (!i40e->i40e_mr_enable) {
+ i40e->i40e_num_trqpairs = I40E_TRQPAIR_NOMSIX;
+ i40e->i40e_num_rx_groups = I40E_GROUP_NOMSIX;
+ }
+
+ i40e_update_mtu(i40e);
+}
+
+/*
+ * There are a few constraints on interrupts that we're currently imposing, some
+ * of which are restrictions from hardware. For a fuller treatment, see
+ * i40e_intr.c.
+ *
+ * Currently, to use MSI-X we require two interrupts be available though in
+ * theory we should participate in IRM and happily use more interrupts.
+ *
+ * Hardware only supports a single MSI being programmed and therefore if we
+ * don't have MSI-X interrupts available at this time, then we ratchet down the
+ * number of rings and groups available. Obviously, we only bother with a single
+ * fixed interrupt.
+ */
+static boolean_t
+i40e_alloc_intr_handles(i40e_t *i40e, dev_info_t *devinfo, int intr_type)
+{
+ int request, count, actual, rc, min;
+
+ switch (intr_type) {
+ case DDI_INTR_TYPE_FIXED:
+ case DDI_INTR_TYPE_MSI:
+ request = 1;
+ min = 1;
+ break;
+ case DDI_INTR_TYPE_MSIX:
+ /*
+ * At the moment, we always request two MSI-X while we still
+ * only support a single interrupt. The upper bound on what's
+ * supported by a given device is defined by MSI_X_PF_N in
+ * GLPCI_CNF2. When we evolve, we should read it to determine
+ * what the real max is.
+ */
+ ASSERT(i40e->i40e_num_trqpairs == 1);
+ request = 2;
+ min = 2;
+ break;
+ default:
+ panic("bad interrupt type passed to i40e_alloc_intr_handles: "
+ "%d", intr_type);
+ return (B_FALSE);
+ }
+
+ rc = ddi_intr_get_nintrs(devinfo, intr_type, &count);
+ if (rc != DDI_SUCCESS || count < min) {
+ i40e_log(i40e, "Get interrupt number failed, "
+ "returned %d, count %d\n", rc, count);
+ return (B_FALSE);
+ }
+
+ rc = ddi_intr_get_navail(devinfo, intr_type, &count);
+ if (rc != DDI_SUCCESS || count < min) {
+ i40e_log(i40e, "Get AVAILABLE interrupt number failed, "
+ "returned %d, count %d\n", rc, count);
+ return (B_FALSE);
+ }
+
+ actual = 0;
+ i40e->i40e_intr_count = 0;
+ i40e->i40e_intr_count_max = 0;
+ i40e->i40e_intr_count_min = 0;
+
+ i40e->i40e_intr_size = request * sizeof (ddi_intr_handle_t);
+ ASSERT(i40e->i40e_intr_size != 0);
+ i40e->i40e_intr_handles = kmem_alloc(i40e->i40e_intr_size, KM_SLEEP);
+
+ rc = ddi_intr_alloc(devinfo, i40e->i40e_intr_handles, intr_type, 0,
+ min(request, count), &actual, DDI_INTR_ALLOC_NORMAL);
+ if (rc != DDI_SUCCESS) {
+ i40e_log(i40e, "Interrupt allocation failed with %d.", rc);
+ goto alloc_handle_fail;
+ }
+
+ i40e->i40e_intr_count = actual;
+ i40e->i40e_intr_count_max = request;
+ i40e->i40e_intr_count_min = min;
+
+ if (actual < min) {
+ i40e_log(i40e, "actual (%d) is less than minimum (%d).",
+ actual, min);
+ goto alloc_handle_fail;
+ }
+
+ /*
+ * Record the priority and capabilities for our first vector. Once
+ * we have it, that's our priority until detach time. Even if we
+ * eventually participate in IRM, our priority shouldn't change.
+ */
+ rc = ddi_intr_get_pri(i40e->i40e_intr_handles[0], &i40e->i40e_intr_pri);
+ if (rc != DDI_SUCCESS) {
+ i40e_log(i40e,
+ "Getting interrupt priority failed with %d.", rc);
+ goto alloc_handle_fail;
+ }
+
+ rc = ddi_intr_get_cap(i40e->i40e_intr_handles[0], &i40e->i40e_intr_cap);
+ if (rc != DDI_SUCCESS) {
+ i40e_log(i40e,
+ "Getting interrupt capabilities failed with %d.", rc);
+ goto alloc_handle_fail;
+ }
+
+ i40e->i40e_intr_type = intr_type;
+ return (B_TRUE);
+
+alloc_handle_fail:
+
+ i40e_rem_intrs(i40e);
+ return (B_FALSE);
+}
+
+static boolean_t
+i40e_alloc_intrs(i40e_t *i40e, dev_info_t *devinfo)
+{
+ int intr_types, rc;
+
+ rc = ddi_intr_get_supported_types(devinfo, &intr_types);
+ if (rc != DDI_SUCCESS) {
+ i40e_error(i40e, "failed to get supported interrupt types: %d",
+ rc);
+ return (B_FALSE);
+ }
+
+ i40e->i40e_intr_type = 0;
+
+ if ((intr_types & DDI_INTR_TYPE_MSIX) &&
+ i40e->i40e_intr_force <= I40E_INTR_MSIX) {
+ if (i40e_alloc_intr_handles(i40e, devinfo, DDI_INTR_TYPE_MSIX))
+ return (B_TRUE);
+ }
+
+ /*
+ * We only use multiple transmit/receive pairs when MSI-X interrupts are
+ * available due to the fact that the device basically only supports a
+ * single MSI interrupt.
+ */
+ i40e->i40e_num_trqpairs = I40E_TRQPAIR_NOMSIX;
+ i40e->i40e_num_rx_groups = I40E_GROUP_NOMSIX;
+
+ if ((intr_types & DDI_INTR_TYPE_MSI) &&
+ (i40e->i40e_intr_force <= I40E_INTR_MSI)) {
+ if (i40e_alloc_intr_handles(i40e, devinfo, DDI_INTR_TYPE_MSI))
+ return (B_TRUE);
+ }
+
+ if (intr_types & DDI_INTR_TYPE_FIXED) {
+ if (i40e_alloc_intr_handles(i40e, devinfo, DDI_INTR_TYPE_FIXED))
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
+/*
+ * Map different interrupts to MSI-X vectors.
+ */
+static boolean_t
+i40e_map_intrs_to_vectors(i40e_t *i40e)
+{
+ if (i40e->i40e_intr_type != DDI_INTR_TYPE_MSIX) {
+ return (B_TRUE);
+ }
+
+ /*
+ * At the moment, we only have one queue and one interrupt thus both are
+ * on that one interrupt. However, longer term we need to go back to
+ * using the ixgbe style map of queues to vectors or walk the linked
+ * list from the device to know what to go handle. Therefore for the
+ * moment, since we need to map our single set of rings to the one
+ * I/O interrupt that exists for MSI-X.
+ */
+ ASSERT(i40e->i40e_intr_count == 2);
+ ASSERT(i40e->i40e_num_trqpairs == 1);
+
+ i40e->i40e_trqpairs[0].itrq_rx_intrvec = 1;
+ i40e->i40e_trqpairs[0].itrq_tx_intrvec = 1;
+
+ return (B_TRUE);
+}
+
+static boolean_t
+i40e_add_intr_handlers(i40e_t *i40e)
+{
+ int rc, vector;
+
+ switch (i40e->i40e_intr_type) {
+ case DDI_INTR_TYPE_MSIX:
+ for (vector = 0; vector < i40e->i40e_intr_count; vector++) {
+ rc = ddi_intr_add_handler(
+ i40e->i40e_intr_handles[vector],
+ (ddi_intr_handler_t *)i40e_intr_msix, i40e,
+ (void *)(uintptr_t)vector);
+ if (rc != DDI_SUCCESS) {
+ i40e_log(i40e, "Add interrupt handler (MSI-X) "
+ "failed: return %d, vector %d", rc, vector);
+ for (vector--; vector >= 0; vector--) {
+ (void) ddi_intr_remove_handler(
+ i40e->i40e_intr_handles[vector]);
+ }
+ return (B_FALSE);
+ }
+ }
+ break;
+ case DDI_INTR_TYPE_MSI:
+ rc = ddi_intr_add_handler(i40e->i40e_intr_handles[0],
+ (ddi_intr_handler_t *)i40e_intr_msi, i40e, NULL);
+ if (rc != DDI_SUCCESS) {
+ i40e_log(i40e, "Add interrupt handler (MSI) failed: "
+ "return %d", rc);
+ return (B_FALSE);
+ }
+ break;
+ case DDI_INTR_TYPE_FIXED:
+ rc = ddi_intr_add_handler(i40e->i40e_intr_handles[0],
+ (ddi_intr_handler_t *)i40e_intr_legacy, i40e, NULL);
+ if (rc != DDI_SUCCESS) {
+ i40e_log(i40e, "Add interrupt handler (legacy) failed:"
+ " return %d", rc);
+ return (B_FALSE);
+ }
+ break;
+ default:
+ /* Cast to pacify lint */
+ panic("i40e_intr_type %p contains an unknown type: %d",
+ (void *)i40e, i40e->i40e_intr_type);
+ }
+
+ return (B_TRUE);
+}
+
+/*
+ * Perform periodic checks. Longer term, we should be thinking about additional
+ * things here:
+ *
+ * o Stall Detection
+ * o Temperature sensor detection
+ * o Device resetting
+ * o Statistics updating to avoid wraparound
+ */
+static void
+i40e_timer(void *arg)
+{
+ i40e_t *i40e = arg;
+
+ mutex_enter(&i40e->i40e_general_lock);
+ i40e_link_check(i40e);
+ mutex_exit(&i40e->i40e_general_lock);
+}
+
+/*
+ * Get the hardware state, and scribble away anything that needs scribbling.
+ */
+static void
+i40e_get_hw_state(i40e_t *i40e, i40e_hw_t *hw)
+{
+ int rc;
+
+ ASSERT(MUTEX_HELD(&i40e->i40e_general_lock));
+
+ (void) i40e_aq_get_link_info(hw, TRUE, NULL, NULL);
+ i40e_link_check(i40e);
+
+ /*
+ * Try and determine our PHY. Note that we may have to retry to and
+ * delay to detect fiber correctly.
+ */
+ rc = i40e_aq_get_phy_capabilities(hw, B_FALSE, B_TRUE, &i40e->i40e_phy,
+ NULL);
+ if (rc == I40E_ERR_UNKNOWN_PHY) {
+ i40e_msec_delay(200);
+ rc = i40e_aq_get_phy_capabilities(hw, B_FALSE, B_TRUE,
+ &i40e->i40e_phy, NULL);
+ }
+
+ if (rc != I40E_SUCCESS) {
+ if (rc == I40E_ERR_UNKNOWN_PHY) {
+ i40e_error(i40e, "encountered unknown PHY type, "
+ "not attaching.");
+ } else {
+ i40e_error(i40e, "error getting physical capabilities: "
+ "%d, %d", rc, hw->aq.asq_last_status);
+ }
+ }
+
+ rc = i40e_update_link_info(hw);
+ if (rc != I40E_SUCCESS) {
+ i40e_error(i40e, "failed to update link information: %d", rc);
+ }
+
+ /*
+ * In general, we don't want to mask off (as in stop from being a cause)
+ * any of the interrupts that the phy might be able to generate.
+ */
+ rc = i40e_aq_set_phy_int_mask(hw, 0, NULL);
+ if (rc != I40E_SUCCESS) {
+ i40e_error(i40e, "failed to update phy link mask: %d\n", rc);
+ }
+}
+
+/*
+ * Go through and re-initialize any existing filters that we may have set up for
+ * this device. Note that we would only expect them to exist if hardware had
+ * already been initialized and we had just reset it. While we're not
+ * implementing this yet, we're keeping this around for when we add reset
+ * capabilities, so this isn't forgotten.
+ */
+/* ARGSUSED */
+static void
+i40e_init_macaddrs(i40e_t *i40e, i40e_hw_t *hw)
+{
+}
+
+/*
+ * Configure the hardware for the Virtual Station Interface (VSI). Currently
+ * we only support one, but in the future we could instantiate more than one
+ * per attach-point.
+ */
+static boolean_t
+i40e_config_vsi(i40e_t *i40e, i40e_hw_t *hw)
+{
+ struct i40e_vsi_context context;
+ int err;
+
+ bzero(&context, sizeof (struct i40e_vsi_context));
+ context.seid = i40e->i40e_vsi_id;
+ context.pf_num = hw->pf_id;
+ err = i40e_aq_get_vsi_params(hw, &context, NULL);
+ if (err != I40E_SUCCESS) {
+ i40e_error(i40e, "get VSI params failed with %d\n", err);
+ return (B_FALSE);
+ }
+
+ /*
+ * Set the queue and traffic class bits. Keep it simple for now.
+ */
+ context.info.valid_sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
+ context.info.mapping_flags = I40E_AQ_VSI_QUE_MAP_CONTIG;
+ context.info.queue_mapping[0] = I40E_ASSIGN_ALL_QUEUES;
+ context.info.tc_mapping[0] = I40E_TRAFFIC_CLASS_NO_QUEUES;
+
+ context.info.valid_sections |= I40E_AQ_VSI_PROP_VLAN_VALID;
+ context.info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_ALL |
+ I40E_AQ_VSI_PVLAN_EMOD_NOTHING;
+
+ context.flags = LE16_TO_CPU(I40E_AQ_VSI_TYPE_PF);
+
+ i40e->i40e_vsi_stat_id = LE16_TO_CPU(context.info.stat_counter_idx);
+ if (i40e_stat_vsi_init(i40e) == B_FALSE)
+ return (B_FALSE);
+
+ err = i40e_aq_update_vsi_params(hw, &context, NULL);
+ if (err != I40E_SUCCESS) {
+ i40e_error(i40e, "Update VSI params failed with %d", err);
+ return (B_FALSE);
+ }
+
+
+ return (B_TRUE);
+}
+
+/*
+ * Wrapper to kick the chipset on.
+ */
+static boolean_t
+i40e_chip_start(i40e_t *i40e)
+{
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+ struct i40e_filter_control_settings filter;
+ int rc;
+
+ if (((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver < 33)) ||
+ (hw->aq.fw_maj_ver < 4)) {
+ i40e_msec_delay(75);
+ if (i40e_aq_set_link_restart_an(hw, TRUE, NULL) !=
+ I40E_SUCCESS) {
+ i40e_error(i40e, "failed to restart link: admin queue "
+ "error: %d\n", hw->aq.asq_last_status);
+ return (B_FALSE);
+ }
+ }
+
+ /* Determine hardware state */
+ i40e_get_hw_state(i40e, hw);
+
+ /* Initialize mac addresses. */
+ i40e_init_macaddrs(i40e, hw);
+
+ /*
+ * Set up the filter control.
+ */
+ bzero(&filter, sizeof (filter));
+ filter.enable_ethtype = TRUE;
+ filter.enable_macvlan = TRUE;
+
+ rc = i40e_set_filter_control(hw, &filter);
+ if (rc != I40E_SUCCESS) {
+ i40e_error(i40e, "i40e_set_filter_control() returned %d", rc);
+ return (B_FALSE);
+ }
+
+ i40e_intr_chip_init(i40e);
+
+ if (!i40e_config_vsi(i40e, hw))
+ return (B_FALSE);
+
+ i40e_flush(hw);
+
+ return (B_TRUE);
+}
+
+/*
+ * Take care of tearing down the rx ring. See 8.3.3.1.2 for more information.
+ */
+static void
+i40e_shutdown_rx_rings(i40e_t *i40e)
+{
+ int i;
+ uint32_t reg;
+
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+
+ /*
+ * Step 1. The interrupt linked list (see i40e_intr.c for more
+ * information) should have already been cleared before calling this
+ * function.
+ */
+#ifdef DEBUG
+ if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
+ for (i = 1; i < i40e->i40e_intr_count; i++) {
+ reg = I40E_READ_REG(hw, I40E_PFINT_LNKLSTN(i - 1));
+ VERIFY3U(reg, ==, I40E_QUEUE_TYPE_EOL);
+ }
+ } else {
+ reg = I40E_READ_REG(hw, I40E_PFINT_LNKLST0);
+ VERIFY3U(reg, ==, I40E_QUEUE_TYPE_EOL);
+ }
+
+#endif /* DEBUG */
+
+ for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
+ /*
+ * Step 1. Request the queue by clearing QENA_REQ. It may not be
+ * set due to unwinding from failures and a partially enabled
+ * ring set.
+ */
+ reg = I40E_READ_REG(hw, I40E_QRX_ENA(i));
+ if (!(reg & I40E_QRX_ENA_QENA_REQ_MASK))
+ continue;
+ VERIFY((reg & I40E_QRX_ENA_QENA_REQ_MASK) ==
+ I40E_QRX_ENA_QENA_REQ_MASK);
+ reg &= ~I40E_QRX_ENA_QENA_REQ_MASK;
+ I40E_WRITE_REG(hw, I40E_QRX_ENA(i), reg);
+ }
+
+ /*
+ * Step 2. Wait for the disable to take, by having QENA_STAT in the FPM
+ * be cleared. Note that we could still receive data in the queue during
+ * this time. We don't actually wait for this now and instead defer this
+ * to i40e_shutdown_rings_wait(), after we've interleaved disabling the
+ * TX queues as well.
+ */
+}
+
+static void
+i40e_shutdown_tx_rings(i40e_t *i40e)
+{
+ int i;
+ uint32_t reg;
+
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+
+ /*
+ * Step 1. The interrupt linked list should already have been cleared.
+ */
+#ifdef DEBUG
+ if (i40e->i40e_intr_type == DDI_INTR_TYPE_MSIX) {
+ for (i = 1; i < i40e->i40e_intr_count; i++) {
+ reg = I40E_READ_REG(hw, I40E_PFINT_LNKLSTN(i - 1));
+ VERIFY3U(reg, ==, I40E_QUEUE_TYPE_EOL);
+ }
+ } else {
+ reg = I40E_READ_REG(hw, I40E_PFINT_LNKLST0);
+ VERIFY3U(reg, ==, I40E_QUEUE_TYPE_EOL);
+
+ }
+#endif /* DEBUG */
+
+ for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
+ /*
+ * Step 2. Set the SET_QDIS flag for every queue.
+ */
+ i40e_pre_tx_queue_cfg(hw, i, B_FALSE);
+ }
+
+ /*
+ * Step 3. Wait at least 400 usec (can be done once for all queues).
+ */
+ drv_usecwait(500);
+
+ for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
+ /*
+ * Step 4. Clear the QENA_REQ flag which tells hardware to
+ * quiesce. If QENA_REQ is not already set then that means that
+ * we likely already tried to disable this queue.
+ */
+ reg = I40E_READ_REG(hw, I40E_QTX_ENA(i));
+ if (!(reg & I40E_QTX_ENA_QENA_REQ_MASK))
+ continue;
+ reg &= ~I40E_QTX_ENA_QENA_REQ_MASK;
+ I40E_WRITE_REG(hw, I40E_QTX_ENA(i), reg);
+ }
+
+ /*
+ * Step 5. Wait for all drains to finish. This will be done by the
+ * hardware removing the QENA_STAT flag from the queue. Rather than
+ * waiting here, we interleave it with all the others in
+ * i40e_shutdown_rings_wait().
+ */
+}
+
+/*
+ * Wait for all the rings to be shut down. e.g. Steps 2 and 5 from the above
+ * functions.
+ */
+static boolean_t
+i40e_shutdown_rings_wait(i40e_t *i40e)
+{
+ int i, try;
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+
+ for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
+ uint32_t reg;
+
+ for (try = 0; try < I40E_RING_WAIT_NTRIES; try++) {
+ reg = I40E_READ_REG(hw, I40E_QRX_ENA(i));
+ if ((reg & I40E_QRX_ENA_QENA_STAT_MASK) == 0)
+ break;
+ i40e_msec_delay(I40E_RING_WAIT_PAUSE);
+ }
+
+ if ((reg & I40E_QRX_ENA_QENA_STAT_MASK) != 0) {
+ i40e_error(i40e, "timed out disabling rx queue %d\n",
+ i);
+ return (B_FALSE);
+ }
+
+ for (try = 0; try < I40E_RING_WAIT_NTRIES; try++) {
+ reg = I40E_READ_REG(hw, I40E_QTX_ENA(i));
+ if ((reg & I40E_QTX_ENA_QENA_STAT_MASK) == 0)
+ break;
+ i40e_msec_delay(I40E_RING_WAIT_PAUSE);
+ }
+
+ if ((reg & I40E_QTX_ENA_QENA_STAT_MASK) != 0) {
+ i40e_error(i40e, "timed out disabling tx queue %d\n",
+ i);
+ return (B_FALSE);
+ }
+ }
+
+ return (B_TRUE);
+}
+
+static boolean_t
+i40e_shutdown_rings(i40e_t *i40e)
+{
+ i40e_shutdown_rx_rings(i40e);
+ i40e_shutdown_tx_rings(i40e);
+ return (i40e_shutdown_rings_wait(i40e));
+}
+
+static void
+i40e_setup_rx_descs(i40e_trqpair_t *itrq)
+{
+ int i;
+ i40e_rx_data_t *rxd = itrq->itrq_rxdata;
+
+ for (i = 0; i < rxd->rxd_ring_size; i++) {
+ i40e_rx_control_block_t *rcb;
+ i40e_rx_desc_t *rdesc;
+
+ rcb = rxd->rxd_work_list[i];
+ rdesc = &rxd->rxd_desc_ring[i];
+
+ rdesc->read.pkt_addr =
+ CPU_TO_LE64((uintptr_t)rcb->rcb_dma.dmab_dma_address);
+ rdesc->read.hdr_addr = 0;
+ }
+}
+
+static boolean_t
+i40e_setup_rx_hmc(i40e_trqpair_t *itrq)
+{
+ i40e_rx_data_t *rxd = itrq->itrq_rxdata;
+ i40e_t *i40e = itrq->itrq_i40e;
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+
+ struct i40e_hmc_obj_rxq rctx;
+ int err;
+
+ bzero(&rctx, sizeof (struct i40e_hmc_obj_rxq));
+ rctx.base = rxd->rxd_desc_area.dmab_dma_address /
+ I40E_HMC_RX_CTX_UNIT;
+ rctx.qlen = rxd->rxd_ring_size;
+ VERIFY(i40e->i40e_rx_buf_size >= I40E_HMC_RX_DBUFF_MIN);
+ VERIFY(i40e->i40e_rx_buf_size <= I40E_HMC_RX_DBUFF_MAX);
+ rctx.dbuff = i40e->i40e_rx_buf_size >> I40E_RXQ_CTX_DBUFF_SHIFT;
+ rctx.hbuff = 0 >> I40E_RXQ_CTX_HBUFF_SHIFT;
+ rctx.dtype = I40E_HMC_RX_DTYPE_NOSPLIT;
+ rctx.dsize = I40E_HMC_RX_DSIZE_32BYTE;
+ rctx.crcstrip = I40E_HMC_RX_CRCSTRIP_ENABLE;
+ rctx.fc_ena = I40E_HMC_RX_FC_DISABLE;
+ rctx.l2tsel = I40E_HMC_RX_L2TAGORDER;
+ rctx.hsplit_0 = I40E_HMC_RX_HDRSPLIT_DISABLE;
+ rctx.hsplit_1 = I40E_HMC_RX_HDRSPLIT_DISABLE;
+ rctx.showiv = I40E_HMC_RX_INVLAN_DONTSTRIP;
+ rctx.rxmax = i40e->i40e_frame_max;
+ rctx.tphrdesc_ena = I40E_HMC_RX_TPH_DISABLE;
+ rctx.tphwdesc_ena = I40E_HMC_RX_TPH_DISABLE;
+ rctx.tphdata_ena = I40E_HMC_RX_TPH_DISABLE;
+ rctx.tphhead_ena = I40E_HMC_RX_TPH_DISABLE;
+ rctx.lrxqthresh = I40E_HMC_RX_LOWRXQ_NOINTR;
+
+ /*
+ * This must be set to 0x1, see Table 8-12 in section 8.3.3.2.2.
+ */
+ rctx.prefena = I40E_HMC_RX_PREFENA;
+
+ err = i40e_clear_lan_rx_queue_context(hw, itrq->itrq_index);
+ if (err != I40E_SUCCESS) {
+ i40e_error(i40e, "failed to clear rx queue %d context: %d\n",
+ itrq->itrq_index, err);
+ return (B_FALSE);
+ }
+
+ err = i40e_set_lan_rx_queue_context(hw, itrq->itrq_index, &rctx);
+ if (err != I40E_SUCCESS) {
+ i40e_error(i40e, "failed to set rx queue %d context: %d\n",
+ itrq->itrq_index, err);
+ return (B_FALSE);
+ }
+
+ return (B_TRUE);
+}
+
+/*
+ * Take care of setting up the descriptor rings and actually programming the
+ * device. See 8.3.3.1.1 for the full list of steps we need to do to enable the
+ * rx rings.
+ */
+static boolean_t
+i40e_setup_rx_rings(i40e_t *i40e)
+{
+ int i;
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+
+ for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
+ i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[i];
+ i40e_rx_data_t *rxd = itrq->itrq_rxdata;
+ uint32_t reg;
+
+ /*
+ * Step 1. Program all receive ring descriptors.
+ */
+ i40e_setup_rx_descs(itrq);
+
+ /*
+ * Step 2. Program the queue's FPM/HMC context.
+ */
+ if (i40e_setup_rx_hmc(itrq) == B_FALSE)
+ return (B_FALSE);
+
+ /*
+ * Step 3. Clear the queue's tail pointer and set it to the end
+ * of the space.
+ */
+ I40E_WRITE_REG(hw, I40E_QRX_TAIL(i), 0);
+ I40E_WRITE_REG(hw, I40E_QRX_TAIL(i), rxd->rxd_ring_size - 1);
+
+ /*
+ * Step 4. Enable the queue via the QENA_REQ.
+ */
+ reg = I40E_READ_REG(hw, I40E_QRX_ENA(i));
+ VERIFY0(reg & (I40E_QRX_ENA_QENA_REQ_MASK |
+ I40E_QRX_ENA_QENA_STAT_MASK));
+ reg |= I40E_QRX_ENA_QENA_REQ_MASK;
+ I40E_WRITE_REG(hw, I40E_QRX_ENA(i), reg);
+ }
+
+ /*
+ * Note, we wait for every queue to be enabled before we start checking.
+ * This will hopefully cause most queues to be enabled at this point.
+ */
+ for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
+ uint32_t j, reg;
+
+ /*
+ * Step 5. Verify that QENA_STAT has been set. It's promised
+ * that this should occur within about 10 us, but like other
+ * systems, we give the card a bit more time.
+ */
+ for (j = 0; j < I40E_RING_WAIT_NTRIES; j++) {
+ reg = I40E_READ_REG(hw, I40E_QRX_ENA(i));
+
+ if (reg & I40E_QRX_ENA_QENA_STAT_MASK)
+ break;
+ i40e_msec_delay(I40E_RING_WAIT_PAUSE);
+ }
+
+ if ((reg & I40E_QRX_ENA_QENA_STAT_MASK) == 0) {
+ i40e_error(i40e, "failed to enable rx queue %d, timed "
+ "out.");
+ return (B_FALSE);
+ }
+ }
+
+ return (B_TRUE);
+}
+
+static boolean_t
+i40e_setup_tx_hmc(i40e_trqpair_t *itrq)
+{
+ i40e_t *i40e = itrq->itrq_i40e;
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+
+ struct i40e_hmc_obj_txq tctx;
+ struct i40e_vsi_context context;
+ int err;
+
+ bzero(&tctx, sizeof (struct i40e_hmc_obj_txq));
+ tctx.new_context = I40E_HMC_TX_NEW_CONTEXT;
+ tctx.base = itrq->itrq_desc_area.dmab_dma_address /
+ I40E_HMC_TX_CTX_UNIT;
+ tctx.fc_ena = I40E_HMC_TX_FC_DISABLE;
+ tctx.timesync_ena = I40E_HMC_TX_TS_DISABLE;
+ tctx.fd_ena = I40E_HMC_TX_FD_DISABLE;
+ tctx.alt_vlan_ena = I40E_HMC_TX_ALT_VLAN_DISABLE;
+ tctx.head_wb_ena = I40E_HMC_TX_WB_ENABLE;
+ tctx.qlen = itrq->itrq_tx_ring_size;
+ tctx.tphrdesc_ena = I40E_HMC_TX_TPH_DISABLE;
+ tctx.tphrpacket_ena = I40E_HMC_TX_TPH_DISABLE;
+ tctx.tphwdesc_ena = I40E_HMC_TX_TPH_DISABLE;
+ tctx.head_wb_addr = itrq->itrq_desc_area.dmab_dma_address +
+ sizeof (i40e_tx_desc_t) * itrq->itrq_tx_ring_size;
+
+ /*
+ * This field isn't actually documented, like crc, but it suggests that
+ * it should be zeroed. We leave both of these here because of that for
+ * now. We should check with Intel on why these are here even.
+ */
+ tctx.crc = 0;
+ tctx.rdylist_act = 0;
+
+ /*
+ * We're supposed to assign the rdylist field with the value of the
+ * traffic class index for the first device. We query the VSI parameters
+ * again to get what the handle is. Note that every queue is always
+ * assigned to traffic class zero, because we don't actually use them.
+ */
+ bzero(&context, sizeof (struct i40e_vsi_context));
+ context.seid = i40e->i40e_vsi_id;
+ context.pf_num = hw->pf_id;
+ err = i40e_aq_get_vsi_params(hw, &context, NULL);
+ if (err != I40E_SUCCESS) {
+ i40e_error(i40e, "get VSI params failed with %d\n", err);
+ return (B_FALSE);
+ }
+ tctx.rdylist = LE_16(context.info.qs_handle[0]);
+
+ err = i40e_clear_lan_tx_queue_context(hw, itrq->itrq_index);
+ if (err != I40E_SUCCESS) {
+ i40e_error(i40e, "failed to clear tx queue %d context: %d\n",
+ itrq->itrq_index, err);
+ return (B_FALSE);
+ }
+
+ err = i40e_set_lan_tx_queue_context(hw, itrq->itrq_index, &tctx);
+ if (err != I40E_SUCCESS) {
+ i40e_error(i40e, "failed to set tx queue %d context: %d\n",
+ itrq->itrq_index, err);
+ return (B_FALSE);
+ }
+
+ return (B_TRUE);
+}
+
+/*
+ * Take care of setting up the descriptor rings and actually programming the
+ * device. See 8.4.3.1.1 for what we need to do here.
+ */
+static boolean_t
+i40e_setup_tx_rings(i40e_t *i40e)
+{
+ int i;
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+
+ for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
+ i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[i];
+ uint32_t reg;
+
+ /*
+ * Step 1. Clear the queue disable flag and verify that the
+ * index is set correctly.
+ */
+ i40e_pre_tx_queue_cfg(hw, i, B_TRUE);
+
+ /*
+ * Step 2. Prepare the queue's FPM/HMC context.
+ */
+ if (i40e_setup_tx_hmc(itrq) == B_FALSE)
+ return (B_FALSE);
+
+ /*
+ * Step 3. Verify that it's clear that this PF owns this queue.
+ */
+ reg = I40E_QTX_CTL_PF_QUEUE;
+ reg |= (hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT) &
+ I40E_QTX_CTL_PF_INDX_MASK;
+ I40E_WRITE_REG(hw, I40E_QTX_CTL(itrq->itrq_index), reg);
+ i40e_flush(hw);
+
+ /*
+ * Step 4. Set the QENA_REQ flag.
+ */
+ reg = I40E_READ_REG(hw, I40E_QTX_ENA(i));
+ VERIFY0(reg & (I40E_QTX_ENA_QENA_REQ_MASK |
+ I40E_QTX_ENA_QENA_STAT_MASK));
+ reg |= I40E_QTX_ENA_QENA_REQ_MASK;
+ I40E_WRITE_REG(hw, I40E_QTX_ENA(i), reg);
+ }
+
+ /*
+ * Note, we wait for every queue to be enabled before we start checking.
+ * This will hopefully cause most queues to be enabled at this point.
+ */
+ for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
+ uint32_t j, reg;
+
+ /*
+ * Step 5. Verify that QENA_STAT has been set. It's promised
+ * that this should occur within about 10 us, but like BSD,
+ * we'll try for up to 100 ms for this queue.
+ */
+ for (j = 0; j < I40E_RING_WAIT_NTRIES; j++) {
+ reg = I40E_READ_REG(hw, I40E_QTX_ENA(i));
+
+ if (reg & I40E_QTX_ENA_QENA_STAT_MASK)
+ break;
+ i40e_msec_delay(I40E_RING_WAIT_PAUSE);
+ }
+
+ if ((reg & I40E_QTX_ENA_QENA_STAT_MASK) == 0) {
+ i40e_error(i40e, "failed to enable tx queue %d, timed "
+ "out");
+ return (B_FALSE);
+ }
+ }
+
+ return (B_TRUE);
+}
+
+void
+i40e_stop(i40e_t *i40e, boolean_t free_allocations)
+{
+ int i;
+
+ ASSERT(MUTEX_HELD(&i40e->i40e_general_lock));
+
+ /*
+ * Shutdown and drain the tx and rx pipeline. We do this using the
+ * following steps.
+ *
+ * 1) Shutdown interrupts to all the queues (trying to keep the admin
+ * queue alive).
+ *
+ * 2) Remove all of the interrupt tx and rx causes by setting the
+ * interrupt linked lists to zero.
+ *
+ * 2) Shutdown the tx and rx rings. Because i40e_shutdown_rings() should
+ * wait for all the queues to be disabled, once we reach that point
+ * it should be safe to free associated data.
+ *
+ * 4) Wait 50ms after all that is done. This ensures that the rings are
+ * ready for programming again and we don't have to think about this
+ * in other parts of the driver.
+ *
+ * 5) Disable remaining chip interrupts, (admin queue, etc.)
+ *
+ * 6) Verify that FM is happy with all the register accesses we
+ * performed.
+ */
+ i40e_intr_io_disable_all(i40e);
+ i40e_intr_io_clear_cause(i40e);
+
+ if (i40e_shutdown_rings(i40e) == B_FALSE) {
+ ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_LOST);
+ }
+
+ delay(50 * drv_usectohz(1000));
+
+ i40e_intr_chip_fini(i40e);
+
+ for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
+ mutex_enter(&i40e->i40e_trqpairs[i].itrq_rx_lock);
+ mutex_enter(&i40e->i40e_trqpairs[i].itrq_tx_lock);
+ }
+
+ /*
+ * We should consider refactoring this to be part of the ring start /
+ * stop routines at some point.
+ */
+ for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
+ i40e_stats_trqpair_fini(&i40e->i40e_trqpairs[i]);
+ }
+
+ if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_cfg_handle) !=
+ DDI_FM_OK) {
+ ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_LOST);
+ }
+
+ for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
+ i40e_tx_cleanup_ring(&i40e->i40e_trqpairs[i]);
+ }
+
+ for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
+ mutex_exit(&i40e->i40e_trqpairs[i].itrq_rx_lock);
+ mutex_exit(&i40e->i40e_trqpairs[i].itrq_tx_lock);
+ }
+
+ i40e_stat_vsi_fini(i40e);
+
+ i40e->i40e_link_speed = 0;
+ i40e->i40e_link_duplex = 0;
+ i40e_link_state_set(i40e, LINK_STATE_UNKNOWN);
+
+ if (free_allocations) {
+ i40e_free_ring_mem(i40e, B_FALSE);
+ }
+}
+
+boolean_t
+i40e_start(i40e_t *i40e, boolean_t alloc)
+{
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+ boolean_t rc = B_TRUE;
+ int i, err;
+
+ ASSERT(MUTEX_HELD(&i40e->i40e_general_lock));
+
+ if (alloc) {
+ if (i40e_alloc_ring_mem(i40e) == B_FALSE) {
+ i40e_error(i40e,
+ "Failed to allocate ring memory");
+ return (B_FALSE);
+ }
+ }
+
+ /*
+ * This should get refactored to be part of ring start and stop at
+ * some point, along with most of the logic here.
+ */
+ for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
+ if (i40e_stats_trqpair_init(&i40e->i40e_trqpairs[i]) ==
+ B_FALSE) {
+ int j;
+
+ for (j = 0; j < i; j++) {
+ i40e_trqpair_t *itrq = &i40e->i40e_trqpairs[j];
+ i40e_stats_trqpair_fini(itrq);
+ }
+ return (B_FALSE);
+ }
+ }
+
+ if (!i40e_chip_start(i40e)) {
+ i40e_fm_ereport(i40e, DDI_FM_DEVICE_INVAL_STATE);
+ rc = B_FALSE;
+ goto done;
+ }
+
+ if (i40e_setup_rx_rings(i40e) == B_FALSE) {
+ rc = B_FALSE;
+ goto done;
+ }
+
+ if (i40e_setup_tx_rings(i40e) == B_FALSE) {
+ rc = B_FALSE;
+ goto done;
+ }
+
+ /*
+ * Enable broadcast traffic; however, do not enable multicast traffic.
+ * That's handle exclusively through MAC's mc_multicst routines.
+ */
+ err = i40e_aq_set_vsi_broadcast(hw, i40e->i40e_vsi_id, B_TRUE, NULL);
+ if (err != I40E_SUCCESS) {
+ i40e_error(i40e, "failed to set default VSI: %d\n", err);
+ rc = B_FALSE;
+ goto done;
+ }
+
+ err = i40e_aq_set_mac_config(hw, i40e->i40e_frame_max, B_TRUE, 0, NULL);
+ if (err != I40E_SUCCESS) {
+ i40e_error(i40e, "failed to set MAC config: %d\n", err);
+ rc = B_FALSE;
+ goto done;
+ }
+
+ /*
+ * Finally, make sure that we're happy from an FM perspective.
+ */
+ if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) !=
+ DDI_FM_OK) {
+ rc = B_FALSE;
+ goto done;
+ }
+
+ /* Clear state bits prior to final interrupt enabling. */
+ atomic_and_32(&i40e->i40e_state,
+ ~(I40E_ERROR | I40E_STALL | I40E_OVERTEMP));
+
+ i40e_intr_io_enable_all(i40e);
+
+done:
+ if (rc == B_FALSE) {
+ i40e_stop(i40e, B_FALSE);
+ if (alloc == B_TRUE) {
+ i40e_free_ring_mem(i40e, B_TRUE);
+ }
+ ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_LOST);
+ }
+
+ return (rc);
+}
+
+/*
+ * We may have loaned up descriptors to the stack. As such, if we still have
+ * them outstanding, then we will not continue with detach.
+ */
+static boolean_t
+i40e_drain_rx(i40e_t *i40e)
+{
+ mutex_enter(&i40e->i40e_rx_pending_lock);
+ while (i40e->i40e_rx_pending > 0) {
+ if (cv_reltimedwait(&i40e->i40e_rx_pending_cv,
+ &i40e->i40e_rx_pending_lock,
+ drv_usectohz(I40E_DRAIN_RX_WAIT), TR_CLOCK_TICK) == -1) {
+ mutex_exit(&i40e->i40e_rx_pending_lock);
+ return (B_FALSE);
+ }
+ }
+ mutex_exit(&i40e->i40e_rx_pending_lock);
+
+ return (B_TRUE);
+}
+
+static int
+i40e_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
+{
+ i40e_t *i40e;
+ struct i40e_osdep *osdep;
+ i40e_hw_t *hw;
+ int instance;
+
+ if (cmd != DDI_ATTACH)
+ return (DDI_FAILURE);
+
+ instance = ddi_get_instance(devinfo);
+ i40e = kmem_zalloc(sizeof (i40e_t), KM_SLEEP);
+
+ i40e->i40e_aqbuf = kmem_zalloc(I40E_ADMINQ_BUFSZ, KM_SLEEP);
+ i40e->i40e_instance = instance;
+ i40e->i40e_dip = devinfo;
+
+ hw = &i40e->i40e_hw_space;
+ osdep = &i40e->i40e_osdep_space;
+ hw->back = osdep;
+ osdep->ios_i40e = i40e;
+
+ ddi_set_driver_private(devinfo, i40e);
+
+ i40e_fm_init(i40e);
+ i40e->i40e_attach_progress |= I40E_ATTACH_FM_INIT;
+
+ if (pci_config_setup(devinfo, &osdep->ios_cfg_handle) != DDI_SUCCESS) {
+ i40e_error(i40e, "Failed to map PCI configurations.");
+ goto attach_fail;
+ }
+ i40e->i40e_attach_progress |= I40E_ATTACH_PCI_CONFIG;
+
+ if (!i40e_identify_hardware(i40e)) {
+ i40e_error(i40e, "Failed to identify hardware");
+ goto attach_fail;
+ }
+
+ if (!i40e_regs_map(i40e)) {
+ i40e_error(i40e, "Failed to map device registers.");
+ goto attach_fail;
+ }
+ i40e->i40e_attach_progress |= I40E_ATTACH_REGS_MAP;
+
+ i40e_init_properties(i40e);
+ i40e->i40e_attach_progress |= I40E_ATTACH_PROPS;
+
+ if (!i40e_common_code_init(i40e, hw))
+ goto attach_fail;
+ i40e->i40e_attach_progress |= I40E_ATTACH_COMMON_CODE;
+
+ /*
+ * When we participate in IRM, we should make sure that we register
+ * ourselves with it before callbacks.
+ */
+ if (!i40e_alloc_intrs(i40e, devinfo)) {
+ i40e_error(i40e, "Failed to allocate interrupts.");
+ goto attach_fail;
+ }
+ i40e->i40e_attach_progress |= I40E_ATTACH_ALLOC_INTR;
+
+ if (!i40e_alloc_trqpairs(i40e)) {
+ i40e_error(i40e,
+ "Failed to allocate receive & transmit rings.");
+ goto attach_fail;
+ }
+ i40e->i40e_attach_progress |= I40E_ATTACH_ALLOC_RINGSLOCKS;
+
+ if (!i40e_map_intrs_to_vectors(i40e)) {
+ i40e_error(i40e, "Failed to map interrupts to vectors.");
+ goto attach_fail;
+ }
+
+ if (!i40e_add_intr_handlers(i40e)) {
+ i40e_error(i40e, "Failed to add the interrupt handlers.");
+ goto attach_fail;
+ }
+ i40e->i40e_attach_progress |= I40E_ATTACH_ADD_INTR;
+
+ if (!i40e_final_init(i40e)) {
+ i40e_error(i40e, "Final initialization failed.");
+ goto attach_fail;
+ }
+ i40e->i40e_attach_progress |= I40E_ATTACH_INIT;
+
+ if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_cfg_handle) !=
+ DDI_FM_OK) {
+ ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_LOST);
+ goto attach_fail;
+ }
+
+ if (!i40e_stats_init(i40e)) {
+ i40e_error(i40e, "Stats initialization failed.");
+ goto attach_fail;
+ }
+ i40e->i40e_attach_progress |= I40E_ATTACH_STATS;
+
+ if (!i40e_register_mac(i40e)) {
+ i40e_error(i40e, "Failed to register to MAC/GLDv3");
+ goto attach_fail;
+ }
+ i40e->i40e_attach_progress |= I40E_ATTACH_MAC;
+
+ i40e->i40e_periodic_id = ddi_periodic_add(i40e_timer, i40e,
+ I40E_CYCLIC_PERIOD, DDI_IPL_0);
+ if (i40e->i40e_periodic_id == 0) {
+ i40e_error(i40e, "Failed to add the link-check timer");
+ goto attach_fail;
+ }
+ i40e->i40e_attach_progress |= I40E_ATTACH_LINK_TIMER;
+
+ if (!i40e_enable_interrupts(i40e)) {
+ i40e_error(i40e, "Failed to enable DDI interrupts");
+ goto attach_fail;
+ }
+ i40e->i40e_attach_progress |= I40E_ATTACH_ENABLE_INTR;
+
+ atomic_or_32(&i40e->i40e_state, I40E_INITIALIZED);
+
+ mutex_enter(&i40e_glock);
+ list_insert_tail(&i40e_glist, i40e);
+ mutex_exit(&i40e_glock);
+
+ return (DDI_SUCCESS);
+
+attach_fail:
+ i40e_unconfigure(devinfo, i40e);
+ return (DDI_FAILURE);
+}
+
+static int
+i40e_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
+{
+ i40e_t *i40e;
+
+ if (cmd != DDI_DETACH)
+ return (DDI_FAILURE);
+
+ i40e = (i40e_t *)ddi_get_driver_private(devinfo);
+ if (i40e == NULL) {
+ i40e_log(NULL, "i40e_detach() called with no i40e pointer!");
+ return (DDI_FAILURE);
+ }
+
+ if (i40e_drain_rx(i40e) == B_FALSE) {
+ i40e_log(i40e, "timed out draining DMA resources, %d buffers "
+ "remain", i40e->i40e_rx_pending);
+ return (DDI_FAILURE);
+ }
+
+ mutex_enter(&i40e_glock);
+ list_remove(&i40e_glist, i40e);
+ mutex_exit(&i40e_glock);
+
+ i40e_unconfigure(devinfo, i40e);
+
+ return (DDI_SUCCESS);
+}
+
+static struct cb_ops i40e_cb_ops = {
+ nulldev, /* cb_open */
+ nulldev, /* cb_close */
+ nodev, /* cb_strategy */
+ nodev, /* cb_print */
+ nodev, /* cb_dump */
+ nodev, /* cb_read */
+ nodev, /* cb_write */
+ nodev, /* cb_ioctl */
+ nodev, /* cb_devmap */
+ nodev, /* cb_mmap */
+ nodev, /* cb_segmap */
+ nochpoll, /* cb_chpoll */
+ ddi_prop_op, /* cb_prop_op */
+ NULL, /* cb_stream */
+ D_MP | D_HOTPLUG, /* cb_flag */
+ CB_REV, /* cb_rev */
+ nodev, /* cb_aread */
+ nodev /* cb_awrite */
+};
+
+static struct dev_ops i40e_dev_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* devo_refcnt */
+ NULL, /* devo_getinfo */
+ nulldev, /* devo_identify */
+ nulldev, /* devo_probe */
+ i40e_attach, /* devo_attach */
+ i40e_detach, /* devo_detach */
+ nodev, /* devo_reset */
+ &i40e_cb_ops, /* devo_cb_ops */
+ NULL, /* devo_bus_ops */
+ ddi_power, /* devo_power */
+ ddi_quiesce_not_supported /* devo_quiesce */
+};
+
+static struct modldrv i40e_modldrv = {
+ &mod_driverops,
+ i40e_ident,
+ &i40e_dev_ops
+};
+
+static struct modlinkage i40e_modlinkage = {
+ MODREV_1,
+ &i40e_modldrv,
+ NULL
+};
+
+/*
+ * Module Initialization Functions.
+ */
+int
+_init(void)
+{
+ int status;
+
+ list_create(&i40e_glist, sizeof (i40e_t), offsetof(i40e_t, i40e_glink));
+ list_create(&i40e_dlist, sizeof (i40e_device_t),
+ offsetof(i40e_device_t, id_link));
+ mutex_init(&i40e_glock, NULL, MUTEX_DRIVER, NULL);
+ mac_init_ops(&i40e_dev_ops, I40E_MODULE_NAME);
+
+ status = mod_install(&i40e_modlinkage);
+ if (status != DDI_SUCCESS) {
+ mac_fini_ops(&i40e_dev_ops);
+ mutex_destroy(&i40e_glock);
+ list_destroy(&i40e_dlist);
+ list_destroy(&i40e_glist);
+ }
+
+ return (status);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&i40e_modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ int status;
+
+ status = mod_remove(&i40e_modlinkage);
+ if (status == DDI_SUCCESS) {
+ mac_fini_ops(&i40e_dev_ops);
+ mutex_destroy(&i40e_glock);
+ list_destroy(&i40e_dlist);
+ list_destroy(&i40e_glist);
+ }
+
+ return (status);
+}
diff --git a/usr/src/uts/common/io/i40e/i40e_osdep.c b/usr/src/uts/common/io/i40e/i40e_osdep.c
new file mode 100644
index 0000000000..41a13ee4ec
--- /dev/null
+++ b/usr/src/uts/common/io/i40e/i40e_osdep.c
@@ -0,0 +1,236 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include "i40e_sw.h"
+#include "i40e_type.h"
+#include "i40e_alloc.h"
+#include "i40e_osdep.h"
+
+#include <sys/dtrace.h>
+
+/* ARGSUSED */
+i40e_status
+i40e_allocate_virt_mem(struct i40e_hw *hw, struct i40e_virt_mem *mem, u32 size)
+{
+ mem->va = kmem_zalloc(size, KM_SLEEP);
+ mem->size = size;
+ return (I40E_SUCCESS);
+}
+
+/* ARGSUSED */
+i40e_status
+i40e_free_virt_mem(struct i40e_hw *hw, struct i40e_virt_mem *mem)
+{
+ if (mem->va != NULL)
+ kmem_free(mem->va, mem->size);
+ return (I40E_SUCCESS);
+}
+
+/* ARGSUSED */
+i40e_status
+i40e_allocate_dma_mem(struct i40e_hw *hw, struct i40e_dma_mem *mem,
+ enum i40e_memory_type type, u64 size, u32 alignment)
+{
+ int rc;
+ i40e_t *i40e = OS_DEP(hw)->ios_i40e;
+ dev_info_t *dip = i40e->i40e_dip;
+ size_t len;
+ ddi_dma_cookie_t cookie;
+ uint_t cookie_num;
+ ddi_dma_attr_t attr;
+
+ /*
+ * Because we need to honor the specified alignment, we need to
+ * dynamically construct the attributes. We save the alignment for
+ * debugging purposes.
+ */
+ bcopy(&i40e->i40e_static_dma_attr, &attr, sizeof (ddi_dma_attr_t));
+ attr.dma_attr_align = alignment;
+ mem->idm_alignment = alignment;
+ rc = ddi_dma_alloc_handle(dip, &i40e->i40e_static_dma_attr,
+ DDI_DMA_DONTWAIT, NULL, &mem->idm_dma_handle);
+ if (rc != DDI_SUCCESS) {
+ mem->idm_dma_handle = NULL;
+ i40e_error(i40e, "failed to allocate DMA handle for common "
+ "code: %d", rc);
+
+ /*
+ * Swallow unknown errors and treat them like we do
+ * DDI_DMA_NORESOURCES, in other words, a memory error.
+ */
+ if (rc == DDI_DMA_BADATTR)
+ return (I40E_ERR_PARAM);
+ return (I40E_ERR_NO_MEMORY);
+ }
+
+ rc = ddi_dma_mem_alloc(mem->idm_dma_handle, size,
+ &i40e->i40e_buf_acc_attr, DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
+ NULL, (caddr_t *)&mem->va, &len, &mem->idm_acc_handle);
+ if (rc != DDI_SUCCESS) {
+ mem->idm_acc_handle = NULL;
+ mem->va = NULL;
+ ASSERT(mem->idm_dma_handle != NULL);
+ ddi_dma_free_handle(&mem->idm_dma_handle);
+ mem->idm_dma_handle = NULL;
+
+ i40e_error(i40e, "failed to allocate %d bytes of DMA memory "
+ "for common code", size);
+ return (I40E_ERR_NO_MEMORY);
+ }
+
+ bzero(mem->va, len);
+
+ rc = ddi_dma_addr_bind_handle(mem->idm_dma_handle, NULL, mem->va, len,
+ DDI_DMA_RDWR | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, NULL,
+ &cookie, &cookie_num);
+ if (rc != DDI_DMA_MAPPED) {
+ mem->pa = NULL;
+ ASSERT(mem->idm_acc_handle != NULL);
+ ddi_dma_mem_free(&mem->idm_acc_handle);
+ mem->idm_acc_handle = NULL;
+ mem->va = NULL;
+ ASSERT(mem->idm_dma_handle != NULL);
+ ddi_dma_free_handle(&mem->idm_dma_handle);
+ mem->idm_dma_handle = NULL;
+
+ i40e_error(i40e, "failed to bind %d byte sized dma region: %d",
+ len, rc);
+ switch (rc) {
+ case DDI_DMA_INUSE:
+ return (I40E_ERR_NOT_READY);
+ case DDI_DMA_TOOBIG:
+ return (I40E_ERR_INVALID_SIZE);
+ case DDI_DMA_NOMAPPING:
+ case DDI_DMA_NORESOURCES:
+ default:
+ return (I40E_ERR_NO_MEMORY);
+ }
+ }
+
+ ASSERT(cookie_num == 1);
+ mem->pa = cookie.dmac_laddress;
+ /*
+ * Lint doesn't like this because the common code gives us a uint64_t as
+ * input, but the common code then asks us to assign it to a size_t. So
+ * lint's right, but in this case there isn't much we can do.
+ */
+ mem->size = (size_t)size;
+
+ return (I40E_SUCCESS);
+}
+
+/* ARGSUSED */
+i40e_status
+i40e_free_dma_mem(struct i40e_hw *hw, struct i40e_dma_mem *mem)
+{
+ if (mem->pa != 0) {
+ VERIFY(mem->idm_dma_handle != NULL);
+ (void) ddi_dma_unbind_handle(mem->idm_dma_handle);
+ mem->pa = 0;
+ mem->size = 0;
+ }
+
+ if (mem->idm_acc_handle != NULL) {
+ ddi_dma_mem_free(&mem->idm_acc_handle);
+ mem->idm_acc_handle = NULL;
+ mem->va = NULL;
+ }
+
+ if (mem->idm_dma_handle != NULL) {
+ ddi_dma_free_handle(&mem->idm_dma_handle);
+ mem->idm_dma_handle = NULL;
+ }
+
+ /*
+ * Watch out for sloppiness.
+ */
+ ASSERT(mem->pa == 0);
+ ASSERT(mem->va == NULL);
+ ASSERT(mem->size == 0);
+ mem->idm_alignment = UINT32_MAX;
+
+ return (I40E_SUCCESS);
+}
+
+/*
+ * The common code wants to initialize its 'spinlocks' here, aka adaptive
+ * mutexes. At this time these are only used to maintain the adminq's data and
+ * as such it will only be used outside of interrupt context and even then,
+ * we're not going to actually end up ever doing anything above lock level and
+ * up in doing stuff with high level interrupts.
+ */
+void
+i40e_init_spinlock(struct i40e_spinlock *lock)
+{
+ mutex_init(&lock->ispl_mutex, NULL, MUTEX_DRIVER, NULL);
+}
+
+void
+i40e_acquire_spinlock(struct i40e_spinlock *lock)
+{
+ mutex_enter(&lock->ispl_mutex);
+}
+
+void
+i40e_release_spinlock(struct i40e_spinlock *lock)
+{
+ mutex_exit(&lock->ispl_mutex);
+}
+
+void
+i40e_destroy_spinlock(struct i40e_spinlock *lock)
+{
+ mutex_destroy(&lock->ispl_mutex);
+}
+
+boolean_t
+i40e_set_hw_bus_info(struct i40e_hw *hw)
+{
+ uint8_t pcie_id = PCI_CAP_ID_PCI_E;
+ uint16_t pcie_cap, value;
+ int status;
+
+ /* locate the pci-e capability block */
+ status = pci_lcap_locate((OS_DEP(hw))->ios_cfg_handle, pcie_id,
+ &pcie_cap);
+ if (status != DDI_SUCCESS) {
+ i40e_error(OS_DEP(hw)->ios_i40e, "failed to locate PCIe "
+ "capability block: %d",
+ status);
+ return (B_FALSE);
+ }
+
+ value = pci_config_get16(OS_DEP(hw)->ios_cfg_handle,
+ pcie_cap + PCIE_LINKSTS);
+
+ i40e_set_pci_config_data(hw, value);
+
+ return (B_TRUE);
+}
+
+/* ARGSUSED */
+void
+i40e_debug(void *hw, u32 mask, char *fmt, ...)
+{
+ char buf[1024];
+ va_list args;
+
+ va_start(args, fmt);
+ (void) vsnprintf(buf, sizeof (buf), fmt, args);
+ va_end(args);
+
+ DTRACE_PROBE2(i40e__debug, uint32_t, mask, char *, buf);
+}
diff --git a/usr/src/uts/common/io/i40e/i40e_osdep.h b/usr/src/uts/common/io/i40e/i40e_osdep.h
new file mode 100644
index 0000000000..12f498bc72
--- /dev/null
+++ b/usr/src/uts/common/io/i40e/i40e_osdep.h
@@ -0,0 +1,201 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#ifndef _I40E_OSDEP_H
+#define _I40E_OSDEP_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/pci_cap.h>
+#include <sys/sysmacros.h>
+
+/*
+ * For the moment, we use this to basically deal with a few custom changes
+ * particularly around mutex initialization. This is used to indicate that we
+ * should take illumos variants.
+ */
+#define I40E_ILLUMOS 1
+
+#define DEBUGOUT(S) i40e_debug(NULL, 0, S)
+#define DEBUGOUT1(S, A) i40e_debug(NULL, 0, S, A)
+#define DEBUGOUT2(S, A, B) i40e_debug(NULL, 0, S, A, B)
+#define DEBUGOUT3(S, A, B, C) i40e_debug(NULL, 0, S, A, B, C)
+#define DEBUGOUT4(S, A, B, C, D) \
+ i40e_debug(NULL, 0, S, A, B, C, D)
+#define DEBUGOUT5(S, A, B, C, D, E) \
+ i40e_debug(NULL, 0, S, A, B, C, D, E)
+#define DEBUGOUT6(S, A, B, C, D, E, F) \
+ i40e_debug(NULL, 0, S, A, B, C, D, E, F)
+#define DEBUGOUT7(S, A, B, C, D, E, F, G) \
+ i40e_debug(NULL, 0, S, A, B, C, D, E, F, G)
+#define DEBUGFUNC(F) DEBUGOUT(F);
+
+
+#define UNREFERENCED_PARAMETER(x) _NOTE(ARGUNUSED(x))
+#define UNREFERENCED_1PARAMETER(_p) UNREFERENCED_PARAMETER(_p)
+#define UNREFERENCED_2PARAMETER(_p, _q) _NOTE(ARGUNUSED(_p, _q))
+#define UNREFERENCED_3PARAMETER(_p, _q, _r) _NOTE(ARGUNUSED(_p, _q, _r))
+#define UNREFERENCED_4PARAMETER(_p, _q, _r, _s) _NOTE(ARGUNUSED(_p, _q,_r, _s))
+
+#define INLINE inline
+
+/*
+ * The mdb dmod needs to use this code as well, but mdb already defines TRUE and
+ * FALSE in the module API. Thus we don't define these if we're building the
+ * dmod, as indicated by _I40E_MDB_DMOD. However, if we don't define these, then
+ * the shared code will be upset.
+ */
+#ifndef _I40E_MDB_DMOD
+#define FALSE B_FALSE
+#define false B_FALSE
+#define TRUE B_TRUE
+#define true B_TRUE
+#endif /* _I40E_MDB_DMOD */
+
+
+#define CPU_TO_LE16(o) LE_16(o)
+#define CPU_TO_LE32(s) LE_32(s)
+#define CPU_TO_LE64(h) LE_64(h)
+#define LE16_TO_CPU(a) LE_16(a)
+#define LE32_TO_CPU(c) LE_32(c)
+#define LE64_TO_CPU(k) LE_64(k)
+
+#define I40E_NTOHS(a) ntohs(a)
+#define I40E_NTOHL(a) ntohl(a)
+#define I40E_HTONS(a) htons(a)
+#define I40E_HTONL(a) htonl(a)
+
+#define i40e_memset(a, b, c, d) memset((a), (b), (c))
+#define i40e_memcpy(a, b, c, d) bcopy((b), (a), (c))
+
+#define i40e_usec_delay(x) drv_usecwait(x)
+#define i40e_msec_delay(x) drv_usecwait(1000 * (x))
+
+#define FIELD_SIZEOF(x, y) (sizeof (((x*)0)->y))
+
+#define BIT(a) (1UL << (a))
+#define BIT_ULL(a) (1ULL << (a))
+
+typedef boolean_t bool;
+
+typedef uint8_t u8;
+typedef int8_t s8;
+typedef uint16_t u16;
+typedef int16_t s16;
+typedef uint32_t u32;
+typedef int32_t s32;
+typedef uint64_t u64;
+
+/* long string relief */
+typedef enum i40e_status_code i40e_status;
+
+#define __le16 u16
+#define __le32 u32
+#define __le64 u64
+#define __be16 u16
+#define __be32 u32
+#define __be64 u64
+
+/*
+ * Most other systems use spin locks for interrupts. However, illumos always
+ * uses a single kmutex_t for both and we decide what to do based on IPL (hint:
+ * it's not going to be a true spin lock, we'll use an adaptive mutex).
+ */
+struct i40e_spinlock {
+ kmutex_t ispl_mutex;
+};
+
+/*
+ * Note, while prefetch is strictly not present on all architectures, (it's an
+ * SSE extension on i386), it is expected that the platforms provide it.
+ */
+#define prefetch(x) prefetch_read_many(x)
+
+struct i40e_osdep {
+ off_t ios_reg_size;
+ ddi_acc_handle_t ios_reg_handle;
+ ddi_acc_handle_t ios_cfg_handle;
+ struct i40e *ios_i40e;
+};
+
+/*
+ * This structure and its members are defined by the common code. This means we
+ * cannot structure prefix it, even if we want to.
+ */
+struct i40e_virt_mem {
+ void *va;
+ u32 size;
+};
+
+/*
+ * The first three members of this structure are defined by the common code.
+ * This means we cannot structure prefix them, even if we wanted to.
+ */
+struct i40e_dma_mem {
+ void *va; /* Virtual address. */
+ u64 pa; /* Physical (DMA/Hardware) address. */
+ size_t size; /* Buffer size. */
+
+ /* illumos-private members */
+ ddi_acc_handle_t idm_acc_handle; /* Data access handle */
+ ddi_dma_handle_t idm_dma_handle; /* DMA handle */
+ uint32_t idm_alignment; /* Requested alignment */
+};
+
+struct i40e_hw; /* forward decl */
+
+#define OS_DEP(hw) ((struct i40e_osdep *)((hw)->back))
+#define i40e_read_pci_cfg(hw, reg) \
+ (pci_config_get16(OS_DEP(hw)->ios_cfg_handle, (reg)))
+#define i40e_write_pci_cfg(hw, reg, value) \
+ (pci_config_put16(OS_DEP(hw)->ios_cfg_handle, (reg), (value)))
+
+/*
+ * Intel expects that the symbol wr32 and r32 be defined to something which can
+ * read and write the 32-bit register in PCI space.
+ *
+ * To make it easier for readers and satisfy the general agreement that macros
+ * should be in all capitals, we use our own versions of these macros.
+ */
+#define wr32(hw, reg, value) \
+ ddi_put32(OS_DEP(hw)->ios_reg_handle, \
+ (uint32_t *)((uintptr_t)(hw)->hw_addr + (reg)), (value))
+#define rd32(hw, reg) \
+ ddi_get32(OS_DEP(hw)->ios_reg_handle, \
+ (uint32_t *)((uintptr_t)(hw)->hw_addr + (reg)))
+#define I40E_WRITE_REG wr32
+#define I40E_READ_REG rd32
+
+/*
+ * The use of GLEN_STAT presumes that we're only using this file for a PF
+ * driver. If we end up doing a VF driver, then we'll want to logically change
+ * this.
+ */
+#define i40e_flush(hw) (void) rd32(hw, I40E_GLGEN_STAT)
+
+extern void i40e_debug(void *, u32, char *, ...);
+extern boolean_t i40e_set_hw_bus_info(struct i40e_hw *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _I40E_OSDEP_H */
diff --git a/usr/src/uts/common/io/i40e/i40e_stats.c b/usr/src/uts/common/io/i40e/i40e_stats.c
new file mode 100644
index 0000000000..c7dd403fc8
--- /dev/null
+++ b/usr/src/uts/common/io/i40e/i40e_stats.c
@@ -0,0 +1,1310 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include "i40e_sw.h"
+
+/*
+ * -------------------
+ * Statistics Overview
+ * -------------------
+ *
+ * As part of managing the driver and understanding what's going on, we keep
+ * track of statistics from two different sources:
+ *
+ * - Statistics from the device
+ * - Statistics maintained by the driver
+ *
+ * Generally, the hardware provides us traditional IETF and MIB Ethernet
+ * statistics, for example, the total packets in and out, various errors in
+ * packets, the negotiated status etc. The driver, on the other hand, primarily
+ * contains statistics around driver-specific issues, such as information about
+ * checksumming on receive and transmit and the data in and out of a specific
+ * ring.
+ *
+ * We export statistics in two different forms. The first form is the required
+ * GLDv3 endpoints, specifically:
+ *
+ * - The general GLDv3 mc_getstat interface
+ * - The GLDv3 ring mri_stat interface
+ *
+ * The second form that we export statistics is through kstats. kstats are
+ * exported in different ways. Particularly we arrange the kstats to monitor the
+ * layout of the device. Currently we have kstats which capture both the IEEE
+ * and driver-implementation specific stats. There are kstats for each of the
+ * following structures:
+ *
+ * - Each physical function
+ * - Each VSI
+ * - Each Queue
+ *
+ * The PF's kstat is called 'pfstats' so as not to collide with other system
+ * provided kstats. Thus, for instance 0, usually the first PF, the full kstat
+ * would be: i40e:0:pfstats:.
+ *
+ * The kstat for each VSI is called vsi_%instance. So for the first PF, which is
+ * instance zero and the first vsi, which has id 0, it will be named vsi_0 and
+ * the full kstat would be i40e:0:vsi_0:.
+ *
+ * The kstat for each queue is trqpair_tx_%queue and trqpair_rx_%queue. Note
+ * that these are labeled based on their local index, which may mean that
+ * different instances have overlapping sets of queues. This isn't a problem as
+ * the kstats will always use the instance number of the pf to distinguish it in
+ * the kstat tuple.
+ *
+ * ---------------------
+ * Hardware Arrangements
+ * ---------------------
+ *
+ * The hardware keeps statistics at each physical function/MAC (PF) and it keeps
+ * statistics on each virtual station interface (VSI). Currently we only use one
+ * VSI per PF (see the i40e_main.c theory statement). The hardware has a limited
+ * number of statistics units available. While every PF is guaranteed to have a
+ * statistics unit, it is possible that we will run out for a given VSI. We'll
+ * have to figure out an appropriate strategy here when we end up supporting
+ * multiple VSIs.
+ *
+ * The hardware keeps these statistics as 32-bit and 48-bit counters. We are
+ * required to read them and then compute the differences between them. The
+ * 48-bit counters span more than one 32-bit register in the BAR. The hardware
+ * suggests that to read them, we perform 64-bit reads of the lower of the two
+ * registers that make up a 48-bit stat. The hardware guarantees that the reads
+ * of those two registers will be atomic and we'll get a consistent value, not a
+ * property it has for every read of two registers.
+ *
+ * For every kstat we have based on this, we have a corresponding uint64_t that
+ * we keep around as a base value in a separate structure. Whenever we read a
+ * value, we end up grabbing the current value, calculating a difference between
+ * the previously stored value and the current one, and updating the kstat with
+ * that difference. After which, we go through and update the base value that we
+ * stored. This is all encapsulated in i40e_stat_get_uint32() and
+ * i40e_stat_get_uint48().
+ *
+ * The only unfortunate thing here is that the hardware doesn't give us any kind
+ * of overflow counter. It just tries to make sure that the uint32_t and
+ * uint48_t counters are large enough to hopefully not overflow right away. This
+ * isn't the most reassuring statement and we should investigate ways of
+ * ensuring that if a system is active, but not actively measured, we don't lose
+ * data.
+ *
+ * The pf kstats data is stored in the i40e_t`i40e_pf_kstat. It is backed by the
+ * i40e_t`i40e_pf_stat structure. Similarly the VSI related kstat is in
+ * i40e_t`i40e_vsi_kstat and the data is backed in the i40e_t`i40e_vsi_stat. All
+ * of this data is protected by the i40e_stat_lock, which should be taken last,
+ * when acquiring locks.
+ */
+
+static void
+i40e_stat_get_uint48(i40e_t *i40e, uintptr_t reg, kstat_named_t *kstat,
+ uint64_t *base, boolean_t init)
+{
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+ uint64_t raw, delta;
+
+ ASSERT(MUTEX_HELD(&i40e->i40e_stat_lock));
+
+ raw = ddi_get64(i40e->i40e_osdep_space.ios_reg_handle,
+ (uint64_t *)((uintptr_t)hw->hw_addr + reg));
+
+ if (init == B_TRUE) {
+ *base = raw;
+ return;
+ }
+
+ /*
+ * Check for wraparound, note that the counter is actually only 48-bits,
+ * even though it has two uint32_t regs present.
+ */
+ if (raw >= *base) {
+ delta = raw - *base;
+ } else {
+ delta = 0x1000000000000ULL - *base + raw;
+ }
+
+ kstat->value.ui64 += delta;
+ *base = raw;
+}
+
+static void
+i40e_stat_get_uint32(i40e_t *i40e, uintptr_t reg, kstat_named_t *kstat,
+ uint64_t *base, boolean_t init)
+{
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+ uint64_t raw, delta;
+
+ ASSERT(MUTEX_HELD(&i40e->i40e_stat_lock));
+
+ raw = ddi_get32(i40e->i40e_osdep_space.ios_reg_handle,
+ (uint32_t *)((uintptr_t)hw->hw_addr + reg));
+
+ if (init == B_TRUE) {
+ *base = raw;
+ return;
+ }
+
+ /*
+ * Watch out for wraparound as we only have a 32-bit counter.
+ */
+ if (raw >= *base) {
+ delta = raw - *base;
+ } else {
+ delta = 0x100000000ULL - *base + raw;
+ }
+
+ kstat->value.ui64 += delta;
+ *base = raw;
+
+}
+
+static void
+i40e_stat_vsi_update(i40e_t *i40e, boolean_t init)
+{
+ i40e_vsi_stats_t *ivs;
+ i40e_vsi_kstats_t *ivk;
+ int id = i40e->i40e_vsi_stat_id;
+
+ ASSERT(i40e->i40e_vsi_kstat != NULL);
+ ivs = &i40e->i40e_vsi_stat;
+ ivk = i40e->i40e_vsi_kstat->ks_data;
+
+ mutex_enter(&i40e->i40e_stat_lock);
+
+ i40e_stat_get_uint48(i40e, I40E_GLV_GORCL(id), &ivk->ivk_rx_bytes,
+ &ivs->ivs_rx_bytes, init);
+ i40e_stat_get_uint48(i40e, I40E_GLV_UPRCL(id), &ivk->ivk_rx_unicast,
+ &ivs->ivs_rx_unicast, init);
+ i40e_stat_get_uint48(i40e, I40E_GLV_MPRCL(id), &ivk->ivk_rx_multicast,
+ &ivs->ivs_rx_multicast, init);
+ i40e_stat_get_uint48(i40e, I40E_GLV_BPRCL(id), &ivk->ivk_rx_broadcast,
+ &ivs->ivs_rx_broadcast, init);
+
+ i40e_stat_get_uint32(i40e, I40E_GLV_RDPC(id), &ivk->ivk_rx_discards,
+ &ivs->ivs_rx_discards, init);
+ i40e_stat_get_uint32(i40e, I40E_GLV_RUPP(id),
+ &ivk->ivk_rx_unknown_protocol,
+ &ivs->ivs_rx_unknown_protocol,
+ init);
+
+ i40e_stat_get_uint48(i40e, I40E_GLV_GOTCL(id), &ivk->ivk_tx_bytes,
+ &ivs->ivs_tx_bytes, init);
+ i40e_stat_get_uint48(i40e, I40E_GLV_UPTCL(id), &ivk->ivk_tx_unicast,
+ &ivs->ivs_tx_unicast, init);
+ i40e_stat_get_uint48(i40e, I40E_GLV_MPTCL(id), &ivk->ivk_tx_multicast,
+ &ivs->ivs_tx_multicast, init);
+ i40e_stat_get_uint48(i40e, I40E_GLV_BPTCL(id), &ivk->ivk_tx_broadcast,
+ &ivs->ivs_tx_broadcast, init);
+
+ i40e_stat_get_uint32(i40e, I40E_GLV_TEPC(id), &ivk->ivk_tx_errors,
+ &ivs->ivs_tx_errors, init);
+
+ mutex_exit(&i40e->i40e_stat_lock);
+
+ /*
+ * We follow ixgbe's lead here and that if a kstat update didn't work
+ * 100% then we mark service unaffected as opposed to when fetching
+ * things for MAC directly.
+ */
+ if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) !=
+ DDI_FM_OK) {
+ ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_UNAFFECTED);
+ }
+}
+
+static int
+i40e_stat_vsi_kstat_update(kstat_t *ksp, int rw)
+{
+ i40e_t *i40e;
+
+ if (rw == KSTAT_WRITE)
+ return (EACCES);
+
+ i40e = ksp->ks_private;
+ i40e_stat_vsi_update(i40e, B_FALSE);
+ return (0);
+}
+
+void
+i40e_stat_vsi_fini(i40e_t *i40e)
+{
+ if (i40e->i40e_vsi_kstat != NULL) {
+ kstat_delete(i40e->i40e_vsi_kstat);
+ i40e->i40e_vsi_kstat = NULL;
+ }
+}
+
+boolean_t
+i40e_stat_vsi_init(i40e_t *i40e)
+{
+ kstat_t *ksp;
+ i40e_vsi_kstats_t *ivk;
+ char buf[64];
+
+ (void) snprintf(buf, sizeof (buf), "vsi_%d", i40e->i40e_vsi_id);
+
+ ksp = kstat_create(I40E_MODULE_NAME, ddi_get_instance(i40e->i40e_dip),
+ buf, "net", KSTAT_TYPE_NAMED,
+ sizeof (i40e_vsi_kstats_t) / sizeof (kstat_named_t), 0);
+
+ if (ksp == NULL) {
+ i40e_error(i40e, "Failed to create kstats for VSI %d",
+ i40e->i40e_vsi_id);
+ return (B_FALSE);
+ }
+
+ i40e->i40e_vsi_kstat = ksp;
+ ivk = ksp->ks_data;
+ ksp->ks_update = i40e_stat_vsi_kstat_update;
+ ksp->ks_private = i40e;
+
+ kstat_named_init(&ivk->ivk_rx_bytes, "rx_bytes",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ivk->ivk_rx_unicast, "rx_unicast",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ivk->ivk_rx_multicast, "rx_multicast",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ivk->ivk_rx_broadcast, "rx_broadcast",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ivk->ivk_rx_discards, "rx_discards",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ivk->ivk_rx_unknown_protocol, "rx_unknown_protocol",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ivk->ivk_tx_bytes, "tx_bytes",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ivk->ivk_tx_unicast, "tx_unicast",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ivk->ivk_tx_multicast, "tx_multicast",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ivk->ivk_tx_broadcast, "tx_broadcast",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ivk->ivk_tx_errors, "tx_errors",
+ KSTAT_DATA_UINT64);
+
+ bzero(&i40e->i40e_vsi_stat, sizeof (i40e_vsi_stats_t));
+ i40e_stat_vsi_update(i40e, B_TRUE);
+ kstat_install(i40e->i40e_vsi_kstat);
+
+ return (B_TRUE);
+}
+
+static void
+i40e_stat_pf_update(i40e_t *i40e, boolean_t init)
+{
+ i40e_pf_stats_t *ips;
+ i40e_pf_kstats_t *ipk;
+ int port = i40e->i40e_hw_space.port;
+ int i;
+
+ ASSERT(i40e->i40e_pf_kstat != NULL);
+ ips = &i40e->i40e_pf_stat;
+ ipk = i40e->i40e_pf_kstat->ks_data;
+
+ mutex_enter(&i40e->i40e_stat_lock);
+
+ /* 64-bit PCIe regs */
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_GORCL(port),
+ &ipk->ipk_rx_bytes, &ips->ips_rx_bytes, init);
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_UPRCL(port),
+ &ipk->ipk_rx_unicast, &ips->ips_rx_unicast, init);
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_MPRCL(port),
+ &ipk->ipk_rx_multicast, &ips->ips_rx_multicast, init);
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_BPRCL(port),
+ &ipk->ipk_rx_broadcast, &ips->ips_rx_broadcast, init);
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_GOTCL(port),
+ &ipk->ipk_tx_bytes, &ips->ips_tx_bytes, init);
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_UPTCL(port),
+ &ipk->ipk_tx_unicast, &ips->ips_tx_unicast, init);
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_MPTCL(port),
+ &ipk->ipk_tx_multicast, &ips->ips_tx_multicast, init);
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_BPTCL(port),
+ &ipk->ipk_tx_broadcast, &ips->ips_tx_broadcast, init);
+
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_PRC64L(port),
+ &ipk->ipk_rx_size_64, &ips->ips_rx_size_64, init);
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_PRC127L(port),
+ &ipk->ipk_rx_size_127, &ips->ips_rx_size_127, init);
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_PRC255L(port),
+ &ipk->ipk_rx_size_255, &ips->ips_rx_size_255, init);
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_PRC511L(port),
+ &ipk->ipk_rx_size_511, &ips->ips_rx_size_511, init);
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_PRC1023L(port),
+ &ipk->ipk_rx_size_1023, &ips->ips_rx_size_1023, init);
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_PRC1522L(port),
+ &ipk->ipk_rx_size_1522, &ips->ips_rx_size_1522, init);
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_PRC9522L(port),
+ &ipk->ipk_rx_size_9522, &ips->ips_rx_size_9522, init);
+
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_PTC64L(port),
+ &ipk->ipk_tx_size_64, &ips->ips_tx_size_64, init);
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_PTC127L(port),
+ &ipk->ipk_tx_size_127, &ips->ips_tx_size_127, init);
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_PTC255L(port),
+ &ipk->ipk_tx_size_255, &ips->ips_tx_size_255, init);
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_PTC511L(port),
+ &ipk->ipk_tx_size_511, &ips->ips_tx_size_511, init);
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_PTC1023L(port),
+ &ipk->ipk_tx_size_1023, &ips->ips_tx_size_1023, init);
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_PTC1522L(port),
+ &ipk->ipk_tx_size_1522, &ips->ips_tx_size_1522, init);
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_PTC9522L(port),
+ &ipk->ipk_tx_size_9522, &ips->ips_tx_size_9522, init);
+
+ /* 32-bit PCIe regs */
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_LXONRXC(port),
+ &ipk->ipk_link_xon_rx, &ips->ips_link_xon_rx, init);
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_LXOFFRXC(port),
+ &ipk->ipk_link_xoff_rx, &ips->ips_link_xoff_rx, init);
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_LXONTXC(port),
+ &ipk->ipk_link_xon_tx, &ips->ips_link_xon_tx, init);
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_LXOFFTXC(port),
+ &ipk->ipk_link_xoff_tx, &ips->ips_link_xoff_tx, init);
+
+ for (i = 0; i < 8; i++) {
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_PXONRXC(port, i),
+ &ipk->ipk_priority_xon_rx[i], &ips->ips_priority_xon_rx[i],
+ init);
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_PXOFFRXC(port, i),
+ &ipk->ipk_priority_xoff_rx[i],
+ &ips->ips_priority_xoff_rx[i],
+ init);
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_PXONTXC(port, i),
+ &ipk->ipk_priority_xon_tx[i], &ips->ips_priority_xon_tx[i],
+ init);
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_PXOFFTXC(port, i),
+ &ipk->ipk_priority_xoff_tx[i],
+ &ips->ips_priority_xoff_tx[i],
+ init);
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_RXON2OFFCNT(port, i),
+ &ipk->ipk_priority_xon_2_xoff[i],
+ &ips->ips_priority_xon_2_xoff[i],
+ init);
+ }
+
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_CRCERRS(port),
+ &ipk->ipk_crc_errors, &ips->ips_crc_errors, init);
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_ILLERRC(port),
+ &ipk->ipk_illegal_bytes, &ips->ips_illegal_bytes, init);
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_MLFC(port),
+ &ipk->ipk_mac_local_faults, &ips->ips_mac_local_faults, init);
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_MRFC(port),
+ &ipk->ipk_mac_remote_faults, &ips->ips_mac_remote_faults, init);
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_RLEC(port),
+ &ipk->ipk_rx_length_errors, &ips->ips_rx_length_errors, init);
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_RUC(port),
+ &ipk->ipk_rx_undersize, &ips->ips_rx_undersize, init);
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_RFC(port),
+ &ipk->ipk_rx_fragments, &ips->ips_rx_fragments, init);
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_ROC(port),
+ &ipk->ipk_rx_oversize, &ips->ips_rx_oversize, init);
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_RJC(port),
+ &ipk->ipk_rx_jabber, &ips->ips_rx_jabber, init);
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_RDPC(port),
+ &ipk->ipk_rx_discards, &ips->ips_rx_discards, init);
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_LDPC(port),
+ &ipk->ipk_rx_vm_discards, &ips->ips_rx_vm_discards, init);
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_MSPDC(port),
+ &ipk->ipk_rx_short_discards, &ips->ips_rx_short_discards, init);
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_TDOLD(port),
+ &ipk->ipk_tx_dropped_link_down, &ips->ips_tx_dropped_link_down,
+ init);
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_RUPP(port),
+ &ipk->ipk_rx_unknown_protocol, &ips->ips_rx_unknown_protocol, init);
+
+ /* 64-bit */
+ i40e_stat_get_uint48(i40e, I40E_GL_RXERR1_L(port), &ipk->ipk_rx_err1,
+ &ips->ips_rx_err1, init);
+ i40e_stat_get_uint48(i40e, I40E_GL_RXERR2_L(port), &ipk->ipk_rx_err2,
+ &ips->ips_rx_err2, init);
+
+ mutex_exit(&i40e->i40e_stat_lock);
+
+ /*
+ * We follow ixgbe's lead here and that if a kstat update didn't work
+ * 100% then we mark service unaffected as opposed to when fetching
+ * things for MAC directly.
+ */
+ if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) !=
+ DDI_FM_OK) {
+ ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_UNAFFECTED);
+ }
+}
+
+static int
+i40e_stat_pf_kstat_update(kstat_t *ksp, int rw)
+{
+ i40e_t *i40e;
+
+ if (rw == KSTAT_WRITE)
+ return (EACCES);
+
+ i40e = ksp->ks_private;
+ i40e_stat_pf_update(i40e, B_FALSE);
+ return (0);
+}
+
+
+static boolean_t
+i40e_stat_pf_init(i40e_t *i40e)
+{
+ kstat_t *ksp;
+ i40e_pf_kstats_t *ipk;
+
+ ksp = kstat_create(I40E_MODULE_NAME, ddi_get_instance(i40e->i40e_dip),
+ "pfstats", "net", KSTAT_TYPE_NAMED,
+ sizeof (i40e_pf_kstats_t) / sizeof (kstat_named_t), 0);
+ if (ksp == NULL) {
+ i40e_error(i40e, "Could not create kernel statistics.");
+ return (B_FALSE);
+ }
+
+ i40e->i40e_pf_kstat = ksp;
+ ipk = ksp->ks_data;
+ ksp->ks_update = i40e_stat_pf_kstat_update;
+ ksp->ks_private = i40e;
+
+ kstat_named_init(&ipk->ipk_rx_bytes, "rx_bytes",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_rx_unicast, "rx_unicast",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_rx_multicast, "rx_multicast",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_rx_broadcast, "rx_broadcast",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_tx_bytes, "tx_bytes",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_tx_unicast, "tx_unicast",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_tx_multicast, "tx_multicast",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_tx_broadcast, "tx_broadcast",
+ KSTAT_DATA_UINT64);
+
+ kstat_named_init(&ipk->ipk_rx_size_64, "rx_size_64",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_rx_size_127, "rx_size_127",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_rx_size_255, "rx_size_255",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_rx_size_511, "rx_size_511",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_rx_size_1023, "rx_size_1023",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_rx_size_1522, "rx_size_1522",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_rx_size_9522, "rx_size_9522",
+ KSTAT_DATA_UINT64);
+
+ kstat_named_init(&ipk->ipk_tx_size_64, "tx_size_64",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_tx_size_127, "tx_size_127",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_tx_size_255, "tx_size_255",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_tx_size_511, "tx_size_511",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_tx_size_1023, "tx_size_1023",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_tx_size_1522, "tx_size_1522",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_tx_size_9522, "tx_size_9522",
+ KSTAT_DATA_UINT64);
+
+ kstat_named_init(&ipk->ipk_link_xon_rx, "link_xon_rx",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_link_xoff_rx, "link_xoff_rx",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_link_xon_tx, "link_xon_tx",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_link_xoff_tx, "link_xoff_tx",
+ KSTAT_DATA_UINT64);
+
+ kstat_named_init(&ipk->ipk_priority_xon_rx[0], "priority_xon_rx[0]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xoff_rx[0], "priority_xoff_rx[0]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xon_tx[0], "priority_xon_tx[0]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xoff_tx[0], "priority_xoff_tx[0]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xon_2_xoff[0],
+ "priority_xon_2_xoff[0]",
+ KSTAT_DATA_UINT64);
+
+ kstat_named_init(&ipk->ipk_priority_xon_rx[1], "priority_xon_rx[1]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xoff_rx[1], "priority_xoff_rx[1]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xon_tx[1], "priority_xon_tx[1]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xoff_tx[1], "priority_xoff_tx[1]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xon_2_xoff[1],
+ "priority_xon_2_xoff[1]",
+ KSTAT_DATA_UINT64);
+
+ kstat_named_init(&ipk->ipk_priority_xon_rx[2], "priority_xon_rx[2]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xoff_rx[2], "priority_xoff_rx[2]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xon_tx[2], "priority_xon_tx[2]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xoff_tx[2], "priority_xoff_tx[2]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xon_2_xoff[2],
+ "priority_xon_2_xoff[2]",
+ KSTAT_DATA_UINT64);
+
+ kstat_named_init(&ipk->ipk_priority_xon_rx[3], "priority_xon_rx[3]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xoff_rx[3], "priority_xoff_rx[3]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xon_tx[3], "priority_xon_tx[3]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xoff_tx[3], "priority_xoff_tx[3]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xon_2_xoff[3],
+ "priority_xon_2_xoff[3]",
+ KSTAT_DATA_UINT64);
+
+ kstat_named_init(&ipk->ipk_priority_xon_rx[4], "priority_xon_rx[4]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xoff_rx[4], "priority_xoff_rx[4]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xon_tx[4], "priority_xon_tx[4]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xoff_tx[4], "priority_xoff_tx[4]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xon_2_xoff[4],
+ "priority_xon_2_xoff[4]",
+ KSTAT_DATA_UINT64);
+
+ kstat_named_init(&ipk->ipk_priority_xon_rx[5], "priority_xon_rx[5]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xoff_rx[5], "priority_xoff_rx[5]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xon_tx[5], "priority_xon_tx[5]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xoff_tx[5], "priority_xoff_tx[5]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xon_2_xoff[5],
+ "priority_xon_2_xoff[5]",
+ KSTAT_DATA_UINT64);
+
+ kstat_named_init(&ipk->ipk_priority_xon_rx[6], "priority_xon_rx[6]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xoff_rx[6], "priority_xoff_rx[6]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xon_tx[6], "priority_xon_tx[6]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xoff_tx[6], "priority_xoff_tx[6]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xon_2_xoff[6],
+ "priority_xon_2_xoff[6]",
+ KSTAT_DATA_UINT64);
+
+ kstat_named_init(&ipk->ipk_priority_xon_rx[7], "priority_xon_rx[7]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xoff_rx[7], "priority_xoff_rx[7]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xon_tx[7], "priority_xon_tx[7]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xoff_tx[7], "priority_xoff_tx[7]",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_priority_xon_2_xoff[7],
+ "priority_xon_2_xoff[7]",
+ KSTAT_DATA_UINT64);
+
+ kstat_named_init(&ipk->ipk_crc_errors, "crc_errors",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_illegal_bytes, "illegal_bytes",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_mac_local_faults, "mac_local_faults",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_mac_remote_faults, "mac_remote_faults",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_rx_length_errors, "rx_length_errors",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_rx_undersize, "rx_undersize",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_rx_fragments, "rx_fragments",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_rx_oversize, "rx_oversize",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_rx_jabber, "rx_jabber",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_rx_discards, "rx_discards",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_rx_vm_discards, "rx_vm_discards",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_rx_short_discards, "rx_short_discards",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_tx_dropped_link_down, "tx_dropped_link_down",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_rx_unknown_protocol, "rx_unknown_protocol",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_rx_err1, "rx_err1",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&ipk->ipk_rx_err2, "rx_err2",
+ KSTAT_DATA_UINT64);
+
+
+ bzero(&i40e->i40e_pf_stat, sizeof (i40e_pf_stats_t));
+ i40e_stat_pf_update(i40e, B_TRUE);
+
+ kstat_install(i40e->i40e_pf_kstat);
+
+ return (B_TRUE);
+}
+
+void
+i40e_stats_fini(i40e_t *i40e)
+{
+ ASSERT(i40e->i40e_vsi_kstat == NULL);
+ if (i40e->i40e_pf_kstat != NULL) {
+ kstat_delete(i40e->i40e_pf_kstat);
+ i40e->i40e_pf_kstat = NULL;
+ }
+
+ mutex_destroy(&i40e->i40e_stat_lock);
+}
+
+boolean_t
+i40e_stats_init(i40e_t *i40e)
+{
+ mutex_init(&i40e->i40e_stat_lock, NULL, MUTEX_DRIVER, NULL);
+ if (i40e_stat_pf_init(i40e) == B_FALSE) {
+ mutex_destroy(&i40e->i40e_stat_lock);
+ return (B_FALSE);
+ }
+
+ return (B_TRUE);
+}
+
+/*
+ * For Nemo/GLDv3.
+ */
+int
+i40e_m_stat(void *arg, uint_t stat, uint64_t *val)
+{
+ i40e_t *i40e = (i40e_t *)arg;
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+ int port = i40e->i40e_hw_space.port;
+ i40e_pf_stats_t *ips;
+ i40e_pf_kstats_t *ipk;
+
+
+ ASSERT(i40e->i40e_pf_kstat != NULL);
+ ips = &i40e->i40e_pf_stat;
+ ipk = i40e->i40e_pf_kstat->ks_data;
+
+ /*
+ * We need both locks, as various stats are protected by different
+ * things here.
+ */
+ mutex_enter(&i40e->i40e_general_lock);
+
+ if (i40e->i40e_state & I40E_SUSPENDED) {
+ mutex_exit(&i40e->i40e_general_lock);
+ return (ECANCELED);
+ }
+
+ mutex_enter(&i40e->i40e_stat_lock);
+
+ /*
+ * Unfortunately the GLDv3 conflates two rather different things here.
+ * We're combining statistics about the physical port represented by
+ * this instance with statistics that describe the properties of the
+ * logical interface. As such, we're going to use the various aspects of
+ * the port to describe these stats as they represent what the physical
+ * instance is doing, even though that that means some tools may be
+ * confused and that to see the logical traffic on the interface itself
+ * sans VNICs and the like will require more work.
+ *
+ * Stats which are not listed in this switch statement are unimplemented
+ * at this time in hardware or don't currently apply to the device.
+ */
+ switch (stat) {
+ /* MIB-II stats (RFC 1213 and RFC 1573) */
+ case MAC_STAT_IFSPEED:
+ *val = i40e->i40e_link_speed * 1000000ull;
+ break;
+ case MAC_STAT_MULTIRCV:
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_MPRCL(port),
+ &ipk->ipk_rx_multicast, &ips->ips_rx_multicast, B_FALSE);
+ *val = ipk->ipk_rx_multicast.value.ui64;
+ break;
+ case MAC_STAT_BRDCSTRCV:
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_BPRCL(port),
+ &ipk->ipk_rx_broadcast, &ips->ips_rx_broadcast, B_FALSE);
+ *val = ipk->ipk_rx_broadcast.value.ui64;
+ break;
+ case MAC_STAT_MULTIXMT:
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_MPTCL(port),
+ &ipk->ipk_tx_multicast, &ips->ips_tx_multicast, B_FALSE);
+ *val = ipk->ipk_tx_multicast.value.ui64;
+ break;
+ case MAC_STAT_BRDCSTXMT:
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_BPTCL(port),
+ &ipk->ipk_tx_broadcast, &ips->ips_tx_broadcast, B_FALSE);
+ *val = ipk->ipk_tx_broadcast.value.ui64;
+ break;
+ case MAC_STAT_NORCVBUF:
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_RDPC(port),
+ &ipk->ipk_rx_discards, &ips->ips_rx_discards, B_FALSE);
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_LDPC(port),
+ &ipk->ipk_rx_vm_discards, &ips->ips_rx_vm_discards,
+ B_FALSE);
+ *val = ipk->ipk_rx_discards.value.ui64 +
+ ipk->ipk_rx_vm_discards.value.ui64;
+ break;
+ /*
+ * Note, that some RXERR2 stats are also duplicated by the switch filter
+ * stats; however, since we're not using those at this time, it seems
+ * reasonable to include them.
+ */
+ case MAC_STAT_IERRORS:
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_CRCERRS(port),
+ &ipk->ipk_crc_errors, &ips->ips_crc_errors, B_FALSE);
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_ILLERRC(port),
+ &ipk->ipk_illegal_bytes, &ips->ips_illegal_bytes, B_FALSE);
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_RLEC(port),
+ &ipk->ipk_rx_length_errors, &ips->ips_rx_length_errors,
+ B_FALSE);
+ i40e_stat_get_uint48(i40e, I40E_GL_RXERR1_L(port),
+ &ipk->ipk_rx_err1, &ips->ips_rx_err1, B_FALSE);
+ i40e_stat_get_uint48(i40e, I40E_GL_RXERR2_L(port),
+ &ipk->ipk_rx_err2, &ips->ips_rx_err2, B_FALSE);
+
+ *val = ipk->ipk_crc_errors.value.ui64 +
+ ipk->ipk_illegal_bytes.value.ui64 +
+ ipk->ipk_rx_length_errors.value.ui64 +
+ ipk->ipk_rx_err1.value.ui64 +
+ ipk->ipk_rx_err2.value.ui64;
+ break;
+ case MAC_STAT_UNKNOWNS:
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_RUPP(port),
+ &ipk->ipk_rx_unknown_protocol,
+ &ips->ips_rx_unknown_protocol,
+ B_FALSE);
+ *val = ipk->ipk_rx_unknown_protocol.value.ui64;
+ break;
+ case MAC_STAT_RBYTES:
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_GORCL(port),
+ &ipk->ipk_rx_bytes, &ips->ips_rx_bytes, B_FALSE);
+ *val = ipk->ipk_rx_bytes.value.ui64;
+ break;
+ case MAC_STAT_IPACKETS:
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_UPRCL(port),
+ &ipk->ipk_rx_unicast, &ips->ips_rx_unicast, B_FALSE);
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_MPRCL(port),
+ &ipk->ipk_rx_multicast, &ips->ips_rx_multicast, B_FALSE);
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_BPRCL(port),
+ &ipk->ipk_rx_broadcast, &ips->ips_rx_broadcast, B_FALSE);
+ *val = ipk->ipk_rx_unicast.value.ui64 +
+ ipk->ipk_rx_multicast.value.ui64 +
+ ipk->ipk_rx_broadcast.value.ui64;
+ break;
+ case MAC_STAT_OBYTES:
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_GOTCL(port),
+ &ipk->ipk_tx_bytes, &ips->ips_tx_bytes, B_FALSE);
+ *val = ipk->ipk_tx_bytes.value.ui64;
+ break;
+ case MAC_STAT_OPACKETS:
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_UPTCL(port),
+ &ipk->ipk_tx_unicast, &ips->ips_tx_unicast, B_FALSE);
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_MPTCL(port),
+ &ipk->ipk_tx_multicast, &ips->ips_tx_multicast, B_FALSE);
+ i40e_stat_get_uint48(i40e, I40E_GLPRT_BPTCL(port),
+ &ipk->ipk_tx_broadcast, &ips->ips_tx_broadcast, B_FALSE);
+ *val = ipk->ipk_tx_unicast.value.ui64 +
+ ipk->ipk_tx_multicast.value.ui64 +
+ ipk->ipk_tx_broadcast.value.ui64;
+ break;
+ case MAC_STAT_UNDERFLOWS:
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_RUC(port),
+ &ipk->ipk_rx_undersize, &ips->ips_rx_undersize, B_FALSE);
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_RFC(port),
+ &ipk->ipk_rx_fragments, &ips->ips_rx_fragments, B_FALSE);
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_MSPDC(port),
+ &ipk->ipk_rx_short_discards, &ips->ips_rx_short_discards,
+ B_FALSE);
+ *val = ipk->ipk_rx_undersize.value.ui64 +
+ ipk->ipk_rx_fragments.value.ui64 +
+ ipk->ipk_rx_short_discards.value.ui64;
+ break;
+ case MAC_STAT_OVERFLOWS:
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_ROC(port),
+ &ipk->ipk_rx_oversize, &ips->ips_rx_oversize, B_FALSE);
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_RJC(port),
+ &ipk->ipk_rx_jabber, &ips->ips_rx_jabber, B_FALSE);
+ *val = ipk->ipk_rx_oversize.value.ui64 +
+ ipk->ipk_rx_fragments.value.ui64;
+ break;
+
+ /* RFC 1643 stats */
+ case ETHER_STAT_FCS_ERRORS:
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_CRCERRS(port),
+ &ipk->ipk_crc_errors, &ips->ips_crc_errors, B_FALSE);
+ *val = ipk->ipk_crc_errors.value.ui64;
+ break;
+ case ETHER_STAT_TOOLONG_ERRORS:
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_ROC(port),
+ &ipk->ipk_rx_oversize, &ips->ips_rx_oversize, B_FALSE);
+ *val = ipk->ipk_rx_oversize.value.ui64;
+ break;
+ case ETHER_STAT_MACRCV_ERRORS:
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_ILLERRC(port),
+ &ipk->ipk_illegal_bytes, &ips->ips_illegal_bytes, B_FALSE);
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_RLEC(port),
+ &ipk->ipk_rx_length_errors, &ips->ips_rx_length_errors,
+ B_FALSE);
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_RFC(port),
+ &ipk->ipk_rx_fragments, &ips->ips_rx_fragments, B_FALSE);
+ *val = ipk->ipk_illegal_bytes.value.ui64 +
+ ipk->ipk_rx_length_errors.value.ui64 +
+ ipk->ipk_rx_fragments.value.ui64;
+ break;
+ /* MII/GMII stats */
+
+ /*
+ * The receiver address is apparently the same as the port number.
+ */
+ case ETHER_STAT_XCVR_ADDR:
+ /* The Receiver address is apparently the same as the port */
+ *val = i40e->i40e_hw_space.port;
+ break;
+ case ETHER_STAT_XCVR_ID:
+ switch (hw->phy.media_type) {
+ case I40E_MEDIA_TYPE_BASET:
+ /*
+ * Transform the data here into the ID. Note, generally
+ * the revision is left out.
+ */
+ *val = i40e->i40e_phy.phy_id[3] << 24 |
+ i40e->i40e_phy.phy_id[2] << 16 |
+ i40e->i40e_phy.phy_id[1] << 8;
+ break;
+ case I40E_MEDIA_TYPE_FIBER:
+ case I40E_MEDIA_TYPE_BACKPLANE:
+ case I40E_MEDIA_TYPE_CX4:
+ case I40E_MEDIA_TYPE_DA:
+ case I40E_MEDIA_TYPE_VIRTUAL:
+ *val = i40e->i40e_phy.phy_id[0] |
+ i40e->i40e_phy.phy_id[1] << 8 |
+ i40e->i40e_phy.phy_id[2] << 16;
+ break;
+ case I40E_MEDIA_TYPE_UNKNOWN:
+ default:
+ goto unimpl;
+ }
+ break;
+ case ETHER_STAT_XCVR_INUSE:
+ switch (hw->phy.link_info.phy_type) {
+ case I40E_PHY_TYPE_100BASE_TX:
+ *val = XCVR_100T2;
+ break;
+ case I40E_PHY_TYPE_1000BASE_T:
+ *val = XCVR_1000T;
+ break;
+ default:
+ *val = XCVR_UNDEFINED;
+ break;
+ }
+ break;
+
+ /*
+ * This group answers the question of do we support a given speed in
+ * theory.
+ */
+ case ETHER_STAT_CAP_100FDX:
+ *val = (i40e->i40e_phy.link_speed & I40E_LINK_SPEED_100MB) != 0;
+ break;
+ case ETHER_STAT_CAP_1000FDX:
+ *val = (i40e->i40e_phy.link_speed & I40E_LINK_SPEED_1GB) != 0;
+ break;
+ case ETHER_STAT_CAP_10GFDX:
+ *val = (i40e->i40e_phy.link_speed & I40E_LINK_SPEED_10GB) != 0;
+ break;
+ case ETHER_STAT_CAP_40GFDX:
+ *val = (i40e->i40e_phy.link_speed & I40E_LINK_SPEED_40GB) != 0;
+ break;
+
+ /*
+ * These ask are we currently advertising these speeds and abilities.
+ * Until we support setting these because we're working with a copper
+ * PHY, then the only things we advertise are based on the link PHY
+ * speeds. In other words, we advertise everything we support.
+ */
+ case ETHER_STAT_ADV_CAP_100FDX:
+ *val = (i40e->i40e_phy.link_speed & I40E_LINK_SPEED_100MB) != 0;
+ break;
+ case ETHER_STAT_ADV_CAP_1000FDX:
+ *val = (i40e->i40e_phy.link_speed & I40E_LINK_SPEED_1GB) != 0;
+ break;
+ case ETHER_STAT_ADV_CAP_10GFDX:
+ *val = (i40e->i40e_phy.link_speed & I40E_LINK_SPEED_10GB) != 0;
+ break;
+ case ETHER_STAT_ADV_CAP_40GFDX:
+ *val = (i40e->i40e_phy.link_speed & I40E_LINK_SPEED_40GB) != 0;
+ break;
+
+ /*
+ * These ask if the peer supports these speeds, e.g. what did they tell
+ * us in auto-negotiation. Unfortunately, hardware doesn't appear to
+ * give us a way to determine whether or not they actually support
+ * something, only what they have enabled. This means that all we can
+ * tell the user is the speed that we're currently at, unfortunately.
+ */
+ case ETHER_STAT_LP_CAP_100FDX:
+ *val = i40e->i40e_link_speed == 100;
+ break;
+ case ETHER_STAT_LP_CAP_1000FDX:
+ *val = i40e->i40e_link_speed == 1000;
+ break;
+ case ETHER_STAT_LP_CAP_10GFDX:
+ *val = i40e->i40e_link_speed == 10000;
+ break;
+ case ETHER_STAT_LP_CAP_40GFDX:
+ *val = i40e->i40e_link_speed == 40000;
+ break;
+
+ /*
+ * Statistics for unsupported speeds. Note that these often have the
+ * same constraints as the other ones. For example, we can't answer the
+ * question of the ETHER_STAT_LP_CAP family because hardware doesn't
+ * give us any way of knowing whether or not it does.
+ */
+ case ETHER_STAT_CAP_100HDX:
+ case ETHER_STAT_CAP_1000HDX:
+ case ETHER_STAT_CAP_10FDX:
+ case ETHER_STAT_CAP_10HDX:
+ case ETHER_STAT_CAP_100T4:
+ case ETHER_STAT_CAP_100GFDX:
+ case ETHER_STAT_CAP_2500FDX:
+ case ETHER_STAT_CAP_5000FDX:
+ case ETHER_STAT_ADV_CAP_1000HDX:
+ case ETHER_STAT_ADV_CAP_100HDX:
+ case ETHER_STAT_ADV_CAP_10FDX:
+ case ETHER_STAT_ADV_CAP_10HDX:
+ case ETHER_STAT_ADV_CAP_100T4:
+ case ETHER_STAT_ADV_CAP_100GFDX:
+ case ETHER_STAT_ADV_CAP_2500FDX:
+ case ETHER_STAT_ADV_CAP_5000FDX:
+ case ETHER_STAT_LP_CAP_1000HDX:
+ case ETHER_STAT_LP_CAP_100HDX:
+ case ETHER_STAT_LP_CAP_10FDX:
+ case ETHER_STAT_LP_CAP_10HDX:
+ case ETHER_STAT_LP_CAP_100T4:
+ case ETHER_STAT_LP_CAP_100GFDX:
+ case ETHER_STAT_LP_CAP_2500FDX:
+ case ETHER_STAT_LP_CAP_5000FDX:
+ *val = 0;
+ break;
+
+ case ETHER_STAT_LINK_DUPLEX:
+ *val = i40e->i40e_link_duplex;
+ break;
+ case ETHER_STAT_TOOSHORT_ERRORS:
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_RUC(port),
+ &ipk->ipk_rx_undersize, &ips->ips_rx_undersize, B_FALSE);
+
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_MSPDC(port),
+ &ipk->ipk_rx_short_discards, &ips->ips_rx_short_discards,
+ B_FALSE);
+ *val = ipk->ipk_rx_undersize.value.ui64 +
+ ipk->ipk_rx_short_discards.value.ui64;
+ break;
+ case ETHER_STAT_JABBER_ERRORS:
+ i40e_stat_get_uint32(i40e, I40E_GLPRT_RJC(port),
+ &ipk->ipk_rx_jabber, &ips->ips_rx_jabber, B_FALSE);
+ *val = ipk->ipk_rx_jabber.value.ui64;
+ break;
+
+ /*
+ * Non-Link speed related capabilities.
+ */
+ case ETHER_STAT_CAP_AUTONEG:
+ *val = 1;
+ break;
+
+ case ETHER_STAT_ADV_CAP_AUTONEG:
+ *val = 1;
+ break;
+
+ case ETHER_STAT_LP_CAP_AUTONEG:
+ *val = (hw->phy.link_info.an_info & I40E_AQ_LP_AN_ABILITY) != 0;
+ break;
+
+ case ETHER_STAT_LINK_AUTONEG:
+ *val = 1;
+ break;
+
+ /*
+ * Note that while the hardware does support the pause functionality, at
+ * this time we do not use it at all and effectively disable it.
+ */
+ case ETHER_STAT_CAP_ASMPAUSE:
+ *val = (i40e->i40e_phy.abilities &
+ I40E_AQ_PHY_FLAG_PAUSE_RX) != 0;
+ break;
+ case ETHER_STAT_CAP_PAUSE:
+ *val = (i40e->i40e_phy.abilities &
+ I40E_AQ_PHY_FLAG_PAUSE_TX) != 0;
+ break;
+
+ /*
+ * Because we don't support these at this time, they are always
+ * hard-coded to zero.
+ */
+ case ETHER_STAT_ADV_CAP_ASMPAUSE:
+ case ETHER_STAT_ADV_CAP_PAUSE:
+ *val = 0;
+ break;
+
+ /*
+ * Like the other LP fields, we can only answer the question have we
+ * enabled it, not whether the other end actually supports it.
+ */
+ case ETHER_STAT_LP_CAP_ASMPAUSE:
+ case ETHER_STAT_LINK_ASMPAUSE:
+ *val = (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_RX) != 0;
+ break;
+ case ETHER_STAT_LP_CAP_PAUSE:
+ case ETHER_STAT_LINK_PAUSE:
+ *val = (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_TX) != 0;
+ break;
+
+ default:
+ unimpl:
+ mutex_exit(&i40e->i40e_stat_lock);
+ mutex_exit(&i40e->i40e_general_lock);
+ return (ENOTSUP);
+ }
+
+ mutex_exit(&i40e->i40e_stat_lock);
+ mutex_exit(&i40e->i40e_general_lock);
+
+ if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) !=
+ DDI_FM_OK) {
+ ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED);
+ return (EIO);
+ }
+
+ return (0);
+}
+
+int
+i40e_rx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val)
+{
+ i40e_trqpair_t *itrq = (i40e_trqpair_t *)rh;
+ i40e_t *i40e = itrq->itrq_i40e;
+
+ if (i40e->i40e_state & I40E_SUSPENDED) {
+ return (ECANCELED);
+ }
+
+ switch (stat) {
+ case MAC_STAT_RBYTES:
+ *val = itrq->itrq_rxstat.irxs_bytes.value.ui64;
+ break;
+ case MAC_STAT_IPACKETS:
+ *val = itrq->itrq_rxstat.irxs_packets.value.ui64;
+ break;
+ default:
+ *val = 0;
+ return (ENOTSUP);
+ }
+
+ return (0);
+}
+
+int
+i40e_tx_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val)
+{
+ i40e_trqpair_t *itrq = (i40e_trqpair_t *)rh;
+ i40e_t *i40e = itrq->itrq_i40e;
+
+ if (i40e->i40e_state & I40E_SUSPENDED) {
+ return (ECANCELED);
+ }
+
+ switch (stat) {
+ case MAC_STAT_OBYTES:
+ *val = itrq->itrq_txstat.itxs_bytes.value.ui64;
+ break;
+ case MAC_STAT_OPACKETS:
+ *val = itrq->itrq_txstat.itxs_packets.value.ui64;
+ break;
+ default:
+ *val = 0;
+ return (ENOTSUP);
+ }
+
+ return (0);
+}
+
+/*
+ * When we end up refactoring all off the queue assignments and have non-static
+ * queue to VSI mappings, then we may need to revisit the general locking
+ * strategy that we employ and have the kstat creation / deletion be part of the
+ * ring start and stop routines.
+ */
+void
+i40e_stats_trqpair_fini(i40e_trqpair_t *itrq)
+{
+ if (itrq->itrq_txkstat != NULL) {
+ kstat_delete(itrq->itrq_txkstat);
+ itrq->itrq_txkstat = NULL;
+ }
+
+ if (itrq->itrq_rxkstat != NULL) {
+ kstat_delete(itrq->itrq_rxkstat);
+ itrq->itrq_rxkstat = NULL;
+ }
+}
+
+boolean_t
+i40e_stats_trqpair_init(i40e_trqpair_t *itrq)
+{
+ char buf[128];
+ i40e_t *i40e = itrq->itrq_i40e;
+ i40e_txq_stat_t *tsp = &itrq->itrq_txstat;
+ i40e_rxq_stat_t *rsp = &itrq->itrq_rxstat;
+
+ (void) snprintf(buf, sizeof (buf), "trqpair_tx_%d", itrq->itrq_index);
+ itrq->itrq_txkstat = kstat_create(I40E_MODULE_NAME,
+ ddi_get_instance(i40e->i40e_dip), buf, "net", KSTAT_TYPE_NAMED,
+ sizeof (i40e_txq_stat_t) / sizeof (kstat_named_t),
+ KSTAT_FLAG_VIRTUAL);
+
+ if (itrq->itrq_txkstat == NULL)
+ return (B_FALSE);
+
+ (void) snprintf(buf, sizeof (buf), "trqpair_rx_%d", itrq->itrq_index);
+ itrq->itrq_rxkstat = kstat_create(I40E_MODULE_NAME,
+ ddi_get_instance(i40e->i40e_dip), buf, "net", KSTAT_TYPE_NAMED,
+ sizeof (i40e_rxq_stat_t) / sizeof (kstat_named_t),
+ KSTAT_FLAG_VIRTUAL);
+
+ if (itrq->itrq_rxkstat == NULL) {
+ kstat_delete(itrq->itrq_txkstat);
+ itrq->itrq_txkstat = NULL;
+ return (B_FALSE);
+ }
+
+ itrq->itrq_txkstat->ks_data = &itrq->itrq_txstat;
+ itrq->itrq_rxkstat->ks_data = &itrq->itrq_rxstat;
+
+ kstat_named_init(&tsp->itxs_bytes, "tx_bytes",
+ KSTAT_DATA_UINT64);
+ tsp->itxs_bytes.value.ui64 = 0;
+ kstat_named_init(&tsp->itxs_packets, "tx_packets",
+ KSTAT_DATA_UINT64);
+ tsp->itxs_packets.value.ui64 = 0;
+ kstat_named_init(&tsp->itxs_descriptors, "tx_descriptors",
+ KSTAT_DATA_UINT64);
+ tsp->itxs_descriptors.value.ui64 = 0;
+ kstat_named_init(&tsp->itxs_recycled, "tx_recycled",
+ KSTAT_DATA_UINT64);
+ tsp->itxs_recycled.value.ui64 = 0;
+
+ kstat_named_init(&tsp->itxs_hck_meoifail, "tx_hck_meoifail",
+ KSTAT_DATA_UINT64);
+ tsp->itxs_hck_meoifail.value.ui64 = 0;
+ kstat_named_init(&tsp->itxs_hck_nol2info, "tx_hck_nol2info",
+ KSTAT_DATA_UINT64);
+ tsp->itxs_hck_nol2info.value.ui64 = 0;
+ kstat_named_init(&tsp->itxs_hck_nol3info, "tx_hck_nol3info",
+ KSTAT_DATA_UINT64);
+ tsp->itxs_hck_nol3info.value.ui64 = 0;
+ kstat_named_init(&tsp->itxs_hck_nol4info, "tx_hck_nol4info",
+ KSTAT_DATA_UINT64);
+ tsp->itxs_hck_nol4info.value.ui64 = 0;
+ kstat_named_init(&tsp->itxs_hck_badl3, "tx_hck_badl3",
+ KSTAT_DATA_UINT64);
+ tsp->itxs_hck_badl3.value.ui64 = 0;
+ kstat_named_init(&tsp->itxs_hck_badl4, "tx_hck_badl4",
+ KSTAT_DATA_UINT64);
+ tsp->itxs_hck_badl4.value.ui64 = 0;
+ kstat_named_init(&tsp->itxs_err_notcb, "tx_err_notcb",
+ KSTAT_DATA_UINT64);
+ tsp->itxs_err_notcb.value.ui64 = 0;
+ kstat_named_init(&tsp->itxs_err_nodescs, "tx_err_nodescs",
+ KSTAT_DATA_UINT64);
+ tsp->itxs_err_nodescs.value.ui64 = 0;
+ kstat_named_init(&tsp->itxs_err_context, "tx_err_context",
+ KSTAT_DATA_UINT64);
+ tsp->itxs_err_context.value.ui64 = 0;
+ kstat_named_init(&tsp->itxs_num_unblocked, "tx_num_unblocked",
+ KSTAT_DATA_UINT64);
+ tsp->itxs_num_unblocked.value.ui64 = 0;
+
+
+ kstat_named_init(&rsp->irxs_bytes, "rx_bytes",
+ KSTAT_DATA_UINT64);
+ rsp->irxs_bytes.value.ui64 = 0;
+ kstat_named_init(&rsp->irxs_packets, "rx_packets",
+ KSTAT_DATA_UINT64);
+ rsp->irxs_packets.value.ui64 = 0;
+ kstat_named_init(&rsp->irxs_rx_desc_error, "rx_desc_error",
+ KSTAT_DATA_UINT64);
+ rsp->irxs_rx_desc_error.value.ui64 = 0;
+ kstat_named_init(&rsp->irxs_rx_intr_limit, "rx_intr_limit",
+ KSTAT_DATA_UINT64);
+ rsp->irxs_rx_intr_limit.value.ui64 = 0;
+ kstat_named_init(&rsp->irxs_rx_bind_norcb, "rx_bind_norcb",
+ KSTAT_DATA_UINT64);
+ rsp->irxs_rx_bind_norcb.value.ui64 = 0;
+ kstat_named_init(&rsp->irxs_rx_bind_nomp, "rx_bind_nomp",
+ KSTAT_DATA_UINT64);
+ rsp->irxs_rx_bind_nomp.value.ui64 = 0;
+ kstat_named_init(&rsp->irxs_rx_copy_nomem, "rx_copy_nomem",
+ KSTAT_DATA_UINT64);
+ rsp->irxs_rx_copy_nomem.value.ui64 = 0;
+ kstat_named_init(&rsp->irxs_hck_v4hdrok, "rx_hck_v4hdrok",
+ KSTAT_DATA_UINT64);
+ rsp->irxs_hck_v4hdrok.value.ui64 = 0;
+ kstat_named_init(&rsp->irxs_hck_l4hdrok, "rx_hck_l4hdrok",
+ KSTAT_DATA_UINT64);
+ rsp->irxs_hck_l4hdrok.value.ui64 = 0;
+ kstat_named_init(&rsp->irxs_hck_unknown, "rx_hck_unknown",
+ KSTAT_DATA_UINT64);
+ rsp->irxs_hck_unknown.value.ui64 = 0;
+ kstat_named_init(&rsp->irxs_hck_nol3l4p, "rx_hck_nol3l4p",
+ KSTAT_DATA_UINT64);
+ rsp->irxs_hck_nol3l4p.value.ui64 = 0;
+ kstat_named_init(&rsp->irxs_hck_iperr, "rx_hck_iperr",
+ KSTAT_DATA_UINT64);
+ rsp->irxs_hck_iperr.value.ui64 = 0;
+ kstat_named_init(&rsp->irxs_hck_eiperr, "rx_hck_eiperr",
+ KSTAT_DATA_UINT64);
+ rsp->irxs_hck_eiperr.value.ui64 = 0;
+ kstat_named_init(&rsp->irxs_hck_l4err, "rx_hck_l4err",
+ KSTAT_DATA_UINT64);
+ rsp->irxs_hck_l4err.value.ui64 = 0;
+ kstat_named_init(&rsp->irxs_hck_v6skip, "rx_hck_v6skip",
+ KSTAT_DATA_UINT64);
+ rsp->irxs_hck_v6skip.value.ui64 = 0;
+ kstat_named_init(&rsp->irxs_hck_set, "rx_hck_set",
+ KSTAT_DATA_UINT64);
+ rsp->irxs_hck_set.value.ui64 = 0;
+ kstat_named_init(&rsp->irxs_hck_miss, "rx_hck_miss",
+ KSTAT_DATA_UINT64);
+ rsp->irxs_hck_miss.value.ui64 = 0;
+
+ kstat_install(itrq->itrq_txkstat);
+ kstat_install(itrq->itrq_rxkstat);
+
+ return (B_TRUE);
+}
diff --git a/usr/src/uts/common/io/i40e/i40e_sw.h b/usr/src/uts/common/io/i40e/i40e_sw.h
new file mode 100644
index 0000000000..04959b1590
--- /dev/null
+++ b/usr/src/uts/common/io/i40e/i40e_sw.h
@@ -0,0 +1,974 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
+ */
+
+/*
+ * Please see i40e_main.c for an introduction to the device driver, its layout,
+ * and more.
+ */
+
+#ifndef _I40E_SW_H
+#define _I40E_SW_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <sys/conf.h>
+#include <sys/debug.h>
+#include <sys/stropts.h>
+#include <sys/stream.h>
+#include <sys/strsun.h>
+#include <sys/strlog.h>
+#include <sys/kmem.h>
+#include <sys/stat.h>
+#include <sys/kstat.h>
+#include <sys/modctl.h>
+#include <sys/errno.h>
+#include <sys/dlpi.h>
+#include <sys/mac_provider.h>
+#include <sys/mac_ether.h>
+#include <sys/vlan.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/pci.h>
+#include <sys/pcie.h>
+#include <sys/sdt.h>
+#include <sys/ethernet.h>
+#include <sys/pattr.h>
+#include <sys/strsubr.h>
+#include <sys/netlb.h>
+#include <sys/random.h>
+#include <inet/common.h>
+#include <inet/tcp.h>
+#include <inet/ip.h>
+#include <inet/mi.h>
+#include <inet/nd.h>
+#include <netinet/udp.h>
+#include <netinet/sctp.h>
+#include <sys/bitmap.h>
+#include <sys/cpuvar.h>
+#include <sys/ddifm.h>
+#include <sys/fm/protocol.h>
+#include <sys/fm/util.h>
+#include <sys/disp.h>
+#include <sys/fm/io/ddi.h>
+#include <sys/list.h>
+#include <sys/debug.h>
+#include <sys/sdt.h>
+#include "i40e_type.h"
+#include "i40e_osdep.h"
+#include "i40e_prototype.h"
+#include "i40e_xregs.h"
+
+#define I40E_MODULE_NAME "i40e"
+
+#define I40E_ADAPTER_REGSET 1
+
+/*
+ * Configuration constants. Note that the hardware defines a minimum bound of 32
+ * descriptors and requires that the programming of the descriptor lengths be
+ * aligned in units of 32 descriptors.
+ */
+#define I40E_MIN_TX_RING_SIZE 64
+#define I40E_MAX_TX_RING_SIZE 4096
+#define I40E_DEF_TX_RING_SIZE 1024
+
+#define I40E_MIN_RX_RING_SIZE 64
+#define I40E_MAX_RX_RING_SIZE 4096
+#define I40E_DEF_RX_RING_SIZE 1024
+
+#define I40E_DESC_ALIGN 32
+
+/*
+ * Sizes used for asynchronous processing of the adminq. We allocate a fixed
+ * size buffer for each instance of the device during attach time, rather than
+ * allocating and freeing one during interrupt processing.
+ *
+ * We also define the descriptor size of the admin queue here.
+ */
+#define I40E_ADMINQ_BUFSZ 4096
+#define I40E_MAX_ADMINQ_SIZE 1024
+#define I40E_DEF_ADMINQ_SIZE 256
+
+/*
+ * Note, while the min and maximum values are based upon the sizing of the ring
+ * itself, the default is taken from ixgbe without much thought. It's basically
+ * been cargo culted. See i40e_transciever.c for a bit more information.
+ */
+#define I40E_MIN_RX_LIMIT_PER_INTR 16
+#define I40E_MAX_RX_LIMIT_PER_INTR 4096
+#define I40E_DEF_RX_LIMIT_PER_INTR 256
+
+/*
+ * Valid MTU ranges. Note that the XL710's maximum payload is actually 9728.
+ * However, we need to adjust for the ETHERFCSL (4 bytes) and the Ethernet VLAN
+ * header size (18 bytes) to get the actual maximum frame we can use. If
+ * different adapters end up with different sizes, we should make this value a
+ * bit more dynamic.
+ */
+#define I40E_MAX_MTU 9706
+#define I40E_MIN_MTU ETHERMIN
+#define I40E_DEF_MTU ETHERMTU
+
+/*
+ * Interrupt throttling related values. Interrupt throttling values are defined
+ * in two microsecond increments. Note that a value of zero basically says do no
+ * ITR activity. A helpful way to think about these is that setting the ITR to a
+ * value will allow a certain number of interrupts per second.
+ *
+ * Our default values for RX allow 20k interrupts per second while our default
+ * values for TX allow for 5k interrupts per second. For other class interrupts,
+ * we limit ourselves to a rate of 2k/s.
+ */
+#define I40E_MIN_ITR 0x0000
+#define I40E_MAX_ITR 0x0FF0
+#define I40E_DEF_RX_ITR 0x0019
+#define I40E_DEF_TX_ITR 0x0064
+#define I40E_DEF_OTHER_ITR 0x00FA
+
+/*
+ * Indexes into the three ITR registers that we have.
+ */
+typedef enum i40e_itr_index {
+ I40E_ITR_INDEX_RX = 0x0,
+ I40E_ITR_INDEX_TX = 0x1,
+ I40E_ITR_INDEX_OTHER = 0x2,
+ I40E_ITR_INDEX_NONE = 0x3
+} i40e_itr_index_t;
+
+
+/*
+ * Table 1-5 of the PRM notes that LSO supports up to 256 KB.
+ */
+#define I40E_LSO_MAXLEN (256 * 1024)
+
+#define I40E_CYCLIC_PERIOD NANOSEC /* 1 second */
+#define I40E_DRAIN_RX_WAIT (500 * MILLISEC) /* In us */
+
+/*
+ * All the other queue types for are defined by the common code. However, this
+ * is the constant to indicate that it's terminated.
+ */
+#define I40E_QUEUE_TYPE_EOL 0x7FF
+
+/*
+ * See the comments in i40e_buf.c as to the purpose of this value and how it's
+ * used to ensure that the IP header is eventually aligned when it's received by
+ * the OS.
+ */
+#define I40E_BUF_IPHDR_ALIGNMENT 2
+
+/*
+ * The XL710 controller has a limit of eight buffers being allowed to be used
+ * for the transmission of a single frame. This is defined in 8.4.1 - Transmit
+ * Packet in System Memory.
+ */
+#define I40E_TX_MAX_COOKIE 8
+
+/*
+ * Sizing to determine the amount of available descriptors at which we'll
+ * consider ourselves blocked. Also, when we have these available, we'll then
+ * consider ourselves available to transmit to MAC again. Strictly speaking, the
+ * MAX is based on the ring size. The default sizing is based on ixgbe.
+ */
+#define I40E_MIN_TX_BLOCK_THRESH I40E_TX_MAX_COOKIE
+#define I40E_DEF_TX_BLOCK_THRESH I40E_MIN_TX_BLOCK_THRESH
+
+/*
+ * Sizing for DMA thresholds. These are used to indicate whether or not we
+ * should perform a bcopy or a DMA binding of a given message block. The range
+ * allows for setting things such that we'll always do a bcopy (a high value) or
+ * always perform a DMA binding (a low value).
+ */
+#define I40E_MIN_RX_DMA_THRESH 0
+#define I40E_DEF_RX_DMA_THRESH 256
+#define I40E_MAX_RX_DMA_THRESH INT32_MAX
+
+#define I40E_MIN_TX_DMA_THRESH 0
+#define I40E_DEF_TX_DMA_THRESH 256
+#define I40E_MAX_TX_DMA_THRESH INT32_MAX
+
+/*
+ * Resource sizing counts. There are various aspects of hardware where we may
+ * have some variable number of elements that we need to handle. Such as the
+ * hardware capabilities and switch capacities. We cannot know a priori how many
+ * elements to do, so instead we take a starting guess and then will grow it up
+ * to an upper bound on a number of elements, to limit memory consumption in
+ * case of a hardware bug.
+ */
+#define I40E_HW_CAP_DEFAULT 40
+#define I40E_SWITCH_CAP_DEFAULT 25
+
+/*
+ * Host Memory Context related constants.
+ */
+#define I40E_HMC_RX_CTX_UNIT 128
+#define I40E_HMC_RX_DBUFF_MIN 1024
+#define I40E_HMC_RX_DBUFF_MAX (16 * 1024 - 128)
+#define I40E_HMC_RX_DTYPE_NOSPLIT 0
+#define I40E_HMC_RX_DSIZE_32BYTE 1
+#define I40E_HMC_RX_CRCSTRIP_ENABLE 1
+#define I40E_HMC_RX_FC_DISABLE 0
+#define I40E_HMC_RX_L2TAGORDER 1
+#define I40E_HMC_RX_HDRSPLIT_DISABLE 0
+#define I40E_HMC_RX_INVLAN_DONTSTRIP 0
+#define I40E_HMC_RX_TPH_DISABLE 0
+#define I40E_HMC_RX_LOWRXQ_NOINTR 0
+#define I40E_HMC_RX_PREFENA 1
+
+#define I40E_HMC_TX_CTX_UNIT 128
+#define I40E_HMC_TX_NEW_CONTEXT 1
+#define I40E_HMC_TX_FC_DISABLE 0
+#define I40E_HMC_TX_TS_DISABLE 0
+#define I40E_HMC_TX_FD_DISABLE 0
+#define I40E_HMC_TX_ALT_VLAN_DISABLE 0
+#define I40E_HMC_TX_WB_ENABLE 1
+#define I40E_HMC_TX_TPH_DISABLE 0
+
+/*
+ * Whenever we establish and create a VSI, we need to assign some number of
+ * queues that it's allowed to access from the PF. Because we only have a single
+ * VSI per PF at this time, we assign it all the queues.
+ *
+ * Many of the devices support what's called Data-center Bridging. Which is a
+ * feature that we don't have much use of at this time. However, we still need
+ * to fill in this information. We follow the guidance of the note in Table 7-80
+ * which talks about bytes 62-77. It says that if we don't want to assign
+ * anything to traffic classes, we should set the field to zero. Effectively
+ * this means that everything in the system is assigned to traffic class zero.
+ */
+#define I40E_ASSIGN_ALL_QUEUES 0
+#define I40E_TRAFFIC_CLASS_NO_QUEUES 0
+
+/*
+ * This defines the error mask that we care about from rx descriptors. Currently
+ * we're only concerned with the general errors and oversize errors.
+ */
+#define I40E_RX_ERR_BITS ((1 << I40E_RX_DESC_ERROR_RXE_SHIFT) | \
+ (1 << I40E_RX_DESC_ERROR_OVERSIZE_SHIFT))
+
+/*
+ * Property sizing macros for firmware versions, etc. They need to be large
+ * enough to hold 32-bit quantities transformed to strings as %d.%d or %x.
+ */
+#define I40E_DDI_PROP_LEN 64
+
+/*
+ * We currently consolidate some overrides that we use in the code here. These
+ * will be gone in the fullness of time, but as we're bringing up the device,
+ * this is what we use.
+ */
+#define I40E_GROUP_MAX 1
+#define I40E_TRQPAIR_MAX 1
+
+#define I40E_GROUP_NOMSIX 1
+#define I40E_TRQPAIR_NOMSIX 1
+
+/*
+ * It seems reasonable to cast this to void because the only reason that we
+ * should be getting a DDI_FAILURE is due to the fact that we specify addresses
+ * out of range. Because we specify no offset or address, it shouldn't happen.
+ */
+#ifdef DEBUG
+#define I40E_DMA_SYNC(handle, flag) ASSERT0(ddi_dma_sync( \
+ (handle)->dmab_dma_handle, 0, 0, \
+ (flag)))
+#else /* !DEBUG */
+#define I40E_DMA_SYNC(handle, flag) ((void) ddi_dma_sync( \
+ (handle)->dmab_dma_handle, 0, 0, \
+ (flag)))
+#endif /* DEBUG */
+
+/*
+ * Constants related to ring startup and teardown. These refer to the amount of
+ * time that we're willing to wait for a ring to spin up and spin down.
+ */
+#define I40E_RING_WAIT_NTRIES 10
+#define I40E_RING_WAIT_PAUSE 10 /* ms */
+
+/*
+ * Bit flags for attach_progress
+ */
+typedef enum i40e_attach_state {
+ I40E_ATTACH_PCI_CONFIG = 0x0001, /* PCI config setup */
+ I40E_ATTACH_REGS_MAP = 0x0002, /* Registers mapped */
+ I40E_ATTACH_PROPS = 0x0004, /* Properties initialized */
+ I40E_ATTACH_ALLOC_INTR = 0x0008, /* Interrupts allocated */
+ I40E_ATTACH_ALLOC_RINGSLOCKS = 0x0010, /* Rings & locks allocated */
+ I40E_ATTACH_ADD_INTR = 0x0020, /* Intr handlers added */
+ I40E_ATTACH_COMMON_CODE = 0x0040, /* Intel code initialized */
+ I40E_ATTACH_INIT = 0x0080, /* Device initialized */
+ I40E_ATTACH_STATS = 0x0200, /* Kstats created */
+ I40E_ATTACH_MAC = 0x0800, /* MAC registered */
+ I40E_ATTACH_ENABLE_INTR = 0x1000, /* DDI interrupts enabled */
+ I40E_ATTACH_FM_INIT = 0x2000, /* FMA initialized */
+ I40E_ATTACH_LINK_TIMER = 0x4000, /* link check timer */
+} i40e_attach_state_t;
+
+
+/*
+ * State flags that what's going on in in the device. Some of these state flags
+ * indicate some aspirational work that needs to happen in the driver.
+ *
+ * I40E_UNKNOWN: The device has yet to be started.
+ * I40E_INITIALIZED: The device has been fully attached.
+ * I40E_STARTED: The device has come out of the GLDV3 start routine.
+ * I40E_SUSPENDED: The device is suspended and I/O among other things
+ * should not occur. This happens because of an actual
+ * DDI_SUSPEND or interrupt adjustments.
+ * I40E_STALL: The tx stall detection logic has found a stall.
+ * I40E_OVERTEMP: The device has encountered a temperature alarm.
+ * I40E_INTR_ADJUST: Our interrupts are being manipulated and therefore we
+ * shouldn't be manipulating their state.
+ * I40E_ERROR: We've detected an FM error and degraded the device.
+ */
+typedef enum i40e_state {
+ I40E_UNKNOWN = 0x00,
+ I40E_INITIALIZED = 0x01,
+ I40E_STARTED = 0x02,
+ I40E_SUSPENDED = 0x04,
+ I40E_STALL = 0x08,
+ I40E_OVERTEMP = 0x20,
+ I40E_INTR_ADJUST = 0x40,
+ I40E_ERROR = 0x80
+} i40e_state_t;
+
+
+/*
+ * Definitions for common Intel things that we use and some slightly more usable
+ * names.
+ */
+typedef struct i40e_hw i40e_hw_t;
+typedef struct i40e_aqc_switch_resource_alloc_element_resp i40e_switch_rsrc_t;
+
+/*
+ * Handles and addresses of DMA buffers.
+ */
+typedef struct i40e_dma_buffer {
+ caddr_t dmab_address; /* Virtual address */
+ uint64_t dmab_dma_address; /* DMA (Hardware) address */
+ ddi_acc_handle_t dmab_acc_handle; /* Data access handle */
+ ddi_dma_handle_t dmab_dma_handle; /* DMA handle */
+ size_t dmab_size; /* Buffer size */
+ size_t dmab_len; /* Data length in the buffer */
+} i40e_dma_buffer_t;
+
+/*
+ * RX Control Block
+ */
+typedef struct i40e_rx_control_block {
+ mblk_t *rcb_mp;
+ uint32_t rcb_ref;
+ i40e_dma_buffer_t rcb_dma;
+ frtn_t rcb_free_rtn;
+ struct i40e_rx_data *rcb_rxd;
+} i40e_rx_control_block_t;
+
+typedef enum {
+ I40E_TX_NONE,
+ I40E_TX_COPY,
+ I40E_TX_DMA
+} i40e_tx_type_t;
+
+typedef struct i40e_tx_desc i40e_tx_desc_t;
+typedef union i40e_32byte_rx_desc i40e_rx_desc_t;
+
+typedef struct i40e_tx_control_block {
+ struct i40e_tx_control_block *tcb_next;
+ mblk_t *tcb_mp;
+ i40e_tx_type_t tcb_type;
+ ddi_dma_handle_t tcb_dma_handle;
+ i40e_dma_buffer_t tcb_dma;
+} i40e_tx_control_block_t;
+
+/*
+ * Receive ring data (used below).
+ */
+typedef struct i40e_rx_data {
+ struct i40e *rxd_i40e;
+
+ /*
+ * RX descriptor ring definitions
+ */
+ i40e_dma_buffer_t rxd_desc_area; /* DMA buffer of rx desc ring */
+ i40e_rx_desc_t *rxd_desc_ring; /* Rx desc ring */
+ uint32_t rxd_desc_next; /* Index of next rx desc */
+
+ /*
+ * RX control block list definitions
+ */
+ kmutex_t rxd_free_lock; /* Lock to protect free data */
+ i40e_rx_control_block_t *rxd_rcb_area; /* Array of control blocks */
+ i40e_rx_control_block_t **rxd_work_list; /* Work list of rcbs */
+ i40e_rx_control_block_t **rxd_free_list; /* Free list of rcbs */
+ uint32_t rxd_rcb_free; /* Number of free rcbs */
+
+ /*
+ * RX software ring settings
+ */
+ uint32_t rxd_ring_size; /* Rx descriptor ring size */
+ uint32_t rxd_free_list_size; /* Rx free list size */
+
+ /*
+ * RX outstanding data. This is used to keep track of outstanding loaned
+ * descriptors after we've shut down receiving information. Note these
+ * are protected by the i40e_t`i40e_rx_pending_lock.
+ */
+ uint32_t rxd_rcb_pending;
+ boolean_t rxd_shutdown;
+} i40e_rx_data_t;
+
+/*
+ * Structures for unicast and multicast addresses. Note that we keep the VSI id
+ * around for unicast addresses, since they may belong to different VSIs.
+ * However, since all multicast addresses belong to the default VSI, we don't
+ * duplicate that information.
+ */
+typedef struct i40e_uaddr {
+ uint8_t iua_mac[ETHERADDRL];
+ int iua_vsi;
+} i40e_uaddr_t;
+
+typedef struct i40e_maddr {
+ uint8_t ima_mac[ETHERADDRL];
+} i40e_maddr_t;
+
+/*
+ * Collection of RX statistics on a given queue.
+ */
+typedef struct i40e_rxq_stat {
+ /*
+ * The i40e hardware does not maintain statistics on a per-ring basis,
+ * only on a per-PF and per-VSI level. As such, to satisfy the GLDv3, we
+ * need to maintain our own stats for packets and bytes.
+ */
+ kstat_named_t irxs_bytes; /* Bytes in on queue */
+ kstat_named_t irxs_packets; /* Packets in on queue */
+
+ /*
+ * The following set of stats cover non-checksum data path issues.
+ */
+ kstat_named_t irxs_rx_desc_error; /* Error bit set on desc */
+ kstat_named_t irxs_rx_copy_nomem; /* allocb failure for copy */
+ kstat_named_t irxs_rx_intr_limit; /* Hit i40e_rx_limit_per_intr */
+ kstat_named_t irxs_rx_bind_norcb; /* No replacement rcb free */
+ kstat_named_t irxs_rx_bind_nomp; /* No mblk_t in bind rcb */
+
+ /*
+ * The following set of statistics covers rx checksum related activity.
+ * These are all primarily set in i40e_rx_hcksum. If rx checksum
+ * activity is disabled, then these should all be zero.
+ */
+ kstat_named_t irxs_hck_v4hdrok; /* Valid IPv4 Header */
+ kstat_named_t irxs_hck_l4hdrok; /* Valid L4 Header */
+ kstat_named_t irxs_hck_unknown; /* !pinfo.known */
+ kstat_named_t irxs_hck_nol3l4p; /* Missing L3L4P bit in desc */
+ kstat_named_t irxs_hck_iperr; /* IPE error bit set */
+ kstat_named_t irxs_hck_eiperr; /* EIPE error bit set */
+ kstat_named_t irxs_hck_l4err; /* L4E error bit set */
+ kstat_named_t irxs_hck_v6skip; /* IPv6 case hw fails on */
+ kstat_named_t irxs_hck_set; /* Total times we set cksum */
+ kstat_named_t irxs_hck_miss; /* Times with zero cksum bits */
+} i40e_rxq_stat_t;
+
+/*
+ * Collection of TX Statistics on a given queue
+ */
+typedef struct i40e_txq_stat {
+ kstat_named_t itxs_bytes; /* Bytes out on queue */
+ kstat_named_t itxs_packets; /* Packets out on queue */
+ kstat_named_t itxs_descriptors; /* Descriptors issued */
+ kstat_named_t itxs_recycled; /* Descriptors reclaimed */
+ /*
+ * Various failure conditions.
+ */
+ kstat_named_t itxs_hck_meoifail; /* ether offload failures */
+ kstat_named_t itxs_hck_nol2info; /* Missing l2 info */
+ kstat_named_t itxs_hck_nol3info; /* Missing l3 info */
+ kstat_named_t itxs_hck_nol4info; /* Missing l4 info */
+ kstat_named_t itxs_hck_badl3; /* Not IPv4/IPv6 */
+ kstat_named_t itxs_hck_badl4; /* Bad L4 Paylaod */
+
+ kstat_named_t itxs_err_notcb; /* No tcb's available */
+ kstat_named_t itxs_err_nodescs; /* No tcb's available */
+ kstat_named_t itxs_err_context; /* Total context failures */
+
+ kstat_named_t itxs_num_unblocked; /* Number of MAC unblocks */
+} i40e_txq_stat_t;
+
+/*
+ * An instance of an XL710 transmit/receive queue pair. This currently
+ * represents a combination of both a transmit and receive ring, though they
+ * should really be split apart into separate logical structures. Unfortunately,
+ * during initial work we mistakenly joined them together.
+ */
+typedef struct i40e_trqpair {
+ struct i40e *itrq_i40e;
+
+ /* Receive-side structures. */
+ kmutex_t itrq_rx_lock;
+ mac_ring_handle_t itrq_macrxring; /* Receive ring handle. */
+ i40e_rx_data_t *itrq_rxdata; /* Receive ring rx data. */
+ uint64_t itrq_rxgen; /* Generation number for mac/GLDv3. */
+ uint32_t itrq_index; /* Queue index in the PF */
+ uint32_t itrq_rx_intrvec; /* Receive interrupt vector. */
+
+ /* Receive-side stats. */
+ i40e_rxq_stat_t itrq_rxstat;
+ kstat_t *itrq_rxkstat;
+
+ /* Transmit-side structures. */
+ kmutex_t itrq_tx_lock;
+ mac_ring_handle_t itrq_mactxring; /* Transmit ring handle. */
+ uint32_t itrq_tx_intrvec; /* Transmit interrupt vector. */
+ boolean_t itrq_tx_blocked; /* Does MAC think we're blocked? */
+
+ /*
+ * TX data sizing
+ */
+ uint32_t itrq_tx_ring_size;
+ uint32_t itrq_tx_free_list_size;
+
+ /*
+ * TX descriptor ring data
+ */
+ i40e_dma_buffer_t itrq_desc_area; /* DMA buffer of tx desc ring */
+ i40e_tx_desc_t *itrq_desc_ring; /* TX Desc ring */
+ volatile uint32_t *itrq_desc_wbhead; /* TX write-back index */
+ uint32_t itrq_desc_head; /* Last index hw freed */
+ uint32_t itrq_desc_tail; /* Index of next free desc */
+ uint32_t itrq_desc_free; /* Number of free descriptors */
+
+ /*
+ * TX control block (tcb) data
+ */
+ kmutex_t itrq_tcb_lock;
+ i40e_tx_control_block_t *itrq_tcb_area; /* Array of control blocks */
+ i40e_tx_control_block_t **itrq_tcb_work_list; /* In use tcb */
+ i40e_tx_control_block_t **itrq_tcb_free_list; /* Available tcb */
+ uint32_t itrq_tcb_free; /* Count of free tcb */
+
+ /* Transmit-side stats. */
+ i40e_txq_stat_t itrq_txstat;
+ kstat_t *itrq_txkstat;
+
+} i40e_trqpair_t;
+
+/*
+ * VSI statistics.
+ *
+ * This mirrors the i40e_eth_stats structure but transforms it into a kstat.
+ * Note that the stock statistic structure also includes entries for tx
+ * discards. However, this is not actually implemented for the VSI (see Table
+ * 7-221), hence why we don't include the member which would always have a value
+ * of zero. This choice was made to minimize confusion to someone looking at
+ * these, as a value of zero does not necessarily equate to the fact that it's
+ * not implemented.
+ */
+typedef struct i40e_vsi_stats {
+ uint64_t ivs_rx_bytes; /* gorc */
+ uint64_t ivs_rx_unicast; /* uprc */
+ uint64_t ivs_rx_multicast; /* mprc */
+ uint64_t ivs_rx_broadcast; /* bprc */
+ uint64_t ivs_rx_discards; /* rdpc */
+ uint64_t ivs_rx_unknown_protocol; /* rupp */
+ uint64_t ivs_tx_bytes; /* gotc */
+ uint64_t ivs_tx_unicast; /* uptc */
+ uint64_t ivs_tx_multicast; /* mptc */
+ uint64_t ivs_tx_broadcast; /* bptc */
+ uint64_t ivs_tx_errors; /* tepc */
+} i40e_vsi_stats_t;
+
+typedef struct i40e_vsi_kstats {
+ kstat_named_t ivk_rx_bytes;
+ kstat_named_t ivk_rx_unicast;
+ kstat_named_t ivk_rx_multicast;
+ kstat_named_t ivk_rx_broadcast;
+ kstat_named_t ivk_rx_discards;
+ kstat_named_t ivk_rx_unknown_protocol;
+ kstat_named_t ivk_tx_bytes;
+ kstat_named_t ivk_tx_unicast;
+ kstat_named_t ivk_tx_multicast;
+ kstat_named_t ivk_tx_broadcast;
+ kstat_named_t ivk_tx_errors;
+} i40e_vsi_kstats_t;
+
+/*
+ * For pf statistics, we opt not to use the standard statistics as defined by
+ * the Intel common code. This also currently combines statistics that are
+ * global across the entire device.
+ */
+typedef struct i40e_pf_stats {
+ uint64_t ips_rx_bytes; /* gorc */
+ uint64_t ips_rx_unicast; /* uprc */
+ uint64_t ips_rx_multicast; /* mprc */
+ uint64_t ips_rx_broadcast; /* bprc */
+ uint64_t ips_tx_bytes; /* gotc */
+ uint64_t ips_tx_unicast; /* uptc */
+ uint64_t ips_tx_multicast; /* mptc */
+ uint64_t ips_tx_broadcast; /* bptc */
+
+ uint64_t ips_rx_size_64; /* prc64 */
+ uint64_t ips_rx_size_127; /* prc127 */
+ uint64_t ips_rx_size_255; /* prc255 */
+ uint64_t ips_rx_size_511; /* prc511 */
+ uint64_t ips_rx_size_1023; /* prc1023 */
+ uint64_t ips_rx_size_1522; /* prc1522 */
+ uint64_t ips_rx_size_9522; /* prc9522 */
+
+ uint64_t ips_tx_size_64; /* ptc64 */
+ uint64_t ips_tx_size_127; /* ptc127 */
+ uint64_t ips_tx_size_255; /* ptc255 */
+ uint64_t ips_tx_size_511; /* ptc511 */
+ uint64_t ips_tx_size_1023; /* ptc1023 */
+ uint64_t ips_tx_size_1522; /* ptc1522 */
+ uint64_t ips_tx_size_9522; /* ptc9522 */
+
+ uint64_t ips_link_xon_rx; /* lxonrxc */
+ uint64_t ips_link_xoff_rx; /* lxoffrxc */
+ uint64_t ips_link_xon_tx; /* lxontxc */
+ uint64_t ips_link_xoff_tx; /* lxofftxc */
+ uint64_t ips_priority_xon_rx[8]; /* pxonrxc[8] */
+ uint64_t ips_priority_xoff_rx[8]; /* pxoffrxc[8] */
+ uint64_t ips_priority_xon_tx[8]; /* pxontxc[8] */
+ uint64_t ips_priority_xoff_tx[8]; /* pxofftxc[8] */
+ uint64_t ips_priority_xon_2_xoff[8]; /* rxon2offcnt[8] */
+
+ uint64_t ips_crc_errors; /* crcerrs */
+ uint64_t ips_illegal_bytes; /* illerrc */
+ uint64_t ips_mac_local_faults; /* mlfc */
+ uint64_t ips_mac_remote_faults; /* mrfc */
+ uint64_t ips_rx_length_errors; /* rlec */
+ uint64_t ips_rx_undersize; /* ruc */
+ uint64_t ips_rx_fragments; /* rfc */
+ uint64_t ips_rx_oversize; /* roc */
+ uint64_t ips_rx_jabber; /* rjc */
+ uint64_t ips_rx_discards; /* rdpc */
+ uint64_t ips_rx_vm_discards; /* ldpc */
+ uint64_t ips_rx_short_discards; /* mspdc */
+ uint64_t ips_tx_dropped_link_down; /* tdold */
+ uint64_t ips_rx_unknown_protocol; /* rupp */
+ uint64_t ips_rx_err1; /* rxerr1 */
+ uint64_t ips_rx_err2; /* rxerr2 */
+} i40e_pf_stats_t;
+
+typedef struct i40e_pf_kstats {
+ kstat_named_t ipk_rx_bytes; /* gorc */
+ kstat_named_t ipk_rx_unicast; /* uprc */
+ kstat_named_t ipk_rx_multicast; /* mprc */
+ kstat_named_t ipk_rx_broadcast; /* bprc */
+ kstat_named_t ipk_tx_bytes; /* gotc */
+ kstat_named_t ipk_tx_unicast; /* uptc */
+ kstat_named_t ipk_tx_multicast; /* mptc */
+ kstat_named_t ipk_tx_broadcast; /* bptc */
+
+ kstat_named_t ipk_rx_size_64; /* prc64 */
+ kstat_named_t ipk_rx_size_127; /* prc127 */
+ kstat_named_t ipk_rx_size_255; /* prc255 */
+ kstat_named_t ipk_rx_size_511; /* prc511 */
+ kstat_named_t ipk_rx_size_1023; /* prc1023 */
+ kstat_named_t ipk_rx_size_1522; /* prc1522 */
+ kstat_named_t ipk_rx_size_9522; /* prc9522 */
+
+ kstat_named_t ipk_tx_size_64; /* ptc64 */
+ kstat_named_t ipk_tx_size_127; /* ptc127 */
+ kstat_named_t ipk_tx_size_255; /* ptc255 */
+ kstat_named_t ipk_tx_size_511; /* ptc511 */
+ kstat_named_t ipk_tx_size_1023; /* ptc1023 */
+ kstat_named_t ipk_tx_size_1522; /* ptc1522 */
+ kstat_named_t ipk_tx_size_9522; /* ptc9522 */
+
+ kstat_named_t ipk_link_xon_rx; /* lxonrxc */
+ kstat_named_t ipk_link_xoff_rx; /* lxoffrxc */
+ kstat_named_t ipk_link_xon_tx; /* lxontxc */
+ kstat_named_t ipk_link_xoff_tx; /* lxofftxc */
+ kstat_named_t ipk_priority_xon_rx[8]; /* pxonrxc[8] */
+ kstat_named_t ipk_priority_xoff_rx[8]; /* pxoffrxc[8] */
+ kstat_named_t ipk_priority_xon_tx[8]; /* pxontxc[8] */
+ kstat_named_t ipk_priority_xoff_tx[8]; /* pxofftxc[8] */
+ kstat_named_t ipk_priority_xon_2_xoff[8]; /* rxon2offcnt[8] */
+
+ kstat_named_t ipk_crc_errors; /* crcerrs */
+ kstat_named_t ipk_illegal_bytes; /* illerrc */
+ kstat_named_t ipk_mac_local_faults; /* mlfc */
+ kstat_named_t ipk_mac_remote_faults; /* mrfc */
+ kstat_named_t ipk_rx_length_errors; /* rlec */
+ kstat_named_t ipk_rx_undersize; /* ruc */
+ kstat_named_t ipk_rx_fragments; /* rfc */
+ kstat_named_t ipk_rx_oversize; /* roc */
+ kstat_named_t ipk_rx_jabber; /* rjc */
+ kstat_named_t ipk_rx_discards; /* rdpc */
+ kstat_named_t ipk_rx_vm_discards; /* ldpc */
+ kstat_named_t ipk_rx_short_discards; /* mspdc */
+ kstat_named_t ipk_tx_dropped_link_down; /* tdold */
+ kstat_named_t ipk_rx_unknown_protocol; /* rupp */
+ kstat_named_t ipk_rx_err1; /* rxerr1 */
+ kstat_named_t ipk_rx_err2; /* rxerr2 */
+} i40e_pf_kstats_t;
+
+/*
+ * Resources that are pooled and specific to a given i40e_t.
+ */
+typedef struct i40e_func_rsrc {
+ uint_t ifr_nrx_queue;
+ uint_t ifr_nrx_queue_used;
+ uint_t ifr_ntx_queue;
+ uint_t ifr_trx_queue_used;
+ uint_t ifr_nvsis;
+ uint_t ifr_nvsis_used;
+ uint_t ifr_nmacfilt;
+ uint_t ifr_nmacfilt_used;
+ uint_t ifr_nmcastfilt;
+ uint_t ifr_nmcastfilt_used;
+} i40e_func_rsrc_t;
+
+/*
+ * Main i40e per-instance state.
+ */
+typedef struct i40e {
+ list_node_t i40e_glink; /* Global list link */
+ list_node_t i40e_dlink; /* Device list link */
+ kmutex_t i40e_general_lock; /* General device lock */
+
+ /*
+ * General Data and management
+ */
+ dev_info_t *i40e_dip;
+ int i40e_instance;
+ int i40e_fm_capabilities;
+ uint_t i40e_state;
+ i40e_attach_state_t i40e_attach_progress;
+ mac_handle_t i40e_mac_hdl;
+ ddi_periodic_t i40e_periodic_id;
+
+ /*
+ * Pointers to common code data structures and memory for the common
+ * code.
+ */
+ struct i40e_hw i40e_hw_space;
+ struct i40e_osdep i40e_osdep_space;
+ struct i40e_aq_get_phy_abilities_resp i40e_phy;
+ void *i40e_aqbuf;
+
+ /*
+ * Device state, switch information, and resources.
+ */
+ int i40e_vsi_id;
+ struct i40e_device *i40e_device;
+ i40e_func_rsrc_t i40e_resources;
+ uint16_t i40e_switch_rsrc_alloc;
+ uint16_t i40e_switch_rsrc_actual;
+ i40e_switch_rsrc_t *i40e_switch_rsrcs;
+ i40e_uaddr_t *i40e_uaddrs;
+ i40e_maddr_t *i40e_maddrs;
+ int i40e_mcast_promisc_count;
+ boolean_t i40e_promisc_on;
+ link_state_t i40e_link_state;
+ uint32_t i40e_link_speed; /* In Mbps */
+ link_duplex_t i40e_link_duplex;
+ uint_t i40e_sdu;
+ uint_t i40e_frame_max;
+
+ /*
+ * Transmit and receive information, tunables, and MAC info.
+ */
+ i40e_trqpair_t *i40e_trqpairs;
+ boolean_t i40e_mr_enable;
+ int i40e_num_trqpairs;
+ uint_t i40e_other_itr;
+
+ int i40e_num_rx_groups;
+ int i40e_num_rx_descs;
+ mac_group_handle_t i40e_rx_group_handle;
+ uint32_t i40e_rx_ring_size;
+ uint32_t i40e_rx_buf_size;
+ boolean_t i40e_rx_hcksum_enable;
+ uint32_t i40e_rx_dma_min;
+ uint32_t i40e_rx_limit_per_intr;
+ uint_t i40e_rx_itr;
+
+ int i40e_num_tx_descs;
+ uint32_t i40e_tx_ring_size;
+ uint32_t i40e_tx_buf_size;
+ uint32_t i40e_tx_block_thresh;
+ boolean_t i40e_tx_hcksum_enable;
+ uint32_t i40e_tx_dma_min;
+ uint_t i40e_tx_itr;
+
+ /*
+ * Interrupt state
+ *
+ * Note that the use of a single boolean_t for i40e_intr_poll isn't
+ * really the best design. When we have more than a single ring on the
+ * device working, we'll transition to using something more
+ * sophisticated.
+ */
+ uint_t i40e_intr_pri;
+ uint_t i40e_intr_force;
+ uint_t i40e_intr_type;
+ int i40e_intr_cap;
+ uint32_t i40e_intr_count;
+ uint32_t i40e_intr_count_max;
+ uint32_t i40e_intr_count_min;
+ size_t i40e_intr_size;
+ ddi_intr_handle_t *i40e_intr_handles;
+ ddi_cb_handle_t i40e_callback_handle;
+ boolean_t i40e_intr_poll;
+
+ /*
+ * DMA attributes. See i40e_buf.c for why we have copies of them in the
+ * i40e_t.
+ */
+ ddi_dma_attr_t i40e_static_dma_attr;
+ ddi_dma_attr_t i40e_txbind_dma_attr;
+ ddi_device_acc_attr_t i40e_desc_acc_attr;
+ ddi_device_acc_attr_t i40e_buf_acc_attr;
+
+ /*
+ * The following two fields are used to protect and keep track of
+ * outstanding, loaned buffers to MAC. If we have these, we can't
+ * detach as we have active DMA memory outstanding.
+ */
+ kmutex_t i40e_rx_pending_lock;
+ kcondvar_t i40e_rx_pending_cv;
+ uint32_t i40e_rx_pending;
+
+ /*
+ * PF statistics and VSI statistics.
+ */
+ kmutex_t i40e_stat_lock;
+ kstat_t *i40e_pf_kstat;
+ kstat_t *i40e_vsi_kstat;
+ i40e_pf_stats_t i40e_pf_stat;
+ i40e_vsi_stats_t i40e_vsi_stat;
+ uint16_t i40e_vsi_stat_id;
+
+ /*
+ * Misc. stats and counters that should maybe one day be kstats.
+ */
+ uint64_t i40e_s_link_status_errs;
+ uint32_t i40e_s_link_status_lasterr;
+} i40e_t;
+
+/*
+ * The i40e_device represents a PCI device which encapsulates multiple physical
+ * functions which are represented as an i40e_t. This is used to track the use
+ * of pooled resources throughout all of the various devices.
+ */
+typedef struct i40e_device {
+ list_node_t id_link;
+ dev_info_t *id_parent;
+ uint_t id_pci_bus;
+ uint_t id_pci_device;
+ uint_t id_nfuncs; /* Total number of functions */
+ uint_t id_nreg; /* Total number present */
+ list_t id_i40e_list; /* List of i40e_t's registered */
+ i40e_switch_rsrc_t *id_rsrcs; /* Switch resources for this PF */
+ uint_t id_rsrcs_alloc; /* Total allocated resources */
+ uint_t id_rsrcs_act; /* Actual number of resources */
+} i40e_device_t;
+
+/* Values for the interrupt forcing on the NIC. */
+#define I40E_INTR_NONE 0
+#define I40E_INTR_MSIX 1
+#define I40E_INTR_MSI 2
+#define I40E_INTR_LEGACY 3
+
+/* Hint that we don't want to do any polling... */
+#define I40E_POLL_NULL -1
+
+/*
+ * Logging functions.
+ */
+extern void i40e_error(i40e_t *, const char *, ...);
+extern void i40e_notice(i40e_t *, const char *, ...);
+extern void i40e_log(i40e_t *, const char *, ...);
+
+/*
+ * General link handling functions.
+ */
+extern void i40e_link_check(i40e_t *);
+extern void i40e_update_mtu(i40e_t *);
+
+/*
+ * FMA functions.
+ */
+extern int i40e_check_acc_handle(ddi_acc_handle_t);
+extern int i40e_check_dma_handle(ddi_dma_handle_t);
+extern void i40e_fm_ereport(i40e_t *, char *);
+
+/*
+ * Interrupt handlers and interrupt handler setup.
+ */
+extern void i40e_intr_chip_init(i40e_t *);
+extern void i40e_intr_chip_fini(i40e_t *);
+extern uint_t i40e_intr_msix(void *, void *);
+extern uint_t i40e_intr_msi(void *, void *);
+extern uint_t i40e_intr_legacy(void *, void *);
+extern void i40e_intr_io_enable_all(i40e_t *);
+extern void i40e_intr_io_disable_all(i40e_t *);
+extern void i40e_intr_io_clear_cause(i40e_t *);
+extern void i40e_intr_rx_queue_disable(i40e_t *, uint_t);
+extern void i40e_intr_rx_queue_enable(i40e_t *, uint_t);
+extern void i40e_intr_set_itr(i40e_t *, i40e_itr_index_t, uint_t);
+
+/*
+ * Receive-side functions
+ */
+extern mblk_t *i40e_ring_rx(i40e_trqpair_t *, int);
+extern mblk_t *i40e_ring_rx_poll(void *, int);
+extern void i40e_rx_recycle(caddr_t);
+
+/*
+ * Transmit-side functions
+ */
+mblk_t *i40e_ring_tx(void *, mblk_t *);
+extern void i40e_tx_recycle_ring(i40e_trqpair_t *);
+extern void i40e_tx_cleanup_ring(i40e_trqpair_t *);
+
+/*
+ * Statistics functions.
+ */
+extern boolean_t i40e_stats_init(i40e_t *);
+extern void i40e_stats_fini(i40e_t *);
+extern boolean_t i40e_stat_vsi_init(i40e_t *);
+extern void i40e_stat_vsi_fini(i40e_t *);
+extern boolean_t i40e_stats_trqpair_init(i40e_trqpair_t *);
+extern void i40e_stats_trqpair_fini(i40e_trqpair_t *);
+extern int i40e_m_stat(void *, uint_t, uint64_t *);
+extern int i40e_rx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *);
+extern int i40e_tx_ring_stat(mac_ring_driver_t, uint_t, uint64_t *);
+
+/*
+ * MAC/GLDv3 functions, and functions called by MAC/GLDv3 support code.
+ */
+extern boolean_t i40e_register_mac(i40e_t *);
+extern boolean_t i40e_start(i40e_t *, boolean_t);
+extern void i40e_stop(i40e_t *, boolean_t);
+
+/*
+ * DMA & buffer functions and attributes
+ */
+extern void i40e_init_dma_attrs(i40e_t *, boolean_t);
+extern boolean_t i40e_alloc_ring_mem(i40e_t *);
+extern void i40e_free_ring_mem(i40e_t *, boolean_t);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _I40E_SW_H */
diff --git a/usr/src/uts/common/io/i40e/i40e_transceiver.c b/usr/src/uts/common/io/i40e/i40e_transceiver.c
new file mode 100644
index 0000000000..06f82f856e
--- /dev/null
+++ b/usr/src/uts/common/io/i40e/i40e_transceiver.c
@@ -0,0 +1,2266 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include "i40e_sw.h"
+
+/*
+ * ---------------------------------------------------------
+ * Buffer and Memory Management, Receiving, and Transmitting
+ * ---------------------------------------------------------
+ *
+ * Each physical function (PF), which is what we think of as an instance of the
+ * device driver, has a series of associated transmit and receive queue pairs.
+ * Effectively, what we think of in MAC as rings. Each of these has their own
+ * ring of descriptors which is used as part of doing DMA activity.
+ *
+ * The transmit ring of descriptors are 16-byte entries which are used to send
+ * packets, program filters, etc. The receive ring of descriptors are either
+ * 16-byte or 32-bytes each. At the moment, we opt to use the larger descriptor
+ * format so that we're in a better position if we ever want to leverage that
+ * information later on.
+ *
+ * However, these rings are just for descriptors, they don't talk or deal with
+ * how we actually store the memory that we need for DMA or the associated
+ * information that we need for keeping track of message blocks. To correspond
+ * to the hardware descriptor ring which is how we communicate with hardware, we
+ * introduce a control block which keeps track of our required metadata like DMA
+ * mappings.
+ *
+ * There are two main considerations that dictate how much memory and buffers
+ * we end up allocating. Those are:
+ *
+ * o The size of the ring (controlled through the driver.conf file)
+ *
+ * o The maximum size frame we can receive.
+ *
+ * The size of the rings currently defaults to 1024 descriptors and is stored in
+ * the i40e_t`i40e_rx_ring_size and i40e_t`i40e_tx_ring_size.
+ *
+ * While the size of the rings is controlled by the driver.conf, the maximum
+ * size frame is informed primarily through the use of dladm and the setting of
+ * the MTU property on the device. From the MTU, we then go and do some
+ * machinations. The first thing we do is we then have to add in space for the
+ * Ethernet header, potentially a VLAN header, and the FCS check. This value is
+ * what's stored as i40e_t`i40e_frame_max and is derived any time
+ * i40e_t`i40e_sdu changes.
+ *
+ * This size is then rounded up to the nearest 1k chunk, which represents the
+ * actual amount of memory that we'll allocate for a single frame.
+ *
+ * Note, that for rx, we do something that might be unexpected. We always add
+ * an extra two bytes to the frame size that we allocate. We then offset the DMA
+ * address that we receive a packet into by two bytes. This ensures that the IP
+ * header will always be 4 byte aligned because the MAC header is either 14 or
+ * 18 bytes in length, depending on the use of 802.1Q tagging, which makes IP's
+ * and MAC's lives easier.
+ *
+ * Both the rx and tx descriptor rings (which are what we use to communicate
+ * with hardware) are allocated as a single region of DMA memory which is the
+ * size of the descriptor (4 bytes and 2 bytes respectively) times the total
+ * number of descriptors for an rx and tx ring.
+ *
+ * While the rx and tx descriptors are allocated using DMA-based memory, the
+ * control blocks for each of them are allocated using normal kernel memory.
+ * They aren't special from a DMA perspective. We'll go over the design of both
+ * receiving and transmitting separately, as they have slightly different
+ * control blocks and different ways that we manage the relationship between
+ * control blocks and descriptors.
+ *
+ * ---------------------------------
+ * RX Descriptors and Control Blocks
+ * ---------------------------------
+ *
+ * For every descriptor in the ring that the driver has, we need some associated
+ * memory, which means that we need to have the receive specific control block.
+ * We have a couple different, but related goals:
+ *
+ * o Once we've completed the mc_start GLDv3 endpoint, we do not want to do
+ * any additional memory allocations or DMA allocations if we don't have to.
+ *
+ * o We'd like to try and do as much zero-copy as possible, while taking into
+ * account the cost of mapping in DMA resources.
+ *
+ * o We'd like to have every receive descriptor available.
+ *
+ * Now, these rules are a bit in tension with one another. The act of mapping in
+ * is an exercise of trying to find the break-even point between page table
+ * updates and bcopy. We currently start by using the same metrics that ixgbe
+ * used; however, it should be known that this value has effectively been
+ * cargo-culted across to yet another driver, sorry.
+ *
+ * If we receive a packet which is larger than our copy threshold, we'll create
+ * a message block out of the DMA memory via desballoc(9F) and send that up to
+ * MAC that way. This will cause us to be notified when the message block is
+ * then freed because it has been consumed, dropped, or otherwise. Otherwise, if
+ * it's less than the threshold, we'll try to use allocb and bcopy it into the
+ * block, thus allowing us to immediately reuse the DMA resource. Note, on debug
+ * builds, we allow someone to whack the variable i40e_debug_rx_mode to override
+ * the behavior and always do a bcopy or a DMA bind.
+ *
+ * To try and ensure that the device always has blocks that it can receive data
+ * into, we maintain two lists of control blocks, a working list and a free
+ * list. Each list is sized equal to the number of descriptors in the rx ring.
+ * During the GLDv3 mc_start routine, we allocate a number of rx control blocks
+ * equal to twice the number of descriptors in the ring and we assign them
+ * equally to the free list and to the working list. Each control block also has
+ * DMA memory allocated and associated with which it will be used to receive the
+ * actual packet data. All of a received frame's data will end up in a single
+ * DMA buffer.
+ *
+ * During operation, we always maintain the invariant that each rx descriptor
+ * has an associated rx control block which lives in the working list. If we
+ * feel that we should loan up DMA memory to MAC in the form of a message block,
+ * we can only do so if we can maintain this invariant. To do that, we swap in
+ * one of the buffers from the free list. If none are available, then we resort
+ * to using allocb(9F) and bcopy(9F) on the packet instead, regardless of the
+ * size.
+ *
+ * Loaned message blocks come back to use when freemsg(9F) or freeb(9F) is
+ * called on the block, at which point we restore the rx control block to the
+ * free list and are able to reuse the DMA memory again. While the scheme may
+ * seem odd, it importantly keeps us out of trying to do any DMA allocations in
+ * the normal path of operation, even though we may still have to allocate
+ * message blocks and copy.
+ *
+ * The following state machine describes the life time of a rx control block. In
+ * the diagram we abbrviate the rx ring descriptor entry as rxd and the rx
+ * control block entry as rcb.
+ *
+ * | |
+ * * ... 1/2 of all initial rcb's ... *
+ * | |
+ * v v
+ * +------------------+ +------------------+
+ * | rcb on free list |---*---------->| rcb on work list |
+ * +------------------+ . +------------------+
+ * ^ . moved to |
+ * | replace rcb * . . Frame received,
+ * | loaned to | entry on free list
+ * | MAC + co. | available. rcb's
+ * | | memory made into mblk_t
+ * * . freemsg(9F) | and sent up to MAC.
+ * | called on |
+ * | loaned rcb |
+ * | and it is v
+ * | recycled. +-------------------+
+ * +--------------------<-----| rcb loaned to MAC |
+ * +-------------------+
+ *
+ * Finally, note that every rx control block has a reference count on it. One
+ * reference is added as long as the driver has had the GLDv3 mc_start endpoint
+ * called. If the GLDv3 mc_stop entry point is called, IP has been unplumbed and
+ * no other DLPI consumers remain, then we'll decrement the reference count by
+ * one. Whenever we loan up the rx control block and associated buffer to MAC,
+ * then we bump the reference count again. Even though the device is stopped,
+ * there may still be loaned frames in upper levels that we'll want to account
+ * for. Our callback from freemsg(9F)/freeb(9F) will take care of making sure
+ * that it is cleaned up.
+ *
+ * --------------------
+ * Managing the RX Ring
+ * --------------------
+ *
+ * The receive ring descriptors are arranged in a circular buffer with a head
+ * and tail pointer. There are both the conventional head and tail pointers
+ * which are used to partition the ring into two portions, a portion that we,
+ * the operating system, manage and a portion that is managed by hardware. When
+ * hardware owns a descriptor in the ring, it means that it is waiting for data
+ * to be filled in. However, when a portion of the ring is owned by the driver,
+ * then that means that the descriptor has been consumed and we need to go take
+ * a look at it.
+ *
+ * The initial head is configured to be zero by writing it as such in the
+ * receive queue context in the FPM (function private memory from the host). The
+ * initial tail is written to be the last descriptor. This is written to via the
+ * PCIe register I40E_QRX_TAIL(). Technically, hardware owns everything between
+ * the HEAD and TAIL, inclusive. Note that while we initially program the HEAD,
+ * the only values we ever consult ourselves are the TAIL register and our own
+ * state tracking. Effectively, we cache the HEAD register and then update it
+ * ourselves based on our work.
+ *
+ * When we iterate over the rx descriptors and thus the received frames, we are
+ * either in an interrupt context or we've been asked by MAC to poll on the
+ * ring. If we've been asked to poll on the ring, we have a maximum number of
+ * bytes of mblk_t's to return. If processing an rx descriptor would cause us to
+ * exceed that count, then we do not process it. When in interrupt context, we
+ * don't have a strict byte count. However, to ensure liveness, we limit the
+ * amount of data based on a configuration value
+ * (i40e_t`i40e_rx_limit_per_intr). The number that we've started with for this
+ * is based on similar numbers that are used for ixgbe. After some additional
+ * time in the field, we'll have a sense as to whether or not it should be
+ * changed.
+ *
+ * When processing, we start at our own HEAD pointer
+ * (i40e_rx_data_t`rxd_desc_next), which indicates the descriptor to start
+ * processing. Every RX descriptor has what's described as the DD bit. This bit
+ * (the LSB of the second 8-byte word), indicates whether or not the descriptor
+ * is done. When we give descriptors to the hardware, this value is always
+ * zero. When the hardware has finished a descriptor, it will always be one.
+ *
+ * The first thing that we check is whether the DD bit indicates that the
+ * current HEAD is ready. If it isn't, then we're done. That's the primary
+ * invariant of processing a frame. If it's done, then there are a few other
+ * things that we want to look at. In the same status word as the DD bit, there
+ * are two other important bits:
+ *
+ * o End of Packet (EOP)
+ * o Error bits
+ *
+ * The end of packet indicates that we have reached the last descriptor. Now,
+ * you might ask when would there be more than one descriptor. The reason for
+ * that might be due to large receive offload (lro) or header splitting
+ * functionality, which presently isn't supported in the driver. The error bits
+ * in the frame are only valid when EOP is set.
+ *
+ * If error bits are set on the frame, then we still consume it; however, we
+ * will not generate an mblk_t to send up to MAC. If there are no error bits
+ * set, then we'll consume the descriptor either using bcopy or DMA binding. See
+ * the earlier section 'RX DESCRIPTORS AND CONTROL BLOCKS' for more information
+ * on how that selection is made.
+ *
+ * Regardless of whether we construct an mblk_t or encounter an error, we end up
+ * resetting the descriptor. This re-arms the descriptor for hardware and in the
+ * process, we may end up assigning it a new receive control bock. After we do
+ * this, we always update our HEAD pointer, no matter what.
+ *
+ * Finally, once we've consumed as much as we will in a given window, we go and
+ * update the TAIL register to indicate all the frames we've consumed. We only
+ * do a single bulk write for the ring.
+ *
+ * ---------------------------------
+ * TX Descriptors and Control Blocks
+ * ---------------------------------
+ *
+ * While the transmit path is similar in spirit to the receive path, it works
+ * differently due to the fact that all data is originated by the operating
+ * system and not by the device.
+ *
+ * Like rx, there is both a descriptor ring that we use to communicate to the
+ * driver and which points to the memory used to transmit a frame. Similarly,
+ * there is a corresponding transmit control block. Each transmit control block
+ * has a region of DMA memory allocated to it; however, the way we use it
+ * varies.
+ *
+ * The driver is asked to process a single frame at a time. That message block
+ * may be made up of multiple fragments linked together by the mblk_t`b_cont
+ * member. The device has a hard limit of up to 8 buffers being allowed for use
+ * for a single logical frame. For each fragment, we'll try and use an entry
+ * from the tx descriptor ring and then we'll allocate a corresponding tx
+ * control block. Depending on the size of the fragment, we may copy it around
+ * or we might instead try to do DMA binding of the fragment.
+ *
+ * If we exceed the number of blocks that fit, we'll try to pull up the block
+ * and then we'll do a DMA bind and send it out.
+ *
+ * If we don't have enough space in the ring or tx control blocks available,
+ * then we'll return the unprocessed message block to MAC. This will induce flow
+ * control and once we recycle enough entries, we'll once again enable sending
+ * on the ring.
+ *
+ * We size the working list as equal to the number of descriptors in the ring.
+ * We size the free list as equal to 1.5 times the number of descriptors in the
+ * ring. We'll allocate a number of tx control block entries equal to the number
+ * of entries in the free list. By default, all entries are placed in the free
+ * list. As we come along and try to send something, we'll allocate entries from
+ * the free list and add them to the working list, where they'll stay until the
+ * hardware indicates that all of the data has been written back to us. The
+ * reason that we start with 1.5x is to help facilitate having more than one TX
+ * buffer associated with the DMA activity.
+ *
+ * --------------------
+ * Managing the TX Ring
+ * --------------------
+ *
+ * The transmit descriptor ring is driven by us. We maintain our own notion of a
+ * HEAD and TAIL register and we update the hardware with updates to the TAIL
+ * register. When the hardware is done writing out data, it updates us by
+ * writing back to a specific address, not by updating the individual
+ * descriptors. That address is a 4-byte region after the main transmit
+ * descriptor ring. This is why the descriptor ring has an extra descriptor's
+ * worth allocated to it.
+ *
+ * We maintain our notion of the HEAD in the i40e_trqpair_t`itrq_desc_head and
+ * the TAIL in the i40e_trqpair_t`itrq_desc_tail. When we write out frames,
+ * we'll update the tail there and in the I40E_QTX_TAIL() register. At various
+ * points in time, through both interrupts, and our own internal checks, we'll
+ * sync the write-back head portion of the DMA space. Based on the index it
+ * reports back, we'll free everything between our current HEAD and the
+ * indicated index and update HEAD to the new index.
+ *
+ * When a frame comes in, we try to use a number of transmit control blocks and
+ * we'll transition them from the free list to the work list. They'll get moved
+ * to the entry on the work list that corresponds with the transmit descriptor
+ * they correspond to. Once we are indicated that the corresponding descriptor
+ * has been freed, we'll return it to the list.
+ *
+ * The thread control block free list is managed by keeping track of the number
+ * of entries in it, i40e_trqpair_t`itrq_tcb_free. We use it as a way to index
+ * into the free list and add things to it. In effect, we always push and pop
+ * from the tail and protect it with a single lock,
+ * i40e_trqpair_t`itrq_tcb_lock. This scheme is somewhat simplistic and may not
+ * stand up to further performance testing; however, it does allow us to get off
+ * the ground with the device driver.
+ *
+ * The following image describes where a given transmit control block lives in
+ * its lifetime:
+ *
+ * |
+ * * ... Initial placement for all tcb's
+ * |
+ * v
+ * +------------------+ +------------------+
+ * | tcb on free list |---*------------------>| tcb on work list |
+ * +------------------+ . +------------------+
+ * ^ . tcb allocated |
+ * | to send frame v
+ * | or fragment on |
+ * | wire, mblk from |
+ * | MAC associated. |
+ * | |
+ * +------*-------------------------------<----+
+ * .
+ * . Hardware indicates
+ * entry transmitted.
+ * tcb recycled, mblk
+ * from MAC freed.
+ *
+ * ------------
+ * Blocking MAC
+ * ------------
+ *
+ * Wen performing transmit, we can run out of descriptors and ring entries. When
+ * such a case happens, we return the mblk_t to MAC to indicate that we've been
+ * blocked. At that point in time, MAC becomes blocked and will not transmit
+ * anything out that specific ring until we notify MAC. To indicate that we're
+ * in such a situation we set i40e_trqpair_t`itrq_tx_blocked member to B_TRUE.
+ *
+ * When we recycle tx descriptors then we'll end up signaling MAC by calling
+ * mac_tx_ring_update() if we were blocked, letting it know that it's safe to
+ * start sending frames out to us again.
+ */
+
+/*
+ * We set our DMA alignment requests based on the smallest supported page size
+ * of the corresponding platform.
+ */
+#if defined(__sparc)
+#define I40E_DMA_ALIGNMENT 0x2000ull
+#elif defined(__x86)
+#define I40E_DMA_ALIGNMENT 0x1000ull
+#else
+#error "unknown architecture for i40e"
+#endif
+
+/*
+ * This structure is used to maintain information and flags related to
+ * transmitting a frame. The first member is the set of flags we need to or into
+ * the command word (generally checksumming related). The second member controls
+ * the word offsets which is required for IP and L4 checksumming.
+ */
+typedef struct i40e_tx_context {
+ enum i40e_tx_desc_cmd_bits itc_cmdflags;
+ uint32_t itc_offsets;
+} i40e_tx_context_t;
+
+/*
+ * Toggles on debug builds which can be used to override our RX behaviour based
+ * on thresholds.
+ */
+#ifdef DEBUG
+typedef enum {
+ I40E_DEBUG_RX_DEFAULT = 0,
+ I40E_DEBUG_RX_BCOPY = 1,
+ I40E_DEBUG_RX_DMABIND = 2
+} i40e_debug_rx_t;
+
+i40e_debug_rx_t i40e_debug_rx_mode = I40E_DEBUG_RX_DEFAULT;
+#endif /* DEBUG */
+
+/*
+ * Notes on the following pair of DMA attributes. The first attribute,
+ * i40e_static_dma_attr, is designed to be used for both the descriptor rings
+ * and the static buffers that we associate with control blocks. For this
+ * reason, we force an SGL length of one. While technically the driver supports
+ * a larger SGL (5 on rx and 8 on tx), we opt to only use one to simplify our
+ * management here. In addition, when the Intel common code wants to allocate
+ * memory via the i40e_allocate_virt_mem osdep function, we have it leverage
+ * the static dma attr.
+ *
+ * The second set of attributes, i40e_txbind_dma_attr, is what we use when we're
+ * binding a bunch of mblk_t fragments to go out the door. Note that the main
+ * difference here is that we're allowed a larger SGL length -- eight.
+ *
+ * Note, we default to setting ourselves to be DMA capable here. However,
+ * because we could have multiple instances which have different FMA error
+ * checking capabilities, or end up on different buses, we make these static
+ * and const and copy them into the i40e_t for the given device with the actual
+ * values that reflect the actual capabilities.
+ */
+static const ddi_dma_attr_t i40e_g_static_dma_attr = {
+ DMA_ATTR_V0, /* version number */
+ 0x0000000000000000ull, /* low address */
+ 0xFFFFFFFFFFFFFFFFull, /* high address */
+ 0x00000000FFFFFFFFull, /* dma counter max */
+ I40E_DMA_ALIGNMENT, /* alignment */
+ 0x00000FFF, /* burst sizes */
+ 0x00000001, /* minimum transfer size */
+ 0x00000000FFFFFFFFull, /* maximum transfer size */
+ 0xFFFFFFFFFFFFFFFFull, /* maximum segment size */
+ 1, /* scatter/gather list length */
+ 0x00000001, /* granularity */
+ DDI_DMA_FLAGERR /* DMA flags */
+};
+
+static const ddi_dma_attr_t i40e_g_txbind_dma_attr = {
+ DMA_ATTR_V0, /* version number */
+ 0x0000000000000000ull, /* low address */
+ 0xFFFFFFFFFFFFFFFFull, /* high address */
+ 0x00000000FFFFFFFFull, /* dma counter max */
+ I40E_DMA_ALIGNMENT, /* alignment */
+ 0x00000FFF, /* burst sizes */
+ 0x00000001, /* minimum transfer size */
+ 0x00000000FFFFFFFFull, /* maximum transfer size */
+ 0xFFFFFFFFFFFFFFFFull, /* maximum segment size */
+ I40E_TX_MAX_COOKIE, /* scatter/gather list length */
+ 0x00000001, /* granularity */
+ DDI_DMA_FLAGERR /* DMA flags */
+};
+
+/*
+ * Next, we have the attributes for these structures. The descriptor rings are
+ * all strictly little endian, while the data buffers are just arrays of bytes
+ * representing frames. Because of this, we purposefully simplify the driver
+ * programming life by programming the descriptor ring as little endian, while
+ * for the buffer data we keep it as unstructured.
+ *
+ * Note, that to keep the Intel common code operating in a reasonable way, when
+ * we allocate DMA memory for it, we do not use byte swapping and thus use the
+ * standard i40e_buf_acc_attr.
+ */
+static const ddi_device_acc_attr_t i40e_g_desc_acc_attr = {
+ DDI_DEVICE_ATTR_V0,
+ DDI_STRUCTURE_LE_ACC,
+ DDI_STRICTORDER_ACC
+};
+
+static const ddi_device_acc_attr_t i40e_g_buf_acc_attr = {
+ DDI_DEVICE_ATTR_V0,
+ DDI_NEVERSWAP_ACC,
+ DDI_STRICTORDER_ACC
+};
+
+/*
+ * The next two functions are designed to be type-safe versions of macros that
+ * are used to increment and decrement a descriptor index in the loop. Note,
+ * these are marked inline to try and keep the data path hot and they were
+ * effectively inlined in their previous life as macros.
+ */
+static inline int
+i40e_next_desc(int base, int count, int size)
+{
+ int out;
+
+ ASSERT(base >= 0);
+ ASSERT(count > 0);
+ ASSERT(size > 0);
+
+ if (base + count < size) {
+ out = base + count;
+ } else {
+ out = base + count - size;
+ }
+
+ ASSERT(out >= 0 && out < size);
+ return (out);
+}
+
+static inline int
+i40e_prev_desc(int base, int count, int size)
+{
+ int out;
+
+ ASSERT(base >= 0);
+ ASSERT(count > 0);
+ ASSERT(size > 0);
+
+ if (base >= count) {
+ out = base - count;
+ } else {
+ out = base - count + size;
+ }
+
+ ASSERT(out >= 0 && out < size);
+ return (out);
+}
+
+/*
+ * Free DMA memory that is represented by a i40e_dma_buffer_t.
+ */
+static void
+i40e_free_dma_buffer(i40e_dma_buffer_t *dmap)
+{
+ if (dmap->dmab_dma_address != NULL) {
+ VERIFY(dmap->dmab_dma_handle != NULL);
+ (void) ddi_dma_unbind_handle(dmap->dmab_dma_handle);
+ dmap->dmab_dma_address = NULL;
+ dmap->dmab_size = 0;
+ }
+
+ if (dmap->dmab_acc_handle != NULL) {
+ ddi_dma_mem_free(&dmap->dmab_acc_handle);
+ dmap->dmab_acc_handle = NULL;
+ dmap->dmab_address = NULL;
+ }
+
+ if (dmap->dmab_dma_handle != NULL) {
+ ddi_dma_free_handle(&dmap->dmab_dma_handle);
+ dmap->dmab_dma_handle = NULL;
+ }
+
+ /*
+ * These should only be set if we have valid handles allocated and
+ * therefore should always be NULLed out due to the above code. This
+ * is here to catch us acting sloppy.
+ */
+ ASSERT(dmap->dmab_dma_address == NULL);
+ ASSERT(dmap->dmab_address == NULL);
+ ASSERT(dmap->dmab_size == 0);
+ dmap->dmab_len = 0;
+}
+
+/*
+ * Allocate size bytes of DMA memory based on the passed in attributes. This
+ * fills in the information in dmap and is designed for all of our single cookie
+ * allocations.
+ */
+static boolean_t
+i40e_alloc_dma_buffer(i40e_t *i40e, i40e_dma_buffer_t *dmap,
+ ddi_dma_attr_t *attrsp, ddi_device_acc_attr_t *accp, boolean_t stream,
+ boolean_t zero, size_t size)
+{
+ int ret;
+ uint_t flags;
+ size_t len;
+ ddi_dma_cookie_t cookie;
+ uint_t ncookies;
+
+ if (stream == B_TRUE)
+ flags = DDI_DMA_STREAMING;
+ else
+ flags = DDI_DMA_CONSISTENT;
+
+ /*
+ * Step one: Allocate the DMA handle
+ */
+ ret = ddi_dma_alloc_handle(i40e->i40e_dip, attrsp, DDI_DMA_DONTWAIT,
+ NULL, &dmap->dmab_dma_handle);
+ if (ret != DDI_SUCCESS) {
+ i40e_error(i40e, "failed to allocate dma handle for I/O "
+ "buffers: %d", ret);
+ dmap->dmab_dma_handle = NULL;
+ return (B_FALSE);
+ }
+
+ /*
+ * Step two: Allocate the DMA memory
+ */
+ ret = ddi_dma_mem_alloc(dmap->dmab_dma_handle, size, accp, flags,
+ DDI_DMA_DONTWAIT, NULL, &dmap->dmab_address, &len,
+ &dmap->dmab_acc_handle);
+ if (ret != DDI_SUCCESS) {
+ i40e_error(i40e, "failed to allocate %d bytes of DMA for I/O "
+ "buffers", size);
+ dmap->dmab_address = NULL;
+ dmap->dmab_acc_handle = NULL;
+ i40e_free_dma_buffer(dmap);
+ return (B_FALSE);
+ }
+
+ /*
+ * Step three: Optionally zero
+ */
+ if (zero == B_TRUE)
+ bzero(dmap->dmab_address, len);
+
+ /*
+ * Step four: Bind the memory
+ */
+ ret = ddi_dma_addr_bind_handle(dmap->dmab_dma_handle, NULL,
+ dmap->dmab_address, len, DDI_DMA_RDWR | flags, DDI_DMA_DONTWAIT,
+ NULL, &cookie, &ncookies);
+ if (ret != DDI_DMA_MAPPED) {
+ i40e_error(i40e, "failed to allocate %d bytes of DMA for I/O "
+ "buffers: %d", size, ret);
+ i40e_free_dma_buffer(dmap);
+ return (B_FALSE);
+ }
+
+ VERIFY(ncookies == 1);
+ dmap->dmab_dma_address = cookie.dmac_laddress;
+ dmap->dmab_size = len;
+ dmap->dmab_len = 0;
+ return (B_TRUE);
+}
+
+/*
+ * This function is called once the last pending rcb has been freed by the upper
+ * levels of the system.
+ */
+static void
+i40e_free_rx_data(i40e_rx_data_t *rxd)
+{
+ VERIFY(rxd->rxd_rcb_pending == 0);
+
+ if (rxd->rxd_rcb_area != NULL) {
+ kmem_free(rxd->rxd_rcb_area,
+ sizeof (i40e_rx_control_block_t) *
+ (rxd->rxd_free_list_size + rxd->rxd_ring_size));
+ rxd->rxd_rcb_area = NULL;
+ }
+
+ if (rxd->rxd_free_list != NULL) {
+ kmem_free(rxd->rxd_free_list,
+ sizeof (i40e_rx_control_block_t *) *
+ rxd->rxd_free_list_size);
+ rxd->rxd_free_list = NULL;
+ }
+
+ if (rxd->rxd_work_list != NULL) {
+ kmem_free(rxd->rxd_work_list,
+ sizeof (i40e_rx_control_block_t *) *
+ rxd->rxd_ring_size);
+ }
+
+ kmem_free(rxd, sizeof (i40e_rx_data_t));
+}
+
+static boolean_t
+i40e_alloc_rx_data(i40e_t *i40e, i40e_trqpair_t *itrq)
+{
+ i40e_rx_data_t *rxd;
+
+ rxd = kmem_zalloc(sizeof (i40e_rx_data_t), KM_NOSLEEP);
+ if (rxd == NULL)
+ return (B_FALSE);
+ itrq->itrq_rxdata = rxd;
+ rxd->rxd_i40e = i40e;
+
+ rxd->rxd_ring_size = i40e->i40e_rx_ring_size;
+ rxd->rxd_free_list_size = i40e->i40e_rx_ring_size;
+
+ rxd->rxd_rcb_free = rxd->rxd_free_list_size;
+
+ rxd->rxd_work_list = kmem_zalloc(sizeof (i40e_rx_control_block_t *) *
+ rxd->rxd_ring_size, KM_NOSLEEP);
+ if (rxd->rxd_work_list == NULL) {
+ i40e_error(i40e, "failed to allocate rx work list for a ring "
+ "of %d entries for ring %d", rxd->rxd_ring_size,
+ itrq->itrq_index);
+ goto cleanup;
+ }
+
+ rxd->rxd_free_list = kmem_zalloc(sizeof (i40e_rx_control_block_t *) *
+ rxd->rxd_free_list_size, KM_NOSLEEP);
+ if (rxd->rxd_free_list == NULL) {
+ i40e_error(i40e, "failed to allocate a %d entry rx free list "
+ "for ring %d", rxd->rxd_free_list_size, itrq->itrq_index);
+ goto cleanup;
+ }
+
+ rxd->rxd_rcb_area = kmem_zalloc(sizeof (i40e_rx_control_block_t) *
+ (rxd->rxd_free_list_size + rxd->rxd_ring_size), KM_NOSLEEP);
+ if (rxd->rxd_rcb_area == NULL) {
+ i40e_error(i40e, "failed to allocate a %d entry rcb area for "
+ "ring %d", rxd->rxd_ring_size + rxd->rxd_free_list_size,
+ itrq->itrq_index);
+ goto cleanup;
+ }
+
+ return (B_TRUE);
+
+cleanup:
+ i40e_free_rx_data(rxd);
+ itrq->itrq_rxdata = NULL;
+ return (B_FALSE);
+}
+
+/*
+ * Free all of the memory that we've allocated for DMA. Note that we may have
+ * buffers that we've loaned up to the OS which are still outstanding. We'll
+ * always free up the descriptor ring, because we no longer need that. For each
+ * rcb, we'll iterate over it and if we send the reference count to zero, then
+ * we'll free the message block and DMA related resources. However, if we don't
+ * take the last one, then we'll go ahead and keep track that we'll have pending
+ * data and clean it up when we get there.
+ */
+static void
+i40e_free_rx_dma(i40e_rx_data_t *rxd, boolean_t failed_init)
+{
+ uint32_t i, count, ref;
+
+ i40e_rx_control_block_t *rcb;
+ i40e_t *i40e = rxd->rxd_i40e;
+
+ i40e_free_dma_buffer(&rxd->rxd_desc_area);
+ rxd->rxd_desc_ring = NULL;
+ rxd->rxd_desc_next = 0;
+
+ mutex_enter(&i40e->i40e_rx_pending_lock);
+
+ rcb = rxd->rxd_rcb_area;
+ count = rxd->rxd_ring_size + rxd->rxd_free_list_size;
+
+ for (i = 0; i < count; i++, rcb++) {
+ VERIFY(rcb != NULL);
+
+ /*
+ * If we're cleaning up from a failed creation attempt, then an
+ * entry may never have been assembled which would mean that
+ * it's reference count is zero. If we find that, we leave it
+ * be, because nothing else should be modifying it at this
+ * point. We're not at the point that any more references can be
+ * added, just removed.
+ */
+ if (failed_init == B_TRUE && rcb->rcb_ref == 0)
+ continue;
+
+ ref = atomic_dec_32_nv(&rcb->rcb_ref);
+ if (ref == 0) {
+ freemsg(rcb->rcb_mp);
+ rcb->rcb_mp = NULL;
+ i40e_free_dma_buffer(&rcb->rcb_dma);
+ } else {
+ atomic_inc_32(&rxd->rxd_rcb_pending);
+ atomic_inc_32(&i40e->i40e_rx_pending);
+ }
+ }
+ mutex_exit(&i40e->i40e_rx_pending_lock);
+}
+
+/*
+ * Initialize the DMA memory for the descriptor ring and for each frame in the
+ * control block list.
+ */
+static boolean_t
+i40e_alloc_rx_dma(i40e_rx_data_t *rxd)
+{
+ int i, count;
+ size_t dmasz;
+ i40e_rx_control_block_t *rcb;
+ i40e_t *i40e = rxd->rxd_i40e;
+
+ /*
+ * First allocate the rx descriptor ring.
+ */
+ dmasz = sizeof (i40e_rx_desc_t) * rxd->rxd_ring_size;
+ VERIFY(dmasz > 0);
+ if (i40e_alloc_dma_buffer(i40e, &rxd->rxd_desc_area,
+ &i40e->i40e_static_dma_attr, &i40e->i40e_desc_acc_attr, B_FALSE,
+ B_TRUE, dmasz) == B_FALSE) {
+ i40e_error(i40e, "failed to allocate DMA resources "
+ "for rx descriptor ring");
+ return (B_FALSE);
+ }
+ rxd->rxd_desc_ring =
+ (i40e_rx_desc_t *)(uintptr_t)rxd->rxd_desc_area.dmab_address;
+ rxd->rxd_desc_next = 0;
+
+ count = rxd->rxd_ring_size + rxd->rxd_free_list_size;
+ rcb = rxd->rxd_rcb_area;
+
+ dmasz = i40e->i40e_rx_buf_size;
+ VERIFY(dmasz > 0);
+ for (i = 0; i < count; i++, rcb++) {
+ i40e_dma_buffer_t *dmap;
+ VERIFY(rcb != NULL);
+
+ if (i < rxd->rxd_ring_size) {
+ rxd->rxd_work_list[i] = rcb;
+ } else {
+ rxd->rxd_free_list[i - rxd->rxd_ring_size] = rcb;
+ }
+
+ dmap = &rcb->rcb_dma;
+ if (i40e_alloc_dma_buffer(i40e, dmap,
+ &i40e->i40e_static_dma_attr, &i40e->i40e_buf_acc_attr,
+ B_TRUE, B_FALSE, dmasz) == B_FALSE) {
+ i40e_error(i40e, "failed to allocate rx dma buffer");
+ return (B_FALSE);
+ }
+
+ /*
+ * Initialize the control block and offset the DMA address. See
+ * the note in the big theory statement that explains how this
+ * helps IP deal with alignment. Note, we don't worry about
+ * whether or not we successfully get an mblk_t from desballoc,
+ * it's a common case that we have to handle later on in the
+ * system.
+ */
+ dmap->dmab_size -= I40E_BUF_IPHDR_ALIGNMENT;
+ dmap->dmab_address += I40E_BUF_IPHDR_ALIGNMENT;
+ dmap->dmab_dma_address += I40E_BUF_IPHDR_ALIGNMENT;
+
+ rcb->rcb_ref = 1;
+ rcb->rcb_rxd = rxd;
+ rcb->rcb_free_rtn.free_func = i40e_rx_recycle;
+ rcb->rcb_free_rtn.free_arg = (caddr_t)rcb;
+ rcb->rcb_mp = desballoc((unsigned char *)dmap->dmab_address,
+ dmap->dmab_size, 0, &rcb->rcb_free_rtn);
+ }
+
+ return (B_TRUE);
+}
+
+static void
+i40e_free_tx_dma(i40e_trqpair_t *itrq)
+{
+ size_t fsz;
+
+ if (itrq->itrq_tcb_area != NULL) {
+ uint32_t i;
+ i40e_tx_control_block_t *tcb = itrq->itrq_tcb_area;
+
+ for (i = 0; i < itrq->itrq_tx_free_list_size; i++, tcb++) {
+ i40e_free_dma_buffer(&tcb->tcb_dma);
+ if (tcb->tcb_dma_handle != NULL) {
+ ddi_dma_free_handle(&tcb->tcb_dma_handle);
+ tcb->tcb_dma_handle = NULL;
+ }
+ }
+
+ fsz = sizeof (i40e_tx_control_block_t) *
+ itrq->itrq_tx_free_list_size;
+ kmem_free(itrq->itrq_tcb_area, fsz);
+ itrq->itrq_tcb_area = NULL;
+ }
+
+ if (itrq->itrq_tcb_free_list != NULL) {
+ fsz = sizeof (i40e_tx_control_block_t *) *
+ itrq->itrq_tx_free_list_size;
+ kmem_free(itrq->itrq_tcb_free_list, fsz);
+ itrq->itrq_tcb_free_list = NULL;
+ }
+
+ if (itrq->itrq_tcb_work_list != NULL) {
+ fsz = sizeof (i40e_tx_control_block_t *) *
+ itrq->itrq_tx_ring_size;
+ kmem_free(itrq->itrq_tcb_work_list, fsz);
+ itrq->itrq_tcb_work_list = NULL;
+ }
+
+ i40e_free_dma_buffer(&itrq->itrq_desc_area);
+ itrq->itrq_desc_ring = NULL;
+
+}
+
+static boolean_t
+i40e_alloc_tx_dma(i40e_trqpair_t *itrq)
+{
+ int i, ret;
+ size_t dmasz;
+ i40e_tx_control_block_t *tcb;
+ i40e_t *i40e = itrq->itrq_i40e;
+
+ itrq->itrq_tx_ring_size = i40e->i40e_tx_ring_size;
+ itrq->itrq_tx_free_list_size = i40e->i40e_tx_ring_size +
+ (i40e->i40e_tx_ring_size >> 1);
+
+ /*
+ * Allocate an additional tx descriptor for the writeback head.
+ */
+ dmasz = sizeof (i40e_tx_desc_t) * itrq->itrq_tx_ring_size;
+ dmasz += sizeof (i40e_tx_desc_t);
+
+ VERIFY(dmasz > 0);
+ if (i40e_alloc_dma_buffer(i40e, &itrq->itrq_desc_area,
+ &i40e->i40e_static_dma_attr, &i40e->i40e_desc_acc_attr,
+ B_FALSE, B_TRUE, dmasz) == B_FALSE) {
+ i40e_error(i40e, "failed to allocate DMA resources for tx "
+ "descriptor ring");
+ return (B_FALSE);
+ }
+ itrq->itrq_desc_ring =
+ (i40e_tx_desc_t *)(uintptr_t)itrq->itrq_desc_area.dmab_address;
+ itrq->itrq_desc_wbhead = (uint32_t *)(itrq->itrq_desc_ring +
+ itrq->itrq_tx_ring_size);
+ itrq->itrq_desc_head = 0;
+ itrq->itrq_desc_tail = 0;
+ itrq->itrq_desc_free = itrq->itrq_tx_ring_size;
+
+ itrq->itrq_tcb_work_list = kmem_zalloc(itrq->itrq_tx_ring_size *
+ sizeof (i40e_tx_control_block_t *), KM_NOSLEEP);
+ if (itrq->itrq_tcb_work_list == NULL) {
+ i40e_error(i40e, "failed to allocate a %d entry tx work list "
+ "for ring %d", itrq->itrq_tx_ring_size, itrq->itrq_index);
+ goto cleanup;
+ }
+
+ itrq->itrq_tcb_free_list = kmem_zalloc(itrq->itrq_tx_free_list_size *
+ sizeof (i40e_tx_control_block_t *), KM_SLEEP);
+ if (itrq->itrq_tcb_free_list == NULL) {
+ i40e_error(i40e, "failed to allocate a %d entry tx free list "
+ "for ring %d", itrq->itrq_tx_free_list_size,
+ itrq->itrq_index);
+ goto cleanup;
+ }
+
+ /*
+ * We allocate enough tx control blocks to cover the free list.
+ */
+ itrq->itrq_tcb_area = kmem_zalloc(sizeof (i40e_tx_control_block_t) *
+ itrq->itrq_tx_free_list_size, KM_NOSLEEP);
+ if (itrq->itrq_tcb_area == NULL) {
+ i40e_error(i40e, "failed to allocate a %d entry tcb area for "
+ "ring %d", itrq->itrq_tx_free_list_size, itrq->itrq_index);
+ goto cleanup;
+ }
+
+ /*
+ * For each tcb, allocate DMA memory.
+ */
+ dmasz = i40e->i40e_tx_buf_size;
+ VERIFY(dmasz > 0);
+ tcb = itrq->itrq_tcb_area;
+ for (i = 0; i < itrq->itrq_tx_free_list_size; i++, tcb++) {
+ VERIFY(tcb != NULL);
+
+ /*
+ * Allocate both a DMA buffer which we'll use for when we copy
+ * packets for transmission and allocate a DMA handle which
+ * we'll use when we bind data.
+ */
+ ret = ddi_dma_alloc_handle(i40e->i40e_dip,
+ &i40e->i40e_txbind_dma_attr, DDI_DMA_DONTWAIT, NULL,
+ &tcb->tcb_dma_handle);
+ if (ret != DDI_SUCCESS) {
+ i40e_error(i40e, "failed to allocate DMA handle for tx "
+ "data binding on ring %d: %d", itrq->itrq_index,
+ ret);
+ tcb->tcb_dma_handle = NULL;
+ goto cleanup;
+ }
+
+ if (i40e_alloc_dma_buffer(i40e, &tcb->tcb_dma,
+ &i40e->i40e_static_dma_attr, &i40e->i40e_buf_acc_attr,
+ B_TRUE, B_FALSE, dmasz) == B_FALSE) {
+ i40e_error(i40e, "failed to allocate %d bytes of "
+ "DMA for tx data binding on ring %d: %d", dmasz,
+ itrq->itrq_index);
+ goto cleanup;
+ }
+
+ itrq->itrq_tcb_free_list[i] = tcb;
+ }
+
+ itrq->itrq_tcb_free = itrq->itrq_tx_free_list_size;
+
+ return (B_TRUE);
+
+cleanup:
+ i40e_free_tx_dma(itrq);
+ return (B_FALSE);
+}
+
+/*
+ * Free all memory associated with all of the rings on this i40e instance. Note,
+ * this is done as part of the GLDv3 stop routine.
+ */
+void
+i40e_free_ring_mem(i40e_t *i40e, boolean_t failed_init)
+{
+ int i;
+
+ for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
+ i40e_rx_data_t *rxd = i40e->i40e_trqpairs[i].itrq_rxdata;
+
+ /*
+ * Clean up our rx data. We have to free DMA resources first and
+ * then if we have no more pending RCB's, then we'll go ahead
+ * and clean things up. Note, we can't set the stopped flag on
+ * the rx data until after we've done the first pass of the
+ * pending resources. Otherwise we might race with
+ * i40e_rx_recycle on determining who should free the
+ * i40e_rx_data_t above.
+ */
+ i40e_free_rx_dma(rxd, failed_init);
+
+ mutex_enter(&i40e->i40e_rx_pending_lock);
+ rxd->rxd_shutdown = B_TRUE;
+ if (rxd->rxd_rcb_pending == 0) {
+ i40e_free_rx_data(rxd);
+ i40e->i40e_trqpairs[i].itrq_rxdata = NULL;
+ }
+ mutex_exit(&i40e->i40e_rx_pending_lock);
+
+ i40e_free_tx_dma(&i40e->i40e_trqpairs[i]);
+ }
+}
+
+/*
+ * Allocate all of the resources associated with all of the rings on this i40e
+ * instance. Note this is done as part of the GLDv3 start routine and thus we
+ * should not use blocking allocations. This takes care of both DMA and non-DMA
+ * related resources.
+ */
+boolean_t
+i40e_alloc_ring_mem(i40e_t *i40e)
+{
+ int i;
+
+ for (i = 0; i < i40e->i40e_num_trqpairs; i++) {
+ if (i40e_alloc_rx_data(i40e, &i40e->i40e_trqpairs[i]) ==
+ B_FALSE)
+ goto unwind;
+
+ if (i40e_alloc_rx_dma(i40e->i40e_trqpairs[i].itrq_rxdata) ==
+ B_FALSE)
+ goto unwind;
+
+ if (i40e_alloc_tx_dma(&i40e->i40e_trqpairs[i]) == B_FALSE)
+ goto unwind;
+ }
+
+ return (B_TRUE);
+
+unwind:
+ i40e_free_ring_mem(i40e, B_TRUE);
+ return (B_FALSE);
+}
+
+
+/*
+ * Because every instance of i40e may have different support for FMA
+ * capabilities, we copy the DMA attributes into the i40e_t and set them that
+ * way and use them for determining attributes.
+ */
+void
+i40e_init_dma_attrs(i40e_t *i40e, boolean_t fma)
+{
+ bcopy(&i40e_g_static_dma_attr, &i40e->i40e_static_dma_attr,
+ sizeof (ddi_dma_attr_t));
+ bcopy(&i40e_g_txbind_dma_attr, &i40e->i40e_txbind_dma_attr,
+ sizeof (ddi_dma_attr_t));
+ bcopy(&i40e_g_desc_acc_attr, &i40e->i40e_desc_acc_attr,
+ sizeof (ddi_device_acc_attr_t));
+ bcopy(&i40e_g_buf_acc_attr, &i40e->i40e_buf_acc_attr,
+ sizeof (ddi_device_acc_attr_t));
+
+ if (fma == B_TRUE) {
+ i40e->i40e_static_dma_attr.dma_attr_flags |= DDI_DMA_FLAGERR;
+ i40e->i40e_txbind_dma_attr.dma_attr_flags |= DDI_DMA_FLAGERR;
+ } else {
+ i40e->i40e_static_dma_attr.dma_attr_flags &= ~DDI_DMA_FLAGERR;
+ i40e->i40e_txbind_dma_attr.dma_attr_flags &= ~DDI_DMA_FLAGERR;
+ }
+}
+
+static void
+i40e_rcb_free(i40e_rx_data_t *rxd, i40e_rx_control_block_t *rcb)
+{
+ mutex_enter(&rxd->rxd_free_lock);
+ ASSERT(rxd->rxd_rcb_free < rxd->rxd_free_list_size);
+ ASSERT(rxd->rxd_free_list[rxd->rxd_rcb_free] == NULL);
+ rxd->rxd_free_list[rxd->rxd_rcb_free] = rcb;
+ rxd->rxd_rcb_free++;
+ mutex_exit(&rxd->rxd_free_lock);
+}
+
+static i40e_rx_control_block_t *
+i40e_rcb_alloc(i40e_rx_data_t *rxd)
+{
+ i40e_rx_control_block_t *rcb;
+
+ mutex_enter(&rxd->rxd_free_lock);
+ if (rxd->rxd_rcb_free == 0) {
+ mutex_exit(&rxd->rxd_free_lock);
+ return (NULL);
+ }
+ rxd->rxd_rcb_free--;
+ rcb = rxd->rxd_free_list[rxd->rxd_rcb_free];
+ VERIFY(rcb != NULL);
+ rxd->rxd_free_list[rxd->rxd_rcb_free] = NULL;
+ mutex_exit(&rxd->rxd_free_lock);
+
+ return (rcb);
+}
+
+/*
+ * This is the callback that we get from the OS when freemsg(9F) has been called
+ * on a loaned descriptor. In addition, if we take the last reference count
+ * here, then we have to tear down all of the rx data.
+ */
+void
+i40e_rx_recycle(caddr_t arg)
+{
+ uint32_t ref;
+ i40e_rx_control_block_t *rcb;
+ i40e_rx_data_t *rxd;
+ i40e_t *i40e;
+
+ /* LINTED: E_BAD_PTR_CAST_ALIGN */
+ rcb = (i40e_rx_control_block_t *)arg;
+ rxd = rcb->rcb_rxd;
+ i40e = rxd->rxd_i40e;
+
+ /*
+ * It's possible for this to be called with a reference count of zero.
+ * That will happen when we're doing the freemsg after taking the last
+ * reference because we're tearing down everything and this rcb is not
+ * outstanding.
+ */
+ if (rcb->rcb_ref == 0)
+ return;
+
+ /*
+ * Don't worry about failure of desballoc here. It'll only become fatal
+ * if we're trying to use it and we can't in i40e_rx_bind().
+ */
+ rcb->rcb_mp = desballoc((unsigned char *)rcb->rcb_dma.dmab_address,
+ rcb->rcb_dma.dmab_size, 0, &rcb->rcb_free_rtn);
+ i40e_rcb_free(rxd, rcb);
+
+ /*
+ * It's possible that the rcb was being used while we are shutting down
+ * the device. In that case, we'll take the final reference from the
+ * device here.
+ */
+ ref = atomic_dec_32_nv(&rcb->rcb_ref);
+ if (ref == 0) {
+ freemsg(rcb->rcb_mp);
+ rcb->rcb_mp = NULL;
+ i40e_free_dma_buffer(&rcb->rcb_dma);
+
+ mutex_enter(&i40e->i40e_rx_pending_lock);
+ atomic_dec_32(&rxd->rxd_rcb_pending);
+ atomic_dec_32(&i40e->i40e_rx_pending);
+
+ /*
+ * If this was the last block and it's been indicated that we've
+ * passed the shutdown point, we should clean up.
+ */
+ if (rxd->rxd_shutdown == B_TRUE && rxd->rxd_rcb_pending == 0) {
+ i40e_free_rx_data(rxd);
+ cv_broadcast(&i40e->i40e_rx_pending_cv);
+ }
+
+ mutex_exit(&i40e->i40e_rx_pending_lock);
+ }
+}
+
+static mblk_t *
+i40e_rx_bind(i40e_trqpair_t *itrq, i40e_rx_data_t *rxd, uint32_t index,
+ uint32_t plen)
+{
+ mblk_t *mp;
+ i40e_t *i40e = rxd->rxd_i40e;
+ i40e_rx_control_block_t *rcb, *rep_rcb;
+
+ ASSERT(MUTEX_HELD(&itrq->itrq_rx_lock));
+
+ if ((rep_rcb = i40e_rcb_alloc(rxd)) == NULL) {
+ itrq->itrq_rxstat.irxs_rx_bind_norcb.value.ui64++;
+ return (NULL);
+ }
+
+ rcb = rxd->rxd_work_list[index];
+
+ /*
+ * Check to make sure we have a mblk_t. If we don't, this is our last
+ * chance to try and get one.
+ */
+ if (rcb->rcb_mp == NULL) {
+ rcb->rcb_mp =
+ desballoc((unsigned char *)rcb->rcb_dma.dmab_address,
+ rcb->rcb_dma.dmab_size, 0, &rcb->rcb_free_rtn);
+ if (rcb->rcb_mp == NULL) {
+ itrq->itrq_rxstat.irxs_rx_bind_nomp.value.ui64++;
+ i40e_rcb_free(rxd, rcb);
+ return (NULL);
+ }
+ }
+
+ I40E_DMA_SYNC(&rcb->rcb_dma, DDI_DMA_SYNC_FORKERNEL);
+
+ if (i40e_check_dma_handle(rcb->rcb_dma.dmab_dma_handle) != DDI_FM_OK) {
+ ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED);
+ atomic_or_32(&i40e->i40e_state, I40E_ERROR);
+ i40e_rcb_free(rxd, rcb);
+ return (NULL);
+ }
+
+ /*
+ * Note, we've already accounted for the I40E_BUF_IPHDR_ALIGNMENT.
+ */
+ mp = rcb->rcb_mp;
+ atomic_inc_32(&rcb->rcb_ref);
+ mp->b_wptr = mp->b_rptr + plen;
+ mp->b_next = mp->b_cont = NULL;
+
+ rxd->rxd_work_list[index] = rep_rcb;
+ return (mp);
+}
+
+/*
+ * We're going to allocate a new message block for this frame and attempt to
+ * receive it. See the big theory statement for more information on when we copy
+ * versus bind.
+ */
+static mblk_t *
+i40e_rx_copy(i40e_trqpair_t *itrq, i40e_rx_data_t *rxd, uint32_t index,
+ uint32_t plen)
+{
+ i40e_t *i40e = rxd->rxd_i40e;
+ i40e_rx_control_block_t *rcb;
+ mblk_t *mp;
+
+ ASSERT(index < rxd->rxd_ring_size);
+ rcb = rxd->rxd_work_list[index];
+
+ I40E_DMA_SYNC(&rcb->rcb_dma, DDI_DMA_SYNC_FORKERNEL);
+
+ if (i40e_check_dma_handle(rcb->rcb_dma.dmab_dma_handle) != DDI_FM_OK) {
+ ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED);
+ atomic_or_32(&i40e->i40e_state, I40E_ERROR);
+ return (NULL);
+ }
+
+ mp = allocb(plen + I40E_BUF_IPHDR_ALIGNMENT, 0);
+ if (mp == NULL) {
+ itrq->itrq_rxstat.irxs_rx_copy_nomem.value.ui64++;
+ return (NULL);
+ }
+
+ mp->b_rptr += I40E_BUF_IPHDR_ALIGNMENT;
+ bcopy(rcb->rcb_dma.dmab_address, mp->b_rptr, plen);
+ mp->b_wptr = mp->b_rptr + plen;
+
+ return (mp);
+}
+
+/*
+ * Determine if the device has enabled any checksum flags for us. The level of
+ * checksum computed will depend on the type packet that we have, which is
+ * contained in ptype. For example, the checksum logic it does will vary
+ * depending on whether or not the packet is considered tunneled, whether it
+ * recognizes the L4 type, etc. Section 8.3.4.3 summarizes which checksums are
+ * valid.
+ *
+ * While there are additional checksums that we could recognize here, we'll need
+ * to get some additional GLDv3 enhancements to be able to properly describe
+ * them.
+ */
+static void
+i40e_rx_hcksum(i40e_trqpair_t *itrq, mblk_t *mp, uint64_t status, uint32_t err,
+ uint32_t ptype)
+{
+ uint32_t cksum;
+ struct i40e_rx_ptype_decoded pinfo;
+
+ ASSERT(ptype <= 255);
+ pinfo = decode_rx_desc_ptype(ptype);
+
+ cksum = 0;
+
+ /*
+ * If the ptype isn't something that we know in the driver, then we
+ * shouldn't even consider moving forward.
+ */
+ if (pinfo.known == 0) {
+ itrq->itrq_rxstat.irxs_hck_unknown.value.ui64++;
+ return;
+ }
+
+ /*
+ * If hardware didn't set the L3L4P bit on the frame, then there is no
+ * checksum offload to consider.
+ */
+ if ((status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) == 0) {
+ itrq->itrq_rxstat.irxs_hck_nol3l4p.value.ui64++;
+ return;
+ }
+
+ /*
+ * The device tells us that IPv6 checksums where a Destination Options
+ * Header or a Routing header shouldn't be trusted. Discard all
+ * checksums in this case.
+ */
+ if (pinfo.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
+ pinfo.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6 &&
+ (status & (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))) {
+ itrq->itrq_rxstat.irxs_hck_v6skip.value.ui64++;
+ return;
+ }
+
+ /*
+ * The hardware denotes three kinds of possible errors. Two are reserved
+ * for inner and outer IP checksum errors (IPE and EIPE) and the latter
+ * is for L4 checksum errors (L4E). If there is only one IP header, then
+ * the only thing that we care about is IPE. Note that since we don't
+ * support inner checksums, we will ignore IPE being set on tunneled
+ * packets and only care about EIPE.
+ */
+ if (pinfo.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
+ pinfo.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) {
+ if (pinfo.tunnel_type == I40E_RX_PTYPE_OUTER_NONE) {
+ if ((err & (1 << I40E_RX_DESC_ERROR_IPE_SHIFT)) != 0) {
+ itrq->itrq_rxstat.irxs_hck_iperr.value.ui64++;
+ } else {
+ itrq->itrq_rxstat.irxs_hck_v4hdrok.value.ui64++;
+ cksum |= HCK_IPV4_HDRCKSUM_OK;
+ }
+ } else {
+ if ((err & (1 << I40E_RX_DESC_ERROR_EIPE_SHIFT)) != 0) {
+ itrq->itrq_rxstat.irxs_hck_eiperr.value.ui64++;
+ } else {
+ itrq->itrq_rxstat.irxs_hck_v4hdrok.value.ui64++;
+ cksum |= HCK_IPV4_HDRCKSUM_OK;
+ }
+ }
+ }
+
+ /*
+ * We only have meaningful L4 checksums in the case of IP->L4 and
+ * IP->IP->L4. There is not outer L4 checksum data available in any
+ * other case. Further, we don't bother reporting the valid checksum in
+ * the case of IP->IP->L4 set.
+ */
+ if (pinfo.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
+ pinfo.tunnel_type == I40E_RX_PTYPE_TUNNEL_NONE &&
+ (pinfo.inner_prot == I40E_RX_PTYPE_INNER_PROT_UDP ||
+ pinfo.inner_prot == I40E_RX_PTYPE_INNER_PROT_TCP ||
+ pinfo.inner_prot == I40E_RX_PTYPE_INNER_PROT_ICMP ||
+ pinfo.inner_prot == I40E_RX_PTYPE_INNER_PROT_SCTP)) {
+ ASSERT(pinfo.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4);
+ if ((err & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT)) != 0) {
+ itrq->itrq_rxstat.irxs_hck_l4err.value.ui64++;
+ } else {
+ itrq->itrq_rxstat.irxs_hck_l4hdrok.value.ui64++;
+ cksum |= HCK_FULLCKSUM_OK;
+ }
+ }
+
+ if (cksum != 0) {
+ itrq->itrq_rxstat.irxs_hck_set.value.ui64++;
+ mac_hcksum_set(mp, 0, 0, 0, 0, cksum);
+ } else {
+ itrq->itrq_rxstat.irxs_hck_miss.value.ui64++;
+ }
+}
+
+mblk_t *
+i40e_ring_rx(i40e_trqpair_t *itrq, int poll_bytes)
+{
+ i40e_t *i40e;
+ i40e_hw_t *hw;
+ i40e_rx_data_t *rxd;
+ uint32_t cur_head;
+ i40e_rx_desc_t *cur_desc;
+ i40e_rx_control_block_t *rcb;
+ uint64_t rx_bytes, rx_frames;
+ uint64_t stword;
+ mblk_t *mp, *mp_head, **mp_tail;
+
+ ASSERT(MUTEX_HELD(&itrq->itrq_rx_lock));
+ rxd = itrq->itrq_rxdata;
+ i40e = itrq->itrq_i40e;
+ hw = &i40e->i40e_hw_space;
+
+ if (!(i40e->i40e_state & I40E_STARTED) ||
+ (i40e->i40e_state & I40E_OVERTEMP) ||
+ (i40e->i40e_state & I40E_SUSPENDED) ||
+ (i40e->i40e_state & I40E_ERROR))
+ return (NULL);
+
+ /*
+ * Before we do anything else, we have to make sure that all of the DMA
+ * buffers are synced up and then check to make sure that they're
+ * actually good from an FM perspective.
+ */
+ I40E_DMA_SYNC(&rxd->rxd_desc_area, DDI_DMA_SYNC_FORKERNEL);
+ if (i40e_check_dma_handle(rxd->rxd_desc_area.dmab_dma_handle) !=
+ DDI_FM_OK) {
+ ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED);
+ atomic_or_32(&i40e->i40e_state, I40E_ERROR);
+ return (NULL);
+ }
+
+ /*
+ * Prepare our stats. We do a limited amount of processing in both
+ * polling and interrupt context. The limit in interrupt context is
+ * based on frames, in polling context based on bytes.
+ */
+ rx_bytes = rx_frames = 0;
+ mp_head = NULL;
+ mp_tail = &mp_head;
+
+ /*
+ * At this point, the descriptor ring is available to check. We'll try
+ * and process until we either run out of poll_bytes or descriptors.
+ */
+ cur_head = rxd->rxd_desc_next;
+ cur_desc = &rxd->rxd_desc_ring[cur_head];
+ stword = LE64_TO_CPU(cur_desc->wb.qword1.status_error_len);
+
+ /*
+ * Note, the primary invariant of this loop should be tha cur_head,
+ * cur_desc, and stword always point to the currently processed
+ * descriptor. When we leave the loop, it should point to a descriptor
+ * that HAS NOT been processed. Meaning, that if we haven't consumed the
+ * frame, the descriptor should not be advanced.
+ */
+ while ((stword & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) != 0) {
+ uint32_t error, eop, plen, ptype;
+
+ /*
+ * The DD, PLEN, and EOP bits are the only ones that are valid
+ * in every frame. The error information is only valid when EOP
+ * is set in the same frame.
+ *
+ * At this time, because we don't do any LRO or header
+ * splitting. We expect that every frame should have EOP set in
+ * it. When later functionality comes in, we'll want to
+ * re-evaluate this.
+ */
+ eop = stword & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT);
+ VERIFY(eop != 0);
+
+ error = (stword & I40E_RXD_QW1_ERROR_MASK) >>
+ I40E_RXD_QW1_ERROR_SHIFT;
+ if (error & I40E_RX_ERR_BITS) {
+ itrq->itrq_rxstat.irxs_rx_desc_error.value.ui64++;
+ goto discard;
+ }
+
+ plen = (stword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
+ I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
+
+ ptype = (stword & I40E_RXD_QW1_PTYPE_MASK) >>
+ I40E_RXD_QW1_PTYPE_SHIFT;
+
+ /*
+ * This packet contains valid data. We should check to see if
+ * we're actually going to consume it based on its length (to
+ * ensure that we don't overshoot our quota). We determine
+ * whether to bcopy or bind the DMA resources based on the size
+ * of the frame. However, if on debug, we allow it to be
+ * overridden for testing purposes.
+ *
+ * We should be smarter about this and do DMA binding for
+ * larger frames, but for now, it's really more important that
+ * we actually just get something simple working.
+ */
+
+ /*
+ * Ensure we don't exceed our polling quota by reading this
+ * frame. Note we only bump bytes now, we bump frames later.
+ */
+ if ((poll_bytes != I40E_POLL_NULL) &&
+ (rx_bytes + plen) > poll_bytes)
+ break;
+ rx_bytes += plen;
+
+ mp = NULL;
+ if (plen >= i40e->i40e_rx_dma_min)
+ mp = i40e_rx_bind(itrq, rxd, cur_head, plen);
+ if (mp == NULL)
+ mp = i40e_rx_copy(itrq, rxd, cur_head, plen);
+
+ if (mp != NULL) {
+ if (i40e->i40e_rx_hcksum_enable)
+ i40e_rx_hcksum(itrq, mp, stword, error, ptype);
+ *mp_tail = mp;
+ mp_tail = &mp->b_next;
+ }
+
+ /*
+ * Now we need to prepare this frame for use again. See the
+ * discussion in the big theory statements.
+ *
+ * However, right now we're doing the simple version of this.
+ * Normally what we'd do would depend on whether or not we were
+ * doing DMA binding or bcopying. But because we're always doing
+ * bcopying, we can just always use the current index as a key
+ * for what to do and reassign the buffer based on the ring.
+ */
+discard:
+ rcb = rxd->rxd_work_list[cur_head];
+ cur_desc->read.pkt_addr =
+ CPU_TO_LE64((uintptr_t)rcb->rcb_dma.dmab_dma_address);
+ cur_desc->read.hdr_addr = 0;
+
+ /*
+ * Finally, update our loop invariants.
+ */
+ cur_head = i40e_next_desc(cur_head, 1, rxd->rxd_ring_size);
+ cur_desc = &rxd->rxd_desc_ring[cur_head];
+ stword = LE64_TO_CPU(cur_desc->wb.qword1.status_error_len);
+
+ /*
+ * To help provide liveness, we limit the amount of data that
+ * we'll end up counting. Note that in these cases, an interrupt
+ * is not dissimilar from a polling request.
+ */
+ rx_frames++;
+ if (rx_frames > i40e->i40e_rx_limit_per_intr) {
+ itrq->itrq_rxstat.irxs_rx_intr_limit.value.ui64++;
+ break;
+ }
+ }
+
+ /*
+ * As we've modified the ring, we need to make sure that we sync the
+ * descriptor ring for the device. Next, we update the hardware and
+ * update our notion of where the head for us to read from hardware is
+ * next.
+ */
+ I40E_DMA_SYNC(&rxd->rxd_desc_area, DDI_DMA_SYNC_FORDEV);
+ if (i40e_check_dma_handle(rxd->rxd_desc_area.dmab_dma_handle) !=
+ DDI_FM_OK) {
+ ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED);
+ atomic_or_32(&i40e->i40e_state, I40E_ERROR);
+ }
+
+ if (rx_frames != 0) {
+ uint32_t tail;
+ ddi_acc_handle_t rh = i40e->i40e_osdep_space.ios_reg_handle;
+ rxd->rxd_desc_next = cur_head;
+ tail = i40e_prev_desc(cur_head, 1, rxd->rxd_ring_size);
+
+ I40E_WRITE_REG(hw, I40E_QRX_TAIL(itrq->itrq_index), tail);
+ if (i40e_check_acc_handle(rh) != DDI_FM_OK) {
+ ddi_fm_service_impact(i40e->i40e_dip,
+ DDI_SERVICE_DEGRADED);
+ atomic_or_32(&i40e->i40e_state, I40E_ERROR);
+ }
+
+ itrq->itrq_rxstat.irxs_bytes.value.ui64 += rx_bytes;
+ itrq->itrq_rxstat.irxs_packets.value.ui64 += rx_frames;
+ }
+
+#ifdef DEBUG
+ if (rx_frames == 0) {
+ ASSERT(rx_bytes == 0);
+ }
+#endif
+
+ return (mp_head);
+}
+
+/*
+ * This function is called by the GLDv3 when it wants to poll on a ring. The
+ * only primary difference from when we call this during an interrupt is that we
+ * have a limit on the number of bytes that we should consume.
+ */
+mblk_t *
+i40e_ring_rx_poll(void *arg, int poll_bytes)
+{
+ i40e_trqpair_t *itrq = arg;
+ mblk_t *mp;
+
+ ASSERT(poll_bytes > 0);
+ if (poll_bytes == 0)
+ return (NULL);
+
+ mutex_enter(&itrq->itrq_rx_lock);
+ mp = i40e_ring_rx(itrq, poll_bytes);
+ mutex_exit(&itrq->itrq_rx_lock);
+
+ return (mp);
+}
+
+/*
+ * This is a structure I wish someone would fill out for me for dorking with the
+ * checksums. When we get some more experience with this, we should go ahead and
+ * consider adding this to MAC.
+ */
+typedef enum mac_ether_offload_flags {
+ MEOI_L2INFO_SET = 0x01,
+ MEOI_VLAN_TAGGED = 0x02,
+ MEOI_L3INFO_SET = 0x04,
+ MEOI_L3CKSUM_SET = 0x08,
+ MEOI_L4INFO_SET = 0x10,
+ MEOI_L4CKSUM_SET = 0x20
+} mac_ether_offload_flags_t;
+
+typedef struct mac_ether_offload_info {
+ mac_ether_offload_flags_t meoi_flags;
+ uint8_t meoi_l2hlen; /* How long is the Ethernet header? */
+ uint16_t meoi_l3proto; /* What's the Ethertype */
+ uint8_t meoi_l3hlen; /* How long is the header? */
+ uint8_t meoi_l4proto; /* What is the payload type? */
+ uint8_t meoi_l4hlen; /* How long is the L4 header */
+ mblk_t *meoi_l3ckmp; /* Which mblk has the l3 checksum */
+ off_t meoi_l3ckoff; /* What's the offset to it */
+ mblk_t *meoi_l4ckmp; /* Which mblk has the L4 checksum */
+ off_t meoi_l4off; /* What is the offset to it? */
+} mac_ether_offload_info_t;
+
+/*
+ * This is something that we'd like to make a general MAC function. Before we do
+ * that, we should add support for TSO.
+ *
+ * We should really keep track of our offset and not walk everything every
+ * time. I can't imagine that this will be kind to us at high packet rates;
+ * however, for the moment, let's leave that.
+ *
+ * This walks a message block chain without pulling up to fill in the context
+ * information. Note that the data we care about could be hidden across more
+ * than one mblk_t.
+ */
+static int
+i40e_meoi_get_uint8(mblk_t *mp, off_t off, uint8_t *out)
+{
+ size_t mpsize;
+ uint8_t *bp;
+
+ mpsize = msgsize(mp);
+ /* Check for overflow */
+ if (off + sizeof (uint16_t) > mpsize)
+ return (-1);
+
+ mpsize = MBLKL(mp);
+ while (off >= mpsize) {
+ mp = mp->b_cont;
+ off -= mpsize;
+ mpsize = MBLKL(mp);
+ }
+
+ bp = mp->b_rptr + off;
+ *out = *bp;
+ return (0);
+
+}
+
+static int
+i40e_meoi_get_uint16(mblk_t *mp, off_t off, uint16_t *out)
+{
+ size_t mpsize;
+ uint8_t *bp;
+
+ mpsize = msgsize(mp);
+ /* Check for overflow */
+ if (off + sizeof (uint16_t) > mpsize)
+ return (-1);
+
+ mpsize = MBLKL(mp);
+ while (off >= mpsize) {
+ mp = mp->b_cont;
+ off -= mpsize;
+ mpsize = MBLKL(mp);
+ }
+
+ /*
+ * Data is in network order. Note the second byte of data might be in
+ * the next mp.
+ */
+ bp = mp->b_rptr + off;
+ *out = *bp << 8;
+ if (off + 1 == mpsize) {
+ mp = mp->b_cont;
+ bp = mp->b_rptr;
+ } else {
+ bp++;
+ }
+
+ *out |= *bp;
+ return (0);
+
+}
+
+static int
+mac_ether_offload_info(mblk_t *mp, mac_ether_offload_info_t *meoi)
+{
+ size_t off;
+ uint16_t ether;
+ uint8_t ipproto, iplen, l4len, maclen;
+
+ bzero(meoi, sizeof (mac_ether_offload_info_t));
+
+ off = offsetof(struct ether_header, ether_type);
+ if (i40e_meoi_get_uint16(mp, off, &ether) != 0)
+ return (-1);
+
+ if (ether == ETHERTYPE_VLAN) {
+ off = offsetof(struct ether_vlan_header, ether_type);
+ if (i40e_meoi_get_uint16(mp, off, &ether) != 0)
+ return (-1);
+ meoi->meoi_flags |= MEOI_VLAN_TAGGED;
+ maclen = sizeof (struct ether_vlan_header);
+ } else {
+ maclen = sizeof (struct ether_header);
+ }
+ meoi->meoi_flags |= MEOI_L2INFO_SET;
+ meoi->meoi_l2hlen = maclen;
+ meoi->meoi_l3proto = ether;
+
+ switch (ether) {
+ case ETHERTYPE_IP:
+ /*
+ * For IPv4 we need to get the length of the header, as it can
+ * be variable.
+ */
+ off = offsetof(ipha_t, ipha_version_and_hdr_length) + maclen;
+ if (i40e_meoi_get_uint8(mp, off, &iplen) != 0)
+ return (-1);
+ iplen &= 0x0f;
+ if (iplen < 5 || iplen > 0x0f)
+ return (-1);
+ iplen *= 4;
+ off = offsetof(ipha_t, ipha_protocol) + maclen;
+ if (i40e_meoi_get_uint8(mp, off, &ipproto) == -1)
+ return (-1);
+ break;
+ case ETHERTYPE_IPV6:
+ iplen = 40;
+ off = offsetof(ip6_t, ip6_nxt) + maclen;
+ if (i40e_meoi_get_uint8(mp, off, &ipproto) == -1)
+ return (-1);
+ break;
+ default:
+ return (0);
+ }
+ meoi->meoi_l3hlen = iplen;
+ meoi->meoi_l4proto = ipproto;
+ meoi->meoi_flags |= MEOI_L3INFO_SET;
+
+ switch (ipproto) {
+ case IPPROTO_TCP:
+ off = offsetof(tcph_t, th_offset_and_rsrvd) + maclen + iplen;
+ if (i40e_meoi_get_uint8(mp, off, &l4len) == -1)
+ return (-1);
+ l4len = (l4len & 0xf0) >> 4;
+ if (l4len < 5 || l4len > 0xf)
+ return (-1);
+ l4len *= 4;
+ break;
+ case IPPROTO_UDP:
+ l4len = sizeof (struct udphdr);
+ break;
+ case IPPROTO_SCTP:
+ l4len = sizeof (sctp_hdr_t);
+ break;
+ default:
+ return (0);
+ }
+
+ meoi->meoi_l4hlen = l4len;
+ meoi->meoi_flags |= MEOI_L4INFO_SET;
+ return (0);
+}
+
+/*
+ * Attempt to put togther the information we'll need to feed into a descriptor
+ * to properly program the hardware for checksum offload as well as the
+ * generally required flags.
+ *
+ * The i40e_tx_contex_t`itc_cmdflags contains the set of flags we need to or
+ * into the descriptor based on the checksum flags for this mblk_t and the
+ * actual information we care about.
+ */
+static int
+i40e_tx_context(i40e_t *i40e, i40e_trqpair_t *itrq, mblk_t *mp,
+ i40e_tx_context_t *tctx)
+{
+ int ret;
+ uint32_t flags, start;
+ mac_ether_offload_info_t meo;
+ i40e_txq_stat_t *txs = &itrq->itrq_txstat;
+
+ bzero(tctx, sizeof (i40e_tx_context_t));
+
+ if (i40e->i40e_tx_hcksum_enable != B_TRUE)
+ return (0);
+
+ mac_hcksum_get(mp, &start, NULL, NULL, NULL, &flags);
+ if (flags == 0)
+ return (0);
+
+ if ((ret = mac_ether_offload_info(mp, &meo)) != 0) {
+ txs->itxs_hck_meoifail.value.ui64++;
+ return (ret);
+ }
+
+ /*
+ * Have we been asked to checksum an IPv4 header. If so, verify that we
+ * have sufficient information and then set the proper fields in the
+ * command structure.
+ */
+ if (flags & HCK_IPV4_HDRCKSUM) {
+ if ((meo.meoi_flags & MEOI_L2INFO_SET) == 0) {
+ txs->itxs_hck_nol2info.value.ui64++;
+ return (-1);
+ }
+ if ((meo.meoi_flags & MEOI_L3INFO_SET) == 0) {
+ txs->itxs_hck_nol3info.value.ui64++;
+ return (-1);
+ }
+ if (meo.meoi_l3proto != ETHERTYPE_IP) {
+ txs->itxs_hck_badl3.value.ui64++;
+ return (-1);
+ }
+ tctx->itc_cmdflags |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
+ tctx->itc_offsets |= (meo.meoi_l2hlen >> 1) <<
+ I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
+ tctx->itc_offsets |= (meo.meoi_l3hlen >> 2) <<
+ I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
+ }
+
+ /*
+ * We've been asked to provide an L4 header, first, set up the IP
+ * information in the descriptor if we haven't already before moving
+ * onto seeing if we have enough information for the L4 checksum
+ * offload.
+ */
+ if (flags & HCK_PARTIALCKSUM) {
+ if ((meo.meoi_flags & MEOI_L4INFO_SET) == 0) {
+ txs->itxs_hck_nol4info.value.ui64++;
+ return (-1);
+ }
+
+ if (!(flags & HCK_IPV4_HDRCKSUM)) {
+ if ((meo.meoi_flags & MEOI_L2INFO_SET) == 0) {
+ txs->itxs_hck_nol2info.value.ui64++;
+ return (-1);
+ }
+ if ((meo.meoi_flags & MEOI_L3INFO_SET) == 0) {
+ txs->itxs_hck_nol3info.value.ui64++;
+ return (-1);
+ }
+
+ if (meo.meoi_l3proto == ETHERTYPE_IP) {
+ tctx->itc_cmdflags |=
+ I40E_TX_DESC_CMD_IIPT_IPV4;
+ } else if (meo.meoi_l3proto == ETHERTYPE_IPV6) {
+ tctx->itc_cmdflags |=
+ I40E_TX_DESC_CMD_IIPT_IPV6;
+ } else {
+ txs->itxs_hck_badl3.value.ui64++;
+ return (-1);
+ }
+ tctx->itc_offsets |= (meo.meoi_l2hlen >> 1) <<
+ I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
+ tctx->itc_offsets |= (meo.meoi_l3hlen >> 2) <<
+ I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
+ }
+
+ switch (meo.meoi_l4proto) {
+ case IPPROTO_TCP:
+ tctx->itc_cmdflags |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
+ break;
+ case IPPROTO_UDP:
+ tctx->itc_cmdflags |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
+ break;
+ case IPPROTO_SCTP:
+ tctx->itc_cmdflags |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
+ break;
+ default:
+ txs->itxs_hck_badl4.value.ui64++;
+ return (-1);
+ }
+
+ tctx->itc_offsets |= (meo.meoi_l4hlen >> 2) <<
+ I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+ }
+
+ return (0);
+}
+
+static void
+i40e_tcb_free(i40e_trqpair_t *itrq, i40e_tx_control_block_t *tcb)
+{
+ ASSERT(tcb != NULL);
+
+ mutex_enter(&itrq->itrq_tcb_lock);
+ ASSERT(itrq->itrq_tcb_free < itrq->itrq_tx_free_list_size);
+ itrq->itrq_tcb_free_list[itrq->itrq_tcb_free] = tcb;
+ itrq->itrq_tcb_free++;
+ mutex_exit(&itrq->itrq_tcb_lock);
+}
+
+static i40e_tx_control_block_t *
+i40e_tcb_alloc(i40e_trqpair_t *itrq)
+{
+ i40e_tx_control_block_t *ret;
+
+ mutex_enter(&itrq->itrq_tcb_lock);
+ if (itrq->itrq_tcb_free == 0) {
+ mutex_exit(&itrq->itrq_tcb_lock);
+ return (NULL);
+ }
+
+ itrq->itrq_tcb_free--;
+ ret = itrq->itrq_tcb_free_list[itrq->itrq_tcb_free];
+ itrq->itrq_tcb_free_list[itrq->itrq_tcb_free] = NULL;
+ mutex_exit(&itrq->itrq_tcb_lock);
+
+ ASSERT(ret != NULL);
+ return (ret);
+}
+
+/*
+ * This should be used to free any DMA resources, associated mblk_t's, etc. It's
+ * used as part of recycling the message blocks when we have either an interrupt
+ * or other activity that indicates that we need to take a look.
+ */
+static void
+i40e_tcb_reset(i40e_tx_control_block_t *tcb)
+{
+ switch (tcb->tcb_type) {
+ case I40E_TX_COPY:
+ tcb->tcb_dma.dmab_len = 0;
+ break;
+ case I40E_TX_DMA:
+ (void) ddi_dma_unbind_handle(tcb->tcb_dma_handle);
+ break;
+ case I40E_TX_NONE:
+ /* Cast to pacify lint */
+ panic("trying to free tcb %p with bad type none\n", (void *)tcb);
+ default:
+ panic("unknown i40e tcb type: %d", tcb->tcb_type);
+ }
+
+ tcb->tcb_type = I40E_TX_NONE;
+ freemsg(tcb->tcb_mp);
+ tcb->tcb_mp = NULL;
+ tcb->tcb_next = NULL;
+}
+
+/*
+ * This is called as part of shutting down to clean up all outstanding
+ * descriptors. Similar to recycle, except we don't re-arm anything and instead
+ * just return control blocks to the free list.
+ */
+void
+i40e_tx_cleanup_ring(i40e_trqpair_t *itrq)
+{
+ uint32_t index;
+
+ ASSERT(MUTEX_HELD(&itrq->itrq_tx_lock));
+ ASSERT(itrq->itrq_desc_free <= itrq->itrq_tx_ring_size);
+
+ /*
+ * Because we should have shut down the chip at this point, it should be
+ * safe to just clean up all the entries between our head and tail.
+ */
+#ifdef DEBUG
+ index = I40E_READ_REG(&itrq->itrq_i40e->i40e_hw_space,
+ I40E_QTX_ENA(itrq->itrq_index));
+ VERIFY0(index & (I40E_QTX_ENA_QENA_REQ_MASK |
+ I40E_QTX_ENA_QENA_STAT_MASK));
+#endif
+
+ index = itrq->itrq_desc_head;
+ while (itrq->itrq_desc_free < itrq->itrq_tx_ring_size) {
+ i40e_tx_control_block_t *tcb;
+
+ tcb = itrq->itrq_tcb_work_list[index];
+ VERIFY(tcb != NULL);
+ itrq->itrq_tcb_work_list[index] = NULL;
+ i40e_tcb_reset(tcb);
+ i40e_tcb_free(itrq, tcb);
+
+ bzero(&itrq->itrq_desc_ring[index], sizeof (i40e_tx_desc_t));
+ index = i40e_next_desc(index, 1, itrq->itrq_tx_ring_size);
+ itrq->itrq_desc_free++;
+ }
+
+ ASSERT(index == itrq->itrq_desc_tail);
+ itrq->itrq_desc_head = index;
+}
+
+/*
+ * We're here either by hook or by crook. We need to see if there are transmit
+ * descriptors available for us to go and clean up and return to the hardware.
+ * We may also be blocked, and if so, we should make sure that we let it know
+ * we're good to go.
+ */
+void
+i40e_tx_recycle_ring(i40e_trqpair_t *itrq)
+{
+ uint32_t wbhead, toclean, count;
+ i40e_tx_control_block_t *tcbhead;
+ i40e_t *i40e = itrq->itrq_i40e;
+
+ mutex_enter(&itrq->itrq_tx_lock);
+
+ ASSERT(itrq->itrq_desc_free <= itrq->itrq_tx_ring_size);
+ if (itrq->itrq_desc_free == itrq->itrq_tx_ring_size) {
+ if (itrq->itrq_tx_blocked == B_TRUE) {
+ itrq->itrq_tx_blocked = B_FALSE;
+ mac_tx_ring_update(i40e->i40e_mac_hdl,
+ itrq->itrq_mactxring);
+ itrq->itrq_txstat.itxs_num_unblocked.value.ui64++;
+ }
+ mutex_exit(&itrq->itrq_tx_lock);
+ return;
+ }
+
+ /*
+ * Now we need to try and see if there's anything available. The driver
+ * will write to the head location and it guarantees that it does not
+ * use relaxed ordering.
+ */
+ VERIFY0(ddi_dma_sync(itrq->itrq_desc_area.dmab_dma_handle,
+ (uintptr_t)itrq->itrq_desc_wbhead,
+ sizeof (uint32_t), DDI_DMA_SYNC_FORKERNEL));
+
+ if (i40e_check_dma_handle(itrq->itrq_desc_area.dmab_dma_handle) !=
+ DDI_FM_OK) {
+ mutex_exit(&itrq->itrq_tx_lock);
+ ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED);
+ atomic_or_32(&i40e->i40e_state, I40E_ERROR);
+ return;
+ }
+
+ wbhead = *itrq->itrq_desc_wbhead;
+ toclean = itrq->itrq_desc_head;
+ count = 0;
+ tcbhead = NULL;
+
+ while (toclean != wbhead) {
+ i40e_tx_control_block_t *tcb;
+
+ tcb = itrq->itrq_tcb_work_list[toclean];
+ itrq->itrq_tcb_work_list[toclean] = NULL;
+ ASSERT(tcb != NULL);
+ tcb->tcb_next = tcbhead;
+ tcbhead = tcb;
+
+ /*
+ * We zero this out for sanity purposes.
+ */
+ bzero(&itrq->itrq_desc_ring[toclean], sizeof (i40e_tx_desc_t));
+ toclean = i40e_next_desc(toclean, 1, itrq->itrq_tx_ring_size);
+ count++;
+ }
+
+ itrq->itrq_desc_head = wbhead;
+ itrq->itrq_desc_free += count;
+ itrq->itrq_txstat.itxs_recycled.value.ui64 += count;
+ ASSERT(itrq->itrq_desc_free <= itrq->itrq_tx_ring_size);
+
+ if (itrq->itrq_tx_blocked == B_TRUE &&
+ itrq->itrq_desc_free > i40e->i40e_tx_block_thresh) {
+ itrq->itrq_tx_blocked = B_FALSE;
+
+ mac_tx_ring_update(i40e->i40e_mac_hdl, itrq->itrq_mactxring);
+ itrq->itrq_txstat.itxs_num_unblocked.value.ui64++;
+ }
+
+ mutex_exit(&itrq->itrq_tx_lock);
+
+ /*
+ * Now clean up the tcb.
+ */
+ while (tcbhead != NULL) {
+ i40e_tx_control_block_t *tcb = tcbhead;
+
+ tcbhead = tcb->tcb_next;
+ i40e_tcb_reset(tcb);
+ i40e_tcb_free(itrq, tcb);
+ }
+
+ DTRACE_PROBE2(i40e__recycle, i40e_trqpair_t *, itrq, uint32_t, count);
+}
+
+/*
+ * We've been asked to send a message block on the wire. We'll only have a
+ * single chain. There will not be any b_next pointers; however, there may be
+ * multiple b_cont blocks.
+ *
+ * We may do one of three things with any given mblk_t chain:
+ *
+ * 1) Drop it
+ * 2) Transmit it
+ * 3) Return it
+ *
+ * If we return it to MAC, then MAC will flow control on our behalf. In other
+ * words, it won't send us anything until we tell it that it's okay to send us
+ * something.
+ */
+mblk_t *
+i40e_ring_tx(void *arg, mblk_t *mp)
+{
+ const mblk_t *nmp;
+ size_t mpsize;
+ i40e_tx_control_block_t *tcb;
+ i40e_tx_desc_t *txdesc;
+ i40e_tx_context_t tctx;
+ int cmd, type;
+
+ i40e_trqpair_t *itrq = arg;
+ i40e_t *i40e = itrq->itrq_i40e;
+ i40e_hw_t *hw = &i40e->i40e_hw_space;
+ i40e_txq_stat_t *txs = &itrq->itrq_txstat;
+
+ ASSERT(mp->b_next == NULL);
+
+ if (!(i40e->i40e_state & I40E_STARTED) ||
+ (i40e->i40e_state & I40E_OVERTEMP) ||
+ (i40e->i40e_state & I40E_SUSPENDED) ||
+ (i40e->i40e_state & I40E_ERROR) ||
+ (i40e->i40e_link_state != LINK_STATE_UP)) {
+ freemsg(mp);
+ return (NULL);
+ }
+
+ /*
+ * Figure out the relevant context about this frame that we might need
+ * for enabling checksum, lso, etc. This also fills in information that
+ * we might set around the packet type, etc.
+ */
+ if (i40e_tx_context(i40e, itrq, mp, &tctx) < 0) {
+ freemsg(mp);
+ itrq->itrq_txstat.itxs_err_context.value.ui64++;
+ return (NULL);
+ }
+
+ /*
+ * For the primordial driver we can punt on doing any recycling right
+ * now; however, longer term we need to probably do some more pro-active
+ * recycling to cut back on stalls in the tx path.
+ */
+
+ /*
+ * Do a quick size check to make sure it fits into what we think it
+ * should for this device. Note that longer term this will be false,
+ * particularly when we have the world of TSO.
+ */
+ mpsize = 0;
+ for (nmp = mp; nmp != NULL; nmp = nmp->b_cont) {
+ mpsize += MBLKL(nmp);
+ }
+
+ /*
+ * First we allocate our tx control block and prepare the packet for
+ * transmit before we do a final check for descriptors. We do it this
+ * way to minimize the time under the tx lock.
+ */
+ tcb = i40e_tcb_alloc(itrq);
+ if (tcb == NULL) {
+ txs->itxs_err_notcb.value.ui64++;
+ goto txfail;
+ }
+
+ /*
+ * For transmitting a block, we're currently going to use just a
+ * single control block and bcopy all of the fragments into it. We
+ * should be more intelligent about doing DMA binding or otherwise, but
+ * for getting off the ground this will have to do.
+ */
+ ASSERT(tcb->tcb_dma.dmab_len == 0);
+ ASSERT(tcb->tcb_dma.dmab_size >= mpsize);
+ for (nmp = mp; nmp != NULL; nmp = nmp->b_cont) {
+ size_t clen = MBLKL(nmp);
+ void *coff = tcb->tcb_dma.dmab_address + tcb->tcb_dma.dmab_len;
+
+ bcopy(nmp->b_rptr, coff, clen);
+ tcb->tcb_dma.dmab_len += clen;
+ }
+ ASSERT(tcb->tcb_dma.dmab_len == mpsize);
+
+ /*
+ * While there's really no need to keep the mp here, but let's just do
+ * it to help with our own debugging for now.
+ */
+ tcb->tcb_mp = mp;
+ tcb->tcb_type = I40E_TX_COPY;
+ I40E_DMA_SYNC(&tcb->tcb_dma, DDI_DMA_SYNC_FORDEV);
+
+ mutex_enter(&itrq->itrq_tx_lock);
+ if (itrq->itrq_desc_free < i40e->i40e_tx_block_thresh) {
+ txs->itxs_err_nodescs.value.ui64++;
+ mutex_exit(&itrq->itrq_tx_lock);
+ goto txfail;
+ }
+
+ /*
+ * Build up the descriptor and send it out. Thankfully at the moment
+ * we only need a single desc, because we're not doing anything fancy
+ * yet.
+ */
+ ASSERT(itrq->itrq_desc_free > 0);
+ itrq->itrq_desc_free--;
+ txdesc = &itrq->itrq_desc_ring[itrq->itrq_desc_tail];
+ itrq->itrq_tcb_work_list[itrq->itrq_desc_tail] = tcb;
+ itrq->itrq_desc_tail = i40e_next_desc(itrq->itrq_desc_tail, 1,
+ itrq->itrq_tx_ring_size);
+
+ /*
+ * Note, we always set EOP and RS which indicates that this is the last
+ * data frame and that we should ask for it to be transmitted. We also
+ * must always set ICRC, because that is an internal bit that must be
+ * set to one for data descriptors. The remaining bits in the command
+ * descriptor depend on checksumming and are determined based on the
+ * information set up in i40e_tx_context().
+ */
+ type = I40E_TX_DESC_DTYPE_DATA;
+ cmd = I40E_TX_DESC_CMD_EOP |
+ I40E_TX_DESC_CMD_RS |
+ I40E_TX_DESC_CMD_ICRC |
+ tctx.itc_cmdflags;
+ txdesc->buffer_addr =
+ CPU_TO_LE64((uintptr_t)tcb->tcb_dma.dmab_dma_address);
+ txdesc->cmd_type_offset_bsz = CPU_TO_LE64(((uint64_t)type |
+ ((uint64_t)tctx.itc_offsets << I40E_TXD_QW1_OFFSET_SHIFT) |
+ ((uint64_t)cmd << I40E_TXD_QW1_CMD_SHIFT) |
+ ((uint64_t)tcb->tcb_dma.dmab_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)));
+
+ /*
+ * Now, finally, sync the DMA data and alert hardware.
+ */
+ I40E_DMA_SYNC(&itrq->itrq_desc_area, DDI_DMA_SYNC_FORDEV);
+
+ I40E_WRITE_REG(hw, I40E_QTX_TAIL(itrq->itrq_index),
+ itrq->itrq_desc_tail);
+ if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) !=
+ DDI_FM_OK) {
+ /*
+ * Note, we can't really go through and clean this up very well,
+ * because the memory has been given to the device, so just
+ * indicate it's been transmitted.
+ */
+ ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED);
+ atomic_or_32(&i40e->i40e_state, I40E_ERROR);
+ }
+
+ txs->itxs_bytes.value.ui64 += mpsize;
+ txs->itxs_packets.value.ui64++;
+ txs->itxs_descriptors.value.ui64++;
+
+ mutex_exit(&itrq->itrq_tx_lock);
+
+ return (NULL);
+
+txfail:
+ /*
+ * We ran out of resources. Return it to MAC and indicate that we'll
+ * need to signal MAC. If there are allocated tcb's, return them now.
+ * Make sure to reset their message block's, since we'll return them
+ * back to MAC.
+ */
+ if (tcb != NULL) {
+ tcb->tcb_mp = NULL;
+ i40e_tcb_reset(tcb);
+ i40e_tcb_free(itrq, tcb);
+ }
+
+ mutex_enter(&itrq->itrq_tx_lock);
+ itrq->itrq_tx_blocked = B_TRUE;
+ mutex_exit(&itrq->itrq_tx_lock);
+
+ return (mp);
+}
diff --git a/usr/src/uts/common/io/i40e/i40e_xregs.h b/usr/src/uts/common/io/i40e/i40e_xregs.h
new file mode 100644
index 0000000000..1bf3a1f0be
--- /dev/null
+++ b/usr/src/uts/common/io/i40e/i40e_xregs.h
@@ -0,0 +1,53 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#ifndef _I40E_XREGS_H
+#define _I40E_XREGS_H
+
+/*
+ * This file contains extra register definitions and other things that would
+ * nominally come from the Intel common code, but do not due to bugs, erratum,
+ * etc. Ideally we'll get to a point where we can remove this file.
+ */
+#include "i40e_type.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * The MSPDC register is missing from the current datasheet.
+ */
+#define I40E_GLPRT_MSPDC(_i) (0x00300060 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_MSDPC_MAX_INDEX 3
+#define I40E_GLPRT_MSPDC_MSPDC_SHIFT 0
+#define I40E_GLPRT_MSPDC_MSPDC_MASK \
+ I40E_MASK(0xFFFFFFFF, I40E_GLPRT_MSPDC_MSPDC_SHIFT)
+
+/*
+ * The RXERR* registers are technically correct from the perspective of their
+ * addreses; however, the other associated constants are not correct. Instead,
+ * we have new definitions here in the interim.
+ */
+
+#define I40E_X_GL_RXERR1_L(_i) (0x00318000 + ((_i) * 8))
+
+#define I40E_X_GL_RXERR2_L(_i) (0x0031c000 + ((_i) * 8))
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _I40E_XREGS_H */
diff --git a/usr/src/uts/common/io/inotify.c b/usr/src/uts/common/io/inotify.c
new file mode 100644
index 0000000000..baa36cfc8d
--- /dev/null
+++ b/usr/src/uts/common/io/inotify.c
@@ -0,0 +1,1504 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2015 Joyent, Inc. All rights reserved.
+ * Copyright (c) 2015 The MathWorks, Inc. All rights reserved.
+ */
+
+/*
+ * Support for the inotify facility, a Linux-borne facility for asynchronous
+ * notification of certain events on specified files or directories. Our
+ * implementation broadly leverages the file event monitoring facility, and
+ * would actually be quite straightforward were it not for a very serious
+ * blunder in the inotify interface: in addition to allowing for one to be
+ * notified on events on a particular file or directory, inotify also allows
+ * for one to be notified on certain events on files _within_ a watched
+ * directory -- even though those events have absolutely nothing to do with
+ * the directory itself. This leads to all sorts of madness because file
+ * operations are (of course) not undertaken on paths but rather on open
+ * files -- and the relationships between open files and the paths that resolve
+ * to those files are neither static nor isomorphic. We implement this
+ * concept by having _child watches_ when directories are watched with events
+ * in IN_CHILD_EVENTS. We add child watches when a watch on a directory is
+ * first added, and we modify those child watches dynamically as files are
+ * created, deleted, moved into or moved out of the specified directory. This
+ * mechanism works well, absent hard links. Hard links, unfortunately, break
+ * this rather badly, and the user is warned that watches on directories that
+ * have multiple directory entries referring to the same file may behave
+ * unexpectedly.
+ */
+
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/inotify.h>
+#include <sys/fem.h>
+#include <sys/conf.h>
+#include <sys/stat.h>
+#include <sys/vfs_opreg.h>
+#include <sys/vmem.h>
+#include <sys/avl.h>
+#include <sys/sysmacros.h>
+#include <sys/cyclic.h>
+#include <sys/filio.h>
+
+struct inotify_state;
+struct inotify_kevent;
+
+typedef struct inotify_watch inotify_watch_t;
+typedef struct inotify_state inotify_state_t;
+typedef struct inotify_kevent inotify_kevent_t;
+
+struct inotify_watch {
+ kmutex_t inw_lock; /* lock protecting ref count */
+ int inw_refcnt; /* reference count */
+ uint8_t inw_zombie:1; /* boolean: is zombie */
+ uint8_t inw_fired:1; /* boolean: fired one-shot */
+ uint8_t inw_active:1; /* boolean: watch is active */
+ uint8_t inw_orphaned:1; /* boolean: orphaned */
+ kcondvar_t inw_cv; /* condvar for zombifier */
+ uint32_t inw_mask; /* mask of watch */
+ int32_t inw_wd; /* watch descriptor */
+ vnode_t *inw_vp; /* underlying vnode */
+ inotify_watch_t *inw_parent; /* parent, if a child */
+ avl_node_t inw_byvp; /* watches by vnode */
+ avl_node_t inw_bywd; /* watches by descriptor */
+ avl_tree_t inw_children; /* children, if a parent */
+ char *inw_name; /* name, if a child */
+ list_node_t inw_orphan; /* orphan list */
+ cred_t *inw_cred; /* cred, if orphaned */
+ inotify_state_t *inw_state; /* corresponding state */
+};
+
+struct inotify_kevent {
+ inotify_kevent_t *ine_next; /* next event in queue */
+ struct inotify_event ine_event; /* event (variable size) */
+};
+
+#define INOTIFY_EVENT_LENGTH(ev) \
+ (sizeof (inotify_kevent_t) + (ev)->ine_event.len)
+
+struct inotify_state {
+ kmutex_t ins_lock; /* lock protecting state */
+ avl_tree_t ins_byvp; /* watches by vnode */
+ avl_tree_t ins_bywd; /* watches by descriptor */
+ vmem_t *ins_wds; /* watch identifier arena */
+ int ins_maxwatches; /* maximum number of watches */
+ int ins_maxevents; /* maximum number of events */
+ int ins_nevents; /* current # of events */
+ int32_t ins_size; /* total size of events */
+ inotify_kevent_t *ins_head; /* head of event queue */
+ inotify_kevent_t *ins_tail; /* tail of event queue */
+ pollhead_t ins_pollhd; /* poll head */
+ kcondvar_t ins_cv; /* condvar for reading */
+ list_t ins_orphans; /* orphan list */
+ ddi_periodic_t ins_cleaner; /* cyclic for cleaning */
+ inotify_watch_t *ins_zombies; /* zombie watch list */
+ cred_t *ins_cred; /* creator's credentials */
+ inotify_state_t *ins_next; /* next state on global list */
+};
+
+/*
+ * Tunables (exported read-only in lx-branded zones via /proc).
+ */
+int inotify_maxwatches = 8192; /* max watches per instance */
+int inotify_maxevents = 16384; /* max events */
+int inotify_maxinstances = 128; /* max instances per user */
+
+/*
+ * Internal global variables.
+ */
+static kmutex_t inotify_lock; /* lock protecting state */
+static dev_info_t *inotify_devi; /* device info */
+static fem_t *inotify_femp; /* FEM pointer */
+static vmem_t *inotify_minor; /* minor number arena */
+static void *inotify_softstate; /* softstate pointer */
+static inotify_state_t *inotify_state; /* global list if state */
+
+static void inotify_watch_event(inotify_watch_t *, uint64_t, char *);
+static void inotify_watch_insert(inotify_watch_t *, vnode_t *, char *);
+static void inotify_watch_delete(inotify_watch_t *, uint32_t);
+static void inotify_watch_remove(inotify_state_t *state,
+ inotify_watch_t *watch);
+
+static int
+inotify_fop_close(femarg_t *vf, int flag, int count, offset_t offset,
+ cred_t *cr, caller_context_t *ct)
+{
+ inotify_watch_t *watch = vf->fa_fnode->fn_available;
+ int rval;
+
+ if ((rval = vnext_close(vf, flag, count, offset, cr, ct)) == 0) {
+ inotify_watch_event(watch, flag & FWRITE ?
+ IN_CLOSE_WRITE : IN_CLOSE_NOWRITE, NULL);
+ }
+
+ return (rval);
+}
+
+static int
+inotify_fop_create(femarg_t *vf, char *name, vattr_t *vap, vcexcl_t excl,
+ int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct,
+ vsecattr_t *vsecp)
+{
+ inotify_watch_t *watch = vf->fa_fnode->fn_available;
+ int rval;
+
+ if ((rval = vnext_create(vf, name, vap, excl, mode,
+ vpp, cr, flag, ct, vsecp)) == 0) {
+ inotify_watch_insert(watch, *vpp, name);
+ inotify_watch_event(watch, IN_CREATE, name);
+ }
+
+ return (rval);
+}
+
+static int
+inotify_fop_link(femarg_t *vf, vnode_t *svp, char *tnm, cred_t *cr,
+ caller_context_t *ct, int flags)
+{
+ inotify_watch_t *watch = vf->fa_fnode->fn_available;
+ int rval;
+
+ if ((rval = vnext_link(vf, svp, tnm, cr, ct, flags)) == 0) {
+ inotify_watch_insert(watch, svp, tnm);
+ inotify_watch_event(watch, IN_CREATE, tnm);
+ }
+
+ return (rval);
+}
+
+static int
+inotify_fop_mkdir(femarg_t *vf, char *name, vattr_t *vap, vnode_t **vpp,
+ cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
+{
+ inotify_watch_t *watch = vf->fa_fnode->fn_available;
+ int rval;
+
+ if ((rval = vnext_mkdir(vf, name, vap, vpp, cr,
+ ct, flags, vsecp)) == 0) {
+ inotify_watch_insert(watch, *vpp, name);
+ inotify_watch_event(watch, IN_CREATE | IN_ISDIR, name);
+ }
+
+ return (rval);
+}
+
+static int
+inotify_fop_open(femarg_t *vf, int mode, cred_t *cr, caller_context_t *ct)
+{
+ inotify_watch_t *watch = vf->fa_fnode->fn_available;
+ int rval;
+
+ if ((rval = vnext_open(vf, mode, cr, ct)) == 0)
+ inotify_watch_event(watch, IN_OPEN, NULL);
+
+ return (rval);
+}
+
+static int
+inotify_fop_read(femarg_t *vf, struct uio *uiop, int ioflag, struct cred *cr,
+ caller_context_t *ct)
+{
+ inotify_watch_t *watch = vf->fa_fnode->fn_available;
+ int rval = vnext_read(vf, uiop, ioflag, cr, ct);
+ inotify_watch_event(watch, IN_ACCESS, NULL);
+
+ return (rval);
+}
+
+static int
+inotify_fop_readdir(femarg_t *vf, uio_t *uiop, cred_t *cr, int *eofp,
+ caller_context_t *ct, int flags)
+{
+ inotify_watch_t *watch = vf->fa_fnode->fn_available;
+ int rval = vnext_readdir(vf, uiop, cr, eofp, ct, flags);
+ inotify_watch_event(watch, IN_ACCESS | IN_ISDIR, NULL);
+
+ return (rval);
+}
+
+int
+inotify_fop_remove(femarg_t *vf, char *nm, cred_t *cr, caller_context_t *ct,
+ int flags)
+{
+ inotify_watch_t *watch = vf->fa_fnode->fn_available;
+ int rval;
+
+ if ((rval = vnext_remove(vf, nm, cr, ct, flags)) == 0)
+ inotify_watch_event(watch, IN_DELETE, nm);
+
+ return (rval);
+}
+
+int
+inotify_fop_rmdir(femarg_t *vf, char *nm, vnode_t *cdir, cred_t *cr,
+ caller_context_t *ct, int flags)
+{
+ inotify_watch_t *watch = vf->fa_fnode->fn_available;
+ int rval;
+
+ if ((rval = vnext_rmdir(vf, nm, cdir, cr, ct, flags)) == 0)
+ inotify_watch_event(watch, IN_DELETE | IN_ISDIR, nm);
+
+ return (rval);
+}
+
+static int
+inotify_fop_setattr(femarg_t *vf, vattr_t *vap, int flags, cred_t *cr,
+ caller_context_t *ct)
+{
+ inotify_watch_t *watch = vf->fa_fnode->fn_available;
+ int rval;
+
+ if ((rval = vnext_setattr(vf, vap, flags, cr, ct)) == 0)
+ inotify_watch_event(watch, IN_ATTRIB, NULL);
+
+ return (rval);
+}
+
+static int
+inotify_fop_write(femarg_t *vf, struct uio *uiop, int ioflag, struct cred *cr,
+ caller_context_t *ct)
+{
+ inotify_watch_t *watch = vf->fa_fnode->fn_available;
+ int rval = vnext_write(vf, uiop, ioflag, cr, ct);
+ inotify_watch_event(watch, IN_MODIFY, NULL);
+
+ return (rval);
+}
+
+static int
+inotify_fop_vnevent(femarg_t *vf, vnevent_t vnevent, vnode_t *dvp, char *name,
+ caller_context_t *ct)
+{
+ inotify_watch_t *watch = vf->fa_fnode->fn_available;
+
+ switch (vnevent) {
+ case VE_RENAME_SRC:
+ inotify_watch_event(watch, IN_MOVE_SELF, NULL);
+ inotify_watch_delete(watch, IN_MOVE_SELF);
+ break;
+ case VE_REMOVE:
+ /*
+ * Linux will apparently fire an IN_ATTRIB event when the link
+ * count changes (including when it drops to 0 on a remove).
+ * This is merely somewhat odd; what is amazing is that this
+ * IN_ATTRIB event is not visible on an inotify watch on the
+ * parent directory. (IN_ATTRIB events are normally sent to
+ * watches on the parent directory). While it's hard to
+ * believe that this constitutes desired semantics, ltp
+ * unfortunately tests this case (if implicitly); in the name
+ * of bug-for-bug compatibility, we fire IN_ATTRIB iff we are
+ * explicitly watching the file that has been removed.
+ */
+ if (watch->inw_parent == NULL)
+ inotify_watch_event(watch, IN_ATTRIB, NULL);
+
+ /*FALLTHROUGH*/
+ case VE_RENAME_DEST:
+ inotify_watch_event(watch, IN_DELETE_SELF, NULL);
+ inotify_watch_delete(watch, IN_DELETE_SELF);
+ break;
+ case VE_RMDIR:
+ /*
+ * It seems that IN_ISDIR should really be OR'd in here, but
+ * Linux doesn't seem to do that in this case; for the sake of
+ * bug-for-bug compatibility, we don't do it either.
+ */
+ inotify_watch_event(watch, IN_DELETE_SELF, NULL);
+ inotify_watch_delete(watch, IN_DELETE_SELF);
+ break;
+ case VE_CREATE:
+ case VE_TRUNCATE:
+ case VE_RESIZE:
+ inotify_watch_event(watch, IN_MODIFY | IN_ATTRIB, NULL);
+ break;
+ case VE_LINK:
+ inotify_watch_event(watch, IN_ATTRIB, NULL);
+ break;
+ case VE_RENAME_SRC_DIR:
+ inotify_watch_event(watch, IN_MOVED_FROM, name);
+ break;
+ case VE_RENAME_DEST_DIR:
+ if (name == NULL)
+ name = dvp->v_path;
+
+ inotify_watch_insert(watch, dvp, name);
+ inotify_watch_event(watch, IN_MOVED_TO, name);
+ break;
+ case VE_SUPPORT:
+ case VE_MOUNTEDOVER:
+ case VE_PRE_RENAME_SRC:
+ case VE_PRE_RENAME_DEST:
+ case VE_PRE_RENAME_DEST_DIR:
+ break;
+ }
+
+ return (vnext_vnevent(vf, vnevent, dvp, name, ct));
+}
+
+const fs_operation_def_t inotify_vnodesrc_template[] = {
+ VOPNAME_CLOSE, { .femop_close = inotify_fop_close },
+ VOPNAME_CREATE, { .femop_create = inotify_fop_create },
+ VOPNAME_LINK, { .femop_link = inotify_fop_link },
+ VOPNAME_MKDIR, { .femop_mkdir = inotify_fop_mkdir },
+ VOPNAME_OPEN, { .femop_open = inotify_fop_open },
+ VOPNAME_READ, { .femop_read = inotify_fop_read },
+ VOPNAME_READDIR, { .femop_readdir = inotify_fop_readdir },
+ VOPNAME_REMOVE, { .femop_remove = inotify_fop_remove },
+ VOPNAME_RMDIR, { .femop_rmdir = inotify_fop_rmdir },
+ VOPNAME_SETATTR, { .femop_setattr = inotify_fop_setattr },
+ VOPNAME_WRITE, { .femop_write = inotify_fop_write },
+ VOPNAME_VNEVENT, { .femop_vnevent = inotify_fop_vnevent },
+ NULL, NULL
+};
+
+static int
+inotify_watch_cmpwd(inotify_watch_t *lhs, inotify_watch_t *rhs)
+{
+ if (lhs->inw_wd < rhs->inw_wd)
+ return (-1);
+
+ if (lhs->inw_wd > rhs->inw_wd)
+ return (1);
+
+ return (0);
+}
+
+static int
+inotify_watch_cmpvp(inotify_watch_t *lhs, inotify_watch_t *rhs)
+{
+ uintptr_t lvp = (uintptr_t)lhs->inw_vp, rvp = (uintptr_t)rhs->inw_vp;
+
+ if (lvp < rvp)
+ return (-1);
+
+ if (lvp > rvp)
+ return (1);
+
+ return (0);
+}
+
+static void
+inotify_watch_hold(inotify_watch_t *watch)
+{
+ mutex_enter(&watch->inw_lock);
+ VERIFY(watch->inw_refcnt > 0);
+ watch->inw_refcnt++;
+ mutex_exit(&watch->inw_lock);
+}
+
+static void
+inotify_watch_release(inotify_watch_t *watch)
+{
+ mutex_enter(&watch->inw_lock);
+ VERIFY(watch->inw_refcnt > 1);
+
+ if (--watch->inw_refcnt == 1 && watch->inw_zombie) {
+ /*
+ * We're down to our last reference; kick anyone that might be
+ * waiting.
+ */
+ cv_signal(&watch->inw_cv);
+ }
+
+ mutex_exit(&watch->inw_lock);
+}
+
+static void
+inotify_watch_event(inotify_watch_t *watch, uint64_t mask, char *name)
+{
+ inotify_kevent_t *event, *tail;
+ inotify_state_t *state = watch->inw_state;
+ uint32_t wd = watch->inw_wd, cookie = 0, len;
+ boolean_t removal = mask & IN_REMOVAL ? B_TRUE : B_FALSE;
+ inotify_watch_t *source = watch;
+
+ if (!(mask &= watch->inw_mask) || mask == IN_ISDIR)
+ return;
+
+ if (watch->inw_parent != NULL) {
+ /*
+ * This is an event on the child; if this isn't a valid child
+ * event, return. Otherwise, we move our watch to be our
+ * parent (which we know is around because we have a hold on
+ * it) and continue.
+ */
+ if (!(mask & IN_CHILD_EVENTS))
+ return;
+
+ name = watch->inw_name;
+ watch = watch->inw_parent;
+ wd = watch->inw_wd;
+ }
+
+ if (!removal) {
+ mutex_enter(&state->ins_lock);
+
+ if (watch->inw_zombie ||
+ watch->inw_fired || !watch->inw_active) {
+ mutex_exit(&state->ins_lock);
+ return;
+ }
+ } else {
+ if (!watch->inw_active)
+ return;
+
+ VERIFY(MUTEX_HELD(&state->ins_lock));
+ }
+
+ /*
+ * If this is an operation on a directory and it's a child event
+ * (event if it's not on a child), we specify IN_ISDIR.
+ */
+ if (source->inw_vp->v_type == VDIR && (mask & IN_CHILD_EVENTS))
+ mask |= IN_ISDIR;
+
+ if (mask & (IN_MOVED_FROM | IN_MOVED_TO))
+ cookie = (uint32_t)curthread->t_did;
+
+ if (state->ins_nevents >= state->ins_maxevents) {
+ /*
+ * We're at our maximum number of events -- turn our event
+ * into an IN_Q_OVERFLOW event, which will be coalesced if
+ * it's already the tail event.
+ */
+ mask = IN_Q_OVERFLOW;
+ wd = (uint32_t)-1;
+ cookie = 0;
+ len = 0;
+ }
+
+ if ((tail = state->ins_tail) != NULL && tail->ine_event.wd == wd &&
+ tail->ine_event.mask == mask && tail->ine_event.cookie == cookie &&
+ ((tail->ine_event.len == 0 && len == 0) ||
+ (name != NULL && tail->ine_event.len != 0 &&
+ strcmp(tail->ine_event.name, name) == 0))) {
+ /*
+ * This is an implicitly coalesced event; we're done.
+ */
+ if (!removal)
+ mutex_exit(&state->ins_lock);
+ return;
+ }
+
+ if (name != NULL) {
+ len = strlen(name) + 1;
+ len = roundup(len, sizeof (struct inotify_event));
+ } else {
+ len = 0;
+ }
+
+ event = kmem_zalloc(sizeof (inotify_kevent_t) + len, KM_SLEEP);
+ event->ine_event.wd = wd;
+ event->ine_event.mask = (uint32_t)mask;
+ event->ine_event.cookie = cookie;
+ event->ine_event.len = len;
+
+ if (name != NULL)
+ strcpy(event->ine_event.name, name);
+
+ if (tail != NULL) {
+ tail->ine_next = event;
+ } else {
+ VERIFY(state->ins_head == NULL);
+ state->ins_head = event;
+ cv_broadcast(&state->ins_cv);
+ }
+
+ state->ins_tail = event;
+ state->ins_nevents++;
+ state->ins_size += sizeof (event->ine_event) + len;
+
+ if (removal)
+ return;
+
+ if ((watch->inw_mask & IN_ONESHOT) && !watch->inw_fired) {
+ /*
+ * If this is a one-shot, we need to remove the watch. (Note
+ * that this will recurse back into inotify_watch_event() to
+ * fire the IN_IGNORED event -- but with "removal" set.)
+ */
+ watch->inw_fired = 1;
+ inotify_watch_remove(state, watch);
+ }
+
+ mutex_exit(&state->ins_lock);
+ pollwakeup(&state->ins_pollhd, POLLRDNORM | POLLIN);
+}
+
+/*
+ * Destroy a watch. By the time we're in here, the watch must have exactly
+ * one reference.
+ */
+static void
+inotify_watch_destroy(inotify_watch_t *watch)
+{
+ VERIFY(MUTEX_HELD(&watch->inw_lock));
+
+ if (watch->inw_name != NULL)
+ kmem_free(watch->inw_name, strlen(watch->inw_name) + 1);
+
+ kmem_free(watch, sizeof (inotify_watch_t));
+}
+
+/*
+ * Zombify a watch. By the time we come in here, it must be true that the
+ * watch has already been fem_uninstall()'d -- the only reference should be
+ * in the state's data structure. If we can get away with freeing it, we'll
+ * do that -- but if the reference count is greater than one due to an active
+ * vnode operation, we'll put this watch on the zombie list on the state
+ * structure.
+ */
+static void
+inotify_watch_zombify(inotify_watch_t *watch)
+{
+ inotify_state_t *state = watch->inw_state;
+
+ VERIFY(MUTEX_HELD(&state->ins_lock));
+ VERIFY(!watch->inw_zombie);
+
+ watch->inw_zombie = 1;
+
+ if (watch->inw_parent != NULL) {
+ inotify_watch_release(watch->inw_parent);
+ } else {
+ avl_remove(&state->ins_byvp, watch);
+ avl_remove(&state->ins_bywd, watch);
+ vmem_free(state->ins_wds, (void *)(uintptr_t)watch->inw_wd, 1);
+ watch->inw_wd = -1;
+ }
+
+ mutex_enter(&watch->inw_lock);
+
+ if (watch->inw_refcnt == 1) {
+ /*
+ * There are no operations in flight and there is no way
+ * for anyone to discover this watch -- we can destroy it.
+ */
+ inotify_watch_destroy(watch);
+ } else {
+ /*
+ * There are operations in flight; we will need to enqueue
+ * this for later destruction.
+ */
+ watch->inw_parent = state->ins_zombies;
+ state->ins_zombies = watch;
+ mutex_exit(&watch->inw_lock);
+ }
+}
+
+static inotify_watch_t *
+inotify_watch_add(inotify_state_t *state, inotify_watch_t *parent,
+ const char *name, vnode_t *vp, uint32_t mask)
+{
+ inotify_watch_t *watch;
+ int err;
+
+ VERIFY(MUTEX_HELD(&state->ins_lock));
+
+ watch = kmem_zalloc(sizeof (inotify_watch_t), KM_SLEEP);
+
+ watch->inw_vp = vp;
+ watch->inw_mask = mask;
+ watch->inw_state = state;
+ watch->inw_refcnt = 1;
+
+ if (parent == NULL) {
+ watch->inw_wd = (int)(uintptr_t)vmem_alloc(state->ins_wds,
+ 1, VM_BESTFIT | VM_SLEEP);
+ avl_add(&state->ins_byvp, watch);
+ avl_add(&state->ins_bywd, watch);
+
+ avl_create(&watch->inw_children,
+ (int(*)(const void *, const void *))inotify_watch_cmpvp,
+ sizeof (inotify_watch_t),
+ offsetof(inotify_watch_t, inw_byvp));
+ } else {
+ VERIFY(name != NULL);
+ inotify_watch_hold(parent);
+ watch->inw_mask &= IN_CHILD_EVENTS;
+ watch->inw_parent = parent;
+ watch->inw_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
+ strcpy(watch->inw_name, name);
+
+ avl_add(&parent->inw_children, watch);
+ }
+
+ /*
+ * Add our monitor to the vnode. We must not have the watch lock held
+ * when we do this, as it will immediately hold our watch.
+ */
+ err = fem_install(vp, inotify_femp, watch, OPARGUNIQ,
+ (void (*)(void *))inotify_watch_hold,
+ (void (*)(void *))inotify_watch_release);
+
+ VERIFY(err == 0);
+
+ return (watch);
+}
+
+/*
+ * Remove a (non-child) watch. This is called from either synchronous context
+ * via inotify_rm_watch() or monitor context via either a vnevent or a
+ * one-shot.
+ */
+static void
+inotify_watch_remove(inotify_state_t *state, inotify_watch_t *watch)
+{
+ inotify_watch_t *child;
+ int err;
+
+ VERIFY(MUTEX_HELD(&state->ins_lock));
+ VERIFY(watch->inw_parent == NULL);
+
+ err = fem_uninstall(watch->inw_vp, inotify_femp, watch);
+ VERIFY(err == 0);
+
+ /*
+ * If we have children, we're going to remove them all and set them
+ * all to be zombies.
+ */
+ while ((child = avl_first(&watch->inw_children)) != NULL) {
+ VERIFY(child->inw_parent == watch);
+ avl_remove(&watch->inw_children, child);
+
+ err = fem_uninstall(child->inw_vp, inotify_femp, child);
+ VERIFY(err == 0);
+
+ /*
+ * If this child watch has been orphaned, remove it from the
+ * state's list of orphans.
+ */
+ if (child->inw_orphaned) {
+ list_remove(&state->ins_orphans, child);
+ crfree(child->inw_cred);
+ }
+
+ VN_RELE(child->inw_vp);
+
+ /*
+ * We're down (or should be down) to a single reference to
+ * this child watch; it's safe to zombify it.
+ */
+ inotify_watch_zombify(child);
+ }
+
+ inotify_watch_event(watch, IN_IGNORED | IN_REMOVAL, NULL);
+ VN_RELE(watch->inw_vp);
+
+ /*
+ * It's now safe to zombify the watch -- we know that the only reference
+ * can come from operations in flight.
+ */
+ inotify_watch_zombify(watch);
+}
+
+/*
+ * Delete a watch. Should only be called from VOP context.
+ */
+static void
+inotify_watch_delete(inotify_watch_t *watch, uint32_t event)
+{
+ inotify_state_t *state = watch->inw_state;
+ inotify_watch_t cmp = { .inw_vp = watch->inw_vp }, *parent;
+ int err;
+
+ if (event != IN_DELETE_SELF && !(watch->inw_mask & IN_CHILD_EVENTS))
+ return;
+
+ mutex_enter(&state->ins_lock);
+
+ if (watch->inw_zombie) {
+ mutex_exit(&state->ins_lock);
+ return;
+ }
+
+ if ((parent = watch->inw_parent) == NULL) {
+ if (event == IN_DELETE_SELF) {
+ /*
+ * If we're here because we're being deleted and we
+ * are not a child watch, we need to delete the entire
+ * watch, children and all.
+ */
+ inotify_watch_remove(state, watch);
+ }
+
+ mutex_exit(&state->ins_lock);
+ return;
+ } else {
+ if (event == IN_DELETE_SELF &&
+ !(parent->inw_mask & IN_EXCL_UNLINK)) {
+ /*
+ * This is a child watch for a file that is being
+ * removed and IN_EXCL_UNLINK has not been specified;
+ * indicate that it is orphaned and add it to the list
+ * of orphans. (This list will be checked by the
+ * cleaning cyclic to determine when the watch has
+ * become the only hold on the vnode, at which point
+ * the watch can be zombified.) Note that we check
+ * if the watch is orphaned before we orphan it: hard
+ * links make it possible for VE_REMOVE to be called
+ * multiple times on the same vnode. (!)
+ */
+ if (!watch->inw_orphaned) {
+ watch->inw_orphaned = 1;
+ watch->inw_cred = CRED();
+ crhold(watch->inw_cred);
+ list_insert_head(&state->ins_orphans, watch);
+ }
+
+ mutex_exit(&state->ins_lock);
+ return;
+ }
+
+ if (watch->inw_orphaned) {
+ /*
+ * If we're here, a file was orphaned and then later
+ * moved -- which almost certainly means that hard
+ * links are on the scene. We choose the orphan over
+ * the move because we don't want to spuriously
+ * drop events if we can avoid it.
+ */
+ crfree(watch->inw_cred);
+ list_remove(&state->ins_orphans, watch);
+ }
+ }
+
+ if (avl_find(&parent->inw_children, &cmp, NULL) == NULL) {
+ /*
+ * This watch has already been deleted from the parent.
+ */
+ mutex_exit(&state->ins_lock);
+ return;
+ }
+
+ avl_remove(&parent->inw_children, watch);
+ err = fem_uninstall(watch->inw_vp, inotify_femp, watch);
+ VERIFY(err == 0);
+
+ VN_RELE(watch->inw_vp);
+
+ /*
+ * It's now safe to zombify the watch -- which won't actually delete
+ * it as we know that the reference count is greater than 1.
+ */
+ inotify_watch_zombify(watch);
+ mutex_exit(&state->ins_lock);
+}
+
+/*
+ * Insert a new child watch. Should only be called from VOP context when
+ * a child is created in a watched directory.
+ */
+static void
+inotify_watch_insert(inotify_watch_t *watch, vnode_t *vp, char *name)
+{
+ inotify_state_t *state = watch->inw_state;
+ inotify_watch_t cmp = { .inw_vp = vp };
+
+ if (!(watch->inw_mask & IN_CHILD_EVENTS))
+ return;
+
+ mutex_enter(&state->ins_lock);
+
+ if (watch->inw_zombie || watch->inw_parent != NULL || vp == NULL) {
+ mutex_exit(&state->ins_lock);
+ return;
+ }
+
+ if (avl_find(&watch->inw_children, &cmp, NULL) != NULL) {
+ mutex_exit(&state->ins_lock);
+ return;
+ }
+
+ VN_HOLD(vp);
+ watch = inotify_watch_add(state, watch, name, vp, watch->inw_mask);
+ VERIFY(watch != NULL);
+
+ mutex_exit(&state->ins_lock);
+}
+
+
+static int
+inotify_add_watch(inotify_state_t *state, vnode_t *vp, uint32_t mask,
+ int32_t *wdp)
+{
+ inotify_watch_t *watch, cmp = { .inw_vp = vp };
+ uint32_t set;
+
+ set = (mask & (IN_ALL_EVENTS | IN_MODIFIERS)) | IN_UNMASKABLE;
+
+ /*
+ * Lookup our vnode to determine if we already have a watch on it.
+ */
+ mutex_enter(&state->ins_lock);
+
+ if ((watch = avl_find(&state->ins_byvp, &cmp, NULL)) == NULL) {
+ /*
+ * We don't have this watch; allocate a new one, provided that
+ * we have fewer than our limit.
+ */
+ if (avl_numnodes(&state->ins_bywd) >= state->ins_maxwatches) {
+ mutex_exit(&state->ins_lock);
+ return (ENOSPC);
+ }
+
+ VN_HOLD(vp);
+ watch = inotify_watch_add(state, NULL, NULL, vp, set);
+ *wdp = watch->inw_wd;
+ mutex_exit(&state->ins_lock);
+
+ return (0);
+ }
+
+ VERIFY(!watch->inw_zombie);
+
+ if (!(mask & IN_MASK_ADD)) {
+ /*
+ * Note that if we're resetting our event mask and we're
+ * transitioning from an event mask that includes child events
+ * to one that doesn't, there will be potentially some stale
+ * child watches. This is basically fine: they won't fire,
+ * and they will correctly be removed when the watch is
+ * removed.
+ */
+ watch->inw_mask = 0;
+ }
+
+ watch->inw_mask |= set;
+
+ *wdp = watch->inw_wd;
+
+ mutex_exit(&state->ins_lock);
+
+ return (0);
+}
+
+static int
+inotify_add_child(inotify_state_t *state, vnode_t *vp, char *name)
+{
+ inotify_watch_t *watch, cmp = { .inw_vp = vp };
+ vnode_t *cvp;
+ int err;
+
+ /*
+ * Verify that the specified child doesn't have a directory component
+ * within it.
+ */
+ if (strchr(name, '/') != NULL)
+ return (EINVAL);
+
+ /*
+ * Lookup the underlying file. Note that this will succeed even if
+ * we don't have permissions to actually read the file.
+ */
+ if ((err = lookupnameat(name,
+ UIO_SYSSPACE, NO_FOLLOW, NULL, &cvp, vp)) != 0) {
+ return (err);
+ }
+
+ /*
+ * Use our vnode to find our watch, and then add our child watch to it.
+ */
+ mutex_enter(&state->ins_lock);
+
+ if ((watch = avl_find(&state->ins_byvp, &cmp, NULL)) == NULL) {
+ /*
+ * This is unexpected -- it means that we don't have the
+ * watch that we thought we had.
+ */
+ mutex_exit(&state->ins_lock);
+ VN_RELE(cvp);
+ return (ENXIO);
+ }
+
+ /*
+ * Now lookup the child vnode in the watch; we'll only add it if it
+ * isn't already there.
+ */
+ cmp.inw_vp = cvp;
+
+ if (avl_find(&watch->inw_children, &cmp, NULL) != NULL) {
+ mutex_exit(&state->ins_lock);
+ VN_RELE(cvp);
+ return (0);
+ }
+
+ watch = inotify_watch_add(state, watch, name, cvp, watch->inw_mask);
+ VERIFY(watch != NULL);
+ mutex_exit(&state->ins_lock);
+
+ return (0);
+}
+
+static int
+inotify_rm_watch(inotify_state_t *state, int32_t wd)
+{
+ inotify_watch_t *watch, cmp = { .inw_wd = wd };
+
+ mutex_enter(&state->ins_lock);
+
+ if ((watch = avl_find(&state->ins_bywd, &cmp, NULL)) == NULL) {
+ mutex_exit(&state->ins_lock);
+ return (EINVAL);
+ }
+
+ inotify_watch_remove(state, watch);
+ mutex_exit(&state->ins_lock);
+
+ return (0);
+}
+
+static int
+inotify_activate(inotify_state_t *state, int32_t wd)
+{
+ inotify_watch_t *watch, cmp = { .inw_wd = wd };
+
+ mutex_enter(&state->ins_lock);
+
+ if ((watch = avl_find(&state->ins_bywd, &cmp, NULL)) == NULL) {
+ mutex_exit(&state->ins_lock);
+ return (EINVAL);
+ }
+
+ watch->inw_active = 1;
+
+ mutex_exit(&state->ins_lock);
+
+ return (0);
+}
+
+/*
+ * Called periodically as a cyclic to process the orphans and zombies.
+ */
+static void
+inotify_clean(void *arg)
+{
+ inotify_state_t *state = arg;
+ inotify_watch_t *watch, *parent, *next, **prev;
+ cred_t *savecred;
+ int err;
+
+ mutex_enter(&state->ins_lock);
+
+ for (watch = list_head(&state->ins_orphans);
+ watch != NULL; watch = next) {
+ next = list_next(&state->ins_orphans, watch);
+
+ VERIFY(!watch->inw_zombie);
+ VERIFY((parent = watch->inw_parent) != NULL);
+
+ if (watch->inw_vp->v_count > 1)
+ continue;
+
+ avl_remove(&parent->inw_children, watch);
+ err = fem_uninstall(watch->inw_vp, inotify_femp, watch);
+ VERIFY(err == 0);
+
+ list_remove(&state->ins_orphans, watch);
+
+ /*
+ * For purposes of releasing the vnode, we need to switch our
+ * cred to be the cred of the orphaning thread (which we held
+ * at the time this watch was orphaned).
+ */
+ savecred = curthread->t_cred;
+ curthread->t_cred = watch->inw_cred;
+ VN_RELE(watch->inw_vp);
+ crfree(watch->inw_cred);
+ curthread->t_cred = savecred;
+
+ inotify_watch_zombify(watch);
+ }
+
+ prev = &state->ins_zombies;
+
+ while ((watch = *prev) != NULL) {
+ mutex_enter(&watch->inw_lock);
+
+ if (watch->inw_refcnt == 1) {
+ *prev = watch->inw_parent;
+ inotify_watch_destroy(watch);
+ continue;
+ }
+
+ prev = &watch->inw_parent;
+ mutex_exit(&watch->inw_lock);
+ }
+
+ mutex_exit(&state->ins_lock);
+}
+
+/*ARGSUSED*/
+static int
+inotify_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
+{
+ inotify_state_t *state;
+ major_t major = getemajor(*devp);
+ minor_t minor = getminor(*devp);
+ int instances = 0;
+ char c[64];
+
+ if (minor != INOTIFYMNRN_INOTIFY)
+ return (ENXIO);
+
+ mutex_enter(&inotify_lock);
+
+ for (state = inotify_state; state != NULL; state = state->ins_next) {
+ if (state->ins_cred == cred_p)
+ instances++;
+ }
+
+ if (instances >= inotify_maxinstances) {
+ mutex_exit(&inotify_lock);
+ return (EMFILE);
+ }
+
+ minor = (minor_t)(uintptr_t)vmem_alloc(inotify_minor, 1,
+ VM_BESTFIT | VM_SLEEP);
+
+ if (ddi_soft_state_zalloc(inotify_softstate, minor) != DDI_SUCCESS) {
+ vmem_free(inotify_minor, (void *)(uintptr_t)minor, 1);
+ mutex_exit(&inotify_lock);
+ return (NULL);
+ }
+
+ state = ddi_get_soft_state(inotify_softstate, minor);
+ *devp = makedevice(major, minor);
+
+ crhold(cred_p);
+ state->ins_cred = cred_p;
+ state->ins_next = inotify_state;
+ inotify_state = state;
+
+ (void) snprintf(c, sizeof (c), "inotify_watchid_%d", minor);
+ state->ins_wds = vmem_create(c, (void *)1, UINT32_MAX, 1,
+ NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
+
+ avl_create(&state->ins_bywd,
+ (int(*)(const void *, const void *))inotify_watch_cmpwd,
+ sizeof (inotify_watch_t),
+ offsetof(inotify_watch_t, inw_bywd));
+
+ avl_create(&state->ins_byvp,
+ (int(*)(const void *, const void *))inotify_watch_cmpvp,
+ sizeof (inotify_watch_t),
+ offsetof(inotify_watch_t, inw_byvp));
+
+ list_create(&state->ins_orphans, sizeof (inotify_watch_t),
+ offsetof(inotify_watch_t, inw_orphan));
+
+ state->ins_maxwatches = inotify_maxwatches;
+ state->ins_maxevents = inotify_maxevents;
+
+ mutex_exit(&inotify_lock);
+
+ state->ins_cleaner = ddi_periodic_add(inotify_clean,
+ state, NANOSEC, DDI_IPL_0);
+
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+inotify_read(dev_t dev, uio_t *uio, cred_t *cr)
+{
+ inotify_state_t *state;
+ inotify_kevent_t *event;
+ minor_t minor = getminor(dev);
+ int err = 0, nevents = 0;
+ size_t len;
+
+ state = ddi_get_soft_state(inotify_softstate, minor);
+
+ mutex_enter(&state->ins_lock);
+
+ while (state->ins_head == NULL) {
+ if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) {
+ mutex_exit(&state->ins_lock);
+ return (EAGAIN);
+ }
+
+ if (!cv_wait_sig_swap(&state->ins_cv, &state->ins_lock)) {
+ mutex_exit(&state->ins_lock);
+ return (EINTR);
+ }
+ }
+
+ /*
+ * We have events and we have our lock; return as many as we can.
+ */
+ while ((event = state->ins_head) != NULL) {
+ len = sizeof (event->ine_event) + event->ine_event.len;
+
+ if (uio->uio_resid < len) {
+ if (nevents == 0)
+ err = EINVAL;
+ break;
+ }
+
+ nevents++;
+
+ if ((err = uiomove(&event->ine_event, len, UIO_READ, uio)) != 0)
+ break;
+
+ VERIFY(state->ins_nevents > 0);
+ state->ins_nevents--;
+
+ VERIFY(state->ins_size > 0);
+ state->ins_size -= len;
+
+ if ((state->ins_head = event->ine_next) == NULL) {
+ VERIFY(event == state->ins_tail);
+ VERIFY(state->ins_nevents == 0);
+ state->ins_tail = NULL;
+ }
+
+ kmem_free(event, INOTIFY_EVENT_LENGTH(event));
+ }
+
+ mutex_exit(&state->ins_lock);
+
+ return (err);
+}
+
+/*ARGSUSED*/
+static int
+inotify_poll(dev_t dev, short events, int anyyet, short *reventsp,
+ struct pollhead **phpp)
+{
+ inotify_state_t *state;
+ minor_t minor = getminor(dev);
+
+ state = ddi_get_soft_state(inotify_softstate, minor);
+
+ mutex_enter(&state->ins_lock);
+
+ if (state->ins_head != NULL) {
+ *reventsp = events & (POLLRDNORM | POLLIN);
+ } else {
+ *reventsp = 0;
+
+ if (!anyyet)
+ *phpp = &state->ins_pollhd;
+ }
+
+ mutex_exit(&state->ins_lock);
+
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+inotify_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
+{
+ inotify_state_t *state;
+ minor_t minor = getminor(dev);
+ file_t *fp;
+ int rval;
+
+ state = ddi_get_soft_state(inotify_softstate, minor);
+
+ switch (cmd) {
+ case INOTIFYIOC_ADD_WATCH: {
+ inotify_addwatch_t addwatch;
+ file_t *fp;
+
+ if (copyin((void *)arg, &addwatch, sizeof (addwatch)) != 0)
+ return (EFAULT);
+
+ if ((fp = getf(addwatch.inaw_fd)) == NULL)
+ return (EBADF);
+
+ rval = inotify_add_watch(state, fp->f_vnode,
+ addwatch.inaw_mask, rv);
+
+ releasef(addwatch.inaw_fd);
+ return (rval);
+ }
+
+ case INOTIFYIOC_ADD_CHILD: {
+ inotify_addchild_t addchild;
+ char name[MAXPATHLEN];
+
+ if (copyin((void *)arg, &addchild, sizeof (addchild)) != 0)
+ return (EFAULT);
+
+ if (copyinstr(addchild.inac_name, name, MAXPATHLEN, NULL) != 0)
+ return (EFAULT);
+
+ if ((fp = getf(addchild.inac_fd)) == NULL)
+ return (EBADF);
+
+ rval = inotify_add_child(state, fp->f_vnode, name);
+
+ releasef(addchild.inac_fd);
+ return (rval);
+ }
+
+ case INOTIFYIOC_RM_WATCH:
+ return (inotify_rm_watch(state, arg));
+
+ case INOTIFYIOC_ACTIVATE:
+ return (inotify_activate(state, arg));
+
+ case FIONREAD: {
+ int32_t size;
+
+ mutex_enter(&state->ins_lock);
+ size = state->ins_size;
+ mutex_exit(&state->ins_lock);
+
+ if (copyout(&size, (void *)arg, sizeof (size)) != 0)
+ return (EFAULT);
+
+ return (0);
+ }
+
+ default:
+ break;
+ }
+
+ return (ENOTTY);
+}
+
+/*ARGSUSED*/
+static int
+inotify_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
+{
+ inotify_state_t *state, **sp;
+ inotify_watch_t *watch, *zombies;
+ inotify_kevent_t *event;
+ minor_t minor = getminor(dev);
+
+ state = ddi_get_soft_state(inotify_softstate, minor);
+
+ if (state->ins_pollhd.ph_list != NULL) {
+ pollwakeup(&state->ins_pollhd, POLLERR);
+ pollhead_clean(&state->ins_pollhd);
+ }
+
+ mutex_enter(&state->ins_lock);
+
+ /*
+ * First, destroy all of our watches.
+ */
+ while ((watch = avl_first(&state->ins_bywd)) != NULL)
+ inotify_watch_remove(state, watch);
+
+ /*
+ * And now destroy our event queue.
+ */
+ while ((event = state->ins_head) != NULL) {
+ state->ins_head = event->ine_next;
+ kmem_free(event, INOTIFY_EVENT_LENGTH(event));
+ }
+
+ zombies = state->ins_zombies;
+ state->ins_zombies = NULL;
+ mutex_exit(&state->ins_lock);
+
+ /*
+ * Now that our state lock is dropped, we can synchronously wait on
+ * any zombies.
+ */
+ while ((watch = zombies) != NULL) {
+ zombies = zombies->inw_parent;
+
+ mutex_enter(&watch->inw_lock);
+
+ while (watch->inw_refcnt > 1)
+ cv_wait(&watch->inw_cv, &watch->inw_lock);
+
+ inotify_watch_destroy(watch);
+ }
+
+ if (state->ins_cleaner != NULL) {
+ ddi_periodic_delete(state->ins_cleaner);
+ state->ins_cleaner = NULL;
+ }
+
+ mutex_enter(&inotify_lock);
+
+ /*
+ * Remove our state from our global list, and release our hold on
+ * the cred.
+ */
+ for (sp = &inotify_state; *sp != state; sp = &((*sp)->ins_next))
+ VERIFY(*sp != NULL);
+
+ *sp = (*sp)->ins_next;
+ crfree(state->ins_cred);
+ vmem_destroy(state->ins_wds);
+
+ ddi_soft_state_free(inotify_softstate, minor);
+ vmem_free(inotify_minor, (void *)(uintptr_t)minor, 1);
+
+ mutex_exit(&inotify_lock);
+
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+inotify_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
+{
+ mutex_enter(&inotify_lock);
+
+ if (ddi_soft_state_init(&inotify_softstate,
+ sizeof (inotify_state_t), 0) != 0) {
+ cmn_err(CE_NOTE, "/dev/inotify failed to create soft state");
+ mutex_exit(&inotify_lock);
+ return (DDI_FAILURE);
+ }
+
+ if (ddi_create_minor_node(devi, "inotify", S_IFCHR,
+ INOTIFYMNRN_INOTIFY, DDI_PSEUDO, NULL) == DDI_FAILURE) {
+ cmn_err(CE_NOTE, "/dev/inotify couldn't create minor node");
+ ddi_soft_state_fini(&inotify_softstate);
+ mutex_exit(&inotify_lock);
+ return (DDI_FAILURE);
+ }
+
+ if (fem_create("inotify_fem",
+ inotify_vnodesrc_template, &inotify_femp) != 0) {
+ cmn_err(CE_NOTE, "/dev/inotify couldn't create FEM state");
+ ddi_remove_minor_node(devi, NULL);
+ ddi_soft_state_fini(&inotify_softstate);
+ mutex_exit(&inotify_lock);
+ return (DDI_FAILURE);
+ }
+
+ ddi_report_dev(devi);
+ inotify_devi = devi;
+
+ inotify_minor = vmem_create("inotify_minor", (void *)INOTIFYMNRN_CLONE,
+ UINT32_MAX - INOTIFYMNRN_CLONE, 1, NULL, NULL, NULL, 0,
+ VM_SLEEP | VMC_IDENTIFIER);
+
+ mutex_exit(&inotify_lock);
+
+ return (DDI_SUCCESS);
+}
+
+/*ARGSUSED*/
+static int
+inotify_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ switch (cmd) {
+ case DDI_DETACH:
+ break;
+
+ case DDI_SUSPEND:
+ return (DDI_SUCCESS);
+
+ default:
+ return (DDI_FAILURE);
+ }
+
+ mutex_enter(&inotify_lock);
+ fem_free(inotify_femp);
+ vmem_destroy(inotify_minor);
+
+ ddi_remove_minor_node(inotify_devi, NULL);
+ inotify_devi = NULL;
+
+ ddi_soft_state_fini(&inotify_softstate);
+ mutex_exit(&inotify_lock);
+
+ return (DDI_SUCCESS);
+}
+
+/*ARGSUSED*/
+static int
+inotify_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
+{
+ int error;
+
+ switch (infocmd) {
+ case DDI_INFO_DEVT2DEVINFO:
+ *result = (void *)inotify_devi;
+ error = DDI_SUCCESS;
+ break;
+ case DDI_INFO_DEVT2INSTANCE:
+ *result = (void *)0;
+ error = DDI_SUCCESS;
+ break;
+ default:
+ error = DDI_FAILURE;
+ }
+ return (error);
+}
+
+static struct cb_ops inotify_cb_ops = {
+ inotify_open, /* open */
+ inotify_close, /* close */
+ nulldev, /* strategy */
+ nulldev, /* print */
+ nodev, /* dump */
+ inotify_read, /* read */
+ nodev, /* write */
+ inotify_ioctl, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ nodev, /* segmap */
+ inotify_poll, /* poll */
+ ddi_prop_op, /* cb_prop_op */
+ 0, /* streamtab */
+ D_NEW | D_MP /* Driver compatibility flag */
+};
+
+static struct dev_ops inotify_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* refcnt */
+ inotify_info, /* get_dev_info */
+ nulldev, /* identify */
+ nulldev, /* probe */
+ inotify_attach, /* attach */
+ inotify_detach, /* detach */
+ nodev, /* reset */
+ &inotify_cb_ops, /* driver operations */
+ NULL, /* bus operations */
+ nodev, /* dev power */
+ ddi_quiesce_not_needed, /* quiesce */
+};
+
+static struct modldrv modldrv = {
+ &mod_driverops, /* module type (this is a pseudo driver) */
+ "inotify support", /* name of module */
+ &inotify_ops, /* driver ops */
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1,
+ (void *)&modldrv,
+ NULL
+};
+
+int
+_init(void)
+{
+ return (mod_install(&modlinkage));
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ return (mod_remove(&modlinkage));
+}
diff --git a/usr/src/uts/common/io/inotify.conf b/usr/src/uts/common/io/inotify.conf
new file mode 100644
index 0000000000..ce9da6180f
--- /dev/null
+++ b/usr/src/uts/common/io/inotify.conf
@@ -0,0 +1,16 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+name="inotify" parent="pseudo" instance=0;
diff --git a/usr/src/uts/common/io/ixgbe/ixgbe_main.c b/usr/src/uts/common/io/ixgbe/ixgbe_main.c
index 848e3470c7..c29a762e06 100644
--- a/usr/src/uts/common/io/ixgbe/ixgbe_main.c
+++ b/usr/src/uts/common/io/ixgbe/ixgbe_main.c
@@ -271,7 +271,7 @@ static adapter_info_t ixgbe_82599eb_cap = {
128, /* default number of rx queues */
64, /* maximum number of rx groups */
1, /* minimum number of rx groups */
- 1, /* default number of rx groups */
+ 32, /* default number of rx groups */
128, /* maximum number of tx queues */
1, /* minimum number of tx queues */
8, /* default number of tx queues */
@@ -302,7 +302,7 @@ static adapter_info_t ixgbe_X540_cap = {
128, /* default number of rx queues */
64, /* maximum number of rx groups */
1, /* minimum number of rx groups */
- 1, /* default number of rx groups */
+ 32, /* default number of rx groups */
128, /* maximum number of tx queues */
1, /* minimum number of tx queues */
8, /* default number of tx queues */
@@ -1792,6 +1792,7 @@ ixgbe_cbfunc(dev_info_t *dip, ddi_cb_action_t cbaction, void *cbarg,
void *arg1, void *arg2)
{
ixgbe_t *ixgbe = (ixgbe_t *)arg1;
+ int prev = ixgbe->intr_cnt;
switch (cbaction) {
/* IRM callback */
@@ -1805,7 +1806,8 @@ ixgbe_cbfunc(dev_info_t *dip, ddi_cb_action_t cbaction, void *cbarg,
if (ixgbe_intr_adjust(ixgbe, cbaction, count) !=
DDI_SUCCESS) {
ixgbe_error(ixgbe,
- "IRM CB: Failed to adjust interrupts");
+ "IRM CB: Failed to adjust interrupts [%d %d %d]",
+ cbaction, count, prev);
goto cb_fail;
}
break;
diff --git a/usr/src/uts/common/io/ksocket/ksocket.c b/usr/src/uts/common/io/ksocket/ksocket.c
index 8944fcbff3..25da45be39 100644
--- a/usr/src/uts/common/io/ksocket/ksocket.c
+++ b/usr/src/uts/common/io/ksocket/ksocket.c
@@ -22,7 +22,7 @@
/*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#include <sys/file.h>
@@ -932,3 +932,15 @@ ksocket_rele(ksocket_t ks)
cv_signal(&so->so_closing_cv);
}
}
+
+int
+ksocket_krecv_set(ksocket_t ks, ksocket_krecv_f cb, void *arg)
+{
+ return (so_krecv_set(KSTOSO(ks), (so_krecv_f)cb, arg));
+}
+
+void
+ksocket_krecv_unblock(ksocket_t ks)
+{
+ return (so_krecv_unblock(KSTOSO(ks)));
+}
diff --git a/usr/src/uts/common/io/ksocket/ksocket_impl.h b/usr/src/uts/common/io/ksocket/ksocket_impl.h
index ac5251540f..516a68d358 100644
--- a/usr/src/uts/common/io/ksocket/ksocket_impl.h
+++ b/usr/src/uts/common/io/ksocket/ksocket_impl.h
@@ -22,11 +22,17 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015, Joyent, Inc.
*/
#ifndef _INET_KSOCKET_KSOCKET_IMPL_H
#define _INET_KSOCKET_KSOCKET_IMPL_H
+/*
+ * Note that if this relationship ever changes, the logic in ksocket_krecv_set
+ * must be updated and we must maintain local state about this on whatever the
+ * new ksocket object is.
+ */
#define KSTOSO(ks) ((struct sonode *)(ks))
#define SOTOKS(so) ((ksocket_t)(uintptr_t)(so))
diff --git a/usr/src/uts/common/io/ksyms.c b/usr/src/uts/common/io/ksyms.c
index c9f0c63b69..5233fcd0b4 100644
--- a/usr/src/uts/common/io/ksyms.c
+++ b/usr/src/uts/common/io/ksyms.c
@@ -21,6 +21,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
*/
@@ -219,6 +220,14 @@ ksyms_open(dev_t *devp, int flag, int otyp, struct cred *cred)
char *addr;
void *hptr = NULL;
ksyms_buflist_hdr_t hdr;
+
+ /*
+ * This device should never be visible in a zone, but if it somehow
+ * does get created we refuse to allow the zone to use it.
+ */
+ if (crgetzoneid(cred) != GLOBAL_ZONEID)
+ return (EACCES);
+
bzero(&hdr, sizeof (struct ksyms_buflist_hdr));
list_create(&hdr.blist, PAGESIZE,
offsetof(ksyms_buflist_t, buflist_node));
diff --git a/usr/src/uts/common/io/mac/mac.c b/usr/src/uts/common/io/mac/mac.c
index 1d30dc3478..1bf49a5b44 100644
--- a/usr/src/uts/common/io/mac/mac.c
+++ b/usr/src/uts/common/io/mac/mac.c
@@ -3141,6 +3141,9 @@ mac_prop_check_size(mac_prop_id_t id, uint_t valsize, boolean_t is_range)
case MAC_PROP_WL_MLME:
minsize = sizeof (wl_mlme_t);
break;
+ case MAC_PROP_VN_PROMISC_FILTERED:
+ minsize = sizeof (boolean_t);
+ break;
}
return (valsize >= minsize);
diff --git a/usr/src/uts/common/io/mac/mac_client.c b/usr/src/uts/common/io/mac/mac_client.c
index 8f0ec9eb67..18a6613424 100644
--- a/usr/src/uts/common/io/mac/mac_client.c
+++ b/usr/src/uts/common/io/mac/mac_client.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
/*
@@ -3263,6 +3263,11 @@ mac_promisc_add(mac_client_handle_t mch, mac_client_promisc_type_t type,
mac_cb_info_t *mcbi;
int rc;
+ if ((flags & MAC_PROMISC_FLAGS_NO_COPY) &&
+ (flags & MAC_PROMISC_FLAGS_DO_FIXUPS)) {
+ return (EINVAL);
+ }
+
i_mac_perim_enter(mip);
if ((rc = mac_start((mac_handle_t)mip)) != 0) {
@@ -3271,7 +3276,8 @@ mac_promisc_add(mac_client_handle_t mch, mac_client_promisc_type_t type,
}
if ((mcip->mci_state_flags & MCIS_IS_VNIC) &&
- type == MAC_CLIENT_PROMISC_ALL) {
+ type == MAC_CLIENT_PROMISC_ALL &&
+ (mcip->mci_protect_flags & MPT_FLAG_PROMISC_FILTERED)) {
/*
* The function is being invoked by the upper MAC client
* of a VNIC. The VNIC should only see the traffic
@@ -3308,6 +3314,7 @@ mac_promisc_add(mac_client_handle_t mch, mac_client_promisc_type_t type,
mpip->mpi_strip_vlan_tag =
((flags & MAC_PROMISC_FLAGS_VLAN_TAG_STRIP) != 0);
mpip->mpi_no_copy = ((flags & MAC_PROMISC_FLAGS_NO_COPY) != 0);
+ mpip->mpi_do_fixups = ((flags & MAC_PROMISC_FLAGS_DO_FIXUPS) != 0);
mcbi = &mip->mi_promisc_cb_info;
mutex_enter(mcbi->mcbi_lockp);
@@ -3944,15 +3951,22 @@ mac_client_get_effective_resources(mac_client_handle_t mch,
static void
mac_promisc_dispatch_one(mac_promisc_impl_t *mpip, mblk_t *mp,
- boolean_t loopback)
+ boolean_t loopback, boolean_t local)
{
mblk_t *mp_copy, *mp_next;
- if (!mpip->mpi_no_copy || mpip->mpi_strip_vlan_tag) {
+ if (!mpip->mpi_no_copy || mpip->mpi_strip_vlan_tag ||
+ (mpip->mpi_do_fixups && local)) {
mp_copy = copymsg(mp);
if (mp_copy == NULL)
return;
+ if (mpip->mpi_do_fixups && local) {
+ mp_copy = mac_fix_cksum(mp_copy);
+ if (mp_copy == NULL)
+ return;
+ }
+
if (mpip->mpi_strip_vlan_tag) {
mp_copy = mac_strip_vlan_tag_chain(mp_copy);
if (mp_copy == NULL)
@@ -4009,7 +4023,7 @@ mac_is_mcast(mac_impl_t *mip, mblk_t *mp)
*/
void
mac_promisc_dispatch(mac_impl_t *mip, mblk_t *mp_chain,
- mac_client_impl_t *sender)
+ mac_client_impl_t *sender, boolean_t local)
{
mac_promisc_impl_t *mpip;
mac_cb_t *mcb;
@@ -4049,8 +4063,10 @@ mac_promisc_dispatch(mac_impl_t *mip, mblk_t *mp_chain,
if (is_sender ||
mpip->mpi_type == MAC_CLIENT_PROMISC_ALL ||
- is_mcast)
- mac_promisc_dispatch_one(mpip, mp, is_sender);
+ is_mcast) {
+ mac_promisc_dispatch_one(mpip, mp, is_sender,
+ local);
+ }
}
}
MAC_PROMISC_WALKER_DCR(mip);
@@ -4079,7 +4095,8 @@ mac_promisc_client_dispatch(mac_client_impl_t *mcip, mblk_t *mp_chain)
mpip = (mac_promisc_impl_t *)mcb->mcb_objp;
if (mpip->mpi_type == MAC_CLIENT_PROMISC_FILTERED &&
!is_mcast) {
- mac_promisc_dispatch_one(mpip, mp, B_FALSE);
+ mac_promisc_dispatch_one(mpip, mp, B_FALSE,
+ B_FALSE);
}
}
}
@@ -4150,16 +4167,15 @@ mac_info_get(const char *name, mac_info_t *minfop)
/*
* To get the capabilities that MAC layer cares about, such as rings, factory
* mac address, vnic or not, it should directly invoke this function. If the
- * link is part of a bridge, then the only "capability" it has is the inability
- * to do zero copy.
+ * link is part of a bridge, then the link is unable to do zero copy.
*/
boolean_t
i_mac_capab_get(mac_handle_t mh, mac_capab_t cap, void *cap_data)
{
mac_impl_t *mip = (mac_impl_t *)mh;
- if (mip->mi_bridge_link != NULL)
- return (cap == MAC_CAPAB_NO_ZCOPY);
+ if (mip->mi_bridge_link != NULL && cap == MAC_CAPAB_NO_ZCOPY)
+ return (B_TRUE);
else if (mip->mi_callbacks->mc_callbacks & MC_GETCAPAB)
return (mip->mi_getcapab(mip->mi_driver, cap, cap_data));
else
@@ -4338,7 +4354,13 @@ mac_addr_len(mac_handle_t mh)
boolean_t
mac_is_vnic(mac_handle_t mh)
{
- return (((mac_impl_t *)mh)->mi_state_flags & MIS_IS_VNIC);
+ return ((((mac_impl_t *)mh)->mi_state_flags & MIS_IS_VNIC) != 0);
+}
+
+boolean_t
+mac_is_overlay(mac_handle_t mh)
+{
+ return ((((mac_impl_t *)mh)->mi_state_flags & MIS_IS_OVERLAY) != 0);
}
mac_handle_t
@@ -5552,3 +5574,23 @@ mac_client_set_rings(mac_client_handle_t mch, int rxrings, int txrings)
mrp->mrp_ntxrings = txrings;
}
}
+
+boolean_t
+mac_get_promisc_filtered(mac_client_handle_t mch)
+{
+ mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
+
+ return (mcip->mci_protect_flags & MPT_FLAG_PROMISC_FILTERED);
+}
+
+void
+mac_set_promisc_filtered(mac_client_handle_t mch, boolean_t enable)
+{
+ mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
+
+ ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
+ if (enable)
+ mcip->mci_protect_flags |= MPT_FLAG_PROMISC_FILTERED;
+ else
+ mcip->mci_protect_flags &= ~MPT_FLAG_PROMISC_FILTERED;
+}
diff --git a/usr/src/uts/common/io/mac/mac_datapath_setup.c b/usr/src/uts/common/io/mac/mac_datapath_setup.c
index 14d94981cd..0459506784 100644
--- a/usr/src/uts/common/io/mac/mac_datapath_setup.c
+++ b/usr/src/uts/common/io/mac/mac_datapath_setup.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#include <sys/types.h>
@@ -603,6 +604,7 @@ mac_srs_cpu_setup(cpu_setup_t what, int id, void *arg)
*
* TODO: Cleanup and tighten some of the assumptions.
*/
+boolean_t mac_check_overlay = B_TRUE;
boolean_t mac_use_bw_heuristic = B_TRUE;
static int
mac_compute_soft_ring_count(flow_entry_t *flent, int rx_srs_cnt, int maxcpus)
@@ -610,6 +612,7 @@ mac_compute_soft_ring_count(flow_entry_t *flent, int rx_srs_cnt, int maxcpus)
uint64_t cpu_speed, bw = 0;
int srings = 0;
boolean_t bw_enabled = B_FALSE;
+ mac_client_impl_t *mcip = flent->fe_mcip;
ASSERT(!(flent->fe_type & FLOW_USER));
if (flent->fe_resource_props.mrp_mask & MRP_MAXBW &&
@@ -637,7 +640,16 @@ mac_compute_soft_ring_count(flow_entry_t *flent, int rx_srs_cnt, int maxcpus)
*/
if (mac_soft_ring_enable)
srings = srings * 2;
+ } else if (mac_check_overlay == B_TRUE &&
+ (mcip->mci_state_flags & MCIS_IS_VNIC) != 0) {
+ /* Is this a VNIC on an overlay? */
+ mac_handle_t mh = (mac_handle_t)mcip->mci_mip;
+ if (mac_is_overlay(mh) == B_TRUE) {
+ srings = mac_rx_soft_ring_10gig_count;
+ }
}
+
+
} else {
/*
* Soft ring computation using CPU speed and specified
diff --git a/usr/src/uts/common/io/mac/mac_protect.c b/usr/src/uts/common/io/mac/mac_protect.c
index 805b5d36f9..da83dc643e 100644
--- a/usr/src/uts/common/io/mac/mac_protect.c
+++ b/usr/src/uts/common/io/mac/mac_protect.c
@@ -2576,6 +2576,9 @@ mac_protect_init(mac_client_impl_t *mcip)
sizeof (dhcpv6_addr_t), offsetof(dhcpv6_addr_t, da_node));
avl_create(&mcip->mci_v6_slaac_ip, compare_slaac_ip,
sizeof (slaac_addr_t), offsetof(slaac_addr_t, sla_node));
+
+ if (mcip->mci_state_flags & MCIS_IS_VNIC)
+ mcip->mci_protect_flags |= MPT_FLAG_PROMISC_FILTERED;
}
void
diff --git a/usr/src/uts/common/io/mac/mac_provider.c b/usr/src/uts/common/io/mac/mac_provider.c
index 57d1996d84..98b770786a 100644
--- a/usr/src/uts/common/io/mac/mac_provider.c
+++ b/usr/src/uts/common/io/mac/mac_provider.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#include <sys/types.h>
@@ -350,6 +351,9 @@ mac_register(mac_register_t *mregp, mac_handle_t *mhp)
if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_AGGR, NULL))
mip->mi_state_flags |= MIS_IS_AGGR;
+ if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_OVERLAY, NULL))
+ mip->mi_state_flags |= MIS_IS_OVERLAY;
+
mac_addr_factory_init(mip);
/*
@@ -670,7 +674,7 @@ mac_trill_snoop(mac_handle_t mh, mblk_t *mp)
mac_impl_t *mip = (mac_impl_t *)mh;
if (mip->mi_promisc_list != NULL)
- mac_promisc_dispatch(mip, mp, NULL);
+ mac_promisc_dispatch(mip, mp, NULL, B_FALSE);
}
/*
@@ -691,7 +695,7 @@ mac_rx_common(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
* this MAC, pass them a copy if appropriate.
*/
if (mip->mi_promisc_list != NULL)
- mac_promisc_dispatch(mip, mp_chain, NULL);
+ mac_promisc_dispatch(mip, mp_chain, NULL, B_FALSE);
if (mr != NULL) {
/*
diff --git a/usr/src/uts/common/io/mac/mac_sched.c b/usr/src/uts/common/io/mac/mac_sched.c
index 148f739d52..0e2cb864c9 100644
--- a/usr/src/uts/common/io/mac/mac_sched.c
+++ b/usr/src/uts/common/io/mac/mac_sched.c
@@ -1370,7 +1370,7 @@ int mac_srs_worker_wakeup_ticks = 0;
* said, the constant is left as a static variable to allow it to be
* dynamically tuned in the field if and as needed.
*/
-static uintptr_t mac_rx_srs_stack_needed = 10240;
+static uintptr_t mac_rx_srs_stack_needed = 13312;
static uint_t mac_rx_srs_stack_toodeep;
#ifndef STACK_GROWTH_DOWN
@@ -2310,7 +2310,7 @@ check_again:
if (smcip->mci_mip->mi_promisc_list != NULL) {
mutex_exit(lock);
mac_promisc_dispatch(smcip->mci_mip,
- head, NULL);
+ head, NULL, B_FALSE);
mutex_enter(lock);
}
}
@@ -4450,8 +4450,10 @@ mac_tx_send(mac_client_handle_t mch, mac_ring_handle_t ring, mblk_t *mp_chain,
* check is done inside the MAC_TX()
* macro.
*/
- if (mip->mi_promisc_list != NULL)
- mac_promisc_dispatch(mip, mp, src_mcip);
+ if (mip->mi_promisc_list != NULL) {
+ mac_promisc_dispatch(mip, mp, src_mcip,
+ B_TRUE);
+ }
do_switch = ((src_mcip->mci_state_flags &
dst_mcip->mci_state_flags &
diff --git a/usr/src/uts/common/io/mac/mac_stat.c b/usr/src/uts/common/io/mac/mac_stat.c
index 31972f94d8..c1a5c9c069 100644
--- a/usr/src/uts/common/io/mac/mac_stat.c
+++ b/usr/src/uts/common/io/mac/mac_stat.c
@@ -21,6 +21,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2013 Joyent, Inc. All rights reserved.
*/
/*
@@ -390,8 +391,8 @@ i_mac_stat_create(void *handle, const char *modname, const char *statname,
kstat_t *ksp;
kstat_named_t *knp;
- ksp = kstat_create(modname, 0, statname, "net",
- KSTAT_TYPE_NAMED, count, 0);
+ ksp = kstat_create_zone(modname, 0, statname, "net",
+ KSTAT_TYPE_NAMED, count, 0, getzoneid());
if (ksp == NULL)
return (NULL);
@@ -948,9 +949,9 @@ mac_driver_stat_create(mac_impl_t *mip)
major_t major = getmajor(mip->mi_phy_dev);
count = MAC_MOD_NKSTAT + MAC_NKSTAT + mip->mi_type->mt_statcount;
- ksp = kstat_create((const char *)ddi_major_to_name(major),
+ ksp = kstat_create_zone((const char *)ddi_major_to_name(major),
getminor(mip->mi_phy_dev) - 1, MAC_KSTAT_NAME,
- MAC_KSTAT_CLASS, KSTAT_TYPE_NAMED, count, 0);
+ MAC_KSTAT_CLASS, KSTAT_TYPE_NAMED, count, 0, getzoneid());
if (ksp == NULL)
return;
diff --git a/usr/src/uts/common/io/mem.c b/usr/src/uts/common/io/mem.c
index cdbeb0d422..8955b3d935 100644
--- a/usr/src/uts/common/io/mem.c
+++ b/usr/src/uts/common/io/mem.c
@@ -24,7 +24,7 @@
*/
/*
- * Copyright (c) 2015, Joyent, Inc. All rights reserved.
+ * Copyright 2016, Joyent, Inc.
*/
/*
@@ -221,10 +221,19 @@ mmopen(dev_t *devp, int flag, int typ, struct cred *cred)
switch (getminor(*devp)) {
case M_NULL:
case M_ZERO:
+ /* standard devices */
+ break;
+
case M_MEM:
case M_KMEM:
case M_ALLKMEM:
- /* standard devices */
+ /*
+ * These devices should never be visible in a zone, but if they
+ * somehow do get created we refuse to allow the zone to use
+ * them.
+ */
+ if (crgetzoneid(cred) != GLOBAL_ZONEID)
+ return (EACCES);
break;
default:
diff --git a/usr/src/uts/common/io/mr_sas/mr_sas.conf b/usr/src/uts/common/io/mr_sas/mr_sas.conf
index cfda434e23..6c585c6a42 100644
--- a/usr/src/uts/common/io/mr_sas/mr_sas.conf
+++ b/usr/src/uts/common/io/mr_sas/mr_sas.conf
@@ -13,3 +13,11 @@
# Fast-Path specific flag. Default is "yes".
# mrsas-enable-fp="yes";
+flow_control="dmult" queue="qsort" tape="sctp";
+
+# MSI specific flag. To enable MSI modify the flag value to "yes"
+mrsas-enable-msi="yes";
+
+# Fast-Path specific flag. To enable Fast-Path modify the flag value to "yes"
+mrsas-enable-fp="yes";
+
diff --git a/usr/src/uts/common/io/nfp/THIRDPARTYLICENSE b/usr/src/uts/common/io/nfp/THIRDPARTYLICENSE
new file mode 100644
index 0000000000..187088ff34
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/THIRDPARTYLICENSE
@@ -0,0 +1,19 @@
+Copyright (c) 2014, Thales UK Limited
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/usr/src/uts/common/io/nfp/THIRDPARTYLICENSE.descrip b/usr/src/uts/common/io/nfp/THIRDPARTYLICENSE.descrip
new file mode 100644
index 0000000000..cde8b65b37
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/THIRDPARTYLICENSE.descrip
@@ -0,0 +1 @@
+NFAST CRYPTO ACCELERATOR DRIVER
diff --git a/usr/src/uts/common/io/nfp/autoversion.h b/usr/src/uts/common/io/nfp/autoversion.h
new file mode 100644
index 0000000000..b9021942b2
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/autoversion.h
@@ -0,0 +1,21 @@
+/*
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+*/
+
+/* AUTOGENERATED - DO NOT EDIT */
+#ifndef AUTOVERSION_H
+#define AUTOVERSION_H
+
+#define VERSION_RELEASEMAJOR 2
+#define VERSION_RELEASEMINOR 26
+#define VERSION_RELEASEPATCH 40
+#define VERSION_NO "2.26.40cam999"
+#define VERSION_COMPNAME "nfdrv"
+
+#endif
diff --git a/usr/src/uts/common/io/nfp/drvlist.c b/usr/src/uts/common/io/nfp/drvlist.c
new file mode 100644
index 0000000000..a04b1fd5b0
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/drvlist.c
@@ -0,0 +1,19 @@
+/*
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+*/
+
+#include "nfp_common.h"
+#include "nfp_cmd.h"
+
+const nfpcmd_dev *nfp_drvlist[] = {
+ &i21285_cmddev,
+ &i21555_cmddev,
+ NULL
+};
+
diff --git a/usr/src/uts/common/io/nfp/hostif.c b/usr/src/uts/common/io/nfp/hostif.c
new file mode 100644
index 0000000000..684be703ea
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/hostif.c
@@ -0,0 +1,1192 @@
+/*
+
+hostif.c: nFast PCI driver for Solaris 2.5, 2.6, 2.7 and 2.8
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+history
+
+06/05/1998 jsh Original solaris 2.6
+21/05/1999 jsh added support for solaris 2.5
+10/06/1999 jsh added support for solaris 2.7 (32 and 64 bit)
+??/??/2001 jsh added support for solaris 2.8 (32 and 64 bit)
+16/10/2001 jsh moved from nfast to new structure in nfdrv
+12/02/2002 jsh added high level interrupt support
+
+*/
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/errno.h>
+#include <sys/file.h>
+#include <sys/conf.h>
+#include <sys/uio.h>
+#include <sys/map.h>
+#include <sys/debug.h>
+#include <sys/modctl.h>
+#include <sys/kmem.h>
+#include <sys/cmn_err.h>
+#include <sys/open.h>
+#include <sys/stat.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/pci.h>
+
+#include "nfp_common.h"
+#include "nfp_hostif.h"
+#include "nfp_osif.h"
+#include "nfp_cmd.h"
+
+#include "nfp.h"
+
+/* mapped memory attributes, no-swap endianess (done in higher level) */
+static struct ddi_device_acc_attr nosw_attr = {
+ DDI_DEVICE_ATTR_V0,
+ DDI_NEVERSWAP_ACC,
+ DDI_STRICTORDER_ACC
+};
+
+/* dma attributes */
+static ddi_dma_attr_t dma_attrs = {
+ DMA_ATTR_V0, /* version number */
+ (uint64_t)0x0, /* low address */
+ (uint64_t)0xffffffff, /* high address */
+ (uint64_t)0xffffff, /* DMA counter max */
+ (uint64_t)0x1, /* alignment */
+ 0x0c, /* burst sizes */
+ 0x1, /* minimum transfer size */
+ (uint64_t)0x3ffffff, /* maximum transfer size */
+ (uint64_t)0x7fff, /* maximum segment size */
+ 1, /* no scatter/gather lists */
+ 1, /* granularity */
+ 0 /* DMA flags */
+};
+
+/*
+ * Debug message control
+ * Debug Levels:
+ * 0 = no messages
+ * 1 = Errors
+ * 2 = Subroutine calls & control flow
+ * 3 = I/O Data (verbose!)
+ * Can be set with adb or in the /etc/system file with
+ * "set nfp:nfp_debug=<value>"
+ */
+
+int nfp_debug= 1;
+
+static void *state_head; /* opaque handle top of state structs */
+
+static int nfp_open(dev_t *dev, int openflags, int otyp, cred_t *credp);
+static int nfp_close(dev_t dev, int openflags, int otyp, cred_t *credp);
+static int nfp_release_dev( dev_info_t *dip );
+
+static int nfp_read(dev_t dev, struct uio *uiop, cred_t *credp);
+static int nfp_write(dev_t dev, struct uio *uiop, cred_t *credp);
+static int nfp_strategy(struct buf *bp);
+
+static int nfp_ioctl(dev_t dev, int cmd, ioctlptr_t arg, int mode, cred_t *credp, int *rvalp);
+static int nfp_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
+ struct pollhead **phpp);
+
+static void nfp_wrtimeout (void *pdev);
+static void nfp_rdtimeout (void *pdev);
+
+static int nfp_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result);
+static int nfp_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
+static int nfp_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
+
+static void nfp_read_complete_final(nfp_dev *pdev, int ok);
+static void nfp_write_complete_final(nfp_dev *pdev, int ok);
+
+/* nfp file ops --------------------------------------------------- */
+
+static struct cb_ops nfp_cb_ops = {
+ nfp_open,
+ nfp_close,
+ nodev, /* no nfp_strategy */
+ nodev, /* no print routine */
+ nodev, /* no dump routine */
+ nfp_read,
+ nfp_write,
+ nfp_ioctl,
+ nodev, /* no devmap routine */
+ nodev, /* no mmap routine */
+ nodev, /* no segmap routine */
+ nfp_chpoll,
+ ddi_prop_op,
+ 0, /* not a STREAMS driver, no cb_str routine */
+ D_NEW | D_MP | EXTRA_CB_FLAGS, /* must be safe for multi-thread/multi-processor */
+ CB_REV,
+ nodev, /* aread */
+ nodev /* awrite */
+};
+
+static struct dev_ops nfp_ops = {
+ DEVO_REV, /* DEVO_REV indicated by manual */
+ 0, /* device reference count */
+ nfp_getinfo,
+ nulldev, /* identify */
+ nulldev, /* probe */
+ nfp_attach,
+ nfp_detach,
+ nodev, /* device reset routine */
+ &nfp_cb_ops,
+ (struct bus_ops *)0, /* bus operations */
+};
+
+extern struct mod_ops mod_driverops;
+static struct modldrv modldrv = {
+ &mod_driverops,
+ NFP_DRVNAME,
+ &nfp_ops,
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, /* MODREV_1 indicated by manual */
+ (void *)&modldrv,
+ NULL, /* termination of list of linkage structures */
+};
+
+/* interface resource allocation */
+
+int nfp_alloc_pci_push( nfp_dev *pdev ) {
+ /* allocate resources needed for PCI Push,
+ * if not already allocated.
+ * return True if successful
+ */
+ nfp_err ret;
+ uint_t cookie_count;
+ size_t real_length;
+
+ if(!pdev->read_buf) {
+ /* allocate read buffer */
+ pdev->read_buf = kmem_zalloc( NFP_READBUF_SIZE, KM_NOSLEEP );
+ }
+ if(!pdev->read_buf) {
+ nfp_log( NFP_DBG1, "nfp_attach: kmem_zalloc read buffer failed");
+ pdev->read_buf = NULL;
+ return 0;
+ }
+
+ if(!pdev->rd_dma_ok) {
+ /* allocate dma handle for read buffer */
+ ret = ddi_dma_alloc_handle( pdev->dip,
+ &dma_attrs,
+ DDI_DMA_DONTWAIT,
+ NULL,
+ &pdev->read_dma_handle );
+ if( ret != DDI_SUCCESS ) {
+ nfp_log( NFP_DBG1,
+ "nfp_alloc_pci_push: ddi_dma_alloc_handle failed (%d)",
+ ret );
+ return 0;
+ }
+
+ /* Allocate the memory for dma transfers */
+ ret = ddi_dma_mem_alloc(pdev->read_dma_handle, NFP_READBUF_SIZE, &nosw_attr,
+ DDI_DMA_CONSISTENT, DDI_DMA_DONTWAIT, NULL,
+ (caddr_t*)&pdev->read_buf, &real_length, &pdev->acchandle);
+ if (ret != DDI_SUCCESS) {
+ nfp_log( NFP_DBG1, "nfp_alloc_pci_push: ddi_dma_mem_alloc failed (%d)", ret);
+ ddi_dma_free_handle( &pdev->read_dma_handle );
+ return 0;
+ }
+
+ ret = ddi_dma_addr_bind_handle( pdev->read_dma_handle,
+ NULL, /* kernel address space */
+ (caddr_t)pdev->read_buf, real_length,
+ DDI_DMA_READ | DDI_DMA_CONSISTENT, /* dma flags */
+ DDI_DMA_DONTWAIT, NULL,
+ &pdev->read_dma_cookie, &cookie_count );
+ if( ret != DDI_DMA_MAPPED ) {
+ nfp_log( NFP_DBG1,
+ "nfp_alloc_pci_push: ddi_dma_addr_bind_handle failed (%d)",
+ ret);
+ ddi_dma_mem_free(&pdev->acchandle);
+ ddi_dma_free_handle( &pdev->read_dma_handle );
+ return 0;
+ }
+ if( cookie_count > 1 ) {
+ nfp_log( NFP_DBG1,
+ "nfp_alloc_pci_push: error:"
+ " ddi_dma_addr_bind_handle wants %d transfers",
+ cookie_count);
+ ddi_dma_mem_free(&pdev->acchandle);
+ (void) ddi_dma_unbind_handle( pdev->read_dma_handle );
+ ddi_dma_free_handle( &pdev->read_dma_handle );
+ return 0;
+ }
+ pdev->rd_dma_ok = 1;
+ }
+ return pdev->rd_dma_ok;
+}
+
+void nfp_free_pci_push( nfp_dev *pdev ) {
+ /* free resources allocated to PCI Push */
+ if( pdev->rd_dma_ok ) {
+ (void) ddi_dma_sync(pdev->read_dma_handle,0,0,DDI_DMA_SYNC_FORKERNEL);
+ ddi_dma_mem_free(&pdev->acchandle);
+ (void) ddi_dma_unbind_handle( pdev->read_dma_handle );
+ ddi_dma_free_handle( &pdev->read_dma_handle );
+ pdev->rd_dma_ok = 0;
+ }
+ if( pdev->read_buf ) {
+ kmem_free( pdev->read_buf, NFP_READBUF_SIZE );
+ pdev->read_buf = NULL;
+ }
+}
+
+/* include definition of nfp_set_ifvers() */
+#define nfp_ifvers NFDEV_IF_PCI_PUSH
+#include "nfp_ifvers.c"
+#undef nfp_ifvers
+
+/*--------------------*/
+/* nfp_isr */
+/*--------------------*/
+
+static u_int nfp_isr( char *pdev_in ) {
+ /* LINTED: alignment */
+ nfp_dev *pdev= (nfp_dev *)pdev_in;
+ nfp_err ne;
+ int handled;
+
+ nfp_log( NFP_DBG3, "nfp_isr: entered");
+
+ if( !pdev ) {
+ nfp_log( NFP_DBG1, "nfp_isr: cannot find dev");
+ return DDI_INTR_UNCLAIMED;
+ }
+
+ /* The isr needs to be mutex'ed - an SMP can call us while we're still
+ * running!
+ */
+ mutex_enter(&pdev->low_mutex);
+ ne= pdev->cmddev->isr( pdev->common.cmdctx, &handled );
+ mutex_exit(&pdev->low_mutex);
+
+ if( !ne && handled )
+ return DDI_INTR_CLAIMED;
+ if (ne)
+ nfp_log( NFP_DBG1, "nfp_isr: failed");
+ else
+ nfp_log( NFP_DBG3, "nfp_isr: unclaimed");
+ return DDI_INTR_UNCLAIMED;
+}
+
+static u_int nfp_soft_isr( char *pdev_in ) {
+ /* LINTED: alignment */
+ nfp_dev *pdev= (nfp_dev *)pdev_in;
+ int rd, wr;
+
+ nfp_log( NFP_DBG3, "nfp_soft_isr: entered");
+
+ if( !pdev ) {
+ nfp_log( NFP_DBG1, "nfp_soft_isr: cannot find dev");
+ return DDI_INTR_UNCLAIMED;
+ }
+ rd= wr= 0;
+
+ mutex_enter(&pdev->high_mutex);
+ if(pdev->high_read) {
+ pdev->high_read= 0;
+ mutex_exit(&pdev->high_mutex);
+ rd= 1;
+ }
+ if(pdev->high_write) {
+ pdev->high_write= 0;
+ wr= 1;
+ }
+ mutex_exit(&pdev->high_mutex);
+
+ if(rd) {
+ nfp_log( NFP_DBG3, "nfp_soft_isr: read done");
+ nfp_read_complete_final(pdev, pdev->rd_ok);
+ }
+ if(wr) {
+ nfp_log( NFP_DBG3, "nfp_soft_isr: write done");
+ nfp_write_complete_final(pdev, pdev->wr_ok);
+ }
+ if( rd || wr )
+ return DDI_INTR_CLAIMED;
+
+ nfp_log( NFP_DBG2, "nfp_isr: unclaimed");
+ return DDI_INTR_UNCLAIMED;
+}
+
+
+/*-------------------------*/
+/* nfp_read */
+/*-------------------------*/
+
+void nfp_read_complete(nfp_dev *pdev, int ok) {
+ nfp_log( NFP_DBG2,"nfp_read_complete: entering");
+
+ if(pdev->high_intr) {
+ nfp_log(NFP_DBG2, "nfp_read_complete: high_intr");
+ mutex_enter(&pdev->high_mutex);
+ nfp_log(NFP_DBG3, "nfp_read_complete: high_mutex entered");
+ if(pdev->high_read)
+ nfp_log(NFP_DBG1, "nfp_read_complete: high_read allread set!");
+ pdev->high_read= 1;
+ pdev->rd_ok= ok;
+ nfp_log(NFP_DBG3, "nfp_read_complete: exiting high_mutex");
+ mutex_exit(&pdev->high_mutex);
+ ddi_trigger_softintr(pdev->soft_int_id);
+ } else
+ nfp_read_complete_final( pdev, ok );
+ nfp_log( NFP_DBG2,"nfp_read_complete: exiting");
+}
+
+static void nfp_read_complete_final(nfp_dev *pdev, int ok) {
+ nfp_log( NFP_DBG2,"nfp_read_complete_final: entering");
+ if(pdev->rdtimeout)
+ (void) untimeout(pdev->rdtimeout);
+ if(!pdev->rd_outstanding) {
+ nfp_log( NFP_DBG1,"nfp_read_complete_final: !pdev->rd_outstanding");
+ }
+ nfp_log( NFP_DBG2,"nfp_read_complete_final: pdev->rd_outstanding=0, ok %d", ok);
+ mutex_enter(&pdev->isr_mutex);
+ pdev->rd_outstanding= 0;
+ pdev->rd_ready= 1;
+ pdev->rd_ok= ok;
+ cv_broadcast(&pdev->rd_cv);
+ mutex_exit(&pdev->isr_mutex);
+ pollwakeup (&pdev->pollhead, POLLRDNORM);
+ nfp_log( NFP_DBG2,"nfp_read_complete_final: exiting");
+}
+
+static void nfp_rdtimeout( void *pdev_in )
+{
+ nfp_dev *pdev= (nfp_dev *)pdev_in;
+
+ nfp_log( NFP_DBG1, "nfp_rdtimeout: read timed out");
+
+ if (!pdev) {
+ nfp_log( NFP_DBG1, "nfp_rdtimeout: NULL pdev." );
+ return;
+ }
+ pdev->rdtimeout= 0;
+ nfp_read_complete_final(pdev, 0);
+}
+
+/* ARGSUSED */
+static int nfp_read(dev_t dev, struct uio *uiop, cred_t *credp) {
+ int ret;
+ nfp_log( NFP_DBG2, "nfp_read: entered" );
+ if (ddi_get_soft_state(state_head, getminor(dev)) != NULL) {
+ nfp_log( NFP_DBG1, "nfp_read: unable to get nfp_dev");
+ return (ENODEV);
+ }
+ nfp_log( NFP_DBG2, "nfp_read: about to physio." );
+ ret = physio(nfp_strategy, (struct buf *)0, dev, B_READ, minphys, uiop );
+ if(ret)
+ nfp_log( NFP_DBG1, "nfp_read: physio returned %x.", ret );
+ return ret;
+}
+
+/*-------------------------*/
+/* nfp_write */
+/*-------------------------*/
+
+void nfp_write_complete( nfp_dev *pdev, int ok) {
+ nfp_log( NFP_DBG2,"nfp_write_complete: entering");
+
+ if(pdev->high_intr) {
+ mutex_enter(&pdev->high_mutex);
+ if(pdev->high_write)
+ nfp_log(NFP_DBG1, "nfp_write_complete: high_write allread set!");
+ pdev->high_write= 1;
+ pdev->wr_ok= ok;
+ mutex_exit(&pdev->high_mutex);
+ ddi_trigger_softintr(pdev->soft_int_id);
+ } else
+ nfp_write_complete_final( pdev, ok );
+ nfp_log( NFP_DBG2,"nfp_write_complete: exiting");
+}
+
+static void nfp_write_complete_final( nfp_dev *pdev, int ok) {
+ struct buf *local_wr_bp;
+ nfp_log( NFP_DBG2,"nfp_write_complete_final: entering");
+ if(pdev->wrtimeout)
+ (void) untimeout(pdev->wrtimeout);
+
+ if (!pdev->wr_bp) {
+ nfp_log( NFP_DBG2, "nfp_write_complete_final: write: wr_bp == NULL." );
+ return;
+ }
+
+ bp_mapout(pdev->wr_bp);
+ pdev->wr_bp->b_resid = ok ? 0 : pdev->wr_bp->b_bcount;
+ /* Make sure we set wr_ready before calling biodone to avoid a race */
+ pdev->wr_ready = 1;
+ bioerror(pdev->wr_bp, ok ? 0 : ENXIO);
+ local_wr_bp = pdev->wr_bp;
+ pdev->wr_bp = 0;
+ biodone(local_wr_bp);
+ nfp_log( NFP_DBG2, "nfp_write_complete_final: isr_mutex extited");
+ pollwakeup (&pdev->pollhead, POLLWRNORM);
+
+ nfp_log( NFP_DBG2, "nfp_write_complete_final: leaving");
+}
+
+static void nfp_wrtimeout( void *pdev_in )
+{
+ nfp_dev *pdev= (nfp_dev *)pdev_in;
+
+ nfp_log( NFP_DBG1, "nfp_wrtimeout: write timed out");
+
+ if (!pdev) {
+ nfp_log( NFP_DBG1, "nfp_wrtimeout: NULL pdev." );
+ return;
+ }
+ pdev->wrtimeout= 0;
+ nfp_write_complete_final(pdev, 0);
+}
+
+/* ARGSUSED */
+static int nfp_write(dev_t dev, struct uio *uiop, cred_t *credp) {
+ int ret;
+ nfp_log( NFP_DBG2, "nfp_write: entered." );
+ if (ddi_get_soft_state(state_head, getminor(dev)) == NULL) {
+ nfp_log( NFP_DBG1, "nfp_chread: unable to get nfp_dev.");
+ return (ENODEV);
+ }
+ nfp_log( NFP_DBG2, "nfp_write: about to physio." );
+ ret = physio(nfp_strategy, (struct buf *)0, dev, B_WRITE, minphys, uiop );
+ if(ret)
+ nfp_log( NFP_DBG1, "nfp_write: physio returned %x.", ret );
+ return ret;
+}
+
+/*-------------------------*/
+/* nfp_strategy */
+/*-------------------------*/
+
+#define NFP_STRAT_ERR(thebp,err,txt) \
+ nfp_log( NFP_DBG1, "nfp_strategy: " txt ".\n"); \
+ (thebp)->b_resid = (thebp)->b_bcount; \
+ bioerror ((thebp), err); \
+ biodone ((thebp));
+
+static int nfp_strategy(struct buf *bp) {
+ register struct nfp_dev *pdev;
+ nfp_err ne;
+
+ nfp_log( NFP_DBG2, "nfp_strategy: entered." );
+ if (!(pdev = ddi_get_soft_state(state_head, getminor(bp->b_edev)))) {
+ NFP_STRAT_ERR (bp, ENXIO, "unable to get nfp_dev");
+ return (0);
+ }
+
+ if (bp->b_flags & B_READ) {
+ int count;
+ /* read */
+ if (!pdev->rd_ready) {
+ NFP_STRAT_ERR (bp,ENXIO,"read called when not ready");
+ return (0);
+ }
+ pdev->rd_ready=0;
+ pdev->rd_pending = 0;
+ if( !pdev->rd_ok) {
+ NFP_STRAT_ERR (bp,ENXIO,"read failed");
+ return (0);
+ }
+ /* copy data from module */
+ if(pdev->ifvers >= NFDEV_IF_PCI_PUSH) {
+ nfp_log( NFP_DBG3, "nfp_strategy: copying kernel read buffer");
+ if( ddi_dma_sync(pdev->read_dma_handle,0,0,DDI_DMA_SYNC_FORKERNEL) != DDI_SUCCESS )
+ {
+ NFP_STRAT_ERR(bp,ENXIO,"ddi_dma_sync(read_dma_handle) failed");
+ return (0);
+ }
+ /* LINTED: alignment */
+ count= *(unsigned int *)(pdev->read_buf+4);
+ count= FROM_LE32_MEM(&count);
+ nfp_log( NFP_DBG3, "nfp_strategy: read count %d", count);
+ if(count<0 || count>bp->b_bcount) {
+ NFP_STRAT_ERR(bp,ENXIO,"bad read byte count from device");
+ nfp_log( NFP_DBG1, "nfp_strategy: bad read byte count (%d) from device", count);
+ return (0);
+ }
+ bp_mapin (bp);
+ bcopy( pdev->read_buf + 8, bp->b_un.b_addr, count );
+ bp_mapout (bp);
+ } else {
+ bp_mapin (bp);
+ ne= pdev->cmddev->read_block( bp->b_un.b_addr, bp->b_bcount, pdev->common.cmdctx, &count );
+ bp_mapout (bp);
+ if( ne != NFP_SUCCESS) {
+ NFP_STRAT_ERR (bp,nfp_oserr(ne),"read_block failed");
+ return (0);
+ }
+ }
+ bioerror(bp, 0);
+ bp->b_resid = 0;
+ biodone (bp);
+ } else {
+ /* write */
+ if (!pdev->wr_ready) {
+ NFP_STRAT_ERR (bp,ENXIO,"write called when not ready");
+ return (0);
+ }
+ if (pdev->wr_bp) {
+ NFP_STRAT_ERR (bp,ENXIO,"wr_bp != NULL");
+ return (0);
+ }
+ pdev->wrtimeout= timeout(nfp_wrtimeout, (caddr_t)pdev, NFP_TIMEOUT_SEC * drv_usectohz(1000000));
+ pdev->wr_bp = bp;
+ pdev->wr_ready = 0;
+ bp_mapin (bp);
+ ne= pdev->cmddev->write_block( bp->b_un.b_addr, bp->b_bcount, pdev->common.cmdctx);
+ if( ne != NFP_SUCCESS ) {
+ bp_mapout (bp);
+ (void) untimeout(pdev->wrtimeout);
+ pdev->wr_bp = 0;
+ pdev->wr_ready = 1;
+ NFP_STRAT_ERR (bp,nfp_oserr(ne),"write failed");
+ return (0);
+ }
+ }
+ nfp_log( NFP_DBG2, "nfp_strategy: leaving");
+
+ return (0);
+}
+
+
+/*--------------------*/
+/* poll / select */
+/*--------------------*/
+
+static int nfp_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
+ struct pollhead **phpp) {
+ nfp_dev *pdev;
+ short revents;
+
+ if (!(pdev = ddi_get_soft_state(state_head, getminor(dev)))) {
+ nfp_log( NFP_DBG1, "nfp_chpoll: unable to get nfp_dev");
+ *reventsp=0;
+ return (0);
+ }
+ nfp_log( NFP_DBG2, "nfp_chpoll: entered %x", events);
+
+ revents=0;
+ if (events&POLLWRNORM) {
+ if (pdev->wr_ready) {
+ nfp_log( NFP_DBG2, "nfp_chpoll: write ready");
+ revents|=POLLWRNORM;
+ }
+ }
+
+ if (events&POLLRDNORM) {
+ if (pdev->rd_ready) {
+ nfp_log( NFP_DBG2, "nfp_chpoll: read ready");
+ revents|=POLLRDNORM;
+ }
+ }
+
+ if (!revents && !anyyet) {
+ *phpp=&pdev->pollhead;
+ }
+ *reventsp=revents;
+
+ nfp_log( NFP_DBG2, "nfp_chpoll: leaving");
+ return (0);
+}
+
+
+/*--------------------*/
+/* ioctl */
+/*--------------------*/
+
+/* ARGSUSED */
+static int nfp_ioctl(dev_t dev, int cmd, ioctlptr_t arg, int mode, cred_t *credp, int *rvalp) {
+ register struct nfp_dev *pdev;
+
+ nfp_log( NFP_DBG2, "nfp_ioctl: entered." );
+
+ if (!(pdev = ddi_get_soft_state(state_head, getminor(dev)))) {
+ nfp_log( NFP_DBG1, "nfp_ioctl: unable to get nfp dev.");
+ return (ENXIO);
+ }
+
+ switch (cmd) {
+ case NFDEV_IOCTL_ENQUIRY:
+ {
+ long *outp;
+ int outlen;
+ nfdev_enquiry_str enq_data;
+
+ enq_data.busno = (unsigned int)-1;
+ enq_data.slotno = (unsigned char)-1;
+
+ /* get our bus and slot num */
+ if (ddi_getlongprop (DDI_DEV_T_NONE,
+ pdev->dip, 0, "reg",
+ (caddr_t)&outp, &outlen) != DDI_PROP_NOT_FOUND) {
+ nfp_log( NFP_DBG2, "ddi_getlongprop('reg') ok." );
+ if( outlen > 0 ) {
+ enq_data.busno = ((*outp)>>16) & 0xff;
+ enq_data.slotno = ((*outp)>>11) & 0x1f;
+ nfp_log( NFP_DBG2, "busno %d, slotno %d.",
+ enq_data.busno, enq_data.slotno );
+ }
+ } else
+ nfp_log( NFP_DBG1, "ddi_getlongprop('reg') failed." );
+
+ if( ddi_copyout( (char *)&enq_data, (void *)arg, sizeof(enq_data), mode ) != 0 ) {
+ nfp_log( NFP_DBG1, "ddi_copyout() failed." );
+ return EFAULT;
+ }
+ }
+ break;
+
+ case NFDEV_IOCTL_ENSUREREADING:
+ {
+ unsigned int addr, len;
+ nfp_err ret;
+ if( ddi_copyin( (void *)arg, (char *)&len, sizeof(unsigned int), mode ) != 0 ) {
+ nfp_log( NFP_DBG1, "ddi_copyin() failed." );
+ return (EFAULT);
+ }
+ /* signal a read to the module */
+ nfp_log( NFP_DBG2, "nfp_ioctl: signalling read request to module, len = %x.", len );
+ if (len>8192) {
+ nfp_log( NFP_DBG1, "nfp_ioctl: len >8192 = %x.", len );
+ return EINVAL;
+ }
+ if (pdev->rd_outstanding==1) {
+ nfp_log( NFP_DBG1, "nfp_ioctl: not about to call read with read outstanding.");
+ return EIO;
+ }
+
+ addr= 0;
+ if(pdev->ifvers >= NFDEV_IF_PCI_PUSH) {
+ if( len > NFP_READBUF_SIZE ) {
+ nfp_log( NFP_DBG1, "nfp_ioctl: len > NFP_READBUF_SIZE = %x.", len );
+ return EINVAL;
+ }
+ addr= pdev->read_dma_cookie.dmac_address;
+ }
+
+ pdev->rd_outstanding = 1;
+ nfp_log( NFP_DBG2,"nfp_ioctl: pdev->rd_outstanding=1");
+
+ /* setup timeout timer */
+ pdev->rdtimeout= timeout(nfp_rdtimeout, (caddr_t)pdev, NFP_TIMEOUT_SEC * drv_usectohz(1000000));
+
+ nfp_log( NFP_DBG2, "nfp_ioctl: read request");
+ ret = pdev->cmddev->ensure_reading(addr, len, pdev->common.cmdctx);
+ if ( ret != NFP_SUCCESS ) {
+ (void) untimeout(pdev->rdtimeout);
+ pdev->rdtimeout = 0;
+ pdev->rd_outstanding = 0;
+ nfp_log( NFP_DBG1, "nfp_ioctl : cmddev->ensure_reading failed ");
+ return nfp_oserr( ret );
+ }
+ }
+ break;
+
+ case NFDEV_IOCTL_PCI_IFVERS:
+ {
+ int vers;
+
+ nfp_log( NFP_DBG2, "nfp_ioctl: NFDEV_IOCTL_PCI_IFVERS");
+
+ if( ddi_copyin( (void *)arg, (char *)&vers, sizeof(vers), mode ) != 0 ) {
+ nfp_log( NFP_DBG1, "ddi_copyin() failed." );
+ return (EFAULT);
+ }
+
+ if( pdev->rd_outstanding ) {
+ nfp_log( NFP_DBG1, "nfp_ioctl: can't set ifvers %d as read outstanding", vers);
+ return EIO;
+ }
+
+ nfp_set_ifvers(pdev, vers);
+ if( pdev->ifvers != vers ) {
+ nfp_log( NFP_DBG1, "nfp_ioctl: can't set ifvers %d", vers);
+ return EIO;
+ }
+ }
+ break;
+
+ case NFDEV_IOCTL_STATS:
+ {
+ if( ddi_copyout( (char *)&(pdev->common.stats),
+ (void *)arg,
+ sizeof(nfdev_stats_str),
+ mode ) != 0 ) {
+ nfp_log( NFP_DBG1, "ddi_copyout() failed." );
+ return EFAULT;
+ }
+ }
+ break;
+
+ default:
+ nfp_log( NFP_DBG1, "nfp_ioctl: unknown ioctl." );
+ return EINVAL;
+ }
+
+ return 0;
+}
+
+/*-------------------------*/
+/* nfp_open */
+/*-------------------------*/
+
+/* ARGSUSED */
+int nfp_open(dev_t *dev, int openflags, int otyp, cred_t *credp)
+{
+ nfp_err ret;
+ register struct nfp_dev *pdev;
+
+ nfp_log( NFP_DBG2, "entered nfp_open." );
+
+ pdev = (nfp_dev *)ddi_get_soft_state(state_head, getminor(*dev));
+
+ if( !pdev ) {
+ nfp_log( NFP_DBG1, "nfp_open: unable to get nfp dev.");
+ return (ENODEV);
+ }
+
+ if( otyp != OTYP_CHR ) {
+ nfp_log( NFP_DBG1, "nfp_open: not opened as character device");
+ return (EINVAL);
+ }
+
+ mutex_enter(&pdev->busy_mutex);
+
+ if (pdev->busy) {
+ mutex_exit(&pdev->busy_mutex);
+ nfp_log( NFP_DBG1, "nfp_open: device busy");
+ return EBUSY;
+ }
+ pdev->busy= 1;
+ mutex_exit(&pdev->busy_mutex);
+
+ /* use oldest possible interface until told otherwise */
+ pdev->ifvers= NFDEV_IF_STANDARD;
+ nfp_log( NFP_DBG3, "nfp_open: setting ifvers %d", pdev->ifvers);
+ pdev->rd_ready= 0; /* drop any old data */
+
+ ret = pdev->cmddev->open(pdev->common.cmdctx);
+ if( ret != NFP_SUCCESS ) {
+ nfp_log( NFP_DBG1, "nfp_open : cmddev->open failed ");
+ return nfp_oserr( ret );
+ }
+
+ nfp_log( NFP_DBG2, "nfp_open: done");
+
+ return 0;
+}
+
+/*--------------------*/
+/* nfp_close */
+/*--------------------*/
+
+/* ARGSUSED */
+static int nfp_close(dev_t dev, int openflags, int otyp, cred_t *credp) {
+ nfp_dev *pdev;
+ nfp_err ret;
+
+ nfp_log( NFP_DBG2, "nfp_close: entered");
+
+ pdev = (struct nfp_dev *)ddi_get_soft_state(state_head, getminor(dev));
+ if( !pdev ) {
+ nfp_log( NFP_DBG1, "nfp_close: cannot find dev.");
+ return ENODEV;
+ }
+
+ mutex_enter(&pdev->isr_mutex);
+ if(pdev->rd_outstanding) {
+ int lbolt, err;
+ nfp_get_lbolt(&lbolt, err);
+ if(!err)
+ (void) cv_timedwait(&pdev->rd_cv, &pdev->isr_mutex, lbolt + (NFP_TIMEOUT_SEC * drv_usectohz(1000000)) );
+ }
+ mutex_exit(&pdev->isr_mutex);
+ ret = pdev->cmddev->close(pdev->common.cmdctx);
+ if (ret != NFP_SUCCESS ) {
+ nfp_log( NFP_DBG1, " nfp_close : cmddev->close failed");
+ return nfp_oserr( ret );
+ }
+
+ mutex_enter(&pdev->busy_mutex);
+ pdev->busy= 0;
+ mutex_exit(&pdev->busy_mutex);
+
+ return 0;
+}
+
+/****************************************************************************
+
+ nfp driver config
+
+ ****************************************************************************/
+
+/*-------------------------*/
+/* nfp_getinfo */
+/*-------------------------*/
+
+/* ARGSUSED */
+static int nfp_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) {
+ int error;
+ nfp_dev *pdev;
+
+ nfp_log( NFP_DBG2, "nfp_getinfo: entered" );
+
+ pdev = (struct nfp_dev *)ddi_get_soft_state(state_head, getminor((dev_t)arg));
+ if( !pdev ) {
+ nfp_log( NFP_DBG1, "nfp_close: cannot find dev.");
+ return ENODEV;
+ }
+
+ switch (infocmd) {
+ case DDI_INFO_DEVT2DEVINFO:
+ if (pdev == NULL) {
+ *result = NULL;
+ error = DDI_FAILURE;
+ } else {
+ /*
+ * don't need to use a MUTEX even though we are
+ * accessing our instance structure; dev->dip
+ * never changes.
+ */
+ *result = pdev->dip;
+ error = DDI_SUCCESS;
+ }
+ break;
+ case DDI_INFO_DEVT2INSTANCE:
+ *result = (void *)(uintptr_t)getminor((dev_t)arg);
+ error = DDI_SUCCESS;
+ break;
+ default:
+ *result = NULL;
+ error = DDI_FAILURE;
+ }
+
+ nfp_log( NFP_DBG2, "nfp_getinfo: leaving." );
+ return (error);
+}
+
+/*-------------------------*/
+/* nfp_release */
+/*-------------------------*/
+
+static int nfp_release_dev( dev_info_t *dip ) {
+ nfp_dev *pdev;
+ int instance, i;
+ nfp_err ret;
+
+ nfp_log( NFP_DBG2, "nfp_release_dev: entering" );
+
+ instance = ddi_get_instance(dip);
+ pdev = (struct nfp_dev *)ddi_get_soft_state(state_head, instance);
+ if (pdev) {
+ nfp_log( NFP_DBG3, "nfp_release_dev: removing device" );
+
+ nfp_free_pci_push(pdev);
+
+ if( pdev->cmddev ) {
+ nfp_log( NFP_DBG3, "nfp_release_dev: destroying cmd dev" );
+ ret = pdev->cmddev->destroy(pdev->common.cmdctx);
+ if (ret != NFP_SUCCESS) {
+ nfp_log( NFP_DBG1, " nfp_release_dev : cmddev->destroy failed ");
+ return nfp_oserr( ret );
+ }
+ }
+
+ if(pdev->high_iblock_cookie) {
+ nfp_log( NFP_DBG3, "nfp_release_dev: removing high and soft irq" );
+ ddi_remove_softintr(pdev->soft_int_id);
+ ddi_remove_intr(pdev->dip, 0, pdev->high_iblock_cookie);
+ mutex_destroy( &pdev->busy_mutex );
+ cv_destroy( &pdev->rd_cv );
+ mutex_destroy( &pdev->isr_mutex );
+ mutex_destroy( &pdev->high_mutex );
+ } else if(pdev->iblock_cookie) {
+ nfp_log( NFP_DBG3, "nfp_release_dev: removing irq" );
+ ddi_remove_intr(pdev->dip, 0, pdev->iblock_cookie);
+ mutex_destroy( &pdev->busy_mutex );
+ cv_destroy( &pdev->rd_cv );
+ mutex_destroy( &pdev->isr_mutex );
+ }
+ if(pdev->low_iblock_cookie) {
+ ddi_remove_intr(pdev->dip, 0, pdev->low_iblock_cookie);
+ mutex_destroy( &pdev->low_mutex);
+ }
+
+ for(i=0;i<6;i++) {
+ if( pdev->common.extra[i] ) {
+ nfp_log( NFP_DBG3, "nfp_release_dev: unmapping BAR %d", i );
+ ddi_regs_map_free ((ddi_acc_handle_t *)&pdev->common.extra[i]);
+ }
+ }
+
+ ddi_remove_minor_node(dip, NULL);
+
+ if (pdev->conf_handle)
+ pci_config_teardown( &pdev->conf_handle );
+
+ ddi_soft_state_free(state_head, instance);
+ }
+ nfp_log( NFP_DBG2, "nfp_release: finished" );
+
+ return DDI_SUCCESS;
+}
+
+
+/*-------------------------*/
+/* nfp_attach */
+/*-------------------------*/
+
+static int nfp_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) {
+ int instance;
+ nfp_dev *pdev = NULL;
+ int intres;
+ uint16_t device, vendor, sub_device, sub_vendor;
+ long *outp;
+ nfpcmd_dev const *cmddev;
+ int index, i;
+ nfp_err ret;
+
+ nfp_log( NFP_DBG2, "nfp_attach: entered." );
+
+ if (cmd != DDI_ATTACH) {
+ nfp_log( NFP_DBG1, "nfp_attach: bad command." );
+ goto bailout;
+ }
+
+ instance = ddi_get_instance(dip);
+
+ if (ddi_soft_state_zalloc(state_head, instance) != 0) {
+ nfp_log( NFP_DBG1, "nfp_attach: ddi_soft_state_zalloc() failed." );
+ goto bailout;
+ }
+
+ pdev = (struct nfp_dev *)ddi_get_soft_state(state_head, instance);
+ if( !pdev ) {
+ nfp_log( NFP_DBG1, "nfp_attach: cannot find dev.");
+ return ENODEV;
+ }
+ pdev->dip = dip;
+
+ /* map in pci config registers */
+ if (pci_config_setup(dip, &pdev->conf_handle)) {
+ nfp_log( NFP_DBG1, "nfp_attach: pci_config_setup() failed." );
+ goto bailout;
+ }
+
+ /* find out what we have got */
+ vendor= PCI_CONFIG_GET16( pdev->conf_handle, PCI_CONF_VENID );
+ device = PCI_CONFIG_GET16( pdev->conf_handle, PCI_CONF_DEVID );
+ sub_vendor = PCI_CONFIG_GET16( pdev->conf_handle, PCI_CONF_SUBVENID );
+ sub_device = PCI_CONFIG_GET16( pdev->conf_handle, PCI_CONF_SUBSYSID );
+
+ index= 0;
+ while( (cmddev = nfp_drvlist[index++]) != NULL ) {
+ if( cmddev->vendorid == vendor &&
+ cmddev->deviceid == device &&
+ cmddev->sub_vendorid == sub_vendor &&
+ cmddev->sub_deviceid == sub_device )
+ break;
+ }
+ if( !cmddev ) {
+ nfp_log( NFP_DBG1, "nfp_attach: unknonw device." );
+ goto bailout;
+ }
+
+ /* map BARs */
+ for( i=0; i<6; i++ ) {
+ if( cmddev->bar_sizes[i] ) {
+ off_t size;
+ if( ddi_dev_regsize(dip, i+1, &size) != DDI_SUCCESS) {
+ nfp_log( NFP_DBG1, "nfp_attach: ddi_dev_regsize() failed for BAR %d", i );
+ goto bailout;
+ }
+ if( size < (cmddev->bar_sizes[i] & ~NFP_MEMBAR_MASK) ) {
+ nfp_log( NFP_DBG1, "nfp_attach: BAR %d too small %x (%x)", i, size, (cmddev->bar_sizes[i] & ~0xF) );
+ goto bailout;
+ }
+ if (ddi_regs_map_setup(dip, i+1, (caddr_t *)&pdev->common.bar[i],
+ 0, cmddev->bar_sizes[i] & ~NFP_MEMBAR_MASK, &nosw_attr, (ddi_acc_handle_t *)&pdev->common.extra[i] )) {
+ nfp_log( NFP_DBG1, "nfp_attach: ddi_regs_map_setup() failed for BAR %d", i );
+ goto bailout;
+ }
+ nfp_log( NFP_DBG3, "nfp_attach: BAR[%d] mapped to %x (%x)", i, pdev->common.bar[i], size );
+ }
+ }
+
+ pdev->read_buf = NULL;
+ pdev->rd_dma_ok = 0;
+
+ /* attach to minor node */
+ if (ddi_create_minor_node(dip, "nfp", S_IFCHR, instance, (char *)cmddev->name, 0) == DDI_FAILURE) {
+ ddi_remove_minor_node(dip, NULL);
+ nfp_log( NFP_DBG1, "nfp_attach: ddi_create_minor_node() failed." );
+ goto bailout;
+ }
+
+ pdev->wr_ready = 1;
+ pdev->rd_ready = 0;
+ pdev->rd_pending = 0;
+ pdev->rd_outstanding = 0;
+ pdev->busy=0;
+ pdev->cmddev= cmddev;
+
+ ret = pdev->cmddev->create(&pdev->common);
+ if( ret != NFP_SUCCESS) {
+ nfp_log( NFP_DBG1, "nfp_attach: failed to create command device");
+ goto bailout;
+ }
+ pdev->common.dev= pdev;
+
+ if (ddi_intr_hilevel(dip, 0) != 0){
+ nfp_log( NFP_DBG2, "nfp_attach: high-level interrupt");
+ if( ddi_get_iblock_cookie(dip, 0, &pdev->high_iblock_cookie) ) {
+ nfp_log( NFP_DBG1, "nfp_attach: ddi_get_iblock_cookie(high) failed." );
+ goto bailout;
+ }
+ if( ddi_get_iblock_cookie(dip, 0, &pdev->low_iblock_cookie) ) {
+ nfp_log( NFP_DBG1, "nfp_attach: ddi_get_iblock_cookie(low) failed." );
+ goto bailout;
+ }
+ mutex_init(&pdev->high_mutex, NULL, MUTEX_DRIVER,
+ (void *)pdev->high_iblock_cookie);
+ mutex_init(&pdev->low_mutex, NULL, MUTEX_DRIVER,
+ (void *)pdev->low_iblock_cookie);
+ if (ddi_add_intr(dip, 0, NULL,
+ NULL, nfp_isr,
+ (caddr_t)pdev) != DDI_SUCCESS) {
+ nfp_log( NFP_DBG1, "nfp_attach: ddi_add_intr(high) failed." );
+ goto bailout;
+ }
+ if( ddi_get_soft_iblock_cookie(dip, DDI_SOFTINT_HIGH,
+ &pdev->iblock_cookie) ) {
+ nfp_log( NFP_DBG1, "nfp_attach: ddi_get_iblock_cookie(soft) failed." );
+ goto bailout;
+ }
+ mutex_init(&pdev->isr_mutex, NULL, MUTEX_DRIVER,
+ (void *)pdev->iblock_cookie);
+ if (ddi_add_softintr(dip, DDI_SOFTINT_HIGH, &pdev->soft_int_id,
+ &pdev->iblock_cookie, NULL,
+ nfp_soft_isr, (caddr_t)pdev) != DDI_SUCCESS)
+ goto bailout;
+ pdev->high_intr= 1;
+ } else {
+ nfp_log( NFP_DBG2, "nfp_attach: low-level interrupt");
+
+ if (ddi_get_iblock_cookie (dip, 0, &pdev->iblock_cookie)) {
+ nfp_log( NFP_DBG1, "nfp_attach: ddi_get_iblock_cookie() failed." );
+ goto bailout;
+ }
+
+ mutex_init(&pdev->isr_mutex, "nfp isr mutex", MUTEX_DRIVER, (void *)pdev->iblock_cookie);
+
+ if (ddi_add_intr(dip, 0, NULL,
+ (ddi_idevice_cookie_t *)NULL, nfp_isr,
+ (caddr_t)pdev) != DDI_SUCCESS) {
+ nfp_log( NFP_DBG1, "nfp_attach: ddi_add_intr() failed." );
+ goto bailout;
+ }
+ }
+ mutex_init(&pdev->busy_mutex, "nfp busy mutex", MUTEX_DRIVER, NULL );
+ cv_init(&pdev->rd_cv, "nfp read condvar", CV_DRIVER, NULL );
+
+ /* get our bus and slot num */
+ if (ddi_getlongprop (DDI_DEV_T_NONE,
+ pdev->dip, 0, "reg",
+ (caddr_t)&outp, &intres) != DDI_PROP_NOT_FOUND) {
+ nfp_log( NFP_DBG2, "nfp_attach: ddi_getlongprop('reg') ok." );
+ if( intres > 0 ) {
+ nfp_log( NFP_DBG1, "nfp_attach: found PCI nfast bus %x slot %x.",
+ ((*outp)>>16) & 0xff, ((*outp)>>11) & 0x1f );
+ }
+ }
+
+ nfp_log( NFP_DBG2, "nfp_attach: attach succeeded." );
+ return DDI_SUCCESS;
+
+bailout:
+ (void) nfp_release_dev( dip );
+
+ return DDI_FAILURE;
+}
+
+/*-------------------------*/
+/* nfp_detach */
+/*-------------------------*/
+
+/*
+ * When our driver is unloaded, nfp_detach cleans up and frees the resources
+ * we allocated in nfp_attach.
+ */
+static int nfp_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) {
+ if (cmd != DDI_DETACH)
+ return (DDI_FAILURE);
+
+ (void) nfp_release_dev(dip);
+
+ return (DDI_SUCCESS);
+}
+
+/*-------------------------*/
+/* _init */
+/*-------------------------*/
+
+int _init(void) {
+ register int error;
+
+ nfp_log( NFP_DBG2, "_init: entered" );
+
+ if ((error = ddi_soft_state_init(&state_head, sizeof (struct nfp_dev), 1)) != 0) {
+ nfp_log( NFP_DBG1, "_init: soft_state_init() failed" );
+ return (error);
+ }
+
+ if ((error = mod_install(&modlinkage)) != 0) {
+ nfp_log( NFP_DBG1, "_init: mod_install() failed" );
+ ddi_soft_state_fini(&state_head);
+ }
+
+ nfp_log( NFP_DBG2, "_init: leaving" );
+ return (error);
+}
+
+/*-------------------------*/
+/* _info */
+/*-------------------------*/
+
+int _info(struct modinfo *modinfop) {
+ nfp_log( NFP_DBG2, "_info: entered" );
+
+ return (mod_info(&modlinkage, modinfop));
+}
+
+/*-------------------------*/
+/* _fini */
+/*-------------------------*/
+
+int _fini(void) {
+ int status;
+
+ nfp_log( NFP_DBG2, "_fini: entered" );
+
+ if ((status = mod_remove(&modlinkage)) != 0) {
+ nfp_log( NFP_DBG2, "_fini: mod_remove() failed." );
+ return (status);
+ }
+
+ ddi_soft_state_fini(&state_head);
+
+ nfp_log( NFP_DBG2, "_fini: leaving" );
+
+ return (status);
+}
+
diff --git a/usr/src/uts/common/io/nfp/i21285.c b/usr/src/uts/common/io/nfp/i21285.c
new file mode 100644
index 0000000000..f51a09188d
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/i21285.c
@@ -0,0 +1,310 @@
+/*
+
+i21285.c: nCipher PCI HSM intel/digital 21285 command driver
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+
+history
+
+09/10/2001 jsh Original
+
+*/
+
+#include "nfp_common.h"
+#include "nfp_error.h"
+#include "nfp_hostif.h"
+#include "nfp_osif.h"
+#include "i21285.h"
+#include "nfp_cmd.h"
+#include "nfpci.h"
+
+/* create ------------------------------------------------------- */
+
+static nfp_err i21285_create( nfp_cdev *pdev ) {
+ unsigned int tmp32;
+
+ nfp_log( NFP_DBG2, "i21285_create: entered");
+ pdev->cmdctx= pdev; /* set our context to just be a pointer to our nfp_cdev */
+
+ nfp_log( NFP_DBG2, "i21285_create: enable doorbell");
+ if(!pdev->bar[ IOBAR ]) {
+ nfp_log( NFP_DBG1, "i21285_create: null BAR[%d]", IOBAR );
+ return NFP_ENOMEM;
+ }
+ TO_LE32_IO( &tmp32, DOORBELL_ENABLE | POSTLIST_ENABLE);
+ nfp_outl( pdev, IOBAR, I21285_OFFSET_INTERRUPT_MASK, tmp32 );
+
+ return NFP_SUCCESS;
+}
+
+/* stop ------------------------------------------------------- */
+
+static nfp_err i21285_destroy( void * ctx ) {
+ nfp_cdev *pdev;
+ unsigned int tmp32;
+
+ nfp_log( NFP_DBG2, "i21285_destroy: entered");
+
+ pdev= (nfp_cdev *)ctx;
+ if(!pdev) {
+ nfp_log( NFP_DBG1, "i21285_destroy: NULL pdev");
+ return NFP_ENODEV;
+ }
+ if(!pdev->bar[ IOBAR ]) {
+ nfp_log( NFP_DBG1, "i21285_destroy: null BAR[%d]", IOBAR );
+ return NFP_ENOMEM;
+ }
+ TO_LE32_IO( &tmp32, DOORBELL_DISABLE | POSTLIST_DISABLE );
+ nfp_outl( pdev, IOBAR, I21285_OFFSET_INTERRUPT_MASK, tmp32 );
+
+ return NFP_SUCCESS;
+}
+
+/* open ------------------------------------------------------- */
+
+/* ARGSUSED */
+static nfp_err i21285_open( void * ctx ) {
+ nfp_log( NFP_DBG2, "i21285_open: entered");
+
+ return NFP_SUCCESS;
+}
+
+/* close ------------------------------------------------------- */
+
+/* ARGSUSED */
+static nfp_err i21285_close( void * ctx ) {
+ nfp_log( NFP_DBG2, "i21285_close: entered");
+
+ return NFP_SUCCESS;
+}
+
+/* isr ------------------------------------------------------- */
+
+static nfp_err i21285_isr( void *ctx, int *handled ) {
+ nfp_cdev *pdev;
+ unsigned int doorbell;
+ unsigned int tmp32;
+
+ nfp_log( NFP_DBG3, "i21285_isr: entered");
+
+ *handled= 0;
+ pdev= (nfp_cdev *)ctx;
+ if(!pdev) {
+ nfp_log( NFP_DBG1, "i21285_isr: NULL pdev");
+ return NFP_ENODEV;
+ }
+
+ doorbell= nfp_inl( pdev, IOBAR, I21285_OFFSET_DOORBELL);
+ doorbell= FROM_LE32_IO(&doorbell) & 0xffff;
+ while( doorbell && doorbell != 0xffff) {
+ *handled= 1;
+ /* service interrupts */
+ if( doorbell & (NFAST_INT_DEVICE_WRITE_OK | NFAST_INT_DEVICE_WRITE_FAILED)) {
+ TO_LE32_IO( &tmp32, NFAST_INT_DEVICE_WRITE_OK | NFAST_INT_DEVICE_WRITE_FAILED);
+ nfp_outl( pdev, IOBAR, I21285_OFFSET_DOORBELL, tmp32 );
+
+ nfp_log(NFP_DBG2, "i21285_isr: write done interrupt, ok = %d.", doorbell & NFAST_INT_DEVICE_WRITE_OK ? 1 : 0 );
+
+ nfp_write_complete(pdev->dev, doorbell & NFAST_INT_DEVICE_WRITE_OK ? 1 : 0 );
+ }
+
+ if( doorbell & (NFAST_INT_DEVICE_READ_OK | NFAST_INT_DEVICE_READ_FAILED)) {
+ TO_LE32_IO( &tmp32, NFAST_INT_DEVICE_READ_OK | NFAST_INT_DEVICE_READ_FAILED );
+ nfp_outl( pdev, IOBAR, I21285_OFFSET_DOORBELL, tmp32 );
+
+ nfp_log(NFP_DBG2, "i21285_isr: read ack interrupt, ok = %d.", doorbell & NFAST_INT_DEVICE_READ_OK ? 1 : 0 );
+ nfp_read_complete( pdev->dev, doorbell & NFAST_INT_DEVICE_READ_OK ? 1 : 0);
+ }
+
+ if( doorbell & ~(NFAST_INT_DEVICE_READ_OK | NFAST_INT_DEVICE_READ_FAILED |
+ NFAST_INT_DEVICE_WRITE_OK | NFAST_INT_DEVICE_WRITE_FAILED)) {
+ nfp_log( NFP_DBG1, "i21285_isr: unexpected interrupt %x", doorbell );
+ TO_LE32_IO( &tmp32, 0xffff & doorbell );
+ nfp_outl( pdev, IOBAR, I21285_OFFSET_DOORBELL, tmp32 );
+ }
+ doorbell= nfp_inl( pdev, IOBAR, I21285_OFFSET_DOORBELL);
+ doorbell= FROM_LE32_IO(&doorbell) & 0xffff;
+ }
+ return 0;
+}
+
+/* write ------------------------------------------------------- */
+
+static nfp_err i21285_write( const char *block, int len, void *ctx ) {
+ nfp_cdev *cdev;
+ unsigned int hdr[2];
+ nfp_err ne;
+ unsigned int tmp32;
+
+ nfp_log( NFP_DBG2, "i21285_write: entered");
+
+ cdev= (nfp_cdev *)ctx;
+ if(!cdev) {
+ nfp_log( NFP_DBG1, "i21285_write: NULL pdev");
+ return NFP_ENODEV;
+ }
+
+ nfp_log(NFP_DBG2, "i21285_write: pdev->bar[ MEMBAR ]= %x\n", cdev->bar[ MEMBAR ]);
+ nfp_log(NFP_DBG2, "i21285_write: pdev->bar[ IOBAR ]= %x\n", cdev->bar[ IOBAR ]);
+ if(!cdev->bar[ MEMBAR ]) {
+ nfp_log( NFP_DBG1, "i21285_write: null BAR[%d]", MEMBAR );
+ return NFP_ENOMEM;
+ }
+ ne= nfp_copy_from_user_to_dev( cdev, MEMBAR, NFPCI_JOBS_WR_DATA, block, len);
+ if (ne) {
+ nfp_log( NFP_DBG1, "i21285_write: nfp_copy_from_user_to_dev failed");
+ return ne;
+ }
+ TO_LE32_MEM(&hdr[0], NFPCI_JOB_CONTROL);
+ TO_LE32_MEM(&hdr[1], len);
+
+ ne= nfp_copy_to_dev( cdev, MEMBAR, NFPCI_JOBS_WR_CONTROL, (const char *)hdr, 8);
+ if (ne) {
+ nfp_log( NFP_DBG1, "i21285_write: nfp_copy_to_dev failed");
+ return ne;
+ }
+
+ ne= nfp_copy_from_dev( cdev, MEMBAR, NFPCI_JOBS_WR_LENGTH, (char *)hdr, 4);
+ if (ne) {
+ nfp_log( NFP_DBG1, "i21285_write: nfp_copy_from_dev failed");
+ return ne;
+ }
+
+ TO_LE32_MEM( &tmp32, len );
+ if ( hdr[0] != tmp32 ) {
+ nfp_log( NFP_DBG1, "i21285_write: length not written");
+ return NFP_EIO;
+ }
+
+ TO_LE32_IO( &tmp32, NFAST_INT_HOST_WRITE_REQUEST);
+
+ nfp_outl( cdev, IOBAR, I21285_OFFSET_DOORBELL, tmp32 );
+
+ nfp_log( NFP_DBG2, "i21285_write: done");
+ return NFP_SUCCESS;
+}
+
+/* read ------------------------------------------------------- */
+
+static nfp_err i21285_read( char *block, int len, void *ctx, int *rcount) {
+ nfp_cdev *cdev;
+ nfp_err ne;
+ int count;
+
+ nfp_log( NFP_DBG2, "i21285_read: entered, len %d", len);
+ *rcount= 0;
+
+ cdev= (nfp_cdev *)ctx;
+ if(!cdev) {
+ nfp_log( NFP_DBG1, "i21285_read: NULL pdev");
+ return NFP_ENODEV;
+ }
+
+ if(!cdev->bar[ MEMBAR ]) {
+ nfp_log( NFP_DBG1, "i21285_read: null BAR[%d]", MEMBAR );
+ return NFP_ENOMEM;
+ }
+ ne= nfp_copy_from_dev( cdev, MEMBAR, NFPCI_JOBS_RD_LENGTH, (char *)&count, 4);
+ if(ne) {
+ nfp_log( NFP_DBG1, "i21285_read: nfp_copy_from_dev failed.");
+ return ne;
+ }
+ count= FROM_LE32_MEM(&count);
+ if(count<0 || count>len) {
+ nfp_log( NFP_DBG1, "i21285_read: bad byte count (%d) from device", count);
+ return NFP_EIO;
+ }
+ ne= nfp_copy_to_user_from_dev( cdev, MEMBAR, NFPCI_JOBS_RD_DATA, block, count);
+ if( ne ) {
+ nfp_log( NFP_DBG1, "i21285_read: nfp_copy_to_user_from_dev failed.");
+ return ne;
+ }
+ nfp_log( NFP_DBG2, "i21285_read: done");
+ *rcount= count;
+ return NFP_SUCCESS;
+}
+
+/* chupdate ------------------------------------------------------- */
+
+/* ARGSUSED */
+static nfp_err i21285_chupdate( char *data, int len, void *ctx ) {
+ nfp_log( NFP_DBG1, "i21285_chupdate: NYI");
+ return NFP_SUCCESS;
+}
+
+/* ensure reading -------------------------------------------------- */
+
+static nfp_err i21285_ensure_reading( unsigned int addr, int len, void *ctx ) {
+ nfp_cdev *cdev;
+ unsigned int hdr[2];
+ unsigned int tmp32;
+ nfp_err ne;
+
+ nfp_log( NFP_DBG2, "i21285_ensure_reading: entered");
+
+ if(addr) {
+ nfp_log( NFP_DBG2, "i21285_ensure_reading: bad addr");
+ return -NFP_EINVAL;
+ }
+
+ cdev= (nfp_cdev *)ctx;
+ if(!cdev) {
+ nfp_log( NFP_DBG1, "i21285_ensure_reading: NULL pdev");
+ return NFP_ENODEV;
+ }
+
+ if(!cdev->bar[ MEMBAR ]) {
+ nfp_log( NFP_DBG1, "i21285_ensure_reading: null BAR[%d]", MEMBAR );
+ return NFP_ENXIO;
+ }
+ nfp_log( NFP_DBG3, "i21285_ensure_reading: pdev->bar[ MEMBAR ]= %x", cdev->bar[ MEMBAR ]);
+ nfp_log( NFP_DBG3, "i21285_ensure_reading: pdev->bar[ IOBAR ]= %x", cdev->bar[ IOBAR ]);
+ TO_LE32_MEM( &hdr[0], NFPCI_JOB_CONTROL);
+ TO_LE32_MEM( &hdr[1], len);
+ ne= nfp_copy_to_dev( cdev, MEMBAR, NFPCI_JOBS_RD_CONTROL, (const char *)hdr, 8);
+ if (ne) {
+ nfp_log( NFP_DBG1, "i21285_ensure_reading: nfp_copy_to_dev failed");
+ return ne;
+ }
+ ne= nfp_copy_from_dev( cdev, MEMBAR, NFPCI_JOBS_RD_LENGTH, (char *)hdr, 4);
+ if (ne) {
+ nfp_log( NFP_DBG1, "i21285_ensure_reading: nfp_copy_from_dev failed");
+ return ne;
+ }
+ TO_LE32_MEM( &tmp32, len );
+ if ( hdr[0] != tmp32 ) {
+ nfp_log( NFP_DBG1, "i21285_ensure_reading: len not written");
+ return NFP_EIO;
+ };
+ TO_LE32_IO( &tmp32, NFAST_INT_HOST_READ_REQUEST );
+ nfp_outl( cdev, IOBAR, I21285_OFFSET_DOORBELL, tmp32 );
+
+ return NFP_SUCCESS;
+}
+
+/* command device structure ------------------------------------- */
+
+
+const nfpcmd_dev i21285_cmddev = {
+ "nCipher Gen 1 PCI",
+ PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_DEC_21285,
+ PCI_VENDOR_ID_NCIPHER, PCI_DEVICE_ID_NFAST_GEN1,
+ { 0, IOSIZE | PCI_BASE_ADDRESS_SPACE_IO, NFPCI_RAM_MINSIZE, 0, 0, 0 },
+ NFP_CMD_FLG_NEED_IOBUF,
+ i21285_create,
+ i21285_destroy,
+ i21285_open,
+ i21285_close,
+ i21285_isr,
+ i21285_write,
+ i21285_read,
+ i21285_chupdate,
+ i21285_ensure_reading,
+ 0, /* no debug */
+};
+
diff --git a/usr/src/uts/common/io/nfp/i21285.h b/usr/src/uts/common/io/nfp/i21285.h
new file mode 100644
index 0000000000..4ea1d853ec
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/i21285.h
@@ -0,0 +1,43 @@
+/*
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+*/
+
+#ifndef NFP_I21285_H
+#define NFP_I21285_H
+
+#ifndef PCI_VENDOR_ID_DEC
+#define PCI_VENDOR_ID_DEC 0x1011
+#endif
+#ifndef PCI_DEVICE_ID_DEC_21285
+#define PCI_DEVICE_ID_DEC_21285 0x1065
+#endif
+#ifndef PCI_VENDOR_ID_NCIPHER
+#define PCI_VENDOR_ID_NCIPHER 0x0100
+#endif
+
+#ifndef PCI_DEVICE_ID_NFAST_GEN1
+#define PCI_DEVICE_ID_NFAST_GEN1 0x0100
+#endif
+
+#define I21285_OFFSET_DOORBELL 0x60
+#define I21285_OFFSET_INTERRUPT_MASK 0x34
+
+#define DOORBELL_ENABLE 0x0
+#define DOORBELL_DISABLE 0x4
+
+#define POSTLIST_ENABLE 0x0
+#define POSTLIST_DISABLE 0x8
+
+#define IOBAR 1
+#define MEMBAR 2
+
+#define IOSIZE 0x80
+#define MEMSIZE 0x100000
+
+#endif
diff --git a/usr/src/uts/common/io/nfp/i21555.c b/usr/src/uts/common/io/nfp/i21555.c
new file mode 100644
index 0000000000..82024dc800
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/i21555.c
@@ -0,0 +1,423 @@
+/*
+
+i21555.c: nCipher PCI HSM intel 21555 command driver
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+history
+
+09/10/2001 jsh Original
+
+*/
+
+#include "nfp_common.h"
+#include "nfp_error.h"
+#include "nfp_hostif.h"
+#include "nfp_osif.h"
+#include "i21555.h"
+#include "nfp_cmd.h"
+#include "nfpci.h"
+
+/* started ------------------------------------------------------
+ *
+ * Check that device is ready to talk, by checking that
+ * the i21555 has master enabled on its secondary interface
+ */
+
+static nfp_err i21555_started( nfp_cdev *pdev ) {
+ unsigned int tmp32;
+#ifdef CONFIGSPACE_DEBUG
+ unsigned int reg32[64];
+ int i;
+#endif
+ nfp_err ne;
+
+ nfp_log( NFP_DBG2, "i21555_started: entered");
+
+#ifdef CONFIGSPACE_DEBUG
+ /* Suck up all the registers */
+ for (i=0; i < 64; i++) {
+ ne = nfp_config_inl( pdev, i*4, &reg32[i] );
+ }
+
+ for (i=0; i < 16; i++) {
+ int j = i * 4;
+ nfp_log( NFP_DBG3, "i21555 config reg %2x: %08x %08x %08x %08x", j*4,
+ reg32[j], reg32[j+1], reg32[j+2], reg32[j+3]);
+ }
+#endif
+
+ ne = nfp_config_inl( pdev, I21555_CFG_SEC_CMD_STATUS, &tmp32 );
+ if (ne) {
+ /* succeed if PCI config reads are not implemented */
+ if (ne == NFP_EUNKNOWN)
+ return NFP_SUCCESS;
+ nfp_log( NFP_DBG1, "i21555_started: nfp_config_inl failed");
+ return ne;
+ }
+
+ tmp32= FROM_LE32_IO(&tmp32) & 0xffff;
+
+ if ( tmp32 & CFG_CMD_MASTER ) {
+ nfp_log( NFP_DBG3, "i21555_started: Yes %x", tmp32);
+ return NFP_SUCCESS;
+ } else {
+ nfp_log( NFP_DBG1, "i21555_started: device not started yet %x", tmp32);
+ return NFP_ESTARTING;
+ }
+}
+
+/* create ------------------------------------------------------- */
+
+static nfp_err i21555_create( nfp_cdev *pdev ) {
+ unsigned int tmp32;
+
+ nfp_log( NFP_DBG2, "i21555_create: entered");
+ pdev->cmdctx= pdev; /* set our context to just be a pointer to our nfp_cdev */
+
+ if(!pdev->bar[ IOBAR ]) {
+ nfp_log( NFP_DBG1, "i21555_create: null BAR[%d]", IOBAR );
+ return NFP_ENOMEM;
+ }
+ nfp_log( NFP_DBG2, "i21555_create: enable doorbell");
+ TO_LE32_IO( &tmp32, I21555_DOORBELL_PRI_ENABLE );
+ nfp_outl( pdev, IOBAR, I21555_OFFSET_DOORBELL_PRI_SET_MASK, tmp32 );
+ nfp_outl( pdev, IOBAR, I21555_OFFSET_DOORBELL_PRI_CLEAR_MASK, tmp32 );
+ return NFP_SUCCESS;
+}
+
+/* stop ------------------------------------------------------- */
+
+static nfp_err i21555_destroy( void * ctx ) {
+ nfp_cdev *pdev;
+ unsigned int tmp32;
+
+ nfp_log( NFP_DBG2, "i21555_destroy: entered");
+
+ pdev= (nfp_cdev *)ctx;
+ if(!pdev) {
+ nfp_log( NFP_DBG1, "i21555_destroy: NULL pdev");
+ return NFP_ENODEV;
+ }
+ if(!pdev->bar[ IOBAR ]) {
+ nfp_log( NFP_DBG1, "i21555_destroy: null BAR[%d]", IOBAR );
+ return NFP_ENOMEM;
+ }
+ TO_LE32_IO( &tmp32, I21555_DOORBELL_PRI_DISABLE );
+ nfp_outl( pdev, IOBAR, I21555_OFFSET_DOORBELL_PRI_SET_MASK, tmp32 );
+ nfp_outl( pdev, IOBAR, I21555_OFFSET_DOORBELL_PRI_CLEAR_MASK, tmp32 );
+
+ return NFP_SUCCESS;
+}
+
+/* open ------------------------------------------------------- */
+
+/* ARGSUSED */
+static nfp_err i21555_open( void * ctx ) {
+
+ nfp_log( NFP_DBG2, "i21555_open: entered");
+
+ return NFP_SUCCESS;
+}
+
+/* close ------------------------------------------------------- */
+
+/* ARGSUSED */
+static nfp_err i21555_close( void * ctx ) {
+ nfp_log( NFP_DBG2, "i21555_close: entered");
+
+ return NFP_SUCCESS;
+}
+
+/* isr ------------------------------------------------------- */
+
+static nfp_err i21555_isr( void *ctx, int *handled ) {
+ nfp_cdev *pdev;
+ nfp_err ne;
+ unsigned short doorbell;
+ unsigned short tmp16;
+
+ nfp_log( NFP_DBG3, "i21555_isr: entered");
+
+ *handled= 0;
+ pdev= (nfp_cdev *)ctx;
+ if(!pdev) {
+ nfp_log( NFP_DBG1, "i21555_isr: NULL pdev");
+ return NFP_ENODEV;
+ }
+
+ pdev->stats.isr++;
+
+ if(!pdev->bar[ IOBAR ]) {
+ nfp_log( NFP_DBG1, "i21555_isr: null BAR[%d]", IOBAR );
+ return NFP_ENOMEM;
+ }
+
+ /* This interrupt may not be from our module, so check that it actually is
+ * us before handling it.
+ */
+ ne = i21555_started( pdev );
+ if (ne) {
+ if (ne != NFP_ESTARTING) {
+ nfp_log( NFP_DBG1, "i21555_isr: i21555_started failed");
+ }
+ return ne;
+ }
+
+ doorbell= nfp_inw( pdev, IOBAR, I21555_OFFSET_DOORBELL_PRI_SET);
+ doorbell= FROM_LE16_IO(&doorbell);
+ while( doorbell && doorbell != 0xffff) {
+ *handled= 1;
+ /* service interrupts */
+ if( doorbell & (NFAST_INT_DEVICE_WRITE_OK | NFAST_INT_DEVICE_WRITE_FAILED)) {
+ pdev->stats.isr_write++;
+ TO_LE16_IO(&tmp16,NFAST_INT_DEVICE_WRITE_OK | NFAST_INT_DEVICE_WRITE_FAILED);
+ nfp_outw( pdev, IOBAR, I21555_OFFSET_DOORBELL_PRI_CLEAR, tmp16 );
+
+ nfp_log( NFP_DBG2, "i21555_isr: write done interrupt, ok = %d.", doorbell & NFAST_INT_DEVICE_WRITE_OK ? 1 : 0 );
+
+ nfp_write_complete(pdev->dev, doorbell & NFAST_INT_DEVICE_WRITE_OK ? 1 : 0 );
+ }
+
+ if( doorbell & (NFAST_INT_DEVICE_READ_OK | NFAST_INT_DEVICE_READ_FAILED)) {
+ pdev->stats.isr_read++;
+ TO_LE16_IO(&tmp16,NFAST_INT_DEVICE_READ_OK | NFAST_INT_DEVICE_READ_FAILED);
+ nfp_outw( pdev, IOBAR, I21555_OFFSET_DOORBELL_PRI_CLEAR, tmp16 );
+
+ nfp_log( NFP_DBG2, "i21555_isr: read ack interrupt, ok = %d.", doorbell & NFAST_INT_DEVICE_READ_OK ? 1 : 0 );
+ nfp_read_complete( pdev->dev, doorbell & NFAST_INT_DEVICE_READ_OK ? 1 : 0);
+ }
+
+ if( doorbell & ~(NFAST_INT_DEVICE_READ_OK | NFAST_INT_DEVICE_READ_FAILED |
+ NFAST_INT_DEVICE_WRITE_OK | NFAST_INT_DEVICE_WRITE_FAILED)) {
+ TO_LE16_IO(&tmp16,doorbell);
+ nfp_outw( pdev, IOBAR, I21555_OFFSET_DOORBELL_PRI_CLEAR, tmp16 );
+ nfp_log( NFP_DBG1, "i21555_isr: unexpected interrupt %x", doorbell );
+ }
+ doorbell= nfp_inw( pdev, IOBAR, I21555_OFFSET_DOORBELL_PRI_SET);
+ doorbell= FROM_LE16_IO(&doorbell);
+ }
+ nfp_log( NFP_DBG3, "i21555_isr: exiting");
+ return 0;
+}
+
+/* write ------------------------------------------------------- */
+
+static nfp_err i21555_write( const char *block, int len, void *ctx) {
+ nfp_cdev *cdev;
+ unsigned int hdr[2];
+ nfp_err ne;
+ unsigned short tmp16;
+ unsigned int tmp32;
+
+ nfp_log( NFP_DBG2, "i21555_write: entered");
+
+ cdev= (nfp_cdev *)ctx;
+ if(!cdev) {
+ nfp_log( NFP_DBG1, "i21555_write: NULL cdev");
+ return NFP_ENODEV;
+ }
+
+ cdev->stats.write_fail++;
+
+ if(!cdev->bar[ IOBAR ]) {
+ nfp_log( NFP_DBG1, "i21555_write: null BAR[%d]", IOBAR );
+ return NFP_ENOMEM;
+ }
+
+ ne = i21555_started( cdev );
+ if (ne) {
+ if (ne != NFP_ESTARTING) {
+ nfp_log( NFP_DBG1, "i21555_write: i21555_started failed");
+ }
+ return ne;
+ }
+
+ nfp_log( NFP_DBG3, "i21555_write: cdev->bar[ MEMBAR ]= %x", cdev->bar[ MEMBAR ]);
+ nfp_log( NFP_DBG3, "i21555_write: cdev->bar[ IOBAR ]= %x", cdev->bar[ IOBAR ]);
+ nfp_log( NFP_DBG3, "i21555_write: block len %d", len );
+ ne= nfp_copy_from_user_to_dev( cdev, MEMBAR, NFPCI_JOBS_WR_DATA, block, len);
+ if (ne) {
+ nfp_log( NFP_DBG1, "i21555_write: nfp_copy_from_user_to_dev failed");
+ return ne;
+ }
+ TO_LE32_MEM(&hdr[0], NFPCI_JOB_CONTROL);
+ TO_LE32_MEM(&hdr[1], len);
+ ne= nfp_copy_to_dev( cdev, MEMBAR, NFPCI_JOBS_WR_CONTROL, (const char *)hdr, 8);
+ if (ne) {
+ nfp_log( NFP_DBG1, "i21555_write: nfp_copy_to_dev failed");
+ return ne;
+ }
+
+ ne= nfp_copy_from_dev( cdev, MEMBAR, NFPCI_JOBS_WR_LENGTH, (char *)hdr, 4);
+ if (ne) {
+ nfp_log( NFP_DBG1, "i21555_write: nfp_copy_from_dev failed");
+ return ne;
+ }
+
+ TO_LE32_MEM(&tmp32, len);
+ if ( hdr[0] != tmp32 ) {
+ nfp_log( NFP_DBG1, "i21555_write: length not written");
+ return NFP_EIO;
+ }
+ TO_LE16_IO(&tmp16, NFAST_INT_HOST_WRITE_REQUEST >> 16);
+ nfp_outw( cdev, IOBAR, I21555_OFFSET_DOORBELL_SEC_SET, tmp16);
+
+ cdev->stats.write_fail--;
+ cdev->stats.write_block++;
+ cdev->stats.write_byte += len;
+
+ nfp_log( NFP_DBG2, "i21555_write: done");
+ return NFP_SUCCESS;
+}
+
+/* read ------------------------------------------------------- */
+
+static nfp_err i21555_read( char *block, int len, void *ctx, int *rcount) {
+ nfp_cdev *cdev;
+ nfp_err ne;
+ int count;
+
+ nfp_log( NFP_DBG2, "i21555_read: entered");
+ *rcount= 0;
+
+ cdev= (nfp_cdev *)ctx;
+ if(!cdev) {
+ nfp_log( NFP_DBG1, "i21555_read: NULL pdev");
+ return NFP_ENODEV;
+ }
+
+ cdev->stats.read_fail++;
+
+ if(!cdev->bar[ IOBAR ]) {
+ nfp_log( NFP_DBG1, "i21555_read: null BAR[%d]", IOBAR );
+ return NFP_ENOMEM;
+ }
+
+ ne= nfp_copy_from_dev( cdev, MEMBAR, NFPCI_JOBS_RD_LENGTH, (char *)&count, 4);
+ if (ne) {
+ nfp_log( NFP_DBG1, "i21555_read: nfp_copy_from_dev failed.");
+ return ne;
+ }
+ count= FROM_LE32_MEM(&count);
+ if(count<0 || count>len) {
+ nfp_log( NFP_DBG1, "i21555_read: bad byte count (%d) from device", count);
+ return NFP_EIO;
+ }
+ ne= nfp_copy_to_user_from_dev( cdev, MEMBAR, NFPCI_JOBS_RD_DATA, block, count);
+ if (ne) {
+ nfp_log( NFP_DBG1, "i21555_read: nfp_copy_to_user failed.");
+ return ne;
+ }
+ nfp_log( NFP_DBG2, "i21555_read: done");
+ *rcount= count;
+ cdev->stats.read_fail--;
+ cdev->stats.read_block++;
+ cdev->stats.read_byte += len;
+ return NFP_SUCCESS;
+}
+
+/* chupdate ------------------------------------------------------- */
+
+/* ARGSUSED */
+static nfp_err i21555_chupdate( char *data, int len, void *ctx ) {
+ nfp_log( NFP_DBG1, "i21555_chupdate: NYI");
+ return NFP_SUCCESS;
+}
+
+/* ensure reading -------------------------------------------------- */
+
+static nfp_err i21555_ensure_reading( unsigned int addr, int len, void *ctx ) {
+ nfp_cdev *cdev;
+ unsigned int hdr[3];
+ unsigned short tmp16;
+ unsigned int tmp32;
+ nfp_err ne;
+ int hdr_len;
+
+ nfp_log( NFP_DBG2, "i21555_ensure_reading: entered");
+
+ cdev= (nfp_cdev *)ctx;
+ if(!cdev) {
+ nfp_log( NFP_DBG1, "i21555_ensure_reading: NULL pdev");
+ return NFP_ENODEV;
+ }
+
+ cdev->stats.ensure_fail++;
+
+ if(!cdev->bar[ IOBAR ]) {
+ nfp_log( NFP_DBG1, "i21555_ensure_reading: null BAR[%d]", IOBAR );
+ return NFP_ENOMEM;
+ }
+
+ ne = i21555_started( cdev );
+ if (ne) {
+ if (ne != NFP_ESTARTING) {
+ nfp_log( NFP_DBG1, "i21555_ensure_reading: i21555_started failed");
+ }
+ return ne;
+ }
+
+ nfp_log( NFP_DBG3, "i21555_ensure_reading: pdev->bar[ MEMBAR ]= %x", cdev->bar[ MEMBAR ]);
+ nfp_log( NFP_DBG3, "i21555_ensure_reading: pdev->bar[ IOBAR ]= %x", cdev->bar[ IOBAR ]);
+ if(addr) {
+ nfp_log( NFP_DBG3, "i21555_ensure_reading: new format, addr %x", addr);
+ TO_LE32_MEM(&hdr[0], NFPCI_JOB_CONTROL_PCI_PUSH);
+ TO_LE32_MEM(&hdr[1], len);
+ TO_LE32_MEM(&hdr[2], addr);
+ hdr_len= 12;
+ } else {
+ TO_LE32_MEM(&hdr[0], NFPCI_JOB_CONTROL);
+ TO_LE32_MEM(&hdr[1], len);
+ hdr_len= 8;
+ }
+ ne= nfp_copy_to_dev( cdev, MEMBAR, NFPCI_JOBS_RD_CONTROL, (const char *)hdr, hdr_len);
+ if (ne) {
+ nfp_log( NFP_DBG1, "i21555_ensure_reading: nfp_copy_to_dev failed");
+ return ne;
+ }
+
+ ne= nfp_copy_from_dev( cdev, MEMBAR, NFPCI_JOBS_RD_LENGTH, (char *)hdr, 4);
+ if (ne) {
+ nfp_log( NFP_DBG1, "i21555_ensure_reading: nfp_copy_from_dev failed");
+ return ne;
+ }
+
+ TO_LE32_MEM(&tmp32, len);
+
+ if ( hdr[0] != tmp32 ) {
+ nfp_log( NFP_DBG1, "i21555_ensure_reading: len not written");
+ return NFP_EIO;
+ }
+ TO_LE16_IO( &tmp16, NFAST_INT_HOST_READ_REQUEST >> 16);
+ nfp_outw( cdev, IOBAR, I21555_OFFSET_DOORBELL_SEC_SET, tmp16);
+
+ cdev->stats.ensure_fail--;
+ cdev->stats.ensure++;
+
+ return NFP_SUCCESS;
+}
+
+/* command device structure ------------------------------------- */
+
+const nfpcmd_dev i21555_cmddev = {
+ "nCipher Gen 2 PCI",
+ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_21555,
+ PCI_VENDOR_ID_NCIPHER, PCI_SUBSYSTEM_ID_NFAST_REV1,
+ { 0, IOSIZE | PCI_BASE_ADDRESS_SPACE_IO, NFPCI_RAM_MINSIZE_JOBS, 0, 0, 0 },
+ NFP_CMD_FLG_NEED_IOBUF,
+ i21555_create,
+ i21555_destroy,
+ i21555_open,
+ i21555_close,
+ i21555_isr,
+ i21555_write,
+ i21555_read,
+ i21555_chupdate,
+ i21555_ensure_reading,
+ i21555_debug,
+};
diff --git a/usr/src/uts/common/io/nfp/i21555.h b/usr/src/uts/common/io/nfp/i21555.h
new file mode 100644
index 0000000000..d8f3965938
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/i21555.h
@@ -0,0 +1,51 @@
+/*
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+*/
+
+#ifndef I21555_H
+#define I21555_H
+
+#ifndef PCI_VENDOR_ID_INTEL
+#define PCI_VENDOR_ID_INTEL 0x8086
+#endif
+
+#ifndef PCI_DEVICE_ID_INTEL_21555
+#define PCI_DEVICE_ID_INTEL_21555 0xb555
+#endif
+
+#ifndef PCI_VENDOR_ID_NCIPHER
+#define PCI_VENDOR_ID_NCIPHER 0x0100
+#endif
+
+#ifndef PCI_SUBSYSTEM_ID_NFAST_REV1
+#define PCI_SUBSYSTEM_ID_NFAST_REV1 0x0100
+#endif
+
+#define I21555_OFFSET_DOORBELL_PRI_SET 0x9C
+#define I21555_OFFSET_DOORBELL_SEC_SET 0x9E
+#define I21555_OFFSET_DOORBELL_PRI_CLEAR 0x98
+
+#define I21555_OFFSET_DOORBELL_PRI_SET_MASK 0xA4
+#define I21555_OFFSET_DOORBELL_PRI_CLEAR_MASK 0xA0
+
+#define I21555_DOORBELL_PRI_ENABLE 0x0000
+#define I21555_DOORBELL_PRI_DISABLE 0xFFFF
+
+#define I21555_CFG_SEC_CMD_STATUS 0x44
+
+#define CFG_CMD_MASTER 0x0004
+
+#define IOBAR 1
+#define MEMBAR 2
+
+#define IOSIZE 0x100
+
+extern nfp_err i21555_debug( int cmd, void *ctx );
+
+#endif
diff --git a/usr/src/uts/common/io/nfp/i21555d.c b/usr/src/uts/common/io/nfp/i21555d.c
new file mode 100644
index 0000000000..183ace8275
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/i21555d.c
@@ -0,0 +1,28 @@
+/*
+
+i21555d.c: nCipher PCI HSM intel 21555 debug ioctl
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+
+history
+
+15/05/2002 jsh Original, does nothing
+
+*/
+
+#include "nfp_common.h"
+#include "nfp_error.h"
+#include "nfp_osif.h"
+#include "i21555.h"
+
+/* ARGSUSED */
+nfp_err i21555_debug( int cmd, void *ctx) {
+ nfp_log( NFP_DBG1, "i21555_debug: entered");
+
+ return NFP_EUNKNOWN;
+}
diff --git a/usr/src/uts/common/io/nfp/nfdev-common.h b/usr/src/uts/common/io/nfp/nfdev-common.h
new file mode 100644
index 0000000000..8a97bf2c63
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/nfdev-common.h
@@ -0,0 +1,141 @@
+/*
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+*/
+/** \file nfdev-common.h
+ *
+ * \brief nFast device driver (not generic SCSI) ioctl struct definition file
+ * include NFDEV-$(system) for ioctl number definitions
+ *
+ * 1998.07.13 jsh Started
+ *
+ *
+ */
+
+#ifndef NFDEV_COMMON_H
+#define NFDEV_COMMON_H
+
+/**
+ * Result of the ENQUIRY ioctl.
+ */
+typedef struct nfdev_enquiry_str {
+ unsigned int busno; /**< Which bus is the PCI device on. */
+ unsigned char slotno; /**< Which slot is the PCI device in. */
+ unsigned char reserved[3]; /**< for consistant struct alignment */
+} nfdev_enquiry_str;
+
+/**
+ * Result of the STATS ioctl.
+ */
+typedef struct nfdev_stats_str {
+ unsigned long isr; /**< Count interrupts. */
+ unsigned long isr_read; /**< Count read interrupts. */
+ unsigned long isr_write; /**< Count write interrupts. */
+ unsigned long write_fail; /**< Count write failures. */
+ unsigned long write_block; /**< Count blocks written. */
+ unsigned long write_byte; /**< Count bytes written. */
+ unsigned long read_fail; /**< Count read failures. */
+ unsigned long read_block; /**< Count blocks read. */
+ unsigned long read_byte; /**< Count bytes read. */
+ unsigned long ensure_fail; /**< Count read request failures. */
+ unsigned long ensure; /**< Count read requests. */
+} nfdev_stats_str;
+
+/**
+ * Input to the CONTROL ioctl.
+ */
+typedef struct nfdev_control_str {
+ unsigned control; /**< Control flags. */
+} nfdev_control_str;
+
+/** Control bit indicating host supports MOI control */
+#define NFDEV_CONTROL_HOST_MOI 0x0001
+
+/** Index of control bits indicating desired mode
+ *
+ * Desired mode follows the M_ModuleMode enumeration.
+ */
+#define NFDEV_CONTROL_MODE_SHIFT 1
+
+/** Detect a backwards-compatible control value
+ *
+ * Returns true if the request control value "makes no difference", i.e.
+ * and the failure of an attempt to set it is therefore uninteresting.
+ */
+#define NFDEV_CONTROL_HARMLESS(c) ((c) <= 1)
+
+/**
+ * Result of the STATUS ioctl.
+ */
+typedef struct nfdev_status_str {
+ unsigned status; /**< Status flags. */
+ char error[8]; /**< Error string. */
+} nfdev_status_str;
+
+/** Monitor firmware supports MOI control and error reporting */
+#define NFDEV_STATUS_MONITOR_MOI 0x0001
+
+/** Application firmware supports MOI control and error reporting */
+#define NFDEV_STATUS_APPLICATION_MOI 0x0002
+
+/** Application firmware running and supports error reporting */
+#define NFDEV_STATUS_APPLICATION_RUNNING 0x0004
+
+/** HSM failed
+ *
+ * Consult error[] for additional information.
+ */
+#define NFDEV_STATUS_FAILED 0x0008
+
+/** Standard PCI interface. */
+#define NFDEV_IF_STANDARD 0x01
+
+/** PCI interface with results pushed from device
+ * via DMA.
+ */
+#define NFDEV_IF_PCI_PUSH 0x02
+
+/* platform independant base ioctl numbers */
+
+/** Enquiry ioctl.
+ * \return nfdev_enquiry_str describing the attached device. */
+#define NFDEV_IOCTL_NUM_ENQUIRY 0x01
+/** Channel Update ioctl.
+ * \deprecated */
+#define NFDEV_IOCTL_NUM_CHUPDATE 0x02
+/** Ensure Reading ioctl.
+ * Signal a read request to the device.
+ * \param (unsigned int) Length of data to be read.
+ */
+#define NFDEV_IOCTL_NUM_ENSUREREADING 0x03
+/** Device Count ioctl.
+ * Not implemented for on all platforms.
+ * \return (int) the number of attached devices. */
+#define NFDEV_IOCTL_NUM_DEVCOUNT 0x04
+/** Internal Debug ioctl.
+ * Not implemented in release drivers. */
+#define NFDEV_IOCTL_NUM_DEBUG 0x05
+/** PCI Interface Version ioctl.
+ * \param (int) Maximum PCI interface version
+ * supported by the user of the device. */
+#define NFDEV_IOCTL_NUM_PCI_IFVERS 0x06
+/** Statistics ioctl.
+ * \return nfdev_enquiry_str describing the attached device. */
+#define NFDEV_IOCTL_NUM_STATS 0x07
+
+/** Module control ioctl
+ * \param (nfdev_control_str) Value to write to HSM control register
+ */
+#define NFDEV_IOCTL_NUM_CONTROL 0x08
+
+/** Module state ioctl
+ * \return (nfdev_status_str) Values read from HSM status/error registers
+ */
+#define NFDEV_IOCTL_NUM_STATUS 0x09
+
+#endif
diff --git a/usr/src/uts/common/io/nfp/nfdev-solaris.h b/usr/src/uts/common/io/nfp/nfdev-solaris.h
new file mode 100644
index 0000000000..923b902e46
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/nfdev-solaris.h
@@ -0,0 +1,37 @@
+/*
+
+nfdev-solaris.h: nFast solaris specific device ioctl interface.
+
+(C) Copyright nCipher Corporation Ltd 1998-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+history
+
+14/07/1998 jsh Original
+
+*/
+
+#ifndef NFDEV_SOLARIS_H
+#define NFDEV_SOLARIS_H
+
+#include "nfdev-common.h"
+
+#define NFDEV_IOCTL_TYPE ('n'<<8)
+
+#define NFDEV_IOCTL_ENQUIRY ( NFDEV_IOCTL_TYPE | \
+ NFDEV_IOCTL_NUM_ENQUIRY )
+#define NFDEV_IOCTL_ENSUREREADING ( NFDEV_IOCTL_TYPE | \
+ NFDEV_IOCTL_NUM_ENSUREREADING )
+#define NFDEV_IOCTL_DEVCOUNT ( NFDEV_IOCTL_TYPE | \
+ NFDEV_IOCTL_NUM_DEVCOUNT )
+#define NFDEV_IOCTL_DEBUG ( NFDEV_IOCTL_TYPE | \
+ NFDEV_IOCTL_NUM_DEBUG )
+#define NFDEV_IOCTL_PCI_IFVERS ( NFDEV_IOCTL_TYPE | \
+ NFDEV_IOCTL_NUM_PCI_IFVERS )
+#define NFDEV_IOCTL_STATS ( NFDEV_IOCTL_TYPE | \
+ NFDEV_IOCTL_NUM_STATS )
+
+#endif /* NFDEV_SOLARIS_H */
diff --git a/usr/src/uts/common/io/nfp/nfp.h b/usr/src/uts/common/io/nfp/nfp.h
new file mode 100644
index 0000000000..9704f04fbc
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/nfp.h
@@ -0,0 +1,113 @@
+/*
+
+nfp.h: nFast PCI driver for Solaris 2.5, 2.6 and 2.7
+
+(C) Copyright nCipher Corporation Ltd 2001-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+history
+
+06/05/1998 jsh Original solaris 2.6
+21/05/1999 jsh added support for solaris 2.5
+10/06/1999 jsh added support for solaris 2.7 (32 and 64 bit)
+16/10/2001 jsh moved from nfast to new structure in nfdrv
+
+*/
+
+#ifndef NFP_H
+#define NFP_H
+
+#ifndef _KERNEL
+#error Hello? this is a driver, please compile with -D_KERNEL
+#endif
+
+#if ( CH_KERNELVER < 260 )
+typedef int ioctlptr_t;
+typedef unsigned short uint16_t;
+#define DDI_GET32 ddi_getl
+#define DDI_PUT32 ddi_putl
+#define DDI_GET16 ddi_getw
+#define DDI_PUT16 ddi_putw
+#define DDI_REP_GET8 ddi_rep_getb
+#define DDI_REP_PUT8 ddi_rep_putb
+#define DDI_REP_GET32 ddi_rep_getl
+#define DDI_REP_PUT32 ddi_rep_putl
+#define PCI_CONFIG_GET16 pci_config_getw
+#else /* ( CH_KERNELVER >= 260 ) */
+typedef intptr_t ioctlptr_t;
+#define DDI_GET32 ddi_get32
+#define DDI_PUT32 ddi_put32
+#define DDI_GET16 ddi_get16
+#define DDI_PUT16 ddi_put16
+#define DDI_REP_GET8 ddi_rep_get8
+#define DDI_REP_PUT8 ddi_rep_put8
+#define DDI_REP_GET32 ddi_rep_get32
+#define DDI_REP_PUT32 ddi_rep_put32
+#define PCI_CONFIG_GET16 pci_config_get16
+#endif
+
+#if ( CH_KERNELVER < 270 )
+typedef int nfp_timeout_t;
+#define EXTRA_CB_FLAGS 0
+#define VSXPRINTF(s, n, format, ap) vsprintf (s, format, ap)
+#else /* ( CH_KERNELVER >= 270 ) */
+typedef timeout_id_t nfp_timeout_t;
+#define EXTRA_CB_FLAGS D_64BIT
+#define VSXPRINTF(s, n, format, ap) vsnprintf(s, n, format, ap)
+#endif
+
+typedef struct nfp_dev {
+ int rd_ok;
+ int wr_ok;
+
+ int ifvers;
+
+ /* for PCI push read interface */
+ unsigned char *read_buf;
+ ddi_dma_handle_t read_dma_handle;
+ ddi_dma_cookie_t read_dma_cookie;
+
+ ddi_acc_handle_t acchandle;
+
+ int rd_dma_ok;
+
+ nfp_timeout_t wrtimeout;
+ nfp_timeout_t rdtimeout;
+
+ struct buf *wr_bp;
+ int wr_ready;
+ int rd_ready;
+ int rd_pending;
+ int rd_outstanding;
+ kcondvar_t rd_cv;
+
+ struct pollhead pollhead;
+ dev_info_t *dip;
+
+ ddi_iblock_cookie_t high_iblock_cookie; /* for mutex */
+ ddi_iblock_cookie_t low_iblock_cookie; /* for mutex */
+ kmutex_t high_mutex;
+ kmutex_t low_mutex;
+ int high_intr;
+ ddi_softintr_t soft_int_id;
+ int high_read;
+ int high_write;
+
+ ddi_iblock_cookie_t iblock_cookie; /* for mutex */
+ kmutex_t isr_mutex;
+
+ kmutex_t busy_mutex;
+ int busy;
+
+ ddi_acc_handle_t conf_handle;
+
+ nfp_cdev common;
+ const nfpcmd_dev *cmddev;
+} nfp_dev;
+
+extern struct nfp_dev *nfp_dev_list[];
+
+#endif /* NFP_H */
diff --git a/usr/src/uts/common/io/nfp/nfp_cmd.h b/usr/src/uts/common/io/nfp/nfp_cmd.h
new file mode 100644
index 0000000000..db8af0b2f9
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/nfp_cmd.h
@@ -0,0 +1,68 @@
+/*
+
+nfp_cmd.h: nCipher PCI HSM command driver decalrations
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+history
+
+10/10/2001 jsh Original
+
+*/
+
+#ifndef NFPCMD_H
+#define NFPCMD_H
+
+#include "nfp_hostif.h"
+#include "nfp_error.h"
+
+/* read and write called with userspace buffer */
+
+typedef struct nfpcmd_dev {
+ const char *name;
+ unsigned short vendorid, deviceid,
+ sub_vendorid, sub_deviceid;
+ unsigned int bar_sizes[6]; /* includes IO bit */
+ unsigned int flags;
+ nfp_err (*create)(struct nfp_cdev *pdev);
+ nfp_err (*destroy)(void * ctx);
+ nfp_err (*open)(void * ctx);
+ nfp_err (*close)(void * ctx);
+ nfp_err (*isr)(void *ctx, int *handled);
+ nfp_err (*write_block)( const char *ublock, int len, void *ctx );
+ nfp_err (*read_block)( char *ublock, int len, void *ctx, int *rcount);
+ nfp_err (*channel_update)( char *data, int len, void *ctx);
+ nfp_err (*ensure_reading)( unsigned int addr, int len, void *ctx );
+ nfp_err (*debug)( int cmd, void *ctx);
+} nfpcmd_dev;
+
+#define NFP_CMD_FLG_NEED_IOBUF 0x1
+
+/* list of all supported drivers ---------------------------------------- */
+
+extern const nfpcmd_dev *nfp_drvlist[];
+
+extern const nfpcmd_dev i21285_cmddev;
+extern const nfpcmd_dev i21555_cmddev;
+extern const nfpcmd_dev bcm5820_cmddev;
+
+#ifndef PCI_BASE_ADDRESS_SPACE_IO
+#define PCI_BASE_ADDRESS_SPACE_IO 0x1
+#endif
+
+#define NFP_MAXDEV 16
+
+
+#define NFP_MEMBAR_MASK ~0xf
+#define NFP_IOBAR_MASK ~0x3
+/*
+ This masks off the bottom bits of the PCI_CSR_BAR which signify that the
+ BAR is an IO BAR rather than a MEM BAR
+*/
+
+#endif
+
diff --git a/usr/src/uts/common/io/nfp/nfp_common.h b/usr/src/uts/common/io/nfp/nfp_common.h
new file mode 100644
index 0000000000..d1d2100fea
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/nfp_common.h
@@ -0,0 +1,68 @@
+/*
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+*/
+
+#ifndef NFP_COMMON_H
+#define NFP_COMMON_H
+
+#include <sys/types.h>
+#include <sys/conf.h>
+
+typedef uint32_t UINT32;
+typedef uint8_t BYTE;
+
+#define DEFINE_NFPCI_PACKED_STRUCTS
+#include "nfpci.h"
+#include "nfdev-solaris.h"
+
+typedef int oserr_t;
+
+#if CH_BIGENDIAN
+
+/* Big Endian Sparc */
+
+#define SWP32(x) \
+( (((unsigned int)(x)>>24)&0xff) | (((unsigned int)(x)>>8)&0xff00) | (((unsigned int)(x)<<8)&0xff0000) | (((unsigned int)(x)<<24)&0xff000000) )
+
+#define SWP16(x) ( (((x)>>8)&0xff) | (((x)<<8)&0xff00) )
+
+#define FROM_LE32_IO(x) SWP32(*x)
+#define TO_LE32_IO(x,y) *x=SWP32(y)
+
+#define FROM_LE32_MEM(x) SWP32(*x)
+#define TO_LE32_MEM(x,y) *x=SWP32(y)
+
+#define FROM_LE16_IO(x) SWP16(*x)
+#define TO_LE16_IO(x,y) *x=SWP16(y)
+
+#else
+
+/* Little Endian x86 */
+
+#define FROM_LE32_IO(x) (*x)
+#define TO_LE32_IO(x,y) (*x=y)
+
+#define FROM_LE32_MEM(x) (*x)
+#define TO_LE32_MEM(x,y) (*x=y)
+
+#define FROM_LE16_IO(x) (*x)
+#define TO_LE16_IO(x,y) (*x=y)
+
+#endif /* !CH_BIGENDIAN */
+
+#include <sys/types.h>
+
+#if CH_KERNELVER == 260
+#define nfp_get_lbolt( lbolt, err ) err= drv_getparm( LBOLT, lbolt )
+#else
+#define nfp_get_lbolt( lbolt, err ) { *lbolt= ddi_get_lbolt(); err= 0; }
+#endif
+
+#endif
+
diff --git a/usr/src/uts/common/io/nfp/nfp_error.h b/usr/src/uts/common/io/nfp/nfp_error.h
new file mode 100644
index 0000000000..d64cb78fd4
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/nfp_error.h
@@ -0,0 +1,48 @@
+/*
+
+nfp_error.h: nCipher PCI HSM error handling
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+history
+
+05/12/2001 jsh Original
+
+*/
+
+#ifndef NFP_ERROR_H
+#define NFP_ERROR_H
+
+#include "nfp_common.h"
+
+#define NFP_SUCCESS 0x0
+#define NFP_EFAULT 0x1
+#define NFP_ENOMEM 0x2
+#define NFP_EINVAL 0x3
+#define NFP_EIO 0x4
+#define NFP_ENXIO 0x5
+#define NFP_ENODEV 0x6
+#define NFP_EINTR 0x7
+#define NFP_ESTARTING 0x8
+#define NFP_EAGAIN 0x9
+#define NFP_EUNKNOWN 0x100
+
+typedef int nfp_err;
+
+extern oserr_t nfp_oserr( nfp_err nerr );
+extern nfp_err nfp_error( oserr_t oerr );
+
+#define nfr( x) \
+ return nfp_error((x))
+
+#define nfer(x, fn, msg) \
+ { oserr_t err=(x); if(err) { nfp_log( NFP_DBG1, #fn ": " msg); return nfp_error(err); } }
+
+#define er(x, fn, msg ) \
+{ nfp_err err=(x); if(err) { nfp_log( NFP_DBG1, #fn ": " msg); return err; } }
+
+#endif
diff --git a/usr/src/uts/common/io/nfp/nfp_hostif.h b/usr/src/uts/common/io/nfp/nfp_hostif.h
new file mode 100644
index 0000000000..3e7d8187e5
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/nfp_hostif.h
@@ -0,0 +1,54 @@
+/*
+
+nfp_hostif.h: nCipher PCI HSM host interface declarations
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+history
+
+10/10/2001 jsh Original
+
+*/
+
+#ifndef NFP_HOSTIF_H
+#define NFP_HOSTIF_H
+
+#include "nfdev-common.h"
+
+struct nfp_dev;
+
+/* common device structure */
+
+typedef struct nfp_cdev {
+ unsigned char *bar[6];
+ void *extra[6];
+
+ int busno;
+ int slotno;
+
+ void *cmdctx;
+
+ char *iobuf;
+
+ struct nfp_dev* dev;
+
+ struct nfdev_stats_str stats;
+
+} nfp_cdev;
+
+/* callbacks from command drivers -------------------------------------- */
+
+void nfp_read_complete( struct nfp_dev *pdev, int ok);
+void nfp_write_complete( struct nfp_dev *pdev, int ok);
+
+#define NFP_READ_MAX (8 * 1024)
+#define NFP_READBUF_SIZE (NFP_READ_MAX + 8)
+#define NFP_TIMEOUT_SEC 10
+
+#define NFP_DRVNAME "nCipher nFast PCI driver"
+
+#endif
diff --git a/usr/src/uts/common/io/nfp/nfp_ifvers.c b/usr/src/uts/common/io/nfp/nfp_ifvers.c
new file mode 100644
index 0000000000..807b4f24c5
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/nfp_ifvers.c
@@ -0,0 +1,51 @@
+/*
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+*/
+
+/*
+ * nfp_ifervs.c - common pci interface versioning
+ *
+ * uses:
+ *
+ * int pdev->ifvers
+ * device interface version
+ *
+ * int nfp_ifvers
+ * interface version limit
+ *
+ * int nfp_alloc_pci_push( nfp_dev *pdev )
+ * allocates resources needed for PCI Push,
+ * if not already allocated, and return True if successful
+ *
+ * void nfp_free_pci_push( nfp_dev *pdev ) {
+ * frees any resources allocated to PCI Push
+ */
+
+void nfp_set_ifvers( nfp_dev *pdev, int vers ) {
+ if( nfp_ifvers != 0 && vers > nfp_ifvers ) {
+ nfp_log( NFP_DBG2,
+ "nfp_set_ifvers: can't set ifvers %d"
+ " as nfp_ifvers wants max ifvers %d",
+ vers, nfp_ifvers);
+ return;
+ }
+ if( vers >= NFDEV_IF_PCI_PUSH ) {
+ if(!nfp_alloc_pci_push(pdev)) {
+ nfp_log( NFP_DBG1,
+ "nfp_set_ifvers: can't set ifvers %d"
+ " as resources not available",
+ vers);
+ return;
+ }
+ } else {
+ nfp_free_pci_push(pdev);
+ }
+ pdev->ifvers= vers;
+ nfp_log( NFP_DBG3, "nfp_set_ifvers: setting ifvers %d", vers);
+}
diff --git a/usr/src/uts/common/io/nfp/nfp_osif.h b/usr/src/uts/common/io/nfp/nfp_osif.h
new file mode 100644
index 0000000000..17ffe469ce
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/nfp_osif.h
@@ -0,0 +1,105 @@
+/*
+
+nfp_osif.h: nCipher PCI HSM OS interface declarations
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+history
+
+10/10/2001 jsh Original
+
+*/
+
+#ifndef NFP_OSIF_H
+#define NFP_OSIF_H
+
+#include "nfp_hostif.h"
+#include "nfp_error.h"
+
+/* general typedefs ----------------------------------------------- */
+
+typedef volatile unsigned int reg32;
+typedef volatile unsigned short reg16;
+typedef volatile unsigned char reg8;
+
+/* sempaphores, mutexs and events --------------------------------- */
+
+#if 0
+extern nfp_err nfp_sema_init( nfp_sema *sema, int initial);
+extern void nfp_sema_destroy( nfp_sema *sema );
+extern void nfp_sema_post( nfp_sema *sema );
+extern void nfp_sema_wait( nfp_sema *sema );
+extern int nfp_sema_wait_sig( nfp_sema *sema );
+
+extern nfp_err nfp_mutex_init( nfp_mutex *mutex );
+extern void nfp_mutex_destroy( nfp_mutex *mutex );
+extern void nfp_mutex_enter( nfp_mutex *mutex );
+extern void nfp_mutex_exit( nfp_mutex *mutex );
+
+extern nfp_err nfp_event_init( nfp_event *event );
+extern void nfp_event_destroy( nfp_event *event );
+extern void nfp_event_set( nfp_event *event );
+extern void nfp_event_clear( nfp_event *event );
+extern void nfp_event_wait( nfp_event *event );
+extern void nfp_event_wait_sig( nfp_event *event );
+
+#endif
+
+/* timeouts ------------------------------------------------------ */
+
+extern void nfp_sleep( int ms );
+
+/* memory handling ----------------------------------------------- */
+
+#define KMALLOC_DMA 0
+#define KMALLOC_CACHED 1
+
+extern void *nfp_kmalloc( int size, int flags );
+extern void *nfp_krealloc( void *ptr, int size, int flags );
+extern void nfp_kfree( void * );
+
+/* config space access ------------------------------------------------ */
+
+/* return Little Endian 32 bit config register */
+extern nfp_err nfp_config_inl( nfp_cdev *pdev, int offset, unsigned int *res );
+
+/* io space access ------------------------------------------------ */
+
+extern unsigned int nfp_inl( nfp_cdev *pdev, int bar, int offset );
+extern unsigned short nfp_inw( nfp_cdev *pdev, int bar, int offset );
+extern void nfp_outl( nfp_cdev *pdev, int bar, int offset, unsigned int data );
+extern void nfp_outw( nfp_cdev *pdev, int bar, int offset, unsigned short data );
+
+/* user and device memory space access ---------------------------- */
+
+/* NB these 2 functions are not guarenteed to be re-entrant for a given device */
+extern nfp_err nfp_copy_from_user_to_dev( nfp_cdev *cdev, int bar, int offset, const char *ubuf, int len);
+extern nfp_err nfp_copy_to_user_from_dev( nfp_cdev *cdev, int bar, int offset, char *ubuf, int len);
+
+extern nfp_err nfp_copy_from_user( char *kbuf, const char *ubuf, int len );
+extern nfp_err nfp_copy_to_user( char *ubuf, const char *kbuf, int len );
+
+extern nfp_err nfp_copy_from_dev( nfp_cdev *cdev, int bar, int offset, char *kbuf, int len );
+extern nfp_err nfp_copy_to_dev( nfp_cdev *cdev, int bar, int offset, const char *kbuf, int len);
+
+/* debug ------------------------------------------------------------ */
+
+#define NFP_DBG1 1
+#define NFP_DBGE NFP_DBG1
+#define NFP_DBG2 2
+#define NFP_DBG3 3
+#define NFP_DBG4 4
+
+#ifdef STRANGE_VARARGS
+extern void nfp_log();
+#else
+extern void nfp_log( int severity, const char *format, ...);
+#endif
+
+extern int nfp_debug;
+
+#endif
diff --git a/usr/src/uts/common/io/nfp/nfpci.h b/usr/src/uts/common/io/nfp/nfpci.h
new file mode 100644
index 0000000000..793f5995e6
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/nfpci.h
@@ -0,0 +1,171 @@
+/*
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+*/
+
+/*
+*
+* NFPCI.H - nFast PCI interface definition file
+*
+*
+*
+* 1998.06.09 IH Started
+*
+* The interface presented by nFast PCI devices consists of:
+*
+* A region of shared RAM used for data transfer & control information
+* A doorbell interrupt register, so both sides can give each other interrupts
+* A number of DMA channels for transferring data
+*/
+
+#ifndef NFPCI_H
+#define NFPCI_H
+
+/* Sizes of some regions */
+#define NFPCI_RAM_MINSIZE 0x00100000
+/* This is the minimum size of shared RAM. In future it may be possible to
+ negotiate larger sizes of shared RAM or auto-detect how big it is */
+#define NFPCI_RAM_MINSIZE_JOBS 0x00020000 /* standard jobs only */
+#define NFPCI_RAM_MINSIZE_KERN 0x00040000 /* standard and kernel jobs */
+
+/* Offsets within shared memory space.
+ The following main regions are:
+ jobs input area
+ jobs output area
+ kernel jobs input area
+ kernel output area
+*/
+
+#define NFPCI_OFFSET_JOBS 0x00000000
+#define NFPCI_OFFSET_JOBS_WR 0x00000000
+#define NFPCI_OFFSET_JOBS_RD 0x00010000
+#define NFPCI_OFFSET_KERN 0x00020000
+#define NFPCI_OFFSET_KERN_WR 0x00020000
+#define NFPCI_OFFSET_KERN_RD 0x00030000
+
+/* Interrupts, defined by bit position in doorbell register */
+
+/* Interrupts from device to host */
+#define NFAST_INT_DEVICE_WRITE_OK 0x00000001
+#define NFAST_INT_DEVICE_WRITE_FAILED 0x00000002
+#define NFAST_INT_DEVICE_READ_OK 0x00000004
+#define NFAST_INT_DEVICE_READ_FAILED 0x00000008
+#define NFAST_INT_DEVICE_KERN_WRITE_OK 0x00000010
+#define NFAST_INT_DEVICE_KERN_WRITE_FAILED 0x00000020
+#define NFAST_INT_DEVICE_KERN_READ_OK 0x00000040
+#define NFAST_INT_DEVICE_KERN_READ_FAILED 0x00000080
+
+/* Interrupts from host to device */
+#define NFAST_INT_HOST_WRITE_REQUEST 0x00010000
+#define NFAST_INT_HOST_READ_REQUEST 0x00020000
+#define NFAST_INT_HOST_DEBUG 0x00040000
+#define NFAST_INT_HOST_KERN_WRITE_REQUEST 0x00080000
+#define NFAST_INT_HOST_KERN_READ_REQUEST 0x00100000
+
+/* Ordinary job submission ------------------------ */
+
+/* The NFPCI_OFFSET_JOBS_WR and NFPCI_OFFSET_JOBS_RD regions are defined
+ by the following (byte) address offsets... */
+
+#define NFPCI_OFFSET_CONTROL 0x0
+#define NFPCI_OFFSET_LENGTH 0x4
+#define NFPCI_OFFSET_DATA 0x8
+#define NFPCI_OFFSET_PUSH_ADDR 0x8
+
+#define NFPCI_JOBS_WR_CONTROL (NFPCI_OFFSET_JOBS_WR + NFPCI_OFFSET_CONTROL)
+#define NFPCI_JOBS_WR_LENGTH (NFPCI_OFFSET_JOBS_WR + NFPCI_OFFSET_LENGTH)
+#define NFPCI_JOBS_WR_DATA (NFPCI_OFFSET_JOBS_WR + NFPCI_OFFSET_DATA)
+#define NFPCI_MAX_JOBS_WR_LEN (0x0000FFF8)
+
+#define NFPCI_JOBS_RD_CONTROL (NFPCI_OFFSET_JOBS_RD + NFPCI_OFFSET_CONTROL)
+#define NFPCI_JOBS_RD_LENGTH (NFPCI_OFFSET_JOBS_RD + NFPCI_OFFSET_LENGTH)
+#define NFPCI_JOBS_RD_DATA (NFPCI_OFFSET_JOBS_RD + NFPCI_OFFSET_DATA)
+/* address in PCI space of host buffer for NFPCI_JOB_CONTROL_PCI_PUSH */
+#define NFPCI_JOBS_RD_PUSH_ADDR (NFPCI_OFFSET_JOBS_RD + NFPCI_OFFSET_PUSH_ADDR)
+#define NFPCI_MAX_JOBS_RD_LEN (0x000FFF8)
+
+/* Kernel inferface job submission ---------------- */
+
+#define NFPCI_KERN_WR_CONTROL (NFPCI_OFFSET_KERN_WR + NFPCI_OFFSET_CONTROL)
+#define NFPCI_KERN_WR_LENGTH (NFPCI_OFFSET_KERN_WR + NFPCI_OFFSET_LENGTH)
+#define NFPCI_KERN_WR_DATA (NFPCI_OFFSET_KERN_WR + NFPCI_OFFSET_DATA)
+#define NFPCI_MAX_KERN_WR_LEN (0x0000FFF8)
+
+#define NFPCI_KERN_RD_CONTROL (NFPCI_OFFSET_KERN_RD + NFPCI_OFFSET_CONTROL)
+#define NFPCI_KERN_RD_LENGTH (NFPCI_OFFSET_KERN_RD + NFPCI_OFFSET_LENGTH)
+#define NFPCI_KERN_RD_DATA (NFPCI_OFFSET_KERN_RD + NFPCI_OFFSET_DATA)
+/* address in PCI space of host buffer for NFPCI_JOB_CONTROL_PCI_PUSH */
+#define NFPCI_KERN_RD_ADDR (NFPCI_OFFSET_KERN_RD + NFPCI_OFFSET_PUSH_ADDR)
+#define NFPCI_MAX_KERN_RD_LEN (0x000FFF8)
+
+#ifdef DEFINE_NFPCI_PACKED_STRUCTS
+typedef struct
+{
+ UINT32 controlword;
+ UINT32 length; /* length of data to follow */
+ union {
+ BYTE data[1];
+ UINT32 addr;
+ } uu;
+}
+ NFPCI_JOBS_BLOCK;
+#endif
+
+
+#define NFPCI_JOB_CONTROL 0x00000001
+#define NFPCI_JOB_CONTROL_PCI_PUSH 0x00000002
+/*
+ The 'Control' word is analogous to the SCSI read/write address;
+ 1 = standard push/pull IO
+ 2 = push/push IO
+
+ To submit a block of job data, the host:
+ - sets the (32-bit, little-endian) word at NFPCI_JOBS_WR_CONTROL to NFPCI_JOB_CONTROL
+ - sets the word at NFPCI_JOBS_WR_LENGTH to the length of the data
+ - copies the data to NFPCI_JOBS_WR_DATA
+ - sets interrupt NFAST_INT_HOST_WRITE_REQUEST in the doorbell register
+ - awaits the NFAST_INT_DEVICE_WRITE_OK (or _FAILED) interrupts back
+
+ To read a block of jobs back, the host:
+ - sets the word at NFPCI_JOBS_RD_CONTROL to NFPCI_JOB_CONTROL
+ - sets the word at NFPCI_JOBS_RD_LENGTH to the max length for returned data
+ - sets interrupt NFAST_INT_HOST_READ_REQUEST
+ - awaits the NFAST_INT_DEVICE_READ_OK (or _FAILED) interrupt
+ - reads the data from NFPCI_JOBS_RD_DATA; the module will set the word at
+ NFPCI_JOBS_RD_LENGTH to its actual length.
+
+ Optionally the host can request the PCI read data to be pushed to host PCI mapped ram:
+ - allocates a contiguous PCI addressable buffer for a NFPCI_JOBS_BLOCK of max
+ size NFPCI_MAX_JOBS_RD_LEN (or NFPCI_MAX_KERN_RD_LEN) + 8
+ - sets the word at NFPCI_JOBS_RD_CONTROL to NFPCI_JOB_CONTROL_PCI_PUSH
+ - sets the word at NFPCI_JOBS_RD_LENGTH to the max length for returned data
+ - sets the word at NFPCI_JOBS_RD_PUSH_ADDR to be the host PCI address of
+ the buffer
+ - sets interrupt NFAST_INT_HOST_READ_REQUEST
+ - awaits the NFAST_INT_DEVICE_READ_OK (or _FAILED) interrupt
+ - reads the data from the buffer at NFPCI_OFFSET_DATA in the buffer. The
+ module will set NFPCI_OFFSET_LENGTH to the actual length.
+*/
+
+#define NFPCI_SCRATCH_CONTROL 0
+
+#define NFPCI_SCRATCH_CONTROL_HOST_MOI (1<<0)
+#define NFPCI_SCRATCH_CONTROL_MODE_SHIFT 1
+#define NFPCI_SCRATCH_CONTROL_MODE_MASK (3<<NFPCI_SCRATCH_CONTROL_MODE_SHIFT)
+
+#define NFPCI_SCRATCH_STATUS 1
+
+#define NFPCI_SCRATCH_STATUS_MONITOR_MOI (1<<0)
+#define NFPCI_SCRATCH_STATUS_APPLICATION_MOI (1<<1)
+#define NFPCI_SCRATCH_STATUS_APPLICATION_RUNNING (1<<2)
+#define NFPCI_SCRATCH_STATUS_ERROR (1<<3)
+
+#define NFPCI_SCRATCH_ERROR_LO 2
+#define NFPCI_SCRATCH_ERROR_HI 3
+
+#endif
diff --git a/usr/src/uts/common/io/nfp/osif.c b/usr/src/uts/common/io/nfp/osif.c
new file mode 100644
index 0000000000..fba62f9a37
--- /dev/null
+++ b/usr/src/uts/common/io/nfp/osif.c
@@ -0,0 +1,184 @@
+/*
+
+(C) Copyright nCipher Corporation Ltd 2002-2008 All rights reserved
+
+Copyright (c) 2008-2013 Thales e-Security All rights reserved
+
+Copyright (c) 2014 Thales UK All rights reserved
+
+*/
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/errno.h>
+#include <sys/file.h>
+#include <sys/conf.h>
+#include <sys/uio.h>
+#include <sys/map.h>
+#include <sys/debug.h>
+#include <sys/modctl.h>
+#include <sys/kmem.h>
+#include <sys/cmn_err.h>
+#include <sys/open.h>
+#include <sys/stat.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/pci.h>
+
+#include "nfp_common.h"
+#include "nfp_hostif.h"
+#include "nfp_error.h"
+#include "nfp_osif.h"
+#include "nfp_cmd.h"
+#include "nfp.h"
+#include "autoversion.h"
+
+/* config space access ---------------------------------- */
+
+nfp_err nfp_config_inl( nfp_cdev *pdev, int offset, unsigned int *res ) {
+ unsigned int tmp32;
+ if ( !pdev || !pdev->dev || !pdev->dev->conf_handle )
+ return NFP_ENODEV;
+
+/* pci_config_get32() does byte swapping, so put back to LE */
+ tmp32 = pci_config_get32( pdev->dev->conf_handle, offset );
+ TO_LE32_IO(res, tmp32);
+
+ return NFP_SUCCESS;
+}
+
+/* user space memory access ---------------------------------- */
+
+nfp_err nfp_copy_from_user( char *kbuf, const char *ubuf, int len) {
+ bcopy(ubuf, kbuf, len);
+ return 0;
+}
+
+nfp_err nfp_copy_to_user( char *ubuf, const char *kbuf, int len) {
+ bcopy(kbuf, ubuf, len);
+ return 0;
+}
+
+nfp_err nfp_copy_from_user_to_dev( nfp_cdev *cdev, int bar, int offset, const char *ubuf, int len) {
+ /* dirty hack on Solaris, as we are called from strategy we are, in fact, copying from kernel mem */
+ return nfp_copy_to_dev( cdev, bar, offset, ubuf, len );
+}
+
+nfp_err nfp_copy_to_user_from_dev( nfp_cdev *cdev, int bar, int offset, char *ubuf, int len) {
+ /* dirty hack on Solaris, as we are called from strategy we are, in fact, copying to kernel mem */
+ return nfp_copy_from_dev( cdev, bar, offset, ubuf, len );
+}
+
+nfp_err nfp_copy_from_dev( nfp_cdev *cdev, int bar, int offset, char *kbuf, int len) {
+ if( len & 0x3 || offset & 0x3 )
+ DDI_REP_GET8( cdev->extra[bar], (unsigned char *)kbuf, cdev->bar[bar] + offset, len, DDI_DEV_AUTOINCR);
+ else
+ /* LINTED: alignment */
+ DDI_REP_GET32( cdev->extra[bar], (unsigned int *)kbuf, (unsigned int *)(cdev->bar[bar] + offset), len / 4, DDI_DEV_AUTOINCR);
+ return NFP_SUCCESS;
+}
+
+nfp_err nfp_copy_to_dev( nfp_cdev *cdev, int bar, int offset, const char *kbuf, int len) {
+ if( len & 0x3 || offset & 0x3 )
+ DDI_REP_PUT8( cdev->extra[bar], (unsigned char *)kbuf, cdev->bar[bar] + offset, len, DDI_DEV_AUTOINCR );
+ else
+ /* LINTED: alignment */
+ DDI_REP_PUT32( cdev->extra[bar], (unsigned int *)kbuf, (unsigned int *)(cdev->bar[bar] + offset), len / 4, DDI_DEV_AUTOINCR );
+ return NFP_SUCCESS;
+}
+
+/* pci io space access --------------------------------------- */
+
+unsigned int nfp_inl( nfp_cdev *pdev, int bar, int offset ) {
+ nfp_log( NFP_DBG3, "nfp_inl: addr %x", (uintptr_t) pdev->bar[bar] + offset);
+ /* LINTED: alignment */
+ return DDI_GET32( pdev->extra[bar], (uint32_t *)(pdev->bar[bar] + offset) );
+}
+
+unsigned short nfp_inw( nfp_cdev *pdev, int bar, int offset ) {
+ nfp_log( NFP_DBG3, "nfp_inw: addr %x", (uintptr_t) pdev->bar[bar] + offset);
+ /* LINTED: alignment */
+ return DDI_GET16( pdev->extra[bar], (unsigned short *)(pdev->bar[ bar ] + offset) );
+}
+
+void nfp_outl( nfp_cdev *pdev, int bar, int offset, unsigned int data ) {
+ nfp_log( NFP_DBG3, "nfp_outl: addr %x, data %x", (uintptr_t) pdev->bar[bar] + offset, data);
+ /* LINTED: alignment */
+ DDI_PUT32( pdev->extra[bar], (uint32_t *)(pdev->bar[ bar ] + offset), data );
+}
+
+void nfp_outw( nfp_cdev *pdev, int bar, int offset, unsigned short data ) {
+ nfp_log( NFP_DBG3, "nfp_outl: addr %x, data %x", (uintptr_t) pdev->bar[bar] + offset, data);
+ /* LINTED: alignment */
+ DDI_PUT16( pdev->extra[bar], (unsigned short *)(pdev->bar[ bar ] + offset), data );
+}
+
+/* logging ---------------------------------------------------- */
+
+void nfp_log( int level, const char *fmt, ...)
+{
+ auto char buf[256];
+ va_list ap;
+
+ switch (level) {
+ case NFP_DBG4: if (nfp_debug < 4) break;
+ /*FALLTHROUGH*/
+ case NFP_DBG3: if (nfp_debug < 3) break;
+ /*FALLTHROUGH*/
+ case NFP_DBG2: if (nfp_debug < 2) break;
+ /*FALLTHROUGH*/
+ case NFP_DBG1: if (nfp_debug < 1) break;
+ /*FALLTHROUGH*/
+ default:
+ va_start(ap, fmt);
+ (void) vsnprintf(buf, 256, fmt, ap);
+ va_end(ap);
+ cmn_err(CE_CONT, "!" VERSION_COMPNAME " " VERSION_NO ": %s\n", buf);
+ break;
+ }
+}
+
+struct errstr {
+ int oserr;
+ nfp_err nferr;
+};
+
+
+static struct errstr errtab[] = {
+ { EFAULT, NFP_EFAULT },
+ { ENOMEM, NFP_ENOMEM },
+ { EINVAL, NFP_EINVAL },
+ { EIO, NFP_EIO },
+ { ENXIO, NFP_ENXIO },
+ { ENODEV, NFP_ENODEV },
+ { EINVAL, NFP_EUNKNOWN },
+ { 0, 0 }
+};
+
+nfp_err nfp_error( int oserr )
+{
+ struct errstr *perr;
+ if(!oserr)
+ return 0;
+ perr= errtab;
+ while(perr->nferr) {
+ if(perr->oserr == oserr)
+ return perr->nferr;
+ perr++;
+ }
+ return NFP_EUNKNOWN;
+}
+
+int nfp_oserr( nfp_err nferr )
+{
+ struct errstr *perr;
+ if(nferr == NFP_SUCCESS)
+ return 0;
+ perr= errtab;
+ while(perr->nferr) {
+ if(perr->nferr == nferr)
+ return perr->oserr;
+ perr++;
+ }
+ return EIO;
+}
diff --git a/usr/src/uts/common/io/overlay/overlay.c b/usr/src/uts/common/io/overlay/overlay.c
new file mode 100644
index 0000000000..3f34ec3b58
--- /dev/null
+++ b/usr/src/uts/common/io/overlay/overlay.c
@@ -0,0 +1,2184 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+/*
+ * Overlay Devices
+ *
+ * Overlay devices provide a means for creating overlay networks, a means of
+ * multiplexing multiple logical, isolated, and discrete layer two and layer
+ * three networks on top of one physical network.
+ *
+ * In general, these overlay devices encapsulate the logic to answer two
+ * different questions:
+ *
+ * 1) How should I transform a packet to put it on the wire?
+ * 2) Where should I send a transformed packet?
+ *
+ * Each overlay device is presented to the user as a GLDv3 device. While the
+ * link itself cannot have an IP interface created on top of it, it allows for
+ * additional GLDv3 devices, such as a VNIC, to be created on top of it which
+ * can be plumbed up with IP interfaces.
+ *
+ *
+ * --------------------
+ * General Architecture
+ * --------------------
+ *
+ * The logical overlay device that a user sees in dladm(1M) is a combination of
+ * two different components that work together. The first component is this
+ * kernel module, which is responsible for answering question one -- how should
+ * I transform a packet to put it on the wire.
+ *
+ * The second component is what we call the virtual ARP daemon, or varpd. It is
+ * a userland component that is responsible for answering the second question --
+ * Where should I send a transformed packet. Instances of the kernel overlay
+ * GLDv3 device ask varpd the question of where should a packet go.
+ *
+ * The split was done for a few reasons. Importantly, we wanted to keep the act
+ * of generating encapsulated packets in the kernel so as to ensure that the
+ * general data path was fast and also kept simple. On the flip side, while the
+ * question of where should something go may be simple, it may often be
+ * complicated and need to interface with several different external or
+ * distributed systems. In those cases, it's simpler to allow for the full
+ * flexibility of userland to be brought to bear to solve that problem and in
+ * general, the path isn't very common.
+ *
+ * The following is what makes up the logical overlay device that a user would
+ * create with dladm(1M).
+ *
+ * Kernel Userland
+ * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+ * . +--------+ +--------+ +--------+ . . .
+ * . | VNIC 0 | | VNIC 1 | | VNIC 2 | . . .
+ * . +--------+ +--------+ +--------+ . . .
+ * . | | | . . .
+ * . | | | . . .
+ * . +------------+-----------+ . . .
+ * . | . . /dev/overlay .
+ * . +--------------+ . . . +------------+ .
+ * . | | . . . | | .
+ * . | Overlay |======*=================| Virtual | .
+ * . | GLDv3 Device |========================| ARP Daemon | .
+ * . | | . . | | .
+ * . +--------------+ . . +------------+ .
+ * . | . . | .
+ * . | . . | .
+ * . +----------------+ . . +--------+ .
+ * . | Overlay | . . | varpd | .
+ * . | Encapsulation | . . | Lookup | .
+ * . | Plugin | . . | Plugin | .
+ * . +----------------+ . . +--------+ .
+ * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+ *
+ *
+ * This image shows the two different components and where they live.
+ * Importantly, it also shows that both the kernel overlay device and the
+ * userland varpd both support plugins. The plugins actually implement the
+ * things that users care about and the APIs have been designed to try to
+ * minimize the amount of things that a module writer needs to worry about it.
+ *
+ * IDENTIFIERS
+ *
+ * Every overlay device is defined by a unique identifier which is the overlay
+ * identifier. Its purpose is similar to that of a VLAN identifier, it's a
+ * unique number that is used to differentiate between different entries on the
+ * wire.
+ *
+ * ENCAPSULATION
+ *
+ * An overlay encapsulation plugin is a kernel miscellaneous module whose
+ * purpose is to contain knowledge about how to transform packets to put them
+ * onto the wire and to take them off. An example of an encapsulation plugin is
+ * vxlan. It's also how support for things like nvgre or geneve would be brought
+ * into the system.
+ *
+ * Each encapsulation plugins defines a series of operation vectors and
+ * properties. For the full details on everything they should provide, please
+ * read uts/common/sys/overlay_plugin.h. The encapsulation plugin is responsible
+ * for telling the system what information is required to send a packet. For
+ * example, vxlan is defined to send everything over a UDP packet and therefore
+ * requires a port and an IP address, while nvgre on the other hand is its own
+ * IP type and therefore just requires an IP address. In addition, it also
+ * provides information about the kind of socket that should be created. This is
+ * used by the kernel multiplexor, more of that in the Kernel Components
+ * section.
+ *
+ * LOOKUPS
+ *
+ * The kernel communicates requests for lookups over the character device
+ * /dev/overlay. varpd is responsible for listening for requests on that device
+ * and answering them. The character device is specific to the target path and
+ * varpd.
+ *
+ * Much as the kernel overlay module handles the bulk of the scaffolding but
+ * leaves the important work to the encapsulation plugin, varpd provides a
+ * similar role and leaves the full brunt of lookups to a userland dynamic
+ * shared object which implements the logic of lookups.
+ *
+ * Each lookup plugin defines a series of operation vectors and properties. For
+ * the full details on everything that they should provide, please read
+ * lib/varpd/libvarpd/libvarpd_provider.h. Essentially, they are given a MAC
+ * address and asked to give an address on the physical network that it should
+ * be sent to. In addition, they handle questions related to how to handle
+ * things like broadcast and multicast traffic, etc.
+ *
+ * ----------
+ * Properties
+ * ----------
+ *
+ * A device from a dladm perspective has a unique set of properties that are
+ * combined from three different sources:
+ *
+ * 1) Generic properties that every overlay device has
+ * 2) Properties that are specific to the encapsulation plugin
+ * 3) Properties that are specific to the lookup plugin
+ *
+ * All of these are exposed in a single set of properties in dladm. Note that
+ * these are not necessarily traditional link properties. However, if something
+ * is both a traditional GLDv3 link property, say the MTU of a device, and a
+ * specific property here, than the driver ensures that all existing GLDv3
+ * specific means of manipulating it are used and wraps up its private property
+ * interfaces to ensure that works.
+ *
+ * Properties in the second and third category are prefixed with the name of
+ * their module. For example, the vxlan encapsulation module has a property
+ * called the 'listen_ip'. This property would show up in dladm as
+ * 'vxlan/listen_ip'. This allows different plugins to both use similar names
+ * for similar properties and to also have independent name spaces so that
+ * overlapping names do not conflict with anything else.
+ *
+ * While the kernel combines both sets one and two into a single coherent view,
+ * it does not do anything with respect to the properties that are owned by the
+ * lookup plugin -- those are owned wholly by varpd. Instead, libdladm is in
+ * charge of bridging these two worlds into one magical experience for the user.
+ * It carries the burden of knowing about both overlay specific and varpd
+ * specific properties. Importantly, we want to maintain this distinction. We
+ * don't want to treat the kernel as an arbitrary key/value store for varpd and
+ * we want the kernel to own its own data and not have to ask userland for
+ * information that it owns.
+ *
+ * Every property in the system has the following attributes:
+ *
+ * o A name
+ * o A type
+ * o A size
+ * o Permissions
+ * o Default value
+ * o Valid value ranges
+ * o A value
+ *
+ * Everything except for the value is obtained by callers through the propinfo
+ * callbacks and a property has a maximum size of OVERLAY_PROP_SIZEMAX,
+ * currently 256 bytes.
+ *
+ * The following are the supported types of properties:
+ *
+ * OVERLAY_PROP_T_INT
+ *
+ * A signed integer, its length is 8 bytes, corresponding to a
+ * int64_t.
+ *
+ * OVERLAY_PROP_T_UINT
+ *
+ * An unsigned integer, its length is 8 bytes, corresponding to a
+ * uint64_t.
+ *
+ * OVERLAY_PROP_T_IP
+ *
+ * A struct in6_addr, it has a fixed size.
+ *
+ * OVERLAY_PROP_T_STRING
+ *
+ * A null-terminated character string encoded in either ASCII or
+ * UTF-8. Note that the size of the string includes the null
+ * terminator.
+ *
+ * The next thing that we apply to a property is its permission. The permissions
+ * are put together by the bitwise or of the following flags and values.
+ *
+ * OVERLAY_PROP_PERM_REQ
+ *
+ * This indicates a required property. A property that is required
+ * must be set by a consumer before the device can be created. If a
+ * required property has a default property, this constraint is
+ * loosened because the default property defines the value.
+ *
+ * OVERLAY_PORP_PERM_READ
+ *
+ * This indicates that a property can be read. All properties will
+ * have this value set.
+ *
+ * OVERLAY_PROP_PERM_WRITE
+ *
+ * This indicates that a property can be written to and thus
+ * updated by userland. Properties that are only intended to
+ * display information, will not have OVERLAY_PROP_PERM_WRITE set.
+ *
+ * In addition, a few additional values are defined as a convenience to
+ * consumers. The first, OVERLAY_PROP_PERM_RW, is a combination of
+ * OVERLAY_PROP_PERM_READ and OVERLAY_PERM_PROP_WRITE. The second,
+ * OVERLAY_PROP_PERM_RRW, is a combination of OVERLAY_PROP_PERM_REQ,
+ * OVERLAY_PROP_PERM_READ, and OVERLAY_PROP_PERM_WRITE. The protection mode of a
+ * property should generally be a constant across its lifetime.
+ *
+ * A property may optionally have a default value. If it does have a default
+ * value, and that property is not set to be a different value, then the default
+ * value is inherited automatically. It also means that if the default value is
+ * acceptable, there is no need to set the value for a required property. For
+ * example, the vxlan module has the vxlan/listen_port property which is
+ * required, but has a default value of 4789 (the IANA assigned port). Because
+ * of that default value, there is no need for it to be set.
+ *
+ * Finally, a property may declare a list of valid values. These valid values
+ * are used for display purposes, they are not enforced by the broader system,
+ * but merely allow a means for the information to be communicated to the user
+ * through dladm(1M). Like a default value, this is optional.
+ *
+ * The general scaffolding does not do very much with respect to the getting and
+ * setting of properties. That is really owned by the individual plugins
+ * themselves.
+ *
+ * -----------------------------
+ * Destinations and Plugin Types
+ * -----------------------------
+ *
+ * Both encapsulation and lookup plugins define the kinds of destinations that
+ * they know how to support. There are three different pieces of information
+ * that can be used to address to a destination currently, all of which is
+ * summarized in the type overlay_point_t. Any combination of these is
+ * supported.
+ *
+ * OVERLAY_PLUGIN_D_ETHERNET
+ *
+ * An Ethernet MAC address is required.
+ *
+ * OVERLAY_PLUGIN_D_IP
+ *
+ * An IP address is required. All IP addresses used by the overlay
+ * system are transmitted as IPv6 addresses. IPv4 addresses can be
+ * represented by using IPv4-mapped IPv6 addresses.
+ *
+ * OVERLAY_PLUGIN_D_PORT
+ *
+ * A TCP/UDP port is required.
+ *
+ * A kernel encapsulation plugin declares which of these that it requires, it's
+ * a static set. On the other hand, a userland lookup plugin can be built to
+ * support all of these or any combination thereof. It gets passed the required
+ * destination type, based on the kernel encapsulation method, and then it makes
+ * the determination as to whether or not it supports it. For example, the
+ * direct plugin can support either an IP or both an IP and a port, it simply
+ * doesn't display the direct/dest_port property in the cases where a port is
+ * not required to support this.
+ *
+ * The user lookup plugins have two different modes of operation which
+ * determines how they interact with the broader system and how look ups are
+ * performed. These types are:
+ *
+ * OVERLAY_TARGET_POINT
+ *
+ * A point to point plugin has a single static definition for where
+ * to send all traffic. Every packet in the system always gets sent
+ * to the exact same destination which is programmed into the
+ * kernel when the general device is activated.
+ *
+ * OVERLAY_TARGET_DYNAMIC
+ *
+ * A dynamic plugin does not have a single static definition.
+ * Instead, for each destination, the kernel makes an asynchronous
+ * request to varpd to determine where the packet should be routed,
+ * and if a specific destination is found, then that destination is
+ * cached in the overlay device's target cache.
+ *
+ * This distinction, while important for the general overlay device's operation,
+ * is not important to the encapsulation plugins. They don't need to know about
+ * any of these pieces. It's just a concern for varpd, the userland plugin, and
+ * the general overlay scaffolding.
+ *
+ * When an overlay device is set to OVERLAY_TARGET_POINT, then it does not
+ * maintain a target cache, and instead just keeps track of the destination and
+ * always sends encapsulated packets to that address. When the target type is of
+ * OVERLAY_TARGET_DYNAMIC, then the kernel maintains a cache of all such
+ * destinations. These destinations are kept around in an instance of a
+ * reference hash that is specific to the given overlay device. Entries in the
+ * cache can be invalidated and replaced by varpd and its lookup plugins.
+ *
+ * ----------------------------------
+ * Kernel Components and Architecture
+ * ----------------------------------
+ *
+ * There are multiple pieces inside the kernel that work together, there is the
+ * general overlay_dev_t structure, which is the logical GLDv3 device, but it
+ * itself has references to things like an instance of an encapsulation plugin,
+ * a pointer to a mux and a target cache. It can roughly be summarized in the
+ * following image:
+ *
+ * +------------------+
+ * | global |
+ * | overlay list |
+ * | overlay_dev_list |
+ * +------------------+
+ * |
+ * | +-----------------------+ +---------------+
+ * +->| GLDv3 Device |----------->| GLDv3 Device | -> ...
+ * | overlay_dev_t | | overlay_dev_t |
+ * | | +---------------+
+ * | |
+ * | mac_handle_t -----+---> GLDv3 handle to MAC
+ * | datalink_id_t -----+---> Datalink ID used by DLS
+ * | overlay_dev_flag_t ---+---> Device state
+ * | uint_t -----+---> Curent device MTU
+ * | uint_t -----+---> In-progress RX operations
+ * | uint_t -----+---> In-progress TX operations
+ * | char[] -----+---> FMA degraded message
+ * | void * -----+---> plugin private data
+ * | overlay_target_t * ---+---------------------+
+ * | overlay_plugin_t * ---+---------+ |
+ * +-----------------------+ | |
+ * ^ | |
+ * +--------------------+ | | |
+ * | Kernel Socket | | | |
+ * | Multiplexor | | | |
+ * | overlay_mux_t | | | |
+ * | | | | |
+ * | avl_tree_t -+--+ | |
+ * | uint_t -+--> socket family | |
+ * | uint_t -+--> socket type | |
+ * | uint_t -+--> socket protocol | |
+ * | ksocket_t -+--> I/O socket | |
+ * | struct sockaddr * -+--> ksocket address | |
+ * | overlay_plugin_t --+--------+ | |
+ * +--------------------+ | | |
+ * | | |
+ * +-------------------------+ | | |
+ * | Encap Plugin |<--+-----------+ |
+ * | overlay_plugin_t | |
+ * | | |
+ * | char * ---+--> plugin name |
+ * | overlay_plugin_ops_t * -+--> plugin downcalls |
+ * | char ** (props) ---+--> property list |
+ * | uint_t ---+--> id length |
+ * | overlay_plugin_flags_t -+--> plugin flags |
+ * | overlay_plugin_dest_t --+--> destination type v
+ * +-------------------------+ +-------------------------+
+ * | Target Cache |
+ * | overlay_target_t |
+ * | |
+ * cache mode <--+- overlay_target_mode_t |
+ * dest type <--+- overlay_plugin_dest_t |
+ * cache flags <--+- overlay_target_flag_t |
+ * varpd id <--+- uint64_t |
+ * outstanding varpd reqs. <--+- uint_t |
+ * OVERLAY_TARGET_POINT state <--+- overlay_target_point_t |
+ * OVERLAY_TARGET_DYNAMIC state <-+---+- overlay_target_dyn_t |
+ * | +-------------------------+
+ * +-----------------------+
+ * |
+ * v
+ * +-------------------------------+ +------------------------+
+ * | Target Entry |-->| Target Entry |--> ...
+ * | overlay_target_entry_t | | overlay_target_entry_t |
+ * | | +------------------------+
+ * | |
+ * | overlay_target_entry_flags_t -+--> Entry flags
+ * | uint8_t[ETHERADDRL] ---+--> Target MAC address
+ * | overlay_target_point_t ---+--> Target underlay address
+ * | mblk_t * ---+--> outstanding mblk head
+ * | mblk_t * ---+--> outstanding mblk tail
+ * | size_t ---+--> outstanding mblk size
+ * +-------------------------------+
+ *
+ * The primary entries that we care about are the overlay_dev_t, which
+ * correspond to each overlay device that is created with dladm(1M). Globally,
+ * these devices are maintained in a simple list_t which is protected with a
+ * lock. Hence, these include important information such as the mac_handle_t
+ * and a datalink_id_t which is used to interact with the broader MAC and DLS
+ * ecosystem. We also maintain additional information such as the current state,
+ * outstanding operations, the mtu, and importantly, the plugin's private data.
+ * This is the instance of an encapsulation plugin that gets created as part of
+ * creating an overlay device. Another aspect of this is that the overlay_dev_t
+ * also includes information with respect to FMA. For more information, see the
+ * FMA section.
+ *
+ * Each overlay_dev_t has a pointer to a plugin, a mux, and a target. The plugin
+ * is the encapsulation plugin. This allows the device to make downcalls into it
+ * based on doing things like getting and setting properties. Otherwise, the
+ * plugin itself is a fairly straightforward entity. They are maintained in an
+ * (not pictured above) list. The plugins themselves mostly maintain things like
+ * the static list of properties, what kind of destination they require, and the
+ * operations vector. A given module may contain more if necessary.
+ *
+ * The next piece of the puzzle is the mux, or a multiplexor. The mux itself
+ * maintains a ksocket and it is through the mux that we send and receive
+ * message blocks. The mux represents a socket type and address, as well as a
+ * plugin. Multiple overlay_dev_t devices may then share the same mux. For
+ * example, consider the case where you have different instances of vxlan all on
+ * the same underlay network. These would all logically share the same IP
+ * address and port that packets are sent and received on; however, what differs
+ * is the decapuslation ID.
+ *
+ * Each mux maintains a ksocket_t which is similar to a socket(3SOCKET). Unlike
+ * a socket, we enable a direct callback on the ksocket. This means that
+ * whenever a message block chain is received, rather than sitting there and
+ * getting a callback in a context and kicking that back out to a taskq. Instead
+ * data comes into the callback function overlay_mux_recv().
+ *
+ * The mux is given encapsulated packets (via overlay_m_tx, the GLDv3 tx
+ * function) to transmit. It receives encapsulated packets, decapsulates them to
+ * determine the overlay identifier, looks up the given device that matches that
+ * identifier, and then causes the broader MAC world to receive the packet with
+ * a call to mac_rx().
+ *
+ * Today, we don't do too much that's special with the ksocket; however, as
+ * hardware is gaining understanding for these encapuslation protocols, we'll
+ * probably want to think of better ways to get those capabilities passed down
+ * and potentially better ways to program receive filters so they get directly
+ * to us. Though, that's all fantasy future land.
+ *
+ * The next part of the puzzle is the target cache. The purpose of the target
+ * cache is to cache where we should send a packet on the underlay network,
+ * given its mac address. The target cache operates in two modes depending on
+ * whether the lookup module was declared to OVERLAY_TARGET_POINT or
+ * OVERLAY_TARGET_DYANMIC.
+ *
+ * In the case where the target cache has been programmed to be
+ * OVERLAY_TARGET_POINT, then we only maintain a single overlay_target_point_t
+ * which has the destination that we send everything, no matter the destination
+ * mac address.
+ *
+ * On the other hand, when we have an instance of OVERLAY_TARGET_DYNAMIC, things
+ * are much more interesting and as a result, more complicated. We primarily
+ * store lists of overlay_target_entry_t's which are stored in both an avl tree
+ * and a refhash_t. The primary look up path uses the refhash_t and the avl tree
+ * is only used for a few of the target ioctls used to dump data such that we
+ * can get a consistent iteration order for things like dladm show-overlay -t.
+ * The key that we use for the reference hashtable is based on the mac address
+ * in the cache and currently we just do a simple CRC32 to transform it into a
+ * hash.
+ *
+ * Each entry maintains a set of flags to indicate the current status of the
+ * request. The flags may indicate one of three states: that current cache entry
+ * is valid, that the current cache entry has been directed to drop all output,
+ * and that the current cache entry is invalid and may be being looked up. In
+ * the case where it's valid, we just take the destination address and run with
+ * it.
+ *
+ * If it's invalid and a lookup has not been made, then we start the process
+ * that prepares a query that will make its way up to varpd. The cache entry
+ * entry maintains a message block chain of outstanding message blocks and a
+ * size. These lists are populated only when we don't know the answer as to
+ * where should these be sent. The size entry is used to cap the amount of
+ * outstanding data that we don't know the answer to. If we exceed a cap on the
+ * amount of outstanding data (currently 1 Mb), then we'll drop any additional
+ * packets. Once we get an answer indicating a valid destination, we transmit
+ * any outstanding data to that place. For the full story on how we look that up
+ * will be discussed in the section on the Target Cache Lifecycle.
+ *
+ * ------------------------
+ * FMA and Degraded Devices
+ * ------------------------
+ *
+ * Every kernel overlay device keeps track of its FMA state. Today in FMA we
+ * cannot represent partitions between resources nor can we represent that a
+ * given minor node of a psuedo device has failed -- if we degrade the overlay
+ * device, then the entire dev_info_t is degraded. However, we still want to be
+ * able to indicate to administrators that things may go wrong.
+ *
+ * To this end, we've added a notion of a degraded state to every overlay
+ * device. This state is primarily dictated by userland and it can happen for
+ * various reasons. Generally, because a userland lookup plugin has been
+ * partitioned, or something has gone wrong such that there is no longer any
+ * userland lookup module for a device, then we'll mark it degraded.
+ *
+ * As long as any of our minor instances is degraded, then we'll fire off the
+ * FMA event to note that. Once the last degraded instance is no longer
+ * degraded, then we'll end up telling FMA that we're all clean.
+ *
+ * To help administrators get a better sense of which of the various minor
+ * devices is wrong, we store the odd_fmamsg[] character array. This character
+ * array can be fetched with doing a dladm show-overlay -f.
+ *
+ * Note, that it's important that we do not update the link status of the
+ * devices. We want to remain up as much as possible. By changing the link in a
+ * degraded state, this may end up making things worse. We may still actually
+ * have information in the target cache and if we mark the link down, that'll
+ * result in not being able to use it. The reason being that this'll mark all
+ * the downstream VNICs down which will go to IP and from there we end up
+ * dealing with sadness.
+ *
+ * -----------------------
+ * Target Cache Life Cycle
+ * -----------------------
+ *
+ * This section only applies when we have a lookup plugin of
+ * OVERLAY_TARGET_DYNAMIC. None of this applies to those of type
+ * OVERLAY_TARGET_POINT.
+ *
+ * While we got into the target cache in the general architecture section, it's
+ * worth going into more details as to how this actually works and showing some
+ * examples and state machines. Recall that a target cache entry basically has
+ * the following state transition diagram:
+ *
+ * Initial state
+ * . . . . . . first access . . . varpd lookup enqueued
+ * . . .
+ * . . .
+ * +-------+ . +----------+ .
+ * | No |------*---->| Invalid |-------*----+
+ * | Entry | | Entry | |
+ * +-------+ +----------+ |
+ * varpd ^ ^ varpd |
+ * invalidate | | drop |
+ * . . . * * . . v
+ * +-------+ | | +---------+
+ * | Entry |--->-----+ +----<----| Entry |
+ * | Valid |<----------*---------<----| Pending |->-+ varpd
+ * +-------+ . +---------+ * . . drop, but
+ * . varpd ^ | other queued
+ * . success | | entries
+ * +-----+
+ *
+ * When the table is first created, it is empty. As we attempt to lookup entries
+ * and we find there is no entry at all, we'll create a new table entry for it.
+ * At that point the entry is technically in an invalid state, that means that
+ * we have no valid data from varpd. In that case, we'll go ahead and queue the
+ * packet into the entry's pending chain, and queue a varpd lookup, setting the
+ * OVERLAY_ENTRY_F_PENDING flag in the progress.
+ *
+ * If additional mblk_t's come in for this entry, we end up appending them to
+ * the tail of the chain, if and only if, we don't exceed the threshold for the
+ * amount of space they can take up. An entry remains pending until we get a
+ * varpd reply. If varpd replies with a valid results, we move to the valid
+ * entry state, and remove the OVERLAY_ENTRY_F_PENDING flag and set it with one
+ * of OVERLAY_ENTRY_F_VALID or OVERLAY_ENTRY_F_DROP as appropriate.
+ *
+ * Once an entry is valid, it stays valid until user land tells us to invalidate
+ * it with an ioctl or replace it, OVERLAY_TARG_CACHE_REMOE and
+ * OVERLAY_TARG_CACHE_SET respectively.
+ *
+ * If the lookup fails with a call to drop the packet, then the next state is
+ * determined by the state of the queue. If the set of outstanding entries is
+ * empty, then we just transition back to the invalid state. If instead, the
+ * set of outstanding entries is not empty, then we'll queue another entry and
+ * stay in the same state, repeating this until the number of requests is
+ * drained.
+ *
+ * The following images describes the flow of a given lookup and where the
+ * overlay_target_entry_t is at any given time.
+ *
+ * +-------------------+
+ * | Invalid Entry | An entry starts off as an invalid entry
+ * | de:ad:be:ef:00:00 | and only exists in the target cache.
+ * +-------------------+
+ *
+ * ~~~~
+ *
+ * +---------------------+
+ * | Global list_t | A mblk_t comes in for an entry. We
+ * | overlay_target_list | append it to the overlay_target_list.
+ * +---------------------+
+ * |
+ * v
+ * +-------------------+ +-------------------+
+ * | Pending Entry |----->| Pending Entry |--->...
+ * | 42:5e:1a:10:d6:2d | | de:ad:be:ef:00:00 |
+ * +-------------------+ +-------------------+
+ *
+ * ~~~~
+ *
+ * +--------------------------+
+ * | /dev/overlay minor state | User land said that it would look up an
+ * | overlay_target_hdl_t | entry for us. We remove it from the
+ * +--------------------------+ global list and add it to the handle's
+ * | outstanding list.
+ * |
+ * v
+ * +-------------------+ +-------------------+
+ * | Pending Entry |----->| Pending Entry |
+ * | 90:b8:d0:79:02:dd | | de:ad:be:ef:00:00 |
+ * +-------------------+ +-------------------+
+ *
+ * ~~~~
+ *
+ * +-------------------+
+ * | Valid Entry | varpd returned an answer with
+ * | de:ad:be:ef:00:00 | OVERLAY_IOC_RESPOND and the target cache
+ * | 10.169.23.42:4789 | entry is now populated with a
+ * +-------------------+ destination and marked as valid
+ *
+ *
+ * The lookup mechanism is performed via a series of operations on the character
+ * psuedo-device /dev/overlay. The only thing that uses this device is the
+ * userland daemon varpd. /dev/overlay is a cloneable device, each open of it
+ * granting a new minor number which maintains its own state. We maintain this
+ * state so that way if an outstanding lookup was queued to something that
+ * crashed or closed its handle without responding, we can know about this and
+ * thus handle it appropriately.
+ *
+ * When a lookup is first created it's added to our global list of outstanding
+ * lookups. To service requests, userland is required to perform an ioctl to ask
+ * for a request. We will block it in the kernel a set amount of time waiting
+ * for a request. When we give a request to a given minor instance of the
+ * device, we remove it from the global list and append the request to the
+ * device's list of outstanding entries, for the reasons we discussed above.
+ * When a lookup comes in, we give user land a smaller amount of information
+ * specific to that packet, the overlay_targ_lookup_t. It includes a request id
+ * to identify this, and then the overlay id, the varpd id, the header and
+ * packet size, the source and destination mac address, the SAP, and any
+ * potential VLAN header.
+ *
+ * At that point, it stays in that outstanding list until one of two ioctls are
+ * returned: OVERLAY_TARG_RESPOND or OVERLAY_TARG_DROP. During this time,
+ * userland may also perform other operations. For example, it may use
+ * OVERLAY_TARG_PKT to get a copy of this packet so it can perform more in-depth
+ * analysis of what to do beyond what we gave it initially. This is useful for
+ * providing proxy arp and the like. Finally, there are two other ioctls that
+ * varpd can then do. The first is OVERLAY_TARG_INJECT which injects the
+ * non-jumbo frame packet up into that mac device and OVERLAY_TARG_RESEND which
+ * causes us to encapsulate and send out the packet they've given us.
+ *
+ *
+ * Finally, through the target cache, several ioctls are provided to allow for
+ * interrogation and management of the cache. They allow for individual entries
+ * to be retrieved, set, or have the entire table flushed. For the full set of
+ * ioctls here and what they do, take a look at uts/common/sys/overlay_target.h.
+ *
+ * ------------------
+ * Sample Packet Flow
+ * ------------------
+ *
+ * There's a lot of pieces here, hopefully an example of how this all fits
+ * together will help clarify and elucidate what's going on. We're going to
+ * first track an outgoing packet, eg. one that is sent from an IP interface on
+ * a VNIC on top of an overlay device, and then we'll look at what it means to
+ * respond to that.
+ *
+ *
+ * +----------------+ +--------------+ +------------------+
+ * | IP/DLS send |------->| MAC sends it |----------->| mblk_t reaches |
+ * | packet to MAC | | to the GLDv3 | | overlay GLDv3 tx |
+ * +----------------+ | VNIC device | | overlay_m_tx() |
+ * +--------------+ +------------------+
+ * |
+ * . lookup . cache |
+ * . drop . miss v
+ * +---------+ . +--------+ . +------------------+
+ * | freemsg |<-----*-------| varpd |<---*------| Lookup each mblk |
+ * | mblk_t | | lookup | | in the target |
+ * +---------+ | queued | | cache |
+ * ^ +--------+ +------------------+
+ * on send | | | cache
+ * error . . * *. . lookup * . . hit
+ * | | success v
+ * | | +------------------+
+ * +-----------------+ +--------------->| call plugin |
+ * | Send out | | ovpo_encap() to |
+ * | overlay_mux_t's |<----------------------------------| get encap mblk_t |
+ * | ksocket | +------------------+
+ * +-----------------+
+ *
+ * The receive end point looks a little different and looks more like:
+ *
+ * +------------------+ +----------------+ +-----------+
+ * | mblk_t comes off |---->| enter netstack |--->| delivered |---+
+ * | the physical | | IP stack | | to | * . . direct
+ * | device | +----------------+ | ksocket | | callback
+ * +------------------+ +-----------+ |
+ * . overlay id |
+ * . not found v
+ * +-----------+ . +-----------------+ +--------------------+
+ * | freemsg |<--*------| call plugin |<------| overlay_mux_recv() |
+ * | mblk_t | | ovpo_decap() to | +--------------------+
+ * +-----------+ | decap mblk_t |
+ * +-----------------+
+ * |
+ * * . . overlay id
+ * v found
+ * +--------+ +----------------+
+ * | adjust |----->| call mac_rx |
+ * | mblk_t | | on original |
+ * +--------+ | decaped packet |
+ * +----------------+
+ *
+ * ------------------
+ * Netstack Awareness
+ * ------------------
+ *
+ * In the above image we note that this enters a netstack. Today the only
+ * netstack that can be is the global zone as the overlay driver itself is not
+ * exactly netstack aware. What this really means is that varpd cannot run in a
+ * non-global zone and an overlay device cannot belong to a non-global zone.
+ * Non-global zones can still have a VNIC assigned to them that's been created
+ * over the overlay device the same way they would if it had been created over
+ * an etherstub or a physical device.
+ *
+ * The majority of the work to make it netstack aware is straightforward and the
+ * biggest thing is to create a netstack module that allows us to hook into
+ * netstack (and thus zone) creation and destruction. From there, we need to
+ * amend the target cache lookup routines that we discussed earlier to not have
+ * a global outstanding list and a global list of handles, but rather, one per
+ * netstack.
+ *
+ * For the mux, we'll need to open the ksocket in the context of the zone, we
+ * can likely do this with a properly composed credential, but we'll need to do
+ * some more work on that path. Finally, we'll want to make sure the dld ioctls
+ * are aware of the zoneid of the caller and we use that appropriately and store
+ * it in the overlay_dev_t.
+ *
+ * -----------
+ * GLDv3 Notes
+ * -----------
+ *
+ * The overlay driver implements a GLDv3 device. Parts of GLDv3 are more
+ * relevant and other parts are much less relevant for us. For example, the
+ * GLDv3 is used to toggle the device being put into and out of promiscuous
+ * mode, to program MAC addresses for unicast and multicast hardware filters.
+ * Today, an overlay device doesn't have a notion of promiscuous mode nor does
+ * it have a notion of unicast and multicast addresses programmed into the
+ * device. Instead, for the purposes of the hardware filter, we don't do
+ * anything and just always accept new addresses being added and removed.
+ *
+ * If the GLDv3 start function has not been called, then we will not use this
+ * device for I/O purposes. Any calls to transmit or receive should be dropped,
+ * though the GLDv3 guarantees us that transmit will not be called without
+ * calling start. Similarly, once stop is called, then no packets can be dealt
+ * with.
+ *
+ * Today we don't support the stat interfaces, though there's no good reason
+ * that we shouldn't assemble some of the stats based on what we have in the
+ * future.
+ *
+ * When it comes to link properties, many of the traditional link properties do
+ * not apply and many others MAC handles for us. For example, we don't need to
+ * implement anything for overlay_m_getprop() to deal with returning the MTU, as
+ * MAC never calls into us for that. As such, there isn't much of anything to
+ * support in terms of properties.
+ *
+ * Today, we don't support any notion of hardware capabilities. However, if
+ * future NIC hardware or other changes to the system cause it to make sense for
+ * us to emulate logical groups, then we should do that. However, we still do
+ * implement a capab function so that we can identify ourselves as an overlay
+ * device to the broader MAC framework. This is done mostly so that a device
+ * created on top of us can have fanout rings as we don't try to lie about a
+ * speed for our device.
+ *
+ * The other question is what should be done for a device's MTU and margin. We
+ * set our minimum supported MTU to be the minimum value that an IP network may
+ * be set to 576 -- which mimics what an etherstub does. On the flip side, we
+ * have our upper bound set to 8900. This value comes from the fact that a lot
+ * of jumbo networks use their maximum as 9000. As such, we want to reserve 100
+ * bytes, which isn't exactly the most accurate number, but it'll be good enough
+ * for now. Because of that, our default MTU off of these devices is 1400, as
+ * the default MTU for everything is usually 1500 or whatever the underlying
+ * device is at; however, this is a bit simpler than asking the netstack what
+ * are all the IP interfaces at. It also calls into question how PMTU and PMTU
+ * discovery should work here. The challenge, especially for
+ * OVERLAY_TARG_DYNAMIC is that the MTU to any of the places will vary and it's
+ * not clear that if you have a single bad entry that the overall MTU should be
+ * lowered. Instead, we should figure out a better way of determining these
+ * kinds of PMTU errors and appropriately alerting the administrator via FMA.
+ *
+ * Regarding margin, we allow a margin of up to VLAN_TAGSZ depending on whether
+ * or not the underlying encapsulation device supports VLAN tags. If it does,
+ * then we'll set the margin to allow for it, otherwise, we will not.
+ */
+
+#include <sys/conf.h>
+#include <sys/errno.h>
+#include <sys/stat.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/modctl.h>
+#include <sys/policy.h>
+#include <sys/stream.h>
+#include <sys/strsubr.h>
+#include <sys/strsun.h>
+#include <sys/types.h>
+#include <sys/kmem.h>
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+#include <sys/ddifm.h>
+
+#include <sys/dls.h>
+#include <sys/dld_ioc.h>
+#include <sys/mac_provider.h>
+#include <sys/mac_client_priv.h>
+#include <sys/mac_ether.h>
+#include <sys/vlan.h>
+
+#include <sys/overlay_impl.h>
+
+dev_info_t *overlay_dip;
+static kmutex_t overlay_dev_lock;
+static list_t overlay_dev_list;
+static uint8_t overlay_macaddr[ETHERADDRL] =
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
+
+typedef enum overlay_dev_prop {
+ OVERLAY_DEV_P_MTU = 0,
+ OVERLAY_DEV_P_VNETID,
+ OVERLAY_DEV_P_ENCAP,
+ OVERLAY_DEV_P_VARPDID
+} overlay_dev_prop_t;
+
+#define OVERLAY_DEV_NPROPS 4
+static const char *overlay_dev_props[] = {
+ "mtu",
+ "vnetid",
+ "encap",
+ "varpd/id"
+};
+
+#define OVERLAY_MTU_MIN 576
+#define OVERLAY_MTU_DEF 1400
+#define OVERLAY_MTU_MAX 8900
+
+overlay_dev_t *
+overlay_hold_by_dlid(datalink_id_t id)
+{
+ overlay_dev_t *o;
+
+ mutex_enter(&overlay_dev_lock);
+ for (o = list_head(&overlay_dev_list); o != NULL;
+ o = list_next(&overlay_dev_list, o)) {
+ if (id == o->odd_linkid) {
+ mutex_enter(&o->odd_lock);
+ o->odd_ref++;
+ mutex_exit(&o->odd_lock);
+ mutex_exit(&overlay_dev_lock);
+ return (o);
+ }
+ }
+
+ mutex_exit(&overlay_dev_lock);
+ return (NULL);
+}
+
+void
+overlay_hold_rele(overlay_dev_t *odd)
+{
+ mutex_enter(&odd->odd_lock);
+ ASSERT(odd->odd_ref > 0);
+ odd->odd_ref--;
+ mutex_exit(&odd->odd_lock);
+}
+
+void
+overlay_io_start(overlay_dev_t *odd, overlay_dev_flag_t flag)
+{
+ ASSERT(flag == OVERLAY_F_IN_RX || flag == OVERLAY_F_IN_TX);
+ ASSERT(MUTEX_HELD(&odd->odd_lock));
+
+ if (flag & OVERLAY_F_IN_RX)
+ odd->odd_rxcount++;
+ if (flag & OVERLAY_F_IN_TX)
+ odd->odd_txcount++;
+ odd->odd_flags |= flag;
+}
+
+void
+overlay_io_done(overlay_dev_t *odd, overlay_dev_flag_t flag)
+{
+ boolean_t signal = B_FALSE;
+
+ ASSERT(flag == OVERLAY_F_IN_RX || flag == OVERLAY_F_IN_TX);
+ ASSERT(MUTEX_HELD(&odd->odd_lock));
+
+ if (flag & OVERLAY_F_IN_RX) {
+ ASSERT(odd->odd_rxcount > 0);
+ odd->odd_rxcount--;
+ if (odd->odd_rxcount == 0) {
+ signal = B_TRUE;
+ odd->odd_flags &= ~OVERLAY_F_IN_RX;
+ }
+ }
+ if (flag & OVERLAY_F_IN_TX) {
+ ASSERT(odd->odd_txcount > 0);
+ odd->odd_txcount--;
+ if (odd->odd_txcount == 0) {
+ signal = B_TRUE;
+ odd->odd_flags &= ~OVERLAY_F_IN_TX;
+ }
+ }
+
+ if (signal == B_TRUE)
+ cv_broadcast(&odd->odd_iowait);
+}
+
+static void
+overlay_io_wait(overlay_dev_t *odd, overlay_dev_flag_t flag)
+{
+ ASSERT((flag & ~OVERLAY_F_IOMASK) == 0);
+ ASSERT(MUTEX_HELD(&odd->odd_lock));
+
+ while (odd->odd_flags & flag) {
+ cv_wait(&odd->odd_iowait, &odd->odd_lock);
+ }
+}
+
+void
+overlay_dev_iter(overlay_dev_iter_f func, void *arg)
+{
+ overlay_dev_t *odd;
+
+ mutex_enter(&overlay_dev_lock);
+ for (odd = list_head(&overlay_dev_list); odd != NULL;
+ odd = list_next(&overlay_dev_list, odd)) {
+ if (func(odd, arg) != 0) {
+ mutex_exit(&overlay_dev_lock);
+ return;
+ }
+ }
+ mutex_exit(&overlay_dev_lock);
+}
+
+/* ARGSUSED */
+static int
+overlay_m_stat(void *arg, uint_t stat, uint64_t *val)
+{
+ return (ENOTSUP);
+}
+
+static int
+overlay_m_start(void *arg)
+{
+ overlay_dev_t *odd = arg;
+ overlay_mux_t *mux;
+ int ret, domain, family, prot;
+ struct sockaddr_storage storage;
+ socklen_t slen;
+
+ mutex_enter(&odd->odd_lock);
+ if ((odd->odd_flags & OVERLAY_F_ACTIVATED) == 0) {
+ mutex_exit(&odd->odd_lock);
+ return (EAGAIN);
+ }
+ mutex_exit(&odd->odd_lock);
+
+ ret = odd->odd_plugin->ovp_ops->ovpo_socket(odd->odd_pvoid, &domain,
+ &family, &prot, (struct sockaddr *)&storage, &slen);
+ if (ret != 0)
+ return (ret);
+
+ mux = overlay_mux_open(odd->odd_plugin, domain, family, prot,
+ (struct sockaddr *)&storage, slen, &ret);
+ if (mux == NULL)
+ return (ret);
+
+ overlay_mux_add_dev(mux, odd);
+ odd->odd_mux = mux;
+ mutex_enter(&odd->odd_lock);
+ ASSERT(!(odd->odd_flags & OVERLAY_F_IN_MUX));
+ odd->odd_flags |= OVERLAY_F_IN_MUX;
+ mutex_exit(&odd->odd_lock);
+
+ return (0);
+}
+
+static void
+overlay_m_stop(void *arg)
+{
+ overlay_dev_t *odd = arg;
+
+ /*
+ * The MAC Perimeter is held here, so we don't have to worry about
+ * synchornizing this with respect to metadata operations.
+ */
+ mutex_enter(&odd->odd_lock);
+ VERIFY(odd->odd_flags & OVERLAY_F_IN_MUX);
+ VERIFY(!(odd->odd_flags & OVERLAY_F_MDDROP));
+ odd->odd_flags |= OVERLAY_F_MDDROP;
+ overlay_io_wait(odd, OVERLAY_F_IOMASK);
+ mutex_exit(&odd->odd_lock);
+
+ overlay_mux_remove_dev(odd->odd_mux, odd);
+ overlay_mux_close(odd->odd_mux);
+ odd->odd_mux = NULL;
+
+ mutex_enter(&odd->odd_lock);
+ odd->odd_flags &= ~OVERLAY_F_IN_MUX;
+ odd->odd_flags &= ~OVERLAY_F_MDDROP;
+ VERIFY((odd->odd_flags & OVERLAY_F_STOPMASK) == 0);
+ mutex_exit(&odd->odd_lock);
+}
+
+/*
+ * For more info on this, see the big theory statement.
+ */
+/* ARGSUSED */
+static int
+overlay_m_promisc(void *arg, boolean_t on)
+{
+ return (0);
+}
+
+/*
+ * For more info on this, see the big theory statement.
+ */
+/* ARGSUSED */
+static int
+overlay_m_multicast(void *arg, boolean_t add, const uint8_t *addrp)
+{
+ return (0);
+}
+
+/*
+ * For more info on this, see the big theory statement.
+ */
+/* ARGSUSED */
+static int
+overlay_m_unicast(void *arg, const uint8_t *macaddr)
+{
+ return (0);
+}
+
+mblk_t *
+overlay_m_tx(void *arg, mblk_t *mp_chain)
+{
+ overlay_dev_t *odd = arg;
+ mblk_t *mp, *ep;
+ int ret;
+ ovep_encap_info_t einfo;
+ struct msghdr hdr;
+
+ mutex_enter(&odd->odd_lock);
+ if ((odd->odd_flags & OVERLAY_F_MDDROP) ||
+ !(odd->odd_flags & OVERLAY_F_IN_MUX)) {
+ mutex_exit(&odd->odd_lock);
+ freemsgchain(mp_chain);
+ return (NULL);
+ }
+ overlay_io_start(odd, OVERLAY_F_IN_TX);
+ mutex_exit(&odd->odd_lock);
+
+ bzero(&hdr, sizeof (struct msghdr));
+
+ bzero(&einfo, sizeof (ovep_encap_info_t));
+ einfo.ovdi_id = odd->odd_vid;
+ mp = mp_chain;
+ while (mp != NULL) {
+ socklen_t slen;
+ struct sockaddr_storage storage;
+
+ mp_chain = mp->b_next;
+ mp->b_next = NULL;
+ ep = NULL;
+
+ ret = overlay_target_lookup(odd, mp,
+ (struct sockaddr *)&storage, &slen);
+ if (ret != OVERLAY_TARGET_OK) {
+ if (ret == OVERLAY_TARGET_DROP)
+ freemsg(mp);
+ mp = mp_chain;
+ continue;
+ }
+
+ hdr.msg_name = &storage;
+ hdr.msg_namelen = slen;
+
+ ret = odd->odd_plugin->ovp_ops->ovpo_encap(odd->odd_mh, mp,
+ &einfo, &ep);
+ if (ret != 0 || ep == NULL) {
+ freemsg(mp);
+ goto out;
+ }
+
+ ep->b_cont = mp;
+ ret = overlay_mux_tx(odd->odd_mux, &hdr, ep);
+ if (ret != 0)
+ goto out;
+
+ mp = mp_chain;
+ }
+
+out:
+ mutex_enter(&odd->odd_lock);
+ overlay_io_done(odd, OVERLAY_F_IN_TX);
+ mutex_exit(&odd->odd_lock);
+ return (mp_chain);
+}
+
+/* ARGSUSED */
+static void
+overlay_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
+{
+ miocnak(q, mp, 0, ENOTSUP);
+}
+
+/* ARGSUSED */
+static boolean_t
+overlay_m_getcapab(void *arg, mac_capab_t cap, void *cap_data)
+{
+ /*
+ * Tell MAC we're an overlay.
+ */
+ if (cap == MAC_CAPAB_OVERLAY)
+ return (B_TRUE);
+ return (B_FALSE);
+}
+
+/* ARGSUSED */
+static int
+overlay_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+ uint_t pr_valsize, const void *pr_val)
+{
+ uint32_t mtu, old;
+ int err;
+ overlay_dev_t *odd = arg;
+
+ if (pr_num != MAC_PROP_MTU)
+ return (ENOTSUP);
+
+ bcopy(pr_val, &mtu, sizeof (mtu));
+ if (mtu < OVERLAY_MTU_MIN || mtu > OVERLAY_MTU_MAX)
+ return (EINVAL);
+
+ mutex_enter(&odd->odd_lock);
+ old = odd->odd_mtu;
+ odd->odd_mtu = mtu;
+ err = mac_maxsdu_update(odd->odd_mh, mtu);
+ if (err != 0)
+ odd->odd_mtu = old;
+ mutex_exit(&odd->odd_lock);
+
+ return (err);
+}
+
+/* ARGSUSED */
+static int
+overlay_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+ uint_t pr_valsize, void *pr_val)
+{
+ return (ENOTSUP);
+}
+
+/* ARGSUSED */
+static void
+overlay_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+ mac_prop_info_handle_t prh)
+{
+ if (pr_num != MAC_PROP_MTU)
+ return;
+
+ mac_prop_info_set_default_uint32(prh, OVERLAY_MTU_DEF);
+ mac_prop_info_set_range_uint32(prh, OVERLAY_MTU_MIN, OVERLAY_MTU_MAX);
+}
+
+static mac_callbacks_t overlay_m_callbacks = {
+ .mc_callbacks = (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP |
+ MC_PROPINFO),
+ .mc_getstat = overlay_m_stat,
+ .mc_start = overlay_m_start,
+ .mc_stop = overlay_m_stop,
+ .mc_setpromisc = overlay_m_promisc,
+ .mc_multicst = overlay_m_multicast,
+ .mc_unicst = overlay_m_unicast,
+ .mc_tx = overlay_m_tx,
+ .mc_ioctl = overlay_m_ioctl,
+ .mc_getcapab = overlay_m_getcapab,
+ .mc_getprop = overlay_m_getprop,
+ .mc_setprop = overlay_m_setprop,
+ .mc_propinfo = overlay_m_propinfo
+};
+
+static boolean_t
+overlay_valid_name(const char *name, size_t buflen)
+{
+ size_t actlen;
+ int err, i;
+
+ for (i = 0; i < buflen; i++) {
+ if (name[i] == '\0')
+ break;
+ }
+
+ if (i == 0 || i == buflen)
+ return (B_FALSE);
+ actlen = i;
+ if (strchr(name, '/') != NULL)
+ return (B_FALSE);
+ if (u8_validate((char *)name, actlen, NULL,
+ U8_VALIDATE_ENTIRE, &err) < 0)
+ return (B_FALSE);
+ return (B_TRUE);
+}
+
+/* ARGSUSED */
+static int
+overlay_i_create(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp)
+{
+ int err;
+ uint64_t maxid;
+ overlay_dev_t *odd, *o;
+ mac_register_t *mac;
+ overlay_ioc_create_t *oicp = karg;
+
+ if (overlay_valid_name(oicp->oic_encap, MAXLINKNAMELEN) == B_FALSE)
+ return (EINVAL);
+
+ odd = kmem_zalloc(sizeof (overlay_dev_t), KM_SLEEP);
+ odd->odd_linkid = oicp->oic_linkid;
+ odd->odd_plugin = overlay_plugin_lookup(oicp->oic_encap);
+ if (odd->odd_plugin == NULL) {
+ kmem_free(odd, sizeof (overlay_dev_t));
+ return (ENOENT);
+ }
+ err = odd->odd_plugin->ovp_ops->ovpo_init((overlay_handle_t)odd,
+ &odd->odd_pvoid);
+ if (err != 0) {
+ odd->odd_plugin->ovp_ops->ovpo_fini(odd->odd_pvoid);
+ overlay_plugin_rele(odd->odd_plugin);
+ kmem_free(odd, sizeof (overlay_dev_t));
+ return (EINVAL);
+ }
+
+ /*
+ * Make sure that our virtual network id is valid for the given plugin
+ * that we're working with.
+ */
+ ASSERT(odd->odd_plugin->ovp_id_size <= 8);
+ maxid = UINT64_MAX;
+ if (odd->odd_plugin->ovp_id_size != 8)
+ maxid = (1ULL << (odd->odd_plugin->ovp_id_size * 8)) - 1ULL;
+ if (oicp->oic_vnetid > maxid) {
+ odd->odd_plugin->ovp_ops->ovpo_fini(odd->odd_pvoid);
+ overlay_plugin_rele(odd->odd_plugin);
+ kmem_free(odd, sizeof (overlay_dev_t));
+ return (EINVAL);
+ }
+ odd->odd_vid = oicp->oic_vnetid;
+
+ mac = mac_alloc(MAC_VERSION);
+ if (mac == NULL) {
+ mutex_exit(&overlay_dev_lock);
+ odd->odd_plugin->ovp_ops->ovpo_fini(odd->odd_pvoid);
+ overlay_plugin_rele(odd->odd_plugin);
+ kmem_free(odd, sizeof (overlay_dev_t));
+ return (EINVAL);
+ }
+
+ mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
+ mac->m_driver = odd;
+ mac->m_dip = overlay_dip;
+ mac->m_dst_addr = NULL;
+ mac->m_callbacks = &overlay_m_callbacks;
+ mac->m_pdata = NULL;
+ mac->m_pdata_size = 0;
+
+ mac->m_priv_props = NULL;
+
+ /* Let mac handle this itself. */
+ mac->m_instance = (uint_t)-1;
+
+ /*
+ * There is no real source address that should be used here, but saying
+ * that we're not ethernet is going to cause its own problems. At the
+ * end of the say, this is fine.
+ */
+ mac->m_src_addr = overlay_macaddr;
+
+ /*
+ * Start with the default MTU as the max SDU. If the MTU is changed, the
+ * SDU will be changed to reflect that.
+ */
+ mac->m_min_sdu = 1;
+ mac->m_max_sdu = OVERLAY_MTU_DEF;
+ mac->m_multicast_sdu = 0;
+
+ /*
+ * The underlying device doesn't matter, instead this comes from the
+ * encapsulation protocol and whether or not they allow VLAN tags.
+ */
+ if (odd->odd_plugin->ovp_flags & OVEP_F_VLAN_TAG) {
+ mac->m_margin = VLAN_TAGSZ;
+ } else {
+ mac->m_margin = 0;
+ }
+
+ /*
+ * Today, we have no MAC virtualization, it may make sense in the future
+ * to go ahead and emulate some subset of this, but it doesn't today.
+ */
+ mac->m_v12n = MAC_VIRT_NONE;
+
+ mutex_enter(&overlay_dev_lock);
+ for (o = list_head(&overlay_dev_list); o != NULL;
+ o = list_next(&overlay_dev_list, o)) {
+ if (o->odd_linkid == oicp->oic_linkid) {
+ mutex_exit(&overlay_dev_lock);
+ odd->odd_plugin->ovp_ops->ovpo_fini(odd->odd_pvoid);
+ overlay_plugin_rele(odd->odd_plugin);
+ kmem_free(odd, sizeof (overlay_dev_t));
+ return (EEXIST);
+ }
+
+ if (o->odd_vid == oicp->oic_vnetid &&
+ o->odd_plugin == odd->odd_plugin) {
+ mutex_exit(&overlay_dev_lock);
+ odd->odd_plugin->ovp_ops->ovpo_fini(odd->odd_pvoid);
+ overlay_plugin_rele(odd->odd_plugin);
+ kmem_free(odd, sizeof (overlay_dev_t));
+ return (EEXIST);
+ }
+ }
+
+ err = mac_register(mac, &odd->odd_mh);
+ mac_free(mac);
+ if (err != 0) {
+ mutex_exit(&overlay_dev_lock);
+ odd->odd_plugin->ovp_ops->ovpo_fini(odd->odd_pvoid);
+ overlay_plugin_rele(odd->odd_plugin);
+ kmem_free(odd, sizeof (overlay_dev_t));
+ return (err);
+ }
+
+ err = dls_devnet_create(odd->odd_mh, odd->odd_linkid,
+ crgetzoneid(cred));
+ if (err != 0) {
+ mutex_exit(&overlay_dev_lock);
+ (void) mac_unregister(odd->odd_mh);
+ odd->odd_plugin->ovp_ops->ovpo_fini(odd->odd_pvoid);
+ overlay_plugin_rele(odd->odd_plugin);
+ kmem_free(odd, sizeof (overlay_dev_t));
+ return (err);
+ }
+
+ mutex_init(&odd->odd_lock, NULL, MUTEX_DRIVER, NULL);
+ cv_init(&odd->odd_iowait, NULL, CV_DRIVER, NULL);
+ odd->odd_ref = 0;
+ odd->odd_flags = 0;
+ list_insert_tail(&overlay_dev_list, odd);
+ mutex_exit(&overlay_dev_lock);
+
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+overlay_i_activate(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp)
+{
+ int i, ret;
+ overlay_dev_t *odd;
+ mac_perim_handle_t mph;
+ overlay_ioc_activate_t *oiap = karg;
+ overlay_ioc_propinfo_t *infop;
+ overlay_ioc_prop_t *oip;
+ overlay_prop_handle_t phdl;
+
+ odd = overlay_hold_by_dlid(oiap->oia_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+
+ infop = kmem_alloc(sizeof (overlay_ioc_propinfo_t), KM_SLEEP);
+ oip = kmem_alloc(sizeof (overlay_ioc_prop_t), KM_SLEEP);
+ phdl = (overlay_prop_handle_t)infop;
+
+ mac_perim_enter_by_mh(odd->odd_mh, &mph);
+ mutex_enter(&odd->odd_lock);
+ if (odd->odd_flags & OVERLAY_F_ACTIVATED) {
+ mutex_exit(&odd->odd_lock);
+ mac_perim_exit(mph);
+ overlay_hold_rele(odd);
+ kmem_free(infop, sizeof (overlay_ioc_propinfo_t));
+ kmem_free(oip, sizeof (overlay_ioc_prop_t));
+ return (EEXIST);
+ }
+ mutex_exit(&odd->odd_lock);
+
+ for (i = 0; i < odd->odd_plugin->ovp_nprops; i++) {
+ const char *pname = odd->odd_plugin->ovp_props[i];
+ bzero(infop, sizeof (overlay_ioc_propinfo_t));
+ overlay_prop_init(phdl);
+ ret = odd->odd_plugin->ovp_ops->ovpo_propinfo(pname, phdl);
+ if (ret != 0) {
+ mac_perim_exit(mph);
+ overlay_hold_rele(odd);
+ kmem_free(infop, sizeof (overlay_ioc_propinfo_t));
+ kmem_free(oip, sizeof (overlay_ioc_prop_t));
+ return (ret);
+ }
+
+ if ((infop->oipi_prot & OVERLAY_PROP_PERM_REQ) == 0)
+ continue;
+ bzero(oip, sizeof (overlay_ioc_prop_t));
+ oip->oip_size = sizeof (oip->oip_value);
+ ret = odd->odd_plugin->ovp_ops->ovpo_getprop(odd->odd_pvoid,
+ pname, oip->oip_value, &oip->oip_size);
+ if (ret != 0) {
+ mac_perim_exit(mph);
+ overlay_hold_rele(odd);
+ kmem_free(infop, sizeof (overlay_ioc_propinfo_t));
+ kmem_free(oip, sizeof (overlay_ioc_prop_t));
+ return (ret);
+ }
+ if (oip->oip_size == 0) {
+ mac_perim_exit(mph);
+ overlay_hold_rele(odd);
+ kmem_free(infop, sizeof (overlay_ioc_propinfo_t));
+ kmem_free(oip, sizeof (overlay_ioc_prop_t));
+ return (EINVAL);
+ }
+ }
+
+ mutex_enter(&odd->odd_lock);
+ if ((odd->odd_flags & OVERLAY_F_VARPD) == 0) {
+ mutex_exit(&odd->odd_lock);
+ mac_perim_exit(mph);
+ overlay_hold_rele(odd);
+ kmem_free(infop, sizeof (overlay_ioc_propinfo_t));
+ kmem_free(oip, sizeof (overlay_ioc_prop_t));
+ return (ENXIO);
+ }
+
+ ASSERT((odd->odd_flags & OVERLAY_F_ACTIVATED) == 0);
+ odd->odd_flags |= OVERLAY_F_ACTIVATED;
+
+ /*
+ * Now that we've activated ourselves, we should indicate to the world
+ * that we're up. Note that we may not be able to perform lookups at
+ * this time, but our notion of being 'up' isn't dependent on that
+ * ability.
+ */
+ mac_link_update(odd->odd_mh, LINK_STATE_UP);
+ mutex_exit(&odd->odd_lock);
+
+ mac_perim_exit(mph);
+ overlay_hold_rele(odd);
+ kmem_free(infop, sizeof (overlay_ioc_propinfo_t));
+ kmem_free(oip, sizeof (overlay_ioc_prop_t));
+
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+overlay_i_delete(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp)
+{
+ overlay_ioc_delete_t *oidp = karg;
+ overlay_dev_t *odd;
+ datalink_id_t tid;
+ int ret;
+
+ odd = overlay_hold_by_dlid(oidp->oid_linkid);
+ if (odd == NULL) {
+ return (ENOENT);
+ }
+
+ mutex_enter(&odd->odd_lock);
+ /* If we're not the only hold, we're busy */
+ if (odd->odd_ref != 1) {
+ mutex_exit(&odd->odd_lock);
+ overlay_hold_rele(odd);
+ return (EBUSY);
+ }
+
+ if (odd->odd_flags & OVERLAY_F_IN_MUX) {
+ mutex_exit(&odd->odd_lock);
+ overlay_hold_rele(odd);
+ return (EBUSY);
+ }
+
+ /*
+ * To remove this, we need to first remove it from dls and then remove
+ * it from mac. The act of removing it from mac will check if there are
+ * devices on top of this, eg. vnics. If there are, then that will fail
+ * and we'll have to go through and recreate the dls entry. Only after
+ * mac_unregister has succeeded, then we'll go through and actually free
+ * everything and drop the dev lock.
+ */
+ ret = dls_devnet_destroy(odd->odd_mh, &tid, B_TRUE);
+ if (ret != 0) {
+ overlay_hold_rele(odd);
+ return (ret);
+ }
+
+ ASSERT(oidp->oid_linkid == tid);
+ ret = mac_disable(odd->odd_mh);
+ if (ret != 0) {
+ (void) dls_devnet_create(odd->odd_mh, odd->odd_linkid,
+ crgetzoneid(cred));
+ overlay_hold_rele(odd);
+ return (ret);
+ }
+
+ overlay_target_quiesce(odd->odd_target);
+
+ mutex_enter(&overlay_dev_lock);
+ list_remove(&overlay_dev_list, odd);
+ mutex_exit(&overlay_dev_lock);
+
+ cv_destroy(&odd->odd_iowait);
+ mutex_destroy(&odd->odd_lock);
+ overlay_target_free(odd);
+ odd->odd_plugin->ovp_ops->ovpo_fini(odd->odd_pvoid);
+ overlay_plugin_rele(odd->odd_plugin);
+ kmem_free(odd, sizeof (overlay_dev_t));
+
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+overlay_i_nprops(void *karg, intptr_t arg, int mode, cred_t *cred,
+ int *rvalp)
+{
+ overlay_dev_t *odd;
+ overlay_ioc_nprops_t *on = karg;
+
+ odd = overlay_hold_by_dlid(on->oipn_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+ on->oipn_nprops = odd->odd_plugin->ovp_nprops + OVERLAY_DEV_NPROPS;
+ overlay_hold_rele(odd);
+
+ return (0);
+}
+
+static int
+overlay_propinfo_plugin_cb(overlay_plugin_t *opp, void *arg)
+{
+ overlay_prop_handle_t phdl = arg;
+ overlay_prop_set_range_str(phdl, opp->ovp_name);
+ return (0);
+}
+
+static int
+overlay_i_name_to_propid(overlay_dev_t *odd, const char *name, uint_t *id)
+{
+ int i;
+
+ for (i = 0; i < OVERLAY_DEV_NPROPS; i++) {
+ if (strcmp(overlay_dev_props[i], name) == 0) {
+ *id = i;
+ return (0);
+ }
+ }
+
+ for (i = 0; i < odd->odd_plugin->ovp_nprops; i++) {
+ if (strcmp(odd->odd_plugin->ovp_props[i], name) == 0) {
+ *id = i + OVERLAY_DEV_NPROPS;
+ return (0);
+ }
+ }
+
+ return (ENOENT);
+}
+
+static void
+overlay_i_propinfo_mtu(overlay_dev_t *odd, overlay_prop_handle_t phdl)
+{
+ uint32_t def;
+ mac_propval_range_t range;
+ uint_t perm;
+
+ ASSERT(MAC_PERIM_HELD(odd->odd_mh));
+
+ bzero(&range, sizeof (mac_propval_range_t));
+ range.mpr_count = 1;
+ if (mac_prop_info(odd->odd_mh, MAC_PROP_MTU, "mtu", &def,
+ sizeof (def), &range, &perm) != 0)
+ return;
+
+ if (perm == MAC_PROP_PERM_READ)
+ overlay_prop_set_prot(phdl, OVERLAY_PROP_PERM_READ);
+ else if (perm == MAC_PROP_PERM_WRITE)
+ overlay_prop_set_prot(phdl, OVERLAY_PROP_PERM_WRITE);
+ else if (perm == MAC_PROP_PERM_RW)
+ overlay_prop_set_prot(phdl, OVERLAY_PROP_PERM_RW);
+
+ overlay_prop_set_type(phdl, OVERLAY_PROP_T_UINT);
+ overlay_prop_set_default(phdl, &def, sizeof (def));
+ overlay_prop_set_range_uint32(phdl, range.mpr_range_uint32[0].mpur_min,
+ range.mpr_range_uint32[0].mpur_max);
+}
+
+/* ARGSUSED */
+static int
+overlay_i_propinfo(void *karg, intptr_t arg, int mode, cred_t *cred,
+ int *rvalp)
+{
+ overlay_dev_t *odd;
+ int ret;
+ mac_perim_handle_t mph;
+ uint_t propid = UINT_MAX;
+ overlay_ioc_propinfo_t *oip = karg;
+ overlay_prop_handle_t phdl = (overlay_prop_handle_t)oip;
+
+ odd = overlay_hold_by_dlid(oip->oipi_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+
+ overlay_prop_init(phdl);
+ mac_perim_enter_by_mh(odd->odd_mh, &mph);
+
+ /*
+ * If the id is -1, then the property that we're looking for is named in
+ * oipi_name and we should fill in its id. Otherwise, we've been given
+ * an id and we need to turn that into a name for our plugin's sake. The
+ * id is our own fabrication for property discovery.
+ */
+ if (oip->oipi_id == -1) {
+ /*
+ * Determine if it's a known generic property or it belongs to a
+ * module by checking against the list of known names.
+ */
+ oip->oipi_name[OVERLAY_PROP_NAMELEN-1] = '\0';
+ if ((ret = overlay_i_name_to_propid(odd, oip->oipi_name,
+ &propid)) != 0) {
+ overlay_hold_rele(odd);
+ mac_perim_exit(mph);
+ return (ret);
+ }
+ oip->oipi_id = propid;
+ if (propid >= OVERLAY_DEV_NPROPS) {
+ ret = odd->odd_plugin->ovp_ops->ovpo_propinfo(
+ oip->oipi_name, phdl);
+ overlay_hold_rele(odd);
+ mac_perim_exit(mph);
+ return (ret);
+
+ }
+ } else if (oip->oipi_id >= OVERLAY_DEV_NPROPS) {
+ uint_t id = oip->oipi_id - OVERLAY_DEV_NPROPS;
+
+ if (id >= odd->odd_plugin->ovp_nprops) {
+ overlay_hold_rele(odd);
+ mac_perim_exit(mph);
+ return (EINVAL);
+ }
+ ret = odd->odd_plugin->ovp_ops->ovpo_propinfo(
+ odd->odd_plugin->ovp_props[id], phdl);
+ overlay_hold_rele(odd);
+ mac_perim_exit(mph);
+ return (ret);
+ } else if (oip->oipi_id < -1) {
+ overlay_hold_rele(odd);
+ mac_perim_exit(mph);
+ return (EINVAL);
+ } else {
+ ASSERT(oip->oipi_id < OVERLAY_DEV_NPROPS);
+ ASSERT(oip->oipi_id >= 0);
+ propid = oip->oipi_id;
+ (void) strlcpy(oip->oipi_name, overlay_dev_props[propid],
+ sizeof (oip->oipi_name));
+ }
+
+ switch (propid) {
+ case OVERLAY_DEV_P_MTU:
+ overlay_i_propinfo_mtu(odd, phdl);
+ break;
+ case OVERLAY_DEV_P_VNETID:
+ overlay_prop_set_prot(phdl, OVERLAY_PROP_PERM_RW);
+ overlay_prop_set_type(phdl, OVERLAY_PROP_T_UINT);
+ overlay_prop_set_nodefault(phdl);
+ break;
+ case OVERLAY_DEV_P_ENCAP:
+ overlay_prop_set_prot(phdl, OVERLAY_PROP_PERM_READ);
+ overlay_prop_set_type(phdl, OVERLAY_PROP_T_STRING);
+ overlay_prop_set_nodefault(phdl);
+ overlay_plugin_walk(overlay_propinfo_plugin_cb, phdl);
+ break;
+ case OVERLAY_DEV_P_VARPDID:
+ overlay_prop_set_prot(phdl, OVERLAY_PROP_PERM_READ);
+ overlay_prop_set_type(phdl, OVERLAY_PROP_T_UINT);
+ overlay_prop_set_nodefault(phdl);
+ break;
+ default:
+ overlay_hold_rele(odd);
+ mac_perim_exit(mph);
+ return (ENOENT);
+ }
+
+ overlay_hold_rele(odd);
+ mac_perim_exit(mph);
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+overlay_i_getprop(void *karg, intptr_t arg, int mode, cred_t *cred,
+ int *rvalp)
+{
+ int ret;
+ overlay_dev_t *odd;
+ mac_perim_handle_t mph;
+ overlay_ioc_prop_t *oip = karg;
+ uint_t propid, mtu;
+
+ odd = overlay_hold_by_dlid(oip->oip_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+
+ mac_perim_enter_by_mh(odd->odd_mh, &mph);
+ oip->oip_size = OVERLAY_PROP_SIZEMAX;
+ oip->oip_name[OVERLAY_PROP_NAMELEN-1] = '\0';
+ if (oip->oip_id == -1) {
+ int i;
+
+ for (i = 0; i < OVERLAY_DEV_NPROPS; i++) {
+ if (strcmp(overlay_dev_props[i], oip->oip_name) == 0)
+ break;
+ if (i == OVERLAY_DEV_NPROPS) {
+ ret = odd->odd_plugin->ovp_ops->ovpo_getprop(
+ odd->odd_pvoid, oip->oip_name,
+ oip->oip_value, &oip->oip_size);
+ overlay_hold_rele(odd);
+ mac_perim_exit(mph);
+ return (ret);
+ }
+ }
+
+ propid = i;
+ } else if (oip->oip_id >= OVERLAY_DEV_NPROPS) {
+ uint_t id = oip->oip_id - OVERLAY_DEV_NPROPS;
+
+ if (id > odd->odd_plugin->ovp_nprops) {
+ overlay_hold_rele(odd);
+ mac_perim_exit(mph);
+ return (EINVAL);
+ }
+ ret = odd->odd_plugin->ovp_ops->ovpo_getprop(odd->odd_pvoid,
+ odd->odd_plugin->ovp_props[id], oip->oip_value,
+ &oip->oip_size);
+ overlay_hold_rele(odd);
+ mac_perim_exit(mph);
+ return (ret);
+ } else if (oip->oip_id < -1) {
+ overlay_hold_rele(odd);
+ mac_perim_exit(mph);
+ return (EINVAL);
+ } else {
+ ASSERT(oip->oip_id < OVERLAY_DEV_NPROPS);
+ ASSERT(oip->oip_id >= 0);
+ propid = oip->oip_id;
+ }
+
+ ret = 0;
+ switch (propid) {
+ case OVERLAY_DEV_P_MTU:
+ /*
+ * The MTU is always set and retrieved through MAC, to allow for
+ * MAC to do whatever it wants, as really that property belongs
+ * to MAC. This is important for things where vnics have hold on
+ * the MTU.
+ */
+ mac_sdu_get(odd->odd_mh, NULL, &mtu);
+ bcopy(&mtu, oip->oip_value, sizeof (uint_t));
+ oip->oip_size = sizeof (uint_t);
+ break;
+ case OVERLAY_DEV_P_VNETID:
+ /*
+ * While it's read-only while inside of a mux, we're not in a
+ * context that can guarantee that. Therefore we always grab the
+ * overlay_dev_t's odd_lock.
+ */
+ mutex_enter(&odd->odd_lock);
+ bcopy(&odd->odd_vid, oip->oip_value, sizeof (uint64_t));
+ mutex_exit(&odd->odd_lock);
+ oip->oip_size = sizeof (uint64_t);
+ break;
+ case OVERLAY_DEV_P_ENCAP:
+ oip->oip_size = strlcpy((char *)oip->oip_value,
+ odd->odd_plugin->ovp_name, oip->oip_size);
+ break;
+ case OVERLAY_DEV_P_VARPDID:
+ mutex_enter(&odd->odd_lock);
+ if (odd->odd_flags & OVERLAY_F_VARPD) {
+ const uint64_t val = odd->odd_target->ott_id;
+ bcopy(&val, oip->oip_value, sizeof (uint64_t));
+ oip->oip_size = sizeof (uint64_t);
+ } else {
+ oip->oip_size = 0;
+ }
+ mutex_exit(&odd->odd_lock);
+ break;
+ default:
+ ret = ENOENT;
+ }
+
+ overlay_hold_rele(odd);
+ mac_perim_exit(mph);
+ return (ret);
+}
+
+static void
+overlay_setprop_vnetid(overlay_dev_t *odd, uint64_t vnetid)
+{
+ mutex_enter(&odd->odd_lock);
+
+ /* Simple case, not active */
+ if (!(odd->odd_flags & OVERLAY_F_IN_MUX)) {
+ odd->odd_vid = vnetid;
+ mutex_exit(&odd->odd_lock);
+ return;
+ }
+
+ /*
+ * In the hard case, we need to set the drop flag, quiesce I/O and then
+ * we can go ahead and do everything.
+ */
+ odd->odd_flags |= OVERLAY_F_MDDROP;
+ overlay_io_wait(odd, OVERLAY_F_IOMASK);
+ mutex_exit(&odd->odd_lock);
+
+ overlay_mux_remove_dev(odd->odd_mux, odd);
+ mutex_enter(&odd->odd_lock);
+ odd->odd_vid = vnetid;
+ mutex_exit(&odd->odd_lock);
+ overlay_mux_add_dev(odd->odd_mux, odd);
+
+ mutex_enter(&odd->odd_lock);
+ ASSERT(odd->odd_flags & OVERLAY_F_IN_MUX);
+ odd->odd_flags &= ~OVERLAY_F_IN_MUX;
+ mutex_exit(&odd->odd_lock);
+}
+
+/* ARGSUSED */
+static int
+overlay_i_setprop(void *karg, intptr_t arg, int mode, cred_t *cred,
+ int *rvalp)
+{
+ int ret;
+ overlay_dev_t *odd;
+ overlay_ioc_prop_t *oip = karg;
+ uint_t propid = UINT_MAX;
+ mac_perim_handle_t mph;
+ uint64_t maxid, *vidp;
+
+ if (oip->oip_size > OVERLAY_PROP_SIZEMAX)
+ return (EINVAL);
+
+ odd = overlay_hold_by_dlid(oip->oip_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+
+ oip->oip_name[OVERLAY_PROP_NAMELEN-1] = '\0';
+ mac_perim_enter_by_mh(odd->odd_mh, &mph);
+ mutex_enter(&odd->odd_lock);
+ if (odd->odd_flags & OVERLAY_F_ACTIVATED) {
+ mac_perim_exit(mph);
+ mutex_exit(&odd->odd_lock);
+ return (ENOTSUP);
+ }
+ mutex_exit(&odd->odd_lock);
+ if (oip->oip_id == -1) {
+ int i;
+
+ for (i = 0; i < OVERLAY_DEV_NPROPS; i++) {
+ if (strcmp(overlay_dev_props[i], oip->oip_name) == 0)
+ break;
+ if (i == OVERLAY_DEV_NPROPS) {
+ ret = odd->odd_plugin->ovp_ops->ovpo_setprop(
+ odd->odd_pvoid, oip->oip_name,
+ oip->oip_value, oip->oip_size);
+ overlay_hold_rele(odd);
+ mac_perim_exit(mph);
+ return (ret);
+ }
+ }
+
+ propid = i;
+ } else if (oip->oip_id >= OVERLAY_DEV_NPROPS) {
+ uint_t id = oip->oip_id - OVERLAY_DEV_NPROPS;
+
+ if (id > odd->odd_plugin->ovp_nprops) {
+ mac_perim_exit(mph);
+ overlay_hold_rele(odd);
+ return (EINVAL);
+ }
+ ret = odd->odd_plugin->ovp_ops->ovpo_setprop(odd->odd_pvoid,
+ odd->odd_plugin->ovp_props[id], oip->oip_value,
+ oip->oip_size);
+ mac_perim_exit(mph);
+ overlay_hold_rele(odd);
+ return (ret);
+ } else if (oip->oip_id < -1) {
+ mac_perim_exit(mph);
+ overlay_hold_rele(odd);
+ return (EINVAL);
+ } else {
+ ASSERT(oip->oip_id < OVERLAY_DEV_NPROPS);
+ ASSERT(oip->oip_id >= 0);
+ propid = oip->oip_id;
+ }
+
+ ret = 0;
+ switch (propid) {
+ case OVERLAY_DEV_P_MTU:
+ ret = mac_set_prop(odd->odd_mh, MAC_PROP_MTU, "mtu",
+ oip->oip_value, oip->oip_size);
+ break;
+ case OVERLAY_DEV_P_VNETID:
+ if (oip->oip_size != sizeof (uint64_t)) {
+ ret = EINVAL;
+ break;
+ }
+ vidp = (uint64_t *)oip->oip_value;
+ ASSERT(odd->odd_plugin->ovp_id_size <= 8);
+ maxid = UINT64_MAX;
+ if (odd->odd_plugin->ovp_id_size != 8)
+ maxid = (1ULL << (odd->odd_plugin->ovp_id_size * 8)) -
+ 1ULL;
+ if (*vidp >= maxid) {
+ ret = EINVAL;
+ break;
+ }
+ overlay_setprop_vnetid(odd, *vidp);
+ break;
+ case OVERLAY_DEV_P_ENCAP:
+ case OVERLAY_DEV_P_VARPDID:
+ ret = EPERM;
+ break;
+ default:
+ ret = ENOENT;
+ }
+
+ mac_perim_exit(mph);
+ overlay_hold_rele(odd);
+ return (ret);
+}
+
+/* ARGSUSED */
+static int
+overlay_i_status(void *karg, intptr_t arg, int mode, cred_t *cred,
+ int *rvalp)
+{
+ overlay_dev_t *odd;
+ overlay_ioc_status_t *os = karg;
+
+ odd = overlay_hold_by_dlid(os->ois_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+
+ mutex_enter(&odd->odd_lock);
+ if ((odd->odd_flags & OVERLAY_F_DEGRADED) != 0) {
+ os->ois_status = OVERLAY_I_DEGRADED;
+ if (odd->odd_fmamsg != NULL) {
+ (void) strlcpy(os->ois_message, odd->odd_fmamsg,
+ OVERLAY_STATUS_BUFLEN);
+ } else {
+ os->ois_message[0] = '\0';
+ }
+
+ } else {
+ os->ois_status = OVERLAY_I_OK;
+ os->ois_message[0] = '\0';
+ }
+ mutex_exit(&odd->odd_lock);
+ overlay_hold_rele(odd);
+
+ return (0);
+}
+
+static dld_ioc_info_t overlay_ioc_list[] = {
+ { OVERLAY_IOC_CREATE, DLDCOPYIN, sizeof (overlay_ioc_create_t),
+ overlay_i_create, secpolicy_dl_config },
+ { OVERLAY_IOC_ACTIVATE, DLDCOPYIN, sizeof (overlay_ioc_activate_t),
+ overlay_i_activate, secpolicy_dl_config },
+ { OVERLAY_IOC_DELETE, DLDCOPYIN, sizeof (overlay_ioc_delete_t),
+ overlay_i_delete, secpolicy_dl_config },
+ { OVERLAY_IOC_PROPINFO, DLDCOPYIN | DLDCOPYOUT,
+ sizeof (overlay_ioc_propinfo_t), overlay_i_propinfo,
+ secpolicy_dl_config },
+ { OVERLAY_IOC_GETPROP, DLDCOPYIN | DLDCOPYOUT,
+ sizeof (overlay_ioc_prop_t), overlay_i_getprop,
+ secpolicy_dl_config },
+ { OVERLAY_IOC_SETPROP, DLDCOPYIN,
+ sizeof (overlay_ioc_prop_t), overlay_i_setprop,
+ secpolicy_dl_config },
+ { OVERLAY_IOC_NPROPS, DLDCOPYIN | DLDCOPYOUT,
+ sizeof (overlay_ioc_nprops_t), overlay_i_nprops,
+ secpolicy_dl_config },
+ { OVERLAY_IOC_STATUS, DLDCOPYIN | DLDCOPYOUT,
+ sizeof (overlay_ioc_status_t), overlay_i_status,
+ NULL }
+};
+
+static int
+overlay_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ int fmcap = DDI_FM_EREPORT_CAPABLE;
+ if (cmd != DDI_ATTACH)
+ return (DDI_FAILURE);
+
+ if (overlay_dip != NULL || ddi_get_instance(dip) != 0)
+ return (DDI_FAILURE);
+
+ ddi_fm_init(dip, &fmcap, NULL);
+
+ if (ddi_create_minor_node(dip, OVERLAY_CTL, S_IFCHR,
+ ddi_get_instance(dip), DDI_PSEUDO, 0) == DDI_FAILURE)
+ return (DDI_FAILURE);
+
+ if (dld_ioc_register(OVERLAY_IOC, overlay_ioc_list,
+ DLDIOCCNT(overlay_ioc_list)) != 0) {
+ ddi_remove_minor_node(dip, OVERLAY_CTL);
+ return (DDI_FAILURE);
+ }
+
+ overlay_dip = dip;
+ return (DDI_SUCCESS);
+}
+
+/* ARGSUSED */
+static int
+overlay_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resp)
+{
+ int error;
+
+ switch (cmd) {
+ case DDI_INFO_DEVT2DEVINFO:
+ *resp = (void *)overlay_dip;
+ error = DDI_SUCCESS;
+ break;
+ case DDI_INFO_DEVT2INSTANCE:
+ *resp = (void *)0;
+ error = DDI_SUCCESS;
+ break;
+ default:
+ error = DDI_FAILURE;
+ break;
+ }
+
+ return (error);
+}
+
+static int
+overlay_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ if (cmd != DDI_DETACH)
+ return (DDI_FAILURE);
+
+ mutex_enter(&overlay_dev_lock);
+ if (!list_is_empty(&overlay_dev_list) || overlay_target_busy()) {
+ mutex_exit(&overlay_dev_lock);
+ return (EBUSY);
+ }
+ mutex_exit(&overlay_dev_lock);
+
+
+ dld_ioc_unregister(OVERLAY_IOC);
+ ddi_remove_minor_node(dip, OVERLAY_CTL);
+ ddi_fm_fini(dip);
+ overlay_dip = NULL;
+ return (DDI_SUCCESS);
+}
+
+static struct cb_ops overlay_cbops = {
+ overlay_target_open, /* cb_open */
+ overlay_target_close, /* cb_close */
+ nodev, /* cb_strategy */
+ nodev, /* cb_print */
+ nodev, /* cb_dump */
+ nodev, /* cb_read */
+ nodev, /* cb_write */
+ overlay_target_ioctl, /* cb_ioctl */
+ nodev, /* cb_devmap */
+ nodev, /* cb_mmap */
+ nodev, /* cb_segmap */
+ nochpoll, /* cb_chpoll */
+ ddi_prop_op, /* cb_prop_op */
+ NULL, /* cb_stream */
+ D_MP, /* cb_flag */
+ CB_REV, /* cb_rev */
+ nodev, /* cb_aread */
+ nodev, /* cb_awrite */
+};
+
+static struct dev_ops overlay_dev_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* devo_refcnt */
+ overlay_getinfo, /* devo_getinfo */
+ nulldev, /* devo_identify */
+ nulldev, /* devo_probe */
+ overlay_attach, /* devo_attach */
+ overlay_detach, /* devo_detach */
+ nulldev, /* devo_reset */
+ &overlay_cbops, /* devo_cb_ops */
+ NULL, /* devo_bus_ops */
+ NULL, /* devo_power */
+ ddi_quiesce_not_supported /* devo_quiesce */
+};
+
+static struct modldrv overlay_modldrv = {
+ &mod_driverops,
+ "Overlay Network Driver",
+ &overlay_dev_ops
+};
+
+static struct modlinkage overlay_linkage = {
+ MODREV_1,
+ &overlay_modldrv
+};
+
+static int
+overlay_init(void)
+{
+ mutex_init(&overlay_dev_lock, NULL, MUTEX_DRIVER, NULL);
+ list_create(&overlay_dev_list, sizeof (overlay_dev_t),
+ offsetof(overlay_dev_t, odd_link));
+ overlay_mux_init();
+ overlay_plugin_init();
+ overlay_target_init();
+
+ return (DDI_SUCCESS);
+}
+
+static void
+overlay_fini(void)
+{
+ overlay_target_fini();
+ overlay_plugin_fini();
+ overlay_mux_fini();
+ mutex_destroy(&overlay_dev_lock);
+ list_destroy(&overlay_dev_list);
+}
+
+int
+_init(void)
+{
+ int err;
+
+ if ((err = overlay_init()) != DDI_SUCCESS)
+ return (err);
+
+ mac_init_ops(NULL, "overlay");
+ err = mod_install(&overlay_linkage);
+ if (err != DDI_SUCCESS) {
+ overlay_fini();
+ return (err);
+ }
+
+ return (0);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&overlay_linkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ int err;
+
+ err = mod_remove(&overlay_linkage);
+ if (err != 0)
+ return (err);
+
+ overlay_fini();
+ return (0);
+}
diff --git a/usr/src/uts/common/io/overlay/overlay.conf b/usr/src/uts/common/io/overlay/overlay.conf
new file mode 100644
index 0000000000..4b62fafd94
--- /dev/null
+++ b/usr/src/uts/common/io/overlay/overlay.conf
@@ -0,0 +1,16 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015, Joyent, Inc.
+#
+
+name="overlay" parent="pseudo" instance=0;
diff --git a/usr/src/uts/common/io/overlay/overlay.mapfile b/usr/src/uts/common/io/overlay/overlay.mapfile
new file mode 100644
index 0000000000..800d72dc2b
--- /dev/null
+++ b/usr/src/uts/common/io/overlay/overlay.mapfile
@@ -0,0 +1,46 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object versioning must comply with the rules detailed in
+#
+# usr/src/lib/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_VERSION ILLUMOSprivate {
+ global:
+ # DDI Interfaces
+ _fini;
+ _init;
+ _info;
+
+ # Encapsualation Plugin interfaces
+ overlay_plugin_alloc;
+ overlay_plugin_free;
+ overlay_plugin_register;
+ overlay_plugin_unregister;
+ local:
+ *;
+};
diff --git a/usr/src/uts/common/io/overlay/overlay_fm.c b/usr/src/uts/common/io/overlay/overlay_fm.c
new file mode 100644
index 0000000000..0701d08e8b
--- /dev/null
+++ b/usr/src/uts/common/io/overlay/overlay_fm.c
@@ -0,0 +1,82 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+/*
+ * Overlay device FMA operations.
+ *
+ * For more information, see the big theory statement in
+ * uts/common/io/overlay/overlay.c
+ */
+
+#include <sys/ddifm.h>
+#include <sys/overlay_impl.h>
+
+kmutex_t overlay_fm_lock;
+uint_t overlay_fm_count;
+
+void
+overlay_fm_init(void)
+{
+ overlay_fm_count = 0;
+ mutex_init(&overlay_fm_lock, NULL, MUTEX_DRIVER, NULL);
+}
+
+void
+overlay_fm_fini(void)
+{
+ VERIFY(overlay_fm_count == 0);
+ mutex_destroy(&overlay_fm_lock);
+}
+
+void
+overlay_fm_degrade(overlay_dev_t *odd, const char *msg)
+{
+ mutex_enter(&overlay_fm_lock);
+ mutex_enter(&odd->odd_lock);
+
+ if (msg != NULL)
+ (void) strlcpy(odd->odd_fmamsg, msg, OVERLAY_STATUS_BUFLEN);
+
+ if (odd->odd_flags & OVERLAY_F_DEGRADED)
+ goto out;
+
+ odd->odd_flags |= OVERLAY_F_DEGRADED;
+ overlay_fm_count++;
+ if (overlay_fm_count == 1) {
+ ddi_fm_service_impact(overlay_dip, DDI_SERVICE_DEGRADED);
+ }
+out:
+ mutex_exit(&odd->odd_lock);
+ mutex_exit(&overlay_fm_lock);
+}
+
+void
+overlay_fm_restore(overlay_dev_t *odd)
+{
+ mutex_enter(&overlay_fm_lock);
+ mutex_enter(&odd->odd_lock);
+ if (!(odd->odd_flags & OVERLAY_F_DEGRADED))
+ goto out;
+
+ odd->odd_fmamsg[0] = '\0';
+ odd->odd_flags &= ~OVERLAY_F_DEGRADED;
+ overlay_fm_count--;
+ if (overlay_fm_count == 0) {
+ ddi_fm_service_impact(overlay_dip, DDI_SERVICE_RESTORED);
+ }
+out:
+ mutex_exit(&odd->odd_lock);
+ mutex_exit(&overlay_fm_lock);
+}
diff --git a/usr/src/uts/common/io/overlay/overlay_mux.c b/usr/src/uts/common/io/overlay/overlay_mux.c
new file mode 100644
index 0000000000..9f70e8c83e
--- /dev/null
+++ b/usr/src/uts/common/io/overlay/overlay_mux.c
@@ -0,0 +1,354 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * Overlay device ksocket multiplexer.
+ *
+ * For more information, see the big theory statement in
+ * uts/common/io/overlay/overlay.c
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/ksynch.h>
+#include <sys/ksocket.h>
+#include <sys/avl.h>
+#include <sys/list.h>
+#include <sys/sysmacros.h>
+#include <sys/strsubr.h>
+#include <sys/strsun.h>
+#include <sys/tihdr.h>
+
+#include <sys/overlay_impl.h>
+
+#include <sys/sdt.h>
+
+#define OVERLAY_FREEMSG(mp, reason) \
+ DTRACE_PROBE2(overlay__fremsg, mblk_t *, mp, char *, reason)
+
+static list_t overlay_mux_list;
+static kmutex_t overlay_mux_lock;
+
+void
+overlay_mux_init(void)
+{
+ list_create(&overlay_mux_list, sizeof (overlay_mux_t),
+ offsetof(overlay_mux_t, omux_lnode));
+ mutex_init(&overlay_mux_lock, NULL, MUTEX_DRIVER, NULL);
+}
+
+void
+overlay_mux_fini(void)
+{
+ mutex_destroy(&overlay_mux_lock);
+ list_destroy(&overlay_mux_list);
+}
+
+static int
+overlay_mux_comparator(const void *a, const void *b)
+{
+ const overlay_dev_t *odl, *odr;
+ odl = a;
+ odr = b;
+ if (odl->odd_vid > odr->odd_vid)
+ return (1);
+ else if (odl->odd_vid < odr->odd_vid)
+ return (-1);
+ else
+ return (0);
+}
+
+/*
+ * This is the central receive data path. We need to decode the packet, if we
+ * can, and then deliver it to the appropriate overlay.
+ */
+/* ARGSUSED */
+static boolean_t
+overlay_mux_recv(ksocket_t ks, mblk_t *mpchain, size_t msgsize, int oob,
+ void *arg)
+{
+ mblk_t *mp, *nmp, *fmp;
+ overlay_mux_t *mux = arg;
+
+ /*
+ * We may have a received a chain of messages. Each messsage in the
+ * chain will likely have a T_unitdata_ind attached to it as an M_PROTO.
+ * If we aren't getting that, we should probably drop that for the
+ * moment.
+ */
+ for (mp = mpchain; mp != NULL; mp = nmp) {
+ struct T_unitdata_ind *tudi;
+ ovep_encap_info_t infop;
+ overlay_dev_t od, *odd;
+ int ret;
+
+ nmp = mp->b_next;
+ mp->b_next = NULL;
+
+ if (DB_TYPE(mp) != M_PROTO) {
+ OVERLAY_FREEMSG(mp, "first one isn't M_PROTO");
+ freemsg(mp);
+ continue;
+ }
+
+ if (mp->b_cont == NULL) {
+ OVERLAY_FREEMSG(mp, "missing a b_cont");
+ freemsg(mp);
+ continue;
+ }
+
+ tudi = (struct T_unitdata_ind *)mp->b_rptr;
+ if (tudi->PRIM_type != T_UNITDATA_IND) {
+ OVERLAY_FREEMSG(mp, "Not a T_unitdata_ind *");
+ freemsg(mp);
+ continue;
+ }
+
+ /*
+ * In the future, we'll care about the source information
+ * for purposes of telling varpd for oob invalidation. But for
+ * now, just drop that block.
+ */
+ fmp = mp;
+ mp = fmp->b_cont;
+ fmp->b_cont = NULL;
+ freemsg(fmp);
+
+ /*
+ * Decap and deliver.
+ */
+ bzero(&infop, sizeof (ovep_encap_info_t));
+ ret = mux->omux_plugin->ovp_ops->ovpo_decap(NULL, mp, &infop);
+ if (ret != 0) {
+ OVERLAY_FREEMSG(mp, "decap failed");
+ freemsg(mp);
+ continue;
+ }
+ if (MBLKL(mp) > infop.ovdi_hdr_size) {
+ mp->b_rptr += infop.ovdi_hdr_size;
+ } else {
+ while (infop.ovdi_hdr_size != 0) {
+ size_t rem, blkl;
+
+ if (mp == NULL)
+ break;
+
+ blkl = MBLKL(mp);
+ rem = MIN(infop.ovdi_hdr_size, blkl);
+ infop.ovdi_hdr_size -= rem;
+ mp->b_rptr += rem;
+ if (rem == blkl) {
+ fmp = mp;
+ mp = fmp->b_cont;
+ fmp->b_cont = NULL;
+ OVERLAY_FREEMSG(mp,
+ "freed a fmp block");
+ freemsg(fmp);
+ }
+ }
+ if (mp == NULL) {
+ OVERLAY_FREEMSG(mp, "freed it all...");
+ continue;
+ }
+ }
+
+
+ od.odd_vid = infop.ovdi_id;
+ mutex_enter(&mux->omux_lock);
+ odd = avl_find(&mux->omux_devices, &od, NULL);
+ if (odd == NULL) {
+ mutex_exit(&mux->omux_lock);
+ OVERLAY_FREEMSG(mp, "no matching vid");
+ freemsg(mp);
+ continue;
+ }
+ mutex_enter(&odd->odd_lock);
+ if ((odd->odd_flags & OVERLAY_F_MDDROP) ||
+ !(odd->odd_flags & OVERLAY_F_IN_MUX)) {
+ mutex_exit(&odd->odd_lock);
+ mutex_exit(&mux->omux_lock);
+ OVERLAY_FREEMSG(mp, "dev dropped");
+ freemsg(mp);
+ continue;
+ }
+ overlay_io_start(odd, OVERLAY_F_IN_RX);
+ mutex_exit(&odd->odd_lock);
+ mutex_exit(&mux->omux_lock);
+
+ mac_rx(odd->odd_mh, NULL, mp);
+
+ mutex_enter(&odd->odd_lock);
+ overlay_io_done(odd, OVERLAY_F_IN_RX);
+ mutex_exit(&odd->odd_lock);
+ }
+
+ return (B_TRUE);
+}
+
+/*
+ * Register a given device with a socket backend. If no such device socket
+ * exists, create a new one.
+ */
+overlay_mux_t *
+overlay_mux_open(overlay_plugin_t *opp, int domain, int family, int protocol,
+ struct sockaddr *addr, socklen_t len, int *errp)
+{
+ int err;
+ overlay_mux_t *mux;
+ ksocket_t ksock;
+
+ if (errp == NULL)
+ errp = &err;
+
+ mutex_enter(&overlay_mux_lock);
+ for (mux = list_head(&overlay_mux_list); mux != NULL;
+ mux = list_next(&overlay_mux_list, mux)) {
+ if (domain == mux->omux_domain &&
+ family == mux->omux_family &&
+ protocol == mux->omux_protocol &&
+ len == mux->omux_alen &&
+ bcmp(addr, mux->omux_addr, len) == 0) {
+
+ if (opp != mux->omux_plugin) {
+ *errp = EEXIST;
+ return (NULL);
+ }
+
+ mutex_enter(&mux->omux_lock);
+ mux->omux_count++;
+ mutex_exit(&mux->omux_lock);
+ mutex_exit(&overlay_mux_lock);
+ *errp = 0;
+ return (mux);
+ }
+ }
+
+ /*
+ * Today we aren't zone-aware and only exist in the global zone. When we
+ * allow for things to exist in the non-global zone, we'll want to use a
+ * credential that's actually specific to the zone.
+ */
+ *errp = ksocket_socket(&ksock, domain, family, protocol, KSOCKET_SLEEP,
+ kcred);
+ if (*errp != 0) {
+ mutex_exit(&overlay_mux_lock);
+ return (NULL);
+ }
+
+ *errp = ksocket_bind(ksock, addr, len, kcred);
+ if (*errp != 0) {
+ mutex_exit(&overlay_mux_lock);
+ ksocket_close(ksock, kcred);
+ return (NULL);
+ }
+
+ /*
+ * Ask our lower layer to optionally toggle anything they need on this
+ * socket. Because a socket is owned by a single type of plugin, we can
+ * then ask it to perform any additional socket set up it'd like to do.
+ */
+ if (opp->ovp_ops->ovpo_sockopt != NULL &&
+ (*errp = opp->ovp_ops->ovpo_sockopt(ksock)) != 0) {
+ mutex_exit(&overlay_mux_lock);
+ ksocket_close(ksock, kcred);
+ return (NULL);
+ }
+
+ mux = kmem_alloc(sizeof (overlay_mux_t), KM_SLEEP);
+ list_link_init(&mux->omux_lnode);
+ mux->omux_ksock = ksock;
+ mux->omux_plugin = opp;
+ mux->omux_domain = domain;
+ mux->omux_family = family;
+ mux->omux_protocol = protocol;
+ mux->omux_addr = kmem_alloc(len, KM_SLEEP);
+ bcopy(addr, mux->omux_addr, len);
+ mux->omux_alen = len;
+ mux->omux_count = 1;
+ avl_create(&mux->omux_devices, overlay_mux_comparator,
+ sizeof (overlay_dev_t), offsetof(overlay_dev_t, odd_muxnode));
+ mutex_init(&mux->omux_lock, NULL, MUTEX_DRIVER, NULL);
+
+
+ /* Once this is called, we need to expect to rx data */
+ *errp = ksocket_krecv_set(ksock, overlay_mux_recv, mux);
+ if (*errp != 0) {
+ ksocket_close(ksock, kcred);
+ mutex_destroy(&mux->omux_lock);
+ avl_destroy(&mux->omux_devices);
+ kmem_free(mux->omux_addr, len);
+ kmem_free(mux, sizeof (overlay_mux_t));
+ return (NULL);
+ }
+
+ list_insert_tail(&overlay_mux_list, mux);
+ mutex_exit(&overlay_mux_lock);
+
+ *errp = 0;
+ return (mux);
+}
+
+void
+overlay_mux_close(overlay_mux_t *mux)
+{
+ mutex_enter(&overlay_mux_lock);
+ mutex_enter(&mux->omux_lock);
+ mux->omux_count--;
+ if (mux->omux_count != 0) {
+ mutex_exit(&mux->omux_lock);
+ mutex_exit(&overlay_mux_lock);
+ return;
+ }
+ list_remove(&overlay_mux_list, mux);
+ mutex_exit(&mux->omux_lock);
+ mutex_exit(&overlay_mux_lock);
+
+ ksocket_close(mux->omux_ksock, kcred);
+ avl_destroy(&mux->omux_devices);
+ kmem_free(mux->omux_addr, mux->omux_alen);
+ kmem_free(mux, sizeof (overlay_mux_t));
+}
+
+void
+overlay_mux_add_dev(overlay_mux_t *mux, overlay_dev_t *odd)
+{
+ mutex_enter(&mux->omux_lock);
+ avl_add(&mux->omux_devices, odd);
+ mutex_exit(&mux->omux_lock);
+}
+
+void
+overlay_mux_remove_dev(overlay_mux_t *mux, overlay_dev_t *odd)
+{
+ mutex_enter(&mux->omux_lock);
+ avl_remove(&mux->omux_devices, odd);
+ mutex_exit(&mux->omux_lock);
+}
+
+int
+overlay_mux_tx(overlay_mux_t *mux, struct msghdr *hdr, mblk_t *mp)
+{
+ int ret;
+
+ /*
+ * It'd be nice to be able to use MSG_MBLK_QUICKRELE, unfortunately,
+ * that isn't actually supported by UDP at this time.
+ */
+ ret = ksocket_sendmblk(mux->omux_ksock, hdr, 0, &mp, kcred);
+ if (ret != 0)
+ freemsg(mp);
+
+ return (ret);
+}
diff --git a/usr/src/uts/common/io/overlay/overlay_plugin.c b/usr/src/uts/common/io/overlay/overlay_plugin.c
new file mode 100644
index 0000000000..348ddb92a2
--- /dev/null
+++ b/usr/src/uts/common/io/overlay/overlay_plugin.c
@@ -0,0 +1,281 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * Overlay device encapsulation plugin management
+ *
+ * For more information, see the big theory statement in
+ * uts/common/io/overlay/overlay.c
+ */
+
+#include <sys/types.h>
+#include <sys/kmem.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/errno.h>
+#include <sys/sysmacros.h>
+#include <sys/modctl.h>
+
+#include <sys/overlay_impl.h>
+
+static kmem_cache_t *overlay_plugin_cache;
+static kmutex_t overlay_plugin_lock;
+static list_t overlay_plugin_list;
+
+#define OVERLAY_MODDIR "overlay"
+
+/* ARGSUSED */
+static int
+overlay_plugin_cache_constructor(void *buf, void *arg, int kmflags)
+{
+ overlay_plugin_t *opp = buf;
+
+ mutex_init(&opp->ovp_mutex, NULL, MUTEX_DRIVER, NULL);
+ list_link_init(&opp->ovp_link);
+
+ return (0);
+}
+
+/* ARGSUSED */
+static void
+overlay_plugin_cache_destructor(void *buf, void *arg)
+{
+ overlay_plugin_t *opp = buf;
+ ASSERT(list_link_active(&opp->ovp_link) == 0);
+ mutex_destroy(&opp->ovp_mutex);
+}
+
+void
+overlay_plugin_init(void)
+{
+ mutex_init(&overlay_plugin_lock, NULL, MUTEX_DRIVER, 0);
+
+ /*
+ * In the future we may want to have a reaper to unload unused modules
+ * to help the kernel be able to reclaim memory.
+ */
+ overlay_plugin_cache = kmem_cache_create("overlay_plugin_cache",
+ sizeof (overlay_plugin_t), 0, overlay_plugin_cache_constructor,
+ overlay_plugin_cache_destructor, NULL, NULL, NULL, 0);
+ list_create(&overlay_plugin_list, sizeof (overlay_plugin_t),
+ offsetof(overlay_plugin_t, ovp_link));
+}
+
+void
+overlay_plugin_fini(void)
+{
+ mutex_enter(&overlay_plugin_lock);
+ VERIFY(list_is_empty(&overlay_plugin_list));
+ mutex_exit(&overlay_plugin_lock);
+
+ list_destroy(&overlay_plugin_list);
+ kmem_cache_destroy(overlay_plugin_cache);
+ mutex_destroy(&overlay_plugin_lock);
+}
+
+overlay_plugin_register_t *
+overlay_plugin_alloc(uint_t version)
+{
+ overlay_plugin_register_t *ovrp;
+ /* Version 1 is the only one that exists */
+ if (version != OVEP_VERSION_ONE)
+ return (NULL);
+
+ ovrp = kmem_zalloc(sizeof (overlay_plugin_register_t), KM_SLEEP);
+ ovrp->ovep_version = version;
+ return (ovrp);
+}
+
+void
+overlay_plugin_free(overlay_plugin_register_t *ovrp)
+{
+ kmem_free(ovrp, sizeof (overlay_plugin_register_t));
+}
+
+int
+overlay_plugin_register(overlay_plugin_register_t *ovrp)
+{
+ overlay_plugin_t *opp, *ipp;
+
+ /* Sanity check parameters of the registration */
+ if (ovrp->ovep_version != OVEP_VERSION_ONE)
+ return (EINVAL);
+
+ if (ovrp->ovep_name == NULL || ovrp->ovep_ops == NULL)
+ return (EINVAL);
+
+ if ((ovrp->ovep_flags & ~(OVEP_F_VLAN_TAG)) != 0)
+ return (EINVAL);
+
+ if (ovrp->ovep_id_size < 1)
+ return (EINVAL);
+
+ /* Don't support anything that has an id size larger than 8 bytes */
+ if (ovrp->ovep_id_size > 8)
+ return (ENOTSUP);
+
+ if (ovrp->ovep_dest == OVERLAY_PLUGIN_D_INVALID)
+ return (EINVAL);
+
+ if ((ovrp->ovep_dest & ~OVERLAY_PLUGIN_D_MASK) != 0)
+ return (EINVAL);
+
+ if (ovrp->ovep_ops->ovpo_callbacks != 0)
+ return (EINVAL);
+ if (ovrp->ovep_ops->ovpo_init == NULL)
+ return (EINVAL);
+ if (ovrp->ovep_ops->ovpo_fini == NULL)
+ return (EINVAL);
+ if (ovrp->ovep_ops->ovpo_encap == NULL)
+ return (EINVAL);
+ if (ovrp->ovep_ops->ovpo_decap == NULL)
+ return (EINVAL);
+ if (ovrp->ovep_ops->ovpo_socket == NULL)
+ return (EINVAL);
+ if (ovrp->ovep_ops->ovpo_getprop == NULL)
+ return (EINVAL);
+ if (ovrp->ovep_ops->ovpo_setprop == NULL)
+ return (EINVAL);
+ if (ovrp->ovep_ops->ovpo_propinfo == NULL)
+ return (EINVAL);
+
+
+ opp = kmem_cache_alloc(overlay_plugin_cache, KM_SLEEP);
+ opp->ovp_active = 0;
+ opp->ovp_name = ovrp->ovep_name;
+ opp->ovp_ops = ovrp->ovep_ops;
+ opp->ovp_props = ovrp->ovep_props;
+ opp->ovp_id_size = ovrp->ovep_id_size;
+ opp->ovp_flags = ovrp->ovep_flags;
+ opp->ovp_dest = ovrp->ovep_dest;
+
+ opp->ovp_nprops = 0;
+ if (ovrp->ovep_props != NULL) {
+ while (ovrp->ovep_props[opp->ovp_nprops] != NULL) {
+ if (strlen(ovrp->ovep_props[opp->ovp_nprops]) >=
+ OVERLAY_PROP_NAMELEN) {
+ mutex_exit(&overlay_plugin_lock);
+ kmem_cache_free(overlay_plugin_cache, opp);
+ return (EINVAL);
+ }
+ opp->ovp_nprops++;
+ }
+ }
+
+ mutex_enter(&overlay_plugin_lock);
+ for (ipp = list_head(&overlay_plugin_list); ipp != NULL;
+ ipp = list_next(&overlay_plugin_list, ipp)) {
+ if (strcmp(ipp->ovp_name, opp->ovp_name) == 0) {
+ mutex_exit(&overlay_plugin_lock);
+ kmem_cache_free(overlay_plugin_cache, opp);
+ return (EEXIST);
+ }
+ }
+ list_insert_tail(&overlay_plugin_list, opp);
+ mutex_exit(&overlay_plugin_lock);
+
+ return (0);
+}
+
+int
+overlay_plugin_unregister(const char *name)
+{
+ overlay_plugin_t *opp;
+
+ mutex_enter(&overlay_plugin_lock);
+ for (opp = list_head(&overlay_plugin_list); opp != NULL;
+ opp = list_next(&overlay_plugin_list, opp)) {
+ if (strcmp(opp->ovp_name, name) == 0)
+ break;
+ }
+
+ if (opp == NULL) {
+ mutex_exit(&overlay_plugin_lock);
+ return (ENOENT);
+ }
+
+ mutex_enter(&opp->ovp_mutex);
+ if (opp->ovp_active > 0) {
+ mutex_exit(&opp->ovp_mutex);
+ mutex_exit(&overlay_plugin_lock);
+ return (EBUSY);
+ }
+ mutex_exit(&opp->ovp_mutex);
+
+ list_remove(&overlay_plugin_list, opp);
+ mutex_exit(&overlay_plugin_lock);
+
+ kmem_cache_free(overlay_plugin_cache, opp);
+ return (0);
+}
+
+overlay_plugin_t *
+overlay_plugin_lookup(const char *name)
+{
+ overlay_plugin_t *opp;
+ boolean_t trymodload = B_FALSE;
+
+ for (;;) {
+ mutex_enter(&overlay_plugin_lock);
+ for (opp = list_head(&overlay_plugin_list); opp != NULL;
+ opp = list_next(&overlay_plugin_list, opp)) {
+ if (strcmp(name, opp->ovp_name) == 0) {
+ mutex_enter(&opp->ovp_mutex);
+ opp->ovp_active++;
+ mutex_exit(&opp->ovp_mutex);
+ mutex_exit(&overlay_plugin_lock);
+ return (opp);
+ }
+ }
+ mutex_exit(&overlay_plugin_lock);
+
+ if (trymodload == B_TRUE)
+ return (NULL);
+
+ /*
+ * If we didn't find it, it may still exist, but just not have
+ * been a loaded module. In that case, we'll do one attempt to
+ * load it.
+ */
+ if (modload(OVERLAY_MODDIR, (char *)name) == -1)
+ return (NULL);
+ trymodload = B_TRUE;
+ }
+
+}
+
+void
+overlay_plugin_rele(overlay_plugin_t *opp)
+{
+ mutex_enter(&opp->ovp_mutex);
+ ASSERT(opp->ovp_active > 0);
+ opp->ovp_active--;
+ mutex_exit(&opp->ovp_mutex);
+}
+
+void
+overlay_plugin_walk(overlay_plugin_walk_f func, void *arg)
+{
+ overlay_plugin_t *opp;
+ mutex_enter(&overlay_plugin_lock);
+ for (opp = list_head(&overlay_plugin_list); opp != NULL;
+ opp = list_next(&overlay_plugin_list, opp)) {
+ if (func(opp, arg) != 0) {
+ mutex_exit(&overlay_plugin_lock);
+ return;
+ }
+ }
+ mutex_exit(&overlay_plugin_lock);
+}
diff --git a/usr/src/uts/common/io/overlay/overlay_prop.c b/usr/src/uts/common/io/overlay/overlay_prop.c
new file mode 100644
index 0000000000..ba1ea2a629
--- /dev/null
+++ b/usr/src/uts/common/io/overlay/overlay_prop.c
@@ -0,0 +1,122 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015, Joyent, Inc.
+ */
+
+/*
+ * Routines for manipulating property information structures.
+ *
+ * For more information, see the big theory statement in
+ * uts/common/io/overlay/overlay.c
+ */
+
+#include <sys/overlay_impl.h>
+
+void
+overlay_prop_init(overlay_prop_handle_t phdl)
+{
+ overlay_ioc_propinfo_t *infop = (overlay_ioc_propinfo_t *)phdl;
+ mac_propval_range_t *rangep = (mac_propval_range_t *)infop->oipi_poss;
+
+ infop->oipi_posssize = sizeof (mac_propval_range_t);
+ bzero(rangep, sizeof (mac_propval_range_t));
+}
+
+void
+overlay_prop_set_name(overlay_prop_handle_t phdl, const char *name)
+{
+ overlay_ioc_propinfo_t *infop = (overlay_ioc_propinfo_t *)phdl;
+ (void) strlcpy(infop->oipi_name, name, OVERLAY_PROP_NAMELEN);
+}
+
+void
+overlay_prop_set_prot(overlay_prop_handle_t phdl, overlay_prop_prot_t prot)
+{
+ overlay_ioc_propinfo_t *infop = (overlay_ioc_propinfo_t *)phdl;
+ infop->oipi_prot = prot;
+}
+
+void
+overlay_prop_set_type(overlay_prop_handle_t phdl, overlay_prop_type_t type)
+{
+ overlay_ioc_propinfo_t *infop = (overlay_ioc_propinfo_t *)phdl;
+ infop->oipi_type = type;
+}
+
+int
+overlay_prop_set_default(overlay_prop_handle_t phdl, void *def, ssize_t len)
+{
+ overlay_ioc_propinfo_t *infop = (overlay_ioc_propinfo_t *)phdl;
+
+ if (len > OVERLAY_PROP_SIZEMAX)
+ return (E2BIG);
+
+ if (len < 0)
+ return (EOVERFLOW);
+
+ bcopy(def, infop->oipi_default, len);
+ infop->oipi_defsize = (uint32_t)len;
+
+ return (0);
+}
+
+void
+overlay_prop_set_nodefault(overlay_prop_handle_t phdl)
+{
+ overlay_ioc_propinfo_t *infop = (overlay_ioc_propinfo_t *)phdl;
+ infop->oipi_default[0] = '\0';
+ infop->oipi_defsize = 0;
+}
+
+void
+overlay_prop_set_range_uint32(overlay_prop_handle_t phdl, uint32_t min,
+ uint32_t max)
+{
+ overlay_ioc_propinfo_t *infop = (overlay_ioc_propinfo_t *)phdl;
+ mac_propval_range_t *rangep = (mac_propval_range_t *)infop->oipi_poss;
+
+ if (rangep->mpr_count != 0 && rangep->mpr_type != MAC_PROPVAL_UINT32)
+ return;
+
+ if (infop->oipi_posssize + sizeof (mac_propval_uint32_range_t) >
+ sizeof (infop->oipi_poss))
+ return;
+
+ infop->oipi_posssize += sizeof (mac_propval_uint32_range_t);
+ rangep->mpr_count++;
+ rangep->mpr_type = MAC_PROPVAL_UINT32;
+ rangep->u.mpr_uint32[rangep->mpr_count-1].mpur_min = min;
+ rangep->u.mpr_uint32[rangep->mpr_count-1].mpur_max = max;
+}
+
+void
+overlay_prop_set_range_str(overlay_prop_handle_t phdl, const char *str)
+{
+ size_t len = strlen(str) + 1; /* Account for a null terminator */
+ overlay_ioc_propinfo_t *infop = (overlay_ioc_propinfo_t *)phdl;
+ mac_propval_range_t *rangep = (mac_propval_range_t *)infop->oipi_poss;
+ mac_propval_str_range_t *pstr = &rangep->u.mpr_str;
+
+ if (rangep->mpr_count != 0 && rangep->mpr_type != MAC_PROPVAL_STR)
+ return;
+
+ if (infop->oipi_posssize + len > sizeof (infop->oipi_poss))
+ return;
+
+ rangep->mpr_count++;
+ rangep->mpr_type = MAC_PROPVAL_STR;
+ strlcpy((char *)&pstr->mpur_data[pstr->mpur_nextbyte], str,
+ sizeof (infop->oipi_poss) - infop->oipi_posssize);
+ pstr->mpur_nextbyte += len;
+ infop->oipi_posssize += len;
+}
diff --git a/usr/src/uts/common/io/overlay/overlay_target.c b/usr/src/uts/common/io/overlay/overlay_target.c
new file mode 100644
index 0000000000..f4147b56d1
--- /dev/null
+++ b/usr/src/uts/common/io/overlay/overlay_target.c
@@ -0,0 +1,1651 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+/*
+ * Overlay device target cache management
+ *
+ * For more information, see the big theory statement in
+ * uts/common/io/overlay/overlay.c
+ */
+
+#include <sys/types.h>
+#include <sys/ethernet.h>
+#include <sys/kmem.h>
+#include <sys/policy.h>
+#include <sys/sysmacros.h>
+#include <sys/stream.h>
+#include <sys/strsun.h>
+#include <sys/strsubr.h>
+#include <sys/mac_provider.h>
+#include <sys/mac_client.h>
+#include <sys/mac_client_priv.h>
+#include <sys/vlan.h>
+#include <sys/crc32.h>
+#include <sys/cred.h>
+#include <sys/file.h>
+#include <sys/errno.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+
+#include <sys/overlay_impl.h>
+#include <sys/sdt.h>
+
+/*
+ * This is total straw man, but at least it's a prime number. Here we're
+ * going to have to go through and do a lot of evaluation and understanding as
+ * to how these target caches should grow and shrink, as well as, memory
+ * pressure and evictions. This just gives us a starting point that'll be 'good
+ * enough', until it's not.
+ */
+#define OVERLAY_HSIZE 823
+
+/*
+ * We use this data structure to keep track of what requests have been actively
+ * allocated to a given instance so we know what to put back on the pending
+ * list.
+ */
+typedef struct overlay_target_hdl {
+ minor_t oth_minor; /* RO */
+ zoneid_t oth_zoneid; /* RO */
+ int oth_oflags; /* RO */
+ list_node_t oth_link; /* overlay_target_lock */
+ kmutex_t oth_lock;
+ list_t oth_outstanding; /* oth_lock */
+} overlay_target_hdl_t;
+
+typedef int (*overlay_target_copyin_f)(const void *, void **, size_t *, int);
+typedef int (*overlay_target_ioctl_f)(overlay_target_hdl_t *, void *);
+typedef int (*overlay_target_copyout_f)(void *, void *, size_t, int);
+
+typedef struct overaly_target_ioctl {
+ int oti_cmd; /* ioctl id */
+ boolean_t oti_write; /* ioctl requires FWRITE */
+ boolean_t oti_ncopyout; /* copyout data? */
+ overlay_target_copyin_f oti_copyin; /* copyin func */
+ overlay_target_ioctl_f oti_func; /* function to call */
+ overlay_target_copyout_f oti_copyout; /* copyin func */
+ size_t oti_size; /* size of user level structure */
+} overlay_target_ioctl_t;
+
+static kmem_cache_t *overlay_target_cache;
+static kmem_cache_t *overlay_entry_cache;
+static id_space_t *overlay_thdl_idspace;
+static void *overlay_thdl_state;
+
+/*
+ * When we support overlay devices in the NGZ, then all of these need to become
+ * zone aware, by plugging into the netstack engine and becoming per-netstack
+ * data.
+ */
+static list_t overlay_thdl_list;
+static kmutex_t overlay_target_lock;
+static kcondvar_t overlay_target_condvar;
+static list_t overlay_target_list;
+static boolean_t overlay_target_excl;
+
+/*
+ * Outstanding data per hash table entry.
+ */
+static int overlay_ent_size = 128 * 1024;
+
+/* ARGSUSED */
+static int
+overlay_target_cache_constructor(void *buf, void *arg, int kmflgs)
+{
+ overlay_target_t *ott = buf;
+
+ mutex_init(&ott->ott_lock, NULL, MUTEX_DRIVER, NULL);
+ cv_init(&ott->ott_cond, NULL, CV_DRIVER, NULL);
+ return (0);
+}
+
+/* ARGSUSED */
+static void
+overlay_target_cache_destructor(void *buf, void *arg)
+{
+ overlay_target_t *ott = buf;
+
+ cv_destroy(&ott->ott_cond);
+ mutex_destroy(&ott->ott_lock);
+}
+
+/* ARGSUSED */
+static int
+overlay_entry_cache_constructor(void *buf, void *arg, int kmflgs)
+{
+ overlay_target_entry_t *ote = buf;
+
+ bzero(ote, sizeof (overlay_target_entry_t));
+ mutex_init(&ote->ote_lock, NULL, MUTEX_DRIVER, NULL);
+ return (0);
+}
+
+/* ARGSUSED */
+static void
+overlay_entry_cache_destructor(void *buf, void *arg)
+{
+ overlay_target_entry_t *ote = buf;
+
+ mutex_destroy(&ote->ote_lock);
+}
+
+static uint64_t
+overlay_mac_hash(const void *v)
+{
+ uint32_t crc;
+ CRC32(crc, v, ETHERADDRL, -1U, crc32_table);
+ return (crc);
+}
+
+static int
+overlay_mac_cmp(const void *a, const void *b)
+{
+ return (bcmp(a, b, ETHERADDRL));
+}
+
+/* ARGSUSED */
+static void
+overlay_target_entry_dtor(void *arg)
+{
+ overlay_target_entry_t *ote = arg;
+
+ ote->ote_flags = 0;
+ bzero(ote->ote_addr, ETHERADDRL);
+ ote->ote_ott = NULL;
+ ote->ote_odd = NULL;
+ freemsgchain(ote->ote_chead);
+ ote->ote_chead = ote->ote_ctail = NULL;
+ ote->ote_mbsize = 0;
+ ote->ote_vtime = 0;
+ kmem_cache_free(overlay_entry_cache, ote);
+}
+
+static int
+overlay_mac_avl(const void *a, const void *b)
+{
+ int i;
+ const overlay_target_entry_t *l, *r;
+ l = a;
+ r = b;
+
+ for (i = 0; i < ETHERADDRL; i++) {
+ if (l->ote_addr[i] > r->ote_addr[i])
+ return (1);
+ else if (l->ote_addr[i] < r->ote_addr[i])
+ return (-1);
+ }
+
+ return (0);
+}
+
+void
+overlay_target_init(void)
+{
+ int ret;
+ ret = ddi_soft_state_init(&overlay_thdl_state,
+ sizeof (overlay_target_hdl_t), 1);
+ VERIFY(ret == 0);
+ overlay_target_cache = kmem_cache_create("overlay_target",
+ sizeof (overlay_target_t), 0, overlay_target_cache_constructor,
+ overlay_target_cache_destructor, NULL, NULL, NULL, 0);
+ overlay_entry_cache = kmem_cache_create("overlay_entry",
+ sizeof (overlay_target_entry_t), 0, overlay_entry_cache_constructor,
+ overlay_entry_cache_destructor, NULL, NULL, NULL, 0);
+ mutex_init(&overlay_target_lock, NULL, MUTEX_DRIVER, NULL);
+ cv_init(&overlay_target_condvar, NULL, CV_DRIVER, NULL);
+ list_create(&overlay_target_list, sizeof (overlay_target_entry_t),
+ offsetof(overlay_target_entry_t, ote_qlink));
+ list_create(&overlay_thdl_list, sizeof (overlay_target_hdl_t),
+ offsetof(overlay_target_hdl_t, oth_link));
+ overlay_thdl_idspace = id_space_create("overlay_target_minors",
+ 1, INT32_MAX);
+}
+
+void
+overlay_target_fini(void)
+{
+ id_space_destroy(overlay_thdl_idspace);
+ list_destroy(&overlay_thdl_list);
+ list_destroy(&overlay_target_list);
+ cv_destroy(&overlay_target_condvar);
+ mutex_destroy(&overlay_target_lock);
+ kmem_cache_destroy(overlay_entry_cache);
+ kmem_cache_destroy(overlay_target_cache);
+ ddi_soft_state_fini(&overlay_thdl_state);
+}
+
+void
+overlay_target_free(overlay_dev_t *odd)
+{
+ if (odd->odd_target == NULL)
+ return;
+
+ if (odd->odd_target->ott_mode == OVERLAY_TARGET_DYNAMIC) {
+ refhash_t *rp = odd->odd_target->ott_u.ott_dyn.ott_dhash;
+ avl_tree_t *ap = &odd->odd_target->ott_u.ott_dyn.ott_tree;
+ overlay_target_entry_t *ote;
+
+ /*
+ * Our AVL tree and hashtable contain the same elements,
+ * therefore we should just remove it from the tree, but then
+ * delete the entries when we remove them from the hash table
+ * (which happens through the refhash dtor).
+ */
+ while ((ote = avl_first(ap)) != NULL)
+ avl_remove(ap, ote);
+
+ avl_destroy(ap);
+ for (ote = refhash_first(rp); ote != NULL;
+ ote = refhash_next(rp, ote)) {
+ refhash_remove(rp, ote);
+ }
+ refhash_destroy(rp);
+ }
+
+ ASSERT(odd->odd_target->ott_ocount == 0);
+ kmem_cache_free(overlay_target_cache, odd->odd_target);
+}
+
+int
+overlay_target_busy()
+{
+ int ret;
+
+ mutex_enter(&overlay_target_lock);
+ ret = !list_is_empty(&overlay_thdl_list);
+ mutex_exit(&overlay_target_lock);
+
+ return (ret);
+}
+
+static void
+overlay_target_queue(overlay_target_entry_t *entry)
+{
+ mutex_enter(&overlay_target_lock);
+ mutex_enter(&entry->ote_ott->ott_lock);
+ if (entry->ote_ott->ott_flags & OVERLAY_T_TEARDOWN) {
+ mutex_exit(&entry->ote_ott->ott_lock);
+ mutex_exit(&overlay_target_lock);
+ return;
+ }
+ entry->ote_ott->ott_ocount++;
+ mutex_exit(&entry->ote_ott->ott_lock);
+ list_insert_tail(&overlay_target_list, entry);
+ cv_signal(&overlay_target_condvar);
+ mutex_exit(&overlay_target_lock);
+}
+
+void
+overlay_target_quiesce(overlay_target_t *ott)
+{
+ if (ott == NULL)
+ return;
+ mutex_enter(&ott->ott_lock);
+ ott->ott_flags |= OVERLAY_T_TEARDOWN;
+ while (ott->ott_ocount != 0)
+ cv_wait(&ott->ott_cond, &ott->ott_lock);
+ mutex_exit(&ott->ott_lock);
+}
+
+/*
+ * This functions assumes that the destination mode is OVERLAY_PLUGIN_D_IP |
+ * OVERLAY_PLUGIN_D_PORT. As we don't have an implementation of anything else at
+ * this time, say for NVGRE, we drop all packets that mcuh this.
+ */
+int
+overlay_target_lookup(overlay_dev_t *odd, mblk_t *mp, struct sockaddr *sock,
+ socklen_t *slenp)
+{
+ int ret;
+ struct sockaddr_in6 *v6;
+ overlay_target_t *ott;
+ mac_header_info_t mhi;
+ overlay_target_entry_t *entry;
+
+ ASSERT(odd->odd_target != NULL);
+
+ /*
+ * At this point, the overlay device is in a mux which means that it's
+ * been activated. At this point, parts of the target, such as the mode
+ * and the destination are now read-only and we don't have to worry
+ * about synchronization for them.
+ */
+ ott = odd->odd_target;
+ if (ott->ott_dest != (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT))
+ return (OVERLAY_TARGET_DROP);
+
+ v6 = (struct sockaddr_in6 *)sock;
+ bzero(v6, sizeof (struct sockaddr_in6));
+ v6->sin6_family = AF_INET6;
+
+ if (ott->ott_mode == OVERLAY_TARGET_POINT) {
+ mutex_enter(&ott->ott_lock);
+ bcopy(&ott->ott_u.ott_point.otp_ip, &v6->sin6_addr,
+ sizeof (struct in6_addr));
+ v6->sin6_port = htons(ott->ott_u.ott_point.otp_port);
+ mutex_exit(&ott->ott_lock);
+ *slenp = sizeof (struct sockaddr_in6);
+
+ return (OVERLAY_TARGET_OK);
+ }
+
+ ASSERT(ott->ott_mode == OVERLAY_TARGET_DYNAMIC);
+
+ /*
+ * Note we only want the MAC address here, therefore we won't bother
+ * using mac_vlan_header_info(). If any caller needs the vlan info at
+ * this point, this should change to a call to mac_vlan_header_info().
+ */
+ if (mac_header_info(odd->odd_mh, mp, &mhi) != 0)
+ return (OVERLAY_TARGET_DROP);
+ mutex_enter(&ott->ott_lock);
+ entry = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
+ mhi.mhi_daddr);
+ if (entry == NULL) {
+ entry = kmem_cache_alloc(overlay_entry_cache,
+ KM_NOSLEEP | KM_NORMALPRI);
+ if (entry == NULL) {
+ mutex_exit(&ott->ott_lock);
+ return (OVERLAY_TARGET_DROP);
+ }
+ bcopy(mhi.mhi_daddr, entry->ote_addr, ETHERADDRL);
+ entry->ote_chead = entry->ote_ctail = mp;
+ entry->ote_mbsize = msgsize(mp);
+ entry->ote_flags |= OVERLAY_ENTRY_F_PENDING;
+ entry->ote_ott = ott;
+ entry->ote_odd = odd;
+ refhash_insert(ott->ott_u.ott_dyn.ott_dhash, entry);
+ avl_add(&ott->ott_u.ott_dyn.ott_tree, entry);
+ mutex_exit(&ott->ott_lock);
+ overlay_target_queue(entry);
+ return (OVERLAY_TARGET_ASYNC);
+ }
+ refhash_hold(ott->ott_u.ott_dyn.ott_dhash, entry);
+ mutex_exit(&ott->ott_lock);
+
+ mutex_enter(&entry->ote_lock);
+ if (entry->ote_flags & OVERLAY_ENTRY_F_DROP) {
+ ret = OVERLAY_TARGET_DROP;
+ } else if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
+ bcopy(&entry->ote_dest.otp_ip, &v6->sin6_addr,
+ sizeof (struct in6_addr));
+ v6->sin6_port = htons(entry->ote_dest.otp_port);
+ *slenp = sizeof (struct sockaddr_in6);
+ ret = OVERLAY_TARGET_OK;
+ } else {
+ size_t mlen = msgsize(mp);
+
+ if (mlen + entry->ote_mbsize > overlay_ent_size) {
+ ret = OVERLAY_TARGET_DROP;
+ } else {
+ if (entry->ote_ctail != NULL) {
+ ASSERT(entry->ote_ctail->b_next ==
+ NULL);
+ entry->ote_ctail->b_next = mp;
+ entry->ote_ctail = mp;
+ } else {
+ entry->ote_chead = mp;
+ entry->ote_ctail = mp;
+ }
+ entry->ote_mbsize += mlen;
+ if ((entry->ote_flags &
+ OVERLAY_ENTRY_F_PENDING) == 0) {
+ entry->ote_flags |=
+ OVERLAY_ENTRY_F_PENDING;
+ overlay_target_queue(entry);
+ }
+ ret = OVERLAY_TARGET_ASYNC;
+ }
+ }
+ mutex_exit(&entry->ote_lock);
+
+ mutex_enter(&ott->ott_lock);
+ refhash_rele(ott->ott_u.ott_dyn.ott_dhash, entry);
+ mutex_exit(&ott->ott_lock);
+
+ return (ret);
+}
+
+/* ARGSUSED */
+static int
+overlay_target_info(overlay_target_hdl_t *thdl, void *arg)
+{
+ overlay_dev_t *odd;
+ overlay_targ_info_t *oti = arg;
+
+ odd = overlay_hold_by_dlid(oti->oti_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+
+ mutex_enter(&odd->odd_lock);
+ oti->oti_flags = 0;
+ oti->oti_needs = odd->odd_plugin->ovp_dest;
+ if (odd->odd_flags & OVERLAY_F_DEGRADED)
+ oti->oti_flags |= OVERLAY_TARG_INFO_F_DEGRADED;
+ if (odd->odd_flags & OVERLAY_F_ACTIVATED)
+ oti->oti_flags |= OVERLAY_TARG_INFO_F_ACTIVE;
+ oti->oti_vnetid = odd->odd_vid;
+ mutex_exit(&odd->odd_lock);
+ overlay_hold_rele(odd);
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+overlay_target_associate(overlay_target_hdl_t *thdl, void *arg)
+{
+ overlay_dev_t *odd;
+ overlay_target_t *ott;
+ overlay_targ_associate_t *ota = arg;
+
+ odd = overlay_hold_by_dlid(ota->ota_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+
+ if (ota->ota_id == 0) {
+ overlay_hold_rele(odd);
+ return (EINVAL);
+ }
+
+ if (ota->ota_mode != OVERLAY_TARGET_POINT &&
+ ota->ota_mode != OVERLAY_TARGET_DYNAMIC) {
+ overlay_hold_rele(odd);
+ return (EINVAL);
+ }
+
+ if (ota->ota_provides != odd->odd_plugin->ovp_dest) {
+ overlay_hold_rele(odd);
+ return (EINVAL);
+ }
+
+ if (ota->ota_mode == OVERLAY_TARGET_POINT) {
+ if (ota->ota_provides & OVERLAY_PLUGIN_D_IP) {
+ if (IN6_IS_ADDR_UNSPECIFIED(&ota->ota_point.otp_ip) ||
+ IN6_IS_ADDR_V4COMPAT(&ota->ota_point.otp_ip) ||
+ IN6_IS_ADDR_V4MAPPED_ANY(&ota->ota_point.otp_ip)) {
+ overlay_hold_rele(odd);
+ return (EINVAL);
+ }
+ }
+
+ if (ota->ota_provides & OVERLAY_PLUGIN_D_PORT) {
+ if (ota->ota_point.otp_port == 0) {
+ overlay_hold_rele(odd);
+ return (EINVAL);
+ }
+ }
+ }
+
+ ott = kmem_cache_alloc(overlay_target_cache, KM_SLEEP);
+ ott->ott_flags = 0;
+ ott->ott_ocount = 0;
+ ott->ott_mode = ota->ota_mode;
+ ott->ott_dest = ota->ota_provides;
+ ott->ott_id = ota->ota_id;
+
+ if (ott->ott_mode == OVERLAY_TARGET_POINT) {
+ bcopy(&ota->ota_point, &ott->ott_u.ott_point,
+ sizeof (overlay_target_point_t));
+ } else {
+ ott->ott_u.ott_dyn.ott_dhash = refhash_create(OVERLAY_HSIZE,
+ overlay_mac_hash, overlay_mac_cmp,
+ overlay_target_entry_dtor, sizeof (overlay_target_entry_t),
+ offsetof(overlay_target_entry_t, ote_reflink),
+ offsetof(overlay_target_entry_t, ote_addr), KM_SLEEP);
+ avl_create(&ott->ott_u.ott_dyn.ott_tree, overlay_mac_avl,
+ sizeof (overlay_target_entry_t),
+ offsetof(overlay_target_entry_t, ote_avllink));
+ }
+ mutex_enter(&odd->odd_lock);
+ if (odd->odd_flags & OVERLAY_F_VARPD) {
+ mutex_exit(&odd->odd_lock);
+ kmem_cache_free(overlay_target_cache, ott);
+ overlay_hold_rele(odd);
+ return (EEXIST);
+ }
+
+ odd->odd_flags |= OVERLAY_F_VARPD;
+ odd->odd_target = ott;
+ mutex_exit(&odd->odd_lock);
+
+ overlay_hold_rele(odd);
+
+
+ return (0);
+}
+
+
+/* ARGSUSED */
+static int
+overlay_target_degrade(overlay_target_hdl_t *thdl, void *arg)
+{
+ overlay_dev_t *odd;
+ overlay_targ_degrade_t *otd = arg;
+
+ odd = overlay_hold_by_dlid(otd->otd_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+
+ overlay_fm_degrade(odd, otd->otd_buf);
+ overlay_hold_rele(odd);
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+overlay_target_restore(overlay_target_hdl_t *thdl, void *arg)
+{
+ overlay_dev_t *odd;
+ overlay_targ_id_t *otid = arg;
+
+ odd = overlay_hold_by_dlid(otid->otid_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+
+ overlay_fm_restore(odd);
+ overlay_hold_rele(odd);
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+overlay_target_disassociate(overlay_target_hdl_t *thdl, void *arg)
+{
+ overlay_dev_t *odd;
+ overlay_targ_id_t *otid = arg;
+
+ odd = overlay_hold_by_dlid(otid->otid_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+
+ mutex_enter(&odd->odd_lock);
+ odd->odd_flags &= ~OVERLAY_F_VARPD;
+ mutex_exit(&odd->odd_lock);
+
+ overlay_hold_rele(odd);
+ return (0);
+
+}
+
+static int
+overlay_target_lookup_request(overlay_target_hdl_t *thdl, void *arg)
+{
+ overlay_targ_lookup_t *otl = arg;
+ overlay_target_entry_t *entry;
+ clock_t ret, timeout;
+ mac_header_info_t mhi;
+
+ timeout = ddi_get_lbolt() + drv_usectohz(MICROSEC);
+again:
+ mutex_enter(&overlay_target_lock);
+ while (list_is_empty(&overlay_target_list)) {
+ ret = cv_timedwait(&overlay_target_condvar,
+ &overlay_target_lock, timeout);
+ if (ret == -1) {
+ mutex_exit(&overlay_target_lock);
+ return (ETIME);
+ }
+ }
+ entry = list_remove_head(&overlay_target_list);
+ mutex_exit(&overlay_target_lock);
+ mutex_enter(&entry->ote_lock);
+ if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
+ ASSERT(entry->ote_chead == NULL);
+ mutex_exit(&entry->ote_lock);
+ goto again;
+ }
+ ASSERT(entry->ote_chead != NULL);
+
+ /*
+ * If we have a bogon that doesn't have a valid mac header, drop it and
+ * try again.
+ */
+ if (mac_vlan_header_info(entry->ote_odd->odd_mh, entry->ote_chead,
+ &mhi) != 0) {
+ boolean_t queue = B_FALSE;
+ mblk_t *mp = entry->ote_chead;
+ entry->ote_chead = mp->b_next;
+ mp->b_next = NULL;
+ if (entry->ote_ctail == mp)
+ entry->ote_ctail = entry->ote_chead;
+ entry->ote_mbsize -= msgsize(mp);
+ if (entry->ote_chead != NULL)
+ queue = B_TRUE;
+ mutex_exit(&entry->ote_lock);
+ if (queue == B_TRUE)
+ overlay_target_queue(entry);
+ freemsg(mp);
+ goto again;
+ }
+
+ otl->otl_dlid = entry->ote_odd->odd_linkid;
+ otl->otl_reqid = (uintptr_t)entry;
+ otl->otl_varpdid = entry->ote_ott->ott_id;
+ otl->otl_vnetid = entry->ote_odd->odd_vid;
+
+ otl->otl_hdrsize = mhi.mhi_hdrsize;
+ otl->otl_pktsize = msgsize(entry->ote_chead) - otl->otl_hdrsize;
+ bcopy(mhi.mhi_daddr, otl->otl_dstaddr, ETHERADDRL);
+ bcopy(mhi.mhi_saddr, otl->otl_srcaddr, ETHERADDRL);
+ otl->otl_dsttype = mhi.mhi_dsttype;
+ otl->otl_sap = mhi.mhi_bindsap;
+ otl->otl_vlan = VLAN_ID(mhi.mhi_tci);
+ mutex_exit(&entry->ote_lock);
+
+ mutex_enter(&thdl->oth_lock);
+ list_insert_tail(&thdl->oth_outstanding, entry);
+ mutex_exit(&thdl->oth_lock);
+
+ return (0);
+}
+
+static int
+overlay_target_lookup_respond(overlay_target_hdl_t *thdl, void *arg)
+{
+ const overlay_targ_resp_t *otr = arg;
+ overlay_target_entry_t *entry;
+ mblk_t *mp;
+
+ mutex_enter(&thdl->oth_lock);
+ for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
+ entry = list_next(&thdl->oth_outstanding, entry)) {
+ if ((uintptr_t)entry == otr->otr_reqid)
+ break;
+ }
+
+ if (entry == NULL) {
+ mutex_exit(&thdl->oth_lock);
+ return (EINVAL);
+ }
+ list_remove(&thdl->oth_outstanding, entry);
+ mutex_exit(&thdl->oth_lock);
+
+ mutex_enter(&entry->ote_lock);
+ bcopy(&otr->otr_answer, &entry->ote_dest,
+ sizeof (overlay_target_point_t));
+ entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING;
+ entry->ote_flags |= OVERLAY_ENTRY_F_VALID;
+ mp = entry->ote_chead;
+ entry->ote_chead = NULL;
+ entry->ote_ctail = NULL;
+ entry->ote_mbsize = 0;
+ entry->ote_vtime = gethrtime();
+ mutex_exit(&entry->ote_lock);
+
+ /*
+ * For now do an in-situ drain.
+ */
+ mp = overlay_m_tx(entry->ote_odd, mp);
+ freemsgchain(mp);
+
+ mutex_enter(&entry->ote_ott->ott_lock);
+ entry->ote_ott->ott_ocount--;
+ cv_signal(&entry->ote_ott->ott_cond);
+ mutex_exit(&entry->ote_ott->ott_lock);
+
+ return (0);
+}
+
+static int
+overlay_target_lookup_drop(overlay_target_hdl_t *thdl, void *arg)
+{
+ const overlay_targ_resp_t *otr = arg;
+ overlay_target_entry_t *entry;
+ mblk_t *mp;
+ boolean_t queue = B_FALSE;
+
+ mutex_enter(&thdl->oth_lock);
+ for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
+ entry = list_next(&thdl->oth_outstanding, entry)) {
+ if ((uintptr_t)entry == otr->otr_reqid)
+ break;
+ }
+
+ if (entry == NULL) {
+ mutex_exit(&thdl->oth_lock);
+ return (EINVAL);
+ }
+ list_remove(&thdl->oth_outstanding, entry);
+ mutex_exit(&thdl->oth_lock);
+
+ mutex_enter(&entry->ote_lock);
+
+ /* Safeguard against a confused varpd */
+ if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
+ entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING;
+ DTRACE_PROBE1(overlay__target__valid__drop,
+ overlay_target_entry_t *, entry);
+ mutex_exit(&entry->ote_lock);
+ goto done;
+ }
+
+ mp = entry->ote_chead;
+ if (mp != NULL) {
+ entry->ote_chead = mp->b_next;
+ mp->b_next = NULL;
+ if (entry->ote_ctail == mp)
+ entry->ote_ctail = entry->ote_chead;
+ entry->ote_mbsize -= msgsize(mp);
+ }
+ if (entry->ote_chead != NULL) {
+ queue = B_TRUE;
+ entry->ote_flags |= OVERLAY_ENTRY_F_PENDING;
+ } else {
+ entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING;
+ }
+ mutex_exit(&entry->ote_lock);
+
+ if (queue == B_TRUE)
+ overlay_target_queue(entry);
+ freemsg(mp);
+
+done:
+ mutex_enter(&entry->ote_ott->ott_lock);
+ entry->ote_ott->ott_ocount--;
+ cv_signal(&entry->ote_ott->ott_cond);
+ mutex_exit(&entry->ote_ott->ott_lock);
+
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+overlay_target_pkt_copyin(const void *ubuf, void **outp, size_t *bsize,
+ int flags)
+{
+ overlay_targ_pkt_t *pkt;
+ overlay_targ_pkt32_t *pkt32;
+
+ pkt = kmem_alloc(sizeof (overlay_targ_pkt_t), KM_SLEEP);
+ *outp = pkt;
+ *bsize = sizeof (overlay_targ_pkt_t);
+ if (ddi_model_convert_from(flags & FMODELS) == DDI_MODEL_ILP32) {
+ uintptr_t addr;
+
+ if (ddi_copyin(ubuf, pkt, sizeof (overlay_targ_pkt32_t),
+ flags & FKIOCTL) != 0) {
+ kmem_free(pkt, *bsize);
+ return (EFAULT);
+ }
+ pkt32 = (overlay_targ_pkt32_t *)pkt;
+ addr = pkt32->otp_buf;
+ pkt->otp_buf = (void *)addr;
+ } else {
+ if (ddi_copyin(ubuf, pkt, *bsize, flags & FKIOCTL) != 0) {
+ kmem_free(pkt, *bsize);
+ return (EFAULT);
+ }
+ }
+ return (0);
+}
+
+static int
+overlay_target_pkt_copyout(void *ubuf, void *buf, size_t bufsize,
+ int flags)
+{
+ if (ddi_model_convert_from(flags & FMODELS) == DDI_MODEL_ILP32) {
+ overlay_targ_pkt_t *pkt = buf;
+ overlay_targ_pkt32_t *pkt32 = buf;
+ uintptr_t addr = (uintptr_t)pkt->otp_buf;
+ pkt32->otp_buf = (caddr32_t)addr;
+ if (ddi_copyout(buf, ubuf, sizeof (overlay_targ_pkt32_t),
+ flags & FKIOCTL) != 0)
+ return (EFAULT);
+ } else {
+ if (ddi_copyout(buf, ubuf, bufsize, flags & FKIOCTL) != 0)
+ return (EFAULT);
+ }
+ return (0);
+}
+
+static int
+overlay_target_packet(overlay_target_hdl_t *thdl, void *arg)
+{
+ overlay_targ_pkt_t *pkt = arg;
+ overlay_target_entry_t *entry;
+ mblk_t *mp;
+ size_t mlen;
+ size_t boff;
+
+ mutex_enter(&thdl->oth_lock);
+ for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
+ entry = list_next(&thdl->oth_outstanding, entry)) {
+ if ((uintptr_t)entry == pkt->otp_reqid)
+ break;
+ }
+
+ if (entry == NULL) {
+ mutex_exit(&thdl->oth_lock);
+ return (EINVAL);
+ }
+ mutex_enter(&entry->ote_lock);
+ mutex_exit(&thdl->oth_lock);
+ mp = entry->ote_chead;
+ /* Protect against a rogue varpd */
+ if (mp == NULL) {
+ mutex_exit(&entry->ote_lock);
+ return (EINVAL);
+ }
+ mlen = MIN(msgsize(mp), pkt->otp_size);
+ pkt->otp_size = mlen;
+ boff = 0;
+ while (mlen > 0) {
+ size_t wlen = MIN(MBLKL(mp), mlen);
+ if (ddi_copyout(mp->b_rptr,
+ (void *)((uintptr_t)pkt->otp_buf + boff),
+ wlen, 0) != 0) {
+ mutex_exit(&entry->ote_lock);
+ return (EFAULT);
+ }
+ mlen -= wlen;
+ boff += wlen;
+ mp = mp->b_cont;
+ }
+ mutex_exit(&entry->ote_lock);
+ return (0);
+}
+
+static int
+overlay_target_inject(overlay_target_hdl_t *thdl, void *arg)
+{
+ overlay_targ_pkt_t *pkt = arg;
+ overlay_target_entry_t *entry;
+ overlay_dev_t *odd;
+ mblk_t *mp;
+
+ if (pkt->otp_size > ETHERMAX + VLAN_TAGSZ)
+ return (EINVAL);
+
+ mp = allocb(pkt->otp_size, 0);
+ if (mp == NULL)
+ return (ENOMEM);
+
+ if (ddi_copyin(pkt->otp_buf, mp->b_rptr, pkt->otp_size, 0) != 0) {
+ freeb(mp);
+ return (EFAULT);
+ }
+ mp->b_wptr += pkt->otp_size;
+
+ if (pkt->otp_linkid != UINT64_MAX) {
+ odd = overlay_hold_by_dlid(pkt->otp_linkid);
+ if (odd == NULL) {
+ freeb(mp);
+ return (ENOENT);
+ }
+ } else {
+ mutex_enter(&thdl->oth_lock);
+ for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
+ entry = list_next(&thdl->oth_outstanding, entry)) {
+ if ((uintptr_t)entry == pkt->otp_reqid)
+ break;
+ }
+
+ if (entry == NULL) {
+ mutex_exit(&thdl->oth_lock);
+ freeb(mp);
+ return (ENOENT);
+ }
+ odd = entry->ote_odd;
+ mutex_exit(&thdl->oth_lock);
+ }
+
+ mutex_enter(&odd->odd_lock);
+ overlay_io_start(odd, OVERLAY_F_IN_RX);
+ mutex_exit(&odd->odd_lock);
+
+ mac_rx(odd->odd_mh, NULL, mp);
+
+ mutex_enter(&odd->odd_lock);
+ overlay_io_done(odd, OVERLAY_F_IN_RX);
+ mutex_exit(&odd->odd_lock);
+
+ return (0);
+}
+
+static int
+overlay_target_resend(overlay_target_hdl_t *thdl, void *arg)
+{
+ overlay_targ_pkt_t *pkt = arg;
+ overlay_target_entry_t *entry;
+ overlay_dev_t *odd;
+ mblk_t *mp;
+
+ if (pkt->otp_size > ETHERMAX + VLAN_TAGSZ)
+ return (EINVAL);
+
+ mp = allocb(pkt->otp_size, 0);
+ if (mp == NULL)
+ return (ENOMEM);
+
+ if (ddi_copyin(pkt->otp_buf, mp->b_rptr, pkt->otp_size, 0) != 0) {
+ freeb(mp);
+ return (EFAULT);
+ }
+ mp->b_wptr += pkt->otp_size;
+
+ if (pkt->otp_linkid != UINT64_MAX) {
+ odd = overlay_hold_by_dlid(pkt->otp_linkid);
+ if (odd == NULL) {
+ freeb(mp);
+ return (ENOENT);
+ }
+ } else {
+ mutex_enter(&thdl->oth_lock);
+ for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
+ entry = list_next(&thdl->oth_outstanding, entry)) {
+ if ((uintptr_t)entry == pkt->otp_reqid)
+ break;
+ }
+
+ if (entry == NULL) {
+ mutex_exit(&thdl->oth_lock);
+ freeb(mp);
+ return (ENOENT);
+ }
+ odd = entry->ote_odd;
+ mutex_exit(&thdl->oth_lock);
+ }
+
+ mp = overlay_m_tx(odd, mp);
+ freemsgchain(mp);
+
+ return (0);
+}
+
+typedef struct overlay_targ_list_int {
+ boolean_t otli_count;
+ uint32_t otli_cur;
+ uint32_t otli_nents;
+ uint32_t otli_ents[];
+} overlay_targ_list_int_t;
+
+static int
+overlay_target_list_copyin(const void *ubuf, void **outp, size_t *bsize,
+ int flags)
+{
+ overlay_targ_list_t n;
+ overlay_targ_list_int_t *otl;
+
+ if (ddi_copyin(ubuf, &n, sizeof (overlay_targ_list_t),
+ flags & FKIOCTL) != 0)
+ return (EFAULT);
+
+ /*
+ */
+ if (n.otl_nents >= INT32_MAX / sizeof (uint32_t))
+ return (EINVAL);
+ *bsize = sizeof (overlay_targ_list_int_t) +
+ sizeof (uint32_t) * n.otl_nents;
+ otl = kmem_zalloc(*bsize, KM_SLEEP);
+ otl->otli_cur = 0;
+ otl->otli_nents = n.otl_nents;
+ if (otl->otli_nents != 0) {
+ otl->otli_count = B_FALSE;
+ if (ddi_copyin((void *)((uintptr_t)ubuf +
+ offsetof(overlay_targ_list_t, otl_ents)),
+ otl->otli_ents, n.otl_nents * sizeof (uint32_t),
+ flags & FKIOCTL) != 0) {
+ kmem_free(otl, *bsize);
+ return (EFAULT);
+ }
+ } else {
+ otl->otli_count = B_TRUE;
+ }
+
+ *outp = otl;
+ return (0);
+}
+
+static int
+overlay_target_ioctl_list_cb(overlay_dev_t *odd, void *arg)
+{
+ overlay_targ_list_int_t *otl = arg;
+
+ if (otl->otli_cur < otl->otli_nents)
+ otl->otli_ents[otl->otli_cur] = odd->odd_linkid;
+ otl->otli_cur++;
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+overlay_target_ioctl_list(overlay_target_hdl_t *thdl, void *arg)
+{
+ overlay_dev_iter(overlay_target_ioctl_list_cb, arg);
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+overlay_target_list_copyout(void *ubuf, void *buf, size_t bufsize, int flags)
+{
+ overlay_targ_list_int_t *otl = buf;
+
+ if (ddi_copyout(&otl->otli_cur, ubuf, sizeof (uint32_t),
+ flags & FKIOCTL) != 0)
+ return (EFAULT);
+
+ if (otl->otli_count == B_FALSE) {
+ if (ddi_copyout(otl->otli_ents,
+ (void *)((uintptr_t)ubuf +
+ offsetof(overlay_targ_list_t, otl_ents)),
+ sizeof (uint32_t) * otl->otli_nents,
+ flags & FKIOCTL) != 0)
+ return (EFAULT);
+ }
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+overlay_target_cache_get(overlay_target_hdl_t *thdl, void *arg)
+{
+ int ret = 0;
+ overlay_dev_t *odd;
+ overlay_target_t *ott;
+ overlay_targ_cache_t *otc = arg;
+
+ odd = overlay_hold_by_dlid(otc->otc_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+
+ mutex_enter(&odd->odd_lock);
+ if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
+ mutex_exit(&odd->odd_lock);
+ overlay_hold_rele(odd);
+ return (ENXIO);
+ }
+ ott = odd->odd_target;
+ if (ott->ott_mode != OVERLAY_TARGET_POINT &&
+ ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
+ mutex_exit(&odd->odd_lock);
+ overlay_hold_rele(odd);
+ return (ENOTSUP);
+ }
+ mutex_enter(&ott->ott_lock);
+ mutex_exit(&odd->odd_lock);
+
+ if (ott->ott_mode == OVERLAY_TARGET_POINT) {
+ otc->otc_entry.otce_flags = 0;
+ bcopy(&ott->ott_u.ott_point, &otc->otc_entry.otce_dest,
+ sizeof (overlay_target_point_t));
+ } else {
+ overlay_target_entry_t *ote;
+ ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
+ otc->otc_entry.otce_mac);
+ if (ote != NULL) {
+ mutex_enter(&ote->ote_lock);
+ if ((ote->ote_flags &
+ OVERLAY_ENTRY_F_VALID_MASK) != 0) {
+ if (ote->ote_flags & OVERLAY_ENTRY_F_DROP) {
+ otc->otc_entry.otce_flags =
+ OVERLAY_TARGET_CACHE_DROP;
+ } else {
+ otc->otc_entry.otce_flags = 0;
+ bcopy(&ote->ote_dest,
+ &otc->otc_entry.otce_dest,
+ sizeof (overlay_target_point_t));
+ }
+ ret = 0;
+ } else {
+ ret = ENOENT;
+ }
+ mutex_exit(&ote->ote_lock);
+ } else {
+ ret = ENOENT;
+ }
+ }
+
+ mutex_exit(&ott->ott_lock);
+ overlay_hold_rele(odd);
+
+ return (ret);
+}
+
+/* ARGSUSED */
+static int
+overlay_target_cache_set(overlay_target_hdl_t *thdl, void *arg)
+{
+ overlay_dev_t *odd;
+ overlay_target_t *ott;
+ overlay_target_entry_t *ote;
+ overlay_targ_cache_t *otc = arg;
+ mblk_t *mp = NULL;
+
+ if (otc->otc_entry.otce_flags & ~OVERLAY_TARGET_CACHE_DROP)
+ return (EINVAL);
+
+ odd = overlay_hold_by_dlid(otc->otc_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+
+ mutex_enter(&odd->odd_lock);
+ if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
+ mutex_exit(&odd->odd_lock);
+ overlay_hold_rele(odd);
+ return (ENXIO);
+ }
+ ott = odd->odd_target;
+ if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
+ mutex_exit(&odd->odd_lock);
+ overlay_hold_rele(odd);
+ return (ENOTSUP);
+ }
+ mutex_enter(&ott->ott_lock);
+ mutex_exit(&odd->odd_lock);
+
+ ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
+ otc->otc_entry.otce_mac);
+ if (ote == NULL) {
+ ote = kmem_cache_alloc(overlay_entry_cache, KM_SLEEP);
+ bcopy(otc->otc_entry.otce_mac, ote->ote_addr, ETHERADDRL);
+ ote->ote_chead = ote->ote_ctail = NULL;
+ ote->ote_mbsize = 0;
+ ote->ote_ott = ott;
+ ote->ote_odd = odd;
+ mutex_enter(&ote->ote_lock);
+ refhash_insert(ott->ott_u.ott_dyn.ott_dhash, ote);
+ avl_add(&ott->ott_u.ott_dyn.ott_tree, ote);
+ } else {
+ mutex_enter(&ote->ote_lock);
+ }
+
+ if (otc->otc_entry.otce_flags & OVERLAY_TARGET_CACHE_DROP) {
+ ote->ote_flags |= OVERLAY_ENTRY_F_DROP;
+ } else {
+ ote->ote_flags |= OVERLAY_ENTRY_F_VALID;
+ bcopy(&otc->otc_entry.otce_dest, &ote->ote_dest,
+ sizeof (overlay_target_point_t));
+ mp = ote->ote_chead;
+ ote->ote_chead = NULL;
+ ote->ote_ctail = NULL;
+ ote->ote_mbsize = 0;
+ ote->ote_vtime = gethrtime();
+ }
+
+ mutex_exit(&ote->ote_lock);
+ mutex_exit(&ott->ott_lock);
+
+ if (mp != NULL) {
+ mp = overlay_m_tx(ote->ote_odd, mp);
+ freemsgchain(mp);
+ }
+
+ overlay_hold_rele(odd);
+
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+overlay_target_cache_remove(overlay_target_hdl_t *thdl, void *arg)
+{
+ int ret = 0;
+ overlay_dev_t *odd;
+ overlay_target_t *ott;
+ overlay_target_entry_t *ote;
+ overlay_targ_cache_t *otc = arg;
+
+ odd = overlay_hold_by_dlid(otc->otc_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+
+ mutex_enter(&odd->odd_lock);
+ if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
+ mutex_exit(&odd->odd_lock);
+ overlay_hold_rele(odd);
+ return (ENXIO);
+ }
+ ott = odd->odd_target;
+ if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
+ mutex_exit(&odd->odd_lock);
+ overlay_hold_rele(odd);
+ return (ENOTSUP);
+ }
+ mutex_enter(&ott->ott_lock);
+ mutex_exit(&odd->odd_lock);
+
+ ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
+ otc->otc_entry.otce_mac);
+ if (ote != NULL) {
+ mutex_enter(&ote->ote_lock);
+ ote->ote_flags &= ~OVERLAY_ENTRY_F_VALID_MASK;
+ mutex_exit(&ote->ote_lock);
+ ret = 0;
+ } else {
+ ret = ENOENT;
+ }
+
+ mutex_exit(&ott->ott_lock);
+ overlay_hold_rele(odd);
+
+ return (ret);
+}
+
+/* ARGSUSED */
+static int
+overlay_target_cache_flush(overlay_target_hdl_t *thdl, void *arg)
+{
+ avl_tree_t *avl;
+ overlay_dev_t *odd;
+ overlay_target_t *ott;
+ overlay_target_entry_t *ote;
+ overlay_targ_cache_t *otc = arg;
+
+ odd = overlay_hold_by_dlid(otc->otc_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+
+ mutex_enter(&odd->odd_lock);
+ if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
+ mutex_exit(&odd->odd_lock);
+ overlay_hold_rele(odd);
+ return (ENXIO);
+ }
+ ott = odd->odd_target;
+ if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
+ mutex_exit(&odd->odd_lock);
+ overlay_hold_rele(odd);
+ return (ENOTSUP);
+ }
+ mutex_enter(&ott->ott_lock);
+ mutex_exit(&odd->odd_lock);
+ avl = &ott->ott_u.ott_dyn.ott_tree;
+
+ for (ote = avl_first(avl); ote != NULL; ote = AVL_NEXT(avl, ote)) {
+ mutex_enter(&ote->ote_lock);
+ ote->ote_flags &= ~OVERLAY_ENTRY_F_VALID_MASK;
+ mutex_exit(&ote->ote_lock);
+ }
+ ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
+ otc->otc_entry.otce_mac);
+
+ mutex_exit(&ott->ott_lock);
+ overlay_hold_rele(odd);
+
+ return (0);
+}
+
+static int
+overlay_target_cache_iter_copyin(const void *ubuf, void **outp, size_t *bsize,
+ int flags)
+{
+ overlay_targ_cache_iter_t base, *iter;
+
+ if (ddi_copyin(ubuf, &base, sizeof (overlay_targ_cache_iter_t),
+ flags & FKIOCTL) != 0)
+ return (EFAULT);
+
+ if (base.otci_count > OVERLAY_TARGET_ITER_MAX)
+ return (E2BIG);
+
+ if (base.otci_count == 0)
+ return (EINVAL);
+
+ *bsize = sizeof (overlay_targ_cache_iter_t) +
+ base.otci_count * sizeof (overlay_targ_cache_entry_t);
+ iter = kmem_alloc(*bsize, KM_SLEEP);
+ bcopy(&base, iter, sizeof (overlay_targ_cache_iter_t));
+ *outp = iter;
+
+ return (0);
+}
+
+typedef struct overlay_targ_cache_marker {
+ uint8_t otcm_mac[ETHERADDRL];
+ uint16_t otcm_done;
+} overlay_targ_cache_marker_t;
+
+/* ARGSUSED */
+static int
+overlay_target_cache_iter(overlay_target_hdl_t *thdl, void *arg)
+{
+ overlay_dev_t *odd;
+ overlay_target_t *ott;
+ overlay_target_entry_t lookup, *ent;
+ overlay_targ_cache_marker_t *mark;
+ avl_index_t where;
+ avl_tree_t *avl;
+ uint16_t written = 0;
+
+ overlay_targ_cache_iter_t *iter = arg;
+ mark = (void *)&iter->otci_marker;
+
+ if (mark->otcm_done != 0) {
+ iter->otci_count = 0;
+ return (0);
+ }
+
+ odd = overlay_hold_by_dlid(iter->otci_linkid);
+ if (odd == NULL)
+ return (ENOENT);
+
+ mutex_enter(&odd->odd_lock);
+ if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
+ mutex_exit(&odd->odd_lock);
+ overlay_hold_rele(odd);
+ return (ENXIO);
+ }
+ ott = odd->odd_target;
+ if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC &&
+ ott->ott_mode != OVERLAY_TARGET_POINT) {
+ mutex_exit(&odd->odd_lock);
+ overlay_hold_rele(odd);
+ return (ENOTSUP);
+ }
+
+ /*
+ * Holding this lock across the entire iteration probably isn't very
+ * good. We should perhaps add an r/w lock for the avl tree. But we'll
+ * wait until we now it's necessary before we do more.
+ */
+ mutex_enter(&ott->ott_lock);
+ mutex_exit(&odd->odd_lock);
+
+ if (ott->ott_mode == OVERLAY_TARGET_POINT) {
+ overlay_targ_cache_entry_t *out = &iter->otci_ents[0];
+ bzero(out->otce_mac, ETHERADDRL);
+ out->otce_flags = 0;
+ bcopy(&ott->ott_u.ott_point, &out->otce_dest,
+ sizeof (overlay_target_point_t));
+ written++;
+ mark->otcm_done = 1;
+ }
+
+ avl = &ott->ott_u.ott_dyn.ott_tree;
+ bcopy(mark->otcm_mac, lookup.ote_addr, ETHERADDRL);
+ ent = avl_find(avl, &lookup, &where);
+
+ /*
+ * NULL ent means that the entry does not exist, so we want to start
+ * with the closest node in the tree. This means that we implicitly rely
+ * on the tree's order and the first node will be the mac 00:00:00:00:00
+ * and the last will be ff:ff:ff:ff:ff:ff.
+ */
+ if (ent == NULL) {
+ ent = avl_nearest(avl, where, AVL_AFTER);
+ if (ent == NULL) {
+ mark->otcm_done = 1;
+ goto done;
+ }
+ }
+
+ for (; ent != NULL && written < iter->otci_count;
+ ent = AVL_NEXT(avl, ent)) {
+ overlay_targ_cache_entry_t *out = &iter->otci_ents[written];
+ mutex_enter(&ent->ote_lock);
+ if ((ent->ote_flags & OVERLAY_ENTRY_F_VALID_MASK) == 0) {
+ mutex_exit(&ent->ote_lock);
+ continue;
+ }
+ bcopy(ent->ote_addr, out->otce_mac, ETHERADDRL);
+ out->otce_flags = 0;
+ if (ent->ote_flags & OVERLAY_ENTRY_F_DROP)
+ out->otce_flags |= OVERLAY_TARGET_CACHE_DROP;
+ if (ent->ote_flags & OVERLAY_ENTRY_F_VALID)
+ bcopy(&ent->ote_dest, &out->otce_dest,
+ sizeof (overlay_target_point_t));
+ written++;
+ mutex_exit(&ent->ote_lock);
+ }
+
+ if (ent != NULL) {
+ bcopy(ent->ote_addr, mark->otcm_mac, ETHERADDRL);
+ } else {
+ mark->otcm_done = 1;
+ }
+
+done:
+ iter->otci_count = written;
+ mutex_exit(&ott->ott_lock);
+ overlay_hold_rele(odd);
+
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+overlay_target_cache_iter_copyout(void *ubuf, void *buf, size_t bufsize,
+ int flags)
+{
+ size_t outsize;
+ const overlay_targ_cache_iter_t *iter = buf;
+
+ outsize = sizeof (overlay_targ_cache_iter_t) +
+ iter->otci_count * sizeof (overlay_targ_cache_entry_t);
+
+ if (ddi_copyout(buf, ubuf, outsize, flags & FKIOCTL) != 0)
+ return (EFAULT);
+
+ return (0);
+}
+
+static overlay_target_ioctl_t overlay_target_ioctab[] = {
+ { OVERLAY_TARG_INFO, B_TRUE, B_TRUE,
+ NULL, overlay_target_info,
+ NULL, sizeof (overlay_targ_info_t) },
+ { OVERLAY_TARG_ASSOCIATE, B_TRUE, B_FALSE,
+ NULL, overlay_target_associate,
+ NULL, sizeof (overlay_targ_associate_t) },
+ { OVERLAY_TARG_DISASSOCIATE, B_TRUE, B_FALSE,
+ NULL, overlay_target_disassociate,
+ NULL, sizeof (overlay_targ_id_t) },
+ { OVERLAY_TARG_DEGRADE, B_TRUE, B_FALSE,
+ NULL, overlay_target_degrade,
+ NULL, sizeof (overlay_targ_degrade_t) },
+ { OVERLAY_TARG_RESTORE, B_TRUE, B_FALSE,
+ NULL, overlay_target_restore,
+ NULL, sizeof (overlay_targ_id_t) },
+ { OVERLAY_TARG_LOOKUP, B_FALSE, B_TRUE,
+ NULL, overlay_target_lookup_request,
+ NULL, sizeof (overlay_targ_lookup_t) },
+ { OVERLAY_TARG_RESPOND, B_TRUE, B_FALSE,
+ NULL, overlay_target_lookup_respond,
+ NULL, sizeof (overlay_targ_resp_t) },
+ { OVERLAY_TARG_DROP, B_TRUE, B_FALSE,
+ NULL, overlay_target_lookup_drop,
+ NULL, sizeof (overlay_targ_resp_t) },
+ { OVERLAY_TARG_PKT, B_TRUE, B_TRUE,
+ overlay_target_pkt_copyin,
+ overlay_target_packet,
+ overlay_target_pkt_copyout,
+ sizeof (overlay_targ_pkt_t) },
+ { OVERLAY_TARG_INJECT, B_TRUE, B_FALSE,
+ overlay_target_pkt_copyin,
+ overlay_target_inject,
+ NULL, sizeof (overlay_targ_pkt_t) },
+ { OVERLAY_TARG_RESEND, B_TRUE, B_FALSE,
+ overlay_target_pkt_copyin,
+ overlay_target_resend,
+ NULL, sizeof (overlay_targ_pkt_t) },
+ { OVERLAY_TARG_LIST, B_FALSE, B_TRUE,
+ overlay_target_list_copyin,
+ overlay_target_ioctl_list,
+ overlay_target_list_copyout,
+ sizeof (overlay_targ_list_t) },
+ { OVERLAY_TARG_CACHE_GET, B_FALSE, B_TRUE,
+ NULL, overlay_target_cache_get,
+ NULL, sizeof (overlay_targ_cache_t) },
+ { OVERLAY_TARG_CACHE_SET, B_TRUE, B_TRUE,
+ NULL, overlay_target_cache_set,
+ NULL, sizeof (overlay_targ_cache_t) },
+ { OVERLAY_TARG_CACHE_REMOVE, B_TRUE, B_TRUE,
+ NULL, overlay_target_cache_remove,
+ NULL, sizeof (overlay_targ_cache_t) },
+ { OVERLAY_TARG_CACHE_FLUSH, B_TRUE, B_TRUE,
+ NULL, overlay_target_cache_flush,
+ NULL, sizeof (overlay_targ_cache_t) },
+ { OVERLAY_TARG_CACHE_ITER, B_FALSE, B_TRUE,
+ overlay_target_cache_iter_copyin,
+ overlay_target_cache_iter,
+ overlay_target_cache_iter_copyout,
+ sizeof (overlay_targ_cache_iter_t) },
+ { 0 }
+};
+
+int
+overlay_target_open(dev_t *devp, int flags, int otype, cred_t *credp)
+{
+ minor_t mid;
+ overlay_target_hdl_t *thdl;
+
+ if (secpolicy_dl_config(credp) != 0)
+ return (EPERM);
+
+ if (getminor(*devp) != 0)
+ return (ENXIO);
+
+ if (otype & OTYP_BLK)
+ return (EINVAL);
+
+ if (flags & ~(FREAD | FWRITE | FEXCL))
+ return (EINVAL);
+
+ if ((flags & FWRITE) &&
+ !(flags & FEXCL))
+ return (EINVAL);
+
+ if (!(flags & FREAD) && !(flags & FWRITE))
+ return (EINVAL);
+
+ if (crgetzoneid(credp) != GLOBAL_ZONEID)
+ return (EPERM);
+
+ mid = id_alloc(overlay_thdl_idspace);
+ if (ddi_soft_state_zalloc(overlay_thdl_state, mid) != 0) {
+ id_free(overlay_thdl_idspace, mid);
+ return (ENXIO);
+ }
+
+ thdl = ddi_get_soft_state(overlay_thdl_state, mid);
+ VERIFY(thdl != NULL);
+ thdl->oth_minor = mid;
+ thdl->oth_zoneid = crgetzoneid(credp);
+ thdl->oth_oflags = flags;
+ mutex_init(&thdl->oth_lock, NULL, MUTEX_DRIVER, NULL);
+ list_create(&thdl->oth_outstanding, sizeof (overlay_target_entry_t),
+ offsetof(overlay_target_entry_t, ote_qlink));
+ *devp = makedevice(getmajor(*devp), mid);
+
+ mutex_enter(&overlay_target_lock);
+ if ((flags & FEXCL) && overlay_target_excl == B_TRUE) {
+ mutex_exit(&overlay_target_lock);
+ list_destroy(&thdl->oth_outstanding);
+ mutex_destroy(&thdl->oth_lock);
+ ddi_soft_state_free(overlay_thdl_state, mid);
+ id_free(overlay_thdl_idspace, mid);
+ return (EEXIST);
+ } else if ((flags & FEXCL) != 0) {
+ VERIFY(overlay_target_excl == B_FALSE);
+ overlay_target_excl = B_TRUE;
+ }
+ list_insert_tail(&overlay_thdl_list, thdl);
+ mutex_exit(&overlay_target_lock);
+
+ return (0);
+}
+
+/* ARGSUSED */
+int
+overlay_target_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
+ int *rvalp)
+{
+ overlay_target_ioctl_t *ioc;
+ overlay_target_hdl_t *thdl;
+
+ if (secpolicy_dl_config(credp) != 0)
+ return (EPERM);
+
+ if ((thdl = ddi_get_soft_state(overlay_thdl_state,
+ getminor(dev))) == NULL)
+ return (ENXIO);
+
+ for (ioc = &overlay_target_ioctab[0]; ioc->oti_cmd != 0; ioc++) {
+ int ret;
+ caddr_t buf;
+ size_t bufsize;
+
+ if (ioc->oti_cmd != cmd)
+ continue;
+
+ if (ioc->oti_write == B_TRUE && !(mode & FWRITE))
+ return (EBADF);
+
+ if (ioc->oti_copyin == NULL) {
+ bufsize = ioc->oti_size;
+ buf = kmem_alloc(bufsize, KM_SLEEP);
+ if (ddi_copyin((void *)(uintptr_t)arg, buf, bufsize,
+ mode & FKIOCTL) != 0) {
+ kmem_free(buf, bufsize);
+ return (EFAULT);
+ }
+ } else {
+ if ((ret = ioc->oti_copyin((void *)(uintptr_t)arg,
+ (void **)&buf, &bufsize, mode)) != 0)
+ return (ret);
+ }
+
+ ret = ioc->oti_func(thdl, buf);
+ if (ret == 0 && ioc->oti_size != 0 &&
+ ioc->oti_ncopyout == B_TRUE) {
+ if (ioc->oti_copyout == NULL) {
+ if (ddi_copyout(buf, (void *)(uintptr_t)arg,
+ bufsize, mode & FKIOCTL) != 0)
+ ret = EFAULT;
+ } else {
+ ret = ioc->oti_copyout((void *)(uintptr_t)arg,
+ buf, bufsize, mode);
+ }
+ }
+
+ kmem_free(buf, bufsize);
+ return (ret);
+ }
+
+ return (ENOTTY);
+}
+
+/* ARGSUSED */
+int
+overlay_target_close(dev_t dev, int flags, int otype, cred_t *credp)
+{
+ overlay_target_hdl_t *thdl;
+ overlay_target_entry_t *entry;
+ minor_t mid = getminor(dev);
+
+ if ((thdl = ddi_get_soft_state(overlay_thdl_state, mid)) == NULL)
+ return (ENXIO);
+
+ mutex_enter(&overlay_target_lock);
+ list_remove(&overlay_thdl_list, thdl);
+ mutex_enter(&thdl->oth_lock);
+ while ((entry = list_remove_head(&thdl->oth_outstanding)) != NULL)
+ list_insert_tail(&overlay_target_list, entry);
+ cv_signal(&overlay_target_condvar);
+ mutex_exit(&thdl->oth_lock);
+ if ((thdl->oth_oflags & FEXCL) != 0) {
+ VERIFY(overlay_target_excl == B_TRUE);
+ overlay_target_excl = B_FALSE;
+ }
+ mutex_exit(&overlay_target_lock);
+
+ list_destroy(&thdl->oth_outstanding);
+ mutex_destroy(&thdl->oth_lock);
+ mid = thdl->oth_minor;
+ ddi_soft_state_free(overlay_thdl_state, mid);
+ id_free(overlay_thdl_idspace, mid);
+
+ return (0);
+}
diff --git a/usr/src/uts/common/io/overlay/plugins/overlay_vxlan.c b/usr/src/uts/common/io/overlay/plugins/overlay_vxlan.c
new file mode 100644
index 0000000000..8b4e4ecb42
--- /dev/null
+++ b/usr/src/uts/common/io/overlay/plugins/overlay_vxlan.c
@@ -0,0 +1,372 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * VXLAN encapsulation module
+ *
+ *
+ * The VXLAN header looks as follows in network byte order:
+ *
+ * |0 3| 4 |5 31|
+ * +----------+---+------------------------+
+ * | Reserved | I | Reserved |
+ * +---------------------------------------+
+ * | Virtual Network ID | Reserved |
+ * +----------------------------+----------+
+ * |0 23|24 31|
+ *
+ * All reserved values must be 0. The I bit must be 1. We call the top
+ * word the VXLAN magic field for the time being. The second word is
+ * definitely not the most friendly way to operate. Specifically, the ID
+ * is a 24-bit big endian value, but we have to make sure not to use the
+ * reserved byte.
+ *
+ * For us, VXLAN encapsulation is a fairly straightforward implementation. It
+ * only has two properties, a listen_ip and a listen_port. These determine on
+ * what address we should be listening on. While we do not have a default
+ * address to listen upon, we do have a default port, which is the IANA assigned
+ * port for VXLAN -- 4789.
+ */
+
+#include <sys/overlay_plugin.h>
+#include <sys/modctl.h>
+#include <sys/errno.h>
+#include <sys/byteorder.h>
+#include <sys/vxlan.h>
+#include <inet/ip.h>
+#include <netinet/in.h>
+#include <sys/strsun.h>
+#include <netinet/udp.h>
+
+static const char *vxlan_ident = "vxlan";
+static uint16_t vxlan_defport = IPPORT_VXLAN;
+
+/*
+ * Should we enable UDP source port hashing for fanout.
+ */
+boolean_t vxlan_fanout = B_TRUE;
+
+static const char *vxlan_props[] = {
+ "vxlan/listen_ip",
+ "vxlan/listen_port",
+ NULL
+};
+
+typedef struct vxlan {
+ kmutex_t vxl_lock;
+ overlay_handle_t vxl_oh;
+ uint16_t vxl_lport;
+ boolean_t vxl_hladdr;
+ struct in6_addr vxl_laddr;
+} vxlan_t;
+
+static int
+vxlan_o_init(overlay_handle_t oh, void **outp)
+{
+ vxlan_t *vxl;
+
+ vxl = kmem_alloc(sizeof (vxlan_t), KM_SLEEP);
+ *outp = vxl;
+ mutex_init(&vxl->vxl_lock, NULL, MUTEX_DRIVER, NULL);
+ vxl->vxl_oh = oh;
+ vxl->vxl_lport = vxlan_defport;
+ vxl->vxl_hladdr = B_FALSE;
+
+ return (0);
+}
+
+static void
+vxlan_o_fini(void *arg)
+{
+ vxlan_t *vxl = arg;
+
+ mutex_destroy(&vxl->vxl_lock);
+ kmem_free(arg, sizeof (vxlan_t));
+}
+
+static int
+vxlan_o_socket(void *arg, int *dp, int *fp, int *pp, struct sockaddr *addr,
+ socklen_t *slenp)
+{
+ vxlan_t *vxl = arg;
+ struct sockaddr_in6 *in;
+
+ in = (struct sockaddr_in6 *)addr;
+ *dp = AF_INET6;
+ *fp = SOCK_DGRAM;
+ *pp = 0;
+ bzero(in, sizeof (struct sockaddr_in6));
+ in->sin6_family = AF_INET6;
+
+ /*
+ * We should consider a more expressive private errno set that
+ * provider's can use.
+ */
+ mutex_enter(&vxl->vxl_lock);
+ if (vxl->vxl_hladdr == B_FALSE) {
+ mutex_exit(&vxl->vxl_lock);
+ return (EINVAL);
+ }
+ in->sin6_port = htons(vxl->vxl_lport);
+ in->sin6_addr = vxl->vxl_laddr;
+ mutex_exit(&vxl->vxl_lock);
+ *slenp = sizeof (struct sockaddr_in6);
+
+ return (0);
+}
+
+static int
+vxlan_o_sockopt(ksocket_t ksock)
+{
+ int val, err;
+ if (vxlan_fanout == B_FALSE)
+ return (0);
+
+ val = UDP_HASH_VXLAN;
+ err = ksocket_setsockopt(ksock, IPPROTO_UDP, UDP_SRCPORT_HASH, &val,
+ sizeof (val), kcred);
+ return (err);
+}
+
+/* ARGSUSED */
+static int
+vxlan_o_encap(void *arg, mblk_t *mp, ovep_encap_info_t *einfop,
+ mblk_t **outp)
+{
+ mblk_t *ob;
+ vxlan_hdr_t *vxh;
+
+ ASSERT(einfop->ovdi_id < (1 << 24));
+
+ /*
+ * This allocation could get hot. We may want to have a good way to
+ * cache and handle this allocation the same way that IP does with
+ * keeping around a message block per entry, or basically treating this
+ * as an immutable message block in the system. Basically freemsg() will
+ * be a nop, but we'll do the right thing with respect to the rest of
+ * the chain.
+ */
+ ob = allocb(VXLAN_HDR_LEN, 0);
+ if (ob == NULL)
+ return (ENOMEM);
+
+ vxh = (vxlan_hdr_t *)ob->b_rptr;
+ vxh->vxlan_flags = ntohl(VXLAN_F_VDI);
+ vxh->vxlan_id = htonl((uint32_t)einfop->ovdi_id << VXLAN_ID_SHIFT);
+ ob->b_wptr += VXLAN_HDR_LEN;
+ *outp = ob;
+
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+vxlan_o_decap(void *arg, mblk_t *mp, ovep_encap_info_t *dinfop)
+{
+ vxlan_hdr_t *vxh;
+
+ if (MBLKL(mp) < sizeof (vxlan_hdr_t))
+ return (EINVAL);
+ vxh = (vxlan_hdr_t *)mp->b_rptr;
+ if ((ntohl(vxh->vxlan_flags) & VXLAN_F_VDI) == 0)
+ return (EINVAL);
+
+ dinfop->ovdi_id = ntohl(vxh->vxlan_id) >> VXLAN_ID_SHIFT;
+ dinfop->ovdi_hdr_size = VXLAN_HDR_LEN;
+
+ return (0);
+}
+
+static int
+vxlan_o_getprop(void *arg, const char *pr_name, void *buf, uint32_t *bufsize)
+{
+ vxlan_t *vxl = arg;
+
+ /* vxlan/listen_ip */
+ if (strcmp(pr_name, vxlan_props[0]) == 0) {
+ if (*bufsize < sizeof (struct in6_addr))
+ return (EOVERFLOW);
+
+ mutex_enter(&vxl->vxl_lock);
+ if (vxl->vxl_hladdr == B_FALSE) {
+ *bufsize = 0;
+ } else {
+ bcopy(&vxl->vxl_laddr, buf, sizeof (struct in6_addr));
+ *bufsize = sizeof (struct in6_addr);
+ }
+ mutex_exit(&vxl->vxl_lock);
+ return (0);
+ }
+
+ /* vxlan/listen_port */
+ if (strcmp(pr_name, vxlan_props[1]) == 0) {
+ uint64_t val;
+ if (*bufsize < sizeof (uint64_t))
+ return (EOVERFLOW);
+
+ mutex_enter(&vxl->vxl_lock);
+ val = vxl->vxl_lport;
+ bcopy(&val, buf, sizeof (uint64_t));
+ *bufsize = sizeof (uint64_t);
+ mutex_exit(&vxl->vxl_lock);
+ return (0);
+ }
+
+ return (EINVAL);
+}
+
+static int
+vxlan_o_setprop(void *arg, const char *pr_name, const void *buf,
+ uint32_t bufsize)
+{
+ vxlan_t *vxl = arg;
+
+ /* vxlan/listen_ip */
+ if (strcmp(pr_name, vxlan_props[0]) == 0) {
+ const struct in6_addr *ipv6 = buf;
+ if (bufsize != sizeof (struct in6_addr))
+ return (EINVAL);
+
+ if (IN6_IS_ADDR_V4COMPAT(ipv6))
+ return (EINVAL);
+
+ if (IN6_IS_ADDR_MULTICAST(ipv6))
+ return (EINVAL);
+
+ if (IN6_IS_ADDR_6TO4(ipv6))
+ return (EINVAL);
+
+ if (IN6_IS_ADDR_V4MAPPED(ipv6)) {
+ ipaddr_t v4;
+ IN6_V4MAPPED_TO_IPADDR(ipv6, v4);
+ if (IN_MULTICAST(v4))
+ return (EINVAL);
+ }
+
+ mutex_enter(&vxl->vxl_lock);
+ vxl->vxl_hladdr = B_TRUE;
+ bcopy(ipv6, &vxl->vxl_laddr, sizeof (struct in6_addr));
+ mutex_exit(&vxl->vxl_lock);
+
+ return (0);
+ }
+
+ /* vxlan/listen_port */
+ if (strcmp(pr_name, vxlan_props[1]) == 0) {
+ const uint64_t *valp = buf;
+ if (bufsize != 8)
+ return (EINVAL);
+
+ if (*valp == 0 || *valp > UINT16_MAX)
+ return (EINVAL);
+
+ mutex_enter(&vxl->vxl_lock);
+ vxl->vxl_lport = *valp;
+ mutex_exit(&vxl->vxl_lock);
+ return (0);
+ }
+ return (EINVAL);
+}
+
+static int
+vxlan_o_propinfo(const char *pr_name, overlay_prop_handle_t phdl)
+{
+ /* vxlan/listen_ip */
+ if (strcmp(pr_name, vxlan_props[0]) == 0) {
+ overlay_prop_set_name(phdl, vxlan_props[0]);
+ overlay_prop_set_prot(phdl, OVERLAY_PROP_PERM_RRW);
+ overlay_prop_set_type(phdl, OVERLAY_PROP_T_IP);
+ overlay_prop_set_nodefault(phdl);
+ return (0);
+ }
+
+ if (strcmp(pr_name, vxlan_props[1]) == 0) {
+ overlay_prop_set_name(phdl, vxlan_props[1]);
+ overlay_prop_set_prot(phdl, OVERLAY_PROP_PERM_RRW);
+ overlay_prop_set_type(phdl, OVERLAY_PROP_T_UINT);
+ (void) overlay_prop_set_default(phdl, &vxlan_defport,
+ sizeof (vxlan_defport));
+ overlay_prop_set_range_uint32(phdl, 1, UINT16_MAX);
+ return (0);
+ }
+
+ return (EINVAL);
+}
+
+static struct overlay_plugin_ops vxlan_o_ops = {
+ 0,
+ vxlan_o_init,
+ vxlan_o_fini,
+ vxlan_o_encap,
+ vxlan_o_decap,
+ vxlan_o_socket,
+ vxlan_o_sockopt,
+ vxlan_o_getprop,
+ vxlan_o_setprop,
+ vxlan_o_propinfo
+};
+
+static struct modlmisc vxlan_modlmisc = {
+ &mod_miscops,
+ "VXLAN encap plugin"
+};
+
+static struct modlinkage vxlan_modlinkage = {
+ MODREV_1,
+ &vxlan_modlmisc
+};
+
+int
+_init(void)
+{
+ int err;
+ overlay_plugin_register_t *ovrp;
+
+ ovrp = overlay_plugin_alloc(OVEP_VERSION);
+ if (ovrp == NULL)
+ return (ENOTSUP);
+ ovrp->ovep_name = vxlan_ident;
+ ovrp->ovep_ops = &vxlan_o_ops;
+ ovrp->ovep_id_size = VXLAN_ID_LEN;
+ ovrp->ovep_flags = OVEP_F_VLAN_TAG;
+ ovrp->ovep_dest = OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT;
+ ovrp->ovep_props = vxlan_props;
+
+ if ((err = overlay_plugin_register(ovrp)) == 0) {
+ if ((err = mod_install(&vxlan_modlinkage)) != 0) {
+ (void) overlay_plugin_unregister(vxlan_ident);
+ }
+ }
+
+ overlay_plugin_free(ovrp);
+ return (err);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&vxlan_modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ int err;
+
+ if ((err = overlay_plugin_unregister(vxlan_ident)) != 0)
+ return (err);
+
+ return (mod_remove(&vxlan_modlinkage));
+}
diff --git a/usr/src/uts/common/io/physmem.c b/usr/src/uts/common/io/physmem.c
index 39d5003b02..c48fecd133 100644
--- a/usr/src/uts/common/io/physmem.c
+++ b/usr/src/uts/common/io/physmem.c
@@ -21,6 +21,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
*/
@@ -807,6 +808,13 @@ physmem_open(dev_t *devp, int flag, int otyp, cred_t *credp)
int ret;
static int msg_printed = 0;
+ /*
+ * This device should never be visible in a zone, but if it somehow
+ * does get created we refuse to allow the zone to use it.
+ */
+ if (crgetzoneid(credp) != GLOBAL_ZONEID)
+ return (EACCES);
+
if ((flag & (FWRITE | FREAD)) != (FWRITE | FREAD)) {
return (EINVAL);
}
diff --git a/usr/src/uts/common/io/pseudo.conf b/usr/src/uts/common/io/pseudo.conf
index 42248e93d6..08affec609 100644
--- a/usr/src/uts/common/io/pseudo.conf
+++ b/usr/src/uts/common/io/pseudo.conf
@@ -22,8 +22,7 @@
#
# Copyright 2003 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
-#
-# ident "%Z%%M% %I% %E% SMI"
+# Copyright 2014 Joyent, Inc. All rights reserved.
#
# This file is private to the pseudonex driver. It should not be edited.
#
@@ -38,3 +37,9 @@ name="pseudo" class="root" instance=0;
# /pseudo; it has as its children the zone console pseudo nodes.
#
name="zconsnex" parent="/pseudo" instance=1 valid-children="zcons";
+
+#
+# zfdnex is an alias for pseudo; this node is instantiated as a child of
+# /pseudo; it has as its children the zone fd pseudo nodes.
+#
+name="zfdnex" parent="/pseudo" instance=2 valid-children="zfd";
diff --git a/usr/src/uts/common/io/pseudonex.c b/usr/src/uts/common/io/pseudonex.c
index f83b0abf39..0ae06f88cc 100644
--- a/usr/src/uts/common/io/pseudonex.c
+++ b/usr/src/uts/common/io/pseudonex.c
@@ -83,6 +83,8 @@ static int pseudonex_detach(dev_info_t *, ddi_detach_cmd_t);
static int pseudonex_open(dev_t *, int, int, cred_t *);
static int pseudonex_close(dev_t, int, int, cred_t *);
static int pseudonex_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
+static int pseudonex_fm_init(dev_info_t *, dev_info_t *, int,
+ ddi_iblock_cookie_t *);
static int pseudonex_ctl(dev_info_t *, dev_info_t *, ddi_ctl_enum_t, void *,
void *);
@@ -90,6 +92,8 @@ static void *pseudonex_state;
typedef struct pseudonex_state {
dev_info_t *pnx_devi;
+ int pnx_fmcap;
+ ddi_iblock_cookie_t pnx_fm_ibc;
} pseudonex_state_t;
static struct bus_ops pseudonex_bus_ops = {
@@ -116,7 +120,7 @@ static struct bus_ops pseudonex_bus_ops = {
NULL, /* bus_intr_ctl */
NULL, /* bus_config */
NULL, /* bus_unconfig */
- NULL, /* bus_fm_init */
+ pseudonex_fm_init, /* bus_fm_init */
NULL, /* bus_fm_fini */
NULL, /* bus_fm_access_enter */
NULL, /* bus_fm_access_exit */
@@ -228,6 +232,9 @@ pseudonex_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
pnx_state = ddi_get_soft_state(pseudonex_state, instance);
pnx_state->pnx_devi = devi;
+ pnx_state->pnx_fmcap = DDI_FM_EREPORT_CAPABLE;
+ ddi_fm_init(devi, &pnx_state->pnx_fmcap, &pnx_state->pnx_fm_ibc);
+
if (ddi_create_minor_node(devi, "devctl", S_IFCHR, instance,
DDI_NT_NEXUS, 0) != DDI_SUCCESS) {
ddi_remove_minor_node(devi, NULL);
@@ -247,6 +254,10 @@ pseudonex_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
if (cmd == DDI_SUSPEND)
return (DDI_SUCCESS);
+ if (cmd != DDI_DETACH)
+ return (DDI_FAILURE);
+
+ ddi_fm_fini(devi);
ddi_remove_minor_node(devi, NULL);
ddi_soft_state_free(pseudonex_state, instance);
return (DDI_SUCCESS);
@@ -375,6 +386,19 @@ pseudonex_auto_assign(dev_info_t *child)
}
static int
+pseudonex_fm_init(dev_info_t *dip, dev_info_t *tdip, int cap,
+ ddi_iblock_cookie_t *ibc)
+{
+ pseudonex_state_t *pnx_state;
+
+ pnx_state = ddi_get_soft_state(pseudonex_state, ddi_get_instance(dip));
+ ASSERT(pnx_state != NULL);
+ ASSERT(ibc != NULL);
+ *ibc = pnx_state->pnx_fm_ibc;
+ return (pnx_state->pnx_fmcap & cap);
+}
+
+static int
pseudonex_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
void *arg, void *result)
{
diff --git a/usr/src/uts/common/io/ptm.c b/usr/src/uts/common/io/ptm.c
index 400e9ffd10..07ffddc123 100644
--- a/usr/src/uts/common/io/ptm.c
+++ b/usr/src/uts/common/io/ptm.c
@@ -447,6 +447,18 @@ ptmclose(queue_t *rqp, int flag, cred_t *credp)
return (0);
}
+static boolean_t
+ptmptsopencb(ptmptsopencb_arg_t arg)
+{
+ struct pt_ttys *ptmp = (struct pt_ttys *)arg;
+ boolean_t rval;
+
+ PT_ENTER_READ(ptmp);
+ rval = (ptmp->pt_nullmsg != NULL);
+ PT_EXIT_READ(ptmp);
+ return (rval);
+}
+
/*
* The wput procedure will only handle ioctl and flush messages.
*/
@@ -574,6 +586,41 @@ ptmwput(queue_t *qp, mblk_t *mp)
miocack(qp, mp, 0, 0);
break;
}
+ case PTMPTSOPENCB:
+ {
+ mblk_t *dp; /* ioctl reply data */
+ ptmptsopencb_t *ppocb;
+
+ /* only allow the kernel to invoke this ioctl */
+ if (iocp->ioc_cr != kcred) {
+ miocnak(qp, mp, 0, EINVAL);
+ break;
+ }
+
+ /* we don't support transparent ioctls */
+ ASSERT(iocp->ioc_count != TRANSPARENT);
+ if (iocp->ioc_count == TRANSPARENT) {
+ miocnak(qp, mp, 0, EINVAL);
+ break;
+ }
+
+ /* allocate a response message */
+ dp = allocb(sizeof (ptmptsopencb_t), BPRI_MED);
+ if (dp == NULL) {
+ miocnak(qp, mp, 0, EAGAIN);
+ break;
+ }
+
+ /* initialize the ioctl results */
+ ppocb = (ptmptsopencb_t *)dp->b_rptr;
+ ppocb->ppocb_func = ptmptsopencb;
+ ppocb->ppocb_arg = (ptmptsopencb_arg_t)ptmp;
+
+ /* send the reply data */
+ mioc2ack(mp, dp, sizeof (ptmptsopencb_t), 0);
+ qreply(qp, mp);
+ break;
+ }
}
break;
diff --git a/usr/src/uts/common/io/scsi/adapters/mpt_sas/mptsas.c b/usr/src/uts/common/io/scsi/adapters/mpt_sas/mptsas.c
index 72c8800f3e..dc5e8eafc9 100644
--- a/usr/src/uts/common/io/scsi/adapters/mpt_sas/mptsas.c
+++ b/usr/src/uts/common/io/scsi/adapters/mpt_sas/mptsas.c
@@ -22,7 +22,7 @@
/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2016 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
* Copyright 2014 OmniTI Computer Consulting, Inc. All rights reserved.
* Copyright (c) 2014, Tegile Systems Inc. All rights reserved.
*/
@@ -72,6 +72,7 @@
#include <sys/file.h>
#include <sys/policy.h>
#include <sys/model.h>
+#include <sys/refhash.h>
#include <sys/sysevent.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/dr.h>
@@ -99,7 +100,6 @@
#include <sys/scsi/adapters/mpt_sas/mptsas_var.h>
#include <sys/scsi/adapters/mpt_sas/mptsas_ioctl.h>
#include <sys/scsi/adapters/mpt_sas/mptsas_smhba.h>
-#include <sys/scsi/adapters/mpt_sas/mptsas_hash.h>
#include <sys/raidioctl.h>
#include <sys/fs/dv_node.h> /* devfs_clean */
diff --git a/usr/src/uts/common/io/scsi/targets/sd.c b/usr/src/uts/common/io/scsi/targets/sd.c
index ae1e7e0fc3..dc5dc22e37 100644
--- a/usr/src/uts/common/io/scsi/targets/sd.c
+++ b/usr/src/uts/common/io/scsi/targets/sd.c
@@ -3503,9 +3503,13 @@ sd_set_mmc_caps(sd_ssc_t *ssc)
* according to the successful response to the page
* 0x2A mode sense request.
*/
- scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
- "sd_set_mmc_caps: Mode Sense returned "
- "invalid block descriptor length\n");
+ /*
+ * The following warning occurs due to the KVM CD-ROM
+ * mishandling the multi-media commands. Ignore it.
+ * scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
+ * "sd_set_mmc_caps: Mode Sense returned "
+ * "invalid block descriptor length\n");
+ */
kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
return;
}
@@ -4450,18 +4454,77 @@ sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen)
{
struct scsi_inquiry *sd_inq;
int rval = SD_SUCCESS;
+ char *p;
+ int chk_vidlen = 0, chk_pidlen = 0;
+ int has_tail = 0;
+ static const int VSZ = sizeof (sd_inq->inq_vid);
+ static const int PSZ = sizeof (sd_inq->inq_pid);
ASSERT(un != NULL);
sd_inq = un->un_sd->sd_inq;
ASSERT(id != NULL);
/*
- * We use the inq_vid as a pointer to a buffer containing the
- * vid and pid and use the entire vid/pid length of the table
- * entry for the comparison. This works because the inq_pid
- * data member follows inq_vid in the scsi_inquiry structure.
+ * We would like to use the inq_vid as a pointer to a buffer
+ * containing the vid and pid and use the entire vid/pid length of
+ * the table entry for the comparison. However, this does not work
+ * because, while the inq_pid data member follows inq_vid in the
+ * scsi_inquiry structure, we do not control the contents of this
+ * buffer, and some broken devices violate SPC 4.3.1 and return
+ * fields with null bytes in them.
+ */
+ chk_vidlen = MIN(VSZ, idlen);
+ p = id + chk_vidlen - 1;
+ while (*p == ' ' && chk_vidlen > 0) {
+ --p;
+ --chk_vidlen;
+ }
+
+ /*
+ * If it's all spaces, check the whole thing.
*/
- if (strncasecmp(sd_inq->inq_vid, id, idlen) != 0) {
+ if (chk_vidlen == 0)
+ chk_vidlen = MIN(VSZ, idlen);
+
+ if (idlen > VSZ) {
+ chk_pidlen = idlen - VSZ;
+ p = id + idlen - 1;
+ while (*p == ' ' && chk_pidlen > 0) {
+ --p;
+ --chk_pidlen;
+ }
+ if (chk_pidlen == 0)
+ chk_pidlen = MIN(PSZ, idlen - VSZ);
+ }
+
+ /*
+ * There's one more thing we need to do here. If the user specified
+ * an ID with trailing spaces, we need to make sure the inquiry
+ * vid/pid has only spaces or NULs after the check length; otherwise, it
+ * can't match.
+ */
+ if (idlen > chk_vidlen && chk_vidlen < VSZ) {
+ for (p = sd_inq->inq_vid + chk_vidlen;
+ p < sd_inq->inq_vid + VSZ; ++p) {
+ if (*p != ' ' && *p != '\0') {
+ ++has_tail;
+ break;
+ }
+ }
+ }
+ if (idlen > chk_pidlen + VSZ && chk_pidlen < PSZ) {
+ for (p = sd_inq->inq_pid + chk_pidlen;
+ p < sd_inq->inq_pid + PSZ; ++p) {
+ if (*p != ' ' && *p != '\0') {
+ ++has_tail;
+ break;
+ }
+ }
+ }
+
+ if (has_tail || strncasecmp(sd_inq->inq_vid, id, chk_vidlen) != 0 ||
+ (idlen > VSZ &&
+ strncasecmp(sd_inq->inq_pid, id + VSZ, chk_pidlen) != 0)) {
/*
* The user id string is compared to the inquiry vid/pid
* using a case insensitive comparison and ignoring
diff --git a/usr/src/uts/common/io/signalfd.c b/usr/src/uts/common/io/signalfd.c
index 32f8f85f7a..4ab4f36d4e 100644
--- a/usr/src/uts/common/io/signalfd.c
+++ b/usr/src/uts/common/io/signalfd.c
@@ -10,7 +10,7 @@
*/
/*
- * Copyright 2015 Joyent, Inc.
+ * Copyright 2016 Joyent, Inc.
*/
/*
@@ -19,97 +19,73 @@
*
* As described on the signalfd(3C) man page, the general idea behind these
* file descriptors is that they can be used to synchronously consume signals
- * via the read(2) syscall. That capability already exists with the
- * sigwaitinfo(3C) function but the key advantage of signalfd is that, because
- * it is file descriptor based, poll(2) can be used to determine when signals
- * are available to be consumed.
+ * via the read(2) syscall. While that capability already exists with the
+ * sigwaitinfo(3C) function, signalfd holds an advantage since it is file
+ * descriptor based: It is able use the event facilities (poll(2), /dev/poll,
+ * event ports) to notify interested parties when consumable signals arrive.
*
- * The general implementation uses signalfd_state to hold both the signal set
- * and poll head for an open file descriptor. Because a process can be using
- * different sigfds with different signal sets, each signalfd_state poll head
- * can be thought of as an independent signal stream and the thread(s) waiting
- * on that stream will get poll notification when any signal in the
- * corresponding set is received.
+ * The signalfd lifecycle begins When a process opens /dev/signalfd. A minor
+ * will be allocated for them along with an associated signalfd_state_t struct.
+ * It is there where the mask of desired signals resides.
*
- * The sigfd_proc_state_t struct lives on the proc_t and maintains per-proc
- * state for function callbacks and data when the proc needs to do work during
- * signal delivery for pollwakeup.
+ * Reading from the signalfd is straightforward and mimics the kernel behavior
+ * for sigtimedwait(). Signals continue to live on either the proc's p_sig, or
+ * thread's t_sig, member. During a read operation, those which match the mask
+ * are consumed so they are no longer pending.
*
- * The read side of the implementation is straightforward and mimics the
- * kernel behavior for sigtimedwait(). Signals continue to live on either
- * the proc's p_sig, or thread's t_sig, member. Read consumes the signal so
- * that it is no longer pending.
+ * The poll side is more complex. Every time a signal is delivered, all of the
+ * signalfds on the process need to be examined in order to pollwake threads
+ * waiting for signal arrival.
*
- * The poll side is more complex since all of the sigfds on the process need
- * to be examined every time a signal is delivered to the process in order to
- * pollwake any thread waiting in poll for that signal.
+ * When a thread polling on a signalfd requires a pollhead, several steps must
+ * be taken to safely ensure the proper result. A sigfd_proc_state_t is
+ * created for the calling process if it does not yet exist. It is there where
+ * a list of sigfd_poll_waiter_t structures reside which associate pollheads to
+ * signalfd_state_t entries. The sigfd_proc_state_t list is walked to find a
+ * sigfd_poll_waiter_t matching the signalfd_state_t which corresponds to the
+ * polled resource. If one is found, it is reused. Otherwise a new one is
+ * created, incrementing the refcount on the signalfd_state_t, and it is added
+ * to the sigfd_poll_waiter_t list.
*
- * Because it is likely that a process will only be using one, or a few, sigfds,
- * but many total file descriptors, we maintain a list of sigfds which need
- * pollwakeup. The list lives on the proc's p_sigfd struct. In this way only
- * zero, or a few, of the state structs will need to be examined every time a
- * signal is delivered to the process, instead of having to examine all of the
- * file descriptors to find the state structs. When a state struct with a
- * matching signal set is found then pollwakeup is called.
+ * The complications imposed by fork(2) are why the pollhead is stored in the
+ * associated sigfd_poll_waiter_t instead of directly in the signalfd_state_t.
+ * More than one process can hold a reference to the signalfd at a time but
+ * arriving signals should wake only process-local pollers. Additionally,
+ * signalfd_close is called only when the last referencing fd is closed, hiding
+ * occurrences of preceeding threads which released their references. This
+ * necessitates reference counting on the signalfd_state_t so it is able to
+ * persist after close until all poll references have been cleansed. Doing so
+ * ensures that blocked pollers which hold references to the signalfd_state_t
+ * will be able to do clean-up after the descriptor itself has been closed.
*
- * The sigfd_list is self-cleaning; as signalfd_pollwake_cb is called, the list
- * will clear out on its own. There is an exit helper (signalfd_exit_helper)
- * which cleans up any remaining per-proc state when the process exits.
+ * When a signal arrives in a process polling on signalfd, signalfd_pollwake_cb
+ * is called via the pointer in sigfd_proc_state_t. It will walk over the
+ * sigfd_poll_waiter_t entries present in the list, searching for any
+ * associated with a signalfd_state_t with a matching signal mask. The
+ * approach of keeping the poller list in p_sigfd was chosen because a process
+ * is likely to use few signalfds relative to its total file descriptors. It
+ * reduces the work required for each received signal.
*
- * The main complexity with signalfd is the interaction of forking and polling.
- * This interaction is complex because now two processes have a fd that
- * references the same dev_t (and its associated signalfd_state), but signals
- * go to only one of those processes. Also, we don't know when one of the
- * processes closes its fd because our 'close' entry point is only called when
- * the last fd is closed (which could be by either process).
+ * The sigfd_list is self-cleaning; as signalfd_pollwake_cb is called, the list
+ * will clear out on its own. Any remaining per-process state which remains
+ * will be cleaned up by the exit helper (signalfd_exit_helper).
*
- * Because the state struct is referenced by both file descriptors, and the
- * state struct represents a signal stream needing a pollwakeup, if both
- * processes were polling then both processes would get a pollwakeup when a
- * signal arrives for either process (that is, the pollhead is associated with
- * our dev_t so when a signal arrives the pollwakeup wakes up all waiters).
+ * The structures associated with signalfd state are designed to operate
+ * correctly across fork, but there is one caveat that applies. Using
+ * fork-shared signalfd descriptors in conjuction with fork-shared caching poll
+ * descriptors (such as /dev/poll or event ports) will result in missed poll
+ * wake-ups. This is caused by the pollhead identity of signalfd descriptors
+ * being dependent on the process they are polled from. Because it has a
+ * thread-local cache, poll(2) is unaffected by this limitation.
*
- * Fortunately this is not a common problem in practice, but the implementation
- * attempts to mitigate unexpected behavior. The typical behavior is that the
- * parent has been polling the signalfd (which is why it was open in the first
- * place) and the parent might have a pending signalfd_state (with the
- * pollhead) on its per-process sigfd_list. After the fork the child will
- * simply close that fd (among others) as part of the typical fork/close/exec
- * pattern. Because the child will never poll that fd, it will never get any
- * state onto its own sigfd_list (the child starts with a null list). The
- * intention is that the child sees no pollwakeup activity for signals unless
- * it explicitly reinvokes poll on the sigfd.
+ * Lock ordering:
*
- * As background, there are two primary polling cases to consider when the
- * parent process forks:
- * 1) If any thread is blocked in poll(2) then both the parent and child will
- * return from the poll syscall with EINTR. This means that if either
- * process wants to re-poll on a sigfd then it needs to re-run poll and
- * would come back in to the signalfd_poll entry point. The parent would
- * already have the dev_t's state on its sigfd_list and the child would not
- * have anything there unless it called poll again on its fd.
- * 2) If the process is using /dev/poll(7D) then the polling info is being
- * cached by the poll device and the process might not currently be blocked
- * on anything polling related. A subsequent DP_POLL ioctl will not invoke
- * our signalfd_poll entry point again. Because the parent still has its
- * sigfd_list setup, an incoming signal will hit our signalfd_pollwake_cb
- * entry point, which in turn calls pollwake, and /dev/poll will do the
- * right thing on DP_POLL. The child will not have a sigfd_list yet so the
- * signal will not cause a pollwakeup. The dp code does its own handling for
- * cleaning up its cache.
+ * 1. signalfd_lock
+ * 2. signalfd_state_t`sfd_lock
*
- * This leaves only one odd corner case. If the parent and child both use
- * the dup-ed sigfd to poll then when a signal is delivered to either process
- * there is no way to determine which one should get the pollwakeup (since
- * both processes will be queued on the same signal stream poll head). What
- * happens in this case is that both processes will return from poll, but only
- * one of them will actually have a signal to read. The other will return
- * from read with EAGAIN, or block. This case is actually similar to the
- * situation within a single process which got two different sigfd's with the
- * same mask (or poll on two fd's that are dup-ed). Both would return from poll
- * when a signal arrives but only one read would consume the signal and the
- * other read would fail or block. Applications which poll on shared fd's
- * cannot assume that a subsequent read will actually obtain data.
+ * 1. proc_t`p_lock (to walk p_sigfd)
+ * 2. signalfd_state_t`sfd_lock
+ * 2a. signalfd_lock (after sfd_lock is dropped, when sfd_count falls to 0)
*/
#include <sys/ddi.h>
@@ -123,118 +99,150 @@
#include <sys/schedctl.h>
#include <sys/id_space.h>
#include <sys/sdt.h>
+#include <sys/brand.h>
typedef struct signalfd_state signalfd_state_t;
struct signalfd_state {
- kmutex_t sfd_lock; /* lock protecting state */
- pollhead_t sfd_pollhd; /* poll head */
- k_sigset_t sfd_set; /* signals for this fd */
- signalfd_state_t *sfd_next; /* next state on global list */
+ list_node_t sfd_list; /* node in global list */
+ kmutex_t sfd_lock; /* protects fields below */
+ uint_t sfd_count; /* ref count */
+ boolean_t sfd_valid; /* valid while open */
+ k_sigset_t sfd_set; /* signals for this fd */
};
+typedef struct sigfd_poll_waiter {
+ list_node_t spw_list;
+ signalfd_state_t *spw_state;
+ pollhead_t spw_pollhd;
+} sigfd_poll_waiter_t;
+
/*
- * Internal global variables.
+ * Protects global state in signalfd_devi, signalfd_minor, signalfd_softstate,
+ * and signalfd_state (including sfd_list field of members)
*/
-static kmutex_t signalfd_lock; /* lock protecting state */
+static kmutex_t signalfd_lock;
static dev_info_t *signalfd_devi; /* device info */
static id_space_t *signalfd_minor; /* minor number arena */
static void *signalfd_softstate; /* softstate pointer */
-static signalfd_state_t *signalfd_state; /* global list of state */
+static list_t signalfd_state; /* global list of state */
+
-/*
- * If we don't already have an entry in the proc's list for this state, add one.
- */
static void
-signalfd_wake_list_add(signalfd_state_t *state)
+signalfd_state_enter(signalfd_state_t *state)
{
- proc_t *p = curproc;
- list_t *lst;
- sigfd_wake_list_t *wlp;
-
- ASSERT(MUTEX_HELD(&p->p_lock));
- ASSERT(p->p_sigfd != NULL);
+ ASSERT(MUTEX_HELD(&state->sfd_lock));
+ ASSERT(state->sfd_count > 0);
+ VERIFY(state->sfd_valid == B_TRUE);
- lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list;
- for (wlp = list_head(lst); wlp != NULL; wlp = list_next(lst, wlp)) {
- if (wlp->sigfd_wl_state == state)
- break;
- }
-
- if (wlp == NULL) {
- wlp = kmem_zalloc(sizeof (sigfd_wake_list_t), KM_SLEEP);
- wlp->sigfd_wl_state = state;
- list_insert_head(lst, wlp);
- }
+ state->sfd_count++;
}
static void
-signalfd_wake_rm(list_t *lst, sigfd_wake_list_t *wlp)
+signalfd_state_release(signalfd_state_t *state, boolean_t locked)
{
- list_remove(lst, wlp);
- kmem_free(wlp, sizeof (sigfd_wake_list_t));
+ ASSERT(MUTEX_HELD(&state->sfd_lock));
+ ASSERT(state->sfd_count > 0);
+
+ if (state->sfd_count == 1) {
+ VERIFY(state->sfd_valid == B_FALSE);
+ mutex_exit(&state->sfd_lock);
+ if (locked) {
+ ASSERT(MUTEX_HELD(&signalfd_lock));
+ list_remove(&signalfd_state, state);
+ } else {
+ ASSERT(MUTEX_NOT_HELD(&signalfd_lock));
+ mutex_enter(&signalfd_lock);
+ list_remove(&signalfd_state, state);
+ mutex_exit(&signalfd_lock);
+ }
+ kmem_free(state, sizeof (*state));
+ return;
+ }
+ state->sfd_count--;
+ mutex_exit(&state->sfd_lock);
}
-static void
-signalfd_wake_list_rm(proc_t *p, signalfd_state_t *state)
+static sigfd_poll_waiter_t *
+signalfd_wake_list_add(sigfd_proc_state_t *pstate, signalfd_state_t *state)
{
- sigfd_wake_list_t *wlp;
- list_t *lst;
+ list_t *lst = &pstate->sigfd_list;
+ sigfd_poll_waiter_t *pw;
- ASSERT(MUTEX_HELD(&p->p_lock));
+ for (pw = list_head(lst); pw != NULL; pw = list_next(lst, pw)) {
+ if (pw->spw_state == state)
+ break;
+ }
- if (p->p_sigfd == NULL)
- return;
+ if (pw == NULL) {
+ pw = kmem_zalloc(sizeof (*pw), KM_SLEEP);
+
+ mutex_enter(&state->sfd_lock);
+ signalfd_state_enter(state);
+ pw->spw_state = state;
+ mutex_exit(&state->sfd_lock);
+ list_insert_head(lst, pw);
+ }
+ return (pw);
+}
+
+static sigfd_poll_waiter_t *
+signalfd_wake_list_rm(sigfd_proc_state_t *pstate, signalfd_state_t *state)
+{
+ list_t *lst = &pstate->sigfd_list;
+ sigfd_poll_waiter_t *pw;
- lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list;
- for (wlp = list_head(lst); wlp != NULL; wlp = list_next(lst, wlp)) {
- if (wlp->sigfd_wl_state == state) {
- signalfd_wake_rm(lst, wlp);
+ for (pw = list_head(lst); pw != NULL; pw = list_next(lst, pw)) {
+ if (pw->spw_state == state) {
break;
}
}
- if (list_is_empty(lst)) {
- ((sigfd_proc_state_t *)p->p_sigfd)->sigfd_pollwake_cb = NULL;
- list_destroy(lst);
- kmem_free(p->p_sigfd, sizeof (sigfd_proc_state_t));
- p->p_sigfd = NULL;
+ if (pw != NULL) {
+ list_remove(lst, pw);
+ mutex_enter(&state->sfd_lock);
+ signalfd_state_release(state, B_FALSE);
+ pw->spw_state = NULL;
}
+
+ return (pw);
}
static void
signalfd_wake_list_cleanup(proc_t *p)
{
- sigfd_wake_list_t *wlp;
+ sigfd_proc_state_t *pstate = p->p_sigfd;
+ sigfd_poll_waiter_t *pw;
list_t *lst;
ASSERT(MUTEX_HELD(&p->p_lock));
+ ASSERT(pstate != NULL);
+
+ lst = &pstate->sigfd_list;
+ while ((pw = list_remove_head(lst)) != NULL) {
+ signalfd_state_t *state = pw->spw_state;
- ((sigfd_proc_state_t *)p->p_sigfd)->sigfd_pollwake_cb = NULL;
+ pw->spw_state = NULL;
+ mutex_enter(&state->sfd_lock);
+ signalfd_state_release(state, B_FALSE);
- lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list;
- while (!list_is_empty(lst)) {
- wlp = (sigfd_wake_list_t *)list_remove_head(lst);
- kmem_free(wlp, sizeof (sigfd_wake_list_t));
+ pollwakeup(&pw->spw_pollhd, POLLERR);
+ pollhead_clean(&pw->spw_pollhd);
+ kmem_free(pw, sizeof (*pw));
}
+ list_destroy(lst);
+
+ p->p_sigfd = NULL;
+ kmem_free(pstate, sizeof (*pstate));
}
static void
signalfd_exit_helper(void)
{
proc_t *p = curproc;
- list_t *lst;
-
- /* This being non-null is the only way we can get here */
- ASSERT(p->p_sigfd != NULL);
mutex_enter(&p->p_lock);
- lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list;
-
signalfd_wake_list_cleanup(p);
- list_destroy(lst);
- kmem_free(p->p_sigfd, sizeof (sigfd_proc_state_t));
- p->p_sigfd = NULL;
mutex_exit(&p->p_lock);
}
@@ -254,35 +262,40 @@ static void
signalfd_pollwake_cb(void *arg0, int sig)
{
proc_t *p = (proc_t *)arg0;
+ sigfd_proc_state_t *pstate = (sigfd_proc_state_t *)p->p_sigfd;
list_t *lst;
- sigfd_wake_list_t *wlp;
+ sigfd_poll_waiter_t *pw;
ASSERT(MUTEX_HELD(&p->p_lock));
+ ASSERT(pstate != NULL);
- if (p->p_sigfd == NULL)
- return;
-
- lst = &((sigfd_proc_state_t *)p->p_sigfd)->sigfd_list;
- wlp = list_head(lst);
- while (wlp != NULL) {
- signalfd_state_t *state = wlp->sigfd_wl_state;
+ lst = &pstate->sigfd_list;
+ pw = list_head(lst);
+ while (pw != NULL) {
+ signalfd_state_t *state = pw->spw_state;
+ sigfd_poll_waiter_t *next;
+ short pollev;
mutex_enter(&state->sfd_lock);
-
- if (sigismember(&state->sfd_set, sig) &&
- state->sfd_pollhd.ph_list != NULL) {
- sigfd_wake_list_t *tmp = wlp;
-
- /* remove it from the list */
- wlp = list_next(lst, wlp);
- signalfd_wake_rm(lst, tmp);
-
- mutex_exit(&state->sfd_lock);
- pollwakeup(&state->sfd_pollhd, POLLRDNORM | POLLIN);
+ if (!state->sfd_valid) {
+ pollev = POLLERR;
+ } else if (sigismember(&state->sfd_set, sig)) {
+ pollev = POLLRDNORM | POLLIN;
} else {
mutex_exit(&state->sfd_lock);
- wlp = list_next(lst, wlp);
+ pw = list_next(lst, pw);
+ continue;
}
+
+ signalfd_state_release(state, B_FALSE);
+ pw->spw_state = NULL;
+ pollwakeup(&pw->spw_pollhd, pollev);
+ pollhead_clean(&pw->spw_pollhd);
+
+ next = list_next(lst, pw);
+ list_remove(lst, pw);
+ kmem_free(pw, sizeof (*pw));
+ pw = next;
}
}
@@ -290,7 +303,7 @@ _NOTE(ARGSUSED(1))
static int
signalfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
{
- signalfd_state_t *state;
+ signalfd_state_t *state, **sstate;
major_t major = getemajor(*devp);
minor_t minor = getminor(*devp);
@@ -300,18 +313,20 @@ signalfd_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
mutex_enter(&signalfd_lock);
minor = (minor_t)id_allocff(signalfd_minor);
-
if (ddi_soft_state_zalloc(signalfd_softstate, minor) != DDI_SUCCESS) {
id_free(signalfd_minor, minor);
mutex_exit(&signalfd_lock);
return (ENODEV);
}
- state = ddi_get_soft_state(signalfd_softstate, minor);
- *devp = makedevice(major, minor);
+ state = kmem_zalloc(sizeof (*state), KM_SLEEP);
+ state->sfd_valid = B_TRUE;
+ state->sfd_count = 1;
+ list_insert_head(&signalfd_state, (void *)state);
- state->sfd_next = signalfd_state;
- signalfd_state = state;
+ sstate = ddi_get_soft_state(signalfd_softstate, minor);
+ *sstate = state;
+ *devp = makedevice(major, minor);
mutex_exit(&signalfd_lock);
@@ -405,6 +420,9 @@ consume_signal(k_sigset_t set, uio_t *uio, boolean_t block)
lwp->lwp_extsig = 0;
mutex_exit(&p->p_lock);
+ if (PROC_IS_BRANDED(p) && BROP(p)->b_sigfd_translate)
+ BROP(p)->b_sigfd_translate(infop);
+
/* Convert k_siginfo into external, datamodel independent, struct. */
bzero(ssp, sizeof (*ssp));
ssp->ssi_signo = infop->si_signo;
@@ -439,7 +457,7 @@ _NOTE(ARGSUSED(2))
static int
signalfd_read(dev_t dev, uio_t *uio, cred_t *cr)
{
- signalfd_state_t *state;
+ signalfd_state_t *state, **sstate;
minor_t minor = getminor(dev);
boolean_t block = B_TRUE;
k_sigset_t set;
@@ -449,7 +467,8 @@ signalfd_read(dev_t dev, uio_t *uio, cred_t *cr)
if (uio->uio_resid < sizeof (signalfd_siginfo_t))
return (EINVAL);
- state = ddi_get_soft_state(signalfd_softstate, minor);
+ sstate = ddi_get_soft_state(signalfd_softstate, minor);
+ state = *sstate;
if (uio->uio_fmode & (FNDELAY|FNONBLOCK))
block = B_FALSE;
@@ -462,15 +481,26 @@ signalfd_read(dev_t dev, uio_t *uio, cred_t *cr)
return (set_errno(EINVAL));
do {
- res = consume_signal(state->sfd_set, uio, block);
- if (res == 0)
- got_one = B_TRUE;
+ res = consume_signal(set, uio, block);
- /*
- * After consuming one signal we won't block trying to consume
- * further signals.
- */
- block = B_FALSE;
+ if (res == 0) {
+ /*
+ * After consuming one signal, do not block while
+ * trying to consume more.
+ */
+ got_one = B_TRUE;
+ block = B_FALSE;
+
+ /*
+ * Refresh the matching signal set in case it was
+ * updated during the wait.
+ */
+ mutex_enter(&state->sfd_lock);
+ set = state->sfd_set;
+ mutex_exit(&state->sfd_lock);
+ if (sigisempty(&set))
+ break;
+ }
} while (res == 0 && uio->uio_resid >= sizeof (signalfd_siginfo_t));
if (got_one)
@@ -499,13 +529,14 @@ static int
signalfd_poll(dev_t dev, short events, int anyyet, short *reventsp,
struct pollhead **phpp)
{
- signalfd_state_t *state;
+ signalfd_state_t *state, **sstate;
minor_t minor = getminor(dev);
kthread_t *t = curthread;
proc_t *p = ttoproc(t);
short revents = 0;
- state = ddi_get_soft_state(signalfd_softstate, minor);
+ sstate = ddi_get_soft_state(signalfd_softstate, minor);
+ state = *sstate;
mutex_enter(&state->sfd_lock);
@@ -515,39 +546,36 @@ signalfd_poll(dev_t dev, short events, int anyyet, short *reventsp,
mutex_exit(&state->sfd_lock);
if (!(*reventsp = revents & events) && !anyyet) {
- *phpp = &state->sfd_pollhd;
+ sigfd_proc_state_t *pstate;
+ sigfd_poll_waiter_t *pw;
/*
* Enable pollwakeup handling.
*/
- if (p->p_sigfd == NULL) {
- sigfd_proc_state_t *pstate;
+ mutex_enter(&p->p_lock);
+ if ((pstate = (sigfd_proc_state_t *)p->p_sigfd) == NULL) {
- pstate = kmem_zalloc(sizeof (sigfd_proc_state_t),
- KM_SLEEP);
+ mutex_exit(&p->p_lock);
+ pstate = kmem_zalloc(sizeof (*pstate), KM_SLEEP);
list_create(&pstate->sigfd_list,
- sizeof (sigfd_wake_list_t),
- offsetof(sigfd_wake_list_t, sigfd_wl_lst));
+ sizeof (sigfd_poll_waiter_t),
+ offsetof(sigfd_poll_waiter_t, spw_list));
+ pstate->sigfd_pollwake_cb = signalfd_pollwake_cb;
+ /* Check again, after blocking for the alloc. */
mutex_enter(&p->p_lock);
- /* check again now that we're locked */
if (p->p_sigfd == NULL) {
p->p_sigfd = pstate;
} else {
/* someone beat us to it */
list_destroy(&pstate->sigfd_list);
- kmem_free(pstate, sizeof (sigfd_proc_state_t));
+ kmem_free(pstate, sizeof (*pstate));
+ pstate = p->p_sigfd;
}
- mutex_exit(&p->p_lock);
}
- mutex_enter(&p->p_lock);
- if (((sigfd_proc_state_t *)p->p_sigfd)->sigfd_pollwake_cb ==
- NULL) {
- ((sigfd_proc_state_t *)p->p_sigfd)->sigfd_pollwake_cb =
- signalfd_pollwake_cb;
- }
- signalfd_wake_list_add(state);
+ pw = signalfd_wake_list_add(pstate, state);
+ *phpp = &pw->spw_pollhd;
mutex_exit(&p->p_lock);
}
@@ -558,11 +586,12 @@ _NOTE(ARGSUSED(4))
static int
signalfd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
{
- signalfd_state_t *state;
+ signalfd_state_t *state, **sstate;
minor_t minor = getminor(dev);
sigset_t mask;
- state = ddi_get_soft_state(signalfd_softstate, minor);
+ sstate = ddi_get_soft_state(signalfd_softstate, minor);
+ state = *sstate;
switch (cmd) {
case SIGNALFDIOC_MASK:
@@ -587,33 +616,42 @@ _NOTE(ARGSUSED(1))
static int
signalfd_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
{
- signalfd_state_t *state, **sp;
+ signalfd_state_t *state, **sstate;
+ sigfd_poll_waiter_t *pw = NULL;
minor_t minor = getminor(dev);
proc_t *p = curproc;
- state = ddi_get_soft_state(signalfd_softstate, minor);
-
- if (state->sfd_pollhd.ph_list != NULL) {
- pollwakeup(&state->sfd_pollhd, POLLERR);
- pollhead_clean(&state->sfd_pollhd);
- }
+ sstate = ddi_get_soft_state(signalfd_softstate, minor);
+ state = *sstate;
- /* Make sure our state is removed from our proc's pollwake list. */
+ /* Make sure state is removed from this proc's pollwake list. */
mutex_enter(&p->p_lock);
- signalfd_wake_list_rm(p, state);
- mutex_exit(&p->p_lock);
+ if (p->p_sigfd != NULL) {
+ sigfd_proc_state_t *pstate = p->p_sigfd;
- mutex_enter(&signalfd_lock);
+ pw = signalfd_wake_list_rm(pstate, state);
+ if (list_is_empty(&pstate->sigfd_list)) {
+ signalfd_wake_list_cleanup(p);
+ }
+ }
+ mutex_exit(&p->p_lock);
- /* Remove our state from our global list. */
- for (sp = &signalfd_state; *sp != state; sp = &((*sp)->sfd_next))
- VERIFY(*sp != NULL);
+ if (pw != NULL) {
+ pollwakeup(&pw->spw_pollhd, POLLERR);
+ pollhead_clean(&pw->spw_pollhd);
+ kmem_free(pw, sizeof (*pw));
+ }
- *sp = (*sp)->sfd_next;
+ mutex_enter(&signalfd_lock);
+ *sstate = NULL;
ddi_soft_state_free(signalfd_softstate, minor);
id_free(signalfd_minor, minor);
+ mutex_enter(&state->sfd_lock);
+ state->sfd_valid = B_FALSE;
+ signalfd_state_release(state, B_TRUE);
+
mutex_exit(&signalfd_lock);
return (0);
@@ -635,7 +673,7 @@ signalfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
}
if (ddi_soft_state_init(&signalfd_softstate,
- sizeof (signalfd_state_t), 0) != 0) {
+ sizeof (signalfd_state_t *), 0) != 0) {
cmn_err(CE_WARN, "signalfd failed to create soft state");
id_space_destroy(signalfd_minor);
mutex_exit(&signalfd_lock);
@@ -656,6 +694,9 @@ signalfd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
sigfd_exit_helper = signalfd_exit_helper;
+ list_create(&signalfd_state, sizeof (signalfd_state_t),
+ offsetof(signalfd_state_t, sfd_list));
+
mutex_exit(&signalfd_lock);
return (DDI_SUCCESS);
@@ -673,10 +714,19 @@ signalfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
return (DDI_FAILURE);
}
- /* list should be empty */
- VERIFY(signalfd_state == NULL);
-
mutex_enter(&signalfd_lock);
+
+ if (!list_is_empty(&signalfd_state)) {
+ /*
+ * There are dangling poll waiters holding signalfd_state_t
+ * entries on the global list. Detach is not possible until
+ * they purge themselves.
+ */
+ return (DDI_FAILURE);
+ mutex_exit(&signalfd_lock);
+ }
+ list_destroy(&signalfd_state);
+
id_space_destroy(signalfd_minor);
ddi_remove_minor_node(signalfd_devi, NULL);
diff --git a/usr/src/uts/common/io/stream.c b/usr/src/uts/common/io/stream.c
index e9af19ca18..994ca8baa8 100644
--- a/usr/src/uts/common/io/stream.c
+++ b/usr/src/uts/common/io/stream.c
@@ -1451,6 +1451,16 @@ copyb(mblk_t *bp)
ndp = nbp->b_datap;
/*
+ * Copy the various checksum information that came in
+ * originally.
+ */
+ ndp->db_cksumstart = dp->db_cksumstart;
+ ndp->db_cksumend = dp->db_cksumend;
+ ndp->db_cksumstuff = dp->db_cksumstuff;
+ bcopy(dp->db_struioun.data, ndp->db_struioun.data,
+ sizeof (dp->db_struioun.data));
+
+ /*
* Well, here is a potential issue. If we are trying to
* trace a flow, and we copy the message, we might lose
* information about where this message might have been.
diff --git a/usr/src/uts/common/io/udmf/dm9601reg.h b/usr/src/uts/common/io/udmf/dm9601reg.h
new file mode 100644
index 0000000000..a36f2b0fc8
--- /dev/null
+++ b/usr/src/uts/common/io/udmf/dm9601reg.h
@@ -0,0 +1,348 @@
+/*
+ * %W% %E%
+ * Macro definitions for Davicom DM9601 USB to fast ethernet controler
+ * based on Davicom DM9601E data sheet
+ * This file is public domain. Coded by M.Murayama (KHF04453@nifty.com)
+ */
+
+#ifndef __DM9601_H__
+#define __DM9601_H__
+
+/*
+ * offset of registers
+ */
+#define NCR 0x00U /* network control register */
+#define NSR 0x01U /* network status register */
+#define TCR 0x02U /* tx control register */
+#define TSR1 0x03U /* tx status register 1 */
+#define TSR2 0x04U /* tx status register 2 */
+#define RCR 0x05U /* rx control register */
+#define RSR 0x06U /* rx status register */
+#define ROCR 0x07U /* rx overflow counter register */
+#define BPTR 0x08U /* back pressure threshold regster */
+#define FCTR 0x09U /* flow control threshold regster */
+#define FCR 0x0aU /* flow control threshold regster */
+#define EPCR 0x0bU /* eeprom & phy control register */
+#define EPAR 0x0cU /* eeprom & phy address register */
+#define EPDR 0x0dU /* eeprom & phy data register (2byte) */
+#define WCR 0x0fU /* wake up control register */
+#define PAR 0x10U /* physical address register (6byte) */
+#define MAR 0x16U /* multicast address register (8byte) */
+#define GPCR 0x1eU /* general purpose control register */
+#define GPR 0x1fU /* general purpose register */
+#define VID 0x28U /* vendor ID (2byte) */
+#define PID 0x2aU /* product ID (2byte) */
+#define CHIPR 0x2cU /* chip revision */
+#define USBDA 0xf0U /* usb device address register */
+#define RXC 0xf1U /* received packet counter register */
+#define TUSC 0xf2U /* tx packet counter/usb status register */
+#define USBC 0xf4U /* usb control register */
+
+/*
+ * register definitions
+ */
+/* network control register */
+#define NCR_EXT_PHY 0x80U /* 1: select external phy */
+#define NCR_WAKEEN 0x40U /* 1: wake up event enable */
+#define NCR_FCOL 0x10U /* force collision mode for test */
+#define NCR_FDX 0x08U /* 1: full duplex mode (for external phy) */
+#define NCR_LBK 0x06U
+#define NCR_LBK_SHIFT 1
+#define NCR_LBK_NORMAL (0U << NCR_LBK_SHIFT)
+#define NCR_LBK_MAC (1U << NCR_LBK_SHIFT)
+#define NCR_LBK_PHY_D (2U << NCR_LBK_SHIFT)
+#define NCR_LBK_PHY_A (3U << NCR_LBK_SHIFT)
+#define NCR_RST 0x01U /* 1: reset, auto clear */
+
+#define NCR_BITS \
+ "\020" \
+ "\010EXT_PHY" \
+ "\007WAKEEN" \
+ "\005FCOL" \
+ "\004FDX" \
+ "\001RST"
+
+/* network status register */
+#define NSR_SPEED 0x80U /* 1:10M 0:100M */
+#define NSR_LINKST 0x40U /* 1:ok 0:fail */
+#define NSR_WAKEST 0x20U /* 1:enabled */
+#define NSR_TXFULL 0x10U /* 1:tx fifo full */
+#define NSR_TX2END 0x08U /* tx packet2 complete status */
+#define NSR_TX1END 0x04U /* tx packet1 complete status */
+#define NSR_RXOV 0x02U /* rx fifo overflow */
+#define NSR_RXRDY 0x01U /* rx packet ready */
+
+#define NSR_BITS \
+ "\020" \
+ "\010SPEED_10" \
+ "\007LINKST_UP" \
+ "\006WAKEST" \
+ "\005TXFULL" \
+ "\004TX2END" \
+ "\003TX1END" \
+ "\002RXOV" \
+ "\001RXRDY"
+
+/* tx control register */
+#define TCR_TJDIS 0x40U /* tx jitter control */
+#define TCR_EXCEDM 0x20U /* excessive collision mode */
+#define TCR_PAD_DIS2 0x10U /* PAD appends disable for pkt2 */
+#define TCR_CRC_DIS2 0x08U /* CRC appends disable for pkt2 */
+#define TCR_PAD_DIS1 0x04U /* PAD appends disable for pkt1 */
+#define TCR_CRC_DIS1 0x02U /* CRC appends disable for pkt1 */
+
+#define TCR_BITS \
+ "\020" \
+ "\007TJDIS" \
+ "\006EXCEDM" \
+ "\005PAD_DIS2" \
+ "\004CRC_DIS2" \
+ "\003PAD_DIS1" \
+ "\002CRC_DIS1"
+
+/* tx status register (ro) */
+#define TSR_TJTO 0x80U /* tx jabber time out */
+#define TSR_LC 0x40U /* loss of carrier */
+#define TSR_NC 0x20U /* no carrier */
+#define TSR_LATEC 0x10U /* late collision */
+#define TSR_COL 0x08U /* late collision */
+#define TSR_EL 0x04U /* excessive collision */
+
+#define TSR_BITS \
+ "\020" \
+ "\010TJTO" \
+ "\007LC" \
+ "\006NC" \
+ "\005LATEC" \
+ "\004COL" \
+ "\003EL"
+
+/* rx control register */
+#define RCR_WTDIS 0x40U /* watch dog timer disable */
+#define RCR_DIS_LONG 0x20U /* discard longer packets than 1522 */
+#define RCR_DIS_CRC 0x10U /* discard crc error packets */
+#define RCR_ALL 0x08U /* pass all multicast */
+#define RCR_RUNT 0x04U /* pass runt packets */
+#define RCR_PRMSC 0x02U /* promiscuous mode */
+#define RCR_RXEN 0x01U /* rx enable */
+
+#define RCR_BITS \
+ "\020" \
+ "\007WTDIS" \
+ "\006DIS_LONG" \
+ "\005DIS_CRC" \
+ "\004ALL" \
+ "\003RUNT" \
+ "\002PRMSC" \
+ "\001RXEN"
+
+/* rx status register */
+#define RSR_RF 0x80U /* runt frame */
+#define RSR_MF 0x40U /* multicast frame */
+#define RSR_LCS 0x20U /* late collision seen */
+#define RSR_RWTO 0x10U /* receive watchdog timeout */
+#define RSR_PLE 0x08U /* physical layer error */
+#define RSR_AE 0x04U /* alignment error */
+#define RSR_CE 0x02U /* crc error */
+#define RSR_FOE 0x01U /* fifo overflow error */
+
+#define RSR_BITS \
+ "\020" \
+ "\010RF" \
+ "\007MF" \
+ "\006LCS" \
+ "\005RWTO" \
+ "\004PLE" \
+ "\003AE" \
+ "\002CE" \
+ "\001FOE"
+
+/* receive overflow counter register */
+#define ROCR_RXFU 0x80U /* receive overflow counter overflow */
+#define ROCR_ROC 0x7fU /* receive overflow counter */
+
+#define ROCR_BITS \
+ "\020" \
+ "\010RXFU"
+
+/* back pressure threshold register */
+#define BPTR_BPHW 0xf0U /* high water overflow threshold */
+#define BPTR_BPHW_SHIFT 4
+#define BPTR_BPHW_UNIT 1024U
+#define BPTR_BPHW_DEFAULT (3 << BPTR_BPHW_SHIFT) /* 3k */
+#define BPTR_JPT 0x0fU /* jam pattern time */
+#define BPTR_JPT_SHIFT 0
+#define BPTR_JPT_5us (0U << BPTR_JPT_SHIFT)
+#define BPTR_JPT_10us (1U << BPTR_JPT_SHIFT)
+#define BPTR_JPT_15us (2U << BPTR_JPT_SHIFT)
+#define BPTR_JPT_25us (3U << BPTR_JPT_SHIFT)
+#define BPTR_JPT_50us (4U << BPTR_JPT_SHIFT)
+#define BPTR_JPT_100us (5U << BPTR_JPT_SHIFT)
+#define BPTR_JPT_150us (6U << BPTR_JPT_SHIFT)
+#define BPTR_JPT_200us (7U << BPTR_JPT_SHIFT)
+#define BPTR_JPT_250us (8U << BPTR_JPT_SHIFT)
+#define BPTR_JPT_300us (9U << BPTR_JPT_SHIFT)
+#define BPTR_JPT_350us (10U << BPTR_JPT_SHIFT)
+#define BPTR_JPT_400us (11U << BPTR_JPT_SHIFT)
+#define BPTR_JPT_450us (12U << BPTR_JPT_SHIFT)
+#define BPTR_JPT_500us (13U << BPTR_JPT_SHIFT)
+#define BPTR_JPT_550us (14U << BPTR_JPT_SHIFT)
+#define BPTR_JPT_600us (15U << BPTR_JPT_SHIFT)
+
+/* flow control threshold register */
+#define FCTR_HWOT 0xf0U /* rx fifo high water overflow threshold */
+#define FCTR_HWOT_SHIFT 4
+#define FCTR_HWOT_UNIT 1024U
+#define FCTR_LWOT 0x0fU /* rx fifo low water overflow threshold */
+#define FCTR_LWOT_SHIFT 0
+#define FCTR_LWOT_UNIT 1024U
+
+/* rx/tx flow control register */
+#define FCR_TXPO 0x80U /* tx pause packet */
+#define FCR_TXPF 0x40U /* tx pause packet */
+#define FCR_TXPEN 0x20U /* tx pause packet */
+#define FCR_BKPA 0x10U /* back pressure mode */
+#define FCR_BKPM 0x08U /* back pressure mode */
+#define FCR_BKPS 0x04U /* rx pause packet current status (r/c) */
+#define FCR_RXPCS 0x02U /* rx pause packet current status (ro) */
+#define FCR_FLCE 0x01U /* flow control enbale */
+
+#define FCR_BITS \
+ "\020" \
+ "\000TXPO" \
+ "\000TXPF" \
+ "\000TXPEN" \
+ "\000BKPA" \
+ "\000BKPM" \
+ "\000BKPS" \
+ "\000RXPCS" \
+ "\000FLCE"
+
+/* EEPROM & PHY control register (0x0b) */
+#define EPCR_REEP 0x20U /* reload eeprom */
+#define EPCR_WEP 0x10U /* write eeprom enable */
+#define EPCR_EPOS 0x08U /* select device, 0:eeprom, 1:phy */
+#define EPCR_ERPRR 0x04U /* read command */
+#define EPCR_ERPRW 0x02U /* write command */
+#define EPCR_ERRE 0x01U /* eeprom/phy access in progress (ro) */
+
+#define EPCR_BITS \
+ "\020" \
+ "\005REEP" \
+ "\004WEP" \
+ "\003EPOS" \
+ "\002ERPRR" \
+ "\001ERPRW" \
+ "\000ERRE"
+
+/* EEPROM & PHY access register (0x0c) */
+#define EPAR_PHYADR 0xc0U /* phy address, internal phy(1) or external */
+#define EPAR_PHYADR_SHIFT 6
+#define EPAR_EROA 0x3fU /* eeprom word addr or phy register addr */
+#define EPAR_EROA_SHIFT 0
+
+/* EEPROM & PHY data register (0x0d(low)-0x0e(hi)) */
+
+/* wake up control register (0x0f) */
+#define WCR_LINKEN 0x20U /* enable link status event */
+#define WCR_SAMPLEEN 0x10U /* enable sample frame event */
+#define WCR_MAGICEN 0x08U /* enable magic pkt event */
+#define WCR_LINKST 0x04U /* link status change occur ro */
+#define WCR_SAMPLEST 0x02U /* sample frame rx occur ro */
+#define WCR_MAGICST 0x01U /* magic pkt rx occur ro */
+
+#define WCR_BITS \
+ "\020" \
+ "\000LINKEN" \
+ "\000SAMPLEEN" \
+ "\000MAGICEN" \
+ "\000LINKST" \
+ "\000SAMPLEST" \
+ "\000MAGICST"
+
+/* physical address register (0x10-0x15) */
+/* multicast address register (0x16-0x1c) */
+/* general purpose control register (0x1e) */
+#define GPCR_GEPCTRL 0x7f
+#define GPCR_OUT(n) (1U << (n))
+
+#define GPCR_BITS \
+ "\020" \
+ "\006OUT5" \
+ "\005OUT4" \
+ "\004OUT3" \
+ "\003OUT2" \
+ "\002OUT1" \
+ "\001OUT0"
+
+/* general purpose register (0x1f) */
+#define GPR_GEPIO5 0x20U
+#define GPR_GEPIO4 0x10U
+#define GPR_GEPIO3 0x08U
+#define GPR_GEPIO2 0x04U
+#define GPR_GEPIO1 0x02U
+#define GPR_GEPIO0 0x01U
+
+#define GPR_BITS \
+ "\020" \
+ "\006GEPIO5" \
+ "\005GEPIO4" \
+ "\004GEPIO3" \
+ "\003GEPIO2" \
+ "\002GEPIO1" \
+ "\001GEPIO0"
+
+/* vendor id register (0x28-0x29) */
+/* product id register (0x2a-0x2b) */
+/* chip revision register (0x2c) */
+
+/* usb device address register (0xf0) */
+#define USBDA_USBFA 0x3fU /* usb device address */
+#define USBDA_USBFA_SHIFT 0
+
+/* receive packet counter register (0xf1) */
+
+/* transmitpacket counter/usb status register (0xf2) */
+#define TUSR_RXFAULT 0x80U /* indicate rx has unexpected condition */
+#define TUSR_SUSFLAG 0x40U /* indicate device has suspended condition */
+#define TUSR_EP1RDY 0x20U /* ready for read from ep1 pipe */
+#define TUSR_SRAM 0x18U /* sram size 0:32K, 1:48K, 2:16K, 3:64K */
+#define TUSR_SRAM_SHIFT 3
+#define TUSR_SRAM_32K (0U << TUSR_SRAM_SHIFT)
+#define TUSR_SRAM_48K (1U << TUSR_SRAM_SHIFT)
+#define TUSR_SRAM_16K (2U << TUSR_SRAM_SHIFT)
+#define TUSR_SRAM_64K (3U << TUSR_SRAM_SHIFT)
+#define TUSR_TXC2 0x04U /* two or more packets in tx buffer */
+#define TUSR_TXC1 0x02U /* one packet in tx buffer */
+#define TUSR_TXC0 0x01U /* no packet in tx buffer */
+
+#define TUSR_BITS \
+ "\020" \
+ "\010RXFAULT" \
+ "\007SUSFLAG" \
+ "\006EP1RDY" \
+ "\003TXC2" \
+ "\002TXC1" \
+ "\001TXC0"
+
+/* usb control register (0xf4) */
+#define USBC_EP3ACK 0x20U /* ep3 will alway return 8byte data if NAK=0*/
+#define USBC_EP3NACK 0x10U /* ep3 will alway return NAK */
+#define USBC_MEMTST 0x01U
+
+/* bulk message format */
+#define TX_HEADER_SIZE 2
+#define RX_HEADER_SIZE 3
+
+/* interrupt msg format */
+struct intr_msg {
+ uint8_t im_nsr;
+ uint8_t im_tsr1;
+ uint8_t im_tsr2;
+ uint8_t im_rsr;
+ uint8_t im_rocr;
+ uint8_t im_rxc;
+ uint8_t im_txc;
+ uint8_t im_gpr;
+};
+#endif /* __DM9601_H__ */
diff --git a/usr/src/uts/common/io/udmf/udmf_usbgem.c b/usr/src/uts/common/io/udmf/udmf_usbgem.c
new file mode 100644
index 0000000000..0637de054b
--- /dev/null
+++ b/usr/src/uts/common/io/udmf/udmf_usbgem.c
@@ -0,0 +1,1036 @@
+/*
+ * udmfE_usbgem.c : Davicom DM9601E USB to Fast Ethernet Driver for Solaris
+ *
+ * Copyright (c) 2009-2012 Masayuki Murayama. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the author nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+#pragma ident "%W% %E%"
+
+/*
+ * Changelog:
+ */
+
+/*
+ * TODO
+ */
+/* ======================================================= */
+
+/*
+ * Solaris system header files and macros
+ */
+
+/* minimum kernel headers for drivers */
+#include <sys/types.h>
+#include <sys/conf.h>
+#include <sys/debug.h>
+#include <sys/kmem.h>
+#include <sys/modctl.h>
+#include <sys/errno.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/byteorder.h>
+
+/* ethernet stuff */
+#include <sys/ethernet.h>
+
+/* interface card depend stuff */
+#include <sys/stropts.h>
+#include <sys/stream.h>
+#include <sys/strlog.h>
+#include <sys/usb/usba.h>
+#include "usbgem.h"
+
+/* hardware stuff */
+#include "usbgem_mii.h"
+#include "dm9601reg.h"
+
+char ident[] = "dm9601 usbnic driver v" VERSION;
+
+/*
+ * Useful macros
+ */
+#define CHECK_AND_JUMP(err, label) if (err != USB_SUCCESS) goto label
+#define LE16P(p) ((((uint8_t *)(p))[1] << 8) | ((uint8_t *)(p))[0])
+
+/*
+ * Debugging
+ */
+#ifdef DEBUG_LEVEL
+static int udmf_debug = DEBUG_LEVEL;
+#define DPRINTF(n, args) if (udmf_debug > (n)) cmn_err args
+#else
+#define DPRINTF(n, args)
+#endif
+
+/*
+ * Our configration for dm9601
+ */
+/* timeouts */
+#define ONESEC (drv_usectohz(1*1000000))
+
+/*
+ * Local device definitions
+ */
+struct udmf_dev {
+ /*
+ * Misc HW information
+ */
+ uint8_t rcr;
+ uint8_t last_nsr;
+ uint8_t mac_addr[ETHERADDRL];
+};
+
+/*
+ * private functions
+ */
+
+/* mii operations */
+static uint16_t udmf_mii_read(struct usbgem_dev *, uint_t, int *errp);
+static void udmf_mii_write(struct usbgem_dev *, uint_t, uint16_t, int *errp);
+
+/* nic operations */
+static int udmf_reset_chip(struct usbgem_dev *);
+static int udmf_init_chip(struct usbgem_dev *);
+static int udmf_start_chip(struct usbgem_dev *);
+static int udmf_stop_chip(struct usbgem_dev *);
+static int udmf_set_media(struct usbgem_dev *);
+static int udmf_set_rx_filter(struct usbgem_dev *);
+static int udmf_get_stats(struct usbgem_dev *);
+static void udmf_interrupt(struct usbgem_dev *, mblk_t *);
+
+/* packet operations */
+static mblk_t *udmf_tx_make_packet(struct usbgem_dev *, mblk_t *);
+static mblk_t *udmf_rx_make_packet(struct usbgem_dev *, mblk_t *);
+
+/* =============================================================== */
+/*
+ * I/O functions
+ */
+/* =============================================================== */
+#define OUT(dp, ix, len, buf, errp, label) \
+ if ((*(errp) = usbgem_ctrl_out((dp), \
+ /* bmRequestType */ USB_DEV_REQ_HOST_TO_DEV \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ 1, \
+ /* wValue */ 0, \
+ /* wIndex */ (ix), \
+ /* wLength */ (len), \
+ /* value */ (buf), \
+ /* size */ (len))) != USB_SUCCESS) goto label
+
+#define OUTB(dp, ix, val, errp, label) \
+ if ((*(errp) = usbgem_ctrl_out((dp), \
+ /* bmRequestType */ USB_DEV_REQ_HOST_TO_DEV \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ 3, \
+ /* wValue */ (val), \
+ /* wIndex */ (ix), \
+ /* wLength */ 0, \
+ /* value */ NULL, \
+ /* size */ 0)) != USB_SUCCESS) goto label
+
+#define IN(dp, ix, len, buf, errp, label) \
+ if ((*(errp) = usbgem_ctrl_in((dp), \
+ /* bmRequestType */ USB_DEV_REQ_DEV_TO_HOST \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ 0, \
+ /* wValue */ 0, \
+ /* wIndex */ (ix), \
+ /* wLength */ (len), \
+ /* valuep */ (buf), \
+ /* size */ (len))) != USB_SUCCESS) goto label
+
+/* =============================================================== */
+/*
+ * Hardware manupilation
+ */
+/* =============================================================== */
+static void
+udmf_enable_phy(struct usbgem_dev *dp)
+{
+ int err = USB_SUCCESS;
+
+ /* de-assert reset signal to phy */
+ OUTB(dp, GPCR, GPCR_OUT(0), &err, usberr);
+ OUTB(dp, GPR, 0, &err, usberr);
+usberr:
+ ;
+}
+
+static int
+udmf_reset_chip(struct usbgem_dev *dp)
+{
+ int err = USB_SUCCESS;
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ OUTB(dp, NCR, NCR_LBK_NORMAL | NCR_RST, &err, usberr);
+ drv_usecwait(100);
+usberr:
+ return (err);
+}
+
+/*
+ * Setup dm9601
+ */
+static int
+udmf_init_chip(struct usbgem_dev *dp)
+{
+ int i;
+ uint32_t val;
+ int err = USB_SUCCESS;
+ uint16_t reg;
+ uint8_t buf[2];
+ struct udmf_dev *lp = dp->private;
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ OUTB(dp, NCR, NCR_LBK_NORMAL, &err, usberr);
+
+ /* tx control regiser: enable padding and crc generation */
+ OUTB(dp, TCR, 0, &err, usberr);
+
+ /* rx control register: will be set later by udmf_set_rx_filer() */
+ lp->rcr = RCR_RUNT;
+
+ /* back pressure threshold: */
+ OUTB(dp, BPTR, (2 << BPTR_BPHW_SHIFT) | BPTR_JPT_200us,
+ &err, usberr);
+
+ /* flow control threshold: same as default */
+ OUTB(dp, FCTR, (3 << FCTR_HWOT_SHIFT) | (8 << FCTR_LWOT_SHIFT),
+ &err, usberr);
+
+ /* usb control register */
+ OUTB(dp, USBC, USBC_EP3ACK | 0x06, &err, usberr);
+
+ /* flow control: will be set later by udmf_set_media() */
+
+ /* wake up control register: */
+ OUTB(dp, WCR, 0, &err, usberr);
+
+usberr:
+ DPRINTF(2, (CE_CONT, "!%s: %s: end err:%d(%s)",
+ dp->name, __func__,
+ err, err == USB_SUCCESS ? "success" : "error"));
+ return (err);
+}
+
+static int
+udmf_start_chip(struct usbgem_dev *dp)
+{
+ int err = USB_SUCCESS;
+ struct udmf_dev *lp = dp->private;
+
+ /* enable Rx */
+ lp->rcr |= RCR_RXEN;
+ OUTB(dp, RCR, lp->rcr, &err, usberr);
+
+usberr:
+ DPRINTF(2, (CE_CONT, "!%s: %s: end err:%d(%s)",
+ dp->name, __func__,
+ err, err == USB_SUCCESS ? "success" : "error"));
+ return (err);
+}
+
+static int
+udmf_stop_chip(struct usbgem_dev *dp)
+{
+ int err = USB_SUCCESS;
+ struct udmf_dev *lp = dp->private;
+
+ /* disable rx */
+ lp->rcr &= ~RCR_RXEN;
+ OUTB(dp, RCR, lp->rcr, &err, usberr);
+
+usberr:
+ DPRINTF(2, (CE_CONT, "!%s: %s: end err:%d(%s)",
+ dp->name, __func__,
+ err, err == USB_SUCCESS ? "success" : "error"));
+ return (err);
+}
+
+static int
+udmf_get_stats(struct usbgem_dev *dp)
+{
+ /* EMPTY */
+ return (USB_SUCCESS);
+}
+
+static uint_t
+udmf_mcast_hash(struct usbgem_dev *dp, const uint8_t *addr)
+{
+ return (usbgem_ether_crc_le(addr) & 0x3f);
+}
+
+static int
+udmf_set_rx_filter(struct usbgem_dev *dp)
+{
+ int i;
+ uint8_t rcr;
+ uint8_t mode;
+ uint8_t mhash[8];
+ uint8_t *mac;
+ uint_t h;
+ int err = USB_SUCCESS;
+ struct udmf_dev *lp = dp->private;
+ static uint8_t invalid_mac[ETHERADDRL] = {0, 0, 0, 0, 0, 0};
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: called, rxmode:%x",
+ dp->name, __func__, dp->rxmode));
+
+ if (lp->rcr & RCR_RXEN) {
+ /* set promiscuous mode before changing rx filter mode */
+ OUTB(dp, RCR, lp->rcr | RCR_PRMSC, &err, usberr);
+ }
+
+ lp->rcr &= ~(RCR_ALL | RCR_PRMSC);
+ mode = 0;
+ bzero(mhash, sizeof (mhash));
+ mac = dp->cur_addr.ether_addr_octet;
+
+ if ((dp->rxmode & RXMODE_ENABLE) == 0) {
+ mac = invalid_mac;
+ } else if (dp->rxmode & RXMODE_PROMISC) {
+ /* promiscious mode implies all multicast and all physical */
+ mode |= RCR_PRMSC;
+ } else if ((dp->rxmode & RXMODE_ALLMULTI) || dp->mc_count > 32) {
+ /* accept all multicast packets */
+ mode |= RCR_ALL;
+ } else if (dp->mc_count > 0) {
+ /*
+ * make hash table to select interresting
+ * multicast address only.
+ */
+ for (i = 0; i < dp->mc_count; i++) {
+ /* hash table is 64 = 2^6 bit width */
+ h = dp->mc_list[i].hash;
+ mhash[h / 8] |= 1 << (h % 8);
+ }
+ }
+
+ /* set node address */
+ if (bcmp(mac, lp->mac_addr, ETHERADDRL) != 0) {
+ OUT(dp, PAR, ETHERADDRL, dp->cur_addr.ether_addr_octet,
+ &err, usberr);
+ bcopy(mac, lp->mac_addr, ETHERADDRL);
+ }
+
+ /* set multicast hash table */
+ OUT(dp, MAR, sizeof (mhash), &mhash[0], &err, usberr);
+
+ /* update rcr */
+ lp->rcr |= mode;
+ OUTB(dp, RCR, lp->rcr, &err, usberr);
+
+#if DEBUG_LEVEL > 1
+ /* verify rcr */
+ IN(dp, RCR, 1, &rcr, &err, usberr);
+ cmn_err(CE_CONT, "!%s: %s: rcr:%b returned",
+ dp->name, __func__, rcr, RCR_BITS);
+#endif
+usberr:
+ DPRINTF(2, (CE_CONT, "!%s: %s: end err:%d(%s)",
+ dp->name, __func__,
+ err, err == USB_SUCCESS ? "success" : "error"));
+ return (err);
+}
+
+static int
+udmf_set_media(struct usbgem_dev *dp)
+{
+ int err = USB_SUCCESS;
+ uint8_t fcr;
+ struct udmf_dev *lp = dp->private;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /* setup flow control */
+ fcr = 0;
+ if (dp->full_duplex) {
+ /* select flow control */
+ switch (dp->flow_control) {
+ case FLOW_CONTROL_RX_PAUSE:
+ fcr |= FCR_FLCE;
+ break;
+
+ case FLOW_CONTROL_TX_PAUSE:
+ fcr |= FCR_TXPEN;
+ break;
+
+ case FLOW_CONTROL_SYMMETRIC:
+ fcr |= FCR_FLCE | FCR_TXPEN;
+ break;
+ }
+ }
+
+ /* update flow control register */
+ OUTB(dp, FCR, fcr, &err, usberr);
+
+usberr:
+ DPRINTF(2, (CE_CONT, "!%s: %s: end err:%d(%s)",
+ dp->name, __func__,
+ err, err == USB_SUCCESS ? "success" : "error"));
+ return (err);
+}
+
+/*
+ * send/receive packet check
+ */
+static mblk_t *
+udmf_tx_make_packet(struct usbgem_dev *dp, mblk_t *mp)
+{
+ int n;
+ size_t pkt_size;
+ mblk_t *new;
+ mblk_t *tp;
+ uint8_t *bp;
+ uint8_t *last_pos;
+ uint_t align_mask;
+
+ pkt_size = msgdsize(mp);
+ align_mask = 63;
+
+ /*
+ * re-allocate the mp
+ */
+
+ /* minimum ethernet packet size of ETHERMIN */
+ pkt_size = max(pkt_size, ETHERMIN);
+
+#if 0 /* CONFIG_ADD_TX_DELIMITOR_ALWAYS */
+ pkt_size += TX_HEADER_SIZE;
+#endif
+ if (((pkt_size + TX_HEADER_SIZE) & align_mask) == 0) {
+ /* padding is required in usb communication */
+ pkt_size += TX_HEADER_SIZE;
+ }
+
+ if ((new = allocb(TX_HEADER_SIZE + pkt_size, 0)) == NULL) {
+ return (NULL);
+ }
+ new->b_wptr = new->b_rptr + TX_HEADER_SIZE + pkt_size;
+
+ /* add a header */
+ bp = new->b_rptr;
+ bp[0] = (uint8_t)pkt_size;
+ bp[1] = (uint8_t)(pkt_size >> 8);
+ bp += TX_HEADER_SIZE;
+
+ /* copy contents of the buffer */
+ for (tp = mp; tp; tp = tp->b_cont) {
+ n = tp->b_wptr - tp->b_rptr;
+ bcopy(tp->b_rptr, bp, n);
+ bp += n;
+ }
+
+ /* clear the rest including the next zero length header */
+ last_pos = new->b_wptr;
+ while (bp < last_pos) {
+ *bp++ = 0;
+ }
+
+ return (new);
+}
+
+static void
+udmf_dump_packet(struct usbgem_dev *dp, uint8_t *bp, int n)
+{
+ int i;
+
+ for (i = 0; i < n; i += 8, bp += 8) {
+ cmn_err(CE_CONT, "%02x %02x %02x %02x %02x %02x %02x %02x",
+ bp[0], bp[1], bp[2], bp[3], bp[4], bp[5], bp[6], bp[7]);
+ }
+}
+
+static mblk_t *
+udmf_rx_make_packet(struct usbgem_dev *dp, mblk_t *mp)
+{
+ int len;
+ uint8_t rx_stat;
+
+ len = mp->b_wptr - mp->b_rptr;
+
+ if (len <= RX_HEADER_SIZE) {
+ /*
+ * the usb bulk-in frame doesn't include a valid
+ * ethernet packet.
+ */
+ return (NULL);
+ }
+
+ /* remove rx header */
+ rx_stat = mp->b_rptr[0];
+ if (rx_stat & (RSR_RF | RSR_LCS | RSR_RWTO |
+ RSR_PLE | RSR_AE | RSR_CE | RSR_FOE)) {
+ if (rx_stat & RSR_RF) {
+ dp->stats.runt++;
+ }
+ if (rx_stat & RSR_LCS) {
+ /* late collision */
+ dp->stats.rcv_internal_err++;
+ }
+ if (rx_stat & RSR_RWTO) {
+ /* rx timeout */
+ dp->stats.rcv_internal_err++;
+ }
+ if (rx_stat & RSR_PLE) {
+ /* physical layer error */
+ dp->stats.rcv_internal_err++;
+ }
+ if (rx_stat & RSR_AE) {
+ /* alignment error */
+ dp->stats.frame++;
+ }
+ if (rx_stat & RSR_CE) {
+ /* crc error */
+ dp->stats.crc++;
+ }
+ if (rx_stat & RSR_FOE) {
+ /* fifo overflow error */
+ dp->stats.overflow++;
+ }
+ dp->stats.errrcv++;
+ }
+ len = LE16P(&mp->b_rptr[1]);
+ if (len >= ETHERFCSL) {
+ len -= ETHERFCSL;
+ }
+ mp->b_rptr += RX_HEADER_SIZE;
+ mp->b_wptr = mp->b_rptr + len;
+
+ return (mp);
+}
+
+/*
+ * MII Interfaces
+ */
+static uint16_t
+udmf_ep_read(struct usbgem_dev *dp, uint_t which, uint_t addr, int *errp)
+{
+ int i;
+ uint8_t epcr;
+ uint16_t val;
+
+ DPRINTF(4, (CE_CONT, "!%s: %s: called, ix:%d",
+ dp->name, __func__, addr));
+
+ OUTB(dp, EPAR, addr, errp, usberr);
+ OUTB(dp, EPCR, which | EPCR_ERPRR, errp, usberr);
+
+ for (i = 0; i < 100; i++) {
+ IN(dp, EPCR, sizeof (epcr), &epcr, errp, usberr);
+ if ((epcr & EPCR_ERRE) == 0) {
+ /* done */
+ IN(dp, EPDR, sizeof (val), &val, errp, usberr);
+ val = LE_16(val);
+ goto done;
+ }
+ drv_usecwait(10);
+ }
+ /* timeout */
+ cmn_err(CE_WARN, "!%s: %s: timeout", dp->name, __func__);
+ val = 0;
+done:
+ OUTB(dp, EPCR, 0, errp, usberr);
+ return (val);
+
+usberr:
+ DPRINTF(2, (CE_CONT, "!%s: %s: end err:%d(%s)",
+ dp->name, __func__,
+ *errp, *errp == USB_SUCCESS ? "success" : "error"));
+ return (0);
+}
+
+static void
+udmf_ep_write(struct usbgem_dev *dp, uint_t which, uint_t addr,
+ uint16_t val, int *errp)
+{
+ int i;
+ uint8_t epcr;
+
+ DPRINTF(5, (CE_CONT, "!%s: %s called", dp->name, __func__));
+
+ val = LE_16(val);
+ OUT(dp, EPDR, sizeof (val), &val, errp, usberr);
+
+ OUTB(dp, EPAR, addr, errp, usberr);
+
+ OUTB(dp, EPCR, which | EPCR_WEP | EPCR_ERPRW, errp, usberr);
+
+ for (i = 0; i < 100; i++) {
+ IN(dp, EPCR, 1, &epcr, errp, usberr);
+ if ((epcr & EPCR_ERRE) == 0) {
+ /* done */
+ goto done;
+ }
+ drv_usecwait(10);
+ }
+ /* timeout */
+ cmn_err(CE_WARN, "!%s: %s: timeout", dp->name, __func__);
+done:
+ OUTB(dp, EPCR, 0, errp, usberr);
+ return;
+
+usberr:
+ DPRINTF(2, (CE_CONT, "!%s: %s: end err:%d(%s)",
+ dp->name, __func__,
+ *errp, *errp == USB_SUCCESS ? "success" : "error"));
+}
+
+static uint16_t
+udmf_mii_read(struct usbgem_dev *dp, uint_t index, int *errp)
+{
+ uint16_t val;
+
+ val = udmf_ep_read(dp, EPCR_EPOS,
+ (dp->mii_phy_addr << EPAR_PHYADR_SHIFT) | index, errp);
+
+ return (val);
+}
+
+static void
+udmf_mii_write(struct usbgem_dev *dp, uint_t index, uint16_t val, int *errp)
+{
+ udmf_ep_write(dp, EPCR_EPOS,
+ (dp->mii_phy_addr << EPAR_PHYADR_SHIFT) | index, val, errp);
+}
+
+static void
+udmf_interrupt(struct usbgem_dev *dp, mblk_t *mp)
+{
+ struct intr_msg *imp;
+ struct udmf_dev *lp = dp->private;
+
+ imp = (struct intr_msg *)&mp->b_rptr[0];
+
+ DPRINTF(4, (CE_CONT,
+ "!%s: %s: size:%d, nsr:%b tsr1:%b tsr2:%b"
+ " rsr:%b rocr:%b rxc:%02x txc:%b gpr:%b",
+ dp->name, __func__, mp->b_wptr - mp->b_rptr,
+ imp->im_nsr, NSR_BITS,
+ imp->im_tsr1, TSR_BITS,
+ imp->im_tsr2, TSR_BITS,
+ imp->im_rsr, RSR_BITS,
+ imp->im_rocr, ROCR_BITS,
+ imp->im_rxc,
+ imp->im_txc, TUSR_BITS,
+ imp->im_gpr, GPR_BITS));
+
+ if ((lp->last_nsr ^ imp->im_nsr) & NSR_LINKST) {
+ usbgem_mii_update_link(dp);
+ }
+
+ lp->last_nsr = imp->im_nsr;
+}
+
+/* ======================================================== */
+/*
+ * OS depend (device driver DKI) routine
+ */
+/* ======================================================== */
+static uint16_t
+udmf_eeprom_read(struct usbgem_dev *dp, uint_t index, int *errp)
+{
+ uint16_t val;
+
+ val = udmf_ep_read(dp, 0, index, errp);
+
+ return (val);
+}
+
+#ifdef DEBUG_LEVEL
+static void
+udmf_eeprom_dump(struct usbgem_dev *dp, int size)
+{
+ int i;
+ int err;
+ uint16_t w0, w1, w2, w3;
+
+ cmn_err(CE_CONT, "!%s: eeprom dump:", dp->name);
+
+ err = USB_SUCCESS;
+
+ for (i = 0; i < size; i += 4) {
+ w0 = udmf_eeprom_read(dp, i + 0, &err);
+ w1 = udmf_eeprom_read(dp, i + 1, &err);
+ w2 = udmf_eeprom_read(dp, i + 2, &err);
+ w3 = udmf_eeprom_read(dp, i + 3, &err);
+ cmn_err(CE_CONT, "!0x%02x: 0x%04x 0x%04x 0x%04x 0x%04x",
+ i, w0, w1, w2, w3);
+ }
+usberr:
+ ;
+}
+#endif
+
+static int
+udmf_attach_chip(struct usbgem_dev *dp)
+{
+ int i;
+ uint_t val;
+ uint8_t *m;
+ int err;
+ struct udmf_dev *lp = dp->private;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s enter", dp->name, __func__));
+
+ /*
+ * get mac address from EEPROM
+ */
+ m = dp->dev_addr.ether_addr_octet;
+ for (i = 0; i < ETHERADDRL; i += 2) {
+ val = udmf_eeprom_read(dp, i/2, &err);
+ m[i + 0] = (uint8_t)val;
+ m[i + 1] = (uint8_t)(val >> 8);
+ }
+
+ /* invalidate a private cache for mac addr */
+ bzero(lp->mac_addr, sizeof (lp->mac_addr));
+#ifdef CONFIG_VLAN
+ dp->misc_flag = USBGEM_VLAN;
+#endif
+#if DEBUG_LEVEL > 0
+ udmf_eeprom_dump(dp, /* 0x3f + 1 */ 128);
+#endif
+{
+ static uint8_t bcst[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+ DPRINTF(0, (CE_CONT, "!%s: %s: hash of bcast:%x",
+ dp->name, __func__, usbgem_ether_crc_be(bcst)));
+}
+ return (USB_SUCCESS);
+
+usberr:
+ cmn_err(CE_WARN, "%s: %s: usb error detected (%d)",
+ dp->name, __func__, err);
+ return (USB_FAILURE);
+}
+
+static int
+udmf_mii_probe(struct usbgem_dev *dp)
+{
+ DPRINTF(2, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ udmf_enable_phy(dp);
+ return (usbgem_mii_probe_default(dp));
+}
+
+static int
+udmf_mii_init(struct usbgem_dev *dp)
+{
+ DPRINTF(2, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+ udmf_enable_phy(dp);
+ return (USB_SUCCESS);
+}
+
+static int
+udmfattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ int i;
+ ddi_iblock_cookie_t c;
+ int ret;
+ int revid;
+ int unit;
+ int len;
+ const char *drv_name;
+ struct usbgem_dev *dp;
+ void *base;
+ struct usbgem_conf *ugcp;
+ struct udmf_dev *lp;
+
+ unit = ddi_get_instance(dip);
+ drv_name = ddi_driver_name(dip);
+
+ DPRINTF(3, (CE_CONT, "!%s%d: %s: called, cmd:%d",
+ drv_name, unit, __func__, cmd));
+
+ if (cmd == DDI_ATTACH) {
+ /*
+ * construct usbgem configration
+ */
+ ugcp = kmem_zalloc(sizeof (*ugcp), KM_SLEEP);
+
+ /* name */
+ /*
+ * softmac requires that ppa is the instance number
+ * of the device, otherwise it hangs in seaching the device.
+ */
+ sprintf(ugcp->usbgc_name, "%s%d", drv_name, unit);
+ ugcp->usbgc_ppa = unit;
+
+ ugcp->usbgc_ifnum = 0;
+ ugcp->usbgc_alt = 0;
+
+ ugcp->usbgc_tx_list_max = 64;
+
+ ugcp->usbgc_rx_header_len = RX_HEADER_SIZE;
+ ugcp->usbgc_rx_list_max = 64;
+
+ /* time out parameters */
+ ugcp->usbgc_tx_timeout = USBGEM_TX_TIMEOUT;
+ ugcp->usbgc_tx_timeout_interval = USBGEM_TX_TIMEOUT_INTERVAL;
+#if 1
+ /* flow control */
+ ugcp->usbgc_flow_control = FLOW_CONTROL_RX_PAUSE;
+#else
+ /*
+ * XXX - flow control caused link down frequently under
+ * heavy traffic
+ */
+ ugcp->usbgc_flow_control = FLOW_CONTROL_NONE;
+#endif
+ /* MII timeout parameters */
+ ugcp->usbgc_mii_link_watch_interval =
+ USBGEM_LINK_WATCH_INTERVAL;
+ ugcp->usbgc_mii_an_watch_interval =
+ USBGEM_LINK_WATCH_INTERVAL/5;
+ ugcp->usbgc_mii_reset_timeout = MII_RESET_TIMEOUT; /* 1 sec */
+ ugcp->usbgc_mii_an_timeout = MII_AN_TIMEOUT; /* 5 sec */
+ ugcp->usbgc_mii_an_wait = (25*ONESEC)/10;
+ ugcp->usbgc_mii_linkdown_timeout = MII_LINKDOWN_TIMEOUT;
+
+ ugcp->usbgc_mii_an_delay = ONESEC/10;
+ ugcp->usbgc_mii_linkdown_action = MII_ACTION_RSA;
+ ugcp->usbgc_mii_linkdown_timeout_action = MII_ACTION_RESET;
+ ugcp->usbgc_mii_dont_reset = B_FALSE;
+ ugcp->usbgc_mii_hw_link_detection = B_TRUE;
+
+ /* I/O methods */
+
+ /* mac operation */
+ ugcp->usbgc_attach_chip = &udmf_attach_chip;
+ ugcp->usbgc_reset_chip = &udmf_reset_chip;
+ ugcp->usbgc_init_chip = &udmf_init_chip;
+ ugcp->usbgc_start_chip = &udmf_start_chip;
+ ugcp->usbgc_stop_chip = &udmf_stop_chip;
+ ugcp->usbgc_multicast_hash = &udmf_mcast_hash;
+
+ ugcp->usbgc_set_rx_filter = &udmf_set_rx_filter;
+ ugcp->usbgc_set_media = &udmf_set_media;
+ ugcp->usbgc_get_stats = &udmf_get_stats;
+ ugcp->usbgc_interrupt = &udmf_interrupt;
+
+ /* packet operation */
+ ugcp->usbgc_tx_make_packet = &udmf_tx_make_packet;
+ ugcp->usbgc_rx_make_packet = &udmf_rx_make_packet;
+
+ /* mii operations */
+ ugcp->usbgc_mii_probe = &udmf_mii_probe;
+ ugcp->usbgc_mii_init = &udmf_mii_init;
+ ugcp->usbgc_mii_config = &usbgem_mii_config_default;
+ ugcp->usbgc_mii_read = &udmf_mii_read;
+ ugcp->usbgc_mii_write = &udmf_mii_write;
+ ugcp->usbgc_mii_addr_min = 1;
+
+ /* mtu */
+ ugcp->usbgc_min_mtu = ETHERMTU;
+ ugcp->usbgc_max_mtu = ETHERMTU;
+ ugcp->usbgc_default_mtu = ETHERMTU;
+
+ lp = kmem_zalloc(sizeof (struct udmf_dev), KM_SLEEP);
+ lp->last_nsr;
+
+ ddi_set_driver_private(dip, NULL);
+
+ dp = usbgem_do_attach(dip, ugcp, lp, sizeof (struct udmf_dev));
+
+ kmem_free(ugcp, sizeof (*ugcp));
+
+ if (dp != NULL) {
+ return (DDI_SUCCESS);
+ }
+
+err_free_mem:
+ kmem_free(lp, sizeof (struct udmf_dev));
+err_close_pipe:
+err:
+ return (DDI_FAILURE);
+ }
+
+ if (cmd == DDI_RESUME) {
+ return (usbgem_resume(dip));
+ }
+
+ return (DDI_FAILURE);
+}
+
+static int
+udmfdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ int ret;
+
+ if (cmd == DDI_DETACH) {
+ ret = usbgem_do_detach(dip);
+ if (ret != DDI_SUCCESS) {
+ return (DDI_FAILURE);
+ }
+ return (DDI_SUCCESS);
+ }
+ if (cmd == DDI_SUSPEND) {
+ return (usbgem_suspend(dip));
+ }
+ return (DDI_FAILURE);
+}
+
+/* ======================================================== */
+/*
+ * OS depend (loadable streams driver) routine
+ */
+/* ======================================================== */
+#ifdef USBGEM_CONFIG_GLDv3
+USBGEM_STREAM_OPS(udmf_ops, udmfattach, udmfdetach);
+#else
+static struct module_info udmfminfo = {
+ 0, /* mi_idnum */
+ "udmf", /* mi_idname */
+ 0, /* mi_minpsz */
+ ETHERMTU, /* mi_maxpsz */
+ ETHERMTU*128, /* mi_hiwat */
+ 1, /* mi_lowat */
+};
+
+static struct qinit udmfrinit = {
+ (int (*)()) NULL, /* qi_putp */
+ usbgem_rsrv, /* qi_srvp */
+ usbgem_open, /* qi_qopen */
+ usbgem_close, /* qi_qclose */
+ (int (*)()) NULL, /* qi_qadmin */
+ &udmfminfo, /* qi_minfo */
+ NULL /* qi_mstat */
+};
+
+static struct qinit udmfwinit = {
+ usbgem_wput, /* qi_putp */
+ usbgem_wsrv, /* qi_srvp */
+ (int (*)()) NULL, /* qi_qopen */
+ (int (*)()) NULL, /* qi_qclose */
+ (int (*)()) NULL, /* qi_qadmin */
+ &udmfminfo, /* qi_minfo */
+ NULL /* qi_mstat */
+};
+
+static struct streamtab udmf_info = {
+ &udmfrinit, /* st_rdinit */
+ &udmfwinit, /* st_wrinit */
+ NULL, /* st_muxrinit */
+ NULL /* st_muxwrinit */
+};
+
+static struct cb_ops cb_udmf_ops = {
+ nulldev, /* cb_open */
+ nulldev, /* cb_close */
+ nodev, /* cb_strategy */
+ nodev, /* cb_print */
+ nodev, /* cb_dump */
+ nodev, /* cb_read */
+ nodev, /* cb_write */
+ nodev, /* cb_ioctl */
+ nodev, /* cb_devmap */
+ nodev, /* cb_mmap */
+ nodev, /* cb_segmap */
+ nochpoll, /* cb_chpoll */
+ ddi_prop_op, /* cb_prop_op */
+ &udmf_info, /* cb_stream */
+ D_NEW|D_MP /* cb_flag */
+};
+
+static struct dev_ops udmf_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* devo_refcnt */
+ usbgem_getinfo, /* devo_getinfo */
+ nulldev, /* devo_identify */
+ nulldev, /* devo_probe */
+ udmfattach, /* devo_attach */
+ udmfdetach, /* devo_detach */
+ nodev, /* devo_reset */
+ &cb_udmf_ops, /* devo_cb_ops */
+ NULL, /* devo_bus_ops */
+ usbgem_power, /* devo_power */
+#if DEVO_REV >= 4
+ usbgem_quiesce, /* devo_quiesce */
+#endif
+};
+#endif
+
+static struct modldrv modldrv = {
+ &mod_driverops, /* Type of module. This one is a driver */
+ ident,
+ &udmf_ops, /* driver ops */
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, &modldrv, NULL
+};
+
+/* ======================================================== */
+/*
+ * _init : done
+ */
+/* ======================================================== */
+int
+_init(void)
+{
+ int status;
+
+ DPRINTF(2, (CE_CONT, "!udmf: _init: called"));
+
+ status = usbgem_mod_init(&udmf_ops, "udmf");
+ if (status != DDI_SUCCESS) {
+ return (status);
+ }
+ status = mod_install(&modlinkage);
+ if (status != DDI_SUCCESS) {
+ usbgem_mod_fini(&udmf_ops);
+ }
+ return (status);
+}
+
+/*
+ * _fini : done
+ */
+int
+_fini(void)
+{
+ int status;
+
+ DPRINTF(2, (CE_CONT, "!udmf: _fini: called"));
+ status = mod_remove(&modlinkage);
+ if (status == DDI_SUCCESS) {
+ usbgem_mod_fini(&udmf_ops);
+ }
+ return (status);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
diff --git a/usr/src/uts/common/io/upf/adm8511reg.h b/usr/src/uts/common/io/upf/adm8511reg.h
new file mode 100644
index 0000000000..68a2207bb5
--- /dev/null
+++ b/usr/src/uts/common/io/upf/adm8511reg.h
@@ -0,0 +1,205 @@
+/*
+ * @(#)adm8511reg.h 1.1 09/06/20
+ * Register dehinitsions of ADMtek ADM8511 Fast Ethernet to USB controller.
+ * Codeded by Masayuki Murayama(KHF04453@nifty.ne.jp)
+ * This file is public domain.
+ */
+
+#define EC0 0x00 /* B */
+#define EC1 0x01 /* B */
+#define EC2 0x02 /* B */
+#define MA 0x08 /* 8byte array */
+#define EID 0x10 /* B */
+#define PAUSETIMER 0x18 /* B pause timer */
+#define RPNBFC 0x1a /* B */
+#define ORFBFC 0x1b /* B */
+#define EP1C 0x1c /* B */
+#define RXFC 0x1d /* B */
+#define BIST 0x1e /* B */
+#define EEOFFSET 0x20 /* B */
+#define EEDATA 0x21 /* W */
+#define EECTRL 0x23 /* B */
+#define PHYA 0x25 /* B */
+#define PHYD 0x26 /* W */
+#define PHYAC 0x28 /* B */
+#define USBSTAT 0x2a /* B */
+#define ETHTXSTAT 0x2b /* W */
+#define ETHRXSTAT 0x2d /* B */
+#define LOSTCNT 0x2e /* W */
+#define WF0MASK 0x30 /* 16byte array */
+#define WF0OFFSET 0x40 /* W */
+#define WF0CRC 0x41 /* W */
+#define WF1MASK 0x48 /* 16byte array */
+#define WF1OFFSET 0x58 /* W */
+#define WF1CRC 0x59 /* W */
+#define WF2MASK 0x60 /* 16byte array */
+#define WF2OFFSET 0x70 /* W */
+#define WF2CRC 0x71 /* W */
+#define WCTRL 0x78 /* B */
+#define WSTAT 0x7a /* B */
+#define IPHYC 0x7b /* B */
+#define GPIO54 0x7c /* B */
+#define GPIO10 0x7e /* B */
+#define GPIO32 0x7f /* B */
+#define TEST 0x80 /* B */
+#define TM 0x81 /* B */
+#define RPN 0x82 /* B */
+
+/* Ethernet control register 0: offset 0 */
+#define EC0_TXE 0x80U
+#define EC0_RXE 0x40U
+#define EC0_RXFCE 0x20U
+#define EC0_WOE 0x10U
+#define EC0_RXSA 0x08U
+#define EC0_SBO 0x04U
+#define EC0_RXMA 0x02U
+#define EC0_RXCS 0x01U
+
+#define EC0_BITS \
+ "\020" \
+ "\010TXE" \
+ "\007RXE" \
+ "\006RXFCE" \
+ "\005WOE" \
+ "\004RXSA" \
+ "\003SBO" \
+ "\002RXMA" \
+ "\001RXCS"
+
+/* Ethernet control register 1: offset 1 */
+#define EC1_FD 0x20U
+#define EC1_100M 0x10U /* 0:10Mbps 1:100Mbps */
+#define EC1_RM 0x08U /* reset mac */
+
+#define EC1_BITS \
+ "\020" \
+ "\006FD" \
+ "\005100M" \
+ "\004RM"
+
+/* Ethernet control register 2: offset 2 */
+#define EC2_MEPL 0x80U /* 8515: MTU 0:1528, 1:1638 */
+#define EC2_RPNC 0x40U
+#define EC2_LEEPRS 0x20U
+#define EC2_EEPRW 0x10U
+#define EC2_LB 0x08U
+#define EC2_PROM 0x04U
+#define EC2_RXBP 0x02U
+#define EC2_EP3RC 0x01U
+
+#define EC2_BITS \
+ "\020" \
+ "\010MEPS" \
+ "\007RPNC" \
+ "\006LEEPRS" \
+ "\005EEPRW" \
+ "\004LB" \
+ "\003PROM" \
+ "\002RXBP" \
+ "\001EP3RC"
+
+/* Recieve Packet number based Flow Control register: offset 0x1a */
+#define RPNBFC_PN 0x7eU /* */
+#define RPNBFC_PN_SHIFT 1
+#define RPNBFC_FCP 0x01U /* enable rx flow control */
+
+/* Occupied Recieve FIFO based Flow Control register: offset 0x1b */
+#define ORFBFC_RXS 0x7eU /* */
+#define ORFBFC_RXS_SHIFT 1
+#define ORFBFC_RXS_UNIT 1024U
+#define ORFBFC_FCRXS 0x01U /* enable rx flow control */
+
+/* EP1 control register: offset 0x1c */
+#define EP1C_EP1S0E 0x80U /* send 0 enable */
+#define EP1C_ITMA 0x60U /* internal test mode A */
+#define EP1C_ITMB 0x1fU /* internal test mode B */
+
+#define EP1C_BITS \
+ "\020" \
+ "\010EP1S0E"
+
+/* Rx FIFO Control register: offset 0x1d */
+#define RXFC_EXT_SRAM 0x02 /* enable external 32k sram */
+#define RXFC_RX32PKT 0x01 /* max 32 packet */
+
+/* EEPROM offset register: offset 0x20 */
+#define EEOFFSET_MASK 0x3f /* eeprom offset address in word */
+
+/* EEPROM access control register: offset 0x23 */
+#define EECTRL_DONE 0x04
+#define EECTRL_RD 0x02
+#define EECTRL_WR 0x01
+
+#define EECTRL_BITS \
+ "\020" \
+ "\003DONE" \
+ "\002RD" \
+ "\001WR"
+
+/* PHY control register: offset 28 */
+#define PHYAC_DO 0x80U /* Done */
+#define PHYAC_RDPHY 0x40U /* read phy */
+#define PHYAC_WRPHY 0x20U /* write phy */
+#define PHYAC_PHYRA 0x1fU /* PHY register address */
+
+#define PHYCTRL_BITS \
+ "\020" \
+ "\010DO" \
+ "\007RDPHY" \
+ "\006WRPHY"
+
+/* Internal PHY control register: offset 7b */
+#define IPHYC_EPHY 0x02
+#define IPHYC_PHYR 0x01
+
+#define IPHYC_BITS \
+ "\020" \
+ "\002EPHY" \
+ "\001PHYR"
+
+/* GPIO45 register: offset 7c */
+#define GPIO54_5OE 0x20
+#define GPIO54_5O 0x10
+#define GPIO54_5I 0x08
+#define GPIO54_4OE 0x04
+#define GPIO54_4O 0x02
+#define GPIO54_4I 0x01
+
+/* GPIO01 register: offset 7e */
+#define GPIO10_1OE 0x20
+#define GPIO10_1O 0x10
+#define GPIO10_1I 0x08
+#define GPIO10_0OE 0x04
+#define GPIO10_0O 0x02
+#define GPIO10_0I 0x01
+
+/* GPIO23 register: offset 7f */
+#define GPIO32_3OE 0x20
+#define GPIO32_3O 0x10
+#define GPIO32_3I 0x08
+#define GPIO32_2OE 0x04
+#define GPIO32_2O 0x02
+#define GPIO32_2I 0x01
+
+/* rx status at the end of received packets */
+/* byte 0 and 1 is packet length in little endian */
+/* byte 2 is receive status */
+#define RSR_DRIBBLE 0x10
+#define RSR_CRC 0x08
+#define RSR_RUNT 0x04
+#define RSR_LONG 0x02
+#define RSR_MULTI 0x01
+
+#define RSR_ERRORS \
+ (RSR_DRIBBLE | RSR_CRC | RSR_RUNT | RSR_LONG | RSR_MULTI)
+
+#define RSR_BITS \
+ "\020" \
+ "\005DRIBBLE" \
+ "\004CRC" \
+ "\003RUNT" \
+ "\002LONG" \
+ "\001MULTI"
+/* byte 3 is reserved */
+
+/* TEST register: offset 80 */
diff --git a/usr/src/uts/common/io/upf/upf_usbgem.c b/usr/src/uts/common/io/upf/upf_usbgem.c
new file mode 100644
index 0000000000..5614803158
--- /dev/null
+++ b/usr/src/uts/common/io/upf/upf_usbgem.c
@@ -0,0 +1,1213 @@
+/*
+ * upf_usbgem.c : ADMtek an986/adm8511/adm8513/adm8515 USB to
+ * Fast Ethernet Driver for Solaris
+ */
+
+/*
+ * Copyright (c) 2004-2011 Masayuki Murayama. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the author nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+#pragma ident "%W% %E%"
+
+/*
+ * Changelog:
+ */
+
+/*
+ * TODO
+ */
+/* ======================================================= */
+
+/*
+ * Solaris system header files and macros
+ */
+#include <sys/types.h>
+#include <sys/conf.h>
+#include <sys/debug.h>
+#include <sys/kmem.h>
+#include <sys/modctl.h>
+#include <sys/errno.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/byteorder.h>
+
+/* ethernet stuff */
+#include <sys/ethernet.h>
+
+/* interface card depend stuff */
+#include <sys/stropts.h>
+#include <sys/stream.h>
+#include <sys/strlog.h>
+#include <sys/usb/usba.h>
+#include "usbgem.h"
+
+/* hardware stuff */
+#include "usbgem_mii.h"
+#include "adm8511reg.h"
+
+char ident[] = "pegasus usbnic driver v" VERSION;
+
+/*
+ * Useful macros
+ */
+#define CHECK_AND_JUMP(val, label) \
+ if ((val) != USB_SUCCESS) { goto label; }
+
+/*
+ * Debugging
+ */
+#ifdef DEBUG_LEVEL
+static int upf_debug = DEBUG_LEVEL;
+#define DPRINTF(n, args) if (upf_debug > (n)) cmn_err args
+#else
+#define DPRINTF(n, args)
+#endif
+
+/*
+ * Our configration for ADMtek Pegasus/PegasusII
+ */
+/* timeouts */
+#define ONESEC (drv_usectohz(1*1000000))
+
+/*
+ * Local device definitions
+ */
+struct upf_dev {
+ /*
+ * Misc HW information
+ */
+ uint8_t ec[3];
+ uint8_t mac_addr[ETHERADDRL];
+ int chip_type;
+#define CHIP_AN986 1 /* avoid 0 */
+#define CHIP_ADM8511 2 /* including adm8515 */
+#define CHIP_ADM8513 3
+ boolean_t phy_init_done;
+ uint8_t last_link_state;
+
+ uint16_t vid; /* vendor id */
+ uint16_t pid; /* product id */
+};
+
+/*
+ * private functions
+ */
+
+/* mii operations */
+static uint16_t upf_mii_read(struct usbgem_dev *, uint_t, int *errp);
+static void upf_mii_write(struct usbgem_dev *, uint_t, uint16_t, int *errp);
+
+/* nic operations */
+static int upf_attach_chip(struct usbgem_dev *);
+static int upf_reset_chip(struct usbgem_dev *);
+static int upf_init_chip(struct usbgem_dev *);
+static int upf_start_chip(struct usbgem_dev *);
+static int upf_stop_chip(struct usbgem_dev *);
+static int upf_set_media(struct usbgem_dev *);
+static int upf_set_rx_filter(struct usbgem_dev *);
+static int upf_get_stats(struct usbgem_dev *);
+
+/* packet operations */
+static mblk_t *upf_tx_make_packet(struct usbgem_dev *, mblk_t *);
+static mblk_t *upf_rx_make_packet(struct usbgem_dev *, mblk_t *);
+
+/* interrupt handler */
+static void upf_interrupt(struct usbgem_dev *, mblk_t *);
+
+/* =============================================================== */
+/*
+ * I/O functions
+ */
+/* =============================================================== */
+#define UPF_REQ_GET_REGISTER 0xf0
+#define UPF_REQ_SET_REGISTER 0xf1
+#define OUTB(dp, p, v, errp, label) \
+ if ((*(errp) = usbgem_ctrl_out((dp), \
+ /* bmRequestType */ USB_DEV_REQ_HOST_TO_DEV \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ UPF_REQ_SET_REGISTER, \
+ /* wValue */ (v), \
+ /* wIndex */ (p), \
+ /* wLength */ 1, \
+ /* buf */ NULL, \
+ /* size */ 0)) != USB_SUCCESS) goto label;
+
+#define OUTW(dp, p, v, errp, label) \
+ if ((*(errp) = usbgem_ctrl_out_val((dp), \
+ /* bmRequestType */ USB_DEV_REQ_HOST_TO_DEV \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ UPF_REQ_SET_REGISTER, \
+ /* wValue */ 0, \
+ /* wIndex */ (p), \
+ /* wLength */ 2, \
+ /* value */ (v))) != USB_SUCCESS) goto label
+
+#define OUTS(dp, p, buf, len, errp, label) \
+ if ((*(errp) = usbgem_ctrl_out((dp), \
+ /* bmRequestType */ USB_DEV_REQ_HOST_TO_DEV \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ UPF_REQ_SET_REGISTER, \
+ /* wValue */ 0, \
+ /* wIndex */ (p), \
+ /* wLength */ (len), \
+ /* buf */ (buf), \
+ /* size */ (len))) != USB_SUCCESS) goto label
+
+#define INB(dp, p, vp, errp, label) \
+ if ((*(errp) = usbgem_ctrl_in_val((dp), \
+ /* bmRequestType */ USB_DEV_REQ_DEV_TO_HOST \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ UPF_REQ_GET_REGISTER, \
+ /* wValue */ 0, \
+ /* wIndex */ (p), \
+ /* wLength */ 1, \
+ /* valuep */ (vp))) != USB_SUCCESS) goto label
+
+#define INW(dp, p, vp, errp, label) \
+ if ((*(errp) = usbgem_ctrl_in_val((dp), \
+ /* bmRequestType */ USB_DEV_REQ_DEV_TO_HOST \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ UPF_REQ_GET_REGISTER, \
+ /* wValue */ 0, \
+ /* wIndex */ (p), \
+ /* wLength */ 2, \
+ /* valuep */ (vp))) != USB_SUCCESS) goto label
+
+#define INS(dp, p, buf, len, errp, label) \
+ if ((*(errp) = usbgem_ctrl_in((dp), \
+ /* bmRequestType */ USB_DEV_REQ_DEV_TO_HOST \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ UPF_REQ_GET_REGISTER, \
+ /* wValue */ 0, \
+ /* wIndex */ (p), \
+ /* wLength */ (len), \
+ /* buf */ (buf), \
+ /* size */ (len))) != USB_SUCCESS) goto label
+
+/* =============================================================== */
+/*
+ * Hardware manupilation
+ */
+/* =============================================================== */
+static int
+upf_reset_chip(struct usbgem_dev *dp)
+{
+ int i;
+ uint8_t val;
+ int err;
+ struct upf_dev *lp = dp->private;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+ bzero(lp->mac_addr, sizeof (lp->mac_addr));
+
+ lp->ec[1] = 0;
+ OUTB(dp, EC1, EC1_RM, &err, usberr);
+
+ for (i = 0; i < 1000; i++) {
+ INB(dp, EC1, &val, &err, usberr);
+ if ((val & EC1_RM) == 0) {
+ lp->ec[1] = val;
+ return (USB_SUCCESS);
+ }
+ drv_usecwait(10);
+ }
+
+ /* time out */
+ cmn_err(CE_WARN, "!%s: failed to reset: timeout", dp->name);
+ return (USB_FAILURE);
+
+usberr:
+ cmn_err(CE_NOTE, "!%s: %s: usberr detected", dp->name, __func__);
+ return (USB_FAILURE);
+}
+
+/*
+ * Setup an986/adm8511/adm8513/adm8515
+ */
+static int
+upf_init_chip(struct usbgem_dev *dp)
+{
+ uint64_t zero64 = 0;
+ int err = USB_SUCCESS;
+ struct upf_dev *lp = dp->private;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /* ethernet control register 0 */
+ lp->ec[0] |= EC0_RXSA | EC0_RXCS;
+ OUTB(dp, EC0, lp->ec[0], &err, usberr);
+
+ /* ethernet control reg1: will be set later in set_rx_filter() */
+
+ /* ethernet control register 2: will be set later in set_rx_filter() */
+ INB(dp, EC2, &lp->ec[2], &err, usberr);
+ lp->ec[2] |= EC2_RXBP | EC2_EP3RC;
+#ifdef CONFIG_VLAN
+ if (dp->misc_flag & USBGEM_VLAN) {
+ lp->ec[2] |= EC2_MEPL;
+ }
+#endif
+ OUTB(dp, EC2, lp->ec[2], &err, usberr);
+
+ /* Multicast address hash: clear */
+ OUTS(dp, MA, &zero64, 8, &err, usberr);
+
+ /* Ethernet ID : will be set later in upf_set_rx_filter() */
+
+ /* PAUSE timer */
+ OUTB(dp, PAUSETIMER, 0x1f, &err, usberr);
+
+ /* receive packet number based pause control:set in upf_set_media() */
+
+ /* occupied receive FIFO based pause control:set in upf_set_media() */
+
+ /* EP1 control: default */
+
+ /* Rx FIFO control */
+ if (lp->chip_type != CHIP_AN986) {
+ /* use 24K internal sram, 16pkts in fifo */
+ OUTB(dp, RXFC, 0, &err, usberr);
+ }
+
+ /* BIST contror: do nothing */
+ err = upf_set_media(dp);
+ CHECK_AND_JUMP(err, usberr);
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: end (success)", dp->name, __func__));
+ return (USB_SUCCESS);
+
+usberr:
+ cmn_err(CE_NOTE, "!%s: %s: usberr(%d) detected",
+ dp->name, __func__, err);
+ return (err);
+}
+
+static int
+upf_start_chip(struct usbgem_dev *dp)
+{
+ int err = USB_SUCCESS;
+ struct upf_dev *lp = dp->private;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /* enable RX and TX */
+ lp->ec[0] |= EC0_TXE | EC0_RXE;
+ OUTB(dp, EC0, lp->ec[0], &err, usberr);
+ return (USB_SUCCESS);
+
+usberr:
+ cmn_err(CE_WARN, "!%s: %s: usberr(%d) detected",
+ dp->name, __func__, err);
+ return (err);
+}
+
+static int
+upf_stop_chip(struct usbgem_dev *dp)
+{
+ int err;
+ struct upf_dev *lp = dp->private;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /* disable RX and TX */
+ lp->ec[0] &= ~(EC0_TXE | EC0_RXE);
+ OUTB(dp, EC0, lp->ec[0], &err, usberr);
+
+ return (USB_SUCCESS);
+
+usberr:
+ cmn_err(CE_WARN, "!%s: %s: usberr(%d) detected",
+ dp->name, __func__, err);
+ return (err);
+}
+
+static int
+upf_get_stats(struct usbgem_dev *dp)
+{
+ /* do nothing */
+ return (USB_SUCCESS);
+}
+
+static uint_t
+upf_mcast_hash(struct usbgem_dev *dp, const uint8_t *addr)
+{
+ /* hash table is 64 = 2^6 bit width */
+ return (usbgem_ether_crc_le(addr) & 0x3f);
+}
+
+static int
+upf_set_rx_filter(struct usbgem_dev *dp)
+{
+ int i;
+ int err;
+#ifdef DEBUG_LEVEL
+ uint8_t reg0;
+ uint8_t reg1;
+ uint8_t reg2;
+#endif
+ struct upf_dev *lp = dp->private;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called, rxmode:%b",
+ dp->name, __func__, dp->rxmode, RXMODE_BITS));
+
+ /* reset rx mode */
+ lp->ec[0] &= ~EC0_RXMA;
+ lp->ec[2] &= ~EC2_PROM;
+
+ if (dp->rxmode & RXMODE_PROMISC) {
+ /* promiscious mode implies all multicast and all physical */
+ lp->ec[0] |= EC0_RXMA;
+ lp->ec[2] |= EC2_PROM;
+ } else if ((dp->rxmode & RXMODE_ALLMULTI) || dp->mc_count > 0) {
+ /* XXX - multicast hash table didin't work */
+ /* accept all multicast packets */
+ lp->ec[0] |= EC0_RXMA;
+ }
+
+ if (bcmp(dp->cur_addr.ether_addr_octet,
+ lp->mac_addr, ETHERADDRL) != 0) {
+
+ /* need to update mac address */
+ bcopy(dp->cur_addr.ether_addr_octet,
+ lp->mac_addr, ETHERADDRL);
+ OUTS(dp, EID,
+ lp->mac_addr, ETHERADDRL, &err, usberr);
+ }
+
+ /* update rx mode */
+ OUTS(dp, EC0, lp->ec, 3, &err, usberr);
+
+#if DEBUG_LEVEL > 0
+ INB(dp, EC0, &reg0, &err, usberr);
+ INB(dp, EC1, &reg1, &err, usberr);
+ INB(dp, EC2, &reg2, &err, usberr);
+
+ cmn_err(CE_CONT, "!%s: %s: returned, ec:%b %b %b",
+ dp->name, __func__,
+ reg0, EC0_BITS, reg1, EC1_BITS, reg2, EC2_BITS);
+#endif
+ return (USB_SUCCESS);
+
+usberr:
+ cmn_err(CE_NOTE, "!%s: %s: usberr detected", dp->name, __func__);
+ return (err);
+}
+
+static int
+upf_set_media(struct usbgem_dev *dp)
+{
+ int err;
+ struct upf_dev *lp = dp->private;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ lp->ec[1] &= ~(EC1_FD | EC1_100M);
+
+ /* select duplex */
+ if (dp->full_duplex) {
+ lp->ec[1] |= EC1_FD;
+ }
+
+ /* select speed */
+ if (dp->speed == USBGEM_SPD_100) {
+ lp->ec[1] |= EC1_100M;
+ }
+
+ /* rx flow control */
+ switch (dp->flow_control) {
+ case FLOW_CONTROL_SYMMETRIC:
+ case FLOW_CONTROL_RX_PAUSE:
+ lp->ec[0] |= EC0_RXFCE;
+ break;
+
+ default:
+ lp->ec[0] &= ~EC0_RXFCE;
+ break;
+ }
+
+ /* tx flow control */
+ switch (dp->flow_control) {
+ case FLOW_CONTROL_SYMMETRIC:
+ case FLOW_CONTROL_TX_PAUSE:
+ if (lp->chip_type != CHIP_AN986) {
+ /* pegasus II has internal 24k fifo */
+ OUTB(dp, ORFBFC,
+ (12 << ORFBFC_RXS_SHIFT) | ORFBFC_FCRXS,
+ &err, usberr);
+
+ /* 16 packts can be stored in rx fifo */
+ OUTB(dp, RPNBFC_PN,
+ (8 << RPNBFC_PN_SHIFT) | RPNBFC_FCP,
+ &err, usberr);
+ } else {
+ /* an986 has external 32k fifo */
+ OUTB(dp, ORFBFC,
+ (16 << ORFBFC_RXS_SHIFT) | ORFBFC_FCRXS,
+ &err, usberr);
+
+ /* AN986 fails to link up when RPNBFC is enabled */
+ OUTB(dp, RPNBFC, 0, &err, usberr);
+ }
+ break;
+
+ default:
+ OUTB(dp, ORFBFC, 0, &err, usberr);
+ OUTB(dp, RPNBFC, 0, &err, usberr);
+ break;
+ }
+
+ /* update ether control registers */
+ OUTS(dp, EC0, lp->ec, 2, &err, usberr);
+ DPRINTF(0, (CE_CONT, "!%s: %s: returned, ec0:%b, ec1:%b",
+ dp->name, __func__, lp->ec[0], EC0_BITS, lp->ec[1], EC1_BITS));
+
+ return (USB_SUCCESS);
+
+usberr:
+ cmn_err(CE_WARN, "%s: %s: failed to write ec1", dp->name, __func__);
+ return (err);
+}
+
+/*
+ * send/receive packet check
+ */
+static mblk_t *
+upf_tx_make_packet(struct usbgem_dev *dp, mblk_t *mp)
+{
+ size_t len;
+ mblk_t *new;
+ mblk_t *tp;
+ uint8_t *bp;
+ uint8_t *last_pos;
+ int msglen;
+
+ DPRINTF(3, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ len = msgdsize(mp);
+ if (len < ETHERMIN) {
+ len = ETHERMIN;
+ }
+
+ /* allocate msg block */
+ msglen = len + sizeof (uint16_t);
+
+ /* avoid usb controller bug */
+ if ((msglen & 0x3f) == 0) {
+ /* add a header for additional 0-length usb message */
+ msglen += sizeof (uint16_t);
+ }
+
+ if ((new = allocb(msglen, 0)) == NULL) {
+ return (NULL);
+ }
+
+ /* copy contents of the buffer */
+ new->b_wptr = new->b_rptr + msglen;
+ bp = new->b_rptr;
+
+ /* the nic requires a two byte header of the packet size */
+ bp[0] = (uint8_t)len;
+ bp[1] = (uint8_t)(len >> 8);
+ bp += sizeof (uint16_t);
+
+ /* copy the payload */
+ for (tp = mp; tp; tp = tp->b_cont) {
+ len = tp->b_wptr - tp->b_rptr;
+ if (len > 0) {
+ bcopy(tp->b_rptr, bp, len);
+ bp += len;
+ }
+ }
+
+ /* clear ethernet pads and additional usb header if we have */
+ last_pos = new->b_wptr;
+ while (bp < last_pos) {
+ *bp++ = 0;
+ }
+
+ return (new);
+}
+
+static void
+upf_dump_packet(struct usbgem_dev *dp, uint8_t *bp, int n)
+{
+ int i;
+
+ for (i = 0; i < n; i += 8, bp += 8) {
+ cmn_err(CE_CONT, "%02x %02x %02x %02x %02x %02x %02x %02x",
+ bp[0], bp[1], bp[2], bp[3], bp[4], bp[5], bp[6], bp[7]);
+ }
+}
+
+static mblk_t *
+upf_rx_make_packet(struct usbgem_dev *dp, mblk_t *mp)
+{
+ uint8_t *p;
+ uint16_t rxhd;
+ uint_t len;
+ uint8_t rsr;
+ struct upf_dev *lp = dp->private;
+
+ ASSERT(mp != NULL);
+
+#ifdef DEBUG_LEVEL
+ len = msgdsize(mp);
+ DPRINTF(2, (CE_CONT, "!%s: time:%d %s: cont:%p",
+ dp->name, ddi_get_lbolt(), __func__, len, mp->b_cont));
+
+ if (upf_debug > 3) {
+ upf_dump_packet(dp, mp->b_rptr, max(6, len));
+ }
+#endif
+ /* get the length of Rx packet */
+ p = mp->b_wptr - 4;
+ rsr = p[3];
+ if (lp->chip_type == CHIP_ADM8513) {
+ /* As Rx packets from ADM8513 have two byte header, remove it */
+ p = mp->b_rptr;
+ len = ((p[1] << 8) | p[0]) & 0x0fff;
+ mp->b_rptr += 2;
+ } else {
+ len = (((p[1] << 8) | p[0]) & 0x0fff) - ETHERFCSL - 4;
+ }
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: rsr:%b len:%d",
+ dp->name, __func__, rsr, RSR_BITS, len));
+
+ /* check if error happen */
+ if (rsr & RSR_ERRORS) {
+ DPRINTF(0, (CE_CONT, "!%s: rsr:%b", dp->name, rsr, RSR_BITS));
+ if (rsr & (RSR_CRC | RSR_DRIBBLE)) {
+ dp->stats.frame++;
+ }
+ if (rsr & RSR_LONG) {
+ dp->stats.frame_too_long++;
+ }
+ if (rsr & RSR_RUNT) {
+ dp->stats.runt++;
+ }
+
+ dp->stats.errrcv++;
+ return (NULL);
+ }
+#ifndef CONFIG_VLAN
+ /* check packet size */
+ if (len > ETHERMAX) {
+ /* too long */
+ dp->stats.frame_too_long++;
+ dp->stats.errrcv++;
+ return (NULL);
+ } else if (len < ETHERMIN) {
+ dp->stats.runt++;
+ dp->stats.errrcv++;
+ return (NULL);
+ }
+#endif
+ /* remove tailing crc and rx status fields */
+ mp->b_wptr = mp->b_rptr + len;
+ ASSERT(mp->b_next == NULL);
+ return (mp);
+}
+
+/*
+ * Device depend interrupt handler
+ */
+static void
+upf_interrupt(struct usbgem_dev *dp, mblk_t *mp)
+{
+ uint8_t *bp;
+ struct upf_dev *lp = dp->private;
+
+ bp = mp->b_rptr;
+
+ DPRINTF(2, (CE_CONT,
+ "!%s: %s: size:%d, %02x %02x %02x %02x %02x %02x %02x %02x",
+ dp->name, __func__, mp->b_wptr - mp->b_rptr,
+ bp[0], bp[1], bp[2], bp[3], bp[4], bp[5], bp[6], bp[7]));
+
+ if ((lp->last_link_state ^ bp[5]) & 1) {
+ DPRINTF(1, (CE_CONT, "!%s:%s link status changed:",
+ dp->name, __func__));
+ usbgem_mii_update_link(dp);
+ }
+
+ lp->last_link_state = bp[5] & 1;
+}
+
+/*
+ * MII Interfaces
+ */
+static uint16_t
+upf_mii_read(struct usbgem_dev *dp, uint_t index, int *errp)
+{
+ uint8_t phyctrl;
+ uint16_t val;
+ int i;
+
+ DPRINTF(4, (CE_CONT, "!%s: %s: called, ix:%d",
+ dp->name, __func__, index));
+ ASSERT(index >= 0 && index < 32);
+
+ *errp = USB_SUCCESS;
+
+ /* set PHYADDR */
+ OUTB(dp, PHYA, dp->mii_phy_addr, errp, usberr);
+
+ /* Initiate MII read transaction */
+ OUTB(dp, PHYAC, index | PHYAC_RDPHY, errp, usberr);
+
+ for (i = 0; i < 100; i++) {
+ INB(dp, PHYAC, &phyctrl, errp, usberr);
+ if (phyctrl & PHYAC_DO) {
+ /* done */
+ INW(dp, PHYD, &val, errp, usberr);
+ DPRINTF(4, (CE_CONT, "!%s: %s: return %04x",
+ dp->name, __func__, val));
+ return (val);
+ }
+ drv_usecwait(10);
+ }
+ /* timeout */
+ cmn_err(CE_WARN, "!%s: %s: timeout detected", dp->name, __func__);
+ *errp = USB_FAILURE;
+ return (0);
+
+usberr:
+ cmn_err(CE_CONT,
+ "!%s: %s: usberr(%d) detected", dp->name, __func__, *errp);
+ return (0);
+}
+
+static void
+upf_mii_write(struct usbgem_dev *dp, uint_t index, uint16_t val, int *errp)
+{
+ int i;
+ uint8_t phyctrl;
+
+ DPRINTF(4, (CE_CONT, "!%s: %s called index:%d val:0x%04x",
+ dp->name, __func__, index, val));
+ ASSERT(index >= 0 && index < 32);
+
+ *errp = USB_SUCCESS;
+
+ OUTW(dp, PHYD, val, errp, usberr);
+ OUTB(dp, PHYA, dp->mii_phy_addr, errp, usberr);
+ OUTB(dp, PHYAC, index | PHYAC_WRPHY, errp, usberr);
+
+ for (i = 0; i < 100; i++) {
+ INB(dp, PHYAC, &phyctrl, errp, usberr);
+ if (phyctrl & PHYAC_DO) {
+ /* done */
+ return;
+ }
+ drv_usecwait(10);
+ }
+
+ /* time out */
+ cmn_err(CE_WARN, "!%s: %s: timeout detected", dp->name, __func__);
+ *errp = USB_FAILURE;
+ return;
+
+usberr:
+ cmn_err(CE_CONT,
+ "!%s: %s: usberr(%d) detected", dp->name, __func__, *errp);
+}
+
+
+static int
+upf_enable_phy(struct usbgem_dev *dp)
+{
+ uint8_t val;
+ int err;
+ struct upf_dev *lp = dp->private;
+
+ /*
+ * first, try to enable internal phy
+ */
+ INB(dp, IPHYC, &val, &err, usberr);
+ val = (val | IPHYC_EPHY) & ~IPHYC_PHYR;
+ OUTB(dp, IPHYC, val, &err, usberr);
+
+ INB(dp, IPHYC, &val, &err, usberr);
+ DPRINTF(0, (CE_CONT, "!%s: %s: IPHYC: %b",
+ dp->name, __func__, val, IPHYC_BITS));
+ if (val) {
+ /* reset internal phy */
+ OUTB(dp, IPHYC, val | IPHYC_PHYR, &err, usberr);
+ OUTB(dp, IPHYC, val, &err, usberr);
+ delay(drv_usectohz(10000));
+
+ /* identify the chip generation */
+ OUTB(dp, 0x83, 0xa5, &err, usberr);
+ INB(dp, 0x83, &val, &err, usberr);
+ if (val == 0xa5) {
+ lp->chip_type = CHIP_ADM8513;
+ } else {
+ /* adm8511 or adm8515 */
+ lp->chip_type = CHIP_ADM8511;
+ }
+ dp->ugc.usbgc_mii_hw_link_detection = B_TRUE;
+ } else {
+ /*
+ * It should be AN986 which doesn't have an internal PHY.
+ * We need to setup gpio ports in AN986, which are
+ * connected to external PHY control pins.
+ */
+ lp->chip_type = CHIP_AN986;
+
+ /* reset external phy */
+ /* output port#0 L, port#1 L */
+ OUTB(dp, GPIO10, GPIO10_0O | GPIO10_0OE, &err, usberr);
+
+ /* output port#0 H, port#1 L */
+ OUTB(dp, GPIO10,
+ GPIO10_0O | GPIO10_0OE | GPIO10_1OE, &err, usberr);
+
+ /* hw link detection doesn't work correctly */
+ dp->ugc.usbgc_mii_hw_link_detection = B_FALSE;
+ }
+
+ return (USB_SUCCESS);
+
+usberr:
+ cmn_err(CE_NOTE, "!%s: %s: usberr detected", dp->name, __func__);
+ return (USB_FAILURE);
+}
+
+static int
+upf_mii_probe(struct usbgem_dev *dp)
+{
+ int err;
+ uint16_t val;
+ struct upf_dev *lp = dp->private;
+
+ if (!lp->phy_init_done) {
+ upf_enable_phy(dp);
+ lp->phy_init_done = B_TRUE;
+ }
+
+ return (usbgem_mii_probe_default(dp));
+}
+
+static int
+upf_mii_init(struct usbgem_dev *dp)
+{
+ uint16_t val;
+ int err = USB_SUCCESS;
+ struct upf_dev *lp = dp->private;
+
+ if (!lp->phy_init_done) {
+ upf_enable_phy(dp);
+ }
+ lp->phy_init_done = B_FALSE;
+
+ if (lp->chip_type == CHIP_AN986 &&
+ (lp->vid == 0x0db7 /* elecom */ ||
+ lp->vid == 0x066b /* linksys */ ||
+ lp->vid == 0x077b /* linksys */ ||
+ lp->vid == 0x2001 /* dlink */)) {
+ /* special treatment for Linksys products */
+ val = upf_mii_read(dp, 0x1b, &err) | 0x4;
+ upf_mii_write(dp, 0x1b, val, &err);
+ }
+ return (err);
+}
+
+/* ======================================================== */
+/*
+ * OS depend (device driver DKI) routine
+ */
+/* ======================================================== */
+static uint16_t
+upf_read_eeprom(struct usbgem_dev *dp, int index, int *errp)
+{
+ int i;
+ uint8_t eectrl;
+ uint16_t data;
+
+ *errp = USB_SUCCESS;
+
+ OUTB(dp, EECTRL, 0, errp, usberr);
+
+ OUTB(dp, EEOFFSET, index, errp, usberr);
+ OUTB(dp, EECTRL, EECTRL_RD, errp, usberr);
+
+ for (i = 0; i < 100; i++) {
+ INB(dp, EECTRL, &eectrl, errp, usberr);
+ if (eectrl & EECTRL_DONE) {
+ INW(dp, EEDATA, &data, errp, usberr);
+ return (data);
+ }
+ drv_usecwait(10);
+ }
+
+ /* time out */
+ *errp = USB_FAILURE;
+ return (0);
+
+usberr:
+ cmn_err(CE_CONT,
+ "!%s: %s: usberr(%d) detected", dp->name, __func__, *errp);
+ return (0);
+}
+
+static void
+upf_eeprom_dump(struct usbgem_dev *dp, int size)
+{
+ int i;
+ int err;
+
+ cmn_err(CE_CONT, "!%s: %s dump:", dp->name, __func__);
+
+ for (i = 0; i < size; i += 4) {
+ cmn_err(CE_CONT, "!0x%02x: 0x%04x 0x%04x 0x%04x 0x%04x",
+ i*2,
+ upf_read_eeprom(dp, i + 0, &err),
+ upf_read_eeprom(dp, i + 1, &err),
+ upf_read_eeprom(dp, i + 2, &err),
+ upf_read_eeprom(dp, i + 3, &err));
+ }
+}
+
+static int
+upf_attach_chip(struct usbgem_dev *dp)
+{
+ int i;
+ int err;
+ uint16_t val;
+ uint8_t *mac;
+ struct upf_dev *lp = dp->private;
+
+ /*
+ * Read mac address from EEPROM
+ */
+ mac = dp->dev_addr.ether_addr_octet;
+ for (i = 0; i < 3; i++) {
+ val = upf_read_eeprom(dp, i, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ mac[i*2+0] = (uint8_t)val;
+ mac[i*2+1] = (uint8_t)(val >> 8);
+ }
+
+ DPRINTF(0, (CE_CONT,
+ "%s: %s: mac: %02x:%02x:%02x:%02x:%02x:%02x",
+ dp->name, __func__,
+ mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]));
+
+ dp->misc_flag = 0;
+#ifdef CONFIG_VLAN
+ dp->misc_flag |= USBGEM_VLAN;
+#endif
+#if DEBUG_LEVEL > 3
+ upf_eeprom_dump(dp, 0x80);
+#endif
+ return (USB_SUCCESS);
+
+usberr:
+ cmn_err(CE_WARN, "!%s: %s: usb error detected", dp->name, __func__);
+ return (USB_FAILURE);
+}
+
+static int
+upfattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ int i;
+ ddi_iblock_cookie_t c;
+ int ret;
+ int unit;
+ uint32_t tcr;
+ int len;
+ const char *drv_name;
+ struct usbgem_dev *dp;
+ void *base;
+ struct usbgem_conf *ugcp;
+ struct upf_dev *lp;
+
+ unit = ddi_get_instance(dip);
+ drv_name = ddi_driver_name(dip);
+
+ DPRINTF(3, (CE_CONT, "!%s%d: %s: called, cmd:%d",
+ drv_name, unit, __func__, cmd));
+
+ if (cmd == DDI_ATTACH) {
+ /*
+ * construct usbgem configration
+ */
+ ugcp = kmem_zalloc(sizeof (*ugcp), KM_SLEEP);
+
+ /* name */
+ sprintf(ugcp->usbgc_name, "%s%d", drv_name, unit);
+ ugcp->usbgc_ppa = unit;
+
+ ugcp->usbgc_ifnum = 0;
+ ugcp->usbgc_alt = 0;
+
+ ugcp->usbgc_tx_list_max = 16;
+
+ ugcp->usbgc_rx_header_len = 4;
+ ugcp->usbgc_rx_list_max = 64;
+
+ /* time out parameters */
+ ugcp->usbgc_tx_timeout = USBGEM_TX_TIMEOUT;
+ ugcp->usbgc_tx_timeout_interval = USBGEM_TX_TIMEOUT_INTERVAL;
+
+ /* flow control */
+ ugcp->usbgc_flow_control = FLOW_CONTROL_NONE;
+ ugcp->usbgc_flow_control = FLOW_CONTROL_RX_PAUSE;
+
+ /* MII timeout parameters */
+ ugcp->usbgc_mii_link_watch_interval = ONESEC;
+ ugcp->usbgc_mii_an_watch_interval = ONESEC/5;
+ ugcp->usbgc_mii_reset_timeout = MII_RESET_TIMEOUT; /* 1 sec */
+ ugcp->usbgc_mii_an_timeout = MII_AN_TIMEOUT; /* 5 sec */
+ ugcp->usbgc_mii_an_wait = MII_AN_TIMEOUT/2;
+ ugcp->usbgc_mii_linkdown_timeout = MII_LINKDOWN_TIMEOUT;
+ ugcp->usbgc_mii_an_delay = ONESEC/10;
+
+ ugcp->usbgc_mii_linkdown_action = MII_ACTION_RESET;
+ ugcp->usbgc_mii_linkdown_timeout_action = MII_ACTION_RESET;
+ ugcp->usbgc_mii_dont_reset = B_FALSE;
+
+ /* I/O methods */
+
+ /* mac operation */
+ ugcp->usbgc_attach_chip = &upf_attach_chip;
+ ugcp->usbgc_reset_chip = &upf_reset_chip;
+ ugcp->usbgc_init_chip = &upf_init_chip;
+ ugcp->usbgc_start_chip = &upf_start_chip;
+ ugcp->usbgc_stop_chip = &upf_stop_chip;
+ ugcp->usbgc_multicast_hash = &upf_mcast_hash;
+
+ ugcp->usbgc_set_rx_filter = &upf_set_rx_filter;
+ ugcp->usbgc_set_media = &upf_set_media;
+ ugcp->usbgc_get_stats = &upf_get_stats;
+ ugcp->usbgc_interrupt = &upf_interrupt;
+
+ /* packet operation */
+ ugcp->usbgc_tx_make_packet = &upf_tx_make_packet;
+ ugcp->usbgc_rx_make_packet = &upf_rx_make_packet;
+
+ /* mii operations */
+ ugcp->usbgc_mii_probe = &upf_mii_probe;
+ ugcp->usbgc_mii_init = &upf_mii_init;
+ ugcp->usbgc_mii_config = &usbgem_mii_config_default;
+ ugcp->usbgc_mii_read = &upf_mii_read;
+ ugcp->usbgc_mii_write = &upf_mii_write;
+
+ /* mtu */
+ ugcp->usbgc_min_mtu = ETHERMTU;
+ ugcp->usbgc_max_mtu = ETHERMTU;
+ ugcp->usbgc_default_mtu = ETHERMTU;
+
+ lp = kmem_zalloc(sizeof (struct upf_dev), KM_SLEEP);
+
+ lp->vid = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
+ DDI_PROP_DONTPASS, "usb-vendor-id", -1);
+ lp->pid = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
+ DDI_PROP_DONTPASS, "usb-product-id", -1);
+
+ dp = usbgem_do_attach(dip, ugcp, lp, sizeof (struct upf_dev));
+
+ kmem_free(ugcp, sizeof (*ugcp));
+
+ if (dp != NULL) {
+ return (DDI_SUCCESS);
+ }
+
+err_free_mem:
+ kmem_free(lp, sizeof (struct upf_dev));
+err_close_pipe:
+err:
+ return (DDI_FAILURE);
+ }
+ if (cmd == DDI_RESUME) {
+ dp = USBGEM_GET_DEV(dip);
+ lp = dp->private;
+ lp->phy_init_done = B_FALSE;
+
+ return (usbgem_resume(dip));
+ }
+ return (DDI_FAILURE);
+}
+
+static int
+upfdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ int ret;
+
+ if (cmd == DDI_DETACH) {
+ ret = usbgem_do_detach(dip);
+ if (ret != DDI_SUCCESS) {
+ return (DDI_FAILURE);
+ }
+ return (DDI_SUCCESS);
+ }
+ if (cmd == DDI_SUSPEND) {
+ return (usbgem_suspend(dip));
+ }
+ return (DDI_FAILURE);
+}
+
+/* ======================================================== */
+/*
+ * OS depend (loadable streams driver) routine
+ */
+/* ======================================================== */
+#ifdef USBGEM_CONFIG_GLDv3
+USBGEM_STREAM_OPS(upf_ops, upfattach, upfdetach);
+#else
+static struct module_info upfminfo = {
+ 0, /* mi_idnum */
+ "upf", /* mi_idname */
+ 0, /* mi_minpsz */
+ ETHERMTU, /* mi_maxpsz */
+ 32*1024, /* mi_hiwat */
+ 1, /* mi_lowat */
+};
+
+static struct qinit upfrinit = {
+ (int (*)()) NULL, /* qi_putp */
+ usbgem_rsrv, /* qi_srvp */
+ usbgem_open, /* qi_qopen */
+ usbgem_close, /* qi_qclose */
+ (int (*)()) NULL, /* qi_qadmin */
+ &upfminfo, /* qi_minfo */
+ NULL /* qi_mstat */
+};
+
+static struct qinit upfwinit = {
+ usbgem_wput, /* qi_putp */
+ usbgem_wsrv, /* qi_srvp */
+ (int (*)()) NULL, /* qi_qopen */
+ (int (*)()) NULL, /* qi_qclose */
+ (int (*)()) NULL, /* qi_qadmin */
+ &upfminfo, /* qi_minfo */
+ NULL /* qi_mstat */
+};
+
+static struct streamtab upf_info = {
+ &upfrinit, /* st_rdinit */
+ &upfwinit, /* st_wrinit */
+ NULL, /* st_muxrinit */
+ NULL /* st_muxwrinit */
+};
+
+static struct cb_ops cb_upf_ops = {
+ nulldev, /* cb_open */
+ nulldev, /* cb_close */
+ nodev, /* cb_strategy */
+ nodev, /* cb_print */
+ nodev, /* cb_dump */
+ nodev, /* cb_read */
+ nodev, /* cb_write */
+ nodev, /* cb_ioctl */
+ nodev, /* cb_devmap */
+ nodev, /* cb_mmap */
+ nodev, /* cb_segmap */
+ nochpoll, /* cb_chpoll */
+ ddi_prop_op, /* cb_prop_op */
+ &upf_info, /* cb_stream */
+ D_MP /* cb_flag */
+};
+
+static struct dev_ops upf_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* devo_refcnt */
+ usbgem_getinfo, /* devo_getinfo */
+ nulldev, /* devo_identify */
+ nulldev, /* devo_probe */
+ upfattach, /* devo_attach */
+ upfdetach, /* devo_detach */
+ nodev, /* devo_reset */
+ &cb_upf_ops, /* devo_cb_ops */
+ NULL, /* devo_bus_ops */
+ usbgem_power, /* devo_power */
+#if DEVO_REV >= 4
+ usbgem_quiesce, /* devo_quiesce */
+#endif
+
+};
+#endif
+static struct modldrv modldrv = {
+ &mod_driverops, /* Type of module. This one is a driver */
+ ident,
+ &upf_ops, /* driver ops */
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, &modldrv, NULL
+};
+
+/* ======================================================== */
+/*
+ * _init : done
+ */
+/* ======================================================== */
+int
+_init(void)
+{
+ int status;
+
+ DPRINTF(2, (CE_CONT, "!upf: _init: called"));
+
+ status = usbgem_mod_init(&upf_ops, "upf");
+ if (status != DDI_SUCCESS) {
+ return (status);
+ }
+ status = mod_install(&modlinkage);
+ if (status != DDI_SUCCESS) {
+ usbgem_mod_fini(&upf_ops);
+ }
+ return (status);
+}
+
+/*
+ * _fini : done
+ */
+int
+_fini(void)
+{
+ int status;
+
+ DPRINTF(2, (CE_CONT, "!upf: _fini: called"));
+ status = mod_remove(&modlinkage);
+ if (status == DDI_SUCCESS) {
+ usbgem_mod_fini(&upf_ops);
+ }
+ return (status);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
diff --git a/usr/src/uts/common/io/urf/rtl8150reg.h b/usr/src/uts/common/io/urf/rtl8150reg.h
new file mode 100644
index 0000000000..7cba53356e
--- /dev/null
+++ b/usr/src/uts/common/io/urf/rtl8150reg.h
@@ -0,0 +1,218 @@
+/*
+ * @(#)rtl8150reg.h 1.1 04/09/16
+ * Macro definitions for Realtek 8150 USB to fast ethernet controller
+ * based on Realtek RTL8150 data sheet
+ * This file is public domain. Coded by M.Murayama (KHF04453@nifty.com)
+ */
+
+/*
+ * Register offset
+ */
+#define IDR 0x0120 /* Base of ID registers */
+#define MAR 0x0126 /* Base of multicast registers */
+#define CR 0x012e /* Command register */
+#define TCR 0x012f /* Transmit Configuration register */
+#define RCR 0x0130 /* Receive Configuration register */
+#define TSR 0x0132 /* Transmit Status register */
+#define RSR 0x0133 /* Receive Status register */
+#define CON0 0x0135 /* Configuration register 0 */
+#define CON1 0x0136 /* Configuration register 1 */
+#define MSR 0x0137 /* Media Status register */
+#define PHYADD 0x0138 /* PHY address register */
+#define PHYDAT 0x0139 /* PHY data register */
+#define PHYCNT 0x013b /* PHY control register */
+#define GPPC 0x013d /* General purpose pin control */
+#define WAKECNT 0x013e /* Wake up event control */
+#define BMCR 0x0140 /* Basic Mode Control register */
+#define BMSR 0x0142 /* Basic Mode Status register */
+#define ANAR 0x0144 /* Auto Negotiation Advertisement register */
+#define ANLP 0x0146 /* Auto Negotiation Link Partner register */
+#define ANER 0x0148 /* Auto Negotiation Expansion register */
+#define NWAYT 0x014a /* Nway test register */
+#define CSCR 0x014c /* CS configuration register */
+#define CRC0 0x014e /* Power management register for wakeup frame0 */
+#define CRC1 0x0150 /* Power management register for wakeup frame1 */
+#define CRC2 0x0152 /* Power management register for wakeup frame2 */
+#define CRC3 0x0154 /* Power management register for wakeup frame3 */
+#define CRC4 0x0156 /* Power management register for wakeup frame4 */
+#define BYTEMASK0 0x0158 /* Power management wakeup frame0 bytemask */
+#define BYTEMASK1 0x0160 /* Power management wakeup frame1 bytemask */
+#define BYTEMASK2 0x0168 /* Power management wakeup frame2 bytemask */
+#define BYTEMASK3 0x0170 /* Power management wakeup frame3 bytemask */
+#define BYTEMASK4 0x0178 /* Power management wakeup frame4 bytemask */
+#define PHY1 0x0180 /* PHY parameter 1 */
+#define PHY2 0x0184 /* PHY parameter 2 */
+#define TW1 0x0186 /* Twister parameter 1 */
+
+/*
+ * Bit field definitions
+ */
+/* CR : Command register (uint8_t) */
+#define CR_WEPROM 0x20 /* EEPROM write enable */
+#define CR_SOFT_RST 0x10 /* Reset */
+#define CR_RE 0x08 /* Ethernet receive enable */
+#define CR_TE 0x04 /* Ethernet transmit enable */
+#define CR_EP3CLREN 0x02 /* clear performance counter after EP3 */
+#define CR_AUTOLOAD 0x01 /* autoload contents of 93c46 */
+
+#define CR_BITS "\020\006WEPROM\005SOFT_RST\004RE\003TE\002EP3CLREN\001AUTOLOAD"
+
+/* TCR: Transmit Configuration register */
+#define TCR_TXRR 0xc0 /* Tx retry count */
+#define TCR_TXRR_SHIFT 6
+#define TCR_IFG 0x18 /* Interframe Gap */
+#define TCR_IFG_SHIFT 3
+#define TCR_IFG_802_3 (3 << TCR_IFG_SHIFT) /* 802.3 standard */
+#define TCR_NOCRC 0x01 /* Inhibit Appending CRC */
+
+#define TCR_BITS "\020\001NOCRC"
+
+/* Receive Configuration register */
+#define RCR_TAIL 0x0080 /* Rx header forward to host in CRC field */
+#define RCR_AER 0x0040 /* Accept Error packet */
+#define RCR_AR 0x0020 /* Accept runt */
+#define RCR_AM 0x0010 /* Accept multicast */
+#define RCR_AB 0x0008 /* Accept broadcast */
+#define RCR_AD 0x0004 /* Accept physical match */
+#define RCR_AAM 0x0002 /* Accept all Multicast */
+#define RCR_AAP 0x0001 /* Accept all physical */
+
+#define RCR_ACCEPT_MODE \
+ (RCR_AER | RCR_AR | RCR_AM | RCR_AB | RCR_AD | RCR_AAM | RCR_AAP)
+
+#define RCR_BITS \
+ "\020\010TAIL\007AER\006AR\005AM\004AB\003AD\002AAM\001AAP"
+
+/* Transmit Status register */
+
+#define TSR_ECOL 0x20 /* excessive collision indication */
+#define TSR_LCOL 0x10 /* late collision indication */
+#define TSR_LOSS_CRS 0x08 /* lost of carrier indication */
+#define TSR_JBR 0x04 /* jabber time out indication */
+#define TSR_BUF_EMPTY 0x02 /* Tx buffer is empty */
+#define TSR_BUF_FULL 0x01 /* Tx buffer is full */
+
+#define TSR_BITS \
+ "\020" \
+ "\006ECOL" \
+ "\005LCOL" \
+ "\004LOSS_CRS" \
+ "\003JBR" \
+ "\002BUF_EMPTY" \
+ "\001BUF_FULL"
+
+/* Receive status register in Rx packet field */
+#define RSR_WEVENT 0x80 /* Wakeup event indication */
+#define RSR_RX_BUF_FULL 0x40 /* Receive buffer full indication */
+#define RSR_LKCHG 0x20 /* Link change indication */
+#define RSR_RUNT 0x10 /* short packet indication */
+#define RSR_LONG 0x08 /* Long packet indication*/
+#define RSR_CRC 0x04 /* CRC error indication*/
+#define RSR_FAE 0x02 /* Frame alignment error */
+#define RSR_ROK 0x01 /* Receive OK indication */
+
+#define RSR_ERRS (RSR_RUNT | RSR_LONG | RSR_CRC | RSR_FAE)
+#define RSR_BITS \
+ "\020" \
+ "\010WEVENT" \
+ "\007RX_BUF_FULL" \
+ "\006LKCHG" \
+ "\005RUNT" \
+ "\004LONG" \
+ "\003CRC" \
+ "\002FAE" \
+ "\001ROK"
+
+/* Config 0 */
+
+#define CON0_SUSLED 0x80
+#define CON0_PARM_EN 0x40 /* parameter enable */
+#define CON0_LDPS 0x08
+#define CON0_MSEL 0x04 /* media select 1:MII, 0:auto */
+#define CON0_LEDS 0x03 /* LED pattern */
+
+/* Config 1 */
+#define CON0_BWF 0x40 /* Broadcast wakeup function 1:on 0:off */
+#define CON0_MWF 0x20 /* Multicast wakeup function 1:on 0:off */
+#define CON0_UWF 0x10 /* Unicast wakeup function 1:on 0:off */
+#define CON0_LONGWF1 0x02 /* */
+#define CON0_LONGWF0 0x01 /* */
+
+
+/* MSR : Media Status register */
+#define MSR_TXFCE 0x80 /* Tx Flow control enable */
+#define MSR_RXFCE 0x40 /* Rx Flow control enable */
+#define MSR_DUPLEX 0x10 /* full duplex */
+#define MSR_SPEED_100 0x08 /* 100Mbps mode */
+#define MSR_LINK 0x04 /* link status */
+#define MSR_TXPF 0x02 /* 8150 sends pause packet */
+#define MSR_RXPF 0x01 /* 8150 is in backoff state*/
+
+#define MSR_BITS \
+ "\020" \
+ "\010TXFCE" \
+ "\007RXFCE" \
+ "\005DUPLEX" \
+ "\004SPEED_100" \
+ "\003LINK" \
+ "\002TXPF" \
+ "\001RXPF"
+
+/* MII PHY Address */
+#define PHYADD_MASK 0x1f
+
+/* MII PHY Data */
+#define PHYCNT_OWN 0x40 /* 8150 owns:1 not owns:0 */
+#define PHYCNT_RWCR 0x20 /* write:1 read:0 */
+#define PHYCNT_PHYOFF 0x1f
+
+/* BMCR (almost same with MII_CONTROL register) */
+#define BMCR_RESET 0x8000 /* PHY reset */
+#define BMCR_Spd_Set 0x2000 /* 100Mbps */
+#define BMCR_ANE 0x1000 /* auto negotiation enable */
+#define BMCR_RSA 0x0200 /* restart auto negotiation */
+#define BMCR_duplex 0x0100 /* 100Mbps */
+
+/* Basic mode status register */
+/* Auto-negotiation Advertisement register */
+/* Auto-negotiation Link Partner Ability register */
+/* Auto-negotiation Expansion register */
+
+/* Nway test register */
+#define NWAYT_NWLPBK 0x0080
+#define NWAYT_ENNWLE 0x0008
+#define NWAYT_FLAGABD 0x0004
+#define NWAYT_FLAGPDF 0x0002
+#define NWAYT_FLAGLSC 0x0001
+
+/* CS configuration register */
+#define CS_TESTFUN 0x8000 /* */
+#define CS_LD 0x0200 /* */
+#define CS_HEARTBEAT 0x0100 /* */
+#define CS_JBEN 0x0080 /* */
+#define CS_F_LINK100 0x0040 /* */
+#define CS_F_CONNECT 0x0020 /* */
+#define CS_CON_STATUS 0x0008 /* */
+#define CS_CON_STATUS_EN 0x0004 /* */
+#define CS_PASS_SCR 0x0001 /* bypass scramble function */
+
+/*
+ * header format of rx packet
+ */
+#define RXHD_MULT 0x8000 /* multicast packet */
+#define RXHD_PHYS 0x4000 /* physical match packet */
+#define RXHD_RUNT 0x2000 /* too short */
+#define RXHD_VALID 0x1000 /* packet is ok */
+#define RXHD_BYTECNT 0x0fff /* rx byte count */
+
+#define RXHD_BITS \
+ "\020" \
+ "\020MULT" \
+ "\017PHYS" \
+ "\016RUNT" \
+ "\015VALID"
+/*
+ * Offset to EPROM contents
+ */
+#define URF_EEPROM_BASE 0x1200
+#define EPROM_EthernetID 0x0002
diff --git a/usr/src/uts/common/io/urf/urf_usbgem.c b/usr/src/uts/common/io/urf/urf_usbgem.c
new file mode 100644
index 0000000000..f61c8e3502
--- /dev/null
+++ b/usr/src/uts/common/io/urf/urf_usbgem.c
@@ -0,0 +1,1039 @@
+/*
+ * urf_usbgem.c : Realtek RTL8150 USB to Fast Ethernet Driver for Solaris
+ *
+ * Copyright (c) 2003-2012 Masayuki Murayama. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the author nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+#pragma ident "%W% %E%"
+
+/*
+ * Changelog:
+ */
+
+/*
+ * TODO
+ */
+/* ======================================================= */
+
+/*
+ * Solaris system header files and macros
+ */
+
+/* minimum kernel headers for drivers */
+#include <sys/types.h>
+#include <sys/conf.h>
+#include <sys/debug.h>
+#include <sys/kmem.h>
+#include <sys/modctl.h>
+#include <sys/errno.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/byteorder.h>
+
+/* ethernet stuff */
+#include <sys/ethernet.h>
+
+/* interface card depend stuff */
+#include <sys/stropts.h>
+#include <sys/stream.h>
+#include <sys/strlog.h>
+#include <sys/usb/usba.h>
+#include "usbgem.h"
+#include "usbgem_mii.h"
+#include "rtl8150reg.h"
+
+char ident[] = "rtl8150 usbnic driver v" VERSION;
+
+/*
+ * Useful macros
+ */
+#define ROUNDUP2(x, y) (((x)+(y)-1) & ~((y)-1))
+#define CHECK_AND_JUMP(err, label) if (err != USB_SUCCESS) goto label
+
+/*
+ * Debugging
+ */
+#ifdef DEBUG_LEVEL
+static int urf_debug = DEBUG_LEVEL;
+#define DPRINTF(n, args) if (urf_debug > (n)) cmn_err args
+#else
+#define DPRINTF(n, args)
+#endif
+
+/*
+ * Our configration for rtl8150
+ */
+/* timeouts */
+#define ONESEC (drv_usectohz(1*1000000))
+
+/*
+ * Local device definitions
+ */
+struct chip_info {
+ int flags;
+ char *name;
+ int type;
+};
+
+#define CHIPTABLESIZE (sizeof (chiptbl_8150) / sizeof (struct chip_info))
+
+struct urf_dev {
+ /*
+ * Misc HW information
+ */
+ struct chip_info *chip;
+ uint8_t cr;
+ uint8_t tsr;
+ uint16_t rcr;
+ uint8_t txok_cnt;
+};
+
+/*
+ * private functions
+ */
+
+/* mii operations */
+static uint16_t urf_mii_read(struct usbgem_dev *, uint_t, int *errp);
+static void urf_mii_write(struct usbgem_dev *, uint_t, uint16_t, int *errp);
+
+/* nic operations */
+static int urf_attach_chip(struct usbgem_dev *);
+static int urf_reset_chip(struct usbgem_dev *);
+static int urf_init_chip(struct usbgem_dev *);
+static int urf_start_chip(struct usbgem_dev *);
+static int urf_stop_chip(struct usbgem_dev *);
+static int urf_set_media(struct usbgem_dev *);
+static int urf_set_rx_filter(struct usbgem_dev *);
+static int urf_get_stats(struct usbgem_dev *);
+
+/* packet operations */
+static mblk_t *urf_tx_make_packet(struct usbgem_dev *, mblk_t *);
+static mblk_t *urf_rx_make_packet(struct usbgem_dev *, mblk_t *);
+
+/* =============================================================== */
+/*
+ * I/O functions
+ */
+/* =============================================================== */
+#define OUTB(dp, p, v, errp, label) \
+ if ((*(errp) = usbgem_ctrl_out_val((dp), \
+ /* bmRequestType */ USB_DEV_REQ_HOST_TO_DEV \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ USB_REQ_SET_ADDRESS, \
+ /* wValue */ (p), \
+ /* wIndex */ 0, \
+ /* wLength */ 1, \
+ /* value */ (v))) != USB_SUCCESS) goto label
+
+#define OUTW(dp, p, v, errp, label) \
+ if ((*(errp) = usbgem_ctrl_out_val((dp), \
+ /* bmRequestType */ USB_DEV_REQ_HOST_TO_DEV \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ USB_REQ_SET_ADDRESS, \
+ /* wValue */ (p), \
+ /* wIndex */ 0, \
+ /* wLength */ 2, \
+ /* value */ (v))) != USB_SUCCESS) goto label
+
+#define OUTS(dp, p, buf, len, errp, label) \
+ if ((*(errp) = usbgem_ctrl_out((dp), \
+ /* bmRequestType */ USB_DEV_REQ_HOST_TO_DEV \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ USB_REQ_SET_ADDRESS, \
+ /* wValue */ (p), \
+ /* wIndex */ 0, \
+ /* wLength */ (len), \
+ /* value */ (buf), \
+ /* size */ (len))) != USB_SUCCESS) goto label
+
+#define IN(dp, p, vp, errp, label) \
+ if ((*(errp) = usbgem_ctrl_in_val((dp), \
+ /* bmRequestType */ USB_DEV_REQ_DEV_TO_HOST \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ USB_REQ_SET_ADDRESS, \
+ /* wValue */ (p), \
+ /* wIndex */ 0, \
+ /* wLength */ sizeof ((*vp)), \
+ /* valuep */ (vp))) != USB_SUCCESS) goto label
+
+#define INS(dp, p, buf, len, errp, label) \
+ if ((*(errp) = usbgem_ctrl_in((dp), \
+ /* bmRequestType */ USB_DEV_REQ_DEV_TO_HOST \
+ | USB_DEV_REQ_TYPE_VENDOR | USB_DEV_REQ_RCPT_DEV, \
+ /* bRequest */ USB_REQ_SET_ADDRESS, \
+ /* wValue */ (p), \
+ /* wIndex */ 0, \
+ /* wLength */ (len), \
+ /* valuep */ (buf), \
+ /* size */ (len))) != USB_SUCCESS) goto label
+
+/* =============================================================== */
+/*
+ * variables
+ */
+/* =============================================================== */
+static int urf_ppa = 0;
+
+/* =============================================================== */
+/*
+ * Hardware manupilation
+ */
+/* =============================================================== */
+static int
+urf_reset_chip(struct usbgem_dev *dp)
+{
+ int i;
+ int err;
+ uint8_t reg;
+ struct urf_dev *lp = dp->private;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ lp->cr = 0;
+ OUTB(dp, CR, lp->cr | CR_SOFT_RST, &err, usberr);
+
+ for (i = 0; i < 100; i++) {
+ IN(dp, CR, &reg, &err, usberr);
+ if ((reg & CR_SOFT_RST) == 0) {
+ return (USB_SUCCESS);
+ }
+ }
+ /* time out */
+ cmn_err(CE_WARN, "%s: failed to reset: timeout", dp->name);
+ return (USB_FAILURE);
+
+usberr:
+ cmn_err(CE_NOTE, "!%s: %s: usberr detected", dp->name, __func__);
+ return (USB_FAILURE);
+}
+
+/*
+ * Setup rtl8150
+ */
+static int
+urf_init_chip(struct usbgem_dev *dp)
+{
+ int i;
+ uint32_t val;
+ int err;
+ struct urf_dev *lp = dp->private;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /* ID registers: set later by urf_set_rx_filter */
+
+ /* Multicast registers: set later by urf_set_rx_filter */
+
+ /* Command register : Enable Tx and Rx before writing TCR and RCR */
+ lp->cr |= CR_RE | CR_TE;
+ OUTB(dp, CR, lp->cr, &err, usberr);
+
+ /* Transmit configration register : */
+ OUTB(dp, TCR, TCR_IFG_802_3, &err, usberr);
+
+ /* Receive configuration register : disable rx filter */
+ lp->rcr = RCR_TAIL | RCR_AER | RCR_AR;
+ OUTW(dp, RCR, lp->rcr, &err, usberr);
+#ifdef notdef
+ /* Media status register */
+ err = urf_set_media(dp);
+ CHECK_AND_JUMP(err, usberr);
+#endif
+ /* Configuration register 0: no need to change */
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: end (success)", dp->name, __func__));
+ return (USB_SUCCESS);
+
+usberr:
+ cmn_err(CE_NOTE, "!%s: %s: usberr detected", dp->name, __func__);
+ return (USB_FAILURE);
+}
+
+static int
+urf_start_chip(struct usbgem_dev *dp)
+{
+ struct urf_dev *lp = dp->private;
+
+ /* do nothing */
+ return (USB_SUCCESS);
+}
+
+static int
+urf_stop_chip(struct usbgem_dev *dp)
+{
+ return (urf_reset_chip(dp));
+}
+
+static int
+urf_get_stats(struct usbgem_dev *dp)
+{
+ /* do nothing */
+ return (USB_SUCCESS);
+}
+
+static uint_t
+urf_mcast_hash(struct usbgem_dev *dp, const uint8_t *addr)
+{
+ return (usbgem_ether_crc_be(addr));
+}
+
+static int
+urf_set_rx_filter(struct usbgem_dev *dp)
+{
+ int i;
+ uint16_t mode;
+ uint8_t mhash[8];
+ int err;
+ int16_t rcr;
+ struct urf_dev *lp = dp->private;
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: called, rxmode:%x",
+ dp->name, __func__, dp->rxmode));
+
+ if (lp->rcr & (RCR_AB | RCR_AD | RCR_AAM | RCR_AAP | RCR_AM)) {
+#ifdef notdef
+ /* disable rx filter before changing it. */
+ lp->rcr &= ~(RCR_AB | RCR_AD | RCR_AAM | RCR_AAP | RCR_AM);
+ OUTW(dp, RCR, lp->rcr, &err, usberr);
+#else
+ /* receive all packets while we change rx filter*/
+ OUTW(dp, RCR, lp->rcr | RCR_AAM | RCR_AAP, &err, usberr);
+#endif
+ }
+
+ mode = RCR_AB /* accept broadcast */
+ | RCR_AD; /* accept physical match */
+ bzero(mhash, sizeof (mhash));
+
+ if (dp->rxmode & RXMODE_PROMISC) {
+ /* promiscious mode implies all multicast and all physical */
+ mode |= RCR_AAM | RCR_AAP;
+ } else if ((dp->rxmode & RXMODE_ALLMULTI) || dp->mc_count > 64/2) {
+ /* accept all multicast packets */
+ mode |= RCR_AAM;
+ } else if (dp->mc_count > 0) {
+ /*
+ * make hash table to select interresting
+ * multicast address only.
+ */
+ mode |= RCR_AM;
+ for (i = 0; i < dp->mc_count; i++) {
+ uint_t h;
+ /* hash table is 64 = 2^6 bit width */
+ h = dp->mc_list[i].hash >> (32 - 6);
+ mhash[h / 8] |= 1 << (h % 8);
+ }
+ }
+ lp->rcr |= mode;
+
+ /* set mac address */
+ OUTS(dp, IDR, dp->cur_addr.ether_addr_octet, ETHERADDRL, &err, usberr);
+
+ /* set multicast hash table */
+ if (mode & RCR_AM) {
+ /* need to set up multicast hash table */
+ OUTS(dp, MAR, mhash, sizeof (mhash), &err, usberr);
+ }
+
+ OUTW(dp, RCR, lp->rcr, &err, usberr);
+
+#if DEBUG_LEVEL > 2
+ IN(dp, RCR, &rcr, &err, usberr);
+ cmn_err(CE_CONT, "!%s: %s: rcr:%b returned",
+ dp->name, __func__, rcr, RCR_BITS);
+#endif
+ return (USB_SUCCESS);
+
+usberr:
+ cmn_err(CE_NOTE, "!%s: %s: usberr detected", dp->name, __func__);
+ return (USB_FAILURE);
+}
+
+static int
+urf_set_media(struct usbgem_dev *dp)
+{
+ uint8_t new;
+ uint8_t old;
+ int err;
+ struct urf_dev *lp = dp->private;
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /* select duplex: do nothing */
+
+ /* select speed: do nothing */
+
+ /* flow control */
+ IN(dp, MSR, &old, &err, usberr);
+
+
+ /* setup flow control */
+ new = old & ~(MSR_TXFCE | MSR_RXFCE);
+ switch (dp->flow_control) {
+ case FLOW_CONTROL_SYMMETRIC:
+ new |= MSR_TXFCE | MSR_RXFCE;
+ break;
+
+ case FLOW_CONTROL_TX_PAUSE:
+ new |= MSR_TXFCE;
+ break;
+
+ case FLOW_CONTROL_RX_PAUSE:
+ new |= MSR_RXFCE;
+ break;
+
+ case FLOW_CONTROL_NONE:
+ default:
+ break;
+ }
+
+ if (new != old) {
+ OUTB(dp, MSR, new, &err, usberr);
+ }
+ DPRINTF(2, (CE_CONT, "!%s: %s: returned", dp->name, __func__));
+ return (USB_SUCCESS);
+
+usberr:
+ cmn_err(CE_NOTE, "!%s: %s: usberr detected", dp->name, __func__);
+ return (USB_FAILURE);
+}
+
+/*
+ * send/receive packet check
+ */
+static mblk_t *
+urf_tx_make_packet(struct usbgem_dev *dp, mblk_t *mp)
+{
+ size_t len;
+ mblk_t *new;
+ mblk_t *tp;
+ uint8_t *bp;
+ uint8_t *last_pos;
+
+ len = msgdsize(mp);
+
+ if (len < ETHERMIN || mp->b_cont != NULL || (len & 0x3f) == 0) {
+ /*
+ * re-allocate mp
+ */
+ len = max(len, ETHERMIN);
+
+ if ((len & 0x3f) == 0) {
+ /* workaround for buggy USB hba */
+ len++;
+ }
+
+ if ((new = allocb(len, 0)) == NULL) {
+ return (NULL);
+ }
+
+ /* copy contents of the buffer */
+ new->b_wptr = new->b_rptr + len;
+ bp = new->b_rptr;
+ for (tp = mp; tp; tp = tp->b_cont) {
+ len = tp->b_wptr - tp->b_rptr;
+ bcopy(tp->b_rptr, bp, len);
+ bp += len;
+ }
+
+ last_pos = new->b_wptr;
+ while (bp < last_pos) {
+ *bp++ = 0;
+ }
+
+ mp = new;
+ }
+
+ return (mp);
+}
+
+static void
+urf_dump_packet(struct usbgem_dev *dp, uint8_t *bp, int n)
+{
+ int i;
+
+ for (i = 0; i < n; i += 8, bp += 8) {
+ cmn_err(CE_CONT, "%02x %02x %02x %02x %02x %02x %02x %02x",
+ bp[0], bp[1], bp[2], bp[3], bp[4], bp[5], bp[6], bp[7]);
+ }
+}
+
+static mblk_t *
+urf_rx_make_packet(struct usbgem_dev *dp, mblk_t *mp)
+{
+ uint8_t *p;
+ uint16_t rxhd;
+ uint_t len;
+
+ ASSERT(mp != NULL);
+ len = msgdsize(mp);
+#ifdef DEBUG_LEVEL
+ DPRINTF(2, (CE_CONT, "!%s: time:%d %s: len:%d cont:%p",
+ dp->name, ddi_get_lbolt(), __func__, len, mp->b_cont));
+
+ if (urf_debug > 2) {
+ urf_dump_packet(dp, mp->b_rptr, max(6, len));
+ }
+#endif
+ if (len < ETHERMIN + ETHERFCSL) {
+ /* Too short */
+ dp->stats.runt++;
+ dp->stats.errrcv++;
+ return (NULL);
+ }
+
+ /* get Rx header which is placed at tail of the packet. */
+ p = mp->b_wptr - 4;
+ rxhd = (p[1] << 8) | p[0];
+ len = rxhd & RXHD_BYTECNT;
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: rsr:%b len:%d",
+ dp->name, __func__, rxhd, RXHD_BITS, len));
+
+ /* check if error happen */
+ if ((rxhd & (RXHD_VALID)) == 0) {
+ DPRINTF(-1, (CE_CONT, "!%s: %s: rxhd:%b",
+ dp->name, __func__, rxhd, RXHD_BITS));
+ if (rxhd & RXHD_RUNT) {
+ dp->stats.runt++;
+ }
+
+ dp->stats.errrcv++;
+ return (NULL);
+ }
+#ifdef notdef
+ /* check packet size */
+ if (len > ETHERMAX + ETHERFCSL) {
+ /* too long */
+ dp->stats.frame_too_long++;
+ dp->stats.errrcv++;
+ return (NULL);
+ } else if (len < ETHERMIN + ETHERFCSL) {
+ dp->stats.runt++;
+ dp->stats.errrcv++;
+ return (NULL);
+ }
+#endif
+ /* remove tailing crc field */
+ mp->b_wptr -= ETHERFCSL;
+ return (mp);
+}
+
+/*
+ * MII Interfaces
+ */
+static uint16_t
+urf_mii_read(struct usbgem_dev *dp, uint_t index, int *errp)
+{
+ int reg;
+ uint16_t val;
+
+ DPRINTF(4, (CE_CONT, "!%s: %s: called, ix:%d",
+ dp->name, __func__, index));
+
+ *errp = USB_SUCCESS;
+
+ switch (index) {
+ case MII_CONTROL:
+ reg = BMCR;
+ break;
+
+ case MII_STATUS:
+ reg = BMSR;
+ break;
+
+ case MII_AN_ADVERT:
+ reg = ANAR;
+ break;
+
+ case MII_AN_LPABLE:
+ reg = ANLP;
+ break;
+
+ case MII_AN_EXPANSION:
+ reg = ANER;
+ break;
+
+ default:
+ return (0);
+ }
+
+ IN(dp, reg, &val, errp, usberr);
+
+ if (index == MII_STATUS) {
+ uint8_t msr;
+ /*
+ * Fix MII status register as it does't have LINKUP and
+ * MFPRMBLSUPR bits.
+ */
+ IN(dp, MSR, &msr, errp, usberr);
+
+ val |= (MII_STATUS_MFPRMBLSUPR | MII_STATUS_LINKUP);
+ if ((msr & MSR_LINK) == 0) {
+ val &= ~MII_STATUS_LINKUP;
+ }
+ }
+
+ return (val);
+
+usberr:
+ cmn_err(CE_CONT,
+ "!%s: %s: usberr(%d) detected", dp->name, __func__, *errp);
+
+ return (0);
+}
+
+static void
+urf_mii_write(struct usbgem_dev *dp, uint_t index, uint16_t val, int *errp)
+{
+ int reg;
+
+ DPRINTF(5, (CE_CONT, "!%s: %s called", dp->name, __func__));
+
+ *errp = USB_SUCCESS;
+
+ switch (index) {
+ case MII_CONTROL:
+ reg = BMCR;
+ break;
+
+ case MII_STATUS:
+ reg = BMSR;
+ break;
+
+ case MII_AN_ADVERT:
+ reg = ANAR;
+ break;
+
+ case MII_AN_LPABLE:
+ reg = ANLP;
+ break;
+
+ case MII_AN_EXPANSION:
+ reg = ANER;
+ break;
+
+ default:
+ return;
+ }
+
+ OUTW(dp, reg, val, errp, usberr);
+usberr:
+ ;
+}
+
+/* ======================================================== */
+/*
+ * OS depend (device driver DKI) routine
+ */
+/* ======================================================== */
+static void
+urf_eeprom_dump(struct usbgem_dev *dp, int size)
+{
+ int i;
+ int err;
+ uint16_t w0, w1, w2, w3;
+
+ cmn_err(CE_CONT, "!%s: eeprom dump:", dp->name);
+ for (i = URF_EEPROM_BASE; i < size + URF_EEPROM_BASE; i += 8) {
+ IN(dp, i + 0, &w0, &err, usberr);
+ IN(dp, i + 2, &w1, &err, usberr);
+ IN(dp, i + 4, &w2, &err, usberr);
+ IN(dp, i + 6, &w3, &err, usberr);
+ cmn_err(CE_CONT, "!0x%02x: 0x%04x 0x%04x 0x%04x 0x%04x",
+ i - URF_EEPROM_BASE, w0, w1, w2, w3);
+ }
+usberr:
+ ;
+}
+
+static int
+urf_attach_chip(struct usbgem_dev *dp)
+{
+ int i;
+ uint8_t old;
+ uint_t new;
+ uint8_t reg;
+ int err;
+ struct urf_dev *lp = dp->private;
+
+ /*
+ * setup flow control bit in eeprom
+ */
+ IN(dp, URF_EEPROM_BASE + 9, &old, &err, usberr);
+
+ DPRINTF(0, (CE_CONT, "!%s: eeprom offset 9: %02x", dp->name, old));
+
+ if (dp->ugc.usbgc_flow_control != FLOW_CONTROL_NONE) {
+ /* enable PAUSE bit */
+ new = old | 0x04;
+ } else {
+ /* clear PAUSE bit */
+ new = old & ~0x04;
+ }
+ if (new != old) {
+ /* make eeprom writable */
+ OUTB(dp, CR, lp->cr | CR_WEPROM, &err, usberr);
+
+ /* eerom allows only word access for writing */
+ IN(dp, URF_EEPROM_BASE + 8, &reg, &err, usberr);
+ new = (new << 8) | reg;
+
+ OUTW(dp, URF_EEPROM_BASE + 8, new, &err, usberr);
+
+ /* make eeprom non-writable */
+ OUTB(dp, CR, lp->cr, &err, usberr);
+ }
+
+ /*
+ * load EEPROM contents into nic
+ */
+ OUTB(dp, CR, lp->cr | CR_AUTOLOAD, &err, usberr);
+ CHECK_AND_JUMP(err, usberr);
+
+ for (i = 0; i < 100; i++) {
+ IN(dp, CR, &reg, &err, usberr);
+ if ((reg & CR_AUTOLOAD) == 0) {
+ goto autoload_done;
+ }
+ }
+ /* timeout */
+ cmn_err(CE_WARN, "%s: %s: failed to autoload: timeout",
+ dp->name, __func__);
+ goto usberr;
+
+autoload_done:
+ /*
+ * mac address in EEPROM has loaded to ID registers.
+ */
+ INS(dp, IDR, dp->dev_addr.ether_addr_octet, ETHERADDRL, &err, usberr);
+
+ /* no need to scan phy */
+ dp->mii_phy_addr = -1;
+
+#if DEBUG_LEVEL > 2
+ urf_eeprom_dump(dp, 0x80);
+#endif
+
+#ifdef CONFIG_VLAN
+ dp->misc_flag = USBGEM_VLAN;
+#endif
+ return (USB_SUCCESS);
+
+usberr:
+ cmn_err(CE_WARN, "%s: urf_attach_chip: usb error detected", dp->name);
+ return (USB_FAILURE);
+}
+
+static int
+urfattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ int i;
+ ddi_iblock_cookie_t c;
+ int ret;
+ int unit;
+ struct chip_info *p;
+ const char *drv_name;
+ struct usbgem_dev *dp;
+ void *base;
+ struct usbgem_conf *ugcp;
+ struct urf_dev *lp;
+
+ unit = ddi_get_instance(dip);
+ drv_name = ddi_driver_name(dip);
+
+ DPRINTF(3, (CE_CONT, "!%s%d: %s: called, cmd:%d",
+ drv_name, __func__, unit, cmd));
+
+ if (cmd == DDI_ATTACH) {
+ /*
+ * Check if the chip is supported.
+ */
+
+ /*
+ * Check the chip if it is really realtek rtl8150
+ */
+
+ /*
+ * construct usbgem configration
+ */
+ ugcp = kmem_zalloc(sizeof (*ugcp), KM_SLEEP);
+
+ /* name */
+ sprintf(ugcp->usbgc_name,
+ "%s%d(ppa=%d)", drv_name, unit, urf_ppa);
+#ifdef USBGEM_CONFIG_GLDv3
+ ugcp->usbgc_ppa = urf_ppa;
+#else
+ ugcp->usbgc_ppa = unit;
+#endif
+ ugcp->usbgc_ifnum = 0;
+ ugcp->usbgc_alt = 0;
+
+ ugcp->usbgc_tx_list_max = 16;
+
+ /* the rx status partially replaces FCS */
+ ugcp->usbgc_rx_header_len = 0;
+ ugcp->usbgc_rx_list_max = 64;
+
+ /* time out parameters */
+ ugcp->usbgc_tx_timeout = USBGEM_TX_TIMEOUT;
+ ugcp->usbgc_tx_timeout_interval = ONESEC;
+
+ /* flow control */
+ ugcp->usbgc_flow_control = FLOW_CONTROL_RX_PAUSE;
+
+ /* MII timeout parameters */
+ ugcp->usbgc_mii_link_watch_interval = ONESEC;
+ ugcp->usbgc_mii_an_watch_interval = ONESEC/5;
+ ugcp->usbgc_mii_reset_timeout = MII_RESET_TIMEOUT; /* 1 sec */
+ ugcp->usbgc_mii_an_timeout = MII_AN_TIMEOUT; /* 5 sec */
+ ugcp->usbgc_mii_an_wait = (25*ONESEC)/10;
+ ugcp->usbgc_mii_linkdown_timeout = MII_LINKDOWN_TIMEOUT;
+
+ ugcp->usbgc_mii_an_delay = ONESEC/10;
+ ugcp->usbgc_mii_linkdown_action = MII_ACTION_RSA;
+ ugcp->usbgc_mii_linkdown_timeout_action = MII_ACTION_RESET;
+ ugcp->usbgc_mii_dont_reset = B_FALSE;
+
+ /* I/O methods */
+
+ /* mac operation */
+ ugcp->usbgc_attach_chip = &urf_attach_chip;
+ ugcp->usbgc_reset_chip = &urf_reset_chip;
+ ugcp->usbgc_init_chip = &urf_init_chip;
+ ugcp->usbgc_start_chip = &urf_start_chip;
+ ugcp->usbgc_stop_chip = &urf_stop_chip;
+ ugcp->usbgc_multicast_hash = &urf_mcast_hash;
+
+ ugcp->usbgc_set_rx_filter = &urf_set_rx_filter;
+ ugcp->usbgc_set_media = &urf_set_media;
+ ugcp->usbgc_get_stats = &urf_get_stats;
+#ifdef notdef
+ ugcp->usbgc_interrupt = &urf_interrupt;
+#else
+ ugcp->usbgc_interrupt = NULL;
+#endif
+ /* packet operation */
+ ugcp->usbgc_tx_make_packet = &urf_tx_make_packet;
+ ugcp->usbgc_rx_make_packet = &urf_rx_make_packet;
+
+ /* mii operations */
+ ugcp->usbgc_mii_probe = &usbgem_mii_probe_default;
+ ugcp->usbgc_mii_init = &usbgem_mii_init_default;
+ ugcp->usbgc_mii_config = &usbgem_mii_config_default;
+ ugcp->usbgc_mii_read = &urf_mii_read;
+ ugcp->usbgc_mii_write = &urf_mii_write;
+
+ /* mtu */
+ ugcp->usbgc_min_mtu = ETHERMTU;
+ ugcp->usbgc_max_mtu = ETHERMTU;
+ ugcp->usbgc_default_mtu = ETHERMTU;
+
+ lp = kmem_zalloc(sizeof (struct urf_dev), KM_SLEEP);
+ lp->chip = p;
+
+ ddi_set_driver_private(dip, NULL);
+
+ dp = usbgem_do_attach(dip, ugcp, lp, sizeof (struct urf_dev));
+
+ kmem_free(ugcp, sizeof (*ugcp));
+
+ if (dp != NULL) {
+ urf_ppa++;
+ return (DDI_SUCCESS);
+ }
+
+err_free_mem:
+ kmem_free(lp, sizeof (struct urf_dev));
+err_close_pipe:
+err:
+ return (DDI_FAILURE);
+ }
+ if (cmd == DDI_RESUME) {
+ return (usbgem_resume(dip));
+ }
+ return (DDI_FAILURE);
+}
+
+static int
+urfdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ int ret;
+
+ if (cmd == DDI_DETACH) {
+ ret = usbgem_do_detach(dip);
+ if (ret != DDI_SUCCESS) {
+ return (DDI_FAILURE);
+ }
+ urf_ppa--;
+ return (DDI_SUCCESS);
+ }
+ if (cmd == DDI_SUSPEND) {
+ return (usbgem_suspend(dip));
+ }
+ return (DDI_FAILURE);
+}
+
+/* ======================================================== */
+/*
+ * OS depend (loadable streams driver) routine
+ */
+/* ======================================================== */
+#ifdef USBGEM_CONFIG_GLDv3
+USBGEM_STREAM_OPS(urf_ops, urfattach, urfdetach);
+#else
+static struct module_info urfminfo = {
+ 0, /* mi_idnum */
+ "urf", /* mi_idname */
+ 0, /* mi_minpsz */
+ ETHERMTU, /* mi_maxpsz */
+ ETHERMTU*128, /* mi_hiwat */
+ 1, /* mi_lowat */
+};
+
+static struct qinit urfrinit = {
+ (int (*)()) NULL, /* qi_putp */
+ usbgem_rsrv, /* qi_srvp */
+ usbgem_open, /* qi_qopen */
+ usbgem_close, /* qi_qclose */
+ (int (*)()) NULL, /* qi_qadmin */
+ &urfminfo, /* qi_minfo */
+ NULL /* qi_mstat */
+};
+
+static struct qinit urfwinit = {
+ usbgem_wput, /* qi_putp */
+ usbgem_wsrv, /* qi_srvp */
+ (int (*)()) NULL, /* qi_qopen */
+ (int (*)()) NULL, /* qi_qclose */
+ (int (*)()) NULL, /* qi_qadmin */
+ &urfminfo, /* qi_minfo */
+ NULL /* qi_mstat */
+};
+
+static struct streamtab urf_info = {
+ &urfrinit, /* st_rdinit */
+ &urfwinit, /* st_wrinit */
+ NULL, /* st_muxrinit */
+ NULL /* st_muxwrinit */
+};
+
+static struct cb_ops cb_urf_ops = {
+ nulldev, /* cb_open */
+ nulldev, /* cb_close */
+ nodev, /* cb_strategy */
+ nodev, /* cb_print */
+ nodev, /* cb_dump */
+ nodev, /* cb_read */
+ nodev, /* cb_write */
+ nodev, /* cb_ioctl */
+ nodev, /* cb_devmap */
+ nodev, /* cb_mmap */
+ nodev, /* cb_segmap */
+ nochpoll, /* cb_chpoll */
+ ddi_prop_op, /* cb_prop_op */
+ &urf_info, /* cb_stream */
+ D_NEW|D_MP /* cb_flag */
+};
+
+static struct dev_ops urf_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* devo_refcnt */
+ usbgem_getinfo, /* devo_getinfo */
+ nulldev, /* devo_identify */
+ nulldev, /* devo_probe */
+ urfattach, /* devo_attach */
+ urfdetach, /* devo_detach */
+ nodev, /* devo_reset */
+ &cb_urf_ops, /* devo_cb_ops */
+ NULL, /* devo_bus_ops */
+ usbgem_power, /* devo_power */
+#if DEVO_REV >= 4
+ usbgem_quiesce, /* devo_quiesce */
+#endif
+
+};
+#endif
+
+static struct modldrv modldrv = {
+ &mod_driverops, /* Type of module. This one is a driver */
+ ident,
+ &urf_ops, /* driver ops */
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, &modldrv, NULL
+};
+
+/* ======================================================== */
+/*
+ * _init : done
+ */
+/* ======================================================== */
+int
+_init(void)
+{
+ int status;
+
+ DPRINTF(2, (CE_CONT, "!urf: _init: called"));
+
+ status = usbgem_mod_init(&urf_ops, "urf");
+ if (status != DDI_SUCCESS) {
+ return (status);
+ }
+ status = mod_install(&modlinkage);
+ if (status != DDI_SUCCESS) {
+ usbgem_mod_fini(&urf_ops);
+ }
+ return (status);
+}
+
+/*
+ * _fini : done
+ */
+int
+_fini(void)
+{
+ int status;
+
+ DPRINTF(2, (CE_CONT, "!urf: _fini: called"));
+ status = mod_remove(&modlinkage);
+ if (status == DDI_SUCCESS) {
+ usbgem_mod_fini(&urf_ops);
+ }
+ return (status);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
diff --git a/usr/src/uts/common/io/usbgem/usbgem.c b/usr/src/uts/common/io/usbgem/usbgem.c
new file mode 100644
index 0000000000..a42f7119ef
--- /dev/null
+++ b/usr/src/uts/common/io/usbgem/usbgem.c
@@ -0,0 +1,6389 @@
+/*
+ * usbgem.c: General USB to Fast Ethernet mac driver framework
+ *
+ * Copyright (c) 2002-2012 Masayuki Murayama. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the author nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+#pragma ident "@(#)usbgem.c 1.6 12/02/09"
+
+/*
+ * Change log
+ */
+
+/*
+ * TODO:
+ * implement DELAYED_START
+ */
+
+/*
+ * System Header files.
+ */
+#include <sys/types.h>
+#include <sys/conf.h>
+#include <sys/debug.h>
+#include <sys/kmem.h>
+#include <sys/vtrace.h>
+#include <sys/ethernet.h>
+#include <sys/modctl.h>
+#include <sys/errno.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#ifndef USBGEM_CONFIG_GLDv3
+#include <sys/dlpi.h>
+#include <sys/strsubr.h>
+#endif
+#include <sys/stream.h> /* required for MBLK* */
+#include <sys/strsun.h> /* required for mionack() */
+#include <sys/byteorder.h>
+
+#include <sys/usb/usba.h>
+#ifdef USBGEM_CONFIG_GLDv3
+#include <inet/common.h>
+#include <inet/led.h>
+#include <inet/mi.h>
+#include <inet/nd.h>
+#endif
+
+/* supplement definitions */
+extern const char *usb_str_cr(usb_cr_t);
+
+#ifndef USBGEM_CONFIG_GLDv3
+#pragma weak gld_linkstate
+#endif
+#include <sys/note.h>
+
+#include "usbgem_mii.h"
+#include "usbgem.h"
+
+#ifdef MODULE
+char ident[] = "usb general ethernet mac driver v" VERSION;
+#else
+extern char ident[];
+#endif
+
+/* Debugging support */
+#ifdef USBGEM_DEBUG_LEVEL
+static int usbgem_debug = USBGEM_DEBUG_LEVEL;
+#define DPRINTF(n, args) if (usbgem_debug > (n)) cmn_err args
+#else
+#define DPRINTF(n, args)
+#endif
+
+/*
+ * Useful macros and typedefs
+ */
+#define ROUNDUP(x, a) (((x) + (a) - 1) & ~((a) - 1))
+#define DEFAULT_PIPE(dp) ((dp)->reg_data->dev_default_ph)
+#define VTAG_SIZE 4
+#define BOOLEAN(x) ((x) != 0)
+/*
+ * configuration parameters
+ */
+#define USBDRV_MAJOR_VER 2
+#define USBDRV_MINOR_VER 0
+
+#define ETHERHEADERL (sizeof (struct ether_header))
+#define MAXPKTLEN(dp) ((dp)->mtu + ETHERHEADERL)
+#define MAXPKTBUF(dp) ((dp)->mtu + ETHERHEADERL + ETHERFCSL)
+
+#define WATCH_INTERVAL_FAST drv_usectohz(100*1000)
+
+#define STOP_GRACEFUL B_TRUE
+
+/*
+ * Private functions
+ */
+static int usbgem_open_pipes(struct usbgem_dev *dp);
+static int usbgem_close_pipes(struct usbgem_dev *dp);
+static void usbgem_intr_cb(usb_pipe_handle_t, usb_intr_req_t *);
+static void usbgem_bulkin_cb(usb_pipe_handle_t, usb_bulk_req_t *);
+static void usbgem_bulkout_cb(usb_pipe_handle_t, usb_bulk_req_t *);
+
+static int usbgem_mii_start(struct usbgem_dev *);
+static void usbgem_mii_stop(struct usbgem_dev *);
+
+/* local buffer management */
+static int usbgem_init_rx_buf(struct usbgem_dev *);
+
+/* internal mac interfaces */
+static void usbgem_tx_timeout(struct usbgem_dev *);
+static void usbgem_mii_link_watcher(struct usbgem_dev *);
+static int usbgem_mac_init(struct usbgem_dev *);
+static int usbgem_mac_start(struct usbgem_dev *);
+static int usbgem_mac_stop(struct usbgem_dev *, int, boolean_t);
+static void usbgem_mac_ioctl(struct usbgem_dev *, queue_t *, mblk_t *);
+
+int usbgem_speed_value[] = {10, 100, 1000};
+
+static int usbgem_ctrl_retry = 5;
+
+/* usb event support */
+static int usbgem_disconnect_cb(dev_info_t *dip);
+static int usbgem_reconnect_cb(dev_info_t *dip);
+int usbgem_suspend(dev_info_t *dip);
+int usbgem_resume(dev_info_t *dip);
+
+static uint8_t usbgem_bcastaddr[] = {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+
+#ifdef MODULE
+extern struct mod_ops mod_miscops;
+
+static struct modlmisc modlmisc = {
+ &mod_miscops,
+ "usbgem v" VERSION,
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, &modlmisc, NULL
+};
+
+/*
+ * _init : done
+ */
+int
+_init(void)
+{
+ int status;
+
+ DPRINTF(2, (CE_CONT, "!usbgem: _init: called"));
+ status = mod_install(&modlinkage);
+
+ return (status);
+}
+
+/*
+ * _fini : done
+ */
+int
+_fini(void)
+{
+ int status;
+
+ DPRINTF(2, (CE_CONT, "!usbgem: _fini: called"));
+ status = mod_remove(&modlinkage);
+ return (status);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+#endif /* MODULE */
+
+/* ============================================================== */
+/*
+ * Ether CRC calculation utilities
+ */
+/* ============================================================== */
+/*
+ * Ether CRC calculation according to 21143 data sheet
+ */
+#define CRC32_POLY_LE 0xedb88320
+uint32_t
+usbgem_ether_crc_le(const uint8_t *addr)
+{
+ int idx;
+ int bit;
+ uint_t data;
+ uint32_t crc = 0xffffffff;
+
+ crc = 0xffffffff;
+ for (idx = 0; idx < ETHERADDRL; idx++) {
+ for (data = *addr++, bit = 0; bit < 8; bit++, data >>= 1) {
+ crc = (crc >> 1) ^
+ (((crc ^ data) & 1) ? CRC32_POLY_LE : 0);
+ }
+ }
+ return (crc);
+}
+
+#define CRC32_POLY_BE 0x04c11db7
+uint32_t
+usbgem_ether_crc_be(const uint8_t *addr)
+{
+ int idx;
+ int bit;
+ uint_t data;
+ uint32_t crc;
+
+ crc = 0xffffffff;
+ for (idx = 0; idx < ETHERADDRL; idx++) {
+ for (data = *addr++, bit = 0; bit < 8; bit++, data >>= 1) {
+ crc = (crc << 1) ^
+ ((((crc >> 31) ^ data) & 1) ? CRC32_POLY_BE : 0);
+ }
+ }
+ return (crc);
+}
+
+int
+usbgem_prop_get_int(struct usbgem_dev *dp, char *prop_template, int def_val)
+{
+ char propname[32];
+
+ (void) sprintf(propname, prop_template, dp->name);
+
+ return (ddi_prop_get_int(DDI_DEV_T_ANY, dp->dip,
+ DDI_PROP_DONTPASS, propname, def_val));
+}
+
+static int
+usbgem_population(uint32_t x)
+{
+ int i;
+ int cnt;
+
+ cnt = 0;
+ for (i = 0; i < 32; i++) {
+ if (x & (1 << i)) {
+ cnt++;
+ }
+ }
+ return (cnt);
+}
+
+static clock_t
+usbgem_timestamp_nz()
+{
+ clock_t now;
+ now = ddi_get_lbolt();
+ return (now ? now : (clock_t)1);
+}
+
+#ifdef USBGEM_DEBUG_LEVEL
+#ifdef USBGEM_DEBUG_VLAN
+#ifdef notdef
+#include <netinet/in.h>
+#endif
+static void
+usbgem_dump_packet(struct usbgem_dev *dp, char *title, mblk_t *mp,
+ boolean_t check_cksum)
+{
+ char msg[180];
+ uint8_t buf[18+20+20];
+ uint8_t *p;
+ size_t offset;
+ uint_t ethertype;
+ uint_t proto;
+ uint_t ipproto = 0;
+ uint_t iplen;
+ uint_t iphlen;
+ uint_t tcplen;
+ uint_t udplen;
+ uint_t cksum;
+ int rest;
+ int len;
+ char *bp;
+ mblk_t *tp;
+ extern uint_t ip_cksum(mblk_t *, int, uint32_t);
+
+ msg[0] = 0;
+ bp = msg;
+
+ rest = sizeof (buf);
+ offset = 0;
+ for (tp = mp; tp; tp = tp->b_cont) {
+ len = tp->b_wptr - tp->b_rptr;
+ len = min(rest, len);
+ bcopy(tp->b_rptr, &buf[offset], len);
+ rest -= len;
+ offset += len;
+ if (rest == 0) {
+ break;
+ }
+ }
+
+ offset = 0;
+ p = &buf[offset];
+
+ /* ethernet address */
+ sprintf(bp,
+ "ether: %02x:%02x:%02x:%02x:%02x:%02x"
+ " -> %02x:%02x:%02x:%02x:%02x:%02x",
+ p[6], p[7], p[8], p[9], p[10], p[11],
+ p[0], p[1], p[2], p[3], p[4], p[5]);
+ bp = &msg[strlen(msg)];
+
+ /* vlag tag and etherrtype */
+ ethertype = GET_ETHERTYPE(p);
+ if (ethertype == VTAG_TPID) {
+ sprintf(bp, " vtag:0x%04x", GET_NET16(&p[14]));
+ bp = &msg[strlen(msg)];
+
+ offset += VTAG_SIZE;
+ p = &buf[offset];
+ ethertype = GET_ETHERTYPE(p);
+ }
+ sprintf(bp, " type:%04x", ethertype);
+ bp = &msg[strlen(msg)];
+
+ /* ethernet packet length */
+ sprintf(bp, " mblklen:%d", msgdsize(mp));
+ bp = &msg[strlen(msg)];
+ if (mp->b_cont) {
+ sprintf(bp, "(");
+ bp = &msg[strlen(msg)];
+ for (tp = mp; tp; tp = tp->b_cont) {
+ if (tp == mp) {
+ sprintf(bp, "%d", tp->b_wptr - tp->b_rptr);
+ } else {
+ sprintf(bp, "+%d", tp->b_wptr - tp->b_rptr);
+ }
+ bp = &msg[strlen(msg)];
+ }
+ sprintf(bp, ")");
+ bp = &msg[strlen(msg)];
+ }
+
+ if (ethertype != ETHERTYPE_IP) {
+ goto x;
+ }
+
+ /* ip address */
+ offset += sizeof (struct ether_header);
+ p = &buf[offset];
+ ipproto = p[9];
+ iplen = GET_NET16(&p[2]);
+ sprintf(bp, ", ip: %d.%d.%d.%d -> %d.%d.%d.%d proto:%d iplen:%d",
+ p[12], p[13], p[14], p[15],
+ p[16], p[17], p[18], p[19],
+ ipproto, iplen);
+ bp = (void *)&msg[strlen(msg)];
+
+ iphlen = (p[0] & 0xf) * 4;
+
+ /* cksum for psuedo header */
+ cksum = *(uint16_t *)&p[12];
+ cksum += *(uint16_t *)&p[14];
+ cksum += *(uint16_t *)&p[16];
+ cksum += *(uint16_t *)&p[18];
+ cksum += BE_16(ipproto);
+
+ /* tcp or udp protocol header */
+ offset += iphlen;
+ p = &buf[offset];
+ if (ipproto == IPPROTO_TCP) {
+ tcplen = iplen - iphlen;
+ sprintf(bp, ", tcp: len:%d cksum:%x",
+ tcplen, GET_NET16(&p[16]));
+ bp = (void *)&msg[strlen(msg)];
+
+ if (check_cksum) {
+ cksum += BE_16(tcplen);
+ cksum = (uint16_t)ip_cksum(mp, offset, cksum);
+ sprintf(bp, " (%s)",
+ (cksum == 0 || cksum == 0xffff) ? "ok" : "ng");
+ bp = (void *)&msg[strlen(msg)];
+ }
+ } else if (ipproto == IPPROTO_UDP) {
+ udplen = GET_NET16(&p[4]);
+ sprintf(bp, ", udp: len:%d cksum:%x",
+ udplen, GET_NET16(&p[6]));
+ bp = (void *)&msg[strlen(msg)];
+
+ if (GET_NET16(&p[6]) && check_cksum) {
+ cksum += *(uint16_t *)&p[4];
+ cksum = (uint16_t)ip_cksum(mp, offset, cksum);
+ sprintf(bp, " (%s)",
+ (cksum == 0 || cksum == 0xffff) ? "ok" : "ng");
+ bp = (void *)&msg[strlen(msg)];
+ }
+ }
+x:
+ cmn_err(CE_CONT, "!%s: %s: %s", dp->name, title, msg);
+}
+#endif /* USBGEM_DEBUG_VLAN */
+#endif /* USBGEM_DEBUG_LEVEL */
+
+#ifdef GEM_GCC_RUNTIME
+/*
+ * gcc3 runtime routines
+ */
+#pragma weak memcmp
+int
+memcmp(const void *s1, const void *s2, size_t n)
+{
+ int i;
+ int ret;
+
+ ret = 0;
+ for (i = 0; i < n; i++) {
+ ret = (int)((uint8_t *)s1)[i] - (int)((uint8_t *)s2)[i];
+ if (ret) {
+ return (ret);
+ }
+ }
+ return (0);
+}
+
+#pragma weak memset
+void *
+memset(void *s, int c, size_t n)
+{
+ if ((c & 0xff) == 0) {
+ bzero(s, n);
+ } else {
+ while (n--) {
+ ((uint8_t *)s)[n] = c;
+ }
+ }
+ return (s);
+}
+
+#pragma weak _memcpy = memcpy
+#pragma weak memcpy
+void *
+memcpy(void *s1, const void *s2, size_t n)
+{
+ bcopy(s2, s1, n);
+ return (s1);
+}
+#endif /* GEM_GCC_RUNTIME */
+/* ============================================================== */
+/*
+ * hardware operations
+ */
+/* ============================================================== */
+static int
+usbgem_hal_reset_chip(struct usbgem_dev *dp)
+{
+ int err;
+
+ sema_p(&dp->hal_op_lock);
+ err = (*dp->ugc.usbgc_reset_chip)(dp);
+ sema_v(&dp->hal_op_lock);
+ return (err);
+}
+
+static int
+usbgem_hal_init_chip(struct usbgem_dev *dp)
+{
+ int err;
+
+ sema_p(&dp->hal_op_lock);
+ err = (*dp->ugc.usbgc_init_chip)(dp);
+ sema_v(&dp->hal_op_lock);
+ return (err);
+}
+
+static int
+usbgem_hal_attach_chip(struct usbgem_dev *dp)
+{
+ int err;
+
+ sema_p(&dp->hal_op_lock);
+ err = (*dp->ugc.usbgc_attach_chip)(dp);
+ sema_v(&dp->hal_op_lock);
+ return (err);
+}
+
+static int
+usbgem_hal_set_rx_filter(struct usbgem_dev *dp)
+{
+ int err;
+
+ sema_p(&dp->hal_op_lock);
+ err = (*dp->ugc.usbgc_set_rx_filter)(dp);
+ sema_v(&dp->hal_op_lock);
+ return (err);
+}
+
+static int
+usbgem_hal_set_media(struct usbgem_dev *dp)
+{
+ int err;
+
+ sema_p(&dp->hal_op_lock);
+ err = (*dp->ugc.usbgc_set_media)(dp);
+ sema_v(&dp->hal_op_lock);
+ return (err);
+}
+
+static int
+usbgem_hal_start_chip(struct usbgem_dev *dp)
+{
+ int err;
+
+ sema_p(&dp->hal_op_lock);
+ err = (*dp->ugc.usbgc_start_chip)(dp);
+ sema_v(&dp->hal_op_lock);
+ return (err);
+}
+
+static int
+usbgem_hal_stop_chip(struct usbgem_dev *dp)
+{
+ int err;
+
+ sema_p(&dp->hal_op_lock);
+ err = (*dp->ugc.usbgc_stop_chip)(dp);
+ sema_v(&dp->hal_op_lock);
+ return (err);
+}
+
+static int
+usbgem_hal_get_stats(struct usbgem_dev *dp)
+{
+ int err;
+
+ sema_p(&dp->hal_op_lock);
+ err = (*dp->ugc.usbgc_get_stats)(dp);
+ sema_v(&dp->hal_op_lock);
+ return (err);
+}
+
+
+/* ============================================================== */
+/*
+ * USB pipe management
+ */
+/* ============================================================== */
+static boolean_t
+usbgem_rx_start_unit(struct usbgem_dev *dp, usb_bulk_req_t *req)
+{
+ mblk_t *mp;
+ int err;
+ usb_flags_t flags;
+
+ ASSERT(req);
+
+ mp = allocb(dp->rx_buf_len, BPRI_MED);
+ if (mp == NULL) {
+ cmn_err(CE_WARN, "!%s: %s: failed to allocate mblk",
+ dp->name, __func__);
+ goto err;
+ }
+
+ req->bulk_len = dp->rx_buf_len;
+ req->bulk_data = mp;
+ req->bulk_client_private = (usb_opaque_t)dp;
+ req->bulk_timeout = 0;
+ req->bulk_attributes = USB_ATTRS_SHORT_XFER_OK;
+ req->bulk_cb = usbgem_bulkin_cb;
+ req->bulk_exc_cb = usbgem_bulkin_cb;
+ req->bulk_completion_reason = 0;
+ req->bulk_cb_flags = 0;
+
+ flags = 0;
+ err = usb_pipe_bulk_xfer(dp->bulkin_pipe, req, flags);
+
+ if (err != USB_SUCCESS) {
+ cmn_err(CE_WARN, "%s: failed to bulk_xfer for rx, err:%d",
+ dp->name, err);
+
+ /* free req and mp */
+ usb_free_bulk_req(req);
+ goto err;
+ }
+ return (B_TRUE);
+err:
+ return (B_FALSE);
+}
+
+/* ============================================================== */
+/*
+ * Rx/Tx buffer management
+ */
+/* ============================================================== */
+static int
+usbgem_init_rx_buf(struct usbgem_dev *dp)
+{
+ int i;
+ usb_bulk_req_t *req;
+
+ ASSERT(dp->mac_state == MAC_STATE_ONLINE);
+
+ for (i = 0; i < dp->ugc.usbgc_rx_list_max; i++) {
+ req = usb_alloc_bulk_req(dp->dip, 0, USB_FLAGS_SLEEP);
+ if (req == NULL) {
+ cmn_err(CE_WARN,
+ "!%s: %s: failed to allocate bulkreq for rx",
+ dp->name, __func__);
+ return (USB_FAILURE);
+ }
+ if (!usbgem_rx_start_unit(dp, req)) {
+ return (USB_FAILURE);
+ }
+ mutex_enter(&dp->rxlock);
+ dp->rx_busy_cnt++;
+ mutex_exit(&dp->rxlock);
+ }
+ return (USB_SUCCESS);
+}
+
+/* ============================================================== */
+/*
+ * memory resource management
+ */
+/* ============================================================== */
+static int
+usbgem_free_memory(struct usbgem_dev *dp)
+{
+ usb_bulk_req_t *req;
+
+ /* free all tx requst structure */
+ while ((req = dp->tx_free_list) != NULL) {
+ dp->tx_free_list =
+ (usb_bulk_req_t *)req->bulk_client_private;
+ req->bulk_data = NULL;
+ usb_free_bulk_req(req);
+ }
+ return (USB_SUCCESS);
+}
+
+static int
+usbgem_alloc_memory(struct usbgem_dev *dp)
+{
+ int i;
+ usb_bulk_req_t *req;
+
+ /* allocate tx requests */
+ dp->tx_free_list = NULL;
+ for (i = 0; i < dp->ugc.usbgc_tx_list_max; i++) {
+ req = usb_alloc_bulk_req(dp->dip, 0, USB_FLAGS_SLEEP);
+ if (req == NULL) {
+ cmn_err(CE_WARN,
+ "%s:%s failed to allocate tx requests",
+ dp->name, __func__);
+
+ /* free partially allocated tx requests */
+ (void) usbgem_free_memory(dp);
+ return (USB_FAILURE);
+ }
+
+ /* add the new one allocated into tx free list */
+ req->bulk_client_private = (usb_opaque_t)dp->tx_free_list;
+ dp->tx_free_list = req;
+ }
+
+ return (USB_SUCCESS);
+}
+
+/* ========================================================== */
+/*
+ * Start transmission.
+ * Return zero on success,
+ */
+/* ========================================================== */
+
+#ifdef TXTIMEOUT_TEST
+static int usbgem_send_cnt = 0;
+#endif
+
+/*
+ * usbgem_send is used only to send data packet into ethernet line.
+ */
+static mblk_t *
+usbgem_send_common(struct usbgem_dev *dp, mblk_t *mp, uint32_t flags)
+{
+ int err;
+ mblk_t *new;
+ usb_bulk_req_t *req;
+ int mcast;
+ int bcast;
+ int len;
+ boolean_t intr;
+ usb_flags_t usb_flags = 0;
+#ifdef USBGEM_DEBUG_LEVEL
+ usb_pipe_state_t p_state;
+#endif
+ DPRINTF(2, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ intr = (flags & 1) != 0;
+ len = msgdsize(mp);
+ bcast = 0;
+ mcast = 0;
+ if (mp->b_rptr[0] & 1) {
+ if (bcmp(mp->b_rptr, &usbgem_bcastaddr, ETHERADDRL) == 0) {
+ bcast = 1;
+ } else {
+ mcast = 1;
+ }
+ }
+ new = (*dp->ugc.usbgc_tx_make_packet)(dp, mp);
+ if (new == NULL) {
+ /*
+ * no memory resource. we don't stop downstream,
+ * we just discard the packet.
+ */
+ DPRINTF(0, (CE_CONT, "!%s: %s: no memory",
+ dp->name, __func__));
+ freemsg(mp);
+
+ mutex_enter(&dp->txlock);
+ dp->stats.noxmtbuf++;
+ dp->stats.errxmt++;
+ mutex_exit(&dp->txlock);
+
+ return (NULL);
+ }
+
+ ASSERT(new->b_cont == NULL);
+
+ mutex_enter(&dp->txlock);
+ if (dp->tx_free_list == NULL) {
+ /*
+ * no tx free slot
+ */
+ ASSERT(dp->tx_busy_cnt == dp->ugc.usbgc_tx_list_max);
+ mutex_exit(&dp->txlock);
+
+ DPRINTF(4, (CE_CONT, "!%s: %s: no free slot",
+ dp->name, __func__));
+ if (new && new != mp) {
+ /* free reallocated message */
+ freemsg(new);
+ }
+ return (mp);
+ }
+ req = dp->tx_free_list;
+ dp->tx_free_list = (usb_bulk_req_t *)req->bulk_client_private;
+ dp->tx_busy_cnt++;
+
+ if (dp->tx_free_list == NULL) {
+ intr = B_TRUE;
+ }
+ if (intr) {
+ dp->tx_intr_pended++;
+ }
+ DB_TCI(new) = intr;
+#ifdef USBGEM_DEBUG_LEVEL
+ new->b_datap->db_cksum32 = dp->tx_seq_num;
+ dp->tx_seq_num++;
+#endif
+ dp->stats.obytes += len;
+ dp->stats.opackets++;
+ if (bcast | mcast) {
+ dp->stats.obcast += bcast;
+ dp->stats.omcast += mcast;
+ }
+ mutex_exit(&dp->txlock);
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: sending", dp->name, __func__));
+
+ req->bulk_len = (long)new->b_wptr - (long)new->b_rptr;
+ req->bulk_data = new;
+ req->bulk_client_private = (usb_opaque_t)dp;
+ req->bulk_timeout = dp->bulkout_timeout; /* in second */
+ req->bulk_attributes = 0;
+ req->bulk_cb = usbgem_bulkout_cb;
+ req->bulk_exc_cb = usbgem_bulkout_cb;
+ req->bulk_completion_reason = 0;
+ req->bulk_cb_flags = 0;
+
+ if (intr) {
+ usb_flags = USB_FLAGS_SLEEP;
+ }
+ if ((err = usb_pipe_bulk_xfer(dp->bulkout_pipe, req, usb_flags))
+ != USB_SUCCESS) {
+
+ /* failed to transfer the packet, discard it. */
+ freemsg(new);
+ req->bulk_data = NULL;
+
+ /* recycle the request block */
+ mutex_enter(&dp->txlock);
+ dp->tx_busy_cnt--;
+ req->bulk_client_private = (usb_opaque_t)dp->tx_free_list;
+ dp->tx_free_list = req;
+ mutex_exit(&dp->txlock);
+
+ cmn_err(CE_NOTE,
+ "%s: %s: usb_pipe_bulk_xfer: failed: err:%d",
+ dp->name, __func__, err);
+
+ /* we use another flag to indicate error state. */
+ if (dp->fatal_error == (clock_t)0) {
+ dp->fatal_error = usbgem_timestamp_nz();
+ }
+ } else {
+ /* record the start time */
+ dp->tx_start_time = ddi_get_lbolt();
+ }
+
+ if (err == USB_SUCCESS && (usb_flags & USB_FLAGS_SLEEP)) {
+ usbgem_bulkout_cb(dp->bulkout_pipe, req);
+ }
+
+ if (new != mp) {
+ freemsg(mp);
+ }
+ return (NULL);
+}
+
+int
+usbgem_restart_nic(struct usbgem_dev *dp)
+{
+ int ret;
+ int flags = 0;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ ASSERT(dp->mac_state != MAC_STATE_DISCONNECTED);
+
+ /*
+ * ensure to stop the nic
+ */
+ if (dp->mac_state == MAC_STATE_ONLINE) {
+ (void) usbgem_mac_stop(dp, MAC_STATE_STOPPED, STOP_GRACEFUL);
+ }
+
+ /* now the nic become quiescent, reset the chip */
+ if (usbgem_hal_reset_chip(dp) != USB_SUCCESS) {
+ cmn_err(CE_WARN, "%s: %s: failed to reset chip",
+ dp->name, __func__);
+ goto err;
+ }
+
+ /*
+ * restore the nic state step by step
+ */
+ if (dp->nic_state < NIC_STATE_INITIALIZED) {
+ goto done;
+ }
+
+ if (usbgem_mac_init(dp) != USB_SUCCESS) {
+ cmn_err(CE_WARN, "%s: %s: failed to initialize chip",
+ dp->name, __func__);
+ goto err;
+ }
+
+ /* setup mac address and enable rx filter */
+ sema_p(&dp->rxfilter_lock);
+ dp->rxmode |= RXMODE_ENABLE;
+ ret = usbgem_hal_set_rx_filter(dp);
+ sema_v(&dp->rxfilter_lock);
+ if (ret != USB_SUCCESS) {
+ goto err;
+ }
+
+ /*
+ * update the link state asynchronously
+ */
+ cv_signal(&dp->link_watcher_wait_cv);
+
+ /*
+ * XXX - a panic happened because of linkdown.
+ * We must check mii_state here, because the link can be down just
+ * before the restart event happen. If the link is down now,
+ * gem_mac_start() will be called from gem_mii_link_check() when
+ * the link become up later.
+ */
+ if (dp->mii_state == MII_STATE_LINKUP) {
+ if (usbgem_hal_set_media(dp) != USB_SUCCESS) {
+ goto err;
+ }
+ if (dp->nic_state < NIC_STATE_ONLINE) {
+ goto done;
+ }
+
+ (void) usbgem_mac_start(dp);
+
+ }
+done:
+ return (USB_SUCCESS);
+err:
+#ifdef GEM_CONFIG_FMA
+ ddi_fm_service_impact(dp->dip, DDI_SERVICE_DEGRADED);
+#endif
+ return (USB_FAILURE);
+}
+
+static void
+usbgem_tx_timeout(struct usbgem_dev *dp)
+{
+ int ret;
+ uint_t rwlock;
+ clock_t now;
+
+ for (; ; ) {
+ mutex_enter(&dp->tx_watcher_lock);
+ ret = cv_timedwait(&dp->tx_watcher_cv, &dp->tx_watcher_lock,
+ dp->tx_watcher_interval + ddi_get_lbolt());
+ mutex_exit(&dp->tx_watcher_lock);
+
+ if (dp->tx_watcher_stop) {
+ break;
+ }
+
+ now = ddi_get_lbolt();
+
+ rwlock = RW_READER;
+again:
+ rw_enter(&dp->dev_state_lock, rwlock);
+
+ if ((dp->mac_state != MAC_STATE_DISCONNECTED &&
+ dp->fatal_error &&
+ now - dp->fatal_error >= dp->ugc.usbgc_tx_timeout) ||
+ (dp->mac_state == MAC_STATE_ONLINE &&
+ dp->mii_state == MII_STATE_LINKUP &&
+ dp->tx_busy_cnt != 0 &&
+ now - dp->tx_start_time >= dp->ugc.usbgc_tx_timeout)) {
+ if (rwlock == RW_READER) {
+ /*
+ * Upgrade dev_state_lock from shared mode
+ * to exclusive mode to restart nic
+ */
+ rwlock = RW_WRITER;
+ rw_exit(&dp->dev_state_lock);
+ goto again;
+ }
+ cmn_err(CE_WARN, "%s: %s: restarting the nic:"
+ " fatal_error:%ld nic_state:%d"
+ " mac_state:%d starttime:%ld",
+ dp->name, __func__,
+ dp->fatal_error ? now - dp->fatal_error: 0,
+ dp->nic_state, dp->mac_state,
+ dp->tx_busy_cnt ? now - dp->tx_start_time : 0);
+
+ (void) usbgem_restart_nic(dp);
+ }
+
+ rw_exit(&dp->dev_state_lock);
+ }
+}
+
+static int
+usbgem_tx_watcher_start(struct usbgem_dev *dp)
+{
+ int err;
+ kthread_t *wdth;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /* make a first call of uwgem_lw_link_check() */
+ dp->tx_watcher_stop = 0;
+ dp->tx_watcher_interval = drv_usectohz(1000*1000);
+
+ wdth = thread_create(NULL, 0, usbgem_tx_timeout, dp, 0, &p0,
+ TS_RUN, minclsyspri);
+ if (wdth == NULL) {
+ cmn_err(CE_WARN,
+ "!%s: %s: failed to create a tx_watcher thread",
+ dp->name, __func__);
+ return (USB_FAILURE);
+ }
+ dp->tx_watcher_did = wdth->t_did;
+
+ return (USB_SUCCESS);
+}
+
+static void
+usbgem_tx_watcher_stop(struct usbgem_dev *dp)
+{
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+ if (dp->tx_watcher_did) {
+ /* Ensure timer routine stopped */
+ dp->tx_watcher_stop = 1;
+ cv_signal(&dp->tx_watcher_cv);
+ thread_join(dp->tx_watcher_did);
+ dp->tx_watcher_did = NULL;
+ }
+}
+
+/* ================================================================== */
+/*
+ * Callback handlers
+ */
+/* ================================================================== */
+static void
+usbgem_bulkin_cb(usb_pipe_handle_t pipe, usb_bulk_req_t *req)
+{
+ mblk_t *newmp;
+ mblk_t *mp;
+ mblk_t *tp;
+ int len = 0;
+ int pkts = 0;
+ int bcast = 0;
+ int mcast = 0;
+ boolean_t busy;
+ struct usbgem_dev *dp;
+
+ dp = (struct usbgem_dev *)req->bulk_client_private;
+ mp = req->bulk_data;
+ req->bulk_data = NULL;
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: mp:%p, cr:%s(%d)",
+ dp->name, __func__, mp,
+ usb_str_cr(req->bulk_completion_reason),
+ req->bulk_completion_reason));
+
+ /*
+ * we cannot acquire dev_state_lock because the routine
+ * must be executed during usbgem_mac_stop() to avoid
+ * dead lock.
+ * we use a simle membar operation to get the state correctly.
+ */
+ membar_consumer();
+
+ if (req->bulk_completion_reason == USB_CR_OK &&
+ dp->nic_state == NIC_STATE_ONLINE) {
+ newmp = (*dp->ugc.usbgc_rx_make_packet)(dp, mp);
+
+ if (newmp != mp) {
+ /* the message has been reallocated, free old one */
+ freemsg(mp);
+ }
+
+ /* the message may includes one or more ethernet packets */
+ for (tp = newmp; tp; tp = tp->b_next) {
+ len += tp->b_wptr - tp->b_rptr;
+ pkts++;
+ if (tp->b_rptr[0] & 1) {
+ if (bcmp(tp->b_rptr, &usbgem_bcastaddr,
+ ETHERADDRL) == 0) {
+ bcast++;
+ } else {
+ mcast++;
+ }
+ }
+ }
+
+ /* send up if it is a valid packet */
+#ifdef USBGEM_CONFIG_GLDv3
+ mac_rx(dp->mh, NULL, newmp);
+#else
+ while (newmp) {
+ tp = newmp;
+ newmp = newmp->b_next;
+ tp->b_next = NULL;
+ gld_recv(dp->macinfo, tp);
+ }
+#endif
+ } else {
+ freemsg(mp);
+ len = 0;
+ }
+
+ mutex_enter(&dp->rxlock);
+ /* update rx_active */
+ if (dp->rx_active) {
+ dp->rx_active = dp->mac_state == MAC_STATE_ONLINE;
+ }
+
+ dp->stats.rbytes += len;
+ dp->stats.rpackets += pkts;
+ if (bcast | mcast) {
+ dp->stats.rbcast += bcast;
+ dp->stats.rmcast += mcast;
+ }
+ mutex_exit(&dp->rxlock);
+
+ if (dp->rx_active) {
+ /* prepare to receive the next packets */
+ if (usbgem_rx_start_unit(dp, req)) {
+ /* we successed */
+ goto done;
+ }
+ cmn_err(CE_WARN,
+ "!%s: %s: failed to fill next rx packet",
+ dp->name, __func__);
+ /*
+ * we use another flag to indicate error state.
+ * if we acquire dev_state_lock for RW_WRITER here,
+ * usbgem_mac_stop() may hang.
+ */
+ if (dp->fatal_error == (clock_t)0) {
+ dp->fatal_error = usbgem_timestamp_nz();
+ }
+ } else {
+ /* no need to prepare the next packets */
+ usb_free_bulk_req(req);
+ }
+
+ mutex_enter(&dp->rxlock);
+ dp->rx_active = B_FALSE;
+ dp->rx_busy_cnt--;
+ if (dp->rx_busy_cnt == 0) {
+ /* wake up someone waits for me */
+ cv_broadcast(&dp->rx_drain_cv);
+ }
+ mutex_exit(&dp->rxlock);
+done:
+ ;
+}
+
+static void
+usbgem_bulkout_cb(usb_pipe_handle_t pipe, usb_bulk_req_t *req)
+{
+ boolean_t intr;
+ boolean_t tx_sched;
+ struct usbgem_dev *dp;
+
+ dp = (struct usbgem_dev *)req->bulk_client_private;
+ tx_sched = B_FALSE;
+
+ DPRINTF(2, (CE_CONT,
+ "!%s: %s: cr:%s(%d) cb_flags:0x%x head:%d tail:%d",
+ dp->name, __func__,
+ usb_str_cr(req->bulk_completion_reason),
+ req->bulk_completion_reason,
+ req->bulk_cb_flags,
+ dp->tx_busy_cnt));
+
+ /* we have finished to transfer the packet into tx fifo */
+ intr = DB_TCI(req->bulk_data);
+ freemsg(req->bulk_data);
+
+ if (req->bulk_completion_reason != USB_CR_OK &&
+ dp->fatal_error == (clock_t)0) {
+ dp->fatal_error = usbgem_timestamp_nz();
+ }
+
+ mutex_enter(&dp->txlock);
+
+ if (intr) {
+ ASSERT(dp->tx_intr_pended > 0);
+ /* find the last interrupt we have scheduled */
+ if (--(dp->tx_intr_pended) == 0) {
+ tx_sched = B_TRUE;
+ }
+ }
+
+ ASSERT(dp->tx_busy_cnt > 0);
+ req->bulk_client_private = (usb_opaque_t)dp->tx_free_list;
+ dp->tx_free_list = req;
+ dp->tx_busy_cnt--;
+
+#ifdef CONFIG_TX_LIMITER
+ if (tx_sched) {
+ dp->tx_max_packets =
+ min(dp->tx_max_packets + 1, dp->ugc.usbgc_tx_list_max);
+ }
+#endif
+ if (dp->mac_state != MAC_STATE_ONLINE && dp->tx_busy_cnt == 0) {
+ cv_broadcast(&dp->tx_drain_cv);
+ }
+
+ mutex_exit(&dp->txlock);
+
+ if (tx_sched) {
+#ifdef USBGEM_CONFIG_GLDv3
+ mac_tx_update(dp->mh);
+#else
+ gld_sched(dp->macinfo);
+#endif
+ }
+}
+
+static void
+usbgem_intr_cb(usb_pipe_handle_t ph, usb_intr_req_t *req)
+{
+ struct usbgem_dev *dp;
+
+ dp = (struct usbgem_dev *)req->intr_client_private;
+ dp->stats.intr++;
+
+ if (req->intr_completion_reason == USB_CR_OK) {
+ (*dp->ugc.usbgc_interrupt)(dp, req->intr_data);
+ }
+
+ /* free the request and data */
+ usb_free_intr_req(req);
+}
+
+/* ======================================================================== */
+/*
+ * MII support routines
+ */
+/* ======================================================================== */
+static void
+usbgem_choose_forcedmode(struct usbgem_dev *dp)
+{
+ /* choose media mode */
+ if (dp->anadv_1000fdx || dp->anadv_1000hdx) {
+ dp->speed = USBGEM_SPD_1000;
+ dp->full_duplex = dp->anadv_1000fdx;
+ } else if (dp->anadv_100fdx || dp->anadv_100t4) {
+ dp->speed = USBGEM_SPD_100;
+ dp->full_duplex = B_TRUE;
+ } else if (dp->anadv_100hdx) {
+ dp->speed = USBGEM_SPD_100;
+ dp->full_duplex = B_FALSE;
+ } else {
+ dp->speed = USBGEM_SPD_10;
+ dp->full_duplex = dp->anadv_10fdx;
+ }
+}
+
+static uint16_t
+usbgem_mii_read(struct usbgem_dev *dp, uint_t reg, int *errp)
+{
+ uint16_t val;
+
+ sema_p(&dp->hal_op_lock);
+ val = (*dp->ugc.usbgc_mii_read)(dp, reg, errp);
+ sema_v(&dp->hal_op_lock);
+
+ return (val);
+}
+
+static void
+usbgem_mii_write(struct usbgem_dev *dp, uint_t reg, uint16_t val, int *errp)
+{
+ sema_p(&dp->hal_op_lock);
+ (*dp->ugc.usbgc_mii_write)(dp, reg, val, errp);
+ sema_v(&dp->hal_op_lock);
+}
+
+static int
+usbgem_mii_probe(struct usbgem_dev *dp)
+{
+ int err;
+
+ err = (*dp->ugc.usbgc_mii_probe)(dp);
+ return (err);
+}
+
+static int
+usbgem_mii_init(struct usbgem_dev *dp)
+{
+ int err;
+
+ err = (*dp->ugc.usbgc_mii_init)(dp);
+ return (err);
+}
+
+#define fc_cap_decode(x) \
+ ((((x) & MII_ABILITY_PAUSE) != 0 ? 1 : 0) | \
+ (((x) & MII_ABILITY_ASM_DIR) != 0 ? 2 : 0))
+
+int
+usbgem_mii_config_default(struct usbgem_dev *dp, int *errp)
+{
+ uint16_t mii_stat;
+ uint16_t val;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /*
+ * Configure bits in advertisement register
+ */
+ mii_stat = dp->mii_status;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: MII_STATUS reg:%b",
+ dp->name, __func__, mii_stat, MII_STATUS_BITS));
+
+ if ((mii_stat & MII_STATUS_ABILITY_TECH) == 0) {
+ /* it's funny */
+ cmn_err(CE_WARN, "!%s: wrong ability bits: mii_status:%b",
+ dp->name, mii_stat, MII_STATUS_BITS);
+ return (USB_FAILURE);
+ }
+
+ /* Do not change the rest of ability bits in advert reg */
+ val = usbgem_mii_read(dp, MII_AN_ADVERT, errp) & ~MII_ABILITY_ALL;
+ if (*errp != USB_SUCCESS) {
+ goto usberr;
+ }
+
+ DPRINTF(0, (CE_CONT,
+ "!%s: %s: 100T4:%d 100F:%d 100H:%d 10F:%d 10H:%d",
+ dp->name, __func__,
+ dp->anadv_100t4, dp->anadv_100fdx, dp->anadv_100hdx,
+ dp->anadv_10fdx, dp->anadv_10hdx));
+
+ /* set technology bits */
+ if (dp->anadv_100t4) {
+ val |= MII_ABILITY_100BASE_T4;
+ }
+ if (dp->anadv_100fdx) {
+ val |= MII_ABILITY_100BASE_TX_FD;
+ }
+ if (dp->anadv_100hdx) {
+ val |= MII_ABILITY_100BASE_TX;
+ }
+ if (dp->anadv_10fdx) {
+ val |= MII_ABILITY_10BASE_T_FD;
+ }
+ if (dp->anadv_10hdx) {
+ val |= MII_ABILITY_10BASE_T;
+ }
+
+ /* set flow control capabilities */
+ if (dp->anadv_pause) {
+ val |= MII_ABILITY_PAUSE;
+ }
+ if (dp->anadv_asmpause) {
+ val |= MII_ABILITY_ASM_DIR;
+ }
+
+ DPRINTF(0, (CE_CONT,
+ "!%s: %s: setting MII_AN_ADVERT reg:%b, pause:%d, asmpause:%d",
+ dp->name, __func__, val, MII_ABILITY_BITS,
+ dp->anadv_pause, dp->anadv_asmpause));
+
+ usbgem_mii_write(dp, MII_AN_ADVERT, val, errp);
+ if (*errp != USB_SUCCESS) {
+ goto usberr;
+ }
+
+ if (dp->mii_status & MII_STATUS_XSTATUS) {
+ /*
+ * 1000Base-T GMII support
+ */
+ if (!dp->anadv_autoneg) {
+ /* enable manual configuration */
+ val = MII_1000TC_CFG_EN;
+ if (dp->anadv_1000t_ms == 2) {
+ val |= MII_1000TC_CFG_VAL;
+ }
+ } else {
+ val = 0;
+ if (dp->anadv_1000fdx) {
+ val |= MII_1000TC_ADV_FULL;
+ }
+ if (dp->anadv_1000hdx) {
+ val |= MII_1000TC_ADV_HALF;
+ }
+ switch (dp->anadv_1000t_ms) {
+ case 1:
+ /* slave */
+ val |= MII_1000TC_CFG_EN;
+ break;
+
+ case 2:
+ /* master */
+ val |= MII_1000TC_CFG_EN | MII_1000TC_CFG_VAL;
+ break;
+
+ default:
+ /* auto: do nothing */
+ break;
+ }
+ }
+ DPRINTF(0, (CE_CONT,
+ "!%s: %s: setting MII_1000TC reg:%b",
+ dp->name, __func__, val, MII_1000TC_BITS));
+
+ usbgem_mii_write(dp, MII_1000TC, val, errp);
+ if (*errp != USB_SUCCESS) {
+ goto usberr;
+ }
+ }
+ return (USB_SUCCESS);
+
+usberr:
+ return (*errp);
+}
+
+static char *usbgem_fc_type[] = {
+ "without",
+ "with symmetric",
+ "with tx",
+ "with rx",
+};
+
+#ifdef USBGEM_CONFIG_GLDv3
+#define USBGEM_LINKUP(dp) mac_link_update((dp)->mh, LINK_STATE_UP)
+#define USBGEM_LINKDOWN(dp) mac_link_update((dp)->mh, LINK_STATE_DOWN)
+#else
+#define USBGEM_LINKUP(dp) \
+ if (gld_linkstate) { \
+ gld_linkstate((dp)->macinfo, GLD_LINKSTATE_UP); \
+ }
+#define USBGEM_LINKDOWN(dp) \
+ if (gld_linkstate) { \
+ gld_linkstate((dp)->macinfo, GLD_LINKSTATE_DOWN); \
+ }
+#endif
+
+static uint8_t usbgem_fc_result[4 /* my cap */][4 /* lp cap */] = {
+/* none symm tx rx/symm */
+/* none */
+ {FLOW_CONTROL_NONE,
+ FLOW_CONTROL_NONE,
+ FLOW_CONTROL_NONE,
+ FLOW_CONTROL_NONE},
+/* sym */
+ {FLOW_CONTROL_NONE,
+ FLOW_CONTROL_SYMMETRIC,
+ FLOW_CONTROL_NONE,
+ FLOW_CONTROL_SYMMETRIC},
+/* tx */
+ {FLOW_CONTROL_NONE,
+ FLOW_CONTROL_NONE,
+ FLOW_CONTROL_NONE,
+ FLOW_CONTROL_TX_PAUSE},
+/* rx/symm */
+ {FLOW_CONTROL_NONE,
+ FLOW_CONTROL_SYMMETRIC,
+ FLOW_CONTROL_RX_PAUSE,
+ FLOW_CONTROL_SYMMETRIC},
+};
+
+static boolean_t
+usbgem_mii_link_check(struct usbgem_dev *dp, int *oldstatep, int *newstatep)
+{
+ boolean_t tx_sched = B_FALSE;
+ uint16_t status;
+ uint16_t advert;
+ uint16_t lpable;
+ uint16_t exp;
+ uint16_t ctl1000;
+ uint16_t stat1000;
+ uint16_t val;
+ clock_t now;
+ clock_t diff;
+ int linkdown_action;
+ boolean_t fix_phy = B_FALSE;
+ int err;
+ uint_t rwlock;
+
+ DPRINTF(4, (CE_CONT, "!%s: %s: time:%d state:%d",
+ dp->name, __func__, ddi_get_lbolt(), dp->mii_state));
+
+ if (dp->mii_state != MII_STATE_LINKUP) {
+ rwlock = RW_WRITER;
+ } else {
+ rwlock = RW_READER;
+ }
+again:
+ rw_enter(&dp->dev_state_lock, rwlock);
+
+ /* save old mii state */
+ *oldstatep = dp->mii_state;
+
+ if (dp->mac_state == MAC_STATE_DISCONNECTED) {
+ /* stop periodic execution of the link watcher */
+ dp->mii_interval = 0;
+ tx_sched = B_FALSE;
+ goto next;
+ }
+
+ now = ddi_get_lbolt();
+ diff = now - dp->mii_last_check;
+ dp->mii_last_check = now;
+
+ /*
+ * For NWAM, don't show linkdown state right
+ * when the device is attached.
+ */
+ if (dp->linkup_delay > 0) {
+ if (dp->linkup_delay > diff) {
+ dp->linkup_delay -= diff;
+ } else {
+ /* link up timeout */
+ dp->linkup_delay = -1;
+ }
+ }
+
+next_nowait:
+ switch (dp->mii_state) {
+ case MII_STATE_UNKNOWN:
+ goto reset_phy;
+
+ case MII_STATE_RESETTING:
+ dp->mii_timer -= diff;
+ if (dp->mii_timer > 0) {
+ /* don't read phy registers in resetting */
+ dp->mii_interval = WATCH_INTERVAL_FAST;
+ goto next;
+ }
+
+ val = usbgem_mii_read(dp, MII_CONTROL, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ if (val & MII_CONTROL_RESET) {
+ cmn_err(CE_NOTE,
+ "!%s: time:%ld resetting phy not complete."
+ " mii_control:0x%b",
+ dp->name, ddi_get_lbolt(),
+ val, MII_CONTROL_BITS);
+ }
+
+ /* ensure neither isolated nor pwrdown nor auto-nego mode */
+ usbgem_mii_write(dp, MII_CONTROL, 0, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+#if USBGEM_DEBUG_LEVEL > 10
+ val = usbgem_mii_read(dp, MII_CONTROL, &err);
+ cmn_err(CE_CONT, "!%s: readback control %b",
+ dp->name, val, MII_CONTROL_BITS);
+#endif
+ /* As resetting PHY has completed, configure PHY registers */
+ if ((*dp->ugc.usbgc_mii_config)(dp, &err) != USB_SUCCESS) {
+ /* we failed to configure PHY */
+ goto usberr;
+ }
+
+ /* prepare for forced mode */
+ usbgem_choose_forcedmode(dp);
+
+ dp->mii_lpable = 0;
+ dp->mii_advert = 0;
+ dp->mii_exp = 0;
+ dp->mii_ctl1000 = 0;
+ dp->mii_stat1000 = 0;
+
+ dp->flow_control = FLOW_CONTROL_NONE;
+
+ if (!dp->anadv_autoneg) {
+ /* skip auto-negotiation phase */
+ dp->mii_state = MII_STATE_MEDIA_SETUP;
+ dp->mii_timer = dp->ugc.usbgc_mii_linkdown_timeout;
+ goto next_nowait;
+ }
+
+ /* issue an auto-negotiation command */
+ goto autonego;
+
+ case MII_STATE_AUTONEGOTIATING:
+ /*
+ * Autonegotiation in progress
+ */
+ dp->mii_timer -= diff;
+ if (dp->mii_timer -
+ (dp->ugc.usbgc_mii_an_timeout - dp->ugc.usbgc_mii_an_wait)
+ > 0) {
+ /* wait for minimum time (2.3 - 2.5 sec) */
+ dp->mii_interval = WATCH_INTERVAL_FAST;
+ goto next;
+ }
+
+ /* read PHY status */
+ status = usbgem_mii_read(dp, MII_STATUS, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ DPRINTF(4, (CE_CONT,
+ "!%s: %s: called: mii_state:%d MII_STATUS reg:%b",
+ dp->name, __func__, dp->mii_state,
+ status, MII_STATUS_BITS));
+
+ if (status & MII_STATUS_REMFAULT) {
+ /*
+ * The link parnert told me something wrong happend.
+ * What do we do ?
+ */
+ cmn_err(CE_CONT,
+ "!%s: auto-negotiation failed: remote fault",
+ dp->name);
+ goto autonego;
+ }
+
+ if ((status & MII_STATUS_ANDONE) == 0) {
+ if (dp->mii_timer <= 0) {
+ /*
+ * Auto-negotiation has been timed out,
+ * Reset PHY and try again.
+ */
+ if (!dp->mii_supress_msg) {
+ cmn_err(CE_WARN,
+ "!%s: auto-negotiation failed:"
+ " timeout",
+ dp->name);
+ dp->mii_supress_msg = B_TRUE;
+ }
+ goto autonego;
+ }
+ /*
+ * Auto-negotiation is in progress. Wait for a while.
+ */
+ dp->mii_interval = dp->ugc.usbgc_mii_an_watch_interval;
+ goto next;
+ }
+
+ /*
+ * Auto-negotiation has been completed. Let's go to AN_DONE.
+ */
+ dp->mii_state = MII_STATE_AN_DONE;
+ dp->mii_supress_msg = B_FALSE;
+ DPRINTF(0, (CE_CONT,
+ "!%s: auto-negotiation completed, MII_STATUS:%b",
+ dp->name, status, MII_STATUS_BITS));
+
+ if (dp->ugc.usbgc_mii_an_delay > 0) {
+ dp->mii_timer = dp->ugc.usbgc_mii_an_delay;
+ dp->mii_interval = drv_usectohz(20*1000);
+ goto next;
+ }
+
+ dp->mii_timer = 0;
+ diff = 0;
+ goto next_nowait;
+
+ case MII_STATE_AN_DONE:
+ /*
+ * Auto-negotiation has done. Now we can set up media.
+ */
+ dp->mii_timer -= diff;
+ if (dp->mii_timer > 0) {
+ /* wait for a while */
+ dp->mii_interval = WATCH_INTERVAL_FAST;
+ goto next;
+ }
+
+ /*
+ * Setup speed and duplex mode according with
+ * the result of auto negotiation.
+ */
+
+ /*
+ * Read registers required to determin current
+ * duplex mode and media speed.
+ */
+ if (dp->ugc.usbgc_mii_an_delay > 0) {
+ /* the 'status' variable is not initialized yet */
+ status = usbgem_mii_read(dp, MII_STATUS, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ }
+ advert = usbgem_mii_read(dp, MII_AN_ADVERT, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ lpable = usbgem_mii_read(dp, MII_AN_LPABLE, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ exp = usbgem_mii_read(dp, MII_AN_EXPANSION, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ if (exp == 0xffff) {
+ /* some phys don't have exp register */
+ exp = 0;
+ }
+
+ ctl1000 = 0;
+ stat1000 = 0;
+ if (dp->mii_status & MII_STATUS_XSTATUS) {
+ ctl1000 = usbgem_mii_read(dp, MII_1000TC, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ stat1000 = usbgem_mii_read(dp, MII_1000TS, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ }
+ dp->mii_lpable = lpable;
+ dp->mii_advert = advert;
+ dp->mii_exp = exp;
+ dp->mii_ctl1000 = ctl1000;
+ dp->mii_stat1000 = stat1000;
+
+ cmn_err(CE_CONT,
+ "!%s: auto-negotiation done: "
+ "status:%b, advert:%b, lpable:%b, exp:%b",
+ dp->name,
+ status, MII_STATUS_BITS,
+ advert, MII_ABILITY_BITS,
+ lpable, MII_ABILITY_BITS,
+ exp, MII_AN_EXP_BITS);
+
+ DPRINTF(0, (CE_CONT, "!%s: MII_STATUS:%b",
+ dp->name, status, MII_STATUS_BITS));
+
+ if (dp->mii_status & MII_STATUS_XSTATUS) {
+ cmn_err(CE_CONT,
+ "! MII_1000TC reg:%b, MII_1000TS reg:%b",
+ ctl1000, MII_1000TC_BITS,
+ stat1000, MII_1000TS_BITS);
+ }
+
+ if (usbgem_population(lpable) <= 1 &&
+ (exp & MII_AN_EXP_LPCANAN) == 0) {
+ if ((advert & MII_ABILITY_TECH) != lpable) {
+ cmn_err(CE_WARN,
+ "!%s: but the link partner doesn't seem"
+ " to have auto-negotiation capability."
+ " please check the link configuration.",
+ dp->name);
+ }
+ /*
+ * it should be a result of pararell detection,
+ * which cannot detect duplex mode.
+ */
+ if ((advert & lpable) == 0 &&
+ lpable & MII_ABILITY_10BASE_T) {
+ /* no common technology, try 10M half mode */
+ lpable |= advert & MII_ABILITY_10BASE_T;
+ fix_phy = B_TRUE;
+ }
+ } else if (lpable == 0) {
+ cmn_err(CE_WARN, "!%s: wrong lpable.", dp->name);
+ goto reset_phy;
+ }
+ /*
+ * configure current link mode according to AN priority.
+ */
+ val = advert & lpable;
+ if ((ctl1000 & MII_1000TC_ADV_FULL) &&
+ (stat1000 & MII_1000TS_LP_FULL)) {
+ /* 1000BaseT & full duplex */
+ dp->speed = USBGEM_SPD_1000;
+ dp->full_duplex = B_TRUE;
+ } else if ((ctl1000 & MII_1000TC_ADV_HALF) &&
+ (stat1000 & MII_1000TS_LP_HALF)) {
+ /* 1000BaseT & half duplex */
+ dp->speed = USBGEM_SPD_1000;
+ dp->full_duplex = B_FALSE;
+ } else if ((val & MII_ABILITY_100BASE_TX_FD)) {
+ /* 100BaseTx & fullduplex */
+ dp->speed = USBGEM_SPD_100;
+ dp->full_duplex = B_TRUE;
+ } else if ((val & MII_ABILITY_100BASE_T4)) {
+ /* 100BaseTx & fullduplex */
+ dp->speed = USBGEM_SPD_100;
+ dp->full_duplex = B_TRUE;
+ } else if ((val & MII_ABILITY_100BASE_TX)) {
+ /* 100BaseTx & half duplex */
+ dp->speed = USBGEM_SPD_100;
+ dp->full_duplex = B_FALSE;
+ } else if ((val & MII_ABILITY_10BASE_T_FD)) {
+ /* 10BaseT & full duplex */
+ dp->speed = USBGEM_SPD_10;
+ dp->full_duplex = B_TRUE;
+ } else if ((val & MII_ABILITY_10BASE_T)) {
+ /* 10BaseT & half duplex */
+ dp->speed = USBGEM_SPD_10;
+ dp->full_duplex = B_FALSE;
+ } else {
+ /*
+ * the link partner doesn't seem to have
+ * auto-negotiation capability and our PHY
+ * could not report current mode correctly.
+ * We guess current mode by mii_control register.
+ */
+ val = usbgem_mii_read(dp, MII_CONTROL, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+
+ /* select 100m half or 10m half */
+ dp->speed = (val & MII_CONTROL_100MB) ?
+ USBGEM_SPD_100 : USBGEM_SPD_10;
+ dp->full_duplex = B_FALSE;
+ fix_phy = B_TRUE;
+
+ cmn_err(CE_NOTE,
+ "!%s: auto-negotiation done but "
+ "common ability not found.\n"
+ "PHY state: control:%b advert:%b lpable:%b\n"
+ "guessing %d Mbps %s duplex mode",
+ dp->name,
+ val, MII_CONTROL_BITS,
+ advert, MII_ABILITY_BITS,
+ lpable, MII_ABILITY_BITS,
+ usbgem_speed_value[dp->speed],
+ dp->full_duplex ? "full" : "half");
+ }
+
+ if (dp->full_duplex) {
+ dp->flow_control =
+ usbgem_fc_result[fc_cap_decode(advert)]
+ [fc_cap_decode(lpable)];
+ } else {
+ dp->flow_control = FLOW_CONTROL_NONE;
+ }
+ dp->mii_state = MII_STATE_MEDIA_SETUP;
+ dp->mii_timer = dp->ugc.usbgc_mii_linkdown_timeout;
+ goto next_nowait;
+
+ case MII_STATE_MEDIA_SETUP:
+ DPRINTF(2, (CE_CONT, "!%s: setup midia mode", dp->name));
+
+ /* assume the link state is down */
+ dp->mii_state = MII_STATE_LINKDOWN;
+ dp->mii_supress_msg = B_FALSE;
+
+ /* use short interval */
+ dp->mii_interval = WATCH_INTERVAL_FAST;
+
+ if ((!dp->anadv_autoneg) ||
+ dp->ugc.usbgc_mii_an_oneshot || fix_phy) {
+
+ /*
+ * write the result of auto negotiation back.
+ */
+ val = usbgem_mii_read(dp, MII_CONTROL, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ val &= ~(MII_CONTROL_SPEED | MII_CONTROL_FDUPLEX |
+ MII_CONTROL_ANE | MII_CONTROL_RSAN);
+
+ if (dp->full_duplex) {
+ val |= MII_CONTROL_FDUPLEX;
+ }
+
+ switch (dp->speed) {
+ case USBGEM_SPD_1000:
+ val |= MII_CONTROL_1000MB;
+ break;
+
+ case USBGEM_SPD_100:
+ val |= MII_CONTROL_100MB;
+ break;
+
+ default:
+ cmn_err(CE_WARN, "%s: unknown speed:%d",
+ dp->name, dp->speed);
+ /* FALLTHROUGH */
+
+ case USBGEM_SPD_10:
+ /* for USBGEM_SPD_10, do nothing */
+ break;
+ }
+
+ if (dp->mii_status & MII_STATUS_XSTATUS) {
+ usbgem_mii_write(dp,
+ MII_1000TC, MII_1000TC_CFG_EN, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ }
+ usbgem_mii_write(dp, MII_CONTROL, val, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ }
+ /*
+ * XXX -- nic state should be one of
+ * NIC_STATE_DISCONNECTED
+ * NIC_STATE_STOPPED
+ * NIC_STATE_INITIALIZED
+ * NIC_STATE_ONLINE
+ */
+ if (dp->nic_state >= NIC_STATE_INITIALIZED) {
+ /* notify the result of autonegotiation to mac */
+ if (usbgem_hal_set_media(dp) != USB_SUCCESS) {
+ goto usberr;
+ }
+ }
+ goto next_nowait;
+
+ case MII_STATE_LINKDOWN:
+ status = usbgem_mii_read(dp, MII_STATUS, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ if (status & MII_STATUS_LINKUP) {
+ /*
+ * Link is going up
+ */
+ dp->mii_state = MII_STATE_LINKUP;
+ dp->mii_supress_msg = B_FALSE;
+
+ DPRINTF(0, (CE_CONT,
+ "!%s: link up detected: status:%b",
+ dp->name, status, MII_STATUS_BITS));
+
+ /*
+ * MII_CONTROL_100MB and MII_CONTROL_FDUPLEX are
+ * ignored when MII_CONTROL_ANE is set.
+ */
+ cmn_err(CE_CONT,
+ "!%s: Link up: %d Mbps %s duplex %s flow control",
+ dp->name,
+ usbgem_speed_value[dp->speed],
+ dp->full_duplex ? "full" : "half",
+ usbgem_fc_type[dp->flow_control]);
+
+ dp->mii_interval =
+ dp->ugc.usbgc_mii_link_watch_interval;
+
+ if (dp->ugc.usbgc_mii_hw_link_detection &&
+ dp->nic_state == NIC_STATE_ONLINE) {
+ dp->mii_interval = 0;
+ }
+
+ if (dp->nic_state == NIC_STATE_ONLINE) {
+ if (dp->mac_state == MAC_STATE_INITIALIZED) {
+ (void) usbgem_mac_start(dp);
+ }
+ tx_sched = B_TRUE;
+ }
+
+ goto next;
+ }
+
+ dp->mii_supress_msg = B_TRUE;
+ if (dp->anadv_autoneg) {
+ dp->mii_timer -= diff;
+ if (dp->mii_timer <= 0) {
+ /*
+ * the link down timer expired.
+ * need to restart auto-negotiation.
+ */
+ linkdown_action =
+ dp->ugc.usbgc_mii_linkdown_timeout_action;
+ goto restart_autonego;
+ }
+ }
+ /* don't change mii_state */
+ goto next;
+
+ case MII_STATE_LINKUP:
+ if (rwlock == RW_READER) {
+ /* first pass, read mii status */
+ status = usbgem_mii_read(dp, MII_STATUS, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ }
+ if ((status & MII_STATUS_LINKUP) == 0) {
+ /*
+ * Link is going down
+ */
+ cmn_err(CE_NOTE,
+ "!%s: link down detected: status:%b",
+ dp->name, status, MII_STATUS_BITS);
+ /*
+ * Acquire exclusive lock to change mii_state
+ */
+ if (rwlock == RW_READER) {
+ rwlock = RW_WRITER;
+ rw_exit(&dp->dev_state_lock);
+ goto again;
+ }
+
+ dp->mii_state = MII_STATE_LINKDOWN;
+ dp->mii_timer = dp->ugc.usbgc_mii_linkdown_timeout;
+
+ /*
+ * As we may change the state of the device,
+ * let us acquire exclusive lock for the state.
+ */
+ if (dp->nic_state == NIC_STATE_ONLINE &&
+ dp->mac_state == MAC_STATE_ONLINE &&
+ dp->ugc.usbgc_mii_stop_mac_on_linkdown) {
+ (void) usbgem_restart_nic(dp);
+ /* drain tx */
+ tx_sched = B_TRUE;
+ }
+
+ if (dp->anadv_autoneg) {
+ /* need to restart auto-negotiation */
+ linkdown_action =
+ dp->ugc.usbgc_mii_linkdown_action;
+ goto restart_autonego;
+ }
+ /*
+ * don't use hw link down detection until the link
+ * status become stable for a while.
+ */
+ dp->mii_interval =
+ dp->ugc.usbgc_mii_link_watch_interval;
+
+ goto next;
+ }
+
+ /*
+ * still link up, no need to change mii_state
+ */
+ if (dp->ugc.usbgc_mii_hw_link_detection &&
+ dp->nic_state == NIC_STATE_ONLINE) {
+ /*
+ * no need to check link status periodicly
+ * if nic can generate interrupts when link go down.
+ */
+ dp->mii_interval = 0;
+ }
+ goto next;
+ }
+ /* NOTREACHED */
+ cmn_err(CE_PANIC, "!%s: %s: not reached", dp->name, __func__);
+
+ /*
+ * Actions for new state.
+ */
+restart_autonego:
+ switch (linkdown_action) {
+ case MII_ACTION_RESET:
+ if (!dp->mii_supress_msg) {
+ cmn_err(CE_CONT, "!%s: resetting PHY", dp->name);
+ }
+ dp->mii_supress_msg = B_TRUE;
+ goto reset_phy;
+
+ case MII_ACTION_NONE:
+ dp->mii_supress_msg = B_TRUE;
+ if (dp->ugc.usbgc_mii_an_oneshot) {
+ goto autonego;
+ }
+ /* PHY will restart autonego automatically */
+ dp->mii_state = MII_STATE_AUTONEGOTIATING;
+ dp->mii_timer = dp->ugc.usbgc_mii_an_timeout;
+ dp->mii_interval = dp->ugc.usbgc_mii_an_watch_interval;
+ goto next;
+
+ case MII_ACTION_RSA:
+ if (!dp->mii_supress_msg) {
+ cmn_err(CE_CONT, "!%s: restarting auto-negotiation",
+ dp->name);
+ }
+ dp->mii_supress_msg = B_TRUE;
+ goto autonego;
+
+ default:
+ cmn_err(CE_PANIC, "!%s: unknowm linkdown action: %d",
+ dp->name, dp->ugc.usbgc_mii_linkdown_action);
+ dp->mii_supress_msg = B_TRUE;
+ }
+ /* NOTREACHED */
+
+reset_phy:
+ if (!dp->mii_supress_msg) {
+ cmn_err(CE_CONT, "!%s: resetting PHY", dp->name);
+ }
+ dp->mii_state = MII_STATE_RESETTING;
+ dp->mii_timer = dp->ugc.usbgc_mii_reset_timeout;
+ if (!dp->ugc.usbgc_mii_dont_reset) {
+ usbgem_mii_write(dp, MII_CONTROL, MII_CONTROL_RESET, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ }
+ dp->mii_interval = WATCH_INTERVAL_FAST;
+ goto next;
+
+autonego:
+ if (!dp->mii_supress_msg) {
+ cmn_err(CE_CONT, "!%s: auto-negotiation started", dp->name);
+ }
+ dp->mii_state = MII_STATE_AUTONEGOTIATING;
+ dp->mii_timer = dp->ugc.usbgc_mii_an_timeout;
+
+ /* start/restart autoneg */
+ val = usbgem_mii_read(dp, MII_CONTROL, &err) &
+ ~(MII_CONTROL_ISOLATE | MII_CONTROL_PWRDN | MII_CONTROL_RESET);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ if (val & MII_CONTROL_ANE) {
+ val |= MII_CONTROL_RSAN;
+ }
+ usbgem_mii_write(dp, MII_CONTROL,
+ val | dp->ugc.usbgc_mii_an_cmd | MII_CONTROL_ANE, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+
+ dp->mii_interval = dp->ugc.usbgc_mii_an_watch_interval;
+ goto next;
+
+usberr:
+ dp->mii_state = MII_STATE_UNKNOWN;
+ dp->mii_interval = dp->ugc.usbgc_mii_link_watch_interval;
+ tx_sched = B_TRUE;
+
+next:
+ *newstatep = dp->mii_state;
+ rw_exit(&dp->dev_state_lock);
+ return (tx_sched);
+}
+
+static void
+usbgem_mii_link_watcher(struct usbgem_dev *dp)
+{
+ int old_mii_state;
+ int new_mii_state;
+ boolean_t tx_sched;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ for (; ; ) {
+
+ mutex_enter(&dp->link_watcher_lock);
+ if (dp->mii_interval) {
+ (void) cv_timedwait(&dp->link_watcher_wait_cv,
+ &dp->link_watcher_lock,
+ dp->mii_interval + ddi_get_lbolt());
+ } else {
+ cv_wait(&dp->link_watcher_wait_cv,
+ &dp->link_watcher_lock);
+ }
+ mutex_exit(&dp->link_watcher_lock);
+
+ if (dp->link_watcher_stop) {
+ break;
+ }
+
+ /* we block callbacks from disconnect/suspend and restart */
+ tx_sched = usbgem_mii_link_check(dp,
+ &old_mii_state, &new_mii_state);
+
+ /*
+ * gld v2 notifier functions are not able to
+ * be called with any locks in this layer.
+ */
+ if (tx_sched) {
+ /* kick potentially stopped downstream */
+#ifdef USBGEM_CONFIG_GLDv3
+ mac_tx_update(dp->mh);
+#else
+ gld_sched(dp->macinfo);
+#endif
+ }
+
+ if (old_mii_state != new_mii_state) {
+ /* notify new mii link state */
+ if (new_mii_state == MII_STATE_LINKUP) {
+ dp->linkup_delay = 0;
+ USBGEM_LINKUP(dp);
+ } else if (dp->linkup_delay <= 0) {
+ USBGEM_LINKDOWN(dp);
+ }
+ } else if (dp->linkup_delay < 0) {
+ /* first linkup timeout */
+ dp->linkup_delay = 0;
+ USBGEM_LINKDOWN(dp);
+ }
+ }
+
+ thread_exit();
+}
+
+void
+usbgem_mii_update_link(struct usbgem_dev *dp)
+{
+ cv_signal(&dp->link_watcher_wait_cv);
+}
+
+int
+usbgem_mii_probe_default(struct usbgem_dev *dp)
+{
+ int phy;
+ uint16_t status;
+ uint16_t xstatus;
+ int err;
+ uint16_t adv;
+ uint16_t adv_org;
+
+ DPRINTF(3, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /*
+ * Scan PHY
+ */
+ dp->mii_status = 0;
+
+ /* Try default phy first */
+ if (dp->mii_phy_addr) {
+ status = usbgem_mii_read(dp, MII_STATUS, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ if (status != 0xffff && status != 0x0000) {
+ goto PHY_found;
+ }
+
+ if (dp->mii_phy_addr < 0) {
+ cmn_err(CE_NOTE,
+ "!%s: failed to probe default internal and/or non-MII PHY",
+ dp->name);
+ return (USB_FAILURE);
+ }
+
+ cmn_err(CE_NOTE,
+ "!%s: failed to probe default MII PHY at %d",
+ dp->name, dp->mii_phy_addr);
+ }
+
+ /* Try all possible address */
+ for (phy = dp->ugc.usbgc_mii_addr_min; phy < 32; phy++) {
+ dp->mii_phy_addr = phy;
+ status = usbgem_mii_read(dp, MII_STATUS, &err);
+ if (err != USB_SUCCESS) {
+ DPRINTF(0, (CE_CONT,
+ "!%s: %s: mii_read(status) failed",
+ dp->name, __func__));
+ goto usberr;
+ }
+
+ if (status != 0xffff && status != 0x0000) {
+ usbgem_mii_write(dp, MII_CONTROL, 0, &err);
+ if (err != USB_SUCCESS) {
+ DPRINTF(0, (CE_CONT,
+ "!%s: %s: mii_write(control) failed",
+ dp->name, __func__));
+ goto usberr;
+ }
+ goto PHY_found;
+ }
+ }
+ for (phy = dp->ugc.usbgc_mii_addr_min; phy < 32; phy++) {
+ dp->mii_phy_addr = phy;
+ usbgem_mii_write(dp, MII_CONTROL, 0, &err);
+ if (err != USB_SUCCESS) {
+ DPRINTF(0, (CE_CONT,
+ "!%s: %s: mii_write(control) failed",
+ dp->name, __func__));
+ goto usberr;
+ }
+ status = usbgem_mii_read(dp, MII_STATUS, &err);
+ if (err != USB_SUCCESS) {
+ DPRINTF(0, (CE_CONT,
+ "!%s: %s: mii_read(status) failed",
+ dp->name, __func__));
+ goto usberr;
+ }
+
+ if (status != 0xffff && status != 0) {
+ goto PHY_found;
+ }
+ }
+
+ cmn_err(CE_NOTE, "!%s: no MII PHY found", dp->name);
+ return (USB_FAILURE);
+
+PHY_found:
+ dp->mii_status = status;
+ dp->mii_status_ro = ~status;
+ dp->mii_phy_id = usbgem_mii_read(dp, MII_PHYIDH, &err) << 16;
+ if (err != USB_SUCCESS) {
+ DPRINTF(0, (CE_CONT,
+ "!%s: %s: mii_read(PHYIDH) failed",
+ dp->name, __func__));
+ goto usberr;
+ }
+ dp->mii_phy_id |= usbgem_mii_read(dp, MII_PHYIDL, &err);
+ if (err != USB_SUCCESS) {
+ DPRINTF(0, (CE_CONT,
+ "!%s: %s: mii_read(PHYIDL) failed",
+ dp->name, __func__));
+ goto usberr;
+ }
+
+ if (dp->mii_phy_addr < 0) {
+ cmn_err(CE_CONT, "!%s: using internal/non-MII PHY(0x%08x)",
+ dp->name, dp->mii_phy_id);
+ } else {
+ cmn_err(CE_CONT, "!%s: MII PHY (0x%08x) found at %d",
+ dp->name, dp->mii_phy_id, dp->mii_phy_addr);
+ }
+
+ cmn_err(CE_CONT,
+ "!%s: PHY control:%b, status:%b, advert:%b, lpar:%b, exp:%b",
+ dp->name,
+ usbgem_mii_read(dp, MII_CONTROL, &err), MII_CONTROL_BITS,
+ status, MII_STATUS_BITS,
+ usbgem_mii_read(dp, MII_AN_ADVERT, &err), MII_ABILITY_BITS,
+ usbgem_mii_read(dp, MII_AN_LPABLE, &err), MII_ABILITY_BITS,
+ usbgem_mii_read(dp, MII_AN_EXPANSION, &err), MII_AN_EXP_BITS);
+
+ dp->mii_xstatus = 0;
+ if (status & MII_STATUS_XSTATUS) {
+ dp->mii_xstatus = usbgem_mii_read(dp, MII_XSTATUS, &err);
+
+ cmn_err(CE_CONT, "!%s: xstatus:%b",
+ dp->name, dp->mii_xstatus, MII_XSTATUS_BITS);
+ }
+ dp->mii_xstatus_ro = ~dp->mii_xstatus;
+
+ /* check if the phy can advertize pause abilities */
+ adv_org = usbgem_mii_read(dp, MII_AN_ADVERT, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+
+ usbgem_mii_write(dp, MII_AN_ADVERT,
+ MII_ABILITY_PAUSE | MII_ABILITY_ASM_DIR, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+
+ adv = usbgem_mii_read(dp, MII_AN_ADVERT, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+
+ if ((adv & MII_ABILITY_PAUSE) == 0) {
+ dp->ugc.usbgc_flow_control &= ~1;
+ }
+
+ if ((adv & MII_ABILITY_ASM_DIR) == 0) {
+ dp->ugc.usbgc_flow_control &= ~2;
+ }
+
+ usbgem_mii_write(dp, MII_AN_ADVERT, adv_org, &err);
+ if (err != USB_SUCCESS) {
+ goto usberr;
+ }
+ return (USB_SUCCESS);
+
+usberr:
+ return (USB_FAILURE);
+}
+
+int
+usbgem_mii_init_default(struct usbgem_dev *dp)
+{
+ /* ENPTY */
+ return (USB_SUCCESS);
+}
+
+static int
+usbgem_mii_start(struct usbgem_dev *dp)
+{
+ int err;
+ kthread_t *lwth;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /* make a first call of usbgem_mii_link_check() */
+ dp->link_watcher_stop = 0;
+ dp->mii_state = MII_STATE_UNKNOWN;
+ dp->mii_interval = drv_usectohz(1000*1000); /* 1sec */
+ dp->mii_last_check = ddi_get_lbolt();
+ dp->linkup_delay = 600 * drv_usectohz(1000*1000); /* 10 minutes */
+
+ lwth = thread_create(NULL, 0, usbgem_mii_link_watcher, dp, 0, &p0,
+ TS_RUN, minclsyspri);
+ if (lwth == NULL) {
+ cmn_err(CE_WARN,
+ "!%s: %s: failed to create a link watcher thread",
+ dp->name, __func__);
+ return (USB_FAILURE);
+ }
+ dp->link_watcher_did = lwth->t_did;
+
+ return (USB_SUCCESS);
+}
+
+static void
+usbgem_mii_stop(struct usbgem_dev *dp)
+{
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /* Ensure timer routine stopped */
+ dp->link_watcher_stop = 1;
+ cv_signal(&dp->link_watcher_wait_cv);
+ thread_join(dp->link_watcher_did);
+}
+
+/* ============================================================== */
+/*
+ * internal mac register operation interface
+ */
+/* ============================================================== */
+/*
+ * usbgem_mac_init: cold start
+ */
+static int
+usbgem_mac_init(struct usbgem_dev *dp)
+{
+ int err;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ if (dp->mac_state == MAC_STATE_DISCONNECTED) {
+ /* pretend we succeeded */
+ return (USB_SUCCESS);
+ }
+
+ ASSERT(dp->mac_state == MAC_STATE_STOPPED);
+
+ /* reset fatal error timestamp */
+ dp->fatal_error = (clock_t)0;
+
+ /* reset tx side state */
+ mutex_enter(&dp->txlock);
+ dp->tx_busy_cnt = 0;
+ dp->tx_max_packets = dp->ugc.usbgc_tx_list_max;
+ mutex_exit(&dp->txlock);
+
+ /* reset rx side state */
+ mutex_enter(&dp->rxlock);
+ dp->rx_busy_cnt = 0;
+ mutex_exit(&dp->rxlock);
+
+ err = usbgem_hal_init_chip(dp);
+ if (err == USB_SUCCESS) {
+ dp->mac_state = MAC_STATE_INITIALIZED;
+ }
+
+ return (err);
+}
+
+/*
+ * usbgem_mac_start: warm start
+ */
+static int
+usbgem_mac_start(struct usbgem_dev *dp)
+{
+ int err;
+ int i;
+ usb_flags_t flags = 0;
+ usb_intr_req_t *req;
+#ifdef USBGEM_DEBUG_LEVEL
+ usb_pipe_state_t p_state;
+#endif
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ if (dp->mac_state == MAC_STATE_DISCONNECTED) {
+ /* do nothing but don't return failure */
+ return (USB_SUCCESS);
+ }
+
+ if (dp->mac_state != MAC_STATE_INITIALIZED) {
+ /* don't return failer */
+ DPRINTF(0, (CE_CONT,
+ "!%s: %s: mac_state(%d) is not MAC_STATE_INITIALIZED",
+ dp->name, __func__, dp->mac_state));
+ goto x;
+ }
+
+ dp->mac_state = MAC_STATE_ONLINE;
+
+ if (usbgem_hal_start_chip(dp) != USB_SUCCESS) {
+ cmn_err(CE_NOTE,
+ "!%s: %s: usb error was detected during start_chip",
+ dp->name, __func__);
+ goto x;
+ }
+
+#ifdef USBGEM_DEBUG_LEVEL
+ usb_pipe_get_state(dp->intr_pipe, &p_state, 0);
+ ASSERT(p_state == USB_PIPE_STATE_IDLE);
+#endif /* USBGEM_DEBUG_LEVEL */
+
+ if (dp->ugc.usbgc_interrupt && dp->intr_pipe) {
+
+ /* make a request for interrupt */
+
+ req = usb_alloc_intr_req(dp->dip, 0, USB_FLAGS_SLEEP);
+ if (req == NULL) {
+ cmn_err(CE_WARN, "!%s: %s: failed to allocate intreq",
+ dp->name, __func__);
+ goto x;
+ }
+ req->intr_data = NULL;
+ req->intr_client_private = (usb_opaque_t)dp;
+ req->intr_timeout = 0;
+ req->intr_attributes =
+ USB_ATTRS_SHORT_XFER_OK | USB_ATTRS_AUTOCLEARING;
+ req->intr_len = dp->ep_intr->wMaxPacketSize;
+ req->intr_cb = usbgem_intr_cb;
+ req->intr_exc_cb = usbgem_intr_cb;
+ req->intr_completion_reason = 0;
+ req->intr_cb_flags = 0;
+
+ err = usb_pipe_intr_xfer(dp->intr_pipe, req, flags);
+ if (err != USB_SUCCESS) {
+ cmn_err(CE_WARN,
+ "%s: err:%d failed to start polling of intr pipe",
+ dp->name, err);
+ goto x;
+ }
+ }
+
+ /* kick to receive the first packet */
+ if (usbgem_init_rx_buf(dp) != USB_SUCCESS) {
+ goto err_stop_intr;
+ }
+ dp->rx_active = B_TRUE;
+
+ return (USB_SUCCESS);
+
+err_stop_intr:
+ /* stop the interrupt pipe */
+ DPRINTF(0, (CE_CONT, "!%s: %s: FAULURE", dp->name, __func__));
+ if (dp->ugc.usbgc_interrupt && dp->intr_pipe) {
+ usb_pipe_stop_intr_polling(dp->intr_pipe, USB_FLAGS_SLEEP);
+ }
+x:
+ ASSERT(dp->mac_state == MAC_STATE_ONLINE);
+ /* we use another flag to indicate error state. */
+ if (dp->fatal_error == (clock_t)0) {
+ dp->fatal_error = usbgem_timestamp_nz();
+ }
+ return (USB_FAILURE);
+}
+
+static int
+usbgem_mac_stop(struct usbgem_dev *dp, int new_state, boolean_t graceful)
+{
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /*
+ * we must have writer lock for dev_state_lock
+ */
+ ASSERT(new_state == MAC_STATE_STOPPED
+ || new_state == MAC_STATE_DISCONNECTED);
+
+ /* stop polling interrupt pipe */
+ if (dp->ugc.usbgc_interrupt && dp->intr_pipe) {
+ usb_pipe_stop_intr_polling(dp->intr_pipe, USB_FLAGS_SLEEP);
+ }
+
+ if (new_state == MAC_STATE_STOPPED || graceful) {
+ /* stop the nic hardware completely */
+ if (usbgem_hal_stop_chip(dp) != USB_SUCCESS) {
+ (void) usbgem_hal_reset_chip(dp);
+ }
+ }
+
+ /* stop preparing new rx packets and sending new packets */
+ dp->mac_state = new_state;
+
+ /* other processors must get mac_state correctly after here */
+ membar_producer();
+
+ /* cancel all requests we have sent */
+ usb_pipe_reset(dp->dip, dp->bulkin_pipe, USB_FLAGS_SLEEP, NULL, 0);
+ usb_pipe_reset(dp->dip, dp->bulkout_pipe, USB_FLAGS_SLEEP, NULL, 0);
+
+ DPRINTF(0, (CE_CONT,
+ "!%s: %s: rx_busy_cnt:%d tx_busy_cnt:%d",
+ dp->name, __func__, dp->rx_busy_cnt, dp->tx_busy_cnt));
+
+ /*
+ * Here all rx packets has been cancelled and their call back
+ * function has been exeuted, because we called usb_pipe_reset
+ * synchronously.
+ * So actually we just ensure rx_busy_cnt == 0.
+ */
+ mutex_enter(&dp->rxlock);
+ while (dp->rx_busy_cnt > 0) {
+ cv_wait(&dp->rx_drain_cv, &dp->rxlock);
+ }
+ mutex_exit(&dp->rxlock);
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: rx_busy_cnt is %d now",
+ dp->name, __func__, dp->rx_busy_cnt));
+
+ mutex_enter(&dp->txlock);
+ while (dp->tx_busy_cnt > 0) {
+ cv_wait(&dp->tx_drain_cv, &dp->txlock);
+ }
+ mutex_exit(&dp->txlock);
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: tx_busy_cnt is %d now",
+ dp->name, __func__, dp->tx_busy_cnt));
+
+ return (USB_SUCCESS);
+}
+
+static int
+usbgem_add_multicast(struct usbgem_dev *dp, const uint8_t *ep)
+{
+ int cnt;
+ int err;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ sema_p(&dp->rxfilter_lock);
+ if (dp->mc_count_req++ < USBGEM_MAXMC) {
+ /* append the new address at the end of the mclist */
+ cnt = dp->mc_count;
+ bcopy(ep, dp->mc_list[cnt].addr.ether_addr_octet,
+ ETHERADDRL);
+ if (dp->ugc.usbgc_multicast_hash) {
+ dp->mc_list[cnt].hash =
+ (*dp->ugc.usbgc_multicast_hash)(dp, ep);
+ }
+ dp->mc_count = cnt + 1;
+ }
+
+ if (dp->mc_count_req != dp->mc_count) {
+ /* multicast address list overflow */
+ dp->rxmode |= RXMODE_MULTI_OVF;
+ } else {
+ dp->rxmode &= ~RXMODE_MULTI_OVF;
+ }
+
+ if (dp->mac_state != MAC_STATE_DISCONNECTED) {
+ /* tell new multicast list to the hardware */
+ err = usbgem_hal_set_rx_filter(dp);
+ }
+ sema_v(&dp->rxfilter_lock);
+
+ return (err);
+}
+
+static int
+usbgem_remove_multicast(struct usbgem_dev *dp, const uint8_t *ep)
+{
+ size_t len;
+ int i;
+ int cnt;
+ int err;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ sema_p(&dp->rxfilter_lock);
+ dp->mc_count_req--;
+ cnt = dp->mc_count;
+ for (i = 0; i < cnt; i++) {
+ if (bcmp(ep, &dp->mc_list[i].addr, ETHERADDRL)) {
+ continue;
+ }
+ /* shrink the mclist by copying forward */
+ len = (cnt - (i + 1)) * sizeof (*dp->mc_list);
+ if (len > 0) {
+ bcopy(&dp->mc_list[i+1], &dp->mc_list[i], len);
+ }
+ dp->mc_count--;
+ break;
+ }
+
+ if (dp->mc_count_req != dp->mc_count) {
+ /* multicast address list overflow */
+ dp->rxmode |= RXMODE_MULTI_OVF;
+ } else {
+ dp->rxmode &= ~RXMODE_MULTI_OVF;
+ }
+
+ if (dp->mac_state != MAC_STATE_DISCONNECTED) {
+ err = usbgem_hal_set_rx_filter(dp);
+ }
+ sema_v(&dp->rxfilter_lock);
+
+ return (err);
+}
+
+
+/* ============================================================== */
+/*
+ * ioctl
+ */
+/* ============================================================== */
+enum ioc_reply {
+ IOC_INVAL = -1, /* bad, NAK with EINVAL */
+ IOC_DONE, /* OK, reply sent */
+ IOC_ACK, /* OK, just send ACK */
+ IOC_REPLY, /* OK, just send reply */
+ IOC_RESTART_ACK, /* OK, restart & ACK */
+ IOC_RESTART_REPLY /* OK, restart & reply */
+};
+
+
+#ifdef USBGEM_CONFIG_MAC_PROP
+static int
+usbgem_get_def_val(struct usbgem_dev *dp, mac_prop_id_t pr_num,
+ uint_t pr_valsize, void *pr_val)
+{
+ link_flowctrl_t fl;
+ int err = 0;
+
+ ASSERT(pr_valsize > 0);
+ switch (pr_num) {
+ case MAC_PROP_AUTONEG:
+ *(uint8_t *)pr_val =
+ BOOLEAN(dp->mii_status & MII_STATUS_CANAUTONEG);
+ break;
+
+ case MAC_PROP_FLOWCTRL:
+ if (pr_valsize < sizeof (link_flowctrl_t)) {
+ return (EINVAL);
+ }
+ switch (dp->ugc.usbgc_flow_control) {
+ case FLOW_CONTROL_NONE:
+ fl = LINK_FLOWCTRL_NONE;
+ break;
+ case FLOW_CONTROL_SYMMETRIC:
+ fl = LINK_FLOWCTRL_BI;
+ break;
+ case FLOW_CONTROL_TX_PAUSE:
+ fl = LINK_FLOWCTRL_TX;
+ break;
+ case FLOW_CONTROL_RX_PAUSE:
+ fl = LINK_FLOWCTRL_RX;
+ break;
+ }
+ bcopy(&fl, pr_val, sizeof (fl));
+ break;
+
+ case MAC_PROP_ADV_1000FDX_CAP:
+ case MAC_PROP_EN_1000FDX_CAP:
+ *(uint8_t *)pr_val =
+ (dp->mii_xstatus & MII_XSTATUS_1000BASET_FD) ||
+ (dp->mii_xstatus & MII_XSTATUS_1000BASEX_FD);
+ break;
+
+ case MAC_PROP_ADV_1000HDX_CAP:
+ case MAC_PROP_EN_1000HDX_CAP:
+ *(uint8_t *)pr_val =
+ (dp->mii_xstatus & MII_XSTATUS_1000BASET) ||
+ (dp->mii_xstatus & MII_XSTATUS_1000BASEX);
+ break;
+
+ case MAC_PROP_ADV_100T4_CAP:
+ case MAC_PROP_EN_100T4_CAP:
+ *(uint8_t *)pr_val =
+ BOOLEAN(dp->mii_status & MII_STATUS_100_BASE_T4);
+ break;
+
+ case MAC_PROP_ADV_100FDX_CAP:
+ case MAC_PROP_EN_100FDX_CAP:
+ *(uint8_t *)pr_val =
+ BOOLEAN(dp->mii_status & MII_STATUS_100_BASEX_FD);
+ break;
+
+ case MAC_PROP_ADV_100HDX_CAP:
+ case MAC_PROP_EN_100HDX_CAP:
+ *(uint8_t *)pr_val =
+ BOOLEAN(dp->mii_status & MII_STATUS_100_BASEX);
+ break;
+
+ case MAC_PROP_ADV_10FDX_CAP:
+ case MAC_PROP_EN_10FDX_CAP:
+ *(uint8_t *)pr_val =
+ BOOLEAN(dp->mii_status & MII_STATUS_10_FD);
+ break;
+
+ case MAC_PROP_ADV_10HDX_CAP:
+ case MAC_PROP_EN_10HDX_CAP:
+ *(uint8_t *)pr_val =
+ BOOLEAN(dp->mii_status & MII_STATUS_10);
+ break;
+
+ default:
+ err = ENOTSUP;
+ break;
+ }
+ return (err);
+}
+
+#ifdef MAC_VERSION_V1
+static void
+usbgem_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+ mac_prop_info_handle_t prh)
+{
+ struct usbgem_dev *dp = arg;
+ link_flowctrl_t fl;
+
+ /*
+ * By default permissions are read/write unless specified
+ * otherwise by the driver.
+ */
+
+ switch (pr_num) {
+ case MAC_PROP_DUPLEX:
+ case MAC_PROP_SPEED:
+ case MAC_PROP_STATUS:
+ case MAC_PROP_ADV_1000FDX_CAP:
+ case MAC_PROP_ADV_1000HDX_CAP:
+ case MAC_PROP_ADV_100FDX_CAP:
+ case MAC_PROP_ADV_100HDX_CAP:
+ case MAC_PROP_ADV_10FDX_CAP:
+ case MAC_PROP_ADV_10HDX_CAP:
+ case MAC_PROP_ADV_100T4_CAP:
+ case MAC_PROP_EN_100T4_CAP:
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ break;
+
+ case MAC_PROP_EN_1000FDX_CAP:
+ if ((dp->mii_xstatus_ro & MII_XSTATUS_1000BASET_FD) == 0) {
+ mac_prop_info_set_default_uint8(prh,
+ BOOLEAN(
+ dp->mii_xstatus & MII_XSTATUS_1000BASET_FD));
+ } else if ((dp->mii_xstatus_ro & MII_XSTATUS_1000BASEX_FD)
+ == 0) {
+ mac_prop_info_set_default_uint8(prh,
+ BOOLEAN(
+ dp->mii_xstatus & MII_XSTATUS_1000BASEX_FD));
+ } else {
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ }
+ break;
+
+ case MAC_PROP_EN_1000HDX_CAP:
+ if ((dp->mii_xstatus_ro & MII_XSTATUS_1000BASET) == 0) {
+ mac_prop_info_set_default_uint8(prh,
+ BOOLEAN(
+ dp->mii_xstatus & MII_XSTATUS_1000BASET));
+ } else if ((dp->mii_xstatus_ro & MII_XSTATUS_1000BASEX) == 0) {
+ mac_prop_info_set_default_uint8(prh,
+ BOOLEAN(
+ dp->mii_xstatus & MII_XSTATUS_1000BASEX));
+ } else {
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ }
+ break;
+
+ case MAC_PROP_EN_100FDX_CAP:
+ if ((dp->mii_status_ro & MII_STATUS_100_BASEX_FD) == 0) {
+ mac_prop_info_set_default_uint8(prh,
+ BOOLEAN(dp->mii_status & MII_STATUS_100_BASEX_FD));
+ } else {
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ }
+ break;
+
+ case MAC_PROP_EN_100HDX_CAP:
+ if ((dp->mii_status_ro & MII_STATUS_100_BASEX) == 0) {
+ mac_prop_info_set_default_uint8(prh,
+ BOOLEAN(dp->mii_status & MII_STATUS_100_BASEX));
+ } else {
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ }
+ break;
+
+ case MAC_PROP_EN_10FDX_CAP:
+ if ((dp->mii_status_ro & MII_STATUS_10_FD) == 0) {
+ mac_prop_info_set_default_uint8(prh,
+ BOOLEAN(dp->mii_status & MII_STATUS_10_FD));
+ } else {
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ }
+ break;
+
+ case MAC_PROP_EN_10HDX_CAP:
+ if ((dp->mii_status_ro & MII_STATUS_10) == 0) {
+ mac_prop_info_set_default_uint8(prh,
+ BOOLEAN(dp->mii_status & MII_STATUS_10));
+ } else {
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ }
+ break;
+
+ case MAC_PROP_AUTONEG:
+ if ((dp->mii_status_ro & MII_STATUS_CANAUTONEG) == 0) {
+ mac_prop_info_set_default_uint8(prh,
+ BOOLEAN(dp->mii_status & MII_STATUS_CANAUTONEG));
+ } else {
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
+ }
+ break;
+
+ case MAC_PROP_FLOWCTRL:
+ switch (dp->ugc.usbgc_flow_control) {
+ case FLOW_CONTROL_NONE:
+ fl = LINK_FLOWCTRL_NONE;
+ break;
+ case FLOW_CONTROL_SYMMETRIC:
+ fl = LINK_FLOWCTRL_BI;
+ break;
+ case FLOW_CONTROL_TX_PAUSE:
+ fl = LINK_FLOWCTRL_TX;
+ break;
+ case FLOW_CONTROL_RX_PAUSE:
+ fl = LINK_FLOWCTRL_RX;
+ break;
+ }
+ mac_prop_info_set_default_link_flowctrl(prh, fl);
+ break;
+
+ case MAC_PROP_MTU:
+ mac_prop_info_set_range_uint32(prh,
+ dp->ugc.usbgc_min_mtu, dp->ugc.usbgc_max_mtu);
+ break;
+
+ case MAC_PROP_PRIVATE:
+ break;
+ }
+}
+#endif
+
+static int
+usbgem_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+ uint_t pr_valsize, const void *pr_val)
+{
+ struct usbgem_dev *dp = arg;
+ int err = 0;
+ boolean_t update = B_FALSE;
+ link_flowctrl_t flowctrl;
+ uint32_t cur_mtu, new_mtu;
+
+ rw_enter(&dp->dev_state_lock, RW_WRITER);
+ switch (pr_num) {
+ case MAC_PROP_EN_1000FDX_CAP:
+ if ((dp->mii_xstatus_ro & MII_XSTATUS_1000BASET_FD) == 0 ||
+ (dp->mii_xstatus_ro & MII_XSTATUS_1000BASEX_FD) == 0) {
+ if (dp->anadv_1000fdx != *(uint8_t *)pr_val) {
+ dp->anadv_1000fdx = *(uint8_t *)pr_val;
+ update = B_TRUE;
+ }
+ } else {
+ err = ENOTSUP;
+ }
+ break;
+
+ case MAC_PROP_EN_1000HDX_CAP:
+ if ((dp->mii_xstatus_ro & MII_XSTATUS_1000BASET) == 0 ||
+ (dp->mii_xstatus_ro & MII_XSTATUS_1000BASEX) == 0) {
+ if (dp->anadv_1000hdx != *(uint8_t *)pr_val) {
+ dp->anadv_1000hdx = *(uint8_t *)pr_val;
+ update = B_TRUE;
+ }
+ } else {
+ err = ENOTSUP;
+ }
+ break;
+
+ case MAC_PROP_EN_100FDX_CAP:
+ if ((dp->mii_status_ro & MII_STATUS_100_BASEX_FD) == 0) {
+ if (dp->anadv_100fdx != *(uint8_t *)pr_val) {
+ dp->anadv_100fdx = *(uint8_t *)pr_val;
+ update = B_TRUE;
+ }
+ } else {
+ err = ENOTSUP;
+ }
+ break;
+
+ case MAC_PROP_EN_100HDX_CAP:
+ if ((dp->mii_status_ro & MII_STATUS_100_BASEX) == 0) {
+ if (dp->anadv_100hdx != *(uint8_t *)pr_val) {
+ dp->anadv_100hdx = *(uint8_t *)pr_val;
+ update = B_TRUE;
+ }
+ } else {
+ err = ENOTSUP;
+ }
+ break;
+
+ case MAC_PROP_EN_10FDX_CAP:
+ if ((dp->mii_status_ro & MII_STATUS_10_FD) == 0) {
+ if (dp->anadv_10fdx != *(uint8_t *)pr_val) {
+ dp->anadv_10fdx = *(uint8_t *)pr_val;
+ update = B_TRUE;
+ }
+ } else {
+ err = ENOTSUP;
+ }
+ break;
+
+ case MAC_PROP_EN_10HDX_CAP:
+ if ((dp->mii_status_ro & MII_STATUS_10_FD) == 0) {
+ if (dp->anadv_10hdx != *(uint8_t *)pr_val) {
+ dp->anadv_10hdx = *(uint8_t *)pr_val;
+ update = B_TRUE;
+ }
+ } else {
+ err = ENOTSUP;
+ }
+ break;
+
+ case MAC_PROP_AUTONEG:
+ if ((dp->mii_status_ro & MII_STATUS_CANAUTONEG) == 0) {
+ if (dp->anadv_autoneg != *(uint8_t *)pr_val) {
+ dp->anadv_autoneg = *(uint8_t *)pr_val;
+ update = B_TRUE;
+ }
+ } else {
+ err = ENOTSUP;
+ }
+ break;
+
+ case MAC_PROP_FLOWCTRL:
+ bcopy(pr_val, &flowctrl, sizeof (flowctrl));
+
+ switch (flowctrl) {
+ default:
+ err = EINVAL;
+ break;
+
+ case LINK_FLOWCTRL_NONE:
+ if (dp->flow_control != FLOW_CONTROL_NONE) {
+ dp->flow_control = FLOW_CONTROL_NONE;
+ update = B_TRUE;
+ }
+ break;
+
+ case LINK_FLOWCTRL_RX:
+ if (dp->flow_control != FLOW_CONTROL_RX_PAUSE) {
+ dp->flow_control = FLOW_CONTROL_RX_PAUSE;
+ update = B_TRUE;
+ }
+ break;
+
+ case LINK_FLOWCTRL_TX:
+ if (dp->flow_control != FLOW_CONTROL_TX_PAUSE) {
+ dp->flow_control = FLOW_CONTROL_TX_PAUSE;
+ update = B_TRUE;
+ }
+ break;
+
+ case LINK_FLOWCTRL_BI:
+ if (dp->flow_control != FLOW_CONTROL_SYMMETRIC) {
+ dp->flow_control = FLOW_CONTROL_SYMMETRIC;
+ update = B_TRUE;
+ }
+ break;
+ }
+ break;
+
+ case MAC_PROP_ADV_1000FDX_CAP:
+ case MAC_PROP_ADV_1000HDX_CAP:
+ case MAC_PROP_ADV_100FDX_CAP:
+ case MAC_PROP_ADV_100HDX_CAP:
+ case MAC_PROP_ADV_10FDX_CAP:
+ case MAC_PROP_ADV_10HDX_CAP:
+ case MAC_PROP_STATUS:
+ case MAC_PROP_SPEED:
+ case MAC_PROP_DUPLEX:
+ err = ENOTSUP; /* read-only prop. Can't set this. */
+ break;
+
+ case MAC_PROP_MTU:
+ bcopy(pr_val, &new_mtu, sizeof (new_mtu));
+ if (new_mtu != dp->mtu) {
+ err = EINVAL;
+ }
+ break;
+
+ case MAC_PROP_PRIVATE:
+ err = ENOTSUP;
+ break;
+
+ default:
+ err = ENOTSUP;
+ break;
+ }
+
+ if (update) {
+ /* sync with PHY */
+ usbgem_choose_forcedmode(dp);
+ dp->mii_state = MII_STATE_UNKNOWN;
+ cv_signal(&dp->link_watcher_wait_cv);
+ }
+ rw_exit(&dp->dev_state_lock);
+ return (err);
+}
+
+static int
+#ifdef MAC_VERSION_V1
+usbgem_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+ uint_t pr_valsize, void *pr_val)
+#else
+usbgem_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
+ uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm)
+#endif
+{
+ struct usbgem_dev *dp = arg;
+ int err = 0;
+ link_flowctrl_t flowctrl;
+ uint64_t tmp = 0;
+
+ if (pr_valsize == 0) {
+ return (EINVAL);
+ }
+#ifndef MAC_VERSION_V1
+ *perm = MAC_PROP_PERM_RW;
+#endif
+ bzero(pr_val, pr_valsize);
+#ifndef MAC_VERSION_V1
+ if ((pr_flags & MAC_PROP_DEFAULT) && (pr_num != MAC_PROP_PRIVATE)) {
+ return (usbgem_get_def_val(dp, pr_num, pr_valsize, pr_val));
+ }
+#endif
+ rw_enter(&dp->dev_state_lock, RW_READER);
+ switch (pr_num) {
+ case MAC_PROP_DUPLEX:
+#ifndef MAC_VERSION_V1
+ *perm = MAC_PROP_PERM_READ;
+#endif
+ if (pr_valsize >= sizeof (link_duplex_t)) {
+ if (dp->mii_state != MII_STATE_LINKUP) {
+ *(link_duplex_t *)pr_val = LINK_DUPLEX_UNKNOWN;
+ } else if (dp->full_duplex) {
+ *(link_duplex_t *)pr_val = LINK_DUPLEX_FULL;
+ } else {
+ *(link_duplex_t *)pr_val = LINK_DUPLEX_HALF;
+ }
+ } else {
+ err = EINVAL;
+ }
+ break;
+ case MAC_PROP_SPEED:
+#ifndef MAC_VERSION_V1
+ *perm = MAC_PROP_PERM_READ;
+#endif
+ if (pr_valsize >= sizeof (uint64_t)) {
+ switch (dp->speed) {
+ case USBGEM_SPD_1000:
+ tmp = 1000000000;
+ break;
+ case USBGEM_SPD_100:
+ tmp = 100000000;
+ break;
+ case USBGEM_SPD_10:
+ tmp = 10000000;
+ break;
+ default:
+ tmp = 0;
+ }
+ bcopy(&tmp, pr_val, sizeof (tmp));
+ } else {
+ err = EINVAL;
+ }
+ break;
+
+ case MAC_PROP_AUTONEG:
+#ifndef MAC_VERSION_V1
+ if (dp->mii_status_ro & MII_STATUS_CANAUTONEG) {
+ *perm = MAC_PROP_PERM_READ;
+ }
+#endif
+ *(uint8_t *)pr_val = dp->anadv_autoneg;
+ break;
+
+ case MAC_PROP_FLOWCTRL:
+ if (pr_valsize >= sizeof (link_flowctrl_t)) {
+ switch (dp->flow_control) {
+ case FLOW_CONTROL_NONE:
+ flowctrl = LINK_FLOWCTRL_NONE;
+ break;
+ case FLOW_CONTROL_RX_PAUSE:
+ flowctrl = LINK_FLOWCTRL_RX;
+ break;
+ case FLOW_CONTROL_TX_PAUSE:
+ flowctrl = LINK_FLOWCTRL_TX;
+ break;
+ case FLOW_CONTROL_SYMMETRIC:
+ flowctrl = LINK_FLOWCTRL_BI;
+ break;
+ }
+ bcopy(&flowctrl, pr_val, sizeof (flowctrl));
+ } else {
+ err = EINVAL;
+ }
+ break;
+
+ case MAC_PROP_ADV_1000FDX_CAP:
+ case MAC_PROP_ADV_1000HDX_CAP:
+ case MAC_PROP_ADV_100FDX_CAP:
+ case MAC_PROP_ADV_100HDX_CAP:
+ case MAC_PROP_ADV_10FDX_CAP:
+ case MAC_PROP_ADV_10HDX_CAP:
+ case MAC_PROP_ADV_100T4_CAP:
+ usbgem_get_def_val(dp, pr_num, pr_valsize, pr_val);
+ break;
+
+ case MAC_PROP_EN_1000FDX_CAP:
+#ifndef MAC_VERSION_V1
+ if ((dp->mii_xstatus_ro & MII_XSTATUS_1000BASET_FD) &&
+ (dp->mii_xstatus_ro & MII_XSTATUS_1000BASEX_FD)) {
+ *perm = MAC_PROP_PERM_READ;
+ }
+#endif
+ *(uint8_t *)pr_val = dp->anadv_1000fdx;
+ break;
+
+ case MAC_PROP_EN_1000HDX_CAP:
+#ifndef MAC_VERSION_V1
+ if ((dp->mii_xstatus_ro & MII_XSTATUS_1000BASET) &&
+ (dp->mii_xstatus_ro & MII_XSTATUS_1000BASEX)) {
+ *perm = MAC_PROP_PERM_READ;
+ }
+#endif
+ *(uint8_t *)pr_val = dp->anadv_1000hdx;
+ break;
+
+ case MAC_PROP_EN_100FDX_CAP:
+#ifndef MAC_VERSION_V1
+ if (dp->mii_status_ro & MII_STATUS_100_BASEX_FD) {
+ *perm = MAC_PROP_PERM_READ;
+ }
+#endif
+ *(uint8_t *)pr_val = dp->anadv_100fdx;
+ break;
+
+ case MAC_PROP_EN_100HDX_CAP:
+#ifndef MAC_VERSION_V1
+ if (dp->mii_status_ro & MII_STATUS_100_BASEX) {
+ *perm = MAC_PROP_PERM_READ;
+ }
+#endif
+ *(uint8_t *)pr_val = dp->anadv_100hdx;
+ break;
+
+ case MAC_PROP_EN_10FDX_CAP:
+#ifndef MAC_VERSION_V1
+ if (dp->mii_status_ro & MII_STATUS_10_FD) {
+ *perm = MAC_PROP_PERM_READ;
+ }
+#endif
+ *(uint8_t *)pr_val = dp->anadv_10fdx;
+ break;
+
+ case MAC_PROP_EN_10HDX_CAP:
+#ifndef MAC_VERSION_V1
+ if (dp->mii_status_ro & MII_STATUS_10) {
+ *perm = MAC_PROP_PERM_READ;
+ }
+#endif
+ *(uint8_t *)pr_val = dp->anadv_10hdx;
+ break;
+
+ case MAC_PROP_EN_100T4_CAP:
+#ifndef MAC_VERSION_V1
+ if (dp->mii_status_ro & MII_STATUS_100_BASE_T4) {
+ *perm = MAC_PROP_PERM_READ;
+ }
+#endif
+ *(uint8_t *)pr_val = dp->anadv_100t4;
+ break;
+
+ case MAC_PROP_PRIVATE:
+ err = ENOTSUP;
+ break;
+
+#ifndef MAC_VERSION_V1
+ case MAC_PROP_MTU: {
+ mac_propval_range_t range;
+ if (!(pr_flags & MAC_PROP_POSSIBLE)) {
+ err = ENOTSUP;
+ break;
+ }
+ if (pr_valsize < sizeof (mac_propval_range_t)) {
+ err = EINVAL;
+ break;
+ }
+ range.mpr_count = 1;
+ range.mpr_type = MAC_PROPVAL_UINT32;
+ range.range_uint32[0].mpur_min = ETHERMTU;
+ range.range_uint32[0].mpur_max = dp->mtu;
+ bcopy(&range, pr_val, sizeof (range));
+ break;
+ }
+#endif
+ default:
+ err = ENOTSUP;
+ break;
+ }
+
+ rw_exit(&dp->dev_state_lock);
+ return (err);
+}
+#endif /* USBGEM_CONFIG_MAC_PROP */
+
+#ifdef USBGEM_CONFIG_ND
+/* ============================================================== */
+/*
+ * ND interface
+ */
+/* ============================================================== */
+enum {
+ PARAM_AUTONEG_CAP,
+ PARAM_PAUSE_CAP,
+ PARAM_ASYM_PAUSE_CAP,
+ PARAM_1000FDX_CAP,
+ PARAM_1000HDX_CAP,
+ PARAM_100T4_CAP,
+ PARAM_100FDX_CAP,
+ PARAM_100HDX_CAP,
+ PARAM_10FDX_CAP,
+ PARAM_10HDX_CAP,
+
+ PARAM_ADV_AUTONEG_CAP,
+ PARAM_ADV_PAUSE_CAP,
+ PARAM_ADV_ASYM_PAUSE_CAP,
+ PARAM_ADV_1000FDX_CAP,
+ PARAM_ADV_1000HDX_CAP,
+ PARAM_ADV_100T4_CAP,
+ PARAM_ADV_100FDX_CAP,
+ PARAM_ADV_100HDX_CAP,
+ PARAM_ADV_10FDX_CAP,
+ PARAM_ADV_10HDX_CAP,
+ PARAM_ADV_1000T_MS,
+
+ PARAM_LP_AUTONEG_CAP,
+ PARAM_LP_PAUSE_CAP,
+ PARAM_LP_ASYM_PAUSE_CAP,
+ PARAM_LP_1000FDX_CAP,
+ PARAM_LP_1000HDX_CAP,
+ PARAM_LP_100T4_CAP,
+ PARAM_LP_100FDX_CAP,
+ PARAM_LP_100HDX_CAP,
+ PARAM_LP_10FDX_CAP,
+ PARAM_LP_10HDX_CAP,
+
+ PARAM_LINK_STATUS,
+ PARAM_LINK_SPEED,
+ PARAM_LINK_DUPLEX,
+
+ PARAM_LINK_AUTONEG,
+ PARAM_LINK_RX_PAUSE,
+ PARAM_LINK_TX_PAUSE,
+
+ PARAM_LOOP_MODE,
+ PARAM_MSI_CNT,
+#ifdef DEBUG_RESUME
+ PARAM_RESUME_TEST,
+#endif
+
+ PARAM_COUNT
+};
+
+struct usbgem_nd_arg {
+ struct usbgem_dev *dp;
+ int item;
+};
+
+static int
+usbgem_param_get(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *credp)
+{
+ struct usbgem_dev *dp = ((struct usbgem_nd_arg *)(void *)arg)->dp;
+ int item = ((struct usbgem_nd_arg *)(void *)arg)->item;
+ long val;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called, item:%d",
+ dp->name, __func__, item));
+
+ switch (item) {
+ case PARAM_AUTONEG_CAP:
+ val = BOOLEAN(dp->mii_status & MII_STATUS_CANAUTONEG);
+ DPRINTF(1, (CE_CONT, "autoneg_cap:%d", val));
+ break;
+
+ case PARAM_PAUSE_CAP:
+ val = dp->ugc.usbgc_flow_control != FLOW_CONTROL_NONE;
+ break;
+
+ case PARAM_ASYM_PAUSE_CAP:
+ val = dp->ugc.usbgc_flow_control > FLOW_CONTROL_SYMMETRIC;
+ break;
+
+ case PARAM_1000FDX_CAP:
+ val = (dp->mii_xstatus & MII_XSTATUS_1000BASET_FD) ||
+ (dp->mii_xstatus & MII_XSTATUS_1000BASEX_FD);
+ break;
+
+ case PARAM_1000HDX_CAP:
+ val = (dp->mii_xstatus & MII_XSTATUS_1000BASET) ||
+ (dp->mii_xstatus & MII_XSTATUS_1000BASEX);
+ break;
+
+ case PARAM_100T4_CAP:
+ val = BOOLEAN(dp->mii_status & MII_STATUS_100_BASE_T4);
+ break;
+
+ case PARAM_100FDX_CAP:
+ val = BOOLEAN(dp->mii_status & MII_STATUS_100_BASEX_FD);
+ break;
+
+ case PARAM_100HDX_CAP:
+ val = BOOLEAN(dp->mii_status & MII_STATUS_100_BASEX);
+ break;
+
+ case PARAM_10FDX_CAP:
+ val = BOOLEAN(dp->mii_status & MII_STATUS_10_FD);
+ break;
+
+ case PARAM_10HDX_CAP:
+ val = BOOLEAN(dp->mii_status & MII_STATUS_10);
+ break;
+
+ case PARAM_ADV_AUTONEG_CAP:
+ val = dp->anadv_autoneg;
+ break;
+
+ case PARAM_ADV_PAUSE_CAP:
+ val = dp->anadv_pause;
+ break;
+
+ case PARAM_ADV_ASYM_PAUSE_CAP:
+ val = dp->anadv_asmpause;
+ break;
+
+ case PARAM_ADV_1000FDX_CAP:
+ val = dp->anadv_1000fdx;
+ break;
+
+ case PARAM_ADV_1000HDX_CAP:
+ val = dp->anadv_1000hdx;
+ break;
+
+ case PARAM_ADV_100T4_CAP:
+ val = dp->anadv_100t4;
+ break;
+
+ case PARAM_ADV_100FDX_CAP:
+ val = dp->anadv_100fdx;
+ break;
+
+ case PARAM_ADV_100HDX_CAP:
+ val = dp->anadv_100hdx;
+ break;
+
+ case PARAM_ADV_10FDX_CAP:
+ val = dp->anadv_10fdx;
+ break;
+
+ case PARAM_ADV_10HDX_CAP:
+ val = dp->anadv_10hdx;
+ break;
+
+ case PARAM_ADV_1000T_MS:
+ val = dp->anadv_1000t_ms;
+ break;
+
+ case PARAM_LP_AUTONEG_CAP:
+ val = BOOLEAN(dp->mii_exp & MII_AN_EXP_LPCANAN);
+ break;
+
+ case PARAM_LP_PAUSE_CAP:
+ val = BOOLEAN(dp->mii_lpable & MII_ABILITY_PAUSE);
+ break;
+
+ case PARAM_LP_ASYM_PAUSE_CAP:
+ val = BOOLEAN(dp->mii_lpable & MII_ABILITY_ASM_DIR);
+ break;
+
+ case PARAM_LP_1000FDX_CAP:
+ val = BOOLEAN(dp->mii_stat1000 & MII_1000TS_LP_FULL);
+ break;
+
+ case PARAM_LP_1000HDX_CAP:
+ val = BOOLEAN(dp->mii_stat1000 & MII_1000TS_LP_HALF);
+ break;
+
+ case PARAM_LP_100T4_CAP:
+ val = BOOLEAN(dp->mii_lpable & MII_ABILITY_100BASE_T4);
+ break;
+
+ case PARAM_LP_100FDX_CAP:
+ val = BOOLEAN(dp->mii_lpable & MII_ABILITY_100BASE_TX_FD);
+ break;
+
+ case PARAM_LP_100HDX_CAP:
+ val = BOOLEAN(dp->mii_lpable & MII_ABILITY_100BASE_TX);
+ break;
+
+ case PARAM_LP_10FDX_CAP:
+ val = BOOLEAN(dp->mii_lpable & MII_ABILITY_10BASE_T_FD);
+ break;
+
+ case PARAM_LP_10HDX_CAP:
+ val = BOOLEAN(dp->mii_lpable & MII_ABILITY_10BASE_T);
+ break;
+
+ case PARAM_LINK_STATUS:
+ val = (dp->mii_state == MII_STATE_LINKUP);
+ break;
+
+ case PARAM_LINK_SPEED:
+ val = usbgem_speed_value[dp->speed];
+ break;
+
+ case PARAM_LINK_DUPLEX:
+ val = 0;
+ if (dp->mii_state == MII_STATE_LINKUP) {
+ val = dp->full_duplex ? 2 : 1;
+ }
+ break;
+
+ case PARAM_LINK_AUTONEG:
+ val = BOOLEAN(dp->mii_exp & MII_AN_EXP_LPCANAN);
+ break;
+
+ case PARAM_LINK_RX_PAUSE:
+ val = (dp->flow_control == FLOW_CONTROL_SYMMETRIC) ||
+ (dp->flow_control == FLOW_CONTROL_RX_PAUSE);
+ break;
+
+ case PARAM_LINK_TX_PAUSE:
+ val = (dp->flow_control == FLOW_CONTROL_SYMMETRIC) ||
+ (dp->flow_control == FLOW_CONTROL_TX_PAUSE);
+ break;
+
+#ifdef DEBUG_RESUME
+ case PARAM_RESUME_TEST:
+ val = 0;
+ break;
+#endif
+ default:
+ cmn_err(CE_WARN, "%s: unimplemented ndd control (%d)",
+ dp->name, item);
+ break;
+ }
+
+ (void) mi_mpprintf(mp, "%ld", val);
+
+ return (0);
+}
+
+static int
+usbgem_param_set(queue_t *q,
+ mblk_t *mp, char *value, caddr_t arg, cred_t *credp)
+{
+ struct usbgem_dev *dp = ((struct usbgem_nd_arg *)(void *)arg)->dp;
+ int item = ((struct usbgem_nd_arg *)(void *)arg)->item;
+ long val;
+ char *end;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+ if (ddi_strtol(value, &end, 10, &val)) {
+ return (EINVAL);
+ }
+ if (end == value) {
+ return (EINVAL);
+ }
+
+ switch (item) {
+ case PARAM_ADV_AUTONEG_CAP:
+ if (val != 0 && val != 1) {
+ goto err;
+ }
+ if (val && (dp->mii_status & MII_STATUS_CANAUTONEG) == 0) {
+ goto err;
+ }
+ dp->anadv_autoneg = (int)val;
+ break;
+
+ case PARAM_ADV_PAUSE_CAP:
+ if (val != 0 && val != 1) {
+ goto err;
+ }
+ if (val && dp->ugc.usbgc_flow_control == FLOW_CONTROL_NONE) {
+ goto err;
+ }
+ dp->anadv_pause = (int)val;
+ break;
+
+ case PARAM_ADV_ASYM_PAUSE_CAP:
+ if (val != 0 && val != 1) {
+ goto err;
+ }
+ if (val &&
+ dp->ugc.usbgc_flow_control <= FLOW_CONTROL_SYMMETRIC) {
+ goto err;
+ }
+ dp->anadv_asmpause = (int)val;
+ break;
+
+ case PARAM_ADV_1000FDX_CAP:
+ if (val != 0 && val != 1) {
+ goto err;
+ }
+ if (val && (dp->mii_xstatus &
+ (MII_XSTATUS_1000BASET_FD |
+ MII_XSTATUS_1000BASEX_FD)) == 0) {
+ goto err;
+ }
+ dp->anadv_1000fdx = (int)val;
+ break;
+
+ case PARAM_ADV_1000HDX_CAP:
+ if (val != 0 && val != 1) {
+ goto err;
+ }
+ if (val && (dp->mii_xstatus &
+ (MII_XSTATUS_1000BASET | MII_XSTATUS_1000BASEX)) == 0) {
+ goto err;
+ }
+ dp->anadv_1000hdx = (int)val;
+ break;
+
+ case PARAM_ADV_100T4_CAP:
+ if (val != 0 && val != 1) {
+ goto err;
+ }
+ if (val && (dp->mii_status & MII_STATUS_100_BASE_T4) == 0) {
+ goto err;
+ }
+ dp->anadv_100t4 = (int)val;
+ break;
+
+ case PARAM_ADV_100FDX_CAP:
+ if (val != 0 && val != 1) {
+ goto err;
+ }
+ if (val && (dp->mii_status & MII_STATUS_100_BASEX_FD) == 0) {
+ goto err;
+ }
+ dp->anadv_100fdx = (int)val;
+ break;
+
+ case PARAM_ADV_100HDX_CAP:
+ if (val != 0 && val != 1) {
+ goto err;
+ }
+ if (val && (dp->mii_status & MII_STATUS_100_BASEX) == 0) {
+ goto err;
+ }
+ dp->anadv_100hdx = (int)val;
+ break;
+
+ case PARAM_ADV_10FDX_CAP:
+ if (val != 0 && val != 1) {
+ goto err;
+ }
+ if (val && (dp->mii_status & MII_STATUS_10_FD) == 0) {
+ goto err;
+ }
+ dp->anadv_10fdx = (int)val;
+ break;
+
+ case PARAM_ADV_10HDX_CAP:
+ if (val != 0 && val != 1) {
+ goto err;
+ }
+ if (val && (dp->mii_status & MII_STATUS_10) == 0) {
+ goto err;
+ }
+ dp->anadv_10hdx = (int)val;
+ break;
+
+ case PARAM_ADV_1000T_MS:
+ if (val != 0 && val != 1 && val != 2) {
+ goto err;
+ }
+ if (val && (dp->mii_xstatus &
+ (MII_XSTATUS_1000BASET | MII_XSTATUS_1000BASET_FD)) == 0) {
+ goto err;
+ }
+ dp->anadv_1000t_ms = (int)val;
+ break;
+
+#ifdef DEBUG_RESUME
+ case PARAM_RESUME_TEST:
+ mutex_exit(&dp->xmitlock);
+ mutex_exit(&dp->intrlock);
+ gem_suspend(dp->dip);
+ gem_resume(dp->dip);
+ mutex_enter(&dp->intrlock);
+ mutex_enter(&dp->xmitlock);
+ break;
+#endif
+ }
+
+ /* sync with PHY */
+ usbgem_choose_forcedmode(dp);
+
+ dp->mii_state = MII_STATE_UNKNOWN;
+ if (dp->ugc.usbgc_mii_hw_link_detection) {
+ /* wake up link watcher possiblely sleeps */
+ cv_signal(&dp->link_watcher_wait_cv);
+ }
+
+ return (0);
+err:
+ return (EINVAL);
+}
+
+static void
+usbgem_nd_load(struct usbgem_dev *dp,
+ char *name, ndgetf_t gf, ndsetf_t sf, int item)
+{
+ struct usbgem_nd_arg *arg;
+
+ ASSERT(item >= 0);
+ ASSERT(item < PARAM_COUNT);
+
+ arg = &((struct usbgem_nd_arg *)(void *)dp->nd_arg_p)[item];
+ arg->dp = dp;
+ arg->item = item;
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: name:%s, item:%d",
+ dp->name, __func__, name, item));
+ (void) nd_load(&dp->nd_data_p, name, gf, sf, (caddr_t)arg);
+}
+
+static void
+usbgem_nd_setup(struct usbgem_dev *dp)
+{
+ DPRINTF(1, (CE_CONT, "!%s: %s: called, mii_status:0x%b",
+ dp->name, __func__, dp->mii_status, MII_STATUS_BITS));
+
+ ASSERT(dp->nd_arg_p == NULL);
+
+ dp->nd_arg_p =
+ kmem_zalloc(sizeof (struct usbgem_nd_arg) * PARAM_COUNT, KM_SLEEP);
+
+#define SETFUNC(x) ((x) ? usbgem_param_set : NULL)
+
+ usbgem_nd_load(dp, "autoneg_cap",
+ usbgem_param_get, NULL, PARAM_AUTONEG_CAP);
+ usbgem_nd_load(dp, "pause_cap",
+ usbgem_param_get, NULL, PARAM_PAUSE_CAP);
+ usbgem_nd_load(dp, "asym_pause_cap",
+ usbgem_param_get, NULL, PARAM_ASYM_PAUSE_CAP);
+ usbgem_nd_load(dp, "1000fdx_cap",
+ usbgem_param_get, NULL, PARAM_1000FDX_CAP);
+ usbgem_nd_load(dp, "1000hdx_cap",
+ usbgem_param_get, NULL, PARAM_1000HDX_CAP);
+ usbgem_nd_load(dp, "100T4_cap",
+ usbgem_param_get, NULL, PARAM_100T4_CAP);
+ usbgem_nd_load(dp, "100fdx_cap",
+ usbgem_param_get, NULL, PARAM_100FDX_CAP);
+ usbgem_nd_load(dp, "100hdx_cap",
+ usbgem_param_get, NULL, PARAM_100HDX_CAP);
+ usbgem_nd_load(dp, "10fdx_cap",
+ usbgem_param_get, NULL, PARAM_10FDX_CAP);
+ usbgem_nd_load(dp, "10hdx_cap",
+ usbgem_param_get, NULL, PARAM_10HDX_CAP);
+
+ /* Our advertised capabilities */
+ usbgem_nd_load(dp, "adv_autoneg_cap", usbgem_param_get,
+ SETFUNC(dp->mii_status & MII_STATUS_CANAUTONEG),
+ PARAM_ADV_AUTONEG_CAP);
+ usbgem_nd_load(dp, "adv_pause_cap", usbgem_param_get,
+ SETFUNC(dp->ugc.usbgc_flow_control & 1),
+ PARAM_ADV_PAUSE_CAP);
+ usbgem_nd_load(dp, "adv_asym_pause_cap", usbgem_param_get,
+ SETFUNC(dp->ugc.usbgc_flow_control & 2),
+ PARAM_ADV_ASYM_PAUSE_CAP);
+ usbgem_nd_load(dp, "adv_1000fdx_cap", usbgem_param_get,
+ SETFUNC(dp->mii_xstatus &
+ (MII_XSTATUS_1000BASEX_FD | MII_XSTATUS_1000BASET_FD)),
+ PARAM_ADV_1000FDX_CAP);
+ usbgem_nd_load(dp, "adv_1000hdx_cap", usbgem_param_get,
+ SETFUNC(dp->mii_xstatus &
+ (MII_XSTATUS_1000BASEX | MII_XSTATUS_1000BASET)),
+ PARAM_ADV_1000HDX_CAP);
+ usbgem_nd_load(dp, "adv_100T4_cap", usbgem_param_get,
+ SETFUNC((dp->mii_status & MII_STATUS_100_BASE_T4) &&
+ !dp->mii_advert_ro),
+ PARAM_ADV_100T4_CAP);
+ usbgem_nd_load(dp, "adv_100fdx_cap", usbgem_param_get,
+ SETFUNC((dp->mii_status & MII_STATUS_100_BASEX_FD) &&
+ !dp->mii_advert_ro),
+ PARAM_ADV_100FDX_CAP);
+ usbgem_nd_load(dp, "adv_100hdx_cap", usbgem_param_get,
+ SETFUNC((dp->mii_status & MII_STATUS_100_BASEX) &&
+ !dp->mii_advert_ro),
+ PARAM_ADV_100HDX_CAP);
+ usbgem_nd_load(dp, "adv_10fdx_cap", usbgem_param_get,
+ SETFUNC((dp->mii_status & MII_STATUS_10_FD) &&
+ !dp->mii_advert_ro),
+ PARAM_ADV_10FDX_CAP);
+ usbgem_nd_load(dp, "adv_10hdx_cap", usbgem_param_get,
+ SETFUNC((dp->mii_status & MII_STATUS_10) &&
+ !dp->mii_advert_ro),
+ PARAM_ADV_10HDX_CAP);
+ usbgem_nd_load(dp, "adv_1000t_ms", usbgem_param_get,
+ SETFUNC(dp->mii_xstatus &
+ (MII_XSTATUS_1000BASET_FD | MII_XSTATUS_1000BASET)),
+ PARAM_ADV_1000T_MS);
+
+
+ /* Partner's advertised capabilities */
+ usbgem_nd_load(dp, "lp_autoneg_cap",
+ usbgem_param_get, NULL, PARAM_LP_AUTONEG_CAP);
+ usbgem_nd_load(dp, "lp_pause_cap",
+ usbgem_param_get, NULL, PARAM_LP_PAUSE_CAP);
+ usbgem_nd_load(dp, "lp_asym_pause_cap",
+ usbgem_param_get, NULL, PARAM_LP_ASYM_PAUSE_CAP);
+ usbgem_nd_load(dp, "lp_1000fdx_cap",
+ usbgem_param_get, NULL, PARAM_LP_1000FDX_CAP);
+ usbgem_nd_load(dp, "lp_1000hdx_cap",
+ usbgem_param_get, NULL, PARAM_LP_1000HDX_CAP);
+ usbgem_nd_load(dp, "lp_100T4_cap",
+ usbgem_param_get, NULL, PARAM_LP_100T4_CAP);
+ usbgem_nd_load(dp, "lp_100fdx_cap",
+ usbgem_param_get, NULL, PARAM_LP_100FDX_CAP);
+ usbgem_nd_load(dp, "lp_100hdx_cap",
+ usbgem_param_get, NULL, PARAM_LP_100HDX_CAP);
+ usbgem_nd_load(dp, "lp_10fdx_cap",
+ usbgem_param_get, NULL, PARAM_LP_10FDX_CAP);
+ usbgem_nd_load(dp, "lp_10hdx_cap",
+ usbgem_param_get, NULL, PARAM_LP_10HDX_CAP);
+
+ /* Current operating modes */
+ usbgem_nd_load(dp, "link_status",
+ usbgem_param_get, NULL, PARAM_LINK_STATUS);
+ usbgem_nd_load(dp, "link_speed",
+ usbgem_param_get, NULL, PARAM_LINK_SPEED);
+ usbgem_nd_load(dp, "link_duplex",
+ usbgem_param_get, NULL, PARAM_LINK_DUPLEX);
+ usbgem_nd_load(dp, "link_autoneg",
+ usbgem_param_get, NULL, PARAM_LINK_AUTONEG);
+ usbgem_nd_load(dp, "link_rx_pause",
+ usbgem_param_get, NULL, PARAM_LINK_RX_PAUSE);
+ usbgem_nd_load(dp, "link_tx_pause",
+ usbgem_param_get, NULL, PARAM_LINK_TX_PAUSE);
+#ifdef DEBUG_RESUME
+ usbgem_nd_load(dp, "resume_test",
+ usbgem_param_get, usbgem_param_set, PARAM_RESUME_TEST);
+#endif
+#undef SETFUNC
+}
+
+static
+enum ioc_reply
+usbgem_nd_ioctl(struct usbgem_dev *dp,
+ queue_t *wq, mblk_t *mp, struct iocblk *iocp)
+{
+ boolean_t ok;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ switch (iocp->ioc_cmd) {
+ case ND_GET:
+ ok = nd_getset(wq, dp->nd_data_p, mp);
+ DPRINTF(1, (CE_CONT,
+ "%s: get %s", dp->name, ok ? "OK" : "FAIL"));
+ return (ok ? IOC_REPLY : IOC_INVAL);
+
+ case ND_SET:
+ ok = nd_getset(wq, dp->nd_data_p, mp);
+
+ DPRINTF(1, (CE_CONT, "%s: set %s err %d",
+ dp->name, ok ? "OK" : "FAIL", iocp->ioc_error));
+
+ if (!ok) {
+ return (IOC_INVAL);
+ }
+
+ if (iocp->ioc_error) {
+ return (IOC_REPLY);
+ }
+
+ return (IOC_RESTART_REPLY);
+ }
+
+ cmn_err(CE_WARN, "%s: invalid cmd 0x%x", dp->name, iocp->ioc_cmd);
+
+ return (IOC_INVAL);
+}
+
+static void
+usbgem_nd_cleanup(struct usbgem_dev *dp)
+{
+ ASSERT(dp->nd_data_p != NULL);
+ ASSERT(dp->nd_arg_p != NULL);
+
+ nd_free(&dp->nd_data_p);
+
+ kmem_free(dp->nd_arg_p, sizeof (struct usbgem_nd_arg) * PARAM_COUNT);
+ dp->nd_arg_p = NULL;
+}
+#endif /* USBGEM_CONFIG_ND */
+
+static void
+usbgem_mac_ioctl(struct usbgem_dev *dp, queue_t *wq, mblk_t *mp)
+{
+ struct iocblk *iocp;
+ enum ioc_reply status;
+ int cmd;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /*
+ * Validate the command before bothering with the mutex ...
+ */
+ iocp = (void *)mp->b_rptr;
+ iocp->ioc_error = 0;
+ cmd = iocp->ioc_cmd;
+
+ DPRINTF(1, (CE_CONT, "%s: %s cmd:0x%x", dp->name, __func__, cmd));
+
+#ifdef USBGEM_CONFIG_ND
+ switch (cmd) {
+ default:
+ _NOTE(NOTREACHED)
+ status = IOC_INVAL;
+ break;
+
+ case ND_GET:
+ case ND_SET:
+ status = usbgem_nd_ioctl(dp, wq, mp, iocp);
+ break;
+ }
+
+ /*
+ * Finally, decide how to reply
+ */
+ switch (status) {
+ default:
+ case IOC_INVAL:
+ /*
+ * Error, reply with a NAK and EINVAL or the specified error
+ */
+ miocnak(wq, mp, 0, iocp->ioc_error == 0 ?
+ EINVAL : iocp->ioc_error);
+ break;
+
+ case IOC_DONE:
+ /*
+ * OK, reply already sent
+ */
+ break;
+
+ case IOC_RESTART_ACK:
+ case IOC_ACK:
+ /*
+ * OK, reply with an ACK
+ */
+ miocack(wq, mp, 0, 0);
+ break;
+
+ case IOC_RESTART_REPLY:
+ case IOC_REPLY:
+ /*
+ * OK, send prepared reply as ACK or NAK
+ */
+ mp->b_datap->db_type =
+ iocp->ioc_error == 0 ? M_IOCACK : M_IOCNAK;
+ qreply(wq, mp);
+ break;
+ }
+#else
+ miocnak(wq, mp, 0, EINVAL);
+ return;
+#endif /* USBGEM_CONFIG_GLDv3 */
+}
+
+#ifndef SYS_MAC_H
+#define XCVR_UNDEFINED 0
+#define XCVR_NONE 1
+#define XCVR_10 2
+#define XCVR_100T4 3
+#define XCVR_100X 4
+#define XCVR_100T2 5
+#define XCVR_1000X 6
+#define XCVR_1000T 7
+#endif
+static int
+usbgem_mac_xcvr_inuse(struct usbgem_dev *dp)
+{
+ int val = XCVR_UNDEFINED;
+
+ if ((dp->mii_status & MII_STATUS_XSTATUS) == 0) {
+ if (dp->mii_status & MII_STATUS_100_BASE_T4) {
+ val = XCVR_100T4;
+ } else if (dp->mii_status &
+ (MII_STATUS_100_BASEX_FD |
+ MII_STATUS_100_BASEX)) {
+ val = XCVR_100X;
+ } else if (dp->mii_status &
+ (MII_STATUS_100_BASE_T2_FD |
+ MII_STATUS_100_BASE_T2)) {
+ val = XCVR_100T2;
+ } else if (dp->mii_status &
+ (MII_STATUS_10_FD | MII_STATUS_10)) {
+ val = XCVR_10;
+ }
+ } else if (dp->mii_xstatus &
+ (MII_XSTATUS_1000BASET_FD | MII_XSTATUS_1000BASET)) {
+ val = XCVR_1000T;
+ } else if (dp->mii_xstatus &
+ (MII_XSTATUS_1000BASEX_FD | MII_XSTATUS_1000BASEX)) {
+ val = XCVR_1000X;
+ }
+
+ return (val);
+}
+
+#ifdef USBGEM_CONFIG_GLDv3
+/* ============================================================== */
+/*
+ * GLDv3 interface
+ */
+/* ============================================================== */
+static int usbgem_m_getstat(void *, uint_t, uint64_t *);
+static int usbgem_m_start(void *);
+static void usbgem_m_stop(void *);
+static int usbgem_m_setpromisc(void *, boolean_t);
+static int usbgem_m_multicst(void *, boolean_t, const uint8_t *);
+static int usbgem_m_unicst(void *, const uint8_t *);
+static mblk_t *usbgem_m_tx(void *, mblk_t *);
+static void usbgem_m_ioctl(void *, queue_t *, mblk_t *);
+#ifdef GEM_CONFIG_MAC_PROP
+static int usbgem_m_setprop(void *, const char *, mac_prop_id_t,
+ uint_t, const void *);
+#ifdef MAC_VERSION_V1
+static int usbgem_m_getprop(void *, const char *, mac_prop_id_t,
+ uint_t, void *);
+#else
+static int usbgem_m_getprop(void *, const char *, mac_prop_id_t,
+ uint_t, uint_t, void *, uint_t *);
+#endif
+#endif
+
+#ifdef _SYS_MAC_PROVIDER_H
+#define GEM_M_CALLBACK_FLAGS (MC_IOCTL)
+#else
+#define GEM_M_CALLBACK_FLAGS (MC_IOCTL)
+#endif
+
+static mac_callbacks_t gem_m_callbacks = {
+#ifdef USBGEM_CONFIG_MAC_PROP
+#ifdef MAC_VERSION_V1
+ GEM_M_CALLBACK_FLAGS | MC_SETPROP | MC_GETPROP | MC_PROPINFO,
+#else
+ GEM_M_CALLBACK_FLAGS | MC_SETPROP | MC_GETPROP,
+#endif
+#else
+ GEM_M_CALLBACK_FLAGS,
+#endif
+ usbgem_m_getstat,
+ usbgem_m_start,
+ usbgem_m_stop,
+ usbgem_m_setpromisc,
+ usbgem_m_multicst,
+ usbgem_m_unicst,
+ usbgem_m_tx,
+#ifdef _SYS_MAC_PROVIDER_H
+#ifdef MAC_VERSION_V1
+ NULL,
+#endif
+#else
+ NULL, /* m_resources */
+#endif
+ usbgem_m_ioctl,
+ NULL, /* m_getcapab */
+#ifdef USBGEM_CONFIG_MAC_PROP
+ NULL,
+ NULL,
+ usbgem_m_setprop,
+ usbgem_m_getprop,
+#endif
+#ifdef MAC_VERSION_V1
+ usbgem_m_propinfo,
+#endif
+};
+
+static int
+usbgem_m_start(void *arg)
+{
+ int ret;
+ int err;
+ struct usbgem_dev *dp = arg;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ err = EIO;
+
+ rw_enter(&dp->dev_state_lock, RW_WRITER);
+ dp->nic_state = NIC_STATE_ONLINE;
+
+ if (dp->mac_state == MAC_STATE_DISCONNECTED) {
+ err = 0;
+ goto x;
+ }
+ if (usbgem_mac_init(dp) != USB_SUCCESS) {
+ goto x;
+ }
+
+ /* initialize rx filter state */
+ sema_p(&dp->rxfilter_lock);
+ dp->mc_count = 0;
+ dp->mc_count_req = 0;
+
+ bcopy(dp->dev_addr.ether_addr_octet,
+ dp->cur_addr.ether_addr_octet, ETHERADDRL);
+ dp->rxmode |= RXMODE_ENABLE;
+
+ ret = usbgem_hal_set_rx_filter(dp);
+ sema_v(&dp->rxfilter_lock);
+
+ if (ret != USB_SUCCESS) {
+ goto x;
+ }
+
+ if (dp->mii_state == MII_STATE_LINKUP) {
+ /* setup media mode if the link have been up */
+ if (usbgem_hal_set_media(dp) != USB_SUCCESS) {
+ goto x;
+ }
+ if (usbgem_mac_start(dp) != USB_SUCCESS) {
+ goto x;
+ }
+ }
+
+ err = 0;
+x:
+ rw_exit(&dp->dev_state_lock);
+ return (err);
+}
+
+static void
+usbgem_m_stop(void *arg)
+{
+ struct usbgem_dev *dp = arg;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /* stop rx gracefully */
+ rw_enter(&dp->dev_state_lock, RW_READER);
+ sema_p(&dp->rxfilter_lock);
+ dp->rxmode &= ~RXMODE_ENABLE;
+
+ if (dp->mac_state != MAC_STATE_DISCONNECTED) {
+ (void) usbgem_hal_set_rx_filter(dp);
+ }
+ sema_v(&dp->rxfilter_lock);
+ rw_exit(&dp->dev_state_lock);
+
+ /* make the nic state inactive */
+ rw_enter(&dp->dev_state_lock, RW_WRITER);
+ dp->nic_state = NIC_STATE_STOPPED;
+
+ /* stop mac completely */
+ if (dp->mac_state != MAC_STATE_DISCONNECTED) {
+ (void) usbgem_mac_stop(dp, MAC_STATE_STOPPED, STOP_GRACEFUL);
+ }
+ rw_exit(&dp->dev_state_lock);
+}
+
+static int
+usbgem_m_multicst(void *arg, boolean_t add, const uint8_t *ep)
+{
+ int err;
+ int ret;
+ struct usbgem_dev *dp = arg;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ rw_enter(&dp->dev_state_lock, RW_READER);
+ if (add) {
+ ret = usbgem_add_multicast(dp, ep);
+ } else {
+ ret = usbgem_remove_multicast(dp, ep);
+ }
+ rw_exit(&dp->dev_state_lock);
+
+ err = 0;
+ if (ret != USB_SUCCESS) {
+#ifdef GEM_CONFIG_FMA
+ ddi_fm_service_impact(dp->dip, DDI_SERVICE_DEGRADED);
+#endif
+ err = EIO;
+ }
+
+ return (err);
+}
+
+static int
+usbgem_m_setpromisc(void *arg, boolean_t on)
+{
+ int err;
+ struct usbgem_dev *dp = arg;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ rw_enter(&dp->dev_state_lock, RW_READER);
+
+ sema_p(&dp->rxfilter_lock);
+ if (on) {
+ dp->rxmode |= RXMODE_PROMISC;
+ } else {
+ dp->rxmode &= ~RXMODE_PROMISC;
+ }
+
+ err = 0;
+ if (dp->mac_state != MAC_STATE_DISCONNECTED) {
+ if (usbgem_hal_set_rx_filter(dp) != USB_SUCCESS) {
+ err = EIO;
+ }
+ }
+ sema_v(&dp->rxfilter_lock);
+
+ rw_exit(&dp->dev_state_lock);
+
+#ifdef GEM_CONFIG_FMA
+ if (err != 0) {
+ ddi_fm_service_impact(dp->dip, DDI_SERVICE_DEGRADED);
+ }
+#endif
+ return (err);
+}
+
+int
+usbgem_m_getstat(void *arg, uint_t stat, uint64_t *valp)
+{
+ int ret;
+ uint64_t val;
+ struct usbgem_dev *dp = arg;
+ struct usbgem_stats *gstp = &dp->stats;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ rw_enter(&dp->dev_state_lock, RW_READER);
+ if (dp->mac_state == MAC_STATE_DISCONNECTED) {
+ rw_exit(&dp->dev_state_lock);
+ return (0);
+ }
+ ret = usbgem_hal_get_stats(dp);
+ rw_exit(&dp->dev_state_lock);
+
+#ifdef GEM_CONFIG_FMA
+ if (ret != USB_SUCCESS) {
+ ddi_fm_service_impact(dp->dip, DDI_SERVICE_DEGRADED);
+ return (EIO);
+ }
+#endif
+
+ switch (stat) {
+ case MAC_STAT_IFSPEED:
+ val = usbgem_speed_value[dp->speed] *1000000ull;
+ break;
+
+ case MAC_STAT_MULTIRCV:
+ val = gstp->rmcast;
+ break;
+
+ case MAC_STAT_BRDCSTRCV:
+ val = gstp->rbcast;
+ break;
+
+ case MAC_STAT_MULTIXMT:
+ val = gstp->omcast;
+ break;
+
+ case MAC_STAT_BRDCSTXMT:
+ val = gstp->obcast;
+ break;
+
+ case MAC_STAT_NORCVBUF:
+ val = gstp->norcvbuf + gstp->missed;
+ break;
+
+ case MAC_STAT_IERRORS:
+ val = gstp->errrcv;
+ break;
+
+ case MAC_STAT_NOXMTBUF:
+ val = gstp->noxmtbuf;
+ break;
+
+ case MAC_STAT_OERRORS:
+ val = gstp->errxmt;
+ break;
+
+ case MAC_STAT_COLLISIONS:
+ val = gstp->collisions;
+ break;
+
+ case MAC_STAT_RBYTES:
+ val = gstp->rbytes;
+ break;
+
+ case MAC_STAT_IPACKETS:
+ val = gstp->rpackets;
+ break;
+
+ case MAC_STAT_OBYTES:
+ val = gstp->obytes;
+ break;
+
+ case MAC_STAT_OPACKETS:
+ val = gstp->opackets;
+ break;
+
+ case MAC_STAT_UNDERFLOWS:
+ val = gstp->underflow;
+ break;
+
+ case MAC_STAT_OVERFLOWS:
+ val = gstp->overflow;
+ break;
+
+ case ETHER_STAT_ALIGN_ERRORS:
+ val = gstp->frame;
+ break;
+
+ case ETHER_STAT_FCS_ERRORS:
+ val = gstp->crc;
+ break;
+
+ case ETHER_STAT_FIRST_COLLISIONS:
+ val = gstp->first_coll;
+ break;
+
+ case ETHER_STAT_MULTI_COLLISIONS:
+ val = gstp->multi_coll;
+ break;
+
+ case ETHER_STAT_SQE_ERRORS:
+ val = gstp->sqe;
+ break;
+
+ case ETHER_STAT_DEFER_XMTS:
+ val = gstp->defer;
+ break;
+
+ case ETHER_STAT_TX_LATE_COLLISIONS:
+ val = gstp->xmtlatecoll;
+ break;
+
+ case ETHER_STAT_EX_COLLISIONS:
+ val = gstp->excoll;
+ break;
+
+ case ETHER_STAT_MACXMT_ERRORS:
+ val = gstp->xmit_internal_err;
+ break;
+
+ case ETHER_STAT_CARRIER_ERRORS:
+ val = gstp->nocarrier;
+ break;
+
+ case ETHER_STAT_TOOLONG_ERRORS:
+ val = gstp->frame_too_long;
+ break;
+
+ case ETHER_STAT_MACRCV_ERRORS:
+ val = gstp->rcv_internal_err;
+ break;
+
+ case ETHER_STAT_XCVR_ADDR:
+ val = dp->mii_phy_addr;
+ break;
+
+ case ETHER_STAT_XCVR_ID:
+ val = dp->mii_phy_id;
+ break;
+
+ case ETHER_STAT_XCVR_INUSE:
+ val = usbgem_mac_xcvr_inuse(dp);
+ break;
+
+ case ETHER_STAT_CAP_1000FDX:
+ val = (dp->mii_xstatus & MII_XSTATUS_1000BASET_FD) ||
+ (dp->mii_xstatus & MII_XSTATUS_1000BASEX_FD);
+ break;
+
+ case ETHER_STAT_CAP_1000HDX:
+ val = (dp->mii_xstatus & MII_XSTATUS_1000BASET) ||
+ (dp->mii_xstatus & MII_XSTATUS_1000BASEX);
+ break;
+
+ case ETHER_STAT_CAP_100FDX:
+ val = BOOLEAN(dp->mii_status & MII_STATUS_100_BASEX_FD);
+ break;
+
+ case ETHER_STAT_CAP_100HDX:
+ val = BOOLEAN(dp->mii_status & MII_STATUS_100_BASEX);
+ break;
+
+ case ETHER_STAT_CAP_10FDX:
+ val = BOOLEAN(dp->mii_status & MII_STATUS_10_FD);
+ break;
+
+ case ETHER_STAT_CAP_10HDX:
+ val = BOOLEAN(dp->mii_status & MII_STATUS_10);
+ break;
+
+ case ETHER_STAT_CAP_ASMPAUSE:
+ val = dp->ugc.usbgc_flow_control > FLOW_CONTROL_SYMMETRIC;
+ break;
+
+ case ETHER_STAT_CAP_PAUSE:
+ val = dp->ugc.usbgc_flow_control != FLOW_CONTROL_NONE;
+ break;
+
+ case ETHER_STAT_CAP_AUTONEG:
+ val = BOOLEAN(dp->mii_status & MII_STATUS_CANAUTONEG);
+ break;
+
+ case ETHER_STAT_ADV_CAP_1000FDX:
+ val = dp->anadv_1000fdx;
+ break;
+
+ case ETHER_STAT_ADV_CAP_1000HDX:
+ val = dp->anadv_1000hdx;
+ break;
+
+ case ETHER_STAT_ADV_CAP_100FDX:
+ val = dp->anadv_100fdx;
+ break;
+
+ case ETHER_STAT_ADV_CAP_100HDX:
+ val = dp->anadv_100hdx;
+ break;
+
+ case ETHER_STAT_ADV_CAP_10FDX:
+ val = dp->anadv_10fdx;
+ break;
+
+ case ETHER_STAT_ADV_CAP_10HDX:
+ val = dp->anadv_10hdx;
+ break;
+
+ case ETHER_STAT_ADV_CAP_ASMPAUSE:
+ val = dp->anadv_asmpause;
+ break;
+
+ case ETHER_STAT_ADV_CAP_PAUSE:
+ val = dp->anadv_pause;
+ break;
+
+ case ETHER_STAT_ADV_CAP_AUTONEG:
+ val = dp->anadv_autoneg;
+ break;
+
+ case ETHER_STAT_LP_CAP_1000FDX:
+ val = BOOLEAN(dp->mii_stat1000 & MII_1000TS_LP_FULL);
+ break;
+
+ case ETHER_STAT_LP_CAP_1000HDX:
+ val = BOOLEAN(dp->mii_stat1000 & MII_1000TS_LP_HALF);
+ break;
+
+ case ETHER_STAT_LP_CAP_100FDX:
+ val = BOOLEAN(dp->mii_lpable & MII_ABILITY_100BASE_TX_FD);
+ break;
+
+ case ETHER_STAT_LP_CAP_100HDX:
+ val = BOOLEAN(dp->mii_lpable & MII_ABILITY_100BASE_TX);
+ break;
+
+ case ETHER_STAT_LP_CAP_10FDX:
+ val = BOOLEAN(dp->mii_lpable & MII_ABILITY_10BASE_T_FD);
+ break;
+
+ case ETHER_STAT_LP_CAP_10HDX:
+ val = BOOLEAN(dp->mii_lpable & MII_ABILITY_10BASE_T);
+ break;
+
+ case ETHER_STAT_LP_CAP_ASMPAUSE:
+ val = BOOLEAN(dp->mii_lpable & MII_ABILITY_ASM_DIR);
+ break;
+
+ case ETHER_STAT_LP_CAP_PAUSE:
+ val = BOOLEAN(dp->mii_lpable & MII_ABILITY_PAUSE);
+ break;
+
+ case ETHER_STAT_LP_CAP_AUTONEG:
+ val = BOOLEAN(dp->mii_exp & MII_AN_EXP_LPCANAN);
+ break;
+
+ case ETHER_STAT_LINK_ASMPAUSE:
+ val = BOOLEAN(dp->flow_control & 2);
+ break;
+
+ case ETHER_STAT_LINK_PAUSE:
+ val = BOOLEAN(dp->flow_control & 1);
+ break;
+
+ case ETHER_STAT_LINK_AUTONEG:
+ val = dp->anadv_autoneg &&
+ BOOLEAN(dp->mii_exp & MII_AN_EXP_LPCANAN);
+ break;
+
+ case ETHER_STAT_LINK_DUPLEX:
+ val = (dp->mii_state == MII_STATE_LINKUP) ?
+ (dp->full_duplex ? 2 : 1) : 0;
+ break;
+
+ case ETHER_STAT_TOOSHORT_ERRORS:
+ val = gstp->runt;
+ break;
+#ifdef NEVER /* it doesn't make sense */
+ case ETHER_STAT_CAP_REMFAULT:
+ val = B_TRUE;
+ break;
+
+ case ETHER_STAT_ADV_REMFAULT:
+ val = dp->anadv_remfault;
+ break;
+#endif
+ case ETHER_STAT_LP_REMFAULT:
+ val = BOOLEAN(dp->mii_lpable & MII_AN_ADVERT_REMFAULT);
+ break;
+
+ case ETHER_STAT_JABBER_ERRORS:
+ val = gstp->jabber;
+ break;
+
+ case ETHER_STAT_CAP_100T4:
+ val = BOOLEAN(dp->mii_status & MII_STATUS_100_BASE_T4);
+ break;
+
+ case ETHER_STAT_ADV_CAP_100T4:
+ val = dp->anadv_100t4;
+ break;
+
+ case ETHER_STAT_LP_CAP_100T4:
+ val = BOOLEAN(dp->mii_lpable & MII_ABILITY_100BASE_T4);
+ break;
+
+ default:
+#if GEM_DEBUG_LEVEL > 2
+ cmn_err(CE_WARN,
+ "%s: unrecognized parameter value = %d",
+ __func__, stat);
+#endif
+ *valp = 0;
+ return (ENOTSUP);
+ }
+
+ *valp = val;
+
+ return (0);
+}
+
+static int
+usbgem_m_unicst(void *arg, const uint8_t *mac)
+{
+ int err;
+ struct usbgem_dev *dp = arg;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ rw_enter(&dp->dev_state_lock, RW_READER);
+
+ sema_p(&dp->rxfilter_lock);
+ bcopy(mac, dp->cur_addr.ether_addr_octet, ETHERADDRL);
+ dp->rxmode |= RXMODE_ENABLE;
+
+ err = 0;
+ if (dp->mac_state != MAC_STATE_DISCONNECTED) {
+ if (usbgem_hal_set_rx_filter(dp) != USB_SUCCESS) {
+ err = EIO;
+ }
+ }
+ sema_v(&dp->rxfilter_lock);
+ rw_exit(&dp->dev_state_lock);
+
+#ifdef GEM_CONFIG_FMA
+ if (err != 0) {
+ ddi_fm_service_impact(dp->dip, DDI_SERVICE_DEGRADED);
+ }
+#endif
+ return (err);
+}
+
+/*
+ * usbgem_m_tx is used only for sending data packets into ethernet wire.
+ */
+static mblk_t *
+usbgem_m_tx(void *arg, mblk_t *mp_head)
+{
+ int limit;
+ mblk_t *mp;
+ mblk_t *nmp;
+ uint32_t flags;
+ struct usbgem_dev *dp = arg;
+
+ DPRINTF(4, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ mp = mp_head;
+ flags = 0;
+
+ rw_enter(&dp->dev_state_lock, RW_READER);
+
+ if (dp->mii_state != MII_STATE_LINKUP ||
+ dp->mac_state != MAC_STATE_ONLINE) {
+ /* some nics hate to send packets during the link is down */
+ for (; mp; mp = nmp) {
+ nmp = mp->b_next;
+ mp->b_next = NULL;
+ freemsg(mp);
+ }
+ goto x;
+ }
+
+ ASSERT(dp->nic_state == NIC_STATE_ONLINE);
+
+ limit = dp->tx_max_packets;
+ for (; limit-- && mp; mp = nmp) {
+ nmp = mp->b_next;
+ mp->b_next = NULL;
+ if (usbgem_send_common(dp, mp,
+ (limit == 0 && nmp) ? 1 : 0)) {
+ mp->b_next = nmp;
+ break;
+ }
+ }
+#ifdef CONFIG_TX_LIMITER
+ if (mp == mp_head) {
+ /* no packets were sent, descrease allocation limit */
+ mutex_enter(&dp->txlock);
+ dp->tx_max_packets = max(dp->tx_max_packets - 1, 1);
+ mutex_exit(&dp->txlock);
+ }
+#endif
+x:
+ rw_exit(&dp->dev_state_lock);
+
+ return (mp);
+}
+
+static void
+usbgem_m_ioctl(void *arg, queue_t *wq, mblk_t *mp)
+{
+ struct usbgem_dev *dp = arg;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called",
+ ((struct usbgem_dev *)arg)->name, __func__));
+
+ rw_enter(&dp->dev_state_lock, RW_READER);
+ usbgem_mac_ioctl((struct usbgem_dev *)arg, wq, mp);
+ rw_exit(&dp->dev_state_lock);
+}
+
+static void
+usbgem_gld3_init(struct usbgem_dev *dp, mac_register_t *macp)
+{
+ macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
+ macp->m_driver = dp;
+ macp->m_dip = dp->dip;
+ macp->m_src_addr = dp->dev_addr.ether_addr_octet;
+ macp->m_callbacks = &gem_m_callbacks;
+ macp->m_min_sdu = 0;
+ macp->m_max_sdu = dp->mtu;
+
+ if (dp->misc_flag & USBGEM_VLAN) {
+ macp->m_margin = VTAG_SIZE;
+ }
+}
+#else
+/* ============================================================== */
+/*
+ * GLDv2 interface
+ */
+/* ============================================================== */
+static int usbgem_gld_reset(gld_mac_info_t *);
+static int usbgem_gld_start(gld_mac_info_t *);
+static int usbgem_gld_stop(gld_mac_info_t *);
+static int usbgem_gld_set_mac_address(gld_mac_info_t *, uint8_t *);
+static int usbgem_gld_set_multicast(gld_mac_info_t *, uint8_t *, int);
+static int usbgem_gld_set_promiscuous(gld_mac_info_t *, int);
+static int usbgem_gld_get_stats(gld_mac_info_t *, struct gld_stats *);
+static int usbgem_gld_send(gld_mac_info_t *, mblk_t *);
+static int usbgem_gld_send_tagged(gld_mac_info_t *, mblk_t *, uint32_t);
+
+static int
+usbgem_gld_reset(gld_mac_info_t *macinfo)
+{
+ int err;
+ struct usbgem_dev *dp;
+
+ err = GLD_SUCCESS;
+ dp = (struct usbgem_dev *)macinfo->gldm_private;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ rw_enter(&dp->dev_state_lock, RW_WRITER);
+ if (usbgem_mac_init(dp) != USB_SUCCESS) {
+ err = GLD_FAILURE;
+ goto x;
+ }
+
+ dp->nic_state = NIC_STATE_INITIALIZED;
+
+ /* setup media mode if the link have been up */
+ if (dp->mii_state == MII_STATE_LINKUP) {
+ if (dp->mac_state != MAC_STATE_DISCONNECTED) {
+ (void) usbgem_hal_set_media(dp);
+ }
+ }
+x:
+ rw_exit(&dp->dev_state_lock);
+ return (err);
+}
+
+static int
+usbgem_gld_start(gld_mac_info_t *macinfo)
+{
+ int err;
+ struct usbgem_dev *dp;
+
+ dp = (struct usbgem_dev *)macinfo->gldm_private;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ rw_enter(&dp->dev_state_lock, RW_WRITER);
+
+ dp->nic_state = NIC_STATE_ONLINE;
+
+ if (dp->mii_state == MII_STATE_LINKUP) {
+ if (usbgem_mac_start(dp) != USB_SUCCESS) {
+ /* sema_v(&dp->mii_lock); */
+ err = GLD_FAILURE;
+ goto x;
+ }
+ }
+
+ /*
+ * XXX - don't call gld_linkstate() here,
+ * otherwise it cause recursive mutex call.
+ */
+ err = GLD_SUCCESS;
+x:
+ rw_exit(&dp->dev_state_lock);
+
+ return (err);
+}
+
+static int
+usbgem_gld_stop(gld_mac_info_t *macinfo)
+{
+ int err = GLD_SUCCESS;
+ struct usbgem_dev *dp;
+
+ dp = (struct usbgem_dev *)macinfo->gldm_private;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /* try to stop rx gracefully */
+ rw_enter(&dp->dev_state_lock, RW_READER);
+ sema_p(&dp->rxfilter_lock);
+ dp->rxmode &= ~RXMODE_ENABLE;
+
+ if (dp->mac_state != MAC_STATE_DISCONNECTED) {
+ (void) usbgem_hal_set_rx_filter(dp);
+ }
+ sema_v(&dp->rxfilter_lock);
+ rw_exit(&dp->dev_state_lock);
+
+ /* make the nic state inactive */
+ rw_enter(&dp->dev_state_lock, RW_WRITER);
+ dp->nic_state = NIC_STATE_STOPPED;
+
+ if (dp->mac_state != MAC_STATE_DISCONNECTED) {
+ if (usbgem_mac_stop(dp, MAC_STATE_STOPPED, STOP_GRACEFUL)
+ != USB_SUCCESS) {
+ err = GLD_FAILURE;
+ }
+ }
+ rw_exit(&dp->dev_state_lock);
+
+ return (err);
+}
+
+static int
+usbgem_gld_set_multicast(gld_mac_info_t *macinfo, uint8_t *ep, int flag)
+{
+ int err;
+ int ret;
+ struct usbgem_dev *dp;
+
+ dp = (struct usbgem_dev *)macinfo->gldm_private;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ rw_enter(&dp->dev_state_lock, RW_READER);
+ if (flag == GLD_MULTI_ENABLE) {
+ ret = usbgem_add_multicast(dp, ep);
+ } else {
+ ret = usbgem_remove_multicast(dp, ep);
+ }
+ rw_exit(&dp->dev_state_lock);
+
+ err = GLD_SUCCESS;
+ if (ret != USB_SUCCESS) {
+#ifdef GEM_CONFIG_FMA
+ ddi_fm_service_impact(dp->dip, DDI_SERVICE_DEGRADED);
+#endif
+ err = GLD_FAILURE;
+ }
+ return (err);
+}
+
+static int
+usbgem_gld_set_promiscuous(gld_mac_info_t *macinfo, int flag)
+{
+ boolean_t need_to_change = B_TRUE;
+ struct usbgem_dev *dp;
+
+ dp = (struct usbgem_dev *)macinfo->gldm_private;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ sema_p(&dp->rxfilter_lock);
+ if (flag == GLD_MAC_PROMISC_NONE) {
+ dp->rxmode &= ~(RXMODE_PROMISC | RXMODE_ALLMULTI_REQ);
+ } else if (flag == GLD_MAC_PROMISC_MULTI) {
+ dp->rxmode |= RXMODE_ALLMULTI_REQ;
+ } else if (flag == GLD_MAC_PROMISC_PHYS) {
+ dp->rxmode |= RXMODE_PROMISC;
+ } else {
+ /* mode unchanged */
+ need_to_change = B_FALSE;
+ }
+
+ if (need_to_change) {
+ if (dp->mac_state != MAC_STATE_DISCONNECTED) {
+ (void) usbgem_hal_set_rx_filter(dp);
+ }
+ }
+ sema_v(&dp->rxfilter_lock);
+
+ return (GLD_SUCCESS);
+}
+
+static int
+usbgem_gld_set_mac_address(gld_mac_info_t *macinfo, uint8_t *mac)
+{
+ struct usbgem_dev *dp;
+ dp = (struct usbgem_dev *)macinfo->gldm_private;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ sema_p(&dp->rxfilter_lock);
+ bcopy(mac, dp->cur_addr.ether_addr_octet, ETHERADDRL);
+ dp->rxmode |= RXMODE_ENABLE;
+
+ if (dp->mac_state != MAC_STATE_DISCONNECTED) {
+ (void) usbgem_hal_set_rx_filter(dp);
+ }
+ sema_v(&dp->rxfilter_lock);
+
+ return (GLD_SUCCESS);
+}
+
+static int
+usbgem_gld_get_stats(gld_mac_info_t *macinfo, struct gld_stats *gs)
+{
+ struct usbgem_dev *dp;
+ struct usbgem_stats *vs;
+
+ dp = (struct usbgem_dev *)macinfo->gldm_private;
+
+ if ((*dp->ugc.usbgc_get_stats)(dp) != USB_SUCCESS) {
+#ifdef GEM_CONFIG_FMA
+ ddi_fm_service_impact(dp->dip, DDI_SERVICE_DEGRADED);
+#endif
+ return (USB_FAILURE);
+ }
+
+ vs = &dp->stats;
+
+ gs->glds_errxmt = vs->errxmt;
+ gs->glds_errrcv = vs->errrcv;
+ gs->glds_collisions = vs->collisions;
+
+ gs->glds_excoll = vs->excoll;
+ gs->glds_defer = vs->defer;
+ gs->glds_frame = vs->frame;
+ gs->glds_crc = vs->crc;
+
+ gs->glds_overflow = vs->overflow; /* fifo err,underrun,rbufovf */
+ gs->glds_underflow = vs->underflow;
+ gs->glds_short = vs->runt;
+ gs->glds_missed = vs->missed; /* missed pkts while rbuf ovf */
+ gs->glds_xmtlatecoll = vs->xmtlatecoll;
+ gs->glds_nocarrier = vs->nocarrier;
+ gs->glds_norcvbuf = vs->norcvbuf; /* OS resource exaust */
+ gs->glds_intr = vs->intr;
+
+ /* all before here must be kept in place for v0 compatibility */
+ gs->glds_speed = usbgem_speed_value[dp->speed] * 1000000;
+ gs->glds_media = GLDM_PHYMII;
+ gs->glds_duplex = dp->full_duplex ? GLD_DUPLEX_FULL : GLD_DUPLEX_HALF;
+
+ /* gs->glds_media_specific */
+ gs->glds_dot3_first_coll = vs->first_coll;
+ gs->glds_dot3_multi_coll = vs->multi_coll;
+ gs->glds_dot3_sqe_error = 0;
+ gs->glds_dot3_mac_xmt_error = 0;
+ gs->glds_dot3_mac_rcv_error = 0;
+ gs->glds_dot3_frame_too_long = vs->frame_too_long;
+
+ return (GLD_SUCCESS);
+}
+
+static int
+usbgem_gld_ioctl(gld_mac_info_t *macinfo, queue_t *wq, mblk_t *mp)
+{
+ struct usbgem_dev *dp;
+
+ dp = (struct usbgem_dev *)macinfo->gldm_private;
+ usbgem_mac_ioctl(dp, wq, mp);
+
+ return (GLD_SUCCESS);
+}
+
+/*
+ * gem_gld_send is used only for sending data packets into ethernet wire.
+ */
+static int
+usbgem_gld_send(gld_mac_info_t *macinfo, mblk_t *mp)
+{
+ int ret;
+ uint32_t flags = 0;
+ struct usbgem_dev *dp;
+
+ dp = (struct usbgem_dev *)macinfo->gldm_private;
+
+ /* nic state must be online of suspended */
+ rw_enter(&dp->dev_state_lock, RW_READER);
+
+ ASSERT(dp->nic_state == NIC_STATE_ONLINE);
+ ASSERT(mp->b_next == NULL);
+
+ if (dp->mii_state != MII_STATE_LINKUP) {
+ /* Some nics hate to send packets while the link is down. */
+ /* we discard the untransmitted packets silently */
+ rw_exit(&dp->dev_state_lock);
+
+ freemsg(mp);
+#ifdef GEM_CONFIG_FMA
+ /* FIXME - should we ignore the error? */
+ ddi_fm_service_impact(dp->dip, DDI_SERVICE_DEGRADED);
+#endif
+ return (GLD_SUCCESS);
+ }
+
+ ret = (usbgem_send_common(dp, mp, flags) == NULL)
+ ? GLD_SUCCESS : GLD_NORESOURCES;
+ rw_exit(&dp->dev_state_lock);
+
+ return (ret);
+}
+
+/*
+ * usbgem_gld_send is used only for sending data packets into ethernet wire.
+ */
+static int
+usbgem_gld_send_tagged(gld_mac_info_t *macinfo, mblk_t *mp, uint32_t vtag)
+{
+ uint32_t flags;
+ struct usbgem_dev *dp;
+
+ dp = (struct usbgem_dev *)macinfo->gldm_private;
+
+ /*
+ * Some nics hate to send packets while the link is down.
+ */
+ if (dp->mii_state != MII_STATE_LINKUP) {
+ /* we dicard the untransmitted packets silently */
+ freemsg(mp);
+#ifdef GEM_CONFIG_FMA
+ /* FIXME - should we ignore the error? */
+ ddi_fm_service_impact(dp->dip, DDI_SERVICE_UNAFFECTED);
+#endif
+ return (GLD_SUCCESS);
+ }
+#ifdef notyet
+ flags = GLD_VTAG_TCI(vtag) << GEM_SEND_VTAG_SHIFT;
+#endif
+ return ((usbgem_send_common(dp, mp, 0) == NULL) ?
+ GLD_SUCCESS : GLD_NORESOURCES);
+}
+
+static void
+usbgem_gld_init(struct usbgem_dev *dp, gld_mac_info_t *macinfo, char *ident)
+{
+ /*
+ * configure GLD
+ */
+ macinfo->gldm_devinfo = dp->dip;
+ macinfo->gldm_private = (caddr_t)dp;
+
+ macinfo->gldm_reset = usbgem_gld_reset;
+ macinfo->gldm_start = usbgem_gld_start;
+ macinfo->gldm_stop = usbgem_gld_stop;
+ macinfo->gldm_set_mac_addr = usbgem_gld_set_mac_address;
+ macinfo->gldm_send = usbgem_gld_send;
+ macinfo->gldm_set_promiscuous = usbgem_gld_set_promiscuous;
+ macinfo->gldm_get_stats = usbgem_gld_get_stats;
+ macinfo->gldm_ioctl = usbgem_gld_ioctl;
+ macinfo->gldm_set_multicast = usbgem_gld_set_multicast;
+ macinfo->gldm_intr = NULL;
+ macinfo->gldm_mctl = NULL;
+
+ macinfo->gldm_ident = ident;
+ macinfo->gldm_type = DL_ETHER;
+ macinfo->gldm_minpkt = 0;
+ macinfo->gldm_maxpkt = dp->mtu;
+ macinfo->gldm_addrlen = ETHERADDRL;
+ macinfo->gldm_saplen = -2;
+ macinfo->gldm_ppa = ddi_get_instance(dp->dip);
+#ifdef GLD_CAP_LINKSTATE
+ macinfo->gldm_capabilities = GLD_CAP_LINKSTATE;
+#endif
+ macinfo->gldm_vendor_addr = dp->dev_addr.ether_addr_octet;
+ macinfo->gldm_broadcast_addr = usbgem_bcastaddr;
+}
+#endif /* USBGEM_CONFIG_GLDv3 */
+
+
+/* ======================================================================== */
+/*
+ * .conf interface
+ */
+/* ======================================================================== */
+void
+usbgem_generate_macaddr(struct usbgem_dev *dp, uint8_t *mac)
+{
+ extern char hw_serial[];
+ char *hw_serial_p;
+ int i;
+ uint64_t val;
+ uint64_t key;
+
+ cmn_err(CE_NOTE,
+ "!%s: using temp ether address,"
+ " do not use this for long time",
+ dp->name);
+
+ /* prefer a fixed address for DHCP */
+ hw_serial_p = &hw_serial[0];
+ val = stoi(&hw_serial_p);
+
+ key = 0;
+ for (i = 0; i < USBGEM_NAME_LEN; i++) {
+ if (dp->name[i] == 0) {
+ break;
+ }
+ key ^= dp->name[i];
+ }
+ key ^= ddi_get_instance(dp->dip);
+ val ^= key << 32;
+
+ /* generate a local address */
+ mac[0] = 0x02;
+ mac[1] = (uint8_t)(val >> 32);
+ mac[2] = (uint8_t)(val >> 24);
+ mac[3] = (uint8_t)(val >> 16);
+ mac[4] = (uint8_t)(val >> 8);
+ mac[5] = (uint8_t)val;
+}
+
+boolean_t
+usbgem_get_mac_addr_conf(struct usbgem_dev *dp)
+{
+ char propname[32];
+ char *valstr;
+ uint8_t mac[ETHERADDRL];
+ char *cp;
+ int c;
+ int i;
+ int j;
+ uint8_t v;
+ uint8_t d;
+ uint8_t ored;
+
+ DPRINTF(3, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+ /*
+ * Get ethernet address from .conf file
+ */
+ (void) sprintf(propname, "mac-addr");
+ if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, dp->dip,
+ DDI_PROP_DONTPASS, propname, &valstr)) != DDI_PROP_SUCCESS) {
+ return (B_FALSE);
+ }
+
+ if (strlen(valstr) != ETHERADDRL*3-1) {
+ goto syntax_err;
+ }
+
+ cp = valstr;
+ j = 0;
+ ored = 0;
+ for (;;) {
+ v = 0;
+ for (i = 0; i < 2; i++) {
+ c = *cp++;
+
+ if (c >= 'a' && c <= 'f') {
+ d = c - 'a' + 10;
+ } else if (c >= 'A' && c <= 'F') {
+ d = c - 'A' + 10;
+ } else if (c >= '0' && c <= '9') {
+ d = c - '0';
+ } else {
+ goto syntax_err;
+ }
+ v = (v << 4) | d;
+ }
+
+ mac[j++] = v;
+ ored |= v;
+ if (j == ETHERADDRL) {
+ /* done */
+ break;
+ }
+
+ c = *cp++;
+ if (c != ':') {
+ goto syntax_err;
+ }
+ }
+
+ if (ored == 0) {
+ usbgem_generate_macaddr(dp, mac);
+ }
+ for (i = 0; i < ETHERADDRL; i++) {
+ dp->dev_addr.ether_addr_octet[i] = mac[i];
+ }
+ ddi_prop_free(valstr);
+ return (B_TRUE);
+
+syntax_err:
+ cmn_err(CE_CONT,
+ "!%s: read mac addr: trying .conf: syntax err %s",
+ dp->name, valstr);
+ ddi_prop_free(valstr);
+
+ return (B_FALSE);
+}
+
+static void
+usbgem_read_conf(struct usbgem_dev *dp)
+{
+ int val;
+
+ DPRINTF(1, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ /*
+ * Get media mode infomation from .conf file
+ */
+ dp->anadv_autoneg = usbgem_prop_get_int(dp, "adv_autoneg_cap", 1) != 0;
+ dp->anadv_1000fdx = usbgem_prop_get_int(dp, "adv_1000fdx_cap", 1) != 0;
+ dp->anadv_1000hdx = usbgem_prop_get_int(dp, "adv_1000hdx_cap", 1) != 0;
+ dp->anadv_100t4 = usbgem_prop_get_int(dp, "adv_100T4_cap", 1) != 0;
+ dp->anadv_100fdx = usbgem_prop_get_int(dp, "adv_100fdx_cap", 1) != 0;
+ dp->anadv_100hdx = usbgem_prop_get_int(dp, "adv_100hdx_cap", 1) != 0;
+ dp->anadv_10fdx = usbgem_prop_get_int(dp, "adv_10fdx_cap", 1) != 0;
+ dp->anadv_10hdx = usbgem_prop_get_int(dp, "adv_10hdx_cap", 1) != 0;
+ dp->anadv_1000t_ms = usbgem_prop_get_int(dp, "adv_1000t_ms", 0);
+
+ if ((ddi_prop_exists(DDI_DEV_T_ANY, dp->dip,
+ DDI_PROP_DONTPASS, "full-duplex"))) {
+ dp->full_duplex =
+ usbgem_prop_get_int(dp, "full-duplex", 1) != 0;
+ dp->anadv_autoneg = B_FALSE;
+ if (dp->full_duplex) {
+ dp->anadv_1000hdx = B_FALSE;
+ dp->anadv_100hdx = B_FALSE;
+ dp->anadv_10hdx = B_FALSE;
+ } else {
+ dp->anadv_1000fdx = B_FALSE;
+ dp->anadv_100fdx = B_FALSE;
+ dp->anadv_10fdx = B_FALSE;
+ }
+ }
+
+ if ((val = usbgem_prop_get_int(dp, "speed", 0)) > 0) {
+ dp->anadv_autoneg = B_FALSE;
+ switch (val) {
+ case 1000:
+ dp->speed = USBGEM_SPD_1000;
+ dp->anadv_100t4 = B_FALSE;
+ dp->anadv_100fdx = B_FALSE;
+ dp->anadv_100hdx = B_FALSE;
+ dp->anadv_10fdx = B_FALSE;
+ dp->anadv_10hdx = B_FALSE;
+ break;
+ case 100:
+ dp->speed = USBGEM_SPD_100;
+ dp->anadv_1000fdx = B_FALSE;
+ dp->anadv_1000hdx = B_FALSE;
+ dp->anadv_10fdx = B_FALSE;
+ dp->anadv_10hdx = B_FALSE;
+ break;
+ case 10:
+ dp->speed = USBGEM_SPD_10;
+ dp->anadv_1000fdx = B_FALSE;
+ dp->anadv_1000hdx = B_FALSE;
+ dp->anadv_100t4 = B_FALSE;
+ dp->anadv_100fdx = B_FALSE;
+ dp->anadv_100hdx = B_FALSE;
+ break;
+ default:
+ cmn_err(CE_WARN,
+ "!%s: property %s: illegal value:%d",
+ dp->name, "speed", val);
+ dp->anadv_autoneg = B_TRUE;
+ break;
+ }
+ }
+ val = usbgem_prop_get_int(dp,
+ "adv_pause", dp->ugc.usbgc_flow_control & 1);
+ val |= usbgem_prop_get_int(dp,
+ "adv_asmpause", BOOLEAN(dp->ugc.usbgc_flow_control & 2)) << 1;
+ if (val > FLOW_CONTROL_RX_PAUSE || val < FLOW_CONTROL_NONE) {
+ cmn_err(CE_WARN,
+ "!%s: property %s: illegal value:%d",
+ dp->name, "flow-control", val);
+ } else {
+ val = min(val, dp->ugc.usbgc_flow_control);
+ }
+ dp->anadv_pause = BOOLEAN(val & 1);
+ dp->anadv_asmpause = BOOLEAN(val & 2);
+
+ dp->mtu = usbgem_prop_get_int(dp, "mtu", dp->mtu);
+ dp->txthr = usbgem_prop_get_int(dp, "txthr", dp->txthr);
+ dp->rxthr = usbgem_prop_get_int(dp, "rxthr", dp->rxthr);
+ dp->txmaxdma = usbgem_prop_get_int(dp, "txmaxdma", dp->txmaxdma);
+ dp->rxmaxdma = usbgem_prop_get_int(dp, "rxmaxdma", dp->rxmaxdma);
+#ifdef GEM_CONFIG_POLLING
+ dp->poll_pkt_delay =
+ usbgem_prop_get_int(dp, "pkt_delay", dp->poll_pkt_delay);
+
+ dp->max_poll_interval[GEM_SPD_10] =
+ usbgem_prop_get_int(dp, "max_poll_interval_10",
+ dp->max_poll_interval[GEM_SPD_10]);
+ dp->max_poll_interval[GEM_SPD_100] =
+ usbgem_prop_get_int(dp, "max_poll_interval_100",
+ dp->max_poll_interval[GEM_SPD_100]);
+ dp->max_poll_interval[GEM_SPD_1000] =
+ usbgem_prop_get_int(dp, "max_poll_interval_1000",
+ dp->max_poll_interval[GEM_SPD_1000]);
+
+ dp->min_poll_interval[GEM_SPD_10] =
+ usbgem_prop_get_int(dp, "min_poll_interval_10",
+ dp->min_poll_interval[GEM_SPD_10]);
+ dp->min_poll_interval[GEM_SPD_100] =
+ usbgem_prop_get_int(dp, "min_poll_interval_100",
+ dp->min_poll_interval[GEM_SPD_100]);
+ dp->min_poll_interval[GEM_SPD_1000] =
+ usbgem_prop_get_int(dp, "min_poll_interval_1000",
+ dp->min_poll_interval[GEM_SPD_1000]);
+#endif
+}
+
+/*
+ * usbem kstat support
+ */
+#ifndef GEM_CONFIG_GLDv3
+/* kstat items based from dmfe driver */
+
+struct usbgem_kstat_named {
+ struct kstat_named ks_xcvr_addr;
+ struct kstat_named ks_xcvr_id;
+ struct kstat_named ks_xcvr_inuse;
+ struct kstat_named ks_link_up;
+ struct kstat_named ks_link_duplex; /* 0:unknwon, 1:half, 2:full */
+ struct kstat_named ks_cap_1000fdx;
+ struct kstat_named ks_cap_1000hdx;
+ struct kstat_named ks_cap_100fdx;
+ struct kstat_named ks_cap_100hdx;
+ struct kstat_named ks_cap_10fdx;
+ struct kstat_named ks_cap_10hdx;
+#ifdef NEVER
+ struct kstat_named ks_cap_remfault;
+#endif
+ struct kstat_named ks_cap_autoneg;
+
+ struct kstat_named ks_adv_cap_1000fdx;
+ struct kstat_named ks_adv_cap_1000hdx;
+ struct kstat_named ks_adv_cap_100fdx;
+ struct kstat_named ks_adv_cap_100hdx;
+ struct kstat_named ks_adv_cap_10fdx;
+ struct kstat_named ks_adv_cap_10hdx;
+#ifdef NEVER
+ struct kstat_named ks_adv_cap_remfault;
+#endif
+ struct kstat_named ks_adv_cap_autoneg;
+ struct kstat_named ks_lp_cap_1000fdx;
+ struct kstat_named ks_lp_cap_1000hdx;
+ struct kstat_named ks_lp_cap_100fdx;
+ struct kstat_named ks_lp_cap_100hdx;
+ struct kstat_named ks_lp_cap_10fdx;
+ struct kstat_named ks_lp_cap_10hdx;
+ struct kstat_named ks_lp_cap_remfault;
+ struct kstat_named ks_lp_cap_autoneg;
+};
+
+static int
+usbgem_kstat_update(kstat_t *ksp, int rw)
+{
+ struct usbgem_kstat_named *knp;
+ struct usbgem_dev *dp = (struct usbgem_dev *)ksp->ks_private;
+
+ if (rw != KSTAT_READ) {
+ return (0);
+ }
+
+ knp = (struct usbgem_kstat_named *)ksp->ks_data;
+
+ knp->ks_xcvr_addr.value.ul = dp->mii_phy_addr;
+ knp->ks_xcvr_id.value.ul = dp->mii_phy_id;
+ knp->ks_xcvr_inuse.value.ul = usbgem_mac_xcvr_inuse(dp);
+ knp->ks_link_up.value.ul = dp->mii_state == MII_STATE_LINKUP;
+ knp->ks_link_duplex.value.ul =
+ (dp->mii_state == MII_STATE_LINKUP) ?
+ (dp->full_duplex ? 2 : 1) : 0;
+
+ knp->ks_cap_1000fdx.value.ul =
+ (dp->mii_xstatus & MII_XSTATUS_1000BASET_FD) ||
+ (dp->mii_xstatus & MII_XSTATUS_1000BASEX_FD);
+ knp->ks_cap_1000hdx.value.ul =
+ (dp->mii_xstatus & MII_XSTATUS_1000BASET) ||
+ (dp->mii_xstatus & MII_XSTATUS_1000BASEX);
+ knp->ks_cap_100fdx.value.ul =
+ BOOLEAN(dp->mii_status & MII_STATUS_100_BASEX_FD);
+ knp->ks_cap_100hdx.value.ul =
+ BOOLEAN(dp->mii_status & MII_STATUS_100_BASEX);
+ knp->ks_cap_10fdx.value.ul =
+ BOOLEAN(dp->mii_status & MII_STATUS_10_FD);
+ knp->ks_cap_10hdx.value.ul =
+ BOOLEAN(dp->mii_status & MII_STATUS_10);
+#ifdef NEVER
+ knp->ks_cap_remfault.value.ul = B_TRUE;
+#endif
+ knp->ks_cap_autoneg.value.ul =
+ BOOLEAN(dp->mii_status & MII_STATUS_CANAUTONEG);
+
+ knp->ks_adv_cap_1000fdx.value.ul = dp->anadv_1000fdx;
+ knp->ks_adv_cap_1000hdx.value.ul = dp->anadv_1000hdx;
+ knp->ks_adv_cap_100fdx.value.ul = dp->anadv_100fdx;
+ knp->ks_adv_cap_100hdx.value.ul = dp->anadv_100hdx;
+ knp->ks_adv_cap_10fdx.value.ul = dp->anadv_10fdx;
+ knp->ks_adv_cap_10hdx.value.ul = dp->anadv_10hdx;
+#ifdef NEVER
+ knp->ks_adv_cap_remfault.value.ul = 0;
+#endif
+ knp->ks_adv_cap_autoneg.value.ul = dp->anadv_autoneg;
+
+ knp->ks_lp_cap_1000fdx.value.ul =
+ BOOLEAN(dp->mii_stat1000 & MII_1000TS_LP_FULL);
+ knp->ks_lp_cap_1000hdx.value.ul =
+ BOOLEAN(dp->mii_stat1000 & MII_1000TS_LP_HALF);
+ knp->ks_lp_cap_100fdx.value.ul =
+ BOOLEAN(dp->mii_lpable & MII_ABILITY_100BASE_TX_FD);
+ knp->ks_lp_cap_100hdx.value.ul =
+ BOOLEAN(dp->mii_lpable & MII_ABILITY_100BASE_TX);
+ knp->ks_lp_cap_10fdx.value.ul =
+ BOOLEAN(dp->mii_lpable & MII_ABILITY_10BASE_T_FD);
+ knp->ks_lp_cap_10hdx.value.ul =
+ BOOLEAN(dp->mii_lpable & MII_ABILITY_10BASE_T);
+ knp->ks_lp_cap_remfault.value.ul =
+ BOOLEAN(dp->mii_exp & MII_AN_EXP_PARFAULT);
+ knp->ks_lp_cap_autoneg.value.ul =
+ BOOLEAN(dp->mii_exp & MII_AN_EXP_LPCANAN);
+
+ return (0);
+}
+
+
+static int
+usbgem_kstat_init(struct usbgem_dev *dp)
+{
+ int i;
+ kstat_t *ksp;
+ struct usbgem_kstat_named *knp;
+
+ ksp = kstat_create(
+ (char *)ddi_driver_name(dp->dip), ddi_get_instance(dp->dip),
+ "mii", "net", KSTAT_TYPE_NAMED,
+ sizeof (*knp) / sizeof (knp->ks_xcvr_addr), 0);
+
+ if (ksp == NULL) {
+ cmn_err(CE_WARN, "%s: %s() for mii failed",
+ dp->name, __func__);
+ return (USB_FAILURE);
+ }
+
+ knp = (struct usbgem_kstat_named *)ksp->ks_data;
+
+ kstat_named_init(&knp->ks_xcvr_addr, "xcvr_addr",
+ KSTAT_DATA_INT32);
+ kstat_named_init(&knp->ks_xcvr_id, "xcvr_id",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_xcvr_inuse, "xcvr_inuse",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_link_up, "link_up",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_link_duplex, "link_duplex",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_cap_1000fdx, "cap_1000fdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_cap_1000hdx, "cap_1000hdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_cap_100fdx, "cap_100fdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_cap_100hdx, "cap_100hdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_cap_10fdx, "cap_10fdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_cap_10hdx, "cap_10hdx",
+ KSTAT_DATA_UINT32);
+#ifdef NEVER
+ kstat_named_init(&knp->ks_cap_remfault, "cap_rem_fault",
+ KSTAT_DATA_UINT32);
+#endif
+ kstat_named_init(&knp->ks_cap_autoneg, "cap_autoneg",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_adv_cap_1000fdx, "adv_cap_1000fdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_adv_cap_1000hdx, "adv_cap_1000hdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_adv_cap_100fdx, "adv_cap_100fdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_adv_cap_100hdx, "adv_cap_100hdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_adv_cap_10fdx, "adv_cap_10fdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_adv_cap_10hdx, "adv_cap_10hdx",
+ KSTAT_DATA_UINT32);
+#ifdef NEVER
+ kstat_named_init(&knp->ks_adv_cap_remfault, "adv_rem_fault",
+ KSTAT_DATA_UINT32);
+#endif
+ kstat_named_init(&knp->ks_adv_cap_autoneg, "adv_cap_autoneg",
+ KSTAT_DATA_UINT32);
+
+ kstat_named_init(&knp->ks_lp_cap_1000fdx, "lp_cap_1000fdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_lp_cap_1000hdx, "lp_cap_1000hdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_lp_cap_100fdx, "lp_cap_100fdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_lp_cap_100hdx, "lp_cap_100hdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_lp_cap_10fdx, "lp_cap_10fdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_lp_cap_10hdx, "lp_cap_10hdx",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_lp_cap_remfault, "lp_cap_rem_fault",
+ KSTAT_DATA_UINT32);
+ kstat_named_init(&knp->ks_lp_cap_autoneg, "lp_cap_autoneg",
+ KSTAT_DATA_UINT32);
+
+ ksp->ks_private = (void *) dp;
+ ksp->ks_update = usbgem_kstat_update;
+ dp->ksp = ksp;
+
+ kstat_install(ksp);
+
+ return (USB_SUCCESS);
+}
+#endif /* GEM_CONFIG_GLDv3 */
+/* ======================================================================== */
+/*
+ * attach/detatch/usb support
+ */
+/* ======================================================================== */
+int
+usbgem_ctrl_out(struct usbgem_dev *dp,
+ uint8_t reqt, uint8_t req, uint16_t val, uint16_t ix, uint16_t len,
+ void *bp, int size)
+{
+ mblk_t *data;
+ usb_ctrl_setup_t setup;
+ usb_cr_t completion_reason;
+ usb_cb_flags_t cb_flags;
+ usb_flags_t flags;
+ int i;
+ int ret;
+
+ DPRINTF(4, (CE_CONT, "!%s: %s "
+ "reqt:0x%02x req:0x%02x val:0x%04x ix:0x%04x len:0x%02x "
+ "bp:0x%p nic_state:%d",
+ dp->name, __func__, reqt, req, val, ix, len, bp, dp->nic_state));
+
+ if (dp->mac_state == MAC_STATE_DISCONNECTED) {
+ return (USB_PIPE_ERROR);
+ }
+
+ data = NULL;
+ if (size > 0) {
+ if ((data = allocb(size, 0)) == NULL) {
+ return (USB_FAILURE);
+ }
+
+ bcopy(bp, data->b_rptr, size);
+ data->b_wptr = data->b_rptr + size;
+ }
+
+ setup.bmRequestType = reqt;
+ setup.bRequest = req;
+ setup.wValue = val;
+ setup.wIndex = ix;
+ setup.wLength = len;
+ setup.attrs = 0; /* attributes */
+
+ for (i = usbgem_ctrl_retry; i > 0; i--) {
+ completion_reason = 0;
+ cb_flags = 0;
+
+ ret = usb_pipe_ctrl_xfer_wait(DEFAULT_PIPE(dp),
+ &setup, &data, &completion_reason, &cb_flags, 0);
+
+ if (ret == USB_SUCCESS) {
+ break;
+ }
+ if (i == 1) {
+ cmn_err(CE_WARN,
+ "!%s: %s failed: "
+ "reqt:0x%x req:0x%x val:0x%x ix:0x%x len:0x%x "
+ "ret:%d cr:%s(%d), cb_flags:0x%x %s",
+ dp->name, __func__, reqt, req, val, ix, len,
+ ret, usb_str_cr(completion_reason),
+ completion_reason,
+ cb_flags,
+ (i > 1) ? "retrying..." : "fatal");
+ }
+ }
+
+ if (data != NULL) {
+ freemsg(data);
+ }
+
+ return (ret);
+}
+
+int
+usbgem_ctrl_in(struct usbgem_dev *dp,
+ uint8_t reqt, uint8_t req, uint16_t val, uint16_t ix, uint16_t len,
+ void *bp, int size)
+{
+ mblk_t *data;
+ usb_ctrl_setup_t setup;
+ usb_cr_t completion_reason;
+ usb_cb_flags_t cb_flags;
+ int i;
+ int ret;
+ int reclen;
+
+ DPRINTF(4, (CE_CONT,
+ "!%s: %s:"
+ " reqt:0x%02x req:0x%02x val:0x%04x ix:0x%04x len:0x%02x"
+ " bp:x%p mac_state:%d",
+ dp->name, __func__, reqt, req, val, ix, len, bp, dp->mac_state));
+
+ if (dp->mac_state == MAC_STATE_DISCONNECTED) {
+ return (USB_PIPE_ERROR);
+ }
+
+ data = NULL;
+
+ setup.bmRequestType = reqt;
+ setup.bRequest = req;
+ setup.wValue = val;
+ setup.wIndex = ix;
+ setup.wLength = len;
+ setup.attrs = USB_ATTRS_AUTOCLEARING; /* XXX */
+
+ for (i = usbgem_ctrl_retry; i > 0; i--) {
+ completion_reason = 0;
+ cb_flags = 0;
+ ret = usb_pipe_ctrl_xfer_wait(DEFAULT_PIPE(dp), &setup, &data,
+ &completion_reason, &cb_flags, 0);
+
+ if (ret == USB_SUCCESS) {
+ reclen = msgdsize(data);
+ bcopy(data->b_rptr, bp, min(reclen, size));
+ break;
+ }
+ if (i == 1) {
+ cmn_err(CE_WARN,
+ "!%s: %s failed: "
+ "reqt:0x%x req:0x%x val:0x%x ix:0x%x len:0x%x "
+ "ret:%d cr:%s(%d) cb_flags:0x%x %s",
+ dp->name, __func__,
+ reqt, req, val, ix, len,
+ ret, usb_str_cr(completion_reason),
+ completion_reason,
+ cb_flags,
+ (i > 1) ? "retrying..." : "fatal");
+ }
+ }
+
+ if (data) {
+ freemsg(data);
+ }
+
+ return (ret);
+}
+
+int
+usbgem_ctrl_out_val(struct usbgem_dev *dp,
+ uint8_t reqt, uint8_t req, uint16_t val, uint16_t ix, uint16_t len,
+ uint32_t v)
+{
+ uint8_t buf[4];
+
+ /* convert to little endian from native byte order */
+ switch (len) {
+ case 4:
+ buf[3] = v >> 24;
+ buf[2] = v >> 16;
+ /* fall thru */
+ case 2:
+ buf[1] = v >> 8;
+ /* fall thru */
+ case 1:
+ buf[0] = v;
+ }
+
+ return (usbgem_ctrl_out(dp, reqt, req, val, ix, len, buf, len));
+}
+
+int
+usbgem_ctrl_in_val(struct usbgem_dev *dp,
+ uint8_t reqt, uint8_t req, uint16_t val, uint16_t ix, uint16_t len,
+ void *valp)
+{
+ uint8_t buf[4];
+ uint_t v;
+ int err;
+
+#ifdef SANITY
+ bzero(buf, sizeof (buf));
+#endif
+ err = usbgem_ctrl_in(dp, reqt, req, val, ix, len, buf, len);
+ if (err == USB_SUCCESS) {
+ v = 0;
+ switch (len) {
+ case 4:
+ v |= buf[3] << 24;
+ v |= buf[2] << 16;
+ /* FALLTHROUGH */
+ case 2:
+ v |= buf[1] << 8;
+ /* FALLTHROUGH */
+ case 1:
+ v |= buf[0];
+ }
+
+ switch (len) {
+ case 4:
+ *(uint32_t *)valp = v;
+ break;
+ case 2:
+ *(uint16_t *)valp = v;
+ break;
+ case 1:
+ *(uint8_t *)valp = v;
+ break;
+ }
+ }
+ return (err);
+}
+
+/*
+ * Attach / detach / disconnect / reconnect management
+ */
+static int
+usbgem_open_pipes(struct usbgem_dev *dp)
+{
+ int i;
+ int ret;
+ int ifnum;
+ int alt;
+ usb_client_dev_data_t *reg_data;
+ usb_ep_data_t *ep_tree_node;
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ ifnum = dp->ugc.usbgc_ifnum;
+ alt = dp->ugc.usbgc_alt;
+
+ ep_tree_node = usb_lookup_ep_data(dp->dip, dp->reg_data, ifnum, alt,
+ 0, USB_EP_ATTR_BULK, USB_EP_DIR_IN);
+ if (ep_tree_node == NULL) {
+ cmn_err(CE_WARN, "!%s: %s: ep_bulkin is NULL",
+ dp->name, __func__);
+ goto err;
+ }
+ dp->ep_bulkin = &ep_tree_node->ep_descr;
+
+ ep_tree_node = usb_lookup_ep_data(dp->dip, dp->reg_data, ifnum, alt,
+ 0, USB_EP_ATTR_BULK, USB_EP_DIR_OUT);
+ if (ep_tree_node == NULL) {
+ cmn_err(CE_WARN, "!%s: %s: ep_bulkout is NULL",
+ dp->name, __func__);
+ goto err;
+ }
+ dp->ep_bulkout = &ep_tree_node->ep_descr;
+
+ ep_tree_node = usb_lookup_ep_data(dp->dip, dp->reg_data, ifnum, alt,
+ 0, USB_EP_ATTR_INTR, USB_EP_DIR_IN);
+ if (ep_tree_node) {
+ dp->ep_intr = &ep_tree_node->ep_descr;
+ } else {
+ /* don't care */
+ DPRINTF(1, (CE_CONT, "!%s: %s: ep_intr is NULL",
+ dp->name, __func__));
+ dp->ep_intr = NULL;
+ }
+
+ /* XXX -- no need to open default pipe */
+
+ /* open bulk out pipe */
+ bzero(&dp->policy_bulkout, sizeof (usb_pipe_policy_t));
+ dp->policy_bulkout.pp_max_async_reqs = 1;
+
+ if ((ret = usb_pipe_open(dp->dip,
+ dp->ep_bulkout, &dp->policy_bulkout, USB_FLAGS_SLEEP,
+ &dp->bulkout_pipe)) != USB_SUCCESS) {
+ cmn_err(CE_WARN,
+ "!%s: %s: err:%x: failed to open bulk-out pipe",
+ dp->name, __func__, ret);
+ dp->bulkout_pipe = NULL;
+ goto err;
+ }
+ DPRINTF(1, (CE_CONT, "!%s: %s: bulkout_pipe opened successfully",
+ dp->name, __func__));
+
+ /* open bulk in pipe */
+ bzero(&dp->policy_bulkin, sizeof (usb_pipe_policy_t));
+ dp->policy_bulkin.pp_max_async_reqs = 1;
+ if ((ret = usb_pipe_open(dp->dip,
+ dp->ep_bulkin, &dp->policy_bulkin, USB_FLAGS_SLEEP,
+ &dp->bulkin_pipe)) != USB_SUCCESS) {
+ cmn_err(CE_WARN,
+ "!%s: %s: ret:%x failed to open bulk-in pipe",
+ dp->name, __func__, ret);
+ dp->bulkin_pipe = NULL;
+ goto err;
+ }
+ DPRINTF(1, (CE_CONT, "!%s: %s: bulkin_pipe opened successfully",
+ dp->name, __func__));
+
+ if (dp->ep_intr) {
+ /* open interrupt pipe */
+ bzero(&dp->policy_interrupt, sizeof (usb_pipe_policy_t));
+ dp->policy_interrupt.pp_max_async_reqs = 1;
+ if ((ret = usb_pipe_open(dp->dip, dp->ep_intr,
+ &dp->policy_interrupt, USB_FLAGS_SLEEP,
+ &dp->intr_pipe)) != USB_SUCCESS) {
+ cmn_err(CE_WARN,
+ "!%s: %s: ret:%x failed to open interrupt pipe",
+ dp->name, __func__, ret);
+ dp->intr_pipe = NULL;
+ goto err;
+ }
+ }
+ DPRINTF(1, (CE_CONT, "!%s: %s: intr_pipe opened successfully",
+ dp->name, __func__));
+
+ return (USB_SUCCESS);
+
+err:
+ if (dp->bulkin_pipe) {
+ usb_pipe_close(dp->dip,
+ dp->bulkin_pipe, USB_FLAGS_SLEEP, NULL, 0);
+ dp->bulkin_pipe = NULL;
+ }
+ if (dp->bulkout_pipe) {
+ usb_pipe_close(dp->dip,
+ dp->bulkout_pipe, USB_FLAGS_SLEEP, NULL, 0);
+ dp->bulkout_pipe = NULL;
+ }
+ if (dp->intr_pipe) {
+ usb_pipe_close(dp->dip,
+ dp->intr_pipe, USB_FLAGS_SLEEP, NULL, 0);
+ dp->intr_pipe = NULL;
+ }
+
+ return (USB_FAILURE);
+}
+
+static int
+usbgem_close_pipes(struct usbgem_dev *dp)
+{
+ DPRINTF(0, (CE_CONT, "!%s: %s: called", dp->name, __func__));
+
+ if (dp->intr_pipe) {
+ usb_pipe_close(dp->dip,
+ dp->intr_pipe, USB_FLAGS_SLEEP, NULL, 0);
+ dp->intr_pipe = NULL;
+ }
+ DPRINTF(1, (CE_CONT, "!%s: %s: 1", dp->name, __func__));
+
+ ASSERT(dp->bulkin_pipe);
+ usb_pipe_close(dp->dip, dp->bulkin_pipe, USB_FLAGS_SLEEP, NULL, 0);
+ dp->bulkin_pipe = NULL;
+ DPRINTF(1, (CE_CONT, "!%s: %s: 2", dp->name, __func__));
+
+ ASSERT(dp->bulkout_pipe);
+ usb_pipe_close(dp->dip, dp->bulkout_pipe, USB_FLAGS_SLEEP, NULL, 0);
+ dp->bulkout_pipe = NULL;
+ DPRINTF(1, (CE_CONT, "!%s: %s: 3", dp->name, __func__));
+
+ return (USB_SUCCESS);
+}
+
+#define FREEZE_GRACEFUL (B_TRUE)
+#define FREEZE_NO_GRACEFUL (B_FALSE)
+static int
+usbgem_freeze_device(struct usbgem_dev *dp, boolean_t graceful)
+{
+ DPRINTF(0, (CE_NOTE, "!%s: %s: called", dp->name, __func__));
+
+ /* stop nic activity */
+ (void) usbgem_mac_stop(dp, MAC_STATE_DISCONNECTED, graceful);
+
+ /*
+ * Here we free all memory resource allocated, because it will
+ * cause to panic the system that we free usb_bulk_req objects
+ * during the usb device is disconnected.
+ */
+ (void) usbgem_free_memory(dp);
+
+ return (USB_SUCCESS);
+}
+
+static int
+usbgem_disconnect_cb(dev_info_t *dip)
+{
+ int ret;
+ struct usbgem_dev *dp;
+
+ dp = USBGEM_GET_DEV(dip);
+
+ cmn_err(CE_NOTE, "!%s: the usb device was disconnected (dp=%p)",
+ dp->name, dp);
+
+ /* start serialize */
+ rw_enter(&dp->dev_state_lock, RW_WRITER);
+
+ ret = usbgem_freeze_device(dp, 0);
+
+ /* end of serialize */
+ rw_exit(&dp->dev_state_lock);
+
+ return (ret);
+}
+
+static int
+usbgem_recover_device(struct usbgem_dev *dp)
+{
+ int err;
+
+ DPRINTF(0, (CE_NOTE, "!%s: %s: called", dp->name, __func__));
+
+ err = USB_SUCCESS;
+
+ /* reinitialize the usb connection */
+ usbgem_close_pipes(dp);
+ if ((err = usbgem_open_pipes(dp)) != USB_SUCCESS) {
+ goto x;
+ }
+
+ /* initialize nic state */
+ dp->mac_state = MAC_STATE_STOPPED;
+ dp->mii_state = MII_STATE_UNKNOWN;
+
+ /* allocate memory resources again */
+ if ((err = usbgem_alloc_memory(dp)) != USB_SUCCESS) {
+ goto x;
+ }
+
+ /* restart nic and recover state */
+ (void) usbgem_restart_nic(dp);
+
+ usbgem_mii_init(dp);
+
+ /* kick potentially stopped house keeping thread */
+ cv_signal(&dp->link_watcher_wait_cv);
+x:
+ return (err);
+}
+
+static int
+usbgem_reconnect_cb(dev_info_t *dip)
+{
+ int err = USB_SUCCESS;
+ struct usbgem_dev *dp;
+
+ dp = USBGEM_GET_DEV(dip);
+ DPRINTF(0, (CE_CONT, "!%s: dp=%p", ddi_get_name(dip), dp));
+#ifdef notdef
+ /* check device changes after disconnect */
+ if (usb_check_same_device(dp->dip, NULL, USB_LOG_L2, -1,
+ USB_CHK_BASIC | USB_CHK_CFG, NULL) != USB_SUCCESS) {
+ cmn_err(CE_CONT,
+ "!%s: no or different device installed", dp->name);
+ return (DDI_SUCCESS);
+ }
+#endif
+ cmn_err(CE_NOTE, "%s: the usb device was reconnected", dp->name);
+
+ /* start serialize */
+ rw_enter(&dp->dev_state_lock, RW_WRITER);
+
+ if (dp->mac_state == MAC_STATE_DISCONNECTED) {
+ err = usbgem_recover_device(dp);
+ }
+
+ /* end of serialize */
+ rw_exit(&dp->dev_state_lock);
+
+ return (err == USB_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
+}
+
+int
+usbgem_suspend(dev_info_t *dip)
+{
+ int err = USB_SUCCESS;
+ struct usbgem_dev *dp;
+
+ dp = USBGEM_GET_DEV(dip);
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: callded", dp->name, __func__));
+
+ /* start serialize */
+ rw_enter(&dp->dev_state_lock, RW_WRITER);
+
+ if (dp->mac_state == MAC_STATE_DISCONNECTED) {
+ err = usbgem_freeze_device(dp, STOP_GRACEFUL);
+ }
+
+ /* end of serialize */
+ rw_exit(&dp->dev_state_lock);
+
+ return (err == USB_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
+}
+
+int
+usbgem_resume(dev_info_t *dip)
+{
+ int err = USB_SUCCESS;
+ struct usbgem_dev *dp;
+
+ dp = USBGEM_GET_DEV(dip);
+
+ DPRINTF(0, (CE_CONT, "!%s: %s: callded", dp->name, __func__));
+#ifdef notdef
+ /* check device changes after disconnect */
+ if (usb_check_same_device(dp->dip, NULL, USB_LOG_L2, -1,
+ USB_CHK_BASIC | USB_CHK_CFG, NULL) != USB_SUCCESS) {
+ cmn_err(CE_CONT,
+ "!%s: no or different device installed", dp->name);
+ return (DDI_SUCCESS);
+ }
+#endif
+ /* start serialize */
+ rw_enter(&dp->dev_state_lock, RW_WRITER);
+
+ if (dp->mac_state == MAC_STATE_DISCONNECTED) {
+ err = usbgem_recover_device(dp);
+ }
+
+ /* end of serialize */
+ rw_exit(&dp->dev_state_lock);
+
+ return (err == USB_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
+}
+
+#define USBGEM_LOCAL_DATA_SIZE(gc) \
+ (sizeof (struct usbgem_dev) + USBGEM_MCALLOC)
+
+struct usbgem_dev *
+usbgem_do_attach(dev_info_t *dip,
+ struct usbgem_conf *gc, void *lp, int lmsize)
+{
+ struct usbgem_dev *dp;
+ int i;
+#ifdef USBGEM_CONFIG_GLDv3
+ mac_register_t *macp = NULL;
+#else
+ gld_mac_info_t *macinfo;
+ void *tmp;
+#endif
+ int ret;
+ int unit;
+ int err;
+
+ unit = ddi_get_instance(dip);
+
+ DPRINTF(2, (CE_CONT, "!usbgem%d: %s: called", unit, __func__));
+
+ /*
+ * Allocate soft data structure
+ */
+ dp = kmem_zalloc(USBGEM_LOCAL_DATA_SIZE(gc), KM_SLEEP);
+ if (dp == NULL) {
+#ifndef USBGEM_CONFIG_GLDv3
+ gld_mac_free(macinfo);
+#endif
+ return (NULL);
+ }
+#ifdef USBGEM_CONFIG_GLDv3
+ if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
+ cmn_err(CE_WARN, "!gem%d: %s: mac_alloc failed",
+ unit, __func__);
+ return (NULL);
+ }
+#else
+ macinfo = gld_mac_alloc(dip);
+ dp->macinfo = macinfo;
+#endif
+
+ /* link to private area */
+ dp->private = lp;
+ dp->priv_size = lmsize;
+ dp->mc_list = (struct mcast_addr *)&dp[1];
+
+ dp->dip = dip;
+ bcopy(gc->usbgc_name, dp->name, USBGEM_NAME_LEN);
+
+ /*
+ * register with usb service
+ */
+ if (usb_client_attach(dip, USBDRV_VERSION, 0) != USB_SUCCESS) {
+ cmn_err(CE_WARN,
+ "%s: %s: usb_client_attach failed",
+ dp->name, __func__);
+ goto err_free_private;
+ }
+
+ if (usb_get_dev_data(dip, &dp->reg_data,
+ USB_PARSE_LVL_ALL, 0) != USB_SUCCESS) {
+ dp->reg_data = NULL;
+ goto err_unregister_client;
+ }
+#ifdef USBGEM_DEBUG_LEVEL
+ usb_print_descr_tree(dp->dip, dp->reg_data);
+#endif
+
+ if (usbgem_open_pipes(dp) != USB_SUCCESS) {
+ /* failed to open pipes */
+ cmn_err(CE_WARN, "!%s: %s: failed to open pipes",
+ dp->name, __func__);
+ goto err_unregister_client;
+ }
+
+ /*
+ * Initialize mutexs and condition variables
+ */
+ mutex_init(&dp->rxlock, NULL, MUTEX_DRIVER, NULL);
+ mutex_init(&dp->txlock, NULL, MUTEX_DRIVER, NULL);
+ cv_init(&dp->rx_drain_cv, NULL, CV_DRIVER, NULL);
+ cv_init(&dp->tx_drain_cv, NULL, CV_DRIVER, NULL);
+ rw_init(&dp->dev_state_lock, NULL, RW_DRIVER, NULL);
+ mutex_init(&dp->link_watcher_lock, NULL, MUTEX_DRIVER, NULL);
+ cv_init(&dp->link_watcher_wait_cv, NULL, CV_DRIVER, NULL);
+ sema_init(&dp->hal_op_lock, 1, NULL, SEMA_DRIVER, NULL);
+ sema_init(&dp->rxfilter_lock, 1, NULL, SEMA_DRIVER, NULL);
+
+ /*
+ * Initialize configuration
+ */
+ dp->ugc = *gc;
+
+ dp->mtu = ETHERMTU;
+ dp->rxmode = 0;
+ dp->speed = USBGEM_SPD_10; /* default is 10Mbps */
+ dp->full_duplex = B_FALSE; /* default is half */
+ dp->flow_control = FLOW_CONTROL_NONE;
+
+ dp->nic_state = NIC_STATE_STOPPED;
+ dp->mac_state = MAC_STATE_STOPPED;
+ dp->mii_state = MII_STATE_UNKNOWN;
+
+ /* performance tuning parameters */
+ dp->txthr = ETHERMAX; /* tx fifo threshoold */
+ dp->txmaxdma = 16*4; /* tx max dma burst size */
+ dp->rxthr = 128; /* rx fifo threshoold */
+ dp->rxmaxdma = 16*4; /* rx max dma burst size */
+
+ /*
+ * Get media mode infomation from .conf file
+ */
+ usbgem_read_conf(dp);
+
+ /* rx_buf_len depend on MTU */
+ dp->rx_buf_len = MAXPKTBUF(dp) + dp->ugc.usbgc_rx_header_len;
+
+ /*
+ * Reset the chip
+ */
+ if (usbgem_hal_reset_chip(dp) != USB_SUCCESS) {
+ cmn_err(CE_WARN,
+ "!%s: %s: failed to reset the usb device",
+ dp->name, __func__);
+ goto err_destroy_locks;
+ }
+
+ /*
+ * HW dependant paremeter initialization
+ */
+ if (usbgem_hal_attach_chip(dp) != USB_SUCCESS) {
+ cmn_err(CE_WARN,
+ "!%s: %s: failed to attach the usb device",
+ dp->name, __func__);
+ goto err_destroy_locks;
+ }
+
+ /* allocate resources */
+ if (usbgem_alloc_memory(dp) != USB_SUCCESS) {
+ goto err_destroy_locks;
+ }
+
+ DPRINTF(0, (CE_CONT,
+ "!%s: %02x:%02x:%02x:%02x:%02x:%02x",
+ dp->name,
+ dp->dev_addr.ether_addr_octet[0],
+ dp->dev_addr.ether_addr_octet[1],
+ dp->dev_addr.ether_addr_octet[2],
+ dp->dev_addr.ether_addr_octet[3],
+ dp->dev_addr.ether_addr_octet[4],
+ dp->dev_addr.ether_addr_octet[5]));
+
+ /* copy mac address */
+ dp->cur_addr = dp->dev_addr;
+
+ /* pre-calculated tx timeout in second for performance */
+ dp->bulkout_timeout =
+ dp->ugc.usbgc_tx_timeout / drv_usectohz(1000*1000);
+
+#ifdef USBGEM_CONFIG_GLDv3
+ usbgem_gld3_init(dp, macp);
+#else
+ usbgem_gld_init(dp, macinfo, ident);
+#endif
+
+ /* Probe MII phy (scan phy) */
+ dp->mii_lpable = 0;
+ dp->mii_advert = 0;
+ dp->mii_exp = 0;
+ dp->mii_ctl1000 = 0;
+ dp->mii_stat1000 = 0;
+
+ dp->mii_status_ro = 0;
+ dp->mii_xstatus_ro = 0;
+
+ if (usbgem_mii_probe(dp) != USB_SUCCESS) {
+ cmn_err(CE_WARN, "!%s: %s: mii_probe failed",
+ dp->name, __func__);
+ goto err_free_memory;
+ }
+
+ /* mask unsupported abilities */
+ dp->anadv_autoneg &= BOOLEAN(dp->mii_status & MII_STATUS_CANAUTONEG);
+ dp->anadv_1000fdx &=
+ BOOLEAN(dp->mii_xstatus &
+ (MII_XSTATUS_1000BASEX_FD | MII_XSTATUS_1000BASET_FD));
+ dp->anadv_1000hdx &=
+ BOOLEAN(dp->mii_xstatus &
+ (MII_XSTATUS_1000BASEX | MII_XSTATUS_1000BASET));
+ dp->anadv_100t4 &= BOOLEAN(dp->mii_status & MII_STATUS_100_BASE_T4);
+ dp->anadv_100fdx &= BOOLEAN(dp->mii_status & MII_STATUS_100_BASEX_FD);
+ dp->anadv_100hdx &= BOOLEAN(dp->mii_status & MII_STATUS_100_BASEX);
+ dp->anadv_10fdx &= BOOLEAN(dp->mii_status & MII_STATUS_10_FD);
+ dp->anadv_10hdx &= BOOLEAN(dp->mii_status & MII_STATUS_10);
+
+ if (usbgem_mii_init(dp) != USB_SUCCESS) {
+ cmn_err(CE_WARN, "!%s: %s: mii_init failed",
+ dp->name, __func__);
+ goto err_free_memory;
+ }
+
+ /*
+ * initialize kstats including mii statistics
+ */
+#ifdef USBGEM_CONFIG_GLDv3
+#ifdef USBGEM_CONFIG_ND
+ usbgem_nd_setup(dp);
+#endif
+#else
+ if (usbgem_kstat_init(dp) != USB_SUCCESS) {
+ goto err_free_memory;
+ }
+#endif
+
+ /*
+ * Add interrupt to system.
+ */
+#ifdef USBGEM_CONFIG_GLDv3
+ if (ret = mac_register(macp, &dp->mh)) {
+ cmn_err(CE_WARN, "!%s: mac_register failed, error:%d",
+ dp->name, ret);
+ goto err_release_stats;
+ }
+ mac_free(macp);
+ macp = NULL;
+#else
+ /* gld_register will corrupts driver_private */
+ tmp = ddi_get_driver_private(dip);
+ if (gld_register(dip,
+ (char *)ddi_driver_name(dip), macinfo) != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "!%s: %s: gld_register failed",
+ dp->name, __func__);
+ ddi_set_driver_private(dip, tmp);
+ goto err_release_stats;
+ }
+ /* restore driver private */
+ ddi_set_driver_private(dip, tmp);
+#endif /* USBGEM_CONFIG_GLDv3 */
+ if (usb_register_hotplug_cbs(dip,
+ usbgem_suspend, usbgem_resume) != USB_SUCCESS) {
+ cmn_err(CE_WARN,
+ "!%s: %s: failed to register hotplug cbs",
+ dp->name, __func__);
+ goto err_unregister_gld;
+ }
+
+ /* reset mii and start mii link watcher */
+ if (usbgem_mii_start(dp) != USB_SUCCESS) {
+ goto err_unregister_hotplug;
+ }
+
+ /* start tx watchdow watcher */
+ if (usbgem_tx_watcher_start(dp)) {
+ goto err_usbgem_mii_stop;
+ }
+
+ ddi_set_driver_private(dip, (caddr_t)dp);
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: return: success", dp->name, __func__));
+
+ return (dp);
+
+err_usbgem_mii_stop:
+ usbgem_mii_stop(dp);
+
+err_unregister_hotplug:
+ usb_unregister_hotplug_cbs(dip);
+
+err_unregister_gld:
+#ifdef USBGEM_CONFIG_GLDv3
+ mac_unregister(dp->mh);
+#else
+ gld_unregister(macinfo);
+#endif
+
+err_release_stats:
+#ifdef USBGEM_CONFIG_GLDv3
+#ifdef USBGEM_CONFIG_ND
+ /* release NDD resources */
+ usbgem_nd_cleanup(dp);
+#endif
+#else
+ kstat_delete(dp->ksp);
+#endif
+
+err_free_memory:
+ usbgem_free_memory(dp);
+
+err_destroy_locks:
+ cv_destroy(&dp->tx_drain_cv);
+ cv_destroy(&dp->rx_drain_cv);
+ mutex_destroy(&dp->txlock);
+ mutex_destroy(&dp->rxlock);
+ rw_destroy(&dp->dev_state_lock);
+ mutex_destroy(&dp->link_watcher_lock);
+ cv_destroy(&dp->link_watcher_wait_cv);
+ sema_destroy(&dp->hal_op_lock);
+ sema_destroy(&dp->rxfilter_lock);
+
+err_close_pipes:
+ (void) usbgem_close_pipes(dp);
+
+err_unregister_client:
+ usb_client_detach(dp->dip, dp->reg_data);
+
+err_free_private:
+#ifdef USBGEM_CONFIG_GLDv3
+ if (macp) {
+ mac_free(macp);
+ }
+#else
+ gld_mac_free(macinfo);
+#endif
+ kmem_free((caddr_t)dp, USBGEM_LOCAL_DATA_SIZE(gc));
+
+ return (NULL);
+}
+
+int
+usbgem_do_detach(dev_info_t *dip)
+{
+ struct usbgem_dev *dp;
+
+ dp = USBGEM_GET_DEV(dip);
+
+#ifdef USBGEM_CONFIG_GLDv3
+ /* unregister with gld v3 */
+ if (mac_unregister(dp->mh) != DDI_SUCCESS) {
+ return (DDI_FAILURE);
+ }
+#else
+ /* unregister with gld v2 */
+ if (gld_unregister(dp->macinfo) != DDI_SUCCESS) {
+ return (DDI_FAILURE);
+ }
+#endif
+ /* unregister with hotplug service */
+ usb_unregister_hotplug_cbs(dip);
+
+ /* stop tx watchdog watcher*/
+ usbgem_tx_watcher_stop(dp);
+
+ /* stop the link manager */
+ usbgem_mii_stop(dp);
+
+ /* unregister with usb service */
+ (void) usbgem_free_memory(dp);
+ (void) usbgem_close_pipes(dp);
+ usb_client_detach(dp->dip, dp->reg_data);
+ dp->reg_data = NULL;
+
+ /* unregister with kernel statistics */
+#ifdef USBGEM_CONFIG_GLDv3
+#ifdef USBGEM_CONFIG_ND
+ /* release ndd resources */
+ usbgem_nd_cleanup(dp);
+#endif
+#else
+ /* destroy kstat objects */
+ kstat_delete(dp->ksp);
+#endif
+
+ /* release locks and condition variables */
+ mutex_destroy(&dp->txlock);
+ mutex_destroy(&dp->rxlock);
+ cv_destroy(&dp->tx_drain_cv);
+ cv_destroy(&dp->rx_drain_cv);
+ rw_destroy(&dp->dev_state_lock);
+ mutex_destroy(&dp->link_watcher_lock);
+ cv_destroy(&dp->link_watcher_wait_cv);
+ sema_destroy(&dp->hal_op_lock);
+ sema_destroy(&dp->rxfilter_lock);
+
+ /* release basic memory resources */
+#ifndef USBGEM_CONFIG_GLDv3
+ gld_mac_free(dp->macinfo);
+#endif
+ kmem_free((caddr_t)(dp->private), dp->priv_size);
+ kmem_free((caddr_t)dp, USBGEM_LOCAL_DATA_SIZE(&dp->ugc));
+
+ DPRINTF(2, (CE_CONT, "!%s: %s: return: success",
+ ddi_driver_name(dip), __func__));
+
+ return (DDI_SUCCESS);
+}
+
+int
+usbgem_mod_init(struct dev_ops *dop, char *name)
+{
+#ifdef USBGEM_CONFIG_GLDv3
+ major_t major;
+ major = ddi_name_to_major(name);
+ if (major == DDI_MAJOR_T_NONE) {
+ return (DDI_FAILURE);
+ }
+ mac_init_ops(dop, name);
+#endif
+ return (DDI_SUCCESS);
+}
+
+void
+usbgem_mod_fini(struct dev_ops *dop)
+{
+#ifdef USBGEM_CONFIG_GLDv3
+ mac_fini_ops(dop);
+#endif
+}
+
+int
+usbgem_quiesce(dev_info_t *dip)
+{
+ struct usbgem_dev *dp;
+
+ dp = USBGEM_GET_DEV(dip);
+
+ ASSERT(dp != NULL);
+
+ if (dp->mac_state != MAC_STATE_DISCONNECTED &&
+ dp->mac_state != MAC_STATE_STOPPED) {
+ if (usbgem_hal_stop_chip(dp) != USB_SUCCESS) {
+ (void) usbgem_hal_reset_chip(dp);
+ }
+ }
+
+ /* devo_quiesce() must return DDI_SUCCESS always */
+ return (DDI_SUCCESS);
+}
diff --git a/usr/src/uts/common/io/usbgem/usbgem.h b/usr/src/uts/common/io/usbgem/usbgem.h
new file mode 100644
index 0000000000..80b89a260e
--- /dev/null
+++ b/usr/src/uts/common/io/usbgem/usbgem.h
@@ -0,0 +1,428 @@
+/*
+ * usbgem.h: General USB to Ethernet MAC driver framework
+ * @(#)usbgem.h 1.4 12/02/09
+ * (C) Copyright 2003-2009 Masayuki Murayama KHF04453@nifty.ne.jp
+ */
+
+#ifndef __USBGEM_H__
+#define __USBGEM_H__
+
+#pragma ident "@(#)usbgem.h 1.4 12/02/09"
+
+#ifdef USBGEM_CONFIG_GLDv3
+#include <sys/mac.h>
+#ifndef MAC_VERSION
+#include <sys/mac_provider.h>
+#endif
+#include <sys/mac_ether.h>
+#else
+#include <sys/gld.h>
+#endif /* GLDv3 */
+
+/*
+ * Useful macros and typedefs
+ */
+#define USBGEM_NAME_LEN 32
+
+#define USBGEM_TX_TIMEOUT (drv_usectohz(3*1000000))
+#define USBGEM_TX_TIMEOUT_INTERVAL (drv_usectohz(1*1000000))
+#define USBGEM_LINK_WATCH_INTERVAL (drv_usectohz(1*1000000))
+
+/* general return code */
+#define USBGEM_SUCCESS 0
+#define USBGEM_FAILURE 1
+
+/* return code of usbgem_tx_done */
+#define INTR_RESTART_TX 0x80000000U
+
+struct usbgem_stats {
+ uint32_t intr;
+
+ uint32_t crc;
+ uint32_t errrcv;
+ uint32_t overflow;
+ uint32_t frame;
+ uint32_t missed;
+ uint32_t runt;
+ uint32_t frame_too_long;
+ uint32_t norcvbuf;
+ uint32_t sqe;
+
+ uint32_t collisions;
+ uint32_t first_coll;
+ uint32_t multi_coll;
+ uint32_t excoll;
+ uint32_t xmit_internal_err;
+ uint32_t nocarrier;
+ uint32_t defer;
+ uint32_t errxmt;
+ uint32_t underflow;
+ uint32_t xmtlatecoll;
+ uint32_t noxmtbuf;
+ uint32_t jabber;
+
+
+ uint64_t rbytes;
+ uint64_t obytes;
+ uint64_t rpackets;
+ uint64_t opackets;
+ uint32_t rbcast;
+ uint32_t obcast;
+ uint32_t rmcast;
+ uint32_t omcast;
+ uint32_t rcv_internal_err;
+};
+
+struct mcast_addr {
+ struct ether_addr addr;
+ uint32_t hash;
+};
+
+#define USBGEM_MAXMC 64
+#define USBGEM_MCALLOC (sizeof(struct mcast_addr) * USBGEM_MAXMC)
+
+#define SLOT(dp, n) ((n) % (dp)->ugc.usbgc_tx_list_max)
+
+/*
+ * mac soft state
+ */
+struct usbgem_dev {
+ dev_info_t *dip;
+#ifdef USBGEM_CONFIG_GLDv3
+ mac_handle_t mh;
+#else
+ void *macinfo; /* opaque handle for upper layer */
+#endif
+ char name[USBGEM_NAME_LEN];
+
+ /* pointer to usb private data */
+ usb_client_dev_data_t *reg_data;
+
+ /* usb handles */
+ usb_pipe_handle_t default_pipe;
+ usb_pipe_handle_t bulkin_pipe;
+ usb_pipe_handle_t bulkout_pipe;
+ usb_pipe_handle_t intr_pipe;
+
+ /* usb endpoints */
+ usb_ep_descr_t *ep_default;
+ usb_ep_descr_t *ep_bulkin;
+ usb_ep_descr_t *ep_bulkout;
+ usb_ep_descr_t *ep_intr;
+
+ /* usb policies */
+ usb_pipe_policy_t policy_default;
+ usb_pipe_policy_t policy_bulkin;
+ usb_pipe_policy_t policy_bulkout;
+ usb_pipe_policy_t policy_interrupt;
+
+ /* MAC address information */
+ struct ether_addr cur_addr;
+ struct ether_addr dev_addr;
+
+ /* RX state and resource management */
+ kmutex_t rxlock;
+ int rx_busy_cnt;
+ boolean_t rx_active;
+ kcondvar_t rx_drain_cv;
+
+ /* RX buffer management */
+ int rx_buf_len;
+
+ /* TX state and resource management */
+ kmutex_t txlock;
+ int tx_busy_cnt;
+ usb_bulk_req_t *tx_free_list;
+ kcondvar_t tx_drain_cv;
+ clock_t tx_start_time;
+ int bulkout_timeout; /* in second */
+ int tx_max_packets;
+ int tx_seq_num;
+ int tx_intr_pended;
+
+ /* NIC state from OS view */
+ int nic_state;
+#define NIC_STATE_UNKNOWN 0
+#define NIC_STATE_STOPPED 1
+#define NIC_STATE_INITIALIZED 2
+#define NIC_STATE_ONLINE 3
+
+ /* MAC state from hardware view */
+ int mac_state;
+#define MAC_STATE_DISCONNECTED 0 /* it includes suspended state too */
+#define MAC_STATE_STOPPED 1 /* powered up / buf not initialized */
+#define MAC_STATE_INITIALIZED 2 /* initialized */
+#define MAC_STATE_ONLINE 3 /* working correctly */
+#define MAC_STATE_ERROR 4 /* need to restart nic */
+
+ clock_t fatal_error;
+
+ /* robustness: timer and watchdog */
+ uint_t tx_watcher_stop;
+ kt_did_t tx_watcher_did;
+ kcondvar_t tx_watcher_cv;
+ kmutex_t tx_watcher_lock;
+ clock_t tx_watcher_timeout;
+ clock_t tx_watcher_interval;
+
+ /* MII mamagement */
+ boolean_t anadv_autoneg:1;
+ boolean_t anadv_1000fdx:1;
+ boolean_t anadv_1000hdx:1;
+ boolean_t anadv_100t4:1;
+ boolean_t anadv_100fdx:1;
+ boolean_t anadv_100hdx:1;
+ boolean_t anadv_10fdx:1;
+ boolean_t anadv_10hdx:1;
+ boolean_t anadv_1000t_ms:2;
+ boolean_t anadv_pause:1;
+ boolean_t anadv_asmpause:1;
+ boolean_t mii_advert_ro:1;
+
+ boolean_t full_duplex:1;
+ int speed:3;
+#define USBGEM_SPD_10 0
+#define USBGEM_SPD_100 1
+#define USBGEM_SPD_1000 2
+#define USBGEM_SPD_NUM 3
+ unsigned int flow_control:2;
+#define FLOW_CONTROL_NONE 0
+#define FLOW_CONTROL_SYMMETRIC 1
+#define FLOW_CONTROL_TX_PAUSE 2
+#define FLOW_CONTROL_RX_PAUSE 3
+
+ boolean_t mii_supress_msg:1;
+
+ uint32_t mii_phy_id;
+ uint16_t mii_status;
+ uint16_t mii_advert;
+ uint16_t mii_lpable;
+ uint16_t mii_exp;
+ uint16_t mii_ctl1000;
+ uint16_t mii_stat1000;
+ uint16_t mii_xstatus;
+ int8_t mii_phy_addr; /* must be signed */
+
+ uint16_t mii_status_ro;
+ uint16_t mii_xstatus_ro;
+
+ int mii_state;
+#define MII_STATE_UNKNOWN 0
+#define MII_STATE_RESETTING 1
+#define MII_STATE_AUTONEGOTIATING 2
+#define MII_STATE_AN_DONE 3
+#define MII_STATE_MEDIA_SETUP 4
+#define MII_STATE_LINKUP 5
+#define MII_STATE_LINKDOWN 6
+
+ clock_t mii_last_check; /* in tick */
+ clock_t mii_timer; /* in tick */
+#define MII_RESET_TIMEOUT drv_usectohz(1000*1000)
+#define MII_AN_TIMEOUT drv_usectohz(5000*1000)
+#define MII_LINKDOWN_TIMEOUT drv_usectohz(10000*1000)
+
+ clock_t mii_interval; /* in tick */
+ clock_t linkup_delay; /* in tick */
+
+ uint_t link_watcher_stop;
+ kt_did_t link_watcher_did;
+ kcondvar_t link_watcher_wait_cv;
+ kmutex_t link_watcher_lock;
+
+ krwlock_t dev_state_lock; /* mac_state and nic_state */
+ ksema_t hal_op_lock; /* serialize hw operations */
+ ksema_t drv_op_lock; /* hotplug op lock */
+
+ /* multcast list */
+ ksema_t rxfilter_lock;
+ int mc_count;
+ int mc_count_req;
+ struct mcast_addr *mc_list;
+ int rxmode;
+#define RXMODE_PROMISC 0x01
+#define RXMODE_ALLMULTI_REQ 0x02
+#define RXMODE_MULTI_OVF 0x04
+#define RXMODE_ENABLE 0x08
+#define RXMODE_ALLMULTI (RXMODE_ALLMULTI_REQ | RXMODE_MULTI_OVF)
+#define RXMODE_BITS \
+ "\020" \
+ "\004ENABLE" \
+ "\003MULTI_OVF" \
+ "\002ALLMULTI_REQ" \
+ "\001PROMISC"
+
+ /* statistcs */
+ struct usbgem_stats stats;
+
+ /* pointer to local structure */
+ void *private;
+ int priv_size;
+
+ /* configuration */
+ struct usbgem_conf {
+ /* name */
+ char usbgc_name[USBGEM_NAME_LEN];
+ int usbgc_ppa;
+
+ /* specification on usb */
+ int usbgc_ifnum; /* interface number */
+ int usbgc_alt; /* alternate */
+
+ /* specification on tx engine */
+ int usbgc_tx_list_max;
+
+ /* specification on rx engine */
+ int usbgc_rx_header_len;
+ int usbgc_rx_list_max;
+
+ /* time out parameters */
+ clock_t usbgc_tx_timeout;
+ clock_t usbgc_tx_timeout_interval;
+
+ /* flow control */
+ int usbgc_flow_control;
+
+ /* MII timeout parameters */
+ clock_t usbgc_mii_linkdown_timeout;
+ clock_t usbgc_mii_link_watch_interval;
+ clock_t usbgc_mii_reset_timeout;
+
+ clock_t usbgc_mii_an_watch_interval;
+ clock_t usbgc_mii_an_timeout;
+ clock_t usbgc_mii_an_wait;
+ clock_t usbgc_mii_an_delay;
+
+ /* MII configuration */
+ int usbgc_mii_addr_min;
+ int usbgc_mii_linkdown_action;
+ int usbgc_mii_linkdown_timeout_action;
+#define MII_ACTION_NONE 0
+#define MII_ACTION_RESET 1
+#define MII_ACTION_RSA 2
+ boolean_t usbgc_mii_dont_reset:1;
+ boolean_t usbgc_mii_an_oneshot:1;
+ boolean_t usbgc_mii_hw_link_detection:1;
+ boolean_t usbgc_mii_stop_mac_on_linkdown:1;
+ uint16_t usbgc_mii_an_cmd;
+
+ /* I/O methods */
+
+ /* mac operation */
+ int (*usbgc_attach_chip)(struct usbgem_dev *dp);
+ int (*usbgc_reset_chip)(struct usbgem_dev *dp);
+ int (*usbgc_init_chip)(struct usbgem_dev *dp);
+ int (*usbgc_start_chip)(struct usbgem_dev *dp);
+ int (*usbgc_stop_chip)(struct usbgem_dev *dp);
+ uint32_t (*usbgc_multicast_hash)(struct usbgem_dev *dp,
+ const uint8_t *);
+ int (*usbgc_set_rx_filter)(struct usbgem_dev *dp);
+ int (*usbgc_set_media)(struct usbgem_dev *dp);
+ int (*usbgc_get_stats)(struct usbgem_dev *dp);
+ void (*usbgc_interrupt)(struct usbgem_dev *dp, mblk_t *mp);
+
+ /* packet manupilation */
+ mblk_t *(*usbgc_tx_make_packet)(struct usbgem_dev *dp, mblk_t *mp);
+ mblk_t *(*usbgc_rx_make_packet)(struct usbgem_dev *dp, mblk_t *mp);
+ /* mii operations */
+ int (*usbgc_mii_probe)(struct usbgem_dev *dp);
+ int (*usbgc_mii_init)(struct usbgem_dev *dp);
+ int (*usbgc_mii_config)(struct usbgem_dev *dp, int *errp);
+ uint16_t (*usbgc_mii_read)(struct usbgem_dev *dp, uint_t reg, int *errp);
+ void (*usbgc_mii_write)(struct usbgem_dev *dp, uint_t reg, uint16_t val, int *errp);
+
+ /* jumbo frame */
+ int usbgc_max_mtu;
+ int usbgc_default_mtu;
+ int usbgc_min_mtu;
+ } ugc;
+
+ int misc_flag;
+#define USBGEM_VLAN 0x0001
+ timeout_id_t intr_watcher_id;
+
+ /* buffer size */
+ uint_t mtu;
+
+ /* performance tuning parameters */
+ uint_t txthr; /* tx fifo threshoold */
+ uint_t txmaxdma; /* tx max dma burst size */
+ uint_t rxthr; /* rx fifo threshoold */
+ uint_t rxmaxdma; /* tx max dma burst size */
+
+ /* kstat stuff */
+ kstat_t *ksp;
+
+ /* ndd stuff */
+ caddr_t nd_data_p;
+ caddr_t nd_arg_p;
+
+#ifdef USBGEM_DEBUG_LEVEL
+ int tx_cnt;
+#endif
+};
+
+/*
+ * Exported functions
+ */
+int usbgem_ctrl_out(struct usbgem_dev *dp,
+ uint8_t reqt, uint8_t req, uint16_t val, uint16_t ix, uint16_t len,
+ void *bp, int size);
+
+int usbgem_ctrl_in(struct usbgem_dev *dp,
+ uint8_t reqt, uint8_t req, uint16_t val, uint16_t ix, uint16_t len,
+ void *bp, int size);
+
+int usbgem_ctrl_out_val(struct usbgem_dev *dp,
+ uint8_t reqt, uint8_t req, uint16_t val, uint16_t ix, uint16_t len,
+ uint32_t v);
+
+int usbgem_ctrl_in_val(struct usbgem_dev *dp,
+ uint8_t reqt, uint8_t req, uint16_t val, uint16_t ix, uint16_t len,
+ void *valp);
+
+void usbgem_generate_macaddr(struct usbgem_dev *, uint8_t *);
+boolean_t usbgem_get_mac_addr_conf(struct usbgem_dev *);
+int usbgem_mii_probe_default(struct usbgem_dev *);
+int usbgem_mii_init_default(struct usbgem_dev *);
+int usbgem_mii_config_default(struct usbgem_dev *, int *errp);
+void usbgem_mii_update_link(struct usbgem_dev *);
+void usbgem_restart_tx(struct usbgem_dev *);
+boolean_t usbgem_tx_done(struct usbgem_dev *, int);
+void usbgem_receive(struct usbgem_dev *);
+struct usbgem_dev *usbgem_do_attach(dev_info_t *,
+ struct usbgem_conf *, void *, int);
+int usbgem_do_detach(dev_info_t *);
+
+uint32_t usbgem_ether_crc_le(const uint8_t *addr);
+uint32_t usbgem_ether_crc_be(const uint8_t *addr);
+
+int usbgem_resume(dev_info_t *);
+int usbgem_suspend(dev_info_t *);
+int usbgem_quiesce(dev_info_t *);
+
+#ifdef USBGEM_CONFIG_GLDv3
+#if DEVO_REV < 4
+#define USBGEM_STREAM_OPS(dev_ops, attach, detach) \
+ DDI_DEFINE_STREAM_OPS(dev_ops, nulldev, nulldev, attach, detach, \
+ nodev, NULL, D_MP, NULL)
+#else
+#define USBGEM_STREAM_OPS(dev_ops, attach, detach) \
+ DDI_DEFINE_STREAM_OPS(dev_ops, nulldev, nulldev, attach, detach, \
+ nodev, NULL, D_MP, NULL, usbgem_quiesce)
+#endif
+#else
+#define usbgem_getinfo gld_getinfo
+#define usbgem_open gld_open
+#define usbgem_close gld_close
+#define usbgem_wput gld_wput
+#define usbgem_wsrv gld_wsrv
+#define usbgem_rsrv gld_rsrv
+#define usbgem_power NULL
+#endif
+int usbgem_mod_init(struct dev_ops *, char *);
+void usbgem_mod_fini(struct dev_ops *);
+
+#define USBGEM_GET_DEV(dip) \
+ ((struct usbgem_dev *)(ddi_get_driver_private(dip)))
+
+#endif /* __USBGEM_H__ */
diff --git a/usr/src/uts/common/io/usbgem/usbgem_mii.h b/usr/src/uts/common/io/usbgem/usbgem_mii.h
new file mode 100644
index 0000000000..2b4176a340
--- /dev/null
+++ b/usr/src/uts/common/io/usbgem/usbgem_mii.h
@@ -0,0 +1,242 @@
+/*
+ * gem_mii.h: mii header for gem
+ *
+ * Copyright (c) 2002-2007 Masayuki Murayama. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the author nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+#pragma ident "@(#)gem_mii.h 1.4 07/11/30"
+
+/*
+ * gem_mii.h : MII registers
+ */
+#ifndef _GEM_MII_H_
+#define _GEM_MII_H_
+
+#ifdef GEM_CONFIG_GLDv3
+#include <sys/miiregs.h>
+#else
+#define MII_CONTROL 0
+#define MII_STATUS 1
+#define MII_PHYIDH 2
+#define MII_PHYIDL 3
+#define MII_AN_ADVERT 4
+#define MII_AN_LPABLE 5
+#define MII_AN_EXPANSION 6
+#define MII_AN_NXTPGXMIT 7
+#endif /* GEM_CONFIG_GLDv3 */
+
+#define MII_AN_LPANXT 8
+#define MII_MS_CONTROL 9
+#define MII_MS_STATUS 10
+#define MII_XSTATUS 15
+
+/* for 1000BaseT support */
+#define MII_1000TC MII_MS_CONTROL
+#define MII_1000TS MII_MS_STATUS
+#ifndef GEM_CONFIG_GLDv3
+#define MII_CONTROL_RESET 0x8000
+#define MII_CONTROL_LOOPBACK 0x4000
+#define MII_CONTROL_100MB 0x2000
+#define MII_CONTROL_ANE 0x1000
+#define MII_CONTROL_PWRDN 0x0800
+#define MII_CONTROL_ISOLATE 0x0400
+#define MII_CONTROL_RSAN 0x0200
+#define MII_CONTROL_FDUPLEX 0x0100
+#define MII_CONTROL_COLTST 0x0080
+#endif /* !GEM_CONFIG_GLDv3 */
+#define MII_CONTROL_SPEED 0x2040
+
+#define MII_CONTROL_10MB 0x0000
+#define MII_CONTROL_1000MB 0x0040
+
+#define MII_CONTROL_BITS \
+ "\020" \
+ "\020RESET" \
+ "\017LOOPBACK" \
+ "\016100MB" \
+ "\015ANE" \
+ "\014PWRDN" \
+ "\013ISOLATE" \
+ "\012RSAN" \
+ "\011FDUPLEX" \
+ "\010COLTST" \
+ "\0071000M"
+#ifndef GEM_CONFIG_GLDv3
+#define MII_STATUS_100_BASE_T4 0x8000
+#define MII_STATUS_100_BASEX_FD 0x4000
+#define MII_STATUS_100_BASEX 0x2000
+#define MII_STATUS_10_FD 0x1000
+#define MII_STATUS_10 0x0800
+#define MII_STATUS_MFPRMBLSUPR 0x0040
+#define MII_STATUS_ANDONE 0x0020
+#define MII_STATUS_REMFAULT 0x0010
+#define MII_STATUS_CANAUTONEG 0x0008
+#define MII_STATUS_LINKUP 0x0004
+#define MII_STATUS_JABBERING 0x0002
+#define MII_STATUS_EXTENDED 0x0001
+#endif /* !GEM_CONFIG_GLDv3 */
+#define MII_STATUS_XSTATUS 0x0100
+#define MII_STATUS_100_BASE_T2_FD 0x0400
+#define MII_STATUS_100_BASE_T2 0x0200
+
+#define MII_STATUS_ABILITY_TECH \
+ (MII_STATUS_100_BASE_T4 | \
+ MII_STATUS_100_BASEX_FD | \
+ MII_STATUS_100_BASEX | \
+ MII_STATUS_10 | \
+ MII_STATUS_10_FD)
+
+
+#define MII_STATUS_BITS \
+ "\020" \
+ "\020100_BASE_T4" \
+ "\017100_BASEX_FD" \
+ "\016100_BASEX" \
+ "\01510_BASE_FD" \
+ "\01410_BASE" \
+ "\013100_BASE_T2_FD" \
+ "\012100_BASE_T2" \
+ "\011XSTATUS" \
+ "\007MFPRMBLSUPR" \
+ "\006ANDONE" \
+ "\005REMFAULT" \
+ "\004CANAUTONEG" \
+ "\003LINKUP" \
+ "\002JABBERING" \
+ "\001EXTENDED"
+#ifndef GEM_CONFIG_GLDv3
+#define MII_AN_ADVERT_NP 0x8000
+#define MII_AN_ADVERT_REMFAULT 0x2000
+#define MII_AN_ADVERT_SELECTOR 0x001f
+#endif /* !GEM_CONFIG_GLDv3 */
+
+#define MII_ABILITY_ASM_DIR 0x0800 /* for annex 28B */
+#ifndef MII_ABILITY_PAUSE
+#define MII_ABILITY_PAUSE 0x0400 /* for IEEE 802.3x */
+#endif
+#ifndef GEM_CONFIG_GLDv3
+#define MII_ABILITY_100BASE_T4 0x0200
+#define MII_ABILITY_100BASE_TX_FD 0x0100
+#define MII_ABILITY_100BASE_TX 0x0080
+#define MII_ABILITY_10BASE_T_FD 0x0040
+#define MII_ABILITY_10BASE_T 0x0020
+#endif /* !GEM_CONFIG_GLDv3 */
+
+#define MII_AN_LPABLE_NP 0x8000
+
+#define MII_ABILITY_TECH \
+ (MII_ABILITY_100BASE_T4 | \
+ MII_ABILITY_100BASE_TX_FD | \
+ MII_ABILITY_100BASE_TX | \
+ MII_ABILITY_10BASE_T | \
+ MII_ABILITY_10BASE_T_FD)
+
+#define MII_ABILITY_ALL \
+ (MII_AN_ADVERT_REMFAULT | \
+ MII_ABILITY_ASM_DIR | \
+ MII_ABILITY_PAUSE | \
+ MII_ABILITY_TECH)
+
+
+#define MII_ABILITY_BITS \
+ "\020" \
+ "\016REMFAULT" \
+ "\014ASM_DIR" \
+ "\013PAUSE" \
+ "\012100BASE_T4" \
+ "\011100BASE_TX_FD" \
+ "\010100BASE_TX" \
+ "\00710BASE_T_FD" \
+ "\00610BASE_T"
+#ifndef GEM_CONFIG_GLDv3
+#define MII_AN_EXP_PARFAULT 0x0010
+#define MII_AN_EXP_LPCANNXTP 0x0008
+#define MII_AN_EXP_CANNXTPP 0x0004
+#define MII_AN_EXP_PAGERCVD 0x0002
+#define MII_AN_EXP_LPCANAN 0x0001
+#endif /* !GEM_CONFIG_GLDv3 */
+
+#define MII_AN_EXP_BITS \
+ "\020" \
+ "\005PARFAULT" \
+ "\004LPCANNXTP" \
+ "\003CANNXTPP" \
+ "\002PAGERCVD" \
+ "\001LPCANAN"
+
+#define MII_1000TC_TESTMODE 0xe000
+#define MII_1000TC_CFG_EN 0x1000
+#define MII_1000TC_CFG_VAL 0x0800
+#define MII_1000TC_PORTTYPE 0x0400
+#define MII_1000TC_ADV_FULL 0x0200
+#define MII_1000TC_ADV_HALF 0x0100
+
+#define MII_1000TC_BITS \
+ "\020" \
+ "\015CFG_EN" \
+ "\014CFG_VAL" \
+ "\013PORTTYPE" \
+ "\012FULL" \
+ "\011HALF"
+
+#define MII_1000TS_CFG_FAULT 0x8000
+#define MII_1000TS_CFG_MASTER 0x4000
+#define MII_1000TS_LOCALRXOK 0x2000
+#define MII_1000TS_REMOTERXOK 0x1000
+#define MII_1000TS_LP_FULL 0x0800
+#define MII_1000TS_LP_HALF 0x0400
+
+#define MII_1000TS_BITS \
+ "\020" \
+ "\020CFG_FAULT" \
+ "\017CFG_MASTER" \
+ "\014CFG_LOCALRXOK" \
+ "\013CFG_REMOTERXOK" \
+ "\012LP_FULL" \
+ "\011LP_HALF"
+
+#define MII_XSTATUS_1000BASEX_FD 0x8000
+#define MII_XSTATUS_1000BASEX 0x4000
+#define MII_XSTATUS_1000BASET_FD 0x2000
+#define MII_XSTATUS_1000BASET 0x1000
+
+#define MII_XSTATUS_BITS \
+ "\020" \
+ "\0201000BASEX_FD" \
+ "\0171000BASEX" \
+ "\0161000BASET_FD" \
+ "\0151000BASET"
+
+#define MII_READ_CMD(p, r) \
+ ((6<<(18+5+5)) | ((p)<<(18+5)) | ((r)<<18))
+
+#define MII_WRITE_CMD(p, r, v) \
+ ((5<<(18+5+5)) | ((p)<<(18+5)) | ((r)<<18) | (2 << 16) | (v))
+
+#endif /* _GEM_MII_H_ */
diff --git a/usr/src/uts/common/io/vioif/vioif.c b/usr/src/uts/common/io/vioif/vioif.c
index 0d1132febc..27241894aa 100644
--- a/usr/src/uts/common/io/vioif/vioif.c
+++ b/usr/src/uts/common/io/vioif/vioif.c
@@ -12,6 +12,7 @@
/*
* Copyright 2013 Nexenta Inc. All rights reserved.
* Copyright (c) 2014, 2015 by Delphix. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
/* Based on the NetBSD virtio driver by Minoura Makoto. */
@@ -285,6 +286,13 @@ struct vioif_softc {
unsigned int sc_tx_csum:1;
unsigned int sc_tx_tso4:1;
+ /*
+ * For debugging, it is useful to know whether the MAC address we
+ * are using came from the host (via VIRTIO_NET_CONFIG_MAC) or
+ * was otherwise generated or set from within the guest.
+ */
+ unsigned int sc_mac_from_host:1;
+
int sc_mtu;
uint8_t sc_mac[ETHERADDRL];
/*
@@ -312,7 +320,10 @@ struct vioif_softc {
/* Copying small packets turns out to be faster then mapping them. */
unsigned long sc_rxcopy_thresh;
unsigned long sc_txcopy_thresh;
- /* Some statistic coming here */
+
+ /*
+ * Statistics visible through mac:
+ */
uint64_t sc_ipackets;
uint64_t sc_opackets;
uint64_t sc_rbytes;
@@ -325,6 +336,18 @@ struct vioif_softc {
uint64_t sc_notxbuf;
uint64_t sc_ierrors;
uint64_t sc_oerrors;
+
+ /*
+ * Internal debugging statistics:
+ */
+ uint64_t sc_rxfail_dma_handle;
+ uint64_t sc_rxfail_dma_buffer;
+ uint64_t sc_rxfail_dma_bind;
+ uint64_t sc_rxfail_chain_undersize;
+ uint64_t sc_rxfail_no_descriptors;
+ uint64_t sc_txfail_dma_handle;
+ uint64_t sc_txfail_dma_bind;
+ uint64_t sc_txfail_indirect_limit;
};
#define ETHER_HEADER_LEN sizeof (struct ether_header)
@@ -474,8 +497,7 @@ vioif_rx_construct(void *buffer, void *user_arg, int kmflags)
if (ddi_dma_alloc_handle(sc->sc_dev, &vioif_mapped_buf_dma_attr,
DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmah)) {
- dev_err(sc->sc_dev, CE_WARN,
- "Can't allocate dma handle for rx buffer");
+ sc->sc_rxfail_dma_handle++;
goto exit_handle;
}
@@ -483,8 +505,7 @@ vioif_rx_construct(void *buffer, void *user_arg, int kmflags)
VIOIF_RX_SIZE + sizeof (struct virtio_net_hdr),
&vioif_bufattr, DDI_DMA_STREAMING, DDI_DMA_SLEEP,
NULL, &buf->rb_mapping.vbm_buf, &len, &buf->rb_mapping.vbm_acch)) {
- dev_err(sc->sc_dev, CE_WARN,
- "Can't allocate rx buffer");
+ sc->sc_rxfail_dma_buffer++;
goto exit_alloc;
}
ASSERT(len >= VIOIF_RX_SIZE);
@@ -493,8 +514,7 @@ vioif_rx_construct(void *buffer, void *user_arg, int kmflags)
buf->rb_mapping.vbm_buf, len, DDI_DMA_READ | DDI_DMA_STREAMING,
DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmac,
&buf->rb_mapping.vbm_ncookies)) {
- dev_err(sc->sc_dev, CE_WARN, "Can't bind tx buffer");
-
+ sc->sc_rxfail_dma_bind++;
goto exit_bind;
}
@@ -716,27 +736,24 @@ vioif_add_rx(struct vioif_softc *sc, int kmflag)
struct vioif_rx_buf *buf;
ve = vq_alloc_entry(sc->sc_rx_vq);
- if (!ve) {
+ if (ve == NULL) {
/*
* Out of free descriptors - ring already full.
- * It would be better to update sc_norxdescavail
- * but MAC does not ask for this info, hence we
- * update sc_norecvbuf.
*/
+ sc->sc_rxfail_no_descriptors++;
sc->sc_norecvbuf++;
goto exit_vq;
}
buf = sc->sc_rxbufs[ve->qe_index];
- if (!buf) {
+ if (buf == NULL) {
/* First run, allocate the buffer. */
buf = kmem_cache_alloc(sc->sc_rxbuf_cache, kmflag);
sc->sc_rxbufs[ve->qe_index] = buf;
}
/* Still nothing? Bye. */
- if (!buf) {
- dev_err(sc->sc_dev, CE_WARN, "Can't allocate rx buffer");
+ if (buf == NULL) {
sc->sc_norecvbuf++;
goto exit_buf;
}
@@ -789,20 +806,19 @@ static int
vioif_populate_rx(struct vioif_softc *sc, int kmflag)
{
int i = 0;
- int ret;
for (;;) {
- ret = vioif_add_rx(sc, kmflag);
- if (ret)
+ if (vioif_add_rx(sc, kmflag) != DDI_SUCCESS) {
/*
* We could not allocate some memory. Try to work with
* what we've got.
*/
break;
+ }
i++;
}
- if (i)
+ if (i != 0)
virtio_sync_vq(sc->sc_rx_vq);
return (i);
@@ -823,8 +839,7 @@ vioif_process_rx(struct vioif_softc *sc)
ASSERT(buf);
if (len < sizeof (struct virtio_net_hdr)) {
- dev_err(sc->sc_dev, CE_WARN, "RX: Cnain too small: %u",
- len - (uint32_t)sizeof (struct virtio_net_hdr));
+ sc->sc_rxfail_chain_undersize++;
sc->sc_ierrors++;
virtio_free_chain(ve);
continue;
@@ -838,7 +853,7 @@ vioif_process_rx(struct vioif_softc *sc)
*/
if (len < sc->sc_rxcopy_thresh) {
mp = allocb(len, 0);
- if (!mp) {
+ if (mp == NULL) {
sc->sc_norecvbuf++;
sc->sc_ierrors++;
@@ -855,7 +870,7 @@ vioif_process_rx(struct vioif_softc *sc)
buf->rb_mapping.vbm_buf +
sizeof (struct virtio_net_hdr) +
VIOIF_IP_ALIGN, len, 0, &buf->rb_frtn);
- if (!mp) {
+ if (mp == NULL) {
sc->sc_norecvbuf++;
sc->sc_ierrors++;
@@ -901,31 +916,32 @@ vioif_reclaim_used_tx(struct vioif_softc *sc)
struct vioif_tx_buf *buf;
uint32_t len;
mblk_t *mp;
- int i = 0;
+ unsigned chains = 0;
while ((ve = virtio_pull_chain(sc->sc_tx_vq, &len))) {
/* We don't chain descriptors for tx, so don't expect any. */
- ASSERT(!ve->qe_next);
+ ASSERT(ve->qe_next == NULL);
buf = &sc->sc_txbufs[ve->qe_index];
mp = buf->tb_mp;
buf->tb_mp = NULL;
- if (mp) {
- for (i = 0; i < buf->tb_external_num; i++)
+ if (mp != NULL) {
+ for (int i = 0; i < buf->tb_external_num; i++) {
(void) ddi_dma_unbind_handle(
buf->tb_external_mapping[i].vbm_dmah);
+ }
}
virtio_free_chain(ve);
/* External mapping used, mp was not freed in vioif_send() */
- if (mp)
+ if (mp != NULL)
freemsg(mp);
- i++;
+ chains++;
}
- if (sc->sc_tx_stopped && i) {
+ if (sc->sc_tx_stopped != 0 && chains > 0) {
sc->sc_tx_stopped = 0;
mac_tx_update(sc->sc_mac_handle);
}
@@ -962,8 +978,7 @@ vioif_tx_lazy_handle_alloc(struct vioif_softc *sc, struct vioif_tx_buf *buf,
&vioif_mapped_buf_dma_attr, DDI_DMA_SLEEP, NULL,
&buf->tb_external_mapping[i].vbm_dmah);
if (ret != DDI_SUCCESS) {
- dev_err(sc->sc_dev, CE_WARN,
- "Can't allocate dma handle for external tx buffer");
+ sc->sc_txfail_dma_handle++;
}
}
@@ -1017,17 +1032,14 @@ vioif_tx_external(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp,
DDI_DMA_SLEEP, NULL, &dmac, &ncookies);
if (ret != DDI_SUCCESS) {
+ sc->sc_txfail_dma_bind++;
sc->sc_oerrors++;
- dev_err(sc->sc_dev, CE_NOTE,
- "TX: Failed to bind external handle");
goto exit_bind;
}
/* Check if we still fit into the indirect table. */
if (virtio_ve_indirect_available(ve) < ncookies) {
- dev_err(sc->sc_dev, CE_NOTE,
- "TX: Indirect descriptor table limit reached."
- " It took %d fragments.", i);
+ sc->sc_txfail_indirect_limit++;
sc->sc_notxbuf++;
sc->sc_oerrors++;
@@ -1086,7 +1098,7 @@ vioif_send(struct vioif_softc *sc, mblk_t *mp)
ve = vq_alloc_entry(sc->sc_tx_vq);
- if (!ve) {
+ if (ve == NULL) {
sc->sc_notxbuf++;
/* Out of free descriptors - try later. */
return (B_FALSE);
@@ -1138,9 +1150,9 @@ vioif_send(struct vioif_softc *sc, mblk_t *mp)
/* meanwhile update the statistic */
if (mp->b_rptr[0] & 0x1) {
if (bcmp(mp->b_rptr, vioif_broadcast, ETHERADDRL) != 0)
- sc->sc_multixmt++;
- else
- sc->sc_brdcstxmt++;
+ sc->sc_multixmt++;
+ else
+ sc->sc_brdcstxmt++;
}
/*
@@ -1202,8 +1214,7 @@ vioif_start(void *arg)
{
struct vioif_softc *sc = arg;
- mac_link_update(sc->sc_mac_handle,
- vioif_link_state(sc));
+ mac_link_update(sc->sc_mac_handle, vioif_link_state(sc));
virtio_start_vq_intr(sc->sc_rx_vq);
@@ -1404,10 +1415,8 @@ vioif_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
case MAC_PROP_PRIVATE:
bzero(valstr, sizeof (valstr));
if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
-
value = sc->sc_txcopy_thresh;
- } else if (strcmp(pr_name,
- vioif_rxcopy_thresh) == 0) {
+ } else if (strcmp(pr_name, vioif_rxcopy_thresh) == 0) {
value = sc->sc_rxcopy_thresh;
} else {
return;
@@ -1483,7 +1492,6 @@ vioif_show_features(struct vioif_softc *sc, const char *prefix,
bufp += virtio_show_features(features, bufp, bufend - bufp);
*bufp = '\0';
-
/* Using '!' to only CE_NOTE this to the system log. */
dev_err(sc->sc_dev, CE_NOTE, "!%s Vioif (%b)", buf, features,
VIRTIO_NET_FEATURE_BITS);
@@ -1512,8 +1520,8 @@ vioif_dev_features(struct vioif_softc *sc)
sc->sc_virtio.sc_features);
if (!(sc->sc_virtio.sc_features & VIRTIO_F_RING_INDIRECT_DESC)) {
- dev_err(sc->sc_dev, CE_NOTE,
- "Host does not support RING_INDIRECT_DESC, bye.");
+ dev_err(sc->sc_dev, CE_WARN,
+ "Host does not support RING_INDIRECT_DESC. Cannot attach.");
return (DDI_FAILURE);
}
@@ -1535,6 +1543,7 @@ vioif_set_mac(struct vioif_softc *sc)
virtio_write_device_config_1(&sc->sc_virtio,
VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
}
+ sc->sc_mac_from_host = 0;
}
/* Get the mac address out of the hardware, or make up one. */
@@ -1548,8 +1557,7 @@ vioif_get_mac(struct vioif_softc *sc)
&sc->sc_virtio,
VIRTIO_NET_CONFIG_MAC + i);
}
- dev_err(sc->sc_dev, CE_NOTE, "Got MAC address from host: %s",
- ether_sprintf((struct ether_addr *)sc->sc_mac));
+ sc->sc_mac_from_host = 1;
} else {
/* Get a few random bytes */
(void) random_get_pseudo_bytes(sc->sc_mac, ETHERADDRL);
@@ -1561,7 +1569,7 @@ vioif_get_mac(struct vioif_softc *sc)
vioif_set_mac(sc);
dev_err(sc->sc_dev, CE_NOTE,
- "Generated a random MAC address: %s",
+ "!Generated a random MAC address: %s",
ether_sprintf((struct ether_addr *)sc->sc_mac));
}
}
@@ -1624,7 +1632,7 @@ vioif_check_features(struct vioif_softc *sc)
if (!vioif_has_feature(sc, VIRTIO_NET_F_GUEST_CSUM)) {
sc->sc_rx_csum = 0;
}
- cmn_err(CE_NOTE, "Csum enabled.");
+ dev_err(sc->sc_dev, CE_NOTE, "!Csum enabled.");
if (vioif_has_feature(sc, VIRTIO_NET_F_HOST_TSO4)) {
@@ -1638,11 +1646,11 @@ vioif_check_features(struct vioif_softc *sc)
*/
if (!vioif_has_feature(sc, VIRTIO_NET_F_HOST_ECN)) {
dev_err(sc->sc_dev, CE_NOTE,
- "TSO4 supported, but not ECN. "
+ "!TSO4 supported, but not ECN. "
"Not using LSO.");
sc->sc_tx_tso4 = 0;
} else {
- cmn_err(CE_NOTE, "LSO enabled");
+ dev_err(sc->sc_dev, CE_NOTE, "!LSO enabled");
}
}
}
@@ -1766,7 +1774,7 @@ vioif_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
vioif_check_features(sc);
- if (vioif_alloc_mems(sc))
+ if (vioif_alloc_mems(sc) != 0)
goto exit_alloc_mems;
if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
@@ -1854,7 +1862,7 @@ vioif_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
return (DDI_FAILURE);
}
- if (sc->sc_rxloan) {
+ if (sc->sc_rxloan > 0) {
dev_err(devinfo, CE_WARN, "!Some rx buffers are still upstream,"
" not detaching.");
return (DDI_FAILURE);
diff --git a/usr/src/uts/common/io/vnd/frameio.c b/usr/src/uts/common/io/vnd/frameio.c
new file mode 100644
index 0000000000..e4e700fa12
--- /dev/null
+++ b/usr/src/uts/common/io/vnd/frameio.c
@@ -0,0 +1,464 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Frame I/O utility functions
+ */
+
+#include <sys/frameio.h>
+
+#include <sys/file.h>
+#include <sys/types.h>
+#include <sys/kmem.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/stream.h>
+#include <sys/strsun.h>
+#include <sys/sysmacros.h>
+#include <sys/inttypes.h>
+
+static kmem_cache_t *frameio_cache;
+
+int
+frameio_init(void)
+{
+ frameio_cache = kmem_cache_create("frameio_cache",
+ sizeof (frameio_t) + sizeof (framevec_t) * FRAMEIO_NVECS_MAX,
+ 0, NULL, NULL, NULL, NULL, NULL, 0);
+ if (frameio_cache == NULL)
+ return (1);
+
+ return (0);
+}
+
+void
+frameio_fini(void)
+{
+ if (frameio_cache != NULL)
+ kmem_cache_destroy(frameio_cache);
+}
+
+frameio_t *
+frameio_alloc(int kmflags)
+{
+ return (kmem_cache_alloc(frameio_cache, kmflags));
+}
+
+void
+frameio_free(frameio_t *fio)
+{
+ return (kmem_cache_free(frameio_cache, fio));
+}
+
+/*
+ * Ensure that we don't see any garbage in the framevecs that we're nominally
+ * supposed to work with. Specifically we want to make sure that the buflen and
+ * the address are not zero.
+ */
+static int
+frameio_hdr_check_vecs(frameio_t *fio)
+{
+ int i;
+ for (i = 0; i < fio->fio_nvecs; i++)
+ if (fio->fio_vecs[i].fv_buf == NULL ||
+ fio->fio_vecs[i].fv_buflen == 0)
+ return (EINVAL);
+
+ return (0);
+}
+
+/*
+ * We have to copy in framevec32_t's. To work around the data model issues and
+ * trying not to copy memory we first copy in the framevec32_t data into the
+ * standard fio_vec space. Next we work backwards copying a given framevec32_t
+ * to a temporaory framevec_t and then overwrite the frameio_t's data. Note that
+ * it is important that we do this in reverse so as to ensure that we don't
+ * clobber data as the framevec_t is larger than the framevec32_t.
+ */
+static int
+frameio_hdr_copyin_ilp32(frameio_t *fio, const void *addr)
+{
+ framevec32_t *vec32p;
+ framevec_t fv;
+ int i;
+
+ vec32p = (framevec32_t *)&fio->fio_vecs[0];
+
+ if (ddi_copyin(addr, vec32p, sizeof (framevec32_t) * fio->fio_nvecs,
+ 0) != 0)
+ return (EFAULT);
+
+ for (i = fio->fio_nvecs - 1; i >= 0; i--) {
+ fv.fv_buf = (void *)(uintptr_t)vec32p[i].fv_buf;
+ fv.fv_buflen = vec32p[i].fv_buflen;
+ fv.fv_actlen = vec32p[i].fv_actlen;
+ fio->fio_vecs[i].fv_buf = fv.fv_buf;
+ fio->fio_vecs[i].fv_buflen = fv.fv_buflen;
+ fio->fio_vecs[i].fv_actlen = fv.fv_actlen;
+ }
+
+ return (frameio_hdr_check_vecs(fio));
+}
+
+/*
+ * Copy in a frame io header into fio with space for up to nvecs. If the frameio
+ * contains more vectors than specified it will be ignored. mode should contain
+ * information about the datamodel.
+ */
+int
+frameio_hdr_copyin(frameio_t *fio, int max_vecs, const void *addr, uint_t mode)
+{
+ int model = ddi_model_convert_from(mode & FMODELS);
+ int cpf = mode & FKIOCTL ? FKIOCTL : 0;
+ size_t fsize = model == DDI_MODEL_ILP32 ?
+ sizeof (frameio32_t) : sizeof (frameio_t);
+
+ /*
+ * The start of the header is the same in all data models for the
+ * current verison.
+ */
+ if (ddi_copyin(addr, fio, fsize, cpf) != 0)
+ return (EFAULT);
+
+ if (fio->fio_version != FRAMEIO_VERSION_ONE)
+ return (EINVAL);
+
+ if (fio->fio_nvecs > FRAMEIO_NVECS_MAX || fio->fio_nvecs == 0)
+ return (EINVAL);
+
+ if (fio->fio_nvpf == 0)
+ return (EINVAL);
+
+ if (fio->fio_nvecs % fio->fio_nvpf != 0)
+ return (EINVAL);
+
+ if (fio->fio_nvecs > max_vecs)
+ return (EOVERFLOW);
+
+ addr = (void *)((uintptr_t)addr + fsize);
+ if (model == DDI_MODEL_ILP32) {
+ if (cpf != 0)
+ return (EINVAL);
+ return (frameio_hdr_copyin_ilp32(fio, addr));
+ }
+
+ if (ddi_copyin(addr, &fio->fio_vecs[0],
+ sizeof (framevec_t) * fio->fio_nvecs, cpf) != 0)
+ return (EFAULT);
+
+ return (frameio_hdr_check_vecs(fio));
+}
+
+static mblk_t *
+frameio_allocb(size_t sz)
+{
+ mblk_t *mp;
+
+ mp = allocb(sz, 0);
+ if (mp == NULL)
+ return (NULL);
+
+ mp->b_datap->db_type = M_DATA;
+ return (mp);
+}
+
+static int
+framevec_mblk_read(framevec_t *fv, mblk_t **mpp, int cpf)
+{
+ mblk_t *mp;
+ cpf = cpf != 0 ? FKIOCTL : 0;
+
+ mp = frameio_allocb(fv->fv_buflen);
+
+ if (mp == NULL) {
+ freemsg(mp);
+ return (EAGAIN);
+ }
+
+ if (ddi_copyin(fv->fv_buf, mp->b_wptr, fv->fv_buflen,
+ cpf) != 0) {
+ freemsg(mp);
+ return (EFAULT);
+ }
+
+ mp->b_wptr += fv->fv_buflen;
+ *mpp = mp;
+ return (0);
+}
+
+/*
+ * Read a set of frame vectors that make up a single message boundary and return
+ * that as a single message in *mpp that consists of multiple data parts.
+ */
+static int
+frameio_mblk_read(frameio_t *fio, framevec_t *fv, mblk_t **mpp, int cpf)
+{
+ int nparts = fio->fio_nvpf;
+ int part, error;
+ mblk_t *mp;
+
+ *mpp = NULL;
+ cpf = cpf != 0 ? FKIOCTL : 0;
+
+ /*
+ * Construct the initial frame
+ */
+ for (part = 0; part < nparts; part++) {
+ error = framevec_mblk_read(fv, &mp, cpf);
+ if (error != 0) {
+ freemsg(*mpp);
+ return (error);
+ }
+
+ if (*mpp == NULL)
+ *mpp = mp;
+ else
+ linkb(*mpp, mp);
+ fv++;
+ }
+
+ return (0);
+}
+
+/*
+ * Read data from a series of frameio vectors into a message block chain. A
+ * given frameio request has a number of discrete messages divided into
+ * individual vectors based on fio->fio_nvcspframe. Each discrete message will
+ * be constructed into a message block chain pointed to by b_next.
+ *
+ * If we get an EAGAIN while trying to construct a given message block what we
+ * return depends on what else we've done so far. If we have succesfully
+ * completed at least one message then we free everything else we've done so
+ * far and return that. If no messages have been completed we return EAGAIN. If
+ * instead we encounter a different error, say EFAULT, then all of the fv_actlen
+ * entries values are undefined.
+ */
+int
+frameio_mblk_chain_read(frameio_t *fio, mblk_t **mpp, int *nvecs, int cpf)
+{
+ int error = ENOTSUP;
+ int nframes = fio->fio_nvecs / fio->fio_nvpf;
+ int frame;
+ framevec_t *fv;
+ mblk_t *mp, *bmp = NULL;
+
+ /*
+ * Protect against bogus kernel subsystems.
+ */
+ VERIFY(fio->fio_nvecs > 0);
+ VERIFY(fio->fio_nvecs % fio->fio_nvpf == 0);
+
+ *mpp = NULL;
+ cpf = cpf != 0 ? FKIOCTL : 0;
+
+ fv = &fio->fio_vecs[0];
+ for (frame = 0; frame < nframes; frame++) {
+ error = frameio_mblk_read(fio, fv, &mp, cpf);
+ if (error != 0)
+ goto failed;
+
+ if (bmp != NULL)
+ bmp->b_next = mp;
+ else
+ *mpp = mp;
+ bmp = mp;
+ }
+
+ *nvecs = nframes;
+ return (0);
+failed:
+ /*
+ * On EAGAIN we've already taken care of making sure that we have no
+ * leftover messages, eg. they were never linked in.
+ */
+ if (error == EAGAIN) {
+ if (frame != 0)
+ error = 0;
+ if (*nvecs != NULL)
+ *nvecs = frame;
+ ASSERT(*mpp != NULL);
+ } else {
+ for (mp = *mpp; mp != NULL; mp = bmp) {
+ bmp = mp->b_next;
+ freemsg(mp);
+ }
+ if (nvecs != NULL)
+ *nvecs = 0;
+ *mpp = NULL;
+ }
+ return (error);
+}
+
+size_t
+frameio_frame_length(frameio_t *fio, framevec_t *fv)
+{
+ int i;
+ size_t len = 0;
+
+ for (i = 0; i < fio->fio_nvpf; i++, fv++)
+ len += fv->fv_buflen;
+
+ return (len);
+}
+
+/*
+ * Write a portion of an mblk to the current.
+ */
+static int
+framevec_write_mblk_part(framevec_t *fv, mblk_t *mp, size_t len, size_t moff,
+ size_t foff, int cpf)
+{
+ ASSERT(len <= MBLKL(mp) - moff);
+ ASSERT(len <= fv->fv_buflen - fv->fv_actlen);
+ cpf = cpf != 0 ? FKIOCTL : 0;
+
+ if (ddi_copyout(mp->b_rptr + moff, fv->fv_buf + foff, len, cpf) != 0)
+ return (EFAULT);
+ fv->fv_actlen += len;
+
+ return (0);
+}
+
+/*
+ * Because copying this out to the user might fail we don't want to update the
+ * b_rptr in case we need to copy it out again.
+ */
+static int
+framevec_map_blk(frameio_t *fio, framevec_t *fv, mblk_t *mp, int cpf)
+{
+ int err;
+ size_t msize, blksize, len, moff, foff;
+
+ msize = msgsize(mp);
+ if (msize > frameio_frame_length(fio, fv))
+ return (EOVERFLOW);
+
+ moff = 0;
+ foff = 0;
+ blksize = MBLKL(mp);
+ fv->fv_actlen = 0;
+ while (msize != 0) {
+ len = MIN(blksize, fv->fv_buflen - fv->fv_actlen);
+ err = framevec_write_mblk_part(fv, mp, len, moff, foff, cpf);
+ if (err != 0)
+ return (err);
+
+ msize -= len;
+ blksize -= len;
+ moff += len;
+ foff += len;
+
+ if (blksize == 0 && msize != 0) {
+ mp = mp->b_cont;
+ ASSERT(mp != NULL);
+ moff = 0;
+ blksize = MBLKL(mp);
+ }
+
+ if (fv->fv_buflen == fv->fv_actlen && msize != 0) {
+ fv++;
+ fv->fv_actlen = 0;
+ foff = 0;
+ }
+ }
+
+ return (0);
+}
+
+int
+frameio_mblk_chain_write(frameio_t *fio, frameio_write_mblk_map_t map,
+ mblk_t *mp, int *nwrite, int cpf)
+{
+ int mcount = 0;
+ int ret = 0;
+
+ if (map != MAP_BLK_FRAME)
+ return (EINVAL);
+
+ while (mp != NULL && mcount < fio->fio_nvecs) {
+ ret = framevec_map_blk(fio, &fio->fio_vecs[mcount], mp, cpf);
+ if (ret != 0)
+ break;
+ mcount += fio->fio_nvpf;
+ mp = mp->b_next;
+ }
+
+ if (ret != 0 && mcount == 0) {
+ if (nwrite != NULL)
+ *nwrite = 0;
+ return (ret);
+ }
+
+ if (nwrite != NULL)
+ *nwrite = mcount / fio->fio_nvpf;
+
+ return (0);
+}
+
+/*
+ * Copy out nframes worth of frameio header data back to userland.
+ */
+int
+frameio_hdr_copyout(frameio_t *fio, int nframes, void *addr, uint_t mode)
+{
+ int i;
+ int model = ddi_model_convert_from(mode & FMODELS);
+ framevec32_t *vec32p;
+ framevec32_t f;
+
+ if (fio->fio_nvecs / fio->fio_nvpf < nframes)
+ return (EINVAL);
+
+ fio->fio_nvecs = nframes * fio->fio_nvpf;
+
+ if (model == DDI_MODEL_NONE) {
+ if (ddi_copyout(fio, addr,
+ sizeof (frameio_t) + fio->fio_nvecs * sizeof (framevec_t),
+ mode & FKIOCTL) != 0)
+ return (EFAULT);
+ return (0);
+ }
+
+ ASSERT(model == DDI_MODEL_ILP32);
+
+ vec32p = (framevec32_t *)&fio->fio_vecs[0];
+ for (i = 0; i < fio->fio_nvecs; i++) {
+ f.fv_buf = (caddr32_t)(uintptr_t)fio->fio_vecs[i].fv_buf;
+ if (fio->fio_vecs[i].fv_buflen > UINT_MAX ||
+ fio->fio_vecs[i].fv_actlen > UINT_MAX)
+ return (EOVERFLOW);
+ f.fv_buflen = fio->fio_vecs[i].fv_buflen;
+ f.fv_actlen = fio->fio_vecs[i].fv_actlen;
+ vec32p[i].fv_buf = f.fv_buf;
+ vec32p[i].fv_buflen = f.fv_buflen;
+ vec32p[i].fv_actlen = f.fv_actlen;
+ }
+
+ if (ddi_copyout(fio, addr,
+ sizeof (frameio32_t) + fio->fio_nvecs * sizeof (framevec32_t),
+ mode & FKIOCTL) != 0)
+ return (EFAULT);
+ return (0);
+}
+
+void
+frameio_mark_consumed(frameio_t *fio, int nframes)
+{
+ int i;
+
+ ASSERT(fio->fio_nvecs / fio->fio_nvpf >= nframes);
+ for (i = 0; i < nframes * fio->fio_nvpf; i++)
+ fio->fio_vecs[i].fv_actlen = fio->fio_vecs[i].fv_buflen;
+}
diff --git a/usr/src/uts/common/io/vnd/vnd.c b/usr/src/uts/common/io/vnd/vnd.c
new file mode 100644
index 0000000000..2abb6f9464
--- /dev/null
+++ b/usr/src/uts/common/io/vnd/vnd.c
@@ -0,0 +1,5800 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+/*
+ * vnd - virtual (machine) networking datapath
+ *
+ * vnd's purpose is to provide a highly performant data path for Layer 2 network
+ * traffic and exist side by side an active IP netstack, each servicing
+ * different datalinks. vnd provides many of the same capabilities as the
+ * current TCP/IP stack does and some specific to layer two. Specifically:
+ *
+ * o Use of the DLD fastpath
+ * o Packet capture hooks
+ * o Ability to use hardware capabilities
+ * o Useful interfaces for handling multiple frames
+ *
+ * The following image shows where vnd fits into today's networking stack:
+ *
+ * +---------+----------+----------+
+ * | libdlpi | libvnd | libsocket|
+ * +---------+----------+----------+
+ * | · · VFS |
+ * | VFS · VFS +----------+
+ * | · | sockfs |
+ * +---------+----------+----------+
+ * | | VND | IP |
+ * | +----------+----------+
+ * | DLD/DLS |
+ * +-------------------------------+
+ * | MAC |
+ * +-------------------------------+
+ * | GLDv3 |
+ * +-------------------------------+
+ *
+ * -----------------------------------------
+ * A Tale of Two Devices - DDI Device Basics
+ * -----------------------------------------
+ *
+ * vnd presents itself to userland as a character device; however, it also is a
+ * STREAMS device so that it can interface with dld and the rest of the
+ * networking stack. Users never interface with the STREAMs devices directly and
+ * they are purely an implementation detail of vnd. Opening the STREAMS device
+ * require kcred and as such userland cannot interact with it or push it onto
+ * the stream head.
+ *
+ * The main vnd character device, /dev/vnd/ctl, is a self-cloning device. Every
+ * clone gets its own minor number; however, minor nodes are not created in the
+ * devices tree for these instances. In this state a user may do two different
+ * things. They may issue ioctls that affect global state or they may issue
+ * ioctls that try to attach it to a given datalink. Once a minor device has
+ * been attached to a datalink, all operations on it are scoped to that context,
+ * therefore subsequent global operations are not permitted.
+ *
+ * A given device can be linked into the /devices and /dev name space via a link
+ * ioctl. That ioctl causes a minor node to be created in /devices and then it
+ * will also appear under /dev/vnd/ due to vnd's sdev plugin. This is similar
+ * to, but simpler than, IP's persistence mechanism.
+ *
+ * ---------------------
+ * Binding to a datalink
+ * ---------------------
+ *
+ * Datalinks are backed by the dld (datalink device) and dls (datalink services)
+ * drivers. These drivers provide a STREAMS device for datalinks on the system
+ * which are exposed through /dev/net. Userland generally manipulates datalinks
+ * through libdlpi. When an IP interface is being plumbed up what actually
+ * happens is that someone does a dlpi_open(3DLPI) of the underlying datalink
+ * and then pushes on the ip STREAMS module with an I_PUSH ioctl. Modules may
+ * then can negotiate with dld and dls to obtain access to various capabilities
+ * and fast paths via a series of STREAMS messages.
+ *
+ * In vnd, we do the same thing, but we leave our STREAMS module as an
+ * implementation detail of the system. We don't want users to be able to
+ * arbitrarily push vnd STREAMS module onto any stream, so we explicitly require
+ * kcred to manipulate it. Thus, when a user issues a request to attach a
+ * datalink to a minor instance of the character device, that vnd minor instance
+ * itself does a layered open (ldi_open_by_name(9F)) of the specified datalink.
+ * vnd does that open using the passed in credentials from the ioctl, not kcred.
+ * This ensures that users who doesn't have permissions to open the device
+ * cannot. Once that's been opened, we push on the vnd streams module.
+ *
+ * Once the vnd STREAMS instance has been created for this device, eg. the
+ * I_PUSH ioctl returns, we explicitly send a STREAMS ioctl
+ * (VND_STRIOC_ASSOCIATE) to associate the vnd STREAMS and character devices.
+ * This association begins the STREAM device's initialization. We start up an
+ * asynchronous state machine that takes care of all the different aspects of
+ * plumbing up the device with dld and dls and enabling the MAC fast path. We
+ * need to guarantee to consumers of the character device that by the time their
+ * ioctl returns, the data path has been fully initialized.
+ *
+ * The state progression is fairly linear. There are two general steady states.
+ * The first is VND_S_ONLINE, which means that everything is jacked up and good
+ * to go. The alternative is VND_S_ZOMBIE, which means that the streams device
+ * encountered an error or we have finished tearing it down and the character
+ * device can clean it up. The following is our state progression and the
+ * meaning of each state:
+ *
+ * |
+ * |
+ * V
+ * +---------------+
+ * | VNS_S_INITIAL | This is our initial state. Every
+ * +---------------+ vnd STREAMS device starts here.
+ * | While in this state, only dlpi
+ * | M_PROTO and M_IOCTL messages can be
+ * | sent or received. All STREAMS based
+ * | data messages are dropped.
+ * | We transition out of this state by
+ * | sending a DL_INFO_REQ to obtain
+ * | information about the underlying
+ * | link.
+ * v
+ * +-----------------+
+ * +--<-| VNS_S_INFO_SENT | In this state, we verify and
+ * | +-----------------+ record information about the
+ * | | underlying device. If the device is
+ * | | not suitable, eg. not of type
+ * v | DL_ETHER, then we immediately
+ * | | become a ZOMBIE. To leave this
+ * | | state we request exclusive active
+ * | | access to the device via
+ * v | DL_EXCLUSIVE_REQ.
+ * | v
+ * | +----------------------+
+ * +--<-| VNS_S_EXCLUSIVE_SENT | In this state, we verify whether
+ * | +----------------------+ or not we were able to obtain
+ * | | | exclusive access to the device. If
+ * | | | we were not able to, then we leave,
+ * v | | as that means that something like
+ * | | | IP is already plumbed up on top of
+ * | | | the datalink. We leave this state
+ * | | | by progressing through to the
+ * | | | appropriate DLPI primitive, either
+ * v | | DLPI_ATTACH_REQ or DLPI_BIND_REQ
+ * | | | depending on the style of the
+ * | | | datalink.
+ * | | v
+ * | | +-------------------+
+ * +------ |--<-| VNS_S_ATTACH_SENT | In this state, we verify we were
+ * | | +-------------------+ able to perform a standard DLPI
+ * | | | attach and if so, go ahead and
+ * v | | send a DLPI_BIND_REQ.
+ * | v v
+ * | +-------------------+
+ * +--<-| VNS_S_BIND_SENT | In this state we see the result of
+ * | +-------------------+ our attempt to bind to PPA 0 of the
+ * v | underlying device. Because we're
+ * | | trying to be a layer two datapath,
+ * | | the specific attachment point isn't
+ * | | too important as we're going to
+ * v | have to enable promiscuous mode. We
+ * | | transition out of this by sending
+ * | | our first of three promiscuous mode
+ * | | requests.
+ * v v
+ * | +------------------------+
+ * +--<-| VNS_S_SAP_PROMISC_SENT | In this state we verify that we
+ * | +------------------------+ were able to enable promiscuous
+ * | | mode at the physical level. We
+ * | | transition out of this by enabling
+ * | | multicast and broadcast promiscuous
+ * v | mode.
+ * | v
+ * | +--------------------------+
+ * +--<-| VNS_S_MULTI_PROMISC_SENT | In this state we verify that we
+ * | +--------------------------+ have enabled DL_PROMISC_MULTI and
+ * v | move onto the second promiscuous
+ * | | mode request.
+ * | v
+ * | +----------------------------+
+ * +--<-| VNS_S_RX_ONLY_PROMISC_SENT | In this state we verify that we
+ * | +----------------------------+ enabled RX_ONLY promiscuous mode.
+ * | | We specifically do this as we don't
+ * v | want to receive our own traffic
+ * | | that we'll send out. We leave this
+ * | | state by enabling the final flag
+ * | | DL_PROMISC_FIXUPS.
+ * | v
+ * | +--------------------------+
+ * +--<-| VNS_S_FIXUP_PROMISC_SENT | In this state we verify that we
+ * | +--------------------------+ enabled FIXUP promiscuous mode.
+ * | | We specifically do this as we need
+ * v | to ensure that traffic which is
+ * | | received by being looped back to us
+ * | | correctly has checksums fixed. We
+ * | | leave this state by requesting the
+ * | | dld/dls capabilities that we can
+ * v | process.
+ * | v
+ * | +--------------------+
+ * +--<-| VNS_S_CAPAB_Q_SENT | We loop over the set of
+ * | +--------------------+ capabilities that dld advertised
+ * | | and enable the ones that currently
+ * v | support for use. See the section
+ * | | later on regarding capabilities
+ * | | for more information. We leave this
+ * | | state by sending an enable request.
+ * v v
+ * | +--------------------+
+ * +--<-| VNS_S_CAPAB_E_SENT | Here we finish all capability
+ * | +--------------------+ initialization. Once finished, we
+ * | | transition to the next state. If
+ * v | the dld fast path is not available,
+ * | | we become a zombie.
+ * | v
+ * | +--------------+
+ * | | VNS_S_ONLINE | This is a vnd STREAMS device's
+ * | +--------------+ steady state. It will normally
+ * | | reside in this state while it is in
+ * | | active use. It will only transition
+ * v | to the next state when the STREAMS
+ * | | device is closed by the character
+ * | | device. In this state, all data
+ * | | flows over the dld fast path.
+ * | v
+ * | +---------------------+
+ * +--->| VNS_S_SHUTTING_DOWN | This vnd state takes care of
+ * | +---------------------+ disabling capabilities and
+ * | | flushing all data. At this point
+ * | | any additional data that we receive
+ * | | will be dropped. We leave this
+ * v | state by trying to remove multicast
+ * | | promiscuity.
+ * | |
+ * | v
+ * | +---------------------------------+
+ * +-->| VNS_S_MULTICAST_PROMISCOFF_SENT | In this state, we check if we have
+ * | +---------------------------------+ successfully removed multicast
+ * | | promiscuous mode. If we have
+ * | | failed, we still carry on but only
+ * | | warn. We leave this state by trying
+ * | | to disable SAP level promiscuous
+ * | | mode.
+ * | v
+ * | +---------------------------+
+ * +-->| VNS_S_SAP_PROMISCOFF_SENT | In this state, we check if we have
+ * | +---------------------------+ successfully removed SAP level
+ * | | promiscuous mode. If we have
+ * | | failed, we still carry on but only
+ * | | warn. Note that we don't worry
+ * | | about either of
+ * | | DL_PROMISC_FIXUPS or
+ * | | DL_PROMISC_RX_ONLY. If these are
+ * | | the only two entries left, then we
+ * | | should have anything that MAC is
+ * | | doing for us at this point,
+ * | | therefore it's safe for us to
+ * | | proceed to unbind, which is how we
+ * | | leave this state via a
+ * | v DL_UNBIND_REQ.
+ * | +-------------------+
+ * +--->| VNS_S_UNBIND_SENT | Here, we check how the unbind
+ * | +-------------------+ request went. Regardless of its
+ * | | success, we always transition to
+ * | | a zombie state.
+ * | v
+ * | +--------------+
+ * +--->| VNS_S_ZOMBIE | In this state, the vnd STREAMS
+ * +--------------+ device is waiting to finish being
+ * reaped. Because we have no more
+ * ways to receive data it should be
+ * safe to destroy all remaining data
+ * structures.
+ *
+ * If the stream association fails for any reason the state machine reaches
+ * VNS_S_ZOMBIE. A more detailed vnd_errno_t will propagate back through the
+ * STREAMS ioctl to the character device. That will fail the user ioctl and
+ * propagate the vnd_errno_t back to userland. If, on the other hand, the
+ * association succeeds, then the vnd STREAMS device will be fully plumbed up
+ * and ready to transmit and receive message blocks. Consumers will be able to
+ * start using the other cbops(9E) entry points once the attach has fully
+ * finished, which will occur after the original user attach ioctl to the
+ * character device returns.
+ *
+ * It's quite important that we end up sending the full series of STREAMS
+ * messages when tearing down. While it's tempting to say that we should just
+ * rely on the STREAMS device being closed to properly ensure that we have no
+ * more additional data, that's not sufficient due to our use of direct
+ * callbacks. DLS does not ensure that by the time we change the direct
+ * callback (vnd_mac_input) that all callers to it will have been quiesced.
+ * However, it does guarantee that if we disable promiscuous mode ourselves and
+ * we turn off the main data path via DL_UNBIND_REQ that it will work.
+ * Therefore, we make sure to do this ourselves rather than letting DLS/DLD do
+ * it as part of tearing down the STREAMS device. This ensures that we'll
+ * quiesce all data before we destroy our data structures and thus we should
+ * eliminate the race in changing the data function.
+ *
+ * --------------------
+ * General Architecture
+ * --------------------
+ *
+ * There are several different devices and structures in the vnd driver. There
+ * is a per-netstack component, pieces related to the character device that
+ * consumers see, the internal STREAMS device state, and the data queues
+ * themselves. The following ASCII art picture describes their relationships and
+ * some of the major pieces of data that contain them. These are not exhaustive,
+ * e.g. synchronization primitives are left out.
+ *
+ * +----------------+ +-----------------+
+ * | global | | global |
+ * | device list | | netstack list |
+ * | vnd_dev_list | | vnd_nsd_list |
+ * +----------------+ +-----------------+
+ * | |
+ * | v
+ * | +-------------------+ +-------------------+
+ * | | per-netstack data | ---> | per-netstack data | --> ...
+ * | | vnd_pnsd_t | | vnd_pnsd_t |
+ * | | | +-------------------+
+ * | | |
+ * | | nestackid_t ---+----> Netstack ID
+ * | | vnd_pnsd_flags_t -+----> Status flags
+ * | | zoneid_t ---+----> Zone ID for this netstack
+ * | | hook_family_t ---+----> VND IPv4 Hooks
+ * | | hook_family_t ---+----> VND IPv6 Hooks
+ * | | list_t ----+ |
+ * | +------------+------+
+ * | |
+ * | v
+ * | +------------------+ +------------------+
+ * | | character device | ---> | character device | -> ...
+ * +---------->| vnd_dev_t | | vnd_dev_t |
+ * | | +------------------+
+ * | |
+ * | minor_t ---+--> device minor number
+ * | ldi_handle_t ---+--> handle to /dev/net/%datalink
+ * | vnd_dev_flags_t -+--> device flags, non blocking, etc.
+ * | char[] ---+--> name if linked
+ * | vnd_str_t * -+ |
+ * +--------------+---+
+ * |
+ * v
+ * +-------------------------+
+ * | STREAMS device |
+ * | vnd_str_t |
+ * | |
+ * | vnd_str_state_t ---+---> State machine state
+ * | gsqueue_t * ---+---> mblk_t Serialization queue
+ * | vnd_str_stat_t ---+---> per-device kstats
+ * | vnd_str_capab_t ---+----------------------------+
+ * | vnd_data_queue_t ---+ | |
+ * | vnd_data_queue_t -+ | | v
+ * +-------------------+-+---+ +---------------------+
+ * | | | Stream capabilities |
+ * | | | vnd_str_capab_t |
+ * | | | |
+ * | | supported caps <--+-- vnd_capab_flags_t |
+ * | | dld cap handle <--+-- void * |
+ * | | direct tx func <--+-- vnd_dld_tx_t |
+ * | | +---------------------+
+ * | |
+ * +----------------+ +-------------+
+ * | |
+ * v v
+ * +-------------------+ +-------------------+
+ * | Read data queue | | Write data queue |
+ * | vnd_data_queue_t | | vnd_data_queue_t |
+ * | | | |
+ * | size_t ----+--> Current size | size_t ----+--> Current size
+ * | size_t ----+--> Max size | size_t ----+--> Max size
+ * | mblk_t * ----+--> Queue head | mblk_t * ----+--> Queue head
+ * | mblk_t * ----+--> Queue tail | mblk_t * ----+--> Queue tail
+ * +-------------------+ +-------------------+
+ *
+ *
+ * Globally, we maintain two lists. One list contains all of the character
+ * device soft states. The other maintains a list of all our netstack soft
+ * states. Each netstack maintains a list of active devices that have been
+ * associated with a datalink in its netstack.
+ *
+ * Recall that a given minor instance of the character device exists in one of
+ * two modes. It can either be a cloned open of /dev/vnd/ctl, the control node,
+ * or it can be associated with a given datalink. When minor instances are in
+ * the former state, they do not exist in a given vnd_pnsd_t's list of devices.
+ * As part of attaching to a datalink, the given vnd_dev_t will be inserted into
+ * the appropriate vnd_pnsd_t. In addition, this will cause a STREAMS device, a
+ * vnd_str_t, to be created and associated to a vnd_dev_t.
+ *
+ * The character device, and its vnd_dev_t, is the interface to the rest of the
+ * system. The vnd_dev_t keeps track of various aspects like whether various
+ * operations, such as read, write and the frameio ioctls, are considered
+ * blocking or non-blocking in the O_NONBLOCK sense. It also is responsible for
+ * keeping track of things like the name of the device, if any, in /dev. The
+ * vnd_str_t, on the other hand manages aspects like buffer sizes and the actual
+ * data queues. However, ioctls that manipulate these properties all go through
+ * the vnd_dev_t to its associated vnd_str_t.
+ *
+ * Each of the STREAMS devices, the vnd_str_t, maintains two data queues. One
+ * for frames to transmit (write queue) and one for frames received (read
+ * queue). These data queues have a maximum size and attempting to add data
+ * beyond that maximum size will result in data being dropped. The sizes are
+ * configurable via ioctls VND_IOC_SETTXBUF, VND_IOC_SETRXBUF. Data either sits
+ * in those buffers or has a reservation in those buffers while they are in vnd
+ * and waiting to be consumed by the user or by mac.
+ *
+ * Finally, the vnd_str_t also has a vnd_str_capab_t which we use to manage the
+ * available, negotiated, and currently active features.
+ *
+ * ----------------------
+ * Data Path and gsqueues
+ * ----------------------
+ *
+ * There's a lot of plumbing in vnd to get to the point where we can send data,
+ * but vnd's bread and butter is the data path, so it's worth diving into it in
+ * more detail. Data enters and exits the system from two ends.
+ *
+ * The first end is the vnd consumer. This comes in the form of read and write
+ * system calls as well as the frame I/O ioctls. The read and write system calls
+ * operate on a single frame at a time. Think of a frame as a single message
+ * that has come in off the wire, which may itself comprise multiple mblk_t's
+ * linked together in the kernel. readv(2) and writev(2) have the same
+ * limitations as read(2) and write(2). We enforce this as the system is
+ * required to fill up every uio(9S) buffer before moving onto the next one.
+ * This means that if you have a MTU sized buffer and two frames come in which
+ * are less than half of the MTU they must fill up the given iovec. Even if we
+ * didn't want to do this, we have no way of informing the supplier of the
+ * iovecs that they were only partially filled or where one frame ends and
+ * another begins. That's life, as such we have frame I/O which solves this
+ * problem. It allows for multiple frames to be consumed as well as for frames
+ * to be broken down into multiple vector components.
+ *
+ * The second end is the mac direct calls. As part of negotiating capabilities
+ * via dld, we give mac a function of ours to call when packets are received
+ * [vnd_mac_input()] and a callback to indicate that flow has been restored
+ * [vnd_mac_flow_control()]. In turn, we also get a function pointer that we can
+ * transmit data with. As part of the contract with mac, mac is allowed to flow
+ * control us by returning a cookie to the transmit function. When that happens,
+ * all outbound traffic is halted until our callback function is called and we
+ * can schedule drains.
+ *
+ * It's worth looking at these in further detail. We'll start with the rx path.
+ *
+ *
+ * |
+ * * . . . packets from gld
+ * |
+ * v
+ * +-------------+
+ * | mac |
+ * +-------------+
+ * |
+ * v
+ * +-------------+
+ * | dld |
+ * +-------------+
+ * |
+ * * . . . dld direct callback
+ * |
+ * v
+ * +---------------+
+ * | vnd_mac_input |
+ * +---------------+
+ * |
+ * v
+ * +---------+ +-------------+
+ * | dropped |<--*---------| vnd_hooks |
+ * | by | . +-------------+
+ * | hooks | . drop probe |
+ * +---------+ kstat bump * . . . Do we have free
+ * | buffer space?
+ * |
+ * no . | . yes
+ * . + .
+ * +---*--+------*-------+
+ * | |
+ * * . . drop probe * . . recv probe
+ * | kstat bump | kstat bump
+ * v |
+ * +---------+ * . . fire pollin
+ * | freemsg | v
+ * +---------+ +-----------------------+
+ * | vnd_str_t`vns_dq_read |
+ * +-----------------------+
+ * ^ ^
+ * +----------+ | | +---------+
+ * | read(9E) |-->-+ +--<--| frameio |
+ * +----------+ +---------+
+ *
+ * The rx path is rather linear. Packets come into us from mac. We always run
+ * them through the various hooks, and if they come out of that, we inspect the
+ * read data queue. If there is not enough space for a packet, we drop it.
+ * Otherwise, we append it to the data queue, and fire read notifications
+ * targetting anyone polling or doing blocking I/O on this device. Those
+ * consumers then drain the head of the data queue.
+ *
+ * The tx path is more complicated due to mac flow control. After any call into
+ * mac, we may have to potentially suspend writes and buffer data for an
+ * arbitrary amount of time. As such, we need to carefully track the total
+ * amount of outstanding data so that we don't waste kernel memory. This is
+ * further complicated by the fact that mac will asynchronously tell us when our
+ * flow has been resumed.
+ *
+ * For data to be able to enter the system, it needs to be able to take a
+ * reservation from the write data queue. Once the reservation has been
+ * obtained, we enter the gsqueue so that we can actually append it. We use
+ * gsqueues (serialization queues) to ensure that packets are manipulated in
+ * order as we deal with the draining and appending packets. We also leverage
+ * its worker thread to help us do draining after mac has restorted our flow.
+ *
+ * The following image describes the flow:
+ *
+ * +-----------+ +--------------+ +-------------------------+ +------+
+ * | write(9E) |-->| Space in the |--*--->| gsqueue_enter_one() |-->| Done |
+ * | frameio | | write queue? | . | +->vnd_squeue_tx_append | +------+
+ * +-----------+ +--------------+ . +-------------------------+
+ * | ^ .
+ * | | . reserve space from gsqueue
+ * | | |
+ * queue . . . * | space v
+ * full | * . . . avail +------------------------+
+ * v | | vnd_squeue_tx_append() |
+ * +--------+ +------------+ +------------------------+
+ * | EAGAIN |<--*------| Non-block? |<-+ |
+ * +--------+ . +------------+ | v
+ * . yes v | wait +--------------+
+ * no . .* * . . for | append chain |
+ * +----+ space | to outgoing |
+ * | mblk chain |
+ * from gsqueue +--------------+
+ * | |
+ * | +-------------------------------------------------+
+ * | |
+ * | | yes . . .
+ * v v .
+ * +-----------------------+ +--------------+ . +------+
+ * | vnd_squeue_tx_drain() |--->| mac blocked? |----*---->| Done |
+ * +-----------------------+ +--------------+ +------+
+ * | |
+ * +---------------------------------|---------------------+
+ * | | tx |
+ * | no . . * queue . . *
+ * | flow controlled . | empty * . fire pollout
+ * | . v | if mblk_t's
+ * +-------------+ . +---------------------+ | sent
+ * | set blocked |<----*------| vnd_squeue_tx_one() |--------^-------+
+ * | flags | +---------------------+ |
+ * +-------------+ More data | | | More data |
+ * and limit ^ v * . . and limit ^
+ * not reached . . * | | reached |
+ * +----+ | |
+ * v |
+ * +----------+ +-------------+ +---------------------------+
+ * | mac flow |--------->| remove mac |--->| gsqueue_enter_one() with |
+ * | control | | block flags | | vnd_squeue_tx_drain() and |
+ * | callback | +-------------+ | GSQUEUE_FILL flag, iff |
+ * +----------+ | not already scheduled |
+ * +---------------------------+
+ *
+ * The final path taken for a given write(9E)/frameio ioctl depends on whether
+ * or not the vnd_dev_t is non-blocking. That controls the initial path of
+ * trying to take a reservation in write data queue. If the device is in
+ * non-blocking mode, we'll return EAGAIN when there is not enough space
+ * available, otherwise, the calling thread blocks on the data queue.
+ *
+ * Today when we call into vnd_squeue_tx_drain() we will not try to drain the
+ * entire queue, as that could be quite large and we don't want to necessarily
+ * keep the thread that's doing the drain until it's been finished. Not only
+ * could more data be coming in, but the draining thread could be a userland
+ * thread that has more work to do. We have two limits today. There is an upper
+ * bound on the total amount of data and the total number of mblk_t chains. If
+ * we hit either limit, then we will schedule another drain in the gsqueue and
+ * go from there.
+ *
+ * It's worth taking some time to describe how we interact with gsqueues. vnd
+ * has a gsqueue_set_t for itself. It's important that it has its own set, as
+ * the profile of work that vnd does is different from other sub-systems in the
+ * kernel. When we open a STREAMS device in vnd_s_open, we get a random gsqueue.
+ * Unlike TCP/IP which uses an gsqueue for per TCP connection, we end up
+ * maintaining one for a given device. Because of that, we want to use a
+ * pseudo-random one to try and spread out the load, and picking one at random
+ * is likely to be just as good as any fancy algorithm we might come up with,
+ * especially as any two devices could have radically different transmit
+ * profiles.
+ *
+ * While some of the write path may seem complicated, it does allow us to
+ * maintain an important property. Once we have acknowledged a write(9E) or
+ * frameio ioctl, we will not drop the packet, excepting something like ipf via
+ * the firewall hooks.
+ *
+ * There is one other source of flow control that can exist in the system which
+ * is in the form of a barrier. The barrier is an internal mechanism used for
+ * ensuring that an gsqueue is drained for a given device. We use this as part
+ * of tearing down. Specifically we disable the write path so nothing new can be
+ * inserted into the gsqueue and then insert a barrier block. Once the barrier
+ * block comes out of the gsqueue, then we know nothing else in the gsqueue that
+ * could refer to the vnd_str_t, being destroyed, exists.
+ *
+ * ---------------------
+ * vnd, zones, netstacks
+ * ---------------------
+ *
+ * vnd devices are scoped to datalinks and datalinks are scoped to a netstack.
+ * Because of that, vnd is also a netstack module. It registers with the
+ * netstack sub-system and receives callbacks every time a netstack is created,
+ * being shutdown, and destroyed. The netstack callbacks drive the creation and
+ * destruction of the vnd_pnsd_t structures.
+ *
+ * Recall from the earlier architecture diagrams that every vnd device is scoped
+ * to a netstack and known about by a given vnd_pnsd_t. When that netstack is
+ * torn down, we also tear down any vnd devices that are hanging around. When
+ * the netstack is torn down, we know that any zones that are scoped to that
+ * netstack are being shut down and have no processes remaining. This is going
+ * to be the case whether they are shared or exclusive stack zones. We have to
+ * perform a careful dance.
+ *
+ * There are two different callbacks that happen on tear down, the first is a
+ * shutdown callback, the second is a destroy callback. When the shutdown
+ * callback is fired we need to prepare for the netstack to go away and ensure
+ * that nothing can continue to persist itself.
+ *
+ * More specifically, when we get notice of a stack being shutdown we first
+ * remove the netstack from the global netstack list to ensure that no one new
+ * can come in and find the netstack and get a reference to it. After that, we
+ * notify the neti hooks that they're going away. Once that's all done, we get
+ * to the heart of the matter.
+ *
+ * When shutting down there could be any number of outstanding contexts that
+ * have a reference on the vnd_pnsd_t and on the individual links. However, we
+ * know that no one new will be able to find the vnd_pnsd_t. To account for
+ * things that have existing references we mark the vnd_pnsd_t`vpnd_flags with
+ * VND_NS_CONDEMNED. This is checked by code paths that wish to append a device
+ * to the netstack's list. If this is set, then they must not append to it.
+ * Once this is set, we know that the netstack's list of devices can never grow,
+ * only shrink.
+ *
+ * Next, for each device we tag it with VND_D_ZONE_DYING. This indicates that
+ * the container for the device is being destroyed and that we should not allow
+ * additional references to the device to be created, whether via open, or
+ * linking. The presence of this bit also allows things like the list ioctl and
+ * sdev to know not to consider its existence. At the conclusion of this being
+ * set, we know that no one else should be able to obtain a new reference to the
+ * device.
+ *
+ * Once that has been set for all devices, we go through and remove any existing
+ * links that have been established in sdev. Because doing that may cause the
+ * final reference for the device to be dropped, which still has a reference to
+ * the netstack, we have to restart our walk due to dropped locks. We know that
+ * this walk will eventually complete because the device cannot be relinked and
+ * no new devices will be attached in this netstack due to VND_NS_CONDEMNED.
+ * Once that's finished, the shutdown callback returns.
+ *
+ * When we reach the destroy callback, we simply wait for references on the
+ * netstack to disappear. Because the zone has been shut down, all processes in
+ * it that have open references have been terminated and reaped. Any threads
+ * that are newly trying to reference it will fail. However, there is one thing
+ * that can halt this that we have no control over, which is the global zone
+ * holding open a reference to the device. In this case the zone halt will hang
+ * in vnd_stack_destroy. Once the last references is dropped we finish destroy
+ * the netinfo hooks and free the vnd_pnsd_t.
+ *
+ * ----
+ * sdev
+ * ----
+ *
+ * vnd registers a sdev plugin which allows it to dynamically fill out /dev/vnd
+ * for both the global and non-global zones. In any given zone we always supply
+ * a control node via /dev/vnd/ctl. This is the self-cloning node. Each zone
+ * will also have an entry per-link in that zone under /dev/vnd/%datalink, eg.
+ * if a link was named net0, there would be a /dev/vnd/net0. The global zone can
+ * also see every link for every zone, ala /dev/net, under
+ * /dev/vnd/%zonename/%datalink, eg. if a zone named 'turin' had a vnd device
+ * named net0, the global zone would have /dev/vnd/turin/net0.
+ *
+ * The sdev plugin has three interfaces that it supplies back to sdev. One is to
+ * validate that a given node is still valid. The next is a callback from sdev
+ * to say that it is no longer using the node. The third and final one is from
+ * sdev where it asks us to fill a directory. All of the heavy lifting is done
+ * in directory filling and in valiation. We opt not to maintain a reference on
+ * the device while there is an sdev node present. This makes the removal of
+ * nodes much simpler and most of the possible failure modes shouldn't cause any
+ * real problems. For example, the open path has to handle both dev_t's which no
+ * longer exist and which are no longer linked.
+ *
+ * -----
+ * hooks
+ * -----
+ *
+ * Like IP, vnd sends all L3 packets through its firewall hooks. Currently vnd
+ * provides these for L3 IP and IPv6 traffic. Each netstack provides these hooks
+ * in a minimal fashion. While we will allow traffic to be filtered through the
+ * hooks, we do not provide means for packet injection or additional inspection
+ * at this time. There are a total of four different events created:
+ *
+ * o IPv4 physical in
+ * o IPv4 physical out
+ * o IPv6 physical in
+ * o IPv6 physical out
+ *
+ * ---------------
+ * Synchronization
+ * ---------------
+ *
+ * To make our synchronization simpler, we've put more effort into making the
+ * metadata/setup paths do more work. That work allows the data paths to make
+ * assumptions around synchronization that simplify the general case. Each major
+ * structure, the vnd_pnsd_t, vnd_dev_t, vnd_str_t, and vnd_data_queue_t is
+ * annotated with the protection that its members receives. The following
+ * annotations are used:
+ *
+ * A Atomics; these values are only modified using atomics values.
+ * Currently this only applies to kstat values.
+ * E Existence; no lock is needed to access this member, it does not
+ * change while the structure is valid.
+ * GL Global Lock; these members are protected by the global
+ * vnd_dev_lock.
+ * L Locked; access to the member is controlled by a lock that is in
+ * the structure.
+ * NSL netstack lock; this member is protected by the containing
+ * netstack. This only applies to the vnd_dev_t`vdd_nslink.
+ * X This member is special, and is discussed in this section.
+ *
+ * In addition to locking, we also have reference counts on the vnd_dev_t and
+ * the vnd_pnsd_t. The reference counts describe the lifetimes of the structure.
+ * With rare exception, once a reference count is decremented, the consumer
+ * should not assume that the data is valid any more. The only exception to this
+ * is the case where we're removing an extant reference count from a link into
+ * /devices or /dev. Reference counts are obtained on these structures as a part
+ * of looking them up.
+ *
+ * # Global Lock Ordering
+ * ######################
+ *
+ * The following is the order that you must take locks in vnd:
+ *
+ * 1) vnd`vnd_dev_lock
+ * 2) vnd_pnsd_t`vpnd_lock
+ * 3) vnd_dev_t`vnd_lock
+ * 4) vnd_str_t`vns_lock
+ * 5) vnd_data_queue_t`vdq_lock
+ *
+ * One must adhere to the following rules:
+ *
+ * o You must acquire a lower numbered lock before a high numbered lock.
+ * o It is NOT legal to hold two locks of the same level concurrently, eg. you
+ * can not hold two different vnd_dev_t's vnd_lock at the same time.
+ * o You may release locks in any order.
+ * o If you release a lock, you must honor the locking rules before acquiring
+ * it again.
+ * o You should not hold any locks when calling any of the rele functions.
+ *
+ * # Special Considerations
+ * ########################
+ *
+ * While most of the locking is what's expected, it's worth going into the
+ * special nature that a few members hold. Today, only two structures have
+ * special considerations: the vnd_dev_t and the vnd_str_t. All members with
+ * special considerations have an additional annotation that describes how you
+ * should interact with it.
+ *
+ * vnd_dev_t: The vdd_nsd and vdd_cr are only valid when the minor node is
+ * attached or in the process of attaching. If the code path that goes through
+ * requires an attached vnd_dev_t, eg. the data path and tear down path, then it
+ * is always legal to dereference that member without a lock held. When they are
+ * added to the system, they should be done under the vdd_lock and done as part
+ * of setting the VND_D_ATTACH_INFLIGHT flag. These should not change during the
+ * lifetime of the vnd_dev_t.
+ *
+ * vnd_dev_t: The vdd_ldih is similar to the vdd_nsd and vdd_cr, except that it
+ * always exists as it is a part of the structure. The only time that it's valid
+ * to be using it is during the attach path with the VND_D_ATTACH_INFLIGHT flag
+ * set or during tear down. Outside of those paths which are naturally
+ * serialized, there is no explicit locking around the member.
+ *
+ * vnd_str_t: The vns_dev and vns_nsd work in similar ways. They are not
+ * initially set as part of creating the structure, but are set as part of
+ * responding to the association ioctl. Anything in the data path or metadata
+ * path that requires association may assume that they exist, as we do not kick
+ * off the state machine until they're set.
+ *
+ * vnd_str_t: The vns_drainblk and vns_barrierblk are similarly special. The
+ * members are designed to be used as part of various operations with the
+ * gsqueues. A lock isn't needed to use them, but to work with them, the
+ * appropriate flag in the vnd_str_t`vns_flags must have been set by the current
+ * thread. Otherwise, it is always fair game to refer to their addresses. Their
+ * contents are ignored by vnd, but some members are manipulated by the gsqueue
+ * subsystem.
+ */
+
+#include <sys/conf.h>
+#include <sys/devops.h>
+#include <sys/modctl.h>
+#include <sys/stat.h>
+#include <sys/file.h>
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/open.h>
+#include <sys/ddi.h>
+#include <sys/ethernet.h>
+#include <sys/stropts.h>
+#include <sys/sunddi.h>
+#include <sys/stream.h>
+#include <sys/strsun.h>
+#include <sys/ksynch.h>
+#include <sys/taskq_impl.h>
+#include <sys/sdt.h>
+#include <sys/debug.h>
+#include <sys/sysmacros.h>
+#include <sys/dlpi.h>
+#include <sys/cred.h>
+#include <sys/id_space.h>
+#include <sys/list.h>
+#include <sys/ctype.h>
+#include <sys/policy.h>
+#include <sys/sunldi.h>
+#include <sys/cred.h>
+#include <sys/strsubr.h>
+#include <sys/poll.h>
+#include <sys/neti.h>
+#include <sys/hook.h>
+#include <sys/hook_event.h>
+#include <sys/vlan.h>
+#include <sys/dld.h>
+#include <sys/mac_client.h>
+#include <sys/netstack.h>
+#include <sys/fs/sdev_plugin.h>
+#include <sys/kstat.h>
+#include <sys/atomic.h>
+#include <sys/disp.h>
+#include <sys/random.h>
+#include <sys/gsqueue.h>
+
+#include <inet/ip.h>
+#include <inet/ip6.h>
+
+#include <sys/vnd.h>
+
+/*
+ * Globals
+ */
+static dev_info_t *vnd_dip;
+static taskq_t *vnd_taskq;
+static kmem_cache_t *vnd_str_cache;
+static kmem_cache_t *vnd_dev_cache;
+static kmem_cache_t *vnd_pnsd_cache;
+static id_space_t *vnd_minors;
+static int vnd_list_init = 0;
+static sdev_plugin_hdl_t vnd_sdev_hdl;
+static gsqueue_set_t *vnd_sqset;
+
+static kmutex_t vnd_dev_lock;
+static list_t vnd_dev_list; /* Protected by the vnd_dev_lock */
+static list_t vnd_nsd_list; /* Protected by the vnd_dev_lock */
+
+/*
+ * STREAMs ioctls
+ *
+ * The STREAMs ioctls are internal to vnd. No one should be seeing them, as such
+ * they aren't a part of the header file.
+ */
+#define VND_STRIOC (('v' << 24) | ('n' << 16) | ('d' << 8) | 0x80)
+
+/*
+ * Private ioctl to associate a given streams instance with a minor instance of
+ * the character device.
+ */
+#define VND_STRIOC_ASSOCIATE (VND_STRIOC | 0x1)
+
+typedef struct vnd_strioc_associate {
+ minor_t vsa_minor; /* minor device node */
+ netstackid_t vsa_nsid; /* netstack id */
+ vnd_errno_t vsa_errno; /* errno */
+} vnd_strioc_associate_t;
+
+typedef enum vnd_strioc_state {
+ VSS_UNKNOWN = 0,
+ VSS_COPYIN = 1,
+ VSS_COPYOUT = 2,
+} vnd_strioc_state_t;
+
+typedef struct vnd_strioc {
+ vnd_strioc_state_t vs_state;
+ caddr_t vs_addr;
+} vnd_strioc_t;
+
+/*
+ * VND SQUEUE TAGS, start at 0x42 so we don't overlap with extent tags. Though
+ * really, overlap is at the end of the day, inevitable.
+ */
+#define VND_SQUEUE_TAG_TX_DRAIN 0x42
+#define VND_SQUEUE_TAG_MAC_FLOW_CONTROL 0x43
+#define VND_SQUEUE_TAG_VND_WRITE 0x44
+#define VND_SQUEUE_TAG_ND_FRAMEIO_WRITE 0x45
+#define VND_SQUEUE_TAG_STRBARRIER 0x46
+
+/*
+ * vnd reserved names. These are names which are reserved by vnd and thus
+ * shouldn't be used by some external program.
+ */
+static char *vnd_reserved_names[] = {
+ "ctl",
+ "zone",
+ NULL
+};
+
+/*
+ * vnd's DTrace probe macros
+ *
+ * DTRACE_VND* are all for a stable provider. We also have an unstable internal
+ * set of probes for reference count manipulation.
+ */
+#define DTRACE_VND3(name, type1, arg1, type2, arg2, type3, arg3) \
+ DTRACE_PROBE3(__vnd_##name, type1, arg1, type2, arg2, type3, arg3);
+
+#define DTRACE_VND4(name, type1, arg1, type2, arg2, type3, arg3, type4, arg4) \
+ DTRACE_PROBE4(__vnd_##name, type1, arg1, type2, arg2, type3, arg3, \
+ type4, arg4);
+
+#define DTRACE_VND5(name, type1, arg1, type2, arg2, type3, arg3, \
+ type4, arg4, type5, arg5) \
+ DTRACE_PROBE5(__vnd_##name, type1, arg1, type2, arg2, type3, arg3, \
+ type4, arg4, type5, arg5);
+
+#define DTRACE_VND_REFINC(vdp) \
+ DTRACE_PROBE2(vnd__ref__inc, vnd_dev_t *, vdp, int, vdp->vdd_ref);
+#define DTRACE_VND_REFDEC(vdp) \
+ DTRACE_PROBE2(vnd__ref__dec, vnd_dev_t *, vdp, int, vdp->vdd_ref);
+
+
+/*
+ * Tunables
+ */
+size_t vnd_vdq_default_size = 1024 * 64; /* 64 KB */
+size_t vnd_vdq_hard_max = 1024 * 1024 * 4; /* 4 MB */
+
+/*
+ * These numbers are designed as per-device tunables that are applied when a new
+ * vnd device is attached. They're a rough stab at what may be a reasonable
+ * amount of work to do in one burst in an squeue.
+ */
+size_t vnd_flush_burst_size = 1520 * 10; /* 10 1500 MTU packets */
+size_t vnd_flush_nburst = 10; /* 10 frames */
+
+/*
+ * Constants related to our sdev plugins
+ */
+#define VND_SDEV_NAME "vnd"
+#define VND_SDEV_ROOT "/dev/vnd"
+#define VND_SDEV_ZROOT "/dev/vnd/zone"
+
+/*
+ * Statistic macros
+ */
+#define VND_STAT_INC(vsp, field, val) \
+ atomic_add_64(&(vsp)->vns_ksdata.field.value.ui64, val)
+#define VND_LATENCY_1MS 1000000
+#define VND_LATENCY_10MS 10000000
+#define VND_LATENCY_100MS 100000000
+#define VND_LATENCY_1S 1000000000
+#define VND_LATENCY_10S 10000000000
+
+/*
+ * Constants for vnd hooks
+ */
+static uint8_t vnd_bcast_addr[6] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
+#define IPV4_MCAST_LEN 3
+static uint8_t vnd_ipv4_mcast[3] = { 0x01, 0x00, 0x5E };
+#define IPV6_MCAST_LEN 2
+static uint8_t vnd_ipv6_mcast[2] = { 0x33, 0x33 };
+
+/*
+ * vnd internal data structures and types
+ */
+
+struct vnd_str;
+struct vnd_dev;
+struct vnd_pnsd;
+
+/*
+ * As part of opening the device stream we need to properly communicate with our
+ * underlying stream. This is a bit of an asynchronous dance and we need to
+ * properly work with dld to get everything set up. We have to initiate the
+ * conversation with dld and as such we keep track of our state here.
+ */
+typedef enum vnd_str_state {
+ VNS_S_INITIAL = 0,
+ VNS_S_INFO_SENT,
+ VNS_S_EXCLUSIVE_SENT,
+ VNS_S_ATTACH_SENT,
+ VNS_S_BIND_SENT,
+ VNS_S_SAP_PROMISC_SENT,
+ VNS_S_MULTI_PROMISC_SENT,
+ VNS_S_RX_ONLY_PROMISC_SENT,
+ VNS_S_FIXUP_PROMISC_SENT,
+ VNS_S_CAPAB_Q_SENT,
+ VNS_S_CAPAB_E_SENT,
+ VNS_S_ONLINE,
+ VNS_S_SHUTTING_DOWN,
+ VNS_S_MULTICAST_PROMISCOFF_SENT,
+ VNS_S_SAP_PROMISCOFF_SENT,
+ VNS_S_UNBIND_SENT,
+ VNS_S_ZOMBIE
+} vnd_str_state_t;
+
+typedef enum vnd_str_flags {
+ VNS_F_NEED_ZONE = 0x1,
+ VNS_F_TASKQ_DISPATCHED = 0x2,
+ VNS_F_CONDEMNED = 0x4,
+ VNS_F_FLOW_CONTROLLED = 0x8,
+ VNS_F_DRAIN_SCHEDULED = 0x10,
+ VNS_F_BARRIER = 0x20,
+ VNS_F_BARRIER_DONE = 0x40
+} vnd_str_flags_t;
+
+typedef enum vnd_capab_flags {
+ VNS_C_HCKSUM = 0x1,
+ VNS_C_DLD = 0x2,
+ VNS_C_DIRECT = 0x4,
+ VNS_C_HCKSUM_BADVERS = 0x8
+} vnd_capab_flags_t;
+
+/*
+ * Definitions to interact with direct callbacks
+ */
+typedef void (*vnd_rx_t)(struct vnd_str *, mac_resource_t *, mblk_t *,
+ mac_header_info_t *);
+typedef uintptr_t vnd_mac_cookie_t;
+/* DLD Direct capability function */
+typedef int (*vnd_dld_cap_t)(void *, uint_t, void *, uint_t);
+/* DLD Direct tx function */
+typedef vnd_mac_cookie_t (*vnd_dld_tx_t)(void *, mblk_t *, uint64_t, uint16_t);
+/* DLD Direct function to set flow control callback */
+typedef void *(*vnd_dld_set_fcb_t)(void *, void (*)(void *, vnd_mac_cookie_t),
+ void *);
+/* DLD Direct function to see if flow controlled still */
+typedef int (*vnd_dld_is_fc_t)(void *, vnd_mac_cookie_t);
+
+/*
+ * The vnd_str_capab_t is always protected by the vnd_str_t it's a member of.
+ */
+typedef struct vnd_str_capab {
+ vnd_capab_flags_t vsc_flags;
+ t_uscalar_t vsc_hcksum_opts;
+ vnd_dld_cap_t vsc_capab_f;
+ void *vsc_capab_hdl;
+ vnd_dld_tx_t vsc_tx_f;
+ void *vsc_tx_hdl;
+ vnd_dld_set_fcb_t vsc_set_fcb_f;
+ void *vsc_set_fcb_hdl;
+ vnd_dld_is_fc_t vsc_is_fc_f;
+ void *vsc_is_fc_hdl;
+ vnd_mac_cookie_t vsc_fc_cookie;
+ void *vsc_tx_fc_hdl;
+} vnd_str_capab_t;
+
+/*
+ * The vnd_data_queue is a simple construct for storing a series of messages in
+ * a queue.
+ *
+ * See synchronization section of the big theory statement for member
+ * annotations.
+ */
+typedef struct vnd_data_queue {
+ struct vnd_str *vdq_vns; /* E */
+ kmutex_t vdq_lock;
+ kcondvar_t vdq_ready; /* Uses vdq_lock */
+ ssize_t vdq_max; /* L */
+ ssize_t vdq_cur; /* L */
+ mblk_t *vdq_head; /* L */
+ mblk_t *vdq_tail; /* L */
+} vnd_data_queue_t;
+
+typedef struct vnd_str_stat {
+ kstat_named_t vks_rbytes;
+ kstat_named_t vks_rpackets;
+ kstat_named_t vks_obytes;
+ kstat_named_t vks_opackets;
+ kstat_named_t vks_nhookindrops;
+ kstat_named_t vks_nhookoutdrops;
+ kstat_named_t vks_ndlpidrops;
+ kstat_named_t vks_ndataindrops;
+ kstat_named_t vks_ndataoutdrops;
+ kstat_named_t vks_tdrops;
+ kstat_named_t vks_linkname;
+ kstat_named_t vks_zonename;
+ kstat_named_t vks_nmacflow;
+ kstat_named_t vks_tmacflow;
+ kstat_named_t vks_mac_flow_1ms;
+ kstat_named_t vks_mac_flow_10ms;
+ kstat_named_t vks_mac_flow_100ms;
+ kstat_named_t vks_mac_flow_1s;
+ kstat_named_t vks_mac_flow_10s;
+} vnd_str_stat_t;
+
+/*
+ * vnd stream structure
+ *
+ * See synchronization section of the big theory statement for member
+ * annotations.
+ */
+typedef struct vnd_str {
+ kmutex_t vns_lock;
+ kcondvar_t vns_cancelcv; /* Uses vns_lock */
+ kcondvar_t vns_barriercv; /* Uses vns_lock */
+ kcondvar_t vns_stcv; /* Uses vns_lock */
+ vnd_str_state_t vns_state; /* L */
+ vnd_str_state_t vns_laststate; /* L */
+ vnd_errno_t vns_errno; /* L */
+ vnd_str_flags_t vns_flags; /* L */
+ vnd_str_capab_t vns_caps; /* L */
+ taskq_ent_t vns_tqe; /* L */
+ vnd_data_queue_t vns_dq_read; /* E */
+ vnd_data_queue_t vns_dq_write; /* E */
+ mblk_t *vns_dlpi_inc; /* L */
+ queue_t *vns_rq; /* E */
+ queue_t *vns_wq; /* E */
+ queue_t *vns_lrq; /* E */
+ t_uscalar_t vns_dlpi_style; /* L */
+ t_uscalar_t vns_minwrite; /* L */
+ t_uscalar_t vns_maxwrite; /* L */
+ hrtime_t vns_fclatch; /* L */
+ hrtime_t vns_fcupdate; /* L */
+ kstat_t *vns_kstat; /* E */
+ gsqueue_t *vns_squeue; /* E */
+ mblk_t vns_drainblk; /* E + X */
+ mblk_t vns_barrierblk; /* E + X */
+ vnd_str_stat_t vns_ksdata; /* A */
+ size_t vns_nflush; /* L */
+ size_t vns_bsize; /* L */
+ struct vnd_dev *vns_dev; /* E + X */
+ struct vnd_pnsd *vns_nsd; /* E + X */
+} vnd_str_t;
+
+typedef enum vnd_dev_flags {
+ VND_D_ATTACH_INFLIGHT = 0x001,
+ VND_D_ATTACHED = 0x002,
+ VND_D_LINK_INFLIGHT = 0x004,
+ VND_D_LINKED = 0x008,
+ VND_D_CONDEMNED = 0x010,
+ VND_D_ZONE_DYING = 0x020,
+ VND_D_OPENED = 0x040
+} vnd_dev_flags_t;
+
+/*
+ * This represents the data associated with a minor device instance.
+ *
+ * See synchronization section of the big theory statement for member
+ * annotations.
+ */
+typedef struct vnd_dev {
+ kmutex_t vdd_lock;
+ list_node_t vdd_link; /* GL */
+ list_node_t vdd_nslink; /* NSL */
+ int vdd_ref; /* L */
+ vnd_dev_flags_t vdd_flags; /* L */
+ minor_t vdd_minor; /* E */
+ dev_t vdd_devid; /* E */
+ ldi_ident_t vdd_ldiid; /* E */
+ ldi_handle_t vdd_ldih; /* X */
+ cred_t *vdd_cr; /* X */
+ vnd_str_t *vdd_str; /* L */
+ struct pollhead vdd_ph; /* E */
+ struct vnd_pnsd *vdd_nsd; /* E + X */
+ char vdd_datalink[VND_NAMELEN]; /* L */
+ char vdd_lname[VND_NAMELEN]; /* L */
+} vnd_dev_t;
+
+typedef enum vnd_pnsd_flags {
+ VND_NS_CONDEMNED = 0x1
+} vnd_pnsd_flags_t;
+
+/*
+ * Per netstack data structure.
+ *
+ * See synchronization section of the big theory statement for member
+ * annotations.
+ */
+typedef struct vnd_pnsd {
+ list_node_t vpnd_link; /* protected by global dev lock */
+ zoneid_t vpnd_zid; /* E */
+ netstackid_t vpnd_nsid; /* E */
+ boolean_t vpnd_hooked; /* E */
+ net_handle_t vpnd_neti_v4; /* E */
+ hook_family_t vpnd_family_v4; /* E */
+ hook_event_t vpnd_event_in_v4; /* E */
+ hook_event_t vpnd_event_out_v4; /* E */
+ hook_event_token_t vpnd_token_in_v4; /* E */
+ hook_event_token_t vpnd_token_out_v4; /* E */
+ net_handle_t vpnd_neti_v6; /* E */
+ hook_family_t vpnd_family_v6; /* E */
+ hook_event_t vpnd_event_in_v6; /* E */
+ hook_event_t vpnd_event_out_v6; /* E */
+ hook_event_token_t vpnd_token_in_v6; /* E */
+ hook_event_token_t vpnd_token_out_v6; /* E */
+ kmutex_t vpnd_lock; /* Protects remaining members */
+ kcondvar_t vpnd_ref_change; /* Uses vpnd_lock */
+ int vpnd_ref; /* L */
+ vnd_pnsd_flags_t vpnd_flags; /* L */
+ list_t vpnd_dev_list; /* L */
+} vnd_pnsd_t;
+
+static void vnd_squeue_tx_drain(void *, mblk_t *, gsqueue_t *, void *);
+
+/*
+ * Drop function signature.
+ */
+typedef void (*vnd_dropper_f)(vnd_str_t *, mblk_t *, const char *);
+
+static void
+vnd_drop_ctl(vnd_str_t *vsp, mblk_t *mp, const char *reason)
+{
+ DTRACE_VND4(drop__ctl, mblk_t *, mp, vnd_str_t *, vsp, mblk_t *,
+ mp, const char *, reason);
+ if (mp != NULL) {
+ freemsg(mp);
+ }
+ VND_STAT_INC(vsp, vks_ndlpidrops, 1);
+ VND_STAT_INC(vsp, vks_tdrops, 1);
+}
+
+static void
+vnd_drop_in(vnd_str_t *vsp, mblk_t *mp, const char *reason)
+{
+ DTRACE_VND4(drop__in, mblk_t *, mp, vnd_str_t *, vsp, mblk_t *,
+ mp, const char *, reason);
+ if (mp != NULL) {
+ freemsg(mp);
+ }
+ VND_STAT_INC(vsp, vks_ndataindrops, 1);
+ VND_STAT_INC(vsp, vks_tdrops, 1);
+}
+
+static void
+vnd_drop_out(vnd_str_t *vsp, mblk_t *mp, const char *reason)
+{
+ DTRACE_VND4(drop__out, mblk_t *, mp, vnd_str_t *, vsp, mblk_t *,
+ mp, const char *, reason);
+ if (mp != NULL) {
+ freemsg(mp);
+ }
+ VND_STAT_INC(vsp, vks_ndataoutdrops, 1);
+ VND_STAT_INC(vsp, vks_tdrops, 1);
+}
+
+static void
+vnd_drop_hook_in(vnd_str_t *vsp, mblk_t *mp, const char *reason)
+{
+ DTRACE_VND4(drop__in, mblk_t *, mp, vnd_str_t *, vsp, mblk_t *,
+ mp, const char *, reason);
+ if (mp != NULL) {
+ freemsg(mp);
+ }
+ VND_STAT_INC(vsp, vks_nhookindrops, 1);
+ VND_STAT_INC(vsp, vks_tdrops, 1);
+}
+
+static void
+vnd_drop_hook_out(vnd_str_t *vsp, mblk_t *mp, const char *reason)
+{
+ DTRACE_VND4(drop__out, mblk_t *, mp, vnd_str_t *, vsp, mblk_t *,
+ mp, const char *, reason);
+ if (mp != NULL) {
+ freemsg(mp);
+ }
+ VND_STAT_INC(vsp, vks_nhookoutdrops, 1);
+ VND_STAT_INC(vsp, vks_tdrops, 1);
+}
+
+static void
+vnd_drop_panic(vnd_str_t *vsp, mblk_t *mp, const char *reason)
+{
+ panic("illegal vnd drop");
+}
+
+static void
+vnd_mac_drop_input(vnd_str_t *vsp, mac_resource_t *unused, mblk_t *mp_chain,
+ mac_header_info_t *mhip)
+{
+ mblk_t *mp;
+
+ while (mp_chain != NULL) {
+ mp = mp_chain;
+ mp_chain = mp->b_next;
+ vnd_drop_hook_in(vsp, mp, "stream not associated");
+ }
+}
+
+static vnd_pnsd_t *
+vnd_nsd_lookup(netstackid_t nsid)
+{
+ vnd_pnsd_t *nsp;
+
+ mutex_enter(&vnd_dev_lock);
+ for (nsp = list_head(&vnd_nsd_list); nsp != NULL;
+ nsp = list_next(&vnd_nsd_list, nsp)) {
+ if (nsp->vpnd_nsid == nsid) {
+ mutex_enter(&nsp->vpnd_lock);
+ VERIFY(nsp->vpnd_ref >= 0);
+ nsp->vpnd_ref++;
+ mutex_exit(&nsp->vpnd_lock);
+ break;
+ }
+ }
+ mutex_exit(&vnd_dev_lock);
+ return (nsp);
+}
+
+static vnd_pnsd_t *
+vnd_nsd_lookup_by_zid(zoneid_t zid)
+{
+ netstack_t *ns;
+ vnd_pnsd_t *nsp;
+ ns = netstack_find_by_zoneid(zid);
+ if (ns == NULL)
+ return (NULL);
+ nsp = vnd_nsd_lookup(ns->netstack_stackid);
+ netstack_rele(ns);
+ return (nsp);
+}
+
+static vnd_pnsd_t *
+vnd_nsd_lookup_by_zonename(char *zname)
+{
+ zone_t *zonep;
+ vnd_pnsd_t *nsp;
+
+ zonep = zone_find_by_name(zname);
+ if (zonep == NULL)
+ return (NULL);
+
+ nsp = vnd_nsd_lookup_by_zid(zonep->zone_id);
+ zone_rele(zonep);
+ return (nsp);
+}
+
+static void
+vnd_nsd_ref(vnd_pnsd_t *nsp)
+{
+ mutex_enter(&nsp->vpnd_lock);
+ /*
+ * This can only be used on something that has been obtained through
+ * some other means. As such, the caller should already have a reference
+ * before adding another one. This function should not be used as a
+ * means of creating the initial reference.
+ */
+ VERIFY(nsp->vpnd_ref > 0);
+ nsp->vpnd_ref++;
+ mutex_exit(&nsp->vpnd_lock);
+ cv_broadcast(&nsp->vpnd_ref_change);
+}
+
+static void
+vnd_nsd_rele(vnd_pnsd_t *nsp)
+{
+ mutex_enter(&nsp->vpnd_lock);
+ VERIFY(nsp->vpnd_ref > 0);
+ nsp->vpnd_ref--;
+ mutex_exit(&nsp->vpnd_lock);
+ cv_broadcast(&nsp->vpnd_ref_change);
+}
+
+static vnd_dev_t *
+vnd_dev_lookup(minor_t m)
+{
+ vnd_dev_t *vdp;
+ mutex_enter(&vnd_dev_lock);
+ for (vdp = list_head(&vnd_dev_list); vdp != NULL;
+ vdp = list_next(&vnd_dev_list, vdp)) {
+ if (vdp->vdd_minor == m) {
+ mutex_enter(&vdp->vdd_lock);
+ VERIFY(vdp->vdd_ref > 0);
+ vdp->vdd_ref++;
+ DTRACE_VND_REFINC(vdp);
+ mutex_exit(&vdp->vdd_lock);
+ break;
+ }
+ }
+ mutex_exit(&vnd_dev_lock);
+ return (vdp);
+}
+
+static void
+vnd_dev_free(vnd_dev_t *vdp)
+{
+ /*
+ * When the STREAM exists we need to go through and make sure
+ * communication gets torn down. As part of closing the stream, we
+ * guarantee that nothing else should be able to enter the stream layer
+ * at this point. That means no one should be able to call
+ * read(),write() or one of the frameio ioctls.
+ */
+ if (vdp->vdd_flags & VND_D_ATTACHED) {
+ ldi_close(vdp->vdd_ldih, FREAD | FWRITE, vdp->vdd_cr);
+ crfree(vdp->vdd_cr);
+ vdp->vdd_cr = NULL;
+
+ /*
+ * We have to remove ourselves from our parents list now. It is
+ * really quite important that we have already set the condemend
+ * flag here so that our containing netstack basically knows
+ * that we're on the way down and knows not to wait for us. It's
+ * also important that we do that before we put a rele on the
+ * the device as that is the point at which it will check again.
+ */
+ mutex_enter(&vdp->vdd_nsd->vpnd_lock);
+ list_remove(&vdp->vdd_nsd->vpnd_dev_list, vdp);
+ mutex_exit(&vdp->vdd_nsd->vpnd_lock);
+ vnd_nsd_rele(vdp->vdd_nsd);
+ vdp->vdd_nsd = NULL;
+ }
+ ASSERT(vdp->vdd_flags & VND_D_CONDEMNED);
+ id_free(vnd_minors, vdp->vdd_minor);
+ mutex_destroy(&vdp->vdd_lock);
+ kmem_cache_free(vnd_dev_cache, vdp);
+}
+
+static void
+vnd_dev_ref(vnd_dev_t *vdp)
+{
+ mutex_enter(&vdp->vdd_lock);
+ VERIFY(vdp->vdd_ref > 0);
+ vdp->vdd_ref++;
+ DTRACE_VND_REFINC(vdp);
+ mutex_exit(&vdp->vdd_lock);
+}
+
+/*
+ * As part of releasing the hold on this we may tear down a given vnd_dev_t As
+ * such we need to make sure that we grab the list lock first before grabbing
+ * the vnd_dev_t's lock to ensure proper lock ordering.
+ */
+static void
+vnd_dev_rele(vnd_dev_t *vdp)
+{
+ mutex_enter(&vnd_dev_lock);
+ mutex_enter(&vdp->vdd_lock);
+ VERIFY(vdp->vdd_ref > 0);
+ vdp->vdd_ref--;
+ DTRACE_VND_REFDEC(vdp);
+ if (vdp->vdd_ref > 0) {
+ mutex_exit(&vdp->vdd_lock);
+ mutex_exit(&vnd_dev_lock);
+ return;
+ }
+
+ /*
+ * Now that we've removed this from the list, we can go ahead and
+ * drop the list lock. No one else can find this device and reference
+ * it. As its reference count is zero, it by definition does not have
+ * any remaining entries in /devices that could lead someone back to
+ * this.
+ */
+ vdp->vdd_flags |= VND_D_CONDEMNED;
+ list_remove(&vnd_dev_list, vdp);
+ mutex_exit(&vdp->vdd_lock);
+ mutex_exit(&vnd_dev_lock);
+
+ vnd_dev_free(vdp);
+}
+
+/*
+ * Insert a mesage block chain if there's space, otherwise drop it. Return one
+ * so someone who was waiting for data would now end up having found it. eg.
+ * caller should consider a broadcast.
+ */
+static int
+vnd_dq_push(vnd_data_queue_t *vqp, mblk_t *mp, boolean_t reserved,
+ vnd_dropper_f dropf)
+{
+ size_t msize;
+
+ ASSERT(MUTEX_HELD(&vqp->vdq_lock));
+ if (reserved == B_FALSE) {
+ msize = msgsize(mp);
+ if (vqp->vdq_cur + msize > vqp->vdq_max) {
+ dropf(vqp->vdq_vns, mp, "buffer full");
+ return (0);
+ }
+ vqp->vdq_cur += msize;
+ }
+
+ if (vqp->vdq_head == NULL) {
+ ASSERT(vqp->vdq_tail == NULL);
+ vqp->vdq_head = mp;
+ vqp->vdq_tail = mp;
+ } else {
+ vqp->vdq_tail->b_next = mp;
+ vqp->vdq_tail = mp;
+ }
+
+ return (1);
+}
+
+/*
+ * Remove a message message block chain. If the amount of space in the buffer
+ * has changed we return 1. We have no way of knowing whether or not there is
+ * enough space overall for a given writer who is blocked, so we always end up
+ * having to return true and thus tell consumers that they should consider
+ * signalling.
+ */
+static int
+vnd_dq_pop(vnd_data_queue_t *vqp, mblk_t **mpp)
+{
+ size_t msize;
+ mblk_t *mp;
+
+ ASSERT(MUTEX_HELD(&vqp->vdq_lock));
+ ASSERT(mpp != NULL);
+ if (vqp->vdq_head == NULL) {
+ ASSERT(vqp->vdq_tail == NULL);
+ *mpp = NULL;
+ return (0);
+ }
+
+ mp = vqp->vdq_head;
+ msize = msgsize(mp);
+
+ vqp->vdq_cur -= msize;
+ if (mp->b_next == NULL) {
+ vqp->vdq_head = NULL;
+ vqp->vdq_tail = NULL;
+ /*
+ * We can't be certain that this is always going to be zero.
+ * Someone may have basically taken a reservation of space on
+ * the data queue, eg. claimed spae but not yet pushed it on
+ * yet.
+ */
+ ASSERT(vqp->vdq_cur >= 0);
+ } else {
+ vqp->vdq_head = mp->b_next;
+ ASSERT(vqp->vdq_cur > 0);
+ }
+ mp->b_next = NULL;
+ *mpp = mp;
+ return (1);
+}
+
+/*
+ * Reserve space in the queue. This will bump up the size of the queue and
+ * entitle the user to push something on later without bumping the space.
+ */
+static int
+vnd_dq_reserve(vnd_data_queue_t *vqp, ssize_t size)
+{
+ ASSERT(MUTEX_HELD(&vqp->vdq_lock));
+ ASSERT(size >= 0);
+
+ if (size == 0)
+ return (0);
+
+ if (size + vqp->vdq_cur > vqp->vdq_max)
+ return (0);
+
+ vqp->vdq_cur += size;
+ return (1);
+}
+
+static void
+vnd_dq_unreserve(vnd_data_queue_t *vqp, ssize_t size)
+{
+ ASSERT(MUTEX_HELD(&vqp->vdq_lock));
+ ASSERT(size > 0);
+ ASSERT(size <= vqp->vdq_cur);
+
+ vqp->vdq_cur -= size;
+}
+
+static void
+vnd_dq_flush(vnd_data_queue_t *vqp, vnd_dropper_f dropf)
+{
+ mblk_t *mp, *next;
+
+ mutex_enter(&vqp->vdq_lock);
+ for (mp = vqp->vdq_head; mp != NULL; mp = next) {
+ next = mp->b_next;
+ mp->b_next = NULL;
+ dropf(vqp->vdq_vns, mp, "vnd_dq_flush");
+ }
+ vqp->vdq_cur = 0;
+ vqp->vdq_head = NULL;
+ vqp->vdq_tail = NULL;
+ mutex_exit(&vqp->vdq_lock);
+}
+
+static boolean_t
+vnd_dq_is_empty(vnd_data_queue_t *vqp)
+{
+ boolean_t ret;
+
+ mutex_enter(&vqp->vdq_lock);
+ if (vqp->vdq_head == NULL)
+ ret = B_TRUE;
+ else
+ ret = B_FALSE;
+ mutex_exit(&vqp->vdq_lock);
+
+ return (ret);
+}
+
+/*
+ * Get a network uint16_t from the message and translate it into something the
+ * host understands.
+ */
+static int
+vnd_mbc_getu16(mblk_t *mp, off_t off, uint16_t *out)
+{
+ size_t mpsize;
+ uint8_t *bp;
+
+ mpsize = msgsize(mp);
+ /* Check for overflow */
+ if (off + sizeof (uint16_t) > mpsize)
+ return (1);
+
+ mpsize = MBLKL(mp);
+ while (off >= mpsize) {
+ mp = mp->b_cont;
+ off -= mpsize;
+ mpsize = MBLKL(mp);
+ }
+
+ /*
+ * Data is in network order. Note the second byte of data might be in
+ * the next mp.
+ */
+ bp = mp->b_rptr + off;
+ *out = *bp << 8;
+ if (off + 1 == mpsize) {
+ mp = mp->b_cont;
+ bp = mp->b_rptr;
+ } else {
+ bp++;
+ }
+
+ *out |= *bp;
+ return (0);
+}
+
+/*
+ * Given an mblk chain find the mblk and address of a particular offset.
+ */
+static int
+vnd_mbc_getoffset(mblk_t *mp, off_t off, mblk_t **mpp, uintptr_t *offp)
+{
+ size_t mpsize;
+
+ if (off >= msgsize(mp))
+ return (1);
+
+ mpsize = MBLKL(mp);
+ while (off >= mpsize) {
+ mp = mp->b_cont;
+ off -= mpsize;
+ mpsize = MBLKL(mp);
+ }
+ *mpp = mp;
+ *offp = (uintptr_t)mp->b_rptr + off;
+
+ return (0);
+}
+
+/*
+ * Fetch the destination mac address. Set *dstp to that mac address. If the data
+ * is not contiguous in the first mblk_t, fill in datap and set *dstp to it.
+ */
+static int
+vnd_mbc_getdstmac(mblk_t *mp, uint8_t **dstpp, uint8_t *datap)
+{
+ int i;
+
+ if (MBLKL(mp) >= ETHERADDRL) {
+ *dstpp = mp->b_rptr;
+ return (0);
+ }
+
+ *dstpp = datap;
+ for (i = 0; i < ETHERADDRL; i += 2, datap += 2) {
+ if (vnd_mbc_getu16(mp, i, (uint16_t *)datap) != 0)
+ return (1);
+ }
+
+ return (0);
+}
+
+static int
+vnd_hook(vnd_str_t *vsp, mblk_t **mpp, net_handle_t netiv4, hook_event_t hev4,
+ hook_event_token_t hetv4, net_handle_t netiv6, hook_event_t hev6,
+ hook_event_token_t hetv6, vnd_dropper_f hdrop, vnd_dropper_f ddrop)
+{
+ uint16_t etype;
+ int vlan = 0;
+ hook_pkt_event_t info;
+ size_t offset, mblen;
+ uint8_t *dstp;
+ uint8_t dstaddr[6];
+ hook_event_t he;
+ hook_event_token_t het;
+ net_handle_t neti;
+
+ /*
+ * Before we can ask if we're interested we have to do enough work to
+ * determine the ethertype.
+ */
+
+ /* Byte 12 is either the VLAN tag or the ethertype */
+ if (vnd_mbc_getu16(*mpp, 12, &etype) != 0) {
+ ddrop(vsp, *mpp, "packet has incomplete ethernet header");
+ *mpp = NULL;
+ return (1);
+ }
+
+ if (etype == ETHERTYPE_VLAN) {
+ vlan = 1;
+ /* Actual ethertype is another four bytes in */
+ if (vnd_mbc_getu16(*mpp, 16, &etype) != 0) {
+ ddrop(vsp, *mpp,
+ "packet has incomplete ethernet vlan header");
+ *mpp = NULL;
+ return (1);
+ }
+ offset = sizeof (struct ether_vlan_header);
+ } else {
+ offset = sizeof (struct ether_header);
+ }
+
+ /*
+ * At the moment we only hook on the kinds of things that the IP module
+ * would normally.
+ */
+ if (etype != ETHERTYPE_IP && etype != ETHERTYPE_IPV6)
+ return (0);
+
+ if (etype == ETHERTYPE_IP) {
+ neti = netiv4;
+ he = hev4;
+ het = hetv4;
+ } else {
+ neti = netiv6;
+ he = hev6;
+ het = hetv6;
+ }
+
+ if (!he.he_interested)
+ return (0);
+
+
+ if (vnd_mbc_getdstmac(*mpp, &dstp, dstaddr) != 0) {
+ ddrop(vsp, *mpp, "packet has incomplete ethernet header");
+ *mpp = NULL;
+ return (1);
+ }
+
+ /*
+ * Now that we know we're interested, we have to do some additional
+ * sanity checking for IPF's sake, ala ip_check_length(). Specifically
+ * we need to check to make sure that the remaining packet size,
+ * excluding MAC, is at least the size of an IP header.
+ */
+ mblen = msgsize(*mpp);
+ if ((etype == ETHERTYPE_IP &&
+ mblen - offset < IP_SIMPLE_HDR_LENGTH) ||
+ (etype == ETHERTYPE_IPV6 && mblen - offset < IPV6_HDR_LEN)) {
+ ddrop(vsp, *mpp, "packet has invalid IP header");
+ *mpp = NULL;
+ return (1);
+ }
+
+ info.hpe_protocol = neti;
+ info.hpe_ifp = (phy_if_t)vsp;
+ info.hpe_ofp = (phy_if_t)vsp;
+ info.hpe_mp = mpp;
+ info.hpe_flags = 0;
+
+ if (bcmp(vnd_bcast_addr, dstp, ETHERADDRL) == 0)
+ info.hpe_flags |= HPE_BROADCAST;
+ else if (etype == ETHERTYPE_IP &&
+ bcmp(vnd_ipv4_mcast, vnd_bcast_addr, IPV4_MCAST_LEN) == 0)
+ info.hpe_flags |= HPE_MULTICAST;
+ else if (etype == ETHERTYPE_IPV6 &&
+ bcmp(vnd_ipv6_mcast, vnd_bcast_addr, IPV6_MCAST_LEN) == 0)
+ info.hpe_flags |= HPE_MULTICAST;
+
+ if (vnd_mbc_getoffset(*mpp, offset, &info.hpe_mb,
+ (uintptr_t *)&info.hpe_hdr) != 0) {
+ ddrop(vsp, *mpp, "packet too small -- "
+ "unable to find payload");
+ *mpp = NULL;
+ return (1);
+ }
+
+ if (hook_run(neti->netd_hooks, het, (hook_data_t)&info) != 0) {
+ hdrop(vsp, *mpp, "drooped by hooks");
+ return (1);
+ }
+
+ return (0);
+}
+
+/*
+ * This should not be used for DL_INFO_REQ.
+ */
+static mblk_t *
+vnd_dlpi_alloc(size_t len, t_uscalar_t prim)
+{
+ mblk_t *mp;
+ mp = allocb(len, BPRI_MED);
+ if (mp == NULL)
+ return (NULL);
+
+ mp->b_datap->db_type = M_PROTO;
+ mp->b_wptr = mp->b_rptr + len;
+ bzero(mp->b_rptr, len);
+ ((dl_unitdata_req_t *)mp->b_rptr)->dl_primitive = prim;
+
+ return (mp);
+}
+
+static void
+vnd_dlpi_inc_push(vnd_str_t *vsp, mblk_t *mp)
+{
+ mblk_t **mpp;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+ ASSERT(mp->b_next == NULL);
+ mpp = &vsp->vns_dlpi_inc;
+ while (*mpp != NULL)
+ mpp = &((*mpp)->b_next);
+ *mpp = mp;
+}
+
+static mblk_t *
+vnd_dlpi_inc_pop(vnd_str_t *vsp)
+{
+ mblk_t *mp;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+ mp = vsp->vns_dlpi_inc;
+ if (mp != NULL) {
+ VERIFY(mp->b_next == NULL || mp->b_next != mp);
+ vsp->vns_dlpi_inc = mp->b_next;
+ mp->b_next = NULL;
+ }
+ return (mp);
+}
+
+static int
+vnd_st_sinfo(vnd_str_t *vsp)
+{
+ mblk_t *mp;
+ dl_info_req_t *dlir;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+ mp = allocb(MAX(sizeof (dl_info_req_t), sizeof (dl_info_ack_t)),
+ BPRI_HI);
+ if (mp == NULL) {
+ vsp->vns_errno = VND_E_NOMEM;
+ return (1);
+ }
+ vsp->vns_state = VNS_S_INFO_SENT;
+ cv_broadcast(&vsp->vns_stcv);
+
+ mp->b_datap->db_type = M_PCPROTO;
+ dlir = (dl_info_req_t *)mp->b_rptr;
+ mp->b_wptr = (uchar_t *)&dlir[1];
+ dlir->dl_primitive = DL_INFO_REQ;
+ putnext(vsp->vns_wq, mp);
+
+ return (0);
+}
+
+static int
+vnd_st_info(vnd_str_t *vsp)
+{
+ dl_info_ack_t *dlia;
+ mblk_t *mp;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+ mp = vnd_dlpi_inc_pop(vsp);
+ dlia = (dl_info_ack_t *)mp->b_rptr;
+ vsp->vns_dlpi_style = dlia->dl_provider_style;
+ vsp->vns_minwrite = dlia->dl_min_sdu;
+ vsp->vns_maxwrite = dlia->dl_max_sdu;
+
+ /*
+ * At this time we only support DL_ETHER devices.
+ */
+ if (dlia->dl_mac_type != DL_ETHER) {
+ freemsg(mp);
+ vsp->vns_errno = VND_E_NOTETHER;
+ return (1);
+ }
+
+ /*
+ * Because vnd operates on entire packets, we need to manually account
+ * for the ethernet header information. We add the size of the
+ * ether_vlan_header to account for this, regardless if it is using
+ * vlans or not.
+ */
+ vsp->vns_maxwrite += sizeof (struct ether_vlan_header);
+
+ freemsg(mp);
+ return (0);
+}
+
+static int
+vnd_st_sexclusive(vnd_str_t *vsp)
+{
+ mblk_t *mp;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+ mp = vnd_dlpi_alloc(sizeof (dl_attach_req_t), DL_EXCLUSIVE_REQ);
+ if (mp == NULL) {
+ vsp->vns_errno = VND_E_NOMEM;
+ return (1);
+ }
+
+ vsp->vns_state = VNS_S_EXCLUSIVE_SENT;
+ cv_broadcast(&vsp->vns_stcv);
+ putnext(vsp->vns_wq, mp);
+ return (0);
+}
+
+static int
+vnd_st_exclusive(vnd_str_t *vsp)
+{
+ mblk_t *mp;
+ t_uscalar_t prim, cprim;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+ mp = vnd_dlpi_inc_pop(vsp);
+ prim = ((dl_error_ack_t *)mp->b_rptr)->dl_primitive;
+ cprim = ((dl_ok_ack_t *)mp->b_rptr)->dl_correct_primitive;
+
+ if (prim != DL_OK_ACK && prim != DL_ERROR_ACK) {
+ vnd_drop_ctl(vsp, mp,
+ "wrong dlpi primitive for vnd_st_exclusive");
+ vsp->vns_errno = VND_E_DLPIINVAL;
+ return (1);
+ }
+
+ if (cprim != DL_EXCLUSIVE_REQ) {
+ vnd_drop_ctl(vsp, mp,
+ "vnd_st_exclusive: got ack/nack for wrong primitive");
+ vsp->vns_errno = VND_E_DLPIINVAL;
+ return (1);
+ }
+
+ if (prim == DL_ERROR_ACK)
+ vsp->vns_errno = VND_E_DLEXCL;
+
+ freemsg(mp);
+ return (prim == DL_ERROR_ACK);
+}
+
+/*
+ * Send down a DLPI_ATTACH_REQ.
+ */
+static int
+vnd_st_sattach(vnd_str_t *vsp)
+{
+ mblk_t *mp;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+ mp = vnd_dlpi_alloc(sizeof (dl_attach_req_t), DL_ATTACH_REQ);
+ if (mp == NULL) {
+ vsp->vns_errno = VND_E_NOMEM;
+ return (1);
+ }
+
+ ((dl_attach_req_t *)mp->b_rptr)->dl_ppa = 0;
+ vsp->vns_state = VNS_S_ATTACH_SENT;
+ cv_broadcast(&vsp->vns_stcv);
+ putnext(vsp->vns_wq, mp);
+
+ return (0);
+}
+
+static int
+vnd_st_attach(vnd_str_t *vsp)
+{
+ mblk_t *mp;
+ t_uscalar_t prim, cprim;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+ mp = vnd_dlpi_inc_pop(vsp);
+ prim = ((dl_ok_ack_t *)mp->b_rptr)->dl_primitive;
+ cprim = ((dl_ok_ack_t *)mp->b_rptr)->dl_correct_primitive;
+
+
+ if (prim != DL_OK_ACK && prim != DL_ERROR_ACK) {
+ vnd_drop_ctl(vsp, mp, "vnd_st_attach: unknown primitive type");
+ vsp->vns_errno = VND_E_DLPIINVAL;
+ return (1);
+ }
+
+ if (cprim != DL_ATTACH_REQ) {
+ vnd_drop_ctl(vsp, mp,
+ "vnd_st_attach: Got ack/nack for wrong primitive");
+ vsp->vns_errno = VND_E_DLPIINVAL;
+ return (1);
+ }
+
+ if (prim == DL_ERROR_ACK)
+ vsp->vns_errno = VND_E_ATTACHFAIL;
+
+ freemsg(mp);
+ return (prim == DL_ERROR_ACK);
+}
+
+static int
+vnd_st_sbind(vnd_str_t *vsp)
+{
+ mblk_t *mp;
+ dl_bind_req_t *dbrp;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+ mp = vnd_dlpi_alloc(sizeof (dl_bind_req_t) + sizeof (long),
+ DL_BIND_REQ);
+ if (mp == NULL) {
+ vsp->vns_errno = VND_E_NOMEM;
+ return (1);
+ }
+ dbrp = (dl_bind_req_t *)(mp->b_rptr);
+ dbrp->dl_sap = 0;
+ dbrp->dl_service_mode = DL_CLDLS;
+
+ vsp->vns_state = VNS_S_BIND_SENT;
+ cv_broadcast(&vsp->vns_stcv);
+ putnext(vsp->vns_wq, mp);
+
+ return (0);
+}
+
+static int
+vnd_st_bind(vnd_str_t *vsp)
+{
+ mblk_t *mp;
+ t_uscalar_t prim;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+ mp = vnd_dlpi_inc_pop(vsp);
+ prim = ((dl_error_ack_t *)mp->b_rptr)->dl_primitive;
+
+ if (prim != DL_BIND_ACK && prim != DL_ERROR_ACK) {
+ vnd_drop_ctl(vsp, mp, "wrong dlpi primitive for vnd_st_bind");
+ vsp->vns_errno = VND_E_DLPIINVAL;
+ return (1);
+ }
+
+ if (prim == DL_ERROR_ACK)
+ vsp->vns_errno = VND_E_BINDFAIL;
+
+ freemsg(mp);
+ return (prim == DL_ERROR_ACK);
+}
+
+static int
+vnd_st_spromisc(vnd_str_t *vsp, int type, vnd_str_state_t next)
+{
+ mblk_t *mp;
+ dl_promiscon_req_t *dprp;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+ mp = vnd_dlpi_alloc(sizeof (dl_promiscon_req_t), DL_PROMISCON_REQ);
+ if (mp == NULL) {
+ vsp->vns_errno = VND_E_NOMEM;
+ return (1);
+ }
+
+ dprp = (dl_promiscon_req_t *)mp->b_rptr;
+ dprp->dl_level = type;
+
+ vsp->vns_state = next;
+ cv_broadcast(&vsp->vns_stcv);
+ putnext(vsp->vns_wq, mp);
+
+ return (0);
+}
+
+static int
+vnd_st_promisc(vnd_str_t *vsp)
+{
+ mblk_t *mp;
+ t_uscalar_t prim, cprim;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+ mp = vnd_dlpi_inc_pop(vsp);
+ prim = ((dl_ok_ack_t *)mp->b_rptr)->dl_primitive;
+ cprim = ((dl_ok_ack_t *)mp->b_rptr)->dl_correct_primitive;
+
+ if (prim != DL_OK_ACK && prim != DL_ERROR_ACK) {
+ vnd_drop_ctl(vsp, mp,
+ "wrong dlpi primitive for vnd_st_promisc");
+ vsp->vns_errno = VND_E_DLPIINVAL;
+ return (1);
+ }
+
+ if (cprim != DL_PROMISCON_REQ) {
+ vnd_drop_ctl(vsp, mp,
+ "vnd_st_promisc: Got ack/nack for wrong primitive");
+ vsp->vns_errno = VND_E_DLPIINVAL;
+ return (1);
+ }
+
+ if (prim == DL_ERROR_ACK)
+ vsp->vns_errno = VND_E_PROMISCFAIL;
+
+ freemsg(mp);
+ return (prim == DL_ERROR_ACK);
+}
+
+static int
+vnd_st_scapabq(vnd_str_t *vsp)
+{
+ mblk_t *mp;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+
+ mp = vnd_dlpi_alloc(sizeof (dl_capability_req_t), DL_CAPABILITY_REQ);
+ if (mp == NULL) {
+ vsp->vns_errno = VND_E_NOMEM;
+ return (1);
+ }
+
+ vsp->vns_state = VNS_S_CAPAB_Q_SENT;
+ cv_broadcast(&vsp->vns_stcv);
+ putnext(vsp->vns_wq, mp);
+
+ return (0);
+}
+
+static void
+vnd_mac_input(vnd_str_t *vsp, mac_resource_t *unused, mblk_t *mp_chain,
+ mac_header_info_t *mhip)
+{
+ int signal = 0;
+ mblk_t *mp;
+ vnd_pnsd_t *nsp = vsp->vns_nsd;
+
+ ASSERT(vsp != NULL);
+ ASSERT(mp_chain != NULL);
+
+ for (mp = mp_chain; mp != NULL; mp = mp_chain) {
+ uint16_t vid;
+ mp_chain = mp->b_next;
+ mp->b_next = NULL;
+
+ /*
+ * If we were operating in a traditional dlpi context then we
+ * would have enabled DLIOCRAW and rather than the fast path, we
+ * would come through dld_str_rx_raw. That function does two
+ * things that we have to consider doing ourselves. The first is
+ * that it adjusts the b_rptr back to account for dld bumping us
+ * past the mac header. It also tries to account for cases where
+ * mac provides an illusion of the mac header. Fortunately, dld
+ * only allows the fastpath when the media type is the same as
+ * the native type. Therefore all we have to do here is adjust
+ * the b_rptr.
+ */
+ ASSERT(mp->b_rptr >= DB_BASE(mp) + mhip->mhi_hdrsize);
+ mp->b_rptr -= mhip->mhi_hdrsize;
+ vid = VLAN_ID(mhip->mhi_tci);
+ if (mhip->mhi_istagged && vid != VLAN_ID_NONE) {
+ bcopy(mp->b_rptr, mp->b_rptr + 4, 12);
+ mp->b_rptr += 4;
+ }
+
+ if (nsp->vpnd_hooked && vnd_hook(vsp, &mp, nsp->vpnd_neti_v4,
+ nsp->vpnd_event_in_v4, nsp->vpnd_token_in_v4,
+ nsp->vpnd_neti_v6, nsp->vpnd_event_in_v6,
+ nsp->vpnd_token_in_v6, vnd_drop_hook_in, vnd_drop_in) != 0)
+ continue;
+
+ VND_STAT_INC(vsp, vks_rpackets, 1);
+ VND_STAT_INC(vsp, vks_rbytes, msgsize(mp));
+ DTRACE_VND5(recv, mblk_t *, mp, void *, NULL, void *, NULL,
+ vnd_str_t *, vsp, mblk_t *, mp);
+ mutex_enter(&vsp->vns_dq_read.vdq_lock);
+ signal |= vnd_dq_push(&vsp->vns_dq_read, mp, B_FALSE,
+ vnd_drop_in);
+ mutex_exit(&vsp->vns_dq_read.vdq_lock);
+
+ }
+
+ if (signal != 0) {
+ cv_broadcast(&vsp->vns_dq_read.vdq_ready);
+ pollwakeup(&vsp->vns_dev->vdd_ph, POLLIN | POLLRDNORM);
+ }
+
+}
+
+static void
+vnd_mac_flow_control_stat(vnd_str_t *vsp, hrtime_t diff)
+{
+ VND_STAT_INC(vsp, vks_nmacflow, 1);
+ VND_STAT_INC(vsp, vks_tmacflow, diff);
+ if (diff >= VND_LATENCY_1MS)
+ VND_STAT_INC(vsp, vks_mac_flow_1ms, 1);
+ if (diff >= VND_LATENCY_10MS)
+ VND_STAT_INC(vsp, vks_mac_flow_10ms, 1);
+ if (diff >= VND_LATENCY_100MS)
+ VND_STAT_INC(vsp, vks_mac_flow_100ms, 1);
+ if (diff >= VND_LATENCY_1S)
+ VND_STAT_INC(vsp, vks_mac_flow_1s, 1);
+ if (diff >= VND_LATENCY_10S)
+ VND_STAT_INC(vsp, vks_mac_flow_10s, 1);
+}
+
+/*
+ * This is a callback from MAC that indicates that we are allowed to send
+ * packets again.
+ */
+static void
+vnd_mac_flow_control(void *arg, vnd_mac_cookie_t cookie)
+{
+ vnd_str_t *vsp = arg;
+ hrtime_t now, diff;
+
+ mutex_enter(&vsp->vns_lock);
+ now = gethrtime();
+
+ /*
+ * Check for the case that we beat vnd_squeue_tx_one to the punch.
+ * There's also an additional case here that we got notified because
+ * we're sharing a device that ran out of tx descriptors, even though it
+ * wasn't because of us.
+ */
+ if (!(vsp->vns_flags & VNS_F_FLOW_CONTROLLED)) {
+ vsp->vns_fcupdate = now;
+ mutex_exit(&vsp->vns_lock);
+ return;
+ }
+
+ ASSERT(vsp->vns_flags & VNS_F_FLOW_CONTROLLED);
+ ASSERT(vsp->vns_caps.vsc_fc_cookie == cookie);
+ vsp->vns_flags &= ~VNS_F_FLOW_CONTROLLED;
+ vsp->vns_caps.vsc_fc_cookie = NULL;
+ diff = now - vsp->vns_fclatch;
+ vsp->vns_fclatch = 0;
+ DTRACE_VND3(flow__resumed, vnd_str_t *, vsp, uint64_t,
+ vsp->vns_dq_write.vdq_cur, uintptr_t, cookie);
+ /*
+ * If someone has asked to flush the squeue and thus inserted a barrier,
+ * than we shouldn't schedule a drain.
+ */
+ if (!(vsp->vns_flags & (VNS_F_DRAIN_SCHEDULED | VNS_F_BARRIER))) {
+ vsp->vns_flags |= VNS_F_DRAIN_SCHEDULED;
+ gsqueue_enter_one(vsp->vns_squeue, &vsp->vns_drainblk,
+ vnd_squeue_tx_drain, vsp, GSQUEUE_FILL,
+ VND_SQUEUE_TAG_MAC_FLOW_CONTROL);
+ }
+ mutex_exit(&vsp->vns_lock);
+}
+
+static void
+vnd_mac_enter(vnd_str_t *vsp, mac_perim_handle_t *mphp)
+{
+ ASSERT(MUTEX_HELD(&vsp->vns_lock));
+ VERIFY(vsp->vns_caps.vsc_capab_f(vsp->vns_caps.vsc_capab_hdl,
+ DLD_CAPAB_PERIM, mphp, DLD_ENABLE) == 0);
+}
+
+static void
+vnd_mac_exit(vnd_str_t *vsp, mac_perim_handle_t mph)
+{
+ ASSERT(MUTEX_HELD(&vsp->vns_lock));
+ VERIFY(vsp->vns_caps.vsc_capab_f(vsp->vns_caps.vsc_capab_hdl,
+ DLD_CAPAB_PERIM, mph, DLD_DISABLE) == 0);
+}
+
+static int
+vnd_dld_cap_enable(vnd_str_t *vsp, vnd_rx_t rxfunc)
+{
+ int ret;
+ dld_capab_direct_t d;
+ mac_perim_handle_t mph;
+ vnd_str_capab_t *c = &vsp->vns_caps;
+
+ bzero(&d, sizeof (d));
+ d.di_rx_cf = (uintptr_t)rxfunc;
+ d.di_rx_ch = vsp;
+ d.di_flags = DI_DIRECT_RAW;
+
+ vnd_mac_enter(vsp, &mph);
+
+ /*
+ * If we're coming in here for a second pass, we need to make sure that
+ * we remove an existing flow control notification callback, otherwise
+ * we'll create a duplicate that will remain with garbage data.
+ */
+ if (c->vsc_tx_fc_hdl != NULL) {
+ ASSERT(c->vsc_set_fcb_hdl != NULL);
+ (void) c->vsc_set_fcb_f(c->vsc_set_fcb_hdl, NULL,
+ c->vsc_tx_fc_hdl);
+ c->vsc_tx_fc_hdl = NULL;
+ }
+
+ if (vsp->vns_caps.vsc_capab_f(c->vsc_capab_hdl,
+ DLD_CAPAB_DIRECT, &d, DLD_ENABLE) == 0) {
+ c->vsc_tx_f = (vnd_dld_tx_t)d.di_tx_df;
+ c->vsc_tx_hdl = d.di_tx_dh;
+ c->vsc_set_fcb_f = (vnd_dld_set_fcb_t)d.di_tx_cb_df;
+ c->vsc_set_fcb_hdl = d.di_tx_cb_dh;
+ c->vsc_is_fc_f = (vnd_dld_is_fc_t)d.di_tx_fctl_df;
+ c->vsc_is_fc_hdl = d.di_tx_fctl_dh;
+ c->vsc_tx_fc_hdl = c->vsc_set_fcb_f(c->vsc_set_fcb_hdl,
+ vnd_mac_flow_control, vsp);
+ c->vsc_flags |= VNS_C_DIRECT;
+ ret = 0;
+ } else {
+ vsp->vns_errno = VND_E_DIRECTFAIL;
+ ret = 1;
+ }
+ vnd_mac_exit(vsp, mph);
+ return (ret);
+}
+
+static int
+vnd_st_capabq(vnd_str_t *vsp)
+{
+ mblk_t *mp;
+ dl_capability_ack_t *cap;
+ dl_capability_sub_t *subp;
+ dl_capab_hcksum_t *hck;
+ dl_capab_dld_t *dld;
+ unsigned char *rp;
+ int ret = 0;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+ mp = vnd_dlpi_inc_pop(vsp);
+
+ rp = mp->b_rptr;
+ cap = (dl_capability_ack_t *)rp;
+ if (cap->dl_sub_length == 0)
+ goto done;
+
+ /* Don't try to process something too big */
+ if (sizeof (dl_capability_ack_t) + cap->dl_sub_length > MBLKL(mp)) {
+ VND_STAT_INC(vsp, vks_ndlpidrops, 1);
+ VND_STAT_INC(vsp, vks_tdrops, 1);
+ vsp->vns_errno = VND_E_CAPACKINVAL;
+ ret = 1;
+ goto done;
+ }
+
+ rp += cap->dl_sub_offset;
+
+ while (cap->dl_sub_length > 0) {
+ subp = (dl_capability_sub_t *)rp;
+ /* Sanity check something crazy from down below */
+ if (subp->dl_length + sizeof (dl_capability_sub_t) >
+ cap->dl_sub_length) {
+ VND_STAT_INC(vsp, vks_ndlpidrops, 1);
+ VND_STAT_INC(vsp, vks_tdrops, 1);
+ vsp->vns_errno = VND_E_SUBCAPINVAL;
+ ret = 1;
+ goto done;
+ }
+
+ switch (subp->dl_cap) {
+ case DL_CAPAB_HCKSUM:
+ hck = (dl_capab_hcksum_t *)(rp +
+ sizeof (dl_capability_sub_t));
+ if (hck->hcksum_version != HCKSUM_CURRENT_VERSION) {
+ vsp->vns_caps.vsc_flags |= VNS_C_HCKSUM_BADVERS;
+ break;
+ }
+ if (dlcapabcheckqid(&hck->hcksum_mid, vsp->vns_lrq) !=
+ B_TRUE) {
+ vsp->vns_errno = VND_E_CAPABPASS;
+ ret = 1;
+ goto done;
+ }
+ vsp->vns_caps.vsc_flags |= VNS_C_HCKSUM;
+ vsp->vns_caps.vsc_hcksum_opts = hck->hcksum_txflags;
+ break;
+ case DL_CAPAB_DLD:
+ dld = (dl_capab_dld_t *)(rp +
+ sizeof (dl_capability_sub_t));
+ if (dld->dld_version != DLD_CURRENT_VERSION) {
+ vsp->vns_errno = VND_E_DLDBADVERS;
+ ret = 1;
+ goto done;
+ }
+ if (dlcapabcheckqid(&dld->dld_mid, vsp->vns_lrq) !=
+ B_TRUE) {
+ vsp->vns_errno = VND_E_CAPABPASS;
+ ret = 1;
+ goto done;
+ }
+ vsp->vns_caps.vsc_flags |= VNS_C_DLD;
+ vsp->vns_caps.vsc_capab_f =
+ (vnd_dld_cap_t)dld->dld_capab;
+ vsp->vns_caps.vsc_capab_hdl =
+ (void *)dld->dld_capab_handle;
+ /*
+ * At this point in time, we have to set up a direct
+ * function that drops all input. This validates that
+ * we'll be able to set up direct input and that we can
+ * easily switch it earlier to the real data function
+ * when we've plumbed everything up.
+ */
+ if (vnd_dld_cap_enable(vsp, vnd_mac_drop_input) != 0) {
+ /* vns_errno set by vnd_dld_cap_enable */
+ ret = 1;
+ goto done;
+ }
+ break;
+ default:
+ /* Ignore unsupported cap */
+ break;
+ }
+
+ rp += sizeof (dl_capability_sub_t) + subp->dl_length;
+ cap->dl_sub_length -= sizeof (dl_capability_sub_t) +
+ subp->dl_length;
+ }
+
+done:
+ /* Make sure we enabled direct callbacks */
+ if (ret == 0 && !(vsp->vns_caps.vsc_flags & VNS_C_DIRECT)) {
+ vsp->vns_errno = VND_E_DIRECTNOTSUP;
+ ret = 1;
+ }
+
+ freemsg(mp);
+ return (ret);
+}
+
+static void
+vnd_st_sonline(vnd_str_t *vsp)
+{
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+ vsp->vns_state = VNS_S_ONLINE;
+ cv_broadcast(&vsp->vns_stcv);
+}
+
+static void
+vnd_st_shutdown(vnd_str_t *vsp)
+{
+ mac_perim_handle_t mph;
+ vnd_str_capab_t *vsc = &vsp->vns_caps;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+
+ /*
+ * At this point in time we know that there is no one transmitting as
+ * our final reference has been torn down and that vnd_s_close inserted
+ * a barrier to validate that everything is flushed.
+ */
+ if (vsc->vsc_flags & VNS_C_DIRECT) {
+ vnd_mac_enter(vsp, &mph);
+ vsc->vsc_flags &= ~VNS_C_DIRECT;
+ (void) vsc->vsc_set_fcb_f(vsc->vsc_set_fcb_hdl, NULL,
+ vsc->vsc_tx_fc_hdl);
+ vsc->vsc_tx_fc_hdl = NULL;
+ (void) vsc->vsc_capab_f(vsc->vsc_capab_hdl, DLD_CAPAB_DIRECT,
+ NULL, DLD_DISABLE);
+ vnd_mac_exit(vsp, mph);
+ }
+}
+
+static boolean_t
+vnd_st_spromiscoff(vnd_str_t *vsp, int type, vnd_str_state_t next)
+{
+ boolean_t ret = B_TRUE;
+ mblk_t *mp;
+ dl_promiscoff_req_t *dprp;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+ mp = vnd_dlpi_alloc(sizeof (dl_promiscon_req_t), DL_PROMISCOFF_REQ);
+ if (mp == NULL) {
+ cmn_err(CE_NOTE, "!vnd failed to allocate mblk_t for "
+ "promiscoff request");
+ ret = B_FALSE;
+ goto next;
+ }
+
+ dprp = (dl_promiscoff_req_t *)mp->b_rptr;
+ dprp->dl_level = type;
+
+ putnext(vsp->vns_wq, mp);
+next:
+ vsp->vns_state = next;
+ cv_broadcast(&vsp->vns_stcv);
+ return (ret);
+}
+
+static void
+vnd_st_promiscoff(vnd_str_t *vsp)
+{
+ mblk_t *mp;
+ t_uscalar_t prim, cprim;
+
+ VERIFY(MUTEX_HELD(&vsp->vns_lock));
+
+ /*
+ * Unlike other cases where we guard against the incoming packet being
+ * NULL, during tear down we try to keep driving and therefore we may
+ * have gotten here due to an earlier failure, so there's nothing to do.
+ */
+ mp = vnd_dlpi_inc_pop(vsp);
+ if (mp == NULL)
+ return;
+
+ prim = ((dl_ok_ack_t *)mp->b_rptr)->dl_primitive;
+ cprim = ((dl_ok_ack_t *)mp->b_rptr)->dl_correct_primitive;
+
+ if (prim != DL_OK_ACK && prim != DL_ERROR_ACK) {
+ vnd_drop_ctl(vsp, mp,
+ "wrong dlpi primitive for vnd_st_promiscoff");
+ return;
+ }
+
+ if (cprim != DL_PROMISCOFF_REQ) {
+ vnd_drop_ctl(vsp, mp,
+ "vnd_st_promiscoff: Got ack/nack for wrong primitive");
+ return;
+ }
+
+ if (prim == DL_ERROR_ACK) {
+ cmn_err(CE_WARN, "!failed to disable promiscuos mode during "
+ "vnd teardown");
+ }
+}
+
+static boolean_t
+vnd_st_sunbind(vnd_str_t *vsp)
+{
+ mblk_t *mp;
+ boolean_t ret = B_TRUE;
+
+ mp = vnd_dlpi_alloc(sizeof (dl_unbind_req_t), DL_UNBIND_REQ);
+ if (mp == NULL) {
+ cmn_err(CE_NOTE, "!vnd failed to allocate mblk_t for "
+ "unbind request");
+ ret = B_FALSE;
+ goto next;
+ }
+
+ putnext(vsp->vns_wq, mp);
+next:
+ vsp->vns_state = VNS_S_UNBIND_SENT;
+ cv_broadcast(&vsp->vns_stcv);
+ return (ret);
+}
+
+static void
+vnd_st_unbind(vnd_str_t *vsp)
+{
+ mblk_t *mp;
+ t_uscalar_t prim, cprim;
+
+ /*
+ * Unlike other cases where we guard against the incoming packet being
+ * NULL, during tear down we try to keep driving and therefore we may
+ * have gotten here due to an earlier failure, so there's nothing to do.
+ */
+ mp = vnd_dlpi_inc_pop(vsp);
+ if (mp == NULL)
+ goto next;
+
+ prim = ((dl_ok_ack_t *)mp->b_rptr)->dl_primitive;
+ cprim = ((dl_ok_ack_t *)mp->b_rptr)->dl_correct_primitive;
+
+ if (prim != DL_OK_ACK && prim != DL_ERROR_ACK) {
+ vnd_drop_ctl(vsp, mp,
+ "wrong dlpi primitive for vnd_st_unbind");
+ goto next;
+ }
+
+ if (cprim != DL_UNBIND_REQ) {
+ vnd_drop_ctl(vsp, mp,
+ "vnd_st_unbind: Got ack/nack for wrong primitive");
+ goto next;
+ }
+
+ if (prim == DL_ERROR_ACK) {
+ cmn_err(CE_WARN, "!failed to unbind stream during vnd "
+ "teardown");
+ }
+
+next:
+ vsp->vns_state = VNS_S_ZOMBIE;
+ cv_broadcast(&vsp->vns_stcv);
+}
+
+/*
+ * Perform state transitions. This is a one way shot down the flow chart
+ * described in the big theory statement.
+ */
+static void
+vnd_str_state_transition(void *arg)
+{
+ boolean_t died = B_FALSE;
+ vnd_str_t *vsp = arg;
+ mblk_t *mp;
+
+ mutex_enter(&vsp->vns_lock);
+ if (vsp->vns_dlpi_inc == NULL && (vsp->vns_state != VNS_S_INITIAL &&
+ vsp->vns_state != VNS_S_SHUTTING_DOWN)) {
+ mutex_exit(&vsp->vns_lock);
+ return;
+ }
+
+ /*
+ * When trying to shut down, or unwinding from a failed enabling, rather
+ * than immediately entering the ZOMBIE state, we may instead opt to try
+ * and enter the next state in the progression. This is especially
+ * important when trying to tear everything down.
+ */
+loop:
+ DTRACE_PROBE2(vnd__state__transition, uintptr_t, vsp,
+ vnd_str_state_t, vsp->vns_state);
+ switch (vsp->vns_state) {
+ case VNS_S_INITIAL:
+ VERIFY(vsp->vns_dlpi_inc == NULL);
+ if (vnd_st_sinfo(vsp) != 0)
+ died = B_TRUE;
+ break;
+ case VNS_S_INFO_SENT:
+ VERIFY(vsp->vns_dlpi_inc != NULL);
+ if (vnd_st_info(vsp) == 0) {
+ if (vnd_st_sexclusive(vsp) != 0)
+ died = B_TRUE;
+ } else {
+ died = B_TRUE;
+ }
+ break;
+ case VNS_S_EXCLUSIVE_SENT:
+ VERIFY(vsp->vns_dlpi_inc != NULL);
+ if (vnd_st_exclusive(vsp) == 0) {
+ if (vsp->vns_dlpi_style == DL_STYLE2) {
+ if (vnd_st_sattach(vsp) != 0)
+ died = B_TRUE;
+ } else {
+ if (vnd_st_sbind(vsp) != 0)
+ died = B_TRUE;
+ }
+ } else {
+ died = B_TRUE;
+ }
+ break;
+ case VNS_S_ATTACH_SENT:
+ VERIFY(vsp->vns_dlpi_inc != NULL);
+ if (vnd_st_attach(vsp) == 0) {
+ if (vnd_st_sbind(vsp) != 0)
+ died = B_TRUE;
+ } else {
+ died = B_TRUE;
+ }
+ break;
+ case VNS_S_BIND_SENT:
+ VERIFY(vsp->vns_dlpi_inc != NULL);
+ if (vnd_st_bind(vsp) == 0) {
+ if (vnd_st_spromisc(vsp, DL_PROMISC_SAP,
+ VNS_S_SAP_PROMISC_SENT) != 0)
+ died = B_TRUE;
+ } else {
+ died = B_TRUE;
+ }
+ break;
+ case VNS_S_SAP_PROMISC_SENT:
+ VERIFY(vsp->vns_dlpi_inc != NULL);
+ if (vnd_st_promisc(vsp) == 0) {
+ if (vnd_st_spromisc(vsp, DL_PROMISC_MULTI,
+ VNS_S_MULTI_PROMISC_SENT) != 0)
+ died = B_TRUE;
+ } else {
+ died = B_TRUE;
+ }
+ break;
+ case VNS_S_MULTI_PROMISC_SENT:
+ VERIFY(vsp->vns_dlpi_inc != NULL);
+ if (vnd_st_promisc(vsp) == 0) {
+ if (vnd_st_spromisc(vsp, DL_PROMISC_RX_ONLY,
+ VNS_S_RX_ONLY_PROMISC_SENT) != 0)
+ died = B_TRUE;
+ } else {
+ died = B_TRUE;
+ }
+ break;
+ case VNS_S_RX_ONLY_PROMISC_SENT:
+ VERIFY(vsp->vns_dlpi_inc != NULL);
+ if (vnd_st_promisc(vsp) == 0) {
+ if (vnd_st_spromisc(vsp, DL_PROMISC_FIXUPS,
+ VNS_S_FIXUP_PROMISC_SENT) != 0)
+ died = B_TRUE;
+ } else {
+ died = B_TRUE;
+ }
+ break;
+ case VNS_S_FIXUP_PROMISC_SENT:
+ VERIFY(vsp->vns_dlpi_inc != NULL);
+ if (vnd_st_promisc(vsp) == 0) {
+ if (vnd_st_scapabq(vsp) != 0)
+ died = B_TRUE;
+ } else {
+ died = B_TRUE;
+ }
+ break;
+ case VNS_S_CAPAB_Q_SENT:
+ if (vnd_st_capabq(vsp) != 0)
+ died = B_TRUE;
+ else
+ vnd_st_sonline(vsp);
+ break;
+ case VNS_S_SHUTTING_DOWN:
+ vnd_st_shutdown(vsp);
+ if (vnd_st_spromiscoff(vsp, DL_PROMISC_MULTI,
+ VNS_S_MULTICAST_PROMISCOFF_SENT) == B_FALSE)
+ goto loop;
+ break;
+ case VNS_S_MULTICAST_PROMISCOFF_SENT:
+ vnd_st_promiscoff(vsp);
+ if (vnd_st_spromiscoff(vsp, DL_PROMISC_SAP,
+ VNS_S_SAP_PROMISCOFF_SENT) == B_FALSE)
+ goto loop;
+ break;
+ case VNS_S_SAP_PROMISCOFF_SENT:
+ vnd_st_promiscoff(vsp);
+ if (vnd_st_sunbind(vsp) == B_FALSE)
+ goto loop;
+ break;
+ case VNS_S_UNBIND_SENT:
+ vnd_st_unbind(vsp);
+ break;
+ case VNS_S_ZOMBIE:
+ while ((mp = vnd_dlpi_inc_pop(vsp)) != NULL)
+ vnd_drop_ctl(vsp, mp, "vsp received data as a zombie");
+ break;
+ default:
+ panic("vnd_str_t entered an unknown state");
+ }
+
+ if (died == B_TRUE) {
+ ASSERT(vsp->vns_errno != VND_E_SUCCESS);
+ vsp->vns_laststate = vsp->vns_state;
+ vsp->vns_state = VNS_S_ZOMBIE;
+ cv_broadcast(&vsp->vns_stcv);
+ }
+
+ mutex_exit(&vsp->vns_lock);
+}
+
+static void
+vnd_dlpi_taskq_dispatch(void *arg)
+{
+ vnd_str_t *vsp = arg;
+ int run = 1;
+
+ while (run != 0) {
+ vnd_str_state_transition(vsp);
+ mutex_enter(&vsp->vns_lock);
+ if (vsp->vns_flags & VNS_F_CONDEMNED ||
+ vsp->vns_dlpi_inc == NULL) {
+ run = 0;
+ vsp->vns_flags &= ~VNS_F_TASKQ_DISPATCHED;
+ }
+ if (vsp->vns_flags & VNS_F_CONDEMNED)
+ cv_signal(&vsp->vns_cancelcv);
+ mutex_exit(&vsp->vns_lock);
+ }
+}
+
+static int
+vnd_neti_getifname(net_handle_t neti, phy_if_t phy, char *buf, const size_t len)
+{
+ return (-1);
+}
+
+static int
+vnd_neti_getmtu(net_handle_t neti, phy_if_t phy, lif_if_t ifdata)
+{
+ return (-1);
+}
+
+static int
+vnd_neti_getptmue(net_handle_t neti)
+{
+ return (-1);
+}
+
+static int
+vnd_neti_getlifaddr(net_handle_t neti, phy_if_t phy, lif_if_t ifdata,
+ size_t nelem, net_ifaddr_t type[], void *storage)
+{
+ return (-1);
+}
+
+static int
+vnd_neti_getlifzone(net_handle_t neti, phy_if_t phy, lif_if_t ifdata,
+ zoneid_t *zid)
+{
+ return (-1);
+}
+
+static int
+vnd_neti_getlifflags(net_handle_t neti, phy_if_t phy, lif_if_t ifdata,
+ uint64_t *flags)
+{
+ return (-1);
+}
+
+static phy_if_t
+vnd_neti_phygetnext(net_handle_t neti, phy_if_t phy)
+{
+ return (-1);
+}
+
+static phy_if_t
+vnd_neti_phylookup(net_handle_t neti, const char *name)
+{
+ return (-1);
+}
+
+static lif_if_t
+vnd_neti_lifgetnext(net_handle_t neti, phy_if_t phy, lif_if_t ifdata)
+{
+ return (-1);
+}
+
+static int
+vnd_neti_inject(net_handle_t neti, inject_t style, net_inject_t *packet)
+{
+ return (-1);
+}
+
+static phy_if_t
+vnd_neti_route(net_handle_t neti, struct sockaddr *address,
+ struct sockaddr *next)
+{
+ return ((phy_if_t)-1);
+}
+
+static int
+vnd_neti_ispchksum(net_handle_t neti, mblk_t *mp)
+{
+ return (-1);
+}
+
+static int
+vnd_neti_isvchksum(net_handle_t neti, mblk_t *mp)
+{
+ return (-1);
+}
+
+static net_protocol_t vnd_neti_info_v4 = {
+ NETINFO_VERSION,
+ NHF_VND_INET,
+ vnd_neti_getifname,
+ vnd_neti_getmtu,
+ vnd_neti_getptmue,
+ vnd_neti_getlifaddr,
+ vnd_neti_getlifzone,
+ vnd_neti_getlifflags,
+ vnd_neti_phygetnext,
+ vnd_neti_phylookup,
+ vnd_neti_lifgetnext,
+ vnd_neti_inject,
+ vnd_neti_route,
+ vnd_neti_ispchksum,
+ vnd_neti_isvchksum
+};
+
+static net_protocol_t vnd_neti_info_v6 = {
+ NETINFO_VERSION,
+ NHF_VND_INET6,
+ vnd_neti_getifname,
+ vnd_neti_getmtu,
+ vnd_neti_getptmue,
+ vnd_neti_getlifaddr,
+ vnd_neti_getlifzone,
+ vnd_neti_getlifflags,
+ vnd_neti_phygetnext,
+ vnd_neti_phylookup,
+ vnd_neti_lifgetnext,
+ vnd_neti_inject,
+ vnd_neti_route,
+ vnd_neti_ispchksum,
+ vnd_neti_isvchksum
+};
+
+
+static int
+vnd_netinfo_init(vnd_pnsd_t *nsp)
+{
+ nsp->vpnd_neti_v4 = net_protocol_register(nsp->vpnd_nsid,
+ &vnd_neti_info_v4);
+ ASSERT(nsp->vpnd_neti_v4 != NULL);
+
+ nsp->vpnd_neti_v6 = net_protocol_register(nsp->vpnd_nsid,
+ &vnd_neti_info_v6);
+ ASSERT(nsp->vpnd_neti_v6 != NULL);
+
+ nsp->vpnd_family_v4.hf_version = HOOK_VERSION;
+ nsp->vpnd_family_v4.hf_name = "vnd_inet";
+
+ if (net_family_register(nsp->vpnd_neti_v4, &nsp->vpnd_family_v4) != 0) {
+ net_protocol_unregister(nsp->vpnd_neti_v4);
+ net_protocol_unregister(nsp->vpnd_neti_v6);
+ cmn_err(CE_NOTE, "vnd_netinfo_init: net_family_register "
+ "failed for stack %d", nsp->vpnd_nsid);
+ return (1);
+ }
+
+ nsp->vpnd_family_v6.hf_version = HOOK_VERSION;
+ nsp->vpnd_family_v6.hf_name = "vnd_inet6";
+
+ if (net_family_register(nsp->vpnd_neti_v6, &nsp->vpnd_family_v6) != 0) {
+ net_family_unregister(nsp->vpnd_neti_v4, &nsp->vpnd_family_v4);
+ net_protocol_unregister(nsp->vpnd_neti_v4);
+ net_protocol_unregister(nsp->vpnd_neti_v6);
+ cmn_err(CE_NOTE, "vnd_netinfo_init: net_family_register "
+ "failed for stack %d", nsp->vpnd_nsid);
+ return (1);
+ }
+
+ nsp->vpnd_event_in_v4.he_version = HOOK_VERSION;
+ nsp->vpnd_event_in_v4.he_name = NH_PHYSICAL_IN;
+ nsp->vpnd_event_in_v4.he_flags = 0;
+ nsp->vpnd_event_in_v4.he_interested = B_FALSE;
+
+ nsp->vpnd_token_in_v4 = net_event_register(nsp->vpnd_neti_v4,
+ &nsp->vpnd_event_in_v4);
+ if (nsp->vpnd_token_in_v4 == NULL) {
+ net_family_unregister(nsp->vpnd_neti_v4, &nsp->vpnd_family_v4);
+ net_family_unregister(nsp->vpnd_neti_v6, &nsp->vpnd_family_v6);
+ net_protocol_unregister(nsp->vpnd_neti_v4);
+ net_protocol_unregister(nsp->vpnd_neti_v6);
+ cmn_err(CE_NOTE, "vnd_netinfo_init: net_event_register "
+ "failed for stack %d", nsp->vpnd_nsid);
+ return (1);
+ }
+
+ nsp->vpnd_event_in_v6.he_version = HOOK_VERSION;
+ nsp->vpnd_event_in_v6.he_name = NH_PHYSICAL_IN;
+ nsp->vpnd_event_in_v6.he_flags = 0;
+ nsp->vpnd_event_in_v6.he_interested = B_FALSE;
+
+ nsp->vpnd_token_in_v6 = net_event_register(nsp->vpnd_neti_v6,
+ &nsp->vpnd_event_in_v6);
+ if (nsp->vpnd_token_in_v6 == NULL) {
+ net_event_shutdown(nsp->vpnd_neti_v4, &nsp->vpnd_event_in_v4);
+ net_event_unregister(nsp->vpnd_neti_v4, &nsp->vpnd_event_in_v4);
+ net_family_unregister(nsp->vpnd_neti_v4, &nsp->vpnd_family_v4);
+ net_family_unregister(nsp->vpnd_neti_v6, &nsp->vpnd_family_v6);
+ net_protocol_unregister(nsp->vpnd_neti_v4);
+ net_protocol_unregister(nsp->vpnd_neti_v6);
+ cmn_err(CE_NOTE, "vnd_netinfo_init: net_event_register "
+ "failed for stack %d", nsp->vpnd_nsid);
+ return (1);
+ }
+
+ nsp->vpnd_event_out_v4.he_version = HOOK_VERSION;
+ nsp->vpnd_event_out_v4.he_name = NH_PHYSICAL_OUT;
+ nsp->vpnd_event_out_v4.he_flags = 0;
+ nsp->vpnd_event_out_v4.he_interested = B_FALSE;
+
+ nsp->vpnd_token_out_v4 = net_event_register(nsp->vpnd_neti_v4,
+ &nsp->vpnd_event_out_v4);
+ if (nsp->vpnd_token_out_v4 == NULL) {
+ net_event_shutdown(nsp->vpnd_neti_v6, &nsp->vpnd_event_in_v6);
+ net_event_unregister(nsp->vpnd_neti_v6, &nsp->vpnd_event_in_v6);
+ net_event_shutdown(nsp->vpnd_neti_v4, &nsp->vpnd_event_in_v4);
+ net_event_unregister(nsp->vpnd_neti_v4, &nsp->vpnd_event_in_v4);
+ net_family_unregister(nsp->vpnd_neti_v4, &nsp->vpnd_family_v4);
+ net_family_unregister(nsp->vpnd_neti_v6, &nsp->vpnd_family_v6);
+ net_protocol_unregister(nsp->vpnd_neti_v4);
+ net_protocol_unregister(nsp->vpnd_neti_v6);
+ cmn_err(CE_NOTE, "vnd_netinfo_init: net_event_register "
+ "failed for stack %d", nsp->vpnd_nsid);
+ return (1);
+ }
+
+ nsp->vpnd_event_out_v6.he_version = HOOK_VERSION;
+ nsp->vpnd_event_out_v6.he_name = NH_PHYSICAL_OUT;
+ nsp->vpnd_event_out_v6.he_flags = 0;
+ nsp->vpnd_event_out_v6.he_interested = B_FALSE;
+
+ nsp->vpnd_token_out_v6 = net_event_register(nsp->vpnd_neti_v6,
+ &nsp->vpnd_event_out_v6);
+ if (nsp->vpnd_token_out_v6 == NULL) {
+ net_event_shutdown(nsp->vpnd_neti_v6, &nsp->vpnd_event_in_v6);
+ net_event_unregister(nsp->vpnd_neti_v6, &nsp->vpnd_event_in_v6);
+ net_event_shutdown(nsp->vpnd_neti_v6, &nsp->vpnd_event_in_v6);
+ net_event_unregister(nsp->vpnd_neti_v6, &nsp->vpnd_event_in_v6);
+ net_event_shutdown(nsp->vpnd_neti_v4, &nsp->vpnd_event_in_v4);
+ net_event_unregister(nsp->vpnd_neti_v4, &nsp->vpnd_event_in_v4);
+ net_family_unregister(nsp->vpnd_neti_v4, &nsp->vpnd_family_v4);
+ net_family_unregister(nsp->vpnd_neti_v6, &nsp->vpnd_family_v6);
+ net_protocol_unregister(nsp->vpnd_neti_v4);
+ net_protocol_unregister(nsp->vpnd_neti_v6);
+ cmn_err(CE_NOTE, "vnd_netinfo_init: net_event_register "
+ "failed for stack %d", nsp->vpnd_nsid);
+ return (1);
+ }
+
+ return (0);
+}
+
+static void
+vnd_netinfo_shutdown(vnd_pnsd_t *nsp)
+{
+ int ret;
+
+ ret = net_event_shutdown(nsp->vpnd_neti_v4, &nsp->vpnd_event_in_v4);
+ VERIFY(ret == 0);
+ ret = net_event_shutdown(nsp->vpnd_neti_v4, &nsp->vpnd_event_out_v4);
+ VERIFY(ret == 0);
+ ret = net_event_shutdown(nsp->vpnd_neti_v6, &nsp->vpnd_event_in_v6);
+ VERIFY(ret == 0);
+ ret = net_event_shutdown(nsp->vpnd_neti_v6, &nsp->vpnd_event_out_v6);
+ VERIFY(ret == 0);
+}
+
+static void
+vnd_netinfo_fini(vnd_pnsd_t *nsp)
+{
+ int ret;
+
+ ret = net_event_unregister(nsp->vpnd_neti_v4, &nsp->vpnd_event_in_v4);
+ VERIFY(ret == 0);
+ ret = net_event_unregister(nsp->vpnd_neti_v4, &nsp->vpnd_event_out_v4);
+ VERIFY(ret == 0);
+ ret = net_event_unregister(nsp->vpnd_neti_v6, &nsp->vpnd_event_in_v6);
+ VERIFY(ret == 0);
+ ret = net_event_unregister(nsp->vpnd_neti_v6, &nsp->vpnd_event_out_v6);
+ VERIFY(ret == 0);
+ ret = net_family_unregister(nsp->vpnd_neti_v4, &nsp->vpnd_family_v4);
+ VERIFY(ret == 0);
+ ret = net_family_unregister(nsp->vpnd_neti_v6, &nsp->vpnd_family_v6);
+ VERIFY(ret == 0);
+ ret = net_protocol_unregister(nsp->vpnd_neti_v4);
+ VERIFY(ret == 0);
+ ret = net_protocol_unregister(nsp->vpnd_neti_v6);
+ VERIFY(ret == 0);
+}
+
+static void
+vnd_strbarrier_cb(void *arg, mblk_t *bmp, gsqueue_t *gsp, void *dummy)
+{
+ vnd_str_t *vsp = arg;
+
+ VERIFY(bmp == &vsp->vns_barrierblk);
+ mutex_enter(&vsp->vns_lock);
+ VERIFY(vsp->vns_flags & VNS_F_BARRIER);
+ VERIFY(!(vsp->vns_flags & VNS_F_BARRIER_DONE));
+ vsp->vns_flags |= VNS_F_BARRIER_DONE;
+ mutex_exit(&vsp->vns_lock);
+
+ /*
+ * For better or worse, we have to broadcast here as we could have a
+ * thread that's blocked for completion as well as one that's blocked
+ * waiting to do a barrier itself.
+ */
+ cv_broadcast(&vsp->vns_barriercv);
+}
+
+/*
+ * This is a data barrier for the stream while it is in fastpath mode. It blocks
+ * and ensures that there is nothing else in the squeue.
+ */
+static void
+vnd_strbarrier(vnd_str_t *vsp)
+{
+ mutex_enter(&vsp->vns_lock);
+ while (vsp->vns_flags & VNS_F_BARRIER)
+ cv_wait(&vsp->vns_barriercv, &vsp->vns_lock);
+ vsp->vns_flags |= VNS_F_BARRIER;
+ mutex_exit(&vsp->vns_lock);
+
+ gsqueue_enter_one(vsp->vns_squeue, &vsp->vns_barrierblk,
+ vnd_strbarrier_cb, vsp, GSQUEUE_PROCESS, VND_SQUEUE_TAG_STRBARRIER);
+
+ mutex_enter(&vsp->vns_lock);
+ while (!(vsp->vns_flags & VNS_F_BARRIER_DONE))
+ cv_wait(&vsp->vns_barriercv, &vsp->vns_lock);
+ vsp->vns_flags &= ~VNS_F_BARRIER;
+ vsp->vns_flags &= ~VNS_F_BARRIER_DONE;
+ mutex_exit(&vsp->vns_lock);
+
+ /*
+ * We have to broadcast in case anyone is waiting for the barrier
+ * themselves.
+ */
+ cv_broadcast(&vsp->vns_barriercv);
+}
+
+/*
+ * Based on the type of message that we're dealing with we're going to want to
+ * do one of several things. Basically if it looks like it's something we know
+ * about, we should probably handle it in one of our transition threads.
+ * Otherwise, we should just simply putnext.
+ */
+static int
+vnd_s_rput(queue_t *q, mblk_t *mp)
+{
+ t_uscalar_t prim;
+ int dispatch = 0;
+ vnd_str_t *vsp = q->q_ptr;
+
+ switch (DB_TYPE(mp)) {
+ case M_PROTO:
+ case M_PCPROTO:
+ if (MBLKL(mp) < sizeof (t_uscalar_t)) {
+ vnd_drop_ctl(vsp, mp, "PROTO message too short");
+ break;
+ }
+
+ prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;
+ if (prim == DL_UNITDATA_REQ || prim == DL_UNITDATA_IND) {
+ vnd_drop_ctl(vsp, mp,
+ "recieved an unsupported dlpi DATA req");
+ break;
+ }
+
+ /*
+ * Enqueue the entry and fire off a taskq dispatch.
+ */
+ mutex_enter(&vsp->vns_lock);
+ vnd_dlpi_inc_push(vsp, mp);
+ if (!(vsp->vns_flags & VNS_F_TASKQ_DISPATCHED)) {
+ dispatch = 1;
+ vsp->vns_flags |= VNS_F_TASKQ_DISPATCHED;
+ }
+ mutex_exit(&vsp->vns_lock);
+ if (dispatch != 0)
+ taskq_dispatch_ent(vnd_taskq, vnd_dlpi_taskq_dispatch,
+ vsp, 0, &vsp->vns_tqe);
+ break;
+ case M_DATA:
+ vnd_drop_in(vsp, mp, "M_DATA via put(9E)");
+ break;
+ default:
+ putnext(vsp->vns_rq, mp);
+ }
+ return (0);
+}
+
+static void
+vnd_strioctl(queue_t *q, vnd_str_t *vsp, mblk_t *mp, struct iocblk *iocp)
+{
+ int error;
+ vnd_strioc_t *visp;
+
+ if (iocp->ioc_cmd != VND_STRIOC_ASSOCIATE ||
+ iocp->ioc_count != TRANSPARENT) {
+ error = EINVAL;
+ goto nak;
+ }
+
+ /*
+ * All streams ioctls that we support must use kcred as a means to
+ * distinguish that this is a layered open by the kernel as opposed to
+ * one by a user who has done an I_PUSH of the module.
+ */
+ if (iocp->ioc_cr != kcred) {
+ error = EPERM;
+ goto nak;
+ }
+
+ if (mp->b_cont == NULL) {
+ error = EAGAIN;
+ goto nak;
+ }
+
+ visp = kmem_alloc(sizeof (vnd_strioc_t), KM_SLEEP);
+ ASSERT(MBLKL(mp->b_cont) == sizeof (caddr_t));
+ visp->vs_addr = *(caddr_t *)mp->b_cont->b_rptr;
+ visp->vs_state = VSS_COPYIN;
+
+ mcopyin(mp, (void *)visp, sizeof (vnd_strioc_associate_t), NULL);
+ qreply(q, mp);
+
+ return;
+
+nak:
+ if (mp->b_cont != NULL) {
+ freemsg(mp->b_cont);
+ mp->b_cont = NULL;
+ }
+
+ iocp->ioc_error = error;
+ mp->b_datap->db_type = M_IOCNAK;
+ iocp->ioc_count = 0;
+ qreply(q, mp);
+}
+
+static void
+vnd_striocdata(queue_t *q, vnd_str_t *vsp, mblk_t *mp, struct copyresp *csp)
+{
+ int error;
+ vnd_str_state_t state;
+ struct copyreq *crp;
+ vnd_strioc_associate_t *vss;
+ vnd_dev_t *vdp = NULL;
+ vnd_pnsd_t *nsp = NULL;
+ char iname[2*VND_NAMELEN];
+ zone_t *zone;
+ vnd_strioc_t *visp;
+
+ visp = (vnd_strioc_t *)csp->cp_private;
+
+ /* If it's not ours, it's not our problem */
+ if (csp->cp_cmd != VND_STRIOC_ASSOCIATE) {
+ if (q->q_next != NULL) {
+ putnext(q, mp);
+ } else {
+ VND_STAT_INC(vsp, vks_ndlpidrops, 1);
+ VND_STAT_INC(vsp, vks_tdrops, 1);
+ vnd_drop_ctl(vsp, mp, "uknown cmd for M_IOCDATA");
+ }
+ kmem_free(visp, sizeof (vnd_strioc_t));
+ return;
+ }
+
+ /* The nak is already sent for us */
+ if (csp->cp_rval != 0) {
+ vnd_drop_ctl(vsp, mp, "M_COPYIN failed");
+ kmem_free(visp, sizeof (vnd_strioc_t));
+ return;
+ }
+
+ /* Data is sitting for us in b_cont */
+ if (mp->b_cont == NULL ||
+ MBLKL(mp->b_cont) != sizeof (vnd_strioc_associate_t)) {
+ kmem_free(visp, sizeof (vnd_strioc_t));
+ miocnak(q, mp, 0, EINVAL);
+ return;
+ }
+
+ vss = (vnd_strioc_associate_t *)mp->b_cont->b_rptr;
+ vdp = vnd_dev_lookup(vss->vsa_minor);
+ if (vdp == NULL) {
+ error = EIO;
+ vss->vsa_errno = VND_E_NODEV;
+ goto nak;
+ }
+
+ nsp = vnd_nsd_lookup(vss->vsa_nsid);
+ if (nsp == NULL) {
+ error = EIO;
+ vss->vsa_errno = VND_E_NONETSTACK;
+ goto nak;
+ }
+
+ mutex_enter(&vsp->vns_lock);
+ if (!(vsp->vns_flags & VNS_F_NEED_ZONE)) {
+ mutex_exit(&vsp->vns_lock);
+ error = EEXIST;
+ vss->vsa_errno = VND_E_ASSOCIATED;
+ goto nak;
+ }
+
+ vsp->vns_nsd = nsp;
+ vsp->vns_flags &= ~VNS_F_NEED_ZONE;
+ vsp->vns_flags |= VNS_F_TASKQ_DISPATCHED;
+ mutex_exit(&vsp->vns_lock);
+
+ taskq_dispatch_ent(vnd_taskq, vnd_dlpi_taskq_dispatch, vsp, 0,
+ &vsp->vns_tqe);
+
+
+ /* At this point we need to wait until we have transitioned to ONLINE */
+ mutex_enter(&vsp->vns_lock);
+ while (vsp->vns_state != VNS_S_ONLINE && vsp->vns_state != VNS_S_ZOMBIE)
+ cv_wait(&vsp->vns_stcv, &vsp->vns_lock);
+ state = vsp->vns_state;
+ mutex_exit(&vsp->vns_lock);
+
+ if (state == VNS_S_ZOMBIE) {
+ vss->vsa_errno = vsp->vns_errno;
+ error = EIO;
+ goto nak;
+ }
+
+ mutex_enter(&vdp->vdd_lock);
+ mutex_enter(&vsp->vns_lock);
+ VERIFY(vdp->vdd_str == NULL);
+ /*
+ * Now initialize the remaining kstat properties and let's go ahead and
+ * create it.
+ */
+ (void) snprintf(iname, sizeof (iname), "z%d_%d",
+ vdp->vdd_nsd->vpnd_zid, vdp->vdd_minor);
+ vsp->vns_kstat = kstat_create_zone("vnd", vdp->vdd_minor, iname, "net",
+ KSTAT_TYPE_NAMED, sizeof (vnd_str_stat_t) / sizeof (kstat_named_t),
+ KSTAT_FLAG_VIRTUAL, GLOBAL_ZONEID);
+ if (vsp->vns_kstat == NULL) {
+ error = EIO;
+ vss->vsa_errno = VND_E_KSTATCREATE;
+ mutex_exit(&vsp->vns_lock);
+ mutex_exit(&vdp->vdd_lock);
+ goto nak;
+ }
+ vdp->vdd_str = vsp;
+ vsp->vns_dev = vdp;
+
+ /*
+ * Now, it's time to do the las thing that can fail, changing out the
+ * input function. After this we know that we can receive data, so we
+ * should make sure that we're ready.
+ */
+ if (vnd_dld_cap_enable(vsp, vnd_mac_input) != 0) {
+ error = EIO;
+ vss->vsa_errno = VND_E_DIRECTFAIL;
+ vdp->vdd_str = NULL;
+ vsp->vns_dev = NULL;
+ mutex_exit(&vsp->vns_lock);
+ mutex_exit(&vdp->vdd_lock);
+ goto nak;
+ }
+
+ zone = zone_find_by_id(vdp->vdd_nsd->vpnd_zid);
+ ASSERT(zone != NULL);
+ vsp->vns_kstat->ks_data = &vsp->vns_ksdata;
+ /* Account for zone name */
+ vsp->vns_kstat->ks_data_size += strlen(zone->zone_name) + 1;
+ /* Account for eventual link name */
+ vsp->vns_kstat->ks_data_size += VND_NAMELEN;
+ kstat_named_setstr(&vsp->vns_ksdata.vks_zonename, zone->zone_name);
+ kstat_named_setstr(&vdp->vdd_str->vns_ksdata.vks_linkname,
+ vdp->vdd_lname);
+ zone_rele(zone);
+ kstat_install(vsp->vns_kstat);
+
+ mutex_exit(&vsp->vns_lock);
+ mutex_exit(&vdp->vdd_lock);
+
+ /*
+ * Note that the vnd_str_t does not keep a permanent hold on the
+ * vnd_pnsd_t. We leave that up to the vnd_dev_t as that's also what
+ * the nestack goes through to take care of everything.
+ */
+ vss->vsa_errno = VND_E_SUCCESS;
+nak:
+ if (vdp != NULL)
+ vnd_dev_rele(vdp);
+ if (nsp != NULL)
+ vnd_nsd_rele(nsp);
+ /*
+ * Change the copyin request to a copyout. Note that we can't use
+ * mcopyout here as it only works when the DB_TYPE is M_IOCTL. That's
+ * okay, as the copyin vs. copyout is basically the same.
+ */
+ DB_TYPE(mp) = M_COPYOUT;
+ visp->vs_state = VSS_COPYOUT;
+ crp = (struct copyreq *)mp->b_rptr;
+ crp->cq_private = (void *)visp;
+ crp->cq_addr = visp->vs_addr;
+ crp->cq_size = sizeof (vnd_strioc_associate_t);
+ qreply(q, mp);
+}
+
+static void
+vnd_stroutdata(queue_t *q, vnd_str_t *vsp, mblk_t *mp, struct copyresp *csp)
+{
+ ASSERT(csp->cp_private != NULL);
+ kmem_free(csp->cp_private, sizeof (vnd_strioc_t));
+ if (csp->cp_cmd != VND_STRIOC_ASSOCIATE) {
+ if (q->q_next != NULL) {
+ putnext(q, mp);
+ } else {
+ VND_STAT_INC(vsp, vks_ndlpidrops, 1);
+ VND_STAT_INC(vsp, vks_tdrops, 1);
+ vnd_drop_ctl(vsp, mp, "uknown cmd for M_IOCDATA");
+ }
+ return;
+ }
+
+ /* The nak is already sent for us */
+ if (csp->cp_rval != 0) {
+ vnd_drop_ctl(vsp, mp, "M_COPYOUT failed");
+ return;
+ }
+
+ /* Ack and let's be done with it all */
+ miocack(q, mp, 0, 0);
+}
+
+static int
+vnd_s_wput(queue_t *q, mblk_t *mp)
+{
+ vnd_str_t *vsp = q->q_ptr;
+ struct copyresp *crp;
+ vnd_strioc_state_t vstate;
+ vnd_strioc_t *visp;
+
+ switch (DB_TYPE(mp)) {
+ case M_IOCTL:
+ vnd_strioctl(q, vsp, mp, (struct iocblk *)mp->b_rptr);
+ return (0);
+ case M_IOCDATA:
+ crp = (struct copyresp *)mp->b_rptr;
+ ASSERT(crp->cp_private != NULL);
+ visp = (vnd_strioc_t *)crp->cp_private;
+ vstate = visp->vs_state;
+ ASSERT(vstate == VSS_COPYIN || vstate == VSS_COPYOUT);
+ if (vstate == VSS_COPYIN)
+ vnd_striocdata(q, vsp, mp,
+ (struct copyresp *)mp->b_rptr);
+ else
+ vnd_stroutdata(q, vsp, mp,
+ (struct copyresp *)mp->b_rptr);
+ return (0);
+ default:
+ break;
+ }
+ if (q->q_next != NULL)
+ putnext(q, mp);
+ else
+ vnd_drop_ctl(vsp, mp, "!M_IOCTL in wput");
+
+ return (0);
+}
+
+static int
+vnd_s_open(queue_t *q, dev_t *devp, int oflag, int sflag, cred_t *credp)
+{
+ vnd_str_t *vsp;
+ uint_t rand;
+
+ if (q->q_ptr != NULL)
+ return (EINVAL);
+
+ if (!(sflag & MODOPEN))
+ return (ENXIO);
+
+ if (credp != kcred)
+ return (EPERM);
+
+ vsp = kmem_cache_alloc(vnd_str_cache, KM_SLEEP);
+ bzero(vsp, sizeof (*vsp));
+ mutex_init(&vsp->vns_lock, NULL, MUTEX_DRIVER, NULL);
+ cv_init(&vsp->vns_cancelcv, NULL, CV_DRIVER, NULL);
+ cv_init(&vsp->vns_barriercv, NULL, CV_DRIVER, NULL);
+ cv_init(&vsp->vns_stcv, NULL, CV_DRIVER, NULL);
+ vsp->vns_state = VNS_S_INITIAL;
+
+ mutex_init(&vsp->vns_dq_read.vdq_lock, NULL, MUTEX_DRIVER, NULL);
+ mutex_init(&vsp->vns_dq_write.vdq_lock, NULL, MUTEX_DRIVER, NULL);
+ mutex_enter(&vnd_dev_lock);
+ vsp->vns_dq_read.vdq_max = vnd_vdq_default_size;
+ vsp->vns_dq_read.vdq_vns = vsp;
+ vsp->vns_dq_write.vdq_max = vnd_vdq_default_size;
+ vsp->vns_dq_write.vdq_vns = vsp;
+ mutex_exit(&vnd_dev_lock);
+ vsp->vns_rq = q;
+ vsp->vns_wq = WR(q);
+ q->q_ptr = WR(q)->q_ptr = vsp;
+ vsp->vns_flags = VNS_F_NEED_ZONE;
+ vsp->vns_nflush = vnd_flush_nburst;
+ vsp->vns_bsize = vnd_flush_burst_size;
+
+ (void) random_get_pseudo_bytes((uint8_t *)&rand, sizeof (rand));
+ vsp->vns_squeue = gsqueue_set_get(vnd_sqset, rand);
+
+ /*
+ * We create our kstat and initialize all of its fields now, but we
+ * don't install it until we actually do the zone association so we can
+ * get everything.
+ */
+ kstat_named_init(&vsp->vns_ksdata.vks_rbytes, "rbytes",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_rpackets, "rpackets",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_obytes, "obytes",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_opackets, "opackets",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_nhookindrops, "nhookindrops",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_nhookoutdrops, "nhookoutdrops",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_ndlpidrops, "ndlpidrops",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_ndataindrops, "ndataindrops",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_ndataoutdrops, "ndataoutdrops",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_tdrops, "total_drops",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_linkname, "linkname",
+ KSTAT_DATA_STRING);
+ kstat_named_init(&vsp->vns_ksdata.vks_zonename, "zonename",
+ KSTAT_DATA_STRING);
+ kstat_named_init(&vsp->vns_ksdata.vks_nmacflow, "flowcontrol_events",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_tmacflow, "flowcontrol_time",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_mac_flow_1ms, "flowcontrol_1ms",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_mac_flow_10ms, "flowcontrol_10ms",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_mac_flow_100ms,
+ "flowcontrol_100ms", KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_mac_flow_1s, "flowcontrol_1s",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&vsp->vns_ksdata.vks_mac_flow_10s, "flowcontrol_10s",
+ KSTAT_DATA_UINT64);
+ qprocson(q);
+ /*
+ * Now that we've called qprocson, grab the lower module for making sure
+ * that we don't have any pass through modules.
+ */
+ vsp->vns_lrq = RD(vsp->vns_wq->q_next);
+
+ return (0);
+}
+
+static int
+vnd_s_close(queue_t *q, int flag, cred_t *credp)
+{
+ vnd_str_t *vsp;
+ mblk_t *mp;
+
+ VERIFY(WR(q)->q_next != NULL);
+
+ vsp = q->q_ptr;
+ ASSERT(vsp != NULL);
+
+ /*
+ * We need to transition ourselves down. This means that we have a few
+ * important different things to do in the process of tearing down our
+ * input and output buffers, making sure we've drained the current
+ * squeue, and disabling the fast path. Before we disable the fast path,
+ * we should make sure the squeue is drained. Because we're in streams
+ * close, we know that no packets can come into us from userland, but we
+ * can receive more. As such, the following is the exact order of things
+ * that we do:
+ *
+ * 1) flush the vns_dq_read
+ * 2) Insert the drain mblk
+ * 3) When it's been received, tear down the fast path by kicking
+ * off the state machine.
+ * 4) One final flush of both the vns_dq_read,vns_dq_write
+ */
+
+ vnd_dq_flush(&vsp->vns_dq_read, vnd_drop_in);
+ vnd_strbarrier(vsp);
+ mutex_enter(&vsp->vns_lock);
+ vsp->vns_state = VNS_S_SHUTTING_DOWN;
+ if (!(vsp->vns_flags & VNS_F_TASKQ_DISPATCHED)) {
+ vsp->vns_flags |= VNS_F_TASKQ_DISPATCHED;
+ taskq_dispatch_ent(vnd_taskq, vnd_dlpi_taskq_dispatch, vsp,
+ 0, &vsp->vns_tqe);
+ }
+ while (vsp->vns_state != VNS_S_ZOMBIE)
+ cv_wait(&vsp->vns_stcv, &vsp->vns_lock);
+ mutex_exit(&vsp->vns_lock);
+
+ qprocsoff(q);
+ mutex_enter(&vsp->vns_lock);
+ vsp->vns_flags |= VNS_F_CONDEMNED;
+ while (vsp->vns_flags & VNS_F_TASKQ_DISPATCHED)
+ cv_wait(&vsp->vns_cancelcv, &vsp->vns_lock);
+
+ while ((mp = vnd_dlpi_inc_pop(vsp)) != NULL)
+ vnd_drop_ctl(vsp, mp, "vnd_s_close");
+ mutex_exit(&vsp->vns_lock);
+
+ q->q_ptr = NULL;
+ vnd_dq_flush(&vsp->vns_dq_read, vnd_drop_in);
+ vnd_dq_flush(&vsp->vns_dq_write, vnd_drop_out);
+ mutex_destroy(&vsp->vns_dq_read.vdq_lock);
+ mutex_destroy(&vsp->vns_dq_write.vdq_lock);
+
+ if (vsp->vns_kstat != NULL)
+ kstat_delete(vsp->vns_kstat);
+ mutex_destroy(&vsp->vns_lock);
+ cv_destroy(&vsp->vns_stcv);
+ cv_destroy(&vsp->vns_barriercv);
+ cv_destroy(&vsp->vns_cancelcv);
+ kmem_cache_free(vnd_str_cache, vsp);
+
+ return (0);
+}
+
+static vnd_mac_cookie_t
+vnd_squeue_tx_one(vnd_str_t *vsp, mblk_t *mp)
+{
+ hrtime_t txtime;
+ vnd_mac_cookie_t vc;
+
+ VND_STAT_INC(vsp, vks_opackets, 1);
+ VND_STAT_INC(vsp, vks_obytes, msgsize(mp));
+ DTRACE_VND5(send, mblk_t *, mp, void *, NULL, void *, NULL,
+ vnd_str_t *, vsp, mblk_t *, mp);
+ /* Actually tx now */
+ txtime = gethrtime();
+ vc = vsp->vns_caps.vsc_tx_f(vsp->vns_caps.vsc_tx_hdl,
+ mp, 0, MAC_DROP_ON_NO_DESC);
+
+ /*
+ * We need to check two different conditions before we immediately set
+ * the flow control lock. The first thing that we need to do is verify
+ * that this is an instance of hard flow control, so to say. The flow
+ * control callbacks won't always fire in cases where we still get a
+ * cookie returned. The explicit check for flow control will guarantee
+ * us that we'll get a subsequent notification callback.
+ *
+ * The second case comes about because we do not hold the
+ * vnd_str_t`vns_lock across calls to tx, we need to determine if a flow
+ * control notification already came across for us in a different thread
+ * calling vnd_mac_flow_control(). To deal with this, we record a
+ * timestamp every time that we change the flow control state. We grab
+ * txtime here before we transmit because that guarantees that the
+ * hrtime_t of the call to vnd_mac_flow_control() will be after txtime.
+ *
+ * If the flow control notification beat us to the punch, the value of
+ * vns_fcupdate will be larger than the value of txtime, and we should
+ * just record the statistics. However, if we didn't beat it to the
+ * punch (txtime > vns_fcupdate), then we know that it's safe to wait
+ * for a notification.
+ */
+ if (vc != NULL) {
+ hrtime_t diff;
+
+ if (vsp->vns_caps.vsc_is_fc_f(vsp->vns_caps.vsc_is_fc_hdl,
+ vc) == 0)
+ return (NULL);
+ mutex_enter(&vsp->vns_lock);
+ diff = vsp->vns_fcupdate - txtime;
+ if (diff > 0) {
+ mutex_exit(&vsp->vns_lock);
+ vnd_mac_flow_control_stat(vsp, diff);
+ return (NULL);
+ }
+ vsp->vns_flags |= VNS_F_FLOW_CONTROLLED;
+ vsp->vns_caps.vsc_fc_cookie = vc;
+ vsp->vns_fclatch = txtime;
+ vsp->vns_fcupdate = txtime;
+ DTRACE_VND3(flow__blocked, vnd_str_t *, vsp,
+ uint64_t, vsp->vns_dq_write.vdq_cur, uintptr_t, vc);
+ mutex_exit(&vsp->vns_lock);
+ }
+
+ return (vc);
+}
+
+static void
+vnd_squeue_tx_drain(void *arg, mblk_t *drain_mp, gsqueue_t *gsp, void *dummy)
+{
+ mblk_t *mp;
+ int nmps;
+ size_t mptot, nflush, bsize;
+ boolean_t blocked, empty;
+ vnd_data_queue_t *vqp;
+ vnd_str_t *vsp = arg;
+
+ mutex_enter(&vsp->vns_lock);
+ /*
+ * We either enter here via an squeue or via vnd_squeue_tx_append(). In
+ * the former case we need to mark that there is no longer an active
+ * user of the drain block.
+ */
+ if (drain_mp != NULL) {
+ VERIFY(drain_mp == &vsp->vns_drainblk);
+ VERIFY(vsp->vns_flags & VNS_F_DRAIN_SCHEDULED);
+ vsp->vns_flags &= ~VNS_F_DRAIN_SCHEDULED;
+ }
+
+ /*
+ * If we're still flow controlled or under a flush barrier, nothing to
+ * do.
+ */
+ if (vsp->vns_flags & (VNS_F_FLOW_CONTROLLED | VNS_F_BARRIER)) {
+ mutex_exit(&vsp->vns_lock);
+ return;
+ }
+
+ nflush = vsp->vns_nflush;
+ bsize = vsp->vns_bsize;
+ mutex_exit(&vsp->vns_lock);
+
+ nmps = 0;
+ mptot = 0;
+ blocked = B_FALSE;
+ vqp = &vsp->vns_dq_write;
+ while (nmps < nflush && mptot <= bsize) {
+ mutex_enter(&vqp->vdq_lock);
+ if (vnd_dq_pop(vqp, &mp) == 0) {
+ mutex_exit(&vqp->vdq_lock);
+ break;
+ }
+ mutex_exit(&vqp->vdq_lock);
+
+ nmps++;
+ mptot += msgsize(mp);
+ if (vnd_squeue_tx_one(vsp, mp) != NULL) {
+ blocked = B_TRUE;
+ break;
+ }
+ }
+
+ empty = vnd_dq_is_empty(&vsp->vns_dq_write);
+
+ /*
+ * If the queue is not empty, we're not blocked, and there isn't a drain
+ * scheduled, put it into the squeue with the drain block and
+ * GSQUEUE_FILL.
+ */
+ if (blocked == B_FALSE && empty == B_FALSE) {
+ mutex_enter(&vsp->vns_lock);
+ if (!(vsp->vns_flags & VNS_F_DRAIN_SCHEDULED)) {
+ mblk_t *mp = &vsp->vns_drainblk;
+ vsp->vns_flags |= VNS_F_DRAIN_SCHEDULED;
+ gsqueue_enter_one(vsp->vns_squeue,
+ mp, vnd_squeue_tx_drain, vsp,
+ GSQUEUE_FILL, VND_SQUEUE_TAG_TX_DRAIN);
+ }
+ mutex_exit(&vsp->vns_lock);
+ }
+
+ /*
+ * If we drained some amount of data, we need to signal the data queue.
+ */
+ if (nmps > 0) {
+ cv_broadcast(&vsp->vns_dq_write.vdq_ready);
+ pollwakeup(&vsp->vns_dev->vdd_ph, POLLOUT);
+ }
+}
+
+static void
+vnd_squeue_tx_append(void *arg, mblk_t *mp, gsqueue_t *gsp, void *dummy)
+{
+ vnd_str_t *vsp = arg;
+ vnd_data_queue_t *vqp = &vsp->vns_dq_write;
+ vnd_pnsd_t *nsp = vsp->vns_nsd;
+ size_t len = msgsize(mp);
+
+ /*
+ * Before we append this packet, we should run it through the firewall
+ * rules.
+ */
+ if (nsp->vpnd_hooked && vnd_hook(vsp, &mp, nsp->vpnd_neti_v4,
+ nsp->vpnd_event_out_v4, nsp->vpnd_token_out_v4, nsp->vpnd_neti_v6,
+ nsp->vpnd_event_out_v6, nsp->vpnd_token_out_v6, vnd_drop_hook_out,
+ vnd_drop_out) != 0) {
+ /*
+ * Because we earlier reserved space for this packet and it's
+ * not making the cut, we need to go through and unreserve that
+ * space. Also note that the message block will likely be freed
+ * by the time we return from vnd_hook so we cannot rely on it.
+ */
+ mutex_enter(&vqp->vdq_lock);
+ vnd_dq_unreserve(vqp, len);
+ mutex_exit(&vqp->vdq_lock);
+ return;
+ }
+
+ /*
+ * We earlier reserved space for this packet. So for now simply append
+ * it and call drain. We know that no other drain can be going on right
+ * now thanks to the squeue.
+ */
+ mutex_enter(&vqp->vdq_lock);
+ (void) vnd_dq_push(&vsp->vns_dq_write, mp, B_TRUE, vnd_drop_panic);
+ mutex_exit(&vqp->vdq_lock);
+ vnd_squeue_tx_drain(vsp, NULL, NULL, NULL);
+}
+
+/*
+ * We need to see if this is a valid name of sorts for us. That means a few
+ * things. First off, we can't assume that what we've been given has actually
+ * been null terminated. More importantly, that it's a valid name as far as
+ * ddi_create_minor_node is concerned (that means no '@', '/', or ' '). We
+ * further constrain ourselves to simply alphanumeric characters and a few
+ * additional ones, ':', '-', and '_'.
+ */
+static int
+vnd_validate_name(const char *buf, size_t buflen)
+{
+ int i, len;
+
+ /* First make sure a null terminator exists */
+ for (i = 0; i < buflen; i++)
+ if (buf[i] == '\0')
+ break;
+ len = i;
+ if (i == 0 || i == buflen)
+ return (0);
+
+ for (i = 0; i < len; i++)
+ if (!isalnum(buf[i]) && buf[i] != ':' && buf[i] != '-' &&
+ buf[i] != '_')
+ return (0);
+
+ return (1);
+}
+
+static int
+vnd_ioctl_attach(vnd_dev_t *vdp, uintptr_t arg, cred_t *credp, int cpflag)
+{
+ vnd_ioc_attach_t via;
+ vnd_strioc_associate_t vss;
+ vnd_pnsd_t *nsp;
+ zone_t *zonep;
+ zoneid_t zid;
+ char buf[2*VND_NAMELEN];
+ int ret, rp;
+
+ if (secpolicy_net_config(credp, B_FALSE) != 0)
+ return (EPERM);
+
+ if (secpolicy_net_rawaccess(credp) != 0)
+ return (EPERM);
+
+ if (ddi_copyin((void *)arg, &via, sizeof (via), cpflag) != 0)
+ return (EFAULT);
+ via.via_errno = VND_E_SUCCESS;
+
+ if (vnd_validate_name(via.via_name, VND_NAMELEN) == 0) {
+ via.via_errno = VND_E_BADNAME;
+ ret = EIO;
+ goto errcopyout;
+ }
+
+ /*
+ * Only the global zone can request to create a device in a different
+ * zone.
+ */
+ zid = crgetzoneid(credp);
+ if (zid != GLOBAL_ZONEID && via.via_zoneid != -1 &&
+ zid != via.via_zoneid) {
+ via.via_errno = VND_E_PERM;
+ ret = EIO;
+ goto errcopyout;
+ }
+
+ if (via.via_zoneid == -1)
+ via.via_zoneid = zid;
+
+ /*
+ * Establish the name we'll use now. We want to be extra paranoid about
+ * the device we're opening so check that now.
+ */
+ if (zid == GLOBAL_ZONEID && via.via_zoneid != zid) {
+ zonep = zone_find_by_id(via.via_zoneid);
+ if (zonep == NULL) {
+ via.via_errno = VND_E_NOZONE;
+ ret = EIO;
+ goto errcopyout;
+ }
+ if (snprintf(NULL, 0, "/dev/net/zone/%s/%s", zonep->zone_name,
+ via.via_name) >= sizeof (buf)) {
+ zone_rele(zonep);
+ via.via_errno = VND_E_BADNAME;
+ ret = EIO;
+ goto errcopyout;
+ }
+ (void) snprintf(buf, sizeof (buf), "/dev/net/zone/%s/%s",
+ zonep->zone_name, via.via_name);
+ zone_rele(zonep);
+ zonep = NULL;
+ } else {
+ if (snprintf(NULL, 0, "/dev/net/%s", via.via_name) >=
+ sizeof (buf)) {
+ via.via_errno = VND_E_BADNAME;
+ ret = EIO;
+ goto errcopyout;
+ }
+ (void) snprintf(buf, sizeof (buf), "/dev/net/%s", via.via_name);
+ }
+
+ /*
+ * If our zone is dying then the netstack will have been removed from
+ * this list.
+ */
+ nsp = vnd_nsd_lookup_by_zid(via.via_zoneid);
+ if (nsp == NULL) {
+ via.via_errno = VND_E_NOZONE;
+ ret = EIO;
+ goto errcopyout;
+ }
+
+ /*
+ * Note we set the attached handle even though we haven't actually
+ * finished the process of attaching the ldi handle.
+ */
+ mutex_enter(&vdp->vdd_lock);
+ if (vdp->vdd_flags & (VND_D_ATTACHED | VND_D_ATTACH_INFLIGHT)) {
+ mutex_exit(&vdp->vdd_lock);
+ vnd_nsd_rele(nsp);
+ via.via_errno = VND_E_ATTACHED;
+ ret = EIO;
+ goto errcopyout;
+ }
+ vdp->vdd_flags |= VND_D_ATTACH_INFLIGHT;
+ ASSERT(vdp->vdd_cr == NULL);
+ crhold(credp);
+ vdp->vdd_cr = credp;
+ ASSERT(vdp->vdd_nsd == NULL);
+ vdp->vdd_nsd = nsp;
+ mutex_exit(&vdp->vdd_lock);
+
+ /*
+ * Place an additional hold on the vnd_pnsd_t as we go through and do
+ * all of the rest of our work. This will be the hold that we keep for
+ * as long as this thing is attached.
+ */
+ vnd_nsd_ref(nsp);
+
+ ret = ldi_open_by_name(buf, FREAD | FWRITE, vdp->vdd_cr,
+ &vdp->vdd_ldih, vdp->vdd_ldiid);
+ if (ret != 0) {
+ if (ret == ENODEV)
+ via.via_errno = VND_E_NODATALINK;
+ goto err;
+ }
+
+ /*
+ * Unfortunately the I_PUSH interface doesn't allow us a way to detect
+ * whether or not we're coming in from a layered device. We really want
+ * to make sure that a normal user can't push on our streams module.
+ * Currently the only idea I have for this is to make sure that the
+ * credp is kcred which is really terrible.
+ */
+ ret = ldi_ioctl(vdp->vdd_ldih, I_PUSH, (intptr_t)"vnd", FKIOCTL,
+ kcred, &rp);
+ if (ret != 0) {
+ rp = ldi_close(vdp->vdd_ldih, FREAD | FWRITE, vdp->vdd_cr);
+ VERIFY(rp == 0);
+ via.via_errno = VND_E_STRINIT;
+ ret = EIO;
+ goto err;
+ }
+
+ vss.vsa_minor = vdp->vdd_minor;
+ vss.vsa_nsid = nsp->vpnd_nsid;
+
+ ret = ldi_ioctl(vdp->vdd_ldih, VND_STRIOC_ASSOCIATE, (intptr_t)&vss,
+ FKIOCTL, kcred, &rp);
+ if (ret != 0 || vss.vsa_errno != VND_E_SUCCESS) {
+ rp = ldi_close(vdp->vdd_ldih, FREAD | FWRITE, vdp->vdd_cr);
+ VERIFY(rp == 0);
+ if (ret == 0) {
+ via.via_errno = vss.vsa_errno;
+ ret = EIO;
+ }
+ goto err;
+ }
+
+ mutex_enter(&vdp->vdd_nsd->vpnd_lock);
+
+ /*
+ * There's a chance that our netstack was condemned while we've had a
+ * hold on it. As such we need to check and if so, error out.
+ */
+ if (vdp->vdd_nsd->vpnd_flags & VND_NS_CONDEMNED) {
+ mutex_exit(&vdp->vdd_nsd->vpnd_lock);
+ rp = ldi_close(vdp->vdd_ldih, FREAD | FWRITE, vdp->vdd_cr);
+ VERIFY(rp == 0);
+ ret = EIO;
+ via.via_errno = VND_E_NOZONE;
+ goto err;
+ }
+
+ mutex_enter(&vdp->vdd_lock);
+ VERIFY(vdp->vdd_str != NULL);
+ vdp->vdd_flags &= ~VND_D_ATTACH_INFLIGHT;
+ vdp->vdd_flags |= VND_D_ATTACHED;
+ (void) strlcpy(vdp->vdd_datalink, via.via_name,
+ sizeof (vdp->vdd_datalink));
+ list_insert_tail(&vdp->vdd_nsd->vpnd_dev_list, vdp);
+ mutex_exit(&vdp->vdd_lock);
+ mutex_exit(&vdp->vdd_nsd->vpnd_lock);
+ vnd_nsd_rele(nsp);
+
+ return (0);
+
+err:
+ mutex_enter(&vdp->vdd_lock);
+ vdp->vdd_flags &= ~VND_D_ATTACH_INFLIGHT;
+ crfree(vdp->vdd_cr);
+ vdp->vdd_cr = NULL;
+ vdp->vdd_nsd = NULL;
+ mutex_exit(&vdp->vdd_lock);
+
+ /*
+ * We have two holds to drop here. One for our original reference and
+ * one for the hold this operation would have represented.
+ */
+ vnd_nsd_rele(nsp);
+ vnd_nsd_rele(nsp);
+errcopyout:
+ if (ddi_copyout(&via, (void *)arg, sizeof (via), cpflag) != 0)
+ ret = EFAULT;
+
+ return (ret);
+}
+
+static int
+vnd_ioctl_link(vnd_dev_t *vdp, intptr_t arg, cred_t *credp, int cpflag)
+{
+ int ret = 0;
+ vnd_ioc_link_t vil;
+ char mname[2*VND_NAMELEN];
+ char **c;
+ vnd_dev_t *v;
+ zoneid_t zid;
+
+ /* Not anyone can link something */
+ if (secpolicy_net_config(credp, B_FALSE) != 0)
+ return (EPERM);
+
+ if (ddi_copyin((void *)arg, &vil, sizeof (vil), cpflag) != 0)
+ return (EFAULT);
+
+ if (vnd_validate_name(vil.vil_name, VND_NAMELEN) == 0) {
+ ret = EIO;
+ vil.vil_errno = VND_E_BADNAME;
+ goto errcopyout;
+ }
+
+ c = vnd_reserved_names;
+ while (*c != NULL) {
+ if (strcmp(vil.vil_name, *c) == 0) {
+ ret = EIO;
+ vil.vil_errno = VND_E_BADNAME;
+ goto errcopyout;
+ }
+ c++;
+ }
+
+ mutex_enter(&vdp->vdd_lock);
+ if (!(vdp->vdd_flags & VND_D_ATTACHED)) {
+ mutex_exit(&vdp->vdd_lock);
+ ret = EIO;
+ vil.vil_errno = VND_E_NOTATTACHED;
+ goto errcopyout;
+ }
+
+ if (vdp->vdd_flags & VND_D_ZONE_DYING) {
+ mutex_exit(&vdp->vdd_lock);
+ ret = EIO;
+ vil.vil_errno = VND_E_NOZONE;
+ goto errcopyout;
+ }
+
+ if (vdp->vdd_flags & (VND_D_LINK_INFLIGHT | VND_D_LINKED)) {
+ mutex_exit(&vdp->vdd_lock);
+ ret = EIO;
+ vil.vil_errno = VND_E_LINKED;
+ goto errcopyout;
+ }
+ vdp->vdd_flags |= VND_D_LINK_INFLIGHT;
+ zid = vdp->vdd_nsd->vpnd_zid;
+ mutex_exit(&vdp->vdd_lock);
+
+ if (snprintf(NULL, 0, "z%d:%s", zid, vil.vil_name) >=
+ sizeof (mname)) {
+ ret = EIO;
+ vil.vil_errno = VND_E_BADNAME;
+ goto errcopyout;
+ }
+
+ mutex_enter(&vnd_dev_lock);
+ for (v = list_head(&vnd_dev_list); v != NULL;
+ v = list_next(&vnd_dev_list, v)) {
+ if (!(v->vdd_flags & VND_D_LINKED))
+ continue;
+
+ if (v->vdd_nsd->vpnd_zid == zid &&
+ strcmp(v->vdd_lname, vil.vil_name) == 0) {
+ mutex_exit(&vnd_dev_lock);
+ ret = EIO;
+ vil.vil_errno = VND_E_LINKEXISTS;
+ goto error;
+ }
+ }
+
+ /*
+ * We set the name and mark ourselves attached while holding the list
+ * lock to ensure that no other user can mistakingly find our name.
+ */
+ (void) snprintf(mname, sizeof (mname), "z%d:%s", zid,
+ vil.vil_name);
+ mutex_enter(&vdp->vdd_lock);
+
+ /*
+ * Because we dropped our lock, we need to double check whether or not
+ * the zone was marked as dying while we were here. If it hasn't, then
+ * it's safe for us to link it in.
+ */
+ if (vdp->vdd_flags & VND_D_ZONE_DYING) {
+ mutex_exit(&vdp->vdd_lock);
+ mutex_exit(&vnd_dev_lock);
+ ret = EIO;
+ vil.vil_errno = VND_E_NOZONE;
+ goto error;
+ }
+
+ (void) strlcpy(vdp->vdd_lname, vil.vil_name, sizeof (vdp->vdd_lname));
+ if (ddi_create_minor_node(vnd_dip, mname, S_IFCHR, vdp->vdd_minor,
+ DDI_PSEUDO, 0) != DDI_SUCCESS) {
+ ret = EIO;
+ vil.vil_errno = VND_E_MINORNODE;
+ } else {
+ vdp->vdd_flags &= ~VND_D_LINK_INFLIGHT;
+ vdp->vdd_flags |= VND_D_LINKED;
+ kstat_named_setstr(&vdp->vdd_str->vns_ksdata.vks_linkname,
+ vdp->vdd_lname);
+ ret = 0;
+ }
+ mutex_exit(&vdp->vdd_lock);
+ mutex_exit(&vnd_dev_lock);
+
+ if (ret == 0) {
+ /*
+ * Add a reference to represent that this device is linked into
+ * the file system name space to ensure that it doesn't
+ * disappear.
+ */
+ vnd_dev_ref(vdp);
+ return (0);
+ }
+
+error:
+ mutex_enter(&vdp->vdd_lock);
+ vdp->vdd_flags &= ~VND_D_LINK_INFLIGHT;
+ vdp->vdd_lname[0] = '\0';
+ mutex_exit(&vdp->vdd_lock);
+
+errcopyout:
+ if (ddi_copyout(&vil, (void *)arg, sizeof (vil), cpflag) != 0)
+ ret = EFAULT;
+ return (ret);
+}
+
+/*
+ * Common unlink function. This is used both from the ioctl path and from the
+ * netstack shutdown path. The caller is required to hold the mutex on the
+ * vnd_dev_t, but they basically will have it relinquished for them. The only
+ * thing the caller is allowed to do afterward is to potentially rele the
+ * vnd_dev_t if they have their own hold. Note that only the ioctl path has its
+ * own hold.
+ */
+static void
+vnd_dev_unlink(vnd_dev_t *vdp)
+{
+ char mname[2*VND_NAMELEN];
+
+ ASSERT(MUTEX_HELD(&vdp->vdd_lock));
+
+ (void) snprintf(mname, sizeof (mname), "z%d:%s",
+ vdp->vdd_nsd->vpnd_zid, vdp->vdd_lname);
+ ddi_remove_minor_node(vnd_dip, mname);
+ vdp->vdd_lname[0] = '\0';
+ vdp->vdd_flags &= ~VND_D_LINKED;
+ kstat_named_setstr(&vdp->vdd_str->vns_ksdata.vks_linkname,
+ vdp->vdd_lname);
+ mutex_exit(&vdp->vdd_lock);
+
+ /*
+ * This rele corresponds to the reference that we took in
+ * vnd_ioctl_link.
+ */
+ vnd_dev_rele(vdp);
+}
+
+static int
+vnd_ioctl_unlink(vnd_dev_t *vdp, intptr_t arg, cred_t *credp, int cpflag)
+{
+ int ret;
+ zoneid_t zid;
+ vnd_ioc_unlink_t viu;
+
+ /* Not anyone can unlink something */
+ if (secpolicy_net_config(credp, B_FALSE) != 0)
+ return (EPERM);
+
+ zid = crgetzoneid(credp);
+
+ if (ddi_copyin((void *)arg, &viu, sizeof (viu), cpflag) != 0)
+ return (EFAULT);
+
+ viu.viu_errno = VND_E_SUCCESS;
+
+ mutex_enter(&vdp->vdd_lock);
+ if (!(vdp->vdd_flags & VND_D_LINKED)) {
+ mutex_exit(&vdp->vdd_lock);
+ ret = EIO;
+ viu.viu_errno = VND_E_NOTLINKED;
+ goto err;
+ }
+ VERIFY(vdp->vdd_flags & VND_D_ATTACHED);
+
+ if (zid != GLOBAL_ZONEID && zid != vdp->vdd_nsd->vpnd_zid) {
+ mutex_exit(&vdp->vdd_lock);
+ ret = EIO;
+ viu.viu_errno = VND_E_PERM;
+ goto err;
+ }
+
+ /* vnd_dev_unlink releases the vdp mutex for us */
+ vnd_dev_unlink(vdp);
+ ret = 0;
+err:
+ if (ddi_copyout(&viu, (void *)arg, sizeof (viu), cpflag) != 0)
+ return (EFAULT);
+
+ return (ret);
+}
+
+static int
+vnd_ioctl_setrxbuf(vnd_dev_t *vdp, intptr_t arg, int cpflag)
+{
+ int ret;
+ vnd_ioc_buf_t vib;
+
+ if (ddi_copyin((void *)arg, &vib, sizeof (vib), cpflag) != 0)
+ return (EFAULT);
+
+ mutex_enter(&vnd_dev_lock);
+ if (vib.vib_size > vnd_vdq_hard_max) {
+ mutex_exit(&vnd_dev_lock);
+ vib.vib_errno = VND_E_BUFTOOBIG;
+ ret = EIO;
+ goto err;
+ }
+ mutex_exit(&vnd_dev_lock);
+
+ mutex_enter(&vdp->vdd_lock);
+ if (!(vdp->vdd_flags & VND_D_ATTACHED)) {
+ mutex_exit(&vdp->vdd_lock);
+ vib.vib_errno = VND_E_NOTATTACHED;
+ ret = EIO;
+ goto err;
+ }
+
+ mutex_enter(&vdp->vdd_str->vns_lock);
+ if (vib.vib_size < vdp->vdd_str->vns_minwrite) {
+ mutex_exit(&vdp->vdd_str->vns_lock);
+ mutex_exit(&vdp->vdd_lock);
+ vib.vib_errno = VND_E_BUFTOOSMALL;
+ ret = EIO;
+ goto err;
+ }
+
+ mutex_exit(&vdp->vdd_str->vns_lock);
+ mutex_enter(&vdp->vdd_str->vns_dq_read.vdq_lock);
+ vdp->vdd_str->vns_dq_read.vdq_max = vib.vib_size;
+ mutex_exit(&vdp->vdd_str->vns_dq_read.vdq_lock);
+ mutex_exit(&vdp->vdd_lock);
+ ret = 0;
+
+err:
+ if (ddi_copyout(&vib, (void *)arg, sizeof (vib), cpflag) != 0)
+ return (EFAULT);
+
+ return (ret);
+}
+
+static int
+vnd_ioctl_getrxbuf(vnd_dev_t *vdp, intptr_t arg, int cpflag)
+{
+ int ret;
+ vnd_ioc_buf_t vib;
+
+ mutex_enter(&vdp->vdd_lock);
+ if (!(vdp->vdd_flags & VND_D_ATTACHED)) {
+ mutex_exit(&vdp->vdd_lock);
+ vib.vib_errno = VND_E_NOTATTACHED;
+ ret = EIO;
+ goto err;
+ }
+
+ mutex_enter(&vdp->vdd_str->vns_dq_read.vdq_lock);
+ vib.vib_size = vdp->vdd_str->vns_dq_read.vdq_max;
+ mutex_exit(&vdp->vdd_str->vns_dq_read.vdq_lock);
+ mutex_exit(&vdp->vdd_lock);
+ ret = 0;
+
+err:
+ if (ddi_copyout(&vib, (void *)arg, sizeof (vib), cpflag) != 0)
+ return (EFAULT);
+
+ return (ret);
+}
+
+static int
+vnd_ioctl_getmaxbuf(vnd_dev_t *vdp, intptr_t arg, int cpflag)
+{
+ vnd_ioc_buf_t vib;
+
+ mutex_enter(&vnd_dev_lock);
+ vib.vib_size = vnd_vdq_hard_max;
+ mutex_exit(&vnd_dev_lock);
+
+ if (ddi_copyout(&vib, (void *)arg, sizeof (vib), cpflag) != 0)
+ return (EFAULT);
+
+ return (0);
+}
+
+static int
+vnd_ioctl_gettxbuf(vnd_dev_t *vdp, intptr_t arg, int cpflag)
+{
+ int ret;
+ vnd_ioc_buf_t vib;
+
+ mutex_enter(&vdp->vdd_lock);
+ if (!(vdp->vdd_flags & VND_D_ATTACHED)) {
+ mutex_exit(&vdp->vdd_lock);
+ vib.vib_errno = VND_E_NOTATTACHED;
+ ret = EIO;
+ goto err;
+ }
+
+ mutex_enter(&vdp->vdd_str->vns_dq_write.vdq_lock);
+ vib.vib_size = vdp->vdd_str->vns_dq_write.vdq_max;
+ mutex_exit(&vdp->vdd_str->vns_dq_write.vdq_lock);
+ mutex_exit(&vdp->vdd_lock);
+ ret = 0;
+
+err:
+ if (ddi_copyout(&vib, (void *)arg, sizeof (vib), cpflag) != 0)
+ return (EFAULT);
+
+ return (ret);
+}
+
+static int
+vnd_ioctl_settxbuf(vnd_dev_t *vdp, intptr_t arg, int cpflag)
+{
+ int ret;
+ vnd_ioc_buf_t vib;
+
+ if (ddi_copyin((void *)arg, &vib, sizeof (vib), cpflag) != 0)
+ return (EFAULT);
+
+ mutex_enter(&vnd_dev_lock);
+ if (vib.vib_size > vnd_vdq_hard_max) {
+ mutex_exit(&vnd_dev_lock);
+ vib.vib_errno = VND_E_BUFTOOBIG;
+ ret = EIO;
+ goto err;
+ }
+ mutex_exit(&vnd_dev_lock);
+
+ mutex_enter(&vdp->vdd_lock);
+ if (!(vdp->vdd_flags & VND_D_ATTACHED)) {
+ mutex_exit(&vdp->vdd_lock);
+ vib.vib_errno = VND_E_NOTATTACHED;
+ ret = EIO;
+ goto err;
+ }
+
+ mutex_enter(&vdp->vdd_str->vns_lock);
+ if (vib.vib_size < vdp->vdd_str->vns_minwrite) {
+ mutex_exit(&vdp->vdd_str->vns_lock);
+ mutex_exit(&vdp->vdd_lock);
+ vib.vib_errno = VND_E_BUFTOOSMALL;
+ ret = EIO;
+ goto err;
+ }
+ mutex_exit(&vdp->vdd_str->vns_lock);
+
+ mutex_enter(&vdp->vdd_str->vns_dq_write.vdq_lock);
+ vdp->vdd_str->vns_dq_write.vdq_max = vib.vib_size;
+ mutex_exit(&vdp->vdd_str->vns_dq_write.vdq_lock);
+ mutex_exit(&vdp->vdd_lock);
+ ret = 0;
+
+err:
+ if (ddi_copyout(&vib, (void *)arg, sizeof (vib), cpflag) != 0)
+ return (EFAULT);
+
+ return (ret);
+}
+
+static int
+vnd_ioctl_gettu(vnd_dev_t *vdp, intptr_t arg, int mode, boolean_t min)
+{
+ vnd_ioc_buf_t vib;
+
+ vib.vib_errno = 0;
+ mutex_enter(&vdp->vdd_lock);
+ if (vdp->vdd_flags & VND_D_ATTACHED) {
+ mutex_enter(&vdp->vdd_str->vns_lock);
+ if (min == B_TRUE)
+ vib.vib_size = vdp->vdd_str->vns_minwrite;
+ else
+ vib.vib_size = vdp->vdd_str->vns_maxwrite;
+ mutex_exit(&vdp->vdd_str->vns_lock);
+ } else {
+ vib.vib_errno = VND_E_NOTATTACHED;
+ }
+ mutex_exit(&vdp->vdd_lock);
+
+ if (ddi_copyout(&vib, (void *)arg, sizeof (vib), mode & FKIOCTL) != 0)
+ return (EFAULT);
+
+ return (0);
+}
+
+static int
+vnd_frameio_read(vnd_dev_t *vdp, intptr_t addr, int mode)
+{
+ int ret, nonblock, nwrite;
+ frameio_t *fio;
+ vnd_data_queue_t *vqp;
+ mblk_t *mp;
+
+ fio = frameio_alloc(KM_NOSLEEP | KM_NORMALPRI);
+ if (fio == NULL)
+ return (EAGAIN);
+
+ ret = frameio_hdr_copyin(fio, FRAMEIO_NVECS_MAX, (const void *)addr,
+ mode);
+ if (ret != 0) {
+ frameio_free(fio);
+ return (ret);
+ }
+
+ mutex_enter(&vdp->vdd_lock);
+ if (!(vdp->vdd_flags & VND_D_ATTACHED)) {
+ mutex_exit(&vdp->vdd_lock);
+ frameio_free(fio);
+ return (ENXIO);
+ }
+ mutex_exit(&vdp->vdd_lock);
+
+ nonblock = mode & (FNONBLOCK | FNDELAY);
+
+ vqp = &vdp->vdd_str->vns_dq_read;
+ mutex_enter(&vqp->vdq_lock);
+
+ /* Check empty case */
+ if (vqp->vdq_cur == 0) {
+ if (nonblock != 0) {
+ mutex_exit(&vqp->vdq_lock);
+ frameio_free(fio);
+ return (EWOULDBLOCK);
+ }
+ while (vqp->vdq_cur == 0) {
+ if (cv_wait_sig(&vqp->vdq_ready, &vqp->vdq_lock) <= 0) {
+ mutex_exit(&vqp->vdq_lock);
+ frameio_free(fio);
+ return (EINTR);
+ }
+ }
+ }
+
+ ret = frameio_mblk_chain_write(fio, MAP_BLK_FRAME, vqp->vdq_head,
+ &nwrite, mode & FKIOCTL);
+ if (ret != 0) {
+ mutex_exit(&vqp->vdq_lock);
+ frameio_free(fio);
+ return (ret);
+ }
+
+ ret = frameio_hdr_copyout(fio, nwrite, (void *)addr, mode);
+ if (ret != 0) {
+ mutex_exit(&vqp->vdq_lock);
+ frameio_free(fio);
+ return (ret);
+ }
+
+ while (nwrite > 0) {
+ (void) vnd_dq_pop(vqp, &mp);
+ freemsg(mp);
+ nwrite--;
+ }
+ mutex_exit(&vqp->vdq_lock);
+ frameio_free(fio);
+
+ return (0);
+}
+
+static int
+vnd_frameio_write(vnd_dev_t *vdp, intptr_t addr, int mode)
+{
+ frameio_t *fio;
+ int ret, nonblock, nframes, i, nread;
+ size_t maxwrite, minwrite, total, flen;
+ mblk_t *mp_chain, *mp, *nmp;
+ vnd_data_queue_t *vqp;
+
+ fio = frameio_alloc(KM_NOSLEEP | KM_NORMALPRI);
+ if (fio == NULL)
+ return (EAGAIN);
+
+ ret = frameio_hdr_copyin(fio, FRAMEIO_NVECS_MAX, (void *)addr, mode);
+ if (ret != 0) {
+ frameio_free(fio);
+ return (ret);
+ }
+
+ mutex_enter(&vdp->vdd_lock);
+ if (!(vdp->vdd_flags & VND_D_ATTACHED)) {
+ mutex_exit(&vdp->vdd_lock);
+ frameio_free(fio);
+ return (ENXIO);
+ }
+ mutex_exit(&vdp->vdd_lock);
+
+ nonblock = mode & (FNONBLOCK | FNDELAY);
+
+ /*
+ * Make sure no single frame is larger than we can accept.
+ */
+ mutex_enter(&vdp->vdd_str->vns_lock);
+ minwrite = vdp->vdd_str->vns_minwrite;
+ maxwrite = vdp->vdd_str->vns_maxwrite;
+ mutex_exit(&vdp->vdd_str->vns_lock);
+
+ nframes = fio->fio_nvpf / fio->fio_nvecs;
+ total = 0;
+ for (i = 0; i < nframes; i++) {
+ flen = frameio_frame_length(fio,
+ &fio->fio_vecs[i*fio->fio_nvpf]);
+ if (flen < minwrite || flen > maxwrite) {
+ frameio_free(fio);
+ return (ERANGE);
+ }
+ total += flen;
+ }
+
+ vqp = &vdp->vdd_str->vns_dq_write;
+ mutex_enter(&vqp->vdq_lock);
+ while (vnd_dq_reserve(vqp, total) == 0) {
+ if (nonblock != 0) {
+ frameio_free(fio);
+ mutex_exit(&vqp->vdq_lock);
+ return (EAGAIN);
+ }
+ if (cv_wait_sig(&vqp->vdq_ready, &vqp->vdq_lock) <= 0) {
+ mutex_exit(&vqp->vdq_lock);
+ frameio_free(fio);
+ return (EINTR);
+ }
+ }
+ mutex_exit(&vqp->vdq_lock);
+
+ /*
+ * We've reserved our space, let's copyin and go from here.
+ */
+ ret = frameio_mblk_chain_read(fio, &mp_chain, &nread, mode & FKIOCTL);
+ if (ret != 0) {
+ frameio_free(fio);
+ vnd_dq_unreserve(vqp, total);
+ cv_broadcast(&vqp->vdq_ready);
+ pollwakeup(&vdp->vdd_ph, POLLOUT);
+ return (ret);
+ }
+
+ for (mp = mp_chain; mp != NULL; mp = nmp) {
+ nmp = mp->b_next;
+ mp->b_next = NULL;
+ gsqueue_enter_one(vdp->vdd_str->vns_squeue, mp,
+ vnd_squeue_tx_append, vdp->vdd_str, GSQUEUE_PROCESS,
+ VND_SQUEUE_TAG_VND_WRITE);
+ }
+
+ /*
+ * Update the frameio structure to indicate that we wrote those frames.
+ */
+ frameio_mark_consumed(fio, nread);
+ ret = frameio_hdr_copyout(fio, nread, (void *)addr, mode);
+ frameio_free(fio);
+
+ return (ret);
+}
+
+static int
+vnd_ioctl_list_copy_info(vnd_dev_t *vdp, vnd_ioc_info_t *arg, int mode)
+{
+ const char *link;
+ uint32_t vers = 1;
+ ASSERT(MUTEX_HELD(&vdp->vdd_lock));
+
+ /*
+ * Copy all of the members out to userland.
+ */
+ if (ddi_copyout(&vers, &arg->vii_version, sizeof (uint32_t),
+ mode & FKIOCTL) != 0)
+ return (EFAULT);
+
+ if (vdp->vdd_flags & VND_D_LINKED)
+ link = vdp->vdd_lname;
+ else
+ link = "<anonymous>";
+ if (ddi_copyout(link, arg->vii_name, sizeof (arg->vii_name),
+ mode & FKIOCTL) != 0)
+ return (EFAULT);
+
+ if (ddi_copyout(vdp->vdd_datalink, arg->vii_datalink,
+ sizeof (arg->vii_datalink), mode & FKIOCTL) != 0)
+ return (EFAULT);
+
+ if (ddi_copyout(&vdp->vdd_nsd->vpnd_zid, &arg->vii_zone,
+ sizeof (zoneid_t), mode & FKIOCTL) != 0)
+ return (EFAULT);
+ return (0);
+}
+
+static int
+vnd_ioctl_list(intptr_t arg, cred_t *credp, int mode)
+{
+ vnd_ioc_list_t vl;
+ vnd_ioc_list32_t vl32;
+ zoneid_t zid;
+ vnd_dev_t *vdp;
+ vnd_ioc_info_t *vip;
+ int found, cancopy, ret;
+
+ if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
+ if (ddi_copyin((void *)arg, &vl32, sizeof (vnd_ioc_list32_t),
+ mode & FKIOCTL) != 0)
+ return (EFAULT);
+ vl.vl_nents = vl32.vl_nents;
+ vl.vl_actents = vl32.vl_actents;
+ vl.vl_ents = (void *)(uintptr_t)vl32.vl_ents;
+ } else {
+ if (ddi_copyin((void *)arg, &vl, sizeof (vnd_ioc_list_t),
+ mode & FKIOCTL) != 0)
+ return (EFAULT);
+ }
+
+ cancopy = vl.vl_nents;
+ vip = vl.vl_ents;
+ found = 0;
+ zid = crgetzoneid(credp);
+ mutex_enter(&vnd_dev_lock);
+ for (vdp = list_head(&vnd_dev_list); vdp != NULL;
+ vdp = list_next(&vnd_dev_list, vdp)) {
+ mutex_enter(&vdp->vdd_lock);
+ if (vdp->vdd_flags & VND_D_ATTACHED &&
+ !(vdp->vdd_flags & (VND_D_CONDEMNED | VND_D_ZONE_DYING)) &&
+ (zid == GLOBAL_ZONEID || zid == vdp->vdd_nsd->vpnd_zid)) {
+ found++;
+ if (cancopy > 0) {
+ ret = vnd_ioctl_list_copy_info(vdp, vip, mode);
+ if (ret != 0) {
+ mutex_exit(&vdp->vdd_lock);
+ mutex_exit(&vnd_dev_lock);
+ return (ret);
+ }
+ cancopy--;
+ vip++;
+ }
+ }
+ mutex_exit(&vdp->vdd_lock);
+ }
+ mutex_exit(&vnd_dev_lock);
+
+ if (ddi_copyout(&found, &((vnd_ioc_list_t *)arg)->vl_actents,
+ sizeof (uint_t), mode & FKIOCTL) != 0)
+ return (EFAULT);
+
+ return (0);
+}
+
+
+static int
+vnd_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
+ int *rvalp)
+{
+ int ret;
+ minor_t m;
+ vnd_dev_t *vdp;
+
+ m = getminor(dev);
+ ASSERT(m != 0);
+
+ /*
+ * Make sure no one has come in on an ioctl from the strioc case.
+ */
+ if ((cmd & VND_STRIOC) == VND_STRIOC)
+ return (ENOTTY);
+
+ /*
+ * Like close, seems like if this minor isn't found, it's a programmer
+ * error somehow.
+ */
+ vdp = vnd_dev_lookup(m);
+ if (vdp == NULL)
+ return (ENXIO);
+
+ switch (cmd) {
+ case VND_IOC_ATTACH:
+ if (!(mode & FWRITE)) {
+ ret = EBADF;
+ break;
+ }
+ ret = vnd_ioctl_attach(vdp, arg, credp, mode);
+ break;
+ case VND_IOC_LINK:
+ if (!(mode & FWRITE)) {
+ ret = EBADF;
+ break;
+ }
+ ret = vnd_ioctl_link(vdp, arg, credp, mode);
+ break;
+ case VND_IOC_UNLINK:
+ if (!(mode & FWRITE)) {
+ ret = EBADF;
+ break;
+ }
+ ret = vnd_ioctl_unlink(vdp, arg, credp, mode);
+ break;
+ case VND_IOC_GETRXBUF:
+ if (!(mode & FREAD)) {
+ ret = EBADF;
+ break;
+ }
+ ret = vnd_ioctl_getrxbuf(vdp, arg, mode);
+ break;
+ case VND_IOC_SETRXBUF:
+ if (!(mode & FWRITE)) {
+ ret = EBADF;
+ break;
+ }
+ ret = vnd_ioctl_setrxbuf(vdp, arg, mode);
+ break;
+ case VND_IOC_GETTXBUF:
+ if (!(mode & FREAD)) {
+ ret = EBADF;
+ break;
+ }
+ ret = vnd_ioctl_gettxbuf(vdp, arg, mode);
+ break;
+ case VND_IOC_SETTXBUF:
+ if (!(mode & FWRITE)) {
+ ret = EBADF;
+ break;
+ }
+ ret = vnd_ioctl_settxbuf(vdp, arg, mode);
+ break;
+ case VND_IOC_GETMAXBUF:
+ if (!(mode & FREAD)) {
+ ret = EBADF;
+ break;
+ }
+ if (crgetzoneid(credp) != GLOBAL_ZONEID) {
+ ret = EPERM;
+ break;
+ }
+ ret = vnd_ioctl_getmaxbuf(vdp, arg, mode);
+ break;
+ case VND_IOC_GETMINTU:
+ if (!(mode & FREAD)) {
+ ret = EBADF;
+ break;
+ }
+ ret = vnd_ioctl_gettu(vdp, arg, mode, B_TRUE);
+ break;
+ case VND_IOC_GETMAXTU:
+ if (!(mode & FREAD)) {
+ ret = EBADF;
+ break;
+ }
+ ret = vnd_ioctl_gettu(vdp, arg, mode, B_FALSE);
+ break;
+ case VND_IOC_FRAMEIO_READ:
+ if (!(mode & FREAD)) {
+ ret = EBADF;
+ break;
+ }
+ ret = vnd_frameio_read(vdp, arg, mode);
+ break;
+ case VND_IOC_FRAMEIO_WRITE:
+ if (!(mode & FWRITE)) {
+ ret = EBADF;
+ break;
+ }
+ ret = vnd_frameio_write(vdp, arg, mode);
+ break;
+ case VND_IOC_LIST:
+ if (!(mode & FREAD)) {
+ ret = EBADF;
+ break;
+ }
+ ret = vnd_ioctl_list(arg, credp, mode);
+ break;
+ default:
+ ret = ENOTTY;
+ break;
+ }
+
+ vnd_dev_rele(vdp);
+ return (ret);
+}
+
+static int
+vnd_open(dev_t *devp, int flag, int otyp, cred_t *credp)
+{
+ vnd_dev_t *vdp;
+ minor_t m;
+ zoneid_t zid;
+
+ if (flag & (FEXCL | FNDELAY))
+ return (ENOTSUP);
+
+ if (otyp & OTYP_BLK)
+ return (ENOTSUP);
+
+ zid = crgetzoneid(credp);
+ m = getminor(*devp);
+
+ /*
+ * If we have an open of a non-zero instance then we need to look that
+ * up in our list of entries.
+ */
+ if (m != 0) {
+
+ /*
+ * We don't check for rawaccess globally as a user could be
+ * doing a list ioctl on the control node which doesn't require
+ * this privilege.
+ */
+ if (secpolicy_net_rawaccess(credp) != 0)
+ return (EPERM);
+
+
+ vdp = vnd_dev_lookup(m);
+ if (vdp == NULL)
+ return (ENOENT);
+
+ /*
+ * We need to check to make sure that the user is allowed to
+ * open this node. At this point it should be an attached handle
+ * as that's all we're allowed to access.
+ */
+ mutex_enter(&vdp->vdd_lock);
+ if (!(vdp->vdd_flags & VND_D_LINKED)) {
+ mutex_exit(&vdp->vdd_lock);
+ vnd_dev_rele(vdp);
+ return (ENOENT);
+ }
+
+ if (vdp->vdd_flags & VND_D_ZONE_DYING) {
+ mutex_exit(&vdp->vdd_lock);
+ vnd_dev_rele(vdp);
+ return (ENOENT);
+ }
+
+ if (zid != GLOBAL_ZONEID && zid != vdp->vdd_nsd->vpnd_zid) {
+ mutex_exit(&vdp->vdd_lock);
+ vnd_dev_rele(vdp);
+ return (ENOENT);
+ }
+
+ if ((flag & FEXCL) && (vdp->vdd_flags & VND_D_OPENED)) {
+ mutex_exit(&vdp->vdd_lock);
+ vnd_dev_rele(vdp);
+ return (EBUSY);
+ }
+
+ if (!(vdp->vdd_flags & VND_D_OPENED)) {
+ vdp->vdd_flags |= VND_D_OPENED;
+ vdp->vdd_ref++;
+ DTRACE_VND_REFINC(vdp);
+ }
+ mutex_exit(&vdp->vdd_lock);
+ vnd_dev_rele(vdp);
+
+ return (0);
+ }
+
+ if (flag & FEXCL)
+ return (ENOTSUP);
+
+ /*
+ * We need to clone ourselves and set up new a state.
+ */
+ vdp = kmem_cache_alloc(vnd_dev_cache, KM_SLEEP);
+ bzero(vdp, sizeof (vnd_dev_t));
+
+ if (ldi_ident_from_dev(*devp, &vdp->vdd_ldiid) != 0) {
+ kmem_cache_free(vnd_dev_cache, vdp);
+ return (EINVAL);
+ }
+
+ vdp->vdd_minor = id_alloc(vnd_minors);
+ mutex_init(&vdp->vdd_lock, NULL, MUTEX_DRIVER, NULL);
+ list_link_init(&vdp->vdd_link);
+ vdp->vdd_ref = 1;
+ *devp = makedevice(getmajor(*devp), vdp->vdd_minor);
+ vdp->vdd_devid = *devp;
+ DTRACE_VND_REFINC(vdp);
+ vdp->vdd_flags |= VND_D_OPENED;
+
+ mutex_enter(&vnd_dev_lock);
+ list_insert_head(&vnd_dev_list, vdp);
+ mutex_exit(&vnd_dev_lock);
+
+ return (0);
+}
+
+static int
+vnd_close(dev_t dev, int flag, int otyp, cred_t *credp)
+{
+ minor_t m;
+ vnd_dev_t *vdp;
+
+ m = getminor(dev);
+ if (m == 0)
+ return (ENXIO);
+
+ vdp = vnd_dev_lookup(m);
+ if (vdp == NULL)
+ return (ENXIO);
+
+ mutex_enter(&vdp->vdd_lock);
+ VERIFY(vdp->vdd_flags & VND_D_OPENED);
+ vdp->vdd_flags &= ~VND_D_OPENED;
+ mutex_exit(&vdp->vdd_lock);
+
+ /* Remove the hold from the previous open. */
+ vnd_dev_rele(vdp);
+
+ /* And now from lookup */
+ vnd_dev_rele(vdp);
+ return (0);
+}
+
+static int
+vnd_read(dev_t dev, struct uio *uiop, cred_t *credp)
+{
+ int nonblock, error = 0;
+ size_t mpsize;
+ vnd_dev_t *vdp;
+ vnd_data_queue_t *vqp;
+ mblk_t *mp = NULL;
+ offset_t u_loffset;
+
+ /*
+ * If we have more than one uio we refuse to do anything. That's for
+ * frameio.
+ */
+ if (uiop->uio_iovcnt > 1)
+ return (EINVAL);
+
+ vdp = vnd_dev_lookup(getminor(dev));
+ if (vdp == NULL)
+ return (ENXIO);
+
+ mutex_enter(&vdp->vdd_lock);
+ if (!(vdp->vdd_flags & VND_D_ATTACHED)) {
+ mutex_exit(&vdp->vdd_lock);
+ vnd_dev_rele(vdp);
+ return (ENXIO);
+ }
+ mutex_exit(&vdp->vdd_lock);
+ nonblock = uiop->uio_fmode & (FNONBLOCK | FNDELAY);
+
+ vqp = &vdp->vdd_str->vns_dq_read;
+ mutex_enter(&vqp->vdq_lock);
+
+ /* Check empty case */
+ if (vqp->vdq_cur == 0) {
+ if (nonblock != 0) {
+ error = EWOULDBLOCK;
+ goto err;
+ }
+ while (vqp->vdq_cur == 0) {
+ if (cv_wait_sig(&vqp->vdq_ready, &vqp->vdq_lock) <= 0) {
+ error = EINTR;
+ goto err;
+ }
+ }
+ }
+
+ /* Ensure our buffer is big enough */
+ mp = vqp->vdq_head;
+ ASSERT(mp != NULL);
+ mpsize = msgsize(mp);
+ if (mpsize > uiop->uio_resid) {
+ error = EOVERFLOW;
+ goto err;
+ }
+
+ u_loffset = uiop->uio_loffset;
+ while (mp != NULL) {
+ if (uiomove(mp->b_rptr, MBLKL(mp), UIO_READ, uiop) != 0) {
+ error = EFAULT;
+ uiop->uio_loffset = u_loffset;
+ mp = NULL;
+ goto err;
+ }
+ mpsize -= MBLKL(mp);
+ mp = mp->b_cont;
+ }
+ ASSERT(mpsize == 0);
+ (void) vnd_dq_pop(vqp, &mp);
+ freemsg(mp);
+err:
+ mutex_exit(&vqp->vdq_lock);
+ vnd_dev_rele(vdp);
+
+ return (error);
+}
+
+static int
+vnd_write(dev_t dev, struct uio *uiop, cred_t *credp)
+{
+ int nonblock, error;
+ vnd_dev_t *vdp;
+ mblk_t *mp;
+ ssize_t iosize, origsize;
+ vnd_data_queue_t *vqp;
+
+ if (uiop->uio_iovcnt > 1)
+ return (EINVAL);
+
+ vdp = vnd_dev_lookup(getminor(dev));
+ if (vdp == NULL)
+ return (ENXIO);
+
+ mutex_enter(&vdp->vdd_lock);
+ if (!(vdp->vdd_flags & VND_D_ATTACHED)) {
+ mutex_exit(&vdp->vdd_lock);
+ vnd_dev_rele(vdp);
+ return (ENXIO);
+ }
+ mutex_exit(&vdp->vdd_lock);
+ nonblock = uiop->uio_fmode & (FNONBLOCK | FNDELAY);
+
+ mutex_enter(&vdp->vdd_str->vns_lock);
+ if (uiop->uio_resid > vdp->vdd_str->vns_maxwrite ||
+ uiop->uio_resid < vdp->vdd_str->vns_minwrite) {
+ mutex_exit(&vdp->vdd_str->vns_lock);
+ vnd_dev_rele(vdp);
+ return (ERANGE);
+ }
+ mutex_exit(&vdp->vdd_str->vns_lock);
+ VERIFY(vdp->vdd_str != NULL);
+
+ /*
+ * Reserve space in the data queue if we can. If we can't, block or
+ * return EAGAIN. If we can, go and squeue_enter.
+ */
+ vqp = &vdp->vdd_str->vns_dq_write;
+ mutex_enter(&vqp->vdq_lock);
+ while (vnd_dq_reserve(vqp, uiop->uio_resid) == 0) {
+ if (nonblock != 0) {
+ mutex_exit(&vqp->vdq_lock);
+ vnd_dev_rele(vdp);
+ return (EAGAIN);
+ }
+ if (cv_wait_sig(&vqp->vdq_ready, &vqp->vdq_lock) <= 0) {
+ mutex_exit(&vqp->vdq_lock);
+ vnd_dev_rele(vdp);
+ return (EINTR);
+ }
+ }
+ mutex_exit(&vqp->vdq_lock);
+
+ /*
+ * Now that we've reserved the space, try to allocate kernel space for
+ * and copy in the block. To take care of all this we use the
+ * strmakedata subroutine for now.
+ */
+ origsize = iosize = uiop->uio_resid;
+ error = strmakedata(&iosize, uiop, vdp->vdd_str->vns_wq->q_stream, 0,
+ &mp);
+
+ /*
+ * strmakedata() will return an error or it may only consume a portion
+ * of the data.
+ */
+ if (error != 0 || uiop->uio_resid != 0) {
+ vnd_dq_unreserve(vqp, origsize);
+ cv_broadcast(&vqp->vdq_ready);
+ pollwakeup(&vdp->vdd_ph, POLLOUT);
+ vnd_dev_rele(vdp);
+ return (ENOSR);
+ }
+
+ gsqueue_enter_one(vdp->vdd_str->vns_squeue, mp,
+ vnd_squeue_tx_append, vdp->vdd_str, GSQUEUE_PROCESS,
+ VND_SQUEUE_TAG_VND_WRITE);
+
+ vnd_dev_rele(vdp);
+ return (0);
+}
+
+static int
+vnd_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
+ struct pollhead **phpp)
+{
+ int ready = 0;
+ vnd_dev_t *vdp;
+ vnd_data_queue_t *vqp;
+
+ vdp = vnd_dev_lookup(getminor(dev));
+ if (vdp == NULL)
+ return (ENXIO);
+
+ mutex_enter(&vdp->vdd_lock);
+ if (!(vdp->vdd_flags & VND_D_ATTACHED)) {
+ mutex_exit(&vdp->vdd_lock);
+ vnd_dev_rele(vdp);
+ return (ENXIO);
+ }
+ mutex_exit(&vdp->vdd_lock);
+
+ if ((events & POLLIN) || (events & POLLRDNORM)) {
+ vqp = &vdp->vdd_str->vns_dq_read;
+ mutex_enter(&vqp->vdq_lock);
+ if (vqp->vdq_head != NULL)
+ ready |= events & (POLLIN | POLLRDNORM);
+ mutex_exit(&vqp->vdq_lock);
+ }
+
+ if (events & POLLOUT) {
+ vqp = &vdp->vdd_str->vns_dq_write;
+ mutex_enter(&vqp->vdq_lock);
+ if (vqp->vdq_cur != vqp->vdq_max)
+ ready |= POLLOUT;
+ mutex_exit(&vqp->vdq_lock);
+ }
+
+ if (ready != 0) {
+ *reventsp = ready;
+ vnd_dev_rele(vdp);
+ return (0);
+ }
+
+ *reventsp = 0;
+ if (!anyyet)
+ *phpp = &vdp->vdd_ph;
+
+ vnd_dev_rele(vdp);
+ return (0);
+}
+
+static void *
+vnd_stack_init(netstackid_t stackid, netstack_t *ns)
+{
+ vnd_pnsd_t *nsp;
+
+ nsp = kmem_cache_alloc(vnd_pnsd_cache, KM_SLEEP);
+ bzero(nsp, sizeof (*nsp));
+ nsp->vpnd_nsid = stackid;
+ nsp->vpnd_zid = netstackid_to_zoneid(stackid);
+ nsp->vpnd_flags = 0;
+ mutex_init(&nsp->vpnd_lock, NULL, MUTEX_DRIVER, NULL);
+ list_create(&nsp->vpnd_dev_list, sizeof (vnd_dev_t),
+ offsetof(vnd_dev_t, vdd_nslink));
+ if (vnd_netinfo_init(nsp) == 0)
+ nsp->vpnd_hooked = B_TRUE;
+
+ mutex_enter(&vnd_dev_lock);
+ list_insert_tail(&vnd_nsd_list, nsp);
+ mutex_exit(&vnd_dev_lock);
+
+ return (nsp);
+}
+
+static void
+vnd_stack_shutdown(netstackid_t stackid, void *arg)
+{
+ vnd_pnsd_t *nsp = arg;
+ vnd_dev_t *vdp;
+
+ ASSERT(nsp != NULL);
+ /*
+ * After shut down no one should be able to find their way to this
+ * netstack again.
+ */
+ mutex_enter(&vnd_dev_lock);
+ list_remove(&vnd_nsd_list, nsp);
+ mutex_exit(&vnd_dev_lock);
+
+ /*
+ * Make sure hooks know that they're going away.
+ */
+ if (nsp->vpnd_hooked == B_TRUE)
+ vnd_netinfo_shutdown(nsp);
+
+ /*
+ * Now we need to go through and notify each zone that they are in
+ * teardown phase. See the big theory statement section on vnd, zones,
+ * netstacks, and sdev for more information about this.
+ */
+ mutex_enter(&nsp->vpnd_lock);
+ nsp->vpnd_flags |= VND_NS_CONDEMNED;
+ for (vdp = list_head(&nsp->vpnd_dev_list); vdp != NULL;
+ vdp = list_next(&nsp->vpnd_dev_list, vdp)) {
+ mutex_enter(&vdp->vdd_lock);
+ if (!(vdp->vdd_flags & VND_D_CONDEMNED))
+ vdp->vdd_flags |= VND_D_ZONE_DYING;
+ mutex_exit(&vdp->vdd_lock);
+ }
+ mutex_exit(&nsp->vpnd_lock);
+
+ /*
+ * Next we remove all the links as we know nothing new can be added to
+ * the list and that none of the extent devices can obtain additional
+ * links.
+ */
+restart:
+ mutex_enter(&nsp->vpnd_lock);
+ for (vdp = list_head(&nsp->vpnd_dev_list); vdp != NULL;
+ vdp = list_next(&nsp->vpnd_dev_list, vdp)) {
+ mutex_enter(&vdp->vdd_lock);
+ if ((vdp->vdd_flags & VND_D_CONDEMNED) ||
+ !(vdp->vdd_flags & VND_D_LINKED)) {
+ mutex_exit(&vdp->vdd_lock);
+ continue;
+ }
+
+ /*
+ * We drop our lock here and restart afterwards. Note that as
+ * part of unlinking we end up doing a rele of the vnd_dev_t. If
+ * this is the final hold on the vnd_dev_t then it might try and
+ * remove itself. Our locking rules requires not to be holding
+ * any locks when we call any of the rele functions.
+ *
+ * Note that the unlink function requires holders to call into
+ * it with the vnd_dev_t->vdd_lock held and will take care of it
+ * for us. Because we don't have a hold on it, we're done at
+ * this point.
+ */
+ mutex_exit(&nsp->vpnd_lock);
+ /* Forcibly unlink */
+ vnd_dev_unlink(vdp);
+ goto restart;
+ }
+ mutex_exit(&nsp->vpnd_lock);
+}
+
+static void
+vnd_stack_destroy(netstackid_t stackid, void *arg)
+{
+ vnd_pnsd_t *nsp = arg;
+
+ ASSERT(nsp != NULL);
+
+ /*
+ * Now that we've unlinked everything we just have to hang out for
+ * it to finish exiting. Now that it's no longer the kernel itself
+ * that's doing this we just need to wait for our reference count to
+ * equal zero and then we're free. If the global zone is holding open a
+ * reference to a vnd device for another zone, that's bad, but there's
+ * nothing much we can do. See the section on 'vnd, zones, netstacks' in
+ * the big theory statement for more information.
+ */
+ mutex_enter(&nsp->vpnd_lock);
+ while (nsp->vpnd_ref != 0)
+ cv_wait(&nsp->vpnd_ref_change, &nsp->vpnd_lock);
+ mutex_exit(&nsp->vpnd_lock);
+
+ /*
+ * During shutdown we removed ourselves from the list and now we have no
+ * more references so we can safely say that there is nothing left and
+ * destroy everything that we had sitting around.
+ */
+ if (nsp->vpnd_hooked == B_TRUE)
+ vnd_netinfo_fini(nsp);
+
+ mutex_destroy(&nsp->vpnd_lock);
+ list_destroy(&nsp->vpnd_dev_list);
+ kmem_cache_free(vnd_pnsd_cache, nsp);
+}
+
+/*
+ * Convert a node with a name of the form /dev/vnd/zone/%zonename and
+ * /dev/vnd/zone/%zonename/%linkname to the corresponding vnd netstack.
+ */
+static vnd_pnsd_t *
+vnd_sdev_ctx_to_ns(sdev_ctx_t ctx)
+{
+ enum vtype vt;
+ const char *path = sdev_ctx_path(ctx);
+ char *zstart, *dup;
+ size_t duplen;
+ vnd_pnsd_t *nsp;
+
+ vt = sdev_ctx_vtype(ctx);
+ ASSERT(strncmp(path, VND_SDEV_ZROOT, strlen(VND_SDEV_ZROOT)) == 0);
+
+ if (vt == VDIR) {
+ zstart = strrchr(path, '/');
+ ASSERT(zstart != NULL);
+ zstart++;
+ return (vnd_nsd_lookup_by_zonename(zstart));
+ }
+
+ ASSERT(vt == VCHR);
+
+ dup = strdup(path);
+ duplen = strlen(dup) + 1;
+ zstart = strrchr(dup, '/');
+ *zstart = '\0';
+ zstart--;
+ zstart = strrchr(dup, '/');
+ zstart++;
+ nsp = vnd_nsd_lookup_by_zonename(zstart);
+ kmem_free(dup, duplen);
+
+ return (nsp);
+}
+
+static sdev_plugin_validate_t
+vnd_sdev_validate_dir(sdev_ctx_t ctx)
+{
+ vnd_pnsd_t *nsp;
+
+ if (strcmp(sdev_ctx_path(ctx), VND_SDEV_ROOT) == 0)
+ return (SDEV_VTOR_VALID);
+
+ if (strcmp(sdev_ctx_path(ctx), VND_SDEV_ZROOT) == 0) {
+ ASSERT(getzoneid() == GLOBAL_ZONEID);
+ ASSERT(sdev_ctx_flags(ctx) & SDEV_CTX_GLOBAL);
+ return (SDEV_VTOR_VALID);
+ }
+
+ nsp = vnd_sdev_ctx_to_ns(ctx);
+ if (nsp == NULL)
+ return (SDEV_VTOR_INVALID);
+ vnd_nsd_rele(nsp);
+
+ return (SDEV_VTOR_VALID);
+}
+
+static sdev_plugin_validate_t
+vnd_sdev_validate(sdev_ctx_t ctx)
+{
+ enum vtype vt;
+ dev_t dev;
+ vnd_dev_t *vdp;
+
+ vt = sdev_ctx_vtype(ctx);
+ if (vt == VDIR)
+ return (vnd_sdev_validate_dir(ctx));
+ ASSERT(vt == VCHR);
+
+ if (strcmp("ctl", sdev_ctx_name(ctx)) == 0)
+ return (SDEV_VTOR_VALID);
+
+ dev = (uintptr_t)sdev_ctx_vtype_data(ctx);
+ vdp = vnd_dev_lookup(getminor(dev));
+ if (vdp == NULL)
+ return (SDEV_VTOR_STALE);
+
+ mutex_enter(&vdp->vdd_lock);
+ if (!(vdp->vdd_flags & VND_D_LINKED) ||
+ (vdp->vdd_flags & (VND_D_CONDEMNED | VND_D_ZONE_DYING))) {
+ mutex_exit(&vdp->vdd_lock);
+ vnd_dev_rele(vdp);
+ return (SDEV_VTOR_STALE);
+ }
+
+ if (strcmp(sdev_ctx_name(ctx), vdp->vdd_lname) != 0) {
+ mutex_exit(&vdp->vdd_lock);
+ vnd_dev_rele(vdp);
+ return (SDEV_VTOR_STALE);
+ }
+
+ mutex_exit(&vdp->vdd_lock);
+ vnd_dev_rele(vdp);
+ return (SDEV_VTOR_VALID);
+}
+
+/*
+ * This function is a no-op. sdev never has holds on our devices as they can go
+ * away at any time and specfs has to deal with that fact.
+ */
+static void
+vnd_sdev_inactive(sdev_ctx_t ctx)
+{
+}
+
+static int
+vnd_sdev_fillzone(vnd_pnsd_t *nsp, sdev_ctx_t ctx)
+{
+ int ret;
+ vnd_dev_t *vdp;
+
+ mutex_enter(&nsp->vpnd_lock);
+ for (vdp = list_head(&nsp->vpnd_dev_list); vdp != NULL;
+ vdp = list_next(&nsp->vpnd_dev_list, vdp)) {
+ mutex_enter(&vdp->vdd_lock);
+ if ((vdp->vdd_flags & VND_D_LINKED) &&
+ !(vdp->vdd_flags & (VND_D_CONDEMNED | VND_D_ZONE_DYING))) {
+ ret = sdev_plugin_mknod(ctx, vdp->vdd_lname, S_IFCHR,
+ vdp->vdd_devid);
+ if (ret != 0 && ret != EEXIST) {
+ mutex_exit(&vdp->vdd_lock);
+ mutex_exit(&nsp->vpnd_lock);
+ vnd_nsd_rele(nsp);
+ return (ret);
+ }
+ }
+ mutex_exit(&vdp->vdd_lock);
+ }
+ mutex_exit(&nsp->vpnd_lock);
+
+ return (0);
+}
+
+static int
+vnd_sdev_filldir_root(sdev_ctx_t ctx)
+{
+ zoneid_t zid;
+ vnd_pnsd_t *nsp;
+ int ret;
+
+ zid = getzoneid();
+ nsp = vnd_nsd_lookup(zoneid_to_netstackid(zid));
+ ASSERT(nsp != NULL);
+ ret = vnd_sdev_fillzone(nsp, ctx);
+ vnd_nsd_rele(nsp);
+ if (ret != 0)
+ return (ret);
+
+ /*
+ * Checking the zone id is not sufficient as the global zone could be
+ * reaching down into a non-global zone's mounted /dev.
+ */
+ if (zid == GLOBAL_ZONEID && (sdev_ctx_flags(ctx) & SDEV_CTX_GLOBAL)) {
+ ret = sdev_plugin_mkdir(ctx, "zone");
+ if (ret != 0 && ret != EEXIST)
+ return (ret);
+ }
+
+ /*
+ * Always add a reference to the control node. There's no need to
+ * reference it since it always exists and is always what we clone from.
+ */
+ ret = sdev_plugin_mknod(ctx, "ctl", S_IFCHR,
+ makedevice(ddi_driver_major(vnd_dip), 0));
+ if (ret != 0 && ret != EEXIST)
+ return (ret);
+
+ return (0);
+}
+
+static int
+vnd_sdev_filldir_zroot(sdev_ctx_t ctx)
+{
+ int ret;
+ vnd_pnsd_t *nsp;
+ zone_t *zonep;
+
+ ASSERT(getzoneid() == GLOBAL_ZONEID);
+ ASSERT(sdev_ctx_flags(ctx) & SDEV_CTX_GLOBAL);
+
+ mutex_enter(&vnd_dev_lock);
+ for (nsp = list_head(&vnd_nsd_list); nsp != NULL;
+ nsp = list_next(&vnd_nsd_list, nsp)) {
+ mutex_enter(&nsp->vpnd_lock);
+ if (list_is_empty(&nsp->vpnd_dev_list)) {
+ mutex_exit(&nsp->vpnd_lock);
+ continue;
+ }
+ mutex_exit(&nsp->vpnd_lock);
+ zonep = zone_find_by_id(nsp->vpnd_zid);
+ /*
+ * This zone must be being torn down, so skip it.
+ */
+ if (zonep == NULL)
+ continue;
+ ret = sdev_plugin_mkdir(ctx, zonep->zone_name);
+ zone_rele(zonep);
+ if (ret != 0 && ret != EEXIST) {
+ mutex_exit(&vnd_dev_lock);
+ return (ret);
+ }
+ }
+ mutex_exit(&vnd_dev_lock);
+ return (0);
+}
+
+static int
+vnd_sdev_filldir(sdev_ctx_t ctx)
+{
+ int ret;
+ vnd_pnsd_t *nsp;
+
+ ASSERT(sdev_ctx_vtype(ctx) == VDIR);
+ if (strcmp(VND_SDEV_ROOT, sdev_ctx_path(ctx)) == 0)
+ return (vnd_sdev_filldir_root(ctx));
+
+ if (strcmp(VND_SDEV_ZROOT, sdev_ctx_path(ctx)) == 0)
+ return (vnd_sdev_filldir_zroot(ctx));
+
+ ASSERT(strncmp(VND_SDEV_ZROOT, sdev_ctx_path(ctx),
+ strlen(VND_SDEV_ZROOT)) == 0);
+ nsp = vnd_sdev_ctx_to_ns(ctx);
+ if (nsp == NULL)
+ return (0);
+
+ ret = vnd_sdev_fillzone(nsp, ctx);
+ vnd_nsd_rele(nsp);
+
+ return (ret);
+}
+
+static sdev_plugin_ops_t vnd_sdev_ops = {
+ SDEV_PLUGIN_VERSION,
+ SDEV_PLUGIN_SUBDIR,
+ vnd_sdev_validate,
+ vnd_sdev_filldir,
+ vnd_sdev_inactive
+};
+
+static int
+vnd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ int errp = 0;
+
+ if (cmd != DDI_ATTACH)
+ return (DDI_FAILURE);
+
+ /*
+ * Only allow one instance.
+ */
+ if (vnd_dip != NULL)
+ return (DDI_FAILURE);
+
+ vnd_dip = dip;
+ if (ddi_create_minor_node(vnd_dip, "vnd", S_IFCHR, 0, DDI_PSEUDO, 0) !=
+ DDI_SUCCESS) {
+ vnd_dip = NULL;
+ return (DDI_FAILURE);
+ }
+
+ if (ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
+ DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) {
+ ddi_remove_minor_node(vnd_dip, NULL);
+ vnd_dip = NULL;
+ return (DDI_FAILURE);
+ }
+
+ vnd_sdev_hdl = sdev_plugin_register(VND_SDEV_NAME, &vnd_sdev_ops,
+ &errp);
+ if (vnd_sdev_hdl == NULL) {
+ ddi_remove_minor_node(vnd_dip, NULL);
+ ddi_prop_remove_all(vnd_dip);
+ vnd_dip = NULL;
+ return (DDI_FAILURE);
+ }
+
+ vnd_sqset = gsqueue_set_create(GSQUEUE_DEFAULT_WAIT,
+ GSQUEUE_DEFAULT_PRIORITY);
+
+ return (DDI_SUCCESS);
+}
+
+static int
+vnd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ if (cmd != DDI_DETACH)
+ return (DDI_FAILURE);
+
+ mutex_enter(&vnd_dev_lock);
+ if (!list_is_empty(&vnd_dev_list)) {
+ mutex_exit(&vnd_dev_lock);
+ return (DDI_FAILURE);
+ }
+ mutex_exit(&vnd_dev_lock);
+
+ return (DDI_FAILURE);
+}
+
+static int
+vnd_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
+{
+ int error;
+
+ switch (cmd) {
+ case DDI_INFO_DEVT2DEVINFO:
+ *result = (void *)vnd_dip;
+ error = DDI_SUCCESS;
+ break;
+ case DDI_INFO_DEVT2INSTANCE:
+ *result = (void *)0;
+ error = DDI_SUCCESS;
+ break;
+ default:
+ error = DDI_FAILURE;
+ break;
+ }
+ return (error);
+}
+
+
+
+static void
+vnd_ddi_fini(void)
+{
+ netstack_unregister(NS_VND);
+ if (vnd_taskq != NULL)
+ taskq_destroy(vnd_taskq);
+ if (vnd_str_cache != NULL)
+ kmem_cache_destroy(vnd_str_cache);
+ if (vnd_dev_cache != NULL)
+ kmem_cache_destroy(vnd_dev_cache);
+ if (vnd_pnsd_cache != NULL)
+ kmem_cache_destroy(vnd_pnsd_cache);
+ if (vnd_minors != NULL)
+ id_space_destroy(vnd_minors);
+ if (vnd_list_init != 0) {
+ list_destroy(&vnd_nsd_list);
+ list_destroy(&vnd_dev_list);
+ mutex_destroy(&vnd_dev_lock);
+ vnd_list_init = 0;
+ }
+ frameio_fini();
+}
+
+static int
+vnd_ddi_init(void)
+{
+ if (frameio_init() != 0)
+ return (DDI_FAILURE);
+
+ vnd_str_cache = kmem_cache_create("vnd_str_cache", sizeof (vnd_str_t),
+ 0, NULL, NULL, NULL, NULL, NULL, 0);
+ if (vnd_str_cache == NULL) {
+ frameio_fini();
+ return (DDI_FAILURE);
+ }
+ vnd_dev_cache = kmem_cache_create("vnd_dev_cache", sizeof (vnd_dev_t),
+ 0, NULL, NULL, NULL, NULL, NULL, 0);
+ if (vnd_dev_cache == NULL) {
+ kmem_cache_destroy(vnd_str_cache);
+ frameio_fini();
+ return (DDI_FAILURE);
+ }
+ vnd_pnsd_cache = kmem_cache_create("vnd_pnsd_cache",
+ sizeof (vnd_pnsd_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
+ if (vnd_pnsd_cache == NULL) {
+ kmem_cache_destroy(vnd_dev_cache);
+ kmem_cache_destroy(vnd_str_cache);
+ frameio_fini();
+ return (DDI_FAILURE);
+ }
+
+ vnd_taskq = taskq_create_instance("vnd", -1, 1, minclsyspri, 0, 0, 0);
+ if (vnd_taskq == NULL) {
+ kmem_cache_destroy(vnd_pnsd_cache);
+ kmem_cache_destroy(vnd_dev_cache);
+ kmem_cache_destroy(vnd_str_cache);
+ frameio_fini();
+ return (DDI_FAILURE);
+ }
+
+ vnd_minors = id_space_create("vnd_minors", 1, INT32_MAX);
+ if (vnd_minors == NULL) {
+ taskq_destroy(vnd_taskq);
+ kmem_cache_destroy(vnd_pnsd_cache);
+ kmem_cache_destroy(vnd_dev_cache);
+ kmem_cache_destroy(vnd_str_cache);
+ frameio_fini();
+ return (DDI_FAILURE);
+ }
+
+ mutex_init(&vnd_dev_lock, NULL, MUTEX_DRIVER, NULL);
+ list_create(&vnd_dev_list, sizeof (vnd_dev_t),
+ offsetof(vnd_dev_t, vdd_link));
+ list_create(&vnd_nsd_list, sizeof (vnd_pnsd_t),
+ offsetof(vnd_pnsd_t, vpnd_link));
+ vnd_list_init = 1;
+
+ netstack_register(NS_VND, vnd_stack_init, vnd_stack_shutdown,
+ vnd_stack_destroy);
+
+ return (DDI_SUCCESS);
+}
+
+static struct module_info vnd_minfo = {
+ 0, /* module id */
+ "vnd", /* module name */
+ 1, /* smallest packet size */
+ INFPSZ, /* largest packet size (infinite) */
+ 1, /* high watermark */
+ 0 /* low watermark */
+};
+
+static struct qinit vnd_r_qinit = {
+ vnd_s_rput,
+ NULL,
+ vnd_s_open,
+ vnd_s_close,
+ NULL,
+ &vnd_minfo,
+ NULL
+};
+
+static struct qinit vnd_w_qinit = {
+ vnd_s_wput,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ &vnd_minfo,
+ NULL
+};
+
+static struct streamtab vnd_strtab = {
+ &vnd_r_qinit,
+ &vnd_w_qinit,
+ NULL,
+ NULL
+};
+
+
+static struct cb_ops vnd_cb_ops = {
+ vnd_open, /* open */
+ vnd_close, /* close */
+ nulldev, /* strategy */
+ nulldev, /* print */
+ nodev, /* dump */
+ vnd_read, /* read */
+ vnd_write, /* write */
+ vnd_ioctl, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ nodev, /* segmap */
+ vnd_chpoll, /* poll */
+ ddi_prop_op, /* cb_prop_op */
+ NULL, /* streamtab */
+ D_MP /* Driver compatibility flag */
+};
+
+static struct dev_ops vnd_dev_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* refcnt */
+ vnd_info, /* get_dev_info */
+ nulldev, /* identify */
+ nulldev, /* probe */
+ vnd_attach, /* attach */
+ vnd_detach, /* detach */
+ nodev, /* reset */
+ &vnd_cb_ops, /* driver operations */
+ NULL, /* bus operations */
+ nodev, /* dev power */
+ ddi_quiesce_not_needed /* quiesce */
+};
+
+static struct modldrv vnd_modldrv = {
+ &mod_driverops,
+ "Virtual Networking Datapath Driver",
+ &vnd_dev_ops
+};
+
+static struct fmodsw vnd_fmodfsw = {
+ "vnd",
+ &vnd_strtab,
+ D_NEW | D_MP
+};
+
+static struct modlstrmod vnd_modlstrmod = {
+ &mod_strmodops,
+ "Virtual Networking Datapath Driver",
+ &vnd_fmodfsw
+};
+
+static struct modlinkage vnd_modlinkage = {
+ MODREV_1,
+ &vnd_modldrv,
+ &vnd_modlstrmod,
+ NULL
+};
+
+int
+_init(void)
+{
+ int error;
+
+ /*
+ * We need to do all of our global initialization in init as opposed to
+ * attach and detach. The problem here is that because vnd can be used
+ * from a stream context while being detached, we can not rely on having
+ * run attach to create everything, alas. so it goes in _init, just like
+ * our friend ip.
+ */
+ if ((error = vnd_ddi_init()) != DDI_SUCCESS)
+ return (error);
+ error = mod_install((&vnd_modlinkage));
+ if (error != 0)
+ vnd_ddi_fini();
+ return (error);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&vnd_modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ int error;
+
+ error = mod_remove(&vnd_modlinkage);
+ if (error == 0)
+ vnd_ddi_fini();
+ return (error);
+}
diff --git a/usr/src/uts/common/io/vnd/vnd.conf b/usr/src/uts/common/io/vnd/vnd.conf
new file mode 100644
index 0000000000..65872e1ddf
--- /dev/null
+++ b/usr/src/uts/common/io/vnd/vnd.conf
@@ -0,0 +1,16 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014, Joyent, Inc. All rights reserved.
+#
+
+name="vnd" parent="pseudo" instance=0;
diff --git a/usr/src/uts/common/io/vnic/vnic_dev.c b/usr/src/uts/common/io/vnic/vnic_dev.c
index 3cacbe395b..3cb7e7660a 100644
--- a/usr/src/uts/common/io/vnic/vnic_dev.c
+++ b/usr/src/uts/common/io/vnic/vnic_dev.c
@@ -53,6 +53,7 @@
#include <sys/vlan.h>
#include <sys/vnic.h>
#include <sys/vnic_impl.h>
+#include <sys/mac_impl.h>
#include <sys/mac_flow_impl.h>
#include <inet/ip_impl.h>
@@ -369,6 +370,7 @@ vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid,
bzero(vnic, sizeof (*vnic));
+ vnic->vn_ls = LINK_STATE_UNKNOWN;
vnic->vn_id = vnic_id;
vnic->vn_link_id = linkid;
vnic->vn_vrid = vrid;
@@ -579,11 +581,12 @@ vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid,
vnic->vn_enabled = B_TRUE;
if (is_anchor) {
- mac_link_update(vnic->vn_mh, LINK_STATE_UP);
+ vnic->vn_ls = LINK_STATE_UP;
} else {
- mac_link_update(vnic->vn_mh,
- mac_client_stat_get(vnic->vn_mch, MAC_STAT_LINK_STATE));
+ vnic->vn_ls = mac_client_stat_get(vnic->vn_mch,
+ MAC_STAT_LINK_STATE);
}
+ mac_link_update(vnic->vn_mh, vnic->vn_ls);
rw_exit(&vnic_lock);
@@ -1072,6 +1075,18 @@ vnic_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
err = mac_maxsdu_update(vn->vn_mh, mtu);
break;
}
+ case MAC_PROP_VN_PROMISC_FILTERED: {
+ boolean_t filtered;
+
+ if (pr_valsize < sizeof (filtered)) {
+ err = EINVAL;
+ break;
+ }
+
+ bcopy(pr_val, &filtered, sizeof (filtered));
+ mac_set_promisc_filtered(vn->vn_mch, filtered);
+ break;
+ }
case MAC_PROP_SECONDARY_ADDRS: {
mac_secondary_addr_t msa;
@@ -1079,6 +1094,34 @@ vnic_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
err = vnic_set_secondary_macs(vn, &msa);
break;
}
+ case MAC_PROP_PRIVATE: {
+ long val, i;
+ const char *v;
+
+ if (vn->vn_link_id != DATALINK_INVALID_LINKID ||
+ strcmp(pr_name, "_linkstate") != 0) {
+ err = ENOTSUP;
+ break;
+ }
+
+ for (v = pr_val, i = 0; i < pr_valsize; i++, v++) {
+ if (*v == '\0')
+ break;
+ }
+ if (i == pr_valsize) {
+ err = EINVAL;
+ break;
+ }
+
+ (void) ddi_strtol(pr_val, (char **)NULL, 0, &val);
+ if (val != LINK_STATE_UP && val != LINK_STATE_DOWN) {
+ err = EINVAL;
+ break;
+ }
+ vn->vn_ls = val;
+ mac_link_update(vn->vn_mh, vn->vn_ls);
+ break;
+ }
default:
err = ENOTSUP;
break;
@@ -1093,11 +1136,29 @@ vnic_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
{
vnic_t *vn = arg;
int ret = 0;
+ boolean_t out;
switch (pr_num) {
+ case MAC_PROP_VN_PROMISC_FILTERED:
+ out = mac_get_promisc_filtered(vn->vn_mch);
+ ASSERT(pr_valsize >= sizeof (boolean_t));
+ bcopy(&out, pr_val, sizeof (boolean_t));
+ break;
case MAC_PROP_SECONDARY_ADDRS:
ret = vnic_get_secondary_macs(vn, pr_valsize, pr_val);
break;
+ case MAC_PROP_PRIVATE:
+ if (vn->vn_link_id != DATALINK_INVALID_LINKID) {
+ ret = EINVAL;
+ break;
+ }
+
+ if (strcmp(pr_name, "_linkstate") != 0) {
+ ret = EINVAL;
+ break;
+ }
+ (void) snprintf(pr_val, pr_valsize, "%d", vn->vn_ls);
+ break;
default:
ret = ENOTSUP;
break;
@@ -1107,7 +1168,8 @@ vnic_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
}
/* ARGSUSED */
-static void vnic_m_propinfo(void *m_driver, const char *pr_name,
+static void
+vnic_m_propinfo(void *m_driver, const char *pr_name,
mac_prop_id_t pr_num, mac_prop_info_handle_t prh)
{
vnic_t *vn = m_driver;
@@ -1150,6 +1212,18 @@ static void vnic_m_propinfo(void *m_driver, const char *pr_name,
mac_perim_exit(mph);
}
break;
+ case MAC_PROP_PRIVATE:
+ if (vn->vn_link_id != DATALINK_INVALID_LINKID)
+ break;
+
+ if (strcmp(pr_name, "_linkstate") == 0) {
+ char buf[16];
+
+ mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW);
+ (void) snprintf(buf, sizeof (buf), "%d", vn->vn_ls);
+ mac_prop_info_set_default_str(prh, buf);
+ }
+ break;
}
}
@@ -1222,8 +1296,9 @@ vnic_notify_cb(void *arg, mac_notify_type_t type)
break;
case MAC_NOTE_LINK:
- mac_link_update(vnic->vn_mh,
- mac_client_stat_get(vnic->vn_mch, MAC_STAT_LINK_STATE));
+ vnic->vn_ls = mac_client_stat_get(vnic->vn_mch,
+ MAC_STAT_LINK_STATE);
+ mac_link_update(vnic->vn_mh, vnic->vn_ls);
break;
default:
diff --git a/usr/src/uts/common/io/vscan/vscan_svc.c b/usr/src/uts/common/io/vscan/vscan_svc.c
index a9817f571f..92eb0901c2 100644
--- a/usr/src/uts/common/io/vscan/vscan_svc.c
+++ b/usr/src/uts/common/io/vscan/vscan_svc.c
@@ -22,6 +22,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright (c) 2015, Joyent, Inc.
*/
#include <sys/stat.h>
@@ -461,7 +462,7 @@ vscan_svc_scan_file(vnode_t *vp, cred_t *cr, int async)
boolean_t allow;
clock_t timeout, time_left;
- if ((vp == NULL) || (vp->v_path == NULL) || cr == NULL)
+ if ((vp == NULL) || (vp->v_path == vn_vpath_empty) || cr == NULL)
return (0);
DTRACE_PROBE2(vscan__scan__file, char *, vp->v_path, int, async);
@@ -1080,7 +1081,6 @@ vscan_svc_exempt_file(vnode_t *vp, boolean_t *allow)
struct vattr attr;
ASSERT(vp != NULL);
- ASSERT(vp->v_path != NULL);
attr.va_mask = AT_SIZE;
diff --git a/usr/src/uts/common/io/zfd.c b/usr/src/uts/common/io/zfd.c
new file mode 100644
index 0000000000..2da310ab8d
--- /dev/null
+++ b/usr/src/uts/common/io/zfd.c
@@ -0,0 +1,1154 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2016 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Zone File Descriptor Driver.
+ *
+ * This driver is derived from the zcons driver which is in turn derived from
+ * the pts/ptm drivers. The purpose is to expose file descriptors within the
+ * zone which are connected to zoneadmd and used for logging or an interactive
+ * connection to a process within the zone.
+ *
+ * Its implementation is straightforward. Each instance of the driver
+ * represents a global-zone/local-zone pair. Unlike the zcons device, zoneadmd
+ * uses these devices unidirectionally to provide stdin, stdout and stderr to
+ * the process within the zone.
+ *
+ * Instances of zfd are onlined as children of /pseudo/zfdnex@2/ by zoneadmd,
+ * using the devctl framework; thus the driver does not need to maintain any
+ * sort of "admin" node.
+ *
+ * The driver shuttles I/O from master side to slave side and back. In a break
+ * from the pts/ptm semantics, if one side is not open, I/O directed towards
+ * it will simply be discarded. This is so that if zoneadmd is not holding the
+ * master side fd open (i.e. it has died somehow), processes in the zone do not
+ * experience any errors and I/O to the fd does not cause the process to hang.
+ *
+ * The driver can also act as a multiplexer so that data written to the
+ * slave side within the zone is also redirected back to another zfd device
+ * inside the zone for consumption (i.e. it can be read). The intention is
+ * that a logging process within the zone can consume data that is being
+ * written by an application onto the primary stream. This is essentially
+ * a tee off of the primary stream into a log stream. This tee can also be
+ * configured to be flow controlled via an ioctl. Flow control happens on the
+ * primary stream and is used to ensure that the log stream receives all of
+ * the messages off the primary stream when consumption of the data off of
+ * the log stream gets behind. Configuring for flow control implies that the
+ * application writing to the primary stream will be blocked when the log
+ * consumer gets behind. Note that closing the log stream (e.g. when the zone
+ * halts) will cause the loss of all messages queued in the stream.
+ *
+ * The zone's zfd device configuration is driven by zoneadmd and a zone mode.
+ * The mode, which is controlled by the zone attribute "zlog-mode" is somewhat
+ * of a misnomer since its purpose has evolved. The attribute can have a
+ * variety of values, but the lowest two positions are used to control how many
+ * zfd devices are created inside the zone and if the primary stream is a tty.
+ *
+ * Here is a summary of how the 4 modes control what zfd devices are created
+ * and how they're used:
+ *
+ * t-: 1 stdio zdev (0) configured as a tty
+ * --: 3 stdio zdevs (0, 1, 2), not configured as a tty
+ * tn: 1 stdio zdev (0) configured as a tty, 1 additional zdev (1)
+ * -n: 3 stdio zdevs (0, 1, 2), not tty, 2 additional zdevs (3, 4)
+ *
+ * With the 't' flag set, stdin/out/err is multiplexed onto a single full-duplex
+ * stream which is configured as a tty. That is, ptem, ldterm and ttycompat are
+ * autopushed onto the stream when the slave side is opened. There is only a
+ * single zfd dev (0) needed for the primary stream.
+ *
+ * When the 'n' flag is set, it is assumed that output logging will be done
+ * within the zone itself. In this configuration 1 or 2 additional zfd devices,
+ * depending on tty mode ('t' flag) are created within the zone. An application
+ * can then configure the zfd streams driver into a multiplexer. Output from
+ * the stdout/stderr zfd(s) will be teed into the correspond logging zfd(s)
+ * within the zone.
+ *
+ * The following is a diagram of how this works for a '-n' configuration:
+ *
+ *
+ * zoneadmd (for zlogin -I stdout)
+ * GZ: ^
+ * |
+ * --------------------------
+ * ^
+ * NGZ: |
+ * app >1 -> zfd1 -> zfd3 -> logger (for logger to consume app's stdout)
+ *
+ * There would be a similar path for the app's stderr into zfd4 for the logger
+ * to consume stderr.
+ */
+
+#include <sys/types.h>
+#include <sys/cmn_err.h>
+#include <sys/conf.h>
+#include <sys/cred.h>
+#include <sys/ddi.h>
+#include <sys/debug.h>
+#include <sys/devops.h>
+#include <sys/errno.h>
+#include <sys/file.h>
+#include <sys/kstr.h>
+#include <sys/modctl.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <sys/stream.h>
+#include <sys/stropts.h>
+#include <sys/strsun.h>
+#include <sys/sunddi.h>
+#include <sys/sysmacros.h>
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/zfd.h>
+#include <sys/vnode.h>
+#include <sys/fs/snode.h>
+#include <sys/zone.h>
+#include <sys/sdt.h>
+
+static kmutex_t zfd_mux_lock;
+
+static int zfd_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
+static int zfd_attach(dev_info_t *, ddi_attach_cmd_t);
+static int zfd_detach(dev_info_t *, ddi_detach_cmd_t);
+
+static int zfd_open(queue_t *, dev_t *, int, int, cred_t *);
+static int zfd_close(queue_t *, int, cred_t *);
+static void zfd_wput(queue_t *, mblk_t *);
+static void zfd_rsrv(queue_t *);
+static void zfd_wsrv(queue_t *);
+
+/*
+ * The instance number is encoded in the dev_t in the minor number; the lowest
+ * bit of the minor number is used to track the master vs. slave side of the
+ * fd. The rest of the bits in the minor number are the instance.
+ */
+#define ZFD_MASTER_MINOR 0
+#define ZFD_SLAVE_MINOR 1
+
+#define ZFD_INSTANCE(x) (getminor((x)) >> 1)
+#define ZFD_NODE(x) (getminor((x)) & 0x01)
+
+/*
+ * This macro converts a zfd_state_t pointer to the associated slave minor
+ * node's dev_t.
+ */
+#define ZFD_STATE_TO_SLAVEDEV(x) \
+ (makedevice(ddi_driver_major((x)->zfd_devinfo), \
+ (minor_t)(ddi_get_instance((x)->zfd_devinfo) << 1 | ZFD_SLAVE_MINOR)))
+
+int zfd_debug = 0;
+#define DBG(a) if (zfd_debug) cmn_err(CE_NOTE, a)
+#define DBG1(a, b) if (zfd_debug) cmn_err(CE_NOTE, a, b)
+
+/*
+ * ZFD Pseudo Terminal Module: stream data structure definitions,
+ * based on zcons.
+ */
+static struct module_info zfd_info = {
+ 0x20FD, /* ZOFD - 8445 */
+ "zfd",
+ 0, /* min packet size */
+ INFPSZ, /* max packet size - infinity */
+ 2048, /* high water */
+ 128 /* low water */
+};
+
+static struct qinit zfd_rinit = {
+ NULL,
+ (int (*)()) zfd_rsrv,
+ zfd_open,
+ zfd_close,
+ NULL,
+ &zfd_info,
+ NULL
+};
+
+static struct qinit zfd_winit = {
+ (int (*)()) zfd_wput,
+ (int (*)()) zfd_wsrv,
+ NULL,
+ NULL,
+ NULL,
+ &zfd_info,
+ NULL
+};
+
+static struct streamtab zfd_tab_info = {
+ &zfd_rinit,
+ &zfd_winit,
+ NULL,
+ NULL
+};
+
+#define ZFD_CONF_FLAG (D_MP | D_MTQPAIR | D_MTOUTPERIM | D_MTOCEXCL)
+
+/*
+ * this will define (struct cb_ops cb_zfd_ops) and (struct dev_ops zfd_ops)
+ */
+DDI_DEFINE_STREAM_OPS(zfd_ops, nulldev, nulldev, zfd_attach, zfd_detach, \
+ nodev, zfd_getinfo, ZFD_CONF_FLAG, &zfd_tab_info, \
+ ddi_quiesce_not_needed);
+
+/*
+ * Module linkage information for the kernel.
+ */
+
+static struct modldrv modldrv = {
+ &mod_driverops, /* Type of module (this is a pseudo driver) */
+ "Zone FD driver", /* description of module */
+ &zfd_ops /* driver ops */
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1,
+ &modldrv,
+ NULL
+};
+
+typedef enum {
+ ZFD_NO_MUX,
+ ZFD_PRIMARY_STREAM,
+ ZFD_LOG_STREAM
+} zfd_mux_type_t;
+
+typedef struct zfd_state {
+ dev_info_t *zfd_devinfo; /* instance info */
+ queue_t *zfd_master_rdq; /* GZ read queue */
+ queue_t *zfd_slave_rdq; /* in-zone read queue */
+ int zfd_state; /* ZFD_STATE_MOPEN, ZFD_STATE_SOPEN */
+ int zfd_tty; /* ZFD_MAKETTY - strm mods will push */
+ boolean_t zfd_is_flowcon; /* primary stream flow stopped */
+ boolean_t zfd_allow_flowcon; /* use flow control */
+ zfd_mux_type_t zfd_muxt; /* state type: none, primary, log */
+ struct zfd_state *zfd_inst_pri; /* log state's primary ptr */
+ struct zfd_state *zfd_inst_log; /* primary state's log ptr */
+} zfd_state_t;
+
+#define ZFD_STATE_MOPEN 0x01
+#define ZFD_STATE_SOPEN 0x02
+
+static void *zfd_soft_state;
+
+/*
+ * List of STREAMS modules that are autopushed onto a slave instance when its
+ * opened, but only if the ZFD_MAKETTY ioctl has first been received by the
+ * master.
+ */
+static char *zfd_mods[] = {
+ "ptem",
+ "ldterm",
+ "ttcompat",
+ NULL
+};
+
+int
+_init(void)
+{
+ int err;
+
+ if ((err = ddi_soft_state_init(&zfd_soft_state, sizeof (zfd_state_t),
+ 0)) != 0) {
+ return (err);
+ }
+
+ if ((err = mod_install(&modlinkage)) != 0)
+ ddi_soft_state_fini(zfd_soft_state);
+
+ mutex_init(&zfd_mux_lock, NULL, MUTEX_DEFAULT, NULL);
+ return (err);
+}
+
+
+int
+_fini(void)
+{
+ int err;
+
+ if ((err = mod_remove(&modlinkage)) != 0) {
+ return (err);
+ }
+
+ ddi_soft_state_fini(&zfd_soft_state);
+ mutex_destroy(&zfd_mux_lock);
+ return (0);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+static int
+zfd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ zfd_state_t *zfds;
+ int instance;
+ char masternm[ZFD_NAME_LEN], slavenm[ZFD_NAME_LEN];
+
+ if (cmd != DDI_ATTACH)
+ return (DDI_FAILURE);
+
+ instance = ddi_get_instance(dip);
+ if (ddi_soft_state_zalloc(zfd_soft_state, instance) != DDI_SUCCESS)
+ return (DDI_FAILURE);
+
+ (void) snprintf(masternm, sizeof (masternm), "%s%d", ZFD_MASTER_NAME,
+ instance);
+ (void) snprintf(slavenm, sizeof (slavenm), "%s%d", ZFD_SLAVE_NAME,
+ instance);
+
+ /*
+ * Create the master and slave minor nodes.
+ */
+ if ((ddi_create_minor_node(dip, slavenm, S_IFCHR,
+ instance << 1 | ZFD_SLAVE_MINOR, DDI_PSEUDO, 0) == DDI_FAILURE) ||
+ (ddi_create_minor_node(dip, masternm, S_IFCHR,
+ instance << 1 | ZFD_MASTER_MINOR, DDI_PSEUDO, 0) == DDI_FAILURE)) {
+ ddi_remove_minor_node(dip, NULL);
+ ddi_soft_state_free(zfd_soft_state, instance);
+ return (DDI_FAILURE);
+ }
+
+ VERIFY((zfds = ddi_get_soft_state(zfd_soft_state, instance)) != NULL);
+ zfds->zfd_devinfo = dip;
+ zfds->zfd_tty = 0;
+ zfds->zfd_muxt = ZFD_NO_MUX;
+ zfds->zfd_inst_log = NULL;
+ return (DDI_SUCCESS);
+}
+
+static int
+zfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ zfd_state_t *zfds;
+ int instance;
+
+ if (cmd != DDI_DETACH)
+ return (DDI_FAILURE);
+
+ instance = ddi_get_instance(dip);
+ if ((zfds = ddi_get_soft_state(zfd_soft_state, instance)) == NULL)
+ return (DDI_FAILURE);
+
+ if ((zfds->zfd_state & ZFD_STATE_MOPEN) ||
+ (zfds->zfd_state & ZFD_STATE_SOPEN)) {
+ DBG1("zfd_detach: device (dip=%p) still open\n", (void *)dip);
+ return (DDI_FAILURE);
+ }
+
+ ddi_remove_minor_node(dip, NULL);
+ ddi_soft_state_free(zfd_soft_state, instance);
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * zfd_getinfo()
+ * getinfo(9e) entrypoint.
+ */
+/*ARGSUSED*/
+static int
+zfd_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
+{
+ zfd_state_t *zfds;
+ int instance = ZFD_INSTANCE((dev_t)arg);
+
+ switch (infocmd) {
+ case DDI_INFO_DEVT2DEVINFO:
+ if ((zfds = ddi_get_soft_state(zfd_soft_state,
+ instance)) == NULL)
+ return (DDI_FAILURE);
+ *result = zfds->zfd_devinfo;
+ return (DDI_SUCCESS);
+ case DDI_INFO_DEVT2INSTANCE:
+ *result = (void *)(uintptr_t)instance;
+ return (DDI_SUCCESS);
+ }
+ return (DDI_FAILURE);
+}
+
+/*
+ * Return the equivalent queue from the other side of the relationship.
+ * e.g.: given the slave's write queue, return the master's write queue.
+ */
+static queue_t *
+zfd_switch(queue_t *qp)
+{
+ zfd_state_t *zfds = qp->q_ptr;
+ ASSERT(zfds != NULL);
+
+ if (qp == zfds->zfd_master_rdq)
+ return (zfds->zfd_slave_rdq);
+ else if (OTHERQ(qp) == zfds->zfd_master_rdq && zfds->zfd_slave_rdq
+ != NULL)
+ return (OTHERQ(zfds->zfd_slave_rdq));
+ else if (qp == zfds->zfd_slave_rdq)
+ return (zfds->zfd_master_rdq);
+ else if (OTHERQ(qp) == zfds->zfd_slave_rdq && zfds->zfd_master_rdq
+ != NULL)
+ return (OTHERQ(zfds->zfd_master_rdq));
+ else
+ return (NULL);
+}
+
+/*
+ * For debugging and outputting messages. Returns the name of the side of
+ * the relationship associated with this queue.
+ */
+static const char *
+zfd_side(queue_t *qp)
+{
+ zfd_state_t *zfds = qp->q_ptr;
+ ASSERT(zfds != NULL);
+
+ if (qp == zfds->zfd_master_rdq ||
+ OTHERQ(qp) == zfds->zfd_master_rdq) {
+ return ("master");
+ }
+ ASSERT(qp == zfds->zfd_slave_rdq || OTHERQ(qp) == zfds->zfd_slave_rdq);
+ return ("slave");
+}
+
+/*ARGSUSED*/
+static int
+zfd_master_open(zfd_state_t *zfds,
+ queue_t *rqp, /* pointer to the read side queue */
+ dev_t *devp, /* pointer to stream tail's dev */
+ int oflag, /* the user open(2) supplied flags */
+ int sflag, /* open state flag */
+ cred_t *credp) /* credentials */
+{
+ mblk_t *mop;
+ struct stroptions *sop;
+
+ /*
+ * Enforce exclusivity on the master side; the only consumer should
+ * be the zoneadmd for the zone.
+ */
+ if ((zfds->zfd_state & ZFD_STATE_MOPEN) != 0)
+ return (EBUSY);
+
+ if ((mop = allocb(sizeof (struct stroptions), BPRI_MED)) == NULL) {
+ DBG("zfd_master_open(): mop allocation failed\n");
+ return (ENOMEM);
+ }
+
+ zfds->zfd_state |= ZFD_STATE_MOPEN;
+
+ /*
+ * q_ptr stores driver private data; stash the soft state data on both
+ * read and write sides of the queue.
+ */
+ WR(rqp)->q_ptr = rqp->q_ptr = zfds;
+ qprocson(rqp);
+
+ /*
+ * Following qprocson(), the master side is fully plumbed into the
+ * STREAM and may send/receive messages. Setting zfds->zfd_master_rdq
+ * will allow the slave to send messages to us (the master).
+ * This cannot occur before qprocson() because the master is not
+ * ready to process them until that point.
+ */
+ zfds->zfd_master_rdq = rqp;
+
+ /*
+ * set up hi/lo water marks on stream head read queue and add
+ * controlling tty as needed.
+ */
+ mop->b_datap->db_type = M_SETOPTS;
+ mop->b_wptr += sizeof (struct stroptions);
+ sop = (struct stroptions *)(void *)mop->b_rptr;
+ if (oflag & FNOCTTY)
+ sop->so_flags = SO_HIWAT | SO_LOWAT;
+ else
+ sop->so_flags = SO_HIWAT | SO_LOWAT | SO_ISTTY;
+ sop->so_hiwat = 512;
+ sop->so_lowat = 256;
+ putnext(rqp, mop);
+
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+zfd_slave_open(zfd_state_t *zfds,
+ queue_t *rqp, /* pointer to the read side queue */
+ dev_t *devp, /* pointer to stream tail's dev */
+ int oflag, /* the user open(2) supplied flags */
+ int sflag, /* open state flag */
+ cred_t *credp) /* credentials */
+{
+ mblk_t *mop;
+ struct stroptions *sop;
+ /*
+ * The slave side can be opened as many times as needed.
+ */
+ if ((zfds->zfd_state & ZFD_STATE_SOPEN) != 0) {
+ ASSERT((rqp != NULL) && (WR(rqp)->q_ptr == zfds));
+ return (0);
+ }
+
+ /* A log stream is read-only */
+ if (zfds->zfd_muxt == ZFD_LOG_STREAM &&
+ (oflag & (FREAD | FWRITE)) != FREAD)
+ return (EINVAL);
+
+ if (zfds->zfd_tty == 1) {
+ major_t major;
+ minor_t minor;
+ minor_t lastminor;
+ uint_t anchorindex;
+
+ /*
+ * Set up sad(7D) so that the necessary STREAMS modules will
+ * be in place. A wrinkle is that 'ptem' must be anchored
+ * in place (see streamio(7i)) because we always want the
+ * fd to have terminal semantics.
+ */
+ minor =
+ ddi_get_instance(zfds->zfd_devinfo) << 1 | ZFD_SLAVE_MINOR;
+ major = ddi_driver_major(zfds->zfd_devinfo);
+ lastminor = 0;
+ anchorindex = 1;
+ if (kstr_autopush(SET_AUTOPUSH, &major, &minor, &lastminor,
+ &anchorindex, zfd_mods) != 0) {
+ DBG("zfd_slave_open(): kstr_autopush() failed\n");
+ return (EIO);
+ }
+ }
+
+ if ((mop = allocb(sizeof (struct stroptions), BPRI_MED)) == NULL) {
+ DBG("zfd_slave_open(): mop allocation failed\n");
+ return (ENOMEM);
+ }
+
+ zfds->zfd_state |= ZFD_STATE_SOPEN;
+
+ /*
+ * q_ptr stores driver private data; stash the soft state data on both
+ * read and write sides of the queue.
+ */
+ WR(rqp)->q_ptr = rqp->q_ptr = zfds;
+
+ qprocson(rqp);
+
+ /*
+ * Must follow qprocson(), since we aren't ready to process until then.
+ */
+ zfds->zfd_slave_rdq = rqp;
+
+ /*
+ * set up hi/lo water marks on stream head read queue and add
+ * controlling tty as needed.
+ */
+ mop->b_datap->db_type = M_SETOPTS;
+ mop->b_wptr += sizeof (struct stroptions);
+ sop = (struct stroptions *)(void *)mop->b_rptr;
+ sop->so_flags = SO_HIWAT | SO_LOWAT | SO_ISTTY;
+ sop->so_hiwat = 512;
+ sop->so_lowat = 256;
+ putnext(rqp, mop);
+
+ return (0);
+}
+
+/*
+ * open(9e) entrypoint; checks sflag, and rejects anything unordinary.
+ */
+static int
+zfd_open(queue_t *rqp, /* pointer to the read side queue */
+ dev_t *devp, /* pointer to stream tail's dev */
+ int oflag, /* the user open(2) supplied flags */
+ int sflag, /* open state flag */
+ cred_t *credp) /* credentials */
+{
+ int instance = ZFD_INSTANCE(*devp);
+ int ret;
+ zfd_state_t *zfds;
+
+ if (sflag != 0)
+ return (EINVAL);
+
+ if ((zfds = ddi_get_soft_state(zfd_soft_state, instance)) == NULL)
+ return (ENXIO);
+
+ switch (ZFD_NODE(*devp)) {
+ case ZFD_MASTER_MINOR:
+ ret = zfd_master_open(zfds, rqp, devp, oflag, sflag, credp);
+ break;
+ case ZFD_SLAVE_MINOR:
+ ret = zfd_slave_open(zfds, rqp, devp, oflag, sflag, credp);
+ /*
+ * If we just opened the log stream and flow control has
+ * been enabled, we want to make sure the primary stream can
+ * start flowing.
+ */
+ if (ret == 0 && zfds->zfd_muxt == ZFD_LOG_STREAM &&
+ zfds->zfd_inst_pri->zfd_allow_flowcon) {
+ zfds->zfd_inst_pri->zfd_is_flowcon = B_FALSE;
+ if (zfds->zfd_inst_pri->zfd_master_rdq != NULL)
+ qenable(RD(zfds->zfd_inst_pri->zfd_master_rdq));
+ }
+ break;
+ default:
+ ret = ENXIO;
+ break;
+ }
+
+ return (ret);
+}
+
+/*
+ * close(9e) entrypoint.
+ */
+/*ARGSUSED1*/
+static int
+zfd_close(queue_t *rqp, int flag, cred_t *credp)
+{
+ queue_t *wqp;
+ mblk_t *bp;
+ zfd_state_t *zfds;
+ major_t major;
+ minor_t minor;
+
+ zfds = (zfd_state_t *)rqp->q_ptr;
+
+ if (rqp == zfds->zfd_master_rdq) {
+ DBG("Closing master side");
+
+ zfds->zfd_master_rdq = NULL;
+ zfds->zfd_state &= ~ZFD_STATE_MOPEN;
+
+ /*
+ * qenable slave side write queue so that it can flush
+ * its messages as master's read queue is going away
+ */
+ if (zfds->zfd_slave_rdq != NULL) {
+ qenable(WR(zfds->zfd_slave_rdq));
+ }
+
+ qprocsoff(rqp);
+ WR(rqp)->q_ptr = rqp->q_ptr = NULL;
+
+ } else if (rqp == zfds->zfd_slave_rdq) {
+
+ DBG("Closing slave side");
+ zfds->zfd_state &= ~ZFD_STATE_SOPEN;
+ zfds->zfd_slave_rdq = NULL;
+
+ wqp = WR(rqp);
+ while ((bp = getq(wqp)) != NULL) {
+ if (zfds->zfd_master_rdq != NULL)
+ putnext(zfds->zfd_master_rdq, bp);
+ else if (bp->b_datap->db_type == M_IOCTL)
+ miocnak(wqp, bp, 0, 0);
+ else
+ freemsg(bp);
+ }
+
+ /*
+ * Qenable master side write queue so that it can flush its
+ * messages as slaves's read queue is going away.
+ */
+ if (zfds->zfd_master_rdq != NULL)
+ qenable(WR(zfds->zfd_master_rdq));
+
+ /*
+ * Qenable primary stream if necessary.
+ */
+ if (zfds->zfd_muxt == ZFD_LOG_STREAM &&
+ zfds->zfd_inst_pri->zfd_allow_flowcon) {
+ zfds->zfd_inst_pri->zfd_is_flowcon = B_FALSE;
+ if (zfds->zfd_inst_pri->zfd_master_rdq != NULL)
+ qenable(RD(zfds->zfd_inst_pri->zfd_master_rdq));
+ }
+
+ qprocsoff(rqp);
+ WR(rqp)->q_ptr = rqp->q_ptr = NULL;
+
+ if (zfds->zfd_tty == 1) {
+ /*
+ * Clear the sad configuration so that reopening
+ * doesn't fail to set up sad configuration.
+ */
+ major = ddi_driver_major(zfds->zfd_devinfo);
+ minor = ddi_get_instance(zfds->zfd_devinfo) << 1 |
+ ZFD_SLAVE_MINOR;
+ (void) kstr_autopush(CLR_AUTOPUSH, &major, &minor,
+ NULL, NULL, NULL);
+ }
+ }
+
+ return (0);
+}
+
+static void
+handle_mflush(queue_t *qp, mblk_t *mp)
+{
+ mblk_t *nmp;
+ DBG1("M_FLUSH on %s side", zfd_side(qp));
+
+ if (*mp->b_rptr & FLUSHW) {
+ DBG1("M_FLUSH, FLUSHW, %s side", zfd_side(qp));
+ flushq(qp, FLUSHDATA);
+ *mp->b_rptr &= ~FLUSHW;
+ if ((*mp->b_rptr & FLUSHR) == 0) {
+ /*
+ * FLUSHW only. Change to FLUSHR and putnext other side,
+ * then we are done.
+ */
+ *mp->b_rptr |= FLUSHR;
+ if (zfd_switch(RD(qp)) != NULL) {
+ putnext(zfd_switch(RD(qp)), mp);
+ return;
+ }
+ } else if ((zfd_switch(RD(qp)) != NULL) &&
+ (nmp = copyb(mp)) != NULL) {
+ /*
+ * It is a FLUSHRW; we copy the mblk and send
+ * it to the other side, since we still need to use
+ * the mblk in FLUSHR processing, below.
+ */
+ putnext(zfd_switch(RD(qp)), nmp);
+ }
+ }
+
+ if (*mp->b_rptr & FLUSHR) {
+ DBG("qreply(qp) turning FLUSHR around\n");
+ qreply(qp, mp);
+ return;
+ }
+ freemsg(mp);
+}
+
+/*
+ * Evaluate the various conditionals to determine if we're teeing into a log
+ * stream and if the primary stream should be flow controlled. This function
+ * can set the zfd_is_flowcon flag as a side effect.
+ *
+ * When teeing with flow control, we always queue the teed msg here and if
+ * the queue is getting full, we set zfd_is_flowcon. The primary stream will
+ * always queue when zfd_is_flowcon and will also not be served when
+ * zfd_is_flowcon is set. This causes backpressure on the primary stream
+ * until the teed queue can drain.
+ */
+static void
+zfd_tee_handler(zfd_state_t *zfds, unsigned char type, mblk_t *mp)
+{
+ queue_t *log_qp;
+ zfd_state_t *log_zfds;
+ mblk_t *lmp;
+
+ if (zfds->zfd_muxt != ZFD_PRIMARY_STREAM)
+ return;
+
+ if (type != M_DATA)
+ return;
+
+ log_zfds = zfds->zfd_inst_log;
+ if (log_zfds == NULL)
+ return;
+
+ ASSERT(log_zfds->zfd_muxt == ZFD_LOG_STREAM);
+
+ if ((log_zfds->zfd_state & ZFD_STATE_SOPEN) == 0) {
+ if (zfds->zfd_allow_flowcon)
+ zfds->zfd_is_flowcon = B_TRUE;
+ return;
+ }
+
+ /* The zfd_slave_rdq is null until the log dev is opened in the zone */
+ log_qp = RD(log_zfds->zfd_slave_rdq);
+ DTRACE_PROBE2(zfd__tee__check, void *, log_qp, void *, zfds);
+
+ if (!zfds->zfd_allow_flowcon) {
+ /*
+ * We're not supposed to tee with flow control and the tee is
+ * full so we skip teeing into the log stream.
+ */
+ if ((log_qp->q_flag & QFULL) != 0)
+ return;
+ }
+
+ /*
+ * Tee the message into the log stream.
+ */
+ lmp = dupmsg(mp);
+ if (lmp == NULL) {
+ if (zfds->zfd_allow_flowcon)
+ zfds->zfd_is_flowcon = B_TRUE;
+ return;
+ }
+
+ if (log_qp->q_first == NULL && bcanputnext(log_qp, lmp->b_band)) {
+ putnext(log_qp, lmp);
+ } else {
+ if (putq(log_qp, lmp) == 0) {
+ /* The logger queue is full, free the msg. */
+ freemsg(lmp);
+ }
+ /*
+ * If we're supposed to tee with flow control and the tee is
+ * over the high water mark then we want the primary stream to
+ * stop flowing. We'll stop queueing the primary stream after
+ * the log stream has drained.
+ */
+ if (zfds->zfd_allow_flowcon &&
+ log_qp->q_count > log_qp->q_hiwat) {
+ zfds->zfd_is_flowcon = B_TRUE;
+ }
+ }
+}
+
+/*
+ * wput(9E) is symmetric for master and slave sides, so this handles both
+ * without splitting the codepath. (The only exception to this is the
+ * processing of zfd ioctls, which is restricted to the master side.)
+ *
+ * zfd_wput() looks at the other side; if there is no process holding that
+ * side open, it frees the message. This prevents processes from hanging
+ * if no one is holding open the fd. Otherwise, it putnext's high
+ * priority messages, putnext's normal messages if possible, and otherwise
+ * enqueues the messages; in the case that something is enqueued, wsrv(9E)
+ * will take care of eventually shuttling I/O to the other side.
+ *
+ * When configured as a multiplexer, then anything written to the stream
+ * from inside the zone is also teed off to the corresponding log stream
+ * for consumption within the zone (i.e. the log stream can be read, but never
+ * written to, by an application inside the zone).
+ */
+static void
+zfd_wput(queue_t *qp, mblk_t *mp)
+{
+ unsigned char type = mp->b_datap->db_type;
+ zfd_state_t *zfds;
+ struct iocblk *iocbp;
+ boolean_t must_queue = B_FALSE;
+
+ ASSERT(qp->q_ptr);
+
+ DBG1("entering zfd_wput, %s side", zfd_side(qp));
+
+ /*
+ * Process zfd ioctl messages if qp is the master side's write queue.
+ */
+ zfds = (zfd_state_t *)qp->q_ptr;
+
+ if (type == M_IOCTL) {
+ iocbp = (struct iocblk *)(void *)mp->b_rptr;
+
+ switch (iocbp->ioc_cmd) {
+ case ZFD_MAKETTY:
+ zfds->zfd_tty = 1;
+ miocack(qp, mp, 0, 0);
+ return;
+ case ZFD_EOF:
+ if (zfds->zfd_slave_rdq != NULL)
+ (void) putnextctl(zfds->zfd_slave_rdq,
+ M_HANGUP);
+ miocack(qp, mp, 0, 0);
+ return;
+ case ZFD_HAS_SLAVE:
+ if ((zfds->zfd_state & ZFD_STATE_SOPEN) != 0) {
+ miocack(qp, mp, 0, 0);
+ } else {
+ miocack(qp, mp, 0, ENOTTY);
+ }
+ return;
+ case ZFD_MUX: {
+ /*
+ * Setup the multiplexer configuration for the two
+ * streams.
+ *
+ * We expect to be called on the stream that will
+ * become the log stream and be passed one data block
+ * with the minor number of the slave side of the
+ * primary stream.
+ */
+ int to;
+ int instance;
+ zfd_state_t *prim_zfds;
+
+ if (iocbp->ioc_count != TRANSPARENT ||
+ mp->b_cont == NULL) {
+ miocack(qp, mp, 0, EINVAL);
+ return;
+ }
+
+ /* Get the primary slave minor device number */
+ to = *(int *)mp->b_cont->b_rptr;
+ instance = ZFD_INSTANCE(to);
+
+ if ((prim_zfds = ddi_get_soft_state(zfd_soft_state,
+ instance)) == NULL) {
+ miocack(qp, mp, 0, EINVAL);
+ return;
+ }
+
+ /* Disallow changing primary/log once set. */
+ mutex_enter(&zfd_mux_lock);
+ if (zfds->zfd_muxt != ZFD_NO_MUX ||
+ prim_zfds->zfd_muxt != ZFD_NO_MUX) {
+ mutex_exit(&zfd_mux_lock);
+ miocack(qp, mp, 0, EINVAL);
+ return;
+ }
+
+ zfds->zfd_muxt = ZFD_LOG_STREAM;
+ zfds->zfd_inst_pri = prim_zfds;
+ prim_zfds->zfd_muxt = ZFD_PRIMARY_STREAM;
+ prim_zfds->zfd_inst_log = zfds;
+ mutex_exit(&zfd_mux_lock);
+ DTRACE_PROBE2(zfd__mux__link, void *, prim_zfds,
+ void *, zfds);
+
+ miocack(qp, mp, 0, 0);
+ return;
+ }
+ case ZFD_MUX_FLOWCON: {
+ /*
+ * We expect this ioctl to be issued against the
+ * log stream. We don't use the primary stream since
+ * there can be other streams modules pushed onto that
+ * stream which would interfere with the ioctl.
+ */
+ int val;
+ zfd_state_t *prim_zfds;
+
+ if (iocbp->ioc_count != TRANSPARENT ||
+ mp->b_cont == NULL) {
+ miocack(qp, mp, 0, EINVAL);
+ return;
+ }
+
+ if (zfds->zfd_muxt != ZFD_LOG_STREAM) {
+ miocack(qp, mp, 0, EINVAL);
+ return;
+ }
+ prim_zfds = zfds->zfd_inst_pri;
+
+ /* Get the flow control setting */
+ val = *(int *)mp->b_cont->b_rptr;
+ if (val != 0 && val != 1) {
+ miocack(qp, mp, 0, EINVAL);
+ return;
+ }
+
+ prim_zfds->zfd_allow_flowcon = (boolean_t)val;
+ if (!prim_zfds->zfd_allow_flowcon)
+ prim_zfds->zfd_is_flowcon = B_FALSE;
+
+ DTRACE_PROBE1(zfd__mux__flowcon, void *, prim_zfds);
+ miocack(qp, mp, 0, 0);
+ return;
+ }
+ default:
+ break;
+ }
+ }
+
+ /* if on the write side, may need to tee */
+ if (zfds->zfd_slave_rdq != NULL && qp == WR(zfds->zfd_slave_rdq)) {
+ /* tee output to any attached log stream */
+ zfd_tee_handler(zfds, type, mp);
+
+ /* high-priority msgs are not subject to flow control */
+ if (zfds->zfd_is_flowcon && type == M_DATA)
+ must_queue = B_TRUE;
+ }
+
+ if (zfd_switch(RD(qp)) == NULL) {
+ DBG1("wput to %s side (no one listening)", zfd_side(qp));
+ switch (type) {
+ case M_FLUSH:
+ handle_mflush(qp, mp);
+ break;
+ case M_IOCTL:
+ miocnak(qp, mp, 0, 0);
+ break;
+ default:
+ freemsg(mp);
+ break;
+ }
+ return;
+ }
+
+ if (type >= QPCTL) {
+ DBG1("(hipri) wput, %s side", zfd_side(qp));
+ switch (type) {
+ case M_READ: /* supposedly from ldterm? */
+ DBG("zfd_wput: tossing M_READ\n");
+ freemsg(mp);
+ break;
+ case M_FLUSH:
+ handle_mflush(qp, mp);
+ break;
+ default:
+ /*
+ * Put this to the other side.
+ */
+ ASSERT(zfd_switch(RD(qp)) != NULL);
+ putnext(zfd_switch(RD(qp)), mp);
+ break;
+ }
+ DBG1("done (hipri) wput, %s side", zfd_side(qp));
+ return;
+ }
+
+ /*
+ * If the primary stream has been stopped for flow control then
+ * enqueue the msg, otherwise only putnext if there isn't already
+ * something in the queue. If we don't do this then things would wind
+ * up out of order.
+ */
+ if (!must_queue && qp->q_first == NULL &&
+ bcanputnext(RD(zfd_switch(qp)), mp->b_band)) {
+ putnext(RD(zfd_switch(qp)), mp);
+ } else {
+ /*
+ * zfd_wsrv expects msgs queued on the primary queue. Those
+ * will be handled by zfd_wsrv after zfd_rsrv performs the
+ * qenable on the proper queue.
+ */
+ (void) putq(qp, mp);
+ }
+
+ DBG1("done wput, %s side", zfd_side(qp));
+}
+
+/*
+ * Read server
+ *
+ * For primary stream:
+ * Under normal execution rsrv(9E) is symmetric for master and slave, so
+ * zfd_rsrv() can handle both without splitting up the codepath. We do this by
+ * enabling the write side of the partner. This triggers the partner to send
+ * messages queued on its write side to this queue's read side.
+ *
+ * For log stream:
+ * Internally we've queued up the msgs that we've teed off to the log stream
+ * so when we're invoked we need to pass these along.
+ */
+static void
+zfd_rsrv(queue_t *qp)
+{
+ zfd_state_t *zfds;
+ zfds = (zfd_state_t *)qp->q_ptr;
+
+ /*
+ * log stream server
+ */
+ if (zfds->zfd_muxt == ZFD_LOG_STREAM && zfds->zfd_slave_rdq != NULL) {
+ queue_t *log_qp;
+ mblk_t *mp;
+
+ log_qp = RD(zfds->zfd_slave_rdq);
+
+ if ((zfds->zfd_state & ZFD_STATE_SOPEN) != 0) {
+ zfd_state_t *pzfds = zfds->zfd_inst_pri;
+
+ while ((mp = getq(qp)) != NULL) {
+ if (bcanputnext(log_qp, mp->b_band)) {
+ putnext(log_qp, mp);
+ } else {
+ (void) putbq(log_qp, mp);
+ break;
+ }
+ }
+
+ if (log_qp->q_count < log_qp->q_lowat) {
+ DTRACE_PROBE(zfd__flow__on);
+ pzfds->zfd_is_flowcon = B_FALSE;
+ if (pzfds->zfd_master_rdq != NULL)
+ qenable(RD(pzfds->zfd_master_rdq));
+ }
+ } else {
+ /* No longer open, drain the queue */
+ while ((mp = getq(qp)) != NULL) {
+ freemsg(mp);
+ }
+ flushq(qp, FLUSHALL);
+ }
+ return;
+ }
+
+ /*
+ * Care must be taken here, as either of the master or slave side
+ * qptr could be NULL.
+ */
+ ASSERT(qp == zfds->zfd_master_rdq || qp == zfds->zfd_slave_rdq);
+ if (zfd_switch(qp) == NULL) {
+ DBG("zfd_rsrv: other side isn't listening\n");
+ return;
+ }
+ qenable(WR(zfd_switch(qp)));
+}
+
+/*
+ * Write server
+ *
+ * This routine is symmetric for master and slave, so it handles both without
+ * splitting up the codepath.
+ *
+ * If there are messages on this queue that can be sent to the other, send
+ * them via putnext(). Else, if queued messages cannot be sent, leave them
+ * on this queue.
+ */
+static void
+zfd_wsrv(queue_t *qp)
+{
+ queue_t *swq;
+ mblk_t *mp;
+ zfd_state_t *zfds = (zfd_state_t *)qp->q_ptr;
+
+ ASSERT(zfds != NULL);
+
+ /*
+ * Partner has no read queue, so take the data, and throw it away.
+ */
+ if (zfd_switch(RD(qp)) == NULL) {
+ DBG("zfd_wsrv: other side isn't listening");
+ while ((mp = getq(qp)) != NULL) {
+ if (mp->b_datap->db_type == M_IOCTL)
+ miocnak(qp, mp, 0, 0);
+ else
+ freemsg(mp);
+ }
+ flushq(qp, FLUSHALL);
+ return;
+ }
+
+ swq = RD(zfd_switch(qp));
+
+ /*
+ * while there are messages on this write queue...
+ */
+ while (!zfds->zfd_is_flowcon && (mp = getq(qp)) != NULL) {
+ /*
+ * Due to the way zfd_wput is implemented, we should never
+ * see a high priority control message here.
+ */
+ ASSERT(mp->b_datap->db_type < QPCTL);
+
+ if (bcanputnext(swq, mp->b_band)) {
+ putnext(swq, mp);
+ } else {
+ (void) putbq(qp, mp);
+ break;
+ }
+ }
+}
diff --git a/usr/src/uts/common/mapfiles/README b/usr/src/uts/common/mapfiles/README
new file mode 100644
index 0000000000..5b65771325
--- /dev/null
+++ b/usr/src/uts/common/mapfiles/README
@@ -0,0 +1,68 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2016 Joyent, Inc.
+#
+
+Kernel Module Build Time Symbol Verification
+--------------------------------------------
+
+Historically, kernel modules have all been built as relocatable objects.
+They are not dynamic objects and dependency information is always noted
+in individual makefiles. Along with this, there has never been any
+verification of the symbols that are being used. This means that it's
+possible for a kernel module author to refer to a symbol that doesn't
+exist and not find out until they try to install the module.
+
+To help find these problems at build time, we provide an opt-in system
+for modules to use, leveraging the link-editor's '-z defs' option. This
+option ensures that there are no unknown definitons at link-edit time.
+To supply these definitions we supply a series of mapfiles in this
+directory.
+
+These mapfiles are not the traditional versioning mapfiles like those in
+usr/src/lib/README.mapfiles! Please review the following differences
+closely:
+
+* These mapfiles do not declare any versions!
+* These mapfiles do not use the 'SYMBOL_VERSION' directive, instead they
+ use the 'SYMBOL_SCOPE' directive.
+* These mapfiles do not hide symbols! Library mapfiles always have
+ something to catch all local symbols. That should *never* be used
+ here. These mapfiles should not effect visibility.
+* All symbols in these mapfiles should be marked 'EXTERN' to indicate
+ that they are not provided by the kernel module but by another.
+* These mapfiles do not declare what is or isn't a public interface,
+ though they are often grouped around interfaces, to make it easier for
+ a driver author to get this right.
+
+Mapfiles are organized based on kernel module. For example the GLDv3
+device driver interface is provided by the 'mac' module and thus is
+found in the file 'mac.mapfile'. The DDI is currently in the 'ddi'
+mapfile. Functions that are found in genunix and unix that aren't in
+the DDI should not be put in that mapfile.
+
+Note, the existing files may not be complete. These are intended to only
+have the public interfaces provided by modules and thus should not
+include every symbol in them. As the need arises, add new symbols or
+modules as appropriate.
+
+To opt a module into this, first declare a series of MAPFILES that they
+should check against in the module. This should be a series of one or
+more files, for example:
+
+MAPFILES += ddi mac
+
+Next, you should add an include of Makefile.mapfile right before you
+include Makefile.targ. You can do this with the following line:
+
+include $(UTSBASE)/Makefile.mapfile
diff --git a/usr/src/uts/common/mapfiles/ddi.mapfile b/usr/src/uts/common/mapfiles/ddi.mapfile
new file mode 100644
index 0000000000..25aa8ab045
--- /dev/null
+++ b/usr/src/uts/common/mapfiles/ddi.mapfile
@@ -0,0 +1,190 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2016 Joyent, Inc.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object scoping must comply with the rules detailed in
+#
+# usr/src/uts/common/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+#
+# This file contains core functions provided by the DDI and also items
+# required as part of the platform's runime ABI (think compiler
+# functions).
+#
+
+$mapfile_version 2
+
+SYMBOL_SCOPE {
+ global:
+ __divdi3 { FLAGS = EXTERN };
+ __stack_chk_fail { FLAGS = EXTERN };
+ __stack_chk_guard { FLAGS = EXTERN };
+ allocb { FLAGS = EXTERN };
+ assfail { FLAGS = EXTERN };
+ assfail3 { FLAGS = EXTERN };
+ atomic_dec_32_nv { FLAGS = EXTERN };
+ bcmp { FLAGS = EXTERN };
+ bcopy { FLAGS = EXTERN };
+ bzero { FLAGS = EXTERN };
+ cmn_err { FLAGS = EXTERN };
+ cv_broadcast { FLAGS = EXTERN };
+ cv_destroy { FLAGS = EXTERN };
+ cv_init { FLAGS = EXTERN };
+ cv_reltimedwait { FLAGS = EXTERN };
+ ddi_cb_register { FLAGS = EXTERN };
+ ddi_cb_unregister { FLAGS = EXTERN };
+ ddi_dev_regsize { FLAGS = EXTERN };
+ ddi_dma_addr_bind_handle { FLAGS = EXTERN };
+ ddi_dma_alloc_handle { FLAGS = EXTERN };
+ ddi_dma_free_handle { FLAGS = EXTERN };
+ ddi_dma_mem_alloc { FLAGS = EXTERN };
+ ddi_dma_mem_free { FLAGS = EXTERN };
+ ddi_dma_nextcookie { FLAGS = EXTERN };
+ ddi_dma_sync { FLAGS = EXTERN };
+ ddi_dma_unbind_handle { FLAGS = EXTERN };
+ ddi_fm_acc_err_clear { FLAGS = EXTERN };
+ ddi_fm_acc_err_get { FLAGS = EXTERN };
+ ddi_fm_dma_err_get { FLAGS = EXTERN };
+ ddi_fm_ereport_post { FLAGS = EXTERN };
+ ddi_fm_fini { FLAGS = EXTERN };
+ ddi_fm_handler_register { FLAGS = EXTERN };
+ ddi_fm_handler_unregister { FLAGS = EXTERN };
+ ddi_fm_init { FLAGS = EXTERN };
+ ddi_fm_service_impact { FLAGS = EXTERN };
+ ddi_get_driver_private { FLAGS = EXTERN };
+ ddi_get_instance { FLAGS = EXTERN };
+ ddi_get_lbolt { FLAGS = EXTERN };
+ ddi_get_lbolt64 { FLAGS = EXTERN };
+ ddi_get_name { FLAGS = EXTERN };
+ ddi_get_parent { FLAGS = EXTERN };
+ ddi_get16 { FLAGS = EXTERN };
+ ddi_get32 { FLAGS = EXTERN };
+ ddi_get64 { FLAGS = EXTERN };
+ ddi_intr_add_handler { FLAGS = EXTERN };
+ ddi_intr_alloc { FLAGS = EXTERN };
+ ddi_intr_block_disable { FLAGS = EXTERN };
+ ddi_intr_block_enable { FLAGS = EXTERN };
+ ddi_intr_disable { FLAGS = EXTERN };
+ ddi_intr_enable { FLAGS = EXTERN };
+ ddi_intr_free { FLAGS = EXTERN };
+ ddi_intr_get_cap { FLAGS = EXTERN };
+ ddi_intr_get_navail { FLAGS = EXTERN };
+ ddi_intr_get_nintrs { FLAGS = EXTERN };
+ ddi_intr_get_pri { FLAGS = EXTERN };
+ ddi_intr_get_supported_types { FLAGS = EXTERN };
+ ddi_intr_remove_handler { FLAGS = EXTERN };
+ ddi_periodic_add { FLAGS = EXTERN };
+ ddi_periodic_delete { FLAGS = EXTERN };
+ ddi_power { FLAGS = EXTERN };
+ ddi_prop_free { FLAGS = EXTERN };
+ ddi_prop_get_int { FLAGS = EXTERN };
+ ddi_prop_lookup_int_array { FLAGS = EXTERN };
+ ddi_prop_op { FLAGS = EXTERN };
+ ddi_prop_remove_all { FLAGS = EXTERN };
+ ddi_prop_update_int_array { FLAGS = EXTERN };
+ ddi_prop_update_string { FLAGS = EXTERN };
+ ddi_ptob { FLAGS = EXTERN };
+ ddi_put16 { FLAGS = EXTERN };
+ ddi_put32 { FLAGS = EXTERN };
+ ddi_quiesce_not_supported { FLAGS = EXTERN };
+ ddi_regs_map_free { FLAGS = EXTERN };
+ ddi_regs_map_setup { FLAGS = EXTERN };
+ ddi_set_driver_private { FLAGS = EXTERN };
+ ddi_strtol { FLAGS = EXTERN };
+ ddi_taskq_create { FLAGS = EXTERN };
+ ddi_taskq_destroy { FLAGS = EXTERN };
+ ddi_taskq_dispatch { FLAGS = EXTERN };
+ delay { FLAGS = EXTERN };
+ desballoc { FLAGS = EXTERN };
+ dev_err { FLAGS = EXTERN };
+ drv_usectohz { FLAGS = EXTERN };
+ drv_usecwait { FLAGS = EXTERN };
+ fm_ena_generate { FLAGS = EXTERN };
+ freeb { FLAGS = EXTERN };
+ freemsg { FLAGS = EXTERN };
+ freemsgchain { FLAGS = EXTERN };
+ gethrtime { FLAGS = EXTERN };
+ kmem_alloc { FLAGS = EXTERN };
+ kmem_free { FLAGS = EXTERN };
+ kmem_zalloc { FLAGS = EXTERN };
+ kstat_create { FLAGS = EXTERN };
+ kstat_delete { FLAGS = EXTERN };
+ kstat_install { FLAGS = EXTERN };
+ kstat_named_init { FLAGS = EXTERN };
+ list_create { FLAGS = EXTERN };
+ list_destroy { FLAGS = EXTERN };
+ list_head { FLAGS = EXTERN };
+ list_insert_tail { FLAGS = EXTERN };
+ list_next { FLAGS = EXTERN };
+ list_remove { FLAGS = EXTERN };
+ memcpy { FLAGS = EXTERN };
+ memset { FLAGS = EXTERN };
+ miocack { FLAGS = EXTERN };
+ miocnak { FLAGS = EXTERN };
+ mod_driverops { FLAGS = EXTERN };
+ mod_info { FLAGS = EXTERN };
+ mod_install { FLAGS = EXTERN };
+ mod_remove { FLAGS = EXTERN };
+ msgpullup { FLAGS = EXTERN };
+ msgsize { FLAGS = EXTERN };
+ mutex_destroy { FLAGS = EXTERN };
+ mutex_enter { FLAGS = EXTERN };
+ mutex_exit { FLAGS = EXTERN };
+ mutex_init { FLAGS = EXTERN };
+ mutex_owned { FLAGS = EXTERN };
+ mutex_tryenter { FLAGS = EXTERN };
+ nochpoll { FLAGS = EXTERN };
+ nodev { FLAGS = EXTERN };
+ nulldev { FLAGS = EXTERN };
+ panic { FLAGS = EXTERN };
+ pci_config_get16 { FLAGS = EXTERN };
+ pci_config_get32 { FLAGS = EXTERN };
+ pci_config_get64 { FLAGS = EXTERN };
+ pci_config_get8 { FLAGS = EXTERN };
+ pci_config_put16 { FLAGS = EXTERN };
+ pci_config_put32 { FLAGS = EXTERN };
+ pci_config_put64 { FLAGS = EXTERN };
+ pci_config_put8 { FLAGS = EXTERN };
+ pci_config_setup { FLAGS = EXTERN };
+ pci_config_teardown { FLAGS = EXTERN };
+ pci_ereport_post { FLAGS = EXTERN };
+ pci_ereport_setup { FLAGS = EXTERN };
+ pci_ereport_teardown { FLAGS = EXTERN };
+ pci_lcap_locate { FLAGS = EXTERN };
+ qreply { FLAGS = EXTERN };
+ rw_destroy { FLAGS = EXTERN };
+ rw_enter { FLAGS = EXTERN };
+ rw_exit { FLAGS = EXTERN };
+ rw_init { FLAGS = EXTERN };
+ snprintf { FLAGS = EXTERN };
+ sprintf { FLAGS = EXTERN };
+ strcat { FLAGS = EXTERN };
+ strcmp { FLAGS = EXTERN };
+ strcpy { FLAGS = EXTERN };
+ strlen { FLAGS = EXTERN };
+ timeout { FLAGS = EXTERN };
+ untimeout { FLAGS = EXTERN };
+ vsnprintf { FLAGS = EXTERN };
+ vsprintf { FLAGS = EXTERN };
+};
diff --git a/usr/src/uts/common/mapfiles/dtrace.mapfile.awk b/usr/src/uts/common/mapfiles/dtrace.mapfile.awk
new file mode 100644
index 0000000000..b8a7e2d372
--- /dev/null
+++ b/usr/src/uts/common/mapfiles/dtrace.mapfile.awk
@@ -0,0 +1,34 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2016 Joyent, Inc.
+#
+
+#
+# This script is designed to assemble a mapfile for DTrace probes.
+#
+BEGIN {
+ print "#"
+ print "# This file is autogenerated by dtrace.mapfile.awk"
+ print "#"
+ print "$mapfile_version 2"
+ print "SYMBOL_SCOPE {"
+ print " global:"
+}
+
+/__dtrace_probe_/ {
+ printf "\t%s\t{ FLAGS = EXTERN };\n", $1
+}
+
+END {
+ print "};"
+}
diff --git a/usr/src/uts/common/mapfiles/kernel.mapfile b/usr/src/uts/common/mapfiles/kernel.mapfile
new file mode 100644
index 0000000000..6bddb3c7ef
--- /dev/null
+++ b/usr/src/uts/common/mapfiles/kernel.mapfile
@@ -0,0 +1,41 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2016 Joyent, Inc.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object scoping must comply with the rules detailed in
+#
+# usr/src/uts/common/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+#
+# This file contains functions provided by the kernel that various
+# modules use. This is a combination of things in both unix and genunix.
+#
+
+$mapfile_version 2
+
+SYMBOL_SCOPE {
+ global:
+ bt_getlowbit { FLAGS = EXTERN };
+ servicing_interrupt { FLAGS = EXTERN };
+};
diff --git a/usr/src/uts/common/mapfiles/mac.mapfile b/usr/src/uts/common/mapfiles/mac.mapfile
new file mode 100644
index 0000000000..30462f80d5
--- /dev/null
+++ b/usr/src/uts/common/mapfiles/mac.mapfile
@@ -0,0 +1,55 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2016 Joyent, Inc.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object scoping must comply with the rules detailed in
+#
+# usr/src/uts/common/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_SCOPE {
+ global:
+ mac_alloc { FLAGS = EXTERN };
+ mac_fini_ops { FLAGS = EXTERN };
+ mac_free { FLAGS = EXTERN };
+ mac_hcksum_get { FLAGS = EXTERN };
+ mac_hcksum_set { FLAGS = EXTERN };
+ mac_init_ops { FLAGS = EXTERN };
+ mac_link_update { FLAGS = EXTERN };
+ mac_lso_get { FLAGS = EXTERN };
+ mac_maxsdu_update { FLAGS = EXTERN };
+ mac_prop_info_set_default_link_flowctrl { FLAGS = EXTERN };
+ mac_prop_info_set_default_str { FLAGS = EXTERN };
+ mac_prop_info_set_default_uint8 { FLAGS = EXTERN };
+ mac_prop_info_set_perm { FLAGS = EXTERN };
+ mac_prop_info_set_range_uint32 { FLAGS = EXTERN };
+ mac_ring_intr_set { FLAGS = EXTERN };
+ mac_register { FLAGS = EXTERN };
+ mac_rx { FLAGS = EXTERN };
+ mac_rx_ring { FLAGS = EXTERN };
+ mac_tx_ring_update { FLAGS = EXTERN };
+ mac_tx_update { FLAGS = EXTERN };
+ mac_unregister { FLAGS = EXTERN };
+};
diff --git a/usr/src/uts/common/mapfiles/random.mapfile b/usr/src/uts/common/mapfiles/random.mapfile
new file mode 100644
index 0000000000..d3d8bc89fa
--- /dev/null
+++ b/usr/src/uts/common/mapfiles/random.mapfile
@@ -0,0 +1,37 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2016 Joyent, Inc.
+#
+
+#
+# MAPFILE HEADER START
+#
+# WARNING: STOP NOW. DO NOT MODIFY THIS FILE.
+# Object scoping must comply with the rules detailed in
+#
+# usr/src/uts/common/README.mapfiles
+#
+# You should not be making modifications here until you've read the most current
+# copy of that file. If you need help, contact a gatekeeper for guidance.
+#
+# MAPFILE HEADER END
+#
+
+$mapfile_version 2
+
+SYMBOL_SCOPE {
+ global:
+ random_get_bytes { FLAGS = EXTERN };
+ random_get_blocking_bytes { FLAGS = EXTERN };
+ random_get_pseudo_bytes { FLAGS = EXTERN };
+};
diff --git a/usr/src/uts/common/netinet/in.h b/usr/src/uts/common/netinet/in.h
index d530b7f36e..7927cf5e24 100644
--- a/usr/src/uts/common/netinet/in.h
+++ b/usr/src/uts/common/netinet/in.h
@@ -3,6 +3,7 @@
* Use is subject to license terms.
*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
/*
* Copyright (c) 1982, 1986 Regents of the University of California.
@@ -225,6 +226,7 @@ typedef uint16_t sa_family_t;
#define IPPORT_SLP 427
#define IPPORT_MIP 434
#define IPPORT_SMB 445 /* a.k.a. microsoft-ds */
+#define IPPORT_VXLAN 4789
/*
* Internet Key Exchange (IKE) ports
@@ -268,6 +270,11 @@ typedef uint16_t sa_family_t;
#define IPPORT_RESERVED 1024
#define IPPORT_USERRESERVED 5000
+#ifdef _KERNEL
+#define IPPORT_DYNAMIC_MIN 49152
+#define IPPORT_DYNAMIC_MAX 65535
+#endif
+
/*
* Link numbers
*/
diff --git a/usr/src/uts/common/netinet/udp.h b/usr/src/uts/common/netinet/udp.h
index c65a9bad3a..74cff75d43 100644
--- a/usr/src/uts/common/netinet/udp.h
+++ b/usr/src/uts/common/netinet/udp.h
@@ -1,6 +1,7 @@
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
*/
/*
@@ -17,9 +18,6 @@
#ifndef _NETINET_UDP_H
#define _NETINET_UDP_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-/* udp.h 1.7 88/08/19 SMI; from UCB 7.1 6/5/86 */
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -36,6 +34,16 @@ struct udphdr {
#define UDP_EXCLBIND 0x0101 /* for internal use only */
#define UDP_RCVHDR 0x0102 /* for internal use only */
#define UDP_NAT_T_ENDPOINT 0x0103 /* for internal use only */
+#define UDP_SRCPORT_HASH 0x0104 /* for internal use only */
+#define UDP_SND_TO_CONNECTED 0x0105 /* for internal use only */
+
+/*
+ * Hash definitions for UDP_SRCPORT_HASH that effectively tell UDP how to go
+ * handle UDP_SRCPORT_HASH.
+ */
+#define UDP_HASH_DISABLE 0x0000 /* for internal use only */
+#define UDP_HASH_VXLAN 0x0001 /* for internal use only */
+
/*
* Following option in UDP_ namespace required to be exposed through
* <xti.h> (It also requires exposing options not implemented). The options
diff --git a/usr/src/uts/common/os/brand.c b/usr/src/uts/common/os/brand.c
index 0af67f5d98..02901d023d 100644
--- a/usr/src/uts/common/os/brand.c
+++ b/usr/src/uts/common/os/brand.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
*/
#include <sys/kmem.h>
@@ -45,7 +46,7 @@ struct brand_mach_ops native_mach_ops = {
};
#else /* !__sparcv9 */
struct brand_mach_ops native_mach_ops = {
- NULL, NULL, NULL, NULL
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL
};
#endif /* !__sparcv9 */
@@ -53,7 +54,8 @@ brand_t native_brand = {
BRAND_VER_1,
"native",
NULL,
- &native_mach_ops
+ &native_mach_ops,
+ 0
};
/*
@@ -310,46 +312,112 @@ brand_unregister_zone(struct brand *bp)
mutex_exit(&brand_list_lock);
}
-void
-brand_setbrand(proc_t *p)
+int
+brand_setbrand(proc_t *p, boolean_t lwps_ok)
{
brand_t *bp = p->p_zone->zone_brand;
+ void *brand_data = NULL;
- ASSERT(bp != NULL);
- ASSERT(p->p_brand == &native_brand);
+ VERIFY(MUTEX_NOT_HELD(&p->p_lock));
+ VERIFY(bp != NULL);
/*
- * We should only be called from exec(), when we know the process
- * is single-threaded.
+ * Process branding occurs during fork() and exec(). When it happens
+ * during fork(), the LWP count will always be 0 since branding is
+ * performed as part of getproc(), before LWPs have been associated.
+ * The same is not true during exec(), where a multi-LWP process may
+ * undergo branding just prior to gexec(). This is to ensure
+ * exec-related brand hooks are available. While it may seem
+ * complicated to brand a multi-LWP process, the two possible outcomes
+ * simplify things:
+ *
+ * 1. The exec() succeeds: LWPs besides the caller will be killed and
+ * any further branding will occur in a single-LWP context.
+ * 2. The exec() fails: The process will be promptly unbranded since
+ * the hooks are no longer needed.
+ *
+ * To prevent inconsistent brand state from being encountered during
+ * the exec(), LWPs beyond the caller which are associated with this
+ * process must be held temporarily. They will be released either when
+ * they are killed in the exec() success, or when the brand is cleared
+ * after exec() failure.
*/
- ASSERT(p->p_tlist == p->p_tlist->t_forw);
+ if (lwps_ok) {
+ /*
+ * We've been called from a exec() context tolerating the
+ * existence of multiple LWPs during branding is necessary.
+ */
+ VERIFY(p == curproc);
+ VERIFY(p->p_tlist != NULL);
+ if (p->p_tlist != p->p_tlist->t_forw) {
+ /*
+ * Multiple LWPs are present. Hold all but the caller.
+ */
+ if (!holdlwps(SHOLDFORK1)) {
+ return (-1);
+ }
+ }
+ } else {
+ /*
+ * Processes branded during fork() should not have LWPs at all.
+ */
+ VERIFY(p->p_tlist == NULL);
+ }
+
+ if (bp->b_data_size > 0) {
+ brand_data = kmem_zalloc(bp->b_data_size, KM_SLEEP);
+ }
+
+ mutex_enter(&p->p_lock);
+ ASSERT(!PROC_IS_BRANDED(p));
p->p_brand = bp;
+ p->p_brand_data = brand_data;
ASSERT(PROC_IS_BRANDED(p));
BROP(p)->b_setbrand(p);
+ mutex_exit(&p->p_lock);
+ return (0);
}
void
-brand_clearbrand(proc_t *p, boolean_t no_lwps)
+brand_clearbrand(proc_t *p, boolean_t lwps_ok)
{
brand_t *bp = p->p_zone->zone_brand;
- klwp_t *lwp = NULL;
- ASSERT(bp != NULL);
- ASSERT(!no_lwps || (p->p_tlist == NULL));
+ void *brand_data;
- /*
- * If called from exec_common() or proc_exit(),
- * we know the process is single-threaded.
- * If called from fork_fail, p_tlist is NULL.
- */
- if (!no_lwps) {
- ASSERT(p->p_tlist == p->p_tlist->t_forw);
- lwp = p->p_tlist->t_lwp;
- }
+ VERIFY(MUTEX_NOT_HELD(&p->p_lock));
+ VERIFY(bp != NULL);
+ VERIFY(PROC_IS_BRANDED(p));
- ASSERT(PROC_IS_BRANDED(p));
- BROP(p)->b_proc_exit(p, lwp);
+ mutex_enter(&p->p_lock);
p->p_brand = &native_brand;
+ brand_data = p->p_brand_data;
+ p->p_brand_data = NULL;
+
+ if (lwps_ok) {
+ VERIFY(p == curproc);
+ /*
+ * A process with multiple LWPs is being de-branded after
+ * failing an exec. The other LWPs were held as part of the
+ * procedure, so they must be resumed now.
+ */
+ if (p->p_tlist != NULL && p->p_tlist != p->p_tlist->t_forw) {
+ continuelwps(p);
+ }
+ } else {
+ /*
+ * While clearing the brand, it's ok for one LWP to be present.
+ * This happens when a native binary is executed inside a
+ * branded zone, since the brand will be removed during the
+ * course of a successful exec.
+ */
+ VERIFY(p->p_tlist == NULL || p->p_tlist == p->p_tlist->t_forw);
+ }
+ mutex_exit(&p->p_lock);
+
+ if (brand_data != NULL) {
+ kmem_free(brand_data, bp->b_data_size);
+ }
}
#if defined(__sparcv9)
@@ -483,7 +551,7 @@ brand_solaris_cmd(int cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
return (ENOSYS);
/* For all other operations this must be a branded process. */
- if (p->p_brand == &native_brand)
+ if (!PROC_IS_BRANDED(p))
return (ENOSYS);
ASSERT(p->p_brand == pbrand);
@@ -601,15 +669,15 @@ restoreexecenv(struct execenv *ep, stack_t *sp)
int
brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
intpdata_t *idatap, int level, long *execsz, int setid, caddr_t exec_file,
- cred_t *cred, int brand_action, struct brand *pbrand, char *bname,
- char *brandlib, char *brandlib32, char *brandlinker, char *brandlinker32)
+ cred_t *cred, int *brand_action, struct brand *pbrand, char *bname,
+ char *brandlib, char *brandlib32)
{
vnode_t *nvp;
Ehdr ehdr;
Addr uphdr_vaddr;
intptr_t voffset;
- int interp;
+ char *interp;
int i, err;
struct execenv env;
struct execenv origenv;
@@ -619,7 +687,6 @@ brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
klwp_t *lwp = ttolwp(curthread);
brand_proc_data_t *spd;
brand_elf_data_t sed, *sedp;
- char *linker;
uintptr_t lddata; /* lddata of executable's linker */
ASSERT(curproc->p_brand == pbrand);
@@ -636,12 +703,10 @@ brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
*/
if (args->to_model == DATAMODEL_NATIVE) {
args->emulator = brandlib;
- linker = brandlinker;
}
#if defined(_LP64)
else {
args->emulator = brandlib32;
- linker = brandlinker32;
}
#endif /* _LP64 */
@@ -725,7 +790,7 @@ brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
if (args->to_model == DATAMODEL_NATIVE) {
err = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr,
&voffset, exec_file, &interp, &env.ex_bssbase,
- &env.ex_brkbase, &env.ex_brksize, NULL);
+ &env.ex_brkbase, &env.ex_brksize, NULL, NULL);
}
#if defined(_LP64)
else {
@@ -733,7 +798,7 @@ brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
Elf32_Addr uphdr_vaddr32;
err = mapexec32_brand(vp, args, &ehdr32, &uphdr_vaddr32,
&voffset, exec_file, &interp, &env.ex_bssbase,
- &env.ex_brkbase, &env.ex_brksize, NULL);
+ &env.ex_brkbase, &env.ex_brksize, NULL, NULL);
Ehdr32to64(&ehdr32, &ehdr);
if (uphdr_vaddr32 == (Elf32_Addr)-1)
@@ -744,6 +809,10 @@ brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
#endif /* _LP64 */
if (err != 0) {
restoreexecenv(&origenv, &orig_sigaltstack);
+
+ if (interp != NULL)
+ kmem_free(interp, MAXPATHLEN);
+
return (err);
}
@@ -761,7 +830,7 @@ brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
sedp->sed_phent = ehdr.e_phentsize;
sedp->sed_phnum = ehdr.e_phnum;
- if (interp) {
+ if (interp != NULL) {
if (ehdr.e_type == ET_DYN) {
/*
* This is a shared object executable, so we
@@ -777,16 +846,20 @@ brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
* it in and store relevant information about it in the
* aux vector, where the brand library can find it.
*/
- if ((err = lookupname(linker, UIO_SYSSPACE,
+ if ((err = lookupname(interp, UIO_SYSSPACE,
FOLLOW, NULLVPP, &nvp)) != 0) {
- uprintf("%s: not found.", brandlinker);
+ uprintf("%s: not found.", interp);
restoreexecenv(&origenv, &orig_sigaltstack);
+ kmem_free(interp, MAXPATHLEN);
return (err);
}
+
+ kmem_free(interp, MAXPATHLEN);
+
if (args->to_model == DATAMODEL_NATIVE) {
err = mapexec_brand(nvp, args, &ehdr,
&uphdr_vaddr, &voffset, exec_file, &interp,
- NULL, NULL, NULL, &lddata);
+ NULL, NULL, NULL, &lddata, NULL);
}
#if defined(_LP64)
else {
@@ -794,7 +867,7 @@ brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
Elf32_Addr uphdr_vaddr32;
err = mapexec32_brand(nvp, args, &ehdr32,
&uphdr_vaddr32, &voffset, exec_file, &interp,
- NULL, NULL, NULL, &lddata);
+ NULL, NULL, NULL, &lddata, NULL);
Ehdr32to64(&ehdr32, &ehdr);
if (uphdr_vaddr32 == (Elf32_Addr)-1)
@@ -934,9 +1007,9 @@ brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
/*
* Third, the /proc aux vectors set up by elfexec() point to
- * brand emulation library and it's linker. Copy these to the
+ * brand emulation library and its linker. Copy these to the
* /proc brand specific aux vector, and update the regular
- * /proc aux vectors to point to the executable (and it's
+ * /proc aux vectors to point to the executable (and its
* linker). This will enable debuggers to access the
* executable via the usual /proc or elf notes aux vectors.
*
@@ -1078,55 +1151,31 @@ brand_solaris_freelwp(klwp_t *l, struct brand *pbrand)
}
/*ARGSUSED*/
-int
+void
brand_solaris_initlwp(klwp_t *l, struct brand *pbrand)
{
ASSERT(l->lwp_procp->p_brand == pbrand);
ASSERT(l->lwp_procp->p_brand_data != NULL);
ASSERT(l->lwp_brand == NULL);
l->lwp_brand = (void *)-1;
- return (0);
}
/*ARGSUSED*/
void
brand_solaris_lwpexit(klwp_t *l, struct brand *pbrand)
{
- proc_t *p = l->lwp_procp;
-
ASSERT(l->lwp_procp->p_brand == pbrand);
ASSERT(l->lwp_procp->p_brand_data != NULL);
ASSERT(l->lwp_brand != NULL);
-
- /*
- * We should never be called for the last thread in a process.
- * (That case is handled by brand_solaris_proc_exit().)
- * Therefore this lwp must be exiting from a multi-threaded
- * process.
- */
- ASSERT(p->p_tlist != p->p_tlist->t_forw);
-
- l->lwp_brand = NULL;
}
/*ARGSUSED*/
void
-brand_solaris_proc_exit(struct proc *p, klwp_t *l, struct brand *pbrand)
+brand_solaris_proc_exit(struct proc *p, struct brand *pbrand)
{
ASSERT(p->p_brand == pbrand);
ASSERT(p->p_brand_data != NULL);
- /*
- * When called from proc_exit(), we know that process is
- * single-threaded and free our lwp brand data.
- * otherwise just free p_brand_data and return.
- */
- if (l != NULL) {
- ASSERT(p->p_tlist == p->p_tlist->t_forw);
- ASSERT(p->p_tlist->t_lwp == l);
- (void) brand_solaris_freelwp(l, pbrand);
- }
-
/* upon exit, free our proc brand data */
kmem_free(p->p_brand_data, sizeof (brand_proc_data_t));
p->p_brand_data = NULL;
@@ -1145,5 +1194,4 @@ brand_solaris_setbrand(proc_t *p, struct brand *pbrand)
ASSERT(p->p_tlist == p->p_tlist->t_forw);
p->p_brand_data = kmem_zalloc(sizeof (brand_proc_data_t), KM_SLEEP);
- (void) brand_solaris_initlwp(p->p_tlist->t_lwp, pbrand);
}
diff --git a/usr/src/uts/common/os/clock_highres.c b/usr/src/uts/common/os/clock_highres.c
index 805813037d..1280c8a1b6 100644
--- a/usr/src/uts/common/os/clock_highres.c
+++ b/usr/src/uts/common/os/clock_highres.c
@@ -25,7 +25,7 @@
*/
/*
- * Copyright (c) 2015, Joyent Inc. All rights reserved.
+ * Copyright 2016, Joyent Inc.
*/
#include <sys/timer.h>
@@ -41,6 +41,9 @@
static clock_backend_t clock_highres;
+/* minimum non-privileged interval (200us) */
+long clock_highres_interval_min = 200000;
+
/*ARGSUSED*/
static int
clock_highres_settime(timespec_t *ts)
@@ -68,17 +71,6 @@ clock_highres_getres(timespec_t *ts)
static int
clock_highres_timer_create(itimer_t *it, void (*fire)(itimer_t *))
{
- /*
- * CLOCK_HIGHRES timers of sufficiently high resolution can deny
- * service; only allow privileged users to create such timers.
- * Sites that do not wish to have this restriction should
- * give users the "proc_clock_highres" privilege.
- */
- if (secpolicy_clock_highres(CRED()) != 0) {
- it->it_arg = NULL;
- return (EPERM);
- }
-
it->it_arg = kmem_zalloc(sizeof (cyclic_id_t), KM_SLEEP);
it->it_fire = fire;
@@ -111,6 +103,49 @@ clock_highres_timer_settime(itimer_t *it, int flags,
cpu_t *cpu;
cpupart_t *cpupart;
int pset;
+ boolean_t value_need_clamp = B_FALSE;
+ boolean_t intval_need_clamp = B_FALSE;
+ cred_t *cr = CRED();
+ struct itimerspec clamped;
+
+ /*
+ * CLOCK_HIGHRES timers of sufficiently high resolution can deny
+ * service; only allow privileged users to create such timers.
+ * Non-privileged users (those without the "proc_clock_highres"
+ * privilege) can create timers with lower resolution but if they
+ * attempt to use a very low time value (< 200us) then their
+ * timer will be clamped at 200us.
+ */
+ if (when->it_value.tv_sec == 0 &&
+ when->it_value.tv_nsec > 0 &&
+ when->it_value.tv_nsec < clock_highres_interval_min)
+ value_need_clamp = B_TRUE;
+
+ if (when->it_interval.tv_sec == 0 &&
+ when->it_interval.tv_nsec > 0 &&
+ when->it_interval.tv_nsec < clock_highres_interval_min)
+ intval_need_clamp = B_TRUE;
+
+ if ((value_need_clamp || intval_need_clamp) &&
+ secpolicy_clock_highres(cr) != 0) {
+ clamped.it_value.tv_sec = when->it_value.tv_sec;
+ clamped.it_interval.tv_sec = when->it_interval.tv_sec;
+
+ if (value_need_clamp) {
+ clamped.it_value.tv_nsec = clock_highres_interval_min;
+ } else {
+ clamped.it_value.tv_nsec = when->it_value.tv_nsec;
+ }
+
+ if (intval_need_clamp) {
+ clamped.it_interval.tv_nsec =
+ clock_highres_interval_min;
+ } else {
+ clamped.it_interval.tv_nsec = when->it_interval.tv_nsec;
+ }
+
+ when = &clamped;
+ }
cyctime.cyt_when = ts2hrt(&when->it_value);
cyctime.cyt_interval = ts2hrt(&when->it_interval);
diff --git a/usr/src/uts/common/os/contract.c b/usr/src/uts/common/os/contract.c
index 249066674e..9ea08f5535 100644
--- a/usr/src/uts/common/os/contract.c
+++ b/usr/src/uts/common/os/contract.c
@@ -21,6 +21,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
*/
/*
@@ -287,7 +288,10 @@ contract_ctor(contract_t *ct, ct_type_t *type, ct_template_t *tmpl, void *data,
avl_index_t where;
klwp_t *curlwp = ttolwp(curthread);
- ASSERT(author == curproc);
+ /*
+ * It's possible that author is not curproc if the zone is creating
+ * a new process as a child of zsched.
+ */
mutex_init(&ct->ct_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&ct->ct_reflock, NULL, MUTEX_DEFAULT, NULL);
diff --git a/usr/src/uts/common/os/cred.c b/usr/src/uts/common/os/cred.c
index 733fd03a92..b0098946b3 100644
--- a/usr/src/uts/common/os/cred.c
+++ b/usr/src/uts/common/os/cred.c
@@ -727,6 +727,14 @@ crgetzoneid(const cred_t *cr)
cr->cr_zone->zone_id);
}
+zoneid_t
+crgetzonedid(const cred_t *cr)
+{
+ return (cr->cr_zone == NULL ?
+ (cr->cr_uid == -1 ? (zoneid_t)-1 : GLOBAL_ZONEID) :
+ cr->cr_zone->zone_did);
+}
+
projid_t
crgetprojid(const cred_t *cr)
{
diff --git a/usr/src/uts/common/os/ddi_intr_irm.c b/usr/src/uts/common/os/ddi_intr_irm.c
index c3c0481e7f..a4b35dcb5b 100644
--- a/usr/src/uts/common/os/ddi_intr_irm.c
+++ b/usr/src/uts/common/os/ddi_intr_irm.c
@@ -1320,7 +1320,7 @@ i_ddi_irm_notify(ddi_irm_pool_t *pool_p, ddi_irm_req_t *req_p)
/* Log callback errors */
if (ret != DDI_SUCCESS) {
- cmn_err(CE_WARN, "%s%d: failed callback (action=%d, ret=%d)\n",
+ cmn_err(CE_WARN, "!%s%d: failed callback (action=%d, ret=%d)\n",
ddi_driver_name(req_p->ireq_dip),
ddi_get_instance(req_p->ireq_dip), (int)action, ret);
}
diff --git a/usr/src/uts/common/os/dumpsubr.c b/usr/src/uts/common/os/dumpsubr.c
index 781c564233..38d5f1ab18 100644
--- a/usr/src/uts/common/os/dumpsubr.c
+++ b/usr/src/uts/common/os/dumpsubr.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
*/
#include <sys/types.h>
@@ -342,6 +343,7 @@ typedef struct dumpsync {
uint_t neednl; /* will need to print a newline */
uint_t percent; /* dump progress */
uint_t percent_done; /* dump progress reported */
+ int sec_done; /* dump progress last report time */
cqueue_t freebufq; /* free kmem bufs for writing */
cqueue_t mainq; /* input for main task */
cqueue_t helperq; /* input for helpers */
@@ -2285,7 +2287,7 @@ dumpsys_main_task(void *arg)
cbuf_t *cp;
pgcnt_t baseoff, pfnoff;
pfn_t base, pfn;
- int sec, i, dumpserial;
+ int i, dumpserial;
/*
* Fall back to serial mode if there are no helpers.
@@ -2311,13 +2313,20 @@ dumpsys_main_task(void *arg)
dump_init_memlist_walker(&mlw);
- /* CONSTCOND */
- while (1) {
+ for (;;) {
+ int sec = (gethrtime() - ds->start) / NANOSEC;
- if (ds->percent > ds->percent_done) {
+ /*
+ * Render a simple progress display on the system console to
+ * make clear to the operator that the system has not hung.
+ * Emit an update when dump progress has advanced by one
+ * percent, or when no update has been drawn in the last
+ * second.
+ */
+ if (ds->percent > ds->percent_done || sec > ds->sec_done) {
+ ds->sec_done = sec;
ds->percent_done = ds->percent;
- sec = (gethrtime() - ds->start) / 1000 / 1000 / 1000;
- uprintf("^\r%2d:%02d %3d%% done",
+ uprintf("^\rdumping: %2d:%02d %3d%% done",
sec / 60, sec % 60, ds->percent);
ds->neednl = 1;
}
@@ -2501,8 +2510,7 @@ dumpsys_main_task(void *arg)
break;
} /* end switch */
-
- } /* end while(1) */
+ }
}
#ifdef COLLECT_METRICS
diff --git a/usr/src/uts/common/os/exec.c b/usr/src/uts/common/os/exec.c
index 172fce8d89..d46b8538a9 100644
--- a/usr/src/uts/common/os/exec.c
+++ b/usr/src/uts/common/os/exec.c
@@ -26,7 +26,7 @@
/* Copyright (c) 1988 AT&T */
/* All Rights Reserved */
/*
- * Copyright 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -69,6 +69,7 @@
#include <sys/sdt.h>
#include <sys/brand.h>
#include <sys/klpd.h>
+#include <sys/random.h>
#include <c2/audit.h>
@@ -97,6 +98,7 @@ uint_t auxv_hwcap32_2 = 0; /* 32-bit version of auxv_hwcap2 */
#endif
#define PSUIDFLAGS (SNOCD|SUGID)
+#define RANDOM_LEN 16 /* 16 bytes for AT_RANDOM aux entry */
/*
* exece() - system call wrapper around exec_common()
@@ -297,14 +299,43 @@ exec_common(const char *fname, const char **argp, const char **envp,
ua.argp = argp;
ua.envp = envp;
- /* If necessary, brand this process before we start the exec. */
- if (brandme)
- brand_setbrand(p);
+ /* If necessary, brand this process/lwp before we start the exec. */
+ if (brandme) {
+ void *brand_data = NULL;
+
+ /*
+ * Process branding may fail if multiple LWPs are present and
+ * holdlwps() cannot complete successfully.
+ */
+ error = brand_setbrand(p, B_TRUE);
+
+ if (error == 0 && BROP(p)->b_lwpdata_alloc != NULL) {
+ brand_data = BROP(p)->b_lwpdata_alloc(p);
+ if (brand_data == NULL) {
+ error = 1;
+ }
+ }
+
+ if (error == 0) {
+ mutex_enter(&p->p_lock);
+ BROP(p)->b_initlwp(lwp, brand_data);
+ mutex_exit(&p->p_lock);
+ } else {
+ VN_RELE(vp);
+ if (dir != NULL) {
+ VN_RELE(dir);
+ }
+ pn_free(&resolvepn);
+ goto fail;
+ }
+ }
if ((error = gexec(&vp, &ua, &args, NULL, 0, &execsz,
- exec_file, p->p_cred, brand_action)) != 0) {
- if (brandme)
- brand_clearbrand(p, B_FALSE);
+ exec_file, p->p_cred, &brand_action)) != 0) {
+ if (brandme) {
+ BROP(p)->b_freelwp(lwp);
+ brand_clearbrand(p, B_TRUE);
+ }
VN_RELE(vp);
if (dir != NULL)
VN_RELE(dir);
@@ -336,7 +367,7 @@ exec_common(const char *fname, const char **argp, const char **envp,
/*
* Clear contract template state
*/
- lwp_ctmpl_clear(lwp);
+ lwp_ctmpl_clear(lwp, B_TRUE);
/*
* Save the directory in which we found the executable for expanding
@@ -360,6 +391,8 @@ exec_common(const char *fname, const char **argp, const char **envp,
* pending held signals remain held, so don't clear t_hold.
*/
mutex_enter(&p->p_lock);
+ DTRACE_PROBE3(oldcontext__set, klwp_t *, lwp,
+ uintptr_t, lwp->lwp_oldcontext, uintptr_t, 0);
lwp->lwp_oldcontext = 0;
lwp->lwp_ustack = 0;
lwp->lwp_old_stk_ctl = 0;
@@ -419,8 +452,10 @@ exec_common(const char *fname, const char **argp, const char **envp,
TRACE_2(TR_FAC_PROC, TR_PROC_EXEC, "proc_exec:p %p up %p", p, up);
/* Unbrand ourself if necessary. */
- if (PROC_IS_BRANDED(p) && (brand_action == EBA_NATIVE))
+ if (PROC_IS_BRANDED(p) && (brand_action == EBA_NATIVE)) {
+ BROP(p)->b_freelwp(lwp);
brand_clearbrand(p, B_FALSE);
+ }
setregs(&args);
@@ -544,7 +579,7 @@ gexec(
long *execsz,
caddr_t exec_file,
struct cred *cred,
- int brand_action)
+ int *brand_action)
{
struct vnode *vp, *execvp = NULL;
proc_t *pp = ttoproc(curthread);
@@ -858,8 +893,14 @@ gexec(
if (pp->p_plist || (pp->p_proc_flag & P_PR_TRACE))
args->traceinval = 1;
}
- if (pp->p_proc_flag & P_PR_PTRACE)
+
+ /*
+ * If legacy ptrace is enabled, generate the SIGTRAP.
+ */
+ if (pp->p_proc_flag & P_PR_PTRACE) {
psignal(pp, SIGTRAP);
+ }
+
if (args->traceinval)
prinvalidate(&pp->p_user);
}
@@ -1517,6 +1558,27 @@ stk_add(uarg_t *args, const char *sp, enum uio_seg segflg)
return (0);
}
+/*
+ * Add a fixed size byte array to the stack (only from kernel space).
+ */
+static int
+stk_byte_add(uarg_t *args, const uint8_t *sp, size_t len)
+{
+ int error;
+
+ if (STK_AVAIL(args) < sizeof (int))
+ return (E2BIG);
+ *--args->stk_offp = args->stk_strp - args->stk_base;
+
+ if (len > STK_AVAIL(args))
+ return (E2BIG);
+ bcopy(sp, args->stk_strp, len);
+
+ args->stk_strp += len;
+
+ return (0);
+}
+
static int
stk_getptr(uarg_t *args, char *src, char **dst)
{
@@ -1553,6 +1615,7 @@ stk_copyin(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp)
size_t size, pad;
char *argv = (char *)uap->argp;
char *envp = (char *)uap->envp;
+ uint8_t rdata[RANDOM_LEN];
/*
* Copy interpreter's name and argument to argv[0] and argv[1].
@@ -1635,8 +1698,9 @@ stk_copyin(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp)
args->ne = args->na - argc;
/*
- * Add AT_SUN_PLATFORM, AT_SUN_EXECNAME, AT_SUN_BRANDNAME, and
- * AT_SUN_EMULATOR strings to the stack.
+ * Add AT_SUN_PLATFORM, AT_SUN_EXECNAME, AT_SUN_BRANDNAME,
+ * AT_SUN_BRAND_NROOT, and AT_SUN_EMULATOR strings, as well as AT_RANDOM
+ * array, to the stack.
*/
if (auxvpp != NULL && *auxvpp != NULL) {
if ((error = stk_add(args, platform, UIO_SYSSPACE)) != 0)
@@ -1649,6 +1713,20 @@ stk_copyin(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp)
if (args->emulator != NULL &&
(error = stk_add(args, args->emulator, UIO_SYSSPACE)) != 0)
return (error);
+
+ /*
+ * For the AT_RANDOM aux vector we provide 16 bytes of random
+ * data.
+ */
+ (void) random_get_pseudo_bytes(rdata, sizeof (rdata));
+
+ if ((error = stk_byte_add(args, rdata, sizeof (rdata))) != 0)
+ return (error);
+
+ if (args->brand_nroot != NULL &&
+ (error = stk_add(args, args->brand_nroot,
+ UIO_SYSSPACE)) != 0)
+ return (error);
}
/*
@@ -1755,7 +1833,7 @@ stk_copyout(uarg_t *args, char *usrstack, void **auxvpp, user_t *up)
/*
* Fill in the aux vector now that we know the user stack addresses
* for the AT_SUN_PLATFORM, AT_SUN_EXECNAME, AT_SUN_BRANDNAME and
- * AT_SUN_EMULATOR strings.
+ * AT_SUN_EMULATOR strings, as well as the AT_RANDOM array.
*/
if (auxvpp != NULL && *auxvpp != NULL) {
if (args->to_model == DATAMODEL_NATIVE) {
@@ -1768,6 +1846,11 @@ stk_copyout(uarg_t *args, char *usrstack, void **auxvpp, user_t *up)
if (args->emulator != NULL)
ADDAUX(*a,
AT_SUN_EMULATOR, (long)&ustrp[*--offp])
+ ADDAUX(*a, AT_RANDOM, (long)&ustrp[*--offp])
+ if (args->brand_nroot != NULL) {
+ ADDAUX(*a,
+ AT_SUN_BRAND_NROOT, (long)&ustrp[*--offp])
+ }
} else {
auxv32_t **a = (auxv32_t **)auxvpp;
ADDAUX(*a,
@@ -1780,6 +1863,11 @@ stk_copyout(uarg_t *args, char *usrstack, void **auxvpp, user_t *up)
if (args->emulator != NULL)
ADDAUX(*a, AT_SUN_EMULATOR,
(int)(uintptr_t)&ustrp[*--offp])
+ ADDAUX(*a, AT_RANDOM, (int)(uintptr_t)&ustrp[*--offp])
+ if (args->brand_nroot != NULL) {
+ ADDAUX(*a, AT_SUN_BRAND_NROOT,
+ (int)(uintptr_t)&ustrp[*--offp])
+ }
}
}
@@ -1868,6 +1956,9 @@ exec_args(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp)
usrstack = (char *)USRSTACK32;
}
+ if (args->maxstack != 0 && (uintptr_t)usrstack > args->maxstack)
+ usrstack = (char *)args->maxstack;
+
ASSERT(P2PHASE((uintptr_t)usrstack, args->stk_align) == 0);
#if defined(__sparc)
diff --git a/usr/src/uts/common/os/exit.c b/usr/src/uts/common/os/exit.c
index f0c0983a3a..0e213deb21 100644
--- a/usr/src/uts/common/os/exit.c
+++ b/usr/src/uts/common/os/exit.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, Joyent, Inc. All rights reserved.
+ * Copyright 2015 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -230,7 +230,7 @@ restart_init(int what, int why)
siginfofree(lwp->lwp_curinfo);
lwp->lwp_curinfo = NULL;
}
- lwp_ctmpl_clear(lwp);
+ lwp_ctmpl_clear(lwp, B_FALSE);
/*
* Reset both the process root directory and the current working
@@ -366,19 +366,6 @@ proc_exit(int why, int what)
}
mutex_exit(&p->p_lock);
- DTRACE_PROC(lwp__exit);
- DTRACE_PROC1(exit, int, why);
-
- /*
- * Will perform any brand specific proc exit processing, since this
- * is always the last lwp, will also perform lwp_exit and free brand
- * data
- */
- if (PROC_IS_BRANDED(p)) {
- lwp_detach_brand_hdlrs(lwp);
- brand_clearbrand(p, B_FALSE);
- }
-
/*
* Don't let init exit unless zone_start_init() failed its exec, or
* we are shutting down the zone or the machine.
@@ -390,12 +377,35 @@ proc_exit(int why, int what)
if (z->zone_boot_err == 0 &&
zone_status_get(z) < ZONE_IS_SHUTTING_DOWN &&
zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN) {
- if (z->zone_restart_init == B_TRUE) {
- if (restart_init(what, why) == 0)
- return (0);
+
+ /*
+ * If the init process should be restarted, the
+ * "zone_restart_init" member will be set. Some init
+ * programs in branded zones do not tolerate a restart
+ * in the traditional manner; setting the
+ * "zone_reboot_on_init_exit" member will cause the
+ * entire zone to be rebooted instead. If neither of
+ * these flags is set the zone will shut down.
+ */
+ if (z->zone_reboot_on_init_exit == B_TRUE &&
+ z->zone_restart_init == B_TRUE) {
+ /*
+ * Trigger a zone reboot and continue
+ * with exit processing.
+ */
+ z->zone_init_status = wstat(why, what);
+ (void) zone_kadmin(A_REBOOT, 0, NULL,
+ zone_kcred());
+
} else {
+ if (z->zone_restart_init == B_TRUE) {
+ if (restart_init(what, why) == 0)
+ return (0);
+ }
+
+ z->zone_init_status = wstat(why, what);
(void) zone_kadmin(A_SHUTDOWN, AD_HALT, NULL,
- CRED());
+ zone_kcred());
}
}
@@ -407,6 +417,32 @@ proc_exit(int why, int what)
z->zone_proc_initpid = -1;
}
+ /*
+ * Delay firing probes (and performing brand cleanup) until after the
+ * zone_proc_initpid check. Cases which result in zone shutdown or
+ * restart via zone_kadmin eventually result in a call back to
+ * proc_exit.
+ */
+ DTRACE_PROC(lwp__exit);
+ DTRACE_PROC1(exit, int, why);
+
+ /*
+ * Will perform any brand specific proc exit processing. Since this
+ * is always the last lwp, will also perform lwp exit/free and proc
+ * exit. Brand data will be freed when the process is reaped.
+ */
+ if (PROC_IS_BRANDED(p)) {
+ BROP(p)->b_lwpexit(lwp);
+ BROP(p)->b_proc_exit(p);
+ /*
+ * To ensure that b_proc_exit has access to brand-specific data
+ * contained by the one remaining lwp, call the freelwp hook as
+ * the last part of this clean-up process.
+ */
+ BROP(p)->b_freelwp(lwp);
+ lwp_detach_brand_hdlrs(lwp);
+ }
+
lwp_pcb_exit();
/*
@@ -658,10 +694,22 @@ proc_exit(int why, int what)
if ((q = p->p_child) != NULL && p != proc_init) {
struct proc *np;
struct proc *initp = proc_init;
+ pid_t zone_initpid = 1;
+ struct proc *zoneinitp = NULL;
boolean_t setzonetop = B_FALSE;
- if (!INGLOBALZONE(curproc))
- setzonetop = B_TRUE;
+ if (!INGLOBALZONE(curproc)) {
+ zone_initpid = curproc->p_zone->zone_proc_initpid;
+
+ ASSERT(MUTEX_HELD(&pidlock));
+ zoneinitp = prfind(zone_initpid);
+ if (zoneinitp != NULL) {
+ initp = zoneinitp;
+ } else {
+ zone_initpid = 1;
+ setzonetop = B_TRUE;
+ }
+ }
pgdetach(p);
@@ -673,7 +721,8 @@ proc_exit(int why, int what)
*/
delete_ns(q->p_parent, q);
- q->p_ppid = 1;
+ q->p_ppid = zone_initpid;
+
q->p_pidflag &= ~(CLDNOSIGCHLD | CLDWAITPID);
if (setzonetop) {
mutex_enter(&q->p_lock);
@@ -847,8 +896,50 @@ proc_exit(int why, int what)
mutex_exit(&p->p_lock);
if (!evaporate) {
- p->p_pidflag &= ~CLDPEND;
- sigcld(p, sqp);
+ /*
+ * The brand specific code only happens when the brand has a
+ * function to call in place of sigcld and the parent of the
+ * exiting process is not the global zone init. If the parent
+ * is the global zone init, then the process was reparented,
+ * and we don't want brand code delivering possibly strange
+ * signals to init. Also, init is not branded, so any brand
+ * specific exit data will not be picked up by init anyway.
+ */
+ if (PROC_IS_BRANDED(p) &&
+ BROP(p)->b_exit_with_sig != NULL &&
+ p->p_ppid != 1) {
+ /*
+ * The code for _fini that could unload the brand_t
+ * blocks until the count of zones using the module
+ * reaches zero. Zones decrement the refcount on their
+ * brands only after all user tasks in that zone have
+ * exited and been waited on. The decrement on the
+ * brand's refcount happen in zone_destroy(). That
+ * depends on zone_shutdown() having been completed.
+ * zone_shutdown() includes a call to zone_empty(),
+ * where the zone waits for itself to reach the state
+ * ZONE_IS_EMPTY. This state is only set in either
+ * zone_shutdown(), when there are no user processes as
+ * the zone enters this function, or in
+ * zone_task_rele(). zone_task_rele() is called from
+ * code triggered by waiting on processes, not by the
+ * processes exiting through proc_exit(). This means
+ * all the branded processes that could exist for a
+ * specific brand_t must exit and get reaped before the
+ * refcount on the brand_t can reach 0. _fini will
+ * never unload the corresponding brand module before
+ * proc_exit finishes execution for all processes
+ * branded with a particular brand_t, which makes the
+ * operation below safe to do. Brands that wish to use
+ * this mechanism must wait in _fini as described
+ * above.
+ */
+ BROP(p)->b_exit_with_sig(p, sqp);
+ } else {
+ p->p_pidflag &= ~CLDPEND;
+ sigcld(p, sqp);
+ }
+
} else {
/*
* Do what sigcld() would do if the disposition
@@ -927,10 +1018,9 @@ winfo(proc_t *pp, k_siginfo_t *ip, int waitflag)
int
waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
{
- int found;
proc_t *cp, *pp;
- int proc_gone;
int waitflag = !(options & WNOWAIT);
+ boolean_t have_brand_helper = B_FALSE;
/*
* Obsolete flag, defined here only for binary compatibility
@@ -958,7 +1048,8 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
pp = ttoproc(curthread);
/*
- * lock parent mutex so that sibling chain can be searched.
+ * Anytime you are looking for a process, you take pidlock to prevent
+ * things from changing as you look.
*/
mutex_enter(&pidlock);
@@ -978,10 +1069,37 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
return (ECHILD);
}
- while (pp->p_child != NULL) {
+ if (PROC_IS_BRANDED(pp) && BROP(pp)->b_waitid_helper != NULL) {
+ have_brand_helper = B_TRUE;
+ }
+
+ while (pp->p_child != NULL || have_brand_helper) {
+ boolean_t brand_wants_wait = B_FALSE;
+ int proc_gone = 0;
+ int found = 0;
- proc_gone = 0;
+ /*
+ * Give the brand a chance to return synthetic results from
+ * this waitid() call before we do the real thing.
+ */
+ if (have_brand_helper) {
+ int ret;
+ if (BROP(pp)->b_waitid_helper(idtype, id, ip, options,
+ &brand_wants_wait, &ret) == 0) {
+ mutex_exit(&pidlock);
+ return (ret);
+ }
+
+ if (pp->p_child == NULL) {
+ goto no_real_children;
+ }
+ }
+
+ /*
+ * Look for interesting children in the newstate list.
+ */
+ VERIFY(pp->p_child != NULL);
for (cp = pp->p_child_ns; cp != NULL; cp = cp->p_sibling_ns) {
if (idtype != P_PID && (cp->p_pidflag & CLDWAITPID))
continue;
@@ -989,6 +1107,11 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
continue;
if (idtype == P_PGID && id != cp->p_pgrp)
continue;
+ if (PROC_IS_BRANDED(pp)) {
+ if (BROP(pp)->b_wait_filter != NULL &&
+ BROP(pp)->b_wait_filter(pp, cp) == B_FALSE)
+ continue;
+ }
switch (cp->p_wcode) {
@@ -1033,12 +1156,16 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
* Wow! None of the threads on the p_sibling_ns list were
* interesting threads. Check all the kids!
*/
- found = 0;
for (cp = pp->p_child; cp != NULL; cp = cp->p_sibling) {
if (idtype == P_PID && id != cp->p_pid)
continue;
if (idtype == P_PGID && id != cp->p_pgrp)
continue;
+ if (PROC_IS_BRANDED(pp)) {
+ if (BROP(pp)->b_wait_filter != NULL &&
+ BROP(pp)->b_wait_filter(pp, cp) == B_FALSE)
+ continue;
+ }
switch (cp->p_wcode) {
case CLD_TRAPPED:
@@ -1107,11 +1234,12 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
break;
}
+no_real_children:
/*
* If we found no interesting processes at all,
* break out and return ECHILD.
*/
- if (found + proc_gone == 0)
+ if (!brand_wants_wait && (found + proc_gone == 0))
break;
if (options & WNOHANG) {
@@ -1130,7 +1258,7 @@ waitid(idtype_t idtype, id_t id, k_siginfo_t *ip, int options)
* change state while we wait, we don't wait at all.
* Get out with ECHILD according to SVID.
*/
- if (found == proc_gone)
+ if (!brand_wants_wait && (found == proc_gone))
break;
if (!cv_wait_sig_swap(&pp->p_cv, &pidlock)) {
@@ -1226,6 +1354,12 @@ freeproc(proc_t *p)
p->p_killsqp = NULL;
}
+ /* Clear any remaining brand data */
+ if (PROC_IS_BRANDED(p)) {
+ brand_clearbrand(p, B_FALSE);
+ }
+
+
prfree(p); /* inform /proc */
/*
diff --git a/usr/src/uts/common/os/fio.c b/usr/src/uts/common/os/fio.c
index 76eddd4e50..bfee77130d 100644
--- a/usr/src/uts/common/os/fio.c
+++ b/usr/src/uts/common/os/fio.c
@@ -852,7 +852,8 @@ flist_fork(uf_info_t *pfip, uf_info_t *cfip)
*/
cfip->fi_nfiles = nfiles = flist_minsize(pfip);
- cfip->fi_list = kmem_zalloc(nfiles * sizeof (uf_entry_t), KM_SLEEP);
+ cfip->fi_list = nfiles == 0 ? NULL :
+ kmem_zalloc(nfiles * sizeof (uf_entry_t), KM_SLEEP);
for (fd = 0, pufp = pfip->fi_list, cufp = cfip->fi_list; fd < nfiles;
fd++, pufp++, cufp++) {
diff --git a/usr/src/uts/common/os/fork.c b/usr/src/uts/common/os/fork.c
index fe3a362fa7..d5ba123894 100644
--- a/usr/src/uts/common/os/fork.c
+++ b/usr/src/uts/common/os/fork.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2016, Joyent, Inc.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -84,6 +84,7 @@ static int64_t cfork(int, int, int);
static int getproc(proc_t **, pid_t, uint_t);
#define GETPROC_USER 0x0
#define GETPROC_KERNEL 0x1
+#define GETPROC_ZSCHED 0x2
static void fork_fail(proc_t *);
static void forklwp_fail(proc_t *);
@@ -696,7 +697,7 @@ fork_fail(proc_t *cp)
if (PTOU(curproc)->u_cwd)
refstr_rele(PTOU(curproc)->u_cwd);
if (PROC_IS_BRANDED(cp)) {
- brand_clearbrand(cp, B_TRUE);
+ brand_clearbrand(cp, B_FALSE);
}
}
@@ -745,7 +746,7 @@ forklwp_fail(proc_t *p)
kmem_free(t->t_door, sizeof (door_data_t));
t->t_door = NULL;
}
- lwp_ctmpl_clear(ttolwp(t));
+ lwp_ctmpl_clear(ttolwp(t), B_FALSE);
/*
* Remove the thread from the all threads list.
@@ -782,6 +783,9 @@ extern struct as kas;
/*
* fork a kernel process.
+ *
+ * Passing a pid argument of -1 indicates that the new process should be
+ * launched as a child of 'zsched' within the zone.
*/
int
newproc(void (*pc)(), caddr_t arg, id_t cid, int pri, struct contract **ct,
@@ -800,6 +804,7 @@ newproc(void (*pc)(), caddr_t arg, id_t cid, int pri, struct contract **ct,
rctl_set_t *init_set;
ASSERT(pid != 1);
+ ASSERT(pid >= 0);
if (getproc(&p, pid, GETPROC_KERNEL) < 0)
return (EAGAIN);
@@ -843,8 +848,18 @@ newproc(void (*pc)(), caddr_t arg, id_t cid, int pri, struct contract **ct,
rctl_set_t *init_set;
task_t *tk, *tk_old;
klwp_t *lwp;
+ boolean_t pzsched = B_FALSE;
+ int flag = GETPROC_USER;
+
+ /* Handle a new user-level thread as child of zsched. */
+ if (pid < 0) {
+ VERIFY(curzone != global_zone);
+ flag = GETPROC_ZSCHED;
+ pzsched = B_TRUE;
+ pid = 0;
+ }
- if (getproc(&p, pid, GETPROC_USER) < 0)
+ if (getproc(&p, pid, flag) < 0)
return (EAGAIN);
/*
* init creates a new task, distinct from the task
@@ -902,7 +917,8 @@ newproc(void (*pc)(), caddr_t arg, id_t cid, int pri, struct contract **ct,
}
t = lwptot(lwp);
- ctp = contract_process_fork(sys_process_tmpl, p, curproc,
+ ctp = contract_process_fork(sys_process_tmpl, p,
+ (pzsched ? curproc->p_zone->zone_zsched : curproc),
B_FALSE);
ASSERT(ctp != NULL);
if (ct != NULL)
@@ -943,7 +959,11 @@ getproc(proc_t **cpp, pid_t pid, uint_t flags)
if (zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN)
return (-1); /* no point in starting new processes */
- pp = (flags & GETPROC_KERNEL) ? &p0 : curproc;
+ if (flags & GETPROC_ZSCHED) {
+ pp = curproc->p_zone->zone_zsched;
+ } else {
+ pp = (flags & GETPROC_KERNEL) ? &p0 : curproc;
+ }
task = pp->p_task;
proj = task->tk_proj;
zone = pp->p_zone;
@@ -1004,6 +1024,9 @@ getproc(proc_t **cpp, pid_t pid, uint_t flags)
cp->p_t1_lgrpid = LGRP_NONE;
cp->p_tr_lgrpid = LGRP_NONE;
+ /* Default to native brand initially */
+ cp->p_brand = &native_brand;
+
if ((newpid = pid_allocate(cp, pid, PID_ALLOC_PROC)) == -1) {
if (nproc == v.v_proc) {
CPU_STATS_ADDQ(CPU, sys, procovf, 1);
@@ -1071,9 +1094,6 @@ getproc(proc_t **cpp, pid_t pid, uint_t flags)
cp->p_flag = pp->p_flag & (SJCTL|SNOWAIT|SNOCD);
cp->p_sessp = pp->p_sessp;
sess_hold(pp);
- cp->p_brand = pp->p_brand;
- if (PROC_IS_BRANDED(pp))
- BROP(pp)->b_copy_procdata(cp, pp);
cp->p_bssbase = pp->p_bssbase;
cp->p_brkbase = pp->p_brkbase;
cp->p_brksize = pp->p_brksize;
@@ -1153,6 +1173,18 @@ getproc(proc_t **cpp, pid_t pid, uint_t flags)
mutex_exit(&cp->p_lock);
mutex_exit(&pidlock);
+ if (PROC_IS_BRANDED(pp)) {
+ /*
+ * The only reason why process branding should fail is when
+ * the procedure is complicated by multiple LWPs on the scene.
+ * With an LWP count of 0, this newly allocated process has no
+ * reason to fail branding.
+ */
+ VERIFY0(brand_setbrand(cp, B_FALSE));
+
+ BROP(pp)->b_copy_procdata(cp, pp);
+ }
+
avl_create(&cp->p_ct_held, contract_compar, sizeof (contract_t),
offsetof(contract_t, ct_ctlist));
diff --git a/usr/src/uts/common/os/grow.c b/usr/src/uts/common/os/grow.c
index f5e92cfd94..0c4c0bcad6 100644
--- a/usr/src/uts/common/os/grow.c
+++ b/usr/src/uts/common/os/grow.c
@@ -19,7 +19,10 @@
* CDDL HEADER END
*/
-/* Copyright 2013 OmniTI Computer Consulting, Inc. All rights reserved. */
+/*
+ * Copyright 2013 OmniTI Computer Consulting, Inc. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
@@ -52,6 +55,7 @@
#include <sys/fcntl.h>
#include <sys/lwpchan_impl.h>
#include <sys/nbmlock.h>
+#include <sys/brand.h>
#include <vm/hat.h>
#include <vm/as.h>
@@ -522,6 +526,20 @@ choose_addr(struct as *as, caddr_t *addrp, size_t len, offset_t off,
return (0);
}
+caddr_t
+map_userlimit(proc_t *pp, struct as *as, int flags)
+{
+ if (flags & _MAP_LOW32) {
+ if (PROC_IS_BRANDED(pp) && BROP(pp)->b_map32limit != NULL) {
+ return ((caddr_t)(uintptr_t)BROP(pp)->b_map32limit(pp));
+ } else {
+ return ((caddr_t)_userlimit32);
+ }
+ }
+
+ return (as->a_userlimit);
+}
+
/*
* Used for MAP_ANON - fast way to get anonymous pages
@@ -537,8 +555,6 @@ zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags,
return (EACCES);
if ((flags & MAP_FIXED) != 0) {
- caddr_t userlimit;
-
/*
* Use the user address. First verify that
* the address to be used is page aligned.
@@ -547,9 +563,8 @@ zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags,
if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
return (EINVAL);
- userlimit = flags & _MAP_LOW32 ?
- (caddr_t)USERLIMIT32 : as->a_userlimit;
- switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
+ switch (valid_usr_range(*addrp, len, uprot, as,
+ map_userlimit(as->a_proc, as, flags))) {
case RANGE_OKAY:
break;
case RANGE_BADPROT:
@@ -717,8 +732,6 @@ smmap_common(caddr_t *addrp, size_t len,
* If the user specified an address, do some simple checks here
*/
if ((flags & MAP_FIXED) != 0) {
- caddr_t userlimit;
-
/*
* Use the user address. First verify that
* the address to be used is page aligned.
@@ -726,10 +739,8 @@ smmap_common(caddr_t *addrp, size_t len,
*/
if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
return (EINVAL);
-
- userlimit = flags & _MAP_LOW32 ?
- (caddr_t)USERLIMIT32 : as->a_userlimit;
- switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
+ switch (valid_usr_range(*addrp, len, uprot, as,
+ map_userlimit(curproc, as, flags))) {
case RANGE_OKAY:
break;
case RANGE_BADPROT:
diff --git a/usr/src/uts/common/os/ipc.c b/usr/src/uts/common/os/ipc.c
index 9381019cd1..6a6f5d84ef 100644
--- a/usr/src/uts/common/os/ipc.c
+++ b/usr/src/uts/common/os/ipc.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -1217,6 +1218,23 @@ ipc_remove(ipc_service_t *service, kipc_perm_t *perm)
(IPC_ZONE_USAGE(perm, service) == 0)));
}
+/*
+ * Perform actual IPC_RMID, either via ipc_rmid or due to a delayed *_RMID.
+ */
+void
+ipc_rmsvc(ipc_service_t *service, kipc_perm_t *perm)
+{
+ ASSERT(service->ipcs_count > 0);
+ ASSERT(MUTEX_HELD(&service->ipcs_lock));
+
+ ipc_remove(service, perm);
+ mutex_exit(&service->ipcs_lock);
+
+ /* perform any per-service removal actions */
+ service->ipcs_rmid(perm);
+
+ ipc_rele(service, perm);
+}
/*
* Common code to perform an IPC_RMID. Returns an errno value on
@@ -1247,13 +1265,7 @@ ipc_rmid(ipc_service_t *service, int id, cred_t *cr)
/*
* Nothing can fail from this point on.
*/
- ipc_remove(service, perm);
- mutex_exit(&service->ipcs_lock);
-
- /* perform any per-service removal actions */
- service->ipcs_rmid(perm);
-
- ipc_rele(service, perm);
+ ipc_rmsvc(service, perm);
return (0);
}
diff --git a/usr/src/uts/common/os/kmem.c b/usr/src/uts/common/os/kmem.c
index cc53c2fb76..734fa910e4 100644
--- a/usr/src/uts/common/os/kmem.c
+++ b/usr/src/uts/common/os/kmem.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015 Joyent, Inc. All rights reserved.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*/
@@ -158,10 +159,22 @@
* find known objects and is about to free it, or
* c) the client has freed the object.
* In all these cases (a, b, and c) kmem frees the new object (the
- * unused copy destination) and searches for the old object in the
- * magazine layer. If found, the object is removed from the magazine
- * layer and freed to the slab layer so it will no longer hold the
- * slab hostage.
+ * unused copy destination). In the first case, the object is in
+ * use and the correct action is that for LATER; in the latter two
+ * cases, we know that the object is either freed or about to be
+ * freed, in which case it is either already in a magazine or about
+ * to be in one. In these cases, we know that the object will either
+ * be reallocated and reused, or it will end up in a full magazine
+ * that will be reaped (thereby liberating the slab). Because it
+ * is prohibitively expensive to differentiate these cases, and
+ * because the defrag code is executed when we're low on memory
+ * (thereby biasing the system to reclaim full magazines) we treat
+ * all DONT_KNOW cases as LATER and rely on cache reaping to
+ * generally clean up full magazines. While we take the same action
+ * for these cases, we maintain their semantic distinction: if
+ * defragmentation is not occurring, it is useful to know if this
+ * is due to objects in use (LATER) or objects in an unknown state
+ * of transition (DONT_KNOW).
*
* 2.3 Object States
*
@@ -284,10 +297,10 @@
* view of the slab layer, making it a candidate for the move callback. Most
* objects unrecognized by the client in the move callback fall into this
* category and are cheaply distinguished from known objects by the test
- * described earlier. Since recognition is cheap for the client, and searching
- * magazines is expensive for kmem, kmem defers searching until the client first
- * returns KMEM_CBRC_DONT_KNOW. As long as the needed effort is reasonable, kmem
- * elsewhere does what it can to avoid bothering the client unnecessarily.
+ * described earlier. Because searching magazines is prohibitively expensive
+ * for kmem, clients that do not mark freed objects (and therefore return
+ * KMEM_CBRC_DONT_KNOW for large numbers of objects) may find defragmentation
+ * efficacy reduced.
*
* Invalidating the designated pointer member before freeing the object marks
* the object to be avoided in the callback, and conversely, assigning a valid
@@ -997,6 +1010,7 @@ size_t kmem_transaction_log_size; /* transaction log size [2% of memory] */
size_t kmem_content_log_size; /* content log size [2% of memory] */
size_t kmem_failure_log_size; /* failure log [4 pages per CPU] */
size_t kmem_slab_log_size; /* slab create log [4 pages per CPU] */
+size_t kmem_zerosized_log_size; /* zero-sized log [4 pages per CPU] */
size_t kmem_content_maxsave = 256; /* KMF_CONTENTS max bytes to log */
size_t kmem_lite_minsize = 0; /* minimum buffer size for KMF_LITE */
size_t kmem_lite_maxalign = 1024; /* maximum buffer alignment for KMF_LITE */
@@ -1004,6 +1018,14 @@ int kmem_lite_pcs = 4; /* number of PCs to store in KMF_LITE mode */
size_t kmem_maxverify; /* maximum bytes to inspect in debug routines */
size_t kmem_minfirewall; /* hardware-enforced redzone threshold */
+#ifdef DEBUG
+int kmem_warn_zerosized = 1; /* whether to warn on zero-sized KM_SLEEP */
+#else
+int kmem_warn_zerosized = 0; /* whether to warn on zero-sized KM_SLEEP */
+#endif
+
+int kmem_panic_zerosized = 0; /* whether to panic on zero-sized KM_SLEEP */
+
#ifdef _LP64
size_t kmem_max_cached = KMEM_BIG_MAXBUF; /* maximum kmem_alloc cache */
#else
@@ -1037,21 +1059,7 @@ static vmem_t *kmem_default_arena;
static vmem_t *kmem_firewall_va_arena;
static vmem_t *kmem_firewall_arena;
-/*
- * Define KMEM_STATS to turn on statistic gathering. By default, it is only
- * turned on when DEBUG is also defined.
- */
-#ifdef DEBUG
-#define KMEM_STATS
-#endif /* DEBUG */
-
-#ifdef KMEM_STATS
-#define KMEM_STAT_ADD(stat) ((stat)++)
-#define KMEM_STAT_COND_ADD(cond, stat) ((void) (!(cond) || (stat)++))
-#else
-#define KMEM_STAT_ADD(stat) /* nothing */
-#define KMEM_STAT_COND_ADD(cond, stat) /* nothing */
-#endif /* KMEM_STATS */
+static int kmem_zerosized; /* # of zero-sized allocs */
/*
* kmem slab consolidator thresholds (tunables)
@@ -1070,47 +1078,6 @@ size_t kmem_reclaim_max_slabs = 1;
*/
size_t kmem_reclaim_scan_range = 12;
-#ifdef KMEM_STATS
-static struct {
- uint64_t kms_callbacks;
- uint64_t kms_yes;
- uint64_t kms_no;
- uint64_t kms_later;
- uint64_t kms_dont_need;
- uint64_t kms_dont_know;
- uint64_t kms_hunt_found_mag;
- uint64_t kms_hunt_found_slab;
- uint64_t kms_hunt_alloc_fail;
- uint64_t kms_hunt_lucky;
- uint64_t kms_notify;
- uint64_t kms_notify_callbacks;
- uint64_t kms_disbelief;
- uint64_t kms_already_pending;
- uint64_t kms_callback_alloc_fail;
- uint64_t kms_callback_taskq_fail;
- uint64_t kms_endscan_slab_dead;
- uint64_t kms_endscan_slab_destroyed;
- uint64_t kms_endscan_nomem;
- uint64_t kms_endscan_refcnt_changed;
- uint64_t kms_endscan_nomove_changed;
- uint64_t kms_endscan_freelist;
- uint64_t kms_avl_update;
- uint64_t kms_avl_noupdate;
- uint64_t kms_no_longer_reclaimable;
- uint64_t kms_notify_no_longer_reclaimable;
- uint64_t kms_notify_slab_dead;
- uint64_t kms_notify_slab_destroyed;
- uint64_t kms_alloc_fail;
- uint64_t kms_constructor_fail;
- uint64_t kms_dead_slabs_freed;
- uint64_t kms_defrags;
- uint64_t kms_scans;
- uint64_t kms_scan_depot_ws_reaps;
- uint64_t kms_debug_reaps;
- uint64_t kms_debug_scans;
-} kmem_move_stats;
-#endif /* KMEM_STATS */
-
/* consolidator knobs */
static boolean_t kmem_move_noreap;
static boolean_t kmem_move_blocked;
@@ -1141,6 +1108,7 @@ kmem_log_header_t *kmem_transaction_log;
kmem_log_header_t *kmem_content_log;
kmem_log_header_t *kmem_failure_log;
kmem_log_header_t *kmem_slab_log;
+kmem_log_header_t *kmem_zerosized_log;
static int kmem_lite_count; /* # of PCs in kmem_buftag_lite_t */
@@ -1921,15 +1889,7 @@ kmem_slab_free(kmem_cache_t *cp, void *buf)
cp->cache_complete_slab_count--;
avl_add(&cp->cache_partial_slabs, sp);
} else {
-#ifdef DEBUG
- if (avl_update_gt(&cp->cache_partial_slabs, sp)) {
- KMEM_STAT_ADD(kmem_move_stats.kms_avl_update);
- } else {
- KMEM_STAT_ADD(kmem_move_stats.kms_avl_noupdate);
- }
-#else
(void) avl_update_gt(&cp->cache_partial_slabs, sp);
-#endif
}
ASSERT((cp->cache_slab_create - cp->cache_slab_destroy) ==
@@ -2941,8 +2901,33 @@ kmem_alloc(size_t size, int kmflag)
/* fall through to kmem_cache_alloc() */
} else {
- if (size == 0)
+ if (size == 0) {
+ if (kmflag != KM_SLEEP && !(kmflag & KM_PANIC))
+ return (NULL);
+
+ /*
+ * If this is a sleeping allocation or one that has
+ * been specified to panic on allocation failure, we
+ * consider it to be deprecated behavior to allocate
+ * 0 bytes. If we have been configured to panic under
+ * this condition, we panic; if to warn, we warn -- and
+ * regardless, we log to the kmem_zerosized_log that
+ * that this condition has occurred (which gives us
+ * enough information to be able to debug it).
+ */
+ if (kmem_panic && kmem_panic_zerosized)
+ panic("attempted to kmem_alloc() size of 0");
+
+ if (kmem_warn_zerosized) {
+ cmn_err(CE_WARN, "kmem_alloc(): sleeping "
+ "allocation with size of 0; "
+ "see kmem_zerosized_log for details");
+ }
+
+ kmem_log_event(kmem_zerosized_log, NULL, NULL, NULL);
+
return (NULL);
+ }
buf = vmem_alloc(kmem_oversize_arena, size,
kmflag & KM_VMFLAGS);
@@ -3556,7 +3541,7 @@ kmem_cache_kstat_update(kstat_t *ksp, int rw)
kmcp->kmc_move_later.value.ui64 = kd->kmd_later;
kmcp->kmc_move_dont_need.value.ui64 = kd->kmd_dont_need;
kmcp->kmc_move_dont_know.value.ui64 = kd->kmd_dont_know;
- kmcp->kmc_move_hunt_found.value.ui64 = kd->kmd_hunt_found;
+ kmcp->kmc_move_hunt_found.value.ui64 = 0;
kmcp->kmc_move_slabs_freed.value.ui64 = kd->kmd_slabs_freed;
kmcp->kmc_defrag.value.ui64 = kd->kmd_defrags;
kmcp->kmc_scan.value.ui64 = kd->kmd_scans;
@@ -4127,7 +4112,8 @@ kmem_cache_destroy(kmem_cache_t *cp)
if (kmem_taskq != NULL)
taskq_wait(kmem_taskq);
- if (kmem_move_taskq != NULL)
+
+ if (kmem_move_taskq != NULL && cp->cache_defrag != NULL)
taskq_wait(kmem_move_taskq);
kmem_cache_magazine_purge(cp);
@@ -4465,8 +4451,8 @@ kmem_init(void)
}
kmem_failure_log = kmem_log_init(kmem_failure_log_size);
-
kmem_slab_log = kmem_log_init(kmem_slab_log_size);
+ kmem_zerosized_log = kmem_log_init(kmem_zerosized_log_size);
/*
* Initialize STREAMS message caches so allocb() is available.
@@ -4654,94 +4640,6 @@ kmem_slab_is_reclaimable(kmem_cache_t *cp, kmem_slab_t *sp, int flags)
(sp->slab_chunks * cp->cache_defrag->kmd_reclaim_numer));
}
-static void *
-kmem_hunt_mag(kmem_cache_t *cp, kmem_magazine_t *m, int n, void *buf,
- void *tbuf)
-{
- int i; /* magazine round index */
-
- for (i = 0; i < n; i++) {
- if (buf == m->mag_round[i]) {
- if (cp->cache_flags & KMF_BUFTAG) {
- (void) kmem_cache_free_debug(cp, tbuf,
- caller());
- }
- m->mag_round[i] = tbuf;
- return (buf);
- }
- }
-
- return (NULL);
-}
-
-/*
- * Hunt the magazine layer for the given buffer. If found, the buffer is
- * removed from the magazine layer and returned, otherwise NULL is returned.
- * The state of the returned buffer is freed and constructed.
- */
-static void *
-kmem_hunt_mags(kmem_cache_t *cp, void *buf)
-{
- kmem_cpu_cache_t *ccp;
- kmem_magazine_t *m;
- int cpu_seqid;
- int n; /* magazine rounds */
- void *tbuf; /* temporary swap buffer */
-
- ASSERT(MUTEX_NOT_HELD(&cp->cache_lock));
-
- /*
- * Allocated a buffer to swap with the one we hope to pull out of a
- * magazine when found.
- */
- tbuf = kmem_cache_alloc(cp, KM_NOSLEEP);
- if (tbuf == NULL) {
- KMEM_STAT_ADD(kmem_move_stats.kms_hunt_alloc_fail);
- return (NULL);
- }
- if (tbuf == buf) {
- KMEM_STAT_ADD(kmem_move_stats.kms_hunt_lucky);
- if (cp->cache_flags & KMF_BUFTAG) {
- (void) kmem_cache_free_debug(cp, buf, caller());
- }
- return (buf);
- }
-
- /* Hunt the depot. */
- mutex_enter(&cp->cache_depot_lock);
- n = cp->cache_magtype->mt_magsize;
- for (m = cp->cache_full.ml_list; m != NULL; m = m->mag_next) {
- if (kmem_hunt_mag(cp, m, n, buf, tbuf) != NULL) {
- mutex_exit(&cp->cache_depot_lock);
- return (buf);
- }
- }
- mutex_exit(&cp->cache_depot_lock);
-
- /* Hunt the per-CPU magazines. */
- for (cpu_seqid = 0; cpu_seqid < max_ncpus; cpu_seqid++) {
- ccp = &cp->cache_cpu[cpu_seqid];
-
- mutex_enter(&ccp->cc_lock);
- m = ccp->cc_loaded;
- n = ccp->cc_rounds;
- if (kmem_hunt_mag(cp, m, n, buf, tbuf) != NULL) {
- mutex_exit(&ccp->cc_lock);
- return (buf);
- }
- m = ccp->cc_ploaded;
- n = ccp->cc_prounds;
- if (kmem_hunt_mag(cp, m, n, buf, tbuf) != NULL) {
- mutex_exit(&ccp->cc_lock);
- return (buf);
- }
- mutex_exit(&ccp->cc_lock);
- }
-
- kmem_cache_free(cp, tbuf);
- return (NULL);
-}
-
/*
* May be called from the kmem_move_taskq, from kmem_cache_move_notify_task(),
* or when the buffer is freed.
@@ -4805,7 +4703,7 @@ static void kmem_move_end(kmem_cache_t *, kmem_move_t *);
* NO kmem frees the new buffer, marks the slab of the old buffer
* non-reclaimable to avoid bothering the client again
* LATER kmem frees the new buffer, increments slab_later_count
- * DONT_KNOW kmem frees the new buffer, searches mags for the old buffer
+ * DONT_KNOW kmem frees the new buffer
* DONT_NEED kmem frees both the old buffer and the new buffer
*
* The pending callback argument now being processed contains both of the
@@ -4839,19 +4737,14 @@ kmem_move_buffer(kmem_move_t *callback)
* another buffer on the same slab.
*/
if (!kmem_slab_is_reclaimable(cp, sp, callback->kmm_flags)) {
- KMEM_STAT_ADD(kmem_move_stats.kms_no_longer_reclaimable);
- KMEM_STAT_COND_ADD((callback->kmm_flags & KMM_NOTIFY),
- kmem_move_stats.kms_notify_no_longer_reclaimable);
kmem_slab_free(cp, callback->kmm_to_buf);
kmem_move_end(cp, callback);
return;
}
/*
- * Hunting magazines is expensive, so we'll wait to do that until the
- * client responds KMEM_CBRC_DONT_KNOW. However, checking the slab layer
- * is cheap, so we might as well do that here in case we can avoid
- * bothering the client.
+ * Checking the slab layer is easy, so we might as well do that here
+ * in case we can avoid bothering the client.
*/
mutex_enter(&cp->cache_lock);
free_on_slab = (kmem_slab_allocated(cp, sp,
@@ -4859,7 +4752,6 @@ kmem_move_buffer(kmem_move_t *callback)
mutex_exit(&cp->cache_lock);
if (free_on_slab) {
- KMEM_STAT_ADD(kmem_move_stats.kms_hunt_found_slab);
kmem_slab_free(cp, callback->kmm_to_buf);
kmem_move_end(cp, callback);
return;
@@ -4871,7 +4763,6 @@ kmem_move_buffer(kmem_move_t *callback)
*/
if (kmem_cache_alloc_debug(cp, callback->kmm_to_buf,
KM_NOSLEEP, 1, caller()) != 0) {
- KMEM_STAT_ADD(kmem_move_stats.kms_alloc_fail);
kmem_move_end(cp, callback);
return;
}
@@ -4879,15 +4770,11 @@ kmem_move_buffer(kmem_move_t *callback)
cp->cache_constructor(callback->kmm_to_buf, cp->cache_private,
KM_NOSLEEP) != 0) {
atomic_inc_64(&cp->cache_alloc_fail);
- KMEM_STAT_ADD(kmem_move_stats.kms_constructor_fail);
kmem_slab_free(cp, callback->kmm_to_buf);
kmem_move_end(cp, callback);
return;
}
- KMEM_STAT_ADD(kmem_move_stats.kms_callbacks);
- KMEM_STAT_COND_ADD((callback->kmm_flags & KMM_NOTIFY),
- kmem_move_stats.kms_notify_callbacks);
cp->cache_defrag->kmd_callbacks++;
cp->cache_defrag->kmd_thread = curthread;
cp->cache_defrag->kmd_from_buf = callback->kmm_from_buf;
@@ -4905,7 +4792,6 @@ kmem_move_buffer(kmem_move_t *callback)
cp->cache_defrag->kmd_to_buf = NULL;
if (response == KMEM_CBRC_YES) {
- KMEM_STAT_ADD(kmem_move_stats.kms_yes);
cp->cache_defrag->kmd_yes++;
kmem_slab_free_constructed(cp, callback->kmm_from_buf, B_FALSE);
/* slab safe to access until kmem_move_end() */
@@ -4920,14 +4806,12 @@ kmem_move_buffer(kmem_move_t *callback)
switch (response) {
case KMEM_CBRC_NO:
- KMEM_STAT_ADD(kmem_move_stats.kms_no);
cp->cache_defrag->kmd_no++;
mutex_enter(&cp->cache_lock);
kmem_slab_move_no(cp, sp, callback->kmm_from_buf);
mutex_exit(&cp->cache_lock);
break;
case KMEM_CBRC_LATER:
- KMEM_STAT_ADD(kmem_move_stats.kms_later);
cp->cache_defrag->kmd_later++;
mutex_enter(&cp->cache_lock);
if (!KMEM_SLAB_IS_PARTIAL(sp)) {
@@ -4936,7 +4820,6 @@ kmem_move_buffer(kmem_move_t *callback)
}
if (++sp->slab_later_count >= KMEM_DISBELIEF) {
- KMEM_STAT_ADD(kmem_move_stats.kms_disbelief);
kmem_slab_move_no(cp, sp, callback->kmm_from_buf);
} else if (!(sp->slab_flags & KMEM_SLAB_NOMOVE)) {
sp->slab_stuck_offset = KMEM_SLAB_OFFSET(sp,
@@ -4945,7 +4828,6 @@ kmem_move_buffer(kmem_move_t *callback)
mutex_exit(&cp->cache_lock);
break;
case KMEM_CBRC_DONT_NEED:
- KMEM_STAT_ADD(kmem_move_stats.kms_dont_need);
cp->cache_defrag->kmd_dont_need++;
kmem_slab_free_constructed(cp, callback->kmm_from_buf, B_FALSE);
if (sp->slab_refcnt == 0)
@@ -4955,19 +4837,21 @@ kmem_move_buffer(kmem_move_t *callback)
mutex_exit(&cp->cache_lock);
break;
case KMEM_CBRC_DONT_KNOW:
- KMEM_STAT_ADD(kmem_move_stats.kms_dont_know);
+ /*
+ * If we don't know if we can move this buffer or not, we'll
+ * just assume that we can't: if the buffer is in fact free,
+ * then it is sitting in one of the per-CPU magazines or in
+ * a full magazine in the depot layer. Either way, because
+ * defrag is induced in the same logic that reaps a cache,
+ * it's likely that full magazines will be returned to the
+ * system soon (thereby accomplishing what we're trying to
+ * accomplish here: return those magazines to their slabs).
+ * Given this, any work that we might do now to locate a buffer
+ * in a magazine is wasted (and expensive!) work; we bump
+ * a counter in this case and otherwise assume that we can't
+ * move it.
+ */
cp->cache_defrag->kmd_dont_know++;
- if (kmem_hunt_mags(cp, callback->kmm_from_buf) != NULL) {
- KMEM_STAT_ADD(kmem_move_stats.kms_hunt_found_mag);
- cp->cache_defrag->kmd_hunt_found++;
- kmem_slab_free_constructed(cp, callback->kmm_from_buf,
- B_TRUE);
- if (sp->slab_refcnt == 0)
- cp->cache_defrag->kmd_slabs_freed++;
- mutex_enter(&cp->cache_lock);
- kmem_slab_move_yes(cp, sp, callback->kmm_from_buf);
- mutex_exit(&cp->cache_lock);
- }
break;
default:
panic("'%s' (%p) unexpected move callback response %d\n",
@@ -4992,10 +4876,9 @@ kmem_move_begin(kmem_cache_t *cp, kmem_slab_t *sp, void *buf, int flags)
ASSERT(sp->slab_flags & KMEM_SLAB_MOVE_PENDING);
callback = kmem_cache_alloc(kmem_move_cache, KM_NOSLEEP);
- if (callback == NULL) {
- KMEM_STAT_ADD(kmem_move_stats.kms_callback_alloc_fail);
+
+ if (callback == NULL)
return (B_FALSE);
- }
callback->kmm_from_slab = sp;
callback->kmm_from_buf = buf;
@@ -5020,7 +4903,6 @@ kmem_move_begin(kmem_cache_t *cp, kmem_slab_t *sp, void *buf, int flags)
pending->kmm_flags |= KMM_DESPERATE;
}
mutex_exit(&cp->cache_lock);
- KMEM_STAT_ADD(kmem_move_stats.kms_already_pending);
kmem_cache_free(kmem_move_cache, callback);
return (B_TRUE);
}
@@ -5034,7 +4916,6 @@ kmem_move_begin(kmem_cache_t *cp, kmem_slab_t *sp, void *buf, int flags)
if (!taskq_dispatch(kmem_move_taskq, (task_func_t *)kmem_move_buffer,
callback, TQ_NOSLEEP)) {
- KMEM_STAT_ADD(kmem_move_stats.kms_callback_taskq_fail);
mutex_enter(&cp->cache_lock);
avl_remove(&cp->cache_defrag->kmd_moves_pending, callback);
mutex_exit(&cp->cache_lock);
@@ -5080,7 +4961,6 @@ kmem_move_end(kmem_cache_t *cp, kmem_move_t *callback)
cp->cache_slab_destroy++;
mutex_exit(&cp->cache_lock);
kmem_slab_destroy(cp, sp);
- KMEM_STAT_ADD(kmem_move_stats.kms_dead_slabs_freed);
mutex_enter(&cp->cache_lock);
}
}
@@ -5225,8 +5105,6 @@ kmem_move_buffers(kmem_cache_t *cp, size_t max_scan, size_t max_slabs,
* pending move completes.
*/
list_insert_head(deadlist, sp);
- KMEM_STAT_ADD(kmem_move_stats.
- kms_endscan_slab_dead);
return (-1);
}
@@ -5241,10 +5119,6 @@ kmem_move_buffers(kmem_cache_t *cp, size_t max_scan, size_t max_slabs,
cp->cache_slab_destroy++;
mutex_exit(&cp->cache_lock);
kmem_slab_destroy(cp, sp);
- KMEM_STAT_ADD(kmem_move_stats.
- kms_dead_slabs_freed);
- KMEM_STAT_ADD(kmem_move_stats.
- kms_endscan_slab_destroyed);
mutex_enter(&cp->cache_lock);
/*
* Since we can't pick up the scan where we left
@@ -5260,8 +5134,6 @@ kmem_move_buffers(kmem_cache_t *cp, size_t max_scan, size_t max_slabs,
* for the request and say nothing about the
* number of reclaimable slabs.
*/
- KMEM_STAT_COND_ADD(s < max_slabs,
- kmem_move_stats.kms_endscan_nomem);
return (-1);
}
@@ -5277,16 +5149,10 @@ kmem_move_buffers(kmem_cache_t *cp, size_t max_scan, size_t max_slabs,
* destination buffer on the same slab. In that
* case, we're not interested in counting it.
*/
- KMEM_STAT_COND_ADD(!(flags & KMM_DEBUG) &&
- (s < max_slabs),
- kmem_move_stats.kms_endscan_refcnt_changed);
return (-1);
}
- if ((sp->slab_flags & KMEM_SLAB_NOMOVE) != nomove) {
- KMEM_STAT_COND_ADD(s < max_slabs,
- kmem_move_stats.kms_endscan_nomove_changed);
+ if ((sp->slab_flags & KMEM_SLAB_NOMOVE) != nomove)
return (-1);
- }
/*
* Generating a move request allocates a destination
@@ -5313,11 +5179,6 @@ kmem_move_buffers(kmem_cache_t *cp, size_t max_scan, size_t max_slabs,
}
end_scan:
- KMEM_STAT_COND_ADD(!(flags & KMM_DEBUG) &&
- (s < max_slabs) &&
- (sp == avl_first(&cp->cache_partial_slabs)),
- kmem_move_stats.kms_endscan_freelist);
-
return (s);
}
@@ -5377,8 +5238,6 @@ kmem_cache_move_notify_task(void *arg)
&cp->cache_defrag->kmd_moves_pending)) {
list_insert_head(deadlist, sp);
mutex_exit(&cp->cache_lock);
- KMEM_STAT_ADD(kmem_move_stats.
- kms_notify_slab_dead);
return;
}
@@ -5386,9 +5245,6 @@ kmem_cache_move_notify_task(void *arg)
cp->cache_slab_destroy++;
mutex_exit(&cp->cache_lock);
kmem_slab_destroy(cp, sp);
- KMEM_STAT_ADD(kmem_move_stats.kms_dead_slabs_freed);
- KMEM_STAT_ADD(kmem_move_stats.
- kms_notify_slab_destroyed);
return;
}
} else {
@@ -5402,7 +5258,6 @@ kmem_cache_move_notify(kmem_cache_t *cp, void *buf)
{
kmem_move_notify_args_t *args;
- KMEM_STAT_ADD(kmem_move_stats.kms_notify);
args = kmem_alloc(sizeof (kmem_move_notify_args_t), KM_NOSLEEP);
if (args != NULL) {
args->kmna_cache = cp;
@@ -5425,7 +5280,6 @@ kmem_cache_defrag(kmem_cache_t *cp)
n = avl_numnodes(&cp->cache_partial_slabs);
if (n > 1) {
/* kmem_move_buffers() drops and reacquires cache_lock */
- KMEM_STAT_ADD(kmem_move_stats.kms_defrags);
cp->cache_defrag->kmd_defrags++;
(void) kmem_move_buffers(cp, n, 0, KMM_DESPERATE);
}
@@ -5524,7 +5378,6 @@ kmem_cache_scan(kmem_cache_t *cp)
*
* kmem_move_buffers() drops and reacquires cache_lock.
*/
- KMEM_STAT_ADD(kmem_move_stats.kms_scans);
kmd->kmd_scans++;
slabs_found = kmem_move_buffers(cp, kmem_reclaim_scan_range,
kmem_reclaim_max_slabs, 0);
@@ -5565,12 +5418,9 @@ kmem_cache_scan(kmem_cache_t *cp)
if (!kmem_move_noreap &&
((debug_rand % kmem_mtb_reap) == 0)) {
mutex_exit(&cp->cache_lock);
- KMEM_STAT_ADD(kmem_move_stats.kms_debug_reaps);
kmem_cache_reap(cp);
return;
} else if ((debug_rand % kmem_mtb_move) == 0) {
- KMEM_STAT_ADD(kmem_move_stats.kms_scans);
- KMEM_STAT_ADD(kmem_move_stats.kms_debug_scans);
kmd->kmd_scans++;
(void) kmem_move_buffers(cp,
kmem_reclaim_scan_range, 1, KMM_DEBUG);
@@ -5581,8 +5431,6 @@ kmem_cache_scan(kmem_cache_t *cp)
mutex_exit(&cp->cache_lock);
- if (reap) {
- KMEM_STAT_ADD(kmem_move_stats.kms_scan_depot_ws_reaps);
+ if (reap)
kmem_depot_ws_reap(cp);
- }
}
diff --git a/usr/src/uts/common/os/logsubr.c b/usr/src/uts/common/os/logsubr.c
index 149f5f8a88..cbc4fa0000 100644
--- a/usr/src/uts/common/os/logsubr.c
+++ b/usr/src/uts/common/os/logsubr.c
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2013 Gary Mills
* Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, Joyent, Inc.
*/
#include <sys/types.h>
@@ -249,8 +250,7 @@ log_init(void)
*/
printf("\rSunOS Release %s Version %s %u-bit\n",
utsname.release, utsname.version, NBBY * (uint_t)sizeof (void *));
- printf("Copyright (c) 1983, 2010, Oracle and/or its affiliates. "
- "All rights reserved.\n");
+ printf("Copyright (c) 2010-2016, Joyent Inc. All rights reserved.\n");
#ifdef DEBUG
printf("DEBUG enabled\n");
#endif
diff --git a/usr/src/uts/common/os/lwp.c b/usr/src/uts/common/os/lwp.c
index feb8e76c42..a7de7b513f 100644
--- a/usr/src/uts/common/os/lwp.c
+++ b/usr/src/uts/common/os/lwp.c
@@ -25,7 +25,7 @@
*/
/*
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2016, Joyent, Inc.
*/
#include <sys/param.h>
@@ -57,6 +57,8 @@
#include <sys/lgrp.h>
#include <sys/rctl.h>
#include <sys/contract_impl.h>
+#include <sys/contract/process.h>
+#include <sys/contract/process_impl.h>
#include <sys/cpc_impl.h>
#include <sys/sdt.h>
#include <sys/cmn_err.h>
@@ -115,7 +117,7 @@ lwp_create(void (*proc)(), caddr_t arg, size_t len, proc_t *p,
ret_tidhash_t *ret_tidhash = NULL;
int i;
int rctlfail = 0;
- boolean_t branded = 0;
+ void *brand_data = NULL;
struct ctxop *ctx = NULL;
ASSERT(cid != sysdccid); /* system threads must start in SYS */
@@ -283,6 +285,19 @@ lwp_create(void (*proc)(), caddr_t arg, size_t len, proc_t *p,
*/
lep = kmem_zalloc(sizeof (*lep), KM_SLEEP);
+ /*
+ * If necessary, speculatively allocate lwp brand data. This is done
+ * ahead of time so p_lock need not be dropped during lwp branding.
+ */
+ if (PROC_IS_BRANDED(p) && BROP(p)->b_lwpdata_alloc != NULL) {
+ if ((brand_data = BROP(p)->b_lwpdata_alloc(p)) == NULL) {
+ mutex_enter(&p->p_lock);
+ err = 1;
+ atomic_inc_32(&p->p_zone->zone_ffmisc);
+ goto error;
+ }
+ }
+
mutex_enter(&p->p_lock);
grow:
/*
@@ -630,18 +645,6 @@ grow:
} while (lwp_hash_lookup(p, t->t_tid) != NULL);
}
- /*
- * If this is a branded process, let the brand do any necessary lwp
- * initialization.
- */
- if (PROC_IS_BRANDED(p)) {
- if (BROP(p)->b_initlwp(lwp)) {
- err = 1;
- atomic_inc_32(&p->p_zone->zone_ffmisc);
- goto error;
- }
- branded = 1;
- }
if (t->t_tid == 1) {
kpreempt_disable();
@@ -654,7 +657,6 @@ grow:
}
}
- p->p_lwpcnt++;
t->t_waitfor = -1;
/*
@@ -696,8 +698,27 @@ grow:
t->t_post_sys = 1;
/*
+ * Perform lwp branding
+ *
+ * The b_initlwp hook is _not_ allowed to drop p->p_lock as it must be
+ * continuously held between when the tidhash is sized and when the lwp
+ * is inserted into it. Operations requiring p->p_lock to be
+ * temporarily dropped can be performed in b_initlwp_post.
+ */
+ if (PROC_IS_BRANDED(p)) {
+ BROP(p)->b_initlwp(lwp, brand_data);
+ /*
+ * The b_initlwp hook is expected to consume any preallocated
+ * brand_data in a way that prepares it for deallocation by the
+ * b_freelwp hook.
+ */
+ brand_data = NULL;
+ }
+
+ /*
* Insert the new thread into the list of all threads.
*/
+ p->p_lwpcnt++;
if ((tx = p->p_tlist) == NULL) {
t->t_back = t;
t->t_forw = t;
@@ -718,6 +739,13 @@ grow:
lep->le_start = t->t_start;
lwp_hash_in(p, lep, p->p_tidhash, p->p_tidhash_sz, 1);
+ /*
+ * Complete lwp branding
+ */
+ if (PROC_IS_BRANDED(p) && BROP(p)->b_initlwp_post != NULL) {
+ BROP(p)->b_initlwp_post(lwp);
+ }
+
if (state == TS_RUN) {
/*
* We set the new lwp running immediately.
@@ -753,8 +781,9 @@ error:
if (cid != NOCLASS && bufp != NULL)
CL_FREE(cid, bufp);
- if (branded)
- BROP(p)->b_freelwp(lwp);
+ if (brand_data != NULL) {
+ BROP(p)->b_lwpdata_free(brand_data);
+ }
mutex_exit(&p->p_lock);
t->t_state = TS_FREE;
@@ -827,8 +856,27 @@ lwp_ctmpl_copy(klwp_t *dst, klwp_t *src)
int i;
for (i = 0; i < ct_ntypes; i++) {
- dst->lwp_ct_active[i] = ctmpl_dup(src->lwp_ct_active[i]);
+ ct_template_t *tmpl = src->lwp_ct_active[i];
+
+ /*
+ * If the process contract template is setup to be preserved
+ * across exec, then if we're forking, perform an implicit
+ * template_clear now. This ensures that future children of
+ * this child will remain in the same contract unless they're
+ * explicitly setup differently. We know we're forking if the
+ * two LWPs belong to different processes.
+ */
+ if (i == CTT_PROCESS && tmpl != NULL) {
+ ctmpl_process_t *ctp = tmpl->ctmpl_data;
+
+ if (dst->lwp_procp != src->lwp_procp &&
+ (ctp->ctp_params & CT_PR_KEEP_EXEC) != 0)
+ tmpl = NULL;
+ }
+
+ dst->lwp_ct_active[i] = ctmpl_dup(tmpl);
dst->lwp_ct_latest[i] = NULL;
+
}
}
@@ -836,21 +884,33 @@ lwp_ctmpl_copy(klwp_t *dst, klwp_t *src)
* Clear an LWP's contract template state.
*/
void
-lwp_ctmpl_clear(klwp_t *lwp)
+lwp_ctmpl_clear(klwp_t *lwp, boolean_t is_exec)
{
ct_template_t *tmpl;
int i;
for (i = 0; i < ct_ntypes; i++) {
- if ((tmpl = lwp->lwp_ct_active[i]) != NULL) {
- ctmpl_free(tmpl);
- lwp->lwp_ct_active[i] = NULL;
- }
-
if (lwp->lwp_ct_latest[i] != NULL) {
contract_rele(lwp->lwp_ct_latest[i]);
lwp->lwp_ct_latest[i] = NULL;
}
+
+ if ((tmpl = lwp->lwp_ct_active[i]) != NULL) {
+ /*
+ * If we're exec-ing a new program and the process
+ * contract template is setup to be preserved across
+ * exec, then don't clear it.
+ */
+ if (is_exec && i == CTT_PROCESS) {
+ ctmpl_process_t *ctp = tmpl->ctmpl_data;
+
+ if ((ctp->ctp_params & CT_PR_KEEP_EXEC) != 0)
+ continue;
+ }
+
+ ctmpl_free(tmpl);
+ lwp->lwp_ct_active[i] = NULL;
+ }
}
}
@@ -891,13 +951,6 @@ lwp_exit(void)
if (t->t_upimutex != NULL)
upimutex_cleanup();
- /*
- * Perform any brand specific exit processing, then release any
- * brand data associated with the lwp
- */
- if (PROC_IS_BRANDED(p))
- BROP(p)->b_lwpexit(lwp);
-
lwp_pcb_exit();
mutex_enter(&p->p_lock);
@@ -941,6 +994,18 @@ lwp_exit(void)
DTRACE_PROC(lwp__exit);
/*
+ * Perform any brand specific exit processing, then release any
+ * brand data associated with the lwp
+ */
+ if (PROC_IS_BRANDED(p)) {
+ mutex_exit(&p->p_lock);
+ BROP(p)->b_lwpexit(lwp);
+ BROP(p)->b_freelwp(lwp);
+ mutex_enter(&p->p_lock);
+ prbarrier(p);
+ }
+
+ /*
* If the lwp is a detached lwp or if the process is exiting,
* remove (lwp_hash_out()) the lwp from the lwp directory.
* Otherwise null out the lwp's le_thread pointer in the lwp
@@ -1101,7 +1166,7 @@ lwp_cleanup(void)
}
kpreempt_enable();
- lwp_ctmpl_clear(ttolwp(t));
+ lwp_ctmpl_clear(ttolwp(t), B_FALSE);
}
int
diff --git a/usr/src/uts/common/os/main.c b/usr/src/uts/common/os/main.c
index 7afc1cfe00..dda0b3e4a6 100644
--- a/usr/src/uts/common/os/main.c
+++ b/usr/src/uts/common/os/main.c
@@ -27,7 +27,7 @@
/* All Rights Reserved */
/*
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#include <sys/types.h>
@@ -157,7 +157,7 @@ exec_init(const char *initpath, const char *args)
int error = 0, count = 0;
proc_t *p = ttoproc(curthread);
klwp_t *lwp = ttolwp(curthread);
- int brand_action;
+ int brand_action = EBA_NONE;
if (args == NULL)
args = "";
@@ -268,7 +268,15 @@ exec_init(const char *initpath, const char *args)
*/
sigemptyset(&curthread->t_hold);
- brand_action = ZONE_IS_BRANDED(p->p_zone) ? EBA_BRAND : EBA_NONE;
+ /*
+ * Only instruct exec_common to brand the process if necessary. It is
+ * possible that the init process is already properly branded due to the
+ * proc_exit -> restart_init -> exec_init call chain.
+ */
+ if (ZONE_IS_BRANDED(p->p_zone) &&
+ p->p_brand != p->p_zone->zone_brand) {
+ brand_action = EBA_BRAND;
+ }
again:
error = exec_common((const char *)(uintptr_t)exec_fnamep,
(const char **)(uintptr_t)uap, NULL, brand_action);
diff --git a/usr/src/uts/common/os/mmapobj.c b/usr/src/uts/common/os/mmapobj.c
index b6b5446d71..596c855a45 100644
--- a/usr/src/uts/common/os/mmapobj.c
+++ b/usr/src/uts/common/os/mmapobj.c
@@ -1360,10 +1360,15 @@ calc_loadable(Ehdr *ehdrp, caddr_t phdrbase, int nphdrs, size_t *len,
}
if (num_segs++ == 0) {
/*
- * The p_vaddr of the first PT_LOAD segment
- * must either be NULL or within the first
- * page in order to be interpreted.
- * Otherwise, its an invalid file.
+ * While ELF doesn't specify the meaning of
+ * p_vaddr for PT_LOAD segments in ET_DYN
+ * objects, we mandate that is either NULL or
+ * (to accommodate some historical binaries)
+ * within the first page. (Note that there
+ * exist non-native ET_DYN objects that violate
+ * this constraint that we nonetheless must be
+ * able to execute; see the ET_DYN handling in
+ * mapelfexec() for details.)
*/
if (e_type == ET_DYN &&
((caddr_t)((uintptr_t)vaddr &
diff --git a/usr/src/uts/common/os/pid.c b/usr/src/uts/common/os/pid.c
index b555bb82b7..eba6147fab 100644
--- a/usr/src/uts/common/os/pid.c
+++ b/usr/src/uts/common/os/pid.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -112,6 +113,18 @@ pid_lookup(pid_t pid)
return (pidp);
}
+struct pid *
+pid_find(pid_t pid)
+{
+ struct pid *pidp;
+
+ mutex_enter(&pidlinklock);
+ pidp = pid_lookup(pid);
+ mutex_exit(&pidlinklock);
+
+ return (pidp);
+}
+
void
pid_setmin(void)
{
@@ -522,6 +535,20 @@ sprunlock(proc_t *p)
THREAD_KPRI_RELEASE();
}
+/*
+ * Undo effects of sprlock but without dropping p->p_lock
+ */
+void
+sprunprlock(proc_t *p)
+{
+ ASSERT(p->p_proc_flag & P_PR_LOCK);
+ ASSERT(MUTEX_HELD(&p->p_lock));
+
+ cv_signal(&pr_pid_cv[p->p_slot]);
+ p->p_proc_flag &= ~P_PR_LOCK;
+ THREAD_KPRI_RELEASE();
+}
+
void
pid_init(void)
{
diff --git a/usr/src/uts/common/os/policy.c b/usr/src/uts/common/os/policy.c
index 07bc2920da..d2bdb4ce37 100644
--- a/usr/src/uts/common/os/policy.c
+++ b/usr/src/uts/common/os/policy.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
#include <sys/types.h>
@@ -55,6 +55,7 @@
#include <sys/mntent.h>
#include <sys/contract_impl.h>
#include <sys/dld_ioc.h>
+#include <sys/brand.h>
/*
* There are two possible layers of privilege routines and two possible
@@ -1243,6 +1244,22 @@ secpolicy_vnode_owner(const cred_t *cr, uid_t owner)
void
secpolicy_setid_clear(vattr_t *vap, cred_t *cr)
{
+ proc_t *p = curproc;
+
+ /*
+ * Allow the brand to override this behaviour.
+ */
+ if (PROC_IS_BRANDED(p) && BROP(p)->b_setid_clear != NULL) {
+ /*
+ * This brand hook will return 0 if handling is complete, or
+ * some other value if the brand would like us to fall back to
+ * the usual behaviour.
+ */
+ if (BROP(p)->b_setid_clear(vap, cr) == 0) {
+ return;
+ }
+ }
+
if ((vap->va_mode & (S_ISUID | S_ISGID)) != 0 &&
secpolicy_vnode_setid_retain(cr,
(vap->va_mode & S_ISUID) != 0 &&
@@ -2078,6 +2095,13 @@ secpolicy_meminfo(const cred_t *cr)
}
int
+secpolicy_fs_import(const cred_t *cr)
+{
+ return (PRIV_POLICY(cr, PRIV_SYS_FS_IMPORT, B_FALSE, EPERM, NULL));
+}
+
+
+int
secpolicy_pfexec_register(const cred_t *cr)
{
return (PRIV_POLICY(cr, PRIV_SYS_ADMIN, B_TRUE, EPERM, NULL));
@@ -2581,3 +2605,11 @@ secpolicy_ppp_config(const cred_t *cr)
return (secpolicy_net_config(cr, B_FALSE));
return (PRIV_POLICY(cr, PRIV_SYS_PPP_CONFIG, B_FALSE, EPERM, NULL));
}
+
+int
+secpolicy_hyprlofs_control(const cred_t *cr)
+{
+ if (PRIV_POLICY(cr, PRIV_HYPRLOFS_CONTROL, B_FALSE, EPERM, NULL))
+ return (EPERM);
+ return (0);
+}
diff --git a/usr/src/uts/common/os/priv_defs b/usr/src/uts/common/os/priv_defs
index a3cdaccc2a..cc1c5e03a6 100644
--- a/usr/src/uts/common/os/priv_defs
+++ b/usr/src/uts/common/os/priv_defs
@@ -177,6 +177,10 @@ privilege PRIV_GRAPHICS_MAP
Allows a process to perform privileged mappings through a
graphics device.
+privilege PRIV_HYPRLOFS_CONTROL
+
+ Allows a process to manage hyprlofs entries.
+
privilege PRIV_IPC_DAC_READ
Allows a process to read a System V IPC
@@ -372,6 +376,10 @@ privilege PRIV_SYS_DEVICES
Allows a process to open the real console device directly.
Allows a process to open devices that have been exclusively opened.
+privilege PRIV_SYS_FS_IMPORT
+
+ Allows a process to import a potentially untrusted file system.
+
privilege PRIV_SYS_IPC_CONFIG
Allows a process to increase the size of a System V IPC Message
diff --git a/usr/src/uts/common/os/project.c b/usr/src/uts/common/os/project.c
index 7bd3dd963f..d89f62bea7 100644
--- a/usr/src/uts/common/os/project.c
+++ b/usr/src/uts/common/os/project.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2016, Joyent, Inc.
*/
#include <sys/project.h>
@@ -659,6 +660,14 @@ static rctl_ops_t project_tasks_ops = {
*/
/*ARGSUSED*/
+static rctl_qty_t
+project_shmmax_usage(rctl_t *rctl, struct proc *p)
+{
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ return (p->p_task->tk_proj->kpj_data.kpd_shmmax);
+}
+
+/*ARGSUSED*/
static int
project_shmmax_test(struct rctl *rctl, struct proc *p, rctl_entity_p_t *e,
rctl_val_t *rval, rctl_qty_t inc, uint_t flags)
@@ -675,7 +684,7 @@ project_shmmax_test(struct rctl *rctl, struct proc *p, rctl_entity_p_t *e,
static rctl_ops_t project_shmmax_ops = {
rcop_no_action,
- rcop_no_usage,
+ project_shmmax_usage,
rcop_no_set,
project_shmmax_test
};
@@ -685,6 +694,14 @@ static rctl_ops_t project_shmmax_ops = {
*/
/*ARGSUSED*/
+static rctl_qty_t
+project_shmmni_usage(rctl_t *rctl, struct proc *p)
+{
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ return (p->p_task->tk_proj->kpj_data.kpd_ipc.ipcq_shmmni);
+}
+
+/*ARGSUSED*/
static int
project_shmmni_test(struct rctl *rctl, struct proc *p, rctl_entity_p_t *e,
rctl_val_t *rval, rctl_qty_t inc, uint_t flags)
@@ -701,7 +718,7 @@ project_shmmni_test(struct rctl *rctl, struct proc *p, rctl_entity_p_t *e,
static rctl_ops_t project_shmmni_ops = {
rcop_no_action,
- rcop_no_usage,
+ project_shmmni_usage,
rcop_no_set,
project_shmmni_test
};
@@ -711,6 +728,14 @@ static rctl_ops_t project_shmmni_ops = {
*/
/*ARGSUSED*/
+static rctl_qty_t
+project_semmni_usage(rctl_t *rctl, struct proc *p)
+{
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ return (p->p_task->tk_proj->kpj_data.kpd_ipc.ipcq_semmni);
+}
+
+/*ARGSUSED*/
static int
project_semmni_test(struct rctl *rctl, struct proc *p, rctl_entity_p_t *e,
rctl_val_t *rval, rctl_qty_t inc, uint_t flags)
@@ -727,7 +752,7 @@ project_semmni_test(struct rctl *rctl, struct proc *p, rctl_entity_p_t *e,
static rctl_ops_t project_semmni_ops = {
rcop_no_action,
- rcop_no_usage,
+ project_semmni_usage,
rcop_no_set,
project_semmni_test
};
@@ -737,6 +762,14 @@ static rctl_ops_t project_semmni_ops = {
*/
/*ARGSUSED*/
+static rctl_qty_t
+project_msgmni_usage(rctl_t *rctl, struct proc *p)
+{
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ return (p->p_task->tk_proj->kpj_data.kpd_ipc.ipcq_msgmni);
+}
+
+/*ARGSUSED*/
static int
project_msgmni_test(struct rctl *rctl, struct proc *p, rctl_entity_p_t *e,
rctl_val_t *rval, rctl_qty_t inc, uint_t flags)
@@ -753,7 +786,7 @@ project_msgmni_test(struct rctl *rctl, struct proc *p, rctl_entity_p_t *e,
static rctl_ops_t project_msgmni_ops = {
rcop_no_action,
- rcop_no_usage,
+ project_msgmni_usage,
rcop_no_set,
project_msgmni_test
};
diff --git a/usr/src/uts/common/os/sched.c b/usr/src/uts/common/os/sched.c
index c1d6569f11..15e77d39f7 100644
--- a/usr/src/uts/common/os/sched.c
+++ b/usr/src/uts/common/os/sched.c
@@ -27,6 +27,10 @@
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
+/*
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
+ */
+
#include <sys/param.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
@@ -646,16 +650,17 @@ top:
klwp_t *lwp = ttolwp(tp);
/*
- * Swapout eligible lwps (specified by the scheduling
- * class) which don't have TS_DONT_SWAP set. Set the
- * "intent to swap" flag (TS_SWAPENQ) on threads
- * which have TS_DONT_SWAP set so that they can be
+ * Swapout eligible lwps (specified by the scheduling class)
+ * which don't have TS_DONT_SWAP set. Set the "intent to swap"
+ * flag (TS_SWAPENQ) on threads which have either TS_DONT_SWAP
+ * set or are currently on a split stack so that they can be
* swapped if and when they reach a safe point.
*/
thread_lock(tp);
thread_pri = CL_SWAPOUT(tp, swapflags);
if (thread_pri != -1) {
- if (tp->t_schedflag & TS_DONT_SWAP) {
+ if ((tp->t_schedflag & TS_DONT_SWAP) ||
+ (tp->t_flag & T_SPLITSTK)) {
tp->t_schedflag |= TS_SWAPENQ;
tp->t_trapret = 1;
aston(tp);
diff --git a/usr/src/uts/common/os/shm.c b/usr/src/uts/common/os/shm.c
index bacc595f78..5deae96d73 100644
--- a/usr/src/uts/common/os/shm.c
+++ b/usr/src/uts/common/os/shm.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -319,6 +320,7 @@ shmat(int shmid, caddr_t uaddr, int uflags, uintptr_t *rvp)
size_t share_size;
struct shm_data ssd;
uintptr_t align_hint;
+ long curprot;
/*
* Pick a share pagesize to use, if (!isspt(sp)).
@@ -453,6 +455,7 @@ shmat(int shmid, caddr_t uaddr, int uflags, uintptr_t *rvp)
}
}
+ curprot = sp->shm_opts & SHM_PROT_MASK;
if (!isspt(sp)) {
error = sptcreate(size, &segspt, sp->shm_amp, prot,
flags, share_szc);
@@ -462,8 +465,8 @@ shmat(int shmid, caddr_t uaddr, int uflags, uintptr_t *rvp)
}
sp->shm_sptinfo->sptas = segspt->s_as;
sp->shm_sptseg = segspt;
- sp->shm_sptprot = prot;
- } else if ((prot & sp->shm_sptprot) != sp->shm_sptprot) {
+ sp->shm_opts = (sp->shm_opts & ~SHM_PROT_MASK) | prot;
+ } else if ((prot & curprot) != curprot) {
/*
* Ensure we're attaching to an ISM segment with
* fewer or equal permissions than what we're
@@ -748,6 +751,23 @@ shmctl(int shmid, int cmd, void *arg)
}
break;
+ /* Stage segment for removal, but don't remove until last detach */
+ case SHM_RMID:
+ if ((error = secpolicy_ipc_owner(cr, (kipc_perm_t *)sp)) != 0)
+ break;
+
+ /*
+ * If attached, just mark it as a pending remove, otherwise
+ * we must perform the normal ipc_rmid now.
+ */
+ if ((sp->shm_perm.ipc_ref - 1) > 0) {
+ sp->shm_opts |= SHM_RM_PENDING;
+ } else {
+ mutex_exit(lock);
+ return (ipc_rmid(shm_svc, shmid, cr));
+ }
+ break;
+
default:
error = EINVAL;
break;
@@ -778,6 +798,23 @@ shm_detach(proc_t *pp, segacct_t *sap)
sp->shm_ismattch--;
sp->shm_dtime = gethrestime_sec();
sp->shm_lpid = pp->p_pid;
+ if ((sp->shm_opts & SHM_RM_PENDING) != 0 &&
+ sp->shm_perm.ipc_ref == 2) {
+ /*
+ * If this is the last detach of the segment across the whole
+ * system then now we can perform the delayed IPC_RMID.
+ * The ipc_ref count has 1 for the original 'get' and one for
+ * each 'attach' (see 'stat' handling in shmctl).
+ */
+ sp->shm_opts &= ~SHM_RM_PENDING;
+ mutex_enter(&shm_svc->ipcs_lock);
+ ipc_rmsvc(shm_svc, (kipc_perm_t *)sp); /* Drops lock */
+ ASSERT(!MUTEX_HELD(&shm_svc->ipcs_lock));
+ ASSERT(((kipc_perm_t *)sp)->ipc_ref > 0);
+
+ /* Lock was dropped, need to retake it for following rele. */
+ (void) ipc_lock(shm_svc, sp->shm_perm.ipc_id);
+ }
ipc_rele(shm_svc, (kipc_perm_t *)sp); /* Drops lock */
kmem_free(sap, sizeof (segacct_t));
diff --git a/usr/src/uts/common/os/sig.c b/usr/src/uts/common/os/sig.c
index 453b1f22d4..5ef12f3ae4 100644
--- a/usr/src/uts/common/os/sig.c
+++ b/usr/src/uts/common/os/sig.c
@@ -22,7 +22,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -60,6 +60,7 @@
#include <sys/cyclic.h>
#include <sys/dtrace.h>
#include <sys/sdt.h>
+#include <sys/brand.h>
#include <sys/signalfd.h>
const k_sigset_t nullsmask = {0, 0, 0};
@@ -148,6 +149,21 @@ signal_is_blocked(kthread_t *t, int sig)
}
/*
+ * Return true if the signal can safely be ignored.
+ * That is, if the signal is included in the p_ignore mask and doing so is not
+ * forbidden by any process branding.
+ */
+static int
+sig_ignorable(proc_t *p, klwp_t *lwp, int sig)
+{
+ return (sigismember(&p->p_ignore, sig) && /* sig in ignore mask */
+ !(PROC_IS_BRANDED(p) && /* allowed by brand */
+ BROP(p)->b_sig_ignorable != NULL &&
+ BROP(p)->b_sig_ignorable(p, lwp, sig) == B_FALSE));
+
+}
+
+/*
* Return true if the signal can safely be discarded on generation.
* That is, if there is no need for the signal on the receiving end.
* The answer is true if the process is a zombie or
@@ -159,12 +175,13 @@ signal_is_blocked(kthread_t *t, int sig)
* the signal is not being accepted via sigwait()
*/
static int
-sig_discardable(proc_t *p, int sig)
+sig_discardable(proc_t *p, kthread_t *tp, int sig)
{
kthread_t *t = p->p_tlist;
+ klwp_t *lwp = (tp == NULL) ? NULL : tp->t_lwp;
return (t == NULL || /* if zombie or ... */
- (sigismember(&p->p_ignore, sig) && /* signal is ignored */
+ (sig_ignorable(p, lwp, sig) && /* signal is ignored */
t->t_forw == t && /* and single-threaded */
!tracing(p, sig) && /* and no /proc tracing */
!signal_is_blocked(t, sig) && /* and signal not blocked */
@@ -200,7 +217,7 @@ eat_signal(kthread_t *t, int sig)
!(ttoproc(t)->p_proc_flag & P_PR_LOCK)) {
ttoproc(t)->p_stopsig = 0;
t->t_dtrace_stop = 0;
- t->t_schedflag |= TS_XSTART | TS_PSTART;
+ t->t_schedflag |= TS_XSTART | TS_PSTART | TS_BSTART;
setrun_locked(t);
} else if (t != curthread && t->t_state == TS_ONPROC) {
aston(t); /* make it do issig promptly */
@@ -297,7 +314,7 @@ sigtoproc(proc_t *p, kthread_t *t, int sig)
}
}
- if (sig_discardable(p, sig)) {
+ if (sig_discardable(p, t, sig)) {
DTRACE_PROC3(signal__discard, kthread_t *, p->p_tlist,
proc_t *, p, int, sig);
return;
@@ -497,7 +514,7 @@ issig_justlooking(void)
if (sigismember(&set, sig) &&
(tracing(p, sig) ||
sigismember(&t->t_sigwait, sig) ||
- !sigismember(&p->p_ignore, sig))) {
+ !sig_ignorable(p, lwp, sig))) {
/*
* Don't promote a signal that will stop
* the process when lwp_nostop is set.
@@ -623,6 +640,21 @@ issig_forreal(void)
}
/*
+ * Allow the brand the chance to alter (or suppress) delivery
+ * of this signal.
+ */
+ if (PROC_IS_BRANDED(p) && BROP(p)->b_issig_stop != NULL) {
+ /*
+ * The brand hook will return 0 if it would like
+ * us to drive on, or -1 if we should restart
+ * the loop to check other conditions.
+ */
+ if (BROP(p)->b_issig_stop(p, lwp) != 0) {
+ continue;
+ }
+ }
+
+ /*
* Honor requested stop before dealing with the
* current signal; a debugger may change it.
* Do not want to go back to loop here since this is a special
@@ -656,7 +688,7 @@ issig_forreal(void)
lwp->lwp_cursig = 0;
lwp->lwp_extsig = 0;
if (sigismember(&t->t_sigwait, sig) ||
- (!sigismember(&p->p_ignore, sig) &&
+ (!sig_ignorable(p, lwp, sig) &&
!isjobstop(sig))) {
if (p->p_flag & (SEXITLWPS|SKILLED)) {
sig = SIGKILL;
@@ -708,7 +740,7 @@ issig_forreal(void)
toproc = 0;
if (tracing(p, sig) ||
sigismember(&t->t_sigwait, sig) ||
- !sigismember(&p->p_ignore, sig)) {
+ !sig_ignorable(p, lwp, sig)) {
if (sigismember(&t->t_extsig, sig))
ext = 1;
break;
@@ -722,7 +754,7 @@ issig_forreal(void)
toproc = 1;
if (tracing(p, sig) ||
sigismember(&t->t_sigwait, sig) ||
- !sigismember(&p->p_ignore, sig)) {
+ !sig_ignorable(p, lwp, sig)) {
if (sigismember(&p->p_extsig, sig))
ext = 1;
break;
@@ -954,6 +986,16 @@ stop(int why, int what)
}
break;
+ case PR_BRAND:
+ /*
+ * We have been stopped by the brand code for a brand-private
+ * reason. This is an asynchronous stop affecting only this
+ * LWP.
+ */
+ VERIFY(PROC_IS_BRANDED(p));
+ flags &= ~TS_BSTART;
+ break;
+
default: /* /proc stop */
flags &= ~TS_PSTART;
/*
@@ -1065,7 +1107,7 @@ stop(int why, int what)
}
}
- if (why != PR_JOBCONTROL && why != PR_CHECKPOINT) {
+ if (why != PR_JOBCONTROL && why != PR_CHECKPOINT && why != PR_BRAND) {
/*
* Do process-level notification when all lwps are
* either stopped on events of interest to /proc
@@ -1171,6 +1213,13 @@ stop(int why, int what)
if (why == PR_CHECKPOINT)
del_one_utstop();
+ /*
+ * Allow the brand to post notification of this stop condition.
+ */
+ if (PROC_IS_BRANDED(p) && BROP(p)->b_stop_notify != NULL) {
+ BROP(p)->b_stop_notify(p, lwp, why, what);
+ }
+
thread_lock(t);
ASSERT((t->t_schedflag & TS_ALLSTART) == 0);
t->t_schedflag |= flags;
@@ -1192,7 +1241,7 @@ stop(int why, int what)
(p->p_flag & (SEXITLWPS|SKILLED))) {
p->p_stopsig = 0;
thread_lock(t);
- t->t_schedflag |= TS_XSTART | TS_PSTART;
+ t->t_schedflag |= TS_XSTART | TS_PSTART | TS_BSTART;
setrun_locked(t);
thread_unlock_nopreempt(t);
} else if (why == PR_JOBCONTROL) {
@@ -1327,7 +1376,7 @@ psig(void)
* this signal from pending to current (we dropped p->p_lock).
* This can happen only in a multi-threaded process.
*/
- if (sigismember(&p->p_ignore, sig) ||
+ if (sig_ignorable(p, lwp, sig) ||
(func == SIG_DFL && sigismember(&stopdefault, sig))) {
lwp->lwp_cursig = 0;
lwp->lwp_extsig = 0;
@@ -1771,9 +1820,12 @@ post_sigcld(proc_t *cp, sigqueue_t *sqp)
/*
* This can only happen when the parent is init.
* (See call to sigcld(q, NULL) in exit().)
- * Use KM_NOSLEEP to avoid deadlock.
+ * Use KM_NOSLEEP to avoid deadlock. The child procs
+ * initpid can be 1 for zlogin.
*/
- ASSERT(pp == proc_init);
+ ASSERT(pp->p_pidp->pid_id ==
+ cp->p_zone->zone_proc_initpid ||
+ pp->p_pidp->pid_id == 1);
winfo(cp, &info, 0);
sigaddq(pp, NULL, &info, KM_NOSLEEP);
} else {
@@ -1804,6 +1856,15 @@ sigcld_repost()
sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
mutex_enter(&pidlock);
+ if (PROC_IS_BRANDED(pp) && BROP(pp)->b_sigcld_repost != NULL) {
+ /*
+ * Allow the brand to inject synthetic SIGCLD signals.
+ */
+ if (BROP(pp)->b_sigcld_repost(pp, sqp) == 0) {
+ mutex_exit(&pidlock);
+ return;
+ }
+ }
for (cp = pp->p_child; cp; cp = cp->p_sibling) {
if (cp->p_pidflag & CLDPEND) {
post_sigcld(cp, sqp);
@@ -2115,7 +2176,7 @@ sigaddqa(proc_t *p, kthread_t *t, sigqueue_t *sigqp)
ASSERT(MUTEX_HELD(&p->p_lock));
ASSERT(sig >= 1 && sig < NSIG);
- if (sig_discardable(p, sig))
+ if (sig_discardable(p, t, sig))
siginfofree(sigqp);
else
sigaddqins(p, t, sigqp);
@@ -2141,7 +2202,7 @@ sigaddq(proc_t *p, kthread_t *t, k_siginfo_t *infop, int km_flags)
* blocking the signal (it *could* change it's mind while
* the signal is pending) then don't bother creating one.
*/
- if (!sig_discardable(p, sig) &&
+ if (!sig_discardable(p, t, sig) &&
(sigismember(&p->p_siginfo, sig) ||
(curproc->p_ct_process != p->p_ct_process) ||
(sig == SIGCLD && SI_FROMKERNEL(infop))) &&
diff --git a/usr/src/uts/common/os/smb_subr.c b/usr/src/uts/common/os/smb_subr.c
index 6084676b17..6dc7230bed 100644
--- a/usr/src/uts/common/os/smb_subr.c
+++ b/usr/src/uts/common/os/smb_subr.c
@@ -25,7 +25,9 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2015 Joyent, Inc. All rights reserved.
+ */
#include <sys/smbios_impl.h>
#include <sys/cmn_err.h>
@@ -43,13 +45,13 @@ smb_strerror(int err)
void *
smb_alloc(size_t len)
{
- return (kmem_alloc(len, KM_SLEEP));
+ return (len > 0 ? kmem_alloc(len, KM_SLEEP) : NULL);
}
void *
smb_zalloc(size_t len)
{
- return (kmem_zalloc(len, KM_SLEEP));
+ return (len > 0 ? kmem_zalloc(len, KM_SLEEP) : NULL);
}
void
diff --git a/usr/src/uts/common/os/streamio.c b/usr/src/uts/common/os/streamio.c
index 62f94729cf..0a1406e0cd 100644
--- a/usr/src/uts/common/os/streamio.c
+++ b/usr/src/uts/common/os/streamio.c
@@ -24,7 +24,7 @@
/*
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -77,6 +77,7 @@
#include <sys/policy.h>
#include <sys/dld.h>
#include <sys/zone.h>
+#include <sys/limits.h>
#include <c2/audit.h>
/*
@@ -985,12 +986,20 @@ strget(struct stdata *stp, queue_t *q, struct uio *uiop, int first,
* (registered in sd_wakeq).
*/
struiod_t uiod;
+ struct iovec buf[IOV_MAX_STACK];
+ int iovlen = 0;
if (first)
stp->sd_wakeq &= ~RSLEEP;
- (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov,
- sizeof (uiod.d_iov) / sizeof (*uiod.d_iov));
+ if (uiop->uio_iovcnt > IOV_MAX_STACK) {
+ iovlen = uiop->uio_iovcnt * sizeof (iovec_t);
+ uiod.d_iov = kmem_alloc(iovlen, KM_SLEEP);
+ } else {
+ uiod.d_iov = buf;
+ }
+
+ (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov, uiop->uio_iovcnt);
uiod.d_mp = 0;
/*
* Mark that a thread is in rwnext on the read side
@@ -1029,6 +1038,8 @@ strget(struct stdata *stp, queue_t *q, struct uio *uiop, int first,
if ((bp = uiod.d_mp) != NULL) {
*errorp = 0;
ASSERT(MUTEX_HELD(&stp->sd_lock));
+ if (iovlen != 0)
+ kmem_free(uiod.d_iov, iovlen);
return (bp);
}
error = 0;
@@ -1048,8 +1059,14 @@ strget(struct stdata *stp, queue_t *q, struct uio *uiop, int first,
} else {
*errorp = error;
ASSERT(MUTEX_HELD(&stp->sd_lock));
+ if (iovlen != 0)
+ kmem_free(uiod.d_iov, iovlen);
return (NULL);
}
+
+ if (iovlen != 0)
+ kmem_free(uiod.d_iov, iovlen);
+
/*
* Try a getq in case a rwnext() generated mblk
* has bubbled up via strrput().
@@ -2544,6 +2561,8 @@ strput(struct stdata *stp, mblk_t *mctl, struct uio *uiop, ssize_t *iosize,
int b_flag, int pri, int flags)
{
struiod_t uiod;
+ struct iovec buf[IOV_MAX_STACK];
+ int iovlen = 0;
mblk_t *mp;
queue_t *wqp = stp->sd_wrq;
int error = 0;
@@ -2635,13 +2654,21 @@ strput(struct stdata *stp, mblk_t *mctl, struct uio *uiop, ssize_t *iosize,
mp->b_flag |= b_flag;
mp->b_band = (uchar_t)pri;
- (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov,
- sizeof (uiod.d_iov) / sizeof (*uiod.d_iov));
+ if (uiop->uio_iovcnt > IOV_MAX_STACK) {
+ iovlen = uiop->uio_iovcnt * sizeof (iovec_t);
+ uiod.d_iov = (struct iovec *)kmem_alloc(iovlen, KM_SLEEP);
+ } else {
+ uiod.d_iov = buf;
+ }
+
+ (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov, uiop->uio_iovcnt);
uiod.d_uio.uio_offset = 0;
uiod.d_mp = mp;
error = rwnext(wqp, &uiod);
if (! uiod.d_mp) {
uioskip(uiop, *iosize);
+ if (iovlen != 0)
+ kmem_free(uiod.d_iov, iovlen);
return (error);
}
ASSERT(mp == uiod.d_mp);
@@ -2659,17 +2686,23 @@ strput(struct stdata *stp, mblk_t *mctl, struct uio *uiop, ssize_t *iosize,
error = 0;
} else {
freemsg(mp);
+ if (iovlen != 0)
+ kmem_free(uiod.d_iov, iovlen);
return (error);
}
/* Have to check canput before consuming data from the uio */
if (pri == 0) {
if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) {
freemsg(mp);
+ if (iovlen != 0)
+ kmem_free(uiod.d_iov, iovlen);
return (EWOULDBLOCK);
}
} else {
if (!bcanputnext(wqp, pri) && !(flags & MSG_IGNFLOW)) {
freemsg(mp);
+ if (iovlen != 0)
+ kmem_free(uiod.d_iov, iovlen);
return (EWOULDBLOCK);
}
}
@@ -2677,6 +2710,8 @@ strput(struct stdata *stp, mblk_t *mctl, struct uio *uiop, ssize_t *iosize,
/* Copyin data from the uio */
if ((error = struioget(wqp, mp, &uiod, 0)) != 0) {
freemsg(mp);
+ if (iovlen != 0)
+ kmem_free(uiod.d_iov, iovlen);
return (error);
}
uioskip(uiop, *iosize);
@@ -2693,6 +2728,8 @@ strput(struct stdata *stp, mblk_t *mctl, struct uio *uiop, ssize_t *iosize,
putnext(wqp, mp);
stream_runservice(stp);
}
+ if (iovlen != 0)
+ kmem_free(uiod.d_iov, iovlen);
return (0);
}
@@ -3178,6 +3215,7 @@ job_control_type(int cmd)
case JAGENT: /* Obsolete */
case JTRUN: /* Obsolete */
case JXTPROTO: /* Obsolete */
+ case TIOCSETLD:
return (JCSETP);
}
diff --git a/usr/src/uts/common/os/sysent.c b/usr/src/uts/common/os/sysent.c
index 0d1bb6a8a1..aa44ccf788 100644
--- a/usr/src/uts/common/os/sysent.c
+++ b/usr/src/uts/common/os/sysent.c
@@ -1093,18 +1093,20 @@ char **syscallnames;
systrace_sysent_t *systrace_sysent;
void (*systrace_probe)(dtrace_id_t, uintptr_t, uintptr_t,
- uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+ uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
/*ARGSUSED*/
void
systrace_stub(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
- uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5)
+ uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5,
+ uintptr_t arg6, uintptr_t arg7)
{}
/*ARGSUSED*/
int64_t
dtrace_systrace_syscall(uintptr_t arg0, uintptr_t arg1, uintptr_t arg2,
- uintptr_t arg3, uintptr_t arg4, uintptr_t arg5)
+ uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6,
+ uintptr_t arg7)
{
systrace_sysent_t *sy = &systrace_sysent[curthread->t_sysnum];
dtrace_id_t id;
@@ -1112,7 +1114,8 @@ dtrace_systrace_syscall(uintptr_t arg0, uintptr_t arg1, uintptr_t arg2,
proc_t *p;
if ((id = sy->stsy_entry) != DTRACE_IDNONE)
- (*systrace_probe)(id, arg0, arg1, arg2, arg3, arg4, arg5);
+ (*systrace_probe)(id, arg0, arg1, arg2, arg3, arg4, arg5,
+ arg6, arg7);
/*
* We want to explicitly allow DTrace consumers to stop a process
@@ -1126,14 +1129,15 @@ dtrace_systrace_syscall(uintptr_t arg0, uintptr_t arg1, uintptr_t arg2,
}
mutex_exit(&p->p_lock);
- rval = (*sy->stsy_underlying)(arg0, arg1, arg2, arg3, arg4, arg5);
+ rval = (*sy->stsy_underlying)(arg0, arg1, arg2, arg3, arg4, arg5,
+ arg6, arg7);
if (ttolwp(curthread)->lwp_errno != 0)
rval = -1;
if ((id = sy->stsy_return) != DTRACE_IDNONE)
(*systrace_probe)(id, (uintptr_t)rval, (uintptr_t)rval,
- (uintptr_t)((int64_t)rval >> 32), 0, 0, 0);
+ (uintptr_t)((int64_t)rval >> 32), 0, 0, 0, 0, 0);
return (rval);
}
@@ -1145,7 +1149,8 @@ systrace_sysent_t *systrace_sysent32;
/*ARGSUSED*/
int64_t
dtrace_systrace_syscall32(uintptr_t arg0, uintptr_t arg1, uintptr_t arg2,
- uintptr_t arg3, uintptr_t arg4, uintptr_t arg5)
+ uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6,
+ uintptr_t arg7)
{
systrace_sysent_t *sy = &systrace_sysent32[curthread->t_sysnum];
dtrace_id_t id;
@@ -1153,7 +1158,8 @@ dtrace_systrace_syscall32(uintptr_t arg0, uintptr_t arg1, uintptr_t arg2,
proc_t *p;
if ((id = sy->stsy_entry) != DTRACE_IDNONE)
- (*systrace_probe)(id, arg0, arg1, arg2, arg3, arg4, arg5);
+ (*systrace_probe)(id, arg0, arg1, arg2, arg3, arg4, arg5, arg6,
+ arg7);
/*
* We want to explicitly allow DTrace consumers to stop a process
@@ -1167,14 +1173,15 @@ dtrace_systrace_syscall32(uintptr_t arg0, uintptr_t arg1, uintptr_t arg2,
}
mutex_exit(&p->p_lock);
- rval = (*sy->stsy_underlying)(arg0, arg1, arg2, arg3, arg4, arg5);
+ rval = (*sy->stsy_underlying)(arg0, arg1, arg2, arg3, arg4, arg5, arg6,
+ arg7);
if (ttolwp(curthread)->lwp_errno != 0)
rval = -1;
if ((id = sy->stsy_return) != DTRACE_IDNONE)
(*systrace_probe)(id, (uintptr_t)rval, (uintptr_t)rval,
- (uintptr_t)((uint64_t)rval >> 32), 0, 0, 0);
+ (uintptr_t)((uint64_t)rval >> 32), 0, 0, 0, 0, 0);
return (rval);
}
@@ -1202,5 +1209,5 @@ dtrace_systrace_rtt(void)
}
if ((id = sy->stsy_return) != DTRACE_IDNONE)
- (*systrace_probe)(id, 0, 0, 0, 0, 0, 0);
+ (*systrace_probe)(id, 0, 0, 0, 0, 0, 0, 0, 0);
}
diff --git a/usr/src/uts/common/os/vmem.c b/usr/src/uts/common/os/vmem.c
index a554f8c3f3..0a6fe0ef96 100644
--- a/usr/src/uts/common/os/vmem.c
+++ b/usr/src/uts/common/os/vmem.c
@@ -1618,7 +1618,7 @@ vmem_destroy(vmem_t *vmp)
leaked = vmem_size(vmp, VMEM_ALLOC);
if (leaked != 0)
- cmn_err(CE_WARN, "vmem_destroy('%s'): leaked %lu %s",
+ cmn_err(CE_WARN, "!vmem_destroy('%s'): leaked %lu %s",
vmp->vm_name, leaked, (vmp->vm_cflags & VMC_IDENTIFIER) ?
"identifiers" : "bytes");
diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c
index c997f8fd8d..e86fe138e3 100644
--- a/usr/src/uts/common/os/zone.c
+++ b/usr/src/uts/common/os/zone.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2015, Joyent Inc. All rights reserved.
+ * Copyright 2016, Joyent Inc.
*/
/*
@@ -250,6 +250,8 @@
#include <sys/cpucaps.h>
#include <vm/seg.h>
#include <sys/mac.h>
+#include <sys/rt.h>
+#include <sys/fx.h>
/*
* This constant specifies the number of seconds that threads waiting for
@@ -370,8 +372,12 @@ static char *zone_ref_subsys_names[] = {
rctl_hndl_t rc_zone_cpu_shares;
rctl_hndl_t rc_zone_locked_mem;
rctl_hndl_t rc_zone_max_swap;
+rctl_hndl_t rc_zone_phys_mem;
rctl_hndl_t rc_zone_max_lofi;
rctl_hndl_t rc_zone_cpu_cap;
+rctl_hndl_t rc_zone_cpu_baseline;
+rctl_hndl_t rc_zone_cpu_burst_time;
+rctl_hndl_t rc_zone_zfs_io_pri;
rctl_hndl_t rc_zone_nlwps;
rctl_hndl_t rc_zone_nprocs;
rctl_hndl_t rc_zone_shmmax;
@@ -417,8 +423,9 @@ static boolean_t zsd_wait_for_inprogress(zone_t *, struct zsd_entry *,
* Version 5 alters the zone_boot system call, and converts its old
* bootargs parameter to be set by the zone_setattr API instead.
* Version 6 adds the flag argument to zone_create.
+ * Version 7 adds the requested zoneid to zone_create.
*/
-static const int ZONE_SYSCALL_API_VERSION = 6;
+static const int ZONE_SYSCALL_API_VERSION = 7;
/*
* Certain filesystems (such as NFS and autofs) need to know which zone
@@ -1377,6 +1384,114 @@ static rctl_ops_t zone_cpu_cap_ops = {
/*ARGSUSED*/
static rctl_qty_t
+zone_cpu_base_get(rctl_t *rctl, struct proc *p)
+{
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ return (cpucaps_zone_get_base(p->p_zone));
+}
+
+/*
+ * The zone cpu base is used to set the baseline CPU for the zone
+ * so we can track when the zone is bursting.
+ */
+/*ARGSUSED*/
+static int
+zone_cpu_base_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
+ rctl_qty_t nv)
+{
+ zone_t *zone = e->rcep_p.zone;
+
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ ASSERT(e->rcep_t == RCENTITY_ZONE);
+
+ if (zone == NULL)
+ return (0);
+
+ return (cpucaps_zone_set_base(zone, nv));
+}
+
+static rctl_ops_t zone_cpu_base_ops = {
+ rcop_no_action,
+ zone_cpu_base_get,
+ zone_cpu_base_set,
+ rcop_no_test
+};
+
+/*ARGSUSED*/
+static rctl_qty_t
+zone_cpu_burst_time_get(rctl_t *rctl, struct proc *p)
+{
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ return (cpucaps_zone_get_burst_time(p->p_zone));
+}
+
+/*
+ * The zone cpu burst time is used to set the amount of time CPU(s) can be
+ * bursting for the zone.
+ */
+/*ARGSUSED*/
+static int
+zone_cpu_burst_time_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
+ rctl_qty_t nv)
+{
+ zone_t *zone = e->rcep_p.zone;
+
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ ASSERT(e->rcep_t == RCENTITY_ZONE);
+
+ if (zone == NULL)
+ return (0);
+
+ return (cpucaps_zone_set_burst_time(zone, nv));
+}
+
+static rctl_ops_t zone_cpu_burst_time_ops = {
+ rcop_no_action,
+ zone_cpu_burst_time_get,
+ zone_cpu_burst_time_set,
+ rcop_no_test
+};
+
+/*
+ * zone.zfs-io-pri resource control support (IO priority).
+ */
+/*ARGSUSED*/
+static rctl_qty_t
+zone_zfs_io_pri_get(rctl_t *rctl, struct proc *p)
+{
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ return (p->p_zone->zone_zfs_io_pri);
+}
+
+/*ARGSUSED*/
+static int
+zone_zfs_io_pri_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
+ rctl_qty_t nv)
+{
+ zone_t *zone = e->rcep_p.zone;
+
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ ASSERT(e->rcep_t == RCENTITY_ZONE);
+
+ if (zone == NULL)
+ return (0);
+
+ /*
+ * set priority to the new value.
+ */
+ zone->zone_zfs_io_pri = nv;
+ return (0);
+}
+
+static rctl_ops_t zone_zfs_io_pri_ops = {
+ rcop_no_action,
+ zone_zfs_io_pri_get,
+ zone_zfs_io_pri_set,
+ rcop_no_test
+};
+
+/*ARGSUSED*/
+static rctl_qty_t
zone_lwps_usage(rctl_t *r, proc_t *p)
{
rctl_qty_t nlwps;
@@ -1486,6 +1601,14 @@ static rctl_ops_t zone_procs_ops = {
};
/*ARGSUSED*/
+static rctl_qty_t
+zone_shmmax_usage(rctl_t *rctl, struct proc *p)
+{
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ return (p->p_zone->zone_shmmax);
+}
+
+/*ARGSUSED*/
static int
zone_shmmax_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rval,
rctl_qty_t incr, uint_t flags)
@@ -1501,12 +1624,20 @@ zone_shmmax_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rval,
static rctl_ops_t zone_shmmax_ops = {
rcop_no_action,
- rcop_no_usage,
+ zone_shmmax_usage,
rcop_no_set,
zone_shmmax_test
};
/*ARGSUSED*/
+static rctl_qty_t
+zone_shmmni_usage(rctl_t *rctl, struct proc *p)
+{
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ return (p->p_zone->zone_ipc.ipcq_shmmni);
+}
+
+/*ARGSUSED*/
static int
zone_shmmni_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rval,
rctl_qty_t incr, uint_t flags)
@@ -1522,12 +1653,20 @@ zone_shmmni_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rval,
static rctl_ops_t zone_shmmni_ops = {
rcop_no_action,
- rcop_no_usage,
+ zone_shmmni_usage,
rcop_no_set,
zone_shmmni_test
};
/*ARGSUSED*/
+static rctl_qty_t
+zone_semmni_usage(rctl_t *rctl, struct proc *p)
+{
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ return (p->p_zone->zone_ipc.ipcq_semmni);
+}
+
+/*ARGSUSED*/
static int
zone_semmni_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rval,
rctl_qty_t incr, uint_t flags)
@@ -1543,12 +1682,20 @@ zone_semmni_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rval,
static rctl_ops_t zone_semmni_ops = {
rcop_no_action,
- rcop_no_usage,
+ zone_semmni_usage,
rcop_no_set,
zone_semmni_test
};
/*ARGSUSED*/
+static rctl_qty_t
+zone_msgmni_usage(rctl_t *rctl, struct proc *p)
+{
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ return (p->p_zone->zone_ipc.ipcq_msgmni);
+}
+
+/*ARGSUSED*/
static int
zone_msgmni_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rval,
rctl_qty_t incr, uint_t flags)
@@ -1564,7 +1711,7 @@ zone_msgmni_test(rctl_t *r, proc_t *p, rctl_entity_p_t *e, rctl_val_t *rval,
static rctl_ops_t zone_msgmni_ops = {
rcop_no_action,
- rcop_no_usage,
+ zone_msgmni_usage,
rcop_no_set,
zone_msgmni_test
};
@@ -1671,6 +1818,39 @@ static rctl_ops_t zone_max_swap_ops = {
/*ARGSUSED*/
static rctl_qty_t
+zone_phys_mem_usage(rctl_t *rctl, struct proc *p)
+{
+ rctl_qty_t q;
+ zone_t *z = p->p_zone;
+
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ /* No additional lock because not enforced in the kernel */
+ q = z->zone_phys_mem;
+ return (q);
+}
+
+/*ARGSUSED*/
+static int
+zone_phys_mem_set(rctl_t *rctl, struct proc *p, rctl_entity_p_t *e,
+ rctl_qty_t nv)
+{
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ ASSERT(e->rcep_t == RCENTITY_ZONE);
+ if (e->rcep_p.zone == NULL)
+ return (0);
+ e->rcep_p.zone->zone_phys_mem_ctl = nv;
+ return (0);
+}
+
+static rctl_ops_t zone_phys_mem_ops = {
+ rcop_no_action,
+ zone_phys_mem_usage,
+ zone_phys_mem_set,
+ rcop_no_test
+};
+
+/*ARGSUSED*/
+static rctl_qty_t
zone_max_lofi_usage(rctl_t *rctl, struct proc *p)
{
rctl_qty_t q;
@@ -1764,6 +1944,20 @@ zone_lockedmem_kstat_update(kstat_t *ksp, int rw)
}
static int
+zone_physmem_kstat_update(kstat_t *ksp, int rw)
+{
+ zone_t *zone = ksp->ks_private;
+ zone_kstat_t *zk = ksp->ks_data;
+
+ if (rw == KSTAT_WRITE)
+ return (EACCES);
+
+ zk->zk_usage.value.ui64 = zone->zone_phys_mem;
+ zk->zk_value.value.ui64 = zone->zone_phys_mem_ctl;
+ return (0);
+}
+
+static int
zone_nprocs_kstat_update(kstat_t *ksp, int rw)
{
zone_t *zone = ksp->ks_private;
@@ -1792,7 +1986,7 @@ zone_swapresv_kstat_update(kstat_t *ksp, int rw)
}
static kstat_t *
-zone_kstat_create_common(zone_t *zone, char *name,
+zone_rctl_kstat_create_common(zone_t *zone, char *name,
int (*updatefunc) (kstat_t *, int))
{
kstat_t *ksp;
@@ -1817,6 +2011,160 @@ zone_kstat_create_common(zone_t *zone, char *name,
return (ksp);
}
+static int
+zone_vfs_kstat_update(kstat_t *ksp, int rw)
+{
+ zone_t *zone = ksp->ks_private;
+ zone_vfs_kstat_t *zvp = ksp->ks_data;
+ kstat_io_t *kiop = &zone->zone_vfs_rwstats;
+
+ if (rw == KSTAT_WRITE)
+ return (EACCES);
+
+ /*
+ * Extract the VFS statistics from the kstat_io_t structure used by
+ * kstat_runq_enter() and related functions. Since the slow ops
+ * counters are updated directly by the VFS layer, there's no need to
+ * copy those statistics here.
+ *
+ * Note that kstat_runq_enter() and the related functions use
+ * gethrtime_unscaled(), so scale the time here.
+ */
+ zvp->zv_nread.value.ui64 = kiop->nread;
+ zvp->zv_reads.value.ui64 = kiop->reads;
+ zvp->zv_rtime.value.ui64 = kiop->rtime;
+ zvp->zv_rcnt.value.ui64 = kiop->rcnt;
+ zvp->zv_rlentime.value.ui64 = kiop->rlentime;
+ zvp->zv_nwritten.value.ui64 = kiop->nwritten;
+ zvp->zv_writes.value.ui64 = kiop->writes;
+ zvp->zv_wtime.value.ui64 = kiop->wtime;
+ zvp->zv_wcnt.value.ui64 = kiop->wcnt;
+ zvp->zv_wlentime.value.ui64 = kiop->wlentime;
+
+ scalehrtime((hrtime_t *)&zvp->zv_rtime.value.ui64);
+ scalehrtime((hrtime_t *)&zvp->zv_rlentime.value.ui64);
+ scalehrtime((hrtime_t *)&zvp->zv_wtime.value.ui64);
+ scalehrtime((hrtime_t *)&zvp->zv_wlentime.value.ui64);
+
+ return (0);
+}
+
+static kstat_t *
+zone_vfs_kstat_create(zone_t *zone)
+{
+ kstat_t *ksp;
+ zone_vfs_kstat_t *zvp;
+
+ if ((ksp = kstat_create_zone("zone_vfs", zone->zone_id,
+ zone->zone_name, "zone_vfs", KSTAT_TYPE_NAMED,
+ sizeof (zone_vfs_kstat_t) / sizeof (kstat_named_t),
+ KSTAT_FLAG_VIRTUAL, zone->zone_id)) == NULL)
+ return (NULL);
+
+ if (zone->zone_id != GLOBAL_ZONEID)
+ kstat_zone_add(ksp, GLOBAL_ZONEID);
+
+ zvp = ksp->ks_data = kmem_zalloc(sizeof (zone_vfs_kstat_t), KM_SLEEP);
+ ksp->ks_data_size += strlen(zone->zone_name) + 1;
+ ksp->ks_lock = &zone->zone_vfs_lock;
+ zone->zone_vfs_stats = zvp;
+
+ /* The kstat "name" field is not large enough for a full zonename */
+ kstat_named_init(&zvp->zv_zonename, "zonename", KSTAT_DATA_STRING);
+ kstat_named_setstr(&zvp->zv_zonename, zone->zone_name);
+ kstat_named_init(&zvp->zv_nread, "nread", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_reads, "reads", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_rtime, "rtime", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_rcnt, "rcnt", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_rlentime, "rlentime", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_nwritten, "nwritten", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_writes, "writes", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_wtime, "wtime", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_wcnt, "wcnt", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_wlentime, "wlentime", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_10ms_ops, "10ms_ops", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_100ms_ops, "100ms_ops", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_1s_ops, "1s_ops", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_10s_ops, "10s_ops", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_delay_cnt, "delay_cnt", KSTAT_DATA_UINT64);
+ kstat_named_init(&zvp->zv_delay_time, "delay_time", KSTAT_DATA_UINT64);
+
+ ksp->ks_update = zone_vfs_kstat_update;
+ ksp->ks_private = zone;
+
+ kstat_install(ksp);
+ return (ksp);
+}
+
+static int
+zone_zfs_kstat_update(kstat_t *ksp, int rw)
+{
+ zone_t *zone = ksp->ks_private;
+ zone_zfs_kstat_t *zzp = ksp->ks_data;
+ kstat_io_t *kiop = &zone->zone_zfs_rwstats;
+
+ if (rw == KSTAT_WRITE)
+ return (EACCES);
+
+ /*
+ * Extract the ZFS statistics from the kstat_io_t structure used by
+ * kstat_runq_enter() and related functions. Since the I/O throttle
+ * counters are updated directly by the ZFS layer, there's no need to
+ * copy those statistics here.
+ *
+ * Note that kstat_runq_enter() and the related functions use
+ * gethrtime_unscaled(), so scale the time here.
+ */
+ zzp->zz_nread.value.ui64 = kiop->nread;
+ zzp->zz_reads.value.ui64 = kiop->reads;
+ zzp->zz_rtime.value.ui64 = kiop->rtime;
+ zzp->zz_rlentime.value.ui64 = kiop->rlentime;
+ zzp->zz_nwritten.value.ui64 = kiop->nwritten;
+ zzp->zz_writes.value.ui64 = kiop->writes;
+
+ scalehrtime((hrtime_t *)&zzp->zz_rtime.value.ui64);
+ scalehrtime((hrtime_t *)&zzp->zz_rlentime.value.ui64);
+
+ return (0);
+}
+
+static kstat_t *
+zone_zfs_kstat_create(zone_t *zone)
+{
+ kstat_t *ksp;
+ zone_zfs_kstat_t *zzp;
+
+ if ((ksp = kstat_create_zone("zone_zfs", zone->zone_id,
+ zone->zone_name, "zone_zfs", KSTAT_TYPE_NAMED,
+ sizeof (zone_zfs_kstat_t) / sizeof (kstat_named_t),
+ KSTAT_FLAG_VIRTUAL, zone->zone_id)) == NULL)
+ return (NULL);
+
+ if (zone->zone_id != GLOBAL_ZONEID)
+ kstat_zone_add(ksp, GLOBAL_ZONEID);
+
+ zzp = ksp->ks_data = kmem_zalloc(sizeof (zone_zfs_kstat_t), KM_SLEEP);
+ ksp->ks_data_size += strlen(zone->zone_name) + 1;
+ ksp->ks_lock = &zone->zone_zfs_lock;
+ zone->zone_zfs_stats = zzp;
+
+ /* The kstat "name" field is not large enough for a full zonename */
+ kstat_named_init(&zzp->zz_zonename, "zonename", KSTAT_DATA_STRING);
+ kstat_named_setstr(&zzp->zz_zonename, zone->zone_name);
+ kstat_named_init(&zzp->zz_nread, "nread", KSTAT_DATA_UINT64);
+ kstat_named_init(&zzp->zz_reads, "reads", KSTAT_DATA_UINT64);
+ kstat_named_init(&zzp->zz_rtime, "rtime", KSTAT_DATA_UINT64);
+ kstat_named_init(&zzp->zz_rlentime, "rlentime", KSTAT_DATA_UINT64);
+ kstat_named_init(&zzp->zz_nwritten, "nwritten", KSTAT_DATA_UINT64);
+ kstat_named_init(&zzp->zz_writes, "writes", KSTAT_DATA_UINT64);
+ kstat_named_init(&zzp->zz_waittime, "waittime", KSTAT_DATA_UINT64);
+
+ ksp->ks_update = zone_zfs_kstat_update;
+ ksp->ks_private = zone;
+
+ kstat_install(ksp);
+ return (ksp);
+}
static int
zone_mcap_kstat_update(kstat_t *ksp, int rw)
@@ -1827,11 +2175,19 @@ zone_mcap_kstat_update(kstat_t *ksp, int rw)
if (rw == KSTAT_WRITE)
return (EACCES);
+ zmp->zm_rss.value.ui64 = zone->zone_phys_mem;
+ zmp->zm_phys_cap.value.ui64 = zone->zone_phys_mem_ctl;
+ zmp->zm_swap.value.ui64 = zone->zone_max_swap;
+ zmp->zm_swap_cap.value.ui64 = zone->zone_max_swap_ctl;
+ zmp->zm_nover.value.ui64 = zone->zone_mcap_nover;
+ zmp->zm_pagedout.value.ui64 = zone->zone_mcap_pagedout;
zmp->zm_pgpgin.value.ui64 = zone->zone_pgpgin;
zmp->zm_anonpgin.value.ui64 = zone->zone_anonpgin;
zmp->zm_execpgin.value.ui64 = zone->zone_execpgin;
zmp->zm_fspgin.value.ui64 = zone->zone_fspgin;
zmp->zm_anon_alloc_fail.value.ui64 = zone->zone_anon_alloc_fail;
+ zmp->zm_pf_throttle.value.ui64 = zone->zone_pf_throttle;
+ zmp->zm_pf_throttle_usec.value.ui64 = zone->zone_pf_throttle_usec;
return (0);
}
@@ -1859,12 +2215,22 @@ zone_mcap_kstat_create(zone_t *zone)
/* The kstat "name" field is not large enough for a full zonename */
kstat_named_init(&zmp->zm_zonename, "zonename", KSTAT_DATA_STRING);
kstat_named_setstr(&zmp->zm_zonename, zone->zone_name);
+ kstat_named_init(&zmp->zm_rss, "rss", KSTAT_DATA_UINT64);
+ kstat_named_init(&zmp->zm_phys_cap, "physcap", KSTAT_DATA_UINT64);
+ kstat_named_init(&zmp->zm_swap, "swap", KSTAT_DATA_UINT64);
+ kstat_named_init(&zmp->zm_swap_cap, "swapcap", KSTAT_DATA_UINT64);
+ kstat_named_init(&zmp->zm_nover, "nover", KSTAT_DATA_UINT64);
+ kstat_named_init(&zmp->zm_pagedout, "pagedout", KSTAT_DATA_UINT64);
kstat_named_init(&zmp->zm_pgpgin, "pgpgin", KSTAT_DATA_UINT64);
kstat_named_init(&zmp->zm_anonpgin, "anonpgin", KSTAT_DATA_UINT64);
kstat_named_init(&zmp->zm_execpgin, "execpgin", KSTAT_DATA_UINT64);
kstat_named_init(&zmp->zm_fspgin, "fspgin", KSTAT_DATA_UINT64);
kstat_named_init(&zmp->zm_anon_alloc_fail, "anon_alloc_fail",
KSTAT_DATA_UINT64);
+ kstat_named_init(&zmp->zm_pf_throttle, "n_pf_throttle",
+ KSTAT_DATA_UINT64);
+ kstat_named_init(&zmp->zm_pf_throttle_usec, "n_pf_throttle_usec",
+ KSTAT_DATA_UINT64);
ksp->ks_update = zone_mcap_kstat_update;
ksp->ks_private = zone;
@@ -1960,13 +2326,25 @@ zone_misc_kstat_create(zone_t *zone)
static void
zone_kstat_create(zone_t *zone)
{
- zone->zone_lockedmem_kstat = zone_kstat_create_common(zone,
+ zone->zone_lockedmem_kstat = zone_rctl_kstat_create_common(zone,
"lockedmem", zone_lockedmem_kstat_update);
- zone->zone_swapresv_kstat = zone_kstat_create_common(zone,
+ zone->zone_swapresv_kstat = zone_rctl_kstat_create_common(zone,
"swapresv", zone_swapresv_kstat_update);
- zone->zone_nprocs_kstat = zone_kstat_create_common(zone,
+ zone->zone_physmem_kstat = zone_rctl_kstat_create_common(zone,
+ "physicalmem", zone_physmem_kstat_update);
+ zone->zone_nprocs_kstat = zone_rctl_kstat_create_common(zone,
"nprocs", zone_nprocs_kstat_update);
+ if ((zone->zone_vfs_ksp = zone_vfs_kstat_create(zone)) == NULL) {
+ zone->zone_vfs_stats = kmem_zalloc(
+ sizeof (zone_vfs_kstat_t), KM_SLEEP);
+ }
+
+ if ((zone->zone_zfs_ksp = zone_zfs_kstat_create(zone)) == NULL) {
+ zone->zone_zfs_stats = kmem_zalloc(
+ sizeof (zone_zfs_kstat_t), KM_SLEEP);
+ }
+
if ((zone->zone_mcap_ksp = zone_mcap_kstat_create(zone)) == NULL) {
zone->zone_mcap_stats = kmem_zalloc(
sizeof (zone_mcap_kstat_t), KM_SLEEP);
@@ -1998,8 +2376,15 @@ zone_kstat_delete(zone_t *zone)
sizeof (zone_kstat_t));
zone_kstat_delete_common(&zone->zone_swapresv_kstat,
sizeof (zone_kstat_t));
+ zone_kstat_delete_common(&zone->zone_physmem_kstat,
+ sizeof (zone_kstat_t));
zone_kstat_delete_common(&zone->zone_nprocs_kstat,
sizeof (zone_kstat_t));
+
+ zone_kstat_delete_common(&zone->zone_vfs_ksp,
+ sizeof (zone_vfs_kstat_t));
+ zone_kstat_delete_common(&zone->zone_zfs_ksp,
+ sizeof (zone_zfs_kstat_t));
zone_kstat_delete_common(&zone->zone_mcap_ksp,
sizeof (zone_mcap_kstat_t));
zone_kstat_delete_common(&zone->zone_misc_ksp,
@@ -2037,6 +2422,8 @@ zone_zsd_init(void)
zone0.zone_locked_mem_ctl = UINT64_MAX;
ASSERT(zone0.zone_max_swap == 0);
zone0.zone_max_swap_ctl = UINT64_MAX;
+ zone0.zone_phys_mem = 0;
+ zone0.zone_phys_mem_ctl = UINT64_MAX;
zone0.zone_max_lofi = 0;
zone0.zone_max_lofi_ctl = UINT64_MAX;
zone0.zone_shmmax = 0;
@@ -2060,8 +2447,9 @@ zone_zsd_init(void)
zone0.zone_initname = initname;
zone0.zone_lockedmem_kstat = NULL;
zone0.zone_swapresv_kstat = NULL;
+ zone0.zone_physmem_kstat = NULL;
zone0.zone_nprocs_kstat = NULL;
-
+ zone0.zone_zfs_io_pri = 1;
zone0.zone_stime = 0;
zone0.zone_utime = 0;
zone0.zone_wtime = 0;
@@ -2172,6 +2560,21 @@ zone_init(void)
RCTL_GLOBAL_INFINITE,
MAXCAP, MAXCAP, &zone_cpu_cap_ops);
+ rc_zone_cpu_baseline = rctl_register("zone.cpu-baseline",
+ RCENTITY_ZONE, RCTL_GLOBAL_SIGNAL_NEVER | RCTL_GLOBAL_DENY_NEVER |
+ RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT | RCTL_GLOBAL_SYSLOG_NEVER,
+ MAXCAP, MAXCAP, &zone_cpu_base_ops);
+
+ rc_zone_cpu_burst_time = rctl_register("zone.cpu-burst-time",
+ RCENTITY_ZONE, RCTL_GLOBAL_SIGNAL_NEVER | RCTL_GLOBAL_DENY_NEVER |
+ RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT | RCTL_GLOBAL_SYSLOG_NEVER,
+ INT_MAX, INT_MAX, &zone_cpu_burst_time_ops);
+
+ rc_zone_zfs_io_pri = rctl_register("zone.zfs-io-priority",
+ RCENTITY_ZONE, RCTL_GLOBAL_SIGNAL_NEVER | RCTL_GLOBAL_DENY_NEVER |
+ RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT | RCTL_GLOBAL_SYSLOG_NEVER,
+ 16384, 16384, &zone_zfs_io_pri_ops);
+
rc_zone_nlwps = rctl_register("zone.max-lwps", RCENTITY_ZONE,
RCTL_GLOBAL_NOACTION | RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT,
INT_MAX, INT_MAX, &zone_lwps_ops);
@@ -2213,6 +2616,20 @@ zone_init(void)
rde = rctl_dict_lookup("zone.cpu-shares");
(void) rctl_val_list_insert(&rde->rcd_default_value, dval);
+ /*
+ * Create a rctl_val with PRIVILEGED, NOACTION, value = 1. Then attach
+ * this at the head of the rctl_dict_entry for ``zone.zfs-io-priority'.
+ */
+ dval = kmem_cache_alloc(rctl_val_cache, KM_SLEEP);
+ bzero(dval, sizeof (rctl_val_t));
+ dval->rcv_value = 1;
+ dval->rcv_privilege = RCPRIV_PRIVILEGED;
+ dval->rcv_flagaction = RCTL_LOCAL_NOACTION;
+ dval->rcv_action_recip_pid = -1;
+
+ rde = rctl_dict_lookup("zone.zfs-io-priority");
+ (void) rctl_val_list_insert(&rde->rcd_default_value, dval);
+
rc_zone_locked_mem = rctl_register("zone.max-locked-memory",
RCENTITY_ZONE, RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_BYTES |
RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX,
@@ -2223,6 +2640,11 @@ zone_init(void)
RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX,
&zone_max_swap_ops);
+ rc_zone_phys_mem = rctl_register("zone.max-physical-memory",
+ RCENTITY_ZONE, RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_BYTES |
+ RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX,
+ &zone_phys_mem_ops);
+
rc_zone_max_lofi = rctl_register("zone.max-lofi",
RCENTITY_ZONE, RCTL_GLOBAL_NOBASIC | RCTL_GLOBAL_COUNT |
RCTL_GLOBAL_DENY_ALWAYS, UINT64_MAX, UINT64_MAX,
@@ -2244,6 +2666,8 @@ zone_init(void)
zone0.zone_ntasks = 1;
mutex_exit(&p0.p_lock);
zone0.zone_restart_init = B_TRUE;
+ zone0.zone_reboot_on_init_exit = B_FALSE;
+ zone0.zone_init_status = -1;
zone0.zone_brand = &native_brand;
rctl_prealloc_destroy(gp);
/*
@@ -2323,6 +2747,8 @@ zone_init(void)
static void
zone_free(zone_t *zone)
{
+ zone_dl_t *zdl;
+
ASSERT(zone != global_zone);
ASSERT(zone->zone_ntasks == 0);
ASSERT(zone->zone_nlwps == 0);
@@ -2351,6 +2777,19 @@ zone_free(zone_t *zone)
list_destroy(&zone->zone_ref_list);
zone_free_zsd(zone);
zone_free_datasets(zone);
+
+ /*
+ * While dlmgmtd should have removed all of these, it could have left
+ * something behind or crashed. In which case it's not safe for us to
+ * assume that the list is empty which list_destroy() will ASSERT. We
+ * clean up for our userland comrades which may have crashed, or worse,
+ * been disabled by SMF.
+ */
+ while ((zdl = list_remove_head(&zone->zone_dl_list)) != NULL) {
+ if (zdl->zdl_net != NULL)
+ nvlist_free(zdl->zdl_net);
+ kmem_free(zdl, sizeof (zone_dl_t));
+ }
list_destroy(&zone->zone_dl_list);
if (zone->zone_rootvp != NULL)
@@ -2395,12 +2834,18 @@ zone_free(zone_t *zone)
static void
zone_status_set(zone_t *zone, zone_status_t status)
{
+ timestruc_t now;
+ uint64_t t;
nvlist_t *nvl = NULL;
ASSERT(MUTEX_HELD(&zone_status_lock));
ASSERT(status > ZONE_MIN_STATE && status <= ZONE_MAX_STATE &&
status >= zone_status_get(zone));
+ /* Current time since Jan 1 1970 but consumers expect NS */
+ gethrestime(&now);
+ t = (now.tv_sec * NANOSEC) + now.tv_nsec;
+
if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) ||
nvlist_add_string(nvl, ZONE_CB_NAME, zone->zone_name) ||
nvlist_add_string(nvl, ZONE_CB_NEWSTATE,
@@ -2408,7 +2853,7 @@ zone_status_set(zone_t *zone, zone_status_t status)
nvlist_add_string(nvl, ZONE_CB_OLDSTATE,
zone_status_table[zone->zone_status]) ||
nvlist_add_int32(nvl, ZONE_CB_ZONEID, zone->zone_id) ||
- nvlist_add_uint64(nvl, ZONE_CB_TIMESTAMP, (uint64_t)gethrtime()) ||
+ nvlist_add_uint64(nvl, ZONE_CB_TIMESTAMP, t) ||
sysevent_evc_publish(zone_event_chan, ZONE_EVENT_STATUS_CLASS,
ZONE_EVENT_STATUS_SUBCLASS, "sun.com", "kernel", nvl, EVCH_SLEEP)) {
#ifdef DEBUG
@@ -2486,9 +2931,14 @@ zone_set_brand(zone_t *zone, const char *brand)
return (EINVAL);
}
- /* set up the brand specific data */
+ /*
+ * Set up the brand specific data.
+ * Note that it's possible that the hook has to drop the
+ * zone_status_lock and reaquire it before returning so we can't
+ * assume the lock has been held the entire time.
+ */
zone->zone_brand = bp;
- ZBROP(zone)->b_init_brand_data(zone);
+ ZBROP(zone)->b_init_brand_data(zone, &zone_status_lock);
mutex_exit(&zone_status_lock);
return (0);
@@ -2534,14 +2984,65 @@ zone_set_initname(zone_t *zone, const char *zone_initname)
return (0);
}
+/*
+ * The zone_set_mcap_nover and zone_set_mcap_pageout functions are used
+ * to provide the physical memory capping kstats. Since physical memory
+ * capping is currently implemented in userland, that code uses the setattr
+ * entry point to increment the kstats. We always simply increment nover
+ * every time that setattr is called and we always add in the input value
+ * to zone_mcap_pagedout every time that is called.
+ */
+/*ARGSUSED*/
+static int
+zone_set_mcap_nover(zone_t *zone, const uint64_t *zone_nover)
+{
+ zone->zone_mcap_nover++;
+
+ return (0);
+}
+
static int
-zone_set_phys_mcap(zone_t *zone, const uint64_t *zone_mcap)
+zone_set_mcap_pageout(zone_t *zone, const uint64_t *zone_pageout)
{
- uint64_t mcap;
- int err = 0;
+ uint64_t pageout;
+ int err;
- if ((err = copyin(zone_mcap, &mcap, sizeof (uint64_t))) == 0)
- zone->zone_phys_mcap = mcap;
+ if ((err = copyin(zone_pageout, &pageout, sizeof (uint64_t))) == 0)
+ zone->zone_mcap_pagedout += pageout;
+
+ return (err);
+}
+
+/*
+ * The zone_set_page_fault_delay function is used to set the number of usecs
+ * to throttle page faults. This is normally 0 but can be set to a non-0 value
+ * by the user-land memory capping code when the zone is over its physcial
+ * memory cap.
+ */
+static int
+zone_set_page_fault_delay(zone_t *zone, const uint32_t *pfdelay)
+{
+ uint32_t dusec;
+ int err;
+
+ if ((err = copyin(pfdelay, &dusec, sizeof (uint32_t))) == 0)
+ zone->zone_pg_flt_delay = dusec;
+
+ return (err);
+}
+
+/*
+ * The zone_set_rss function is used to set the zone's RSS when we do the
+ * fast, approximate calculation in user-land.
+ */
+static int
+zone_set_rss(zone_t *zone, const uint64_t *prss)
+{
+ uint64_t rss;
+ int err;
+
+ if ((err = copyin(prss, &rss, sizeof (uint64_t))) == 0)
+ zone->zone_phys_mem = rss;
return (err);
}
@@ -2953,6 +3454,12 @@ getzoneid(void)
return (curproc->p_zone->zone_id);
}
+zoneid_t
+getzonedid(void)
+{
+ return (curproc->p_zone->zone_did);
+}
+
/*
* Internal versions of zone_find_by_*(). These don't zone_hold() or
* check the validity of a zone's state.
@@ -3696,6 +4203,17 @@ zone_start_init(void)
*/
z->zone_proc_initpid = p->p_pid;
+ if (z->zone_setup_app_contract == B_TRUE) {
+ /*
+ * Normally a process cannot modify its own contract, but we're
+ * just starting the zone's init process and its contract is
+ * always initialized from the sys_process_tmpl template, so
+ * this is the simplest way to setup init's contract to kill
+ * the process if any other process in the contract exits.
+ */
+ p->p_ct_process->conp_ev_fatal |= CT_PR_EV_EXIT;
+ }
+
/*
* We maintain zone_boot_err so that we can return the cause of the
* failure back to the caller of the zone_boot syscall.
@@ -3724,9 +4242,54 @@ zone_start_init(void)
lwp_exit();
}
} else {
+ id_t cid = curthread->t_cid;
+
if (zone_status_get(z) == ZONE_IS_BOOTING)
zone_status_set(z, ZONE_IS_RUNNING);
mutex_exit(&zone_status_lock);
+
+ mutex_enter(&class_lock);
+ ASSERT(cid < loaded_classes);
+ if (strcmp(sclass[cid].cl_name, "FX") == 0 &&
+ z->zone_fixed_hipri) {
+ /*
+ * If the zone is using FX then by default all
+ * processes start at the lowest priority and stay
+ * there. We provide a mechanism for the zone to
+ * indicate that it should run at "high priority". In
+ * this case we setup init to run at the highest FX
+ * priority (which is one level higher than the
+ * non-fixed scheduling classes can use).
+ */
+ pcparms_t pcparms;
+
+ pcparms.pc_cid = cid;
+ ((fxkparms_t *)pcparms.pc_clparms)->fx_upri = FXMAXUPRI;
+ ((fxkparms_t *)pcparms.pc_clparms)->fx_uprilim =
+ FXMAXUPRI;
+ ((fxkparms_t *)pcparms.pc_clparms)->fx_cflags =
+ FX_DOUPRILIM | FX_DOUPRI;
+
+ mutex_enter(&pidlock);
+ mutex_enter(&curproc->p_lock);
+
+ (void) parmsset(&pcparms, curthread);
+
+ mutex_exit(&curproc->p_lock);
+ mutex_exit(&pidlock);
+ } else if (strcmp(sclass[cid].cl_name, "RT") == 0) {
+ /*
+ * zsched always starts the init lwp at priority
+ * minclsyspri - 1. This priority gets set in t_pri and
+ * is invalid for RT, but RT never uses t_pri. However
+ * t_pri is used by procfs, so we always see processes
+ * within an RT zone with an invalid priority value.
+ * We fix that up now.
+ */
+ curthread->t_pri = RTGPPRIO0;
+ }
+ mutex_exit(&class_lock);
+
/* cause the process to return to userland. */
lwp_rtt();
}
@@ -3768,6 +4331,7 @@ zsched(void *arg)
PTOU(pp)->u_argc = 0;
PTOU(pp)->u_argv = NULL;
PTOU(pp)->u_envp = NULL;
+ PTOU(pp)->u_commpagep = NULL;
closeall(P_FINFO(pp));
/*
@@ -4210,8 +4774,9 @@ parse_rctls(caddr_t ubuf, size_t buflen, nvlist_t **nvlp)
error = EINVAL;
name = nvpair_name(nvp);
- if (strncmp(nvpair_name(nvp), "zone.", sizeof ("zone.") - 1)
- != 0 || nvpair_type(nvp) != DATA_TYPE_NVLIST_ARRAY) {
+ if ((strncmp(name, "zone.", sizeof ("zone.") - 1) != 0 &&
+ strncmp(name, "project.", sizeof ("project.") - 1) != 0) ||
+ nvpair_type(nvp) != DATA_TYPE_NVLIST_ARRAY) {
goto out;
}
if ((hndl = rctl_hndl_lookup(name)) == -1) {
@@ -4329,7 +4894,7 @@ zone_create(const char *zone_name, const char *zone_root,
caddr_t rctlbuf, size_t rctlbufsz,
caddr_t zfsbuf, size_t zfsbufsz, int *extended_error,
int match, uint32_t doi, const bslabel_t *label,
- int flags)
+ int flags, zoneid_t zone_did)
{
struct zsched_arg zarg;
nvlist_t *rctls = NULL;
@@ -4352,6 +4917,7 @@ zone_create(const char *zone_name, const char *zone_root,
zone = kmem_zalloc(sizeof (zone_t), KM_SLEEP);
zoneid = zone->zone_id = id_alloc(zoneid_space);
+ zone->zone_did = zone_did;
zone->zone_status = ZONE_IS_UNINITIALIZED;
zone->zone_pool = pool_default;
zone->zone_pool_mod = gethrtime();
@@ -4359,6 +4925,8 @@ zone_create(const char *zone_name, const char *zone_root,
zone->zone_ncpus = 0;
zone->zone_ncpus_online = 0;
zone->zone_restart_init = B_TRUE;
+ zone->zone_reboot_on_init_exit = B_FALSE;
+ zone->zone_init_status = -1;
zone->zone_brand = &native_brand;
zone->zone_initname = NULL;
mutex_init(&zone->zone_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -4420,10 +4988,14 @@ zone_create(const char *zone_name, const char *zone_root,
zone->zone_locked_mem_ctl = UINT64_MAX;
zone->zone_max_swap = 0;
zone->zone_max_swap_ctl = UINT64_MAX;
+ zone->zone_phys_mem = 0;
+ zone->zone_phys_mem_ctl = UINT64_MAX;
zone->zone_max_lofi = 0;
zone->zone_max_lofi_ctl = UINT64_MAX;
- zone0.zone_lockedmem_kstat = NULL;
- zone0.zone_swapresv_kstat = NULL;
+ zone->zone_lockedmem_kstat = NULL;
+ zone->zone_swapresv_kstat = NULL;
+ zone->zone_physmem_kstat = NULL;
+ zone->zone_zfs_io_pri = 1;
/*
* Zsched initializes the rctls.
@@ -4578,8 +5150,8 @@ zone_create(const char *zone_name, const char *zone_root,
/*
* The process, task, and project rctls are probably wrong;
* we need an interface to get the default values of all rctls,
- * and initialize zsched appropriately. I'm not sure that that
- * makes much of a difference, though.
+ * and initialize zsched appropriately. However, we allow zoneadmd
+ * to pass down both zone and project rctls for the zone's init.
*/
error = newproc(zsched, (void *)&zarg, syscid, minclsyspri, NULL, 0);
if (error != 0) {
@@ -4718,6 +5290,7 @@ zone_boot(zoneid_t zoneid)
static int
zone_empty(zone_t *zone)
{
+ int cnt = 0;
int waitstatus;
/*
@@ -4728,7 +5301,16 @@ zone_empty(zone_t *zone)
ASSERT(MUTEX_NOT_HELD(&zonehash_lock));
while ((waitstatus = zone_status_timedwait_sig(zone,
ddi_get_lbolt() + hz, ZONE_IS_EMPTY)) == -1) {
- killall(zone->zone_id);
+ boolean_t force = B_FALSE;
+
+ /* Every 30 seconds, try harder */
+ if (cnt++ >= 30) {
+ cmn_err(CE_WARN, "attempt to force kill zone %d\n",
+ zone->zone_id);
+ force = B_TRUE;
+ cnt = 0;
+ }
+ killall(zone->zone_id, force);
}
/*
* return EINTR if we were signaled
@@ -5479,14 +6061,6 @@ zone_getattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize)
error = EFAULT;
}
break;
- case ZONE_ATTR_PHYS_MCAP:
- size = sizeof (zone->zone_phys_mcap);
- if (bufsize > size)
- bufsize = size;
- if (buf != NULL &&
- copyout(&zone->zone_phys_mcap, buf, bufsize) != 0)
- error = EFAULT;
- break;
case ZONE_ATTR_SCHED_CLASS:
mutex_enter(&class_lock);
@@ -5541,6 +6115,23 @@ zone_getattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize)
}
kmem_free(zbuf, bufsize);
break;
+ case ZONE_ATTR_DID:
+ size = sizeof (zoneid_t);
+ if (bufsize > size)
+ bufsize = size;
+
+ if (buf != NULL && copyout(&zone->zone_did, buf, bufsize) != 0)
+ error = EFAULT;
+ break;
+ case ZONE_ATTR_SCHED_FIXEDHI:
+ size = sizeof (boolean_t);
+ if (bufsize > size)
+ bufsize = size;
+
+ if (buf != NULL && copyout(&zone->zone_fixed_hipri, buf,
+ bufsize) != 0)
+ error = EFAULT;
+ break;
default:
if ((attr >= ZONE_ATTR_BRAND_ATTRS) && ZONE_IS_BRANDED(zone)) {
size = bufsize;
@@ -5572,10 +6163,11 @@ zone_setattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize)
return (set_errno(EPERM));
/*
- * Only the ZONE_ATTR_PHYS_MCAP attribute can be set on the
- * global zone.
+ * Only the ZONE_ATTR_PMCAP_NOVER and ZONE_ATTR_PMCAP_PAGEOUT
+ * attributes can be set on the global zone.
*/
- if (zoneid == GLOBAL_ZONEID && attr != ZONE_ATTR_PHYS_MCAP) {
+ if (zoneid == GLOBAL_ZONEID &&
+ attr != ZONE_ATTR_PMCAP_NOVER && attr != ZONE_ATTR_PMCAP_PAGEOUT) {
return (set_errno(EINVAL));
}
@@ -5592,7 +6184,9 @@ zone_setattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize)
* non-global zones.
*/
zone_status = zone_status_get(zone);
- if (attr != ZONE_ATTR_PHYS_MCAP && zone_status > ZONE_IS_READY) {
+ if (attr != ZONE_ATTR_PMCAP_NOVER && attr != ZONE_ATTR_PMCAP_PAGEOUT &&
+ attr != ZONE_ATTR_PG_FLT_DELAY && attr != ZONE_ATTR_RSS &&
+ zone_status > ZONE_IS_READY) {
err = EINVAL;
goto done;
}
@@ -5614,8 +6208,17 @@ zone_setattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize)
case ZONE_ATTR_FS_ALLOWED:
err = zone_set_fs_allowed(zone, (const char *)buf);
break;
- case ZONE_ATTR_PHYS_MCAP:
- err = zone_set_phys_mcap(zone, (const uint64_t *)buf);
+ case ZONE_ATTR_PMCAP_NOVER:
+ err = zone_set_mcap_nover(zone, (const uint64_t *)buf);
+ break;
+ case ZONE_ATTR_PMCAP_PAGEOUT:
+ err = zone_set_mcap_pageout(zone, (const uint64_t *)buf);
+ break;
+ case ZONE_ATTR_PG_FLT_DELAY:
+ err = zone_set_page_fault_delay(zone, (const uint32_t *)buf);
+ break;
+ case ZONE_ATTR_RSS:
+ err = zone_set_rss(zone, (const uint64_t *)buf);
break;
case ZONE_ATTR_SCHED_CLASS:
err = zone_set_sched_class(zone, (const char *)buf);
@@ -5644,6 +6247,22 @@ zone_setattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize)
err = zone_set_network(zoneid, zbuf);
kmem_free(zbuf, bufsize);
break;
+ case ZONE_ATTR_APP_SVC_CT:
+ if (bufsize != sizeof (boolean_t)) {
+ err = EINVAL;
+ } else {
+ zone->zone_setup_app_contract = (boolean_t)buf;
+ err = 0;
+ }
+ break;
+ case ZONE_ATTR_SCHED_FIXEDHI:
+ if (bufsize != sizeof (boolean_t)) {
+ err = EINVAL;
+ } else {
+ zone->zone_fixed_hipri = (boolean_t)buf;
+ err = 0;
+ }
+ break;
default:
if ((attr >= ZONE_ATTR_BRAND_ATTRS) && ZONE_IS_BRANDED(zone))
err = ZBROP(zone)->b_setattr(zone, attr, buf, bufsize);
@@ -6336,6 +6955,7 @@ zone(int cmd, void *arg1, void *arg2, void *arg3, void *arg4)
zs.doi = zs32.doi;
zs.label = (const bslabel_t *)(uintptr_t)zs32.label;
zs.flags = zs32.flags;
+ zs.zoneid = zs32.zoneid;
#else
panic("get_udatamodel() returned bogus result\n");
#endif
@@ -6346,7 +6966,7 @@ zone(int cmd, void *arg1, void *arg2, void *arg3, void *arg4)
(caddr_t)zs.rctlbuf, zs.rctlbufsz,
(caddr_t)zs.zfsbuf, zs.zfsbufsz,
zs.extended_error, zs.match, zs.doi,
- zs.label, zs.flags));
+ zs.label, zs.flags, zs.zoneid));
case ZONE_BOOT:
return (zone_boot((zoneid_t)(uintptr_t)arg1));
case ZONE_DESTROY:
@@ -6447,6 +7067,7 @@ zone_ki_call_zoneadmd(struct zarg *zargp)
bcopy(zone->zone_name, zone_name, zone_namelen);
zoneid = zone->zone_id;
uniqid = zone->zone_uniqid;
+ arg.status = zone->zone_init_status;
/*
* zoneadmd may be down, but at least we can empty out the zone.
* We can ignore the return value of zone_empty() since we're called
@@ -6624,7 +7245,7 @@ zone_kadmin(int cmd, int fcn, const char *mdep, cred_t *credp)
* zone_ki_call_zoneadmd() will do a more thorough job of this
* later.
*/
- killall(zone->zone_id);
+ killall(zone->zone_id, B_FALSE);
/*
* Now, create the thread to contact zoneadmd and do the rest of the
* work. This thread can't be created in our zone otherwise
@@ -6687,16 +7308,15 @@ zone_shutdown_global(void)
}
/*
- * Returns true if the named dataset is visible in the current zone.
+ * Returns true if the named dataset is visible in the specified zone.
* The 'write' parameter is set to 1 if the dataset is also writable.
*/
int
-zone_dataset_visible(const char *dataset, int *write)
+zone_dataset_visible_inzone(zone_t *zone, const char *dataset, int *write)
{
static int zfstype = -1;
zone_dataset_t *zd;
size_t len;
- zone_t *zone = curproc->p_zone;
const char *name = NULL;
vfs_t *vfsp = NULL;
@@ -6764,7 +7384,8 @@ zone_dataset_visible(const char *dataset, int *write)
vfs_list_read_lock();
vfsp = zone->zone_vfslist;
do {
- ASSERT(vfsp);
+ if (vfsp == NULL)
+ break;
if (vfsp->vfs_fstype == zfstype) {
name = refstr_value(vfsp->vfs_resource);
@@ -6801,6 +7422,18 @@ zone_dataset_visible(const char *dataset, int *write)
}
/*
+ * Returns true if the named dataset is visible in the current zone.
+ * The 'write' parameter is set to 1 if the dataset is also writable.
+ */
+int
+zone_dataset_visible(const char *dataset, int *write)
+{
+ zone_t *zone = curproc->p_zone;
+
+ return (zone_dataset_visible_inzone(zone, dataset, write));
+}
+
+/*
* zone_find_by_any_path() -
*
* kernel-private routine similar to zone_find_by_path(), but which
diff --git a/usr/src/uts/common/io/scsi/adapters/mpt_sas/mptsas_hash.c b/usr/src/uts/common/refhash/refhash.c
index 8f96c2d9f1..e2de00597e 100644
--- a/usr/src/uts/common/io/scsi/adapters/mpt_sas/mptsas_hash.c
+++ b/usr/src/uts/common/refhash/refhash.c
@@ -10,16 +10,18 @@
*/
/*
- * Copyright 2014 Joyent, Inc. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
-#include <sys/scsi/adapters/mpt_sas/mptsas_hash.h>
+#include <sys/refhash.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/kmem.h>
#include <sys/list.h>
#include <sys/ddi.h>
+#define RHL_F_DEAD 0x01
+
#ifdef lint
extern refhash_link_t *obj_to_link(refhash_t *, void *);
extern void *link_to_obj(refhash_t *, refhash_link_t *);
diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile
index d5dd20bff9..052a28a5e2 100644
--- a/usr/src/uts/common/sys/Makefile
+++ b/usr/src/uts/common/sys/Makefile
@@ -23,6 +23,7 @@
# Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright 2014, Joyent, Inc. All rights reserved.
# Copyright 2013 Garrett D'Amore <garrett@damore.org>
+# Copyright 2015, Joyent, Inc. All rights reserved.
# Copyright 2013 Saso Kiselkov. All rights reserved.
# Copyright 2015 Igor Kozhukhov <ikozhukhov@gmail.com>
# Copyright 2016 Nexenta Systems, Inc.
@@ -250,6 +251,7 @@ CHKHDRS= \
flock.h \
flock_impl.h \
fork.h \
+ frameio.h \
fss.h \
fsspriocntl.h \
fsid.h \
@@ -275,6 +277,7 @@ CHKHDRS= \
idmap.h \
ieeefp.h \
id_space.h \
+ inotify.h \
instance.h \
int_const.h \
int_fmtio.h \
@@ -343,6 +346,7 @@ CHKHDRS= \
lgrp.h \
lgrp_user.h \
libc_kernel.h \
+ limits.h \
link.h \
list.h \
list_impl.h \
@@ -424,6 +428,9 @@ CHKHDRS= \
ontrap.h \
open.h \
openpromio.h \
+ overlay.h \
+ overlay_common.h \
+ overlay_target.h \
panic.h \
param.h \
pathconf.h \
@@ -646,6 +653,8 @@ CHKHDRS= \
vmem.h \
vmem_impl.h \
vmsystm.h \
+ vnd.h \
+ vnd_errno.h \
vnic.h \
vnic_impl.h \
vnode.h \
@@ -657,12 +666,14 @@ CHKHDRS= \
vuid_queue.h \
vuid_state.h \
vuid_store.h \
+ vxlan.h \
wait.h \
waitq.h \
wanboot_impl.h \
watchpoint.h \
winlockio.h \
zcons.h \
+ zfd.h \
zone.h \
xti_inet.h \
xti_osi.h \
@@ -856,13 +867,14 @@ FSHDRS= \
autofs.h \
decomp.h \
dv_node.h \
- sdev_impl.h \
fifonode.h \
hsfs_isospec.h \
hsfs_node.h \
hsfs_rrip.h \
hsfs_spec.h \
hsfs_susp.h \
+ hyprlofs.h \
+ hyprlofs_info.h \
lofs_info.h \
lofs_node.h \
mntdata.h \
@@ -872,6 +884,8 @@ FSHDRS= \
pc_label.h \
pc_node.h \
pxfs_ki.h \
+ sdev_impl.h \
+ sdev_plugin.h \
snode.h \
swapnode.h \
tmp.h \
@@ -996,6 +1010,7 @@ SATAGENHDRS= \
SYSEVENTHDRS= \
ap_driver.h \
+ datalink.h \
dev.h \
domain.h \
dr.h \
diff --git a/usr/src/uts/common/sys/aggr_impl.h b/usr/src/uts/common/sys/aggr_impl.h
index 547c9cc241..a4c0409304 100644
--- a/usr/src/uts/common/sys/aggr_impl.h
+++ b/usr/src/uts/common/sys/aggr_impl.h
@@ -21,6 +21,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012 OmniTI Computer Consulting, Inc All rights reserved.
*/
#ifndef _SYS_AGGR_IMPL_H
@@ -308,6 +309,8 @@ extern boolean_t aggr_port_notify_link(aggr_grp_t *, aggr_port_t *);
extern void aggr_port_init_callbacks(aggr_port_t *);
extern void aggr_recv_cb(void *, mac_resource_handle_t, mblk_t *, boolean_t);
+extern void aggr_recv_promisc_cb(void *, mac_resource_handle_t, mblk_t *,
+ boolean_t);
extern void aggr_tx_ring_update(void *, uintptr_t);
extern void aggr_tx_notify_thread(void *);
diff --git a/usr/src/uts/common/sys/auxv.h b/usr/src/uts/common/sys/auxv.h
index 3a2e705850..48b94e2951 100644
--- a/usr/src/uts/common/sys/auxv.h
+++ b/usr/src/uts/common/sys/auxv.h
@@ -29,7 +29,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
*/
#ifndef _SYS_AUXV_H
@@ -78,6 +78,9 @@ typedef struct {
#define AT_FLAGS 8 /* processor flags */
#define AT_ENTRY 9 /* a.out entry point */
+/* First introduced on Linux */
+#define AT_RANDOM 25 /* address of 16 random bytes */
+
/*
* These relate to the original PPC ABI document; Linux reused
* the values for other things (see below), so disambiguation of
@@ -90,19 +93,18 @@ typedef struct {
* These are the values from LSB 1.3, the first five are also described
* in the draft amd64 ABI.
*
- * At the time of writing, Solaris doesn't place any of these values into
- * the aux vector, except AT_CLKTCK which is placed on the aux vector for
- * lx branded processes; also, we do similar things via AT_SUN_ values.
+ * At the time of writing, illumos doesn't place any of these values into the
+ * aux vector, except where noted. We do similar things via AT_SUN_ values.
*
* AT_NOTELF 10 program is not ELF?
- * AT_UID 11 real user id
- * AT_EUID 12 effective user id
- * AT_GID 13 real group id
- * AT_EGID 14 effective group id
+ * AT_UID 11 real user id (provided in LX)
+ * AT_EUID 12 effective user id (provided in LX)
+ * AT_GID 13 real group id (provided in LX)
+ * AT_EGID 14 effective group id (provided in LX)
*
* AT_PLATFORM 15
* AT_HWCAP 16
- * AT_CLKTCK 17 c.f. _SC_CLK_TCK
+ * AT_CLKTCK 17 c.f. _SC_CLK_TCK (provided in LX)
* AT_FPUCW 18
*
* AT_DCACHEBSIZE 19 (moved from 10)
@@ -110,6 +112,16 @@ typedef struct {
* AT_UCACHEBSIZE 21 (moved from 12)
*
* AT_IGNOREPPC 22
+ *
+ * On Linux:
+ * AT_* values 18 through 22 are reserved
+ * AT_SECURE 23 secure mode boolean (provided in LX)
+ * AT_BASE_PLATFORM 24 string identifying real platform, may
+ * differ from AT_PLATFORM.
+ * AT_HWCAP2 26 extension of AT_HWCAP
+ * AT_EXECFN 31 filename of program
+ * AT_SYSINFO 32
+ * AT_SYSINFO_EHDR 33 The vDSO location
*/
/*
@@ -186,6 +198,13 @@ extern uint_t getisax(uint32_t *, uint_t);
#define AT_SUN_BRAND_AUX1 2020
#define AT_SUN_BRAND_AUX2 2021
#define AT_SUN_BRAND_AUX3 2022
+#define AT_SUN_BRAND_AUX4 2025
+#define AT_SUN_BRAND_NROOT 2024
+
+/*
+ * Aux vector for comm page
+ */
+#define AT_SUN_COMMPAGE 2026
/*
* Note that 2023 is reserved for the AT_SUN_HWCAP2 word defined above.
diff --git a/usr/src/uts/common/sys/auxv_386.h b/usr/src/uts/common/sys/auxv_386.h
index ec4c8b0f19..a3256a464f 100644
--- a/usr/src/uts/common/sys/auxv_386.h
+++ b/usr/src/uts/common/sys/auxv_386.h
@@ -89,10 +89,12 @@ extern "C" {
#define AV_386_2_BMI2 0x00008 /* BMI2 insns */
#define AV_386_2_FMA 0x00010 /* FMA insns */
#define AV_386_2_AVX2 0x00020 /* AVX2 insns */
+#define AV_386_2_ADX 0x00040 /* ADX insns */
+#define AV_386_2_RDSEED 0x00080 /* RDSEED insn */
#define FMT_AV_386_2 \
"\020" \
- "\06avx2\05fma\04bmi2\03bmi1\02rdrand\01f16c"
+ "\10rdseed\07adx\06avx2\05fma\04bmi2\03bmi1\02rdrand\01f16c"
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/sys/brand.h b/usr/src/uts/common/sys/brand.h
index badc3faff8..d88d5683a7 100644
--- a/usr/src/uts/common/sys/brand.h
+++ b/usr/src/uts/common/sys/brand.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2016, Joyent, Inc.
*/
#ifndef _SYS_BRAND_H
@@ -102,29 +103,101 @@ struct brand_mach_ops;
struct intpdata;
struct execa;
+/*
+ * Common structure to define hooks for brand operation.
+ *
+ * Required Fields:
+ * b_init_brand_data - Setup zone brand data during zone_setbrand
+ * b_free_brand_data - Free zone brand data during zone_destroy
+ * b_brandsys - Syscall handler for brandsys
+ * b_setbrand - Initialize process brand data
+ * b_getattr - Get brand-custom zone attribute
+ * b_setattr - Set brand-custom zone attribute
+ * b_copy_procdata - Copy process brand data during fork
+ * b_proc_exit - Perform process brand exit processing
+ * b_exec - Reset branded process state on exec
+ * b_lwp_setrval - Set return code for forked child
+ * b_initlwp - Initialize lwp brand data (cannot drop p->p_lock)
+ * b_forklwp - Copy lwp brand data during fork
+ * b_freelwp - Free lwp brand data
+ * b_lwpexit - Perform lwp-specific brand exit processing
+ * b_elfexec - Load and execute ELF binary
+ * b_sigset_native_to_brand - Convert sigset native->brand
+ * b_sigset_brand_to_native - Convert sigset brand->native
+ * b_nsig - Maxiumum signal number
+ * b_sendsig - Update process state after sendsig
+ *
+ * Optional Fields:
+ * b_lwpdata_alloc - Speculatively allocate data for use in b_initlwp
+ * b_lwpdata_free - Free data from allocated by b_lwpdata_alloc if errors occur
+ * during lwp creation before b_initlwp could be called.
+ * b_initlwp_post - Complete lwp branding (can temporarily drop p->p_lock)
+ * b_exit_with_sig - Instead of sending SIGCLD, exit with custom behavior
+ * b_psig_to_proc - Custom additional behavior during psig
+ * b_wait_filter - Filter processes from being matched by waitid
+ * b_native_exec - Provide interpreter path prefix for executables
+ * b_ptrace_exectrap - Custom behavior for legacy ptrace traps
+ * b_map32limit - Specify alternate limit for MAP_32BIT mappings
+ * b_stop_notify - Hook process stop events
+ * b_waitid_helper - Generate synthetic results for waitid
+ * b_sigcld_repost - Post synthetic SIGCLD signals
+ * b_issig_stop - Alter/suppress signal delivery during issig
+ * b_sig_ignorable - Disallow discarding of signals
+ * b_savecontext - Alter context during savecontext
+ * b_restorecontext - Alter context during restorecontext
+ * b_sendsig_stack - Override stack used for signal delivery
+ * b_setid_clear - Override setid_clear behavior
+ * b_pagefault - Trap pagefault events
+ * b_intp_parse_arg - Controls interpreter argument handling (allow 1 or all)
+ */
struct brand_ops {
- void (*b_init_brand_data)(zone_t *);
+ void (*b_init_brand_data)(zone_t *, kmutex_t *);
void (*b_free_brand_data)(zone_t *);
int (*b_brandsys)(int, int64_t *, uintptr_t, uintptr_t, uintptr_t,
- uintptr_t, uintptr_t, uintptr_t);
+ uintptr_t, uintptr_t);
void (*b_setbrand)(struct proc *);
int (*b_getattr)(zone_t *, int, void *, size_t *);
int (*b_setattr)(zone_t *, int, void *, size_t);
void (*b_copy_procdata)(struct proc *, struct proc *);
- void (*b_proc_exit)(struct proc *, klwp_t *);
+ void (*b_proc_exit)(struct proc *);
void (*b_exec)();
void (*b_lwp_setrval)(klwp_t *, int, int);
- int (*b_initlwp)(klwp_t *);
+ void *(*b_lwpdata_alloc)(struct proc *);
+ void (*b_lwpdata_free)(void *);
+ void (*b_initlwp)(klwp_t *, void *);
+ void (*b_initlwp_post)(klwp_t *);
void (*b_forklwp)(klwp_t *, klwp_t *);
void (*b_freelwp)(klwp_t *);
void (*b_lwpexit)(klwp_t *);
int (*b_elfexec)(struct vnode *vp, struct execa *uap,
struct uarg *args, struct intpdata *idata, int level,
long *execsz, int setid, caddr_t exec_file,
- struct cred *cred, int brand_action);
+ struct cred *cred, int *brand_action);
void (*b_sigset_native_to_brand)(sigset_t *);
void (*b_sigset_brand_to_native)(sigset_t *);
+ void (*b_sigfd_translate)(k_siginfo_t *);
int b_nsig;
+ void (*b_exit_with_sig)(proc_t *, sigqueue_t *);
+ boolean_t (*b_wait_filter)(proc_t *, proc_t *);
+ boolean_t (*b_native_exec)(uint8_t, const char **);
+ uint32_t (*b_map32limit)(proc_t *);
+ void (*b_stop_notify)(proc_t *, klwp_t *, ushort_t, ushort_t);
+ int (*b_waitid_helper)(idtype_t, id_t, k_siginfo_t *, int,
+ boolean_t *, int *);
+ int (*b_sigcld_repost)(proc_t *, sigqueue_t *);
+ int (*b_issig_stop)(proc_t *, klwp_t *);
+ boolean_t (*b_sig_ignorable)(proc_t *, klwp_t *, int);
+ void (*b_savecontext)(ucontext_t *);
+#if defined(_SYSCALL32_IMPL)
+ void (*b_savecontext32)(ucontext32_t *);
+#endif
+ void (*b_restorecontext)(ucontext_t *);
+ caddr_t (*b_sendsig_stack)(int);
+ void (*b_sendsig)(int);
+ int (*b_setid_clear)(vattr_t *vap, cred_t *cr);
+ int (*b_pagefault)(proc_t *, klwp_t *, caddr_t, enum fault_type,
+ enum seg_rw);
+ boolean_t b_intp_parse_arg;
};
/*
@@ -135,6 +208,7 @@ typedef struct brand {
char *b_name;
struct brand_ops *b_ops;
struct brand_mach_ops *b_machops;
+ size_t b_data_size;
} brand_t;
extern brand_t native_brand;
@@ -165,7 +239,7 @@ extern brand_t *brand_register_zone(struct brand_attr *);
extern brand_t *brand_find_name(char *);
extern void brand_unregister_zone(brand_t *);
extern int brand_zone_count(brand_t *);
-extern void brand_setbrand(proc_t *);
+extern int brand_setbrand(proc_t *, boolean_t);
extern void brand_clearbrand(proc_t *, boolean_t);
/*
@@ -178,17 +252,16 @@ extern int brand_solaris_cmd(int, uintptr_t, uintptr_t, uintptr_t,
extern void brand_solaris_copy_procdata(proc_t *, proc_t *,
struct brand *);
extern int brand_solaris_elfexec(vnode_t *, execa_t *, uarg_t *,
- intpdata_t *, int, long *, int, caddr_t, cred_t *, int,
- struct brand *, char *, char *, char *, char *, char *);
+ intpdata_t *, int, long *, int, caddr_t, cred_t *, int *,
+ struct brand *, char *, char *, char *);
extern void brand_solaris_exec(struct brand *);
extern int brand_solaris_fini(char **, struct modlinkage *,
struct brand *);
extern void brand_solaris_forklwp(klwp_t *, klwp_t *, struct brand *);
extern void brand_solaris_freelwp(klwp_t *, struct brand *);
-extern int brand_solaris_initlwp(klwp_t *, struct brand *);
+extern void brand_solaris_initlwp(klwp_t *, struct brand *);
extern void brand_solaris_lwpexit(klwp_t *, struct brand *);
-extern void brand_solaris_proc_exit(struct proc *, klwp_t *,
- struct brand *);
+extern void brand_solaris_proc_exit(struct proc *, struct brand *);
extern void brand_solaris_setbrand(proc_t *, struct brand *);
#if defined(_SYSCALL32)
diff --git a/usr/src/uts/common/sys/buf.h b/usr/src/uts/common/sys/buf.h
index a9191aed7c..cb8a6012fc 100644
--- a/usr/src/uts/common/sys/buf.h
+++ b/usr/src/uts/common/sys/buf.h
@@ -21,6 +21,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -186,6 +187,7 @@ struct biostats {
#define B_STARTED 0x2000000 /* io:::start probe called for buf */
#define B_ABRWRITE 0x4000000 /* Application based recovery active */
#define B_PAGE_NOWAIT 0x8000000 /* Skip the page if it is locked */
+#define B_INVALCURONLY 0x10000000 /* invalidate only for curproc */
/*
* There is some confusion over the meaning of B_FREE and B_INVAL and what
@@ -198,6 +200,12 @@ struct biostats {
* between the sole use of these two flags. In both cases, IO will be done
* if the page is not yet committed to storage.
*
+ * The B_INVALCURONLY flag modifies the behavior of the B_INVAL flag and is
+ * intended to be used in conjunction with B_INVAL. B_INVALCURONLY has no
+ * meaning on its own. When both B_INVALCURONLY and B_INVAL are set, then
+ * the mapping for the page is only invalidated for the current process.
+ * In this case, the page is not destroyed unless this was the final mapping.
+ *
* In order to discard pages without writing them back, (B_INVAL | B_TRUNC)
* should be used.
*
diff --git a/usr/src/uts/common/sys/contract/process.h b/usr/src/uts/common/sys/contract/process.h
index 21cf94dcf9..2c70d7c9f1 100644
--- a/usr/src/uts/common/sys/contract/process.h
+++ b/usr/src/uts/common/sys/contract/process.h
@@ -21,13 +21,12 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
*/
#ifndef _SYS_CONTRACT_PROCESS_H
#define _SYS_CONTRACT_PROCESS_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/contract.h>
#include <sys/time.h>
@@ -55,7 +54,8 @@ typedef struct cont_process cont_process_t;
#define CT_PR_NOORPHAN 0x2 /* kill when contract is abandoned */
#define CT_PR_PGRPONLY 0x4 /* only kill process group on fatal errors */
#define CT_PR_REGENT 0x8 /* automatically detach inherited contracts */
-#define CT_PR_ALLPARAM 0xf
+#define CT_PR_KEEP_EXEC 0x10 /* preserve template accross exec */
+#define CT_PR_ALLPARAM 0x1f
/*
* ctr_ev_* flags
diff --git a/usr/src/uts/common/sys/cpucaps.h b/usr/src/uts/common/sys/cpucaps.h
index 6063ff4380..6bc042108c 100644
--- a/usr/src/uts/common/sys/cpucaps.h
+++ b/usr/src/uts/common/sys/cpucaps.h
@@ -22,6 +22,7 @@
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011, 2012, Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_CPUCAPS_H
@@ -84,12 +85,16 @@ extern void cpucaps_zone_remove(zone_t *);
*/
extern int cpucaps_project_set(kproject_t *, rctl_qty_t);
extern int cpucaps_zone_set(zone_t *, rctl_qty_t);
+extern int cpucaps_zone_set_base(zone_t *, rctl_qty_t);
+extern int cpucaps_zone_set_burst_time(zone_t *, rctl_qty_t);
/*
* Get current CPU usage for a project/zone.
*/
extern rctl_qty_t cpucaps_project_get(kproject_t *);
extern rctl_qty_t cpucaps_zone_get(zone_t *);
+extern rctl_qty_t cpucaps_zone_get_base(zone_t *);
+extern rctl_qty_t cpucaps_zone_get_burst_time(zone_t *);
/*
* Scheduling class hooks into CPU caps framework.
diff --git a/usr/src/uts/common/sys/cpucaps_impl.h b/usr/src/uts/common/sys/cpucaps_impl.h
index 95afd21827..2cd4ed644d 100644
--- a/usr/src/uts/common/sys/cpucaps_impl.h
+++ b/usr/src/uts/common/sys/cpucaps_impl.h
@@ -22,6 +22,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011, 2012, Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_CPUCAPS_IMPL_H
@@ -66,8 +67,12 @@ typedef struct cpucap {
waitq_t cap_waitq; /* waitq for capped threads */
kstat_t *cap_kstat; /* cpucaps specific kstat */
int64_t cap_gen; /* zone cap specific */
+ hrtime_t cap_chk_value; /* effective CPU usage cap */
hrtime_t cap_value; /* scaled CPU usage cap */
hrtime_t cap_usage; /* current CPU usage */
+ hrtime_t cap_base; /* base CPU for burst */
+ u_longlong_t cap_burst_limit; /* max secs (in tics) for a burst */
+ u_longlong_t cap_bursting; /* # of ticks currently bursting */
disp_lock_t cap_usagelock; /* protects cap_usage above */
/*
* Per cap statistics.
@@ -75,6 +80,7 @@ typedef struct cpucap {
hrtime_t cap_maxusage; /* maximum cap usage */
u_longlong_t cap_below; /* # of ticks spend below the cap */
u_longlong_t cap_above; /* # of ticks spend above the cap */
+ u_longlong_t cap_above_base; /* # of ticks spent above the base */
} cpucap_t;
/*
diff --git a/usr/src/uts/common/sys/cred.h b/usr/src/uts/common/sys/cred.h
index 5056f9a511..914f132dc0 100644
--- a/usr/src/uts/common/sys/cred.h
+++ b/usr/src/uts/common/sys/cred.h
@@ -93,6 +93,7 @@ extern gid_t crgetgid(const cred_t *);
extern gid_t crgetrgid(const cred_t *);
extern gid_t crgetsgid(const cred_t *);
extern zoneid_t crgetzoneid(const cred_t *);
+extern zoneid_t crgetzonedid(const cred_t *);
extern projid_t crgetprojid(const cred_t *);
extern cred_t *crgetmapped(const cred_t *);
diff --git a/usr/src/uts/common/sys/ctf_api.h b/usr/src/uts/common/sys/ctf_api.h
index 04d73c3181..bc99f67d3f 100644
--- a/usr/src/uts/common/sys/ctf_api.h
+++ b/usr/src/uts/common/sys/ctf_api.h
@@ -24,7 +24,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc.
*/
/*
@@ -60,6 +60,65 @@ extern "C" {
typedef struct ctf_file ctf_file_t;
typedef long ctf_id_t;
+#define ECTF_BASE 1000 /* base value for libctf errnos */
+
+enum {
+ ECTF_FMT = ECTF_BASE, /* file is not in CTF or ELF format */
+ ECTF_ELFVERS, /* ELF version is more recent than libctf */
+ ECTF_CTFVERS, /* CTF version is more recent than libctf */
+ ECTF_ENDIAN, /* data is different endian-ness than lib */
+ ECTF_SYMTAB, /* symbol table uses invalid entry size */
+ ECTF_SYMBAD, /* symbol table data buffer invalid */
+ ECTF_STRBAD, /* string table data buffer invalid */
+ ECTF_CORRUPT, /* file data corruption detected */
+ ECTF_NOCTFDATA, /* ELF file does not contain CTF data */
+ ECTF_NOCTFBUF, /* buffer does not contain CTF data */
+ ECTF_NOSYMTAB, /* symbol table data is not available */
+ ECTF_NOPARENT, /* parent CTF container is not available */
+ ECTF_DMODEL, /* data model mismatch */
+ ECTF_MMAP, /* failed to mmap a data section */
+ ECTF_ZMISSING, /* decompression library not installed */
+ ECTF_ZINIT, /* failed to initialize decompression library */
+ ECTF_ZALLOC, /* failed to allocate decompression buffer */
+ ECTF_DECOMPRESS, /* failed to decompress CTF data */
+ ECTF_STRTAB, /* string table for this string is missing */
+ ECTF_BADNAME, /* string offset is corrupt w.r.t. strtab */
+ ECTF_BADID, /* invalid type ID number */
+ ECTF_NOTSOU, /* type is not a struct or union */
+ ECTF_NOTENUM, /* type is not an enum */
+ ECTF_NOTSUE, /* type is not a struct, union, or enum */
+ ECTF_NOTINTFP, /* type is not an integer or float */
+ ECTF_NOTARRAY, /* type is not an array */
+ ECTF_NOTREF, /* type does not reference another type */
+ ECTF_NAMELEN, /* buffer is too small to hold type name */
+ ECTF_NOTYPE, /* no type found corresponding to name */
+ ECTF_SYNTAX, /* syntax error in type name */
+ ECTF_NOTFUNC, /* symtab entry does not refer to a function */
+ ECTF_NOFUNCDAT, /* no func info available for function */
+ ECTF_NOTDATA, /* symtab entry does not refer to a data obj */
+ ECTF_NOTYPEDAT, /* no type info available for object */
+ ECTF_NOLABEL, /* no label found corresponding to name */
+ ECTF_NOLABELDATA, /* file does not contain any labels */
+ ECTF_NOTSUP, /* feature not supported */
+ ECTF_NOENUMNAM, /* enum element name not found */
+ ECTF_NOMEMBNAM, /* member name not found */
+ ECTF_RDONLY, /* CTF container is read-only */
+ ECTF_DTFULL, /* CTF type is full (no more members allowed) */
+ ECTF_FULL, /* CTF container is full */
+ ECTF_DUPMEMBER, /* duplicate member name definition */
+ ECTF_CONFLICT, /* conflicting type definition present */
+ ECTF_REFERENCED, /* type has outstanding references */
+ ECTF_NOTDYN, /* type is not a dynamic type */
+ ECTF_ELF, /* elf library failure */
+ ECTF_MCHILD, /* cannot merge child container */
+ ECTF_LABELEXISTS, /* label already exists */
+ ECTF_LCONFLICT, /* merged labels conflict */
+ ECTF_ZLIB, /* zlib library failure */
+ ECTF_CONVBKERR, /* CTF conversion backend error */
+ ECTF_CONVNOCSRC, /* No C source to convert from */
+ ECTF_NOCONVBKEND /* No applicable conversion backend */
+};
+
/*
* If the debugger needs to provide the CTF library with a set of raw buffers
* for use as the CTF data, symbol table, and string table, it can do so by
@@ -143,19 +202,24 @@ typedef struct ctf_lblinfo {
typedef int ctf_visit_f(const char *, ctf_id_t, ulong_t, int, void *);
typedef int ctf_member_f(const char *, ctf_id_t, ulong_t, void *);
typedef int ctf_enum_f(const char *, int, void *);
-typedef int ctf_type_f(ctf_id_t, void *);
+typedef int ctf_type_f(ctf_id_t, boolean_t, void *);
typedef int ctf_label_f(const char *, const ctf_lblinfo_t *, void *);
+typedef int ctf_function_f(const char *, ulong_t, ctf_funcinfo_t *, void *);
+typedef int ctf_object_f(const char *, ctf_id_t, ulong_t, void *);
+typedef int ctf_string_f(const char *, void *);
extern ctf_file_t *ctf_bufopen(const ctf_sect_t *, const ctf_sect_t *,
const ctf_sect_t *, int *);
extern ctf_file_t *ctf_fdopen(int, int *);
extern ctf_file_t *ctf_open(const char *, int *);
extern ctf_file_t *ctf_create(int *);
+extern ctf_file_t *ctf_fdcreate(int, int *);
extern ctf_file_t *ctf_dup(ctf_file_t *);
extern void ctf_close(ctf_file_t *);
extern ctf_file_t *ctf_parent_file(ctf_file_t *);
extern const char *ctf_parent_name(ctf_file_t *);
+extern const char *ctf_parent_label(ctf_file_t *);
extern int ctf_import(ctf_file_t *, ctf_file_t *);
extern int ctf_setmodel(ctf_file_t *, int);
@@ -165,15 +229,20 @@ extern void ctf_setspecific(ctf_file_t *, void *);
extern void *ctf_getspecific(ctf_file_t *);
extern int ctf_errno(ctf_file_t *);
+extern uint_t ctf_flags(ctf_file_t *);
extern const char *ctf_errmsg(int);
extern int ctf_version(int);
extern int ctf_func_info(ctf_file_t *, ulong_t, ctf_funcinfo_t *);
+extern int ctf_func_info_by_id(ctf_file_t *, ctf_id_t, ctf_funcinfo_t *);
extern int ctf_func_args(ctf_file_t *, ulong_t, uint_t, ctf_id_t *);
+extern int ctf_func_args_by_id(ctf_file_t *, ctf_id_t, uint_t, ctf_id_t *);
extern ctf_id_t ctf_lookup_by_name(ctf_file_t *, const char *);
extern ctf_id_t ctf_lookup_by_symbol(ctf_file_t *, ulong_t);
+extern char *ctf_symbol_name(ctf_file_t *, ulong_t, char *, size_t);
+
extern ctf_id_t ctf_type_resolve(ctf_file_t *, ctf_id_t);
extern ssize_t ctf_type_lname(ctf_file_t *, ctf_id_t, char *, size_t);
extern char *ctf_type_name(ctf_file_t *, ctf_id_t, char *, size_t);
@@ -182,6 +251,7 @@ extern char *ctf_type_qname(ctf_file_t *, ctf_id_t, char *, size_t,
extern ssize_t ctf_type_size(ctf_file_t *, ctf_id_t);
extern ssize_t ctf_type_align(ctf_file_t *, ctf_id_t);
extern int ctf_type_kind(ctf_file_t *, ctf_id_t);
+extern const char *ctf_kind_name(ctf_file_t *, int);
extern ctf_id_t ctf_type_reference(ctf_file_t *, ctf_id_t);
extern ctf_id_t ctf_type_pointer(ctf_file_t *, ctf_id_t);
extern int ctf_type_encoding(ctf_file_t *, ctf_id_t, ctf_encoding_t *);
@@ -201,37 +271,50 @@ extern int ctf_label_info(ctf_file_t *, const char *, ctf_lblinfo_t *);
extern int ctf_member_iter(ctf_file_t *, ctf_id_t, ctf_member_f *, void *);
extern int ctf_enum_iter(ctf_file_t *, ctf_id_t, ctf_enum_f *, void *);
-extern int ctf_type_iter(ctf_file_t *, ctf_type_f *, void *);
+extern int ctf_type_iter(ctf_file_t *, boolean_t, ctf_type_f *, void *);
extern int ctf_label_iter(ctf_file_t *, ctf_label_f *, void *);
+extern int ctf_function_iter(ctf_file_t *, ctf_function_f *, void *);
+extern int ctf_object_iter(ctf_file_t *, ctf_object_f *, void *);
+extern int ctf_string_iter(ctf_file_t *, ctf_string_f *, void *);
extern ctf_id_t ctf_add_array(ctf_file_t *, uint_t, const ctf_arinfo_t *);
-extern ctf_id_t ctf_add_const(ctf_file_t *, uint_t, ctf_id_t);
+extern ctf_id_t ctf_add_const(ctf_file_t *, uint_t, const char *, ctf_id_t);
extern ctf_id_t ctf_add_enum(ctf_file_t *, uint_t, const char *);
extern ctf_id_t ctf_add_float(ctf_file_t *, uint_t,
const char *, const ctf_encoding_t *);
extern ctf_id_t ctf_add_forward(ctf_file_t *, uint_t, const char *, uint_t);
-extern ctf_id_t ctf_add_function(ctf_file_t *, uint_t,
- const ctf_funcinfo_t *, const ctf_id_t *);
+extern ctf_id_t ctf_add_funcptr(ctf_file_t *, uint_t, const ctf_funcinfo_t *,
+ const ctf_id_t *);
extern ctf_id_t ctf_add_integer(ctf_file_t *, uint_t,
const char *, const ctf_encoding_t *);
-extern ctf_id_t ctf_add_pointer(ctf_file_t *, uint_t, ctf_id_t);
+extern ctf_id_t ctf_add_pointer(ctf_file_t *, uint_t, const char *, ctf_id_t);
extern ctf_id_t ctf_add_type(ctf_file_t *, ctf_file_t *, ctf_id_t);
extern ctf_id_t ctf_add_typedef(ctf_file_t *, uint_t, const char *, ctf_id_t);
-extern ctf_id_t ctf_add_restrict(ctf_file_t *, uint_t, ctf_id_t);
+extern ctf_id_t ctf_add_restrict(ctf_file_t *, uint_t, const char *, ctf_id_t);
extern ctf_id_t ctf_add_struct(ctf_file_t *, uint_t, const char *);
extern ctf_id_t ctf_add_union(ctf_file_t *, uint_t, const char *);
-extern ctf_id_t ctf_add_volatile(ctf_file_t *, uint_t, ctf_id_t);
+extern ctf_id_t ctf_add_volatile(ctf_file_t *, uint_t, const char *, ctf_id_t);
extern int ctf_add_enumerator(ctf_file_t *, ctf_id_t, const char *, int);
-extern int ctf_add_member(ctf_file_t *, ctf_id_t, const char *, ctf_id_t);
+extern int ctf_add_member(ctf_file_t *, ctf_id_t, const char *, ctf_id_t,
+ ulong_t);
+
+
+extern int ctf_add_function(ctf_file_t *, ulong_t, const ctf_funcinfo_t *,
+ const ctf_id_t *);
+extern int ctf_add_object(ctf_file_t *, ulong_t, ctf_id_t);
+extern int ctf_add_label(ctf_file_t *, const char *, ctf_id_t, uint_t);
extern int ctf_set_array(ctf_file_t *, ctf_id_t, const ctf_arinfo_t *);
+extern int ctf_set_root(ctf_file_t *, ctf_id_t, const boolean_t);
+extern int ctf_set_size(ctf_file_t *, ctf_id_t, const ulong_t);
extern int ctf_delete_type(ctf_file_t *, ctf_id_t);
extern int ctf_update(ctf_file_t *);
extern int ctf_discard(ctf_file_t *);
extern int ctf_write(ctf_file_t *, int);
+extern void ctf_dataptr(ctf_file_t *, const void **, size_t *);
#ifdef _KERNEL
diff --git a/usr/src/uts/common/sys/dktp/dadk.h b/usr/src/uts/common/sys/dktp/dadk.h
index f5c990e7c0..2178ad1f0d 100644
--- a/usr/src/uts/common/sys/dktp/dadk.h
+++ b/usr/src/uts/common/sys/dktp/dadk.h
@@ -65,6 +65,8 @@ struct dadk {
kstat_t *dad_errstats; /* error stats */
kmutex_t dad_cmd_mutex;
int dad_cmd_count;
+ uint32_t dad_err_cnt; /* number of recent errors */
+ hrtime_t dad_last_log; /* time of last error log */
};
#define DAD_SECSIZ dad_phyg.g_secsiz
diff --git a/usr/src/uts/common/sys/dld.h b/usr/src/uts/common/sys/dld.h
index fb2a0749d3..4cd93be56e 100644
--- a/usr/src/uts/common/sys/dld.h
+++ b/usr/src/uts/common/sys/dld.h
@@ -21,6 +21,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011 Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_DLD_H
@@ -191,6 +192,7 @@ typedef struct dld_ioc_rename {
datalink_id_t dir_linkid1;
datalink_id_t dir_linkid2;
char dir_link[MAXLINKNAMELEN];
+ boolean_t dir_zoneinit;
} dld_ioc_rename_t;
/*
@@ -203,6 +205,7 @@ typedef struct dld_ioc_rename {
typedef struct dld_ioc_zid {
zoneid_t diz_zid;
datalink_id_t diz_linkid;
+ boolean_t diz_transient;
} dld_ioc_zid_t;
/*
@@ -350,6 +353,7 @@ typedef struct dld_hwgrpinfo {
*/
typedef int (*dld_capab_func_t)(void *, uint_t, void *, uint_t);
+#define DI_DIRECT_RAW 0x1
/*
* Direct Tx/Rx capability.
*/
@@ -374,6 +378,9 @@ typedef struct dld_capab_direct_s {
/* flow control "can I put on a ring" callback */
uintptr_t di_tx_fctl_df; /* canput-like callback */
void *di_tx_fctl_dh;
+
+ /* flags that control our behavior */
+ uint_t di_flags;
} dld_capab_direct_t;
/*
diff --git a/usr/src/uts/common/sys/dld_impl.h b/usr/src/uts/common/sys/dld_impl.h
index a76a927e59..81708aad38 100644
--- a/usr/src/uts/common/sys/dld_impl.h
+++ b/usr/src/uts/common/sys/dld_impl.h
@@ -53,7 +53,8 @@ typedef enum {
typedef enum {
DLD_UNINITIALIZED,
DLD_PASSIVE,
- DLD_ACTIVE
+ DLD_ACTIVE,
+ DLD_EXCLUSIVE
} dld_passivestate_t;
/*
@@ -256,6 +257,8 @@ extern void dld_str_rx_unitdata(void *, mac_resource_handle_t,
extern void dld_str_notify_ind(dld_str_t *);
extern mac_tx_cookie_t str_mdata_fastpath_put(dld_str_t *, mblk_t *,
uintptr_t, uint16_t);
+extern mac_tx_cookie_t str_mdata_raw_fastpath_put(dld_str_t *, mblk_t *,
+ uintptr_t, uint16_t);
extern int dld_flow_ctl_callb(dld_str_t *, uint64_t,
int (*func)(), void *);
diff --git a/usr/src/uts/common/sys/dld_ioc.h b/usr/src/uts/common/sys/dld_ioc.h
index 2f519a8eda..093a4dc0c3 100644
--- a/usr/src/uts/common/sys/dld_ioc.h
+++ b/usr/src/uts/common/sys/dld_ioc.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
#ifndef _SYS_DLD_IOC_H
@@ -59,6 +60,7 @@ extern "C" {
#define IPTUN_IOC 0x454A
#define BRIDGE_IOC 0xB81D
#define IBPART_IOC 0x6171
+#define OVERLAY_IOC 0x2005
/* GLDv3 modules use these macros to generate unique ioctl commands */
#define DLDIOC(cmdid) DLD_IOC_CMD(DLD_IOC, (cmdid))
@@ -68,6 +70,7 @@ extern "C" {
#define IPTUNIOC(cmdid) DLD_IOC_CMD(IPTUN_IOC, (cmdid))
#define BRIDGEIOC(cmdid) DLD_IOC_CMD(BRIDGE_IOC, (cmdid))
#define IBPARTIOC(cmdid) DLD_IOC_CMD(IBPART_IOC, (cmdid))
+#define OVERLAYIOC(cmdid) DLD_IOC_CMD(OVERLAY_IOC, (cmdid))
#ifdef _KERNEL
diff --git a/usr/src/uts/common/sys/dlpi.h b/usr/src/uts/common/sys/dlpi.h
index 5bc2bd41c5..34f1c17236 100644
--- a/usr/src/uts/common/sys/dlpi.h
+++ b/usr/src/uts/common/sys/dlpi.h
@@ -107,6 +107,7 @@ typedef struct dl_ipnetinfo {
#define DL_PASSIVE_REQ 0x114 /* Allow access to aggregated link */
#define DL_INTR_MODE_REQ 0x115 /* Request Rx processing in INTR mode */
#define DL_NOTIFY_CONF 0x116 /* Notification from upstream */
+#define DL_EXCLUSIVE_REQ 0x117 /* Make bind active */
/*
* Primitives used for Connectionless Service
@@ -388,6 +389,8 @@ typedef struct dl_ipnetinfo {
#define DL_PROMISC_PHYS 0x01 /* promiscuous mode at phys level */
#define DL_PROMISC_SAP 0x02 /* promiscuous mode at sap level */
#define DL_PROMISC_MULTI 0x03 /* promiscuous mode for multicast */
+#define DL_PROMISC_RX_ONLY 0x04 /* above only enabled for rx */
+#define DL_PROMISC_FIXUPS 0x05 /* above will be fixed up */
/*
* DLPI notification codes for DL_NOTIFY_REQ primitives.
@@ -1107,6 +1110,13 @@ typedef struct {
} dl_intr_mode_req_t;
/*
+ * DL_EXCLUSIVE_REQ, M_PROTO type
+ */
+typedef struct {
+ t_uscalar_t dl_primitive;
+} dl_exclusive_req_t;
+
+/*
* CONNECTION-ORIENTED SERVICE PRIMITIVES
*/
@@ -1528,6 +1538,7 @@ union DL_primitives {
dl_control_ack_t control_ack;
dl_passive_req_t passive_req;
dl_intr_mode_req_t intr_mode_req;
+ dl_exclusive_req_t exclusive_req;
};
#define DL_INFO_REQ_SIZE sizeof (dl_info_req_t)
@@ -1596,6 +1607,7 @@ union DL_primitives {
#define DL_CONTROL_ACK_SIZE sizeof (dl_control_ack_t)
#define DL_PASSIVE_REQ_SIZE sizeof (dl_passive_req_t)
#define DL_INTR_MODE_REQ_SIZE sizeof (dl_intr_mode_req_t)
+#define DL_EXCLUSIVE_REQ_SIZE sizeof (dl_exclusive_req_t)
#ifdef _KERNEL
/*
diff --git a/usr/src/uts/common/sys/dls.h b/usr/src/uts/common/sys/dls.h
index 6bd2bbe35a..81f9e2abac 100644
--- a/usr/src/uts/common/sys/dls.h
+++ b/usr/src/uts/common/sys/dls.h
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
*/
#ifndef _SYS_DLS_H
@@ -85,6 +86,8 @@ typedef struct dls_link_s dls_link_t;
#define DLS_PROMISC_SAP 0x00000001
#define DLS_PROMISC_MULTI 0x00000002
#define DLS_PROMISC_PHYS 0x00000004
+#define DLS_PROMISC_RX_ONLY 0x00000008
+#define DLS_PROMISC_FIXUPS 0x00000010
extern int dls_open(dls_link_t *, dls_dl_handle_t, dld_str_t *);
extern void dls_close(dld_str_t *);
@@ -106,11 +109,13 @@ extern void str_notify(void *, mac_notify_type_t);
extern int dls_devnet_open(const char *,
dls_dl_handle_t *, dev_t *);
+extern int dls_devnet_open_in_zone(const char *,
+ dls_dl_handle_t *, dev_t *, zoneid_t);
extern void dls_devnet_close(dls_dl_handle_t);
extern boolean_t dls_devnet_rebuild();
extern int dls_devnet_rename(datalink_id_t, datalink_id_t,
- const char *);
+ const char *, boolean_t);
extern int dls_devnet_create(mac_handle_t, datalink_id_t,
zoneid_t);
extern int dls_devnet_destroy(mac_handle_t, datalink_id_t *,
@@ -122,12 +127,13 @@ extern int dls_devnet_hold_by_dev(dev_t, dls_dl_handle_t *);
extern void dls_devnet_rele(dls_dl_handle_t);
extern void dls_devnet_prop_task_wait(dls_dl_handle_t);
+extern const char *dls_devnet_link(dls_dl_handle_t);
extern const char *dls_devnet_mac(dls_dl_handle_t);
extern uint16_t dls_devnet_vid(dls_dl_handle_t);
extern datalink_id_t dls_devnet_linkid(dls_dl_handle_t);
extern int dls_devnet_dev2linkid(dev_t, datalink_id_t *);
extern int dls_devnet_phydev(datalink_id_t, dev_t *);
-extern int dls_devnet_setzid(dls_dl_handle_t, zoneid_t);
+extern int dls_devnet_setzid(dls_dl_handle_t, zoneid_t, boolean_t);
extern zoneid_t dls_devnet_getzid(dls_dl_handle_t);
extern zoneid_t dls_devnet_getownerzid(dls_dl_handle_t);
extern boolean_t dls_devnet_islinkvisible(datalink_id_t, zoneid_t);
@@ -141,6 +147,8 @@ extern int dls_mgmt_update(const char *, uint32_t, boolean_t,
extern int dls_mgmt_get_linkinfo(datalink_id_t, char *,
datalink_class_t *, uint32_t *, uint32_t *);
extern int dls_mgmt_get_linkid(const char *, datalink_id_t *);
+extern int dls_mgmt_get_linkid_in_zone(const char *,
+ datalink_id_t *, zoneid_t);
extern datalink_id_t dls_mgmt_get_next(datalink_id_t, datalink_class_t,
datalink_media_t, uint32_t);
extern int dls_devnet_macname2linkid(const char *,
diff --git a/usr/src/uts/common/sys/dls_impl.h b/usr/src/uts/common/sys/dls_impl.h
index 60f51c47b5..329f8dd08e 100644
--- a/usr/src/uts/common/sys/dls_impl.h
+++ b/usr/src/uts/common/sys/dls_impl.h
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
*/
#ifndef _SYS_DLS_IMPL_H
@@ -46,11 +47,12 @@ typedef struct dls_multicst_addr_s {
} dls_multicst_addr_t;
struct dls_link_s { /* Protected by */
- char dl_name[MAXNAMELEN]; /* SL */
+ char dl_name[MAXNAMELEN]; /* RO */
uint_t dl_ddi_instance; /* SL */
mac_handle_t dl_mh; /* SL */
mac_client_handle_t dl_mch; /* SL */
mac_unicast_handle_t dl_mah; /* SL */
+ mac_notify_handle_t dl_mnh; /* SL */
const mac_info_t *dl_mip; /* SL */
uint_t dl_ref; /* SL */
mod_hash_t *dl_str_hash; /* SL, modhash lock */
@@ -61,6 +63,7 @@ struct dls_link_s { /* Protected by */
uint_t dl_zone_ref;
link_tagmode_t dl_tagmode; /* atomic */
uint_t dl_nonip_cnt; /* SL */
+ uint_t dl_exclusive; /* SL */
};
typedef struct dls_head_s {
@@ -96,13 +99,16 @@ extern void dls_create_str_kstats(dld_str_t *);
extern int dls_stat_update(kstat_t *, dls_link_t *, int);
extern int dls_stat_create(const char *, int, const char *,
zoneid_t, int (*)(struct kstat *, int), void *,
- kstat_t **);
+ kstat_t **, zoneid_t);
+extern void dls_stat_delete(kstat_t *);
extern int dls_devnet_open_by_dev(dev_t, dls_link_t **,
dls_dl_handle_t *);
extern int dls_devnet_hold_link(datalink_id_t, dls_dl_handle_t *,
dls_link_t **);
extern void dls_devnet_rele_link(dls_dl_handle_t, dls_link_t *);
+extern int dls_devnet_hold_tmp_by_link(dls_link_t *,
+ dls_dl_handle_t *);
extern void dls_init(void);
extern int dls_fini(void);
@@ -126,6 +132,7 @@ extern void dls_mgmt_init(void);
extern void dls_mgmt_fini(void);
extern int dls_mgmt_get_phydev(datalink_id_t, dev_t *);
+extern int dls_exclusive_set(dld_str_t *, boolean_t);
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/sys/dls_mgmt.h b/usr/src/uts/common/sys/dls_mgmt.h
index b4032c24d6..214e225ac9 100644
--- a/usr/src/uts/common/sys/dls_mgmt.h
+++ b/usr/src/uts/common/sys/dls_mgmt.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#ifndef _DLS_MGMT_H
@@ -46,13 +47,15 @@ typedef enum {
DATALINK_CLASS_SIMNET = 0x20,
DATALINK_CLASS_BRIDGE = 0x40,
DATALINK_CLASS_IPTUN = 0x80,
- DATALINK_CLASS_PART = 0x100
+ DATALINK_CLASS_PART = 0x100,
+ DATALINK_CLASS_OVERLAY = 0x200
} datalink_class_t;
#define DATALINK_CLASS_ALL (DATALINK_CLASS_PHYS | \
DATALINK_CLASS_VLAN | DATALINK_CLASS_AGGR | DATALINK_CLASS_VNIC | \
DATALINK_CLASS_ETHERSTUB | DATALINK_CLASS_SIMNET | \
- DATALINK_CLASS_BRIDGE | DATALINK_CLASS_IPTUN | DATALINK_CLASS_PART)
+ DATALINK_CLASS_BRIDGE | DATALINK_CLASS_IPTUN | DATALINK_CLASS_PART | \
+ DATALINK_CLASS_OVERLAY)
/*
* A combination of flags and media.
@@ -165,6 +168,7 @@ typedef struct dlmgmt_door_getname {
typedef struct dlmgmt_door_getlinkid {
int ld_cmd;
char ld_link[MAXLINKNAMELEN];
+ zoneid_t ld_zoneid;
} dlmgmt_door_getlinkid_t;
typedef struct dlmgmt_door_getnext_s {
@@ -225,6 +229,7 @@ typedef struct dlmgmt_getattr_retval_s {
char lr_attrval[MAXLINKATTRVALLEN];
} dlmgmt_getattr_retval_t;
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/sys/elf.h b/usr/src/uts/common/sys/elf.h
index dd1eecc70d..b88d215336 100644
--- a/usr/src/uts/common/sys/elf.h
+++ b/usr/src/uts/common/sys/elf.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright 2012 DEY Storage Systems, Inc. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
*/
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
@@ -348,6 +348,11 @@ typedef struct {
#define PT_GNU_STACK 0x6474e551 /* Indicates stack executability */
#define PT_GNU_RELRO 0x6474e552 /* Read-only after relocation */
+/*
+ * Linux specific program headers not even used by Linux (!!)
+ */
+#define PT_PAX_FLAGS 0x65041580 /* PaX flags (see below) */
+
#define PT_LOSUNW 0x6ffffffa
#define PT_SUNWBSS 0x6ffffffa /* Sun Specific segment (unused) */
#define PT_SUNWSTACK 0x6ffffffb /* describes the stack segment */
@@ -363,6 +368,45 @@ typedef struct {
#define PF_W 0x2
#define PF_X 0x1
+/*
+ * PaX is a regrettable series of never-integrated Linux patches for a
+ * facility to provide additional protections on memory pages for purposes of
+ * increasing security, and for allowing binaries to demand (or refuse) those
+ * protections via the PT_PAX_FLAGS program header. (Portents of its
+ * rudderless existence, "PaX" is a term of indefinite origin written by an
+ * unknown group of people.) This facility is unfortunate in any number of
+ * ways, and was largely obviated by the broad adoption of non-executable
+ * stacks at any rate -- but it lives on in binaries that continue to mark
+ * themselves to explicitly refuse the (never-integrated, now-obviated)
+ * facility. One might cringe that PaX overloads the meaning of the p_flags
+ * to specify protections, but that is the least of its transgressions:
+ * instead of using one p_type constant to explicitly enable a series of
+ * protections and another to explicitly disable others, it insists on
+ * conflating both actions into PT_PAX_FLAGS. The resulting doubling of
+ * constant definitions (two constant definitions for every protection instead
+ * of merely one) assures that the values can't even fit in the eight
+ * PF_MASKOS bits putatively defined to provide a modicum of cleanliness for
+ * such filthy functionality. And were all of this not enough, there is one
+ * final nomenclature insult to be added to this semantic injury: the
+ * constants for the p_flags don't even embed "_PAX_" in their name -- despite
+ * the fact that this is their only purpose! We resist the temptation to
+ * right this final wrong here; we grit our teeth and provide exactly the
+ * Linux definitions -- or rather, what would have been the Linux definitions
+ * had this belching jalopy ever been permitted to crash itself into mainline.
+ */
+#define PF_PAGEEXEC 0x00000010 /* PaX: enable PAGEEXEC */
+#define PF_NOPAGEEXEC 0x00000020 /* PaX: disable PAGEEXEC */
+#define PF_SEGMEXEC 0x00000040 /* PaX: enable SEGMEXEC */
+#define PF_NOSEGMEXEC 0x00000080 /* PaX: disable SEGMEXEC */
+#define PF_MPROTECT 0x00000100 /* PaX: enable MPROTECT */
+#define PF_NOMPROTECT 0x00000200 /* PaX: disable MPROTECT */
+#define PF_RANDEXEC 0x00000400 /* PaX: enable RANDEXEC */
+#define PF_NORANDEXEC 0x00000800 /* PaX: disable RANDEXEC */
+#define PF_EMUTRAMP 0x00001000 /* PaX: enable EMUTRAMP */
+#define PF_NOEMUTRAMP 0x00002000 /* PaX: disable EMUTRAMP */
+#define PF_RANDMMAP 0x00004000 /* PaX: enable RANDMMAP */
+#define PF_NORANDMMAP 0x00008000 /* PaX: disable RANDMMAP */
+
#define PF_MASKOS 0x0ff00000 /* OS specific values */
#define PF_MASKPROC 0xf0000000 /* processor specific values */
diff --git a/usr/src/uts/common/sys/exec.h b/usr/src/uts/common/sys/exec.h
index b5e2c58be5..b2db3f2987 100644
--- a/usr/src/uts/common/sys/exec.h
+++ b/usr/src/uts/common/sys/exec.h
@@ -26,6 +26,10 @@
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
+/*
+ * Copyright 2016, Joyent, Inc.
+ */
+
#ifndef _SYS_EXEC_H
#define _SYS_EXEC_H
@@ -102,10 +106,14 @@ typedef struct uarg {
vnode_t *ex_vp;
char *emulator;
char *brandname;
+ const char *brand_nroot;
char *auxp_auxflags; /* addr of auxflags auxv on the user stack */
char *auxp_brand; /* address of first brand auxv on user stack */
cred_t *pfcred;
boolean_t scrubenv;
+ uintptr_t maxstack;
+ boolean_t stk_prot_override;
+ uintptr_t commpage;
} uarg_t;
/*
@@ -175,7 +183,7 @@ struct execsw {
int (*exec_func)(struct vnode *vp, struct execa *uap,
struct uarg *args, struct intpdata *idata, int level,
long *execsz, int setid, caddr_t exec_file,
- struct cred *cred, int brand_action);
+ struct cred *cred, int *brand_action);
int (*exec_core)(struct vnode *vp, struct proc *p,
struct cred *cred, rlim64_t rlimit, int sig,
core_content_t content);
@@ -213,7 +221,7 @@ extern int exec_common(const char *fname, const char **argp,
const char **envp, int brand_action);
extern int gexec(vnode_t **vp, struct execa *uap, struct uarg *args,
struct intpdata *idata, int level, long *execsz, caddr_t exec_file,
- struct cred *cred, int brand_action);
+ struct cred *cred, int *brand_action);
extern struct execsw *allocate_execsw(char *name, char *magic,
size_t magic_size);
extern struct execsw *findexecsw(char *magic);
@@ -238,16 +246,22 @@ extern void exec_set_sp(size_t);
* when compiling the 32-bit compatability elf code in the elfexec module.
*/
extern int elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int,
- long *, int, caddr_t, cred_t *, int);
+ long *, int, caddr_t, cred_t *, int *);
extern int mapexec_brand(vnode_t *, uarg_t *, Ehdr *, Addr *,
- intptr_t *, caddr_t, int *, caddr_t *, caddr_t *, size_t *, uintptr_t *);
+ intptr_t *, caddr_t, char **, caddr_t *, caddr_t *, size_t *,
+ uintptr_t *, uintptr_t *);
+extern int elfreadhdr(vnode_t *, cred_t *, Ehdr *, int *, caddr_t *,
+ ssize_t *);
#endif /* !_ELF32_COMPAT */
#if defined(_LP64)
extern int elf32exec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int,
- long *, int, caddr_t, cred_t *, int);
+ long *, int, caddr_t, cred_t *, int *);
extern int mapexec32_brand(vnode_t *, uarg_t *, Elf32_Ehdr *, Elf32_Addr *,
- intptr_t *, caddr_t, int *, caddr_t *, caddr_t *, size_t *, uintptr_t *);
+ intptr_t *, caddr_t, char **, caddr_t *, caddr_t *, size_t *,
+ uintptr_t *, uintptr_t *);
+extern int elf32readhdr(vnode_t *, cred_t *, Elf32_Ehdr *, int *, caddr_t *,
+ ssize_t *);
#endif /* _LP64 */
/*
diff --git a/usr/src/uts/common/sys/frameio.h b/usr/src/uts/common/sys/frameio.h
new file mode 100644
index 0000000000..54e6dbeedf
--- /dev/null
+++ b/usr/src/uts/common/sys/frameio.h
@@ -0,0 +1,107 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_FRAMEIO_H
+#define _SYS_FRAMEIO_H
+
+/*
+ * Frame I/O definitions
+ */
+
+#include <sys/types.h>
+
+#ifdef _KERNEL
+/* Kernel only headers */
+#include <sys/stream.h>
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * An individual frame vector component. Collections of these are used to make
+ * ioctls.
+ */
+typedef struct framevec {
+ void *fv_buf; /* Buffer with data */
+ size_t fv_buflen; /* Size of the buffer */
+ size_t fv_actlen; /* Amount of buffer consumed, ignore on error */
+} framevec_t;
+
+/*
+ * The base unit used with frameio.
+ */
+typedef struct frameio {
+ uint_t fio_version; /* Should always be FRAMEIO_CURRENT_VERSION */
+ uint_t fio_nvpf; /* How many vectors make up one frame */
+ uint_t fio_nvecs; /* The total number of vectors */
+ framevec_t fio_vecs[]; /* C99 VLA */
+} frameio_t;
+
+
+#define FRAMEIO_VERSION_ONE 1
+#define FRAMEIO_CURRENT_VERSION FRAMEIO_VERSION_ONE
+
+#define FRAMEIO_NVECS_MAX 32
+
+/*
+ * Definitions for kernel modules to include as helpers. These are consolidation
+ * private.
+ */
+#ifdef _KERNEL
+
+/*
+ * 32-bit versions for 64-bit kernels
+ */
+typedef struct framevec32 {
+ caddr32_t fv_buf;
+ size32_t fv_buflen;
+ size32_t fv_actlen;
+} framevec32_t;
+
+typedef struct frameio32 {
+ uint_t fio_version;
+ uint_t fio_vecspframe;
+ uint_t fio_nvecs;
+ framevec32_t fio_vecs[];
+} frameio32_t;
+
+/*
+ * Describe the different ways that vectors should map to frames.
+ */
+typedef enum frameio_write_mblk_map {
+ MAP_BLK_FRAME
+} frameio_write_mblk_map_t;
+
+int frameio_init(void);
+void frameio_fini(void);
+frameio_t *frameio_alloc(int);
+void frameio_free(frameio_t *);
+int frameio_hdr_copyin(frameio_t *, int, const void *, uint_t);
+int frameio_mblk_chain_read(frameio_t *, mblk_t **, int *, int);
+int frameio_mblk_chain_write(frameio_t *, frameio_write_mblk_map_t, mblk_t *,
+ int *, int);
+int frameio_hdr_copyout(frameio_t *, int, void *, uint_t);
+size_t frameio_frame_length(frameio_t *, framevec_t *);
+void frameio_mark_consumed(frameio_t *, int);
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_FRAMEIO_H */
diff --git a/usr/src/uts/common/sys/fs/hyprlofs.h b/usr/src/uts/common/sys/fs/hyprlofs.h
new file mode 100644
index 0000000000..b8c4149df2
--- /dev/null
+++ b/usr/src/uts/common/sys/fs/hyprlofs.h
@@ -0,0 +1,91 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2012, Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_FS_HYPRLOFS_H
+#define _SYS_FS_HYPRLOFS_H
+
+#include <sys/param.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * hyprlofs ioctl numbers.
+ */
+#define HYPRLOFS_IOC ('H' << 8)
+
+#define HYPRLOFS_ADD_ENTRIES (HYPRLOFS_IOC | 1)
+#define HYPRLOFS_RM_ENTRIES (HYPRLOFS_IOC | 2)
+#define HYPRLOFS_RM_ALL (HYPRLOFS_IOC | 3)
+#define HYPRLOFS_GET_ENTRIES (HYPRLOFS_IOC | 4)
+
+typedef struct {
+ char *hle_path;
+ uint_t hle_plen;
+ char *hle_name;
+ uint_t hle_nlen;
+} hyprlofs_entry_t;
+
+typedef struct {
+ hyprlofs_entry_t *hle_entries;
+ uint_t hle_len;
+} hyprlofs_entries_t;
+
+typedef struct {
+ char hce_path[MAXPATHLEN];
+ char hce_name[MAXPATHLEN];
+} hyprlofs_curr_entry_t;
+
+typedef struct {
+ hyprlofs_curr_entry_t *hce_entries;
+ uint_t hce_cnt;
+} hyprlofs_curr_entries_t;
+
+#ifdef _KERNEL
+typedef struct {
+ caddr32_t hle_path;
+ uint_t hle_plen;
+ caddr32_t hle_name;
+ uint_t hle_nlen;
+} hyprlofs_entry32_t;
+
+typedef struct {
+ caddr32_t hle_entries;
+ uint_t hle_len;
+} hyprlofs_entries32_t;
+
+typedef struct {
+ caddr32_t hce_entries;
+ uint_t hce_cnt;
+} hyprlofs_curr_entries32_t;
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_FS_HYPRLOFS_H */
diff --git a/usr/src/uts/common/sys/fs/hyprlofs_info.h b/usr/src/uts/common/sys/fs/hyprlofs_info.h
new file mode 100644
index 0000000000..38389f77d9
--- /dev/null
+++ b/usr/src/uts/common/sys/fs/hyprlofs_info.h
@@ -0,0 +1,174 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2012, Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_FS_HYPRLOFS_INFO_H
+#define _SYS_FS_HYPRLOFS_INFO_H
+
+#include <sys/t_lock.h>
+#include <vm/seg.h>
+#include <vm/seg_vn.h>
+#include <sys/vfs_opreg.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * hlnode is the file system dependent node for hyprlofs.
+ * It is modeled on the tmpfs tmpnode.
+ *
+ * hln_rwlock protects access of the directory list at hln_dir
+ * as well as syncronizing read/writes to directory hlnodes.
+ * hln_tlock protects updates to hln_mode and hln_nlink.
+ * hln_tlock doesn't require any hlnode locks.
+ */
+typedef struct hlnode {
+ struct hlnode *hln_back; /* linked list of hlnodes */
+ struct hlnode *hln_forw; /* linked list of hlnodes */
+ union {
+ struct {
+ struct hldirent *un_dirlist; /* dirent list */
+ uint_t un_dirents; /* number of dirents */
+ } un_dirstruct;
+ vnode_t *un_realvp; /* real vnode */
+ } un_hlnode;
+ vnode_t *hln_vnode; /* vnode for this hlnode */
+ int hln_gen; /* pseudo gen num for hlfid */
+ int hln_looped; /* flag indicating loopback */
+ vattr_t hln_attr; /* attributes */
+ krwlock_t hln_rwlock; /* rw - serialize mods and */
+ /* directory updates */
+ kmutex_t hln_tlock; /* time, flag, and nlink lock */
+} hlnode_t;
+
+/*
+ * hyprlofs per-mount data structure.
+ * All fields are protected by hlm_contents.
+ */
+typedef struct {
+ vfs_t *hlm_vfsp; /* filesystem's vfs struct */
+ hlnode_t *hlm_rootnode; /* root hlnode */
+ char *hlm_mntpath; /* name of hyprlofs mount point */
+ dev_t hlm_dev; /* unique dev # of mounted `device' */
+ uint_t hlm_gen; /* pseudo generation number for files */
+ kmutex_t hlm_contents; /* lock for hlfsmount structure */
+} hlfsmount_t;
+
+/*
+ * hyprlofs directories are made up of a linked list of hldirent structures
+ * hanging off directory hlnodes. File names are not fixed length,
+ * but are null terminated.
+ */
+typedef struct hldirent {
+ hlnode_t *hld_hlnode; /* hlnode for this file */
+ struct hldirent *hld_next; /* next directory entry */
+ struct hldirent *hld_prev; /* prev directory entry */
+ uint_t hld_offset; /* "offset" of dir entry */
+ uint_t hld_hash; /* a hash of td_name */
+ struct hldirent *hld_link; /* linked via the hash table */
+ hlnode_t *hld_parent; /* parent, dir we are in */
+ char *hld_name; /* must be null terminated */
+ /* max length is MAXNAMELEN */
+} hldirent_t;
+
+/*
+ * hlfid overlays the fid structure (for VFS_VGET)
+ */
+typedef struct {
+ uint16_t hlfid_len;
+ ino32_t hlfid_ino;
+ int32_t hlfid_gen;
+} hlfid_t;
+
+/*
+ * File system independent to hyprlofs conversion macros
+ */
+#define VFSTOHLM(vfsp) ((hlfsmount_t *)(vfsp)->vfs_data)
+#define VTOHLM(vp) ((hlfsmount_t *)(vp)->v_vfsp->vfs_data)
+#define VTOHLN(vp) ((hlnode_t *)(vp)->v_data)
+#define HLNTOV(tp) ((tp)->hln_vnode)
+#define REALVP(vp) ((vnode_t *)VTOHLN(vp)->hln_realvp)
+#define hlnode_hold(tp) VN_HOLD(HLNTOV(tp))
+#define hlnode_rele(tp) VN_RELE(HLNTOV(tp))
+
+#define hln_dir un_hlnode.un_dirstruct.un_dirlist
+#define hln_dirents un_hlnode.un_dirstruct.un_dirents
+#define hln_realvp un_hlnode.un_realvp
+
+/*
+ * Attributes
+ */
+#define hln_mask hln_attr.va_mask
+#define hln_type hln_attr.va_type
+#define hln_mode hln_attr.va_mode
+#define hln_uid hln_attr.va_uid
+#define hln_gid hln_attr.va_gid
+#define hln_fsid hln_attr.va_fsid
+#define hln_nodeid hln_attr.va_nodeid
+#define hln_nlink hln_attr.va_nlink
+#define hln_size hln_attr.va_size
+#define hln_atime hln_attr.va_atime
+#define hln_mtime hln_attr.va_mtime
+#define hln_ctime hln_attr.va_ctime
+#define hln_rdev hln_attr.va_rdev
+#define hln_blksize hln_attr.va_blksize
+#define hln_nblocks hln_attr.va_nblocks
+#define hln_seq hln_attr.va_seq
+
+/*
+ * enums
+ */
+enum de_op { DE_CREATE, DE_MKDIR }; /* direnter ops */
+enum dr_op { DR_REMOVE, DR_RMDIR }; /* dirremove ops */
+
+/*
+ * hyprlofs_minfree is the amount (in pages) of anonymous memory that hyprlofs
+ * leaves free for the rest of the system. The default value for
+ * hyprlofs_minfree is btopr(HYPRLOFSMINFREE) but it can be patched to a
+ * different number of pages. Since hyprlofs doesn't actually use much
+ * memory, its unlikely this ever needs to be patched.
+ */
+#define HYPRLOFSMINFREE 8 * 1024 * 1024 /* 8 Megabytes */
+
+extern size_t hyprlofs_minfree; /* Anonymous memory in pages */
+
+extern void hyprlofs_node_init(hlfsmount_t *, hlnode_t *, vattr_t *,
+ cred_t *);
+extern int hyprlofs_dirlookup(hlnode_t *, char *, hlnode_t **, cred_t *);
+extern int hyprlofs_dirdelete(hlnode_t *, hlnode_t *, char *, enum dr_op,
+ cred_t *);
+extern void hyprlofs_dirinit(hlnode_t *, hlnode_t *);
+extern void hyprlofs_dirtrunc(hlnode_t *);
+extern int hyprlofs_taccess(void *, int, cred_t *);
+extern int hyprlofs_direnter(hlfsmount_t *, hlnode_t *, char *, enum de_op,
+ vnode_t *, vattr_t *, hlnode_t **, cred_t *);
+
+extern struct vnodeops *hyprlofs_vnodeops;
+extern const struct fs_operation_def hyprlofs_vnodeops_template[];
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_FS_HYPRLOFS_INFO_H */
diff --git a/usr/src/uts/common/sys/fs/sdev_impl.h b/usr/src/uts/common/sys/fs/sdev_impl.h
index 561939fc20..e6fa895060 100644
--- a/usr/src/uts/common/sys/fs/sdev_impl.h
+++ b/usr/src/uts/common/sys/fs/sdev_impl.h
@@ -36,6 +36,7 @@ extern "C" {
#include <sys/vfs_opreg.h>
#include <sys/list.h>
#include <sys/nvpair.h>
+#include <sys/fs/sdev_plugin.h>
#include <sys/sunddi.h>
/*
@@ -128,6 +129,21 @@ typedef struct sdev_local_data {
struct sdev_dprof sdev_lprof; /* profile for multi-inst */
} sdev_local_data_t;
+/* sdev_flags */
+typedef enum sdev_flags {
+ SDEV_BUILD = 0x0001, /* directory cache out-of-date */
+ SDEV_GLOBAL = 0x0002, /* global /dev nodes */
+ SDEV_PERSIST = 0x0004, /* backing store persisted node */
+ SDEV_NO_NCACHE = 0x0008, /* do not include in neg. cache */
+ SDEV_DYNAMIC = 0x0010, /* special-purpose vnode ops */
+ /* (ex: pts) */
+ SDEV_VTOR = 0x0020, /* validate sdev_nodes during search */
+ SDEV_ATTR_INVALID = 0x0040, /* invalid node attributes, */
+ /* need update */
+ SDEV_SUBDIR = 0x0080, /* match all subdirs under here */
+ SDEV_ZONED = 0x0100 /* zoned subdir */
+} sdev_flags_t;
+
/*
* /dev filesystem sdev_node defines
*/
@@ -150,7 +166,7 @@ typedef struct sdev_node {
ino64_t sdev_ino; /* inode */
uint_t sdev_nlink; /* link count */
int sdev_state; /* state of this node */
- int sdev_flags; /* flags bit */
+ sdev_flags_t sdev_flags; /* flags bit */
kmutex_t sdev_lookup_lock; /* node creation synch lock */
kcondvar_t sdev_lookup_cv; /* node creation sync cv */
@@ -161,7 +177,7 @@ typedef struct sdev_node {
struct sdev_global_data sdev_globaldata;
struct sdev_local_data sdev_localdata;
} sdev_instance_data;
-
+ list_node_t sdev_plist; /* link on plugin list */
void *sdev_private;
} sdev_node_t;
@@ -192,29 +208,11 @@ typedef enum {
SDEV_READY
} sdev_node_state_t;
-/* sdev_flags */
-#define SDEV_BUILD 0x0001 /* directory cache out-of-date */
-#define SDEV_GLOBAL 0x0002 /* global /dev nodes */
-#define SDEV_PERSIST 0x0004 /* backing store persisted node */
-#define SDEV_NO_NCACHE 0x0008 /* do not include in neg. cache */
-#define SDEV_DYNAMIC 0x0010 /* special-purpose vnode ops */
- /* (ex: pts) */
-#define SDEV_VTOR 0x0020 /* validate sdev_nodes during search */
-#define SDEV_ATTR_INVALID 0x0040 /* invalid node attributes, */
- /* need update */
-#define SDEV_SUBDIR 0x0080 /* match all subdirs under here */
-#define SDEV_ZONED 0x0100 /* zoned subdir */
-
/* sdev_lookup_flags */
#define SDEV_LOOKUP 0x0001 /* node creation in progress */
#define SDEV_READDIR 0x0002 /* VDIR readdir in progress */
#define SDEV_LGWAITING 0x0004 /* waiting for devfsadm completion */
-#define SDEV_VTOR_INVALID -1
-#define SDEV_VTOR_SKIP 0
-#define SDEV_VTOR_VALID 1
-#define SDEV_VTOR_STALE 2
-
/* convenient macros */
#define SDEV_IS_GLOBAL(dv) \
(dv->sdev_flags & SDEV_GLOBAL)
@@ -366,8 +364,13 @@ extern void sdev_devfsadmd_thread(struct sdev_node *, struct sdev_node *,
extern int devname_profile_update(char *, size_t);
extern struct sdev_data *sdev_find_mntinfo(char *);
void sdev_mntinfo_rele(struct sdev_data *);
+typedef void (*sdev_mnt_walk_f)(struct sdev_node *, void *);
+void sdev_mnt_walk(sdev_mnt_walk_f, void *);
extern struct vnodeops *devpts_getvnodeops(void);
extern struct vnodeops *devvt_getvnodeops(void);
+extern void sdev_plugin_nodeready(struct sdev_node *);
+extern int sdev_plugin_init(void);
+extern int sdev_plugin_fini(void);
/*
* boot states - warning, the ordering here is significant
@@ -513,6 +516,23 @@ extern void sdev_nc_path_exists(sdev_nc_list_t *, char *);
extern void sdev_modctl_dump_files(void);
/*
+ * plugin and legacy vtab stuff
+ */
+/* directory dependent vop table */
+typedef struct sdev_vop_table {
+ char *vt_name; /* subdirectory name */
+ const fs_operation_def_t *vt_service; /* vnodeops table */
+ struct vnodeops **vt_global_vops; /* global container for vop */
+ int (*vt_vtor)(struct sdev_node *); /* validate sdev_node */
+ int vt_flags;
+} sdev_vop_table_t;
+
+extern struct sdev_vop_table vtab[];
+extern struct vnodeops *sdev_get_vop(struct sdev_node *);
+extern void sdev_set_no_negcache(struct sdev_node *);
+extern void *sdev_get_vtor(struct sdev_node *dv);
+
+/*
* globals
*/
extern kmutex_t sdev_lock;
@@ -525,6 +545,7 @@ extern struct vnodeops *devipnet_vnodeops;
extern struct vnodeops *devvt_vnodeops;
extern struct sdev_data *sdev_origins; /* mount info for global /dev instance */
extern struct vnodeops *devzvol_vnodeops;
+extern int sdev_vnodeops_tbl_size;
extern const fs_operation_def_t sdev_vnodeops_tbl[];
extern const fs_operation_def_t devpts_vnodeops_tbl[];
diff --git a/usr/src/uts/common/sys/fs/sdev_plugin.h b/usr/src/uts/common/sys/fs/sdev_plugin.h
new file mode 100644
index 0000000000..8783df58e6
--- /dev/null
+++ b/usr/src/uts/common/sys/fs/sdev_plugin.h
@@ -0,0 +1,106 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_SDEV_PLUGIN_H
+#define _SYS_SDEV_PLUGIN_H
+
+/*
+ * Kernel sdev plugin interface
+ */
+
+#ifdef _KERNEL
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/vnode.h>
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+
+typedef uintptr_t sdev_plugin_hdl_t;
+typedef uintptr_t sdev_ctx_t;
+
+/*
+ * Valid return values for sdev_plugin_validate_t.
+ */
+typedef enum sdev_plugin_validate {
+ SDEV_VTOR_INVALID = -1,
+ SDEV_VTOR_SKIP = 0,
+ SDEV_VTOR_VALID = 1,
+ SDEV_VTOR_STALE = 2
+} sdev_plugin_validate_t;
+
+/*
+ * Valid flags
+ */
+typedef enum sdev_plugin_flags {
+ SDEV_PLUGIN_NO_NCACHE = 0x1,
+ SDEV_PLUGIN_SUBDIR = 0x2
+} sdev_plugin_flags_t;
+
+#define SDEV_PLUGIN_FLAGS_MASK 0x3
+
+/*
+ * Functions a module must implement
+ */
+typedef sdev_plugin_validate_t (*sp_valid_f)(sdev_ctx_t);
+typedef int (*sp_filldir_f)(sdev_ctx_t);
+typedef void (*sp_inactive_f)(sdev_ctx_t);
+
+#define SDEV_PLUGIN_VERSION 1
+
+typedef struct sdev_plugin_ops {
+ int spo_version;
+ sdev_plugin_flags_t spo_flags;
+ sp_valid_f spo_validate;
+ sp_filldir_f spo_filldir;
+ sp_inactive_f spo_inactive;
+} sdev_plugin_ops_t;
+
+extern sdev_plugin_hdl_t sdev_plugin_register(const char *, sdev_plugin_ops_t *,
+ int *);
+extern int sdev_plugin_unregister(sdev_plugin_hdl_t);
+
+typedef enum sdev_ctx_flags {
+ SDEV_CTX_GLOBAL = 0x2 /* node belongs to the GZ */
+} sdev_ctx_flags_t;
+
+/*
+ * Context helper functions
+ */
+extern sdev_ctx_flags_t sdev_ctx_flags(sdev_ctx_t);
+extern const char *sdev_ctx_name(sdev_ctx_t);
+extern const char *sdev_ctx_path(sdev_ctx_t);
+extern enum vtype sdev_ctx_vtype(sdev_ctx_t);
+extern const void *sdev_ctx_vtype_data(sdev_ctx_t);
+
+/*
+ * Callbacks to manipulate nodes
+ */
+extern int sdev_plugin_mkdir(sdev_ctx_t, char *);
+extern int sdev_plugin_mknod(sdev_ctx_t, char *, mode_t, dev_t);
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_SDEV_PLUGIN_H */
diff --git a/usr/src/uts/common/sys/fs/tmp.h b/usr/src/uts/common/sys/fs/tmp.h
index 68dd67c61e..f8740e8873 100644
--- a/usr/src/uts/common/sys/fs/tmp.h
+++ b/usr/src/uts/common/sys/fs/tmp.h
@@ -22,12 +22,13 @@
* Copyright 2007 Sun Microsystems, Inc.
* All rights reserved. Use is subject to license terms.
*/
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
#ifndef _SYS_FS_TMP_H
#define _SYS_FS_TMP_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -68,29 +69,28 @@ enum dr_op { DR_REMOVE, DR_RMDIR, DR_RENAME }; /* dirremove ops */
/*
* tmpfs_minfree is the amount (in pages) of anonymous memory that tmpfs
- * leaves free for the rest of the system. E.g. in a system with 32MB of
- * configured swap space, if 16MB were reserved (leaving 16MB free),
- * tmpfs could allocate up to 16MB - tmpfs_minfree. The default value
- * for tmpfs_minfree is btopr(TMPMINFREE) but it can cautiously patched
- * to a different number of pages.
- * NB: If tmpfs allocates too much swap space, other processes will be
- * unable to execute.
+ * leaves free for the rest of the system. In antiquity, this number could be
+ * relevant on a system-wide basis, as physical DRAM was routinely exhausted;
+ * however, in more modern times, the relative growth of DRAM with respect to
+ * application footprint means that this number is only likely to become
+ * factor in a virtualized OS environment (e.g., a zone) -- and even then only
+ * when DRAM and swap have both been capped low to allow for maximum tenancy.
+ * TMPMINFREE -- the value from which tmpfs_minfree is derived -- should
+ * therefore be configured to a value that is roughly the smallest practical
+ * value for memory + swap minus the largest reasonable size for tmpfs in such
+ * a configuration. As of this writing, the smallest practical memory + swap
+ * configuration is 128MB, and it seems reasonable to allow tmpfs to consume
+ * no more than seven-eighths of this, yielding a TMPMINFREE of 16MB. Care
+ * should be exercised in changing this: tuning this value too high will
+ * result in spurious ENOSPC errors in tmpfs in small zones (a problem that
+ * can induce cascading failure surprisingly often); tuning this value too low
+ * will result in tmpfs consumption alone to alone induce application-level
+ * memory allocation failure.
*/
-#define TMPMINFREE 2 * 1024 * 1024 /* 2 Megabytes */
+#define TMPMINFREE 16 * 1024 * 1024 /* 16 Megabytes */
extern size_t tmpfs_minfree; /* Anonymous memory in pages */
-/*
- * tmpfs can allocate only a certain percentage of kernel memory,
- * which is used for tmpnodes, directories, file names, etc.
- * This is statically set as TMPMAXFRACKMEM of physical memory.
- * The actual number of allocatable bytes can be patched in tmpfs_maxkmem.
- */
-#define TMPMAXFRACKMEM 25 /* 1/25 of physical memory */
-
-extern size_t tmp_kmemspace;
-extern size_t tmpfs_maxkmem; /* Allocatable kernel memory in bytes */
-
extern void tmpnode_init(struct tmount *, struct tmpnode *,
struct vattr *, struct cred *);
extern int tmpnode_trunc(struct tmount *, struct tmpnode *, ulong_t);
@@ -101,13 +101,12 @@ extern int tdirdelete(struct tmpnode *, struct tmpnode *, char *,
enum dr_op, struct cred *);
extern void tdirinit(struct tmpnode *, struct tmpnode *);
extern void tdirtrunc(struct tmpnode *);
-extern void *tmp_memalloc(size_t, int);
-extern void tmp_memfree(void *, size_t);
extern int tmp_resv(struct tmount *, struct tmpnode *, size_t, int);
extern int tmp_taccess(void *, int, struct cred *);
extern int tmp_sticky_remove_access(struct tmpnode *, struct tmpnode *,
struct cred *);
extern int tmp_convnum(char *, pgcnt_t *);
+extern int tmp_convmode(char *, mode_t *);
extern int tdirenter(struct tmount *, struct tmpnode *, char *,
enum de_op, struct tmpnode *, struct tmpnode *, struct vattr *,
struct tmpnode **, struct cred *, caller_context_t *);
diff --git a/usr/src/uts/common/sys/fx.h b/usr/src/uts/common/sys/fx.h
index 2d4e1aa7fb..4a48af52a1 100644
--- a/usr/src/uts/common/sys/fx.h
+++ b/usr/src/uts/common/sys/fx.h
@@ -21,13 +21,12 @@
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
*/
#ifndef _SYS_FX_H
#define _SYS_FX_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/thread.h>
#include <sys/ddi.h>
@@ -145,7 +144,14 @@ typedef struct fxkparms {
uint_t fx_cflags;
} fxkparms_t;
+/*
+ * control flags (kparms->fx_cflags).
+ */
+#define FX_DOUPRILIM 0x01 /* change user priority limit */
+#define FX_DOUPRI 0x02 /* change user priority */
+#define FX_DOTQ 0x04 /* change FX time quantum */
+#define FXMAXUPRI 60 /* maximum user priority setting */
/*
* Interface for partner private code. This is not a public interface.
diff --git a/usr/src/uts/common/sys/gsqueue.h b/usr/src/uts/common/sys/gsqueue.h
new file mode 100644
index 0000000000..40ef4ce982
--- /dev/null
+++ b/usr/src/uts/common/sys/gsqueue.h
@@ -0,0 +1,65 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_GSQUEUE_H
+#define _SYS_GSQUEUE_H
+
+/*
+ * Standard interfaces to serializaion queues for everyone (except IP).
+ */
+
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+
+typedef struct gsqueue gsqueue_t;
+typedef struct gsqueue_set gsqueue_set_t;
+
+typedef void (*gsqueue_cb_f)(gsqueue_set_t *, gsqueue_t *, void *, boolean_t);
+typedef void (*gsqueue_proc_f)(void *, mblk_t *, gsqueue_t *, void *);
+
+extern gsqueue_set_t *gsqueue_set_create(uint_t, pri_t);
+extern void gsqueue_set_destroy(gsqueue_set_t *);
+extern gsqueue_t *gsqueue_set_get(gsqueue_set_t *, uint_t);
+
+extern uintptr_t gsqueue_set_cb_add(gsqueue_set_t *, gsqueue_cb_f, void *);
+extern int gsqueue_set_cb_remove(gsqueue_set_t *, uintptr_t);
+
+#define GSQUEUE_FILL 0x0001
+#define GSQUEUE_NODRAIN 0x0002
+#define GSQUEUE_PROCESS 0x0004
+
+extern void gsqueue_enter_one(gsqueue_t *, mblk_t *, gsqueue_proc_f, void *,
+ int, uint8_t);
+
+/*
+ * The default wait is inherited from IP. This determines the amount of time
+ * that must pass after queuing work, before we wake up the worker thread. This
+ * value is in milliseconds.
+ */
+#define GSQUEUE_DEFAULT_WAIT 10
+#define GSQUEUE_DEFAULT_PRIORITY MAXCLSYSPRI
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_GSQUEUE_H */
diff --git a/usr/src/uts/common/sys/id_space.h b/usr/src/uts/common/sys/id_space.h
index d56fcceb5a..46d25f207f 100644
--- a/usr/src/uts/common/sys/id_space.h
+++ b/usr/src/uts/common/sys/id_space.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All Rights reserved.
*/
#ifndef _ID_SPACE_H
@@ -34,8 +35,6 @@ extern "C" {
#include <sys/mutex.h>
#include <sys/vmem.h>
-#ifdef _KERNEL
-
typedef vmem_t id_space_t;
id_space_t *id_space_create(const char *, id_t, id_t);
@@ -48,8 +47,6 @@ id_t id_allocff_nosleep(id_space_t *);
id_t id_alloc_specific_nosleep(id_space_t *, id_t);
void id_free(id_space_t *, id_t);
-#endif /* _KERNEL */
-
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/sys/inotify.h b/usr/src/uts/common/sys/inotify.h
new file mode 100644
index 0000000000..8acc1a7280
--- /dev/null
+++ b/usr/src/uts/common/sys/inotify.h
@@ -0,0 +1,153 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Header file to support for the inotify facility. Note that this facility
+ * is designed to be binary compatible with the Linux inotify facility; values
+ * for constants here should therefore exactly match those found in Linux, and
+ * this facility shouldn't be extended independently of Linux.
+ */
+
+#ifndef _SYS_INOTIFY_H
+#define _SYS_INOTIFY_H
+
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Events that can be explicitly requested on any inotify watch.
+ */
+#define IN_ACCESS 0x00000001
+#define IN_MODIFY 0x00000002
+#define IN_ATTRIB 0x00000004
+#define IN_CLOSE_WRITE 0x00000008
+#define IN_CLOSE_NOWRITE 0x00000010
+#define IN_OPEN 0x00000020
+#define IN_MOVED_FROM 0x00000040
+#define IN_MOVED_TO 0x00000080
+#define IN_CREATE 0x00000100
+#define IN_DELETE 0x00000200
+#define IN_DELETE_SELF 0x00000400
+#define IN_MOVE_SELF 0x00000800
+
+/*
+ * Events that can be sent to an inotify watch -- requested or not.
+ */
+#define IN_UNMOUNT 0x00002000
+#define IN_Q_OVERFLOW 0x00004000
+#define IN_IGNORED 0x00008000
+
+/*
+ * Flags that can modify an inotify event.
+ */
+#define IN_ONLYDIR 0x01000000
+#define IN_DONT_FOLLOW 0x02000000
+#define IN_EXCL_UNLINK 0x04000000
+#define IN_MASK_ADD 0x20000000
+#define IN_ISDIR 0x40000000
+#define IN_ONESHOT 0x80000000
+
+/*
+ * Helpful constants.
+ */
+#define IN_CLOSE (IN_CLOSE_WRITE | IN_CLOSE_NOWRITE)
+#define IN_MOVE (IN_MOVED_FROM | IN_MOVED_TO)
+#define IN_ALL_EVENTS \
+ (IN_ACCESS | IN_MODIFY | IN_ATTRIB | IN_CLOSE_WRITE | \
+ IN_CLOSE_NOWRITE | IN_OPEN | IN_MOVED_FROM | IN_MOVED_TO | \
+ IN_DELETE | IN_CREATE | IN_DELETE_SELF | IN_MOVE_SELF)
+
+#define IN_CHILD_EVENTS \
+ (IN_ACCESS | IN_MODIFY | IN_ATTRIB | IN_CLOSE_WRITE | \
+ IN_CLOSE_NOWRITE | IN_MODIFY | IN_OPEN)
+
+/*
+ * To assure binary compatibility with Linux, these values are fixed at their
+ * Linux equivalents, not their native ones.
+ */
+#define IN_CLOEXEC 02000000 /* LX_O_CLOEXEC */
+#define IN_NONBLOCK 04000 /* LX_O_NONBLOCK */
+
+struct inotify_event {
+ int32_t wd; /* watch descriptor */
+ uint32_t mask; /* mask of events */
+ uint32_t cookie; /* event association cookie, if any */
+ uint32_t len; /* size of name field */
+ char name[]; /* optional NUL-terminated name */
+};
+
+/*
+ * These ioctl values are specific to the native implementation; applications
+ * shouldn't be using them directly, and they should therefore be safe to
+ * change without breaking apps.
+ */
+#define INOTIFYIOC (('i' << 24) | ('n' << 16) | ('y' << 8))
+#define INOTIFYIOC_ADD_WATCH (INOTIFYIOC | 1) /* add watch */
+#define INOTIFYIOC_RM_WATCH (INOTIFYIOC | 2) /* remove watch */
+#define INOTIFYIOC_ADD_CHILD (INOTIFYIOC | 3) /* add child watch */
+#define INOTIFYIOC_ACTIVATE (INOTIFYIOC | 4) /* activate watch */
+
+#ifndef _LP64
+#ifndef _LITTLE_ENDIAN
+#define INOTIFY_PTR(type, name) uint32_t name##pad; type *name
+#else
+#define INOTIFY_PTR(type, name) type *name; uint32_t name##pad
+#endif
+#else
+#define INOTIFY_PTR(type, name) type *name
+#endif
+
+typedef struct inotify_addwatch {
+ int inaw_fd; /* open fd for object */
+ uint32_t inaw_mask; /* desired mask */
+} inotify_addwatch_t;
+
+typedef struct inotify_addchild {
+ INOTIFY_PTR(char, inac_name); /* pointer to name */
+ int inac_fd; /* open fd for parent */
+} inotify_addchild_t;
+
+#ifndef _KERNEL
+
+extern int inotify_init(void);
+extern int inotify_init1(int);
+extern int inotify_add_watch(int, const char *, uint32_t);
+extern int inotify_rm_watch(int, int);
+
+#else
+
+#define IN_UNMASKABLE \
+ (IN_UNMOUNT | IN_Q_OVERFLOW | IN_IGNORED | IN_ISDIR)
+
+#define IN_MODIFIERS \
+ (IN_EXCL_UNLINK | IN_ONESHOT)
+
+#define IN_FLAGS \
+ (IN_ONLYDIR | IN_DONT_FOLLOW | IN_MASK_ADD)
+
+#define IN_REMOVAL (1ULL << 32)
+#define INOTIFYMNRN_INOTIFY 0
+#define INOTIFYMNRN_CLONE 1
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_INOTIFY_H */
diff --git a/usr/src/uts/common/sys/ipc_impl.h b/usr/src/uts/common/sys/ipc_impl.h
index 0569c3e967..d7dc365c09 100644
--- a/usr/src/uts/common/sys/ipc_impl.h
+++ b/usr/src/uts/common/sys/ipc_impl.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2016, Joyent, Inc.
*/
#ifndef _IPC_IMPL_H
@@ -226,6 +227,7 @@ int ipc_commit_begin(ipc_service_t *, key_t, int, kipc_perm_t *);
kmutex_t *ipc_commit_end(ipc_service_t *, kipc_perm_t *);
void ipc_cleanup(ipc_service_t *, kipc_perm_t *);
+void ipc_rmsvc(ipc_service_t *, kipc_perm_t *);
int ipc_rmid(ipc_service_t *, int, cred_t *);
int ipc_ids(ipc_service_t *, int *, uint_t, uint_t *);
diff --git a/usr/src/uts/common/sys/iso/signal_iso.h b/usr/src/uts/common/sys/iso/signal_iso.h
index b1990121b8..0ae64b45d7 100644
--- a/usr/src/uts/common/sys/iso/signal_iso.h
+++ b/usr/src/uts/common/sys/iso/signal_iso.h
@@ -22,6 +22,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015, Joyent, Inc.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -95,7 +96,7 @@ extern "C" {
/* insert new signals here, and move _SIGRTM* appropriately */
#define _SIGRTMIN 42 /* first (highest-priority) realtime signal */
-#define _SIGRTMAX 73 /* last (lowest-priority) realtime signal */
+#define _SIGRTMAX 74 /* last (lowest-priority) realtime signal */
extern long _sysconf(int); /* System Private interface to sysconf() */
#define SIGRTMIN ((int)_sysconf(_SC_SIGRT_MIN)) /* first realtime signal */
#define SIGRTMAX ((int)_sysconf(_SC_SIGRT_MAX)) /* last realtime signal */
diff --git a/usr/src/uts/common/sys/klwp.h b/usr/src/uts/common/sys/klwp.h
index 41b70f6a6e..bdbff0be9b 100644
--- a/usr/src/uts/common/sys/klwp.h
+++ b/usr/src/uts/common/sys/klwp.h
@@ -24,7 +24,7 @@
*/
/*
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
#ifndef _SYS_KLWP_H
@@ -191,7 +191,19 @@ typedef struct _klwp {
struct ct_template *lwp_ct_active[CTT_MAXTYPE]; /* active templates */
struct contract *lwp_ct_latest[CTT_MAXTYPE]; /* last created contract */
- void *lwp_brand; /* per-lwp brand data */
+ /*
+ * Branding:
+ * lwp_brand - per-lwp brand data
+ * lwp_brand_syscall - brand syscall interposer
+ * lwp_brand_syscall_fast - brand "fast path" syscall interposer
+ *
+ * The lwp_brand_syscall_fast handler should only be used if an
+ * lwp_brand_syscall handler is also in place.
+ */
+ void *lwp_brand;
+ int (*lwp_brand_syscall)(void);
+ int (*lwp_brand_syscall_fast)(void);
+
struct psinfo *lwp_spymaster; /* if an agent LWP, our spymaster */
} klwp_t;
diff --git a/usr/src/uts/common/sys/kmem_impl.h b/usr/src/uts/common/sys/kmem_impl.h
index 90e0477c45..26ab055dbc 100644
--- a/usr/src/uts/common/sys/kmem_impl.h
+++ b/usr/src/uts/common/sys/kmem_impl.h
@@ -302,7 +302,6 @@ typedef struct kmem_defrag {
uint64_t kmd_later; /* LATER responses */
uint64_t kmd_dont_need; /* DONT_NEED responses */
uint64_t kmd_dont_know; /* DONT_KNOW responses */
- uint64_t kmd_hunt_found; /* DONT_KNOW: # found in mag */
uint64_t kmd_slabs_freed; /* slabs freed by moves */
uint64_t kmd_defrags; /* kmem_cache_defrag() */
uint64_t kmd_scans; /* kmem_cache_scan() */
diff --git a/usr/src/uts/common/sys/ksocket.h b/usr/src/uts/common/sys/ksocket.h
index dfe25eec76..be669cb78d 100644
--- a/usr/src/uts/common/sys/ksocket.h
+++ b/usr/src/uts/common/sys/ksocket.h
@@ -21,6 +21,7 @@
/*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#ifndef _SYS_KSOCKET_H_
@@ -121,6 +122,10 @@ extern int ksocket_close(ksocket_t, struct cred *);
extern void ksocket_hold(ksocket_t);
extern void ksocket_rele(ksocket_t);
+typedef boolean_t (*ksocket_krecv_f)(ksocket_t, mblk_t *, size_t, int, void *);
+extern int ksocket_krecv_set(ksocket_t, ksocket_krecv_f, void *);
+extern void ksocket_krecv_unblock(ksocket_t);
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/sys/limits.h b/usr/src/uts/common/sys/limits.h
new file mode 100644
index 0000000000..88625d1829
--- /dev/null
+++ b/usr/src/uts/common/sys/limits.h
@@ -0,0 +1,32 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+/*
+ * Copyright 2015 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_LIMITS_H
+#define _SYS_LIMITS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define IOV_MAX 1024
+
+#ifdef _KERNEL
+#define IOV_MAX_STACK 16 /* max. IOV on-stack allocation */
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LIMITS_H */
diff --git a/usr/src/uts/common/sys/mac.h b/usr/src/uts/common/sys/mac.h
index 247c3bd48d..cdbbe4ce62 100644
--- a/usr/src/uts/common/sys/mac.h
+++ b/usr/src/uts/common/sys/mac.h
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
* Copyright (c) 2015 Garrett D'Amore <garrett@damore.org>
*/
@@ -101,6 +101,14 @@ typedef struct mac_propval_uint32_range_s {
} mac_propval_uint32_range_t;
/*
+ * Defines ranges which are a series of C style strings.
+ */
+typedef struct mac_propval_str_range_s {
+ uint32_t mpur_nextbyte;
+ char mpur_data[1];
+} mac_propval_str_range_t;
+
+/*
* Data type of property values.
*/
typedef enum {
@@ -120,6 +128,7 @@ typedef struct mac_propval_range_s {
mac_propval_type_t mpr_type; /* type of value */
union {
mac_propval_uint32_range_t mpr_uint32[1];
+ mac_propval_str_range_t mpr_str;
} u;
} mac_propval_range_t;
@@ -214,6 +223,7 @@ typedef enum {
MAC_PROP_MAX_RXHWCLNT_AVAIL,
MAC_PROP_MAX_TXHWCLNT_AVAIL,
MAC_PROP_IB_LINKMODE,
+ MAC_PROP_VN_PROMISC_FILTERED,
MAC_PROP_SECONDARY_ADDRS,
MAC_PROP_ADV_40GFDX_CAP,
MAC_PROP_EN_40GFDX_CAP,
diff --git a/usr/src/uts/common/sys/mac_client.h b/usr/src/uts/common/sys/mac_client.h
index 0fc4939503..1f2c732e6d 100644
--- a/usr/src/uts/common/sys/mac_client.h
+++ b/usr/src/uts/common/sys/mac_client.h
@@ -22,7 +22,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright 2013 Joyent, Inc. All rights reserved.
+ * Copyright 2015 Joyent, Inc. All rights reserved.
*/
/*
@@ -115,6 +115,7 @@ typedef enum {
#define MAC_PROMISC_FLAGS_NO_PHYS 0x0002
#define MAC_PROMISC_FLAGS_VLAN_TAG_STRIP 0x0004
#define MAC_PROMISC_FLAGS_NO_COPY 0x0008
+#define MAC_PROMISC_FLAGS_DO_FIXUPS 0x0010
/* flags passed to mac_tx() */
#define MAC_DROP_ON_NO_DESC 0x01 /* freemsg() if no tx descs */
diff --git a/usr/src/uts/common/sys/mac_client_impl.h b/usr/src/uts/common/sys/mac_client_impl.h
index 0904b28645..d2fd145375 100644
--- a/usr/src/uts/common/sys/mac_client_impl.h
+++ b/usr/src/uts/common/sys/mac_client_impl.h
@@ -23,7 +23,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
#ifndef _SYS_MAC_CLIENT_IMPL_H
@@ -82,6 +82,7 @@ typedef struct mac_promisc_impl_s { /* Protected by */
boolean_t mpi_no_phys; /* WO */
boolean_t mpi_strip_vlan_tag; /* WO */
boolean_t mpi_no_copy; /* WO */
+ boolean_t mpi_do_fixups; /* WO */
} mac_promisc_impl_t;
typedef union mac_tx_percpu_s {
@@ -330,13 +331,14 @@ extern int mac_tx_percpu_cnt;
/* Mac protection flags */
#define MPT_FLAG_V6_LOCAL_ADDR_SET 0x0001
+#define MPT_FLAG_PROMISC_FILTERED 0x0002
/* in mac_client.c */
extern void mac_promisc_client_dispatch(mac_client_impl_t *, mblk_t *);
extern void mac_client_init(void);
extern void mac_client_fini(void);
extern void mac_promisc_dispatch(mac_impl_t *, mblk_t *,
- mac_client_impl_t *);
+ mac_client_impl_t *, boolean_t);
extern int mac_validate_props(mac_impl_t *, mac_resource_props_t *);
diff --git a/usr/src/uts/common/sys/mac_client_priv.h b/usr/src/uts/common/sys/mac_client_priv.h
index 6b409513a6..a5848625c2 100644
--- a/usr/src/uts/common/sys/mac_client_priv.h
+++ b/usr/src/uts/common/sys/mac_client_priv.h
@@ -22,7 +22,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright 2013 Joyent, Inc. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
/*
@@ -171,6 +171,7 @@ extern void mac_client_set_intr_cpu(void *, mac_client_handle_t, int32_t);
extern void *mac_get_devinfo(mac_handle_t);
extern boolean_t mac_is_vnic(mac_handle_t);
+extern boolean_t mac_is_overlay(mac_handle_t);
extern uint32_t mac_no_notification(mac_handle_t);
extern int mac_set_prop(mac_handle_t, mac_prop_id_t, char *, void *, uint_t);
diff --git a/usr/src/uts/common/sys/mac_impl.h b/usr/src/uts/common/sys/mac_impl.h
index 2286b587e8..46293b1a74 100644
--- a/usr/src/uts/common/sys/mac_impl.h
+++ b/usr/src/uts/common/sys/mac_impl.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#ifndef _SYS_MAC_IMPL_H
@@ -331,7 +331,7 @@ struct mac_group_s {
if ((src_mcip)->mci_state_flags & MCIS_SHARE_BOUND) \
rhandle = (mip)->mi_default_tx_ring; \
if (mip->mi_promisc_list != NULL) \
- mac_promisc_dispatch(mip, mp, src_mcip); \
+ mac_promisc_dispatch(mip, mp, src_mcip, B_TRUE); \
/* \
* Grab the proper transmit pointer and handle. Special \
* optimization: we can test mi_bridge_link itself atomically, \
@@ -643,6 +643,7 @@ struct mac_impl_s {
#define MIS_LEGACY 0x0040
#define MIS_NO_ACTIVE 0x0080
#define MIS_POLL_DISABLE 0x0100
+#define MIS_IS_OVERLAY 0x0200
#define mi_getstat mi_callbacks->mc_getstat
#define mi_start mi_callbacks->mc_start
@@ -894,6 +895,8 @@ extern void mac_protect_fini(mac_client_impl_t *);
extern int mac_set_resources(mac_handle_t, mac_resource_props_t *);
extern void mac_get_resources(mac_handle_t, mac_resource_props_t *);
extern void mac_get_effective_resources(mac_handle_t, mac_resource_props_t *);
+extern void mac_set_promisc_filtered(mac_client_handle_t, boolean_t);
+extern boolean_t mac_get_promisc_filtered(mac_client_handle_t);
extern cpupart_t *mac_pset_find(mac_resource_props_t *, boolean_t *);
extern void mac_set_pool_effective(boolean_t, cpupart_t *,
diff --git a/usr/src/uts/common/sys/mac_provider.h b/usr/src/uts/common/sys/mac_provider.h
index 9f7f2a1a73..5f02451542 100644
--- a/usr/src/uts/common/sys/mac_provider.h
+++ b/usr/src/uts/common/sys/mac_provider.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#ifndef _SYS_MAC_PROVIDER_H
@@ -106,7 +107,8 @@ typedef enum {
MAC_CAPAB_NO_NATIVEVLAN = 0x00080000, /* boolean only, no data */
MAC_CAPAB_NO_ZCOPY = 0x00100000, /* boolean only, no data */
MAC_CAPAB_LEGACY = 0x00200000, /* data is mac_capab_legacy_t */
- MAC_CAPAB_VRRP = 0x00400000 /* data is mac_capab_vrrp_t */
+ MAC_CAPAB_VRRP = 0x00400000, /* data is mac_capab_vrrp_t */
+ MAC_CAPAB_OVERLAY = 0x00800000 /* boolean only, no data */
} mac_capab_t;
/*
diff --git a/usr/src/uts/common/sys/mman.h b/usr/src/uts/common/sys/mman.h
index 6ec5f4ff41..34e491fd3b 100644
--- a/usr/src/uts/common/sys/mman.h
+++ b/usr/src/uts/common/sys/mman.h
@@ -338,6 +338,7 @@ struct memcntl_mha32 {
#define MS_SYNC 0x4 /* wait for msync */
#define MS_ASYNC 0x1 /* return immediately */
#define MS_INVALIDATE 0x2 /* invalidate caches */
+#define MS_INVALCURPROC 0x8 /* invalidate cache for curproc only */
#if (_POSIX_C_SOURCE <= 2) && !defined(_XPG4_2) || defined(__EXTENSIONS__)
/* functions to mctl */
diff --git a/usr/src/uts/common/sys/mntent.h b/usr/src/uts/common/sys/mntent.h
index 88c98dc5a4..7196f7b3ac 100644
--- a/usr/src/uts/common/sys/mntent.h
+++ b/usr/src/uts/common/sys/mntent.h
@@ -21,6 +21,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012, Joyent, Inc. All rights reserved.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*
* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
@@ -47,6 +48,7 @@ extern "C" {
#define MNTTYPE_PCFS "pcfs" /* PC (MSDOS) file system */
#define MNTTYPE_PC MNTTYPE_PCFS /* Deprecated name; use MNTTYPE_PCFS */
#define MNTTYPE_LOFS "lofs" /* Loop back file system */
+#define MNTTYPE_HYPRLOFS "hyprlofs" /* Hyperlofs file system */
#define MNTTYPE_LO MNTTYPE_LOFS /* Deprecated name; use MNTTYPE_LOFS */
#define MNTTYPE_HSFS "hsfs" /* High Sierra (9660) file system */
#define MNTTYPE_SWAP "swap" /* Swap file system */
diff --git a/usr/src/uts/common/sys/netconfig.h b/usr/src/uts/common/sys/netconfig.h
index 14b1aa55db..883c329aed 100644
--- a/usr/src/uts/common/sys/netconfig.h
+++ b/usr/src/uts/common/sys/netconfig.h
@@ -28,6 +28,7 @@
*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
*/
#ifndef _SYS_NETCONFIG_H
@@ -147,6 +148,8 @@ extern int endnetpath(void *);
extern struct netconfig *getnetpath(void *);
extern void nc_perror(const char *);
extern char *nc_sperror(void);
+extern void _nsl_brand_set_hooks(int (*)(void),
+ struct netconfig *(*)(int));
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/sys/neti.h b/usr/src/uts/common/sys/neti.h
index 93b5fc3e01..ea85c78f6b 100644
--- a/usr/src/uts/common/sys/neti.h
+++ b/usr/src/uts/common/sys/neti.h
@@ -44,6 +44,8 @@ extern "C" {
#define NHF_INET "NHF_INET"
#define NHF_INET6 "NHF_INET6"
#define NHF_ARP "NHF_ARP"
+#define NHF_VND_INET "NHF_VND_INET"
+#define NHF_VND_INET6 "NHF_VND_INET6"
/*
* Event identification
diff --git a/usr/src/uts/common/sys/netstack.h b/usr/src/uts/common/sys/netstack.h
index 2c77e1be96..73f29d1e63 100644
--- a/usr/src/uts/common/sys/netstack.h
+++ b/usr/src/uts/common/sys/netstack.h
@@ -81,7 +81,8 @@ typedef id_t netstackid_t;
#define NS_IPSECESP 16
#define NS_IPNET 17
#define NS_ILB 18
-#define NS_MAX (NS_ILB+1)
+#define NS_VND 19
+#define NS_MAX (NS_VND+1)
/*
* State maintained for each module which tracks the state of
diff --git a/usr/src/uts/common/sys/overlay.h b/usr/src/uts/common/sys/overlay.h
new file mode 100644
index 0000000000..12d0dbca51
--- /dev/null
+++ b/usr/src/uts/common/sys/overlay.h
@@ -0,0 +1,96 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015, Joyent, Inc.
+ */
+
+#ifndef _SYS_OVERLAY_H
+#define _SYS_OVERLAY_H
+
+/*
+ * Overlay device support
+ */
+
+#include <sys/param.h>
+#include <sys/dld_ioc.h>
+#include <sys/mac.h>
+#include <sys/overlay_common.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define OVERLAY_IOC_CREATE OVERLAYIOC(1)
+#define OVERLAY_IOC_DELETE OVERLAYIOC(2)
+#define OVERLAY_IOC_PROPINFO OVERLAYIOC(3)
+#define OVERLAY_IOC_GETPROP OVERLAYIOC(4)
+#define OVERLAY_IOC_SETPROP OVERLAYIOC(5)
+#define OVERLAY_IOC_NPROPS OVERLAYIOC(6)
+#define OVERLAY_IOC_ACTIVATE OVERLAYIOC(7)
+#define OVERLAY_IOC_STATUS OVERLAYIOC(8)
+
+typedef struct overlay_ioc_create {
+ datalink_id_t oic_linkid;
+ uint32_t oic_filler;
+ uint64_t oic_vnetid;
+ char oic_encap[MAXLINKNAMELEN];
+} overlay_ioc_create_t;
+
+typedef struct overlay_ioc_activate {
+ datalink_id_t oia_linkid;
+} overlay_ioc_activate_t;
+
+typedef struct overlay_ioc_delete {
+ datalink_id_t oid_linkid;
+} overlay_ioc_delete_t;
+
+typedef struct overlay_ioc_nprops {
+ datalink_id_t oipn_linkid;
+ int32_t oipn_nprops;
+} overlay_ioc_nprops_t;
+
+typedef struct overlay_ioc_propinfo {
+ datalink_id_t oipi_linkid;
+ int32_t oipi_id;
+ char oipi_name[OVERLAY_PROP_NAMELEN];
+ uint_t oipi_type;
+ uint_t oipi_prot;
+ uint8_t oipi_default[OVERLAY_PROP_SIZEMAX];
+ uint32_t oipi_defsize;
+ uint32_t oipi_posssize;
+ uint8_t oipi_poss[OVERLAY_PROP_SIZEMAX];
+} overlay_ioc_propinfo_t;
+
+typedef struct overlay_ioc_prop {
+ datalink_id_t oip_linkid;
+ int32_t oip_id;
+ char oip_name[OVERLAY_PROP_NAMELEN];
+ uint8_t oip_value[OVERLAY_PROP_SIZEMAX];
+ uint32_t oip_size;
+} overlay_ioc_prop_t;
+
+typedef enum overlay_status {
+ OVERLAY_I_OK = 0x00,
+ OVERLAY_I_DEGRADED = 0x01
+} overlay_status_t;
+
+typedef struct overlay_ioc_status {
+ datalink_id_t ois_linkid;
+ uint_t ois_status;
+ char ois_message[OVERLAY_STATUS_BUFLEN];
+} overlay_ioc_status_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_OVERLAY_H */
diff --git a/usr/src/uts/common/sys/overlay_common.h b/usr/src/uts/common/sys/overlay_common.h
new file mode 100644
index 0000000000..d638096006
--- /dev/null
+++ b/usr/src/uts/common/sys/overlay_common.h
@@ -0,0 +1,65 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _SYS_OVERLAY_COMMON_H
+#define _SYS_OVERLAY_COMMON_H
+
+/*
+ * Common overlay definitions
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum overlay_target_mode {
+ OVERLAY_TARGET_NONE = 0x0,
+ OVERLAY_TARGET_POINT,
+ OVERLAY_TARGET_DYNAMIC
+} overlay_target_mode_t;
+
+typedef enum overlay_plugin_dest {
+ OVERLAY_PLUGIN_D_INVALID = 0x0,
+ OVERLAY_PLUGIN_D_ETHERNET = 0x1,
+ OVERLAY_PLUGIN_D_IP = 0x2,
+ OVERLAY_PLUGIN_D_PORT = 0x4,
+ OVERLAY_PLUGIN_D_MASK = 0x7
+} overlay_plugin_dest_t;
+
+typedef enum overlay_prop_type {
+ OVERLAY_PROP_T_INT = 0x1, /* signed int */
+ OVERLAY_PROP_T_UINT, /* unsigned int */
+ OVERLAY_PROP_T_IP, /* sinaddr6 */
+ OVERLAY_PROP_T_STRING /* OVERLAY_PROPS_SIZEMAX */
+} overlay_prop_type_t;
+
+typedef enum overlay_prop_prot {
+ OVERLAY_PROP_PERM_REQ = 0x1,
+ OVERLAY_PROP_PERM_READ = 0x2,
+ OVERLAY_PROP_PERM_WRITE = 0x4,
+ OVERLAY_PROP_PERM_RW = 0x6,
+ OVERLAY_PROP_PERM_RRW = 0x7,
+ OVERLAY_PROP_PERM_MASK = 0x7
+} overlay_prop_prot_t;
+
+#define OVERLAY_PROP_NAMELEN 64
+#define OVERLAY_PROP_SIZEMAX 256
+#define OVERLAY_STATUS_BUFLEN 256
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_OVERLAY_COMMON_H */
diff --git a/usr/src/uts/common/sys/overlay_impl.h b/usr/src/uts/common/sys/overlay_impl.h
new file mode 100644
index 0000000000..7fb8b8da1d
--- /dev/null
+++ b/usr/src/uts/common/sys/overlay_impl.h
@@ -0,0 +1,205 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#ifndef _SYS_OVERLAY_IMPL_H
+#define _SYS_OVERLAY_IMPL_H
+
+/*
+ * Overlay device support
+ */
+
+#include <sys/overlay.h>
+#include <sys/overlay_common.h>
+#include <sys/overlay_plugin.h>
+#include <sys/overlay_target.h>
+#include <sys/ksynch.h>
+#include <sys/list.h>
+#include <sys/avl.h>
+#include <sys/ksocket.h>
+#include <sys/socket.h>
+#include <sys/refhash.h>
+#include <sys/ethernet.h>
+#include <sys/list.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define OVEP_VERSION_ONE 0x1
+
+typedef struct overlay_plugin {
+ kmutex_t ovp_mutex;
+ list_node_t ovp_link; /* overlay_plugin_lock */
+ uint_t ovp_active; /* ovp_mutex */
+ const char *ovp_name; /* RO */
+ const overlay_plugin_ops_t *ovp_ops; /* RO */
+ const char *const *ovp_props; /* RO */
+ uint_t ovp_nprops; /* RO */
+ uint_t ovp_id_size; /* RO */
+ overlay_plugin_flags_t ovp_flags; /* RO */
+ overlay_plugin_dest_t ovp_dest; /* RO */
+} overlay_plugin_t;
+
+typedef struct overlay_mux {
+ list_node_t omux_lnode;
+ ksocket_t omux_ksock; /* RO */
+ overlay_plugin_t *omux_plugin; /* RO: associated encap */
+ int omux_domain; /* RO: socket domain */
+ int omux_family; /* RO: socket family */
+ int omux_protocol; /* RO: socket protocol */
+ struct sockaddr *omux_addr; /* RO: socket address */
+ socklen_t omux_alen; /* RO: sockaddr len */
+ kmutex_t omux_lock; /* Protects everything below */
+ uint_t omux_count; /* Active instances */
+ avl_tree_t omux_devices; /* Tree of devices */
+} overlay_mux_t;
+
+typedef enum overlay_target_flag {
+ OVERLAY_T_TEARDOWN = 0x1
+} overlay_target_flag_t;
+
+typedef struct overlay_target {
+ kmutex_t ott_lock;
+ kcondvar_t ott_cond;
+ overlay_target_mode_t ott_mode; /* RO */
+ overlay_plugin_dest_t ott_dest; /* RO */
+ uint64_t ott_id; /* RO */
+ overlay_target_flag_t ott_flags; /* ott_lock */
+ uint_t ott_ocount; /* ott_lock */
+ union { /* ott_lock */
+ overlay_target_point_t ott_point;
+ struct overlay_target_dyn {
+ refhash_t *ott_dhash;
+ avl_tree_t ott_tree;
+ } ott_dyn;
+ } ott_u;
+} overlay_target_t;
+
+typedef enum overlay_dev_flag {
+ OVERLAY_F_ACTIVATED = 0x01, /* Activate ioctl completed */
+ OVERLAY_F_IN_MUX = 0x02, /* Currently in a mux */
+ OVERLAY_F_IN_TX = 0x04, /* Currently doing tx */
+ OVERLAY_F_IN_RX = 0x08, /* Currently doing rx */
+ OVERLAY_F_IOMASK = 0x0c, /* A mask for rx and tx */
+ OVERLAY_F_MDDROP = 0x10, /* Drop traffic for metadata update */
+ OVERLAY_F_STOPMASK = 0x1e, /* None set when stopping */
+ OVERLAY_F_VARPD = 0x20, /* varpd plugin exists */
+ OVERLAY_F_DEGRADED = 0x40, /* device is degraded */
+ OVERLAY_F_MASK = 0x7f /* mask of everything */
+} overlay_dev_flag_t;
+
+typedef struct overlay_dev {
+ kmutex_t odd_lock;
+ kcondvar_t odd_iowait;
+ list_node_t odd_link; /* overlay_dev_lock */
+ mac_handle_t odd_mh; /* RO */
+ overlay_plugin_t *odd_plugin; /* RO */
+ datalink_id_t odd_linkid; /* RO */
+ void *odd_pvoid; /* RO -- only used by plugin */
+ uint_t odd_ref; /* protected by odd_lock */
+ uint_t odd_mtu; /* protected by odd_lock */
+ overlay_dev_flag_t odd_flags; /* protected by odd_lock */
+ uint_t odd_rxcount; /* protected by odd_lock */
+ uint_t odd_txcount; /* protected by odd_lock */
+ overlay_mux_t *odd_mux; /* protected by odd_lock */
+ uint64_t odd_vid; /* RO if active else odd_lock */
+ avl_node_t odd_muxnode; /* managed by mux */
+ overlay_target_t *odd_target; /* See big theory statement */
+ char odd_fmamsg[OVERLAY_STATUS_BUFLEN]; /* odd_lock */
+} overlay_dev_t;
+
+typedef enum overlay_target_entry_flags {
+ OVERLAY_ENTRY_F_PENDING = 0x01, /* lookup in progress */
+ OVERLAY_ENTRY_F_VALID = 0x02, /* entry is currently valid */
+ OVERLAY_ENTRY_F_DROP = 0x04, /* always drop target */
+ OVERLAY_ENTRY_F_VALID_MASK = 0x06
+} overlay_target_entry_flags_t;
+
+typedef struct overlay_target_entry {
+ kmutex_t ote_lock;
+ refhash_link_t ote_reflink; /* hashtable link */
+ avl_node_t ote_avllink; /* iteration link */
+ list_node_t ote_qlink;
+ overlay_target_entry_flags_t ote_flags; /* RW: state flags */
+ uint8_t ote_addr[ETHERADDRL]; /* RO: mac addr */
+ overlay_target_t *ote_ott; /* RO */
+ overlay_dev_t *ote_odd; /* RO */
+ overlay_target_point_t ote_dest; /* RW: destination */
+ mblk_t *ote_chead; /* RW: blocked mb chain head */
+ mblk_t *ote_ctail; /* RW: blocked mb chain tail */
+ size_t ote_mbsize; /* RW: outstanding mblk size */
+ hrtime_t ote_vtime; /* RW: valid timestamp */
+} overlay_target_entry_t;
+
+
+#define OVERLAY_CTL "overlay"
+
+extern dev_info_t *overlay_dip;
+
+extern mblk_t *overlay_m_tx(void *, mblk_t *);
+
+typedef int (*overlay_dev_iter_f)(overlay_dev_t *, void *);
+extern void overlay_dev_iter(overlay_dev_iter_f, void *);
+
+extern void overlay_plugin_init(void);
+extern overlay_plugin_t *overlay_plugin_lookup(const char *);
+extern void overlay_plugin_rele(overlay_plugin_t *);
+extern void overlay_plugin_fini(void);
+typedef int (*overlay_plugin_walk_f)(overlay_plugin_t *, void *);
+extern void overlay_plugin_walk(overlay_plugin_walk_f, void *);
+
+extern void overlay_io_start(overlay_dev_t *, overlay_dev_flag_t);
+extern void overlay_io_done(overlay_dev_t *, overlay_dev_flag_t);
+
+extern void overlay_mux_init(void);
+extern void overlay_mux_fini(void);
+
+extern overlay_mux_t *overlay_mux_open(overlay_plugin_t *, int, int, int,
+ struct sockaddr *, socklen_t, int *);
+extern void overlay_mux_close(overlay_mux_t *);
+extern void overlay_mux_add_dev(overlay_mux_t *, overlay_dev_t *);
+extern void overlay_mux_remove_dev(overlay_mux_t *, overlay_dev_t *);
+extern int overlay_mux_tx(overlay_mux_t *, struct msghdr *, mblk_t *);
+
+extern void overlay_prop_init(overlay_prop_handle_t);
+
+extern void overlay_target_init(void);
+extern int overlay_target_busy(void);
+extern int overlay_target_open(dev_t *, int, int, cred_t *);
+extern int overlay_target_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
+extern int overlay_target_close(dev_t, int, int, cred_t *);
+extern void overlay_target_free(overlay_dev_t *);
+
+#define OVERLAY_TARGET_OK 0
+#define OVERLAY_TARGET_DROP 1
+#define OVERLAY_TARGET_ASYNC 2
+extern int overlay_target_lookup(overlay_dev_t *, mblk_t *, struct sockaddr *,
+ socklen_t *);
+extern void overlay_target_quiesce(overlay_target_t *);
+extern void overlay_target_fini(void);
+
+extern void overlay_fm_init(void);
+extern void overlay_fm_fini(void);
+extern void overlay_fm_degrade(overlay_dev_t *, const char *);
+extern void overlay_fm_restore(overlay_dev_t *);
+
+extern overlay_dev_t *overlay_hold_by_dlid(datalink_id_t);
+extern void overlay_hold_rele(overlay_dev_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_OVERLAY_IMPL_H */
diff --git a/usr/src/uts/common/sys/overlay_plugin.h b/usr/src/uts/common/sys/overlay_plugin.h
new file mode 100644
index 0000000000..07efaa05df
--- /dev/null
+++ b/usr/src/uts/common/sys/overlay_plugin.h
@@ -0,0 +1,324 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2015 Joyent, Inc.
+ */
+
+#ifndef _SYS_OVERLAY_PLUGIN_H
+#define _SYS_OVERLAY_PLUGIN_H
+
+/*
+ * overlay plugin interface for encapsulation/decapsulation modules
+ *
+ * This header file defines how encapsulation and decapsulation plugins
+ * interact within the broader system. At this time, these interfaces are
+ * considered private to illumos and therefore are subject to change. As we gain
+ * more experience with a few of the different encapsulation formats, say nvgre
+ * or geneve, then we can move to make this a more-stable interface.
+ *
+ * A plugin is a general kernel module that uses the miscellaneous mod-linkage.
+ *
+ * In it's _init(9E) routine, it must register itself with the overlay
+ * subsystem. To do this, it allocates an overlay_plugin_register_t via
+ * overlay_plugin_alloc(), that it then * fills out with various required
+ * information and then attempts to register with the system via a call to
+ * overlay_plugin_register(). If that succeeds, it should then call
+ * mod_install(9F). If the mod_install(9F) fails, then it should call
+ * overlay_plugin_unregister(). Regardless of success or failure, it should call
+ * overlay_plugin_free() to ensure that any memory that may be associated with
+ * the registration is freed.
+ *
+ * When the module's _fini(9E) is called, overlay_plugin_unregister() should be
+ * called first. It may return an error, such as EBUSY. In such cases, it should
+ * be returned as the return status of _fini(9E). This is quite necessary, it
+ * ensures that if the module is in use it doesn't get unloaded out from under
+ * us the broader subsystem while it's still in use. A driver can use that to
+ * know that there are no current instances of its private data.
+ *
+ * ------------------
+ * Plugin Definitions
+ * ------------------
+ *
+ * A plugin is required to fill in both an operations vector and a series of
+ * information to the callback routine. Here are the routines and their
+ * purposes. The full signatures are available below.
+ *
+ * overlay_plugin_init_t
+ *
+ * This interface is used to create a new instance of a plugin. An instance
+ * of a plugin will be created for each overlay device that is created. For
+ * example, if a device is created with VXLAN ID 23 and ID 42, then there
+ * will be two different calls to this function.
+ *
+ * This function gives the plugin a chance to create a private data
+ * structure that will be returned on subsequent calls to the system.
+ *
+ * overlay_plugin_fini_t
+ *
+ * This is the opposite of overlay_plugin_init_t. It will be called when it
+ * is safe to remove any private data that is associated with this instance
+ * of the plugin.
+ *
+ * overlay_plugin_propinfo_t
+ *
+ * This is called with the name of a property that is registered when the
+ * plugin is created. This function will be called with the name of the
+ * property that information is being requested about. The plugin is
+ * responsible for filling out information such as setting the name, the
+ * type of property it is, the protection of the property (can a user
+ * update it?), whether the property is required, an optional default value
+ * for the property, and an optional set of values or ranges that are
+ * allowed.
+ *
+ * overlay_plugin_getprop_t
+ *
+ * Return the value of the named property from the current instance of the
+ * plugin.
+ *
+ * overlay_plugin_setprop_t
+ *
+ * Set the value of the named property to the specified value for the
+ * current instance of the plugin. Note, that it is the plugin's
+ * responsibility to ensure that the value of the property is valid and to
+ * update state as appropriate.
+ *
+ * overlay_plugin_socket_t
+ *
+ * Every overlay device has a corresponding socket that it uses to send and
+ * receive traffic. This routine is used to get the parameters that should
+ * be used to define such a socket. The actual socket may be multiplexed
+ * with other uses of it.
+ *
+ * overlay_plugin_sockopt_t
+ *
+ * Allow a plugin to set any necessary socket options that it needs on the
+ * kernel socket that is being used by a mux. This will only be called once
+ * for a given mux, if additional devices are added to a mux, it will not
+ * be called additional times.
+ *
+ * overlay_plugin_encap_t
+ *
+ * In this routine you're given a message block and information about the
+ * packet, such as the identifier and are asked to fill out a message block
+ * that represents the encapsulation header and optionally manipulate the
+ * input message if required.
+ *
+ * overlay_plugin_decap_t
+ *
+ * In this routine, you're given the encapsulated message block. The
+ * requirement is to decapsulate it and determine what is the correct
+ * overlay identifier for this network and to fill in the header size so
+ * the broader system knows how much of this data should be considered
+ * consumed.
+ *
+ * ovpo_callbacks
+ *
+ * This should be set to zero, it's reserved for future use.
+ *
+ * Once these properties are defined, the module should define the following
+ * members in the overlay_plugin_register_t.
+ *
+ * ovep_version
+ *
+ * Should be set to the value of the macro OVEP_VERSION.
+ *
+ * ovep_name
+ *
+ * Should be set to a character string that has the name of the module.
+ * Generally this should match the name of the kernel module; however, this
+ * is the name that users will use to refer to this module when creating
+ * devices.
+ *
+ * overlay_plugin_ops_t
+ *
+ * Should be set to the functions as described above.
+ *
+ * ovep_props
+ *
+ * This is an array of character strings that holds the names of the
+ * properties of the encapsulation plugin.
+ *
+ *
+ * ovep_id_size
+ *
+ * This is the size in bytes of the valid range for the identifier. The
+ * valid identifier range is considered a ovep_id_size byte unsigned
+ * integer, [ 0, 1 << (ovep_id_size * 8) ).
+ *
+ * ovep_flags
+ *
+ * A series of flags that indicate optional features that are supported.
+ * Valid flags include:
+ *
+ * OVEP_F_VLAN_TAG
+ *
+ * The encapsulation format allows for the encapsulated
+ * packet to maintain a VLAN tag.
+ *
+ * ovep_dest
+ *
+ * Describes the kind of destination that the overlay plugin supports for
+ * sending traffic. For example, vxlan uses UDP, therefore it requires both
+ * an IP address and a port; however, nvgre uses the gre header and
+ * therefore only requires an IP address. The following flags may be
+ * combined:
+ *
+ * OVERLAY_PLUGIN_D_ETHERNET
+ *
+ * Indicates that to send a packet to its destination, we
+ * require a link-layer ethernet address.
+ *
+ * OVERLAY_PLUGIN_D_IP
+ *
+ * Indicates that to send a packet to its destination, we
+ * require an IP address. Note, all IP addresses are
+ * transmitted as IPv6 addresses and for an IPv4
+ * destination, using an IPv4-mapped IPv6 address is the
+ * expected way to transmit that.
+ *
+ * OVERLAY_PLUGIN_D_PORT
+ *
+ * Indicates that to send a packet to its destination, a
+ * port is required, this usually indicates that the
+ * protocol uses something like TCP or UDP.
+ *
+ *
+ * -------------------------------------------------
+ * Downcalls, Upcalls, and Synchronization Guarantees
+ * -------------------------------------------------
+ *
+ * Every instance of a given module is independent. The kernel only guarantees
+ * that it will probably perform downcalls into different instances in parallel
+ * at some point. No locking is provided by the framework for synchronization
+ * across instances. If a module finds itself needing that, it will be up to it
+ * to provide it.
+ *
+ * In a given instance, the kernel may call into entry points in parallel. If
+ * the instance has private data, it should likely synchronize it. The one
+ * guarantee that we do make, is that calls to getprop and setprop will be done
+ * synchronized by a caller holding the MAC perimeter.
+ *
+ * While servicing a downcall from the general overlay device framework, a
+ * kernel module should not make any upcalls, excepting those functions that are
+ * defined in this header file, eg. the property related callbacks. Improtantly,
+ * it cannot make any assumptions about what locks may or may not be held by the
+ * broader system. The only thing that it is safe for it to use are its own
+ * locks.
+ *
+ * ----------------
+ * Downcall Context
+ * ----------------
+ *
+ * For all of the downcalls, excepting the overlay_plugin_encap_t and
+ * overlay_plugin_decap_t, the calls will be made either in kernel or user
+ * context, the module should not assume either way.
+ *
+ * overlay_plugin_encap_t and overlay_plugin_decap_t may be called in user,
+ * kernel or interrupt context; however, it is guaranteed that the interrupt
+ * will be below LOCK_LEVEL, and therefore it is safe to grab locks.
+ */
+
+#include <sys/stream.h>
+#include <sys/mac_provider.h>
+#include <sys/ksocket.h>
+#include <sys/overlay_common.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define OVEP_VERSION 0x1
+
+typedef enum overlay_plugin_flags {
+ OVEP_F_VLAN_TAG = 0x01 /* Supports VLAN Tags */
+} overlay_plugin_flags_t;
+
+/*
+ * The ID space could easily be more than a 64-bit number, even
+ * though today it's either a 24-64 bit value. How should we future
+ * proof ourselves here?
+ */
+typedef struct ovep_encap_info {
+ uint64_t ovdi_id;
+ size_t ovdi_hdr_size;
+} ovep_encap_info_t;
+
+typedef struct __overlay_prop_handle *overlay_prop_handle_t;
+typedef struct __overlay_handle *overlay_handle_t;
+
+/*
+ * Plugins are guaranteed that calls to setprop are serialized. However, any
+ * number of other calls can be going on in parallel otherwise.
+ */
+typedef int (*overlay_plugin_encap_t)(void *, mblk_t *,
+ ovep_encap_info_t *, mblk_t **);
+typedef int (*overlay_plugin_decap_t)(void *, mblk_t *,
+ ovep_encap_info_t *);
+typedef int (*overlay_plugin_init_t)(overlay_handle_t, void **);
+typedef void (*overlay_plugin_fini_t)(void *);
+typedef int (*overlay_plugin_socket_t)(void *, int *, int *, int *,
+ struct sockaddr *, socklen_t *);
+typedef int (*overlay_plugin_sockopt_t)(ksocket_t);
+typedef int (*overlay_plugin_getprop_t)(void *, const char *, void *,
+ uint32_t *);
+typedef int (*overlay_plugin_setprop_t)(void *, const char *, const void *,
+ uint32_t);
+typedef int (*overlay_plugin_propinfo_t)(const char *, overlay_prop_handle_t);
+
+typedef struct overlay_plugin_ops {
+ uint_t ovpo_callbacks;
+ overlay_plugin_init_t ovpo_init;
+ overlay_plugin_fini_t ovpo_fini;
+ overlay_plugin_encap_t ovpo_encap;
+ overlay_plugin_decap_t ovpo_decap;
+ overlay_plugin_socket_t ovpo_socket;
+ overlay_plugin_sockopt_t ovpo_sockopt;
+ overlay_plugin_getprop_t ovpo_getprop;
+ overlay_plugin_setprop_t ovpo_setprop;
+ overlay_plugin_propinfo_t ovpo_propinfo;
+} overlay_plugin_ops_t;
+
+typedef struct overlay_plugin_register {
+ uint_t ovep_version;
+ const char *ovep_name;
+ const overlay_plugin_ops_t *ovep_ops;
+ const char **ovep_props;
+ uint_t ovep_id_size;
+ uint_t ovep_flags;
+ uint_t ovep_dest;
+} overlay_plugin_register_t;
+
+/*
+ * Functions that interact with registration
+ */
+extern overlay_plugin_register_t *overlay_plugin_alloc(uint_t);
+extern void overlay_plugin_free(overlay_plugin_register_t *);
+extern int overlay_plugin_register(overlay_plugin_register_t *);
+extern int overlay_plugin_unregister(const char *);
+
+/*
+ * Property information callbacks
+ */
+extern void overlay_prop_set_name(overlay_prop_handle_t, const char *);
+extern void overlay_prop_set_prot(overlay_prop_handle_t, overlay_prop_prot_t);
+extern void overlay_prop_set_type(overlay_prop_handle_t, overlay_prop_type_t);
+extern int overlay_prop_set_default(overlay_prop_handle_t, void *, ssize_t);
+extern void overlay_prop_set_nodefault(overlay_prop_handle_t);
+extern void overlay_prop_set_range_uint32(overlay_prop_handle_t, uint32_t,
+ uint32_t);
+extern void overlay_prop_set_range_str(overlay_prop_handle_t, const char *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_OVERLAY_PLUGIN_H */
diff --git a/usr/src/uts/common/sys/overlay_target.h b/usr/src/uts/common/sys/overlay_target.h
new file mode 100644
index 0000000000..cae193c334
--- /dev/null
+++ b/usr/src/uts/common/sys/overlay_target.h
@@ -0,0 +1,292 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2015 Joyent, Inc.
+ */
+
+#ifndef _OVERLAY_TARGET_H
+#define _OVERLAY_TARGET_H
+
+/*
+ * Overlay device varpd ioctl interface (/dev/overlay)
+ */
+
+#include <sys/types.h>
+#include <sys/ethernet.h>
+#include <netinet/in.h>
+#include <sys/overlay_common.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct overlay_target_point {
+ uint8_t otp_mac[ETHERADDRL];
+ struct in6_addr otp_ip;
+ uint16_t otp_port;
+} overlay_target_point_t;
+
+#define OVERLAY_TARG_IOCTL (('o' << 24) | ('v' << 16) | ('t' << 8))
+
+#define OVERLAY_TARG_INFO (OVERLAY_TARG_IOCTL | 0x01)
+
+typedef enum overlay_targ_info_flags {
+ OVERLAY_TARG_INFO_F_ACTIVE = 0x01,
+ OVERLAY_TARG_INFO_F_DEGRADED = 0x02
+} overlay_targ_info_flags_t;
+
+/*
+ * Get target information about an overlay device
+ */
+typedef struct overlay_targ_info {
+ datalink_id_t oti_linkid;
+ uint32_t oti_needs;
+ uint64_t oti_flags;
+ uint64_t oti_vnetid;
+} overlay_targ_info_t;
+
+/*
+ * Declare an association between a given varpd instance and a datalink.
+ */
+#define OVERLAY_TARG_ASSOCIATE (OVERLAY_TARG_IOCTL | 0x02)
+
+typedef struct overlay_targ_associate {
+ datalink_id_t ota_linkid;
+ uint32_t ota_mode;
+ uint64_t ota_id;
+ uint32_t ota_provides;
+ overlay_target_point_t ota_point;
+} overlay_targ_associate_t;
+
+/*
+ * Remove an association from a device. If the device has already been started,
+ * this implies OVERLAY_TARG_DEGRADE.
+ */
+#define OVERLAY_TARG_DISASSOCIATE (OVERLAY_TARG_IOCTL | 0x3)
+
+/*
+ * Tells the kernel that while a varpd instance still exists, it basically isn't
+ * making any forward progress, so the device should consider itself degraded.
+ */
+#define OVERLAY_TARG_DEGRADE (OVERLAY_TARG_IOCTL | 0x4)
+
+typedef struct overlay_targ_degrade {
+ datalink_id_t otd_linkid;
+ uint32_t otd_pad;
+ char otd_buf[OVERLAY_STATUS_BUFLEN];
+} overlay_targ_degrade_t;
+
+/*
+ * Tells the kernel to remove the degraded status that it set on a device.
+ */
+#define OVERLAY_TARG_RESTORE (OVERLAY_TARG_IOCTL | 0x5)
+
+typedef struct overlay_targ_id {
+ datalink_id_t otid_linkid;
+} overlay_targ_id_t;
+
+/*
+ * The following ioctls are all used to support dynamic lookups from userland,
+ * generally serviced by varpd.
+ *
+ * The way this is designed to work is that user land will have threads sitting
+ * in OVERLAY_TARG_LOOKUP ioctls waiting to service requests. A thread will sit
+ * waiting for work for up to approximately one second of time before they will
+ * be sent back out to user land to give user land a chance to clean itself up
+ * or more generally, come back into the kernel for work. Once these threads
+ * return, they will have a request with which more action can be done. The
+ * following ioctls can all be used to answer the request.
+ *
+ * OVERLAY_TARG_RESPOND - overlay_targ_resp_t
+ *
+ * The overlay_targ_resp_t has the appropriate information from
+ * which a reply can be generated. The information is filled into
+ * an overlay_targ_point_t as appropriate based on the
+ * overlay_plugin_dest_t type.
+ *
+ *
+ * OVERLAY_TARG_DROP - overlay_targ_resp_t
+ *
+ * The overlay_targ_resp_t should identify a request for which to
+ * drop a packet.
+ *
+ *
+ * OVERLAY_TARG_INJECT - overlay_targ_pkt_t
+ *
+ * The overlay_targ_pkt_t injects a fully formed packet into the
+ * virtual network. It may either be identified by its data link id
+ * or by the request id. If both are specified, the
+ * datalink id will be used. Note, that an injection is not
+ * considered a reply and if this corresponds to a requeset, then
+ * that individual packet must still be dropped.
+ *
+ *
+ * OVERLAY_TARG_PKT - overlay_targ_pkt_t
+ *
+ * This ioctl can be used to copy data from a given request into a
+ * user buffer. This can be used in combination with
+ * OVERLAY_TARG_INJECT to implemnt services such as a proxy-arp.
+ *
+ *
+ * OVERLAY_TARG_RESEND - overlay_targ_pkt_t
+ *
+ * This ioctl is similar to the OVERLAY_TARG_INJECT, except instead
+ * of receiving it on the local mac handle, it queues it for
+ * retransmission again. This is useful if you have a packet that
+ * was originally destined for some broadcast or multicast address
+ * that you now want to send to a unicast address.
+ */
+#define OVERLAY_TARG_LOOKUP (OVERLAY_TARG_IOCTL | 0x10)
+#define OVERLAY_TARG_RESPOND (OVERLAY_TARG_IOCTL | 0x11)
+#define OVERLAY_TARG_DROP (OVERLAY_TARG_IOCTL | 0x12)
+#define OVERLAY_TARG_INJECT (OVERLAY_TARG_IOCTL | 0x13)
+#define OVERLAY_TARG_PKT (OVERLAY_TARG_IOCTL | 0x14)
+#define OVERLAY_TARG_RESEND (OVERLAY_TARG_IOCTL | 0x15)
+
+typedef struct overlay_targ_lookup {
+ uint64_t otl_dlid;
+ uint64_t otl_reqid;
+ uint64_t otl_varpdid;
+ uint64_t otl_vnetid;
+ uint64_t otl_hdrsize;
+ uint64_t otl_pktsize;
+ uint8_t otl_srcaddr[ETHERADDRL];
+ uint8_t otl_dstaddr[ETHERADDRL];
+ uint32_t otl_dsttype;
+ uint32_t otl_sap;
+ int32_t otl_vlan;
+} overlay_targ_lookup_t;
+
+typedef struct overlay_targ_resp {
+ uint64_t otr_reqid;
+ overlay_target_point_t otr_answer;
+} overlay_targ_resp_t;
+
+typedef struct overlay_targ_pkt {
+ uint64_t otp_linkid;
+ uint64_t otp_reqid;
+ uint64_t otp_size;
+ void *otp_buf;
+} overlay_targ_pkt_t;
+
+#ifdef _KERNEL
+
+typedef struct overlay_targ_pkt32 {
+ uint64_t otp_linkid;
+ uint64_t otp_reqid;
+ uint64_t otp_size;
+ caddr32_t otp_buf;
+} overlay_targ_pkt32_t;
+
+#endif /* _KERNEL */
+
+/*
+ * This provides a way to get a list of active overlay devices independently
+ * from dlmgmtd. At the end of the day the kernel always knows what will exist
+ * and this allows varpd which is an implementation of libdladm not to end up
+ * needing to call back into dlmgmtd via libdladm and create an unfortunate
+ * dependency cycle.
+ */
+
+#define OVERLAY_TARG_LIST (OVERLAY_TARG_IOCTL | 0x20)
+
+typedef struct overlay_targ_list {
+ uint32_t otl_nents;
+ uint32_t otl_ents[];
+} overlay_targ_list_t;
+
+/*
+ * The following family of ioctls all manipulate the target cache of a given
+ * device.
+ *
+ * OVERLAY_TARG_CACHE_GET - overlay_targ_cache_t
+ *
+ * The overlay_targ_cache_t should be have its link identifier and
+ * the desired mac address filled in. On return, it will fill in
+ * the otc_dest member, if the entry exists in the table.
+ *
+ *
+ * OVERLAY_TARG_CACHE_SET - overlay_targ_cache_t
+ *
+ * The cache table entry of the mac address referred to by otc_mac
+ * and otd_linkid will be filled in with the details provided by in
+ * the otc_dest member.
+ *
+ * OVERLAY_TARG_CACHE_REMOVE - overlay_targ_cache_t
+ *
+ * Removes the cache entry identified by otc_mac from the table.
+ * Note that this does not stop any in-flight lookups or deal with
+ * any data that is awaiting a lookup.
+ *
+ *
+ * OVERLAY_TARG_CACHE_FLUSH - overlay_targ_cache_t
+ *
+ * Similar to OVERLAY_TARG_CACHE_REMOVE, but functions on the
+ * entire table identified by otc_linkid. All other parameters are
+ * ignored.
+ *
+ *
+ * OVERLAY_TARG_CACHE_ITER - overlay_targ_cache_iter_t
+ *
+ * Iterates over the contents of a target cache identified by
+ * otci_linkid. Iteration is guaranteed to be exactly once for
+ * items which are in the hashtable at the beginning and end of
+ * iteration. For items which are added or removed after iteration
+ * has begun, only at most once semantics are guaranteed. Consumers
+ * should ensure that otci_marker is zeroed before starting
+ * iteration and should preserve its contents across calls.
+ *
+ * Before calling in, otci_count should be set to the number of
+ * entries that space has been allocated for in otci_ents. The
+ * value will be updated to indicate the total number written out.
+ */
+
+#define OVERLAY_TARG_CACHE_GET (OVERLAY_TARG_IOCTL | 0x30)
+#define OVERLAY_TARG_CACHE_SET (OVERLAY_TARG_IOCTL | 0x31)
+#define OVERLAY_TARG_CACHE_REMOVE (OVERLAY_TARG_IOCTL | 0x32)
+#define OVERLAY_TARG_CACHE_FLUSH (OVERLAY_TARG_IOCTL | 0x33)
+#define OVERLAY_TARG_CACHE_ITER (OVERLAY_TARG_IOCTL | 0x34)
+
+/*
+ * This is a pretty arbitrary number that we're constraining ourselves to
+ * for iteration. Basically the goal is to make sure that we can't have a user
+ * ask us to allocate too much memory on their behalf at any time. A more
+ * dynamic form may be necessary some day.
+ */
+#define OVERLAY_TARGET_ITER_MAX 500
+
+#define OVERLAY_TARGET_CACHE_DROP 0x01
+
+typedef struct overlay_targ_cache_entry {
+ uint8_t otce_mac[ETHERADDRL];
+ uint16_t otce_flags;
+ overlay_target_point_t otce_dest;
+} overlay_targ_cache_entry_t;
+
+typedef struct overlay_targ_cache {
+ datalink_id_t otc_linkid;
+ overlay_targ_cache_entry_t otc_entry;
+} overlay_targ_cache_t;
+
+typedef struct overlay_targ_cache_iter {
+ datalink_id_t otci_linkid;
+ uint32_t otci_pad;
+ uint64_t otci_marker;
+ uint16_t otci_count;
+ overlay_targ_cache_entry_t otci_ents[];
+} overlay_targ_cache_iter_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _OVERLAY_TARGET_H */
diff --git a/usr/src/uts/common/sys/param.h b/usr/src/uts/common/sys/param.h
index c3a1b9a97b..ea2fdfd886 100644
--- a/usr/src/uts/common/sys/param.h
+++ b/usr/src/uts/common/sys/param.h
@@ -104,7 +104,7 @@ extern "C" {
#define DEFAULT_MAXPID 999999
#define DEFAULT_JUMPPID 100000
#else
-#define DEFAULT_MAXPID 30000
+#define DEFAULT_MAXPID 99999
#define DEFAULT_JUMPPID 0
#endif
diff --git a/usr/src/uts/common/sys/policy.h b/usr/src/uts/common/sys/policy.h
index 5f5b66d437..5328d02c59 100644
--- a/usr/src/uts/common/sys/policy.h
+++ b/usr/src/uts/common/sys/policy.h
@@ -108,6 +108,7 @@ int secpolicy_ipc_owner(const cred_t *, const struct kipc_perm *);
int secpolicy_kmdb(const cred_t *);
int secpolicy_lock_memory(const cred_t *);
int secpolicy_meminfo(const cred_t *);
+int secpolicy_fs_import(const cred_t *);
int secpolicy_modctl(const cred_t *, int);
int secpolicy_net(const cred_t *, int, boolean_t);
int secpolicy_net_bindmlp(const cred_t *);
@@ -174,6 +175,7 @@ int secpolicy_setid_setsticky_clear(vnode_t *, vattr_t *,
const vattr_t *, cred_t *);
int secpolicy_xvattr(xvattr_t *, uid_t, cred_t *, vtype_t);
int secpolicy_xvm_control(const cred_t *);
+int secpolicy_hyprlofs_control(const cred_t *);
int secpolicy_basic_exec(const cred_t *, vnode_t *);
int secpolicy_basic_fork(const cred_t *);
diff --git a/usr/src/uts/common/sys/proc.h b/usr/src/uts/common/sys/proc.h
index 5abf8fd3cd..ff4a1abce4 100644
--- a/usr/src/uts/common/sys/proc.h
+++ b/usr/src/uts/common/sys/proc.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -348,7 +349,9 @@ typedef struct proc {
struct zone *p_zone; /* zone in which process lives */
struct vnode *p_execdir; /* directory that p_exec came from */
struct brand *p_brand; /* process's brand */
- void *p_brand_data; /* per-process brand state */
+
+ /* per-process brand state */
+ void *p_brand_data;
/* additional lock to protect p_sessp (but not its contents) */
kmutex_t p_splock;
@@ -363,7 +366,6 @@ typedef struct proc {
*/
struct user p_user; /* (see sys/user.h) */
} proc_t;
-
#define PROC_T /* headers relying on proc_t are OK */
#ifdef _KERNEL
@@ -629,6 +631,7 @@ extern int signal_is_blocked(kthread_t *, int);
extern int sigcheck(proc_t *, kthread_t *);
extern void sigdefault(proc_t *);
+extern struct pid *pid_find(pid_t pid);
extern void pid_setmin(void);
extern pid_t pid_allocate(proc_t *, pid_t, int);
extern int pid_rele(struct pid *);
@@ -644,6 +647,7 @@ extern int sprtrylock_proc(proc_t *);
extern void sprwaitlock_proc(proc_t *);
extern void sprlock_proc(proc_t *);
extern void sprunlock(proc_t *);
+extern void sprunprlock(proc_t *);
extern void pid_init(void);
extern proc_t *pid_entry(int);
extern int pid_slot(proc_t *);
@@ -718,6 +722,10 @@ extern kthread_t *thread_unpin(void);
extern void thread_init(void);
extern void thread_load(kthread_t *, void (*)(), caddr_t, size_t);
+extern void thread_splitstack(void (*)(void *), void *, size_t);
+extern void thread_splitstack_run(caddr_t, void (*)(void *), void *);
+extern void thread_splitstack_cleanup(void);
+
extern void tsd_create(uint_t *, void (*)(void *));
extern void tsd_destroy(uint_t *);
extern void *tsd_getcreate(uint_t *, void (*)(void *), void *(*)(void));
@@ -759,7 +767,7 @@ extern void pokelwps(proc_t *);
extern void continuelwps(proc_t *);
extern int exitlwps(int);
extern void lwp_ctmpl_copy(klwp_t *, klwp_t *);
-extern void lwp_ctmpl_clear(klwp_t *);
+extern void lwp_ctmpl_clear(klwp_t *, boolean_t);
extern klwp_t *forklwp(klwp_t *, proc_t *, id_t);
extern void lwp_load(klwp_t *, gregset_t, uintptr_t);
extern void lwp_setrval(klwp_t *, int, int);
diff --git a/usr/src/uts/common/sys/procfs.h b/usr/src/uts/common/sys/procfs.h
index f592fd9dcf..501af712ef 100644
--- a/usr/src/uts/common/sys/procfs.h
+++ b/usr/src/uts/common/sys/procfs.h
@@ -25,6 +25,7 @@
*/
/*
* Copyright 2012 DEY Storage Systems, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#ifndef _SYS_PROCFS_H
@@ -233,6 +234,7 @@ typedef struct pstatus {
#define PR_FAULTED 6
#define PR_SUSPENDED 7
#define PR_CHECKPOINT 8
+#define PR_BRAND 9
/*
* lwp ps(1) information file. /proc/<pid>/lwp/<lwpid>/lwpsinfo
diff --git a/usr/src/uts/common/sys/ptms.h b/usr/src/uts/common/sys/ptms.h
index 6c79ee266d..ba8b2b1210 100644
--- a/usr/src/uts/common/sys/ptms.h
+++ b/usr/src/uts/common/sys/ptms.h
@@ -126,6 +126,12 @@ extern void ptms_logp(char *, uintptr_t);
#define DDBGP(a, b)
#endif
+typedef struct __ptmptsopencb_arg *ptmptsopencb_arg_t;
+typedef struct ptmptsopencb {
+ boolean_t (*ppocb_func)(ptmptsopencb_arg_t);
+ ptmptsopencb_arg_t ppocb_arg;
+} ptmptsopencb_t;
+
#endif /* _KERNEL */
typedef struct pt_own {
@@ -157,6 +163,19 @@ typedef struct pt_own {
#define ZONEPT (('P'<<8)|4) /* set zone of master/slave pair */
#define OWNERPT (('P'<<8)|5) /* set owner/group for slave device */
+#ifdef _KERNEL
+/*
+ * kernel ioctl commands
+ *
+ * PTMPTSOPENCB: Returns a callback function pointer and opaque argument.
+ * The return value of the callback function when it's invoked
+ * with the opaque argument passed to it will indicate if the
+ * pts slave device is currently open.
+ */
+#define PTMPTSOPENCB (('P'<<8)|6) /* check if the slave is open */
+
+#endif /* _KERNEL */
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/sys/scsi/adapters/mpt_sas/mptsas_hash.h b/usr/src/uts/common/sys/refhash.h
index 2069e6d3f1..b7427a454d 100644
--- a/usr/src/uts/common/sys/scsi/adapters/mpt_sas/mptsas_hash.h
+++ b/usr/src/uts/common/sys/refhash.h
@@ -10,11 +10,11 @@
*/
/*
- * Copyright 2014 Joyent, Inc. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
-#ifndef _SYS_SCSI_ADAPTERS_MPTHASH_H
-#define _SYS_SCSI_ADAPTERS_MPTHASH_H
+#ifndef _SYS_REFHASH_H
+#define _SYS_REFHASH_H
#include <sys/types.h>
#include <sys/list.h>
@@ -58,4 +58,4 @@ extern void *refhash_first(refhash_t *);
extern void *refhash_next(refhash_t *, void *);
extern boolean_t refhash_obj_valid(refhash_t *hp, const void *);
-#endif /* _SYS_SCSI_ADAPTERS_MPTHASH_H */
+#endif /* _SYS_REFHASH_H */
diff --git a/usr/src/uts/common/sys/resource.h b/usr/src/uts/common/sys/resource.h
index 2d3800b946..4b70a77db8 100644
--- a/usr/src/uts/common/sys/resource.h
+++ b/usr/src/uts/common/sys/resource.h
@@ -23,6 +23,7 @@
*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -191,6 +192,7 @@ struct rusage {
#define _RUSAGESYS_GETRUSAGE_CHLD 1 /* rusage child process */
#define _RUSAGESYS_GETRUSAGE_LWP 2 /* rusage lwp */
#define _RUSAGESYS_GETVMUSAGE 3 /* getvmusage */
+#define _RUSAGESYS_INVALMAP 4 /* vm_map_inval */
#if defined(_SYSCALL32)
diff --git a/usr/src/uts/common/sys/rt.h b/usr/src/uts/common/sys/rt.h
index ca52f8d995..82cc08d326 100644
--- a/usr/src/uts/common/sys/rt.h
+++ b/usr/src/uts/common/sys/rt.h
@@ -22,6 +22,7 @@
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -31,8 +32,6 @@
#ifndef _SYS_RT_H
#define _SYS_RT_H
-#pragma ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.4 */
-
#include <sys/types.h>
#include <sys/thread.h>
@@ -77,6 +76,16 @@ typedef struct rtkparms {
int rt_tqsig; /* real-time time quantum signal */
uint_t rt_cflags; /* real-time control flags */
} rtkparms_t;
+
+#define RTGPPRIO0 100 /* Global priority for RT priority 0 */
+
+/*
+ * control flags (kparms->rt_cflags).
+ */
+#define RT_DOPRI 0x01 /* change priority */
+#define RT_DOTQ 0x02 /* change RT time quantum */
+#define RT_DOSIG 0x04 /* change RT time quantum signal */
+
#endif /* _KERNEL */
#ifdef __cplusplus
diff --git a/usr/src/uts/common/sys/scsi/adapters/mpt_sas/mptsas_var.h b/usr/src/uts/common/sys/scsi/adapters/mpt_sas/mptsas_var.h
index 3983188fce..02116b45c4 100644
--- a/usr/src/uts/common/sys/scsi/adapters/mpt_sas/mptsas_var.h
+++ b/usr/src/uts/common/sys/scsi/adapters/mpt_sas/mptsas_var.h
@@ -22,7 +22,7 @@
/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
* Copyright (c) 2014, Tegile Systems Inc. All rights reserved.
*/
@@ -58,10 +58,10 @@
#include <sys/byteorder.h>
#include <sys/queue.h>
+#include <sys/refhash.h>
#include <sys/isa_defs.h>
#include <sys/sunmdi.h>
#include <sys/mdi_impldefs.h>
-#include <sys/scsi/adapters/mpt_sas/mptsas_hash.h>
#include <sys/scsi/adapters/mpt_sas/mptsas_ioctl.h>
#include <sys/scsi/adapters/mpt_sas/mpi/mpi2_tool.h>
#include <sys/scsi/adapters/mpt_sas/mpi/mpi2_cnfg.h>
diff --git a/usr/src/uts/common/sys/shm.h b/usr/src/uts/common/sys/shm.h
index e3bd2a77d3..030379488f 100644
--- a/usr/src/uts/common/sys/shm.h
+++ b/usr/src/uts/common/sys/shm.h
@@ -21,6 +21,7 @@
*/
/*
* Copyright 2014 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2016 Joyent, Inc.
*
* Copyright 2003 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
@@ -120,6 +121,10 @@ struct shmid_ds {
#define SHM_LOCK 3 /* Lock segment in core */
#define SHM_UNLOCK 4 /* Unlock segment */
+#if defined(_KERNEL)
+#define SHM_RMID 5 /* Private RMID for lx support */
+#endif
+
#if !defined(_KERNEL)
int shmget(key_t, size_t, int);
int shmids(int *, uint_t, uint_t *);
diff --git a/usr/src/uts/common/sys/shm_impl.h b/usr/src/uts/common/sys/shm_impl.h
index 4d8cdcede5..1eae2ca0a4 100644
--- a/usr/src/uts/common/sys/shm_impl.h
+++ b/usr/src/uts/common/sys/shm_impl.h
@@ -21,13 +21,12 @@
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
*/
#ifndef _SYS_SHM_IMPL_H
#define _SYS_SHM_IMPL_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/ipc_impl.h>
#if defined(_KERNEL) || defined(_KMEMUSER)
#include <sys/shm.h>
@@ -70,7 +69,11 @@ typedef struct kshmid {
time_t shm_ctime; /* last change time */
struct sptinfo *shm_sptinfo; /* info about ISM segment */
struct seg *shm_sptseg; /* pointer to ISM segment */
- long shm_sptprot; /* was reserved (still a "long") */
+ ulong_t shm_opts;
+ /*
+ * Composed of: sptprot (uchar_t) and
+ * RM_PENDING flag (1 bit).
+ */
} kshmid_t;
/*
@@ -78,6 +81,14 @@ typedef struct kshmid {
*/
#define SHMSA_ISM 1 /* uses shared page table */
+/*
+ * shm_opts definitions
+ * Low byte in shm_opts is used for sptprot (see PROT_ALL). The upper bits are
+ * used for additional options.
+ */
+#define SHM_PROT_MASK 0xff
+#define SHM_RM_PENDING 0x100
+
typedef struct sptinfo {
struct as *sptas; /* dummy as ptr. for spt segment */
} sptinfo_t;
diff --git a/usr/src/uts/common/sys/signal.h b/usr/src/uts/common/sys/signal.h
index 8f0e1794f4..139784d578 100644
--- a/usr/src/uts/common/sys/signal.h
+++ b/usr/src/uts/common/sys/signal.h
@@ -22,6 +22,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015, Joyent, Inc.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -158,8 +159,8 @@ struct sigaction32 {
* use of these symbols by applications is injurious
* to binary compatibility
*/
-#define NSIG 74 /* valid signals range from 1 to NSIG-1 */
-#define MAXSIG 73 /* size of u_signal[], NSIG-1 <= MAXSIG */
+#define NSIG 75 /* valid signals range from 1 to NSIG-1 */
+#define MAXSIG 74 /* size of u_signal[], NSIG-1 <= MAXSIG */
#endif /* defined(__EXTENSIONS__) || !defined(_XPG4_2) */
#define MINSIGSTKSZ 2048
diff --git a/usr/src/uts/common/sys/signalfd.h b/usr/src/uts/common/sys/signalfd.h
index 2661d5a05f..89d0647020 100644
--- a/usr/src/uts/common/sys/signalfd.h
+++ b/usr/src/uts/common/sys/signalfd.h
@@ -10,7 +10,7 @@
*/
/*
- * Copyright 2015 Joyent, Inc.
+ * Copyright 2016 Joyent, Inc.
*/
/*
@@ -75,13 +75,9 @@ extern int signalfd(int, const sigset_t *, int);
#define SIGNALFDMNRN_SIGNALFD 0
#define SIGNALFDMNRN_CLONE 1
-typedef struct sigfd_wake_list {
- list_node_t sigfd_wl_lst;
- void *sigfd_wl_state;
-} sigfd_wake_list_t;
-
/*
* This holds the proc_t state for a process which is using signalfd.
+ * Its presence and contents are protected by p_lock.
*/
typedef struct sigfd_proc_state {
void (*sigfd_pollwake_cb)(void *, int);
diff --git a/usr/src/uts/common/sys/socket.h b/usr/src/uts/common/sys/socket.h
index da8e3ab351..e55cd165aa 100644
--- a/usr/src/uts/common/sys/socket.h
+++ b/usr/src/uts/common/sys/socket.h
@@ -22,6 +22,7 @@
* Copyright 2014 Garrett D'Amore <garrett@damore.org>
*
* Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
@@ -39,6 +40,9 @@
/* Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved. */
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
#ifndef _SYS_SOCKET_H
#define _SYS_SOCKET_H
@@ -194,6 +198,7 @@ struct so_snd_bufinfo {
#define SO_SRCADDR 0x2001 /* Internal: AF_UNIX source address */
#define SO_FILEP 0x2002 /* Internal: AF_UNIX file pointer */
#define SO_UNIX_CLOSE 0x2003 /* Internal: AF_UNIX peer closed */
+#define SO_REUSEPORT 0x2004 /* allow simultaneous port reuse */
#endif /* _KERNEL */
/*
@@ -293,8 +298,9 @@ struct linger {
#define AF_INET_OFFLOAD 30 /* Sun private; do not use */
#define AF_TRILL 31 /* TRILL interface */
#define AF_PACKET 32 /* PF_PACKET Linux socket interface */
+#define AF_LX_NETLINK 33 /* Linux-compatible netlink */
-#define AF_MAX 32
+#define AF_MAX 33
/*
* Protocol families, same as address families for now.
@@ -334,6 +340,7 @@ struct linger {
#define PF_INET_OFFLOAD AF_INET_OFFLOAD /* Sun private; do not use */
#define PF_TRILL AF_TRILL
#define PF_PACKET AF_PACKET
+#define PF_LX_NETLINK AF_LX_NETLINK
#define PF_MAX AF_MAX
@@ -420,6 +427,7 @@ struct msghdr32 {
#define MSG_NOTIFICATION 0x100 /* Notification, not data */
#define MSG_XPG4_2 0x8000 /* Private: XPG4.2 flag */
+/* Obsolete but kept for compilation compatability. Use IOV_MAX. */
#define MSG_MAXIOVLEN 16
#ifdef _KERNEL
diff --git a/usr/src/uts/common/sys/socketvar.h b/usr/src/uts/common/sys/socketvar.h
index 52fa3a5822..da61975904 100644
--- a/usr/src/uts/common/sys/socketvar.h
+++ b/usr/src/uts/common/sys/socketvar.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
@@ -102,6 +103,7 @@ struct sockaddr_ux {
typedef struct sonodeops sonodeops_t;
typedef struct sonode sonode_t;
+typedef boolean_t (*so_krecv_f)(sonode_t *, mblk_t *, size_t, int, void *);
struct sodirect_s;
@@ -244,6 +246,10 @@ struct sonode {
struct sof_instance *so_filter_top; /* top of stack */
struct sof_instance *so_filter_bottom; /* bottom of stack */
clock_t so_filter_defertime; /* time when deferred */
+
+ /* Kernel direct receive callbacks */
+ so_krecv_f so_krecv_cb; /* recv callback */
+ void *so_krecv_arg; /* recv cb arg */
};
#define SO_HAVE_DATA(so) \
@@ -297,15 +303,16 @@ struct sonode {
#define SS_OOBPEND 0x00002000 /* OOB pending or present - poll */
#define SS_HAVEOOBDATA 0x00004000 /* OOB data present */
#define SS_HADOOBDATA 0x00008000 /* OOB data consumed */
-#define SS_CLOSING 0x00010000 /* in process of closing */
+#define SS_CLOSING 0x00010000 /* in process of closing */
#define SS_FIL_DEFER 0x00020000 /* filter deferred notification */
#define SS_FILOP_OK 0x00040000 /* socket can attach filters */
#define SS_FIL_RCV_FLOWCTRL 0x00080000 /* filter asserted rcv flow ctrl */
+
#define SS_FIL_SND_FLOWCTRL 0x00100000 /* filter asserted snd flow ctrl */
#define SS_FIL_STOP 0x00200000 /* no more filter actions */
-
#define SS_SODIRECT 0x00400000 /* transport supports sodirect */
+#define SS_FILOP_UNSF 0x00800000 /* block attaching unsafe filters */
#define SS_SENTLASTREADSIG 0x01000000 /* last rx signal has been sent */
#define SS_SENTLASTWRITESIG 0x02000000 /* last tx signal has been sent */
@@ -321,7 +328,8 @@ struct sonode {
/*
* Sockets that can fall back to TPI must ensure that fall back is not
- * initiated while a thread is using a socket.
+ * initiated while a thread is using a socket. Otherwise this disables all
+ * future filter attachment.
*/
#define SO_BLOCK_FALLBACK(so, fn) \
ASSERT(MUTEX_NOT_HELD(&(so)->so_lock)); \
@@ -337,6 +345,24 @@ struct sonode {
} \
}
+/*
+ * Sockets that can fall back to TPI must ensure that fall back is not
+ * initiated while a thread is using a socket. Otherwise this disables all
+ * future unsafe filter attachment. Safe filters can still attach after
+ * we execute the function in which this macro is used.
+ */
+#define SO_BLOCK_FALLBACK_SAFE(so, fn) \
+ ASSERT(MUTEX_NOT_HELD(&(so)->so_lock)); \
+ rw_enter(&(so)->so_fallback_rwlock, RW_READER); \
+ if ((so)->so_state & SS_FALLBACK_COMP) { \
+ rw_exit(&(so)->so_fallback_rwlock); \
+ return (fn); \
+ } else if (((so)->so_state & SS_FILOP_UNSF) == 0) { \
+ mutex_enter(&(so)->so_lock); \
+ (so)->so_state |= SS_FILOP_UNSF; \
+ mutex_exit(&(so)->so_lock); \
+ }
+
#define SO_UNBLOCK_FALLBACK(so) { \
rw_exit(&(so)->so_fallback_rwlock); \
}
@@ -368,6 +394,7 @@ struct sonode {
/* The modes below are only for non-streams sockets */
#define SM_ACCEPTSUPP 0x400 /* can handle accept() */
#define SM_SENDFILESUPP 0x800 /* Private: proto supp sendfile */
+#define SM_DEFERERR 0x1000 /* Private: defer so_error delivery */
/*
* Socket versions. Used by the socket library when calling _so_socket().
@@ -946,6 +973,15 @@ extern struct sonode *socreate(struct sockparams *, int, int, int, int,
extern int so_copyin(const void *, void *, size_t, int);
extern int so_copyout(const void *, void *, size_t, int);
+/*
+ * Functions to manipulate the use of direct receive callbacks. This should not
+ * be used outside of sockfs and ksocket. These are generally considered a use
+ * once interface for a socket and will cause all outstanding data on the socket
+ * to be flushed.
+ */
+extern int so_krecv_set(sonode_t *, so_krecv_f, void *);
+extern void so_krecv_unblock(sonode_t *);
+
#endif
/*
diff --git a/usr/src/uts/common/sys/sockfilter.h b/usr/src/uts/common/sys/sockfilter.h
index 9f6d8b499b..c4dd6539de 100644
--- a/usr/src/uts/common/sys/sockfilter.h
+++ b/usr/src/uts/common/sys/sockfilter.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
#ifndef _SYS_SOCKFILTER_H
@@ -129,6 +130,15 @@ typedef struct sof_ops {
#define SOF_VERSION 1
+/*
+ * Flag indicating that the filter module is safe to attach after bind,
+ * getsockname, getsockopt or setsockopt calls. By default filters are unsafe
+ * so may not be attached after any socket operation. However, a safe filter
+ * can still be attached after one of the above calls. This makes attaching
+ * the filter less dependent on the initial socket setup order.
+ */
+#define SOF_ATT_SAFE 0x1
+
extern int sof_register(int, const char *, const sof_ops_t *, int);
extern int sof_unregister(const char *);
diff --git a/usr/src/uts/common/sys/squeue.h b/usr/src/uts/common/sys/squeue.h
index f1bd429815..35e1cf64c7 100644
--- a/usr/src/uts/common/sys/squeue.h
+++ b/usr/src/uts/common/sys/squeue.h
@@ -29,6 +29,17 @@
extern "C" {
#endif
+/*
+ * Originally in illumos, we had an IP-centric view of the serialization queue
+ * abstraction. While that has useful properties, the implementation of squeues
+ * hardcodes various parts of the implementation of IP into it which makes it
+ * unsuitable for other consumers. To enable them, we created another interface,
+ * but opted not to port all of the functionality that IP uses in the form of
+ * ip_squeue.c As other consumers need the functionality that IP has in squeues,
+ * then we'll come up with more genericized methods and add that functionality
+ * to <sys/gsqueue.h>. Please do not continue to use this header.
+ */
+
#include <sys/types.h>
#include <sys/processor.h>
#include <sys/stream.h>
@@ -76,12 +87,13 @@ typedef enum {
struct ip_recv_attr_s;
extern void squeue_init(void);
-extern squeue_t *squeue_create(clock_t, pri_t);
+extern squeue_t *squeue_create(clock_t, pri_t, boolean_t);
extern void squeue_bind(squeue_t *, processorid_t);
extern void squeue_unbind(squeue_t *);
extern void squeue_enter(squeue_t *, mblk_t *, mblk_t *,
uint32_t, struct ip_recv_attr_s *, int, uint8_t);
extern uintptr_t *squeue_getprivate(squeue_t *, sqprivate_t);
+extern void squeue_destroy(squeue_t *);
struct conn_s;
extern int squeue_synch_enter(struct conn_s *, mblk_t *);
diff --git a/usr/src/uts/common/sys/squeue_impl.h b/usr/src/uts/common/sys/squeue_impl.h
index 22550886eb..d2418bbc15 100644
--- a/usr/src/uts/common/sys/squeue_impl.h
+++ b/usr/src/uts/common/sys/squeue_impl.h
@@ -117,6 +117,7 @@ struct squeue_s {
squeue_set_t *sq_set; /* managed by squeue creator */
pri_t sq_priority; /* squeue thread priority */
+ boolean_t sq_isip; /* use IP-centric features */
/* Keep the debug-only fields at the end of the structure */
#ifdef DEBUG
@@ -165,6 +166,7 @@ struct squeue_s {
#define SQS_POLL_RESTART_DONE 0x01000000
#define SQS_POLL_THR_QUIESCE 0x02000000
#define SQS_PAUSE 0x04000000 /* The squeue has been paused */
+#define SQS_EXIT 0x08000000 /* squeue is being torn down */
#define SQS_WORKER_THR_CONTROL \
(SQS_POLL_QUIESCE | SQS_POLL_RESTART | SQS_POLL_CLEANUP)
diff --git a/usr/src/uts/common/sys/stream.h b/usr/src/uts/common/sys/stream.h
index a04019a9ce..28289649dd 100644
--- a/usr/src/uts/common/sys/stream.h
+++ b/usr/src/uts/common/sys/stream.h
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -628,16 +629,11 @@ struct stroptions {
/*
* Structure for rw (read/write) procedure calls. A pointer
* to a struiod_t is passed as a parameter to the rwnext() call.
- *
- * Note: DEF_IOV_MAX is defined and used as it is in "fs/vncalls.c"
- * as there isn't a formal definition of IOV_MAX ???
*/
-#define DEF_IOV_MAX 16
-
typedef struct struiod {
mblk_t *d_mp; /* pointer to mblk (chain) */
uio_t d_uio; /* uio info */
- iovec_t d_iov[DEF_IOV_MAX]; /* iov referenced by uio */
+ iovec_t *d_iov; /* iov referenced by uio */
} struiod_t;
/*
diff --git a/usr/src/uts/common/sys/sysevent.h b/usr/src/uts/common/sys/sysevent.h
index 46a800e62b..255e98b871 100644
--- a/usr/src/uts/common/sys/sysevent.h
+++ b/usr/src/uts/common/sys/sysevent.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
#ifndef _SYS_SYSEVENT_H
@@ -67,10 +68,12 @@ extern "C" {
#define SE_KERN_PID 0
#define SUNW_VENDOR "SUNW"
+#define ILLUMOS_VENDOR "ILLUMOS"
#define SE_USR_PUB "usr:"
#define SE_KERN_PUB "kern:"
#define SUNW_KERN_PUB SUNW_VENDOR":"SE_KERN_PUB
#define SUNW_USR_PUB SUNW_VENDOR":"SE_USR_PUB
+#define ILLUMOS_KERN_PUB ILLUMOS_VENDOR":"SE_KERN_PUB
/*
* Event header and attribute value limits
diff --git a/usr/src/uts/common/sys/sysevent/datalink.h b/usr/src/uts/common/sys/sysevent/datalink.h
new file mode 100644
index 0000000000..592ef5bdde
--- /dev/null
+++ b/usr/src/uts/common/sys/sysevent/datalink.h
@@ -0,0 +1,54 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _SYS_SYSEVENT_DATALINK_H
+#define _SYS_SYSEVENT_DATALINK_H
+
+/*
+ * Datalink System Event payloads
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Event schema for EC_DATALINK_LINK_STATE
+ *
+ * Event Class - EC_DATALINK
+ * Event Sub-Class - EC_DATALINK_LINK_STATE
+ *
+ * Attribute Name - DATALINK_EV_LINK_NAME
+ * Attribute Type - SE_DATA_TYPE_STRING
+ * Attribute Value - [Name of the datalink]
+ *
+ * Attribute Name - DATALINK_EV_LINK_ID
+ * Attribute Type - SE_DATA_TYPE_INT32
+ * Attribute Value - [datalink_id_t for the device]
+ *
+ * Attribute Name - DATALINK_EV_ZONE_ID
+ * Attribute Type - SE_DATA_TYPE_INT32
+ * Attribute Value - [zoneid_t of the zone the datalink is in]
+ */
+
+#define DATALINK_EV_LINK_NAME "link"
+#define DATALINK_EV_LINK_ID "linkid"
+#define DATALINK_EV_ZONE_ID "zone"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_SYSEVENT_DATALINK_H */
diff --git a/usr/src/uts/common/sys/sysevent/eventdefs.h b/usr/src/uts/common/sys/sysevent/eventdefs.h
index 25401cec53..0a78d0310b 100644
--- a/usr/src/uts/common/sys/sysevent/eventdefs.h
+++ b/usr/src/uts/common/sys/sysevent/eventdefs.h
@@ -267,9 +267,11 @@ extern "C" {
#define ESC_ZFS_POOL_REGUID "ESC_ZFS_pool_reguid"
/*
- * datalink subclass definitions.
+ * datalink subclass definitions. Supporting attributes for datalink state found
+ * in sys/sysevent/datalink.h.
*/
#define ESC_DATALINK_PHYS_ADD "ESC_datalink_phys_add" /* new physical link */
+#define ESC_DATALINK_LINK_STATE "ESC_datalink_link_state" /* link state */
/*
* VRRP subclass definitions. Supporting attributes (name/value paris) are
diff --git a/usr/src/uts/common/sys/systrace.h b/usr/src/uts/common/sys/systrace.h
index d43974451e..17e509d4d8 100644
--- a/usr/src/uts/common/sys/systrace.h
+++ b/usr/src/uts/common/sys/systrace.h
@@ -22,13 +22,12 @@
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_SYSTRACE_H
#define _SYS_SYSTRACE_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/dtrace.h>
#ifdef __cplusplus
@@ -47,16 +46,18 @@ extern systrace_sysent_t *systrace_sysent;
extern systrace_sysent_t *systrace_sysent32;
extern void (*systrace_probe)(dtrace_id_t, uintptr_t, uintptr_t,
- uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+ uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
extern void systrace_stub(dtrace_id_t, uintptr_t, uintptr_t,
- uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+ uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
extern int64_t dtrace_systrace_syscall(uintptr_t arg0, uintptr_t arg1,
- uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5);
+ uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5,
+ uintptr_t arg6, uintptr_t arg7);
#ifdef _SYSCALL32_IMPL
extern int64_t dtrace_systrace_syscall32(uintptr_t arg0, uintptr_t arg1,
- uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5);
+ uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5,
+ uintptr_t arg6, uintptr_t arg7);
#endif
#endif
diff --git a/usr/src/uts/common/sys/termios.h b/usr/src/uts/common/sys/termios.h
index 09be20858d..889a7096cd 100644
--- a/usr/src/uts/common/sys/termios.h
+++ b/usr/src/uts/common/sys/termios.h
@@ -361,6 +361,24 @@ extern pid_t tcgetsid(int);
#define TCSETSF (_TIOC|16)
/*
+ * linux terminal ioctls we need to be aware of
+ */
+#define TIOCSETLD (_TIOC|123) /* set line discipline parms */
+#define TIOCGETLD (_TIOC|124) /* get line discipline parms */
+
+/*
+ * The VMIN and VTIME and solaris overlap with VEOF and VEOL - This is
+ * perfectly legal except, linux expects them to be separate. So we keep
+ * them separately.
+ */
+struct lx_cc {
+ unsigned char veof; /* veof value */
+ unsigned char veol; /* veol value */
+ unsigned char vmin; /* vmin value */
+ unsigned char vtime; /* vtime value */
+};
+
+/*
* NTP PPS ioctls
*/
#define TIOCGPPS (_TIOC|125)
diff --git a/usr/src/uts/common/sys/thread.h b/usr/src/uts/common/sys/thread.h
index d917944edf..6a1c36f2e7 100644
--- a/usr/src/uts/common/sys/thread.h
+++ b/usr/src/uts/common/sys/thread.h
@@ -24,6 +24,10 @@
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
+ */
+
#ifndef _SYS_THREAD_H
#define _SYS_THREAD_H
@@ -68,6 +72,8 @@ typedef struct ctxop {
void (*free_op)(void *, int); /* function which frees the context */
void *arg; /* argument to above functions, ctx pointer */
struct ctxop *next; /* next context ops */
+ hrtime_t save_ts; /* timestamp of last save */
+ hrtime_t restore_ts; /* timestamp of last restore */
} ctxop_t;
/*
@@ -366,7 +372,7 @@ typedef struct _kthread {
#define T_WOULDBLOCK 0x0020 /* for lockfs */
#define T_DONTBLOCK 0x0040 /* for lockfs */
#define T_DONTPEND 0x0080 /* for lockfs */
-#define T_SYS_PROF 0x0100 /* profiling on for duration of system call */
+#define T_SPLITSTK 0x0100 /* kernel stack is currently split */
#define T_WAITCVSEM 0x0200 /* waiting for a lwp_cv or lwp_sema on sleepq */
#define T_WATCHPT 0x0400 /* thread undergoing a watchpoint emulation */
#define T_PANIC 0x0800 /* thread initiated a system panic */
@@ -414,8 +420,9 @@ typedef struct _kthread {
#define TS_RESUME 0x1000 /* setrun() by CPR resume process */
#define TS_CREATE 0x2000 /* setrun() by syslwp_create() */
#define TS_RUNQMATCH 0x4000 /* exact run queue balancing by setbackdq() */
+#define TS_BSTART 0x8000 /* setrun() by brand */
#define TS_ALLSTART \
- (TS_CSTART|TS_UNPAUSE|TS_XSTART|TS_PSTART|TS_RESUME|TS_CREATE)
+ (TS_CSTART|TS_UNPAUSE|TS_XSTART|TS_PSTART|TS_RESUME|TS_CREATE|TS_BSTART)
#define TS_ANYWAITQ (TS_PROJWAITQ|TS_ZONEWAITQ)
/*
@@ -443,6 +450,10 @@ typedef struct _kthread {
#define ISTOPPED(t) ((t)->t_state == TS_STOPPED && \
!((t)->t_schedflag & TS_PSTART))
+/* True if thread is stopped for a brand-specific reason */
+#define BSTOPPED(t) ((t)->t_state == TS_STOPPED && \
+ !((t)->t_schedflag & TS_BSTART))
+
/* True if thread is asleep and wakeable */
#define ISWAKEABLE(t) (((t)->t_state == TS_SLEEP && \
((t)->t_flag & T_WAKEABLE)))
diff --git a/usr/src/uts/common/sys/uadmin.h b/usr/src/uts/common/sys/uadmin.h
index d5168c9b2c..c14a3bf11e 100644
--- a/usr/src/uts/common/sys/uadmin.h
+++ b/usr/src/uts/common/sys/uadmin.h
@@ -23,6 +23,7 @@
*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -159,7 +160,7 @@ extern kmutex_t ualock;
extern void mdboot(int, int, char *, boolean_t);
extern void mdpreboot(int, int, char *);
extern int kadmin(int, int, void *, cred_t *);
-extern void killall(zoneid_t);
+extern void killall(zoneid_t, boolean_t);
#endif
extern int uadmin(int, int, uintptr_t);
diff --git a/usr/src/uts/common/sys/uio.h b/usr/src/uts/common/sys/uio.h
index e803efeb45..5663929bfb 100644
--- a/usr/src/uts/common/sys/uio.h
+++ b/usr/src/uts/common/sys/uio.h
@@ -145,7 +145,8 @@ typedef struct uioa_s {
*/
typedef enum xuio_type {
UIOTYPE_ASYNCIO,
- UIOTYPE_ZEROCOPY
+ UIOTYPE_ZEROCOPY,
+ UIOTYPE_PEEKSIZE
} xuio_type_t;
typedef struct xuio {
@@ -175,6 +176,15 @@ typedef struct xuio {
int xu_zc_rw; /* read or write buffer */
void *xu_zc_priv; /* fs specific */
} xu_zc;
+
+ /*
+ * Peek Size Support -- facilitate peeking at the size of a
+ * waiting message on a socket.
+ */
+ struct {
+ ssize_t xu_ps_size; /* size of waiting msg */
+ boolean_t xu_ps_set; /* was size calculated? */
+ } xu_ps;
} xu_ext;
} xuio_t;
diff --git a/usr/src/uts/common/sys/user.h b/usr/src/uts/common/sys/user.h
index a7bff8dd52..66250a3f2b 100644
--- a/usr/src/uts/common/sys/user.h
+++ b/usr/src/uts/common/sys/user.h
@@ -26,7 +26,7 @@
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
- * Copyright (c) 2012 Joyent, Inc. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
*/
@@ -185,9 +185,9 @@ typedef struct { /* kernel syscall set type */
* This value should not be changed in a patch.
*/
#if defined(__sparc)
-#define __KERN_NAUXV_IMPL 20
+#define __KERN_NAUXV_IMPL 24
#elif defined(__i386) || defined(__amd64)
-#define __KERN_NAUXV_IMPL 22
+#define __KERN_NAUXV_IMPL 26
#endif
struct execsw;
@@ -211,6 +211,7 @@ typedef struct user {
int u_argc; /* value of argc passed to main() */
uintptr_t u_argv; /* value of argv passed to main() */
uintptr_t u_envp; /* value of envp passed to main() */
+ uintptr_t u_commpagep; /* address of mapped comm page */
/*
* These fields are protected by p_lock:
diff --git a/usr/src/uts/common/sys/vm_usage.h b/usr/src/uts/common/sys/vm_usage.h
index 1aa4a8ee6d..c2954cbc29 100644
--- a/usr/src/uts/common/sys/vm_usage.h
+++ b/usr/src/uts/common/sys/vm_usage.h
@@ -21,6 +21,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_VM_USAGE_H
@@ -79,8 +80,9 @@ extern "C" {
/* zoneid */
#define VMUSAGE_COL_EUSERS 0x2000 /* same as VMUSAGE_COL_RUSERS, but by */
/* euser */
+#define VMUSAGE_A_ZONE 0x4000 /* rss/swap for a specified zone */
-#define VMUSAGE_MASK 0x3fff /* all valid flags for getvmusage() */
+#define VMUSAGE_MASK 0x7fff /* all valid flags for getvmusage() */
typedef struct vmusage {
id_t vmu_zoneid; /* zoneid, or ALL_ZONES for */
@@ -108,6 +110,7 @@ extern int getvmusage(uint_t flags, time_t age, vmusage_t *buf, size_t *nres);
int vm_getusage(uint_t, time_t, vmusage_t *, size_t *, int);
void vm_usage_init();
+int vm_map_inval(pid_t, caddr_t, size_t);
#endif /* _KERNEL */
diff --git a/usr/src/uts/common/sys/vmsystm.h b/usr/src/uts/common/sys/vmsystm.h
index 6122b6cd2f..c7b41730b6 100644
--- a/usr/src/uts/common/sys/vmsystm.h
+++ b/usr/src/uts/common/sys/vmsystm.h
@@ -19,6 +19,9 @@
* CDDL HEADER END
*/
/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -159,6 +162,8 @@ extern void *boot_virt_alloc(void *addr, size_t size);
extern size_t exec_get_spslew(void);
+extern caddr_t map_userlimit(proc_t *pp, struct as *as, int flags);
+
#endif /* _KERNEL */
#ifdef __cplusplus
diff --git a/usr/src/uts/common/sys/vnd.h b/usr/src/uts/common/sys/vnd.h
new file mode 100644
index 0000000000..bc7c9c3122
--- /dev/null
+++ b/usr/src/uts/common/sys/vnd.h
@@ -0,0 +1,141 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_VND_H
+#define _SYS_VND_H
+
+#include <sys/types.h>
+#include <sys/vnd_errno.h>
+#include <sys/frameio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * We distinguish between normal ioctls and private ioctls we issues to out
+ * streams version. Streams ioctls have the upper bit set in the lowest byte.
+ * Note that there are no STREAMs ioctls for userland and all definitions
+ * related to them are not present in this file.
+ */
+#define VND_IOC (('v' << 24) | ('n' << 16) | ('d' << 8))
+
+/*
+ * Attach the current minor instance to a given dlpi datalink identified by a
+ * vnd_ioc_name_t argument. This fails if it's already been attached. Note that
+ * unlike the other ioctls, this is passed directly as opposed to every other
+ * function which is passed as a pointer to the value.
+ */
+#define VND_IOC_ATTACH (VND_IOC | 0x1)
+
+#define VND_NAMELEN 32
+
+typedef struct vnd_ioc_attach {
+ char via_name[VND_NAMELEN];
+ zoneid_t via_zoneid;
+ uint32_t via_errno;
+} vnd_ioc_attach_t;
+
+/*
+ * Link the current minor instance into the /devices name space.
+ *
+ * This ioctl adds entries into /devices with a name of the form z%d:%s vil_zid,
+ * vil_name. The device will be namespaced to the zone. The global zone will be
+ * able to see all minor nodes. In the zone, only the /dev entries will exist.
+ * At this time, a given device can only have one link at a time. Note that a
+ * user cannot specify the zone to pass in, rather it is the zone that the
+ * device was attached in.
+ */
+#define VND_IOC_LINK (VND_IOC | 0x2)
+
+typedef struct vnd_ioc_link {
+ char vil_name[VND_NAMELEN];
+ uint32_t vil_errno;
+} vnd_ioc_link_t;
+
+/*
+ * Unlink the opened minor instance from the /devices name space. A zone may use
+ * this to unlink an extent entry in /dev; however, they will not be able to
+ * link it in again.
+ */
+#define VND_IOC_UNLINK (VND_IOC | 0x3)
+typedef struct vnd_ioc_unlink {
+ uint32_t viu_errno;
+} vnd_ioc_unlink_t;
+
+/*
+ * Controls to get and set the current buffer recieve buffer size.
+ */
+typedef struct vnd_ioc_buf {
+ uint64_t vib_size;
+ uint32_t vib_filler;
+ uint32_t vib_errno;
+} vnd_ioc_buf_t;
+
+#define VND_IOC_GETRXBUF (VND_IOC | 0x04)
+#define VND_IOC_SETRXBUF (VND_IOC | 0x05)
+#define VND_IOC_GETMAXBUF (VND_IOC | 0x06)
+#define VND_IOC_GETTXBUF (VND_IOC | 0x07)
+#define VND_IOC_SETTXBUF (VND_IOC | 0x08)
+#define VND_IOC_GETMINTU (VND_IOC | 0x09)
+#define VND_IOC_GETMAXTU (VND_IOC | 0x0a)
+
+/*
+ * Information and listing ioctls
+ *
+ * This gets information about all of the active vnd instances. vl_actents is
+ * always updated to the number around and vl_nents is the number of
+ * vnd_ioc_info_t elements are allocated in vl_ents.
+ */
+typedef struct vnd_ioc_info {
+ uint32_t vii_version;
+ zoneid_t vii_zone;
+ char vii_name[VND_NAMELEN];
+ char vii_datalink[VND_NAMELEN];
+} vnd_ioc_info_t;
+
+typedef struct vnd_ioc_list {
+ uint_t vl_nents;
+ uint_t vl_actents;
+ vnd_ioc_info_t *vl_ents;
+} vnd_ioc_list_t;
+
+#ifdef _KERNEL
+
+typedef struct vnd_ioc_list32 {
+ uint_t vl_nents;
+ uint_t vl_actents;
+ caddr32_t vl_ents;
+} vnd_ioc_list32_t;
+
+#endif /* _KERNEL */
+
+#define VND_IOC_LIST (VND_IOC | 0x20)
+
+/*
+ * Framed I/O ioctls
+ *
+ * Users should use the standard frameio_t as opposed to a vnd specific type.
+ * This is a consolidation private ioctl pending futher stability in the form of
+ * specific system work.
+ */
+#define VND_IOC_FRAMEIO_READ (VND_IOC | 0x30)
+#define VND_IOC_FRAMEIO_WRITE (VND_IOC | 0x31)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_VND_H */
diff --git a/usr/src/uts/common/sys/vnd_errno.h b/usr/src/uts/common/sys/vnd_errno.h
new file mode 100644
index 0000000000..89e5fc2543
--- /dev/null
+++ b/usr/src/uts/common/sys/vnd_errno.h
@@ -0,0 +1,72 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_VND_ERRNO_H
+#define _SYS_VND_ERRNO_H
+
+/*
+ * This header contains all of the available vnd errors.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum vnd_errno {
+ VND_E_SUCCESS = 0, /* no error */
+ VND_E_NOMEM, /* no memory */
+ VND_E_NODATALINK, /* no such datalink */
+ VND_E_NOTETHER, /* not DL_ETHER */
+ VND_E_DLPIINVAL, /* Unknown DLPI failures */
+ VND_E_ATTACHFAIL, /* DL_ATTACH_REQ failed */
+ VND_E_BINDFAIL, /* DL_BIND_REQ failed */
+ VND_E_PROMISCFAIL, /* DL_PROMISCON_REQ failed */
+ VND_E_DIRECTFAIL, /* DLD_CAPAB_DIRECT enable failed */
+ VND_E_CAPACKINVAL, /* bad dl_capability_ack_t */
+ VND_E_SUBCAPINVAL, /* bad dl_capability_sub_t */
+ VND_E_DLDBADVERS, /* bad dld version */
+ VND_E_KSTATCREATE, /* failed to create kstats */
+ VND_E_NODEV, /* no such vnd link */
+ VND_E_NONETSTACK, /* netstack doesn't exist */
+ VND_E_ASSOCIATED, /* device already associated */
+ VND_E_ATTACHED, /* device already attached */
+ VND_E_LINKED, /* device already linked */
+ VND_E_BADNAME, /* invalid name */
+ VND_E_PERM, /* can't touch this */
+ VND_E_NOZONE, /* no such zone */
+ VND_E_STRINIT, /* failed to initialize vnd stream module */
+ VND_E_NOTATTACHED, /* device not attached */
+ VND_E_NOTLINKED, /* device not linked */
+ VND_E_LINKEXISTS, /* another device has the same link name */
+ VND_E_MINORNODE, /* failed to create minor node */
+ VND_E_BUFTOOBIG, /* requested buffer size is too large */
+ VND_E_BUFTOOSMALL, /* requested buffer size is too small */
+ VND_E_DLEXCL, /* unable to get dlpi excl access */
+ VND_E_DIRECTNOTSUP,
+ /* DLD direct capability not suported over data link */
+ VND_E_BADPROPSIZE, /* invalid property size */
+ VND_E_BADPROP, /* invalid property */
+ VND_E_PROPRDONLY, /* property is read only */
+ VND_E_SYS, /* unexpected system error */
+ VND_E_CAPABPASS,
+ /* capabilities invalid, pass-through module detected */
+ VND_E_UNKNOWN /* unknown error */
+} vnd_errno_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_VND_ERRNO_H */
diff --git a/usr/src/uts/common/sys/vnic_impl.h b/usr/src/uts/common/sys/vnic_impl.h
index 7e50091347..1a91158da6 100644
--- a/usr/src/uts/common/sys/vnic_impl.h
+++ b/usr/src/uts/common/sys/vnic_impl.h
@@ -21,7 +21,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright 2014 Joyent, Inc. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
#ifndef _SYS_VNIC_IMPL_H
@@ -65,6 +65,7 @@ typedef struct vnic_s {
uint32_t vn_hcksum_txflags;
uint32_t vn_mtu;
+ link_state_t vn_ls;
} vnic_t;
#define vn_mch vn_mc_handles[0]
diff --git a/usr/src/uts/common/sys/vnode.h b/usr/src/uts/common/sys/vnode.h
index e4d43cea7f..d12f6c4046 100644
--- a/usr/src/uts/common/sys/vnode.h
+++ b/usr/src/uts/common/sys/vnode.h
@@ -21,7 +21,7 @@
/*
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
@@ -738,12 +738,14 @@ typedef enum vnevent {
VE_RMDIR = 4, /* Remove of directory vnode's name */
VE_CREATE = 5, /* Create with vnode's name which exists */
VE_LINK = 6, /* Link with vnode's name as source */
- VE_RENAME_DEST_DIR = 7, /* Rename with vnode as target dir */
+ VE_RENAME_DEST_DIR = 7, /* Rename with vnode as target dir */
VE_MOUNTEDOVER = 8, /* File or Filesystem got mounted over vnode */
VE_TRUNCATE = 9, /* Truncate */
VE_PRE_RENAME_SRC = 10, /* Pre-rename, with vnode as source */
VE_PRE_RENAME_DEST = 11, /* Pre-rename, with vnode as target/dest. */
- VE_PRE_RENAME_DEST_DIR = 12 /* Pre-rename with vnode as target dir */
+ VE_PRE_RENAME_DEST_DIR = 12, /* Pre-rename with vnode as target dir */
+ VE_RENAME_SRC_DIR = 13, /* Rename with vnode as source dir */
+ VE_RESIZE = 14 /* Resize/truncate to non-zero offset */
} vnevent_t;
/*
@@ -1298,7 +1300,8 @@ void vnevent_remove(vnode_t *, vnode_t *, char *, caller_context_t *);
void vnevent_rmdir(vnode_t *, vnode_t *, char *, caller_context_t *);
void vnevent_create(vnode_t *, caller_context_t *);
void vnevent_link(vnode_t *, caller_context_t *);
-void vnevent_rename_dest_dir(vnode_t *, caller_context_t *ct);
+void vnevent_rename_dest_dir(vnode_t *, vnode_t *, char *,
+ caller_context_t *ct);
void vnevent_mountedover(vnode_t *, caller_context_t *);
void vnevent_truncate(vnode_t *, caller_context_t *);
int vnevent_support(vnode_t *, caller_context_t *);
@@ -1308,6 +1311,7 @@ void vnevent_pre_rename_dest(vnode_t *, vnode_t *, char *,
caller_context_t *);
void vnevent_pre_rename_dest_dir(vnode_t *, vnode_t *, char *,
caller_context_t *);
+void vnevent_resize(vnode_t *, caller_context_t *);
/* Vnode specific data */
void vsd_create(uint_t *, void (*)(void *));
@@ -1337,6 +1341,9 @@ u_longlong_t fs_new_caller_id();
int vn_vmpss_usepageio(vnode_t *);
+/* Empty v_path placeholder */
+extern char *vn_vpath_empty;
+
/*
* Needed for use of IS_VMODSORT() in kernel.
*/
diff --git a/usr/src/uts/common/sys/vxlan.h b/usr/src/uts/common/sys/vxlan.h
new file mode 100644
index 0000000000..d87786b507
--- /dev/null
+++ b/usr/src/uts/common/sys/vxlan.h
@@ -0,0 +1,47 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _SYS_VXLAN_H
+#define _SYS_VXLAN_H
+
+/*
+ * Common VXLAN information
+ */
+
+#include <sys/inttypes.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Sizes in bytes */
+#define VXLAN_HDR_LEN 8
+#define VXLAN_ID_LEN 3
+
+#define VXLAN_F_VDI 0x08000000
+#define VXLAN_ID_SHIFT 8
+
+#pragma pack(1)
+typedef struct vxlan_hdr {
+ uint32_t vxlan_flags;
+ uint32_t vxlan_id;
+} vxlan_hdr_t;
+#pragma pack()
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_VXLAN_H */
diff --git a/usr/src/uts/common/sys/zfd.h b/usr/src/uts/common/sys/zfd.h
new file mode 100644
index 0000000000..e08d75ecba
--- /dev/null
+++ b/usr/src/uts/common/sys/zfd.h
@@ -0,0 +1,78 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _SYS_ZFD_H
+#define _SYS_ZFD_H
+
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Minor node name of the global zone side (often called the "master" side)
+ * of the zfd dev.
+ */
+#define ZFD_MASTER_NAME "master"
+
+/*
+ * Minor node name of the non-global zone side (often called the "slave"
+ * side) of the zfd dev.
+ */
+#define ZFD_SLAVE_NAME "slave"
+
+#define ZFD_NAME_LEN 16
+
+/*
+ * ZFD_IOC forms the base for all zfd ioctls.
+ */
+#define ZFD_IOC (('Z' << 24) | ('f' << 16) | ('d' << 8))
+
+/*
+ * This ioctl tells the slave side it should push the TTY stream modules
+ * so that the fd looks like a tty.
+ */
+#define ZFD_MAKETTY (ZFD_IOC | 0)
+
+/*
+ * This ioctl puts a hangup into the stream so that the slave side sees EOF.
+ */
+#define ZFD_EOF (ZFD_IOC | 1)
+
+/*
+ * This ioctl succeeds if the slave side is open.
+ */
+#define ZFD_HAS_SLAVE (ZFD_IOC | 2)
+
+/*
+ * This ioctl links two streams into a multiplexer configuration for in-zone
+ * logging.
+ */
+#define ZFD_MUX (ZFD_IOC | 3)
+
+/*
+ * This ioctl controls the flow control setting for the log multiplexer stream
+ * (1 = true, 0 = false). The default is false which implies teeing into the
+ * log stream is "best-effort" but data will be discarded if the stream
+ * becomes full. If set and the log stream begins to fill up, the primary
+ * stream will stop flowing.
+ */
+#define ZFD_MUX_FLOWCON (ZFD_IOC | 4)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_ZFD_H */
diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h
index 2e69b0d1c7..754f8e3978 100644
--- a/usr/src/uts/common/sys/zone.h
+++ b/usr/src/uts/common/sys/zone.h
@@ -20,9 +20,9 @@
*/
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2015 Joyent, Inc. All rights reserved.
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
* Copyright 2014 Igor Kozhukhov <ikozhukhov@gmail.com>.
+ * Copyright 2016, Joyent, Inc.
*/
#ifndef _SYS_ZONE_H
@@ -97,13 +97,19 @@ extern "C" {
#define ZONE_ATTR_INITNAME 9
#define ZONE_ATTR_BOOTARGS 10
#define ZONE_ATTR_BRAND 11
-#define ZONE_ATTR_PHYS_MCAP 12
+#define ZONE_ATTR_PMCAP_NOVER 12
#define ZONE_ATTR_SCHED_CLASS 13
#define ZONE_ATTR_FLAGS 14
#define ZONE_ATTR_HOSTID 15
#define ZONE_ATTR_FS_ALLOWED 16
#define ZONE_ATTR_NETWORK 17
+#define ZONE_ATTR_DID 18
+#define ZONE_ATTR_PMCAP_PAGEOUT 19
#define ZONE_ATTR_INITNORESTART 20
+#define ZONE_ATTR_PG_FLT_DELAY 21
+#define ZONE_ATTR_RSS 22
+#define ZONE_ATTR_APP_SVC_CT 23
+#define ZONE_ATTR_SCHED_FIXEDHI 24
/* Start of the brand-specific attribute namespace */
#define ZONE_ATTR_BRAND_ATTRS 32768
@@ -184,6 +190,7 @@ typedef struct {
uint32_t doi; /* DOI for label */
caddr32_t label; /* label associated with zone */
int flags;
+ zoneid_t zoneid; /* requested zoneid */
} zone_def32;
#endif
typedef struct {
@@ -200,6 +207,7 @@ typedef struct {
uint32_t doi; /* DOI for label */
const bslabel_t *label; /* label associated with zone */
int flags;
+ zoneid_t zoneid; /* requested zoneid */
} zone_def;
/* extended error information */
@@ -244,9 +252,12 @@ typedef enum zone_cmd {
typedef struct zone_cmd_arg {
uint64_t uniqid; /* unique "generation number" */
zone_cmd_t cmd; /* requested action */
- uint32_t _pad; /* need consistent 32/64 bit alignmt */
+ int status; /* init status on shutdown */
+ uint32_t debug; /* enable brand hook debug */
char locale[MAXPATHLEN]; /* locale in which to render messages */
char bootbuf[BOOTARGS_MAX]; /* arguments passed to zone_boot() */
+ /* Needed for 32/64 zoneadm -> zoneadmd door arg size check. */
+ int pad;
} zone_cmd_arg_t;
/*
@@ -372,7 +383,7 @@ typedef struct zone_dataset {
} zone_dataset_t;
/*
- * structure for zone kstats
+ * structure for rctl zone kstats
*/
typedef struct zone_kstat {
kstat_named_t zk_zonename;
@@ -383,12 +394,57 @@ typedef struct zone_kstat {
struct cpucap;
typedef struct {
+ hrtime_t cycle_start;
+ uint_t cycle_cnt;
+ hrtime_t zone_avg_cnt;
+} sys_zio_cntr_t;
+
+typedef struct {
+ kstat_named_t zv_zonename;
+ kstat_named_t zv_nread;
+ kstat_named_t zv_reads;
+ kstat_named_t zv_rtime;
+ kstat_named_t zv_rlentime;
+ kstat_named_t zv_rcnt;
+ kstat_named_t zv_nwritten;
+ kstat_named_t zv_writes;
+ kstat_named_t zv_wtime;
+ kstat_named_t zv_wlentime;
+ kstat_named_t zv_wcnt;
+ kstat_named_t zv_10ms_ops;
+ kstat_named_t zv_100ms_ops;
+ kstat_named_t zv_1s_ops;
+ kstat_named_t zv_10s_ops;
+ kstat_named_t zv_delay_cnt;
+ kstat_named_t zv_delay_time;
+} zone_vfs_kstat_t;
+
+typedef struct {
+ kstat_named_t zz_zonename;
+ kstat_named_t zz_nread;
+ kstat_named_t zz_reads;
+ kstat_named_t zz_rtime;
+ kstat_named_t zz_rlentime;
+ kstat_named_t zz_nwritten;
+ kstat_named_t zz_writes;
+ kstat_named_t zz_waittime;
+} zone_zfs_kstat_t;
+
+typedef struct {
kstat_named_t zm_zonename;
+ kstat_named_t zm_rss;
+ kstat_named_t zm_phys_cap;
+ kstat_named_t zm_swap;
+ kstat_named_t zm_swap_cap;
+ kstat_named_t zm_nover;
+ kstat_named_t zm_pagedout;
kstat_named_t zm_pgpgin;
kstat_named_t zm_anonpgin;
kstat_named_t zm_execpgin;
kstat_named_t zm_fspgin;
kstat_named_t zm_anon_alloc_fail;
+ kstat_named_t zm_pf_throttle;
+ kstat_named_t zm_pf_throttle_usec;
} zone_mcap_kstat_t;
typedef struct {
@@ -447,6 +503,7 @@ typedef struct zone {
*/
list_node_t zone_linkage;
zoneid_t zone_id; /* ID of zone */
+ zoneid_t zone_did; /* persistent debug ID of zone */
uint_t zone_ref; /* count of zone_hold()s on zone */
uint_t zone_cred_ref; /* count of zone_hold_cred()s on zone */
/*
@@ -499,10 +556,11 @@ typedef struct zone {
kcondvar_t zone_cv; /* used to signal state changes */
struct proc *zone_zsched; /* Dummy kernel "zsched" process */
pid_t zone_proc_initpid; /* pid of "init" for this zone */
- char *zone_initname; /* fs path to 'init' */
+ char *zone_initname; /* fs path to 'init' */
+ int zone_init_status; /* init's exit status */
int zone_boot_err; /* for zone_boot() if boot fails */
char *zone_bootargs; /* arguments passed via zone_boot() */
- uint64_t zone_phys_mcap; /* physical memory cap */
+ rctl_qty_t zone_phys_mem_ctl; /* current phys. memory limit */
/*
* zone_kthreads is protected by zone_status_lock.
*/
@@ -540,9 +598,12 @@ typedef struct zone {
tsol_mlp_list_t zone_mlps; /* MLPs on zone-private addresses */
boolean_t zone_restart_init; /* Restart init if it dies? */
+ boolean_t zone_reboot_on_init_exit; /* Reboot if init dies? */
+ boolean_t zone_setup_app_contract; /* setup contract? */
struct brand *zone_brand; /* zone's brand */
void *zone_brand_data; /* store brand specific data */
id_t zone_defaultcid; /* dflt scheduling class id */
+ boolean_t zone_fixed_hipri; /* fixed sched. hi prio */
kstat_t *zone_swapresv_kstat;
kstat_t *zone_lockedmem_kstat;
/*
@@ -551,6 +612,37 @@ typedef struct zone {
list_t zone_dl_list;
netstack_t *zone_netstack;
struct cpucap *zone_cpucap; /* CPU caps data */
+
+ /*
+ * Data and counters used for ZFS fair-share disk IO.
+ */
+ rctl_qty_t zone_zfs_io_pri; /* ZFS IO priority */
+ uint_t zone_zfs_queued[2]; /* sync I/O enqueued count */
+ uint64_t zone_zfs_weight; /* used to prevent starvation */
+ uint64_t zone_io_util; /* IO utilization metric */
+ boolean_t zone_io_util_above_avg; /* IO util percent > avg. */
+ uint16_t zone_io_delay; /* IO delay on logical r/w */
+ kmutex_t zone_stg_io_lock; /* protects IO window data */
+ sys_zio_cntr_t zone_rd_ops; /* Counters for ZFS reads, */
+ sys_zio_cntr_t zone_wr_ops; /* writes and */
+ sys_zio_cntr_t zone_lwr_ops; /* logical writes. */
+
+ /*
+ * kstats and counters for VFS ops and bytes.
+ */
+ kmutex_t zone_vfs_lock; /* protects VFS statistics */
+ kstat_t *zone_vfs_ksp;
+ kstat_io_t zone_vfs_rwstats;
+ zone_vfs_kstat_t *zone_vfs_stats;
+
+ /*
+ * kstats for ZFS I/O ops and bytes.
+ */
+ kmutex_t zone_zfs_lock; /* protects ZFS statistics */
+ kstat_t *zone_zfs_ksp;
+ kstat_io_t zone_zfs_rwstats;
+ zone_zfs_kstat_t *zone_zfs_stats;
+
/*
* Solaris Auditing per-zone audit context
*/
@@ -569,6 +661,13 @@ typedef struct zone {
/* zone_rctls->rcs_lock */
kstat_t *zone_nprocs_kstat;
+ /*
+ * kstats and counters for physical memory capping.
+ */
+ rctl_qty_t zone_phys_mem; /* current bytes of phys. mem. (RSS) */
+ kstat_t *zone_physmem_kstat;
+ uint64_t zone_mcap_nover; /* # of times over phys. cap */
+ uint64_t zone_mcap_pagedout; /* bytes of mem. paged out */
kmutex_t zone_mcap_lock; /* protects mcap statistics */
kstat_t *zone_mcap_ksp;
zone_mcap_kstat_t *zone_mcap_stats;
@@ -577,6 +676,11 @@ typedef struct zone {
uint64_t zone_execpgin; /* exec pages paged in */
uint64_t zone_fspgin; /* fs pages paged in */
uint64_t zone_anon_alloc_fail; /* cnt of anon alloc fails */
+ uint64_t zone_pf_throttle; /* cnt of page flt throttles */
+ uint64_t zone_pf_throttle_usec; /* time of page flt throttles */
+
+ /* Num usecs to throttle page fault when zone is over phys. mem cap */
+ uint32_t zone_pg_flt_delay;
/*
* Misc. kstats and counters for zone cpu-usage aggregation.
@@ -658,6 +762,7 @@ extern zone_t *zone_find_by_name(char *);
extern zone_t *zone_find_by_any_path(const char *, boolean_t);
extern zone_t *zone_find_by_path(const char *);
extern zoneid_t getzoneid(void);
+extern zoneid_t getzonedid(void);
extern zone_t *zone_find_by_id_nolock(zoneid_t);
extern int zone_datalink_walk(zoneid_t, int (*)(datalink_id_t, void *), void *);
extern int zone_check_datalink(zoneid_t *, datalink_id_t);
@@ -838,6 +943,7 @@ extern int zone_ncpus_online_get(zone_t *);
* Returns true if the named pool/dataset is visible in the current zone.
*/
extern int zone_dataset_visible(const char *, int *);
+extern int zone_dataset_visible_inzone(zone_t *, const char *, int *);
/*
* zone version of kadmin()
@@ -852,6 +958,7 @@ extern int zone_walk(int (*)(zone_t *, void *), void *);
extern rctl_hndl_t rc_zone_locked_mem;
extern rctl_hndl_t rc_zone_max_swap;
+extern rctl_hndl_t rc_zone_phys_mem;
extern rctl_hndl_t rc_zone_max_lofi;
#endif /* _KERNEL */
diff --git a/usr/src/uts/common/syscall/brandsys.c b/usr/src/uts/common/syscall/brandsys.c
index 9b4bd38baa..8ee5511fd0 100644
--- a/usr/src/uts/common/syscall/brandsys.c
+++ b/usr/src/uts/common/syscall/brandsys.c
@@ -23,7 +23,9 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
#include <sys/brand.h>
#include <sys/systm.h>
@@ -35,7 +37,7 @@
*/
int64_t
brandsys(int cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
- uintptr_t arg4, uintptr_t arg5, uintptr_t arg6)
+ uintptr_t arg4, uintptr_t arg5)
{
struct proc *p = curthread->t_procp;
int64_t rval = 0;
@@ -49,7 +51,7 @@ brandsys(int cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
return (set_errno(ENOSYS));
if ((err = ZBROP(p->p_zone)->b_brandsys(cmd, &rval, arg1, arg2, arg3,
- arg4, arg5, arg6)) != 0)
+ arg4, arg5)) != 0)
return (set_errno(err));
return (rval);
diff --git a/usr/src/uts/common/syscall/fcntl.c b/usr/src/uts/common/syscall/fcntl.c
index 371bc83c29..d631fe62f6 100644
--- a/usr/src/uts/common/syscall/fcntl.c
+++ b/usr/src/uts/common/syscall/fcntl.c
@@ -54,7 +54,8 @@
#include <sys/cmn_err.h>
-static int flock_check(vnode_t *, flock64_t *, offset_t, offset_t);
+/* This is global so that it can be used by brand emulation. */
+int flock_check(vnode_t *, flock64_t *, offset_t, offset_t);
static int flock_get_start(vnode_t *, flock64_t *, offset_t, u_offset_t *);
static void fd_too_big(proc_t *);
diff --git a/usr/src/uts/common/syscall/memcntl.c b/usr/src/uts/common/syscall/memcntl.c
index 1ee4b6a395..721f884a7e 100644
--- a/usr/src/uts/common/syscall/memcntl.c
+++ b/usr/src/uts/common/syscall/memcntl.c
@@ -115,13 +115,17 @@ memcntl(caddr_t addr, size_t len, int cmd, caddr_t arg, int attr, int mask)
* MS_SYNC used to be defined to be zero but is now non-zero.
* For binary compatibility we still accept zero
* (the absence of MS_ASYNC) to mean the same thing.
+ * Binary compatibility is not an issue for MS_INVALCURPROC.
*/
iarg = (uintptr_t)arg;
if ((iarg & ~MS_INVALIDATE) == 0)
iarg |= MS_SYNC;
- if (((iarg & ~(MS_SYNC|MS_ASYNC|MS_INVALIDATE)) != 0) ||
- ((iarg & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC))) {
+ if (((iarg &
+ ~(MS_SYNC|MS_ASYNC|MS_INVALIDATE|MS_INVALCURPROC)) != 0) ||
+ ((iarg & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC)) ||
+ ((iarg & (MS_INVALIDATE|MS_INVALCURPROC)) ==
+ (MS_INVALIDATE|MS_INVALCURPROC))) {
error = set_errno(EINVAL);
} else {
error = as_ctl(as, addr, len, cmd, attr, iarg, NULL, 0);
diff --git a/usr/src/uts/common/syscall/open.c b/usr/src/uts/common/syscall/open.c
index edb04c824b..874e31869c 100644
--- a/usr/src/uts/common/syscall/open.c
+++ b/usr/src/uts/common/syscall/open.c
@@ -74,12 +74,12 @@ copen(int startfd, char *fname, int filemode, int createmode)
if (filemode & (FSEARCH|FEXEC)) {
/*
- * Must be one or the other and neither FREAD nor FWRITE
+ * Must be one or the other.
* Must not be any of FAPPEND FCREAT FTRUNC FXATTR FXATTRDIROPEN
- * XXX: Should these just be silently ignored?
+ * XXX: Should these just be silently ignored like we
+ * silently ignore FREAD|FWRITE?
*/
- if ((filemode & (FREAD|FWRITE)) ||
- (filemode & (FSEARCH|FEXEC)) == (FSEARCH|FEXEC) ||
+ if ((filemode & (FSEARCH|FEXEC)) == (FSEARCH|FEXEC) ||
(filemode & (FAPPEND|FCREAT|FTRUNC|FXATTR|FXATTRDIROPEN)))
return (set_errno(EINVAL));
}
diff --git a/usr/src/uts/common/syscall/poll.c b/usr/src/uts/common/syscall/poll.c
index cc125f127a..3d0a5cc04b 100644
--- a/usr/src/uts/common/syscall/poll.c
+++ b/usr/src/uts/common/syscall/poll.c
@@ -29,7 +29,7 @@
/*
* Copyright (c) 2012 by Delphix. All rights reserved.
- * Copyright 2015, Joyent, Inc.
+ * Copyright 2016, Joyent, Inc.
*/
/*
@@ -317,20 +317,58 @@ polllock(pollhead_t *php, kmutex_t *lp)
return (0);
}
-static int
-poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
+int
+poll_copyin(pollstate_t *ps, pollfd_t *fds, nfds_t nfds)
+{
+ pollfd_t *pollfdp;
+ nfds_t old_nfds;
+
+ /*
+ * NOTE: for performance, buffers are saved across poll() calls.
+ * The theory is that if a process polls heavily, it tends to poll
+ * on the same set of descriptors. Therefore, we only reallocate
+ * buffers when nfds changes. There is no hysteresis control,
+ * because there is no data to suggest that this is necessary;
+ * the penalty of reallocating is not *that* great in any event.
+ */
+ old_nfds = ps->ps_nfds;
+ if (nfds != old_nfds) {
+ kmem_free(ps->ps_pollfd, old_nfds * sizeof (pollfd_t));
+ pollfdp = kmem_alloc(nfds * sizeof (pollfd_t), KM_SLEEP);
+ ps->ps_pollfd = pollfdp;
+ ps->ps_nfds = nfds;
+ }
+
+ pollfdp = ps->ps_pollfd;
+ if (copyin(fds, pollfdp, nfds * sizeof (pollfd_t))) {
+ return (EFAULT);
+ }
+
+ if (fds == NULL) {
+ /*
+ * If the process has page 0 mapped, then the copyin() above
+ * will succeed even if fds is NULL. However, our cached
+ * poll lists are keyed by the address of the passed-in fds
+ * structure, and we use the value NULL to indicate an unused
+ * poll cache list entry. As such, we elect not to support
+ * NULL as a valid (user) memory address and fail the poll()
+ * call.
+ */
+ return (EFAULT);
+ }
+ return (0);
+}
+
+int
+poll_common(pollstate_t *ps, pollfd_t *fds, nfds_t nfds, timespec_t *tsp,
+ int *fdcnt)
{
kthread_t *t = curthread;
- klwp_t *lwp = ttolwp(t);
proc_t *p = ttoproc(t);
- int fdcnt = 0;
- int i;
hrtime_t deadline; /* hrtime value when we want to return */
pollfd_t *pollfdp;
- pollstate_t *ps;
pollcache_t *pcp;
int error = 0;
- nfds_t old_nfds;
int cacheindex = 0; /* which cache set is used */
/*
@@ -348,32 +386,11 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
}
/*
- * Reset our signal mask, if requested.
- */
- if (ksetp != NULL) {
- mutex_enter(&p->p_lock);
- schedctl_finish_sigblock(t);
- lwp->lwp_sigoldmask = t->t_hold;
- t->t_hold = *ksetp;
- t->t_flag |= T_TOMASK;
- /*
- * Call cv_reltimedwait_sig() just to check for signals.
- * We will return immediately with either 0 or -1.
- */
- if (!cv_reltimedwait_sig(&t->t_delay_cv, &p->p_lock, 0,
- TR_CLOCK_TICK)) {
- mutex_exit(&p->p_lock);
- error = EINTR;
- goto pollout;
- }
- mutex_exit(&p->p_lock);
- }
-
- /*
- * Check to see if this guy just wants to use poll() as a timeout.
+ * Check to see if the caller just wants to use poll() as a timeout.
* If yes then bypass all the other stuff and make him sleep.
*/
if (nfds == 0) {
+ *fdcnt = 0;
/*
* Sleep until we have passed the requested future
* time or until interrupted by a signal.
@@ -385,66 +402,14 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
&t->t_delay_lock, deadline)) > 0)
continue;
mutex_exit(&t->t_delay_lock);
- error = (error == 0) ? EINTR : 0;
+ return ((error == 0) ? EINTR : 0);
}
- goto pollout;
- }
-
- if (nfds > p->p_fno_ctl) {
- mutex_enter(&p->p_lock);
- (void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
- p->p_rctls, p, RCA_SAFE);
- mutex_exit(&p->p_lock);
- error = EINVAL;
- goto pollout;
- }
-
- /*
- * Need to allocate memory for pollstate before anything because
- * the mutex and cv are created in this space
- */
- ps = pollstate_create();
-
- if (ps->ps_pcache == NULL)
- ps->ps_pcache = pcache_alloc();
- pcp = ps->ps_pcache;
-
- /*
- * NOTE: for performance, buffers are saved across poll() calls.
- * The theory is that if a process polls heavily, it tends to poll
- * on the same set of descriptors. Therefore, we only reallocate
- * buffers when nfds changes. There is no hysteresis control,
- * because there is no data to suggest that this is necessary;
- * the penalty of reallocating is not *that* great in any event.
- */
- old_nfds = ps->ps_nfds;
- if (nfds != old_nfds) {
-
- kmem_free(ps->ps_pollfd, old_nfds * sizeof (pollfd_t));
- pollfdp = kmem_alloc(nfds * sizeof (pollfd_t), KM_SLEEP);
- ps->ps_pollfd = pollfdp;
- ps->ps_nfds = nfds;
+ return (0);
}
+ VERIFY(ps != NULL);
pollfdp = ps->ps_pollfd;
- if (copyin(fds, pollfdp, nfds * sizeof (pollfd_t))) {
- error = EFAULT;
- goto pollout;
- }
-
- if (fds == NULL) {
- /*
- * If the process has page 0 mapped, then the copyin() above
- * will succeed even if fds is NULL. However, our cached
- * poll lists are keyed by the address of the passed-in fds
- * structure, and we use the value NULL to indicate an unused
- * poll cache list entry. As such, we elect not to support
- * NULL as a valid (user) memory address and fail the poll()
- * call.
- */
- error = EINVAL;
- goto pollout;
- }
+ VERIFY(pollfdp != NULL);
/*
* If this thread polls for the first time, allocate ALL poll
@@ -460,10 +425,10 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
/*
* poll and cache this poll fd list in ps_pcacheset[0].
*/
- error = pcacheset_cache_list(ps, fds, &fdcnt, cacheindex);
- if (fdcnt || error) {
+ error = pcacheset_cache_list(ps, fds, fdcnt, cacheindex);
+ if (error || *fdcnt) {
mutex_exit(&ps->ps_lock);
- goto pollout;
+ return (error);
}
} else {
pollcacheset_t *pcset = ps->ps_pcacheset;
@@ -488,11 +453,11 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
* the callee will guarantee the consistency
* of cached poll list and cache content.
*/
- error = pcacheset_resolve(ps, nfds, &fdcnt,
+ error = pcacheset_resolve(ps, nfds, fdcnt,
cacheindex);
if (error) {
mutex_exit(&ps->ps_lock);
- goto pollout;
+ return (error);
}
break;
}
@@ -509,11 +474,11 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
* found an unused entry. Use it to cache
* this poll list.
*/
- error = pcacheset_cache_list(ps, fds, &fdcnt,
+ error = pcacheset_cache_list(ps, fds, fdcnt,
cacheindex);
- if (fdcnt || error) {
+ if (error || *fdcnt) {
mutex_exit(&ps->ps_lock);
- goto pollout;
+ return (error);
}
break;
}
@@ -527,10 +492,10 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
cacheindex = pcacheset_replace(ps);
ASSERT(cacheindex < ps->ps_nsets);
pcset[cacheindex].pcs_usradr = (uintptr_t)fds;
- error = pcacheset_resolve(ps, nfds, &fdcnt, cacheindex);
+ error = pcacheset_resolve(ps, nfds, fdcnt, cacheindex);
if (error) {
mutex_exit(&ps->ps_lock);
- goto pollout;
+ return (error);
}
}
}
@@ -548,8 +513,8 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
mutex_enter(&pcp->pc_lock);
for (;;) {
pcp->pc_flag = 0;
- error = pcache_poll(pollfdp, ps, nfds, &fdcnt, cacheindex);
- if (fdcnt || error) {
+ error = pcache_poll(pollfdp, ps, nfds, fdcnt, cacheindex);
+ if (error || *fdcnt) {
mutex_exit(&pcp->pc_lock);
mutex_exit(&ps->ps_lock);
break;
@@ -595,13 +560,116 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
mutex_enter(&pcp->pc_lock);
}
+ return (error);
+}
+
+/*
+ * This is the system call trap that poll(),
+ * select() and pselect() are built upon.
+ * It is a private interface between libc and the kernel.
+ */
+int
+pollsys(pollfd_t *fds, nfds_t nfds, timespec_t *timeoutp, sigset_t *setp)
+{
+ kthread_t *t = curthread;
+ klwp_t *lwp = ttolwp(t);
+ proc_t *p = ttoproc(t);
+ timespec_t ts;
+ timespec_t *tsp;
+ k_sigset_t kset;
+ pollstate_t *ps = NULL;
+ pollfd_t *pollfdp = NULL;
+ int error = 0, fdcnt = 0;
+
+ /*
+ * Copy in timeout
+ */
+ if (timeoutp == NULL) {
+ tsp = NULL;
+ } else {
+ if (get_udatamodel() == DATAMODEL_NATIVE) {
+ if (copyin(timeoutp, &ts, sizeof (ts)))
+ return (set_errno(EFAULT));
+ } else {
+ timespec32_t ts32;
+
+ if (copyin(timeoutp, &ts32, sizeof (ts32)))
+ return (set_errno(EFAULT));
+ TIMESPEC32_TO_TIMESPEC(&ts, &ts32)
+ }
+
+ if (itimerspecfix(&ts))
+ return (set_errno(EINVAL));
+ tsp = &ts;
+ }
+
+ /*
+ * Copy in and reset signal mask, if requested.
+ */
+ if (setp != NULL) {
+ sigset_t set;
+
+ if (copyin(setp, &set, sizeof (set)))
+ return (set_errno(EFAULT));
+ sigutok(&set, &kset);
+
+ mutex_enter(&p->p_lock);
+ schedctl_finish_sigblock(t);
+ lwp->lwp_sigoldmask = t->t_hold;
+ t->t_hold = kset;
+ t->t_flag |= T_TOMASK;
+ /*
+ * Call cv_reltimedwait_sig() just to check for signals.
+ * We will return immediately with either 0 or -1.
+ */
+ if (!cv_reltimedwait_sig(&t->t_delay_cv, &p->p_lock, 0,
+ TR_CLOCK_TICK)) {
+ mutex_exit(&p->p_lock);
+ error = EINTR;
+ goto pollout;
+ }
+ mutex_exit(&p->p_lock);
+ }
+
+ /*
+ * Initialize pollstate and copy in pollfd data if present.
+ * If nfds == 0, we will skip all of the copying and check steps and
+ * proceed directly into poll_common to process the supplied timeout.
+ */
+ if (nfds != 0) {
+ if (nfds > p->p_fno_ctl) {
+ mutex_enter(&p->p_lock);
+ (void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
+ p->p_rctls, p, RCA_SAFE);
+ mutex_exit(&p->p_lock);
+ error = EINVAL;
+ goto pollout;
+ }
+
+ /*
+ * Need to allocate memory for pollstate before anything
+ * because the mutex and cv are created in this space
+ */
+ ps = pollstate_create();
+ if (ps->ps_pcache == NULL)
+ ps->ps_pcache = pcache_alloc();
+
+ if ((error = poll_copyin(ps, fds, nfds)) != 0)
+ goto pollout;
+ pollfdp = ps->ps_pollfd;
+ }
+
+ /*
+ * Perform the actual poll.
+ */
+ error = poll_common(ps, fds, nfds, tsp, &fdcnt);
+
pollout:
/*
- * If we changed the signal mask but we received
- * no signal then restore the signal mask.
- * Otherwise psig() will deal with the signal mask.
+ * If we changed the signal mask but we received no signal then restore
+ * the signal mask. Otherwise psig() will deal with the signal mask.
*/
- if (ksetp != NULL) {
+ if (setp != NULL) {
mutex_enter(&p->p_lock);
if (lwp->lwp_cursig == 0) {
t->t_hold = lwp->lwp_sigoldmask;
@@ -612,12 +680,10 @@ pollout:
if (error)
return (set_errno(error));
-
/*
* Copy out the events and return the fdcnt to the user.
*/
- if (nfds != 0 &&
- copyout(pollfdp, fds, nfds * sizeof (pollfd_t)))
+ if (nfds != 0 && copyout(pollfdp, fds, nfds * sizeof (pollfd_t)))
return (set_errno(EFAULT));
#ifdef DEBUG
@@ -625,7 +691,7 @@ pollout:
* Another sanity check:
*/
if (fdcnt) {
- int reventcnt = 0;
+ int i, reventcnt = 0;
for (i = 0; i < nfds; i++) {
if (pollfdp[i].fd < 0) {
@@ -638,6 +704,8 @@ pollout:
}
ASSERT(fdcnt == reventcnt);
} else {
+ int i;
+
for (i = 0; i < nfds; i++) {
ASSERT(pollfdp[i].revents == 0);
}
@@ -648,52 +716,6 @@ pollout:
}
/*
- * This is the system call trap that poll(),
- * select() and pselect() are built upon.
- * It is a private interface between libc and the kernel.
- */
-int
-pollsys(pollfd_t *fds, nfds_t nfds, timespec_t *timeoutp, sigset_t *setp)
-{
- timespec_t ts;
- timespec_t *tsp;
- sigset_t set;
- k_sigset_t kset;
- k_sigset_t *ksetp;
- model_t datamodel = get_udatamodel();
-
- if (timeoutp == NULL)
- tsp = NULL;
- else {
- if (datamodel == DATAMODEL_NATIVE) {
- if (copyin(timeoutp, &ts, sizeof (ts)))
- return (set_errno(EFAULT));
- } else {
- timespec32_t ts32;
-
- if (copyin(timeoutp, &ts32, sizeof (ts32)))
- return (set_errno(EFAULT));
- TIMESPEC32_TO_TIMESPEC(&ts, &ts32)
- }
-
- if (itimerspecfix(&ts))
- return (set_errno(EINVAL));
- tsp = &ts;
- }
-
- if (setp == NULL)
- ksetp = NULL;
- else {
- if (copyin(setp, &set, sizeof (set)))
- return (set_errno(EFAULT));
- sigutok(&set, &kset);
- ksetp = &kset;
- }
-
- return (poll_common(fds, nfds, tsp, ksetp));
-}
-
-/*
* Clean up any state left around by poll(2). Called when a thread exits.
*/
void
diff --git a/usr/src/uts/common/syscall/rusagesys.c b/usr/src/uts/common/syscall/rusagesys.c
index 3e0e63f4c0..417c629168 100644
--- a/usr/src/uts/common/syscall/rusagesys.c
+++ b/usr/src/uts/common/syscall/rusagesys.c
@@ -21,6 +21,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
*/
/*
@@ -257,6 +258,19 @@ rusagesys(int code, void *arg1, void *arg2, void *arg3, void *arg4)
case _RUSAGESYS_GETVMUSAGE:
return (vm_getusage((uint_t)(uintptr_t)arg1, (time_t)arg2,
(vmusage_t *)arg3, (size_t *)arg4, 0));
+ case _RUSAGESYS_INVALMAP:
+ /*
+ * SPARC sfmmu hat does not support HAT_CURPROC_PGUNLOAD
+ * handling so callers on SPARC should get simple sync
+ * handling with invalidation to all processes.
+ */
+#if defined(__sparc)
+ return (memcntl((caddr_t)arg2, (size_t)arg3, MC_SYNC,
+ (caddr_t)(MS_ASYNC | MS_INVALIDATE), 0, 0));
+#else
+ return (vm_map_inval((pid_t)(uintptr_t)arg1, (caddr_t)arg2,
+ (size_t)arg3));
+#endif
default:
return (set_errno(EINVAL));
}
diff --git a/usr/src/uts/common/syscall/rw.c b/usr/src/uts/common/syscall/rw.c
index a28894b2c9..943b7d244e 100644
--- a/usr/src/uts/common/syscall/rw.c
+++ b/usr/src/uts/common/syscall/rw.c
@@ -22,7 +22,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright (c) 2015, Joyent, Inc. All rights reserved.
+ * Copyright 2015, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
@@ -50,6 +50,7 @@
#include <sys/debug.h>
#include <sys/rctl.h>
#include <sys/nbmlock.h>
+#include <sys/limits.h>
#define COPYOUT_MAX_CACHE (1<<17) /* 128K */
@@ -607,19 +608,12 @@ out:
return (bcount);
}
-/*
- * XXX -- The SVID refers to IOV_MAX, but doesn't define it. Grrrr....
- * XXX -- However, SVVS expects readv() and writev() to fail if
- * XXX -- iovcnt > 16 (yes, it's hard-coded in the SVVS source),
- * XXX -- so I guess that's the "interface".
- */
-#define DEF_IOV_MAX 16
-
ssize_t
readv(int fdes, struct iovec *iovp, int iovcnt)
{
struct uio auio;
- struct iovec aiov[DEF_IOV_MAX];
+ struct iovec buf[IOV_MAX_STACK], *aiov = buf;
+ int aiovlen = 0;
file_t *fp;
register vnode_t *vp;
struct cpu *cp;
@@ -630,9 +624,14 @@ readv(int fdes, struct iovec *iovp, int iovcnt)
u_offset_t fileoff;
int in_crit = 0;
- if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
+ if (iovcnt <= 0 || iovcnt > IOV_MAX)
return (set_errno(EINVAL));
+ if (iovcnt > IOV_MAX_STACK) {
+ aiovlen = iovcnt * sizeof (iovec_t);
+ aiov = kmem_alloc(aiovlen, KM_SLEEP);
+ }
+
#ifdef _SYSCALL32_IMPL
/*
* 32-bit callers need to have their iovec expanded,
@@ -640,36 +639,63 @@ readv(int fdes, struct iovec *iovp, int iovcnt)
* of data in a single call.
*/
if (get_udatamodel() == DATAMODEL_ILP32) {
- struct iovec32 aiov32[DEF_IOV_MAX];
+ struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
+ int aiov32len;
ssize32_t count32;
- if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
+ aiov32len = iovcnt * sizeof (iovec32_t);
+ if (aiovlen != 0)
+ aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
+
+ if (copyin(iovp, aiov32, aiov32len)) {
+ if (aiovlen != 0) {
+ kmem_free(aiov32, aiov32len);
+ kmem_free(aiov, aiovlen);
+ }
return (set_errno(EFAULT));
+ }
count32 = 0;
for (i = 0; i < iovcnt; i++) {
ssize32_t iovlen32 = aiov32[i].iov_len;
count32 += iovlen32;
- if (iovlen32 < 0 || count32 < 0)
+ if (iovlen32 < 0 || count32 < 0) {
+ if (aiovlen != 0) {
+ kmem_free(aiov32, aiov32len);
+ kmem_free(aiov, aiovlen);
+ }
return (set_errno(EINVAL));
+ }
aiov[i].iov_len = iovlen32;
aiov[i].iov_base =
(caddr_t)(uintptr_t)aiov32[i].iov_base;
}
+
+ if (aiovlen != 0)
+ kmem_free(aiov32, aiov32len);
} else
#endif
- if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
+ if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
return (set_errno(EFAULT));
+ }
count = 0;
for (i = 0; i < iovcnt; i++) {
ssize_t iovlen = aiov[i].iov_len;
count += iovlen;
- if (iovlen < 0 || count < 0)
+ if (iovlen < 0 || count < 0) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
return (set_errno(EINVAL));
+ }
}
- if ((fp = getf(fdes)) == NULL)
+ if ((fp = getf(fdes)) == NULL) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
return (set_errno(EBADF));
+ }
if (((fflag = fp->f_flag) & FREAD) == 0) {
error = EBADF;
goto out;
@@ -768,6 +794,8 @@ out:
if (in_crit)
nbl_end_crit(vp);
releasef(fdes);
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
if (error)
return (set_errno(error));
return (count);
@@ -777,7 +805,8 @@ ssize_t
writev(int fdes, struct iovec *iovp, int iovcnt)
{
struct uio auio;
- struct iovec aiov[DEF_IOV_MAX];
+ struct iovec buf[IOV_MAX_STACK], *aiov = buf;
+ int aiovlen = 0;
file_t *fp;
register vnode_t *vp;
struct cpu *cp;
@@ -788,9 +817,14 @@ writev(int fdes, struct iovec *iovp, int iovcnt)
u_offset_t fileoff;
int in_crit = 0;
- if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
+ if (iovcnt <= 0 || iovcnt > IOV_MAX)
return (set_errno(EINVAL));
+ if (iovcnt > IOV_MAX_STACK) {
+ aiovlen = iovcnt * sizeof (iovec_t);
+ aiov = kmem_alloc(aiovlen, KM_SLEEP);
+ }
+
#ifdef _SYSCALL32_IMPL
/*
* 32-bit callers need to have their iovec expanded,
@@ -798,36 +832,62 @@ writev(int fdes, struct iovec *iovp, int iovcnt)
* of data in a single call.
*/
if (get_udatamodel() == DATAMODEL_ILP32) {
- struct iovec32 aiov32[DEF_IOV_MAX];
+ struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
+ int aiov32len;
ssize32_t count32;
- if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
+ aiov32len = iovcnt * sizeof (iovec32_t);
+ if (aiovlen != 0)
+ aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
+
+ if (copyin(iovp, aiov32, aiov32len)) {
+ if (aiovlen != 0) {
+ kmem_free(aiov32, aiov32len);
+ kmem_free(aiov, aiovlen);
+ }
return (set_errno(EFAULT));
+ }
count32 = 0;
for (i = 0; i < iovcnt; i++) {
ssize32_t iovlen = aiov32[i].iov_len;
count32 += iovlen;
- if (iovlen < 0 || count32 < 0)
+ if (iovlen < 0 || count32 < 0) {
+ if (aiovlen != 0) {
+ kmem_free(aiov32, aiov32len);
+ kmem_free(aiov, aiovlen);
+ }
return (set_errno(EINVAL));
+ }
aiov[i].iov_len = iovlen;
aiov[i].iov_base =
(caddr_t)(uintptr_t)aiov32[i].iov_base;
}
+ if (aiovlen != 0)
+ kmem_free(aiov32, aiov32len);
} else
#endif
- if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
+ if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
return (set_errno(EFAULT));
+ }
count = 0;
for (i = 0; i < iovcnt; i++) {
ssize_t iovlen = aiov[i].iov_len;
count += iovlen;
- if (iovlen < 0 || count < 0)
+ if (iovlen < 0 || count < 0) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
return (set_errno(EINVAL));
+ }
}
- if ((fp = getf(fdes)) == NULL)
+ if ((fp = getf(fdes)) == NULL) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
return (set_errno(EBADF));
+ }
if (((fflag = fp->f_flag) & FWRITE) == 0) {
error = EBADF;
goto out;
@@ -917,6 +977,8 @@ out:
if (in_crit)
nbl_end_crit(vp);
releasef(fdes);
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
if (error)
return (set_errno(error));
return (count);
@@ -927,7 +989,8 @@ preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
off_t extended_offset)
{
struct uio auio;
- struct iovec aiov[DEF_IOV_MAX];
+ struct iovec buf[IOV_MAX_STACK], *aiov = buf;
+ int aiovlen = 0;
file_t *fp;
register vnode_t *vp;
struct cpu *cp;
@@ -952,9 +1015,14 @@ preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
int in_crit = 0;
- if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
+ if (iovcnt <= 0 || iovcnt > IOV_MAX)
return (set_errno(EINVAL));
+ if (iovcnt > IOV_MAX_STACK) {
+ aiovlen = iovcnt * sizeof (iovec_t);
+ aiov = kmem_alloc(aiovlen, KM_SLEEP);
+ }
+
#ifdef _SYSCALL32_IMPL
/*
* 32-bit callers need to have their iovec expanded,
@@ -962,39 +1030,68 @@ preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
* of data in a single call.
*/
if (get_udatamodel() == DATAMODEL_ILP32) {
- struct iovec32 aiov32[DEF_IOV_MAX];
+ struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
+ int aiov32len;
ssize32_t count32;
- if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
+ aiov32len = iovcnt * sizeof (iovec32_t);
+ if (aiovlen != 0)
+ aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
+
+ if (copyin(iovp, aiov32, aiov32len)) {
+ if (aiovlen != 0) {
+ kmem_free(aiov32, aiov32len);
+ kmem_free(aiov, aiovlen);
+ }
return (set_errno(EFAULT));
+ }
count32 = 0;
for (i = 0; i < iovcnt; i++) {
ssize32_t iovlen32 = aiov32[i].iov_len;
count32 += iovlen32;
- if (iovlen32 < 0 || count32 < 0)
+ if (iovlen32 < 0 || count32 < 0) {
+ if (aiovlen != 0) {
+ kmem_free(aiov32, aiov32len);
+ kmem_free(aiov, aiovlen);
+ }
return (set_errno(EINVAL));
+ }
aiov[i].iov_len = iovlen32;
aiov[i].iov_base =
(caddr_t)(uintptr_t)aiov32[i].iov_base;
}
+ if (aiovlen != 0)
+ kmem_free(aiov32, aiov32len);
} else
#endif /* _SYSCALL32_IMPL */
- if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
+ if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
return (set_errno(EFAULT));
+ }
count = 0;
for (i = 0; i < iovcnt; i++) {
ssize_t iovlen = aiov[i].iov_len;
count += iovlen;
- if (iovlen < 0 || count < 0)
+ if (iovlen < 0 || count < 0) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
return (set_errno(EINVAL));
+ }
}
- if ((bcount = (ssize_t)count) < 0)
+ if ((bcount = (ssize_t)count) < 0) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
return (set_errno(EINVAL));
- if ((fp = getf(fdes)) == NULL)
+ }
+ if ((fp = getf(fdes)) == NULL) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
return (set_errno(EBADF));
+ }
if (((fflag = fp->f_flag) & FREAD) == 0) {
error = EBADF;
goto out;
@@ -1099,6 +1196,8 @@ out:
if (in_crit)
nbl_end_crit(vp);
releasef(fdes);
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
if (error)
return (set_errno(error));
return (count);
@@ -1109,7 +1208,8 @@ pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
off_t extended_offset)
{
struct uio auio;
- struct iovec aiov[DEF_IOV_MAX];
+ struct iovec buf[IOV_MAX_STACK], *aiov = buf;
+ int aiovlen = 0;
file_t *fp;
register vnode_t *vp;
struct cpu *cp;
@@ -1134,9 +1234,14 @@ pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
int in_crit = 0;
- if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
+ if (iovcnt <= 0 || iovcnt > IOV_MAX)
return (set_errno(EINVAL));
+ if (iovcnt > IOV_MAX_STACK) {
+ aiovlen = iovcnt * sizeof (iovec_t);
+ aiov = kmem_alloc(aiovlen, KM_SLEEP);
+ }
+
#ifdef _SYSCALL32_IMPL
/*
* 32-bit callers need to have their iovec expanded,
@@ -1144,39 +1249,68 @@ pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
* of data in a single call.
*/
if (get_udatamodel() == DATAMODEL_ILP32) {
- struct iovec32 aiov32[DEF_IOV_MAX];
+ struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
+ int aiov32len;
ssize32_t count32;
- if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
+ aiov32len = iovcnt * sizeof (iovec32_t);
+ if (aiovlen != 0)
+ aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
+
+ if (copyin(iovp, aiov32, aiov32len)) {
+ if (aiovlen != 0) {
+ kmem_free(aiov32, aiov32len);
+ kmem_free(aiov, aiovlen);
+ }
return (set_errno(EFAULT));
+ }
count32 = 0;
for (i = 0; i < iovcnt; i++) {
ssize32_t iovlen32 = aiov32[i].iov_len;
count32 += iovlen32;
- if (iovlen32 < 0 || count32 < 0)
+ if (iovlen32 < 0 || count32 < 0) {
+ if (aiovlen != 0) {
+ kmem_free(aiov32, aiov32len);
+ kmem_free(aiov, aiovlen);
+ }
return (set_errno(EINVAL));
+ }
aiov[i].iov_len = iovlen32;
aiov[i].iov_base =
(caddr_t)(uintptr_t)aiov32[i].iov_base;
}
+ if (aiovlen != 0)
+ kmem_free(aiov32, aiov32len);
} else
#endif /* _SYSCALL32_IMPL */
- if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
+ if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
return (set_errno(EFAULT));
+ }
count = 0;
for (i = 0; i < iovcnt; i++) {
ssize_t iovlen = aiov[i].iov_len;
count += iovlen;
- if (iovlen < 0 || count < 0)
+ if (iovlen < 0 || count < 0) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
return (set_errno(EINVAL));
+ }
}
- if ((bcount = (ssize_t)count) < 0)
+ if ((bcount = (ssize_t)count) < 0) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
return (set_errno(EINVAL));
- if ((fp = getf(fdes)) == NULL)
+ }
+ if ((fp = getf(fdes)) == NULL) {
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
return (set_errno(EBADF));
+ }
if (((fflag = fp->f_flag) & FWRITE) == 0) {
error = EBADF;
goto out;
@@ -1308,6 +1442,8 @@ out:
if (in_crit)
nbl_end_crit(vp);
releasef(fdes);
+ if (aiovlen != 0)
+ kmem_free(aiov, aiovlen);
if (error)
return (set_errno(error));
return (count);
diff --git a/usr/src/uts/common/syscall/sendfile.c b/usr/src/uts/common/syscall/sendfile.c
index cb8246f584..ccceca7c6d 100644
--- a/usr/src/uts/common/syscall/sendfile.c
+++ b/usr/src/uts/common/syscall/sendfile.c
@@ -82,7 +82,7 @@ extern sotpi_info_t *sotpi_sototpi(struct sonode *);
* 64 bit kernel or 32 bit kernel. For 32 bit apps, we can't transfer
* more than 2GB of data.
*/
-int
+static int
sendvec_chunk64(file_t *fp, u_offset_t *fileoff, struct ksendfilevec64 *sfv,
int copy_cnt, ssize32_t *count)
{
@@ -343,7 +343,7 @@ sendvec_chunk64(file_t *fp, u_offset_t *fileoff, struct ksendfilevec64 *sfv,
return (0);
}
-ssize32_t
+static ssize32_t
sendvec64(file_t *fp, const struct ksendfilevec64 *vec, int sfvcnt,
size32_t *xferred, int fildes)
{
@@ -390,7 +390,7 @@ sendvec64(file_t *fp, const struct ksendfilevec64 *vec, int sfvcnt,
}
#endif
-int
+static int
sendvec_small_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
int copy_cnt, ssize_t total_size, int maxblk, ssize_t *count)
{
@@ -680,7 +680,7 @@ sendvec_small_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
}
-int
+static int
sendvec_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
int copy_cnt, ssize_t *count)
{
@@ -1160,6 +1160,17 @@ sendfilev(int opcode, int fildes, const struct sendfilevec *vec, int sfvcnt,
} else {
maxblk = (int)vp->v_stream->sd_maxblk;
}
+
+ /*
+ * We need to make sure that the socket that we're sending on
+ * supports sendfile behavior. sockfs doesn't know that the APIs
+ * we want to use are coming from sendfile, so we can't rely on
+ * it to check for us.
+ */
+ if ((so->so_mode & SM_SENDFILESUPP) == 0) {
+ error = EOPNOTSUPP;
+ goto err;
+ }
break;
case VREG:
break;
diff --git a/usr/src/uts/common/syscall/stat.c b/usr/src/uts/common/syscall/stat.c
index 4085104cc7..93f26121bc 100644
--- a/usr/src/uts/common/syscall/stat.c
+++ b/usr/src/uts/common/syscall/stat.c
@@ -61,7 +61,7 @@
* to VOP_GETATTR
*/
-static int
+int
cstatat_getvp(int fd, char *name, int follow, vnode_t **vp, cred_t **cred)
{
vnode_t *startvp;
diff --git a/usr/src/uts/common/syscall/sysconfig.c b/usr/src/uts/common/syscall/sysconfig.c
index 03f2fabe13..26ea859224 100644
--- a/usr/src/uts/common/syscall/sysconfig.c
+++ b/usr/src/uts/common/syscall/sysconfig.c
@@ -22,6 +22,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -170,8 +171,8 @@ sysconfig(int which)
* even though rcapd can be used on the global zone too.
*/
if (!INGLOBALZONE(curproc) &&
- curproc->p_zone->zone_phys_mcap != 0)
- return (MIN(btop(curproc->p_zone->zone_phys_mcap),
+ curproc->p_zone->zone_phys_mem_ctl != UINT64_MAX)
+ return (MIN(btop(curproc->p_zone->zone_phys_mem_ctl),
physinstalled));
return (physinstalled);
@@ -179,26 +180,23 @@ sysconfig(int which)
case _CONFIG_AVPHYS_PAGES:
/*
* If the non-global zone has a phys. memory cap, use
- * the phys. memory cap - zone's current rss. We always
+ * the phys. memory cap - zone's rss. We always
* report the system-wide value for the global zone, even
- * though rcapd can be used on the global zone too.
+ * though memory capping can be used on the global zone too.
+ * We use the cached value for the RSS since vm_getusage()
+ * is so expensive and we don't need this value to be exact.
*/
if (!INGLOBALZONE(curproc) &&
- curproc->p_zone->zone_phys_mcap != 0) {
+ curproc->p_zone->zone_phys_mem_ctl != UINT64_MAX) {
pgcnt_t cap, rss, free;
- vmusage_t in_use;
- size_t cnt = 1;
- cap = btop(curproc->p_zone->zone_phys_mcap);
+ cap = btop(curproc->p_zone->zone_phys_mem_ctl);
if (cap > physinstalled)
return (freemem);
- if (vm_getusage(VMUSAGE_ZONE, 1, &in_use, &cnt,
- FKIOCTL) != 0)
- in_use.vmu_rss_all = 0;
- rss = btop(in_use.vmu_rss_all);
+ rss = btop(curproc->p_zone->zone_phys_mem);
/*
- * Because rcapd implements a soft cap, it is possible
+ * Because this is a soft cap, it is possible
* for rss to be temporarily over the cap.
*/
if (cap > rss)
diff --git a/usr/src/uts/common/syscall/uadmin.c b/usr/src/uts/common/syscall/uadmin.c
index 2dda4001bf..68aa1a95f5 100644
--- a/usr/src/uts/common/syscall/uadmin.c
+++ b/usr/src/uts/common/syscall/uadmin.c
@@ -78,7 +78,7 @@ volatile int fastreboot_dryrun = 0;
* system with many zones.
*/
void
-killall(zoneid_t zoneid)
+killall(zoneid_t zoneid, boolean_t force)
{
proc_t *p;
@@ -108,7 +108,7 @@ killall(zoneid_t zoneid)
p->p_stat != SIDL &&
p->p_stat != SZOMB) {
mutex_enter(&p->p_lock);
- if (sigismember(&p->p_sig, SIGKILL)) {
+ if (!force && sigismember(&p->p_sig, SIGKILL)) {
mutex_exit(&p->p_lock);
p = p->p_next;
} else {
@@ -245,7 +245,7 @@ kadmin(int cmd, int fcn, void *mdep, cred_t *credp)
*/
zone_shutdown_global();
- killall(ALL_ZONES);
+ killall(ALL_ZONES, B_FALSE);
/*
* If we are calling kadmin() from a kernel context then we
* do not release these resources.
diff --git a/usr/src/uts/common/vm/hat.h b/usr/src/uts/common/vm/hat.h
index 1d91475e38..c908a9e16c 100644
--- a/usr/src/uts/common/vm/hat.h
+++ b/usr/src/uts/common/vm/hat.h
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -269,7 +270,12 @@ void hat_kpm_walk(void (*)(void *, void *, size_t), void *);
* call.
*
* int hat_pageunload(pp, forceflag)
- * unload all translations attached to pp.
+ * Unload all translations attached to pp. On x86 the bulk of the work is
+ * done by hat_page_inval.
+ *
+ * void hat_page_inval(pp, pgsz, curhat)
+ * Unload translations attached to pp. If curhat is provided, only the
+ * translation for that process is unloaded, otherwise all are unloaded.
*
* uint_t hat_pagesync(pp, flags)
* get hw stats from hardware into page struct and reset hw stats
@@ -291,6 +297,7 @@ void hat_page_setattr(struct page *, uint_t);
void hat_page_clrattr(struct page *, uint_t);
uint_t hat_page_getattr(struct page *, uint_t);
int hat_pageunload(struct page *, uint_t);
+void hat_page_inval(struct page *, uint_t, struct hat *);
uint_t hat_pagesync(struct page *, uint_t);
ulong_t hat_page_getshare(struct page *);
int hat_page_checkshare(struct page *, ulong_t);
@@ -460,6 +467,7 @@ void hat_setstat(struct as *, caddr_t, size_t, uint_t);
*/
#define HAT_ADV_PGUNLOAD 0x00
#define HAT_FORCE_PGUNLOAD 0x01
+#define HAT_CURPROC_PGUNLOAD 0x02
/*
* Attributes for hat_page_*attr, hat_setstats and
diff --git a/usr/src/uts/common/vm/seg_kmem.c b/usr/src/uts/common/vm/seg_kmem.c
index 90e1b73b70..439c859d96 100644
--- a/usr/src/uts/common/vm/seg_kmem.c
+++ b/usr/src/uts/common/vm/seg_kmem.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
*/
#include <sys/types.h>
@@ -773,7 +774,7 @@ segkmem_capable(struct seg *seg, segcapability_t capability)
return (0);
}
-static struct seg_ops segkmem_ops = {
+struct seg_ops segkmem_ops = {
SEGKMEM_BADOP(int), /* dup */
SEGKMEM_BADOP(int), /* unmap */
SEGKMEM_BADOP(void), /* free */
diff --git a/usr/src/uts/common/vm/seg_kmem.h b/usr/src/uts/common/vm/seg_kmem.h
index 2a4ed3b2aa..3ad4202e91 100644
--- a/usr/src/uts/common/vm/seg_kmem.h
+++ b/usr/src/uts/common/vm/seg_kmem.h
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
*/
#ifndef _VM_SEG_KMEM_H
@@ -136,6 +137,8 @@ extern size_t segkmem_kmemlp_max;
#define IS_KMEM_VA_LARGEPAGE(vaddr) \
(((vaddr) >= heap_lp_base) && ((vaddr) < heap_lp_end))
+extern struct seg_ops segkmem_ops;
+
#endif /* _KERNEL */
#ifdef __cplusplus
diff --git a/usr/src/uts/common/vm/seg_umap.c b/usr/src/uts/common/vm/seg_umap.c
new file mode 100644
index 0000000000..ccad71c5d6
--- /dev/null
+++ b/usr/src/uts/common/vm/seg_umap.c
@@ -0,0 +1,466 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+/*
+ * VM - Kernel-to-user mapping segment
+ *
+ * The umap segment driver was primarily designed to facilitate the comm page:
+ * a portion of kernel memory shared with userspace so that certain (namely
+ * clock-related) actions could operate without making an expensive trip into
+ * the kernel.
+ *
+ * Since the initial requirements for the comm page are slim, advanced features
+ * of the segment driver such as per-page protection have been left
+ * unimplemented at this time.
+ */
+
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/errno.h>
+#include <sys/cred.h>
+#include <sys/kmem.h>
+#include <sys/lgrp.h>
+#include <sys/mman.h>
+
+#include <vm/hat.h>
+#include <vm/as.h>
+#include <vm/seg.h>
+#include <vm/seg_kmem.h>
+#include <vm/seg_umap.h>
+
+
+static boolean_t segumap_verify_safe(caddr_t, size_t);
+static int segumap_dup(struct seg *, struct seg *);
+static int segumap_unmap(struct seg *, caddr_t, size_t);
+static void segumap_free(struct seg *);
+static faultcode_t segumap_fault(struct hat *, struct seg *, caddr_t, size_t,
+ enum fault_type, enum seg_rw);
+static faultcode_t segumap_faulta(struct seg *, caddr_t);
+static int segumap_setprot(struct seg *, caddr_t, size_t, uint_t);
+static int segumap_checkprot(struct seg *, caddr_t, size_t, uint_t);
+static int segumap_sync(struct seg *, caddr_t, size_t, int, uint_t);
+static size_t segumap_incore(struct seg *, caddr_t, size_t, char *);
+static int segumap_lockop(struct seg *, caddr_t, size_t, int, int, ulong_t *,
+ size_t);
+static int segumap_getprot(struct seg *, caddr_t, size_t, uint_t *);
+static u_offset_t segumap_getoffset(struct seg *, caddr_t);
+static int segumap_gettype(struct seg *, caddr_t);
+static int segumap_getvp(struct seg *, caddr_t, struct vnode **);
+static int segumap_advise(struct seg *, caddr_t, size_t, uint_t);
+static void segumap_dump(struct seg *);
+static int segumap_pagelock(struct seg *, caddr_t, size_t, struct page ***,
+ enum lock_type, enum seg_rw);
+static int segumap_setpagesize(struct seg *, caddr_t, size_t, uint_t);
+static int segumap_getmemid(struct seg *, caddr_t, memid_t *);
+static int segumap_capable(struct seg *, segcapability_t);
+
+static struct seg_ops segumap_ops = {
+ segumap_dup,
+ segumap_unmap,
+ segumap_free,
+ segumap_fault,
+ segumap_faulta,
+ segumap_setprot,
+ segumap_checkprot,
+ NULL, /* kluster: disabled */
+ NULL, /* swapout: disabled */
+ segumap_sync,
+ segumap_incore,
+ segumap_lockop,
+ segumap_getprot,
+ segumap_getoffset,
+ segumap_gettype,
+ segumap_getvp,
+ segumap_advise,
+ segumap_dump,
+ segumap_pagelock,
+ segumap_setpagesize,
+ segumap_getmemid,
+ NULL, /* getpolicy: disabled */
+ segumap_capable,
+ seg_inherit_notsup
+};
+
+
+/*
+ * Create a kernel/user-mapped segment.
+ */
+int
+segumap_create(struct seg *seg, void *argsp)
+{
+ segumap_crargs_t *a = (struct segumap_crargs *)argsp;
+ segumap_data_t *data;
+
+ ASSERT((uintptr_t)a->kaddr > _userlimit);
+
+ /*
+ * Check several aspects of the mapping request to ensure validity:
+ * - kernel pages must reside entirely in kernel space
+ * - target protection must be user-accessible
+ * - kernel address must be page-aligned
+ * - kernel address must reside inside a "safe" segment
+ */
+ if ((uintptr_t)a->kaddr <= _userlimit ||
+ ((uintptr_t)a->kaddr + seg->s_size) < (uintptr_t)a->kaddr ||
+ (a->prot & PROT_USER) == 0 ||
+ ((uintptr_t)a->kaddr & PAGEOFFSET) != 0 ||
+ !segumap_verify_safe(a->kaddr, seg->s_size)) {
+ return (EINVAL);
+ }
+
+ data = kmem_zalloc(sizeof (*data), KM_SLEEP);
+ rw_init(&data->sud_lock, NULL, RW_DEFAULT, NULL);
+ data->sud_kaddr = a->kaddr;
+ data->sud_prot = a->prot;
+ data->sud_loaded = B_FALSE;
+
+ seg->s_ops = &segumap_ops;
+ seg->s_data = data;
+ return (0);
+}
+
+static boolean_t
+segumap_verify_safe(caddr_t kaddr, size_t len)
+{
+ struct seg *seg;
+
+ /*
+ * Presently, only pages which are backed by segkmem are allowed to be
+ * shared with userspace. This prevents nasty paging behavior with
+ * other drivers such as seg_kp. Furthermore, the backing kernel
+ * segment must completely contain the region to be mapped.
+ *
+ * Failing these checks is fatal for now since such mappings are done
+ * in a very limited context from the kernel.
+ */
+ AS_LOCK_ENTER(&kas, RW_READER);
+ seg = as_segat(&kas, kaddr);
+ VERIFY(seg != NULL);
+ VERIFY(seg->s_base + seg->s_size >= kaddr + len);
+ VERIFY(seg->s_ops == &segkmem_ops);
+ AS_LOCK_EXIT(&kas);
+
+ return (B_TRUE);
+}
+
+static int
+segumap_dup(struct seg *seg, struct seg *newseg)
+{
+ segumap_data_t *sud = (segumap_data_t *)seg->s_data;
+ segumap_data_t *newsud;
+
+ ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
+
+ newsud = kmem_zalloc(sizeof (segumap_data_t), KM_SLEEP);
+ rw_init(&newsud->sud_lock, NULL, RW_DEFAULT, NULL);
+ newsud->sud_kaddr = sud->sud_kaddr;
+ newsud->sud_prot = sud->sud_prot;
+ newsud->sud_loaded = B_FALSE;
+
+ newseg->s_ops = seg->s_ops;
+ newseg->s_data = newsud;
+ return (0);
+}
+
+static int
+segumap_unmap(struct seg *seg, caddr_t addr, size_t len)
+{
+ segumap_data_t *sud = (segumap_data_t *)seg->s_data;
+
+ ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
+
+ /* Only allow unmap of entire segment */
+ if (addr != seg->s_base || len != seg->s_size) {
+ return (EINVAL);
+ }
+ if (sud->sud_softlockcnt != 0) {
+ return (EAGAIN);
+ }
+
+ hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP);
+ /*
+ * While setting this field before immediately freeing the segment is
+ * not necessary, it is done for the sake of completeness. Doing so
+ * outside sud_lock is safe with the AS write-locked.
+ */
+ sud->sud_loaded = B_FALSE;
+
+ seg_free(seg);
+ return (0);
+}
+
+static void
+segumap_free(struct seg *seg)
+{
+ segumap_data_t *data = (segumap_data_t *)seg->s_data;
+
+ ASSERT(data != NULL);
+
+ rw_destroy(&data->sud_lock);
+ VERIFY(data->sud_loaded == B_FALSE);
+ VERIFY(data->sud_softlockcnt == 0);
+ kmem_free(data, sizeof (*data));
+ seg->s_data = NULL;
+}
+
+/* ARGSUSED */
+static faultcode_t
+segumap_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
+ enum fault_type type, enum seg_rw tw)
+{
+ segumap_data_t *sud = (segumap_data_t *)seg->s_data;
+
+ ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
+
+ if (type == F_PROT) {
+ /*
+ * Since protection on the segment is fixed, there is nothing
+ * to do but report an error for protection faults.
+ */
+ return (FC_PROT);
+ } else if (type == F_SOFTUNLOCK) {
+ size_t plen = btop(len);
+
+ rw_enter(&sud->sud_lock, RW_WRITER);
+ VERIFY(sud->sud_softlockcnt >= plen);
+ sud->sud_softlockcnt -= plen;
+ rw_exit(&sud->sud_lock);
+ return (0);
+ }
+
+ ASSERT(type == F_INVAL || type == F_SOFTLOCK);
+ rw_enter(&sud->sud_lock, RW_WRITER);
+
+ if (type == F_INVAL && sud->sud_loaded) {
+ rw_exit(&sud->sud_lock);
+ return (FC_NOMAP);
+ }
+
+ /*
+ * Load the (entire) segment into the HAT if it has not been done so.
+ */
+ if (!sud->sud_loaded) {
+ for (uintptr_t i = 0; i < seg->s_size; i += PAGESIZE) {
+ pfn_t pfn;
+
+ pfn = hat_getpfnum(kas.a_hat, sud->sud_kaddr + i);
+ VERIFY(pfn != PFN_INVALID);
+ hat_devload(seg->s_as->a_hat, seg->s_base + i,
+ PAGESIZE, pfn, sud->sud_prot, HAT_LOAD);
+ }
+ sud->sud_loaded = B_TRUE;
+ } else {
+ /*
+ * If there the segment has already been loaded, there is no
+ * reason to take an F_INVALID fault.
+ */
+ VERIFY(type != F_INVAL);
+ }
+
+ if (type == F_SOFTLOCK) {
+ size_t nval = sud->sud_softlockcnt + btop(len);
+
+ if (sud->sud_softlockcnt >= nval) {
+ rw_exit(&sud->sud_lock);
+ return (FC_MAKE_ERR(EOVERFLOW));
+ }
+ sud->sud_softlockcnt = nval;
+ }
+ rw_exit(&sud->sud_lock);
+ return (0);
+}
+
+/* ARGSUSED */
+static faultcode_t
+segumap_faulta(struct seg *seg, caddr_t addr)
+{
+ /* Do nothing since asynch pagefault should not load translation. */
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+segumap_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
+{
+ ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
+
+ /*
+ * The seg_umap driver does not yet allow protection to be changed.
+ */
+ return (EACCES);
+}
+
+/* ARGSUSED */
+static int
+segumap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
+{
+ segumap_data_t *sud = (segumap_data_t *)seg->s_data;
+ int error = 0;
+
+ ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
+
+ rw_enter(&sud->sud_lock, RW_READER);
+ if ((sud->sud_prot & prot) != prot) {
+ error = EACCES;
+ }
+ rw_exit(&sud->sud_lock);
+ return (error);
+}
+
+/* ARGSUSED */
+static int
+segumap_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
+{
+ /* Always succeed since there are no backing store to sync */
+ return (0);
+}
+
+/* ARGSUSED */
+static size_t
+segumap_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
+{
+ size_t sz = 0;
+
+ ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
+
+ len = (len + PAGEOFFSET) & PAGEMASK;
+ while (len > 0) {
+ *vec = 1;
+ sz += PAGESIZE;
+ vec++;
+ len -= PAGESIZE;
+ }
+ return (sz);
+}
+
+/* ARGSUSED */
+static int
+segumap_lockop(struct seg *seg, caddr_t addr, size_t len, int attr, int op,
+ ulong_t *lockmap, size_t pos)
+{
+ /* Report success since kernel pages are always in memory. */
+ return (0);
+}
+
+static int
+segumap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
+{
+ segumap_data_t *sud = (segumap_data_t *)seg->s_data;
+ size_t pgno;
+ uint_t prot;
+
+ ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
+
+ rw_enter(&sud->sud_lock, RW_READER);
+ prot = sud->sud_prot;
+ rw_exit(&sud->sud_lock);
+
+ /*
+ * Reporting protection is simple since it is not tracked per-page.
+ */
+ pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
+ while (pgno > 0) {
+ protv[--pgno] = prot;
+ }
+ return (0);
+}
+
+/* ARGSUSED */
+static u_offset_t
+segumap_getoffset(struct seg *seg, caddr_t addr)
+{
+ /*
+ * To avoid leaking information about the layout of the kernel address
+ * space, always report '0' as the offset.
+ */
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+segumap_gettype(struct seg *seg, caddr_t addr)
+{
+ /*
+ * Since already-existing kernel pages are being mapped into userspace,
+ * always report the segment type as shared.
+ */
+ return (MAP_SHARED);
+}
+
+/* ARGSUSED */
+static int
+segumap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
+{
+ ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
+
+ *vpp = NULL;
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+segumap_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
+{
+ if (behav == MADV_PURGE) {
+ /* Purge does not make sense for this mapping */
+ return (EINVAL);
+ }
+ /* Indicate success for everything else. */
+ return (0);
+}
+
+/* ARGSUSED */
+static void
+segumap_dump(struct seg *seg)
+{
+ /*
+ * Since this is a mapping to share kernel data with userspace, nothing
+ * additional should be dumped.
+ */
+}
+
+/* ARGSUSED */
+static int
+segumap_pagelock(struct seg *seg, caddr_t addr, size_t len, struct page ***ppp,
+ enum lock_type type, enum seg_rw rw)
+{
+ return (ENOTSUP);
+}
+
+/* ARGSUSED */
+static int
+segumap_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
+{
+ return (ENOTSUP);
+}
+
+static int
+segumap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
+{
+ segumap_data_t *sud = (segumap_data_t *)seg->s_data;
+
+ memidp->val[0] = (uintptr_t)sud->sud_kaddr;
+ memidp->val[1] = (uintptr_t)(addr - seg->s_base);
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+segumap_capable(struct seg *seg, segcapability_t capability)
+{
+ /* no special capablities */
+ return (0);
+}
diff --git a/usr/src/uts/common/vm/seg_umap.h b/usr/src/uts/common/vm/seg_umap.h
new file mode 100644
index 0000000000..bcf7447509
--- /dev/null
+++ b/usr/src/uts/common/vm/seg_umap.h
@@ -0,0 +1,43 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#ifndef _VM_SEG_UMAP_H
+#define _VM_SEG_UMAP_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct segumap_crargs {
+ caddr_t kaddr;
+ uchar_t prot; /* protection */
+ uchar_t maxprot; /* maximum protection */
+} segumap_crargs_t;
+
+typedef struct segumap_data {
+ krwlock_t sud_lock;
+ caddr_t sud_kaddr;
+ uchar_t sud_prot;
+ size_t sud_softlockcnt;
+ boolean_t sud_loaded;
+} segumap_data_t;
+
+extern int segumap_create(struct seg *, void *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VM_SEG_UMAP_H */
diff --git a/usr/src/uts/common/vm/seg_vn.c b/usr/src/uts/common/vm/seg_vn.c
index 875dec7fe9..f143c1e464 100644
--- a/usr/src/uts/common/vm/seg_vn.c
+++ b/usr/src/uts/common/vm/seg_vn.c
@@ -7308,7 +7308,8 @@ segvn_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
vpp = svd->vpage;
offset = svd->offset + (uintptr_t)(addr - seg->s_base);
bflags = ((flags & MS_ASYNC) ? B_ASYNC : 0) |
- ((flags & MS_INVALIDATE) ? B_INVAL : 0);
+ ((flags & MS_INVALIDATE) ? B_INVAL : 0) |
+ ((flags & MS_INVALCURPROC) ? (B_INVALCURONLY | B_INVAL) : 0);
if (attr) {
pageprot = attr & ~(SHARED|PRIVATE);
@@ -7333,11 +7334,11 @@ segvn_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
vpp = &svd->vpage[seg_page(seg, addr)];
} else if (svd->vp && svd->amp == NULL &&
- (flags & MS_INVALIDATE) == 0) {
+ (flags & (MS_INVALIDATE | MS_INVALCURPROC)) == 0) {
/*
- * No attributes, no anonymous pages and MS_INVALIDATE flag
- * is not on, just use one big request.
+ * No attributes, no anonymous pages and MS_INVAL* flags
+ * are not on, just use one big request.
*/
err = VOP_PUTPAGE(svd->vp, (offset_t)offset, len,
bflags, svd->cred, NULL);
@@ -7389,7 +7390,7 @@ segvn_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
* might race in and lock the page after we unlock and before
* we do the PUTPAGE, then PUTPAGE simply does nothing.
*/
- if (flags & MS_INVALIDATE) {
+ if (flags & (MS_INVALIDATE | MS_INVALCURPROC)) {
if ((pp = page_lookup(vp, off, SE_SHARED)) != NULL) {
if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) {
page_unlock(pp);
diff --git a/usr/src/uts/common/vm/vm_anon.c b/usr/src/uts/common/vm/vm_anon.c
index 4fd32a3f4a..01db9b23d7 100644
--- a/usr/src/uts/common/vm/vm_anon.c
+++ b/usr/src/uts/common/vm/vm_anon.c
@@ -788,14 +788,21 @@ anon_resvmem(size_t size, boolean_t takemem, zone_t *zone, int tryhard)
pgcnt_t pswap_pages = 0;
proc_t *p = curproc;
- if (zone != NULL && takemem) {
+ if (zone != NULL) {
/* test zone.max-swap resource control */
mutex_enter(&p->p_lock);
if (rctl_incr_swap(p, zone, ptob(npages)) != 0) {
mutex_exit(&p->p_lock);
- atomic_add_64(&zone->zone_anon_alloc_fail, 1);
+
+ if (takemem)
+ atomic_add_64(&zone->zone_anon_alloc_fail, 1);
+
return (0);
}
+
+ if (!takemem)
+ rctl_decr_swap(zone, ptob(npages));
+
mutex_exit(&p->p_lock);
}
mutex_enter(&anoninfo_lock);
diff --git a/usr/src/uts/common/vm/vm_as.c b/usr/src/uts/common/vm/vm_as.c
index bb5a96eb0f..b0a5e7fb33 100644
--- a/usr/src/uts/common/vm/vm_as.c
+++ b/usr/src/uts/common/vm/vm_as.c
@@ -57,6 +57,7 @@
#include <sys/debug.h>
#include <sys/tnf_probe.h>
#include <sys/vtrace.h>
+#include <sys/ddi.h>
#include <vm/hat.h>
#include <vm/as.h>
@@ -848,8 +849,7 @@ as_fault(struct hat *hat, struct as *as, caddr_t addr, size_t size,
struct seg *segsav;
int as_lock_held;
klwp_t *lwp = ttolwp(curthread);
-
-
+ zone_t *zonep = curzone;
retry:
/*
@@ -885,6 +885,22 @@ retry:
if (as == &kas)
CPU_STATS_ADDQ(CPU, vm, kernel_asflt, 1);
CPU_STATS_EXIT_K();
+ if (zonep->zone_pg_flt_delay != 0) {
+ /*
+ * The zone in which this process is running is
+ * currently over it's physical memory cap. Throttle
+ * page faults to help the user-land memory capper
+ * catch up. Note that drv_usectohz() rounds up.
+ */
+ atomic_add_64(&zonep->zone_pf_throttle, 1);
+ atomic_add_64(&zonep->zone_pf_throttle_usec,
+ zonep->zone_pg_flt_delay);
+ if (zonep->zone_pg_flt_delay < TICK_TO_USEC(1)) {
+ drv_usecwait(zonep->zone_pg_flt_delay);
+ } else {
+ delay(drv_usectohz(zonep->zone_pg_flt_delay));
+ }
+ }
break;
}
diff --git a/usr/src/uts/common/vm/vm_pvn.c b/usr/src/uts/common/vm/vm_pvn.c
index 1b8d12eb8d..a206320a30 100644
--- a/usr/src/uts/common/vm/vm_pvn.c
+++ b/usr/src/uts/common/vm/vm_pvn.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*/
@@ -432,7 +433,14 @@ pvn_write_done(page_t *plist, int flags)
page_io_unlock(pp);
page_unlock(pp);
}
- } else if (flags & B_INVAL) {
+ } else if ((flags & (B_INVAL | B_INVALCURONLY)) == B_INVAL) {
+ /*
+ * If B_INVALCURONLY is set, then we handle that case
+ * in the next conditional if hat_page_is_mapped()
+ * indicates that there are no additional mappings
+ * to the page.
+ */
+
/*
* XXX - Failed writes with B_INVAL set are
* not handled appropriately.
@@ -573,8 +581,9 @@ pvn_write_done(page_t *plist, int flags)
}
/*
- * Flags are composed of {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED, B_DELWRI,
- * B_TRUNC, B_FORCE}. B_DELWRI indicates that this page is part of a kluster
+ * Flags are composed of {B_ASYNC, B_INVAL, B_INVALCURONLY, B_FREE,
+ * B_DONTNEED, B_DELWRI, B_TRUNC, B_FORCE}.
+ * B_DELWRI indicates that this page is part of a kluster
* operation and is only to be considered if it doesn't involve any
* waiting here. B_TRUNC indicates that the file is being truncated
* and so no i/o needs to be done. B_FORCE indicates that the page
@@ -628,13 +637,17 @@ pvn_getdirty(page_t *pp, int flags)
* If we want to free or invalidate the page then
* we need to unload it so that anyone who wants
* it will have to take a minor fault to get it.
+ * If we are only invalidating the page for the
+ * current process, then pass in a different flag.
* Otherwise, we're just writing the page back so we
* need to sync up the hardwre and software mod bit to
* detect any future modifications. We clear the
* software mod bit when we put the page on the dirty
* list.
*/
- if (flags & (B_INVAL | B_FREE)) {
+ if (flags & B_INVALCURONLY) {
+ (void) hat_pageunload(pp, HAT_CURPROC_PGUNLOAD);
+ } else if (flags & (B_INVAL | B_FREE)) {
(void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
} else {
(void) hat_pagesync(pp, HAT_SYNC_ZERORM);
@@ -646,7 +659,7 @@ pvn_getdirty(page_t *pp, int flags)
* list after all.
*/
page_io_unlock(pp);
- if (flags & B_INVAL) {
+ if ((flags & (B_INVAL | B_INVALCURONLY)) == B_INVAL) {
/*LINTED: constant in conditional context*/
VN_DISPOSE(pp, B_INVAL, 0, kcred);
} else if (flags & B_FREE) {
@@ -658,6 +671,9 @@ pvn_getdirty(page_t *pp, int flags)
* of VOP_PUTPAGE() who prefer freeing the
* page _only_ if no one else is accessing it.
* E.g. segmap_release()
+ * We also take this path for B_INVALCURONLY and
+ * let page_release call VN_DISPOSE if no one else is
+ * using the page.
*
* The above hat_ismod() check is useless because:
* (1) we may not be holding SE_EXCL lock;
@@ -682,7 +698,7 @@ pvn_getdirty(page_t *pp, int flags)
* We'll detect the fact that they used it when the
* i/o is done and avoid freeing the page.
*/
- if (flags & B_FREE)
+ if (flags & (B_FREE | B_INVALCURONLY))
page_downgrade(pp);
diff --git a/usr/src/uts/common/vm/vm_swap.c b/usr/src/uts/common/vm/vm_swap.c
index 1a28c04357..2a008e114b 100644
--- a/usr/src/uts/common/vm/vm_swap.c
+++ b/usr/src/uts/common/vm/vm_swap.c
@@ -18,6 +18,11 @@
*
* CDDL HEADER END
*/
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
/*
* Copyright (c) 1987, 2010, Oracle and/or its affiliates. All rights reserved.
*/
@@ -625,7 +630,18 @@ swapctl(int sc_cmd, void *sc_arg, int *rv)
return (0);
}
beginning:
+ mutex_enter(&swapinfo_lock);
tmp_nswapfiles = nswapfiles;
+ mutex_exit(&swapinfo_lock);
+
+ /*
+ * Return early if there are no swap entries to report:
+ */
+ if (tmp_nswapfiles < 1) {
+ *rv = 0;
+ return (0);
+ }
+
/* Return an error if not enough space for the whole table. */
if (length < tmp_nswapfiles)
return (ENOMEM);
@@ -920,7 +936,18 @@ swapctl32(int sc_cmd, void *sc_arg, int *rv)
return (0);
}
beginning:
+ mutex_enter(&swapinfo_lock);
tmp_nswapfiles = nswapfiles;
+ mutex_exit(&swapinfo_lock);
+
+ /*
+ * Return early if there are no swap entries to report:
+ */
+ if (tmp_nswapfiles < 1) {
+ *rv = 0;
+ return (0);
+ }
+
/* Return an error if not enough space for the whole table. */
if (length < tmp_nswapfiles)
return (ENOMEM);
diff --git a/usr/src/uts/common/vm/vm_usage.c b/usr/src/uts/common/vm/vm_usage.c
index 57166b4e63..8b9fd0d7a3 100644
--- a/usr/src/uts/common/vm/vm_usage.c
+++ b/usr/src/uts/common/vm/vm_usage.c
@@ -25,6 +25,10 @@
*/
/*
+ * Copyright 2016, Joyent, Inc.
+ */
+
+/*
* vm_usage
*
* This file implements the getvmusage() private system call.
@@ -114,7 +118,7 @@
* For accurate counting of map-shared and COW-shared pages.
*
* - visited private anons (refcnt > 1) for each collective.
- * (entity->vme_anon_hash)
+ * (entity->vme_anon)
* For accurate counting of COW-shared pages.
*
* The common accounting structure is the vmu_entity_t, which represents
@@ -152,6 +156,7 @@
#include <sys/vm_usage.h>
#include <sys/zone.h>
#include <sys/sunddi.h>
+#include <sys/sysmacros.h>
#include <sys/avl.h>
#include <vm/anon.h>
#include <vm/as.h>
@@ -199,6 +204,14 @@ typedef struct vmu_object {
} vmu_object_t;
/*
+ * Node for tree of visited COW anons.
+ */
+typedef struct vmu_anon {
+ avl_node_t vma_node;
+ uintptr_t vma_addr;
+} vmu_anon_t;
+
+/*
* Entity by which to count results.
*
* The entity structure keeps the current rss/swap counts for each entity
@@ -221,7 +234,7 @@ typedef struct vmu_entity {
struct vmu_entity *vme_next_calc;
mod_hash_t *vme_vnode_hash; /* vnodes visited for entity */
mod_hash_t *vme_amp_hash; /* shared amps visited for entity */
- mod_hash_t *vme_anon_hash; /* COW anons visited for entity */
+ avl_tree_t vme_anon; /* COW anons visited for entity */
vmusage_t vme_result; /* identifies entity and results */
} vmu_entity_t;
@@ -324,6 +337,23 @@ bounds_cmp(const void *bnd1, const void *bnd2)
}
/*
+ * Comparison routine for our AVL tree of anon structures.
+ */
+static int
+vmu_anon_cmp(const void *lhs, const void *rhs)
+{
+ const vmu_anon_t *l = lhs, *r = rhs;
+
+ if (l->vma_addr == r->vma_addr)
+ return (0);
+
+ if (l->vma_addr < r->vma_addr)
+ return (-1);
+
+ return (1);
+}
+
+/*
* Save a bound on the free list.
*/
static void
@@ -363,13 +393,18 @@ static void
vmu_free_entity(mod_hash_val_t val)
{
vmu_entity_t *entity = (vmu_entity_t *)val;
+ vmu_anon_t *anon;
+ void *cookie = NULL;
if (entity->vme_vnode_hash != NULL)
i_mod_hash_clear_nosync(entity->vme_vnode_hash);
if (entity->vme_amp_hash != NULL)
i_mod_hash_clear_nosync(entity->vme_amp_hash);
- if (entity->vme_anon_hash != NULL)
- i_mod_hash_clear_nosync(entity->vme_anon_hash);
+
+ while ((anon = avl_destroy_nodes(&entity->vme_anon, &cookie)) != NULL)
+ kmem_free(anon, sizeof (vmu_anon_t));
+
+ avl_destroy(&entity->vme_anon);
entity->vme_next = vmu_data.vmu_free_entities;
vmu_data.vmu_free_entities = entity;
@@ -485,10 +520,10 @@ vmu_alloc_entity(id_t id, int type, id_t zoneid)
"vmusage amp hash", VMUSAGE_HASH_SIZE, vmu_free_object,
sizeof (struct anon_map));
- if (entity->vme_anon_hash == NULL)
- entity->vme_anon_hash = mod_hash_create_ptrhash(
- "vmusage anon hash", VMUSAGE_HASH_SIZE,
- mod_hash_null_valdtor, sizeof (struct anon));
+ VERIFY(avl_first(&entity->vme_anon) == NULL);
+
+ avl_create(&entity->vme_anon, vmu_anon_cmp, sizeof (struct vmu_anon),
+ offsetof(struct vmu_anon, vma_node));
entity->vme_next = vmu_data.vmu_entities;
vmu_data.vmu_entities = entity;
@@ -518,7 +553,8 @@ vmu_alloc_zone(id_t id)
zone->vmz_id = id;
- if ((vmu_data.vmu_calc_flags & (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES)) != 0)
+ if ((vmu_data.vmu_calc_flags &
+ (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES | VMUSAGE_A_ZONE)) != 0)
zone->vmz_zone = vmu_alloc_entity(id, VMUSAGE_ZONE, id);
if ((vmu_data.vmu_calc_flags & (VMUSAGE_PROJECTS |
@@ -613,21 +649,19 @@ vmu_find_insert_object(mod_hash_t *hash, caddr_t key, uint_t type)
}
static int
-vmu_find_insert_anon(mod_hash_t *hash, caddr_t key)
+vmu_find_insert_anon(vmu_entity_t *entity, void *key)
{
- int ret;
- caddr_t val;
+ vmu_anon_t anon, *ap;
- ret = i_mod_hash_find_nosync(hash, (mod_hash_key_t)key,
- (mod_hash_val_t *)&val);
+ anon.vma_addr = (uintptr_t)key;
- if (ret == 0)
+ if (avl_find(&entity->vme_anon, &anon, NULL) != NULL)
return (0);
- ret = i_mod_hash_insert_nosync(hash, (mod_hash_key_t)key,
- (mod_hash_val_t)key, (mod_hash_hndl_t)0);
+ ap = kmem_alloc(sizeof (vmu_anon_t), KM_SLEEP);
+ ap->vma_addr = (uintptr_t)key;
- ASSERT(ret == 0);
+ avl_add(&entity->vme_anon, ap);
return (1);
}
@@ -918,6 +952,8 @@ vmu_amp_update_incore_bounds(avl_tree_t *tree, struct anon_map *amp,
next = AVL_NEXT(tree, next);
continue;
}
+
+ ASSERT(next->vmb_type == VMUSAGE_BOUND_UNKNOWN);
bound_type = next->vmb_type;
index = next->vmb_start;
while (index <= next->vmb_end) {
@@ -937,7 +973,10 @@ vmu_amp_update_incore_bounds(avl_tree_t *tree, struct anon_map *amp,
if (ap != NULL && vn != NULL && vn->v_pages != NULL &&
(page = page_exists(vn, off)) != NULL) {
- page_type = VMUSAGE_BOUND_INCORE;
+ if (PP_ISFREE(page))
+ page_type = VMUSAGE_BOUND_NOT_INCORE;
+ else
+ page_type = VMUSAGE_BOUND_INCORE;
if (page->p_szc > 0) {
pgcnt = page_get_pagecnt(page->p_szc);
pgshft = page_get_shift(page->p_szc);
@@ -947,8 +986,10 @@ vmu_amp_update_incore_bounds(avl_tree_t *tree, struct anon_map *amp,
} else {
page_type = VMUSAGE_BOUND_NOT_INCORE;
}
+
if (bound_type == VMUSAGE_BOUND_UNKNOWN) {
next->vmb_type = page_type;
+ bound_type = page_type;
} else if (next->vmb_type != page_type) {
/*
* If current bound type does not match page
@@ -1009,6 +1050,7 @@ vmu_vnode_update_incore_bounds(avl_tree_t *tree, vnode_t *vnode,
continue;
}
+ ASSERT(next->vmb_type == VMUSAGE_BOUND_UNKNOWN);
bound_type = next->vmb_type;
index = next->vmb_start;
while (index <= next->vmb_end) {
@@ -1024,7 +1066,10 @@ vmu_vnode_update_incore_bounds(avl_tree_t *tree, vnode_t *vnode,
if (vnode->v_pages != NULL &&
(page = page_exists(vnode, ptob(index))) != NULL) {
- page_type = VMUSAGE_BOUND_INCORE;
+ if (PP_ISFREE(page))
+ page_type = VMUSAGE_BOUND_NOT_INCORE;
+ else
+ page_type = VMUSAGE_BOUND_INCORE;
if (page->p_szc > 0) {
pgcnt = page_get_pagecnt(page->p_szc);
pgshft = page_get_shift(page->p_szc);
@@ -1034,8 +1079,10 @@ vmu_vnode_update_incore_bounds(avl_tree_t *tree, vnode_t *vnode,
} else {
page_type = VMUSAGE_BOUND_NOT_INCORE;
}
+
if (bound_type == VMUSAGE_BOUND_UNKNOWN) {
next->vmb_type = page_type;
+ bound_type = page_type;
} else if (next->vmb_type != page_type) {
/*
* If current bound type does not match page
@@ -1304,6 +1351,12 @@ vmu_calculate_seg(vmu_entity_t *vmu_entities, struct seg *seg)
}
/*
+ * Pages on the free list aren't counted for the rss.
+ */
+ if (PP_ISFREE(page))
+ continue;
+
+ /*
* Assume anon structs with a refcnt
* of 1 are not COW shared, so there
* is no reason to track them per entity.
@@ -1320,8 +1373,7 @@ vmu_calculate_seg(vmu_entity_t *vmu_entities, struct seg *seg)
* Track COW anons per entity so
* they are not double counted.
*/
- if (vmu_find_insert_anon(entity->vme_anon_hash,
- (caddr_t)ap) == 0)
+ if (vmu_find_insert_anon(entity, ap) == 0)
continue;
result->vmu_rss_all += (pgcnt << PAGESHIFT);
@@ -1461,8 +1513,9 @@ vmu_calculate_proc(proc_t *p)
entities = tmp;
}
if (vmu_data.vmu_calc_flags &
- (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES | VMUSAGE_PROJECTS |
- VMUSAGE_ALL_PROJECTS | VMUSAGE_TASKS | VMUSAGE_ALL_TASKS |
+ (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES | VMUSAGE_A_ZONE |
+ VMUSAGE_PROJECTS | VMUSAGE_ALL_PROJECTS |
+ VMUSAGE_TASKS | VMUSAGE_ALL_TASKS |
VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS | VMUSAGE_EUSERS |
VMUSAGE_ALL_EUSERS)) {
ret = i_mod_hash_find_nosync(vmu_data.vmu_zones_hash,
@@ -1594,8 +1647,7 @@ vmu_free_extra()
mod_hash_destroy_hash(te->vme_vnode_hash);
if (te->vme_amp_hash != NULL)
mod_hash_destroy_hash(te->vme_amp_hash);
- if (te->vme_anon_hash != NULL)
- mod_hash_destroy_hash(te->vme_anon_hash);
+ VERIFY(avl_first(&te->vme_anon) == NULL);
kmem_free(te, sizeof (vmu_entity_t));
}
while (vmu_data.vmu_free_zones != NULL) {
@@ -1739,12 +1791,34 @@ vmu_cache_rele(vmu_cache_t *cache)
}
/*
+ * When new data is calculated, update the phys_mem rctl usage value in the
+ * zones.
+ */
+static void
+vmu_update_zone_rctls(vmu_cache_t *cache)
+{
+ vmusage_t *rp;
+ size_t i = 0;
+ zone_t *zp;
+
+ for (rp = cache->vmc_results; i < cache->vmc_nresults; rp++, i++) {
+ if (rp->vmu_type == VMUSAGE_ZONE &&
+ rp->vmu_zoneid != ALL_ZONES) {
+ if ((zp = zone_find_by_id(rp->vmu_zoneid)) != NULL) {
+ zp->zone_phys_mem = rp->vmu_rss_all;
+ zone_rele(zp);
+ }
+ }
+ }
+}
+
+/*
* Copy out the cached results to a caller. Inspect the callers flags
* and zone to determine which cached results should be copied.
*/
static int
vmu_copyout_results(vmu_cache_t *cache, vmusage_t *buf, size_t *nres,
- uint_t flags, int cpflg)
+ uint_t flags, id_t req_zone_id, int cpflg)
{
vmusage_t *result, *out_result;
vmusage_t dummy;
@@ -1763,7 +1837,7 @@ vmu_copyout_results(vmu_cache_t *cache, vmusage_t *buf, size_t *nres,
/* figure out what results the caller is interested in. */
if ((flags & VMUSAGE_SYSTEM) && curproc->p_zone == global_zone)
types |= VMUSAGE_SYSTEM;
- if (flags & (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES))
+ if (flags & (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES | VMUSAGE_A_ZONE))
types |= VMUSAGE_ZONE;
if (flags & (VMUSAGE_PROJECTS | VMUSAGE_ALL_PROJECTS |
VMUSAGE_COL_PROJECTS))
@@ -1826,26 +1900,33 @@ vmu_copyout_results(vmu_cache_t *cache, vmusage_t *buf, size_t *nres,
continue;
}
- /* Skip "other zone" results if not requested */
- if (result->vmu_zoneid != curproc->p_zone->zone_id) {
- if (result->vmu_type == VMUSAGE_ZONE &&
- (flags & VMUSAGE_ALL_ZONES) == 0)
- continue;
- if (result->vmu_type == VMUSAGE_PROJECTS &&
- (flags & (VMUSAGE_ALL_PROJECTS |
- VMUSAGE_COL_PROJECTS)) == 0)
- continue;
- if (result->vmu_type == VMUSAGE_TASKS &&
- (flags & VMUSAGE_ALL_TASKS) == 0)
- continue;
- if (result->vmu_type == VMUSAGE_RUSERS &&
- (flags & (VMUSAGE_ALL_RUSERS |
- VMUSAGE_COL_RUSERS)) == 0)
- continue;
- if (result->vmu_type == VMUSAGE_EUSERS &&
- (flags & (VMUSAGE_ALL_EUSERS |
- VMUSAGE_COL_EUSERS)) == 0)
+ if (result->vmu_type == VMUSAGE_ZONE &&
+ flags & VMUSAGE_A_ZONE) {
+ /* Skip non-requested zone results */
+ if (result->vmu_zoneid != req_zone_id)
continue;
+ } else {
+ /* Skip "other zone" results if not requested */
+ if (result->vmu_zoneid != curproc->p_zone->zone_id) {
+ if (result->vmu_type == VMUSAGE_ZONE &&
+ (flags & VMUSAGE_ALL_ZONES) == 0)
+ continue;
+ if (result->vmu_type == VMUSAGE_PROJECTS &&
+ (flags & (VMUSAGE_ALL_PROJECTS |
+ VMUSAGE_COL_PROJECTS)) == 0)
+ continue;
+ if (result->vmu_type == VMUSAGE_TASKS &&
+ (flags & VMUSAGE_ALL_TASKS) == 0)
+ continue;
+ if (result->vmu_type == VMUSAGE_RUSERS &&
+ (flags & (VMUSAGE_ALL_RUSERS |
+ VMUSAGE_COL_RUSERS)) == 0)
+ continue;
+ if (result->vmu_type == VMUSAGE_EUSERS &&
+ (flags & (VMUSAGE_ALL_EUSERS |
+ VMUSAGE_COL_EUSERS)) == 0)
+ continue;
+ }
}
count++;
if (out_result != NULL) {
@@ -1901,10 +1982,12 @@ vm_getusage(uint_t flags, time_t age, vmusage_t *buf, size_t *nres, int cpflg)
int cacherecent = 0;
hrtime_t now;
uint_t flags_orig;
+ id_t req_zone_id;
/*
* Non-global zones cannot request system wide and/or collated
- * results, or the system result, so munge the flags accordingly.
+ * results, or the system result, or usage of another zone, so munge
+ * the flags accordingly.
*/
flags_orig = flags;
if (curproc->p_zone != global_zone) {
@@ -1924,6 +2007,10 @@ vm_getusage(uint_t flags, time_t age, vmusage_t *buf, size_t *nres, int cpflg)
flags &= ~VMUSAGE_SYSTEM;
flags |= VMUSAGE_ZONE;
}
+ if (flags & VMUSAGE_A_ZONE) {
+ flags &= ~VMUSAGE_A_ZONE;
+ flags |= VMUSAGE_ZONE;
+ }
}
/* Check for unknown flags */
@@ -1934,6 +2021,21 @@ vm_getusage(uint_t flags, time_t age, vmusage_t *buf, size_t *nres, int cpflg)
if ((flags & VMUSAGE_MASK) == 0)
return (set_errno(EINVAL));
+ /* If requesting results for a specific zone, get the zone ID */
+ if (flags & VMUSAGE_A_ZONE) {
+ size_t bufsize;
+ vmusage_t zreq;
+
+ if (ddi_copyin((caddr_t)nres, &bufsize, sizeof (size_t), cpflg))
+ return (set_errno(EFAULT));
+ /* Requested zone ID is passed in buf, so 0 len not allowed */
+ if (bufsize == 0)
+ return (set_errno(EINVAL));
+ if (ddi_copyin((caddr_t)buf, &zreq, sizeof (vmusage_t), cpflg))
+ return (set_errno(EFAULT));
+ req_zone_id = zreq.vmu_id;
+ }
+
mutex_enter(&vmu_data.vmu_lock);
now = gethrtime();
@@ -1953,7 +2055,7 @@ start:
mutex_exit(&vmu_data.vmu_lock);
ret = vmu_copyout_results(cache, buf, nres, flags_orig,
- cpflg);
+ req_zone_id, cpflg);
mutex_enter(&vmu_data.vmu_lock);
vmu_cache_rele(cache);
if (vmu_data.vmu_pending_waiters > 0)
@@ -2009,8 +2111,11 @@ start:
mutex_exit(&vmu_data.vmu_lock);
+ /* update zone's phys. mem. rctl usage */
+ vmu_update_zone_rctls(cache);
/* copy cache */
- ret = vmu_copyout_results(cache, buf, nres, flags_orig, cpflg);
+ ret = vmu_copyout_results(cache, buf, nres, flags_orig,
+ req_zone_id, cpflg);
mutex_enter(&vmu_data.vmu_lock);
vmu_cache_rele(cache);
mutex_exit(&vmu_data.vmu_lock);
@@ -2030,3 +2135,185 @@ start:
vmu_data.vmu_pending_waiters--;
goto start;
}
+
+#if defined(__x86)
+/*
+ * Attempt to invalidate all of the pages in the mapping for the given process.
+ */
+static void
+map_inval(proc_t *p, struct seg *seg, caddr_t addr, size_t size)
+{
+ page_t *pp;
+ size_t psize;
+ u_offset_t off;
+ caddr_t eaddr;
+ struct vnode *vp;
+ struct segvn_data *svd;
+ struct hat *victim_hat;
+
+ ASSERT((addr + size) <= (seg->s_base + seg->s_size));
+
+ victim_hat = p->p_as->a_hat;
+ svd = (struct segvn_data *)seg->s_data;
+ vp = svd->vp;
+ psize = page_get_pagesize(seg->s_szc);
+
+ off = svd->offset + (uintptr_t)(addr - seg->s_base);
+
+ for (eaddr = addr + size; addr < eaddr; addr += psize, off += psize) {
+ pp = page_lookup_nowait(vp, off, SE_SHARED);
+
+ if (pp != NULL) {
+ /* following logic based on pvn_getdirty() */
+
+ if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) {
+ page_unlock(pp);
+ continue;
+ }
+
+ page_io_lock(pp);
+ hat_page_inval(pp, 0, victim_hat);
+ page_io_unlock(pp);
+
+ /*
+ * For B_INVALCURONLY-style handling we let
+ * page_release call VN_DISPOSE if no one else is using
+ * the page.
+ *
+ * A hat_ismod() check would be useless because:
+ * (1) we are not be holding SE_EXCL lock
+ * (2) we've not unloaded _all_ translations
+ *
+ * Let page_release() do the heavy-lifting.
+ */
+ (void) page_release(pp, 1);
+ }
+ }
+}
+
+/*
+ * vm_map_inval()
+ *
+ * Invalidate as many pages as possible within the given mapping for the given
+ * process. addr is expected to be the base address of the mapping and size is
+ * the length of the mapping. In some cases a mapping will encompass an
+ * entire segment, but at least for anon or stack mappings, these will be
+ * regions within a single large segment. Thus, the invalidation is oriented
+ * around a single mapping and not an entire segment.
+ *
+ * SPARC sfmmu hat does not support HAT_CURPROC_PGUNLOAD-style handling so
+ * this code is only applicable to x86.
+ */
+int
+vm_map_inval(pid_t pid, caddr_t addr, size_t size)
+{
+ int ret;
+ int error = 0;
+ proc_t *p; /* target proc */
+ struct as *as; /* target proc's address space */
+ struct seg *seg; /* working segment */
+
+ if (curproc->p_zone != global_zone || crgetruid(curproc->p_cred) != 0)
+ return (set_errno(EPERM));
+
+ /* If not a valid mapping address, return an error */
+ if ((caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK) != addr)
+ return (set_errno(EINVAL));
+
+again:
+ mutex_enter(&pidlock);
+ p = prfind(pid);
+ if (p == NULL) {
+ mutex_exit(&pidlock);
+ return (set_errno(ESRCH));
+ }
+
+ mutex_enter(&p->p_lock);
+ mutex_exit(&pidlock);
+
+ if (panicstr != NULL) {
+ mutex_exit(&p->p_lock);
+ return (0);
+ }
+
+ as = p->p_as;
+
+ /*
+ * Try to set P_PR_LOCK - prevents process "changing shape"
+ * - blocks fork
+ * - blocks sigkill
+ * - cannot be a system proc
+ * - must be fully created proc
+ */
+ ret = sprtrylock_proc(p);
+ if (ret == -1) {
+ /* Process in invalid state */
+ mutex_exit(&p->p_lock);
+ return (set_errno(ESRCH));
+ }
+
+ if (ret == 1) {
+ /*
+ * P_PR_LOCK is already set. Wait and try again. This also
+ * drops p_lock so p may no longer be valid since the proc may
+ * have exited.
+ */
+ sprwaitlock_proc(p);
+ goto again;
+ }
+
+ /* P_PR_LOCK is now set */
+ mutex_exit(&p->p_lock);
+
+ AS_LOCK_ENTER(as, RW_READER);
+ if ((seg = as_segat(as, addr)) == NULL) {
+ AS_LOCK_EXIT(as);
+ mutex_enter(&p->p_lock);
+ sprunlock(p);
+ return (set_errno(ENOMEM));
+ }
+
+ /*
+ * The invalidation behavior only makes sense for vnode-backed segments.
+ */
+ if (seg->s_ops != &segvn_ops) {
+ AS_LOCK_EXIT(as);
+ mutex_enter(&p->p_lock);
+ sprunlock(p);
+ return (0);
+ }
+
+ /*
+ * If the mapping is out of bounds of the segement return an error.
+ */
+ if ((addr + size) > (seg->s_base + seg->s_size)) {
+ AS_LOCK_EXIT(as);
+ mutex_enter(&p->p_lock);
+ sprunlock(p);
+ return (set_errno(EINVAL));
+ }
+
+ /*
+ * Don't use MS_INVALCURPROC flag here since that would eventually
+ * initiate hat invalidation based on curthread. Since we're doing this
+ * on behalf of a different process, that would erroneously invalidate
+ * our own process mappings.
+ */
+ error = SEGOP_SYNC(seg, addr, size, 0, (uint_t)MS_ASYNC);
+ if (error == 0) {
+ /*
+ * Since we didn't invalidate during the sync above, we now
+ * try to invalidate all of the pages in the mapping.
+ */
+ map_inval(p, seg, addr, size);
+ }
+ AS_LOCK_EXIT(as);
+
+ mutex_enter(&p->p_lock);
+ sprunlock(p);
+
+ if (error)
+ (void) set_errno(error);
+ return (error);
+}
+#endif
diff --git a/usr/src/uts/i86pc/Makefile.files b/usr/src/uts/i86pc/Makefile.files
index 9829939b16..ef7a36d09c 100644
--- a/usr/src/uts/i86pc/Makefile.files
+++ b/usr/src/uts/i86pc/Makefile.files
@@ -23,6 +23,7 @@
# Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
#
# Copyright (c) 2010, Intel Corporation.
+# Copyright 2016 Joyent, Inc.
#
# This Makefile defines file modules in the directory uts/i86pc
# and its children. These are the source files which are i86pc
@@ -40,6 +41,8 @@ CORE_OBJS += \
cmi.o \
cmi_hw.o \
cms.o \
+ comm_page.o \
+ comm_page_util.o \
confunix.o \
cpu_idle.o \
cpuid.o \
diff --git a/usr/src/uts/i86pc/dboot/dboot_startkern.c b/usr/src/uts/i86pc/dboot/dboot_startkern.c
index 7fc43e27bd..6abb7c6349 100644
--- a/usr/src/uts/i86pc/dboot/dboot_startkern.c
+++ b/usr/src/uts/i86pc/dboot/dboot_startkern.c
@@ -63,6 +63,15 @@ extern int have_cpuid(void);
#define SHA1_ASCII_LENGTH (SHA1_DIGEST_LENGTH * 2)
/*
+ * Region of memory that may be corrupted by external actors. This can go away
+ * once the firmware bug RICHMOND-16 is fixed and all systems with the bug are
+ * upgraded.
+ */
+#define CORRUPT_REGION_START 0xc700000
+#define CORRUPT_REGION_SIZE 0x100000
+#define CORRUPT_REGION_END (CORRUPT_REGION_START + CORRUPT_REGION_SIZE)
+
+/*
* This file contains code that runs to transition us from either a multiboot
* compliant loader (32 bit non-paging) or a XPV domain loader to
* regular kernel execution. Its task is to setup the kernel memory image
@@ -1155,6 +1164,38 @@ init_mem_alloc(void)
case 1:
if (end > max_mem)
max_mem = end;
+
+ /*
+ * Well, this is sad. One some systems, there
+ * is a region of memory that can be corrupted
+ * until some number of seconds after we have
+ * booted. And the BIOS doesn't tell us that
+ * this memory is unsafe to use. And we don't
+ * know how long it's dangerous. So we'll
+ * chop out this range from any memory list
+ * that would otherwise be usable. Note that
+ * any system of this type will give us the
+ * new-style (0x40) memlist, so we need not
+ * fix up the other path below.
+ */
+ if (start < CORRUPT_REGION_START &&
+ end > CORRUPT_REGION_START) {
+ memlists[memlists_used].addr = start;
+ memlists[memlists_used].size =
+ CORRUPT_REGION_START - start;
+ ++memlists_used;
+ if (end > CORRUPT_REGION_END)
+ start = CORRUPT_REGION_END;
+ else
+ continue;
+ }
+ if (start >= CORRUPT_REGION_START &&
+ start < CORRUPT_REGION_END) {
+ if (end <= CORRUPT_REGION_END)
+ continue;
+ start = CORRUPT_REGION_END;
+ }
+
memlists[memlists_used].addr = start;
memlists[memlists_used].size = end - start;
++memlists_used;
diff --git a/usr/src/uts/i86pc/io/ppm/acpisleep.c b/usr/src/uts/i86pc/io/ppm/acpisleep.c
index 78328170e6..6b94e50909 100644
--- a/usr/src/uts/i86pc/io/ppm/acpisleep.c
+++ b/usr/src/uts/i86pc/io/ppm/acpisleep.c
@@ -22,6 +22,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -56,6 +57,19 @@
int acpi_rtc_wake = 0x0; /* wake in N seconds */
+/*
+ * Execute optional ACPI methods for suspend/resume.
+ * The value can be ACPI_EXECUTE_GTS and/or ACPI_EXECUTE_BFS.
+ * Global so it can be set in /etc/system.
+ * From usr/src/uts/intel/io/acpica/changes.txt:
+ * It has been seen on some systems where the execution of these
+ * methods causes errors and also prevents the machine from entering S5.
+ * It is therefore suggested that host operating systems do not execute
+ * these methods by default. In the future, perhaps these methods can be
+ * optionally executed based on the age of the system...
+ */
+int acpi_sleep_flags = ACPI_NO_OPTIONAL_METHODS;
+
#if 0 /* debug */
static uint8_t branchbuf[64 * 1024]; /* for the HDT branch trace stuff */
#endif /* debug */
@@ -142,8 +156,9 @@ acpi_enter_sleepstate(s3a_t *s3ap)
* Tell the hardware to sleep.
*/
PT(PT_SXE);
- PMD(PMD_SX, ("Calling AcpiEnterSleepState(%d) ...\n", Sx))
- if (AcpiEnterSleepState(Sx) != AE_OK) {
+ PMD(PMD_SX, ("Calling AcpiEnterSleepState(%d, %d) ...\n", Sx,
+ acpi_sleep_flags))
+ if (AcpiEnterSleepState(Sx, acpi_sleep_flags) != AE_OK) {
PT(PT_SXE_FAIL);
PMD(PMD_SX, ("... failed!\n"))
}
@@ -163,6 +178,11 @@ acpi_exit_sleepstate(s3a_t *s3ap)
PMD(PMD_SX, ("!We woke up!\n"))
PT(PT_LSS);
+ if (AcpiLeaveSleepStatePrep(Sx, acpi_sleep_flags) != AE_OK) {
+ PT(PT_LSS_FAIL);
+ PMD(PMD_SX, ("Problem with LeaveSleepState!\n"))
+ }
+
if (AcpiLeaveSleepState(Sx) != AE_OK) {
PT(PT_LSS_FAIL);
PMD(PMD_SX, ("Problem with LeaveSleepState!\n"))
diff --git a/usr/src/uts/i86pc/io/psm/psm_common.c b/usr/src/uts/i86pc/io/psm/psm_common.c
index 7a3dd8a733..9c17d85228 100644
--- a/usr/src/uts/i86pc/io/psm/psm_common.c
+++ b/usr/src/uts/i86pc/io/psm/psm_common.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -979,7 +980,7 @@ acpi_poweroff(void)
return (1);
}
ACPI_DISABLE_IRQS();
- status = AcpiEnterSleepState(5);
+ status = AcpiEnterSleepState(5, ACPI_NO_OPTIONAL_METHODS);
ACPI_ENABLE_IRQS();
/* we should be off; if we get here it's an error */
diff --git a/usr/src/uts/i86pc/ml/comm_page.s b/usr/src/uts/i86pc/ml/comm_page.s
new file mode 100644
index 0000000000..7ff803ea93
--- /dev/null
+++ b/usr/src/uts/i86pc/ml/comm_page.s
@@ -0,0 +1,88 @@
+
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#include <sys/asm_linkage.h>
+#include <sys/asm_misc.h>
+#include <sys/param.h>
+#include <sys/comm_page.h>
+#include <sys/tsc.h>
+
+#if defined(__lint)
+
+hrtime_t tsc_last;
+hrtime_t tsc_resume_cap;
+hrtime_t tsc_hrtime_base;
+uint32_t tsc_max_delta;
+volatile uint32_t hres_lock;
+uint32_t tsc_type;
+uint32_t nsec_scale;
+int64_t hrestime_adj;
+hrtime_t hres_last_tick;
+uint32_t tsc_ncpu;
+volatile timestruc_t hrestime;
+hrtime_t tsc_sync_tick_delta[NCPU];
+
+comm_page_t comm_page;
+
+#else /* defined(__lint) */
+
+#include "assym.h"
+
+/*
+ * x86 Comm Page
+ *
+ * This is the definition for the comm page on x86. The purpose of this struct
+ * is to consolidate certain pieces of kernel state into one contiguous section
+ * of memory in order for it to be exposed (read-only) to userspace. The
+ * struct contents are defined by hand so that member variables will maintain
+ * their original symbols for use throughout the rest of the kernel. This
+ * layout must exactly match the C definition of comm_page_t.
+ * See: "uts/i86pc/sys/comm_page.h"
+ */
+
+ .data
+ DGDEF3(comm_page, COMM_PAGE_S_SIZE, 4096)
+ DGDEF2(tsc_last, 8)
+ .fill 1, 8, 0
+ DGDEF2(tsc_hrtime_base, 8)
+ .fill 1, 8, 0
+ DGDEF2(tsc_resume_cap, 8)
+ .fill 1, 8, 0
+ DGDEF2(tsc_type, 4);
+ .fill 1, 4, _CONST(TSC_RDTSC_CPUID)
+ DGDEF2(tsc_max_delta, 4);
+ .fill 1, 4, 0
+ DGDEF2(hres_lock, 4);
+ .fill 1, 4, 0
+ DGDEF2(nsec_scale, 4);
+ .fill 1, 4, 0
+ DGDEF2(hrestime_adj, 8)
+ .fill 1, 8, 0
+ DGDEF2(hres_last_tick, 8)
+ .fill 1, 8, 0
+ DGDEF2(tsc_ncpu, 4)
+ .fill 1, 4, 0
+ /* _cp_pad */
+ .fill 1, 4, 0
+ DGDEF2(hrestime, _MUL(2, 8))
+ .fill 2, 8, 0
+ DGDEF2(tsc_sync_tick_delta, _MUL(NCPU, 8))
+ .fill _CONST(NCPU), 8, 0
+
+ /* pad out the rest of the page from the struct end */
+ .fill _CONST(COMM_PAGE_SIZE - COMM_PAGE_S_SIZE), 1, 0
+
+#endif /* defined(__lint) */
diff --git a/usr/src/uts/i86pc/ml/offsets.in b/usr/src/uts/i86pc/ml/offsets.in
index 721d32fa3a..a1f83d3cf8 100644
--- a/usr/src/uts/i86pc/ml/offsets.in
+++ b/usr/src/uts/i86pc/ml/offsets.in
@@ -1,6 +1,7 @@
\
\ Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
\ Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved.
+\ Copyright 2016 Joyent, Inc.
\
\ CDDL HEADER START
\
@@ -61,6 +62,7 @@
#include <sys/brand.h>
#include <sys/fastboot.h>
#include <sys/cpr_wakecode.h>
+#include <sys/comm_page.h>
proc PROCSIZE
p_link
@@ -150,6 +152,8 @@ _klwp
lwp_thread
lwp_procp
lwp_brand
+ lwp_brand_syscall
+ lwp_brand_syscall_fast
lwp_eosys
lwp_regs
lwp_arg
@@ -467,3 +471,5 @@ wc_cpu WC_CPU_SIZE
wc_wakecode
wc_cpu
+
+comm_page_s COMM_PAGE_S_SIZE
diff --git a/usr/src/uts/i86pc/ml/syscall_asm.s b/usr/src/uts/i86pc/ml/syscall_asm.s
index 61ef4ac6c3..68181be28a 100644
--- a/usr/src/uts/i86pc/ml/syscall_asm.s
+++ b/usr/src/uts/i86pc/ml/syscall_asm.s
@@ -631,6 +631,36 @@ _sysenter_done:
sysexit
SET_SIZE(sys_sysenter)
SET_SIZE(brand_sys_sysenter)
+#endif /* __lint */
+
+#if defined(__lint)
+/*
+ * System call via an int80. This entry point is only used by the Linux
+ * application environment. Unlike the sysenter path, there is no default
+ * action to take if no callback is registered for this process.
+ */
+void
+sys_int80()
+{}
+
+#else /* __lint */
+
+ ENTRY_NP(brand_sys_int80)
+ BRAND_CALLBACK(BRAND_CB_INT80)
+
+ ALTENTRY(sys_int80)
+ /*
+ * We hit an int80, but this process isn't of a brand with an int80
+ * handler. Bad process! Make it look as if the INT failed.
+ * Modify %eip to point before the INT, push the expected error
+ * code and fake a GP fault.
+ *
+ */
+ subl $2, (%esp) /* int insn 2-bytes */
+ pushl $_CONST(_MUL(T_INT80, GATE_DESC_SIZE) + 2)
+ jmp gptrap / GP fault
+ SET_SIZE(sys_int80)
+ SET_SIZE(brand_sys_int80)
/*
* Declare a uintptr_t which covers the entire pc range of syscall
diff --git a/usr/src/uts/i86pc/ml/syscall_asm_amd64.s b/usr/src/uts/i86pc/ml/syscall_asm_amd64.s
index f26468c8cc..bc901e3e42 100644
--- a/usr/src/uts/i86pc/ml/syscall_asm_amd64.s
+++ b/usr/src/uts/i86pc/ml/syscall_asm_amd64.s
@@ -503,8 +503,25 @@ noprod_sys_syscall:
movq T_LWP(%r15), %r14
ASSERT_NO_RUPDATE_PENDING(%r14)
+
ENABLE_INTR_FLAGS
+ /*
+ * If our LWP has a branded syscall_fast handler, execute it. A return
+ * code of zero indicates that the handler completely processed the syscall
+ * and we can return directly to userspace.
+ */
+ movq LWP_BRAND_SYSCALL_FAST(%r14), %rdi
+ testq %rdi, %rdi
+ jz _syscall_no_brand_fast
+ call *%rdi
+ testl %eax, %eax
+ jnz _syscall_no_brand_fast
+ incq LWP_RU_SYSC(%r14)
+ incq %gs:CPU_STATS_SYS_SYSCALL
+ jmp _sys_rtt
+
+_syscall_no_brand_fast:
MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
movl REGOFF_RAX(%rsp), %eax /* (%rax damaged by mstate call) */
@@ -516,6 +533,28 @@ noprod_sys_syscall:
incq %gs:CPU_STATS_SYS_SYSCALL
+ /*
+ * If our LWP has an alternate system call handler, run that instead of
+ * the regular system call path.
+ */
+ movq LWP_BRAND_SYSCALL(%r14), %rdi
+ testq %rdi, %rdi
+ jz _syscall_no_brand
+
+ pushq %rax
+ subq $8, %rsp /* align stack for call to C */
+ call *%rdi
+ addq $8, %rsp
+
+ /*
+ * If the alternate handler returns 0, we skip straight to the return to
+ * usermode. Otherwise, we resume regular system call processing.
+ */
+ testl %eax, %eax
+ popq %rax
+ jz _syscall_after_brand
+
+_syscall_no_brand:
movw %ax, T_SYSNUM(%r15)
movzbl T_PRE_SYS(%r15), %ebx
ORL_SYSCALLTRACE(%ebx)
@@ -550,6 +589,8 @@ _syscall_invoke:
shrq $32, %r13 /* upper 32-bits into %edx */
movl %r12d, %r12d /* lower 32-bits into %eax */
5:
+
+_syscall_after_brand:
/*
* Optimistically assume that there's no post-syscall
* work to do. (This is to avoid having to call syscall_mstate()
@@ -787,6 +828,22 @@ _syscall32_save:
ENABLE_INTR_FLAGS
+ /*
+ * If our LWP has a branded syscall_fast handler, execute it. A return
+ * code of zero indicates that the handler completely processed the syscall
+ * and we can return directly to userspace.
+ */
+ movq LWP_BRAND_SYSCALL_FAST(%r14), %rdi
+ testq %rdi, %rdi
+ jz _syscall32_no_brand_fast
+ call *%rdi
+ testl %eax, %eax
+ jnz _syscall32_no_brand_fast
+ incq LWP_RU_SYSC(%r14)
+ incq %gs:CPU_STATS_SYS_SYSCALL
+ jmp _sys_rtt
+
+_syscall32_no_brand_fast:
MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
movl REGOFF_RAX(%rsp), %eax /* (%rax damaged by mstate call) */
@@ -795,11 +852,37 @@ _syscall32_save:
incq %gs:CPU_STATS_SYS_SYSCALL
/*
+ * If our lwp has an alternate system call handler, run that instead
+ * of the regular system call path.
+ */
+ movq LWP_BRAND_SYSCALL(%r14), %rax
+ testq %rax, %rax
+ jz _syscall32_no_brand
+
+ movb $LWP_SYS, LWP_STATE(%r14)
+ call *%rax
+
+ /*
+ * If the alternate handler returns 0, we skip straight to the return
+ * to usermode. Otherwise, we resume regular system call processing.
+ */
+ testl %eax, %eax
+ jz _syscall32_after_brand
+
+_syscall32_no_brand:
+ /*
* Make some space for MAXSYSARGS (currently 8) 32-bit args placed
* into 64-bit (long) arg slots, maintaining 16 byte alignment. Or
* more succinctly:
*
* SA(MAXSYSARGS * sizeof (long)) == 64
+ *
+ * Note, this space is used both to copy in the arguments from user
+ * land, but also to as part of the old UNIX style syscall_ap() method.
+ * syscall_entry expects that we do not change the values of this space
+ * that we give it. However, this means that when we end up in the more
+ * recent model of passing the arguments based on the calling
+ * conventions, we'll need to save an additional 16 bytes of stack.
*/
#define SYS_DROP 64 /* drop for args */
subq $SYS_DROP, %rsp
@@ -827,12 +910,16 @@ _syscall32_save:
*/
movq %rax, %rbx
- movl 0(%rsp), %edi
- movl 8(%rsp), %esi
- movl 0x10(%rsp), %edx
- movl 0x18(%rsp), %ecx
- movl 0x20(%rsp), %r8d
- movl 0x28(%rsp), %r9d
+ movl 0x0(%rsp), %edi /* arg0 */
+ movl 0x8(%rsp), %esi /* arg1 */
+ movl 0x10(%rsp), %edx /* arg2 */
+ movl 0x38(%rsp), %eax /* arg7 load */
+ movl 0x18(%rsp), %ecx /* arg3 */
+ pushq %rax /* arg7 saved to stack */
+ movl 0x28(%rsp), %r8d /* arg4 */
+ movl 0x38(%rsp), %eax /* arg6 load */
+ movl 0x30(%rsp), %r9d /* arg5 */
+ pushq %rax /* arg6 saved to stack */
call *SY_CALLC(%rbx)
@@ -850,6 +937,8 @@ _syscall32_save:
shrq $32, %r13 /* upper 32-bits into %edx */
movl %eax, %r12d /* lower 32-bits into %eax */
+_syscall32_after_brand:
+
/*
* Optimistically assume that there's no post-syscall
* work to do. (This is to avoid having to call syscall_mstate()
@@ -1079,15 +1168,20 @@ sys_sysenter()
/*
* Fetch the arguments copied onto the kernel stack and put
* them in the right registers to invoke a C-style syscall handler.
- * %rax contains the handler address.
+ * %rax contains the handler address. For the last two arguments, we
+ * push them onto the stack -- we can't clobber the old arguments.
*/
movq %rax, %rbx
- movl 0(%rsp), %edi
- movl 8(%rsp), %esi
- movl 0x10(%rsp), %edx
- movl 0x18(%rsp), %ecx
- movl 0x20(%rsp), %r8d
- movl 0x28(%rsp), %r9d
+ movl 0x0(%rsp), %edi /* arg0 */
+ movl 0x8(%rsp), %esi /* arg1 */
+ movl 0x10(%rsp), %edx /* arg2 */
+ movl 0x38(%rsp), %eax /* arg7 load */
+ movl 0x18(%rsp), %ecx /* arg3 */
+ pushq %rax /* arg7 saved to stack */
+ movl 0x28(%rsp), %r8d /* arg4 */
+ movl 0x38(%rsp), %eax /* arg6 load */
+ movl 0x30(%rsp), %r9d /* arg5 */
+ pushq %rax /* arg6 saved to stack */
call *SY_CALLC(%rbx)
@@ -1159,6 +1253,74 @@ sys_sysenter()
SET_SIZE(brand_sys_sysenter)
#endif /* __lint */
+
+#if defined(__lint)
+/*
+ * System call via an int80. This entry point is only used by the Linux
+ * application environment. Unlike the other entry points, there is no
+ * default action to take if no callback is registered for this process.
+ */
+void
+sys_int80()
+{}
+
+#else /* __lint */
+
+ ENTRY_NP(brand_sys_int80)
+ SWAPGS /* kernel gsbase */
+ XPV_TRAP_POP
+ call smap_enable
+
+ /*
+ * We first attempt to call the "b_int80" handler from the "struct
+ * brand_mach_ops" for this brand. If no handler function is installed
+ * for this brand, the BRAND_CALLBACK() macro returns here and we
+ * check the lwp for a "lwp_brand_syscall" handler.
+ */
+ BRAND_CALLBACK(BRAND_CB_INT80, BRAND_URET_FROM_INTR_STACK())
+
+ /*
+ * Check to see if this lwp provides "lwp_brand_syscall". If so, we
+ * will route this int80 through the regular system call handling path.
+ */
+ movq %r15, %gs:CPU_RTMP_R15
+ movq %gs:CPU_THREAD, %r15
+ movq T_LWP(%r15), %r15
+ movq LWP_BRAND_SYSCALL(%r15), %r15
+ testq %r15, %r15
+ movq %gs:CPU_RTMP_R15, %r15
+ jnz nopop_syscall_int
+
+ /*
+ * The brand provided neither a "b_int80", nor a "lwp_brand_syscall"
+ * function, and has thus opted out of handling this trap.
+ */
+ SWAPGS /* user gsbase */
+ jmp nopop_int80
+
+ ENTRY_NP(sys_int80)
+ /*
+ * We hit an int80, but this process isn't of a brand with an int80
+ * handler. Bad process! Make it look as if the INT failed.
+ * Modify %rip to point before the INT, push the expected error
+ * code and fake a GP fault. Note on 64-bit hypervisor we need
+ * to undo the XPV_TRAP_POP and push rcx and r11 back on the stack
+ * because gptrap will pop them again with its own XPV_TRAP_POP.
+ */
+ XPV_TRAP_POP
+ call smap_enable
+nopop_int80:
+ subq $2, (%rsp) /* int insn 2-bytes */
+ pushq $_CONST(_MUL(T_INT80, GATE_DESC_SIZE) + 2)
+#if defined(__xpv)
+ push %r11
+ push %rcx
+#endif
+ jmp gptrap / GP fault
+ SET_SIZE(sys_int80)
+ SET_SIZE(brand_sys_int80)
+#endif /* __lint */
+
/*
* This is the destination of the "int $T_SYSCALLINT" interrupt gate, used by
diff --git a/usr/src/uts/i86pc/os/comm_page_util.c b/usr/src/uts/i86pc/os/comm_page_util.c
new file mode 100644
index 0000000000..1c8c9f8afd
--- /dev/null
+++ b/usr/src/uts/i86pc/os/comm_page_util.c
@@ -0,0 +1,62 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+
+#include <sys/types.h>
+#include <sys/thread.h>
+#include <sys/proc.h>
+#include <sys/mman.h>
+#include <sys/vmsystm.h>
+#include <vm/as.h>
+#include <vm/seg_umap.h>
+
+#if defined(__x86) && !defined(__xpv)
+#include <sys/comm_page.h>
+#endif /* defined(__x86) && !defined(__xpv) */
+
+/*
+ * Map in the comm page.
+ *
+ * The contents of the comm page are only defined on non-xpv x86 at this time.
+ * Furthermore, the data is only valid in userspace (32-bit or 64-bit) when
+ * mapped from a 64-bit kernel.
+ * See: "uts/i86pc/sys/comm_page.h"
+ */
+caddr_t
+comm_page_mapin()
+{
+#if defined(__amd64) && !defined(__xpv)
+ proc_t *p = curproc;
+ caddr_t addr = (caddr_t)COMM_PAGE_ALIGN;
+ size_t len = COMM_PAGE_SIZE;
+ uint_t prot = PROT_USER | PROT_READ;
+ segumap_crargs_t suarg;
+
+ map_addr(&addr, len, (offset_t)0, 1, MAP_ALIGN);
+ if (addr == NULL || valid_usr_range(addr, len, prot, p->p_as,
+ p->p_as->a_userlimit) != RANGE_OKAY) {
+ return (NULL);
+ }
+
+ suarg.kaddr = (caddr_t)&comm_page;
+ suarg.prot = suarg.maxprot = prot;
+ if (as_map(p->p_as, addr, len, segumap_create, &suarg) != 0) {
+ return (NULL);
+ }
+ return (addr);
+#else /* defined(__amd64) && !defined(__xpv) */
+ return (NULL);
+#endif /* defined(__amd64) && !defined(__xpv) */
+}
diff --git a/usr/src/uts/i86pc/os/cpr_impl.c b/usr/src/uts/i86pc/os/cpr_impl.c
index 91fb583a01..f173a1dc57 100644
--- a/usr/src/uts/i86pc/os/cpr_impl.c
+++ b/usr/src/uts/i86pc/os/cpr_impl.c
@@ -753,6 +753,20 @@ i_cpr_is_supported(int sleeptype)
if (sleeptype != CPR_TORAM)
return (0);
+ /*
+ * Unfortunately, the x86 resume code was never implemented for GAS.
+ * The only obvious problem is that a trick necessary to appease Sun
+ * Studio does the wrong thing for GAS. Doubley unfortunate is that
+ * the condition used to detect GAS is incorrect, so we do in fact
+ * compile the Studio path, it just immediately fails in resume.
+ *
+ * Given that, if we were built using GCC, never allow CPR to be
+ * attempted.
+ */
+#ifdef __GNUC__
+ return (0);
+#endif
+
/*
* The next statement tests if a specific platform has turned off
* cpr support.
diff --git a/usr/src/uts/i86pc/os/cpuid.c b/usr/src/uts/i86pc/os/cpuid.c
index 44e475f328..027ed29c3d 100644
--- a/usr/src/uts/i86pc/os/cpuid.c
+++ b/usr/src/uts/i86pc/os/cpuid.c
@@ -32,7 +32,7 @@
* Portions Copyright 2009 Advanced Micro Devices, Inc.
*/
/*
- * Copyright (c) 2015, Joyent, Inc. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
*/
/*
* Various routines to handle identification
@@ -57,6 +57,8 @@
#include <sys/auxv_386.h>
#include <sys/memnode.h>
#include <sys/pci_cfgspace.h>
+#include <sys/comm_page.h>
+#include <sys/tsc.h>
#ifdef __xpv
#include <sys/hypervisor.h>
@@ -171,7 +173,9 @@ static char *x86_feature_names[NUM_X86_FEATURES] = {
"bmi2",
"fma",
"smep",
- "smap"
+ "smap",
+ "adx",
+ "rdseed"
};
boolean_t
@@ -1264,6 +1268,11 @@ cpuid_pass1(cpu_t *cpu, uchar_t *featureset)
disable_smap == 0)
add_x86_feature(featureset, X86FSET_SMAP);
#endif
+ if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_RDSEED)
+ add_x86_feature(featureset, X86FSET_RDSEED);
+
+ if (ecp->cp_ebx & CPUID_INTC_EBX_7_0_ADX)
+ add_x86_feature(featureset, X86FSET_ADX);
}
/*
@@ -2739,6 +2748,10 @@ cpuid_pass4(cpu_t *cpu, uint_t *hwcap_out)
*ebx &= ~CPUID_INTC_EBX_7_0_BMI2;
if (!is_x86_feature(x86_featureset, X86FSET_AVX2))
*ebx &= ~CPUID_INTC_EBX_7_0_AVX2;
+ if (!is_x86_feature(x86_featureset, X86FSET_RDSEED))
+ *ebx &= ~CPUID_INTC_EBX_7_0_RDSEED;
+ if (!is_x86_feature(x86_featureset, X86FSET_ADX))
+ *ebx &= ~CPUID_INTC_EBX_7_0_ADX;
/*
* [no explicit support required beyond x87 fp context]
@@ -2808,8 +2821,20 @@ cpuid_pass4(cpu_t *cpu, uint_t *hwcap_out)
if (*ecx & CPUID_INTC_ECX_RDRAND)
hwcap_flags_2 |= AV_386_2_RDRAND;
+ if (*ebx & CPUID_INTC_EBX_7_0_ADX)
+ hwcap_flags_2 |= AV_386_2_ADX;
+ if (*ebx & CPUID_INTC_EBX_7_0_RDSEED)
+ hwcap_flags_2 |= AV_386_2_RDSEED;
+
+ }
+
+ /* Detect systems with a potential CPUID limit */
+ if (cpi->cpi_vendor == X86_VENDOR_Intel && cpi->cpi_maxeax < 4) {
+ cmn_err(CE_NOTE, "CPUID limit detected, "
+ "see the CPUID(7D) man page for details\n");
}
+
if (cpi->cpi_xmaxeax < 0x80000001)
goto pass4_done;
@@ -4591,27 +4616,30 @@ patch_tsc_read(int flag)
size_t cnt;
switch (flag) {
- case X86_NO_TSC:
+ case TSC_NONE:
cnt = &_no_rdtsc_end - &_no_rdtsc_start;
(void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt);
break;
- case X86_HAVE_TSCP:
- cnt = &_tscp_end - &_tscp_start;
- (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt);
- break;
- case X86_TSC_MFENCE:
+ case TSC_RDTSC_MFENCE:
cnt = &_tsc_mfence_end - &_tsc_mfence_start;
(void) memcpy((void *)tsc_read,
(void *)&_tsc_mfence_start, cnt);
break;
- case X86_TSC_LFENCE:
+ case TSC_RDTSC_LFENCE:
cnt = &_tsc_lfence_end - &_tsc_lfence_start;
(void) memcpy((void *)tsc_read,
(void *)&_tsc_lfence_start, cnt);
break;
+ case TSC_TSCP:
+ cnt = &_tscp_end - &_tscp_start;
+ (void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt);
+ break;
default:
+ /* Bail for unexpected TSC types. (TSC_NONE covers 0) */
+ cmn_err(CE_PANIC, "Unrecogized TSC type: %d", flag);
break;
}
+ tsc_type = flag;
}
int
diff --git a/usr/src/uts/i86pc/os/ibft.c b/usr/src/uts/i86pc/os/ibft.c
index d9ed882705..fab1324787 100644
--- a/usr/src/uts/i86pc/os/ibft.c
+++ b/usr/src/uts/i86pc/os/ibft.c
@@ -39,6 +39,7 @@
#include <sys/kmem.h>
#include <sys/psm.h>
#include <sys/bootconf.h>
+#include <sys/reboot.h>
typedef enum ibft_structure_type {
Reserved = 0,
@@ -206,6 +207,7 @@ static ibft_status_t iscsi_parse_ibft_NIC(iscsi_ibft_nic_t *nicp);
static ibft_status_t iscsi_parse_ibft_target(char *begin_of_ibft,
iscsi_ibft_tgt_t *tgtp);
+extern int boothowto;
/*
* Return value:
@@ -759,7 +761,9 @@ ld_ib_prop()
* 1) pass "-B ibft-noprobe=1" on kernel command line
* 2) add line "set ibft_noprobe=1" in /etc/system
*/
- cmn_err(CE_NOTE, IBFT_NOPROBE_MSG);
+ if (boothowto & RB_VERBOSE) {
+ cmn_err(CE_NOTE, IBFT_NOPROBE_MSG);
+ }
return;
}
diff --git a/usr/src/uts/i86pc/os/lgrpplat.c b/usr/src/uts/i86pc/os/lgrpplat.c
index b46bbf849d..981398970e 100644
--- a/usr/src/uts/i86pc/os/lgrpplat.c
+++ b/usr/src/uts/i86pc/os/lgrpplat.c
@@ -2799,7 +2799,11 @@ lgrp_plat_process_sli(uint32_t domain_id, uchar_t *sli_info,
/*
* Read ACPI System Resource Affinity Table (SRAT) to determine which CPUs
* and memory are local to each other in the same NUMA node and return number
- * of nodes
+ * of nodes.
+ *
+ * The SRAT table pointer is populated during bootup by
+ * build_firmware_properties() in fakebop.c. Several motherboard and BIOS
+ * manufacturers are guilty of not having a SRAT table.
*/
static int
lgrp_plat_process_srat(ACPI_TABLE_SRAT *tp, ACPI_TABLE_MSCT *mp,
@@ -2816,9 +2820,15 @@ lgrp_plat_process_srat(ACPI_TABLE_SRAT *tp, ACPI_TABLE_MSCT *mp,
/*
* Nothing to do when no SRAT or disabled
*/
- if (tp == NULL || !lgrp_plat_srat_enable)
+ if (!lgrp_plat_srat_enable)
return (-1);
+ if (tp == NULL) {
+ cmn_err(CE_WARN, "Couldn't read ACPI SRAT table from BIOS. "
+ "lgrp support will be limited to one group.\n");
+ return (-1);
+ }
+
/*
* Try to get domain information from MSCT table.
* ACPI4.0: OSPM will use information provided by the MSCT only
diff --git a/usr/src/uts/i86pc/os/mlsetup.c b/usr/src/uts/i86pc/os/mlsetup.c
index 045adbcb7b..438f83b6e9 100644
--- a/usr/src/uts/i86pc/os/mlsetup.c
+++ b/usr/src/uts/i86pc/os/mlsetup.c
@@ -23,6 +23,7 @@
*
* Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
*/
/*
* Copyright (c) 2010, Intel Corporation.
@@ -61,6 +62,7 @@
#include <sys/promif.h>
#include <sys/pci_cfgspace.h>
#include <sys/bootvfs.h>
+#include <sys/tsc.h>
#ifdef __xpv
#include <sys/hypervisor.h>
#else
@@ -227,15 +229,15 @@ mlsetup(struct regs *rp)
*/
if ((get_hwenv() & HW_XEN_HVM) == 0 &&
is_x86_feature(x86_featureset, X86FSET_TSCP))
- patch_tsc_read(X86_HAVE_TSCP);
+ patch_tsc_read(TSC_TSCP);
else if (cpuid_getvendor(CPU) == X86_VENDOR_AMD &&
cpuid_getfamily(CPU) <= 0xf &&
is_x86_feature(x86_featureset, X86FSET_SSE2))
- patch_tsc_read(X86_TSC_MFENCE);
+ patch_tsc_read(TSC_RDTSC_MFENCE);
else if (cpuid_getvendor(CPU) == X86_VENDOR_Intel &&
cpuid_getfamily(CPU) <= 6 &&
is_x86_feature(x86_featureset, X86FSET_SSE2))
- patch_tsc_read(X86_TSC_LFENCE);
+ patch_tsc_read(TSC_RDTSC_LFENCE);
#endif /* !__xpv */
@@ -246,7 +248,7 @@ mlsetup(struct regs *rp)
* return 0.
*/
if (!is_x86_feature(x86_featureset, X86FSET_TSC))
- patch_tsc_read(X86_NO_TSC);
+ patch_tsc_read(TSC_NONE);
#endif /* __i386 && !__xpv */
#if defined(__amd64) && !defined(__xpv)
diff --git a/usr/src/uts/i86pc/os/mp_startup.c b/usr/src/uts/i86pc/os/mp_startup.c
index 3c7b453949..829c631096 100644
--- a/usr/src/uts/i86pc/os/mp_startup.c
+++ b/usr/src/uts/i86pc/os/mp_startup.c
@@ -27,7 +27,7 @@
* All rights reserved.
*/
/*
- * Copyright 2015 Joyent, Inc.
+ * Copyright 2016 Joyent, Inc.
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
*/
@@ -166,6 +166,8 @@ init_cpu_info(struct cpu *cp)
void
init_cpu_syscall(struct cpu *cp)
{
+ uint64_t flags;
+
kpreempt_disable();
#if defined(__amd64)
@@ -247,6 +249,24 @@ init_cpu_syscall(struct cpu *cp)
kpreempt_enable();
}
+#if !defined(__xpv)
+/*
+ * Configure per-cpu ID GDT
+ */
+static void
+init_cpu_id_gdt(struct cpu *cp)
+{
+ /* Write cpu_id into limit field of GDT for usermode retrieval */
+#if defined(__amd64)
+ set_usegd(&cp->cpu_gdt[GDT_CPUID], SDP_SHORT, NULL, cp->cpu_id,
+ SDT_MEMRODA, SEL_UPL, SDP_BYTES, SDP_OP32);
+#elif defined(__i386)
+ set_usegd(&cp->cpu_gdt[GDT_CPUID], NULL, cp->cpu_id, SDT_MEMRODA,
+ SEL_UPL, SDP_BYTES, SDP_OP32);
+#endif
+}
+#endif /* !defined(__xpv) */
+
/*
* Multiprocessor initialization.
*
@@ -430,6 +450,10 @@ mp_cpu_configure_common(int cpun, boolean_t boot)
init_cpu_info(cp);
+#if !defined(__xpv)
+ init_cpu_id_gdt(cp);
+#endif
+
/*
* alloc space for ucode_info
*/
@@ -1486,6 +1510,10 @@ start_other_cpus(int cprboot)
*/
init_cpu_info(CPU);
+#if !defined(__xpv)
+ init_cpu_id_gdt(CPU);
+#endif
+
cmn_err(CE_CONT, "?cpu%d: %s\n", CPU->cpu_id, CPU->cpu_idstr);
cmn_err(CE_CONT, "?cpu%d: %s\n", CPU->cpu_id, CPU->cpu_brandstr);
diff --git a/usr/src/uts/i86pc/os/timestamp.c b/usr/src/uts/i86pc/os/timestamp.c
index c40159018c..7344e1a492 100644
--- a/usr/src/uts/i86pc/os/timestamp.c
+++ b/usr/src/uts/i86pc/os/timestamp.c
@@ -25,6 +25,7 @@
*
* Copyright 2012 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014, 2016 by Delphix. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
*/
#include <sys/types.h>
@@ -47,6 +48,7 @@
#include <sys/panic.h>
#include <sys/cpu.h>
#include <sys/sdt.h>
+#include <sys/comm_page.h>
/*
* Using the Pentium's TSC register for gethrtime()
@@ -99,7 +101,6 @@
#define NSEC_SHIFT 5
-static uint_t nsec_scale;
static uint_t nsec_unscale;
/*
@@ -140,18 +141,12 @@ static volatile int tsc_sync_go;
int tsc_master_slave_sync_needed = 1;
-static int tsc_max_delta;
-static hrtime_t tsc_sync_tick_delta[NCPU];
typedef struct tsc_sync {
volatile hrtime_t master_tsc, slave_tsc;
} tsc_sync_t;
static tsc_sync_t *tscp;
-static hrtime_t largest_tsc_delta = 0;
-static ulong_t shortest_write_time = ~0UL;
-static hrtime_t tsc_last = 0;
static hrtime_t tsc_last_jumped = 0;
-static hrtime_t tsc_hrtime_base = 0;
static int tsc_jumped = 0;
static uint32_t tsc_wayback = 0;
/*
@@ -159,7 +154,6 @@ static uint32_t tsc_wayback = 0;
* tsc_tick() function runs which means that when gethrtime() is called it
* should never be more than 1 second since tsc_last was updated.
*/
-static hrtime_t tsc_resume_cap;
static hrtime_t tsc_resume_cap_ns = NANOSEC; /* 1s */
static hrtime_t shadow_tsc_hrtime_base;
@@ -451,25 +445,27 @@ tsc_gethrtimeunscaled_delta(void)
}
/*
- * Called by the master in the TSC sync operation (usually the boot CPU).
- * If the slave is discovered to have a skew, gethrtimef will be changed to
- * point to tsc_gethrtime_delta(). Calculating skews is precise only when
- * the master and slave TSCs are read simultaneously; however, there is no
- * algorithm that can read both CPUs in perfect simultaneity. The proposed
- * algorithm is an approximate method based on the behaviour of cache
- * management. The slave CPU continuously reads TSC and then reads a global
- * variable which the master CPU updates. The moment the master's update reaches
- * the slave's visibility (being forced by an mfence operation) we use the TSC
- * reading taken on the slave. A corresponding TSC read will be taken on the
- * master as soon as possible after finishing the mfence operation. But the
- * delay between causing the slave to notice the invalid cache line and the
- * competion of mfence is not repeatable. This error is heuristically assumed
- * to be 1/4th of the total write time as being measured by the two TSC reads
- * on the master sandwiching the mfence. Furthermore, due to the nature of
- * bus arbitration, contention on memory bus, etc., the time taken for the write
- * to reflect globally can vary a lot. So instead of taking a single reading,
- * a set of readings are taken and the one with least write time is chosen
- * to calculate the final skew.
+ * TSC Sync Master
+ *
+ * Typically called on the boot CPU, this attempts to quantify TSC skew between
+ * different CPUs. If an appreciable difference is found, gethrtimef will be
+ * changed to point to tsc_gethrtime_delta().
+ *
+ * Calculating skews is precise only when the master and slave TSCs are read
+ * simultaneously; however, there is no algorithm that can read both CPUs in
+ * perfect simultaneity. The proposed algorithm is an approximate method based
+ * on the behaviour of cache management. The slave CPU continuously polls the
+ * TSC while reading a global variable updated by the master CPU. The latest
+ * TSC reading is saved when the master's update (forced via mfence) reaches
+ * visibility on the slave. The master will also take a TSC reading
+ * immediately following the mfence.
+ *
+ * While the delay between cache line invalidation on the slave and mfence
+ * completion on the master is not repeatable, the error is heuristically
+ * assumed to be 1/4th of the write time recorded by the master. Multiple
+ * samples are taken to control for the variance caused by external factors
+ * such as bus contention. Each sample set is independent per-CPU to control
+ * for differing memory latency on NUMA systems.
*
* TSC sync is disabled in the context of virtualization because the CPUs
* assigned to the guest are virtual CPUs which means the real CPUs on which
@@ -482,7 +478,7 @@ void
tsc_sync_master(processorid_t slave)
{
ulong_t flags, source, min_write_time = ~0UL;
- hrtime_t write_time, x, mtsc_after, tdelta;
+ hrtime_t write_time, mtsc_after, last_delta = 0;
tsc_sync_t *tsc = tscp;
int cnt;
int hwtype;
@@ -505,57 +501,53 @@ tsc_sync_master(processorid_t slave)
SMT_PAUSE();
write_time = mtsc_after - tsc->master_tsc;
if (write_time <= min_write_time) {
- min_write_time = write_time;
+ hrtime_t tdelta;
+
+ tdelta = tsc->slave_tsc - mtsc_after;
+ if (tdelta < 0)
+ tdelta = -tdelta;
/*
- * Apply heuristic adjustment only if the calculated
- * delta is > 1/4th of the write time.
+ * If the margin exists, subtract 1/4th of the measured
+ * write time from the master's TSC value. This is an
+ * estimate of how late the mfence completion came
+ * after the slave noticed the cache line change.
*/
- x = tsc->slave_tsc - mtsc_after;
- if (x < 0)
- x = -x;
- if (x > (min_write_time/4))
- /*
- * Subtract 1/4th of the measured write time
- * from the master's TSC value, as an estimate
- * of how late the mfence completion came
- * after the slave noticed the cache line
- * change.
- */
+ if (tdelta > (write_time/4)) {
tdelta = tsc->slave_tsc -
- (mtsc_after - (min_write_time/4));
- else
+ (mtsc_after - (write_time/4));
+ } else {
tdelta = tsc->slave_tsc - mtsc_after;
- tsc_sync_tick_delta[slave] =
- tsc_sync_tick_delta[source] - tdelta;
+ }
+ last_delta = tsc_sync_tick_delta[source] - tdelta;
+ tsc_sync_tick_delta[slave] = last_delta;
+ min_write_time = write_time;
}
tsc->master_tsc = tsc->slave_tsc = write_time = 0;
membar_enter();
tsc_sync_go = TSC_SYNC_STOP;
}
- if (tdelta < 0)
- tdelta = -tdelta;
- if (tdelta > largest_tsc_delta)
- largest_tsc_delta = tdelta;
- if (min_write_time < shortest_write_time)
- shortest_write_time = min_write_time;
+
/*
- * Enable delta variants of tsc functions if the largest of all chosen
- * deltas is > smallest of the write time.
+ * Only enable the delta variants of the TSC functions if the measured
+ * skew is greater than the fastest write time.
*/
- if (largest_tsc_delta > shortest_write_time) {
+ last_delta = (last_delta < 0) ? -last_delta : last_delta;
+ if (last_delta > min_write_time) {
gethrtimef = tsc_gethrtime_delta;
gethrtimeunscaledf = tsc_gethrtimeunscaled_delta;
+ tsc_ncpu = NCPU;
}
restore_int_flag(flags);
}
/*
+ * TSC Sync Slave
+ *
* Called by a CPU which has just been onlined. It is expected that the CPU
* performing the online operation will call tsc_sync_master().
*
- * TSC sync is disabled in the context of virtualization. See comments
- * above tsc_sync_master.
+ * Like tsc_sync_master, this logic is skipped on virtualized platforms.
*/
void
tsc_sync_slave(void)
@@ -579,11 +571,9 @@ tsc_sync_slave(void)
tsc_sync_go = TSC_SYNC_GO;
do {
/*
- * Do not put an SMT_PAUSE here. For instance,
- * if the master and slave are really the same
- * hyper-threaded CPU, then you want the master
- * to yield to the slave as quickly as possible here,
- * but not the other way.
+ * Do not put an SMT_PAUSE here. If the master and
+ * slave are the same hyper-threaded CPU, we want the
+ * master to yield as quickly as possible to the slave.
*/
s1 = tsc_read();
} while (tsc->master_tsc == 0);
@@ -688,6 +678,12 @@ tsc_hrtimeinit(uint64_t cpu_freq_hz)
hrtime_tick = tsc_tick;
gethrtime_hires = 1;
/*
+ * Being part of the comm page, tsc_ncpu communicates the published
+ * length of the tsc_sync_tick_delta array. This is kept zeroed to
+ * ignore the absent delta data while the TSCs are synced.
+ */
+ tsc_ncpu = 0;
+ /*
* Allocate memory for the structure used in the tsc sync logic.
* This structure should be aligned on a multiple of cache line size.
*/
@@ -708,12 +704,10 @@ get_tsc_ready()
}
/*
- * Adjust all the deltas by adding the passed value to the array.
- * Then use the "delt" versions of the the gethrtime functions.
- * Note that 'tdelta' _could_ be a negative number, which should
- * reduce the values in the array (used, for example, if the Solaris
- * instance was moved by a virtual manager to a machine with a higher
- * value of tsc).
+ * Adjust all the deltas by adding the passed value to the array and activate
+ * the "delta" versions of the gethrtime functions. It is possible that the
+ * adjustment could be negative. Such may occur if the SunOS instance was
+ * moved by a virtual manager to a machine with a higher value of TSC.
*/
void
tsc_adjust_delta(hrtime_t tdelta)
@@ -726,19 +720,16 @@ tsc_adjust_delta(hrtime_t tdelta)
gethrtimef = tsc_gethrtime_delta;
gethrtimeunscaledf = tsc_gethrtimeunscaled_delta;
+ tsc_ncpu = NCPU;
}
/*
* Functions to manage TSC and high-res time on suspend and resume.
*/
-/*
- * declarations needed for time adjustment
- */
-extern void rtcsync(void);
+/* tod_ops from "uts/i86pc/io/todpc_subr.c" */
extern tod_ops_t *tod_ops;
-/* There must be a better way than exposing nsec_scale! */
-extern uint_t nsec_scale;
+
static uint64_t tsc_saved_tsc = 0; /* 1 in 2^64 chance this'll screw up! */
static timestruc_t tsc_saved_ts;
static int tsc_needs_resume = 0; /* We only want to do this once. */
@@ -748,23 +739,20 @@ int tsc_suspend_count = 0;
int tsc_resume_in_cyclic = 0;
/*
- * Let timestamp.c know that we are suspending. It needs to take
- * snapshots of the current time, and do any pre-suspend work.
+ * Take snapshots of the current time and do any other pre-suspend work.
*/
void
tsc_suspend(void)
{
-/*
- * What we need to do here, is to get the time we suspended, so that we
- * know how much we should add to the resume.
- * This routine is called by each CPU, so we need to handle reentry.
- */
+ /*
+ * We need to collect the time at which we suspended here so we know
+ * now much should be added during the resume. This is called by each
+ * CPU, so reentry must be properly handled.
+ */
if (tsc_gethrtime_enable) {
/*
- * We put the tsc_read() inside the lock as it
- * as no locking constraints, and it puts the
- * aquired value closer to the time stamp (in
- * case we delay getting the lock).
+ * Perform the tsc_read after acquiring the lock to make it as
+ * accurate as possible in the face of contention.
*/
mutex_enter(&tod_lock);
tsc_saved_tsc = tsc_read();
@@ -786,8 +774,7 @@ tsc_suspend(void)
}
/*
- * Restore all timestamp state based on the snapshots taken at
- * suspend time.
+ * Restore all timestamp state based on the snapshots taken at suspend time.
*/
void
tsc_resume(void)
diff --git a/usr/src/uts/i86pc/os/trap.c b/usr/src/uts/i86pc/os/trap.c
index 9390690e95..c88fec6fbe 100644
--- a/usr/src/uts/i86pc/os/trap.c
+++ b/usr/src/uts/i86pc/os/trap.c
@@ -32,7 +32,7 @@
/* */
/*
- * Copyright 2012 Joyent, Inc. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
#include <sys/types.h>
@@ -101,6 +101,7 @@
#include <sys/hypervisor.h>
#endif
#include <sys/contract/process_impl.h>
+#include <sys/brand.h>
#define USER 0x10000 /* user-mode flag added to trap type */
@@ -862,6 +863,17 @@ trap(struct regs *rp, caddr_t addr, processorid_t cpuid)
fault_type = F_INVAL;
}
+ /*
+ * Allow the brand to interpose on invalid memory accesses
+ * prior to running the native pagefault handler. If this
+ * brand hook returns zero, it was able to handle the fault
+ * completely. Otherwise, drive on and call pagefault().
+ */
+ if (PROC_IS_BRANDED(p) && BROP(p)->b_pagefault != NULL &&
+ BROP(p)->b_pagefault(p, lwp, addr, fault_type, rw) == 0) {
+ goto out;
+ }
+
res = pagefault(addr, fault_type, rw, 0);
/*
@@ -1468,12 +1480,23 @@ trap(struct regs *rp, caddr_t addr, processorid_t cpuid)
ct->t_sig_check = 0;
- mutex_enter(&p->p_lock);
+ /*
+ * As in other code paths that check against TP_CHANGEBIND,
+ * we perform the check first without p_lock held -- only
+ * acquiring p_lock in the unlikely event that it is indeed
+ * set. This is safe because we are doing this after the
+ * astoff(); if we are racing another thread setting
+ * TP_CHANGEBIND on us, we will pick it up on a subsequent
+ * lap through.
+ */
if (curthread->t_proc_flag & TP_CHANGEBIND) {
- timer_lwpbind();
- curthread->t_proc_flag &= ~TP_CHANGEBIND;
+ mutex_enter(&p->p_lock);
+ if (curthread->t_proc_flag & TP_CHANGEBIND) {
+ timer_lwpbind();
+ curthread->t_proc_flag &= ~TP_CHANGEBIND;
+ }
+ mutex_exit(&p->p_lock);
}
- mutex_exit(&p->p_lock);
/*
* for kaio requests that are on the per-process poll queue,
diff --git a/usr/src/uts/i86pc/sys/acpidev.h b/usr/src/uts/i86pc/sys/acpidev.h
index 6d11277aaf..a3bd54d4e3 100644
--- a/usr/src/uts/i86pc/sys/acpidev.h
+++ b/usr/src/uts/i86pc/sys/acpidev.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2009-2010, Intel Corporation.
* All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_ACPIDEV_H
@@ -128,7 +129,7 @@ typedef enum acpidev_class_id {
#ifdef _KERNEL
/* Common ACPI object names. */
-#define ACPIDEV_OBJECT_NAME_SB ACPI_NS_SYSTEM_BUS
+#define ACPIDEV_OBJECT_NAME_SB METHOD_NAME__SB_
#define ACPIDEV_OBJECT_NAME_PR "_PR_"
/* Common ACPI method names. */
diff --git a/usr/src/uts/i86pc/sys/apic.h b/usr/src/uts/i86pc/sys/apic.h
index 8a87760456..11ae48340a 100644
--- a/usr/src/uts/i86pc/sys/apic.h
+++ b/usr/src/uts/i86pc/sys/apic.h
@@ -382,7 +382,7 @@ struct apic_io_intr {
/* special or reserve vectors */
#define APIC_CHECK_RESERVE_VECTORS(v) \
(((v) == T_FASTTRAP) || ((v) == APIC_SPUR_INTR) || \
- ((v) == T_SYSCALLINT) || ((v) == T_DTRACE_RET))
+ ((v) == T_SYSCALLINT) || ((v) == T_DTRACE_RET) || ((v) == 0x80))
/* cmos shutdown code for BIOS */
#define BIOS_SHUTDOWN 0x0a
diff --git a/usr/src/uts/i86pc/sys/comm_page.h b/usr/src/uts/i86pc/sys/comm_page.h
new file mode 100644
index 0000000000..dbf00bc7a7
--- /dev/null
+++ b/usr/src/uts/i86pc/sys/comm_page.h
@@ -0,0 +1,102 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#ifndef _COMM_PAGE_H
+#define _COMM_PAGE_H
+
+#ifndef _ASM
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#endif /* _ASM */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define COMM_PAGE_SIZE PAGESIZE
+#define COMM_PAGE_ALIGN 0x4000
+
+#ifndef _ASM
+
+/*
+ * x86 comm page
+ *
+ * This struct defines the data format for the "comm page": kernel data made
+ * directly available to userspace for read-only operations. This enables
+ * facilities such as clock_gettime to operate entirely in userspace without
+ * the need for a trap or fasttrap.
+ *
+ * A note about 32-bit/64-bit compatibility:
+ * The current format of the comm page is designed to be consistent for both
+ * 32-bit and 64-bit programs running in a 64-bit kernel. On 32-bit kernels,
+ * the comm page is not exposed to userspace due to the difference in
+ * timespec_t sizing.
+ *
+ * This struct is instantiated "by hand" in assembly to preserve the global
+ * symbols it contains. That layout must be kept in sync with the structure
+ * defined here.
+ * See: "uts/i86pc/ml/comm_page.s"
+ */
+typedef struct comm_page_s {
+ hrtime_t cp_tsc_last;
+ hrtime_t cp_tsc_hrtime_base;
+ hrtime_t cp_tsc_resume_cap;
+ uint32_t cp_tsc_type;
+ uint32_t cp_tsc_max_delta;
+
+ volatile uint32_t cp_hres_lock; /* must be 8-byte aligned */
+ uint32_t cp_nsec_scale;
+ int64_t cp_hrestime_adj;
+ hrtime_t cp_hres_last_tick;
+ uint32_t cp_tsc_ncpu;
+ uint32_t _cp_pad;
+ volatile int64_t cp_hrestime[2];
+#if defined(_MACHDEP)
+ hrtime_t cp_tsc_sync_tick_delta[NCPU];
+#else
+ /* length resides in cp_ncpu */
+ hrtime_t cp_tsc_sync_tick_delta[];
+#endif /* defined(_MACHDEP) */
+} comm_page_t;
+
+#if defined(_KERNEL)
+extern comm_page_t comm_page;
+
+extern caddr_t comm_page_mapin();
+
+#if defined(_MACHDEP)
+extern hrtime_t tsc_last;
+extern hrtime_t tsc_hrtime_base;
+extern hrtime_t tsc_resume_cap;
+extern uint32_t tsc_type;
+extern uint32_t tsc_max_delta;
+extern volatile uint32_t hres_lock;
+extern uint32_t nsec_scale;
+extern int64_t hrestime_adj;
+extern hrtime_t hres_last_tick;
+extern uint32_t tsc_ncpu;
+extern volatile timestruc_t hrestime;
+extern hrtime_t tsc_sync_tick_delta[NCPU];
+#endif /* defined(_MACHDEP) */
+#endif /* defined(_KERNEL) */
+
+#endif /* _ASM */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _COMM_PAGE_H */
diff --git a/usr/src/uts/i86pc/sys/machparam.h b/usr/src/uts/i86pc/sys/machparam.h
index 99ae0d4d3b..fc34522307 100644
--- a/usr/src/uts/i86pc/sys/machparam.h
+++ b/usr/src/uts/i86pc/sys/machparam.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015 by Delphix. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
*/
/* Copyright (c) 1988 AT&T */
@@ -54,6 +55,10 @@ extern "C" {
*/
#if defined(__amd64)
+/*
+ * If NCPU grows beyond 256, sizing for the x86 comm page will require
+ * adjustment.
+ */
#define NCPU 256
#define NCPU_LOG2 8
#elif defined(__i386)
diff --git a/usr/src/uts/i86pc/sys/tsc.h b/usr/src/uts/i86pc/sys/tsc.h
new file mode 100644
index 0000000000..d4090381c4
--- /dev/null
+++ b/usr/src/uts/i86pc/sys/tsc.h
@@ -0,0 +1,28 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2016 Joyent, Inc.
+ */
+
+#ifndef _TSC_H
+#define _TSC_H
+
+/*
+ * flags to patch tsc_read routine.
+ */
+#define TSC_NONE 0x0
+#define TSC_RDTSC_CPUID 0x1
+#define TSC_RDTSC_MFENCE 0x2
+#define TSC_RDTSC_LFENCE 0x3
+#define TSC_TSCP 0x4
+
+#endif /* _TSC_H */
diff --git a/usr/src/uts/i86pc/vm/hat_i86.c b/usr/src/uts/i86pc/vm/hat_i86.c
index ea2a83b2bd..a8b4e6edfc 100644
--- a/usr/src/uts/i86pc/vm/hat_i86.c
+++ b/usr/src/uts/i86pc/vm/hat_i86.c
@@ -27,6 +27,7 @@
*/
/*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
* Copyright (c) 2014, 2015 by Delphix. All rights reserved.
*/
@@ -3323,7 +3324,7 @@ hat_page_getattr(struct page *pp, uint_t flag)
/*
- * common code used by hat_pageunload() and hment_steal()
+ * common code used by hat_page_inval() and hment_steal()
*/
hment_t *
hati_page_unmap(page_t *pp, htable_t *ht, uint_t entry)
@@ -3379,15 +3380,13 @@ hati_page_unmap(page_t *pp, htable_t *ht, uint_t entry)
extern int vpm_enable;
/*
- * Unload all translations to a page. If the page is a subpage of a large
+ * Unload translations to a page. If the page is a subpage of a large
* page, the large page mappings are also removed.
- *
- * The forceflags are unused.
+ * If curhat is not NULL, then we only unload the translation
+ * for the given process, otherwise all translations are unloaded.
*/
-
-/*ARGSUSED*/
-static int
-hati_pageunload(struct page *pp, uint_t pg_szcd, uint_t forceflag)
+void
+hat_page_inval(struct page *pp, uint_t pg_szcd, struct hat *curhat)
{
page_t *cur_pp = pp;
hment_t *hm;
@@ -3395,15 +3394,10 @@ hati_pageunload(struct page *pp, uint_t pg_szcd, uint_t forceflag)
htable_t *ht;
uint_t entry;
level_t level;
+ ulong_t cnt;
XPV_DISALLOW_MIGRATE();
- /*
- * prevent recursion due to kmem_free()
- */
- ++curthread->t_hatdepth;
- ASSERT(curthread->t_hatdepth < 16);
-
#if defined(__amd64)
/*
* clear the vpm ref.
@@ -3416,6 +3410,8 @@ hati_pageunload(struct page *pp, uint_t pg_szcd, uint_t forceflag)
* The loop with next_size handles pages with multiple pagesize mappings
*/
next_size:
+ if (curhat != NULL)
+ cnt = hat_page_getshare(cur_pp);
for (;;) {
/*
@@ -3427,14 +3423,13 @@ next_size:
if (hm == NULL) {
x86_hm_exit(cur_pp);
+curproc_done:
/*
* If not part of a larger page, we're done.
*/
if (cur_pp->p_szc <= pg_szcd) {
- ASSERT(curthread->t_hatdepth > 0);
- --curthread->t_hatdepth;
XPV_ALLOW_MIGRATE();
- return (0);
+ return;
}
/*
@@ -3453,8 +3448,20 @@ next_size:
* If this mapping size matches, remove it.
*/
level = ht->ht_level;
- if (level == pg_szcd)
- break;
+ if (level == pg_szcd) {
+ if (curhat == NULL || ht->ht_hat == curhat)
+ break;
+ /*
+ * Unloading only the given process but it's
+ * not the hat for the current process. Leave
+ * entry in place. Also do a safety check to
+ * ensure we don't get in an infinite loop
+ */
+ if (cnt-- == 0) {
+ x86_hm_exit(cur_pp);
+ goto curproc_done;
+ }
+ }
}
/*
@@ -3464,14 +3471,44 @@ next_size:
hm = hati_page_unmap(cur_pp, ht, entry);
if (hm != NULL)
hment_free(hm);
+
+ /* Perform check above for being part of a larger page. */
+ if (curhat != NULL)
+ goto curproc_done;
}
}
+/*
+ * Unload translations to a page. If unloadflag is HAT_CURPROC_PGUNLOAD, then
+ * we only unload the translation for the current process, otherwise all
+ * translations are unloaded.
+ */
+static int
+hati_pageunload(struct page *pp, uint_t pg_szcd, uint_t unloadflag)
+{
+ struct hat *curhat = NULL;
+
+ /*
+ * prevent recursion due to kmem_free()
+ */
+ ++curthread->t_hatdepth;
+ ASSERT(curthread->t_hatdepth < 16);
+
+ if (unloadflag == HAT_CURPROC_PGUNLOAD)
+ curhat = curthread->t_procp->p_as->a_hat;
+
+ hat_page_inval(pp, pg_szcd, curhat);
+
+ ASSERT(curthread->t_hatdepth > 0);
+ --curthread->t_hatdepth;
+ return (0);
+}
+
int
-hat_pageunload(struct page *pp, uint_t forceflag)
+hat_pageunload(struct page *pp, uint_t unloadflag)
{
ASSERT(PAGE_EXCL(pp));
- return (hati_pageunload(pp, 0, forceflag));
+ return (hati_pageunload(pp, 0, unloadflag));
}
/*
diff --git a/usr/src/uts/i86pc/vm/vm_machdep.c b/usr/src/uts/i86pc/vm/vm_machdep.c
index 2212202a01..1c2bd3e0ec 100644
--- a/usr/src/uts/i86pc/vm/vm_machdep.c
+++ b/usr/src/uts/i86pc/vm/vm_machdep.c
@@ -24,6 +24,7 @@
/*
* Copyright (c) 2010, Intel Corporation.
* All rights reserved.
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -623,10 +624,8 @@ void
map_addr(caddr_t *addrp, size_t len, offset_t off, int vacalign, uint_t flags)
{
struct proc *p = curproc;
- caddr_t userlimit = (flags & _MAP_LOW32) ?
- (caddr_t)_userlimit32 : p->p_as->a_userlimit;
-
- map_addr_proc(addrp, len, off, vacalign, userlimit, curproc, flags);
+ map_addr_proc(addrp, len, off, vacalign,
+ map_userlimit(p, p->p_as, flags), curproc, flags);
}
/*ARGSUSED*/
diff --git a/usr/src/uts/i86xpv/os/xpv_timestamp.c b/usr/src/uts/i86xpv/os/xpv_timestamp.c
index d7d13f984b..8b7d226305 100644
--- a/usr/src/uts/i86xpv/os/xpv_timestamp.c
+++ b/usr/src/uts/i86xpv/os/xpv_timestamp.c
@@ -22,6 +22,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
*/
#include <sys/types.h>
@@ -71,6 +72,11 @@ static volatile hrtime_t hrtime_last;
static hrtime_t hrtime_suspend_time;
static hrtime_t hrtime_addend;
+volatile uint32_t hres_lock;
+hrtime_t hres_last_tick;
+int64_t hrestime_adj;
+volatile timestruc_t hrestime;
+
/*
* These functions are used in DTrace probe context, and must be removed from
* fbt consideration. Currently fbt ignores all weak symbols, so this will
diff --git a/usr/src/uts/intel/Makefile b/usr/src/uts/intel/Makefile
index af65cc095e..a30bb132f0 100644
--- a/usr/src/uts/intel/Makefile
+++ b/usr/src/uts/intel/Makefile
@@ -65,7 +65,7 @@ install_h.prereq := TARGET= install_h
.PARALLEL: $(PARALLEL_KMODS) $(XMODS) config $(LINT_DEPS)
-def all install clean clobber modlist: $(KMODS) $(XMODS) config
+def all install clean clobber modlist: genassym $(KMODS) $(XMODS) config
clobber: clobber.targ
@@ -107,7 +107,7 @@ CLOBBERFILES += $(PRIVS_C)
# intel/dtrace depends on i86pc/genassym, so we need to build both
# i86pc/genassym and intel/genassym.
#
-all.prereq install.prereq def.prereq: genunix FRC
+all.prereq install.prereq def.prereq: genassym genunix FRC
@cd ../i86pc/genassym; pwd; $(MAKE) $(@:%.prereq=%)
#
@@ -125,7 +125,7 @@ genunix: $(PRIVS_C)
modlintlib clean.lint: $(LINT_KMODS) $(XMODS)
-$(KMODS) $(SUBDIRS) config: FRC
+genassym $(KMODS) $(SUBDIRS) config: FRC
@cd $@; pwd; $(MAKE) $(NO_STATE) $(TARGET)
$(XMODS): FRC
diff --git a/usr/src/uts/intel/Makefile.files b/usr/src/uts/intel/Makefile.files
index 9f964d9636..12b23b14df 100644
--- a/usr/src/uts/intel/Makefile.files
+++ b/usr/src/uts/intel/Makefile.files
@@ -21,7 +21,7 @@
#
# Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
-# Copyright (c) 2013, Joyent, Inc. All rights reserved.
+# Copyright 2016, Joyent, Inc.
# Copyright 2016 Nexenta Systems, Inc.
#
@@ -100,6 +100,29 @@ GENUNIX_OBJS += \
#
CORE_OBJS += \
prmachdep.o
+
+LX_CGROUP_OBJS += \
+ cgrps_node.o \
+ cgrps_vfsops.o \
+ cgrps_vnops.o
+
+LX_DEVFS_OBJS += \
+ lxd_node.o \
+ lxd_vfsops.o \
+ lxd_vnops.o
+
+LX_PROC_OBJS += \
+ lx_prsubr.o \
+ lx_prvfsops.o \
+ lx_prvnops.o
+
+LX_SYS_OBJS += \
+ lx_syssubr.o \
+ lx_sysvfsops.o \
+ lx_sysvnops.o
+
+LX_AUTOFS_OBJS += \
+ lx_autofs.o
#
# ZFS file system module
@@ -186,8 +209,9 @@ ACPICA_OBJS += dbcmds.o dbdisply.o \
dbexec.o dbfileio.o dbhistry.o dbinput.o dbstats.o \
dbutils.o dbxface.o evevent.o evgpe.o evgpeblk.o \
evmisc.o evregion.o evrgnini.o evsci.o evxface.o \
- evxfevnt.o evxfregn.o hwacpi.o hwgpe.o hwregs.o \
- hwsleep.o hwtimer.o dsfield.o dsinit.o dsmethod.o \
+ evxfevnt.o evxfregn.o hwacpi.o hwesleep.o hwgpe.o \
+ hwregs.o hwsleep.o hwtimer.o hwxfsleep.o dsfield.o \
+ dsinit.o dsmethod.o \
dsmthdat.o dsobject.o dsopcode.o dsutils.o dswexec.o \
dswload.o dswscope.o dswstate.o exconfig.o exconvrt.o \
excreate.o exdump.o exfield.o exfldio.o exmisc.o \
@@ -199,16 +223,18 @@ ACPICA_OBJS += dbcmds.o dbdisply.o \
nsalloc.o nsdump.o nsdumpdv.o nseval.o nsinit.o \
nsload.o nsnames.o nsobject.o nsparse.o nssearch.o \
nsutils.o nswalk.o nsxfeval.o nsxfname.o nsxfobj.o \
- rsaddr.o rscalc.o rscreate.o rsdump.o \
+ rsaddr.o rscalc.o rscreate.o rsdump.o rsserial.o \
rsinfo.o rsio.o rsirq.o rslist.o rsmemory.o rsmisc.o \
rsutils.o rsxface.o tbfadt.o tbfind.o tbinstal.o \
- tbutils.o tbxface.o tbxfroot.o \
+ tbutils.o tbxface.o tbxfroot.o utaddress.o \
utalloc.o utclib.o utcopy.o utdebug.o utdelete.o \
uteval.o utglobal.o utinit.o utmath.o utmisc.o \
- utobject.o utresrc.o utxface.o acpica.o acpi_enum.o \
+ utobject.o utresrc.o utxface.o utxfmutex.o acpica.o \
+ acpi_enum.o \
master_ops.o osl.o osl_ml.o acpica_ec.o utcache.o \
utmutex.o utstate.o dmbuffer.o dmnames.o dmobject.o \
- dmopcode.o dmresrc.o dmresrcl.o dmresrcs.o dmutils.o \
+ dmopcode.o dmresrc.o dmresrcl.o dmresrcl2.o dmresrcs.o \
+ dmutils.o \
dmwalk.o psloop.o nspredef.o hwxface.o hwvalid.o \
utlock.o utids.o nsrepair.o nsrepair2.o \
dbmethod.o dbnames.o dsargs.o dscontrol.o dswload2.o \
@@ -264,6 +290,57 @@ IOMMULIB_OBJS = iommulib.o
#
SN1_BRAND_OBJS = sn1_brand.o sn1_brand_asm.o
S10_BRAND_OBJS = s10_brand.o s10_brand_asm.o
+LX_BRAND_OBJS = \
+ lx_access.o \
+ lx_aio.o \
+ lx_archdep.o \
+ lx_auxv.o \
+ lx_brand.o \
+ lx_brk.o \
+ lx_chmod.o \
+ lx_chown.o \
+ lx_clone.o \
+ lx_close.o \
+ lx_cpu.o \
+ lx_errno.o \
+ lx_epoll.o \
+ lx_fallocate.o \
+ lx_fcntl.o \
+ lx_futex.o \
+ lx_getcwd.o \
+ lx_getdents.o \
+ lx_getpid.o \
+ lx_getrandom.o \
+ lx_id.o \
+ lx_ioctl.o \
+ lx_ioprio.o \
+ lx_kill.o \
+ lx_link.o \
+ lx_misc.o \
+ lx_mkdir.o \
+ lx_modify_ldt.o \
+ lx_open.o \
+ lx_personality.o \
+ lx_pid.o \
+ lx_pipe.o \
+ lx_poll.o \
+ lx_prctl.o \
+ lx_ptrace.o \
+ lx_rlimit.o \
+ lx_rw.o \
+ lx_sched.o \
+ lx_signal.o \
+ lx_signum.o \
+ lx_socket.o \
+ lx_stat.o \
+ lx_sync.o \
+ lx_syscall.o \
+ lx_sysinfo.o \
+ lx_thread_area.o \
+ lx_timer.o \
+ lx_uname.o \
+ lx_wait.o \
+ lx_xattr.o
#
# special files
diff --git a/usr/src/uts/intel/Makefile.intel b/usr/src/uts/intel/Makefile.intel
index 440b0228de..7f93a02262 100644
--- a/usr/src/uts/intel/Makefile.intel
+++ b/usr/src/uts/intel/Makefile.intel
@@ -21,7 +21,7 @@
#
# Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2013 Andrew Stormont. All rights reserved.
-# Copyright (c) 2015, Joyent, Inc. All rights reserved.
+# Copyright 2016, Joyent, Inc.
# Copyright 2016 Garrett D'Amore <garrett@damore.org>
# Copyright 2016 Nexenta Systems, Inc.
#
@@ -46,6 +46,7 @@ PLATFORM = i86pc
#
UNIX_DIR = $(UTSBASE)/i86pc/unix
GENLIB_DIR = $(UTSBASE)/intel/genunix
+GENASSYM_DIR = $(UTSBASE)/intel/genassym
IPDRV_DIR = $(UTSBASE)/intel/ip
MODSTUBS_DIR = $(UNIX_DIR)
DSF_DIR = $(UTSBASE)/$(PLATFORM)/genassym
@@ -139,6 +140,7 @@ ASFLAGS_XARCH_64 = $(amd64_ASFLAGS)
ASFLAGS_XARCH = $(ASFLAGS_XARCH_$(CLASS))
ASFLAGS += $(ASFLAGS_XARCH)
+AS_INC_PATH += -I$(GENASSYM_DIR)/$(OBJS_DIR)
#
# Define the base directory for installation.
@@ -240,6 +242,7 @@ DRV_KMODS += devinfo
DRV_KMODS += dld
DRV_KMODS += dlpistub
DRV_KMODS_32 += dnet
+DRV_KMODS += dr_sas
DRV_KMODS += dump
DRV_KMODS += ecpp
DRV_KMODS += emlxs
@@ -253,6 +256,7 @@ DRV_KMODS += i8042
DRV_KMODS += i915
DRV_KMODS += icmp
DRV_KMODS += icmp6
+DRV_KMODS += inotify
DRV_KMODS += intel_nb5000
DRV_KMODS += intel_nhm
DRV_KMODS += ip
@@ -287,6 +291,7 @@ DRV_KMODS += mpt_sas
DRV_KMODS += mr_sas
DRV_KMODS += mwl
DRV_KMODS += nca
+DRV_KMODS += nfp
DRV_KMODS += nsmb
DRV_KMODS += nulldriver
DRV_KMODS += nv_sata
@@ -356,6 +361,8 @@ DRV_KMODS += uath
DRV_KMODS += urtw
DRV_KMODS += vgatext
DRV_KMODS += heci
+DRV_KMODS += vmxnet
+DRV_KMODS += vnd
DRV_KMODS += vnic
DRV_KMODS += vscan
DRV_KMODS += wc
@@ -364,6 +371,7 @@ DRV_KMODS += wpi
DRV_KMODS += xge
DRV_KMODS += yge
DRV_KMODS += zcons
+DRV_KMODS += zfd
DRV_KMODS += zyd
DRV_KMODS += simnet
DRV_KMODS += stmf
@@ -406,6 +414,7 @@ DRV_KMODS += igb
DRV_KMODS += ipmi
DRV_KMODS += iprb
DRV_KMODS += ixgbe
+DRV_KMODS += i40e
DRV_KMODS += vr
#
@@ -492,6 +501,15 @@ DRV_KMODS += usbftdi
DRV_KMODS += usbecm
#
+# USBGEM modules
+#
+DRV_KMODS += usbgem
+DRV_KMODS += axf
+DRV_KMODS += udmf
+DRV_KMODS += upf
+DRV_KMODS += urf
+
+#
# 1394 modules
#
MISC_KMODS += s1394 sbp2
@@ -515,7 +533,8 @@ MISC_KMODS += md_sp
#
# Brand modules
#
-BRAND_KMODS += sn1_brand s10_brand
+BRAND_KMODS += sn1_brand s10_brand lx_brand
+DRV_KMODS += lx_systrace lx_ptm lx_netlink
#
# Exec Class Modules (/kernel/exec):
@@ -530,10 +549,10 @@ SCHED_KMODS += IA RT TS RT_DPTBL TS_DPTBL FSS FX FX_DPTBL SDC
#
# File System Modules (/kernel/fs):
#
-FS_KMODS += autofs ctfs dcfs dev devfs fdfs fifofs hsfs lofs
-FS_KMODS += mntfs namefs nfs objfs zfs zut
-FS_KMODS += pcfs procfs sockfs specfs tmpfs udfs ufs sharefs
-FS_KMODS += smbfs bootfs
+FS_KMODS += autofs ctfs dcfs dev devfs fdfs fifofs hsfs hyprlofs
+FS_KMODS += lofs lxautofs lx_proc lxprocfs mntfs namefs nfs objfs zfs zut
+FS_KMODS += pcfs procfs sockfs specfs tmpfs udfs ufs sharefs lx_sysfs
+FS_KMODS += smbfs bootfs lx_cgroup lx_devfs
#
# Streams Modules (/kernel/strmod):
@@ -593,6 +612,7 @@ MISC_KMODS += drm
MISC_KMODS += fssnap_if
MISC_KMODS += gda
MISC_KMODS += gld
+MISC_KMODS += gsqueue
MISC_KMODS += hidparser
MISC_KMODS += hook
MISC_KMODS += hpcsvc
@@ -709,6 +729,12 @@ MAC_KMODS += mac_wifi
MAC_KMODS += mac_ib
#
+# Overlay related modules (/kernel/overlay)
+#
+DRV_KMODS += overlay
+OVERLAY_KMODS += vxlan
+
+#
# socketmod (kernel/socketmod)
#
SOCKET_KMODS += sockpfp
@@ -716,6 +742,7 @@ SOCKET_KMODS += socksctp
SOCKET_KMODS += socksdp
SOCKET_KMODS += sockrds
SOCKET_KMODS += ksslf
+SOCKET_KMODS += datafilt
#
# kiconv modules (/kernel/kiconv):
diff --git a/usr/src/uts/intel/Makefile.rules b/usr/src/uts/intel/Makefile.rules
index 2b21ba76c7..2c00dd3c1d 100644
--- a/usr/src/uts/intel/Makefile.rules
+++ b/usr/src/uts/intel/Makefile.rules
@@ -178,6 +178,10 @@ $(OBJS_DIR)/%.o: $(UTSBASE)/intel/io/intel_nb5000/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/intel/io/vmxnet/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/intel/io/intel_nhm/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -466,6 +470,9 @@ $(LINTS_DIR)/%.ln: $(UTSBASE)/intel/io/scsi/targets/%.c
$(LINTS_DIR)/%.ln: $(UTSBASE)/intel/io/vgatext/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/intel/io/vmxnet/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/intel/io/heci/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/intel/axf/Makefile b/usr/src/uts/intel/axf/Makefile
new file mode 100644
index 0000000000..e4665c0186
--- /dev/null
+++ b/usr/src/uts/intel/axf/Makefile
@@ -0,0 +1,73 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = axf
+OBJECTS = $(AXF_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(AXF_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+CPPFLAGS += -I$(UTSBASE)/common/io/usbgem
+CPPFLAGS += -DVERSION=\"2.0.2\"
+CPPFLAGS += -DUSBGEM_CONFIG_GLDv3
+LDFLAGS += -dy -N misc/mac -N drv/ip -N misc/usba -N misc/usbgem
+
+CERRWARN += -_gcc=-Wno-unused-function
+CERRWARN += -_gcc=-Wno-unused-variable
+CERRWARN += -_gcc=-Wno-unused-label
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/brand/common/brand_asm.h b/usr/src/uts/intel/brand/common/brand_asm.h
index 1d540db2a9..c820d8e187 100644
--- a/usr/src/uts/intel/brand/common/brand_asm.h
+++ b/usr/src/uts/intel/brand/common/brand_asm.h
@@ -161,7 +161,7 @@ extern "C" {
#define GET_P_BRAND_DATA(sp, pcnt, reg) \
GET_PROCP(sp, pcnt, reg); \
- mov P_BRAND_DATA(reg), reg /* get p_brand_data */
+ mov __P_BRAND_DATA(reg), reg /* get p_brand_data */
/*
* Each of the following macros returns to the standard syscall codepath if
diff --git a/usr/src/uts/intel/brand/lx/lx_archdep.c b/usr/src/uts/intel/brand/lx/lx_archdep.c
new file mode 100644
index 0000000000..0117ece67f
--- /dev/null
+++ b/usr/src/uts/intel/brand/lx/lx_archdep.c
@@ -0,0 +1,1714 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * LX brand Intel-specific routines.
+ */
+
+#include <sys/types.h>
+#include <sys/sunddi.h>
+#include <sys/ddi.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_misc.h>
+#include <sys/privregs.h>
+#include <sys/pcb.h>
+#include <sys/archsystm.h>
+#include <sys/stack.h>
+#include <sys/sdt.h>
+#include <sys/sysmacros.h>
+#include <sys/psw.h>
+#include <lx_errno.h>
+
+/*
+ * Argument constants for fix_segreg.
+ * See usr/src/uts/intel/ia32/os/archdep.c for the originals.
+ */
+#define IS_CS 1
+#define IS_NOT_CS 0
+
+extern greg_t fix_segreg(greg_t, int, model_t);
+
+
+#define LX_REG(ucp, r) ((ucp)->uc_mcontext.gregs[(r)])
+
+#define PSLMERGE(oldval, newval) \
+ (((oldval) & ~PSL_USERMASK) | ((newval) & PSL_USERMASK))
+
+#ifdef __amd64
+/* 64-bit native user_regs_struct */
+typedef struct lx_user_regs64 {
+ int64_t lxur_r15;
+ int64_t lxur_r14;
+ int64_t lxur_r13;
+ int64_t lxur_r12;
+ int64_t lxur_rbp;
+ int64_t lxur_rbx;
+ int64_t lxur_r11;
+ int64_t lxur_r10;
+ int64_t lxur_r9;
+ int64_t lxur_r8;
+ int64_t lxur_rax;
+ int64_t lxur_rcx;
+ int64_t lxur_rdx;
+ int64_t lxur_rsi;
+ int64_t lxur_rdi;
+ int64_t lxur_orig_rax;
+ int64_t lxur_rip;
+ int64_t lxur_xcs;
+ int64_t lxur_rflags;
+ int64_t lxur_rsp;
+ int64_t lxur_xss;
+ int64_t lxur_xfs_base;
+ int64_t lxur_xgs_base;
+ int64_t lxur_xds;
+ int64_t lxur_xes;
+ int64_t lxur_xfs;
+ int64_t lxur_xgs;
+} lx_user_regs64_t;
+
+/* 64-bit native user_fpregs_struct */
+typedef struct lx_user_fpregs64 {
+ uint16_t lxufp_cwd;
+ uint16_t lxufp_swd;
+ uint16_t lxufp_ftw;
+ uint16_t lxufp_fop;
+ uint64_t lxufp_rip;
+ uint64_t lxufp_rdp;
+ uint32_t lxufp_mxcsr;
+ uint32_t lxufp_mxcr_mask;
+ /* 8*16 bytes for each FP-reg = 128 bytes */
+ uint32_t lxufp_st_space[32];
+ /* 16*16 bytes for each XMM-reg = 256 bytes */
+ uint32_t lxufp_xmm_space[64];
+ uint32_t lxufp_padding[24];
+} lx_user_fpregs64_t;
+
+/* 64-bit native user_struct */
+typedef struct lx_user64 {
+ lx_user_regs64_t lxu_regs;
+ int32_t lxu_fpvalid;
+ int32_t lxu_pad0;
+ lx_user_fpregs64_t lxu_i387;
+ uint64_t lxu_tsize;
+ uint64_t lxu_dsize;
+ uint64_t lxu_ssize;
+ uint64_t lxu_start_code;
+ uint64_t lxu_start_stack;
+ int64_t lxu_signal;
+ int32_t lxu_reserved;
+ int32_t lxu_pad1;
+ /* help gdb to locate user_regs structure */
+ caddr_t lxu_ar0;
+ /* help gdb to locate user_fpregs structure */
+ caddr_t lxu_fpstate;
+ uint64_t lxu_magic;
+ char lxu_comm[32];
+ uint64_t lxu_debugreg[8];
+ uint64_t lxu_error_code;
+ uint64_t lxu_fault_address;
+} lx_user64_t;
+
+#endif /* __amd64 */
+
+/* 32-bit native user_regs_struct */
+typedef struct lx_user_regs32 {
+ int32_t lxur_ebx;
+ int32_t lxur_ecx;
+ int32_t lxur_edx;
+ int32_t lxur_esi;
+ int32_t lxur_edi;
+ int32_t lxur_ebp;
+ int32_t lxur_eax;
+ int32_t lxur_xds;
+ int32_t lxur_xes;
+ int32_t lxur_xfs;
+ int32_t lxur_xgs;
+ int32_t lxur_orig_eax;
+ int32_t lxur_eip;
+ int32_t lxur_xcs;
+ int32_t lxur_eflags;
+ int32_t lxur_esp;
+ int32_t lxur_xss;
+} lx_user_regs32_t;
+
+/* 32-bit native user_fpregs_struct */
+typedef struct lx_user_fpregs32 {
+ int32_t lxufp_cwd;
+ int32_t lxufp_swd;
+ int32_t lxufp_twd;
+ int32_t lxufp_fip;
+ int32_t lxufp_fcs;
+ int32_t lxufp_foo;
+ int32_t lxufp_fos;
+ int32_t lxufp_st_space[20];
+} lx_user_fpregs32_t;
+
+/* 32-bit native user_fpxregs_struct */
+typedef struct lx_user_fpxregs32 {
+ uint16_t lxufpx_cwd;
+ uint16_t lxufpx_swd;
+ uint16_t lxufpx_twd;
+ uint16_t lxufpx_fop;
+ int32_t lxufpx_fip;
+ int32_t lxufpx_fcs;
+ int32_t lxufpx_foo;
+ int32_t lxufpx_fos;
+ int32_t lxufpx_mxcsr;
+ int32_t lxufpx_reserved;
+ /* 8*16 bytes for each FP-reg = 128 bytes */
+ int32_t lxufpx_st_space[32];
+ /* 8*16 bytes for each XMM-reg = 128 bytes */
+ int32_t lxufpx_xmm_space[32];
+ int32_t lxufpx_padding[56];
+} lx_user_fpxregs32_t;
+
+/* 32-bit native user_struct */
+typedef struct lx_user32 {
+ lx_user_regs32_t lxu_regs;
+ int32_t lxu_fpvalid;
+ lx_user_fpregs32_t lxu_i387;
+ uint32_t lxu_tsize;
+ uint32_t lxu_dsize;
+ uint32_t lxu_ssize;
+ uint32_t lxu_start_code;
+ uint32_t lxu_start_stack;
+ int32_t lxu_signal;
+ int32_t lxu_reserved;
+ caddr32_t lxu_ar0;
+ caddr32_t lxu_fpstate;
+ uint32_t lxu_magic;
+ char lxu_comm[32];
+ int32_t lxu_debugreg[8];
+} lx_user32_t;
+
+/*
+ * Certain version of strace (on centos6 for example) use the %cs value to
+ * determine what kind of process is being traced. Here is a sample comment:
+ * Check CS register value. On x86-64 linux it is:
+ * 0x33 for long mode (64 bit and x32))
+ * 0x23 for compatibility mode (32 bit)
+ * %ds = 0x2b for x32 mode (x86-64 in 32 bit)
+ * We can't change the %cs value in the ucp (see setgregs and _sys_rtt) so we
+ * emulate the expected value for ptrace use.
+ */
+#define LX_CS_64BIT 0x33
+#define LX_CS_32BIT 0x23
+
+extern int getsetcontext(int, void *);
+#if defined(_SYSCALL32_IMPL)
+extern int getsetcontext32(int, void *);
+#endif
+
+static int
+lx_rw_uc(proc_t *p, void *ucp, void *kucp, size_t ucsz, boolean_t writing)
+{
+ int error = 0;
+ size_t rem = ucsz;
+ off_t pos = 0;
+
+ VERIFY(MUTEX_HELD(&p->p_lock));
+
+ /*
+ * Grab P_PR_LOCK so that we can drop p_lock while doing I/O.
+ */
+ sprlock_proc(p);
+
+ /*
+ * Drop p_lock while we do I/O to avoid deadlock with the clock thread.
+ */
+ mutex_exit(&p->p_lock);
+ while (rem != 0) {
+ uintptr_t addr = (uintptr_t)ucp + pos;
+ size_t len = MIN(rem, PAGESIZE - (addr & PAGEOFFSET));
+
+ if (writing) {
+ error = uwrite(p, kucp + pos, len, addr);
+ } else {
+ error = uread(p, kucp + pos, len, addr);
+ }
+
+ if (error != 0) {
+ break;
+ }
+
+ rem -= len;
+ pos += len;
+ }
+ mutex_enter(&p->p_lock);
+
+ sprunlock(p);
+ mutex_enter(&p->p_lock);
+
+ return (error);
+}
+
+/*
+ * Read a ucontext_t from the target process, which may or may not be
+ * the current process.
+ */
+static int
+lx_read_uc(proc_t *p, void *ucp, void *kucp, size_t ucsz)
+{
+ return (lx_rw_uc(p, ucp, kucp, ucsz, B_FALSE));
+}
+
+/*
+ * Write a ucontext_t to the target process, which may or may not be
+ * the current process.
+ */
+static int
+lx_write_uc(proc_t *p, void *ucp, void *kucp, size_t ucsz)
+{
+ return (lx_rw_uc(p, ucp, kucp, ucsz, B_TRUE));
+}
+
+static void
+lx_getfpregs32(lx_lwp_data_t *lwpd, lx_user_fpregs32_t *lfp)
+{
+#ifdef __amd64
+ fpregset32_t fp;
+ getfpregs32(lwpd->br_lwp, &fp);
+#else /* __i386 */
+ fpregset_t fp;
+ getfpregs(lwpd->br_lwp, &fp);
+#endif /* __amd64 */
+
+ /*
+ * The fpchip_state.state field should correspond to all 27 fields in
+ * the 32-bit structure.
+ */
+ bcopy(&fp.fp_reg_set.fpchip_state.state, lfp, sizeof (*lfp));
+}
+
+static void
+lx_setfpregs32(lx_lwp_data_t *lwpd, lx_user_fpregs32_t *lfp)
+{
+#ifdef __amd64
+ fpregset32_t fp;
+#else /* __i386 */
+ fpregset_t fp;
+#endif /* __amd64 */
+
+ /*
+ * The fpchip_state field should correspond to all 27 fields in the
+ * native 32-bit structure.
+ */
+ bcopy(lfp, &fp.fp_reg_set.fpchip_state.state, sizeof (*lfp));
+
+#ifdef __amd64
+ setfpregs32(lwpd->br_lwp, &fp);
+#else /* __i386 */
+ setfpregs(lwpd->br_lwp, &fp);
+#endif /* __amd64 */
+}
+
+static int
+lx_get_user_regs32_uc(klwp_t *lwp, void *ucp, lx_user_regs32_t *lxrp)
+{
+ proc_t *p = lwptoproc(lwp);
+ ucontext32_t uc;
+
+ if (lx_read_uc(p, ucp, &uc, sizeof (uc)) != 0) {
+ return (-1);
+ }
+
+ lxrp->lxur_ebx = LX_REG(&uc, EBX);
+ lxrp->lxur_ecx = LX_REG(&uc, ECX);
+ lxrp->lxur_edx = LX_REG(&uc, EDX);
+ lxrp->lxur_esi = LX_REG(&uc, ESI);
+ lxrp->lxur_edi = LX_REG(&uc, EDI);
+ lxrp->lxur_ebp = LX_REG(&uc, EBP);
+ lxrp->lxur_eax = LX_REG(&uc, EAX);
+ lxrp->lxur_orig_eax = 0;
+
+ lxrp->lxur_eip = LX_REG(&uc, EIP);
+ lxrp->lxur_eflags = LX_REG(&uc, EFL);
+ lxrp->lxur_esp = LX_REG(&uc, UESP);
+ lxrp->lxur_xss = LX_REG(&uc, SS);
+
+ /* emulated %cs, see defines */
+ lxrp->lxur_xcs = LX_CS_32BIT;
+ lxrp->lxur_xds = LX_REG(&uc, DS);
+ lxrp->lxur_xes = LX_REG(&uc, ES);
+ lxrp->lxur_xfs = LX_REG(&uc, FS);
+ lxrp->lxur_xgs = LX_REG(&uc, GS);
+ return (0);
+}
+
+static int
+lx_get_user_regs32(lx_lwp_data_t *lwpd, lx_user_regs32_t *lxrp)
+{
+ klwp_t *lwp = lwpd->br_lwp;
+ struct regs *rp = lwptoregs(lwp);
+ void *ucp;
+#ifdef __amd64
+ struct pcb *pcb = &lwp->lwp_pcb;
+#endif
+
+ VERIFY(lwp_getdatamodel(lwp) == DATAMODEL_ILP32);
+
+ switch (lx_regs_location(lwpd, &ucp, B_FALSE)) {
+ case LX_REG_LOC_UNAVAIL:
+ return (-1);
+
+ case LX_REG_LOC_UCP:
+ return (lx_get_user_regs32_uc(lwp, ucp, lxrp));
+
+ case LX_REG_LOC_LWP:
+ /* transformation below */
+ break;
+
+ default:
+ VERIFY(0);
+ break;
+ }
+
+#ifdef __amd64
+ lxrp->lxur_ebx = (int32_t)rp->r_rbx;
+ lxrp->lxur_ecx = (int32_t)rp->r_rcx;
+ lxrp->lxur_edx = (int32_t)rp->r_rdx;
+ lxrp->lxur_esi = (int32_t)rp->r_rsi;
+ lxrp->lxur_edi = (int32_t)rp->r_rdi;
+ lxrp->lxur_ebp = (int32_t)rp->r_rbp;
+ lxrp->lxur_eax = (int32_t)rp->r_rax;
+ lxrp->lxur_orig_eax = 0;
+ lxrp->lxur_eip = (int32_t)rp->r_rip;
+ lxrp->lxur_eflags = (int32_t)rp->r_rfl;
+ lxrp->lxur_esp = (int32_t)rp->r_rsp;
+ lxrp->lxur_xss = (int32_t)rp->r_ss;
+
+ kpreempt_disable();
+ if (pcb->pcb_rupdate == 1) {
+ lxrp->lxur_xds = pcb->pcb_ds;
+ lxrp->lxur_xes = pcb->pcb_es;
+ lxrp->lxur_xfs = pcb->pcb_fs;
+ lxrp->lxur_xgs = pcb->pcb_gs;
+ } else {
+ lxrp->lxur_xds = rp->r_ds;
+ lxrp->lxur_xes = rp->r_es;
+ lxrp->lxur_xfs = rp->r_fs;
+ lxrp->lxur_xgs = rp->r_gs;
+ }
+ kpreempt_enable();
+#else /* __i386 */
+ lxrp->lxur_ebx = rp->r_ebx;
+ lxrp->lxur_ecx = rp->r_ecx;
+ lxrp->lxur_edx = rp->r_edx;
+ lxrp->lxur_esi = rp->r_esi;
+ lxrp->lxur_edi = rp->r_edi;
+ lxrp->lxur_ebp = rp->r_ebp;
+ lxrp->lxur_eax = rp->r_eax;
+ lxrp->lxur_orig_eax = 0;
+ lxrp->lxur_eip = rp->r_eip;
+ lxrp->lxur_eflags = rp->r_efl;
+ lxrp->lxur_esp = rp->r_esp;
+ lxrp->lxur_xss = rp->r_ss;
+
+ lxrp->lxur_xds = rp->r_ds;
+ lxrp->lxur_xes = rp->r_es;
+ lxrp->lxur_xfs = rp->r_fs;
+ lxrp->lxur_xgs = rp->r_gs;
+#endif /* __amd64 */
+
+ /* emulated %cs, see defines */
+ lxrp->lxur_xcs = LX_CS_32BIT;
+
+ if (lwpd->br_ptrace_whatstop == LX_PR_SYSENTRY) {
+ lxrp->lxur_eax = (int32_t)-lx_errno(ENOTSUP, EINVAL);
+ lxrp->lxur_orig_eax = (int32_t)lwpd->br_syscall_num;
+ } else if (lwpd->br_ptrace_whatstop == LX_PR_SYSEXIT) {
+ lxrp->lxur_orig_eax = (int32_t)lwpd->br_syscall_num;
+ }
+
+ return (0);
+}
+
+static int
+lx_set_user_regs32_uc(klwp_t *lwp, void *ucp, lx_user_regs32_t *lxrp)
+{
+ proc_t *p = lwptoproc(lwp);
+ ucontext32_t uc;
+
+ if (lx_read_uc(p, ucp, &uc, sizeof (uc)) != 0) {
+ return (-1);
+ }
+
+ /*
+ * Note: we currently ignore "lxur_orig_rax" here since this
+ * path should not be used for system call stops.
+ */
+ LX_REG(&uc, EBP) = lxrp->lxur_ebp;
+ LX_REG(&uc, EBX) = lxrp->lxur_ebx;
+ LX_REG(&uc, EAX) = lxrp->lxur_eax;
+ LX_REG(&uc, ECX) = lxrp->lxur_ecx;
+ LX_REG(&uc, EDX) = lxrp->lxur_edx;
+ LX_REG(&uc, ESI) = lxrp->lxur_esi;
+ LX_REG(&uc, EDI) = lxrp->lxur_edi;
+ LX_REG(&uc, EIP) = lxrp->lxur_eip;
+ LX_REG(&uc, EFL) = PSLMERGE(LX_REG(&uc, EFL), lxrp->lxur_eflags);
+ LX_REG(&uc, UESP) = lxrp->lxur_esp;
+ LX_REG(&uc, SS) = fix_segreg(lxrp->lxur_xss, IS_NOT_CS,
+ DATAMODEL_ILP32);
+
+ /* %cs is ignored because of our lies */
+ LX_REG(&uc, DS) = fix_segreg(lxrp->lxur_xds, IS_NOT_CS,
+ DATAMODEL_ILP32);
+ LX_REG(&uc, ES) = fix_segreg(lxrp->lxur_xes, IS_NOT_CS,
+ DATAMODEL_ILP32);
+ LX_REG(&uc, FS) = fix_segreg(lxrp->lxur_xfs, IS_NOT_CS,
+ DATAMODEL_ILP32);
+ LX_REG(&uc, GS) = fix_segreg(lxrp->lxur_xgs, IS_NOT_CS,
+ DATAMODEL_ILP32);
+
+ if (lx_write_uc(p, ucp, &uc, sizeof (uc)) != 0) {
+ return (-1);
+ }
+ return (0);
+}
+
+static int
+lx_set_user_regs32(lx_lwp_data_t *lwpd, lx_user_regs32_t *lxrp)
+{
+ klwp_t *lwp = lwpd->br_lwp;
+ struct regs *rp = lwptoregs(lwp);
+ void *ucp;
+#ifdef __amd64
+ struct pcb *pcb = &lwp->lwp_pcb;
+#endif
+
+ VERIFY(lwp_getdatamodel(lwp) == DATAMODEL_ILP32);
+
+ switch (lx_regs_location(lwpd, &ucp, B_TRUE)) {
+ case LX_REG_LOC_UNAVAIL:
+ return (-1);
+
+ case LX_REG_LOC_UCP:
+ return (lx_set_user_regs32_uc(lwp, ucp, lxrp));
+
+ case LX_REG_LOC_LWP:
+ /* transformation below */
+ break;
+
+ default:
+ VERIFY(0);
+ break;
+ }
+
+#ifdef __amd64
+ rp->r_rbx = (int32_t)lxrp->lxur_ebx;
+ rp->r_rcx = (int32_t)lxrp->lxur_ecx;
+ rp->r_rdx = (int32_t)lxrp->lxur_edx;
+ rp->r_rsi = (int32_t)lxrp->lxur_esi;
+ rp->r_rdi = (int32_t)lxrp->lxur_edi;
+ rp->r_rbp = (int32_t)lxrp->lxur_ebp;
+ rp->r_rax = (int32_t)lxrp->lxur_eax;
+ lwpd->br_syscall_num = (int)lxrp->lxur_orig_eax;
+ rp->r_rip = (int32_t)lxrp->lxur_eip;
+ rp->r_rfl = (int32_t)PSLMERGE(rp->r_rfl, lxrp->lxur_eflags);
+ rp->r_rsp = (int32_t)lxrp->lxur_esp;
+ rp->r_ss = (int32_t)fix_segreg(lxrp->lxur_xss, IS_NOT_CS,
+ DATAMODEL_ILP32);
+
+ kpreempt_disable();
+ pcb->pcb_rupdate = 1;
+ pcb->pcb_ds = fix_segreg(lxrp->lxur_xds, IS_NOT_CS, DATAMODEL_ILP32);
+ pcb->pcb_es = fix_segreg(lxrp->lxur_xes, IS_NOT_CS, DATAMODEL_ILP32);
+ pcb->pcb_fs = fix_segreg(lxrp->lxur_xfs, IS_NOT_CS, DATAMODEL_ILP32);
+ pcb->pcb_gs = fix_segreg(lxrp->lxur_xgs, IS_NOT_CS, DATAMODEL_ILP32);
+ kpreempt_enable();
+#else /* __i386 */
+ rp->r_ebx = lxrp->lxur_ebx;
+ rp->r_ecx = lxrp->lxur_ecx;
+ rp->r_edx = lxrp->lxur_edx;
+ rp->r_esi = lxrp->lxur_esi;
+ rp->r_edi = lxrp->lxur_edi;
+ rp->r_ebp = lxrp->lxur_ebp;
+ rp->r_eax = lxrp->lxur_eax;
+ lwpd->br_syscall_num = (int)lxrp->lxur_orig_eax;
+ rp->r_eip = lxrp->lxur_eip;
+ rp->r_efl = PSLMERGE(rp->r_efl, lxrp->lxur_eflags);
+ rp->r_esp = lxrp->lxur_esp;
+ rp->r_ss = fix_segreg(lxrp->lxur_xss, IS_NOT_CS, DATAMODEL_ILP32);
+
+ rp->r_ds = fix_segreg(lxrp->lxur_xds, IS_NOT_CS, DATAMODEL_ILP32);
+ rp->r_es = fix_segreg(lxrp->lxur_xes, IS_NOT_CS, DATAMODEL_ILP32);
+ rp->r_fs = fix_segreg(lxrp->lxur_xfs, IS_NOT_CS, DATAMODEL_ILP32);
+ rp->r_gs = fix_segreg(lxrp->lxur_xgs, IS_NOT_CS, DATAMODEL_ILP32);
+#endif /* __amd64 */
+
+ return (0);
+}
+
+#ifdef __amd64
+
+static void
+lx_getfpregs64(lx_lwp_data_t *lwpd, lx_user_fpregs64_t *lfp)
+{
+ fpregset_t fp;
+
+ getfpregs(lwpd->br_lwp, &fp);
+ /* Drop the extra illumos status/xstatus fields when copying state */
+ bcopy(&fp.fp_reg_set.fpchip_state, lfp, sizeof (*lfp));
+}
+
+static void
+lx_setfpregs64(lx_lwp_data_t *lwpd, lx_user_fpregs64_t *lfp)
+{
+ fpregset_t fp;
+
+ /*
+ * Since the Linux fpregs structure does not contain the same
+ * additional status register which illumos contains, we simply
+ * preserve the existing values when setting fp state.
+ */
+ getfpregs(lwpd->br_lwp, &fp);
+
+ /* Copy the identically formatted state */
+ bcopy(lfp, &fp.fp_reg_set.fpchip_state, sizeof (*lfp));
+
+ setfpregs(lwpd->br_lwp, &fp);
+}
+
+static int
+lx_get_user_regs64_uc(klwp_t *lwp, void *ucp, lx_user_regs64_t *lxrp)
+{
+ proc_t *p = lwptoproc(lwp);
+
+ switch (lwp_getdatamodel(lwp)) {
+ case DATAMODEL_LP64: {
+ ucontext_t uc;
+
+ if (lx_read_uc(p, ucp, &uc, sizeof (uc)) != 0) {
+ return (-1);
+ }
+
+ lxrp->lxur_r15 = LX_REG(&uc, REG_R15);
+ lxrp->lxur_r14 = LX_REG(&uc, REG_R14);
+ lxrp->lxur_r13 = LX_REG(&uc, REG_R13);
+ lxrp->lxur_r12 = LX_REG(&uc, REG_R12);
+ lxrp->lxur_rbp = LX_REG(&uc, REG_RBP);
+ lxrp->lxur_rbx = LX_REG(&uc, REG_RBX);
+ lxrp->lxur_r11 = LX_REG(&uc, REG_R11);
+ lxrp->lxur_r10 = LX_REG(&uc, REG_R10);
+ lxrp->lxur_r9 = LX_REG(&uc, REG_R9);
+ lxrp->lxur_r8 = LX_REG(&uc, REG_R8);
+ lxrp->lxur_rax = LX_REG(&uc, REG_RAX);
+ lxrp->lxur_rcx = LX_REG(&uc, REG_RCX);
+ lxrp->lxur_rdx = LX_REG(&uc, REG_RDX);
+ lxrp->lxur_rsi = LX_REG(&uc, REG_RSI);
+ lxrp->lxur_rdi = LX_REG(&uc, REG_RDI);
+ lxrp->lxur_orig_rax = 0;
+ lxrp->lxur_rip = LX_REG(&uc, REG_RIP);
+ lxrp->lxur_rflags = LX_REG(&uc, REG_RFL);
+ lxrp->lxur_rsp = LX_REG(&uc, REG_RSP);
+ lxrp->lxur_xss = LX_REG(&uc, REG_SS);
+ lxrp->lxur_xfs_base = LX_REG(&uc, REG_FSBASE);
+ lxrp->lxur_xgs_base = LX_REG(&uc, REG_GSBASE);
+
+ lxrp->lxur_xds = LX_REG(&uc, REG_DS);
+ lxrp->lxur_xes = LX_REG(&uc, REG_ES);
+ lxrp->lxur_xfs = LX_REG(&uc, REG_FS);
+ lxrp->lxur_xgs = LX_REG(&uc, REG_GS);
+
+ /* emulated %cs, see defines */
+ lxrp->lxur_xcs = LX_CS_64BIT;
+ return (0);
+ }
+
+ case DATAMODEL_ILP32: {
+ ucontext32_t uc;
+
+ if (lx_read_uc(p, ucp, &uc, sizeof (uc)) != 0) {
+ return (-1);
+ }
+
+ lxrp->lxur_r15 = 0;
+ lxrp->lxur_r14 = 0;
+ lxrp->lxur_r13 = 0;
+ lxrp->lxur_r12 = 0;
+ lxrp->lxur_r11 = 0;
+ lxrp->lxur_r10 = 0;
+ lxrp->lxur_r9 = 0;
+ lxrp->lxur_r8 = 0;
+ lxrp->lxur_rbp = LX_REG(&uc, EBP);
+ lxrp->lxur_rbx = LX_REG(&uc, EBX);
+ lxrp->lxur_rax = LX_REG(&uc, EAX);
+ lxrp->lxur_orig_rax = 0;
+ lxrp->lxur_rcx = LX_REG(&uc, ECX);
+ lxrp->lxur_rdx = LX_REG(&uc, EDX);
+ lxrp->lxur_rsi = LX_REG(&uc, ESI);
+ lxrp->lxur_rdi = LX_REG(&uc, EDI);
+ lxrp->lxur_rip = LX_REG(&uc, EIP);
+
+ lxrp->lxur_rflags = LX_REG(&uc, EFL);
+ lxrp->lxur_rsp = LX_REG(&uc, UESP);
+ lxrp->lxur_xss = LX_REG(&uc, SS);
+ lxrp->lxur_xfs_base = 0;
+ lxrp->lxur_xgs_base = 0;
+
+ lxrp->lxur_xds = LX_REG(&uc, DS);
+ lxrp->lxur_xes = LX_REG(&uc, ES);
+ lxrp->lxur_xfs = LX_REG(&uc, FS);
+ lxrp->lxur_xgs = LX_REG(&uc, GS);
+
+ /* See comment above re: %cs register */
+ lxrp->lxur_xcs = LX_CS_32BIT;
+ return (0);
+ }
+
+ default:
+ break;
+ }
+
+ return (-1);
+}
+
+static int
+lx_get_user_regs64(lx_lwp_data_t *lwpd, lx_user_regs64_t *lxrp)
+{
+ klwp_t *lwp = lwpd->br_lwp;
+ struct regs *rp = lwptoregs(lwp);
+ struct pcb *pcb = &lwp->lwp_pcb;
+ void *ucp;
+
+ switch (lx_regs_location(lwpd, &ucp, B_FALSE)) {
+ case LX_REG_LOC_UNAVAIL:
+ return (-1);
+
+ case LX_REG_LOC_UCP:
+ return (lx_get_user_regs64_uc(lwp, ucp, lxrp));
+
+ case LX_REG_LOC_LWP:
+ /* transformation below */
+ break;
+
+ default:
+ VERIFY(0);
+ break;
+ }
+
+ lxrp->lxur_r15 = rp->r_r15;
+ lxrp->lxur_r14 = rp->r_r14;
+ lxrp->lxur_r13 = rp->r_r13;
+ lxrp->lxur_r12 = rp->r_r12;
+ lxrp->lxur_rbp = rp->r_rbp;
+ lxrp->lxur_rbx = rp->r_rbx;
+ lxrp->lxur_r11 = rp->r_r11;
+ lxrp->lxur_r10 = rp->r_r10;
+ lxrp->lxur_r9 = rp->r_r9;
+ lxrp->lxur_r8 = rp->r_r8;
+ lxrp->lxur_rax = rp->r_rax;
+ lxrp->lxur_rcx = rp->r_rcx;
+ lxrp->lxur_rdx = rp->r_rdx;
+ lxrp->lxur_rsi = rp->r_rsi;
+ lxrp->lxur_rdi = rp->r_rdi;
+ lxrp->lxur_orig_rax = 0;
+ lxrp->lxur_rip = rp->r_rip;
+
+ lxrp->lxur_rflags = rp->r_rfl;
+ lxrp->lxur_rsp = rp->r_rsp;
+ lxrp->lxur_xss = rp->r_ss;
+ lxrp->lxur_xfs_base = pcb->pcb_fsbase;
+ lxrp->lxur_xgs_base = pcb->pcb_gsbase;
+
+ /* emulated %cs, see defines */
+ switch (lwp_getdatamodel(lwp)) {
+ case DATAMODEL_LP64:
+ lxrp->lxur_xcs = LX_CS_64BIT;
+ break;
+ case DATAMODEL_ILP32:
+ lxrp->lxur_xcs = LX_CS_32BIT;
+ break;
+ default:
+ VERIFY(0);
+ break;
+ }
+
+ kpreempt_disable();
+ if (pcb->pcb_rupdate == 1) {
+ lxrp->lxur_xds = pcb->pcb_ds;
+ lxrp->lxur_xes = pcb->pcb_es;
+ lxrp->lxur_xfs = pcb->pcb_fs;
+ lxrp->lxur_xgs = pcb->pcb_gs;
+ } else {
+ lxrp->lxur_xds = rp->r_ds;
+ lxrp->lxur_xes = rp->r_es;
+ lxrp->lxur_xfs = rp->r_fs;
+ lxrp->lxur_xgs = rp->r_gs;
+ }
+ kpreempt_enable();
+
+ if (lwpd->br_ptrace_whatstop == LX_PR_SYSENTRY) {
+ lxrp->lxur_rax = -lx_errno(ENOTSUP, EINVAL);
+ lxrp->lxur_orig_rax = lwpd->br_syscall_num;
+ } else if (lwpd->br_ptrace_whatstop == LX_PR_SYSEXIT) {
+ lxrp->lxur_orig_rax = lwpd->br_syscall_num;
+ }
+
+ return (0);
+}
+
+static int
+lx_set_user_regs64_uc(klwp_t *lwp, void *ucp, lx_user_regs64_t *lxrp)
+{
+ proc_t *p = lwptoproc(lwp);
+
+ switch (lwp_getdatamodel(lwp)) {
+ case DATAMODEL_LP64: {
+ ucontext_t uc;
+
+ if (lx_read_uc(p, ucp, &uc, sizeof (uc)) != 0) {
+ return (-1);
+ }
+
+ /*
+ * Note: we currently ignore "lxur_orig_rax" here since this
+ * path should not be used for system call stops.
+ */
+ LX_REG(&uc, REG_R15) = lxrp->lxur_r15;
+ LX_REG(&uc, REG_R14) = lxrp->lxur_r14;
+ LX_REG(&uc, REG_R13) = lxrp->lxur_r13;
+ LX_REG(&uc, REG_R12) = lxrp->lxur_r12;
+ LX_REG(&uc, REG_RBP) = lxrp->lxur_rbp;
+ LX_REG(&uc, REG_RBX) = lxrp->lxur_rbx;
+ LX_REG(&uc, REG_R11) = lxrp->lxur_r11;
+ LX_REG(&uc, REG_R10) = lxrp->lxur_r10;
+ LX_REG(&uc, REG_R9) = lxrp->lxur_r9;
+ LX_REG(&uc, REG_R8) = lxrp->lxur_r8;
+ LX_REG(&uc, REG_RAX) = lxrp->lxur_rax;
+ LX_REG(&uc, REG_RCX) = lxrp->lxur_rcx;
+ LX_REG(&uc, REG_RDX) = lxrp->lxur_rdx;
+ LX_REG(&uc, REG_RSI) = lxrp->lxur_rsi;
+ LX_REG(&uc, REG_RDI) = lxrp->lxur_rdi;
+ LX_REG(&uc, REG_RIP) = lxrp->lxur_rip;
+ LX_REG(&uc, REG_RFL) = PSLMERGE(LX_REG(&uc, REG_RFL),
+ lxrp->lxur_rflags);
+ LX_REG(&uc, REG_RSP) = lxrp->lxur_rsp;
+ LX_REG(&uc, REG_SS) = fix_segreg(lxrp->lxur_xss, IS_NOT_CS,
+ DATAMODEL_LP64);
+ LX_REG(&uc, REG_FSBASE) = lxrp->lxur_xfs_base;
+ LX_REG(&uc, REG_GSBASE) = lxrp->lxur_xgs_base;
+
+ /* %cs is ignored because of our lies */
+ LX_REG(&uc, REG_DS) = fix_segreg(lxrp->lxur_xds, IS_NOT_CS,
+ DATAMODEL_LP64);
+ LX_REG(&uc, REG_ES) = fix_segreg(lxrp->lxur_xes, IS_NOT_CS,
+ DATAMODEL_LP64);
+ LX_REG(&uc, REG_FS) = fix_segreg(lxrp->lxur_xfs, IS_NOT_CS,
+ DATAMODEL_LP64);
+ LX_REG(&uc, REG_GS) = fix_segreg(lxrp->lxur_xgs, IS_NOT_CS,
+ DATAMODEL_LP64);
+
+ if (lx_write_uc(p, ucp, &uc, sizeof (uc)) != 0) {
+ return (-1);
+ }
+
+ return (0);
+ }
+
+ case DATAMODEL_ILP32: {
+ ucontext32_t uc;
+
+ if (lx_read_uc(p, ucp, &uc, sizeof (uc)) != 0) {
+ return (-1);
+ }
+
+ /*
+ * Note: we currently ignore "lxur_orig_rax" here since this
+ * path should not be used for system call stops.
+ */
+ LX_REG(&uc, EBP) = (int32_t)lxrp->lxur_rbp;
+ LX_REG(&uc, EBX) = (int32_t)lxrp->lxur_rbx;
+ LX_REG(&uc, EAX) = (int32_t)lxrp->lxur_rax;
+ LX_REG(&uc, ECX) = (int32_t)lxrp->lxur_rcx;
+ LX_REG(&uc, EDX) = (int32_t)lxrp->lxur_rdx;
+ LX_REG(&uc, ESI) = (int32_t)lxrp->lxur_rsi;
+ LX_REG(&uc, EDI) = (int32_t)lxrp->lxur_rdi;
+ LX_REG(&uc, EIP) = (int32_t)lxrp->lxur_rip;
+ LX_REG(&uc, EFL) = (int32_t)PSLMERGE(LX_REG(&uc, EFL),
+ lxrp->lxur_rflags);
+ LX_REG(&uc, UESP) = (int32_t)lxrp->lxur_rsp;
+ LX_REG(&uc, SS) = (int32_t)fix_segreg(lxrp->lxur_xss,
+ IS_NOT_CS, DATAMODEL_ILP32);
+
+ /* %cs is ignored because of our lies */
+ LX_REG(&uc, DS) = (int32_t)fix_segreg(lxrp->lxur_xds,
+ IS_NOT_CS, DATAMODEL_ILP32);
+ LX_REG(&uc, ES) = (int32_t)fix_segreg(lxrp->lxur_xes,
+ IS_NOT_CS, DATAMODEL_ILP32);
+ LX_REG(&uc, FS) = (int32_t)fix_segreg(lxrp->lxur_xfs,
+ IS_NOT_CS, DATAMODEL_ILP32);
+ LX_REG(&uc, GS) = (int32_t)fix_segreg(lxrp->lxur_xgs,
+ IS_NOT_CS, DATAMODEL_ILP32);
+
+ if (lx_write_uc(p, ucp, &uc, sizeof (uc)) != 0) {
+ return (-1);
+ }
+ return (0);
+ }
+
+ default:
+ break;
+ }
+
+ return (-1);
+}
+
+static int
+lx_set_user_regs64(lx_lwp_data_t *lwpd, lx_user_regs64_t *lxrp)
+{
+ klwp_t *lwp = lwpd->br_lwp;
+ struct regs *rp = lwptoregs(lwp);
+ struct pcb *pcb = &lwp->lwp_pcb;
+ void *ucp;
+
+ VERIFY(lwp_getdatamodel(lwp) == DATAMODEL_LP64);
+
+ switch (lx_regs_location(lwpd, &ucp, B_TRUE)) {
+ case LX_REG_LOC_UNAVAIL:
+ return (-1);
+
+ case LX_REG_LOC_UCP:
+ return (lx_set_user_regs64_uc(lwp, ucp, lxrp));
+
+ case LX_REG_LOC_LWP:
+ /* transformation below */
+ break;
+
+ default:
+ VERIFY(0);
+ break;
+ }
+
+ rp->r_r15 = lxrp->lxur_r15;
+ rp->r_r14 = lxrp->lxur_r14;
+ rp->r_r13 = lxrp->lxur_r13;
+ rp->r_r12 = lxrp->lxur_r12;
+ rp->r_rbp = lxrp->lxur_rbp;
+ rp->r_rbx = lxrp->lxur_rbx;
+ rp->r_r11 = lxrp->lxur_r11;
+ rp->r_r10 = lxrp->lxur_r10;
+ rp->r_r9 = lxrp->lxur_r9;
+ rp->r_r8 = lxrp->lxur_r8;
+ rp->r_rax = lxrp->lxur_rax;
+ rp->r_rcx = lxrp->lxur_rcx;
+ rp->r_rdx = lxrp->lxur_rdx;
+ rp->r_rsi = lxrp->lxur_rsi;
+ rp->r_rdi = lxrp->lxur_rdi;
+ lwpd->br_syscall_num = (int)lxrp->lxur_orig_rax;
+ rp->r_rip = lxrp->lxur_rip;
+ rp->r_rfl = PSLMERGE(rp->r_rfl, lxrp->lxur_rflags);
+ rp->r_rsp = lxrp->lxur_rsp;
+ rp->r_ss = fix_segreg(lxrp->lxur_xss, IS_NOT_CS, DATAMODEL_LP64);
+ pcb->pcb_fsbase = lxrp->lxur_xfs_base;
+ pcb->pcb_gsbase = lxrp->lxur_xgs_base;
+
+ kpreempt_disable();
+ pcb->pcb_rupdate = 1;
+ pcb->pcb_ds = fix_segreg(lxrp->lxur_xds, IS_NOT_CS, DATAMODEL_LP64);
+ pcb->pcb_es = fix_segreg(lxrp->lxur_xes, IS_NOT_CS, DATAMODEL_LP64);
+ pcb->pcb_fs = fix_segreg(lxrp->lxur_xfs, IS_NOT_CS, DATAMODEL_LP64);
+ pcb->pcb_gs = fix_segreg(lxrp->lxur_xgs, IS_NOT_CS, DATAMODEL_LP64);
+ kpreempt_enable();
+
+ return (0);
+}
+
+#endif /* __amd64 */
+
+static int
+lx_peekuser32(lx_lwp_data_t *lwpd, uintptr_t offset, uint32_t *res)
+{
+ lx_user32_t lxu;
+ boolean_t valid = B_FALSE;
+
+ bzero(&lxu, sizeof (lxu));
+ if (offset < sizeof (lx_user_regs32_t)) {
+ if (lx_get_user_regs32(lwpd, &lxu.lxu_regs) == 0) {
+ valid = B_TRUE;
+ }
+ }
+ if (valid) {
+ uint32_t *data = (uint32_t *)&lxu;
+ *res = data[offset / sizeof (uint32_t)];
+ return (0);
+ }
+ return (-1);
+}
+
+#ifdef __amd64
+static int
+lx_peekuser64(lx_lwp_data_t *lwpd, uintptr_t offset, uintptr_t *res)
+{
+ lx_user64_t lxu;
+ boolean_t valid = B_FALSE;
+
+ bzero(&lxu, sizeof (lxu));
+ if (offset < sizeof (lx_user_regs64_t)) {
+ lx_user_regs64_t regs;
+ if (lx_get_user_regs64(lwpd, &regs) == 0) {
+ valid = B_TRUE;
+ }
+ }
+ if (valid) {
+ uintptr_t *data = (uintptr_t *)&lxu;
+ *res = data[offset / sizeof (uintptr_t)];
+ return (0);
+ }
+ return (-1);
+}
+#endif /* __amd64 */
+
+int
+lx_user_regs_copyin(lx_lwp_data_t *lwpd, void *uregsp)
+{
+ model_t target_model = lwp_getdatamodel(lwpd->br_lwp);
+
+ switch (get_udatamodel()) {
+ case DATAMODEL_ILP32:
+ if (target_model == DATAMODEL_ILP32) {
+ lx_user_regs32_t regs;
+
+ if (copyin(uregsp, &regs, sizeof (regs)) != 0) {
+ return (EFAULT);
+ }
+ if (lx_set_user_regs32(lwpd, &regs) != 0) {
+ return (EIO);
+ }
+ return (0);
+ }
+
+#ifdef __amd64
+ case DATAMODEL_LP64:
+ if (target_model == DATAMODEL_ILP32 ||
+ target_model == DATAMODEL_LP64) {
+ lx_user_regs64_t regs;
+
+ if (copyin(uregsp, &regs, sizeof (regs)) != 0) {
+ return (EFAULT);
+ }
+ if (lx_set_user_regs64(lwpd, &regs) != 0) {
+ return (EIO);
+ }
+ return (0);
+ }
+ break;
+#endif /* __amd64 */
+
+ default:
+ break;
+ }
+ return (EIO);
+}
+
+int
+lx_user_regs_copyout(lx_lwp_data_t *lwpd, void *uregsp)
+{
+ model_t target_model = lwp_getdatamodel(lwpd->br_lwp);
+
+ switch (get_udatamodel()) {
+ case DATAMODEL_ILP32:
+ if (target_model == DATAMODEL_ILP32) {
+ lx_user_regs32_t regs;
+
+ if (lx_get_user_regs32(lwpd, &regs) != 0) {
+ return (EIO);
+ }
+ if (copyout(&regs, uregsp, sizeof (regs)) != 0) {
+ return (EFAULT);
+ }
+ return (0);
+ }
+
+#ifdef __amd64
+ case DATAMODEL_LP64:
+ if (target_model == DATAMODEL_ILP32 ||
+ target_model == DATAMODEL_LP64) {
+ lx_user_regs64_t regs;
+
+ if (lx_get_user_regs64(lwpd, &regs) != 0) {
+ return (EIO);
+ }
+ if (copyout(&regs, uregsp, sizeof (regs)) != 0) {
+ return (EFAULT);
+ }
+ return (0);
+ }
+ break;
+#endif /* __amd64 */
+
+ default:
+ break;
+ }
+ return (EIO);
+}
+
+int
+lx_user_fpregs_copyin(lx_lwp_data_t *lwpd, void *uregsp)
+{
+ model_t target_model = lwp_getdatamodel(lwpd->br_lwp);
+
+ switch (get_udatamodel()) {
+ case DATAMODEL_ILP32:
+ if (target_model == DATAMODEL_ILP32) {
+ lx_user_fpregs32_t regs;
+
+ if (copyin(uregsp, &regs, sizeof (regs)) != 0) {
+ return (EFAULT);
+ }
+ lx_setfpregs32(lwpd, &regs);
+ return (0);
+ }
+
+#ifdef __amd64
+ case DATAMODEL_LP64:
+ if (target_model == DATAMODEL_ILP32 ||
+ target_model == DATAMODEL_LP64) {
+ lx_user_fpregs64_t regs;
+
+ if (copyin(uregsp, &regs, sizeof (regs)) != 0) {
+ return (EFAULT);
+ }
+ lx_setfpregs64(lwpd, &regs);
+ return (0);
+ }
+ break;
+#endif /* __amd64 */
+
+ default:
+ break;
+ }
+ return (EIO);
+}
+
+int
+lx_user_fpregs_copyout(lx_lwp_data_t *lwpd, void *uregsp)
+{
+ model_t target_model = lwp_getdatamodel(lwpd->br_lwp);
+
+ switch (get_udatamodel()) {
+ case DATAMODEL_ILP32:
+ if (target_model == DATAMODEL_ILP32) {
+ lx_user_fpregs32_t regs;
+
+ lx_getfpregs32(lwpd, &regs);
+ if (copyout(&regs, uregsp, sizeof (regs)) != 0) {
+ return (EFAULT);
+ }
+ return (0);
+ }
+
+#ifdef __amd64
+ case DATAMODEL_LP64:
+ if (target_model == DATAMODEL_ILP32 ||
+ target_model == DATAMODEL_LP64) {
+ lx_user_fpregs64_t regs;
+
+ lx_getfpregs64(lwpd, &regs);
+ if (copyout(&regs, uregsp, sizeof (regs)) != 0) {
+ return (EFAULT);
+ }
+ return (0);
+ }
+ break;
+#endif /* __amd64 */
+
+ default:
+ break;
+ }
+ return (EIO);
+}
+
+int
+lx_user_fpxregs_copyin(lx_lwp_data_t *lwpd, void *uregsp)
+{
+ /* Punt on fpxregs for now */
+ return (EIO);
+}
+
+int
+lx_user_fpxregs_copyout(lx_lwp_data_t *lwpd, void *uregsp)
+{
+ /* Punt on fpxregs for now */
+ return (EIO);
+}
+
+int
+lx_ptrace_peekuser(lx_lwp_data_t *lwpd, uintptr_t offset, void *uptr)
+{
+ model_t target_model = lwp_getdatamodel(lwpd->br_lwp);
+
+ switch (get_udatamodel()) {
+ case DATAMODEL_ILP32:
+ if ((offset & (sizeof (uint32_t) - 1)) != 0) {
+ /* Must be aligned to 32bit boundary */
+ break;
+ }
+ if (target_model == DATAMODEL_ILP32) {
+ uint32_t res;
+
+ if (lx_peekuser32(lwpd, offset, &res) != 0) {
+ return (EIO);
+ }
+ if (copyout(&res, uptr, sizeof (res)) != 0) {
+ return (EFAULT);
+ }
+ return (0);
+ }
+
+#ifdef __amd64
+ case DATAMODEL_LP64:
+ if ((offset & (sizeof (uintptr_t) - 1)) != 0) {
+ /* Must be aligned to 64bit boundary */
+ break;
+ }
+ if (target_model == DATAMODEL_ILP32 ||
+ target_model == DATAMODEL_LP64) {
+ uintptr_t res;
+
+ if (lx_peekuser64(lwpd, offset, &res) != 0) {
+ return (EIO);
+ }
+ if (copyout(&res, uptr, sizeof (res)) != 0) {
+ return (EFAULT);
+ }
+ return (0);
+ }
+ break;
+#endif /* __amd64 */
+
+ default:
+ break;
+ }
+ return (EIO);
+}
+
+int
+lx_ptrace_pokeuser(lx_lwp_data_t *lwpd, uintptr_t offset, void *uptr)
+{
+ return (EIO);
+}
+
+
+/*
+ * Load registers and repoint the stack and program counter. This function is
+ * used by the B_JUMP_TO_LINUX brand system call to revector to a Linux
+ * entrypoint.
+ */
+int
+lx_runexe(klwp_t *lwp, void *ucp)
+{
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+
+ /*
+ * We should only make it here when transitioning to Linux from
+ * the NATIVE or INIT mode.
+ */
+ VERIFY(lwpd->br_stack_mode == LX_STACK_MODE_NATIVE ||
+ lwpd->br_stack_mode == LX_STACK_MODE_INIT);
+
+#if defined(__amd64)
+ if (lwp_getdatamodel(lwp) == DATAMODEL_NATIVE) {
+ struct pcb *pcb = &lwp->lwp_pcb;
+
+ /*
+ * Preserve the %fs/%gsbase value for this LWP, as set and used
+ * by native illumos code.
+ */
+ lwpd->br_ntv_fsbase = pcb->pcb_fsbase;
+ lwpd->br_ntv_gsbase = pcb->pcb_gsbase;
+
+ return (getsetcontext(SETCONTEXT, ucp));
+ } else {
+ return (getsetcontext32(SETCONTEXT, ucp));
+ }
+#else
+ return (getsetcontext(SETCONTEXT, ucp));
+#endif
+}
+
+/*
+ * The usermode emulation code is illumos library code. This routine ensures
+ * the segment registers are set up correctly for native illumos code. It
+ * should be called _after_ we have stored the outgoing Linux machine state
+ * but _before_ we return from the kernel to any illumos native code; e.g. the
+ * usermode emulation library, or any interposed signal handlers.
+ *
+ * See the comment on lwp_segregs_save() for how we handle the usermode
+ * registers when we come into the kernel and see update_sregs() for how we
+ * restore.
+ */
+void
+lx_switch_to_native(klwp_t *lwp)
+{
+#if defined(__amd64)
+ model_t datamodel = lwp_getdatamodel(lwp);
+
+ switch (datamodel) {
+ case DATAMODEL_ILP32: {
+ struct pcb *pcb = &lwp->lwp_pcb;
+
+ /*
+ * For 32-bit processes, we ensure that the correct %gs value
+ * is loaded:
+ */
+ kpreempt_disable();
+ if (pcb->pcb_rupdate == 1) {
+ /*
+ * If we are already flushing the segment registers,
+ * then ensure we are flushing the native %gs.
+ */
+ pcb->pcb_gs = LWPGS_SEL;
+ } else {
+ struct regs *rp = lwptoregs(lwp);
+
+ /*
+ * If we are not flushing the segment registers yet,
+ * only do so if %gs is not correct already:
+ */
+ if (rp->r_gs != LWPGS_SEL) {
+ pcb->pcb_gs = LWPGS_SEL;
+
+ /*
+ * Ensure we go out via update_sregs.
+ */
+ pcb->pcb_rupdate = 1;
+ }
+ }
+ kpreempt_enable();
+ break;
+ }
+
+ case DATAMODEL_LP64: {
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+
+ /*
+ * For 64-bit processes we ensure that the correct %fsbase
+ * value is loaded:
+ */
+ if (lwpd->br_ntv_fsbase != 0) {
+ struct pcb *pcb = &lwp->lwp_pcb;
+
+ kpreempt_disable();
+ if (pcb->pcb_fsbase != lwpd->br_ntv_fsbase) {
+ pcb->pcb_fsbase = lwpd->br_ntv_fsbase;
+
+ /*
+ * Ensure we go out via update_sregs.
+ */
+ pcb->pcb_rupdate = 1;
+ }
+ kpreempt_enable();
+ }
+ /*
+ * ... and the correct %gsbase
+ */
+ if (lwpd->br_ntv_gsbase != 0) {
+ struct pcb *pcb = &lwp->lwp_pcb;
+
+ kpreempt_disable();
+ if (pcb->pcb_gsbase != lwpd->br_ntv_gsbase) {
+ pcb->pcb_gsbase = lwpd->br_ntv_gsbase;
+
+ /*
+ * Ensure we go out via update_sregs.
+ */
+ pcb->pcb_rupdate = 1;
+ }
+ kpreempt_enable();
+ }
+ break;
+ }
+
+ default:
+ cmn_err(CE_PANIC, "unknown data model: %d", datamodel);
+ }
+#elif defined(__i386)
+ struct regs *rp = lwptoregs(lwp);
+
+ rp->r_gs = LWPGS_SEL;
+#else
+#error "unknown x86"
+#endif
+}
+
+#if defined(__amd64)
+/*
+ * Call frame for the 64-bit usermode emulation handler:
+ * lx_emulate(ucontext_t *ucp, int syscall_num, uintptr_t *args)
+ *
+ * old sp: --------------------------------------------------------------
+ * | - ucontext_t (register state for emulation)
+ * | - uintptr_t[6] (system call arguments array)
+ * V --------------------------------------------------------------
+ * new sp: - bogus return address
+ *
+ * Arguments are passed in registers, per the AMD64 ABI: %rdi, %rsi and %rdx.
+ */
+void
+lx_emulate_user(klwp_t *lwp, int syscall_num, uintptr_t *args)
+{
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+ struct regs *rp = lwptoregs(lwp);
+ label_t lab;
+ uintptr_t uc_addr;
+ uintptr_t args_addr;
+ uintptr_t top;
+ /*
+ * Variables used after on_fault() returns for a fault
+ * must be volatile.
+ */
+ volatile size_t frsz;
+ volatile uintptr_t sp;
+ volatile proc_t *p = lwptoproc(lwp);
+ volatile int watched;
+
+ /*
+ * We should not be able to get here unless we are running Linux
+ * code for a system call we cannot emulate in the kernel.
+ */
+ VERIFY(lwpd->br_stack_mode == LX_STACK_MODE_BRAND);
+
+ /*
+ * The AMD64 ABI requires us to align the return address on the stack
+ * so that when the called function pushes %rbp, the stack is 16-byte
+ * aligned.
+ *
+ * This routine, like the amd64 version of sendsig(), depends on
+ * STACK_ALIGN being 16 and STACK_ENTRY_ALIGN being 8.
+ */
+#if STACK_ALIGN != 16 || STACK_ENTRY_ALIGN != 8
+#error "lx_emulate_user() amd64 did not find the expected stack alignments"
+#endif
+
+ /*
+ * We begin at the current native stack pointer, and reserve space for
+ * the ucontext_t we are copying onto the stack, as well as the call
+ * arguments for the usermode emulation handler.
+ *
+ * We 16-byte align the entire frame, and then unalign it again by
+ * adding space for the return address.
+ */
+ frsz = SA(sizeof (ucontext_t)) + SA(6 * sizeof (uintptr_t)) +
+ sizeof (uintptr_t);
+ VERIFY((frsz & (STACK_ALIGN - 1UL)) == 8);
+ VERIFY((frsz & (STACK_ENTRY_ALIGN - 1UL)) == 0);
+
+ if (lwpd->br_ntv_stack == lwpd->br_ntv_stack_current) {
+ /*
+ * Nobody else is using the stack right now, so start at the
+ * top.
+ */
+ top = lwpd->br_ntv_stack_current;
+ } else {
+ /*
+ * Drop below the 128-byte reserved region of the stack frame
+ * we are interrupting.
+ */
+ top = lwpd->br_ntv_stack_current - STACK_RESERVE;
+ }
+ top = top & ~(STACK_ALIGN - 1);
+ sp = top - frsz;
+
+ uc_addr = top - SA(sizeof (ucontext_t));
+ args_addr = uc_addr - SA(6 * sizeof (uintptr_t));
+
+ watched = watch_disable_addr((caddr_t)sp, frsz, S_WRITE);
+
+ /*
+ * Save the register state we preserved on the way into this brand
+ * system call and drop it on the native stack.
+ */
+ {
+ /*
+ * Note: the amd64 ucontext_t is 864 bytes.
+ */
+ ucontext_t uc;
+
+ /*
+ * We do not want to save the signal mask for an emulation
+ * context. Some emulated system calls alter the signal mask;
+ * restoring it when the emulation is complete would clobber
+ * those intentional side effects.
+ */
+ savecontext(&uc, NULL);
+
+ if (on_fault(&lab)) {
+ goto badstack;
+ }
+
+ /*
+ * Mark this as a system call emulation context:
+ */
+ uc.uc_brand_data[0] = (void *)((uintptr_t)
+ uc.uc_brand_data[0] | LX_UC_FRAME_IS_SYSCALL);
+
+ copyout_noerr(&uc, (void *)(uintptr_t)uc_addr, sizeof (uc));
+ }
+
+ DTRACE_PROBE3(oldcontext__set, klwp_t *, lwp,
+ uintptr_t, lwp->lwp_oldcontext, uintptr_t, uc_addr);
+ lwp->lwp_oldcontext = (uintptr_t)uc_addr;
+
+ /*
+ * Copy the system call arguments out to userland:
+ */
+ copyout_noerr(args, (void *)(uintptr_t)args_addr,
+ 6 * sizeof (uintptr_t));
+
+ /*
+ * Drop the bogus return address on the stack.
+ */
+ suword64_noerr((void *)sp, 0);
+
+ no_fault();
+ if (watched) {
+ watch_enable_addr((caddr_t)sp, frsz, S_WRITE);
+ }
+
+ /*
+ * Pass the arguments to lx_emulate() in the appropriate registers.
+ */
+ rp->r_rdi = uc_addr;
+ rp->r_rsi = syscall_num;
+ rp->r_rdx = args_addr;
+
+ /*
+ * In order to be able to restore %edx, we need to JUSTRETURN.
+ */
+ lwp->lwp_eosys = JUSTRETURN;
+ curthread->t_post_sys = 1;
+ aston(curthread);
+
+ /*
+ * Set stack pointer and return address to the usermode emulation
+ * handler:
+ */
+ lwpd->br_stack_mode = LX_STACK_MODE_NATIVE;
+ lx_lwp_set_native_stack_current(lwpd, sp);
+
+ /*
+ * Divert execution, on our return, to the usermode emulation stack
+ * and handler:
+ */
+ rp->r_fp = 0;
+ rp->r_sp = sp;
+ rp->r_pc = ptolxproc(p)->l_handler;
+
+ /*
+ * Fix up segment registers, etc.
+ */
+ lx_switch_to_native(lwp);
+
+ return;
+
+badstack:
+ no_fault();
+ if (watched) {
+ watch_enable_addr((caddr_t)sp, frsz, S_WRITE);
+ }
+
+#ifdef DEBUG
+ printf("lx_emulate_user: bad native stack cmd=%s, pid=%d, sp=0x%lx\n",
+ PTOU(p)->u_comm, p->p_pid, sp);
+#endif
+
+ exit(CLD_KILLED, SIGSEGV);
+}
+
+#if defined(_SYSCALL32_IMPL)
+/*
+ * Call frame for the 32-bit usermode emulation handler:
+ * lx_emulate(ucontext_t *ucp, int syscall_num, uintptr_t *args)
+ *
+ * old sp: --------------------------------------------------------------
+ * | - ucontext_t (register state for emulation)
+ * | - uintptr_t[6] (system call arguments array)
+ * | --------------------------------------------------------------
+ * | - arg2: uintptr_t * (pointer to arguments array above)
+ * | - arg1: int (system call number)
+ * V - arg0: ucontext_t * (pointer to context above)
+ * new sp: - bogus return address
+ */
+struct lx_emu_frame32 {
+ caddr32_t retaddr; /* 0 */
+ caddr32_t ucontextp; /* 4 */
+ int32_t syscall_num; /* 8 */
+ caddr32_t argsp; /* c */
+};
+
+/*
+ * This function arranges for the lwp to execute the usermode emulation handler
+ * for this system call. The mechanism is similar to signal handling, and this
+ * function is modelled on sendsig32().
+ */
+void
+lx_emulate_user32(klwp_t *lwp, int syscall_num, uintptr_t *args)
+{
+ lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+ struct regs *rp = lwptoregs(lwp);
+ label_t lab;
+ caddr32_t uc_addr;
+ caddr32_t args_addr;
+ caddr32_t top;
+ /*
+ * Variables used after on_fault() returns for a fault
+ * must be volatile.
+ */
+ volatile size_t frsz;
+ volatile caddr32_t sp;
+ volatile proc_t *p = lwptoproc(lwp);
+ volatile int watched;
+
+ /*
+ * We should not be able to get here unless we are running Linux
+ * code for a system call we cannot emulate in the kernel.
+ */
+ VERIFY(lwpd->br_stack_mode == LX_STACK_MODE_BRAND);
+
+ /*
+ * We begin at the current native stack pointer, and reserve space for
+ * the ucontext_t we are copying onto the stack, as well as the call
+ * arguments for the usermode emulation handler.
+ */
+ frsz = SA32(sizeof (ucontext32_t)) + SA32(6 * sizeof (uint32_t)) +
+ SA32(sizeof (struct lx_emu_frame32));
+ VERIFY((frsz & (STACK_ALIGN32 - 1)) == 0);
+
+ top = (caddr32_t)(lwpd->br_ntv_stack_current & ~(STACK_ALIGN32 - 1));
+ sp = top - frsz;
+
+ uc_addr = top - SA32(sizeof (ucontext32_t));
+ args_addr = uc_addr - SA32(6 * sizeof (uint32_t));
+
+ watched = watch_disable_addr((caddr_t)(uintptr_t)sp, frsz, S_WRITE);
+
+ /*
+ * Save the register state we preserved on the way into this brand
+ * system call and drop it on the native stack.
+ */
+ {
+ /*
+ * Note: ucontext32_t is 512 bytes.
+ */
+ ucontext32_t uc;
+
+ /*
+ * We do not want to save the signal mask for an emulation
+ * context. Some emulated system calls alter the signal mask;
+ * restoring it when the emulation is complete would clobber
+ * those intentional side effects.
+ */
+ savecontext32(&uc, NULL);
+
+ if (on_fault(&lab)) {
+ goto badstack;
+ }
+
+ /*
+ * Mark this as a system call emulation context:
+ */
+ uc.uc_brand_data[0] |= LX_UC_FRAME_IS_SYSCALL;
+ copyout_noerr(&uc, (void *)(uintptr_t)uc_addr, sizeof (uc));
+ }
+
+ DTRACE_PROBE3(oldcontext__set, klwp_t *, lwp,
+ uintptr_t, lwp->lwp_oldcontext, uintptr_t, uc_addr);
+ lwp->lwp_oldcontext = (uintptr_t)uc_addr;
+
+ /*
+ * Copy the system call arguments out to userland:
+ */
+ {
+ uint32_t args32[6];
+
+ args32[0] = args[0];
+ args32[1] = args[1];
+ args32[2] = args[2];
+ args32[3] = args[3];
+ args32[4] = args[4];
+ args32[5] = args[5];
+
+ copyout_noerr(&args32, (void *)(uintptr_t)args_addr,
+ sizeof (args32));
+ }
+
+ /*
+ * Assemble the call frame on the stack.
+ */
+ {
+ struct lx_emu_frame32 frm;
+
+ frm.retaddr = 0;
+ frm.ucontextp = uc_addr;
+ frm.argsp = args_addr;
+ frm.syscall_num = syscall_num;
+
+ copyout_noerr(&frm, (void *)(uintptr_t)sp, sizeof (frm));
+ }
+
+ no_fault();
+ if (watched) {
+ watch_enable_addr((caddr_t)(uintptr_t)sp, frsz, S_WRITE);
+ }
+
+ /*
+ * Set stack pointer and return address to the usermode emulation
+ * handler:
+ */
+ lwpd->br_stack_mode = LX_STACK_MODE_NATIVE;
+ lx_lwp_set_native_stack_current(lwpd, sp);
+
+ /*
+ * Divert execution, on our return, to the usermode emulation stack
+ * and handler:
+ */
+ rp->r_fp = 0;
+ rp->r_sp = sp;
+ rp->r_pc = ptolxproc(p)->l_handler;
+
+ /*
+ * Fix up segment registers, etc.
+ */
+ lx_switch_to_native(lwp);
+
+ return;
+
+badstack:
+ no_fault();
+ if (watched) {
+ watch_enable_addr((caddr_t)(uintptr_t)sp, frsz, S_WRITE);
+ }
+
+#ifdef DEBUG
+ printf("lx_emulate_user32: bad native stack cmd=%s, pid=%d, sp=0x%x\n",
+ PTOU(p)->u_comm, p->p_pid, sp);
+#endif
+
+ exit(CLD_KILLED, SIGSEGV);
+}
+#endif /* _SYSCALL32_IMPL */
+
+#else /* !__amd64 (__i386) */
+
+void
+lx_emulate_user(klwp_t *lwp, int syscall_num, uintptr_t *args)
+{
+ cmn_err(CE_WARN, "%s: no 32-bit kernel support", __FUNCTION__);
+ exit(CLD_KILLED, SIGSYS);
+}
+
+#endif /* __amd64 */
diff --git a/usr/src/uts/intel/core_pcbe/Makefile b/usr/src/uts/intel/core_pcbe/Makefile
index d9d9b02de0..7f2d1ad8e5 100644
--- a/usr/src/uts/intel/core_pcbe/Makefile
+++ b/usr/src/uts/intel/core_pcbe/Makefile
@@ -34,7 +34,7 @@ UTSBASE = ../..
MODULE = pcbe.GenuineIntel.6.15
OBJECTS = $(CORE_PCBE_OBJS:%=$(OBJS_DIR)/%)
LINTS = $(CORE_PCBE_OBJS:%.o=$(LINTS_DIR)/%.ln)
-ROOTMODULE = $(USR_PCBE_DIR)/$(MODULE)
+ROOTMODULE = $(ROOT_PSM_PCBE_DIR)/$(MODULE)
SOFTLINKS = pcbe.GenuineIntel.6.23 \
pcbe.GenuineIntel.6.26 \
pcbe.GenuineIntel.6.28 \
@@ -45,7 +45,7 @@ SOFTLINKS = pcbe.GenuineIntel.6.23 \
pcbe.GenuineIntel.6.44 \
pcbe.GenuineIntel.6.46 \
pcbe.GenuineIntel.6.47
-ROOTSOFTLINKS = $(SOFTLINKS:%=$(USR_PCBE_DIR)/%)
+ROOTSOFTLINKS = $(SOFTLINKS:%=$(ROOT_PSM_PCBE_DIR)/%)
#
# Include common rules.
diff --git a/usr/src/uts/intel/datafilt/Makefile b/usr/src/uts/intel/datafilt/Makefile
new file mode 100644
index 0000000000..bc72416406
--- /dev/null
+++ b/usr/src/uts/intel/datafilt/Makefile
@@ -0,0 +1,74 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2011, OmniTI Computer Consulting, Inc. All rights reserved.
+# Copyright 2012, Nexenta Systems, Inc. All rights reserved.
+#
+
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = datafilt
+OBJECTS = $(DATAFILT_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(DATAFILT_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_SOCK_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# lint pass one enforcement and OS version
+#
+CFLAGS += $(CCVERBOSE)
+
+LDFLAGS += -dy -Nfs/sockfs -Ndrv/ip
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/dev/Makefile b/usr/src/uts/intel/dev/Makefile
index b5c7c1a9c8..e7ae468c05 100644
--- a/usr/src/uts/intel/dev/Makefile
+++ b/usr/src/uts/intel/dev/Makefile
@@ -66,6 +66,7 @@ INC_PATH += -I$(UTSBASE)/common/io/bpf
CERRWARN += -_gcc=-Wno-parentheses
CERRWARN += -_gcc=-Wno-unused-label
CERRWARN += -_gcc=-Wno-uninitialized
+CERRWARN += -_gcc=-Wno-unused-function
#
# Default build targets.
diff --git a/usr/src/uts/intel/dr_sas/Makefile b/usr/src/uts/intel/dr_sas/Makefile
new file mode 100644
index 0000000000..f4871b694a
--- /dev/null
+++ b/usr/src/uts/intel/dr_sas/Makefile
@@ -0,0 +1,90 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# uts/intel/dr_sas/Makefile
+#
+# This makefile drives the production of the dr_sas driver kernel module.
+#
+# intel implementation architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = dr_sas
+OBJECTS = $(DR_SAS_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(DR_SAS_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/common/io/dr_sas
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY) $(CONFMOD)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+# Kernel Module Dependencies
+#
+LDFLAGS += -dy -Nmisc/scsi
+
+CERRWARN += -_gcc=-Wno-unused-label
+CERRWARN += -_gcc=-Wno-switch
+CERRWARN += -_gcc=-Wno-uninitialized
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/dtrace/fasttrap_isa.c b/usr/src/uts/intel/dtrace/fasttrap_isa.c
index 1b93869a73..f9eba2876c 100644
--- a/usr/src/uts/intel/dtrace/fasttrap_isa.c
+++ b/usr/src/uts/intel/dtrace/fasttrap_isa.c
@@ -24,7 +24,9 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
+ */
#include <sys/fasttrap_isa.h>
#include <sys/fasttrap_impl.h>
@@ -38,6 +40,9 @@
#include <sys/sysmacros.h>
#include <sys/trap.h>
#include <sys/archsystm.h>
+#include <sys/proc.h>
+#include <sys/brand.h>
+#include <sys/machbrand.h>
/*
* Lossless User-Land Tracing on x86
@@ -1394,6 +1399,14 @@ fasttrap_pid_probe(struct regs *rp)
#if defined(__amd64)
if (p->p_model == DATAMODEL_LP64) {
addr = lwp->lwp_pcb.pcb_fsbase;
+
+ /*
+ * If we're branded, convert the fsbase from the
+ * brand's fsbase to the native fsbase.
+ */
+ if (PROC_IS_BRANDED(p) && BRMOP(p)->b_fsbase != NULL)
+ addr = BRMOP(p)->b_fsbase(lwp, addr);
+
addr += sizeof (void *);
} else {
addr = lwp->lwp_pcb.pcb_gsbase;
diff --git a/usr/src/uts/intel/e1000g/Makefile b/usr/src/uts/intel/e1000g/Makefile
index 7492e30935..65aa6b44bb 100644
--- a/usr/src/uts/intel/e1000g/Makefile
+++ b/usr/src/uts/intel/e1000g/Makefile
@@ -84,6 +84,7 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
# Driver depends on MAC
#
LDFLAGS += -dy -N misc/mac
+MAPFILES += ddi mac
#
# Default build targets.
@@ -110,4 +111,5 @@ install: $(INSTALL_DEPS)
#
# Include common targets.
#
+include $(UTSBASE)/Makefile.mapfile
include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/elfexec/Makefile b/usr/src/uts/intel/elfexec/Makefile
index 9751e04ba7..975377a538 100644
--- a/usr/src/uts/intel/elfexec/Makefile
+++ b/usr/src/uts/intel/elfexec/Makefile
@@ -23,6 +23,7 @@
#
# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
+# Copyright 2016 Joyent, Inc.
#
#
@@ -60,6 +61,8 @@ ALL_TARGET = $(BINARY)
LINT_TARGET = $(MODULE).lint
INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+INC_PATH += -I$(UTSBASE)/i86pc
+
#
# For now, disable these lint checks; maintainers should endeavor
# to investigate and remove these for maximum lint coverage.
@@ -102,12 +105,15 @@ include $(UTSBASE)/intel/Makefile.targ
$(OBJS_DIR)/elf32.o: $(UTSBASE)/common/exec/elf/elf.c
$(COMPILE.c) -o $@ -D_ELF32_COMPAT $(UTSBASE)/common/exec/elf/elf.c
+ $(CTFCONVERT_O)
$(OBJS_DIR)/elf32_notes.o: $(UTSBASE)/common/exec/elf/elf_notes.c
$(COMPILE.c) -o $@ -D_ELF32_COMPAT $(UTSBASE)/common/exec/elf/elf_notes.c
+ $(CTFCONVERT_O)
$(OBJS_DIR)/old32_notes.o: $(UTSBASE)/common/exec/elf/old_notes.c
$(COMPILE.c) -o $@ -D_ELF32_COMPAT $(UTSBASE)/common/exec/elf/old_notes.c
+ $(CTFCONVERT_O)
$(LINTS_DIR)/elf32.ln: $(UTSBASE)/common/exec/elf/elf.c
@($(LHEAD) $(LINT.c) -Celf32 -D_ELF32_COMPAT $(UTSBASE)/common/exec/elf/elf.c $(LTAIL))
diff --git a/usr/src/uts/intel/genassym/Makefile b/usr/src/uts/intel/genassym/Makefile
new file mode 100644
index 0000000000..ce01dc8610
--- /dev/null
+++ b/usr/src/uts/intel/genassym/Makefile
@@ -0,0 +1,85 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# ident "%Z%%M% %I% %E% SMI"
+#
+# This makefile drives the production of genassym.h through
+# compile time intialized data.
+#
+# intel architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+GENASSYM_H = $(GENASSYM_DIR)/$(OBJS_DIR)/genassym.h
+OFFSETS_SRC = $(GENASSYM_DIR)/offsets.in
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(GENASSYM_H)
+
+INC_PATH += -I$(UTSBASE)/common/brand/lx
+
+#
+# Overrides
+#
+CLEANFILES = Nothing_to_remove
+CLOBBERFILES = $(GENASSYM_H) Nothing_to_remove
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+clean.lint:
+
+install: def
+
+#
+# Create genassym.h
+#
+$(GENASSYM_H): $(OFFSETS_SRC)
+ $(OFFSETS_CREATE) <$(OFFSETS_SRC) >$@
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/genassym/offsets.in b/usr/src/uts/intel/genassym/offsets.in
new file mode 100644
index 0000000000..70221c02f9
--- /dev/null
+++ b/usr/src/uts/intel/genassym/offsets.in
@@ -0,0 +1,43 @@
+\
+\ CDDL HEADER START
+\
+\ The contents of this file are subject to the terms of the
+\ Common Development and Distribution License (the "License").
+\ You may not use this file except in compliance with the License.
+\
+\ You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+\ or http://www.opensolaris.org/os/licensing.
+\ See the License for the specific language governing permissions
+\ and limitations under the License.
+\
+\ When distributing Covered Code, include this CDDL HEADER in each
+\ file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+\ If applicable, add the following below this CDDL HEADER, with the
+\ fields enclosed by brackets "[]" replaced with your own identifying
+\ information: Portions Copyright [yyyy] [name of copyright owner]
+\
+\ CDDL HEADER END
+\
+\
+\ Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+\ Use is subject to license terms.
+\ Copyright 2015 Joyent, Inc.
+\
+
+\
+\ offsets.in: input file to produce the architecture-dependent genassym.h
+\ using the ctfstabs program
+\
+
+#ifndef _GENASSYM
+#define _GENASSYM
+#endif
+
+#include <sys/lx_brand.h>
+
+lx_proc_data
+ l_handler
+
+lx_lwp_data
+ br_lx_fsbase
+ br_ntv_fsbase
diff --git a/usr/src/uts/intel/gsqueue/Makefile b/usr/src/uts/intel/gsqueue/Makefile
new file mode 100644
index 0000000000..411e384309
--- /dev/null
+++ b/usr/src/uts/intel/gsqueue/Makefile
@@ -0,0 +1,49 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+UTSBASE = ../..
+
+MODULE = gsqueue
+OBJECTS = $(GSQUEUE_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(GSQUEUE_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE)
+
+include $(UTSBASE)/intel/Makefile.intel
+
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+LDFLAGS += -dy -Ndrv/ip
+
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/cmd/ssh/scp/Makefile b/usr/src/uts/intel/hyprlofs/Makefile
index 372bb67985..919b045617 100644
--- a/usr/src/cmd/ssh/scp/Makefile
+++ b/usr/src/uts/intel/hyprlofs/Makefile
@@ -19,48 +19,65 @@
#
# CDDL HEADER END
#
+#
+# uts/intel/hyprlofs/Makefile
+#
# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# cmd/ssh/scp/Makefile
-
-PROG= scp
-
-OBJS = \
- scp.o
-SRCS = $(OBJS:.o=.c)
+# This makefile drives the production of the hyprlofs file system
+# kernel module.
+#
+# intel architecture dependent
+#
-include ../../Makefile.cmd
-include ../Makefile.ssh-common
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
-DIRS= $(ROOTLIBSUNSSH)
+#
+# Define the module and object file sets.
+#
+MODULE = hyprlofs
+OBJECTS = $(HYPRLOFS_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(HYPRLOFS_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_FS_DIR)/$(MODULE)
-LDLIBS += $(SSH_COMMON_LDLIBS) -lsocket
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
-POFILE_DIR= ..
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+#
+# Default build targets.
+#
.KEEP_STATE:
-.PARALLEL: $(OBJS)
+def: $(DEF_DEPS)
-all: $(PROG)
+all: $(ALL_DEPS)
-$(PROG): $(OBJS) ../libssh/$(MACH)/libssh.a ../libopenbsd-compat/$(MACH)/libopenbsd-compat.a
- $(LINK.c) $(OBJS) -o $@ $(LDLIBS)
- $(POST_PROCESS)
+clean: $(CLEAN_DEPS)
-$(DIRS):
- $(INS.dir)
+clobber: $(CLOBBER_DEPS)
-$(ROOTBIN)/scp: $(ROOTLIBSUNSSH)/scp
- -$(RM) $@; $(SYMLINK) ../lib/sunssh/scp $@
+lint: $(LINT_DEPS)
-install: all $(ROOTPROG)
+modlintlib: $(MODLINTLIB_DEPS)
-clean:
- $(RM) -f $(OBJS) $(PROG)
+clean.lint: $(CLEAN_LINT_DEPS)
-lint: lint_SRCS
+install: $(INSTALL_DEPS)
-include ../Makefile.msg.targ
-include ../../Makefile.targ
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/i40e/Makefile b/usr/src/uts/intel/i40e/Makefile
new file mode 100644
index 0000000000..98b0a77e9c
--- /dev/null
+++ b/usr/src/uts/intel/i40e/Makefile
@@ -0,0 +1,62 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2016 Joyent, Inc.
+#
+
+UTSBASE = ../..
+
+MODULE = i40e
+OBJECTS = $(I40E_OBJS:%=$(OBJS_DIR)/%) $(I40E_INTC_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(I40E_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/common/io/i40e
+
+include $(UTSBASE)/intel/Makefile.intel
+
+CPPFLAGS += -I$(UTSBASE)/common/io/i40e
+CPPFLAGS += -I$(UTSBASE)/common/io/i40e/core
+
+ALL_TARGET = $(BINARY) $(CONFMOD)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+# The Intel common code defines some values in header files that end up in all
+# of our objects. Therefore we don't try to lint for that.
+#
+LINTTAGS += -erroff=E_STATIC_UNUSED
+
+LDFLAGS += -dy -N misc/mac
+
+MAPFILES += ddi mac
+
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+include $(UTSBASE)/Makefile.mapfile
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/ia32/ml/i86_subr.s b/usr/src/uts/intel/ia32/ml/i86_subr.s
index e9d0b8128f..d4ba6589bc 100644
--- a/usr/src/uts/intel/ia32/ml/i86_subr.s
+++ b/usr/src/uts/intel/ia32/ml/i86_subr.s
@@ -23,6 +23,7 @@
* Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 by Delphix. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
*/
/*
@@ -3590,29 +3591,13 @@ hres_tick(void)
{}
int64_t timedelta;
-hrtime_t hres_last_tick;
-volatile timestruc_t hrestime;
-int64_t hrestime_adj;
-volatile int hres_lock;
hrtime_t hrtime_base;
#else /* __lint */
- DGDEF3(hrestime, _MUL(2, CLONGSIZE), 8)
- .NWORD 0, 0
-
- DGDEF3(hrestime_adj, 8, 8)
- .long 0, 0
-
- DGDEF3(hres_last_tick, 8, 8)
- .long 0, 0
-
DGDEF3(timedelta, 8, 8)
.long 0, 0
- DGDEF3(hres_lock, 4, 8)
- .long 0
-
/*
* initialized to a non zero value to make pc_gethrtime()
* work correctly even before clock is initialized
diff --git a/usr/src/uts/intel/ia32/ml/modstubs.s b/usr/src/uts/intel/ia32/ml/modstubs.s
index 642704436a..8c17ea0624 100644
--- a/usr/src/uts/intel/ia32/ml/modstubs.s
+++ b/usr/src/uts/intel/ia32/ml/modstubs.s
@@ -50,7 +50,7 @@ char stubs_base[1], stubs_end[1];
* NOTE: Use NO_UNLOAD_STUBs if the module is NOT unloadable once it is
* loaded.
*/
-#define MAXNARG 10
+#define MAXNARG 12
/*
* WARNING: there is no check for forgetting to write END_MODULE,
@@ -184,7 +184,7 @@ fcnname/**/_info: \
pushq %rcx
pushq %r8
pushq %r9
- /* (next 4 args, if any, are already on the stack above %rbp) */
+ /* (next 6 args, if any, are already on the stack above %rbp) */
movq %r15, %rdi
call mod_hold_stub /* mod_hold_stub(mod_stub_info *) */
cmpl $-1, %eax /* error? */
@@ -195,7 +195,7 @@ fcnname/**/_info: \
jmp .L2
.L1:
/*
- * copy MAXNARG == 10 incoming arguments
+ * copy MAXNARG == 12 incoming arguments
*/
popq %r9
popq %r8
@@ -219,8 +219,10 @@ fcnname/**/_info: \
pushq (%rsp, %r11, 8)
pushq (%rsp, %r11, 8)
pushq (%rsp, %r11, 8)
+ pushq (%rsp, %r11, 8)
+ pushq (%rsp, %r11, 8)
call *(%r15) /* call the stub fn(arg, ..) */
- addq $0x20, %rsp /* pop off last 4 args */
+ addq $0x30, %rsp /* pop off last 6 args */
pushq %rax /* save any return values */
pushq %rdx
movq %r15, %rdi
@@ -345,6 +347,8 @@ fcnname/**/_info: \
pushl (%esp, %ecx, 4)
pushl (%esp, %ecx, 4)
pushl (%esp, %ecx, 4)
+ pushl (%esp, %ecx, 4)
+ pushl (%esp, %ecx, 4)
call *(%esi) / call the stub function(arg1,arg2, ...)
add $_MUL(MAXNARG, 4), %esp / pop off MAXNARG arguments
pushl %eax / save any return values from the stub
diff --git a/usr/src/uts/intel/ia32/ml/swtch.s b/usr/src/uts/intel/ia32/ml/swtch.s
index 67ba255cbc..0948fa7c93 100644
--- a/usr/src/uts/intel/ia32/ml/swtch.s
+++ b/usr/src/uts/intel/ia32/ml/swtch.s
@@ -24,7 +24,7 @@
*/
/*
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
*/
/*
@@ -362,13 +362,12 @@ resume(kthread_t *t)
.setup_cpu:
/*
- * Setup rsp0 (kernel stack) in TSS to curthread's stack.
- * (Note: Since we don't have saved 'regs' structure for all
- * the threads we can't easily determine if we need to
- * change rsp0. So, we simply change the rsp0 to bottom
- * of the thread stack and it will work for all cases.)
- *
- * XX64 - Is this correct?
+ * Setup rsp0 (kernel stack) in TSS to curthread's saved regs
+ * structure. If this thread doesn't have a regs structure above
+ * the stack -- that is, if lwp_stk_init() was never called for the
+ * thread -- this will set rsp0 to the wrong value, but it's harmless
+ * as it's a kernel thread, and it won't actually attempt to implicitly
+ * use the rsp0 via a privilege change.
*/
movq CPU_TSS(%r13), %r14
movq T_STACK(%r12), %rax
@@ -542,11 +541,12 @@ resume_return:
jne .L5_2
.L5_1:
/*
- * Setup esp0 (kernel stack) in TSS to curthread's stack.
- * (Note: Since we don't have saved 'regs' structure for all
- * the threads we can't easily determine if we need to
- * change esp0. So, we simply change the esp0 to bottom
- * of the thread stack and it will work for all cases.)
+ * Setup esp0 (kernel stack) in TSS to curthread's stack. If this
+ * thread doesn't have a regs structure above the stack -- that is, if
+ * lwp_stk_init() was never called for the thread -- this will set
+ * esp0 to the wrong value, but it's harmless as it's a kernel thread,
+ * and it won't actually attempt to implicitly use the esp0 via a
+ * privilege change.
*/
movl CPU_TSS(%esi), %ecx
addl $REGSIZE+MINFRAME, %eax /* to the bottom of thread stack */
@@ -929,3 +929,81 @@ thread_start(void)
#endif /* __i386 */
#endif /* __lint */
+
+#if defined(__lint)
+
+void
+thread_splitstack_run(caddr_t stack, void (*func)(void *), void *arg)
+{}
+
+void
+thread_splitstack_cleanup(void)
+{}
+
+#else /* __lint */
+
+#if defined(__amd64)
+
+ ENTRY(thread_splitstack_run)
+ pushq %rbp /* push base pointer */
+ movq %rsp, %rbp /* construct frame */
+ movq %rdi, %rsp /* set stack pinter */
+ movq %rdx, %rdi /* load arg */
+ call *%rsi /* call specified function */
+ leave /* pop base pointer */
+ ret
+ SET_SIZE(thread_splitstack_run)
+
+ /*
+ * Once we're back on our own stack, we need to be sure to set the
+ * value of rsp0 in the TSS back to our original stack: if we gave
+ * up the CPU at all while on our split stack, the rsp0 will point
+ * to that stack from resume (above); if were to try to return to
+ * userland in that state, we will die absolutely horribly (namely,
+ * trying to iretq back to registers in a bunch of freed segkp). We
+ * are expecting this to be called after T_STACK has been restored,
+ * but before we return. It's okay if we are preempted in this code:
+ * when the new CPU picks us up, they will automatically set rsp0
+ * correctly, which is all we're trying to do here.
+ */
+ ENTRY(thread_splitstack_cleanup)
+ LOADCPU(%r8)
+ movq CPU_TSS(%r8), %r9
+ movq CPU_THREAD(%r8), %r10
+ movq T_STACK(%r10), %rax
+ addq $REGSIZE+MINFRAME, %rax
+ movq %rax, TSS_RSP0(%r9)
+ ret
+ SET_SIZE(thread_splitstack_cleanup)
+
+#elif defined(__i386)
+
+ ENTRY(thread_splitstack_run)
+ pushl %ebp /* push base pointer */
+ movl %esp, %ebp /* construct frame */
+ movl 8(%ebp), %esp /* set stack pointer */
+ movl 12(%ebp), %eax /* load func */
+ movl 16(%ebp), %edx /* load arg */
+ pushl %edx /* push arg */
+ call *%eax /* call specifed function */
+ addl $4, %esp /* restore stack pointer */
+ leave /* pop base pointer */
+ ret
+ SET_SIZE(thread_splitstack_run)
+
+ /*
+ * See comment in the amd64 code, above.
+ */
+ ENTRY(thread_splitstack_cleanup)
+ LOADCPU(%eax)
+ movl CPU_TSS(%eax), %ecx
+ movl CPU_THREAD(%eax), %edx
+ movl T_STACK(%edx), %edx
+ addl $REGSIZE+MINFRAME, %edx
+ movl %edx, TSS_ESP0(%ecx)
+ ret
+ SET_SIZE(thread_splitstack_cleanup)
+
+#endif /* __i386 */
+
+#endif /* __lint */
diff --git a/usr/src/uts/intel/ia32/os/archdep.c b/usr/src/uts/intel/ia32/os/archdep.c
index 2a33f306c2..c96aff4a19 100644
--- a/usr/src/uts/intel/ia32/os/archdep.c
+++ b/usr/src/uts/intel/ia32/os/archdep.c
@@ -25,7 +25,7 @@
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
* Copyright 2012 Nexenta Systems, Inc. All rights reserved.
*/
@@ -575,6 +575,13 @@ ucontext_32ton(const ucontext32_t *src, ucontext_t *dst)
if (src->uc_flags & UC_FPU)
fpregset_32ton(&src->uc_mcontext.fpregs,
&dst->uc_mcontext.fpregs);
+
+ /*
+ * Copy the brand-private data:
+ */
+ dst->uc_brand_data[0] = (void *)(uintptr_t)src->uc_brand_data[0];
+ dst->uc_brand_data[1] = (void *)(uintptr_t)src->uc_brand_data[1];
+ dst->uc_brand_data[2] = (void *)(uintptr_t)src->uc_brand_data[2];
}
#endif /* _SYSCALL32_IMPL */
@@ -629,9 +636,11 @@ getuserpc()
#define IS_NOT_CS 0
/*ARGSUSED*/
-static greg_t
+greg_t
fix_segreg(greg_t sr, int iscs, model_t datamodel)
{
+ kthread_t *t = curthread;
+
switch (sr &= 0xffff) {
case 0:
@@ -668,6 +677,19 @@ fix_segreg(greg_t sr, int iscs, model_t datamodel)
}
/*
+ * Allow this process's brand to do any necessary segment register
+ * manipulation.
+ */
+ if (PROC_IS_BRANDED(t->t_procp) && BRMOP(t->t_procp)->b_fixsegreg) {
+ greg_t bsr = BRMOP(t->t_procp)->b_fixsegreg(sr, datamodel);
+
+ if (bsr == 0 && iscs == IS_CS)
+ return (0 | SEL_UPL);
+ else
+ return (bsr);
+ }
+
+ /*
* Force it into the LDT in ring 3 for 32-bit processes, which by
* default do not have an LDT, so that any attempt to use an invalid
* selector will reference the (non-existant) LDT, and cause a #gp
diff --git a/usr/src/uts/intel/ia32/os/desctbls.c b/usr/src/uts/intel/ia32/os/desctbls.c
index a05137eee6..97024b7b59 100644
--- a/usr/src/uts/intel/ia32/os/desctbls.c
+++ b/usr/src/uts/intel/ia32/os/desctbls.c
@@ -161,7 +161,7 @@ struct interposing_handler {
* The brand infrastructure interposes on two handlers, and we use one as a
* NULL signpost.
*/
-static struct interposing_handler brand_tbl[2];
+static struct interposing_handler brand_tbl[3];
/*
* software prototypes for default local descriptor table
@@ -976,6 +976,12 @@ init_idt_common(gate_desc_t *idt)
set_gatesegd(&idt[T_MCE], &mcetrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
set_gatesegd(&idt[T_SIMDFPE], &xmtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 0);
+ /*
+ * install "int80" handler at, well, 0x80.
+ */
+ set_gatesegd(&idt0[T_INT80], &sys_int80, KCS_SEL, SDT_SYSIGT, TRP_UPL,
+ 0);
+
/*
* install fast trap handler at 210.
*/
@@ -1001,21 +1007,27 @@ init_idt_common(gate_desc_t *idt)
SDT_SYSIGT, TRP_UPL, 0);
/*
- * Prepare interposing descriptor for the syscall handler
- * and cache copy of the default descriptor.
+- * Prepare interposing descriptors for the branded "int80"
+- * and syscall handlers and cache copies of the default
+- * descriptors.
*/
- brand_tbl[0].ih_inum = T_SYSCALLINT;
- brand_tbl[0].ih_default_desc = idt0[T_SYSCALLINT];
+ brand_tbl[0].ih_inum = T_INT80;
+ brand_tbl[0].ih_default_desc = idt0[T_INT80];
+ set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_int80, KCS_SEL,
+ SDT_SYSIGT, TRP_UPL, 0);
+
+ brand_tbl[1].ih_inum = T_SYSCALLINT;
+ brand_tbl[1].ih_default_desc = idt0[T_SYSCALLINT];
#if defined(__amd64)
- set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_syscall_int,
+ set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_syscall_int,
KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
#elif defined(__i386)
- set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_call,
+ set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_call,
KCS_SEL, SDT_SYSIGT, TRP_UPL, 0);
#endif /* __i386 */
- brand_tbl[1].ih_inum = 0;
+ brand_tbl[2].ih_inum = 0;
}
#if defined(__xpv)
diff --git a/usr/src/uts/intel/ia32/os/sendsig.c b/usr/src/uts/intel/ia32/os/sendsig.c
index b7b79f38ca..cf6c623b7a 100644
--- a/usr/src/uts/intel/ia32/os/sendsig.c
+++ b/usr/src/uts/intel/ia32/os/sendsig.c
@@ -20,6 +20,9 @@
*/
/*
+ * Copyright 2015 Joyent, Inc.
+ */
+/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -87,6 +90,8 @@
#include <sys/kdi.h>
#include <sys/contract_impl.h>
#include <sys/x86_archext.h>
+#include <sys/brand.h>
+#include <sys/sdt.h>
/*
* Construct the execution environment for the user's signal
@@ -186,7 +191,18 @@ sendsig(int sig, k_siginfo_t *sip, void (*hdlr)())
newstack = sigismember(&PTOU(curproc)->u_sigonstack, sig) &&
!(lwp->lwp_sigaltstack.ss_flags & (SS_ONSTACK|SS_DISABLE));
- if (newstack) {
+ /*
+ * If this is a branded process, the brand may provide an alternate
+ * stack pointer for signal delivery:
+ */
+ if (PROC_IS_BRANDED(p) && BROP(p)->b_sendsig_stack != NULL) {
+ /*
+ * Use the stack pointer value provided by the brand,
+ * accounting for the 128-byte reserved region.
+ */
+ newstack = 0;
+ fp = BROP(p)->b_sendsig_stack(sig) - STACK_RESERVE;
+ } else if (newstack) {
fp = (caddr_t)(SA((uintptr_t)lwp->lwp_sigaltstack.ss_sp) +
SA(lwp->lwp_sigaltstack.ss_size) - STACK_ALIGN);
} else {
@@ -296,6 +312,8 @@ sendsig(int sig, k_siginfo_t *sip, void (*hdlr)())
kmem_free(tuc, sizeof (*tuc));
tuc = NULL;
+ DTRACE_PROBE3(oldcontext__set, klwp_t *, lwp,
+ uintptr_t, lwp->lwp_oldcontext, uintptr_t, (uintptr_t)uc);
lwp->lwp_oldcontext = (uintptr_t)uc;
if (newstack) {
@@ -345,6 +363,14 @@ sendsig(int sig, k_siginfo_t *sip, void (*hdlr)())
}
/*
+ * Allow the brand to perform additional book-keeping once the signal
+ * handling frame has been fully assembled:
+ */
+ if (PROC_IS_BRANDED(p) && BROP(p)->b_sendsig != NULL) {
+ BROP(p)->b_sendsig(sig);
+ }
+
+ /*
* Don't set lwp_eosys here. sendsig() is called via psig() after
* lwp_eosys is handled, so setting it here would affect the next
* system call.
@@ -420,7 +446,17 @@ sendsig32(int sig, k_siginfo_t *sip, void (*hdlr)())
newstack = sigismember(&PTOU(curproc)->u_sigonstack, sig) &&
!(lwp->lwp_sigaltstack.ss_flags & (SS_ONSTACK|SS_DISABLE));
- if (newstack) {
+ /*
+ * If this is a branded process, the brand may provide an alternate
+ * stack pointer for signal delivery:
+ */
+ if (PROC_IS_BRANDED(p) && BROP(p)->b_sendsig_stack != NULL) {
+ /*
+ * Use the stack pointer value provided by the brand:
+ */
+ newstack = 0;
+ fp = BROP(p)->b_sendsig_stack(sig);
+ } else if (newstack) {
fp = (caddr_t)(SA32((uintptr_t)lwp->lwp_sigaltstack.ss_sp) +
SA32(lwp->lwp_sigaltstack.ss_size) - STACK_ALIGN32);
} else if ((rp->r_ss & 0xffff) != UDS_SEL) {
@@ -435,8 +471,9 @@ sendsig32(int sig, k_siginfo_t *sip, void (*hdlr)())
USEGD_GETBASE(&ldt[SELTOIDX(rp->r_ss)]);
else
fp = (caddr_t)rp->r_sp;
- } else
+ } else {
fp = (caddr_t)rp->r_sp;
+ }
/*
* Force proper stack pointer alignment, even in the face of a
@@ -517,6 +554,8 @@ sendsig32(int sig, k_siginfo_t *sip, void (*hdlr)())
kmem_free(tuc, sizeof (*tuc));
tuc = NULL;
+ DTRACE_PROBE3(oldcontext__set, klwp_t *, lwp,
+ uintptr_t, lwp->lwp_oldcontext, uintptr_t, (uintptr_t)uc);
lwp->lwp_oldcontext = (uintptr_t)uc;
if (newstack) {
@@ -566,6 +605,14 @@ sendsig32(int sig, k_siginfo_t *sip, void (*hdlr)())
}
/*
+ * Allow the brand to perform additional book-keeping once the signal
+ * handling frame has been fully assembled:
+ */
+ if (PROC_IS_BRANDED(p) && BROP(p)->b_sendsig != NULL) {
+ BROP(p)->b_sendsig(sig);
+ }
+
+ /*
* Don't set lwp_eosys here. sendsig() is called via psig() after
* lwp_eosys is handled, so setting it here would affect the next
* system call.
@@ -643,7 +690,17 @@ sendsig(int sig, k_siginfo_t *sip, void (*hdlr)())
newstack = sigismember(&PTOU(curproc)->u_sigonstack, sig) &&
!(lwp->lwp_sigaltstack.ss_flags & (SS_ONSTACK|SS_DISABLE));
- if (newstack) {
+ /*
+ * If this is a branded process, the brand may provide an alternate
+ * stack pointer for signal delivery:
+ */
+ if (PROC_IS_BRANDED(p) && BROP(p)->b_sendsig_stack != NULL) {
+ /*
+ * Use the stack pointer value provided by the brand:
+ */
+ newstack = 0;
+ fp = BROP(p)->b_sendsig_stack(sig);
+ } else if (newstack) {
fp = (caddr_t)(SA((uintptr_t)lwp->lwp_sigaltstack.ss_sp) +
SA(lwp->lwp_sigaltstack.ss_size) - STACK_ALIGN);
} else if ((rp->r_ss & 0xffff) != UDS_SEL) {
@@ -658,8 +715,9 @@ sendsig(int sig, k_siginfo_t *sip, void (*hdlr)())
USEGD_GETBASE(&ldt[SELTOIDX(rp->r_ss)]);
else
fp = (caddr_t)rp->r_sp;
- } else
+ } else {
fp = (caddr_t)rp->r_sp;
+ }
/*
* Force proper stack pointer alignment, even in the face of a
@@ -737,6 +795,8 @@ sendsig(int sig, k_siginfo_t *sip, void (*hdlr)())
kmem_free(tuc, sizeof (*tuc));
tuc = NULL;
+ DTRACE_PROBE3(oldcontext__set, klwp_t *, lwp,
+ uintptr_t, lwp->lwp_oldcontext, uintptr_t, (uintptr_t)uc);
lwp->lwp_oldcontext = (uintptr_t)uc;
if (newstack) {
@@ -774,6 +834,14 @@ sendsig(int sig, k_siginfo_t *sip, void (*hdlr)())
}
/*
+ * Allow the brand to perform additional book-keeping once the signal
+ * handling frame has been fully assembled:
+ */
+ if (PROC_IS_BRANDED(p) && BROP(p)->b_sendsig != NULL) {
+ BROP(p)->b_sendsig(sig);
+ }
+
+ /*
* Don't set lwp_eosys here. sendsig() is called via psig() after
* lwp_eosys is handled, so setting it here would affect the next
* system call.
diff --git a/usr/src/uts/intel/ia32/syscall/getcontext.c b/usr/src/uts/intel/ia32/syscall/getcontext.c
index cb5a5b52ba..8f72b5da72 100644
--- a/usr/src/uts/intel/ia32/syscall/getcontext.c
+++ b/usr/src/uts/intel/ia32/syscall/getcontext.c
@@ -20,6 +20,9 @@
*/
/*
+ * Copyright 2015 Joyent, Inc.
+ */
+/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -46,6 +49,7 @@
#include <sys/schedctl.h>
#include <sys/debug.h>
#include <sys/sysmacros.h>
+#include <sys/sdt.h>
/*
* Save user context.
@@ -125,7 +129,23 @@ savecontext(ucontext_t *ucp, const k_sigset_t *mask)
else
ucp->uc_flags &= ~UC_FPU;
- sigktou(mask, &ucp->uc_sigmask);
+ if (mask != NULL) {
+ /*
+ * Save signal mask.
+ */
+ sigktou(mask, &ucp->uc_sigmask);
+ } else {
+ ucp->uc_flags &= ~UC_SIGMASK;
+ bzero(&ucp->uc_sigmask, sizeof (ucp->uc_sigmask));
+ }
+
+ if (PROC_IS_BRANDED(p) && BROP(p)->b_savecontext != NULL) {
+ /*
+ * Allow the brand the chance to modify the context we
+ * saved:
+ */
+ BROP(p)->b_savecontext(ucp);
+ }
}
/*
@@ -136,7 +156,19 @@ restorecontext(ucontext_t *ucp)
{
kthread_t *t = curthread;
klwp_t *lwp = ttolwp(t);
+ proc_t *p = lwptoproc(lwp);
+ if (PROC_IS_BRANDED(p) && BROP(p)->b_restorecontext != NULL) {
+ /*
+ * Allow the brand the chance to modify the context before
+ * we restore it:
+ */
+ BROP(p)->b_restorecontext(ucp);
+ }
+
+ DTRACE_PROBE3(oldcontext__set, klwp_t *, lwp,
+ uintptr_t, lwp->lwp_oldcontext,
+ uintptr_t, (uintptr_t)ucp->uc_link);
lwp->lwp_oldcontext = (uintptr_t)ucp->uc_link;
if (ucp->uc_flags & UC_STACK) {
@@ -184,6 +216,7 @@ getsetcontext(int flag, void *arg)
ucontext_t *ucp;
klwp_t *lwp = ttolwp(curthread);
stack_t dummy_stk;
+ proc_t *p = lwptoproc(lwp);
/*
* In future releases, when the ucontext structure grows,
@@ -228,6 +261,15 @@ getsetcontext(int flag, void *arg)
return (set_errno(EFAULT));
}
+ /*
+ * If this is a branded process, copy in the brand-private
+ * data:
+ */
+ if (PROC_IS_BRANDED(p) && copyin(&ucp->uc_brand_data,
+ &uc.uc_brand_data, sizeof (uc.uc_brand_data)) != 0) {
+ return (set_errno(EFAULT));
+ }
+
restorecontext(&uc);
if ((uc.uc_flags & UC_STACK) && (lwp->lwp_ustack != 0))
@@ -311,7 +353,23 @@ savecontext32(ucontext32_t *ucp, const k_sigset_t *mask)
else
ucp->uc_flags &= ~UC_FPU;
- sigktou(mask, &ucp->uc_sigmask);
+ if (mask != NULL) {
+ /*
+ * Save signal mask.
+ */
+ sigktou(mask, &ucp->uc_sigmask);
+ } else {
+ ucp->uc_flags &= ~UC_SIGMASK;
+ bzero(&ucp->uc_sigmask, sizeof (ucp->uc_sigmask));
+ }
+
+ if (PROC_IS_BRANDED(p) && BROP(p)->b_savecontext32 != NULL) {
+ /*
+ * Allow the brand the chance to modify the context we
+ * saved:
+ */
+ BROP(p)->b_savecontext32(ucp);
+ }
}
int
@@ -323,6 +381,7 @@ getsetcontext32(int flag, void *arg)
klwp_t *lwp = ttolwp(curthread);
caddr32_t ustack32;
stack32_t dummy_stk32;
+ proc_t *p = lwptoproc(lwp);
switch (flag) {
default:
@@ -354,6 +413,15 @@ getsetcontext32(int flag, void *arg)
return (set_errno(EFAULT));
}
+ /*
+ * If this is a branded process, copy in the brand-private
+ * data:
+ */
+ if (PROC_IS_BRANDED(p) && copyin(&ucp->uc_brand_data,
+ &uc.uc_brand_data, sizeof (uc.uc_brand_data)) != 0) {
+ return (set_errno(EFAULT));
+ }
+
ucontext_32ton(&uc, &ucnat);
restorecontext(&ucnat);
diff --git a/usr/src/uts/intel/igb/Makefile b/usr/src/uts/intel/igb/Makefile
index 2d4a1f3556..b85fc2f5c2 100644
--- a/usr/src/uts/intel/igb/Makefile
+++ b/usr/src/uts/intel/igb/Makefile
@@ -72,6 +72,7 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
# Driver depends on MAC
#
LDFLAGS += -dy -N misc/mac
+MAPFILES += ddi mac random
#
# Default build targets.
@@ -97,4 +98,5 @@ install: $(INSTALL_DEPS)
#
# Include common targets.
#
+include $(UTSBASE)/Makefile.mapfile
include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/inotify/Makefile b/usr/src/uts/intel/inotify/Makefile
new file mode 100644
index 0000000000..80e7a80404
--- /dev/null
+++ b/usr/src/uts/intel/inotify/Makefile
@@ -0,0 +1,70 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = inotify
+OBJECTS = $(INOTIFY_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(INOTIFY_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/common/io
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+LINTTAGS += -erroff=E_STRUCT_DERIVED_FROM_FLEX_MBR
+CERRWARN += -_gcc=-Wno-parentheses
+LDFLAGS += -dy -Nfs/specfs
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY) $(SRC_CONFILE)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/io/acpica/changes.txt b/usr/src/uts/intel/io/acpica/changes.txt
index f53fd426da..69dcdf708c 100644
--- a/usr/src/uts/intel/io/acpica/changes.txt
+++ b/usr/src/uts/intel/io/acpica/changes.txt
@@ -1,7 +1,588 @@
----------------------------------------
-27 May 2011. Summary of changes for version 20110527:
+20 March 2012. Summary of changes for version 20120320:
+
+This release is available at www.acpica.org/downloads.
+The ACPI 5.0 specification is available at www.acpi.info.
+
+1) ACPICA Core Subsystem:
+
+Enhanced the sleep/wake interfaces to optionally execute the _GTS method
+(Going To Sleep) and the _BFS method (Back From Sleep). Windows apparently
+does not execute these methods, and therefore these methods are often
+untested. It has been seen on some systems where the execution of these
+methods causes errors and also prevents the machine from entering S5. It is
+therefore suggested that host operating systems do not execute these methods
+by default. In the future, perhaps these methods can be optionally executed
+based on the age of the system and/or what is the newest version of Windows
+that the BIOS asks for via _OSI. Changed interfaces: AcpiEnterSleepState and
+AcpileaveSleepStatePrep. See the ACPICA reference and Linux BZ 13041. Lin
+Ming.
+
+Fixed a problem where the length of the local/common FADT was set too early.
+The local FADT table length cannot be set to the common length until the
+original length has been examined. There is code that checks the table length
+and sets various fields appropriately. This can affect older machines with
+early FADT versions. For example, this can cause inadvertent writes to the
+CST_CNT register. Julian Anastasov.
+
+Fixed a mapping issue related to a physical table override. Use the deferred
+mapping mechanism for tables loaded via the physical override OSL interface.
+This allows for early mapping before the virtual memory manager is available.
+Thomas Renninger, Bob Moore.
+
+Enhanced the automatic return-object repair code: Repair a common problem with
+predefined methods that are defined to return a variable-length Package of
+sub-objects. If there is only one sub-object, some BIOS ASL code mistakenly
+simply returns the single object instead of a Package with one sub-object.
+This new support will repair this error by wrapping a Package object around
+the original object, creating the correct and expected Package with one sub-
+object. Names that can be repaired in this manner include: _ALR, _CSD, _HPX,
+_MLS, _PLD, _PRT, _PSS, _TRT, _TSS, _BCL, _DOD, _FIX, and _Sx. ACPICA BZ 939.
+
+Changed the exception code returned for invalid ACPI paths passed as
+parameters to external interfaces such as AcpiEvaluateObject. Was
+AE_BAD_PARAMETER, now is the more sensible AE_BAD_PATHNAME.
+
+Example Code and Data Size: These are the sizes for the OS-independent
+acpica.lib produced by the Microsoft Visual C++ 9.0 32-bit compiler. The debug
+version of the code includes the debug output trace mechanism and has a much
+larger code and data size.
+
+ Previous Release:
+ Non-Debug Version: 93.0K Code, 25.0K Data, 118.0K Total
+ Debug Version: 172.5K Code, 73.2K Data, 245.7K Total
+ Current Release:
+ Non-Debug Version: 92.9K Code, 25.0K Data, 117.9K Total
+ Debug Version: 172.5K Code, 73.2K Data, 245.7K Total
+
+
+2) iASL Compiler/Disassembler and Tools:
+
+iASL: Added the infrastructure and initial implementation of a integrated C-
+like preprocessor. This will simplify BIOS development process by eliminating
+the need for a separate preprocessing step during builds. On Windows, it also
+eliminates the need to install a separate C compiler. ACPICA BZ 761. Some
+features including full #define() macro support are still under development.
+These preprocessor directives are supported:
+ #define
+ #elif
+ #else
+ #endif
+ #error
+ #if
+ #ifdef
+ #ifndef
+ #include
+ #pragma message
+ #undef
+ #warning
+In addition, these new command line options are supported:
+ -D <symbol> Define symbol for preprocessor use
+ -li Create preprocessed output file (*.i)
+ -P Preprocess only and create preprocessor output file (*.i)
+
+Table Compiler: Fixed a problem where the equals operator within an expression
+did not work properly.
+
+Updated iASL to use the current versions of Bison/Flex. Updated the Windows
+project file to invoke these tools from the standard location. ACPICA BZ 904.
+Versions supported:
+ Flex for Windows: V2.5.4
+ Bison for Windows: V2.4.1
+
+----------------------------------------
+15 February 2012. Summary of changes for version 20120215:
+
+This release is available at www.acpica.org/downloads.
+The ACPI 5.0 specification is available at www.acpi.info.
+
+1) ACPICA Core Subsystem:
+
+There have been some major changes to the sleep/wake support code, as
+described below (a - e).
+
+a) The AcpiLeaveSleepState has been split into two interfaces, similar to
+AcpiEnterSleepStatePrep and AcpiEnterSleepState. The new interface is
+AcpiLeaveSleepStatePrep. This allows the host to perform actions between the
+time the _BFS method is called and the _WAK method is called. NOTE: all hosts
+must update their wake/resume code or else sleep/wake will not work properly.
+Rafael Wysocki.
+
+b) In AcpiLeaveSleepState, now enable all runtime GPEs before calling the _WAK
+method. Some machines require that the GPEs are enabled before the _WAK method
+is executed. Thomas Renninger.
+
+c) In AcpiLeaveSleepState, now always clear the WAK_STS (wake status) bit.
+Some BIOS code assumes that WAK_STS will be cleared on resume and use it to
+determine whether the system is rebooting or resuming. Matthew Garrett.
+
+d) Move the invocations of _GTS (Going To Sleep) and _BFS (Back From Sleep) to
+match the ACPI specification requirement. Rafael Wysocki.
+
+e) Implemented full support for the ACPI 5.0 SleepStatus and SleepControl
+registers within the V5 FADT. This support adds two new files:
+hardware/hwesleep.c implements the support for the new registers. Moved all
+sleep/wake external interfaces to hardware/hwxfsleep.c.
+
+
+Added a new OSL interface for ACPI table overrides,
+AcpiOsPhysicalTableOverride. This interface allows the host to override a
+table via a physical address, instead of the logical address required by
+AcpiOsTableOverride. This simplifies the host implementation. Initial
+implementation by Thomas Renninger. The ACPICA implementation creates a single
+shared function for table overrides that attempts both a logical and a
+physical override.
+
+Expanded the OSL memory read/write interfaces to 64-bit data
+(AcpiOsReadMemory, AcpiOsWriteMemory.) This enables full 64-bit memory
+transfer support for GAS register structures passed to AcpiRead and AcpiWrite.
+
+Implemented the ACPI_REDUCED_HARDWARE option to allow the creation of a custom
+build of ACPICA that supports only the ACPI 5.0 reduced hardware (SoC) model.
+See the ACPICA reference for details. ACPICA BZ 942. This option removes about
+10% of the code and 5% of the static data, and the following hardware ACPI
+features become unavailable:
+ PM Event and Control registers
+ SCI interrupt (and handler)
+ Fixed Events
+ General Purpose Events (GPEs)
+ Global Lock
+ ACPI PM timer
+ FACS table (Waking vectors and Global Lock)
+
+Updated the unix tarball directory structure to match the ACPICA git source
+tree. This ensures that the generic unix makefiles work properly (in
+generate/unix). Also updated the Linux makefiles to match. ACPICA BZ 867.
+
+Updated the return value of the _REV predefined method to integer value 5 to
+reflect ACPI 5.0 support.
+
+Moved the external ACPI PM timer interface prototypes to the public acpixf.h
+file where they belong.
+
+Example Code and Data Size: These are the sizes for the OS-independent
+acpica.lib produced by the Microsoft Visual C++ 9.0 32-bit compiler. The debug
+version of the code includes the debug output trace mechanism and has a much
+larger code and data size.
+
+ Previous Release:
+ Non-Debug Version: 92.8K Code, 24.9K Data, 117.7K Total
+ Debug Version: 171.7K Code, 72.9K Data, 244.5K Total
+ Current Release:
+ Non-Debug Version: 93.0K Code, 25.0K Data, 118.0K Total
+ Debug Version: 172.5K Code, 73.2K Data, 245.7K Total
+
+
+2) iASL Compiler/Disassembler and Tools:
+
+Disassembler: Fixed a problem with the new ACPI 5.0 serial resource
+descriptors (I2C, SPI, UART) where the resource produce/consumer bit was
+incorrectly displayed.
+
+AcpiHelp: Add display of ACPI/PNP device IDs that are defined in the ACPI
+specification.
+
+----------------------------------------
+11 January 2012. Summary of changes for version 20120111:
+
+This release is available at www.acpica.org/downloads.
+The ACPI 5.0 specification is available at www.acpi.info.
+
+1) ACPICA Core Subsystem:
+
+Implemented a new mechanism to allow host device drivers to check for address
+range conflicts with ACPI Operation Regions. Both SystemMemory and SystemIO
+address spaces are supported. A new external interface, AcpiCheckAddressRange,
+allows drivers to check an address range against the ACPI namespace. See the
+ACPICA reference for additional details. Adds one new file,
+utilities/utaddress.c. Lin Ming, Bob Moore.
+
+Fixed several issues with the ACPI 5.0 FADT support: Add the sleep Control and
+Status registers, update the ACPI 5.0 flags, and update internal data
+structures to handle an FADT larger than 256 bytes. The size of the ACPI 5.0
+FADT is 268 bytes.
+
+Updated all ACPICA copyrights and signons to 2012. Added the 2012 copyright to
+all module headers and signons, including the standard Linux header. This
+affects virtually every file in the ACPICA core subsystem, iASL compiler, and
+all ACPICA utilities.
+
+Example Code and Data Size: These are the sizes for the OS-independent
+acpica.lib produced by the Microsoft Visual C++ 9.0 32-bit compiler. The debug
+version of the code includes the debug output trace mechanism and has a much
+larger code and data size.
+
+ Previous Release:
+ Non-Debug Version: 92.3K Code, 24.9K Data, 117.2K Total
+ Debug Version: 170.8K Code, 72.6K Data, 243.4K Total
+ Current Release:
+ Non-Debug Version: 92.8K Code, 24.9K Data, 117.7K Total
+ Debug Version: 171.7K Code, 72.9K Data, 244.5K Total
+
+
+2) iASL Compiler/Disassembler and Tools:
+
+Disassembler: fixed a problem with the automatic resource tag generation
+support. Fixes a problem where the resource tags are inadvertently not
+constructed if the table being disassembled contains external references to
+control methods. Moved the actual construction of the tags to after the final
+namespace is constructed (after 2nd parse is invoked due to external control
+method references.) ACPICA BZ 941.
+
+Table Compiler: Make all "generic" operators caseless. These are the operators
+like UINT8, String, etc. Making these caseless improves ease-of-use. ACPICA BZ
+934.
+
+----------------------------------------
+23 November 2011. Summary of changes for version 20111123:
+
+0) ACPI 5.0 Support:
+
+This release contains full support for the ACPI 5.0 specification, as
+summarized below.
+
+Reduced Hardware Support:
+-------------------------
+
+This support allows for ACPI systems without the usual ACPI hardware. This
+support is enabled by a flag in the revision 5 FADT. If it is set, ACPICA will
+not attempt to initialize or use any of the usual ACPI hardware. Note, when
+this flag is set, all of the following ACPI hardware is assumed to be not
+present and is not initialized or accessed:
+
+ General Purpose Events (GPEs)
+ Fixed Events (PM1a/PM1b and PM Control)
+ Power Management Timer and Console Buttons (power/sleep)
+ Real-time Clock Alarm
+ Global Lock
+ System Control Interrupt (SCI)
+ The FACS is assumed to be non-existent
+
+ACPI Tables:
+------------
+
+All new tables and updates to existing tables are fully supported in the
+ACPICA headers (for use by device drivers), the disassembler, and the iASL
+Data Table Compiler. ACPI 5.0 defines these new tables:
+
+ BGRT /* Boot Graphics Resource Table */
+ DRTM /* Dynamic Root of Trust for Measurement table */
+ FPDT /* Firmware Performance Data Table */
+ GTDT /* Generic Timer Description Table */
+ MPST /* Memory Power State Table */
+ PCCT /* Platform Communications Channel Table */
+ PMTT /* Platform Memory Topology Table */
+ RASF /* RAS Feature table */
+
+Operation Regions/SpaceIDs:
+---------------------------
+
+All new operation regions are fully supported by the iASL compiler, the
+disassembler, and the ACPICA runtime code (for dispatch to region handlers.)
+The new operation region Space IDs are:
+
+ GeneralPurposeIo
+ GenericSerialBus
+
+Resource Descriptors:
+---------------------
+
+All new ASL resource descriptors are fully supported by the iASL compiler, the
+ASL/AML disassembler, and the ACPICA runtime Resource Manager code (including
+all new predefined resource tags). New descriptors are:
+
+ FixedDma
+ GpioIo
+ GpioInt
+ I2cSerialBus
+ SpiSerialBus
+ UartSerialBus
+
+ASL/AML Operators, New and Modified:
+------------------------------------
+
+One new operator is added, the Connection operator, which is used to associate
+a GeneralPurposeIo or GenericSerialBus resource descriptor with individual
+field objects within an operation region. Several new protocols are associated
+with the AccessAs operator. All are fully supported by the iASL compiler,
+disassembler, and runtime ACPICA AML interpreter:
+
+ Connection // Declare Field Connection attributes
+ AccessAs: AttribBytes (n) // Read/Write N-Bytes Protocol
+ AccessAs: AttribRawBytes (n) // Raw Read/Write N-Bytes Protocol
+ AccessAs: AttribRawProcessBytes (n) // Raw Process Call Protocol
+ RawDataBuffer // Data type for Vendor Data fields
+
+Predefined ASL/AML Objects:
+---------------------------
+
+All new predefined objects/control-methods are supported by the iASL compiler
+and the ACPICA runtime validation/repair (arguments and return values.) New
+predefined names include the following:
+
+Standard Predefined Names (Objects or Control Methods):
+ _AEI, _CLS, _CPC, _CWS, _DEP,
+ _DLM, _EVT, _GCP, _CRT, _GWS,
+ _HRV, _PRE, _PSE, _SRT, _SUB.
+
+Resource Tags (Names used to access individual fields within resource
+descriptors):
+ _DBT, _DPL, _DRS, _END, _FLC,
+ _IOR, _LIN, _MOD, _PAR, _PHA,
+ _PIN, _PPI, _POL, _RXL, _SLV,
+ _SPE, _STB, _TXL, _VEN.
+
+ACPICA External Interfaces:
+---------------------------
+
+Several new interfaces have been defined for use by ACPI-related device
+drivers and other host OS services:
+
+AcpiAcquireMutex and AcpiReleaseMutex: These interfaces allow the host OS to
+acquire and release AML mutexes that are defined in the DSDT/SSDT tables
+provided by the BIOS. They are intended to be used in conjunction with the
+ACPI 5.0 _DLM (Device Lock Method) in order to provide transaction-level
+mutual exclusion with the AML code/interpreter.
+
+AcpiGetEventResources: Returns the (formatted) resource descriptors as defined
+by the ACPI 5.0 _AEI object (ACPI Event Information). This object provides
+resource descriptors associated with hardware-reduced platform events, similar
+to the AcpiGetCurrentResources interface.
+
+Operation Region Handlers: For General Purpose IO and Generic Serial Bus
+operation regions, information about the Connection() object and any optional
+length information is passed to the region handler within the Context
+parameter.
+
+AcpiBufferToResource: This interface converts a raw AML buffer containing a
+resource template or resource descriptor to the ACPI_RESOURCE internal format
+suitable for use by device drivers. Can be used by an operation region handler
+to convert the Connection() buffer object into a ACPI_RESOURCE.
+
+Miscellaneous/Tools/TestSuites:
+-------------------------------
+
+Support for extended _HID names (Four alpha characters instead of three).
+Support for ACPI 5.0 features in the AcpiExec and AcpiHelp utilities.
+Support for ACPI 5.0 features in the ASLTS test suite.
+Fully updated documentation (ACPICA and iASL reference documents.)
+
+ACPI Table Definition Language:
+-------------------------------
+
+Support for this language was implemented and released as a subsystem of the
+iASL compiler in 2010. (See the iASL compiler User Guide.)
+
+
+Non-ACPI 5.0 changes for this release:
+--------------------------------------
+
+1) ACPICA Core Subsystem:
+
+Fix a problem with operation region declarations where a failure can occur if
+the region name and an argument that evaluates to an object (such as the
+region address) are in different namespace scopes. Lin Ming, ACPICA BZ 937.
+
+Do not abort an ACPI table load if an invalid space ID is found within. This
+will be caught later if the offending method is executed. ACPICA BZ 925.
+
+Fixed an issue with the FFixedHW space ID where the ID was not always
+recognized properly (Both ACPICA and iASL). ACPICA BZ 926.
+
+Fixed a problem with the 32-bit generation of the unix-specific OSL
+(osunixxf.c). Lin Ming, ACPICA BZ 936.
+
+Several changes made to enable generation with the GCC 4.6 compiler. ACPICA BZ
+935.
+
+New error messages: Unsupported I/O requests (not 8/16/32 bit), and Index/Bank
+field registers out-of-range.
+
+2) iASL Compiler/Disassembler and Tools:
+
+iASL: Implemented the __PATH__ operator, which returns the full pathname of
+the current source file.
-This release is available at www.acpica.org/downloads
+AcpiHelp: Automatically display expanded keyword information for all ASL
+operators.
+
+Debugger: Add "Template" command to disassemble/dump resource template
+buffers.
+
+Added a new master script to generate and execute the ASLTS test suite.
+Automatically handles 32- and 64-bit generation. See tests/aslts.sh
+
+iASL: Fix problem with listing generation during processing of the Switch()
+operator where AML listing was disabled until the entire Switch block was
+completed.
+
+iASL: Improve support for semicolon statement terminators. Fix "invalid
+character" message for some cases when the semicolon is used. Semicolons are
+now allowed after every <Term> grammar element. ACPICA BZ 927.
+
+iASL: Fixed some possible aliasing warnings during generation. ACPICA BZ 923.
+
+Disassembler: Fix problem with disassembly of the DataTableRegion operator
+where an inadvertent "Unhandled deferred opcode" message could be generated.
+
+3) Example Code and Data Size
+
+These are the sizes for the OS-independent acpica.lib produced by the
+Microsoft Visual C++ 9.0 32-bit compiler. The debug version of the code
+includes the debug output trace mechanism and has a much larger code and data
+size.
+
+ Previous Release:
+ Non-Debug Version: 90.2K Code, 23.9K Data, 114.1K Total
+ Debug Version: 165.6K Code, 68.4K Data, 234.0K Total
+ Current Release:
+ Non-Debug Version: 92.3K Code, 24.9K Data, 117.2K Total
+ Debug Version: 170.8K Code, 72.6K Data, 243.4K Total
+
+----------------------------------------
+22 September 2011. Summary of changes for version 20110922:
+
+0) ACPI 5.0 News:
+
+Support for ACPI 5.0 in ACPICA has been underway for several months and will
+be released at the same time that ACPI 5.0 is officially released.
+
+The ACPI 5.0 specification is on track for release in the next few months.
+
+1) ACPICA Core Subsystem:
+
+Fixed a problem where the maximum sleep time for the Sleep() operator was
+intended to be limited to two seconds, but was inadvertently limited to 20
+seconds instead.
+
+Linux and Unix makefiles: Added header file dependencies to ensure correct
+generation of ACPICA core code and utilities. Also simplified the makefiles
+considerably through the use of the vpath variable to specify search paths.
+ACPICA BZ 924.
+
+2) iASL Compiler/Disassembler and Tools:
+
+iASL: Implemented support to check the access length for all fields created to
+access named Resource Descriptor fields. For example, if a resource field is
+defined to be two bits, a warning is issued if a CreateXxxxField() is used
+with an incorrect bit length. This is implemented for all current resource
+descriptor names. ACPICA BZ 930.
+
+Disassembler: Fixed a byte ordering problem with the output of 24-bit and 56-
+bit integers.
+
+iASL: Fixed a couple of issues associated with variable-length package
+objects. 1) properly handle constants like One, Ones, Zero -- do not make a
+VAR_PACKAGE when these are used as a package length. 2) Allow the VAR_PACKAGE
+opcode (in addition to PACKAGE) when validating object types for predefined
+names.
+
+iASL: Emit statistics for all output files (instead of just the ASL input and
+AML output). Includes listings, hex files, etc.
+
+iASL: Added -G option to the table compiler to allow the compilation of custom
+ACPI tables. The only part of a table that is required is the standard 36-byte
+ACPI header.
+
+AcpiXtract: Ported to the standard ACPICA environment (with ACPICA headers),
+which also adds correct 64-bit support. Also, now all output filenames are
+completely lower case.
+
+AcpiExec: Ignore any non-AML tables (tables other than DSDT or SSDT) when
+loading table files. A warning is issued for any such tables. The only
+exception is an FADT. This also fixes a possible fault when attempting to load
+non-AML tables. ACPICA BZ 932.
+
+AcpiHelp: Added the AccessAs and Offset operators. Fixed a problem where a
+missing table terminator could cause a fault when using the -p option.
+
+AcpiSrc: Fixed a possible divide-by-zero fault when generating file
+statistics.
+
+3) Example Code and Data Size
+
+These are the sizes for the OS-independent acpica.lib produced by the
+Microsoft Visual C++ 9.0 32-bit compiler. The debug version of the code
+includes the debug output trace mechanism and has a much larger code and data
+size.
+
+ Previous Release (VC 9.0):
+ Non-Debug Version: 90.2K Code, 23.9K Data, 114.1K Total
+ Debug Version: 165.6K Code, 68.4K Data, 234.0K Total
+ Current Release (VC 9.0):
+ Non-Debug Version: 90.2K Code, 23.9K Data, 114.1K Total
+ Debug Version: 165.6K Code, 68.4K Data, 234.0K Total
+
+
+----------------------------------------
+23 June 2011. Summary of changes for version 20110623:
+
+1) ACPI CA Core Subsystem:
+
+Updated the predefined name repair mechanism to not attempt repair of a _TSS
+return object if a _PSS object is present. We can only sort the _TSS return
+package if there is no _PSS within the same scope. This is because if _PSS is
+present, the ACPI specification dictates that the _TSS Power Dissipation field
+is to be ignored, and therefore some BIOSs leave garbage values in the _TSS
+Power field(s). In this case, it is best to just return the _TSS package as-
+is. Reported by, and fixed with assistance from Fenghua Yu.
+
+Added an option to globally disable the control method return value validation
+and repair. This runtime option can be used to disable return value repair if
+this is causing a problem on a particular machine. Also added an option to
+AcpiExec (-dr) to set this disable flag.
+
+All makefiles and project files: Major changes to improve generation of ACPICA
+tools. ACPICA BZ 912:
+ Reduce default optimization levels to improve compatibility
+ For Linux, add strict-aliasing=0 for gcc 4
+ Cleanup and simplify use of command line defines
+ Cleanup multithread library support
+ Improve usage messages
+
+Linux-specific header: update handling of THREAD_ID and pthread. For the 32-
+bit case, improve casting to eliminate possible warnings, especially with the
+acpica tools.
+
+Example Code and Data Size: These are the sizes for the OS-independent
+acpica.lib produced by the Microsoft Visual C++ 9.0 32-bit compiler. The debug
+version of the code includes the debug output trace mechanism and has a much
+larger code and data size.
+
+ Previous Release (VC 9.0):
+ Non-Debug Version: 90.1K Code, 23.9K Data, 114.0K Total
+ Debug Version: 165.6K Code, 68.4K Data, 234.0K Total
+ Current Release (VC 9.0):
+ Non-Debug Version: 90.2K Code, 23.9K Data, 114.1K Total
+ Debug Version: 165.6K Code, 68.4K Data, 234.0K Total
+
+2) iASL Compiler/Disassembler and Tools:
+
+With this release, a new utility named "acpihelp" has been added to the ACPICA
+package. This utility summarizes the ACPI specification chapters for the ASL
+and AML languages. It generates under Linux/Unix as well as Windows, and
+provides the following functionality:
+ Find/display ASL operator(s) -- with description and syntax.
+ Find/display ASL keyword(s) -- with exact spelling and descriptions.
+ Find/display ACPI predefined name(s) -- with description, number
+ of arguments, and the return value data type.
+ Find/display AML opcode name(s) -- with opcode, arguments, and grammar.
+ Decode/display AML opcode -- with opcode name, arguments, and grammar.
+
+Service Layers: Make multi-thread support configurable. Conditionally compile
+the multi-thread support so that threading libraries will not be linked if not
+necessary. The only tool that requires multi-thread support is AcpiExec.
+
+iASL: Update yyerrror/AslCompilerError for "const" errors. Newer versions of
+Bison appear to want the interface to yyerror to be a const char * (or at
+least this is a problem when generating iASL on some systems.) ACPICA BZ 923
+Pierre Lejeune.
+
+Tools: Fix for systems where O_BINARY is not defined. Only used for Windows
+versions of the tools.
+
+----------------------------------------
+27 May 2011. Summary of changes for version 20110527:
1) ACPI CA Core Subsystem:
diff --git a/usr/src/uts/intel/io/acpica/debugger/dbcmds.c b/usr/src/uts/intel/io/acpica/debugger/dbcmds.c
index 73387b872c..459cd916ee 100644
--- a/usr/src/uts/intel/io/acpica/debugger/dbcmds.c
+++ b/usr/src/uts/intel/io/acpica/debugger/dbcmds.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -46,6 +46,7 @@
#include "accommon.h"
#include "acevents.h"
#include "acdebug.h"
+#include "acnamesp.h"
#include "acresrc.h"
#include "actables.h"
@@ -69,6 +70,18 @@ AcpiDmTestResourceConversion (
ACPI_NAMESPACE_NODE *Node,
char *Name);
+static ACPI_STATUS
+AcpiDbResourceCallback (
+ ACPI_RESOURCE *Resource,
+ void *Context);
+
+static ACPI_STATUS
+AcpiDbDeviceResources (
+ ACPI_HANDLE ObjHandle,
+ UINT32 NestingLevel,
+ void *Context,
+ void **ReturnValue);
+
/*******************************************************************************
*
@@ -148,28 +161,49 @@ AcpiDbSleep (
UINT8 SleepState;
+ ACPI_FUNCTION_TRACE (AcpiDbSleep);
+
+
SleepState = (UINT8) ACPI_STRTOUL (ObjectArg, NULL, 0);
AcpiOsPrintf ("**** Prepare to sleep ****\n");
Status = AcpiEnterSleepStatePrep (SleepState);
if (ACPI_FAILURE (Status))
{
- return (Status);
+ goto ErrorExit;
}
AcpiOsPrintf ("**** Going to sleep ****\n");
- Status = AcpiEnterSleepState (SleepState);
+ Status = AcpiEnterSleepState (SleepState, ACPI_NO_OPTIONAL_METHODS);
if (ACPI_FAILURE (Status))
{
- return (Status);
+ goto ErrorExit;
}
- AcpiOsPrintf ("**** returning from sleep ****\n");
+ AcpiOsPrintf ("**** Prepare to return from sleep ****\n");
+ Status = AcpiLeaveSleepStatePrep (SleepState, ACPI_NO_OPTIONAL_METHODS);
+ if (ACPI_FAILURE (Status))
+ {
+ goto ErrorExit;
+ }
+
+ AcpiOsPrintf ("**** Returning from sleep ****\n");
Status = AcpiLeaveSleepState (SleepState);
+ if (ACPI_FAILURE (Status))
+ {
+ goto ErrorExit;
+ }
return (Status);
+
+
+ErrorExit:
+
+ ACPI_EXCEPTION ((AE_INFO, Status, "During sleep test"));
+ return (Status);
}
+
/*******************************************************************************
*
* FUNCTION: AcpiDbDisplayLocks
@@ -339,25 +373,20 @@ AcpiDbSendNotify (
return;
}
- /* Decode Named object type */
+ /* Dispatch the notify if legal */
- switch (Node->Type)
+ if (AcpiEvIsNotifyObject (Node))
{
- case ACPI_TYPE_DEVICE:
- case ACPI_TYPE_THERMAL:
-
- /* Send the notify */
-
Status = AcpiEvQueueNotifyRequest (Node, Value);
if (ACPI_FAILURE (Status))
{
AcpiOsPrintf ("Could not queue notify\n");
}
- break;
-
- default:
- AcpiOsPrintf ("Named object is not a device or a thermal object\n");
- break;
+ }
+ else
+ {
+ AcpiOsPrintf ("Named object [%4.4s] Type %s, must be Device/Thermal/Processor type\n",
+ AcpiUtGetNodeName (Node), AcpiUtGetTypeName (Node->Type));
}
}
@@ -456,6 +485,78 @@ AcpiDbDisplayInterfaces (
/*******************************************************************************
*
+ * FUNCTION: AcpiDbDisplayTemplate
+ *
+ * PARAMETERS: BufferArg - Buffer name or addrss
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Dump a buffer that contains a resource template
+ *
+ ******************************************************************************/
+
+void
+AcpiDbDisplayTemplate (
+ char *BufferArg)
+{
+ ACPI_NAMESPACE_NODE *Node;
+ ACPI_STATUS Status;
+ ACPI_BUFFER ReturnObj;
+
+
+ /* Translate BufferArg to an Named object */
+
+ Node = AcpiDbConvertToNode (BufferArg);
+ if (!Node || (Node == AcpiGbl_RootNode))
+ {
+ AcpiOsPrintf ("Invalid argument: %s\n", BufferArg);
+ return;
+ }
+
+ /* We must have a buffer object */
+
+ if (Node->Type != ACPI_TYPE_BUFFER)
+ {
+ AcpiOsPrintf ("Not a Buffer object, cannot be a template: %s\n",
+ BufferArg);
+ return;
+ }
+
+ ReturnObj.Length = ACPI_DEBUG_BUFFER_SIZE;
+ ReturnObj.Pointer = AcpiGbl_DbBuffer;
+
+ /* Attempt to convert the raw buffer to a resource list */
+
+ Status = AcpiRsCreateResourceList (Node->Object, &ReturnObj);
+
+ AcpiDbSetOutputDestination (ACPI_DB_REDIRECTABLE_OUTPUT);
+ AcpiDbgLevel |= ACPI_LV_RESOURCES;
+
+ if (ACPI_FAILURE (Status))
+ {
+ AcpiOsPrintf ("Could not convert Buffer to a resource list: %s, %s\n",
+ BufferArg, AcpiFormatException (Status));
+ goto DumpBuffer;
+ }
+
+ /* Now we can dump the resource list */
+
+ AcpiRsDumpResourceList (ACPI_CAST_PTR (ACPI_RESOURCE,
+ ReturnObj.Pointer));
+
+DumpBuffer:
+ AcpiOsPrintf ("\nRaw data buffer:\n");
+ AcpiUtDumpBuffer ((UINT8 *) Node->Object->Buffer.Pointer,
+ Node->Object->Buffer.Length,
+ DB_BYTE_DISPLAY, ACPI_UINT32_MAX);
+
+ AcpiDbSetOutputDestination (ACPI_DB_CONSOLE_OUTPUT);
+ return;
+}
+
+
+/*******************************************************************************
+ *
* FUNCTION: AcpiDmCompareAmlResources
*
* PARAMETERS: Aml1Buffer - Contains first resource list
@@ -479,11 +580,14 @@ AcpiDmCompareAmlResources (
{
UINT8 *Aml1;
UINT8 *Aml2;
+ UINT8 *Aml1End;
+ UINT8 *Aml2End;
ACPI_RSDESC_SIZE Aml1Length;
ACPI_RSDESC_SIZE Aml2Length;
ACPI_RSDESC_SIZE Offset = 0;
UINT8 ResourceType;
UINT32 Count = 0;
+ UINT32 i;
/* Compare overall buffer sizes (may be different due to size rounding) */
@@ -491,16 +595,18 @@ AcpiDmCompareAmlResources (
if (Aml1BufferLength != Aml2BufferLength)
{
AcpiOsPrintf (
- "**** Buffer length mismatch in converted AML: original %X new %X ****\n",
+ "**** Buffer length mismatch in converted AML: Original %X, New %X ****\n",
Aml1BufferLength, Aml2BufferLength);
}
Aml1 = Aml1Buffer;
Aml2 = Aml2Buffer;
+ Aml1End = Aml1Buffer + Aml1BufferLength;
+ Aml2End = Aml2Buffer + Aml2BufferLength;
/* Walk the descriptor lists, comparing each descriptor */
- while (Aml1 < (Aml1Buffer + Aml1BufferLength))
+ while ((Aml1 < Aml1End) && (Aml2 < Aml2End))
{
/* Get the lengths of each descriptor */
@@ -513,7 +619,7 @@ AcpiDmCompareAmlResources (
if (Aml1Length != Aml2Length)
{
AcpiOsPrintf (
- "**** Length mismatch in descriptor [%.2X] type %2.2X, Offset %8.8X L1 %X L2 %X ****\n",
+ "**** Length mismatch in descriptor [%.2X] type %2.2X, Offset %8.8X Len1 %X, Len2 %X ****\n",
Count, ResourceType, Offset, Aml1Length, Aml2Length);
}
@@ -524,6 +630,15 @@ AcpiDmCompareAmlResources (
AcpiOsPrintf (
"**** Data mismatch in descriptor [%.2X] type %2.2X, Offset %8.8X ****\n",
Count, ResourceType, Offset);
+
+ for (i = 0; i < Aml1Length; i++)
+ {
+ if (Aml1[i] != Aml2[i])
+ {
+ AcpiOsPrintf ("Mismatch at byte offset %.2X: is %2.2X, should be %2.2X\n",
+ i, Aml2[i], Aml1[i]);
+ }
+ }
}
/* Exit on EndTag descriptor */
@@ -626,160 +741,301 @@ Exit1:
/*******************************************************************************
*
- * FUNCTION: AcpiDbDisplayResources
+ * FUNCTION: AcpiDbResourceCallback
*
- * PARAMETERS: ObjectArg - String with hex value of the object
+ * PARAMETERS: ACPI_WALK_RESOURCE_CALLBACK
*
- * RETURN: None
+ * RETURN: Status
*
- * DESCRIPTION: Display the resource objects associated with a device.
+ * DESCRIPTION: Simple callback to exercise AcpiWalkResources
*
******************************************************************************/
-void
-AcpiDbDisplayResources (
- char *ObjectArg)
+static ACPI_STATUS
+AcpiDbResourceCallback (
+ ACPI_RESOURCE *Resource,
+ void *Context)
+{
+
+ return (AE_OK);
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiDbDeviceResources
+ *
+ * PARAMETERS: ACPI_WALK_CALLBACK
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Display the _PRT/_CRS/_PRS resources for a device object.
+ *
+ ******************************************************************************/
+
+static ACPI_STATUS
+AcpiDbDeviceResources (
+ ACPI_HANDLE ObjHandle,
+ UINT32 NestingLevel,
+ void *Context,
+ void **ReturnValue)
{
ACPI_NAMESPACE_NODE *Node;
- ACPI_STATUS Status;
+ ACPI_NAMESPACE_NODE *PrtNode = NULL;
+ ACPI_NAMESPACE_NODE *CrsNode = NULL;
+ ACPI_NAMESPACE_NODE *PrsNode = NULL;
+ ACPI_NAMESPACE_NODE *AeiNode = NULL;
+ char *ParentPath;
ACPI_BUFFER ReturnObj;
+ ACPI_STATUS Status;
- AcpiDbSetOutputDestination (ACPI_DB_REDIRECTABLE_OUTPUT);
- AcpiDbgLevel |= ACPI_LV_RESOURCES;
+ Node = ACPI_CAST_PTR (ACPI_NAMESPACE_NODE, ObjHandle);
+ ParentPath = AcpiNsGetExternalPathname (Node);
+ if (!ParentPath)
+ {
+ return (AE_NO_MEMORY);
+ }
- /* Convert string to object pointer */
+ /* Get handles to the resource methods for this device */
- Node = AcpiDbConvertToNode (ObjectArg);
- if (!Node)
+ (void) AcpiGetHandle (Node, METHOD_NAME__PRT, ACPI_CAST_PTR (ACPI_HANDLE, &PrtNode));
+ (void) AcpiGetHandle (Node, METHOD_NAME__CRS, ACPI_CAST_PTR (ACPI_HANDLE, &CrsNode));
+ (void) AcpiGetHandle (Node, METHOD_NAME__PRS, ACPI_CAST_PTR (ACPI_HANDLE, &PrsNode));
+ (void) AcpiGetHandle (Node, METHOD_NAME__AEI, ACPI_CAST_PTR (ACPI_HANDLE, &AeiNode));
+ if (!PrtNode && !CrsNode && !PrsNode && !AeiNode)
{
- return;
+ goto Cleanup; /* Nothing to do */
}
+ AcpiOsPrintf ("\nDevice: %s\n", ParentPath);
+
/* Prepare for a return object of arbitrary size */
ReturnObj.Pointer = AcpiGbl_DbBuffer;
ReturnObj.Length = ACPI_DEBUG_BUFFER_SIZE;
+
/* _PRT */
- AcpiOsPrintf ("Evaluating _PRT\n");
+ if (PrtNode)
+ {
+ AcpiOsPrintf ("Evaluating _PRT\n");
- /* Check if _PRT exists */
+ Status = AcpiEvaluateObject (PrtNode, NULL, NULL, &ReturnObj);
+ if (ACPI_FAILURE (Status))
+ {
+ AcpiOsPrintf ("Could not evaluate _PRT: %s\n",
+ AcpiFormatException (Status));
+ goto GetCrs;
+ }
- Status = AcpiEvaluateObject (Node, METHOD_NAME__PRT, NULL, &ReturnObj);
- if (ACPI_FAILURE (Status))
- {
- AcpiOsPrintf ("Could not obtain _PRT: %s\n",
- AcpiFormatException (Status));
- goto GetCrs;
- }
+ ReturnObj.Pointer = AcpiGbl_DbBuffer;
+ ReturnObj.Length = ACPI_DEBUG_BUFFER_SIZE;
- ReturnObj.Pointer = AcpiGbl_DbBuffer;
- ReturnObj.Length = ACPI_DEBUG_BUFFER_SIZE;
+ Status = AcpiGetIrqRoutingTable (Node, &ReturnObj);
+ if (ACPI_FAILURE (Status))
+ {
+ AcpiOsPrintf ("GetIrqRoutingTable failed: %s\n",
+ AcpiFormatException (Status));
+ goto GetCrs;
+ }
- Status = AcpiGetIrqRoutingTable (Node, &ReturnObj);
- if (ACPI_FAILURE (Status))
- {
- AcpiOsPrintf ("GetIrqRoutingTable failed: %s\n",
- AcpiFormatException (Status));
- goto GetCrs;
+ AcpiRsDumpIrqList (ACPI_CAST_PTR (UINT8, AcpiGbl_DbBuffer));
}
- AcpiRsDumpIrqList (ACPI_CAST_PTR (UINT8, AcpiGbl_DbBuffer));
-
/* _CRS */
GetCrs:
- AcpiOsPrintf ("Evaluating _CRS\n");
+ if (CrsNode)
+ {
+ AcpiOsPrintf ("Evaluating _CRS\n");
- ReturnObj.Pointer = AcpiGbl_DbBuffer;
- ReturnObj.Length = ACPI_DEBUG_BUFFER_SIZE;
+ ReturnObj.Pointer = AcpiGbl_DbBuffer;
+ ReturnObj.Length = ACPI_DEBUG_BUFFER_SIZE;
- /* Check if _CRS exists */
+ Status = AcpiEvaluateObject (CrsNode, NULL, NULL, &ReturnObj);
+ if (ACPI_FAILURE (Status))
+ {
+ AcpiOsPrintf ("Could not evaluate _CRS: %s\n",
+ AcpiFormatException (Status));
+ goto GetPrs;
+ }
- Status = AcpiEvaluateObject (Node, METHOD_NAME__CRS, NULL, &ReturnObj);
- if (ACPI_FAILURE (Status))
- {
- AcpiOsPrintf ("Could not obtain _CRS: %s\n",
- AcpiFormatException (Status));
- goto GetPrs;
- }
+ /* This code is here to exercise the AcpiWalkResources interface */
- /* Get the _CRS resource list */
+ Status = AcpiWalkResources (Node, METHOD_NAME__CRS,
+ AcpiDbResourceCallback, NULL);
+ if (ACPI_FAILURE (Status))
+ {
+ AcpiOsPrintf ("AcpiWalkResources failed: %s\n",
+ AcpiFormatException (Status));
+ goto GetPrs;
+ }
- ReturnObj.Pointer = AcpiGbl_DbBuffer;
- ReturnObj.Length = ACPI_DEBUG_BUFFER_SIZE;
+ /* Get the _CRS resource list */
- Status = AcpiGetCurrentResources (Node, &ReturnObj);
- if (ACPI_FAILURE (Status))
- {
- AcpiOsPrintf ("AcpiGetCurrentResources failed: %s\n",
- AcpiFormatException (Status));
- goto GetPrs;
- }
+ ReturnObj.Pointer = AcpiGbl_DbBuffer;
+ ReturnObj.Length = ACPI_DEBUG_BUFFER_SIZE;
- /* Dump the _CRS resource list */
+ Status = AcpiGetCurrentResources (Node, &ReturnObj);
+ if (ACPI_FAILURE (Status))
+ {
+ AcpiOsPrintf ("AcpiGetCurrentResources failed: %s\n",
+ AcpiFormatException (Status));
+ goto GetPrs;
+ }
- AcpiRsDumpResourceList (ACPI_CAST_PTR (ACPI_RESOURCE,
- ReturnObj.Pointer));
+ /* Dump the _CRS resource list */
- /*
- * Perform comparison of original AML to newly created AML. This tests both
- * the AML->Resource conversion and the Resource->Aml conversion.
- */
- Status = AcpiDmTestResourceConversion (Node, METHOD_NAME__CRS);
+ AcpiRsDumpResourceList (ACPI_CAST_PTR (ACPI_RESOURCE,
+ ReturnObj.Pointer));
- /* Execute _SRS with the resource list */
+ /*
+ * Perform comparison of original AML to newly created AML. This tests both
+ * the AML->Resource conversion and the Resource->Aml conversion.
+ */
+ Status = AcpiDmTestResourceConversion (Node, METHOD_NAME__CRS);
- Status = AcpiSetCurrentResources (Node, &ReturnObj);
- if (ACPI_FAILURE (Status))
- {
- AcpiOsPrintf ("AcpiSetCurrentResources failed: %s\n",
- AcpiFormatException (Status));
- goto GetPrs;
+ /* Execute _SRS with the resource list */
+
+ Status = AcpiSetCurrentResources (Node, &ReturnObj);
+ if (ACPI_FAILURE (Status))
+ {
+ AcpiOsPrintf ("AcpiSetCurrentResources failed: %s\n",
+ AcpiFormatException (Status));
+ goto GetPrs;
+ }
}
/* _PRS */
GetPrs:
- AcpiOsPrintf ("Evaluating _PRS\n");
+ if (PrsNode)
+ {
+ AcpiOsPrintf ("Evaluating _PRS\n");
- ReturnObj.Pointer = AcpiGbl_DbBuffer;
- ReturnObj.Length = ACPI_DEBUG_BUFFER_SIZE;
+ ReturnObj.Pointer = AcpiGbl_DbBuffer;
+ ReturnObj.Length = ACPI_DEBUG_BUFFER_SIZE;
+
+ Status = AcpiEvaluateObject (PrsNode, NULL, NULL, &ReturnObj);
+ if (ACPI_FAILURE (Status))
+ {
+ AcpiOsPrintf ("Could not evaluate _PRS: %s\n",
+ AcpiFormatException (Status));
+ goto GetAei;
+ }
- /* Check if _PRS exists */
+ ReturnObj.Pointer = AcpiGbl_DbBuffer;
+ ReturnObj.Length = ACPI_DEBUG_BUFFER_SIZE;
- Status = AcpiEvaluateObject (Node, METHOD_NAME__PRS, NULL, &ReturnObj);
- if (ACPI_FAILURE (Status))
- {
- AcpiOsPrintf ("Could not obtain _PRS: %s\n",
- AcpiFormatException (Status));
- goto Cleanup;
+ Status = AcpiGetPossibleResources (Node, &ReturnObj);
+ if (ACPI_FAILURE (Status))
+ {
+ AcpiOsPrintf ("AcpiGetPossibleResources failed: %s\n",
+ AcpiFormatException (Status));
+ goto GetAei;
+ }
+
+ AcpiRsDumpResourceList (ACPI_CAST_PTR (ACPI_RESOURCE, AcpiGbl_DbBuffer));
}
- ReturnObj.Pointer = AcpiGbl_DbBuffer;
- ReturnObj.Length = ACPI_DEBUG_BUFFER_SIZE;
- Status = AcpiGetPossibleResources (Node, &ReturnObj);
- if (ACPI_FAILURE (Status))
+ /* _AEI */
+
+GetAei:
+ if (AeiNode)
{
- AcpiOsPrintf ("AcpiGetPossibleResources failed: %s\n",
- AcpiFormatException (Status));
- goto Cleanup;
+ AcpiOsPrintf ("Evaluating _AEI\n");
+
+ ReturnObj.Pointer = AcpiGbl_DbBuffer;
+ ReturnObj.Length = ACPI_DEBUG_BUFFER_SIZE;
+
+ Status = AcpiEvaluateObject (AeiNode, NULL, NULL, &ReturnObj);
+ if (ACPI_FAILURE (Status))
+ {
+ AcpiOsPrintf ("Could not evaluate _AEI: %s\n",
+ AcpiFormatException (Status));
+ goto Cleanup;
+ }
+
+ ReturnObj.Pointer = AcpiGbl_DbBuffer;
+ ReturnObj.Length = ACPI_DEBUG_BUFFER_SIZE;
+
+ Status = AcpiGetEventResources (Node, &ReturnObj);
+ if (ACPI_FAILURE (Status))
+ {
+ AcpiOsPrintf ("AcpiGetEventResources failed: %s\n",
+ AcpiFormatException (Status));
+ goto Cleanup;
+ }
+
+ AcpiRsDumpResourceList (ACPI_CAST_PTR (ACPI_RESOURCE, AcpiGbl_DbBuffer));
}
- AcpiRsDumpResourceList (ACPI_CAST_PTR (ACPI_RESOURCE, AcpiGbl_DbBuffer));
Cleanup:
+ ACPI_FREE (ParentPath);
+ return (AE_OK);
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiDbDisplayResources
+ *
+ * PARAMETERS: ObjectArg - String object name or object pointer.
+ * "*" means "display resources for all devices"
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Display the resource objects associated with a device.
+ *
+ ******************************************************************************/
+
+void
+AcpiDbDisplayResources (
+ char *ObjectArg)
+{
+ ACPI_NAMESPACE_NODE *Node;
+
+
+ AcpiDbSetOutputDestination (ACPI_DB_REDIRECTABLE_OUTPUT);
+ AcpiDbgLevel |= ACPI_LV_RESOURCES;
+
+ /* Asterisk means "display resources for all devices" */
+
+ if (!ACPI_STRCMP (ObjectArg, "*"))
+ {
+ (void) AcpiWalkNamespace (ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
+ ACPI_UINT32_MAX, AcpiDbDeviceResources, NULL, NULL, NULL);
+ }
+ else
+ {
+ /* Convert string to object pointer */
+
+ Node = AcpiDbConvertToNode (ObjectArg);
+ if (Node)
+ {
+ if (Node->Type != ACPI_TYPE_DEVICE)
+ {
+ AcpiOsPrintf ("%4.4s: Name is not a device object (%s)\n",
+ Node->Name.Ascii, AcpiUtGetTypeName (Node->Type));
+ }
+ else
+ {
+ (void) AcpiDbDeviceResources (Node, 0, NULL, NULL);
+ }
+ }
+ }
AcpiDbSetOutputDestination (ACPI_DB_CONSOLE_OUTPUT);
- return;
}
+#if (!ACPI_REDUCED_HARDWARE)
/*******************************************************************************
*
* FUNCTION: AcpiDbGenerateGpe
@@ -818,5 +1074,6 @@ AcpiDbGenerateGpe (
(void) AcpiEvGpeDispatch (NULL, GpeEventInfo, GpeNumber);
}
+#endif /* !ACPI_REDUCED_HARDWARE */
#endif /* ACPI_DEBUGGER */
diff --git a/usr/src/uts/intel/io/acpica/debugger/dbdisply.c b/usr/src/uts/intel/io/acpica/debugger/dbdisply.c
index 61e837e751..7facc8dd08 100644
--- a/usr/src/uts/intel/io/acpica/debugger/dbdisply.c
+++ b/usr/src/uts/intel/io/acpica/debugger/dbdisply.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -90,6 +90,8 @@ static ACPI_ADR_SPACE_TYPE AcpiGbl_SpaceIdList[] =
ACPI_ADR_SPACE_CMOS,
ACPI_ADR_SPACE_PCI_BAR_TARGET,
ACPI_ADR_SPACE_IPMI,
+ ACPI_ADR_SPACE_GPIO,
+ ACPI_ADR_SPACE_GSBUS,
ACPI_ADR_SPACE_DATA_TABLE,
ACPI_ADR_SPACE_FIXED_HARDWARE
};
@@ -105,8 +107,8 @@ typedef struct acpi_handler_info
static ACPI_HANDLER_INFO AcpiGbl_HandlerList[] =
{
- {&AcpiGbl_SystemNotify.Handler, "System Notifications"},
- {&AcpiGbl_DeviceNotify.Handler, "Device Notifications"},
+ {&AcpiGbl_GlobalNotify[0].Handler, "System Notifications"},
+ {&AcpiGbl_GlobalNotify[1].Handler, "Device Notifications"},
{&AcpiGbl_TableHandler, "ACPI Table Events"},
{&AcpiGbl_ExceptionHandler, "Control Method Exceptions"},
{&AcpiGbl_InterfaceHandler, "OSI Invocations"}
@@ -768,6 +770,7 @@ AcpiDbDisplayArgumentObject (
}
+#if (!ACPI_REDUCED_HARDWARE)
/*******************************************************************************
*
* FUNCTION: AcpiDbDisplayGpes
@@ -930,6 +933,7 @@ AcpiDbDisplayGpes (
GpeXruptInfo = GpeXruptInfo->Next;
}
}
+#endif /* !ACPI_REDUCED_HARDWARE */
/*******************************************************************************
@@ -971,7 +975,7 @@ AcpiDbDisplayHandlers (
while (HandlerObj)
{
- if (i == HandlerObj->AddressSpace.SpaceId)
+ if (AcpiGbl_SpaceIdList[i] == HandlerObj->AddressSpace.SpaceId)
{
AcpiOsPrintf (ACPI_HANDLER_PRESENT_STRING,
(HandlerObj->AddressSpace.HandlerFlags &
@@ -989,8 +993,28 @@ AcpiDbDisplayHandlers (
FoundHandler:;
}
+
+ /* Find all handlers for user-defined SpaceIDs */
+
+ HandlerObj = ObjDesc->Device.Handler;
+ while (HandlerObj)
+ {
+ if (HandlerObj->AddressSpace.SpaceId >= ACPI_USER_REGION_BEGIN)
+ {
+ AcpiOsPrintf (ACPI_PREDEFINED_PREFIX,
+ "User-defined ID", HandlerObj->AddressSpace.SpaceId);
+ AcpiOsPrintf (ACPI_HANDLER_PRESENT_STRING,
+ (HandlerObj->AddressSpace.HandlerFlags &
+ ACPI_ADDR_HANDLER_DEFAULT_INSTALLED) ? "Default" : "User",
+ HandlerObj->AddressSpace.Handler);
+ }
+
+ HandlerObj = HandlerObj->AddressSpace.Next;
+ }
}
+#if (!ACPI_REDUCED_HARDWARE)
+
/* Fixed event handlers */
AcpiOsPrintf ("\nFixed Event Handlers:\n");
@@ -1009,6 +1033,8 @@ AcpiDbDisplayHandlers (
}
}
+#endif /* !ACPI_REDUCED_HARDWARE */
+
/* Miscellaneous global handlers */
AcpiOsPrintf ("\nMiscellaneous Global Handlers:\n");
diff --git a/usr/src/uts/intel/io/acpica/debugger/dbexec.c b/usr/src/uts/intel/io/acpica/debugger/dbexec.c
index bcbc7a8daf..7818c9ddbe 100644
--- a/usr/src/uts/intel/io/acpica/debugger/dbexec.c
+++ b/usr/src/uts/intel/io/acpica/debugger/dbexec.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/debugger/dbfileio.c b/usr/src/uts/intel/io/acpica/debugger/dbfileio.c
index 8d0ef0aaa8..ef9b7fef53 100644
--- a/usr/src/uts/intel/io/acpica/debugger/dbfileio.c
+++ b/usr/src/uts/intel/io/acpica/debugger/dbfileio.c
@@ -6,7 +6,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -271,10 +271,11 @@ AcpiDbReadTable (
fseek (fp, 0, SEEK_SET);
- /* The RSDT and FACS tables do not have standard ACPI headers */
+ /* The RSDT, FACS and S3PT tables do not have standard ACPI headers */
if (ACPI_COMPARE_NAME (TableHeader.Signature, "RSD ") ||
- ACPI_COMPARE_NAME (TableHeader.Signature, "FACS"))
+ ACPI_COMPARE_NAME (TableHeader.Signature, "FACS") ||
+ ACPI_COMPARE_NAME (TableHeader.Signature, "S3PT"))
{
*TableLength = FileSize;
StandardHeader = FALSE;
diff --git a/usr/src/uts/intel/io/acpica/debugger/dbhistry.c b/usr/src/uts/intel/io/acpica/debugger/dbhistry.c
index 855d6b956f..536b69cdfa 100644
--- a/usr/src/uts/intel/io/acpica/debugger/dbhistry.c
+++ b/usr/src/uts/intel/io/acpica/debugger/dbhistry.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/debugger/dbinput.c b/usr/src/uts/intel/io/acpica/debugger/dbinput.c
index 1d716cfb1b..0ae313934b 100644
--- a/usr/src/uts/intel/io/acpica/debugger/dbinput.c
+++ b/usr/src/uts/intel/io/acpica/debugger/dbinput.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -131,6 +131,7 @@ enum AcpiExDebuggerCommands
CMD_STATS,
CMD_STOP,
CMD_TABLES,
+ CMD_TEMPLATE,
CMD_TERMINATE,
CMD_THREADS,
CMD_TRACE,
@@ -199,6 +200,7 @@ static const COMMAND_INFO AcpiGbl_DbCommands[] =
{"STATS", 0},
{"STOP", 0},
{"TABLES", 0},
+ {"TEMPLATE", 1},
{"TERMINATE", 0},
{"THREADS", 3},
{"TRACE", 1},
@@ -268,9 +270,10 @@ AcpiDbDisplayHelp (
AcpiOsPrintf (" Predefined Check all predefined names\n");
AcpiOsPrintf (" Prefix [<NamePath>] Set or Get current execution prefix\n");
AcpiOsPrintf (" References <Addr> Find all references to object at addr\n");
- AcpiOsPrintf (" Resources <Device> Get and display Device resources\n");
+ AcpiOsPrintf (" Resources <DeviceName | *> Display Device resources (* = all devices)\n");
AcpiOsPrintf (" Set N <NamedObject> <Value> Set value for named integer\n");
AcpiOsPrintf (" Sleep <SleepState> Simulate sleep/wake sequence\n");
+ AcpiOsPrintf (" Template <Object> Format/dump a Buffer/ResourceTemplate\n");
AcpiOsPrintf (" Terminate Delete namespace and all internal objects\n");
AcpiOsPrintf (" Type <Object> Display object type\n");
@@ -659,12 +662,15 @@ AcpiDbCommandDispatch (
break;
case CMD_ENABLEACPI:
+#if (!ACPI_REDUCED_HARDWARE)
+
Status = AcpiEnable();
if (ACPI_FAILURE(Status))
{
AcpiOsPrintf("AcpiEnable failed (Status=%X)\n", Status);
return (Status);
}
+#endif /* !ACPI_REDUCED_HARDWARE */
break;
case CMD_EVENT:
@@ -854,6 +860,10 @@ AcpiDbCommandDispatch (
AcpiDbDisplayTableInfo (AcpiGbl_DbArgs[1]);
break;
+ case CMD_TEMPLATE:
+ AcpiDbDisplayTemplate (AcpiGbl_DbArgs[1]);
+ break;
+
case CMD_TERMINATE:
AcpiDbSetOutputDestination (ACPI_DB_REDIRECTABLE_OUTPUT);
AcpiUtSubsystemShutdown ();
diff --git a/usr/src/uts/intel/io/acpica/debugger/dbmethod.c b/usr/src/uts/intel/io/acpica/debugger/dbmethod.c
index f0b88ebdb3..0d45d68150 100644
--- a/usr/src/uts/intel/io/acpica/debugger/dbmethod.c
+++ b/usr/src/uts/intel/io/acpica/debugger/dbmethod.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/debugger/dbnames.c b/usr/src/uts/intel/io/acpica/debugger/dbnames.c
index e4e4280f29..be326c5390 100644
--- a/usr/src/uts/intel/io/acpica/debugger/dbnames.c
+++ b/usr/src/uts/intel/io/acpica/debugger/dbnames.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/debugger/dbstats.c b/usr/src/uts/intel/io/acpica/debugger/dbstats.c
index bf17c63932..fcc4998ede 100644
--- a/usr/src/uts/intel/io/acpica/debugger/dbstats.c
+++ b/usr/src/uts/intel/io/acpica/debugger/dbstats.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -227,8 +227,8 @@ AcpiDbEnumerateObject (
case ACPI_TYPE_DEVICE:
- AcpiDbEnumerateObject (ObjDesc->Device.SystemNotify);
- AcpiDbEnumerateObject (ObjDesc->Device.DeviceNotify);
+ AcpiDbEnumerateObject (ObjDesc->Device.NotifyList[0]);
+ AcpiDbEnumerateObject (ObjDesc->Device.NotifyList[1]);
AcpiDbEnumerateObject (ObjDesc->Device.Handler);
break;
@@ -248,21 +248,21 @@ AcpiDbEnumerateObject (
case ACPI_TYPE_POWER:
- AcpiDbEnumerateObject (ObjDesc->PowerResource.SystemNotify);
- AcpiDbEnumerateObject (ObjDesc->PowerResource.DeviceNotify);
+ AcpiDbEnumerateObject (ObjDesc->PowerResource.NotifyList[0]);
+ AcpiDbEnumerateObject (ObjDesc->PowerResource.NotifyList[1]);
break;
case ACPI_TYPE_PROCESSOR:
- AcpiDbEnumerateObject (ObjDesc->Processor.SystemNotify);
- AcpiDbEnumerateObject (ObjDesc->Processor.DeviceNotify);
+ AcpiDbEnumerateObject (ObjDesc->Processor.NotifyList[0]);
+ AcpiDbEnumerateObject (ObjDesc->Processor.NotifyList[1]);
AcpiDbEnumerateObject (ObjDesc->Processor.Handler);
break;
case ACPI_TYPE_THERMAL:
- AcpiDbEnumerateObject (ObjDesc->ThermalZone.SystemNotify);
- AcpiDbEnumerateObject (ObjDesc->ThermalZone.DeviceNotify);
+ AcpiDbEnumerateObject (ObjDesc->ThermalZone.NotifyList[0]);
+ AcpiDbEnumerateObject (ObjDesc->ThermalZone.NotifyList[1]);
AcpiDbEnumerateObject (ObjDesc->ThermalZone.Handler);
break;
@@ -522,6 +522,18 @@ AcpiDbDisplayStatistics (
AcpiOsPrintf ("NamespaceNode %3d\n", sizeof (ACPI_NAMESPACE_NODE));
AcpiOsPrintf ("AcpiObject %3d\n", sizeof (ACPI_OBJECT));
+ AcpiOsPrintf ("\n");
+
+ AcpiOsPrintf ("Generic State %3d\n", sizeof (ACPI_GENERIC_STATE));
+ AcpiOsPrintf ("Common State %3d\n", sizeof (ACPI_COMMON_STATE));
+ AcpiOsPrintf ("Control State %3d\n", sizeof (ACPI_CONTROL_STATE));
+ AcpiOsPrintf ("Update State %3d\n", sizeof (ACPI_UPDATE_STATE));
+ AcpiOsPrintf ("Scope State %3d\n", sizeof (ACPI_SCOPE_STATE));
+ AcpiOsPrintf ("Parse Scope %3d\n", sizeof (ACPI_PSCOPE_STATE));
+ AcpiOsPrintf ("Package State %3d\n", sizeof (ACPI_PKG_STATE));
+ AcpiOsPrintf ("Thread State %3d\n", sizeof (ACPI_THREAD_STATE));
+ AcpiOsPrintf ("Result Values %3d\n", sizeof (ACPI_RESULT_VALUES));
+ AcpiOsPrintf ("Notify Info %3d\n", sizeof (ACPI_NOTIFY_INFO));
break;
diff --git a/usr/src/uts/intel/io/acpica/debugger/dbutils.c b/usr/src/uts/intel/io/acpica/debugger/dbutils.c
index 5b071e87b6..cf29b225de 100644
--- a/usr/src/uts/intel/io/acpica/debugger/dbutils.c
+++ b/usr/src/uts/intel/io/acpica/debugger/dbutils.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -190,10 +190,7 @@ AcpiDbDumpExternalObject (
case ACPI_TYPE_STRING:
AcpiOsPrintf ("[String] Length %.2X = ", ObjDesc->String.Length);
- for (i = 0; i < ObjDesc->String.Length; i++)
- {
- AcpiOsPrintf ("%c", ObjDesc->String.Pointer[i]);
- }
+ AcpiUtPrintString (ObjDesc->String.Pointer, ACPI_UINT8_MAX);
AcpiOsPrintf ("\n");
break;
@@ -208,7 +205,7 @@ AcpiDbDumpExternalObject (
AcpiOsPrintf ("\n");
}
AcpiUtDumpBuffer (ACPI_CAST_PTR (UINT8, ObjDesc->Buffer.Pointer),
- ObjDesc->Buffer.Length, DB_DWORD_DISPLAY, _COMPONENT);
+ ObjDesc->Buffer.Length, DB_BYTE_DISPLAY, _COMPONENT);
}
else
{
diff --git a/usr/src/uts/intel/io/acpica/debugger/dbxface.c b/usr/src/uts/intel/io/acpica/debugger/dbxface.c
index e638733c22..cd30273434 100644
--- a/usr/src/uts/intel/io/acpica/debugger/dbxface.c
+++ b/usr/src/uts/intel/io/acpica/debugger/dbxface.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/disassembler/dmbuffer.c b/usr/src/uts/intel/io/acpica/disassembler/dmbuffer.c
index f223f89f43..9f49f92e47 100644
--- a/usr/src/uts/intel/io/acpica/disassembler/dmbuffer.c
+++ b/usr/src/uts/intel/io/acpica/disassembler/dmbuffer.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -108,19 +108,19 @@ AcpiDmDisasmByteList (
}
AcpiDmIndent (Level);
- if (ByteCount > 7)
+ if (ByteCount > 8)
{
- AcpiOsPrintf ("/* %04X */ ", i);
+ AcpiOsPrintf ("/* %04X */ ", i);
}
}
- AcpiOsPrintf ("0x%2.2X", (UINT32) ByteData[i]);
+ AcpiOsPrintf (" 0x%2.2X", (UINT32) ByteData[i]);
/* Add comma if there are more bytes to display */
if (i < (ByteCount -1))
{
- AcpiOsPrintf (", ");
+ AcpiOsPrintf (",");
}
}
diff --git a/usr/src/uts/intel/io/acpica/disassembler/dmnames.c b/usr/src/uts/intel/io/acpica/disassembler/dmnames.c
index b00bca8e86..92e67c2e0a 100644
--- a/usr/src/uts/intel/io/acpica/disassembler/dmnames.c
+++ b/usr/src/uts/intel/io/acpica/disassembler/dmnames.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/disassembler/dmobject.c b/usr/src/uts/intel/io/acpica/disassembler/dmobject.c
index 9d5d7dd92c..cb515e9427 100644
--- a/usr/src/uts/intel/io/acpica/disassembler/dmobject.c
+++ b/usr/src/uts/intel/io/acpica/disassembler/dmobject.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/disassembler/dmopcode.c b/usr/src/uts/intel/io/acpica/disassembler/dmopcode.c
index 5da3847714..6d2c3e1ecf 100644
--- a/usr/src/uts/intel/io/acpica/disassembler/dmopcode.c
+++ b/usr/src/uts/intel/io/acpica/disassembler/dmopcode.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -303,6 +303,7 @@ AcpiDmDisassembleOneOp (
UINT32 Length;
ACPI_PARSE_OBJECT *Child;
ACPI_STATUS Status;
+ UINT8 *Aml;
if (!Op)
@@ -426,16 +427,19 @@ AcpiDmDisassembleOneOp (
* types of buffers, we have to closely look at the data in the
* buffer to determine the type.
*/
- Status = AcpiDmIsResourceTemplate (Op);
- if (ACPI_SUCCESS (Status))
+ if (!AcpiGbl_NoResourceDisassembly)
{
- Op->Common.DisasmOpcode = ACPI_DASM_RESOURCE;
- AcpiOsPrintf ("ResourceTemplate");
- break;
- }
- else if (Status == AE_AML_NO_RESOURCE_END_TAG)
- {
- AcpiOsPrintf ("/**** Is ResourceTemplate, but EndTag not at buffer end ****/ ");
+ Status = AcpiDmIsResourceTemplate (Op);
+ if (ACPI_SUCCESS (Status))
+ {
+ Op->Common.DisasmOpcode = ACPI_DASM_RESOURCE;
+ AcpiOsPrintf ("ResourceTemplate");
+ break;
+ }
+ else if (Status == AE_AML_NO_RESOURCE_END_TAG)
+ {
+ AcpiOsPrintf ("/**** Is ResourceTemplate, but EndTag not at buffer end ****/ ");
+ }
}
if (AcpiDmIsUnicodeBuffer (Op))
@@ -495,7 +499,7 @@ AcpiDmDisassembleOneOp (
if (Info->BitOffset % 8 == 0)
{
- AcpiOsPrintf (" Offset (0x%.2X)", ACPI_DIV_8 (Info->BitOffset));
+ AcpiOsPrintf ("Offset (0x%.2X)", ACPI_DIV_8 (Info->BitOffset));
}
else
{
@@ -507,16 +511,59 @@ AcpiDmDisassembleOneOp (
case AML_INT_ACCESSFIELD_OP:
+ case AML_INT_EXTACCESSFIELD_OP:
- AcpiOsPrintf (" AccessAs (%s, ",
- AcpiGbl_AccessTypes [(UINT32) (Op->Common.Value.Integer >> 8) & 0x7]);
+ AcpiOsPrintf ("AccessAs (%s, ",
+ AcpiGbl_AccessTypes [(UINT32) (Op->Common.Value.Integer & 0x7)]);
+
+ AcpiDmDecodeAttribute ((UINT8) (Op->Common.Value.Integer >> 8));
+
+ if (Op->Common.AmlOpcode == AML_INT_EXTACCESSFIELD_OP)
+ {
+ AcpiOsPrintf (" (0x%2.2X)", (unsigned) ((Op->Common.Value.Integer >> 16) & 0xFF));
+ }
- AcpiDmDecodeAttribute ((UINT8) Op->Common.Value.Integer);
AcpiOsPrintf (")");
AcpiDmCommaIfFieldMember (Op);
break;
+ case AML_INT_CONNECTION_OP:
+
+ /*
+ * Two types of Connection() - one with a buffer object, the
+ * other with a namestring that points to a buffer object.
+ */
+ AcpiOsPrintf ("Connection (");
+ Child = Op->Common.Value.Arg;
+
+ if (Child->Common.AmlOpcode == AML_INT_BYTELIST_OP)
+ {
+ AcpiOsPrintf ("\n");
+
+ Aml = Child->Named.Data;
+ Length = (UINT32) Child->Common.Value.Integer;
+
+ Info->Level += 1;
+ Op->Common.DisasmOpcode = ACPI_DASM_RESOURCE;
+ AcpiDmResourceTemplate (Info, Op->Common.Parent, Aml, Length);
+
+ Info->Level -= 1;
+ AcpiDmIndent (Info->Level);
+ }
+ else
+ {
+ AcpiDmNamestring (Child->Common.Value.Name);
+ }
+
+ AcpiOsPrintf (")");
+ AcpiDmCommaIfFieldMember (Op);
+ AcpiOsPrintf ("\n");
+
+ Op->Common.DisasmFlags |= ACPI_PARSEOP_IGNORE; /* for now, ignore in AcpiDmAscendingOp */
+ Child->Common.DisasmFlags |= ACPI_PARSEOP_IGNORE;
+ break;
+
case AML_INT_BYTELIST_OP:
AcpiDmByteList (Info, Op);
diff --git a/usr/src/uts/intel/io/acpica/disassembler/dmresrc.c b/usr/src/uts/intel/io/acpica/disassembler/dmresrc.c
index fe208d5260..9c07bf7e94 100644
--- a/usr/src/uts/intel/io/acpica/disassembler/dmresrc.c
+++ b/usr/src/uts/intel/io/acpica/disassembler/dmresrc.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -55,12 +55,6 @@
/* Dispatch tables for Resource disassembly functions */
-typedef
-void (*ACPI_RESOURCE_HANDLER) (
- AML_RESOURCE *Resource,
- UINT32 Length,
- UINT32 Level);
-
static ACPI_RESOURCE_HANDLER AcpiGbl_DmResourceDispatch [] =
{
/* Small descriptors */
@@ -75,7 +69,7 @@ static ACPI_RESOURCE_HANDLER AcpiGbl_DmResourceDispatch [] =
AcpiDmEndDependentDescriptor, /* 0x07, ACPI_RESOURCE_NAME_END_DEPENDENT */
AcpiDmIoDescriptor, /* 0x08, ACPI_RESOURCE_NAME_IO_PORT */
AcpiDmFixedIoDescriptor, /* 0x09, ACPI_RESOURCE_NAME_FIXED_IO_PORT */
- NULL, /* 0x0A, Reserved */
+ AcpiDmFixedDmaDescriptor, /* 0x0A, ACPI_RESOURCE_NAME_FIXED_DMA */
NULL, /* 0x0B, Reserved */
NULL, /* 0x0C, Reserved */
NULL, /* 0x0D, Reserved */
@@ -95,7 +89,10 @@ static ACPI_RESOURCE_HANDLER AcpiGbl_DmResourceDispatch [] =
AcpiDmWordDescriptor, /* 0x08, ACPI_RESOURCE_NAME_WORD_ADDRESS_SPACE */
AcpiDmInterruptDescriptor, /* 0x09, ACPI_RESOURCE_NAME_EXTENDED_XRUPT */
AcpiDmQwordDescriptor, /* 0x0A, ACPI_RESOURCE_NAME_QWORD_ADDRESS_SPACE */
- AcpiDmExtendedDescriptor /* 0x0B, ACPI_RESOURCE_NAME_EXTENDED_ADDRESS_SPACE */
+ AcpiDmExtendedDescriptor, /* 0x0B, ACPI_RESOURCE_NAME_EXTENDED_ADDRESS_SPACE */
+ AcpiDmGpioDescriptor, /* 0x0C, ACPI_RESOURCE_NAME_GPIO */
+ NULL, /* 0x0D, Reserved */
+ AcpiDmSerialBusDescriptor /* 0x0E, ACPI_RESOURCE_NAME_SERIAL_BUS */
};
diff --git a/usr/src/uts/intel/io/acpica/disassembler/dmresrcl.c b/usr/src/uts/intel/io/acpica/disassembler/dmresrcl.c
index 2284928752..ed78b31c70 100644
--- a/usr/src/uts/intel/io/acpica/disassembler/dmresrcl.c
+++ b/usr/src/uts/intel/io/acpica/disassembler/dmresrcl.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -1050,4 +1050,3 @@ AcpiDmVendorLargeDescriptor (
}
#endif
-
diff --git a/usr/src/uts/intel/io/acpica/disassembler/dmresrcl2.c b/usr/src/uts/intel/io/acpica/disassembler/dmresrcl2.c
new file mode 100644
index 0000000000..e79307ee72
--- /dev/null
+++ b/usr/src/uts/intel/io/acpica/disassembler/dmresrcl2.c
@@ -0,0 +1,700 @@
+/*******************************************************************************
+ *
+ * Module Name: dmresrcl2.c - "Large" Resource Descriptor disassembly (#2)
+ *
+ ******************************************************************************/
+
+/*
+ * Copyright (C) 2000 - 2012, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * substantially similar to the "NO WARRANTY" disclaimer below
+ * ("Disclaimer") and any redistribution must be conditioned upon
+ * including a substantially similar Disclaimer requirement for further
+ * binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ * of any contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+
+#include "acpi.h"
+#include "accommon.h"
+#include "acdisasm.h"
+
+
+#ifdef ACPI_DISASSEMBLER
+
+#define _COMPONENT ACPI_CA_DEBUGGER
+ ACPI_MODULE_NAME ("dbresrcl2")
+
+/* Local prototypes */
+
+static void
+AcpiDmI2cSerialBusDescriptor (
+ AML_RESOURCE *Resource,
+ UINT32 Length,
+ UINT32 Level);
+
+static void
+AcpiDmSpiSerialBusDescriptor (
+ AML_RESOURCE *Resource,
+ UINT32 Length,
+ UINT32 Level);
+
+static void
+AcpiDmUartSerialBusDescriptor (
+ AML_RESOURCE *Resource,
+ UINT32 Length,
+ UINT32 Level);
+
+static void
+AcpiDmGpioCommon (
+ AML_RESOURCE *Resource,
+ UINT32 Level);
+
+static void
+AcpiDmDumpRawDataBuffer (
+ UINT8 *Buffer,
+ UINT32 Length,
+ UINT32 Level);
+
+
+/* Dispatch table for the serial bus descriptors */
+
+static ACPI_RESOURCE_HANDLER SerialBusResourceDispatch [] =
+{
+ NULL,
+ AcpiDmI2cSerialBusDescriptor,
+ AcpiDmSpiSerialBusDescriptor,
+ AcpiDmUartSerialBusDescriptor
+};
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiDmDumpRawDataBuffer
+ *
+ * PARAMETERS: Buffer - Pointer to the data bytes
+ * Length - Length of the descriptor in bytes
+ * Level - Current source code indentation level
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Dump a data buffer as a RawDataBuffer() object. Used for
+ * vendor data bytes.
+ *
+ ******************************************************************************/
+
+static void
+AcpiDmDumpRawDataBuffer (
+ UINT8 *Buffer,
+ UINT32 Length,
+ UINT32 Level)
+{
+ UINT32 Index;
+ UINT32 i;
+ UINT32 j;
+
+
+ if (!Length)
+ {
+ return;
+ }
+
+ AcpiOsPrintf ("RawDataBuffer (0x%.2X) // Vendor Data", Length);
+
+ AcpiOsPrintf ("\n");
+ AcpiDmIndent (Level + 1);
+ AcpiOsPrintf ("{\n");
+ AcpiDmIndent (Level + 2);
+
+ for (i = 0; i < Length;)
+ {
+ for (j = 0; j < 8; j++)
+ {
+ Index = i + j;
+ if (Index >= Length)
+ {
+ goto Finish;
+ }
+
+ AcpiOsPrintf ("0x%2.2X", Buffer[Index]);
+ if ((Index + 1) >= Length)
+ {
+ goto Finish;
+ }
+
+ AcpiOsPrintf (", ");
+ }
+ AcpiOsPrintf ("\n");
+ AcpiDmIndent (Level + 2);
+
+ i += 8;
+ }
+
+Finish:
+ AcpiOsPrintf ("\n");
+ AcpiDmIndent (Level + 1);
+ AcpiOsPrintf ("}");
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiDmGpioCommon
+ *
+ * PARAMETERS: Resource - Pointer to the resource descriptor
+ * Level - Current source code indentation level
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Decode common parts of a GPIO Interrupt descriptor
+ *
+ ******************************************************************************/
+
+static void
+AcpiDmGpioCommon (
+ AML_RESOURCE *Resource,
+ UINT32 Level)
+{
+ UINT32 PinCount;
+ UINT16 *PinList;
+ UINT8 *VendorData;
+ UINT32 i;
+
+
+ /* ResourceSource, ResourceSourceIndex, ResourceType */
+
+ AcpiDmIndent (Level + 1);
+ if (Resource->Gpio.ResSourceOffset)
+ {
+ AcpiUtPrintString (
+ ACPI_ADD_PTR (char, Resource, Resource->Gpio.ResSourceOffset),
+ ACPI_UINT8_MAX);
+ }
+
+ AcpiOsPrintf (", ");
+ AcpiOsPrintf ("0x%2.2X, ", Resource->Gpio.ResSourceIndex);
+ AcpiOsPrintf ("%s, ",
+ AcpiGbl_ConsumeDecode [(Resource->Gpio.Flags & 1)]);
+
+ /* Insert a descriptor name */
+
+ AcpiDmDescriptorName ();
+ AcpiOsPrintf (",");
+
+ /* Dump the vendor data */
+
+ if (Resource->Gpio.VendorOffset)
+ {
+ AcpiOsPrintf ("\n");
+ AcpiDmIndent (Level + 1);
+ VendorData = ACPI_ADD_PTR (UINT8, Resource,
+ Resource->Gpio.VendorOffset);
+
+ AcpiDmDumpRawDataBuffer (VendorData,
+ Resource->Gpio.VendorLength, Level);
+ }
+
+ AcpiOsPrintf (")\n");
+
+ /* Dump the interrupt list */
+
+ AcpiDmIndent (Level + 1);
+ AcpiOsPrintf ("{ // Pin list\n");
+
+ PinCount = ((UINT32) (Resource->Gpio.ResSourceOffset -
+ Resource->Gpio.PinTableOffset)) /
+ sizeof (UINT16);
+
+ PinList = (UINT16 *) ACPI_ADD_PTR (char, Resource,
+ Resource->Gpio.PinTableOffset);
+
+ for (i = 0; i < PinCount; i++)
+ {
+ AcpiDmIndent (Level + 2);
+ AcpiOsPrintf ("0x%4.4X%s\n", PinList[i], ((i + 1) < PinCount) ? "," : "");
+ }
+
+ AcpiDmIndent (Level + 1);
+ AcpiOsPrintf ("}\n");
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiDmGpioIntDescriptor
+ *
+ * PARAMETERS: Resource - Pointer to the resource descriptor
+ * Length - Length of the descriptor in bytes
+ * Level - Current source code indentation level
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Decode a GPIO Interrupt descriptor
+ *
+ ******************************************************************************/
+
+static void
+AcpiDmGpioIntDescriptor (
+ AML_RESOURCE *Resource,
+ UINT32 Length,
+ UINT32 Level)
+{
+
+ /* Dump the GpioInt-specific portion of the descriptor */
+
+ /* EdgeLevel, ActiveLevel, Shared */
+
+ AcpiDmIndent (Level);
+ AcpiOsPrintf ("GpioInt (%s, %s, %s, ",
+ AcpiGbl_HeDecode [(Resource->Gpio.IntFlags & 1)],
+ AcpiGbl_LlDecode [(Resource->Gpio.IntFlags >> 1) & 1],
+ AcpiGbl_ShrDecode [(Resource->Gpio.IntFlags >> 3) & 1]);
+
+ /* PinConfig, DebounceTimeout */
+
+ if (Resource->Gpio.PinConfig <= 3)
+ {
+ AcpiOsPrintf ("%s, ",
+ AcpiGbl_PpcDecode[Resource->Gpio.PinConfig]);
+ }
+ else
+ {
+ AcpiOsPrintf ("0x%2.2X, ", Resource->Gpio.PinConfig);
+ }
+ AcpiOsPrintf ("0x%4.4X,\n", Resource->Gpio.DebounceTimeout);
+
+ /* Dump the GpioInt/GpioIo common portion of the descriptor */
+
+ AcpiDmGpioCommon (Resource, Level);
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiDmGpioIoDescriptor
+ *
+ * PARAMETERS: Resource - Pointer to the resource descriptor
+ * Length - Length of the descriptor in bytes
+ * Level - Current source code indentation level
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Decode a GPIO Interrupt descriptor
+ *
+ ******************************************************************************/
+
+static void
+AcpiDmGpioIoDescriptor (
+ AML_RESOURCE *Resource,
+ UINT32 Length,
+ UINT32 Level)
+{
+
+ /* Dump the GpioIo-specific portion of the descriptor */
+
+ /* Shared, PinConfig */
+
+ AcpiDmIndent (Level);
+ AcpiOsPrintf ("GpioIo (%s, ",
+ AcpiGbl_ShrDecode [(Resource->Gpio.IntFlags >> 3) & 1]);
+
+ if (Resource->Gpio.PinConfig <= 3)
+ {
+ AcpiOsPrintf ("%s, ",
+ AcpiGbl_PpcDecode[Resource->Gpio.PinConfig]);
+ }
+ else
+ {
+ AcpiOsPrintf ("0x%2.2X, ", Resource->Gpio.PinConfig);
+ }
+
+ /* DebounceTimeout, DriveStrength, IoRestriction */
+
+ AcpiOsPrintf ("0x%4.4X, ", Resource->Gpio.DebounceTimeout);
+ AcpiOsPrintf ("0x%4.4X, ", Resource->Gpio.DriveStrength);
+ AcpiOsPrintf ("%s,\n",
+ AcpiGbl_IorDecode [Resource->Gpio.IntFlags & 3]);
+
+ /* Dump the GpioInt/GpioIo common portion of the descriptor */
+
+ AcpiDmGpioCommon (Resource, Level);
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiDmGpioDescriptor
+ *
+ * PARAMETERS: Resource - Pointer to the resource descriptor
+ * Length - Length of the descriptor in bytes
+ * Level - Current source code indentation level
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Decode a GpioInt/GpioIo GPIO Interrupt/IO descriptor
+ *
+ ******************************************************************************/
+
+void
+AcpiDmGpioDescriptor (
+ AML_RESOURCE *Resource,
+ UINT32 Length,
+ UINT32 Level)
+{
+ UINT8 ConnectionType;
+
+
+ ConnectionType = Resource->Gpio.ConnectionType;
+
+ switch (ConnectionType)
+ {
+ case AML_RESOURCE_GPIO_TYPE_INT:
+ AcpiDmGpioIntDescriptor (Resource, Length, Level);
+ break;
+
+ case AML_RESOURCE_GPIO_TYPE_IO:
+ AcpiDmGpioIoDescriptor (Resource, Length, Level);
+ break;
+
+ default:
+ AcpiOsPrintf ("Unknown GPIO type\n");
+ break;
+ }
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiDmDumpSerialBusVendorData
+ *
+ * PARAMETERS: Resource - Pointer to the resource descriptor
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Dump optional serial bus vendor data
+ *
+ ******************************************************************************/
+
+static void
+AcpiDmDumpSerialBusVendorData (
+ AML_RESOURCE *Resource,
+ UINT32 Level)
+{
+ UINT8 *VendorData;
+ UINT32 VendorLength;
+
+
+ /* Get the (optional) vendor data and length */
+
+ switch (Resource->CommonSerialBus.Type)
+ {
+ case AML_RESOURCE_I2C_SERIALBUSTYPE:
+
+ VendorLength = Resource->CommonSerialBus.TypeDataLength -
+ AML_RESOURCE_I2C_MIN_DATA_LEN;
+
+ VendorData = ACPI_ADD_PTR (UINT8, Resource,
+ sizeof (AML_RESOURCE_I2C_SERIALBUS));
+ break;
+
+ case AML_RESOURCE_SPI_SERIALBUSTYPE:
+
+ VendorLength = Resource->CommonSerialBus.TypeDataLength -
+ AML_RESOURCE_SPI_MIN_DATA_LEN;
+
+ VendorData = ACPI_ADD_PTR (UINT8, Resource,
+ sizeof (AML_RESOURCE_SPI_SERIALBUS));
+ break;
+
+ case AML_RESOURCE_UART_SERIALBUSTYPE:
+
+ VendorLength = Resource->CommonSerialBus.TypeDataLength -
+ AML_RESOURCE_UART_MIN_DATA_LEN;
+
+ VendorData = ACPI_ADD_PTR (UINT8, Resource,
+ sizeof (AML_RESOURCE_UART_SERIALBUS));
+ break;
+
+ default:
+ return;
+ }
+
+ /* Dump the vendor bytes as a RawDataBuffer object */
+
+ AcpiDmDumpRawDataBuffer (VendorData, VendorLength, Level);
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiDmI2cSerialBusDescriptor
+ *
+ * PARAMETERS: Resource - Pointer to the resource descriptor
+ * Length - Length of the descriptor in bytes
+ * Level - Current source code indentation level
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Decode a I2C serial bus descriptor
+ *
+ ******************************************************************************/
+
+static void
+AcpiDmI2cSerialBusDescriptor (
+ AML_RESOURCE *Resource,
+ UINT32 Length,
+ UINT32 Level)
+{
+ UINT32 ResourceSourceOffset;
+
+
+ /* SlaveAddress, SlaveMode, ConnectionSpeed, AddressingMode */
+
+ AcpiDmIndent (Level);
+ AcpiOsPrintf ("I2cSerialBus (0x%4.4X, %s, 0x%8.8X,\n",
+ Resource->I2cSerialBus.SlaveAddress,
+ AcpiGbl_SmDecode [(Resource->I2cSerialBus.Flags & 1)],
+ Resource->I2cSerialBus.ConnectionSpeed);
+
+ AcpiDmIndent (Level + 1);
+ AcpiOsPrintf ("%s, ",
+ AcpiGbl_AmDecode [(Resource->I2cSerialBus.TypeSpecificFlags & 1)]);
+
+ /* ResourceSource is a required field */
+
+ ResourceSourceOffset = sizeof (AML_RESOURCE_COMMON_SERIALBUS) +
+ Resource->CommonSerialBus.TypeDataLength;
+
+ AcpiUtPrintString (
+ ACPI_ADD_PTR (char, Resource, ResourceSourceOffset),
+ ACPI_UINT8_MAX);
+
+ /* ResourceSourceIndex, ResourceUsage */
+
+ AcpiOsPrintf (",\n");
+ AcpiDmIndent (Level + 1);
+ AcpiOsPrintf ("0x%2.2X, ", Resource->I2cSerialBus.ResSourceIndex);
+
+ AcpiOsPrintf ("%s, ",
+ AcpiGbl_ConsumeDecode [(Resource->I2cSerialBus.Flags >> 1) & 1]);
+
+ /* Insert a descriptor name */
+
+ AcpiDmDescriptorName ();
+ AcpiOsPrintf (",\n");
+
+ /* Dump the vendor data */
+
+ AcpiDmIndent (Level + 1);
+ AcpiDmDumpSerialBusVendorData (Resource, Level);
+ AcpiOsPrintf (")\n");
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiDmSpiSerialBusDescriptor
+ *
+ * PARAMETERS: Resource - Pointer to the resource descriptor
+ * Length - Length of the descriptor in bytes
+ * Level - Current source code indentation level
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Decode a SPI serial bus descriptor
+ *
+ ******************************************************************************/
+
+static void
+AcpiDmSpiSerialBusDescriptor (
+ AML_RESOURCE *Resource,
+ UINT32 Length,
+ UINT32 Level)
+{
+ UINT32 ResourceSourceOffset;
+
+
+ /* DeviceSelection, DeviceSelectionPolarity, WireMode, DataBitLength */
+
+ AcpiDmIndent (Level);
+ AcpiOsPrintf ("SpiSerialBus (0x%4.4X, %s, %s, 0x%2.2X,\n",
+ Resource->SpiSerialBus.DeviceSelection,
+ AcpiGbl_DpDecode [(Resource->SpiSerialBus.TypeSpecificFlags >> 1) & 1],
+ AcpiGbl_WmDecode [(Resource->SpiSerialBus.TypeSpecificFlags & 1)],
+ Resource->SpiSerialBus.DataBitLength);
+
+ /* SlaveMode, ConnectionSpeed, ClockPolarity, ClockPhase */
+
+ AcpiDmIndent (Level + 1);
+ AcpiOsPrintf ("%s, 0x%8.8X, %s,\n",
+ AcpiGbl_SmDecode [(Resource->SpiSerialBus.Flags & 1)],
+ Resource->SpiSerialBus.ConnectionSpeed,
+ AcpiGbl_CpoDecode [(Resource->SpiSerialBus.ClockPolarity & 1)]);
+
+ AcpiDmIndent (Level + 1);
+ AcpiOsPrintf ("%s, ",
+ AcpiGbl_CphDecode [(Resource->SpiSerialBus.ClockPhase & 1)]);
+
+ /* ResourceSource is a required field */
+
+ ResourceSourceOffset = sizeof (AML_RESOURCE_COMMON_SERIALBUS) +
+ Resource->CommonSerialBus.TypeDataLength;
+
+ AcpiUtPrintString (
+ ACPI_ADD_PTR (char, Resource, ResourceSourceOffset),
+ ACPI_UINT8_MAX);
+
+ /* ResourceSourceIndex, ResourceUsage */
+
+ AcpiOsPrintf (",\n");
+ AcpiDmIndent (Level + 1);
+ AcpiOsPrintf ("0x%2.2X, ", Resource->SpiSerialBus.ResSourceIndex);
+
+ AcpiOsPrintf ("%s, ",
+ AcpiGbl_ConsumeDecode [(Resource->SpiSerialBus.Flags >> 1) & 1]);
+
+ /* Insert a descriptor name */
+
+ AcpiDmDescriptorName ();
+ AcpiOsPrintf (",\n");
+
+ /* Dump the vendor data */
+
+ AcpiDmIndent (Level + 1);
+ AcpiDmDumpSerialBusVendorData (Resource, Level);
+ AcpiOsPrintf (")\n");
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiDmUartSerialBusDescriptor
+ *
+ * PARAMETERS: Resource - Pointer to the resource descriptor
+ * Length - Length of the descriptor in bytes
+ * Level - Current source code indentation level
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Decode a UART serial bus descriptor
+ *
+ ******************************************************************************/
+
+static void
+AcpiDmUartSerialBusDescriptor (
+ AML_RESOURCE *Resource,
+ UINT32 Length,
+ UINT32 Level)
+{
+ UINT32 ResourceSourceOffset;
+
+
+ /* ConnectionSpeed, BitsPerByte, StopBits */
+
+ AcpiDmIndent (Level);
+ AcpiOsPrintf ("UartSerialBus (0x%8.8X, %s, %s,\n",
+ Resource->UartSerialBus.DefaultBaudRate,
+ AcpiGbl_BpbDecode [(Resource->UartSerialBus.TypeSpecificFlags >> 4) & 3],
+ AcpiGbl_SbDecode [(Resource->UartSerialBus.TypeSpecificFlags >> 2) & 3]);
+
+ /* LinesInUse, IsBigEndian, Parity, FlowControl */
+
+ AcpiDmIndent (Level + 1);
+ AcpiOsPrintf ("0x%2.2X, %s, %s, %s,\n",
+ Resource->UartSerialBus.LinesEnabled,
+ AcpiGbl_EdDecode [(Resource->UartSerialBus.TypeSpecificFlags >> 7) & 1],
+ AcpiGbl_PtDecode [Resource->UartSerialBus.Parity & 7],
+ AcpiGbl_FcDecode [Resource->UartSerialBus.TypeSpecificFlags & 3]);
+
+ /* ReceiveBufferSize, TransmitBufferSize */
+
+ AcpiDmIndent (Level + 1);
+ AcpiOsPrintf ("0x%4.4X, 0x%4.4X, ",
+ Resource->UartSerialBus.RxFifoSize,
+ Resource->UartSerialBus.TxFifoSize);
+
+ /* ResourceSource is a required field */
+
+ ResourceSourceOffset = sizeof (AML_RESOURCE_COMMON_SERIALBUS) +
+ Resource->CommonSerialBus.TypeDataLength;
+
+ AcpiUtPrintString (
+ ACPI_ADD_PTR (char, Resource, ResourceSourceOffset),
+ ACPI_UINT8_MAX);
+
+ /* ResourceSourceIndex, ResourceUsage */
+
+ AcpiOsPrintf (",\n");
+ AcpiDmIndent (Level + 1);
+ AcpiOsPrintf ("0x%2.2X, ", Resource->UartSerialBus.ResSourceIndex);
+
+ AcpiOsPrintf ("%s, ",
+ AcpiGbl_ConsumeDecode [(Resource->UartSerialBus.Flags >> 1) & 1]);
+
+ /* Insert a descriptor name */
+
+ AcpiDmDescriptorName ();
+ AcpiOsPrintf (",\n");
+
+ /* Dump the vendor data */
+
+ AcpiDmIndent (Level + 1);
+ AcpiDmDumpSerialBusVendorData (Resource, Level);
+ AcpiOsPrintf (")\n");
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiDmSerialBusDescriptor
+ *
+ * PARAMETERS: Resource - Pointer to the resource descriptor
+ * Length - Length of the descriptor in bytes
+ * Level - Current source code indentation level
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Decode a I2C/SPI/UART serial bus descriptor
+ *
+ ******************************************************************************/
+
+void
+AcpiDmSerialBusDescriptor (
+ AML_RESOURCE *Resource,
+ UINT32 Length,
+ UINT32 Level)
+{
+
+ SerialBusResourceDispatch [Resource->CommonSerialBus.Type] (
+ Resource, Length, Level);
+}
+
+#endif
+
diff --git a/usr/src/uts/intel/io/acpica/disassembler/dmresrcs.c b/usr/src/uts/intel/io/acpica/disassembler/dmresrcs.c
index 52b0fa3ea3..32759d991b 100644
--- a/usr/src/uts/intel/io/acpica/disassembler/dmresrcs.c
+++ b/usr/src/uts/intel/io/acpica/disassembler/dmresrcs.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -121,9 +121,9 @@ AcpiDmDmaDescriptor (
AcpiDmIndent (Level);
AcpiOsPrintf ("DMA (%s, %s, %s, ",
- AcpiGbl_TypDecode [(Resource->Dma.Flags >> 5) & 3],
- AcpiGbl_BmDecode [(Resource->Dma.Flags >> 2) & 1],
- AcpiGbl_SizDecode [(Resource->Dma.Flags >> 0) & 3]);
+ AcpiGbl_TypDecode [(Resource->Dma.Flags >> 5) & 3],
+ AcpiGbl_BmDecode [(Resource->Dma.Flags >> 2) & 1],
+ AcpiGbl_SizDecode [(Resource->Dma.Flags >> 0) & 3]);
/* Insert a descriptor name */
@@ -137,6 +137,49 @@ AcpiDmDmaDescriptor (
/*******************************************************************************
*
+ * FUNCTION: AcpiDmFixedDmaDescriptor
+ *
+ * PARAMETERS: Resource - Pointer to the resource descriptor
+ * Length - Length of the descriptor in bytes
+ * Level - Current source code indentation level
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Decode a FixedDMA descriptor
+ *
+ ******************************************************************************/
+
+void
+AcpiDmFixedDmaDescriptor (
+ AML_RESOURCE *Resource,
+ UINT32 Length,
+ UINT32 Level)
+{
+
+ AcpiDmIndent (Level);
+ AcpiOsPrintf ("FixedDMA (0x%4.4X, 0x%4.4X, ",
+ Resource->FixedDma.RequestLines,
+ Resource->FixedDma.Channels);
+
+ if (Resource->FixedDma.Width <= 5)
+ {
+ AcpiOsPrintf ("%s, ",
+ AcpiGbl_DtsDecode [Resource->FixedDma.Width]);
+ }
+ else
+ {
+ AcpiOsPrintf ("%X /* INVALID DMA WIDTH */, ", Resource->FixedDma.Width);
+ }
+
+ /* Insert a descriptor name */
+
+ AcpiDmDescriptorName ();
+ AcpiOsPrintf (")\n");
+}
+
+
+/*******************************************************************************
+ *
* FUNCTION: AcpiDmIoDescriptor
*
* PARAMETERS: Resource - Pointer to the resource descriptor
diff --git a/usr/src/uts/intel/io/acpica/disassembler/dmutils.c b/usr/src/uts/intel/io/acpica/disassembler/dmutils.c
index b3db9312d5..ce3443ec3f 100644
--- a/usr/src/uts/intel/io/acpica/disassembler/dmutils.c
+++ b/usr/src/uts/intel/io/acpica/disassembler/dmutils.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -135,7 +135,8 @@ const char *AcpiGbl_IrqDecode[] =
*
* RETURN: None
*
- * DESCRIPTION: Decode the AccessAs attribute byte. (Mostly SMBus stuff)
+ * DESCRIPTION: Decode the AccessAs attribute byte. (Mostly SMBus and
+ * GenericSerialBus stuff.)
*
******************************************************************************/
@@ -146,44 +147,61 @@ AcpiDmDecodeAttribute (
switch (Attribute)
{
- case AML_FIELD_ATTRIB_SMB_QUICK:
+ case AML_FIELD_ATTRIB_QUICK:
- AcpiOsPrintf ("SMBQuick");
+ AcpiOsPrintf ("AttribQuick");
break;
- case AML_FIELD_ATTRIB_SMB_SEND_RCV:
+ case AML_FIELD_ATTRIB_SEND_RCV:
- AcpiOsPrintf ("SMBSendReceive");
+ AcpiOsPrintf ("AttribSendReceive");
break;
- case AML_FIELD_ATTRIB_SMB_BYTE:
+ case AML_FIELD_ATTRIB_BYTE:
- AcpiOsPrintf ("SMBByte");
+ AcpiOsPrintf ("AttribByte");
break;
- case AML_FIELD_ATTRIB_SMB_WORD:
+ case AML_FIELD_ATTRIB_WORD:
- AcpiOsPrintf ("SMBWord");
+ AcpiOsPrintf ("AttribWord");
break;
- case AML_FIELD_ATTRIB_SMB_WORD_CALL:
+ case AML_FIELD_ATTRIB_BLOCK:
- AcpiOsPrintf ("SMBProcessCall");
+ AcpiOsPrintf ("AttribBlock");
break;
- case AML_FIELD_ATTRIB_SMB_BLOCK:
+ case AML_FIELD_ATTRIB_MULTIBYTE:
- AcpiOsPrintf ("SMBBlock");
+ AcpiOsPrintf ("AttribBytes");
break;
- case AML_FIELD_ATTRIB_SMB_BLOCK_CALL:
+ case AML_FIELD_ATTRIB_WORD_CALL:
- AcpiOsPrintf ("SMBBlockProcessCall");
+ AcpiOsPrintf ("AttribProcessCall");
+ break;
+
+ case AML_FIELD_ATTRIB_BLOCK_CALL:
+
+ AcpiOsPrintf ("AttribBlockProcessCall");
+ break;
+
+ case AML_FIELD_ATTRIB_RAW_BYTES:
+
+ AcpiOsPrintf ("AttribRawBytes");
+ break;
+
+ case AML_FIELD_ATTRIB_RAW_PROCESS:
+
+ AcpiOsPrintf ("AttribRawProcessBytes");
break;
default:
- AcpiOsPrintf ("0x%.2X", Attribute);
+ /* A ByteConst is allowed by the grammar */
+
+ AcpiOsPrintf ("0x%2.2X", Attribute);
break;
}
}
diff --git a/usr/src/uts/intel/io/acpica/disassembler/dmwalk.c b/usr/src/uts/intel/io/acpica/disassembler/dmwalk.c
index 2a4b5e545f..dd3ee00255 100644
--- a/usr/src/uts/intel/io/acpica/disassembler/dmwalk.c
+++ b/usr/src/uts/intel/io/acpica/disassembler/dmwalk.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -464,7 +464,8 @@ AcpiDmDescendingOp (
AcpiDmDisassembleOneOp (NULL, Info, Op);
- if (Op->Common.DisasmOpcode == ACPI_DASM_LNOT_PREFIX)
+ if ((Op->Common.DisasmOpcode == ACPI_DASM_LNOT_PREFIX) ||
+ (Op->Common.AmlOpcode == AML_INT_CONNECTION_OP))
{
return (AE_OK);
}
diff --git a/usr/src/uts/intel/io/acpica/dispatcher/dsargs.c b/usr/src/uts/intel/io/acpica/dispatcher/dsargs.c
index 87def86051..ea0d7bee88 100644
--- a/usr/src/uts/intel/io/acpica/dispatcher/dsargs.c
+++ b/usr/src/uts/intel/io/acpica/dispatcher/dsargs.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -424,7 +424,15 @@ AcpiDsGetRegionArguments (
/* Execute the argument AML */
- Status = AcpiDsExecuteArguments (Node, Node->Parent,
+ Status = AcpiDsExecuteArguments (Node, ExtraDesc->Extra.ScopeNode,
ExtraDesc->Extra.AmlLength, ExtraDesc->Extra.AmlStart);
+ if (ACPI_FAILURE (Status))
+ {
+ return_ACPI_STATUS (Status);
+ }
+
+ Status = AcpiUtAddAddressRange (ObjDesc->Region.SpaceId,
+ ObjDesc->Region.Address, ObjDesc->Region.Length,
+ Node);
return_ACPI_STATUS (Status);
}
diff --git a/usr/src/uts/intel/io/acpica/dispatcher/dscontrol.c b/usr/src/uts/intel/io/acpica/dispatcher/dscontrol.c
index 41a44951c2..5c5c51a573 100644
--- a/usr/src/uts/intel/io/acpica/dispatcher/dscontrol.c
+++ b/usr/src/uts/intel/io/acpica/dispatcher/dscontrol.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/dispatcher/dsfield.c b/usr/src/uts/intel/io/acpica/dispatcher/dsfield.c
index 886b6c24c5..1c3c523720 100644
--- a/usr/src/uts/intel/io/acpica/dispatcher/dsfield.c
+++ b/usr/src/uts/intel/io/acpica/dispatcher/dsfield.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -243,6 +243,7 @@ AcpiDsGetFieldNames (
{
ACPI_STATUS Status;
UINT64 Position;
+ ACPI_PARSE_OBJECT *Child;
ACPI_FUNCTION_TRACE_PTR (DsGetFieldNames, Info);
@@ -257,10 +258,11 @@ AcpiDsGetFieldNames (
while (Arg)
{
/*
- * Three types of field elements are handled:
- * 1) Offset - specifies a bit offset
- * 2) AccessAs - changes the access mode
- * 3) Name - Enters a new named field into the namespace
+ * Four types of field elements are handled:
+ * 1) Name - Enters a new named field into the namespace
+ * 2) Offset - specifies a bit offset
+ * 3) AccessAs - changes the access mode/attributes
+ * 4) Connection - Associate a resource template with the field
*/
switch (Arg->Common.AmlOpcode)
{
@@ -279,24 +281,67 @@ AcpiDsGetFieldNames (
Info->FieldBitPosition = (UINT32) Position;
break;
-
case AML_INT_ACCESSFIELD_OP:
-
+ case AML_INT_EXTACCESSFIELD_OP:
/*
- * Get a new AccessType and AccessAttribute -- to be used for all
- * field units that follow, until field end or another AccessAs
- * keyword.
+ * Get new AccessType, AccessAttribute, and AccessLength fields
+ * -- to be used for all field units that follow, until the
+ * end-of-field or another AccessAs keyword is encountered.
+ * NOTE. These three bytes are encoded in the integer value
+ * of the parseop for convenience.
*
* In FieldFlags, preserve the flag bits other than the
- * ACCESS_TYPE bits
+ * ACCESS_TYPE bits.
*/
+
+ /* AccessType (ByteAcc, WordAcc, etc.) */
+
Info->FieldFlags = (UINT8)
((Info->FieldFlags & ~(AML_FIELD_ACCESS_TYPE_MASK)) |
- ((UINT8) ((UINT32) Arg->Common.Value.Integer >> 8)));
+ ((UINT8) ((UINT32) (Arg->Common.Value.Integer & 0x07))));
+
+ /* AccessAttribute (AttribQuick, AttribByte, etc.) */
+
+ Info->Attribute = (UINT8) ((Arg->Common.Value.Integer >> 8) & 0xFF);
+
+ /* AccessLength (for serial/buffer protocols) */
- Info->Attribute = (UINT8) (Arg->Common.Value.Integer);
+ Info->AccessLength = (UINT8) ((Arg->Common.Value.Integer >> 16) & 0xFF);
break;
+ case AML_INT_CONNECTION_OP:
+ /*
+ * Clear any previous connection. New connection is used for all
+ * fields that follow, similar to AccessAs
+ */
+ Info->ResourceBuffer = NULL;
+ Info->ConnectionNode = NULL;
+
+ /*
+ * A Connection() is either an actual resource descriptor (buffer)
+ * or a named reference to a resource template
+ */
+ Child = Arg->Common.Value.Arg;
+ if (Child->Common.AmlOpcode == AML_INT_BYTELIST_OP)
+ {
+ Info->ResourceBuffer = Child->Named.Data;
+ Info->ResourceLength = (UINT16) Child->Named.Value.Integer;
+ }
+ else
+ {
+ /* Lookup the Connection() namepath, it should already exist */
+
+ Status = AcpiNsLookup (WalkState->ScopeInfo,
+ Child->Common.Value.Name, ACPI_TYPE_ANY,
+ ACPI_IMODE_EXECUTE, ACPI_NS_DONT_OPEN_SCOPE,
+ WalkState, &Info->ConnectionNode);
+ if (ACPI_FAILURE (Status))
+ {
+ ACPI_ERROR_NAMESPACE (Child->Common.Value.Name, Status);
+ return_ACPI_STATUS (Status);
+ }
+ }
+ break;
case AML_INT_NAMEDFIELD_OP:
@@ -348,7 +393,6 @@ AcpiDsGetFieldNames (
Info->FieldBitPosition += Info->FieldBitLength;
break;
-
default:
ACPI_ERROR ((AE_INFO,
@@ -406,6 +450,8 @@ AcpiDsCreateField (
}
}
+ ACPI_MEMSET (&Info, 0, sizeof (ACPI_CREATE_FIELD_INFO));
+
/* Second arg is the field flags */
Arg = Arg->Common.Next;
@@ -418,7 +464,6 @@ AcpiDsCreateField (
Info.RegionNode = RegionNode;
Status = AcpiDsGetFieldNames (&Info, WalkState, Arg->Common.Next);
-
return_ACPI_STATUS (Status);
}
@@ -514,8 +559,8 @@ AcpiDsInitFieldObjects (
while (Arg)
{
/*
- * Ignore OFFSET and ACCESSAS terms here; we are only interested in the
- * field names in order to enter them into the namespace.
+ * Ignore OFFSET/ACCESSAS/CONNECTION terms here; we are only interested
+ * in the field names in order to enter them into the namespace.
*/
if (Arg->Common.AmlOpcode == AML_INT_NAMEDFIELD_OP)
{
@@ -697,7 +742,6 @@ AcpiDsCreateIndexField (
Info.RegionNode = RegionNode;
Status = AcpiDsGetFieldNames (&Info, WalkState, Arg->Common.Next);
-
return_ACPI_STATUS (Status);
}
diff --git a/usr/src/uts/intel/io/acpica/dispatcher/dsinit.c b/usr/src/uts/intel/io/acpica/dispatcher/dsinit.c
index 154e5de34f..1c7ad88492 100644
--- a/usr/src/uts/intel/io/acpica/dispatcher/dsinit.c
+++ b/usr/src/uts/intel/io/acpica/dispatcher/dsinit.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/dispatcher/dsmethod.c b/usr/src/uts/intel/io/acpica/dispatcher/dsmethod.c
index 1cfa1c6853..670382ac9b 100644
--- a/usr/src/uts/intel/io/acpica/dispatcher/dsmethod.c
+++ b/usr/src/uts/intel/io/acpica/dispatcher/dsmethod.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/dispatcher/dsmthdat.c b/usr/src/uts/intel/io/acpica/dispatcher/dsmthdat.c
index 74ee4bdf31..b4509e7710 100644
--- a/usr/src/uts/intel/io/acpica/dispatcher/dsmthdat.c
+++ b/usr/src/uts/intel/io/acpica/dispatcher/dsmthdat.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/dispatcher/dsobject.c b/usr/src/uts/intel/io/acpica/dispatcher/dsobject.c
index 69888505c6..b3567766a2 100644
--- a/usr/src/uts/intel/io/acpica/dispatcher/dsobject.c
+++ b/usr/src/uts/intel/io/acpica/dispatcher/dsobject.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/dispatcher/dsopcode.c b/usr/src/uts/intel/io/acpica/dispatcher/dsopcode.c
index 45fbed1e62..7569236f2e 100644
--- a/usr/src/uts/intel/io/acpica/dispatcher/dsopcode.c
+++ b/usr/src/uts/intel/io/acpica/dispatcher/dsopcode.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/dispatcher/dsutils.c b/usr/src/uts/intel/io/acpica/dispatcher/dsutils.c
index 278009795c..66431aad53 100644
--- a/usr/src/uts/intel/io/acpica/dispatcher/dsutils.c
+++ b/usr/src/uts/intel/io/acpica/dispatcher/dsutils.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/dispatcher/dswexec.c b/usr/src/uts/intel/io/acpica/dispatcher/dswexec.c
index 7983787958..5732855bbe 100644
--- a/usr/src/uts/intel/io/acpica/dispatcher/dswexec.c
+++ b/usr/src/uts/intel/io/acpica/dispatcher/dswexec.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/dispatcher/dswload.c b/usr/src/uts/intel/io/acpica/dispatcher/dswload.c
index d8e77a6538..d40f911f41 100644
--- a/usr/src/uts/intel/io/acpica/dispatcher/dswload.c
+++ b/usr/src/uts/intel/io/acpica/dispatcher/dswload.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/dispatcher/dswload2.c b/usr/src/uts/intel/io/acpica/dispatcher/dswload2.c
index 608d525b36..27e3176209 100644
--- a/usr/src/uts/intel/io/acpica/dispatcher/dswload2.c
+++ b/usr/src/uts/intel/io/acpica/dispatcher/dswload2.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/dispatcher/dswscope.c b/usr/src/uts/intel/io/acpica/dispatcher/dswscope.c
index 9842b85b21..a59fdc9dca 100644
--- a/usr/src/uts/intel/io/acpica/dispatcher/dswscope.c
+++ b/usr/src/uts/intel/io/acpica/dispatcher/dswscope.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/dispatcher/dswstate.c b/usr/src/uts/intel/io/acpica/dispatcher/dswstate.c
index f97ccadb60..dd11a18130 100644
--- a/usr/src/uts/intel/io/acpica/dispatcher/dswstate.c
+++ b/usr/src/uts/intel/io/acpica/dispatcher/dswstate.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/events/evevent.c b/usr/src/uts/intel/io/acpica/events/evevent.c
index 0f5785a634..b5a7acde72 100644
--- a/usr/src/uts/intel/io/acpica/events/evevent.c
+++ b/usr/src/uts/intel/io/acpica/events/evevent.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -48,6 +48,8 @@
#define _COMPONENT ACPI_EVENTS
ACPI_MODULE_NAME ("evevent")
+#if (!ACPI_REDUCED_HARDWARE) /* Entire module */
+
/* Local prototypes */
static ACPI_STATUS
@@ -81,6 +83,13 @@ AcpiEvInitializeEvents (
ACPI_FUNCTION_TRACE (EvInitializeEvents);
+ /* If Hardware Reduced flag is set, there are no fixed events */
+
+ if (AcpiGbl_ReducedHardware)
+ {
+ return_ACPI_STATUS (AE_OK);
+ }
+
/*
* Initialize the Fixed and General Purpose Events. This is done prior to
* enabling SCIs to prevent interrupts from occurring before the handlers
@@ -128,6 +137,13 @@ AcpiEvInstallXruptHandlers (
ACPI_FUNCTION_TRACE (EvInstallXruptHandlers);
+ /* If Hardware Reduced flag is set, there is no ACPI h/w */
+
+ if (AcpiGbl_ReducedHardware)
+ {
+ return_ACPI_STATUS (AE_OK);
+ }
+
/* Install the SCI handler */
Status = AcpiEvInstallSciHandler ();
@@ -315,4 +331,6 @@ AcpiEvFixedEventDispatch (
AcpiGbl_FixedEventHandlers[Event].Context));
}
+#endif /* !ACPI_REDUCED_HARDWARE */
+
diff --git a/usr/src/uts/intel/io/acpica/events/evglock.c b/usr/src/uts/intel/io/acpica/events/evglock.c
index cab444d06d..f40d129508 100644
--- a/usr/src/uts/intel/io/acpica/events/evglock.c
+++ b/usr/src/uts/intel/io/acpica/events/evglock.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -49,6 +49,7 @@
#define _COMPONENT ACPI_EVENTS
ACPI_MODULE_NAME ("evglock")
+#if (!ACPI_REDUCED_HARDWARE) /* Entire module */
/* Local prototypes */
@@ -79,6 +80,13 @@ AcpiEvInitGlobalLockHandler (
ACPI_FUNCTION_TRACE (EvInitGlobalLockHandler);
+ /* If Hardware Reduced flag is set, there is no global lock */
+
+ if (AcpiGbl_ReducedHardware)
+ {
+ return_ACPI_STATUS (AE_OK);
+ }
+
/* Attempt installation of the global lock handler */
Status = AcpiInstallFixedEventHandler (ACPI_EVENT_GLOBAL,
@@ -365,3 +373,5 @@ AcpiEvReleaseGlobalLock (
AcpiOsReleaseMutex (AcpiGbl_GlobalLockMutex->Mutex.OsMutex);
return_ACPI_STATUS (Status);
}
+
+#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/usr/src/uts/intel/io/acpica/events/evgpe.c b/usr/src/uts/intel/io/acpica/events/evgpe.c
index c4326c721b..a3781a55c6 100644
--- a/usr/src/uts/intel/io/acpica/events/evgpe.c
+++ b/usr/src/uts/intel/io/acpica/events/evgpe.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -49,6 +49,8 @@
#define _COMPONENT ACPI_EVENTS
ACPI_MODULE_NAME ("evgpe")
+#if (!ACPI_REDUCED_HARDWARE) /* Entire module */
+
/* Local prototypes */
static void ACPI_SYSTEM_XFACE
@@ -826,3 +828,4 @@ AcpiEvGpeDispatch (
return_UINT32 (ACPI_INTERRUPT_HANDLED);
}
+#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/usr/src/uts/intel/io/acpica/events/evgpeblk.c b/usr/src/uts/intel/io/acpica/events/evgpeblk.c
index b8b7af254e..c281855c2c 100644
--- a/usr/src/uts/intel/io/acpica/events/evgpeblk.c
+++ b/usr/src/uts/intel/io/acpica/events/evgpeblk.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -49,6 +49,8 @@
#define _COMPONENT ACPI_EVENTS
ACPI_MODULE_NAME ("evgpeblk")
+#if (!ACPI_REDUCED_HARDWARE) /* Entire module */
+
/* Local prototypes */
static ACPI_STATUS
@@ -545,3 +547,4 @@ AcpiEvInitializeGpeBlock (
return_ACPI_STATUS (AE_OK);
}
+#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/usr/src/uts/intel/io/acpica/events/evgpeinit.c b/usr/src/uts/intel/io/acpica/events/evgpeinit.c
index f1d38ed853..f8962e947f 100644
--- a/usr/src/uts/intel/io/acpica/events/evgpeinit.c
+++ b/usr/src/uts/intel/io/acpica/events/evgpeinit.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -50,6 +50,7 @@
#define _COMPONENT ACPI_EVENTS
ACPI_MODULE_NAME ("evgpeinit")
+#if (!ACPI_REDUCED_HARDWARE) /* Entire module */
/*
* Note: History of _PRW support in ACPICA
@@ -457,3 +458,5 @@ AcpiEvMatchGpeMethod (
Name, GpeNumber));
return_ACPI_STATUS (AE_OK);
}
+
+#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/usr/src/uts/intel/io/acpica/events/evgpeutil.c b/usr/src/uts/intel/io/acpica/events/evgpeutil.c
index e706fbb7c7..70580cd853 100644
--- a/usr/src/uts/intel/io/acpica/events/evgpeutil.c
+++ b/usr/src/uts/intel/io/acpica/events/evgpeutil.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -41,7 +41,6 @@
* POSSIBILITY OF SUCH DAMAGES.
*/
-
#include "acpi.h"
#include "accommon.h"
#include "acevents.h"
@@ -50,6 +49,7 @@
ACPI_MODULE_NAME ("evgpeutil")
+#if (!ACPI_REDUCED_HARDWARE) /* Entire module */
/*******************************************************************************
*
* FUNCTION: AcpiEvWalkGpeList
@@ -422,3 +422,4 @@ AcpiEvDeleteGpeHandlers (
return_ACPI_STATUS (AE_OK);
}
+#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/usr/src/uts/intel/io/acpica/events/evmisc.c b/usr/src/uts/intel/io/acpica/events/evmisc.c
index 1667f075db..68f751aca3 100644
--- a/usr/src/uts/intel/io/acpica/events/evmisc.c
+++ b/usr/src/uts/intel/io/acpica/events/evmisc.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -111,105 +111,82 @@ AcpiEvQueueNotifyRequest (
UINT32 NotifyValue)
{
ACPI_OPERAND_OBJECT *ObjDesc;
- ACPI_OPERAND_OBJECT *HandlerObj = NULL;
- ACPI_GENERIC_STATE *NotifyInfo;
+ ACPI_OPERAND_OBJECT *HandlerListHead = NULL;
+ ACPI_GENERIC_STATE *Info;
+ UINT8 HandlerListId = 0;
ACPI_STATUS Status = AE_OK;
ACPI_FUNCTION_NAME (EvQueueNotifyRequest);
- /*
- * For value 3 (Ejection Request), some device method may need to be run.
- * For value 2 (Device Wake) if _PRW exists, the _PS0 method may need
- * to be run.
- * For value 0x80 (Status Change) on the power button or sleep button,
- * initiate soft-off or sleep operation?
- */
- ACPI_DEBUG_PRINT ((ACPI_DB_INFO,
- "Dispatching Notify on [%4.4s] Node %p Value 0x%2.2X (%s)\n",
- AcpiUtGetNodeName (Node), Node, NotifyValue,
- AcpiUtGetNotifyName (NotifyValue)));
-
- /* Get the notify object attached to the NS Node */
+ /* Are Notifies allowed on this object? */
- ObjDesc = AcpiNsGetAttachedObject (Node);
- if (ObjDesc)
+ if (!AcpiEvIsNotifyObject (Node))
{
- /* We have the notify object, Get the right handler */
-
- switch (Node->Type)
- {
- /* Notify allowed only on these types */
+ return (AE_TYPE);
+ }
- case ACPI_TYPE_DEVICE:
- case ACPI_TYPE_THERMAL:
- case ACPI_TYPE_PROCESSOR:
+ /* Get the correct notify list type (System or Device) */
- if (NotifyValue <= ACPI_MAX_SYS_NOTIFY)
- {
- HandlerObj = ObjDesc->CommonNotify.SystemNotify;
- }
- else
- {
- HandlerObj = ObjDesc->CommonNotify.DeviceNotify;
- }
- break;
+ if (NotifyValue <= ACPI_MAX_SYS_NOTIFY)
+ {
+ HandlerListId = ACPI_SYSTEM_HANDLER_LIST;
+ }
+ else
+ {
+ HandlerListId = ACPI_DEVICE_HANDLER_LIST;
+ }
- default:
+ /* Get the notify object attached to the namespace Node */
- /* All other types are not supported */
+ ObjDesc = AcpiNsGetAttachedObject (Node);
+ if (ObjDesc)
+ {
+ /* We have an attached object, Get the correct handler list */
- return (AE_TYPE);
- }
+ HandlerListHead = ObjDesc->CommonNotify.NotifyList[HandlerListId];
}
/*
- * If there is any handler to run, schedule the dispatcher.
- * Check for:
- * 1) Global system notify handler
- * 2) Global device notify handler
- * 3) Per-device notify handler
+ * If there is no notify handler (Global or Local)
+ * for this object, just ignore the notify
*/
- if ((AcpiGbl_SystemNotify.Handler &&
- (NotifyValue <= ACPI_MAX_SYS_NOTIFY)) ||
- (AcpiGbl_DeviceNotify.Handler &&
- (NotifyValue > ACPI_MAX_SYS_NOTIFY)) ||
- HandlerObj)
+ if (!AcpiGbl_GlobalNotify[HandlerListId].Handler && !HandlerListHead)
{
- NotifyInfo = AcpiUtCreateGenericState ();
- if (!NotifyInfo)
- {
- return (AE_NO_MEMORY);
- }
+ ACPI_DEBUG_PRINT ((ACPI_DB_INFO,
+ "No notify handler for Notify, ignoring (%4.4s, %X) node %p\n",
+ AcpiUtGetNodeName (Node), NotifyValue, Node));
- if (!HandlerObj)
- {
- ACPI_DEBUG_PRINT ((ACPI_DB_INFO,
- "Executing system notify handler for Notify (%4.4s, %X) "
- "node %p\n",
- AcpiUtGetNodeName (Node), NotifyValue, Node));
- }
+ return (AE_OK);
+ }
- NotifyInfo->Common.DescriptorType = ACPI_DESC_TYPE_STATE_NOTIFY;
- NotifyInfo->Notify.Node = Node;
- NotifyInfo->Notify.Value = (UINT16) NotifyValue;
- NotifyInfo->Notify.HandlerObj = HandlerObj;
+ /* Setup notify info and schedule the notify dispatcher */
- Status = AcpiOsExecute (
- OSL_NOTIFY_HANDLER, AcpiEvNotifyDispatch, NotifyInfo);
- if (ACPI_FAILURE (Status))
- {
- AcpiUtDeleteGenericState (NotifyInfo);
- }
- }
- else
+ Info = AcpiUtCreateGenericState ();
+ if (!Info)
{
- /* There is no notify handler (per-device or system) for this device */
+ return (AE_NO_MEMORY);
+ }
- ACPI_DEBUG_PRINT ((ACPI_DB_INFO,
- "No notify handler for Notify (%4.4s, %X) node %p\n",
- AcpiUtGetNodeName (Node), NotifyValue, Node));
+ Info->Common.DescriptorType = ACPI_DESC_TYPE_STATE_NOTIFY;
+
+ Info->Notify.Node = Node;
+ Info->Notify.Value = (UINT16) NotifyValue;
+ Info->Notify.HandlerListId = HandlerListId;
+ Info->Notify.HandlerListHead = HandlerListHead;
+ Info->Notify.Global = &AcpiGbl_GlobalNotify[HandlerListId];
+
+ ACPI_DEBUG_PRINT ((ACPI_DB_INFO,
+ "Dispatching Notify on [%4.4s] (%s) Value 0x%2.2X (%s) Node %p\n",
+ AcpiUtGetNodeName (Node), AcpiUtGetTypeName (Node->Type),
+ NotifyValue, AcpiUtGetNotifyName (NotifyValue), Node));
+
+ Status = AcpiOsExecute (OSL_NOTIFY_HANDLER, AcpiEvNotifyDispatch,
+ Info);
+ if (ACPI_FAILURE (Status))
+ {
+ AcpiUtDeleteGenericState (Info);
}
return (Status);
@@ -233,64 +210,41 @@ static void ACPI_SYSTEM_XFACE
AcpiEvNotifyDispatch (
void *Context)
{
- ACPI_GENERIC_STATE *NotifyInfo = (ACPI_GENERIC_STATE *) Context;
- ACPI_NOTIFY_HANDLER GlobalHandler = NULL;
- void *GlobalContext = NULL;
+ ACPI_GENERIC_STATE *Info = (ACPI_GENERIC_STATE *) Context;
ACPI_OPERAND_OBJECT *HandlerObj;
ACPI_FUNCTION_ENTRY ();
- /*
- * We will invoke a global notify handler if installed. This is done
- * _before_ we invoke the per-device handler attached to the device.
- */
- if (NotifyInfo->Notify.Value <= ACPI_MAX_SYS_NOTIFY)
- {
- /* Global system notification handler */
-
- if (AcpiGbl_SystemNotify.Handler)
- {
- GlobalHandler = AcpiGbl_SystemNotify.Handler;
- GlobalContext = AcpiGbl_SystemNotify.Context;
- }
- }
- else
- {
- /* Global driver notification handler */
+ /* Invoke a global notify handler if installed */
- if (AcpiGbl_DeviceNotify.Handler)
- {
- GlobalHandler = AcpiGbl_DeviceNotify.Handler;
- GlobalContext = AcpiGbl_DeviceNotify.Context;
- }
- }
-
- /* Invoke the system handler first, if present */
-
- if (GlobalHandler)
+ if (Info->Notify.Global->Handler)
{
- GlobalHandler (NotifyInfo->Notify.Node, NotifyInfo->Notify.Value,
- GlobalContext);
+ Info->Notify.Global->Handler (Info->Notify.Node,
+ Info->Notify.Value,
+ Info->Notify.Global->Context);
}
- /* Now invoke the per-device handler, if present */
+ /* Now invoke the local notify handler(s) if any are installed */
- HandlerObj = NotifyInfo->Notify.HandlerObj;
- if (HandlerObj)
+ HandlerObj = Info->Notify.HandlerListHead;
+ while (HandlerObj)
{
- HandlerObj->Notify.Handler (NotifyInfo->Notify.Node,
- NotifyInfo->Notify.Value,
+ HandlerObj->Notify.Handler (Info->Notify.Node,
+ Info->Notify.Value,
HandlerObj->Notify.Context);
+
+ HandlerObj = HandlerObj->Notify.Next[Info->Notify.HandlerListId];
}
/* All done with the info object */
- AcpiUtDeleteGenericState (NotifyInfo);
+ AcpiUtDeleteGenericState (Info);
}
+#if (!ACPI_REDUCED_HARDWARE)
/******************************************************************************
*
* FUNCTION: AcpiEvTerminate
@@ -370,3 +324,5 @@ AcpiEvTerminate (
}
return_VOID;
}
+
+#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/usr/src/uts/intel/io/acpica/events/evregion.c b/usr/src/uts/intel/io/acpica/events/evregion.c
index 193eb07b92..8c1a8ebbae 100644
--- a/usr/src/uts/intel/io/acpica/events/evregion.c
+++ b/usr/src/uts/intel/io/acpica/events/evregion.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -372,6 +372,7 @@ Cleanup1:
* FUNCTION: AcpiEvAddressSpaceDispatch
*
* PARAMETERS: RegionObj - Internal region object
+ * FieldObj - Corresponding field. Can be NULL.
* Function - Read or Write operation
* RegionOffset - Where in the region to read or write
* BitWidth - Field width in bits (8, 16, 32, or 64)
@@ -388,6 +389,7 @@ Cleanup1:
ACPI_STATUS
AcpiEvAddressSpaceDispatch (
ACPI_OPERAND_OBJECT *RegionObj,
+ ACPI_OPERAND_OBJECT *FieldObj,
UINT32 Function,
UINT32 RegionOffset,
UINT32 BitWidth,
@@ -399,6 +401,7 @@ AcpiEvAddressSpaceDispatch (
ACPI_OPERAND_OBJECT *HandlerDesc;
ACPI_OPERAND_OBJECT *RegionObj2;
void *RegionContext = NULL;
+ ACPI_CONNECTION_INFO *Context;
ACPI_FUNCTION_TRACE (EvAddressSpaceDispatch);
@@ -423,6 +426,8 @@ AcpiEvAddressSpaceDispatch (
return_ACPI_STATUS (AE_NOT_EXIST);
}
+ Context = HandlerDesc->AddressSpace.Context;
+
/*
* It may be the case that the region has never been initialized.
* Some types of regions require special init code
@@ -450,7 +455,7 @@ AcpiEvAddressSpaceDispatch (
AcpiExExitInterpreter ();
Status = RegionSetup (RegionObj, ACPI_REGION_ACTIVATE,
- HandlerDesc->AddressSpace.Context, &RegionContext);
+ Context, &RegionContext);
/* Re-enter the interpreter */
@@ -499,6 +504,27 @@ AcpiEvAddressSpaceDispatch (
ACPI_FORMAT_NATIVE_UINT (RegionObj->Region.Address + RegionOffset),
AcpiUtGetRegionName (RegionObj->Region.SpaceId)));
+
+ /*
+ * Special handling for GenericSerialBus and GeneralPurposeIo:
+ * There are three extra parameters that must be passed to the
+ * handler via the context:
+ * 1) Connection buffer, a resource template from Connection() op.
+ * 2) Length of the above buffer.
+ * 3) Actual access length from the AccessAs() op.
+ */
+ if (((RegionObj->Region.SpaceId == ACPI_ADR_SPACE_GSBUS) ||
+ (RegionObj->Region.SpaceId == ACPI_ADR_SPACE_GPIO)) &&
+ Context &&
+ FieldObj)
+ {
+ /* Get the Connection (ResourceTemplate) buffer */
+
+ Context->Connection = FieldObj->Field.ResourceBuffer;
+ Context->Length = FieldObj->Field.ResourceLength;
+ Context->AccessLength = FieldObj->Field.AccessLength;
+ }
+
if (!(HandlerDesc->AddressSpace.HandlerFlags &
ACPI_ADDR_HANDLER_DEFAULT_INSTALLED))
{
@@ -514,7 +540,7 @@ AcpiEvAddressSpaceDispatch (
Status = Handler (Function,
(RegionObj->Region.Address + RegionOffset), BitWidth, Value,
- HandlerDesc->AddressSpace.Context, RegionObj2->Extra.RegionContext);
+ Context, RegionObj2->Extra.RegionContext);
if (ACPI_FAILURE (Status))
{
diff --git a/usr/src/uts/intel/io/acpica/events/evrgnini.c b/usr/src/uts/intel/io/acpica/events/evrgnini.c
index 496e40e1aa..92946b5aaa 100644
--- a/usr/src/uts/intel/io/acpica/events/evrgnini.c
+++ b/usr/src/uts/intel/io/acpica/events/evrgnini.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/events/evsci.c b/usr/src/uts/intel/io/acpica/events/evsci.c
index 157c184f2b..c47cccc8db 100644
--- a/usr/src/uts/intel/io/acpica/events/evsci.c
+++ b/usr/src/uts/intel/io/acpica/events/evsci.c
@@ -6,7 +6,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -50,6 +50,8 @@
#define _COMPONENT ACPI_EVENTS
ACPI_MODULE_NAME ("evsci")
+#if (!ACPI_REDUCED_HARDWARE) /* Entire module */
+
/* Local prototypes */
static UINT32 ACPI_SYSTEM_XFACE
@@ -204,4 +206,4 @@ AcpiEvRemoveSciHandler (
return_ACPI_STATUS (Status);
}
-
+#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/usr/src/uts/intel/io/acpica/events/evxface.c b/usr/src/uts/intel/io/acpica/events/evxface.c
index 8a903a6b47..1b709a5787 100644
--- a/usr/src/uts/intel/io/acpica/events/evxface.c
+++ b/usr/src/uts/intel/io/acpica/events/evxface.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -56,6 +56,337 @@
/*******************************************************************************
*
+ * FUNCTION: AcpiInstallNotifyHandler
+ *
+ * PARAMETERS: Device - The device for which notifies will be handled
+ * HandlerType - The type of handler:
+ * ACPI_SYSTEM_NOTIFY: System Handler (00-7F)
+ * ACPI_DEVICE_NOTIFY: Device Handler (80-FF)
+ * ACPI_ALL_NOTIFY: Both System and Device
+ * Handler - Address of the handler
+ * Context - Value passed to the handler on each GPE
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Install a handler for notifications on an ACPI Device,
+ * ThermalZone, or Processor object.
+ *
+ * NOTES: The Root namespace object may have only one handler for each
+ * type of notify (System/Device). Device/Thermal/Processor objects
+ * may have one device notify handler, and multiple system notify
+ * handlers.
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiInstallNotifyHandler (
+ ACPI_HANDLE Device,
+ UINT32 HandlerType,
+ ACPI_NOTIFY_HANDLER Handler,
+ void *Context)
+{
+ ACPI_NAMESPACE_NODE *Node = ACPI_CAST_PTR (ACPI_NAMESPACE_NODE, Device);
+ ACPI_OPERAND_OBJECT *ObjDesc;
+ ACPI_OPERAND_OBJECT *HandlerObj;
+ ACPI_STATUS Status;
+ UINT32 i;
+
+
+ ACPI_FUNCTION_TRACE (AcpiInstallNotifyHandler);
+
+
+ /* Parameter validation */
+
+ if ((!Device) || (!Handler) || (!HandlerType) ||
+ (HandlerType > ACPI_MAX_NOTIFY_HANDLER_TYPE))
+ {
+ return_ACPI_STATUS (AE_BAD_PARAMETER);
+ }
+
+ Status = AcpiUtAcquireMutex (ACPI_MTX_NAMESPACE);
+ if (ACPI_FAILURE (Status))
+ {
+ return_ACPI_STATUS (Status);
+ }
+
+ /*
+ * Root Object:
+ * Registering a notify handler on the root object indicates that the
+ * caller wishes to receive notifications for all objects. Note that
+ * only one global handler can be registered per notify type.
+ * Ensure that a handler is not already installed.
+ */
+ if (Device == ACPI_ROOT_OBJECT)
+ {
+ for (i = 0; i < ACPI_NUM_NOTIFY_TYPES; i++)
+ {
+ if (HandlerType & (i+1))
+ {
+ if (AcpiGbl_GlobalNotify[i].Handler)
+ {
+ Status = AE_ALREADY_EXISTS;
+ goto UnlockAndExit;
+ }
+
+ AcpiGbl_GlobalNotify[i].Handler = Handler;
+ AcpiGbl_GlobalNotify[i].Context = Context;
+ }
+ }
+
+ goto UnlockAndExit; /* Global notify handler installed, all done */
+ }
+
+ /*
+ * All Other Objects:
+ * Caller will only receive notifications specific to the target
+ * object. Note that only certain object types are allowed to
+ * receive notifications.
+ */
+
+ /* Are Notifies allowed on this object? */
+
+ if (!AcpiEvIsNotifyObject (Node))
+ {
+ Status = AE_TYPE;
+ goto UnlockAndExit;
+ }
+
+ /* Check for an existing internal object, might not exist */
+
+ ObjDesc = AcpiNsGetAttachedObject (Node);
+ if (!ObjDesc)
+ {
+ /* Create a new object */
+
+ ObjDesc = AcpiUtCreateInternalObject (Node->Type);
+ if (!ObjDesc)
+ {
+ Status = AE_NO_MEMORY;
+ goto UnlockAndExit;
+ }
+
+ /* Attach new object to the Node, remove local reference */
+
+ Status = AcpiNsAttachObject (Device, ObjDesc, Node->Type);
+ AcpiUtRemoveReference (ObjDesc);
+ if (ACPI_FAILURE (Status))
+ {
+ goto UnlockAndExit;
+ }
+ }
+
+ /* Ensure that the handler is not already installed in the lists */
+
+ for (i = 0; i < ACPI_NUM_NOTIFY_TYPES; i++)
+ {
+ if (HandlerType & (i+1))
+ {
+ HandlerObj = ObjDesc->CommonNotify.NotifyList[i];
+ while (HandlerObj)
+ {
+ if (HandlerObj->Notify.Handler == Handler)
+ {
+ Status = AE_ALREADY_EXISTS;
+ goto UnlockAndExit;
+ }
+
+ HandlerObj = HandlerObj->Notify.Next[i];
+ }
+ }
+ }
+
+ /* Create and populate a new notify handler object */
+
+ HandlerObj = AcpiUtCreateInternalObject (ACPI_TYPE_LOCAL_NOTIFY);
+ if (!HandlerObj)
+ {
+ Status = AE_NO_MEMORY;
+ goto UnlockAndExit;
+ }
+
+ HandlerObj->Notify.Node = Node;
+ HandlerObj->Notify.HandlerType = HandlerType;
+ HandlerObj->Notify.Handler = Handler;
+ HandlerObj->Notify.Context = Context;
+
+ /* Install the handler at the list head(s) */
+
+ for (i = 0; i < ACPI_NUM_NOTIFY_TYPES; i++)
+ {
+ if (HandlerType & (i+1))
+ {
+ HandlerObj->Notify.Next[i] =
+ ObjDesc->CommonNotify.NotifyList[i];
+
+ ObjDesc->CommonNotify.NotifyList[i] = HandlerObj;
+ }
+ }
+
+ /* Add an extra reference if handler was installed in both lists */
+
+ if (HandlerType == ACPI_ALL_NOTIFY)
+ {
+ AcpiUtAddReference (HandlerObj);
+ }
+
+
+UnlockAndExit:
+ (void) AcpiUtReleaseMutex (ACPI_MTX_NAMESPACE);
+ return_ACPI_STATUS (Status);
+}
+
+ACPI_EXPORT_SYMBOL (AcpiInstallNotifyHandler)
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiRemoveNotifyHandler
+ *
+ * PARAMETERS: Device - The device for which the handler is installed
+ * HandlerType - The type of handler:
+ * ACPI_SYSTEM_NOTIFY: System Handler (00-7F)
+ * ACPI_DEVICE_NOTIFY: Device Handler (80-FF)
+ * ACPI_ALL_NOTIFY: Both System and Device
+ * Handler - Address of the handler
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Remove a handler for notifies on an ACPI device
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiRemoveNotifyHandler (
+ ACPI_HANDLE Device,
+ UINT32 HandlerType,
+ ACPI_NOTIFY_HANDLER Handler)
+{
+ ACPI_NAMESPACE_NODE *Node = ACPI_CAST_PTR (ACPI_NAMESPACE_NODE, Device);
+ ACPI_OPERAND_OBJECT *ObjDesc;
+ ACPI_OPERAND_OBJECT *HandlerObj;
+ ACPI_OPERAND_OBJECT *PreviousHandlerObj;
+ ACPI_STATUS Status;
+ UINT32 i;
+
+
+ ACPI_FUNCTION_TRACE (AcpiRemoveNotifyHandler);
+
+
+ /* Parameter validation */
+
+ if ((!Device) || (!Handler) || (!HandlerType) ||
+ (HandlerType > ACPI_MAX_NOTIFY_HANDLER_TYPE))
+ {
+ return_ACPI_STATUS (AE_BAD_PARAMETER);
+ }
+
+#ifdef _UNDER_DEVELOPMENT
+ /* Make sure all deferred tasks are completed */
+
+ AcpiOsWaitEventsComplete (NULL);
+#endif
+
+ Status = AcpiUtAcquireMutex (ACPI_MTX_NAMESPACE);
+ if (ACPI_FAILURE (Status))
+ {
+ return_ACPI_STATUS (Status);
+ }
+
+ /* Root Object. Global handlers are removed here */
+
+ if (Device == ACPI_ROOT_OBJECT)
+ {
+ for (i = 0; i < ACPI_NUM_NOTIFY_TYPES; i++)
+ {
+ if (HandlerType & (i+1))
+ {
+ if (!AcpiGbl_GlobalNotify[i].Handler ||
+ (AcpiGbl_GlobalNotify[i].Handler != Handler))
+ {
+ Status = AE_NOT_EXIST;
+ goto UnlockAndExit;
+ }
+
+ ACPI_DEBUG_PRINT ((ACPI_DB_INFO,
+ "Removing global notify handler\n"));
+
+ AcpiGbl_GlobalNotify[i].Handler = NULL;
+ AcpiGbl_GlobalNotify[i].Context = NULL;
+ }
+ }
+
+ goto UnlockAndExit;
+ }
+
+ /* All other objects: Are Notifies allowed on this object? */
+
+ if (!AcpiEvIsNotifyObject (Node))
+ {
+ Status = AE_TYPE;
+ goto UnlockAndExit;
+ }
+
+ /* Must have an existing internal object */
+
+ ObjDesc = AcpiNsGetAttachedObject (Node);
+ if (!ObjDesc)
+ {
+ Status = AE_NOT_EXIST;
+ goto UnlockAndExit;
+ }
+
+ /* Internal object exists. Find the handler and remove it */
+
+ for (i = 0; i < ACPI_NUM_NOTIFY_TYPES; i++)
+ {
+ if (HandlerType & (i+1))
+ {
+ HandlerObj = ObjDesc->CommonNotify.NotifyList[i];
+ PreviousHandlerObj = NULL;
+
+ /* Attempt to find the handler in the handler list */
+
+ while (HandlerObj &&
+ (HandlerObj->Notify.Handler != Handler))
+ {
+ PreviousHandlerObj = HandlerObj;
+ HandlerObj = HandlerObj->Notify.Next[i];
+ }
+
+ if (!HandlerObj)
+ {
+ Status = AE_NOT_EXIST;
+ goto UnlockAndExit;
+ }
+
+ /* Remove the handler object from the list */
+
+ if (PreviousHandlerObj) /* Handler is not at the list head */
+ {
+ PreviousHandlerObj->Notify.Next[i] =
+ HandlerObj->Notify.Next[i];
+ }
+ else /* Handler is at the list head */
+ {
+ ObjDesc->CommonNotify.NotifyList[i] =
+ HandlerObj->Notify.Next[i];
+ }
+
+ AcpiUtRemoveReference (HandlerObj);
+ }
+ }
+
+
+UnlockAndExit:
+ (void) AcpiUtReleaseMutex (ACPI_MTX_NAMESPACE);
+ return_ACPI_STATUS (Status);
+}
+
+ACPI_EXPORT_SYMBOL (AcpiRemoveNotifyHandler)
+
+
+/*******************************************************************************
+ *
* FUNCTION: AcpiInstallExceptionHandler
*
* PARAMETERS: Handler - Pointer to the handler function for the
@@ -103,6 +434,7 @@ Cleanup:
ACPI_EXPORT_SYMBOL (AcpiInstallExceptionHandler)
+#if (!ACPI_REDUCED_HARDWARE)
/*******************************************************************************
*
* FUNCTION: AcpiInstallGlobalEventHandler
@@ -307,357 +639,6 @@ ACPI_EXPORT_SYMBOL (AcpiRemoveFixedEventHandler)
/*******************************************************************************
*
- * FUNCTION: AcpiInstallNotifyHandler
- *
- * PARAMETERS: Device - The device for which notifies will be handled
- * HandlerType - The type of handler:
- * ACPI_SYSTEM_NOTIFY: SystemHandler (00-7f)
- * ACPI_DEVICE_NOTIFY: DriverHandler (80-ff)
- * ACPI_ALL_NOTIFY: both system and device
- * Handler - Address of the handler
- * Context - Value passed to the handler on each GPE
- *
- * RETURN: Status
- *
- * DESCRIPTION: Install a handler for notifies on an ACPI device
- *
- ******************************************************************************/
-
-ACPI_STATUS
-AcpiInstallNotifyHandler (
- ACPI_HANDLE Device,
- UINT32 HandlerType,
- ACPI_NOTIFY_HANDLER Handler,
- void *Context)
-{
- ACPI_OPERAND_OBJECT *ObjDesc;
- ACPI_OPERAND_OBJECT *NotifyObj;
- ACPI_NAMESPACE_NODE *Node;
- ACPI_STATUS Status;
-
-
- ACPI_FUNCTION_TRACE (AcpiInstallNotifyHandler);
-
-
- /* Parameter validation */
-
- if ((!Device) ||
- (!Handler) ||
- (HandlerType > ACPI_MAX_NOTIFY_HANDLER_TYPE))
- {
- return_ACPI_STATUS (AE_BAD_PARAMETER);
- }
-
- Status = AcpiUtAcquireMutex (ACPI_MTX_NAMESPACE);
- if (ACPI_FAILURE (Status))
- {
- return_ACPI_STATUS (Status);
- }
-
- /* Convert and validate the device handle */
-
- Node = AcpiNsValidateHandle (Device);
- if (!Node)
- {
- Status = AE_BAD_PARAMETER;
- goto UnlockAndExit;
- }
-
- /*
- * Root Object:
- * Registering a notify handler on the root object indicates that the
- * caller wishes to receive notifications for all objects. Note that
- * only one <external> global handler can be regsitered (per notify type).
- */
- if (Device == ACPI_ROOT_OBJECT)
- {
- /* Make sure the handler is not already installed */
-
- if (((HandlerType & ACPI_SYSTEM_NOTIFY) &&
- AcpiGbl_SystemNotify.Handler) ||
- ((HandlerType & ACPI_DEVICE_NOTIFY) &&
- AcpiGbl_DeviceNotify.Handler))
- {
- Status = AE_ALREADY_EXISTS;
- goto UnlockAndExit;
- }
-
- if (HandlerType & ACPI_SYSTEM_NOTIFY)
- {
- AcpiGbl_SystemNotify.Node = Node;
- AcpiGbl_SystemNotify.Handler = Handler;
- AcpiGbl_SystemNotify.Context = Context;
- }
-
- if (HandlerType & ACPI_DEVICE_NOTIFY)
- {
- AcpiGbl_DeviceNotify.Node = Node;
- AcpiGbl_DeviceNotify.Handler = Handler;
- AcpiGbl_DeviceNotify.Context = Context;
- }
-
- /* Global notify handler installed */
- }
-
- /*
- * All Other Objects:
- * Caller will only receive notifications specific to the target object.
- * Note that only certain object types can receive notifications.
- */
- else
- {
- /* Notifies allowed on this object? */
-
- if (!AcpiEvIsNotifyObject (Node))
- {
- Status = AE_TYPE;
- goto UnlockAndExit;
- }
-
- /* Check for an existing internal object */
-
- ObjDesc = AcpiNsGetAttachedObject (Node);
- if (ObjDesc)
- {
- /* Object exists - make sure there's no handler */
-
- if (((HandlerType & ACPI_SYSTEM_NOTIFY) &&
- ObjDesc->CommonNotify.SystemNotify) ||
- ((HandlerType & ACPI_DEVICE_NOTIFY) &&
- ObjDesc->CommonNotify.DeviceNotify))
- {
- Status = AE_ALREADY_EXISTS;
- goto UnlockAndExit;
- }
- }
- else
- {
- /* Create a new object */
-
- ObjDesc = AcpiUtCreateInternalObject (Node->Type);
- if (!ObjDesc)
- {
- Status = AE_NO_MEMORY;
- goto UnlockAndExit;
- }
-
- /* Attach new object to the Node */
-
- Status = AcpiNsAttachObject (Device, ObjDesc, Node->Type);
-
- /* Remove local reference to the object */
-
- AcpiUtRemoveReference (ObjDesc);
- if (ACPI_FAILURE (Status))
- {
- goto UnlockAndExit;
- }
- }
-
- /* Install the handler */
-
- NotifyObj = AcpiUtCreateInternalObject (ACPI_TYPE_LOCAL_NOTIFY);
- if (!NotifyObj)
- {
- Status = AE_NO_MEMORY;
- goto UnlockAndExit;
- }
-
- NotifyObj->Notify.Node = Node;
- NotifyObj->Notify.Handler = Handler;
- NotifyObj->Notify.Context = Context;
-
- if (HandlerType & ACPI_SYSTEM_NOTIFY)
- {
- ObjDesc->CommonNotify.SystemNotify = NotifyObj;
- }
-
- if (HandlerType & ACPI_DEVICE_NOTIFY)
- {
- ObjDesc->CommonNotify.DeviceNotify = NotifyObj;
- }
-
- if (HandlerType == ACPI_ALL_NOTIFY)
- {
- /* Extra ref if installed in both */
-
- AcpiUtAddReference (NotifyObj);
- }
- }
-
-
-UnlockAndExit:
- (void) AcpiUtReleaseMutex (ACPI_MTX_NAMESPACE);
- return_ACPI_STATUS (Status);
-}
-
-ACPI_EXPORT_SYMBOL (AcpiInstallNotifyHandler)
-
-
-/*******************************************************************************
- *
- * FUNCTION: AcpiRemoveNotifyHandler
- *
- * PARAMETERS: Device - The device for which notifies will be handled
- * HandlerType - The type of handler:
- * ACPI_SYSTEM_NOTIFY: SystemHandler (00-7f)
- * ACPI_DEVICE_NOTIFY: DriverHandler (80-ff)
- * ACPI_ALL_NOTIFY: both system and device
- * Handler - Address of the handler
- *
- * RETURN: Status
- *
- * DESCRIPTION: Remove a handler for notifies on an ACPI device
- *
- ******************************************************************************/
-
-ACPI_STATUS
-AcpiRemoveNotifyHandler (
- ACPI_HANDLE Device,
- UINT32 HandlerType,
- ACPI_NOTIFY_HANDLER Handler)
-{
- ACPI_OPERAND_OBJECT *NotifyObj;
- ACPI_OPERAND_OBJECT *ObjDesc;
- ACPI_NAMESPACE_NODE *Node;
- ACPI_STATUS Status;
-
-
- ACPI_FUNCTION_TRACE (AcpiRemoveNotifyHandler);
-
-
- /* Parameter validation */
-
- if ((!Device) ||
- (!Handler) ||
- (HandlerType > ACPI_MAX_NOTIFY_HANDLER_TYPE))
- {
- return_ACPI_STATUS (AE_BAD_PARAMETER);
- }
-
- Status = AcpiUtAcquireMutex (ACPI_MTX_NAMESPACE);
- if (ACPI_FAILURE (Status))
- {
- return_ACPI_STATUS (Status);
- }
-
- /* Convert and validate the device handle */
-
- Node = AcpiNsValidateHandle (Device);
- if (!Node)
- {
- Status = AE_BAD_PARAMETER;
- goto UnlockAndExit;
- }
-
- /* Root Object */
-
- if (Device == ACPI_ROOT_OBJECT)
- {
- ACPI_DEBUG_PRINT ((ACPI_DB_INFO,
- "Removing notify handler for namespace root object\n"));
-
- if (((HandlerType & ACPI_SYSTEM_NOTIFY) &&
- !AcpiGbl_SystemNotify.Handler) ||
- ((HandlerType & ACPI_DEVICE_NOTIFY) &&
- !AcpiGbl_DeviceNotify.Handler))
- {
- Status = AE_NOT_EXIST;
- goto UnlockAndExit;
- }
-
- if (HandlerType & ACPI_SYSTEM_NOTIFY)
- {
- AcpiGbl_SystemNotify.Node = NULL;
- AcpiGbl_SystemNotify.Handler = NULL;
- AcpiGbl_SystemNotify.Context = NULL;
- }
-
- if (HandlerType & ACPI_DEVICE_NOTIFY)
- {
- AcpiGbl_DeviceNotify.Node = NULL;
- AcpiGbl_DeviceNotify.Handler = NULL;
- AcpiGbl_DeviceNotify.Context = NULL;
- }
- }
-
- /* All Other Objects */
-
- else
- {
- /* Notifies allowed on this object? */
-
- if (!AcpiEvIsNotifyObject (Node))
- {
- Status = AE_TYPE;
- goto UnlockAndExit;
- }
-
- /* Check for an existing internal object */
-
- ObjDesc = AcpiNsGetAttachedObject (Node);
- if (!ObjDesc)
- {
- Status = AE_NOT_EXIST;
- goto UnlockAndExit;
- }
-
- /* Object exists - make sure there's an existing handler */
-
- if (HandlerType & ACPI_SYSTEM_NOTIFY)
- {
- NotifyObj = ObjDesc->CommonNotify.SystemNotify;
- if (!NotifyObj)
- {
- Status = AE_NOT_EXIST;
- goto UnlockAndExit;
- }
-
- if (NotifyObj->Notify.Handler != Handler)
- {
- Status = AE_BAD_PARAMETER;
- goto UnlockAndExit;
- }
-
- /* Remove the handler */
-
- ObjDesc->CommonNotify.SystemNotify = NULL;
- AcpiUtRemoveReference (NotifyObj);
- }
-
- if (HandlerType & ACPI_DEVICE_NOTIFY)
- {
- NotifyObj = ObjDesc->CommonNotify.DeviceNotify;
- if (!NotifyObj)
- {
- Status = AE_NOT_EXIST;
- goto UnlockAndExit;
- }
-
- if (NotifyObj->Notify.Handler != Handler)
- {
- Status = AE_BAD_PARAMETER;
- goto UnlockAndExit;
- }
-
- /* Remove the handler */
-
- ObjDesc->CommonNotify.DeviceNotify = NULL;
- AcpiUtRemoveReference (NotifyObj);
- }
- }
-
-
-UnlockAndExit:
- (void) AcpiUtReleaseMutex (ACPI_MTX_NAMESPACE);
- return_ACPI_STATUS (Status);
-}
-
-ACPI_EXPORT_SYMBOL (AcpiRemoveNotifyHandler)
-
-
-/*******************************************************************************
- *
* FUNCTION: AcpiInstallGpeHandler
*
* PARAMETERS: GpeDevice - Namespace node for the GPE (NULL for FADT
@@ -975,3 +956,4 @@ AcpiReleaseGlobalLock (
ACPI_EXPORT_SYMBOL (AcpiReleaseGlobalLock)
+#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/usr/src/uts/intel/io/acpica/events/evxfevnt.c b/usr/src/uts/intel/io/acpica/events/evxfevnt.c
index a813dd3c96..254214af19 100644
--- a/usr/src/uts/intel/io/acpica/events/evxfevnt.c
+++ b/usr/src/uts/intel/io/acpica/events/evxfevnt.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -52,6 +52,7 @@
ACPI_MODULE_NAME ("evxfevnt")
+#if (!ACPI_REDUCED_HARDWARE) /* Entire module */
/*******************************************************************************
*
* FUNCTION: AcpiEnable
@@ -377,4 +378,4 @@ AcpiGetEventStatus (
ACPI_EXPORT_SYMBOL (AcpiGetEventStatus)
-
+#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/usr/src/uts/intel/io/acpica/events/evxfgpe.c b/usr/src/uts/intel/io/acpica/events/evxfgpe.c
index 76019969ec..c0dd88c777 100644
--- a/usr/src/uts/intel/io/acpica/events/evxfgpe.c
+++ b/usr/src/uts/intel/io/acpica/events/evxfgpe.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -53,6 +53,7 @@
ACPI_MODULE_NAME ("evxfgpe")
+#if (!ACPI_REDUCED_HARDWARE) /* Entire module */
/*******************************************************************************
*
* FUNCTION: AcpiUpdateAllGpes
@@ -898,3 +899,5 @@ AcpiGetGpeDevice (
}
ACPI_EXPORT_SYMBOL (AcpiGetGpeDevice)
+
+#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/usr/src/uts/intel/io/acpica/events/evxfregn.c b/usr/src/uts/intel/io/acpica/events/evxfregn.c
index 4fd637e31f..493df9217d 100644
--- a/usr/src/uts/intel/io/acpica/events/evxfregn.c
+++ b/usr/src/uts/intel/io/acpica/events/evxfregn.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exconfig.c b/usr/src/uts/intel/io/acpica/executer/exconfig.c
index d7677e99ce..9e05497b9f 100644
--- a/usr/src/uts/intel/io/acpica/executer/exconfig.c
+++ b/usr/src/uts/intel/io/acpica/executer/exconfig.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -334,7 +334,7 @@ AcpiExRegionRead (
for (i = 0; i < Length; i++)
{
- Status = AcpiEvAddressSpaceDispatch (ObjDesc, ACPI_READ,
+ Status = AcpiEvAddressSpaceDispatch (ObjDesc, NULL, ACPI_READ,
RegionOffset, 8, &Value);
if (ACPI_FAILURE (Status))
{
diff --git a/usr/src/uts/intel/io/acpica/executer/exconvrt.c b/usr/src/uts/intel/io/acpica/executer/exconvrt.c
index 6894acdec7..e79cbc96e9 100644
--- a/usr/src/uts/intel/io/acpica/executer/exconvrt.c
+++ b/usr/src/uts/intel/io/acpica/executer/exconvrt.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/excreate.c b/usr/src/uts/intel/io/acpica/executer/excreate.c
index fe93e0e940..888ff7d7c4 100644
--- a/usr/src/uts/intel/io/acpica/executer/excreate.c
+++ b/usr/src/uts/intel/io/acpica/executer/excreate.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -284,7 +284,7 @@ Cleanup:
*
* PARAMETERS: AmlStart - Pointer to the region declaration AML
* AmlLength - Max length of the declaration AML
- * RegionSpace - SpaceID for the region
+ * SpaceId - Address space ID for the region
* WalkState - Current state
*
* RETURN: Status
@@ -297,7 +297,7 @@ ACPI_STATUS
AcpiExCreateRegion (
UINT8 *AmlStart,
UINT32 AmlLength,
- UINT8 RegionSpace,
+ UINT8 SpaceId,
ACPI_WALK_STATE *WalkState)
{
ACPI_STATUS Status;
@@ -326,16 +326,18 @@ AcpiExCreateRegion (
* Space ID must be one of the predefined IDs, or in the user-defined
* range
*/
- if ((RegionSpace >= ACPI_NUM_PREDEFINED_REGIONS) &&
- (RegionSpace < ACPI_USER_REGION_BEGIN) &&
- (RegionSpace != ACPI_ADR_SPACE_DATA_TABLE))
+ if (!AcpiIsValidSpaceId (SpaceId))
{
- ACPI_ERROR ((AE_INFO, "Invalid AddressSpace type 0x%X", RegionSpace));
- return_ACPI_STATUS (AE_AML_INVALID_SPACE_ID);
+ /*
+ * Print an error message, but continue. We don't want to abort
+ * a table load for this exception. Instead, if the region is
+ * actually used at runtime, abort the executing method.
+ */
+ ACPI_ERROR ((AE_INFO, "Invalid/unknown Address Space ID: 0x%2.2X", SpaceId));
}
ACPI_DEBUG_PRINT ((ACPI_DB_LOAD, "Region Type - %s (0x%X)\n",
- AcpiUtGetRegionName (RegionSpace), RegionSpace));
+ AcpiUtGetRegionName (SpaceId), SpaceId));
/* Create the region descriptor */
@@ -353,10 +355,18 @@ AcpiExCreateRegion (
RegionObj2 = ObjDesc->Common.NextObject;
RegionObj2->Extra.AmlStart = AmlStart;
RegionObj2->Extra.AmlLength = AmlLength;
+ if (WalkState->ScopeInfo)
+ {
+ RegionObj2->Extra.ScopeNode = WalkState->ScopeInfo->Scope.Node;
+ }
+ else
+ {
+ RegionObj2->Extra.ScopeNode = Node;
+ }
/* Init the region from the operands */
- ObjDesc->Region.SpaceId = RegionSpace;
+ ObjDesc->Region.SpaceId = SpaceId;
ObjDesc->Region.Address = 0;
ObjDesc->Region.Length = 0;
ObjDesc->Region.Node = Node;
diff --git a/usr/src/uts/intel/io/acpica/executer/exdebug.c b/usr/src/uts/intel/io/acpica/executer/exdebug.c
index 62bf810c79..166b2af170 100644
--- a/usr/src/uts/intel/io/acpica/executer/exdebug.c
+++ b/usr/src/uts/intel/io/acpica/executer/exdebug.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exdump.c b/usr/src/uts/intel/io/acpica/executer/exdump.c
index 60003e6154..0943e668c1 100644
--- a/usr/src/uts/intel/io/acpica/executer/exdump.c
+++ b/usr/src/uts/intel/io/acpica/executer/exdump.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -131,8 +131,8 @@ static ACPI_EXDUMP_INFO AcpiExDumpDevice[4] =
{
{ACPI_EXD_INIT, ACPI_EXD_TABLE_SIZE (AcpiExDumpDevice), NULL},
{ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Device.Handler), "Handler"},
- {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Device.SystemNotify), "System Notify"},
- {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Device.DeviceNotify), "Device Notify"}
+ {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Device.NotifyList[0]), "System Notify"},
+ {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Device.NotifyList[1]), "Device Notify"}
};
static ACPI_EXDUMP_INFO AcpiExDumpEvent[2] =
@@ -179,8 +179,8 @@ static ACPI_EXDUMP_INFO AcpiExDumpPower[5] =
{ACPI_EXD_INIT, ACPI_EXD_TABLE_SIZE (AcpiExDumpPower), NULL},
{ACPI_EXD_UINT32, ACPI_EXD_OFFSET (PowerResource.SystemLevel), "System Level"},
{ACPI_EXD_UINT32, ACPI_EXD_OFFSET (PowerResource.ResourceOrder), "Resource Order"},
- {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (PowerResource.SystemNotify), "System Notify"},
- {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (PowerResource.DeviceNotify), "Device Notify"}
+ {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (PowerResource.NotifyList[0]), "System Notify"},
+ {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (PowerResource.NotifyList[1]), "Device Notify"}
};
static ACPI_EXDUMP_INFO AcpiExDumpProcessor[7] =
@@ -189,16 +189,16 @@ static ACPI_EXDUMP_INFO AcpiExDumpProcessor[7] =
{ACPI_EXD_UINT8, ACPI_EXD_OFFSET (Processor.ProcId), "Processor ID"},
{ACPI_EXD_UINT8 , ACPI_EXD_OFFSET (Processor.Length), "Length"},
{ACPI_EXD_ADDRESS, ACPI_EXD_OFFSET (Processor.Address), "Address"},
- {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Processor.SystemNotify), "System Notify"},
- {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Processor.DeviceNotify), "Device Notify"},
+ {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Processor.NotifyList[0]), "System Notify"},
+ {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Processor.NotifyList[1]), "Device Notify"},
{ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Processor.Handler), "Handler"}
};
static ACPI_EXDUMP_INFO AcpiExDumpThermal[4] =
{
{ACPI_EXD_INIT, ACPI_EXD_TABLE_SIZE (AcpiExDumpThermal), NULL},
- {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (ThermalZone.SystemNotify), "System Notify"},
- {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (ThermalZone.DeviceNotify), "Device Notify"},
+ {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (ThermalZone.NotifyList[0]), "System Notify"},
+ {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (ThermalZone.NotifyList[1]), "Device Notify"},
{ACPI_EXD_POINTER, ACPI_EXD_OFFSET (ThermalZone.Handler), "Handler"}
};
@@ -209,11 +209,13 @@ static ACPI_EXDUMP_INFO AcpiExDumpBufferField[3] =
{ACPI_EXD_POINTER, ACPI_EXD_OFFSET (BufferField.BufferObj), "Buffer Object"}
};
-static ACPI_EXDUMP_INFO AcpiExDumpRegionField[3] =
+static ACPI_EXDUMP_INFO AcpiExDumpRegionField[5] =
{
{ACPI_EXD_INIT, ACPI_EXD_TABLE_SIZE (AcpiExDumpRegionField), NULL},
{ACPI_EXD_FIELD, 0, NULL},
- {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Field.RegionObj), "Region Object"}
+ {ACPI_EXD_UINT8, ACPI_EXD_OFFSET (Field.AccessLength), "AccessLength"},
+ {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Field.RegionObj), "Region Object"},
+ {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Field.ResourceBuffer), "ResourceBuffer"}
};
static ACPI_EXDUMP_INFO AcpiExDumpBankField[5] =
@@ -256,11 +258,15 @@ static ACPI_EXDUMP_INFO AcpiExDumpAddressHandler[6] =
{ACPI_EXD_POINTER, ACPI_EXD_OFFSET (AddressSpace.Context), "Context"}
};
-static ACPI_EXDUMP_INFO AcpiExDumpNotify[3] =
+static ACPI_EXDUMP_INFO AcpiExDumpNotify[7] =
{
{ACPI_EXD_INIT, ACPI_EXD_TABLE_SIZE (AcpiExDumpNotify), NULL},
{ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Notify.Node), "Node"},
- {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Notify.Context), "Context"}
+ {ACPI_EXD_UINT32, ACPI_EXD_OFFSET (Notify.HandlerType), "Handler Type"},
+ {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Notify.Handler), "Handler"},
+ {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Notify.Context), "Context"},
+ {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Notify.Next[0]), "Next System Notify"},
+ {ACPI_EXD_POINTER, ACPI_EXD_OFFSET (Notify.Next[1]), "Next Device Notify"}
};
@@ -999,10 +1005,7 @@ AcpiExDumpPackageObj (
case ACPI_TYPE_STRING:
AcpiOsPrintf ("[String] Value: ");
- for (i = 0; i < ObjDesc->String.Length; i++)
- {
- AcpiOsPrintf ("%c", ObjDesc->String.Pointer[i]);
- }
+ AcpiUtPrintString (ObjDesc->String.Pointer, ACPI_UINT8_MAX);
AcpiOsPrintf ("\n");
break;
diff --git a/usr/src/uts/intel/io/acpica/executer/exfield.c b/usr/src/uts/intel/io/acpica/executer/exfield.c
index 098a18650e..a682406826 100644
--- a/usr/src/uts/intel/io/acpica/executer/exfield.c
+++ b/usr/src/uts/intel/io/acpica/executer/exfield.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -113,19 +113,25 @@ AcpiExReadDataFromField (
}
else if ((ObjDesc->Common.Type == ACPI_TYPE_LOCAL_REGION_FIELD) &&
(ObjDesc->Field.RegionObj->Region.SpaceId == ACPI_ADR_SPACE_SMBUS ||
+ ObjDesc->Field.RegionObj->Region.SpaceId == ACPI_ADR_SPACE_GSBUS ||
ObjDesc->Field.RegionObj->Region.SpaceId == ACPI_ADR_SPACE_IPMI))
{
/*
- * This is an SMBus or IPMI read. We must create a buffer to hold
+ * This is an SMBus, GSBus or IPMI read. We must create a buffer to hold
* the data and then directly access the region handler.
*
- * Note: Smbus protocol value is passed in upper 16-bits of Function
+ * Note: SMBus and GSBus protocol value is passed in upper 16-bits of Function
*/
if (ObjDesc->Field.RegionObj->Region.SpaceId == ACPI_ADR_SPACE_SMBUS)
{
Length = ACPI_SMBUS_BUFFER_SIZE;
Function = ACPI_READ | (ObjDesc->Field.Attribute << 16);
}
+ else if (ObjDesc->Field.RegionObj->Region.SpaceId == ACPI_ADR_SPACE_GSBUS)
+ {
+ Length = ACPI_GSBUS_BUFFER_SIZE;
+ Function = ACPI_READ | (ObjDesc->Field.Attribute << 16);
+ }
else /* IPMI */
{
Length = ACPI_IPMI_BUFFER_SIZE;
@@ -274,23 +280,24 @@ AcpiExWriteDataToField (
}
else if ((ObjDesc->Common.Type == ACPI_TYPE_LOCAL_REGION_FIELD) &&
(ObjDesc->Field.RegionObj->Region.SpaceId == ACPI_ADR_SPACE_SMBUS ||
+ ObjDesc->Field.RegionObj->Region.SpaceId == ACPI_ADR_SPACE_GSBUS ||
ObjDesc->Field.RegionObj->Region.SpaceId == ACPI_ADR_SPACE_IPMI))
{
/*
- * This is an SMBus or IPMI write. We will bypass the entire field
+ * This is an SMBus, GSBus or IPMI write. We will bypass the entire field
* mechanism and handoff the buffer directly to the handler. For
* these address spaces, the buffer is bi-directional; on a write,
* return data is returned in the same buffer.
*
* Source must be a buffer of sufficient size:
- * ACPI_SMBUS_BUFFER_SIZE or ACPI_IPMI_BUFFER_SIZE.
+ * ACPI_SMBUS_BUFFER_SIZE, ACPI_GSBUS_BUFFER_SIZE, or ACPI_IPMI_BUFFER_SIZE.
*
- * Note: SMBus protocol type is passed in upper 16-bits of Function
+ * Note: SMBus and GSBus protocol type is passed in upper 16-bits of Function
*/
if (SourceDesc->Common.Type != ACPI_TYPE_BUFFER)
{
ACPI_ERROR ((AE_INFO,
- "SMBus or IPMI write requires Buffer, found type %s",
+ "SMBus/IPMI/GenericSerialBus write requires Buffer, found type %s",
AcpiUtGetObjectTypeName (SourceDesc)));
return_ACPI_STATUS (AE_AML_OPERAND_TYPE);
@@ -301,6 +308,11 @@ AcpiExWriteDataToField (
Length = ACPI_SMBUS_BUFFER_SIZE;
Function = ACPI_WRITE | (ObjDesc->Field.Attribute << 16);
}
+ else if (ObjDesc->Field.RegionObj->Region.SpaceId == ACPI_ADR_SPACE_GSBUS)
+ {
+ Length = ACPI_GSBUS_BUFFER_SIZE;
+ Function = ACPI_WRITE | (ObjDesc->Field.Attribute << 16);
+ }
else /* IPMI */
{
Length = ACPI_IPMI_BUFFER_SIZE;
@@ -310,7 +322,7 @@ AcpiExWriteDataToField (
if (SourceDesc->Buffer.Length < Length)
{
ACPI_ERROR ((AE_INFO,
- "SMBus or IPMI write requires Buffer of length %u, found length %u",
+ "SMBus/IPMI/GenericSerialBus write requires Buffer of length %u, found length %u",
Length, SourceDesc->Buffer.Length));
return_ACPI_STATUS (AE_AML_BUFFER_LIMIT);
diff --git a/usr/src/uts/intel/io/acpica/executer/exfldio.c b/usr/src/uts/intel/io/acpica/executer/exfldio.c
index bf663a2f5e..e138e9a13c 100644
--- a/usr/src/uts/intel/io/acpica/executer/exfldio.c
+++ b/usr/src/uts/intel/io/acpica/executer/exfldio.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -98,6 +98,7 @@ AcpiExSetupRegion (
{
ACPI_STATUS Status = AE_OK;
ACPI_OPERAND_OBJECT *RgnDesc;
+ UINT8 SpaceId;
ACPI_FUNCTION_TRACE_U32 (ExSetupRegion, FieldDatumByteOffset);
@@ -116,6 +117,16 @@ AcpiExSetupRegion (
return_ACPI_STATUS (AE_AML_OPERAND_TYPE);
}
+ SpaceId = RgnDesc->Region.SpaceId;
+
+ /* Validate the Space ID */
+
+ if (!AcpiIsValidSpaceId (SpaceId))
+ {
+ ACPI_ERROR ((AE_INFO, "Invalid/unknown Address Space ID: 0x%2.2X", SpaceId));
+ return_ACPI_STATUS (AE_AML_INVALID_SPACE_ID);
+ }
+
/*
* If the Region Address and Length have not been previously evaluated,
* evaluate them now and save the results.
@@ -130,11 +141,12 @@ AcpiExSetupRegion (
}
/*
- * Exit now for SMBus or IPMI address space, it has a non-linear
+ * Exit now for SMBus, GSBus or IPMI address space, it has a non-linear
* address space and the request cannot be directly validated
*/
- if (RgnDesc->Region.SpaceId == ACPI_ADR_SPACE_SMBUS ||
- RgnDesc->Region.SpaceId == ACPI_ADR_SPACE_IPMI)
+ if (SpaceId == ACPI_ADR_SPACE_SMBUS ||
+ SpaceId == ACPI_ADR_SPACE_GSBUS ||
+ SpaceId == ACPI_ADR_SPACE_IPMI)
{
/* SMBus or IPMI has a non-linear address space */
@@ -290,7 +302,8 @@ AcpiExAccessRegion (
/* Invoke the appropriate AddressSpace/OpRegion handler */
- Status = AcpiEvAddressSpaceDispatch (RgnDesc, Function, RegionOffset,
+ Status = AcpiEvAddressSpaceDispatch (RgnDesc, ObjDesc,
+ Function, RegionOffset,
ACPI_MUL_8 (ObjDesc->CommonField.AccessByteWidth), Value);
if (ACPI_FAILURE (Status))
@@ -353,6 +366,11 @@ AcpiExRegisterOverflow (
* The Value is larger than the maximum value that can fit into
* the register.
*/
+ ACPI_ERROR ((AE_INFO,
+ "Index value 0x%8.8X%8.8X overflows field width 0x%X",
+ ACPI_FORMAT_UINT64 (Value),
+ ObjDesc->CommonField.BitLength));
+
return (TRUE);
}
diff --git a/usr/src/uts/intel/io/acpica/executer/exmisc.c b/usr/src/uts/intel/io/acpica/executer/exmisc.c
index c5dd280fe4..63114f5584 100644
--- a/usr/src/uts/intel/io/acpica/executer/exmisc.c
+++ b/usr/src/uts/intel/io/acpica/executer/exmisc.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exmutex.c b/usr/src/uts/intel/io/acpica/executer/exmutex.c
index 6a893f0e90..4c702b8baa 100644
--- a/usr/src/uts/intel/io/acpica/executer/exmutex.c
+++ b/usr/src/uts/intel/io/acpica/executer/exmutex.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exnames.c b/usr/src/uts/intel/io/acpica/executer/exnames.c
index 2188ec0a2f..c3cb6b0b4c 100644
--- a/usr/src/uts/intel/io/acpica/executer/exnames.c
+++ b/usr/src/uts/intel/io/acpica/executer/exnames.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exoparg1.c b/usr/src/uts/intel/io/acpica/executer/exoparg1.c
index 6ac685c82b..9a6cd9fcab 100644
--- a/usr/src/uts/intel/io/acpica/executer/exoparg1.c
+++ b/usr/src/uts/intel/io/acpica/executer/exoparg1.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exoparg2.c b/usr/src/uts/intel/io/acpica/executer/exoparg2.c
index d131277aae..b76da75c49 100644
--- a/usr/src/uts/intel/io/acpica/executer/exoparg2.c
+++ b/usr/src/uts/intel/io/acpica/executer/exoparg2.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exoparg3.c b/usr/src/uts/intel/io/acpica/executer/exoparg3.c
index 6adb1afac2..0edac68e60 100644
--- a/usr/src/uts/intel/io/acpica/executer/exoparg3.c
+++ b/usr/src/uts/intel/io/acpica/executer/exoparg3.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exoparg6.c b/usr/src/uts/intel/io/acpica/executer/exoparg6.c
index fd6e039c01..bfc6585301 100644
--- a/usr/src/uts/intel/io/acpica/executer/exoparg6.c
+++ b/usr/src/uts/intel/io/acpica/executer/exoparg6.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exprep.c b/usr/src/uts/intel/io/acpica/executer/exprep.c
index ee96a6a090..9ec78275bd 100644
--- a/usr/src/uts/intel/io/acpica/executer/exprep.c
+++ b/usr/src/uts/intel/io/acpica/executer/exprep.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -49,6 +49,7 @@
#include "acinterp.h"
#include "amlcode.h"
#include "acnamesp.h"
+#include "acdispat.h"
#define _COMPONENT ACPI_EXECUTER
@@ -484,6 +485,32 @@ AcpiExPrepFieldValue (
ObjDesc->Field.RegionObj = AcpiNsGetAttachedObject (Info->RegionNode);
+ /* Fields specific to GenericSerialBus fields */
+
+ ObjDesc->Field.AccessLength = Info->AccessLength;
+
+ if (Info->ConnectionNode)
+ {
+ SecondDesc = Info->ConnectionNode->Object;
+ if (!(SecondDesc->Common.Flags & AOPOBJ_DATA_VALID))
+ {
+ Status = AcpiDsGetBufferArguments (SecondDesc);
+ if (ACPI_FAILURE (Status))
+ {
+ AcpiUtDeleteObjectDesc (ObjDesc);
+ return_ACPI_STATUS (Status);
+ }
+ }
+
+ ObjDesc->Field.ResourceBuffer = SecondDesc->Buffer.Pointer;
+ ObjDesc->Field.ResourceLength = (UINT16) SecondDesc->Buffer.Length;
+ }
+ else if (Info->ResourceBuffer)
+ {
+ ObjDesc->Field.ResourceBuffer = Info->ResourceBuffer;
+ ObjDesc->Field.ResourceLength = Info->ResourceLength;
+ }
+
/* Allow full data read from EC address space */
if ((ObjDesc->Field.RegionObj->Region.SpaceId == ACPI_ADR_SPACE_EC) &&
diff --git a/usr/src/uts/intel/io/acpica/executer/exregion.c b/usr/src/uts/intel/io/acpica/executer/exregion.c
index b3b9aa6e66..8a40250794 100644
--- a/usr/src/uts/intel/io/acpica/executer/exregion.c
+++ b/usr/src/uts/intel/io/acpica/executer/exregion.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exresnte.c b/usr/src/uts/intel/io/acpica/executer/exresnte.c
index cf8a55531f..03962edf97 100644
--- a/usr/src/uts/intel/io/acpica/executer/exresnte.c
+++ b/usr/src/uts/intel/io/acpica/executer/exresnte.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exresolv.c b/usr/src/uts/intel/io/acpica/executer/exresolv.c
index 652d95dd54..e92fb2468e 100644
--- a/usr/src/uts/intel/io/acpica/executer/exresolv.c
+++ b/usr/src/uts/intel/io/acpica/executer/exresolv.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exresop.c b/usr/src/uts/intel/io/acpica/executer/exresop.c
index 12c872f99a..1bcba69033 100644
--- a/usr/src/uts/intel/io/acpica/executer/exresop.c
+++ b/usr/src/uts/intel/io/acpica/executer/exresop.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exstore.c b/usr/src/uts/intel/io/acpica/executer/exstore.c
index 8679e790b8..656b649478 100644
--- a/usr/src/uts/intel/io/acpica/executer/exstore.c
+++ b/usr/src/uts/intel/io/acpica/executer/exstore.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exstoren.c b/usr/src/uts/intel/io/acpica/executer/exstoren.c
index 71d8809a2d..464d6b1065 100644
--- a/usr/src/uts/intel/io/acpica/executer/exstoren.c
+++ b/usr/src/uts/intel/io/acpica/executer/exstoren.c
@@ -7,7 +7,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exstorob.c b/usr/src/uts/intel/io/acpica/executer/exstorob.c
index 4dfcf15fe3..e74d5b3485 100644
--- a/usr/src/uts/intel/io/acpica/executer/exstorob.c
+++ b/usr/src/uts/intel/io/acpica/executer/exstorob.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exsystem.c b/usr/src/uts/intel/io/acpica/executer/exsystem.c
index 10d6edc951..37f5b234a3 100644
--- a/usr/src/uts/intel/io/acpica/executer/exsystem.c
+++ b/usr/src/uts/intel/io/acpica/executer/exsystem.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/executer/exutils.c b/usr/src/uts/intel/io/acpica/executer/exutils.c
index 4daa44f527..b5eca064a2 100644
--- a/usr/src/uts/intel/io/acpica/executer/exutils.c
+++ b/usr/src/uts/intel/io/acpica/executer/exutils.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -499,4 +499,34 @@ AcpiExIntegerToString (
}
}
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiIsValidSpaceId
+ *
+ * PARAMETERS: SpaceId - ID to be validated
+ *
+ * RETURN: TRUE if valid/supported ID.
+ *
+ * DESCRIPTION: Validate an operation region SpaceID.
+ *
+ ******************************************************************************/
+
+BOOLEAN
+AcpiIsValidSpaceId (
+ UINT8 SpaceId)
+{
+
+ if ((SpaceId >= ACPI_NUM_PREDEFINED_REGIONS) &&
+ (SpaceId < ACPI_USER_REGION_BEGIN) &&
+ (SpaceId != ACPI_ADR_SPACE_DATA_TABLE) &&
+ (SpaceId != ACPI_ADR_SPACE_FIXED_HARDWARE))
+ {
+ return (FALSE);
+ }
+
+ return (TRUE);
+}
+
+
#endif
diff --git a/usr/src/uts/intel/io/acpica/hardware/hwacpi.c b/usr/src/uts/intel/io/acpica/hardware/hwacpi.c
index 577666100e..d9f16ef9d7 100644
--- a/usr/src/uts/intel/io/acpica/hardware/hwacpi.c
+++ b/usr/src/uts/intel/io/acpica/hardware/hwacpi.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -52,6 +52,7 @@
ACPI_MODULE_NAME ("hwacpi")
+#if (!ACPI_REDUCED_HARDWARE) /* Entire module */
/******************************************************************************
*
* FUNCTION: AcpiHwSetMode
@@ -204,3 +205,5 @@ AcpiHwGetMode (
return_UINT32 (ACPI_SYS_MODE_LEGACY);
}
}
+
+#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/usr/src/uts/intel/io/acpica/hardware/hwesleep.c b/usr/src/uts/intel/io/acpica/hardware/hwesleep.c
new file mode 100644
index 0000000000..1587af019c
--- /dev/null
+++ b/usr/src/uts/intel/io/acpica/hardware/hwesleep.c
@@ -0,0 +1,277 @@
+/******************************************************************************
+ *
+ * Name: hwesleep.c - ACPI Hardware Sleep/Wake Support functions for the
+ * extended FADT-V5 sleep registers.
+ *
+ *****************************************************************************/
+
+/*
+ * Copyright (C) 2000 - 2012, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * substantially similar to the "NO WARRANTY" disclaimer below
+ * ("Disclaimer") and any redistribution must be conditioned upon
+ * including a substantially similar Disclaimer requirement for further
+ * binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ * of any contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#include "acpi.h"
+#include "accommon.h"
+
+#define _COMPONENT ACPI_HARDWARE
+ ACPI_MODULE_NAME ("hwesleep")
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiHwExecuteSleepMethod
+ *
+ * PARAMETERS: MethodPathname - Pathname of method to execute
+ * IntegerArgument - Argument to pass to the method
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Execute a sleep/wake related method with one integer argument
+ * and no return value.
+ *
+ ******************************************************************************/
+
+void
+AcpiHwExecuteSleepMethod (
+ char *MethodPathname,
+ UINT32 IntegerArgument)
+{
+ ACPI_OBJECT_LIST ArgList;
+ ACPI_OBJECT Arg;
+ ACPI_STATUS Status;
+
+
+ ACPI_FUNCTION_TRACE (HwExecuteSleepMethod);
+
+
+ /* One argument, IntegerArgument; No return value expected */
+
+ ArgList.Count = 1;
+ ArgList.Pointer = &Arg;
+ Arg.Type = ACPI_TYPE_INTEGER;
+ Arg.Integer.Value = (UINT64) IntegerArgument;
+
+ Status = AcpiEvaluateObject (NULL, MethodPathname, &ArgList, NULL);
+ if (ACPI_FAILURE (Status) && Status != AE_NOT_FOUND)
+ {
+ ACPI_EXCEPTION ((AE_INFO, Status, "While executing method %s",
+ MethodPathname));
+ }
+
+ return_VOID;
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiHwExtendedSleep
+ *
+ * PARAMETERS: SleepState - Which sleep state to enter
+ * Flags - ACPI_EXECUTE_GTS to run optional method
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Enter a system sleep state via the extended FADT sleep
+ * registers (V5 FADT).
+ * THIS FUNCTION MUST BE CALLED WITH INTERRUPTS DISABLED
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiHwExtendedSleep (
+ UINT8 SleepState,
+ UINT8 Flags)
+{
+ ACPI_STATUS Status;
+ UINT8 SleepTypeValue;
+ UINT64 SleepStatus;
+
+
+ ACPI_FUNCTION_TRACE (HwExtendedSleep);
+
+
+ /* Extended sleep registers must be valid */
+
+ if (!AcpiGbl_FADT.SleepControl.Address ||
+ !AcpiGbl_FADT.SleepStatus.Address)
+ {
+ return_ACPI_STATUS (AE_NOT_EXIST);
+ }
+
+ /* Clear wake status (WAK_STS) */
+
+ Status = AcpiWrite ((UINT64) ACPI_X_WAKE_STATUS, &AcpiGbl_FADT.SleepStatus);
+ if (ACPI_FAILURE (Status))
+ {
+ return_ACPI_STATUS (Status);
+ }
+
+ AcpiGbl_SystemAwakeAndRunning = FALSE;
+
+ /* Optionally execute _GTS (Going To Sleep) */
+
+ if (Flags & ACPI_EXECUTE_GTS)
+ {
+ AcpiHwExecuteSleepMethod (METHOD_PATHNAME__GTS, SleepState);
+ }
+
+ /* Flush caches, as per ACPI specification */
+
+ ACPI_FLUSH_CPU_CACHE ();
+
+ /*
+ * Set the SLP_TYP and SLP_EN bits.
+ *
+ * Note: We only use the first value returned by the \_Sx method
+ * (AcpiGbl_SleepTypeA) - As per ACPI specification.
+ */
+ ACPI_DEBUG_PRINT ((ACPI_DB_INIT,
+ "Entering sleep state [S%u]\n", SleepState));
+
+ SleepTypeValue = ((AcpiGbl_SleepTypeA << ACPI_X_SLEEP_TYPE_POSITION) &
+ ACPI_X_SLEEP_TYPE_MASK);
+
+ Status = AcpiWrite ((UINT64) (SleepTypeValue | ACPI_X_SLEEP_ENABLE),
+ &AcpiGbl_FADT.SleepControl);
+ if (ACPI_FAILURE (Status))
+ {
+ return_ACPI_STATUS (Status);
+ }
+
+ /* Wait for transition back to Working State */
+
+ do
+ {
+ Status = AcpiRead (&SleepStatus, &AcpiGbl_FADT.SleepStatus);
+ if (ACPI_FAILURE (Status))
+ {
+ return_ACPI_STATUS (Status);
+ }
+
+ } while (!(((UINT8) SleepStatus) & ACPI_X_WAKE_STATUS));
+
+ return_ACPI_STATUS (AE_OK);
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiHwExtendedWakePrep
+ *
+ * PARAMETERS: SleepState - Which sleep state we just exited
+ * Flags - ACPI_EXECUTE_BFS to run optional method
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Perform first part of OS-independent ACPI cleanup after
+ * a sleep. Called with interrupts ENABLED.
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiHwExtendedWakePrep (
+ UINT8 SleepState,
+ UINT8 Flags)
+{
+ ACPI_STATUS Status;
+ UINT8 SleepTypeValue;
+
+
+ ACPI_FUNCTION_TRACE (HwExtendedWakePrep);
+
+
+ Status = AcpiGetSleepTypeData (ACPI_STATE_S0,
+ &AcpiGbl_SleepTypeA, &AcpiGbl_SleepTypeB);
+ if (ACPI_SUCCESS (Status))
+ {
+ SleepTypeValue = ((AcpiGbl_SleepTypeA << ACPI_X_SLEEP_TYPE_POSITION) &
+ ACPI_X_SLEEP_TYPE_MASK);
+
+ (void) AcpiWrite ((UINT64) (SleepTypeValue | ACPI_X_SLEEP_ENABLE),
+ &AcpiGbl_FADT.SleepControl);
+ }
+
+ /* Optionally execute _BFS (Back From Sleep) */
+
+ if (Flags & ACPI_EXECUTE_BFS)
+ {
+ AcpiHwExecuteSleepMethod (METHOD_PATHNAME__BFS, SleepState);
+ }
+ return_ACPI_STATUS (AE_OK);
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiHwExtendedWake
+ *
+ * PARAMETERS: SleepState - Which sleep state we just exited
+ * Flags - Reserved, set to zero
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Perform OS-independent ACPI cleanup after a sleep
+ * Called with interrupts ENABLED.
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiHwExtendedWake (
+ UINT8 SleepState,
+ UINT8 Flags)
+{
+ ACPI_FUNCTION_TRACE (HwExtendedWake);
+
+
+ /* Ensure EnterSleepStatePrep -> EnterSleepState ordering */
+
+ AcpiGbl_SleepTypeA = ACPI_SLEEP_TYPE_INVALID;
+
+ /* Execute the wake methods */
+
+ AcpiHwExecuteSleepMethod (METHOD_PATHNAME__SST, ACPI_SST_WAKING);
+ AcpiHwExecuteSleepMethod (METHOD_PATHNAME__WAK, SleepState);
+
+ /*
+ * Some BIOS code assumes that WAK_STS will be cleared on resume
+ * and use it to determine whether the system is rebooting or
+ * resuming. Clear WAK_STS for compatibility.
+ */
+ (void) AcpiWrite ((UINT64) ACPI_X_WAKE_STATUS, &AcpiGbl_FADT.SleepStatus);
+ AcpiGbl_SystemAwakeAndRunning = TRUE;
+
+ AcpiHwExecuteSleepMethod (METHOD_PATHNAME__SST, ACPI_SST_WORKING);
+ return_ACPI_STATUS (AE_OK);
+}
diff --git a/usr/src/uts/intel/io/acpica/hardware/hwgpe.c b/usr/src/uts/intel/io/acpica/hardware/hwgpe.c
index 335814f942..c94f17be26 100644
--- a/usr/src/uts/intel/io/acpica/hardware/hwgpe.c
+++ b/usr/src/uts/intel/io/acpica/hardware/hwgpe.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -49,6 +49,8 @@
#define _COMPONENT ACPI_HARDWARE
ACPI_MODULE_NAME ("hwgpe")
+#if (!ACPI_REDUCED_HARDWARE) /* Entire module */
+
/* Local prototypes */
static ACPI_STATUS
@@ -538,3 +540,4 @@ AcpiHwEnableAllWakeupGpes (
return_ACPI_STATUS (Status);
}
+#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/usr/src/uts/intel/io/acpica/hardware/hwpci.c b/usr/src/uts/intel/io/acpica/hardware/hwpci.c
index 96a2dfcdbe..065e42d95e 100644
--- a/usr/src/uts/intel/io/acpica/hardware/hwpci.c
+++ b/usr/src/uts/intel/io/acpica/hardware/hwpci.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/hardware/hwregs.c b/usr/src/uts/intel/io/acpica/hardware/hwregs.c
index 61e61a1e6b..6d89883179 100644
--- a/usr/src/uts/intel/io/acpica/hardware/hwregs.c
+++ b/usr/src/uts/intel/io/acpica/hardware/hwregs.c
@@ -7,7 +7,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -53,6 +53,8 @@
ACPI_MODULE_NAME ("hwregs")
+#if (!ACPI_REDUCED_HARDWARE)
+
/* Local Prototypes */
static ACPI_STATUS
@@ -67,6 +69,7 @@ AcpiHwWriteMultiple (
ACPI_GENERIC_ADDRESS *RegisterA,
ACPI_GENERIC_ADDRESS *RegisterB);
+#endif /* !ACPI_REDUCED_HARDWARE */
/******************************************************************************
*
@@ -170,6 +173,7 @@ AcpiHwRead (
ACPI_GENERIC_ADDRESS *Reg)
{
UINT64 Address;
+ UINT64 Value64;
ACPI_STATUS Status;
@@ -195,7 +199,9 @@ AcpiHwRead (
if (Reg->SpaceId == ACPI_ADR_SPACE_SYSTEM_MEMORY)
{
Status = AcpiOsReadMemory ((ACPI_PHYSICAL_ADDRESS)
- Address, Value, Reg->BitWidth);
+ Address, &Value64, Reg->BitWidth);
+
+ *Value = (UINT32) Value64;
}
else /* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */
{
@@ -254,7 +260,7 @@ AcpiHwWrite (
if (Reg->SpaceId == ACPI_ADR_SPACE_SYSTEM_MEMORY)
{
Status = AcpiOsWriteMemory ((ACPI_PHYSICAL_ADDRESS)
- Address, Value, Reg->BitWidth);
+ Address, (UINT64) Value, Reg->BitWidth);
}
else /* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */
{
@@ -271,6 +277,7 @@ AcpiHwWrite (
}
+#if (!ACPI_REDUCED_HARDWARE)
/*******************************************************************************
*
* FUNCTION: AcpiHwClearAcpiStatus
@@ -321,7 +328,7 @@ UnlockAndExit:
/*******************************************************************************
*
- * FUNCTION: AcpiHwGetRegisterBitMask
+ * FUNCTION: AcpiHwGetBitRegisterInfo
*
* PARAMETERS: RegisterId - Index of ACPI Register to access
*
@@ -731,3 +738,4 @@ AcpiHwWriteMultiple (
return (Status);
}
+#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/usr/src/uts/intel/io/acpica/hardware/hwsleep.c b/usr/src/uts/intel/io/acpica/hardware/hwsleep.c
index 1c8ca2440f..1953ff846e 100644
--- a/usr/src/uts/intel/io/acpica/hardware/hwsleep.c
+++ b/usr/src/uts/intel/io/acpica/hardware/hwsleep.c
@@ -1,12 +1,12 @@
-
/******************************************************************************
*
- * Name: hwsleep.c - ACPI Hardware Sleep/Wake Interface
+ * Name: hwsleep.c - ACPI Hardware Sleep/Wake Support functions for the
+ * original/legacy sleep/PM registers.
*
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -49,210 +49,38 @@
ACPI_MODULE_NAME ("hwsleep")
+#if (!ACPI_REDUCED_HARDWARE) /* Entire module */
/*******************************************************************************
*
- * FUNCTION: AcpiSetFirmwareWakingVector
- *
- * PARAMETERS: PhysicalAddress - 32-bit physical address of ACPI real mode
- * entry point.
- *
- * RETURN: Status
- *
- * DESCRIPTION: Sets the 32-bit FirmwareWakingVector field of the FACS
- *
- ******************************************************************************/
-
-ACPI_STATUS
-AcpiSetFirmwareWakingVector (
- UINT32 PhysicalAddress)
-{
- ACPI_FUNCTION_TRACE (AcpiSetFirmwareWakingVector);
-
-
- /* Set the 32-bit vector */
-
- AcpiGbl_FACS->FirmwareWakingVector = PhysicalAddress;
-
- /* Clear the 64-bit vector if it exists */
-
- if ((AcpiGbl_FACS->Length > 32) && (AcpiGbl_FACS->Version >= 1))
- {
- AcpiGbl_FACS->XFirmwareWakingVector = 0;
- }
-
- return_ACPI_STATUS (AE_OK);
-}
-
-ACPI_EXPORT_SYMBOL (AcpiSetFirmwareWakingVector)
-
-
-#if ACPI_MACHINE_WIDTH == 64
-/*******************************************************************************
- *
- * FUNCTION: AcpiSetFirmwareWakingVector64
- *
- * PARAMETERS: PhysicalAddress - 64-bit physical address of ACPI protected
- * mode entry point.
- *
- * RETURN: Status
- *
- * DESCRIPTION: Sets the 64-bit X_FirmwareWakingVector field of the FACS, if
- * it exists in the table. This function is intended for use with
- * 64-bit host operating systems.
- *
- ******************************************************************************/
-
-ACPI_STATUS
-AcpiSetFirmwareWakingVector64 (
- UINT64 PhysicalAddress)
-{
- ACPI_FUNCTION_TRACE (AcpiSetFirmwareWakingVector64);
-
-
- /* Determine if the 64-bit vector actually exists */
-
- if ((AcpiGbl_FACS->Length <= 32) || (AcpiGbl_FACS->Version < 1))
- {
- return_ACPI_STATUS (AE_NOT_EXIST);
- }
-
- /* Clear 32-bit vector, set the 64-bit X_ vector */
-
- AcpiGbl_FACS->FirmwareWakingVector = 0;
- AcpiGbl_FACS->XFirmwareWakingVector = PhysicalAddress;
- return_ACPI_STATUS (AE_OK);
-}
-
-ACPI_EXPORT_SYMBOL (AcpiSetFirmwareWakingVector64)
-#endif
-
-/*******************************************************************************
- *
- * FUNCTION: AcpiEnterSleepStatePrep
+ * FUNCTION: AcpiHwLegacySleep
*
* PARAMETERS: SleepState - Which sleep state to enter
+ * Flags - ACPI_EXECUTE_GTS to run optional method
*
* RETURN: Status
*
- * DESCRIPTION: Prepare to enter a system sleep state (see ACPI 2.0 spec p 231)
- * This function must execute with interrupts enabled.
- * We break sleeping into 2 stages so that OSPM can handle
- * various OS-specific tasks between the two steps.
- *
- ******************************************************************************/
-
-ACPI_STATUS
-AcpiEnterSleepStatePrep (
- UINT8 SleepState)
-{
- ACPI_STATUS Status;
- ACPI_OBJECT_LIST ArgList;
- ACPI_OBJECT Arg;
-
-
- ACPI_FUNCTION_TRACE (AcpiEnterSleepStatePrep);
-
-
- /* _PSW methods could be run here to enable wake-on keyboard, LAN, etc. */
-
- Status = AcpiGetSleepTypeData (SleepState,
- &AcpiGbl_SleepTypeA, &AcpiGbl_SleepTypeB);
- if (ACPI_FAILURE (Status))
- {
- return_ACPI_STATUS (Status);
- }
-
- /* Execute the _PTS method (Prepare To Sleep) */
-
- ArgList.Count = 1;
- ArgList.Pointer = &Arg;
- Arg.Type = ACPI_TYPE_INTEGER;
- Arg.Integer.Value = SleepState;
-
- Status = AcpiEvaluateObject (NULL, METHOD_NAME__PTS, &ArgList, NULL);
- if (ACPI_FAILURE (Status) && Status != AE_NOT_FOUND)
- {
- return_ACPI_STATUS (Status);
- }
-
- /* Setup the argument to the _SST method (System STatus) */
-
- switch (SleepState)
- {
- case ACPI_STATE_S0:
- Arg.Integer.Value = ACPI_SST_WORKING;
- break;
-
- case ACPI_STATE_S1:
- case ACPI_STATE_S2:
- case ACPI_STATE_S3:
- Arg.Integer.Value = ACPI_SST_SLEEPING;
- break;
-
- case ACPI_STATE_S4:
- Arg.Integer.Value = ACPI_SST_SLEEP_CONTEXT;
- break;
-
- default:
- Arg.Integer.Value = ACPI_SST_INDICATOR_OFF; /* Default is off */
- break;
- }
-
- /*
- * Set the system indicators to show the desired sleep state.
- * _SST is an optional method (return no error if not found)
- */
- Status = AcpiEvaluateObject (NULL, METHOD_NAME__SST, &ArgList, NULL);
- if (ACPI_FAILURE (Status) && Status != AE_NOT_FOUND)
- {
- ACPI_EXCEPTION ((AE_INFO, Status, "While executing method _SST"));
- }
-
- return_ACPI_STATUS (AE_OK);
-}
-
-ACPI_EXPORT_SYMBOL (AcpiEnterSleepStatePrep)
-
-
-/*******************************************************************************
- *
- * FUNCTION: AcpiEnterSleepState
- *
- * PARAMETERS: SleepState - Which sleep state to enter
- *
- * RETURN: Status
- *
- * DESCRIPTION: Enter a system sleep state
+ * DESCRIPTION: Enter a system sleep state via the legacy FADT PM registers
* THIS FUNCTION MUST BE CALLED WITH INTERRUPTS DISABLED
*
******************************************************************************/
ACPI_STATUS
-AcpiEnterSleepState (
- UINT8 SleepState)
+AcpiHwLegacySleep (
+ UINT8 SleepState,
+ UINT8 Flags)
{
- UINT32 Pm1aControl;
- UINT32 Pm1bControl;
ACPI_BIT_REGISTER_INFO *SleepTypeRegInfo;
ACPI_BIT_REGISTER_INFO *SleepEnableRegInfo;
+ UINT32 Pm1aControl;
+ UINT32 Pm1bControl;
UINT32 InValue;
- ACPI_OBJECT_LIST ArgList;
- ACPI_OBJECT Arg;
ACPI_STATUS Status;
- ACPI_FUNCTION_TRACE (AcpiEnterSleepState);
+ ACPI_FUNCTION_TRACE (HwLegacySleep);
- if ((AcpiGbl_SleepTypeA > ACPI_SLEEP_TYPE_MAX) ||
- (AcpiGbl_SleepTypeB > ACPI_SLEEP_TYPE_MAX))
- {
- ACPI_ERROR ((AE_INFO, "Sleep values out of range: A=0x%X B=0x%X",
- AcpiGbl_SleepTypeA, AcpiGbl_SleepTypeB));
- return_ACPI_STATUS (AE_AML_OPERAND_VALUE);
- }
-
- SleepTypeRegInfo = AcpiHwGetBitRegisterInfo (ACPI_BITREG_SLEEP_TYPE);
+ SleepTypeRegInfo = AcpiHwGetBitRegisterInfo (ACPI_BITREG_SLEEP_TYPE);
SleepEnableRegInfo = AcpiHwGetBitRegisterInfo (ACPI_BITREG_SLEEP_ENABLE);
/* Clear wake status */
@@ -302,17 +130,11 @@ AcpiEnterSleepState (
return_ACPI_STATUS (Status);
}
- /* Execute the _GTS method (Going To Sleep) */
-
- ArgList.Count = 1;
- ArgList.Pointer = &Arg;
- Arg.Type = ACPI_TYPE_INTEGER;
- Arg.Integer.Value = SleepState;
+ /* Optionally execute _GTS (Going To Sleep) */
- Status = AcpiEvaluateObject (NULL, METHOD_NAME__GTS, &ArgList, NULL);
- if (ACPI_FAILURE (Status) && Status != AE_NOT_FOUND)
+ if (Flags & ACPI_EXECUTE_GTS)
{
- return_ACPI_STATUS (Status);
+ AcpiHwExecuteSleepMethod (METHOD_PATHNAME__GTS, SleepState);
}
/* Get current value of PM1A control */
@@ -390,7 +212,7 @@ AcpiEnterSleepState (
}
}
- /* Wait until we enter sleep state */
+ /* Wait for transition back to Working State */
do
{
@@ -400,110 +222,32 @@ AcpiEnterSleepState (
return_ACPI_STATUS (Status);
}
- /* Spin until we wake */
-
} while (!InValue);
return_ACPI_STATUS (AE_OK);
}
-ACPI_EXPORT_SYMBOL (AcpiEnterSleepState)
-
/*******************************************************************************
*
- * FUNCTION: AcpiEnterSleepStateS4bios
- *
- * PARAMETERS: None
- *
- * RETURN: Status
- *
- * DESCRIPTION: Perform a S4 bios request.
- * THIS FUNCTION MUST BE CALLED WITH INTERRUPTS DISABLED
- *
- ******************************************************************************/
-
-ACPI_STATUS
-AcpiEnterSleepStateS4bios (
- void)
-{
- UINT32 InValue;
- ACPI_STATUS Status;
-
-
- ACPI_FUNCTION_TRACE (AcpiEnterSleepStateS4bios);
-
-
- /* Clear the wake status bit (PM1) */
-
- Status = AcpiWriteBitRegister (ACPI_BITREG_WAKE_STATUS, ACPI_CLEAR_STATUS);
- if (ACPI_FAILURE (Status))
- {
- return_ACPI_STATUS (Status);
- }
-
- Status = AcpiHwClearAcpiStatus ();
- if (ACPI_FAILURE (Status))
- {
- return_ACPI_STATUS (Status);
- }
-
- /*
- * 1) Disable/Clear all GPEs
- * 2) Enable all wakeup GPEs
- */
- Status = AcpiHwDisableAllGpes ();
- if (ACPI_FAILURE (Status))
- {
- return_ACPI_STATUS (Status);
- }
- AcpiGbl_SystemAwakeAndRunning = FALSE;
-
- Status = AcpiHwEnableAllWakeupGpes ();
- if (ACPI_FAILURE (Status))
- {
- return_ACPI_STATUS (Status);
- }
-
- ACPI_FLUSH_CPU_CACHE ();
-
- Status = AcpiHwWritePort (AcpiGbl_FADT.SmiCommand,
- (UINT32) AcpiGbl_FADT.S4BiosRequest, 8);
-
- do {
- AcpiOsStall(1000);
- Status = AcpiReadBitRegister (ACPI_BITREG_WAKE_STATUS, &InValue);
- if (ACPI_FAILURE (Status))
- {
- return_ACPI_STATUS (Status);
- }
- } while (!InValue);
-
- return_ACPI_STATUS (AE_OK);
-}
-
-ACPI_EXPORT_SYMBOL (AcpiEnterSleepStateS4bios)
-
-
-/*******************************************************************************
- *
- * FUNCTION: AcpiLeaveSleepState
+ * FUNCTION: AcpiHwLegacyWakePrep
*
* PARAMETERS: SleepState - Which sleep state we just exited
+ * Flags - ACPI_EXECUTE_BFS to run optional method
*
* RETURN: Status
*
- * DESCRIPTION: Perform OS-independent ACPI cleanup after a sleep
+ * DESCRIPTION: Perform the first state of OS-independent ACPI cleanup after a
+ * sleep.
* Called with interrupts ENABLED.
*
******************************************************************************/
ACPI_STATUS
-AcpiLeaveSleepState (
- UINT8 SleepState)
+AcpiHwLegacyWakePrep (
+ UINT8 SleepState,
+ UINT8 Flags)
{
- ACPI_OBJECT_LIST ArgList;
- ACPI_OBJECT Arg;
ACPI_STATUS Status;
ACPI_BIT_REGISTER_INFO *SleepTypeRegInfo;
ACPI_BIT_REGISTER_INFO *SleepEnableRegInfo;
@@ -511,8 +255,7 @@ AcpiLeaveSleepState (
UINT32 Pm1bControl;
- ACPI_FUNCTION_TRACE (AcpiLeaveSleepState);
-
+ ACPI_FUNCTION_TRACE (HwLegacyWakePrep);
/*
* Set SLP_TYPE and SLP_EN to state S0.
@@ -553,40 +296,50 @@ AcpiLeaveSleepState (
}
}
- /* Ensure EnterSleepStatePrep -> EnterSleepState ordering */
+ /* Optionally execute _BFS (Back From Sleep) */
- AcpiGbl_SleepTypeA = ACPI_SLEEP_TYPE_INVALID;
+ if (Flags & ACPI_EXECUTE_BFS)
+ {
+ AcpiHwExecuteSleepMethod (METHOD_PATHNAME__BFS, SleepState);
+ }
+ return_ACPI_STATUS (Status);
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiHwLegacyWake
+ *
+ * PARAMETERS: SleepState - Which sleep state we just exited
+ * Flags - Reserved, set to zero
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Perform OS-independent ACPI cleanup after a sleep
+ * Called with interrupts ENABLED.
+ *
+ ******************************************************************************/
- /* Setup parameter object */
+ACPI_STATUS
+AcpiHwLegacyWake (
+ UINT8 SleepState,
+ UINT8 Flags)
+{
+ ACPI_STATUS Status;
- ArgList.Count = 1;
- ArgList.Pointer = &Arg;
- Arg.Type = ACPI_TYPE_INTEGER;
- /* Ignore any errors from these methods */
+ ACPI_FUNCTION_TRACE (HwLegacyWake);
- Arg.Integer.Value = ACPI_SST_WAKING;
- Status = AcpiEvaluateObject (NULL, METHOD_NAME__SST, &ArgList, NULL);
- if (ACPI_FAILURE (Status) && Status != AE_NOT_FOUND)
- {
- ACPI_EXCEPTION ((AE_INFO, Status, "During Method _SST"));
- }
- Arg.Integer.Value = SleepState;
- Status = AcpiEvaluateObject (NULL, METHOD_NAME__BFS, &ArgList, NULL);
- if (ACPI_FAILURE (Status) && Status != AE_NOT_FOUND)
- {
- ACPI_EXCEPTION ((AE_INFO, Status, "During Method _BFS"));
- }
+ /* Ensure EnterSleepStatePrep -> EnterSleepState ordering */
- Status = AcpiEvaluateObject (NULL, METHOD_NAME__WAK, &ArgList, NULL);
- if (ACPI_FAILURE (Status) && Status != AE_NOT_FOUND)
- {
- ACPI_EXCEPTION ((AE_INFO, Status, "During Method _WAK"));
- }
- /* TBD: _WAK "sometimes" returns stuff - do we want to look at it? */
+ AcpiGbl_SleepTypeA = ACPI_SLEEP_TYPE_INVALID;
+ AcpiHwExecuteSleepMethod (METHOD_PATHNAME__SST, ACPI_SST_WAKING);
/*
+ * GPEs must be enabled before _WAK is called as GPEs
+ * might get fired there
+ *
* Restore the GPEs:
* 1) Disable/Clear all GPEs
* 2) Enable all runtime GPEs
@@ -596,7 +349,6 @@ AcpiLeaveSleepState (
{
return_ACPI_STATUS (Status);
}
- AcpiGbl_SystemAwakeAndRunning = TRUE;
Status = AcpiHwEnableAllRuntimeGpes ();
if (ACPI_FAILURE (Status))
@@ -604,6 +356,20 @@ AcpiLeaveSleepState (
return_ACPI_STATUS (Status);
}
+ /*
+ * Now we can execute _WAK, etc. Some machines require that the GPEs
+ * are enabled before the wake methods are executed.
+ */
+ AcpiHwExecuteSleepMethod (METHOD_PATHNAME__WAK, SleepState);
+
+ /*
+ * Some BIOS code assumes that WAK_STS will be cleared on resume
+ * and use it to determine whether the system is rebooting or
+ * resuming. Clear WAK_STS for compatibility.
+ */
+ (void) AcpiWriteBitRegister (ACPI_BITREG_WAKE_STATUS, ACPI_CLEAR_STATUS);
+ AcpiGbl_SystemAwakeAndRunning = TRUE;
+
/* Enable power button */
(void) AcpiWriteBitRegister(
@@ -625,15 +391,8 @@ AcpiLeaveSleepState (
return_ACPI_STATUS (Status);
}
- Arg.Integer.Value = ACPI_SST_WORKING;
- Status = AcpiEvaluateObject (NULL, METHOD_NAME__SST, &ArgList, NULL);
- if (ACPI_FAILURE (Status) && Status != AE_NOT_FOUND)
- {
- ACPI_EXCEPTION ((AE_INFO, Status, "During Method _SST"));
- }
-
+ AcpiHwExecuteSleepMethod (METHOD_PATHNAME__SST, ACPI_SST_WORKING);
return_ACPI_STATUS (Status);
}
-ACPI_EXPORT_SYMBOL (AcpiLeaveSleepState)
-
+#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/usr/src/uts/intel/io/acpica/hardware/hwtimer.c b/usr/src/uts/intel/io/acpica/hardware/hwtimer.c
index faf6770283..15dc58f0bf 100644
--- a/usr/src/uts/intel/io/acpica/hardware/hwtimer.c
+++ b/usr/src/uts/intel/io/acpica/hardware/hwtimer.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -49,6 +49,7 @@
ACPI_MODULE_NAME ("hwtimer")
+#if (!ACPI_REDUCED_HARDWARE) /* Entire module */
/******************************************************************************
*
* FUNCTION: AcpiGetTimerResolution
@@ -214,3 +215,4 @@ AcpiGetTimerDuration (
ACPI_EXPORT_SYMBOL (AcpiGetTimerDuration)
+#endif /* !ACPI_REDUCED_HARDWARE */
diff --git a/usr/src/uts/intel/io/acpica/hardware/hwvalid.c b/usr/src/uts/intel/io/acpica/hardware/hwvalid.c
index 869c9f18fd..4462434bbf 100644
--- a/usr/src/uts/intel/io/acpica/hardware/hwvalid.c
+++ b/usr/src/uts/intel/io/acpica/hardware/hwvalid.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -149,6 +149,8 @@ AcpiHwValidateIoRequest (
(BitWidth != 16) &&
(BitWidth != 32))
{
+ ACPI_ERROR ((AE_INFO,
+ "Bad BitWidth parameter: %8.8X", BitWidth));
return (AE_BAD_PARAMETER);
}
diff --git a/usr/src/uts/intel/io/acpica/hardware/hwxface.c b/usr/src/uts/intel/io/acpica/hardware/hwxface.c
index f26c511ec8..3fec9b566f 100644
--- a/usr/src/uts/intel/io/acpica/hardware/hwxface.c
+++ b/usr/src/uts/intel/io/acpica/hardware/hwxface.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -154,12 +154,6 @@ AcpiRead (
return (Status);
}
- Width = Reg->BitWidth;
- if (Width == 64)
- {
- Width = 32; /* Break into two 32-bit transfers */
- }
-
/* Initialize entire 64-bit return value to zero */
*ReturnValue = 0;
@@ -172,28 +166,20 @@ AcpiRead (
if (Reg->SpaceId == ACPI_ADR_SPACE_SYSTEM_MEMORY)
{
Status = AcpiOsReadMemory ((ACPI_PHYSICAL_ADDRESS)
- Address, &Value, Width);
+ Address, ReturnValue, Reg->BitWidth);
if (ACPI_FAILURE (Status))
{
return (Status);
}
- *ReturnValue = Value;
-
- if (Reg->BitWidth == 64)
- {
- /* Read the top 32 bits */
-
- Status = AcpiOsReadMemory ((ACPI_PHYSICAL_ADDRESS)
- (Address + 4), &Value, 32);
- if (ACPI_FAILURE (Status))
- {
- return (Status);
- }
- *ReturnValue |= ((UINT64) Value << 32);
- }
}
else /* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */
{
+ Width = Reg->BitWidth;
+ if (Width == 64)
+ {
+ Width = 32; /* Break into two 32-bit transfers */
+ }
+
Status = AcpiHwReadPort ((ACPI_IO_ADDRESS)
Address, &Value, Width);
if (ACPI_FAILURE (Status))
@@ -262,12 +248,6 @@ AcpiWrite (
return (Status);
}
- Width = Reg->BitWidth;
- if (Width == 64)
- {
- Width = 32; /* Break into two 32-bit transfers */
- }
-
/*
* Two address spaces supported: Memory or IO. PCI_Config is
* not supported here because the GAS structure is insufficient
@@ -275,24 +255,20 @@ AcpiWrite (
if (Reg->SpaceId == ACPI_ADR_SPACE_SYSTEM_MEMORY)
{
Status = AcpiOsWriteMemory ((ACPI_PHYSICAL_ADDRESS)
- Address, ACPI_LODWORD (Value), Width);
+ Address, Value, Reg->BitWidth);
if (ACPI_FAILURE (Status))
{
return (Status);
}
-
- if (Reg->BitWidth == 64)
- {
- Status = AcpiOsWriteMemory ((ACPI_PHYSICAL_ADDRESS)
- (Address + 4), ACPI_HIDWORD (Value), 32);
- if (ACPI_FAILURE (Status))
- {
- return (Status);
- }
- }
}
else /* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */
{
+ Width = Reg->BitWidth;
+ if (Width == 64)
+ {
+ Width = 32; /* Break into two 32-bit transfers */
+ }
+
Status = AcpiHwWritePort ((ACPI_IO_ADDRESS)
Address, ACPI_LODWORD (Value), Width);
if (ACPI_FAILURE (Status))
@@ -323,6 +299,7 @@ AcpiWrite (
ACPI_EXPORT_SYMBOL (AcpiWrite)
+#if (!ACPI_REDUCED_HARDWARE)
/*******************************************************************************
*
* FUNCTION: AcpiReadBitRegister
@@ -505,6 +482,8 @@ UnlockAndExit:
ACPI_EXPORT_SYMBOL (AcpiWriteBitRegister)
+#endif /* !ACPI_REDUCED_HARDWARE */
+
/*******************************************************************************
*
diff --git a/usr/src/uts/intel/io/acpica/hardware/hwxfsleep.c b/usr/src/uts/intel/io/acpica/hardware/hwxfsleep.c
new file mode 100644
index 0000000000..b7cbbc813e
--- /dev/null
+++ b/usr/src/uts/intel/io/acpica/hardware/hwxfsleep.c
@@ -0,0 +1,478 @@
+/******************************************************************************
+ *
+ * Name: hwxfsleep.c - ACPI Hardware Sleep/Wake External Interfaces
+ *
+ *****************************************************************************/
+
+/*
+ * Copyright (C) 2000 - 2012, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * substantially similar to the "NO WARRANTY" disclaimer below
+ * ("Disclaimer") and any redistribution must be conditioned upon
+ * including a substantially similar Disclaimer requirement for further
+ * binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ * of any contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#include "acpi.h"
+#include "accommon.h"
+
+#define _COMPONENT ACPI_HARDWARE
+ ACPI_MODULE_NAME ("hwxfsleep")
+
+/* Local prototypes */
+
+static ACPI_STATUS
+AcpiHwSleepDispatch (
+ UINT8 SleepState,
+ UINT8 Flags,
+ UINT32 FunctionId);
+
+/*
+ * Dispatch table used to efficiently branch to the various sleep
+ * functions.
+ */
+#define ACPI_SLEEP_FUNCTION_ID 0
+#define ACPI_WAKE_PREP_FUNCTION_ID 1
+#define ACPI_WAKE_FUNCTION_ID 2
+
+/* Legacy functions are optional, based upon ACPI_REDUCED_HARDWARE */
+
+static ACPI_SLEEP_FUNCTIONS AcpiSleepDispatch[] =
+{
+ {ACPI_HW_OPTIONAL_FUNCTION (AcpiHwLegacySleep), AcpiHwExtendedSleep},
+ {ACPI_HW_OPTIONAL_FUNCTION (AcpiHwLegacyWakePrep), AcpiHwExtendedWakePrep},
+ {ACPI_HW_OPTIONAL_FUNCTION (AcpiHwLegacyWake), AcpiHwExtendedWake}
+};
+
+
+/*
+ * These functions are removed for the ACPI_REDUCED_HARDWARE case:
+ * AcpiSetFirmwareWakingVector
+ * AcpiSetFirmwareWakingVector64
+ * AcpiEnterSleepStateS4bios
+ */
+
+#if (!ACPI_REDUCED_HARDWARE)
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiSetFirmwareWakingVector
+ *
+ * PARAMETERS: PhysicalAddress - 32-bit physical address of ACPI real mode
+ * entry point.
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Sets the 32-bit FirmwareWakingVector field of the FACS
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiSetFirmwareWakingVector (
+ UINT32 PhysicalAddress)
+{
+ ACPI_FUNCTION_TRACE (AcpiSetFirmwareWakingVector);
+
+
+ /* Set the 32-bit vector */
+
+ AcpiGbl_FACS->FirmwareWakingVector = PhysicalAddress;
+
+ /* Clear the 64-bit vector if it exists */
+
+ if ((AcpiGbl_FACS->Length > 32) && (AcpiGbl_FACS->Version >= 1))
+ {
+ AcpiGbl_FACS->XFirmwareWakingVector = 0;
+ }
+
+ return_ACPI_STATUS (AE_OK);
+}
+
+ACPI_EXPORT_SYMBOL (AcpiSetFirmwareWakingVector)
+
+
+#if ACPI_MACHINE_WIDTH == 64
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiSetFirmwareWakingVector64
+ *
+ * PARAMETERS: PhysicalAddress - 64-bit physical address of ACPI protected
+ * mode entry point.
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Sets the 64-bit X_FirmwareWakingVector field of the FACS, if
+ * it exists in the table. This function is intended for use with
+ * 64-bit host operating systems.
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiSetFirmwareWakingVector64 (
+ UINT64 PhysicalAddress)
+{
+ ACPI_FUNCTION_TRACE (AcpiSetFirmwareWakingVector64);
+
+
+ /* Determine if the 64-bit vector actually exists */
+
+ if ((AcpiGbl_FACS->Length <= 32) || (AcpiGbl_FACS->Version < 1))
+ {
+ return_ACPI_STATUS (AE_NOT_EXIST);
+ }
+
+ /* Clear 32-bit vector, set the 64-bit X_ vector */
+
+ AcpiGbl_FACS->FirmwareWakingVector = 0;
+ AcpiGbl_FACS->XFirmwareWakingVector = PhysicalAddress;
+ return_ACPI_STATUS (AE_OK);
+}
+
+ACPI_EXPORT_SYMBOL (AcpiSetFirmwareWakingVector64)
+#endif
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiEnterSleepStateS4bios
+ *
+ * PARAMETERS: None
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Perform a S4 bios request.
+ * THIS FUNCTION MUST BE CALLED WITH INTERRUPTS DISABLED
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiEnterSleepStateS4bios (
+ void)
+{
+ UINT32 InValue;
+ ACPI_STATUS Status;
+
+
+ ACPI_FUNCTION_TRACE (AcpiEnterSleepStateS4bios);
+
+
+ /* Clear the wake status bit (PM1) */
+
+ Status = AcpiWriteBitRegister (ACPI_BITREG_WAKE_STATUS, ACPI_CLEAR_STATUS);
+ if (ACPI_FAILURE (Status))
+ {
+ return_ACPI_STATUS (Status);
+ }
+
+ Status = AcpiHwClearAcpiStatus ();
+ if (ACPI_FAILURE (Status))
+ {
+ return_ACPI_STATUS (Status);
+ }
+
+ /*
+ * 1) Disable/Clear all GPEs
+ * 2) Enable all wakeup GPEs
+ */
+ Status = AcpiHwDisableAllGpes ();
+ if (ACPI_FAILURE (Status))
+ {
+ return_ACPI_STATUS (Status);
+ }
+ AcpiGbl_SystemAwakeAndRunning = FALSE;
+
+ Status = AcpiHwEnableAllWakeupGpes ();
+ if (ACPI_FAILURE (Status))
+ {
+ return_ACPI_STATUS (Status);
+ }
+
+ ACPI_FLUSH_CPU_CACHE ();
+
+ Status = AcpiHwWritePort (AcpiGbl_FADT.SmiCommand,
+ (UINT32) AcpiGbl_FADT.S4BiosRequest, 8);
+
+ do {
+ AcpiOsStall(1000);
+ Status = AcpiReadBitRegister (ACPI_BITREG_WAKE_STATUS, &InValue);
+ if (ACPI_FAILURE (Status))
+ {
+ return_ACPI_STATUS (Status);
+ }
+ } while (!InValue);
+
+ return_ACPI_STATUS (AE_OK);
+}
+
+ACPI_EXPORT_SYMBOL (AcpiEnterSleepStateS4bios)
+
+#endif /* !ACPI_REDUCED_HARDWARE */
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiHwSleepDispatch
+ *
+ * PARAMETERS: SleepState - Which sleep state to enter/exit
+ * FunctionId - Sleep, WakePrep, or Wake
+ *
+ * RETURN: Status from the invoked sleep handling function.
+ *
+ * DESCRIPTION: Dispatch a sleep/wake request to the appropriate handling
+ * function.
+ *
+ ******************************************************************************/
+
+static ACPI_STATUS
+AcpiHwSleepDispatch (
+ UINT8 SleepState,
+ UINT8 Flags,
+ UINT32 FunctionId)
+{
+ ACPI_STATUS Status;
+ ACPI_SLEEP_FUNCTIONS *SleepFunctions = &AcpiSleepDispatch[FunctionId];
+
+
+#if (!ACPI_REDUCED_HARDWARE)
+
+ /*
+ * If the Hardware Reduced flag is set (from the FADT), we must
+ * use the extended sleep registers
+ */
+ if (AcpiGbl_ReducedHardware ||
+ AcpiGbl_FADT.SleepControl.Address)
+ {
+ Status = SleepFunctions->ExtendedFunction (SleepState, Flags);
+ }
+ else
+ {
+ /* Legacy sleep */
+
+ Status = SleepFunctions->LegacyFunction (SleepState, Flags);
+ }
+
+ return (Status);
+
+#else
+ /*
+ * For the case where reduced-hardware-only code is being generated,
+ * we know that only the extended sleep registers are available
+ */
+ Status = SleepFunctions->ExtendedFunction (SleepState, Flags);
+ return (Status);
+
+#endif /* !ACPI_REDUCED_HARDWARE */
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiEnterSleepStatePrep
+ *
+ * PARAMETERS: SleepState - Which sleep state to enter
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Prepare to enter a system sleep state.
+ * This function must execute with interrupts enabled.
+ * We break sleeping into 2 stages so that OSPM can handle
+ * various OS-specific tasks between the two steps.
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiEnterSleepStatePrep (
+ UINT8 SleepState)
+{
+ ACPI_STATUS Status;
+ ACPI_OBJECT_LIST ArgList;
+ ACPI_OBJECT Arg;
+ UINT32 SstValue;
+
+
+ ACPI_FUNCTION_TRACE (AcpiEnterSleepStatePrep);
+
+
+ Status = AcpiGetSleepTypeData (SleepState,
+ &AcpiGbl_SleepTypeA, &AcpiGbl_SleepTypeB);
+ if (ACPI_FAILURE (Status))
+ {
+ return_ACPI_STATUS (Status);
+ }
+
+ /* Execute the _PTS method (Prepare To Sleep) */
+
+ ArgList.Count = 1;
+ ArgList.Pointer = &Arg;
+ Arg.Type = ACPI_TYPE_INTEGER;
+ Arg.Integer.Value = SleepState;
+
+ Status = AcpiEvaluateObject (NULL, METHOD_PATHNAME__PTS, &ArgList, NULL);
+ if (ACPI_FAILURE (Status) && Status != AE_NOT_FOUND)
+ {
+ return_ACPI_STATUS (Status);
+ }
+
+ /* Setup the argument to the _SST method (System STatus) */
+
+ switch (SleepState)
+ {
+ case ACPI_STATE_S0:
+ SstValue = ACPI_SST_WORKING;
+ break;
+
+ case ACPI_STATE_S1:
+ case ACPI_STATE_S2:
+ case ACPI_STATE_S3:
+ SstValue = ACPI_SST_SLEEPING;
+ break;
+
+ case ACPI_STATE_S4:
+ SstValue = ACPI_SST_SLEEP_CONTEXT;
+ break;
+
+ default:
+ SstValue = ACPI_SST_INDICATOR_OFF; /* Default is off */
+ break;
+ }
+
+ /*
+ * Set the system indicators to show the desired sleep state.
+ * _SST is an optional method (return no error if not found)
+ */
+ AcpiHwExecuteSleepMethod (METHOD_PATHNAME__SST, SstValue);
+ return_ACPI_STATUS (AE_OK);
+}
+
+ACPI_EXPORT_SYMBOL (AcpiEnterSleepStatePrep)
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiEnterSleepState
+ *
+ * PARAMETERS: SleepState - Which sleep state to enter
+ * Flags - ACPI_EXECUTE_GTS to run optional method
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Enter a system sleep state
+ * THIS FUNCTION MUST BE CALLED WITH INTERRUPTS DISABLED
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiEnterSleepState (
+ UINT8 SleepState,
+ UINT8 Flags)
+{
+ ACPI_STATUS Status;
+
+
+ ACPI_FUNCTION_TRACE (AcpiEnterSleepState);
+
+
+ if ((AcpiGbl_SleepTypeA > ACPI_SLEEP_TYPE_MAX) ||
+ (AcpiGbl_SleepTypeB > ACPI_SLEEP_TYPE_MAX))
+ {
+ ACPI_ERROR ((AE_INFO, "Sleep values out of range: A=0x%X B=0x%X",
+ AcpiGbl_SleepTypeA, AcpiGbl_SleepTypeB));
+ return_ACPI_STATUS (AE_AML_OPERAND_VALUE);
+ }
+
+ Status = AcpiHwSleepDispatch (SleepState, Flags, ACPI_SLEEP_FUNCTION_ID);
+ return_ACPI_STATUS (Status);
+}
+
+ACPI_EXPORT_SYMBOL (AcpiEnterSleepState)
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiLeaveSleepStatePrep
+ *
+ * PARAMETERS: SleepState - Which sleep state we are exiting
+ * Flags - ACPI_EXECUTE_BFS to run optional method
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Perform the first state of OS-independent ACPI cleanup after a
+ * sleep. Called with interrupts DISABLED.
+ * We break wake/resume into 2 stages so that OSPM can handle
+ * various OS-specific tasks between the two steps.
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiLeaveSleepStatePrep (
+ UINT8 SleepState,
+ UINT8 Flags)
+{
+ ACPI_STATUS Status;
+
+
+ ACPI_FUNCTION_TRACE (AcpiLeaveSleepStatePrep);
+
+
+ Status = AcpiHwSleepDispatch (SleepState, Flags, ACPI_WAKE_PREP_FUNCTION_ID);
+ return_ACPI_STATUS (Status);
+}
+
+ACPI_EXPORT_SYMBOL (AcpiLeaveSleepStatePrep)
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiLeaveSleepState
+ *
+ * PARAMETERS: SleepState - Which sleep state we are exiting
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Perform OS-independent ACPI cleanup after a sleep
+ * Called with interrupts ENABLED.
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiLeaveSleepState (
+ UINT8 SleepState)
+{
+ ACPI_STATUS Status;
+
+
+ ACPI_FUNCTION_TRACE (AcpiLeaveSleepState);
+
+
+ Status = AcpiHwSleepDispatch (SleepState, 0, ACPI_WAKE_FUNCTION_ID);
+ return_ACPI_STATUS (Status);
+}
+
+ACPI_EXPORT_SYMBOL (AcpiLeaveSleepState)
diff --git a/usr/src/uts/intel/io/acpica/namespace/nsaccess.c b/usr/src/uts/intel/io/acpica/namespace/nsaccess.c
index 1bb2b4b6e6..9dfa76fb08 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nsaccess.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nsaccess.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/namespace/nsalloc.c b/usr/src/uts/intel/io/acpica/namespace/nsalloc.c
index ac12ba4faf..c7e83e0eb4 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nsalloc.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nsalloc.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/namespace/nsdump.c b/usr/src/uts/intel/io/acpica/namespace/nsdump.c
index 58137abbc5..6b7c505779 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nsdump.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nsdump.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -270,7 +270,21 @@ AcpiNsDumpOneObject (
if (!ObjDesc)
{
- /* No attached object, we are done */
+ /* No attached object. Some types should always have an object */
+
+ switch (Type)
+ {
+ case ACPI_TYPE_INTEGER:
+ case ACPI_TYPE_PACKAGE:
+ case ACPI_TYPE_BUFFER:
+ case ACPI_TYPE_STRING:
+ case ACPI_TYPE_METHOD:
+ AcpiOsPrintf ("<No attached object>");
+ break;
+
+ default:
+ break;
+ }
AcpiOsPrintf ("\n");
return (AE_OK);
diff --git a/usr/src/uts/intel/io/acpica/namespace/nsdumpdv.c b/usr/src/uts/intel/io/acpica/namespace/nsdumpdv.c
index 11aab7a3a9..9d30886608 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nsdumpdv.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nsdumpdv.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -141,7 +141,7 @@ AcpiNsDumpRootDevices (
return;
}
- Status = AcpiGetHandle (NULL, ACPI_NS_SYSTEM_BUS, &SysBusHandle);
+ Status = AcpiGetHandle (NULL, METHOD_NAME__SB_, &SysBusHandle);
if (ACPI_FAILURE (Status))
{
return;
diff --git a/usr/src/uts/intel/io/acpica/namespace/nseval.c b/usr/src/uts/intel/io/acpica/namespace/nseval.c
index c376ac7213..0183f7e774 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nseval.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nseval.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/namespace/nsinit.c b/usr/src/uts/intel/io/acpica/namespace/nsinit.c
index 1822bf519f..f3907aafb3 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nsinit.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nsinit.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/namespace/nsload.c b/usr/src/uts/intel/io/acpica/namespace/nsload.c
index 5825f6bc9d..3ef33f9ce9 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nsload.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nsload.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/namespace/nsnames.c b/usr/src/uts/intel/io/acpica/namespace/nsnames.c
index e59b2e9569..f70900da1d 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nsnames.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nsnames.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/namespace/nsobject.c b/usr/src/uts/intel/io/acpica/namespace/nsobject.c
index dadd2e8d98..9b219a5624 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nsobject.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nsobject.c
@@ -6,7 +6,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/namespace/nsparse.c b/usr/src/uts/intel/io/acpica/namespace/nsparse.c
index 983697eaaf..2c768c0b69 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nsparse.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nsparse.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/namespace/nspredef.c b/usr/src/uts/intel/io/acpica/namespace/nspredef.c
index 95c3edf04d..3a61aa3666 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nspredef.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nspredef.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -216,14 +216,20 @@ AcpiNsCheckPredefinedNames (
}
/*
- * 1) We have a return value, but if one wasn't expected, just exit, this is
- * not a problem. For example, if the "Implicit Return" feature is
- * enabled, methods will always return a value.
+ * Return value validation and possible repair.
*
- * 2) If the return value can be of any type, then we cannot perform any
- * validation, exit.
+ * 1) Don't perform return value validation/repair if this feature
+ * has been disabled via a global option.
+ *
+ * 2) We have a return value, but if one wasn't expected, just exit,
+ * this is not a problem. For example, if the "Implicit Return"
+ * feature is enabled, methods will always return a value.
+ *
+ * 3) If the return value can be of any type, then we cannot perform
+ * any validation, just exit.
*/
- if ((!Predefined->Info.ExpectedBtypes) ||
+ if (AcpiGbl_DisableAutoRepair ||
+ (!Predefined->Info.ExpectedBtypes) ||
(Predefined->Info.ExpectedBtypes == ACPI_RTYPE_ALL))
{
goto Cleanup;
@@ -237,6 +243,7 @@ AcpiNsCheckPredefinedNames (
goto Cleanup;
}
Data->Predefined = Predefined;
+ Data->Node = Node;
Data->NodeFlags = Node->Flags;
Data->Pathname = Pathname;
@@ -658,6 +665,7 @@ AcpiNsCheckPackage (
case ACPI_PTYPE2_FIXED:
case ACPI_PTYPE2_MIN:
case ACPI_PTYPE2_COUNT:
+ case ACPI_PTYPE2_FIX_VAR:
/*
* These types all return a single Package that consists of a
@@ -673,7 +681,7 @@ AcpiNsCheckPackage (
{
/* Create the new outer package and populate it */
- Status = AcpiNsRepairPackageList (Data, ReturnObjectPtr);
+ Status = AcpiNsWrapWithPackage (Data, *Elements, ReturnObjectPtr);
if (ACPI_FAILURE (Status))
{
return (Status);
@@ -800,6 +808,29 @@ AcpiNsCheckPackageList (
break;
+ case ACPI_PTYPE2_FIX_VAR:
+ /*
+ * Each subpackage has a fixed number of elements and an
+ * optional element
+ */
+ ExpectedCount = Package->RetInfo.Count1 + Package->RetInfo.Count2;
+ if (SubPackage->Package.Count < ExpectedCount)
+ {
+ goto PackageTooSmall;
+ }
+
+ Status = AcpiNsCheckPackageElements (Data, SubElements,
+ Package->RetInfo.ObjectType1,
+ Package->RetInfo.Count1,
+ Package->RetInfo.ObjectType2,
+ SubPackage->Package.Count - Package->RetInfo.Count1, 0);
+ if (ACPI_FAILURE (Status))
+ {
+ return (Status);
+ }
+ break;
+
+
case ACPI_PTYPE2_FIXED:
/* Each sub-package has a fixed length */
diff --git a/usr/src/uts/intel/io/acpica/namespace/nsrepair.c b/usr/src/uts/intel/io/acpica/namespace/nsrepair.c
index be73953ded..be9d3e889d 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nsrepair.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nsrepair.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -74,11 +74,10 @@
* Buffer -> String
* Buffer -> Package of Integers
* Package -> Package of one Package
+ * An incorrect standalone object is wrapped with required outer package
*
* Additional possible repairs:
- *
* Required package elements that are NULL replaced by Integer/String/Buffer
- * Incorrect standalone package wrapped with required outer package
*
******************************************************************************/
@@ -100,11 +99,6 @@ AcpiNsConvertToBuffer (
ACPI_OPERAND_OBJECT *OriginalObject,
ACPI_OPERAND_OBJECT **ReturnObject);
-static ACPI_STATUS
-AcpiNsConvertToPackage (
- ACPI_OPERAND_OBJECT *OriginalObject,
- ACPI_OPERAND_OBJECT **ReturnObject);
-
/*******************************************************************************
*
@@ -172,10 +166,24 @@ AcpiNsRepairObject (
}
if (ExpectedBtypes & ACPI_RTYPE_PACKAGE)
{
- Status = AcpiNsConvertToPackage (ReturnObject, &NewObject);
+ /*
+ * A package is expected. We will wrap the existing object with a
+ * new package object. It is often the case that if a variable-length
+ * package is required, but there is only a single object needed, the
+ * BIOS will return that object instead of wrapping it with a Package
+ * object. Note: after the wrapping, the package will be validated
+ * for correct contents (expected object type or types).
+ */
+ Status = AcpiNsWrapWithPackage (Data, ReturnObject, &NewObject);
if (ACPI_SUCCESS (Status))
{
- goto ObjectRepaired;
+ /*
+ * The original object just had its reference count
+ * incremented for being inserted into the new package.
+ */
+ *ReturnObjectPtr = NewObject; /* New Package object */
+ Data->Flags |= ACPI_OBJECT_REPAIRED;
+ return (AE_OK);
}
}
@@ -188,24 +196,30 @@ ObjectRepaired:
/* Object was successfully repaired */
- /*
- * If the original object is a package element, we need to:
- * 1. Set the reference count of the new object to match the
- * reference count of the old object.
- * 2. Decrement the reference count of the original object.
- */
if (PackageIndex != ACPI_NOT_PACKAGE_ELEMENT)
{
- NewObject->Common.ReferenceCount =
- ReturnObject->Common.ReferenceCount;
-
- if (ReturnObject->Common.ReferenceCount > 1)
+ /*
+ * The original object is a package element. We need to
+ * decrement the reference count of the original object,
+ * for removing it from the package.
+ *
+ * However, if the original object was just wrapped with a
+ * package object as part of the repair, we don't need to
+ * change the reference count.
+ */
+ if (!(Data->Flags & ACPI_OBJECT_WRAPPED))
{
- ReturnObject->Common.ReferenceCount--;
+ NewObject->Common.ReferenceCount =
+ ReturnObject->Common.ReferenceCount;
+
+ if (ReturnObject->Common.ReferenceCount > 1)
+ {
+ ReturnObject->Common.ReferenceCount--;
+ }
}
ACPI_DEBUG_PRINT ((ACPI_DB_REPAIR,
- "%s: Converted %s to expected %s at index %u\n",
+ "%s: Converted %s to expected %s at Package index %u\n",
Data->Pathname, AcpiUtGetObjectTypeName (ReturnObject),
AcpiUtGetObjectTypeName (NewObject), PackageIndex));
}
@@ -498,71 +512,6 @@ AcpiNsConvertToBuffer (
/*******************************************************************************
*
- * FUNCTION: AcpiNsConvertToPackage
- *
- * PARAMETERS: OriginalObject - Object to be converted
- * ReturnObject - Where the new converted object is returned
- *
- * RETURN: Status. AE_OK if conversion was successful.
- *
- * DESCRIPTION: Attempt to convert a Buffer object to a Package. Each byte of
- * the buffer is converted to a single integer package element.
- *
- ******************************************************************************/
-
-static ACPI_STATUS
-AcpiNsConvertToPackage (
- ACPI_OPERAND_OBJECT *OriginalObject,
- ACPI_OPERAND_OBJECT **ReturnObject)
-{
- ACPI_OPERAND_OBJECT *NewObject;
- ACPI_OPERAND_OBJECT **Elements;
- UINT32 Length;
- UINT8 *Buffer;
-
-
- switch (OriginalObject->Common.Type)
- {
- case ACPI_TYPE_BUFFER:
-
- /* Buffer-to-Package conversion */
-
- Length = OriginalObject->Buffer.Length;
- NewObject = AcpiUtCreatePackageObject (Length);
- if (!NewObject)
- {
- return (AE_NO_MEMORY);
- }
-
- /* Convert each buffer byte to an integer package element */
-
- Elements = NewObject->Package.Elements;
- Buffer = OriginalObject->Buffer.Pointer;
-
- while (Length--)
- {
- *Elements = AcpiUtCreateIntegerObject ((UINT64) *Buffer);
- if (!*Elements)
- {
- AcpiUtRemoveReference (NewObject);
- return (AE_NO_MEMORY);
- }
- Elements++;
- Buffer++;
- }
- break;
-
- default:
- return (AE_AML_OPERAND_TYPE);
- }
-
- *ReturnObject = NewObject;
- return (AE_OK);
-}
-
-
-/*******************************************************************************
- *
* FUNCTION: AcpiNsRepairNullElement
*
* PARAMETERS: Data - Pointer to validation data structure
@@ -696,6 +645,7 @@ AcpiNsRemoveNullElements (
case ACPI_PTYPE2_FIXED:
case ACPI_PTYPE2_MIN:
case ACPI_PTYPE2_REV_FIXED:
+ case ACPI_PTYPE2_FIX_VAR:
break;
default:
@@ -744,42 +694,43 @@ AcpiNsRemoveNullElements (
/*******************************************************************************
*
- * FUNCTION: AcpiNsRepairPackageList
+ * FUNCTION: AcpiNsWrapWithPackage
*
* PARAMETERS: Data - Pointer to validation data structure
- * ObjDescPtr - Pointer to the object to repair. The new
- * package object is returned here,
- * overwriting the old object.
+ * OriginalObject - Pointer to the object to repair.
+ * ObjDescPtr - The new package object is returned here
*
* RETURN: Status, new object in *ObjDescPtr
*
- * DESCRIPTION: Repair a common problem with objects that are defined to return
- * a variable-length Package of Packages. If the variable-length
- * is one, some BIOS code mistakenly simply declares a single
- * Package instead of a Package with one sub-Package. This
- * function attempts to repair this error by wrapping a Package
- * object around the original Package, creating the correct
- * Package with one sub-Package.
+ * DESCRIPTION: Repair a common problem with objects that are defined to
+ * return a variable-length Package of sub-objects. If there is
+ * only one sub-object, some BIOS code mistakenly simply declares
+ * the single object instead of a Package with one sub-object.
+ * This function attempts to repair this error by wrapping a
+ * Package object around the original object, creating the
+ * correct and expected Package with one sub-object.
*
* Names that can be repaired in this manner include:
- * _ALR, _CSD, _HPX, _MLS, _PRT, _PSS, _TRT, TSS
+ * _ALR, _CSD, _HPX, _MLS, _PLD, _PRT, _PSS, _TRT, _TSS,
+ * _BCL, _DOD, _FIX, _Sx
*
******************************************************************************/
ACPI_STATUS
-AcpiNsRepairPackageList (
+AcpiNsWrapWithPackage (
ACPI_PREDEFINED_DATA *Data,
+ ACPI_OPERAND_OBJECT *OriginalObject,
ACPI_OPERAND_OBJECT **ObjDescPtr)
{
ACPI_OPERAND_OBJECT *PkgObjDesc;
- ACPI_FUNCTION_NAME (NsRepairPackageList);
+ ACPI_FUNCTION_NAME (NsWrapWithPackage);
/*
* Create the new outer package and populate it. The new package will
- * have a single element, the lone subpackage.
+ * have a single element, the lone sub-object.
*/
PkgObjDesc = AcpiUtCreatePackageObject (1);
if (!PkgObjDesc)
@@ -787,15 +738,15 @@ AcpiNsRepairPackageList (
return (AE_NO_MEMORY);
}
- PkgObjDesc->Package.Elements[0] = *ObjDescPtr;
+ PkgObjDesc->Package.Elements[0] = OriginalObject;
+
+ ACPI_DEBUG_PRINT ((ACPI_DB_REPAIR,
+ "%s: Wrapped %s with expected Package object\n",
+ Data->Pathname, AcpiUtGetObjectTypeName (OriginalObject)));
/* Return the new object in the object pointer */
*ObjDescPtr = PkgObjDesc;
- Data->Flags |= ACPI_OBJECT_REPAIRED;
-
- ACPI_DEBUG_PRINT ((ACPI_DB_REPAIR,
- "%s: Repaired incorrectly formed Package\n", Data->Pathname));
-
+ Data->Flags |= ACPI_OBJECT_REPAIRED | ACPI_OBJECT_WRAPPED;
return (AE_OK);
}
diff --git a/usr/src/uts/intel/io/acpica/namespace/nsrepair2.c b/usr/src/uts/intel/io/acpica/namespace/nsrepair2.c
index 4c9c6d7825..a6a4d6874d 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nsrepair2.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nsrepair2.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -516,11 +516,12 @@ AcpiNsRepair_HID (
}
/*
- * Copy and uppercase the string. From the ACPI specification:
+ * Copy and uppercase the string. From the ACPI 5.0 specification:
*
* A valid PNP ID must be of the form "AAA####" where A is an uppercase
* letter and # is a hex digit. A valid ACPI ID must be of the form
- * "ACPI####" where # is a hex digit.
+ * "NNNN####" where N is an uppercase letter or decimal digit, and
+ * # is a hex digit.
*/
for (Dest = NewString->String.Pointer; *Source; Dest++, Source++)
{
@@ -555,8 +556,23 @@ AcpiNsRepair_TSS (
{
ACPI_OPERAND_OBJECT *ReturnObject = *ReturnObjectPtr;
ACPI_STATUS Status;
+ ACPI_NAMESPACE_NODE *Node;
+ /*
+ * We can only sort the _TSS return package if there is no _PSS in the
+ * same scope. This is because if _PSS is present, the ACPI specification
+ * dictates that the _TSS Power Dissipation field is to be ignored, and
+ * therefore some BIOSs leave garbage values in the _TSS Power field(s).
+ * In this case, it is best to just return the _TSS package as-is.
+ * (May, 2011)
+ */
+ Status = AcpiNsGetNode (Data->Node, "^_PSS", ACPI_NS_NO_UPSEARCH, &Node);
+ if (ACPI_SUCCESS (Status))
+ {
+ return (AE_OK);
+ }
+
Status = AcpiNsCheckSortedList (Data, ReturnObject, 5, 1,
ACPI_SORT_DESCENDING, "PowerDissipation");
diff --git a/usr/src/uts/intel/io/acpica/namespace/nssearch.c b/usr/src/uts/intel/io/acpica/namespace/nssearch.c
index 1951c2548b..d6f59ccbd1 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nssearch.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nssearch.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/namespace/nsutils.c b/usr/src/uts/intel/io/acpica/namespace/nsutils.c
index fad2a57c5e..43f734e819 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nsutils.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nsutils.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -405,7 +405,7 @@ AcpiNsBuildInternalName (
if (!AcpiNsValidPathSeparator (*ExternalName) &&
(*ExternalName != 0))
{
- return_ACPI_STATUS (AE_BAD_PARAMETER);
+ return_ACPI_STATUS (AE_BAD_PATHNAME);
}
/* Move on the next segment */
diff --git a/usr/src/uts/intel/io/acpica/namespace/nswalk.c b/usr/src/uts/intel/io/acpica/namespace/nswalk.c
index d44775212a..35c3e17896 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nswalk.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nswalk.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/namespace/nsxfeval.c b/usr/src/uts/intel/io/acpica/namespace/nsxfeval.c
index 0854cac9e9..5006588e85 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nsxfeval.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nsxfeval.c
@@ -6,7 +6,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/namespace/nsxfname.c b/usr/src/uts/intel/io/acpica/namespace/nsxfname.c
index b0ccff5bdc..4690d09e90 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nsxfname.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nsxfname.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/namespace/nsxfobj.c b/usr/src/uts/intel/io/acpica/namespace/nsxfobj.c
index 9910248220..02f7777a39 100644
--- a/usr/src/uts/intel/io/acpica/namespace/nsxfobj.c
+++ b/usr/src/uts/intel/io/acpica/namespace/nsxfobj.c
@@ -6,7 +6,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/osl.c b/usr/src/uts/intel/io/acpica/osl.c
index d5bfab754f..44f8d2f48f 100644
--- a/usr/src/uts/intel/io/acpica/osl.c
+++ b/usr/src/uts/intel/io/acpica/osl.c
@@ -22,7 +22,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright 2011 Joyent, Inc. All rights reserved.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*/
/*
* Copyright (c) 2009-2010, Intel Corporation.
@@ -315,6 +315,12 @@ AcpiOsTableOverride(ACPI_TABLE_HEADER *ExistingTable,
return (AE_OK);
}
+ACPI_STATUS
+AcpiOsPhysicalTableOverride(ACPI_TABLE_HEADER *ExistingTable,
+ ACPI_PHYSICAL_ADDRESS *NewAddress, UINT32 *NewTableLength)
+{
+ return (AE_SUPPORT);
+}
/*
* ACPI semaphore implementation
@@ -883,7 +889,7 @@ AcpiOsWritePort(ACPI_IO_ADDRESS Address, UINT32 Value, UINT32 Width)
static void
-osl_rw_memory(ACPI_PHYSICAL_ADDRESS Address, UINT32 *Value,
+osl_rw_memory(ACPI_PHYSICAL_ADDRESS Address, UINT64 *Value,
UINT32 Width, int write)
{
size_t maplen = Width / 8;
@@ -902,6 +908,9 @@ osl_rw_memory(ACPI_PHYSICAL_ADDRESS Address, UINT32 *Value,
case 4:
OSL_RW(ptr, Value, uint32_t, write);
break;
+ case 8:
+ OSL_RW(ptr, Value, uint64_t, write);
+ break;
default:
cmn_err(CE_WARN, "!osl_rw_memory: invalid size %d",
Width);
@@ -913,7 +922,7 @@ osl_rw_memory(ACPI_PHYSICAL_ADDRESS Address, UINT32 *Value,
ACPI_STATUS
AcpiOsReadMemory(ACPI_PHYSICAL_ADDRESS Address,
- UINT32 *Value, UINT32 Width)
+ UINT64 *Value, UINT32 Width)
{
osl_rw_memory(Address, Value, Width, 0);
return (AE_OK);
@@ -921,7 +930,7 @@ AcpiOsReadMemory(ACPI_PHYSICAL_ADDRESS Address,
ACPI_STATUS
AcpiOsWriteMemory(ACPI_PHYSICAL_ADDRESS Address,
- UINT32 Value, UINT32 Width)
+ UINT64 Value, UINT32 Width)
{
osl_rw_memory(Address, &Value, Width, 1);
return (AE_OK);
diff --git a/usr/src/uts/intel/io/acpica/parser/psargs.c b/usr/src/uts/intel/io/acpica/parser/psargs.c
index e383def11e..8140fdbb9d 100644
--- a/usr/src/uts/intel/io/acpica/parser/psargs.c
+++ b/usr/src/uts/intel/io/acpica/parser/psargs.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -531,37 +531,57 @@ static ACPI_PARSE_OBJECT *
AcpiPsGetNextField (
ACPI_PARSE_STATE *ParserState)
{
- UINT32 AmlOffset = (UINT32)
- ACPI_PTR_DIFF (ParserState->Aml,
- ParserState->AmlStart);
+ UINT32 AmlOffset;
ACPI_PARSE_OBJECT *Field;
+ ACPI_PARSE_OBJECT *Arg = NULL;
UINT16 Opcode;
UINT32 Name;
+ UINT8 AccessType;
+ UINT8 AccessAttribute;
+ UINT8 AccessLength;
+ UINT32 PkgLength;
+ UINT8 *PkgEnd;
+ UINT32 BufferLength;
ACPI_FUNCTION_TRACE (PsGetNextField);
+ AmlOffset = (UINT32) ACPI_PTR_DIFF (
+ ParserState->Aml, ParserState->AmlStart);
+
/* Determine field type */
switch (ACPI_GET8 (ParserState->Aml))
{
- default:
+ case AML_FIELD_OFFSET_OP:
- Opcode = AML_INT_NAMEDFIELD_OP;
+ Opcode = AML_INT_RESERVEDFIELD_OP;
+ ParserState->Aml++;
break;
- case 0x00:
+ case AML_FIELD_ACCESS_OP:
- Opcode = AML_INT_RESERVEDFIELD_OP;
+ Opcode = AML_INT_ACCESSFIELD_OP;
ParserState->Aml++;
break;
- case 0x01:
+ case AML_FIELD_CONNECTION_OP:
- Opcode = AML_INT_ACCESSFIELD_OP;
+ Opcode = AML_INT_CONNECTION_OP;
+ ParserState->Aml++;
+ break;
+
+ case AML_FIELD_EXT_ACCESS_OP:
+
+ Opcode = AML_INT_EXTACCESSFIELD_OP;
ParserState->Aml++;
break;
+
+ default:
+
+ Opcode = AML_INT_NAMEDFIELD_OP;
+ break;
}
/* Allocate a new field op */
@@ -601,17 +621,118 @@ AcpiPsGetNextField (
case AML_INT_ACCESSFIELD_OP:
+ case AML_INT_EXTACCESSFIELD_OP:
/*
* Get AccessType and AccessAttrib and merge into the field Op
- * AccessType is first operand, AccessAttribute is second
+ * AccessType is first operand, AccessAttribute is second. stuff
+ * these bytes into the node integer value for convenience.
*/
- Field->Common.Value.Integer = (((UINT32) ACPI_GET8 (ParserState->Aml) << 8));
+
+ /* Get the two bytes (Type/Attribute) */
+
+ AccessType = ACPI_GET8 (ParserState->Aml);
ParserState->Aml++;
- Field->Common.Value.Integer |= ACPI_GET8 (ParserState->Aml);
+ AccessAttribute = ACPI_GET8 (ParserState->Aml);
ParserState->Aml++;
+
+ Field->Common.Value.Integer = (UINT8) AccessType;
+ Field->Common.Value.Integer |= (UINT16) (AccessAttribute << 8);
+
+ /* This opcode has a third byte, AccessLength */
+
+ if (Opcode == AML_INT_EXTACCESSFIELD_OP)
+ {
+ AccessLength = ACPI_GET8 (ParserState->Aml);
+ ParserState->Aml++;
+
+ Field->Common.Value.Integer |= (UINT32) (AccessLength << 16);
+ }
break;
+
+ case AML_INT_CONNECTION_OP:
+
+ /*
+ * Argument for Connection operator can be either a Buffer
+ * (resource descriptor), or a NameString.
+ */
+ if (ACPI_GET8 (ParserState->Aml) == AML_BUFFER_OP)
+ {
+ ParserState->Aml++;
+
+ PkgEnd = ParserState->Aml;
+ PkgLength = AcpiPsGetNextPackageLength (ParserState);
+ PkgEnd += PkgLength;
+
+ if (ParserState->Aml < PkgEnd)
+ {
+ /* Non-empty list */
+
+ Arg = AcpiPsAllocOp (AML_INT_BYTELIST_OP);
+ if (!Arg)
+ {
+ AcpiPsFreeOp (Field);
+ return_PTR (NULL);
+ }
+
+ /* Get the actual buffer length argument */
+
+ Opcode = ACPI_GET8 (ParserState->Aml);
+ ParserState->Aml++;
+
+ switch (Opcode)
+ {
+ case AML_BYTE_OP: /* AML_BYTEDATA_ARG */
+ BufferLength = ACPI_GET8 (ParserState->Aml);
+ ParserState->Aml += 1;
+ break;
+
+ case AML_WORD_OP: /* AML_WORDDATA_ARG */
+ BufferLength = ACPI_GET16 (ParserState->Aml);
+ ParserState->Aml += 2;
+ break;
+
+ case AML_DWORD_OP: /* AML_DWORDATA_ARG */
+ BufferLength = ACPI_GET32 (ParserState->Aml);
+ ParserState->Aml += 4;
+ break;
+
+ default:
+ BufferLength = 0;
+ break;
+ }
+
+ /* Fill in bytelist data */
+
+ Arg->Named.Value.Size = BufferLength;
+ Arg->Named.Data = ParserState->Aml;
+ }
+
+ /* Skip to End of byte data */
+
+ ParserState->Aml = PkgEnd;
+ }
+ else
+ {
+ Arg = AcpiPsAllocOp (AML_INT_NAMEPATH_OP);
+ if (!Arg)
+ {
+ AcpiPsFreeOp (Field);
+ return_PTR (NULL);
+ }
+
+ /* Get the Namestring argument */
+
+ Arg->Common.Value.Name = AcpiPsGetNextNamestring (ParserState);
+ }
+
+ /* Link the buffer/namestring to parent (CONNECTION_OP) */
+
+ AcpiPsAppendArg (Field, Arg);
+ break;
+
+
default:
/* Opcode was set in previous switch */
diff --git a/usr/src/uts/intel/io/acpica/parser/psloop.c b/usr/src/uts/intel/io/acpica/parser/psloop.c
index a98989e099..d578c7e36b 100644
--- a/usr/src/uts/intel/io/acpica/parser/psloop.c
+++ b/usr/src/uts/intel/io/acpica/parser/psloop.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/parser/psopcode.c b/usr/src/uts/intel/io/acpica/parser/psopcode.c
index 0d82976d7f..ef2257e86f 100644
--- a/usr/src/uts/intel/io/acpica/parser/psopcode.c
+++ b/usr/src/uts/intel/io/acpica/parser/psopcode.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -328,12 +328,17 @@ const ACPI_OPCODE_INFO AcpiGbl_AmlOpInfo[AML_NUM_OPCODES] =
/* 79 */ ACPI_OP ("Mid", ARGP_MID_OP, ARGI_MID_OP, ACPI_TYPE_ANY, AML_CLASS_EXECUTE, AML_TYPE_EXEC_3A_1T_1R, AML_FLAGS_EXEC_3A_1T_1R | AML_CONSTANT),
/* 7A */ ACPI_OP ("Continue", ARGP_CONTINUE_OP, ARGI_CONTINUE_OP, ACPI_TYPE_ANY, AML_CLASS_CONTROL, AML_TYPE_CONTROL, 0),
/* 7B */ ACPI_OP ("LoadTable", ARGP_LOAD_TABLE_OP, ARGI_LOAD_TABLE_OP, ACPI_TYPE_ANY, AML_CLASS_EXECUTE, AML_TYPE_EXEC_6A_0T_1R, AML_FLAGS_EXEC_6A_0T_1R),
-/* 7C */ ACPI_OP ("DataTableRegion", ARGP_DATA_REGION_OP, ARGI_DATA_REGION_OP, ACPI_TYPE_REGION, AML_CLASS_NAMED_OBJECT, AML_TYPE_NAMED_COMPLEX, AML_HAS_ARGS | AML_NSOBJECT | AML_NSOPCODE | AML_NSNODE | AML_NAMED | AML_DEFER),
+/* 7C */ ACPI_OP ("DataTableRegion", ARGP_DATA_REGION_OP, ARGI_DATA_REGION_OP, ACPI_TYPE_REGION, AML_CLASS_NAMED_OBJECT, AML_TYPE_NAMED_COMPLEX, AML_HAS_ARGS | AML_NSOBJECT | AML_NSOPCODE | AML_NSNODE | AML_NAMED | AML_DEFER),
/* 7D */ ACPI_OP ("[EvalSubTree]", ARGP_SCOPE_OP, ARGI_SCOPE_OP, ACPI_TYPE_ANY, AML_CLASS_NAMED_OBJECT, AML_TYPE_NAMED_NO_OBJ, AML_HAS_ARGS | AML_NSOBJECT | AML_NSOPCODE | AML_NSNODE),
/* ACPI 3.0 opcodes */
-/* 7E */ ACPI_OP ("Timer", ARGP_TIMER_OP, ARGI_TIMER_OP, ACPI_TYPE_ANY, AML_CLASS_EXECUTE, AML_TYPE_EXEC_0A_0T_1R, AML_FLAGS_EXEC_0A_0T_1R)
+/* 7E */ ACPI_OP ("Timer", ARGP_TIMER_OP, ARGI_TIMER_OP, ACPI_TYPE_ANY, AML_CLASS_EXECUTE, AML_TYPE_EXEC_0A_0T_1R, AML_FLAGS_EXEC_0A_0T_1R),
+
+/* ACPI 5.0 opcodes */
+
+/* 7F */ ACPI_OP ("-ConnectField-", ARGP_CONNECTFIELD_OP, ARGI_CONNECTFIELD_OP, ACPI_TYPE_ANY, AML_CLASS_INTERNAL, AML_TYPE_BOGUS, AML_HAS_ARGS),
+/* 80 */ ACPI_OP ("-ExtAccessField-", ARGP_CONNECTFIELD_OP, ARGI_CONNECTFIELD_OP, ACPI_TYPE_ANY, AML_CLASS_INTERNAL, AML_TYPE_BOGUS, 0)
/*! [End] no source code translation !*/
};
@@ -353,7 +358,7 @@ static const UINT8 AcpiGbl_ShortOpIndex[256] =
/* 0x20 */ _UNK, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK,
/* 0x28 */ _UNK, _UNK, _UNK, _UNK, _UNK, 0x63, _PFX, _PFX,
/* 0x30 */ 0x67, 0x66, 0x68, 0x65, 0x69, 0x64, 0x6A, 0x7D,
-/* 0x38 */ _UNK, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK,
+/* 0x38 */ 0x7F, 0x80, _UNK, _UNK, _UNK, _UNK, _UNK, _UNK,
/* 0x40 */ _UNK, _ASC, _ASC, _ASC, _ASC, _ASC, _ASC, _ASC,
/* 0x48 */ _ASC, _ASC, _ASC, _ASC, _ASC, _ASC, _ASC, _ASC,
/* 0x50 */ _ASC, _ASC, _ASC, _ASC, _ASC, _ASC, _ASC, _ASC,
diff --git a/usr/src/uts/intel/io/acpica/parser/psparse.c b/usr/src/uts/intel/io/acpica/parser/psparse.c
index f817749b75..bcf46adfe5 100644
--- a/usr/src/uts/intel/io/acpica/parser/psparse.c
+++ b/usr/src/uts/intel/io/acpica/parser/psparse.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/parser/psscope.c b/usr/src/uts/intel/io/acpica/parser/psscope.c
index 827531d460..1df5a33af3 100644
--- a/usr/src/uts/intel/io/acpica/parser/psscope.c
+++ b/usr/src/uts/intel/io/acpica/parser/psscope.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/parser/pstree.c b/usr/src/uts/intel/io/acpica/parser/pstree.c
index d62bc7b9db..8271314e16 100644
--- a/usr/src/uts/intel/io/acpica/parser/pstree.c
+++ b/usr/src/uts/intel/io/acpica/parser/pstree.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -85,7 +85,12 @@ AcpiPsGetArg (
ACPI_FUNCTION_ENTRY ();
-
+/*
+ if (Op->Common.AmlOpcode == AML_INT_CONNECTION_OP)
+ {
+ return (Op->Common.Value.Arg);
+ }
+*/
/* Get the info structure for this opcode */
OpInfo = AcpiPsGetOpcodeInfo (Op->Common.AmlOpcode);
diff --git a/usr/src/uts/intel/io/acpica/parser/psutils.c b/usr/src/uts/intel/io/acpica/parser/psutils.c
index 426d2ccdd7..e28b75cc8f 100644
--- a/usr/src/uts/intel/io/acpica/parser/psutils.c
+++ b/usr/src/uts/intel/io/acpica/parser/psutils.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/parser/pswalk.c b/usr/src/uts/intel/io/acpica/parser/pswalk.c
index dbb4a17fe9..9734c4d177 100644
--- a/usr/src/uts/intel/io/acpica/parser/pswalk.c
+++ b/usr/src/uts/intel/io/acpica/parser/pswalk.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/parser/psxface.c b/usr/src/uts/intel/io/acpica/parser/psxface.c
index e7a5d9fe9a..2a012c2915 100644
--- a/usr/src/uts/intel/io/acpica/parser/psxface.c
+++ b/usr/src/uts/intel/io/acpica/parser/psxface.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/resources/rsaddr.c b/usr/src/uts/intel/io/acpica/resources/rsaddr.c
index c11e8ca128..90e7d43b6e 100644
--- a/usr/src/uts/intel/io/acpica/resources/rsaddr.c
+++ b/usr/src/uts/intel/io/acpica/resources/rsaddr.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/resources/rscalc.c b/usr/src/uts/intel/io/acpica/resources/rscalc.c
index 15cf685857..845ed69c7d 100644
--- a/usr/src/uts/intel/io/acpica/resources/rscalc.c
+++ b/usr/src/uts/intel/io/acpica/resources/rscalc.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -345,6 +345,26 @@ AcpiRsGetAmlLength (
break;
+ case ACPI_RESOURCE_TYPE_GPIO:
+
+ TotalSize = (ACPI_RS_LENGTH) (TotalSize + (Resource->Data.Gpio.PinTableLength * 2) +
+ Resource->Data.Gpio.ResourceSource.StringLength +
+ Resource->Data.Gpio.VendorLength);
+
+ break;
+
+
+ case ACPI_RESOURCE_TYPE_SERIAL_BUS:
+
+ TotalSize = AcpiGbl_AmlResourceSerialBusSizes [Resource->Data.CommonSerialBus.Type];
+
+ TotalSize = (ACPI_RS_LENGTH) (TotalSize +
+ Resource->Data.I2cSerialBus.ResourceSource.StringLength +
+ Resource->Data.I2cSerialBus.VendorLength);
+
+ break;
+
+
default:
break;
}
@@ -395,12 +415,13 @@ AcpiRsGetListLength (
UINT32 ExtraStructBytes;
UINT8 ResourceIndex;
UINT8 MinimumAmlResourceLength;
+ AML_RESOURCE *AmlResource;
ACPI_FUNCTION_TRACE (RsGetListLength);
- *SizeNeeded = 0;
+ *SizeNeeded = ACPI_RS_SIZE_MIN; /* Minimum size is one EndTag */
EndAml = AmlBuffer + AmlBufferLength;
/* Walk the list of AML resource descriptors */
@@ -412,9 +433,15 @@ AcpiRsGetListLength (
Status = AcpiUtValidateResource (AmlBuffer, &ResourceIndex);
if (ACPI_FAILURE (Status))
{
+ /*
+ * Exit on failure. Cannot continue because the descriptor length
+ * may be bogus also.
+ */
return_ACPI_STATUS (Status);
}
+ AmlResource = (void *) AmlBuffer;
+
/* Get the resource length and base (minimum) AML size */
ResourceLength = AcpiUtGetResourceLength (AmlBuffer);
@@ -460,10 +487,8 @@ AcpiRsGetListLength (
case ACPI_RESOURCE_NAME_END_TAG:
/*
- * End Tag:
- * This is the normal exit, add size of EndTag
+ * End Tag: This is the normal exit
*/
- *SizeNeeded += ACPI_RS_SIZE_MIN;
return_ACPI_STATUS (AE_OK);
@@ -494,6 +519,30 @@ AcpiRsGetListLength (
ResourceLength - ExtraStructBytes, MinimumAmlResourceLength);
break;
+ case ACPI_RESOURCE_NAME_GPIO:
+
+ /* Vendor data is optional */
+
+ if (AmlResource->Gpio.VendorLength)
+ {
+ ExtraStructBytes += AmlResource->Gpio.VendorOffset -
+ AmlResource->Gpio.PinTableOffset + AmlResource->Gpio.VendorLength;
+ }
+ else
+ {
+ ExtraStructBytes += AmlResource->LargeHeader.ResourceLength +
+ sizeof (AML_RESOURCE_LARGE_HEADER) -
+ AmlResource->Gpio.PinTableOffset;
+ }
+ break;
+
+ case ACPI_RESOURCE_NAME_SERIAL_BUS:
+
+ MinimumAmlResourceLength = AcpiGbl_ResourceAmlSerialBusSizes[
+ AmlResource->CommonSerialBus.Type];
+ ExtraStructBytes += AmlResource->CommonSerialBus.ResourceLength -
+ MinimumAmlResourceLength;
+ break;
default:
break;
@@ -505,8 +554,16 @@ AcpiRsGetListLength (
* Important: Round the size up for the appropriate alignment. This
* is a requirement on IA64.
*/
- BufferSize = AcpiGbl_ResourceStructSizes[ResourceIndex] +
+ if (AcpiUtGetResourceType (AmlBuffer) == ACPI_RESOURCE_NAME_SERIAL_BUS)
+ {
+ BufferSize = AcpiGbl_ResourceStructSerialBusSizes[
+ AmlResource->CommonSerialBus.Type] + ExtraStructBytes;
+ }
+ else
+ {
+ BufferSize = AcpiGbl_ResourceStructSizes[ResourceIndex] +
ExtraStructBytes;
+ }
BufferSize = (UINT32) ACPI_ROUND_UP_TO_NATIVE_WORD (BufferSize);
*SizeNeeded += BufferSize;
diff --git a/usr/src/uts/intel/io/acpica/resources/rscreate.c b/usr/src/uts/intel/io/acpica/resources/rscreate.c
index 40cf447c29..f06ea0cb4d 100644
--- a/usr/src/uts/intel/io/acpica/resources/rscreate.c
+++ b/usr/src/uts/intel/io/acpica/resources/rscreate.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -54,6 +54,79 @@
/*******************************************************************************
*
+ * FUNCTION: AcpiBufferToResource
+ *
+ * PARAMETERS: AmlBuffer - Pointer to the resource byte stream
+ * AmlBufferLength - Length of the AmlBuffer
+ * ResourcePtr - Where the converted resource is returned
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Convert a raw AML buffer to a resource list
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiBufferToResource (
+ UINT8 *AmlBuffer,
+ UINT16 AmlBufferLength,
+ ACPI_RESOURCE **ResourcePtr)
+{
+ ACPI_STATUS Status;
+ ACPI_SIZE ListSizeNeeded;
+ void *Resource;
+ void *CurrentResourcePtr;
+
+ /*
+ * Note: we allow AE_AML_NO_RESOURCE_END_TAG, since an end tag
+ * is not required here.
+ */
+
+ /* Get the required length for the converted resource */
+
+ Status = AcpiRsGetListLength (AmlBuffer, AmlBufferLength,
+ &ListSizeNeeded);
+ if (Status == AE_AML_NO_RESOURCE_END_TAG)
+ {
+ Status = AE_OK;
+ }
+ if (ACPI_FAILURE (Status))
+ {
+ return (Status);
+ }
+
+ /* Allocate a buffer for the converted resource */
+
+ Resource = ACPI_ALLOCATE_ZEROED (ListSizeNeeded);
+ CurrentResourcePtr = Resource;
+ if (!Resource)
+ {
+ return (AE_NO_MEMORY);
+ }
+
+ /* Perform the AML-to-Resource conversion */
+
+ Status = AcpiUtWalkAmlResources (AmlBuffer, AmlBufferLength,
+ AcpiRsConvertAmlToResources, &CurrentResourcePtr);
+ if (Status == AE_AML_NO_RESOURCE_END_TAG)
+ {
+ Status = AE_OK;
+ }
+ if (ACPI_FAILURE (Status))
+ {
+ ACPI_FREE (Resource);
+ }
+ else
+ {
+ *ResourcePtr = Resource;
+ }
+
+ return (Status);
+}
+
+
+/*******************************************************************************
+ *
* FUNCTION: AcpiRsCreateResourceList
*
* PARAMETERS: AmlBuffer - Pointer to the resource byte stream
diff --git a/usr/src/uts/intel/io/acpica/resources/rsdump.c b/usr/src/uts/intel/io/acpica/resources/rsdump.c
index 5470677447..68564739eb 100644
--- a/usr/src/uts/intel/io/acpica/resources/rsdump.c
+++ b/usr/src/uts/intel/io/acpica/resources/rsdump.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -91,6 +91,11 @@ AcpiRsDumpByteList (
UINT8 *Data);
static void
+AcpiRsDumpWordList (
+ UINT16 Length,
+ UINT16 *Data);
+
+static void
AcpiRsDumpDwordList (
UINT8 Length,
UINT32 *Data);
@@ -289,6 +294,87 @@ ACPI_RSDUMP_INFO AcpiRsDumpGenericReg[6] =
{ACPI_RSD_UINT64, ACPI_RSD_OFFSET (GenericReg.Address), "Address", NULL}
};
+ACPI_RSDUMP_INFO AcpiRsDumpGpio[16] =
+{
+ {ACPI_RSD_TITLE, ACPI_RSD_TABLE_SIZE (AcpiRsDumpGpio), "GPIO", NULL},
+ {ACPI_RSD_UINT8, ACPI_RSD_OFFSET (Gpio.RevisionId), "RevisionId", NULL},
+ {ACPI_RSD_UINT8, ACPI_RSD_OFFSET (Gpio.ConnectionType), "ConnectionType", AcpiGbl_CtDecode},
+ {ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET (Gpio.ProducerConsumer), "ProducerConsumer", AcpiGbl_ConsumeDecode},
+ {ACPI_RSD_UINT8, ACPI_RSD_OFFSET (Gpio.PinConfig), "PinConfig", AcpiGbl_PpcDecode},
+ {ACPI_RSD_2BITFLAG, ACPI_RSD_OFFSET (Gpio.Sharable), "Sharable", AcpiGbl_ShrDecode},
+ {ACPI_RSD_2BITFLAG, ACPI_RSD_OFFSET (Gpio.IoRestriction), "IoRestriction", AcpiGbl_IorDecode},
+ {ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET (Gpio.Triggering), "Triggering", AcpiGbl_HeDecode},
+ {ACPI_RSD_2BITFLAG, ACPI_RSD_OFFSET (Gpio.Polarity), "Polarity", AcpiGbl_LlDecode},
+ {ACPI_RSD_UINT16, ACPI_RSD_OFFSET (Gpio.DriveStrength), "DriveStrength", NULL},
+ {ACPI_RSD_UINT16, ACPI_RSD_OFFSET (Gpio.DebounceTimeout), "DebounceTimeout", NULL},
+ {ACPI_RSD_SOURCE, ACPI_RSD_OFFSET (Gpio.ResourceSource), "ResourceSource", NULL},
+ {ACPI_RSD_UINT16, ACPI_RSD_OFFSET (Gpio.PinTableLength), "PinTableLength", NULL},
+ {ACPI_RSD_WORDLIST, ACPI_RSD_OFFSET (Gpio.PinTable), "PinTable", NULL},
+ {ACPI_RSD_UINT16, ACPI_RSD_OFFSET (Gpio.VendorLength), "VendorLength", NULL},
+ {ACPI_RSD_SHORTLISTX,ACPI_RSD_OFFSET (Gpio.VendorData), "VendorData", NULL},
+};
+
+ACPI_RSDUMP_INFO AcpiRsDumpFixedDma[4] =
+{
+ {ACPI_RSD_TITLE, ACPI_RSD_TABLE_SIZE (AcpiRsDumpFixedDma), "FixedDma", NULL},
+ {ACPI_RSD_UINT16, ACPI_RSD_OFFSET (FixedDma.RequestLines), "RequestLines", NULL},
+ {ACPI_RSD_UINT16, ACPI_RSD_OFFSET (FixedDma.Channels), "Channels", NULL},
+ {ACPI_RSD_UINT8, ACPI_RSD_OFFSET (FixedDma.Width), "TransferWidth", AcpiGbl_DtsDecode},
+};
+
+#define ACPI_RS_DUMP_COMMON_SERIAL_BUS \
+ {ACPI_RSD_UINT8, ACPI_RSD_OFFSET (CommonSerialBus.RevisionId), "RevisionId", NULL}, \
+ {ACPI_RSD_UINT8, ACPI_RSD_OFFSET (CommonSerialBus.Type), "Type", AcpiGbl_SbtDecode}, \
+ {ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET (CommonSerialBus.ProducerConsumer), "ProducerConsumer", AcpiGbl_ConsumeDecode}, \
+ {ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET (CommonSerialBus.SlaveMode), "SlaveMode", AcpiGbl_SmDecode}, \
+ {ACPI_RSD_UINT8, ACPI_RSD_OFFSET (CommonSerialBus.TypeRevisionId), "TypeRevisionId", NULL}, \
+ {ACPI_RSD_UINT16, ACPI_RSD_OFFSET (CommonSerialBus.TypeDataLength), "TypeDataLength", NULL}, \
+ {ACPI_RSD_SOURCE, ACPI_RSD_OFFSET (CommonSerialBus.ResourceSource), "ResourceSource", NULL}, \
+ {ACPI_RSD_UINT16, ACPI_RSD_OFFSET (CommonSerialBus.VendorLength), "VendorLength", NULL}, \
+ {ACPI_RSD_SHORTLISTX,ACPI_RSD_OFFSET (CommonSerialBus.VendorData), "VendorData", NULL},
+
+ACPI_RSDUMP_INFO AcpiRsDumpCommonSerialBus[10] =
+{
+ {ACPI_RSD_TITLE, ACPI_RSD_TABLE_SIZE (AcpiRsDumpCommonSerialBus), "Common Serial Bus", NULL},
+ ACPI_RS_DUMP_COMMON_SERIAL_BUS
+};
+
+ACPI_RSDUMP_INFO AcpiRsDumpI2cSerialBus[13] =
+{
+ {ACPI_RSD_TITLE, ACPI_RSD_TABLE_SIZE (AcpiRsDumpI2cSerialBus), "I2C Serial Bus", NULL},
+ ACPI_RS_DUMP_COMMON_SERIAL_BUS
+ {ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET (I2cSerialBus.AccessMode), "AccessMode", AcpiGbl_AmDecode},
+ {ACPI_RSD_UINT32, ACPI_RSD_OFFSET (I2cSerialBus.ConnectionSpeed), "ConnectionSpeed", NULL},
+ {ACPI_RSD_UINT16, ACPI_RSD_OFFSET (I2cSerialBus.SlaveAddress), "SlaveAddress", NULL},
+};
+
+ACPI_RSDUMP_INFO AcpiRsDumpSpiSerialBus[17] =
+{
+ {ACPI_RSD_TITLE, ACPI_RSD_TABLE_SIZE (AcpiRsDumpSpiSerialBus), "Spi Serial Bus", NULL},
+ ACPI_RS_DUMP_COMMON_SERIAL_BUS
+ {ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET (SpiSerialBus.WireMode), "WireMode", AcpiGbl_WmDecode},
+ {ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET (SpiSerialBus.DevicePolarity), "DevicePolarity", AcpiGbl_DpDecode},
+ {ACPI_RSD_UINT8, ACPI_RSD_OFFSET (SpiSerialBus.DataBitLength), "DataBitLength", NULL},
+ {ACPI_RSD_UINT8, ACPI_RSD_OFFSET (SpiSerialBus.ClockPhase), "ClockPhase", AcpiGbl_CphDecode},
+ {ACPI_RSD_UINT8, ACPI_RSD_OFFSET (SpiSerialBus.ClockPolarity), "ClockPolarity", AcpiGbl_CpoDecode},
+ {ACPI_RSD_UINT16, ACPI_RSD_OFFSET (SpiSerialBus.DeviceSelection), "DeviceSelection", NULL},
+ {ACPI_RSD_UINT32, ACPI_RSD_OFFSET (SpiSerialBus.ConnectionSpeed), "ConnectionSpeed", NULL},
+};
+
+ACPI_RSDUMP_INFO AcpiRsDumpUartSerialBus[19] =
+{
+ {ACPI_RSD_TITLE, ACPI_RSD_TABLE_SIZE (AcpiRsDumpUartSerialBus), "Uart Serial Bus", NULL},
+ ACPI_RS_DUMP_COMMON_SERIAL_BUS
+ {ACPI_RSD_2BITFLAG, ACPI_RSD_OFFSET (UartSerialBus.FlowControl), "FlowControl", AcpiGbl_FcDecode},
+ {ACPI_RSD_2BITFLAG, ACPI_RSD_OFFSET (UartSerialBus.StopBits), "StopBits", AcpiGbl_SbDecode},
+ {ACPI_RSD_3BITFLAG, ACPI_RSD_OFFSET (UartSerialBus.DataBits), "DataBits", AcpiGbl_BpbDecode},
+ {ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET (UartSerialBus.Endian), "Endian", AcpiGbl_EdDecode},
+ {ACPI_RSD_UINT8, ACPI_RSD_OFFSET (UartSerialBus.Parity), "Parity", AcpiGbl_PtDecode},
+ {ACPI_RSD_UINT8, ACPI_RSD_OFFSET (UartSerialBus.LinesEnabled), "LinesEnabled", NULL},
+ {ACPI_RSD_UINT16, ACPI_RSD_OFFSET (UartSerialBus.RxFifoSize), "RxFifoSize", NULL},
+ {ACPI_RSD_UINT16, ACPI_RSD_OFFSET (UartSerialBus.TxFifoSize), "TxFifoSize", NULL},
+ {ACPI_RSD_UINT32, ACPI_RSD_OFFSET (UartSerialBus.DefaultBaudRate), "ConnectionSpeed", NULL},
+};
/*
* Tables used for common address descriptor flag fields
@@ -391,7 +477,15 @@ AcpiRsDumpDescriptor (
/* Data items, 8/16/32/64 bit */
case ACPI_RSD_UINT8:
- AcpiRsOutInteger8 (Name, ACPI_GET8 (Target));
+ if (Table->Pointer)
+ {
+ AcpiRsOutString (Name, ACPI_CAST_PTR (char,
+ Table->Pointer [*Target]));
+ }
+ else
+ {
+ AcpiRsOutInteger8 (Name, ACPI_GET8 (Target));
+ }
break;
case ACPI_RSD_UINT16:
@@ -418,6 +512,11 @@ AcpiRsDumpDescriptor (
Table->Pointer [*Target & 0x03]));
break;
+ case ACPI_RSD_3BITFLAG:
+ AcpiRsOutString (Name, ACPI_CAST_PTR (char,
+ Table->Pointer [*Target & 0x07]));
+ break;
+
case ACPI_RSD_SHORTLIST:
/*
* Short byte list (single line output) for DMA and IRQ resources
@@ -430,6 +529,19 @@ AcpiRsDumpDescriptor (
}
break;
+ case ACPI_RSD_SHORTLISTX:
+ /*
+ * Short byte list (single line output) for GPIO vendor data
+ * Note: The list length is obtained from the previous table entry
+ */
+ if (PreviousTarget)
+ {
+ AcpiRsOutTitle (Name);
+ AcpiRsDumpShortByteList (*PreviousTarget,
+ *(ACPI_CAST_INDIRECT_PTR (UINT8, Target)));
+ }
+ break;
+
case ACPI_RSD_LONGLIST:
/*
* Long byte list for Vendor resource data
@@ -453,6 +565,18 @@ AcpiRsDumpDescriptor (
}
break;
+ case ACPI_RSD_WORDLIST:
+ /*
+ * Word list for GPIO Pin Table
+ * Note: The list length is obtained from the previous table entry
+ */
+ if (PreviousTarget)
+ {
+ AcpiRsDumpWordList (*PreviousTarget,
+ *(ACPI_CAST_INDIRECT_PTR (UINT16, Target)));
+ }
+ break;
+
case ACPI_RSD_ADDRESS:
/*
* Common flags for all Address resources
@@ -613,13 +737,20 @@ AcpiRsDumpResourceList (
/* Dump the resource descriptor */
- AcpiRsDumpDescriptor (&ResourceList->Data,
- AcpiGbl_DumpResourceDispatch[Type]);
+ if (Type == ACPI_RESOURCE_TYPE_SERIAL_BUS)
+ {
+ AcpiRsDumpDescriptor (&ResourceList->Data,
+ AcpiGbl_DumpSerialBusDispatch[ResourceList->Data.CommonSerialBus.Type]);
+ }
+ else
+ {
+ AcpiRsDumpDescriptor (&ResourceList->Data,
+ AcpiGbl_DumpResourceDispatch[Type]);
+ }
/* Point to the next resource structure */
- ResourceList = ACPI_ADD_PTR (ACPI_RESOURCE, ResourceList,
- ResourceList->Length);
+ ResourceList = ACPI_NEXT_RESOURCE (ResourceList);
/* Exit when END_TAG descriptor is reached */
@@ -796,5 +927,20 @@ AcpiRsDumpDwordList (
}
}
+static void
+AcpiRsDumpWordList (
+ UINT16 Length,
+ UINT16 *Data)
+{
+ UINT16 i;
+
+
+ for (i = 0; i < Length; i++)
+ {
+ AcpiOsPrintf ("%25s%2.2X : %4.4X\n",
+ "Word", i, Data[i]);
+ }
+}
+
#endif
diff --git a/usr/src/uts/intel/io/acpica/resources/rsinfo.c b/usr/src/uts/intel/io/acpica/resources/rsinfo.c
index 5753667806..e7287d5455 100644
--- a/usr/src/uts/intel/io/acpica/resources/rsinfo.c
+++ b/usr/src/uts/intel/io/acpica/resources/rsinfo.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -82,7 +82,10 @@ ACPI_RSCONVERT_INFO *AcpiGbl_SetResourceDispatch[] =
AcpiRsConvertAddress64, /* 0x0D, ACPI_RESOURCE_TYPE_ADDRESS64 */
AcpiRsConvertExtAddress64, /* 0x0E, ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64 */
AcpiRsConvertExtIrq, /* 0x0F, ACPI_RESOURCE_TYPE_EXTENDED_IRQ */
- AcpiRsConvertGenericReg /* 0x10, ACPI_RESOURCE_TYPE_GENERIC_REGISTER */
+ AcpiRsConvertGenericReg, /* 0x10, ACPI_RESOURCE_TYPE_GENERIC_REGISTER */
+ AcpiRsConvertGpio, /* 0x11, ACPI_RESOURCE_TYPE_GPIO */
+ AcpiRsConvertFixedDma, /* 0x12, ACPI_RESOURCE_TYPE_FIXED_DMA */
+ NULL, /* 0x13, ACPI_RESOURCE_TYPE_SERIAL_BUS - Use subtype table below */
};
/* Dispatch tables for AML-to-resource (Get Resource) conversion functions */
@@ -101,7 +104,7 @@ ACPI_RSCONVERT_INFO *AcpiGbl_GetResourceDispatch[] =
AcpiRsConvertEndDpf, /* 0x07, ACPI_RESOURCE_NAME_END_DEPENDENT */
AcpiRsConvertIo, /* 0x08, ACPI_RESOURCE_NAME_IO */
AcpiRsConvertFixedIo, /* 0x09, ACPI_RESOURCE_NAME_FIXED_IO */
- NULL, /* 0x0A, Reserved */
+ AcpiRsConvertFixedDma, /* 0x0A, ACPI_RESOURCE_NAME_FIXED_DMA */
NULL, /* 0x0B, Reserved */
NULL, /* 0x0C, Reserved */
NULL, /* 0x0D, Reserved */
@@ -121,7 +124,20 @@ ACPI_RSCONVERT_INFO *AcpiGbl_GetResourceDispatch[] =
AcpiRsConvertAddress16, /* 0x08, ACPI_RESOURCE_NAME_ADDRESS16 */
AcpiRsConvertExtIrq, /* 0x09, ACPI_RESOURCE_NAME_EXTENDED_IRQ */
AcpiRsConvertAddress64, /* 0x0A, ACPI_RESOURCE_NAME_ADDRESS64 */
- AcpiRsConvertExtAddress64 /* 0x0B, ACPI_RESOURCE_NAME_EXTENDED_ADDRESS64 */
+ AcpiRsConvertExtAddress64, /* 0x0B, ACPI_RESOURCE_NAME_EXTENDED_ADDRESS64 */
+ AcpiRsConvertGpio, /* 0x0C, ACPI_RESOURCE_NAME_GPIO */
+ NULL, /* 0x0D, Reserved */
+ NULL, /* 0x0E, ACPI_RESOURCE_NAME_SERIAL_BUS - Use subtype table below */
+};
+
+/* Subtype table for SerialBus -- I2C, SPI, and UART */
+
+ACPI_RSCONVERT_INFO *AcpiGbl_ConvertResourceSerialBusDispatch[] =
+{
+ NULL,
+ AcpiRsConvertI2cSerialBus,
+ AcpiRsConvertSpiSerialBus,
+ AcpiRsConvertUartSerialBus,
};
@@ -148,6 +164,17 @@ ACPI_RSDUMP_INFO *AcpiGbl_DumpResourceDispatch[] =
AcpiRsDumpExtAddress64, /* ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64 */
AcpiRsDumpExtIrq, /* ACPI_RESOURCE_TYPE_EXTENDED_IRQ */
AcpiRsDumpGenericReg, /* ACPI_RESOURCE_TYPE_GENERIC_REGISTER */
+ AcpiRsDumpGpio, /* ACPI_RESOURCE_TYPE_GPIO */
+ AcpiRsDumpFixedDma, /* ACPI_RESOURCE_TYPE_FIXED_DMA */
+ NULL, /* ACPI_RESOURCE_TYPE_SERIAL_BUS */
+};
+
+ACPI_RSDUMP_INFO *AcpiGbl_DumpSerialBusDispatch[] =
+{
+ NULL,
+ AcpiRsDumpI2cSerialBus, /* AML_RESOURCE_I2C_BUS_TYPE */
+ AcpiRsDumpSpiSerialBus, /* AML_RESOURCE_SPI_BUS_TYPE */
+ AcpiRsDumpUartSerialBus, /* AML_RESOURCE_UART_BUS_TYPE */
};
#endif
@@ -175,7 +202,10 @@ const UINT8 AcpiGbl_AmlResourceSizes[] =
sizeof (AML_RESOURCE_ADDRESS64), /* ACPI_RESOURCE_TYPE_ADDRESS64 */
sizeof (AML_RESOURCE_EXTENDED_ADDRESS64),/*ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64 */
sizeof (AML_RESOURCE_EXTENDED_IRQ), /* ACPI_RESOURCE_TYPE_EXTENDED_IRQ */
- sizeof (AML_RESOURCE_GENERIC_REGISTER) /* ACPI_RESOURCE_TYPE_GENERIC_REGISTER */
+ sizeof (AML_RESOURCE_GENERIC_REGISTER), /* ACPI_RESOURCE_TYPE_GENERIC_REGISTER */
+ sizeof (AML_RESOURCE_GPIO), /* ACPI_RESOURCE_TYPE_GPIO */
+ sizeof (AML_RESOURCE_FIXED_DMA), /* ACPI_RESOURCE_TYPE_FIXED_DMA */
+ sizeof (AML_RESOURCE_COMMON_SERIALBUS), /* ACPI_RESOURCE_TYPE_SERIAL_BUS */
};
@@ -193,7 +223,7 @@ const UINT8 AcpiGbl_ResourceStructSizes[] =
ACPI_RS_SIZE_MIN,
ACPI_RS_SIZE (ACPI_RESOURCE_IO),
ACPI_RS_SIZE (ACPI_RESOURCE_FIXED_IO),
- 0,
+ ACPI_RS_SIZE (ACPI_RESOURCE_FIXED_DMA),
0,
0,
0,
@@ -213,6 +243,23 @@ const UINT8 AcpiGbl_ResourceStructSizes[] =
ACPI_RS_SIZE (ACPI_RESOURCE_ADDRESS16),
ACPI_RS_SIZE (ACPI_RESOURCE_EXTENDED_IRQ),
ACPI_RS_SIZE (ACPI_RESOURCE_ADDRESS64),
- ACPI_RS_SIZE (ACPI_RESOURCE_EXTENDED_ADDRESS64)
+ ACPI_RS_SIZE (ACPI_RESOURCE_EXTENDED_ADDRESS64),
+ ACPI_RS_SIZE (ACPI_RESOURCE_GPIO),
+ ACPI_RS_SIZE (ACPI_RESOURCE_COMMON_SERIALBUS)
+};
+
+const UINT8 AcpiGbl_AmlResourceSerialBusSizes[] =
+{
+ 0,
+ sizeof (AML_RESOURCE_I2C_SERIALBUS),
+ sizeof (AML_RESOURCE_SPI_SERIALBUS),
+ sizeof (AML_RESOURCE_UART_SERIALBUS),
};
+const UINT8 AcpiGbl_ResourceStructSerialBusSizes[] =
+{
+ 0,
+ ACPI_RS_SIZE (ACPI_RESOURCE_I2C_SERIALBUS),
+ ACPI_RS_SIZE (ACPI_RESOURCE_SPI_SERIALBUS),
+ ACPI_RS_SIZE (ACPI_RESOURCE_UART_SERIALBUS),
+};
diff --git a/usr/src/uts/intel/io/acpica/resources/rsio.c b/usr/src/uts/intel/io/acpica/resources/rsio.c
index 2d0f9f7baa..065108cfa1 100644
--- a/usr/src/uts/intel/io/acpica/resources/rsio.c
+++ b/usr/src/uts/intel/io/acpica/resources/rsio.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/resources/rsirq.c b/usr/src/uts/intel/io/acpica/resources/rsirq.c
index ff952f96ff..c2a88ee89a 100644
--- a/usr/src/uts/intel/io/acpica/resources/rsirq.c
+++ b/usr/src/uts/intel/io/acpica/resources/rsirq.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -274,3 +274,35 @@ ACPI_RSCONVERT_INFO AcpiRsConvertDma[6] =
ACPI_RS_OFFSET (Data.Dma.ChannelCount)}
};
+
+/*******************************************************************************
+ *
+ * AcpiRsConvertFixedDma
+ *
+ ******************************************************************************/
+
+ACPI_RSCONVERT_INFO AcpiRsConvertFixedDma[4] =
+{
+ {ACPI_RSC_INITGET, ACPI_RESOURCE_TYPE_FIXED_DMA,
+ ACPI_RS_SIZE (ACPI_RESOURCE_FIXED_DMA),
+ ACPI_RSC_TABLE_SIZE (AcpiRsConvertFixedDma)},
+
+ {ACPI_RSC_INITSET, ACPI_RESOURCE_NAME_FIXED_DMA,
+ sizeof (AML_RESOURCE_FIXED_DMA),
+ 0},
+
+ /*
+ * These fields are contiguous in both the source and destination:
+ * RequestLines
+ * Channels
+ */
+
+ {ACPI_RSC_MOVE16, ACPI_RS_OFFSET (Data.FixedDma.RequestLines),
+ AML_OFFSET (FixedDma.RequestLines),
+ 2},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.FixedDma.Width),
+ AML_OFFSET (FixedDma.Width),
+ 1},
+
+};
diff --git a/usr/src/uts/intel/io/acpica/resources/rslist.c b/usr/src/uts/intel/io/acpica/resources/rslist.c
index c64065f2ca..187d56cd42 100644
--- a/usr/src/uts/intel/io/acpica/resources/rslist.c
+++ b/usr/src/uts/intel/io/acpica/resources/rslist.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -77,6 +77,8 @@ AcpiRsConvertAmlToResources (
ACPI_RESOURCE **ResourcePtr = ACPI_CAST_INDIRECT_PTR (
ACPI_RESOURCE, Context);
ACPI_RESOURCE *Resource;
+ AML_RESOURCE *AmlResource;
+ ACPI_RSCONVERT_INFO *ConversionTable;
ACPI_STATUS Status;
@@ -94,11 +96,42 @@ AcpiRsConvertAmlToResources (
"Misaligned resource pointer %p", Resource));
}
- /* Convert the AML byte stream resource to a local resource struct */
+ /* Get the appropriate conversion info table */
+
+ AmlResource = ACPI_CAST_PTR (AML_RESOURCE, Aml);
+ if (AcpiUtGetResourceType (Aml) == ACPI_RESOURCE_NAME_SERIAL_BUS)
+ {
+ if (AmlResource->CommonSerialBus.Type > AML_RESOURCE_MAX_SERIALBUSTYPE)
+ {
+ ConversionTable = NULL;
+ }
+ else
+ {
+ /* This is an I2C, SPI, or UART SerialBus descriptor */
+
+ ConversionTable =
+ AcpiGbl_ConvertResourceSerialBusDispatch[
+ AmlResource->CommonSerialBus.Type];
+ }
+ }
+ else
+ {
+ ConversionTable =
+ AcpiGbl_GetResourceDispatch[ResourceIndex];
+ }
+
+ if (!ConversionTable)
+ {
+ ACPI_ERROR ((AE_INFO,
+ "Invalid/unsupported resource descriptor: Type 0x%2.2X",
+ ResourceIndex));
+ return (AE_AML_INVALID_RESOURCE_TYPE);
+ }
+
+ /* Convert the AML byte stream resource to a local resource struct */
Status = AcpiRsConvertAmlToResource (
- Resource, ACPI_CAST_PTR (AML_RESOURCE, Aml),
- AcpiGbl_GetResourceDispatch[ResourceIndex]);
+ Resource, AmlResource, ConversionTable);
if (ACPI_FAILURE (Status))
{
ACPI_EXCEPTION ((AE_INFO, Status,
@@ -113,7 +146,7 @@ AcpiRsConvertAmlToResources (
/* Point to the next structure in the output buffer */
- *ResourcePtr = ACPI_ADD_PTR (void, Resource, Resource->Length);
+ *ResourcePtr = ACPI_NEXT_RESOURCE (Resource);
return_ACPI_STATUS (AE_OK);
}
@@ -145,6 +178,7 @@ AcpiRsConvertResourcesToAml (
{
UINT8 *Aml = OutputBuffer;
UINT8 *EndAml = OutputBuffer + AmlSizeNeeded;
+ ACPI_RSCONVERT_INFO *ConversionTable;
ACPI_STATUS Status;
@@ -167,9 +201,36 @@ AcpiRsConvertResourcesToAml (
/* Perform the conversion */
+ if (Resource->Type == ACPI_RESOURCE_TYPE_SERIAL_BUS)
+ {
+ if (Resource->Data.CommonSerialBus.Type > AML_RESOURCE_MAX_SERIALBUSTYPE)
+ {
+ ConversionTable = NULL;
+ }
+ else
+ {
+ /* This is an I2C, SPI, or UART SerialBus descriptor */
+
+ ConversionTable = AcpiGbl_ConvertResourceSerialBusDispatch[
+ Resource->Data.CommonSerialBus.Type];
+ }
+ }
+ else
+ {
+ ConversionTable = AcpiGbl_SetResourceDispatch[Resource->Type];
+ }
+
+ if (!ConversionTable)
+ {
+ ACPI_ERROR ((AE_INFO,
+ "Invalid/unsupported resource descriptor: Type 0x%2.2X",
+ Resource->Type));
+ return (AE_AML_INVALID_RESOURCE_TYPE);
+ }
+
Status = AcpiRsConvertResourceToAml (Resource,
- ACPI_CAST_PTR (AML_RESOURCE, Aml),
- AcpiGbl_SetResourceDispatch[Resource->Type]);
+ ACPI_CAST_PTR (AML_RESOURCE, Aml),
+ ConversionTable);
if (ACPI_FAILURE (Status))
{
ACPI_EXCEPTION ((AE_INFO, Status,
@@ -204,7 +265,7 @@ AcpiRsConvertResourcesToAml (
/* Point to the next input resource descriptor */
- Resource = ACPI_ADD_PTR (ACPI_RESOURCE, Resource, Resource->Length);
+ Resource = ACPI_NEXT_RESOURCE (Resource);
}
/* Completed buffer, but did not find an EndTag resource descriptor */
diff --git a/usr/src/uts/intel/io/acpica/resources/rsmemory.c b/usr/src/uts/intel/io/acpica/resources/rsmemory.c
index 06262fa64b..83d131956f 100644
--- a/usr/src/uts/intel/io/acpica/resources/rsmemory.c
+++ b/usr/src/uts/intel/io/acpica/resources/rsmemory.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/resources/rsmisc.c b/usr/src/uts/intel/io/acpica/resources/rsmisc.c
index 45415faacc..dfad696ca1 100644
--- a/usr/src/uts/intel/io/acpica/resources/rsmisc.c
+++ b/usr/src/uts/intel/io/acpica/resources/rsmisc.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -94,6 +94,11 @@ AcpiRsConvertAmlToResource (
ACPI_FUNCTION_TRACE (RsConvertAmlToResource);
+ if (!Info)
+ {
+ return_ACPI_STATUS (AE_BAD_PARAMETER);
+ }
+
if (((ACPI_SIZE) Resource) & 0x3)
{
/* Each internal resource struct is expected to be 32-bit aligned */
@@ -112,7 +117,6 @@ AcpiRsConvertAmlToResource (
* table length (# of table entries)
*/
Count = INIT_TABLE_LENGTH (Info);
-
while (Count)
{
/*
@@ -162,6 +166,15 @@ AcpiRsConvertAmlToResource (
break;
+ case ACPI_RSC_3BITFLAG:
+ /*
+ * Mask and shift the flag bits
+ */
+ ACPI_SET8 (Destination) = (UINT8)
+ ((ACPI_GET8 (Source) >> Info->Value) & 0x07);
+ break;
+
+
case ACPI_RSC_COUNT:
ItemCount = ACPI_GET8 (Source);
@@ -182,6 +195,75 @@ AcpiRsConvertAmlToResource (
break;
+ case ACPI_RSC_COUNT_GPIO_PIN:
+
+ Target = ACPI_ADD_PTR (void, Aml, Info->Value);
+ ItemCount = ACPI_GET16 (Target) - ACPI_GET16 (Source);
+
+ Resource->Length = Resource->Length + ItemCount;
+ ItemCount = ItemCount / 2;
+ ACPI_SET16 (Destination) = ItemCount;
+ break;
+
+
+ case ACPI_RSC_COUNT_GPIO_VEN:
+
+ ItemCount = ACPI_GET8 (Source);
+ ACPI_SET8 (Destination) = (UINT8) ItemCount;
+
+ Resource->Length = Resource->Length +
+ (Info->Value * ItemCount);
+ break;
+
+
+ case ACPI_RSC_COUNT_GPIO_RES:
+
+ /*
+ * Vendor data is optional (length/offset may both be zero)
+ * Examine vendor data length field first
+ */
+ Target = ACPI_ADD_PTR (void, Aml, (Info->Value + 2));
+ if (ACPI_GET16 (Target))
+ {
+ /* Use vendor offset to get resource source length */
+
+ Target = ACPI_ADD_PTR (void, Aml, Info->Value);
+ ItemCount = ACPI_GET16 (Target) - ACPI_GET16 (Source);
+ }
+ else
+ {
+ /* No vendor data to worry about */
+
+ ItemCount = Aml->LargeHeader.ResourceLength +
+ sizeof (AML_RESOURCE_LARGE_HEADER) -
+ ACPI_GET16 (Source);
+ }
+
+ Resource->Length = Resource->Length + ItemCount;
+ ACPI_SET16 (Destination) = ItemCount;
+ break;
+
+
+ case ACPI_RSC_COUNT_SERIAL_VEN:
+
+ ItemCount = ACPI_GET16 (Source) - Info->Value;
+
+ Resource->Length = Resource->Length + ItemCount;
+ ACPI_SET16 (Destination) = ItemCount;
+ break;
+
+
+ case ACPI_RSC_COUNT_SERIAL_RES:
+
+ ItemCount = (AmlResourceLength +
+ sizeof (AML_RESOURCE_LARGE_HEADER)) -
+ ACPI_GET16 (Source) - Info->Value;
+
+ Resource->Length = Resource->Length + ItemCount;
+ ACPI_SET16 (Destination) = ItemCount;
+ break;
+
+
case ACPI_RSC_LENGTH:
Resource->Length = Resource->Length + Info->Value;
@@ -204,6 +286,66 @@ AcpiRsConvertAmlToResource (
break;
+ case ACPI_RSC_MOVE_GPIO_PIN:
+
+ /* Generate and set the PIN data pointer */
+
+ Target = (char *) ACPI_ADD_PTR (void, Resource,
+ (Resource->Length - ItemCount * 2));
+ *(UINT16 **) Destination = ACPI_CAST_PTR (UINT16, Target);
+
+ /* Copy the PIN data */
+
+ Source = ACPI_ADD_PTR (void, Aml, ACPI_GET16 (Source));
+ AcpiRsMoveData (Target, Source, ItemCount, Info->Opcode);
+ break;
+
+
+ case ACPI_RSC_MOVE_GPIO_RES:
+
+ /* Generate and set the ResourceSource string pointer */
+
+ Target = (char *) ACPI_ADD_PTR (void, Resource,
+ (Resource->Length - ItemCount));
+ *(UINT8 **) Destination = ACPI_CAST_PTR (UINT8, Target);
+
+ /* Copy the ResourceSource string */
+
+ Source = ACPI_ADD_PTR (void, Aml, ACPI_GET16 (Source));
+ AcpiRsMoveData (Target, Source, ItemCount, Info->Opcode);
+ break;
+
+
+ case ACPI_RSC_MOVE_SERIAL_VEN:
+
+ /* Generate and set the Vendor Data pointer */
+
+ Target = (char *) ACPI_ADD_PTR (void, Resource,
+ (Resource->Length - ItemCount));
+ *(UINT8 **) Destination = ACPI_CAST_PTR (UINT8, Target);
+
+ /* Copy the Vendor Data */
+
+ Source = ACPI_ADD_PTR (void, Aml, Info->Value);
+ AcpiRsMoveData (Target, Source, ItemCount, Info->Opcode);
+ break;
+
+
+ case ACPI_RSC_MOVE_SERIAL_RES:
+
+ /* Generate and set the ResourceSource string pointer */
+
+ Target = (char *) ACPI_ADD_PTR (void, Resource,
+ (Resource->Length - ItemCount));
+ *(UINT8 **) Destination = ACPI_CAST_PTR (UINT8, Target);
+
+ /* Copy the ResourceSource string */
+
+ Source = ACPI_ADD_PTR (void, Aml, (ACPI_GET16 (Source) + Info->Value));
+ AcpiRsMoveData (Target, Source, ItemCount, Info->Opcode);
+ break;
+
+
case ACPI_RSC_SET8:
ACPI_MEMSET (Destination, Info->AmlOffset, Info->Value);
@@ -243,11 +385,12 @@ AcpiRsConvertAmlToResource (
* Optional ResourceSource (Index and String). This is the more
* complicated case used by the Interrupt() macro
*/
- Target = ACPI_ADD_PTR (char, Resource, Info->AmlOffset + (ItemCount * 4));
+ Target = ACPI_ADD_PTR (char, Resource,
+ Info->AmlOffset + (ItemCount * 4));
Resource->Length +=
- AcpiRsGetResourceSource (AmlResourceLength,
- (ACPI_RS_LENGTH) (((ItemCount - 1) * sizeof (UINT32)) + Info->Value),
+ AcpiRsGetResourceSource (AmlResourceLength, (ACPI_RS_LENGTH)
+ (((ItemCount - 1) * sizeof (UINT32)) + Info->Value),
Destination, Aml, Target);
break;
@@ -356,6 +499,7 @@ AcpiRsConvertResourceToAml (
{
void *Source = NULL;
void *Destination;
+ char *Target;
ACPI_RSDESC_SIZE AmlLength = 0;
UINT8 Count;
UINT16 Temp16 = 0;
@@ -365,6 +509,11 @@ AcpiRsConvertResourceToAml (
ACPI_FUNCTION_TRACE (RsConvertResourceToAml);
+ if (!Info)
+ {
+ return_ACPI_STATUS (AE_BAD_PARAMETER);
+ }
+
/*
* First table entry must be ACPI_RSC_INITxxx and must contain the
* table length (# of table entries)
@@ -420,6 +569,15 @@ AcpiRsConvertResourceToAml (
break;
+ case ACPI_RSC_3BITFLAG:
+ /*
+ * Mask and shift the flag bits
+ */
+ ACPI_SET8 (Destination) |= (UINT8)
+ ((ACPI_GET8 (Source) & 0x07) << Info->Value);
+ break;
+
+
case ACPI_RSC_COUNT:
ItemCount = ACPI_GET8 (Source);
@@ -437,6 +595,68 @@ AcpiRsConvertResourceToAml (
break;
+ case ACPI_RSC_COUNT_GPIO_PIN:
+
+ ItemCount = ACPI_GET16 (Source);
+ ACPI_SET16 (Destination) = (UINT16) AmlLength;
+
+ AmlLength = (UINT16) (AmlLength + ItemCount * 2);
+ Target = ACPI_ADD_PTR (void, Aml, Info->Value);
+ ACPI_SET16 (Target) = (UINT16) AmlLength;
+ AcpiRsSetResourceLength (AmlLength, Aml);
+ break;
+
+
+ case ACPI_RSC_COUNT_GPIO_VEN:
+
+ ItemCount = ACPI_GET16 (Source);
+ ACPI_SET16 (Destination) = (UINT16) ItemCount;
+
+ AmlLength = (UINT16) (AmlLength + (Info->Value * ItemCount));
+ AcpiRsSetResourceLength (AmlLength, Aml);
+ break;
+
+
+ case ACPI_RSC_COUNT_GPIO_RES:
+
+ /* Set resource source string length */
+
+ ItemCount = ACPI_GET16 (Source);
+ ACPI_SET16 (Destination) = (UINT16) AmlLength;
+
+ /* Compute offset for the Vendor Data */
+
+ AmlLength = (UINT16) (AmlLength + ItemCount);
+ Target = ACPI_ADD_PTR (void, Aml, Info->Value);
+
+ /* Set vendor offset only if there is vendor data */
+
+ if (Resource->Data.Gpio.VendorLength)
+ {
+ ACPI_SET16 (Target) = (UINT16) AmlLength;
+ }
+
+ AcpiRsSetResourceLength (AmlLength, Aml);
+ break;
+
+
+ case ACPI_RSC_COUNT_SERIAL_VEN:
+
+ ItemCount = ACPI_GET16 (Source);
+ ACPI_SET16 (Destination) = ItemCount + Info->Value;
+ AmlLength = (UINT16) (AmlLength + ItemCount);
+ AcpiRsSetResourceLength (AmlLength, Aml);
+ break;
+
+
+ case ACPI_RSC_COUNT_SERIAL_RES:
+
+ ItemCount = ACPI_GET16 (Source);
+ AmlLength = (UINT16) (AmlLength + ItemCount);
+ AcpiRsSetResourceLength (AmlLength, Aml);
+ break;
+
+
case ACPI_RSC_LENGTH:
AcpiRsSetResourceLength (Info->Value, Aml);
@@ -456,6 +676,44 @@ AcpiRsConvertResourceToAml (
break;
+ case ACPI_RSC_MOVE_GPIO_PIN:
+
+ Destination = (char *) ACPI_ADD_PTR (void, Aml,
+ ACPI_GET16 (Destination));
+ Source = * (UINT16 **) Source;
+ AcpiRsMoveData (Destination, Source, ItemCount, Info->Opcode);
+ break;
+
+
+ case ACPI_RSC_MOVE_GPIO_RES:
+
+ /* Used for both ResourceSource string and VendorData */
+
+ Destination = (char *) ACPI_ADD_PTR (void, Aml,
+ ACPI_GET16 (Destination));
+ Source = * (UINT8 **) Source;
+ AcpiRsMoveData (Destination, Source, ItemCount, Info->Opcode);
+ break;
+
+
+ case ACPI_RSC_MOVE_SERIAL_VEN:
+
+ Destination = (char *) ACPI_ADD_PTR (void, Aml,
+ (AmlLength - ItemCount));
+ Source = * (UINT8 **) Source;
+ AcpiRsMoveData (Destination, Source, ItemCount, Info->Opcode);
+ break;
+
+
+ case ACPI_RSC_MOVE_SERIAL_RES:
+
+ Destination = (char *) ACPI_ADD_PTR (void, Aml,
+ (AmlLength - ItemCount));
+ Source = * (UINT8 **) Source;
+ AcpiRsMoveData (Destination, Source, ItemCount, Info->Opcode);
+ break;
+
+
case ACPI_RSC_ADDRESS:
/* Set the Resource Type, General Flags, and Type-Specific Flags */
diff --git a/usr/src/uts/intel/io/acpica/resources/rsserial.c b/usr/src/uts/intel/io/acpica/resources/rsserial.c
new file mode 100644
index 0000000000..3a7784e96c
--- /dev/null
+++ b/usr/src/uts/intel/io/acpica/resources/rsserial.c
@@ -0,0 +1,425 @@
+/*******************************************************************************
+ *
+ * Module Name: rsserial - GPIO/SerialBus resource descriptors
+ *
+ ******************************************************************************/
+
+/*
+ * Copyright (C) 2000 - 2012, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * substantially similar to the "NO WARRANTY" disclaimer below
+ * ("Disclaimer") and any redistribution must be conditioned upon
+ * including a substantially similar Disclaimer requirement for further
+ * binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ * of any contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#define __RSIRQ_C__
+
+#include "acpi.h"
+#include "accommon.h"
+#include "acresrc.h"
+
+#define _COMPONENT ACPI_RESOURCES
+ ACPI_MODULE_NAME ("rsserial")
+
+
+/*******************************************************************************
+ *
+ * AcpiRsConvertGpio
+ *
+ ******************************************************************************/
+
+ACPI_RSCONVERT_INFO AcpiRsConvertGpio[17] =
+{
+ {ACPI_RSC_INITGET, ACPI_RESOURCE_TYPE_GPIO,
+ ACPI_RS_SIZE (ACPI_RESOURCE_GPIO),
+ ACPI_RSC_TABLE_SIZE (AcpiRsConvertGpio)},
+
+ {ACPI_RSC_INITSET, ACPI_RESOURCE_NAME_GPIO,
+ sizeof (AML_RESOURCE_GPIO),
+ 0},
+
+ /*
+ * These fields are contiguous in both the source and destination:
+ * RevisionId
+ * ConnectionType
+ */
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.Gpio.RevisionId),
+ AML_OFFSET (Gpio.RevisionId),
+ 2},
+
+ {ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET (Data.Gpio.ProducerConsumer),
+ AML_OFFSET (Gpio.Flags),
+ 0},
+
+ {ACPI_RSC_2BITFLAG, ACPI_RS_OFFSET (Data.Gpio.Sharable),
+ AML_OFFSET (Gpio.IntFlags),
+ 3},
+
+ {ACPI_RSC_2BITFLAG, ACPI_RS_OFFSET (Data.Gpio.IoRestriction),
+ AML_OFFSET (Gpio.IntFlags),
+ 0},
+
+ {ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET (Data.Gpio.Triggering),
+ AML_OFFSET (Gpio.IntFlags),
+ 0},
+
+ {ACPI_RSC_2BITFLAG, ACPI_RS_OFFSET (Data.Gpio.Polarity),
+ AML_OFFSET (Gpio.IntFlags),
+ 1},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.Gpio.PinConfig),
+ AML_OFFSET (Gpio.PinConfig),
+ 1},
+
+ /*
+ * These fields are contiguous in both the source and destination:
+ * DriveStrength
+ * DebounceTimeout
+ */
+ {ACPI_RSC_MOVE16, ACPI_RS_OFFSET (Data.Gpio.DriveStrength),
+ AML_OFFSET (Gpio.DriveStrength),
+ 2},
+
+ /* Pin Table */
+
+ {ACPI_RSC_COUNT_GPIO_PIN, ACPI_RS_OFFSET (Data.Gpio.PinTableLength),
+ AML_OFFSET (Gpio.PinTableOffset),
+ AML_OFFSET (Gpio.ResSourceOffset)},
+
+ {ACPI_RSC_MOVE_GPIO_PIN, ACPI_RS_OFFSET (Data.Gpio.PinTable),
+ AML_OFFSET (Gpio.PinTableOffset),
+ 0},
+
+ /* Resource Source */
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.Gpio.ResourceSource.Index),
+ AML_OFFSET (Gpio.ResSourceIndex),
+ 1},
+
+ {ACPI_RSC_COUNT_GPIO_RES, ACPI_RS_OFFSET (Data.Gpio.ResourceSource.StringLength),
+ AML_OFFSET (Gpio.ResSourceOffset),
+ AML_OFFSET (Gpio.VendorOffset)},
+
+ {ACPI_RSC_MOVE_GPIO_RES, ACPI_RS_OFFSET (Data.Gpio.ResourceSource.StringPtr),
+ AML_OFFSET (Gpio.ResSourceOffset),
+ 0},
+
+ /* Vendor Data */
+
+ {ACPI_RSC_COUNT_GPIO_VEN, ACPI_RS_OFFSET (Data.Gpio.VendorLength),
+ AML_OFFSET (Gpio.VendorLength),
+ 1},
+
+ {ACPI_RSC_MOVE_GPIO_RES, ACPI_RS_OFFSET (Data.Gpio.VendorData),
+ AML_OFFSET (Gpio.VendorOffset),
+ 0},
+};
+
+
+/*******************************************************************************
+ *
+ * AcpiRsConvertI2cSerialBus
+ *
+ ******************************************************************************/
+
+ACPI_RSCONVERT_INFO AcpiRsConvertI2cSerialBus[16] =
+{
+ {ACPI_RSC_INITGET, ACPI_RESOURCE_TYPE_SERIAL_BUS,
+ ACPI_RS_SIZE (ACPI_RESOURCE_I2C_SERIALBUS),
+ ACPI_RSC_TABLE_SIZE (AcpiRsConvertI2cSerialBus)},
+
+ {ACPI_RSC_INITSET, ACPI_RESOURCE_NAME_SERIAL_BUS,
+ sizeof (AML_RESOURCE_I2C_SERIALBUS),
+ 0},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.CommonSerialBus.RevisionId),
+ AML_OFFSET (CommonSerialBus.RevisionId),
+ 1},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.CommonSerialBus.Type),
+ AML_OFFSET (CommonSerialBus.Type),
+ 1},
+
+ {ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET (Data.CommonSerialBus.SlaveMode),
+ AML_OFFSET (CommonSerialBus.Flags),
+ 0},
+
+ {ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET (Data.CommonSerialBus.ProducerConsumer),
+ AML_OFFSET (CommonSerialBus.Flags),
+ 1},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.CommonSerialBus.TypeRevisionId),
+ AML_OFFSET (CommonSerialBus.TypeRevisionId),
+ 1},
+
+ {ACPI_RSC_MOVE16, ACPI_RS_OFFSET (Data.CommonSerialBus.TypeDataLength),
+ AML_OFFSET (CommonSerialBus.TypeDataLength),
+ 1},
+
+ /* Vendor data */
+
+ {ACPI_RSC_COUNT_SERIAL_VEN, ACPI_RS_OFFSET (Data.CommonSerialBus.VendorLength),
+ AML_OFFSET (CommonSerialBus.TypeDataLength),
+ AML_RESOURCE_I2C_MIN_DATA_LEN},
+
+ {ACPI_RSC_MOVE_SERIAL_VEN, ACPI_RS_OFFSET (Data.CommonSerialBus.VendorData),
+ 0,
+ sizeof (AML_RESOURCE_I2C_SERIALBUS)},
+
+ /* Resource Source */
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.CommonSerialBus.ResourceSource.Index),
+ AML_OFFSET (CommonSerialBus.ResSourceIndex),
+ 1},
+
+ {ACPI_RSC_COUNT_SERIAL_RES, ACPI_RS_OFFSET (Data.CommonSerialBus.ResourceSource.StringLength),
+ AML_OFFSET (CommonSerialBus.TypeDataLength),
+ sizeof (AML_RESOURCE_COMMON_SERIALBUS)},
+
+ {ACPI_RSC_MOVE_SERIAL_RES, ACPI_RS_OFFSET (Data.CommonSerialBus.ResourceSource.StringPtr),
+ AML_OFFSET (CommonSerialBus.TypeDataLength),
+ sizeof (AML_RESOURCE_COMMON_SERIALBUS)},
+
+ /* I2C bus type specific */
+
+ {ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET (Data.I2cSerialBus.AccessMode),
+ AML_OFFSET (I2cSerialBus.TypeSpecificFlags),
+ 0},
+
+ {ACPI_RSC_MOVE32, ACPI_RS_OFFSET (Data.I2cSerialBus.ConnectionSpeed),
+ AML_OFFSET (I2cSerialBus.ConnectionSpeed),
+ 1},
+
+ {ACPI_RSC_MOVE16, ACPI_RS_OFFSET (Data.I2cSerialBus.SlaveAddress),
+ AML_OFFSET (I2cSerialBus.SlaveAddress),
+ 1},
+};
+
+
+/*******************************************************************************
+ *
+ * AcpiRsConvertSpiSerialBus
+ *
+ ******************************************************************************/
+
+ACPI_RSCONVERT_INFO AcpiRsConvertSpiSerialBus[20] =
+{
+ {ACPI_RSC_INITGET, ACPI_RESOURCE_TYPE_SERIAL_BUS,
+ ACPI_RS_SIZE (ACPI_RESOURCE_SPI_SERIALBUS),
+ ACPI_RSC_TABLE_SIZE (AcpiRsConvertSpiSerialBus)},
+
+ {ACPI_RSC_INITSET, ACPI_RESOURCE_NAME_SERIAL_BUS,
+ sizeof (AML_RESOURCE_SPI_SERIALBUS),
+ 0},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.CommonSerialBus.RevisionId),
+ AML_OFFSET (CommonSerialBus.RevisionId),
+ 1},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.CommonSerialBus.Type),
+ AML_OFFSET (CommonSerialBus.Type),
+ 1},
+
+ {ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET (Data.CommonSerialBus.SlaveMode),
+ AML_OFFSET (CommonSerialBus.Flags),
+ 0},
+
+ {ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET (Data.CommonSerialBus.ProducerConsumer),
+ AML_OFFSET (CommonSerialBus.Flags),
+ 1},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.CommonSerialBus.TypeRevisionId),
+ AML_OFFSET (CommonSerialBus.TypeRevisionId),
+ 1},
+
+ {ACPI_RSC_MOVE16, ACPI_RS_OFFSET (Data.CommonSerialBus.TypeDataLength),
+ AML_OFFSET (CommonSerialBus.TypeDataLength),
+ 1},
+
+ /* Vendor data */
+
+ {ACPI_RSC_COUNT_SERIAL_VEN, ACPI_RS_OFFSET (Data.CommonSerialBus.VendorLength),
+ AML_OFFSET (CommonSerialBus.TypeDataLength),
+ AML_RESOURCE_SPI_MIN_DATA_LEN},
+
+ {ACPI_RSC_MOVE_SERIAL_VEN, ACPI_RS_OFFSET (Data.CommonSerialBus.VendorData),
+ 0,
+ sizeof (AML_RESOURCE_SPI_SERIALBUS)},
+
+ /* Resource Source */
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.CommonSerialBus.ResourceSource.Index),
+ AML_OFFSET (CommonSerialBus.ResSourceIndex),
+ 1},
+
+ {ACPI_RSC_COUNT_SERIAL_RES, ACPI_RS_OFFSET (Data.CommonSerialBus.ResourceSource.StringLength),
+ AML_OFFSET (CommonSerialBus.TypeDataLength),
+ sizeof (AML_RESOURCE_COMMON_SERIALBUS)},
+
+ {ACPI_RSC_MOVE_SERIAL_RES, ACPI_RS_OFFSET (Data.CommonSerialBus.ResourceSource.StringPtr),
+ AML_OFFSET (CommonSerialBus.TypeDataLength),
+ sizeof (AML_RESOURCE_COMMON_SERIALBUS)},
+
+ /* Spi bus type specific */
+
+ {ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET (Data.SpiSerialBus.WireMode),
+ AML_OFFSET (SpiSerialBus.TypeSpecificFlags),
+ 0},
+
+ {ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET (Data.SpiSerialBus.DevicePolarity),
+ AML_OFFSET (SpiSerialBus.TypeSpecificFlags),
+ 1},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.SpiSerialBus.DataBitLength),
+ AML_OFFSET (SpiSerialBus.DataBitLength),
+ 1},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.SpiSerialBus.ClockPhase),
+ AML_OFFSET (SpiSerialBus.ClockPhase),
+ 1},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.SpiSerialBus.ClockPolarity),
+ AML_OFFSET (SpiSerialBus.ClockPolarity),
+ 1},
+
+ {ACPI_RSC_MOVE16, ACPI_RS_OFFSET (Data.SpiSerialBus.DeviceSelection),
+ AML_OFFSET (SpiSerialBus.DeviceSelection),
+ 1},
+
+ {ACPI_RSC_MOVE32, ACPI_RS_OFFSET (Data.SpiSerialBus.ConnectionSpeed),
+ AML_OFFSET (SpiSerialBus.ConnectionSpeed),
+ 1},
+};
+
+
+/*******************************************************************************
+ *
+ * AcpiRsConvertUartSerialBus
+ *
+ ******************************************************************************/
+
+ACPI_RSCONVERT_INFO AcpiRsConvertUartSerialBus[22] =
+{
+ {ACPI_RSC_INITGET, ACPI_RESOURCE_TYPE_SERIAL_BUS,
+ ACPI_RS_SIZE (ACPI_RESOURCE_UART_SERIALBUS),
+ ACPI_RSC_TABLE_SIZE (AcpiRsConvertUartSerialBus)},
+
+ {ACPI_RSC_INITSET, ACPI_RESOURCE_NAME_SERIAL_BUS,
+ sizeof (AML_RESOURCE_UART_SERIALBUS),
+ 0},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.CommonSerialBus.RevisionId),
+ AML_OFFSET (CommonSerialBus.RevisionId),
+ 1},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.CommonSerialBus.Type),
+ AML_OFFSET (CommonSerialBus.Type),
+ 1},
+
+ {ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET (Data.CommonSerialBus.SlaveMode),
+ AML_OFFSET (CommonSerialBus.Flags),
+ 0},
+
+ {ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET (Data.CommonSerialBus.ProducerConsumer),
+ AML_OFFSET (CommonSerialBus.Flags),
+ 1},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.CommonSerialBus.TypeRevisionId),
+ AML_OFFSET (CommonSerialBus.TypeRevisionId),
+ 1},
+
+ {ACPI_RSC_MOVE16, ACPI_RS_OFFSET (Data.CommonSerialBus.TypeDataLength),
+ AML_OFFSET (CommonSerialBus.TypeDataLength),
+ 1},
+
+ /* Vendor data */
+
+ {ACPI_RSC_COUNT_SERIAL_VEN, ACPI_RS_OFFSET (Data.CommonSerialBus.VendorLength),
+ AML_OFFSET (CommonSerialBus.TypeDataLength),
+ AML_RESOURCE_UART_MIN_DATA_LEN},
+
+ {ACPI_RSC_MOVE_SERIAL_VEN, ACPI_RS_OFFSET (Data.CommonSerialBus.VendorData),
+ 0,
+ sizeof (AML_RESOURCE_UART_SERIALBUS)},
+
+ /* Resource Source */
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.CommonSerialBus.ResourceSource.Index),
+ AML_OFFSET (CommonSerialBus.ResSourceIndex),
+ 1},
+
+ {ACPI_RSC_COUNT_SERIAL_RES, ACPI_RS_OFFSET (Data.CommonSerialBus.ResourceSource.StringLength),
+ AML_OFFSET (CommonSerialBus.TypeDataLength),
+ sizeof (AML_RESOURCE_COMMON_SERIALBUS)},
+
+ {ACPI_RSC_MOVE_SERIAL_RES, ACPI_RS_OFFSET (Data.CommonSerialBus.ResourceSource.StringPtr),
+ AML_OFFSET (CommonSerialBus.TypeDataLength),
+ sizeof (AML_RESOURCE_COMMON_SERIALBUS)},
+
+ /* Uart bus type specific */
+
+ {ACPI_RSC_2BITFLAG, ACPI_RS_OFFSET (Data.UartSerialBus.FlowControl),
+ AML_OFFSET (UartSerialBus.TypeSpecificFlags),
+ 0},
+
+ {ACPI_RSC_2BITFLAG, ACPI_RS_OFFSET (Data.UartSerialBus.StopBits),
+ AML_OFFSET (UartSerialBus.TypeSpecificFlags),
+ 2},
+
+ {ACPI_RSC_3BITFLAG, ACPI_RS_OFFSET (Data.UartSerialBus.DataBits),
+ AML_OFFSET (UartSerialBus.TypeSpecificFlags),
+ 4},
+
+ {ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET (Data.UartSerialBus.Endian),
+ AML_OFFSET (UartSerialBus.TypeSpecificFlags),
+ 7},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.UartSerialBus.Parity),
+ AML_OFFSET (UartSerialBus.Parity),
+ 1},
+
+ {ACPI_RSC_MOVE8, ACPI_RS_OFFSET (Data.UartSerialBus.LinesEnabled),
+ AML_OFFSET (UartSerialBus.LinesEnabled),
+ 1},
+
+ {ACPI_RSC_MOVE16, ACPI_RS_OFFSET (Data.UartSerialBus.RxFifoSize),
+ AML_OFFSET (UartSerialBus.RxFifoSize),
+ 1},
+
+ {ACPI_RSC_MOVE16, ACPI_RS_OFFSET (Data.UartSerialBus.TxFifoSize),
+ AML_OFFSET (UartSerialBus.TxFifoSize),
+ 1},
+
+ {ACPI_RSC_MOVE32, ACPI_RS_OFFSET (Data.UartSerialBus.DefaultBaudRate),
+ AML_OFFSET (UartSerialBus.DefaultBaudRate),
+ 1},
+};
diff --git a/usr/src/uts/intel/io/acpica/resources/rsutils.c b/usr/src/uts/intel/io/acpica/resources/rsutils.c
index 400bbcd91f..062678651e 100644
--- a/usr/src/uts/intel/io/acpica/resources/rsutils.c
+++ b/usr/src/uts/intel/io/acpica/resources/rsutils.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -173,6 +173,9 @@ AcpiRsMoveData (
* since there are no alignment or endian issues
*/
case ACPI_RSC_MOVE8:
+ case ACPI_RSC_MOVE_GPIO_RES:
+ case ACPI_RSC_MOVE_SERIAL_VEN:
+ case ACPI_RSC_MOVE_SERIAL_RES:
ACPI_MEMCPY (Destination, Source, ItemCount);
return;
@@ -182,6 +185,7 @@ AcpiRsMoveData (
* misaligned memory transfers
*/
case ACPI_RSC_MOVE16:
+ case ACPI_RSC_MOVE_GPIO_PIN:
ACPI_MOVE_16_TO_16 (&ACPI_CAST_PTR (UINT16, Destination)[i],
&ACPI_CAST_PTR (UINT16, Source)[i]);
break;
@@ -653,6 +657,61 @@ AcpiRsGetPrsMethodData (
/*******************************************************************************
*
+ * FUNCTION: AcpiRsGetAeiMethodData
+ *
+ * PARAMETERS: Node - Device node
+ * RetBuffer - Pointer to a buffer structure for the
+ * results
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: This function is called to get the _AEI value of an object
+ * contained in an object specified by the handle passed in
+ *
+ * If the function fails an appropriate status will be returned
+ * and the contents of the callers buffer is undefined.
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiRsGetAeiMethodData (
+ ACPI_NAMESPACE_NODE *Node,
+ ACPI_BUFFER *RetBuffer)
+{
+ ACPI_OPERAND_OBJECT *ObjDesc;
+ ACPI_STATUS Status;
+
+
+ ACPI_FUNCTION_TRACE (RsGetAeiMethodData);
+
+
+ /* Parameters guaranteed valid by caller */
+
+ /* Execute the method, no parameters */
+
+ Status = AcpiUtEvaluateObject (Node, METHOD_NAME__AEI,
+ ACPI_BTYPE_BUFFER, &ObjDesc);
+ if (ACPI_FAILURE (Status))
+ {
+ return_ACPI_STATUS (Status);
+ }
+
+ /*
+ * Make the call to create a resource linked list from the
+ * byte stream buffer that comes back from the _CRS method
+ * execution.
+ */
+ Status = AcpiRsCreateResourceList (ObjDesc, RetBuffer);
+
+ /* On exit, we must delete the object returned by evaluateObject */
+
+ AcpiUtRemoveReference (ObjDesc);
+ return_ACPI_STATUS (Status);
+}
+
+
+/*******************************************************************************
+ *
* FUNCTION: AcpiRsGetMethodData
*
* PARAMETERS: Handle - Handle to the containing object
diff --git a/usr/src/uts/intel/io/acpica/resources/rsxface.c b/usr/src/uts/intel/io/acpica/resources/rsxface.c
index 0aea206b73..d8c3938b31 100644
--- a/usr/src/uts/intel/io/acpica/resources/rsxface.c
+++ b/usr/src/uts/intel/io/acpica/resources/rsxface.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -351,6 +351,52 @@ AcpiSetCurrentResources (
ACPI_EXPORT_SYMBOL (AcpiSetCurrentResources)
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiGetEventResources
+ *
+ * PARAMETERS: DeviceHandle - Handle to the device object for the
+ * device we are getting resources
+ * InBuffer - Pointer to a buffer containing the
+ * resources to be set for the device
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: This function is called to get the event resources for a
+ * specific device. The caller must first acquire a handle for
+ * the desired device. The resource data is passed to the routine
+ * the buffer pointed to by the InBuffer variable. Uses the
+ * _AEI method.
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiGetEventResources (
+ ACPI_HANDLE DeviceHandle,
+ ACPI_BUFFER *RetBuffer)
+{
+ ACPI_STATUS Status;
+ ACPI_NAMESPACE_NODE *Node;
+
+
+ ACPI_FUNCTION_TRACE (AcpiGetEventResources);
+
+
+ /* Validate parameters then dispatch to internal routine */
+
+ Status = AcpiRsValidateParameters (DeviceHandle, RetBuffer, &Node);
+ if (ACPI_FAILURE (Status))
+ {
+ return_ACPI_STATUS (Status);
+ }
+
+ Status = AcpiRsGetAeiMethodData (Node, RetBuffer);
+ return_ACPI_STATUS (Status);
+}
+
+ACPI_EXPORT_SYMBOL (AcpiGetEventResources)
+
+
/******************************************************************************
*
* FUNCTION: AcpiResourceToAddress64
@@ -544,8 +590,9 @@ AcpiRsMatchVendorResource (
*
* PARAMETERS: DeviceHandle - Handle to the device object for the
* device we are querying
- * Name - Method name of the resources we want
- * (METHOD_NAME__CRS or METHOD_NAME__PRS)
+ * Name - Method name of the resources we want.
+ * (METHOD_NAME__CRS, METHOD_NAME__PRS, or
+ * METHOD_NAME__AEI)
* UserFunction - Called for each resource
* Context - Passed to UserFunction
*
@@ -577,12 +624,13 @@ AcpiWalkResources (
if (!DeviceHandle || !UserFunction || !Name ||
(!ACPI_COMPARE_NAME (Name, METHOD_NAME__CRS) &&
- !ACPI_COMPARE_NAME (Name, METHOD_NAME__PRS)))
+ !ACPI_COMPARE_NAME (Name, METHOD_NAME__PRS) &&
+ !ACPI_COMPARE_NAME (Name, METHOD_NAME__AEI)))
{
return_ACPI_STATUS (AE_BAD_PARAMETER);
}
- /* Get the _CRS or _PRS resource list */
+ /* Get the _CRS/_PRS/_AEI resource list */
Buffer.Length = ACPI_ALLOCATE_LOCAL_BUFFER;
Status = AcpiRsGetMethodData (DeviceHandle, Name, &Buffer);
diff --git a/usr/src/uts/intel/io/acpica/tables/tbfadt.c b/usr/src/uts/intel/io/acpica/tables/tbfadt.c
index f8c63408b9..e877320447 100644
--- a/usr/src/uts/intel/io/acpica/tables/tbfadt.c
+++ b/usr/src/uts/intel/io/acpica/tables/tbfadt.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -77,14 +77,15 @@ AcpiTbSetupFadtRegisters (
typedef struct acpi_fadt_info
{
char *Name;
- UINT8 Address64;
- UINT8 Address32;
- UINT8 Length;
+ UINT16 Address64;
+ UINT16 Address32;
+ UINT16 Length;
UINT8 DefaultLength;
UINT8 Type;
} ACPI_FADT_INFO;
+#define ACPI_FADT_OPTIONAL 0
#define ACPI_FADT_REQUIRED 1
#define ACPI_FADT_SEPARATE_LENGTH 2
@@ -102,7 +103,7 @@ static ACPI_FADT_INFO FadtInfoTable[] =
ACPI_FADT_OFFSET (Pm1bEventBlock),
ACPI_FADT_OFFSET (Pm1EventLength),
ACPI_PM1_REGISTER_WIDTH * 2, /* Enable + Status register */
- 0},
+ ACPI_FADT_OPTIONAL},
{"Pm1aControlBlock",
ACPI_FADT_OFFSET (XPm1aControlBlock),
@@ -116,7 +117,7 @@ static ACPI_FADT_INFO FadtInfoTable[] =
ACPI_FADT_OFFSET (Pm1bControlBlock),
ACPI_FADT_OFFSET (Pm1ControlLength),
ACPI_PM1_REGISTER_WIDTH,
- 0},
+ ACPI_FADT_OPTIONAL},
{"Pm2ControlBlock",
ACPI_FADT_OFFSET (XPm2ControlBlock),
@@ -156,7 +157,7 @@ static ACPI_FADT_INFO FadtInfoTable[] =
typedef struct acpi_fadt_pm_info
{
ACPI_GENERIC_ADDRESS *Target;
- UINT8 Source;
+ UINT16 Source;
UINT8 RegisterNum;
} ACPI_FADT_PM_INFO;
@@ -280,8 +281,13 @@ AcpiTbParseFadt (
AcpiTbInstallTable ((ACPI_PHYSICAL_ADDRESS) AcpiGbl_FADT.XDsdt,
ACPI_SIG_DSDT, ACPI_TABLE_INDEX_DSDT);
- AcpiTbInstallTable ((ACPI_PHYSICAL_ADDRESS) AcpiGbl_FADT.XFacs,
- ACPI_SIG_FACS, ACPI_TABLE_INDEX_FACS);
+ /* If Hardware Reduced flag is set, there is no FACS */
+
+ if (!AcpiGbl_ReducedHardware)
+ {
+ AcpiTbInstallTable ((ACPI_PHYSICAL_ADDRESS) AcpiGbl_FADT.XFacs,
+ ACPI_SIG_FACS, ACPI_TABLE_INDEX_FACS);
+ }
}
@@ -309,13 +315,13 @@ AcpiTbCreateLocalFadt (
/*
* Check if the FADT is larger than the largest table that we expect
- * (the ACPI 2.0/3.0 version). If so, truncate the table, and issue
+ * (the ACPI 5.0 version). If so, truncate the table, and issue
* a warning.
*/
if (Length > sizeof (ACPI_TABLE_FADT))
{
ACPI_WARNING ((AE_INFO,
- "FADT (revision %u) is longer than ACPI 2.0 version, "
+ "FADT (revision %u) is longer than ACPI 5.0 version, "
"truncating length %u to %u",
Table->Revision, Length, (UINT32) sizeof (ACPI_TABLE_FADT)));
}
@@ -329,6 +335,14 @@ AcpiTbCreateLocalFadt (
ACPI_MEMCPY (&AcpiGbl_FADT, Table,
ACPI_MIN (Length, sizeof (ACPI_TABLE_FADT)));
+ /* Take a copy of the Hardware Reduced flag */
+
+ AcpiGbl_ReducedHardware = FALSE;
+ if (AcpiGbl_FADT.Flags & ACPI_FADT_HW_REDUCED)
+ {
+ AcpiGbl_ReducedHardware = TRUE;
+ }
+
/* Convert the local copy of the FADT to the common internal format */
AcpiTbConvertFadt ();
@@ -386,10 +400,6 @@ AcpiTbConvertFadt (
UINT32 i;
- /* Update the local FADT table header length */
-
- AcpiGbl_FADT.Header.Length = sizeof (ACPI_TABLE_FADT);
-
/*
* Expand the 32-bit FACS and DSDT addresses to 64-bit as necessary.
* Later code will always use the X 64-bit field.
@@ -423,6 +433,13 @@ AcpiTbConvertFadt (
}
/*
+ * Now we can update the local FADT length to the length of the
+ * current FADT version as defined by the ACPI specification.
+ * Thus, we will have a common FADT internally.
+ */
+ AcpiGbl_FADT.Header.Length = sizeof (ACPI_TABLE_FADT);
+
+ /*
* Expand the ACPI 1.0 32-bit addresses to the ACPI 2.0 64-bit "X"
* generic address structures as necessary. Later code will always use
* the 64-bit address structures.
@@ -530,6 +547,13 @@ AcpiTbValidateFadt (
AcpiGbl_FADT.XDsdt = (UINT64) AcpiGbl_FADT.Dsdt;
}
+ /* If Hardware Reduced flag is set, we are all done */
+
+ if (AcpiGbl_ReducedHardware)
+ {
+ return;
+ }
+
/* Examine all of the 64-bit extended address fields (X fields) */
for (i = 0; i < ACPI_FADT_INFO_ENTRIES; i++)
diff --git a/usr/src/uts/intel/io/acpica/tables/tbfind.c b/usr/src/uts/intel/io/acpica/tables/tbfind.c
index 3b7c420f01..d7e2440b32 100644
--- a/usr/src/uts/intel/io/acpica/tables/tbfind.c
+++ b/usr/src/uts/intel/io/acpica/tables/tbfind.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/tables/tbinstal.c b/usr/src/uts/intel/io/acpica/tables/tbinstal.c
index ab69691a1e..f100ab43f7 100644
--- a/usr/src/uts/intel/io/acpica/tables/tbinstal.c
+++ b/usr/src/uts/intel/io/acpica/tables/tbinstal.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -128,7 +128,6 @@ AcpiTbAddTable (
{
UINT32 i;
ACPI_STATUS Status = AE_OK;
- ACPI_TABLE_HEADER *OverrideTable = NULL;
ACPI_FUNCTION_TRACE (TbAddTable);
@@ -242,26 +241,10 @@ AcpiTbAddTable (
/*
* ACPI Table Override:
* Allow the host to override dynamically loaded tables.
+ * NOTE: the table is fully mapped at this point, and the mapping will
+ * be deleted by TbTableOverride if the table is actually overridden.
*/
- Status = AcpiOsTableOverride (TableDesc->Pointer, &OverrideTable);
- if (ACPI_SUCCESS (Status) && OverrideTable)
- {
- ACPI_INFO ((AE_INFO,
- "%4.4s @ 0x%p Table override, replaced with:",
- TableDesc->Pointer->Signature,
- ACPI_CAST_PTR (void, TableDesc->Address)));
-
- /* We can delete the table that was passed as a parameter */
-
- AcpiTbDeleteTable (TableDesc);
-
- /* Setup descriptor for the new table */
-
- TableDesc->Address = ACPI_PTR_TO_PHYSADDR (OverrideTable);
- TableDesc->Pointer = OverrideTable;
- TableDesc->Length = OverrideTable->Length;
- TableDesc->Flags = ACPI_TABLE_ORIGIN_OVERRIDE;
- }
+ (void) AcpiTbTableOverride (TableDesc->Pointer, TableDesc);
/* Add the table to the global root table list */
@@ -283,6 +266,98 @@ Release:
/*******************************************************************************
*
+ * FUNCTION: AcpiTbTableOverride
+ *
+ * PARAMETERS: TableHeader - Header for the original table
+ * TableDesc - Table descriptor initialized for the
+ * original table. May or may not be mapped.
+ *
+ * RETURN: Pointer to the entire new table. NULL if table not overridden.
+ * If overridden, installs the new table within the input table
+ * descriptor.
+ *
+ * DESCRIPTION: Attempt table override by calling the OSL override functions.
+ * Note: If the table is overridden, then the entire new table
+ * is mapped and returned by this function.
+ *
+ ******************************************************************************/
+
+ACPI_TABLE_HEADER *
+AcpiTbTableOverride (
+ ACPI_TABLE_HEADER *TableHeader,
+ ACPI_TABLE_DESC *TableDesc)
+{
+ ACPI_STATUS Status;
+ ACPI_TABLE_HEADER *NewTable = NULL;
+ ACPI_PHYSICAL_ADDRESS NewAddress = 0;
+ UINT32 NewTableLength = 0;
+ UINT8 NewFlags;
+ char *OverrideType;
+
+
+ /* (1) Attempt logical override (returns a logical address) */
+
+ Status = AcpiOsTableOverride (TableHeader, &NewTable);
+ if (ACPI_SUCCESS (Status) && NewTable)
+ {
+ NewAddress = ACPI_PTR_TO_PHYSADDR (NewTable);
+ NewTableLength = NewTable->Length;
+ NewFlags = ACPI_TABLE_ORIGIN_OVERRIDE;
+ OverrideType = "Logical";
+ goto FinishOverride;
+ }
+
+ /* (2) Attempt physical override (returns a physical address) */
+
+ Status = AcpiOsPhysicalTableOverride (TableHeader,
+ &NewAddress, &NewTableLength);
+ if (ACPI_SUCCESS (Status) && NewAddress && NewTableLength)
+ {
+ /* Map the entire new table */
+
+ NewTable = AcpiOsMapMemory (NewAddress, NewTableLength);
+ if (!NewTable)
+ {
+ ACPI_EXCEPTION ((AE_INFO, AE_NO_MEMORY,
+ "%4.4s %p Attempted physical table override failed",
+ TableHeader->Signature,
+ ACPI_CAST_PTR (void, TableDesc->Address)));
+ return (NULL);
+ }
+
+ OverrideType = "Physical";
+ NewFlags = ACPI_TABLE_ORIGIN_MAPPED;
+ goto FinishOverride;
+ }
+
+ return (NULL); /* There was no override */
+
+
+FinishOverride:
+
+ ACPI_INFO ((AE_INFO,
+ "%4.4s %p %s table override, new table: %p",
+ TableHeader->Signature,
+ ACPI_CAST_PTR (void, TableDesc->Address),
+ OverrideType, NewTable));
+
+ /* We can now unmap/delete the original table (if fully mapped) */
+
+ AcpiTbDeleteTable (TableDesc);
+
+ /* Setup descriptor for the new table */
+
+ TableDesc->Address = NewAddress;
+ TableDesc->Pointer = NewTable;
+ TableDesc->Length = NewTableLength;
+ TableDesc->Flags = NewFlags;
+
+ return (NewTable);
+}
+
+
+/*******************************************************************************
+ *
* FUNCTION: AcpiTbResizeRootTableList
*
* PARAMETERS: None
@@ -435,8 +510,10 @@ AcpiTbDeleteTable (
ACPI_FREE (TableDesc->Pointer);
break;
+ /* Not mapped or allocated, there is nothing we can do */
+
default:
- break;
+ return;
}
TableDesc->Pointer = NULL;
diff --git a/usr/src/uts/intel/io/acpica/tables/tbutils.c b/usr/src/uts/intel/io/acpica/tables/tbutils.c
index bff6540e07..e8b5705e96 100644
--- a/usr/src/uts/intel/io/acpica/tables/tbutils.c
+++ b/usr/src/uts/intel/io/acpica/tables/tbutils.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -50,6 +50,7 @@
#define _COMPONENT ACPI_TABLES
ACPI_MODULE_NAME ("tbutils")
+
/* Local prototypes */
static void
@@ -68,6 +69,7 @@ AcpiTbGetRootTableEntry (
UINT32 TableEntrySize);
+#if (!ACPI_REDUCED_HARDWARE)
/*******************************************************************************
*
* FUNCTION: AcpiTbInitializeFacs
@@ -88,10 +90,19 @@ AcpiTbInitializeFacs (
ACPI_STATUS Status;
+ /* If Hardware Reduced flag is set, there is no FACS */
+
+ if (AcpiGbl_ReducedHardware)
+ {
+ AcpiGbl_FACS = NULL;
+ return (AE_OK);
+ }
+
Status = AcpiGetTableByIndex (ACPI_TABLE_INDEX_FACS,
ACPI_CAST_INDIRECT_PTR (ACPI_TABLE_HEADER, &AcpiGbl_FACS));
return (Status);
}
+#endif /* !ACPI_REDUCED_HARDWARE */
/*******************************************************************************
@@ -415,7 +426,7 @@ AcpiTbCopyDsdt (
* RETURN: None
*
* DESCRIPTION: Install an ACPI table into the global data structure. The
- * table override mechanism is implemented here to allow the host
+ * table override mechanism is called to allow the host
* OS to replace any table before it is installed in the root
* table array.
*
@@ -427,11 +438,9 @@ AcpiTbInstallTable (
char *Signature,
UINT32 TableIndex)
{
- UINT8 Flags;
- ACPI_STATUS Status;
- ACPI_TABLE_HEADER *TableToInstall;
- ACPI_TABLE_HEADER *MappedTable;
- ACPI_TABLE_HEADER *OverrideTable = NULL;
+ ACPI_TABLE_HEADER *Table;
+ ACPI_TABLE_HEADER *FinalTable;
+ ACPI_TABLE_DESC *TableDesc;
if (!Address)
@@ -443,70 +452,84 @@ AcpiTbInstallTable (
/* Map just the table header */
- MappedTable = AcpiOsMapMemory (Address, sizeof (ACPI_TABLE_HEADER));
- if (!MappedTable)
+ Table = AcpiOsMapMemory (Address, sizeof (ACPI_TABLE_HEADER));
+ if (!Table)
{
+ ACPI_ERROR ((AE_INFO, "Could not map memory for table [%s] at %p",
+ Signature, ACPI_CAST_PTR (void, Address)));
return;
}
/* If a particular signature is expected (DSDT/FACS), it must match */
if (Signature &&
- !ACPI_COMPARE_NAME (MappedTable->Signature, Signature))
+ !ACPI_COMPARE_NAME (Table->Signature, Signature))
{
ACPI_ERROR ((AE_INFO,
"Invalid signature 0x%X for ACPI table, expected [%s]",
- *ACPI_CAST_PTR (UINT32, MappedTable->Signature), Signature));
+ *ACPI_CAST_PTR (UINT32, Table->Signature), Signature));
goto UnmapAndExit;
}
/*
+ * Initialize the table entry. Set the pointer to NULL, since the
+ * table is not fully mapped at this time.
+ */
+ TableDesc = &AcpiGbl_RootTableList.Tables[TableIndex];
+
+ TableDesc->Address = Address;
+ TableDesc->Pointer = NULL;
+ TableDesc->Length = Table->Length;
+ TableDesc->Flags = ACPI_TABLE_ORIGIN_MAPPED;
+ ACPI_MOVE_32_TO_32 (TableDesc->Signature.Ascii, Table->Signature);
+
+ /*
* ACPI Table Override:
*
* Before we install the table, let the host OS override it with a new
* one if desired. Any table within the RSDT/XSDT can be replaced,
* including the DSDT which is pointed to by the FADT.
+ *
+ * NOTE: If the table is overridden, then FinalTable will contain a
+ * mapped pointer to the full new table. If the table is not overridden,
+ * or if there has been a physical override, then the table will be
+ * fully mapped later (in verify table). In any case, we must
+ * unmap the header that was mapped above.
*/
- Status = AcpiOsTableOverride (MappedTable, &OverrideTable);
- if (ACPI_SUCCESS (Status) && OverrideTable)
+ FinalTable = AcpiTbTableOverride (Table, TableDesc);
+ if (!FinalTable)
{
- ACPI_INFO ((AE_INFO,
- "%4.4s @ 0x%p Table override, replaced with:",
- MappedTable->Signature, ACPI_CAST_PTR (void, Address)));
-
- AcpiGbl_RootTableList.Tables[TableIndex].Pointer = OverrideTable;
- Address = ACPI_PTR_TO_PHYSADDR (OverrideTable);
-
- TableToInstall = OverrideTable;
- Flags = ACPI_TABLE_ORIGIN_OVERRIDE;
- }
- else
- {
- TableToInstall = MappedTable;
- Flags = ACPI_TABLE_ORIGIN_MAPPED;
+ FinalTable = Table; /* There was no override */
}
- /* Initialize the table entry */
-
- AcpiGbl_RootTableList.Tables[TableIndex].Address = Address;
- AcpiGbl_RootTableList.Tables[TableIndex].Length = TableToInstall->Length;
- AcpiGbl_RootTableList.Tables[TableIndex].Flags = Flags;
-
- ACPI_MOVE_32_TO_32 (
- &(AcpiGbl_RootTableList.Tables[TableIndex].Signature),
- TableToInstall->Signature);
+ AcpiTbPrintTableHeader (TableDesc->Address, FinalTable);
- AcpiTbPrintTableHeader (Address, TableToInstall);
+ /* Set the global integer width (based upon revision of the DSDT) */
if (TableIndex == ACPI_TABLE_INDEX_DSDT)
{
- /* Global integer width is based upon revision of the DSDT */
+ AcpiUtSetIntegerWidth (FinalTable->Revision);
+ }
- AcpiUtSetIntegerWidth (TableToInstall->Revision);
+ /*
+ * If we have a physical override during this early loading of the ACPI
+ * tables, unmap the table for now. It will be mapped again later when
+ * it is actually used. This supports very early loading of ACPI tables,
+ * before virtual memory is fully initialized and running within the
+ * host OS. Note: A logical override has the ACPI_TABLE_ORIGIN_OVERRIDE
+ * flag set and will not be deleted below.
+ */
+ if (FinalTable != Table)
+ {
+ AcpiTbDeleteTable (TableDesc);
}
+
UnmapAndExit:
- AcpiOsUnmapMemory (MappedTable, sizeof (ACPI_TABLE_HEADER));
+
+ /* Always unmap the table header that we mapped above */
+
+ AcpiOsUnmapMemory (Table, sizeof (ACPI_TABLE_HEADER));
}
diff --git a/usr/src/uts/intel/io/acpica/tables/tbxface.c b/usr/src/uts/intel/io/acpica/tables/tbxface.c
index 465bf95c64..58dae6935d 100644
--- a/usr/src/uts/intel/io/acpica/tables/tbxface.c
+++ b/usr/src/uts/intel/io/acpica/tables/tbxface.c
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/tables/tbxfroot.c b/usr/src/uts/intel/io/acpica/tables/tbxfroot.c
index e447108ebe..17e52ef7f4 100644
--- a/usr/src/uts/intel/io/acpica/tables/tbxfroot.c
+++ b/usr/src/uts/intel/io/acpica/tables/tbxfroot.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/utilities/utaddress.c b/usr/src/uts/intel/io/acpica/utilities/utaddress.c
new file mode 100644
index 0000000000..0b357b681b
--- /dev/null
+++ b/usr/src/uts/intel/io/acpica/utilities/utaddress.c
@@ -0,0 +1,322 @@
+/******************************************************************************
+ *
+ * Module Name: utaddress - OpRegion address range check
+ *
+ *****************************************************************************/
+
+/*
+ * Copyright (C) 2000 - 2012, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * substantially similar to the "NO WARRANTY" disclaimer below
+ * ("Disclaimer") and any redistribution must be conditioned upon
+ * including a substantially similar Disclaimer requirement for further
+ * binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ * of any contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#define __UTADDRESS_C__
+
+#include "acpi.h"
+#include "accommon.h"
+#include "acnamesp.h"
+
+
+#define _COMPONENT ACPI_UTILITIES
+ ACPI_MODULE_NAME ("utaddress")
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiUtAddAddressRange
+ *
+ * PARAMETERS: SpaceId - Address space ID
+ * Address - OpRegion start address
+ * Length - OpRegion length
+ * RegionNode - OpRegion namespace node
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Add the Operation Region address range to the global list.
+ * The only supported Space IDs are Memory and I/O. Called when
+ * the OpRegion address/length operands are fully evaluated.
+ *
+ * MUTEX: Locks the namespace
+ *
+ * NOTE: Because this interface is only called when an OpRegion argument
+ * list is evaluated, there cannot be any duplicate RegionNodes.
+ * Duplicate Address/Length values are allowed, however, so that multiple
+ * address conflicts can be detected.
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiUtAddAddressRange (
+ ACPI_ADR_SPACE_TYPE SpaceId,
+ ACPI_PHYSICAL_ADDRESS Address,
+ UINT32 Length,
+ ACPI_NAMESPACE_NODE *RegionNode)
+{
+ ACPI_ADDRESS_RANGE *RangeInfo;
+ ACPI_STATUS Status;
+
+
+ ACPI_FUNCTION_TRACE (UtAddAddressRange);
+
+
+ if ((SpaceId != ACPI_ADR_SPACE_SYSTEM_MEMORY) &&
+ (SpaceId != ACPI_ADR_SPACE_SYSTEM_IO))
+ {
+ return_ACPI_STATUS (AE_OK);
+ }
+
+ /* Allocate/init a new info block, add it to the appropriate list */
+
+ RangeInfo = ACPI_ALLOCATE (sizeof (ACPI_ADDRESS_RANGE));
+ if (!RangeInfo)
+ {
+ return_ACPI_STATUS (AE_NO_MEMORY);
+ }
+
+ RangeInfo->StartAddress = Address;
+ RangeInfo->EndAddress = (Address + Length - 1);
+ RangeInfo->RegionNode = RegionNode;
+
+ Status = AcpiUtAcquireMutex (ACPI_MTX_NAMESPACE);
+ if (ACPI_FAILURE (Status))
+ {
+ ACPI_FREE (RangeInfo);
+ return_ACPI_STATUS (Status);
+ }
+
+ RangeInfo->Next = AcpiGbl_AddressRangeList[SpaceId];
+ AcpiGbl_AddressRangeList[SpaceId] = RangeInfo;
+
+ ACPI_DEBUG_PRINT ((ACPI_DB_NAMES,
+ "\nAdded [%4.4s] address range: 0x%p-0x%p\n",
+ AcpiUtGetNodeName (RangeInfo->RegionNode),
+ ACPI_CAST_PTR (void, Address),
+ ACPI_CAST_PTR (void, RangeInfo->EndAddress)));
+
+ (void) AcpiUtReleaseMutex (ACPI_MTX_NAMESPACE);
+ return_ACPI_STATUS (AE_OK);
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiUtRemoveAddressRange
+ *
+ * PARAMETERS: SpaceId - Address space ID
+ * RegionNode - OpRegion namespace node
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Remove the Operation Region from the global list. The only
+ * supported Space IDs are Memory and I/O. Called when an
+ * OpRegion is deleted.
+ *
+ * MUTEX: Assumes the namespace is locked
+ *
+ ******************************************************************************/
+
+void
+AcpiUtRemoveAddressRange (
+ ACPI_ADR_SPACE_TYPE SpaceId,
+ ACPI_NAMESPACE_NODE *RegionNode)
+{
+ ACPI_ADDRESS_RANGE *RangeInfo;
+ ACPI_ADDRESS_RANGE *Prev;
+
+
+ ACPI_FUNCTION_TRACE (UtRemoveAddressRange);
+
+
+ if ((SpaceId != ACPI_ADR_SPACE_SYSTEM_MEMORY) &&
+ (SpaceId != ACPI_ADR_SPACE_SYSTEM_IO))
+ {
+ return_VOID;
+ }
+
+ /* Get the appropriate list head and check the list */
+
+ RangeInfo = Prev = AcpiGbl_AddressRangeList[SpaceId];
+ while (RangeInfo)
+ {
+ if (RangeInfo->RegionNode == RegionNode)
+ {
+ if (RangeInfo == Prev) /* Found at list head */
+ {
+ AcpiGbl_AddressRangeList[SpaceId] = RangeInfo->Next;
+ }
+ else
+ {
+ Prev->Next = RangeInfo->Next;
+ }
+
+ ACPI_DEBUG_PRINT ((ACPI_DB_NAMES,
+ "\nRemoved [%4.4s] address range: 0x%p-0x%p\n",
+ AcpiUtGetNodeName (RangeInfo->RegionNode),
+ ACPI_CAST_PTR (void, RangeInfo->StartAddress),
+ ACPI_CAST_PTR (void, RangeInfo->EndAddress)));
+
+ ACPI_FREE (RangeInfo);
+ return_VOID;
+ }
+
+ Prev = RangeInfo;
+ RangeInfo = RangeInfo->Next;
+ }
+
+ return_VOID;
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiUtCheckAddressRange
+ *
+ * PARAMETERS: SpaceId - Address space ID
+ * Address - Start address
+ * Length - Length of address range
+ * Warn - TRUE if warning on overlap desired
+ *
+ * RETURN: Count of the number of conflicts detected. Zero is always
+ * returned for Space IDs other than Memory or I/O.
+ *
+ * DESCRIPTION: Check if the input address range overlaps any of the
+ * ASL operation region address ranges. The only supported
+ * Space IDs are Memory and I/O.
+ *
+ * MUTEX: Assumes the namespace is locked.
+ *
+ ******************************************************************************/
+
+UINT32
+AcpiUtCheckAddressRange (
+ ACPI_ADR_SPACE_TYPE SpaceId,
+ ACPI_PHYSICAL_ADDRESS Address,
+ UINT32 Length,
+ BOOLEAN Warn)
+{
+ ACPI_ADDRESS_RANGE *RangeInfo;
+ ACPI_PHYSICAL_ADDRESS EndAddress;
+ char *Pathname;
+ UINT32 OverlapCount = 0;
+
+
+ ACPI_FUNCTION_TRACE (UtCheckAddressRange);
+
+
+ if ((SpaceId != ACPI_ADR_SPACE_SYSTEM_MEMORY) &&
+ (SpaceId != ACPI_ADR_SPACE_SYSTEM_IO))
+ {
+ return_UINT32 (0);
+ }
+
+ RangeInfo = AcpiGbl_AddressRangeList[SpaceId];
+ EndAddress = Address + Length - 1;
+
+ /* Check entire list for all possible conflicts */
+
+ while (RangeInfo)
+ {
+ /*
+ * Check if the requested Address/Length overlaps this AddressRange.
+ * Four cases to consider:
+ *
+ * 1) Input address/length is contained completely in the address range
+ * 2) Input address/length overlaps range at the range start
+ * 3) Input address/length overlaps range at the range end
+ * 4) Input address/length completely encompasses the range
+ */
+ if ((Address <= RangeInfo->EndAddress) &&
+ (EndAddress >= RangeInfo->StartAddress))
+ {
+ /* Found an address range overlap */
+
+ OverlapCount++;
+ if (Warn) /* Optional warning message */
+ {
+ Pathname = AcpiNsGetExternalPathname (RangeInfo->RegionNode);
+
+ ACPI_WARNING ((AE_INFO,
+ "0x%p-0x%p %s conflicts with Region %s %d",
+ ACPI_CAST_PTR (void, Address),
+ ACPI_CAST_PTR (void, EndAddress),
+ AcpiUtGetRegionName (SpaceId), Pathname, OverlapCount));
+ ACPI_FREE (Pathname);
+ }
+ }
+
+ RangeInfo = RangeInfo->Next;
+ }
+
+ return_UINT32 (OverlapCount);
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiUtDeleteAddressLists
+ *
+ * PARAMETERS: None
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Delete all global address range lists (called during
+ * subsystem shutdown).
+ *
+ ******************************************************************************/
+
+void
+AcpiUtDeleteAddressLists (
+ void)
+{
+ ACPI_ADDRESS_RANGE *Next;
+ ACPI_ADDRESS_RANGE *RangeInfo;
+ int i;
+
+
+ /* Delete all elements in all address range lists */
+
+ for (i = 0; i < ACPI_ADDRESS_RANGE_MAX; i++)
+ {
+ Next = AcpiGbl_AddressRangeList[i];
+
+ while (Next)
+ {
+ RangeInfo = Next;
+ Next = RangeInfo->Next;
+ ACPI_FREE (RangeInfo);
+ }
+
+ AcpiGbl_AddressRangeList[i] = NULL;
+ }
+}
diff --git a/usr/src/uts/intel/io/acpica/utilities/utalloc.c b/usr/src/uts/intel/io/acpica/utilities/utalloc.c
index 4c024f1a30..d2eb11edcb 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utalloc.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utalloc.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/utilities/utcache.c b/usr/src/uts/intel/io/acpica/utilities/utcache.c
index 96d64b05a5..7b3abe2894 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utcache.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utcache.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/utilities/utclib.c b/usr/src/uts/intel/io/acpica/utilities/utclib.c
index 70d17bb1be..d0153ec5d2 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utclib.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utclib.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/utilities/utcopy.c b/usr/src/uts/intel/io/acpica/utilities/utcopy.c
index 907f272f7e..ab04a7a7aa 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utcopy.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utcopy.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/utilities/utdebug.c b/usr/src/uts/intel/io/acpica/utilities/utdebug.c
index 24d2eb5cfa..051d47b443 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utdebug.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utdebug.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/utilities/utdecode.c b/usr/src/uts/intel/io/acpica/utilities/utdecode.c
index feca6c725c..718d73db4e 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utdecode.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utdecode.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -187,7 +187,9 @@ const char *AcpiGbl_RegionTypes[ACPI_NUM_PREDEFINED_REGIONS] =
"SMBus",
"SystemCMOS",
"PCIBARTarget",
- "IPMI"
+ "IPMI",
+ "GeneralPurposeIo",
+ "GenericSerialBus"
};
@@ -565,20 +567,21 @@ AcpiUtGetMutexName (
/* Names for Notify() values, used for debug output */
-static const char *AcpiGbl_NotifyValueNames[] =
+static const char *AcpiGbl_NotifyValueNames[ACPI_NOTIFY_MAX + 1] =
{
- "Bus Check",
- "Device Check",
- "Device Wake",
- "Eject Request",
- "Device Check Light",
- "Frequency Mismatch",
- "Bus Mode Mismatch",
- "Power Fault",
- "Capabilities Check",
- "Device PLD Check",
- "Reserved",
- "System Locality Update"
+ /* 00 */ "Bus Check",
+ /* 01 */ "Device Check",
+ /* 02 */ "Device Wake",
+ /* 03 */ "Eject Request",
+ /* 04 */ "Device Check Light",
+ /* 05 */ "Frequency Mismatch",
+ /* 06 */ "Bus Mode Mismatch",
+ /* 07 */ "Power Fault",
+ /* 08 */ "Capabilities Check",
+ /* 09 */ "Device PLD Check",
+ /* 10 */ "Reserved",
+ /* 11 */ "System Locality Update",
+ /* 12 */ "Shutdown Request"
};
const char *
@@ -594,9 +597,13 @@ AcpiUtGetNotifyName (
{
return ("Reserved");
}
- else /* Greater or equal to 0x80 */
+ else if (NotifyValue <= ACPI_MAX_DEVICE_SPECIFIC_NOTIFY)
{
- return ("**Device Specific**");
+ return ("Device Specific");
+ }
+ else
+ {
+ return ("Hardware Specific");
}
}
#endif
diff --git a/usr/src/uts/intel/io/acpica/utilities/utdelete.c b/usr/src/uts/intel/io/acpica/utilities/utdelete.c
index b87d2f0d59..fb32611121 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utdelete.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utdelete.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -167,7 +167,7 @@ AcpiUtDeleteInternalObj (
case ACPI_TYPE_PROCESSOR:
case ACPI_TYPE_THERMAL:
- /* Walk the notify handler list for this object */
+ /* Walk the address handler list for this object */
HandlerDesc = Object->CommonNotify.Handler;
while (HandlerDesc)
@@ -235,6 +235,16 @@ AcpiUtDeleteInternalObj (
ACPI_DEBUG_PRINT ((ACPI_DB_ALLOCATIONS,
"***** Region %p\n", Object));
+ /*
+ * Update AddressRange list. However, only permanent regions
+ * are installed in this list. (Not created within a method)
+ */
+ if (!(Object->Region.Node->Flags & ANOBJ_TEMPORARY))
+ {
+ AcpiUtRemoveAddressRange (Object->Region.SpaceId,
+ Object->Region.Node);
+ }
+
SecondDesc = AcpiNsGetSecondaryObject (Object);
if (SecondDesc)
{
@@ -513,6 +523,7 @@ AcpiUtUpdateObjectReference (
ACPI_STATUS Status = AE_OK;
ACPI_GENERIC_STATE *StateList = NULL;
ACPI_OPERAND_OBJECT *NextObject = NULL;
+ ACPI_OPERAND_OBJECT *PrevObject;
ACPI_GENERIC_STATE *State;
UINT32 i;
@@ -542,10 +553,20 @@ AcpiUtUpdateObjectReference (
case ACPI_TYPE_POWER:
case ACPI_TYPE_THERMAL:
- /* Update the notify objects for these types (if present) */
-
- AcpiUtUpdateRefCount (Object->CommonNotify.SystemNotify, Action);
- AcpiUtUpdateRefCount (Object->CommonNotify.DeviceNotify, Action);
+ /*
+ * Update the notify objects for these types (if present)
+ * Two lists, system and device notify handlers.
+ */
+ for (i = 0; i < ACPI_NUM_NOTIFY_TYPES; i++)
+ {
+ PrevObject = Object->CommonNotify.NotifyList[i];
+ while (PrevObject)
+ {
+ NextObject = PrevObject->Notify.Next[i];
+ AcpiUtUpdateRefCount (PrevObject, Action);
+ PrevObject = NextObject;
+ }
+ }
break;
case ACPI_TYPE_PACKAGE:
diff --git a/usr/src/uts/intel/io/acpica/utilities/uteval.c b/usr/src/uts/intel/io/acpica/utilities/uteval.c
index 0042f411ff..0bcb894b58 100644
--- a/usr/src/uts/intel/io/acpica/utilities/uteval.c
+++ b/usr/src/uts/intel/io/acpica/utilities/uteval.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/utilities/utglobal.c b/usr/src/uts/intel/io/acpica/utilities/utglobal.c
index 336f6706c1..f913e32228 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utglobal.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utglobal.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -155,6 +155,7 @@ const ACPI_PREDEFINED_NAMES AcpiGbl_PreDefinedNames[] =
};
+#if (!ACPI_REDUCED_HARDWARE)
/******************************************************************************
*
* Event and Hardware globals
@@ -199,6 +200,7 @@ ACPI_FIXED_EVENT_INFO AcpiGbl_FixedEventInfo[ACPI_NUM_FIXED_EVENTS] =
/* ACPI_EVENT_SLEEP_BUTTON */ {ACPI_BITREG_SLEEP_BUTTON_STATUS, ACPI_BITREG_SLEEP_BUTTON_ENABLE, ACPI_BITMASK_SLEEP_BUTTON_STATUS, ACPI_BITMASK_SLEEP_BUTTON_ENABLE},
/* ACPI_EVENT_RTC */ {ACPI_BITREG_RT_CLOCK_STATUS, ACPI_BITREG_RT_CLOCK_ENABLE, ACPI_BITMASK_RT_CLOCK_STATUS, ACPI_BITMASK_RT_CLOCK_ENABLE},
};
+#endif /* !ACPI_REDUCED_HARDWARE */
/*******************************************************************************
@@ -233,6 +235,13 @@ AcpiUtInitGlobals (
return_ACPI_STATUS (Status);
}
+ /* Address Range lists */
+
+ for (i = 0; i < ACPI_ADDRESS_RANGE_MAX; i++)
+ {
+ AcpiGbl_AddressRangeList[i] = NULL;
+ }
+
/* Mutex locked flags */
for (i = 0; i < ACPI_NUM_MUTEX; i++)
@@ -262,6 +271,8 @@ AcpiUtInitGlobals (
AcpiFixedEventCount[i] = 0;
}
+#if (!ACPI_REDUCED_HARDWARE)
+
/* GPE support */
AcpiGbl_AllGpesInitialized = FALSE;
@@ -270,15 +281,18 @@ AcpiUtInitGlobals (
AcpiGbl_GpeFadtBlocks[1] = NULL;
AcpiCurrentGpeCount = 0;
+ AcpiGbl_GlobalEventHandler = NULL;
+
+#endif /* !ACPI_REDUCED_HARDWARE */
+
/* Global handlers */
- AcpiGbl_SystemNotify.Handler = NULL;
- AcpiGbl_DeviceNotify.Handler = NULL;
+ AcpiGbl_GlobalNotify[0].Handler = NULL;
+ AcpiGbl_GlobalNotify[1].Handler = NULL;
AcpiGbl_ExceptionHandler = NULL;
AcpiGbl_InitHandler = NULL;
AcpiGbl_TableHandler = NULL;
AcpiGbl_InterfaceHandler = NULL;
- AcpiGbl_GlobalEventHandler = NULL;
/* Global Lock support */
diff --git a/usr/src/uts/intel/io/acpica/utilities/utids.c b/usr/src/uts/intel/io/acpica/utilities/utids.c
index 36dc3afce6..226d8c1631 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utids.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utids.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/utilities/utinit.c b/usr/src/uts/intel/io/acpica/utilities/utinit.c
index 9e253f01d3..6831595e68 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utinit.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utinit.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -58,21 +58,33 @@
static void AcpiUtTerminate (
void);
+#if (!ACPI_REDUCED_HARDWARE)
+static void
+AcpiUtFreeGpeLists (
+ void);
+
+#else
+
+#define AcpiUtFreeGpeLists()
+#endif /* !ACPI_REDUCED_HARDWARE */
+
+
+#if (!ACPI_REDUCED_HARDWARE)
/******************************************************************************
*
- * FUNCTION: AcpiUtTerminate
+ * FUNCTION: AcpiUtFreeGpeLists
*
* PARAMETERS: none
*
* RETURN: none
*
- * DESCRIPTION: Free global memory
+ * DESCRIPTION: Free global GPE lists
*
******************************************************************************/
static void
-AcpiUtTerminate (
+AcpiUtFreeGpeLists (
void)
{
ACPI_GPE_BLOCK_INFO *GpeBlock;
@@ -81,9 +93,6 @@ AcpiUtTerminate (
ACPI_GPE_XRUPT_INFO *NextGpeXruptInfo;
- ACPI_FUNCTION_TRACE (UtTerminate);
-
-
/* Free global GPE blocks and related info structures */
GpeXruptInfo = AcpiGbl_GpeXruptListHead;
@@ -103,7 +112,30 @@ AcpiUtTerminate (
ACPI_FREE (GpeXruptInfo);
GpeXruptInfo = NextGpeXruptInfo;
}
+}
+#endif /* !ACPI_REDUCED_HARDWARE */
+
+
+/******************************************************************************
+ *
+ * FUNCTION: AcpiUtTerminate
+ *
+ * PARAMETERS: none
+ *
+ * RETURN: none
+ *
+ * DESCRIPTION: Free global memory
+ *
+ ******************************************************************************/
+
+static void
+AcpiUtTerminate (
+ void)
+{
+ ACPI_FUNCTION_TRACE (UtTerminate);
+ AcpiUtFreeGpeLists ();
+ AcpiUtDeleteAddressLists ();
return_VOID;
}
diff --git a/usr/src/uts/intel/io/acpica/utilities/utlock.c b/usr/src/uts/intel/io/acpica/utilities/utlock.c
index fd2fb2bb1d..61585ad499 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utlock.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utlock.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/utilities/utmath.c b/usr/src/uts/intel/io/acpica/utilities/utmath.c
index 032b72d074..d0fad7cb6c 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utmath.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utmath.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/utilities/utmisc.c b/usr/src/uts/intel/io/acpica/utilities/utmisc.c
index d20ad7e3a1..062c768e72 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utmisc.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utmisc.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -466,6 +466,44 @@ AcpiUtStrlwr (
return;
}
+
+
+/******************************************************************************
+ *
+ * FUNCTION: AcpiUtStricmp
+ *
+ * PARAMETERS: String1 - first string to compare
+ * String2 - second string to compare
+ *
+ * RETURN: int that signifies string relationship. Zero means strings
+ * are equal.
+ *
+ * DESCRIPTION: Implementation of the non-ANSI stricmp function (compare
+ * strings with no case sensitivity)
+ *
+ ******************************************************************************/
+
+int
+AcpiUtStricmp (
+ char *String1,
+ char *String2)
+{
+ int c1;
+ int c2;
+
+
+ do
+ {
+ c1 = tolower ((int) *String1);
+ c2 = tolower ((int) *String2);
+
+ String1++;
+ String2++;
+ }
+ while ((c1 == c2) && (c1));
+
+ return (c1 - c2);
+}
#endif
diff --git a/usr/src/uts/intel/io/acpica/utilities/utmutex.c b/usr/src/uts/intel/io/acpica/utilities/utmutex.c
index f7af803de1..f1cdb01804 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utmutex.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utmutex.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -335,15 +335,10 @@ ACPI_STATUS
AcpiUtReleaseMutex (
ACPI_MUTEX_HANDLE MutexId)
{
- ACPI_THREAD_ID ThisThreadId;
-
-
ACPI_FUNCTION_NAME (UtReleaseMutex);
-
- ThisThreadId = AcpiOsGetThreadId ();
ACPI_DEBUG_PRINT ((ACPI_DB_MUTEX, "Thread %u releasing Mutex [%s]\n",
- (UINT32) ThisThreadId, AcpiUtGetMutexName (MutexId)));
+ (UINT32) AcpiOsGetThreadId (), AcpiUtGetMutexName (MutexId)));
if (MutexId > ACPI_MAX_MUTEX)
{
@@ -374,7 +369,7 @@ AcpiUtReleaseMutex (
*/
for (i = MutexId; i < ACPI_NUM_MUTEX; i++)
{
- if (AcpiGbl_MutexInfo[i].ThreadId == ThisThreadId)
+ if (AcpiGbl_MutexInfo[i].ThreadId == AcpiOsGetThreadId ())
{
if (i == MutexId)
{
diff --git a/usr/src/uts/intel/io/acpica/utilities/utobject.c b/usr/src/uts/intel/io/acpica/utilities/utobject.c
index ea621c741e..fea140946c 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utobject.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utobject.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/utilities/utosi.c b/usr/src/uts/intel/io/acpica/utilities/utosi.c
index 053d6adc9b..dcce8a99e1 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utosi.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utosi.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/utilities/utresrc.c b/usr/src/uts/intel/io/acpica/utilities/utresrc.c
index 468ffd6a1a..c2c135b7d2 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utresrc.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utresrc.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -46,7 +46,7 @@
#include "acpi.h"
#include "accommon.h"
-#include "amlresrc.h"
+#include "acresrc.h"
#define _COMPONENT ACPI_UTILITIES
@@ -179,6 +179,154 @@ const char *AcpiGbl_TypDecode[] =
"TypeF"
};
+const char *AcpiGbl_PpcDecode[] =
+{
+ "PullDefault",
+ "PullUp",
+ "PullDown",
+ "PullNone"
+};
+
+const char *AcpiGbl_IorDecode[] =
+{
+ "IoRestrictionNone",
+ "IoRestrictionInputOnly",
+ "IoRestrictionOutputOnly",
+ "IoRestrictionNoneAndPreserve"
+};
+
+const char *AcpiGbl_DtsDecode[] =
+{
+ "Width8bit",
+ "Width16bit",
+ "Width32bit",
+ "Width64bit",
+ "Width128bit",
+ "Width256bit",
+};
+
+/* GPIO connection type */
+
+const char *AcpiGbl_CtDecode[] =
+{
+ "Interrupt",
+ "I/O"
+};
+
+/* Serial bus type */
+
+const char *AcpiGbl_SbtDecode[] =
+{
+ "/* UNKNOWN serial bus type */",
+ "I2C",
+ "SPI",
+ "UART"
+};
+
+/* I2C serial bus access mode */
+
+const char *AcpiGbl_AmDecode[] =
+{
+ "AddressingMode7Bit",
+ "AddressingMode10Bit"
+};
+
+/* I2C serial bus slave mode */
+
+const char *AcpiGbl_SmDecode[] =
+{
+ "ControllerInitiated",
+ "DeviceInitiated"
+};
+
+/* SPI serial bus wire mode */
+
+const char *AcpiGbl_WmDecode[] =
+{
+ "FourWireMode",
+ "ThreeWireMode"
+};
+
+/* SPI serial clock phase */
+
+const char *AcpiGbl_CphDecode[] =
+{
+ "ClockPhaseFirst",
+ "ClockPhaseSecond"
+};
+
+/* SPI serial bus clock polarity */
+
+const char *AcpiGbl_CpoDecode[] =
+{
+ "ClockPolarityLow",
+ "ClockPolarityHigh"
+};
+
+/* SPI serial bus device polarity */
+
+const char *AcpiGbl_DpDecode[] =
+{
+ "PolarityLow",
+ "PolarityHigh"
+};
+
+/* UART serial bus endian */
+
+const char *AcpiGbl_EdDecode[] =
+{
+ "LittleEndian",
+ "BigEndian"
+};
+
+/* UART serial bus bits per byte */
+
+const char *AcpiGbl_BpbDecode[] =
+{
+ "DataBitsFive",
+ "DataBitsSix",
+ "DataBitsSeven",
+ "DataBitsEight",
+ "DataBitsNine",
+ "/* UNKNOWN Bits per byte */",
+ "/* UNKNOWN Bits per byte */",
+ "/* UNKNOWN Bits per byte */"
+};
+
+/* UART serial bus stop bits */
+
+const char *AcpiGbl_SbDecode[] =
+{
+ "StopBitsNone",
+ "StopBitsOne",
+ "StopBitsOnePlusHalf",
+ "StopBitsTwo"
+};
+
+/* UART serial bus flow control */
+
+const char *AcpiGbl_FcDecode[] =
+{
+ "FlowControlNone",
+ "FlowControlHardware",
+ "FlowControlXON",
+ "/* UNKNOWN flow control keyword */"
+};
+
+/* UART serial bus parity type */
+
+const char *AcpiGbl_PtDecode[] =
+{
+ "ParityTypeNone",
+ "ParityTypeEven",
+ "ParityTypeOdd",
+ "ParityTypeMark",
+ "ParityTypeSpace",
+ "/* UNKNOWN parity keyword */",
+ "/* UNKNOWN parity keyword */",
+ "/* UNKNOWN parity keyword */"
+};
+
#endif
@@ -200,7 +348,7 @@ const UINT8 AcpiGbl_ResourceAmlSizes[] =
ACPI_AML_SIZE_SMALL (AML_RESOURCE_END_DEPENDENT),
ACPI_AML_SIZE_SMALL (AML_RESOURCE_IO),
ACPI_AML_SIZE_SMALL (AML_RESOURCE_FIXED_IO),
- 0,
+ ACPI_AML_SIZE_SMALL (AML_RESOURCE_FIXED_DMA),
0,
0,
0,
@@ -220,7 +368,18 @@ const UINT8 AcpiGbl_ResourceAmlSizes[] =
ACPI_AML_SIZE_LARGE (AML_RESOURCE_ADDRESS16),
ACPI_AML_SIZE_LARGE (AML_RESOURCE_EXTENDED_IRQ),
ACPI_AML_SIZE_LARGE (AML_RESOURCE_ADDRESS64),
- ACPI_AML_SIZE_LARGE (AML_RESOURCE_EXTENDED_ADDRESS64)
+ ACPI_AML_SIZE_LARGE (AML_RESOURCE_EXTENDED_ADDRESS64),
+ ACPI_AML_SIZE_LARGE (AML_RESOURCE_GPIO),
+ 0,
+ ACPI_AML_SIZE_LARGE (AML_RESOURCE_COMMON_SERIALBUS),
+};
+
+const UINT8 AcpiGbl_ResourceAmlSerialBusSizes[] =
+{
+ 0,
+ ACPI_AML_SIZE_LARGE (AML_RESOURCE_I2C_SERIALBUS),
+ ACPI_AML_SIZE_LARGE (AML_RESOURCE_SPI_SERIALBUS),
+ ACPI_AML_SIZE_LARGE (AML_RESOURCE_UART_SERIALBUS),
};
@@ -238,35 +397,49 @@ static const UINT8 AcpiGbl_ResourceTypes[] =
0,
0,
0,
- ACPI_SMALL_VARIABLE_LENGTH,
- ACPI_FIXED_LENGTH,
- ACPI_SMALL_VARIABLE_LENGTH,
- ACPI_FIXED_LENGTH,
- ACPI_FIXED_LENGTH,
- ACPI_FIXED_LENGTH,
- 0,
+ ACPI_SMALL_VARIABLE_LENGTH, /* 04 IRQ */
+ ACPI_FIXED_LENGTH, /* 05 DMA */
+ ACPI_SMALL_VARIABLE_LENGTH, /* 06 StartDependentFunctions */
+ ACPI_FIXED_LENGTH, /* 07 EndDependentFunctions */
+ ACPI_FIXED_LENGTH, /* 08 IO */
+ ACPI_FIXED_LENGTH, /* 09 FixedIO */
+ ACPI_FIXED_LENGTH, /* 0A FixedDMA */
0,
0,
0,
- ACPI_VARIABLE_LENGTH,
- ACPI_FIXED_LENGTH,
+ ACPI_VARIABLE_LENGTH, /* 0E VendorShort */
+ ACPI_FIXED_LENGTH, /* 0F EndTag */
/* Large descriptors */
0,
- ACPI_FIXED_LENGTH,
- ACPI_FIXED_LENGTH,
+ ACPI_FIXED_LENGTH, /* 01 Memory24 */
+ ACPI_FIXED_LENGTH, /* 02 GenericRegister */
+ 0,
+ ACPI_VARIABLE_LENGTH, /* 04 VendorLong */
+ ACPI_FIXED_LENGTH, /* 05 Memory32 */
+ ACPI_FIXED_LENGTH, /* 06 Memory32Fixed */
+ ACPI_VARIABLE_LENGTH, /* 07 Dword* address */
+ ACPI_VARIABLE_LENGTH, /* 08 Word* address */
+ ACPI_VARIABLE_LENGTH, /* 09 ExtendedIRQ */
+ ACPI_VARIABLE_LENGTH, /* 0A Qword* address */
+ ACPI_FIXED_LENGTH, /* 0B Extended* address */
+ ACPI_VARIABLE_LENGTH, /* 0C Gpio* */
0,
- ACPI_VARIABLE_LENGTH,
- ACPI_FIXED_LENGTH,
- ACPI_FIXED_LENGTH,
- ACPI_VARIABLE_LENGTH,
- ACPI_VARIABLE_LENGTH,
- ACPI_VARIABLE_LENGTH,
- ACPI_VARIABLE_LENGTH,
- ACPI_FIXED_LENGTH
+ ACPI_VARIABLE_LENGTH /* 0E *SerialBus */
};
+/*
+ * For the iASL compiler/disassembler, we don't want any error messages
+ * because the disassembler uses the resource validation code to determine
+ * if Buffer objects are actually Resource Templates.
+ */
+#ifdef ACPI_ASL_COMPILER
+#define ACPI_RESOURCE_ERROR(plist)
+#else
+#define ACPI_RESOURCE_ERROR(plist) ACPI_ERROR(plist)
+#endif
+
/*******************************************************************************
*
@@ -297,6 +470,7 @@ AcpiUtWalkAmlResources (
UINT8 ResourceIndex;
UINT32 Length;
UINT32 Offset = 0;
+ UINT8 EndTag[2] = {0x79, 0x00};
ACPI_FUNCTION_TRACE (UtWalkAmlResources);
@@ -322,6 +496,10 @@ AcpiUtWalkAmlResources (
Status = AcpiUtValidateResource (Aml, &ResourceIndex);
if (ACPI_FAILURE (Status))
{
+ /*
+ * Exit on failure. Cannot continue because the descriptor length
+ * may be bogus also.
+ */
return_ACPI_STATUS (Status);
}
@@ -336,7 +514,7 @@ AcpiUtWalkAmlResources (
Status = UserFunction (Aml, Length, Offset, ResourceIndex, Context);
if (ACPI_FAILURE (Status))
{
- return (Status);
+ return_ACPI_STATUS (Status);
}
}
@@ -371,7 +549,19 @@ AcpiUtWalkAmlResources (
/* Did not find an EndTag descriptor */
- return (AE_AML_NO_RESOURCE_END_TAG);
+ if (UserFunction)
+ {
+ /* Insert an EndTag anyway. AcpiRsGetListLength always leaves room */
+
+ (void) AcpiUtValidateResource (EndTag, &ResourceIndex);
+ Status = UserFunction (EndTag, 2, Offset, ResourceIndex, Context);
+ if (ACPI_FAILURE (Status))
+ {
+ return_ACPI_STATUS (Status);
+ }
+ }
+
+ return_ACPI_STATUS (AE_AML_NO_RESOURCE_END_TAG);
}
@@ -396,6 +586,7 @@ AcpiUtValidateResource (
void *Aml,
UINT8 *ReturnIndex)
{
+ AML_RESOURCE *AmlResource;
UINT8 ResourceType;
UINT8 ResourceIndex;
ACPI_RS_LENGTH ResourceLength;
@@ -420,7 +611,7 @@ AcpiUtValidateResource (
if (ResourceType > ACPI_RESOURCE_NAME_LARGE_MAX)
{
- return (AE_AML_INVALID_RESOURCE_TYPE);
+ goto InvalidResource;
}
/*
@@ -439,17 +630,18 @@ AcpiUtValidateResource (
((ResourceType & ACPI_RESOURCE_NAME_SMALL_MASK) >> 3);
}
- /* Check validity of the resource type, zero indicates name is invalid */
-
+ /*
+ * Check validity of the resource type, via AcpiGbl_ResourceTypes. Zero
+ * indicates an invalid resource.
+ */
if (!AcpiGbl_ResourceTypes[ResourceIndex])
{
- return (AE_AML_INVALID_RESOURCE_TYPE);
+ goto InvalidResource;
}
-
/*
- * 2) Validate the ResourceLength field. This ensures that the length
- * is at least reasonable, and guarantees that it is non-zero.
+ * Validate the ResourceLength field. This ensures that the length
+ * is at least reasonable, and guarantees that it is non-zero.
*/
ResourceLength = AcpiUtGetResourceLength (Aml);
MinimumResourceLength = AcpiGbl_ResourceAmlSizes[ResourceIndex];
@@ -464,7 +656,7 @@ AcpiUtValidateResource (
if (ResourceLength != MinimumResourceLength)
{
- return (AE_AML_BAD_RESOURCE_LENGTH);
+ goto BadResourceLength;
}
break;
@@ -474,7 +666,7 @@ AcpiUtValidateResource (
if (ResourceLength < MinimumResourceLength)
{
- return (AE_AML_BAD_RESOURCE_LENGTH);
+ goto BadResourceLength;
}
break;
@@ -485,7 +677,7 @@ AcpiUtValidateResource (
if ((ResourceLength > MinimumResourceLength) ||
(ResourceLength < (MinimumResourceLength - 1)))
{
- return (AE_AML_BAD_RESOURCE_LENGTH);
+ goto BadResourceLength;
}
break;
@@ -493,7 +685,22 @@ AcpiUtValidateResource (
/* Shouldn't happen (because of validation earlier), but be sure */
- return (AE_AML_INVALID_RESOURCE_TYPE);
+ goto InvalidResource;
+ }
+
+ AmlResource = ACPI_CAST_PTR (AML_RESOURCE, Aml);
+ if (ResourceType == ACPI_RESOURCE_NAME_SERIAL_BUS)
+ {
+ /* Validate the BusType field */
+
+ if ((AmlResource->CommonSerialBus.Type == 0) ||
+ (AmlResource->CommonSerialBus.Type > AML_RESOURCE_MAX_SERIALBUSTYPE))
+ {
+ ACPI_RESOURCE_ERROR ((AE_INFO,
+ "Invalid/unsupported SerialBus resource descriptor: BusType 0x%2.2X",
+ AmlResource->CommonSerialBus.Type));
+ return (AE_AML_INVALID_RESOURCE_TYPE);
+ }
}
/* Optionally return the resource table index */
@@ -504,6 +711,22 @@ AcpiUtValidateResource (
}
return (AE_OK);
+
+
+InvalidResource:
+
+ ACPI_RESOURCE_ERROR ((AE_INFO,
+ "Invalid/unsupported resource descriptor: Type 0x%2.2X",
+ ResourceType));
+ return (AE_AML_INVALID_RESOURCE_TYPE);
+
+BadResourceLength:
+
+ ACPI_RESOURCE_ERROR ((AE_INFO,
+ "Invalid resource descriptor length: Type "
+ "0x%2.2X, Length 0x%4.4X, MinLength 0x%4.4X",
+ ResourceType, ResourceLength, MinimumResourceLength));
+ return (AE_AML_BAD_RESOURCE_LENGTH);
}
diff --git a/usr/src/uts/intel/io/acpica/utilities/utstate.c b/usr/src/uts/intel/io/acpica/utilities/utstate.c
index bc84915ab0..62a1aefec7 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utstate.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utstate.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/utilities/uttrack.c b/usr/src/uts/intel/io/acpica/utilities/uttrack.c
index 6ecec63f31..7d58f5c8a8 100644
--- a/usr/src/uts/intel/io/acpica/utilities/uttrack.c
+++ b/usr/src/uts/intel/io/acpica/utilities/uttrack.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -633,21 +633,21 @@ AcpiUtDumpAllocations (
switch (ACPI_GET_DESCRIPTOR_TYPE (Descriptor))
{
case ACPI_DESC_TYPE_OPERAND:
- if (Element->Size == sizeof (ACPI_DESC_TYPE_OPERAND))
+ if (Element->Size == sizeof (ACPI_OPERAND_OBJECT))
{
DescriptorType = ACPI_DESC_TYPE_OPERAND;
}
break;
case ACPI_DESC_TYPE_PARSER:
- if (Element->Size == sizeof (ACPI_DESC_TYPE_PARSER))
+ if (Element->Size == sizeof (ACPI_PARSE_OBJECT))
{
DescriptorType = ACPI_DESC_TYPE_PARSER;
}
break;
case ACPI_DESC_TYPE_NAMED:
- if (Element->Size == sizeof (ACPI_DESC_TYPE_NAMED))
+ if (Element->Size == sizeof (ACPI_NAMESPACE_NODE))
{
DescriptorType = ACPI_DESC_TYPE_NAMED;
}
diff --git a/usr/src/uts/intel/io/acpica/utilities/utxface.c b/usr/src/uts/intel/io/acpica/utilities/utxface.c
index 63b081b758..3e858c9b0f 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utxface.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utxface.c
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -56,7 +56,6 @@
#ifndef ACPI_ASL_COMPILER
-
/*******************************************************************************
*
* FUNCTION: AcpiInitializeSubsystem
@@ -162,6 +161,8 @@ AcpiEnableSubsystem (
ACPI_FUNCTION_TRACE (AcpiEnableSubsystem);
+#if (!ACPI_REDUCED_HARDWARE)
+
/* Enable ACPI mode */
if (!(Flags & ACPI_NO_ACPI_ENABLE))
@@ -189,6 +190,8 @@ AcpiEnableSubsystem (
return_ACPI_STATUS (Status);
}
+#endif /* !ACPI_REDUCED_HARDWARE */
+
/*
* Install the default OpRegion handlers. These are installed unless
* other handlers have already been installed via the
@@ -206,6 +209,7 @@ AcpiEnableSubsystem (
}
}
+#if (!ACPI_REDUCED_HARDWARE)
/*
* Initialize ACPI Event handling (Fixed and General Purpose)
*
@@ -248,6 +252,8 @@ AcpiEnableSubsystem (
}
}
+#endif /* !ACPI_REDUCED_HARDWARE */
+
return_ACPI_STATUS (Status);
}
@@ -785,5 +791,47 @@ AcpiInstallInterfaceHandler (
ACPI_EXPORT_SYMBOL (AcpiInstallInterfaceHandler)
-#endif /* !ACPI_ASL_COMPILER */
+/*****************************************************************************
+ *
+ * FUNCTION: AcpiCheckAddressRange
+ *
+ * PARAMETERS: SpaceId - Address space ID
+ * Address - Start address
+ * Length - Length
+ * Warn - TRUE if warning on overlap desired
+ *
+ * RETURN: Count of the number of conflicts detected.
+ *
+ * DESCRIPTION: Check if the input address range overlaps any of the
+ * ASL operation region address ranges.
+ *
+ ****************************************************************************/
+
+UINT32
+AcpiCheckAddressRange (
+ ACPI_ADR_SPACE_TYPE SpaceId,
+ ACPI_PHYSICAL_ADDRESS Address,
+ ACPI_SIZE Length,
+ BOOLEAN Warn)
+{
+ UINT32 Overlaps;
+ ACPI_STATUS Status;
+
+
+ Status = AcpiUtAcquireMutex (ACPI_MTX_NAMESPACE);
+ if (ACPI_FAILURE (Status))
+ {
+ return (0);
+ }
+
+ Overlaps = AcpiUtCheckAddressRange (SpaceId, Address,
+ (UINT32) Length, Warn);
+
+ (void) AcpiUtReleaseMutex (ACPI_MTX_NAMESPACE);
+ return (Overlaps);
+}
+
+ACPI_EXPORT_SYMBOL (AcpiCheckAddressRange)
+
+#endif /* !ACPI_ASL_COMPILER */
diff --git a/usr/src/uts/intel/io/acpica/utilities/utxferror.c b/usr/src/uts/intel/io/acpica/utilities/utxferror.c
index a371308d6b..8ffb274894 100644
--- a/usr/src/uts/intel/io/acpica/utilities/utxferror.c
+++ b/usr/src/uts/intel/io/acpica/utilities/utxferror.c
@@ -5,7 +5,7 @@
******************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/io/acpica/utilities/utxfmutex.c b/usr/src/uts/intel/io/acpica/utilities/utxfmutex.c
new file mode 100644
index 0000000000..c4784310d3
--- /dev/null
+++ b/usr/src/uts/intel/io/acpica/utilities/utxfmutex.c
@@ -0,0 +1,213 @@
+/*******************************************************************************
+ *
+ * Module Name: utxfmutex - external AML mutex access functions
+ *
+ ******************************************************************************/
+
+/*
+ * Copyright (C) 2000 - 2012, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * substantially similar to the "NO WARRANTY" disclaimer below
+ * ("Disclaimer") and any redistribution must be conditioned upon
+ * including a substantially similar Disclaimer requirement for further
+ * binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ * of any contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#define __UTXFMUTEX_C__
+
+#include "acpi.h"
+#include "accommon.h"
+#include "acnamesp.h"
+
+
+#define _COMPONENT ACPI_UTILITIES
+ ACPI_MODULE_NAME ("utxfmutex")
+
+
+/* Local prototypes */
+
+static ACPI_STATUS
+AcpiUtGetMutexObject (
+ ACPI_HANDLE Handle,
+ ACPI_STRING Pathname,
+ ACPI_OPERAND_OBJECT **RetObj);
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiUtGetMutexObject
+ *
+ * PARAMETERS: Handle - Mutex or prefix handle (optional)
+ * Pathname - Mutex pathname (optional)
+ * RetObj - Where the mutex object is returned
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Get an AML mutex object. The mutex node is pointed to by
+ * Handle:Pathname. Either Handle or Pathname can be NULL, but
+ * not both.
+ *
+ ******************************************************************************/
+
+static ACPI_STATUS
+AcpiUtGetMutexObject (
+ ACPI_HANDLE Handle,
+ ACPI_STRING Pathname,
+ ACPI_OPERAND_OBJECT **RetObj)
+{
+ ACPI_NAMESPACE_NODE *MutexNode;
+ ACPI_OPERAND_OBJECT *MutexObj;
+ ACPI_STATUS Status;
+
+
+ /* Parameter validation */
+
+ if (!RetObj || (!Handle && !Pathname))
+ {
+ return (AE_BAD_PARAMETER);
+ }
+
+ /* Get a the namespace node for the mutex */
+
+ MutexNode = Handle;
+ if (Pathname != NULL)
+ {
+ Status = AcpiGetHandle (Handle, Pathname,
+ ACPI_CAST_PTR (ACPI_HANDLE, &MutexNode));
+ if (ACPI_FAILURE (Status))
+ {
+ return (Status);
+ }
+ }
+
+ /* Ensure that we actually have a Mutex object */
+
+ if (!MutexNode ||
+ (MutexNode->Type != ACPI_TYPE_MUTEX))
+ {
+ return (AE_TYPE);
+ }
+
+ /* Get the low-level mutex object */
+
+ MutexObj = AcpiNsGetAttachedObject (MutexNode);
+ if (!MutexObj)
+ {
+ return (AE_NULL_OBJECT);
+ }
+
+ *RetObj = MutexObj;
+ return (AE_OK);
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiAcquireMutex
+ *
+ * PARAMETERS: Handle - Mutex or prefix handle (optional)
+ * Pathname - Mutex pathname (optional)
+ * Timeout - Max time to wait for the lock (millisec)
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Acquire an AML mutex. This is a device driver interface to
+ * AML mutex objects, and allows for transaction locking between
+ * drivers and AML code. The mutex node is pointed to by
+ * Handle:Pathname. Either Handle or Pathname can be NULL, but
+ * not both.
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiAcquireMutex (
+ ACPI_HANDLE Handle,
+ ACPI_STRING Pathname,
+ UINT16 Timeout)
+{
+ ACPI_STATUS Status;
+ ACPI_OPERAND_OBJECT *MutexObj;
+
+
+ /* Get the low-level mutex associated with Handle:Pathname */
+
+ Status = AcpiUtGetMutexObject (Handle, Pathname, &MutexObj);
+ if (ACPI_FAILURE (Status))
+ {
+ return (Status);
+ }
+
+ /* Acquire the OS mutex */
+
+ Status = AcpiOsAcquireMutex (MutexObj->Mutex.OsMutex, Timeout);
+ return (Status);
+}
+
+
+/*******************************************************************************
+ *
+ * FUNCTION: AcpiReleaseMutex
+ *
+ * PARAMETERS: Handle - Mutex or prefix handle (optional)
+ * Pathname - Mutex pathname (optional)
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Release an AML mutex. This is a device driver interface to
+ * AML mutex objects, and allows for transaction locking between
+ * drivers and AML code. The mutex node is pointed to by
+ * Handle:Pathname. Either Handle or Pathname can be NULL, but
+ * not both.
+ *
+ ******************************************************************************/
+
+ACPI_STATUS
+AcpiReleaseMutex (
+ ACPI_HANDLE Handle,
+ ACPI_STRING Pathname)
+{
+ ACPI_STATUS Status;
+ ACPI_OPERAND_OBJECT *MutexObj;
+
+
+ /* Get the low-level mutex associated with Handle:Pathname */
+
+ Status = AcpiUtGetMutexObject (Handle, Pathname, &MutexObj);
+ if (ACPI_FAILURE (Status))
+ {
+ return (Status);
+ }
+
+ /* Release the OS mutex */
+
+ AcpiOsReleaseMutex (MutexObj->Mutex.OsMutex);
+ return (AE_OK);
+}
diff --git a/usr/src/uts/intel/io/dktp/dcdev/dadk.c b/usr/src/uts/intel/io/dktp/dcdev/dadk.c
index 35f97482b8..f74a0d4137 100644
--- a/usr/src/uts/intel/io/dktp/dcdev/dadk.c
+++ b/usr/src/uts/intel/io/dktp/dcdev/dadk.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, Joyent, Inc. All rights reserved.
*/
/*
@@ -170,6 +171,8 @@ static int dadk_debug = DGEOM;
#endif /* DADK_DEBUG */
+#define ONE_MIN ((longlong_t)60 * NANOSEC)
+
static int dadk_check_media_time = 3000000; /* 3 Second State Check */
static int dadk_dk_maxphys = 0x80000;
@@ -1376,6 +1379,47 @@ static struct dadkio_derr dadk_errtab[] = {
{COMMAND_DONE_ERROR, GDA_FATAL}, /* 23 DERR_RESV */
};
+/*
+ * A bad disk can result in a large number of errors spewed to the log.
+ * This can in turn lead to /var/adm/messages filling up the file system on
+ * a machine with a small root or /var file system.
+ *
+ * Instead of logging every error, if we're seeing repeated errors on a disk
+ * only log them periodically.
+ */
+static void
+dadk_logerr(struct dadk *dadkp, struct cmpkt *pktp, char *label,
+ int severity, daddr_t blkno, daddr_t err_blkno,
+ char **cmdvec, char **senvec)
+{
+ hrtime_t now;
+
+ now = gethrtime();
+ if ((now - dadkp->dad_last_log) < ONE_MIN) {
+ atomic_add_32(&dadkp->dad_err_cnt, 1);
+ return;
+ }
+
+ if (dadkp->dad_err_cnt > 0) {
+ dev_info_t *dev = dadkp->dad_sd->sd_dev;
+ char name[256], buf[256];
+
+ if (dev)
+ (void) snprintf(name, sizeof (name), "%s (%s%d)",
+ ddi_pathname(dev, buf), label,
+ ddi_get_instance(dev));
+ else
+ (void) strlcpy(name, label, sizeof (name));
+ cmn_err(CE_WARN, "%s: %d additional unlogged errors\n",
+ name, dadkp->dad_err_cnt);
+ }
+
+ gda_errmsg(dadkp->dad_sd, pktp, label, severity, blkno, err_blkno,
+ cmdvec, senvec);
+ dadkp->dad_err_cnt = 0;
+ dadkp->dad_last_log = now;
+}
+
static int
dadk_chkerr(struct cmpkt *pktp)
{
@@ -1462,7 +1506,7 @@ dadk_chkerr(struct cmpkt *pktp)
return (COMMAND_DONE);
}
if (pktp->cp_passthru == NULL) {
- gda_errmsg(dadkp->dad_sd, pktp, dadk_name,
+ dadk_logerr(dadkp, pktp, dadk_name,
dadk_errtab[scb].d_severity, pktp->cp_srtsec,
err_blkno, dadk_cmds, dadk_sense);
}
@@ -1519,7 +1563,7 @@ dadk_recorderr(struct cmpkt *pktp, struct dadkio_rwcmd *rwcmdp)
if (rwcmdp->flags & DADKIO_FLAG_SILENT)
return;
- gda_errmsg(dadkp->dad_sd, pktp, dadk_name, dadk_errtab[scb].d_severity,
+ dadk_logerr(dadkp, pktp, dadk_name, dadk_errtab[scb].d_severity,
rwcmdp->blkaddr, rwcmdp->status.failed_blk,
dadk_cmds, dadk_sense);
}
diff --git a/usr/src/uts/intel/io/ipmi/ipmivars.h b/usr/src/uts/intel/io/ipmi/ipmivars.h
index 7fd819cd3d..f547d6f043 100644
--- a/usr/src/uts/intel/io/ipmi/ipmivars.h
+++ b/usr/src/uts/intel/io/ipmi/ipmivars.h
@@ -78,6 +78,7 @@ struct ipmi_request {
#define SMIC_CTL_STS 1
#define SMIC_FLAGS 2
+struct ipmi_softc;
#define IPMI_BUSY 0x1
#define IPMI_CLOSING 0x2
diff --git a/usr/src/uts/intel/io/vmxnet/buildNumber.h b/usr/src/uts/intel/io/vmxnet/buildNumber.h
new file mode 100644
index 0000000000..97f18a3cbc
--- /dev/null
+++ b/usr/src/uts/intel/io/vmxnet/buildNumber.h
@@ -0,0 +1,12 @@
+#define BUILD_NUMBER \
+ "build-425873"
+#define BUILD_NUMBER_NUMERIC \
+ 425873
+#define BUILD_NUMBER_NUMERIC_STRING \
+ "425873"
+#define PRODUCT_BUILD_NUMBER \
+ "product-build-6261"
+#define PRODUCT_BUILD_NUMBER_NUMERIC \
+ 6261
+#define PRODUCT_BUILD_NUMBER_NUMERIC_STRING \
+ "6261"
diff --git a/usr/src/uts/intel/io/vmxnet/includeCheck.h b/usr/src/uts/intel/io/vmxnet/includeCheck.h
new file mode 100644
index 0000000000..c414d6daf5
--- /dev/null
+++ b/usr/src/uts/intel/io/vmxnet/includeCheck.h
@@ -0,0 +1,159 @@
+/*********************************************************
+ * Copyright (C) 1998 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation version 2.1 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the Lesser GNU General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ *********************************************************/
+
+/*********************************************************
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of VMware Inc. nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission of VMware Inc.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *********************************************************/
+
+/*********************************************************
+ * The contents of this file are subject to the terms of the Common
+ * Development and Distribution License (the "License") version 1.0
+ * and no later version. You may not use this file except in
+ * compliance with the License.
+ *
+ * You can obtain a copy of the License at
+ * http://www.opensource.org/licenses/cddl1.php
+ *
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ *********************************************************/
+
+/*
+ * includeCheck.h --
+ *
+ * Restrict include file use.
+ *
+ * In every .h file, define one or more of these
+ *
+ * INCLUDE_ALLOW_VMX
+ * INCLUDE_ALLOW_USERLEVEL
+ * INCLUDE_ALLOW_VMCORE
+ * INCLUDE_ALLOW_MODULE
+ * INCLUDE_ALLOW_VMKERNEL
+ * INCLUDE_ALLOW_DISTRIBUTE
+ * INCLUDE_ALLOW_VMK_MODULE
+ * INCLUDE_ALLOW_VMKDRIVERS
+ * INCLUDE_ALLOW_VMIROM
+ *
+ * Then include this file.
+ *
+ * Any file that has INCLUDE_ALLOW_DISTRIBUTE defined will potentially
+ * be distributed in source form along with GPLed code. Ensure
+ * that this is acceptable.
+ */
+
+
+/*
+ * Declare a VMCORE-only variable to help classify object
+ * files. The variable goes in the common block and does
+ * not create multiple definition link-time conflicts.
+ */
+
+#if defined VMCORE && defined VMX86_DEVEL && defined VMX86_DEBUG && \
+ defined linux && !defined MODULE && \
+ !defined COMPILED_WITH_VMCORE
+#define COMPILED_WITH_VMCORE compiled_with_vmcore
+#ifdef ASM
+ .comm compiled_with_vmcore, 0
+#else
+ asm(".comm compiled_with_vmcore, 0");
+#endif /* ASM */
+#endif
+
+
+#if defined VMCORE && \
+ !(defined VMX86_VMX || defined VMM || \
+ defined MONITOR_APP || defined VMMON)
+#error "Makefile problem: VMCORE without VMX86_VMX or \
+ VMM or MONITOR_APP or MODULE."
+#endif
+
+#if defined VMCORE && !defined INCLUDE_ALLOW_VMCORE
+#error "The surrounding include file is not allowed in vmcore."
+#endif
+#undef INCLUDE_ALLOW_VMCORE
+
+#if defined VMX86_VMX && !defined VMCORE && \
+ !(defined INCLUDE_ALLOW_VMX || defined INCLUDE_ALLOW_USERLEVEL)
+#error "The surrounding include file is not allowed in the VMX."
+#endif
+#undef INCLUDE_ALLOW_VMX
+
+#if defined USERLEVEL && !defined VMX86_VMX && !defined VMCORE && \
+ !defined INCLUDE_ALLOW_USERLEVEL
+#error "The surrounding include file is not allowed at userlevel."
+#endif
+#undef INCLUDE_ALLOW_USERLEVEL
+
+#if defined MODULE && !defined VMKERNEL_MODULE && \
+ !defined VMMON && !defined INCLUDE_ALLOW_MODULE
+#error "The surrounding include file is not allowed in driver modules."
+#endif
+#undef INCLUDE_ALLOW_MODULE
+
+#if defined VMMON && !defined INCLUDE_ALLOW_VMMON
+#error "The surrounding include file is not allowed in vmmon."
+#endif
+#undef INCLUDE_ALLOW_VMMON
+
+#if defined VMKERNEL && !defined INCLUDE_ALLOW_VMKERNEL
+#error "The surrounding include file is not allowed in the vmkernel."
+#endif
+#undef INCLUDE_ALLOW_VMKERNEL
+
+#if defined GPLED_CODE && !defined INCLUDE_ALLOW_DISTRIBUTE
+#error "The surrounding include file is not allowed in GPL code."
+#endif
+#undef INCLUDE_ALLOW_DISTRIBUTE
+
+#if defined VMKERNEL_MODULE && !defined VMKERNEL && \
+ !defined INCLUDE_ALLOW_VMK_MODULE && !defined INCLUDE_ALLOW_VMKDRIVERS
+#error "The surrounding include file is not allowed in vmkernel modules."
+#endif
+#undef INCLUDE_ALLOW_VMK_MODULE
+#undef INCLUDE_ALLOW_VMKDRIVERS
+
+#if defined VMIROM && ! defined INCLUDE_ALLOW_VMIROM
+#error "The surrounding include file is not allowed in vmirom."
+#endif
+#undef INCLUDE_ALLOW_VMIROM
diff --git a/usr/src/uts/intel/io/vmxnet/net.h b/usr/src/uts/intel/io/vmxnet/net.h
new file mode 100644
index 0000000000..41b6eb1d14
--- /dev/null
+++ b/usr/src/uts/intel/io/vmxnet/net.h
@@ -0,0 +1,220 @@
+/*********************************************************
+ * Copyright (C) 1998 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ *********************************************************/
+
+/*********************************************************
+ * The contents of this file are subject to the terms of the Common
+ * Development and Distribution License (the "License") version 1.0
+ * and no later version. You may not use this file except in
+ * compliance with the License.
+ *
+ * You can obtain a copy of the License at
+ * http://www.opensource.org/licenses/cddl1.php
+ *
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ *********************************************************/
+
+/************************************************************
+ *
+ * net.h
+ *
+ * This file should contain all network global defines.
+ * No vlance/vmxnet/vnet/vmknet specific stuff should be
+ * put here only defines used/usable by all network code.
+ * --gustav
+ *
+ ************************************************************/
+
+#ifndef VMWARE_DEVICES_NET_H
+#define VMWARE_DEVICES_NET_H
+
+#define INCLUDE_ALLOW_USERLEVEL
+#define INCLUDE_ALLOW_MODULE
+#define INCLUDE_ALLOW_VMCORE
+
+#include "includeCheck.h"
+#include "vm_device_version.h"
+
+#ifdef VMCORE
+#include "config.h"
+#include "str.h"
+#include "strutil.h"
+#endif
+
+#define ETHERNET_MTU 1518
+#define ETH_MIN_FRAME_LEN 60
+
+#ifndef ETHER_ADDR_LEN
+#define ETHER_ADDR_LEN 6 /* length of MAC address */
+#endif
+#define ETH_HEADER_LEN 14 /* length of Ethernet header */
+#define IP_ADDR_LEN 4 /* length of IPv4 address */
+#define IP_HEADER_LEN 20 /* minimum length of IPv4 header */
+
+#define ETHER_MAX_QUEUED_PACKET 1600
+
+
+/*
+ * State's that a NIC can be in currently we only use this
+ * in VLance but if we implement/emulate new adapters that
+ * we also want to be able to morph a new corresponding
+ * state should be added.
+ */
+
+#define LANCE_CHIP 0x2934
+#define VMXNET_CHIP 0x4392
+
+/*
+ * Size of reserved IO space needed by the LANCE adapter and
+ * the VMXNET adapter. If you add more ports to Vmxnet than
+ * there is reserved space you must bump VMXNET_CHIP_IO_RESV_SIZE.
+ * The sizes must be powers of 2.
+ */
+
+#define LANCE_CHIP_IO_RESV_SIZE 0x20
+#define VMXNET_CHIP_IO_RESV_SIZE 0x40
+
+#define MORPH_PORT_SIZE 4
+
+#ifdef VMCORE
+typedef struct Net_AdapterCount {
+ uint8 vlance;
+ uint8 vmxnet2;
+ uint8 vmxnet3;
+ uint8 e1000;
+ uint8 e1000e;
+} Net_AdapterCount;
+#endif
+
+#ifdef USERLEVEL
+
+/*
+ *----------------------------------------------------------------------------
+ *
+ * Net_AddAddrToLADRF --
+ *
+ * Given a MAC address, sets the corresponding bit in the LANCE style
+ * Logical Address Filter 'ladrf'.
+ * The caller should have initialized the ladrf to all 0's, as this
+ * function only ORs on a bit in the array.
+ * 'addr' is presumed to be ETHER_ADDR_LEN in size;
+ * 'ladrf' is presumed to point to a 64-bit vector.
+ *
+ * Derived from a long history of derivations, originally inspired by
+ * sample code from the AMD "Network Products: Ethernet Controllers 1998
+ * Data Book, Book 2", pages 1-53..1-55.
+ *
+ * Returns:
+ * None.
+ *
+ * Side effects:
+ * Updates 'ladrf'.
+ *
+ *----------------------------------------------------------------------------
+ */
+
+static INLINE void
+Net_AddAddrToLadrf(const uint8 *addr, // IN: pointer to MAC address
+ uint8 *ladrf) // IN/OUT: pointer to ladrf
+{
+#define CRC_POLYNOMIAL_BE 0x04c11db7UL /* Ethernet CRC, big endian */
+
+ uint16 hashcode;
+ int32 crc = 0xffffffff; /* init CRC for each address */
+ int32 j;
+ int32 bit;
+ int32 byte;
+
+ ASSERT(addr);
+ ASSERT(ladrf);
+
+ for (byte = 0; byte < ETHER_ADDR_LEN; byte++) { /* for each address byte */
+ /* process each address bit */
+ for (bit = *addr++, j = 0;
+ j < 8;
+ j++, bit >>= 1) {
+ crc = (crc << 1) ^ ((((crc < 0 ? 1 : 0) ^ bit) & 0x01) ?
+ CRC_POLYNOMIAL_BE : 0);
+ }
+ }
+ hashcode = (crc & 1); /* hashcode is 6 LSb of CRC ... */
+ for (j = 0; j < 5; j++) { /* ... in reverse order. */
+ hashcode = (hashcode << 1) | ((crc>>=1) & 1);
+ }
+
+ ladrf[hashcode >> 3] |= 1 << (hashcode & 0x07);
+}
+#endif // USERLEVEL
+
+#ifdef VMCORE
+/*
+ *----------------------------------------------------------------------
+ *
+ * Net_GetNumAdapters --
+ *
+ * Returns the number of each type of network adapter configured in this
+ * VM.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static INLINE void
+Net_GetNumAdapters(Net_AdapterCount *counts)
+{
+ uint32 i;
+
+ counts->vlance = 0;
+ counts->vmxnet2 = 0;
+ counts->vmxnet3 = 0;
+ counts->e1000 = 0;
+ counts->e1000e = 0;
+
+ for (i = 0; i < MAX_ETHERNET_CARDS; i++) {
+ char* adapterStr;
+
+ if (!Config_GetBool(FALSE, "ethernet%d.present", i)) {
+ continue;
+ }
+ adapterStr = Config_GetString("vlance", "ethernet%d.virtualDev", i);
+ if (Str_Strcasecmp(adapterStr, "vmxnet3") == 0) {
+ counts->vmxnet3++;
+ } else if (Str_Strcasecmp(adapterStr, "vlance") == 0) {
+ counts->vlance++;
+ } else if (Str_Strcasecmp(adapterStr, "vmxnet") == 0) {
+ counts->vmxnet2++;
+ } else if (Str_Strcasecmp(adapterStr, "e1000") == 0) {
+ counts->e1000++;
+ } else if (Str_Strcasecmp(adapterStr, "e1000e") == 0) {
+ counts->e1000e++;
+ } else {
+ LOG_ONCE(("%s: unknown adapter: %s\n", __FUNCTION__, adapterStr));
+ }
+ free(adapterStr);
+ }
+}
+
+#endif // VMCORE
+
+#endif // VMWARE_DEVICES_NET_H
diff --git a/usr/src/uts/intel/io/vmxnet/net_sg.h b/usr/src/uts/intel/io/vmxnet/net_sg.h
new file mode 100644
index 0000000000..f6c30fb2b5
--- /dev/null
+++ b/usr/src/uts/intel/io/vmxnet/net_sg.h
@@ -0,0 +1,84 @@
+/*********************************************************
+ * Copyright (C) 2000 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ *********************************************************/
+
+/*********************************************************
+ * The contents of this file are subject to the terms of the Common
+ * Development and Distribution License (the "License") version 1.0
+ * and no later version. You may not use this file except in
+ * compliance with the License.
+ *
+ * You can obtain a copy of the License at
+ * http://www.opensource.org/licenses/cddl1.php
+ *
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ *********************************************************/
+
+/*
+ * net_sg.h --
+ *
+ * Network packet scatter gather structure.
+ */
+
+
+#ifndef _NET_SG_H
+#define _NET_SG_H
+
+#define INCLUDE_ALLOW_USERLEVEL
+
+#define INCLUDE_ALLOW_MODULE
+#define INCLUDE_ALLOW_VMK_MODULE
+#define INCLUDE_ALLOW_VMKERNEL
+#define INCLUDE_ALLOW_DISTRIBUTE
+#include "includeCheck.h"
+
+#define NET_SG_DEFAULT_LENGTH 16
+
+/*
+ * A single scatter-gather element for a network packet.
+ * The address is split into low and high to save space.
+ * If we make it 64 bits then Windows pads things out such that
+ * we lose a lot of space for each scatter gather array.
+ * This adds up when you have embedded scatter-gather
+ * arrays for transmit and receive ring buffers.
+ */
+typedef struct NetSG_Elem {
+ uint32 addrLow;
+ uint16 addrHi;
+ uint16 length;
+} NetSG_Elem;
+
+typedef enum NetSG_AddrType {
+ NET_SG_MACH_ADDR,
+ NET_SG_PHYS_ADDR,
+ NET_SG_VIRT_ADDR,
+} NetSG_AddrType;
+
+typedef struct NetSG_Array {
+ uint16 addrType;
+ uint16 length;
+ NetSG_Elem sg[NET_SG_DEFAULT_LENGTH];
+} NetSG_Array;
+
+#define NET_SG_SIZE(len) (sizeof(NetSG_Array) + (len - NET_SG_DEFAULT_LENGTH) * sizeof(NetSG_Elem))
+
+#define NET_SG_MAKE_PA(elem) (PA)QWORD(elem.addrHi, elem.addrLow)
+#define NET_SG_MAKE_PTR(elem) (char *)(uintptr_t)QWORD(elem.addrHi, elem.addrLow)
+
+#endif
diff --git a/usr/src/uts/intel/io/vmxnet/vm_basic_types.h b/usr/src/uts/intel/io/vmxnet/vm_basic_types.h
new file mode 100644
index 0000000000..adeac1b708
--- /dev/null
+++ b/usr/src/uts/intel/io/vmxnet/vm_basic_types.h
@@ -0,0 +1,1037 @@
+/*********************************************************
+ * Copyright (C) 1998-2009 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation version 2.1 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the Lesser GNU General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ *********************************************************/
+
+/*********************************************************
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of VMware Inc. nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission of VMware Inc.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *********************************************************/
+
+/*********************************************************
+ * The contents of this file are subject to the terms of the Common
+ * Development and Distribution License (the "License") version 1.0
+ * and no later version. You may not use this file except in
+ * compliance with the License.
+ *
+ * You can obtain a copy of the License at
+ * http://www.opensource.org/licenses/cddl1.php
+ *
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ *********************************************************/
+
+/*
+ *
+ * vm_basic_types.h --
+ *
+ * basic data types.
+ */
+
+
+#ifndef _VM_BASIC_TYPES_H_
+#define _VM_BASIC_TYPES_H_
+
+#define INCLUDE_ALLOW_USERLEVEL
+
+#define INCLUDE_ALLOW_MODULE
+#define INCLUDE_ALLOW_VMMON
+#define INCLUDE_ALLOW_VMKERNEL
+#define INCLUDE_ALLOW_VMKDRIVERS
+#define INCLUDE_ALLOW_VMK_MODULE
+#define INCLUDE_ALLOW_DISTRIBUTE
+#define INCLUDE_ALLOW_VMCORE
+#define INCLUDE_ALLOW_VMIROM
+#include "includeCheck.h"
+
+/* STRICT ANSI means the Xserver build and X defines Bool differently. */
+#if !defined(_XTYPEDEF_BOOL) && \
+ (!defined(__STRICT_ANSI__) || defined(__FreeBSD__) || defined(__MINGW32__))
+#define _XTYPEDEF_BOOL
+typedef char Bool;
+#endif
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+#define IsBool(x) (((x) & ~1) == 0)
+#define IsBool2(x, y) ((((x) | (y)) & ~1) == 0)
+
+/*
+ * Macros __i386__ and __ia64 are intrinsically defined by GCC
+ */
+#if defined _MSC_VER && defined _M_X64
+# define __x86_64__
+#elif defined _MSC_VER && defined _M_IX86
+# define __i386__
+#endif
+
+#ifdef __i386__
+#define VM_I386
+#endif
+
+#ifdef __x86_64__
+#define VM_X86_64
+#define VM_I386
+#define vm_x86_64 (1)
+#else
+#define vm_x86_64 (0)
+#endif
+
+
+#ifdef _MSC_VER
+
+#pragma warning (3 :4505) // unreferenced local function
+#pragma warning (disable :4018) // signed/unsigned mismatch
+#pragma warning (disable :4761) // integral size mismatch in argument; conversion supplied
+#pragma warning (disable :4305) // truncation from 'const int' to 'short'
+#pragma warning (disable :4244) // conversion from 'unsigned short' to 'unsigned char'
+#pragma warning (disable :4267) // truncation of 'size_t'
+#pragma warning (disable :4146) // unary minus operator applied to unsigned type, result still unsigned
+#pragma warning (disable :4142) // benign redefinition of type
+
+#endif
+
+#if defined(__APPLE__) || defined(HAVE_STDINT_H)
+
+/*
+ * TODO: This is a C99 standard header. We should be able to test for
+ * #if __STDC_VERSION__ >= 199901L, but that breaks the Netware build
+ * (which doesn't have stdint.h).
+ */
+
+#include <stdint.h>
+
+typedef uint64_t uint64;
+typedef int64_t int64;
+typedef uint32_t uint32;
+typedef int32_t int32;
+typedef uint16_t uint16;
+typedef int16_t int16;
+typedef uint8_t uint8;
+typedef int8_t int8;
+
+/*
+ * Note: C does not specify whether char is signed or unsigned, and
+ * both gcc and msvc implement processor-specific signedness. With
+ * three types:
+ * typeof(char) != typeof(signed char) != typeof(unsigned char)
+ *
+ * Be careful here, because gcc (4.0.1 and others) likes to warn about
+ * conversions between signed char * and char *.
+ */
+
+#else /* !HAVE_STDINT_H */
+
+#ifdef _MSC_VER
+
+typedef unsigned __int64 uint64;
+typedef signed __int64 int64;
+
+#elif defined(__GNUC__) || defined(__SUNPRO_C)
+/* The Xserver source compiles with -ansi -pendantic */
+# if !defined(__STRICT_ANSI__) || defined(__FreeBSD__)
+# if defined(VM_X86_64)
+typedef unsigned long uint64;
+typedef long int64;
+# else
+typedef unsigned long long uint64;
+typedef long long int64;
+# endif
+# endif
+#else
+# error - Need compiler define for int64/uint64
+#endif /* _MSC_VER */
+
+typedef unsigned int uint32;
+typedef unsigned short uint16;
+typedef unsigned char uint8;
+
+typedef int int32;
+typedef short int16;
+typedef signed char int8;
+
+#endif /* HAVE_STDINT_H */
+
+/*
+ * FreeBSD (for the tools build) unconditionally defines these in
+ * sys/inttypes.h so don't redefine them if this file has already
+ * been included. [greg]
+ *
+ * This applies to Solaris as well.
+ */
+
+/*
+ * Before trying to do the includes based on OS defines, see if we can use
+ * feature-based defines to get as much functionality as possible
+ */
+
+#ifdef HAVE_INTTYPES_H
+#include <inttypes.h>
+#endif
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_INTTYPES_H
+#include <sys/inttypes.h>
+#endif
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+
+#ifdef __FreeBSD__
+#include <sys/param.h> /* For __FreeBSD_version */
+#endif
+
+#if !defined(USING_AUTOCONF)
+# if defined(__FreeBSD__) || defined(sun)
+# ifdef KLD_MODULE
+# include <sys/types.h>
+# else
+# if __FreeBSD_version >= 500043
+# if !defined(VMKERNEL)
+# include <inttypes.h>
+# endif
+# include <sys/types.h>
+# else
+# include <sys/inttypes.h>
+# endif
+# endif
+# elif defined __APPLE__
+# if KERNEL
+# include <sys/unistd.h>
+# include <sys/types.h> /* mostly for size_t */
+# include <stdint.h>
+# else
+# include <unistd.h>
+# include <inttypes.h>
+# include <stdlib.h>
+# include <stdint.h>
+# endif
+# else
+# if !defined(__intptr_t_defined) && !defined(intptr_t)
+# ifdef VM_I386
+# define __intptr_t_defined
+# ifdef VM_X86_64
+typedef int64 intptr_t;
+# else
+typedef int32 intptr_t;
+# endif
+# elif defined(__arm__)
+typedef int32 intptr_t;
+# endif
+# endif
+
+# ifndef _STDINT_H
+# ifdef VM_I386
+# ifdef VM_X86_64
+typedef uint64 uintptr_t;
+# else
+typedef uint32 uintptr_t;
+# endif
+# elif defined(__arm__)
+typedef uint32 uintptr_t;
+# endif
+# endif
+# endif
+#endif
+
+
+/*
+ * Time
+ * XXX These should be cleaned up. -- edward
+ */
+
+typedef int64 VmTimeType; /* Time in microseconds */
+typedef int64 VmTimeRealClock; /* Real clock kept in microseconds */
+typedef int64 VmTimeVirtualClock; /* Virtual Clock kept in CPU cycles */
+
+/*
+ * Printf format specifiers for size_t and 64-bit number.
+ * Use them like this:
+ * printf("%"FMT64"d\n", big);
+ *
+ * FMTH is for handles/fds.
+ */
+
+#ifdef _MSC_VER
+ #define FMT64 "I64"
+ #ifdef VM_X86_64
+ #define FMTSZ "I64"
+ #define FMTPD "I64"
+ #define FMTH "I64"
+ #else
+ #define FMTSZ "I"
+ #define FMTPD "I"
+ #define FMTH "I"
+ #endif
+#elif defined __APPLE__
+ /* Mac OS hosts use the same formatters for 32- and 64-bit. */
+ #define FMT64 "ll"
+ #if KERNEL
+ #define FMTSZ "l"
+ #else
+ #define FMTSZ "z"
+ #endif
+ #define FMTPD "l"
+ #define FMTH ""
+#elif defined(__GNUC__) || defined(__SUNPRO_C)
+ #define FMTH ""
+ #if defined(N_PLAT_NLM) || defined(sun) || \
+ (defined(__FreeBSD__) && (__FreeBSD__ + 0) && ((__FreeBSD__ + 0) < 5))
+ /*
+ * Why (__FreeBSD__ + 0)? See bug 141008.
+ * Yes, we really need to test both (__FreeBSD__ + 0) and
+ * ((__FreeBSD__ + 0) < 5). No, we can't remove "+ 0" from
+ * ((__FreeBSD__ + 0) < 5).
+ */
+ #ifdef VM_X86_64
+ #define FMTSZ "l"
+ #define FMTPD "l"
+ #else
+ #define FMTSZ ""
+ #define FMTPD ""
+ #endif
+ #elif defined(__linux__) \
+ || (defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L) \
+ || (defined(_POSIX_VERSION) && _POSIX_VERSION >= 200112L) \
+ || (defined(_POSIX2_VERSION) && _POSIX2_VERSION >= 200112L)
+ /* BSD, Linux */
+ #define FMTSZ "z"
+
+ #if defined(VM_X86_64)
+ #define FMTPD "l"
+ #else
+ #define FMTPD ""
+ #endif
+ #else
+ /* Systems with a pre-C99 libc */
+ #define FMTSZ "Z"
+ #ifdef VM_X86_64
+ #define FMTPD "l"
+ #else
+ #define FMTPD ""
+ #endif
+ #endif
+ #ifdef VM_X86_64
+ #define FMT64 "l"
+ #elif defined(sun) || defined(__FreeBSD__)
+ #define FMT64 "ll"
+ #else
+ #define FMT64 "L"
+ #endif
+#else
+ #error - Need compiler define for FMT64 and FMTSZ
+#endif
+
+/*
+ * Suffix for 64-bit constants. Use it like this:
+ * CONST64(0x7fffffffffffffff) for signed or
+ * CONST64U(0x7fffffffffffffff) for unsigned.
+ *
+ * 2004.08.30(thutt):
+ * The vmcore/asm64/gen* programs are compiled as 32-bit
+ * applications, but must handle 64 bit constants. If the
+ * 64-bit-constant defining macros are already defined, the
+ * definition will not be overwritten.
+ */
+
+#if !defined(CONST64) || !defined(CONST64U)
+#ifdef _MSC_VER
+#define CONST64(c) c##I64
+#define CONST64U(c) c##uI64
+#elif defined __APPLE__
+#define CONST64(c) c##LL
+#define CONST64U(c) c##uLL
+#elif defined(__GNUC__) || defined(__SUNPRO_C)
+#ifdef VM_X86_64
+#define CONST64(c) c##L
+#define CONST64U(c) c##uL
+#else
+#define CONST64(c) c##LL
+#define CONST64U(c) c##uLL
+#endif
+#else
+#error - Need compiler define for CONST64
+#endif
+#endif
+
+/*
+ * Use CONST3264/CONST3264U if you want a constant to be
+ * treated as a 32-bit number on 32-bit compiles and
+ * a 64-bit number on 64-bit compiles. Useful in the case
+ * of shifts, like (CONST3264U(1) << x), where x could be
+ * more than 31 on a 64-bit compile.
+ */
+
+#ifdef VM_X86_64
+ #define CONST3264(a) CONST64(a)
+ #define CONST3264U(a) CONST64U(a)
+#else
+ #define CONST3264(a) (a)
+ #define CONST3264U(a) (a)
+#endif
+
+#define MIN_INT8 ((int8)0x80)
+#define MAX_INT8 ((int8)0x7f)
+
+#define MIN_UINT8 ((uint8)0)
+#define MAX_UINT8 ((uint8)0xff)
+
+#define MIN_INT16 ((int16)0x8000)
+#define MAX_INT16 ((int16)0x7fff)
+
+#define MIN_UINT16 ((uint16)0)
+#define MAX_UINT16 ((uint16)0xffff)
+
+#define MIN_INT32 ((int32)0x80000000)
+#define MAX_INT32 ((int32)0x7fffffff)
+
+#define MIN_UINT32 ((uint32)0)
+#define MAX_UINT32 ((uint32)0xffffffff)
+
+#define MIN_INT64 (CONST64(0x8000000000000000))
+#define MAX_INT64 (CONST64(0x7fffffffffffffff))
+
+#define MIN_UINT64 (CONST64U(0))
+#define MAX_UINT64 (CONST64U(0xffffffffffffffff))
+
+typedef uint8 *TCA; /* Pointer into TC (usually). */
+
+/*
+ * Type big enough to hold an integer between 0..100
+ */
+typedef uint8 Percent;
+#define AsPercent(v) ((Percent)(v))
+#define CHOOSE_PERCENT AsPercent(101)
+
+
+typedef uintptr_t VA;
+typedef uintptr_t VPN;
+
+typedef uint64 PA;
+typedef uint32 PPN;
+
+typedef uint64 PhysMemOff;
+typedef uint64 PhysMemSize;
+
+/* The Xserver source compiles with -ansi -pendantic */
+#ifndef __STRICT_ANSI__
+typedef uint64 BA;
+#endif
+typedef uint32 BPN;
+typedef uint32 PageNum;
+typedef unsigned MemHandle;
+typedef int32 World_ID;
+
+/* !! do not alter the definition of INVALID_WORLD_ID without ensuring
+ * that the values defined in both bora/public/vm_basic_types.h and
+ * lib/vprobe/vm_basic_types.h are the same. Additionally, the definition
+ * of VMK_INVALID_WORLD_ID in vmkapi_world.h also must be defined with
+ * the same value
+ */
+
+#define INVALID_WORLD_ID ((World_ID)0)
+
+typedef World_ID User_CartelID;
+#define INVALID_CARTEL_ID INVALID_WORLD_ID
+
+typedef User_CartelID User_SessionID;
+#define INVALID_SESSION_ID INVALID_CARTEL_ID
+
+typedef User_CartelID User_CartelGroupID;
+#define INVALID_CARTELGROUP_ID INVALID_CARTEL_ID
+
+typedef uint32 Worldlet_ID;
+#define INVALID_WORLDLET_ID ((Worldlet_ID)-1)
+
+/* The Xserver source compiles with -ansi -pendantic */
+#ifndef __STRICT_ANSI__
+typedef uint64 MA;
+typedef uint32 MPN;
+#endif
+
+/*
+ * This type should be used for variables that contain sector
+ * position/quantity.
+ */
+typedef uint64 SectorType;
+
+/*
+ * Linear address
+ */
+
+typedef uintptr_t LA;
+typedef uintptr_t LPN;
+#define LA_2_LPN(_la) ((_la) >> PAGE_SHIFT)
+#define LPN_2_LA(_lpn) ((_lpn) << PAGE_SHIFT)
+
+#define LAST_LPN ((((LA) 1) << (8 * sizeof(LA) - PAGE_SHIFT)) - 1)
+#define LAST_LPN32 ((((LA32)1) << (8 * sizeof(LA32) - PAGE_SHIFT)) - 1)
+#define LAST_LPN64 ((((LA64)1) << (8 * sizeof(LA64) - PAGE_SHIFT)) - 1)
+
+/* Valid bits in a LPN. */
+#define LPN_MASK LAST_LPN
+#define LPN_MASK32 LAST_LPN32
+#define LPN_MASK64 LAST_LPN64
+
+/*
+ * On 64 bit platform, address and page number types default
+ * to 64 bit. When we need to represent a 32 bit address, we use
+ * types defined below.
+ *
+ * On 32 bit platform, the following types are the same as the
+ * default types.
+ */
+typedef uint32 VA32;
+typedef uint32 VPN32;
+typedef uint32 LA32;
+typedef uint32 LPN32;
+typedef uint32 PA32;
+typedef uint32 PPN32;
+typedef uint32 MA32;
+typedef uint32 MPN32;
+
+/*
+ * On 64 bit platform, the following types are the same as the
+ * default types.
+ */
+typedef uint64 VA64;
+typedef uint64 VPN64;
+typedef uint64 LA64;
+typedef uint64 LPN64;
+typedef uint64 PA64;
+typedef uint64 PPN64;
+typedef uint64 MA64;
+typedef uint64 MPN64;
+
+/*
+ * VA typedefs for user world apps.
+ */
+typedef VA32 UserVA32;
+typedef VA64 UserVA64;
+typedef UserVA64 UserVAConst; /* Userspace ptr to data that we may only read. */
+typedef UserVA32 UserVA32Const; /* Userspace ptr to data that we may only read. */
+typedef UserVA64 UserVA64Const; /* Used by 64-bit syscalls until conversion is finished. */
+#ifdef VMKERNEL
+typedef UserVA64 UserVA;
+#else
+typedef void * UserVA;
+#endif
+
+
+/*
+ * Maximal possible PPN value (errors too) that PhysMem can handle.
+ * Must be at least as large as MAX_PPN which is the maximum PPN
+ * for any region other than buserror.
+ */
+#define PHYSMEM_MAX_PPN ((PPN)0xffffffff)
+#define MAX_PPN ((PPN)0x1fffffff) /* Maximal observable PPN value. */
+#define INVALID_PPN ((PPN)0xffffffff)
+
+#define INVALID_BPN ((BPN)0x1fffffff)
+
+#define RESERVED_MPN ((MPN) 0)
+#define INVALID_MPN ((MPN)-1)
+#define MEMREF_MPN ((MPN)-2)
+#define RELEASED_MPN ((MPN)-3)
+#define MAX_MPN ((MPN)0x7fffffff) /* 43 bits of address space. */
+
+#define INVALID_LPN ((LPN)-1)
+#define INVALID_VPN ((VPN)-1)
+#define INVALID_LPN64 ((LPN64)-1)
+#define INVALID_PAGENUM ((PageNum)-1)
+
+
+/*
+ * Format modifier for printing VA, LA, and VPN.
+ * Use them like this: Log("%#"FMTLA"x\n", laddr)
+ */
+
+#if defined(VMM) || defined(FROBOS64) || vm_x86_64 || defined __APPLE__
+# define FMTLA "l"
+# define FMTVA "l"
+# define FMTVPN "l"
+#else
+# define FMTLA ""
+# define FMTVA ""
+# define FMTVPN ""
+#endif
+
+#ifndef EXTERN
+#define EXTERN extern
+#endif
+#define CONST const
+
+
+#ifndef INLINE
+# ifdef _MSC_VER
+# define INLINE __inline
+# else
+# define INLINE inline
+# endif
+#endif
+
+
+/*
+ * Annotation for data that may be exported into a DLL and used by other
+ * apps that load that DLL and import the data.
+ */
+#if defined(_WIN32) && defined(VMX86_IMPORT_DLLDATA)
+# define VMX86_EXTERN_DATA extern __declspec(dllimport)
+#else // !_WIN32
+# define VMX86_EXTERN_DATA extern
+#endif
+
+#if defined(_WIN32) && !defined(VMX86_NO_THREADS)
+#define THREADSPECIFIC __declspec(thread)
+#else
+#define THREADSPECIFIC
+#endif
+
+/*
+ * Due to the wonderful "registry redirection" feature introduced in
+ * 64-bit Windows, if you access any key under HKLM\Software in 64-bit
+ * code, you need to open/create/delete that key with
+ * VMKEY_WOW64_32KEY if you want a consistent view with 32-bit code.
+ */
+
+#ifdef _WIN32
+#ifdef _WIN64
+#define VMW_KEY_WOW64_32KEY KEY_WOW64_32KEY
+#else
+#define VMW_KEY_WOW64_32KEY 0x0
+#endif
+#endif
+
+
+/*
+ * Consider the following reasons functions are inlined:
+ *
+ * 1) inlined for performance reasons
+ * 2) inlined because it's a single-use function
+ *
+ * Functions which meet only condition 2 should be marked with this
+ * inline macro; It is not critical to be inlined (but there is a
+ * code-space & runtime savings by doing so), so when other callers
+ * are added the inline-ness should be removed.
+ */
+
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3)
+/*
+ * Starting at version 3.3, gcc does not always inline functions marked
+ * 'inline' (it depends on their size). To force gcc to do so, one must use the
+ * extra __always_inline__ attribute.
+ */
+# define INLINE_SINGLE_CALLER INLINE __attribute__((__always_inline__))
+#else
+# define INLINE_SINGLE_CALLER INLINE
+#endif
+
+/*
+ * Used when a hard guaranteed of no inlining is needed. Very few
+ * instances need this since the absence of INLINE is a good hint
+ * that gcc will not do inlining.
+ */
+
+#if defined(__GNUC__) && defined(VMM)
+#define ABSOLUTELY_NOINLINE __attribute__((__noinline__))
+#endif
+
+/*
+ * Attributes placed on function declarations to tell the compiler
+ * that the function never returns.
+ */
+
+#ifdef _MSC_VER
+#define NORETURN __declspec(noreturn)
+#elif __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 9)
+#define NORETURN __attribute__((__noreturn__))
+#else
+#define NORETURN
+#endif
+
+/*
+ * GCC 3.2 inline asm needs the + constraint for input/ouput memory operands.
+ * Older GCCs don't know about it --hpreg
+ */
+
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 2)
+# define VM_ASM_PLUS 1
+#else
+# define VM_ASM_PLUS 0
+#endif
+
+/*
+ * Branch prediction hints:
+ * LIKELY(exp) - Expression exp is likely TRUE.
+ * UNLIKELY(exp) - Expression exp is likely FALSE.
+ * Usage example:
+ * if (LIKELY(excCode == EXC_NONE)) {
+ * or
+ * if (UNLIKELY(REAL_MODE(vc))) {
+ *
+ * We know how to predict branches on gcc3 and later (hopefully),
+ * all others we don't so we do nothing.
+ */
+
+#if (__GNUC__ >= 3)
+/*
+ * gcc3 uses __builtin_expect() to inform the compiler of an expected value.
+ * We use this to inform the static branch predictor. The '!!' in LIKELY
+ * will convert any !=0 to a 1.
+ */
+#define LIKELY(_exp) __builtin_expect(!!(_exp), 1)
+#define UNLIKELY(_exp) __builtin_expect((_exp), 0)
+#else
+#define LIKELY(_exp) (_exp)
+#define UNLIKELY(_exp) (_exp)
+#endif
+
+/*
+ * GCC's argument checking for printf-like functions
+ * This is conditional until we have replaced all `"%x", void *'
+ * with `"0x%08x", (uint32) void *'. Note that %p prints different things
+ * on different platforms. Argument checking is enabled for the
+ * vmkernel, which has already been cleansed.
+ *
+ * fmtPos is the position of the format string argument, beginning at 1
+ * varPos is the position of the variable argument, beginning at 1
+ */
+
+#if defined(__GNUC__)
+# define PRINTF_DECL(fmtPos, varPos) __attribute__((__format__(__printf__, fmtPos, varPos)))
+#else
+# define PRINTF_DECL(fmtPos, varPos)
+#endif
+
+#if defined(__GNUC__)
+# define SCANF_DECL(fmtPos, varPos) __attribute__((__format__(__scanf__, fmtPos, varPos)))
+#else
+# define SCANF_DECL(fmtPos, varPos)
+#endif
+
+/*
+ * UNUSED_PARAM should surround the parameter name and type declaration,
+ * e.g. "int MyFunction(int var1, UNUSED_PARAM(int var2))"
+ *
+ */
+
+#ifndef UNUSED_PARAM
+# if defined(__GNUC__)
+# define UNUSED_PARAM(_parm) _parm __attribute__((__unused__))
+# else
+# define UNUSED_PARAM(_parm) _parm
+# endif
+#endif
+
+/*
+ * REGPARM defaults to REGPARM3; i.e., a request that gcc
+ * put the first three arguments in registers. (It is fine
+ * if the function has fewer than three arguments.) Gcc only.
+ * Syntactically, put REGPARM where you'd put INLINE or NORETURN.
+ *
+ * Note that 64-bit code already puts the first six arguments in
+ * registers, so these attributes are only useful for 32-bit code.
+ */
+
+#if defined(__GNUC__)
+# define REGPARM0 __attribute__((regparm(0)))
+# define REGPARM1 __attribute__((regparm(1)))
+# define REGPARM2 __attribute__((regparm(2)))
+# define REGPARM3 __attribute__((regparm(3)))
+# define REGPARM REGPARM3
+#else
+# define REGPARM0
+# define REGPARM1
+# define REGPARM2
+# define REGPARM3
+# define REGPARM
+#endif
+
+/*
+ * ALIGNED specifies minimum alignment in "n" bytes.
+ */
+
+#ifdef __GNUC__
+#define ALIGNED(n) __attribute__((__aligned__(n)))
+#else
+#define ALIGNED(n)
+#endif
+
+/*
+ * __func__ is a stringified function name that is part of the C99 standard. The block
+ * below defines __func__ on older systems where the compiler does not support that
+ * macro.
+ */
+#if defined(__GNUC__) \
+ && ((__GNUC__ == 2 && __GNUC_MINOR < 96) \
+ || (__GNUC__ < 2))
+# define __func__ __FUNCTION__
+#endif
+
+/*
+ * Once upon a time, this was used to silence compiler warnings that
+ * get generated when the compiler thinks that a function returns
+ * when it is marked noreturn. Don't do it. Use NOT_REACHED().
+ */
+
+#define INFINITE_LOOP() do { } while (1)
+
+/*
+ * On FreeBSD (for the tools build), size_t is typedef'd if _BSD_SIZE_T_
+ * is defined. Use the same logic here so we don't define it twice. [greg]
+ */
+#ifdef __FreeBSD__
+# ifdef _BSD_SIZE_T_
+# undef _BSD_SIZE_T_
+# ifdef VM_I386
+# ifdef VM_X86_64
+ typedef uint64 size_t;
+# else
+ typedef uint32 size_t;
+# endif
+# endif /* VM_I386 */
+# endif
+
+# ifdef _BSD_SSIZE_T_
+# undef _BSD_SSIZE_T_
+# ifdef VM_I386
+# ifdef VM_X86_64
+ typedef int64 ssize_t;
+# else
+ typedef int32 ssize_t;
+# endif
+# endif /* VM_I386 */
+# endif
+
+#else
+# ifndef _SIZE_T
+# ifdef VM_I386
+# define _SIZE_T
+# ifdef VM_X86_64
+ typedef uint64 size_t;
+# else
+ typedef uint32 size_t;
+# endif
+# elif defined(__arm__)
+# define _SIZE_T
+ typedef uint32 size_t;
+# endif
+# endif
+
+# if !defined(FROBOS) && !defined(_SSIZE_T) && !defined(_SSIZE_T_) && \
+ !defined(ssize_t) && !defined(__ssize_t_defined) && \
+ !defined(_SSIZE_T_DECLARED)
+# ifdef VM_I386
+# define _SSIZE_T
+# define __ssize_t_defined
+# define _SSIZE_T_DECLARED
+# ifdef VM_X86_64
+ typedef int64 ssize_t;
+# else
+ typedef int32 ssize_t;
+# endif
+# elif defined(__arm__)
+# define _SSIZE_T
+# define __ssize_t_defined
+# define _SSIZE_T_DECLARED
+ typedef int32 ssize_t;
+# endif
+# endif
+
+#endif
+
+/*
+ * Format modifier for printing pid_t. On sun the pid_t is a ulong, but on
+ * Linux it's an int.
+ * Use this like this: printf("The pid is %"FMTPID".\n", pid);
+ */
+#ifdef sun
+# ifdef VM_X86_64
+# define FMTPID "d"
+# else
+# define FMTPID "lu"
+# endif
+#else
+# define FMTPID "d"
+#endif
+
+/*
+ * Format modifier for printing uid_t. On Solaris 10 and earlier, uid_t
+ * is a ulong, but on other platforms it's an unsigned int.
+ * Use this like this: printf("The uid is %"FMTUID".\n", uid);
+ */
+#if defined(sun) && !defined(SOL11)
+# ifdef VM_X86_64
+# define FMTUID "u"
+# else
+# define FMTUID "lu"
+# endif
+#else
+# define FMTUID "u"
+#endif
+
+/*
+ * Format modifier for printing mode_t. On sun the mode_t is a ulong, but on
+ * Linux it's an int.
+ * Use this like this: printf("The mode is %"FMTMODE".\n", mode);
+ */
+#ifdef sun
+# ifdef VM_X86_64
+# define FMTMODE "o"
+# else
+# define FMTMODE "lo"
+# endif
+#else
+# define FMTMODE "o"
+#endif
+
+/*
+ * Format modifier for printing time_t. Most platforms define a time_t to be
+ * a long int, but on FreeBSD (as of 5.0, it seems), the time_t is a signed
+ * size quantity. Refer to the definition of FMTSZ to see why we need silly
+ * preprocessor arithmetic.
+ * Use this like this: printf("The mode is %"FMTTIME".\n", time);
+ */
+#if defined(__FreeBSD__) && (__FreeBSD__ + 0) && ((__FreeBSD__ + 0) >= 5)
+# define FMTTIME FMTSZ"d"
+#else
+# if defined(_MSC_VER)
+# ifndef _SAFETIME_H_
+# if (_MSC_VER < 1400) || defined(_USE_32BIT_TIME_T)
+# define FMTTIME "ld"
+# else
+# define FMTTIME FMT64"d"
+# endif
+# else
+# ifndef FMTTIME
+# error "safetime.h did not define FMTTIME"
+# endif
+# endif
+# else
+# define FMTTIME "ld"
+# endif
+#endif
+
+#ifdef __APPLE__
+/*
+ * Format specifier for all these annoying types such as {S,U}Int32
+ * which are 'long' in 32-bit builds
+ * and 'int' in 64-bit builds.
+ */
+# ifdef __LP64__
+# define FMTLI ""
+# else
+# define FMTLI "l"
+# endif
+
+/*
+ * Format specifier for all these annoying types such as NS[U]Integer
+ * which are 'int' in 32-bit builds
+ * and 'long' in 64-bit builds.
+ */
+# ifdef __LP64__
+# define FMTIL "l"
+# else
+# define FMTIL ""
+# endif
+#endif
+
+
+/*
+ * Define MXSemaHandle here so both vmmon and vmx see this definition.
+ */
+
+#ifdef _WIN32
+typedef uintptr_t MXSemaHandle;
+#else
+typedef int MXSemaHandle;
+#endif
+
+/*
+ * Define type for poll device handles.
+ */
+
+typedef int64 PollDevHandle;
+
+/*
+ * Define the utf16_t type.
+ */
+
+#if defined(_WIN32) && defined(_NATIVE_WCHAR_T_DEFINED)
+typedef wchar_t utf16_t;
+#else
+typedef uint16 utf16_t;
+#endif
+
+/*
+ * Define for point and rectangle types. Defined here so they
+ * can be used by other externally facing headers in bora/public.
+ */
+
+typedef struct VMPoint {
+ int x, y;
+} VMPoint;
+
+#if defined _WIN32 && defined USERLEVEL
+struct tagRECT;
+typedef struct tagRECT VMRect;
+#else
+typedef struct VMRect {
+ int left;
+ int top;
+ int right;
+ int bottom;
+} VMRect;
+#endif
+
+/*
+ * ranked locks "everywhere"
+ */
+
+typedef uint32 MX_Rank;
+
+#endif /* _VM_BASIC_TYPES_H_ */
diff --git a/usr/src/uts/intel/io/vmxnet/vm_device_version.h b/usr/src/uts/intel/io/vmxnet/vm_device_version.h
new file mode 100644
index 0000000000..7046594a6c
--- /dev/null
+++ b/usr/src/uts/intel/io/vmxnet/vm_device_version.h
@@ -0,0 +1,246 @@
+/*********************************************************
+ * Copyright (C) 1998 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation version 2.1 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the Lesser GNU General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ *********************************************************/
+
+#ifndef VM_DEVICE_VERSION_H
+#define VM_DEVICE_VERSION_H
+
+#define INCLUDE_ALLOW_USERLEVEL
+
+#define INCLUDE_ALLOW_MODULE
+#define INCLUDE_ALLOW_VMKERNEL
+#define INCLUDE_ALLOW_VMCORE
+#include "includeCheck.h"
+
+#ifdef _WIN32
+#ifdef __MINGW32__
+#include "initguid.h"
+#else
+#include "guiddef.h"
+#endif
+#endif
+
+/* LSILogic 53C1030 Parallel SCSI controller
+ * LSILogic SAS1068 SAS controller
+ */
+#define PCI_VENDOR_ID_LSILOGIC 0x1000
+#define PCI_DEVICE_ID_LSI53C1030 0x0030
+#define PCI_DEVICE_ID_LSISAS1068 0x0054
+
+/* Our own PCI IDs
+ * VMware SVGA II (Unified VGA)
+ * VMware SVGA (PCI Accelerator)
+ * VMware vmxnet (Idealized NIC)
+ * VMware vmxscsi (Abortive idealized SCSI controller)
+ * VMware chipset (Subsystem ID for our motherboards)
+ * VMware e1000 (Subsystem ID)
+ * VMware vmxnet3 (Uniform Pass Through NIC)
+ * VMware HD Audio codec
+ * VMware HD Audio controller
+ */
+#define PCI_VENDOR_ID_VMWARE 0x15AD
+#define PCI_DEVICE_ID_VMWARE_SVGA2 0x0405
+#define PCI_DEVICE_ID_VMWARE_SVGA 0x0710
+#define PCI_DEVICE_ID_VMWARE_NET 0x0720
+#define PCI_DEVICE_ID_VMWARE_SCSI 0x0730
+#define PCI_DEVICE_ID_VMWARE_VMCI 0x0740
+#define PCI_DEVICE_ID_VMWARE_CHIPSET 0x1976
+#define PCI_DEVICE_ID_VMWARE_82545EM 0x0750 /* single port */
+#define PCI_DEVICE_ID_VMWARE_82546EB 0x0760 /* dual port */
+#define PCI_DEVICE_ID_VMWARE_EHCI 0x0770
+#define PCI_DEVICE_ID_VMWARE_UHCI 0x0774
+#define PCI_DEVICE_ID_VMWARE_XHCI 0x0778
+#define PCI_DEVICE_ID_VMWARE_1394 0x0780
+#define PCI_DEVICE_ID_VMWARE_BRIDGE 0x0790
+#define PCI_DEVICE_ID_VMWARE_ROOTPORT 0x07A0
+#define PCI_DEVICE_ID_VMWARE_VMXNET3 0x07B0
+#define PCI_DEVICE_ID_VMWARE_VMXWIFI 0x07B8
+#define PCI_DEVICE_ID_VMWARE_PVSCSI 0x07C0
+#define PCI_DEVICE_ID_VMWARE_82574 0x07D0
+#define PCI_DEVICE_ID_VMWARE_HDAUDIO_CODEC 0x1975
+#define PCI_DEVICE_ID_VMWARE_HDAUDIO_CONTROLLER 0x1977
+
+/* The hypervisor device might grow. Please leave room
+ * for 7 more subfunctions.
+ */
+#define PCI_DEVICE_ID_VMWARE_HYPER 0x0800
+#define PCI_DEVICE_ID_VMWARE_VMI 0x0801
+
+#define PCI_DEVICE_VMI_CLASS 0x05
+#define PCI_DEVICE_VMI_SUBCLASS 0x80
+#define PCI_DEVICE_VMI_INTERFACE 0x00
+#define PCI_DEVICE_VMI_REVISION 0x01
+
+/* From linux/pci_ids.h:
+ * AMD Lance Ethernet controller
+ * BusLogic SCSI controller
+ * Ensoniq ES1371 sound controller
+ */
+#define PCI_VENDOR_ID_AMD 0x1022
+#define PCI_DEVICE_ID_AMD_VLANCE 0x2000
+#define PCI_VENDOR_ID_BUSLOGIC 0x104B
+#define PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER_NC 0x0140
+#define PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER 0x1040
+#define PCI_VENDOR_ID_ENSONIQ 0x1274
+#define PCI_DEVICE_ID_ENSONIQ_ES1371 0x1371
+
+/* From linux/pci_ids.h:
+ * Intel 82439TX (430 HX North Bridge)
+ * Intel 82371AB (PIIX4 South Bridge)
+ * Intel 82443BX (440 BX North Bridge and AGP Bridge)
+ * Intel 82545EM (e1000, server adapter, single port)
+ * Intel 82546EB (e1000, server adapter, dual port)
+ * Intel HECI (as embedded in ich9m)
+ */
+#define PCI_VENDOR_ID_INTEL 0x8086
+#define PCI_DEVICE_ID_INTEL_82439TX 0x7100
+#define PCI_DEVICE_ID_INTEL_82371AB_0 0x7110
+#define PCI_DEVICE_ID_INTEL_82371AB_2 0x7112
+#define PCI_DEVICE_ID_INTEL_82371AB_3 0x7113
+#define PCI_DEVICE_ID_INTEL_82371AB 0x7111
+#define PCI_DEVICE_ID_INTEL_82443BX 0x7190
+#define PCI_DEVICE_ID_INTEL_82443BX_1 0x7191
+#define PCI_DEVICE_ID_INTEL_82443BX_2 0x7192 /* Used when no AGP support */
+#define PCI_DEVICE_ID_INTEL_82545EM 0x100f
+#define PCI_DEVICE_ID_INTEL_82546EB 0x1010
+#define PCI_DEVICE_ID_INTEL_82574 0x10d3
+#define PCI_DEVICE_ID_INTEL_82574_APPLE 0x10f6
+#define PCI_DEVICE_ID_INTEL_HECI 0x2a74
+
+#define E1000E_PCI_DEVICE_ID_CONFIG_STR "e1000e.pci.deviceID"
+#define E1000E_PCI_SUB_VENDOR_ID_CONFIG_STR "e1000e.pci.subVendorID"
+#define E1000E_PCI_SUB_DEVICE_ID_CONFIG_STR "e1000e.pci.subDeviceID"
+
+/*
+ * Intel HD Audio controller and Realtek ALC885 codec.
+ */
+#define PCI_DEVICE_ID_INTEL_631XESB_632XESB 0x269a
+#define PCI_VENDOR_ID_REALTEK 0x10ec
+#define PCI_DEVICE_ID_REALTEK_ALC885 0x0885
+
+
+/*
+ * Fresco Logic xHCI (USB 3.0) Controller
+ */
+#define PCI_VENDOR_ID_FRESCO 0x1B73
+#define PCI_DEVICE_ID_FRESCO_FL1000 0x1000 // Original 1-port chip
+#define PCI_DEVICE_ID_FRESCO_FL1009 0x1009 // New 2-port chip (Driver 3.0.98+)
+#define PCI_DEVICE_ID_FRESCO_FL1400 0x1400 // Unknown (4-port? Dev hardware?)
+
+/*
+ * NEC/Renesas xHCI (USB 3.0) Controller
+ */
+#define PCI_VENDOR_ID_NEC 0x1033
+#define PCI_DEVICE_ID_NEC_UPD720200 0x0194
+#define PCI_REVISION_NEC_UPD720200 0x03
+#define PCI_FIRMWARE_NEC_UPD720200 0x3015
+
+
+/************* Strings for IDE Identity Fields **************************/
+#define VIDE_ID_SERIAL_STR "00000000000000000001" /* Must be 20 Bytes */
+#define VIDE_ID_FIRMWARE_STR "00000001" /* Must be 8 Bytes */
+
+/* No longer than 40 Bytes */
+#define VIDE_ATA_MODEL_STR PRODUCT_GENERIC_NAME " Virtual IDE Hard Drive"
+#define VIDE_ATAPI_MODEL_STR PRODUCT_GENERIC_NAME " Virtual IDE CDROM Drive"
+
+#define ATAPI_VENDOR_ID "NECVMWar" /* Must be 8 Bytes */
+#define ATAPI_PRODUCT_ID PRODUCT_GENERIC_NAME " IDE CDROM" /* Must be 16 Bytes */
+#define ATAPI_REV_LEVEL "1.00" /* Must be 4 Bytes */
+
+#define IDE_NUM_INTERFACES 2 /* support for two interfaces */
+#define IDE_DRIVES_PER_IF 2
+
+/************* Strings for SCSI Identity Fields **************************/
+#define SCSI_DISK_MODEL_STR PRODUCT_GENERIC_NAME " Virtual SCSI Hard Drive"
+#define SCSI_DISK_VENDOR_NAME COMPANY_NAME
+#define SCSI_DISK_REV_LEVEL "1.0"
+#define SCSI_CDROM_MODEL_STR PRODUCT_GENERIC_NAME " Virtual SCSI CDROM Drive"
+#define SCSI_CDROM_VENDOR_NAME COMPANY_NAME
+#define SCSI_CDROM_REV_LEVEL "1.0"
+
+/************* SCSI implementation limits ********************************/
+#define SCSI_MAX_CONTROLLERS 4 // Need more than 1 for MSCS clustering
+#define SCSI_MAX_DEVICES 16 // BT-958 emulates only 16
+#define PVSCSI_MAX_DEVICES 255 // 255 (including the controller)
+/*
+ * VSCSI_BV_INTS is the number of uint32's needed for a bit vector
+ * to cover all scsi devices per target.
+ */
+#define VSCSI_BV_INTS CEILING(PVSCSI_MAX_DEVICES, 8 * sizeof (uint32))
+#define SCSI_IDE_CHANNEL SCSI_MAX_CONTROLLERS
+#define SCSI_IDE_HOSTED_CHANNEL (SCSI_MAX_CONTROLLERS + 1)
+#define SCSI_MAX_CHANNELS (SCSI_MAX_CONTROLLERS + 2)
+
+/************* Strings for the VESA BIOS Identity Fields *****************/
+#define VBE_OEM_STRING COMPANY_NAME " SVGA"
+#define VBE_VENDOR_NAME COMPANY_NAME
+#define VBE_PRODUCT_NAME PRODUCT_GENERIC_NAME
+
+/************* PCI implementation limits ********************************/
+#define PCI_MAX_BRIDGES 15
+
+/************* Ethernet implementation limits ***************************/
+#define MAX_ETHERNET_CARDS 10
+
+/********************** Floppy limits ***********************************/
+#define MAX_FLOPPY_DRIVES 2
+
+/************* PCI Passthrough implementation limits ********************/
+#define MAX_PCI_PASSTHRU_DEVICES 6
+
+/************* USB implementation limits ********************************/
+#define MAX_USB_DEVICES_PER_HOST_CONTROLLER 127
+
+/************* Strings for Host USB Driver *******************************/
+
+#ifdef _WIN32
+
+/*
+ * Globally unique ID for the VMware device interface. Define INITGUID before including
+ * this header file to instantiate the variable.
+ */
+DEFINE_GUID(GUID_DEVICE_INTERFACE_VMWARE_USB_DEVICES,
+0x2da1fe75, 0xaab3, 0x4d2c, 0xac, 0xdf, 0x39, 0x8, 0x8c, 0xad, 0xa6, 0x65);
+
+/*
+ * Globally unique ID for the VMware device setup class.
+ */
+DEFINE_GUID(GUID_CLASS_VMWARE_USB_DEVICES,
+0x3b3e62a5, 0x3556, 0x4d7e, 0xad, 0xad, 0xf5, 0xfa, 0x3a, 0x71, 0x2b, 0x56);
+
+/*
+ * This string defines the device ID string of a VMware USB device.
+ * The format is USB\Vid_XXXX&Pid_YYYY, where XXXX and YYYY are the
+ * hexadecimal representations of the vendor and product ids, respectively.
+ *
+ * The official vendor ID for VMware, Inc. is 0x0E0F.
+ * The product id for USB generic devices is 0x0001.
+ */
+#define USB_VMWARE_DEVICE_ID_WIDE L"USB\\Vid_0E0F&Pid_0001"
+#define USB_DEVICE_ID_LENGTH (sizeof(USB_VMWARE_DEVICE_ID_WIDE) / sizeof(WCHAR))
+
+#ifdef UNICODE
+#define USB_PNP_SETUP_CLASS_NAME L"VMwareUSBDevices"
+#define USB_PNP_DRIVER_NAME L"vmusb"
+#else
+#define USB_PNP_SETUP_CLASS_NAME "VMwareUSBDevices"
+#define USB_PNP_DRIVER_NAME "vmusb"
+#endif
+#endif
+
+#endif /* VM_DEVICE_VERSION_H */
diff --git a/usr/src/uts/intel/io/vmxnet/vmnet_def.h b/usr/src/uts/intel/io/vmxnet/vmnet_def.h
new file mode 100644
index 0000000000..6e44aea2bb
--- /dev/null
+++ b/usr/src/uts/intel/io/vmxnet/vmnet_def.h
@@ -0,0 +1,91 @@
+/*********************************************************
+ * Copyright (C) 2004 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ *********************************************************/
+
+/*********************************************************
+ * The contents of this file are subject to the terms of the Common
+ * Development and Distribution License (the "License") version 1.0
+ * and no later version. You may not use this file except in
+ * compliance with the License.
+ *
+ * You can obtain a copy of the License at
+ * http://www.opensource.org/licenses/cddl1.php
+ *
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ *********************************************************/
+
+/*
+ * vmnet_def.h
+ *
+ * - definitions which are (mostly) not vmxnet or vlance specific
+ */
+
+#ifndef _VMNET_DEF_H_
+#define _VMNET_DEF_H_
+
+#define INCLUDE_ALLOW_USERLEVEL
+#define INCLUDE_ALLOW_VMCORE
+
+#define INCLUDE_ALLOW_MODULE
+#define INCLUDE_ALLOW_VMK_MODULE
+#define INCLUDE_ALLOW_VMKERNEL
+#define INCLUDE_ALLOW_DISTRIBUTE
+#include "includeCheck.h"
+
+#define VMNET_NAME_BUFFER_LEN 128 /* Increased for i18n. */
+#define VMNET_COAL_SCHEME_NAME_LEN 16
+
+
+/*
+ * capabilities - not all of these are implemented in the virtual HW
+ * (eg VLAN support is in the virtual switch) so even vlance
+ * can use them
+ */
+#define VMNET_CAP_SG 0x0001 /* Can do scatter-gather transmits. */
+#define VMNET_CAP_IP4_CSUM 0x0002 /* Can checksum only TCP/UDP over IPv4. */
+#define VMNET_CAP_HW_CSUM 0x0004 /* Can checksum all packets. */
+#define VMNET_CAP_HIGH_DMA 0x0008 /* Can DMA to high memory. */
+#define VMNET_CAP_TOE 0x0010 /* Supports TCP/IP offload. */
+#define VMNET_CAP_TSO 0x0020 /* Supports TCP Segmentation offload */
+#define VMNET_CAP_SW_TSO 0x0040 /* Supports SW TCP Segmentation */
+#define VMNET_CAP_VMXNET_APROM 0x0080 /* Vmxnet APROM support */
+#define VMNET_CAP_HW_TX_VLAN 0x0100 /* Can we do VLAN tagging in HW */
+#define VMNET_CAP_HW_RX_VLAN 0x0200 /* Can we do VLAN untagging in HW */
+#define VMNET_CAP_SW_VLAN 0x0400 /* Can we do VLAN tagging/untagging in SW */
+#define VMNET_CAP_WAKE_PCKT_RCV 0x0800 /* Can wake on network packet recv? */
+#define VMNET_CAP_ENABLE_INT_INLINE 0x1000 /* Enable Interrupt Inline */
+#define VMNET_CAP_ENABLE_HEADER_COPY 0x2000 /* copy header for vmkernel */
+#define VMNET_CAP_TX_CHAIN 0x4000 /* Guest can use multiple tx entries for a pkt */
+#define VMNET_CAP_RX_CHAIN 0x8000 /* a pkt can span multiple rx entries */
+#define VMNET_CAP_LPD 0x10000 /* large pkt delivery */
+#define VMNET_CAP_BPF 0x20000 /* BPF Support in VMXNET Virtual Hardware */
+#define VMNET_CAP_SG_SPAN_PAGES 0x40000 /* Can do scatter-gather span multiple pages transmits. */
+#define VMNET_CAP_IP6_CSUM 0x80000 /* Can do IPv6 csum offload. */
+#define VMNET_CAP_TSO6 0x100000 /* Can do TSO segmentation offload for IPv6 pkts. */
+#define VMNET_CAP_TSO256k 0x200000 /* Can do TSO segmentation offload for pkts up to 256kB. */
+#define VMNET_CAP_UPT 0x400000 /* Support UPT */
+#define VMNET_CAP_RDONLY_INETHDRS 0x800000 /* Modifies inet headers for TSO/CSUm */
+#define VMNET_CAP_NPA 0x1000000 /* Support NPA */
+#define VMNET_CAP_DCB 0x2000000 /* Support DCB */
+#define VMNET_CAP_OFFLOAD_8OFFSET 0x4000000 /* supports 8bit parameterized offsets */
+#define VMNET_CAP_OFFLOAD_16OFFSET 0x8000000 /* supports 16bit parameterized offsets */
+#define VMNET_CAP_IP6_CSUM_EXT_HDRS 0x10000000 /* support csum of ip6 ext hdrs */
+#define VMNET_CAP_TSO6_EXT_HDRS 0x20000000 /* support TSO for ip6 ext hdrs */
+#define VMNET_CAP_SCHED 0x40000000 /* compliant with network scheduling */
+#endif // _VMNET_DEF_H_
diff --git a/usr/src/uts/intel/io/vmxnet/vmxnet.c b/usr/src/uts/intel/io/vmxnet/vmxnet.c
new file mode 100644
index 0000000000..e0046d8deb
--- /dev/null
+++ b/usr/src/uts/intel/io/vmxnet/vmxnet.c
@@ -0,0 +1,2438 @@
+/*********************************************************
+ * Copyright (C) 2004 VMware, Inc. All rights reserved.
+ *
+ * The contents of this file are subject to the terms of the Common
+ * Development and Distribution License (the "License") version 1.0
+ * and no later version. You may not use this file except in
+ * compliance with the License.
+ *
+ * You can obtain a copy of the License at
+ * http://www.opensource.org/licenses/cddl1.php
+ *
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ *********************************************************/
+
+#include <sys/types.h>
+#include <sys/conf.h>
+#include <sys/debug.h>
+#include <sys/stropts.h>
+#include <sys/stream.h>
+#include <sys/strlog.h>
+#include <sys/kmem.h>
+#include <sys/stat.h>
+#include <sys/kstat.h>
+#include <sys/vtrace.h>
+#include <sys/dlpi.h>
+#include <sys/strsun.h>
+#include <sys/ethernet.h>
+#include <sys/modctl.h>
+#include <sys/errno.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/ddi_impldefs.h>
+#include <sys/gld.h>
+#include <sys/pci.h>
+#include <sys/strsubr.h>
+
+/*
+ * This used to be defined in sys/gld.h, but was flagged as private,
+ * and we used it anyway. Now it no longer exists, and we're stuck
+ * with it for the time being.
+ */
+#ifndef GLD_MAX_MULTICAST
+#define GLD_MAX_MULTICAST 64
+#endif
+
+#define __intptr_t_defined
+#define _STDINT_H
+#include "vm_basic_types.h"
+#include "vmxnet2_def.h"
+#include "vm_device_version.h"
+#include "net.h"
+#include "buildNumber.h"
+
+#define SOLVMXNET_SUCCESS 1
+#define SOLVMXNET_FAILURE 0
+
+#ifdef SOLVMXNET_DEBUG_LEVEL
+static int vxn_debug = SOLVMXNET_DEBUG_LEVEL;
+#define DPRINTF(n, args) if (vxn_debug>(n)) cmn_err args
+#else
+#define DPRINTF(n, args)
+#endif
+
+static char ident[] = "VMware Ethernet Adapter b" BUILD_NUMBER_NUMERIC_STRING;
+char _depends_on[] = {"misc/gld"};
+
+#define MAX_NUM_RECV_BUFFERS 128
+#define DEFAULT_NUM_RECV_BUFFERS 100
+#define MAX_NUM_XMIT_BUFFERS 128
+#define DEFAULT_NUM_XMIT_BUFFERS 100
+#define CRC_POLYNOMIAL_LE 0xedb88320UL
+#define SOLVMXNET_MAXNAME 20
+#define MAX_TX_WAIT_ON_STOP 2000
+
+#define ETHERALIGN 2
+#define SLACKBYTES 4
+#define MAXPKTBUF (14 + ETHERALIGN + ETHERMTU + SLACKBYTES)
+
+
+#define QHIWATER (MAX_NUM_RECV_BUFFERS*ETHERMTU)
+
+#define OUTB(dp, p, v) \
+ ddi_put8((dp)->vxnIOHdl, \
+ (uint8_t *)((caddr_t)((dp)->vxnIOp) + (p)), v)
+#define OUTW(dp, p, v) \
+ ddi_put16((dp)->vxnIOHdl, \
+ (uint16_t *)((caddr_t)((dp)->vxnIOp) + (p)), v)
+#define OUTL(dp, p, v) \
+ ddi_put32((dp)->vxnIOHdl, \
+ (uint32_t *)((caddr_t)((dp)->vxnIOp) + (p)), v)
+#define INB(dp, p) \
+ ddi_get8((dp)->vxnIOHdl, \
+ (uint8_t *)(((caddr_t)(dp)->vxnIOp) + (p)))
+#define INW(dp, p) \
+ ddi_get16((dp)->vxnIOHdl, \
+ (uint16_t *)(((caddr_t)(dp)->vxnIOp) + (p)))
+#define INL(dp, p) \
+ ddi_get32((dp)->vxnIOHdl, \
+ (uint32_t *)(((caddr_t)(dp)->vxnIOp) + (p)))
+
+#define VMXNET_INC(val, max) \
+ val++; \
+ if (UNLIKELY(val == max)) { \
+ val = 0; \
+ }
+
+#define TX_RINGBUF_MBLK(dp, idx) (dp->txRingBuf[idx].mblk)
+#define TX_RINGBUF_DMAMEM(dp, idx) (dp->txRingBuf[idx].dmaMem)
+
+typedef struct {
+ caddr_t buf; /* Virtual address */
+ uint32_t phyBuf; /* Physical address */
+ size_t bufLen; /* Buffer length */
+ ddi_dma_cookie_t cookie; /* Dma cookie */
+ uint_t cookieCount; /* Cookie count */
+ ddi_dma_handle_t dmaHdl; /* Dma handle */
+ ddi_acc_handle_t dataAccHdl; /* Dada access handle */
+} dma_buf_t;
+
+typedef struct rx_dma_buf {
+ dma_buf_t dmaDesc; /* Dma descriptor */
+ mblk_t *mblk; /* Streams message block */
+ frtn_t freeCB; /* Free callback */
+ struct vxn_softc *softc; /* Back pointer to softc */
+ struct rx_dma_buf *next; /* Next one in list */
+} rx_dma_buf_t;
+
+typedef struct vxn_stats {
+ uint32_t errxmt; /* Transmit errors */
+ uint32_t errrcv; /* Receive errors */
+ uint32_t runt; /* Runt packets */
+ uint32_t norcvbuf; /* Buffer alloc errors */
+ uint32_t interrupts; /* Interrupts */
+ uint32_t defer; /* Deferred transmits */
+} vxn_stats_t;
+
+typedef struct tx_ring_buf {
+ mblk_t *mblk;
+ dma_buf_t dmaMem;
+} tx_ring_buf_t;
+
+typedef struct vxn_softc {
+ char drvName[SOLVMXNET_MAXNAME]; /* Driver name string */
+ int unit; /* Driver instance */
+ vxn_stats_t stats; /* Stats */
+
+ dev_info_t *dip; /* Info pointer */
+ ddi_iblock_cookie_t iblockCookie; /* Interrupt block cookie */
+ gld_mac_info_t *macInfo; /* GLD mac info */
+ ddi_acc_handle_t confHdl; /* Configuration space handle */
+ ddi_acc_handle_t vxnIOHdl; /* I/O space handle */
+ caddr_t vxnIOp; /* I/O space pointer */
+ boolean_t morphed; /* Adapter morphed ? */
+
+ kmutex_t intrlock; /* Interrupt lock */
+ kmutex_t xmitlock; /* Transmit lock */
+ kmutex_t rxlistlock; /* Rx free pool lock */
+
+ boolean_t nicActive; /* NIC active flag */
+ boolean_t inIntr; /* Interrupt processing flag */
+
+ struct ether_addr devAddr; /* MAC address */
+
+ uint32_t vxnNumRxBufs; /* Number of reveice buffers */
+ uint32_t vxnNumTxBufs; /* Number of transmit buffers */
+
+ dma_buf_t driverDataDmaMem; /* Driver Data (dma handle) */
+ Vmxnet2_DriverData *driverData; /* Driver Data */
+ void *driverDataPhy; /* Driver Data busaddr pointer */
+ Vmxnet2_RxRingEntry *rxRing; /* Receive ring */
+ Vmxnet2_TxRingEntry *txRing; /* Transmit ring */
+ ddi_dma_handle_t txDmaHdl; /* Tx buffers dma handle */
+ rx_dma_buf_t *rxRingBuffPtr[MAX_NUM_RECV_BUFFERS];
+ /* DMA buffers associated with rxRing */
+ tx_ring_buf_t txRingBuf[MAX_NUM_XMIT_BUFFERS]; /* tx Ring buffers */
+
+ rx_dma_buf_t *rxFreeBufList;
+ uint32_t rxNumFreeBufs; /* current # of buffers in pool */
+ uint32_t rxMaxFreeBufs; /* max # of buffers in pool */
+
+ uint32_t txPending; /* Pending transmits */
+ uint32_t maxTxFrags; /* Max Tx fragments */
+
+ int multiCount; /* Multicast address count */
+ struct ether_addr multicastList[GLD_MAX_MULTICAST]; /* Multicast list */
+
+ struct vxn_softc *next; /* Circular list of instances */
+ struct vxn_softc *prev;
+} vxn_softc_t;
+
+/* used for rx buffers or buffers allocated by ddi_dma_mem_alloc() */
+static ddi_dma_attr_t vxn_dma_attrs = {
+ DMA_ATTR_V0, /* dma_attr version */
+ 0, /* dma_attr_addr_lo */
+ (uint64_t)0xFFFFFFFF, /* dma_attr_addr_hi */
+ 0x7FFFFFFF, /* dma_attr_count_max */
+ 4, /* dma_attr_align */
+ 0x3F, /* dma_attr_burstsizes */
+ 1, /* dma_attr_minxfer */
+ (uint64_t)0xFFFFFFFF, /* dma_attr_maxxfer */
+ (uint64_t)0xFFFFFFFF, /* dma_attr_seg */
+ 1, /* dma_attr_sgllen */
+ 1, /* dma_attr_granular */
+ 0, /* dma_attr_flags */
+};
+
+/* used for tx buffers */
+static ddi_dma_attr_t vxn_dma_attrs_tx = {
+ DMA_ATTR_V0, /* dma_attr version */
+ 0, /* dma_attr_addr_lo */
+ (uint64_t)0xFFFFFFFF, /* dma_attr_addr_hi */
+ 0x7FFFFFFF, /* dma_attr_count_max */
+ 1, /* dma_attr_align */
+ 0x3F, /* dma_attr_burstsizes */
+ 1, /* dma_attr_minxfer */
+ (uint64_t)0xFFFFFFFF, /* dma_attr_maxxfer */
+ (uint64_t)0xFFFFFFFF, /* dma_attr_seg */
+ 1, /* dma_attr_sgllen */
+ 1, /* dma_attr_granular */
+ 0, /* dma_attr_flags */
+};
+
+
+static struct ether_addr etherbroadcastaddr = {
+ {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}
+};
+
+static struct ddi_device_acc_attr vxn_buf_attrs = {
+ DDI_DEVICE_ATTR_V0,
+ DDI_STRUCTURE_LE_ACC,
+ DDI_STRICTORDER_ACC
+};
+
+static struct ddi_device_acc_attr dev_attr = {
+ DDI_DEVICE_ATTR_V0,
+ DDI_STRUCTURE_LE_ACC,
+ DDI_STRICTORDER_ACC
+};
+
+static vxn_softc_t vxnList; /* for debugging */
+static kmutex_t vxnListLock;
+
+static void *Vxn_Memset(void *s, int c, size_t n);
+static int Vxn_Reset(gld_mac_info_t *macInfo);
+static int Vxn_SetPromiscuous(gld_mac_info_t *macInfo, int flag);
+static int Vxn_GetStats(gld_mac_info_t *macInfo, struct gld_stats *gs);
+static void Vxn_ApplyAddressFilter(vxn_softc_t *dp);
+static int Vxn_SetMulticast(gld_mac_info_t *macinfo, uint8_t *ep, int flag);
+static int Vxn_SetMacAddress(gld_mac_info_t *macInfo, uint8_t *mac);
+static int Vxn_Start(gld_mac_info_t *macInfo);
+static int Vxn_Stop(gld_mac_info_t *macInfo);
+static void Vxn_FreeTxBuf(vxn_softc_t *dp, int idx);
+static int Vxn_EncapTxBuf(vxn_softc_t *dp, mblk_t *mp, Vmxnet2_TxRingEntry *xre,
+ tx_ring_buf_t *txBuf);
+static int Vxn_Send(gld_mac_info_t *macinfo, mblk_t *mp);
+static boolean_t Vxn_TxComplete(vxn_softc_t *dp, boolean_t *reschedp);
+static boolean_t Vxn_Receive(vxn_softc_t *dp);
+static u_int Vxn_Interrupt(gld_mac_info_t *macInfo);
+static void Vxn_ReclaimRxBuf(rx_dma_buf_t *rxDesc);
+static void Vxn_FreeRxBuf(rx_dma_buf_t *rxDesc);
+static rx_dma_buf_t *Vxn_AllocRxBuf(vxn_softc_t *dp, int cansleep);
+static void Vxn_FreeInitBuffers(vxn_softc_t *dp);
+static int Vxn_AllocInitBuffers(vxn_softc_t *dp);
+static void Vxn_FreeDmaMem(dma_buf_t *dma);
+static int Vxn_AllocDmaMem(vxn_softc_t *dp, int size, int cansleep, dma_buf_t *dma);
+static void Vxn_FreeDriverData(vxn_softc_t *dp);
+static int Vxn_AllocDriverData(vxn_softc_t *dp);
+static int Vxn_Attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
+static int Vxn_Detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
+static int Vxn_AllocRxBufPool(vxn_softc_t *dp);
+static void Vxn_FreeRxBufPool(vxn_softc_t *dp);
+static rx_dma_buf_t * Vxn_AllocRxBufFromPool(vxn_softc_t *dp);
+static void Vxn_FreeRxBufToPool(rx_dma_buf_t *rxDesc);
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_Memset --
+ * memset() (Because bzero does not get resolved by module loader)
+ *
+ * Results:
+ * pointer to the memory area s
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static void *
+Vxn_Memset(void *s, int c, size_t n)
+{
+ while (n--) {
+ ((uint8_t *)s)[n] = c;
+ }
+
+ return s;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_Reset --
+ * Stub routine to reset hardware. Presently does nothing. Start/Stop should
+ * take care of resets.
+ *
+ * Results:
+ * None
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_Reset(gld_mac_info_t *macInfo)
+{
+ return GLD_SUCCESS;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_SetPromiscuous --
+ * Set/Reset NIC to/from promiscuous mode
+ *
+ * Results:
+ * GLD_SUCCESS
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_SetPromiscuous(gld_mac_info_t *macInfo, int flag)
+{
+ vxn_softc_t *dp = (vxn_softc_t *)macInfo->gldm_private;
+ Vmxnet2_DriverData *dd = dp->driverData;
+
+ mutex_enter(&dp->intrlock);
+ if (flag == GLD_MAC_PROMISC_PHYS) {
+ dd->ifflags |= VMXNET_IFF_PROMISC;
+ } else if (flag == GLD_MAC_PROMISC_MULTI) {
+ /*
+ * This should really set VMXNET_IFF_ALLMULTI,
+ * but unfortunately it doesn't exist. The next
+ * best thing would be to set the LADRFs to all
+ * 0xFFs and set VMXNET_IFF_MULTICAST, but that
+ * opens up a whole new set of potential pitfalls,
+ * so this is a reasonable temporary solution.
+ */
+ dd->ifflags |= VMXNET_IFF_PROMISC;
+ } else if (flag == GLD_MAC_PROMISC_NONE) {
+ dd->ifflags &= ~VMXNET_IFF_PROMISC;
+ } else {
+ /* This could be GLD_MAC_PROMISC_NOOP? */
+ mutex_exit(&dp->intrlock);
+ cmn_err(CE_WARN, "%s%d: Vxn_SetPromiscuous: Unexpected mode flag: 0x%x",
+ dp->drvName, dp->unit, flag);
+
+ return GLD_FAILURE;
+ }
+
+ OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_UPDATE_IFF);
+ mutex_exit(&dp->intrlock);
+
+ return GLD_SUCCESS;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_GetStats --
+ * Get driver specific stats
+ *
+ * Results:
+ * GLD_SUCCESS
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_GetStats(gld_mac_info_t *macInfo, struct gld_stats *gs)
+{
+ vxn_softc_t *dp = (vxn_softc_t *)macInfo->gldm_private;
+
+ gs->glds_errxmt = dp->stats.errxmt;
+ gs->glds_errrcv = dp->stats.errrcv;
+ gs->glds_short = dp->stats.runt;
+ gs->glds_norcvbuf = dp->stats.norcvbuf;
+ gs->glds_intr = dp->stats.interrupts;
+ gs->glds_defer = dp->stats.defer;
+
+ return GLD_SUCCESS;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_ApplyAddressFilter --
+ * Go over multicast list and compute/apply address filter
+ *
+ * Results:
+ * None
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static void
+Vxn_ApplyAddressFilter(vxn_softc_t *dp)
+{
+ uint8_t *ep;
+ int i, j, bit, byte;
+ uint32_t crc, poly = CRC_POLYNOMIAL_LE;
+ Vmxnet2_DriverData *dd = dp->driverData;
+ volatile uint16_t *mcastTable = (uint16_t *)dd->LADRF;
+
+ ASSERT(MUTEX_HELD(&dp->intrlock));
+
+ /* clear the multicast filter */
+ dd->LADRF[0] = 0;
+ dd->LADRF[1] = 0;
+
+ for (i = 0; i < dp->multiCount; i++) {
+ crc = 0xffffffff;
+ ep = (uint8_t *)&dp->multicastList[i].ether_addr_octet;
+
+ for (byte = 0; byte < 6; byte++) {
+ for (bit = *ep++, j = 0; j < 8; j++, bit >>= 1) {
+ int test;
+
+ test = ((bit ^ crc) & 0x01);
+ crc >>= 1;
+
+ if (test) {
+ crc = crc ^ poly;
+ }
+ }
+ }
+
+ crc = crc >> 26;
+ mcastTable[crc >> 4] |= 1 << (crc & 0xf);
+ }
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_SetMulticast --
+ * Add delete entry from multicast list
+ *
+ * Results:
+ * GLD_FAILURE on failure
+ * GLD_SUCCESS on success
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_SetMulticast(gld_mac_info_t *macinfo, uint8_t *ep, int flag)
+{
+ int i;
+ int copyLen;
+ vxn_softc_t *dp = (vxn_softc_t *)macinfo->gldm_private;
+ Vmxnet2_DriverData *dd = dp->driverData;
+
+ if (flag == GLD_MULTI_ENABLE) {
+ /*
+ * Exceeded multicast address limit
+ */
+ if (dp->multiCount >= GLD_MAX_MULTICAST) {
+ return GLD_FAILURE;
+ }
+
+ /*
+ * Add mac address to multicast list
+ */
+ bcopy(ep, dp->multicastList[dp->multiCount].ether_addr_octet,
+ ETHERADDRL);
+ dp->multiCount++;
+ }
+ else {
+ for (i=0; i<dp->multiCount; i++) {
+ if (bcmp(ep, dp->multicastList[i].ether_addr_octet, ETHERADDRL) == 0) {
+ goto found;
+ }
+ }
+ return GLD_FAILURE;
+
+ found:
+ /*
+ * Delete mac address from multicast list
+ */
+ copyLen = (dp->multiCount - (i+1)) * sizeof(struct ether_addr);
+ if (copyLen > 0) {
+ bcopy(&dp->multicastList[i+1], &dp->multicastList[i], copyLen);
+ }
+ dp->multiCount--;
+ }
+
+ /*
+ * Compute address filter from list of addressed and apply it
+ */
+ mutex_enter(&dp->intrlock);
+ Vxn_ApplyAddressFilter(dp);
+
+ if (dp->multiCount) {
+ ASSERT(dd->LADRF[0] || dd->LADRF[1]);
+ dd->ifflags |= VMXNET_IFF_MULTICAST;
+ } else {
+ ASSERT(!(dd->LADRF[0] || dd->LADRF[1]));
+ dd->ifflags &= ~VMXNET_IFF_MULTICAST;
+ }
+
+ OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_UPDATE_IFF);
+ OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_UPDATE_LADRF);
+ mutex_exit(&dp->intrlock);
+
+ return GLD_SUCCESS;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_SetMacAddress --
+ * Change device MAC address
+ *
+ * Results:
+ * GLD_SUCCESS
+ * GLD_FAILURE
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_SetMacAddress(gld_mac_info_t *macInfo, uint8_t *mac)
+{
+ int i;
+ int err = GLD_SUCCESS;
+ vxn_softc_t * dp = (vxn_softc_t *)macInfo->gldm_private;
+
+ mutex_enter(&dp->intrlock);
+ mutex_enter(&dp->xmitlock);
+
+ /*
+ * Don't change MAC address on a running NIC
+ */
+ if (dp->nicActive) {
+ err = GLD_FAILURE;
+ goto out;
+ }
+
+ /*
+ * Save new MAC address
+ */
+ for (i = 0; i < 6; i++) {
+ dp->devAddr.ether_addr_octet[i] = mac[i];
+ }
+
+ /*
+ * Push new MAC address down into hardware
+ */
+ for (i = 0; i < 6; i++) {
+ OUTB(dp, VMXNET_MAC_ADDR + i, mac[i]);
+ }
+
+out:
+ mutex_exit(&dp->xmitlock);
+ mutex_exit(&dp->intrlock);
+ return err;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_Start --
+ * Device start routine. Called on "ifconfig plumb"
+ *
+ * Results:
+ * GLD_SUCCESS
+ * GLD_FAILURE
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_Start(gld_mac_info_t *macInfo)
+{
+ int err = GLD_SUCCESS;
+ uint32_t r, capabilities, features;
+ vxn_softc_t * dp = (vxn_softc_t *)macInfo->gldm_private;
+
+ mutex_enter(&dp->intrlock);
+ mutex_enter(&dp->xmitlock);
+
+ if (!dp->nicActive) {
+ /*
+ * Register ring structure with hardware
+ *
+ * This downcast is OK because we requested a 32-bit physical address
+ */
+ OUTL(dp, VMXNET_INIT_ADDR, (uint32_t)(uintptr_t)dp->driverDataPhy);
+ OUTL(dp, VMXNET_INIT_LENGTH, dp->driverData->length);
+
+ /*
+ * Make sure registeration succeded
+ */
+ r = INL(dp, VMXNET_INIT_LENGTH);
+ if (!r) {
+ cmn_err(CE_WARN, "%s%d: Vxn_Start: failed to register ring",
+ dp->drvName, dp->unit);
+ err = GLD_FAILURE;
+ goto out;
+ }
+
+ /*
+ * Get maximum tx fragments supported
+ */
+ OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_GET_CAPABILITIES);
+ capabilities = INL(dp, VMXNET_COMMAND_ADDR);
+
+ OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_GET_FEATURES);
+ features = INL(dp, VMXNET_COMMAND_ADDR);
+
+ DPRINTF(3, (CE_CONT, "%s%d: chip capabilities=0x%x features=0x%x\n",
+ dp->drvName, dp->unit, capabilities, features));
+
+ if ((capabilities & VMNET_CAP_SG) &&
+ (features & VMXNET_FEATURE_ZERO_COPY_TX)) {
+ dp->maxTxFrags = VMXNET2_SG_DEFAULT_LENGTH;
+ } else {
+ dp->maxTxFrags = 1;
+ }
+ ASSERT(dp->maxTxFrags >= 1);
+
+ /*
+ * Alloc Tx DMA handle
+ */
+ vxn_dma_attrs_tx.dma_attr_sgllen = dp->maxTxFrags;
+ if (ddi_dma_alloc_handle(dp->dip, &vxn_dma_attrs_tx, DDI_DMA_SLEEP,
+ NULL, &dp->txDmaHdl) != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "%s%d: Vxn_Start: failed to alloc tx dma handle",
+ dp->drvName, dp->unit);
+ err = GLD_FAILURE;
+ goto out;
+ }
+
+ /*
+ * Enable interrupts on the card
+ */
+ dp->driverData->ifflags |= VMXNET_IFF_BROADCAST | VMXNET_IFF_DIRECTED;
+
+ OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_INTR_ENABLE);
+ OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_UPDATE_IFF);
+ OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_UPDATE_LADRF);
+
+ dp->nicActive = TRUE;
+ }
+
+out:
+ mutex_exit(&dp->xmitlock);
+ mutex_exit(&dp->intrlock);
+ return err;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_Stop --
+ * Device stop routine. Called on "ifconfig unplumb"
+ *
+ * Results:
+ * GLD_SUCCESS
+ * GLD_FAILURE
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_Stop(gld_mac_info_t *macInfo)
+{
+ int i;
+ int err = GLD_SUCCESS;
+ vxn_softc_t * dp = (vxn_softc_t *)macInfo->gldm_private;
+ boolean_t resched;
+
+ mutex_enter(&dp->intrlock);
+ mutex_enter(&dp->xmitlock);
+
+ if (!dp->nicActive) {
+ goto out;
+ }
+
+ /*
+ * Disable interrupts
+ */
+ OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_INTR_DISABLE);
+
+ /*
+ * Wait for pending transmits
+ */
+ if (dp->txPending) {
+ for (i=0; i < MAX_TX_WAIT_ON_STOP && dp->txPending; i++) {
+ delay(drv_usectohz(1000));
+ OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_CHECK_TX_DONE);
+ (void) Vxn_TxComplete(dp, &resched);
+ /*
+ * Don't worry about rescheduling transmits - GLD handles
+ * this automatically.
+ */
+ }
+ }
+ if (dp->txPending) {
+ cmn_err(CE_WARN, "%s%d: Vxn_Stop: giving up on %d pending transmits",
+ dp->drvName, dp->unit, dp->txPending);
+ }
+
+ OUTL(dp, VMXNET_INIT_ADDR, 0);
+ dp->nicActive = FALSE;
+
+ /*
+ * Free Tx DMA handle
+ *
+ * The ddi_dma_free_handle() man page says that ddi_dma_unbind_handle() must be called
+ * prior to calling ddi_dma_free_handle().
+ * However, call to ddi_dma_unbind_handle() is not required here, because
+ * ddi_dma_addr_bind_handle() and matching ddi_dma_unbind_handle() are called from
+ * Vxn_EncapTxBuf().
+ * xmitlock is held in Vxn_EncapTxBuf() as well as acquired above in Vxn_Stop().
+ */
+ ddi_dma_free_handle(&dp->txDmaHdl);
+ dp->txDmaHdl = NULL;
+
+out:
+ mutex_exit(&dp->xmitlock);
+ mutex_exit(&dp->intrlock);
+ return err;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_FreeTxBuf --
+ * Free transmit buffer
+ *
+ * Results:
+ * None
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static void
+Vxn_FreeTxBuf(vxn_softc_t *dp, int idx)
+{
+ mblk_t **txMblkp = &TX_RINGBUF_MBLK(dp, idx);
+ dma_buf_t *dmaMem = &TX_RINGBUF_DMAMEM(dp, idx);
+
+ if (*txMblkp) {
+ freemsg(*txMblkp);
+ *txMblkp = NULL;
+ }
+
+ if (dmaMem->buf) {
+ Vxn_FreeDmaMem(dmaMem);
+ ASSERT(dmaMem->buf == NULL);
+ }
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_EncapTxBuf --
+ * Go over dma mappings of Tx buffers and drop buffer physical address
+ * into ring entry
+ *
+ * Results:
+ * SOLVMXNET_SUCCESS on success
+ * SOLVMXNET_FAILURE on failure
+ *
+ * Side effects:
+ * None
+ *---------------- -------------------------------------------------------------
+ */
+static int
+Vxn_EncapTxBuf(vxn_softc_t *dp,
+ mblk_t *mp,
+ Vmxnet2_TxRingEntry *xre,
+ tx_ring_buf_t *txBuf)
+{
+ int frag;
+ int fragcount;
+ int rval;
+ mblk_t *tp;
+ mblk_t *mblk;
+ boolean_t needPullup = FALSE;
+ boolean_t dmaMemAlloced = FALSE;
+
+ ASSERT(txBuf);
+ ASSERT(txBuf->mblk == NULL);
+ ASSERT(MUTEX_HELD(&dp->xmitlock));
+
+ xre->sg.length = 0;
+ xre->flags = 0;
+
+ fragcount = 0;
+ for (tp = mp; tp != NULL; tp = tp->b_cont) {
+ fragcount++;
+ }
+ if (fragcount > dp->maxTxFrags) {
+ needPullup = TRUE;
+ }
+
+pullup:
+ frag = 0;
+ if (needPullup) {
+ if (!(mblk = msgpullup(mp, -1))) {
+ cmn_err(CE_WARN, "%s%d: Vxn_EncapTxBuf: msgpullup failed",
+ dp->drvName, dp->unit);
+ goto err;
+ }
+ } else {
+ mblk = mp;
+ }
+
+ /*
+ * Go through message chain and drop packet pointers into ring
+ * scatter/gather array
+ */
+ for (tp = mblk; tp != NULL; tp = tp->b_cont) {
+
+ uint_t nCookies;
+ ddi_dma_cookie_t dmaCookie;
+ int len = tp->b_wptr - tp->b_rptr;
+
+ if (len) {
+ /*
+ * Associate tx buffer with dma handle
+ */
+ ASSERT(dp->txDmaHdl);
+ if ((rval = ddi_dma_addr_bind_handle(dp->txDmaHdl, NULL, (caddr_t)tp->b_rptr,
+ len, DDI_DMA_RDWR | DDI_DMA_STREAMING,
+ DDI_DMA_DONTWAIT, NULL,
+ &dmaCookie, &nCookies))
+ != DDI_DMA_MAPPED) {
+
+ /*
+ * Try to handle bind failure caused by a page boundary spill
+ * by allocating a private dma buffer and copying data into it
+ */
+ if ((rval == DDI_DMA_TOOBIG) && !dmaMemAlloced ) {
+ /*
+ * Force pullup
+ */
+ if (!needPullup && (dp->maxTxFrags > 1)) {
+ needPullup = TRUE;
+ goto pullup;
+ }
+
+ if (Vxn_AllocDmaMem(dp, len, FALSE, &txBuf->dmaMem)
+ != SOLVMXNET_SUCCESS) {
+ goto err;
+ }
+
+ dmaMemAlloced = TRUE;
+
+ /*
+ * Copy data into DMA capable buffer
+ */
+ bcopy(tp->b_rptr, txBuf->dmaMem.buf, len);
+
+ /*
+ * Stick buffer physical addr in the ring
+ */
+ xre->sg.sg[frag].addrLow = txBuf->dmaMem.phyBuf;
+ xre->sg.sg[frag].length = len;
+ frag++;
+
+ continue;
+
+ } else {
+ cmn_err(CE_WARN, "%s%d: Vxn_EncapTxBuf: failed (%d) to bind dma "
+ "handle for len %d. [dmaMemAlloced=%d]",
+ dp->drvName, dp->unit, rval, len, dmaMemAlloced);
+ goto err;
+ }
+ }
+
+ /*
+ * Extract tx buffer physical addresses from cookie
+ */
+ while (nCookies) {
+ if (UNLIKELY(frag == dp->maxTxFrags)) {
+ (void)ddi_dma_unbind_handle(dp->txDmaHdl);
+
+ if (!needPullup) {
+ ASSERT(!dmaMemAlloced);
+ needPullup = TRUE;
+ goto pullup;
+ } else {
+ cmn_err(CE_WARN, "%s%d: Vxn_EncapTxBuf: "
+ "exceeded max (%d) fragments in message",
+ dp->drvName, dp->unit, dp->maxTxFrags);
+ goto err;
+ }
+ }
+
+ /*
+ * Stick it in the ring
+ */
+ xre->sg.sg[frag].addrLow = dmaCookie.dmac_address;
+ xre->sg.sg[frag].length = dmaCookie.dmac_size;
+ frag++;
+
+ if (--nCookies) {
+ ddi_dma_nextcookie(dp->txDmaHdl, &dmaCookie);
+ }
+ }
+
+ (void)ddi_dma_unbind_handle(dp->txDmaHdl);
+ }
+ }
+
+ if (frag > 0) {
+ xre->sg.length = frag;
+
+ /* Give ownership to NIC */
+ xre->sg.addrType = NET_SG_PHYS_ADDR;
+ xre->ownership = VMXNET2_OWNERSHIP_NIC;
+ xre->flags |= VMXNET2_TX_CAN_KEEP;
+ txBuf->mblk = mblk;
+
+ /*
+ * If we called msgpullup to concatenate fragments, free
+ * original mblk now since we're going to return success.
+ */
+ if (mblk != mp) {
+ freemsg(mp);
+ }
+
+ return SOLVMXNET_SUCCESS;
+ }
+
+err:
+ if (mblk != NULL && mblk != mp) {
+ /*
+ * Free mblk allocated by msgpullup.
+ */
+ freemsg(mblk);
+ }
+
+ if (dmaMemAlloced) {
+ ASSERT(txBuf->dmaMem.buf);
+ Vxn_FreeDmaMem(&txBuf->dmaMem);
+ }
+
+ return SOLVMXNET_FAILURE;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_Send --
+ * GLD Transmit routine. Starts packet hard tx.
+ *
+ * Results:
+ * GLD_SUCCESS on success
+ * GLD_FAILURE on failure
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_Send(gld_mac_info_t *macinfo, mblk_t *mp)
+{
+ Vmxnet2_TxRingEntry *xre;
+ int err = GLD_SUCCESS;
+ vxn_softc_t *dp = (vxn_softc_t *)macinfo->gldm_private;
+ Vmxnet2_DriverData *dd = dp->driverData;
+ boolean_t resched = FALSE;
+
+ mutex_enter(&dp->xmitlock);
+
+ /*
+ * Check if ring entry at drop pointer is available
+ */
+ if (TX_RINGBUF_MBLK(dp, dd->txDriverNext) != NULL) {
+ DPRINTF(3, (CE_NOTE, "%s%d: Vxn_Send: tx ring full",
+ dp->drvName, dp->unit));
+ err = GLD_NORESOURCES;
+ dd->txStopped = TRUE;
+ dp->stats.defer++;
+ goto out;
+ }
+
+ xre = &dp->txRing[dd->txDriverNext];
+
+ /*
+ * Drop packet into ring entry
+ */
+ if (Vxn_EncapTxBuf(dp, mp, xre, &dp->txRingBuf[dd->txDriverNext])
+ != SOLVMXNET_SUCCESS) {
+ err = GLD_FAILURE;
+ dp->stats.errxmt++;
+ goto out;
+ }
+
+ /*
+ * Increment drop pointer
+ */
+ VMXNET_INC(dd->txDriverNext, dd->txRingLength);
+ dd->txNumDeferred++;
+ dp->txPending++;
+
+ /*
+ * Transmit, if number of pending packets > tx cluster length
+ */
+ if (dd->txNumDeferred >= dd->txClusterLength) {
+ dd->txNumDeferred = 0;
+
+ /*
+ * Call hardware transmit
+ */
+ INL(dp, VMXNET_TX_ADDR);
+ }
+
+ /*
+ * Clean up transmit ring. TX completion interrupts are not guaranteed
+ */
+ (void) Vxn_TxComplete(dp, &resched);
+
+out:
+ mutex_exit(&dp->xmitlock);
+ if (resched) {
+ /* Tell GLD to retry any deferred packets */
+ gld_sched(dp->macInfo);
+ }
+ return err;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_TxComplete --
+ * Scan Tx ring for completed transmits. Reclaim Tx buffers.
+ *
+ * Results:
+ * Returns TRUE if it found a completed transmit, FALSE otherwise.
+ * Also sets *reschedp to TRUE if the caller should call gld_sched
+ * to reschedule transmits (once all locks are dropped).
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static boolean_t
+Vxn_TxComplete(vxn_softc_t *dp, boolean_t *reschedp)
+{
+ Vmxnet2_DriverData *dd = dp->driverData;
+ boolean_t found = FALSE;
+ boolean_t needresched = FALSE;
+
+ ASSERT(MUTEX_HELD(&dp->xmitlock));
+
+ while (1) {
+ Vmxnet2_TxRingEntry *xre = &dp->txRing[dd->txDriverCur];
+
+ if (xre->ownership != VMXNET2_OWNERSHIP_DRIVER ||
+ (TX_RINGBUF_MBLK(dp, dd->txDriverCur) == NULL)) {
+ break;
+ }
+
+ found = TRUE;
+ Vxn_FreeTxBuf(dp, dd->txDriverCur);
+
+ dp->txPending--;
+ VMXNET_INC(dd->txDriverCur, dd->txRingLength);
+ if (dd->txStopped) {
+ needresched = TRUE;
+ dd->txStopped = FALSE;
+ }
+ }
+
+ *reschedp = needresched;
+ return found;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_Receive --
+ * Rx handler. First assembles the packets into a chain of mblks,
+ * then drops locks and passes them up the stack to GLD.
+ *
+ * Results:
+ * Returns TRUE if it find a packet ready for processing, FALSE
+ * otherwise.
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static boolean_t
+Vxn_Receive(vxn_softc_t *dp)
+{
+ int ringnext;
+ short pktlen;
+ Vmxnet2_DriverData *dd = dp->driverData;
+ rx_dma_buf_t *rxDesc;
+ rx_dma_buf_t *newRxDesc;
+ mblk_t *mblk;
+ mblk_t *head = NULL;
+ mblk_t **tail = &head;
+ mblk_t *next;
+ boolean_t found = FALSE; /* Did we find at least one packet? */
+
+ ASSERT(MUTEX_HELD(&dp->intrlock));
+
+ /*
+ * Walk receive ring looking for entries with ownership
+ * reverted back to driver
+ */
+ while (1) {
+ Vmxnet2_RxRingEntry *rre;
+ rx_dma_buf_t **rbuf;
+
+ ringnext = dd->rxDriverNext;
+ rre = &dp->rxRing[ringnext];
+ rbuf = &dp->rxRingBuffPtr[ringnext];
+
+ if (rre->ownership != VMXNET2_OWNERSHIP_DRIVER) {
+ break;
+ }
+
+ found = TRUE;
+
+ pktlen = rre->actualLength;
+
+ if (pktlen < (60 - 4)) {
+ /*
+ * Ethernet header vlan tags are 4 bytes. Some vendors generate
+ * 60byte frames including vlan tags. When vlan tag
+ * is stripped, such frames become 60 - 4. (PR106153)
+ */
+ dp->stats.errrcv++;
+ if (pktlen != 0) {
+ DPRINTF(3, (CE_CONT, "%s%d: runt packet\n", dp->drvName, dp->unit));
+ dp->stats.runt++;
+ }
+ } else {
+ /*
+ * Alloc new Rx buffer to replace current one
+ */
+ newRxDesc = Vxn_AllocRxBufFromPool(dp);
+
+ if (newRxDesc) {
+ rxDesc = *rbuf;
+ mblk = rxDesc->mblk;
+
+ *rbuf = newRxDesc;
+ rre->paddr = newRxDesc->dmaDesc.phyBuf + ETHERALIGN;
+ rre->bufferLength = MAXPKTBUF - ETHERALIGN;
+ rre->actualLength = 0;
+
+ /*
+ * Advance write pointer past packet length
+ */
+ mblk->b_wptr = mblk->b_rptr + pktlen;
+
+ /*
+ * Add to end of chain.
+ */
+ mblk->b_next = NULL;
+ *tail = mblk;
+ tail = &mblk->b_next;
+ } else {
+ dp->stats.errrcv++;
+ dp->stats.norcvbuf++;
+ }
+ }
+
+ /* Give the descriptor back to NIC */
+ rre->ownership = VMXNET2_OWNERSHIP_NIC;
+ VMXNET_INC(dd->rxDriverNext, dd->rxRingLength);
+ }
+
+ /*
+ * Walk chain and pass mblks up to gld_recv one by one.
+ */
+ mutex_exit(&dp->intrlock);
+ for (mblk = head; mblk != NULL; mblk = next) {
+ next = mblk->b_next;
+ mblk->b_next = NULL;
+ gld_recv(dp->macInfo, mblk);
+ }
+ mutex_enter(&dp->intrlock);
+
+ return (found);
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_Interrupt --
+ * GLD interrupt handler. Scan: Rx ring for received packets, Tx ring for
+ * completed transmits
+ *
+ * Results:
+ * - DDI_INTR_CLAIMED (if we found something to do)
+ * - DDI_INTR_UNCLAIMED (if not)
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static u_int
+Vxn_Interrupt(gld_mac_info_t *macInfo)
+{
+ u_int ret = DDI_INTR_UNCLAIMED;
+ vxn_softc_t *dp = (vxn_softc_t *)macInfo->gldm_private;
+ boolean_t foundRx, foundTx;
+ boolean_t resched = FALSE;
+
+ mutex_enter(&dp->intrlock);
+ dp->inIntr = TRUE;
+
+ if (!dp->nicActive) {
+ goto out;
+ }
+
+ /*
+ * Ack interrupt
+ */
+ OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_INTR_ACK);
+
+ foundRx = Vxn_Receive(dp);
+
+ mutex_enter(&dp->xmitlock);
+ foundTx = Vxn_TxComplete(dp, &resched);
+ mutex_exit(&dp->xmitlock);
+
+ if (foundRx || foundTx) {
+ ret = DDI_INTR_CLAIMED;
+ dp->stats.interrupts++;
+ }
+
+out:
+ dp->inIntr = FALSE;
+ mutex_exit(&dp->intrlock);
+
+ if (resched) {
+ gld_sched(dp->macInfo);
+ }
+
+ return ret;
+}
+
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_ReclaimRxBuf --
+ * Callback handler invoked by freemsg(). Frees Rx buffer memory and mappings
+ *
+ * Results:
+ * None
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static void
+Vxn_ReclaimRxBuf(rx_dma_buf_t *rxDesc)
+{
+ Vxn_FreeRxBufToPool(rxDesc);
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_FreeRxBuf --
+ * Free allocated Rx buffer
+ *
+ * Results:
+ * None
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static void
+Vxn_FreeRxBuf(rx_dma_buf_t *rxDesc)
+{
+ ASSERT(rxDesc);
+
+ if (rxDesc->mblk) {
+ freemsg(rxDesc->mblk);
+ } else {
+ Vxn_FreeDmaMem(&rxDesc->dmaDesc);
+ kmem_free(rxDesc, sizeof(rx_dma_buf_t));
+ }
+}
+
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_AllocRxBuf --
+ * Allocate Rx buffer
+ *
+ * Results:
+ * Pointer to Rx buffer descriptor - on success
+ * NULL - on failure
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static rx_dma_buf_t *
+Vxn_AllocRxBuf(vxn_softc_t *dp, int cansleep)
+{
+ rx_dma_buf_t *rxDesc;
+
+ rxDesc = (rx_dma_buf_t *)kmem_zalloc(sizeof(rx_dma_buf_t),
+ cansleep ? KM_SLEEP : KM_NOSLEEP);
+ if (!rxDesc) {
+ cmn_err(CE_WARN, "%s%d: Vxn_AllocRxBuf: kmem_zalloc failed",
+ dp->drvName, dp->unit);
+ return NULL;
+ }
+
+ rxDesc->softc = dp;
+
+ /*
+ * Alloc dma-able packet memory
+ */
+ if (Vxn_AllocDmaMem(dp, MAXPKTBUF, cansleep, &rxDesc->dmaDesc)
+ != SOLVMXNET_SUCCESS) {
+ kmem_free(rxDesc, sizeof(rx_dma_buf_t));
+ return NULL;
+ }
+
+ /*
+ * Fill in free callback; fired by freemsg()
+ */
+ rxDesc->freeCB.free_func = &Vxn_ReclaimRxBuf;
+ rxDesc->freeCB.free_arg = (caddr_t) rxDesc;
+
+ rxDesc->mblk = NULL;
+ return rxDesc;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_FreeInitBuffers --
+ * Free allocated Tx and Rx buffers
+ *
+ * Results:
+ * None
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static void
+Vxn_FreeInitBuffers(vxn_softc_t *dp)
+{
+ int i;
+
+ for (i=0; i<dp->vxnNumRxBufs; i++) {
+ if (dp->rxRingBuffPtr[i]) {
+ Vxn_FreeRxBuf(dp->rxRingBuffPtr[i]);
+ dp->rxRingBuffPtr[i] = NULL;
+ }
+ }
+
+ for (i=0; i<dp->vxnNumTxBufs; i++) {
+ if (TX_RINGBUF_MBLK(dp, i)) {
+ Vxn_FreeTxBuf(dp, i);
+ }
+ }
+
+ /*
+ * Rx pool must get freed last. Rx buffers above will
+ * show up on the pool when freemsg callback fires.
+ */
+ Vxn_FreeRxBufPool(dp);
+}
+
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_AllocRxBufPool --
+ * Allocate pool of rx buffers - 3 * configured Rx buffers
+ *
+ * Results:
+ * SOLVMXNET_SUCCESS/SOLVMXNET_FAILURE
+ *
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_AllocRxBufPool(vxn_softc_t *dp)
+{
+ int i;
+
+ dp->rxFreeBufList = NULL;
+
+ // Allow list to double in size if needed. Any additional buffers
+ // that are allocated on the fly will be freed back to main memory.
+ dp->rxMaxFreeBufs = dp->vxnNumRxBufs * 6;
+
+ for (i = 0; i < dp->vxnNumRxBufs * 3; i++) {
+ rx_dma_buf_t *rxDesc;
+
+ /*
+ * Alloc rx buffer
+ */
+ if (!(rxDesc = Vxn_AllocRxBuf(dp, TRUE))) {
+ cmn_err(CE_WARN, "%s%d: Vxn_AllocRxBufPool: failed to allocate memory",
+ dp->drvName, dp->unit);
+ dp->rxNumFreeBufs = i;
+ return SOLVMXNET_FAILURE;
+ }
+ /*
+ * Add to free list
+ */
+ rxDesc->next = dp->rxFreeBufList;
+ dp->rxFreeBufList = rxDesc;
+ }
+
+ dp->rxNumFreeBufs = i;
+ return SOLVMXNET_SUCCESS;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_FreeRxBufPool --
+ * Free rx buffers pool
+ *
+ * Results:
+ * None
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static void
+Vxn_FreeRxBufPool(vxn_softc_t *dp)
+{
+ while (dp->rxFreeBufList) {
+ rx_dma_buf_t *rxDesc = dp->rxFreeBufList;
+
+ /* unlink */
+ dp->rxFreeBufList = rxDesc->next;
+
+ ASSERT(rxDesc->mblk == NULL);
+ Vxn_FreeDmaMem(&rxDesc->dmaDesc);
+ kmem_free(rxDesc, sizeof(rx_dma_buf_t));
+ }
+ dp->rxNumFreeBufs = 0;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_AllocRxBufFromPool --
+ * Allocate Rx buffer from free pool
+ *
+ * Results:
+ * Pointer to Rx buffer descriptor - on success
+ * NULL - on failure
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static rx_dma_buf_t *
+Vxn_AllocRxBufFromPool(vxn_softc_t *dp)
+{
+ rx_dma_buf_t *rxDesc = NULL;
+
+ mutex_enter(&dp->rxlistlock);
+ if (dp->rxFreeBufList) {
+ rxDesc = dp->rxFreeBufList;
+ dp->rxFreeBufList = rxDesc->next;
+ ASSERT(dp->rxNumFreeBufs >= 1);
+ dp->rxNumFreeBufs--;
+ }
+ mutex_exit(&dp->rxlistlock);
+
+ if (!rxDesc) {
+ /*
+ * Try to allocate new descriptor from memory. Can't block here
+ * since we could be being called from interrupt context.
+ */
+ DPRINTF(5, (CE_NOTE, "%s%d: allocating rx buf from memory",
+ dp->drvName, dp->unit));
+ if (!(rxDesc = Vxn_AllocRxBuf(dp, FALSE))) {
+ cmn_err(CE_WARN,
+ "%s%d: Vxn_AllocRxBufFromPool : pool rx alloc failed",
+ dp->drvName, dp->unit);
+ return NULL;
+ }
+ }
+
+ /*
+ * Allocate new message block for this buffer
+ */
+ rxDesc->mblk = desballoc((uchar_t *)rxDesc->dmaDesc.buf + ETHERALIGN,
+ rxDesc->dmaDesc.bufLen - ETHERALIGN,
+ BPRI_MED, &rxDesc->freeCB);
+ if (!rxDesc->mblk) {
+ cmn_err(CE_WARN, "%s%d: Vxn_AllocRxBufFromPool : desballoc failed",
+ dp->drvName, dp->unit);
+
+ /* put back on free list */
+ Vxn_FreeRxBufToPool(rxDesc);
+ return NULL;
+ }
+
+ return rxDesc;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_FreeRxBufToPool --
+ * Return rx buffer to free pool
+ *
+ * Results:
+ * None
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static void
+Vxn_FreeRxBufToPool(rx_dma_buf_t *rxDesc)
+{
+ vxn_softc_t *dp = rxDesc->softc;
+
+ rxDesc->mblk = NULL;
+
+ /*
+ * Insert on free list, or free if the list is full
+ */
+ mutex_enter(&dp->rxlistlock);
+ if (dp->rxNumFreeBufs >= dp->rxMaxFreeBufs) {
+ DPRINTF(5, (CE_NOTE, "%s%d: freeing rx buf to memory",
+ dp->drvName, dp->unit));
+ Vxn_FreeRxBuf(rxDesc);
+ } else {
+ rxDesc->next = dp->rxFreeBufList;
+ dp->rxFreeBufList = rxDesc;
+ dp->rxNumFreeBufs++;
+ }
+ mutex_exit(&dp->rxlistlock);
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_AllocInitBuffers --
+ * Allocated Rx buffers and init ring entries
+ *
+ * Results:
+ * SOLVMXNET_SUCCESS - on success
+ * SOLVMXNET_FAILURE - on failure
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_AllocInitBuffers(vxn_softc_t *dp)
+{
+ Vmxnet2_DriverData *dd;
+ uint32_t i, offset;
+
+ dd = dp->driverData;
+ offset = sizeof(*dd);
+
+ /*
+ * Init shared structures
+ */
+ dd->rxRingLength = dp->vxnNumRxBufs;
+ dd->rxRingOffset = offset;
+ dp->rxRing = (Vmxnet2_RxRingEntry *)((uintptr_t)dd + offset);
+ offset += dp->vxnNumRxBufs * sizeof(Vmxnet2_RxRingEntry);
+
+ dd->rxRingLength2 = 1;
+ dd->rxRingOffset2 = offset;
+ offset += sizeof(Vmxnet2_RxRingEntry);
+
+ dd->txRingLength = dp->vxnNumTxBufs;
+ dd->txRingOffset = offset;
+ dp->txRing = (Vmxnet2_TxRingEntry *)((uintptr_t)dd + offset);
+ offset += dp->vxnNumTxBufs * sizeof(Vmxnet2_TxRingEntry);
+
+ /*
+ * Alloc Rx buffers pool
+ */
+ if ( Vxn_AllocRxBufPool(dp) != SOLVMXNET_SUCCESS) {
+ cmn_err(CE_WARN, "%s%d: Vxn_AllocInitBuffers: failed to alloc buf pool",
+ dp->drvName, dp->unit);
+ return SOLVMXNET_FAILURE;
+ }
+
+ /*
+ * Allocate receive buffers
+ */
+ for (i = 0; i < dp->vxnNumRxBufs; i++) {
+ rx_dma_buf_t *rxDesc;
+ Vmxnet2_RxRingEntry *rre = &dp->rxRing[i];
+
+ if (!(rxDesc = Vxn_AllocRxBufFromPool(dp))) {
+ cmn_err(CE_WARN, "%s%d: Vxn_AllocInitBuffers: "
+ "failed to alloc buf from pool", dp->drvName, dp->unit);
+ goto err;
+ }
+
+ /*
+ * Init ring entries
+ */
+ rre->paddr = rxDesc->dmaDesc.phyBuf + ETHERALIGN;
+ rre->bufferLength = MAXPKTBUF - ETHERALIGN;
+ rre->actualLength = 0;
+ dp->rxRingBuffPtr[i] = rxDesc;
+ rre->ownership = VMXNET2_OWNERSHIP_NIC;
+ }
+
+ dp->txDmaHdl = NULL;
+
+ /*
+ * Dummy recvRing2 tacked on to the end, with a single unusable entry
+ */
+ dp->rxRing[i].paddr = 0;
+ dp->rxRing[i].bufferLength = 0;
+ dp->rxRing[i].actualLength = 0;
+ dp->rxRingBuffPtr[i] = NULL;
+ dp->rxRing[i].ownership = VMXNET2_OWNERSHIP_DRIVER;
+
+ dd->rxDriverNext = 0;
+
+ /*
+ * Give xmit ring ownership to DRIVER
+ */
+ for (i = 0; i < dp->vxnNumTxBufs; i++) {
+ dp->txRing[i].ownership = VMXNET2_OWNERSHIP_DRIVER;
+ dp->txRingBuf[i].mblk = NULL;
+ dp->txRingBuf[i].dmaMem.buf = NULL;
+ dp->txRing[i].sg.sg[0].addrHi = 0;
+ }
+
+ dd->txDriverCur = dd->txDriverNext = 0;
+ dd->txStopped = FALSE;
+
+ return SOLVMXNET_SUCCESS;
+
+err:
+ for (i=0; i<dp->vxnNumRxBufs; i++) {
+ if (dp->rxRingBuffPtr[i]) {
+ Vxn_FreeRxBuf(dp->rxRingBuffPtr[i]);
+ dp->rxRingBuffPtr[i] = NULL;
+ }
+ }
+
+ Vxn_FreeRxBufPool(dp);
+ return SOLVMXNET_FAILURE;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_FreeDmaMem --
+ * Free allocated dma memory
+ *
+ * Results:
+ * None
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static void
+Vxn_FreeDmaMem(dma_buf_t *dma)
+{
+ ddi_dma_unbind_handle(dma->dmaHdl);
+ ddi_dma_mem_free(&dma->dataAccHdl);
+ ddi_dma_free_handle(&dma->dmaHdl);
+
+ dma->buf = NULL;
+ dma->phyBuf = NULL;
+ dma->bufLen = 0;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_AllocDmaMem --
+ * Allocate dma-able memory and fill passed in dma descriptor pointer
+ * if successful
+ *
+ * Results:
+ * SOLVMXNET_SUCCESS on success
+ * SOLVMXNET_FAILURE on failure
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_AllocDmaMem(vxn_softc_t *dp, int size, int cansleep, dma_buf_t *dma)
+{
+ /*
+ * Allocate handle
+ */
+ if (ddi_dma_alloc_handle(dp->dip, &vxn_dma_attrs,
+ cansleep ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT,
+ NULL, &dma->dmaHdl) != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "%s%d: Vxn_AllocDmaMem: failed to allocate handle",
+ dp->drvName, dp->unit);
+ return SOLVMXNET_FAILURE;
+ }
+
+ /*
+ * Allocate memory
+ */
+ if (ddi_dma_mem_alloc(dma->dmaHdl, size, &vxn_buf_attrs, DDI_DMA_CONSISTENT,
+ cansleep ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT, NULL,
+ &dma->buf, &dma->bufLen, &dma->dataAccHdl)
+ != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "%s%d: Vxn_AllocDmaMem: "
+ "ddi_dma_mem_alloc %d bytes failed",
+ dp->drvName, dp->unit, size);
+ ddi_dma_free_handle(&dma->dmaHdl);
+ return SOLVMXNET_FAILURE;
+ }
+
+ /*
+ * Mapin memory
+ */
+ if (ddi_dma_addr_bind_handle(dma->dmaHdl, NULL, dma->buf, dma->bufLen,
+ DDI_DMA_RDWR | DDI_DMA_STREAMING,
+ cansleep ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT,
+ NULL, &dma->cookie, &dma->cookieCount)
+ != DDI_DMA_MAPPED) {
+ cmn_err(CE_WARN, "%s%d: Vxn_AllocDmaMem: failed to bind handle",
+ dp->drvName, dp->unit);
+ ddi_dma_mem_free(&dma->dataAccHdl);
+ ddi_dma_free_handle(&dma->dmaHdl);
+ return SOLVMXNET_FAILURE;
+ }
+
+ if (dma->cookieCount != 1) {
+ cmn_err(CE_WARN, "%s%d: Vxn_AllocDmaMem: too many DMA cookies",
+ dp->drvName, dp->unit);
+ Vxn_FreeDmaMem(dma);
+ return SOLVMXNET_FAILURE;
+ }
+
+ /*
+ * Save physical address (for easy use)
+ */
+ dma->phyBuf = dma->cookie.dmac_address;
+
+ return SOLVMXNET_SUCCESS;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_FreeDriverData --
+ * Free driver data structures and Tx Rx buffers
+ *
+ * Results:
+ * None
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static void
+Vxn_FreeDriverData(vxn_softc_t *dp)
+{
+ Vxn_FreeInitBuffers(dp);
+ Vxn_FreeDmaMem(&dp->driverDataDmaMem);
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_AllocDriverData --
+ * Allocate driver data structures and Tx Rx buffers on init
+ *
+ * Results:
+ * SOLVMXNET_SUCCESS on success
+ * SOLVMXNET_FAILURE on failure
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_AllocDriverData(vxn_softc_t *dp)
+{
+ uint32_t r, driverDataSize;
+
+ /*
+ * Get configured receive buffers
+ */
+ OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_GET_NUM_RX_BUFFERS);
+ r = INL(dp, VMXNET_COMMAND_ADDR);
+ if (r == 0 || r > MAX_NUM_RECV_BUFFERS) {
+ r = DEFAULT_NUM_RECV_BUFFERS;
+ }
+ dp->vxnNumRxBufs = r;
+
+ /*
+ * Get configured transmit buffers
+ */
+ OUTL(dp, VMXNET_COMMAND_ADDR, VMXNET_CMD_GET_NUM_TX_BUFFERS);
+ r = INL(dp, VMXNET_COMMAND_ADDR);
+ if (r == 0 || r > MAX_NUM_XMIT_BUFFERS) {
+ r = DEFAULT_NUM_XMIT_BUFFERS;
+ }
+ dp->vxnNumTxBufs = r;
+
+ /*
+ * Calculate shared data size and allocate memory for it
+ */
+ driverDataSize =
+ sizeof(Vmxnet2_DriverData) +
+ /* numRecvBuffers + 1 for the dummy recvRing2 (used only by Windows) */
+ (dp->vxnNumRxBufs + 1) * sizeof(Vmxnet2_RxRingEntry) +
+ dp->vxnNumTxBufs * sizeof(Vmxnet2_TxRingEntry);
+
+ if (Vxn_AllocDmaMem(dp, driverDataSize, TRUE, &dp->driverDataDmaMem)
+ != SOLVMXNET_SUCCESS) {
+ return SOLVMXNET_FAILURE;
+ }
+
+ /*
+ * Clear memory (bzero isn't resolved by module loader for some reason)
+ */
+ ASSERT(dp->driverDataDmaMem.buf && dp->driverDataDmaMem.bufLen);
+ Vxn_Memset(dp->driverDataDmaMem.buf, 0, dp->driverDataDmaMem.bufLen);
+
+ dp->driverData = (Vmxnet2_DriverData *)dp->driverDataDmaMem.buf;
+ dp->driverDataPhy = (void *)(uintptr_t)dp->driverDataDmaMem.phyBuf;
+
+ /* So that the vmkernel can check it is compatible */
+ dp->driverData->magic = VMXNET2_MAGIC;
+ dp->driverData->length = driverDataSize;
+
+ /*
+ * Alloc rx/tx buffers, init ring, register with hardware etc.
+ */
+ if (Vxn_AllocInitBuffers(dp) != SOLVMXNET_SUCCESS) {
+ Vxn_FreeDmaMem(&dp->driverDataDmaMem);
+ return SOLVMXNET_FAILURE;
+ }
+
+ DPRINTF(3, (CE_CONT, "%s%d: numRxBufs=(%d*%"FMT64"d) numTxBufs=(%d*%"FMT64"d)"
+ " driverDataSize=%d driverDataPhy=0x%p\n",
+ dp->drvName, dp->unit,
+ dp->vxnNumRxBufs, (uint64_t)sizeof(Vmxnet2_RxRingEntry),
+ dp->vxnNumTxBufs, (uint64_t)sizeof(Vmxnet2_TxRingEntry),
+ driverDataSize, dp->driverDataPhy));
+
+ return SOLVMXNET_SUCCESS;
+}
+
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_Attach --
+ * Probe and attach driver to stack
+ *
+ * Results:
+ * DDI_SUCCESS
+ * DDI_FAILURE
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_Attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ int i, ret, len, unit;
+ const char *drvName;
+ ddi_acc_handle_t confHdl;
+ uint16_t vid, did;
+ uint8_t revid;
+ struct pci_phys_spec *regs;
+ caddr_t vxnIOp;
+ ddi_acc_handle_t vxnIOHdl;
+ uint32_t vLow, vHigh;
+ gld_mac_info_t *macInfo;
+ vxn_softc_t *dp;
+ boolean_t morphed = FALSE;
+ uint_t regSpaceSize;
+ uint_t chip;
+ uint_t vxnIOSize;
+
+ if (cmd != DDI_ATTACH) {
+ return DDI_FAILURE;
+ }
+
+ unit = ddi_get_instance(dip);
+ drvName = ddi_driver_name(dip);
+
+ /*
+ * Check if chip is supported.
+ */
+ if (pci_config_setup(dip, &confHdl) != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "%s%d: pci_config_setup() failed", drvName, unit);
+ return DDI_FAILURE;
+ }
+
+ vid = pci_config_get16(confHdl, PCI_CONF_VENID);
+ did = pci_config_get16(confHdl, PCI_CONF_DEVID);
+ revid = pci_config_get8(confHdl, PCI_CONF_REVID);
+
+ if (vid == PCI_VENDOR_ID_VMWARE && did == PCI_DEVICE_ID_VMWARE_NET) {
+ /* Found vmxnet */
+ chip = VMXNET_CHIP;
+ }
+ else if (vid == PCI_VENDOR_ID_AMD && did == PCI_DEVICE_ID_AMD_VLANCE) {
+ /* Found vlance (maybe a vmxnet disguise) */
+ chip = LANCE_CHIP;
+ }
+ else {
+ /* Not Found */
+ DPRINTF(3, (CE_WARN, "%s: Vxn_Attach: wrong PCI venid/devid (0x%x, 0x%x)",
+ drvName, vid, did));
+ goto err;
+ }
+
+ DPRINTF(3, (CE_CONT, "%s%d: (vid: 0x%04x, did: 0x%04x, revid: 0x%02x)\n",
+ drvName, unit, vid, did, revid));
+
+ /*
+ * Get device properties
+ */
+ regs = NULL;
+ len = 0;
+ if (ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
+ "reg", (caddr_t)&regs, &len) != DDI_PROP_SUCCESS) {
+ cmn_err(CE_WARN, "%s%d: Vxn_Attach: failed to get reg property",
+ drvName, unit);
+ goto err;
+ }
+
+ ASSERT(regs != NULL && len > 0);
+
+ /*
+ * Search device properties for IO-space
+ */
+ for (i = 0; i <len / sizeof(struct pci_phys_spec); i++) {
+ if ((regs[i].pci_phys_hi & PCI_REG_ADDR_M) == PCI_ADDR_IO) {
+ regSpaceSize = regs[i].pci_size_low;
+ DPRINTF(5, (CE_CONT, "%s%d: Vxn_Attach: regSpaceSize=%d\n",
+ drvName, unit, regSpaceSize));
+ kmem_free(regs, len);
+ goto map_space_found;
+ }
+ }
+
+ cmn_err(CE_WARN, "%s%d: Vxn_Attach: failed to find IO space", drvName, unit);
+ kmem_free(regs, len);
+ goto err;
+
+map_space_found:
+
+ /*
+ * Ensure we can access registers through IO space.
+ */
+ ret = pci_config_get16(confHdl, PCI_CONF_COMM);
+ ret |= PCI_COMM_IO | PCI_COMM_ME;
+ pci_config_put16(confHdl, PCI_CONF_COMM, ret);
+
+ if (ddi_regs_map_setup(dip, i, (caddr_t *)&vxnIOp, 0, 0, &dev_attr,
+ &vxnIOHdl) != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "%s%d: Vxn_Attach: ddi_regs_map_setup failed",
+ drvName, unit);
+ goto err;
+ }
+
+ if (chip == VMXNET_CHIP) {
+ vxnIOSize = VMXNET_CHIP_IO_RESV_SIZE;
+ }
+ else {
+ /*
+ * Since this is a vlance adapter we can only use it if
+ * its I/0 space is big enough for the adapter to be
+ * capable of morphing. This is the first requirement
+ * for this adapter to potentially be morphable. The
+ * layout of a morphable LANCE adapter is
+ *
+ * I/O space:
+ *
+ * |------------------|
+ * | LANCE IO PORTS |
+ * |------------------|
+ * | MORPH PORT |
+ * |------------------|
+ * | VMXNET IO PORTS |
+ * |------------------|
+ *
+ * VLance has 8 ports of size 4 bytes, the morph port is 4 bytes, and
+ * Vmxnet has 10 ports of size 4 bytes.
+ *
+ * We shift up the ioaddr with the size of the LANCE I/O space since
+ * we want to access the vmxnet ports. We also shift the ioaddr up by
+ * the MORPH_PORT_SIZE so other port access can be independent of
+ * whether we are Vmxnet or a morphed VLance. This means that when
+ * we want to access the MORPH port we need to subtract the size
+ * from ioaddr to get to it.
+ */
+ vxnIOp += LANCE_CHIP_IO_RESV_SIZE + MORPH_PORT_SIZE;
+ vxnIOSize = LANCE_CHIP_IO_RESV_SIZE + MORPH_PORT_SIZE +
+ VMXNET_CHIP_IO_RESV_SIZE;
+ }
+
+ /*
+ * Do not attempt to morph non-morphable AMD PCnet
+ */
+ if (vxnIOSize > regSpaceSize) {
+ cmn_err(CE_WARN, "%s%d: Vxn_Attach: "
+ "vlance device is not supported by this driver", drvName, unit);
+ goto err_free_regs_map;
+ }
+
+ /*
+ * Morph, if we found a vlance adapter
+ */
+ if (chip == LANCE_CHIP) {
+ uint16_t magic;
+
+ /* Read morph port to verify that we can morph the adapter */
+ magic = ddi_get16(vxnIOHdl, (uint16_t *)(vxnIOp - MORPH_PORT_SIZE));
+ if (magic != LANCE_CHIP && magic != VMXNET_CHIP) {
+ cmn_err(CE_WARN, "%s%d: Vxn_Attach: Invalid magic, read: 0x%08X",
+ drvName, unit, magic);
+ goto err_free_regs_map;
+ }
+
+ /* Morph */
+ ddi_put16(vxnIOHdl, (uint16_t *)(vxnIOp - MORPH_PORT_SIZE), VMXNET_CHIP);
+ morphed = TRUE;
+
+ /* Verify that we morphed correctly */
+ magic = ddi_get16(vxnIOHdl, (uint16_t *)(vxnIOp - MORPH_PORT_SIZE));
+ if (magic != VMXNET_CHIP) {
+ cmn_err(CE_WARN, "%s%d: Vxn_Attach: Couldn't morph adapter."
+ " Invalid magic, read:: 0x%08X", drvName, unit, magic);
+ goto err_morph_back;
+ }
+ }
+
+ /*
+ * Check the version number of the device implementation
+ */
+ vLow = (uint32_t)ddi_get32(vxnIOHdl,
+ (uint32_t *)(vxnIOp+VMXNET_LOW_VERSION));
+ vHigh = (uint32_t)ddi_get32(vxnIOHdl,
+ (uint32_t *)(vxnIOp+VMXNET_HIGH_VERSION));
+
+ if ((vLow & 0xffff0000) != (VMXNET2_MAGIC & 0xffff0000) ||
+ ((VMXNET2_MAGIC < vLow) || (VMXNET2_MAGIC > vHigh))) {
+ cmn_err(CE_WARN, "%s%d: Vxn_Attach: driver version 0x%08X doesn't "
+ "match device 0x%08X:0x%08X",
+ drvName, unit, VMXNET2_MAGIC, vLow, vHigh);
+ goto err_version_mismatch;
+ }
+
+ /*
+ * Alloc soft state
+ */
+ macInfo = gld_mac_alloc(dip);
+ if (!macInfo) {
+ cmn_err(CE_WARN, "%s%d: Vxn_Attach: gld_mac_alloc failed",
+ drvName, unit);
+ goto err_gld_mac_alloc;
+ }
+
+ dp = (vxn_softc_t *) kmem_zalloc(sizeof(vxn_softc_t), KM_SLEEP);
+ ASSERT(dp);
+
+ /*
+ * Get interrupt cookie
+ */
+ if (ddi_get_iblock_cookie(dip, 0, &dp->iblockCookie) != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "%s%d: Vxn_Attach: ddi_get_iblock_cookie failed",
+ drvName, unit);
+ goto err_get_iblock_cookie;
+ }
+
+ strncpy(dp->drvName, drvName, SOLVMXNET_MAXNAME);
+ dp->unit = unit;
+ dp->dip = dip;
+ dp->macInfo = macInfo;
+ dp->confHdl = confHdl;
+ dp->vxnIOHdl = vxnIOHdl;
+ dp->vxnIOp = vxnIOp;
+ dp->morphed = morphed;
+ dp->nicActive = FALSE;
+ dp->txPending = 0;
+ dp->maxTxFrags = 1;
+
+ /*
+ * Initialize mutexes
+ */
+ mutex_init(&dp->intrlock, NULL, MUTEX_DRIVER, (void *)dp->iblockCookie);
+ mutex_init(&dp->xmitlock, NULL, MUTEX_DRIVER, (void *)dp->iblockCookie);
+ mutex_init(&dp->rxlistlock, NULL, MUTEX_DRIVER, (void *)dp->iblockCookie);
+
+ /*
+ * Allocate and initialize our private and shared data structures
+ */
+ if (Vxn_AllocDriverData(dp) != SOLVMXNET_SUCCESS) {
+ goto err_alloc_driverdata;
+ }
+
+ /*
+ * Read the MAC address from the device
+ */
+ for (i = 0; i < 6; i++) {
+ dp->devAddr.ether_addr_octet[i] =
+ (uint8_t)ddi_get8(vxnIOHdl, (uint8_t *)(vxnIOp + VMXNET_MAC_ADDR + i));
+ }
+ macInfo->gldm_vendor_addr = dp->devAddr.ether_addr_octet;
+ macInfo->gldm_broadcast_addr = etherbroadcastaddr.ether_addr_octet;
+
+ DPRINTF(3, (CE_CONT,
+ "MAC address: %02x:%02x:%02x:%02x:%02x:%02x\n",
+ dp->devAddr.ether_addr_octet[0],
+ dp->devAddr.ether_addr_octet[1],
+ dp->devAddr.ether_addr_octet[2],
+ dp->devAddr.ether_addr_octet[3],
+ dp->devAddr.ether_addr_octet[4],
+ dp->devAddr.ether_addr_octet[5]));
+
+ /*
+ * Configure GLD entry points
+ */
+ macInfo->gldm_devinfo = dip;
+ macInfo->gldm_private = (caddr_t)dp;
+ macInfo->gldm_cookie = dp->iblockCookie;
+ macInfo->gldm_reset = Vxn_Reset;
+ macInfo->gldm_start = Vxn_Start;
+ macInfo->gldm_stop = Vxn_Stop;
+ macInfo->gldm_set_mac_addr = Vxn_SetMacAddress;
+ macInfo->gldm_send = Vxn_Send;
+ macInfo->gldm_set_promiscuous = Vxn_SetPromiscuous;
+ macInfo->gldm_get_stats = Vxn_GetStats;
+ macInfo->gldm_ioctl = NULL;
+ macInfo->gldm_set_multicast= Vxn_SetMulticast;
+ macInfo->gldm_intr = Vxn_Interrupt;
+ macInfo->gldm_mctl = NULL;
+
+ macInfo->gldm_ident = (char *)ddi_driver_name(dip);
+ macInfo->gldm_type = DL_ETHER;
+ macInfo->gldm_minpkt = 0;
+ macInfo->gldm_maxpkt = ETHERMTU;
+ macInfo->gldm_addrlen = ETHERADDRL;
+ macInfo->gldm_saplen = -2;
+ macInfo->gldm_ppa = unit;
+
+ /*
+ * Register with GLD (Generic Lan Driver) framework
+ */
+ if (gld_register(dip,
+ (char *)ddi_driver_name(dip), macInfo) != DDI_SUCCESS) {
+ goto err_gld_register;
+ }
+
+ /*
+ * Add interrupt to system.
+ */
+ if (ddi_add_intr(dip, 0, NULL, NULL, gld_intr,
+ (caddr_t)macInfo) != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "%s%d: ddi_add_intr failed", drvName, unit);
+ goto err_ddi_add_intr;
+ }
+
+ /*
+ * Add to list of interfaces.
+ */
+ mutex_enter(&vxnListLock);
+ dp->next = &vxnList;
+ dp->prev = vxnList.prev;
+ vxnList.prev->next = dp;
+ vxnList.prev = dp;
+ mutex_exit(&vxnListLock);
+
+ /*
+ * Success
+ */
+ return DDI_SUCCESS;
+
+err_ddi_add_intr:
+ gld_unregister(macInfo);
+
+err_gld_register:
+ Vxn_FreeDriverData(dp);
+
+err_alloc_driverdata:
+ mutex_destroy(&dp->intrlock);
+ mutex_destroy(&dp->xmitlock);
+
+err_get_iblock_cookie:
+ kmem_free(dp, sizeof(*dp));
+ gld_mac_free(macInfo);
+
+err_gld_mac_alloc:
+err_version_mismatch:
+err_morph_back:
+ if (morphed) {
+ ddi_put16(vxnIOHdl, (uint16_t *)(vxnIOp - MORPH_PORT_SIZE), LANCE_CHIP);
+ }
+
+err_free_regs_map:
+ ddi_regs_map_free(&vxnIOHdl);
+
+err:
+ pci_config_teardown(&confHdl);
+ return DDI_FAILURE;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ * Vxn_Detach --
+ * Called on module unload
+ *
+ * Results:
+ * DDI_SUCCESS
+ * DDI_FAILURE
+ *
+ * Side effects:
+ * None
+ *-----------------------------------------------------------------------------
+ */
+static int
+Vxn_Detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ gld_mac_info_t *macInfo;
+ vxn_softc_t *dp;
+
+ macInfo = (gld_mac_info_t *)ddi_get_driver_private(dip);
+ dp = (vxn_softc_t *)macInfo->gldm_private;
+
+ if (cmd == DDI_DETACH) {
+ /*
+ * Tear down interrupt
+ */
+ ddi_remove_intr(dip, 0, macInfo->gldm_cookie);
+ gld_unregister(macInfo);
+
+ /*
+ * Quiesce hardware
+ */
+ Vxn_Stop(macInfo);
+
+ /*
+ * Free driver-data, tx/rx buffers etc
+ */
+ Vxn_FreeDriverData(dp);
+
+ /*
+ * Destroy locks
+ */
+ mutex_destroy(&dp->intrlock);
+ mutex_destroy(&dp->xmitlock);
+
+ /*
+ * Unmorph
+ */
+ if (dp->morphed) {
+ uint16_t magic;
+
+ /* Verify that we had morphed earlier */
+ magic = ddi_get16(dp->vxnIOHdl,
+ (uint16_t *)(dp->vxnIOp - MORPH_PORT_SIZE));
+ if (magic != VMXNET_CHIP) {
+ cmn_err(CE_WARN, "%s%d: Vxn_Detach: Adapter not morphed"
+ " magic=0x%08X", dp->drvName, dp->unit, magic);
+ }
+ else {
+ /* Unmorph */
+ ddi_put16(dp->vxnIOHdl,
+ (uint16_t *)(dp->vxnIOp - MORPH_PORT_SIZE), LANCE_CHIP);
+
+ /* Verify */
+ magic = ddi_get16(dp->vxnIOHdl,
+ (uint16_t *)(dp->vxnIOp - MORPH_PORT_SIZE));
+ if (magic != LANCE_CHIP) {
+ cmn_err(CE_WARN, "%s%d: Vxn_Detach: Unable to unmorph adapter"
+ " magic=0x%08X", dp->drvName, dp->unit, magic);
+ }
+ }
+ }
+
+ /*
+ * Release resister mappings
+ */
+ ddi_regs_map_free(&dp->vxnIOHdl);
+ pci_config_teardown(&dp->confHdl);
+
+ /*
+ * Remove from list of interfaces.
+ */
+ mutex_enter(&vxnListLock);
+ ASSERT(dp != &vxnList);
+ dp->prev->next = dp->next;
+ dp->next->prev = dp->prev;
+ mutex_exit(&vxnListLock);
+
+ /*
+ * Release memory
+ */
+ kmem_free(dp, sizeof(*dp));
+ gld_mac_free(macInfo);
+
+ return DDI_SUCCESS;
+ }
+ else {
+ return DDI_FAILURE;
+ }
+}
+
+static struct module_info vxnminfo = {
+ 0, /* mi_idnum */
+ "vmxnet", /* mi_idname */
+ 0, /* mi_minpsz */
+ ETHERMTU, /* mi_maxpsz */
+ QHIWATER, /* mi_hiwat */
+ 1, /* mi_lowat */
+};
+
+static struct qinit vxnrinit = {
+ NULL, /* qi_putp */
+ gld_rsrv, /* qi_srvp */
+ gld_open, /* qi_qopen */
+ gld_close, /* qi_qclose */
+ NULL, /* qi_qadmin */
+ &vxnminfo, /* qi_minfo */
+ NULL /* qi_mstat */
+};
+
+static struct qinit vxnwinit = {
+ gld_wput, /* qi_putp */
+ gld_wsrv, /* qi_srvp */
+ NULL, /* qi_qopen */
+ NULL, /* qi_qclose */
+ NULL, /* qi_qadmin */
+ &vxnminfo, /* qi_minfo */
+ NULL /* qi_mstat */
+};
+
+static struct streamtab vxn_info = {
+ &vxnrinit, /* st_rdinit */
+ &vxnwinit, /* st_wrinit */
+ NULL, /* st_muxrinit */
+ NULL /* st_muxwrinit */
+};
+
+static struct cb_ops cb_vxn_ops = {
+ nulldev, /* cb_open */
+ nulldev, /* cb_close */
+ nodev, /* cb_strategy */
+ nodev, /* cb_print */
+ nodev, /* cb_dump */
+ nodev, /* cb_read */
+ nodev, /* cb_write */
+ nodev, /* cb_ioctl */
+ nodev, /* cb_devmap */
+ nodev, /* cb_mmap */
+ nodev, /* cb_segmap */
+ nochpoll, /* cb_chpoll */
+ ddi_prop_op, /* cb_prop_op */
+ &vxn_info, /* cb_stream */
+ D_NEW|D_MP /* cb_flag */
+};
+
+static struct dev_ops vxn_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* devo_refcnt */
+ gld_getinfo, /* devo_getinfo */
+ nulldev, /* devo_identify */
+ nulldev, /* devo_probe */
+ Vxn_Attach, /* devo_attach */
+ Vxn_Detach, /* devo_detach */
+ nodev, /* devo_reset */
+ &cb_vxn_ops, /* devo_cb_ops */
+ NULL, /* devo_bus_ops */
+ ddi_power /* devo_power */
+};
+
+static struct modldrv modldrv = {
+ &mod_driverops,
+ ident,
+ &vxn_ops,
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, {&modldrv, NULL,}
+};
+
+
+/*
+ * Module load entry point
+ */
+int
+_init(void)
+{
+ int err;
+
+ DPRINTF(5, (CE_CONT, "vxn: _init:\n"));
+ /* Initialize interface list */
+ vxnList.next = vxnList.prev = &vxnList;
+ mutex_init(&vxnListLock, NULL, MUTEX_DRIVER, NULL);
+ if ((err = mod_install(&modlinkage)) != 0) {
+ mutex_destroy(&vxnListLock);
+ }
+ return err;
+}
+
+/*
+ * Module unload entry point
+ */
+int
+_fini(void)
+{
+ int err;
+
+ DPRINTF(5, (CE_CONT, "vxn: _fini:\n"));
+ if ((err = mod_remove(&modlinkage)) == 0) {
+ mutex_destroy(&vxnListLock);
+ }
+ return err;
+}
+
+/*
+ * Module info entry point
+ */
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
diff --git a/usr/src/uts/intel/io/vmxnet/vmxnet.conf b/usr/src/uts/intel/io/vmxnet/vmxnet.conf
new file mode 100644
index 0000000000..eb3b160412
--- /dev/null
+++ b/usr/src/uts/intel/io/vmxnet/vmxnet.conf
@@ -0,0 +1,24 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2012, Joyent, Inc. All rights reserved.
+# Use is subject to license terms.
+#
diff --git a/usr/src/uts/intel/io/vmxnet/vmxnet2_def.h b/usr/src/uts/intel/io/vmxnet/vmxnet2_def.h
new file mode 100644
index 0000000000..5ea437df72
--- /dev/null
+++ b/usr/src/uts/intel/io/vmxnet/vmxnet2_def.h
@@ -0,0 +1,436 @@
+/*********************************************************
+ * Copyright (C) 2004 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ *********************************************************/
+
+/*********************************************************
+ * The contents of this file are subject to the terms of the Common
+ * Development and Distribution License (the "License") version 1.0
+ * and no later version. You may not use this file except in
+ * compliance with the License.
+ *
+ * You can obtain a copy of the License at
+ * http://www.opensource.org/licenses/cddl1.php
+ *
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ *********************************************************/
+
+#ifndef _VMXNET2_DEF_H_
+#define _VMXNET2_DEF_H_
+
+#define INCLUDE_ALLOW_USERLEVEL
+
+#define INCLUDE_ALLOW_MODULE
+#define INCLUDE_ALLOW_VMK_MODULE
+#define INCLUDE_ALLOW_VMKERNEL
+#define INCLUDE_ALLOW_DISTRIBUTE
+#include "includeCheck.h"
+
+#include "net_sg.h"
+#include "vmxnet_def.h"
+
+
+/*
+ * Magic number that identifies this version of the vmxnet protocol.
+ */
+#define VMXNET2_MAGIC 0xbabe864f
+
+/* size of the rx ring */
+#define VMXNET2_MAX_NUM_RX_BUFFERS 128
+#define VMXNET2_DEFAULT_NUM_RX_BUFFERS 100
+
+
+/* size of the rx ring when enhanced vmxnet is used */
+#define ENHANCED_VMXNET2_MAX_NUM_RX_BUFFERS 512
+#define ENHANCED_VMXNET2_DEFAULT_NUM_RX_BUFFERS 150
+
+/* size of the 2nd rx ring */
+#define VMXNET2_MAX_NUM_RX_BUFFERS2 2048
+#define VMXNET2_DEFAULT_NUM_RX_BUFFERS2 512
+
+/* size of the tx ring */
+#define VMXNET2_MAX_NUM_TX_BUFFERS 128
+#define VMXNET2_DEFAULT_NUM_TX_BUFFERS 100
+
+/* size of the tx ring when tso/jf is used */
+#define VMXNET2_MAX_NUM_TX_BUFFERS_TSO 512
+#define VMXNET2_DEFAULT_NUM_TX_BUFFERS_TSO 256
+
+enum {
+ VMXNET2_OWNERSHIP_DRIVER,
+ VMXNET2_OWNERSHIP_DRIVER_PENDING,
+ VMXNET2_OWNERSHIP_NIC,
+ VMXNET2_OWNERSHIP_NIC_PENDING,
+ VMXNET2_OWNERSHIP_NIC_FRAG,
+ VMXNET2_OWNERSHIP_DRIVER_FRAG,
+};
+
+#define VMXNET2_SG_DEFAULT_LENGTH 6
+
+typedef struct Vmxnet2_SG_Array {
+ uint16 addrType;
+ uint16 length;
+ NetSG_Elem sg[VMXNET2_SG_DEFAULT_LENGTH];
+} Vmxnet2_SG_Array;
+
+typedef struct Vmxnet2_RxRingEntry {
+ uint64 paddr; /* Physical address of the packet data. */
+ uint32 bufferLength; /* The length of the data at paddr. */
+ uint32 actualLength; /* The actual length of the received data. */
+ uint16 ownership; /* Who owns the packet. */
+ uint16 flags; /* Flags as defined below. */
+ uint32 index; /*
+ * Currently:
+ *
+ * This is being used as an packet index to
+ * rx buffers.
+ *
+ * Originally:
+ *
+ * was void* driverData ("Driver specific data.")
+ * which was used for sk_buf**s in Linux and
+ * VmxnetRxBuff*s in Windows. It could not be
+ * here because the structure needs to be the
+ * same size between architectures, and it was
+ * not used on the device side, anyway. Look
+ * for its replacement in
+ * Vmxnet_Private.rxRingBuffPtr on Linux and
+ * VmxnetAdapter.rxRingBuffPtr on Windows.
+ */
+} Vmxnet2_RxRingEntry;
+
+/*
+ * Vmxnet2_RxRingEntry flags:
+ *
+ * VMXNET2_RX_HW_XSUM_OK The hardware verified the TCP/UDP checksum.
+ * VMXNET2_RX_WITH_FRAG More data is in the 2nd ring
+ * VMXNET2_RX_FRAG_EOP This is the last frag, the only valid flag for
+ * 2nd ring entry
+ *
+ */
+#define VMXNET2_RX_HW_XSUM_OK 0x01
+#define VMXNET2_RX_WITH_FRAG 0x02
+#define VMXNET2_RX_FRAG_EOP 0x04
+
+typedef struct Vmxnet2_TxRingEntry {
+ uint16 flags; /* Flags as defined below. */
+ uint16 ownership; /* Who owns this packet. */
+ uint32 extra; /*
+ * was void* driverData ("Driver specific data.")
+ * which was used for sk_buf*s in Linux and
+ * VmxnetTxInfo*s in Windows. It could not be
+ * here because the structure needs to be the
+ * same size between architectures, and it was
+ * not used on the device side, anyway. Look
+ * for its replacement in
+ * Vmxnet_Private.txRingBuffPtr on Linux and
+ * VmxnetAdapter.txRingBuffPtr on Windows.
+ */
+ uint32 tsoMss; /* TSO pkt MSS */
+ Vmxnet2_SG_Array sg; /* Packet data. */
+} Vmxnet2_TxRingEntry;
+
+/*
+ * Vmxnet2_TxRingEntry flags:
+ *
+ * VMXNET2_TX_CAN_KEEP The implementation can return the tx ring entry
+ * to the driver when it is ready as opposed to
+ * before the transmit call from the driver completes.
+ * VMXNET2_TX_RING_LOW The driver's transmit ring buffer is low on free
+ * slots.
+ * VMXNET2_TX_HW_XSUM The hardware should perform the TCP/UDP checksum
+ * VMXNET2_TX_TSO The hardware should do TCP segmentation.
+ * VMXNET2_TX_PINNED_BUFFER The driver used one of the preallocated vmkernel
+ * buffers *and* it has been pinned with Net_PinTxBuffers.
+ * VMXNET2_TX_MORE This is *not* the last tx entry for the pkt.
+ * All flags except VMXNET2_TX_MORE are ignored
+ * for the subsequent tx entries.
+ */
+#define VMXNET2_TX_CAN_KEEP 0x0001
+#define VMXNET2_TX_RING_LOW 0x0002
+#define VMXNET2_TX_HW_XSUM 0x0004
+#define VMXNET2_TX_TSO 0x0008
+#define VMXNET2_TX_PINNED_BUFFER 0x0010
+#define VMXNET2_TX_MORE 0x0020
+
+/*
+ * Structure used by implementations. This structure allows the inline
+ * functions below to be used.
+ */
+typedef struct Vmxnet2_RxRingInfo {
+ Vmxnet2_RxRingEntry *base; /* starting addr of the ring */
+ uint32 nicNext; /* next entry to use in the ring */
+ uint32 ringLength; /* # of entries in the ring */
+ PA startPA; /* PA of the starting addr of the ring */
+#ifdef VMX86_DEBUG
+ const char *name;
+#endif
+} Vmxnet2_RxRingInfo;
+
+typedef struct Vmxnet2_TxRingInfo {
+ Vmxnet2_TxRingEntry *base; /* starting addr of the ring */
+ uint32 nicNext; /* next entry to use in the ring */
+ uint32 ringLength; /* # of entries in the ring */
+ PA startPA; /* PA of the starting addr of the ring */
+#ifdef VMX86_DEBUG
+ const char *name;
+#endif
+} Vmxnet2_TxRingInfo;
+
+typedef struct Vmxnet2_ImplData {
+ Vmxnet2_RxRingInfo rxRing;
+ Vmxnet2_RxRingInfo rxRing2;
+ Vmxnet2_TxRingInfo txRing;
+
+ struct PhysMem_Token *ddPhysMemToken;
+} Vmxnet2_ImplData;
+
+/*
+ * Used internally for performance studies. By default this will be off so there
+ * should be no compatibilty or other interferences.
+ */
+
+/* #define ENABLE_VMXNET2_PROFILING */
+
+
+#ifdef ENABLE_VMXNET2_PROFILING
+typedef struct Vmxnet2_VmmStats {
+ uint64 vIntTSC; /* the time that virtual int was posted */
+ uint64 actionsCount; /* Number of actions received */
+ uint64 numWasteActions; /* Number of non-productive actions */
+} Vmxnet2_VmmStats;
+#endif
+
+typedef struct Vmxnet2_DriverStats {
+ uint32 transmits; /* # of times that the drivers transmit function */
+ /* is called. The driver could transmit more */
+ /* than one packet per call. */
+ uint32 pktsTransmitted; /* # of packets transmitted. */
+ uint32 noCopyTransmits; /* # of packets that are transmitted without */
+ /* copying any data. */
+ uint32 copyTransmits; /* # of packets that are transmittted by copying */
+ /* the data into a buffer. */
+ uint32 maxTxsPending; /* Max # of transmits outstanding. */
+ uint32 txStopped; /* # of times that transmits got stopped because */
+ /* the tx ring was full. */
+ uint32 txRingOverflow; /* # of times that transmits got deferred bc */
+ /* the tx ring was full. This must be >= */
+ /* txStopped since there will be one */
+ /* txStopped when the ring fills up and then */
+ /* one txsRingOverflow for each packet that */
+ /* that gets deferred until there is space. */
+ uint32 interrupts; /* # of times interrupted. */
+ uint32 pktsReceived; /* # of packets received. */
+ uint32 rxBuffersLow; /* # of times that the driver was low on */
+ /* receive buffers. */
+#ifdef ENABLE_VMXNET2_PROFILING
+ Vmxnet2_VmmStats vmmStats; /* vmm related stats for perf study */
+#endif
+} Vmxnet2_DriverStats;
+
+/*
+ * Shared data structure between the vm, the vmm, and the vmkernel.
+ * This structure was originally arranged to try to group common data
+ * on 32-byte cache lines, but bit rot and the fact that we no longer
+ * run on many CPUs with that cacheline size killed that optimization.
+ * vmxnet3 should target 128 byte sizes and alignments to optimize for
+ * the 64 byte cacheline pairs on P4.
+ */
+typedef struct Vmxnet2_DriverData {
+ /*
+ * Magic must be first.
+ */
+ Vmxnet_DDMagic magic;
+
+ /*
+ * Receive fields.
+ */
+ uint32 rxRingLength; /* Length of the receive ring. */
+ uint32 rxDriverNext; /* Index of the next packet that will */
+ /* be filled in by the impl */
+
+ uint32 rxRingLength2; /* Length of the 2nd receive ring. */
+ uint32 rxDriverNext2; /* Index of the next packet that will */
+ /* be filled in by the impl */
+
+ uint32 notUsed1; /* was "irq" */
+
+ /*
+ * Interface flags and multicast filter.
+ */
+ uint32 ifflags;
+ uint32 LADRF[VMXNET_MAX_LADRF];
+
+ /*
+ * Transmit fields
+ */
+ uint32 txDontClusterSize; /* All packets <= this will be transmitted */
+ /* immediately, regardless of clustering */
+ /* settings [was fill[1]] */
+ uint32 txRingLength; /* Length of the transmit ring. */
+ uint32 txDriverCur; /* Index of the next packet to be */
+ /* returned by the implementation.*/
+ uint32 txDriverNext; /* Index of the entry in the ring */
+ /* buffer to use for the next packet.*/
+ uint32 txStopped; /* The driver has stopped transmitting */
+ /* because its ring buffer is full.*/
+ uint32 txClusterLength; /* Maximum number of packets to */
+ /* put in the ring buffer before */
+ /* asking the implementation to */
+ /* transmit the packets in the buffer.*/
+ uint32 txNumDeferred; /* Number of packets that have been */
+ /* queued in the ring buffer since */
+ /* the last time the implementation */
+ /* was asked to transmit. */
+ uint32 notUsed3; /* This field is deprecated but still used */
+ /* as minXmitPhysLength on the escher branch. */
+ /* It cannot be used for other purposes */
+ /* until escher vms no longer are allowed */
+ /* to install this driver. */
+
+ uint32 totalRxBuffers; /* used by esx for max rx buffers */
+ uint64 rxBufferPhysStart; /* used by esx for pinng rx buffers */
+ /*
+ * Extra fields for future expansion.
+ */
+ uint32 extra[2];
+
+ uint16 maxFrags; /* # of frags the driver can handle */
+ uint16 featureCtl; /* for driver to enable some feature */
+
+ /*
+ * The following fields are used to save the nicNext indexes part
+ * of implData in the vmkernel when disconnecting the adapter, we
+ * need them when we reconnect. This mechanism is used for
+ * checkpointing as well.
+ */
+ uint32 savedRxNICNext;
+ uint32 savedRxNICNext2;
+ uint32 savedTxNICNext;
+
+ /*
+ * Fields used during initialization or debugging.
+ */
+ uint32 length;
+ uint32 rxRingOffset;
+ uint32 rxRingOffset2;
+ uint32 txRingOffset;
+ uint32 debugLevel;
+ uint32 txBufferPhysStart;
+ uint32 txBufferPhysLength;
+ uint32 txPktMaxSize;
+
+ /*
+ * Driver statistics.
+ */
+ Vmxnet2_DriverStats stats;
+} Vmxnet2_DriverData;
+
+/*
+ * Shared between VMM and Vmkernel part of vmxnet2 to optimize action posting
+ * VMM writes 1 (don't post) or 0 (okay to post) and vmk reads this.
+ */
+typedef struct VmxnetVMKShared {
+ uint32 dontPostActions;
+} VmxnetVMKShared;
+
+#if defined VMX86_VMX || defined VMKERNEL
+
+/*
+ * Inline functions used to assist the implementation of the vmxnet interface.
+ */
+
+/*
+ * Get the next empty packet out of the receive ring and move to
+ * the next packet.
+ */
+static INLINE Vmxnet2_RxRingEntry *
+Vmxnet2_GetNextRx(Vmxnet2_RxRingInfo *ri, uint16 ownership)
+{
+ Vmxnet2_RxRingEntry *rre = ri->base + ri->nicNext;
+ if (rre->ownership == ownership) {
+ VMXNET_INC(ri->nicNext, ri->ringLength);
+ } else {
+ rre = NULL;
+ }
+
+ return rre;
+}
+
+/*
+ * Return ownership of a packet in the receive ring to the driver.
+ */
+static INLINE void
+Vmxnet2_PutRx(Vmxnet2_RxRingEntry *rre, uint32 pktLength, uint16 ownership)
+{
+ rre->actualLength = pktLength;
+ COMPILER_MEM_BARRIER();
+ rre->ownership = ownership;
+}
+
+/*
+ * Get the next pending packet out of the transmit ring.
+ */
+static INLINE Vmxnet2_TxRingEntry *
+Vmxnet2_GetNextTx(Vmxnet2_TxRingInfo *ri)
+{
+ Vmxnet2_TxRingEntry *txre = ri->base + ri->nicNext;
+ if (txre->ownership == VMXNET2_OWNERSHIP_NIC) {
+ return txre;
+ } else {
+ return NULL;
+ }
+}
+
+/*
+ * Move to the next entry in the transmit ring.
+ */
+static INLINE unsigned int
+Vmxnet2_IncNextTx(Vmxnet2_TxRingInfo *ri)
+{
+ unsigned int prev = ri->nicNext;
+ Vmxnet2_TxRingEntry *txre = ri->base + ri->nicNext;
+
+ txre->ownership = VMXNET2_OWNERSHIP_NIC_PENDING;
+
+ VMXNET_INC(ri->nicNext, ri->ringLength);
+ return prev;
+}
+
+/*
+ * Get the indicated entry from transmit ring.
+ */
+static INLINE Vmxnet2_TxRingEntry *
+Vmxnet2_GetTxEntry(Vmxnet2_TxRingInfo *ri, unsigned int idx)
+{
+ return ri->base + idx;
+}
+
+/*
+ * Get the indicated entry from the given rx ring
+ */
+static INLINE Vmxnet2_RxRingEntry *
+Vmxnet2_GetRxEntry(Vmxnet2_RxRingInfo *ri, unsigned int idx)
+{
+ return ri->base + idx;
+}
+
+#endif /* defined VMX86_VMX || defined VMKERNEL */
+
+#endif
+
diff --git a/usr/src/uts/intel/io/vmxnet/vmxnet_def.h b/usr/src/uts/intel/io/vmxnet/vmxnet_def.h
new file mode 100644
index 0000000000..703466c995
--- /dev/null
+++ b/usr/src/uts/intel/io/vmxnet/vmxnet_def.h
@@ -0,0 +1,184 @@
+/*********************************************************
+ * Copyright (C) 1999 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ *********************************************************/
+
+/*********************************************************
+ * The contents of this file are subject to the terms of the Common
+ * Development and Distribution License (the "License") version 1.0
+ * and no later version. You may not use this file except in
+ * compliance with the License.
+ *
+ * You can obtain a copy of the License at
+ * http://www.opensource.org/licenses/cddl1.php
+ *
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ *********************************************************/
+
+#ifndef _VMXNET_DEF_H_
+#define _VMXNET_DEF_H_
+
+#define INCLUDE_ALLOW_USERLEVEL
+
+#define INCLUDE_ALLOW_MODULE
+#define INCLUDE_ALLOW_VMK_MODULE
+#define INCLUDE_ALLOW_VMKERNEL
+#define INCLUDE_ALLOW_DISTRIBUTE
+#include "includeCheck.h"
+
+#include "net_sg.h"
+#include "vmnet_def.h"
+
+
+/*
+ * Vmxnet I/O ports, used by both the vmxnet driver and
+ * the device emulation code.
+ */
+
+#define VMXNET_INIT_ADDR 0x00
+#define VMXNET_INIT_LENGTH 0x04
+#define VMXNET_TX_ADDR 0x08
+#define VMXNET_COMMAND_ADDR 0x0c
+#define VMXNET_MAC_ADDR 0x10
+#define VMXNET_LOW_VERSION 0x18
+#define VMXNET_HIGH_VERSION 0x1c
+#define VMXNET_STATUS_ADDR 0x20
+#define VMXNET_TOE_INIT_ADDR 0x24
+#define VMXNET_APROM_ADDR 0x28
+#define VMXNET_INT_ENABLE_ADDR 0x30
+#define VMXNET_WAKE_PKT_PATTERNS 0x34
+
+/*
+ * Vmxnet command register values.
+ */
+#define VMXNET_CMD_INTR_ACK 0x0001
+#define VMXNET_CMD_UPDATE_LADRF 0x0002
+#define VMXNET_CMD_UPDATE_IFF 0x0004
+#define VMXNET_CMD_UNUSED 1 0x0008
+#define VMXNET_CMD_UNUSED_2 0x0010
+#define VMXNET_CMD_INTR_DISABLE 0x0020
+#define VMXNET_CMD_INTR_ENABLE 0x0040
+#define VMXNET_CMD_UNUSED_3 0x0080
+#define VMXNET_CMD_CHECK_TX_DONE 0x0100
+#define VMXNET_CMD_GET_NUM_RX_BUFFERS 0x0200
+#define VMXNET_CMD_GET_NUM_TX_BUFFERS 0x0400
+#define VMXNET_CMD_PIN_TX_BUFFERS 0x0800
+#define VMXNET_CMD_GET_CAPABILITIES 0x1000
+#define VMXNET_CMD_GET_FEATURES 0x2000
+#define VMXNET_CMD_SET_POWER_FULL 0x4000
+#define VMXNET_CMD_SET_POWER_LOW 0x8000
+
+/*
+ * Vmxnet status register values.
+ */
+#define VMXNET_STATUS_CONNECTED 0x0001
+#define VMXNET_STATUS_ENABLED 0x0002
+#define VMXNET_STATUS_TX_PINNED 0x0004
+
+/*
+ * Values for the interface flags.
+ */
+#define VMXNET_IFF_PROMISC 0x01
+#define VMXNET_IFF_BROADCAST 0x02
+#define VMXNET_IFF_MULTICAST 0x04
+#define VMXNET_IFF_DIRECTED 0x08
+
+/*
+ * Length of the multicast address filter.
+ */
+#define VMXNET_MAX_LADRF 2
+
+/*
+ * Size of Vmxnet APROM.
+ */
+#define VMXNET_APROM_SIZE 6
+
+/*
+ * An invalid ring index.
+ */
+#define VMXNET_INVALID_RING_INDEX (-1)
+
+/*
+ * Features that are implemented by the driver. These are driver
+ * specific so not all features will be listed here. In addition not all
+ * drivers have to pay attention to these feature flags.
+ *
+ * VMXNET_FEATURE_ZERO_COPY_TX The driver won't do any copies as long as
+ * the packet length is >
+ * Vmxnet_DriverData.minTxPhysLength.
+ *
+ * VMXNET_FEATURE_TSO The driver will use the TSO capabilities
+ * of the underlying hardware if available
+ * and enabled.
+ *
+ * VMXNET_FEATURE_JUMBO_FRAME The driver can send/rcv jumbo frame
+ *
+ * VMXNET_FEATURE_LPD The backend can deliver large pkts
+ */
+#define VMXNET_FEATURE_ZERO_COPY_TX 0x01
+#define VMXNET_FEATURE_TSO 0x02
+#define VMXNET_FEATURE_JUMBO_FRAME 0x04
+#define VMXNET_FEATURE_LPD 0x08
+
+/*
+ * Define the set of capabilities required by each feature above
+ */
+#define VMXNET_FEATURE_ZERO_COPY_TX_CAPS VMXNET_CAP_SG
+#define VMXNET_FEATURE_TSO_CAPS VMXNET_CAP_TSO
+#define VMXNET_HIGHEST_FEATURE_BIT VMXNET_FEATURE_TSO
+
+#define VMXNET_INC(val, max) \
+ val++; \
+ if (UNLIKELY(val == max)) { \
+ val = 0; \
+ }
+
+/*
+ * code that just wants to switch on the different versions of the
+ * guest<->implementation protocol can cast driver data to this.
+ */
+typedef uint32 Vmxnet_DDMagic;
+
+/*
+ * Wake packet pattern commands sent through VMXNET_WAKE_PKT_PATTERNS port
+ */
+
+#define VMXNET_PM_OPCODE_START 3 /* args: cnt of wake packet patterns */
+#define VMXNET_PM_OPCODE_LEN 2 /* args: index of wake packet pattern */
+ /* number of pattern byte values */
+#define VMXNET_PM_OPCODE_DATA 1 /* args: index of wake packet pattern */
+ /* offset in pattern byte values list */
+ /* packet byte offset */
+ /* packet byte value */
+#define VMXNET_PM_OPCODE_END 0 /* args: <none> */
+
+typedef union Vmxnet_WakePktCmd {
+ uint32 pktData : 32;
+ struct {
+ unsigned cmd : 2; /* wake packet pattern cmd [from list above] */
+ unsigned cnt : 3; /* cnt wk pkt pttrns 1..MAX_NUM_FILTER_PTTRNS */
+ unsigned ind : 3; /* ind wk pkt pttrn 0..MAX_NUM_FILTER_PTTRNS-1 */
+ unsigned lenOff : 8; /* num pttrn byte vals 1..MAX_PKT_FILTER_SIZE */
+ /* OR offset in pattern byte values list */
+ /* 0..MAX_PKT_FILTER_SIZE-1 */
+ unsigned byteOff : 8; /* pkt byte offset 0..MAX_PKT_FILTER_SIZE-1 */
+ unsigned byteVal : 8; /* packet byte value 0..255 */
+ } pktPttrn;
+} Vmxnet_WakePktCmd;
+
+#endif /* _VMXNET_DEF_H_ */
diff --git a/usr/src/uts/intel/ipf/Makefile b/usr/src/uts/intel/ipf/Makefile
index 046a6c223d..e18158a43d 100644
--- a/usr/src/uts/intel/ipf/Makefile
+++ b/usr/src/uts/intel/ipf/Makefile
@@ -58,7 +58,7 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
CPPFLAGS += -DIPFILTER_LKM -DIPFILTER_LOG -DIPFILTER_LOOKUP -DUSE_INET6
CPPFLAGS += -DSUNDDI -DSOLARIS2=$(RELEASE_MINOR) -DIRE_ILL_CN
-LDFLAGS += -dy -Ndrv/ip -Nmisc/md5 -Nmisc/neti -Nmisc/hook -Nmisc/kcf
+LDFLAGS += -dy -Ndrv/ip -Nmisc/md5 -Nmisc/neti -Nmisc/hook -Nmisc/kcf -Ndrv/vnd
INC_PATH += -I$(UTSBASE)/common/inet/ipf
diff --git a/usr/src/uts/intel/ipf/ipf.global-objs.debug64 b/usr/src/uts/intel/ipf/ipf.global-objs.debug64
index 663613cee3..0af6da41fa 100644
--- a/usr/src/uts/intel/ipf/ipf.global-objs.debug64
+++ b/usr/src/uts/intel/ipf/ipf.global-objs.debug64
@@ -39,6 +39,10 @@ hook4_nicevents
hook4_nicevents_gz
hook4_out
hook4_out_gz
+hook4_vnd_in
+hook4_vnd_in_gz
+hook4_vnd_out
+hook4_vnd_out_gz
hook6_in
hook6_in_gz
hook6_loop_in
@@ -49,6 +53,10 @@ hook6_nicevents
hook6_nicevents_gz
hook6_out
hook6_out_gz
+hook6_vnd_in
+hook6_vnd_in_gz
+hook6_vnd_out
+hook6_vnd_out_gz
icmpreplytype4
icmpreplytype6
icmptoicmp6types
diff --git a/usr/src/uts/intel/ixgbe/Makefile b/usr/src/uts/intel/ixgbe/Makefile
index 17bc88f43e..c457b52798 100644
--- a/usr/src/uts/intel/ixgbe/Makefile
+++ b/usr/src/uts/intel/ixgbe/Makefile
@@ -67,6 +67,7 @@ INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
# Driver depends on MAC
#
LDFLAGS += -dy -N misc/mac
+MAPFILES += ddi mac random kernel
#
# Default build targets.
@@ -92,4 +93,5 @@ install: $(INSTALL_DEPS)
#
# Include common targets.
#
+include $(UTSBASE)/Makefile.mapfile
include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/kdi/kdi_idt.c b/usr/src/uts/intel/kdi/kdi_idt.c
index 4bd6ca73e1..73d9628e62 100644
--- a/usr/src/uts/intel/kdi/kdi_idt.c
+++ b/usr/src/uts/intel/kdi/kdi_idt.c
@@ -217,7 +217,8 @@ kdi_idt_patch(caddr_t code, size_t sz)
continue; /* uses kernel's handler */
gd = &kdi_idt[i];
- patch = (uchar_t *)GATESEG_GETOFFSET(gd) + KDI_MSR_PATCHOFF;
+ patch = ((uchar_t *)(uintptr_t)GATESEG_GETOFFSET(gd)) +
+ KDI_MSR_PATCHOFF;
/*
* We can't ASSERT that there's a nop here, because this may be
diff --git a/usr/src/uts/intel/lx_brand/Makefile b/usr/src/uts/intel/lx_brand/Makefile
new file mode 100644
index 0000000000..daa16fa868
--- /dev/null
+++ b/usr/src/uts/intel/lx_brand/Makefile
@@ -0,0 +1,111 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+# Copyright 2016 Joyent, Inc.
+#
+# This makefile drives the production of the kernel component of
+# the lx brand
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Path to where brand common sources live
+#
+LX_CMN = $(SRC)/common/brand/lx
+
+#
+# Define the module and object file sets.
+#
+MODULE = lx_brand
+OBJECTS = $(LX_BRAND_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(LX_BRAND_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_BRAND_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+INC_PATH += -I$(UTSBASE)/common/brand/lx -I$(LX_CMN) -I$(SRC)/common
+INC_PATH += -I$(UTSBASE)/common/inet/sockmods -I$(UTSBASE)/common/io/bpf
+INC_PATH += -I$(UTSBASE)/common/fs/sockfs
+INC_PATH += -I$(UTSBASE)/common/fs/zfs
+AS_INC_PATH += -I$(UTSBASE)/i86pc/genassym/$(OBJS_DIR)
+
+#
+# lint pass one enforcement
+#
+CFLAGS += $(CCVERBOSE)
+
+LDFLAGS += -dy -Nexec/elfexec -Nfs/fifofs -Nfs/sockfs -Ndrv/ip -Nfs/zfs
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
+LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
+
+#
+# Include brand-specific rules
+#
+
+include $(UTSBASE)/intel/lx_brand/Makefile.rules
diff --git a/usr/src/uts/intel/lx_brand/Makefile.rules b/usr/src/uts/intel/lx_brand/Makefile.rules
new file mode 100644
index 0000000000..0a83e15493
--- /dev/null
+++ b/usr/src/uts/intel/lx_brand/Makefile.rules
@@ -0,0 +1,97 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+# Copyright 2015 Joyent, Inc.
+#
+#
+
+#
+# Section 1a: C object build rules
+#
+$(OBJS_DIR_OBJ64)/%.o: $(UTSBASE)/common/brand/lx/os/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR_DBG64)/%.o: $(UTSBASE)/common/brand/lx/os/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR_OBJ64)/%.o: $(UTSBASE)/common/brand/lx/syscall/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR_DBG64)/%.o: $(UTSBASE)/common/brand/lx/syscall/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR_OBJ64)/%.o: $(UTSBASE)/intel/brand/lx/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR_DBG64)/%.o: $(UTSBASE)/intel/brand/lx/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR_OBJ64)/%.o: $(UTSBASE)/intel/brand/lx/%.s
+ $(COMPILE.s) -I$(UTSBASE)/i86pc -o $@ $<
+
+$(OBJS_DIR_OBJ64)/%.o: $(LX_CMN)/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR_DBG64)/%.o: $(UTSBASE)/intel/brand/lx/%.s
+ $(COMPILE.s) -I$(UTSBASE)/i86pc -o $@ $<
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/os/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/syscall/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/intel/brand/lx/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR)/%.o: $(LX_CMN)/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/intel/brand/lx/%.s
+ $(COMPILE.s) -I$(UTSBASE)/i86pc -o $@ $<
+
+#
+# Section 1b: Lint `object' build rules.
+#
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/os/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/syscall/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
+$(LINTS_DIR)/%.ln: $(LX_CMN)/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/intel/brand/lx/%.s
+ @($(LHEAD) $(LINT.s) $< $(LTAIL))
diff --git a/usr/src/uts/intel/lx_cgroup/Makefile b/usr/src/uts/intel/lx_cgroup/Makefile
new file mode 100644
index 0000000000..6f9116f32a
--- /dev/null
+++ b/usr/src/uts/intel/lx_cgroup/Makefile
@@ -0,0 +1,57 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+UTSBASE = ../..
+
+LX_CMN = $(SRC)/common/brand/lx
+
+MODULE = lx_cgroup
+OBJECTS = $(LX_CGROUP_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(LX_CGROUP_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_FS_DIR)/$(MODULE)
+
+INC_PATH += -I$(UTSBASE)/common/brand/lx -I$(LX_CMN)
+
+include $(UTSBASE)/intel/Makefile.intel
+
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+CFLAGS += $(CCVERBOSE)
+
+LDFLAGS += -dy -Nbrand/lx_brand
+
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+include $(UTSBASE)/intel/Makefile.targ
+
+include $(UTSBASE)/intel/lx_cgroup/Makefile.rules
diff --git a/usr/src/uts/intel/lx_cgroup/Makefile.rules b/usr/src/uts/intel/lx_cgroup/Makefile.rules
new file mode 100644
index 0000000000..6d4c7c4060
--- /dev/null
+++ b/usr/src/uts/intel/lx_cgroup/Makefile.rules
@@ -0,0 +1,21 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc. All rights reserved.
+#
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/cgroups/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/cgroups/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/intel/lx_devfs/Makefile b/usr/src/uts/intel/lx_devfs/Makefile
new file mode 100644
index 0000000000..1254f596eb
--- /dev/null
+++ b/usr/src/uts/intel/lx_devfs/Makefile
@@ -0,0 +1,57 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+UTSBASE = ../..
+
+LX_CMN = $(SRC)/common/brand/lx
+
+MODULE = lx_devfs
+OBJECTS = $(LX_DEVFS_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(LX_DEVFS_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_FS_DIR)/$(MODULE)
+
+INC_PATH += -I$(UTSBASE)/common/brand/lx -I$(LX_CMN)
+
+include $(UTSBASE)/intel/Makefile.intel
+
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+CFLAGS += $(CCVERBOSE)
+
+LDFLAGS += -dy -Nbrand/lx_brand
+
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+include $(UTSBASE)/intel/Makefile.targ
+
+include $(UTSBASE)/intel/lx_devfs/Makefile.rules
diff --git a/usr/src/uts/intel/lx_devfs/Makefile.rules b/usr/src/uts/intel/lx_devfs/Makefile.rules
new file mode 100644
index 0000000000..4b9748314c
--- /dev/null
+++ b/usr/src/uts/intel/lx_devfs/Makefile.rules
@@ -0,0 +1,21 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc. All rights reserved.
+#
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/devfs/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/devfs/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/intel/lx_netlink/Makefile b/usr/src/uts/intel/lx_netlink/Makefile
new file mode 100644
index 0000000000..2ada8e28a6
--- /dev/null
+++ b/usr/src/uts/intel/lx_netlink/Makefile
@@ -0,0 +1,77 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = lx_netlink
+OBJECTS = $(LX_NETLINK_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(LX_NETLINK_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_SOCK_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+CPPFLAGS += -I$(UTSBASE)/common/brand/lx
+LDFLAGS += -dy -Ndrv/ip -Nfs/sockfs -Nbrand/lx_brand
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/io/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/io/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/intel/lx_proc/Makefile b/usr/src/uts/intel/lx_proc/Makefile
new file mode 100644
index 0000000000..6e065b7899
--- /dev/null
+++ b/usr/src/uts/intel/lx_proc/Makefile
@@ -0,0 +1,117 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# uts/intel/lx_proc/Makefile
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# Copyright 2015 Joyent, Inc.
+#
+# This makefile drives the production of the lxproc file system
+# kernel module.
+#
+# i86 architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Path to where brand common sources live
+#
+LX_CMN = $(SRC)/common/brand/lx
+
+#
+# Define the module and object file sets.
+#
+MODULE = lx_proc
+OBJECTS = $(LX_PROC_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(LX_PROC_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_FS_DIR)/$(MODULE)
+
+INC_PATH += -I$(UTSBASE)/common/brand/lx -I$(LX_CMN)
+INC_PATH += -I$(UTSBASE)/common/fs/zfs
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# Overrides.
+#
+CFLAGS += $(CCVERBOSE)
+
+#
+# Depends on procfs and lx_brand
+#
+LDFLAGS += -dy -Nfs/procfs -Nbrand/lx_brand -Ndrv/inotify -Ndrv/ip
+LDFLAGS += -Nfs/sockfs
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW
+LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
+
+#
+# Include brand-specific rules
+#
+
+include $(UTSBASE)/intel/lx_proc/Makefile.rules
diff --git a/usr/src/uts/intel/lx_proc/Makefile.rules b/usr/src/uts/intel/lx_proc/Makefile.rules
new file mode 100644
index 0000000000..b8592d2fdd
--- /dev/null
+++ b/usr/src/uts/intel/lx_proc/Makefile.rules
@@ -0,0 +1,38 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+
+#
+# Section 1a: C object build rules
+#
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/procfs/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+#
+# Section 1b: Lint `object' build rules.
+#
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/procfs/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/intel/lx_ptm/Makefile b/usr/src/uts/intel/lx_ptm/Makefile
new file mode 100644
index 0000000000..dcead27da7
--- /dev/null
+++ b/usr/src/uts/intel/lx_ptm/Makefile
@@ -0,0 +1,91 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# uts/intel/lx_ptm/Makefile
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# This makefile drives the production of the lx_ptm driver
+#
+# intel architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = lx_ptm
+OBJECTS = $(LX_PTM_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(LX_PTM_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/common/brand/lx/io
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY) $(SRC_CONFILE)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+CPPFLAGS += -I$(UTSBASE)/common/brand/lx
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/io/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/io/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/intel/lx_sysfs/Makefile b/usr/src/uts/intel/lx_sysfs/Makefile
new file mode 100644
index 0000000000..14e0603533
--- /dev/null
+++ b/usr/src/uts/intel/lx_sysfs/Makefile
@@ -0,0 +1,66 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2016 Joyent, Inc.
+#
+
+UTSBASE = ../..
+
+LX_CMN = $(SRC)/common/brand/lx
+
+MODULE = lx_sysfs
+OBJECTS = $(LX_SYS_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(LX_SYS_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_FS_DIR)/$(MODULE)
+
+INC_PATH += -I$(UTSBASE)/common/brand/lx -I$(LX_CMN)
+
+include $(UTSBASE)/intel/Makefile.intel
+
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+CFLAGS += $(CCVERBOSE)
+
+LDFLAGS += -dy -Nbrand/lx_brand -Ndrv/ip
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+# XXX JJ
+# LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW
+# LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
+
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+include $(UTSBASE)/intel/Makefile.targ
+
+include $(UTSBASE)/intel/lx_sysfs/Makefile.rules
diff --git a/usr/src/uts/intel/lx_sysfs/Makefile.rules b/usr/src/uts/intel/lx_sysfs/Makefile.rules
new file mode 100644
index 0000000000..c9d4c28f85
--- /dev/null
+++ b/usr/src/uts/intel/lx_sysfs/Makefile.rules
@@ -0,0 +1,21 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc. All rights reserved.
+#
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/sysfs/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/sysfs/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/intel/lx_systrace/Makefile b/usr/src/uts/intel/lx_systrace/Makefile
new file mode 100644
index 0000000000..20c4a6a3a3
--- /dev/null
+++ b/usr/src/uts/intel/lx_systrace/Makefile
@@ -0,0 +1,80 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+UTSBASE = ../..
+
+MODULE = lx_systrace
+OBJECTS = $(LX_SYSTRACE_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(LX_SYSTRACE_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_DRV_DIR)/$(MODULE)
+ROOTLINK = $(USR_DTRACE_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/common/brand/lx/dtrace
+
+include $(UTSBASE)/intel/Makefile.intel
+
+ALL_TARGET = $(BINARY) $(SRC_CONFILE)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE)
+
+CPPFLAGS += -I$(UTSBASE)/common/brand/lx
+
+LDFLAGS += -dy -Ndrv/dtrace -Nbrand/lx_brand
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS += -erroff=E_STATIC_UNUSED
+
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+$(ROOTLINK): $(USR_DTRACE_DIR) $(ROOTMODULE)
+ -$(RM) $@; ln $(ROOTMODULE) $@
+
+include $(UTSBASE)/intel/Makefile.targ
+
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/dtrace/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/dtrace/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/intel/lxautofs/Makefile b/usr/src/uts/intel/lxautofs/Makefile
new file mode 100644
index 0000000000..5de66af48f
--- /dev/null
+++ b/usr/src/uts/intel/lxautofs/Makefile
@@ -0,0 +1,114 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+# Copyright 2016 Joyent, Inc.
+#
+
+#
+# This makefile drives the production of the lxautofs file system
+# kernel module.
+#
+# i86 architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+# Note that the name of the actual filesystem is lxautofs and
+# not lx_autofs. This is becase filesystem names are stupidly
+# limited to 8 characters.
+#
+MODULE = lxautofs
+OBJECTS = $(LX_AUTOFS_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(LX_AUTOFS_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_DRV_DIR)/$(MODULE)
+ROOTLINK = $(USR_FS_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/common/brand/lx/autofs
+
+INC_PATH += -I$(UTSBASE)/common/brand/lx
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOTLINK) $(ROOT_CONFFILE)
+
+#
+# Overrides.
+#
+CFLAGS += $(CCVERBOSE)
+LDFLAGS += -dy -Nfs/nfs
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+$(ROOTLINK): $(ROOT_FS_DIR) $(ROOTMODULE)
+ -$(RM) $@; ln $(ROOTMODULE) $@
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
+
+#
+# Include brand-specific rules
+#
+
+include $(UTSBASE)/intel/lxautofs/Makefile.rules
diff --git a/usr/src/uts/intel/lxautofs/Makefile.rules b/usr/src/uts/intel/lxautofs/Makefile.rules
new file mode 100644
index 0000000000..474743ce9d
--- /dev/null
+++ b/usr/src/uts/intel/lxautofs/Makefile.rules
@@ -0,0 +1,38 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Section 1a: C object build rules
+#
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/lx/autofs/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
+#
+# Section 1b: Lint `object' build rules.
+#
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/lx/autofs/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/intel/lxprocfs/Makefile b/usr/src/uts/intel/lxprocfs/Makefile
new file mode 100644
index 0000000000..c6ffec0199
--- /dev/null
+++ b/usr/src/uts/intel/lxprocfs/Makefile
@@ -0,0 +1,88 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# uts/intel/lxprocfs/Makefile
+#
+# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# This makefile drives the production of the lxprocfs file system
+# kernel module.
+#
+# intel architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = lxprocfs
+OBJECTS = $(LXPROC_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(LXPROC_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_FS_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# Depends on procfs
+#
+LDFLAGS += -dy -Nfs/procfs
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/nfp/Makefile b/usr/src/uts/intel/nfp/Makefile
new file mode 100644
index 0000000000..3acbc6a725
--- /dev/null
+++ b/usr/src/uts/intel/nfp/Makefile
@@ -0,0 +1,82 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014, Joyent, Inc
+#
+
+#
+# uts/intel/nfp/Makefile
+#
+# This makefile drives the production of the nfp
+# driver kernel module.
+#
+# intel architecture dependent
+#
+
+#
+# Paths to the base of the uts directory trees
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = nfp
+OBJECTS = $(NFP_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(NFP_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# Driver-specific flags
+#
+CPPFLAGS += -DCH_KERNELVER=270
+LDFLAGS += -dy
+CERRWARN += -_gcc=-Wno-unused-variable
+CERRWARN += -_gcc=-Wno-unused-function
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/opteron_pcbe/Makefile b/usr/src/uts/intel/opteron_pcbe/Makefile
index aea8e89fbe..7431f50329 100644
--- a/usr/src/uts/intel/opteron_pcbe/Makefile
+++ b/usr/src/uts/intel/opteron_pcbe/Makefile
@@ -34,7 +34,7 @@ UTSBASE = ../..
MODULE = pcbe.AuthenticAMD
OBJECTS = $(OPTERON_PCBE_OBJS:%=$(OBJS_DIR)/%)
LINTS = $(OPTERON_PCBE_OBJS:%.o=$(LINTS_DIR)/%.ln)
-ROOTMODULE = $(USR_PCBE_DIR)/$(MODULE)
+ROOTMODULE = $(ROOT_PSM_PCBE_DIR)/$(MODULE)
#
# Include common rules.
diff --git a/usr/src/uts/intel/os/driver_aliases b/usr/src/uts/intel/os/driver_aliases
index 3116819932..1cea287121 100644
--- a/usr/src/uts/intel/os/driver_aliases
+++ b/usr/src/uts/intel/os/driver_aliases
@@ -1 +1,2 @@
asy "pci11c1,480"
+vmxnet "pci15ad,720"
diff --git a/usr/src/uts/intel/os/name_to_major b/usr/src/uts/intel/os/name_to_major
index c5ad4c9bf0..1fb86f9a50 100644
--- a/usr/src/uts/intel/os/name_to_major
+++ b/usr/src/uts/intel/os/name_to_major
@@ -2,3 +2,4 @@ md 85
devinfo 88
asy 106
did 239
+vmxnet 270
diff --git a/usr/src/uts/intel/overlay/Makefile b/usr/src/uts/intel/overlay/Makefile
new file mode 100644
index 0000000000..645b888cbc
--- /dev/null
+++ b/usr/src/uts/intel/overlay/Makefile
@@ -0,0 +1,54 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+UTSBASE = ../..
+
+MODULE = overlay
+OBJECTS = $(OVERLAY_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(OVERLAY_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+
+include $(UTSBASE)/intel/Makefile.intel
+
+ALL_TARGET = $(BINARY) $(SRC_CONFFILE)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+CONF_SRCDIR = $(UTSBASE)/common/io/overlay
+MAPFILE = $(UTSBASE)/common/io/overlay/overlay.mapfile
+
+LDFLAGS += -dy -Nmisc/mac -Ndrv/dld -Nmisc/dls -Nmisc/ksocket
+
+LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
+
+
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/p123_pcbe/Makefile b/usr/src/uts/intel/p123_pcbe/Makefile
index 71b7d9bcec..d591d52e62 100644
--- a/usr/src/uts/intel/p123_pcbe/Makefile
+++ b/usr/src/uts/intel/p123_pcbe/Makefile
@@ -35,8 +35,8 @@ UTSBASE = ../..
MODULE = pcbe.GenuineIntel.6
OBJECTS = $(P123_PCBE_OBJS:%=$(OBJS_DIR)/%)
LINTS = $(P123_PCBE_OBJS:%.o=$(LINTS_DIR)/%.ln)
-ROOTMODULE = $(USR_PCBE_DIR)/$(MODULE)
-ROOTLINK = $(USR_PCBE_DIR)/pcbe.GenuineIntel.5
+ROOTMODULE = $(ROOT_PSM_PCBE_DIR)/$(MODULE)
+ROOTLINK = $(ROOT_PSM_PCBE_DIR)/pcbe.GenuineIntel.5
#
# Include common rules.
diff --git a/usr/src/uts/intel/p4_pcbe/Makefile b/usr/src/uts/intel/p4_pcbe/Makefile
index 07689646c1..42dc040eee 100644
--- a/usr/src/uts/intel/p4_pcbe/Makefile
+++ b/usr/src/uts/intel/p4_pcbe/Makefile
@@ -35,7 +35,7 @@ UTSBASE = ../..
MODULE = pcbe.GenuineIntel.15
OBJECTS = $(P4_PCBE_OBJS:%=$(OBJS_DIR)/%)
LINTS = $(P4_PCBE_OBJS:%.o=$(LINTS_DIR)/%.ln)
-ROOTMODULE = $(USR_PCBE_DIR)/$(MODULE)
+ROOTMODULE = $(ROOT_PSM_PCBE_DIR)/$(MODULE)
#
# Include common rules.
diff --git a/usr/src/uts/intel/promif/prom_emul.c b/usr/src/uts/intel/promif/prom_emul.c
index 5497d9eab8..cdf190ec6a 100644
--- a/usr/src/uts/intel/promif/prom_emul.c
+++ b/usr/src/uts/intel/promif/prom_emul.c
@@ -24,7 +24,9 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2015 Joyent, Inc. All rights reserved.
+ */
#include <sys/promif.h>
#include <sys/promimpl.h>
@@ -46,7 +48,7 @@ promif_create_prop(prom_node_t *pnp, char *name, void *val, int len, int flags)
q = kmem_zalloc(sizeof (*q), KM_SLEEP);
q->pp_name = kmem_zalloc(strlen(name) + 1, KM_SLEEP);
(void) strcpy(q->pp_name, name);
- q->pp_val = kmem_alloc(len, KM_SLEEP);
+ q->pp_val = len > 0 ? kmem_alloc(len, KM_SLEEP) : NULL;
q->pp_len = len;
switch (flags) {
case DDI_PROP_TYPE_INT:
diff --git a/usr/src/uts/intel/sys/acpi/acapps.h b/usr/src/uts/intel/sys/acpi/acapps.h
index 2d3f84186e..0382ca8547 100644
--- a/usr/src/uts/intel/sys/acpi/acapps.h
+++ b/usr/src/uts/intel/sys/acpi/acapps.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -52,7 +52,7 @@
/* Common info for tool signons */
#define ACPICA_NAME "Intel ACPI Component Architecture"
-#define ACPICA_COPYRIGHT "Copyright (c) 2000 - 2011 Intel Corporation"
+#define ACPICA_COPYRIGHT "Copyright (c) 2000 - 2012 Intel Corporation"
#if ACPI_MACHINE_WIDTH == 64
#define ACPI_WIDTH "-64"
@@ -81,6 +81,15 @@
Prefix, ACPICA_COPYRIGHT, \
Prefix
+/* Macros for usage messages */
+
+#define ACPI_USAGE_HEADER(Usage) \
+ printf ("Usage: %s\nOptions:\n", Usage);
+
+#define ACPI_OPTION(Name, Description) \
+ printf (" %-18s%s\n", Name, Description);
+
+
#define FILE_SUFFIX_DISASSEMBLY "dsl"
#define ACPI_TABLE_FILE_SUFFIX ".dat"
diff --git a/usr/src/uts/intel/sys/acpi/accommon.h b/usr/src/uts/intel/sys/acpi/accommon.h
index 58bb83ddb6..7e6d05aa8d 100644
--- a/usr/src/uts/intel/sys/acpi/accommon.h
+++ b/usr/src/uts/intel/sys/acpi/accommon.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/sys/acpi/acconfig.h b/usr/src/uts/intel/sys/acpi/acconfig.h
index 7a2107aa76..7880516e47 100644
--- a/usr/src/uts/intel/sys/acpi/acconfig.h
+++ b/usr/src/uts/intel/sys/acpi/acconfig.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -86,6 +86,23 @@
*/
#define ACPI_CHECKSUM_ABORT FALSE
+/*
+ * Generate a version of ACPICA that only supports "reduced hardware"
+ * platforms (as defined in ACPI 5.0). Set to TRUE to generate a specialized
+ * version of ACPICA that ONLY supports the ACPI 5.0 "reduced hardware"
+ * model. In other words, no ACPI hardware is supported.
+ *
+ * If TRUE, this means no support for the following:
+ * PM Event and Control registers
+ * SCI interrupt (and handler)
+ * Fixed Events
+ * General Purpose Events (GPEs)
+ * Global Lock
+ * ACPI PM timer
+ * FACS table (Waking vectors and Global Lock)
+ */
+#define ACPI_REDUCED_HARDWARE FALSE
+
/******************************************************************************
*
@@ -95,7 +112,7 @@
/* Version of ACPI supported */
-#define ACPI_CA_SUPPORT_LEVEL 3
+#define ACPI_CA_SUPPORT_LEVEL 5
/* Maximum count for a semaphore object */
@@ -123,7 +140,11 @@
/* Maximum sleep allowed via Sleep() operator */
-#define ACPI_MAX_SLEEP 20000 /* Two seconds */
+#define ACPI_MAX_SLEEP 2000 /* 2000 millisec == two seconds */
+
+/* Address Range lists are per-SpaceId (Memory and I/O only) */
+
+#define ACPI_ADDRESS_RANGE_MAX 2
/******************************************************************************
@@ -183,9 +204,10 @@
#define ACPI_RSDP_CHECKSUM_LENGTH 20
#define ACPI_RSDP_XCHECKSUM_LENGTH 36
-/* SMBus and IPMI bidirectional buffer size */
+/* SMBus, GSBus and IPMI bidirectional buffer size */
#define ACPI_SMBUS_BUFFER_SIZE 34
+#define ACPI_GSBUS_BUFFER_SIZE 34
#define ACPI_IPMI_BUFFER_SIZE 66
/* _SxD and _SxW control methods */
diff --git a/usr/src/uts/intel/sys/acpi/acdebug.h b/usr/src/uts/intel/sys/acpi/acdebug.h
index 2c1fe73429..bcc505c8d5 100644
--- a/usr/src/uts/intel/sys/acpi/acdebug.h
+++ b/usr/src/uts/intel/sys/acpi/acdebug.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -107,6 +107,10 @@ AcpiDbDisplayTableInfo (
char *TableArg);
void
+AcpiDbDisplayTemplate (
+ char *BufferArg);
+
+void
AcpiDbUnloadAcpiTable (
char *TableArg,
char *InstanceArg);
@@ -133,18 +137,20 @@ void
AcpiDbDisplayResources (
char *ObjectArg);
+ACPI_HW_DEPENDENT_RETURN_VOID (
void
AcpiDbDisplayGpes (
- void);
+ void))
void
AcpiDbDisplayHandlers (
void);
+ACPI_HW_DEPENDENT_RETURN_VOID (
void
AcpiDbGenerateGpe (
char *GpeArg,
- char *BlockArg);
+ char *BlockArg))
/*
diff --git a/usr/src/uts/intel/sys/acpi/acdisasm.h b/usr/src/uts/intel/sys/acpi/acdisasm.h
index aaaac11d86..9ec12713b4 100644
--- a/usr/src/uts/intel/sys/acpi/acdisasm.h
+++ b/usr/src/uts/intel/sys/acpi/acdisasm.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -63,7 +63,7 @@
typedef const struct acpi_dmtable_info
{
UINT8 Opcode;
- UINT8 Offset;
+ UINT16 Offset;
char *Name;
UINT8 Flags;
@@ -80,57 +80,82 @@ typedef const struct acpi_dmtable_info
/*
* Values for Opcode above.
- * Note: 0-7 must not change, used as a flag shift value
+ * Note: 0-7 must not change, they are used as a flag shift value. Other
+ * than those, new values can be added wherever appropriate.
*/
-#define ACPI_DMT_FLAG0 0
-#define ACPI_DMT_FLAG1 1
-#define ACPI_DMT_FLAG2 2
-#define ACPI_DMT_FLAG3 3
-#define ACPI_DMT_FLAG4 4
-#define ACPI_DMT_FLAG5 5
-#define ACPI_DMT_FLAG6 6
-#define ACPI_DMT_FLAG7 7
-#define ACPI_DMT_FLAGS0 8
-#define ACPI_DMT_FLAGS2 9
-#define ACPI_DMT_UINT8 10
-#define ACPI_DMT_UINT16 11
-#define ACPI_DMT_UINT24 12
-#define ACPI_DMT_UINT32 13
-#define ACPI_DMT_UINT56 14
-#define ACPI_DMT_UINT64 15
-#define ACPI_DMT_STRING 16
-#define ACPI_DMT_NAME4 17
-#define ACPI_DMT_NAME6 18
-#define ACPI_DMT_NAME8 19
-#define ACPI_DMT_CHKSUM 20
-#define ACPI_DMT_SPACEID 21
-#define ACPI_DMT_GAS 22
-#define ACPI_DMT_ASF 23
-#define ACPI_DMT_DMAR 24
-#define ACPI_DMT_HEST 25
-#define ACPI_DMT_HESTNTFY 26
-#define ACPI_DMT_HESTNTYP 27
-#define ACPI_DMT_MADT 28
-#define ACPI_DMT_SRAT 29
-#define ACPI_DMT_EXIT 30
-#define ACPI_DMT_SIG 31
-#define ACPI_DMT_FADTPM 32
-#define ACPI_DMT_BUF16 33
-#define ACPI_DMT_IVRS 34
-#define ACPI_DMT_BUFFER 35
-#define ACPI_DMT_PCI_PATH 36
-#define ACPI_DMT_EINJACT 37
-#define ACPI_DMT_EINJINST 38
-#define ACPI_DMT_ERSTACT 39
-#define ACPI_DMT_ERSTINST 40
-#define ACPI_DMT_ACCWIDTH 41
-#define ACPI_DMT_UNICODE 42
-#define ACPI_DMT_UUID 43
-#define ACPI_DMT_DEVICE_PATH 44
-#define ACPI_DMT_LABEL 45
-#define ACPI_DMT_BUF7 46
-#define ACPI_DMT_BUF128 47
-#define ACPI_DMT_SLIC 48
+typedef enum
+{
+ /* Simple Data Types */
+
+ ACPI_DMT_FLAG0 = 0,
+ ACPI_DMT_FLAG1 = 1,
+ ACPI_DMT_FLAG2 = 2,
+ ACPI_DMT_FLAG3 = 3,
+ ACPI_DMT_FLAG4 = 4,
+ ACPI_DMT_FLAG5 = 5,
+ ACPI_DMT_FLAG6 = 6,
+ ACPI_DMT_FLAG7 = 7,
+ ACPI_DMT_FLAGS0,
+ ACPI_DMT_FLAGS1,
+ ACPI_DMT_FLAGS2,
+ ACPI_DMT_FLAGS4,
+ ACPI_DMT_UINT8,
+ ACPI_DMT_UINT16,
+ ACPI_DMT_UINT24,
+ ACPI_DMT_UINT32,
+ ACPI_DMT_UINT40,
+ ACPI_DMT_UINT48,
+ ACPI_DMT_UINT56,
+ ACPI_DMT_UINT64,
+ ACPI_DMT_BUF7,
+ ACPI_DMT_BUF16,
+ ACPI_DMT_BUF128,
+ ACPI_DMT_SIG,
+ ACPI_DMT_STRING,
+ ACPI_DMT_NAME4,
+ ACPI_DMT_NAME6,
+ ACPI_DMT_NAME8,
+
+ /* Types that are decoded to strings and miscellaneous */
+
+ ACPI_DMT_ACCWIDTH,
+ ACPI_DMT_CHKSUM,
+ ACPI_DMT_GAS,
+ ACPI_DMT_SPACEID,
+ ACPI_DMT_UNICODE,
+ ACPI_DMT_UUID,
+
+ /* Types used only for the Data Table Compiler */
+
+ ACPI_DMT_BUFFER,
+ ACPI_DMT_DEVICE_PATH,
+ ACPI_DMT_LABEL,
+ ACPI_DMT_PCI_PATH,
+
+ /* Types that are specific to particular ACPI tables */
+
+ ACPI_DMT_ASF,
+ ACPI_DMT_DMAR,
+ ACPI_DMT_EINJACT,
+ ACPI_DMT_EINJINST,
+ ACPI_DMT_ERSTACT,
+ ACPI_DMT_ERSTINST,
+ ACPI_DMT_FADTPM,
+ ACPI_DMT_HEST,
+ ACPI_DMT_HESTNTFY,
+ ACPI_DMT_HESTNTYP,
+ ACPI_DMT_IVRS,
+ ACPI_DMT_MADT,
+ ACPI_DMT_PMTT,
+ ACPI_DMT_SLIC,
+ ACPI_DMT_SRAT,
+
+ /* Special opcodes */
+
+ ACPI_DMT_EXTRA_TEXT,
+ ACPI_DMT_EXIT
+
+} ACPI_ENTRY_TYPES;
typedef
void (*ACPI_DMTABLE_HANDLER) (
@@ -175,6 +200,11 @@ ACPI_STATUS (*ASL_WALK_CALLBACK) (
#define ASL_WALK_CALLBACK_DEFINED
#endif
+typedef
+void (*ACPI_RESOURCE_HANDLER) (
+ AML_RESOURCE *Resource,
+ UINT32 Length,
+ UINT32 Level);
typedef struct acpi_resource_tag
{
@@ -202,6 +232,7 @@ extern ACPI_DMTABLE_INFO AcpiDmTableInfoAsf4[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoAsfHdr[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoBoot[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoBert[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoBgrt[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoCpep[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoCpep0[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoDbgp[];
@@ -212,6 +243,7 @@ extern ACPI_DMTABLE_INFO AcpiDmTableInfoDmar0[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoDmar1[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoDmar2[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoDmar3[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoDrtm[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoEcdt[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoEinj[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoEinj0[];
@@ -221,7 +253,13 @@ extern ACPI_DMTABLE_INFO AcpiDmTableInfoFacs[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoFadt1[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoFadt2[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoFadt3[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoFadt5[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoFpdt[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoFpdtHdr[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoFpdt0[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoFpdt1[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoGas[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoGtdt[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoHeader[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoHest[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoHest0[];
@@ -254,14 +292,34 @@ extern ACPI_DMTABLE_INFO AcpiDmTableInfoMadt7[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoMadt8[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoMadt9[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoMadt10[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoMadt11[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoMadt12[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoMadtHdr[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoMcfg[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoMcfg0[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoMchi[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoMpst[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoMpst0[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoMpst0A[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoMpst0B[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoMpst1[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoMpst2[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoMsct[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoMsct0[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoPmtt[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoPmtt0[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoPmtt1[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoPmtt1a[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoPmtt2[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoPmttHdr[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoPcct[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoPcct0[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoRsdp1[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoRsdp2[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoS3pt[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoS3ptHdr[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoS3pt0[];
+extern ACPI_DMTABLE_INFO AcpiDmTableInfoS3pt1[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoSbst[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoSlicHdr[];
extern ACPI_DMTABLE_INFO AcpiDmTableInfoSlic0[];
@@ -354,6 +412,10 @@ AcpiDmDumpFadt (
ACPI_TABLE_HEADER *Table);
void
+AcpiDmDumpFpdt (
+ ACPI_TABLE_HEADER *Table);
+
+void
AcpiDmDumpHest (
ACPI_TABLE_HEADER *Table);
@@ -362,17 +424,29 @@ AcpiDmDumpIvrs (
ACPI_TABLE_HEADER *Table);
void
+AcpiDmDumpMadt (
+ ACPI_TABLE_HEADER *Table);
+
+void
AcpiDmDumpMcfg (
ACPI_TABLE_HEADER *Table);
void
-AcpiDmDumpMadt (
+AcpiDmDumpMpst (
ACPI_TABLE_HEADER *Table);
void
AcpiDmDumpMsct (
ACPI_TABLE_HEADER *Table);
+void
+AcpiDmDumpPcct (
+ ACPI_TABLE_HEADER *Table);
+
+void
+AcpiDmDumpPmtt (
+ ACPI_TABLE_HEADER *Table);
+
UINT32
AcpiDmDumpRsdp (
ACPI_TABLE_HEADER *Table);
@@ -381,6 +455,10 @@ void
AcpiDmDumpRsdt (
ACPI_TABLE_HEADER *Table);
+UINT32
+AcpiDmDumpS3pt (
+ ACPI_TABLE_HEADER *Table);
+
void
AcpiDmDumpSlic (
ACPI_TABLE_HEADER *Table);
@@ -671,6 +749,18 @@ AcpiDmVendorLargeDescriptor (
UINT32 Level);
void
+AcpiDmGpioDescriptor (
+ AML_RESOURCE *Resource,
+ UINT32 Length,
+ UINT32 Level);
+
+void
+AcpiDmSerialBusDescriptor (
+ AML_RESOURCE *Resource,
+ UINT32 Length,
+ UINT32 Level);
+
+void
AcpiDmVendorCommon (
char *Name,
UINT8 *ByteData,
@@ -694,6 +784,12 @@ AcpiDmDmaDescriptor (
UINT32 Level);
void
+AcpiDmFixedDmaDescriptor (
+ AML_RESOURCE *Resource,
+ UINT32 Length,
+ UINT32 Level);
+
+void
AcpiDmIoDescriptor (
AML_RESOURCE *Resource,
UINT32 Length,
diff --git a/usr/src/uts/intel/sys/acpi/acdispat.h b/usr/src/uts/intel/sys/acpi/acdispat.h
index ae8dd93154..0699ee77a1 100644
--- a/usr/src/uts/intel/sys/acpi/acdispat.h
+++ b/usr/src/uts/intel/sys/acpi/acdispat.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/sys/acpi/acevents.h b/usr/src/uts/intel/sys/acpi/acevents.h
index 8681ed5f38..3b874f1075 100644
--- a/usr/src/uts/intel/sys/acpi/acevents.h
+++ b/usr/src/uts/intel/sys/acpi/acevents.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -85,13 +85,15 @@ ACPI_STATUS
AcpiEvInitGlobalLockHandler (
void);
+ACPI_HW_DEPENDENT_RETURN_OK (
ACPI_STATUS
AcpiEvAcquireGlobalLock(
- UINT16 Timeout);
+ UINT16 Timeout))
+ACPI_HW_DEPENDENT_RETURN_OK (
ACPI_STATUS
AcpiEvReleaseGlobalLock(
- void);
+ void))
ACPI_STATUS
AcpiEvRemoveGlobalLockHandler (
@@ -154,9 +156,10 @@ AcpiEvInitializeGpeBlock (
ACPI_GPE_BLOCK_INFO *GpeBlock,
void *Context);
+ACPI_HW_DEPENDENT_RETURN_OK (
ACPI_STATUS
AcpiEvDeleteGpeBlock (
- ACPI_GPE_BLOCK_INFO *GpeBlock);
+ ACPI_GPE_BLOCK_INFO *GpeBlock))
UINT32
AcpiEvGpeDispatch (
@@ -171,9 +174,10 @@ ACPI_STATUS
AcpiEvGpeInitialize (
void);
+ACPI_HW_DEPENDENT_RETURN_VOID (
void
AcpiEvUpdateGpes (
- ACPI_OWNER_ID TableOwnerId);
+ ACPI_OWNER_ID TableOwnerId))
ACPI_STATUS
AcpiEvMatchGpeMethod (
@@ -228,7 +232,8 @@ AcpiEvInitializeOpRegions (
ACPI_STATUS
AcpiEvAddressSpaceDispatch (
- ACPI_OPERAND_OBJECT *RegionObj,
+ ACPI_OPERAND_OBJECT *RegionObj,
+ ACPI_OPERAND_OBJECT *FieldObj,
UINT32 Function,
UINT32 RegionOffset,
UINT32 BitWidth,
@@ -334,9 +339,9 @@ UINT32
AcpiEvInitializeSCI (
UINT32 ProgramSCI);
+ACPI_HW_DEPENDENT_RETURN_VOID (
void
AcpiEvTerminate (
- void);
-
+ void))
#endif /* __ACEVENTS_H__ */
diff --git a/usr/src/uts/intel/sys/acpi/acexcep.h b/usr/src/uts/intel/sys/acpi/acexcep.h
index 7d426d0f26..10f5a113da 100644
--- a/usr/src/uts/intel/sys/acpi/acexcep.h
+++ b/usr/src/uts/intel/sys/acpi/acexcep.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -92,8 +92,9 @@
#define AE_SAME_HANDLER (ACPI_STATUS) (0x0019 | AE_CODE_ENVIRONMENTAL)
#define AE_NO_HANDLER (ACPI_STATUS) (0x001A | AE_CODE_ENVIRONMENTAL)
#define AE_OWNER_ID_LIMIT (ACPI_STATUS) (0x001B | AE_CODE_ENVIRONMENTAL)
+#define AE_NOT_CONFIGURED (ACPI_STATUS) (0x001C | AE_CODE_ENVIRONMENTAL)
-#define AE_CODE_ENV_MAX 0x001B
+#define AE_CODE_ENV_MAX 0x001C
/*
@@ -222,7 +223,8 @@ char const *AcpiGbl_ExceptionNames_Env[] =
"AE_ABORT_METHOD",
"AE_SAME_HANDLER",
"AE_NO_HANDLER",
- "AE_OWNER_ID_LIMIT"
+ "AE_OWNER_ID_LIMIT",
+ "AE_NOT_CONFIGURED"
};
char const *AcpiGbl_ExceptionNames_Pgm[] =
diff --git a/usr/src/uts/intel/sys/acpi/acglobal.h b/usr/src/uts/intel/sys/acpi/acglobal.h
index ebbae0f595..79e53d07e2 100644
--- a/usr/src/uts/intel/sys/acpi/acglobal.h
+++ b/usr/src/uts/intel/sys/acpi/acglobal.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -128,6 +128,12 @@ UINT8 ACPI_INIT_GLOBAL (AcpiGbl_CopyDsdtLocally, FALSE);
*/
UINT8 ACPI_INIT_GLOBAL (AcpiGbl_TruncateIoAddresses, FALSE);
+/*
+ * Disable runtime checking and repair of values returned by control methods.
+ * Use only if the repair is causing a problem on a particular machine.
+ */
+UINT8 ACPI_INIT_GLOBAL (AcpiGbl_DisableAutoRepair, FALSE);
+
/* AcpiGbl_FADT is a local copy of the FADT, converted to a common format. */
@@ -137,7 +143,18 @@ UINT32 AcpiGbl_TraceFlags;
ACPI_NAME AcpiGbl_TraceMethodName;
BOOLEAN AcpiGbl_SystemAwakeAndRunning;
-#endif
+/*
+ * ACPI 5.0 introduces the concept of a "reduced hardware platform", meaning
+ * that the ACPI hardware is no longer required. A flag in the FADT indicates
+ * a reduced HW machine, and that flag is duplicated here for convenience.
+ */
+BOOLEAN AcpiGbl_ReducedHardware;
+
+#endif /* DEFINE_ACPI_GLOBALS */
+
+/* Do not disassemble buffers to resource descriptors */
+
+ACPI_EXTERN UINT8 ACPI_INIT_GLOBAL (AcpiGbl_NoResourceDisassembly, FALSE);
/*****************************************************************************
*
@@ -150,8 +167,12 @@ BOOLEAN AcpiGbl_SystemAwakeAndRunning;
* found in the RSDT/XSDT.
*/
ACPI_EXTERN ACPI_TABLE_LIST AcpiGbl_RootTableList;
+
+#if (!ACPI_REDUCED_HARDWARE)
ACPI_EXTERN ACPI_TABLE_FACS *AcpiGbl_FACS;
+#endif /* !ACPI_REDUCED_HARDWARE */
+
/* These addresses are calculated from the FADT Event Block addresses */
ACPI_EXTERN ACPI_GENERIC_ADDRESS AcpiGbl_XPm1aStatus;
@@ -177,7 +198,7 @@ ACPI_EXTERN UINT8 AcpiGbl_IntegerNybbleWidth;
/*****************************************************************************
*
- * Mutual exlusion within ACPICA subsystem
+ * Mutual exclusion within ACPICA subsystem
*
****************************************************************************/
@@ -233,8 +254,7 @@ ACPI_EXTERN ACPI_CACHE_T *AcpiGbl_OperandCache;
/* Global handlers */
-ACPI_EXTERN ACPI_OBJECT_NOTIFY_HANDLER AcpiGbl_DeviceNotify;
-ACPI_EXTERN ACPI_OBJECT_NOTIFY_HANDLER AcpiGbl_SystemNotify;
+ACPI_EXTERN ACPI_GLOBAL_NOTIFY_HANDLER AcpiGbl_GlobalNotify[2];
ACPI_EXTERN ACPI_EXCEPTION_HANDLER AcpiGbl_ExceptionHandler;
ACPI_EXTERN ACPI_INIT_HANDLER AcpiGbl_InitHandler;
ACPI_EXTERN ACPI_TABLE_HANDLER AcpiGbl_TableHandler;
@@ -265,6 +285,7 @@ ACPI_EXTERN BOOLEAN AcpiGbl_AcpiHardwarePresent;
ACPI_EXTERN BOOLEAN AcpiGbl_EventsInitialized;
ACPI_EXTERN UINT8 AcpiGbl_OsiData;
ACPI_EXTERN ACPI_INTERFACE_INFO *AcpiGbl_SupportedInterfaces;
+ACPI_EXTERN ACPI_ADDRESS_RANGE *AcpiGbl_AddressRangeList[ACPI_ADDRESS_RANGE_MAX];
#ifndef DEFINE_ACPI_GLOBALS
@@ -362,6 +383,8 @@ ACPI_EXTERN UINT8 AcpiGbl_SleepTypeB;
*
****************************************************************************/
+#if (!ACPI_REDUCED_HARDWARE)
+
ACPI_EXTERN UINT8 AcpiGbl_AllGpesInitialized;
ACPI_EXTERN ACPI_GPE_XRUPT_INFO *AcpiGbl_GpeXruptListHead;
ACPI_EXTERN ACPI_GPE_BLOCK_INFO *AcpiGbl_GpeFadtBlocks[ACPI_MAX_GPE_BLOCKS];
@@ -370,6 +393,7 @@ ACPI_EXTERN void *AcpiGbl_GlobalEventHandlerContext;
ACPI_EXTERN ACPI_FIXED_EVENT_HANDLER AcpiGbl_FixedEventHandlers[ACPI_NUM_FIXED_EVENTS];
extern ACPI_FIXED_EVENT_INFO AcpiGbl_FixedEventInfo[ACPI_NUM_FIXED_EVENTS];
+#endif /* !ACPI_REDUCED_HARDWARE */
/*****************************************************************************
*
diff --git a/usr/src/uts/intel/sys/acpi/achware.h b/usr/src/uts/intel/sys/acpi/achware.h
index ae4a77a9f2..f607d1702a 100644
--- a/usr/src/uts/intel/sys/acpi/achware.h
+++ b/usr/src/uts/intel/sys/acpi/achware.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -110,6 +110,49 @@ AcpiHwClearAcpiStatus (
/*
+ * hwsleep - sleep/wake support (Legacy sleep registers)
+ */
+ACPI_STATUS
+AcpiHwLegacySleep (
+ UINT8 SleepState,
+ UINT8 Flags);
+
+ACPI_STATUS
+AcpiHwLegacyWakePrep (
+ UINT8 SleepState,
+ UINT8 Flags);
+
+ACPI_STATUS
+AcpiHwLegacyWake (
+ UINT8 SleepState,
+ UINT8 Flags);
+
+
+/*
+ * hwesleep - sleep/wake support (Extended FADT-V5 sleep registers)
+ */
+void
+AcpiHwExecuteSleepMethod (
+ char *MethodName,
+ UINT32 IntegerArgument);
+
+ACPI_STATUS
+AcpiHwExtendedSleep (
+ UINT8 SleepState,
+ UINT8 Flags);
+
+ACPI_STATUS
+AcpiHwExtendedWakePrep (
+ UINT8 SleepState,
+ UINT8 Flags);
+
+ACPI_STATUS
+AcpiHwExtendedWake (
+ UINT8 SleepState,
+ UINT8 Flags);
+
+
+/*
* hwvalid - Port I/O with validation
*/
ACPI_STATUS
@@ -188,22 +231,4 @@ AcpiHwDerivePciId (
ACPI_HANDLE PciRegion);
-/*
- * hwtimer - ACPI Timer prototypes
- */
-ACPI_STATUS
-AcpiGetTimerResolution (
- UINT32 *Resolution);
-
-ACPI_STATUS
-AcpiGetTimer (
- UINT32 *Ticks);
-
-ACPI_STATUS
-AcpiGetTimerDuration (
- UINT32 StartTicks,
- UINT32 EndTicks,
- UINT32 *TimeElapsed);
-
-
#endif /* __ACHWARE_H__ */
diff --git a/usr/src/uts/intel/sys/acpi/acinterp.h b/usr/src/uts/intel/sys/acpi/acinterp.h
index 8ac828c59c..45e9814f79 100644
--- a/usr/src/uts/intel/sys/acpi/acinterp.h
+++ b/usr/src/uts/intel/sys/acpi/acinterp.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -642,6 +642,10 @@ AcpiExIntegerToString (
char *Dest,
UINT64 Value);
+BOOLEAN
+AcpiIsValidSpaceId (
+ UINT8 SpaceId);
+
/*
* exregion - default OpRegion handlers
diff --git a/usr/src/uts/intel/sys/acpi/aclocal.h b/usr/src/uts/intel/sys/acpi/aclocal.h
index 2eb6fc21d4..68ea95b7c8 100644
--- a/usr/src/uts/intel/sys/acpi/aclocal.h
+++ b/usr/src/uts/intel/sys/acpi/aclocal.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -54,7 +54,7 @@ typedef UINT32 ACPI_MUTEX_HANDLE;
/* Total number of aml opcodes defined */
-#define AML_NUM_OPCODES 0x7F
+#define AML_NUM_OPCODES 0x81
/* Forward declarations */
@@ -213,7 +213,6 @@ typedef struct acpi_namespace_node
#define ANOBJ_IS_EXTERNAL 0x08 /* iASL only: This object created via External() */
#define ANOBJ_METHOD_NO_RETVAL 0x10 /* iASL only: Method has no return value */
#define ANOBJ_METHOD_SOME_NO_RETVAL 0x20 /* iASL only: Method has at least one return value */
-#define ANOBJ_IS_BIT_OFFSET 0x40 /* iASL only: Reference is a bit offset */
#define ANOBJ_IS_REFERENCED 0x80 /* iASL only: Object was referenced */
@@ -286,12 +285,16 @@ typedef struct acpi_create_field_info
ACPI_NAMESPACE_NODE *FieldNode;
ACPI_NAMESPACE_NODE *RegisterNode;
ACPI_NAMESPACE_NODE *DataRegisterNode;
+ ACPI_NAMESPACE_NODE *ConnectionNode;
+ UINT8 *ResourceBuffer;
UINT32 BankValue;
UINT32 FieldBitPosition;
UINT32 FieldBitLength;
+ UINT16 ResourceLength;
UINT8 FieldFlags;
UINT8 Attribute;
UINT8 FieldType;
+ UINT8 AccessLength;
} ACPI_CREATE_FIELD_INFO;
@@ -359,7 +362,8 @@ typedef struct acpi_name_info
/*
* Used for ACPI_PTYPE1_FIXED, ACPI_PTYPE1_VAR, ACPI_PTYPE2,
- * ACPI_PTYPE2_MIN, ACPI_PTYPE2_PKG_COUNT, ACPI_PTYPE2_COUNT
+ * ACPI_PTYPE2_MIN, ACPI_PTYPE2_PKG_COUNT, ACPI_PTYPE2_COUNT,
+ * ACPI_PTYPE2_FIX_VAR
*/
typedef struct acpi_package_info
{
@@ -411,6 +415,7 @@ typedef struct acpi_predefined_data
char *Pathname;
const ACPI_PREDEFINED_INFO *Predefined;
union acpi_operand_object *ParentPackage;
+ ACPI_NAMESPACE_NODE *Node;
UINT32 Flags;
UINT8 NodeFlags;
@@ -419,6 +424,7 @@ typedef struct acpi_predefined_data
/* Defines for Flags field above */
#define ACPI_OBJECT_REPAIRED 1
+#define ACPI_OBJECT_WRAPPED 2
/*
@@ -710,6 +716,15 @@ ACPI_STATUS (*ACPI_PARSE_UPWARDS) (
struct acpi_walk_state *WalkState);
+/* Global handlers for AML Notifies */
+
+typedef struct acpi_global_notify_handler
+{
+ ACPI_NOTIFY_HANDLER Handler;
+ void *Context;
+
+} ACPI_GLOBAL_NOTIFY_HANDLER;
+
/*
* Notify info - used to pass info to the deferred notify
* handler/dispatcher.
@@ -717,8 +732,10 @@ ACPI_STATUS (*ACPI_PARSE_UPWARDS) (
typedef struct acpi_notify_info
{
ACPI_STATE_COMMON
+ UINT8 HandlerListId;
ACPI_NAMESPACE_NODE *Node;
- union acpi_operand_object *HandlerObj;
+ union acpi_operand_object *HandlerListHead;
+ ACPI_GLOBAL_NOTIFY_HANDLER *Global;
} ACPI_NOTIFY_INFO;
@@ -750,6 +767,17 @@ typedef
ACPI_STATUS (*ACPI_EXECUTE_OP) (
struct acpi_walk_state *WalkState);
+/* Address Range info block */
+
+typedef struct acpi_address_range
+{
+ struct acpi_address_range *Next;
+ ACPI_NAMESPACE_NODE *RegionNode;
+ ACPI_PHYSICAL_ADDRESS StartAddress;
+ ACPI_PHYSICAL_ADDRESS EndAddress;
+
+} ACPI_ADDRESS_RANGE;
+
/*****************************************************************************
*
@@ -774,6 +802,17 @@ typedef struct acpi_opcode_info
} ACPI_OPCODE_INFO;
+/* Structure for Resource Tag information */
+
+typedef struct acpi_tag_info
+{
+ UINT32 BitOffset;
+ UINT32 BitLength;
+
+} ACPI_TAG_INFO;
+
+/* Value associated with the parse object */
+
typedef union acpi_parse_value
{
UINT64 Integer; /* Integer constant (Up to 64 bits) */
@@ -782,6 +821,7 @@ typedef union acpi_parse_value
UINT8 *Buffer; /* buffer or string */
char *Name; /* NULL terminated string */
union acpi_parse_object *Arg; /* arguments and contained ops */
+ ACPI_TAG_INFO Tag; /* Resource descriptor tag info */
} ACPI_PARSE_VALUE;
@@ -1112,7 +1152,7 @@ typedef struct acpi_port_info
#define ACPI_RESOURCE_NAME_END_DEPENDENT 0x38
#define ACPI_RESOURCE_NAME_IO 0x40
#define ACPI_RESOURCE_NAME_FIXED_IO 0x48
-#define ACPI_RESOURCE_NAME_RESERVED_S1 0x50
+#define ACPI_RESOURCE_NAME_FIXED_DMA 0x50
#define ACPI_RESOURCE_NAME_RESERVED_S2 0x58
#define ACPI_RESOURCE_NAME_RESERVED_S3 0x60
#define ACPI_RESOURCE_NAME_RESERVED_S4 0x68
@@ -1134,7 +1174,9 @@ typedef struct acpi_port_info
#define ACPI_RESOURCE_NAME_EXTENDED_IRQ 0x89
#define ACPI_RESOURCE_NAME_ADDRESS64 0x8A
#define ACPI_RESOURCE_NAME_EXTENDED_ADDRESS64 0x8B
-#define ACPI_RESOURCE_NAME_LARGE_MAX 0x8B
+#define ACPI_RESOURCE_NAME_GPIO 0x8C
+#define ACPI_RESOURCE_NAME_SERIAL_BUS 0x8E
+#define ACPI_RESOURCE_NAME_LARGE_MAX 0x8E
/*****************************************************************************
diff --git a/usr/src/uts/intel/sys/acpi/acmacros.h b/usr/src/uts/intel/sys/acpi/acmacros.h
index 6581d111c9..cdb9cb109e 100644
--- a/usr/src/uts/intel/sys/acpi/acmacros.h
+++ b/usr/src/uts/intel/sys/acpi/acmacros.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -428,7 +428,6 @@
#endif /* ACPI_SIMPLE_RETURN_MACROS */
-
/* Conditional execution */
#define ACPI_DEBUG_EXEC(a) a
@@ -477,6 +476,14 @@
#endif /* ACPI_DEBUG_OUTPUT */
+
+#if (!ACPI_REDUCED_HARDWARE)
+#define ACPI_HW_OPTIONAL_FUNCTION(addr) addr
+#else
+#define ACPI_HW_OPTIONAL_FUNCTION(addr) NULL
+#endif
+
+
/*
* Some code only gets executed when the debugger is built in.
* Note that this is entirely independent of whether the
diff --git a/usr/src/uts/intel/sys/acpi/acnames.h b/usr/src/uts/intel/sys/acpi/acnames.h
index e08302dd30..4c5fab696f 100644
--- a/usr/src/uts/intel/sys/acpi/acnames.h
+++ b/usr/src/uts/intel/sys/acpi/acnames.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -46,6 +46,7 @@
/* Method names - these methods can appear anywhere in the namespace */
+#define METHOD_NAME__SB_ "_SB_"
#define METHOD_NAME__HID "_HID"
#define METHOD_NAME__CID "_CID"
#define METHOD_NAME__UID "_UID"
@@ -58,16 +59,17 @@
#define METHOD_NAME__PRT "_PRT"
#define METHOD_NAME__CRS "_CRS"
#define METHOD_NAME__PRS "_PRS"
+#define METHOD_NAME__AEI "_AEI"
#define METHOD_NAME__PRW "_PRW"
#define METHOD_NAME__SRS "_SRS"
/* Method names - these methods must appear at the namespace root */
-#define METHOD_NAME__BFS "\\_BFS"
-#define METHOD_NAME__GTS "\\_GTS"
-#define METHOD_NAME__PTS "\\_PTS"
-#define METHOD_NAME__SST "\\_SI._SST"
-#define METHOD_NAME__WAK "\\_WAK"
+#define METHOD_PATHNAME__BFS "\\_BFS"
+#define METHOD_PATHNAME__GTS "\\_GTS"
+#define METHOD_PATHNAME__PTS "\\_PTS"
+#define METHOD_PATHNAME__SST "\\_SI._SST"
+#define METHOD_PATHNAME__WAK "\\_WAK"
/* Definitions of the predefined namespace names */
@@ -78,7 +80,6 @@
#define ACPI_PREFIX_LOWER (UINT32) 0x69706361 /* "acpi" */
#define ACPI_NS_ROOT_PATH "\\"
-#define ACPI_NS_SYSTEM_BUS "_SB_"
#endif /* __ACNAMES_H__ */
diff --git a/usr/src/uts/intel/sys/acpi/acnamesp.h b/usr/src/uts/intel/sys/acpi/acnamesp.h
index 77f65d0ae4..4a91108eae 100644
--- a/usr/src/uts/intel/sys/acpi/acnamesp.h
+++ b/usr/src/uts/intel/sys/acpi/acnamesp.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -368,8 +368,9 @@ AcpiNsRepairObject (
ACPI_OPERAND_OBJECT **ReturnObjectPtr);
ACPI_STATUS
-AcpiNsRepairPackageList (
+AcpiNsWrapWithPackage (
ACPI_PREDEFINED_DATA *Data,
+ ACPI_OPERAND_OBJECT *OriginalObject,
ACPI_OPERAND_OBJECT **ObjDescPtr);
ACPI_STATUS
diff --git a/usr/src/uts/intel/sys/acpi/acobject.h b/usr/src/uts/intel/sys/acpi/acobject.h
index 27a21e6858..f2f8a1cfb1 100644
--- a/usr/src/uts/intel/sys/acpi/acobject.h
+++ b/usr/src/uts/intel/sys/acpi/acobject.h
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -246,8 +246,7 @@ typedef struct acpi_object_method
* Common fields for objects that support ASL notifications
*/
#define ACPI_COMMON_NOTIFY_INFO \
- union acpi_operand_object *SystemNotify; /* Handler for system notifies */\
- union acpi_operand_object *DeviceNotify; /* Handler for driver notifies */\
+ union acpi_operand_object *NotifyList[2]; /* Handlers for system/device notifies */\
union acpi_operand_object *Handler; /* Handler for Address space */
@@ -320,6 +319,7 @@ typedef struct acpi_object_thermal_zone
UINT32 BaseByteOffset; /* Byte offset within containing object */\
UINT32 Value; /* Value to store into the Bank or Index register */\
UINT8 StartFieldBitOffset;/* Bit offset within first field datum (0-63) */\
+ UINT8 AccessLength; /* For serial regions/fields */
typedef struct acpi_object_field_common /* COMMON FIELD (for BUFFER, REGION, BANK, and INDEX fields) */
@@ -335,7 +335,9 @@ typedef struct acpi_object_region_field
{
ACPI_OBJECT_COMMON_HEADER
ACPI_COMMON_FIELD_INFO
+ UINT16 ResourceLength;
union acpi_operand_object *RegionObj; /* Containing OpRegion object */
+ UINT8 *ResourceBuffer; /* ResourceTemplate for serial regions/fields */
} ACPI_OBJECT_REGION_FIELD;
@@ -386,8 +388,10 @@ typedef struct acpi_object_notify_handler
{
ACPI_OBJECT_COMMON_HEADER
ACPI_NAMESPACE_NODE *Node; /* Parent device */
- ACPI_NOTIFY_HANDLER Handler;
+ UINT32 HandlerType; /* Type: Device/System/Both */
+ ACPI_NOTIFY_HANDLER Handler; /* Handler addess */
void *Context;
+ union acpi_operand_object *Next[2]; /* Device and System handler lists */
} ACPI_OBJECT_NOTIFY_HANDLER;
@@ -463,6 +467,7 @@ typedef struct acpi_object_extra
{
ACPI_OBJECT_COMMON_HEADER
ACPI_NAMESPACE_NODE *Method_REG; /* _REG method for this region (if any) */
+ ACPI_NAMESPACE_NODE *ScopeNode;
void *RegionContext; /* Region-specific data */
UINT8 *AmlStart;
UINT32 AmlLength;
diff --git a/usr/src/uts/intel/sys/acpi/acopcode.h b/usr/src/uts/intel/sys/acpi/acopcode.h
index d8e1c75a7b..e182a07e61 100644
--- a/usr/src/uts/intel/sys/acpi/acopcode.h
+++ b/usr/src/uts/intel/sys/acpi/acopcode.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -94,6 +94,7 @@
#define ARGP_CONCAT_OP ARGP_LIST3 (ARGP_TERMARG, ARGP_TERMARG, ARGP_TARGET)
#define ARGP_CONCAT_RES_OP ARGP_LIST3 (ARGP_TERMARG, ARGP_TERMARG, ARGP_TARGET)
#define ARGP_COND_REF_OF_OP ARGP_LIST2 (ARGP_SUPERNAME, ARGP_SUPERNAME)
+#define ARGP_CONNECTFIELD_OP ARGP_LIST1 (ARGP_NAMESTRING)
#define ARGP_CONTINUE_OP ARG_NONE
#define ARGP_COPY_OP ARGP_LIST2 (ARGP_TERMARG, ARGP_SIMPLENAME)
#define ARGP_CREATE_BIT_FIELD_OP ARGP_LIST3 (ARGP_TERMARG, ARGP_TERMARG, ARGP_NAME)
@@ -165,6 +166,7 @@
#define ARGP_RETURN_OP ARGP_LIST1 (ARGP_TERMARG)
#define ARGP_REVISION_OP ARG_NONE
#define ARGP_SCOPE_OP ARGP_LIST3 (ARGP_PKGLENGTH, ARGP_NAME, ARGP_TERMLIST)
+#define ARGP_SERIALFIELD_OP ARGP_LIST1 (ARGP_NAMESTRING)
#define ARGP_SHIFT_LEFT_OP ARGP_LIST3 (ARGP_TERMARG, ARGP_TERMARG, ARGP_TARGET)
#define ARGP_SHIFT_RIGHT_OP ARGP_LIST3 (ARGP_TERMARG, ARGP_TERMARG, ARGP_TARGET)
#define ARGP_SIGNAL_OP ARGP_LIST1 (ARGP_SUPERNAME)
@@ -225,6 +227,7 @@
#define ARGI_CONCAT_OP ARGI_LIST3 (ARGI_COMPUTEDATA,ARGI_COMPUTEDATA, ARGI_TARGETREF)
#define ARGI_CONCAT_RES_OP ARGI_LIST3 (ARGI_BUFFER, ARGI_BUFFER, ARGI_TARGETREF)
#define ARGI_COND_REF_OF_OP ARGI_LIST2 (ARGI_OBJECT_REF, ARGI_TARGETREF)
+#define ARGI_CONNECTFIELD_OP ARGI_INVALID_OPCODE
#define ARGI_CONTINUE_OP ARGI_INVALID_OPCODE
#define ARGI_COPY_OP ARGI_LIST2 (ARGI_ANYTYPE, ARGI_SIMPLE_TARGET)
#define ARGI_CREATE_BIT_FIELD_OP ARGI_LIST3 (ARGI_BUFFER, ARGI_INTEGER, ARGI_REFERENCE)
@@ -296,6 +299,7 @@
#define ARGI_RETURN_OP ARGI_INVALID_OPCODE
#define ARGI_REVISION_OP ARG_NONE
#define ARGI_SCOPE_OP ARGI_INVALID_OPCODE
+#define ARGI_SERIALFIELD_OP ARGI_INVALID_OPCODE
#define ARGI_SHIFT_LEFT_OP ARGI_LIST3 (ARGI_INTEGER, ARGI_INTEGER, ARGI_TARGETREF)
#define ARGI_SHIFT_RIGHT_OP ARGI_LIST3 (ARGI_INTEGER, ARGI_INTEGER, ARGI_TARGETREF)
#define ARGI_SIGNAL_OP ARGI_LIST1 (ARGI_EVENT)
diff --git a/usr/src/uts/intel/sys/acpi/acoutput.h b/usr/src/uts/intel/sys/acpi/acoutput.h
index c8007e21fb..a1d89d408c 100644
--- a/usr/src/uts/intel/sys/acpi/acoutput.h
+++ b/usr/src/uts/intel/sys/acpi/acoutput.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -72,6 +72,7 @@
#define ACPI_EXAMPLE 0x00004000
#define ACPI_DRIVER 0x00008000
#define DT_COMPILER 0x00010000
+#define ASL_PREPROCESSOR 0x00020000
#define ACPI_ALL_COMPONENTS 0x0001FFFF
#define ACPI_COMPONENT_DEFAULT (ACPI_ALL_COMPONENTS)
diff --git a/usr/src/uts/intel/sys/acpi/acparser.h b/usr/src/uts/intel/sys/acpi/acparser.h
index 83b3c4fc06..f46d3cbc2c 100644
--- a/usr/src/uts/intel/sys/acpi/acparser.h
+++ b/usr/src/uts/intel/sys/acpi/acparser.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/sys/acpi/acpi.h b/usr/src/uts/intel/sys/acpi/acpi.h
index 0f4f607120..aa40219112 100644
--- a/usr/src/uts/intel/sys/acpi/acpi.h
+++ b/usr/src/uts/intel/sys/acpi/acpi.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/sys/acpi/acpiosxf.h b/usr/src/uts/intel/sys/acpi/acpiosxf.h
index 85b0ff6108..b79d0e6ade 100644
--- a/usr/src/uts/intel/sys/acpi/acpiosxf.h
+++ b/usr/src/uts/intel/sys/acpi/acpiosxf.h
@@ -9,7 +9,7 @@
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -112,6 +112,12 @@ AcpiOsTableOverride (
ACPI_TABLE_HEADER *ExistingTable,
ACPI_TABLE_HEADER **NewTable);
+ACPI_STATUS
+AcpiOsPhysicalTableOverride (
+ ACPI_TABLE_HEADER *ExistingTable,
+ ACPI_PHYSICAL_ADDRESS *NewAddress,
+ UINT32 *NewTableLength);
+
/*
* Spinlock primitives
@@ -302,13 +308,13 @@ AcpiOsWritePort (
ACPI_STATUS
AcpiOsReadMemory (
ACPI_PHYSICAL_ADDRESS Address,
- UINT32 *Value,
+ UINT64 *Value,
UINT32 Width);
ACPI_STATUS
AcpiOsWriteMemory (
ACPI_PHYSICAL_ADDRESS Address,
- UINT32 Value,
+ UINT64 Value,
UINT32 Width);
diff --git a/usr/src/uts/intel/sys/acpi/acpixf.h b/usr/src/uts/intel/sys/acpi/acpixf.h
index 82b677e615..7478c78940 100644
--- a/usr/src/uts/intel/sys/acpi/acpixf.h
+++ b/usr/src/uts/intel/sys/acpi/acpixf.h
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -48,8 +48,9 @@
/* Current ACPICA subsystem version in YYYYMMDD format */
-#define ACPI_CA_VERSION 0x20110527
+#define ACPI_CA_VERSION 0x20120420
+#include "acconfig.h"
#include "actypes.h"
#include "actbl.h"
@@ -59,6 +60,7 @@
extern UINT32 AcpiCurrentGpeCount;
extern ACPI_TABLE_FADT AcpiGbl_FADT;
extern BOOLEAN AcpiGbl_SystemAwakeAndRunning;
+extern BOOLEAN AcpiGbl_ReducedHardware; /* ACPI 5.0 */
/* Runtime configuration of debug print levels */
@@ -76,6 +78,35 @@ extern UINT32 AcpiGbl_TraceFlags;
extern UINT8 AcpiGbl_EnableAmlDebugObject;
extern UINT8 AcpiGbl_CopyDsdtLocally;
extern UINT8 AcpiGbl_TruncateIoAddresses;
+extern UINT8 AcpiGbl_DisableAutoRepair;
+
+
+/*
+ * Hardware-reduced prototypes. All interfaces that use these macros will
+ * be configured out of the ACPICA build if the ACPI_REDUCED_HARDWARE flag
+ * is set to TRUE.
+ */
+#if (!ACPI_REDUCED_HARDWARE)
+#define ACPI_HW_DEPENDENT_RETURN_STATUS(Prototype) \
+ Prototype;
+
+#define ACPI_HW_DEPENDENT_RETURN_OK(Prototype) \
+ Prototype;
+
+#define ACPI_HW_DEPENDENT_RETURN_VOID(Prototype) \
+ Prototype;
+
+#else
+#define ACPI_HW_DEPENDENT_RETURN_STATUS(Prototype) \
+ static ACPI_INLINE Prototype {return(AE_NOT_CONFIGURED);}
+
+#define ACPI_HW_DEPENDENT_RETURN_OK(Prototype) \
+ static ACPI_INLINE Prototype {return(AE_OK);}
+
+#define ACPI_HW_DEPENDENT_RETURN_VOID(Prototype) \
+ static ACPI_INLINE Prototype {}
+
+#endif /* !ACPI_REDUCED_HARDWARE */
/*
@@ -107,13 +138,15 @@ AcpiTerminate (
/*
* Miscellaneous global interfaces
*/
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiEnable (
- void);
+ void))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiDisable (
- void);
+ void))
ACPI_STATUS
AcpiSubsystemStatus (
@@ -143,6 +176,13 @@ ACPI_STATUS
AcpiRemoveInterface (
ACPI_STRING InterfaceName);
+UINT32
+AcpiCheckAddressRange (
+ ACPI_ADR_SPACE_TYPE SpaceId,
+ ACPI_PHYSICAL_ADDRESS Address,
+ ACPI_SIZE Length,
+ BOOLEAN Warn);
+
/*
* ACPI Memory management
@@ -312,35 +352,40 @@ AcpiInstallInitializationHandler (
ACPI_INIT_HANDLER Handler,
UINT32 Function);
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiInstallGlobalEventHandler (
ACPI_GBL_EVENT_HANDLER Handler,
- void *Context);
+ void *Context))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiInstallFixedEventHandler (
UINT32 AcpiEvent,
ACPI_EVENT_HANDLER Handler,
- void *Context);
+ void *Context))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiRemoveFixedEventHandler (
UINT32 AcpiEvent,
- ACPI_EVENT_HANDLER Handler);
+ ACPI_EVENT_HANDLER Handler))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiInstallGpeHandler (
ACPI_HANDLE GpeDevice,
UINT32 GpeNumber,
UINT32 Type,
ACPI_GPE_HANDLER Address,
- void *Context);
+ void *Context))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiRemoveGpeHandler (
ACPI_HANDLE GpeDevice,
UINT32 GpeNumber,
- ACPI_GPE_HANDLER Address);
+ ACPI_GPE_HANDLER Address))
ACPI_STATUS
AcpiInstallNotifyHandler (
@@ -381,113 +426,148 @@ AcpiInstallInterfaceHandler (
/*
* Global Lock interfaces
*/
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiAcquireGlobalLock (
UINT16 Timeout,
- UINT32 *Handle);
+ UINT32 *Handle))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiReleaseGlobalLock (
- UINT32 Handle);
+ UINT32 Handle))
+
+
+/*
+ * Interfaces to AML mutex objects
+ */
+ACPI_STATUS
+AcpiAcquireMutex (
+ ACPI_HANDLE Handle,
+ ACPI_STRING Pathname,
+ UINT16 Timeout);
+
+ACPI_STATUS
+AcpiReleaseMutex (
+ ACPI_HANDLE Handle,
+ ACPI_STRING Pathname);
/*
* Fixed Event interfaces
*/
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiEnableEvent (
UINT32 Event,
- UINT32 Flags);
+ UINT32 Flags))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiDisableEvent (
UINT32 Event,
- UINT32 Flags);
+ UINT32 Flags))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiClearEvent (
- UINT32 Event);
+ UINT32 Event))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiGetEventStatus (
UINT32 Event,
- ACPI_EVENT_STATUS *EventStatus);
+ ACPI_EVENT_STATUS *EventStatus))
/*
* General Purpose Event (GPE) Interfaces
*/
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiUpdateAllGpes (
- void);
+ void))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiEnableGpe (
ACPI_HANDLE GpeDevice,
- UINT32 GpeNumber);
+ UINT32 GpeNumber))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiDisableGpe (
ACPI_HANDLE GpeDevice,
- UINT32 GpeNumber);
+ UINT32 GpeNumber))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiClearGpe (
ACPI_HANDLE GpeDevice,
- UINT32 GpeNumber);
+ UINT32 GpeNumber))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiSetGpe (
ACPI_HANDLE GpeDevice,
UINT32 GpeNumber,
- UINT8 Action);
+ UINT8 Action))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiFinishGpe (
ACPI_HANDLE GpeDevice,
- UINT32 GpeNumber);
+ UINT32 GpeNumber))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiSetupGpeForWake (
ACPI_HANDLE ParentDevice,
ACPI_HANDLE GpeDevice,
- UINT32 GpeNumber);
+ UINT32 GpeNumber))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiSetGpeWakeMask (
ACPI_HANDLE GpeDevice,
UINT32 GpeNumber,
- UINT8 Action);
+ UINT8 Action))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiGetGpeStatus (
ACPI_HANDLE GpeDevice,
UINT32 GpeNumber,
- ACPI_EVENT_STATUS *EventStatus);
+ ACPI_EVENT_STATUS *EventStatus))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiDisableAllGpes (
- void);
+ void))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiEnableAllRuntimeGpes (
- void);
+ void))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiGetGpeDevice (
UINT32 GpeIndex,
- ACPI_HANDLE *GpeDevice);
+ ACPI_HANDLE *GpeDevice))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiInstallGpeBlock (
ACPI_HANDLE GpeDevice,
ACPI_GENERIC_ADDRESS *GpeBlockAddress,
UINT32 RegisterCount,
- UINT32 InterruptNumber);
+ UINT32 InterruptNumber))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiRemoveGpeBlock (
- ACPI_HANDLE GpeDevice);
+ ACPI_HANDLE GpeDevice))
/*
@@ -516,6 +596,11 @@ AcpiGetPossibleResources (
ACPI_BUFFER *RetBuffer);
ACPI_STATUS
+AcpiGetEventResources (
+ ACPI_HANDLE DeviceHandle,
+ ACPI_BUFFER *RetBuffer);
+
+ACPI_STATUS
AcpiWalkResources (
ACPI_HANDLE Device,
char *Name,
@@ -537,6 +622,12 @@ AcpiResourceToAddress64 (
ACPI_RESOURCE *Resource,
ACPI_RESOURCE_ADDRESS64 *Out);
+ACPI_STATUS
+AcpiBufferToResource (
+ UINT8 *AmlBuffer,
+ UINT16 AmlBufferLength,
+ ACPI_RESOURCE **ResourcePtr);
+
/*
* Hardware (ACPI device) interfaces
@@ -555,16 +646,22 @@ AcpiWrite (
UINT64 Value,
ACPI_GENERIC_ADDRESS *Reg);
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiReadBitRegister (
UINT32 RegisterId,
- UINT32 *ReturnValue);
+ UINT32 *ReturnValue))
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiWriteBitRegister (
UINT32 RegisterId,
- UINT32 Value);
+ UINT32 Value))
+
+/*
+ * Sleep/Wake interfaces
+ */
ACPI_STATUS
AcpiGetSleepTypeData (
UINT8 SleepState,
@@ -577,28 +674,58 @@ AcpiEnterSleepStatePrep (
ACPI_STATUS
AcpiEnterSleepState (
- UINT8 SleepState);
+ UINT8 SleepState,
+ UINT8 Flags);
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiEnterSleepStateS4bios (
- void);
+ void))
+
+ACPI_STATUS
+AcpiLeaveSleepStatePrep (
+ UINT8 SleepState,
+ UINT8 Flags);
ACPI_STATUS
AcpiLeaveSleepState (
- UINT8 SleepState)
- ;
+ UINT8 SleepState);
+
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiSetFirmwareWakingVector (
- UINT32 PhysicalAddress);
+ UINT32 PhysicalAddress))
#if ACPI_MACHINE_WIDTH == 64
+ACPI_HW_DEPENDENT_RETURN_STATUS (
ACPI_STATUS
AcpiSetFirmwareWakingVector64 (
- UINT64 PhysicalAddress);
+ UINT64 PhysicalAddress))
#endif
/*
+ * ACPI Timer interfaces
+ */
+ACPI_HW_DEPENDENT_RETURN_STATUS (
+ACPI_STATUS
+AcpiGetTimerResolution (
+ UINT32 *Resolution))
+
+ACPI_HW_DEPENDENT_RETURN_STATUS (
+ACPI_STATUS
+AcpiGetTimer (
+ UINT32 *Ticks))
+
+ACPI_HW_DEPENDENT_RETURN_STATUS (
+ACPI_STATUS
+AcpiGetTimerDuration (
+ UINT32 StartTicks,
+ UINT32 EndTicks,
+ UINT32 *TimeElapsed))
+
+
+/*
* Error/Warning output
*/
void ACPI_INTERNAL_VAR_XFACE
diff --git a/usr/src/uts/intel/sys/acpi/acpredef.h b/usr/src/uts/intel/sys/acpi/acpredef.h
index 0496767a86..c17a6d9145 100644
--- a/usr/src/uts/intel/sys/acpi/acpredef.h
+++ b/usr/src/uts/intel/sys/acpi/acpredef.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -94,6 +94,14 @@
* ACPI_PTYPE2_REV_FIXED: Revision at start, each subpackage is Fixed-length
* (Used for _ART, _FPS)
*
+ * ACPI_PTYPE2_FIX_VAR: Each subpackage consists of some fixed-length elements
+ * followed by an optional element
+ * object type
+ * count
+ * object type
+ * count = 0 (optional)
+ * (Used for _DLM)
+ *
*****************************************************************************/
enum AcpiReturnPackageTypes
@@ -106,7 +114,8 @@ enum AcpiReturnPackageTypes
ACPI_PTYPE2_PKG_COUNT = 6,
ACPI_PTYPE2_FIXED = 7,
ACPI_PTYPE2_MIN = 8,
- ACPI_PTYPE2_REV_FIXED = 9
+ ACPI_PTYPE2_REV_FIXED = 9,
+ ACPI_PTYPE2_FIX_VAR = 10
};
@@ -159,6 +168,7 @@ static const ACPI_PREDEFINED_INFO PredefinedNames[] =
{{"_AC8", 0, ACPI_RTYPE_INTEGER}},
{{"_AC9", 0, ACPI_RTYPE_INTEGER}},
{{"_ADR", 0, ACPI_RTYPE_INTEGER}},
+ {{"_AEI", 0, ACPI_RTYPE_BUFFER}},
{{"_AL0", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
{{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
@@ -231,6 +241,12 @@ static const ACPI_PREDEFINED_INFO PredefinedNames[] =
{{"_CID", 0, ACPI_RTYPE_INTEGER | ACPI_RTYPE_STRING | ACPI_RTYPE_PACKAGE}}, /* Variable-length (Ints/Strs) */
{{{ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER | ACPI_RTYPE_STRING, 0,0}, 0,0}},
+ {{"_CLS", 0, ACPI_RTYPE_PACKAGE}}, /* Fixed-length (3 Int) */
+ {{{ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 3,0}, 0,0}},
+
+ {{"_CPC", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Ints/Bufs) */
+ {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER | ACPI_RTYPE_BUFFER, 0,0}, 0,0}},
+
{{"_CRS", 0, ACPI_RTYPE_BUFFER}},
{{"_CRT", 0, ACPI_RTYPE_INTEGER}},
{{"_CSD", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (1 Int(n), n-1 Int) */
@@ -239,12 +255,20 @@ static const ACPI_PREDEFINED_INFO PredefinedNames[] =
{{"_CST", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (1 Int(n), n Pkg (1 Buf/3 Int) */
{{{ACPI_PTYPE2_PKG_COUNT,ACPI_RTYPE_BUFFER, 1, ACPI_RTYPE_INTEGER}, 3,0}},
+ {{"_CWS", 1, ACPI_RTYPE_INTEGER}},
{{"_DCK", 1, ACPI_RTYPE_INTEGER}},
{{"_DCS", 0, ACPI_RTYPE_INTEGER}},
{{"_DDC", 1, ACPI_RTYPE_INTEGER | ACPI_RTYPE_BUFFER}},
{{"_DDN", 0, ACPI_RTYPE_STRING}},
+ {{"_DEP", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
+ {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
+
{{"_DGS", 0, ACPI_RTYPE_INTEGER}},
{{"_DIS", 0, 0}},
+
+ {{"_DLM", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Pkgs) each (1 Ref, 0/1 Optional Buf/Ref) */
+ {{{ACPI_PTYPE2_FIX_VAR, ACPI_RTYPE_REFERENCE, 1, ACPI_RTYPE_REFERENCE | ACPI_RTYPE_BUFFER}, 0,0}},
+
{{"_DMA", 0, ACPI_RTYPE_BUFFER}},
{{"_DOD", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Ints) */
{{{ACPI_PTYPE1_VAR, ACPI_RTYPE_INTEGER, 0,0}, 0,0}},
@@ -264,6 +288,8 @@ static const ACPI_PREDEFINED_INFO PredefinedNames[] =
{{"_EJ3", 1, 0}},
{{"_EJ4", 1, 0}},
{{"_EJD", 0, ACPI_RTYPE_STRING}},
+ {{"_ERR", 3, ACPI_RTYPE_INTEGER}}, /* Internal use only, used by ACPICA test suites */
+ {{"_EVT", 1, 0}},
{{"_FDE", 0, ACPI_RTYPE_BUFFER}},
{{"_FDI", 0, ACPI_RTYPE_PACKAGE}}, /* Fixed-length (16 Int) */
{{{ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 16,0}, 0,0}},
@@ -284,14 +310,17 @@ static const ACPI_PREDEFINED_INFO PredefinedNames[] =
{{"_GAI", 0, ACPI_RTYPE_INTEGER}},
+ {{"_GCP", 0, ACPI_RTYPE_INTEGER}},
{{"_GHL", 0, ACPI_RTYPE_INTEGER}},
{{"_GLK", 0, ACPI_RTYPE_INTEGER}},
{{"_GPD", 0, ACPI_RTYPE_INTEGER}},
{{"_GPE", 0, ACPI_RTYPE_INTEGER}}, /* _GPE method, not _GPE scope */
+ {{"_GRT", 0, ACPI_RTYPE_BUFFER}},
{{"_GSB", 0, ACPI_RTYPE_INTEGER}},
{{"_GTF", 0, ACPI_RTYPE_BUFFER}},
{{"_GTM", 0, ACPI_RTYPE_BUFFER}},
{{"_GTS", 1, 0}},
+ {{"_GWS", 1, ACPI_RTYPE_INTEGER}},
{{"_HID", 0, ACPI_RTYPE_INTEGER | ACPI_RTYPE_STRING}},
{{"_HOT", 0, ACPI_RTYPE_INTEGER}},
{{"_HPP", 0, ACPI_RTYPE_PACKAGE}}, /* Fixed-length (4 Int) */
@@ -306,6 +335,7 @@ static const ACPI_PREDEFINED_INFO PredefinedNames[] =
{{"_HPX", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Pkgs) each (var Ints) */
{{{ACPI_PTYPE2_MIN, ACPI_RTYPE_INTEGER, 5,0}, 0,0}},
+ {{"_HRV", 0, ACPI_RTYPE_INTEGER}},
{{"_IFT", 0, ACPI_RTYPE_INTEGER}}, /* See IPMI spec */
{{"_INI", 0, 0}},
{{"_IRC", 0, 0}},
@@ -363,6 +393,9 @@ static const ACPI_PREDEFINED_INFO PredefinedNames[] =
{{"_PR3", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
{{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
+ {{"_PRE", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
+ {{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
+
{{"_PRL", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
{{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
@@ -393,6 +426,7 @@ static const ACPI_PREDEFINED_INFO PredefinedNames[] =
{{"_PSD", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Pkgs) each (5 Int) with count */
{{{ACPI_PTYPE2_COUNT, ACPI_RTYPE_INTEGER,0,0}, 0,0}},
+ {{"_PSE", 1, 0}},
{{"_PSL", 0, ACPI_RTYPE_PACKAGE}}, /* Variable-length (Refs) */
{{{ACPI_PTYPE1_VAR, ACPI_RTYPE_REFERENCE, 0,0}, 0,0}},
@@ -459,6 +493,7 @@ static const ACPI_PREDEFINED_INFO PredefinedNames[] =
{{"_SLI", 0, ACPI_RTYPE_BUFFER}},
{{"_SPD", 1, ACPI_RTYPE_INTEGER}},
{{"_SRS", 1, 0}},
+ {{"_SRT", 1, ACPI_RTYPE_INTEGER}},
{{"_SRV", 0, ACPI_RTYPE_INTEGER}}, /* See IPMI spec */
{{"_SST", 1, 0}},
{{"_STA", 0, ACPI_RTYPE_INTEGER}},
@@ -466,6 +501,7 @@ static const ACPI_PREDEFINED_INFO PredefinedNames[] =
{{"_STP", 2, ACPI_RTYPE_INTEGER}},
{{"_STR", 0, ACPI_RTYPE_BUFFER}},
{{"_STV", 2, ACPI_RTYPE_INTEGER}},
+ {{"_SUB", 0, ACPI_RTYPE_STRING}},
{{"_SUN", 0, ACPI_RTYPE_INTEGER}},
{{"_SWS", 0, ACPI_RTYPE_INTEGER}},
{{"_TC1", 0, ACPI_RTYPE_INTEGER}},
diff --git a/usr/src/uts/intel/sys/acpi/acresrc.h b/usr/src/uts/intel/sys/acpi/acresrc.h
index 00c4bb2530..c879fdc4a3 100644
--- a/usr/src/uts/intel/sys/acpi/acresrc.h
+++ b/usr/src/uts/intel/sys/acpi/acresrc.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -75,28 +75,42 @@ typedef const struct acpi_rsconvert_info
/* Resource conversion opcodes */
-#define ACPI_RSC_INITGET 0
-#define ACPI_RSC_INITSET 1
-#define ACPI_RSC_FLAGINIT 2
-#define ACPI_RSC_1BITFLAG 3
-#define ACPI_RSC_2BITFLAG 4
-#define ACPI_RSC_COUNT 5
-#define ACPI_RSC_COUNT16 6
-#define ACPI_RSC_LENGTH 7
-#define ACPI_RSC_MOVE8 8
-#define ACPI_RSC_MOVE16 9
-#define ACPI_RSC_MOVE32 10
-#define ACPI_RSC_MOVE64 11
-#define ACPI_RSC_SET8 12
-#define ACPI_RSC_DATA8 13
-#define ACPI_RSC_ADDRESS 14
-#define ACPI_RSC_SOURCE 15
-#define ACPI_RSC_SOURCEX 16
-#define ACPI_RSC_BITMASK 17
-#define ACPI_RSC_BITMASK16 18
-#define ACPI_RSC_EXIT_NE 19
-#define ACPI_RSC_EXIT_LE 20
-#define ACPI_RSC_EXIT_EQ 21
+typedef enum
+{
+ ACPI_RSC_INITGET = 0,
+ ACPI_RSC_INITSET,
+ ACPI_RSC_FLAGINIT,
+ ACPI_RSC_1BITFLAG,
+ ACPI_RSC_2BITFLAG,
+ ACPI_RSC_3BITFLAG,
+ ACPI_RSC_ADDRESS,
+ ACPI_RSC_BITMASK,
+ ACPI_RSC_BITMASK16,
+ ACPI_RSC_COUNT,
+ ACPI_RSC_COUNT16,
+ ACPI_RSC_COUNT_GPIO_PIN,
+ ACPI_RSC_COUNT_GPIO_RES,
+ ACPI_RSC_COUNT_GPIO_VEN,
+ ACPI_RSC_COUNT_SERIAL_RES,
+ ACPI_RSC_COUNT_SERIAL_VEN,
+ ACPI_RSC_DATA8,
+ ACPI_RSC_EXIT_EQ,
+ ACPI_RSC_EXIT_LE,
+ ACPI_RSC_EXIT_NE,
+ ACPI_RSC_LENGTH,
+ ACPI_RSC_MOVE_GPIO_PIN,
+ ACPI_RSC_MOVE_GPIO_RES,
+ ACPI_RSC_MOVE_SERIAL_RES,
+ ACPI_RSC_MOVE_SERIAL_VEN,
+ ACPI_RSC_MOVE8,
+ ACPI_RSC_MOVE16,
+ ACPI_RSC_MOVE32,
+ ACPI_RSC_MOVE64,
+ ACPI_RSC_SET8,
+ ACPI_RSC_SOURCE,
+ ACPI_RSC_SOURCEX
+
+} ACPI_RSCONVERT_OPCODES;
/* Resource Conversion sub-opcodes */
@@ -109,6 +123,9 @@ typedef const struct acpi_rsconvert_info
#define AML_OFFSET(f) (UINT8) ACPI_OFFSET (AML_RESOURCE,f)
+/*
+ * Individual entry for the resource dump tables
+ */
typedef const struct acpi_rsdump_info
{
UINT8 Opcode;
@@ -120,20 +137,27 @@ typedef const struct acpi_rsdump_info
/* Values for the Opcode field above */
-#define ACPI_RSD_TITLE 0
-#define ACPI_RSD_LITERAL 1
-#define ACPI_RSD_STRING 2
-#define ACPI_RSD_UINT8 3
-#define ACPI_RSD_UINT16 4
-#define ACPI_RSD_UINT32 5
-#define ACPI_RSD_UINT64 6
-#define ACPI_RSD_1BITFLAG 7
-#define ACPI_RSD_2BITFLAG 8
-#define ACPI_RSD_SHORTLIST 9
-#define ACPI_RSD_LONGLIST 10
-#define ACPI_RSD_DWORDLIST 11
-#define ACPI_RSD_ADDRESS 12
-#define ACPI_RSD_SOURCE 13
+typedef enum
+{
+ ACPI_RSD_TITLE = 0,
+ ACPI_RSD_1BITFLAG,
+ ACPI_RSD_2BITFLAG,
+ ACPI_RSD_3BITFLAG,
+ ACPI_RSD_ADDRESS,
+ ACPI_RSD_DWORDLIST,
+ ACPI_RSD_LITERAL,
+ ACPI_RSD_LONGLIST,
+ ACPI_RSD_SHORTLIST,
+ ACPI_RSD_SHORTLISTX,
+ ACPI_RSD_SOURCE,
+ ACPI_RSD_STRING,
+ ACPI_RSD_UINT8,
+ ACPI_RSD_UINT16,
+ ACPI_RSD_UINT32,
+ ACPI_RSD_UINT64,
+ ACPI_RSD_WORDLIST
+
+} ACPI_RSDUMP_OPCODES;
/* restore default alignment */
@@ -143,13 +167,16 @@ typedef const struct acpi_rsdump_info
/* Resource tables indexed by internal resource type */
extern const UINT8 AcpiGbl_AmlResourceSizes[];
+extern const UINT8 AcpiGbl_AmlResourceSerialBusSizes[];
extern ACPI_RSCONVERT_INFO *AcpiGbl_SetResourceDispatch[];
/* Resource tables indexed by raw AML resource descriptor type */
extern const UINT8 AcpiGbl_ResourceStructSizes[];
+extern const UINT8 AcpiGbl_ResourceStructSerialBusSizes[];
extern ACPI_RSCONVERT_INFO *AcpiGbl_GetResourceDispatch[];
+extern ACPI_RSCONVERT_INFO *AcpiGbl_ConvertResourceSerialBusDispatch[];
typedef struct acpi_vendor_walk_info
{
@@ -208,6 +235,10 @@ AcpiRsSetSrsMethodData (
ACPI_NAMESPACE_NODE *Node,
ACPI_BUFFER *RetBuffer);
+ACPI_STATUS
+AcpiRsGetAeiMethodData (
+ ACPI_NAMESPACE_NODE *Node,
+ ACPI_BUFFER *RetBuffer);
/*
* rscalc
@@ -348,6 +379,11 @@ extern ACPI_RSCONVERT_INFO AcpiRsConvertAddress16[];
extern ACPI_RSCONVERT_INFO AcpiRsConvertExtIrq[];
extern ACPI_RSCONVERT_INFO AcpiRsConvertAddress64[];
extern ACPI_RSCONVERT_INFO AcpiRsConvertExtAddress64[];
+extern ACPI_RSCONVERT_INFO AcpiRsConvertGpio[];
+extern ACPI_RSCONVERT_INFO AcpiRsConvertFixedDma[];
+extern ACPI_RSCONVERT_INFO AcpiRsConvertI2cSerialBus[];
+extern ACPI_RSCONVERT_INFO AcpiRsConvertSpiSerialBus[];
+extern ACPI_RSCONVERT_INFO AcpiRsConvertUartSerialBus[];
/* These resources require separate get/set tables */
@@ -366,6 +402,7 @@ extern ACPI_RSCONVERT_INFO AcpiRsSetVendor[];
* rsinfo
*/
extern ACPI_RSDUMP_INFO *AcpiGbl_DumpResourceDispatch[];
+extern ACPI_RSDUMP_INFO *AcpiGbl_DumpSerialBusDispatch[];
/*
* rsdump
@@ -387,6 +424,12 @@ extern ACPI_RSDUMP_INFO AcpiRsDumpAddress64[];
extern ACPI_RSDUMP_INFO AcpiRsDumpExtAddress64[];
extern ACPI_RSDUMP_INFO AcpiRsDumpExtIrq[];
extern ACPI_RSDUMP_INFO AcpiRsDumpGenericReg[];
+extern ACPI_RSDUMP_INFO AcpiRsDumpGpio[];
+extern ACPI_RSDUMP_INFO AcpiRsDumpFixedDma[];
+extern ACPI_RSDUMP_INFO AcpiRsDumpCommonSerialBus[];
+extern ACPI_RSDUMP_INFO AcpiRsDumpI2cSerialBus[];
+extern ACPI_RSDUMP_INFO AcpiRsDumpSpiSerialBus[];
+extern ACPI_RSDUMP_INFO AcpiRsDumpUartSerialBus[];
#endif
#endif /* __ACRESRC_H__ */
diff --git a/usr/src/uts/intel/sys/acpi/acrestyp.h b/usr/src/uts/intel/sys/acpi/acrestyp.h
index 03e7e2ce08..b927c9c58e 100644
--- a/usr/src/uts/intel/sys/acpi/acrestyp.h
+++ b/usr/src/uts/intel/sys/acpi/acrestyp.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -62,11 +62,14 @@ typedef UINT32 ACPI_RSDESC_SIZE; /* Max Resource Descr
#define ACPI_WRITE_COMBINING_MEMORY (UINT8) 0x02
#define ACPI_PREFETCHABLE_MEMORY (UINT8) 0x03
+/*! [Begin] no source code translation */
/*
* IO Attributes
* The ISA IO ranges are: n000-n0FFh, n400-n4FFh, n800-n8FFh, nC00-nCFFh.
* The non-ISA IO ranges are: n100-n3FFh, n500-n7FFh, n900-nBFFh, nCD0-nFFFh.
*/
+/*! [End] no source code translation !*/
+
#define ACPI_NON_ISA_ONLY_RANGES (UINT8) 0x01
#define ACPI_ISA_ONLY_RANGES (UINT8) 0x02
#define ACPI_ENTIRE_RANGE (ACPI_NON_ISA_ONLY_RANGES | ACPI_ISA_ONLY_RANGES)
@@ -82,16 +85,26 @@ typedef UINT32 ACPI_RSDESC_SIZE; /* Max Resource Descr
#define ACPI_DECODE_16 (UINT8) 0x01 /* 16-bit IO address decode */
/*
- * IRQ Attributes
+ * Interrupt attributes - used in multiple descriptors
*/
+
+/* Triggering */
+
#define ACPI_LEVEL_SENSITIVE (UINT8) 0x00
#define ACPI_EDGE_SENSITIVE (UINT8) 0x01
+/* Polarity */
+
#define ACPI_ACTIVE_HIGH (UINT8) 0x00
#define ACPI_ACTIVE_LOW (UINT8) 0x01
+#define ACPI_ACTIVE_BOTH (UINT8) 0x02
+
+/* Sharing */
#define ACPI_EXCLUSIVE (UINT8) 0x00
#define ACPI_SHARED (UINT8) 0x01
+#define ACPI_EXCLUSIVE_AND_WAKE (UINT8) 0x02
+#define ACPI_SHARED_AND_WAKE (UINT8) 0x03
/*
* DMA Attributes
@@ -128,6 +141,8 @@ typedef UINT32 ACPI_RSDESC_SIZE; /* Max Resource Descr
#define ACPI_POS_DECODE (UINT8) 0x00
#define ACPI_SUB_DECODE (UINT8) 0x01
+/* Producer/Consumer */
+
#define ACPI_PRODUCER (UINT8) 0x00
#define ACPI_CONSUMER (UINT8) 0x01
@@ -167,7 +182,7 @@ typedef struct acpi_resource_irq
} ACPI_RESOURCE_IRQ;
-typedef struct ACPI_RESOURCE_DMA
+typedef struct acpi_resource_dma
{
UINT8 Type;
UINT8 BusMaster;
@@ -209,6 +224,24 @@ typedef struct acpi_resource_fixed_io
} ACPI_RESOURCE_FIXED_IO;
+typedef struct acpi_resource_fixed_dma
+{
+ UINT16 RequestLines;
+ UINT16 Channels;
+ UINT8 Width;
+
+} ACPI_RESOURCE_FIXED_DMA;
+
+/* Values for Width field above */
+
+#define ACPI_DMA_WIDTH8 0
+#define ACPI_DMA_WIDTH16 1
+#define ACPI_DMA_WIDTH32 2
+#define ACPI_DMA_WIDTH64 3
+#define ACPI_DMA_WIDTH128 4
+#define ACPI_DMA_WIDTH256 5
+
+
typedef struct acpi_resource_vendor
{
UINT16 ByteLength;
@@ -385,6 +418,184 @@ typedef struct acpi_resource_generic_register
} ACPI_RESOURCE_GENERIC_REGISTER;
+typedef struct acpi_resource_gpio
+{
+ UINT8 RevisionId;
+ UINT8 ConnectionType;
+ UINT8 ProducerConsumer; /* For values, see Producer/Consumer above */
+ UINT8 PinConfig;
+ UINT8 Sharable; /* For values, see Interrupt Attributes above */
+ UINT8 IoRestriction;
+ UINT8 Triggering; /* For values, see Interrupt Attributes above */
+ UINT8 Polarity; /* For values, see Interrupt Attributes above */
+ UINT16 DriveStrength;
+ UINT16 DebounceTimeout;
+ UINT16 PinTableLength;
+ UINT16 VendorLength;
+ ACPI_RESOURCE_SOURCE ResourceSource;
+ UINT16 *PinTable;
+ UINT8 *VendorData;
+
+} ACPI_RESOURCE_GPIO;
+
+/* Values for GPIO ConnectionType field above */
+
+#define ACPI_RESOURCE_GPIO_TYPE_INT 0
+#define ACPI_RESOURCE_GPIO_TYPE_IO 1
+
+/* Values for PinConfig field above */
+
+#define ACPI_PIN_CONFIG_DEFAULT 0
+#define ACPI_PIN_CONFIG_PULLUP 1
+#define ACPI_PIN_CONFIG_PULLDOWN 2
+#define ACPI_PIN_CONFIG_NOPULL 3
+
+/* Values for IoRestriction field above */
+
+#define ACPI_IO_RESTRICT_NONE 0
+#define ACPI_IO_RESTRICT_INPUT 1
+#define ACPI_IO_RESTRICT_OUTPUT 2
+#define ACPI_IO_RESTRICT_NONE_PRESERVE 3
+
+
+/* Common structure for I2C, SPI, and UART serial descriptors */
+
+#define ACPI_RESOURCE_SERIAL_COMMON \
+ UINT8 RevisionId; \
+ UINT8 Type; \
+ UINT8 ProducerConsumer; /* For values, see Producer/Consumer above */\
+ UINT8 SlaveMode; \
+ UINT8 TypeRevisionId; \
+ UINT16 TypeDataLength; \
+ UINT16 VendorLength; \
+ ACPI_RESOURCE_SOURCE ResourceSource; \
+ UINT8 *VendorData;
+
+typedef struct acpi_resource_common_serialbus
+{
+ ACPI_RESOURCE_SERIAL_COMMON
+
+} ACPI_RESOURCE_COMMON_SERIALBUS;
+
+/* Values for the Type field above */
+
+#define ACPI_RESOURCE_SERIAL_TYPE_I2C 1
+#define ACPI_RESOURCE_SERIAL_TYPE_SPI 2
+#define ACPI_RESOURCE_SERIAL_TYPE_UART 3
+
+/* Values for SlaveMode field above */
+
+#define ACPI_CONTROLLER_INITIATED 0
+#define ACPI_DEVICE_INITIATED 1
+
+
+typedef struct acpi_resource_i2c_serialbus
+{
+ ACPI_RESOURCE_SERIAL_COMMON
+ UINT8 AccessMode;
+ UINT16 SlaveAddress;
+ UINT32 ConnectionSpeed;
+
+} ACPI_RESOURCE_I2C_SERIALBUS;
+
+/* Values for AccessMode field above */
+
+#define ACPI_I2C_7BIT_MODE 0
+#define ACPI_I2C_10BIT_MODE 1
+
+
+typedef struct acpi_resource_spi_serialbus
+{
+ ACPI_RESOURCE_SERIAL_COMMON
+ UINT8 WireMode;
+ UINT8 DevicePolarity;
+ UINT8 DataBitLength;
+ UINT8 ClockPhase;
+ UINT8 ClockPolarity;
+ UINT16 DeviceSelection;
+ UINT32 ConnectionSpeed;
+
+} ACPI_RESOURCE_SPI_SERIALBUS;
+
+/* Values for WireMode field above */
+
+#define ACPI_SPI_4WIRE_MODE 0
+#define ACPI_SPI_3WIRE_MODE 1
+
+/* Values for DevicePolarity field above */
+
+#define ACPI_SPI_ACTIVE_LOW 0
+#define ACPI_SPI_ACTIVE_HIGH 1
+
+/* Values for ClockPhase field above */
+
+#define ACPI_SPI_FIRST_PHASE 0
+#define ACPI_SPI_SECOND_PHASE 1
+
+/* Values for ClockPolarity field above */
+
+#define ACPI_SPI_START_LOW 0
+#define ACPI_SPI_START_HIGH 1
+
+
+typedef struct acpi_resource_uart_serialbus
+{
+ ACPI_RESOURCE_SERIAL_COMMON
+ UINT8 Endian;
+ UINT8 DataBits;
+ UINT8 StopBits;
+ UINT8 FlowControl;
+ UINT8 Parity;
+ UINT8 LinesEnabled;
+ UINT16 RxFifoSize;
+ UINT16 TxFifoSize;
+ UINT32 DefaultBaudRate;
+
+} ACPI_RESOURCE_UART_SERIALBUS;
+
+/* Values for Endian field above */
+
+#define ACPI_UART_LITTLE_ENDIAN 0
+#define ACPI_UART_BIG_ENDIAN 1
+
+/* Values for DataBits field above */
+
+#define ACPI_UART_5_DATA_BITS 0
+#define ACPI_UART_6_DATA_BITS 1
+#define ACPI_UART_7_DATA_BITS 2
+#define ACPI_UART_8_DATA_BITS 3
+#define ACPI_UART_9_DATA_BITS 4
+
+/* Values for StopBits field above */
+
+#define ACPI_UART_NO_STOP_BITS 0
+#define ACPI_UART_1_STOP_BIT 1
+#define ACPI_UART_1P5_STOP_BITS 2
+#define ACPI_UART_2_STOP_BITS 3
+
+/* Values for FlowControl field above */
+
+#define ACPI_UART_FLOW_CONTROL_NONE 0
+#define ACPI_UART_FLOW_CONTROL_HW 1
+#define ACPI_UART_FLOW_CONTROL_XON_XOFF 2
+
+/* Values for Parity field above */
+
+#define ACPI_UART_PARITY_NONE 0
+#define ACPI_UART_PARITY_EVEN 1
+#define ACPI_UART_PARITY_ODD 2
+#define ACPI_UART_PARITY_MARK 3
+#define ACPI_UART_PARITY_SPACE 4
+
+/* Values for LinesEnabled bitfield above */
+
+#define ACPI_UART_CARRIER_DETECT (1<<2)
+#define ACPI_UART_RING_INDICATOR (1<<3)
+#define ACPI_UART_DATA_SET_READY (1<<4)
+#define ACPI_UART_DATA_TERMINAL_READY (1<<5)
+#define ACPI_UART_CLEAR_TO_SEND (1<<6)
+#define ACPI_UART_REQUEST_TO_SEND (1<<7)
+
/* ACPI_RESOURCE_TYPEs */
@@ -405,7 +616,10 @@ typedef struct acpi_resource_generic_register
#define ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64 14 /* ACPI 3.0 */
#define ACPI_RESOURCE_TYPE_EXTENDED_IRQ 15
#define ACPI_RESOURCE_TYPE_GENERIC_REGISTER 16
-#define ACPI_RESOURCE_TYPE_MAX 16
+#define ACPI_RESOURCE_TYPE_GPIO 17 /* ACPI 5.0 */
+#define ACPI_RESOURCE_TYPE_FIXED_DMA 18 /* ACPI 5.0 */
+#define ACPI_RESOURCE_TYPE_SERIAL_BUS 19 /* ACPI 5.0 */
+#define ACPI_RESOURCE_TYPE_MAX 19
/* Master union for resource descriptors */
@@ -416,6 +630,7 @@ typedef union acpi_resource_data
ACPI_RESOURCE_START_DEPENDENT StartDpf;
ACPI_RESOURCE_IO Io;
ACPI_RESOURCE_FIXED_IO FixedIo;
+ ACPI_RESOURCE_FIXED_DMA FixedDma;
ACPI_RESOURCE_VENDOR Vendor;
ACPI_RESOURCE_VENDOR_TYPED VendorTyped;
ACPI_RESOURCE_END_TAG EndTag;
@@ -428,6 +643,11 @@ typedef union acpi_resource_data
ACPI_RESOURCE_EXTENDED_ADDRESS64 ExtAddress64;
ACPI_RESOURCE_EXTENDED_IRQ ExtendedIrq;
ACPI_RESOURCE_GENERIC_REGISTER GenericReg;
+ ACPI_RESOURCE_GPIO Gpio;
+ ACPI_RESOURCE_I2C_SERIALBUS I2cSerialBus;
+ ACPI_RESOURCE_SPI_SERIALBUS SpiSerialBus;
+ ACPI_RESOURCE_UART_SERIALBUS UartSerialBus;
+ ACPI_RESOURCE_COMMON_SERIALBUS CommonSerialBus;
/* Common fields */
diff --git a/usr/src/uts/intel/sys/acpi/acstruct.h b/usr/src/uts/intel/sys/acpi/acstruct.h
index ff588f5234..b3927caa41 100644
--- a/usr/src/uts/intel/sys/acpi/acstruct.h
+++ b/usr/src/uts/intel/sys/acpi/acstruct.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/sys/acpi/actables.h b/usr/src/uts/intel/sys/acpi/actables.h
index 6aad862bc9..26a7947dc9 100644
--- a/usr/src/uts/intel/sys/acpi/actables.h
+++ b/usr/src/uts/intel/sys/acpi/actables.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -84,6 +84,11 @@ ACPI_STATUS
AcpiTbVerifyTable (
ACPI_TABLE_DESC *TableDesc);
+ACPI_TABLE_HEADER *
+AcpiTbTableOverride (
+ ACPI_TABLE_HEADER *TableHeader,
+ ACPI_TABLE_DESC *TableDesc);
+
ACPI_STATUS
AcpiTbAddTable (
ACPI_TABLE_DESC *TableDesc,
diff --git a/usr/src/uts/intel/sys/acpi/actbl.h b/usr/src/uts/intel/sys/acpi/actbl.h
index e632291cd0..7da7378d14 100644
--- a/usr/src/uts/intel/sys/acpi/actbl.h
+++ b/usr/src/uts/intel/sys/acpi/actbl.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -298,6 +298,8 @@ typedef struct acpi_table_fadt
ACPI_GENERIC_ADDRESS XPmTimerBlock; /* 64-bit Extended Power Mgt Timer Ctrl Reg Blk address */
ACPI_GENERIC_ADDRESS XGpe0Block; /* 64-bit Extended General Purpose Event 0 Reg Blk address */
ACPI_GENERIC_ADDRESS XGpe1Block; /* 64-bit Extended General Purpose Event 1 Reg Blk address */
+ ACPI_GENERIC_ADDRESS SleepControl; /* 64-bit Sleep Control register */
+ ACPI_GENERIC_ADDRESS SleepStatus; /* 64-bit Sleep Status register */
} ACPI_TABLE_FADT;
@@ -309,6 +311,7 @@ typedef struct acpi_table_fadt
#define ACPI_FADT_NO_VGA (1<<2) /* 02: [V4] It is not safe to probe for VGA hardware */
#define ACPI_FADT_NO_MSI (1<<3) /* 03: [V4] Message Signaled Interrupts (MSI) must not be enabled */
#define ACPI_FADT_NO_ASPM (1<<4) /* 04: [V4] PCIe ASPM control must not be enabled */
+#define ACPI_FADT_NO_CMOS_RTC (1<<5) /* 05: [V5] No CMOS real-time clock present */
/* Masks for FADT flags */
@@ -332,6 +335,8 @@ typedef struct acpi_table_fadt
#define ACPI_FADT_REMOTE_POWER_ON (1<<17) /* 17: [V4] System is compatible with remote power on (ACPI 3.0) */
#define ACPI_FADT_APIC_CLUSTER (1<<18) /* 18: [V4] All local APICs must use cluster model (ACPI 3.0) */
#define ACPI_FADT_APIC_PHYSICAL (1<<19) /* 19: [V4] All local xAPICs must use physical dest mode (ACPI 3.0) */
+#define ACPI_FADT_HW_REDUCED (1<<20) /* 20: [V5] ACPI hardware is not implemented (ACPI 5.0) */
+#define ACPI_FADT_LOW_POWER_S0 (1<<21) /* 21: [V5] S0 power savings are equal or better than S3 (ACPI 5.0) */
/* Values for PreferredProfile (Prefered Power Management Profiles) */
@@ -344,9 +349,18 @@ enum AcpiPreferedPmProfiles
PM_WORKSTATION = 3,
PM_ENTERPRISE_SERVER = 4,
PM_SOHO_SERVER = 5,
- PM_APPLIANCE_PC = 6
+ PM_APPLIANCE_PC = 6,
+ PM_PERFORMANCE_SERVER = 7,
+ PM_TABLET = 8
};
+/* Values for SleepStatus and SleepControl registers (V5 FADT) */
+
+#define ACPI_X_WAKE_STATUS 0x80
+#define ACPI_X_SLEEP_TYPE_MASK 0x1C
+#define ACPI_X_SLEEP_TYPE_POSITION 0x02
+#define ACPI_X_SLEEP_ENABLE 0x20
+
/* Reset to default packing */
@@ -392,10 +406,11 @@ typedef struct acpi_table_desc
*/
#include "actbl1.h"
#include "actbl2.h"
+#include "actbl3.h"
/* Macros used to generate offsets to specific table fields */
-#define ACPI_FADT_OFFSET(f) (UINT8) ACPI_OFFSET (ACPI_TABLE_FADT, f)
+#define ACPI_FADT_OFFSET(f) (UINT16) ACPI_OFFSET (ACPI_TABLE_FADT, f)
/*
* Sizes of the various flavors of FADT. We need to look closely
@@ -405,12 +420,15 @@ typedef struct acpi_table_desc
* FADT is the bottom line as to what the version really is.
*
* For reference, the values below are as follows:
- * FADT V1 size: 0x74
- * FADT V2 size: 0x84
- * FADT V3+ size: 0xF4
+ * FADT V1 size: 0x074
+ * FADT V2 size: 0x084
+ * FADT V3 size: 0x0F4
+ * FADT V4 size: 0x0F4
+ * FADT V5 size: 0x10C
*/
#define ACPI_FADT_V1_SIZE (UINT32) (ACPI_FADT_OFFSET (Flags) + 4)
#define ACPI_FADT_V2_SIZE (UINT32) (ACPI_FADT_OFFSET (Reserved4[0]) + 3)
-#define ACPI_FADT_V3_SIZE (UINT32) (sizeof (ACPI_TABLE_FADT))
+#define ACPI_FADT_V3_SIZE (UINT32) (ACPI_FADT_OFFSET (SleepControl))
+#define ACPI_FADT_V5_SIZE (UINT32) (sizeof (ACPI_TABLE_FADT))
#endif /* __ACTBL_H__ */
diff --git a/usr/src/uts/intel/sys/acpi/actbl1.h b/usr/src/uts/intel/sys/acpi/actbl1.h
index ad6c82fc3c..824135d3dd 100644
--- a/usr/src/uts/intel/sys/acpi/actbl1.h
+++ b/usr/src/uts/intel/sys/acpi/actbl1.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -252,16 +252,17 @@ typedef struct acpi_einj_entry
enum AcpiEinjActions
{
- ACPI_EINJ_BEGIN_OPERATION = 0,
- ACPI_EINJ_GET_TRIGGER_TABLE = 1,
- ACPI_EINJ_SET_ERROR_TYPE = 2,
- ACPI_EINJ_GET_ERROR_TYPE = 3,
- ACPI_EINJ_END_OPERATION = 4,
- ACPI_EINJ_EXECUTE_OPERATION = 5,
- ACPI_EINJ_CHECK_BUSY_STATUS = 6,
- ACPI_EINJ_GET_COMMAND_STATUS = 7,
- ACPI_EINJ_ACTION_RESERVED = 8, /* 8 and greater are reserved */
- ACPI_EINJ_TRIGGER_ERROR = 0xFF /* Except for this value */
+ ACPI_EINJ_BEGIN_OPERATION = 0,
+ ACPI_EINJ_GET_TRIGGER_TABLE = 1,
+ ACPI_EINJ_SET_ERROR_TYPE = 2,
+ ACPI_EINJ_GET_ERROR_TYPE = 3,
+ ACPI_EINJ_END_OPERATION = 4,
+ ACPI_EINJ_EXECUTE_OPERATION = 5,
+ ACPI_EINJ_CHECK_BUSY_STATUS = 6,
+ ACPI_EINJ_GET_COMMAND_STATUS = 7,
+ ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS = 8,
+ ACPI_EINJ_ACTION_RESERVED = 9, /* 9 and greater are reserved */
+ ACPI_EINJ_TRIGGER_ERROR = 0xFF /* Except for this value */
};
/* Values for Instruction field above */
@@ -273,9 +274,33 @@ enum AcpiEinjInstructions
ACPI_EINJ_WRITE_REGISTER = 2,
ACPI_EINJ_WRITE_REGISTER_VALUE = 3,
ACPI_EINJ_NOOP = 4,
- ACPI_EINJ_INSTRUCTION_RESERVED = 5 /* 5 and greater are reserved */
+ ACPI_EINJ_FLUSH_CACHELINE = 5,
+ ACPI_EINJ_INSTRUCTION_RESERVED = 6 /* 6 and greater are reserved */
};
+typedef struct acpi_einj_error_type_with_addr
+{
+ UINT32 ErrorType;
+ UINT32 VendorStructOffset;
+ UINT32 Flags;
+ UINT32 ApicId;
+ UINT64 Address;
+ UINT64 Range;
+ UINT32 PcieId;
+
+} ACPI_EINJ_ERROR_TYPE_WITH_ADDR;
+
+typedef struct acpi_einj_vendor
+{
+ UINT32 Length;
+ UINT32 PcieId;
+ UINT16 VendorId;
+ UINT16 DeviceId;
+ UINT8 RevisionId;
+ UINT8 Reserved[3];
+
+} ACPI_EINJ_VENDOR;
+
/* EINJ Trigger Error Action Table */
@@ -313,6 +338,7 @@ enum AcpiEinjCommandStatus
#define ACPI_EINJ_PLATFORM_CORRECTABLE (1<<9)
#define ACPI_EINJ_PLATFORM_UNCORRECTABLE (1<<10)
#define ACPI_EINJ_PLATFORM_FATAL (1<<11)
+#define ACPI_EINJ_VENDOR_DEFINED (1<<31)
/*******************************************************************************
@@ -731,7 +757,9 @@ enum AcpiMadtType
ACPI_MADT_TYPE_INTERRUPT_SOURCE = 8,
ACPI_MADT_TYPE_LOCAL_X2APIC = 9,
ACPI_MADT_TYPE_LOCAL_X2APIC_NMI = 10,
- ACPI_MADT_TYPE_RESERVED = 11 /* 11 and greater are reserved */
+ ACPI_MADT_TYPE_GENERIC_INTERRUPT = 11,
+ ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR = 12,
+ ACPI_MADT_TYPE_RESERVED = 13 /* 13 and greater are reserved */
};
@@ -886,11 +914,42 @@ typedef struct acpi_madt_local_x2apic_nmi
} ACPI_MADT_LOCAL_X2APIC_NMI;
+/* 11: Generic Interrupt (ACPI 5.0) */
+
+typedef struct acpi_madt_generic_interrupt
+{
+ ACPI_SUBTABLE_HEADER Header;
+ UINT16 Reserved; /* Reserved - must be zero */
+ UINT32 GicId;
+ UINT32 Uid;
+ UINT32 Flags;
+ UINT32 ParkingVersion;
+ UINT32 PerformanceInterrupt;
+ UINT64 ParkedAddress;
+ UINT64 BaseAddress;
+
+} ACPI_MADT_GENERIC_INTERRUPT;
+
+
+/* 12: Generic Distributor (ACPI 5.0) */
+
+typedef struct acpi_madt_generic_distributor
+{
+ ACPI_SUBTABLE_HEADER Header;
+ UINT16 Reserved; /* Reserved - must be zero */
+ UINT32 GicId;
+ UINT64 BaseAddress;
+ UINT32 GlobalIrqBase;
+ UINT32 Reserved2; /* Reserved - must be zero */
+
+} ACPI_MADT_GENERIC_DISTRIBUTOR;
+
+
/*
* Common flags fields for MADT subtables
*/
-/* MADT Local APIC flags (LapicFlags) */
+/* MADT Local APIC flags (LapicFlags) and GIC flags */
#define ACPI_MADT_ENABLED (1) /* 00: Processor is usable if set */
diff --git a/usr/src/uts/intel/sys/acpi/actbl2.h b/usr/src/uts/intel/sys/acpi/actbl2.h
index e4f01e3f6b..70c6a09a3c 100644
--- a/usr/src/uts/intel/sys/acpi/actbl2.h
+++ b/usr/src/uts/intel/sys/acpi/actbl2.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/sys/acpi/actbl3.h b/usr/src/uts/intel/sys/acpi/actbl3.h
new file mode 100644
index 0000000000..c5dd4184dd
--- /dev/null
+++ b/usr/src/uts/intel/sys/acpi/actbl3.h
@@ -0,0 +1,650 @@
+/******************************************************************************
+ *
+ * Name: actbl3.h - ACPI Table Definitions
+ *
+ *****************************************************************************/
+
+/*
+ * Copyright (C) 2000 - 2012, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * substantially similar to the "NO WARRANTY" disclaimer below
+ * ("Disclaimer") and any redistribution must be conditioned upon
+ * including a substantially similar Disclaimer requirement for further
+ * binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ * of any contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#ifndef __ACTBL3_H__
+#define __ACTBL3_H__
+
+
+/*******************************************************************************
+ *
+ * Additional ACPI Tables (3)
+ *
+ * These tables are not consumed directly by the ACPICA subsystem, but are
+ * included here to support device drivers and the AML disassembler.
+ *
+ * The tables in this file are fully defined within the ACPI specification.
+ *
+ ******************************************************************************/
+
+
+/*
+ * Values for description table header signatures for tables defined in this
+ * file. Useful because they make it more difficult to inadvertently type in
+ * the wrong signature.
+ */
+#define ACPI_SIG_BGRT "BGRT" /* Boot Graphics Resource Table */
+#define ACPI_SIG_DRTM "DRTM" /* Dynamic Root of Trust for Measurement table */
+#define ACPI_SIG_FPDT "FPDT" /* Firmware Performance Data Table */
+#define ACPI_SIG_GTDT "GTDT" /* Generic Timer Description Table */
+#define ACPI_SIG_MPST "MPST" /* Memory Power State Table */
+#define ACPI_SIG_PCCT "PCCT" /* Platform Communications Channel Table */
+#define ACPI_SIG_PMTT "PMTT" /* Platform Memory Topology Table */
+#define ACPI_SIG_RASF "RASF" /* RAS Feature table */
+
+#define ACPI_SIG_S3PT "S3PT" /* S3 Performance (sub)Table */
+#define ACPI_SIG_PCCS "PCC" /* PCC Shared Memory Region */
+
+/* Reserved table signatures */
+
+#define ACPI_SIG_CSRT "CSRT" /* Core System Resources Table */
+#define ACPI_SIG_DBG2 "DBG2" /* Debug Port table 2 */
+#define ACPI_SIG_MATR "MATR" /* Memory Address Translation Table */
+#define ACPI_SIG_MSDM "MSDM" /* Microsoft Data Management Table */
+#define ACPI_SIG_WPBT "WPBT" /* Windows Platform Binary Table */
+
+/*
+ * All tables must be byte-packed to match the ACPI specification, since
+ * the tables are provided by the system BIOS.
+ */
+#pragma pack(1)
+
+/*
+ * Note about bitfields: The UINT8 type is used for bitfields in ACPI tables.
+ * This is the only type that is even remotely portable. Anything else is not
+ * portable, so do not use any other bitfield types.
+ */
+
+
+/*******************************************************************************
+ *
+ * BGRT - Boot Graphics Resource Table (ACPI 5.0)
+ * Version 1
+ *
+ ******************************************************************************/
+
+typedef struct acpi_table_bgrt
+{
+ ACPI_TABLE_HEADER Header; /* Common ACPI table header */
+ UINT16 Version;
+ UINT8 Status;
+ UINT8 ImageType;
+ UINT64 ImageAddress;
+ UINT32 ImageOffsetX;
+ UINT32 ImageOffsetY;
+
+} ACPI_TABLE_BGRT;
+
+
+/*******************************************************************************
+ *
+ * DRTM - Dynamic Root of Trust for Measurement table
+ *
+ ******************************************************************************/
+
+typedef struct acpi_table_drtm
+{
+ ACPI_TABLE_HEADER Header; /* Common ACPI table header */
+ UINT64 EntryBaseAddress;
+ UINT64 EntryLength;
+ UINT32 EntryAddress32;
+ UINT64 EntryAddress64;
+ UINT64 ExitAddress;
+ UINT64 LogAreaAddress;
+ UINT32 LogAreaLength;
+ UINT64 ArchDependentAddress;
+ UINT32 Flags;
+
+} ACPI_TABLE_DRTM;
+
+/* 1) Validated Tables List */
+
+typedef struct acpi_drtm_vtl_list
+{
+ UINT32 ValidatedTableListCount;
+
+} ACPI_DRTM_VTL_LIST;
+
+/* 2) Resources List */
+
+typedef struct acpi_drtm_resource_list
+{
+ UINT32 ResourceListCount;
+
+} ACPI_DRTM_RESOURCE_LIST;
+
+/* 3) Platform-specific Identifiers List */
+
+typedef struct acpi_drtm_id_list
+{
+ UINT32 IdListCount;
+
+} ACPI_DRTM_ID_LIST;
+
+
+/*******************************************************************************
+ *
+ * FPDT - Firmware Performance Data Table (ACPI 5.0)
+ * Version 1
+ *
+ ******************************************************************************/
+
+typedef struct acpi_table_fpdt
+{
+ ACPI_TABLE_HEADER Header; /* Common ACPI table header */
+
+} ACPI_TABLE_FPDT;
+
+
+/* FPDT subtable header */
+
+typedef struct acpi_fpdt_header
+{
+ UINT16 Type;
+ UINT8 Length;
+ UINT8 Revision;
+
+} ACPI_FPDT_HEADER;
+
+/* Values for Type field above */
+
+enum AcpiFpdtType
+{
+ ACPI_FPDT_TYPE_BOOT = 0,
+ ACPI_FPDT_TYPE_S3PERF = 1,
+};
+
+
+/*
+ * FPDT subtables
+ */
+
+/* 0: Firmware Basic Boot Performance Record */
+
+typedef struct acpi_fpdt_boot
+{
+ ACPI_FPDT_HEADER Header;
+ UINT8 Reserved[4];
+ UINT64 ResetEnd;
+ UINT64 LoadStart;
+ UINT64 StartupStart;
+ UINT64 ExitServicesEntry;
+ UINT64 ExitServicesExit;
+
+} ACPI_FPDT_BOOT;
+
+
+/* 1: S3 Performance Table Pointer Record */
+
+typedef struct acpi_fpdt_s3pt_ptr
+{
+ ACPI_FPDT_HEADER Header;
+ UINT8 Reserved[4];
+ UINT64 Address;
+
+} ACPI_FPDT_S3PT_PTR;
+
+
+/*
+ * S3PT - S3 Performance Table. This table is pointed to by the
+ * FPDT S3 Pointer Record above.
+ */
+typedef struct acpi_table_s3pt
+{
+ UINT8 Signature[4]; /* "S3PT" */
+ UINT32 Length;
+
+} ACPI_TABLE_S3PT;
+
+
+/*
+ * S3PT Subtables
+ */
+typedef struct acpi_s3pt_header
+{
+ UINT16 Type;
+ UINT8 Length;
+ UINT8 Revision;
+
+} ACPI_S3PT_HEADER;
+
+/* Values for Type field above */
+
+enum AcpiS3ptType
+{
+ ACPI_S3PT_TYPE_RESUME = 0,
+ ACPI_S3PT_TYPE_SUSPEND = 1,
+};
+
+typedef struct acpi_s3pt_resume
+{
+ ACPI_S3PT_HEADER Header;
+ UINT32 ResumeCount;
+ UINT64 FullResume;
+ UINT64 AverageResume;
+
+} ACPI_S3PT_RESUME;
+
+typedef struct acpi_s3pt_suspend
+{
+ ACPI_S3PT_HEADER Header;
+ UINT64 SuspendStart;
+ UINT64 SuspendEnd;
+
+} ACPI_S3PT_SUSPEND;
+
+
+/*******************************************************************************
+ *
+ * GTDT - Generic Timer Description Table (ACPI 5.0)
+ * Version 1
+ *
+ ******************************************************************************/
+
+typedef struct acpi_table_gtdt
+{
+ ACPI_TABLE_HEADER Header; /* Common ACPI table header */
+ UINT64 Address;
+ UINT32 Flags;
+ UINT32 SecurePl1Interrupt;
+ UINT32 SecurePl1Flags;
+ UINT32 NonSecurePl1Interrupt;
+ UINT32 NonSecurePl1Flags;
+ UINT32 VirtualTimerInterrupt;
+ UINT32 VirtualTimerFlags;
+ UINT32 NonSecurePl2Interrupt;
+ UINT32 NonSecurePl2Flags;
+
+} ACPI_TABLE_GTDT;
+
+/* Values for Flags field above */
+
+#define ACPI_GTDT_MAPPED_BLOCK_PRESENT 1
+
+/* Values for all "TimerFlags" fields above */
+
+#define ACPI_GTDT_INTERRUPT_MODE 1
+#define ACPI_GTDT_INTERRUPT_POLARITY 2
+
+
+/*******************************************************************************
+ *
+ * MPST - Memory Power State Table (ACPI 5.0)
+ * Version 1
+ *
+ ******************************************************************************/
+
+#define ACPI_MPST_CHANNEL_INFO \
+ UINT16 Reserved1; \
+ UINT8 ChannelId; \
+ UINT8 Reserved2; \
+ UINT16 PowerNodeCount;
+
+/* Main table */
+
+typedef struct acpi_table_mpst
+{
+ ACPI_TABLE_HEADER Header; /* Common ACPI table header */
+ ACPI_MPST_CHANNEL_INFO /* Platform Communication Channel */
+
+} ACPI_TABLE_MPST;
+
+
+/* Memory Platform Communication Channel Info */
+
+typedef struct acpi_mpst_channel
+{
+ ACPI_MPST_CHANNEL_INFO /* Platform Communication Channel */
+
+} ACPI_MPST_CHANNEL;
+
+
+/* Memory Power Node Structure */
+
+typedef struct acpi_mpst_power_node
+{
+ UINT8 Flags;
+ UINT8 Reserved1;
+ UINT16 NodeId;
+ UINT32 Length;
+ UINT64 RangeAddress;
+ UINT64 RangeLength;
+ UINT8 NumPowerStates;
+ UINT8 NumPhysicalComponents;
+ UINT16 Reserved2;
+
+} ACPI_MPST_POWER_NODE;
+
+/* Values for Flags field above */
+
+#define ACPI_MPST_ENABLED 1
+#define ACPI_MPST_POWER_MANAGED 2
+#define ACPI_MPST_HOT_PLUG_CAPABLE 4
+
+
+/* Memory Power State Structure (follows POWER_NODE above) */
+
+typedef struct acpi_mpst_power_state
+{
+ UINT8 PowerState;
+ UINT8 InfoIndex;
+
+} ACPI_MPST_POWER_STATE;
+
+
+/* Physical Component ID Structure (follows POWER_STATE above) */
+
+typedef struct acpi_mpst_component
+{
+ UINT16 ComponentId;
+
+} ACPI_MPST_COMPONENT;
+
+
+/* Memory Power State Characteristics Structure (follows all POWER_NODEs) */
+
+typedef struct acpi_mpst_data_hdr
+{
+ UINT16 CharacteristicsCount;
+
+} ACPI_MPST_DATA_HDR;
+
+typedef struct acpi_mpst_power_data
+{
+ UINT8 Revision;
+ UINT8 Flags;
+ UINT16 Reserved1;
+ UINT32 AveragePower;
+ UINT32 PowerSaving;
+ UINT64 ExitLatency;
+ UINT64 Reserved2;
+
+} ACPI_MPST_POWER_DATA;
+
+/* Values for Flags field above */
+
+#define ACPI_MPST_PRESERVE 1
+#define ACPI_MPST_AUTOENTRY 2
+#define ACPI_MPST_AUTOEXIT 4
+
+
+/* Shared Memory Region (not part of an ACPI table) */
+
+typedef struct acpi_mpst_shared
+{
+ UINT32 Signature;
+ UINT16 PccCommand;
+ UINT16 PccStatus;
+ UINT16 CommandRegister;
+ UINT16 StatusRegister;
+ UINT16 PowerStateId;
+ UINT16 PowerNodeId;
+ UINT64 EnergyConsumed;
+ UINT64 AveragePower;
+
+} ACPI_MPST_SHARED;
+
+
+/*******************************************************************************
+ *
+ * PCCT - Platform Communications Channel Table (ACPI 5.0)
+ * Version 1
+ *
+ ******************************************************************************/
+
+typedef struct acpi_table_pcct
+{
+ ACPI_TABLE_HEADER Header; /* Common ACPI table header */
+ UINT32 Flags;
+ UINT32 Latency;
+ UINT32 Reserved;
+
+} ACPI_TABLE_PCCT;
+
+/* Values for Flags field above */
+
+#define ACPI_PCCT_DOORBELL 1
+
+/*
+ * PCCT subtables
+ */
+
+/* 0: Generic Communications Subspace */
+
+typedef struct acpi_pcct_subspace
+{
+ ACPI_SUBTABLE_HEADER Header;
+ UINT8 Reserved[6];
+ UINT64 BaseAddress;
+ UINT64 Length;
+ ACPI_GENERIC_ADDRESS DoorbellRegister;
+ UINT64 PreserveMask;
+ UINT64 WriteMask;
+
+} ACPI_PCCT_SUBSPACE;
+
+
+/*
+ * PCC memory structures (not part of the ACPI table)
+ */
+
+/* Shared Memory Region */
+
+typedef struct acpi_pcct_shared_memory
+{
+ UINT32 Signature;
+ UINT16 Command;
+ UINT16 Status;
+
+} ACPI_PCCT_SHARED_MEMORY;
+
+
+/*******************************************************************************
+ *
+ * PMTT - Platform Memory Topology Table (ACPI 5.0)
+ * Version 1
+ *
+ ******************************************************************************/
+
+typedef struct acpi_table_pmtt
+{
+ ACPI_TABLE_HEADER Header; /* Common ACPI table header */
+ UINT32 Reserved;
+
+} ACPI_TABLE_PMTT;
+
+
+/* Common header for PMTT subtables that follow main table */
+
+typedef struct acpi_pmtt_header
+{
+ UINT8 Type;
+ UINT8 Reserved1;
+ UINT16 Length;
+ UINT16 Flags;
+ UINT16 Reserved2;
+
+} ACPI_PMTT_HEADER;
+
+/* Values for Type field above */
+
+#define ACPI_PMTT_TYPE_SOCKET 0
+#define ACPI_PMTT_TYPE_CONTROLLER 1
+#define ACPI_PMTT_TYPE_DIMM 2
+#define ACPI_PMTT_TYPE_RESERVED 3 /* 0x03-0xFF are reserved */
+
+/* Values for Flags field above */
+
+#define ACPI_PMTT_TOP_LEVEL 0x0001
+#define ACPI_PMTT_PHYSICAL 0x0002
+#define ACPI_PMTT_MEMORY_TYPE 0x000C
+
+
+/*
+ * PMTT subtables, correspond to Type in acpi_pmtt_header
+ */
+
+
+/* 0: Socket Structure */
+
+typedef struct acpi_pmtt_socket
+{
+ ACPI_PMTT_HEADER Header;
+ UINT16 SocketId;
+ UINT16 Reserved;
+
+} ACPI_PMTT_SOCKET;
+
+
+/* 1: Memory Controller subtable */
+
+typedef struct acpi_pmtt_controller
+{
+ ACPI_PMTT_HEADER Header;
+ UINT32 ReadLatency;
+ UINT32 WriteLatency;
+ UINT32 ReadBandwidth;
+ UINT32 WriteBandwidth;
+ UINT16 AccessWidth;
+ UINT16 Alignment;
+ UINT16 Reserved;
+ UINT16 DomainCount;
+
+} ACPI_PMTT_CONTROLLER;
+
+/* 1a: Proximity Domain substructure */
+
+typedef struct acpi_pmtt_domain
+{
+ UINT32 ProximityDomain;
+
+} ACPI_PMTT_DOMAIN;
+
+
+/* 2: Physical Component Identifier (DIMM) */
+
+typedef struct acpi_pmtt_physical_component
+{
+ ACPI_PMTT_HEADER Header;
+ UINT16 ComponentId;
+ UINT16 Reserved;
+ UINT32 MemorySize;
+ UINT32 BiosHandle;
+
+} ACPI_PMTT_PHYSICAL_COMPONENT;
+
+
+/*******************************************************************************
+ *
+ * RASF - RAS Feature Table (ACPI 5.0)
+ * Version 1
+ *
+ ******************************************************************************/
+
+typedef struct acpi_table_rasf
+{
+ ACPI_TABLE_HEADER Header; /* Common ACPI table header */
+ UINT8 ChannelId[12];
+
+} ACPI_TABLE_RASF;
+
+/* RASF Platform Communication Channel Shared Memory Region */
+
+typedef struct acpi_rasf_shared_memory
+{
+ UINT32 Signature;
+ UINT16 Command;
+ UINT16 Status;
+ UINT64 RequestedAddress;
+ UINT64 RequestedLength;
+ UINT64 ActualAddress;
+ UINT64 ActualLength;
+ UINT16 Flags;
+ UINT8 Speed;
+
+} ACPI_RASF_SHARED_MEMORY;
+
+/* Masks for Flags and Speed fields above */
+
+#define ACPI_RASF_SCRUBBER_RUNNING 1
+#define ACPI_RASF_SPEED (7<<1)
+
+/* Channel Commands */
+
+enum AcpiRasfCommands
+{
+ ACPI_RASF_GET_RAS_CAPABILITIES = 1,
+ ACPI_RASF_GET_PATROL_PARAMETERS = 2,
+ ACPI_RASF_START_PATROL_SCRUBBER = 3,
+ ACPI_RASF_STOP_PATROL_SCRUBBER = 4
+};
+
+/* Channel Command flags */
+
+#define ACPI_RASF_GENERATE_SCI (1<<15)
+
+/* Status values */
+
+enum AcpiRasfStatus
+{
+ ACPI_RASF_SUCCESS = 0,
+ ACPI_RASF_NOT_VALID = 1,
+ ACPI_RASF_NOT_SUPPORTED = 2,
+ ACPI_RASF_BUSY = 3,
+ ACPI_RASF_FAILED = 4,
+ ACPI_RASF_ABORTED = 5,
+ ACPI_RASF_INVALID_DATA = 6
+};
+
+/* Status flags */
+
+#define ACPI_RASF_COMMAND_COMPLETE (1)
+#define ACPI_RASF_SCI_DOORBELL (1<<1)
+#define ACPI_RASF_ERROR (1<<2)
+#define ACPI_RASF_STATUS (0x1F<<3)
+
+
+/* Reset to default packing */
+
+#pragma pack()
+
+#endif /* __ACTBL3_H__ */
diff --git a/usr/src/uts/intel/sys/acpi/actypes.h b/usr/src/uts/intel/sys/acpi/actypes.h
index 095e589607..4a5188544e 100644
--- a/usr/src/uts/intel/sys/acpi/actypes.h
+++ b/usr/src/uts/intel/sys/acpi/actypes.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -520,6 +520,13 @@ typedef UINT64 ACPI_INTEGER;
#define ACPI_SLEEP_TYPE_INVALID 0xFF
/*
+ * Sleep/Wake flags
+ */
+#define ACPI_NO_OPTIONAL_METHODS 0x00 /* Do not execute any optional methods */
+#define ACPI_EXECUTE_GTS 0x01 /* For enter sleep interface */
+#define ACPI_EXECUTE_BFS 0x02 /* For leave sleep prep interface */
+
+/*
* Standard notify values
*/
#define ACPI_NOTIFY_BUS_CHECK (UINT8) 0x00
@@ -534,8 +541,9 @@ typedef UINT64 ACPI_INTEGER;
#define ACPI_NOTIFY_DEVICE_PLD_CHECK (UINT8) 0x09
#define ACPI_NOTIFY_RESERVED (UINT8) 0x0A
#define ACPI_NOTIFY_LOCALITY_UPDATE (UINT8) 0x0B
+#define ACPI_NOTIFY_SHUTDOWN_REQUEST (UINT8) 0x0C
-#define ACPI_NOTIFY_MAX 0x0B
+#define ACPI_NOTIFY_MAX 0x0C
/*
* Types associated with ACPI names and objects. The first group of
@@ -700,8 +708,13 @@ typedef UINT32 ACPI_EVENT_STATUS;
#define ACPI_DEVICE_NOTIFY 0x2
#define ACPI_ALL_NOTIFY (ACPI_SYSTEM_NOTIFY | ACPI_DEVICE_NOTIFY)
#define ACPI_MAX_NOTIFY_HANDLER_TYPE 0x3
+#define ACPI_NUM_NOTIFY_TYPES 2
+
+#define ACPI_MAX_SYS_NOTIFY 0x7F
+#define ACPI_MAX_DEVICE_SPECIFIC_NOTIFY 0xBF
-#define ACPI_MAX_SYS_NOTIFY 0x7f
+#define ACPI_SYSTEM_HANDLER_LIST 0 /* Used as index, must be SYSTEM_NOTIFY -1 */
+#define ACPI_DEVICE_HANDLER_LIST 1 /* Used as index, must be DEVICE_NOTIFY -1 */
/* Address Space (Operation Region) Types */
@@ -716,8 +729,10 @@ typedef UINT8 ACPI_ADR_SPACE_TYPE;
#define ACPI_ADR_SPACE_CMOS (ACPI_ADR_SPACE_TYPE) 5
#define ACPI_ADR_SPACE_PCI_BAR_TARGET (ACPI_ADR_SPACE_TYPE) 6
#define ACPI_ADR_SPACE_IPMI (ACPI_ADR_SPACE_TYPE) 7
+#define ACPI_ADR_SPACE_GPIO (ACPI_ADR_SPACE_TYPE) 8
+#define ACPI_ADR_SPACE_GSBUS (ACPI_ADR_SPACE_TYPE) 9
-#define ACPI_NUM_PREDEFINED_REGIONS 8
+#define ACPI_NUM_PREDEFINED_REGIONS 10
/*
* Special Address Spaces
@@ -790,6 +805,20 @@ typedef UINT8 ACPI_ADR_SPACE_TYPE;
#define ACPI_DISABLE_EVENT 0
+/* Sleep function dispatch */
+
+typedef ACPI_STATUS (*ACPI_SLEEP_FUNCTION) (
+ UINT8 SleepState,
+ UINT8 Flags);
+
+typedef struct acpi_sleep_functions
+{
+ ACPI_SLEEP_FUNCTION LegacyFunction;
+ ACPI_SLEEP_FUNCTION ExtendedFunction;
+
+} ACPI_SLEEP_FUNCTIONS;
+
+
/*
* External ACPI object definition
*/
@@ -1030,6 +1059,17 @@ ACPI_STATUS (*ACPI_ADR_SPACE_HANDLER) (
#define ACPI_DEFAULT_HANDLER NULL
+/* Special Context data for GenericSerialBus/GeneralPurposeIo (ACPI 5.0) */
+
+typedef struct acpi_connection_info
+{
+ UINT8 *Connection;
+ UINT16 Length;
+ UINT8 AccessLength;
+
+} ACPI_CONNECTION_INFO;
+
+
typedef
ACPI_STATUS (*ACPI_ADR_SPACE_SETUP) (
ACPI_HANDLE RegionHandle,
diff --git a/usr/src/uts/intel/sys/acpi/acutils.h b/usr/src/uts/intel/sys/acpi/acutils.h
index 489f195eb9..e9c9b95dda 100644
--- a/usr/src/uts/intel/sys/acpi/acutils.h
+++ b/usr/src/uts/intel/sys/acpi/acutils.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -46,6 +46,7 @@
extern const UINT8 AcpiGbl_ResourceAmlSizes[];
+extern const UINT8 AcpiGbl_ResourceAmlSerialBusSizes[];
/* Strings used by the disassembler and debugger resource dump routines */
@@ -69,6 +70,22 @@ extern const char *AcpiGbl_SizDecode[];
extern const char *AcpiGbl_TrsDecode[];
extern const char *AcpiGbl_TtpDecode[];
extern const char *AcpiGbl_TypDecode[];
+extern const char *AcpiGbl_PpcDecode[];
+extern const char *AcpiGbl_IorDecode[];
+extern const char *AcpiGbl_DtsDecode[];
+extern const char *AcpiGbl_CtDecode[];
+extern const char *AcpiGbl_SbtDecode[];
+extern const char *AcpiGbl_AmDecode[];
+extern const char *AcpiGbl_SmDecode[];
+extern const char *AcpiGbl_WmDecode[];
+extern const char *AcpiGbl_CphDecode[];
+extern const char *AcpiGbl_CpoDecode[];
+extern const char *AcpiGbl_DpDecode[];
+extern const char *AcpiGbl_EdDecode[];
+extern const char *AcpiGbl_BpbDecode[];
+extern const char *AcpiGbl_SbDecode[];
+extern const char *AcpiGbl_FcDecode[];
+extern const char *AcpiGbl_PtDecode[];
#endif
/* Types for Resource descriptor entries */
@@ -113,7 +130,6 @@ typedef struct acpi_pkg_info
#define DB_DWORD_DISPLAY 4
#define DB_QWORD_DISPLAY 8
-
/*
* utglobal - Global data structures and procedures
*/
@@ -722,6 +738,11 @@ void
AcpiUtStrlwr (
char *SrcString);
+int
+AcpiUtStricmp (
+ char *String1,
+ char *String2);
+
void
AcpiUtPrintString (
char *String,
@@ -897,6 +918,31 @@ AcpiUtCreateList (
#endif /* ACPI_DBG_TRACK_ALLOCATIONS */
+/*
+ * utaddress - address range check
+ */
+ACPI_STATUS
+AcpiUtAddAddressRange (
+ ACPI_ADR_SPACE_TYPE SpaceId,
+ ACPI_PHYSICAL_ADDRESS Address,
+ UINT32 Length,
+ ACPI_NAMESPACE_NODE *RegionNode);
+
+void
+AcpiUtRemoveAddressRange (
+ ACPI_ADR_SPACE_TYPE SpaceId,
+ ACPI_NAMESPACE_NODE *RegionNode);
+
+UINT32
+AcpiUtCheckAddressRange (
+ ACPI_ADR_SPACE_TYPE SpaceId,
+ ACPI_PHYSICAL_ADDRESS Address,
+ UINT32 Length,
+ BOOLEAN Warn);
+
+void
+AcpiUtDeleteAddressLists (
+ void);
/*
* utxferror - various error/warning output functions
diff --git a/usr/src/uts/intel/sys/acpi/amlcode.h b/usr/src/uts/intel/sys/acpi/amlcode.h
index f6e0de3b35..f778bb0780 100644
--- a/usr/src/uts/intel/sys/acpi/amlcode.h
+++ b/usr/src/uts/intel/sys/acpi/amlcode.h
@@ -7,7 +7,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -191,6 +191,15 @@
/*
+ * Opcodes for "Field" operators
+ */
+#define AML_FIELD_OFFSET_OP (UINT8) 0x00
+#define AML_FIELD_ACCESS_OP (UINT8) 0x01
+#define AML_FIELD_CONNECTION_OP (UINT8) 0x02 /* ACPI 5.0 */
+#define AML_FIELD_EXT_ACCESS_OP (UINT8) 0x03 /* ACPI 5.0 */
+
+
+/*
* Internal opcodes
* Use only "Unknown" AML opcodes, don't attempt to use
* any valid ACPI ASCII values (A-Z, 0-9, '-')
@@ -204,7 +213,8 @@
#define AML_INT_METHODCALL_OP (UINT16) 0x0035
#define AML_INT_RETURN_VALUE_OP (UINT16) 0x0036
#define AML_INT_EVAL_SUBTREE_OP (UINT16) 0x0037
-
+#define AML_INT_CONNECTION_OP (UINT16) 0x0038
+#define AML_INT_EXTACCESSFIELD_OP (UINT16) 0x0039
#define ARG_NONE 0x0
@@ -478,13 +488,16 @@ typedef enum
*/
typedef enum
{
- AML_FIELD_ATTRIB_SMB_QUICK = 0x02,
- AML_FIELD_ATTRIB_SMB_SEND_RCV = 0x04,
- AML_FIELD_ATTRIB_SMB_BYTE = 0x06,
- AML_FIELD_ATTRIB_SMB_WORD = 0x08,
- AML_FIELD_ATTRIB_SMB_BLOCK = 0x0A,
- AML_FIELD_ATTRIB_SMB_WORD_CALL = 0x0C,
- AML_FIELD_ATTRIB_SMB_BLOCK_CALL = 0x0D
+ AML_FIELD_ATTRIB_QUICK = 0x02,
+ AML_FIELD_ATTRIB_SEND_RCV = 0x04,
+ AML_FIELD_ATTRIB_BYTE = 0x06,
+ AML_FIELD_ATTRIB_WORD = 0x08,
+ AML_FIELD_ATTRIB_BLOCK = 0x0A,
+ AML_FIELD_ATTRIB_MULTIBYTE = 0x0B,
+ AML_FIELD_ATTRIB_WORD_CALL = 0x0C,
+ AML_FIELD_ATTRIB_BLOCK_CALL = 0x0D,
+ AML_FIELD_ATTRIB_RAW_BYTES = 0x0E,
+ AML_FIELD_ATTRIB_RAW_PROCESS = 0x0F
} AML_ACCESS_ATTRIBUTE;
diff --git a/usr/src/uts/intel/sys/acpi/amlresrc.h b/usr/src/uts/intel/sys/acpi/amlresrc.h
index 1375d96cf8..6d85b8a2d0 100644
--- a/usr/src/uts/intel/sys/acpi/amlresrc.h
+++ b/usr/src/uts/intel/sys/acpi/amlresrc.h
@@ -6,7 +6,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -59,29 +59,48 @@
#define ACPI_RESTAG_TYPESPECIFICATTRIBUTES "_ATT"
#define ACPI_RESTAG_BASEADDRESS "_BAS"
#define ACPI_RESTAG_BUSMASTER "_BM_" /* Master(1), Slave(0) */
+#define ACPI_RESTAG_DEBOUNCETIME "_DBT"
#define ACPI_RESTAG_DECODE "_DEC"
+#define ACPI_RESTAG_DEVICEPOLARITY "_DPL"
#define ACPI_RESTAG_DMA "_DMA"
#define ACPI_RESTAG_DMATYPE "_TYP" /* Compatible(0), A(1), B(2), F(3) */
+#define ACPI_RESTAG_DRIVESTRENGTH "_DRS"
+#define ACPI_RESTAG_ENDIANNESS "_END"
+#define ACPI_RESTAG_FLOWCONTROL "_FLC"
#define ACPI_RESTAG_GRANULARITY "_GRA"
#define ACPI_RESTAG_INTERRUPT "_INT"
#define ACPI_RESTAG_INTERRUPTLEVEL "_LL_" /* ActiveLo(1), ActiveHi(0) */
#define ACPI_RESTAG_INTERRUPTSHARE "_SHR" /* Shareable(1), NoShare(0) */
#define ACPI_RESTAG_INTERRUPTTYPE "_HE_" /* Edge(1), Level(0) */
+#define ACPI_RESTAG_IORESTRICTION "_IOR"
#define ACPI_RESTAG_LENGTH "_LEN"
+#define ACPI_RESTAG_LINE "_LIN"
#define ACPI_RESTAG_MEMATTRIBUTES "_MTP" /* Memory(0), Reserved(1), ACPI(2), NVS(3) */
#define ACPI_RESTAG_MEMTYPE "_MEM" /* NonCache(0), Cacheable(1) Cache+combine(2), Cache+prefetch(3) */
#define ACPI_RESTAG_MAXADDR "_MAX"
#define ACPI_RESTAG_MINADDR "_MIN"
#define ACPI_RESTAG_MAXTYPE "_MAF"
#define ACPI_RESTAG_MINTYPE "_MIF"
+#define ACPI_RESTAG_MODE "_MOD"
+#define ACPI_RESTAG_PARITY "_PAR"
+#define ACPI_RESTAG_PHASE "_PHA"
+#define ACPI_RESTAG_PIN "_PIN"
+#define ACPI_RESTAG_PINCONFIG "_PPI"
+#define ACPI_RESTAG_POLARITY "_POL"
#define ACPI_RESTAG_REGISTERBITOFFSET "_RBO"
#define ACPI_RESTAG_REGISTERBITWIDTH "_RBW"
#define ACPI_RESTAG_RANGETYPE "_RNG"
#define ACPI_RESTAG_READWRITETYPE "_RW_" /* ReadOnly(0), Writeable (1) */
+#define ACPI_RESTAG_LENGTH_RX "_RXL"
+#define ACPI_RESTAG_LENGTH_TX "_TXL"
+#define ACPI_RESTAG_SLAVEMODE "_SLV"
+#define ACPI_RESTAG_SPEED "_SPE"
+#define ACPI_RESTAG_STOPBITS "_STB"
#define ACPI_RESTAG_TRANSLATION "_TRA"
#define ACPI_RESTAG_TRANSTYPE "_TRS" /* Sparse(1), Dense(0) */
#define ACPI_RESTAG_TYPE "_TTP" /* Translation(1), Static (0) */
#define ACPI_RESTAG_XFERTYPE "_SIZ" /* 8(0), 8And16(1), 16(2) */
+#define ACPI_RESTAG_VENDORDATA "_VEN"
/* Default sizes for "small" resource descriptors */
@@ -92,6 +111,7 @@
#define ASL_RDESC_END_DEPEND_SIZE 0x00
#define ASL_RDESC_IO_SIZE 0x07
#define ASL_RDESC_FIXED_IO_SIZE 0x03
+#define ASL_RDESC_FIXED_DMA_SIZE 0x05
#define ASL_RDESC_END_TAG_SIZE 0x01
@@ -214,6 +234,16 @@ typedef struct aml_resource_end_tag
} AML_RESOURCE_END_TAG;
+typedef struct aml_resource_fixed_dma
+{
+ AML_RESOURCE_SMALL_HEADER_COMMON
+ UINT16 RequestLines;
+ UINT16 Channels;
+ UINT8 Width;
+
+} AML_RESOURCE_FIXED_DMA;
+
+
/*
* LARGE descriptors
*/
@@ -368,6 +398,130 @@ typedef struct aml_resource_generic_register
} AML_RESOURCE_GENERIC_REGISTER;
+
+/* Common descriptor for GpioInt and GpioIo (ACPI 5.0) */
+
+typedef struct aml_resource_gpio
+{
+ AML_RESOURCE_LARGE_HEADER_COMMON
+ UINT8 RevisionId;
+ UINT8 ConnectionType;
+ UINT16 Flags;
+ UINT16 IntFlags;
+ UINT8 PinConfig;
+ UINT16 DriveStrength;
+ UINT16 DebounceTimeout;
+ UINT16 PinTableOffset;
+ UINT8 ResSourceIndex;
+ UINT16 ResSourceOffset;
+ UINT16 VendorOffset;
+ UINT16 VendorLength;
+ /*
+ * Optional fields follow immediately:
+ * 1) PIN list (Words)
+ * 2) Resource Source String
+ * 3) Vendor Data bytes
+ */
+
+} AML_RESOURCE_GPIO;
+
+#define AML_RESOURCE_GPIO_REVISION 1 /* ACPI 5.0 */
+
+/* Values for ConnectionType above */
+
+#define AML_RESOURCE_GPIO_TYPE_INT 0
+#define AML_RESOURCE_GPIO_TYPE_IO 1
+#define AML_RESOURCE_MAX_GPIOTYPE 1
+
+
+/* Common preamble for all serial descriptors (ACPI 5.0) */
+
+#define AML_RESOURCE_SERIAL_COMMON \
+ UINT8 RevisionId; \
+ UINT8 ResSourceIndex; \
+ UINT8 Type; \
+ UINT8 Flags; \
+ UINT16 TypeSpecificFlags; \
+ UINT8 TypeRevisionId; \
+ UINT16 TypeDataLength; \
+
+/* Values for the type field above */
+
+#define AML_RESOURCE_I2C_SERIALBUSTYPE 1
+#define AML_RESOURCE_SPI_SERIALBUSTYPE 2
+#define AML_RESOURCE_UART_SERIALBUSTYPE 3
+#define AML_RESOURCE_MAX_SERIALBUSTYPE 3
+#define AML_RESOURCE_VENDOR_SERIALBUSTYPE 192 /* Vendor defined is 0xC0-0xFF (NOT SUPPORTED) */
+
+typedef struct aml_resource_common_serialbus
+{
+ AML_RESOURCE_LARGE_HEADER_COMMON
+ AML_RESOURCE_SERIAL_COMMON
+
+} AML_RESOURCE_COMMON_SERIALBUS;
+
+typedef struct aml_resource_i2c_serialbus
+{
+ AML_RESOURCE_LARGE_HEADER_COMMON
+ AML_RESOURCE_SERIAL_COMMON
+ UINT32 ConnectionSpeed;
+ UINT16 SlaveAddress;
+ /*
+ * Optional fields follow immediately:
+ * 1) Vendor Data bytes
+ * 2) Resource Source String
+ */
+
+} AML_RESOURCE_I2C_SERIALBUS;
+
+#define AML_RESOURCE_I2C_REVISION 1 /* ACPI 5.0 */
+#define AML_RESOURCE_I2C_TYPE_REVISION 1 /* ACPI 5.0 */
+#define AML_RESOURCE_I2C_MIN_DATA_LEN 6
+
+typedef struct aml_resource_spi_serialbus
+{
+ AML_RESOURCE_LARGE_HEADER_COMMON
+ AML_RESOURCE_SERIAL_COMMON
+ UINT32 ConnectionSpeed;
+ UINT8 DataBitLength;
+ UINT8 ClockPhase;
+ UINT8 ClockPolarity;
+ UINT16 DeviceSelection;
+ /*
+ * Optional fields follow immediately:
+ * 1) Vendor Data bytes
+ * 2) Resource Source String
+ */
+
+} AML_RESOURCE_SPI_SERIALBUS;
+
+#define AML_RESOURCE_SPI_REVISION 1 /* ACPI 5.0 */
+#define AML_RESOURCE_SPI_TYPE_REVISION 1 /* ACPI 5.0 */
+#define AML_RESOURCE_SPI_MIN_DATA_LEN 9
+
+
+typedef struct aml_resource_uart_serialbus
+{
+ AML_RESOURCE_LARGE_HEADER_COMMON
+ AML_RESOURCE_SERIAL_COMMON
+ UINT32 DefaultBaudRate;
+ UINT16 RxFifoSize;
+ UINT16 TxFifoSize;
+ UINT8 Parity;
+ UINT8 LinesEnabled;
+ /*
+ * Optional fields follow immediately:
+ * 1) Vendor Data bytes
+ * 2) Resource Source String
+ */
+
+} AML_RESOURCE_UART_SERIALBUS;
+
+#define AML_RESOURCE_UART_REVISION 1 /* ACPI 5.0 */
+#define AML_RESOURCE_UART_TYPE_REVISION 1 /* ACPI 5.0 */
+#define AML_RESOURCE_UART_MIN_DATA_LEN 10
+
+
/* restore default alignment */
#pragma pack()
@@ -390,6 +544,7 @@ typedef union aml_resource
AML_RESOURCE_END_DEPENDENT EndDpf;
AML_RESOURCE_IO Io;
AML_RESOURCE_FIXED_IO FixedIo;
+ AML_RESOURCE_FIXED_DMA FixedDma;
AML_RESOURCE_VENDOR_SMALL VendorSmall;
AML_RESOURCE_END_TAG EndTag;
@@ -405,6 +560,11 @@ typedef union aml_resource
AML_RESOURCE_ADDRESS64 Address64;
AML_RESOURCE_EXTENDED_ADDRESS64 ExtAddress64;
AML_RESOURCE_EXTENDED_IRQ ExtendedIrq;
+ AML_RESOURCE_GPIO Gpio;
+ AML_RESOURCE_I2C_SERIALBUS I2cSerialBus;
+ AML_RESOURCE_SPI_SERIALBUS SpiSerialBus;
+ AML_RESOURCE_UART_SERIALBUS UartSerialBus;
+ AML_RESOURCE_COMMON_SERIALBUS CommonSerialBus;
/* Utility overlays */
diff --git a/usr/src/uts/intel/sys/acpi/platform/accygwin.h b/usr/src/uts/intel/sys/acpi/platform/accygwin.h
index 10ae3e83b7..831f313987 100644
--- a/usr/src/uts/intel/sys/acpi/platform/accygwin.h
+++ b/usr/src/uts/intel/sys/acpi/platform/accygwin.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/sys/acpi/platform/acefi.h b/usr/src/uts/intel/sys/acpi/platform/acefi.h
index 9249ae8d24..bab6159381 100644
--- a/usr/src/uts/intel/sys/acpi/platform/acefi.h
+++ b/usr/src/uts/intel/sys/acpi/platform/acefi.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/sys/acpi/platform/acenv.h b/usr/src/uts/intel/sys/acpi/platform/acenv.h
index 7f893c13ff..19ca03baac 100644
--- a/usr/src/uts/intel/sys/acpi/platform/acenv.h
+++ b/usr/src/uts/intel/sys/acpi/platform/acenv.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -79,7 +79,7 @@
#define ACPI_SINGLE_THREADED
#endif
-/* AcpiExec and AcpiBin configuration */
+/* AcpiExec configuration. Multithreaded with full AML debugger */
#ifdef ACPI_EXEC_APP
#define ACPI_APPLICATION
@@ -88,7 +88,27 @@
#define ACPI_DBG_TRACK_ALLOCATIONS
#endif
-#ifdef ACPI_BIN_APP
+/* AcpiNames configuration. Single threaded with debugger output enabled. */
+
+#ifdef ACPI_NAMES_APP
+#define ACPI_DEBUGGER
+#define ACPI_APPLICATION
+#define ACPI_SINGLE_THREADED
+#endif
+
+/*
+ * AcpiBin/AcpiHelp/AcpiSrc configuration. All single threaded, with
+ * no debug output.
+ */
+#if (defined ACPI_BIN_APP) || \
+ (defined ACPI_SRC_APP) || \
+ (defined ACPI_XTRACT_APP)
+#define ACPI_APPLICATION
+#define ACPI_SINGLE_THREADED
+#endif
+
+#ifdef ACPI_HELP_APP
+#define ACPI_DEBUG_OUTPUT
#define ACPI_APPLICATION
#define ACPI_SINGLE_THREADED
#endif
diff --git a/usr/src/uts/intel/sys/acpi/platform/acfreebsd.h b/usr/src/uts/intel/sys/acpi/platform/acfreebsd.h
index ad106ecf5e..98f972f373 100644
--- a/usr/src/uts/intel/sys/acpi/platform/acfreebsd.h
+++ b/usr/src/uts/intel/sys/acpi/platform/acfreebsd.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/sys/acpi/platform/acgcc.h b/usr/src/uts/intel/sys/acpi/platform/acgcc.h
index 4b197ff63a..d2eafff395 100644
--- a/usr/src/uts/intel/sys/acpi/platform/acgcc.h
+++ b/usr/src/uts/intel/sys/acpi/platform/acgcc.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/sys/acpi/platform/acintel.h b/usr/src/uts/intel/sys/acpi/platform/acintel.h
index cf7e0bc224..46d238bedb 100644
--- a/usr/src/uts/intel/sys/acpi/platform/acintel.h
+++ b/usr/src/uts/intel/sys/acpi/platform/acintel.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/sys/acpi/platform/aclinux.h b/usr/src/uts/intel/sys/acpi/platform/aclinux.h
index 3fc3d97cb8..edfff150ed 100644
--- a/usr/src/uts/intel/sys/acpi/platform/aclinux.h
+++ b/usr/src/uts/intel/sys/acpi/platform/aclinux.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -87,17 +87,18 @@
/* Host-dependent types and defines for user-space ACPICA */
#define ACPI_FLUSH_CPU_CACHE()
-#define ACPI_CAST_PTHREAD_T(pthread) ((ACPI_THREAD_ID) (pthread))
#if defined(__ia64__) || defined(__x86_64__)
#define ACPI_MACHINE_WIDTH 64
#define COMPILER_DEPENDENT_INT64 long
#define COMPILER_DEPENDENT_UINT64 unsigned long
+#define ACPI_CAST_PTHREAD_T(pthread) ((ACPI_THREAD_ID) (pthread))
#else
#define ACPI_MACHINE_WIDTH 32
#define COMPILER_DEPENDENT_INT64 long long
#define COMPILER_DEPENDENT_UINT64 unsigned long long
#define ACPI_USE_NATIVE_DIVIDE
+#define ACPI_CAST_PTHREAD_T(pthread) ((ACPI_THREAD_ID) (UINT32) (void *) (pthread))
#endif
#ifndef __cdecl
diff --git a/usr/src/uts/intel/sys/acpi/platform/acmsvc.h b/usr/src/uts/intel/sys/acpi/platform/acmsvc.h
index 8590d95fd0..b73a62caf1 100644
--- a/usr/src/uts/intel/sys/acpi/platform/acmsvc.h
+++ b/usr/src/uts/intel/sys/acpi/platform/acmsvc.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -44,6 +44,28 @@
#ifndef __ACMSVC_H__
#define __ACMSVC_H__
+
+/*
+ * Map low I/O functions for MS. This allows us to disable MS language
+ * extensions for maximum portability.
+ */
+#define open _open
+#define read _read
+#define write _write
+#define close _close
+#define stat _stat
+#define fstat _fstat
+#define mkdir _mkdir
+#define strlwr _strlwr
+#define O_RDONLY _O_RDONLY
+#define O_BINARY _O_BINARY
+#define O_CREAT _O_CREAT
+#define O_WRONLY _O_WRONLY
+#define O_TRUNC _O_TRUNC
+#define S_IREAD _S_IREAD
+#define S_IWRITE _S_IWRITE
+#define S_IFDIR _S_IFDIR
+
/* Eliminate warnings for "old" (non-secure) versions of clib functions */
#ifndef _CRT_SECURE_NO_WARNINGS
diff --git a/usr/src/uts/intel/sys/acpi/platform/acnetbsd.h b/usr/src/uts/intel/sys/acpi/platform/acnetbsd.h
index 7c7a63ec94..23b0e8c4c3 100644
--- a/usr/src/uts/intel/sys/acpi/platform/acnetbsd.h
+++ b/usr/src/uts/intel/sys/acpi/platform/acnetbsd.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/sys/acpi/platform/acos2.h b/usr/src/uts/intel/sys/acpi/platform/acos2.h
index f01d697e31..0704c50161 100644
--- a/usr/src/uts/intel/sys/acpi/platform/acos2.h
+++ b/usr/src/uts/intel/sys/acpi/platform/acos2.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/sys/acpi/platform/acwin.h b/usr/src/uts/intel/sys/acpi/platform/acwin.h
index 1355827ccb..c7d4bcd386 100644
--- a/usr/src/uts/intel/sys/acpi/platform/acwin.h
+++ b/usr/src/uts/intel/sys/acpi/platform/acwin.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/sys/acpi/platform/acwin64.h b/usr/src/uts/intel/sys/acpi/platform/acwin64.h
index b4d184294c..022baf050e 100644
--- a/usr/src/uts/intel/sys/acpi/platform/acwin64.h
+++ b/usr/src/uts/intel/sys/acpi/platform/acwin64.h
@@ -5,7 +5,7 @@
*****************************************************************************/
/*
- * Copyright (C) 2000 - 2011, Intel Corp.
+ * Copyright (C) 2000 - 2012, Intel Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/usr/src/uts/intel/sys/machbrand.h b/usr/src/uts/intel/sys/machbrand.h
index 3f9ebdb6b7..2e77eb845b 100644
--- a/usr/src/uts/intel/sys/machbrand.h
+++ b/usr/src/uts/intel/sys/machbrand.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_MACHBRAND_H
@@ -35,17 +36,21 @@ extern "C" {
struct brand_mach_ops {
void (*b_sysenter)(void);
+ void (*b_int80)(void);
void (*b_int91)(void);
void (*b_syscall)(void);
void (*b_syscall32)(void);
+ greg_t (*b_fixsegreg)(greg_t, model_t);
+ uintptr_t (*b_fsbase)(klwp_t *, uintptr_t);
};
#endif /* _ASM */
#define BRAND_CB_SYSENTER 0
-#define BRAND_CB_INT91 1
-#define BRAND_CB_SYSCALL 2
-#define BRAND_CB_SYSCALL32 3
+#define BRAND_CB_INT80 1
+#define BRAND_CB_INT91 2
+#define BRAND_CB_SYSCALL 3
+#define BRAND_CB_SYSCALL32 4
#ifdef __cplusplus
}
diff --git a/usr/src/uts/intel/sys/machlock.h b/usr/src/uts/intel/sys/machlock.h
index d740bc6212..e362ca9aba 100644
--- a/usr/src/uts/intel/sys/machlock.h
+++ b/usr/src/uts/intel/sys/machlock.h
@@ -21,13 +21,12 @@
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
*/
#ifndef _SYS_MACHLOCK_H
#define _SYS_MACHLOCK_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifndef _ASM
#include <sys/types.h>
#include <sys/time.h>
@@ -76,7 +75,7 @@ typedef lock_t disp_lock_t; /* dispatcher lock type */
/*
* Externs for CLOCK_LOCK and clock resolution
*/
-extern volatile int hres_lock;
+extern volatile uint32_t hres_lock;
extern hrtime_t hrtime_base;
extern int clock_res;
diff --git a/usr/src/uts/intel/sys/segments.h b/usr/src/uts/intel/sys/segments.h
index c4b194fcd8..5368f80735 100644
--- a/usr/src/uts/intel/sys/segments.h
+++ b/usr/src/uts/intel/sys/segments.h
@@ -2,7 +2,7 @@
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
- * Copyright 2011 Joyent, Inc. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
*/
#ifndef _SYS_SEGMENTS_H
@@ -522,6 +522,7 @@ void init_boot_gdt(user_desc_t *);
#define GDT_B16DATA 4 /* bios call 16 bit data descriptor */
#define GDT_B64CODE 5 /* dboot 64 bit code descriptor */
#define GDT_BGSTMP 7 /* kmdb descriptor only used early in boot */
+#define GDT_CPUID 16 /* store numeric id of current CPU */
#if defined(__amd64)
@@ -530,8 +531,8 @@ void init_boot_gdt(user_desc_t *);
#define GDT_U32CODE 8 /* 32-bit process on 64-bit kernel %cs */
#define GDT_UDATA 9 /* user data seg %ds (32 and 64 bit) */
#define GDT_UCODE 10 /* native user code seg %cs */
-#define GDT_LDT 12 /* LDT for current process */
-#define GDT_KTSS 14 /* kernel tss */
+#define GDT_LDT 12 /* (12-13) LDT for current process */
+#define GDT_KTSS 14 /* (14-15) kernel tss */
#define GDT_FS GDT_NULL /* kernel %fs segment selector */
#define GDT_GS GDT_NULL /* kernel %gs segment selector */
#define GDT_LWPFS 55 /* lwp private %fs segment selector (32-bit) */
@@ -683,6 +684,8 @@ extern void _start(), cmnint();
extern void achktrap(), mcetrap();
extern void xmtrap();
extern void fasttrap();
+extern void sys_int80();
+extern void brand_sys_int80();
extern void dtrace_ret();
#if !defined(__amd64)
diff --git a/usr/src/uts/intel/sys/ucontext.h b/usr/src/uts/intel/sys/ucontext.h
index 5f64fa9393..adbb01a301 100644
--- a/usr/src/uts/intel/sys/ucontext.h
+++ b/usr/src/uts/intel/sys/ucontext.h
@@ -20,6 +20,7 @@
*/
/*
+ * Copyright 2015 Joyent, Inc.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
@@ -84,9 +85,16 @@ struct __ucontext {
sigset_t uc_sigmask;
stack_t uc_stack;
mcontext_t uc_mcontext;
- long uc_filler[5]; /* see ABI spec for Intel386 */
+ /*
+ * The Intel386 ABI specification includes a 5-element array of longs
+ * called "uc_filler", padding the size of the struct to 512 bytes. To
+ * allow zone brands to communicate extra data right the way through
+ * the signal handling process, from sigacthandler to setcontext, we
+ * steal the first three of these longs as a brand-private member.
+ */
+ void *uc_brand_data[3];
+ long uc_filler[2];
};
-
#if defined(_SYSCALL32)
/* Kernel view of user ILP32 ucontext structure */
@@ -97,7 +105,8 @@ typedef struct ucontext32 {
sigset_t uc_sigmask;
stack32_t uc_stack;
mcontext32_t uc_mcontext;
- int32_t uc_filler[5];
+ caddr32_t uc_brand_data[3];
+ int32_t uc_filler[2];
} ucontext32_t;
#if defined(_KERNEL)
diff --git a/usr/src/uts/intel/sys/x86_archext.h b/usr/src/uts/intel/sys/x86_archext.h
index 4aedd08a53..1ea68e0184 100644
--- a/usr/src/uts/intel/sys/x86_archext.h
+++ b/usr/src/uts/intel/sys/x86_archext.h
@@ -28,7 +28,7 @@
* All rights reserved.
*/
/*
- * Copyright (c) 2015, Joyent, Inc.
+ * Copyright 2016 Joyent, Inc.
* Copyright 2012 Jens Elkner <jel+illumos@cs.uni-magdeburg.de>
* Copyright 2012 Hans Rosenfeld <rosenfeld@grumpf.hope-2000.org>
* Copyright 2014 Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
@@ -91,7 +91,7 @@ extern "C" {
#define CPUID_INTC_ECX_SSE3 0x00000001 /* Yet more SSE extensions */
#define CPUID_INTC_ECX_PCLMULQDQ 0x00000002 /* PCLMULQDQ insn */
- /* 0x00000004 - reserved */
+#define CPUID_INTC_ECX_DTES64 0x00000004 /* 64-bit DS area */
#define CPUID_INTC_ECX_MON 0x00000008 /* MONITOR/MWAIT */
#define CPUID_INTC_ECX_DSCPL 0x00000010 /* CPL-qualified debug store */
#define CPUID_INTC_ECX_VMX 0x00000020 /* Hardware VM extensions */
@@ -104,15 +104,16 @@ extern "C" {
#define CPUID_INTC_ECX_FMA 0x00001000 /* Fused Multiply Add */
#define CPUID_INTC_ECX_CX16 0x00002000 /* cmpxchg16 */
#define CPUID_INTC_ECX_ETPRD 0x00004000 /* extended task pri messages */
- /* 0x00008000 - reserved */
+#define CPUID_INTC_ECX_PDCM 0x00008000 /* Perf/Debug Capability MSR */
/* 0x00010000 - reserved */
- /* 0x00020000 - reserved */
+#define CPUID_INTC_ECX_PCID 0x00020000 /* process-context ids */
#define CPUID_INTC_ECX_DCA 0x00040000 /* direct cache access */
#define CPUID_INTC_ECX_SSE4_1 0x00080000 /* SSE4.1 insns */
#define CPUID_INTC_ECX_SSE4_2 0x00100000 /* SSE4.2 insns */
#define CPUID_INTC_ECX_X2APIC 0x00200000 /* x2APIC */
#define CPUID_INTC_ECX_MOVBE 0x00400000 /* MOVBE insn */
#define CPUID_INTC_ECX_POPCNT 0x00800000 /* POPCNT insn */
+#define CPUID_INTC_ECX_TSCDL 0x01000000 /* Deadline TSC */
#define CPUID_INTC_ECX_AES 0x02000000 /* AES insns */
#define CPUID_INTC_ECX_XSAVE 0x04000000 /* XSAVE/XRESTOR insns */
#define CPUID_INTC_ECX_OSXSAVE 0x08000000 /* OS supports XSAVE insns */
@@ -170,9 +171,17 @@ extern "C" {
#define CPUID_AMD_ECX_3DNP 0x00000100 /* AMD: 3DNowPrefectch */
#define CPUID_AMD_ECX_OSVW 0x00000200 /* AMD: OSVW */
#define CPUID_AMD_ECX_IBS 0x00000400 /* AMD: IBS */
-#define CPUID_AMD_ECX_SSE5 0x00000800 /* AMD: SSE5 */
+#define CPUID_AMD_ECX_SSE5 0x00000800 /* AMD: Extended AVX */
#define CPUID_AMD_ECX_SKINIT 0x00001000 /* AMD: SKINIT */
#define CPUID_AMD_ECX_WDT 0x00002000 /* AMD: WDT */
+ /* 0x00004000 - reserved */
+#define CPUID_AMD_ECX_LWP 0x00008000 /* AMD: Lightweight profiling */
+#define CPUID_AMD_ECX_FMA4 0x00010000 /* AMD: 4-operand FMA support */
+ /* 0x00020000 - reserved */
+ /* 0x00040000 - reserved */
+#define CPUID_AMD_ECX_NIDMSR 0x00080000 /* AMD: Node ID MSR */
+ /* 0x00100000 - reserved */
+#define CPUID_AMD_ECX_TBM 0x00200000 /* AMD: trailing bit manips. */
#define CPUID_AMD_ECX_TOPOEXT 0x00400000 /* AMD: Topology Extensions */
/*
@@ -195,6 +204,8 @@ extern "C" {
#define CPUID_INTC_EBX_7_0_AVX2 0x00000020 /* AVX2 supported */
#define CPUID_INTC_EBX_7_0_SMEP 0x00000080 /* SMEP in CR4 */
#define CPUID_INTC_EBX_7_0_BMI2 0x00000100 /* BMI2 Instrs */
+#define CPUID_INTC_EBX_7_0_RDSEED 0x00040000 /* RDSEED instr */
+#define CPUID_INTC_EBX_7_0_ADX 0x00080000 /* ADX instrs */
#define CPUID_INTC_EBX_7_0_SMAP 0x00100000 /* SMAP in CR 4 */
#define P5_MCHADDR 0x0
@@ -372,14 +383,8 @@ extern "C" {
#define X86FSET_FMA 44
#define X86FSET_SMEP 45
#define X86FSET_SMAP 46
-
-/*
- * flags to patch tsc_read routine.
- */
-#define X86_NO_TSC 0x0
-#define X86_HAVE_TSCP 0x1
-#define X86_TSC_MFENCE 0x2
-#define X86_TSC_LFENCE 0x4
+#define X86FSET_ADX 47
+#define X86FSET_RDSEED 48
/*
* Intel Deep C-State invariant TSC in leaf 0x80000007.
@@ -632,7 +637,7 @@ extern "C" {
#if defined(_KERNEL) || defined(_KMEMUSER)
-#define NUM_X86_FEATURES 47
+#define NUM_X86_FEATURES 49
extern uchar_t x86_featureset[];
extern void free_x86_featureset(void *featureset);
diff --git a/usr/src/uts/intel/udmf/Makefile b/usr/src/uts/intel/udmf/Makefile
new file mode 100644
index 0000000000..1e9f0e5400
--- /dev/null
+++ b/usr/src/uts/intel/udmf/Makefile
@@ -0,0 +1,74 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = udmf
+OBJECTS = $(UDMF_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(UDMF_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+CPPFLAGS += -I$(UTSBASE)/common/io/usbgem
+CPPFLAGS += -DVERSION=\"2.0.0\"
+CPPFLAGS += -DUSBGEM_CONFIG_GLDv3
+LDFLAGS += -dy -N misc/mac -N drv/ip -N misc/usba -N misc/usbgem
+
+CERRWARN += -_gcc=-Wno-unused-value
+CERRWARN += -_gcc=-Wno-unused-function
+CERRWARN += -_gcc=-Wno-unused-variable
+CERRWARN += -_gcc=-Wno-unused-label
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/upf/Makefile b/usr/src/uts/intel/upf/Makefile
new file mode 100644
index 0000000000..5698051c51
--- /dev/null
+++ b/usr/src/uts/intel/upf/Makefile
@@ -0,0 +1,74 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = upf
+OBJECTS = $(UPF_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(UPF_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+CPPFLAGS += -I$(UTSBASE)/common/io/usbgem
+CPPFLAGS += -DVERSION=\"2.0.1\"
+CPPFLAGS += -DUSBGEM_CONFIG_GLDv3
+LDFLAGS += -dy -N misc/mac -N drv/ip -N misc/usba -N misc/usbgem
+
+CERRWARN += -_gcc=-Wno-type-limits
+CERRWARN += -_gcc=-Wno-unused-function
+CERRWARN += -_gcc=-Wno-unused-variable
+CERRWARN += -_gcc=-Wno-unused-label
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/urf/Makefile b/usr/src/uts/intel/urf/Makefile
new file mode 100644
index 0000000000..947cdac9e5
--- /dev/null
+++ b/usr/src/uts/intel/urf/Makefile
@@ -0,0 +1,74 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = urf
+OBJECTS = $(URF_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(URF_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+CPPFLAGS += -I$(UTSBASE)/common/io/usbgem
+CPPFLAGS += -DVERSION=\"2.0.0\"
+CPPFLAGS += -DUSBGEM_CONFIG_GLDv3
+CERRWARN += -_gcc=-Wno-uninitialized
+LDFLAGS += -dy -N misc/mac -N drv/ip -N misc/usba -N misc/usbgem
+
+CERRWARN += -_gcc=-Wno-unused-function
+CERRWARN += -_gcc=-Wno-unused-variable
+CERRWARN += -_gcc=-Wno-unused-label
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/usbgem/Makefile b/usr/src/uts/intel/usbgem/Makefile
new file mode 100644
index 0000000000..4465fd6c92
--- /dev/null
+++ b/usr/src/uts/intel/usbgem/Makefile
@@ -0,0 +1,84 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = usbgem
+OBJECTS = $(USBGEM_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(USBGEM_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_MISC_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# The USBGEM has support for various different features. We use
+# these pre-processor macros to define the set we care about.
+#
+CPPFLAGS += \
+ -DUSBGEM_CONFIG_GLDv3 \
+ -DMODULE \
+ -DGEN_CONFIG_FMA \
+ -DUSBGEM_CONFIG_MAC_PROP \
+ -DVERSION=\"1.6\"
+
+CERRWARN += -_gcc=-Wno-uninitialized
+CERRWARN += -_gcc=-Wno-parentheses
+CERRWARN += -_gcc=-Wno-switch
+CERRWARN += -_gcc=-Wno-unused-variable
+CERRWARN += -_gcc=-Wno-unused-label
+CERRWARN += -_gcc=-Wno-unused-function
+
+LDFLAGS += -dy -N misc/mac -N drv/ip -N misc/usba
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+# Include common targets.
+#
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/vmxnet/Makefile b/usr/src/uts/intel/vmxnet/Makefile
new file mode 100644
index 0000000000..4f3ebcf5af
--- /dev/null
+++ b/usr/src/uts/intel/vmxnet/Makefile
@@ -0,0 +1,93 @@
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2012, Joyent, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# VMware Ethernet Adapter b module
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+UTSCLOSED = ../../../../closed/uts
+
+#
+# Define the module and object file sets.
+#
+MODULE = vmxnet
+#
+ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/intel/io/vmxnet
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/intel/Makefile.intel
+
+#
+# The list of object files is defined here, rather than in Makefile.files,
+# because the "$(CLOSED_BUILD)" macro has not been defined at the time
+# Makefile.files is processed.
+#
+VMXNET_OBJS += vmxnet.o
+
+OBJECTS = $(VMXNET_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(VMXNET_OBJS:%.o=$(LINTS_DIR)/%.ln)
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY) $(SRC_CONFFILE)
+LINT_TARGET = $(LINT_MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+CPPFLAGS += -I$(UTSBASE)/i86pc
+LDFLAGS += -dy -N misc/gld
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+#
+# Intentionally don't build lint libraries to minimize divergence with
+# the upstream source.
+#
+lint:
+modlintlib:
+clean.lint:
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include ../Makefile.targ
diff --git a/usr/src/uts/intel/vnd/Makefile b/usr/src/uts/intel/vnd/Makefile
new file mode 100644
index 0000000000..f66f89062c
--- /dev/null
+++ b/usr/src/uts/intel/vnd/Makefile
@@ -0,0 +1,56 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+UTSBASE = ../..
+
+MODULE = vnd
+OBJECTS = $(VND_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(VND_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_DRV_DIR)/$(MODULE)
+
+include $(UTSBASE)/intel/Makefile.intel
+
+ALL_TARGET = $(BINARY) $(SRC_CONFILE)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+CONF_SRCDIR = $(UTSBASE)/common/io/vnd
+
+LDFLAGS += -dy -Nmisc/neti -Nmisch/hook -Nfs/dev -Nmisc/gsqueue
+
+#
+# We use <sys/ctype.h> which causes gcc to think that all of its inline
+# functions are defined and unused.
+#
+CERRWARN += -_gcc=-Wno-unused-function
+
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/vxlan/Makefile b/usr/src/uts/intel/vxlan/Makefile
new file mode 100644
index 0000000000..89a24c17f5
--- /dev/null
+++ b/usr/src/uts/intel/vxlan/Makefile
@@ -0,0 +1,51 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+UTSBASE = ../..
+
+MODULE = vxlan
+OBJECTS = $(OVERLAY_VXLAN_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(OVERLAY_VXLAN_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_OVERLAY_DIR)/$(MODULE)
+
+include $(UTSBASE)/intel/Makefile.intel
+
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+LDFLAGS += -dy -Ndrv/overlay -Ndrv/ip
+
+LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
+
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/intel/zfd/Makefile b/usr/src/uts/intel/zfd/Makefile
new file mode 100644
index 0000000000..c270466d08
--- /dev/null
+++ b/usr/src/uts/intel/zfd/Makefile
@@ -0,0 +1,48 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2014 Joyent, Inc. All rights reserved.
+#
+# uts/intel/zfd/Makefile
+
+UTSBASE = ../..
+
+MODULE = zfd
+OBJECTS = $(ZFD_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(ZFD_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_DRV_DIR)/$(MODULE)
+
+include $(UTSBASE)/intel/Makefile.intel
+
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+include $(UTSBASE)/intel/Makefile.targ
diff --git a/usr/src/uts/sparc/Makefile.sparc b/usr/src/uts/sparc/Makefile.sparc
index abea00c625..4fd54a12ef 100644
--- a/usr/src/uts/sparc/Makefile.sparc
+++ b/usr/src/uts/sparc/Makefile.sparc
@@ -219,7 +219,7 @@ DRV_KMODS += log logindmux kssl mm nca physmem pm poll pool
DRV_KMODS += pseudo ptc ptm pts ptsl ramdisk random rsm rts sad
DRV_KMODS += simnet softmac sppp sppptun sy sysevent sysmsg
DRV_KMODS += spdsock
-DRV_KMODS += tcp tcp6 timerfd tl tnf ttymux udp udp6 wc winlock zcons
+DRV_KMODS += tcp tcp6 timerfd tl tnf ttymux udp udp6 wc winlock zcons zfd
DRV_KMODS += ippctl
DRV_KMODS += dld
DRV_KMODS += ipd
@@ -239,8 +239,10 @@ DRV_KMODS += nulldriver
DRV_KMODS += bridge trill
DRV_KMODS += bpf
DRV_KMODS += dca
+DRV_KMODS += inotify
DRV_KMODS += eventfd
DRV_KMODS += signalfd
+DRV_KMODS += timerfd
#
# Hardware Drivers in common space
@@ -494,6 +496,7 @@ SOCKET_KMODS += socksctp
SOCKET_KMODS += socksdp
SOCKET_KMODS += sockrds
SOCKET_KMODS += ksslf
+SOCKET_KMODS += datafilt
#
# kiconv modules (/kernel/kiconv):
diff --git a/usr/src/uts/sparc/datafilt/Makefile b/usr/src/uts/sparc/datafilt/Makefile
new file mode 100644
index 0000000000..80969500ef
--- /dev/null
+++ b/usr/src/uts/sparc/datafilt/Makefile
@@ -0,0 +1,73 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2011, OmniTI Computer Consulting, Inc. All rights reserved.
+# Copyright 2012, Nexenta Systems, Inc. All rights reserved.
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = datafilt
+OBJECTS = $(DATAFILT_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(DATAFILT_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_SOCK_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/sparc/Makefile.sparc
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# lint pass one enforcement and OS version
+#
+CFLAGS += $(CCVERBOSE)
+
+LDFLAGS += -dy -Nfs/sockfs -Ndrv/ip
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/sparc/Makefile.targ
diff --git a/usr/src/uts/sparc/inotify/Makefile b/usr/src/uts/sparc/inotify/Makefile
new file mode 100644
index 0000000000..ce2b956955
--- /dev/null
+++ b/usr/src/uts/sparc/inotify/Makefile
@@ -0,0 +1,70 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2014 Joyent, Inc. All rights reserved.
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = inotify
+OBJECTS = $(INOTIFY_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(INOTIFY_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_DRV_DIR)/$(MODULE)
+CONF_SRCDIR = $(UTSBASE)/common/io
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/sparc/Makefile.sparc
+
+LINTTAGS += -erroff=E_STRUCT_DERIVED_FROM_FLEX_MBR
+CERRWARN += -_gcc=-Wno-parentheses
+LDFLAGS += -dy -Nfs/specfs
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY) $(SRC_CONFILE)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE)
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/sparc/Makefile.targ
diff --git a/usr/src/uts/sparc/ipf/ipf.global-objs.debug64 b/usr/src/uts/sparc/ipf/ipf.global-objs.debug64
index 663613cee3..cb3e5485b2 100644
--- a/usr/src/uts/sparc/ipf/ipf.global-objs.debug64
+++ b/usr/src/uts/sparc/ipf/ipf.global-objs.debug64
@@ -25,6 +25,10 @@
# Copyright 2013 Joyent, Inc. All rights reserved
#
+hook4_vnd_in
+hook4_vnd_out
+hook6_vnd_in
+hook6_vnd_out
fr_availfuncs
fr_features
fr_objbytes
diff --git a/usr/src/uts/sparc/syscall/getcontext.c b/usr/src/uts/sparc/syscall/getcontext.c
index c9adb54b86..3c8dbf9190 100644
--- a/usr/src/uts/sparc/syscall/getcontext.c
+++ b/usr/src/uts/sparc/syscall/getcontext.c
@@ -20,6 +20,9 @@
*/
/*
+ * Copyright 2015 Joyent, Inc.
+ */
+/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -110,10 +113,15 @@ savecontext(ucontext_t *ucp, const k_sigset_t *mask)
ucp->uc_flags &= ~UC_FPU;
ucp->uc_mcontext.gwins = (gwindows_t *)NULL;
- /*
- * Save signal mask.
- */
- sigktou(mask, &ucp->uc_sigmask);
+ if (mask != NULL) {
+ /*
+ * Save signal mask.
+ */
+ sigktou(mask, &ucp->uc_sigmask);
+ } else {
+ ucp->uc_flags &= ~UC_SIGMASK;
+ bzero(&ucp->uc_sigmask, sizeof (ucp->uc_sigmask));
+ }
}
@@ -412,11 +420,16 @@ savecontext32(ucontext32_t *ucp, const k_sigset_t *mask, struct fq32 *dfq)
ucp->uc_flags &= ~UC_FPU;
ucp->uc_mcontext.gwins = (caddr32_t)NULL;
- /*
- * Save signal mask (the 32- and 64-bit sigset_t structures are
- * identical).
- */
- sigktou(mask, (sigset_t *)&ucp->uc_sigmask);
+ if (mask != NULL) {
+ /*
+ * Save signal mask (the 32- and 64-bit sigset_t structures are
+ * identical).
+ */
+ sigktou(mask, (sigset_t *)&ucp->uc_sigmask);
+ } else {
+ ucp->uc_flags &= ~UC_SIGMASK;
+ bzero(&ucp->uc_sigmask, sizeof (ucp->uc_sigmask));
+ }
}
int
diff --git a/usr/src/uts/sparc/zfd/Makefile b/usr/src/uts/sparc/zfd/Makefile
new file mode 100644
index 0000000000..ebdba686b4
--- /dev/null
+++ b/usr/src/uts/sparc/zfd/Makefile
@@ -0,0 +1,50 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2014 Joyent, Inc. All rights reserved.
+#
+# uts/intel/zfd/Makefile
+
+UTSBASE = ../..
+
+MODULE = zfd
+OBJECTS = $(ZFD_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(ZFD_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(USR_DRV_DIR)/$(MODULE)
+
+include $(UTSBASE)/sparc/Makefile.sparc
+
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+CFLAGS += $(CCVERBOSE)
+
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+include $(UTSBASE)/sparc/Makefile.targ
diff --git a/usr/src/uts/sun4/brand/common/brand_solaris.s b/usr/src/uts/sun4/brand/common/brand_solaris.s
index 889218bc5f..9097273036 100644
--- a/usr/src/uts/sun4/brand/common/brand_solaris.s
+++ b/usr/src/uts/sun4/brand/common/brand_solaris.s
@@ -236,7 +236,7 @@ _emulation_check:
#endif /* sun4v */
ldn [%g2 + CPU_THREAD], %g3; /* get thread ptr */
ldn [%g3 + T_PROCP], %g4; /* get proc ptr */
- ldn [%g4 + P_BRAND_DATA], %g5; /* get brand data ptr */
+ ldn [%g4 + __P_BRAND_DATA], %g5; /* get brand data ptr */
ldn [%g5 + SPD_HANDLER], %g5; /* get userland brnd hdlr ptr */
brz %g5, _exit; /* has it been set? */
nop;
diff --git a/usr/src/uts/sun4/ml/offsets.in b/usr/src/uts/sun4/ml/offsets.in
index 4f6d19ba01..de214274ee 100644
--- a/usr/src/uts/sun4/ml/offsets.in
+++ b/usr/src/uts/sun4/ml/offsets.in
@@ -109,7 +109,7 @@ proc PROCSIZE
p_utraps
p_agenttp
p_brand
- p_brand_data
+ __p_brand_data
\#define P_UTRAP4 (UT_ILLTRAP_INSTRUCTION * CPTRSIZE)
\#define P_UTRAP7 (UT_FP_DISABLED * CPTRSIZE)
diff --git a/usr/src/uts/sun4/os/machdep.c b/usr/src/uts/sun4/os/machdep.c
index e6e4875da6..b9c01971ac 100644
--- a/usr/src/uts/sun4/os/machdep.c
+++ b/usr/src/uts/sun4/os/machdep.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Joyent, Inc. All rights reserved.
*/
#include <sys/types.h>
@@ -893,3 +894,13 @@ lbolt_softint_post(void)
{
setsoftint(lbolt_softint_inum);
}
+
+void
+thread_splitstack_run(caddr_t addr, void (*func)(void *), void *)
+{
+ panic("thread_splitstack() not supported on SPARC");
+}
+
+void
+thread_splitstack_cleanup(void)
+{}
diff --git a/usr/src/uts/sun4u/io/pci/pcisch.c b/usr/src/uts/sun4u/io/pci/pcisch.c
index 27916bcc74..2a2bd05970 100644
--- a/usr/src/uts/sun4u/io/pci/pcisch.c
+++ b/usr/src/uts/sun4u/io/pci/pcisch.c
@@ -2973,11 +2973,12 @@ iommu_tlb_scrub(iommu_t *iommu_p, int scrub)
"\tContext=%lx %sWritable %sStreamable\n"
"\tPCI Page Size=%sk Address in page %lx\n",
ddi_driver_name(dip), ddi_get_instance(dip), errstat, i,
- (tag & TLBTAG_CONTEXT_BITS) >> TLBTAG_CONTEXT_SHIFT,
+ (uint64_t)(tag & TLBTAG_CONTEXT_BITS) >>
+ TLBTAG_CONTEXT_SHIFT,
(tag & TLBTAG_WRITABLE_BIT) ? "" : neg,
(tag & TLBTAG_STREAM_BIT) ? "" : neg,
(tag & TLBTAG_PGSIZE_BIT) ? "64" : "8",
- (tag & TLBTAG_PCIVPN_BITS) << 13);
+ (uint64_t)(tag & TLBTAG_PCIVPN_BITS) << 13);
cmn_err(CE_CONT, "Memory: %sValid %sCacheable Page Frame=%lx\n",
(data & TLBDATA_VALID_BIT) ? "" : neg,
(data & TLBDATA_CACHE_BIT) ? "" : neg, pfn);